pax_global_header00006660000000000000000000000064140033314670014513gustar00rootroot0000000000000052 comment=12069d720f43830ae9791e8b0f4c4fa3c88012a0 simde-0.7.2/000077500000000000000000000000001400333146700126225ustar00rootroot00000000000000simde-0.7.2/.all-contributorsrc000066400000000000000000000122611400333146700164550ustar00rootroot00000000000000{ "projectName": "simde", "projectOwner": "nemequ", "repoType": "github", "repoHost": "https://github.com", "files": [ "README.md" ], "imageSize": 100, "commit": true, "commitConvention": "none", "contributors": [ { "login": "nemequ", "name": "Evan Nemerson", "avatar_url": "https://avatars0.githubusercontent.com/u/1151?v=4", "profile": "https://nemequ.github.io/", "contributions": [ "code", "content", "doc", "example", "ideas", "question", "review", "test", "tutorial", "talk", "bug", "infra", "maintenance", "projectManagement" ] }, { "login": "mr-c", "name": "Michael R. Crusoe", "avatar_url": "https://avatars3.githubusercontent.com/u/1330696?v=4", "profile": "https://orcid.org/0000-0002-2961-9670", "contributions": [ "bug", "code", "eventOrganizing", "fundingFinding", "ideas", "infra", "platform", "test" ] }, { "login": "himanshi18037", "name": "HIMANSHI MATHUR", "avatar_url": "https://avatars1.githubusercontent.com/u/43923076?v=4", "profile": "https://github.com/himanshi18037", "contributions": [ "code", "test" ] }, { "login": "masterchef2209", "name": "Hidayat Khan", "avatar_url": "https://avatars0.githubusercontent.com/u/27916352?v=4", "profile": "https://github.com/masterchef2209", "contributions": [ "code", "test" ] }, { "login": "rosbif", "name": "rosbif", "avatar_url": "https://avatars1.githubusercontent.com/u/553899?v=4", "profile": "https://github.com/rosbif", "contributions": [ "code", "test", "bug", "ideas" ] }, { "login": "junaruga", "name": "Jun Aruga", "avatar_url": "https://avatars2.githubusercontent.com/u/121989?v=4", "profile": "http://junaruga.hatenablog.com/", "contributions": [ "code", "ideas", 
"platform", "infra", "maintenance", "test", "bug" ] }, { "login": "marmeladema", "name": "Élie ROUDNINSKI", "avatar_url": "https://avatars2.githubusercontent.com/u/1629419?v=4", "profile": "https://github.com/marmeladema", "contributions": [ "code", "test" ] }, { "login": "jsbache", "name": "Jesper Storm Bache", "avatar_url": "https://avatars3.githubusercontent.com/u/7937081?v=4", "profile": "http://www.bache.name", "contributions": [ "code" ] }, { "login": "jeffdaily", "name": "Jeff Daily", "avatar_url": "https://avatars1.githubusercontent.com/u/904248?v=4", "profile": "https://github.com/jeffdaily", "contributions": [ "code", "infra" ] }, { "login": "yekm", "name": "Pavel", "avatar_url": "https://avatars2.githubusercontent.com/u/205196?v=4", "profile": "https://github.com/yekm", "contributions": [ "code" ] }, { "login": "sabarishbollapragada", "name": "Sabarish Bollapragada", "avatar_url": "https://avatars3.githubusercontent.com/u/36379720?v=4", "profile": "https://github.com/sabarishbollapragada", "contributions": [ "code" ] }, { "login": "gh2o", "name": "Gavin Li", "avatar_url": "https://avatars2.githubusercontent.com/u/371529?v=4", "profile": "http://www.thegavinli.com/", "contributions": [ "code" ] }, { "login": "betajippity", "name": "Yining Karl Li", "avatar_url": "https://avatars0.githubusercontent.com/u/1057198?v=4", "profile": "http://www.yiningkarlli.com", "contributions": [ "code" ] }, { "login": "anirbandey303", "name": "Anirban Dey", "avatar_url": "https://avatars1.githubusercontent.com/u/29774651?v=4", "profile": "https://www.facebook.com/anirbandey303", "contributions": [ "doc" ] }, { "login": "Un1Gfn", "name": "Darren Ng", "avatar_url": "https://avatars3.githubusercontent.com/u/28521292?v=4", "profile": "https://github.com/Un1Gfn", "contributions": [ "doc" ] }, { "login": "FaresSalem", "name": "FaresSalem", "avatar_url": "https://avatars0.githubusercontent.com/u/7736245?v=4", "profile": "https://github.com/FaresSalem", "contributions": [ "doc" ] 
}, { "login": "GorePradnyesh", "name": "Pradnyesh Gore", "avatar_url": "https://avatars0.githubusercontent.com/u/843197?v=4", "profile": "https://github.com/GorePradnyesh", "contributions": [ "code" ] }, { "login": "seanptmaher", "name": "Sean Maher", "avatar_url": "https://avatars0.githubusercontent.com/u/39571964?v=4", "profile": "https://github.com/seanptmaher", "contributions": [ "code" ] } ], "contributorsPerLine": 7 } simde-0.7.2/.appveyor.yml000066400000000000000000000030231400333146700152660ustar00rootroot00000000000000environment: MESON_EXE_WRAPPER: "['C:\\Program Files\\OpenCppCoverage\\OpenCppCoverage.exe', '--export_type=cobertura', '--']" matrix: - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019 VCVARSALL: C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat PYTHON_DIR: C:\Python38-x64 - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 VCVARSALL: C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat PYTHON_DIR: C:\Python38-x64 - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 VCVARSALL: C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat PYTHON_DIR: C:\Python38-x64 branches: except: - /^ci/(?!appveyor)(/.+)?/ platform: - x64 install: - cmd: set PATH=%PYTHON_DIR%;%PYTHON_DIR%\scripts;%PATH% - cmd: pip install --upgrade ninja meson - cmd: choco install opencppcoverage codecov - cmd: call "%VCVARSALL%" amd64 before_build: - ps: | git submodule -q update --init --recursive build_script: - cmd: meson --backend=ninja build - cmd: ninja -C build -v test ## This seems to work; data shows up on CodeCov, but it emits an error ## which makes AppVeyor thing the whole build failed. 
Example: ## https://ci.appveyor.com/project/quixdb/ci-noise/build/job/e7d1wpt9icu77fn2 # after_test: # - ps: | # Invoke-WebRequest -Uri 'https://codecov.io/bash' -OutFile codecov.sh # $env:APPVEYOR="true" # $env:CI="true" # bash codecov.sh -f "run-testsCoverage.xml" simde-0.7.2/.azure-pipelines.yml000066400000000000000000000055301400333146700165420ustar00rootroot00000000000000pool: vmImage: 'ubuntu-latest' trigger: branches: exclude: - master # - ci/* # include: # - ci/azure* jobs: - job: clang_analysis variables: CC: clang-10 CXX: clang++-10 # CFLAGS: -Weverything -Werror # CXXFLAGS: -Weverything -Werror steps: - script: uname -a && cat /proc/cpuinfo /proc/meminfo displayName: System Information - script: sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' && sudo apt-get update && sudo apt-get install -y ninja-build ninja-build python3-pip python3-setuptools python3-wheel clang-10 clang-tools-10 displayName: Install APT Dependencies - script: pip3 install -v meson displayName: Install pip Dependencies - script: scan-build-10 /home/vsts/.local/bin/meson setup build -Db_sanitize=address,undefined displayName: Configure - script: scan-build-10 ninja -C build -v displayName: Build - script: ninja -C build -v test displayName: Test - job: gcc_analysis variables: CC: gcc-10 CXX: g++-10 CFLAGS: -fanalyzer -Wextra -Werror CXXFLAGS: -fanalyzer -Wextra -Werror steps: - script: uname -a && cat /proc/cpuinfo /proc/meminfo displayName: System Information - script: sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' && sudo apt-get update && sudo apt-get install -y ninja-build ninja-build python3-pip python3-setuptools python3-wheel gcc-10 g++-10 displayName: Install APT Dependencies - script: pip3 install meson displayName: Install pip Dependencies - script: /home/vsts/.local/bin/meson setup build -Db_sanitize=address,undefined displayName: Configure - script: ninja -C build -v displayName: Build - script: ninja -C build -v test displayName: Test # - job: loongson3a # 
container: # image: debian:unstable # options: "--name ci-container -v /usr/bin/docker:/tmp/docker:ro" # steps: # # https://github.com/Microsoft/azure-pipelines-agent/issues/2043 # - script: /tmp/docker exec -t -u 0 ci-container sh -c "apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -o Dpkg::Options::="--force-confold" -y install sudo" # displayName: 'Install Sudo in container' # - script: uname -a && cat /proc/cpuinfo /proc/meminfo # displayName: System Information # - script: sudo dpkg --add-architecture mips64el && sudo apt-get update && sudo apt-get install -y ninja-build ninja-build python3-pip python3-setuptools python3-wheel libc6:mips64el gcc-10-mips64el-linux-gnuabi64 g++-10-mips64el-linux-gnuabi64 qemu-user-static # displayName: Install APT Dependencies # - script: pip3 install meson # displayName: Install pip Dependencies # - script: /home/vsts/.local/bin/meson setup build --cross-file=docker/cross-files/loongson-gcc-10.cross # displayName: Configure # - script: ninja -C build -v # displayName: Build # - script: ninja -C build -v test # displayName: Test simde-0.7.2/.circleci/000077500000000000000000000000001400333146700144555ustar00rootroot00000000000000simde-0.7.2/.circleci/config.yml000066400000000000000000000073551400333146700164570ustar00rootroot00000000000000version: 2.1 jobs: gcc: docker: - image: debian:testing-slim auth: username: $DOCKERHUB_USERNAME password: $DOCKERHUB_PASSWORD environment: CFLAGS: -Wextra -Werror -march=native CXXFLAGS: -Wextra -Werror -march=native steps: - checkout - run: cat /proc/cpuinfo /proc/meminfo - run: apt-get update && apt-get install -y ninja-build ninja-build python3-pip python3-setuptools python3-wheel gcovr - run: apt-get install -y gcc g++ - run: pip3 install meson - run: CC=gcc CXX=g++ /usr/local/bin/meson setup build -Db_coverage=true - run: ninja -C build -v -j 3 - run: ninja -C build -v test clang: docker: - image: debian:testing-slim auth: username: $DOCKERHUB_USERNAME password: 
$DOCKERHUB_PASSWORD environment: CFLAGS: -Weverything -Werror -march=native CXXFLAGS: -Weverything -Werror -march=native steps: - checkout - run: cat /proc/cpuinfo /proc/meminfo - run: apt-get update && apt-get install -y ninja-build ninja-build python3-pip python3-setuptools python3-wheel gcovr - run: apt-get install -y clang - run: pip3 install meson - run: CC=clang CXX=clang++ /usr/local/bin/meson setup build -Db_coverage=true - run: ninja -C build -v -j 3 - run: ninja -C build -v test loongson: docker: - image: debian:testing-slim auth: username: $DOCKERHUB_USERNAME password: $DOCKERHUB_PASSWORD environment: CC: mips64el-linux-gnuabi64-gcc CXX: mips64el-linux-gnuabi64-g++ CFLAGS: -Wextra -Werror CXXFLAGS: -Wextra -Werror steps: - checkout - run: cat /proc/cpuinfo /proc/meminfo - run: dpkg --add-architecture mips64el - run: apt-get update && apt-get install -y ninja-build ninja-build python3-pip python3-setuptools python3-wheel gcovr - run: apt-get install -y gcc-mips64el-linux-gnuabi64 g++-mips64el-linux-gnuabi64 qemu-user-static libc6-dev:mips64el libstdc++6:mips64el - run: pip3 install meson - run: /usr/local/bin/meson setup build -Db_coverage=true --cross-file=docker/cross-files/loongson-gcc.cross - run: ninja -C build -v -j 3 # - run: ninja -C build -v -j 3 test i686: docker: - image: debian:testing-slim auth: username: $DOCKERHUB_USERNAME password: $DOCKERHUB_PASSWORD environment: CFLAGS: -Wextra -Werror -march=native CXXFLAGS: -Wextra -Werror -march=native steps: - checkout - run: name: System Information command: cat /proc/cpuinfo /proc/meminfo - run: name: Install architecture command: | dpkg --add-architecture i386 - run: apt-get update && apt-get install -y ninja-build ninja-build python3-pip python3-setuptools python3-wheel gcovr - run: apt-get install -y {gcc,g++}-i686-linux-gnu libc6-dev:i386 - run: pip3 install meson - run: CC=x86_64-linux-gnu-gcc CXX=x86_64-linux-gnu-g++ /usr/local/bin/meson setup build -Db_coverage=true - run: ninja -C build -v 
-j 3 - run: ninja -C build -v test workflows: version: 2 test: jobs: - gcc: filters: branches: ignore: - master - /^ci/(?!circleci).*$/ - clang: filters: branches: ignore: - master - /^ci/(?!circleci).*$/ - loongson: filters: branches: ignore: - master - /^ci/(?!circleci).*$/ - i686: filters: branches: ignore: - master - /^ci/(?!circleci).*$/ simde-0.7.2/.cirrus.yml000066400000000000000000000013131400333146700147300ustar00rootroot00000000000000container: image: debian:testing-slim task: name: Sanitizers skip: $BRANCH == "master" || ($BRANCH != 'ci/cirrus' && $BRANCH =~ '^ci/.+') env: CC: clang CXX: clang++ CFLAGS: -Wextra -Werror -march=native CXXFLAGS: -Wextra -Werror -march=native system_info_script: - cat /proc/cpuinfo - cat /proc/meminfo install_dependencies_script: - apt-get update - apt-get install -y ninja-build ninja-build python3-pip python3-setuptools python3-wheel gcovr clang - pip3 install meson configure_script: - /usr/local/bin/meson setup build -Db_coverage=true -Db_sanitize=address,undefined build_script: - ninja -C build -v -j 3 test_script: - ninja -C build -v test simde-0.7.2/.drone.star000066400000000000000000000067721400333146700147160ustar00rootroot00000000000000# -*- Python -*- # Drone CI Starlark configuration file. # https://docs.drone.io/pipeline/scripting/starlark/ # Run `drone starlark convert --stdout` to verify `.drone.star`. 
def main(ctx): jobs = [] for arch in ['aarch64', 'armv8', 'armv7']: drone_arch = 'arm' if arch == 'aarch64': drone_arch = 'arm64' for compiler in ['gcc', 'clang']: cc = compiler cxx = compiler + '++' compiler_flags = [] packages = ['ninja-build', 'git-core', 'python3-pip', 'gcovr'] if arch == 'armv7': compiler_flags.extend(['-march=armv7-a', '-mfpu=neon']) elif arch == 'armv8': compiler_flags.extend(['-march=armv8-a', '-mfpu=neon']) elif arch == 'aarch64': compiler_flags.extend(['-march=armv8-a+simd+crypto+crc']) if compiler == 'gcc': cxx = 'g++' compiler_flags.extend(['-Wextra', '-Werror']) packages.extend(['gcc', 'g++']) elif compiler == 'clang': compiler_flags.extend(['-Weverything', '-Werror']) packages.extend(['clang']) cflags = ' '.join(compiler_flags) cxxflags = ' '.join(compiler_flags) environment = { "CC": cc, "CXX": cxx, "CFLAGS": cflags, "CXXFLAGS": cxxflags, "DEBIAN_FRONTEND": "noninteractive", } exclude_branches = ['master'] for provider in ['gha', 'cirrus', 'semaphore', 'circleci', 'appveyor', 'azure', 'travis']: exclude_branches.append('ci/' + provider) exclude_branches.append('ci/' + provider + '/**') jobs.append({ "kind": "pipeline", "type": "docker", "name": compiler + ' ' + arch, "platform": { "os": "linux", "arch": drone_arch }, "steps": [ { "name": "test", "image": "ubuntu:bionic", "environment": environment, "commands": [ "cat /proc/cpuinfo", "apt-get update -y", "apt-get install -y " + " ".join(packages), "pip3 install meson", "meson build -Db_coverage=true || (cat build/meson-logs/meson-log.txt; false)", "ninja -C build -v test", ], } ], "trigger": { "branch": { "exclude": exclude_branches } }, }) # jobs.append({ # "kind": "pipeline", # "type": "docker", # "name": "native aliases", # "platform": { # "os": "linux", # "arch": "arm64" # }, # "steps": [ # { # "name": "test", # "image": "ubuntu:bionic", # "environment": { # "CC": "clang", # "CXX": "clang++", # "CFLAGS": "-march=armv8a+simd+crypto+crc -Weverything -Wextra 
-DSIMDE_ENABLE_NATIVE_ALIASES -DSIMDE_NATIVE_ALIASES_TESTING", # "CXXFLAGS": "-march=armv8a+simd+crypto+crc -Weverything -Wextra -DSIMDE_ENABLE_NATIVE_ALIASES -DSIMDE_NATIVE_ALIASES_TESTING", # "DEBIAN_FRONTEND": "noninteractive", # }, # "commands": [ # "cat /proc/cpuinfo", # "apt-get update -y", # "apt-get install -y " + " ".join(['ninja-build', 'git-core', 'python3-pip', 'gcovr', 'clang', 'curl']), # "pip3 install meson", # "(cd test && sh -x ./native-aliases.sh)", # "meson build -Db_coverage=true", # "ninja -C build -v test", # ], # }, # ], # "trigger": { # "branch": { # "exclude": exclude_branches # } # } # }) return jobs simde-0.7.2/.editorconfig000066400000000000000000000010111400333146700152700ustar00rootroot00000000000000# Configuration file for EditorConfig # More information is available under http://EditorConfig.org # Ignore any other files further up in the file system root = true # Configuration for all files [*] # Enforce Unix style line endings (\n only) end_of_line = lf # Always end files with a blank line insert_final_newline = true # Force space characters for indentation indent_style = space # Always indent by 2 characters indent_size = 2 # Remove whitespace characters at the end of line trim_trailing_whitespace = true simde-0.7.2/.github/000077500000000000000000000000001400333146700141625ustar00rootroot00000000000000simde-0.7.2/.github/workflows/000077500000000000000000000000001400333146700162175ustar00rootroot00000000000000simde-0.7.2/.github/workflows/ci.yml000066400000000000000000000357711400333146700173520ustar00rootroot00000000000000name: CI on: push: branches-ignore: - 'master' - 'ci/**' - '!ci/gha**' pull_request: branches: - 'master' jobs: formatting: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 with: fetch-depth: 2 - name: Install pcre2grep run: sudo apt-get update && sudo apt-get install -y pcre2-utils # Check for trailing whitespace - name: Trailing whitespace run: find simde/ -name '*.c' -o -name '*.h' -exec grep -nP 
'\s+$' {} + && exit 1 || exit 0 # We use spaces, not tabs. I don't want to start a holy war here; # I don't actually have a strong preference between the two, but I # do have a strong preference for consistency, so don't @ me. - name: Tabs run: find simde/ -name '*.c' -o -name '*.h' -exec grep -nP '\t' {} + && exit 1 || exit 0 # s/8/16/ will result in this if the input is x86. - name: Bad substitutions run: git grep -i 'x''1''6''6' && exit 1 || exit 0 - name: Incorrect assertions in test/ run: grep -PR '(?<=[^a-zA-Z0-9_])simde_assert_u?int(8|16|32|64)(?>[^a-zA-Z0-9_])' test/ && exit 1 || exit 0 # Check to make sure no source files have the executable bit set - name: Executable sources run: find \( -name '*.c' -o -name '*.h' \) -executable | grep -q '.' && exit 1 || exit 0 # Make sure neon.h includes all the NEON headers. - name: Missing NEON includes run: for f in simde/arm/neon/*.h; do grep -q "include \"neon/$(basename "$f")\"" simde/arm/neon.h || (echo "Missing $f" && exit 1); done # Make sure we can find the expected header guards. It's easy to miss this when doing C&P - name: Header guards run: for file in $(find simde/*/ -name '*.h'); do grep -q "$(echo "$file" | tr '[:lower:]' '[:upper:]' | tr '[:punct:]' '_')" "$file" || (echo "Missing or incorrect header guard in $file" && exit 1); done # There should be an empty line at the end of every file - name: Newline at EOF run: for file in $(find simde -name '*.h'); do if [ ! -z "$(tail -c 1 "$file")" ]; then echo "No newline at end of $file" && exit 1; fi; done # Don't #ifndef ; use !defined(...) instead. 
ifndef leads to annoying inconsistencies - name: ifndef run: for file in $(find simde -name '*.h'); do grep -qP '^ *# *ifndef ' "${file}" && exit 1 || exit 0; done # List of headers we want Meson to install - name: Meson install headers run: for file in $(find simde -name '*.h'); do grep -qF "$(basename "${file}" .h)" meson.build || (echo "${file} missing from top-level meson.build" && exit 1); done # Make sure we don't accidentally use `vector ...` instead of SIMDE_POWER_ALTIVEC_VECTOR(...) - name: AltiVec raw vector keyword run: find simde/ -name '*.c' -o -name '*.h' -exec grep -nP 'vector( +)((bool|signed|unsigned) +)?(double|float|long long|long|int|short|char)' {} + && exit 1 || exit 0 # Check indentation of preprocessor directives. - name: Preprocessor directive indentation run: find simde/*/ -name 'avx*.h' -exec pcre2grep -M '{\n#' {} + && exit 1 || exit 0 # Running the tests in SDE is pretty slow, so we put these early # in the file so hopefully they start sooner. SDE: runs-on: ubuntu-latest strategy: matrix: arch: ["icelake-server"] env: CC: clang CXX: clang++ CFLAGS: -march=icelake-server -Wall -Wextra -Werror CXXFLAGS: -march=icelake-server -Wall -Wextra -Werror steps: - uses: actions/checkout@v2 with: submodules: recursive - name: CPU Information run: cat /proc/cpuinfo - name: Install APT Dependencies run: sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' && sudo apt-get update && sudo apt-get install -y ninja-build ninja-build python3-pip python3-setuptools python3-wheel parallel gcovr - name: Install pip Dependencies run: pip3 install meson - name: Download SDE run: ./test/download-sde.sh ~/sde - name: Configure run: ~/.local/bin/meson setup build -Db_coverage=true - name: Build run: ninja -C build -v - name: Test run: | find build/test/ -type f -executable -exec parallel ~/sde/sde64 -- ::: {} \; - name: Coverage Report run: ninja -C build -v coverage-xml - name: CodeCov.io uses: codecov/codecov-action@v1 with: file: 
./build/meson-logs/coverage.xml sleef-SDE: runs-on: ubuntu-20.04 strategy: matrix: arch: ["icelake-server"] env: CC: clang CXX: clang++ CFLAGS: -march=icelake-server -Wall -Wextra -Werror CXXFLAGS: -march=icelake-server -Wall -Wextra -Werror steps: - uses: actions/checkout@v2 with: submodules: recursive - name: CPU Information run: cat /proc/cpuinfo - name: Install APT Dependencies run: sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' && sudo apt-get update && sudo apt-get install -y ninja-build ninja-build python3-pip python3-setuptools python3-wheel parallel gcovr libsleef-dev - name: Install pip Dependencies run: pip3 install meson - name: Download SDE run: ./test/download-sde.sh ~/sde - name: Configure run: ~/.local/bin/meson setup build -Dsleef=enabled -Db_coverage=true - name: Build run: ninja -C build -v - name: Test run: | find build/test/ -type f -executable -exec parallel ~/sde/sde64 -- ::: {} \; - name: Coverage Report run: ninja -C build -v coverage-xml - name: CodeCov.io uses: codecov/codecov-action@v1 with: file: ./build/meson-logs/coverage.xml x86: runs-on: ubuntu-latest strategy: matrix: isax: ["", "-msse3", "-mssse3", "-msse4.1", "-msse4.2", "-mavx", "-mfma", "-mavx2", "-march=native"] env: CFLAGS: -Wall -Wextra -Werror ${{ matrix.isax }} CXXFLAGS: -Wall -Wextra -Werror ${{ matrix.isax }} steps: - uses: actions/checkout@v2 with: submodules: recursive - name: CPU Information run: cat /proc/cpuinfo - name: Install APT Dependencies run: sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' && sudo apt-get update && sudo apt-get install -y ninja-build ninja-build python3-pip python3-setuptools python3-wheel parallel gcovr - name: Install pip Dependencies run: pip3 install meson - name: Configure run: ~/.local/bin/meson setup build -Db_coverage=true - name: Build run: ninja -C build -v - name: Test run: ninja -C build -v test - name: Coverage Report run: ninja -C build -v coverage-xml - name: CodeCov.io uses: codecov/codecov-action@v1 with: file: 
./build/meson-logs/coverage.xml avx512: runs-on: ubuntu-latest strategy: matrix: isax: ["-mavx512f", "-mavx512bw", "-mavx512cd", "-mavx512dq", "-mavx512vl", "-mavx512bw -mavx512vl", "-mavx512vl -mavx512dq"] env: CFLAGS: -Wall -Wextra -Werror ${{ matrix.isax }} CXXFLAGS: -Wall -Wextra -Werror ${{ matrix.isax }} steps: - uses: actions/checkout@v2 with: submodules: recursive - name: CPU Information run: cat /proc/cpuinfo - name: Install APT Dependencies run: sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' && sudo apt-get update && sudo apt-get install -y ninja-build ninja-build python3-pip python3-setuptools python3-wheel parallel gcovr - name: Install pip Dependencies run: pip3 install meson - name: Configure run: ~/.local/bin/meson setup build -Db_coverage=true - name: Build run: ninja -C build -v # We can't actually run the tests without AVX-512. GitHub Actions doesn't # currently support it, and SDE is too slow. # - name: Test # run: ninja -C build -v test # - name: Coverage Report # run: ninja -C build -v coverage-xml # - name: CodeCov.io # uses: codecov/codecov-action@v1 # with: # file: ./build/meson-logs/coverage.xml emscripten: runs-on: ubuntu-latest env: CFLAGS: -Weverything -Werror -O3 -msimd128 CXXFLAGS: -Weverything -Werror -O3 -msimd128 steps: - uses: actions/checkout@v2 with: submodules: recursive - name: CPU Information run: cat /proc/cpuinfo - name: Install APT Dependencies run: sudo add-apt-repository ppa:ubuntu-toolchain-r/test && sudo apt-get update && sudo apt-get -yq install ninja-build - name: Install emscripten run: | git clone https://github.com/emscripten-core/emsdk.git cd emsdk ./emsdk update-tags ./emsdk install tot ./emsdk activate tot source emsdk_env.sh - name: Install v8 run: sudo npm install jsvu -g && jsvu --os=linux64 --engines=v8 - name: Configure run: mkdir test/build && cd test/build && ../../emsdk/upstream/emscripten/emcmake cmake -G Ninja .. 
- name: Build run: ninja -C test/build -v - name: Test run: cd test/build && ~/.jsvu/v8 --experimental-wasm-simd run-tests.js native-aliases: runs-on: ubuntu-latest env: CC: gcc-10 CXX: g++-10 CFLAGS: -DSIMDE_ENABLE_NATIVE_ALIASES -DSIMDE_NATIVE_ALIASES_TESTING -march=native -Wall -Wextra -Werror CXXFLAGS: -DSIMDE_ENABLE_NATIVE_ALIASES -DSIMDE_NATIVE_ALIASES_TESTING -march=native -Wall -Wextra -Werror steps: - uses: actions/checkout@v2 with: submodules: recursive - name: CPU Information run: cat /proc/cpuinfo - name: Install APT Dependencies run: sudo add-apt-repository ppa:ubuntu-toolchain-r/test && sudo apt-get update && sudo apt-get -yq install libxml2-utils ninja-build python3-pip python3-setuptools python3-wheel gcc-10 g++-10 parallel gcovr - name: Install pip Dependencies run: pip3 install meson - name: Convert run: ./test/native-aliases.sh - name: Configure run: ~/.local/bin/meson setup build -Db_coverage=true - name: Build run: ninja -C build -v - name: Test run: ninja -C build -v test - name: Coverage Report run: ninja -C build -v coverage-xml - name: CodeCov.io uses: codecov/codecov-action@v1 with: file: ./build/meson-logs/coverage.xml sleef: runs-on: ubuntu-20.04 env: CFLAGS: -march=native -Wall -Wextra -Werror CXXFLAGS: -march=native -Wall -Wextra -Werror steps: - uses: actions/checkout@v2 with: submodules: recursive - name: CPU Information run: cat /proc/cpuinfo - name: Install APT Dependencies run: sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' && sudo apt-get update && sudo apt-get install -y ninja-build ninja-build python3-pip python3-setuptools python3-wheel parallel gcovr libsleef-dev - name: Install pip Dependencies run: pip3 install meson - name: Configure run: ~/.local/bin/meson setup build -Dsleef=enabled -Db_coverage=true - name: Build run: ninja -C build -v - name: Test run: ninja -C build -v test - name: Coverage Report run: ninja -C build -v coverage-xml - name: CodeCov.io uses: codecov/codecov-action@v1 with: file: 
./build/meson-logs/coverage.xml xenial-gcc: runs-on: ubuntu-16.04 strategy: matrix: version: ["4.7", "4.8", "4.9", "5"] env: CC: gcc-${{ matrix.version }} CXX: g++-${{ matrix.version }} CFLAGS: -mavx2 -Wall -Wextra -Werror CXXFLAGS: -mavx2 -Wall -Wextra -Werror steps: - uses: actions/checkout@v2 with: submodules: recursive - name: CPU Information run: cat /proc/cpuinfo - name: Install APT Dependencies run: sudo add-apt-repository ppa:ubuntu-toolchain-r/test && sudo apt-get update && sudo apt-get -yq install ninja-build parallel "${CC}" "${CXX}" - name: Configure run: mkdir test/build && cd test/build && cmake -G Ninja .. - name: Build run: ninja -C test/build -v - name: Test run: ninja -C test/build -v test xenial-clang: runs-on: ubuntu-16.04 strategy: matrix: version: ["3.5", "3.9", "6.0"] env: CC: clang-${{ matrix.version }} CXX: clang++-${{ matrix.version }} CFLAGS: -mavx2 -Wall -Wextra -Werror CXXFLAGS: -mavx2 -Wall -Wextra -Werror steps: - uses: actions/checkout@v2 with: submodules: recursive - name: CPU Information run: cat /proc/cpuinfo - name: Install APT Dependencies run: sudo add-apt-repository ppa:ubuntu-toolchain-r/test && sudo apt-get update && sudo apt-get -yq install ninja-build parallel clang-${{ matrix.version }} - name: Configure run: mkdir test/build && cd test/build && cmake -G Ninja .. 
- name: Build run: ninja -C test/build -v - name: Test run: ninja -C test/build -v test macos: runs-on: macos-latest strategy: matrix: # https://www.jessesquires.com/blog/2020/01/06/selecting-an-xcode-version-on-github-ci/ # https://github.com/actions/virtual-environments/blob/master/images/macos/macos-10.15-Readme.md#xcode xcode: ["11.3.1", "11.7", "12.3"] env: DEVELOPER_DIR: /Applications/Xcode_${{ matrix.xcode }}.app steps: - uses: actions/checkout@v2 with: submodules: recursive - name: System Information run: system_profiler - name: Compiler version run: cc --version - name: Install Homebrew Dependencies run: brew install meson ninja gcovr - name: Configure run: meson setup build -Db_coverage=true - name: Build run: ninja -C build -v - name: Coverage Report run: ninja -C build -v coverage-xml - name: CodeCov.io uses: codecov/codecov-action@v1 with: file: ./build/meson-logs/coverage.xml icc: runs-on: ubuntu-latest env: CC: /home/runner/.local/bin/icc CXX: /home/runner/.local/bin/icpc CFLAGS: -Werror -wd13200 -wd13203 -wd16219 -Wall -Werror -march=native CXXFLAGS: -wd13200 -wd13203 -wd16219 -Wall -Werror -march=native steps: - uses: actions/checkout@v2 - name: CPU Information run: cat /proc/cpuinfo - name: Install APT Dependencies run: sudo apt-get install -y ninja-build ninja-build python3-pip python3-setuptools python3-wheel - name: Install ICC run: | curl -s https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB | sudo apt-key add - sudo add-apt-repository 'deb https://apt.repos.intel.com/oneapi all main' sudo apt-get update sudo apt-get install -y intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic ninja-build ninja-build python3-pip python3-setuptools python3-wheel mkdir -p ~/.local/bin/ || true for exe in icc icpc; do printf '#!/bin/bash\nARGS="$@"\nsource /opt/intel/oneapi/compiler/latest/env/vars.sh >/dev/null\n%s ${ARGS}\n' "${exe}" > ~/.local/bin/"${exe}" chmod 0755 ~/.local/bin/"${exe}"; done - name: Install pip Dependencies run: 
pip3 install meson - name: Configure run: ~/.local/bin/meson setup build - name: Build run: ninja -C build -v - name: Test run: ninja -C build -v test simde-0.7.2/.github/workflows/codeql-analysis.yml000066400000000000000000000033061400333146700220340ustar00rootroot00000000000000name: "CodeQL" on: push: branches: [master] pull_request: # The branches below must be a subset of the branches above branches: [master] schedule: - cron: '0 8 * * 3' jobs: analyze: name: Analyze runs-on: ubuntu-latest strategy: fail-fast: false matrix: # Override automatic language detection by changing the below list # Supported options are ['csharp', 'cpp', 'go', 'java', 'javascript', 'python'] language: ['cpp'] # Learn more... # https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection steps: - name: Checkout repository uses: actions/checkout@v2 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. fetch-depth: 2 # If this run was triggered by a pull request event, then checkout # the head of the pull request instead of the merge commit. - run: git checkout HEAD^2 if: ${{ github.event_name == 'pull_request' }} # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL uses: github/codeql-action/init@v1 with: languages: ${{ matrix.language }} - run: | sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' && sudo apt-get update && sudo apt-get install -y ninja-build ninja-build python3-pip python3-setuptools python3-wheel gcc-10 g++-10 pip3 install meson $HOME/.local/bin/meson setup build ninja -C build -v ninja -C build -v test - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v1 simde-0.7.2/.github/workflows/ossar-analysis.yml000066400000000000000000000025121400333146700217120ustar00rootroot00000000000000# This workflow integrates a collection of open source static analysis tools # with GitHub code scanning. For documentation, or to provide feedback, visit # https://github.com/github/ossar-action name: OSSAR on: push: branches: [master] jobs: OSSAR-Scan: # OSSAR runs on windows-latest. # ubuntu-latest and macos-latest support coming soon runs-on: windows-latest steps: # Checkout your code repository to scan - name: Checkout repository uses: actions/checkout@v2 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. fetch-depth: 2 # If this run was triggered by a pull request event, then checkout # the head of the pull request instead of the merge commit. 
- run: git checkout HEAD^2 if: ${{ github.event_name == 'pull_request' }} # Install dotnet, used by OSSAR - name: Install .NET uses: actions/setup-dotnet@v1 with: dotnet-version: '3.1.201' # Run open source static analysis tools - name: Run OSSAR uses: github/ossar-action@v1 id: ossar # Upload results to the Security tab - name: Upload OSSAR results uses: github/codeql-action/upload-sarif@v1 with: sarif_file: ${{ steps.ossar.outputs.sarifFile }} simde-0.7.2/.gitignore000066400000000000000000000000131400333146700146040ustar00rootroot00000000000000*~ /build* simde-0.7.2/.gitmodules000066400000000000000000000001221400333146700147720ustar00rootroot00000000000000[submodule "munit"] path = test/munit url = https://github.com/nemequ/munit.git simde-0.7.2/.no-test/000077500000000000000000000000001400333146700142715ustar00rootroot00000000000000simde-0.7.2/.no-test/README.md000066400000000000000000000006531400333146700155540ustar00rootroot00000000000000# SIMDe Without Test Cases This repository contains only the core of [SIMDe](https://github.com/simd-everywhere/simde/simde). It is generated automatically for every commit to master, and is intended to be used as a submodule in projects which don't want to include the (rather large) test cases. All development work happens in the main repository, please do not file issues or create pull requests against this repository. simde-0.7.2/.semaphore/000077500000000000000000000000001400333146700146635ustar00rootroot00000000000000simde-0.7.2/.semaphore/semaphore.yml000066400000000000000000000042731400333146700173770ustar00rootroot00000000000000version: v1.0 name: Build agent: machine: type: e1-standard-2 os_image: ubuntu1804 blocks: - name: CI skip: when: "branch = 'master' OR (branch !~ '^ci/semaphore(/.*)?' 
AND branch =~ '^ci/.+')" task: jobs: - name: GCC matrix: - env_var: GCC_VERSION values: ['8', '7', '6', '5'] commands: - cat /proc/cpuinfo - checkout - git submodule update --init - sudo apt-get update - sudo apt-get install -y ninja-build python3-pip python3-setuptools gcovr gcc-${GCC_VERSION} g++-${GCC_VERSION} - pip3 install meson - mkdir build - CC="gcc-${GCC_VERSION}" CXX="g++-${GCC_VERSION}" ~/.local/bin/meson setup build -Db_coverage=true --optimization $(expr $RANDOM % 3) - ninja -C build -v - './build/test/run-tests --list | grep -oP ''^/([^\/]+)/([^\/]+)'' | sort -u | xargs parallel ./build/test/run-tests --color always {} :::' - ninja -C build coverage-xml env_vars: - name: CFLAGS value: '-Wextra -Werror -march=native' - name: CXXFLAGS value: '-Wextra -Werror -march=native' - name: clang matrix: - env_var: CLANG_VERSION values: ['10', '8', '6.0', '4.0', '3.9'] commands: - cat /proc/cpuinfo - checkout - git submodule update --init - sudo apt-get update - sudo apt-get install -y ninja-build python3-pip python3-setuptools gcovr clang-${CLANG_VERSION} - pip3 install meson - mkdir build - CC="clang-${CLANG_VERSION}" CXX="clang++-${CLANG_VERSION}" ~/.local/bin/meson setup build -Db_coverage=true - ninja -C build -v - './build/test/run-tests --list | grep -oP ''^/([^\/]+)/([^\/]+)'' | sort -u | xargs parallel ./build/test/run-tests --color always {} :::' - ninja -C build coverage-xml env_vars: - name: CFLAGS value: -Weverything -Werror -march=native - name: CXXFLAGS value: -Weverything -Werror -march=native simde-0.7.2/.travis.yml000066400000000000000000000163411400333146700147400ustar00rootroot00000000000000language: c dist: bionic cache: ccache # env: # global: # - ARCH_FLAGS="-march=native" # - BUILD_CPP_TESTS=ON # - CMAKE_GENERATOR='Ninja' # - RUN_TESTS=true # - OPTIMIZATION_FLAGS='' # - DIAGNOSTIC_FLAGS='-Wall -Wextra -Werror' # - TEST_ARG_SEPARATOR="" jobs: include: # - name: "gcc-8 x86" # if: branch != master OR type == pull_request # arch: amd64 # 
compiler: gcc-8 # env: # - C_COMPILER=gcc-8 # - CXX_COMPILER=g++-8 # - ARCH_FLAGS="-m32" # addons: # apt: # packages: # - libc6:i386 # - libc6-dev:i386 # - gcc-8:i386 # - g++-8:i386 # - cpp-8:i386 # - binutils:i386 # - binutils-i686-linux-gnu:i386 # - name: "aarch64" # if: branch != master OR type == pull_request # arch: arm64 # compiler: gcc-10 # env: # - C_COMPILER=gcc-10 # - CXX_COMPILER=g++-10 # - ARCH_FLAGS="-march=armv8-a+simd+crc" # - OPTIMIZATION_FLAGS="-O1" # addons: # apt: # sources: # - sourceline: "ppa:ubuntu-toolchain-r/test" # key_url: https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x60c317803a41ba51845e371a1e9377a2ba9ef27f # packages: # - gcc-10 # - g++-10 # - name: "ppc64le" # if: branch != master OR type == pull_request # arch: ppc64le # compiler: gcc-10 # env: # - ARCH_FLAGS="-mcpu=native" # - C_COMPILER=gcc-10 # - CXX_COMPILER=g++-10 # - OPTIMIZATION_FLAGS=-O2 # addons: # apt: # packages: # - gcc-10 # - g++-10 # addons: # apt: # sources: # - sourceline: "ppa:ubuntu-toolchain-r/test" # key_url: https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x60c317803a41ba51845e371a1e9377a2ba9ef27f # packages: # - gcc-10 # - g++-10 # - name: "MIPS Loongson-MMI (Compile Only)" # if: branch != master OR type == pull_request # arch: amd64 # compiler: mips64el-linux-gnuabi64-gcc # env: # - C_COMPILER=mips64el-linux-gnuabi64-gcc # - CXX_COMPILER=mips64el-linux-gnuabi64-g++ # - ARCH_FLAGS="-march=loongson3a" # - RUN_TESTS=false # addons: # apt: # packages: # - gcc-mips64el-linux-gnuabi64 # - g++-mips64el-linux-gnuabi64 # # These next two are largely to detect missing AVX-512 functions # # since Travis is the only CI right now with AVX-512 hardware and SDE # # is too slow.
# - name: "gcc-6" # dist: bionic # if: branch != master OR type == pull_request # env: # - C_COMPILER=gcc-6 # - CXX_COMPILER=g++-6 # addons: # apt: # packages: # - gcc-6 # - g++-6 # - name: "clang-3.5" # dist: xenial # if: branch != master OR type == pull_request # env: # - C_COMPILER=clang-3.5 # - CXX_COMPILER=clang++-3.5 # addons: # apt: # packages: # - clang-3.5 # # - name: "pgcc" # # if: branch != master OR type == pull_request # # env: # # - C_COMPILER=pgcc # # - CXX_COMPILER=pgc++ # # - ARCH_FLAGS="-m64" # # - BUILD_CPP_TESTS=OFF # # - DIAGNOSTIC_FLAGS= # # install: # # - curl 'https://raw.githubusercontent.com/nemequ/pgi-travis/master/install-pgi.sh' | /bin/sh # - name: "xlc" # if: branch != master OR type == pull_request # arch: ppc64le # install: # - wget -q https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/public.gpg -O- | sudo apt-key add - # - echo "deb https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/ ${TRAVIS_DIST} main" >> /etc/apt/sources.list # - sudo apt-get update # - export XLC_VERSION="$(apt-cache search '^xlc\.[0-9]+\.[0-9]+\.[0-9]+$' | awk '{ print substr($1, 5) }')" # - sudo apt-get install "xlc.${XLC_VERSION}" "xlc-license-community.${XLC_VERSION}" # - sudo /opt/ibm/xlC/${XLC_VERSION}/bin/xlc_configure <<< 1 >/dev/null # env: # - ARCH_FLAGS=-qarch=auto # - C_COMPILER=xlc # - CXX_COMPILER=xlc++ # - name: msvc x86_64 # if: branch != master OR type == pull_request # os: windows # env: # - ARCH_FLAGS="/arch:AVX2" # - CMAKE_GENERATOR="Visual Studio 15 2017 Win64" # - OPTIMIZATION_FLAGS="/Ox" # - DIAGNOSTIC_FLAGS="/W4 /WX" # - name: msvc arm # if: branch != master OR type == pull_request # os: windows # env: # - ARCH_FLAGS="" # - CMAKE_GENERATOR="Visual Studio 15 2017 ARM" # - RUN_TESTS=false # - OPTIMIZATION_FLAGS="/Ox" # - DIAGNOSTIC_FLAGS="/W4 /WX" # - name: s390x # if: branch != master OR type == pull_request # arch: s390x ## ## Special builds to deploy from master ## 
- name: push-to-no-tests if: branch = master AND type != pull_request AND repo = simd-everywhere/simde addons: apt: packages: - python3-pip - python3-setuptools before_install: - git fetch --unshallow - pip3 install git-filter-repo script: - | git filter-repo --force --path-rename COPYING:.no-test/COPYING && \ git filter-repo --path-rename '.no-test/':'simde/' && \ git filter-repo --subdirectory-filter simde after_script: - git push -q "https://${NO_TESTS_TOKEN}@github.com/simd-everywhere/simde-no-tests" master # allow_failures: # - name: msvc x86 # before_install: # - | # if [ "${CMAKE_GENERATOR}" = "Ninja" ]; then # case "${TRAVIS_OS_NAME}" in # "linux") # sudo apt-get install -y ninja-build parallel # ;; # "osx") # brew install ninja # ;; # "windows") # choco install ninja # ;; # esac # fi # - cat /proc/cpuinfo || true # - cat /proc/meminfo || true # ## If we use the matrix to set CC/CXX Travis overwrites the values, # ## so instead we use C/CXX_COMPILER, then copy the values to CC/CXX # ## here (after Travis has set CC/CXX). # - | # if [ -n "${C_COMPILER}" ]; then # export CC="${C_COMPILER}" # fi # if [ -n "${CXX_COMPILER}" ]; then # export CXX="${CXX_COMPILER}" # fi # script: # - mkdir "${TRAVIS_BUILD_DIR}/test/build" && cd "${TRAVIS_BUILD_DIR}/test/build" # - | # ${CONFIGURE_WRAPPER} cmake .. \ # -G "${CMAKE_GENERATOR}" \ # -DBUILD_CPP_TESTS=${BUILD_CPP_TESTS} \ # -DCMAKE_CROSSCOMPILING_EMULATOR="${TEST_WRAPPER}" \ # -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON \ # -DCMAKE_C_FLAGS="${ARCH_FLAGS} ${OPTIMIZATION_FLAGS} ${DIAGNOSTIC_FLAGS} ${COMPILER_FLAGS} ${CFLAGS}" \ # -DCMAKE_CXX_FLAGS="${ARCH_FLAGS} ${OPTIMIZATION_FLAGS} ${DIAGNOSTIC_FLAGS} ${COMPILER_FLAGS} ${CXXFLAGS}" \ # ${CMAKE_ARGS} || (cat CMakeFiles/CMakeError.log && false) && \ # ${BUILD_WRAPPER} cmake --build . 
# - | # if [ "${RUN_TESTS}" = "true" ]; then # if [ "windows" != "${TRAVIS_OS_NAME}" ]; then # if command -v parallel; then # ${TEST_WRAPPER} ./run-tests${EXECUTABLE_EXTENSION} --list | grep -oP '^/(x86|arm/neon)/([^\/]+)' | sort -u | xargs parallel ${TEST_WRAPPER} ./run-tests${EXECUTABLE_EXTENSION} ${TEST_ARG_SEPARATOR} {} ::: || exit 1 # else # ${TEST_WRAPPER} ./run-tests${EXECUTABLE_EXTENSION} || exit 1 # fi # else # ctest --output-on-failure --interactive-debug-mode 0 -C Debug -V || exit 1 # fi # fi notifications: email: false simde-0.7.2/CONTRIBUTING.md000066400000000000000000000053431400333146700150600ustar00rootroot00000000000000# Contributing to SIMDe First off, if you're even reading this, thank you! There is a lot of work to do, and any help is appreciated. If you haven't already, please read the [README](https://github.com/simd-everywhere/simde/blob/master/README.md). The [wiki](https://github.com/simd-everywhere/simde/wiki) also has some good information, especially the [FAQ](https://github.com/simd-everywhere/simde/wiki/FAQ) and a guide on how to [implement a new function](https://github.com/simd-everywhere/simde/wiki/Implementing-a-New-Function). For information on developing for architectures you don't have access to, please see the [Development Environment](https://github.com/simd-everywhere/simde/wiki/Development-Environment) page on the wiki. If you still have questions, or if anything below doesn't make sense to you, please feel free to use the [issue tracker](https://github.com/simd-everywhere/simde/issues) or the [mailing list](https://groups.google.com/forum/#!forum/simde) to ask. I know the SIMDe documentation needs a lot of improvement, and asking questions will help us understand what is missing, so please don't be shy! ## Building the Tests SIMDe contains an extensive test suite used for development. Most users will never need to build the suite, but if you're contributing code to SIMDe you'll need to build them. 
Here is the basic procedure for compiling and running the tests: ```bash mkdir build cd build CFLAGS="-march=native" CXXFLAGS="-march=native" meson .. ninja test ``` Note that `-march=native` may not be the right flag for your compiler. That should work for most compilers on x86/x86_64, though MSVC is an exception (try `/arch:AVX2` instead of `-march=native`). On other architectures please consult your compiler documentation to find out what flags you should use to enable the SIMD extension for your target platform. Here are a few to try: * ARM: * `-march=armv8-a+simd` (for AArch64) * `-march=armv8-a+simd -mfpu=auto` (for ARMv8) * `-march=armv7-a -mfpu=neon` (for ARMv7) * POWER * `-mcpu=native` If you need a flag not listed above, please let us know so we can add it to the list. You may also want to take a look at the [Docker container](https://github.com/simd-everywhere/simde/tree/master/docker) which has many builds pre-configured, including cross-compilers and emulators. ## Coding Style SIMDe has an [EditorConfig](https://editorconfig.org/) file to configure your editor for things like tabs vs. spaces, how many spaces, etc. If you use an editor which doesn't support it out of the box then odds are good there is a plugin you can download; please do so. For other coding style information, please see the [Coding Style](https://github.com/simd-everywhere/simde/wiki/Coding-Style) document in the Wiki. 
simde-0.7.2/COPYING000066400000000000000000000020651400333146700136600ustar00rootroot00000000000000Copyright (c) 2017 Evan Nemerson Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. simde-0.7.2/README.md000066400000000000000000000620711400333146700141070ustar00rootroot00000000000000# SIMD Everywhere [![All Contributors](https://img.shields.io/badge/all_contributors-18-orange.svg?style=flat-square)](#contributors-) [![Gitter chat](https://badges.gitter.im/gitterHQ/gitter.png)](https://gitter.im/simd-everywhere/community) The SIMDe header-only library provides fast, portable implementations of [SIMD intrinsics](https://en.wikipedia.org/wiki/SIMD) on hardware which doesn't natively support them, such as calling [SSE](https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions) functions on ARM. 
There is no performance penalty if the hardware supports the native implementation (*e.g.*, SSE/[AVX](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) runs at full speed on [x86](https://en.wikipedia.org/wiki/X86), [NEON](https://en.wikipedia.org/wiki/ARM_architecture#Advanced_SIMD_(Neon)) on [ARM](https://en.wikipedia.org/wiki/ARM_architecture), *etc.*). This makes porting code to other architectures much easier in a few key ways: First, instead of forcing you to rewrite everything for each architecture, SIMDe lets you get a port up and running almost effortlessly. You can then start working on switching the most performance-critical sections to native intrinsics, improving performance gradually. SIMDe lets (for example) SSE/AVX and NEON code exist side-by-side, in the same implementation. Second, SIMDe makes it easier to write code targeting [ISA](https://en.wikipedia.org/wiki/Instruction_set_architecture) extensions you don't have convenient access to. You can run NEON code on your x86 machine *without an emulator*. Obviously you'll eventually want to test on the actual hardware you're targeting, but for most development, SIMDe can provide a much easier path. SIMDe takes a very different approach from most other SIMD abstraction layers in that it aims to expose the entire functionality of the underlying instruction set. Instead of limiting functionality to the lowest common denominator, SIMDe tries to minimize the amount of effort required to port while still allowing you the space to optimize as needed. The current focus is on writing complete portable implementations, though a large number of functions already have accelerated implementations using one (or more) of the following: * SIMD intrinsics from other ISA extensions (e.g., using NEON to implement SSE). 
* Compiler-specific vector extensions and built-ins such as [`__builtin_shufflevector`](http://clang.llvm.org/docs/LanguageExtensions.html#langext-builtin-shufflevector) and [`__builtin_convertvector`](http://clang.llvm.org/docs/LanguageExtensions.html#langext-builtin-convertvector) * Compiler auto-vectorization hints, using: * [OpenMP 4 SIMD](http://www.openmp.org/) * [Cilk Plus](https://www.cilkplus.org/) * [GCC loop-specific pragmas](https://gcc.gnu.org/onlinedocs/gcc/Loop-Specific-Pragmas.html) * [clang pragma loop hint directives](http://llvm.org/docs/Vectorizers.html#pragma-loop-hint-directives) You can [try SIMDe online](https://simde.netlify.app/godbolt/demo) using Compiler Explorer and an amalgamated SIMDe header. If you have any questions, please feel free to use the [issue tracker](https://github.com/simd-everywhere/simde/issues) or the [mailing list](https://groups.google.com/forum/#!forum/simde). ## Current Status There are currently complete implementations of the following instruction sets: * [MMX](https://en.wikipedia.org/wiki/MMX_(instruction_set)) * [SSE](https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions) * [SSE2](https://en.wikipedia.org/wiki/SSE2) * [SSE3](https://en.wikipedia.org/wiki/SSE3) * [SSSE3](https://en.wikipedia.org/wiki/SSSE3) * [SSE4.1](https://en.wikipedia.org/wiki/SSE4#SSE4.1) * [AVX](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) * [AVX2](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#Advanced_Vector_Extensions_2) * [FMA](https://en.wikipedia.org/wiki/FMA_instruction_set) * [GFNI](https://en.wikipedia.org/wiki/AVX-512#GFNI) * [CLMUL](https://en.wikipedia.org/wiki/CLMUL_instruction_set) * [XOP](https://en.wikipedia.org/wiki/XOP_instruction_set) * 
[SVML](https://software.intel.com/content/www/us/en/develop/documentation/cpp-compiler-developer-guide-and-reference/top/compiler-reference/intrinsics/intrinsics-for-intel-advanced-vector-extensions-512-intel-avx-512-instructions/intrinsics-for-arithmetic-operations-1/intrinsics-for-short-vector-math-library-svml-operations.html) As well as partial support for many others; see the [instruction-set-support](https://github.com/simd-everywhere/simde/issues?q=is%3Aissue+is%3Aopen+label%3Ainstruction-set-support+sort%3Aupdated-desc) label in the issue tracker for details on progress. If you'd like to be notified when an instruction set is available you may subscribe to the relevant issue. If you have a project you're interested in using with SIMDe but we don't yet support all the functions you need, please file an issue with a list of what's missing so we know what to prioritize. The default branch is protected so commits never reach it unless they have passed extensive CI checks. Status badges don't really make sense since they will always be green, but here are the links: * [GitHub Actions](https://github.com/simd-everywhere/simde/actions) * [Cirrus CI](https://cirrus-ci.com/github/simd-everywhere/simde) * [Semaphore CI](https://nemequ.semaphoreci.com/projects/simde) * [Circle CI](https://app.circleci.com/pipelines/github/simd-everywhere/simde) * [AppVeyor](https://ci.appveyor.com/project/nemequ/simde) * [Azure Pipelines](https://dev.azure.com/simd-everywhere/SIMDe/_build) * [Drone CI](https://cloud.drone.io/simd-everywhere/simde/) * [Travis CI](https://travis-ci.org/simd-everywhere/simde) If you're adding a new build I suggest Cirrus CI, which is where we currently have the most room given the number of builds currently on the platform and the quotas for free/open-source usage. 
Alternately, feel free to set up another provider (such as [Codefresh](https://codefresh.io/), [Shippable](https://www.shippable.com/), [Bitrise](https://www.bitrise.io/), [Wercker](https://app.wercker.com/), etc.). *Notice*: we plan on changing the name of the default branch from "master" to something else soon; we are just trying to wait to see what name git settles on so we can be consistent. ## Contributing First off, if you're reading this: thank you! Even considering contributing to SIMDe is very much appreciated! SIMDe is a fairly large undertaking; there are a *lot* of functions to get through and a lot of opportunities for optimization on different platforms, so we're very happy for any help you can provide. Programmers of all skill levels are welcome, there are lots of tasks which are pretty straightforward and don't require any special expertise. If you're not sure how you'd like to contribute, please consider taking a look at [the issue tracker](https://github.com/simd-everywhere/simde/issues). There is a [good first issue](https://github.com/simd-everywhere/simde/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) tag if you want to ease into your first contributions, but if you're interested in something else please get in touch via the issue tracker; we're happy to help you get a handle on whatever you are interested in. If you're interested in implementing currently unimplemented functions, there is [a guide](https://github.com/simd-everywhere/simde/wiki/Implementing-a-New-Function) explaining how to add new functions and how to quickly and easily get a test case in place. It's a bit rough right now, but if anything is unclear please feel free to use the issue tracker to ask about anything you're not clear on. ## Usage First, it is important to note that *you do not need two separate versions* (one using SIMDe, the other native).
If the native functions are available SIMDe will use them, and compilers easily optimize away any overhead from SIMDe; all they have to do is some basic inlining. `-O2` should be enough, but we strongly recommend `-O3` (or whatever flag instructs your compiler to aggressively optimize) since many of the portable fallbacks are substantially faster with aggressive auto-vectorization that isn't enabled at lower optimization levels. Each instruction set has a separate file; `x86/mmx.h` for MMX, `x86/sse.h` for SSE, `x86/sse2.h` for SSE2, and so on. Just include the header for whichever instruction set(s) you want *instead of the native version* (if you include the native version after SIMDe it will result in compile-time errors if native aliases are enabled). SIMDe will provide the fastest implementation it can given which extensions you've enabled in your compiler (i.e., if you want to use NEON to implement SSE, you may need to pass something like `-mfpu=neon` or `-march=armv8-a+simd`. See [GCC ARM-Options](https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html) for more information). If you define `SIMDE_ENABLE_NATIVE_ALIASES` before including SIMDe you can use the same names as the native functions. Unfortunately, this is somewhat error-prone due to portability issues in the APIs, so it's recommended to only do this for testing. When `SIMDE_ENABLE_NATIVE_ALIASES` is undefined only the versions prefixed with `simde_` will be available; for example, the MMX `_mm_add_pi8` intrinsic becomes `simde_mm_add_pi8`, and `__m64` becomes `simde__m64`. Since SIMDe is meant to be portable, many functions which assume types are of a specific size have been altered to use fixed-width types instead. For example, Intel's APIs use `char` for signed 8-bit integers, but `char` on ARM is generally unsigned. SIMDe uses `int8_t` to make the API portable, but that means your code may require some minor changes (such as using `int8_t` instead of `char`) to work on other platforms.
That said, the changes are usually quite minor. It's often enough to just use search and replace, manual changes are required pretty infrequently. For best performance, in addition to `-O3` (or whatever your compiler's equivalent is), you should enable OpenMP 4 SIMD support by defining `SIMDE_ENABLE_OPENMP` before including any SIMDe headers, and enabling OpenMP support in your compiler. GCC and ICC both support a flag to enable only OpenMP SIMD support instead of full OpenMP (the OpenMP SIMD support doesn't require the OpenMP run-time library); for GCC the flag is `-fopenmp-simd` (requires GCC version 4.9 or later), for ICC the flag is `-qopenmp-simd`. SIMDe also supports using [Cilk Plus](https://www.cilkplus.org/), [GCC loop-specific pragmas](https://gcc.gnu.org/onlinedocs/gcc/Loop-Specific-Pragmas.html), or [clang pragma loop hint directives](http://llvm.org/docs/Vectorizers.html#pragma-loop-hint-directives), though these are not nearly as effective as OpenMP SIMD and depending on them will likely result in less efficient code. ## Portability ### Compilers SIMDe does depend on some C99 features, though the subset supported by MSVC also works. While we do our best to make sure we provide optimized implementations where they are supported, SIMDe does contain portable fallbacks which are designed to work on any C99 compiler. Every commit is tested in CI on multiple compilers, platforms, and configurations, and our test coverage is extremely extensive. Currently tested compilers include: * GCC versions back to 4.8 * Clang versions back to 3.8 * Microsoft Visual Studio back to 12 (2013) * IBM XL C/C++ * Intel C/C++ Compiler (ICC) I'm generally willing to accept patches to add support for other compilers, as long as they're not too disruptive, *especially* if we can get CI support going. We currently use Travis CI, AppVeyor, and Microsoft Azure Pipelines, but other CI platforms can be added as necessary. 
### Hardware The following architectures are tested in CI for every commit: * x86_64 * x86 * AArch64 * ARMv8 * ARMv7 * PPC64 * MIPS Loongson We would love to add more, so patches are extremely welcome! ## Related Projects * The "builtins" module in [portable-snippets](https://github.com/nemequ/portable-snippets) does much the same thing, but for compiler-specific intrinsics (think `__builtin_clz` and `_BitScanForward`), **not** SIMD intrinsics. * Intel offers an emulator, the [Intel® Software Development Emulator](https://software.intel.com/en-us/articles/intel-software-development-emulator/) which can be used to develop software which uses Intel intrinsics without having to own hardware which supports them, though it doesn't help for deployment. * [Iris](https://github.com/AlexYaruki/iris) is the only other project I'm aware of which is attempting to create portable implementations like SIMDe. SIMDe is much further along on the Intel side, but Iris looks to be in better shape on ARM. C++-only, Apache 2.0 license. AFAICT there are no accelerated fallbacks, nor is there a good way to add them since it relies extensively on templates. * There are a few projects trying to implement one set with another: * [ARM_NEON_2_x86_SSE](https://github.com/intel/ARM_NEON_2_x86_SSE) — implementing NEON using SSE. Quite extensive, Apache 2.0 license. * [sse2neon](https://github.com/jratcliff63367/sse2neon) — implementing SSE using NEON. This code has already been merged into SIMDe. * [veclib](https://github.com/IvantheDugtrio/veclib) — implementing SSE2 using AltiVec/VMX, using a non-free IBM library called [powerveclib](https://www.ibm.com/developerworks/community/groups/community/powerveclib/) * [SSE-to-NEON](https://github.com/otim/SSE-to-NEON) — implementing SSE with NEON. Non-free, C++. * [arm-neon-tests](https://github.com/christophe-lyon/arm-neon-tests) contains tests to verify NEON implementations. 
If you know of any other related projects, please [let us know](https://github.com/simd-everywhere/simde/issues/new)! ## Caveats Sometimes features can't be emulated. If SIMDe is operating in native mode the functions will work as expected, but if there is no native support some caveats apply: * Many functions require `<math.h>` and/or `<fenv.h>`. SIMDe will still work without those headers, but the results of those functions are undefined. * x86 / x86_64 * SSE * `SIMDE_MM_SET_ROUNDING_MODE()` will use `fesetround()`, altering the global rounding mode. * `simde_mm_getcsr` and `simde_mm_setcsr` only implement bits 13 and 14 (rounding mode). * AVX * `simde_mm256_test*` do not set the CF/ZF registers as there is no portable way to implement that functionality. * `simde_mm256_zeroall` and `simde_mm256_zeroupper` are not implemented as there is no portable way to implement that functionality. Additionally, there are some known limitations which apply when using native aliases (`SIMDE_ENABLE_NATIVE_ALIASES`): * On Windows x86 (but not x86_64), some MMX functions and SSE/SSE2 functions which use MMX types (__m64) other than for pointers may return incorrect results. Also, as mentioned earlier, while some APIs make assumptions about basic types (*e.g.*, `int` is 32 bits), SIMDe does not, so many types have been altered to use portable fixed-width versions such as `int32_t`. If you find any other differences, please file an issue so we can either fix it or add it to the list above. ## Benefactors SIMDe uses resources provided for free by a number of organizations. While this shouldn't be taken to imply endorsement of SIMDe, we're tremendously grateful for their support: * [IntegriCloud](https://integricloud.com/) — provides access to a very fast POWER9 server for developing AltiVec/VMX support. * [GCC Compile Farm](https://gcc.gnu.org/wiki/CompileFarm) — provides access to a wide range of machines with different architectures for developing support for various ISA extensions.
* [CodeCov.io](https://codecov.io/) — provides code coverage analysis for our test cases. * [Google](https://www.google.com/) — financing [Summer of Code](https://summerofcode.withgoogle.com/), substantial amounts of code (Sean Maher's contributions), and an [Open Source Peer Bonus](https://opensource.google/docs/growing/peer-bonus/). Without such organizations donating resources, SIMDe wouldn't be nearly as useful or usable as it is today. We would also like to thank anyone who has helped develop the myriad of software on which SIMDe relies, including compilers and analysis tools. Finally, a special thank you to [anyone who has contributed](https://github.com/simd-everywhere/simde/graphs/contributors) to SIMDe, filed bugs, provided suggestions, or helped with SIMDe development in any way. ## License SIMDe is distributed under an MIT-style license; see COPYING for details. ## Contributors ✨ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)):

Evan Nemerson

💻 🖋 📖 💡 🤔 💬 👀 ⚠️ 📢 🐛 🚇 🚧 📆

Michael R. Crusoe

🐛 💻 📋 🔍 🤔 🚇 📦 ⚠️

HIMANSHI MATHUR

💻 ⚠️

Hidayat Khan

💻 ⚠️

rosbif

💻 ⚠️ 🐛 🤔

Jun Aruga

💻 🤔 📦 🚇 🚧 ⚠️ 🐛

Élie ROUDNINSKI

💻 ⚠️

Jesper Storm Bache

💻

Jeff Daily

💻 🚇

Pavel

💻

Sabarish Bollapragada

💻

Gavin Li

💻

Yining Karl Li

💻

Anirban Dey

📖

Darren Ng

📖

FaresSalem

📖

Pradnyesh Gore

💻

Sean Maher

💻
This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind are welcome! simde-0.7.2/amalgamate.py000077500000000000000000000041471400333146700152760ustar00rootroot00000000000000#!/usr/bin/python3 # amalgamate.py # Written by Evan Nemerson # # To the extent possible under law, the author(s) have dedicated all # copyright and related and neighboring rights to this software to # the public domain worldwide. This software is distributed without # any warranty. # # For details, see <http://creativecommons.org/publicdomain/zero/1.0/>. # SPDX-License-Identifier: CC0-1.0 # Quick and dirty script to amalgamate C into a single file. Includes # using angle brackets (#include <foo.h>) will be preserved, but for # includes using double quotes (#include "foo.h") the file will be # included by this script. # # If you make any improvements please report them in the SIMDe issue # tracker at <https://github.com/simd-everywhere/simde/issues> or # directly to the author so they can be merged back into the original # version. import sys, re, os, subprocess amalgamate_include = re.compile('^\\s*#\\s*include\\s+\\"([^)]+)\\"\\s$') already_included = [] def amalgamate(filename, stream): full_path = os.path.realpath(os.path.realpath(filename)) srcdir = os.path.dirname(full_path) print('/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */') git_id = subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=srcdir).decode().strip() print("/* {:s} */".format(git_id)) if full_path not in already_included: already_included.insert(-1, full_path) with open(filename) as input_file: stream.write('/* :: Begin ' + os.path.relpath(full_path) + ' :: */\n') for source_line in input_file: a9e_inc_m = amalgamate_include.match(source_line) if a9e_inc_m: amalgamate(os.path.join(srcdir, a9e_inc_m.group(1)), stream) else: stream.write(source_line) stream.write('/* :: End ' + os.path.relpath(full_path) + ' :: */\n') if len(sys.argv) != 2: sys.stderr.write("USAGE: " + sys.argv[0] + ' SOURCE_FILE\n\n') sys.stderr.write("This will print a copy 
of $SOURCE_FILE to stdout, while replacing\n") sys.stderr.write("all '#include AMALGAMATE(file)' lines with copies of file.\n") sys.exit(1) amalgamate(sys.argv[1], sys.stdout) simde-0.7.2/codecov.yml000066400000000000000000000000171400333146700147650ustar00rootroot00000000000000comment: false simde-0.7.2/docker/000077500000000000000000000000001400333146700140715ustar00rootroot00000000000000simde-0.7.2/docker/Dockerfile000066400000000000000000000065761400333146700161010ustar00rootroot00000000000000# Dockerfile for SIMDe development ARG release=testing FROM debian:${release}-slim ARG DEBIAN_FRONTEND=noninteractive COPY docker/bin /tmp/simde-bin RUN \ for script in simde-reset-build.sh; do \ ln -s /usr/local/src/simde/docker/bin/"${script}" /usr/bin/"${script}"; \ done # Multiarch RUN \ apt-get update -y && \ apt-get upgrade -y && \ for arch in armhf arm64 ppc64el s390x i386 mips64el; do \ dpkg --add-architecture "$arch"; \ done; \ apt-get update -y # Common packages RUN \ apt-get install -yq \ git build-essential \ meson cmake \ '^clang-[0-9\.]+$' \ '^g(cc|\+\+)-[0-9\.]+$' \ gdb valgrind \ qemu binfmt-support qemu-user-static \ creduce screen htop parallel nano rsync strace \ npm libsleef-dev # GCC cross-compilers RUN \ apt-get install -y apt-file && \ apt-file update && \ PACKAGES_TO_INSTALL=""; \ for ARCH in $(dpkg --print-foreign-architectures); do \ PACKAGES_TO_INSTALL="${PACKAGES_TO_INSTALL} libc6:${ARCH} ^libstdc\+\+\-[0-9]+\-dev:${ARCH}"; \ for pkg in $(apt-file search -x "/usr/bin/$(/tmp/simde-bin/arch2gcc.sh ${ARCH})-g(cc|\+\+)-[0-9\.]+" | grep -Po '^([^ ]+)(? 
/etc/apt/trusted.gpg.d/intel.gpg && \ echo "deb [arch=amd64] https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list && \ apt-get update && \ apt-get install -yq intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic && \ for exe in icc icpc; do \ printf '#!/bin/bash\nARGS="$@"\nsource /opt/intel/oneapi/compiler/latest/env/vars.sh >/dev/null\n%s ${ARGS}\n' "${exe}" > /usr/bin/"${exe}" && \ chmod 0755 /usr/bin/"${exe}" ; \ done # # xlc -- Install fails. # # Once IBM releases a version for Ubuntu Focal (20.04) I hope I can # # get this working. # RUN \ # curl -s 'https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/public.gpg' | apt-key add - && \ # echo "deb [arch=ppc64el] https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/ bionic main" > /etc/apt/sources.list.d/xlc.list && \ # apt-get update && \ # XLC_VERSION="$(apt-cache search '^xlc\.[0-9]+\.[0-9]+\.[0-9]+$' | awk '{ print substr($1, 5) }')" && \ # apt-get install "xlc.${XLC_VERSION}:ppc64el" "xlc-license-community.${XLC_VERSION}:ppc64el" && \ # /opt/ibm/xlC/${XLC_VERSION}/bin/xlc_configure <<< 1 >/dev/null # Intel SDE COPY test/download-sde.sh /tmp/simde-bin/download-sde.sh RUN \ "/tmp/simde-bin/download-sde.sh" "/opt/intel/sde" && \ for executable in sde sde64; do \ ln -s "/opt/intel/sde/${executable}" "/usr/bin/${executable}"; \ done # Emscripten RUN \ git clone https://github.com/emscripten-core/emsdk.git /opt/emsdk && \ cd /opt/emsdk && ./emsdk update-tags && ./emsdk install tot && ./emsdk activate tot && \ ln -s /opt/emsdk/upstream/bin/wasm-ld /usr/bin/wasm-ld && \ npm install jsvu -g && jsvu --os=linux64 --engines=v8 && ln -s "/root/.jsvu/v8" "/usr/bin/v8" # Meson cross files RUN \ mkdir -p "/usr/local/share/meson/cross" && ln -s /usr/local/src/simde/docker/cross-files /usr/local/share/meson/cross/simde RUN mkdir -p /opt/simde WORKDIR /opt/simde 
simde-0.7.2/docker/README.md000066400000000000000000000042431400333146700153530ustar00rootroot00000000000000# SIMDe Development Container The basic idea is to set up a Debian system with lots of different compilers and emulators for different architectures and configure multiple builds in different directories. Note that there are also several docker files in the test/ subdirectory. These can be used to test other operating systems, especially older compilers which aren't supported on Debian testing anymore. To use this, just run the `simde-dev.sh` script and go grab ~a cup of coffee~ lunch (it will take a while, and download a *lot* of packages). Once the container is ready it will drop you into a bash shell in `/opt/simde`; this is the build directory. This will bind the parent directory (the root of the SIMDe checkout) to `/usr/local/src/simde`; any changes to either will propogate to the other, meaning you can continue using your normal development environment and just re-run ninja in the container to (re)build SIMDe. If you would like build directories to persist across multiple invocations of `simde-dev.sh`, you can set the `PERSISTENT_BUILD_DIR` to a directory on the host filesystem and it will be mapped to `/opt/simde` in the container. Once your container is finished building, just `simde-reset-build.sh` and it should be populated with a bunch of subdirectories which you can build with ninja. For example, `ninja -C gcc-10` will build SIMDe using GCC 10. If you want to run the tests, `ninja -C gcc-10 test` (or `cd gcc-10 && ninja test`). You can also run `simde-reset-build.sh build-name` to (re)generate a single build. # Debian Version By default, we use Debian testing. If you would like to use Debian unstable instead, just run `simde-dev.sh unstable`. ## Altering or Adding Builds Each build has an associated Meson cross file (see the `cross-files/` subdirectory). 
We kind of abuse these by adding flags like `-Wextra`, `-Werror`, `-march=...`, *etc.*, which aren't really about cross-compilation. However, you can add or remove C/C++ flags in the cross files, or if you want to *add* flags you can just put them in the `CFLAGS`/`CXXFLAGS` environment variables and reconfigure the build. You can also create a new cross file with your preferred configuration. simde-0.7.2/docker/bin/000077500000000000000000000000001400333146700146415ustar00rootroot00000000000000simde-0.7.2/docker/bin/arch2gcc.sh000077500000000000000000000016611400333146700166600ustar00rootroot00000000000000#!/bin/sh case "${1}" in "alpha") echo "alpha-linux-gnu" ;; "amd64") echo "x86_64-linux-gnu" ;; "arm64") echo "aarch64-linux-gnu" ;; "armhf") echo "arm-linux-gnueabihf" ;; "hppa") echo "hppa-linux-gnu" ;; "hppa64") echo "hppa64-linux-gnu" ;; "i386") echo "i686-linux-gnu" ;; "m68k") echo "m68k-linux-gnu" ;; "mips64el") echo "mips64el-linux-gnuabi64" ;; "mipsel") echo "mips64-linux-gnuabi64" ;; "ppc") echo "powerpc-linux-gnu" ;; "ppc64") echo "powerpc64-linux-gnu" ;; "ppc64el") echo "powerpc64le-linux-gnu" ;; "riscv64") echo "riscv64-linux-gnu" ;; "s390x") echo "s390x-linux-gnu" ;; "sh4") echo "sh4-linux-gnu" ;; "sparc64") echo "sparc64-linux-gnu" ;; "x32") echo "x86_64-linux-gnux32" ;; *) echo "Unknown architecture: ${1}" >&2 exit 1 ;; esac simde-0.7.2/docker/bin/simde-check-all.sh000077500000000000000000000003741400333146700201260ustar00rootroot00000000000000#!/bin/bash if [ $# = 0 ]; then for crossfile in /usr/local/share/meson/cross/simde/*.cross; do basename "$crossfile" .cross done | xargs "$0" else while [ $# -gt 0 ]; do ninja -C /opt/simde-build/"$1" test || exit 1 shift done fi simde-0.7.2/docker/bin/simde-reset-build.sh000077500000000000000000000013621400333146700205200ustar00rootroot00000000000000#!/bin/bash function configure { if [ -e "/opt/simde/${1}" ]; then rm -rf "/opt/simde/${1}"; fi if [ -e "/usr/local/share/meson/cross/simde/${1}.cross" ]; then 
meson --cross-file="/usr/local/share/meson/cross/simde/${1}.cross" "/opt/simde/${1}" "/usr/local/src/simde" fi } if [ $# -gt 0 ]; then while [ $# -ge 1 ]; do configure "$1" shift done else for cross in /usr/local/share/meson/cross/simde/*.cross; do target="$(basename "$cross" .cross)" if [ ! -e "/opt/simde/${target}" ]; then configure "$(basename "${target}" .cross)" fi done for target in "aarch64-clang-10" "emscripten"; do if [ ! -e "/opt/simde/${target}" ]; then configure "$(basename "${target}" .cross)" fi done fi simde-0.7.2/docker/cross-files/000077500000000000000000000000001400333146700163225ustar00rootroot00000000000000simde-0.7.2/docker/cross-files/aarch64+sve-clang-10.cross000066400000000000000000000012531400333146700227170ustar00rootroot00000000000000[binaries] c = '/usr/bin/clang-10' cpp = '/usr/bin/clang++-10' ar = '/usr/bin/llvm-ar-10' strip = '/usr/bin/llvm-strip-10' objcopy = '/usr/bin/llvm-objcopy-10' ld = '/usr/bin/llvm-ld-10' exe_wrapper = 'qemu-aarch64-static' [properties] c_args = ['--target=aarch64-linux-gnu', '-march=armv8-a+sve', '-isystem=/usr/aarch64-linux-gnu/include', '-Weverything', '-Werror'] cpp_args = ['--target=aarch64-linux-gnu', '-march=armv8-a+sve', '-isystem=/usr/aarch64-linux-gnu/include', '-Weverything', '-Werror'] c_link_args = ['--target=aarch64-linux-gnu'] cpp_link_args = ['--target=aarch64-linux-gnu'] [host_machine] system = 'linux' cpu_family = 'aarch64' cpu = 'arm64' endian = 'little' simde-0.7.2/docker/cross-files/aarch64+sve-gcc-10.cross000066400000000000000000000007711400333146700223730ustar00rootroot00000000000000[binaries] c = '/usr/bin/aarch64-linux-gnu-gcc-10' cpp = '/usr/bin/aarch64-linux-gnu-g++-10' ar = '/usr/bin/aarch64-linux-gnu-ar' strip = '/usr/bin/aarch64-linux-gnu-strip' objcopy = '/usr/bin/aarch64-linux-gnu-objcopy' ld = '/usr/bin/aarch64-linux-gnu-ld' exe_wrapper = 'qemu-aarch64-static' [properties] c_args = ['-march=armv8-a+sve', '-Wextra', '-Werror'] cpp_args = ['-march=armv8-a+sve', '-Wextra', 
'-Werror'] [host_machine] system = 'linux' cpu_family = 'aarch64' cpu = 'arm64' endian = 'little' simde-0.7.2/docker/cross-files/aarch64-clang-10.cross000066400000000000000000000012551400333146700221300ustar00rootroot00000000000000[binaries] c = '/usr/bin/clang-10' cpp = '/usr/bin/clang++-10' ar = '/usr/bin/llvm-ar-10' strip = '/usr/bin/llvm-strip-10' objcopy = '/usr/bin/llvm-objcopy-10' ld = '/usr/bin/llvm-ld-10' exe_wrapper = 'qemu-aarch64-static' [properties] c_args = ['--target=aarch64-linux-gnu', '-march=armv8-a+simd', '-isystem=/usr/aarch64-linux-gnu/include', '-Weverything', '-Werror'] cpp_args = ['--target=aarch64-linux-gnu', '-march=armv8-a+simd', '-isystem=/usr/aarch64-linux-gnu/include', '-Weverything', '-Werror'] c_link_args = ['--target=aarch64-linux-gnu'] cpp_link_args = ['--target=aarch64-linux-gnu'] [host_machine] system = 'linux' cpu_family = 'aarch64' cpu = 'arm64' endian = 'little' simde-0.7.2/docker/cross-files/aarch64-gcc-10.cross000066400000000000000000000010211400333146700215670ustar00rootroot00000000000000[binaries] c = '/usr/bin/aarch64-linux-gnu-gcc-10' cpp = '/usr/bin/aarch64-linux-gnu-g++-10' ar = '/usr/bin/aarch64-linux-gnu-ar' strip = '/usr/bin/aarch64-linux-gnu-strip' objcopy = '/usr/bin/aarch64-linux-gnu-objcopy' ld = '/usr/bin/aarch64-linux-gnu-ld' exe_wrapper = 'qemu-aarch64-static' [properties] c_args = ['-march=armv8-a+simd+crypto+crc', '-Wextra', '-Werror'] cpp_args = ['-march=armv8-a+simd+crypto+crc', '-Wextra', '-Werror'] [host_machine] system = 'linux' cpu_family = 'aarch64' cpu = 'arm64' endian = 'little' simde-0.7.2/docker/cross-files/aarch64-gcc-9.cross000066400000000000000000000007711400333146700215320ustar00rootroot00000000000000[binaries] c = '/usr/bin/aarch64-linux-gnu-gcc-9' cpp = '/usr/bin/aarch64-linux-gnu-g++-9' ar = '/usr/bin/aarch64-linux-gnu-ar' strip = '/usr/bin/aarch64-linux-gnu-strip' objcopy = '/usr/bin/aarch64-linux-gnu-objcopy' ld = '/usr/bin/aarch64-linux-gnu-ld' exe_wrapper = 'qemu-aarch64-static' 
[properties] c_args = ['-march=armv8-a+simd', '-Wextra', '-Werror'] cpp_args = ['-march=armv8-a+simd', '-Wextra', '-Werror'] [host_machine] system = 'linux' cpu_family = 'aarch64' cpu = 'arm64' endian = 'little' simde-0.7.2/docker/cross-files/armv8+mve-gcc-10.cross000066400000000000000000000010651400333146700221670ustar00rootroot00000000000000[binaries] c = '/usr/bin/arm-linux-gnueabihf-gcc-10' cpp = '/usr/bin/arm-linux-gnueabihf-g++-10' ar = '/usr/bin/arm-linux-gnueabihf-ar' strip = '/usr/bin/arm-linux-gnueabihf-strip' objcopy = '/usr/bin/arm-linux-gnueabihf-objcopy' ld = '/usr/bin/arm-linux-gnueabihf-ld' exe_wrapper = 'qemu-arm-static' [properties] c_args = ['-march=armv8.1-m.main+mve', '-mfloat-abi=hard', '-Wextra', '-Werror'] cpp_args = ['-march=armv8.1-m.main+mve', '-mfloat-abi=hard', '-Wextra', '-Werror'] [host_machine] system = 'linux' cpu_family = 'arm' cpu = 'armv8-a' endian = 'little' simde-0.7.2/docker/cross-files/armv8-clang-10.cross000066400000000000000000000012511400333146700217310ustar00rootroot00000000000000[binaries] c = '/usr/bin/clang-10' cpp = '/usr/bin/clang++-10' ar = '/usr/bin/llvm-ar-10' strip = '/usr/bin/llvm-strip-10' objcopy = '/usr/bin/llvm-objcopy-10' ld = '/usr/bin/llvm-ld-10' exe_wrapper = 'qemu-arm-static' [properties] c_args = ['--target=arm-linux-gnueabihf', '-march=armv8-a', '-isystem=/usr/arm-linux-gnueabihf/include', '-Weverything', '-Werror'] cpp_args = ['--target=arm-linux-gnueabihf', '-march=armv8-a', '-isystem=/usr/arm-linux-gnueabihf/include', '-Weverything', '-Werror'] c_link_args = ['--target=arm-linux-gnueabihf'] cpp_link_args = ['--target=arm-linux-gnueabihf'] [host_machine] system = 'linux' cpu_family = 'arm' cpu = 'armv8-a' endian = 'little' simde-0.7.2/docker/cross-files/armv8-clang-8.cross000066400000000000000000000012431400333146700216610ustar00rootroot00000000000000[binaries] c = '/usr/bin/clang-8' cpp = '/usr/bin/clang++-8' ar = '/usr/bin/llvm-ar-8' strip = '/usr/bin/llvm-strip-8' objcopy = 
'/usr/bin/llvm-objcopy-8' ld = '/usr/bin/llvm-ld-8' exe_wrapper = 'qemu-arm-static' [properties] c_args = ['--target=arm-linux-gnueabihf', '-march=armv8-a', '-isystem=/usr/arm-linux-gnueabihf/include', '-Weverything', '-Werror'] cpp_args = ['--target=arm-linux-gnueabihf', '-march=armv8-a', '-isystem=/usr/arm-linux-gnueabihf/include', '-Weverything', '-Werror'] c_link_args = ['--target=arm-linux-gnueabihf'] cpp_link_args = ['--target=arm-linux-gnueabihf'] [host_machine] system = 'linux' cpu_family = 'arm' cpu = 'armv8-a' endian = 'little' simde-0.7.2/docker/cross-files/armv8-clang-9.cross000066400000000000000000000012431400333146700216620ustar00rootroot00000000000000[binaries] c = '/usr/bin/clang-9' cpp = '/usr/bin/clang++-9' ar = '/usr/bin/llvm-ar-9' strip = '/usr/bin/llvm-strip-9' objcopy = '/usr/bin/llvm-objcopy-9' ld = '/usr/bin/llvm-ld-9' exe_wrapper = 'qemu-arm-static' [properties] c_args = ['--target=arm-linux-gnueabihf', '-march=armv8-a', '-isystem=/usr/arm-linux-gnueabihf/include', '-Weverything', '-Werror'] cpp_args = ['--target=arm-linux-gnueabihf', '-march=armv8-a', '-isystem=/usr/arm-linux-gnueabihf/include', '-Weverything', '-Werror'] c_link_args = ['--target=arm-linux-gnueabihf'] cpp_link_args = ['--target=arm-linux-gnueabihf'] [host_machine] system = 'linux' cpu_family = 'arm' cpu = 'armv8-a' endian = 'little' simde-0.7.2/docker/cross-files/armv8-gcc-10.cross000066400000000000000000000010351400333146700214010ustar00rootroot00000000000000[binaries] c = '/usr/bin/arm-linux-gnueabihf-gcc-10' cpp = '/usr/bin/arm-linux-gnueabihf-g++-10' ar = '/usr/bin/arm-linux-gnueabihf-ar' strip = '/usr/bin/arm-linux-gnueabihf-strip' objcopy = '/usr/bin/arm-linux-gnueabihf-objcopy' ld = '/usr/bin/arm-linux-gnueabihf-ld' exe_wrapper = 'qemu-arm-static' [properties] c_args = ['-march=armv8-a+simd', '-mfpu=auto', '-Wextra', '-Werror'] cpp_args = ['-march=armv8-a+simd', '-mfpu=auto', '-Wextra', '-Werror'] [host_machine] system = 'linux' cpu_family = 'arm' cpu = 'armv8-a' 
endian = 'little' simde-0.7.2/docker/cross-files/armv8-gcc-9.cross000066400000000000000000000010331400333146700213270ustar00rootroot00000000000000[binaries] c = '/usr/bin/arm-linux-gnueabihf-gcc-9' cpp = '/usr/bin/arm-linux-gnueabihf-g++-9' ar = '/usr/bin/arm-linux-gnueabihf-ar' strip = '/usr/bin/arm-linux-gnueabihf-strip' objcopy = '/usr/bin/arm-linux-gnueabihf-objcopy' ld = '/usr/bin/arm-linux-gnueabihf-ld' exe_wrapper = 'qemu-arm-static' [properties] c_args = ['-march=armv8-a+simd', '-mfpu=auto', '-Wextra', '-Werror'] cpp_args = ['-march=armv8-a+simd', '-mfpu=auto', '-Wextra', '-Werror'] [host_machine] system = 'linux' cpu_family = 'arm' cpu = 'armv8-a' endian = 'little' simde-0.7.2/docker/cross-files/clang-10.cross000066400000000000000000000004661400333146700207050ustar00rootroot00000000000000[binaries] c = '/usr/bin/clang-10' cpp = '/usr/bin/clang++-10' ar = '/usr/bin/llvm-ar-10' strip = '/usr/bin/llvm-strip-10' objcopy = '/usr/bin/llvm-objcopy-10' ld = '/usr/bin/llvm-ld-10' [properties] c_args = ['-march=native','-Weverything', '-Werror'] cpp_args = ['-march=native', '-Weverything', '-Werror'] simde-0.7.2/docker/cross-files/clang-11.cross000066400000000000000000000004661400333146700207060ustar00rootroot00000000000000[binaries] c = '/usr/bin/clang-11' cpp = '/usr/bin/clang++-11' ar = '/usr/bin/llvm-ar-11' strip = '/usr/bin/llvm-strip-11' objcopy = '/usr/bin/llvm-objcopy-11' ld = '/usr/bin/llvm-ld-11' [properties] c_args = ['-march=native','-Weverything', '-Werror'] cpp_args = ['-march=native', '-Weverything', '-Werror'] simde-0.7.2/docker/cross-files/clang-9.cross000066400000000000000000000004601400333146700206270ustar00rootroot00000000000000[binaries] c = '/usr/bin/clang-9' cpp = '/usr/bin/clang++-9' ar = '/usr/bin/llvm-ar-9' strip = '/usr/bin/llvm-strip-9' objcopy = '/usr/bin/llvm-objcopy-9' ld = '/usr/bin/llvm-ld-9' [properties] c_args = ['-march=native','-Weverything', '-Werror'] cpp_args = ['-march=native', '-Weverything', '-Werror'] 
simde-0.7.2/docker/cross-files/emscripten.cross000066400000000000000000000010341400333146700215440ustar00rootroot00000000000000[binaries] c = '/opt/emsdk/upstream/emscripten/emcc' cpp = '/opt/emsdk/upstream/emscripten/em++' ar = '/opt/emsdk/upstream/emscripten/emar' strip = '/opt/emsdk/upstream/bin/llvm-strip' objcopy = '/opt/emsdk/upstream/bin/llvm-objcopy' exe_wrapper = ['/usr/bin/v8', '--experimental-wasm-simd'] [properties] skip_sanity_check = true c_args = ['-Weverything', '-Werror', '-O3', '-msimd128'] cpp_args = ['-Weverything', '-Werror', '-O3', '-msimd128'] [host_machine] system = 'emscripten' cpu_family = 'wasm32' cpu = 'wasm32' endian = 'little' simde-0.7.2/docker/cross-files/gcc-10.cross000066400000000000000000000004071400333146700203500ustar00rootroot00000000000000[binaries] c = '/usr/bin/gcc-10' cpp = '/usr/bin/g++-10' ar = '/usr/bin/ar' strip = '/usr/bin/strip' objcopy = '/usr/bin/objcopy' ld = '/usr/bin/ld' [properties] c_args = ['-Wextra', '-Werror', '-march=native'] cpp_args = ['-Wextra', '-Werror', '-march=native'] simde-0.7.2/docker/cross-files/gcc-8.cross000066400000000000000000000004051400333146700202750ustar00rootroot00000000000000[binaries] c = '/usr/bin/gcc-8' cpp = '/usr/bin/g++-8' ar = '/usr/bin/ar' strip = '/usr/bin/strip' objcopy = '/usr/bin/objcopy' ld = '/usr/bin/ld' [properties] c_args = ['-Wextra', '-Werror', '-march=native'] cpp_args = ['-Wextra', '-Werror', '-march=native'] simde-0.7.2/docker/cross-files/gcc-9.cross000066400000000000000000000004051400333146700202760ustar00rootroot00000000000000[binaries] c = '/usr/bin/gcc-9' cpp = '/usr/bin/g++-9' ar = '/usr/bin/ar' strip = '/usr/bin/strip' objcopy = '/usr/bin/objcopy' ld = '/usr/bin/ld' [properties] c_args = ['-Wextra', '-Werror', '-march=native'] cpp_args = ['-Wextra', '-Werror', '-march=native'] simde-0.7.2/docker/cross-files/i686-all-gcc-10.cross000066400000000000000000000006521400333146700216120ustar00rootroot00000000000000[binaries] c = '/usr/bin/i686-linux-gnu-gcc-10' cpp = 
'/usr/bin/i686-linux-gnu-g++-10' ar = '/usr/bin/ar' strip = '/usr/bin/strip' objcopy = '/usr/bin/objcopy' ld = '/usr/bin/ld' exe_wrapper = ['sde', '-future', '--'] [properties] c_args = ['-march=tigerlake', '-Wextra', '-Werror'] cpp_args = ['-march=tigerlake', '-Wextra', '-Werror'] [host_machine] system = 'linux' cpu_family = 'i686-all' cpu = 'i686' endian = 'little' simde-0.7.2/docker/cross-files/i686-all-gcc-9.cross000066400000000000000000000006471400333146700215460ustar00rootroot00000000000000[binaries] c = '/usr/bin/i686-linux-gnu-gcc-9' cpp = '/usr/bin/i686-linux-gnu-g++-9' ar = '/usr/bin/ar' strip = '/usr/bin/strip' objcopy = '/usr/bin/objcopy' ld = '/usr/bin/ld' exe_wrapper = ['sde', '--'] [properties] c_args = ['-march=icelake-server', '-Wextra', '-Werror'] cpp_args = ['-march=icelake-server', '-Wextra', '-Werror'] [host_machine] system = 'linux' cpu_family = 'i686-all' cpu = 'i686' endian = 'little' simde-0.7.2/docker/cross-files/i686-clang-10.cross000066400000000000000000000010631400333146700213710ustar00rootroot00000000000000[binaries] c = '/usr/bin/clang-10' cpp = '/usr/bin/clang++-10' ar = '/usr/bin/llvm-ar-10' strip = '/usr/bin/llvm-strip-10' objcopy = '/usr/bin/llvm-objcopy-10' ld = '/usr/bin/llvm-ld-10' exe_wrapper = 'qemu-arm-static' [properties] c_args = ['--target=i686-linux-gnu', '-isystem=/usr/i686-linux-gnu/include', '-march=native', '-Weverything', '-Werror'] cpp_args = ['--target=i686-linux-gnu', '-isystem=/usr/i686-linux-gnu/include', '-march=native', '-Weverything', '-Werror'] c_link_args = ['--target=i686-linux-gnu'] cpp_link_args = ['--target=i686-linux-gnu'] simde-0.7.2/docker/cross-files/i686-gcc-10.cross000066400000000000000000000005411400333146700210410ustar00rootroot00000000000000[binaries] c = '/usr/bin/i686-linux-gnu-gcc-10' cpp = '/usr/bin/i686-linux-gnu-g++-10' ar = '/usr/bin/i686-linux-gnu-ar' strip = '/usr/bin/i686-linux-gnu-strip' objcopy = '/usr/bin/i686-linux-gnu-objcopy' ld = '/usr/bin/i686-linux-gnu-ld' [properties] 
c_args = ['-Wextra', '-Werror', '-march=native'] cpp_args = ['-Wextra', '-Werror', '-march=native'] simde-0.7.2/docker/cross-files/i686-gcc-9.cross000066400000000000000000000005371400333146700207760ustar00rootroot00000000000000[binaries] c = '/usr/bin/i686-linux-gnu-gcc-9' cpp = '/usr/bin/i686-linux-gnu-g++-9' ar = '/usr/bin/i686-linux-gnu-ar' strip = '/usr/bin/i686-linux-gnu-strip' objcopy = '/usr/bin/i686-linux-gnu-objcopy' ld = '/usr/bin/i686-linux-gnu-ld' [properties] c_args = ['-Wextra', '-Werror', '-march=native'] cpp_args = ['-Wextra', '-Werror', '-march=native'] simde-0.7.2/docker/cross-files/icc.cross000066400000000000000000000005121400333146700201310ustar00rootroot00000000000000[binaries] c = '/usr/bin/icc' cpp = '/usr/bin/icpc' ar = '/usr/bin/ar' strip = '/usr/bin/strip' objcopy = '/usr/bin/objcopy' ld = '/usr/bin/ld' [properties] c_args = ['-Wextra', '-wd13200', '-wd13203', '-wd16219', '-Werror', '-march=native'] cpp_args = ['-Wextra', '-wd13200', '-wd13203', '-wd16219', '-Werror', '-march=native'] simde-0.7.2/docker/cross-files/intel-all-clang-10.cross000066400000000000000000000010661400333146700225610ustar00rootroot00000000000000[binaries] c = '/usr/bin/clang-10' cpp = '/usr/bin/clang++-10' ar = '/usr/bin/llvm-ar-10' strip = '/usr/bin/llvm-strip-10' objcopy = '/usr/bin/llvm-objcopy-10' ld = '/usr/bin/llvm-ld-10' exe_wrapper = ['sde64', '-future', '--'] [properties] c_args = ['-march=tigerlake', '-Weverything', '-Wno-newline-eof', '-Wno-missing-variable-declarations', '-Werror'] cpp_args = ['-march=tigerlake', '-Weverything', '-Wno-newline-eof', '-Wno-missing-variable-declarations', '-Werror'] [host_machine] system = 'linux' cpu_family = 'x86_64-all' cpu = 'x86_64' endian = 'little' simde-0.7.2/docker/cross-files/intel-all-gcc-10.cross000066400000000000000000000006221400333146700222260ustar00rootroot00000000000000[binaries] c = '/usr/bin/gcc-10' cpp = '/usr/bin/g++-10' ar = '/usr/bin/ar' strip = '/usr/bin/strip' objcopy = '/usr/bin/objcopy' ld = 
'/usr/bin/ld' exe_wrapper = ['sde64', '-future', '--'] [properties] c_args = ['-march=tigerlake', '-Wextra', '-Werror'] cpp_args = ['-march=tigerlake', '-Wextra', '-Werror'] [host_machine] system = 'linux' cpu_family = 'x86_64-all' cpu = 'x86_64' endian = 'little' simde-0.7.2/docker/cross-files/intel-all-icc.cross000066400000000000000000000007251400333146700220160ustar00rootroot00000000000000[binaries] c = '/usr/bin/icc' cpp = '/usr/bin/icpc' ar = '/usr/bin/ar' strip = '/usr/bin/strip' objcopy = '/usr/bin/objcopy' ld = '/usr/bin/ld' exe_wrapper = ['sde64', '-future', '--'] [properties] c_args = ['-march=tigerlake', '-Wextra', '-wd13200', '-wd13203', '-wd16219', '-Werror'] cpp_args = ['-march=tigerlake', '-Wextra', '-wd13200', '-wd13203', '-wd16219', '-Werror'] [host_machine] system = 'linux' cpu_family = 'x86_64-all' cpu = 'x86_64' endian = 'little' simde-0.7.2/docker/cross-files/loongson-gcc-10.cross000066400000000000000000000010751400333146700222060ustar00rootroot00000000000000[binaries] c = '/usr/bin/mips64el-linux-gnuabi64-gcc-10' cpp = '/usr/bin/mips64el-linux-gnuabi64-g++-10' ar = '/usr/bin/mips64el-linux-gnuabi64-ar' strip = '/usr/bin/mips64el-linux-gnuabi64-strip' objcopy = '/usr/bin/mips64el-linux-gnuabi64-objcopy' ld = '/usr/bin/mips64el-linux-gnuabi64-ld' exe_wrapper = ['qemu-mips64el-static', '-cpu', 'Loongson-3A4000'] [properties] c_args = ['-march=loongson3a', '-Wextra', '-Werror'] cpp_args = ['-march=loongson3a', '-Wextra', '-Werror'] [host_machine] system = 'linux' cpu_family = 'mips64el' cpu = 'mips64el' endian = 'little' simde-0.7.2/docker/cross-files/loongson-gcc.cross000066400000000000000000000010131400333146700217600ustar00rootroot00000000000000[binaries] c = '/usr/bin/mips64el-linux-gnuabi64-gcc' cpp = '/usr/bin/mips64el-linux-gnuabi64-g++' ar = '/usr/bin/mips64el-linux-gnuabi64-ar' strip = '/usr/bin/mips64el-linux-gnuabi64-strip' objcopy = '/usr/bin/mips64el-linux-gnuabi64-objcopy' ld = '/usr/bin/mips64el-linux-gnuabi64-ld' exe_wrapper = 
['qemu-mips64el-static', '-cpu', 'Loongson-3A4000'] [properties] c_args = ['-march=loongson3a'] cpp_args = ['-march=loongson3a'] [host_machine] system = 'linux' cpu_family = 'mips64el' cpu = 'mips64el' endian = 'little' simde-0.7.2/docker/cross-files/mips64el-gcc-10.cross000066400000000000000000000010121400333146700220020ustar00rootroot00000000000000[binaries] c = '/usr/bin/mips64el-linux-gnuabi64-gcc-10' cpp = '/usr/bin/mips64el-linux-gnuabi64-g++-10' ar = '/usr/bin/mips64el-linux-gnuabi64-ar' strip = '/usr/bin/mips64el-linux-gnuabi64-strip' objcopy = '/usr/bin/mips64el-linux-gnuabi64-objcopy' ld = '/usr/bin/mips64el-linux-gnuabi64-ld' exe_wrapper = ['qemu-mips64el-static'] [properties] c_args = ['-mmsa', '-Wextra', '-Werror'] cpp_args = ['-mmsa', '-Wextra', '-Werror'] [host_machine] system = 'linux' cpu_family = 'mips64el' cpu = 'mips64el' endian = 'little' simde-0.7.2/docker/cross-files/power8-clang-9.cross000066400000000000000000000013131400333146700220470ustar00rootroot00000000000000[binaries] c = '/usr/bin/clang-9' cpp = '/usr/bin/clang++-9' ar = '/usr/bin/llvm-ar-9' strip = '/usr/bin/llvm-strip-9' objcopy = '/usr/bin/llvm-objcopy-9' ld = '/usr/bin/llvm-ld-9' exe_wrapper = ['qemu-ppc64le-static', '-cpu', 'power8'] [properties] c_args = ['--target=powerpc64le-linux-gnu', '-mcpu=power8', '-isystem=/usr/powerpc64le-linux-gnu/include', '-Weverything', '-Werror'] cpp_args = ['--target=powerpc64le-linux-gnu', '-mcpu=power8', '-isystem=/usr/powerpc64le-linux-gnu/include', '-Weverything', '-Werror'] c_link_args = ['--target=powerpc64le-linux-gnu'] cpp_link_args = ['--target=powerpc64le-linux-gnu'] [host_machine] system = 'linux' cpu_family = 'powerpc64le' cpu = 'ppc64el' endian = 'little' simde-0.7.2/docker/cross-files/power8-gcc-10.cross000066400000000000000000000010371400333146700215720ustar00rootroot00000000000000[binaries] c = '/usr/bin/powerpc64le-linux-gnu-gcc-10' cpp = '/usr/bin/powerpc64le-linux-gnu-g++-10' ar = '/usr/bin/powerpc64le-linux-gnu-ar' strip = 
'/usr/bin/powerpc64le-linux-gnu-strip' objcopy = '/usr/bin/powerpc64le-linux-gnu-objcopy' ld = '/usr/bin/powerpc64le-linux-gnu-ld' exe_wrapper = ['qemu-ppc64le-static', '-cpu', 'power8'] [properties] c_args = ['-mcpu=power8', '-Wextra', '-Werror'] cpp_args = ['-mcpu=power8', '-Wextra', '-Werror'] [host_machine] system = 'linux' cpu_family = 'powerpc64le' cpu = 'ppc64el' endian = 'little' simde-0.7.2/docker/cross-files/power9-clang-10.cross000066400000000000000000000013211400333146700221170ustar00rootroot00000000000000[binaries] c = '/usr/bin/clang-10' cpp = '/usr/bin/clang++-10' ar = '/usr/bin/llvm-ar-10' strip = '/usr/bin/llvm-strip-10' objcopy = '/usr/bin/llvm-objcopy-10' ld = '/usr/bin/llvm-ld-10' exe_wrapper = ['qemu-ppc64le-static', '-cpu', 'power9'] [properties] c_args = ['--target=powerpc64le-linux-gnu', '-mcpu=power9', '-isystem=/usr/powerpc64le-linux-gnu/include', '-Weverything', '-Werror'] cpp_args = ['--target=powerpc64le-linux-gnu', '-mcpu=power9', '-isystem=/usr/powerpc64le-linux-gnu/include', '-Weverything', '-Werror'] c_link_args = ['--target=powerpc64le-linux-gnu'] cpp_link_args = ['--target=powerpc64le-linux-gnu'] [host_machine] system = 'linux' cpu_family = 'powerpc64le' cpu = 'ppc64el' endian = 'little' simde-0.7.2/docker/cross-files/power9-gcc-10.cross000066400000000000000000000010371400333146700215730ustar00rootroot00000000000000[binaries] c = '/usr/bin/powerpc64le-linux-gnu-gcc-10' cpp = '/usr/bin/powerpc64le-linux-gnu-g++-10' ar = '/usr/bin/powerpc64le-linux-gnu-ar' strip = '/usr/bin/powerpc64le-linux-gnu-strip' objcopy = '/usr/bin/powerpc64le-linux-gnu-objcopy' ld = '/usr/bin/powerpc64le-linux-gnu-ld' exe_wrapper = ['qemu-ppc64le-static', '-cpu', 'power9'] [properties] c_args = ['-mcpu=power9', '-Wextra', '-Werror'] cpp_args = ['-mcpu=power9', '-Wextra', '-Werror'] [host_machine] system = 'linux' cpu_family = 'powerpc64le' cpu = 'ppc64el' endian = 'little' 
simde-0.7.2/docker/cross-files/s390x-gcc-10.cross000066400000000000000000000006741400333146700212420ustar00rootroot00000000000000[binaries] c = '/usr/bin/s390x-linux-gnu-gcc-10' cpp = '/usr/bin/s390x-linux-gnu-g++-10' ar = '/usr/bin/s390x-linux-gnu-ar' strip = '/usr/bin/s390x-linux-gnu-strip' objcopy = '/usr/bin/s390x-linux-gnu-objcopy' ld = '/usr/bin/s390x-linux-gnu-ld' exe_wrapper = ['qemu-s390x-static'] [properties] c_args = ['-Wextra', '-Werror'] cpp_args = ['-Wextra', '-Werror'] [host_machine] system = 'linux' cpu_family = 's390x' cpu = 's390x' endian = 'big' simde-0.7.2/docker/cross-files/s390x-gcc-9.cross000066400000000000000000000006721400333146700211700ustar00rootroot00000000000000[binaries] c = '/usr/bin/s390x-linux-gnu-gcc-9' cpp = '/usr/bin/s390x-linux-gnu-g++-9' ar = '/usr/bin/s390x-linux-gnu-ar' strip = '/usr/bin/s390x-linux-gnu-strip' objcopy = '/usr/bin/s390x-linux-gnu-objcopy' ld = '/usr/bin/s390x-linux-gnu-ld' exe_wrapper = ['qemu-s390x-static'] [properties] c_args = ['-Wextra', '-Werror'] cpp_args = ['-Wextra', '-Werror'] [host_machine] system = 'linux' cpu_family = 's390x' cpu = 's390x' endian = 'big' simde-0.7.2/docker/cross-files/sleef-gcc-10.cross000066400000000000000000000004241400333146700214430ustar00rootroot00000000000000[binaries] c = '/usr/bin/gcc-10' cpp = '/usr/bin/g++-10' ar = '/usr/bin/ar' strip = '/usr/bin/strip' objcopy = '/usr/bin/objcopy' ld = '/usr/bin/ld' [properties] c_args = ['-Wextra', '-Werror', '-march=native'] cpp_args = ['-Wextra', '-Werror', '-march=native'] sleef = true simde-0.7.2/docker/simde-dev.sh000077500000000000000000000030311400333146700163020ustar00rootroot00000000000000#!/bin/bash -e # See documentation in README.md DOCKER="$(command -v podman || command -v docker)" DOCKER_DIR="$(dirname "${0}")" VOLUME_OPTIONS="" CAPABILITIES="" RELEASE="testing" if [ "${OSTYPE}" == "linux-gnu" ] && [ "$(basename "${DOCKER}")" = "podman" ]; then CAPABILITIES="--cap-add=CAP_SYS_PTRACE"; fi if [ ! 
-z "${1}" ]; then RELEASE="${1}" fi IMAGE_NAME="simde-dev-${RELEASE}" "${DOCKER}" build --build-arg "release=${RELEASE}" -t "${IMAGE_NAME}" ${CAPABILITIES} -f "${DOCKER_DIR}/Dockerfile" "${DOCKER_DIR}/.." if [ "$(basename "${DOCKER}")" = "podman" ]; then VOLUME_OPTIONS=":z"; elif [ "${OSTYPE}" == "darwin" ]; then VOLUME_OPTIONS=":delegated" fi if [ "${OSTYPE}" == "darwin" ]; then if [ -z "${PERSISTENT_BUILD_DIR}" ]; then PERSISTENT_BUILD_ARGS=""; else PERSISTENT_BUILD_DIR="$(realpath "${PERSISTENT_BUILD_DIR}")" PERSISTENT_BUILD_ARGS="--mount type=bind,source=\"${PERSISTENT_BUILD_DIR}\",target=/opt/simde" fi "${DOCKER}" run --mount type=bind,source="$(realpath "${DOCKER_DIR}/..")",target=/usr/local/src/simde ${PERSISTENT_BUILD_ARGS} ${CAPABILITIES} --rm -it "${IMAGE_NAME}" /bin/bash else if [ -z "${PERSISTENT_BUILD_DIR}" ]; then PERSISTENT_BUILD_ARGS=""; else PERSISTENT_BUILD_DIR="$(realpath "${PERSISTENT_BUILD_DIR}")" PERSISTENT_BUILD_ARGS="-v \"${PERSISTENT_BUILD_DIR}\":/opt/simde${VOLUME_OPTIONS}" fi "${DOCKER}" run -v "$(realpath "${DOCKER_DIR}/..")":/usr/local/src/simde${VOLUME_OPTIONS} ${PERSISTENT_BUILD_ARGS} ${CAPABILITIES} --rm -it "${IMAGE_NAME}" /bin/bash fi simde-0.7.2/meson.build000066400000000000000000000105071400333146700147670ustar00rootroot00000000000000project('SIMDe', 'c', 'cpp', default_options: ['c_std=c99'], license: 'MIT', version: '0.7.2') # also update SIMDE_VESION_* in simde/simde-common.h cc = meson.get_compiler('c') cxx = meson.get_compiler('cpp') simde_neon_families = [ 'aba', 'abd', 'abdl', 'abs', 'add', 'addl', 'addlv', 'addl_high', 'addv', 'addw', 'addw_high', 'and', 'bic', 'bsl', 'cagt', 'ceq', 'ceqz', 'cge', 'cgez', 'cgt', 'cgtz', 'cle', 'cls', 'clez', 'clt', 'cltz', 'clz', 'cnt', 'cvt', 'combine', 'create', 'dot', 'dot_lane', 'dup_n', 'dup_lane', 'eor', 'ext', 'get_high', 'get_lane', 'get_low', 'hadd', 'hsub', 'ld1', 'ld3', 'ld4', 'max', 'maxnm', 'maxv', 'min', 'minnm', 'minv', 'mla', 'mla_n', 'mlal', 'mlal_high', 'mlal_n', 'mls', 
'mlsl', 'mlsl_high', 'mlsl_n', 'movl', 'movl_high', 'movn', 'movn_high', 'mul', 'mul_lane', 'mul_n', 'mull', 'mull_high', 'mull_n', 'mvn', 'neg', 'orn', 'orr', 'padal', 'padd', 'paddl', 'pmax', 'pmin', 'qadd', 'qabs', 'qdmulh', 'qdmull', 'qrdmulh', 'qrdmulh_n', 'qmovn', 'qmovn_high', 'qmovun', 'qneg', 'qshl', 'qsub', 'qtbl', 'qtbx', 'rbit', 'reinterpret', 'rev16', 'rev32', 'rev64', 'rhadd', 'rnd', 'rndi', 'rndm', 'rndn', 'rndp', 'rshl', 'rshr_n', 'rsra_n', 'set_lane', 'shl', 'shl_n', 'shr_n', 'sra_n', 'st1', 'st1_lane', 'st3', 'st4', 'sub', 'subl', 'subw', 'subw_high', 'tbl', 'tbx', 'trn1', 'trn2', 'trn', 'tst', 'uqadd', 'uzp1', 'uzp2', 'uzp', 'zip1', 'zip2', 'zip', ] simde_avx512_families = [ '2intersect', 'abs', 'add', 'adds', 'and', 'andnot', 'avg', 'blend', 'broadcast', 'cast', 'cmp', 'cmpeq', 'cmpge', 'cmpgt', 'cmple', 'cmplt', 'copysign', 'cvt', 'cvts', 'div', 'extract', 'fmadd', 'fmsub', 'fnmadd', 'fnmsub', 'insert', 'kshift', 'load', 'loadu', 'lzcnt', 'madd', 'maddubs', 'max', 'min', 'mov', 'mov_mask', 'movm', 'mul', 'mulhi', 'mulhrs', 'mullo', 'negate', 'or', 'packs', 'packus', 'permutexvar', 'permutex2var', 'sad', 'set', 'set1', 'set4', 'setr', 'setr4', 'setzero', 'setone', 'shuffle', 'sll', 'slli', 'sllv', 'sqrt', 'sra', 'srai', 'srav', 'srl', 'srli', 'srlv', 'store', 'storeu', 'sub', 'subs', 'test', 'unpacklo', 'unpackhi', 'xor', 'xorsign', ] if not meson.is_subproject() and get_option('tests') subdir('test') endif simde_neon_family_headers = ['simde/arm/neon/types.h'] foreach neon_family : simde_neon_families simde_neon_family_headers += 'simde/arm/neon/' + neon_family + '.h' endforeach simde_avx512_family_headers = ['simde/x86/avx512/types.h'] foreach avx512_family : simde_avx512_families simde_avx512_family_headers += 'simde/x86/avx512/' + avx512_family + '.h' endforeach if not meson.is_subproject() install_headers( files([ 'simde/check.h', 'simde/debug-trap.h', 'simde/hedley.h', 'simde/simde-align.h', 'simde/simde-arch.h', 'simde/simde-common.h', 
'simde/simde-constify.h', 'simde/simde-detect-clang.h', 'simde/simde-diagnostic.h', 'simde/simde-features.h', 'simde/simde-math.h', 'simde/simde-complex.h', ]), subdir: 'simde') install_headers( files([ 'simde/x86/avx2.h', 'simde/x86/avx.h', 'simde/x86/clmul.h', 'simde/x86/fma.h', 'simde/x86/gfni.h', 'simde/x86/mmx.h', 'simde/x86/sse2.h', 'simde/x86/sse3.h', 'simde/x86/sse4.1.h', 'simde/x86/sse4.2.h', 'simde/x86/sse.h', 'simde/x86/ssse3.h', 'simde/x86/svml.h', 'simde/x86/xop.h', ]), subdir: 'simde/x86') install_headers( files(simde_avx512_family_headers), subdir: 'simde/x86/avx512') install_headers( files([ 'simde/arm/neon.h', ]), subdir: 'simde/arm') install_headers( files(simde_neon_family_headers), subdir: 'simde/arm/neon') import('pkgconfig').generate( version : meson.project_version(), name : 'SIMDe', filebase : 'simde', description : 'Portable SIMD wrapper library') endif simde_dep = declare_dependency( include_directories : include_directories('.'), version : meson.project_version()) simde-0.7.2/meson_options.txt000066400000000000000000000003431400333146700162570ustar00rootroot00000000000000option('tests', type : 'boolean', value : true, description : 'Enable dependencies required for testing') option('sleef', type : 'feature', value : 'disabled', description : 'Enable SLEEF integration') simde-0.7.2/netlify.toml000066400000000000000000000015121400333146700151700ustar00rootroot00000000000000[build] publish = 'web' command = 'mkdir -p web/amalgamated/x86 && (cd simde && for header in x86/*.h; do ../amalgamate.py "$header" > ../web/amalgamated/"$header"; done); mkdir -p web/amalgamated/arm && (cd simde/arm && ../../amalgamate.py neon.h > ../../web/amalgamated/arm/neon.h)' [[headers]] for = "/amalgamated/*/*.h" [headers.values] Access-Control-Allow-Origin = "*" [[redirects]] from = "/godbolt/simple" to = "https://godbolt.org/z/Y3XexA" status = 302 force = true [[redirects]] from = "/godbolt/demo" to = "https://godbolt.org/z/keBtF9" status = 302 force = true 
[[redirects]] from = "/godbolt/arm/neon/simple" to = "https://godbolt.org/z/6rdp4L" status = 302 force = true [[redirects]] from = "/godbolt/arm/neon/demo" to = "https://godbolt.org/z/sHVz49" status = 302 force = true simde-0.7.2/simde/000077500000000000000000000000001400333146700137235ustar00rootroot00000000000000simde-0.7.2/simde/arm/000077500000000000000000000000001400333146700145025ustar00rootroot00000000000000simde-0.7.2/simde/arm/neon.h000066400000000000000000000105331400333146700156140ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_H) #define SIMDE_ARM_NEON_H #include "neon/types.h" #include "neon/aba.h" #include "neon/abd.h" #include "neon/abdl.h" #include "neon/abs.h" #include "neon/add.h" #include "neon/addl.h" #include "neon/addlv.h" #include "neon/addl_high.h" #include "neon/addv.h" #include "neon/addw.h" #include "neon/addw_high.h" #include "neon/and.h" #include "neon/bic.h" #include "neon/bsl.h" #include "neon/cagt.h" #include "neon/ceq.h" #include "neon/ceqz.h" #include "neon/cge.h" #include "neon/cgez.h" #include "neon/cgt.h" #include "neon/cgtz.h" #include "neon/cle.h" #include "neon/clez.h" #include "neon/cls.h" #include "neon/clt.h" #include "neon/cltz.h" #include "neon/clz.h" #include "neon/cnt.h" #include "neon/cvt.h" #include "neon/combine.h" #include "neon/create.h" #include "neon/dot.h" #include "neon/dot_lane.h" #include "neon/dup_lane.h" #include "neon/dup_n.h" #include "neon/eor.h" #include "neon/ext.h" #include "neon/get_high.h" #include "neon/get_lane.h" #include "neon/get_low.h" #include "neon/hadd.h" #include "neon/hsub.h" #include "neon/ld1.h" #include "neon/ld3.h" #include "neon/ld4.h" #include "neon/max.h" #include "neon/maxnm.h" #include "neon/maxv.h" #include "neon/min.h" #include "neon/minnm.h" #include "neon/minv.h" #include "neon/mla.h" #include "neon/mla_n.h" #include "neon/mlal.h" #include "neon/mlal_high.h" #include "neon/mlal_n.h" #include "neon/mls.h" #include "neon/mlsl.h" #include "neon/mlsl_high.h" #include "neon/mlsl_n.h" #include "neon/movl.h" #include "neon/movl_high.h" #include "neon/movn.h" #include "neon/movn_high.h" #include "neon/mul.h" #include "neon/mul_lane.h" #include "neon/mul_n.h" #include "neon/mull.h" #include "neon/mull_high.h" #include "neon/mull_n.h" #include "neon/mvn.h" #include "neon/neg.h" #include "neon/orn.h" #include "neon/orr.h" #include "neon/padal.h" #include "neon/padd.h" #include "neon/paddl.h" #include "neon/pmax.h" #include "neon/pmin.h" #include 
"neon/qabs.h" #include "neon/qadd.h" #include "neon/qdmulh.h" #include "neon/qdmull.h" #include "neon/qrdmulh.h" #include "neon/qrdmulh_n.h" #include "neon/qmovn.h" #include "neon/qmovun.h" #include "neon/qmovn_high.h" #include "neon/qneg.h" #include "neon/qsub.h" #include "neon/qshl.h" #include "neon/qtbl.h" #include "neon/qtbx.h" #include "neon/rbit.h" #include "neon/reinterpret.h" #include "neon/rev16.h" #include "neon/rev32.h" #include "neon/rev64.h" #include "neon/rhadd.h" #include "neon/rnd.h" #include "neon/rndm.h" #include "neon/rndi.h" #include "neon/rndn.h" #include "neon/rndp.h" #include "neon/rshl.h" #include "neon/rshr_n.h" #include "neon/rsra_n.h" #include "neon/set_lane.h" #include "neon/shl.h" #include "neon/shl_n.h" #include "neon/shr_n.h" #include "neon/sra_n.h" #include "neon/st1.h" #include "neon/st1_lane.h" #include "neon/st3.h" #include "neon/st4.h" #include "neon/sub.h" #include "neon/subl.h" #include "neon/subw.h" #include "neon/subw_high.h" #include "neon/tbl.h" #include "neon/tbx.h" #include "neon/trn.h" #include "neon/trn1.h" #include "neon/trn2.h" #include "neon/tst.h" #include "neon/uqadd.h" #include "neon/uzp.h" #include "neon/uzp1.h" #include "neon/uzp2.h" #include "neon/zip.h" #include "neon/zip1.h" #include "neon/zip2.h" #endif /* SIMDE_ARM_NEON_H */ simde-0.7.2/simde/arm/neon/000077500000000000000000000000001400333146700154415ustar00rootroot00000000000000simde-0.7.2/simde/arm/neon/aba.h000066400000000000000000000142041400333146700163360ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following 
conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_ABA_H) #define SIMDE_ARM_NEON_ABA_H #include "abd.h" #include "add.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vaba_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaba_s8(a, b, c); #else return simde_vadd_s8(simde_vabd_s8(b, c), a); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaba_s8 #define vaba_s8(a, b, c) simde_vaba_s8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vaba_s16(simde_int16x4_t a, simde_int16x4_t b, simde_int16x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaba_s16(a, b, c); #else return simde_vadd_s16(simde_vabd_s16(b, c), a); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaba_s16 #define vaba_s16(a, b, c) simde_vaba_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vaba_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaba_s32(a, b, c); #else return simde_vadd_s32(simde_vabd_s32(b, c), a); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaba_s32 #define vaba_s32(a, b, c) simde_vaba_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde_uint8x8_t simde_vaba_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaba_u8(a, b, c); #else return simde_vadd_u8(simde_vabd_u8(b, c), a); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaba_u8 #define vaba_u8(a, b, c) simde_vaba_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vaba_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaba_u16(a, b, c); #else return simde_vadd_u16(simde_vabd_u16(b, c), a); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaba_u16 #define vaba_u16(a, b, c) simde_vaba_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vaba_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaba_u32(a, b, c); #else return simde_vadd_u32(simde_vabd_u32(b, c), a); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaba_u32 #define vaba_u32(a, b, c) simde_vaba_u32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vabaq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabaq_s8(a, b, c); #else return simde_vaddq_s8(simde_vabdq_s8(b, c), a); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabaq_s8 #define vabaq_s8(a, b, c) simde_vabaq_s8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vabaq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabaq_s16(a, b, c); #else return simde_vaddq_s16(simde_vabdq_s16(b, c), a); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabaq_s16 #define vabaq_s16(a, b, c) simde_vabaq_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vabaq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t 
c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabaq_s32(a, b, c); #else return simde_vaddq_s32(simde_vabdq_s32(b, c), a); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabaq_s32 #define vabaq_s32(a, b, c) simde_vabaq_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vabaq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabaq_u8(a, b, c); #else return simde_vaddq_u8(simde_vabdq_u8(b, c), a); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabaq_u8 #define vabaq_u8(a, b, c) simde_vabaq_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vabaq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabaq_u16(a, b, c); #else return simde_vaddq_u16(simde_vabdq_u16(b, c), a); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabaq_u16 #define vabaq_u16(a, b, c) simde_vabaq_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vabaq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabaq_u32(a, b, c); #else return simde_vaddq_u32(simde_vabdq_u32(b, c), a); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabaq_u32 #define vabaq_u32(a, b, c) simde_vabaq_u32((a), (b), (c)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ABA_H) */ simde-0.7.2/simde/arm/neon/abd.h000066400000000000000000000262711400333146700163500ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * 
of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_ABD_H) #define SIMDE_ARM_NEON_ABD_H #include "abs.h" #include "subl.h" #include "movn.h" #include "movl.h" #include "reinterpret.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32_t simde_vabds_f32(simde_float32_t a, simde_float32_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vabds_f32(a, b); #else simde_float32_t r = a - b; return r < 0 ? -r : r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vabds_f32 #define vabds_f32(a, b) simde_vabds_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64_t simde_vabdd_f64(simde_float64_t a, simde_float64_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vabdd_f64(a, b); #else simde_float64_t r = a - b; return r < 0 ? 
-r : r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vabdd_f64 #define vabdd_f64(a, b) simde_vabdd_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vabd_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabd_f32(a, b); #else return simde_vabs_f32(simde_vsub_f32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabd_f32 #define vabd_f32(a, b) simde_vabd_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vabd_f64(simde_float64x1_t a, simde_float64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vabd_f64(a, b); #else return simde_vabs_f64(simde_vsub_f64(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vabd_f64 #define vabd_f64(a, b) simde_vabd_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vabd_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabd_s8(a, b); #else return simde_vmovn_s16(simde_vabsq_s16(simde_vsubl_s8(a, b))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabd_s8 #define vabd_s8(a, b) simde_vabd_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vabd_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabd_s16(a, b); #else return simde_vmovn_s32(simde_vabsq_s32(simde_vsubl_s16(a, b))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabd_s16 #define vabd_s16(a, b) simde_vabd_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vabd_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabd_s32(a, b); #else return simde_vmovn_s64(simde_vabsq_s64(simde_vsubl_s32(a, b))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabd_s32 #define vabd_s32(a, b) simde_vabd_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t 
simde_vabd_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabd_u8(a, b); #else return simde_vmovn_u16( simde_vreinterpretq_u16_s16( simde_vabsq_s16( simde_vsubq_s16( simde_vreinterpretq_s16_u16(simde_vmovl_u8(a)), simde_vreinterpretq_s16_u16(simde_vmovl_u8(b)))))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabd_u8 #define vabd_u8(a, b) simde_vabd_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vabd_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabd_u16(a, b); #else return simde_vmovn_u32( simde_vreinterpretq_u32_s32( simde_vabsq_s32( simde_vsubq_s32( simde_vreinterpretq_s32_u32(simde_vmovl_u16(a)), simde_vreinterpretq_s32_u32(simde_vmovl_u16(b)))))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabd_u16 #define vabd_u16(a, b) simde_vabd_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vabd_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabd_u32(a, b); #else return simde_vmovn_u64( simde_vreinterpretq_u64_s64( simde_vabsq_s64( simde_vsubq_s64( simde_vreinterpretq_s64_u64(simde_vmovl_u32(a)), simde_vreinterpretq_s64_u64(simde_vmovl_u32(b)))))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabd_u32 #define vabd_u32(a, b) simde_vabd_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vabdq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabdq_f32(a, b); #else return simde_vabsq_f32(simde_vsubq_f32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabdq_f32 #define vabdq_f32(a, b) simde_vabdq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vabdq_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vabdq_f64(a, b); #else return 
simde_vabsq_f64(simde_vsubq_f64(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vabdq_f64 #define vabdq_f64(a, b) simde_vabdq_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vabdq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabdq_s8(a, b); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { int16_t tmp = HEDLEY_STATIC_CAST(int16_t, a_.values[i]) - HEDLEY_STATIC_CAST(int16_t, b_.values[i]); r_.values[i] = HEDLEY_STATIC_CAST(int8_t, tmp < 0 ? -tmp : tmp); } return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabdq_s8 #define vabdq_s8(a, b) simde_vabdq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vabdq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabdq_s16(a, b); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { int32_t tmp = HEDLEY_STATIC_CAST(int32_t, a_.values[i]) - HEDLEY_STATIC_CAST(int32_t, b_.values[i]); r_.values[i] = HEDLEY_STATIC_CAST(int16_t, tmp < 0 ? 
-tmp : tmp); } return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabdq_s16 #define vabdq_s16(a, b) simde_vabdq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vabdq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabdq_s32(a, b); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) - HEDLEY_STATIC_CAST(int64_t, b_.values[i]); r_.values[i] = HEDLEY_STATIC_CAST(int32_t, tmp < 0 ? -tmp : tmp); } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabdq_s32 #define vabdq_s32(a, b) simde_vabdq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vabdq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabdq_u8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) return vec_absd(a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { int16_t tmp = HEDLEY_STATIC_CAST(int16_t, a_.values[i]) - HEDLEY_STATIC_CAST(int16_t, b_.values[i]); r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, tmp < 0 ? 
-tmp : tmp); } return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabdq_u8 #define vabdq_u8(a, b) simde_vabdq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vabdq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabdq_u16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) return vec_absd(a, b); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { int32_t tmp = HEDLEY_STATIC_CAST(int32_t, a_.values[i]) - HEDLEY_STATIC_CAST(int32_t, b_.values[i]); r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, tmp < 0 ? -tmp : tmp); } return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabdq_u16 #define vabdq_u16(a, b) simde_vabdq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vabdq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabdq_u32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) return vec_absd(a, b); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) - HEDLEY_STATIC_CAST(int64_t, b_.values[i]); r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, tmp < 0 ? 
-tmp : tmp); } return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabdq_u32 #define vabdq_u32(a, b) simde_vabdq_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ABD_H) */ simde-0.7.2/simde/arm/neon/abdl.h000066400000000000000000000102521400333146700165140ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_ABDL_H) #define SIMDE_ARM_NEON_ABDL_H #include "abs.h" #include "subl.h" #include "movl.h" #include "reinterpret.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vabdl_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabdl_s8(a, b); #else return simde_vabsq_s16(simde_vsubl_s8(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabdl_s8 #define vabdl_s8(a, b) simde_vabdl_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vabdl_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabdl_s16(a, b); #else return simde_vabsq_s32(simde_vsubl_s16(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabdl_s16 #define vabdl_s16(a, b) simde_vabdl_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vabdl_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabdl_s32(a, b); #else return simde_vabsq_s64(simde_vsubl_s32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabdl_s32 #define vabdl_s32(a, b) simde_vabdl_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vabdl_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabdl_u8(a, b); #else return simde_vreinterpretq_u16_s16( simde_vabsq_s16( simde_vsubq_s16( simde_vreinterpretq_s16_u16(simde_vmovl_u8(a)), simde_vreinterpretq_s16_u16(simde_vmovl_u8(b)) ) ) ); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabdl_u8 #define vabdl_u8(a, b) simde_vabdl_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vabdl_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabdl_u16(a, b); #else return 
simde_vreinterpretq_u32_s32( simde_vabsq_s32( simde_vsubq_s32( simde_vreinterpretq_s32_u32(simde_vmovl_u16(a)), simde_vreinterpretq_s32_u32(simde_vmovl_u16(b)) ) ) ); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabdl_u16 #define vabdl_u16(a, b) simde_vabdl_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vabdl_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabdl_u32(a, b); #else return simde_vreinterpretq_u64_s64( simde_vabsq_s64( simde_vsubq_s64( simde_vreinterpretq_s64_u64(simde_vmovl_u32(a)), simde_vreinterpretq_s64_u64(simde_vmovl_u32(b)) ) ) ); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabdl_u32 #define vabdl_u32(a, b) simde_vabdl_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ABDL_H) */ simde-0.7.2/simde/arm/neon/abs.h000066400000000000000000000305531400333146700163650ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_ABS_H) #define SIMDE_ARM_NEON_ABS_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES int64_t simde_vabsd_s64(int64_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,1,0)) return vabsd_s64(a); #else return a < 0 ? -a : a; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vabsd_s64 #define vabsd_s64(a) simde_vabsd_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vabs_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabs_f32(a); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i]; } return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabs_f32 #define vabs_f32(a) simde_vabs_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vabs_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vabs_f64(a); #else simde_float64x1_private r_, a_ = simde_float64x1_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] < 0 ? 
-a_.values[i] : a_.values[i]; } return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vabs_f64 #define vabs_f64(a) simde_vabs_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vabs_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabs_s8(a); #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_abs_pi8(a); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT8_C(0)); r_.values = (-a_.values & m) | (a_.values & ~m); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i]; } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabs_s8 #define vabs_s8(a) simde_vabs_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vabs_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabs_s16(a); #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_abs_pi16(a); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT16_C(0)); r_.values = (-a_.values & m) | (a_.values & ~m); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] < 0 ? 
-a_.values[i] : a_.values[i]; } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabs_s16 #define vabs_s16(a) simde_vabs_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vabs_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabs_s32(a); #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_abs_pi32(a); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT32_C(0)); r_.values = (-a_.values & m) | (a_.values & ~m); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i]; } #endif return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabs_s32 #define vabs_s32(a) simde_vabs_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vabs_s64(simde_int64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vabs_s64(a); #else simde_int64x1_private r_, a_ = simde_int64x1_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT64_C(0)); r_.values = (-a_.values & m) | (a_.values & ~m); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] < 0 ? 
-a_.values[i] : a_.values[i]; } #endif return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabs_s64 #define vabs_s64(a) simde_vabs_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vabsq_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabsq_f32(a); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_abs(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_abs(a); #elif defined(SIMDE_X86_AVX512F_NATIVE) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) return _mm512_castps512_ps128(_mm512_abs_ps(_mm512_castps128_ps512(a))); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_fabsf(a_.values[i]); } return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabsq_f32 #define vabsq_f32(a) simde_vabsq_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vabsq_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vabsq_f64(a); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_abs(a); #elif defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) return _mm512_castpd512_pd128(_mm512_abs_pd(_mm512_castpd128_pd512(a))); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_fabs(a_.values[i]); } return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vabsq_f64 #define vabsq_f64(a) simde_vabsq_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vabsq_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabsq_s8(a); #elif defined(SIMDE_X86_SSSE3_NATIVE) return 
_mm_abs_epi8(a); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_abs(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_abs(a); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT8_C(0)); r_.values = (-a_.values & m) | (a_.values & ~m); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i]; } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabsq_s8 #define vabsq_s8(a) simde_vabsq_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vabsq_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabsq_s16(a); #elif defined(SIMDE_X86_SSSE3_NATIVE) return _mm_abs_epi16(a); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_abs(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_abs(a); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT16_C(0)); r_.values = (-a_.values & m) | (a_.values & ~m); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] < 0 ? 
-a_.values[i] : a_.values[i]; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabsq_s16 #define vabsq_s16(a) simde_vabsq_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vabsq_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vabsq_s32(a); #elif defined(SIMDE_X86_SSSE3_NATIVE) return _mm_abs_epi32(a); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_abs(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_abs(a); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT32_C(0)); r_.values = (-a_.values & m) | (a_.values & ~m); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i]; } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabsq_s32 #define vabsq_s32(a) simde_vabsq_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vabsq_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vabsq_s64(a); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbslq_s64(vreinterpretq_u64_s64(vshrq_n_s64(a, 63)), vsubq_s64(vdupq_n_s64(0), a), a); #elif defined(SIMDE_POWER_ALTIVEC_P64_NATIVE) && !defined(HEDLEY_IBM_VERSION) return vec_abs(a); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT64_C(0)); r_.values = (-a_.values & m) | (a_.values & ~m); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] < 0 ? 
-a_.values[i] : a_.values[i]; } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vabsq_s64 #define vabsq_s64(a) simde_vabsq_s64(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ABS_H) */ simde-0.7.2/simde/arm/neon/add.h000066400000000000000000000452041400333146700163470ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_ADD_H) #define SIMDE_ARM_NEON_ADD_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES int64_t simde_vaddd_s64(int64_t a, int64_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddd_s64(a, b); #else return a + b; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddd_s64 #define vaddd_s64(a, b) simde_vaddd_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_vaddd_u64(uint64_t a, uint64_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddd_u64(a, b); #else return a + b; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddd_u64 #define vaddd_u64(a, b) simde_vaddd_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vadd_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vadd_f32(a, b); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vadd_f32 #define vadd_f32(a, b) simde_vadd_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vadd_f64(simde_float64x1_t a, simde_float64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vadd_f64(a, b); #else simde_float64x1_private r_, a_ = simde_float64x1_to_private(a), b_ = simde_float64x1_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return 
simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vadd_f64 #define vadd_f64(a, b) simde_vadd_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vadd_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vadd_s8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_add_pi8(a, b); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vadd_s8 #define vadd_s8(a, b) simde_vadd_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vadd_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vadd_s16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_add_pi16(a, b); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vadd_s16 #define vadd_s16(a, b) simde_vadd_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vadd_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vadd_s32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_add_pi32(a, b); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + 
b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vadd_s32 #define vadd_s32(a, b) simde_vadd_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vadd_s64(simde_int64x1_t a, simde_int64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vadd_s64(a, b); #else simde_int64x1_private r_, a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vadd_s64 #define vadd_s64(a, b) simde_vadd_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vadd_u8(a, b); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vadd_u8 #define vadd_u8(a, b) simde_vadd_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vadd_u16(a, b); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + b_.values; 
#else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vadd_u16 #define vadd_u16(a, b) simde_vadd_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vadd_u32(a, b); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vadd_u32 #define vadd_u32(a, b) simde_vadd_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vadd_u64(simde_uint64x1_t a, simde_uint64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vadd_u64(a, b); #else simde_uint64x1_private r_, a_ = simde_uint64x1_to_private(a), b_ = simde_uint64x1_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vadd_u64 #define vadd_u64(a, b) simde_vadd_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vaddq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddq_f32(a, b); #elif defined(SIMDE_X86_SSE_NATIVE) return _mm_add_ps(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(float) a_ , b_, r_; a_ = a; b_ = 
b; r_ = vec_add(a_, b_); return r_; #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_add(a, b); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddq_f32 #define vaddq_f32(a, b) simde_vaddq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vaddq_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddq_f64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_add_pd(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_add(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_add(a, b); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddq_f64 #define vaddq_f64(a, b) simde_vaddq_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vaddq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddq_s8(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_add_epi8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_add(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_add(a, b); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) 
r_.values = a_.values + b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddq_s8 #define vaddq_s8(a, b) simde_vaddq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vaddq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddq_s16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_add_epi16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_add(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_add(a, b); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddq_s16 #define vaddq_s16(a, b) simde_vaddq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vaddq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddq_s32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_add_epi32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_add(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_add(a, b); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_int32x4_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddq_s32 #define vaddq_s32(a, b) simde_vaddq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vaddq_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddq_s64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_add_epi64(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_add(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i64x2_add(a, b); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddq_s64 #define vaddq_s64(a, b) simde_vaddq_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddq_u8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_add(a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddq_u8 #define vaddq_u8(a, b) simde_vaddq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddq_u16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_add(a, b); #else simde_uint16x8_private r_, a_ 
= simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddq_u16 #define vaddq_u16(a, b) simde_vaddq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddq_u32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_add(a, b); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddq_u32 #define vaddq_u32(a, b) simde_vaddq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vaddq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddq_u64(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_add(a, b); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values + b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddq_u64 #define vaddq_u64(a, b) simde_vaddq_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif 
/* !defined(SIMDE_ARM_NEON_ADD_H) */ simde-0.7.2/simde/arm/neon/addl.h000066400000000000000000000075101400333146700165210ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_ADDL_H) #define SIMDE_ARM_NEON_ADDL_H #include "add.h" #include "movl.h" #include "movl_high.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vaddl_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddl_s8(a, b); #else return simde_vaddq_s16(simde_vmovl_s8(a), simde_vmovl_s8(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddl_s8 #define vaddl_s8(a, b) simde_vaddl_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vaddl_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddl_s16(a, b); #else return simde_vaddq_s32(simde_vmovl_s16(a), simde_vmovl_s16(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddl_s16 #define vaddl_s16(a, b) simde_vaddl_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vaddl_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddl_s32(a, b); #else return simde_vaddq_s64(simde_vmovl_s32(a), simde_vmovl_s32(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddl_s32 #define vaddl_s32(a, b) simde_vaddl_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vaddl_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddl_u8(a, b); #else return simde_vaddq_u16(simde_vmovl_u8(a), simde_vmovl_u8(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddl_u8 #define vaddl_u8(a, b) simde_vaddl_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vaddl_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddl_u16(a, b); #else return simde_vaddq_u32(simde_vmovl_u16(a), 
simde_vmovl_u16(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddl_u16 #define vaddl_u16(a, b) simde_vaddl_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vaddl_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddl_u32(a, b); #else return simde_vaddq_u64(simde_vmovl_u32(a), simde_vmovl_u32(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddl_u32 #define vaddl_u32(a, b) simde_vaddl_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ADDL_H) */ simde-0.7.2/simde/arm/neon/addl_high.h000066400000000000000000000100551400333146700175160ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_ADDL_HIGH_H) #define SIMDE_ARM_NEON_ADDL_HIGH_H #include "add.h" #include "movl.h" #include "movl_high.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vaddl_high_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddl_high_s8(a, b); #else return simde_vaddq_s16(simde_vmovl_high_s8(a), simde_vmovl_high_s8(b)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddl_high_s8 #define vaddl_high_s8(a, b) simde_vaddl_high_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vaddl_high_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddl_high_s16(a, b); #else return simde_vaddq_s32(simde_vmovl_high_s16(a), simde_vmovl_high_s16(b)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddl_high_s16 #define vaddl_high_s16(a, b) simde_vaddl_high_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vaddl_high_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddl_high_s32(a, b); #else return simde_vaddq_s64(simde_vmovl_high_s32(a), simde_vmovl_high_s32(b)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddl_high_s32 #define vaddl_high_s32(a, b) simde_vaddl_high_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vaddl_high_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddl_high_u8(a, b); #else return simde_vaddq_u16(simde_vmovl_high_u8(a), simde_vmovl_high_u8(b)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddl_high_u8 #define vaddl_high_u8(a, b) simde_vaddl_high_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t 
simde_vaddl_high_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddl_high_u16(a, b); #else return simde_vaddq_u32(simde_vmovl_high_u16(a), simde_vmovl_high_u16(b)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddl_high_u16 #define vaddl_high_u16(a, b) simde_vaddl_high_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vaddl_high_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddl_high_u32(a, b); #else return simde_vaddq_u64(simde_vmovl_high_u32(a), simde_vmovl_high_u32(b)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddl_high_u32 #define vaddl_high_u32(a, b) simde_vaddl_high_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ADDL_HIGH_H) */ simde-0.7.2/simde/arm/neon/addlv.h000066400000000000000000000203171400333146700167070ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_ADDLV_H) #define SIMDE_ARM_NEON_ADDLV_H #include "types.h" #include "movl.h" #include "addv.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES int16_t simde_vaddlv_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddlv_s8(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vaddvq_s16(simde_vmovl_s8(a)); #else simde_int8x8_private a_ = simde_int8x8_to_private(a); int16_t r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddlv_s8 #define vaddlv_s8(a) simde_vaddlv_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vaddlv_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddlv_s16(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vaddvq_s32(simde_vmovl_s16(a)); #else simde_int16x4_private a_ = simde_int16x4_to_private(a); int32_t r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddlv_s16 #define vaddlv_s16(a) simde_vaddlv_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_vaddlv_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddlv_s32(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vaddvq_s64(simde_vmovl_s32(a)); #else simde_int32x2_private a_ = simde_int32x2_to_private(a); int64_t r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < 
(sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddlv_s32 #define vaddlv_s32(a) simde_vaddlv_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES uint16_t simde_vaddlv_u8(simde_uint8x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddlv_u8(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vaddvq_u16(simde_vmovl_u8(a)); #else simde_uint8x8_private a_ = simde_uint8x8_to_private(a); uint16_t r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddlv_u8 #define vaddlv_u8(a) simde_vaddlv_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vaddlv_u16(simde_uint16x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddlv_u16(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vaddvq_u32(simde_vmovl_u16(a)); #else simde_uint16x4_private a_ = simde_uint16x4_to_private(a); uint32_t r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddlv_u16 #define vaddlv_u16(a) simde_vaddlv_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_vaddlv_u32(simde_uint32x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddlv_u32(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vaddvq_u64(simde_vmovl_u32(a)); #else simde_uint32x2_private a_ = simde_uint32x2_to_private(a); uint64_t r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddlv_u32 #define vaddlv_u32(a) simde_vaddlv_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_vaddlvq_s8(simde_int8x16_t a) { #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddlvq_s8(a); #else simde_int8x16_private a_ = simde_int8x16_to_private(a); int16_t r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddlvq_s8 #define vaddlvq_s8(a) simde_vaddlvq_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vaddlvq_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddlvq_s16(a); #else simde_int16x8_private a_ = simde_int16x8_to_private(a); int32_t r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddlvq_s16 #define vaddlvq_s16(a) simde_vaddlvq_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_vaddlvq_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddlvq_s32(a); #else simde_int32x4_private a_ = simde_int32x4_to_private(a); int64_t r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddlvq_s32 #define vaddlvq_s32(a) simde_vaddlvq_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES uint16_t simde_vaddlvq_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddlvq_u8(a); #else simde_uint8x16_private a_ = simde_uint8x16_to_private(a); uint16_t r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddlvq_u8 #define vaddlvq_u8(a) simde_vaddlvq_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vaddlvq_u16(simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return 
vaddlvq_u16(a); #else simde_uint16x8_private a_ = simde_uint16x8_to_private(a); uint32_t r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddlvq_u16 #define vaddlvq_u16(a) simde_vaddlvq_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_vaddlvq_u32(simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddlvq_u32(a); #else simde_uint32x4_private a_ = simde_uint32x4_to_private(a); uint64_t r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddlvq_u32 #define vaddlvq_u32(a) simde_vaddlvq_u32(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ADDLV_H) */ simde-0.7.2/simde/arm/neon/addv.h000066400000000000000000000243541400333146700165400ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_ADDV_H) #define SIMDE_ARM_NEON_ADDV_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32_t simde_vaddv_f32(simde_float32x2_t a) { simde_float32_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vaddv_f32(a); #else simde_float32x2_private a_ = simde_float32x2_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddv_f32 #define vaddv_f32(v) simde_vaddv_f32(v) #endif SIMDE_FUNCTION_ATTRIBUTES int8_t simde_vaddv_s8(simde_int8x8_t a) { int8_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vaddv_s8(a); #else simde_int8x8_private a_ = simde_int8x8_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddv_s8 #define vaddv_s8(v) simde_vaddv_s8(v) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_vaddv_s16(simde_int16x4_t a) { int16_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vaddv_s16(a); #else simde_int16x4_private a_ = simde_int16x4_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddv_s16 #define vaddv_s16(v) simde_vaddv_s16(v) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vaddv_s32(simde_int32x2_t a) { int32_t r; #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vaddv_s32(a); #else simde_int32x2_private a_ = simde_int32x2_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddv_s32 #define vaddv_s32(v) simde_vaddv_s32(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint8_t simde_vaddv_u8(simde_uint8x8_t a) { uint8_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vaddv_u8(a); #else simde_uint8x8_private a_ = simde_uint8x8_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddv_u8 #define vaddv_u8(v) simde_vaddv_u8(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint16_t simde_vaddv_u16(simde_uint16x4_t a) { uint16_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vaddv_u16(a); #else simde_uint16x4_private a_ = simde_uint16x4_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddv_u16 #define vaddv_u16(v) simde_vaddv_u16(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vaddv_u32(simde_uint32x2_t a) { uint32_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vaddv_u32(a); #else simde_uint32x2_private a_ = simde_uint32x2_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddv_u32 #define vaddv_u32(v) simde_vaddv_u32(v) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32_t simde_vaddvq_f32(simde_float32x4_t a) { simde_float32_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vaddvq_f32(a); #else 
simde_float32x4_private a_ = simde_float32x4_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddvq_f32 #define vaddvq_f32(v) simde_vaddvq_f32(v) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64_t simde_vaddvq_f64(simde_float64x2_t a) { simde_float64_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vaddvq_f64(a); #else simde_float64x2_private a_ = simde_float64x2_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddvq_f64 #define vaddvq_f64(v) simde_vaddvq_f64(v) #endif SIMDE_FUNCTION_ATTRIBUTES int8_t simde_vaddvq_s8(simde_int8x16_t a) { int8_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vaddvq_s8(a); #else simde_int8x16_private a_ = simde_int8x16_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddvq_s8 #define vaddvq_s8(v) simde_vaddvq_s8(v) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_vaddvq_s16(simde_int16x8_t a) { int16_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vaddvq_s16(a); #else simde_int16x8_private a_ = simde_int16x8_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddvq_s16 #define vaddvq_s16(v) simde_vaddvq_s16(v) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vaddvq_s32(simde_int32x4_t a) { int32_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vaddvq_s32(a); #else simde_int32x4_private a_ = 
simde_int32x4_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddvq_s32 #define vaddvq_s32(v) simde_vaddvq_s32(v) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_vaddvq_s64(simde_int64x2_t a) { int64_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vaddvq_s64(a); #else simde_int64x2_private a_ = simde_int64x2_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddvq_s64 #define vaddvq_s64(v) simde_vaddvq_s64(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint8_t simde_vaddvq_u8(simde_uint8x16_t a) { uint8_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vaddvq_u8(a); #else simde_uint8x16_private a_ = simde_uint8x16_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddvq_u8 #define vaddvq_u8(v) simde_vaddvq_u8(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint16_t simde_vaddvq_u16(simde_uint16x8_t a) { uint16_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vaddvq_u16(a); #else simde_uint16x8_private a_ = simde_uint16x8_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddvq_u16 #define vaddvq_u16(v) simde_vaddvq_u16(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vaddvq_u32(simde_uint32x4_t a) { uint32_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vaddvq_u32(a); #else simde_uint32x4_private a_ = simde_uint32x4_to_private(a); r = 0; 
SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddvq_u32 #define vaddvq_u32(v) simde_vaddvq_u32(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_vaddvq_u64(simde_uint64x2_t a) { uint64_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vaddvq_u64(a); #else simde_uint64x2_private a_ = simde_uint64x2_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(+:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r += a_.values[i]; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddvq_u64 #define vaddvq_u64(v) simde_vaddvq_u64(v) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ADDV_H) */ simde-0.7.2/simde/arm/neon/addw.h000066400000000000000000000165151400333146700165410ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_ADDW_H) #define SIMDE_ARM_NEON_ADDW_H #include "types.h" #include "add.h" #include "movl.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vaddw_s8(simde_int16x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddw_s8(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vaddq_s16(a, simde_vmovl_s8(b)); #else simde_int16x8_private r_; simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_int8x8_private b_ = simde_int8x8_to_private(b); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, b_.values); r_.values += a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddw_s8 #define vaddw_s8(a, b) simde_vaddw_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vaddw_s16(simde_int32x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddw_s16(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vaddq_s32(a, simde_vmovl_s16(b)); #else simde_int32x4_private r_; simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_int16x4_private b_ = simde_int16x4_to_private(b); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, b_.values); 
r_.values += a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddw_s16 #define vaddw_s16(a, b) simde_vaddw_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vaddw_s32(simde_int64x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddw_s32(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vaddq_s64(a, simde_vmovl_s32(b)); #else simde_int64x2_private r_; simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_int32x2_private b_ = simde_int32x2_to_private(b); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, b_.values); r_.values += a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddw_s32 #define vaddw_s32(a, b) simde_vaddw_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vaddw_u8(simde_uint16x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddw_u8(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vaddq_u16(a, simde_vmovl_u8(b)); #else simde_uint16x8_private r_; simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_uint8x8_private b_ = simde_uint8x8_to_private(b); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, b_.values); r_.values += a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return 
simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddw_u8 #define vaddw_u8(a, b) simde_vaddw_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vaddw_u16(simde_uint32x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddw_u16(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vaddq_u32(a, simde_vmovl_u16(b)); #else simde_uint32x4_private r_; simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_uint16x4_private b_ = simde_uint16x4_to_private(b); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, b_.values); r_.values += a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddw_u16 #define vaddw_u16(a, b) simde_vaddw_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vaddw_u32(simde_uint64x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vaddw_u32(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vaddq_u64(a, simde_vmovl_u32(b)); #else simde_uint64x2_private r_; simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_uint32x2_private b_ = simde_uint32x2_to_private(b); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, b_.values); r_.values += a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i]; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vaddw_u32 #define vaddw_u32(a, b) simde_vaddw_u32((a), (b)) #endif 
SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ADDW_H) */ simde-0.7.2/simde/arm/neon/addw_high.h000066400000000000000000000151041400333146700175310ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_ADDW_HIGH_H) #define SIMDE_ARM_NEON_ADDW_HIGH_H #include "types.h" #include "movl.h" #include "add.h" #include "get_high.h" #include "get_low.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vaddw_high_s8(simde_int16x8_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddw_high_s8(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vaddq_s16(a, simde_vmovl_s8(simde_vget_high_s8(b))); #else simde_int16x8_private r_; simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_int8x16_private b_ = simde_int8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; } return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddw_high_s8 #define vaddw_high_s8(a, b) simde_vaddw_high_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vaddw_high_s16(simde_int32x4_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddw_high_s16(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vaddq_s32(a, simde_vmovl_s16(simde_vget_high_s16(b))); #else simde_int32x4_private r_; simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_int16x8_private b_ = simde_int16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddw_high_s16 #define vaddw_high_s16(a, b) simde_vaddw_high_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vaddw_high_s32(simde_int64x2_t a, simde_int32x4_t b) { 
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddw_high_s32(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vaddq_s64(a, simde_vmovl_s32(simde_vget_high_s32(b))); #else simde_int64x2_private r_; simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_int32x4_private b_ = simde_int32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; } return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddw_high_s32 #define vaddw_high_s32(a, b) simde_vaddw_high_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vaddw_high_u8(simde_uint16x8_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddw_high_u8(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vaddq_u16(a, simde_vmovl_u8(simde_vget_high_u8(b))); #else simde_uint16x8_private r_; simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_uint8x16_private b_ = simde_uint8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; } return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddw_high_u8 #define vaddw_high_u8(a, b) simde_vaddw_high_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vaddw_high_u16(simde_uint32x4_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddw_high_u16(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vaddq_u32(a, simde_vmovl_u16(simde_vget_high_u16(b))); #else simde_uint32x4_private r_; simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_uint16x8_private b_ = simde_uint16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < 
(sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; } return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddw_high_u16 #define vaddw_high_u16(a, b) simde_vaddw_high_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vaddw_high_u32(simde_uint64x2_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vaddw_high_u32(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vaddq_u64(a, simde_vmovl_u32(simde_vget_high_u32(b))); #else simde_uint64x2_private r_; simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_uint32x4_private b_ = simde_uint32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; } return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vaddw_high_u32 #define vaddw_high_u32(a, b) simde_vaddw_high_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ADDW_HIGH_H) */ simde-0.7.2/simde/arm/neon/and.h000066400000000000000000000367261400333146700163720ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_AND_H) #define SIMDE_ARM_NEON_AND_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vand_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vand_s8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_and_si64(a, b); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values & b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & b_.values[i]; } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vand_s8 #define vand_s8(a, b) simde_vand_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vand_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vand_s16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_and_si64(a, b); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values & b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & b_.values[i]; } #endif return 
simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vand_s16 #define vand_s16(a, b) simde_vand_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vand_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vand_s32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_and_si64(a, b); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values & b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & b_.values[i]; } #endif return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vand_s32 #define vand_s32(a, b) simde_vand_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vand_s64(simde_int64x1_t a, simde_int64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vand_s64(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_and_si64(a, b); #else simde_int64x1_private r_, a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values & b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & b_.values[i]; } #endif return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vand_s64 #define vand_s64(a, b) simde_vand_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vand_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vand_u8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_and_si64(a, b); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = 
a_.values & b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & b_.values[i]; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vand_u8 #define vand_u8(a, b) simde_vand_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vand_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vand_u16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_and_si64(a, b); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values & b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & b_.values[i]; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vand_u16 #define vand_u16(a, b) simde_vand_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vand_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vand_u32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_and_si64(a, b); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values & b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & b_.values[i]; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vand_u32 #define vand_u32(a, b) simde_vand_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vand_u64(simde_uint64x1_t a, simde_uint64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vand_u64(a, b); #elif 
defined(SIMDE_X86_MMX_NATIVE) return _mm_and_si64(a, b); #else simde_uint64x1_private r_, a_ = simde_uint64x1_to_private(a), b_ = simde_uint64x1_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values & b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & b_.values[i]; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vand_u64 #define vand_u64(a, b) simde_vand_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vandq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vandq_s8(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_and_si128(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_and(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_and(a, b); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values & b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & b_.values[i]; } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vandq_s8 #define vandq_s8(a, b) simde_vandq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vandq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vandq_s16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_and_si128(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_and(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_and(a, b); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values & b_.values; #else SIMDE_VECTORIZE for (size_t i 
= 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & b_.values[i]; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vandq_s16 #define vandq_s16(a, b) simde_vandq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vandq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vandq_s32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_and_si128(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_and(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_and(a, b); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values & b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & b_.values[i]; } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vandq_s32 #define vandq_s32(a, b) simde_vandq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vandq_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vandq_s64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_and_si128(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_and(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_and(a, b); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values & b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & b_.values[i]; } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vandq_s64 #define vandq_s64(a, b) 
simde_vandq_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vandq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vandq_u8(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_and_si128(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_and(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_and(a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values & b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & b_.values[i]; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vandq_u8 #define vandq_u8(a, b) simde_vandq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vandq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vandq_u16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_and_si128(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_and(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_and(a, b); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values & b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & b_.values[i]; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vandq_u16 #define vandq_u16(a, b) simde_vandq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vandq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vandq_u32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return 
_mm_and_si128(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_and(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_and(a, b); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values & b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & b_.values[i]; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vandq_u32 #define vandq_u32(a, b) simde_vandq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vandq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vandq_u64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_and_si128(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_and(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_and(a, b); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values & b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & b_.values[i]; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vandq_u64 #define vandq_u64(a, b) simde_vandq_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_AND_H) */ simde-0.7.2/simde/arm/neon/bic.h000066400000000000000000000326701400333146700163570ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation 
the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_BIC_H) #define SIMDE_ARM_NEON_BIC_H #include "dup_n.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vbic_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbic_s8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_andnot_si64(b, a); #else simde_int8x8_private a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & ~b_.values[i]; } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbic_s8 #define vbic_s8(a, b) simde_vbic_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vbic_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbic_s16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_andnot_si64(b, a); #else simde_int16x4_private a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b), r_; for (size_t 
i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & ~b_.values[i]; } return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbic_s16 #define vbic_s16(a, b) simde_vbic_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vbic_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbic_s32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_andnot_si64(b, a); #else simde_int32x2_private a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & ~b_.values[i]; } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbic_s32 #define vbic_s32(a, b) simde_vbic_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vbic_s64(simde_int64x1_t a, simde_int64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbic_s64(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_andnot_si64(b, a); #else simde_int64x1_private a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & ~b_.values[i]; } return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbic_s64 #define vbic_s64(a, b) simde_vbic_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vbic_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbic_u8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_andnot_si64(b, a); #else simde_uint8x8_private a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & ~b_.values[i]; } return 
simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbic_u8 #define vbic_u8(a, b) simde_vbic_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vbic_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbic_u16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_andnot_si64(b, a); #else simde_uint16x4_private a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & ~b_.values[i]; } return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbic_u16 #define vbic_u16(a, b) simde_vbic_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vbic_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbic_u32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_andnot_si64(b, a); #else simde_uint32x2_private a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & ~b_.values[i]; } return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbic_u32 #define vbic_u32(a, b) simde_vbic_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vbic_u64(simde_uint64x1_t a, simde_uint64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbic_u64(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_andnot_si64(b, a); #else simde_uint64x1_private a_ = simde_uint64x1_to_private(a), b_ = simde_uint64x1_to_private(b), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & ~b_.values[i]; } return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef 
vbic_u64 #define vbic_u64(a, b) simde_vbic_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vbicq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbicq_s8(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(b, a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_andnot(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_andc(a, b); #else simde_int8x16_private a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & ~b_.values[i]; } return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbicq_s8 #define vbicq_s8(a, b) simde_vbicq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vbicq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbicq_s16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(b, a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_andnot(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_andc(a, b); #else simde_int16x8_private a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & ~b_.values[i]; } return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbicq_s16 #define vbicq_s16(a, b) simde_vbicq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vbicq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbicq_s32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(b, a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_andnot(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_andc(a, b); #else 
simde_int32x4_private a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & ~b_.values[i]; } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbicq_s32 #define vbicq_s32(a, b) simde_vbicq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vbicq_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbicq_s64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(b, a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_andnot(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_andc(a, b); #else simde_int64x2_private a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & ~b_.values[i]; } return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbicq_s64 #define vbicq_s64(a, b) simde_vbicq_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vbicq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbicq_u8(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(b, a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_andnot(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_andc(a, b); #else simde_uint8x16_private a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & ~b_.values[i]; } return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbicq_u8 #define vbicq_u8(a, b) simde_vbicq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t 
simde_vbicq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbicq_u16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(b, a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_andnot(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_andc(a, b); #else simde_uint16x8_private a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & ~b_.values[i]; } return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbicq_u16 #define vbicq_u16(a, b) simde_vbicq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vbicq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbicq_u32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(b, a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_andnot(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_andc(a, b); #else simde_uint32x4_private a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & ~b_.values[i]; } return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbicq_u32 #define vbicq_u32(a, b) simde_vbicq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vbicq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbicq_u64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(b, a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_andnot(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_andc(a, b); #else simde_uint64x2_private a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b), r_; for 
(size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] & ~b_.values[i]; } return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbicq_u64 #define vbicq_u64(a, b) simde_vbicq_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_BIC_H) */ simde-0.7.2/simde/arm/neon/bsl.h000066400000000000000000000346571400333146700164110ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_BSL_H) #define SIMDE_ARM_NEON_BSL_H #include "types.h" #include "reinterpret.h" #include "and.h" #include "eor.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vbsl_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c); SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vbslq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c); SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vbsl_f32(simde_uint32x2_t a, simde_float32x2_t b, simde_float32x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbsl_f32(a, b, c); #else simde_uint8x8_t a_ = simde_vreinterpret_u8_u32(a), b_ = simde_vreinterpret_u8_f32(b), c_ = simde_vreinterpret_u8_f32(c); return simde_vreinterpret_f32_u8(simde_vbsl_u8(a_, b_, c_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbsl_f32 #define vbsl_f32(a, b, c) simde_vbsl_f32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vbsl_f64(simde_uint64x1_t a, simde_float64x1_t b, simde_float64x1_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vbsl_f64(a, b, c); #else simde_uint8x8_t a_ = simde_vreinterpret_u8_u64(a), b_ = simde_vreinterpret_u8_f64(b), c_ = simde_vreinterpret_u8_f64(c); return simde_vreinterpret_f64_u8(simde_vbsl_u8(a_, b_, c_)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vbsl_f64 #define vbsl_f64(a, b, c) simde_vbsl_f64((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vbsl_s8(simde_uint8x8_t a, simde_int8x8_t b, simde_int8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbsl_s8(a, b, c); #else simde_uint8x8_t a_ = a, b_ = simde_vreinterpret_u8_s8(b), c_ = simde_vreinterpret_u8_s8(c); return simde_vreinterpret_s8_u8(simde_vbsl_u8(a_, b_, c_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef 
vbsl_s8 #define vbsl_s8(a, b, c) simde_vbsl_s8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vbsl_s16(simde_uint16x4_t a, simde_int16x4_t b, simde_int16x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbsl_s16(a, b, c); #else simde_uint8x8_t a_ = simde_vreinterpret_u8_u16(a), b_ = simde_vreinterpret_u8_s16(b), c_ = simde_vreinterpret_u8_s16(c); return simde_vreinterpret_s16_u8(simde_vbsl_u8(a_, b_, c_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbsl_s16 #define vbsl_s16(a, b, c) simde_vbsl_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vbsl_s32(simde_uint32x2_t a, simde_int32x2_t b, simde_int32x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbsl_s32(a, b, c); #else simde_uint8x8_t a_ = simde_vreinterpret_u8_u32(a), b_ = simde_vreinterpret_u8_s32(b), c_ = simde_vreinterpret_u8_s32(c); return simde_vreinterpret_s32_u8(simde_vbsl_u8(a_, b_, c_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbsl_s32 #define vbsl_s32(a, b, c) simde_vbsl_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vbsl_s64(simde_uint64x1_t a, simde_int64x1_t b, simde_int64x1_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbsl_s64(a, b, c); #else simde_uint8x8_t a_ = simde_vreinterpret_u8_u64(a), b_ = simde_vreinterpret_u8_s64(b), c_ = simde_vreinterpret_u8_s64(c); return simde_vreinterpret_s64_u8(simde_vbsl_u8(a_, b_, c_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbsl_s64 #define vbsl_s64(a, b, c) simde_vbsl_s64((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vbsl_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbsl_u8(a, b, c); #else return simde_veor_u8(c, simde_vand_u8(simde_veor_u8(b, c), a)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbsl_u8 #define vbsl_u8(a, b, c) simde_vbsl_u8((a), (b), 
(c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vbsl_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbsl_u16(a, b, c); #else simde_uint8x8_t a_ = simde_vreinterpret_u8_u16(a), b_ = simde_vreinterpret_u8_u16(b), c_ = simde_vreinterpret_u8_u16(c); return simde_vreinterpret_u16_u8(simde_vbsl_u8(a_, b_, c_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbsl_u16 #define vbsl_u16(a, b, c) simde_vbsl_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vbsl_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbsl_u32(a, b, c); #else simde_uint8x8_t a_ = simde_vreinterpret_u8_u32(a), b_ = simde_vreinterpret_u8_u32(b), c_ = simde_vreinterpret_u8_u32(c); return simde_vreinterpret_u32_u8(simde_vbsl_u8(a_, b_, c_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbsl_u32 #define vbsl_u32(a, b, c) simde_vbsl_u32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vbsl_u64(simde_uint64x1_t a, simde_uint64x1_t b, simde_uint64x1_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbsl_u64(a, b, c); #else simde_uint8x8_t a_ = simde_vreinterpret_u8_u64(a), b_ = simde_vreinterpret_u8_u64(b), c_ = simde_vreinterpret_u8_u64(c); return simde_vreinterpret_u64_u8(simde_vbsl_u8(a_, b_, c_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbsl_u64 #define vbsl_u64(a, b, c) simde_vbsl_u64((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vbslq_f32(simde_uint32x4_t a, simde_float32x4_t b, simde_float32x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbslq_f32(a, b, c); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_bitselect(b, c, a); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_castsi128_ps(_mm_ternarylogic_epi32(a, _mm_castps_si128(b), _mm_castps_si128(c), 0xca)); #else 
simde_uint8x16_t a_ = simde_vreinterpretq_u8_u32(a), b_ = simde_vreinterpretq_u8_f32(b), c_ = simde_vreinterpretq_u8_f32(c); return simde_vreinterpretq_f32_u8(simde_vbslq_u8(a_, b_, c_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbslq_f32 #define vbslq_f32(a, b, c) simde_vbslq_f32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vbslq_f64(simde_uint64x2_t a, simde_float64x2_t b, simde_float64x2_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vbslq_f64(a, b, c); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_bitselect(b, c, a); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_castsi128_pd(_mm_ternarylogic_epi32(a, _mm_castpd_si128(b), _mm_castpd_si128(c), 0xca)); #else simde_uint8x16_t a_ = simde_vreinterpretq_u8_u64(a), b_ = simde_vreinterpretq_u8_f64(b), c_ = simde_vreinterpretq_u8_f64(c); return simde_vreinterpretq_f64_u8(simde_vbslq_u8(a_, b_, c_)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vbslq_f64 #define vbslq_f64(a, b, c) simde_vbslq_f64((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vbslq_s8(simde_uint8x16_t a, simde_int8x16_t b, simde_int8x16_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbslq_s8(a, b, c); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_bitselect(b, c, a); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, b, c, 0xca); #else simde_uint8x16_t a_ = (a), b_ = simde_vreinterpretq_u8_s8(b), c_ = simde_vreinterpretq_u8_s8(c); return simde_vreinterpretq_s8_u8(simde_vbslq_u8(a_, b_, c_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbslq_s8 #define vbslq_s8(a, b, c) simde_vbslq_s8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vbslq_s16(simde_uint16x8_t a, simde_int16x8_t b, simde_int16x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbslq_s16(a, b, c); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return 
wasm_v128_bitselect(b, c, a); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_sel(c, b, a); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, b, c, 0xca); #else simde_uint8x16_t a_ = simde_vreinterpretq_u8_u16(a), b_ = simde_vreinterpretq_u8_s16(b), c_ = simde_vreinterpretq_u8_s16(c); return simde_vreinterpretq_s16_u8(simde_vbslq_u8(a_, b_, c_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbslq_s16 #define vbslq_s16(a, b, c) simde_vbslq_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vbslq_s32(simde_uint32x4_t a, simde_int32x4_t b, simde_int32x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbslq_s32(a, b, c); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_bitselect(b, c, a); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, b, c, 0xca); #else simde_uint8x16_t a_ = simde_vreinterpretq_u8_u32(a), b_ = simde_vreinterpretq_u8_s32(b), c_ = simde_vreinterpretq_u8_s32(c); return simde_vreinterpretq_s32_u8(simde_vbslq_u8(a_, b_, c_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbslq_s32 #define vbslq_s32(a, b, c) simde_vbslq_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vbslq_s64(simde_uint64x2_t a, simde_int64x2_t b, simde_int64x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbslq_s64(a, b, c); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_bitselect(b, c, a); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, b, c, 0xca); #else simde_uint8x16_t a_ = simde_vreinterpretq_u8_u64(a), b_ = simde_vreinterpretq_u8_s64(b), c_ = simde_vreinterpretq_u8_s64(c); return simde_vreinterpretq_s64_u8(simde_vbslq_u8(a_, b_, c_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbslq_s64 #define vbslq_s64(a, b, c) simde_vbslq_s64((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vbslq_u8(simde_uint8x16_t a, 
simde_uint8x16_t b, simde_uint8x16_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbslq_u8(a, b, c); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_bitselect(b, c, a); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_sel(c, b, a); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, b, c, 0xca); #else return simde_veorq_u8(c, simde_vandq_u8(simde_veorq_u8(c, b), a)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbslq_u8 #define vbslq_u8(a, b, c) simde_vbslq_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vbslq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbslq_u16(a, b, c); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_bitselect(b, c, a); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, b, c, 0xca); #else simde_uint8x16_t a_ = simde_vreinterpretq_u8_u16(a), b_ = simde_vreinterpretq_u8_u16(b), c_ = simde_vreinterpretq_u8_u16(c); return simde_vreinterpretq_u16_u8(simde_vbslq_u8(a_, b_, c_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbslq_u16 #define vbslq_u16(a, b, c) simde_vbslq_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vbslq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbslq_u32(a, b, c); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_bitselect(b, c, a); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, b, c, 0xca); #else simde_uint8x16_t a_ = simde_vreinterpretq_u8_u32(a), b_ = simde_vreinterpretq_u8_u32(b), c_ = simde_vreinterpretq_u8_u32(c); return simde_vreinterpretq_u32_u8(simde_vbslq_u8(a_, b_, c_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbslq_u32 #define vbslq_u32(a, b, c) simde_vbslq_u32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t 
simde_vbslq_u64(simde_uint64x2_t a, simde_uint64x2_t b, simde_uint64x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vbslq_u64(a, b, c); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_bitselect(b, c, a); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, b, c, 0xca); #else simde_uint8x16_t a_ = simde_vreinterpretq_u8_u64(a), b_ = simde_vreinterpretq_u8_u64(b), c_ = simde_vreinterpretq_u8_u64(c); return simde_vreinterpretq_u64_u8(simde_vbslq_u8(a_, b_, c_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vbslq_u64 #define vbslq_u64(a, b, c) simde_vbslq_u64((a), (b), (c)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_BSL_H) */ simde-0.7.2/simde/arm/neon/cagt.h000066400000000000000000000122771400333146700165410ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_CAGT_H) #define SIMDE_ARM_NEON_CAGT_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vcagts_f32(simde_float32_t a, simde_float32_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcagts_f32(a, b); #else return (simde_math_fabsf(a) > simde_math_fabsf(b)) ? ~UINT32_C(0) : UINT32_C(0); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcagts_f32 #define vcagts_f32(a, b) simde_vcagts_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_vcagtd_f64(simde_float64_t a, simde_float64_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcagtd_f64(a, b); #else return (simde_math_fabs(a) > simde_math_fabs(b)) ? ~UINT64_C(0) : UINT64_C(0); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcagtd_f64 #define vcagtd_f64(a, b) simde_vcagtd_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcagt_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcagt_f32(a, b); #else simde_uint32x2_private r_; simde_float32x2_private a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcagts_f32(a_.values[i], b_.values[i]); } return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcagt_f32 #define vcagt_f32(a, b) simde_vcagt_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vcagt_f64(simde_float64x1_t a, simde_float64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcagt_f64(a, b); #else simde_uint64x1_private r_; simde_float64x1_private a_ = simde_float64x1_to_private(a), b_ = simde_float64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < 
(sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcagtd_f64(a_.values[i], b_.values[i]); } return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcagt_f64 #define vcagt_f64(a, b) simde_vcagt_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcagtq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcagtq_f32(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_gt(wasm_f32x4_abs(a), wasm_f32x4_abs(b)); #else simde_uint32x4_private r_; simde_float32x4_private a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcagts_f32(a_.values[i], b_.values[i]); } return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcagtq_f32 #define vcagtq_f32(a, b) simde_vcagtq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vcagtq_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcagtq_f64(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_gt(wasm_f64x2_abs(a), wasm_f64x2_abs(b)); #else simde_uint64x2_private r_; simde_float64x2_private a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcagtd_f64(a_.values[i], b_.values[i]); } return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcagtq_f64 #define vcagtq_f64(a, b) simde_vcagtq_f64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_CAGT_H) */ simde-0.7.2/simde/arm/neon/ceq.h000066400000000000000000000544621400333146700163750ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * 
Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_CEQ_H) #define SIMDE_ARM_NEON_CEQ_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vceqs_f32(simde_float32_t a, simde_float32_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqs_f32(a, b); #else return (a == b) ? ~UINT32_C(0) : UINT32_C(0); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vceqs_f32 #define vceqs_f32(a, b) simde_vceqs_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_vceqd_f64(simde_float64_t a, simde_float64_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqd_f64(a, b); #else return (a == b) ? 
~UINT64_C(0) : UINT64_C(0); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vceqd_f64 #define vceqd_f64(a, b) simde_vceqd_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_vceqd_s64(int64_t a, int64_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return HEDLEY_STATIC_CAST(uint64_t, vceqd_s64(a, b)); #else return (a == b) ? ~UINT64_C(0) : UINT64_C(0); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vceqd_s64 #define vceqd_s64(a, b) simde_vceqd_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_vceqd_u64(uint64_t a, uint64_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqd_u64(a, b); #else return (a == b) ? ~UINT64_C(0) : UINT64_C(0); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vceqd_u64 #define vceqd_u64(a, b) simde_vceqd_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vceq_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vceq_f32(a, b); #else simde_uint32x2_private r_; simde_float32x2_private a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT32_C(0) : UINT32_C(0); } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceq_f32 #define vceq_f32(a, b) simde_vceq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vceq_f64(simde_float64x1_t a, simde_float64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceq_f64(a, b); #else simde_uint64x1_private r_; simde_float64x1_private a_ = simde_float64x1_to_private(a), b_ = simde_float64x1_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0); } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vceq_f64 #define vceq_f64(a, b) simde_vceq_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vceq_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vceq_s8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpeq_pi8(a, b); #else simde_uint8x8_private r_; simde_int8x8_private a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT8_C(0) : UINT8_C(0); } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceq_s8 #define vceq_s8(a, b) simde_vceq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vceq_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vceq_s16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpeq_pi16(a, b); #else simde_uint16x4_private r_; simde_int16x4_private a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT16_C(0) : UINT16_C(0); } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceq_s16 #define vceq_s16(a, b) simde_vceq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vceq_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vceq_s32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpeq_pi32(a, b); #else simde_uint32x2_private r_; simde_int32x2_private a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT32_C(0) : UINT32_C(0); } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceq_s32 #define vceq_s32(a, b) simde_vceq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vceq_s64(simde_int64x1_t a, simde_int64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceq_s64(a, b); #else simde_uint64x1_private r_; simde_int64x1_private a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0); } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceq_s64 #define vceq_s64(a, b) simde_vceq_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vceq_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vceq_u8(a, b); #else simde_uint8x8_private r_; simde_uint8x8_private a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT8_C(0) : UINT8_C(0); } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceq_u8 #define vceq_u8(a, b) simde_vceq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vceq_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vceq_u16(a, b); #else simde_uint16x4_private r_; simde_uint16x4_private a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT16_C(0) : UINT16_C(0); } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceq_u16 #define vceq_u16(a, b) simde_vceq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vceq_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vceq_u32(a, b); #else simde_uint32x2_private r_; simde_uint32x2_private a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT32_C(0) : UINT32_C(0); } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceq_u32 #define vceq_u32(a, b) simde_vceq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vceq_u64(simde_uint64x1_t a, simde_uint64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceq_u64(a, b); #else simde_uint64x1_private r_; simde_uint64x1_private a_ = simde_uint64x1_to_private(a), b_ = simde_uint64x1_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0); } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceq_u64 #define vceq_u64(a, b) simde_vceq_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vceqq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vceqq_f32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_castps_si128(_mm_cmpeq_ps(a, b)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpeq(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_eq(a, b); #else simde_uint32x4_private r_; simde_float32x4_private a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT32_C(0) : UINT32_C(0); } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqq_f32 #define vceqq_f32(a, b) simde_vceqq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vceqq_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqq_f64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_castpd_si128(_mm_cmpeq_pd(a, b)); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpeq(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_eq(a, b); #else simde_uint64x2_private r_; simde_float64x2_private a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT64_C(0) : UINT64_C(0); } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vceqq_f64 #define vceqq_f64(a, b) simde_vceqq_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vceqq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vceqq_s8(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpeq_epi8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpeq(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_eq(a, b); #else simde_uint8x16_private r_; simde_int8x16_private a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT8_C(0) : UINT8_C(0); } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqq_s8 #define vceqq_s8(a, b) simde_vceqq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vceqq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vceqq_s16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpeq_epi16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpeq(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_eq(a, b); #else simde_uint16x8_private r_; simde_int16x8_private a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT16_C(0) : UINT16_C(0); } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqq_s16 #define vceqq_s16(a, b) simde_vceqq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vceqq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vceqq_s32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpeq_epi32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpeq(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_eq(a, b); #else simde_uint32x4_private r_; simde_int32x4_private a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT32_C(0) : UINT32_C(0); } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqq_s32 #define vceqq_s32(a, b) simde_vceqq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vceqq_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqq_s64(a, b); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_cmpeq_epi64(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpeq(a, b)); #else simde_uint64x2_private r_; simde_int64x2_private a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0); } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqq_s64 #define vceqq_s64(a, b) simde_vceqq_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vceqq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vceqq_u8(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpeq_epi8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpeq(a, b)); #else simde_uint8x16_private r_; simde_uint8x16_private a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT8_C(0) : UINT8_C(0); } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqq_u8 #define vceqq_u8(a, b) simde_vceqq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vceqq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vceqq_u16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpeq_epi16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpeq(a, b)); #else simde_uint16x8_private r_; simde_uint16x8_private a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT16_C(0) : UINT16_C(0); } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqq_u16 #define vceqq_u16(a, b) simde_vceqq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vceqq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vceqq_u32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpeq_epi32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpeq(a, b)); #else simde_uint32x4_private r_; simde_uint32x4_private a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT32_C(0) : UINT32_C(0); } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqq_u32 #define vceqq_u32(a, b) simde_vceqq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vceqq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqq_u64(a, b); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_cmpeq_epi64(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpeq(a, b)); #else simde_uint64x2_private r_; simde_uint64x2_private a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0); } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqq_u64 #define vceqq_u64(a, b) simde_vceqq_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_CEQ_H) */ simde-0.7.2/simde/arm/neon/ceqz.h000066400000000000000000000216071400333146700165620ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions 
of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_CEQZ_H) #define SIMDE_ARM_NEON_CEQZ_H #include "ceq.h" #include "dup_n.h" #include "types.h" #include "reinterpret.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vceqz_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqz_f32(a); #else return simde_vceq_f32(a, simde_vdup_n_f32(0.0f)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqz_f32 #define vceqz_f32(a) simde_vceqz_f32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vceqz_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqz_f64(a); #else return simde_vceq_f64(a, simde_vdup_n_f64(0.0)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vceqz_f64 #define vceqz_f64(a) simde_vceqz_f64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vceqz_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqz_s8(a); #else return simde_vceq_s8(a, simde_vdup_n_s8(0)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqz_s8 #define vceqz_s8(a) simde_vceqz_s8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vceqz_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqz_s16(a); #else return simde_vceq_s16(a, simde_vdup_n_s16(0)); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqz_s16 #define vceqz_s16(a) simde_vceqz_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vceqz_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqz_s32(a); #else return simde_vceq_s32(a, simde_vdup_n_s32(0)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqz_s32 #define vceqz_s32(a) simde_vceqz_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vceqz_s64(simde_int64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqz_s64(a); #else return simde_vceq_s64(a, simde_vdup_n_s64(0)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqz_s64 #define vceqz_s64(a) simde_vceqz_s64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vceqz_u8(simde_uint8x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqz_u8(a); #else return simde_vceq_u8(a, simde_vdup_n_u8(0)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqz_u8 #define vceqz_u8(a) simde_vceqz_u8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vceqz_u16(simde_uint16x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqz_u16(a); #else return simde_vceq_u16(a, simde_vdup_n_u16(0)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqz_u16 #define vceqz_u16(a) simde_vceqz_u16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vceqz_u32(simde_uint32x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqz_u32(a); #else return simde_vceq_u32(a, simde_vdup_n_u32(0)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqz_u32 #define vceqz_u32(a) simde_vceqz_u32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vceqz_u64(simde_uint64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqz_u64(a); #else return simde_vceq_u64(a, simde_vdup_n_u64(0)); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqz_u64 #define vceqz_u64(a) simde_vceqz_u64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vceqzq_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqzq_f32(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_eq(wasm_f32x4_splat(0), a); #else return simde_vceqq_f32(a, simde_vdupq_n_f32(0)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqzq_f32 #define vceqzq_f32(a) simde_vceqzq_f32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vceqzq_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqzq_f64(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_eq(wasm_f64x2_splat(0), a); #else return simde_vceqq_f64(a, simde_vdupq_n_f64(0)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vceqzq_f64 #define vceqzq_f64(a) simde_vceqzq_f64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vceqzq_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqzq_s8(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_eq(wasm_i8x16_splat(0), a); #else return simde_vceqq_s8(a, simde_vdupq_n_s8(0)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqzq_s8 #define vceqzq_s8(a) simde_vceqzq_s8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vceqzq_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqzq_s16(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_eq(wasm_i16x8_splat(0), a); #else return simde_vceqq_s16(a, simde_vdupq_n_s16(0)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqzq_s16 #define vceqzq_s16(a) simde_vceqzq_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vceqzq_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqzq_s32(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return 
wasm_i32x4_eq(wasm_i32x4_splat(0), a); #else return simde_vceqq_s32(a, simde_vdupq_n_s32(0)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqzq_s32 #define vceqzq_s32(a) simde_vceqzq_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vceqzq_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqzq_s64(a); #else return simde_vceqq_s64(a, simde_vdupq_n_s64(0)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqzq_s64 #define vceqzq_s64(a) simde_vceqzq_s64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vceqzq_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqzq_u8(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_eq(wasm_i8x16_splat(0), a); #else return simde_vceqq_u8(a, simde_vdupq_n_u8(0)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqzq_u8 #define vceqzq_u8(a) simde_vceqzq_u8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vceqzq_u16(simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqzq_u16(a); #else return simde_vceqq_u16(a, simde_vdupq_n_u16(0)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqzq_u16 #define vceqzq_u16(a) simde_vceqzq_u16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vceqzq_u32(simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqzq_u32(a); #else return simde_vceqq_u32(a, simde_vdupq_n_u32(0)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqzq_u32 #define vceqzq_u32(a) simde_vceqzq_u32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vceqzq_u64(simde_uint64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vceqzq_u64(a); #else return simde_vceqq_u64(a, simde_vdupq_n_u64(0)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vceqzq_u64 #define vceqzq_u64(a) simde_vceqzq_u64((a)) #endif SIMDE_END_DECLS_ 
HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_CEQZ_H) */ simde-0.7.2/simde/arm/neon/cge.h000066400000000000000000000544261400333146700163630ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*
 * Copyright:
 *   2020 Evan Nemerson
 *   2020 Christopher Moore
 */

#if !defined(SIMDE_ARM_NEON_CGE_H)
#define SIMDE_ARM_NEON_CGE_H

#include "types.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* Lane-wise a >= b on float32x4.  In the portable loop each result lane is
 * UINT32_MAX when the comparison holds and 0 otherwise; earlier branches
 * hand the comparison to a NEON / SSE / AltiVec / WASM intrinsic. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgeq_f32(simde_float32x4_t a, simde_float32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgeq_f32(a, b);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    /* Float compare, then reinterpret the float mask as an integer vector. */
    return _mm_castps_si128(_mm_cmpge_ps(a, b));
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpge(a, b));
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_ge(a, b);
  #else
    simde_float32x4_private
      a_ = simde_float32x4_to_private(a),
      b_ = simde_float32x4_to_private(b);
    simde_uint32x4_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Compiler vector extension: compare whole vectors, reinterpret as the unsigned result type. */
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_f32
  #define vcgeq_f32(a, b) simde_vcgeq_f32((a), (b))
#endif

/* Lane-wise a >= b on float64x2; portable result lanes are UINT64_MAX or 0.
 * The native NEON intrinsic is only used under the A64V8 check. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgeq_f64(simde_float64x2_t a, simde_float64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgeq_f64(a, b);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_castpd_si128(_mm_cmpge_pd(a, b));
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpge(a, b));
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_ge(a, b);
  #else
    simde_float64x2_private
      a_ = simde_float64x2_to_private(a),
      b_ = simde_float64x2_to_private(b);
    simde_uint64x2_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0;
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_f64
  #define vcgeq_f64(a, b) simde_vcgeq_f64((a), (b))
#endif

/* Lane-wise signed a >= b on int8x16; portable result lanes are UINT8_MAX or 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcgeq_s8(simde_int8x16_t a, simde_int8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgeq_s8(a, b);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    /* Built as (a > b) | (a == b). */
    return _mm_or_si128(_mm_cmpgt_epi8(a, b), _mm_cmpeq_epi8(a, b));
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpge(a, b));
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_ge(a, b);
  #else
    simde_int8x16_private
      a_ = simde_int8x16_to_private(a),
      b_ = simde_int8x16_to_private(b);
    simde_uint8x16_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_s8
  #define vcgeq_s8(a, b) simde_vcgeq_s8((a), (b))
#endif

/* Lane-wise signed a >= b on int16x8; portable result lanes are UINT16_MAX or 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcgeq_s16(simde_int16x8_t a, simde_int16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgeq_s16(a, b);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    /* Built as (a > b) | (a == b). */
    return _mm_or_si128(_mm_cmpgt_epi16(a, b), _mm_cmpeq_epi16(a, b));
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpge(a, b));
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_ge(a, b);
  #else
    simde_int16x8_private
      a_ = simde_int16x8_to_private(a),
      b_ = simde_int16x8_to_private(b);
    simde_uint16x8_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_s16
  #define vcgeq_s16(a, b) simde_vcgeq_s16((a), (b))
#endif

/* Lane-wise signed a >= b on int32x4; portable result lanes are UINT32_MAX or 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgeq_s32(simde_int32x4_t a, simde_int32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgeq_s32(a, b);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    /* Built as (a > b) | (a == b). */
    return _mm_or_si128(_mm_cmpgt_epi32(a, b), _mm_cmpeq_epi32(a, b));
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpge(a, b));
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_ge(a, b);
  #else
    simde_int32x4_private
      a_ = simde_int32x4_to_private(a),
      b_ = simde_int32x4_to_private(b);
    simde_uint32x4_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_s32
  #define vcgeq_s32(a, b) simde_vcgeq_s32((a), (b))
#endif

/* Lane-wise signed a >= b on int64x2; portable result lanes are UINT64_MAX or 0.
 * Besides the A64V8 intrinsic there is a dedicated A32V7 sequence and an
 * SSE4.2 path (64-bit _mm_cmpgt_epi64). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgeq_s64(simde_int64x2_t a, simde_int64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgeq_s64(a, b);
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    /* vqsubq (a - b), shift right by 63, then vmvnq (bitwise NOT) of the
     * result.  NOTE(review): presumably the shift broadcasts the sign of
     * the saturated difference so NOT yields the a >= b mask -- confirm
     * against the Arm intrinsic reference. */
    return vreinterpretq_u64_s32(vmvnq_s32(vreinterpretq_s32_s64(vshrq_n_s64(vqsubq_s64(a, b), 63))));
  #elif defined(SIMDE_X86_SSE4_2_NATIVE)
    /* Built as (a > b) | (a == b). */
    return _mm_or_si128(_mm_cmpgt_epi64(a, b), _mm_cmpeq_epi64(a, b));
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpge(a, b));
  #else
    simde_int64x2_private
      a_ = simde_int64x2_to_private(a),
      b_ = simde_int64x2_to_private(b);
    simde_uint64x2_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0;
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_s64
  #define vcgeq_s64(a, b) simde_vcgeq_s64((a), (b))
#endif

/* Lane-wise unsigned a >= b on uint8x16; portable result lanes are UINT8_MAX or 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vcgeq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgeq_u8(a, b);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    /* XOR both operands with INT8_MIN before the signed greater-than, then
     * OR in equality: the sign-bit flip lets the signed compare order the
     * values as unsigned. */
    __m128i sign_bits = _mm_set1_epi8(INT8_MIN);
    return _mm_or_si128(_mm_cmpgt_epi8(_mm_xor_si128(a, sign_bits), _mm_xor_si128(b, sign_bits)), _mm_cmpeq_epi8(a, b));
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpge(a, b));
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u8x16_ge(a, b);
  #else
    simde_uint8x16_private
      r_,
      a_ = simde_uint8x16_to_private(a),
      b_ = simde_uint8x16_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_u8
  #define vcgeq_u8(a, b) simde_vcgeq_u8((a), (b))
#endif

/* Lane-wise unsigned a >= b on uint16x8; portable result lanes are UINT16_MAX or 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vcgeq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgeq_u16(a, b);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    /* Sign-bit flip (XOR with INT16_MIN) + signed greater-than, OR equality. */
    __m128i sign_bits = _mm_set1_epi16(INT16_MIN);
    return _mm_or_si128(_mm_cmpgt_epi16(_mm_xor_si128(a, sign_bits), _mm_xor_si128(b, sign_bits)), _mm_cmpeq_epi16(a, b));
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpge(a, b));
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u16x8_ge(a, b);
  #else
    simde_uint16x8_private
      r_,
      a_ = simde_uint16x8_to_private(a),
      b_ = simde_uint16x8_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_u16
  #define vcgeq_u16(a, b) simde_vcgeq_u16((a), (b))
#endif

/* Lane-wise unsigned a >= b on uint32x4; portable result lanes are UINT32_MAX or 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vcgeq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcgeq_u32(a, b);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    /* Sign-bit flip (XOR with INT32_MIN) + signed greater-than, OR equality. */
    __m128i sign_bits = _mm_set1_epi32(INT32_MIN);
    return _mm_or_si128(_mm_cmpgt_epi32(_mm_xor_si128(a, sign_bits), _mm_xor_si128(b, sign_bits)), _mm_cmpeq_epi32(a, b));
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpge(a, b));
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_u32x4_ge(a, b);
  #else
    simde_uint32x4_private
      r_,
      a_ = simde_uint32x4_to_private(a),
      b_ = simde_uint32x4_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_u32
  #define vcgeq_u32(a, b) simde_vcgeq_u32((a), (b))
#endif

/* Lane-wise unsigned a >= b on uint64x2; portable result lanes are UINT64_MAX or 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vcgeq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcgeq_u64(a, b);
  #elif defined(SIMDE_X86_SSE4_2_NATIVE)
    /* Sign-bit flip (XOR with INT64_MIN) + signed greater-than, OR equality. */
    __m128i sign_bits = _mm_set1_epi64x(INT64_MIN);
    return _mm_or_si128(_mm_cmpgt_epi64(_mm_xor_si128(a, sign_bits), _mm_xor_si128(b, sign_bits)), _mm_cmpeq_epi64(a, b));
  #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpge(a, b));
  #else
    simde_uint64x2_private
      r_,
      a_ = simde_uint64x2_to_private(a),
      b_ = simde_uint64x2_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0;
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcgeq_u64
  #define vcgeq_u64(a, b) simde_vcgeq_u64((a), (b))
#endif

/* Lane-wise a >= b on float32x2; portable result lanes are UINT32_MAX or 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcge_f32(simde_float32x2_t a, simde_float32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcge_f32(a, b);
  #else
    simde_float32x2_private
      a_ = simde_float32x2_to_private(a),
      b_ = simde_float32x2_to_private(b);
    simde_uint32x2_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcge_f32
  #define vcge_f32(a, b) simde_vcge_f32((a), (b))
#endif

/* Lane-wise a >= b on float64x1; portable result lanes are UINT64_MAX or 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcge_f64(simde_float64x1_t a, simde_float64x1_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcge_f64(a, b);
  #else
    simde_float64x1_private
      a_ = simde_float64x1_to_private(a),
      b_ = simde_float64x1_to_private(b);
    simde_uint64x1_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0;
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcge_f64
  #define vcge_f64(a, b) simde_vcge_f64((a), (b))
#endif

/* Lane-wise signed a >= b on int8x8; portable result lanes are UINT8_MAX or 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcge_s8(simde_int8x8_t a, simde_int8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcge_s8(a, b);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    /* Built as (a > b) | (a == b). */
    return _mm_or_si64(_mm_cmpgt_pi8(a, b), _mm_cmpeq_pi8(a, b));
  #else
    simde_int8x8_private
      a_ = simde_int8x8_to_private(a),
      b_ = simde_int8x8_to_private(b);
    simde_uint8x8_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcge_s8
  #define vcge_s8(a, b) simde_vcge_s8((a), (b))
#endif

/* Lane-wise signed a >= b on int16x4; portable result lanes are UINT16_MAX or 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcge_s16(simde_int16x4_t a, simde_int16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcge_s16(a, b);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    /* Built as (a > b) | (a == b). */
    return _mm_or_si64(_mm_cmpgt_pi16(a, b), _mm_cmpeq_pi16(a, b));
  #else
    simde_int16x4_private
      a_ = simde_int16x4_to_private(a),
      b_ = simde_int16x4_to_private(b);
    simde_uint16x4_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcge_s16
  #define vcge_s16(a, b) simde_vcge_s16((a), (b))
#endif

/* Lane-wise signed a >= b on int32x2; portable result lanes are UINT32_MAX or 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcge_s32(simde_int32x2_t a, simde_int32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcge_s32(a, b);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    /* Built as (a > b) | (a == b). */
    return _mm_or_si64(_mm_cmpgt_pi32(a, b), _mm_cmpeq_pi32(a, b));
  #else
    simde_int32x2_private
      a_ = simde_int32x2_to_private(a),
      b_ = simde_int32x2_to_private(b);
    simde_uint32x2_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcge_s32
  #define vcge_s32(a, b) simde_vcge_s32((a), (b))
#endif

/* Lane-wise signed a >= b on int64x1; portable result lanes are UINT64_MAX or 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcge_s64(simde_int64x1_t a, simde_int64x1_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcge_s64(a, b);
  #else
    simde_int64x1_private
      a_ = simde_int64x1_to_private(a),
      b_ = simde_int64x1_to_private(b);
    simde_uint64x1_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0;
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcge_s64
  #define vcge_s64(a, b) simde_vcge_s64((a), (b))
#endif

/* Lane-wise unsigned a >= b on uint8x8; portable result lanes are UINT8_MAX or 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vcge_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcge_u8(a, b);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    /* Sign-bit flip (XOR with INT8_MIN) + signed greater-than, OR equality. */
    __m64 sign_bits = _mm_set1_pi8(INT8_MIN);
    return _mm_or_si64(_mm_cmpgt_pi8(_mm_xor_si64(a, sign_bits), _mm_xor_si64(b, sign_bits)), _mm_cmpeq_pi8(a, b));
  #else
    simde_uint8x8_private
      r_,
      a_ = simde_uint8x8_to_private(a),
      b_ = simde_uint8x8_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcge_u8
  #define vcge_u8(a, b) simde_vcge_u8((a), (b))
#endif

/* Lane-wise unsigned a >= b on uint16x4; portable result lanes are UINT16_MAX or 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vcge_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcge_u16(a, b);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    /* Sign-bit flip (XOR with INT16_MIN) + signed greater-than, OR equality. */
    __m64 sign_bits = _mm_set1_pi16(INT16_MIN);
    return _mm_or_si64(_mm_cmpgt_pi16(_mm_xor_si64(a, sign_bits), _mm_xor_si64(b, sign_bits)), _mm_cmpeq_pi16(a, b));
  #else
    simde_uint16x4_private
      r_,
      a_ = simde_uint16x4_to_private(a),
      b_ = simde_uint16x4_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcge_u16
  #define vcge_u16(a, b) simde_vcge_u16((a), (b))
#endif

/* Lane-wise unsigned a >= b on uint32x2; portable result lanes are UINT32_MAX or 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vcge_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vcge_u32(a, b);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    /* Sign-bit flip (XOR with INT32_MIN) + signed greater-than, OR equality. */
    __m64 sign_bits = _mm_set1_pi32(INT32_MIN);
    return _mm_or_si64(_mm_cmpgt_pi32(_mm_xor_si64(a, sign_bits), _mm_xor_si64(b, sign_bits)), _mm_cmpeq_pi32(a, b));
  #else
    simde_uint32x2_private
      r_,
      a_ = simde_uint32x2_to_private(a),
      b_ = simde_uint32x2_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vcge_u32
  #define vcge_u32(a, b) simde_vcge_u32((a), (b))
#endif

/* Lane-wise unsigned a >= b on uint64x1; portable result lanes are UINT64_MAX or 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vcge_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vcge_u64(a, b);
  #else
    simde_uint64x1_private
      r_,
      a_ = simde_uint64x1_to_private(a),
      b_ = simde_uint64x1_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0;
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vcge_u64
  #define vcge_u64(a, b) simde_vcge_u64((a), (b))
#endif

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_ARM_NEON_CGE_H) */
simde-0.7.2/simde/arm/neon/cgez.h000066400000000000000271541400333146700165530ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_CGEZ_H) #define SIMDE_ARM_NEON_CGEZ_H #include "cge.h" #include "dup_n.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcgezq_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgezq_f32(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcgeq_f32(a, simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))); #else simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_uint32x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT32_C(0.0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] >= SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgezq_f32 #define vcgezq_f32(a) simde_vcgezq_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vcgezq_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgezq_f64(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcgeq_f64(a, simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0))); #else simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_uint64x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT64_C(0.0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] >= SIMDE_FLOAT64_C(0.0)) ? 
UINT64_MAX : 0; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgezq_f64 #define vcgezq_f64(a) simde_vcgezq_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vcgezq_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgezq_s8(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcgeq_s8(a, simde_vdupq_n_s8(0)); #else simde_int8x16_private a_ = simde_int8x16_to_private(a); simde_uint8x16_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] >= 0) ? UINT8_MAX : 0; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgezq_s8 #define vcgezq_s8(a) simde_vcgezq_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vcgezq_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgezq_s16(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcgeq_s16(a, simde_vdupq_n_s16(0)); #else simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_uint16x8_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] >= 0) ? 
UINT16_MAX : 0; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgezq_s16 #define vcgezq_s16(a) simde_vcgezq_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcgezq_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgezq_s32(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcgeq_s32(a, simde_vdupq_n_s32(0)); #else simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_uint32x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] >= 0) ? UINT32_MAX : 0; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgezq_s32 #define vcgezq_s32(a) simde_vcgezq_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vcgezq_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgezq_s64(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcgeq_s64(a, simde_vdupq_n_s64(0)); #else simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_uint64x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] >= 0) ? 
UINT64_MAX : 0; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgezq_s64 #define vcgezq_s64(a) simde_vcgezq_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcgez_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgez_f32(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcge_f32(a, simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))); #else simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_uint32x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT32_C(0.0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] >= SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgez_f32 #define vcgez_f32(a) simde_vcgez_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vcgez_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgez_f64(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcge_f64(a, simde_vdup_n_f64(SIMDE_FLOAT64_C(0.0))); #else simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_uint64x1_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT64_C(0.0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] >= SIMDE_FLOAT64_C(0.0)) ? 
UINT64_MAX : 0; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgez_f64 #define vcgez_f64(a) simde_vcgez_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vcgez_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgez_s8(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcge_s8(a, simde_vdup_n_s8(0)); #else simde_int8x8_private a_ = simde_int8x8_to_private(a); simde_uint8x8_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] >= 0) ? UINT8_MAX : 0; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgez_s8 #define vcgez_s8(a) simde_vcgez_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vcgez_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgez_s16(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcge_s16(a, simde_vdup_n_s16(0)); #else simde_int16x4_private a_ = simde_int16x4_to_private(a); simde_uint16x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] >= 0) ? 
UINT16_MAX : 0; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgez_s16 #define vcgez_s16(a) simde_vcgez_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcgez_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgez_s32(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcge_s32(a, simde_vdup_n_s32(0)); #else simde_int32x2_private a_ = simde_int32x2_to_private(a); simde_uint32x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] >= 0) ? UINT32_MAX : 0; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgez_s32 #define vcgez_s32(a) simde_vcgez_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vcgez_s64(simde_int64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgez_s64(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcge_s64(a, simde_vdup_n_s64(0)); #else simde_int64x1_private a_ = simde_int64x1_to_private(a); simde_uint64x1_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] >= 0) ? 
UINT64_MAX : 0; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgez_s64 #define vcgez_s64(a) simde_vcgez_s64(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_CGEZ_H) */ simde-0.7.2/simde/arm/neon/cgt.h000066400000000000000000000542071400333146700163770ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_CGT_H) #define SIMDE_ARM_NEON_CGT_H #include "combine.h" #include "get_low.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcgtq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcgtq_f32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_castps_si128(_mm_cmpgt_ps(a, b)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpgt(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_gt(a, b); #else simde_float32x4_private a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); simde_uint32x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT32_MAX : 0; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcgtq_f32 #define vcgtq_f32(a, b) simde_vcgtq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vcgtq_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgtq_f64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_castpd_si128(_mm_cmpgt_pd(a, b)); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_gt(a, b); #else simde_float64x2_private a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); simde_uint64x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT64_MAX : 0; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgtq_f64 #define vcgtq_f64(a, b) simde_vcgtq_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vcgtq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcgtq_s8(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpgt_epi8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpgt(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_gt(a, b); #else simde_int8x16_private a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); simde_uint8x16_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT8_MAX : 0; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcgtq_s8 #define vcgtq_s8(a, b) simde_vcgtq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vcgtq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcgtq_s16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpgt_epi16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpgt(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_gt(a, b); #else simde_int16x8_private a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); simde_uint16x8_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT16_MAX : 0; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcgtq_s16 #define vcgtq_s16(a, b) simde_vcgtq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcgtq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcgtq_s32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpgt_epi32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpgt(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_gt(a, b); #else simde_int32x4_private a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); simde_uint32x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT32_MAX : 0; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcgtq_s32 #define vcgtq_s32(a, b) simde_vcgtq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vcgtq_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgtq_s64(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u64_s64(vshrq_n_s64(vqsubq_s64(b, a), 63)); #elif defined(SIMDE_X86_SSE4_2_NATIVE) return _mm_cmpgt_epi64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) /* https://stackoverflow.com/a/65175746/501126 */ __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); return _mm_shuffle_epi32(r, _MM_SHUFFLE(3,3,1,1)); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a, b)); #else simde_int64x2_private a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); simde_uint64x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT64_MAX : 0; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgtq_s64 #define vcgtq_s64(a, b) simde_vcgtq_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vcgtq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcgtq_u8(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) __m128i sign_bit = _mm_set1_epi8(INT8_MIN); return _mm_cmpgt_epi8(_mm_xor_si128(a, sign_bit), _mm_xor_si128(b, sign_bit)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpgt(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u8x16_gt(a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT8_MAX : 0; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcgtq_u8 #define vcgtq_u8(a, b) simde_vcgtq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vcgtq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcgtq_u16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) __m128i sign_bit = _mm_set1_epi16(INT16_MIN); return _mm_cmpgt_epi16(_mm_xor_si128(a, sign_bit), _mm_xor_si128(b, sign_bit)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpgt(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u16x8_gt(a, b); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT16_MAX : 0; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcgtq_u16 #define vcgtq_u16(a, b) simde_vcgtq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcgtq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcgtq_u32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) __m128i sign_bit = _mm_set1_epi32(INT32_MIN); return _mm_cmpgt_epi32(_mm_xor_si128(a, sign_bit), _mm_xor_si128(b, sign_bit)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpgt(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u32x4_gt(a, b); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT32_MAX : 0; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcgtq_u32 #define vcgtq_u32(a, b) simde_vcgtq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vcgtq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgtq_u64(a, b); #elif defined(SIMDE_X86_SSE4_2_NATIVE) __m128i sign_bit = _mm_set1_epi64x(INT64_MIN); return _mm_cmpgt_epi64(_mm_xor_si128(a, sign_bit), _mm_xor_si128(b, sign_bit)); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a, b)); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT64_MAX : 0; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgtq_u64 #define vcgtq_u64(a, b) simde_vcgtq_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcgt_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcgt_f32(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE >= 128 return simde_vget_low_u32(simde_vcgtq_f32(simde_vcombine_f32(a, a), simde_vcombine_f32(b, b))); #else simde_float32x2_private a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); simde_uint32x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT32_MAX : 0; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcgt_f32 #define vcgt_f32(a, b) simde_vcgt_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vcgt_f64(simde_float64x1_t a, simde_float64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgt_f64(a, b); #else simde_float64x1_private a_ = simde_float64x1_to_private(a), b_ = simde_float64x1_to_private(b); simde_uint64x1_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT64_MAX : 0; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgt_f64 #define vcgt_f64(a, b) simde_vcgt_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vcgt_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcgt_s8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpgt_pi8(a, b); #else simde_int8x8_private a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); simde_uint8x8_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT8_MAX : 0; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcgt_s8 #define vcgt_s8(a, b) simde_vcgt_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vcgt_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcgt_s16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpgt_pi16(a, b); #else simde_int16x4_private a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); simde_uint16x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT16_MAX : 0; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcgt_s16 #define vcgt_s16(a, b) simde_vcgt_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcgt_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcgt_s32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpgt_pi32(a, b); #else simde_int32x2_private a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); simde_uint32x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT32_MAX : 0; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcgt_s32 #define vcgt_s32(a, b) simde_vcgt_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vcgt_s64(simde_int64x1_t a, simde_int64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgt_s64(a, b); #else simde_int64x1_private a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b); simde_uint64x1_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT64_MAX : 0; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgt_s64 #define vcgt_s64(a, b) simde_vcgt_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vcgt_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcgt_u8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) __m64 sign_bit = _mm_set1_pi8(INT8_MIN); return _mm_cmpgt_pi8(_mm_xor_si64(a, sign_bit), _mm_xor_si64(b, sign_bit)); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT8_MAX : 0; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcgt_u8 #define vcgt_u8(a, b) simde_vcgt_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vcgt_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcgt_u16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) __m64 sign_bit = _mm_set1_pi16(INT16_MIN); return _mm_cmpgt_pi16(_mm_xor_si64(a, sign_bit), _mm_xor_si64(b, sign_bit)); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT16_MAX : 0; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcgt_u16 #define vcgt_u16(a, b) simde_vcgt_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcgt_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcgt_u32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) __m64 sign_bit = _mm_set1_pi32(INT32_MIN); return _mm_cmpgt_pi32(_mm_xor_si64(a, sign_bit), _mm_xor_si64(b, sign_bit)); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT32_MAX : 0; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcgt_u32 #define vcgt_u32(a, b) simde_vcgt_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vcgt_u64(simde_uint64x1_t a, simde_uint64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgt_u64(a, b); #else simde_uint64x1_private r_, a_ = simde_uint64x1_to_private(a), b_ = simde_uint64x1_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT64_MAX : 0; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgt_u64 #define vcgt_u64(a, b) simde_vcgt_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_CGT_H) */ simde-0.7.2/simde/arm/neon/cgtz.h000066400000000000000000000271761400333146700165760ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_CGTZ_H) #define SIMDE_ARM_NEON_CGTZ_H #include "cgt.h" #include "combine.h" #include "dup_n.h" #include "get_low.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcgtzq_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgtzq_f32(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcgtq_f32(a, simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))); #else simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_uint32x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT32_C(0.0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > SIMDE_FLOAT32_C(0.0)) ? 
UINT32_MAX : 0; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgtzq_f32 #define vcgtzq_f32(a) simde_vcgtzq_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vcgtzq_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgtzq_f64(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcgtq_f64(a, simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0))); #else simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_uint64x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT64_C(0.0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgtzq_f64 #define vcgtzq_f64(a) simde_vcgtzq_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vcgtzq_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgtzq_s8(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcgtq_s8(a, simde_vdupq_n_s8(0)); #else simde_int8x16_private a_ = simde_int8x16_to_private(a); simde_uint8x16_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > 0) ? 
UINT8_MAX : 0; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgtzq_s8 #define vcgtzq_s8(a) simde_vcgtzq_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vcgtzq_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgtzq_s16(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcgtq_s16(a, simde_vdupq_n_s16(0)); #else simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_uint16x8_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > 0) ? UINT16_MAX : 0; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgtzq_s16 #define vcgtzq_s16(a) simde_vcgtzq_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcgtzq_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgtzq_s32(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcgtq_s32(a, simde_vdupq_n_s32(0)); #else simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_uint32x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > 0) ? 
UINT32_MAX : 0; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgtzq_s32 #define vcgtzq_s32(a) simde_vcgtzq_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vcgtzq_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgtzq_s64(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcgtq_s64(a, simde_vdupq_n_s64(0)); #else simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_uint64x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > 0) ? UINT64_MAX : 0; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgtzq_s64 #define vcgtzq_s64(a) simde_vcgtzq_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcgtz_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgtz_f32(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcgt_f32(a, simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))); #else simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_uint32x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT32_C(0.0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > SIMDE_FLOAT32_C(0.0)) ? 
UINT32_MAX : 0; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgtz_f32 #define vcgtz_f32(a) simde_vcgtz_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vcgtz_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgtz_f64(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcgt_f64(a, simde_vdup_n_f64(SIMDE_FLOAT64_C(0.0))); #else simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_uint64x1_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT64_C(0.0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgtz_f64 #define vcgtz_f64(a) simde_vcgtz_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vcgtz_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgtz_s8(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcgt_s8(a, simde_vdup_n_s8(0)); #else simde_int8x8_private a_ = simde_int8x8_to_private(a); simde_uint8x8_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > 0) ? 
UINT8_MAX : 0; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgtz_s8 #define vcgtz_s8(a) simde_vcgtz_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vcgtz_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgtz_s16(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcgt_s16(a, simde_vdup_n_s16(0)); #else simde_int16x4_private a_ = simde_int16x4_to_private(a); simde_uint16x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > 0) ? UINT16_MAX : 0; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgtz_s16 #define vcgtz_s16(a) simde_vcgtz_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcgtz_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgtz_s32(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcgt_s32(a, simde_vdup_n_s32(0)); #else simde_int32x2_private a_ = simde_int32x2_to_private(a); simde_uint32x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > 0) ? 
UINT32_MAX : 0; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgtz_s32 #define vcgtz_s32(a) simde_vcgtz_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vcgtz_s64(simde_int64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcgtz_s64(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcgt_s64(a, simde_vdup_n_s64(0)); #else simde_int64x1_private a_ = simde_int64x1_to_private(a); simde_uint64x1_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > 0) ? UINT64_MAX : 0; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcgtz_s64 #define vcgtz_s64(a) simde_vcgtz_s64(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_CGTZ_H) */ simde-0.7.2/simde/arm/neon/cle.h000066400000000000000000000544261400333146700163700ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_CLE_H) #define SIMDE_ARM_NEON_CLE_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcleq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcleq_f32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_castps_si128(_mm_cmple_ps(a, b)); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmple(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_le(a, b); #else simde_float32x4_private a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); simde_uint32x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT32_MAX : 0; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcleq_f32 #define vcleq_f32(a, b) simde_vcleq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vcleq_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcleq_f64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_castpd_si128(_mm_cmple_pd(a, b)); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmple(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_le(a, b); #else simde_float64x2_private a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); simde_uint64x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT64_MAX : 0; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcleq_f64 #define vcleq_f64(a, b) simde_vcleq_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vcleq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcleq_s8(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_or_si128(_mm_cmpgt_epi8(b, a), _mm_cmpeq_epi8(a, b)); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmple(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_le(a, b); #else simde_int8x16_private a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); simde_uint8x16_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT8_MAX : 0; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcleq_s8 #define vcleq_s8(a, b) simde_vcleq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vcleq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcleq_s16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_or_si128(_mm_cmpgt_epi16(b, a), _mm_cmpeq_epi16(a, b)); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmple(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_le(a, b); #else simde_int16x8_private a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); simde_uint16x8_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT16_MAX : 0; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcleq_s16 #define vcleq_s16(a, b) simde_vcleq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcleq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcleq_s32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_or_si128(_mm_cmpgt_epi32(b, a), _mm_cmpeq_epi32(a, b)); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmple(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_le(a, b); #else simde_int32x4_private a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); simde_uint32x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT32_MAX : 0; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcleq_s32 #define vcleq_s32(a, b) simde_vcleq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vcleq_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcleq_s64(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u64_s32(vmvnq_s32(vreinterpretq_s32_s64(vshrq_n_s64(vqsubq_s64(b, a), 63)))); #elif defined(SIMDE_X86_SSE4_2_NATIVE) return _mm_or_si128(_mm_cmpgt_epi64(b, a), _mm_cmpeq_epi64(a, b)); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmple(a, b)); #else simde_int64x2_private a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); simde_uint64x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT64_MAX : 0; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcleq_s64 #define vcleq_s64(a, b) simde_vcleq_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vcleq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcleq_u8(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) __m128i sign_bits = _mm_set1_epi8(INT8_MIN); return _mm_or_si128(_mm_cmpgt_epi8(_mm_xor_si128(b, sign_bits), _mm_xor_si128(a, sign_bits)), _mm_cmpeq_epi8(a, b)); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmple(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u8x16_le(a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT8_MAX : 0; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcleq_u8 #define vcleq_u8(a, b) simde_vcleq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vcleq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcleq_u16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) __m128i sign_bits = _mm_set1_epi16(INT16_MIN); return _mm_or_si128(_mm_cmpgt_epi16(_mm_xor_si128(b, sign_bits), _mm_xor_si128(a, sign_bits)), _mm_cmpeq_epi16(a, b)); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmple(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u16x8_le(a, b); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT16_MAX : 0; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcleq_u16 #define vcleq_u16(a, b) simde_vcleq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcleq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcleq_u32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) __m128i sign_bits = _mm_set1_epi32(INT32_MIN); return _mm_or_si128(_mm_cmpgt_epi32(_mm_xor_si128(b, sign_bits), _mm_xor_si128(a, sign_bits)), _mm_cmpeq_epi32(a, b)); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmple(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u32x4_le(a, b); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT32_MAX : 0; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcleq_u32 #define vcleq_u32(a, b) simde_vcleq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vcleq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcleq_u64(a, b); #elif defined(SIMDE_X86_SSE4_2_NATIVE) __m128i sign_bits = _mm_set1_epi64x(INT64_MIN); return _mm_or_si128(_mm_cmpgt_epi64(_mm_xor_si128(b, sign_bits), _mm_xor_si128(a, sign_bits)), _mm_cmpeq_epi64(a, b)); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmple(a, b)); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT64_MAX : 0; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcleq_u64 #define vcleq_u64(a, b) simde_vcleq_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcle_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcle_f32(a, b); #else simde_float32x2_private a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); simde_uint32x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT32_MAX : 0; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcle_f32 #define vcle_f32(a, b) simde_vcle_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vcle_f64(simde_float64x1_t a, simde_float64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcle_f64(a, b); #else simde_float64x1_private a_ = simde_float64x1_to_private(a), b_ = simde_float64x1_to_private(b); simde_uint64x1_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT64_MAX : 0; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcle_f64 #define vcle_f64(a, b) simde_vcle_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vcle_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcle_s8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_or_si64(_mm_cmpgt_pi8(b, a), _mm_cmpeq_pi8(a, b)); #else simde_int8x8_private a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); simde_uint8x8_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT8_MAX : 0; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcle_s8 #define vcle_s8(a, b) simde_vcle_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vcle_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcle_s16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_or_si64(_mm_cmpgt_pi16(b, a), _mm_cmpeq_pi16(a, b)); #else simde_int16x4_private a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); simde_uint16x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT16_MAX : 0; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcle_s16 #define vcle_s16(a, b) simde_vcle_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcle_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcle_s32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_or_si64(_mm_cmpgt_pi32(b, a), _mm_cmpeq_pi32(a, b)); #else simde_int32x2_private a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); simde_uint32x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT32_MAX : 0; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcle_s32 #define vcle_s32(a, b) simde_vcle_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vcle_s64(simde_int64x1_t a, simde_int64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcle_s64(a, b); #else simde_int64x1_private a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b); simde_uint64x1_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT64_MAX : 0; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcle_s64 #define vcle_s64(a, b) simde_vcle_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vcle_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcle_u8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) __m64 sign_bits = _mm_set1_pi8(INT8_MIN); return _mm_or_si64(_mm_cmpgt_pi8(_mm_xor_si64(b, sign_bits), _mm_xor_si64(a, sign_bits)), _mm_cmpeq_pi8(a, b)); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT8_MAX : 0; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcle_u8 #define vcle_u8(a, b) simde_vcle_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vcle_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcle_u16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) __m64 sign_bits = _mm_set1_pi16(INT16_MIN); return _mm_or_si64(_mm_cmpgt_pi16(_mm_xor_si64(b, sign_bits), _mm_xor_si64(a, sign_bits)), _mm_cmpeq_pi16(a, b)); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT16_MAX : 0; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcle_u16 #define vcle_u16(a, b) simde_vcle_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcle_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcle_u32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) __m64 sign_bits = _mm_set1_pi32(INT32_MIN); return _mm_or_si64(_mm_cmpgt_pi32(_mm_xor_si64(b, sign_bits), _mm_xor_si64(a, sign_bits)), _mm_cmpeq_pi32(a, b)); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT32_MAX : 0; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcle_u32 #define vcle_u32(a, b) simde_vcle_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vcle_u64(simde_uint64x1_t a, simde_uint64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcle_u64(a, b); #else simde_uint64x1_private r_, a_ = simde_uint64x1_to_private(a), b_ = simde_uint64x1_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT64_MAX : 0; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcle_u64 #define vcle_u64(a, b) simde_vcle_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_CLE_H) */ simde-0.7.2/simde/arm/neon/clez.h000066400000000000000000000271541400333146700165600ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_CLEZ_H) #define SIMDE_ARM_NEON_CLEZ_H #include "cle.h" #include "dup_n.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vclezq_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vclezq_f32(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcleq_f32(a, simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))); #else simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_uint32x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= SIMDE_FLOAT32_C(0.0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vclezq_f32 #define vclezq_f32(a) simde_vclezq_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vclezq_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vclezq_f64(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcleq_f64(a, simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0))); #else simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_uint64x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= SIMDE_FLOAT64_C(0.0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= SIMDE_FLOAT64_C(0.0)) ? 
UINT64_MAX : 0; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vclezq_f64 #define vclezq_f64(a) simde_vclezq_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vclezq_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vclezq_s8(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcleq_s8(a, simde_vdupq_n_s8(0)); #else simde_int8x16_private a_ = simde_int8x16_to_private(a); simde_uint8x16_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= 0) ? UINT8_MAX : 0; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vclezq_s8 #define vclezq_s8(a) simde_vclezq_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vclezq_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vclezq_s16(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcleq_s16(a, simde_vdupq_n_s16(0)); #else simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_uint16x8_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= 0) ? 
UINT16_MAX : 0; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vclezq_s16 #define vclezq_s16(a) simde_vclezq_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vclezq_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vclezq_s32(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcleq_s32(a, simde_vdupq_n_s32(0)); #else simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_uint32x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= 0) ? UINT32_MAX : 0; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vclezq_s32 #define vclezq_s32(a) simde_vclezq_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vclezq_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vclezq_s64(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcleq_s64(a, simde_vdupq_n_s64(0)); #else simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_uint64x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= 0) ? 
UINT64_MAX : 0; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vclezq_s64 #define vclezq_s64(a) simde_vclezq_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vclez_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vclez_f32(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcle_f32(a, simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))); #else simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_uint32x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= SIMDE_FLOAT32_C(0.0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vclez_f32 #define vclez_f32(a) simde_vclez_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vclez_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vclez_f64(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcle_f64(a, simde_vdup_n_f64(SIMDE_FLOAT64_C(0.0))); #else simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_uint64x1_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= SIMDE_FLOAT64_C(0.0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= SIMDE_FLOAT64_C(0.0)) ? 
UINT64_MAX : 0; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vclez_f64 #define vclez_f64(a) simde_vclez_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vclez_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vclez_s8(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcle_s8(a, simde_vdup_n_s8(0)); #else simde_int8x8_private a_ = simde_int8x8_to_private(a); simde_uint8x8_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= 0) ? UINT8_MAX : 0; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vclez_s8 #define vclez_s8(a) simde_vclez_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vclez_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vclez_s16(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcle_s16(a, simde_vdup_n_s16(0)); #else simde_int16x4_private a_ = simde_int16x4_to_private(a); simde_uint16x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= 0) ? 
UINT16_MAX : 0; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vclez_s16 #define vclez_s16(a) simde_vclez_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vclez_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vclez_s32(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcle_s32(a, simde_vdup_n_s32(0)); #else simde_int32x2_private a_ = simde_int32x2_to_private(a); simde_uint32x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= 0) ? UINT32_MAX : 0; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vclez_s32 #define vclez_s32(a) simde_vclez_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vclez_s64(simde_int64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vclez_s64(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vcle_s64(a, simde_vdup_n_s64(0)); #else simde_int64x1_private a_ = simde_int64x1_to_private(a); simde_uint64x1_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] <= 0) ? 
UINT64_MAX : 0; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vclez_s64 #define vclez_s64(a) simde_vclez_s64(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_CLEZ_H) */ simde-0.7.2/simde/arm/neon/cls.h000066400000000000000000000113661400333146700164020ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_CLS_H) #define SIMDE_ARM_NEON_CLS_H #include "types.h" #include "bsl.h" #include "clz.h" #include "cltz.h" #include "dup_n.h" #include "mvn.h" #include "sub.h" #include "reinterpret.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vcls_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcls_s8(a); #else return simde_vsub_s8(simde_vclz_s8(simde_vbsl_s8(simde_vcltz_s8(a), simde_vmvn_s8(a), a)), simde_vdup_n_s8(INT8_C(1))); #endif } #define simde_vcls_u8(a) simde_vcls_s8(simde_vreinterpret_s8_u8(a)) #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcls_s8 #define vcls_s8(a) simde_vcls_s8(a) #undef vcls_u8 #define vcls_u8(a) simde_vcls_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vcls_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcls_s16(a); #else return simde_vsub_s16(simde_vclz_s16(simde_vbsl_s16(simde_vcltz_s16(a), simde_vmvn_s16(a), a)), simde_vdup_n_s16(INT16_C(1))); #endif } #define simde_vcls_u16(a) simde_vcls_s16(simde_vreinterpret_s16_u16(a)) #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcls_s16 #define vcls_s16(a) simde_vcls_s16(a) #undef vcls_u16 #define vcls_u16(a) simde_vcls_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vcls_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcls_s32(a); #else return simde_vsub_s32(simde_vclz_s32(simde_vbsl_s32(simde_vcltz_s32(a), simde_vmvn_s32(a), a)), simde_vdup_n_s32(INT32_C(1))); #endif } #define simde_vcls_u32(a) simde_vcls_s32(simde_vreinterpret_s32_u32(a)) #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcls_s32 #define vcls_s32(a) simde_vcls_s32(a) #undef vcls_u32 #define vcls_u32(a) simde_vcls_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vclsq_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) 
return vclsq_s8(a); #else return simde_vsubq_s8(simde_vclzq_s8(simde_vbslq_s8(simde_vcltzq_s8(a), simde_vmvnq_s8(a), a)), simde_vdupq_n_s8(INT8_C(1))); #endif } #define simde_vclsq_u8(a) simde_vclsq_s8(simde_vreinterpretq_s8_u8(a)) #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vclsq_s8 #define vclsq_s8(a) simde_vclsq_s8(a) #undef vclsq_u8 #define vclsq_u8(a) simde_vclsq_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vclsq_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vclsq_s16(a); #else return simde_vsubq_s16(simde_vclzq_s16(simde_vbslq_s16(simde_vcltzq_s16(a), simde_vmvnq_s16(a), a)), simde_vdupq_n_s16(INT16_C(1))); #endif } #define simde_vclsq_u16(a) simde_vclsq_s16(simde_vreinterpretq_s16_u16(a)) #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vclsq_s16 #define vclsq_s16(a) simde_vclsq_s16(a) #undef vclsq_u16 #define vclsq_u16(a) simde_vclsq_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vclsq_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vclsq_s32(a); #else return simde_vsubq_s32(simde_vclzq_s32(simde_vbslq_s32(simde_vcltzq_s32(a), simde_vmvnq_s32(a), a)), simde_vdupq_n_s32(INT32_C(1))); #endif } #define simde_vclsq_u32(a) simde_vclsq_s32(simde_vreinterpretq_s32_u32(a)) #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vclsq_s32 #define vclsq_s32(a) simde_vclsq_s32(a) #undef vclsq_u32 #define vclsq_u32(a) simde_vclsq_u32(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_CLS_H) */ simde-0.7.2/simde/arm/neon/clt.h000066400000000000000000000534001400333146700163760ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, 
distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_CLT_H) #define SIMDE_ARM_NEON_CLT_H #include "combine.h" #include "get_low.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcltq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcltq_f32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_castps_si128(_mm_cmplt_ps(a, b)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmplt(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_lt(a, b); #else simde_float32x4_private a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); simde_uint32x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT32_MAX : 0; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcltq_f32 #define vcltq_f32(a, b) simde_vcltq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vcltq_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcltq_f64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_castpd_si128(_mm_cmplt_pd(a, b)); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmplt(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_lt(a, b); #else simde_float64x2_private a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); simde_uint64x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT64_MAX : 0; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcltq_f64 #define vcltq_f64(a, b) simde_vcltq_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vcltq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcltq_s8(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmplt_epi8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmplt(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_lt(a, b); #else simde_int8x16_private a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); simde_uint8x16_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT8_MAX : 0; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcltq_s8 #define vcltq_s8(a, b) simde_vcltq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vcltq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcltq_s16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmplt_epi16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmplt(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_lt(a, b); #else simde_int16x8_private a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); simde_uint16x8_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT16_MAX : 0; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcltq_s16 #define vcltq_s16(a, b) simde_vcltq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcltq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcltq_s32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmplt_epi32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmplt(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_lt(a, b); #else simde_int32x4_private a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); simde_uint32x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT32_MAX : 0; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcltq_s32 #define vcltq_s32(a, b) simde_vcltq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vcltq_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcltq_s64(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u64_s64(vshrq_n_s64(vqsubq_s64(a, b), 63)); #elif defined(SIMDE_X86_SSE4_2_NATIVE) return _mm_cmpgt_epi64(b, a); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmplt(a, b)); #else simde_int64x2_private a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); simde_uint64x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT64_MAX : 0; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcltq_s64 #define vcltq_s64(a, b) simde_vcltq_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vcltq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcltq_u8(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) __m128i sign_bits = _mm_set1_epi8(INT8_MIN); return _mm_cmplt_epi8(_mm_xor_si128(a, sign_bits), _mm_xor_si128(b, sign_bits)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmplt(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u8x16_lt(a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT8_MAX : 0; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcltq_u8 #define vcltq_u8(a, b) simde_vcltq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vcltq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcltq_u16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) __m128i sign_bits = _mm_set1_epi16(INT16_MIN); return _mm_cmplt_epi16(_mm_xor_si128(a, sign_bits), _mm_xor_si128(b, sign_bits)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmplt(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u16x8_lt(a, b); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT16_MAX : 0; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcltq_u16 #define vcltq_u16(a, b) simde_vcltq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcltq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcltq_u32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) __m128i sign_bits = _mm_set1_epi32(INT32_MIN); return _mm_cmplt_epi32(_mm_xor_si128(a, sign_bits), _mm_xor_si128(b, sign_bits)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmplt(a, b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u32x4_lt(a, b); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT32_MAX : 0; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcltq_u32 #define vcltq_u32(a, b) simde_vcltq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vcltq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcltq_u64(a, b); #elif defined(SIMDE_X86_SSE4_2_NATIVE) __m128i sign_bits = _mm_set1_epi64x(INT64_MIN); return _mm_cmpgt_epi64(_mm_xor_si128(b, sign_bits), _mm_xor_si128(a, sign_bits)); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmplt(a, b)); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT64_MAX : 0; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcltq_u64 #define vcltq_u64(a, b) simde_vcltq_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vclt_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vclt_f32(a, b); #else simde_float32x2_private a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); simde_uint32x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT32_MAX : 0; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vclt_f32 #define vclt_f32(a, b) simde_vclt_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vclt_f64(simde_float64x1_t a, simde_float64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vclt_f64(a, b); #else simde_float64x1_private a_ = simde_float64x1_to_private(a), b_ = simde_float64x1_to_private(b); simde_uint64x1_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT64_MAX : 0; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vclt_f64 #define vclt_f64(a, b) simde_vclt_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vclt_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vclt_s8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpgt_pi8(b, a); #else simde_int8x8_private a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); simde_uint8x8_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT8_MAX : 0; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vclt_s8 #define vclt_s8(a, b) simde_vclt_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vclt_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vclt_s16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpgt_pi16(b, a); #else simde_int16x4_private a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); simde_uint16x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT16_MAX : 0; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vclt_s16 #define vclt_s16(a, b) simde_vclt_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vclt_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vclt_s32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpgt_pi32(b, a); #else simde_int32x2_private a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); simde_uint32x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vclt_s32
  #define vclt_s32(a, b) simde_vclt_s32((a), (b))
#endif

/* Lane-wise signed 64-bit "a < b".
 *
 * Uses the native vclt_s64 when SIMDE_ARM_NEON_A64V8_NATIVE is defined.
 * Otherwise each result lane is UINT64_MAX when the comparison holds and
 * 0 when it does not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vclt_s64(simde_int64x1_t a, simde_int64x1_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vclt_s64(a, b);
  #else
    simde_int64x1_private
      a_ = simde_int64x1_to_private(a),
      b_ = simde_int64x1_to_private(b);
    simde_uint64x1_private r_;

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* The vector comparison result is reinterpreted as the unsigned
       * result vector type. */
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT64_MAX : 0;
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vclt_s64
  #define vclt_s64(a, b) simde_vclt_s64((a), (b))
#endif

/* Lane-wise unsigned 8-bit "a < b"; true lanes become UINT8_MAX, false
 * lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vclt_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vclt_u8(a, b);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    /* Both operands are XORed with the sign bit before the greater-than
     * compare, and the operands are swapped so that "b > a" yields
     * "a < b".  NOTE(review): this relies on _mm_cmpgt_pi8 being a signed
     * compare -- confirm against the intrinsic reference. */
    __m64 sign_bits = _mm_set1_pi8(INT8_MIN);
    return _mm_cmpgt_pi8(_mm_xor_si64(b, sign_bits), _mm_xor_si64(a, sign_bits));
  #else
    simde_uint8x8_private
      r_,
      a_ = simde_uint8x8_to_private(a),
      b_ = simde_uint8x8_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT8_MAX : 0;
      }
    #endif

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vclt_u8
  #define vclt_u8(a, b) simde_vclt_u8((a), (b))
#endif

/* Lane-wise unsigned 16-bit "a < b"; true lanes become UINT16_MAX, false
 * lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vclt_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vclt_u16(a, b);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    /* Same sign-bit flip and operand swap as the 8-bit variant. */
    __m64 sign_bits = _mm_set1_pi16(INT16_MIN);
    return _mm_cmpgt_pi16(_mm_xor_si64(b, sign_bits), _mm_xor_si64(a, sign_bits));
  #else
    simde_uint16x4_private
      r_,
      a_ = simde_uint16x4_to_private(a),
      b_ = simde_uint16x4_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT16_MAX : 0;
      }
    #endif

    return simde_uint16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vclt_u16
  #define vclt_u16(a, b) simde_vclt_u16((a), (b))
#endif

/* Lane-wise unsigned 32-bit "a < b"; true lanes become UINT32_MAX, false
 * lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vclt_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vclt_u32(a, b);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    /* Same sign-bit flip and operand swap as the 8-bit variant. */
    __m64 sign_bits = _mm_set1_pi32(INT32_MIN);
    return _mm_cmpgt_pi32(_mm_xor_si64(b, sign_bits), _mm_xor_si64(a, sign_bits));
  #else
    simde_uint32x2_private
      r_,
      a_ = simde_uint32x2_to_private(a),
      b_ = simde_uint32x2_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT32_MAX : 0;
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vclt_u32
  #define vclt_u32(a, b) simde_vclt_u32((a), (b))
#endif

/* Lane-wise unsigned 64-bit "a < b"; true lanes become UINT64_MAX, false
 * lanes become 0.  The native intrinsic is only used when
 * SIMDE_ARM_NEON_A64V8_NATIVE is defined. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vclt_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vclt_u64(a, b);
  #else
    simde_uint64x1_private
      r_,
      a_ = simde_uint64x1_to_private(a),
      b_ = simde_uint64x1_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT64_MAX : 0;
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vclt_u64
  #define vclt_u64(a, b) simde_vclt_u64((a), (b))
#endif

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_ARM_NEON_CLT_H) */
simde-0.7.2/simde/arm/neon/cltz.h000066400000000000000000000171331400333146700165730ustar00rootroot00000000000000
/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ /* TODO: float fallbacks should use vclt(a, vdup_n(0.0)) */ #if !defined(SIMDE_ARM_NEON_CLTZ_H) #define SIMDE_ARM_NEON_CLTZ_H #include "types.h" #include "shr_n.h" #include "reinterpret.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcltz_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcltz_f32(a); #else simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_uint32x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT32_C(0.0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < SIMDE_FLOAT32_C(0.0)) ? ~UINT32_C(0) : UINT32_C(0); } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcltz_f32 #define vcltz_f32(a) simde_vcltz_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vcltz_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcltz_f64(a); #else simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_uint64x1_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT64_C(0.0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < SIMDE_FLOAT64_C(0.0)) ? 
~UINT64_C(0) : UINT64_C(0); } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcltz_f64 #define vcltz_f64(a) simde_vcltz_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vcltz_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcltz_s8(a); #else return simde_vreinterpret_u8_s8(simde_vshr_n_s8(a, 7)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcltz_s8 #define vcltz_s8(a) simde_vcltz_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vcltz_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcltz_s16(a); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_srai_pi16(a, 15); #else return simde_vreinterpret_u16_s16(simde_vshr_n_s16(a, 15)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcltz_s16 #define vcltz_s16(a) simde_vcltz_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcltz_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcltz_s32(a); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_srai_pi32(a, 31); #else return simde_vreinterpret_u32_s32(simde_vshr_n_s32(a, 31)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcltz_s32 #define vcltz_s32(a) simde_vcltz_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vcltz_s64(simde_int64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcltz_s64(a); #else return simde_vreinterpret_u64_s64(simde_vshr_n_s64(a, 63)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcltz_s64 #define vcltz_s64(a) simde_vcltz_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcltzq_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcltzq_f32(a); #else simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_uint32x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = 
HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT32_C(0.0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < SIMDE_FLOAT32_C(0.0)) ? ~UINT32_C(0) : UINT32_C(0); } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcltzq_f32 #define vcltzq_f32(a) simde_vcltzq_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vcltzq_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcltzq_f64(a); #else simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_uint64x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT64_C(0.0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < SIMDE_FLOAT64_C(0.0)) ? ~UINT64_C(0) : UINT64_C(0); } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcltzq_f64 #define vcltzq_f64(a) simde_vcltzq_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vcltzq_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcltzq_s8(a); #else return simde_vreinterpretq_u8_s8(simde_vshrq_n_s8(a, 7)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcltzq_s8 #define vcltzq_s8(a) simde_vcltzq_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vcltzq_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcltzq_s16(a); #else return simde_vreinterpretq_u16_s16(simde_vshrq_n_s16(a, 15)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcltzq_s16 #define vcltzq_s16(a) simde_vcltzq_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcltzq_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return 
vcltzq_s32(a); #else return simde_vreinterpretq_u32_s32(simde_vshrq_n_s32(a, 31)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcltzq_s32 #define vcltzq_s32(a) simde_vcltzq_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vcltzq_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcltzq_s64(a); #else return simde_vreinterpretq_u64_s64(simde_vshrq_n_s64(a, 63)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcltzq_s64 #define vcltzq_s64(a) simde_vcltzq_s64(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_CLTZ_H) */ simde-0.7.2/simde/arm/neon/clz.h000066400000000000000000000301431400333146700164030ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_CLZ_H) #define SIMDE_ARM_NEON_CLZ_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES uint8_t simde_x_vclzb_u8(uint8_t a) { #if \ defined(SIMDE_BUILTIN_SUFFIX_8_) && \ ( \ SIMDE_BUILTIN_HAS_8_(clz) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) \ ) if (HEDLEY_UNLIKELY(a == 0)) return 8 * sizeof(r); return HEDLEY_STATIC_CAST(uint8_t, SIMDE_BUILTIN_8_(clz)(HEDLEY_STATIC_CAST(unsigned SIMDE_BUILTIN_TYPE_8_, a))); #else uint8_t r; uint8_t shift; if (HEDLEY_UNLIKELY(a == 0)) return 8 * sizeof(r); r = HEDLEY_STATIC_CAST(uint8_t, (a > UINT8_C(0x0F)) << 2); a >>= r; shift = HEDLEY_STATIC_CAST(uint8_t, (a > UINT8_C(0x03)) << 1); a >>= shift; r |= shift; r |= (a >> 1); return ((8 * sizeof(r)) - 1) - r; #endif } SIMDE_FUNCTION_ATTRIBUTES uint16_t simde_x_vclzh_u16(uint16_t a) { #if \ defined(SIMDE_BUILTIN_SUFFIX_16_) && \ ( \ SIMDE_BUILTIN_HAS_16_(clz) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) \ ) if (HEDLEY_UNLIKELY(a == 0)) return 8 * sizeof(r); return HEDLEY_STATIC_CAST(uint16_t, SIMDE_BUILTIN_16_(clz)(HEDLEY_STATIC_CAST(unsigned SIMDE_BUILTIN_TYPE_16_, a))); #else uint16_t r; uint16_t shift; if (HEDLEY_UNLIKELY(a == 0)) return 8 * sizeof(r); r = HEDLEY_STATIC_CAST(uint16_t, (a > UINT16_C(0x00FF)) << 3); a >>= r; shift = HEDLEY_STATIC_CAST(uint16_t, (a > UINT16_C(0x000F)) << 2); a >>= shift; r |= shift; shift = HEDLEY_STATIC_CAST(uint16_t, (a > UINT16_C(0x0003)) << 1); a >>= shift; r |= shift; r |= (a >> 1); return ((8 * sizeof(r)) - 1) - r; #endif } SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_x_vclzs_u32(uint32_t a) { #if \ defined(SIMDE_BUILTIN_SUFFIX_32_) && \ ( \ SIMDE_BUILTIN_HAS_32_(clz) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) \ ) 
if (HEDLEY_UNLIKELY(a == 0)) return 8 * sizeof(a); return HEDLEY_STATIC_CAST(uint32_t, SIMDE_BUILTIN_32_(clz)(HEDLEY_STATIC_CAST(unsigned SIMDE_BUILTIN_TYPE_32_, a))); #else uint32_t r; uint32_t shift; if (HEDLEY_UNLIKELY(a == 0)) return 8 * sizeof(a); r = HEDLEY_STATIC_CAST(uint32_t, (a > UINT32_C(0xFFFF)) << 4); a >>= r; shift = HEDLEY_STATIC_CAST(uint32_t, (a > UINT32_C(0x00FF)) << 3); a >>= shift; r |= shift; shift = HEDLEY_STATIC_CAST(uint32_t, (a > UINT32_C(0x000F)) << 2); a >>= shift; r |= shift; shift = HEDLEY_STATIC_CAST(uint32_t, (a > UINT32_C(0x0003)) << 1); a >>= shift; r |= shift; r |= (a >> 1); return ((8 * sizeof(r)) - 1) - r; #endif } SIMDE_FUNCTION_ATTRIBUTES int8_t simde_x_vclzb_s8(int8_t a) { return HEDLEY_STATIC_CAST(int8_t, simde_x_vclzb_u8(HEDLEY_STATIC_CAST(uint8_t, a))); } SIMDE_FUNCTION_ATTRIBUTES int16_t simde_x_vclzh_s16(int16_t a) { return HEDLEY_STATIC_CAST(int16_t, simde_x_vclzh_u16(HEDLEY_STATIC_CAST(uint16_t, a))); } SIMDE_FUNCTION_ATTRIBUTES int32_t simde_x_vclzs_s32(int32_t a) { return HEDLEY_STATIC_CAST(int32_t, simde_x_vclzs_u32(HEDLEY_STATIC_CAST(uint32_t, a))); } SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vclz_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vclz_s8(a); #else simde_int8x8_private a_ = simde_int8x8_to_private(a), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_x_vclzb_s8(a_.values[i]); } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vclz_s8 #define vclz_s8(a) simde_vclz_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vclz_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vclz_s16(a); #else simde_int16x4_private a_ = simde_int16x4_to_private(a), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_x_vclzh_s16(a_.values[i]); } return simde_int16x4_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vclz_s16 #define vclz_s16(a) simde_vclz_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vclz_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vclz_s32(a); #else simde_int32x2_private a_ = simde_int32x2_to_private(a), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_x_vclzs_s32(a_.values[i]); } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vclz_s32 #define vclz_s32(a) simde_vclz_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vclz_u8(simde_uint8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vclz_u8(a); #else simde_uint8x8_private a_ = simde_uint8x8_to_private(a), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_x_vclzb_u8(a_.values[i]); } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vclz_u8 #define vclz_u8(a) simde_vclz_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vclz_u16(simde_uint16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vclz_u16(a); #else simde_uint16x4_private a_ = simde_uint16x4_to_private(a), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_x_vclzh_u16(a_.values[i]); } return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vclz_u16 #define vclz_u16(a) simde_vclz_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vclz_u32(simde_uint32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vclz_u32(a); #else simde_uint32x2_private a_ = simde_uint32x2_to_private(a), r_; for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_x_vclzs_u32(a_.values[i]); } return simde_uint32x2_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vclz_u32
  #define vclz_u32(a) simde_vclz_u32(a)
#endif

/* Lane-wise count-leading-zeros over sixteen signed 8-bit lanes.
 *
 * Uses vclzq_s8 natively on A32V7, a GFNI sequence when
 * SIMDE_X86_GFNI_NATIVE is defined, and otherwise calls
 * simde_x_vclzb_s8 on every lane. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vclzq_s8(simde_int8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vclzq_s8(a);
  #elif defined(SIMDE_X86_GFNI_NATIVE)
    /* https://gist.github.com/animetosho/6cb732ccb5ecd86675ca0a442b3c0622 */
    /* NOTE(review): presumably this first affine transform reverses the
     * bit order within each byte -- confirm against the linked gist. */
    a = _mm_gf2p8affine_epi64_epi8(a, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201), HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201)), 0);
    /* a & ~(a + 0xff): adding 0xff per byte subtracts one, so this keeps
     * only the lowest set bit of each byte. */
    a = _mm_andnot_si128(_mm_add_epi8(a, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, 0xff))), a);
    /* NOTE(review): presumably maps the isolated bit to its index, with
     * the constant 8 supplying the result for a zero byte -- confirm. */
    return _mm_gf2p8affine_epi64_epi8(a, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0, HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0), 8);
  #else
    simde_int8x16_private
      a_ = simde_int8x16_to_private(a),
      r_;

    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = simde_x_vclzb_s8(a_.values[i]);
    }

    return simde_int8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vclzq_s8
  #define vclzq_s8(a) simde_vclzq_s8(a)
#endif

/* Lane-wise count-leading-zeros over eight signed 16-bit lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vclzq_s16(simde_int16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vclzq_s16(a);
  #else
    simde_int16x8_private
      a_ = simde_int16x8_to_private(a),
      r_;

    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = simde_x_vclzh_s16(a_.values[i]);
    }

    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vclzq_s16
  #define vclzq_s16(a) simde_vclzq_s16(a)
#endif

/* Lane-wise count-leading-zeros over four signed 32-bit lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vclzq_s32(simde_int32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vclzq_s32(a);
  #else
    simde_int32x4_private
      a_ = simde_int32x4_to_private(a),
      r_;

    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = simde_x_vclzs_s32(a_.values[i]);
    }

    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vclzq_s32
  #define vclzq_s32(a) simde_vclzq_s32(a)
#endif

/* Lane-wise count-leading-zeros over sixteen unsigned 8-bit lanes.
 * The GFNI branch is the same three-step sequence used by
 * simde_vclzq_s8. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vclzq_u8(simde_uint8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vclzq_u8(a);
  #elif defined(SIMDE_X86_GFNI_NATIVE)
    a = _mm_gf2p8affine_epi64_epi8(a, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201), HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201)), 0);
    /* Keep only the lowest set bit of each byte (a & ~(a - 1)). */
    a = _mm_andnot_si128(_mm_add_epi8(a, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, 0xff))), a);
    return _mm_gf2p8affine_epi64_epi8(a, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0, HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0), 8);
  #else
    simde_uint8x16_private
      a_ = simde_uint8x16_to_private(a),
      r_;

    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = simde_x_vclzb_u8(a_.values[i]);
    }

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vclzq_u8
  #define vclzq_u8(a) simde_vclzq_u8(a)
#endif

/* Lane-wise count-leading-zeros over eight unsigned 16-bit lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vclzq_u16(simde_uint16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vclzq_u16(a);
  #else
    simde_uint16x8_private
      a_ = simde_uint16x8_to_private(a),
      r_;

    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = simde_x_vclzh_u16(a_.values[i]);
    }

    return simde_uint16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vclzq_u16
  #define vclzq_u16(a) simde_vclzq_u16(a)
#endif

/* Lane-wise count-leading-zeros over four unsigned 32-bit lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vclzq_u32(simde_uint32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vclzq_u32(a);
  #else
    simde_uint32x4_private
      a_ = simde_uint32x4_to_private(a),
      r_;

    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = simde_x_vclzs_u32(a_.values[i]);
    }

    return simde_uint32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vclzq_u32
  #define vclzq_u32(a) simde_vclzq_u32(a)
#endif

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_ARM_NEON_CLZ_H) */
simde-0.7.2/simde/arm/neon/cnt.h000066400000000000000000000104301400333146700163740ustar00rootroot00000000000000
/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_CNT_H) #define SIMDE_ARM_NEON_CNT_H #include "types.h" #include HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES uint8_t simde_x_arm_neon_cntb(uint8_t v) { v = v - ((v >> 1) & (85)); v = (v & (51)) + ((v >> (2)) & (51)); v = (v + (v >> (4))) & (15); return HEDLEY_STATIC_CAST(uint8_t, v) >> (sizeof(uint8_t) - 1) * CHAR_BIT; } SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vcnt_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcnt_s8(a); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int8_t, simde_x_arm_neon_cntb(HEDLEY_STATIC_CAST(uint8_t, a_.values[i]))); } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcnt_s8 #define vcnt_s8(a) simde_vcnt_s8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vcnt_u8(simde_uint8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcnt_u8(a); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_x_arm_neon_cntb(a_.values[i]); } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcnt_u8 #define vcnt_u8(a) simde_vcnt_u8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vcntq_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcntq_s8(a); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_popcnt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), a))); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int8_t, simde_x_arm_neon_cntb(HEDLEY_STATIC_CAST(uint8_t, a_.values[i]))); } return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcntq_s8 #define vcntq_s8(a) simde_vcntq_s8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vcntq_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcntq_u8(a); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_popcnt(a); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_x_arm_neon_cntb(a_.values[i]); } return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcntq_u8 #define vcntq_u8(a) simde_vcntq_u8((a)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_CNT_H) */ simde-0.7.2/simde/arm/neon/combine.h000066400000000000000000000265321400333146700172360ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the folhighing conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_COMBINE_H) #define SIMDE_ARM_NEON_COMBINE_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vcombine_f32(simde_float32x2_t low, simde_float32x2_t high) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcombine_f32(low, high); #else simde_float32x4_private r_; simde_float32x2_private low_ = simde_float32x2_to_private(low), high_ = simde_float32x2_to_private(high); /* Note: __builtin_shufflevector can have a the output contain * twice the number of elements, __builtin_shuffle cannot. * Using SIMDE_SHUFFLE_VECTOR_ here would not work. 
*/ #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3); #else size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway ; i++) { r_.values[i] = low_.values[i]; r_.values[i + halfway] = high_.values[i]; } #endif return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcombine_f32 #define vcombine_f32(low, high) simde_vcombine_f32((low), (high)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vcombine_f64(simde_float64x1_t low, simde_float64x1_t high) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcombine_f64(low, high); #else simde_float64x2_private r_; simde_float64x1_private low_ = simde_float64x1_to_private(low), high_ = simde_float64x1_to_private(high); #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1); #else size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway ; i++) { r_.values[i] = low_.values[i]; r_.values[i + halfway] = high_.values[i]; } #endif return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcombine_f64 #define vcombine_f64(low, high) simde_vcombine_f64((low), (high)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vcombine_s8(simde_int8x8_t low, simde_int8x8_t high) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcombine_s8(low, high); #else simde_int8x16_private r_; simde_int8x8_private low_ = simde_int8x8_to_private(low), high_ = simde_int8x8_to_private(high); #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); #else size_t halfway = 
(sizeof(r_.values) / sizeof(r_.values[0])) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway ; i++) { r_.values[i] = low_.values[i]; r_.values[i + halfway] = high_.values[i]; } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcombine_s8 #define vcombine_s8(low, high) simde_vcombine_s8((low), (high)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vcombine_s16(simde_int16x4_t low, simde_int16x4_t high) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcombine_s16(low, high); #else simde_int16x8_private r_; simde_int16x4_private low_ = simde_int16x4_to_private(low), high_ = simde_int16x4_to_private(high); #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7); #else size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway ; i++) { r_.values[i] = low_.values[i]; r_.values[i + halfway] = high_.values[i]; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcombine_s16 #define vcombine_s16(low, high) simde_vcombine_s16((low), (high)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vcombine_s32(simde_int32x2_t low, simde_int32x2_t high) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcombine_s32(low, high); #else simde_int32x4_private r_; simde_int32x2_private low_ = simde_int32x2_to_private(low), high_ = simde_int32x2_to_private(high); #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3); #else size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway ; i++) { r_.values[i] = low_.values[i]; r_.values[i + halfway] = high_.values[i]; } #endif return simde_int32x4_from_private(r_); #endif } 
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcombine_s32 #define vcombine_s32(low, high) simde_vcombine_s32((low), (high)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vcombine_s64(simde_int64x1_t low, simde_int64x1_t high) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcombine_s64(low, high); #else simde_int64x2_private r_; simde_int64x1_private low_ = simde_int64x1_to_private(low), high_ = simde_int64x1_to_private(high); #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1); #else size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway ; i++) { r_.values[i] = low_.values[i]; r_.values[i + halfway] = high_.values[i]; } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcombine_s64 #define vcombine_s64(low, high) simde_vcombine_s64((low), (high)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vcombine_u8(simde_uint8x8_t low, simde_uint8x8_t high) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcombine_u8(low, high); #else simde_uint8x16_private r_; simde_uint8x8_private low_ = simde_uint8x8_to_private(low), high_ = simde_uint8x8_to_private(high); #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); #else size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway ; i++) { r_.values[i] = low_.values[i]; r_.values[i + halfway] = high_.values[i]; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcombine_u8 #define vcombine_u8(low, high) simde_vcombine_u8((low), (high)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t 
simde_vcombine_u16(simde_uint16x4_t low, simde_uint16x4_t high) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcombine_u16(low, high); #else simde_uint16x8_private r_; simde_uint16x4_private low_ = simde_uint16x4_to_private(low), high_ = simde_uint16x4_to_private(high); #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7); #else size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway ; i++) { r_.values[i] = low_.values[i]; r_.values[i + halfway] = high_.values[i]; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcombine_u16 #define vcombine_u16(low, high) simde_vcombine_u16((low), (high)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcombine_u32(simde_uint32x2_t low, simde_uint32x2_t high) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcombine_u32(low, high); #else simde_uint32x4_private r_; simde_uint32x2_private low_ = simde_uint32x2_to_private(low), high_ = simde_uint32x2_to_private(high); #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3); #else size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway ; i++) { r_.values[i] = low_.values[i]; r_.values[i + halfway] = high_.values[i]; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcombine_u32 #define vcombine_u32(low, high) simde_vcombine_u32((low), (high)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vcombine_u64(simde_uint64x1_t low, simde_uint64x1_t high) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcombine_u64(low, high); #else simde_uint64x2_private r_; simde_uint64x1_private low_ = 
simde_uint64x1_to_private(low), high_ = simde_uint64x1_to_private(high); #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1); #else size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway ; i++) { r_.values[i] = low_.values[i]; r_.values[i + halfway] = high_.values[i]; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcombine_u64 #define vcombine_u64(low, high) simde_vcombine_u64((low), (high)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_COMBINE_H) */ simde-0.7.2/simde/arm/neon/create.h000066400000000000000000000120661400333146700170620ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ /* N.B. CM: vcreate_f16 and vcreate_bf16 are omitted as * SIMDe has no 16-bit floating point support. * Idem for the poly types. */ #if !defined(SIMDE_ARM_NEON_CREATE_H) #define SIMDE_ARM_NEON_CREATE_H #include "dup_n.h" #include "reinterpret.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vcreate_s8(uint64_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcreate_s8(a); #else return simde_vreinterpret_s8_u64(simde_vdup_n_u64(a)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcreate_s8 #define vcreate_s8(a) simde_vcreate_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vcreate_s16(uint64_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcreate_s16(a); #else return simde_vreinterpret_s16_u64(simde_vdup_n_u64(a)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcreate_s16 #define vcreate_s16(a) simde_vcreate_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vcreate_s32(uint64_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcreate_s32(a); #else return simde_vreinterpret_s32_u64(simde_vdup_n_u64(a)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcreate_s32 #define vcreate_s32(a) simde_vcreate_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vcreate_s64(uint64_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcreate_s64(a); #else return simde_vreinterpret_s64_u64(simde_vdup_n_u64(a)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcreate_s64 #define vcreate_s64(a) simde_vcreate_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vcreate_u8(uint64_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcreate_u8(a); #else return simde_vreinterpret_u8_u64(simde_vdup_n_u64(a)); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcreate_u8 #define vcreate_u8(a) simde_vcreate_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vcreate_u16(uint64_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcreate_u16(a); #else return simde_vreinterpret_u16_u64(simde_vdup_n_u64(a)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcreate_u16 #define vcreate_u16(a) simde_vcreate_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcreate_u32(uint64_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcreate_u32(a); #else return simde_vreinterpret_u32_u64(simde_vdup_n_u64(a)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcreate_u32 #define vcreate_u32(a) simde_vcreate_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vcreate_u64(uint64_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcreate_u64(a); #else return simde_vdup_n_u64(a); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcreate_u64 #define vcreate_u64(a) simde_vcreate_u64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vcreate_f32(uint64_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcreate_f32(a); #else return simde_vreinterpret_f32_u64(simde_vdup_n_u64(a)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcreate_f32 #define vcreate_f32(a) simde_vcreate_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vcreate_f64(uint64_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcreate_f64(a); #else return simde_vreinterpret_f64_u64(simde_vdup_n_u64(a)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcreate_f64 #define vcreate_f64(a) simde_vcreate_f64(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_CREATE_H) */ simde-0.7.2/simde/arm/neon/cvt.h000066400000000000000000000355141400333146700164160ustar00rootroot00000000000000/* SPDX-License-Identifier: 
MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher */ /* * Implementor's notes: seanptmaher * * this intrinsic has many (7 pages worth) of variations on it, so * this file might become a dumping ground for various * implementations. I'm only going to write the `cvt` intrinsics for * now, but off the top of my head, we've got: * * - vcvt* <- round towards 0 * - vcvtn* <- round to nearest with ties to even * - vcvtm* <- round towards minus infinity * - vcvtp* <- round towards plus infinity * - vcvta* <- round to nearest with ties to away * * and there are a bunch of variations on these. 
More information can * be found here: * https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics?page=1&search=vcvt */ #if !defined(SIMDE_ARM_NEON_CVT_H) #define SIMDE_ARM_NEON_CVT_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vcvts_s32_f32(simde_float32 a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcvts_s32_f32(a); #else return HEDLEY_STATIC_CAST(int32_t, a); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvts_s32_f32 #define vcvts_s32_f32(a) simde_vcvts_s32_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vcvts_u32_f32(simde_float32 a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) return vcvts_u32_f32(a); #else return HEDLEY_STATIC_CAST(uint32_t, (a < 0) ? 0 : a); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvts_u32_f32 #define vcvts_u32_f32(a) simde_vcvts_u32_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32 simde_vcvts_f32_s32(int32_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcvts_f32_s32(a); #else return HEDLEY_STATIC_CAST(simde_float32, a); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvts_f32_s32 #define vcvts_f32_s32(a) simde_vcvts_f32_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32 simde_vcvts_f32_u32 (uint32_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) return vcvts_f32_u32(a); #else return HEDLEY_STATIC_CAST(simde_float32, a); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvts_f32_u32 #define vcvts_f32_u32(a) simde_vcvts_f32_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_vcvtd_s64_f64(simde_float64 a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcvtd_s64_f64(a); #else return HEDLEY_STATIC_CAST(int64_t, a); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvtd_s64_f64 #define vcvtd_s64_f64(a) 
simde_vcvtd_s64_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_vcvtd_u64_f64(simde_float64 a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) return vcvtd_u64_f64(a); #else return HEDLEY_STATIC_CAST(uint64_t, (a < 0) ? 0 : a); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvtd_u64_f64 #define vcvtd_u64_f64(a) simde_vcvtd_u64_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64 simde_vcvtd_f64_s64(int64_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcvtd_f64_s64(a); #else return HEDLEY_STATIC_CAST(simde_float64, a); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvtd_f64_s64 #define vcvtd_f64_s64(a) simde_vcvtd_f64_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64 simde_vcvtd_f64_u64(uint64_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) return vcvtd_f64_u64(a); #else return HEDLEY_STATIC_CAST(simde_float64, a); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvtd_f64_u64 #define vcvtd_f64_u64(a) simde_vcvtd_f64_u64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vcvt_s32_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcvt_s32_f32(a); #else simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_int32x2_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvts_s32_f32(a_.values[i]); } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcvt_s32_f32 #define vcvt_s32_f32(a) simde_vcvt_s32_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcvt_u32_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) return vcvt_u32_f32(a); #else simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_uint32x2_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < 
(sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvts_u32_f32(a_.values[i]); } return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcvt_u32_f32 #define vcvt_u32_f32(a) simde_vcvt_u32_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vcvt_s64_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcvt_s64_f64(a); #else simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_int64x1_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvtd_s64_f64(a_.values[i]); } return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvt_s64_f64 #define vcvt_s64_f64(a) simde_vcvt_s64_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vcvt_u64_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) return vcvt_u64_f64(a); #else simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_uint64x1_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvtd_u64_f64(a_.values[i]); } return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvt_u64_f64 #define vcvt_u64_f64(a) simde_vcvt_u64_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vcvtq_s32_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcvtq_s32_f32(a); #else simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_int32x4_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvts_s32_f32(a_.values[i]); } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcvtq_s32_f32 #define vcvtq_s32_f32(a) 
simde_vcvtq_s32_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcvtq_u32_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) return vcvtq_u32_f32(a); #else simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_uint32x4_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvts_u32_f32(a_.values[i]); } return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcvtq_u32_f32 #define vcvtq_u32_f32(a) simde_vcvtq_u32_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vcvtq_s64_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcvtq_s64_f64(a); #else simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_int64x2_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvtd_s64_f64(a_.values[i]); } return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvtq_s64_f64 #define vcvtq_s64_f64(a) simde_vcvtq_s64_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vcvtq_u64_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) return vcvtq_u64_f64(a); #else simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_uint64x2_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvtd_u64_f64(a_.values[i]); } return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvtq_u64_f64 #define vcvtq_u64_f64(a) simde_vcvtq_u64_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vcvt_f32_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcvt_f32_s32(a); #else simde_int32x2_private a_ = 
simde_int32x2_to_private(a); simde_float32x2_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvts_f32_s32(a_.values[i]); } return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcvt_f32_s32 #define vcvt_f32_s32(a) simde_vcvt_f32_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vcvt_f32_u32(simde_uint32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) return vcvt_f32_u32(a); #else simde_uint32x2_private a_ = simde_uint32x2_to_private(a); simde_float32x2_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvts_f32_u32(a_.values[i]); } return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcvt_f32_u32 #define vcvt_f32_u32(a) simde_vcvt_f32_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vcvt_f64_s64(simde_int64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcvt_f64_s64(a); #else simde_int64x1_private a_ = simde_int64x1_to_private(a); simde_float64x1_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvtd_f64_s64(a_.values[i]); } return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvt_f64_s64 #define vcvt_f64_s64(a) simde_vcvt_f64_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vcvt_f64_u64(simde_uint64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) return vcvt_f64_u64(a); #else simde_uint64x1_private a_ = simde_uint64x1_to_private(a); simde_float64x1_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvtd_f64_u64(a_.values[i]); } return 
simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvt_f64_u64 #define vcvt_f64_u64(a) simde_vcvt_f64_u64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vcvtq_f32_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcvtq_f32_s32(a); #else simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_float32x4_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvts_f32_s32(a_.values[i]); } return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcvtq_f32_s32 #define vcvtq_f32_s32(a) simde_vcvtq_f32_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vcvtq_f32_u32(simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) return vcvtq_f32_u32(a); #else simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_float32x4_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvts_f32_u32(a_.values[i]); } return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcvtq_f32_u32 #define vcvtq_f32_u32(a) simde_vcvtq_f32_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vcvtq_f64_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcvtq_f64_s64(a); #else simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_float64x2_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvtd_f64_s64(a_.values[i]); } return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvtq_f64_s64 #define vcvtq_f64_s64(a) simde_vcvtq_f64_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vcvtq_f64_u64(simde_uint64x2_t a) { 
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) return vcvtq_f64_u64(a); #else simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_float64x2_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvtd_f64_u64(a_.values[i]); } return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvtq_f64_u64 #define vcvtq_f64_u64(a) simde_vcvtq_f64_u64(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* SIMDE_ARM_NEON_CVT_H */ simde-0.7.2/simde/arm/neon/dot.h000066400000000000000000000151101400333146700163760ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_DOT_H) #define SIMDE_ARM_NEON_DOT_H #include "types.h" #include "add.h" #include "combine.h" #include "dup_n.h" #include "get_low.h" #include "get_high.h" #include "paddl.h" #include "movn.h" #include "mull.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vdot_s32(simde_int32x2_t r, simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) return vdot_s32(r, a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return simde_vadd_s32(r, simde_vmovn_s64(simde_vpaddlq_s32(simde_vpaddlq_s16(simde_vmull_s8(a, b))))); #else simde_int32x2_private r_; simde_int8x8_private a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); for (int i = 0 ; i < 2 ; i++) { int32_t acc = 0; SIMDE_VECTORIZE_REDUCTION(+:acc) for (int j = 0 ; j < 4 ; j++) { const int idx = j + (i << 2); acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx]); } r_.values[i] = acc; } return simde_vadd_s32(r, simde_int32x2_from_private(r_)); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) #undef vdot_s32 #define vdot_s32(r, a, b) simde_vdot_s32((r), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vdot_u32(simde_uint32x2_t r, simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) return vdot_u32(r, a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return simde_vadd_u32(r, simde_vmovn_u64(simde_vpaddlq_u32(simde_vpaddlq_u16(simde_vmull_u8(a, b))))); #else simde_uint32x2_private r_; simde_uint8x8_private a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); for (int i = 0 ; i < 2 ; i++) { uint32_t acc = 0; 
SIMDE_VECTORIZE_REDUCTION(+:acc) for (int j = 0 ; j < 4 ; j++) { const int idx = j + (i << 2); acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx]); } r_.values[i] = acc; } return simde_vadd_u32(r, simde_uint32x2_from_private(r_)); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) #undef vdot_u32 #define vdot_u32(r, a, b) simde_vdot_u32((r), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vdotq_s32(simde_int32x4_t r, simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) return vdotq_s32(r, a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return simde_vaddq_s32(r, simde_vcombine_s32(simde_vmovn_s64(simde_vpaddlq_s32(simde_vpaddlq_s16(simde_vmull_s8(simde_vget_low_s8(a), simde_vget_low_s8(b))))), simde_vmovn_s64(simde_vpaddlq_s32(simde_vpaddlq_s16(simde_vmull_s8(simde_vget_high_s8(a), simde_vget_high_s8(b))))))); #else simde_int32x4_private r_; simde_int8x16_private a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); for (int i = 0 ; i < 4 ; i++) { int32_t acc = 0; SIMDE_VECTORIZE_REDUCTION(+:acc) for (int j = 0 ; j < 4 ; j++) { const int idx = j + (i << 2); acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx]); } r_.values[i] = acc; } return simde_vaddq_s32(r, simde_int32x4_from_private(r_)); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) #undef vdotq_s32 #define vdotq_s32(r, a, b) simde_vdotq_s32((r), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vdotq_u32(simde_uint32x4_t r, simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) return vdotq_u32(r, a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return 
simde_vaddq_u32(r, simde_vcombine_u32(simde_vmovn_u64(simde_vpaddlq_u32(simde_vpaddlq_u16(simde_vmull_u8(simde_vget_low_u8(a), simde_vget_low_u8(b))))), simde_vmovn_u64(simde_vpaddlq_u32(simde_vpaddlq_u16(simde_vmull_u8(simde_vget_high_u8(a), simde_vget_high_u8(b))))))); #else simde_uint32x4_private r_; simde_uint8x16_private a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); for (int i = 0 ; i < 4 ; i++) { uint32_t acc = 0; SIMDE_VECTORIZE_REDUCTION(+:acc) for (int j = 0 ; j < 4 ; j++) { const int idx = j + (i << 2); acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx]); } r_.values[i] = acc; } return simde_vaddq_u32(r, simde_uint32x4_from_private(r_)); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) #undef vdotq_u32 #define vdotq_u32(r, a, b) simde_vdotq_u32((r), (a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_DOT_H) */ simde-0.7.2/simde/arm/neon/dot_lane.h000066400000000000000000000176311400333146700174070ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_DOT_LANE_H) #define SIMDE_ARM_NEON_DOT_LANE_H #include "types.h" #include "add.h" #include "combine.h" #include "dup_n.h" #include "get_low.h" #include "get_high.h" #include "paddl.h" #include "movn.h" #include "mull.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vdot_lane_s32(simde_int32x2_t r, simde_int8x8_t a, simde_int8x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_int32x2_t result; #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOT_PROD) SIMDE_CONSTIFY_2_(vdot_lane_s32, result, (HEDLEY_UNCREACHABLE(), result), lane, r, a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) simde_uint32x2_t mask; SIMDE_CONSTIFY_2_(vset_lane_u32, mask, (HEDLEY_UNREACHABLE(), mask), lane, UINT32_MAX, vdup_n_u32(0)); result = vbsl_s32(mask, vadd_s32(r, vmovn_s64(vpaddlq_s32(vpaddlq_s16(vmull_s8(a, b))))), r); #else simde_int32x2_private r_ = simde_int32x2_to_private(simde_vdup_n_s32(0)); simde_int8x8_private a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); int32_t acc = 0; SIMDE_VECTORIZE_REDUCTION(+:acc) for (int j = 0 ; j < 4 ; j++) { const int idx = j + (lane << 2); acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx]); } r_.values[lane] = acc; result = simde_vadd_s32(r, simde_int32x2_from_private(r_)); #endif return result; } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) #undef vdot_lane_s32 #define vdot_lane_s32(r, a, b, lane) 
simde_vdot_lane_s32((r), (a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vdot_lane_u32(simde_uint32x2_t r, simde_uint8x8_t a, simde_uint8x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_uint32x2_t result; #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOT_PROD) SIMDE_CONSTIFY_2_(vdot_lane_u32, result, (HEDLEY_UNCREACHABLE(), result), lane, r, a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) simde_uint32x2_t mask; SIMDE_CONSTIFY_2_(vset_lane_u32, mask, (HEDLEY_UNREACHABLE(), mask), lane, UINT32_MAX, vdup_n_u32(0)); result = vbsl_u32(mask, vadd_u32(r, vmovn_u64(vpaddlq_u32(vpaddlq_u16(vmull_u8(a, b))))), r); #else simde_uint32x2_private r_ = simde_uint32x2_to_private(simde_vdup_n_u32(0)); simde_uint8x8_private a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); uint32_t acc = 0; SIMDE_VECTORIZE_REDUCTION(+:acc) for (int j = 0 ; j < 4 ; j++) { const int idx = j + (lane << 2); acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx]); } r_.values[lane] = acc; result = simde_vadd_u32(r, simde_uint32x2_from_private(r_)); #endif return result; } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) #undef vdot_lane_u32 #define vdot_lane_u32(r, a, b, lane) simde_vdot_lane_u32((r), (a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vdot_laneq_s32(simde_int32x4_t r, simde_int8x16_t a, simde_int8x16_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_int32x4_t result; #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOT_PROD) SIMDE_CONSTIFY_4_(vdot_laneq_s32, result, (HEDLEY_UNCREACHABLE(), result), lane, r, a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) simde_uint32x4_t mask; SIMDE_CONSTIFY_4_(vsetq_lane_u32, mask, (HEDLEY_UNREACHABLE(), mask), lane, UINT32_MAX, vdupq_n_u32(0)); result = vbslq_s32(mask, 
vaddq_s32(r, vcombine_s32(vmovn_s64(vpaddlq_s32(vpaddlq_s16(vmull_s8(vget_low_s8(a), vget_low_s8(b))))), vmovn_s64(vpaddlq_s32(vpaddlq_s16(vmull_s8(vget_high_s8(a), vget_high_s8(b))))))), r); #else simde_int32x4_private r_ = simde_int32x4_to_private(simde_vdupq_n_s32(0)); simde_int8x16_private a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); int32_t acc = 0; SIMDE_VECTORIZE_REDUCTION(+:acc) for (int j = 0 ; j < 4 ; j++) { const int idx = j + (lane << 2); acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx]); } r_.values[lane] = acc; result = simde_vaddq_s32(r, simde_int32x4_from_private(r_)); #endif return result; } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) #undef vdot_laneq_s32 #define vdot_laneq_s32(r, a, b, lane) simde_vdot_laneq_s32((r), (a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vdot_laneq_u32(simde_uint32x4_t r, simde_uint8x16_t a, simde_uint8x16_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_uint32x4_t result; #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOT_PROD) SIMDE_CONSTIFY_4_(vdot_laneq_u32, result, (HEDLEY_UNCREACHABLE(), result), lane, r, a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) simde_uint32x4_t mask; SIMDE_CONSTIFY_4_(vsetq_lane_u32, mask, (HEDLEY_UNREACHABLE(), mask), lane, UINT32_MAX, vdupq_n_u32(0)); result = vbslq_u32(mask, vaddq_u32(r, vcombine_u32(vmovn_u64(vpaddlq_u32(vpaddlq_u16(vmull_u8(vget_low_u8(a), vget_low_u8(b))))), vmovn_u64(vpaddlq_u32(vpaddlq_u16(vmull_u8(vget_high_u8(a), vget_high_u8(b))))))), r); #else simde_uint32x4_private r_ = simde_uint32x4_to_private(simde_vdupq_n_u32(0)); simde_uint8x16_private a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); uint32_t acc = 0; SIMDE_VECTORIZE_REDUCTION(+:acc) for (int j = 0 ; j < 4 ; j++) { const int idx = j + (lane << 2); acc += 
HEDLEY_STATIC_CAST(uint32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx]); } r_.values[lane] = acc; result = simde_vaddq_u32(r, simde_uint32x4_from_private(r_)); #endif return result; } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) #undef vdot_laneq_u32 #define vdot_laneq_u32(r, a, b, lane) simde_vdot_laneq_u32((r), (a), (b), (lane)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_DOT_LANE_H) */ simde-0.7.2/simde/arm/neon/dup_lane.h000066400000000000000000000520021400333146700174000ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_DUP_LANE_H) #define SIMDE_ARM_NEON_DUP_LANE_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vdup_lane_f32(simde_float32x2_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float32x2_private vec_ = simde_float32x2_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_float32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vdup_lane_f32(vec, lane) vdup_lane_f32(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vdup_lane_f32 #define vdup_lane_f32(vec, lane) simde_vdup_lane_f32((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vdup_lane_f64(simde_float64x1_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { (void) lane; return vec; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdup_lane_f64 #define vdup_lane_f64(vec, lane) simde_vdup_lane_f64((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vdup_lane_s8(simde_int8x8_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { simde_int8x8_private vec_ = simde_int8x8_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_int8x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vdup_lane_s8(vec, lane) vdup_lane_s8(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vdup_lane_s8 #define vdup_lane_s8(vec, lane) simde_vdup_lane_s8((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vdup_lane_s16(simde_int16x4_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_int16x4_private vec_ = 
simde_int16x4_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_int16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vdup_lane_s16(vec, lane) vdup_lane_s16(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vdup_lane_s16 #define vdup_lane_s16(vec, lane) simde_vdup_lane_s16((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vdup_lane_s32(simde_int32x2_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_int32x2_private vec_ = simde_int32x2_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_int32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vdup_lane_s32(vec, lane) vdup_lane_s32(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vdup_lane_s32 #define vdup_lane_s32(vec, lane) simde_vdup_lane_s32((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vdup_lane_s64(simde_int64x1_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { simde_int64x1_private vec_ = simde_int64x1_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_int64x1_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vdup_lane_s64(vec, lane) vdup_lane_s64(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vdup_lane_s64 #define vdup_lane_s64(vec, lane) simde_vdup_lane_s64((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vdup_lane_u8(simde_uint8x8_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { simde_uint8x8_private vec_ = simde_uint8x8_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < 
(sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_uint8x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vdup_lane_u8(vec, lane) vdup_lane_u8(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vdup_lane_u8 #define vdup_lane_u8(vec, lane) simde_vdup_lane_u8((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vdup_lane_u16(simde_uint16x4_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_uint16x4_private vec_ = simde_uint16x4_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_uint16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vdup_lane_u16(vec, lane) vdup_lane_u16(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vdup_lane_u16 #define vdup_lane_u16(vec, lane) simde_vdup_lane_u16((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vdup_lane_u32(simde_uint32x2_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_uint32x2_private vec_ = simde_uint32x2_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_uint32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vdup_lane_u32(vec, lane) vdup_lane_u32(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vdup_lane_u32 #define vdup_lane_u32(vec, lane) simde_vdup_lane_u32((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vdup_lane_u64(simde_uint64x1_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { simde_uint64x1_private vec_ = simde_uint64x1_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = 
vec_.values[lane]; } return simde_uint64x1_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vdup_lane_u64(vec, lane) vdup_lane_u64(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vdup_lane_u64 #define vdup_lane_u64(vec, lane) simde_vdup_lane_u64((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vdup_laneq_f32(simde_float32x4_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_float32x4_private vec_ = simde_float32x4_to_private(vec); simde_float32x2_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_float32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vdup_laneq_f32(vec, lane) vdup_laneq_f32(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdup_laneq_f32 #define vdup_laneq_f32(vec, lane) simde_vdup_laneq_f32((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vdup_laneq_f64(simde_float64x2_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float64x2_private vec_ = simde_float64x2_to_private(vec); simde_float64x1_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_float64x1_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdup_laneq_f64 #define vdup_laneq_f64(vec, lane) simde_vdup_laneq_f64((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vdup_laneq_s8(simde_int8x16_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { simde_int8x16_private vec_ = simde_int8x16_to_private(vec); simde_int8x8_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_int8x8_from_private(r_); } #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vdup_laneq_s8(vec, lane) vdup_laneq_s8(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdup_laneq_s8 #define vdup_laneq_s8(vec, lane) simde_vdup_laneq_s8((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vdup_laneq_s16(simde_int16x8_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { simde_int16x8_private vec_ = simde_int16x8_to_private(vec); simde_int16x4_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_int16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vdup_laneq_s16(vec, lane) vdup_laneq_s16(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdup_laneq_s16 #define vdup_laneq_s16(vec, lane) simde_vdup_laneq_s16((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vdup_laneq_s32(simde_int32x4_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_int32x4_private vec_ = simde_int32x4_to_private(vec); simde_int32x2_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_int32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vdup_laneq_s32(vec, lane) vdup_laneq_s32(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdup_laneq_s32 #define vdup_laneq_s32(vec, lane) simde_vdup_laneq_s32((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vdup_laneq_s64(simde_int64x2_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_int64x2_private vec_ = simde_int64x2_to_private(vec); simde_int64x1_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_int64x1_from_private(r_); } #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vdup_laneq_s64(vec, lane) vdup_laneq_s64(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdup_laneq_s64 #define vdup_laneq_s64(vec, lane) simde_vdup_laneq_s64((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vdup_laneq_u8(simde_uint8x16_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { simde_uint8x16_private vec_ = simde_uint8x16_to_private(vec); simde_uint8x8_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_uint8x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vdup_laneq_u8(vec, lane) vdup_laneq_u8(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdup_laneq_u8 #define vdup_laneq_u8(vec, lane) simde_vdup_laneq_u8((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vdup_laneq_u16(simde_uint16x8_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { simde_uint16x8_private vec_ = simde_uint16x8_to_private(vec); simde_uint16x4_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_uint16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vdup_laneq_u16(vec, lane) vdup_laneq_u16(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdup_laneq_u16 #define vdup_laneq_u16(vec, lane) simde_vdup_laneq_u16((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vdup_laneq_u32(simde_uint32x4_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_uint32x4_private vec_ = simde_uint32x4_to_private(vec); simde_uint32x2_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return 
simde_uint32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vdup_laneq_u32(vec, lane) vdup_laneq_u32(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdup_laneq_u32 #define vdup_laneq_u32(vec, lane) simde_vdup_laneq_u32((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vdup_laneq_u64(simde_uint64x2_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_uint64x2_private vec_ = simde_uint64x2_to_private(vec); simde_uint64x1_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_uint64x1_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vdup_laneq_u64(vec, lane) vdup_laneq_u64(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdup_laneq_u64 #define vdup_laneq_u64(vec, lane) simde_vdup_laneq_u64((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vdupq_laneq_f32(simde_float32x4_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_float32x4_private vec_ = simde_float32x4_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_float32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vdupq_laneq_f32(vec, lane) vdupq_laneq_f32(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdupq_laneq_f32 #define vdupq_laneq_f32(vec, lane) simde_vdupq_laneq_f32((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vdupq_laneq_f64(simde_float64x2_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float64x2_private vec_ = simde_float64x2_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return 
simde_float64x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdupq_laneq_f64 #define vdupq_laneq_f64(vec, lane) simde_vdupq_laneq_f64((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vdupq_laneq_s8(simde_int8x16_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { simde_int8x16_private vec_ = simde_int8x16_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_int8x16_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vdupq_laneq_s8(vec, lane) vdupq_laneq_s8(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdupq_laneq_s8 #define vdupq_laneq_s8(vec, lane) simde_vdupq_laneq_s8((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vdupq_laneq_s16(simde_int16x8_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { simde_int16x8_private vec_ = simde_int16x8_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_int16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vdupq_laneq_s16(vec, lane) vdupq_laneq_s16(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdupq_laneq_s16 #define vdupq_laneq_s16(vec, lane) simde_vdupq_laneq_s16((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vdupq_laneq_s32(simde_int32x4_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_int32x4_private vec_ = simde_int32x4_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_int32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vdupq_laneq_s32(vec, lane) vdupq_laneq_s32(vec, lane) #endif #if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdupq_laneq_s32 #define vdupq_laneq_s32(vec, lane) simde_vdupq_laneq_s32((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vdupq_laneq_s64(simde_int64x2_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_int64x2_private vec_ = simde_int64x2_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_int64x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vdupq_laneq_s64(vec, lane) vdupq_laneq_s64(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdupq_laneq_s64 #define vdupq_laneq_s64(vec, lane) simde_vdupq_laneq_s64((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vdupq_laneq_u8(simde_uint8x16_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { simde_uint8x16_private vec_ = simde_uint8x16_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_uint8x16_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vdupq_laneq_u8(vec, lane) vdupq_laneq_u8(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdupq_laneq_u8 #define vdupq_laneq_u8(vec, lane) simde_vdupq_laneq_u8((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vdupq_laneq_u16(simde_uint16x8_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { simde_uint16x8_private vec_ = simde_uint16x8_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_uint16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vdupq_laneq_u16(vec, lane) vdupq_laneq_u16(vec, lane) #endif #if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdupq_laneq_u16 #define vdupq_laneq_u16(vec, lane) simde_vdupq_laneq_u16((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vdupq_laneq_u32(simde_uint32x4_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_uint32x4_private vec_ = simde_uint32x4_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_uint32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vdupq_laneq_u32(vec, lane) vdupq_laneq_u32(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdupq_laneq_u32 #define vdupq_laneq_u32(vec, lane) simde_vdupq_laneq_u32((vec), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vdupq_laneq_u64(simde_uint64x2_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_uint64x2_private vec_ = simde_uint64x2_to_private(vec), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = vec_.values[lane]; } return simde_uint64x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vdupq_laneq_u64(vec, lane) vdupq_laneq_u64(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdupq_laneq_u64 #define vdupq_laneq_u64(vec, lane) simde_vdupq_laneq_u64((vec), (lane)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_DUP_LANE_H) */ simde-0.7.2/simde/arm/neon/dup_n.h000066400000000000000000000355071400333146700167310ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, 
sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Sean Maher (Copyright owned by Google, LLC) * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_DUP_N_H) #define SIMDE_ARM_NEON_DUP_N_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vdup_n_f32(float value) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vdup_n_f32(value); #else simde_float32x2_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = value; } return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vdup_n_f32 #define vdup_n_f32(value) simde_vdup_n_f32((value)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vdup_n_f64(double value) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vdup_n_f64(value); #else simde_float64x1_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = value; } return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vdup_n_f64 #define vdup_n_f64(value) simde_vdup_n_f64((value)) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde_int8x8_t simde_vdup_n_s8(int8_t value) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vdup_n_s8(value); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_set1_pi8(value); #else simde_int8x8_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = value; } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vdup_n_s8 #define vdup_n_s8(value) simde_vdup_n_s8((value)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vdup_n_s16(int16_t value) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vdup_n_s16(value); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_set1_pi16(value); #else simde_int16x4_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = value; } return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vdup_n_s16 #define vdup_n_s16(value) simde_vdup_n_s16((value)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vdup_n_s32(int32_t value) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vdup_n_s32(value); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_set1_pi32(value); #else simde_int32x2_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = value; } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vdup_n_s32 #define vdup_n_s32(value) simde_vdup_n_s32((value)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vdup_n_s64(int64_t value) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vdup_n_s64(value); #else simde_int64x1_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = value; } return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vdup_n_s64 #define 
vdup_n_s64(value) simde_vdup_n_s64((value))
#endif

/* Returns a simde_uint8x8_t in which every element equals `value`.
 * Uses the native NEON vdup_n_u8 or the MMX _mm_set1_pi8 when the matching
 * *_NATIVE macro is defined; otherwise fills the private representation
 * element by element. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vdup_n_u8(uint8_t value) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vdup_n_u8(value);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    /* value is cast to int8_t before being handed to the MMX intrinsic */
    return _mm_set1_pi8(HEDLEY_STATIC_CAST(int8_t, value));
  #else
    simde_uint8x8_private r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = value;
    }

    return simde_uint8x8_from_private(r_);
  #endif
}

/* When native aliases are enabled, the NEON name is redirected to the
 * simde_ implementation. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vdup_n_u8
  #define vdup_n_u8(value) simde_vdup_n_u8((value))
#endif

/* Returns a simde_uint16x4_t in which every element equals `value`
 * (native NEON, MMX, or element-wise fallback). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_vdup_n_u16(uint16_t value) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vdup_n_u16(value);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    return _mm_set1_pi16(HEDLEY_STATIC_CAST(int16_t, value));
  #else
    simde_uint16x4_private r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = value;
    }

    return simde_uint16x4_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vdup_n_u16
  #define vdup_n_u16(value) simde_vdup_n_u16((value))
#endif

/* Returns a simde_uint32x2_t in which every element equals `value`
 * (native NEON, MMX, or element-wise fallback). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vdup_n_u32(uint32_t value) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vdup_n_u32(value);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    return _mm_set1_pi32(HEDLEY_STATIC_CAST(int32_t, value));
  #else
    simde_uint32x2_private r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = value;
    }

    return simde_uint32x2_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vdup_n_u32
  #define vdup_n_u32(value) simde_vdup_n_u32((value))
#endif

/* Returns a simde_uint64x1_t in which every element equals `value`.
 * Only a native NEON path and the element-wise fallback exist here. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vdup_n_u64(uint64_t value) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vdup_n_u64(value);
  #else
    simde_uint64x1_private r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = value;
    }

    return simde_uint64x1_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vdup_n_u64
  #define vdup_n_u64(value) simde_vdup_n_u64((value))
#endif

/* Returns a simde_float32x4_t in which every element equals `value`.
 * Native paths, in order of preference: NEON, SSE, WASM SIMD128, AltiVec;
 * otherwise the element-wise fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vdupq_n_f32(float value) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vdupq_n_f32(value);
  #elif defined(SIMDE_X86_SSE_NATIVE)
    return _mm_set1_ps(value);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f32x4_splat(value);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    /* NOTE(review): value is used by the very next statement, so this void
     * cast looks redundant; presumably it silences a warning -- confirm. */
    (void) value;
    return vec_splats(value);
  #else
    simde_float32x4_private r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = value;
    }

    return simde_float32x4_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vdupq_n_f32
  #define vdupq_n_f32(value) simde_vdupq_n_f32((value))
#endif

/* Returns a simde_float64x2_t in which every element equals `value`.
 * The native NEON path and the alias below are keyed on the A64V8 macros
 * rather than the A32V7 ones used elsewhere in this file. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vdupq_n_f64(double value) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vdupq_n_f64(value);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_set1_pd(value);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_f64x2_splat(value);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    (void) value;
    return vec_splats(value);
  #else
    simde_float64x2_private r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = value;
    }

    return simde_float64x2_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vdupq_n_f64
  #define vdupq_n_f64(value) simde_vdupq_n_f64((value))
#endif

/* Returns a simde_int8x16_t in which every element equals `value`
 * (NEON, SSE2, WASM SIMD128, AltiVec, or element-wise fallback). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vdupq_n_s8(int8_t value) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vdupq_n_s8(value);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_set1_epi8(value);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_splat(value);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_splats(value);
  #else
    simde_int8x16_private r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = value;
    }

    return simde_int8x16_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vdupq_n_s8
  #define vdupq_n_s8(value) simde_vdupq_n_s8((value))
#endif

/* Returns a simde_int16x8_t in which every element equals `value`. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vdupq_n_s16(int16_t value) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vdupq_n_s16(value);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_set1_epi16(value);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_splat(value);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_splats(value);
  #else
    simde_int16x8_private r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = value;
    }

    return simde_int16x8_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vdupq_n_s16
  #define vdupq_n_s16(value) simde_vdupq_n_s16((value))
#endif

/* Returns a simde_int32x4_t in which every element equals `value`. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vdupq_n_s32(int32_t value) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vdupq_n_s32(value);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_set1_epi32(value);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_splat(value);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_splats(value);
  #else
    simde_int32x4_private r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = value;
    }

    return simde_int32x4_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vdupq_n_s32
  #define vdupq_n_s32(value) simde_vdupq_n_s32((value))
#endif

/* Returns a simde_int64x2_t in which every element equals `value`.
 * The SSE2 path is additionally gated: it is taken only when
 * HEDLEY_MSVC_VERSION is undefined or HEDLEY_MSVC_VERSION_CHECK(19,0,0)
 * holds. The AltiVec path casts value to signed long long. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vdupq_n_s64(int64_t value) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vdupq_n_s64(value);
  #elif defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0))
    return _mm_set1_epi64x(value);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_splat(value);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return vec_splats(HEDLEY_STATIC_CAST(signed long long, value));
  #else
    simde_int64x2_private r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = value;
    }

    return simde_int64x2_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vdupq_n_s64
  #define vdupq_n_s64(value) simde_vdupq_n_s64((value))
#endif

/* Returns a simde_uint8x16_t in which every element equals `value`.
 * The SSE2 and WASM paths cast value to int8_t before calling the
 * intrinsic. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vdupq_n_u8(uint8_t value) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vdupq_n_u8(value);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value));
  #elif defined (SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i8x16_splat(HEDLEY_STATIC_CAST(int8_t, value));
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_splats(value);
  #else
    simde_uint8x16_private r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = value;
    }

    return simde_uint8x16_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vdupq_n_u8
  #define vdupq_n_u8(value) simde_vdupq_n_u8((value))
#endif

/* Returns a simde_uint16x8_t in which every element equals `value`.
 * The SSE2 and WASM paths cast value to int16_t before calling the
 * intrinsic. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vdupq_n_u16(uint16_t value) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vdupq_n_u16(value);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value));
  #elif defined (SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i16x8_splat(HEDLEY_STATIC_CAST(int16_t, value));
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_splats(value);
  #else
    simde_uint16x8_private r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = value;
    }

    return simde_uint16x8_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vdupq_n_u16
  #define vdupq_n_u16(value) simde_vdupq_n_u16((value))
#endif

/* Returns a simde_uint32x4_t in which every element equals `value`.
 * Native paths: NEON, SSE2, WASM SIMD128, AltiVec; otherwise the private
 * representation is filled element by element. The SSE2 and WASM paths cast
 * value to int32_t before calling the intrinsic. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vdupq_n_u32(uint32_t value) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vdupq_n_u32(value);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value));
  #elif defined (SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i32x4_splat(HEDLEY_STATIC_CAST(int32_t, value));
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_splats(value);
  #else
    simde_uint32x4_private r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = value;
    }

    return simde_uint32x4_from_private(r_);
  #endif
}

/* When native aliases are enabled, the NEON name is redirected to the
 * simde_ implementation. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vdupq_n_u32
  #define vdupq_n_u32(value) simde_vdupq_n_u32((value))
#endif

/* Returns a simde_uint64x2_t in which every element equals `value`.
 * The SSE2 path is taken only when HEDLEY_MSVC_VERSION is undefined or
 * HEDLEY_MSVC_VERSION_CHECK(19,0,0) holds; the AltiVec path casts value to
 * unsigned long long. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vdupq_n_u64(uint64_t value) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vdupq_n_u64(value);
  #elif defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0))
    return _mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value));
  #elif defined (SIMDE_WASM_SIMD128_NATIVE)
    return wasm_i64x2_splat(HEDLEY_STATIC_CAST(int64_t, value));
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value));
  #else
    simde_uint64x2_private r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = value;
    }

    return simde_uint64x2_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vdupq_n_u64
  #define vdupq_n_u64(value) simde_vdupq_n_u64((value))
#endif

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_ARM_NEON_DUP_N_H) */

/* NOTE(review): the next line looks like a tar archive member header
 * (path, octal fields, "ustar") rather than C source -- confirm before
 * treating this text as a compilable file. */
simde-0.7.2/simde/arm/neon/eor.h000066400000000000000000000367261400333146700164150ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 *   2020 Evan Nemerson
 *   2020 Christopher Moore
 */

#if !defined(SIMDE_ARM_NEON_EOR_H)
#define SIMDE_ARM_NEON_EOR_H

#include "types.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* Element-wise bitwise XOR of a and b, returned as a simde_int8x8_t.
 * Uses native NEON veor_s8 or MMX _mm_xor_si64 when available. Otherwise
 * the private representations are XORed, as a whole-vector expression when
 * SIMDE_VECTOR_SUBSCRIPT_OPS is defined and element by element if not. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_veor_s8(simde_int8x8_t a, simde_int8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veor_s8(a, b);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    return _mm_xor_si64(a, b);
  #else
    simde_int8x8_private
      r_,
      a_ = simde_int8x8_to_private(a),
      b_ = simde_int8x8_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_int8x8_from_private(r_);
  #endif
}

/* When native aliases are enabled, the NEON name is redirected to the
 * simde_ implementation. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veor_s8
  #define veor_s8(a, b) simde_veor_s8((a), (b))
#endif

/* Element-wise bitwise XOR of two simde_int16x4_t vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_veor_s16(simde_int16x4_t a, simde_int16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veor_s16(a, b);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    return _mm_xor_si64(a, b);
  #else
    simde_int16x4_private
      r_,
      a_ = simde_int16x4_to_private(a),
      b_ = simde_int16x4_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_int16x4_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veor_s16
  #define veor_s16(a, b) simde_veor_s16((a), (b))
#endif

/* Element-wise bitwise XOR of two simde_int32x2_t vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_veor_s32(simde_int32x2_t a, simde_int32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veor_s32(a, b);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    return _mm_xor_si64(a, b);
  #else
    simde_int32x2_private
      r_,
      a_ = simde_int32x2_to_private(a),
      b_ = simde_int32x2_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_int32x2_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veor_s32
  #define veor_s32(a, b) simde_veor_s32((a), (b))
#endif

/* Element-wise bitwise XOR of two simde_int64x1_t vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_veor_s64(simde_int64x1_t a, simde_int64x1_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veor_s64(a, b);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    return _mm_xor_si64(a, b);
  #else
    simde_int64x1_private
      r_,
      a_ = simde_int64x1_to_private(a),
      b_ = simde_int64x1_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_int64x1_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veor_s64
  #define veor_s64(a, b) simde_veor_s64((a), (b))
#endif

/* Element-wise bitwise XOR of two simde_uint8x8_t vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_veor_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veor_u8(a, b);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    return _mm_xor_si64(a, b);
  #else
    simde_uint8x8_private
      r_,
      a_ = simde_uint8x8_to_private(a),
      b_ = simde_uint8x8_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_uint8x8_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veor_u8
  #define veor_u8(a, b) simde_veor_u8((a), (b))
#endif

/* Element-wise bitwise XOR of two simde_uint16x4_t vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x4_t
simde_veor_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veor_u16(a, b);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    return _mm_xor_si64(a, b);
  #else
    simde_uint16x4_private
      r_,
      a_ = simde_uint16x4_to_private(a),
      b_ = simde_uint16x4_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_uint16x4_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veor_u16
  #define veor_u16(a, b) simde_veor_u16((a), (b))
#endif

/* Element-wise bitwise XOR of two simde_uint32x2_t vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_veor_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veor_u32(a, b);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    return _mm_xor_si64(a, b);
  #else
    simde_uint32x2_private
      r_,
      a_ = simde_uint32x2_to_private(a),
      b_ = simde_uint32x2_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_uint32x2_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veor_u32
  #define veor_u32(a, b) simde_veor_u32((a), (b))
#endif

/* Element-wise bitwise XOR of two simde_uint64x1_t vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_veor_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veor_u64(a, b);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    return _mm_xor_si64(a, b);
  #else
    simde_uint64x1_private
      r_,
      a_ = simde_uint64x1_to_private(a),
      b_ = simde_uint64x1_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_uint64x1_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veor_u64
  #define veor_u64(a, b) simde_veor_u64((a), (b))
#endif

/* Element-wise bitwise XOR of two simde_int8x16_t vectors.
 * The q-suffixed variants try, in order: native NEON, SSE2 _mm_xor_si128,
 * AltiVec vec_xor, WASM wasm_v128_xor, then the portable fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_veorq_s8(simde_int8x16_t a, simde_int8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veorq_s8(a, b);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_xor_si128(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_xor(a, b);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_xor(a, b);
  #else
    simde_int8x16_private
      r_,
      a_ = simde_int8x16_to_private(a),
      b_ = simde_int8x16_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_int8x16_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veorq_s8
  #define veorq_s8(a, b) simde_veorq_s8((a), (b))
#endif

/* Element-wise bitwise XOR of two simde_int16x8_t vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_veorq_s16(simde_int16x8_t a, simde_int16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veorq_s16(a, b);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_xor_si128(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_xor(a, b);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_xor(a, b);
  #else
    simde_int16x8_private
      r_,
      a_ = simde_int16x8_to_private(a),
      b_ = simde_int16x8_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_int16x8_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veorq_s16
  #define veorq_s16(a, b) simde_veorq_s16((a), (b))
#endif

/* Element-wise bitwise XOR of two simde_int32x4_t vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_veorq_s32(simde_int32x4_t a, simde_int32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veorq_s32(a, b);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_xor_si128(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_xor(a, b);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_xor(a, b);
  #else
    simde_int32x4_private
      r_,
      a_ = simde_int32x4_to_private(a),
      b_ = simde_int32x4_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_int32x4_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veorq_s32
  #define veorq_s32(a, b) simde_veorq_s32((a), (b))
#endif

/* Element-wise bitwise XOR of two simde_int64x2_t vectors.
 * The AltiVec path here is keyed on the P7 macro rather than P6. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_veorq_s64(simde_int64x2_t a, simde_int64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veorq_s64(a, b);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_xor_si128(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return vec_xor(a, b);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_xor(a, b);
  #else
    simde_int64x2_private
      r_,
      a_ = simde_int64x2_to_private(a),
      b_ = simde_int64x2_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_int64x2_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veorq_s64
  #define veorq_s64(a, b) simde_veorq_s64((a), (b))
#endif

/* Element-wise bitwise XOR of two simde_uint8x16_t vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_veorq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veorq_u8(a, b);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_xor_si128(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_xor(a, b);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_xor(a, b);
  #else
    simde_uint8x16_private
      r_,
      a_ = simde_uint8x16_to_private(a),
      b_ = simde_uint8x16_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_uint8x16_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veorq_u8
  #define veorq_u8(a, b) simde_veorq_u8((a), (b))
#endif

/* Element-wise bitwise XOR of two simde_uint16x8_t vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_veorq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veorq_u16(a, b);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_xor_si128(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_xor(a, b);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_xor(a, b);
  #else
    simde_uint16x8_private
      r_,
      a_ = simde_uint16x8_to_private(a),
      b_ = simde_uint16x8_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_uint16x8_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veorq_u16
  #define veorq_u16(a, b) simde_veorq_u16((a), (b))
#endif

/* Element-wise bitwise XOR of two simde_uint32x4_t vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_veorq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veorq_u32(a, b);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_xor_si128(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return vec_xor(a, b);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_xor(a, b);
  #else
    simde_uint32x4_private
      r_,
      a_ = simde_uint32x4_to_private(a),
      b_ = simde_uint32x4_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_uint32x4_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veorq_u32
  #define veorq_u32(a, b) simde_veorq_u32((a), (b))
#endif

/* Element-wise bitwise XOR of two simde_uint64x2_t vectors.
 * The AltiVec path here is keyed on the P7 macro rather than P6. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_veorq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return veorq_u64(a, b);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_xor_si128(a, b);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    return vec_xor(a, b);
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    return wasm_v128_xor(a, b);
  #else
    simde_uint64x2_private
      r_,
      a_ = simde_uint64x2_to_private(a),
      b_ = simde_uint64x2_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.values = a_.values ^ b_.values;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
        r_.values[i] = a_.values[i] ^ b_.values[i];
      }
    #endif

    return simde_uint64x2_from_private(r_);
  #endif
}

#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef veorq_u64
  #define veorq_u64(a, b) simde_veorq_u64((a), (b))
#endif

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_ARM_NEON_EOR_H) */

/* NOTE(review): the next line looks like a tar archive member header
 * (path, octal fields, "ustar") rather than C source -- confirm before
 * treating this text as a compilable file. */
simde-0.7.2/simde/arm/neon/ext.h000066400000000000000000001160701400333146700164170ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_EXT_H) #define SIMDE_ARM_NEON_EXT_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vext_f32(simde_float32x2_t a, simde_float32x2_t b, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) simde_float32x2_t r; SIMDE_CONSTIFY_2_(vext_f32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); return r; #else simde_float32x2_private a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b), r_ = a_; const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { size_t src = i + n_; r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1]; } return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) #define simde_vext_f32(a, b, n) (__extension__ ({ \ simde_float32x2_t simde_vext_f32_r; \ if (!__builtin_constant_p(n)) { \ simde_vext_f32_r = simde_vext_f32(a, b, n); \ } else { \ const int simde_vext_f32_n = HEDLEY_STATIC_CAST(int8_t, n); \ simde_float32x2_private simde_vext_f32_r_; \ simde_vext_f32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_float32x2_to_private(a).values, simde_float32x2_to_private(b).values, \ HEDLEY_STATIC_CAST(int8_t, simde_vext_f32_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_f32_n + 1)); \ simde_vext_f32_r = simde_float32x2_from_private(simde_vext_f32_r_); \ } \ simde_vext_f32_r; \ })) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vext_f32 #define vext_f32(a, b, n) simde_vext_f32((a), (b), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vext_f64(simde_float64x1_t a, simde_float64x1_t b, const int n) 
SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) (void) n; return vext_f64(a, b, 0); #else simde_float64x1_private a_ = simde_float64x1_to_private(a), b_ = simde_float64x1_to_private(b), r_ = a_; const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { size_t src = i + n_; r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 0]; } return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) #define simde_vext_f64(a, b, n) (__extension__ ({ \ simde_float64x1_t simde_vext_f64_r; \ if (!__builtin_constant_p(n)) { \ simde_vext_f64_r = simde_vext_f64(a, b, n); \ } else { \ const int simde_vext_f64_n = HEDLEY_STATIC_CAST(int8_t, n); \ simde_float64x1_private simde_vext_f64_r_; \ simde_vext_f64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_float64x1_to_private(a).values, simde_float64x1_to_private(b).values, \ HEDLEY_STATIC_CAST(int8_t, simde_vext_f64_n)); \ simde_vext_f64_r = simde_float64x1_from_private(simde_vext_f64_r_); \ } \ simde_vext_f64_r; \ })) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vext_f64 #define vext_f64(a, b, n) simde_vext_f64((a), (b), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vext_s8(simde_int8x8_t a, simde_int8x8_t b, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) simde_int8x8_t r; SIMDE_CONSTIFY_8_(vext_s8, r, (HEDLEY_UNREACHABLE(), a), n, a, b); return r; #else simde_int8x8_private a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b), r_ = a_; const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { size_t src = i + n_; r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 7]; } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) #define simde_vext_s8(a, b, n) (__extension__ ({ \ simde_int8x8_t simde_vext_s8_r; \ if (!__builtin_constant_p(n)) { \ simde_vext_s8_r = simde_vext_s8(a, b, n); \ } else { \ const int simde_vext_s8_n = HEDLEY_STATIC_CAST(int8_t, n); \ simde_int8x8_private simde_vext_s8_r_; \ simde_vext_s8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, simde_int8x8_to_private(a).values, simde_int8x8_to_private(b).values, \ HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 1), \ HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 3), \ HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 4), HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 5), \ HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 6), HEDLEY_STATIC_CAST(int8_t, simde_vext_s8_n + 7)); \ simde_vext_s8_r = simde_int8x8_from_private(simde_vext_s8_r_); \ } \ simde_vext_s8_r; \ })) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vext_s8 #define vext_s8(a, b, n) simde_vext_s8((a), (b), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vext_s16(simde_int16x4_t a, simde_int16x4_t b, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) simde_int16x4_t r; SIMDE_CONSTIFY_4_(vext_s16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); return r; #else simde_int16x4_private a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b), r_ = a_; const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { size_t src = i + n_; r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 3]; } return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) #define simde_vext_s16(a, b, n) (__extension__ ({ \ simde_int16x4_t simde_vext_s16_r; \ if (!__builtin_constant_p(n)) { \ simde_vext_s16_r = simde_vext_s16(a, b, n); \ } else { \ const int simde_vext_s16_n = HEDLEY_STATIC_CAST(int8_t, n); \ simde_int16x4_private simde_vext_s16_r_; \ simde_vext_s16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, simde_int16x4_to_private(a).values, simde_int16x4_to_private(b).values, \ HEDLEY_STATIC_CAST(int8_t, simde_vext_s16_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_s16_n + 1), \ HEDLEY_STATIC_CAST(int8_t, simde_vext_s16_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vext_s16_n + 3)); \ simde_vext_s16_r = simde_int16x4_from_private(simde_vext_s16_r_); \ } \ simde_vext_s16_r; \ })) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vext_s16 #define vext_s16(a, b, n) simde_vext_s16((a), (b), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vext_s32(simde_int32x2_t a, simde_int32x2_t b, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) simde_int32x2_t r; SIMDE_CONSTIFY_2_(vext_s32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); return r; #else simde_int32x2_private a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b), r_ = a_; const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { size_t src = i + n_; r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 1]; } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) #define simde_vext_s32(a, b, n) (__extension__ ({ \ simde_int32x2_t simde_vext_s32_r; \ if (!__builtin_constant_p(n)) { \ simde_vext_s32_r = simde_vext_s32(a, b, n); \ } else { \ const int simde_vext_s32_n = HEDLEY_STATIC_CAST(int8_t, n); \ simde_int32x2_private simde_vext_s32_r_; \ simde_vext_s32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_int32x2_to_private(a).values, simde_int32x2_to_private(b).values, \ HEDLEY_STATIC_CAST(int8_t, simde_vext_s32_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_s32_n + 1)); \ simde_vext_s32_r = simde_int32x2_from_private(simde_vext_s32_r_); \ } \ simde_vext_s32_r; \ })) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vext_s32 #define vext_s32(a, b, n) simde_vext_s32((a), (b), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vext_s64(simde_int64x1_t a, simde_int64x1_t b, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) (void) n; return vext_s64(a, b, 0); #else simde_int64x1_private a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b), r_ = a_; const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { size_t src = i + n_; r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 0]; } return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) #define simde_vext_s64(a, b, n) (__extension__ ({ \ simde_int64x1_t simde_vext_s64_r; \ if (!__builtin_constant_p(n)) { \ simde_vext_s64_r = simde_vext_s64(a, b, n); \ } else { \ const int simde_vext_s64_n = HEDLEY_STATIC_CAST(int8_t, n); \ simde_int64x1_private simde_vext_s64_r_; \ simde_vext_s64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_int64x1_to_private(a).values, simde_int64x1_to_private(b).values, \ HEDLEY_STATIC_CAST(int8_t, simde_vext_s64_n + 0)); \ simde_vext_s64_r = simde_int64x1_from_private(simde_vext_s64_r_); \ } \ simde_vext_s64_r; \ })) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vext_s64 #define vext_s64(a, b, n) simde_vext_s64((a), (b), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vext_u8(simde_uint8x8_t a, simde_uint8x8_t b, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) simde_uint8x8_t r; SIMDE_CONSTIFY_8_(vext_u8, r, (HEDLEY_UNREACHABLE(), a), n, a, b); return r; #else simde_uint8x8_private a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b), r_ = a_; const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { size_t src = i + n_; r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 7]; } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) #define simde_vext_u8(a, b, n) (__extension__ ({ \ simde_uint8x8_t simde_vext_u8_r; \ if (!__builtin_constant_p(n)) { \ simde_vext_u8_r = simde_vext_u8(a, b, n); \ } else { \ const int simde_vext_u8_n = HEDLEY_STATIC_CAST(int8_t, n); \ simde_uint8x8_private simde_vext_u8_r_; \ simde_vext_u8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, simde_uint8x8_to_private(a).values, simde_uint8x8_to_private(b).values, \ HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 1), \ HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 3), \ HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 4), HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 5), \ HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 6), HEDLEY_STATIC_CAST(int8_t, simde_vext_u8_n + 7)); \ simde_vext_u8_r = simde_uint8x8_from_private(simde_vext_u8_r_); \ } \ simde_vext_u8_r; \ })) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vext_u8 #define vext_u8(a, b, n) simde_vext_u8((a), (b), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vext_u16(simde_uint16x4_t a, simde_uint16x4_t b, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) simde_uint16x4_t r; SIMDE_CONSTIFY_4_(vext_u16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); return r; #else simde_uint16x4_private a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b), r_ = a_; const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { size_t src = i + n_; r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 3];
    }
    return simde_uint16x4_from_private(r_);
  #endif
}
/* Shuffle-based override of simde_vext_u16.  Only enabled when
 * SIMDE_SHUFFLE_VECTOR_ is available and none of the A32V7-native, clang or
 * SIMDE_BUG_GCC_BAD_VEXT_REV32 conditions hold.  A non-constant n is passed
 * on to the simde_vext_u16 function; a constant n becomes one
 * SIMDE_SHUFFLE_VECTOR_ call with indices n + 0 through n + 3. */
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vext_u16(a, b, n) (__extension__ ({ \
      simde_uint16x4_t simde_vext_u16_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vext_u16_r = simde_vext_u16(a, b, n); \
      } else { \
        const int simde_vext_u16_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_uint16x4_private simde_vext_u16_r_; \
        simde_vext_u16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, simde_uint16x4_to_private(a).values, simde_uint16x4_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_u16_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_u16_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_u16_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vext_u16_n + 3)); \
        simde_vext_u16_r = simde_uint16x4_from_private(simde_vext_u16_r_); \
      } \
      simde_vext_u16_r; \
    }))
#endif
/* Expose the unprefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_u16
  #define vext_u16(a, b, n) simde_vext_u16((a), (b), (n))
#endif

/* simde_vext_u32: result lane i is taken from position i + n of a followed
 * by b (2 lanes each).  n is constrained to [0, 1] by
 * SIMDE_REQUIRE_CONSTANT_RANGE.  The native vext_u32 is used when
 * SIMDE_ARM_NEON_A32V7_NATIVE is defined; otherwise a scalar loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x2_t
simde_vext_u32(simde_uint32x2_t a, simde_uint32x2_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_uint32x2_t r;
    SIMDE_CONSTIFY_2_(vext_u32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_uint32x2_private
      a_ = simde_uint32x2_to_private(a),
      b_ = simde_uint32x2_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      /* Positions inside the lane count read from a, the rest from b. */
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ?
a_.values[src] : b_.values[src & 1];
    }
    return simde_uint32x2_from_private(r_);
  #endif
}
/* Shuffle-based override of simde_vext_u32.  Only enabled when
 * SIMDE_SHUFFLE_VECTOR_ is available and none of the A32V7-native, clang or
 * SIMDE_BUG_GCC_BAD_VEXT_REV32 conditions hold.  A non-constant n is passed
 * on to the simde_vext_u32 function; a constant n becomes one
 * SIMDE_SHUFFLE_VECTOR_ call with indices n + 0 and n + 1. */
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vext_u32(a, b, n) (__extension__ ({ \
      simde_uint32x2_t simde_vext_u32_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vext_u32_r = simde_vext_u32(a, b, n); \
      } else { \
        const int simde_vext_u32_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_uint32x2_private simde_vext_u32_r_; \
        simde_vext_u32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_uint32x2_to_private(a).values, simde_uint32x2_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_u32_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vext_u32_n + 1)); \
        simde_vext_u32_r = simde_uint32x2_from_private(simde_vext_u32_r_); \
      } \
      simde_vext_u32_r; \
    }))
#endif
/* Expose the unprefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_u32
  #define vext_u32(a, b, n) simde_vext_u32((a), (b), (n))
#endif

/* simde_vext_u64: single-lane variant.  n is constrained to [0, 0] by
 * SIMDE_REQUIRE_CONSTANT_RANGE, so the native branch ignores n and passes a
 * literal 0 to vext_u64; the portable branch runs the same select loop as
 * the wider variants. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x1_t
simde_vext_u64(simde_uint64x1_t a, simde_uint64x1_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    (void) n;
    return vext_u64(a, b, 0);
  #else
    simde_uint64x1_private
      a_ = simde_uint64x1_to_private(a),
      b_ = simde_uint64x1_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      /* Positions inside the lane count read from a, the rest from b. */
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ?
a_.values[src] : b_.values[src & 0];
    }
    return simde_uint64x1_from_private(r_);
  #endif
}
/* Shuffle-based override of simde_vext_u64.  Only enabled when
 * SIMDE_SHUFFLE_VECTOR_ is available and none of the A32V7-native, clang or
 * SIMDE_BUG_GCC_BAD_VEXT_REV32 conditions hold.  A non-constant n is passed
 * on to the simde_vext_u64 function; a constant n becomes one
 * SIMDE_SHUFFLE_VECTOR_ call with the single index n + 0. */
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vext_u64(a, b, n) (__extension__ ({ \
      simde_uint64x1_t simde_vext_u64_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vext_u64_r = simde_vext_u64(a, b, n); \
      } else { \
        const int simde_vext_u64_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_uint64x1_private simde_vext_u64_r_; \
        simde_vext_u64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_uint64x1_to_private(a).values, simde_uint64x1_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vext_u64_n + 0)); \
        simde_vext_u64_r = simde_uint64x1_from_private(simde_vext_u64_r_); \
      } \
      simde_vext_u64_r; \
    }))
#endif
/* Expose the unprefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vext_u64
  #define vext_u64(a, b, n) simde_vext_u64((a), (b), (n))
#endif

/* simde_vextq_f32: result lane i is taken from position i + n of a followed
 * by b (4 lanes each).  n is constrained to [0, 3] by
 * SIMDE_REQUIRE_CONSTANT_RANGE.  The native vextq_f32 is used when
 * SIMDE_ARM_NEON_A32V7_NATIVE is defined; otherwise a scalar loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vextq_f32(simde_float32x4_t a, simde_float32x4_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_float32x4_t r;
    SIMDE_CONSTIFY_4_(vextq_f32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_float32x4_private
      a_ = simde_float32x4_to_private(a),
      b_ = simde_float32x4_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      /* Positions inside the lane count read from a, the rest from b. */
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ?
a_.values[src] : b_.values[src & 3];
    }
    return simde_float32x4_from_private(r_);
  #endif
}
/* Shuffle-based override of simde_vextq_f32.  Only enabled when
 * SIMDE_SHUFFLE_VECTOR_ is available and none of the A32V7-native, clang or
 * SIMDE_BUG_GCC_BAD_VEXT_REV32 conditions hold.  A non-constant n is passed
 * on to the simde_vextq_f32 function; a constant n becomes one
 * SIMDE_SHUFFLE_VECTOR_ call with indices n + 0 through n + 3. */
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_f32(a, b, n) (__extension__ ({ \
      simde_float32x4_t simde_vextq_f32_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_f32_r = simde_vextq_f32(a, b, n); \
      } else { \
        const int simde_vextq_f32_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_float32x4_private simde_vextq_f32_r_; \
        simde_vextq_f32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_float32x4_to_private(a).values, simde_float32x4_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_f32_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_f32_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_f32_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_f32_n + 3)); \
        simde_vextq_f32_r = simde_float32x4_from_private(simde_vextq_f32_r_); \
      } \
      simde_vextq_f32_r; \
    }))
#endif
/* Expose the unprefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_f32
  #define vextq_f32(a, b, n) simde_vextq_f32((a), (b), (n))
#endif

/* simde_vextq_f64: result lane i is taken from position i + n of a followed
 * by b (2 lanes each).  n is constrained to [0, 1] by
 * SIMDE_REQUIRE_CONSTANT_RANGE.  Unlike the 32-bit-and-narrower variants,
 * the native branch is keyed on SIMDE_ARM_NEON_A64V8_NATIVE. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vextq_f64(simde_float64x2_t a, simde_float64x2_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    simde_float64x2_t r;
    SIMDE_CONSTIFY_2_(vextq_f64, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_float64x2_private
      a_ = simde_float64x2_to_private(a),
      b_ = simde_float64x2_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      /* Positions inside the lane count read from a, the rest from b. */
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ?
a_.values[src] : b_.values[src & 1];
    }
    return simde_float64x2_from_private(r_);
  #endif
}
/* Shuffle-based override of simde_vextq_f64.  A non-constant n is passed on
 * to the simde_vextq_f64 function; a constant n becomes one
 * SIMDE_SHUFFLE_VECTOR_ call with indices n + 0 and n + 1.
 * NOTE(review): this guard tests SIMDE_ARM_NEON_A32V7_NATIVE while the
 * function's native branch tests SIMDE_ARM_NEON_A64V8_NATIVE, so an
 * A32V7-only native build gets neither the intrinsic nor the shuffle for
 * f64 — confirm whether that is intended. */
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_f64(a, b, n) (__extension__ ({ \
      simde_float64x2_t simde_vextq_f64_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_f64_r = simde_vextq_f64(a, b, n); \
      } else { \
        const int simde_vextq_f64_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_float64x2_private simde_vextq_f64_r_; \
        simde_vextq_f64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_float64x2_to_private(a).values, simde_float64x2_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_f64_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_f64_n + 1)); \
        simde_vextq_f64_r = simde_float64x2_from_private(simde_vextq_f64_r_); \
      } \
      simde_vextq_f64_r; \
    }))
#endif
/* Expose the unprefixed NEON name when A64V8 native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vextq_f64
  #define vextq_f64(a, b, n) simde_vextq_f64((a), (b), (n))
#endif

/* simde_vextq_s8: result lane i is taken from position i + n of a followed
 * by b (16 lanes each).  n is constrained to [0, 15] by
 * SIMDE_REQUIRE_CONSTANT_RANGE.  The native vextq_s8 is used when
 * SIMDE_ARM_NEON_A32V7_NATIVE is defined; otherwise a scalar loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vextq_s8(simde_int8x16_t a, simde_int8x16_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_int8x16_t r;
    SIMDE_CONSTIFY_16_(vextq_s8, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_int8x16_private
      a_ = simde_int8x16_to_private(a),
      b_ = simde_int8x16_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      /* Positions inside the lane count read from a, the rest from b. */
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ?
a_.values[src] : b_.values[src & 15];
    }
    return simde_int8x16_from_private(r_);
  #endif
}
/* Shuffle-based override of simde_vextq_s8.  Only enabled when
 * SIMDE_SHUFFLE_VECTOR_ is available and none of the A32V7-native, clang or
 * SIMDE_BUG_GCC_BAD_VEXT_REV32 conditions hold.  A non-constant n is passed
 * on to the simde_vextq_s8 function; a constant n becomes one
 * SIMDE_SHUFFLE_VECTOR_ call with indices n + 0 through n + 15. */
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_s8(a, b, n) (__extension__ ({ \
      simde_int8x16_t simde_vextq_s8_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_s8_r = simde_vextq_s8(a, b, n); \
      } else { \
        const int simde_vextq_s8_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_int8x16_private simde_vextq_s8_r_; \
        simde_vextq_s8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, simde_int8x16_to_private(a).values, simde_int8x16_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 3), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 4), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 5), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 6), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 7), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 8), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 9), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 10), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 11), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 12), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 13), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 14), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s8_n + 15)); \
        simde_vextq_s8_r = simde_int8x16_from_private(simde_vextq_s8_r_); \
      } \
      simde_vextq_s8_r; \
    }))
#endif
/* Expose the unprefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_s8
  #define vextq_s8(a, b, n) simde_vextq_s8((a), (b), (n))
#endif

/* simde_vextq_s16: 8-lane variant; n is constrained to [0, 7] by
 * SIMDE_REQUIRE_CONSTANT_RANGE.  The native vextq_s16 is used when
 * SIMDE_ARM_NEON_A32V7_NATIVE is defined; the portable branch follows the
 * #else below. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vextq_s16(simde_int16x8_t a, simde_int16x8_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_int16x8_t r;
    SIMDE_CONSTIFY_8_(vextq_s16, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
simde_int16x8_private
      a_ = simde_int16x8_to_private(a),
      b_ = simde_int16x8_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      /* Positions inside the lane count read from a; the rest read from b
       * at index src & 7. */
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
    }
    return simde_int16x8_from_private(r_);
  #endif
}
/* Shuffle-based override of simde_vextq_s16.  Only enabled when
 * SIMDE_SHUFFLE_VECTOR_ is available and none of the A32V7-native, clang or
 * SIMDE_BUG_GCC_BAD_VEXT_REV32 conditions hold.  A non-constant n is passed
 * on to the simde_vextq_s16 function; a constant n becomes one
 * SIMDE_SHUFFLE_VECTOR_ call with indices n + 0 through n + 7. */
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_s16(a, b, n) (__extension__ ({ \
      simde_int16x8_t simde_vextq_s16_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_s16_r = simde_vextq_s16(a, b, n); \
      } else { \
        const int simde_vextq_s16_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_int16x8_private simde_vextq_s16_r_; \
        simde_vextq_s16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, simde_int16x8_to_private(a).values, simde_int16x8_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 3), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 4), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 5), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 6), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s16_n + 7)); \
        simde_vextq_s16_r = simde_int16x8_from_private(simde_vextq_s16_r_); \
      } \
      simde_vextq_s16_r; \
    }))
#endif
/* Expose the unprefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_s16
  #define vextq_s16(a, b, n) simde_vextq_s16((a), (b), (n))
#endif

/* simde_vextq_s32: 4-lane variant; n is constrained to [0, 3] by
 * SIMDE_REQUIRE_CONSTANT_RANGE.  The native vextq_s32 is used when
 * SIMDE_ARM_NEON_A32V7_NATIVE is defined; otherwise a scalar loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vextq_s32(simde_int32x4_t a, simde_int32x4_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_int32x4_t r;
    SIMDE_CONSTIFY_4_(vextq_s32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_int32x4_private
      a_ = simde_int32x4_to_private(a),
      b_ =
simde_int32x4_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      /* Positions inside the lane count read from a; the rest read from b
       * at index src & 3. */
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
    }
    return simde_int32x4_from_private(r_);
  #endif
}
/* Shuffle-based override of simde_vextq_s32.  Only enabled when
 * SIMDE_SHUFFLE_VECTOR_ is available and none of the A32V7-native, clang or
 * SIMDE_BUG_GCC_BAD_VEXT_REV32 conditions hold.  A non-constant n is passed
 * on to the simde_vextq_s32 function; a constant n becomes one
 * SIMDE_SHUFFLE_VECTOR_ call with indices n + 0 through n + 3. */
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_s32(a, b, n) (__extension__ ({ \
      simde_int32x4_t simde_vextq_s32_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_s32_r = simde_vextq_s32(a, b, n); \
      } else { \
        const int simde_vextq_s32_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_int32x4_private simde_vextq_s32_r_; \
        simde_vextq_s32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_int32x4_to_private(a).values, simde_int32x4_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s32_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s32_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s32_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s32_n + 3)); \
        simde_vextq_s32_r = simde_int32x4_from_private(simde_vextq_s32_r_); \
      } \
      simde_vextq_s32_r; \
    }))
#endif
/* Expose the unprefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_s32
  #define vextq_s32(a, b, n) simde_vextq_s32((a), (b), (n))
#endif

/* simde_vextq_s64: result lane i is taken from position i + n of a followed
 * by b (2 lanes each).  n is constrained to [0, 1] by
 * SIMDE_REQUIRE_CONSTANT_RANGE.  The native vextq_s64 is used when
 * SIMDE_ARM_NEON_A32V7_NATIVE is defined; otherwise a scalar loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vextq_s64(simde_int64x2_t a, simde_int64x2_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_int64x2_t r;
    SIMDE_CONSTIFY_2_(vextq_s64, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_int64x2_private
      a_ = simde_int64x2_to_private(a),
      b_ = simde_int64x2_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      /* Positions inside the lane count read from a, the rest from b. */
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ?
a_.values[src] : b_.values[src & 1];
    }
    return simde_int64x2_from_private(r_);
  #endif
}
/* Shuffle-based override of simde_vextq_s64.  Only enabled when
 * SIMDE_SHUFFLE_VECTOR_ is available and none of the A32V7-native, clang or
 * SIMDE_BUG_GCC_BAD_VEXT_REV32 conditions hold.  A non-constant n is passed
 * on to the simde_vextq_s64 function; a constant n becomes one
 * SIMDE_SHUFFLE_VECTOR_ call with indices n + 0 and n + 1. */
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_s64(a, b, n) (__extension__ ({ \
      simde_int64x2_t simde_vextq_s64_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_s64_r = simde_vextq_s64(a, b, n); \
      } else { \
        const int simde_vextq_s64_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_int64x2_private simde_vextq_s64_r_; \
        simde_vextq_s64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_int64x2_to_private(a).values, simde_int64x2_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_s64_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_s64_n + 1)); \
        simde_vextq_s64_r = simde_int64x2_from_private(simde_vextq_s64_r_); \
      } \
      simde_vextq_s64_r; \
    }))
#endif
/* Expose the unprefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_s64
  #define vextq_s64(a, b, n) simde_vextq_s64((a), (b), (n))
#endif

/* simde_vextq_u8: result lane i is taken from position i + n of a followed
 * by b (16 lanes each).  n is constrained to [0, 15] by
 * SIMDE_REQUIRE_CONSTANT_RANGE.  The native vextq_u8 is used when
 * SIMDE_ARM_NEON_A32V7_NATIVE is defined; otherwise a scalar loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vextq_u8(simde_uint8x16_t a, simde_uint8x16_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_uint8x16_t r;
    SIMDE_CONSTIFY_16_(vextq_u8, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_uint8x16_private
      a_ = simde_uint8x16_to_private(a),
      b_ = simde_uint8x16_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      /* Positions inside the lane count read from a, the rest from b. */
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ?
a_.values[src] : b_.values[src & 15];
    }
    return simde_uint8x16_from_private(r_);
  #endif
}
/* Shuffle-based override of simde_vextq_u8.  Only enabled when
 * SIMDE_SHUFFLE_VECTOR_ is available and none of the A32V7-native, clang or
 * SIMDE_BUG_GCC_BAD_VEXT_REV32 conditions hold.  A non-constant n is passed
 * on to the simde_vextq_u8 function; a constant n becomes one
 * SIMDE_SHUFFLE_VECTOR_ call with indices n + 0 through n + 15. */
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_u8(a, b, n) (__extension__ ({ \
      simde_uint8x16_t simde_vextq_u8_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_u8_r = simde_vextq_u8(a, b, n); \
      } else { \
        const int simde_vextq_u8_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_uint8x16_private simde_vextq_u8_r_; \
        simde_vextq_u8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, simde_uint8x16_to_private(a).values, simde_uint8x16_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 3), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 4), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 5), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 6), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 7), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 8), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 9), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 10), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 11), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 12), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 13), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 14), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u8_n + 15)); \
        simde_vextq_u8_r = simde_uint8x16_from_private(simde_vextq_u8_r_); \
      } \
      simde_vextq_u8_r; \
    }))
#endif
/* Expose the unprefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_u8
  #define vextq_u8(a, b, n) simde_vextq_u8((a), (b), (n))
#endif

/* simde_vextq_u16: 8-lane variant; n is constrained to [0, 7] by
 * SIMDE_REQUIRE_CONSTANT_RANGE.  The native vextq_u16 is used when
 * SIMDE_ARM_NEON_A32V7_NATIVE is defined. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vextq_u16(simde_uint16x8_t a, simde_uint16x8_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_uint16x8_t r;
    SIMDE_CONSTIFY_8_(vextq_u16, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return
r;
  #else
    simde_uint16x8_private
      a_ = simde_uint16x8_to_private(a),
      b_ = simde_uint16x8_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      /* Positions inside the lane count read from a; the rest read from b
       * at index src & 7. */
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
    }
    return simde_uint16x8_from_private(r_);
  #endif
}
/* Shuffle-based override of simde_vextq_u16.  Only enabled when
 * SIMDE_SHUFFLE_VECTOR_ is available and none of the A32V7-native, clang or
 * SIMDE_BUG_GCC_BAD_VEXT_REV32 conditions hold.  A non-constant n is passed
 * on to the simde_vextq_u16 function; a constant n becomes one
 * SIMDE_SHUFFLE_VECTOR_ call with indices n + 0 through n + 7. */
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_u16(a, b, n) (__extension__ ({ \
      simde_uint16x8_t simde_vextq_u16_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_u16_r = simde_vextq_u16(a, b, n); \
      } else { \
        const int simde_vextq_u16_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_uint16x8_private simde_vextq_u16_r_; \
        simde_vextq_u16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, simde_uint16x8_to_private(a).values, simde_uint16x8_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 3), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 4), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 5), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 6), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u16_n + 7)); \
        simde_vextq_u16_r = simde_uint16x8_from_private(simde_vextq_u16_r_); \
      } \
      simde_vextq_u16_r; \
    }))
#endif
/* Expose the unprefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_u16
  #define vextq_u16(a, b, n) simde_vextq_u16((a), (b), (n))
#endif

/* simde_vextq_u32: 4-lane variant; n is constrained to [0, 3] by
 * SIMDE_REQUIRE_CONSTANT_RANGE.  The native vextq_u32 is used when
 * SIMDE_ARM_NEON_A32V7_NATIVE is defined; otherwise a scalar loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vextq_u32(simde_uint32x4_t a, simde_uint32x4_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_uint32x4_t r;
    SIMDE_CONSTIFY_4_(vextq_u32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_uint32x4_private
      a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      /* Positions inside the lane count read from a; the rest read from b
       * at index src & 3. */
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
    }
    return simde_uint32x4_from_private(r_);
  #endif
}
/* Shuffle-based override of simde_vextq_u32.  Only enabled when
 * SIMDE_SHUFFLE_VECTOR_ is available and none of the A32V7-native, clang or
 * SIMDE_BUG_GCC_BAD_VEXT_REV32 conditions hold.  A non-constant n is passed
 * on to the simde_vextq_u32 function; a constant n becomes one
 * SIMDE_SHUFFLE_VECTOR_ call with indices n + 0 through n + 3. */
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_u32(a, b, n) (__extension__ ({ \
      simde_uint32x4_t simde_vextq_u32_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_u32_r = simde_vextq_u32(a, b, n); \
      } else { \
        const int simde_vextq_u32_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_uint32x4_private simde_vextq_u32_r_; \
        simde_vextq_u32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_uint32x4_to_private(a).values, simde_uint32x4_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u32_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u32_n + 1), \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u32_n + 2), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u32_n + 3)); \
        simde_vextq_u32_r = simde_uint32x4_from_private(simde_vextq_u32_r_); \
      } \
      simde_vextq_u32_r; \
    }))
#endif
/* Expose the unprefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_u32
  #define vextq_u32(a, b, n) simde_vextq_u32((a), (b), (n))
#endif

/* simde_vextq_u64: result lane i is taken from position i + n of a followed
 * by b (2 lanes each).  n is constrained to [0, 1] by
 * SIMDE_REQUIRE_CONSTANT_RANGE.  The native vextq_u64 is used when
 * SIMDE_ARM_NEON_A32V7_NATIVE is defined; otherwise a scalar loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vextq_u64(simde_uint64x2_t a, simde_uint64x2_t b, const int n)
    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    simde_uint64x2_t r;
    SIMDE_CONSTIFY_2_(vextq_u64, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
    return r;
  #else
    simde_uint64x2_private
      a_ = simde_uint64x2_to_private(a),
      b_ = simde_uint64x2_to_private(b),
      r_ = a_;
    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      size_t src = i + n_;
      /* Positions inside the lane count read from a, the rest from b. */
      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ?
a_.values[src] : b_.values[src & 1];
    }
    return simde_uint64x2_from_private(r_);
  #endif
}
/* Shuffle-based override of simde_vextq_u64.  Only enabled when
 * SIMDE_SHUFFLE_VECTOR_ is available and none of the A32V7-native, clang or
 * SIMDE_BUG_GCC_BAD_VEXT_REV32 conditions hold.  A non-constant n is passed
 * on to the simde_vextq_u64 function; a constant n becomes one
 * SIMDE_SHUFFLE_VECTOR_ call with indices n + 0 and n + 1. */
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
  #define simde_vextq_u64(a, b, n) (__extension__ ({ \
      simde_uint64x2_t simde_vextq_u64_r; \
      if (!__builtin_constant_p(n)) { \
        simde_vextq_u64_r = simde_vextq_u64(a, b, n); \
      } else { \
        const int simde_vextq_u64_n = HEDLEY_STATIC_CAST(int8_t, n); \
        simde_uint64x2_private simde_vextq_u64_r_; \
        simde_vextq_u64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_uint64x2_to_private(a).values, simde_uint64x2_to_private(b).values, \
          HEDLEY_STATIC_CAST(int8_t, simde_vextq_u64_n + 0), HEDLEY_STATIC_CAST(int8_t, simde_vextq_u64_n + 1)); \
        simde_vextq_u64_r = simde_uint64x2_from_private(simde_vextq_u64_r_); \
      } \
      simde_vextq_u64_r; \
    }))
#endif
/* Expose the unprefixed NEON name when native aliases are requested. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vextq_u64
  #define vextq_u64(a, b, n) simde_vextq_u64((a), (b), (n))
#endif

/* End of ext.h: close the declaration and diagnostic scopes and the
 * include guard. */
SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_ARM_NEON_EXT_H) */
simde-0.7.2/simde/arm/neon/get_high.h000066400000000000000000000173351400333146700174010ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_GET_HIGH_H) #define SIMDE_ARM_NEON_GET_HIGH_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vget_high_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vget_high_f32(a); #else simde_float32x2_private r_; simde_float32x4_private a_ = simde_float32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; } return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_high_f32 #define vget_high_f32(a) simde_vget_high_f32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vget_high_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vget_high_f64(a); #else simde_float64x1_private r_; simde_float64x2_private a_ = simde_float64x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; } return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vget_high_f64 #define vget_high_f64(a) simde_vget_high_f64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t 
simde_vget_high_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vget_high_s8(a); #else simde_int8x8_private r_; simde_int8x16_private a_ = simde_int8x16_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_high_s8 #define vget_high_s8(a) simde_vget_high_s8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vget_high_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vget_high_s16(a); #else simde_int16x4_private r_; simde_int16x8_private a_ = simde_int16x8_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; } return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_high_s16 #define vget_high_s16(a) simde_vget_high_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vget_high_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vget_high_s32(a); #else simde_int32x2_private r_; simde_int32x4_private a_ = simde_int32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_high_s32 #define vget_high_s32(a) simde_vget_high_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vget_high_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vget_high_s64(a); #else simde_int64x1_private r_; simde_int64x2_private a_ = simde_int64x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < 
(sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; } return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_high_s64 #define vget_high_s64(a) simde_vget_high_s64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vget_high_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vget_high_u8(a); #else simde_uint8x8_private r_; simde_uint8x16_private a_ = simde_uint8x16_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_high_u8 #define vget_high_u8(a) simde_vget_high_u8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vget_high_u16(simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vget_high_u16(a); #else simde_uint16x4_private r_; simde_uint16x8_private a_ = simde_uint16x8_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; } return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_high_u16 #define vget_high_u16(a) simde_vget_high_u16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vget_high_u32(simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vget_high_u32(a); #else simde_uint32x2_private r_; simde_uint32x4_private a_ = simde_uint32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; } return simde_uint32x2_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_high_u32 #define vget_high_u32(a) simde_vget_high_u32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vget_high_u64(simde_uint64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vget_high_u64(a); #else simde_uint64x1_private r_; simde_uint64x2_private a_ = simde_uint64x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; } return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_high_u64 #define vget_high_u64(a) simde_vget_high_u64((a)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_GET_HIGH_H) */ simde-0.7.2/simde/arm/neon/get_lane.h000066400000000000000000000341731400333146700174000ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_GET_LANE_H) #define SIMDE_ARM_NEON_GET_LANE_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32_t simde_vget_lane_f32(simde_float32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float32_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_2_(vget_lane_f32, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT32_C(0.0)), lane, v); #else simde_float32x2_private v_ = simde_float32x2_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_lane_f32 #define vget_lane_f32(v, lane) simde_vget_lane_f32((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64_t simde_vget_lane_f64(simde_float64x1_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { simde_float64_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) (void) lane; return vget_lane_f64(v, 0); #else simde_float64x1_private v_ = simde_float64x1_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vget_lane_f64 #define vget_lane_f64(v, lane) simde_vget_lane_f64((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES int8_t simde_vget_lane_s8(simde_int8x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { int8_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_8_(vget_lane_s8, r, (HEDLEY_UNREACHABLE(), INT8_C(0)), lane, v); #else simde_int8x8_private v_ = simde_int8x8_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef 
vget_lane_s8 #define vget_lane_s8(v, lane) simde_vget_lane_s8((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_vget_lane_s16(simde_int16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { int16_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_4_(vget_lane_s16, r, (HEDLEY_UNREACHABLE(), INT16_C(0)), lane, v); #else simde_int16x4_private v_ = simde_int16x4_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_lane_s16 #define vget_lane_s16(v, lane) simde_vget_lane_s16((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vget_lane_s32(simde_int32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { int32_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_2_(vget_lane_s32, r, (HEDLEY_UNREACHABLE(), INT32_C(0)), lane, v); #else simde_int32x2_private v_ = simde_int32x2_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_lane_s32 #define vget_lane_s32(v, lane) simde_vget_lane_s32((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_vget_lane_s64(simde_int64x1_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { int64_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) (void) lane; return vget_lane_s64(v, 0); #else simde_int64x1_private v_ = simde_int64x1_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_lane_s64 #define vget_lane_s64(v, lane) simde_vget_lane_s64((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES uint8_t simde_vget_lane_u8(simde_uint8x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { uint8_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_8_(vget_lane_u8, r, (HEDLEY_UNREACHABLE(), UINT8_C(0)), lane, v); #else simde_uint8x8_private v_ = simde_uint8x8_to_private(v); r = v_.values[lane]; #endif return r; } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_lane_u8 #define vget_lane_u8(v, lane) simde_vget_lane_u8((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES uint16_t simde_vget_lane_u16(simde_uint16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { uint16_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_4_(vget_lane_u16, r, (HEDLEY_UNREACHABLE(), UINT16_C(0)), lane, v); #else simde_uint16x4_private v_ = simde_uint16x4_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_lane_u16 #define vget_lane_u16(v, lane) simde_vget_lane_u16((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vget_lane_u32(simde_uint32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { uint32_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_2_(vget_lane_u32, r, (HEDLEY_UNREACHABLE(), UINT32_C(0)), lane, v); #else simde_uint32x2_private v_ = simde_uint32x2_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_lane_u32 #define vget_lane_u32(v, lane) simde_vget_lane_u32((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_vget_lane_u64(simde_uint64x1_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { uint64_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) (void) lane; return vget_lane_u64(v, 0); #else simde_uint64x1_private v_ = simde_uint64x1_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_lane_u64 #define vget_lane_u64(v, lane) simde_vget_lane_u64((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32_t simde_vgetq_lane_f32(simde_float32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_float32_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_4_(vgetq_lane_f32, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT32_C(0.0)), lane, v); #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) SIMDE_CONSTIFY_4_(wasm_f32x4_extract_lane, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT32_C(0.0)), lane, v); #else simde_float32x4_private v_ = simde_float32x4_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vgetq_lane_f32 #define vgetq_lane_f32(v, lane) simde_vgetq_lane_f32((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64_t simde_vgetq_lane_f64(simde_float64x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float64_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) SIMDE_CONSTIFY_2_(vgetq_lane_f64, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT64_C(0.0)), lane, v); #elif defined(SIMDE_WASM_SIMD128_NATIVE) SIMDE_CONSTIFY_2_(wasm_f64x2_extract_lane, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT64_C(0.0)), lane, v); #else simde_float64x2_private v_ = simde_float64x2_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vgetq_lane_f64 #define vgetq_lane_f64(v, lane) simde_vgetq_lane_f64((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES int8_t simde_vgetq_lane_s8(simde_int8x16_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { int8_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_16_(vgetq_lane_s8, r, (HEDLEY_UNREACHABLE(), INT8_C(0)), lane, v); #elif defined(SIMDE_WASM_SIMD128_NATIVE) int r_; SIMDE_CONSTIFY_16_(wasm_i8x16_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT8_C(0)), lane, v); r = HEDLEY_STATIC_CAST(int8_t, r_); #else simde_int8x16_private v_ = simde_int8x16_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vgetq_lane_s8 #define vgetq_lane_s8(v, lane) simde_vgetq_lane_s8((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_vgetq_lane_s16(simde_int16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { int16_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) 
SIMDE_CONSTIFY_8_(vgetq_lane_s16, r, (HEDLEY_UNREACHABLE(), INT16_C(0)), lane, v); #elif defined(SIMDE_WASM_SIMD128_NATIVE) int r_; SIMDE_CONSTIFY_8_(wasm_i16x8_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT16_C(0)), lane, v); r = HEDLEY_STATIC_CAST(int16_t, r_); #else simde_int16x8_private v_ = simde_int16x8_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vgetq_lane_s16 #define vgetq_lane_s16(v, lane) simde_vgetq_lane_s16((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vgetq_lane_s32(simde_int32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { int32_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_4_(vgetq_lane_s32, r, (HEDLEY_UNREACHABLE(), INT32_C(0)), lane, v); #elif defined(SIMDE_WASM_SIMD128_NATIVE) int r_; SIMDE_CONSTIFY_4_(wasm_i32x4_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT32_C(0)), lane, v); r = HEDLEY_STATIC_CAST(int32_t, r_); #else simde_int32x4_private v_ = simde_int32x4_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vgetq_lane_s32 #define vgetq_lane_s32(v, lane) simde_vgetq_lane_s32((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_vgetq_lane_s64(simde_int64x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { int64_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_2_(vgetq_lane_s64, r, (HEDLEY_UNREACHABLE(), INT64_C(0)), lane, v); #elif defined(SIMDE_WASM_SIMD128_NATIVE) long long r_; SIMDE_CONSTIFY_2_(wasm_i64x2_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT64_C(0)), lane, v); r = HEDLEY_STATIC_CAST(int64_t, r_); #else simde_int64x2_private v_ = simde_int64x2_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vgetq_lane_s64 #define vgetq_lane_s64(v, lane) simde_vgetq_lane_s64((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES uint8_t simde_vgetq_lane_u8(simde_uint8x16_t v, 
const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { uint8_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_16_(vgetq_lane_u8, r, (HEDLEY_UNREACHABLE(), UINT8_C(0)), lane, v); #elif defined(SIMDE_WASM_SIMD128_NATIVE) int r_; SIMDE_CONSTIFY_16_(wasm_i8x16_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT8_C(0)), lane, v); r = HEDLEY_STATIC_CAST(uint8_t, r_); #else simde_uint8x16_private v_ = simde_uint8x16_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vgetq_lane_u8 #define vgetq_lane_u8(v, lane) simde_vgetq_lane_u8((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES uint16_t simde_vgetq_lane_u16(simde_uint16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { uint16_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_8_(vgetq_lane_u16, r, (HEDLEY_UNREACHABLE(), UINT16_C(0)), lane, v); #elif defined(SIMDE_WASM_SIMD128_NATIVE) int r_; SIMDE_CONSTIFY_8_(wasm_i16x8_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT16_C(0)), lane, v); r = HEDLEY_STATIC_CAST(uint16_t, r_); #else simde_uint16x8_private v_ = simde_uint16x8_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vgetq_lane_u16 #define vgetq_lane_u16(v, lane) simde_vgetq_lane_u16((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vgetq_lane_u32(simde_uint32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { uint32_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_4_(vgetq_lane_u32, r, (HEDLEY_UNREACHABLE(), UINT32_C(0)), lane, v); #elif defined(SIMDE_WASM_SIMD128_NATIVE) int r_; SIMDE_CONSTIFY_4_(wasm_i32x4_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT32_C(0)), lane, v); r = HEDLEY_STATIC_CAST(uint32_t, r_); #else simde_uint32x4_private v_ = simde_uint32x4_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vgetq_lane_u32 #define vgetq_lane_u32(v, 
lane) simde_vgetq_lane_u32((v), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_vgetq_lane_u64(simde_uint64x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { uint64_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_2_(vgetq_lane_u64, r, (HEDLEY_UNREACHABLE(), UINT64_C(0)), lane, v); #elif defined(SIMDE_WASM_SIMD128_NATIVE) long long r_; SIMDE_CONSTIFY_2_(wasm_i64x2_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT64_C(0)), lane, v); r = HEDLEY_STATIC_CAST(uint64_t, r_); #else simde_uint64x2_private v_ = simde_uint64x2_to_private(v); r = v_.values[lane]; #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vgetq_lane_u64 #define vgetq_lane_u64(v, lane) simde_vgetq_lane_u64((v), (lane)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_GET_LANE_H) */ simde-0.7.2/simde/arm/neon/get_low.h000066400000000000000000000200561400333146700172550ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_GET_LOW_H) #define SIMDE_ARM_NEON_GET_LOW_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vget_low_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vget_low_f32(a); #else simde_float32x2_private r_; simde_float32x4_private a_ = simde_float32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i]; } return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_low_f32 #define vget_low_f32(a) simde_vget_low_f32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vget_low_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vget_low_f64(a); #else simde_float64x1_private r_; simde_float64x2_private a_ = simde_float64x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i]; } return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vget_low_f64 #define vget_low_f64(a) simde_vget_low_f64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vget_low_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vget_low_s8(a); #elif defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_movepi64_pi64(a); #else simde_int8x8_private r_; simde_int8x16_private a_ = simde_int8x16_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < 
(sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i]; } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_low_s8 #define vget_low_s8(a) simde_vget_low_s8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vget_low_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vget_low_s16(a); #elif defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_movepi64_pi64(a); #else simde_int16x4_private r_; simde_int16x8_private a_ = simde_int16x8_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i]; } return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_low_s16 #define vget_low_s16(a) simde_vget_low_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vget_low_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vget_low_s32(a); #elif defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_movepi64_pi64(a); #else simde_int32x2_private r_; simde_int32x4_private a_ = simde_int32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i]; } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_low_s32 #define vget_low_s32(a) simde_vget_low_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vget_low_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vget_low_s64(a); #elif defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_movepi64_pi64(a); #else simde_int64x1_private r_; simde_int64x2_private a_ = simde_int64x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i]; } return 
simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_low_s64 #define vget_low_s64(a) simde_vget_low_s64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vget_low_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vget_low_u8(a); #elif defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_movepi64_pi64(a); #else simde_uint8x8_private r_; simde_uint8x16_private a_ = simde_uint8x16_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i]; } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_low_u8 #define vget_low_u8(a) simde_vget_low_u8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vget_low_u16(simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vget_low_u16(a); #elif defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_movepi64_pi64(a); #else simde_uint16x4_private r_; simde_uint16x8_private a_ = simde_uint16x8_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i]; } return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_low_u16 #define vget_low_u16(a) simde_vget_low_u16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vget_low_u32(simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vget_low_u32(a); #elif defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_movepi64_pi64(a); #else simde_uint32x2_private r_; simde_uint32x4_private a_ = simde_uint32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i]; } return simde_uint32x2_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_low_u32 #define vget_low_u32(a) simde_vget_low_u32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vget_low_u64(simde_uint64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vget_low_u64(a); #elif defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_movepi64_pi64(a); #else simde_uint64x1_private r_; simde_uint64x2_private a_ = simde_uint64x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i]; } return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vget_low_u64 #define vget_low_u64(a) simde_vget_low_u64((a)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_GET_LOW_H) */ simde-0.7.2/simde/arm/neon/hadd.h000066400000000000000000000227561400333146700165260ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ /* TODO: the 128-bit versions only require AVX-512 because of the final * conversions from larger types down to smaller ones. We could get * the same results from AVX/AVX2 instructions with some shuffling * to extract the low half of each input element to the low half * of a 256-bit vector, then cast that to a 128-bit vector. */ #if !defined(SIMDE_ARM_NEON_HADD_H) #define SIMDE_ARM_NEON_HADD_H #include "addl.h" #include "shr_n.h" #include "movn.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vhadd_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhadd_s8(a, b); #else return simde_vmovn_s16(simde_vshrq_n_s16(simde_vaddl_s8(a, b), 1)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhadd_s8 #define vhadd_s8(a, b) simde_vhadd_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vhadd_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhadd_s16(a, b); #else return simde_vmovn_s32(simde_vshrq_n_s32(simde_vaddl_s16(a, b), 1)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhadd_s16 #define vhadd_s16(a, b) simde_vhadd_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vhadd_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhadd_s32(a, b); #else return simde_vmovn_s64(simde_vshrq_n_s64(simde_vaddl_s32(a, b), 1)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhadd_s32 #define vhadd_s32(a, b) simde_vhadd_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t 
simde_vhadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhadd_u8(a, b); #else return simde_vmovn_u16(simde_vshrq_n_u16(simde_vaddl_u8(a, b), 1)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhadd_u8 #define vhadd_u8(a, b) simde_vhadd_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vhadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhadd_u16(a, b); #else return simde_vmovn_u32(simde_vshrq_n_u32(simde_vaddl_u16(a, b), 1)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhadd_u16 #define vhadd_u16(a, b) simde_vhadd_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vhadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhadd_u32(a, b); #else return simde_vmovn_u64(simde_vshrq_n_u64(simde_vaddl_u32(a, b), 1)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhadd_u32 #define vhadd_u32(a, b) simde_vhadd_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vhaddq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhaddq_s8(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_cvtepi16_epi8(_mm256_srai_epi16(_mm256_add_epi16(_mm256_cvtepi8_epi16(a), _mm256_cvtepi8_epi16(b)), 1)); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int8_t, (HEDLEY_STATIC_CAST(int16_t, a_.values[i]) + HEDLEY_STATIC_CAST(int16_t, b_.values[i])) >> 1); } return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhaddq_s8 #define vhaddq_s8(a, b) simde_vhaddq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde_int16x8_t simde_vhaddq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhaddq_s16(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_cvtepi32_epi16(_mm256_srai_epi32(_mm256_add_epi32(_mm256_cvtepi16_epi32(a), _mm256_cvtepi16_epi32(b)), 1)); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (HEDLEY_STATIC_CAST(int32_t, a_.values[i]) + HEDLEY_STATIC_CAST(int32_t, b_.values[i])) >> 1); } return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhaddq_s16 #define vhaddq_s16(a, b) simde_vhaddq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vhaddq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhaddq_s32(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_cvtepi64_epi32(_mm256_srai_epi64(_mm256_add_epi64(_mm256_cvtepi32_epi64(a), _mm256_cvtepi32_epi64(b)), 1)); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int32_t, (HEDLEY_STATIC_CAST(int64_t, a_.values[i]) + HEDLEY_STATIC_CAST(int64_t, b_.values[i])) >> 1); } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhaddq_s32 #define vhaddq_s32(a, b) simde_vhaddq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vhaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhaddq_u8(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_cvtepi16_epi8(_mm256_srli_epi16(_mm256_add_epi16(_mm256_cvtepu8_epi16(a), 
_mm256_cvtepu8_epi16(b)), 1)); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (HEDLEY_STATIC_CAST(uint16_t, a_.values[i]) + HEDLEY_STATIC_CAST(uint16_t, b_.values[i])) >> 1); } return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhaddq_u8 #define vhaddq_u8(a, b) simde_vhaddq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vhaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhaddq_u16(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_cvtepi32_epi16(_mm256_srli_epi32(_mm256_add_epi32(_mm256_cvtepu16_epi32(a), _mm256_cvtepu16_epi32(b)), 1)); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) + HEDLEY_STATIC_CAST(uint32_t, b_.values[i])) >> 1); } return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhaddq_u16 #define vhaddq_u16(a, b) simde_vhaddq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vhaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhaddq_u32(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_cvtepi64_epi32(_mm256_srli_epi64(_mm256_add_epi64(_mm256_cvtepu32_epi64(a), _mm256_cvtepu32_epi64(b)), 1)); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, 
a_.values[i]) + HEDLEY_STATIC_CAST(uint64_t, b_.values[i])) >> 1); } return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhaddq_u32 #define vhaddq_u32(a, b) simde_vhaddq_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_HADD_H) */ simde-0.7.2/simde/arm/neon/hsub.h000066400000000000000000000227561400333146700165670ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ /* TODO: the 128-bit versions only require AVX-512 because of the final * conversions from larger types down to smaller ones. We could get * the same results from AVX/AVX2 instructions with some shuffling * to extract the low half of each input element to the low half * of a 256-bit vector, then cast that to a 128-bit vector. 
*/ #if !defined(SIMDE_ARM_NEON_HSUB_H) #define SIMDE_ARM_NEON_HSUB_H #include "subl.h" #include "shr_n.h" #include "movn.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vhsub_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhsub_s8(a, b); #else return simde_vmovn_s16(simde_vshrq_n_s16(simde_vsubl_s8(a, b), 1)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhsub_s8 #define vhsub_s8(a, b) simde_vhsub_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vhsub_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhsub_s16(a, b); #else return simde_vmovn_s32(simde_vshrq_n_s32(simde_vsubl_s16(a, b), 1)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhsub_s16 #define vhsub_s16(a, b) simde_vhsub_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vhsub_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhsub_s32(a, b); #else return simde_vmovn_s64(simde_vshrq_n_s64(simde_vsubl_s32(a, b), 1)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhsub_s32 #define vhsub_s32(a, b) simde_vhsub_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vhsub_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhsub_u8(a, b); #else return simde_vmovn_u16(simde_vshrq_n_u16(simde_vsubl_u8(a, b), 1)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhsub_u8 #define vhsub_u8(a, b) simde_vhsub_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vhsub_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhsub_u16(a, b); #else return simde_vmovn_u32(simde_vshrq_n_u32(simde_vsubl_u16(a, b), 1)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) 
#undef vhsub_u16 #define vhsub_u16(a, b) simde_vhsub_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vhsub_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhsub_u32(a, b); #else return simde_vmovn_u64(simde_vshrq_n_u64(simde_vsubl_u32(a, b), 1)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhsub_u32 #define vhsub_u32(a, b) simde_vhsub_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vhsubq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhsubq_s8(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_cvtepi16_epi8(_mm256_srai_epi16(_mm256_sub_epi16(_mm256_cvtepi8_epi16(a), _mm256_cvtepi8_epi16(b)), 1)); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int8_t, (HEDLEY_STATIC_CAST(int16_t, a_.values[i]) - HEDLEY_STATIC_CAST(int16_t, b_.values[i])) >> 1); } return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhsubq_s8 #define vhsubq_s8(a, b) simde_vhsubq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vhsubq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhsubq_s16(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_cvtepi32_epi16(_mm256_srai_epi32(_mm256_sub_epi32(_mm256_cvtepi16_epi32(a), _mm256_cvtepi16_epi32(b)), 1)); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (HEDLEY_STATIC_CAST(int32_t, a_.values[i]) - HEDLEY_STATIC_CAST(int32_t, b_.values[i])) >> 1); } return 
simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhsubq_s16 #define vhsubq_s16(a, b) simde_vhsubq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vhsubq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhsubq_s32(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_cvtepi64_epi32(_mm256_srai_epi64(_mm256_sub_epi64(_mm256_cvtepi32_epi64(a), _mm256_cvtepi32_epi64(b)), 1)); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int32_t, (HEDLEY_STATIC_CAST(int64_t, a_.values[i]) - HEDLEY_STATIC_CAST(int64_t, b_.values[i])) >> 1); } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhsubq_s32 #define vhsubq_s32(a, b) simde_vhsubq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vhsubq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhsubq_u8(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_cvtepi16_epi8(_mm256_srli_epi16(_mm256_sub_epi16(_mm256_cvtepu8_epi16(a), _mm256_cvtepu8_epi16(b)), 1)); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (HEDLEY_STATIC_CAST(uint16_t, a_.values[i]) - HEDLEY_STATIC_CAST(uint16_t, b_.values[i])) >> 1); } return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhsubq_u8 #define vhsubq_u8(a, b) simde_vhsubq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vhsubq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { 
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhsubq_u16(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_cvtepi32_epi16(_mm256_srli_epi32(_mm256_sub_epi32(_mm256_cvtepu16_epi32(a), _mm256_cvtepu16_epi32(b)), 1)); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) - HEDLEY_STATIC_CAST(uint32_t, b_.values[i])) >> 1); } return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhsubq_u16 #define vhsubq_u16(a, b) simde_vhsubq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vhsubq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vhsubq_u32(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_cvtepi64_epi32(_mm256_srli_epi64(_mm256_sub_epi64(_mm256_cvtepu32_epi64(a), _mm256_cvtepu32_epi64(b)), 1)); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) - HEDLEY_STATIC_CAST(uint64_t, b_.values[i])) >> 1); } return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vhsubq_u32 #define vhsubq_u32(a, b) simde_vhsubq_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_HSUB_H) */ simde-0.7.2/simde/arm/neon/ld1.h000066400000000000000000000270151400333146700162770ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the 
Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_LD1_H) #define SIMDE_ARM_NEON_LD1_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vld1_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(2)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld1_f32(ptr); #else simde_float32x2_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld1_f32 #define vld1_f32(a) simde_vld1_f32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vld1_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(1)]) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld1_f64(ptr); #else simde_float64x1_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vld1_f64 #define vld1_f64(a) simde_vld1_f64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vld1_s8(int8_t const 
ptr[HEDLEY_ARRAY_PARAM(8)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld1_s8(ptr); #else simde_int8x8_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld1_s8 #define vld1_s8(a) simde_vld1_s8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vld1_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld1_s16(ptr); #else simde_int16x4_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld1_s16 #define vld1_s16(a) simde_vld1_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vld1_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld1_s32(ptr); #else simde_int32x2_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld1_s32 #define vld1_s32(a) simde_vld1_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vld1_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(1)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld1_s64(ptr); #else simde_int64x1_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld1_s64 #define vld1_s64(a) simde_vld1_s64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vld1_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld1_u8(ptr); #else simde_uint8x8_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld1_u8 #define vld1_u8(a) simde_vld1_u8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vld1_u16(uint16_t const 
ptr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld1_u16(ptr); #else simde_uint16x4_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld1_u16 #define vld1_u16(a) simde_vld1_u16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vld1_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld1_u32(ptr); #else simde_uint32x2_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld1_u32 #define vld1_u32(a) simde_vld1_u32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vld1_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(1)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld1_u64(ptr); #else simde_uint64x1_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld1_u64 #define vld1_u64(a) simde_vld1_u64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vld1q_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld1q_f32(ptr); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_ps(ptr); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_load(ptr); #else simde_float32x4_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld1q_f32 #define vld1q_f32(a) simde_vld1q_f32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vld1q_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(2)]) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld1q_f64(ptr); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_pd(ptr); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_load(ptr); #else 
simde_float64x2_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vld1q_f64 #define vld1q_f64(a) simde_vld1q_f64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vld1q_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld1q_s8(ptr); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(SIMDE_ALIGN_CAST(const __m128i*, ptr)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_load(ptr); #else simde_int8x16_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld1q_s8 #define vld1q_s8(a) simde_vld1q_s8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vld1q_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld1q_s16(ptr); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(SIMDE_ALIGN_CAST(const __m128i*, ptr)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_load(ptr); #else simde_int16x8_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld1q_s16 #define vld1q_s16(a) simde_vld1q_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vld1q_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld1q_s32(ptr); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(SIMDE_ALIGN_CAST(const __m128i*, ptr)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_load(ptr); #else simde_int32x4_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld1q_s32 #define vld1q_s32(a) simde_vld1q_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t 
simde_vld1q_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld1q_s64(ptr); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(SIMDE_ALIGN_CAST(const __m128i*, ptr)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_load(ptr); #else simde_int64x2_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld1q_s64 #define vld1q_s64(a) simde_vld1q_s64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vld1q_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld1q_u8(ptr); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(SIMDE_ALIGN_CAST(const __m128i*, ptr)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_load(ptr); #else simde_uint8x16_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld1q_u8 #define vld1q_u8(a) simde_vld1q_u8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vld1q_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld1q_u16(ptr); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(SIMDE_ALIGN_CAST(const __m128i*, ptr)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_load(ptr); #else simde_uint16x8_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld1q_u16 #define vld1q_u16(a) simde_vld1q_u16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vld1q_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld1q_u32(ptr); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(SIMDE_ALIGN_CAST(const __m128i*, ptr)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return 
wasm_v128_load(ptr); #else simde_uint32x4_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld1q_u32 #define vld1q_u32(a) simde_vld1q_u32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vld1q_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld1q_u64(ptr); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(SIMDE_ALIGN_CAST(const __m128i*, ptr)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_load(ptr); #else simde_uint64x2_private r_; simde_memcpy(&r_, ptr, sizeof(r_)); return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld1q_u64 #define vld1q_u64(a) simde_vld1q_u64((a)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_LD1_H) */ simde-0.7.2/simde/arm/neon/ld3.h000066400000000000000000000403711400333146700163010ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher */ #if !defined(SIMDE_ARM_NEON_LD3_H) #define SIMDE_ARM_NEON_LD3_H #include "types.h" #include "ld1.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS #if defined(HEDLEY_GCC_VERSION) SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ #endif SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_FUNCTION_ATTRIBUTES simde_float32x2x3_t simde_vld3_f32(simde_float32 const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3_f32(ptr); #else simde_float32x2_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_float32x2x3_t r = { { simde_float32x2_from_private(r_[0]), simde_float32x2_from_private(r_[1]), simde_float32x2_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld3_f32 #define vld3_f32(a) simde_vld3_f32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1x3_t simde_vld3_f64(simde_float64 const *ptr) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld3_f64(ptr); #else simde_float64x1_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_float64x1x3_t r = { { simde_float64x1_from_private(r_[0]), simde_float64x1_from_private(r_[1]), simde_float64x1_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vld3_f64 #define vld3_f64(a) simde_vld3_f64((a)) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde_int8x8x3_t simde_vld3_s8(int8_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3_s8(ptr); #else simde_int8x8_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_int8x8x3_t r = { { simde_int8x8_from_private(r_[0]), simde_int8x8_from_private(r_[1]), simde_int8x8_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld3_s8 #define vld3_s8(a) simde_vld3_s8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4x3_t simde_vld3_s16(int16_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3_s16(ptr); #else simde_int16x4_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_int16x4x3_t r = { { simde_int16x4_from_private(r_[0]), simde_int16x4_from_private(r_[1]), simde_int16x4_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld3_s16 #define vld3_s16(a) simde_vld3_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2x3_t simde_vld3_s32(int32_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3_s32(ptr); #else simde_int32x2_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_int32x2x3_t r = { { simde_int32x2_from_private(r_[0]), simde_int32x2_from_private(r_[1]), simde_int32x2_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld3_s32 #define vld3_s32(a) simde_vld3_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde_int64x1x3_t simde_vld3_s64(int64_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3_s64(ptr); #else simde_int64x1_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_int64x1x3_t r = { { simde_int64x1_from_private(r_[0]), simde_int64x1_from_private(r_[1]), simde_int64x1_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vld3_s64 #define vld3_s64(a) simde_vld3_s64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8x3_t simde_vld3_u8(uint8_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3_u8(ptr); #else simde_uint8x8_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_uint8x8x3_t r = { { simde_uint8x8_from_private(r_[0]), simde_uint8x8_from_private(r_[1]), simde_uint8x8_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld3_u8 #define vld3_u8(a) simde_vld3_u8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4x3_t simde_vld3_u16(uint16_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3_u16(ptr); #else simde_uint16x4_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_uint16x4x3_t r = { { simde_uint16x4_from_private(r_[0]), simde_uint16x4_from_private(r_[1]), simde_uint16x4_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld3_u16 #define vld3_u16(a) simde_vld3_u16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde_uint32x2x3_t simde_vld3_u32(uint32_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3_u32(ptr); #else simde_uint32x2_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_uint32x2x3_t r = { { simde_uint32x2_from_private(r_[0]), simde_uint32x2_from_private(r_[1]), simde_uint32x2_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld3_u32 #define vld3_u32(a) simde_vld3_u32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1x3_t simde_vld3_u64(uint64_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3_u64(ptr); #else simde_uint64x1_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_uint64x1x3_t r = { { simde_uint64x1_from_private(r_[0]), simde_uint64x1_from_private(r_[1]), simde_uint64x1_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vld3_u64 #define vld3_u64(a) simde_vld3_u64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4x3_t simde_vld3q_f32(simde_float32 const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_f32(ptr); #else simde_float32x4_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_float32x4x3_t r = { { simde_float32x4_from_private(r_[0]), simde_float32x4_from_private(r_[1]), simde_float32x4_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld3q_f32 #define vld3q_f32(a) simde_vld3q_f32((a)) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde_float64x2x3_t simde_vld3q_f64(simde_float64 const *ptr) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld3q_f64(ptr); #else simde_float64x2_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_float64x2x3_t r = { { simde_float64x2_from_private(r_[0]), simde_float64x2_from_private(r_[1]), simde_float64x2_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vld3q_f64 #define vld3q_f64(a) simde_vld3q_f64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16x3_t simde_vld3q_s8(int8_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_s8(ptr); #else simde_int8x16_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_int8x16x3_t r = { { simde_int8x16_from_private(r_[0]), simde_int8x16_from_private(r_[1]), simde_int8x16_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld3q_s8 #define vld3q_s8(a) simde_vld3q_s8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8x3_t simde_vld3q_s16(int16_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_s16(ptr); #else simde_int16x8_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_int16x8x3_t r = { { simde_int16x8_from_private(r_[0]), simde_int16x8_from_private(r_[1]), simde_int16x8_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld3q_s16 #define vld3q_s16(a) 
simde_vld3q_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4x3_t simde_vld3q_s32(int32_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_s32(ptr); #else simde_int32x4_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_int32x4x3_t r = { { simde_int32x4_from_private(r_[0]), simde_int32x4_from_private(r_[1]), simde_int32x4_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld3q_s32 #define vld3q_s32(a) simde_vld3q_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2x3_t simde_vld3q_s64(int64_t const *ptr) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld3q_s64(ptr); #else simde_int64x2_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_int64x2x3_t r = { { simde_int64x2_from_private(r_[0]), simde_int64x2_from_private(r_[1]), simde_int64x2_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vld3q_s64 #define vld3q_s64(a) simde_vld3q_s64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16x3_t simde_vld3q_u8(uint8_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_u8(ptr); #else simde_uint8x16_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_uint8x16x3_t r = { { simde_uint8x16_from_private(r_[0]), simde_uint8x16_from_private(r_[1]), simde_uint8x16_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld3q_u8 #define vld3q_u8(a) 
simde_vld3q_u8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8x3_t simde_vld3q_u16(uint16_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_u16(ptr); #else simde_uint16x8_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_uint16x8x3_t r = { { simde_uint16x8_from_private(r_[0]), simde_uint16x8_from_private(r_[1]), simde_uint16x8_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld3q_u16 #define vld3q_u16(a) simde_vld3q_u16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4x3_t simde_vld3q_u32(uint32_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_u32(ptr); #else simde_uint32x4_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_uint32x4x3_t r = { { simde_uint32x4_from_private(r_[0]), simde_uint32x4_from_private(r_[1]), simde_uint32x4_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld3q_u32 #define vld3q_u32(a) simde_vld3q_u32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2x3_t simde_vld3q_u64(uint64_t const *ptr) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld3q_u64(ptr); #else simde_uint64x2_private r_[3]; for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; } } simde_uint64x2x3_t r = { { simde_uint64x2_from_private(r_[0]), simde_uint64x2_from_private(r_[1]), simde_uint64x2_from_private(r_[2]) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vld3q_u64 
#define vld3q_u64(a) simde_vld3q_u64((a)) #endif #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_LD3_H) */ simde-0.7.2/simde/arm/neon/ld4.h000066400000000000000000000357301400333146700163050ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher */ #if !defined(SIMDE_ARM_NEON_LD4_H) #define SIMDE_ARM_NEON_LD4_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS #if defined(HEDLEY_GCC_VERSION) SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ #endif SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_FUNCTION_ATTRIBUTES simde_float32x2x4_t simde_vld4_f32(simde_float32 const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld4_f32(ptr); #else simde_float32x2_private a_[4]; for (size_t i = 0; i < (sizeof(simde_float32x2_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_float32x2x4_t s_ = { { simde_float32x2_from_private(a_[0]), simde_float32x2_from_private(a_[1]), simde_float32x2_from_private(a_[2]), simde_float32x2_from_private(a_[3]) } }; return (s_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld4_f32 #define vld4_f32(a) simde_vld4_f32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1x4_t simde_vld4_f64(simde_float64 const *ptr) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld4_f64(ptr); #else simde_float64x1_private a_[4]; for (size_t i = 0; i < (sizeof(simde_float64x1_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_float64x1x4_t s_ = { { simde_float64x1_from_private(a_[0]), simde_float64x1_from_private(a_[1]), simde_float64x1_from_private(a_[2]), simde_float64x1_from_private(a_[3]) } }; return s_; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vld4_f64 #define vld4_f64(a) simde_vld4_f64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8x4_t simde_vld4_s8(int8_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld4_s8(ptr); #else simde_int8x8_private a_[4]; for (size_t i = 0; i < (sizeof(simde_int8x8_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_int8x8x4_t s_ = { { simde_int8x8_from_private(a_[0]), simde_int8x8_from_private(a_[1]), 
simde_int8x8_from_private(a_[2]), simde_int8x8_from_private(a_[3]) } }; return s_; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld4_s8 #define vld4_s8(a) simde_vld4_s8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4x4_t simde_vld4_s16(int16_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld4_s16(ptr); #else simde_int16x4_private a_[4]; for (size_t i = 0; i < (sizeof(simde_int16x4_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_int16x4x4_t s_ = { { simde_int16x4_from_private(a_[0]), simde_int16x4_from_private(a_[1]), simde_int16x4_from_private(a_[2]), simde_int16x4_from_private(a_[3]) } }; return s_; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld4_s16 #define vld4_s16(a) simde_vld4_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2x4_t simde_vld4_s32(int32_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld4_s32(ptr); #else simde_int32x2_private a_[4]; for (size_t i = 0; i < (sizeof(simde_int32x2_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_int32x2x4_t s_ = { { simde_int32x2_from_private(a_[0]), simde_int32x2_from_private(a_[1]), simde_int32x2_from_private(a_[2]), simde_int32x2_from_private(a_[3]) } }; return s_; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld4_s32 #define vld4_s32(a) simde_vld4_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1x4_t simde_vld4_s64(int64_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld4_s64(ptr); #else simde_int64x1_private a_[4]; for (size_t i = 0; i < (sizeof(simde_int64x1_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_int64x1x4_t s_ = { { simde_int64x1_from_private(a_[0]), simde_int64x1_from_private(a_[1]), simde_int64x1_from_private(a_[2]), simde_int64x1_from_private(a_[3]) } }; return s_; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vld4_s64 #define vld4_s64(a) 
simde_vld4_s64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8x4_t simde_vld4_u8(uint8_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld4_u8(ptr); #else simde_uint8x8_private a_[4]; for (size_t i = 0; i < (sizeof(simde_uint8x8_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_uint8x8x4_t s_ = { { simde_uint8x8_from_private(a_[0]), simde_uint8x8_from_private(a_[1]), simde_uint8x8_from_private(a_[2]), simde_uint8x8_from_private(a_[3]) } }; return s_; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld4_u8 #define vld4_u8(a) simde_vld4_u8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4x4_t simde_vld4_u16(uint16_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld4_u16(ptr); #else simde_uint16x4_private a_[4]; for (size_t i = 0; i < (sizeof(simde_uint16x4_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_uint16x4x4_t s_ = { { simde_uint16x4_from_private(a_[0]), simde_uint16x4_from_private(a_[1]), simde_uint16x4_from_private(a_[2]), simde_uint16x4_from_private(a_[3]) } }; return s_; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld4_u16 #define vld4_u16(a) simde_vld4_u16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2x4_t simde_vld4_u32(uint32_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld4_u32(ptr); #else simde_uint32x2_private a_[4]; for (size_t i = 0; i < (sizeof(simde_uint32x2_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_uint32x2x4_t s_ = { { simde_uint32x2_from_private(a_[0]), simde_uint32x2_from_private(a_[1]), simde_uint32x2_from_private(a_[2]), simde_uint32x2_from_private(a_[3]) } }; return s_; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld4_u32 #define vld4_u32(a) simde_vld4_u32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1x4_t simde_vld4_u64(uint64_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld4_u64(ptr); #else 
simde_uint64x1_private a_[4]; for (size_t i = 0; i < (sizeof(simde_uint64x1_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_uint64x1x4_t s_ = { { simde_uint64x1_from_private(a_[0]), simde_uint64x1_from_private(a_[1]), simde_uint64x1_from_private(a_[2]), simde_uint64x1_from_private(a_[3]) } }; return s_; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vld4_u64 #define vld4_u64(a) simde_vld4_u64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4x4_t simde_vld4q_f32(simde_float32 const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld4q_f32(ptr); #else simde_float32x4_private a_[4]; for (size_t i = 0; i < (sizeof(simde_float32x4_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_float32x4x4_t s_ = { { simde_float32x4_from_private(a_[0]), simde_float32x4_from_private(a_[1]), simde_float32x4_from_private(a_[2]), simde_float32x4_from_private(a_[3]) } }; return s_; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld4q_f32 #define vld4q_f32(a) simde_vld4q_f32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2x4_t simde_vld4q_f64(simde_float64 const *ptr) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld4q_f64(ptr); #else simde_float64x2_private a_[4]; for (size_t i = 0; i < (sizeof(simde_float64x2_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_float64x2x4_t s_ = { { simde_float64x2_from_private(a_[0]), simde_float64x2_from_private(a_[1]), simde_float64x2_from_private(a_[2]), simde_float64x2_from_private(a_[3]) } }; return s_; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vld4q_f64 #define vld4q_f64(a) simde_vld4q_f64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16x4_t simde_vld4q_s8(int8_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld4q_s8(ptr); #else simde_int8x16_private a_[4]; for (size_t i = 0; i < (sizeof(simde_int8x16_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = 
ptr[i]; } simde_int8x16x4_t s_ = { { simde_int8x16_from_private(a_[0]), simde_int8x16_from_private(a_[1]), simde_int8x16_from_private(a_[2]), simde_int8x16_from_private(a_[3]) } }; return s_; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld4q_s8 #define vld4q_s8(a) simde_vld4q_s8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8x4_t simde_vld4q_s16(int16_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld4q_s16(ptr); #else simde_int16x8_private a_[4]; for (size_t i = 0; i < (sizeof(simde_int16x8_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_int16x8x4_t s_ = { { simde_int16x8_from_private(a_[0]), simde_int16x8_from_private(a_[1]), simde_int16x8_from_private(a_[2]), simde_int16x8_from_private(a_[3]) } }; return s_; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld4q_s16 #define vld4q_s16(a) simde_vld4q_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4x4_t simde_vld4q_s32(int32_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld4q_s32(ptr); #else simde_int32x4_private a_[4]; for (size_t i = 0; i < (sizeof(simde_int32x4_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_int32x4x4_t s_ = { { simde_int32x4_from_private(a_[0]), simde_int32x4_from_private(a_[1]), simde_int32x4_from_private(a_[2]), simde_int32x4_from_private(a_[3]) } }; return s_; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld4q_s32 #define vld4q_s32(a) simde_vld4q_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2x4_t simde_vld4q_s64(int64_t const *ptr) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld4q_s64(ptr); #else simde_int64x2_private a_[4]; for (size_t i = 0; i < (sizeof(simde_int64x2_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_int64x2x4_t s_ = { { simde_int64x2_from_private(a_[0]), simde_int64x2_from_private(a_[1]), simde_int64x2_from_private(a_[2]), simde_int64x2_from_private(a_[3]) } }; 
return s_; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vld4q_s64 #define vld4q_s64(a) simde_vld4q_s64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16x4_t simde_vld4q_u8(uint8_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld4q_u8(ptr); #else simde_uint8x16_private a_[4]; for (size_t i = 0; i < (sizeof(simde_uint8x16_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_uint8x16x4_t s_ = { { simde_uint8x16_from_private(a_[0]), simde_uint8x16_from_private(a_[1]), simde_uint8x16_from_private(a_[2]), simde_uint8x16_from_private(a_[3]) } }; return s_; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld4q_u8 #define vld4q_u8(a) simde_vld4q_u8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8x4_t simde_vld4q_u16(uint16_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld4q_u16(ptr); #else simde_uint16x8_private a_[4]; for (size_t i = 0; i < (sizeof(simde_uint16x8_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_uint16x8x4_t s_ = { { simde_uint16x8_from_private(a_[0]), simde_uint16x8_from_private(a_[1]), simde_uint16x8_from_private(a_[2]), simde_uint16x8_from_private(a_[3]) } }; return s_; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld4q_u16 #define vld4q_u16(a) simde_vld4q_u16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4x4_t simde_vld4q_u32(uint32_t const *ptr) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld4q_u32(ptr); #else simde_uint32x4_private a_[4]; for (size_t i = 0; i < (sizeof(simde_uint32x4_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_uint32x4x4_t s_ = { { simde_uint32x4_from_private(a_[0]), simde_uint32x4_from_private(a_[1]), simde_uint32x4_from_private(a_[2]), simde_uint32x4_from_private(a_[3]) } }; return s_; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vld4q_u32 #define vld4q_u32(a) simde_vld4q_u32((a)) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2x4_t simde_vld4q_u64(uint64_t const *ptr) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld4q_u64(ptr); #else simde_uint64x2_private a_[4]; for (size_t i = 0; i < (sizeof(simde_uint64x2_t) / sizeof(*ptr)) * 4 ; i++) { a_[i % 4].values[i / 4] = ptr[i]; } simde_uint64x2x4_t s_ = { { simde_uint64x2_from_private(a_[0]), simde_uint64x2_from_private(a_[1]), simde_uint64x2_from_private(a_[2]), simde_uint64x2_from_private(a_[3]) } }; return s_; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vld4q_u64 #define vld4q_u64(a) simde_vld4q_u64((a)) #endif #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_LD4_H) */ simde-0.7.2/simde/arm/neon/max.h000066400000000000000000000451161400333146700164060ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_MAX_H) #define SIMDE_ARM_NEON_MAX_H #include "types.h" #include "cgt.h" #include "bsl.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vmax_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmax_f32(a, b); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { #if !defined(SIMDE_FAST_NANS) r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NANF); #else r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; #endif } return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmax_f32 #define vmax_f32(a, b) simde_vmax_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vmax_f64(simde_float64x1_t a, simde_float64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmax_f64(a, b); #else simde_float64x1_private r_, a_ = simde_float64x1_to_private(a), b_ = simde_float64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { #if !defined(SIMDE_FAST_NANS) r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NAN); #else r_.values[i] = (a_.values[i] > b_.values[i]) ? 
a_.values[i] : b_.values[i]; #endif } return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmax_f64 #define vmax_f64(a, b) simde_vmax_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vmax_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmax_s8(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbsl_s8(simde_vcgt_s8(a, b), a, b); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmax_s8 #define vmax_s8(a, b) simde_vmax_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vmax_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmax_s16(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbsl_s16(simde_vcgt_s16(a, b), a, b); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
a_.values[i] : b_.values[i]; } return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmax_s16 #define vmax_s16(a, b) simde_vmax_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vmax_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmax_s32(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbsl_s32(simde_vcgt_s32(a, b), a, b); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmax_s32 #define vmax_s32(a, b) simde_vmax_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_x_vmax_s64(simde_int64x1_t a, simde_int64x1_t b) { #if SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbsl_s64(simde_vcgt_s64(a, b), a, b); #else simde_int64x1_private r_, a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; } return simde_int64x1_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vmax_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmax_u8(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbsl_u8(simde_vcgt_u8(a, b), a, b); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
a_.values[i] : b_.values[i]; } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmax_u8 #define vmax_u8(a, b) simde_vmax_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vmax_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmax_u16(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbsl_u16(simde_vcgt_u16(a, b), a, b); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; } return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmax_u16 #define vmax_u16(a, b) simde_vmax_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vmax_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmax_u32(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbsl_u32(simde_vcgt_u32(a, b), a, b); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
a_.values[i] : b_.values[i]; } return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmax_u32 #define vmax_u32(a, b) simde_vmax_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_x_vmax_u64(simde_uint64x1_t a, simde_uint64x1_t b) { #if SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbsl_u64(simde_vcgt_u64(a, b), a, b); #else simde_uint64x1_private r_, a_ = simde_uint64x1_to_private(a), b_ = simde_uint64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; } return simde_uint64x1_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vmaxq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmaxq_f32(a, b); #elif defined(SIMDE_X86_SSE_NATIVE) #if !defined(SIMDE_FAST_NANS) __m128 nan_mask = _mm_cmpunord_ps(a, b); __m128 res = _mm_max_ps(a, b); res = _mm_andnot_ps(nan_mask, res); res = _mm_or_ps(res, _mm_and_ps(_mm_set1_ps(SIMDE_MATH_NANF), nan_mask)); return res; #else return _mm_max_ps(a, b); #endif #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_NANS) return vec_max(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_max(a, b); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { #if !defined(SIMDE_FAST_NANS) r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NANF); #else r_.values[i] = (a_.values[i] > b_.values[i]) ? 
a_.values[i] : b_.values[i]; #endif } return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmaxq_f32 #define vmaxq_f32(a, b) simde_vmaxq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vmaxq_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmaxq_f64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) #if !defined(SIMDE_FAST_NANS) __m128d nan_mask = _mm_cmpunord_pd(a, b); __m128d res = _mm_max_pd(a, b); res = _mm_andnot_pd(nan_mask, res); res = _mm_or_pd(res, _mm_and_pd(_mm_set1_pd(SIMDE_MATH_NAN), nan_mask)); return res; #else return _mm_max_pd(a, b); #endif #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(SIMDE_FAST_NANS) return vec_max(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_max(a, b); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { #if !defined(SIMDE_FAST_NANS) r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NAN); #else r_.values[i] = (a_.values[i] > b_.values[i]) ? 
a_.values[i] : b_.values[i]; #endif } return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmaxq_f64 #define vmaxq_f64(a, b) simde_vmaxq_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vmaxq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmaxq_s8(a, b); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_max_epi8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_max(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_max(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbslq_s8(simde_vcgtq_s8(a, b), a, b); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; } return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmaxq_s8 #define vmaxq_s8(a, b) simde_vmaxq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vmaxq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmaxq_s16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_max_epi16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_max(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_max(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbslq_s16(simde_vcgtq_s16(a, b), a, b); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
a_.values[i] : b_.values[i]; } return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmaxq_s16 #define vmaxq_s16(a, b) simde_vmaxq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmaxq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmaxq_s32(a, b); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_max_epi32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_max(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_max(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbslq_s32(simde_vcgtq_s32(a, b), a, b); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmaxq_s32 #define vmaxq_s32(a, b) simde_vmaxq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_x_vmaxq_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_max(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbslq_s64(simde_vcgtq_s64(a, b), a, b); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
a_.values[i] : b_.values[i]; } return simde_int64x2_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vmaxq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmaxq_u8(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_max_epu8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_max(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u8x16_max(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbslq_u8(simde_vcgtq_u8(a, b), a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; } return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmaxq_u8 #define vmaxq_u8(a, b) simde_vmaxq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vmaxq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmaxq_u16(a, b); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_max_epu16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_max(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u16x8_max(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbslq_u16(simde_vcgtq_u16(a, b), a, b); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
a_.values[i] : b_.values[i]; } return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmaxq_u16 #define vmaxq_u16(a, b) simde_vmaxq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmaxq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmaxq_u32(a, b); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_max_epu32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_max(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u32x4_max(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbslq_u32(simde_vcgtq_u32(a, b), a, b); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; } return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmaxq_u32 #define vmaxq_u32(a, b) simde_vmaxq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_x_vmaxq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_max(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbslq_u64(simde_vcgtq_u64(a, b), a, b); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] > b_.values[i]) ? 
a_.values[i] : b_.values[i]; } return simde_uint64x2_from_private(r_); #endif } SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MAX_H) */ simde-0.7.2/simde/arm/neon/maxnm.h000066400000000000000000000154041400333146700167360ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_MAXNM_H) #define SIMDE_ARM_NEON_MAXNM_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vmaxnm_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6) return vmaxnm_f32(a, b); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { #if defined(simde_math_fmaxf) r_.values[i] = fmaxf(a_.values[i], b_.values[i]); #else if (a_.values[i] > b_.values[i]) { r_.values[i] = a_.values[i]; } else if (a_.values[i] < b_.values[i]) { r_.values[i] = b_.values[i]; } else if (a_.values[i] == a_.values[i]) { r_.values[i] = a_.values[i]; } else { r_.values[i] = b_.values[i]; } #endif } return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmaxnm_f32 #define vmaxnm_f32(a, b) simde_vmaxnm_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vmaxnm_f64(simde_float64x1_t a, simde_float64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmaxnm_f64(a, b); #else simde_float64x1_private r_, a_ = simde_float64x1_to_private(a), b_ = simde_float64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { #if defined(simde_math_fmax) r_.values[i] = fmax(a_.values[i], b_.values[i]); #else if (a_.values[i] > b_.values[i]) { r_.values[i] = a_.values[i]; } else if (a_.values[i] < b_.values[i]) { r_.values[i] = b_.values[i]; } else if (a_.values[i] == a_.values[i]) { r_.values[i] = a_.values[i]; } else { r_.values[i] = b_.values[i]; } #endif } return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmaxnm_f64 #define vmaxnm_f64(a, b) 
simde_vmaxnm_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vmaxnmq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6) return vmaxnmq_f32(a, b); #elif defined(SIMDE_X86_SSE_NATIVE) #if !defined(SIMDE_FAST_NANS) __m128 r = _mm_max_ps(a, b); __m128 bnan = _mm_cmpunord_ps(b, b); r = _mm_andnot_ps(bnan, r); r = _mm_or_ps(r, _mm_and_ps(a, bnan)); return r; #else return _mm_max_ps(a, b); #endif #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_max(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) return wasm_f32x4_max(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_NANS) return simde_vbslq_f32(simde_vcgeq_f32(a, b), a, b); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { #if defined(simde_math_fmaxf) r_.values[i] = fmaxf(a_.values[i], b_.values[i]); #else if (a_.values[i] > b_.values[i]) { r_.values[i] = a_.values[i]; } else if (a_.values[i] < b_.values[i]) { r_.values[i] = b_.values[i]; } else if (a_.values[i] == a_.values[i]) { r_.values[i] = a_.values[i]; } else { r_.values[i] = b_.values[i]; } #endif } return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmaxnmq_f32 #define vmaxnmq_f32(a, b) simde_vmaxnmq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vmaxnmq_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmaxnmq_f64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) #if !defined(SIMDE_FAST_NANS) __m128d r = _mm_max_pd(a, b); __m128d bnan = _mm_cmpunord_pd(b, b); r = _mm_andnot_pd(bnan, r); r = _mm_or_pd(r, _mm_and_pd(a, bnan)); return r; #else return _mm_max_pd(a, b); #endif #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_max(a, b); #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) return wasm_f64x2_max(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_NANS) return simde_vbslq_f64(simde_vcgeq_f64(a, b), a, b); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { #if defined(simde_math_fmax) r_.values[i] = fmax(a_.values[i], b_.values[i]); #else if (a_.values[i] > b_.values[i]) { r_.values[i] = a_.values[i]; } else if (a_.values[i] < b_.values[i]) { r_.values[i] = b_.values[i]; } else if (a_.values[i] == a_.values[i]) { r_.values[i] = a_.values[i]; } else { r_.values[i] = b_.values[i]; } #endif } return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmaxnmq_f64 #define vmaxnmq_f64(a, b) simde_vmaxnmq_f64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MAXNM_H) */ simde-0.7.2/simde/arm/neon/maxv.h000066400000000000000000000232471400333146700165750ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_MAXV_H) #define SIMDE_ARM_NEON_MAXV_H #include "types.h" #include HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32_t simde_vmaxv_f32(simde_float32x2_t a) { simde_float32_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vmaxv_f32(a); #else simde_float32x2_private a_ = simde_float32x2_to_private(a); r = -SIMDE_MATH_INFINITYF; SIMDE_VECTORIZE_REDUCTION(max:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] > r ? a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmaxv_f32 #define vmaxv_f32(v) simde_vmaxv_f32(v) #endif SIMDE_FUNCTION_ATTRIBUTES int8_t simde_vmaxv_s8(simde_int8x8_t a) { int8_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vmaxv_s8(a); #else simde_int8x8_private a_ = simde_int8x8_to_private(a); r = INT8_MIN; SIMDE_VECTORIZE_REDUCTION(max:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] > r ? a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmaxv_s8 #define vmaxv_s8(v) simde_vmaxv_s8(v) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_vmaxv_s16(simde_int16x4_t a) { int16_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vmaxv_s16(a); #else simde_int16x4_private a_ = simde_int16x4_to_private(a); r = INT16_MIN; SIMDE_VECTORIZE_REDUCTION(max:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] > r ? 
a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmaxv_s16 #define vmaxv_s16(v) simde_vmaxv_s16(v) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vmaxv_s32(simde_int32x2_t a) { int32_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vmaxv_s32(a); #else simde_int32x2_private a_ = simde_int32x2_to_private(a); r = INT32_MIN; SIMDE_VECTORIZE_REDUCTION(max:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] > r ? a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmaxv_s32 #define vmaxv_s32(v) simde_vmaxv_s32(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint8_t simde_vmaxv_u8(simde_uint8x8_t a) { uint8_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vmaxv_u8(a); #else simde_uint8x8_private a_ = simde_uint8x8_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(max:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] > r ? a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmaxv_u8 #define vmaxv_u8(v) simde_vmaxv_u8(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint16_t simde_vmaxv_u16(simde_uint16x4_t a) { uint16_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vmaxv_u16(a); #else simde_uint16x4_private a_ = simde_uint16x4_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(max:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] > r ? 
a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmaxv_u16 #define vmaxv_u16(v) simde_vmaxv_u16(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vmaxv_u32(simde_uint32x2_t a) { uint32_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vmaxv_u32(a); #else simde_uint32x2_private a_ = simde_uint32x2_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(max:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] > r ? a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmaxv_u32 #define vmaxv_u32(v) simde_vmaxv_u32(v) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32_t simde_vmaxvq_f32(simde_float32x4_t a) { simde_float32_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vmaxvq_f32(a); #else simde_float32x4_private a_ = simde_float32x4_to_private(a); r = -SIMDE_MATH_INFINITYF; SIMDE_VECTORIZE_REDUCTION(max:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] > r ? a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmaxvq_f32 #define vmaxvq_f32(v) simde_vmaxvq_f32(v) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64_t simde_vmaxvq_f64(simde_float64x2_t a) { simde_float64_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vmaxvq_f64(a); #else simde_float64x2_private a_ = simde_float64x2_to_private(a); r = -SIMDE_MATH_INFINITY; SIMDE_VECTORIZE_REDUCTION(max:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] > r ? 
a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmaxvq_f64 #define vmaxvq_f64(v) simde_vmaxvq_f64(v) #endif SIMDE_FUNCTION_ATTRIBUTES int8_t simde_vmaxvq_s8(simde_int8x16_t a) { int8_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vmaxvq_s8(a); #else simde_int8x16_private a_ = simde_int8x16_to_private(a); r = INT8_MIN; SIMDE_VECTORIZE_REDUCTION(max:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] > r ? a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmaxvq_s8 #define vmaxvq_s8(v) simde_vmaxvq_s8(v) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_vmaxvq_s16(simde_int16x8_t a) { int16_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vmaxvq_s16(a); #else simde_int16x8_private a_ = simde_int16x8_to_private(a); r = INT16_MIN; SIMDE_VECTORIZE_REDUCTION(max:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] > r ? a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmaxvq_s16 #define vmaxvq_s16(v) simde_vmaxvq_s16(v) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vmaxvq_s32(simde_int32x4_t a) { int32_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vmaxvq_s32(a); #else simde_int32x4_private a_ = simde_int32x4_to_private(a); r = INT32_MIN; SIMDE_VECTORIZE_REDUCTION(max:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] > r ? 
a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmaxvq_s32 #define vmaxvq_s32(v) simde_vmaxvq_s32(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint8_t simde_vmaxvq_u8(simde_uint8x16_t a) { uint8_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vmaxvq_u8(a); #else simde_uint8x16_private a_ = simde_uint8x16_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(max:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] > r ? a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmaxvq_u8 #define vmaxvq_u8(v) simde_vmaxvq_u8(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint16_t simde_vmaxvq_u16(simde_uint16x8_t a) { uint16_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vmaxvq_u16(a); #else simde_uint16x8_private a_ = simde_uint16x8_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(max:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] > r ? a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmaxvq_u16 #define vmaxvq_u16(v) simde_vmaxvq_u16(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vmaxvq_u32(simde_uint32x4_t a) { uint32_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vmaxvq_u32(a); #else simde_uint32x4_private a_ = simde_uint32x4_to_private(a); r = 0; SIMDE_VECTORIZE_REDUCTION(max:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] > r ? 
a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmaxvq_u32 #define vmaxvq_u32(v) simde_vmaxvq_u32(v) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MAXV_H) */ simde-0.7.2/simde/arm/neon/min.h000066400000000000000000000500071400333146700163770ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_MIN_H) #define SIMDE_ARM_NEON_MIN_H #include "types.h" #include "cgt.h" #include "ceq.h" #include "bsl.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vmin_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmin_f32(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(64) simde_float32x2_t r = simde_vbsl_f32(simde_vcgt_f32(b, a), a, b); #if !defined(SIMDE_FAST_NANS) r = simde_vbsl_f32(simde_vceq_f32(a, a), simde_vbsl_f32(simde_vceq_f32(b, b), r, b), a); #endif return r; #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { #if !defined(SIMDE_FAST_NANS) if (simde_math_isnanf(a_.values[i])) { r_.values[i] = a_.values[i]; } else if (simde_math_isnanf(b_.values[i])) { r_.values[i] = b_.values[i]; } else { r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; } #else r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; #endif } return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmin_f32 #define vmin_f32(a, b) simde_vmin_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vmin_f64(simde_float64x1_t a, simde_float64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmin_f64(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(64) simde_float64x1_t r = simde_vbsl_f64(simde_vcgt_f64(b, a), a, b); #if !defined(SIMDE_FAST_NANS) r = simde_vbsl_f64(simde_vceq_f64(a, a), simde_vbsl_f64(simde_vceq_f64(b, b), r, b), a); #endif return r; #else simde_float64x1_private r_, a_ = simde_float64x1_to_private(a), b_ = simde_float64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { #if !defined(SIMDE_FAST_NANS) if (simde_math_isnan(a_.values[i])) { r_.values[i] = a_.values[i]; } else if (simde_math_isnan(b_.values[i])) { r_.values[i] = b_.values[i]; } else { r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; } #else r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; #endif } return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmin_f64 #define vmin_f64(a, b) simde_vmin_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vmin_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmin_s8(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbsl_s8(simde_vcgt_s8(b, a), a, b); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmin_s8 #define vmin_s8(a, b) simde_vmin_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vmin_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmin_s16(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbsl_s16(simde_vcgt_s16(b, a), a, b); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; } return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmin_s16 #define vmin_s16(a, b) simde_vmin_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vmin_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmin_s32(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbsl_s32(simde_vcgt_s32(b, a), a, b); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmin_s32 #define vmin_s32(a, b) simde_vmin_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_x_vmin_s64(simde_int64x1_t a, simde_int64x1_t b) { #if SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbsl_s64(simde_vcgt_s64(b, a), a, b); #else simde_int64x1_private r_, a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; } return simde_int64x1_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vmin_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmin_u8(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbsl_u8(simde_vcgt_u8(b, a), a, b); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmin_u8 #define vmin_u8(a, b) simde_vmin_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vmin_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmin_u16(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbsl_u16(simde_vcgt_u16(b, a), a, b); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; } return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmin_u16 #define vmin_u16(a, b) simde_vmin_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vmin_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmin_u32(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbsl_u32(simde_vcgt_u32(b, a), a, b); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; } return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmin_u32 #define vmin_u32(a, b) simde_vmin_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_x_vmin_u64(simde_uint64x1_t a, simde_uint64x1_t b) { #if SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbsl_u64(simde_vcgt_u64(b, a), a, b); #else simde_uint64x1_private r_, a_ = simde_uint64x1_to_private(a), b_ = simde_uint64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; } return simde_uint64x1_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vminq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vminq_f32(a, b); #elif defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_FAST_NANS) return _mm_min_ps(a, b); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_blendv_ps(_mm_set1_ps(SIMDE_MATH_NANF), _mm_min_ps(a, b), _mm_cmpord_ps(a, b)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_NANS) return vec_min(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_min(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 simde_float32x4_t r = simde_vbslq_f32(simde_vcgtq_f32(b, a), a, b); #if !defined(SIMDE_FAST_NANS) r = simde_vbslq_f32(simde_vceqq_f32(a, a), simde_vbslq_f32(simde_vceqq_f32(b, b), r, b), a); #endif return r; #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { #if !defined(SIMDE_FAST_NANS) if (simde_math_isnanf(a_.values[i])) { r_.values[i] = a_.values[i]; } else if (simde_math_isnanf(b_.values[i])) { r_.values[i] = b_.values[i]; } else { r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; } #else r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; #endif } return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vminq_f32 #define vminq_f32(a, b) simde_vminq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vminq_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vminq_f64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_FAST_NANS) return _mm_min_pd(a, b); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_blendv_pd(_mm_set1_pd(SIMDE_MATH_NAN), _mm_min_pd(a, b), _mm_cmpord_pd(a, b)); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(SIMDE_FAST_NANS) return vec_min(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_min(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 simde_float64x2_t r = simde_vbslq_f64(simde_vcgtq_f64(b, a), a, b); #if !defined(SIMDE_FAST_NANS) r = simde_vbslq_f64(simde_vceqq_f64(a, a), simde_vbslq_f64(simde_vceqq_f64(b, b), r, b), a); #endif return r; #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { #if !defined(SIMDE_FAST_NANS) if (simde_math_isnan(a_.values[i])) { r_.values[i] = a_.values[i]; } else if (simde_math_isnan(b_.values[i])) { r_.values[i] = b_.values[i]; } else { r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; } #else r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; #endif } return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vminq_f64 #define vminq_f64(a, b) simde_vminq_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vminq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vminq_s8(a, b); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_min_epi8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_min(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_min(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbslq_s8(simde_vcgtq_s8(b, a), a, b); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; } return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vminq_s8 #define vminq_s8(a, b) simde_vminq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vminq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vminq_s16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_min_epi16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_min(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_min(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbslq_s16(simde_vcgtq_s16(b, a), a, b); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; } return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vminq_s16 #define vminq_s16(a, b) simde_vminq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vminq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vminq_s32(a, b); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_min_epi32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_min(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_min(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbslq_s32(simde_vcgtq_s32(b, a), a, b); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vminq_s32 #define vminq_s32(a, b) simde_vminq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_x_vminq_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_min_epi64(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_min(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbslq_s64(simde_vcgtq_s64(b, a), a, b); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; } return simde_int64x2_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vminq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vminq_u8(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_min_epu8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_min(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u8x16_min(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbslq_u8(simde_vcgtq_u8(b, a), a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; } return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vminq_u8 #define vminq_u8(a, b) simde_vminq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vminq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vminq_u16(a, b); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_min_epu16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_min(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u16x8_min(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbslq_u16(simde_vcgtq_u16(b, a), a, b); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; } return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vminq_u16 #define vminq_u16(a, b) simde_vminq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vminq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vminq_u32(a, b); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_min_epu32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_min(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u32x4_min(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbslq_u32(simde_vcgtq_u32(b, a), a, b); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; } return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vminq_u32 #define vminq_u32(a, b) simde_vminq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_x_vminq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_min(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vbslq_u64(simde_vcgtq_u64(b, a), a, b); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; } return simde_uint64x2_from_private(r_); #endif } SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MIN_H) */ simde-0.7.2/simde/arm/neon/minnm.h000066400000000000000000000154041400333146700167340ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_MINNM_H) #define SIMDE_ARM_NEON_MINNM_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vminnm_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6) return vminnm_f32(a, b); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { #if defined(simde_math_fminf) r_.values[i] = fminf(a_.values[i], b_.values[i]); #else if (a_.values[i] < b_.values[i]) { r_.values[i] = a_.values[i]; } else if (a_.values[i] > b_.values[i]) { r_.values[i] = b_.values[i]; } else if (a_.values[i] == a_.values[i]) { r_.values[i] = a_.values[i]; } else { r_.values[i] = b_.values[i]; } #endif } return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vminnm_f32 #define vminnm_f32(a, b) simde_vminnm_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vminnm_f64(simde_float64x1_t a, simde_float64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vminnm_f64(a, b); #else simde_float64x1_private r_, a_ = simde_float64x1_to_private(a), b_ = simde_float64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { #if defined(simde_math_fmin) r_.values[i] = fmin(a_.values[i], b_.values[i]); #else if (a_.values[i] < b_.values[i]) { r_.values[i] = a_.values[i]; } else if (a_.values[i] > b_.values[i]) { r_.values[i] = b_.values[i]; } else if (a_.values[i] == a_.values[i]) { r_.values[i] = a_.values[i]; } else { r_.values[i] = b_.values[i]; } #endif } return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vminnm_f64 #define vminnm_f64(a, b) 
simde_vminnm_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vminnmq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6) return vminnmq_f32(a, b); #elif defined(SIMDE_X86_SSE_NATIVE) #if !defined(SIMDE_FAST_NANS) __m128 r = _mm_min_ps(a, b); __m128 bnan = _mm_cmpunord_ps(b, b); r = _mm_andnot_ps(bnan, r); r = _mm_or_ps(r, _mm_and_ps(a, bnan)); return r; #else return _mm_min_ps(a, b); #endif #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_min(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) return wasm_f32x4_min(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_NANS) return simde_vbslq_f32(simde_vcgeq_f32(a, b), a, b); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { #if defined(simde_math_fminf) r_.values[i] = fminf(a_.values[i], b_.values[i]); #else if (a_.values[i] < b_.values[i]) { r_.values[i] = a_.values[i]; } else if (a_.values[i] > b_.values[i]) { r_.values[i] = b_.values[i]; } else if (a_.values[i] == a_.values[i]) { r_.values[i] = a_.values[i]; } else { r_.values[i] = b_.values[i]; } #endif } return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vminnmq_f32 #define vminnmq_f32(a, b) simde_vminnmq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vminnmq_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vminnmq_f64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) #if !defined(SIMDE_FAST_NANS) __m128d r = _mm_min_pd(a, b); __m128d bnan = _mm_cmpunord_pd(b, b); r = _mm_andnot_pd(bnan, r); r = _mm_or_pd(r, _mm_and_pd(a, bnan)); return r; #else return _mm_min_pd(a, b); #endif #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_min(a, b); #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) return wasm_f64x2_min(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_NANS) return simde_vbslq_f64(simde_vcgeq_f64(a, b), a, b); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { #if defined(simde_math_fmin) r_.values[i] = fmin(a_.values[i], b_.values[i]); #else if (a_.values[i] < b_.values[i]) { r_.values[i] = a_.values[i]; } else if (a_.values[i] > b_.values[i]) { r_.values[i] = b_.values[i]; } else if (a_.values[i] == a_.values[i]) { r_.values[i] = a_.values[i]; } else { r_.values[i] = b_.values[i]; } #endif } return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vminnmq_f64 #define vminnmq_f64(a, b) simde_vminnmq_f64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MINNM_H) */ simde-0.7.2/simde/arm/neon/minv.h000066400000000000000000000247771400333146700166040ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_MINV_H) #define SIMDE_ARM_NEON_MINV_H #include "types.h" #include HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32_t simde_vminv_f32(simde_float32x2_t a) { simde_float32_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vminv_f32(a); #else simde_float32x2_private a_ = simde_float32x2_to_private(a); r = SIMDE_MATH_INFINITYF; #if defined(SIMDE_FAST_NANS) SIMDE_VECTORIZE_REDUCTION(min:r) #else SIMDE_VECTORIZE #endif for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { #if defined(SIMDE_FAST_NANS) r = a_.values[i] < r ? a_.values[i] : r; #else r = (a_.values[i] < r) ? a_.values[i] : ((a_.values[i] >= r) ? r : ((a_.values[i] == a_.values[i]) ? r : a_.values[i])); #endif } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vminv_f32 #define vminv_f32(v) simde_vminv_f32(v) #endif SIMDE_FUNCTION_ATTRIBUTES int8_t simde_vminv_s8(simde_int8x8_t a) { int8_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vminv_s8(a); #else simde_int8x8_private a_ = simde_int8x8_to_private(a); r = INT8_MAX; SIMDE_VECTORIZE_REDUCTION(min:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] < r ? 
a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vminv_s8 #define vminv_s8(v) simde_vminv_s8(v) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_vminv_s16(simde_int16x4_t a) { int16_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vminv_s16(a); #else simde_int16x4_private a_ = simde_int16x4_to_private(a); r = INT16_MAX; SIMDE_VECTORIZE_REDUCTION(min:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] < r ? a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vminv_s16 #define vminv_s16(v) simde_vminv_s16(v) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vminv_s32(simde_int32x2_t a) { int32_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vminv_s32(a); #else simde_int32x2_private a_ = simde_int32x2_to_private(a); r = INT32_MAX; SIMDE_VECTORIZE_REDUCTION(min:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] < r ? a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vminv_s32 #define vminv_s32(v) simde_vminv_s32(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint8_t simde_vminv_u8(simde_uint8x8_t a) { uint8_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vminv_u8(a); #else simde_uint8x8_private a_ = simde_uint8x8_to_private(a); r = UINT8_MAX; SIMDE_VECTORIZE_REDUCTION(min:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] < r ? 
a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vminv_u8 #define vminv_u8(v) simde_vminv_u8(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint16_t simde_vminv_u16(simde_uint16x4_t a) { uint16_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vminv_u16(a); #else simde_uint16x4_private a_ = simde_uint16x4_to_private(a); r = UINT16_MAX; SIMDE_VECTORIZE_REDUCTION(min:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] < r ? a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vminv_u16 #define vminv_u16(v) simde_vminv_u16(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vminv_u32(simde_uint32x2_t a) { uint32_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vminv_u32(a); #else simde_uint32x2_private a_ = simde_uint32x2_to_private(a); r = UINT32_MAX; SIMDE_VECTORIZE_REDUCTION(min:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] < r ? a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vminv_u32 #define vminv_u32(v) simde_vminv_u32(v) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32_t simde_vminvq_f32(simde_float32x4_t a) { simde_float32_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vminvq_f32(a); #else simde_float32x4_private a_ = simde_float32x4_to_private(a); r = SIMDE_MATH_INFINITYF; #if defined(SIMDE_FAST_NANS) SIMDE_VECTORIZE_REDUCTION(min:r) #else SIMDE_VECTORIZE #endif for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { #if defined(SIMDE_FAST_NANS) r = a_.values[i] < r ? a_.values[i] : r; #else r = (a_.values[i] < r) ? a_.values[i] : ((a_.values[i] >= r) ? r : ((a_.values[i] == a_.values[i]) ? 
r : a_.values[i])); #endif } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vminvq_f32 #define vminvq_f32(v) simde_vminvq_f32(v) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64_t simde_vminvq_f64(simde_float64x2_t a) { simde_float64_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vminvq_f64(a); #else simde_float64x2_private a_ = simde_float64x2_to_private(a); r = SIMDE_MATH_INFINITY; #if defined(SIMDE_FAST_NANS) SIMDE_VECTORIZE_REDUCTION(min:r) #else SIMDE_VECTORIZE #endif for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { #if defined(SIMDE_FAST_NANS) r = a_.values[i] < r ? a_.values[i] : r; #else r = (a_.values[i] < r) ? a_.values[i] : ((a_.values[i] >= r) ? r : ((a_.values[i] == a_.values[i]) ? r : a_.values[i])); #endif } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vminvq_f64 #define vminvq_f64(v) simde_vminvq_f64(v) #endif SIMDE_FUNCTION_ATTRIBUTES int8_t simde_vminvq_s8(simde_int8x16_t a) { int8_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vminvq_s8(a); #else simde_int8x16_private a_ = simde_int8x16_to_private(a); r = INT8_MAX; SIMDE_VECTORIZE_REDUCTION(min:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] < r ? a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vminvq_s8 #define vminvq_s8(v) simde_vminvq_s8(v) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_vminvq_s16(simde_int16x8_t a) { int16_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vminvq_s16(a); #else simde_int16x8_private a_ = simde_int16x8_to_private(a); r = INT16_MAX; SIMDE_VECTORIZE_REDUCTION(min:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] < r ? 
a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vminvq_s16 #define vminvq_s16(v) simde_vminvq_s16(v) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vminvq_s32(simde_int32x4_t a) { int32_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vminvq_s32(a); #else simde_int32x4_private a_ = simde_int32x4_to_private(a); r = INT32_MAX; SIMDE_VECTORIZE_REDUCTION(min:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] < r ? a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vminvq_s32 #define vminvq_s32(v) simde_vminvq_s32(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint8_t simde_vminvq_u8(simde_uint8x16_t a) { uint8_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vminvq_u8(a); #else simde_uint8x16_private a_ = simde_uint8x16_to_private(a); r = UINT8_MAX; SIMDE_VECTORIZE_REDUCTION(min:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] < r ? a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vminvq_u8 #define vminvq_u8(v) simde_vminvq_u8(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint16_t simde_vminvq_u16(simde_uint16x8_t a) { uint16_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vminvq_u16(a); #else simde_uint16x8_private a_ = simde_uint16x8_to_private(a); r = UINT16_MAX; SIMDE_VECTORIZE_REDUCTION(min:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] < r ? 
a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vminvq_u16 #define vminvq_u16(v) simde_vminvq_u16(v) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vminvq_u32(simde_uint32x4_t a) { uint32_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vminvq_u32(a); #else simde_uint32x4_private a_ = simde_uint32x4_to_private(a); r = UINT32_MAX; SIMDE_VECTORIZE_REDUCTION(min:r) for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { r = a_.values[i] < r ? a_.values[i] : r; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vminvq_u32 #define vminvq_u32(v) simde_vminvq_u32(v) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MINV_H) */ simde-0.7.2/simde/arm/neon/mla.h000066400000000000000000000377221400333146700163760ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_MLA_H) #define SIMDE_ARM_NEON_MLA_H #include "types.h" #include "add.h" #include "mul.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vmla_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmla_f32(a, b, c); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b), c_ = simde_float32x2_to_private(c); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmla_f32 #define vmla_f32(a, b, c) simde_vmla_f32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vmla_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmla_f64(a, b, c); #else simde_float64x1_private r_, a_ = simde_float64x1_to_private(a), b_ = simde_float64x1_to_private(b), c_ = simde_float64x1_to_private(c); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmla_f64 #define vmla_f64(a, b, c) simde_vmla_f64((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vmla_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return 
vmla_s8(a, b, c); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b), c_ = simde_int8x8_to_private(c); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmla_s8 #define vmla_s8(a, b, c) simde_vmla_s8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vmla_s16(simde_int16x4_t a, simde_int16x4_t b, simde_int16x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmla_s16(a, b, c); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b), c_ = simde_int16x4_to_private(c); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmla_s16 #define vmla_s16(a, b, c) simde_vmla_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vmla_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmla_s32(a, b, c); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b), c_ = simde_int32x2_to_private(c); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_int32x2_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmla_s32 #define vmla_s32(a, b, c) simde_vmla_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vmla_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmla_u8(a, b, c); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b), c_ = simde_uint8x8_to_private(c); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmla_u8 #define vmla_u8(a, b, c) simde_vmla_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vmla_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmla_u16(a, b, c); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b), c_ = simde_uint16x4_to_private(c); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmla_u16 #define vmla_u16(a, b, c) simde_vmla_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vmla_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmla_u32(a, b, c); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b), c_ = simde_uint32x2_to_private(c); #if 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmla_u32 #define vmla_u32(a, b, c) simde_vmla_u32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vmlaq_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlaq_f32(a, b, c); #elif defined(SIMDE_X86_FMA_NATIVE) return _mm_fmadd_ps(b, c, a); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_madd(b, c, a); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) return simde_vaddq_f32(simde_vmulq_f32(b, c), a); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b), c_ = simde_float32x4_to_private(c); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlaq_f32 #define vmlaq_f32(a, b, c) simde_vmlaq_f32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vmlaq_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64x2_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmlaq_f64(a, b, c); #elif defined(SIMDE_X86_FMA_NATIVE) return _mm_fmadd_pd(b, c, a); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_madd(b, c, a); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) return simde_vaddq_f64(simde_vmulq_f64(b, c), a); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b), c_ = 
simde_float64x2_to_private(c); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmlaq_f64 #define vmlaq_f64(a, b, c) simde_vmlaq_f64((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vmlaq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlaq_s8(a, b, c); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) return simde_vaddq_s8(simde_vmulq_s8(b, c), a); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b), c_ = simde_int8x16_to_private(c); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlaq_s8 #define vmlaq_s8(a, b, c) simde_vmlaq_s8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vmlaq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlaq_s16(a, b, c); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) return simde_vaddq_s16(simde_vmulq_s16(b, c), a); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b), c_ = simde_int16x8_to_private(c); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return 
simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlaq_s16 #define vmlaq_s16(a, b, c) simde_vmlaq_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmlaq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlaq_s32(a, b, c); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) return simde_vaddq_s32(simde_vmulq_s32(b, c), a); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b), c_ = simde_int32x4_to_private(c); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlaq_s32 #define vmlaq_s32(a, b, c) simde_vmlaq_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vmlaq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlaq_u8(a, b, c); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) return simde_vaddq_u8(simde_vmulq_u8(b, c), a); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b), c_ = simde_uint8x16_to_private(c); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlaq_u8 #define vmlaq_u8(a, b, c) simde_vmlaq_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vmlaq_u16(simde_uint16x8_t a, simde_uint16x8_t b, 
simde_uint16x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlaq_u16(a, b, c); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) return simde_vaddq_u16(simde_vmulq_u16(b, c), a); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b), c_ = simde_uint16x8_to_private(c); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlaq_u16 #define vmlaq_u16(a, b, c) simde_vmlaq_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmlaq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlaq_u32(a, b, c); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) return simde_vaddq_u32(simde_vmulq_u32(b, c), a); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b), c_ = simde_uint32x4_to_private(c); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlaq_u32 #define vmlaq_u32(a, b, c) simde_vmlaq_u32((a), (b), (c)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MLA_H) */ simde-0.7.2/simde/arm/neon/mla_n.h000066400000000000000000000240141400333146700167010ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the 
"Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_MLA_N_H) #define SIMDE_ARM_NEON_MLA_N_H #include "types.h" #include "add.h" #include "mul.h" #include "mul_n.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vmla_n_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32 c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmla_n_f32(a, b, c); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) r_.values = (b_.values * c) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c) + a_.values[i]; } #endif return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmla_n_f32 #define vmla_n_f32(a, b, c) simde_vmla_n_f32((a), (b), (c)) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vmla_n_s16(simde_int16x4_t a, simde_int16x4_t b, int16_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmla_n_s16(a, b, c); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) r_.values = (b_.values * c) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c) + a_.values[i]; } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmla_n_s16 #define vmla_n_s16(a, b, c) simde_vmla_n_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vmla_n_s32(simde_int32x2_t a, simde_int32x2_t b, int32_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmla_n_s32(a, b, c); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = (b_.values * c) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c) + a_.values[i]; } #endif return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmla_n_s32 #define vmla_n_s32(a, b, c) simde_vmla_n_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vmla_n_u16(simde_uint16x4_t a, simde_uint16x4_t b, uint16_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmla_n_u16(a, b, c); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = (b_.values * c) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c) + a_.values[i]; } #endif return 
simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmla_n_u16 #define vmla_n_u16(a, b, c) simde_vmla_n_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vmla_n_u32(simde_uint32x2_t a, simde_uint32x2_t b, uint32_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmla_n_u32(a, b, c); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = (b_.values * c) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c) + a_.values[i]; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmla_n_u32 #define vmla_n_u32(a, b, c) simde_vmla_n_u32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vmlaq_n_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32 c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlaq_n_f32(a, b, c); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) return simde_vaddq_f32(simde_vmulq_n_f32(b, c), a); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) r_.values = (b_.values * c) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c) + a_.values[i]; } #endif return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlaq_n_f32 #define vmlaq_n_f32(a, b, c) simde_vmlaq_n_f32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vmlaq_n_s16(simde_int16x8_t a, simde_int16x8_t b, int16_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlaq_n_s16(a, b, c); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) return 
simde_vaddq_s16(simde_vmulq_n_s16(b, c), a); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) r_.values = (b_.values * c) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c) + a_.values[i]; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlaq_n_s16 #define vmlaq_n_s16(a, b, c) simde_vmlaq_n_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmlaq_n_s32(simde_int32x4_t a, simde_int32x4_t b, int32_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlaq_n_s32(a, b, c); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) return simde_vaddq_s32(simde_vmulq_n_s32(b, c), a); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = (b_.values * c) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c) + a_.values[i]; } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlaq_n_s32 #define vmlaq_n_s32(a, b, c) simde_vmlaq_n_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vmlaq_n_u16(simde_uint16x8_t a, simde_uint16x8_t b, uint16_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlaq_n_u16(a, b, c); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) return simde_vaddq_u16(simde_vmulq_n_u16(b, c), a); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = (b_.values * c) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = 
(b_.values[i] * c) + a_.values[i]; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlaq_n_u16 #define vmlaq_n_u16(a, b, c) simde_vmlaq_n_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmlaq_n_u32(simde_uint32x4_t a, simde_uint32x4_t b, uint32_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlaq_n_u32(a, b, c); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) return simde_vaddq_u32(simde_vmulq_n_u32(b, c), a); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = (b_.values * c) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c) + a_.values[i]; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlaq_n_u32 #define vmlaq_n_u32(a, b, c) simde_vmlaq_n_u32((a), (b), (c)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MLA_N_H) */ simde-0.7.2/simde/arm/neon/mlal.h000066400000000000000000000116411400333146700165420ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_MLAL_H) #define SIMDE_ARM_NEON_MLAL_H #include "movl.h" #include "mla.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vmlal_s8(simde_int16x8_t a, simde_int8x8_t b, simde_int8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlal_s8(a, b, c); #else return simde_vmlaq_s16(a, simde_vmovl_s8(b), simde_vmovl_s8(c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlal_s8 #define vmlal_s8(a, b, c) simde_vmlal_s8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmlal_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlal_s16(a, b, c); #else return simde_vmlaq_s32(a, simde_vmovl_s16(b), simde_vmovl_s16(c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlal_s16 #define vmlal_s16(a, b, c) simde_vmlal_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vmlal_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlal_s32(a, b, c); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(simde_vmovl_s32(b)), c_ = simde_int64x2_to_private(simde_vmovl_s32(c)); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + 
a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlal_s32 #define vmlal_s32(a, b, c) simde_vmlal_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vmlal_u8(simde_uint16x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlal_u8(a, b, c); #else return simde_vmlaq_u16(a, simde_vmovl_u8(b), simde_vmovl_u8(c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlal_u8 #define vmlal_u8(a, b, c) simde_vmlal_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmlal_u16(simde_uint32x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlal_u16(a, b, c); #else return simde_vmlaq_u32(a, simde_vmovl_u16(b), simde_vmovl_u16(c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlal_u16 #define vmlal_u16(a, b, c) simde_vmlal_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vmlal_u32(simde_uint64x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlal_u32(a, b, c); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(simde_vmovl_u32(b)), c_ = simde_uint64x2_to_private(simde_vmovl_u32(c)); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlal_u32 #define vmlal_u32(a, b, c) simde_vmlal_u32((a), (b), (c)) #endif SIMDE_END_DECLS_ 
HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MLAL_H) */ simde-0.7.2/simde/arm/neon/mlal_high.h000066400000000000000000000122131400333146700175350ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_MLAL_HIGH_H) #define SIMDE_ARM_NEON_MLAL_HIGH_H #include "movl_high.h" #include "mla.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vmlal_high_s8(simde_int16x8_t a, simde_int8x16_t b, simde_int8x16_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmlal_high_s8(a, b, c); #else return simde_vmlaq_s16(a, simde_vmovl_high_s8(b), simde_vmovl_high_s8(c)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmlal_high_s8 #define vmlal_high_s8(a, b, c) simde_vmlal_high_s8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmlal_high_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmlal_high_s16(a, b, c); #else return simde_vmlaq_s32(a, simde_vmovl_high_s16(b), simde_vmovl_high_s16(c)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmlal_high_s16 #define vmlal_high_s16(a, b, c) simde_vmlal_high_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vmlal_high_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmlal_high_s32(a, b, c); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(simde_vmovl_high_s32(b)), c_ = simde_int64x2_to_private(simde_vmovl_high_s32(c)); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmlal_high_s32 #define vmlal_high_s32(a, b, c) simde_vmlal_high_s32((a), (b), (c)) 
#endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vmlal_high_u8(simde_uint16x8_t a, simde_uint8x16_t b, simde_uint8x16_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmlal_high_u8(a, b, c); #else return simde_vmlaq_u16(a, simde_vmovl_high_u8(b), simde_vmovl_high_u8(c)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmlal_high_u8 #define vmlal_high_u8(a, b, c) simde_vmlal_high_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmlal_high_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x8_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmlal_high_u16(a, b, c); #else return simde_vmlaq_u32(a, simde_vmovl_high_u16(b), simde_vmovl_high_u16(c)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmlal_high_u16 #define vmlal_high_u16(a, b, c) simde_vmlal_high_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vmlal_high_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x4_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmlal_high_u32(a, b, c); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(simde_vmovl_high_u32(b)), c_ = simde_uint64x2_to_private(simde_vmovl_high_u32(c)); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmlal_high_u32 #define vmlal_high_u32(a, b, c) simde_vmlal_high_u32((a), (b), (c)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MLAL_HIGH_H) */ simde-0.7.2/simde/arm/neon/mlal_n.h000066400000000000000000000100651400333146700170560ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, 
free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_MLAL_N_H) #define SIMDE_ARM_NEON_MLAL_N_H #include "movl.h" #include "dup_n.h" #include "mla.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmlal_n_s16(simde_int32x4_t a, simde_int16x4_t b, int16_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlal_n_s16(a, b, c); #else return simde_vmlaq_s32(a, simde_vmovl_s16(b), simde_vdupq_n_s32(c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlal_n_s16 #define vmlal_n_s16(a, b, c) simde_vmlal_n_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vmlal_n_s32(simde_int64x2_t a, simde_int32x2_t b, int32_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlal_n_s32(a, b, c); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(simde_vmovl_s32(b)), c_ = simde_int64x2_to_private(simde_vdupq_n_s64(c)); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlal_n_s32 #define vmlal_n_s32(a, b, c) simde_vmlal_n_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmlal_n_u16(simde_uint32x4_t a, simde_uint16x4_t b, uint16_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlal_n_u16(a, b, c); #else return simde_vmlaq_u32(a, simde_vmovl_u16(b), simde_vdupq_n_u32(c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlal_n_u16 #define vmlal_n_u16(a, b, c) simde_vmlal_n_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vmlal_n_u32(simde_uint64x2_t a, simde_uint32x2_t b, uint32_t c) { #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlal_n_u32(a, b, c); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(simde_vmovl_u32(b)), c_ = simde_uint64x2_to_private(simde_vdupq_n_u64(c)); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = (b_.values * c_.values) + a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlal_n_u32 #define vmlal_n_u32(a, b, c) simde_vmlal_n_u32((a), (b), (c)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MLAL_N_H) */ simde-0.7.2/simde/arm/neon/mls.h000066400000000000000000000173621400333146700164160ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_MLS_H) #define SIMDE_ARM_NEON_MLS_H #include "mul.h" #include "sub.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vmls_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmls_f32(a, b, c); #else return simde_vsub_f32(a, simde_vmul_f32(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmls_f32 #define vmls_f32(a, b, c) simde_vmls_f32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vmls_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmls_f64(a, b, c); #else return simde_vsub_f64(a, simde_vmul_f64(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmls_f64 #define vmls_f64(a, b, c) simde_vmls_f64((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vmls_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmls_s8(a, b, c); #else return simde_vsub_s8(a, simde_vmul_s8(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmls_s8 #define vmls_s8(a, b, c) simde_vmls_s8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vmls_s16(simde_int16x4_t a, simde_int16x4_t b, simde_int16x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmls_s16(a, b, c); #else return simde_vsub_s16(a, simde_vmul_s16(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmls_s16 #define vmls_s16(a, b, c) simde_vmls_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vmls_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmls_s32(a, b, c); #else return simde_vsub_s32(a, simde_vmul_s32(b, c)); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmls_s32 #define vmls_s32(a, b, c) simde_vmls_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vmls_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmls_u8(a, b, c); #else return simde_vsub_u8(a, simde_vmul_u8(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmls_u8 #define vmls_u8(a, b, c) simde_vmls_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vmls_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmls_u16(a, b, c); #else return simde_vsub_u16(a, simde_vmul_u16(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmls_u16 #define vmls_u16(a, b, c) simde_vmls_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vmls_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmls_u32(a, b, c); #else return simde_vsub_u32(a, simde_vmul_u32(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmls_u32 #define vmls_u32(a, b, c) simde_vmls_u32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vmlsq_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlsq_f32(a, b, c); #else return simde_vsubq_f32(a, simde_vmulq_f32(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlsq_f32 #define vmlsq_f32(a, b, c) simde_vmlsq_f32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vmlsq_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64x2_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmlsq_f64(a, b, c); #else return simde_vsubq_f64(a, simde_vmulq_f64(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef 
vmlsq_f64 #define vmlsq_f64(a, b, c) simde_vmlsq_f64((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vmlsq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlsq_s8(a, b, c); #else return simde_vsubq_s8(a, simde_vmulq_s8(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlsq_s8 #define vmlsq_s8(a, b, c) simde_vmlsq_s8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vmlsq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlsq_s16(a, b, c); #else return simde_vsubq_s16(a, simde_vmulq_s16(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlsq_s16 #define vmlsq_s16(a, b, c) simde_vmlsq_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmlsq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlsq_s32(a, b, c); #else return simde_vsubq_s32(a, simde_vmulq_s32(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlsq_s32 #define vmlsq_s32(a, b, c) simde_vmlsq_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vmlsq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlsq_u8(a, b, c); #else return simde_vsubq_u8(a, simde_vmulq_u8(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlsq_u8 #define vmlsq_u8(a, b, c) simde_vmlsq_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vmlsq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlsq_u16(a, b, c); #else return simde_vsubq_u16(a, simde_vmulq_u16(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlsq_u16 #define vmlsq_u16(a, b, c) simde_vmlsq_u16((a), (b), 
(c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmlsq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlsq_u32(a, b, c); #else return simde_vsubq_u32(a, simde_vmulq_u32(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlsq_u32 #define vmlsq_u32(a, b, c) simde_vmlsq_u32((a), (b), (c)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MLS_H) */ simde-0.7.2/simde/arm/neon/mlsl.h000066400000000000000000000074601400333146700165700ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_MLSL_H) #define SIMDE_ARM_NEON_MLSL_H #include "mull.h" #include "sub.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vmlsl_s8(simde_int16x8_t a, simde_int8x8_t b, simde_int8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlsl_s8(a, b, c); #else return simde_vsubq_s16(a, simde_vmull_s8(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlsl_s8 #define vmlsl_s8(a, b, c) simde_vmlsl_s8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmlsl_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlsl_s16(a, b, c); #else return simde_vsubq_s32(a, simde_vmull_s16(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlsl_s16 #define vmlsl_s16(a, b, c) simde_vmlsl_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vmlsl_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlsl_s32(a, b, c); #else return simde_vsubq_s64(a, simde_vmull_s32(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlsl_s32 #define vmlsl_s32(a, b, c) simde_vmlsl_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vmlsl_u8(simde_uint16x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlsl_u8(a, b, c); #else return simde_vsubq_u16(a, simde_vmull_u8(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlsl_u8 #define vmlsl_u8(a, b, c) simde_vmlsl_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmlsl_u16(simde_uint32x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlsl_u16(a, b, c); #else return simde_vsubq_u32(a, simde_vmull_u16(b, 
c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlsl_u16 #define vmlsl_u16(a, b, c) simde_vmlsl_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vmlsl_u32(simde_uint64x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlsl_u32(a, b, c); #else return simde_vsubq_u64(a, simde_vmull_u32(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlsl_u32 #define vmlsl_u32(a, b, c) simde_vmlsl_u32((a), (b), (c)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MLSL_H) */ simde-0.7.2/simde/arm/neon/mlsl_high.h000066400000000000000000000077741400333146700175770ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_MLSL_HIGH_H) #define SIMDE_ARM_NEON_MLSL_HIGH_H #include "mull_high.h" #include "sub.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vmlsl_high_s8(simde_int16x8_t a, simde_int8x16_t b, simde_int8x16_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmlsl_high_s8(a, b, c); #else return simde_vsubq_s16(a, simde_vmull_high_s8(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmlsl_high_s8 #define vmlsl_high_s8(a, b, c) simde_vmlsl_high_s8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmlsl_high_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmlsl_high_s16(a, b, c); #else return simde_vsubq_s32(a, simde_vmull_high_s16(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmlsl_high_s16 #define vmlsl_high_s16(a, b, c) simde_vmlsl_high_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vmlsl_high_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmlsl_high_s32(a, b, c); #else return simde_vsubq_s64(a, simde_vmull_high_s32(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmlsl_high_s32 #define vmlsl_high_s32(a, b, c) simde_vmlsl_high_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vmlsl_high_u8(simde_uint16x8_t a, simde_uint8x16_t b, simde_uint8x16_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmlsl_high_u8(a, b, c); #else return simde_vsubq_u16(a, simde_vmull_high_u8(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmlsl_high_u8 #define vmlsl_high_u8(a, b, c) simde_vmlsl_high_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmlsl_high_u16(simde_uint32x4_t a, simde_uint16x8_t b, 
simde_uint16x8_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmlsl_high_u16(a, b, c); #else return simde_vsubq_u32(a, simde_vmull_high_u16(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmlsl_high_u16 #define vmlsl_high_u16(a, b, c) simde_vmlsl_high_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vmlsl_high_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x4_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmlsl_high_u32(a, b, c); #else return simde_vsubq_u64(a, simde_vmull_high_u32(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmlsl_high_u32 #define vmlsl_high_u32(a, b, c) simde_vmlsl_high_u32((a), (b), (c)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MLSL_HIGH_H) */ simde-0.7.2/simde/arm/neon/mlsl_n.h000066400000000000000000000060461400333146700171040ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_MLSL_N_H) #define SIMDE_ARM_NEON_MLSL_N_H #include "mull_n.h" #include "sub.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmlsl_n_s16(simde_int32x4_t a, simde_int16x4_t b, int16_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlsl_n_s16(a, b, c); #else return simde_vsubq_s32(a, simde_vmull_n_s16(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlsl_n_s16 #define vmlsl_n_s16(a, b, c) simde_vmlsl_n_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vmlsl_n_s32(simde_int64x2_t a, simde_int32x2_t b, int32_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlsl_n_s32(a, b, c); #else return simde_vsubq_s64(a, simde_vmull_n_s32(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlsl_n_s32 #define vmlsl_n_s32(a, b, c) simde_vmlsl_n_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmlsl_n_u16(simde_uint32x4_t a, simde_uint16x4_t b, uint16_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlsl_n_u16(a, b, c); #else return simde_vsubq_u32(a, simde_vmull_n_u16(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlsl_n_u16 #define vmlsl_n_u16(a, b, c) simde_vmlsl_n_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vmlsl_n_u32(simde_uint64x2_t a, simde_uint32x2_t b, uint32_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmlsl_n_u32(a, b, c); #else return simde_vsubq_u64(a, simde_vmull_n_u32(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmlsl_n_u32 
#define vmlsl_n_u32(a, b, c) simde_vmlsl_n_u32((a), (b), (c)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MLSL_N_H) */ simde-0.7.2/simde/arm/neon/movl.h000066400000000000000000000142361400333146700165750ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_MOVL_H) #define SIMDE_ARM_NEON_MOVL_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vmovl_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmovl_s8(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_load_8x8(&a); #else simde_int16x8_private r_; simde_int8x8_private a_ = simde_int8x8_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, a_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i]); } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmovl_s8 #define vmovl_s8(a) simde_vmovl_s8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmovl_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmovl_s16(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_load_16x4(&a); #else simde_int32x4_private r_; simde_int16x4_private a_ = simde_int16x4_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, a_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]); } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmovl_s16 #define vmovl_s16(a) simde_vmovl_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vmovl_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmovl_s32(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i64x2_load_32x2(&a); #else simde_int64x2_private r_; simde_int32x2_private a_ = simde_int32x2_to_private(a); #if 
defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, a_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i]); } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmovl_s32 #define vmovl_s32(a) simde_vmovl_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vmovl_u8(simde_uint8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmovl_u8(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u16x8_load_8x8(&a); #else simde_uint16x8_private r_; simde_uint8x8_private a_ = simde_uint8x8_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, a_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i]); } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmovl_u8 #define vmovl_u8(a) simde_vmovl_u8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmovl_u16(simde_uint16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmovl_u16(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u32x4_load_16x4(&a); #else simde_uint32x4_private r_; simde_uint16x4_private a_ = simde_uint16x4_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, a_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]); } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmovl_u16 #define vmovl_u16(a) simde_vmovl_u16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vmovl_u32(simde_uint32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmovl_u32(a); 
#elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u64x2_load_32x2(&a); #else simde_uint64x2_private r_; simde_uint32x2_private a_ = simde_uint32x2_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, a_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i]); } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmovl_u32 #define vmovl_u32(a) simde_vmovl_u32((a)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MOVL_H) */ simde-0.7.2/simde/arm/neon/movl_high.h000066400000000000000000000073101400333146700175670ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_MOVL_HIGH_H) #define SIMDE_ARM_NEON_MOVL_HIGH_H #include "types.h" #include "movl.h" #include "get_high.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vmovl_high_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmovl_high_s8(a); #else return simde_vmovl_s8(simde_vget_high_s8(a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmovl_high_s8 #define vmovl_high_s8(a) simde_vmovl_high_s8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmovl_high_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmovl_high_s16(a); #else return simde_vmovl_s16(simde_vget_high_s16(a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmovl_high_s16 #define vmovl_high_s16(a) simde_vmovl_high_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vmovl_high_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmovl_high_s32(a); #else return simde_vmovl_s32(simde_vget_high_s32(a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmovl_high_s32 #define vmovl_high_s32(a) simde_vmovl_high_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vmovl_high_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmovl_high_u8(a); #else return simde_vmovl_u8(simde_vget_high_u8(a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmovl_high_u8 #define vmovl_high_u8(a) simde_vmovl_high_u8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmovl_high_u16(simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmovl_high_u16(a); #else return simde_vmovl_u16(simde_vget_high_u16(a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmovl_high_u16 #define 
vmovl_high_u16(a) simde_vmovl_high_u16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vmovl_high_u32(simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmovl_high_u32(a); #else return simde_vmovl_u32(simde_vget_high_u32(a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmovl_high_u32 #define vmovl_high_u32(a) simde_vmovl_high_u32((a)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MOVL_HIGH_H) */ simde-0.7.2/simde/arm/neon/movn.h000066400000000000000000000131651400333146700165770ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_MOVN_H) #define SIMDE_ARM_NEON_MOVN_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vmovn_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmovn_s16(a); #else simde_int8x8_private r_; simde_int16x8_private a_ = simde_int16x8_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, a_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int8_t, a_.values[i]); } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmovn_s16 #define vmovn_s16(a) simde_vmovn_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vmovn_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmovn_s32(a); #else simde_int16x4_private r_; simde_int32x4_private a_ = simde_int32x4_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, a_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i]); } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmovn_s32 #define vmovn_s32(a) simde_vmovn_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vmovn_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmovn_s64(a); #else simde_int32x2_private r_; simde_int64x2_private a_ = simde_int64x2_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, a_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]); } #endif return simde_int32x2_from_private(r_); 
#endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmovn_s64 #define vmovn_s64(a) simde_vmovn_s64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vmovn_u16(simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmovn_u16(a); #else simde_uint8x8_private r_; simde_uint16x8_private a_ = simde_uint16x8_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, a_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, a_.values[i]); } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmovn_u16 #define vmovn_u16(a) simde_vmovn_u16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vmovn_u32(simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmovn_u32(a); #else simde_uint16x4_private r_; simde_uint32x4_private a_ = simde_uint32x4_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, a_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i]); } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmovn_u32 #define vmovn_u32(a) simde_vmovn_u32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vmovn_u64(simde_uint64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmovn_u64(a); #else simde_uint32x2_private r_; simde_uint64x2_private a_ = simde_uint64x2_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, a_.values); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]); } #endif return simde_uint32x2_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmovn_u64 #define vmovn_u64(a) simde_vmovn_u64((a)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MOVN_H) */ simde-0.7.2/simde/arm/neon/movn_high.h000066400000000000000000000075011400333146700175730ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_MOVN_HIGH_H) #define SIMDE_ARM_NEON_MOVN_HIGH_H #include "types.h" #include "movn.h" #include "combine.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vmovn_high_s16(simde_int8x8_t r, simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmovn_high_s16(r, a); #else return simde_vcombine_s8(r, simde_vmovn_s16(a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmovn_high_s16 #define vmovn_high_s16(r, a) simde_vmovn_high_s16((r), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vmovn_high_s32(simde_int16x4_t r, simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmovn_high_s32(r, a); #else return simde_vcombine_s16(r, simde_vmovn_s32(a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmovn_high_s32 #define vmovn_high_s32(r, a) simde_vmovn_high_s32((r), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmovn_high_s64(simde_int32x2_t r, simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmovn_high_s64(r, a); #else return simde_vcombine_s32(r, simde_vmovn_s64(a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmovn_high_s64 #define vmovn_high_s64(r, a) simde_vmovn_high_s64((r), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vmovn_high_u16(simde_uint8x8_t r, simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmovn_high_u16(r, a); #else return simde_vcombine_u8(r, simde_vmovn_u16(a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmovn_high_u16 #define vmovn_high_u16(r, a) simde_vmovn_high_u16((r), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vmovn_high_u32(simde_uint16x4_t r, simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmovn_high_u32(r, a); #else return simde_vcombine_u16(r, 
simde_vmovn_u32(a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmovn_high_u32 #define vmovn_high_u32(r, a) simde_vmovn_high_u32((r), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmovn_high_u64(simde_uint32x2_t r, simde_uint64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmovn_high_u64(r, a); #else return simde_vcombine_u32(r, simde_vmovn_u64(a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmovn_high_u64 #define vmovn_high_u64(r, a) simde_vmovn_high_u64((r), (a)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MOVN_HIGH_H) */ simde-0.7.2/simde/arm/neon/mul.h000066400000000000000000000402261400333146700164130ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_MUL_H) #define SIMDE_ARM_NEON_MUL_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vmul_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmul_f32(a, b); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmul_f32 #define vmul_f32(a, b) simde_vmul_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vmul_f64(simde_float64x1_t a, simde_float64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmul_f64(a, b); #else simde_float64x1_private r_, a_ = simde_float64x1_to_private(a), b_ = simde_float64x1_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmul_f64 #define vmul_f64(a, b) simde_vmul_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vmul_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmul_s8(a, b); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmul_s8 #define vmul_s8(a, b) simde_vmul_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vmul_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmul_s16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _m_pmullw(a, b); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmul_s16 #define vmul_s16(a, b) simde_vmul_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vmul_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmul_s32(a, b); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmul_s32 #define vmul_s32(a, b) simde_vmul_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_x_vmul_s64(simde_int64x1_t a, simde_int64x1_t b) { simde_int64x1_private r_, a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; 
i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_int64x1_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vmul_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmul_u8(a, b); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmul_u8 #define vmul_u8(a, b) simde_vmul_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vmul_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmul_u16(a, b); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmul_u16 #define vmul_u16(a, b) simde_vmul_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vmul_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmul_u32(a, b); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_uint32x2_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmul_u32 #define vmul_u32(a, b) simde_vmul_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_x_vmul_u64(simde_uint64x1_t a, simde_uint64x1_t b) { simde_uint64x1_private r_, a_ = simde_uint64x1_to_private(a), b_ = simde_uint64x1_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_uint64x1_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vmulq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmulq_f32(a, b); #elif defined(SIMDE_X86_SSE_NATIVE) return _mm_mul_ps(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_mul(a, b); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmulq_f32 #define vmulq_f32(a, b) simde_vmulq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vmulq_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmulq_f64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_mul_pd(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_mul(a, b); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = 
a_.values[i] * b_.values[i]; } #endif return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmulq_f64 #define vmulq_f64(a, b) simde_vmulq_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vmulq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmulq_s8(a, b); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmulq_s8 #define vmulq_s8(a, b) simde_vmulq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vmulq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmulq_s16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_mullo_epi16(a, b); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmulq_s16 #define vmulq_s16(a, b) simde_vmulq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmulq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmulq_s32(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_mul(a, b); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = 
a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmulq_s32 #define vmulq_s32(a, b) simde_vmulq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_x_vmulq_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i64x2_mul(a, b); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_int64x2_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vmulq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmulq_u8(a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmulq_u8 #define vmulq_u8(a, b) simde_vmulq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vmulq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmulq_u16(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_mul(a, b); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE 
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmulq_u16 #define vmulq_u16(a, b) simde_vmulq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmulq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmulq_u32(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_mul(a, b); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmulq_u32 #define vmulq_u32(a, b) simde_vmulq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_x_vmulq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i64x2_mul(a, b); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values * b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[i]; } #endif return simde_uint64x2_from_private(r_); #endif } SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MUL_H) */ simde-0.7.2/simde/arm/neon/mul_lane.h000066400000000000000000000357331400333146700174210ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in 
the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_MUL_LANE_H) #define SIMDE_ARM_NEON_MUL_LANE_H #include "types.h" #include "mul.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vmul_lane_f32(simde_float32x2_t a, simde_float32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[lane]; } return simde_float32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vmul_lane_f32(a, b, lane) vmul_lane_f32((a), (b), (lane)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmul_lane_f32 #define vmul_lane_f32(a, b, lane) simde_vmul_lane_f32((a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vmul_lane_f64(simde_float64x1_t a, simde_float64x1_t b, const int lane) 
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { simde_float64x1_private r_, a_ = simde_float64x1_to_private(a), b_ = simde_float64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[lane]; } return simde_float64x1_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmul_lane_f64(a, b, lane) vmul_lane_f64((a), (b), (lane)) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmul_lane_f64 #define vmul_lane_f64(a, b, lane) simde_vmul_lane_f64((a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vmul_lane_s16(simde_int16x4_t a, simde_int16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[lane]; } return simde_int16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vmul_lane_s16(a, b, lane) vmul_lane_s16((a), (b), (lane)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmul_lane_s16 #define vmul_lane_s16(a, b, lane) simde_vmul_lane_s16((a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vmul_lane_s32(simde_int32x2_t a, simde_int32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[lane]; } return simde_int32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vmul_lane_s32(a, b, lane) vmul_lane_s32((a), (b), (lane)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmul_lane_s32 #define vmul_lane_s32(a, b, lane) 
simde_vmul_lane_s32((a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vmul_lane_u16(simde_uint16x4_t a, simde_uint16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[lane]; } return simde_uint16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vmul_lane_u16(a, b, lane) vmul_lane_u16((a), (b), (lane)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmul_lane_u16 #define vmul_lane_u16(a, b, lane) simde_vmul_lane_u16((a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vmul_lane_u32(simde_uint32x2_t a, simde_uint32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[lane]; } return simde_uint32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vmul_lane_u32(a, b, lane) vmul_lane_u32((a), (b), (lane)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmul_lane_u32 #define vmul_lane_u32(a, b, lane) simde_vmul_lane_u32((a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vmulq_lane_f32(simde_float32x4_t a, simde_float32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float32x4_private r_, a_ = simde_float32x4_to_private(a); simde_float32x2_private b_ = simde_float32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[lane]; } return simde_float32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) 
#define simde_vmulq_lane_f32(a, b, lane) vmulq_lane_f32((a), (b), (lane)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmulq_lane_f32 #define vmulq_lane_f32(a, b, lane) simde_vmulq_lane_f32((a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vmulq_lane_f64(simde_float64x2_t a, simde_float64x1_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { simde_float64x2_private r_, a_ = simde_float64x2_to_private(a); simde_float64x1_private b_ = simde_float64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[lane]; } return simde_float64x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmulq_lane_f64(a, b, lane) vmulq_lane_f64((a), (b), (lane)) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmulq_lane_f64 #define vmulq_lane_f64(a, b, lane) simde_vmulq_lane_f64((a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vmulq_lane_s16(simde_int16x8_t a, simde_int16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_int16x8_private r_, a_ = simde_int16x8_to_private(a); simde_int16x4_private b_ = simde_int16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[lane]; } return simde_int16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vmulq_lane_s16(a, b, lane) vmulq_lane_s16((a), (b), (lane)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmulq_lane_s16 #define vmulq_lane_s16(a, b, lane) simde_vmulq_lane_s16((a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmulq_lane_s32(simde_int32x4_t a, simde_int32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_int32x4_private r_, a_ = simde_int32x4_to_private(a); simde_int32x2_private b_ = 
simde_int32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[lane]; } return simde_int32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vmulq_lane_s32(a, b, lane) vmulq_lane_s32((a), (b), (lane)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmulq_lane_s32 #define vmulq_lane_s32(a, b, lane) simde_vmulq_lane_s32((a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vmulq_lane_u16(simde_uint16x8_t a, simde_uint16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a); simde_uint16x4_private b_ = simde_uint16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[lane]; } return simde_uint16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vmulq_lane_u16(a, b, lane) vmulq_lane_u16((a), (b), (lane)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmulq_lane_u16 #define vmulq_lane_u16(a, b, lane) simde_vmulq_lane_u16((a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmulq_lane_u32(simde_uint32x4_t a, simde_uint32x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a); simde_uint32x2_private b_ = simde_uint32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[lane]; } return simde_uint32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vmulq_lane_u32(a, b, lane) vmulq_lane_u32((a), (b), (lane)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmulq_lane_u32 #define vmulq_lane_u32(a, b, lane) simde_vmulq_lane_u32((a), (b), (lane)) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vmulq_laneq_f32(simde_float32x4_t a, simde_float32x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[lane]; } return simde_float32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmulq_laneq_f32(a, b, lane) vmulq_laneq_f32((a), (b), (lane)) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmulq_laneq_f32 #define vmulq_laneq_f32(a, b, lane) simde_vmulq_laneq_f32((a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vmulq_laneq_f64(simde_float64x2_t a, simde_float64x2_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[lane]; } return simde_float64x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmulq_laneq_f64(a, b, lane) vmulq_laneq_f64((a), (b), (lane)) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmulq_laneq_f64 #define vmulq_laneq_f64(a, b, lane) simde_vmulq_laneq_f64((a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vmulq_laneq_s16(simde_int16x8_t a, simde_int16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[lane]; } return simde_int16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmulq_laneq_s16(a, b, lane) 
vmulq_laneq_s16((a), (b), (lane)) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmulq_laneq_s16 #define vmulq_laneq_s16(a, b, lane) simde_vmulq_laneq_s16((a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmulq_laneq_s32(simde_int32x4_t a, simde_int32x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[lane]; } return simde_int32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmulq_laneq_s32(a, b, lane) vmulq_laneq_s32((a), (b), (lane)) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmulq_laneq_s32 #define vmulq_laneq_s32(a, b, lane) simde_vmulq_laneq_s32((a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vmulq_laneq_u16(simde_uint16x8_t a, simde_uint16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b_.values[lane]; } return simde_uint16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmulq_laneq_u16(a, b, lane) vmulq_laneq_u16((a), (b), (lane)) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmulq_laneq_u16 #define vmulq_laneq_u16(a, b, lane) simde_vmulq_laneq_u16((a), (b), (lane)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmulq_laneq_u32(simde_uint32x4_t a, simde_uint32x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) 
; i++) { r_.values[i] = a_.values[i] * b_.values[lane]; } return simde_uint32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmulq_laneq_u32(a, b, lane) vmulq_laneq_u32((a), (b), (lane)) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmulq_laneq_u32 #define vmulq_laneq_u32(a, b, lane) simde_vmulq_laneq_u32((a), (b), (lane)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MUL_LANE_H) */ simde-0.7.2/simde/arm/neon/mul_n.h000066400000000000000000000253321400333146700167310ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_MUL_N_H) #define SIMDE_ARM_NEON_MUL_N_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vmul_n_f32(simde_float32x2_t a, simde_float32 b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmul_n_f32(a, b); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) r_.values = a_.values * b; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b; } #endif return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmul_n_f32 #define vmul_n_f32(a, b) simde_vmul_n_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vmul_n_f64(simde_float64x1_t a, simde_float64 b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmul_n_f64(a, b); #else simde_float64x1_private r_, a_ = simde_float64x1_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) r_.values = a_.values * b; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b; } #endif return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmul_n_f64 #define vmul_n_f64(a, b) simde_vmul_n_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vmul_n_s16(simde_int16x4_t a, int16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmul_n_s16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _m_pmullw(a, _mm_set1_pi16(b)); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values * b; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < 
(sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b; } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmul_n_s16 #define vmul_n_s16(a, b) simde_vmul_n_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vmul_n_s32(simde_int32x2_t a, int32_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmul_n_s32(a, b); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values * b; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b; } #endif return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmul_n_s32 #define vmul_n_s32(a, b) simde_vmul_n_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vmul_n_u16(simde_uint16x4_t a, uint16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmul_n_u16(a, b); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values * b; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmul_n_u16 #define vmul_n_u16(a, b) simde_vmul_n_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vmul_n_u32(simde_uint32x2_t a, uint32_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmul_n_u32(a, b); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values * b; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b; } #endif return simde_uint32x2_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmul_n_u32 #define vmul_n_u32(a, b) simde_vmul_n_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vmulq_n_f32(simde_float32x4_t a, simde_float32 b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmulq_n_f32(a, b); #elif defined(SIMDE_X86_SSE_NATIVE) return _mm_mul_ps(a, _mm_set1_ps(b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_mul(a, wasm_f32x4_splat(b)); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) r_.values = a_.values * b; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b; } #endif return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmulq_n_f32 #define vmulq_n_f32(a, b) simde_vmulq_n_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vmulq_n_f64(simde_float64x2_t a, simde_float64 b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmulq_n_f64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_mul_pd(a, _mm_set1_pd(b)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_mul(a, wasm_f64x2_splat(b)); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) r_.values = a_.values * b; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b; } #endif return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmulq_n_f64 #define vmulq_n_f64(a, b) simde_vmulq_n_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vmulq_n_s16(simde_int16x8_t a, int16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmulq_n_s16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return 
_mm_mullo_epi16(a, _mm_set1_epi16(b)); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values * b; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmulq_n_s16 #define vmulq_n_s16(a, b) simde_vmulq_n_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmulq_n_s32(simde_int32x4_t a, int32_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmulq_n_s32(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_mul(a, wasm_i32x4_splat(b)); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values * b; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b; } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmulq_n_s32 #define vmulq_n_s32(a, b) simde_vmulq_n_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vmulq_n_u16(simde_uint16x8_t a, uint16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmulq_n_u16(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_mul(a, wasm_i16x8_splat(HEDLEY_STATIC_CAST(int16_t, b))); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values * b; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmulq_n_u16 #define vmulq_n_u16(a, b) simde_vmulq_n_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t 
simde_vmulq_n_u32(simde_uint32x4_t a, uint32_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmulq_n_u32(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_mul(a, wasm_i32x4_splat(HEDLEY_STATIC_CAST(int32_t, b))); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values * b; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] * b; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmulq_n_u32 #define vmulq_n_u32(a, b) simde_vmulq_n_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MUL_N_H) */ simde-0.7.2/simde/arm/neon/mull.h000066400000000000000000000172561400333146700165760ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_MULL_H) #define SIMDE_ARM_NEON_MULL_H #include "types.h" #include "mul.h" #include "movl.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vmull_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmull_s8(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vmulq_s16(simde_vmovl_s8(a), simde_vmovl_s8(b)); #else simde_int16x8_private r_; simde_int8x8_private a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) __typeof__(r_.values) av, bv; SIMDE_CONVERT_VECTOR_(av, a_.values); SIMDE_CONVERT_VECTOR_(bv, b_.values); r_.values = av * bv; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i]) * HEDLEY_STATIC_CAST(int16_t, b_.values[i]); } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmull_s8 #define vmull_s8(a, b) simde_vmull_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmull_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmull_s16(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vmulq_s32(simde_vmovl_s16(a), simde_vmovl_s16(b)); #else simde_int32x4_private r_; simde_int16x4_private a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) __typeof__(r_.values) av, bv; SIMDE_CONVERT_VECTOR_(av, a_.values); SIMDE_CONVERT_VECTOR_(bv, b_.values); r_.values = av * bv; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]) * 
HEDLEY_STATIC_CAST(int32_t, b_.values[i]); } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmull_s16 #define vmull_s16(a, b) simde_vmull_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vmull_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmull_s32(a, b); #else simde_int64x2_private r_; simde_int32x2_private a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) __typeof__(r_.values) av, bv; SIMDE_CONVERT_VECTOR_(av, a_.values); SIMDE_CONVERT_VECTOR_(bv, b_.values); r_.values = av * bv; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) * HEDLEY_STATIC_CAST(int64_t, b_.values[i]); } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmull_s32 #define vmull_s32(a, b) simde_vmull_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vmull_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmull_u8(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vmulq_u16(simde_vmovl_u8(a), simde_vmovl_u8(b)); #else simde_uint16x8_private r_; simde_uint8x8_private a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) __typeof__(r_.values) av, bv; SIMDE_CONVERT_VECTOR_(av, a_.values); SIMDE_CONVERT_VECTOR_(bv, b_.values); r_.values = av * bv; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint16_t, b_.values[i]); } #endif return simde_uint16x8_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmull_u8 #define vmull_u8(a, b) simde_vmull_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmull_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmull_u16(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vmulq_u32(simde_vmovl_u16(a), simde_vmovl_u16(b)); #else simde_uint32x4_private r_; simde_uint16x4_private a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) __typeof__(r_.values) av, bv; SIMDE_CONVERT_VECTOR_(av, a_.values); SIMDE_CONVERT_VECTOR_(bv, b_.values); r_.values = av * bv; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[i]); } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmull_u16 #define vmull_u16(a, b) simde_vmull_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vmull_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmull_u32(a, b); #else simde_uint64x2_private r_; simde_uint32x2_private a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) __typeof__(r_.values) av, bv; SIMDE_CONVERT_VECTOR_(av, a_.values); SIMDE_CONVERT_VECTOR_(bv, b_.values); r_.values = av * bv; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint64_t, b_.values[i]); } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmull_u32 #define vmull_u32(a, b) simde_vmull_u32((a), (b)) #endif 
SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MULL_H) */ simde-0.7.2/simde/arm/neon/mull_high.h000066400000000000000000000077141400333146700175730ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_MULL_HIGH_H) #define SIMDE_ARM_NEON_MULL_HIGH_H #include "types.h" #include "mul.h" #include "movl_high.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vmull_high_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmull_high_s8(a, b); #else return simde_vmulq_s16(simde_vmovl_high_s8(a), simde_vmovl_high_s8(b)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmull_high_s8 #define vmull_high_s8(a, b) simde_vmull_high_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmull_high_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmull_high_s16(a, b); #else return simde_vmulq_s32(simde_vmovl_high_s16(a), simde_vmovl_high_s16(b)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmull_high_s16 #define vmull_high_s16(a, b) simde_vmull_high_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vmull_high_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmull_high_s32(a, b); #else return simde_x_vmulq_s64(simde_vmovl_high_s32(a), simde_vmovl_high_s32(b)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmull_high_s32 #define vmull_high_s32(a, b) simde_vmull_high_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vmull_high_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmull_high_u8(a, b); #else return simde_vmulq_u16(simde_vmovl_high_u8(a), simde_vmovl_high_u8(b)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmull_high_u8 #define vmull_high_u8(a, b) simde_vmull_high_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmull_high_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmull_high_u16(a, b); #else return simde_vmulq_u32(simde_vmovl_high_u16(a), simde_vmovl_high_u16(b)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmull_high_u16 #define vmull_high_u16(a, b) simde_vmull_high_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vmull_high_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vmull_high_u32(a, b); #else return simde_x_vmulq_u64(simde_vmovl_high_u32(a), simde_vmovl_high_u32(b)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vmull_high_u32 #define vmull_high_u32(a, b) simde_vmull_high_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MULL_HIGH_H) */ simde-0.7.2/simde/arm/neon/mull_n.h000066400000000000000000000122221400333146700170770ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_MULL_N_H) #define SIMDE_ARM_NEON_MULL_N_H #include "types.h" #include "mul_n.h" #include "movl.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmull_n_s16(simde_int16x4_t a, int16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmull_n_s16(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vmulq_n_s32(simde_vmovl_s16(a), b); #else simde_int32x4_private r_; simde_int16x4_private a_ = simde_int16x4_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) __typeof__(r_.values) av; SIMDE_CONVERT_VECTOR_(av, a_.values); r_.values = av * b; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]) * HEDLEY_STATIC_CAST(int32_t, b); } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmull_n_s16 #define vmull_n_s16(a, b) simde_vmull_n_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vmull_n_s32(simde_int32x2_t a, int32_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmull_n_s32(a, b); #else simde_int64x2_private r_; simde_int32x2_private a_ = simde_int32x2_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) __typeof__(r_.values) av; SIMDE_CONVERT_VECTOR_(av, a_.values); r_.values = av * b; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int64_t, 
a_.values[i]) * HEDLEY_STATIC_CAST(int64_t, b); } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmull_n_s32 #define vmull_n_s32(a, b) simde_vmull_n_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmull_n_u16(simde_uint16x4_t a, uint16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmull_n_u16(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vmulq_n_u32(simde_vmovl_u16(a), b); #else simde_uint32x4_private r_; simde_uint16x4_private a_ = simde_uint16x4_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) __typeof__(r_.values) av; SIMDE_CONVERT_VECTOR_(av, a_.values); r_.values = av * b; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint32_t, b); } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmull_n_u16 #define vmull_n_u16(a, b) simde_vmull_n_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vmull_n_u32(simde_uint32x2_t a, uint32_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmull_n_u32(a, b); #else simde_uint64x2_private r_; simde_uint32x2_private a_ = simde_uint32x2_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) __typeof__(r_.values) av; SIMDE_CONVERT_VECTOR_(av, a_.values); r_.values = av * b; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint64_t, b); } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmull_n_u32 #define vmull_n_u32(a, b) simde_vmull_n_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* 
!defined(SIMDE_ARM_NEON_MULL_H) */ simde-0.7.2/simde/arm/neon/mvn.h000066400000000000000000000272361400333146700164240ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_MVN_H) #define SIMDE_ARM_NEON_MVN_H #include "combine.h" #include "get_low.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vmvnq_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmvnq_s8(a); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, a, a, 0x55); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(a, _mm_cmpeq_epi8(a, a)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_nor(a, a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_not(a); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = ~a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ~(a_.values[i]); } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmvnq_s8 #define vmvnq_s8(a) simde_vmvnq_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vmvnq_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmvnq_s16(a); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, a, a, 0x55); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(a, _mm_cmpeq_epi16(a, a)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_nor(a, a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_not(a); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = ~a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ~(a_.values[i]); } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmvnq_s16 #define 
vmvnq_s16(a) simde_vmvnq_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmvnq_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmvnq_s32(a); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, a, a, 0x55); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(a, _mm_cmpeq_epi32(a, a)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_nor(a, a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_not(a); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = ~a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ~(a_.values[i]); } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmvnq_s32 #define vmvnq_s32(a) simde_vmvnq_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vmvnq_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmvnq_u8(a); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, a, a, 0x55); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(a, _mm_cmpeq_epi8(a, a)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_nor(a, a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_not(a); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = ~a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ~(a_.values[i]); } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmvnq_u8 #define vmvnq_u8(a) simde_vmvnq_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vmvnq_u16(simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmvnq_u16(a); #elif defined(SIMDE_X86_AVX512VL_NATIVE) 
return _mm_ternarylogic_epi32(a, a, a, 0x55); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(a, _mm_cmpeq_epi16(a, a)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_nor(a, a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_not(a); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = ~a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ~(a_.values[i]); } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmvnq_u16 #define vmvnq_u16(a) simde_vmvnq_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmvnq_u32(simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmvnq_u32(a); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, a, a, 0x55); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(a, _mm_cmpeq_epi32(a, a)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_nor(a, a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_not(a); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = ~a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ~(a_.values[i]); } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmvnq_u32 #define vmvnq_u32(a) simde_vmvnq_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vmvn_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmvn_s8(a); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_andnot_si64(a, _mm_cmpeq_pi8(a, a)); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = ~a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < 
(sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ~(a_.values[i]); } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmvn_s8 #define vmvn_s8(a) simde_vmvn_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vmvn_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmvn_s16(a); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_andnot_si64(a, _mm_cmpeq_pi16(a, a)); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = ~a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ~(a_.values[i]); } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmvn_s16 #define vmvn_s16(a) simde_vmvn_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vmvn_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmvn_s32(a); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_andnot_si64(a, _mm_cmpeq_pi32(a, a)); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = ~a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ~(a_.values[i]); } #endif return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmvn_s32 #define vmvn_s32(a) simde_vmvn_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vmvn_u8(simde_uint8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmvn_u8(a); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_andnot_si64(a, _mm_cmpeq_pi8(a, a)); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = ~a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { r_.values[i] = ~(a_.values[i]); } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmvn_u8 #define vmvn_u8(a) simde_vmvn_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vmvn_u16(simde_uint16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmvn_u16(a); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_andnot_si64(a, _mm_cmpeq_pi16(a, a)); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = ~a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ~(a_.values[i]); } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmvn_u16 #define vmvn_u16(a) simde_vmvn_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vmvn_u32(simde_uint32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmvn_u32(a); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_andnot_si64(a, _mm_cmpeq_pi32(a, a)); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = ~a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ~(a_.values[i]); } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vmvn_u32 #define vmvn_u32(a) simde_vmvn_u32(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_MVN_H) */ simde-0.7.2/simde/arm/neon/neg.h000066400000000000000000000246361400333146700163760ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including 
without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_NEG_H) #define SIMDE_ARM_NEON_NEG_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vneg_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vneg_f32(a); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = -a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = -(a_.values[i]); } #endif return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vneg_f32 #define vneg_f32(a) simde_vneg_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vneg_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vneg_f64(a); #else simde_float64x1_private r_, a_ = simde_float64x1_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = -a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; 
i++) { r_.values[i] = -(a_.values[i]); } #endif return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vneg_f64 #define vneg_f64(a) simde_vneg_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vneg_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vneg_s8(a); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = -a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = -(a_.values[i]); } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vneg_s8 #define vneg_s8(a) simde_vneg_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vneg_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vneg_s16(a); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = -a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = -(a_.values[i]); } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vneg_s16 #define vneg_s16(a) simde_vneg_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vneg_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vneg_s32(a); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = -a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = -(a_.values[i]); } #endif return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vneg_s32 #define vneg_s32(a) simde_vneg_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vneg_s64(simde_int64x1_t a) 
{ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vneg_s64(a); #else simde_int64x1_private r_, a_ = simde_int64x1_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = -a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = -(a_.values[i]); } #endif return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vneg_s64 #define vneg_s64(a) simde_vneg_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vnegq_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vnegq_f32(a); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) return vec_neg(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_neg(a); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = -a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = -(a_.values[i]); } #endif return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vnegq_f32 #define vnegq_f32(a) simde_vnegq_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vnegq_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vnegq_f64(a); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) return vec_neg(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_neg(a); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = -a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = -(a_.values[i]); } #endif return simde_float64x2_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vnegq_f64 #define vnegq_f64(a) simde_vnegq_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vnegq_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vnegq_s8(a); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) return vec_neg(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_neg(a); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = -a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = -(a_.values[i]); } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vnegq_s8 #define vnegq_s8(a) simde_vnegq_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vnegq_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vnegq_s16(a); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) return vec_neg(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_neg(a); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = -a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = -(a_.values[i]); } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vnegq_s16 #define vnegq_s16(a) simde_vnegq_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vnegq_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vnegq_s32(a); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) return vec_neg(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return 
wasm_i32x4_neg(a); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = -a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = -(a_.values[i]); } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vnegq_s32 #define vnegq_s32(a) simde_vnegq_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vnegq_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vnegq_s64(a); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) return vec_neg(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i64x2_neg(a); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = -a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = -(a_.values[i]); } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vnegq_s64 #define vnegq_s64(a) simde_vnegq_s64(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_NEG_H) */ simde-0.7.2/simde/arm/neon/orn.h000066400000000000000000000343271400333146700164210ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all 
copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_ORN_H) #define SIMDE_ARM_NEON_ORN_H #include "orr.h" #include "mvn.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vorn_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorn_s8(a, b); #else simde_int8x8_private a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | ~(b_.values); #else for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | ~b_.values[i]; } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorn_s8 #define vorn_s8(a, b) simde_vorn_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vorn_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorn_s16(a, b); #else simde_int16x4_private a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | ~(b_.values); #else for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | ~b_.values[i]; } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorn_s16 
#define vorn_s16(a, b) simde_vorn_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vorn_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorn_s32(a, b); #else simde_int32x2_private a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | ~(b_.values); #else for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | ~b_.values[i]; } #endif return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorn_s32 #define vorn_s32(a, b) simde_vorn_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vorn_s64(simde_int64x1_t a, simde_int64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorn_s64(a, b); #else simde_int64x1_private a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | ~(b_.values); #else for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | ~b_.values[i]; } #endif return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorn_s64 #define vorn_s64(a, b) simde_vorn_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vorn_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorn_u8(a, b); #else simde_uint8x8_private a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | ~(b_.values); #else for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | ~b_.values[i]; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorn_u8 #define vorn_u8(a, b) simde_vorn_u8((a), (b)) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vorn_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorn_u16(a, b); #else simde_uint16x4_private a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | ~(b_.values); #else for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | ~b_.values[i]; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorn_u16 #define vorn_u16(a, b) simde_vorn_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vorn_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorn_u32(a, b); #else simde_uint32x2_private a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | ~(b_.values); #else for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | ~b_.values[i]; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorn_u32 #define vorn_u32(a, b) simde_vorn_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vorn_u64(simde_uint64x1_t a, simde_uint64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorn_u64(a, b); #else simde_uint64x1_private a_ = simde_uint64x1_to_private(a), b_ = simde_uint64x1_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | ~(b_.values); #else for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | ~b_.values[i]; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorn_u64 #define vorn_u64(a, b) simde_vorn_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde_int8x16_t simde_vornq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vornq_s8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_orc(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, b, a, 0xf3); #else simde_int8x16_private a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | ~(b_.values); #else for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | ~b_.values[i]; } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vornq_s8 #define vornq_s8(a, b) simde_vornq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vornq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vornq_s16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_orc(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, b, a, 0xf3); #else simde_int16x8_private a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | ~(b_.values); #else for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | ~b_.values[i]; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vornq_s16 #define vornq_s16(a, b) simde_vornq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vornq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vornq_s32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_orc(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, b, a, 0xf3); #else simde_int32x4_private a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b), 
r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | ~(b_.values); #else for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | ~b_.values[i]; } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vornq_s32 #define vornq_s32(a, b) simde_vornq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vornq_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vornq_s64(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_orc(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi64(a, b, a, 0xf3); #else simde_int64x2_private a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | ~(b_.values); #else for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | ~b_.values[i]; } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vornq_s64 #define vornq_s64(a, b) simde_vornq_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vornq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vornq_u8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_orc(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, b, a, 0xf3); #else simde_uint8x16_private a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | ~(b_.values); #else for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | ~b_.values[i]; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vornq_u8 #define vornq_u8(a, b) 
simde_vornq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vornq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vornq_u16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_orc(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, b, a, 0xf3); #else simde_uint16x8_private a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | ~(b_.values); #else for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | ~b_.values[i]; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vornq_u16 #define vornq_u16(a, b) simde_vornq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vornq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vornq_u32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_orc(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, b, a, 0xf3); #else simde_uint32x4_private a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | ~(b_.values); #else for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | ~b_.values[i]; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vornq_u32 #define vornq_u32(a, b) simde_vornq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vornq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vornq_u64(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_orc(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi64(a, b, a, 0xf3); #else 
simde_uint64x2_private a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | ~(b_.values); #else for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | ~b_.values[i]; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vornq_u64 #define vornq_u64(a, b) simde_vornq_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ORN_H) */ simde-0.7.2/simde/arm/neon/orr.h000066400000000000000000000366661400333146700164350ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_ORR_H) #define SIMDE_ARM_NEON_ORR_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vorr_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorr_s8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_or_si64(a, b); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | b_.values[i]; } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorr_s8 #define vorr_s8(a, b) simde_vorr_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vorr_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorr_s16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_or_si64(a, b); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | b_.values[i]; } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorr_s16 #define vorr_s16(a, b) simde_vorr_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vorr_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorr_s32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_or_si64(a, b); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); #if 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | b_.values[i]; } #endif return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorr_s32 #define vorr_s32(a, b) simde_vorr_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vorr_s64(simde_int64x1_t a, simde_int64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorr_s64(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_or_si64(a, b); #else simde_int64x1_private r_, a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | b_.values[i]; } #endif return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorr_s64 #define vorr_s64(a, b) simde_vorr_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vorr_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorr_u8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_or_si64(a, b); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | b_.values[i]; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorr_u8 #define vorr_u8(a, b) simde_vorr_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vorr_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorr_u16(a, 
b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_or_si64(a, b); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | b_.values[i]; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorr_u16 #define vorr_u16(a, b) simde_vorr_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vorr_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorr_u32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_or_si64(a, b); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | b_.values[i]; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorr_u32 #define vorr_u32(a, b) simde_vorr_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vorr_u64(simde_uint64x1_t a, simde_uint64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorr_u64(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_or_si64(a, b); #else simde_uint64x1_private r_, a_ = simde_uint64x1_to_private(a), b_ = simde_uint64x1_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | b_.values[i]; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorr_u64 #define vorr_u64(a, 
b) simde_vorr_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vorrq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorrq_s8(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_or_si128(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_or(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_or(a, b); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | b_.values[i]; } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorrq_s8 #define vorrq_s8(a, b) simde_vorrq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vorrq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorrq_s16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_or_si128(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_or(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_or(a, b); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | b_.values[i]; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorrq_s16 #define vorrq_s16(a, b) simde_vorrq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vorrq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorrq_s32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_or_si128(a, b); #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_or(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_or(a, b); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | b_.values[i]; } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorrq_s32 #define vorrq_s32(a, b) simde_vorrq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vorrq_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorrq_s64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_or_si128(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_or(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_or(a, b); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | b_.values[i]; } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorrq_s64 #define vorrq_s64(a, b) simde_vorrq_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vorrq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorrq_u8(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_or_si128(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_or(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_or(a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); #if 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | b_.values[i]; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorrq_u8 #define vorrq_u8(a, b) simde_vorrq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vorrq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorrq_u16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_or_si128(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_or(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_or(a, b); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | b_.values[i]; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorrq_u16 #define vorrq_u16(a, b) simde_vorrq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vorrq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorrq_u32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_or_si128(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_or(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_or(a, b); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | b_.values[i]; } #endif return 
simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorrq_u32 #define vorrq_u32(a, b) simde_vorrq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vorrq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vorrq_u64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_or_si128(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_or(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_or(a, b); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values | b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] | b_.values[i]; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vorrq_u64 #define vorrq_u64(a, b) simde_vorrq_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ORR_H) */ simde-0.7.2/simde/arm/neon/padal.h000066400000000000000000000137611400333146700167030ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_PADAL_H) #define SIMDE_ARM_NEON_PADAL_H #include "types.h" #include "add.h" #include "paddl.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vpadal_s8(simde_int16x4_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpadal_s8(a, b); #else return simde_vadd_s16(a, simde_vpaddl_s8(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadal_s8 #define vpadal_s8(a, b) simde_vpadal_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vpadal_s16(simde_int32x2_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpadal_s16(a, b); #else return simde_vadd_s32(a, simde_vpaddl_s16(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadal_s16 #define vpadal_s16(a, b) simde_vpadal_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vpadal_s32(simde_int64x1_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpadal_s32(a, b); #else return simde_vadd_s64(a, simde_vpaddl_s32(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadal_s32 #define vpadal_s32(a, b) simde_vpadal_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vpadal_u8(simde_uint16x4_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return 
vpadal_u8(a, b); #else return simde_vadd_u16(a, simde_vpaddl_u8(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadal_u8 #define vpadal_u8(a, b) simde_vpadal_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vpadal_u16(simde_uint32x2_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpadal_u16(a, b); #else return simde_vadd_u32(a, simde_vpaddl_u16(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadal_u16 #define vpadal_u16(a, b) simde_vpadal_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vpadal_u32(simde_uint64x1_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpadal_u32(a, b); #else return simde_vadd_u64(a, simde_vpaddl_u32(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadal_u32 #define vpadal_u32(a, b) simde_vpadal_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vpadalq_s8(simde_int16x8_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpadalq_s8(a, b); #else return simde_vaddq_s16(a, simde_vpaddlq_s8(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadalq_s8 #define vpadalq_s8(a, b) simde_vpadalq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vpadalq_s16(simde_int32x4_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpadalq_s16(a, b); #else return simde_vaddq_s32(a, simde_vpaddlq_s16(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadalq_s16 #define vpadalq_s16(a, b) simde_vpadalq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vpadalq_s32(simde_int64x2_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpadalq_s32(a, b); #else return simde_vaddq_s64(a, simde_vpaddlq_s32(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadalq_s32 #define vpadalq_s32(a, b) 
simde_vpadalq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vpadalq_u8(simde_uint16x8_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpadalq_u8(a, b); #else return simde_vaddq_u16(a, simde_vpaddlq_u8(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadalq_u8 #define vpadalq_u8(a, b) simde_vpadalq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vpadalq_u16(simde_uint32x4_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpadalq_u16(a, b); #else return simde_vaddq_u32(a, simde_vpaddlq_u16(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadalq_u16 #define vpadalq_u16(a, b) simde_vpadalq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vpadalq_u32(simde_uint64x2_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpadalq_u32(a, b); #else return simde_vaddq_u64(a, simde_vpaddlq_u32(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadalq_u32 #define vpadalq_u32(a, b) simde_vpadalq_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* SIMDE_ARM_NEON_PADAL_H */ simde-0.7.2/simde/arm/neon/padd.h000066400000000000000000000213271400333146700165270ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_PADD_H) #define SIMDE_ARM_NEON_PADD_H #include "add.h" #include "uzp1.h" #include "uzp2.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vpadd_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) return vpadd_f32(a, b); #else return simde_vadd_f32(simde_vuzp1_f32(a, b), simde_vuzp2_f32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadd_f32 #define vpadd_f32(a, b) simde_vpadd_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vpadd_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpadd_s8(a, b); #else return simde_vadd_s8(simde_vuzp1_s8(a, b), simde_vuzp2_s8(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadd_s8 #define vpadd_s8(a, b) simde_vpadd_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vpadd_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpadd_s16(a, b); #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_hadd_pi16(a, b); #else return simde_vadd_s16(simde_vuzp1_s16(a, b), simde_vuzp2_s16(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadd_s16 
#define vpadd_s16(a, b) simde_vpadd_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vpadd_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpadd_s32(a, b); #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_hadd_pi32(a, b); #else return simde_vadd_s32(simde_vuzp1_s32(a, b), simde_vuzp2_s32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadd_s32 #define vpadd_s32(a, b) simde_vpadd_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vpadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpadd_u8(a, b); #else return simde_vadd_u8(simde_vuzp1_u8(a, b), simde_vuzp2_u8(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadd_u8 #define vpadd_u8(a, b) simde_vpadd_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vpadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpadd_u16(a, b); #else return simde_vadd_u16(simde_vuzp1_u16(a, b), simde_vuzp2_u16(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadd_u16 #define vpadd_u16(a, b) simde_vpadd_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vpadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpadd_u32(a, b); #else return simde_vadd_u32(simde_vuzp1_u32(a, b), simde_vuzp2_u32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpadd_u32 #define vpadd_u32(a, b) simde_vpadd_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vpaddq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpaddq_f32(a, b); #elif defined(SIMDE_X86_SSE3_NATIVE) return _mm_hadd_ps(a, b); #else return simde_vaddq_f32(simde_vuzp1q_f32(a, b), simde_vuzp2q_f32(a, b)); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddq_f32 #define vpaddq_f32(a, b) simde_vpaddq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vpaddq_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpaddq_f64(a, b); #elif defined(SIMDE_X86_SSE3_NATIVE) return _mm_hadd_pd(a, b); #else return simde_vaddq_f64(simde_vuzp1q_f64(a, b), simde_vuzp2q_f64(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vpaddq_f64 #define vpaddq_f64(a, b) simde_vpaddq_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vpaddq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpaddq_s8(a, b); #else return simde_vaddq_s8(simde_vuzp1q_s8(a, b), simde_vuzp2q_s8(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddq_s8 #define vpaddq_s8(a, b) simde_vpaddq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vpaddq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpaddq_s16(a, b); #elif defined(SIMDE_X86_SSSE3_NATIVE) return _mm_hadd_epi16(a, b); #else return simde_vaddq_s16(simde_vuzp1q_s16(a, b), simde_vuzp2q_s16(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddq_s16 #define vpaddq_s16(a, b) simde_vpaddq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vpaddq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpaddq_s32(a, b); #elif defined(SIMDE_X86_SSSE3_NATIVE) return _mm_hadd_epi32(a, b); #else return simde_vaddq_s32(simde_vuzp1q_s32(a, b), simde_vuzp2q_s32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddq_s32 #define vpaddq_s32(a, b) simde_vpaddq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vpaddq_s64(simde_int64x2_t a, simde_int64x2_t b) { #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpaddq_s64(a, b); #else return simde_vaddq_s64(simde_vuzp1q_s64(a, b), simde_vuzp2q_s64(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddq_s64 #define vpaddq_s64(a, b) simde_vpaddq_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vpaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpaddq_u8(a, b); #else return simde_vaddq_u8(simde_vuzp1q_u8(a, b), simde_vuzp2q_u8(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddq_u8 #define vpaddq_u8(a, b) simde_vpaddq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vpaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpaddq_u16(a, b); #else return simde_vaddq_u16(simde_vuzp1q_u16(a, b), simde_vuzp2q_u16(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddq_u16 #define vpaddq_u16(a, b) simde_vpaddq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vpaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpaddq_u32(a, b); #else return simde_vaddq_u32(simde_vuzp1q_u32(a, b), simde_vuzp2q_u32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddq_u32 #define vpaddq_u32(a, b) simde_vpaddq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vpaddq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpaddq_u64(a, b); #else return simde_vaddq_u64(simde_vuzp1q_u64(a, b), simde_vuzp2q_u64(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddq_u64 #define vpaddq_u64(a, b) simde_vpaddq_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_PADD_H) */ 
simde-0.7.2/simde/arm/neon/paddl.h000066400000000000000000000166001400333146700167010ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_PADDL_H) #define SIMDE_ARM_NEON_PADDL_H #include "add.h" #include "get_high.h" #include "get_low.h" #include "movl.h" #include "movl_high.h" #include "padd.h" #include "reinterpret.h" #include "shl_n.h" #include "shr_n.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vpaddl_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpaddl_s8(a); #else simde_int16x8_t tmp = simde_vmovl_s8(a); return simde_vpadd_s16(simde_vget_low_s16(tmp), simde_vget_high_s16(tmp)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddl_s8 #define vpaddl_s8(a) simde_vpaddl_s8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vpaddl_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpaddl_s16(a); #else simde_int32x4_t tmp = simde_vmovl_s16(a); return simde_vpadd_s32(simde_vget_low_s32(tmp), simde_vget_high_s32(tmp)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddl_s16 #define vpaddl_s16(a) simde_vpaddl_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vpaddl_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpaddl_s32(a); #else simde_int64x2_t tmp = simde_vmovl_s32(a); return simde_vadd_s64(simde_vget_low_s64(tmp), simde_vget_high_s64(tmp)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddl_s32 #define vpaddl_s32(a) simde_vpaddl_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vpaddl_u8(simde_uint8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpaddl_u8(a); #else simde_uint16x8_t tmp = simde_vmovl_u8(a); return simde_vpadd_u16(simde_vget_low_u16(tmp), simde_vget_high_u16(tmp)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddl_u8 #define vpaddl_u8(a) 
simde_vpaddl_u8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vpaddl_u16(simde_uint16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpaddl_u16(a); #else simde_uint32x4_t tmp = simde_vmovl_u16(a); return simde_vpadd_u32(simde_vget_low_u32(tmp), simde_vget_high_u32(tmp)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddl_u16 #define vpaddl_u16(a) simde_vpaddl_u16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vpaddl_u32(simde_uint32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpaddl_u32(a); #else simde_uint64x2_t tmp = simde_vmovl_u32(a); return simde_vadd_u64(simde_vget_low_u64(tmp), simde_vget_high_u64(tmp)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddl_u32 #define vpaddl_u32(a) simde_vpaddl_u32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vpaddlq_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpaddlq_s8(a); #else simde_int16x8_t lo = simde_vshrq_n_s16(simde_vshlq_n_s16(simde_vreinterpretq_s16_s8(a), 8), 8); simde_int16x8_t hi = simde_vshrq_n_s16(simde_vreinterpretq_s16_s8(a), 8); return simde_vaddq_s16(lo, hi); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddlq_s8 #define vpaddlq_s8(a) simde_vpaddlq_s8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vpaddlq_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpaddlq_s16(a); #else simde_int32x4_t lo = simde_vshrq_n_s32(simde_vshlq_n_s32(simde_vreinterpretq_s32_s16(a), 16), 16); simde_int32x4_t hi = simde_vshrq_n_s32(simde_vreinterpretq_s32_s16(a), 16); return simde_vaddq_s32(lo, hi); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddlq_s16 #define vpaddlq_s16(a) simde_vpaddlq_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vpaddlq_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpaddlq_s32(a); #elif defined(SIMDE_X86_SSE4_1_NATIVE) 
__m128i lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(a, 0xe8)); __m128i hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(a, 0xed)); return _mm_add_epi64(lo, hi); #else simde_int64x2_t lo = simde_vshrq_n_s64(simde_vshlq_n_s64(simde_vreinterpretq_s64_s32(a), 32), 32); simde_int64x2_t hi = simde_vshrq_n_s64(simde_vreinterpretq_s64_s32(a), 32); return simde_vaddq_s64(lo, hi); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddlq_s32 #define vpaddlq_s32(a) simde_vpaddlq_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vpaddlq_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpaddlq_u8(a); #else simde_uint16x8_t lo = simde_vshrq_n_u16(simde_vshlq_n_u16(simde_vreinterpretq_u16_u8(a), 8), 8); simde_uint16x8_t hi = simde_vshrq_n_u16(simde_vreinterpretq_u16_u8(a), 8); return simde_vaddq_u16(lo, hi); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddlq_u8 #define vpaddlq_u8(a) simde_vpaddlq_u8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vpaddlq_u16(simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpaddlq_u16(a); #else simde_uint32x4_t lo = simde_vshrq_n_u32(simde_vshlq_n_u32(simde_vreinterpretq_u32_u16(a), 16), 16); simde_uint32x4_t hi = simde_vshrq_n_u32(simde_vreinterpretq_u32_u16(a), 16); return simde_vaddq_u32(lo, hi); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddlq_u16 #define vpaddlq_u16(a) simde_vpaddlq_u16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vpaddlq_u32(simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpaddlq_u32(a); #else simde_uint64x2_t lo = simde_vshrq_n_u64(simde_vshlq_n_u64(simde_vreinterpretq_u64_u32(a), 32), 32); simde_uint64x2_t hi = simde_vshrq_n_u64(simde_vreinterpretq_u64_u32(a), 32); return simde_vaddq_u64(lo, hi); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpaddlq_u32 #define vpaddlq_u32(a) simde_vpaddlq_u32((a)) #endif 
SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* SIMDE_ARM_NEON_PADDL_H */ simde-0.7.2/simde/arm/neon/pmax.h000066400000000000000000000166241400333146700165700ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_PMAX_H) #define SIMDE_ARM_NEON_PMAX_H #include "types.h" #include "max.h" #include "uzp1.h" #include "uzp2.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vpmax_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpmax_f32(a, b); #else return simde_vmax_f32(simde_vuzp1_f32(a, b), simde_vuzp2_f32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmax_f32 #define vpmax_f32(a, b) simde_vpmax_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vpmax_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpmax_s8(a, b); #else return simde_vmax_s8(simde_vuzp1_s8(a, b), simde_vuzp2_s8(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmax_s8 #define vpmax_s8(a, b) simde_vpmax_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vpmax_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpmax_s16(a, b); #else return simde_vmax_s16(simde_vuzp1_s16(a, b), simde_vuzp2_s16(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmax_s16 #define vpmax_s16(a, b) simde_vpmax_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vpmax_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpmax_s32(a, b); #else return simde_vmax_s32(simde_vuzp1_s32(a, b), simde_vuzp2_s32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmax_s32 #define vpmax_s32(a, b) simde_vpmax_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vpmax_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpmax_u8(a, b); #else return 
simde_vmax_u8(simde_vuzp1_u8(a, b), simde_vuzp2_u8(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmax_u8 #define vpmax_u8(a, b) simde_vpmax_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vpmax_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpmax_u16(a, b); #else return simde_vmax_u16(simde_vuzp1_u16(a, b), simde_vuzp2_u16(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmax_u16 #define vpmax_u16(a, b) simde_vpmax_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vpmax_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpmax_u32(a, b); #else return simde_vmax_u32(simde_vuzp1_u32(a, b), simde_vuzp2_u32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmax_u32 #define vpmax_u32(a, b) simde_vpmax_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vpmaxq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpmaxq_f32(a, b); #else return simde_vmaxq_f32(simde_vuzp1q_f32(a, b), simde_vuzp2q_f32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmaxq_f32 #define vpmaxq_f32(a, b) simde_vpmaxq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vpmaxq_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpmaxq_f64(a, b); #else return simde_vmaxq_f64(simde_vuzp1q_f64(a, b), simde_vuzp2q_f64(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vpmaxq_f64 #define vpmaxq_f64(a, b) simde_vpmaxq_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vpmaxq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpmaxq_s8(a, b); #else return simde_vmaxq_s8(simde_vuzp1q_s8(a, b), simde_vuzp2q_s8(a, b)); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmaxq_s8 #define vpmaxq_s8(a, b) simde_vpmaxq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vpmaxq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpmaxq_s16(a, b); #else return simde_vmaxq_s16(simde_vuzp1q_s16(a, b), simde_vuzp2q_s16(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmaxq_s16 #define vpmaxq_s16(a, b) simde_vpmaxq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vpmaxq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpmaxq_s32(a, b); #else return simde_vmaxq_s32(simde_vuzp1q_s32(a, b), simde_vuzp2q_s32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmaxq_s32 #define vpmaxq_s32(a, b) simde_vpmaxq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vpmaxq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpmaxq_u8(a, b); #else return simde_vmaxq_u8(simde_vuzp1q_u8(a, b), simde_vuzp2q_u8(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmaxq_u8 #define vpmaxq_u8(a, b) simde_vpmaxq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vpmaxq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpmaxq_u16(a, b); #else return simde_vmaxq_u16(simde_vuzp1q_u16(a, b), simde_vuzp2q_u16(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmaxq_u16 #define vpmaxq_u16(a, b) simde_vpmaxq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vpmaxq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpmaxq_u32(a, b); #else return simde_vmaxq_u32(simde_vuzp1q_u32(a, b), simde_vuzp2q_u32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmaxq_u32 
#define vpmaxq_u32(a, b) simde_vpmaxq_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_PMAX_H) */ simde-0.7.2/simde/arm/neon/pmin.h000066400000000000000000000172221400333146700165610ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_PMIN_H) #define SIMDE_ARM_NEON_PMIN_H #include "types.h" #include "min.h" #include "uzp1.h" #include "uzp2.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vpmin_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpmin_f32(a, b); #else return simde_vmin_f32(simde_vuzp1_f32(a, b), simde_vuzp2_f32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmin_f32 #define vpmin_f32(a, b) simde_vpmin_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vpmin_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpmin_s8(a, b); #else return simde_vmin_s8(simde_vuzp1_s8(a, b), simde_vuzp2_s8(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmin_s8 #define vpmin_s8(a, b) simde_vpmin_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vpmin_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpmin_s16(a, b); #else return simde_vmin_s16(simde_vuzp1_s16(a, b), simde_vuzp2_s16(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmin_s16 #define vpmin_s16(a, b) simde_vpmin_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vpmin_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpmin_s32(a, b); #else return simde_vmin_s32(simde_vuzp1_s32(a, b), simde_vuzp2_s32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmin_s32 #define vpmin_s32(a, b) simde_vpmin_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vpmin_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpmin_u8(a, b); #else return simde_vmin_u8(simde_vuzp1_u8(a, b), simde_vuzp2_u8(a, b)); #endif } 
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmin_u8 #define vpmin_u8(a, b) simde_vpmin_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vpmin_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpmin_u16(a, b); #else return simde_vmin_u16(simde_vuzp1_u16(a, b), simde_vuzp2_u16(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmin_u16 #define vpmin_u16(a, b) simde_vpmin_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vpmin_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vpmin_u32(a, b); #else return simde_vmin_u32(simde_vuzp1_u32(a, b), simde_vuzp2_u32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpmin_u32 #define vpmin_u32(a, b) simde_vpmin_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vpminq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpminq_f32(a, b); #elif defined(SIMDE_X86_SSE_NATIVE) __m128 e = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); __m128 o = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); return _mm_min_ps(e, o); #else return simde_vminq_f32(simde_vuzp1q_f32(a, b), simde_vuzp2q_f32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpminq_f32 #define vpminq_f32(a, b) simde_vpminq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vpminq_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpminq_f64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) __m128d e = _mm_unpacklo_pd(a, b); __m128d o = _mm_unpackhi_pd(a, b); return _mm_min_pd(e, o); #else return simde_vminq_f64(simde_vuzp1q_f64(a, b), simde_vuzp2q_f64(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vpminq_f64 #define vpminq_f64(a, b) simde_vpminq_f64((a), (b)) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vpminq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpminq_s8(a, b); #else return simde_vminq_s8(simde_vuzp1q_s8(a, b), simde_vuzp2q_s8(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpminq_s8 #define vpminq_s8(a, b) simde_vpminq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vpminq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpminq_s16(a, b); #else return simde_vminq_s16(simde_vuzp1q_s16(a, b), simde_vuzp2q_s16(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpminq_s16 #define vpminq_s16(a, b) simde_vpminq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vpminq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpminq_s32(a, b); #else return simde_vminq_s32(simde_vuzp1q_s32(a, b), simde_vuzp2q_s32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpminq_s32 #define vpminq_s32(a, b) simde_vpminq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vpminq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpminq_u8(a, b); #else return simde_vminq_u8(simde_vuzp1q_u8(a, b), simde_vuzp2q_u8(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpminq_u8 #define vpminq_u8(a, b) simde_vpminq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vpminq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpminq_u16(a, b); #else return simde_vminq_u16(simde_vuzp1q_u16(a, b), simde_vuzp2q_u16(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpminq_u16 #define vpminq_u16(a, b) simde_vpminq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vpminq_u32(simde_uint32x4_t a, 
simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vpminq_u32(a, b); #else return simde_vminq_u32(simde_vuzp1q_u32(a, b), simde_vuzp2q_u32(a, b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vpminq_u32 #define vpminq_u32(a, b) simde_vpminq_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_PMIN_H) */ simde-0.7.2/simde/arm/neon/qabs.h000066400000000000000000000163061400333146700165460ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_QABS_H) #define SIMDE_ARM_NEON_QABS_H #include "types.h" #include "abs.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES int8_t simde_vqabsb_s8(int8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqabsb_s8(a); #else return a == INT8_MIN ? INT8_MAX : (a < 0 ? 
-a : a); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqabsb_s8 #define vqabsb_s8(a) simde_vqabsb_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_vqabsh_s16(int16_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqabsh_s16(a); #else return a == INT16_MIN ? INT16_MAX : (a < 0 ? -a : a); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqabsh_s16 #define vqabsh_s16(a) simde_vqabsh_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vqabss_s32(int32_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqabss_s32(a); #else return a == INT32_MIN ? INT32_MAX : (a < 0 ? -a : a); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqabss_s32 #define vqabss_s32(a) simde_vqabss_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_vqabsd_s64(int64_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqabsd_s64(a); #else return a == INT64_MIN ? INT64_MAX : (a < 0 ? -a : a); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqabsd_s64 #define vqabsd_s64(a) simde_vqabsd_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vqabs_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqabs_s8(a); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqabsb_s8(a_.values[i]); } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqabs_s8 #define vqabs_s8(a) simde_vqabs_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vqabs_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqabs_s16(a); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqabsh_s16(a_.values[i]); } return simde_int16x4_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqabs_s16 #define vqabs_s16(a) simde_vqabs_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vqabs_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqabs_s32(a); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqabss_s32(a_.values[i]); } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqabs_s32 #define vqabs_s32(a) simde_vqabs_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vqabs_s64(simde_int64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqabs_s64(a); #else simde_int64x1_private r_, a_ = simde_int64x1_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqabsd_s64(a_.values[i]); } return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqabs_s64 #define vqabs_s64(a) simde_vqabs_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vqabsq_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqabsq_s8(a); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqabsb_s8(a_.values[i]); } return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqabsq_s8 #define vqabsq_s8(a) simde_vqabsq_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vqabsq_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqabsq_s16(a); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = 
simde_vqabsh_s16(a_.values[i]); } return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqabsq_s16 #define vqabsq_s16(a) simde_vqabsq_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vqabsq_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqabsq_s32(a); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqabss_s32(a_.values[i]); } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqabsq_s32 #define vqabsq_s32(a) simde_vqabsq_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vqabsq_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqabsq_s64(a); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqabsd_s64(a_.values[i]); } return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqabsq_s64 #define vqabsq_s64(a) simde_vqabsq_s64(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_QABS_H) */ simde-0.7.2/simde/arm/neon/qadd.h000066400000000000000000000367611400333146700165400ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * 
included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_QADD_H) #define SIMDE_ARM_NEON_QADD_H #include "types.h" #include "add.h" #include "bsl.h" #include "cgt.h" #include "dup_n.h" #include "sub.h" #include HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES int8_t simde_vqaddb_s8(int8_t a, int8_t b) { return simde_math_adds_i8(a, b); } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqaddb_s8 #define vqaddb_s8(a, b) simde_vqaddb_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_vqaddh_s16(int16_t a, int16_t b) { return simde_math_adds_i16(a, b); } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqaddh_s16 #define vqaddh_s16(a, b) simde_vqaddh_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vqadds_s32(int32_t a, int32_t b) { return simde_math_adds_i32(a, b); } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqadds_s32 #define vqadds_s32(a, b) simde_vqadds_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_vqaddd_s64(int64_t a, int64_t b) { return simde_math_adds_i64(a, b); } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqaddd_s64 #define vqaddd_s64(a, b) simde_vqaddd_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES uint8_t simde_vqaddb_u8(uint8_t a, uint8_t b) { return simde_math_adds_u8(a, b); } #if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqaddb_u8 #define vqaddb_u8(a, b) simde_vqaddb_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES uint16_t simde_vqaddh_u16(uint16_t a, uint16_t b) { return simde_math_adds_u16(a, b); } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqaddh_u16 #define vqaddh_u16(a, b) simde_vqaddh_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vqadds_u32(uint32_t a, uint32_t b) { return simde_math_adds_u32(a, b); } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqadds_u32 #define vqadds_u32(a, b) simde_vqadds_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_vqaddd_u64(uint64_t a, uint64_t b) { return simde_math_adds_u64(a, b); } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqaddd_u64 #define vqaddd_u64(a, b) simde_vqaddd_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vqadd_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqadd_s8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_adds_pi8(a, b); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqaddb_s8(a_.values[i], b_.values[i]); } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqadd_s8 #define vqadd_s8(a, b) simde_vqadd_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vqadd_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqadd_s16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_adds_pi16(a, b); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqaddh_s16(a_.values[i], b_.values[i]); } 
return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqadd_s16 #define vqadd_s16(a, b) simde_vqadd_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vqadd_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqadd_s32(a, b); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqadds_s32(a_.values[i], b_.values[i]); } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqadd_s32 #define vqadd_s32(a, b) simde_vqadd_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vqadd_s64(simde_int64x1_t a, simde_int64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqadd_s64(a, b); #else simde_int64x1_private r_, a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqaddd_s64(a_.values[i], b_.values[i]); } return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqadd_s64 #define vqadd_s64(a, b) simde_vqadd_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vqadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqadd_u8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_adds_pu8(a, b); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqaddb_u8(a_.values[i], b_.values[i]); } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqadd_u8 #define vqadd_u8(a, b) 
simde_vqadd_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vqadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqadd_u16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_adds_pu16(a, b); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqaddh_u16(a_.values[i], b_.values[i]); } return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqadd_u16 #define vqadd_u16(a, b) simde_vqadd_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vqadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqadd_u32(a, b); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqadds_u32(a_.values[i], b_.values[i]); } return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqadd_u32 #define vqadd_u32(a, b) simde_vqadd_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vqadd_u64(simde_uint64x1_t a, simde_uint64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqadd_u64(a, b); #else simde_uint64x1_private r_, a_ = simde_uint64x1_to_private(a), b_ = simde_uint64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqaddd_u64(a_.values[i], b_.values[i]); } return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqadd_u64 #define vqadd_u64(a, b) simde_vqadd_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vqaddq_s8(simde_int8x16_t a, simde_int8x16_t 
b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqaddq_s8(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_add_saturate(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_adds_epi8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6) return vec_adds(a, b); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqaddb_s8(a_.values[i], b_.values[i]); } return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqaddq_s8 #define vqaddq_s8(a, b) simde_vqaddq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vqaddq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqaddq_s16(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_add_saturate(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_adds_epi16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6) return vec_adds(a, b); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqaddh_s16(a_.values[i], b_.values[i]); } return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqaddq_s16 #define vqaddq_s16(a, b) simde_vqaddq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vqaddq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqaddq_s32(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_cvtsepi64_epi32(_mm256_add_epi64(_mm256_cvtepi32_epi64(a), _mm256_cvtepi32_epi64(b))); #elif defined(SIMDE_POWER_ALTIVEC_P6) return vec_adds(a, b); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); SIMDE_VECTORIZE 
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqadds_s32(a_.values[i], b_.values[i]); } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqaddq_s32 #define vqaddq_s32(a, b) simde_vqaddq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vqaddq_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqaddq_s64(a, b); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqaddd_s64(a_.values[i], b_.values[i]); } return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqaddq_s64 #define vqaddq_s64(a, b) simde_vqaddq_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vqaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqaddq_u8(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u8x16_add_saturate(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_adds_epu8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6) return vec_adds(a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqaddb_u8(a_.values[i], b_.values[i]); } return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqaddq_u8 #define vqaddq_u8(a, b) simde_vqaddq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vqaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqaddq_u16(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u16x8_add_saturate(a, b); #elif 
defined(SIMDE_X86_SSE2_NATIVE) return _mm_adds_epu16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6) return vec_adds(a, b); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqaddh_u16(a_.values[i], b_.values[i]); } return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqaddq_u16 #define vqaddq_u16(a, b) simde_vqaddq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vqaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqaddq_u32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6) return vec_adds(a, b); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqadds_u32(a_.values[i], b_.values[i]); } return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqaddq_u32 #define vqaddq_u32(a, b) simde_vqaddq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vqaddq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqaddq_u64(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 const simde_uint64x2_t max = simde_vdupq_n_u64(UINT64_MAX); return simde_vbslq_u64(simde_vcgtq_u64(a, simde_vsubq_u64(max, b)), max, simde_vaddq_u64(a, b)); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqaddd_u64(a_.values[i], b_.values[i]); } return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqaddq_u64 #define vqaddq_u64(a, b) 
simde_vqaddq_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_QADD_H) */ simde-0.7.2/simde/arm/neon/qdmulh.h000066400000000000000000000077521400333146700171170ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_QDMULH_H) #define SIMDE_ARM_NEON_QDMULH_H #include "types.h" #include "combine.h" #include "get_high.h" #include "get_low.h" #include "qdmull.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vqdmulh_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqdmulh_s16(a, b); #else simde_int16x4_private r_; simde_int32x4_t r = simde_vqdmull_s16(a, b); simde_int32x4_private r_2 = simde_int32x4_to_private(r); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int16_t, r_2.values[i] >> 16); } return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqdmulh_s16 #define vqdmulh_s16(a, b) simde_vqdmulh_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vqdmulh_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqdmulh_s32(a, b); #else simde_int32x2_private r_; simde_int64x2_t r = simde_vqdmull_s32(a, b); simde_int64x2_private r_2 = simde_int64x2_to_private(r); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int32_t, r_2.values[i] >> 32); } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqdmulh_s32 #define vqdmulh_s32(a, b) simde_vqdmulh_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vqdmulhq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqdmulhq_s16(a, b); #else return simde_vcombine_s16(simde_vqdmulh_s16(simde_vget_low_s16(a), simde_vget_low_s16(b)), simde_vqdmulh_s16(simde_vget_high_s16(a), simde_vget_high_s16(b))); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqdmulhq_s16 #define vqdmulhq_s16(a, b) simde_vqdmulhq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vqdmulhq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqdmulhq_s32(a, b); #else return simde_vcombine_s32(simde_vqdmulh_s32(simde_vget_low_s32(a), simde_vget_low_s32(b)), simde_vqdmulh_s32(simde_vget_high_s32(a), simde_vget_high_s32(b))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqdmulhq_s32 #define vqdmulhq_s32(a, b) simde_vqdmulhq_s32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_QDMULH_H) */ simde-0.7.2/simde/arm/neon/qdmull.h000066400000000000000000000100701400333146700171060ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ /* Implementation notes (seanptmaher): * * It won't overflow during the multiplication, it'll ever only double * the bit length, we only care about the overflow during the shift, * so do the multiplication, then the shift with saturation */ #if !defined(SIMDE_ARM_NEON_QDMULL_H) #define SIMDE_ARM_NEON_QDMULL_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vqdmullh_s16(int16_t a, int16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqdmullh_s16(a, b); #else int32_t mul = (HEDLEY_STATIC_CAST(int32_t, a) * HEDLEY_STATIC_CAST(int32_t, b)); return (labs(mul) & (1 << 30)) ? ((mul < 0) ? INT32_MIN : INT32_MAX) : mul << 1; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqdmullh_s16 #define vqdmullh_s16(a, b) simde_vqdmullh_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_vqdmulls_s32(int32_t a, int32_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqdmulls_s32(a, b); #else int64_t mul = (HEDLEY_STATIC_CAST(int64_t, a) * HEDLEY_STATIC_CAST(int64_t, b)); return ((a > 0 ? a : -a) & (HEDLEY_STATIC_CAST(int64_t, 1) << 62)) ? ((mul < 0) ? 
INT64_MIN : INT64_MAX) : mul << 1; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqdmulls_s16 #define vqdmulls_s16(a, b) simde_vqdmulls_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vqdmull_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqdmull_s16(a, b); #else simde_int32x4_private r_; simde_int16x4_private a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqdmullh_s16(a_.values[i], b_.values[i]); } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqdmull_s16 #define vqdmull_s16(a, b) simde_vqdmull_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vqdmull_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqdmull_s32(a, b); #else simde_int64x2_private r_; simde_int32x2_private a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqdmulls_s32(a_.values[i], b_.values[i]); } return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqdmull_s32 #define vqdmull_s32(a, b) simde_vqdmull_s32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_QDMULL_H) */ simde-0.7.2/simde/arm/neon/qmovn.h000066400000000000000000000201121400333146700167460ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies 
* of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_QMOVN_H) #define SIMDE_ARM_NEON_QMOVN_H #include "types.h" #include "dup_n.h" #include "min.h" #include "max.h" #include "movn.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES int8_t simde_vqmovnh_s16(int16_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqmovnh_s16(a); #else return (a > INT8_MAX) ? INT8_MAX : ((a < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqmovnh_s16 #define vqmovnh_s16(a) simde_vqmovnh_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_vqmovns_s32(int32_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqmovns_s32(a); #else return (a > INT16_MAX) ? INT16_MAX : ((a < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqmovns_s32 #define vqmovns_s32(a) simde_vqmovns_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vqmovnd_s64(int64_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqmovnd_s64(a); #else return (a > INT32_MAX) ? INT32_MAX : ((a < INT32_MIN) ? 
INT32_MIN : HEDLEY_STATIC_CAST(int32_t, a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqmovnd_s64 #define vqmovnd_s64(a) simde_vqmovnd_s64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES uint8_t simde_vqmovnh_u16(uint16_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqmovnh_u16(a); #else return (a > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, a); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqmovnh_u16 #define vqmovnh_u16(a) simde_vqmovnh_u16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES uint16_t simde_vqmovns_u32(uint32_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqmovns_u32(a); #else return (a > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, a); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqmovns_u32 #define vqmovns_u32(a) simde_vqmovns_u32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vqmovnd_u64(uint64_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqmovnd_u64(a); #else return (a > UINT32_MAX) ? 
UINT32_MAX : HEDLEY_STATIC_CAST(uint32_t, a); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqmovnd_u64 #define vqmovnd_u64(a) simde_vqmovnd_u64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vqmovn_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqmovn_s16(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmovn_s16(simde_vmaxq_s16(simde_vdupq_n_s16(INT8_MIN), simde_vminq_s16(simde_vdupq_n_s16(INT8_MAX), a))); #else simde_int8x8_private r_; simde_int16x8_private a_ = simde_int16x8_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqmovnh_s16(a_.values[i]); } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqmovn_s16 #define vqmovn_s16(a) simde_vqmovn_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vqmovn_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqmovn_s32(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmovn_s32(simde_vmaxq_s32(simde_vdupq_n_s32(INT16_MIN), simde_vminq_s32(simde_vdupq_n_s32(INT16_MAX), a))); #else simde_int16x4_private r_; simde_int32x4_private a_ = simde_int32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqmovns_s32(a_.values[i]); } return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqmovn_s32 #define vqmovn_s32(a) simde_vqmovn_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vqmovn_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqmovn_s64(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmovn_s64(simde_x_vmaxq_s64(simde_vdupq_n_s64(INT32_MIN), simde_x_vminq_s64(simde_vdupq_n_s64(INT32_MAX), a))); #else simde_int32x2_private r_; simde_int64x2_private a_ = simde_int64x2_to_private(a); SIMDE_VECTORIZE for 
(size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqmovnd_s64(a_.values[i]); } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqmovn_s64 #define vqmovn_s64(a) simde_vqmovn_s64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vqmovn_u16(simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqmovn_u16(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmovn_u16(simde_vminq_u16(a, simde_vdupq_n_u16(UINT8_MAX))); #else simde_uint8x8_private r_; simde_uint16x8_private a_ = simde_uint16x8_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqmovnh_u16(a_.values[i]); } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqmovn_u16 #define vqmovn_u16(a) simde_vqmovn_u16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vqmovn_u32(simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqmovn_u32(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmovn_u32(simde_vminq_u32(a, simde_vdupq_n_u32(UINT16_MAX))); #else simde_uint16x4_private r_; simde_uint32x4_private a_ = simde_uint32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqmovns_u32(a_.values[i]); } return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqmovn_u32 #define vqmovn_u32(a) simde_vqmovn_u32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vqmovn_u64(simde_uint64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqmovn_u64(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmovn_u64(simde_x_vminq_u64(a, simde_vdupq_n_u64(UINT32_MAX))); #else simde_uint32x2_private r_; simde_uint64x2_private a_ = simde_uint64x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 
; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqmovnd_u64(a_.values[i]); } return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqmovn_u64 #define vqmovn_u64(a) simde_vqmovn_u64((a)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_QMOVN_H) */ simde-0.7.2/simde/arm/neon/qmovn_high.h000066400000000000000000000076751400333146700177700ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_QMOVN_HIGH_H) #define SIMDE_ARM_NEON_QMOVN_HIGH_H #include "types.h" #include "combine.h" #include "qmovn.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vqmovn_high_s16(simde_int8x8_t r, simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqmovn_high_s16(r, a); #else return simde_vcombine_s8(r, simde_vqmovn_s16(a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqmovn_high_s16 #define vqmovn_high_s16(r, a) simde_vqmovn_high_s16((r), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vqmovn_high_s32(simde_int16x4_t r, simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqmovn_high_s32(r, a); #else return simde_vcombine_s16(r, simde_vqmovn_s32(a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqmovn_high_s32 #define vqmovn_high_s32(r, a) simde_vqmovn_high_s32((r), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vqmovn_high_s64(simde_int32x2_t r, simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqmovn_high_s64(r, a); #else return simde_vcombine_s32(r, simde_vqmovn_s64(a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqmovn_high_s64 #define vqmovn_high_s64(r, a) simde_vqmovn_high_s64((r), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vqmovn_high_u16(simde_uint8x8_t r, simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqmovn_high_u16(r, a); #else return simde_vcombine_u8(r, simde_vqmovn_u16(a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqmovn_high_u16 #define vqmovn_high_u16(r, a) simde_vqmovn_high_u16((r), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vqmovn_high_u32(simde_uint16x4_t r, simde_uint32x4_t a) { #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqmovn_high_u32(r, a); #else return simde_vcombine_u16(r, simde_vqmovn_u32(a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqmovn_high_u32 #define vqmovn_high_u32(r, a) simde_vqmovn_high_u32((r), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vqmovn_high_u64(simde_uint32x2_t r, simde_uint64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqmovn_high_u64(r, a); #else return simde_vcombine_u32(r, simde_vqmovn_u64(a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqmovn_high_u64 #define vqmovn_high_u64(r, a) simde_vqmovn_high_u64((r), (a)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_QMOVN_HIGH_H) */ simde-0.7.2/simde/arm/neon/qmovun.h000066400000000000000000000121141400333146700171360ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_QMOVUN_H) #define SIMDE_ARM_NEON_QMOVUN_H #include "types.h" #include "dup_n.h" #include "min.h" #include "max.h" #include "movn.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES uint8_t simde_vqmovunh_s16(int16_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return HEDLEY_STATIC_CAST(uint8_t, vqmovunh_s16(a)); #else return (a > UINT8_MAX) ? UINT8_MAX : ((a < 0) ? 0 : HEDLEY_STATIC_CAST(uint8_t, a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqmovunh_s16 #define vqmovunh_s16(a) simde_vqmovunh_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES uint16_t simde_vqmovuns_s32(int32_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return HEDLEY_STATIC_CAST(uint16_t, vqmovuns_s32(a)); #else return (a > UINT16_MAX) ? UINT16_MAX : ((a < 0) ? 0 : HEDLEY_STATIC_CAST(uint16_t, a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqmovuns_s32 #define vqmovuns_s32(a) simde_vqmovuns_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vqmovund_s64(int64_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return HEDLEY_STATIC_CAST(uint32_t, vqmovund_s64(a)); #else return (a > UINT32_MAX) ? UINT32_MAX : ((a < 0) ? 
0 : HEDLEY_STATIC_CAST(uint32_t, a)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqmovund_s64 #define vqmovund_s64(a) simde_vqmovund_s64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vqmovun_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqmovun_s16(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmovn_u16(simde_vreinterpretq_u16_s16(simde_vmaxq_s16(simde_vdupq_n_s16(0), simde_vminq_s16(simde_vdupq_n_s16(UINT8_MAX), a)))); #else simde_uint8x8_private r_; simde_int16x8_private a_ = simde_int16x8_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqmovunh_s16(a_.values[i]); } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqmovun_s16 #define vqmovun_s16(a) simde_vqmovun_s16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vqmovun_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqmovun_s32(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmovn_u32(simde_vreinterpretq_u32_s32(simde_vmaxq_s32(simde_vdupq_n_s32(0), simde_vminq_s32(simde_vdupq_n_s32(UINT16_MAX), a)))); #else simde_uint16x4_private r_; simde_int32x4_private a_ = simde_int32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqmovuns_s32(a_.values[i]); } return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqmovun_s32 #define vqmovun_s32(a) simde_vqmovun_s32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vqmovun_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqmovun_s64(a); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmovn_u64(simde_vreinterpretq_u64_s64(simde_x_vmaxq_s64(simde_vdupq_n_s64(0), simde_x_vminq_s64(simde_vdupq_n_s64(UINT32_MAX), a)))); #else simde_uint32x2_private 
r_; simde_int64x2_private a_ = simde_int64x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqmovund_s64(a_.values[i]); } return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqmovun_s64 #define vqmovun_s64(a) simde_vqmovun_s64((a)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_QMOVUN_H) */ simde-0.7.2/simde/arm/neon/qneg.h000066400000000000000000000206301400333146700165450ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_QNEG_H) #define SIMDE_ARM_NEON_QNEG_H #include "types.h" #if !defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE) || 1 #include "dup_n.h" #include "max.h" #include "neg.h" #endif HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES int8_t simde_vqnegb_s8(int8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqnegb_s8(a); #else return a == INT8_MIN ? INT8_MAX : -a; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqnegb_s8 #define vqnegb_s8(a) simde_vqnegb_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_vqnegh_s16(int16_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqnegh_s16(a); #else return a == INT16_MIN ? INT16_MAX : -a; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqnegh_s16 #define vqnegh_s16(a) simde_vqnegh_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vqnegs_s32(int32_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqnegs_s32(a); #else return a == INT32_MIN ? INT32_MAX : -a; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqnegs_s32 #define vqnegs_s32(a) simde_vqnegs_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_vqnegd_s64(int64_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqnegd_s64(a); #else return a == INT64_MIN ? INT64_MAX : -a; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqnegd_s64 #define vqnegd_s64(a) simde_vqnegd_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vqneg_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqneg_s8(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(64) return simde_vneg_s8(simde_vmax_s8(a, simde_vdup_n_s8(INT8_MIN + 1))); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == INT8_MIN) ? 
INT8_MAX : -(a_.values[i]); } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqneg_s8 #define vqneg_s8(a) simde_vqneg_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vqneg_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqneg_s16(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(64) return simde_vneg_s16(simde_vmax_s16(a, simde_vdup_n_s16(INT16_MIN + 1))); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == INT16_MIN) ? INT16_MAX : -(a_.values[i]); } return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqneg_s16 #define vqneg_s16(a) simde_vqneg_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vqneg_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqneg_s32(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(64) return simde_vneg_s32(simde_vmax_s32(a, simde_vdup_n_s32(INT32_MIN + 1))); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == INT32_MIN) ? INT32_MAX : -(a_.values[i]); } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqneg_s32 #define vqneg_s32(a) simde_vqneg_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vqneg_s64(simde_int64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqneg_s64(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vneg_s64(simde_x_vmax_s64(a, simde_vdup_n_s64(INT64_MIN + 1))); #else simde_int64x1_private r_, a_ = simde_int64x1_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == INT64_MIN) ? 
INT64_MAX : -(a_.values[i]); } return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqneg_s64 #define vqneg_s64(a) simde_vqneg_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vqnegq_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqnegq_s8(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vnegq_s8(simde_vmaxq_s8(a, simde_vdupq_n_s8(INT8_MIN + 1))); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == INT8_MIN) ? INT8_MAX : -(a_.values[i]); } return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqnegq_s8 #define vqnegq_s8(a) simde_vqnegq_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vqnegq_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqnegq_s16(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vnegq_s16(simde_vmaxq_s16(a, simde_vdupq_n_s16(INT16_MIN + 1))); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == INT16_MIN) ? INT16_MAX : -(a_.values[i]); } return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqnegq_s16 #define vqnegq_s16(a) simde_vqnegq_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vqnegq_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqnegq_s32(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vnegq_s32(simde_vmaxq_s32(a, simde_vdupq_n_s32(INT32_MIN + 1))); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == INT32_MIN) ? 
INT32_MAX : -(a_.values[i]); } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqnegq_s32 #define vqnegq_s32(a) simde_vqnegq_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vqnegq_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqnegq_s64(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vnegq_s64(simde_x_vmaxq_s64(a, simde_vdupq_n_s64(INT64_MIN + 1))); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] == INT64_MIN) ? INT64_MAX : -(a_.values[i]); } return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqnegq_s64 #define vqnegq_s64(a) simde_vqnegq_s64(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_QNEG_H) */ simde-0.7.2/simde/arm/neon/qrdmulh.h000066400000000000000000000122141400333146700172660ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_QRDMULH_H) #define SIMDE_ARM_NEON_QRDMULH_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES int16_t simde_vqrdmulhh_s16(int16_t a, int16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqrdmulhh_s16(a, b); #else return HEDLEY_STATIC_CAST(int16_t, (((1 << 15) + ((HEDLEY_STATIC_CAST(int32_t, (HEDLEY_STATIC_CAST(int32_t, a) * HEDLEY_STATIC_CAST(int32_t, b)))) << 1)) >> 16) & 0xffff); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqrdmulhh_s16 #define vqrdmulhh_s16(a, b) simde_vqrdmulhh_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vqrdmulhs_s32(int32_t a, int32_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqrdmulhs_s32(a, b); #else return HEDLEY_STATIC_CAST(int32_t, (((HEDLEY_STATIC_CAST(int64_t, 1) << 31) + ((HEDLEY_STATIC_CAST(int64_t, (HEDLEY_STATIC_CAST(int64_t, a) * HEDLEY_STATIC_CAST(int64_t, b)))) << 1)) >> 32) & 0xffffffff); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqrdmulhs_s32 #define vqrdmulhs_s32(a, b) simde_vqrdmulhs_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vqrdmulh_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqrdmulh_s16(a, b); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqrdmulhh_s16(a_.values[i], b_.values[i]); } return simde_int16x4_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqrdmulh_s16 #define vqrdmulh_s16(a, b) simde_vqrdmulh_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vqrdmulh_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqrdmulh_s32(a, b); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqrdmulhs_s32(a_.values[i], b_.values[i]); } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqrdmulh_s32 #define vqrdmulh_s32(a, b) simde_vqrdmulh_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vqrdmulhq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqrdmulhq_s16(a, b); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqrdmulhh_s16(a_.values[i], b_.values[i]); } return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqrdmulhq_s16 #define vqrdmulhq_s16(a, b) simde_vqrdmulhq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vqrdmulhq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqrdmulhq_s32(a, b); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqrdmulhs_s32(a_.values[i], b_.values[i]); } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqrdmulhq_s32 #define vqrdmulhq_s32(a, b) simde_vqrdmulhq_s32((a), (b)) #endif 
SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_QRDMULH_H) */ simde-0.7.2/simde/arm/neon/qrdmulh_n.h000066400000000000000000000077701400333146700176160ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_QRDMULH_N_H) #define SIMDE_ARM_NEON_QRDMULH_N_H #include "types.h" #include "combine.h" #include "qrdmulh.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vqrdmulh_n_s16(simde_int16x4_t a, int16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqrdmulh_n_s16(a, b); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqrdmulhh_s16(a_.values[i], b); } return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqrdmulh_n_s16 #define vqrdmulh_n_s16(a, b) simde_vqrdmulh_n_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vqrdmulh_n_s32(simde_int32x2_t a, int32_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqrdmulh_n_s32(a, b); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqrdmulhs_s32(a_.values[i], b); } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqrdmulh_n_s32 #define vqrdmulh_n_s32(a, b) simde_vqrdmulh_n_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vqrdmulhq_n_s16(simde_int16x8_t a, int16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqrdmulhq_n_s16(a, b); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqrdmulhh_s16(a_.values[i], b); } return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqrdmulhq_n_s16 #define vqrdmulhq_n_s16(a, b) 
simde_vqrdmulhq_n_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vqrdmulhq_n_s32(simde_int32x4_t a, int32_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqrdmulhq_n_s32(a, b); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqrdmulhs_s32(a_.values[i], b); } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqrdmulhq_n_s32 #define vqrdmulhq_n_s32(a, b) simde_vqrdmulhq_n_s32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_QRDMULH_H) */ simde-0.7.2/simde/arm/neon/qshl.h000066400000000000000000000453651400333146700165760ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_QSHL_H) #define SIMDE_ARM_NEON_QSHL_H #include "types.h" #include "cls.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES int8_t simde_vqshlb_s8(int8_t a, int8_t b) { int8_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vqshlb_s8(a, b); #else if (b < -7) b = -7; if (b <= 0) { r = a >> -b; } else if (b < 7) { r = HEDLEY_STATIC_CAST(int8_t, a << b); if ((r >> b) != a) { r = (a < 0) ? INT8_MIN : INT8_MAX; } } else if (a == 0) { r = 0; } else { r = (a < 0) ? INT8_MIN : INT8_MAX; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqshlb_s8 #define vqshlb_s8(a, b) simde_vqshlb_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_vqshlh_s16(int16_t a, int16_t b) { int16_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vqshlh_s16(a, b); #else int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b); if (b8 < -15) b8 = -15; if (b8 <= 0) { r = a >> -b8; } else if (b8 < 15) { r = HEDLEY_STATIC_CAST(int16_t, a << b8); if ((r >> b8) != a) { r = (a < 0) ? INT16_MIN : INT16_MAX; } } else if (a == 0) { r = 0; } else { r = (a < 0) ? INT16_MIN : INT16_MAX; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqshlh_s16 #define vqshlh_s16(a, b) simde_vqshlh_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vqshls_s32(int32_t a, int32_t b) { int32_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vqshls_s32(a, b); #else int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b); if (b8 < -31) b8 = -31; if (b8 <= 0) { r = a >> -b8; } else if (b8 < 31) { r = HEDLEY_STATIC_CAST(int32_t, a << b8); if ((r >> b8) != a) { r = (a < 0) ? INT32_MIN : INT32_MAX; } } else if (a == 0) { r = 0; } else { r = (a < 0) ? 
INT32_MIN : INT32_MAX; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqshls_s32 #define vqshls_s32(a, b) simde_vqshls_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_vqshld_s64(int64_t a, int64_t b) { int64_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vqshld_s64(a, b); #else int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b); if (b8 < -63) b8 = -63; if (b8 <= 0) { r = a >> -b8; } else if (b8 < 63) { r = HEDLEY_STATIC_CAST(int64_t, a << b8); if ((r >> b8) != a) { r = (a < 0) ? INT64_MIN : INT64_MAX; } } else if (a == 0) { r = 0; } else { r = (a < 0) ? INT64_MIN : INT64_MAX; } #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqshld_s64 #define vqshld_s64(a, b) simde_vqshld_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES uint8_t simde_vqshlb_u8(uint8_t a, int8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) return vqshlb_u8(a, HEDLEY_STATIC_CAST(uint8_t, b)); #elif HEDLEY_HAS_WARNING("-Wsign-conversion") /* https://github.com/llvm/llvm-project/commit/f0a78bdfdc6d56b25e0081884580b3960a3c2429 */ HEDLEY_DIAGNOSTIC_PUSH #pragma clang diagnostic ignored "-Wsign-conversion" return vqshlb_u8(a, b); HEDLEY_DIAGNOSTIC_POP #else return vqshlb_u8(a, b); #endif #else uint8_t r; if (b < -7) b = -7; if (b <= 0) { r = a >> -b; } else if (b < 7) { r = HEDLEY_STATIC_CAST(uint8_t, a << b); if ((r >> b) != a) { r = UINT8_MAX; } } else if (a == 0) { r = 0; } else { r = UINT8_MAX; } return r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqshlb_u8 #define vqshlb_u8(a, b) simde_vqshlb_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES uint16_t simde_vqshlh_u16(uint16_t a, int16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) return vqshlh_u16(a, HEDLEY_STATIC_CAST(uint16_t, b)); #elif HEDLEY_HAS_WARNING("-Wsign-conversion") HEDLEY_DIAGNOSTIC_PUSH #pragma 
clang diagnostic ignored "-Wsign-conversion" return vqshlh_u16(a, b); HEDLEY_DIAGNOSTIC_POP #else return vqshlh_u16(a, b); #endif #else uint16_t r; if (b < -15) b = -15; if (b <= 0) { r = a >> -b; } else if (b < 15) { r = HEDLEY_STATIC_CAST(uint16_t, a << b); if ((r >> b) != a) { r = UINT16_MAX; } } else if (a == 0) { r = 0; } else { r = UINT16_MAX; } return r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqshlh_u16 #define vqshlh_u16(a, b) simde_vqshlh_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vqshls_u32(uint32_t a, int32_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) return vqshls_u32(a, HEDLEY_STATIC_CAST(uint16_t, b)); #elif HEDLEY_HAS_WARNING("-Wsign-conversion") HEDLEY_DIAGNOSTIC_PUSH #pragma clang diagnostic ignored "-Wsign-conversion" return vqshls_u32(a, b); HEDLEY_DIAGNOSTIC_POP #else return vqshls_u32(a, b); #endif #else uint32_t r; if (b < -31) b = -31; if (b <= 0) { r = HEDLEY_STATIC_CAST(uint32_t, a >> -b); } else if (b < 31) { r = a << b; if ((r >> b) != a) { r = UINT32_MAX; } } else if (a == 0) { r = 0; } else { r = UINT32_MAX; } return r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqshls_u32 #define vqshls_u32(a, b) simde_vqshls_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_vqshld_u64(uint64_t a, int64_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) return vqshld_u64(a, HEDLEY_STATIC_CAST(uint16_t, b)); #elif HEDLEY_HAS_WARNING("-Wsign-conversion") HEDLEY_DIAGNOSTIC_PUSH #pragma clang diagnostic ignored "-Wsign-conversion" return vqshld_u64(a, b); HEDLEY_DIAGNOSTIC_POP #else return vqshld_u64(a, b); #endif #else uint64_t r; if (b < -63) b = -63; if (b <= 0) { r = a >> -b; } else if (b < 63) { r = HEDLEY_STATIC_CAST(uint64_t, a << b); if ((r >> b) != a) { r = UINT64_MAX; } } else if (a == 0) { r = 0; } else { r = 
UINT64_MAX; } return r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqshldb_u64 #define vqshld_u64(a, b) simde_vqshld_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vqshl_s8 (const simde_int8x8_t a, const simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqshl_s8(a, b); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqshlb_s8(a_.values[i], b_.values[i]); } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqshl_s8 #define vqshl_s8(a, b) simde_vqshl_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vqshl_s16 (const simde_int16x4_t a, const simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqshl_s16(a, b); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqshlh_s16(a_.values[i], b_.values[i]); } return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqshl_s16 #define vqshl_s16(a, b) simde_vqshl_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vqshl_s32 (const simde_int32x2_t a, const simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqshl_s32(a, b); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqshls_s32(a_.values[i], b_.values[i]); } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqshl_s32 #define vqshl_s32(a, b) simde_vqshl_s32((a), (b)) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vqshl_s64 (const simde_int64x1_t a, const simde_int64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqshl_s64(a, b); #else simde_int64x1_private r_, a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqshld_s64(a_.values[i], b_.values[i]); } return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqshl_s64 #define vqshl_s64(a, b) simde_vqshl_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vqshl_u8 (const simde_uint8x8_t a, const simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqshl_u8(a, b); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a); simde_int8x8_private b_ = simde_int8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqshlb_u8(a_.values[i], b_.values[i]); } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqshl_u8 #define vqshl_u8(a, b) simde_vqshl_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vqshl_u16 (const simde_uint16x4_t a, const simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqshl_u16(a, b); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a); simde_int16x4_private b_ = simde_int16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqshlh_u16(a_.values[i], b_.values[i]); } return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqshl_u16 #define vqshl_u16(a, b) simde_vqshl_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vqshl_u32 (const simde_uint32x2_t a, const simde_int32x2_t b) { #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqshl_u32(a, b); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a); simde_int32x2_private b_ = simde_int32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqshls_u32(a_.values[i], b_.values[i]); } return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqshl_u32 #define vqshl_u32(a, b) simde_vqshl_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vqshl_u64 (const simde_uint64x1_t a, const simde_int64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqshl_u64(a, b); #else simde_uint64x1_private r_, a_ = simde_uint64x1_to_private(a); simde_int64x1_private b_ = simde_int64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqshld_u64(a_.values[i], b_.values[i]); } return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqshl_u64 #define vqshl_u64(a, b) simde_vqshl_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vqshlq_s8 (const simde_int8x16_t a, const simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqshlq_s8(a, b); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqshlb_s8(a_.values[i], b_.values[i]); } return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqshlq_s8 #define vqshlq_s8(a, b) simde_vqshlq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vqshlq_s16 (const simde_int16x8_t a, const simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqshlq_s16(a, b); #else simde_int16x8_private r_, a_ = 
simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqshlh_s16(a_.values[i], b_.values[i]); } return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqshlq_s16 #define vqshlq_s16(a, b) simde_vqshlq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vqshlq_s32 (const simde_int32x4_t a, const simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqshlq_s32(a, b); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqshls_s32(a_.values[i], b_.values[i]); } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqshlq_s32 #define vqshlq_s32(a, b) simde_vqshlq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vqshlq_s64 (const simde_int64x2_t a, const simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqshlq_s64(a, b); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqshld_s64(a_.values[i], b_.values[i]); } return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqshlq_s64 #define vqshlq_s64(a, b) simde_vqshlq_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vqshlq_u8 (const simde_uint8x16_t a, const simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqshlq_u8(a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a); simde_int8x16_private b_ = simde_int8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqshlb_u8(a_.values[i], b_.values[i]); } return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqshlq_u8 #define vqshlq_u8(a, b) simde_vqshlq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vqshlq_u16 (const simde_uint16x8_t a, const simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqshlq_u16(a, b); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a); simde_int16x8_private b_ = simde_int16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqshlh_u16(a_.values[i], b_.values[i]); } return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqshlq_u16 #define vqshlq_u16(a, b) simde_vqshlq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vqshlq_u32 (const simde_uint32x4_t a, const simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqshlq_u32(a, b); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a); simde_int32x4_private b_ = simde_int32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqshls_u32(a_.values[i], b_.values[i]); } return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqshlq_u32 #define vqshlq_u32(a, b) simde_vqshlq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vqshlq_u64 (const simde_uint64x2_t a, const simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqshlq_u64(a, b); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a); simde_int64x2_private b_ = simde_int64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqshld_u64(a_.values[i], 
b_.values[i]); } return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqshlq_u64 #define vqshlq_u64(a, b) simde_vqshlq_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_QSHL_H) */ simde-0.7.2/simde/arm/neon/qsub.h000066400000000000000000000363231400333146700165730ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_QSUB_H) #define SIMDE_ARM_NEON_QSUB_H #include "types.h" #include "sub.h" #include "bsl.h" #include "cgt.h" #include "dup_n.h" #include "sub.h" #include HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES int8_t simde_vqsubb_s8(int8_t a, int8_t b) { return simde_math_subs_i8(a, b); } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqsubb_s8 #define vqsubb_s8(a, b) simde_vqsubb_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_vqsubh_s16(int16_t a, int16_t b) { return simde_math_subs_i16(a, b); } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqsubh_s16 #define vqsubh_s16(a, b) simde_vqsubh_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vqsubs_s32(int32_t a, int32_t b) { return simde_math_subs_i32(a, b); } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqsubs_s32 #define vqsubs_s32(a, b) simde_vqsubs_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_vqsubd_s64(int64_t a, int64_t b) { return simde_math_subs_i64(a, b); } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqsubd_s64 #define vqsubd_s64(a, b) simde_vqsubd_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES uint8_t simde_vqsubb_u8(uint8_t a, uint8_t b) { return simde_math_subs_u8(a, b); } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqsubb_u8 #define vqsubb_u8(a, b) simde_vqsubb_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES uint16_t simde_vqsubh_u16(uint16_t a, uint16_t b) { return simde_math_subs_u16(a, b); } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqsubh_u16 #define vqsubh_u16(a, b) simde_vqsubh_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vqsubs_u32(uint32_t a, uint32_t b) { return simde_math_subs_u32(a, b); } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqsubs_u32 #define vqsubs_u32(a, b) simde_vqsubs_u32((a), (b)) #endif 
SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_vqsubd_u64(uint64_t a, uint64_t b) { return simde_math_subs_u64(a, b); } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqsubd_u64 #define vqsubd_u64(a, b) simde_vqsubd_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vqsub_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqsub_s8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_subs_pi8(a, b); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqsubb_s8(a_.values[i], b_.values[i]); } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqsub_s8 #define vqsub_s8(a, b) simde_vqsub_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vqsub_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqsub_s16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_subs_pi16(a, b); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqsubh_s16(a_.values[i], b_.values[i]); } return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqsub_s16 #define vqsub_s16(a, b) simde_vqsub_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vqsub_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqsub_s32(a, b); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqsubs_s32(a_.values[i], b_.values[i]); } return 
simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqsub_s32 #define vqsub_s32(a, b) simde_vqsub_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vqsub_s64(simde_int64x1_t a, simde_int64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqsub_s64(a, b); #else simde_int64x1_private r_, a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqsubd_s64(a_.values[i], b_.values[i]); } return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqsub_s64 #define vqsub_s64(a, b) simde_vqsub_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vqsub_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqsub_u8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_subs_pu8(a, b); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqsubb_u8(a_.values[i], b_.values[i]); } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqsub_u8 #define vqsub_u8(a, b) simde_vqsub_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vqsub_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqsub_u16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_subs_pu16(a, b); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqsubh_u16(a_.values[i], b_.values[i]); } return simde_uint16x4_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqsub_u16 #define vqsub_u16(a, b) simde_vqsub_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vqsub_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqsub_u32(a, b); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqsubs_u32(a_.values[i], b_.values[i]); } return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqsub_u32 #define vqsub_u32(a, b) simde_vqsub_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vqsub_u64(simde_uint64x1_t a, simde_uint64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqsub_u64(a, b); #else simde_uint64x1_private r_, a_ = simde_uint64x1_to_private(a), b_ = simde_uint64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqsubd_u64(a_.values[i], b_.values[i]); } return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqsub_u64 #define vqsub_u64(a, b) simde_vqsub_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vqsubq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqsubq_s8(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_sub_saturate(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_subs_epi8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6) return vec_subs(a, b); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqsubb_s8(a_.values[i], b_.values[i]); } return simde_int8x16_from_private(r_); #endif 
} #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqsubq_s8 #define vqsubq_s8(a, b) simde_vqsubq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vqsubq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqsubq_s16(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_sub_saturate(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_subs_epi16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6) return vec_subs(a, b); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqsubh_s16(a_.values[i], b_.values[i]); } return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqsubq_s16 #define vqsubq_s16(a, b) simde_vqsubq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vqsubq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqsubq_s32(a, b); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_cvtsepi64_epi32(_mm256_sub_epi64(_mm256_cvtepi32_epi64(a), _mm256_cvtepi32_epi64(b))); #elif defined(SIMDE_POWER_ALTIVEC_P6) return vec_subs(a, b); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqsubs_s32(a_.values[i], b_.values[i]); } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqsubq_s32 #define vqsubq_s32(a, b) simde_vqsubq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vqsubq_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqsubq_s64(a, b); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = 
simde_int64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqsubd_s64(a_.values[i], b_.values[i]); } return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqsubq_s64 #define vqsubq_s64(a, b) simde_vqsubq_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vqsubq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqsubq_u8(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u8x16_sub_saturate(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_subs_epu8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6) return vec_subs(a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqsubb_u8(a_.values[i], b_.values[i]); } return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqsubq_u8 #define vqsubq_u8(a, b) simde_vqsubq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vqsubq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqsubq_u16(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u16x8_sub_saturate(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_subs_epu16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6) return vec_subs(a, b); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqsubh_u16(a_.values[i], b_.values[i]); } return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqsubq_u16 #define vqsubq_u16(a, b) simde_vqsubq_u16((a), (b)) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vqsubq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqsubq_u32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6) return vec_subs(a, b); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqsubs_u32(a_.values[i], b_.values[i]); } return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqsubq_u32 #define vqsubq_u32(a, b) simde_vqsubq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vqsubq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqsubq_u64(a, b); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vqsubd_u64(a_.values[i], b_.values[i]); } return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqsubq_u64 #define vqsubq_u64(a, b) simde_vqsubq_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_QSUB_H) */ simde-0.7.2/simde/arm/neon/qtbl.h000066400000000000000000000421471400333146700165640ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this 
permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 * 2020 Evan Nemerson
 * 2020 Christopher Moore
 */

#if !defined(SIMDE_ARM_NEON_QTBL_H)
#define SIMDE_ARM_NEON_QTBL_H

#include "reinterpret.h"
#include "types.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* simde_vqtbl1_u8: byte table lookup into one 16-byte table.
 *
 * For every lane i of idx the result lane is t[idx[i]] when idx[i] < 16
 * and 0 otherwise (this is what the portable loop computes; the native
 * branches are expected to match it).
 *
 *   t   - the 16-entry byte table
 *   idx - per-lane indices into t
 */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vqtbl1_u8(simde_uint8x16_t t, simde_uint8x8_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbl1_u8(t, idx);
  #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    /* Copy the 64-bit index vector into both halves of an XMM register. */
    __m128i idx128 = _mm_set1_epi64(idx);
    /* _mm_cmpgt_epi8 is a signed compare: lanes holding 16..127 become 0xFF
     * after the OR, while lanes holding 128..255 compare as negative but
     * already have bit 7 set.  Either way bit 7 ends up set for every
     * out-of-range index, which makes _mm_shuffle_epi8 write 0 there. */
    __m128i r128 = _mm_shuffle_epi8(t, _mm_or_si128(idx128, _mm_cmpgt_epi8(idx128, _mm_set1_epi8(15))));
    /* Only the low 64 bits are returned. */
    return _mm_movepi64_pi64(r128);
  #else
    simde_uint8x16_private t_ = simde_uint8x16_to_private(t);
    simde_uint8x8_private
      r_,
      idx_ = simde_uint8x8_to_private(idx);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = (idx_.values[i] < 16) ? t_.values[idx_.values[i]] : 0;
    }

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbl1_u8
  #define vqtbl1_u8(t, idx) simde_vqtbl1_u8((t), (idx))
#endif

/* simde_vqtbl1_s8: signed variant of simde_vqtbl1_u8.  Without the native
 * intrinsic the table is reinterpreted as unsigned bytes, looked up with
 * simde_vqtbl1_u8, and the result is reinterpreted back to signed. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vqtbl1_s8(simde_int8x16_t t, simde_uint8x8_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbl1_s8(t, idx);
  #else
    return simde_vreinterpret_s8_u8(simde_vqtbl1_u8(simde_vreinterpretq_u8_s8(t), idx));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbl1_s8
  #define vqtbl1_s8(t, idx) simde_vqtbl1_s8((t), (idx))
#endif

/* The multi-table variants below are compiled out entirely when
 * SIMDE_BUG_INTEL_857088 is defined. */
#if !defined(SIMDE_BUG_INTEL_857088)

/* simde_vqtbl2_u8: byte table lookup into a 32-byte table made of
 * t.val[0] followed by t.val[1].
 *
 * For every lane i the result is the idx[i]-th table byte when
 * idx[i] < 32 and 0 otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vqtbl2_u8(simde_uint8x16x2_t t, simde_uint8x8_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbl2_u8(t, idx);
  #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    __m128i idx128 = _mm_set1_epi64(idx);
    /* Force bit 7 on for every index > 31 so both shuffles yield 0 there
     * (indices >= 128 already have bit 7 set). */
    idx128 = _mm_or_si128(idx128, _mm_cmpgt_epi8(idx128, _mm_set1_epi8(31)));
    /* Look the low four index bits up in each 16-byte half ... */
    __m128i r128_0 = _mm_shuffle_epi8(t.val[0], idx128);
    __m128i r128_1 = _mm_shuffle_epi8(t.val[1], idx128);
    /* ... then pick the half using index bit 4: shifting left by 3 moves
     * it into bit 7, the only bit _mm_blendv_epi8 looks at in each mask
     * byte.  The shift works on 32-bit lanes, so bits spill into the next
     * byte, but only into its low bits, which the blend ignores. */
    __m128i r128 = _mm_blendv_epi8(r128_0, r128_1, _mm_slli_epi32(idx128, 3));
    return _mm_movepi64_pi64(r128);
  #else
    simde_uint8x16_private t_[2] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]) };
    simde_uint8x8_private
      r_,
      idx_ = simde_uint8x8_to_private(idx);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      /* idx / 16 selects the sub-table, idx & 15 the byte inside it. */
      r_.values[i] = (idx_.values[i] < 32) ? t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : 0;
    }

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbl2_u8
  #define vqtbl2_u8(t, idx) simde_vqtbl2_u8((t), (idx))
#endif

/* simde_vqtbl2_s8: signed variant of simde_vqtbl2_u8.  The signed table
 * struct is copied byte-for-byte into the unsigned struct type with
 * simde_memcpy before delegating. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vqtbl2_s8(simde_int8x16x2_t t, simde_uint8x8_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbl2_s8(t, idx);
  #else
    simde_uint8x16x2_t t_;
    simde_memcpy(&t_, &t, sizeof(t_));
    return simde_vreinterpret_s8_u8(simde_vqtbl2_u8(t_, idx));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbl2_s8
  #define vqtbl2_s8(t, idx) simde_vqtbl2_s8((t), (idx))
#endif

/* simde_vqtbl3_u8: byte table lookup into a 48-byte table made of
 * t.val[0..2].
 *
 * For every lane i the result is the idx[i]-th table byte when
 * idx[i] < 48 and 0 otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vqtbl3_u8(simde_uint8x16x3_t t, simde_uint8x8_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbl3_u8(t, idx);
  #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    __m128i idx128 = _mm_set1_epi64(idx);
    /* Mark every index > 47 as out of range (bit 7 set). */
    idx128 = _mm_or_si128(idx128, _mm_cmpgt_epi8(idx128, _mm_set1_epi8(47)));
    __m128i r128_0 = _mm_shuffle_epi8(t.val[0], idx128);
    __m128i r128_1 = _mm_shuffle_epi8(t.val[1], idx128);
    /* Index bit 4 (shifted into bit 7) chooses between tables 0 and 1. */
    __m128i r128_01 = _mm_blendv_epi8(r128_0, r128_1, _mm_slli_epi32(idx128, 3));
    __m128i r128_2 = _mm_shuffle_epi8(t.val[2], idx128);
    /* Index bit 5 (shifted into bit 7) chooses table 2 over tables 0/1. */
    __m128i r128 = _mm_blendv_epi8(r128_01, r128_2, _mm_slli_epi32(idx128, 2));
    return _mm_movepi64_pi64(r128);
  #else
    simde_uint8x16_private t_[3] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]), simde_uint8x16_to_private(t.val[2]) };
    simde_uint8x8_private
      r_,
      idx_ = simde_uint8x8_to_private(idx);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = (idx_.values[i] < 48) ? t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : 0;
    }

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbl3_u8
  #define vqtbl3_u8(t, idx) simde_vqtbl3_u8((t), (idx))
#endif

/* simde_vqtbl3_s8: signed variant of simde_vqtbl3_u8 (table struct copied
 * into the unsigned struct type with simde_memcpy, result reinterpreted). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vqtbl3_s8(simde_int8x16x3_t t, simde_uint8x8_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbl3_s8(t, idx);
  #else
    simde_uint8x16x3_t t_;
    simde_memcpy(&t_, &t, sizeof(t_));
    return simde_vreinterpret_s8_u8(simde_vqtbl3_u8(t_, idx));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbl3_s8
  #define vqtbl3_s8(t, idx) simde_vqtbl3_s8((t), (idx))
#endif

/* simde_vqtbl4_u8: byte table lookup into a 64-byte table made of
 * t.val[0..3].
 *
 * For every lane i the result is the idx[i]-th table byte when
 * idx[i] < 64 and 0 otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vqtbl4_u8(simde_uint8x16x4_t t, simde_uint8x8_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbl4_u8(t, idx);
  #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    __m128i idx128 = _mm_set1_epi64(idx);
    /* Mark every index > 63 as out of range (bit 7 set). */
    idx128 = _mm_or_si128(idx128, _mm_cmpgt_epi8(idx128, _mm_set1_epi8(63)));
    /* Index bit 4 in bit 7: used twice, to pick within each table pair. */
    __m128i idx128_shl3 = _mm_slli_epi32(idx128, 3);
    __m128i r128_0 = _mm_shuffle_epi8(t.val[0], idx128);
    __m128i r128_1 = _mm_shuffle_epi8(t.val[1], idx128);
    __m128i r128_01 = _mm_blendv_epi8(r128_0, r128_1, idx128_shl3);
    __m128i r128_2 = _mm_shuffle_epi8(t.val[2], idx128);
    __m128i r128_3 = _mm_shuffle_epi8(t.val[3], idx128);
    __m128i r128_23 = _mm_blendv_epi8(r128_2, r128_3, idx128_shl3);
    /* Index bit 5 (shifted into bit 7) picks the pair: 0/1 or 2/3. */
    __m128i r128 = _mm_blendv_epi8(r128_01, r128_23, _mm_slli_epi32(idx128, 2));
    return _mm_movepi64_pi64(r128);
  #else
    simde_uint8x16_private t_[4] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]), simde_uint8x16_to_private(t.val[2]), simde_uint8x16_to_private(t.val[3]) };
    simde_uint8x8_private
      r_,
      idx_ = simde_uint8x8_to_private(idx);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = (idx_.values[i] < 64) ? t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : 0;
    }

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbl4_u8
  #define vqtbl4_u8(t, idx) simde_vqtbl4_u8((t), (idx))
#endif

/* simde_vqtbl4_s8: signed variant of simde_vqtbl4_u8 (table struct copied
 * into the unsigned struct type with simde_memcpy, result reinterpreted). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vqtbl4_s8(simde_int8x16x4_t t, simde_uint8x8_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbl4_s8(t, idx);
  #else
    simde_uint8x16x4_t t_;
    simde_memcpy(&t_, &t, sizeof(t_));
    return simde_vreinterpret_s8_u8(simde_vqtbl4_u8(t_, idx));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbl4_s8
  #define vqtbl4_s8(t, idx) simde_vqtbl4_s8((t), (idx))
#endif

#endif /* !defined(SIMDE_BUG_INTEL_857088) */

/* simde_vqtbl1q_u8: full-width (simde_uint8x16_t indices and result)
 * lookup into one 16-byte table.
 *
 * For every lane i the result is t[idx[i]] when idx[i] < 16, else 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vqtbl1q_u8(simde_uint8x16_t t, simde_uint8x16_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbl1q_u8(t, idx);
  #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    /* Same bit-7 trick as simde_vqtbl1_u8: any index > 15 gets bit 7 set
     * so the shuffle writes 0 for it. */
    return _mm_shuffle_epi8(t, _mm_or_si128(idx, _mm_cmpgt_epi8(idx, _mm_set1_epi8(15))));
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    /* The permute result is ANDed with the (idx < 16) mask so that
     * out-of-range lanes become 0. */
    return vec_and(vec_perm(t, t, idx), vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 16))));
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    /* NOTE(review): relies on the swizzle returning 0 for indices >= 16. */
    return wasm_v8x16_swizzle(t, idx);
  #else
    simde_uint8x16_private t_ = simde_uint8x16_to_private(t);
    simde_uint8x16_private
      r_,
      idx_ = simde_uint8x16_to_private(idx);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = (idx_.values[i] < 16) ? t_.values[idx_.values[i]] : 0;
    }

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbl1q_u8
  #define vqtbl1q_u8(t, idx) simde_vqtbl1q_u8((t), (idx))
#endif

/* simde_vqtbl1q_s8: signed variant of simde_vqtbl1q_u8, implemented by
 * reinterpreting the table and the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vqtbl1q_s8(simde_int8x16_t t, simde_uint8x16_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbl1q_s8(t, idx);
  #else
    return simde_vreinterpretq_s8_u8(simde_vqtbl1q_u8(simde_vreinterpretq_u8_s8(t), idx));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbl1q_s8
  #define vqtbl1q_s8(t, idx) simde_vqtbl1q_s8((t), (idx))
#endif

/* Multi-table full-width variants; compiled out when
 * SIMDE_BUG_INTEL_857088 is defined. */
#if !defined(SIMDE_BUG_INTEL_857088)

/* simde_vqtbl2q_u8: full-width lookup into a 32-byte table (t.val[0..1]).
 *
 * For every lane i the result is the idx[i]-th table byte when
 * idx[i] < 32 and 0 otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vqtbl2q_u8(simde_uint8x16x2_t t, simde_uint8x16_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbl2q_u8(t, idx);
  #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    /* Flag indices > 31 (bit 7), shuffle both halves, then select the
     * half with index bit 4 moved into bit 7. */
    idx = _mm_or_si128(idx, _mm_cmpgt_epi8(idx, _mm_set1_epi8(31)));
    __m128i r_0 = _mm_shuffle_epi8(t.val[0], idx);
    __m128i r_1 = _mm_shuffle_epi8(t.val[1], idx);
    return _mm_blendv_epi8(r_0, r_1, _mm_slli_epi32(idx, 3));
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    /* Two-source permute over the 32-byte concatenation, masked to 0 for
     * indices >= 32. */
    return vec_and(vec_perm(t.val[0], t.val[1], idx), vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 32))));
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    /* Subtracting 16 rebases indices 16..31 onto t.val[1].  An index that
     * belongs to the other table (or to none) lands outside 0..15 for a
     * given swizzle and is expected to contribute 0, so the partial
     * results can simply be ORed together. */
    return wasm_v128_or(wasm_v8x16_swizzle(t.val[0], idx),
                        wasm_v8x16_swizzle(t.val[1], wasm_i8x16_sub(idx, wasm_i8x16_splat(16))));
  #else
    simde_uint8x16_private t_[2] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]) };
    simde_uint8x16_private
      r_,
      idx_ = simde_uint8x16_to_private(idx);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = (idx_.values[i] < 32) ? t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : 0;
    }

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbl2q_u8
  #define vqtbl2q_u8(t, idx) simde_vqtbl2q_u8((t), (idx))
#endif

/* simde_vqtbl2q_s8: signed variant of simde_vqtbl2q_u8 (table struct
 * copied into the unsigned struct type with simde_memcpy). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vqtbl2q_s8(simde_int8x16x2_t t, simde_uint8x16_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbl2q_s8(t, idx);
  #else
    simde_uint8x16x2_t t_;
    simde_memcpy(&t_, &t, sizeof(t_));
    return simde_vreinterpretq_s8_u8(simde_vqtbl2q_u8(t_, idx));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbl2q_s8
  #define vqtbl2q_s8(t, idx) simde_vqtbl2q_s8((t), (idx))
#endif

/* simde_vqtbl3q_u8: full-width lookup into a 48-byte table (t.val[0..2]).
 *
 * For every lane i the result is the idx[i]-th table byte when
 * idx[i] < 48 and 0 otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vqtbl3q_u8(simde_uint8x16x3_t t, simde_uint8x16_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbl3q_u8(t, idx);
  #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    /* Bit 7 flags indices > 47; bit 4 then bit 5 of the index (each
     * shifted into bit 7) drive the two blends. */
    idx = _mm_or_si128(idx, _mm_cmpgt_epi8(idx, _mm_set1_epi8(47)));
    __m128i r_0 = _mm_shuffle_epi8(t.val[0], idx);
    __m128i r_1 = _mm_shuffle_epi8(t.val[1], idx);
    __m128i r_01 = _mm_blendv_epi8(r_0, r_1, _mm_slli_epi32(idx, 3));
    __m128i r_2 = _mm_shuffle_epi8(t.val[2], idx);
    return _mm_blendv_epi8(r_01, r_2, _mm_slli_epi32(idx, 2));
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    /* Permute tables 0/1 and table 2 separately, take the table-2 result
     * where idx > 31, then zero every lane with idx >= 48. */
    SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_01 = vec_perm(t.val[0], t.val[1], idx);
    SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_2 = vec_perm(t.val[2], t.val[2], idx);
    return vec_and(vec_sel(r_01, r_2, vec_cmpgt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 31)))), vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 48))));
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    /* One swizzle per table with the index rebased by 0, 16 and 32; the
     * partial results are ORed together. */
    return wasm_v128_or(wasm_v128_or(wasm_v8x16_swizzle(t.val[0], idx),
                                     wasm_v8x16_swizzle(t.val[1], wasm_i8x16_sub(idx, wasm_i8x16_splat(16)))),
                        wasm_v8x16_swizzle(t.val[2], wasm_i8x16_sub(idx, wasm_i8x16_splat(32))));
  #else
    simde_uint8x16_private t_[3] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]), simde_uint8x16_to_private(t.val[2]) };
    simde_uint8x16_private
      r_,
      idx_ = simde_uint8x16_to_private(idx);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = (idx_.values[i] < 48) ? t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : 0;
    }

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbl3q_u8
  #define vqtbl3q_u8(t, idx) simde_vqtbl3q_u8((t), (idx))
#endif

/* simde_vqtbl3q_s8: signed variant of simde_vqtbl3q_u8 (table struct
 * copied into the unsigned struct type with simde_memcpy). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vqtbl3q_s8(simde_int8x16x3_t t, simde_uint8x16_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbl3q_s8(t, idx);
  #else
    simde_uint8x16x3_t t_;
    simde_memcpy(&t_, &t, sizeof(t_));
    return simde_vreinterpretq_s8_u8(simde_vqtbl3q_u8(t_, idx));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbl3q_s8
  #define vqtbl3q_s8(t, idx) simde_vqtbl3q_s8((t), (idx))
#endif

/* simde_vqtbl4q_u8: full-width lookup into a 64-byte table (t.val[0..3]).
 *
 * For every lane i the result is the idx[i]-th table byte when
 * idx[i] < 64 and 0 otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vqtbl4q_u8(simde_uint8x16x4_t t, simde_uint8x16_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbl4q_u8(t, idx);
  #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    /* Bit 7 flags indices > 63.  Index bit 4 picks inside each table
     * pair, index bit 5 picks the pair. */
    idx = _mm_or_si128(idx, _mm_cmpgt_epi8(idx, _mm_set1_epi8(63)));
    __m128i idx_shl3 = _mm_slli_epi32(idx, 3);
    __m128i r_0 = _mm_shuffle_epi8(t.val[0], idx);
    __m128i r_1 = _mm_shuffle_epi8(t.val[1], idx);
    __m128i r_01 = _mm_blendv_epi8(r_0, r_1, idx_shl3);
    __m128i r_2 = _mm_shuffle_epi8(t.val[2], idx);
    __m128i r_3 = _mm_shuffle_epi8(t.val[3], idx);
    __m128i r_23 = _mm_blendv_epi8(r_2, r_3, idx_shl3);
    return _mm_blendv_epi8(r_01, r_23, _mm_slli_epi32(idx, 2));
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    /* Permute each table pair, take the 2/3 result where idx > 31, then
     * zero every lane with idx >= 64. */
    SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_01 = vec_perm(t.val[0], t.val[1], idx);
    SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_23 = vec_perm(t.val[2], t.val[3], idx);
    return vec_and(vec_sel(r_01, r_23, vec_cmpgt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 31)))), vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 64))));
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    /* One swizzle per table with the index rebased by 0, 16, 32 and 48;
     * the partial results are ORed together. */
    return wasm_v128_or(wasm_v128_or(wasm_v8x16_swizzle(t.val[0], idx),
                                     wasm_v8x16_swizzle(t.val[1], wasm_i8x16_sub(idx, wasm_i8x16_splat(16)))),
                        wasm_v128_or(wasm_v8x16_swizzle(t.val[2], wasm_i8x16_sub(idx, wasm_i8x16_splat(32))),
                                     wasm_v8x16_swizzle(t.val[3], wasm_i8x16_sub(idx, wasm_i8x16_splat(48)))));
  #else
    simde_uint8x16_private t_[4] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]), simde_uint8x16_to_private(t.val[2]), simde_uint8x16_to_private(t.val[3]) };
    simde_uint8x16_private
      r_,
      idx_ = simde_uint8x16_to_private(idx);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = (idx_.values[i] < 64) ? t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : 0;
    }

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbl4q_u8
  #define vqtbl4q_u8(t, idx) simde_vqtbl4q_u8((t), (idx))
#endif

/* simde_vqtbl4q_s8: signed variant of simde_vqtbl4q_u8 (table struct
 * copied into the unsigned struct type with simde_memcpy). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vqtbl4q_s8(simde_int8x16x4_t t, simde_uint8x16_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbl4q_s8(t, idx);
  #else
    simde_uint8x16x4_t t_;
    simde_memcpy(&t_, &t, sizeof(t_));
    return simde_vreinterpretq_s8_u8(simde_vqtbl4q_u8(t_, idx));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbl4q_s8
  #define vqtbl4q_s8(t, idx) simde_vqtbl4q_s8((t), (idx))
#endif

#endif /* !defined(SIMDE_BUG_INTEL_857088) */

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_ARM_NEON_QTBL_H) */
simde-0.7.2/simde/arm/neon/qtbx.h000066400000000000000000000455061400333146700166020ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 * 2020 Evan Nemerson
 * 2020 Christopher Moore
 */

#if !defined(SIMDE_ARM_NEON_QTBX_H)
#define SIMDE_ARM_NEON_QTBX_H

#include "reinterpret.h"
#include "types.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* simde_vqtbx1_u8: table lookup with fallback ("extension") into one
 * 16-byte table.
 *
 * For every lane i the result is t[idx[i]] when idx[i] < 16; otherwise the
 * corresponding lane of a is passed through unchanged.
 *
 *   a   - values kept for out-of-range indices
 *   t   - the 16-entry byte table
 *   idx - per-lane indices into t
 */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vqtbx1_u8(simde_uint8x8_t a, simde_uint8x16_t t, simde_uint8x8_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbx1_u8(a, t, idx);
  #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    __m128i idx128 = _mm_set1_epi64(idx);
    /* After the OR, bit 7 of each index byte is set exactly for the
     * out-of-range lanes (signed compare catches 16..127; 128..255 have
     * bit 7 set already). */
    idx128 = _mm_or_si128(idx128, _mm_cmpgt_epi8(idx128, _mm_set1_epi8(15)));
    __m128i r128 = _mm_shuffle_epi8(t, idx128);
    /* The same bit 7 doubles as the blend mask: out-of-range lanes take
     * their value from a instead of the shuffle result. */
    r128 = _mm_blendv_epi8(r128, _mm_set1_epi64(a), idx128);
    return _mm_movepi64_pi64(r128);
  #else
    simde_uint8x16_private t_ = simde_uint8x16_to_private(t);
    simde_uint8x8_private
      r_,
      a_ = simde_uint8x8_to_private(a),
      idx_ = simde_uint8x8_to_private(idx);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = (idx_.values[i] < 16) ? t_.values[idx_.values[i]] : a_.values[i];
    }

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbx1_u8
  #define vqtbx1_u8(a, t, idx) simde_vqtbx1_u8((a), (t), (idx))
#endif

/* simde_vqtbx1_s8: signed variant of simde_vqtbx1_u8, implemented by
 * reinterpreting a, t and the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vqtbx1_s8(simde_int8x8_t a, simde_int8x16_t t, simde_uint8x8_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbx1_s8(a, t, idx);
  #else
    return simde_vreinterpret_s8_u8(simde_vqtbx1_u8(simde_vreinterpret_u8_s8(a), simde_vreinterpretq_u8_s8(t), idx));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbx1_s8
  #define vqtbx1_s8(a, t, idx) simde_vqtbx1_s8((a), (t), (idx))
#endif

/* The multi-table variants below are compiled out entirely when
 * SIMDE_BUG_INTEL_857088 is defined. */
#if !defined(SIMDE_BUG_INTEL_857088)

/* simde_vqtbx2_u8: table lookup with fallback into a 32-byte table
 * (t.val[0..1]).
 *
 * For every lane i the result is the idx[i]-th table byte when
 * idx[i] < 32, otherwise a[i]. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vqtbx2_u8(simde_uint8x8_t a, simde_uint8x16x2_t t, simde_uint8x8_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbx2_u8(a, t, idx);
  #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    __m128i idx128 = _mm_set1_epi64(idx);
    /* Bit 7 set <=> index out of range (> 31). */
    idx128 = _mm_or_si128(idx128, _mm_cmpgt_epi8(idx128, _mm_set1_epi8(31)));
    __m128i r128_0 = _mm_shuffle_epi8(t.val[0], idx128);
    __m128i r128_1 = _mm_shuffle_epi8(t.val[1], idx128);
    /* Index bit 4, shifted into bit 7, selects table 1 over table 0. */
    __m128i r128 = _mm_blendv_epi8(r128_0, r128_1, _mm_slli_epi32(idx128, 3));
    /* Out-of-range lanes fall back to a. */
    r128 = _mm_blendv_epi8(r128, _mm_set1_epi64(a), idx128);
    return _mm_movepi64_pi64(r128);
  #else
    simde_uint8x16_private t_[2] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]) };
    simde_uint8x8_private
      r_,
      a_ = simde_uint8x8_to_private(a),
      idx_ = simde_uint8x8_to_private(idx);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      /* idx / 16 selects the sub-table, idx & 15 the byte inside it. */
      r_.values[i] = (idx_.values[i] < 32) ? t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : a_.values[i];
    }

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbx2_u8
  #define vqtbx2_u8(a, t, idx) simde_vqtbx2_u8((a), (t), (idx))
#endif

/* simde_vqtbx2_s8: signed variant of simde_vqtbx2_u8 (table struct copied
 * into the unsigned struct type with simde_memcpy; a and the result are
 * reinterpreted). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vqtbx2_s8(simde_int8x8_t a, simde_int8x16x2_t t, simde_uint8x8_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbx2_s8(a, t, idx);
  #else
    simde_uint8x16x2_t t_;
    simde_memcpy(&t_, &t, sizeof(t_));
    return simde_vreinterpret_s8_u8(simde_vqtbx2_u8(simde_vreinterpret_u8_s8(a), t_, idx));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbx2_s8
  #define vqtbx2_s8(a, t, idx) simde_vqtbx2_s8((a), (t), (idx))
#endif

/* simde_vqtbx3_u8: table lookup with fallback into a 48-byte table
 * (t.val[0..2]).
 *
 * For every lane i the result is the idx[i]-th table byte when
 * idx[i] < 48, otherwise a[i]. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vqtbx3_u8(simde_uint8x8_t a, simde_uint8x16x3_t t, simde_uint8x8_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbx3_u8(a, t, idx);
  #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    __m128i idx128 = _mm_set1_epi64(idx);
    /* Bit 7 set <=> index out of range (> 47). */
    idx128 = _mm_or_si128(idx128, _mm_cmpgt_epi8(idx128, _mm_set1_epi8(47)));
    __m128i r128_0 = _mm_shuffle_epi8(t.val[0], idx128);
    __m128i r128_1 = _mm_shuffle_epi8(t.val[1], idx128);
    __m128i r128_01 = _mm_blendv_epi8(r128_0, r128_1, _mm_slli_epi32(idx128, 3));
    __m128i r128_2 = _mm_shuffle_epi8(t.val[2], idx128);
    /* Index bit 5, shifted into bit 7, selects table 2 over tables 0/1. */
    __m128i r128 = _mm_blendv_epi8(r128_01, r128_2, _mm_slli_epi32(idx128, 2));
    /* Out-of-range lanes fall back to a. */
    r128 = _mm_blendv_epi8(r128, _mm_set1_epi64(a), idx128);
    return _mm_movepi64_pi64(r128);
  #else
    simde_uint8x16_private t_[3] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]), simde_uint8x16_to_private(t.val[2]) };
    simde_uint8x8_private
      r_,
      a_ = simde_uint8x8_to_private(a),
      idx_ = simde_uint8x8_to_private(idx);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = (idx_.values[i] < 48) ? t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : a_.values[i];
    }

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbx3_u8
  #define vqtbx3_u8(a, t, idx) simde_vqtbx3_u8((a), (t), (idx))
#endif

/* simde_vqtbx3_s8: signed variant of simde_vqtbx3_u8 (table struct copied
 * into the unsigned struct type with simde_memcpy). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vqtbx3_s8(simde_int8x8_t a, simde_int8x16x3_t t, simde_uint8x8_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbx3_s8(a, t, idx);
  #else
    simde_uint8x16x3_t t_;
    simde_memcpy(&t_, &t, sizeof(t_));
    return simde_vreinterpret_s8_u8(simde_vqtbx3_u8(simde_vreinterpret_u8_s8(a), t_, idx));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbx3_s8
  #define vqtbx3_s8(a, t, idx) simde_vqtbx3_s8((a), (t), (idx))
#endif

/* simde_vqtbx4_u8: table lookup with fallback into a 64-byte table
 * (t.val[0..3]).
 *
 * For every lane i the result is the idx[i]-th table byte when
 * idx[i] < 64, otherwise a[i]. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vqtbx4_u8(simde_uint8x8_t a, simde_uint8x16x4_t t, simde_uint8x8_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbx4_u8(a, t, idx);
  #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    __m128i idx128 = _mm_set1_epi64(idx);
    /* Bit 7 set <=> index out of range (> 63). */
    idx128 = _mm_or_si128(idx128, _mm_cmpgt_epi8(idx128, _mm_set1_epi8(63)));
    /* Index bit 4 in bit 7: used twice, to pick within each table pair. */
    __m128i idx128_shl3 = _mm_slli_epi32(idx128, 3);
    __m128i r128_0 = _mm_shuffle_epi8(t.val[0], idx128);
    __m128i r128_1 = _mm_shuffle_epi8(t.val[1], idx128);
    __m128i r128_01 = _mm_blendv_epi8(r128_0, r128_1, idx128_shl3);
    __m128i r128_2 = _mm_shuffle_epi8(t.val[2], idx128);
    __m128i r128_3 = _mm_shuffle_epi8(t.val[3], idx128);
    __m128i r128_23 = _mm_blendv_epi8(r128_2, r128_3, idx128_shl3);
    /* Index bit 5 (shifted into bit 7) picks the pair: 0/1 or 2/3. */
    __m128i r128 = _mm_blendv_epi8(r128_01, r128_23, _mm_slli_epi32(idx128, 2));
    /* Out-of-range lanes fall back to a. */
    r128 = _mm_blendv_epi8(r128, _mm_set1_epi64(a), idx128);
    return _mm_movepi64_pi64(r128);
  #else
    simde_uint8x16_private t_[4] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]), simde_uint8x16_to_private(t.val[2]), simde_uint8x16_to_private(t.val[3]) };
    simde_uint8x8_private
      r_,
      a_ = simde_uint8x8_to_private(a),
      idx_ = simde_uint8x8_to_private(idx);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = (idx_.values[i] < 64) ? t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : a_.values[i];
    }

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbx4_u8
  #define vqtbx4_u8(a, t, idx) simde_vqtbx4_u8((a), (t), (idx))
#endif

/* simde_vqtbx4_s8: signed variant of simde_vqtbx4_u8 (table struct copied
 * into the unsigned struct type with simde_memcpy). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vqtbx4_s8(simde_int8x8_t a, simde_int8x16x4_t t, simde_uint8x8_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbx4_s8(a, t, idx);
  #else
    simde_uint8x16x4_t t_;
    simde_memcpy(&t_, &t, sizeof(t_));
    return simde_vreinterpret_s8_u8(simde_vqtbx4_u8(simde_vreinterpret_u8_s8(a), t_, idx));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbx4_s8
  #define vqtbx4_s8(a, t, idx) simde_vqtbx4_s8((a), (t), (idx))
#endif

#endif /* !defined(SIMDE_BUG_INTEL_857088) */

/* simde_vqtbx1q_u8: full-width table lookup with fallback into one
 * 16-byte table.
 *
 * For every lane i the result is t[idx[i]] when idx[i] < 16, else a[i]. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vqtbx1q_u8(simde_uint8x16_t a, simde_uint8x16_t t, simde_uint8x16_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbx1q_u8(a, t, idx);
  #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    /* Bit 7 of idx marks out-of-range lanes and is reused as the blend
     * mask that substitutes a for those lanes. */
    idx = _mm_or_si128(idx, _mm_cmpgt_epi8(idx, _mm_set1_epi8(15)));
    return _mm_blendv_epi8(_mm_shuffle_epi8(t, idx), a, idx);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    /* Take the permute result where idx < 16, a elsewhere. */
    return vec_sel(a,
                   vec_perm(t, t, idx),
                   vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 16))));
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    /* The swizzle is expected to give 0 for idx >= 16, and the AND keeps
     * a only where idx > 15 (unsigned), so the two terms never overlap
     * and can be ORed. */
    return wasm_v128_or(wasm_v8x16_swizzle(t, idx),
                        wasm_v128_and(a, wasm_u8x16_gt(idx, wasm_i8x16_splat(15))));
  #else
    simde_uint8x16_private
      r_,
      a_ = simde_uint8x16_to_private(a),
      t_ = simde_uint8x16_to_private(t),
      idx_ = simde_uint8x16_to_private(idx);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = (idx_.values[i] < 16) ? t_.values[idx_.values[i]] : a_.values[i];
    }

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbx1q_u8
  #define vqtbx1q_u8(a, t, idx) simde_vqtbx1q_u8((a), (t), (idx))
#endif

/* simde_vqtbx1q_s8: signed variant of simde_vqtbx1q_u8, implemented by
 * reinterpreting a, t and the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vqtbx1q_s8(simde_int8x16_t a, simde_int8x16_t t, simde_uint8x16_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbx1q_s8(a, t, idx);
  #else
    return simde_vreinterpretq_s8_u8(simde_vqtbx1q_u8(simde_vreinterpretq_u8_s8(a), simde_vreinterpretq_u8_s8(t), idx));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbx1q_s8
  #define vqtbx1q_s8(a, t, idx) simde_vqtbx1q_s8((a), (t), (idx))
#endif

/* Multi-table full-width variants; compiled out when
 * SIMDE_BUG_INTEL_857088 is defined. */
#if !defined(SIMDE_BUG_INTEL_857088)

/* simde_vqtbx2q_u8: full-width table lookup with fallback into a 32-byte
 * table (t.val[0..1]).
 *
 * For every lane i the result is the idx[i]-th table byte when
 * idx[i] < 32, otherwise a[i]. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vqtbx2q_u8(simde_uint8x16_t a, simde_uint8x16x2_t t, simde_uint8x16_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbx2q_u8(a, t, idx);
  #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    /* Bit 7 flags indices > 31; index bit 4 (shifted into bit 7) picks the
     * table; the flagged lanes are finally replaced by a. */
    idx = _mm_or_si128(idx, _mm_cmpgt_epi8(idx, _mm_set1_epi8(31)));
    __m128i r_0 = _mm_shuffle_epi8(t.val[0], idx);
    __m128i r_1 = _mm_shuffle_epi8(t.val[1], idx);
    __m128i r = _mm_blendv_epi8(r_0, r_1, _mm_slli_epi32(idx, 3));
    return _mm_blendv_epi8(r, a, idx);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    /* Take the two-source permute result where idx < 32, a elsewhere. */
    return vec_sel(a,
                   vec_perm(t.val[0], t.val[1], idx),
                   vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 32))));
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    /* One rebased swizzle per table, plus a masked to the lanes with
     * idx > 31 (unsigned); the terms are ORed together. */
    return wasm_v128_or(wasm_v128_or(wasm_v8x16_swizzle(t.val[0], idx),
                                     wasm_v8x16_swizzle(t.val[1], wasm_i8x16_sub(idx, wasm_i8x16_splat(16)))),
                        wasm_v128_and(a, wasm_u8x16_gt(idx, wasm_i8x16_splat(31))));
  #else
    simde_uint8x16_private
      r_,
      a_ = simde_uint8x16_to_private(a),
      t_[2] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]) },
      idx_ = simde_uint8x16_to_private(idx);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = (idx_.values[i] < 32) ? t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : a_.values[i];
    }

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbx2q_u8
  #define vqtbx2q_u8(a, t, idx) simde_vqtbx2q_u8((a), (t), (idx))
#endif

/* simde_vqtbx2q_s8: signed variant of simde_vqtbx2q_u8 (table struct
 * copied into the unsigned struct type with simde_memcpy). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vqtbx2q_s8(simde_int8x16_t a, simde_int8x16x2_t t, simde_uint8x16_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbx2q_s8(a, t, idx);
  #else
    simde_uint8x16x2_t t_;
    simde_memcpy(&t_, &t, sizeof(t_));
    return simde_vreinterpretq_s8_u8(simde_vqtbx2q_u8(simde_vreinterpretq_u8_s8(a), t_, idx));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbx2q_s8
  #define vqtbx2q_s8(a, t, idx) simde_vqtbx2q_s8((a), (t), (idx))
#endif

/* simde_vqtbx3q_u8: full-width table lookup with fallback into a 48-byte
 * table (t.val[0..2]).
 *
 * For every lane i the result is the idx[i]-th table byte when
 * idx[i] < 48, otherwise a[i]. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vqtbx3q_u8(simde_uint8x16_t a, simde_uint8x16x3_t t, simde_uint8x16_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbx3q_u8(a, t, idx);
  #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    /* Bit 7 flags indices > 47; index bits 4 and 5 drive the two table
     * blends; the flagged lanes are finally replaced by a. */
    idx = _mm_or_si128(idx, _mm_cmpgt_epi8(idx, _mm_set1_epi8(47)));
    __m128i r_0 = _mm_shuffle_epi8(t.val[0], idx);
    __m128i r_1 = _mm_shuffle_epi8(t.val[1], idx);
    __m128i r_01 = _mm_blendv_epi8(r_0, r_1, _mm_slli_epi32(idx, 3));
    __m128i r_2 = _mm_shuffle_epi8(t.val[2], idx);
    __m128i r = _mm_blendv_epi8(r_01, r_2, _mm_slli_epi32(idx, 2));
    return _mm_blendv_epi8(r, a, idx);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    /* Pick the table-2 permute where idx > 31, then keep the table result
     * only where idx < 48 and a elsewhere. */
    SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_01 = vec_perm(t.val[0], t.val[1], idx);
    SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_2 = vec_perm(t.val[2], t.val[2], idx);
    return vec_sel(a,
                   vec_sel(r_01, r_2, vec_cmpgt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 31)))),
                   vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 48))));
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    /* One rebased swizzle per table, plus a masked to the lanes with
     * idx > 47 (unsigned); the terms are ORed together. */
    return wasm_v128_or(wasm_v128_or(wasm_v8x16_swizzle(t.val[0], idx),
                                     wasm_v8x16_swizzle(t.val[1], wasm_i8x16_sub(idx, wasm_i8x16_splat(16)))),
                        wasm_v128_or(wasm_v8x16_swizzle(t.val[2], wasm_i8x16_sub(idx, wasm_i8x16_splat(32))),
                                     wasm_v128_and(a, wasm_u8x16_gt(idx, wasm_i8x16_splat(47)))));
  #else
    simde_uint8x16_private
      r_,
      a_ = simde_uint8x16_to_private(a),
      t_[3] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]), simde_uint8x16_to_private(t.val[2]) },
      idx_ = simde_uint8x16_to_private(idx);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = (idx_.values[i] < 48) ? t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : a_.values[i];
    }

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbx3q_u8
  #define vqtbx3q_u8(a, t, idx) simde_vqtbx3q_u8((a), (t), (idx))
#endif

/* simde_vqtbx3q_s8: signed variant of simde_vqtbx3q_u8 (table struct
 * copied into the unsigned struct type with simde_memcpy). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vqtbx3q_s8(simde_int8x16_t a, simde_int8x16x3_t t, simde_uint8x16_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbx3q_s8(a, t, idx);
  #else
    simde_uint8x16x3_t t_;
    simde_memcpy(&t_, &t, sizeof(t_));
    return simde_vreinterpretq_s8_u8(simde_vqtbx3q_u8(simde_vreinterpretq_u8_s8(a), t_, idx));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbx3q_s8
  #define vqtbx3q_s8(a, t, idx) simde_vqtbx3q_s8((a), (t), (idx))
#endif

/* simde_vqtbx4q_u8: full-width table lookup with fallback into a 64-byte
 * table (t.val[0..3]).
 *
 * For every lane i the result is the idx[i]-th table byte when
 * idx[i] < 64, otherwise a[i]. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vqtbx4q_u8(simde_uint8x16_t a, simde_uint8x16x4_t t, simde_uint8x16_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbx4q_u8(a, t, idx);
  #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    /* Bit 7 flags indices > 63; index bit 4 picks inside each table pair,
     * index bit 5 picks the pair; the flagged lanes are finally replaced
     * by a. */
    idx = _mm_or_si128(idx, _mm_cmpgt_epi8(idx, _mm_set1_epi8(63)));
    __m128i idx_shl3 = _mm_slli_epi32(idx, 3);
    __m128i r_0 = _mm_shuffle_epi8(t.val[0], idx);
    __m128i r_1 = _mm_shuffle_epi8(t.val[1], idx);
    __m128i r_01 = _mm_blendv_epi8(r_0, r_1, idx_shl3);
    __m128i r_2 = _mm_shuffle_epi8(t.val[2], idx);
    __m128i r_3 = _mm_shuffle_epi8(t.val[3], idx);
    __m128i r_23 = _mm_blendv_epi8(r_2, r_3, idx_shl3);
    __m128i r = _mm_blendv_epi8(r_01, r_23, _mm_slli_epi32(idx, 2));
    return _mm_blendv_epi8(r, a, idx);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    /* Pick the 2/3 permute where idx > 31, then keep the table result
     * only where idx < 64 and a elsewhere. */
    SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_01 = vec_perm(t.val[0], t.val[1], idx);
    SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_23 = vec_perm(t.val[2], t.val[3], idx);
    return vec_sel(a,
                   vec_sel(r_01, r_23, vec_cmpgt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 31)))),
                   vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 64))));
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    /* One rebased swizzle per table, plus a masked to the lanes with
     * idx > 63 (unsigned); the terms are ORed together. */
    return wasm_v128_or(wasm_v128_or(wasm_v128_or(wasm_v8x16_swizzle(t.val[0], idx),
                                                  wasm_v8x16_swizzle(t.val[1], wasm_i8x16_sub(idx, wasm_i8x16_splat(16)))),
                                     wasm_v128_or(wasm_v8x16_swizzle(t.val[2], wasm_i8x16_sub(idx, wasm_i8x16_splat(32))),
                                                  wasm_v8x16_swizzle(t.val[3], wasm_i8x16_sub(idx, wasm_i8x16_splat(48))))),
                        wasm_v128_and(a, wasm_u8x16_gt(idx, wasm_i8x16_splat(63))));
  #else
    simde_uint8x16_private
      r_,
      a_ = simde_uint8x16_to_private(a),
      t_[4] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]), simde_uint8x16_to_private(t.val[2]), simde_uint8x16_to_private(t.val[3]) },
      idx_ = simde_uint8x16_to_private(idx);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      r_.values[i] = (idx_.values[i] < 64) ? t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : a_.values[i];
    }

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbx4q_u8
  #define vqtbx4q_u8(a, t, idx) simde_vqtbx4q_u8((a), (t), (idx))
#endif

/* simde_vqtbx4q_s8: signed variant of simde_vqtbx4q_u8 (table struct
 * copied into the unsigned struct type with simde_memcpy). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vqtbx4q_s8(simde_int8x16_t a, simde_int8x16x4_t t, simde_uint8x16_t idx) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqtbx4q_s8(a, t, idx);
  #else
    simde_uint8x16x4_t t_;
    simde_memcpy(&t_, &t, sizeof(t_));
    return simde_vreinterpretq_s8_u8(simde_vqtbx4q_u8(simde_vreinterpretq_u8_s8(a), t_, idx));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqtbx4q_s8
  #define vqtbx4q_s8(a, t, idx) simde_vqtbx4q_s8((a), (t), (idx))
#endif

#endif /* !defined(SIMDE_BUG_INTEL_857088) */

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_ARM_NEON_QTBX_H) */
simde-0.7.2/simde/arm/neon/rbit.h000066400000000000000000000143601400333146700165560ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 *   2020 Evan Nemerson
 *   2020 Christopher Moore
 */

/* The GFNI implementation is based on Wojciech Muła's work at
 * http://0x80.pl/articles/avx512-galois-field-for-bit-shuffling.html#bit-shuffling via
 * https://github.com/InstLatx64/InstLatX64_Demo/blob/49c27effdfd5a45f27e0ccb6e2f3be5f27c3845d/GFNI_Demo.h#L173 */

#if !defined(SIMDE_ARM_NEON_RBIT_H)
#define SIMDE_ARM_NEON_RBIT_H

#include "reinterpret.h"
#include "types.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* simde_vrbit_u8: reverse the bit order inside each of the 8 byte lanes.
 *
 *   a - input vector
 * Returns a vector whose lane i is lane i of `a` with its 8 bits reversed
 * (the portable fallback uses __builtin_bitreverse8 where available). */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x8_t
simde_vrbit_u8(simde_uint8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vrbit_u8(a);
  #elif defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_X86_GFNI_NATIVE)
    /* Move the 64-bit value into an XMM register, apply the GF(2) affine
     * transform with the matrix constant from the article credited in the
     * comment above, and move it back. */
    __m128i tmp = _mm_movpi64_epi64(a);
    tmp = _mm_gf2p8affine_epi64_epi8(tmp, _mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C(0x8040201008040201))), 0);
    return _mm_movepi64_pi64(tmp);
  #elif defined(SIMDE_X86_MMX_NATIVE)
    /* Three mask-and-shift rounds.  The shifts operate on 16-bit lanes, but
     * the masks discard every bit that crossed a byte boundary. */
    __m64 mask;
    /* Swap adjacent bits. */
    mask = _mm_set1_pi8(0x55);
    a = _mm_or_si64(_mm_andnot_si64(mask, _mm_slli_pi16(a, 1)), _mm_and_si64(mask, _mm_srli_pi16(a, 1)));
    /* Swap adjacent bit pairs. */
    mask = _mm_set1_pi8(0x33);
    a = _mm_or_si64(_mm_andnot_si64(mask, _mm_slli_pi16(a, 2)), _mm_and_si64(mask, _mm_srli_pi16(a, 2)));
    /* Swap the two nibbles. */
    mask = _mm_set1_pi8(0x0F);
    a = _mm_or_si64(_mm_andnot_si64(mask, _mm_slli_pi16(a, 4)), _mm_and_si64(mask, _mm_srli_pi16(a, 4)));
    return a;
  #else
    simde_uint8x8_private
      r_,
      a_ = simde_uint8x8_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      #if HEDLEY_HAS_BUILTIN(__builtin_bitreverse8) && !defined(HEDLEY_IBM_VERSION)
        r_.values[i] = __builtin_bitreverse8(a_.values[i]);
      #else
        /* 64-bit multiply / mask / multiply bit-reversal trick; the
         * reversed byte is taken from bits 32..39 of the product. */
        r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (((a_.values[i] * UINT64_C(0x80200802)) &
          UINT64_C(0x0884422110)) * UINT64_C(0x0101010101)) >> 32);
      #endif
    }

    return simde_uint8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vrbit_u8
  #define vrbit_u8(a) simde_vrbit_u8(a)
#endif

/* simde_vrbit_s8: signed variant; reinterprets to u8, calls
 * simde_vrbit_u8 and reinterprets the result back (or forwards to
 * vrbit_s8 with native A64 NEON). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vrbit_s8(simde_int8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vrbit_s8(a);
  #else
    return simde_vreinterpret_s8_u8(simde_vrbit_u8(simde_vreinterpret_u8_s8(a)));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vrbit_s8
  #define vrbit_s8(a) simde_vrbit_s8(a)
#endif

/* simde_vrbitq_u8: reverse the bit order inside each of the 16 byte lanes.
 *
 *   a - input vector
 * Returns a vector whose lane i is lane i of `a` with its 8 bits reversed. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint8x16_t
simde_vrbitq_u8(simde_uint8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vrbitq_u8(a);
  #elif defined(SIMDE_X86_GFNI_NATIVE)
    /* Single GF(2) affine transform with the same matrix constant as the
     * 64-bit variant. */
    return _mm_gf2p8affine_epi64_epi8(a, _mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C(0x8040201008040201))), 0);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    /* Same three mask-and-shift rounds as the MMX path, on 128 bits. */
    __m128i mask;
    /* Swap adjacent bits. */
    mask = _mm_set1_epi8(0x55);
    a = _mm_or_si128(_mm_andnot_si128(mask, _mm_slli_epi16(a, 1)), _mm_and_si128(mask, _mm_srli_epi16(a, 1)));
    /* Swap adjacent bit pairs. */
    mask = _mm_set1_epi8(0x33);
    a = _mm_or_si128(_mm_andnot_si128(mask, _mm_slli_epi16(a, 2)), _mm_and_si128(mask, _mm_srli_epi16(a, 2)));
    /* Swap the two nibbles. */
    mask = _mm_set1_epi8(0x0F);
    a = _mm_or_si128(_mm_andnot_si128(mask, _mm_slli_epi16(a, 4)), _mm_and_si128(mask, _mm_srli_epi16(a, 4)));
    return a;
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) shift;
    /* vec_sel takes the right-shifted bits where the mask is set and the
     * left-shifted bits elsewhere. */
    shift = vec_splat_u8(1);
    a = vec_sel(vec_sl(a, shift), vec_sr(a, shift), vec_splats(HEDLEY_STATIC_CAST(unsigned char, 0x55)));
    shift = vec_splat_u8(2);
    a = vec_sel(vec_sl(a, shift), vec_sr(a, shift), vec_splats(HEDLEY_STATIC_CAST(unsigned char, 0x33)));
    /* Per-byte shifts by 4 need no mask: OR-ing both swaps the nibbles. */
    shift = vec_splat_u8(4);
    a = vec_or(vec_sl(a, shift), vec_sr(a, shift));
    return a;
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    /* bitselect takes the right-shifted bits where the mask is set and the
     * left-shifted bits elsewhere; the final nibble swap needs no mask. */
    a = wasm_v128_bitselect(wasm_u8x16_shr(a, 1), wasm_i8x16_shl(a, 1), wasm_i8x16_splat(0x55));
    a = wasm_v128_bitselect(wasm_u8x16_shr(a, 2), wasm_i8x16_shl(a, 2), wasm_i8x16_splat(0x33));
    a = wasm_v128_or(wasm_u8x16_shr(a, 4), wasm_i8x16_shl(a, 4));
    return a;
  #else
    simde_uint8x16_private
      r_,
      a_ = simde_uint8x16_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
      #if HEDLEY_HAS_BUILTIN(__builtin_bitreverse8) && !defined(HEDLEY_IBM_VERSION)
        r_.values[i] = __builtin_bitreverse8(a_.values[i]);
      #else
        /* 64-bit multiply / mask / multiply bit-reversal trick; the
         * reversed byte is taken from bits 32..39 of the product. */
        r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (((a_.values[i] * UINT64_C(0x80200802)) & UINT64_C(0x0884422110)) * UINT64_C(0x0101010101)) >> 32);
      #endif
    }

    return simde_uint8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vrbitq_u8
  #define vrbitq_u8(a) simde_vrbitq_u8(a)
#endif

/* simde_vrbitq_s8: signed variant; reinterprets to u8, calls
 * simde_vrbitq_u8 and reinterprets the result back (or forwards to
 * vrbitq_s8 with native A64 NEON). */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vrbitq_s8(simde_int8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vrbitq_s8(a);
  #else
    return simde_vreinterpretq_s8_u8(simde_vrbitq_u8(simde_vreinterpretq_u8_s8(a)));
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vrbitq_s8
  #define vrbitq_s8(a) simde_vrbitq_s8(a)
#endif

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_ARM_NEON_RBIT_H) */
/* NOTE(review): the text up to the next comment opener is an archive member
 * header (path, mode, size and checksum fields), not C source. */
simde-0.7.2/simde/arm/neon/reinterpret.h000066400000000000000000002730201400333146700201610ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
*
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 *   2020 Evan Nemerson
 *   2020 Sean Maher (Copyright owned by Google, LLC)
 */

#if !defined(SIMDE_ARM_NEON_REINTERPRET_H)
#define SIMDE_ARM_NEON_REINTERPRET_H

#include "types.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* Every function in this header follows the same pattern: with a native
 * NEON implementation it forwards to the matching vreinterpret intrinsic;
 * otherwise it converts the argument to its private representation, copies
 * sizeof(r_) bytes into the private representation of the result type with
 * simde_memcpy, and converts that back to the public type.  The trailing
 * #if block optionally aliases the native name to the simde_ function.
 * Conversions involving f64 are guarded by the A64V8 macros, all others by
 * the A32V7 macros. */

/* ---- 64-bit vectors reinterpreted as simde_int8x8_t ---- */

/* simde_int16x4_t -> simde_int8x8_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vreinterpret_s8_s16(simde_int16x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s8_s16(a);
  #else
    simde_int8x8_private r_;
    simde_int16x4_private a_ = simde_int16x4_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s8_s16
  #define vreinterpret_s8_s16(a) simde_vreinterpret_s8_s16(a)
#endif

/* simde_int32x2_t -> simde_int8x8_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vreinterpret_s8_s32(simde_int32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s8_s32(a);
  #else
    simde_int8x8_private r_;
    simde_int32x2_private a_ = simde_int32x2_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s8_s32
  #define vreinterpret_s8_s32(a) simde_vreinterpret_s8_s32(a)
#endif

/* simde_int64x1_t -> simde_int8x8_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vreinterpret_s8_s64(simde_int64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s8_s64(a);
  #else
    simde_int8x8_private r_;
    simde_int64x1_private a_ = simde_int64x1_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return
      simde_int8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s8_s64
  #define vreinterpret_s8_s64(a) simde_vreinterpret_s8_s64(a)
#endif

/* simde_uint8x8_t -> simde_int8x8_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vreinterpret_s8_u8(simde_uint8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s8_u8(a);
  #else
    simde_int8x8_private r_;
    simde_uint8x8_private a_ = simde_uint8x8_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s8_u8
  #define vreinterpret_s8_u8(a) simde_vreinterpret_s8_u8(a)
#endif

/* simde_uint16x4_t -> simde_int8x8_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vreinterpret_s8_u16(simde_uint16x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s8_u16(a);
  #else
    simde_int8x8_private r_;
    simde_uint16x4_private a_ = simde_uint16x4_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s8_u16
  #define vreinterpret_s8_u16(a) simde_vreinterpret_s8_u16(a)
#endif

/* simde_uint32x2_t -> simde_int8x8_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vreinterpret_s8_u32(simde_uint32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s8_u32(a);
  #else
    simde_int8x8_private r_;
    simde_uint32x2_private a_ = simde_uint32x2_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s8_u32
  #define vreinterpret_s8_u32(a) simde_vreinterpret_s8_u32(a)
#endif

/* simde_uint64x1_t -> simde_int8x8_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vreinterpret_s8_u64(simde_uint64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s8_u64(a);
  #else
    simde_int8x8_private r_;
    simde_uint64x1_private a_ = simde_uint64x1_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s8_u64
  #define vreinterpret_s8_u64(a) simde_vreinterpret_s8_u64(a)
#endif

/* simde_float32x2_t -> simde_int8x8_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vreinterpret_s8_f32(simde_float32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s8_f32(a);
  #else
    simde_int8x8_private r_;
    simde_float32x2_private a_ = simde_float32x2_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s8_f32
  #define vreinterpret_s8_f32(a) simde_vreinterpret_s8_f32(a)
#endif

/* simde_float64x1_t -> simde_int8x8_t (A64V8 guards) */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x8_t
simde_vreinterpret_s8_f64(simde_float64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vreinterpret_s8_f64(a);
  #else
    simde_int8x8_private r_;
    simde_float64x1_private a_ = simde_float64x1_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int8x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s8_f64
  #define vreinterpret_s8_f64(a) simde_vreinterpret_s8_f64(a)
#endif

/* ---- 128-bit vectors reinterpreted as simde_int8x16_t ---- */

/* simde_int16x8_t -> simde_int8x16_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vreinterpretq_s8_s16(simde_int16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s8_s16(a);
  #else
    simde_int8x16_private r_;
    simde_int16x8_private a_ = simde_int16x8_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s8_s16
  #define vreinterpretq_s8_s16(a) simde_vreinterpretq_s8_s16(a)
#endif

/* simde_int32x4_t -> simde_int8x16_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vreinterpretq_s8_s32(simde_int32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s8_s32(a);
  #else
    simde_int8x16_private r_;
    simde_int32x4_private a_ = simde_int32x4_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s8_s32
  #define vreinterpretq_s8_s32(a) simde_vreinterpretq_s8_s32(a)
#endif

/* simde_int64x2_t -> simde_int8x16_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vreinterpretq_s8_s64(simde_int64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s8_s64(a);
  #else
    simde_int8x16_private r_;
    simde_int64x2_private a_ = simde_int64x2_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s8_s64
  #define vreinterpretq_s8_s64(a) simde_vreinterpretq_s8_s64(a)
#endif

/* simde_uint8x16_t -> simde_int8x16_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vreinterpretq_s8_u8(simde_uint8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s8_u8(a);
  #else
    simde_int8x16_private r_;
    simde_uint8x16_private a_ = simde_uint8x16_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s8_u8
  #define vreinterpretq_s8_u8(a) simde_vreinterpretq_s8_u8(a)
#endif

/* simde_uint16x8_t -> simde_int8x16_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vreinterpretq_s8_u16(simde_uint16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s8_u16(a);
  #else
    simde_int8x16_private r_;
    simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s8_u16
  #define vreinterpretq_s8_u16(a) simde_vreinterpretq_s8_u16(a)
#endif

/* simde_uint32x4_t -> simde_int8x16_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vreinterpretq_s8_u32(simde_uint32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s8_u32(a);
  #else
    simde_int8x16_private r_;
    simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s8_u32
  #define
    vreinterpretq_s8_u32(a) simde_vreinterpretq_s8_u32(a)
#endif

/* simde_uint64x2_t -> simde_int8x16_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vreinterpretq_s8_u64(simde_uint64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s8_u64(a);
  #else
    simde_int8x16_private r_;
    simde_uint64x2_private a_ = simde_uint64x2_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s8_u64
  #define vreinterpretq_s8_u64(a) simde_vreinterpretq_s8_u64(a)
#endif

/* simde_float32x4_t -> simde_int8x16_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vreinterpretq_s8_f32(simde_float32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s8_f32(a);
  #else
    simde_int8x16_private r_;
    simde_float32x4_private a_ = simde_float32x4_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s8_f32
  #define vreinterpretq_s8_f32(a) simde_vreinterpretq_s8_f32(a)
#endif

/* simde_float64x2_t -> simde_int8x16_t (A64V8 guards) */
SIMDE_FUNCTION_ATTRIBUTES
simde_int8x16_t
simde_vreinterpretq_s8_f64(simde_float64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vreinterpretq_s8_f64(a);
  #else
    simde_int8x16_private r_;
    simde_float64x2_private a_ = simde_float64x2_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int8x16_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s8_f64
  #define vreinterpretq_s8_f64(a) simde_vreinterpretq_s8_f64(a)
#endif

/* ---- 64-bit vectors reinterpreted as simde_int16x4_t ---- */

/* simde_int8x8_t -> simde_int16x4_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vreinterpret_s16_s8(simde_int8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s16_s8(a);
  #else
    simde_int16x4_private r_;
    simde_int8x8_private a_ = simde_int8x8_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s16_s8
  #define vreinterpret_s16_s8(a)
    simde_vreinterpret_s16_s8(a)
#endif

/* simde_int32x2_t -> simde_int16x4_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vreinterpret_s16_s32(simde_int32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s16_s32(a);
  #else
    simde_int16x4_private r_;
    simde_int32x2_private a_ = simde_int32x2_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s16_s32
  #define vreinterpret_s16_s32(a) simde_vreinterpret_s16_s32(a)
#endif

/* simde_int64x1_t -> simde_int16x4_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vreinterpret_s16_s64(simde_int64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s16_s64(a);
  #else
    simde_int16x4_private r_;
    simde_int64x1_private a_ = simde_int64x1_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s16_s64
  #define vreinterpret_s16_s64(a) simde_vreinterpret_s16_s64(a)
#endif

/* simde_uint8x8_t -> simde_int16x4_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vreinterpret_s16_u8(simde_uint8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s16_u8(a);
  #else
    simde_int16x4_private r_;
    simde_uint8x8_private a_ = simde_uint8x8_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s16_u8
  #define vreinterpret_s16_u8(a) simde_vreinterpret_s16_u8(a)
#endif

/* simde_uint16x4_t -> simde_int16x4_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vreinterpret_s16_u16(simde_uint16x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s16_u16(a);
  #else
    simde_int16x4_private r_;
    simde_uint16x4_private a_ = simde_uint16x4_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s16_u16
  #define vreinterpret_s16_u16(a) simde_vreinterpret_s16_u16(a)
#endif
/* The functions below all share one shape: with a native NEON
 * implementation they forward to the matching vreinterpret intrinsic;
 * otherwise they copy sizeof(r_) bytes from the private representation of
 * the argument into the private representation of the result type.
 * Conversions from f64 use the A64V8 guard macros, all others A32V7. */

/* simde_uint32x2_t -> simde_int16x4_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vreinterpret_s16_u32(simde_uint32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s16_u32(a);
  #else
    simde_int16x4_private r_;
    simde_uint32x2_private a_ = simde_uint32x2_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s16_u32
  #define vreinterpret_s16_u32(a) simde_vreinterpret_s16_u32(a)
#endif

/* simde_uint64x1_t -> simde_int16x4_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vreinterpret_s16_u64(simde_uint64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s16_u64(a);
  #else
    simde_int16x4_private r_;
    simde_uint64x1_private a_ = simde_uint64x1_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s16_u64
  #define vreinterpret_s16_u64(a) simde_vreinterpret_s16_u64(a)
#endif

/* simde_float32x2_t -> simde_int16x4_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vreinterpret_s16_f32(simde_float32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s16_f32(a);
  #else
    simde_int16x4_private r_;
    simde_float32x2_private a_ = simde_float32x2_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s16_f32
  #define vreinterpret_s16_f32(a) simde_vreinterpret_s16_f32(a)
#endif

/* simde_float64x1_t -> simde_int16x4_t (A64V8 guards) */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x4_t
simde_vreinterpret_s16_f64(simde_float64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vreinterpret_s16_f64(a);
  #else
    simde_int16x4_private r_;
    simde_float64x1_private a_ = simde_float64x1_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int16x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s16_f64
  #define vreinterpret_s16_f64(a) simde_vreinterpret_s16_f64(a)
#endif

/* ---- 128-bit vectors reinterpreted as simde_int16x8_t ---- */

/* simde_int8x16_t -> simde_int16x8_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vreinterpretq_s16_s8(simde_int8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s16_s8(a);
  #else
    simde_int16x8_private r_;
    simde_int8x16_private a_ = simde_int8x16_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s16_s8
  #define vreinterpretq_s16_s8(a) simde_vreinterpretq_s16_s8(a)
#endif

/* simde_int32x4_t -> simde_int16x8_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vreinterpretq_s16_s32(simde_int32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s16_s32(a);
  #else
    simde_int16x8_private r_;
    simde_int32x4_private a_ = simde_int32x4_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s16_s32
  #define vreinterpretq_s16_s32(a) simde_vreinterpretq_s16_s32(a)
#endif

/* simde_int64x2_t -> simde_int16x8_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vreinterpretq_s16_s64(simde_int64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s16_s64(a);
  #else
    simde_int16x8_private r_;
    simde_int64x2_private a_ = simde_int64x2_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s16_s64
  #define vreinterpretq_s16_s64(a) simde_vreinterpretq_s16_s64(a)
#endif

/* simde_uint8x16_t -> simde_int16x8_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vreinterpretq_s16_u8(simde_uint8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s16_u8(a);
  #else
    simde_int16x8_private r_;
    simde_uint8x16_private a_ = simde_uint8x16_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s16_u8
  #define vreinterpretq_s16_u8(a) simde_vreinterpretq_s16_u8(a)
#endif

/* simde_uint16x8_t -> simde_int16x8_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vreinterpretq_s16_u16(simde_uint16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s16_u16(a);
  #else
    simde_int16x8_private r_;
    simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s16_u16
  #define vreinterpretq_s16_u16(a) simde_vreinterpretq_s16_u16(a)
#endif

/* simde_uint32x4_t -> simde_int16x8_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vreinterpretq_s16_u32(simde_uint32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s16_u32(a);
  #else
    simde_int16x8_private r_;
    simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s16_u32
  #define vreinterpretq_s16_u32(a) simde_vreinterpretq_s16_u32(a)
#endif

/* simde_uint64x2_t -> simde_int16x8_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vreinterpretq_s16_u64(simde_uint64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s16_u64(a);
  #else
    simde_int16x8_private r_;
    simde_uint64x2_private a_ = simde_uint64x2_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s16_u64
  #define vreinterpretq_s16_u64(a) simde_vreinterpretq_s16_u64(a)
#endif

/* simde_float32x4_t -> simde_int16x8_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vreinterpretq_s16_f32(simde_float32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s16_f32(a);
  #else
    simde_int16x8_private r_;
    simde_float32x4_private a_ = simde_float32x4_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s16_f32
  #define vreinterpretq_s16_f32(a) simde_vreinterpretq_s16_f32(a)
#endif

/* simde_float64x2_t -> simde_int16x8_t (A64V8 guards) */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vreinterpretq_s16_f64(simde_float64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vreinterpretq_s16_f64(a);
  #else
    simde_int16x8_private r_;
    simde_float64x2_private a_ = simde_float64x2_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int16x8_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s16_f64
  #define vreinterpretq_s16_f64(a) simde_vreinterpretq_s16_f64(a)
#endif

/* ---- 64-bit vectors reinterpreted as simde_int32x2_t ---- */

/* simde_int8x8_t -> simde_int32x2_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vreinterpret_s32_s8(simde_int8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s32_s8(a);
  #else
    simde_int32x2_private r_;
    simde_int8x8_private a_ = simde_int8x8_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s32_s8
  #define vreinterpret_s32_s8(a) simde_vreinterpret_s32_s8(a)
#endif

/* simde_int16x4_t -> simde_int32x2_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vreinterpret_s32_s16(simde_int16x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s32_s16(a);
  #else
    simde_int32x2_private r_;
    simde_int16x4_private a_ = simde_int16x4_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s32_s16
  #define vreinterpret_s32_s16(a) simde_vreinterpret_s32_s16(a)
#endif

/* simde_int64x1_t -> simde_int32x2_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vreinterpret_s32_s64(simde_int64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s32_s64(a);
  #else
    simde_int32x2_private r_;
    simde_int64x1_private a_ = simde_int64x1_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s32_s64
  #define vreinterpret_s32_s64(a) simde_vreinterpret_s32_s64(a)
#endif

/* simde_uint8x8_t -> simde_int32x2_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vreinterpret_s32_u8(simde_uint8x8_t a)
{
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s32_u8(a);
  #else
    simde_int32x2_private r_;
    simde_uint8x8_private a_ = simde_uint8x8_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s32_u8
  #define vreinterpret_s32_u8(a) simde_vreinterpret_s32_u8(a)
#endif

/* simde_uint16x4_t -> simde_int32x2_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vreinterpret_s32_u16(simde_uint16x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s32_u16(a);
  #else
    simde_int32x2_private r_;
    simde_uint16x4_private a_ = simde_uint16x4_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s32_u16
  #define vreinterpret_s32_u16(a) simde_vreinterpret_s32_u16(a)
#endif

/* simde_uint32x2_t -> simde_int32x2_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vreinterpret_s32_u32(simde_uint32x2_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s32_u32(a);
  #else
    simde_int32x2_private r_;
    simde_uint32x2_private a_ = simde_uint32x2_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s32_u32
  #define vreinterpret_s32_u32(a) simde_vreinterpret_s32_u32(a)
#endif

/* simde_uint64x1_t -> simde_int32x2_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vreinterpret_s32_u64(simde_uint64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s32_u64(a);
  #else
    simde_int32x2_private r_;
    simde_uint64x1_private a_ = simde_uint64x1_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s32_u64
  #define vreinterpret_s32_u64(a) simde_vreinterpret_s32_u64(a)
#endif

/* simde_float32x2_t -> simde_int32x2_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vreinterpret_s32_f32(simde_float32x2_t a) {
  #if
    defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s32_f32(a);
  #else
    simde_int32x2_private r_;
    simde_float32x2_private a_ = simde_float32x2_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s32_f32
  #define vreinterpret_s32_f32(a) simde_vreinterpret_s32_f32(a)
#endif

/* simde_float64x1_t -> simde_int32x2_t (A64V8 guards) */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x2_t
simde_vreinterpret_s32_f64(simde_float64x1_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vreinterpret_s32_f64(a);
  #else
    simde_int32x2_private r_;
    simde_float64x1_private a_ = simde_float64x1_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int32x2_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s32_f64
  #define vreinterpret_s32_f64(a) simde_vreinterpret_s32_f64(a)
#endif

/* ---- 128-bit vectors reinterpreted as simde_int32x4_t ---- */

/* simde_int8x16_t -> simde_int32x4_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vreinterpretq_s32_s8(simde_int8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s32_s8(a);
  #else
    simde_int32x4_private r_;
    simde_int8x16_private a_ = simde_int8x16_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s32_s8
  #define vreinterpretq_s32_s8(a) simde_vreinterpretq_s32_s8(a)
#endif

/* simde_int16x8_t -> simde_int32x4_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vreinterpretq_s32_s16(simde_int16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s32_s16(a);
  #else
    simde_int32x4_private r_;
    simde_int16x8_private a_ = simde_int16x8_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s32_s16
  #define vreinterpretq_s32_s16(a) simde_vreinterpretq_s32_s16(a)
#endif

/* simde_int64x2_t -> simde_int32x4_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vreinterpretq_s32_s64(simde_int64x2_t a) {
  #if
    defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s32_s64(a);
  #else
    simde_int32x4_private r_;
    simde_int64x2_private a_ = simde_int64x2_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s32_s64
  #define vreinterpretq_s32_s64(a) simde_vreinterpretq_s32_s64(a)
#endif

/* simde_uint8x16_t -> simde_int32x4_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vreinterpretq_s32_u8(simde_uint8x16_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s32_u8(a);
  #else
    simde_int32x4_private r_;
    simde_uint8x16_private a_ = simde_uint8x16_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s32_u8
  #define vreinterpretq_s32_u8(a) simde_vreinterpretq_s32_u8(a)
#endif

/* simde_uint16x8_t -> simde_int32x4_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vreinterpretq_s32_u16(simde_uint16x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s32_u16(a);
  #else
    simde_int32x4_private r_;
    simde_uint16x8_private a_ = simde_uint16x8_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s32_u16
  #define vreinterpretq_s32_u16(a) simde_vreinterpretq_s32_u16(a)
#endif

/* simde_uint32x4_t -> simde_int32x4_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vreinterpretq_s32_u32(simde_uint32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s32_u32(a);
  #else
    simde_int32x4_private r_;
    simde_uint32x4_private a_ = simde_uint32x4_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s32_u32
  #define vreinterpretq_s32_u32(a) simde_vreinterpretq_s32_u32(a)
#endif

/* simde_uint64x2_t -> simde_int32x4_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vreinterpretq_s32_u64(simde_uint64x2_t a) {
  #if
    defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s32_u64(a);
  #else
    simde_int32x4_private r_;
    simde_uint64x2_private a_ = simde_uint64x2_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s32_u64
  #define vreinterpretq_s32_u64(a) simde_vreinterpretq_s32_u64(a)
#endif

/* simde_float32x4_t -> simde_int32x4_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vreinterpretq_s32_f32(simde_float32x4_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpretq_s32_f32(a);
  #else
    simde_int32x4_private r_;
    simde_float32x4_private a_ = simde_float32x4_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s32_f32
  #define vreinterpretq_s32_f32(a) simde_vreinterpretq_s32_f32(a)
#endif

/* simde_float64x2_t -> simde_int32x4_t (A64V8 guards) */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vreinterpretq_s32_f64(simde_float64x2_t a) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vreinterpretq_s32_f64(a);
  #else
    simde_int32x4_private r_;
    simde_float64x2_private a_ = simde_float64x2_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int32x4_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vreinterpretq_s32_f64
  #define vreinterpretq_s32_f64(a) simde_vreinterpretq_s32_f64(a)
#endif

/* ---- 64-bit vectors reinterpreted as simde_int64x1_t ---- */

/* simde_int8x8_t -> simde_int64x1_t */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vreinterpret_s64_s8(simde_int8x8_t a) {
  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return vreinterpret_s64_s8(a);
  #else
    simde_int64x1_private r_;
    simde_int8x8_private a_ = simde_int8x8_to_private(a);
    simde_memcpy(&r_, &a_, sizeof(r_));
    return simde_int64x1_from_private(r_);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vreinterpret_s64_s8
  #define vreinterpret_s64_s8(a) simde_vreinterpret_s64_s8(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int64x1_t
simde_vreinterpret_s64_s16(simde_int16x4_t a) {
  #if
defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_s64_s16(a); #else simde_int64x1_private r_; simde_int16x4_private a_ = simde_int16x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_s64_s16 #define vreinterpret_s64_s16(a) simde_vreinterpret_s64_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vreinterpret_s64_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_s64_s32(a); #else simde_int64x1_private r_; simde_int32x2_private a_ = simde_int32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_s64_s32 #define vreinterpret_s64_s32(a) simde_vreinterpret_s64_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vreinterpret_s64_u8(simde_uint8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_s64_u8(a); #else simde_int64x1_private r_; simde_uint8x8_private a_ = simde_uint8x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_s64_u8 #define vreinterpret_s64_u8(a) simde_vreinterpret_s64_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vreinterpret_s64_u16(simde_uint16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_s64_u16(a); #else simde_int64x1_private r_; simde_uint16x4_private a_ = simde_uint16x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_s64_u16 #define vreinterpret_s64_u16(a) simde_vreinterpret_s64_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vreinterpret_s64_u32(simde_uint32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return 
vreinterpret_s64_u32(a); #else simde_int64x1_private r_; simde_uint32x2_private a_ = simde_uint32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_s64_u32 #define vreinterpret_s64_u32(a) simde_vreinterpret_s64_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vreinterpret_s64_u64(simde_uint64x1_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_s64_u64(a); #else simde_int64x1_private r_; simde_uint64x1_private a_ = simde_uint64x1_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_s64_u64 #define vreinterpret_s64_u64(a) simde_vreinterpret_s64_u64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vreinterpret_s64_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_s64_f32(a); #else simde_int64x1_private r_; simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_s64_f32 #define vreinterpret_s64_f32(a) simde_vreinterpret_s64_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vreinterpret_s64_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpret_s64_f64(a); #else simde_int64x1_private r_; simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpret_s64_f64 #define vreinterpret_s64_f64(a) simde_vreinterpret_s64_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vreinterpretq_s64_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_s64_s8(a); #else 
simde_int64x2_private r_; simde_int8x16_private a_ = simde_int8x16_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_s64_s8 #define vreinterpretq_s64_s8(a) simde_vreinterpretq_s64_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vreinterpretq_s64_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_s64_s16(a); #else simde_int64x2_private r_; simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_s64_s16 #define vreinterpretq_s64_s16(a) simde_vreinterpretq_s64_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vreinterpretq_s64_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_s64_s32(a); #else simde_int64x2_private r_; simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_s64_s32 #define vreinterpretq_s64_s32(a) simde_vreinterpretq_s64_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vreinterpretq_s64_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_s64_u8(a); #else simde_int64x2_private r_; simde_uint8x16_private a_ = simde_uint8x16_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_s64_u8 #define vreinterpretq_s64_u8(a) simde_vreinterpretq_s64_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vreinterpretq_s64_u16(simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_s64_u16(a); #else simde_int64x2_private r_; 
simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_s64_u16 #define vreinterpretq_s64_u16(a) simde_vreinterpretq_s64_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vreinterpretq_s64_u32(simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_s64_u32(a); #else simde_int64x2_private r_; simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_s64_u32 #define vreinterpretq_s64_u32(a) simde_vreinterpretq_s64_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vreinterpretq_s64_u64(simde_uint64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_s64_u64(a); #else simde_int64x2_private r_; simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_s64_u64 #define vreinterpretq_s64_u64(a) simde_vreinterpretq_s64_u64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vreinterpretq_s64_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_s64_f32(a); #else simde_int64x2_private r_; simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_s64_f32 #define vreinterpretq_s64_f32(a) simde_vreinterpretq_s64_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vreinterpretq_s64_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_s64_f64(a); #else simde_int64x2_private r_; 
simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_s64_f64 #define vreinterpretq_s64_f64(a) simde_vreinterpretq_s64_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vreinterpret_u8_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u8_s8(a); #else simde_uint8x8_private r_; simde_int8x8_private a_ = simde_int8x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u8_s8 #define vreinterpret_u8_s8(a) simde_vreinterpret_u8_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vreinterpret_u8_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u8_s16(a); #else simde_uint8x8_private r_; simde_int16x4_private a_ = simde_int16x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u8_s16 #define vreinterpret_u8_s16(a) simde_vreinterpret_u8_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vreinterpret_u8_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u8_s32(a); #else simde_uint8x8_private r_; simde_int32x2_private a_ = simde_int32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u8_s32 #define vreinterpret_u8_s32(a) simde_vreinterpret_u8_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vreinterpret_u8_s64(simde_int64x1_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u8_s64(a); #else simde_uint8x8_private r_; simde_int64x1_private a_ = simde_int64x1_to_private(a); 
simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u8_s64 #define vreinterpret_u8_s64(a) simde_vreinterpret_u8_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vreinterpret_u8_u16(simde_uint16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u8_u16(a); #else simde_uint8x8_private r_; simde_uint16x4_private a_ = simde_uint16x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u8_u16 #define vreinterpret_u8_u16(a) simde_vreinterpret_u8_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vreinterpret_u8_u32(simde_uint32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u8_u32(a); #else simde_uint8x8_private r_; simde_uint32x2_private a_ = simde_uint32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u8_u32 #define vreinterpret_u8_u32(a) simde_vreinterpret_u8_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vreinterpret_u8_u64(simde_uint64x1_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u8_u64(a); #else simde_uint8x8_private r_; simde_uint64x1_private a_ = simde_uint64x1_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u8_u64 #define vreinterpret_u8_u64(a) simde_vreinterpret_u8_u64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vreinterpret_u8_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u8_f32(a); #else simde_uint8x8_private r_; simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return 
simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u8_f32 #define vreinterpret_u8_f32(a) simde_vreinterpret_u8_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vreinterpret_u8_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpret_u8_f64(a); #else simde_uint8x8_private r_; simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u8_f64 #define vreinterpret_u8_f64(a) simde_vreinterpret_u8_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vreinterpretq_u8_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u8_s8(a); #else simde_uint8x16_private r_; simde_int8x16_private a_ = simde_int8x16_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u8_s8 #define vreinterpretq_u8_s8(a) simde_vreinterpretq_u8_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vreinterpretq_u8_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u8_s16(a); #else simde_uint8x16_private r_; simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u8_s16 #define vreinterpretq_u8_s16(a) simde_vreinterpretq_u8_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vreinterpretq_u8_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u8_s32(a); #else simde_uint8x16_private r_; simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint8x16_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u8_s32 #define vreinterpretq_u8_s32(a) simde_vreinterpretq_u8_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vreinterpretq_u8_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u8_s64(a); #else simde_uint8x16_private r_; simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u8_s64 #define vreinterpretq_u8_s64(a) simde_vreinterpretq_u8_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vreinterpretq_u8_u16(simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u8_u16(a); #else simde_uint8x16_private r_; simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u8_u16 #define vreinterpretq_u8_u16(a) simde_vreinterpretq_u8_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vreinterpretq_u8_u32(simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u8_u32(a); #else simde_uint8x16_private r_; simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u8_u32 #define vreinterpretq_u8_u32(a) simde_vreinterpretq_u8_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vreinterpretq_u8_u64(simde_uint64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u8_u64(a); #else simde_uint8x16_private r_; simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint8x16_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u8_u64 #define vreinterpretq_u8_u64(a) simde_vreinterpretq_u8_u64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vreinterpretq_u8_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u8_f32(a); #else simde_uint8x16_private r_; simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u8_f32 #define vreinterpretq_u8_f32(a) simde_vreinterpretq_u8_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vreinterpretq_u8_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_u8_f64(a); #else simde_uint8x16_private r_; simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u8_f64 #define vreinterpretq_u8_f64(a) simde_vreinterpretq_u8_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vreinterpret_u16_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u16_s8(a); #else simde_uint16x4_private r_; simde_int8x8_private a_ = simde_int8x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u16_s8 #define vreinterpret_u16_s8(a) simde_vreinterpret_u16_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vreinterpret_u16_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u16_s16(a); #else simde_uint16x4_private r_; simde_int16x4_private a_ = simde_int16x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint16x4_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u16_s16 #define vreinterpret_u16_s16(a) simde_vreinterpret_u16_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vreinterpret_u16_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u16_s32(a); #else simde_uint16x4_private r_; simde_int32x2_private a_ = simde_int32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u16_s32 #define vreinterpret_u16_s32(a) simde_vreinterpret_u16_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vreinterpret_u16_s64(simde_int64x1_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u16_s64(a); #else simde_uint16x4_private r_; simde_int64x1_private a_ = simde_int64x1_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u16_s64 #define vreinterpret_u16_s64(a) simde_vreinterpret_u16_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vreinterpret_u16_u8(simde_uint8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u16_u8(a); #else simde_uint16x4_private r_; simde_uint8x8_private a_ = simde_uint8x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u16_u8 #define vreinterpret_u16_u8(a) simde_vreinterpret_u16_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vreinterpret_u16_u32(simde_uint32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u16_u32(a); #else simde_uint16x4_private r_; simde_uint32x2_private a_ = simde_uint32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint16x4_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u16_u32 #define vreinterpret_u16_u32(a) simde_vreinterpret_u16_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vreinterpret_u16_u64(simde_uint64x1_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u16_u64(a); #else simde_uint16x4_private r_; simde_uint64x1_private a_ = simde_uint64x1_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u16_u64 #define vreinterpret_u16_u64(a) simde_vreinterpret_u16_u64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vreinterpret_u16_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u16_f32(a); #else simde_uint16x4_private r_; simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u16_f32 #define vreinterpret_u16_f32(a) simde_vreinterpret_u16_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vreinterpret_u16_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpret_u16_f64(a); #else simde_uint16x4_private r_; simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u16_f64 #define vreinterpret_u16_f64(a) simde_vreinterpret_u16_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vreinterpretq_u16_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u16_s8(a); #else simde_uint16x8_private r_; simde_int8x16_private a_ = simde_int8x16_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint16x8_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u16_s8 #define vreinterpretq_u16_s8(a) simde_vreinterpretq_u16_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vreinterpretq_u16_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u16_s16(a); #else simde_uint16x8_private r_; simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u16_s16 #define vreinterpretq_u16_s16(a) simde_vreinterpretq_u16_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vreinterpretq_u16_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u16_s32(a); #else simde_uint16x8_private r_; simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u16_s32 #define vreinterpretq_u16_s32(a) simde_vreinterpretq_u16_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vreinterpretq_u16_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u16_s64(a); #else simde_uint16x8_private r_; simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u16_s64 #define vreinterpretq_u16_s64(a) simde_vreinterpretq_u16_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vreinterpretq_u16_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u16_u8(a); #else simde_uint16x8_private r_; simde_uint8x16_private a_ = simde_uint8x16_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint16x8_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u16_u8 #define vreinterpretq_u16_u8(a) simde_vreinterpretq_u16_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vreinterpretq_u16_u32(simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u16_u32(a); #else simde_uint16x8_private r_; simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u16_u32 #define vreinterpretq_u16_u32(a) simde_vreinterpretq_u16_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vreinterpretq_u16_u64(simde_uint64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u16_u64(a); #else simde_uint16x8_private r_; simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u16_u64 #define vreinterpretq_u16_u64(a) simde_vreinterpretq_u16_u64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vreinterpretq_u16_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u16_f32(a); #else simde_uint16x8_private r_; simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u16_f32 #define vreinterpretq_u16_f32(a) simde_vreinterpretq_u16_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vreinterpretq_u16_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_u16_f64(a); #else simde_uint16x8_private r_; simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint16x8_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u16_f64 #define vreinterpretq_u16_f64(a) simde_vreinterpretq_u16_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vreinterpret_u32_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u32_s8(a); #else simde_uint32x2_private r_; simde_int8x8_private a_ = simde_int8x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u32_s8 #define vreinterpret_u32_s8(a) simde_vreinterpret_u32_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vreinterpret_u32_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u32_s16(a); #else simde_uint32x2_private r_; simde_int16x4_private a_ = simde_int16x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u32_s16 #define vreinterpret_u32_s16(a) simde_vreinterpret_u32_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vreinterpret_u32_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u32_s32(a); #else simde_uint32x2_private r_; simde_int32x2_private a_ = simde_int32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u32_s32 #define vreinterpret_u32_s32(a) simde_vreinterpret_u32_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vreinterpret_u32_s64(simde_int64x1_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u32_s64(a); #else simde_uint32x2_private r_; simde_int64x1_private a_ = simde_int64x1_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint32x2_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u32_s64 #define vreinterpret_u32_s64(a) simde_vreinterpret_u32_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vreinterpret_u32_u8(simde_uint8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u32_u8(a); #else simde_uint32x2_private r_; simde_uint8x8_private a_ = simde_uint8x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u32_u8 #define vreinterpret_u32_u8(a) simde_vreinterpret_u32_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vreinterpret_u32_u16(simde_uint16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u32_u16(a); #else simde_uint32x2_private r_; simde_uint16x4_private a_ = simde_uint16x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u32_u16 #define vreinterpret_u32_u16(a) simde_vreinterpret_u32_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vreinterpret_u32_u64(simde_uint64x1_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u32_u64(a); #else simde_uint32x2_private r_; simde_uint64x1_private a_ = simde_uint64x1_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u32_u64 #define vreinterpret_u32_u64(a) simde_vreinterpret_u32_u64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vreinterpret_u32_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u32_f32(a); #else simde_uint32x2_private r_; simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint32x2_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u32_f32 #define vreinterpret_u32_f32(a) simde_vreinterpret_u32_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vreinterpret_u32_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpret_u32_f64(a); #else simde_uint32x2_private r_; simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u32_f64 #define vreinterpret_u32_f64(a) simde_vreinterpret_u32_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vreinterpretq_u32_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u32_s8(a); #else simde_uint32x4_private r_; simde_int8x16_private a_ = simde_int8x16_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u32_s8 #define vreinterpretq_u32_s8(a) simde_vreinterpretq_u32_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vreinterpretq_u32_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u32_s16(a); #else simde_uint32x4_private r_; simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u32_s16 #define vreinterpretq_u32_s16(a) simde_vreinterpretq_u32_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vreinterpretq_u32_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u32_s32(a); #else simde_uint32x4_private r_; simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint32x4_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u32_s32 #define vreinterpretq_u32_s32(a) simde_vreinterpretq_u32_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vreinterpretq_u32_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u32_s64(a); #else simde_uint32x4_private r_; simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u32_s64 #define vreinterpretq_u32_s64(a) simde_vreinterpretq_u32_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vreinterpretq_u32_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u32_u8(a); #else simde_uint32x4_private r_; simde_uint8x16_private a_ = simde_uint8x16_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u32_u8 #define vreinterpretq_u32_u8(a) simde_vreinterpretq_u32_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vreinterpretq_u32_u16(simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u32_u16(a); #else simde_uint32x4_private r_; simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u32_u16 #define vreinterpretq_u32_u16(a) simde_vreinterpretq_u32_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vreinterpretq_u32_u64(simde_uint64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u32_u64(a); #else simde_uint32x4_private r_; simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint32x4_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u32_u64 #define vreinterpretq_u32_u64(a) simde_vreinterpretq_u32_u64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vreinterpretq_u32_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u32_f32(a); #else simde_uint32x4_private r_; simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u32_f32 #define vreinterpretq_u32_f32(a) simde_vreinterpretq_u32_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vreinterpretq_u32_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_u32_f64(a); #else simde_uint32x4_private r_; simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u32_f64 #define vreinterpretq_u32_f64(a) simde_vreinterpretq_u32_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vreinterpret_u64_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u64_s8(a); #else simde_uint64x1_private r_; simde_int8x8_private a_ = simde_int8x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u64_s8 #define vreinterpret_u64_s8(a) simde_vreinterpret_u64_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vreinterpret_u64_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u64_s16(a); #else simde_uint64x1_private r_; simde_int16x4_private a_ = simde_int16x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint64x1_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u64_s16 #define vreinterpret_u64_s16(a) simde_vreinterpret_u64_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vreinterpret_u64_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u64_s32(a); #else simde_uint64x1_private r_; simde_int32x2_private a_ = simde_int32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u64_s32 #define vreinterpret_u64_s32(a) simde_vreinterpret_u64_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vreinterpret_u64_s64(simde_int64x1_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u64_s64(a); #else simde_uint64x1_private r_; simde_int64x1_private a_ = simde_int64x1_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u64_s64 #define vreinterpret_u64_s64(a) simde_vreinterpret_u64_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vreinterpret_u64_u8(simde_uint8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u64_u8(a); #else simde_uint64x1_private r_; simde_uint8x8_private a_ = simde_uint8x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u64_u8 #define vreinterpret_u64_u8(a) simde_vreinterpret_u64_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vreinterpret_u64_u16(simde_uint16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u64_u16(a); #else simde_uint64x1_private r_; simde_uint16x4_private a_ = simde_uint16x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint64x1_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u64_u16 #define vreinterpret_u64_u16(a) simde_vreinterpret_u64_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vreinterpret_u64_u32(simde_uint32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u64_u32(a); #else simde_uint64x1_private r_; simde_uint32x2_private a_ = simde_uint32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u64_u32 #define vreinterpret_u64_u32(a) simde_vreinterpret_u64_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vreinterpret_u64_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_u64_f32(a); #else simde_uint64x1_private r_; simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u64_f32 #define vreinterpret_u64_f32(a) simde_vreinterpret_u64_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vreinterpret_u64_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpret_u64_f64(a); #else simde_uint64x1_private r_; simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpret_u64_f64 #define vreinterpret_u64_f64(a) simde_vreinterpret_u64_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vreinterpretq_u64_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u64_s8(a); #else simde_uint64x2_private r_; simde_int8x16_private a_ = simde_int8x16_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint64x2_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u64_s8 #define vreinterpretq_u64_s8(a) simde_vreinterpretq_u64_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vreinterpretq_u64_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u64_s16(a); #else simde_uint64x2_private r_; simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u64_s16 #define vreinterpretq_u64_s16(a) simde_vreinterpretq_u64_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vreinterpretq_u64_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u64_s32(a); #else simde_uint64x2_private r_; simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u64_s32 #define vreinterpretq_u64_s32(a) simde_vreinterpretq_u64_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vreinterpretq_u64_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u64_s64(a); #else simde_uint64x2_private r_; simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u64_s64 #define vreinterpretq_u64_s64(a) simde_vreinterpretq_u64_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vreinterpretq_u64_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u64_u8(a); #else simde_uint64x2_private r_; simde_uint8x16_private a_ = simde_uint8x16_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint64x2_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u64_u8 #define vreinterpretq_u64_u8(a) simde_vreinterpretq_u64_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vreinterpretq_u64_u16(simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u64_u16(a); #else simde_uint64x2_private r_; simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u64_u16 #define vreinterpretq_u64_u16(a) simde_vreinterpretq_u64_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vreinterpretq_u64_u32(simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u64_u32(a); #else simde_uint64x2_private r_; simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u64_u32 #define vreinterpretq_u64_u32(a) simde_vreinterpretq_u64_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vreinterpretq_u64_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_u64_f32(a); #else simde_uint64x2_private r_; simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u64_f32 #define vreinterpretq_u64_f32(a) simde_vreinterpretq_u64_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vreinterpretq_u64_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_u64_f64(a); #else simde_uint64x2_private r_; simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_uint64x2_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_u64_f64 #define vreinterpretq_u64_f64(a) simde_vreinterpretq_u64_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vreinterpret_f32_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_f32_s8(a); #else simde_float32x2_private r_; simde_int8x8_private a_ = simde_int8x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_f32_s8 #define vreinterpret_f32_s8(a) simde_vreinterpret_f32_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vreinterpret_f32_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_f32_s16(a); #else simde_float32x2_private r_; simde_int16x4_private a_ = simde_int16x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_f32_s16 #define vreinterpret_f32_s16(a) simde_vreinterpret_f32_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vreinterpret_f32_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_f32_s32(a); #else simde_float32x2_private r_; simde_int32x2_private a_ = simde_int32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_f32_s32 #define vreinterpret_f32_s32(a) simde_vreinterpret_f32_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vreinterpret_f32_s64(simde_int64x1_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_f32_s64(a); #else simde_float32x2_private r_; simde_int64x1_private a_ = simde_int64x1_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float32x2_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_f32_s64 #define vreinterpret_f32_s64(a) simde_vreinterpret_f32_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vreinterpret_f32_u8(simde_uint8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_f32_u8(a); #else simde_float32x2_private r_; simde_uint8x8_private a_ = simde_uint8x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_f32_u8 #define vreinterpret_f32_u8(a) simde_vreinterpret_f32_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vreinterpret_f32_u16(simde_uint16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_f32_u16(a); #else simde_float32x2_private r_; simde_uint16x4_private a_ = simde_uint16x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_f32_u16 #define vreinterpret_f32_u16(a) simde_vreinterpret_f32_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vreinterpret_f32_u32(simde_uint32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_f32_u32(a); #else simde_float32x2_private r_; simde_uint32x2_private a_ = simde_uint32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_f32_u32 #define vreinterpret_f32_u32(a) simde_vreinterpret_f32_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vreinterpret_f32_u64(simde_uint64x1_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpret_f32_u64(a); #else simde_float32x2_private r_; simde_uint64x1_private a_ = simde_uint64x1_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float32x2_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpret_f32_u64 #define vreinterpret_f32_u64(a) simde_vreinterpret_f32_u64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vreinterpret_f32_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpret_f32_f64(a); #else simde_float32x2_private r_; simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpret_f32_f64 #define vreinterpret_f32_f64(a) simde_vreinterpret_f32_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vreinterpretq_f32_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_f32_s8(a); #else simde_float32x4_private r_; simde_int8x16_private a_ = simde_int8x16_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_f32_s8 #define vreinterpretq_f32_s8(a) simde_vreinterpretq_f32_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vreinterpretq_f32_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_f32_s16(a); #else simde_float32x4_private r_; simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_f32_s16 #define vreinterpretq_f32_s16(a) simde_vreinterpretq_f32_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vreinterpretq_f32_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_f32_s32(a); #else simde_float32x4_private r_; simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float32x4_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_f32_s32 #define vreinterpretq_f32_s32(a) simde_vreinterpretq_f32_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vreinterpretq_f32_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_f32_s64(a); #else simde_float32x4_private r_; simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_f32_s64 #define vreinterpretq_f32_s64(a) simde_vreinterpretq_f32_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vreinterpretq_f32_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_f32_u8(a); #else simde_float32x4_private r_; simde_uint8x16_private a_ = simde_uint8x16_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_f32_u8 #define vreinterpretq_f32_u8(a) simde_vreinterpretq_f32_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vreinterpretq_f32_u16(simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_f32_u16(a); #else simde_float32x4_private r_; simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_f32_u16 #define vreinterpretq_f32_u16(a) simde_vreinterpretq_f32_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vreinterpretq_f32_u32(simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_f32_u32(a); #else simde_float32x4_private r_; simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float32x4_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_f32_u32 #define vreinterpretq_f32_u32(a) simde_vreinterpretq_f32_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vreinterpretq_f32_u64(simde_uint64x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_f32_u64(a); #else simde_float32x4_private r_; simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_f32_u64 #define vreinterpretq_f32_u64(a) simde_vreinterpretq_f32_u64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vreinterpretq_f32_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_f32_f64(a); #else simde_float32x4_private r_; simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_f32_f64 #define vreinterpretq_f32_f64(a) simde_vreinterpretq_f32_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vreinterpret_f64_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpret_f64_s8(a); #else simde_float64x1_private r_; simde_int8x8_private a_ = simde_int8x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpret_f64_s8 #define vreinterpret_f64_s8(a) simde_vreinterpret_f64_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vreinterpret_f64_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpret_f64_s16(a); #else simde_float64x1_private r_; simde_int16x4_private a_ = simde_int16x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float64x1_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpret_f64_s16 #define vreinterpret_f64_s16(a) simde_vreinterpret_f64_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vreinterpret_f64_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpret_f64_s32(a); #else simde_float64x1_private r_; simde_int32x2_private a_ = simde_int32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpret_f64_s32 #define vreinterpret_f64_s32(a) simde_vreinterpret_f64_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vreinterpret_f64_s64(simde_int64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpret_f64_s64(a); #else simde_float64x1_private r_; simde_int64x1_private a_ = simde_int64x1_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpret_f64_s64 #define vreinterpret_f64_s64(a) simde_vreinterpret_f64_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vreinterpret_f64_u8(simde_uint8x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpret_f64_u8(a); #else simde_float64x1_private r_; simde_uint8x8_private a_ = simde_uint8x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpret_f64_u8 #define vreinterpret_f64_u8(a) simde_vreinterpret_f64_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vreinterpret_f64_u16(simde_uint16x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpret_f64_u16(a); #else simde_float64x1_private r_; simde_uint16x4_private a_ = simde_uint16x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float64x1_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpret_f64_u16 #define vreinterpret_f64_u16(a) simde_vreinterpret_f64_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vreinterpret_f64_u32(simde_uint32x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpret_f64_u32(a); #else simde_float64x1_private r_; simde_uint32x2_private a_ = simde_uint32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpret_f64_u32 #define vreinterpret_f64_u32(a) simde_vreinterpret_f64_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vreinterpret_f64_u64(simde_uint64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpret_f64_u64(a); #else simde_float64x1_private r_; simde_uint64x1_private a_ = simde_uint64x1_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpret_f64_u64 #define vreinterpret_f64_u64(a) simde_vreinterpret_f64_u64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vreinterpret_f64_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpret_f64_f32(a); #else simde_float64x1_private r_; simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpret_f64_f32 #define vreinterpret_f64_f32(a) simde_vreinterpret_f64_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vreinterpretq_f64_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_f64_s8(a); #else simde_float64x2_private r_; simde_int8x16_private a_ = simde_int8x16_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float64x2_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_f64_s8 #define vreinterpretq_f64_s8(a) simde_vreinterpretq_f64_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vreinterpretq_f64_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_f64_s16(a); #else simde_float64x2_private r_; simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_f64_s16 #define vreinterpretq_f64_s16(a) simde_vreinterpretq_f64_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vreinterpretq_f64_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_f64_s32(a); #else simde_float64x2_private r_; simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_f64_s32 #define vreinterpretq_f64_s32(a) simde_vreinterpretq_f64_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vreinterpretq_f64_s64(simde_int64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_f64_s64(a); #else simde_float64x2_private r_; simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_f64_s64 #define vreinterpretq_f64_s64(a) simde_vreinterpretq_f64_s64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vreinterpretq_f64_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_f64_u8(a); #else simde_float64x2_private r_; simde_uint8x16_private a_ = simde_uint8x16_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float64x2_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_f64_u8 #define vreinterpretq_f64_u8(a) simde_vreinterpretq_f64_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vreinterpretq_f64_u16(simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_f64_u16(a); #else simde_float64x2_private r_; simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_f64_u16 #define vreinterpretq_f64_u16(a) simde_vreinterpretq_f64_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vreinterpretq_f64_u32(simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_f64_u32(a); #else simde_float64x2_private r_; simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_f64_u32 #define vreinterpretq_f64_u32(a) simde_vreinterpretq_f64_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vreinterpretq_f64_u64(simde_uint64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_f64_u64(a); #else simde_float64x2_private r_; simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_f64_u64 #define vreinterpretq_f64_u64(a) simde_vreinterpretq_f64_u64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vreinterpretq_f64_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_f64_f32(a); #else simde_float64x2_private r_; simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_memcpy(&r_, &a_, sizeof(r_)); return simde_float64x2_from_private(r_); 
#endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vreinterpretq_f64_f32 #define vreinterpretq_f64_f32(a) simde_vreinterpretq_f64_f32(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif simde-0.7.2/simde/arm/neon/rev16.h000066400000000000000000000112451400333146700165600ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_REV16_H) #define SIMDE_ARM_NEON_REV16_H #include "reinterpret.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vrev16_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev16_s8(a); #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_shuffle_pi8(a, _mm_set_pi8(6, 7, 4, 5, 2, 3, 0, 1)); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, a_.values, 1, 0, 3, 2, 5, 4, 7, 6); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i ^ 1]; } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev16_s8 #define vrev16_s8(a) simde_vrev16_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vrev16_u8(simde_uint8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev16_u8(a); #else return simde_vreinterpret_u8_s8(simde_vrev16_s8(simde_vreinterpret_s8_u8(a))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev16_u8 #define vrev16_u8(a) simde_vrev16_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vrev16q_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev16q_s8(a); #elif defined(SIMDE_X86_SSSE3_NATIVE) return _mm_shuffle_epi8(a, _mm_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1)); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_revb(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), a))); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), 
vec_reve(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_reve(a)))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v8x16_shuffle(a, a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, a_.values, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i ^ 1]; } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev16q_s8 #define vrev16q_s8(a) simde_vrev16q_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vrev16q_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev16q_u8(a); #else return simde_vreinterpretq_u8_s8(simde_vrev16q_s8(simde_vreinterpretq_s8_u8(a))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev16q_u8 #define vrev16q_u8(a) simde_vrev16q_u8(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_REV16_H) */ simde-0.7.2/simde/arm/neon/rev32.h000066400000000000000000000176101400333146700165600ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_REV32_H) #define SIMDE_ARM_NEON_REV32_H #include "reinterpret.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vrev32_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev32_s8(a); #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_shuffle_pi8(a, _mm_set_pi8(4, 5, 6, 7, 0, 1, 2, 3)); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, a_.values, 3, 2, 1, 0, 7, 6, 5, 4); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i ^ 3]; } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev32_s8 #define vrev32_s8(a) simde_vrev32_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vrev32_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev32_s16(a); #elif defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_shuffle_pi16(a, (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 0, 3, 2); #else SIMDE_VECTORIZE for (size_t i = 0 
; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i ^ 1]; } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev32_s16 #define vrev32_s16(a) simde_vrev32_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vrev32_u8(simde_uint8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev32_u8(a); #else return simde_vreinterpret_u8_s8(simde_vrev32_s8(simde_vreinterpret_s8_u8(a))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev32_u8 #define vrev32_u8(a) simde_vrev32_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vrev32_u16(simde_uint16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev32_u16(a); #else return simde_vreinterpret_u16_s16(simde_vrev32_s16(simde_vreinterpret_s16_u16(a))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev32_u16 #define vrev32_u16(a) simde_vrev32_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vrev32q_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev32q_s8(a); #elif defined(SIMDE_X86_SSSE3_NATIVE) return _mm_shuffle_epi8(a, _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_revb(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), a))); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_reve(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_reve(a)))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v8x16_shuffle(a, a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, a_.values, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 
8, 15, 14, 13, 12); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i ^ 3]; } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev32q_s8 #define vrev32q_s8(a) simde_vrev32q_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vrev32q_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev32q_s16(a); #elif defined(SIMDE_X86_SSSE3_NATIVE) return _mm_shuffle_epi8(a, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2)); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_shufflehi_epi16(_mm_shufflelo_epi16(a, (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)), (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_reve(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_reve(a)))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v8x16_shuffle(a, a, 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, a_.values, 1, 0, 3, 2, 5, 4, 7, 6); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i ^ 1]; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev32q_s16 #define vrev32q_s16(a) simde_vrev32q_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vrev32q_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev32q_u8(a); #else return simde_vreinterpretq_u8_s8(simde_vrev32q_s8(simde_vreinterpretq_s8_u8(a))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev32q_u8 #define vrev32q_u8(a) simde_vrev32q_u8(a) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vrev32q_u16(simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev32q_u16(a); #else return simde_vreinterpretq_u16_s16(simde_vrev32q_s16(simde_vreinterpretq_s16_u16(a))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev32q_u16 #define vrev32q_u16(a) simde_vrev32q_u16(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_REV32_H) */ simde-0.7.2/simde/arm/neon/rev64.h000066400000000000000000000271721400333146700165710ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ /* N.B. CM: vrev64_f16 and vrev64q_f16 are omitted as * SIMDe has no 16-bit floating point support. 
*/ #if !defined(SIMDE_ARM_NEON_REV64_H) #define SIMDE_ARM_NEON_REV64_H #include "reinterpret.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vrev64_s8(simde_int8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev64_s8(a); #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_shuffle_pi8(a, _mm_set_pi8(0, 1, 2, 3, 4, 5, 6, 7)); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, a_.values, 7, 6, 5, 4, 3, 2, 1, 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i ^ 7]; } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev64_s8 #define vrev64_s8(a) simde_vrev64_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vrev64_s16(simde_int16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev64_s16(a); #elif defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_shuffle_pi16(a, (0 << 6) | (1 << 4) | (2 << 2) | (3 << 0)); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 3, 2, 1, 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i ^ 3]; } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev64_s16 #define vrev64_s16(a) simde_vrev64_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vrev64_s32(simde_int32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev64_s32(a); #elif defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_shuffle_pi16(a, (1 << 6) | (0 << 4) | (3 << 2) 
| (2 << 0)); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i ^ 1]; } #endif return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev64_s32 #define vrev64_s32(a) simde_vrev64_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vrev64_u8(simde_uint8x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev64_u8(a); #else return simde_vreinterpret_u8_s8(simde_vrev64_s8(simde_vreinterpret_s8_u8(a))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev64_u8 #define vrev64_u8(a) simde_vrev64_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vrev64_u16(simde_uint16x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev64_u16(a); #else return simde_vreinterpret_u16_s16(simde_vrev64_s16(simde_vreinterpret_s16_u16(a))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev64_u16 #define vrev64_u16(a) simde_vrev64_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vrev64_u32(simde_uint32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev64_u32(a); #else return simde_vreinterpret_u32_s32(simde_vrev64_s32(simde_vreinterpret_s32_u32(a))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev64_u32 #define vrev64_u32(a) simde_vrev64_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vrev64_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev64_f32(a); #else return simde_vreinterpret_f32_s32(simde_vrev64_s32(simde_vreinterpret_s32_f32(a))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev64_f32 #define vrev64_f32(a) simde_vrev64_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t 
simde_vrev64q_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev64q_s8(a); #elif defined(SIMDE_X86_SSSE3_NATIVE) return _mm_shuffle_epi8(a, _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_revb(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a))); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_reve(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_reve(a)))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v8x16_shuffle(a, a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, a_.values, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i ^ 7]; } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev64q_s8 #define vrev64q_s8(a) simde_vrev64q_s8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vrev64q_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev64q_s16(a); #elif defined(SIMDE_X86_SSSE3_NATIVE) return _mm_shuffle_epi8(a, _mm_set_epi8(9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, 4, 7, 6)); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_shufflehi_epi16(_mm_shufflelo_epi16(a, (0 << 6) | (1 << 4) | (2 << 2) | (3 << 0)), (0 << 6) | (1 << 4) | (2 << 2) | (3 << 0)); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_reve(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_reve(a)))); #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v8x16_shuffle(a, a, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, a_.values, 3, 2, 1, 0, 7, 6, 5, 4); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i ^ 3]; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev64q_s16 #define vrev64q_s16(a) simde_vrev64q_s16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vrev64q_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev64q_s32(a); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_shuffle_epi32(a, (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_reve(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_reve(a)))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v8x16_shuffle(a, a, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 0, 3, 2); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i ^ 1]; } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev64q_s32 #define vrev64q_s32(a) simde_vrev64q_s32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vrev64q_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev64q_u8(a); #else return simde_vreinterpretq_u8_s8(simde_vrev64q_s8(simde_vreinterpretq_s8_u8(a))); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev64q_u8 #define vrev64q_u8(a) simde_vrev64q_u8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vrev64q_u16(simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev64q_u16(a); #else return simde_vreinterpretq_u16_s16(simde_vrev64q_s16(simde_vreinterpretq_s16_u16(a))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev64q_u16 #define vrev64q_u16(a) simde_vrev64q_u16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vrev64q_u32(simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev64q_u32(a); #else return simde_vreinterpretq_u32_s32(simde_vrev64q_s32(simde_vreinterpretq_s32_u32(a))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev64q_u32 #define vrev64q_u32(a) simde_vrev64q_u32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vrev64q_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrev64q_f32(a); #else return simde_vreinterpretq_f32_s32(simde_vrev64q_s32(simde_vreinterpretq_s32_f32(a))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrev64q_f32 #define vrev64q_f32(a) simde_vrev64q_f32(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_REV64_H) */ simde-0.7.2/simde/arm/neon/rhadd.h000066400000000000000000000376161400333146700167110ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * 
included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_RHADD_H) #define SIMDE_ARM_NEON_RHADD_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vrhadd_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrhadd_s8(a, b); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = (((a_.values >> HEDLEY_STATIC_CAST(int8_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(int8_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(int8_t, 1))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(int8_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(int8_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(int8_t, 1))); } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrhadd_s8 #define vrhadd_s8(a, b) simde_vrhadd_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vrhadd_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrhadd_s16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_add_pi16(_m_pand(_m_por(a, b), 
_mm_set1_pi16(HEDLEY_STATIC_CAST(int16_t, 1))), _mm_add_pi16(_m_psrawi(a, 1), _m_psrawi(b, 1))); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = (((a_.values >> HEDLEY_STATIC_CAST(int16_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(int16_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(int16_t, 1))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(int16_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(int16_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(int16_t, 1))); } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrhadd_s16 #define vrhadd_s16(a, b) simde_vrhadd_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vrhadd_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrhadd_s32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_add_pi32(_m_pand(_m_por(a, b), _mm_set1_pi32(HEDLEY_STATIC_CAST(int32_t, 1))), _mm_add_pi32(_m_psradi(a, 1), _m_psradi(b, 1))); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = (((a_.values >> HEDLEY_STATIC_CAST(int32_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(int32_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(int32_t, 1))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(int32_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(int32_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(int32_t, 1))); } #endif return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrhadd_s32 #define vrhadd_s32(a, b) 
simde_vrhadd_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vrhadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrhadd_u8(a, b); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = (((a_.values >> HEDLEY_STATIC_CAST(uint8_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(uint8_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(uint8_t, 1))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(uint8_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(uint8_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(uint8_t, 1))); } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrhadd_u8 #define vrhadd_u8(a, b) simde_vrhadd_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vrhadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrhadd_u16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_add_pi16(_m_pand(_m_por(a, b), _mm_set1_pi16(HEDLEY_STATIC_CAST(int16_t, 1))), _mm_add_pi16(_mm_srli_pi16(a, 1), _mm_srli_pi16(b, 1))); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = (((a_.values >> HEDLEY_STATIC_CAST(uint16_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(uint16_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(uint16_t, 1))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(uint16_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(uint16_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(uint16_t, 1))); } #endif return simde_uint16x4_from_private(r_); #endif 
} #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrhadd_u16 #define vrhadd_u16(a, b) simde_vrhadd_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vrhadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrhadd_u32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_add_pi32(_m_pand(_m_por(a, b), _mm_set1_pi32(HEDLEY_STATIC_CAST(int32_t, 1))), _mm_add_pi32(_mm_srli_pi32(a, 1), _mm_srli_pi32(b, 1))); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = (((a_.values >> HEDLEY_STATIC_CAST(uint32_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(uint32_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(uint32_t, 1))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(uint32_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(uint32_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(uint32_t, 1))); } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrhadd_u32 #define vrhadd_u32(a, b) simde_vrhadd_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vrhaddq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrhaddq_s8(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_add(wasm_i8x16_add(wasm_i8x16_shr(a, 1), wasm_i8x16_shr(b, 1)), wasm_v128_and(wasm_v128_or(a, b), wasm_i8x16_splat(1))); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = (((a_.values >> HEDLEY_STATIC_CAST(int8_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(int8_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(int8_t, 1))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < 
(sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(int8_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(int8_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(int8_t, 1))); } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrhaddq_s8 #define vrhaddq_s8(a, b) simde_vrhaddq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vrhaddq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrhaddq_s16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_add_epi16(_mm_and_si128(_mm_or_si128(a, b), _mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, 1))), _mm_add_epi16(_mm_srai_epi16(a, 1), _mm_srai_epi16(b, 1))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_add(wasm_i16x8_add(wasm_i16x8_shr(a, 1), wasm_i16x8_shr(b, 1)), wasm_v128_and(wasm_v128_or(a, b), wasm_i16x8_splat(1))); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = (((a_.values >> HEDLEY_STATIC_CAST(int16_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(int16_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(int16_t, 1))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(int16_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(int16_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(int16_t, 1))); } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrhaddq_s16 #define vrhaddq_s16(a, b) simde_vrhaddq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vrhaddq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrhaddq_s32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return 
_mm_add_epi32(_mm_and_si128(_mm_or_si128(a, b), _mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, 1))), _mm_add_epi32(_mm_srai_epi32(a, 1), _mm_srai_epi32(b, 1))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_add(wasm_i32x4_add(wasm_i32x4_shr(a, 1), wasm_i32x4_shr(b, 1)), wasm_v128_and(wasm_v128_or(a, b), wasm_i32x4_splat(1))); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = (((a_.values >> HEDLEY_STATIC_CAST(int32_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(int32_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(int32_t, 1))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(int32_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(int32_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(int32_t, 1))); } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrhaddq_s32 #define vrhaddq_s32(a, b) simde_vrhaddq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vrhaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrhaddq_u8(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u8x16_avgr(a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = (((a_.values >> HEDLEY_STATIC_CAST(uint8_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(uint8_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(uint8_t, 1))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(uint8_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(uint8_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(uint8_t, 1))); } #endif return 
simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrhaddq_u8 #define vrhaddq_u8(a, b) simde_vrhaddq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vrhaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrhaddq_u16(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_u16x8_avgr(a, b); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = (((a_.values >> HEDLEY_STATIC_CAST(uint16_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(uint16_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(uint16_t, 1))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(uint16_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(uint16_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(uint16_t, 1))); } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrhaddq_u16 #define vrhaddq_u16(a, b) simde_vrhaddq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vrhaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrhaddq_u32(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_add(wasm_i32x4_add(wasm_u32x4_shr(a, 1), wasm_u32x4_shr(b, 1)), wasm_v128_and(wasm_v128_or(a, b), wasm_i32x4_splat(1))); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = (((a_.values >> HEDLEY_STATIC_CAST(uint32_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(uint32_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(uint32_t, 1))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { 
r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(uint32_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(uint32_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(uint32_t, 1))); } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrhaddq_u32 #define vrhaddq_u32(a, b) simde_vrhaddq_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_RHADD_H) */ simde-0.7.2/simde/arm/neon/rnd.h000066400000000000000000000102551400333146700164000ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_RND_H) #define SIMDE_ARM_NEON_RND_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vrnd_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vrnd_f32(a); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_truncf(a_.values[i]); } return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrnd_f32 #define vrnd_f32(a) simde_vrnd_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vrnd_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vrnd_f64(a); #else simde_float64x1_private r_, a_ = simde_float64x1_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_trunc(a_.values[i]); } return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vrnd_f64 #define vrnd_f64(a) simde_vrnd_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vrndq_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vrndq_f32(a); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_round_ps(a, _MM_FROUND_TO_ZERO); #elif defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_trunc_ps(a); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_trunc(a); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_truncf(a_.values[i]); } return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrndq_f32 #define vrndq_f32(a) 
simde_vrndq_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vrndq_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vrndq_f64(a); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_round_pd(a, _MM_FROUND_TO_ZERO); #elif defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_trunc_pd(a); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_trunc(a); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_trunc(a_.values[i]); } return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vrndq_f64 #define vrndq_f64(a) simde_vrndq_f64(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_RND_H) */ simde-0.7.2/simde/arm/neon/rndi.h000066400000000000000000000076101400333146700165520ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020-2021 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_RNDI_H) #define SIMDE_ARM_NEON_RNDI_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vrndi_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vrndi_f32(a); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_nearbyintf(a_.values[i]); } return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrndi_f32 #define vrndi_f32(a) simde_vrndi_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vrndi_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vrndi_f64(a); #else simde_float64x1_private r_, a_ = simde_float64x1_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_nearbyint(a_.values[i]); } return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vrndi_f64 #define vrndi_f64(a) simde_vrndi_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vrndiq_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vrndiq_f32(a); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_round_ps(a, _MM_FROUND_CUR_DIRECTION); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_nearbyintf(a_.values[i]); } return 
simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrndiq_f32 #define vrndiq_f32(a) simde_vrndiq_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vrndiq_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vrndiq_f64(a); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_round_pd(a, _MM_FROUND_CUR_DIRECTION); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_nearbyint(a_.values[i]); } return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vrndiq_f64 #define vrndiq_f64(a) simde_vrndiq_f64(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_RNDI_H) */ simde-0.7.2/simde/arm/neon/rndm.h000066400000000000000000000103121400333146700165470ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020-2021 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_RNDM_H) #define SIMDE_ARM_NEON_RNDM_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vrndm_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vrndm_f32(a); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_floorf(a_.values[i]); } return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrndm_f32 #define vrndm_f32(a) simde_vrndm_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vrndm_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vrndm_f64(a); #else simde_float64x1_private r_, a_ = simde_float64x1_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_floor(a_.values[i]); } return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vrndm_f64 #define vrndm_f64(a) simde_vrndm_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vrndmq_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vrndmq_f32(a); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF); #elif defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_floor_ps(a); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_floor(a); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a); 
SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_floorf(a_.values[i]); } return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrndmq_f32 #define vrndmq_f32(a) simde_vrndmq_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vrndmq_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vrndmq_f64(a); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_round_pd(a, _MM_FROUND_TO_NEG_INF); #elif defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_floor_pd(a); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_floor(a); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_floor(a_.values[i]); } return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vrndmq_f64 #define vrndmq_f64(a) simde_vrndmq_f64(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_RNDM_H) */ simde-0.7.2/simde/arm/neon/rndn.h000066400000000000000000000076121400333146700165610ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020-2021 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_RNDN_H) #define SIMDE_ARM_NEON_RNDN_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vrndn_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vrndn_f32(a); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_roundevenf(a_.values[i]); } return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrndn_f32 #define vrndn_f32(a) simde_vrndn_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vrndn_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vrndn_f64(a); #else simde_float64x1_private r_, a_ = simde_float64x1_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_roundeven(a_.values[i]); } return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vrndn_f64 #define vrndn_f64(a) simde_vrndn_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vrndnq_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vrndnq_f32(a); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT); #else 
simde_float32x4_private r_, a_ = simde_float32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_roundevenf(a_.values[i]); } return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrndnq_f32 #define vrndnq_f32(a) simde_vrndnq_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vrndnq_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vrndnq_f64(a); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_round_pd(a, _MM_FROUND_TO_NEAREST_INT); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_roundeven(a_.values[i]); } return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vrndnq_f64 #define vrndnq_f64(a) simde_vrndnq_f64(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_RNDN_H) */ simde-0.7.2/simde/arm/neon/rndp.h000066400000000000000000000103021400333146700165510ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020-2021 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_RNDP_H) #define SIMDE_ARM_NEON_RNDP_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vrndp_f32(simde_float32x2_t a) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vrndp_f32(a); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_ceilf(a_.values[i]); } return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrndp_f32 #define vrndp_f32(a) simde_vrndp_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vrndp_f64(simde_float64x1_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vrndp_f64(a); #else simde_float64x1_private r_, a_ = simde_float64x1_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_ceil(a_.values[i]); } return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vrndp_f64 #define vrndp_f64(a) simde_vrndp_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vrndpq_f32(simde_float32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vrndpq_f32(a); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_round_ps(a, _MM_FROUND_TO_POS_INF); #elif 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_ceil_ps(a); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_ceil(a); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_ceilf(a_.values[i]); } return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrndpq_f32 #define vrndpq_f32(a) simde_vrndpq_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vrndpq_f64(simde_float64x2_t a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vrndpq_f64(a); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_round_pd(a, _MM_FROUND_TO_POS_INF); #elif defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_ceil_pd(a); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_ceil(a); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_math_ceil(a_.values[i]); } return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vrndpq_f64 #define vrndpq_f64(a) simde_vrndpq_f64(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_RNDP_H) */ simde-0.7.2/simde/arm/neon/rshl.h000066400000000000000000001256511400333146700165740ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The 
above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_RSHL_H) #define SIMDE_ARM_NEON_RSHL_H #include "types.h" /* Notes from the implementer (Christopher Moore aka rosbif) * * I have tried to exactly reproduce the documented behaviour of the * ARM NEON rshl and rshlq intrinsics. * This is complicated for the following reasons:- * * a) Negative shift counts shift right. * * b) Only the low byte of the shift count is used but the shift count * is not limited to 8-bit values (-128 to 127). * * c) Overflow must be avoided when rounding, together with sign change * warning/errors in the C versions. * * d) Intel SIMD is not nearly as complete as NEON and AltiVec. * There were no intrinsics with a vector shift count before AVX2 which * only has 32 and 64-bit logical ones and only a 32-bit arithmetic * one. The others need AVX512. There are no 8-bit shift intrinsics at * all, even with a scalar shift count. It is surprising to use AVX2 * and even AVX512 to implement a 64-bit vector operation. * * e) Many shift implementations, and the C standard, do not treat a * shift count >= the object's size in bits as one would expect. * (Personally I feel that > is silly but == can be useful.) * * Note that even the C17/18 standard does not define the behaviour of * a right shift of a negative value.
* However Evan and I agree that all compilers likely to be used * implement this as an arithmetic right shift with sign extension. * If this is not the case it could be replaced by a logical right shift * if negative values are complemented before and after the shift. * * Some of the SIMD translations may be slower than the portable code, * particularly those for vectors with only one or two elements. * But I had fun writing them ;-) * */ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vrshl_s8 (const simde_int8x8_t a, const simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrshl_s8(a, b); #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) const __m128i zero = _mm_setzero_si128(); const __m128i ff = _mm_cmpeq_epi16(zero, zero); __m128i a128 = _mm_cvtepi8_epi16(_mm_movpi64_epi64(a)); __m128i b128 = _mm_cvtepi8_epi16(_mm_movpi64_epi64(b)); __m128i a128_shr = _mm_srav_epi16(a128, _mm_xor_si128(b128, ff)); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi16(a128, b128), _mm_srai_epi16(_mm_sub_epi16(a128_shr, ff), 1), _mm_cmpgt_epi16(zero, b128)); return _mm_movepi64_pi64(_mm_cvtepi16_epi8(r128)); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) const __m256i zero = _mm256_setzero_si256(); const __m256i ff = _mm256_cmpeq_epi32(zero, zero); __m256i a256 = _mm256_cvtepi8_epi32(_mm_movpi64_epi64(a)); __m256i b256 = _mm256_cvtepi8_epi32(_mm_movpi64_epi64(b)); __m256i a256_shr = _mm256_srav_epi32(a256, _mm256_xor_si256(b256, ff)); __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), _mm256_srai_epi32(_mm256_sub_epi32(a256_shr, ff), 1), _mm256_cmpgt_epi32(zero, b256)); r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi32(0x0C080400)); return _mm_set_pi32(_mm256_extract_epi32(r256, 4), _mm256_extract_epi32(r256, 0)); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = 
simde_int8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int8_t, (abs(b_.values[i]) >= 8) ? 0 : (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i])); } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshl_s8 #define vrshl_s8(a, b) simde_vrshl_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vrshl_s16 (const simde_int16x4_t a, const simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrshl_s16(a, b); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) const __m128i zero = _mm_setzero_si128(); const __m128i ff = _mm_cmpeq_epi32(zero, zero); __m128i a128 = _mm_cvtepi16_epi32(_mm_movpi64_epi64(a)); __m128i b128 = _mm_cvtepi16_epi32(_mm_movpi64_epi64(b)); b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); __m128i a128_shr = _mm_srav_epi32(a128, _mm_xor_si128(b128, ff)); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), _mm_srai_epi32(_mm_sub_epi32(a128_shr, ff), 1), _mm_cmpgt_epi32(zero, b128)); return _mm_movepi64_pi64(_mm_shuffle_epi8(r128, _mm_set1_epi64x(0x0D0C090805040100))); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (abs(b_.values[i]) >= 16) ? 0 : (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i])); } return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshl_s16 #define vrshl_s16(a, b) simde_vrshl_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vrshl_s32 (const simde_int32x2_t a, const simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrshl_s32(a, b); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) const __m128i zero = _mm_setzero_si128(); const __m128i ff = _mm_cmpeq_epi32(zero, zero); __m128i a128 = _mm_movpi64_epi64(a); __m128i b128 = _mm_movpi64_epi64(b); b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); __m128i a128_shr = _mm_srav_epi32(a128, _mm_xor_si128(b128, ff)); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), _mm_srai_epi32(_mm_sub_epi32(a128_shr, ff), 1), _mm_cmpgt_epi32(zero, b128)); return _mm_movepi64_pi64(r128); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = HEDLEY_STATIC_CAST(int32_t, (abs(b_.values[i]) >= 32) ? 0 : (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i])); } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshl_s32 #define vrshl_s32(a, b) simde_vrshl_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vrshl_s64 (const simde_int64x1_t a, const simde_int64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrshl_s64(a, b); #elif defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) const __m128i zero = _mm_setzero_si128(); const __m128i ff = _mm_cmpeq_epi64(zero, zero); __m128i a128 = _mm_movpi64_epi64(a); __m128i b128 = _mm_movpi64_epi64(b); b128 = _mm_srai_epi64(_mm_slli_epi64(b128, 56), 56); __m128i a128_shr = _mm_srav_epi64(a128, _mm_xor_si128(b128, ff)); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b128), _mm_srai_epi64(_mm_sub_epi64(a128_shr, ff), 1), _mm_cmpgt_epi64(zero, b128)); return _mm_movepi64_pi64(r128); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) const __m128i zero = _mm_setzero_si128(); const __m128i ones = _mm_set1_epi64x(1); __m128i a128 = _mm_movpi64_epi64(a); __m128i b128 = _mm_movpi64_epi64(b); __m128i maska = _mm_cmpgt_epi64(zero, a128); __m128i b128_abs = _mm_and_si128(_mm_abs_epi8(b128), _mm_set1_epi64x(0xFF)); __m128i a128_rnd = _mm_and_si128(_mm_srlv_epi64(a128, _mm_sub_epi64(b128_abs, ones)), ones); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b128_abs), _mm_add_epi64(_mm_xor_si128(_mm_srlv_epi64(_mm_xor_si128(a128, maska), b128_abs), maska), a128_rnd), _mm_cmpgt_epi64(zero, _mm_slli_epi64(b128, 56))); return _mm_movepi64_pi64(r128); #else simde_int64x1_private r_, a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = HEDLEY_STATIC_CAST(int64_t, 
(llabs(b_.values[i]) >= 64) ? 0 : (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : ((a_.values[i] + (INT64_C(1) << (-b_.values[i] - 1))) >> -b_.values[i])); } return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshl_s64 #define vrshl_s64(a, b) simde_vrshl_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vrshl_u8 (const simde_uint8x8_t a, const simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrshl_u8(a, b); #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) const __m128i zero = _mm_setzero_si128(); const __m128i ff = _mm_cmpeq_epi16(zero, zero); __m128i a128 = _mm_cvtepu8_epi16(_mm_movpi64_epi64(a)); __m128i b128 = _mm_cvtepi8_epi16(_mm_movpi64_epi64(b)); __m128i a128_shr = _mm_srlv_epi16(a128, _mm_xor_si128(b128, ff)); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi16(a128, b128), _mm_srli_epi16(_mm_sub_epi16(a128_shr, ff), 1), _mm_cmpgt_epi16(zero, b128)); return _mm_movepi64_pi64(_mm_cvtepi16_epi8(r128)); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) const __m256i zero = _mm256_setzero_si256(); const __m256i ff = _mm256_cmpeq_epi32(zero, zero); __m256i a256 = _mm256_cvtepu8_epi32(_mm_movpi64_epi64(a)); __m256i b256 = _mm256_cvtepi8_epi32(_mm_movpi64_epi64(b)); __m256i a256_shr = _mm256_srlv_epi32(a256, _mm256_xor_si256(b256, ff)); __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), _mm256_srli_epi32(_mm256_sub_epi32(a256_shr, ff), 1), _mm256_cmpgt_epi32(zero, b256)); r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi32(0x0C080400)); return _mm_set_pi32(_mm256_extract_epi32(r256, 4), _mm256_extract_epi32(r256, 0)); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a); simde_int8x8_private b_ = simde_int8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, 
(b_.values[i] >= 8) ? 0 : (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : (b_.values[i] >= -8) ? (((b_.values[i] == -8) ? 0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : 0); } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshl_u8 #define vrshl_u8(a, b) simde_vrshl_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vrshl_u16 (const simde_uint16x4_t a, const simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrshl_u16(a, b); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) const __m128i zero = _mm_setzero_si128(); const __m128i ff = _mm_cmpeq_epi32(zero, zero); __m128i a128 = _mm_cvtepu16_epi32(_mm_movpi64_epi64(a)); __m128i b128 = _mm_cvtepi16_epi32(_mm_movpi64_epi64(b)); b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); __m128i a128_shr = _mm_srlv_epi32(a128, _mm_xor_si128(b128, ff)); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), _mm_srli_epi32(_mm_sub_epi32(a128_shr, ff), 1), _mm_cmpgt_epi32(zero, b128)); return _mm_movepi64_pi64(_mm_shuffle_epi8(r128, _mm_set1_epi64x(0x0D0C090805040100))); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a); simde_int16x4_private b_ = simde_int16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, (b_.values[i] >= 16) ? 0 : (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : (b_.values[i] >= -16) ? (((b_.values[i] == -16) ? 
0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : 0); } return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshl_u16 #define vrshl_u16(a, b) simde_vrshl_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vrshl_u32 (const simde_uint32x2_t a, const simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrshl_u32(a, b); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) const __m128i zero = _mm_setzero_si128(); const __m128i ff = _mm_cmpeq_epi32(zero, zero); __m128i a128 = _mm_movpi64_epi64(a); __m128i b128 = _mm_movpi64_epi64(b); b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); __m128i a128_shr = _mm_srlv_epi32(a128, _mm_xor_si128(b128, ff)); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), _mm_srli_epi32(_mm_sub_epi32(a128_shr, ff), 1), _mm_cmpgt_epi32(zero, b128)); return _mm_movepi64_pi64(r128); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a); simde_int32x2_private b_ = simde_int32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = (b_.values[i] >= 32) ? 0 : (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : (b_.values[i] >= -32) ? (((b_.values[i] == -32) ? 
0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : 0; } return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshl_u32 #define vrshl_u32(a, b) simde_vrshl_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vrshl_u64 (const simde_uint64x1_t a, const simde_int64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrshl_u64(a, b); #elif defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) const __m128i zero = _mm_setzero_si128(); const __m128i ff = _mm_cmpeq_epi64(zero, zero); __m128i a128 = _mm_movpi64_epi64(a); __m128i b128 = _mm_movpi64_epi64(b); b128 = _mm_srai_epi64(_mm_slli_epi64(b128, 56), 56); __m128i a128_shr = _mm_srlv_epi64(a128, _mm_xor_si128(b128, ff)); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b128), _mm_srli_epi64(_mm_sub_epi64(a128_shr, ff), 1), _mm_cmpgt_epi64(zero, b128)); return _mm_movepi64_pi64(r128); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) const __m128i ones = _mm_set1_epi64x(1); const __m128i a128 = _mm_movpi64_epi64(a); __m128i b128 = _mm_movpi64_epi64(b); __m128i b128_abs = _mm_and_si128(_mm_abs_epi8(b128), _mm_set1_epi64x(0xFF)); __m128i a128_shr = _mm_srlv_epi64(a128, _mm_sub_epi64(b128_abs, ones)); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b128_abs), _mm_srli_epi64(_mm_add_epi64(a128_shr, ones), 1), _mm_cmpgt_epi64(_mm_setzero_si128(), _mm_slli_epi64(b128, 56))); return _mm_movepi64_pi64(r128); #else simde_uint64x1_private r_, a_ = simde_uint64x1_to_private(a); simde_int64x1_private b_ = simde_int64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = (b_.values[i] >= 64) ? 0 : (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : (b_.values[i] >= -64) ? (((b_.values[i] == -64) ? 
0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : 0; } return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshl_u64 #define vrshl_u64(a, b) simde_vrshl_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vrshlq_s8 (const simde_int8x16_t a, const simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrshlq_s8(a, b); #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) const __m256i zero = _mm256_setzero_si256(); const __m256i ff = _mm256_cmpeq_epi16(zero, zero); __m256i a256 = _mm256_cvtepi8_epi16(a); __m256i b256 = _mm256_cvtepi8_epi16(b); __m256i a256_shr = _mm256_srav_epi16(a256, _mm256_xor_si256(b256, ff)); __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi16(a256, b256), _mm256_srai_epi16(_mm256_sub_epi16(a256_shr, ff), 1), _mm256_cmpgt_epi16(zero, b256)); return _mm256_cvtepi16_epi8(r256); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) const SIMDE_POWER_ALTIVEC_VECTOR( signed char) zero = vec_splats(HEDLEY_STATIC_CAST( signed char, 0)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 1)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) max = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 8)); SIMDE_POWER_ALTIVEC_VECTOR(signed char) a_shr; SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) b_abs; b_abs = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_abs(b)); a_shr = vec_sra(a, vec_sub(b_abs, ones)); return vec_and(vec_sel(vec_sl(a, b_abs), vec_add(vec_sra(a_shr, ones), vec_and(a_shr, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), ones))), vec_cmplt(b, zero)), vec_cmplt(b_abs, max)); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int8_t, 
(abs(b_.values[i]) >= 8) ? 0 : (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i])); } return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshlq_s8 #define vrshlq_s8(a, b) simde_vrshlq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vrshlq_s16 (const simde_int16x8_t a, const simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrshlq_s16(a, b); #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) const __m128i zero = _mm_setzero_si128(); const __m128i ff = _mm_cmpeq_epi16(zero, zero); __m128i b_ = _mm_srai_epi16(_mm_slli_epi16(b, 8), 8); __m128i a_shr = _mm_srav_epi16(a, _mm_xor_si128(b_, ff)); return _mm_blendv_epi8(_mm_sllv_epi16(a, b_), _mm_srai_epi16(_mm_sub_epi16(a_shr, ff), 1), _mm_cmpgt_epi16(zero, b_)); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_ARCH_AMD64) const __m256i zero = _mm256_setzero_si256(); const __m256i ff = _mm256_cmpeq_epi32(zero, zero); __m256i a256 = _mm256_cvtepi16_epi32(a); __m256i b256 = _mm256_cvtepi16_epi32(b); b256 = _mm256_srai_epi32(_mm256_slli_epi32(b256, 24), 24); __m256i a256_shr = _mm256_srav_epi32(a256, _mm256_xor_si256(b256, ff)); __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), _mm256_srai_epi32(_mm256_sub_epi32(a256_shr, ff), 1), _mm256_cmpgt_epi32(zero, b256)); r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi64x(0x0D0C090805040100)); return _mm_set_epi64x(_mm256_extract_epi64(r256, 2), _mm256_extract_epi64(r256, 0)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) const SIMDE_POWER_ALTIVEC_VECTOR( signed short) zero = vec_splats(HEDLEY_STATIC_CAST( signed short, 0)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 1)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) shift = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 16 - 8)); const 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) max = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 16)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) ff = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0xFF)); SIMDE_POWER_ALTIVEC_VECTOR(signed short) a_shr; SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) b_abs; b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), ff); a_shr = vec_sra(a, vec_sub(b_abs, ones)); return vec_and(vec_sel(vec_sl(a, b_abs), vec_add(vec_sra(a_shr, ones), vec_and(a_shr, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), ones))), vec_cmplt(vec_sl(b, shift), zero)), vec_cmplt(b_abs, max)); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (abs(b_.values[i]) >= 16) ? 0 : (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i])); } return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshlq_s16 #define vrshlq_s16(a, b) simde_vrshlq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vrshlq_s32 (const simde_int32x4_t a, const simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrshlq_s32(a, b); #elif defined(SIMDE_X86_AVX2_NATIVE) const __m128i zero = _mm_setzero_si128(); const __m128i ff = _mm_cmpeq_epi32(zero, zero); __m128i b_ = _mm_srai_epi32(_mm_slli_epi32(b, 24), 24); __m128i a_shr = _mm_srav_epi32(a, _mm_xor_si128(b_, ff)); return _mm_blendv_epi8(_mm_sllv_epi32(a, b_), _mm_srai_epi32(_mm_sub_epi32(a_shr, ff), 1), _mm_cmpgt_epi32(zero, b_)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) const SIMDE_POWER_ALTIVEC_VECTOR( signed int) zero = vec_splats(HEDLEY_STATIC_CAST( signed int, 0)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 1)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) shift = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32 - 8)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) max = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) ff = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0xFF)); SIMDE_POWER_ALTIVEC_VECTOR(signed int) a_shr; SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) b_abs; b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), ff); a_shr = vec_sra(a, vec_sub(b_abs, ones)); return vec_and(vec_sel(vec_sl(a, b_abs), vec_add(vec_sra(a_shr, ones), vec_and(a_shr, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), ones))), vec_cmplt(vec_sl(b, shift), zero)), vec_cmplt(b_abs, max)); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = 
simde_int32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = HEDLEY_STATIC_CAST(int32_t, (abs(b_.values[i]) >= 32) ? 0 : (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i])); } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshlq_s32 #define vrshlq_s32(a, b) simde_vrshlq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vrshlq_s64 (const simde_int64x2_t a, const simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrshlq_s64(a, b); #elif defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) const __m128i zero = _mm_setzero_si128(); const __m128i ff = _mm_cmpeq_epi32(zero, zero); __m128i b_ = _mm_srai_epi64(_mm_slli_epi64(b, 56), 56); __m128i a_shr = _mm_srav_epi64(a, _mm_xor_si128(b_, ff)); return _mm_blendv_epi8(_mm_sllv_epi64(a, b_), _mm_srai_epi64(_mm_sub_epi64(a_shr, ff), 1), _mm_cmpgt_epi64(zero, b_)); #elif defined(SIMDE_X86_AVX2_NATIVE) const __m128i zero = _mm_setzero_si128(); const __m128i ones = _mm_set1_epi64x(1); __m128i maska = _mm_cmpgt_epi64(zero, a); __m128i b_abs = _mm_and_si128(_mm_abs_epi8(b), _mm_set1_epi64x(0xFF)); __m128i a_rnd = _mm_and_si128(_mm_srlv_epi64(a, _mm_sub_epi64(b_abs, ones)), ones); return _mm_blendv_epi8(_mm_sllv_epi64(a, b_abs), _mm_add_epi64(_mm_xor_si128(_mm_srlv_epi64(_mm_xor_si128(a, maska), b_abs), maska), a_rnd), _mm_cmpgt_epi64(zero, _mm_slli_epi64(b, 56))); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) const SIMDE_POWER_ALTIVEC_VECTOR( signed long long) zero = vec_splats(HEDLEY_STATIC_CAST( signed long long, 0)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 1)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) shift = 
vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64 - 8)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) max = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) ff = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 0xFF)); SIMDE_POWER_ALTIVEC_VECTOR(signed long long) a_shr; SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) b_abs; b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), ff); a_shr = vec_sra(a, vec_sub(b_abs, ones)); HEDLEY_DIAGNOSTIC_PUSH #if defined(SIMDE_BUG_CLANG_46770) SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ #endif return vec_and(vec_sel(vec_sl(a, b_abs), vec_add(vec_sra(a_shr, ones), vec_and(a_shr, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), ones))), vec_cmplt(vec_sl(b, shift), zero)), vec_cmplt(b_abs, max)); HEDLEY_DIAGNOSTIC_POP #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = HEDLEY_STATIC_CAST(int64_t, (llabs(b_.values[i]) >= 64) ? 0 : (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : ((a_.values[i] + (INT64_C(1) << (-b_.values[i] - 1))) >> -b_.values[i])); } return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshlq_s64 #define vrshlq_s64(a, b) simde_vrshlq_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vrshlq_u8 (const simde_uint8x16_t a, const simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrshlq_u8(a, b); #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) const __m256i zero = _mm256_setzero_si256(); const __m256i ff = _mm256_cmpeq_epi32(zero, zero); __m256i a256 = _mm256_cvtepu8_epi16(a); __m256i b256 = _mm256_cvtepi8_epi16(b); __m256i a256_shr = _mm256_srlv_epi16(a256, _mm256_xor_si256(b256, ff)); __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi16(a256, b256), _mm256_srli_epi16(_mm256_sub_epi16(a256_shr, ff), 1), _mm256_cmpgt_epi16(zero, b256)); return _mm256_cvtepi16_epi8(r256); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) const SIMDE_POWER_ALTIVEC_VECTOR( signed char) zero = vec_splats(HEDLEY_STATIC_CAST( signed char, 0)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 1)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) max = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 8)); SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) b_abs, b_abs_dec, a_shr; b_abs = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_abs(b)); b_abs_dec = vec_sub(b_abs, ones); a_shr = vec_and(vec_sr(a, b_abs_dec), vec_cmplt(b_abs_dec, max)); return vec_sel(vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, max)), vec_sr(vec_add(a_shr, ones), ones), vec_cmplt(b, zero)); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a); simde_int8x16_private b_ = simde_int8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (b_.values[i] >= 8) ? 
0 : (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : (b_.values[i] >= -8) ? (((b_.values[i] == -8) ? 0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : 0); } return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshlq_u8 #define vrshlq_u8(a, b) simde_vrshlq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vrshlq_u16 (const simde_uint16x8_t a, const simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrshlq_u16(a, b); #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) const __m128i zero = _mm_setzero_si128(); const __m128i ff = _mm_cmpeq_epi16(zero, zero); __m128i b_ = _mm_srai_epi16(_mm_slli_epi16(b, 8), 8); __m128i a_shr = _mm_srlv_epi16(a, _mm_xor_si128(b_, ff)); return _mm_blendv_epi8(_mm_sllv_epi16(a, b_), _mm_srli_epi16(_mm_sub_epi16(a_shr, ff), 1), _mm_cmpgt_epi16(zero, b_)); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_ARCH_AMD64) const __m256i zero = _mm256_setzero_si256(); const __m256i ff = _mm256_cmpeq_epi32(zero, zero); __m256i a256 = _mm256_cvtepu16_epi32(a); __m256i b256 = _mm256_cvtepi16_epi32(b); b256 = _mm256_srai_epi32(_mm256_slli_epi32(b256, 24), 24); __m256i a256_shr = _mm256_srlv_epi32(a256, _mm256_xor_si256(b256, ff)); __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), _mm256_srli_epi32(_mm256_sub_epi32(a256_shr, ff), 1), _mm256_cmpgt_epi32(zero, b256)); r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi64x(0x0D0C090805040100)); return _mm_set_epi64x(_mm256_extract_epi64(r256, 2), _mm256_extract_epi64(r256, 0)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) const SIMDE_POWER_ALTIVEC_VECTOR( signed short) zero = vec_splats(HEDLEY_STATIC_CAST( signed short, 0)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 1)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) shift = vec_splats(HEDLEY_STATIC_CAST(unsigned 
short, 16 - 8)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) max = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 16)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) ff = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0xFF)); SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) b_abs, b_abs_dec, a_shr; b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), ff); b_abs_dec = vec_sub(b_abs, ones); a_shr = vec_and(vec_sr(a, b_abs_dec), vec_cmplt(b_abs_dec, max)); return vec_sel(vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, max)), vec_sr(vec_add(a_shr, ones), ones), vec_cmplt(vec_sl(b, shift), zero)); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a); simde_int16x8_private b_ = simde_int16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, (b_.values[i] >= 16) ? 0 : (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : (b_.values[i] >= -16) ? (((b_.values[i] == -16) ? 
0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : 0); } return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshlq_u16 #define vrshlq_u16(a, b) simde_vrshlq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vrshlq_u32 (const simde_uint32x4_t a, const simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrshlq_u32(a, b); #elif defined(SIMDE_X86_AVX2_NATIVE) const __m128i zero = _mm_setzero_si128(); const __m128i ff = _mm_cmpeq_epi32(zero, zero); __m128i b_ = _mm_srai_epi32(_mm_slli_epi32(b, 24), 24); __m128i a_shr = _mm_srlv_epi32(a, _mm_xor_si128(b_, ff)); return _mm_blendv_epi8(_mm_sllv_epi32(a, b_), _mm_srli_epi32(_mm_sub_epi32(a_shr, ff), 1), _mm_cmpgt_epi32(zero, b_)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) const SIMDE_POWER_ALTIVEC_VECTOR( signed int) zero = vec_splats(HEDLEY_STATIC_CAST( signed int, 0)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 1)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) shift = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32 - 8)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) max = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) ff = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0xFF)); SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) b_abs, b_abs_dec, a_shr; b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), ff); b_abs_dec = vec_sub(b_abs, ones); a_shr = vec_and(vec_sr(a, b_abs_dec), vec_cmplt(b_abs_dec, max)); return vec_sel(vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, max)), vec_sr(vec_add(a_shr, ones), ones), vec_cmplt(vec_sl(b, shift), zero)); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a); simde_int32x4_private b_ = simde_int32x4_to_private(b); SIMDE_VECTORIZE for (size_t 
i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = (b_.values[i] >= 32) ? 0 : (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : (b_.values[i] >= -32) ? (((b_.values[i] == -32) ? 0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : 0; } return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshlq_u32 #define vrshlq_u32(a, b) simde_vrshlq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vrshlq_u64 (const simde_uint64x2_t a, const simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vrshlq_u64(a, b); #elif defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) const __m128i zero = _mm_setzero_si128(); const __m128i ff = _mm_cmpeq_epi64(zero, zero); __m128i b_ = _mm_srai_epi64(_mm_slli_epi64(b, 56), 56); __m128i a_shr = _mm_srlv_epi64(a, _mm_xor_si128(b_, ff)); return _mm_blendv_epi8(_mm_sllv_epi64(a, b_), _mm_srli_epi64(_mm_sub_epi64(a_shr, ff), 1), _mm_cmpgt_epi64(zero, b_)); #elif defined(SIMDE_X86_AVX2_NATIVE) const __m128i ones = _mm_set1_epi64x(1); __m128i b_abs = _mm_and_si128(_mm_abs_epi8(b), _mm_set1_epi64x(0xFF)); __m128i a_shr = _mm_srlv_epi64(a, _mm_sub_epi64(b_abs, ones)); return _mm_blendv_epi8(_mm_sllv_epi64(a, b_abs), _mm_srli_epi64(_mm_add_epi64(a_shr, ones), 1), _mm_cmpgt_epi64(_mm_setzero_si128(), _mm_slli_epi64(b, 56))); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) const SIMDE_POWER_ALTIVEC_VECTOR( signed long long) zero = vec_splats(HEDLEY_STATIC_CAST( signed long long, 0)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 1)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) shift = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64 - 8)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) max = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 
64)); const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) ff = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 0xFF)); SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) b_abs, b_abs_dec, a_shr; b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), ff); b_abs_dec = vec_sub(b_abs, ones); a_shr = vec_and(vec_sr(a, b_abs_dec), vec_cmplt(b_abs_dec, max)); HEDLEY_DIAGNOSTIC_PUSH #if defined(SIMDE_BUG_CLANG_46770) SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ #endif return vec_sel(vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, max)), vec_sr(vec_add(a_shr, ones), ones), vec_cmplt(vec_sl(b, shift), zero)); HEDLEY_DIAGNOSTIC_POP #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a); simde_int64x2_private b_ = simde_int64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = (b_.values[i] >= 64) ? 0 : (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : (b_.values[i] >= -64) ? (((b_.values[i] == -64) ? 
0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : 0; } return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshlq_u64 #define vrshlq_u64(a, b) simde_vrshlq_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_RSHL_H) */ simde-0.7.2/simde/arm/neon/rshr_n.h000066400000000000000000000405501400333146700171110ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_RSHR_N_H) #define SIMDE_ARM_NEON_RSHR_N_H #include "combine.h" #include "dup_n.h" #include "get_low.h" #include "reinterpret.h" #include "shr_n.h" #include "sub.h" #include "tst.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vrshrq_n_s8 (const simde_int8x16_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { simde_int8x16_private r_, a_ = simde_int8x16_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int8_t, (a_.values[i] + (1 << (n - 1))) >> n); } return simde_int8x16_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrshrq_n_s8(a, n) vrshrq_n_s8((a), (n)) #elif SIMDE_NATURAL_VECTOR_SIZE > 0 #define simde_vrshrq_n_s8(a, n) simde_vsubq_s8(simde_vshrq_n_s8((a), (n)), simde_vreinterpretq_s8_u8( \ simde_vtstq_u8(simde_vreinterpretq_u8_s8(a), \ simde_vdupq_n_u8(HEDLEY_STATIC_CAST(uint8_t, 1 << ((n) - 1)))))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshrq_n_s8 #define vrshrq_n_s8(a, n) simde_vrshrq_n_s8((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vrshrq_n_s16 (const simde_int16x8_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { simde_int16x8_private r_, a_ = simde_int16x8_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (a_.values[i] + (1 << (n - 1))) >> n); } return simde_int16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrshrq_n_s16(a, n) vrshrq_n_s16((a), (n)) #elif SIMDE_NATURAL_VECTOR_SIZE > 0 #define simde_vrshrq_n_s16(a, n) simde_vsubq_s16(simde_vshrq_n_s16((a), (n)), simde_vreinterpretq_s16_u16( \ simde_vtstq_u16(simde_vreinterpretq_u16_s16(a), \ 
simde_vdupq_n_u16(HEDLEY_STATIC_CAST(uint16_t, 1 << ((n) - 1)))))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshrq_n_s16 #define vrshrq_n_s16(a, n) simde_vrshrq_n_s16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vrshrq_n_s32 (const simde_int32x4_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { simde_int32x4_private r_, a_ = simde_int32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] >> ((n == 32) ? 31 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << (n - 1))) != 0); } return simde_int32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrshrq_n_s32(a, n) vrshrq_n_s32((a), (n)) #elif SIMDE_NATURAL_VECTOR_SIZE > 0 #define simde_vrshrq_n_s32(a, n) simde_vsubq_s32(simde_vshrq_n_s32((a), (n)), \ simde_vreinterpretq_s32_u32(simde_vtstq_u32(simde_vreinterpretq_u32_s32(a), \ simde_vdupq_n_u32(UINT32_C(1) << ((n) - 1))))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshrq_n_s32 #define vrshrq_n_s32(a, n) simde_vrshrq_n_s32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vrshrq_n_s64 (const simde_int64x2_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { simde_int64x2_private r_, a_ = simde_int64x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] >> ((n == 64) ? 
63 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << (n - 1))) != 0); } return simde_int64x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrshrq_n_s64(a, n) vrshrq_n_s64((a), (n)) #elif SIMDE_NATURAL_VECTOR_SIZE > 0 #define simde_vrshrq_n_s64(a, n) simde_vsubq_s64(simde_vshrq_n_s64((a), (n)), \ simde_vreinterpretq_s64_u64(simde_vtstq_u64(simde_vreinterpretq_u64_s64(a), \ simde_vdupq_n_u64(UINT64_C(1) << ((n) - 1))))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshrq_n_s64 #define vrshrq_n_s64(a, n) simde_vrshrq_n_s64((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vrshrq_n_u8 (const simde_uint8x16_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.values[i] + (1 << (n - 1))) >> n); } return simde_uint8x16_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrshrq_n_u8(a, n) vrshrq_n_u8((a), (n)) #elif SIMDE_NATURAL_VECTOR_SIZE > 0 #define simde_vrshrq_n_u8(a, n) simde_vsubq_u8(simde_vshrq_n_u8((a), (n)), \ simde_vtstq_u8((a), simde_vdupq_n_u8(HEDLEY_STATIC_CAST(uint8_t, 1 << ((n) - 1))))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshrq_n_u8 #define vrshrq_n_u8(a, n) simde_vrshrq_n_u8((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vrshrq_n_u16 (const simde_uint16x8_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.values[i] + (1 << (n - 1))) >> n); } return simde_uint16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrshrq_n_u16(a, n) vrshrq_n_u16((a), (n)) #elif 
SIMDE_NATURAL_VECTOR_SIZE > 0 #define simde_vrshrq_n_u16(a, n) simde_vsubq_u16(simde_vshrq_n_u16((a), (n)), \ simde_vtstq_u16((a), simde_vdupq_n_u16(HEDLEY_STATIC_CAST(uint16_t, 1 << ((n) - 1))))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshrq_n_u16 #define vrshrq_n_u16(a, n) simde_vrshrq_n_u16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vrshrq_n_u32 (const simde_uint32x4_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((n == 32) ? 0 : (a_.values[i] >> n)) + ((a_.values[i] & (UINT32_C(1) << (n - 1))) != 0); } return simde_uint32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrshrq_n_u32(a, n) vrshrq_n_u32((a), (n)) #elif SIMDE_NATURAL_VECTOR_SIZE > 0 #define simde_vrshrq_n_u32(a, n) simde_vsubq_u32(simde_vshrq_n_u32((a), (n)), \ simde_vtstq_u32((a), simde_vdupq_n_u32(UINT32_C(1) << ((n) - 1)))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshrq_n_u32 #define vrshrq_n_u32(a, n) simde_vrshrq_n_u32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vrshrq_n_u64 (const simde_uint64x2_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((n == 64) ? 
0 : (a_.values[i] >> n)) + ((a_.values[i] & (UINT64_C(1) << (n - 1))) != 0); } return simde_uint64x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrshrq_n_u64(a, n) vrshrq_n_u64((a), (n)) #elif SIMDE_NATURAL_VECTOR_SIZE > 0 #define simde_vrshrq_n_u64(a, n) simde_vsubq_u64(simde_vshrq_n_u64((a), (n)), \ simde_vtstq_u64((a), simde_vdupq_n_u64(UINT64_C(1) << ((n) - 1)))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshrq_n_u64 #define vrshrq_n_u64(a, n) simde_vrshrq_n_u64((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vrshr_n_s8 (const simde_int8x8_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { simde_int8x8_private r_, a_ = simde_int8x8_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int8_t, (a_.values[i] + (1 << (n - 1))) >> n); } return simde_int8x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrshr_n_s8(a, n) vrshr_n_s8((a), (n)) #elif SIMDE_NATURAL_VECTOR_SIZE > 0 #define simde_vrshr_n_s8(a, n) simde_vsub_s8(simde_vshr_n_s8((a), (n)), simde_vreinterpret_s8_u8( \ simde_vtst_u8(simde_vreinterpret_u8_s8(a), \ simde_vdup_n_u8(HEDLEY_STATIC_CAST(uint8_t, 1 << ((n) - 1)))))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshr_n_s8 #define vrshr_n_s8(a, n) simde_vrshr_n_s8((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vrshr_n_s16 (const simde_int16x4_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { simde_int16x4_private r_, a_ = simde_int16x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (a_.values[i] + (1 << (n - 1))) >> n); } return simde_int16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrshr_n_s16(a, n) vrshr_n_s16((a), (n)) #elif SIMDE_NATURAL_VECTOR_SIZE > 0 #define 
simde_vrshr_n_s16(a, n) simde_vsub_s16(simde_vshr_n_s16((a), (n)), simde_vreinterpret_s16_u16( \ simde_vtst_u16(simde_vreinterpret_u16_s16(a), \ simde_vdup_n_u16(HEDLEY_STATIC_CAST(uint16_t, 1 << ((n) - 1)))))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshr_n_s16 #define vrshr_n_s16(a, n) simde_vrshr_n_s16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vrshr_n_s32 (const simde_int32x2_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { simde_int32x2_private r_, a_ = simde_int32x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] >> ((n == 32) ? 31 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << (n - 1))) != 0); } return simde_int32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrshr_n_s32(a, n) vrshr_n_s32((a), (n)) #elif SIMDE_NATURAL_VECTOR_SIZE > 0 #define simde_vrshr_n_s32(a, n) simde_vsub_s32(simde_vshr_n_s32((a), (n)), \ simde_vreinterpret_s32_u32(simde_vtst_u32(simde_vreinterpret_u32_s32(a), \ simde_vdup_n_u32(UINT32_C(1) << ((n) - 1))))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshr_n_s32 #define vrshr_n_s32(a, n) simde_vrshr_n_s32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vrshr_n_s64 (const simde_int64x1_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { simde_int64x1_private r_, a_ = simde_int64x1_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (a_.values[i] >> ((n == 64) ? 
63 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << (n - 1))) != 0); } return simde_int64x1_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrshr_n_s64(a, n) vrshr_n_s64((a), (n)) #elif SIMDE_NATURAL_VECTOR_SIZE > 0 #define simde_vrshr_n_s64(a, n) simde_vsub_s64(simde_vshr_n_s64((a), (n)), \ simde_vreinterpret_s64_u64(simde_vtst_u64(simde_vreinterpret_u64_s64(a), \ simde_vdup_n_u64(UINT64_C(1) << ((n) - 1))))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshr_n_s64 #define vrshr_n_s64(a, n) simde_vrshr_n_s64((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vrshr_n_u8 (const simde_uint8x8_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.values[i] + (1 << (n - 1))) >> n); } return simde_uint8x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrshr_n_u8(a, n) vrshr_n_u8((a), (n)) #elif SIMDE_NATURAL_VECTOR_SIZE > 0 #define simde_vrshr_n_u8(a, n) simde_vsub_u8(simde_vshr_n_u8((a), (n)), \ simde_vtst_u8((a), simde_vdup_n_u8(HEDLEY_STATIC_CAST(uint8_t, 1 << ((n) - 1))))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshr_n_u8 #define vrshr_n_u8(a, n) simde_vrshr_n_u8((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vrshr_n_u16 (const simde_uint16x4_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.values[i] + (1 << (n - 1))) >> n); } return simde_uint16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrshr_n_u16(a, n) vrshr_n_u16((a), (n)) #elif SIMDE_NATURAL_VECTOR_SIZE > 0 
#define simde_vrshr_n_u16(a, n) simde_vsub_u16(simde_vshr_n_u16((a), (n)), \ simde_vtst_u16((a), simde_vdup_n_u16(HEDLEY_STATIC_CAST(uint16_t, 1 << ((n) - 1))))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshr_n_u16 #define vrshr_n_u16(a, n) simde_vrshr_n_u16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vrshr_n_u32 (const simde_uint32x2_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((n == 32) ? 0 : (a_.values[i] >> n)) + ((a_.values[i] & (UINT32_C(1) << (n - 1))) != 0); } return simde_uint32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrshr_n_u32(a, n) vrshr_n_u32((a), (n)) #elif SIMDE_NATURAL_VECTOR_SIZE > 0 #define simde_vrshr_n_u32(a, n) simde_vsub_u32(simde_vshr_n_u32((a), (n)), \ simde_vtst_u32((a), simde_vdup_n_u32(UINT32_C(1) << ((n) - 1)))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshr_n_u32 #define vrshr_n_u32(a, n) simde_vrshr_n_u32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vrshr_n_u64 (const simde_uint64x1_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { simde_uint64x1_private r_, a_ = simde_uint64x1_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((n == 64) ? 
0 : (a_.values[i] >> n)) + ((a_.values[i] & (UINT64_C(1) << (n - 1))) != 0); } return simde_uint64x1_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrshr_n_u64(a, n) vrshr_n_u64((a), (n)) #elif SIMDE_NATURAL_VECTOR_SIZE > 0 #define simde_vrshr_n_u64(a, n) simde_vsub_u64(simde_vshr_n_u64((a), (n)), \ simde_vtst_u64((a), simde_vdup_n_u64(UINT64_C(1) << ((n) - 1)))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrshr_n_u64 #define vrshr_n_u64(a, n) simde_vrshr_n_u64((a), (n)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_RSHR_N_H) */ simde-0.7.2/simde/arm/neon/rsra_n.h000066400000000000000000000163231400333146700171030ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_RSRA_N_H) #define SIMDE_ARM_NEON_RSRA_N_H #include "add.h" #include "combine.h" #include "get_low.h" #include "rshr_n.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ /* Remark: For these instructions * 1 <= n <= data element size in bits * so 0 <= n - 1 < data element size in bits */ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrsraq_n_s8(a, b, n) vrsraq_n_s8((a), (b), (n)) #else #define simde_vrsraq_n_s8(a, b, n) simde_vaddq_s8((a), simde_vrshrq_n_s8((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrsraq_n_s8 #define vrsraq_n_s8(a, b, n) simde_vrsraq_n_s8((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrsraq_n_s16(a, b, n) vrsraq_n_s16((a), (b), (n)) #else #define simde_vrsraq_n_s16(a, b, n) simde_vaddq_s16((a), simde_vrshrq_n_s16((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrsraq_n_s16 #define vrsraq_n_s16(a, b, n) simde_vrsraq_n_s16((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrsraq_n_s32(a, b, n) vrsraq_n_s32((a), (b), (n)) #else #define simde_vrsraq_n_s32(a, b, n) simde_vaddq_s32((a), simde_vrshrq_n_s32((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrsraq_n_s32 #define vrsraq_n_s32(a, b, n) simde_vrsraq_n_s32((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrsraq_n_s64(a, b, n) vrsraq_n_s64((a), (b), (n)) #else #define simde_vrsraq_n_s64(a, b, n) simde_vaddq_s64((a), simde_vrshrq_n_s64((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrsraq_n_s64 #define vrsraq_n_s64(a, b, n) simde_vrsraq_n_s64((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrsraq_n_u8(a, b, n) vrsraq_n_u8((a), (b), (n)) #else #define simde_vrsraq_n_u8(a, b, n) simde_vaddq_u8((a), 
simde_vrshrq_n_u8((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrsraq_n_u8 #define vrsraq_n_u8(a, b, n) simde_vrsraq_n_u8((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrsraq_n_u16(a, b, n) vrsraq_n_u16((a), (b), (n)) #else #define simde_vrsraq_n_u16(a, b, n) simde_vaddq_u16((a), simde_vrshrq_n_u16((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrsraq_n_u16 #define vrsraq_n_u16(a, b, n) simde_vrsraq_n_u16((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrsraq_n_u32(a, b, n) vrsraq_n_u32((a), (b), (n)) #else #define simde_vrsraq_n_u32(a, b, n) simde_vaddq_u32((a), simde_vrshrq_n_u32((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrsraq_n_u32 #define vrsraq_n_u32(a, b, n) simde_vrsraq_n_u32((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrsraq_n_u64(a, b, n) vrsraq_n_u64((a), (b), (n)) #else #define simde_vrsraq_n_u64(a, b, n) simde_vaddq_u64((a), simde_vrshrq_n_u64((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrsraq_n_u64 #define vrsraq_n_u64(a, b, n) simde_vrsraq_n_u64((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrsra_n_s8(a, b, n) vrsra_n_s8((a), (b), (n)) #else #define simde_vrsra_n_s8(a, b, n) simde_vadd_s8((a), simde_vrshr_n_s8((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrsra_n_s8 #define vrsra_n_s8(a, b, n) simde_vrsra_n_s8((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrsra_n_s16(a, b, n) vrsra_n_s16((a), (b), (n)) #else #define simde_vrsra_n_s16(a, b, n) simde_vadd_s16((a), simde_vrshr_n_s16((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrsra_n_s16 #define vrsra_n_s16(a, b, n) simde_vrsra_n_s16((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrsra_n_s32(a, b, n) 
vrsra_n_s32((a), (b), (n)) #else #define simde_vrsra_n_s32(a, b, n) simde_vadd_s32((a), simde_vrshr_n_s32((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrsra_n_s32 #define vrsra_n_s32(a, b, n) simde_vrsra_n_s32((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrsra_n_s64(a, b, n) vrsra_n_s64((a), (b), (n)) #else #define simde_vrsra_n_s64(a, b, n) simde_vadd_s64((a), simde_vrshr_n_s64((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrsra_n_s64 #define vrsra_n_s64(a, b, n) simde_vrsra_n_s64((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrsra_n_u8(a, b, n) vrsra_n_u8((a), (b), (n)) #else #define simde_vrsra_n_u8(a, b, n) simde_vadd_u8((a), simde_vrshr_n_u8((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrsra_n_u8 #define vrsra_n_u8(a, b, n) simde_vrsra_n_u8((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrsra_n_u16(a, b, n) vrsra_n_u16((a), (b), (n)) #else #define simde_vrsra_n_u16(a, b, n) simde_vadd_u16((a), simde_vrshr_n_u16((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrsra_n_u16 #define vrsra_n_u16(a, b, n) simde_vrsra_n_u16((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrsra_n_u32(a, b, n) vrsra_n_u32((a), (b), (n)) #else #define simde_vrsra_n_u32(a, b, n) simde_vadd_u32((a), simde_vrshr_n_u32((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrsra_n_u32 #define vrsra_n_u32(a, b, n) simde_vrsra_n_u32((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vrsra_n_u64(a, b, n) vrsra_n_u64((a), (b), (n)) #else #define simde_vrsra_n_u64(a, b, n) simde_vadd_u64((a), simde_vrshr_n_u64((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vrsra_n_u64 #define vrsra_n_u64(a, b, n) simde_vrsra_n_u64((a), (b), (n)) #endif SIMDE_END_DECLS_ 
HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_RSRA_N_H) */ simde-0.7.2/simde/arm/neon/set_lane.h000066400000000000000000000327351400333146700174160ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_SET_LANE_H) #define SIMDE_ARM_NEON_SET_LANE_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vset_lane_f32(simde_float32_t a, simde_float32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float32x2_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) SIMDE_CONSTIFY_2_(vset_lane_f32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); #else simde_float32x2_private v_ = simde_float32x2_to_private(v); v_.values[lane] = a; r = simde_float32x2_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vset_lane_f32 #define vset_lane_f32(a, b, c) simde_vset_lane_f32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vset_lane_f64(simde_float64_t a, simde_float64x1_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { simde_float64x1_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) (void) lane; r = vset_lane_f64(a, v, 0); #else simde_float64x1_private v_ = simde_float64x1_to_private(v); v_.values[lane] = a; r = simde_float64x1_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vset_lane_f64 #define vset_lane_f64(a, b, c) simde_vset_lane_f64((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vset_lane_s8(int8_t a, simde_int8x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { simde_int8x8_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_8_(vset_lane_s8, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); #else simde_int8x8_private v_ = simde_int8x8_to_private(v); v_.values[lane] = a; r = simde_int8x8_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vset_lane_s8 #define vset_lane_s8(a, b, c) simde_vset_lane_s8((a), (b), (c)) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vset_lane_s16(int16_t a, simde_int16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_int16x4_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_4_(vset_lane_s16, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); #else simde_int16x4_private v_ = simde_int16x4_to_private(v); v_.values[lane] = a; r = simde_int16x4_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vset_lane_s16 #define vset_lane_s16(a, b, c) simde_vset_lane_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vset_lane_s32(int32_t a, simde_int32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_int32x2_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_2_(vset_lane_s32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); #else simde_int32x2_private v_ = simde_int32x2_to_private(v); v_.values[lane] = a; r = simde_int32x2_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vset_lane_s32 #define vset_lane_s32(a, b, c) simde_vset_lane_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vset_lane_s64(int64_t a, simde_int64x1_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { simde_int64x1_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) (void) lane; r = vset_lane_s64(a, v, 0); #else simde_int64x1_private v_ = simde_int64x1_to_private(v); v_.values[lane] = a; r = simde_int64x1_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vset_lane_s64 #define vset_lane_s64(a, b, c) simde_vset_lane_s64((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vset_lane_u8(uint8_t a, simde_uint8x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { simde_uint8x8_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_8_(vset_lane_u8, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); #else 
simde_uint8x8_private v_ = simde_uint8x8_to_private(v); v_.values[lane] = a; r = simde_uint8x8_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vset_lane_u8 #define vset_lane_u8(a, b, c) simde_vset_lane_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vset_lane_u16(uint16_t a, simde_uint16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_uint16x4_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_4_(vset_lane_u16, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); #else simde_uint16x4_private v_ = simde_uint16x4_to_private(v); v_.values[lane] = a; r = simde_uint16x4_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vset_lane_u16 #define vset_lane_u16(a, b, c) simde_vset_lane_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vset_lane_u32(uint32_t a, simde_uint32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_uint32x2_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_2_(vset_lane_u32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); #else simde_uint32x2_private v_ = simde_uint32x2_to_private(v); v_.values[lane] = a; r = simde_uint32x2_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vset_lane_u32 #define vset_lane_u32(a, b, c) simde_vset_lane_u32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vset_lane_u64(uint64_t a, simde_uint64x1_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { simde_uint64x1_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) (void) lane; r = vset_lane_u64(a, v, 0); #else simde_uint64x1_private v_ = simde_uint64x1_to_private(v); v_.values[lane] = a; r = simde_uint64x1_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vset_lane_u64 #define vset_lane_u64(a, b, c) simde_vset_lane_u64((a), (b), (c)) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vsetq_lane_f32(simde_float32_t a, simde_float32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_float32x4_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_4_(vsetq_lane_f32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); #else simde_float32x4_private v_ = simde_float32x4_to_private(v); v_.values[lane] = a; r = simde_float32x4_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsetq_lane_f32 #define vsetq_lane_f32(a, b, c) simde_vsetq_lane_f32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vsetq_lane_f64(simde_float64_t a, simde_float64x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_float64x2_t r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) SIMDE_CONSTIFY_2_(vsetq_lane_f64, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); #else simde_float64x2_private v_ = simde_float64x2_to_private(v); v_.values[lane] = a; r = simde_float64x2_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vsetq_lane_f64 #define vsetq_lane_f64(a, b, c) simde_vsetq_lane_f64((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vsetq_lane_s8(int8_t a, simde_int8x16_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { simde_int8x16_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_16_(vsetq_lane_s8, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); #else simde_int8x16_private v_ = simde_int8x16_to_private(v); v_.values[lane] = a; r = simde_int8x16_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsetq_lane_s8 #define vsetq_lane_s8(a, b, c) simde_vsetq_lane_s8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vsetq_lane_s16(int16_t a, simde_int16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { simde_int16x8_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) 
SIMDE_CONSTIFY_8_(vsetq_lane_s16, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); #else simde_int16x8_private v_ = simde_int16x8_to_private(v); v_.values[lane] = a; r = simde_int16x8_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsetq_lane_s16 #define vsetq_lane_s16(a, b, c) simde_vsetq_lane_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vsetq_lane_s32(int32_t a, simde_int32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_int32x4_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_4_(vsetq_lane_s32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); #else simde_int32x4_private v_ = simde_int32x4_to_private(v); v_.values[lane] = a; r = simde_int32x4_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsetq_lane_s32 #define vsetq_lane_s32(a, b, c) simde_vsetq_lane_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vsetq_lane_s64(int64_t a, simde_int64x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_int64x2_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_2_(vsetq_lane_s64, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); #else simde_int64x2_private v_ = simde_int64x2_to_private(v); v_.values[lane] = a; r = simde_int64x2_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsetq_lane_s64 #define vsetq_lane_s64(a, b, c) simde_vsetq_lane_s64((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vsetq_lane_u8(uint8_t a, simde_uint8x16_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { simde_uint8x16_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_16_(vsetq_lane_u8, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); #else simde_uint8x16_private v_ = simde_uint8x16_to_private(v); v_.values[lane] = a; r = simde_uint8x16_from_private(v_); #endif return r; } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsetq_lane_u8 #define vsetq_lane_u8(a, b, c) simde_vsetq_lane_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vsetq_lane_u16(uint16_t a, simde_uint16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { simde_uint16x8_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_8_(vsetq_lane_u16, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); #else simde_uint16x8_private v_ = simde_uint16x8_to_private(v); v_.values[lane] = a; r = simde_uint16x8_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsetq_lane_u16 #define vsetq_lane_u16(a, b, c) simde_vsetq_lane_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vsetq_lane_u32(uint32_t a, simde_uint32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { simde_uint32x4_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_4_(vsetq_lane_u32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); #else simde_uint32x4_private v_ = simde_uint32x4_to_private(v); v_.values[lane] = a; r = simde_uint32x4_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsetq_lane_u32 #define vsetq_lane_u32(a, b, c) simde_vsetq_lane_u32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vsetq_lane_u64(uint64_t a, simde_uint64x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { simde_uint64x2_t r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_2_(vsetq_lane_u64, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); #else simde_uint64x2_private v_ = simde_uint64x2_to_private(v); v_.values[lane] = a; r = simde_uint64x2_from_private(v_); #endif return r; } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsetq_lane_u64 #define vsetq_lane_u64(a, b, c) simde_vsetq_lane_u64((a), (b), (c)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_SET_LANE_H) */ 
simde-0.7.2/simde/arm/neon/shl.h000066400000000000000000001053271400333146700164100ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_SHL_H) #define SIMDE_ARM_NEON_SHL_H #include "types.h" /* Notes from the implementer (Christopher Moore aka rosbif) * * I have tried to exactly reproduce the documented behaviour of the * ARM NEON shl and shlq intrinsics. * This is complicated for the following reasons:- * * a) Negative shift counts shift right. * * b) Only the low byte of the shift count is used but the shift count * is not limited to 8-bit values (-128 to 127). * * c) Intel SIMD is not nearly as complete as NEON and AltiVec. * There were no intrinsics with a vector shift count before AVX2 which * only has 32 and 64-bit logical ones and only a 32-bit arithmetic * one. The others need AVX512.
There are no 8-bit shift intrinsics at * all, even with a scalar shift count. It is surprising to use AVX2 * and even AVX512 to implement a 64-bit vector operation. * * d) Many shift implementations, and the C standard, do not treat a * shift count >= the object's size in bits as one would expect. * (Personally I feel that > is silly but == can be useful.) * * Maybe it would be useful for SIMDe to have a flag enabling a fast * implementation where the result is only guaranteed for shift counts * conforming to the C standard. * * Note that even the C17/18 standard does not define the behaviour of * a right shift of a negative value. * However Evan and I agree that all compilers likely to be used * implement this as an arithmetic right shift with sign extension. * If this is not the case it could be replaced by a logical right shift * if negative values are complemented before and after the shift. * * Some of the SIMD translations may be slower than the portable code, * particularly those for vectors with only one or two elements. 
* But I had fun writing them ;-) * */ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vshl_s8 (const simde_int8x8_t a, const simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vshl_s8(a, b); #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m128i a128 = _mm_cvtepi8_epi16(_mm_movpi64_epi64(a)); __m128i b128 = _mm_cvtepi8_epi16(_mm_movpi64_epi64(b)); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi16(a128, b128), _mm_srav_epi16(a128, _mm_abs_epi16(b128)), _mm_cmpgt_epi16(_mm_setzero_si128(), b128)); return _mm_movepi64_pi64(_mm_cvtepi16_epi8(r128)); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m256i a256 = _mm256_cvtepi8_epi32(_mm_movpi64_epi64(a)); __m256i b256 = _mm256_cvtepi8_epi32(_mm_movpi64_epi64(b)); __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), _mm256_srav_epi32(a256, _mm256_abs_epi32(b256)), _mm256_cmpgt_epi32(_mm256_setzero_si256(), b256)); r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi32(0x0C080400)); return _mm_set_pi32(_mm256_extract_epi32(r256, 4), _mm256_extract_epi32(r256, 0)); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int8_t, (b_.values[i] >= 0) ? (b_.values[i] >= 8) ? 0 : (a_.values[i] << b_.values[i]) : (b_.values[i] <= -8) ? 
(a_.values[i] >> 7) : (a_.values[i] >> -b_.values[i])); } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshl_s8 #define vshl_s8(a, b) simde_vshl_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vshl_s16 (const simde_int16x4_t a, const simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vshl_s16(a, b); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m128i a128 = _mm_cvtepi16_epi32(_mm_movpi64_epi64(a)); __m128i b128 = _mm_cvtepi16_epi32(_mm_movpi64_epi64(b)); b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), _mm_srav_epi32(a128, _mm_abs_epi32(b128)), _mm_cmpgt_epi32(_mm_setzero_si128(), b128)); return _mm_movepi64_pi64(_mm_shuffle_epi8(r128, _mm_set1_epi64x(0x0D0C090805040100))); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (b_.values[i] >= 0) ? (b_.values[i] >= 16) ? 0 : (a_.values[i] << b_.values[i]) : (b_.values[i] <= -16) ? 
(a_.values[i] >> 15) : (a_.values[i] >> -b_.values[i])); } return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshl_s16 #define vshl_s16(a, b) simde_vshl_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vshl_s32 (const simde_int32x2_t a, const simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vshl_s32(a, b); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m128i a128 = _mm_movpi64_epi64(a); __m128i b128 = _mm_movpi64_epi64(b); b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), _mm_srav_epi32(a128, _mm_abs_epi32(b128)), _mm_cmpgt_epi32(_mm_setzero_si128(), b128)); return _mm_movepi64_pi64(r128); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = (b_.values[i] >= 0) ? (b_.values[i] >= 32) ? 0 : (a_.values[i] << b_.values[i]) : (b_.values[i] <= -32) ? 
(a_.values[i] >> 31) : (a_.values[i] >> -b_.values[i]); } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshl_s32 #define vshl_s32(a, b) simde_vshl_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vshl_s64 (const simde_int64x1_t a, const simde_int64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vshl_s64(a, b); #elif defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m128i zero = _mm_setzero_si128(); __m128i a128 = _mm_movpi64_epi64(a); __m128i b128 = _mm_movpi64_epi64(b); b128 = _mm_srai_epi64(_mm_slli_epi64(b128, 56), 56); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b128), _mm_srav_epi64(a128, _mm_sub_epi64(zero, b128)), _mm_cmpgt_epi64(zero, b128)); return _mm_movepi64_pi64(r128); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m128i zero = _mm_setzero_si128(); __m128i a128 = _mm_movpi64_epi64(a); __m128i b128 = _mm_movpi64_epi64(b); __m128i maska = _mm_cmpgt_epi64(zero, a128); __m128i b_abs = _mm_and_si128(_mm_abs_epi8(b128), _mm_set1_epi64x(0xFF)); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b_abs), _mm_xor_si128(_mm_srlv_epi64(_mm_xor_si128(a128, maska), b_abs), maska), _mm_cmpgt_epi64(zero, _mm_slli_epi64(b128, 56))); return _mm_movepi64_pi64(r128); #else simde_int64x1_private r_, a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = (b_.values[i] >= 0) ? (b_.values[i] >= 64) ? 0 : (a_.values[i] << b_.values[i]) : (b_.values[i] <= -64) ? 
(a_.values[i] >> 63) : (a_.values[i] >> -b_.values[i]); } return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshl_s64 #define vshl_s64(a, b) simde_vshl_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vshl_u8 (const simde_uint8x8_t a, const simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vshl_u8(a, b); #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m128i a128 = _mm_cvtepu8_epi16(_mm_movpi64_epi64(a)); __m128i b128 = _mm_cvtepi8_epi16(_mm_movpi64_epi64(b)); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi16(a128, b128), _mm_srlv_epi16(a128, _mm_abs_epi16(b128)), _mm_cmpgt_epi16(_mm_setzero_si128(), b128)); return _mm_movepi64_pi64(_mm_cvtepi16_epi8(r128)); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m256i a256 = _mm256_cvtepu8_epi32(_mm_movpi64_epi64(a)); __m256i b256 = _mm256_cvtepi8_epi32(_mm_movpi64_epi64(b)); __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), _mm256_srlv_epi32(a256, _mm256_abs_epi32(b256)), _mm256_cmpgt_epi32(_mm256_setzero_si256(), b256)); r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi32(0x0C080400)); return _mm_set_pi32(_mm256_extract_epi32(r256, 4), _mm256_extract_epi32(r256, 0)); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a); simde_int8x8_private b_ = simde_int8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (abs(b_.values[i]) >= 8) ? 0 : (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : (a_.values[i] >> -b_.values[i])); } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshl_u8 #define vshl_u8(a, b) simde_vshl_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vshl_u16 (const simde_uint16x4_t a, const simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vshl_u16(a, b); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m128i a128 = _mm_cvtepu16_epi32(_mm_movpi64_epi64(a)); __m128i b128 = _mm_cvtepi16_epi32(_mm_movpi64_epi64(b)); b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), _mm_srlv_epi32(a128, _mm_abs_epi32(b128)), _mm_cmpgt_epi32(_mm_setzero_si128(), b128)); return _mm_movepi64_pi64(_mm_shuffle_epi8(r128, _mm_set1_epi64x(0x0D0C090805040100))); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a); simde_int16x4_private b_ = simde_int16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, (abs(b_.values[i]) >= 16) ? 0 : (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : (a_.values[i] >> -b_.values[i])); } return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshl_u16 #define vshl_u16(a, b) simde_vshl_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vshl_u32 (const simde_uint32x2_t a, const simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vshl_u32(a, b); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m128i a128 = _mm_movpi64_epi64(a); __m128i b128 = _mm_movpi64_epi64(b); b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), _mm_srlv_epi32(a128, _mm_abs_epi32(b128)), _mm_cmpgt_epi32(_mm_setzero_si128(), b128)); return _mm_movepi64_pi64(r128); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a); simde_int32x2_private b_ = simde_int32x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = (abs(b_.values[i]) >= 32) ? 0 : (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : (a_.values[i] >> -b_.values[i]); } return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshl_u32 #define vshl_u32(a, b) simde_vshl_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vshl_u64 (const simde_uint64x1_t a, const simde_int64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vshl_u64(a, b); #elif defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m128i zero = _mm_setzero_si128(); __m128i a128 = _mm_movpi64_epi64(a); __m128i b128 = _mm_movpi64_epi64(b); b128 = _mm_srai_epi64(_mm_slli_epi64(b128, 56), 56); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b128), _mm_srlv_epi64(a128, _mm_sub_epi64(zero, b128)), _mm_cmpgt_epi64(zero, b128)); return _mm_movepi64_pi64(r128); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m128i a128 = _mm_movpi64_epi64(a); __m128i b128 = _mm_movpi64_epi64(b); __m128i b_abs = _mm_and_si128(_mm_abs_epi8(b128), _mm_set1_epi64x(0xFF)); __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b_abs), _mm_srlv_epi64(a128, b_abs), _mm_cmpgt_epi64(_mm_setzero_si128(), _mm_slli_epi64(b128, 56))); return _mm_movepi64_pi64(r128); #else simde_uint64x1_private r_, a_ = simde_uint64x1_to_private(a); simde_int64x1_private b_ = simde_int64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = (llabs(b_.values[i]) >= 64) ? 0 : (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : (a_.values[i] >> -b_.values[i]); } return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshl_u64 #define vshl_u64(a, b) simde_vshl_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vshlq_s8 (const simde_int8x16_t a, const simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vshlq_s8(a, b); #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) __m256i a256 = _mm256_cvtepi8_epi16(a); __m256i b256 = _mm256_cvtepi8_epi16(b); __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi16(a256, b256), _mm256_srav_epi16(a256, _mm256_abs_epi16(b256)), _mm256_cmpgt_epi16(_mm256_setzero_si256(), b256)); return _mm256_cvtepi16_epi8(r256); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(signed char) a_shl, a_shr; SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) b_abs, b_max; SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL char) b_mask; b_abs = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_abs(b)); b_max = vec_splat_u8(7); #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) a_shl = vec_and(vec_sl(a, b_abs), vec_cmple(b_abs, b_max)); #else a_shl = vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, vec_splat_u8(8))); #endif a_shr = vec_sra(a, vec_min(b_abs, b_max)); b_mask = vec_cmplt(b, vec_splat_s8(0)); return vec_sel(a_shl, a_shr, b_mask); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int8_t, (b_.values[i] >= 0) ? (b_.values[i] >= 8) ? 0 : (a_.values[i] << b_.values[i]) : (b_.values[i] <= -8) ? 
(a_.values[i] >> 7) : (a_.values[i] >> -b_.values[i])); } return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshlq_s8 #define vshlq_s8(a, b) simde_vshlq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vshlq_s16 (const simde_int16x8_t a, const simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vshlq_s16(a, b); #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) __m128i b_ = _mm_srai_epi16(_mm_slli_epi16(b, 8), 8); return _mm_blendv_epi8(_mm_sllv_epi16(a, b_), _mm_srav_epi16(a, _mm_abs_epi16(b_)), _mm_cmpgt_epi16(_mm_setzero_si128(), b_)); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_ARCH_AMD64) __m256i a256 = _mm256_cvtepi16_epi32(a); __m256i b256 = _mm256_cvtepi16_epi32(b); b256 = _mm256_srai_epi32(_mm256_slli_epi32(b256, 24), 24); __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), _mm256_srav_epi32(a256, _mm256_abs_epi32(b256)), _mm256_cmpgt_epi32(_mm256_setzero_si256(), b256)); r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi64x(0x0D0C090805040100)); return _mm_set_epi64x(_mm256_extract_epi64(r256, 2), _mm256_extract_epi64(r256, 0)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(signed short) a_shl, a_shr; SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) b_abs, b_max; SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL short) b_mask; b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0xFF))); b_max = vec_splat_u16(15); #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) a_shl = vec_and(vec_sl(a, b_abs), vec_cmple(b_abs, b_max)); #else a_shl = vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, vec_splats(HEDLEY_STATIC_CAST(unsigned short, 16)))); #endif a_shr = vec_sra(a, vec_min(b_abs, b_max)); b_mask = vec_cmplt(vec_sl(b, vec_splat_u16(8)), vec_splat_s16(0)); 
return vec_sel(a_shl, a_shr, b_mask); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (b_.values[i] >= 0) ? (b_.values[i] >= 16) ? 0 : (a_.values[i] << b_.values[i]) : (b_.values[i] <= -16) ? (a_.values[i] >> 15) : (a_.values[i] >> -b_.values[i])); } return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshlq_s16 #define vshlq_s16(a, b) simde_vshlq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vshlq_s32 (const simde_int32x4_t a, const simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vshlq_s32(a, b); #elif defined(SIMDE_X86_AVX2_NATIVE) __m128i b_ = _mm_srai_epi32(_mm_slli_epi32(b, 24), 24); return _mm_blendv_epi8(_mm_sllv_epi32(a, b_), _mm_srav_epi32(a, _mm_abs_epi32(b_)), _mm_cmpgt_epi32(_mm_setzero_si128(), b_)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(signed int) a_shl, a_shr; SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) b_abs, b_max; SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL int) b_mask; b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0xFF))); b_max = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 31)); #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) a_shl = vec_and(vec_sl(a, b_abs), vec_cmple(b_abs, b_max)); #else a_shl = vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32)))); #endif a_shr = vec_sra(a, vec_min(b_abs, b_max)); b_mask = vec_cmplt(vec_sl(b, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 24))), vec_splat_s32(0)); return vec_sel(a_shl, a_shr, b_mask); #else simde_int32x4_private r_, a_ = 
simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = (b_.values[i] >= 0) ? (b_.values[i] >= 32) ? 0 : (a_.values[i] << b_.values[i]) : (b_.values[i] <= -32) ? (a_.values[i] >> 31) : (a_.values[i] >> -b_.values[i]); } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshlq_s32 #define vshlq_s32(a, b) simde_vshlq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vshlq_s64 (const simde_int64x2_t a, const simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vshlq_s64(a, b); #elif defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) __m128i zero = _mm_setzero_si128(); __m128i b_ = _mm_srai_epi64(_mm_slli_epi64(b, 56), 56); return _mm_blendv_epi8(_mm_sllv_epi64(a, b_), _mm_srav_epi64(a, _mm_sub_epi64(zero, b_)), _mm_cmpgt_epi64(zero, b_)); #elif defined(SIMDE_X86_AVX2_NATIVE) __m128i zero = _mm_setzero_si128(); __m128i maska = _mm_cmpgt_epi64(zero, a); __m128i b_abs = _mm_and_si128(_mm_abs_epi8(b), _mm_set1_epi64x(0xFF)); return _mm_blendv_epi8(_mm_sllv_epi64(a, b_abs), _mm_xor_si128(_mm_srlv_epi64(_mm_xor_si128(a, maska), b_abs), maska), _mm_cmpgt_epi64(zero, _mm_slli_epi64(b, 56))); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(signed long long) a_shl, a_shr; SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) b_abs, b_max; SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL long long) b_mask; b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 0xFF))); b_max = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63)); a_shl = vec_and(vec_sl(a, b_abs), vec_cmple(b_abs, b_max)); a_shr = vec_sra(a, 
vec_min(b_abs, b_max)); b_mask = vec_cmplt(vec_sl(b, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 56))), vec_splats(HEDLEY_STATIC_CAST(signed long long, 0))); HEDLEY_DIAGNOSTIC_PUSH #if defined(SIMDE_BUG_CLANG_46770) SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ #endif return vec_sel(a_shl, a_shr, b_mask); HEDLEY_DIAGNOSTIC_POP #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = (b_.values[i] >= 0) ? (b_.values[i] >= 64) ? 0 : (a_.values[i] << b_.values[i]) : (b_.values[i] <= -64) ? (a_.values[i] >> 63) : (a_.values[i] >> -b_.values[i]); } return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshlq_s64 #define vshlq_s64(a, b) simde_vshlq_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vshlq_u8 (const simde_uint8x16_t a, const simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vshlq_u8(a, b); #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) __m256i a256 = _mm256_cvtepu8_epi16(a); __m256i b256 = _mm256_cvtepi8_epi16(b); __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi16(a256, b256), _mm256_srlv_epi16(a256, _mm256_abs_epi16(b256)), _mm256_cmpgt_epi16(_mm256_setzero_si256(), b256)); return _mm256_cvtepi16_epi8(r256); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) b_abs; SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL char) b_mask; b_abs = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_abs(b)); b_mask = vec_cmplt(b, vec_splat_s8(0)); return vec_and(vec_sel(vec_sl(a, b_abs), vec_sr(a, b_abs), b_mask), vec_cmplt(b_abs, vec_splat_u8(8))); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a); simde_int8x16_private b_ = simde_int8x16_to_private(b); 
SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (abs(b_.values[i]) >= 8) ? 0 : (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : (a_.values[i] >> -b_.values[i])); } return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshlq_u8 #define vshlq_u8(a, b) simde_vshlq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vshlq_u16 (const simde_uint16x8_t a, const simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vshlq_u16(a, b); #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) __m128i b_ = _mm_srai_epi16(_mm_slli_epi16(b, 8), 8); return _mm_blendv_epi8(_mm_sllv_epi16(a, b_), _mm_srlv_epi16(a, _mm_abs_epi16(b_)), _mm_cmpgt_epi16(_mm_setzero_si128(), b_)); #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_ARCH_AMD64) __m256i a256 = _mm256_cvtepu16_epi32(a); __m256i b256 = _mm256_cvtepi16_epi32(b); b256 = _mm256_srai_epi32(_mm256_slli_epi32(b256, 24), 24); __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), _mm256_srlv_epi32(a256, _mm256_abs_epi32(b256)), _mm256_cmpgt_epi32(_mm256_setzero_si256(), b256)); r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi64x(0x0D0C090805040100)); return _mm_set_epi64x(_mm256_extract_epi64(r256, 2), _mm256_extract_epi64(r256, 0)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) b_abs; SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL short) b_mask; b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0xFF))); b_mask = vec_cmplt(vec_sl(b, vec_splat_u16(8)), vec_splat_s16(0)); #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_and(vec_sel(vec_sl(a, b_abs), vec_sr(a, b_abs), b_mask), vec_cmple(b_abs, vec_splat_u16(15))); 
#else return vec_and(vec_sel(vec_sl(a, b_abs), vec_sr(a, b_abs), b_mask), vec_cmplt(b_abs, vec_splats(HEDLEY_STATIC_CAST(unsigned short, 16)))); #endif #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a); simde_int16x8_private b_ = simde_int16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, (abs(b_.values[i]) >= 16) ? 0 : (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : (a_.values[i] >> -b_.values[i])); } return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshlq_u16 #define vshlq_u16(a, b) simde_vshlq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vshlq_u32 (const simde_uint32x4_t a, const simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vshlq_u32(a, b); #elif defined(SIMDE_X86_AVX2_NATIVE) __m128i b_ = _mm_srai_epi32(_mm_slli_epi32(b, 24), 24); return _mm_blendv_epi8(_mm_sllv_epi32(a, b_), _mm_srlv_epi32(a, _mm_abs_epi32(b_)), _mm_cmpgt_epi32(_mm_setzero_si128(), b_)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) b_abs; SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL int) b_mask; b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0xFF))); b_mask = vec_cmplt(vec_sl(b, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 24))), vec_splat_s32(0)); return vec_and(vec_sel(vec_sl(a, b_abs), vec_sr(a, b_abs), b_mask), vec_cmplt(b_abs, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32)))); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a); simde_int32x4_private b_ = simde_int32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { 
b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = (abs(b_.values[i]) >= 32) ? 0 : (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : (a_.values[i] >> -b_.values[i]); } return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshlq_u32 #define vshlq_u32(a, b) simde_vshlq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vshlq_u64 (const simde_uint64x2_t a, const simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vshlq_u64(a, b); #elif defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) __m128i zero = _mm_setzero_si128(); __m128i b_ = _mm_srai_epi64(_mm_slli_epi64(b, 56), 56); return _mm_blendv_epi8(_mm_sllv_epi64(a, b_), _mm_srlv_epi64(a, _mm_sub_epi64(zero, b_)), _mm_cmpgt_epi64(zero, b_)); #elif defined(SIMDE_X86_AVX2_NATIVE) __m128i b_abs = _mm_and_si128(_mm_abs_epi8(b), _mm_set1_epi64x(0xFF)); return _mm_blendv_epi8(_mm_sllv_epi64(a, b_abs), _mm_srlv_epi64(a, b_abs), _mm_cmpgt_epi64(_mm_setzero_si128(), _mm_slli_epi64(b, 56))); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) b_abs; SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL long long) b_mask; b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 0xFF))); b_mask = vec_cmplt(vec_sl(b, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 56))), vec_splats(HEDLEY_STATIC_CAST(signed long long, 0))); HEDLEY_DIAGNOSTIC_PUSH #if defined(SIMDE_BUG_CLANG_46770) SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ #endif return vec_and(vec_sel(vec_sl(a, b_abs), vec_sr(a, b_abs), b_mask), vec_cmplt(b_abs, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64)))); HEDLEY_DIAGNOSTIC_POP #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a); simde_int64x2_private b_ = 
simde_int64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); r_.values[i] = (llabs(b_.values[i]) >= 64) ? 0 : (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : (a_.values[i] >> -b_.values[i]); } return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshlq_u64 #define vshlq_u64(a, b) simde_vshlq_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_SHL_H) */ simde-0.7.2/simde/arm/neon/shl_n.h000066400000000000000000000433261400333146700167250ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_SHL_N_H) #define SIMDE_ARM_NEON_SHL_N_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vshl_n_s8 (const simde_int8x8_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { simde_int8x8_private r_, a_ = simde_int8x8_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values << HEDLEY_STATIC_CAST(int8_t, n); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int8_t, a_.values[i] << n); } #endif return simde_int8x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshl_n_s8(a, n) vshl_n_s8((a), (n)) #elif defined(SIMDE_X86_MMX_NATIVE) #define simde_vshl_n_s8(a, n) \ _mm_andnot_si64(_mm_set1_pi8((1 << n) - 1), _mm_slli_si64((a), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshl_n_s8 #define vshl_n_s8(a, n) simde_vshl_n_s8((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vshl_n_s16 (const simde_int16x4_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { simde_int16x4_private r_, a_ = simde_int16x4_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values << HEDLEY_STATIC_CAST(int16_t, n); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i] << n); } #endif return simde_int16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshl_n_s16(a, n) vshl_n_s16((a), (n)) #elif defined(SIMDE_X86_MMX_NATIVE) #define simde_vshl_n_s16(a, n) _mm_slli_pi16((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshl_n_s16 #define vshl_n_s16(a, n) simde_vshl_n_s16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vshl_n_s32 (const 
simde_int32x2_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { simde_int32x2_private r_, a_ = simde_int32x2_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values << n; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i] << n); } #endif return simde_int32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshl_n_s32(a, n) vshl_n_s32((a), (n)) #elif defined(SIMDE_X86_MMX_NATIVE) #define simde_vshl_n_s32(a, n) _mm_slli_pi32((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshl_n_s32 #define vshl_n_s32(a, n) simde_vshl_n_s32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vshl_n_s64 (const simde_int64x1_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { simde_int64x1_private r_, a_ = simde_int64x1_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values << n; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i] << n); } #endif return simde_int64x1_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshl_n_s64(a, n) vshl_n_s64((a), (n)) #elif defined(SIMDE_X86_MMX_NATIVE) #define simde_vshl_n_s64(a, n) _mm_slli_si64((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshl_n_s64 #define vshl_n_s64(a, n) simde_vshl_n_s64((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vshl_n_u8 (const simde_uint8x8_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values << HEDLEY_STATIC_CAST(uint8_t, n); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, a_.values[i] << 
n); } #endif return simde_uint8x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshl_n_u8(a, n) vshl_n_u8((a), (n)) #elif defined(SIMDE_X86_MMX_NATIVE) #define simde_vshl_n_u8(a, n) \ _mm_andnot_si64(_mm_set1_pi8((1 << n) - 1), _mm_slli_si64((a), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshl_n_u8 #define vshl_n_u8(a, n) simde_vshl_n_u8((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vshl_n_u16 (const simde_uint16x4_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values << HEDLEY_STATIC_CAST(uint16_t, n); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i] << n); } #endif return simde_uint16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshl_n_u16(a, n) vshl_n_u16((a), (n)) #elif defined(SIMDE_X86_MMX_NATIVE) #define simde_vshl_n_u16(a, n) _mm_slli_pi16((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshl_n_u16 #define vshl_n_u16(a, n) simde_vshl_n_u16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vshl_n_u32 (const simde_uint32x2_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values << n; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i] << n); } #endif return simde_uint32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshl_n_u32(a, n) vshl_n_u32((a), (n)) #elif defined(SIMDE_X86_MMX_NATIVE) #define simde_vshl_n_u32(a, n) _mm_slli_pi32((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) 
#undef vshl_n_u32 #define vshl_n_u32(a, n) simde_vshl_n_u32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vshl_n_u64 (const simde_uint64x1_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { simde_uint64x1_private r_, a_ = simde_uint64x1_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values << n; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i] << n); } #endif return simde_uint64x1_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshl_n_u64(a, n) vshl_n_u64((a), (n)) #elif defined(SIMDE_X86_MMX_NATIVE) #define simde_vshl_n_u64(a, n) _mm_slli_si64((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshl_n_u64 #define vshl_n_u64(a, n) simde_vshl_n_u64((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vshlq_n_s8 (const simde_int8x16_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { #if defined(SIMDE_X86_GFNI_NATIVE) /* https://wunkolo.github.io/post/2020/11/gf2p8affineqb-int8-shifting/ */ return _mm_gf2p8affine_epi64_epi8(a, _mm_set1_epi64x(INT64_C(0x0102040810204080) >> (n * 8)), 0); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, (1 << n) - 1)), _mm_slli_epi64(a, n)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_shl(a, n); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values << HEDLEY_STATIC_CAST(int8_t, n); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int8_t, a_.values[i] << n); } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshlq_n_s8(a, n) vshlq_n_s8((a), (n)) #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define simde_vshlq_n_s8(a, n) 
vec_sl((a), vec_splats(SIMDE_CHECKED_STATIC_CAST(unsigned char, int, (n)))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshlq_n_s8 #define vshlq_n_s8(a, n) simde_vshlq_n_s8((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vshlq_n_s16 (const simde_int16x8_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_slli_epi16((a), (n)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_shl((a), (n)); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values << HEDLEY_STATIC_CAST(int16_t, n); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i] << n); } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshlq_n_s16(a, n) vshlq_n_s16((a), (n)) #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define simde_vshlq_n_s16(a, n) vec_sl((a), vec_splats(SIMDE_CHECKED_STATIC_CAST(unsigned short, int, (n)))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshlq_n_s16 #define vshlq_n_s16(a, n) simde_vshlq_n_s16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vshlq_n_s32 (const simde_int32x4_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_slli_epi32((a), (n)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_shl((a), (n)); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values << n; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i] << n); } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshlq_n_s32(a, n) 
vshlq_n_s32((a), (n)) #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define simde_vshlq_n_s32(a, n) vec_sl((a), vec_splats(HEDLEY_STATIC_CAST(unsigned int, (n)))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshlq_n_s32 #define vshlq_n_s32(a, n) simde_vshlq_n_s32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vshlq_n_s64 (const simde_int64x2_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_slli_epi64((a), (n)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i64x2_shl((a), (n)); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values << n; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i] << n); } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshlq_n_s64(a, n) vshlq_n_s64((a), (n)) #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) #define simde_vshlq_n_s64(a, n) vec_sl((a), vec_splats(HEDLEY_STATIC_CAST(unsigned long long, (n)))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshlq_n_s64 #define vshlq_n_s64(a, n) simde_vshlq_n_s64((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vshlq_n_u8 (const simde_uint8x16_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { #if defined(SIMDE_X86_GFNI_NATIVE) /* https://wunkolo.github.io/post/2020/11/gf2p8affineqb-int8-shifting/ */ return _mm_gf2p8affine_epi64_epi8(a, _mm_set1_epi64x(INT64_C(0x0102040810204080) >> (n * 8)), 0); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, (1 << n) - 1)), _mm_slli_epi64((a), (n))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_shl((a), (n)); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a); #if 
defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values << HEDLEY_STATIC_CAST(uint8_t, n); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, a_.values[i] << n); } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshlq_n_u8(a, n) vshlq_n_u8((a), (n)) #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define simde_vshlq_n_u8(a, n) vec_sl((a), vec_splat_u8(n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshlq_n_u8 #define vshlq_n_u8(a, n) simde_vshlq_n_u8((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vshlq_n_u16 (const simde_uint16x8_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_slli_epi16((a), (n)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_shl((a), (n)); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values << HEDLEY_STATIC_CAST(uint16_t, n); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i] << n); } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshlq_n_u16(a, n) vshlq_n_u16((a), (n)) #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define simde_vshlq_n_u16(a, n) vec_sl((a), vec_splat_u16(n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshlq_n_u16 #define vshlq_n_u16(a, n) simde_vshlq_n_u16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vshlq_n_u32 (const simde_uint32x4_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_slli_epi32((a), (n)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_shl((a), (n)); #else simde_uint32x4_private r_, a_ = 
simde_uint32x4_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values << n; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i] << n); } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshlq_n_u32(a, n) vshlq_n_u32((a), (n)) #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define simde_vshlq_n_u32(a, n) vec_sl((a), vec_splats(HEDLEY_STATIC_CAST(unsigned int, (n)))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshlq_n_u32 #define vshlq_n_u32(a, n) simde_vshlq_n_u32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vshlq_n_u64 (const simde_uint64x2_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_slli_epi64((a), (n)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i64x2_shl((a), (n)); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values << n; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i] << n); } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshlq_n_u64(a, n) vshlq_n_u64((a), (n)) #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) #define simde_vshlq_n_u64(a, n) vec_sl((a), vec_splats(HEDLEY_STATIC_CAST(unsigned long long, (n)))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshlq_n_u64 #define vshlq_n_u64(a, n) simde_vshlq_n_u64((a), (n)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_SHL_N_H) */ simde-0.7.2/simde/arm/neon/shr_n.h000066400000000000000000000457531400333146700167410ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of 
charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_SHR_N_H) #define SIMDE_ARM_NEON_SHR_N_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vshr_n_s8 (const simde_int8x8_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { simde_int8x8_private r_, a_ = simde_int8x8_to_private(a); int32_t n_ = (n == 8) ? 
7 : n; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values >> n_; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int8_t, a_.values[i] >> n_); } #endif return simde_int8x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshr_n_s8(a, n) vshr_n_s8((a), (n)) #elif defined(SIMDE_X86_MMX_NATIVE) #define simde_vshr_n_s8(a, n) \ _mm_or_si64(_mm_andnot_si64(_mm_set1_pi16(0x00FF), _mm_srai_pi16((a), (n))), \ _mm_and_si64(_mm_set1_pi16(0x00FF), _mm_srai_pi16(_mm_slli_pi16((a), 8), 8 + (n)))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshr_n_s8 #define vshr_n_s8(a, n) simde_vshr_n_s8((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vshr_n_s16 (const simde_int16x4_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { simde_int16x4_private r_, a_ = simde_int16x4_to_private(a); int32_t n_ = (n == 16) ? 15 : n; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values >> n_; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i] >> n_); } #endif return simde_int16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshr_n_s16(a, n) vshr_n_s16((a), (n)) #elif defined(SIMDE_X86_MMX_NATIVE) #define simde_vshr_n_s16(a, n) _mm_srai_pi16((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshr_n_s16 #define vshr_n_s16(a, n) simde_vshr_n_s16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vshr_n_s32 (const simde_int32x2_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { simde_int32x2_private r_, a_ = simde_int32x2_to_private(a); int32_t n_ = (n == 32) ? 
31 : n; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values >> n_; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] >> n_; } #endif return simde_int32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshr_n_s32(a, n) vshr_n_s32((a), (n)) #elif defined(SIMDE_X86_MMX_NATIVE) #define simde_vshr_n_s32(a, n) _mm_srai_pi32((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshr_n_s32 #define vshr_n_s32(a, n) simde_vshr_n_s32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vshr_n_s64 (const simde_int64x1_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { simde_int64x1_private r_, a_ = simde_int64x1_to_private(a); int32_t n_ = (n == 64) ? 63 : n; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values >> n_; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] >> n_; } #endif return simde_int64x1_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshr_n_s64(a, n) vshr_n_s64((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshr_n_s64 #define vshr_n_s64(a, n) simde_vshr_n_s64((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vshr_n_u8 (const simde_uint8x8_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a); if (n == 8) { simde_memset(&r_, 0, sizeof(r_)); } else { #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values >> n; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] >> n; } #endif } return simde_uint8x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshr_n_u8(a, n) vshr_n_u8((a), (n)) #elif defined(SIMDE_X86_MMX_NATIVE) #define simde_vshr_n_u8(a, n) \ 
_mm_and_si64(_mm_srli_si64((a), (n)), _mm_set1_pi8((1 << (8 - (n))) - 1)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshr_n_u8 #define vshr_n_u8(a, n) simde_vshr_n_u8((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vshr_n_u16 (const simde_uint16x4_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a); if (n == 16) { simde_memset(&r_, 0, sizeof(r_)); } else { #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values >> n; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] >> n; } #endif } return simde_uint16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshr_n_u16(a, n) vshr_n_u16((a), (n)) #elif defined(SIMDE_X86_MMX_NATIVE) #define simde_vshr_n_u16(a, n) _mm_srli_pi16((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshr_n_u16 #define vshr_n_u16(a, n) simde_vshr_n_u16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vshr_n_u32 (const simde_uint32x2_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a); if (n == 32) { simde_memset(&r_, 0, sizeof(r_)); } else { #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values >> n; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] >> n; } #endif } return simde_uint32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshr_n_u32(a, n) vshr_n_u32((a), (n)) #elif defined(SIMDE_X86_MMX_NATIVE) #define simde_vshr_n_u32(a, n) _mm_srli_pi32((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshr_n_u32 #define vshr_n_u32(a, n) simde_vshr_n_u32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vshr_n_u64 (const simde_uint64x1_t a, const int n) 
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { simde_uint64x1_private r_, a_ = simde_uint64x1_to_private(a); if (n == 64) { simde_memset(&r_, 0, sizeof(r_)); } else { #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values >> n; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] >> n; } #endif } return simde_uint64x1_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshr_n_u64(a, n) vshr_n_u64((a), (n)) #elif defined(SIMDE_X86_MMX_NATIVE) #define simde_vshr_n_u64(a, n) _mm_srli_si64((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshr_n_u64 #define vshr_n_u64(a, n) simde_vshr_n_u64((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vshrq_n_s8 (const simde_int8x16_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { #if defined(SIMDE_X86_GFNI_NATIVE) /* https://wunkolo.github.io/post/2020/11/gf2p8affineqb-int8-shifting/ */ const int shift = (n <= 7) ? n : 7; const uint64_t matrix = (UINT64_C(0x8182848890A0C000) << (shift * 8)) ^ UINT64_C(0x8080808080808080); return _mm_gf2p8affine_epi64_epi8(a, _mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, matrix)), 0); #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_blendv_epi8(_mm_srai_epi16((a), (n)), _mm_srai_epi16(_mm_slli_epi16((a), 8), 8 + (n)), _mm_set1_epi16(0x00FF)); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_or_si128(_mm_andnot_si128(_mm_set1_epi16(0x00FF), _mm_srai_epi16((a), (n))), _mm_and_si128(_mm_set1_epi16(0x00FF), _mm_srai_epi16(_mm_slli_epi16((a), 8), 8 + (n)))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_shr((a), ((n) == 8) ? 7 : (n)); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a); int32_t n_ = (n == 8) ? 
7 : n; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values >> n_; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int8_t, a_.values[i] >> n_); } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshrq_n_s8(a, n) vshrq_n_s8((a), (n)) #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define simde_vshrq_n_s8(a, n) vec_sra((a), vec_splat_u8(((n) == 8) ? 7 : (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshrq_n_s8 #define vshrq_n_s8(a, n) simde_vshrq_n_s8((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vshrq_n_s16 (const simde_int16x8_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_srai_epi16((a), (n)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_shr((a), ((n) == 16) ? 15 : (n)); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a); int32_t n_ = (n == 16) ? 15 : n; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values >> n_; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i] >> n_); } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshrq_n_s16(a, n) vshrq_n_s16((a), (n)) #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define simde_vshrq_n_s16(a, n) vec_sra((a), vec_splat_u16(((n) == 16) ? 
15 : (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshrq_n_s16 #define vshrq_n_s16(a, n) simde_vshrq_n_s16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vshrq_n_s32 (const simde_int32x4_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_srai_epi32((a), (n)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_shr((a), ((n) == 32) ? 31 : (n)); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a); int32_t n_ = (n == 32) ? 31 : n; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values >> n_; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] >> n_; } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshrq_n_s32(a, n) vshrq_n_s32((a), (n)) #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define simde_vshrq_n_s32(a, n) \ vec_sra((a), vec_splats(HEDLEY_STATIC_CAST(unsigned int, ((n) == 32) ? 31 : (n)))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshrq_n_s32 #define vshrq_n_s32(a, n) simde_vshrq_n_s32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vshrq_n_s64 (const simde_int64x2_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { #if defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i64x2_shr((a), ((n) == 64) ? 63 : (n)); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a); int32_t n_ = (n == 64) ? 
63 : n; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values >> n_; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] >> n_; } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshrq_n_s64(a, n) vshrq_n_s64((a), (n)) #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) #define simde_vshrq_n_s64(a, n) \ vec_sra((a), vec_splats(HEDLEY_STATIC_CAST(unsigned long long, ((n) == 64) ? 63 : (n)))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshrq_n_s64 #define vshrq_n_s64(a, n) simde_vshrq_n_s64((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vshrq_n_u8 (const simde_uint8x16_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { #if defined(SIMDE_X86_GFNI_NATIVE) /* https://wunkolo.github.io/post/2020/11/gf2p8affineqb-int8-shifting/ */ return (n > 7) ? _mm_setzero_si128() : _mm_gf2p8affine_epi64_epi8(a, _mm_set1_epi64x(INT64_C(0x0102040810204080) << (n * 8)), 0); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_and_si128(_mm_srli_epi64((a), (n)), _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, (1 << (8 - (n))) - 1))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return (((n) == 8) ? wasm_i8x16_splat(0) : wasm_u8x16_shr((a), (n))); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a); if (n == 8) { simde_memset(&r_, 0, sizeof(r_)); } else { #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values >> n; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] >> n; } #endif } return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshrq_n_u8(a, n) vshrq_n_u8((a), (n)) #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define simde_vshrq_n_u8(a, n) \ (((n) == 8) ? 
vec_splat_u8(0) : vec_sr((a), vec_splat_u8(n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshrq_n_u8 #define vshrq_n_u8(a, n) simde_vshrq_n_u8((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vshrq_n_u16 (const simde_uint16x8_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_srli_epi16((a), (n)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return (((n) == 16) ? wasm_i16x8_splat(0) : wasm_u16x8_shr((a), (n))); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a); if (n == 16) { simde_memset(&r_, 0, sizeof(r_)); } else { #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values >> n; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] >> n; } #endif } return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshrq_n_u16(a, n) vshrq_n_u16((a), (n)) #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define simde_vshrq_n_u16(a, n) \ (((n) == 16) ? vec_splat_u16(0) : vec_sr((a), vec_splat_u16(n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshrq_n_u16 #define vshrq_n_u16(a, n) simde_vshrq_n_u16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vshrq_n_u32 (const simde_uint32x4_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_srli_epi32((a), (n)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return (((n) == 32) ? 
wasm_i32x4_splat(0) : wasm_u32x4_shr((a), (n))); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a); if (n == 32) { simde_memset(&r_, 0, sizeof(r_)); } else { #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values >> n; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] >> n; } #endif } return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshrq_n_u32(a, n) vshrq_n_u32((a), (n)) #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define simde_vshrq_n_u32(a, n) \ (((n) == 32) ? vec_splat_u32(0) : vec_sr((a), vec_splats(HEDLEY_STATIC_CAST(unsigned int, (n))))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshrq_n_u32 #define vshrq_n_u32(a, n) simde_vshrq_n_u32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vshrq_n_u64 (const simde_uint64x2_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_srli_epi64((a), (n)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return (((n) == 64) ? wasm_i64x2_splat(0) : wasm_u64x2_shr((a), (n))); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a); if (n == 64) { simde_memset(&r_, 0, sizeof(r_)); } else { #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = a_.values >> n; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] >> n; } #endif } return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vshrq_n_u64(a, n) vshrq_n_u64((a), (n)) #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) #define simde_vshrq_n_u64(a, n) \ (((n) == 64) ? 
vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 0)) : vec_sr((a), vec_splats(HEDLEY_STATIC_CAST(unsigned long long, (n))))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vshrq_n_u64 #define vshrq_n_u64(a, n) simde_vshrq_n_u64((a), (n)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_SHR_N_H) */ simde-0.7.2/simde/arm/neon/sra_n.h000066400000000000000000000156621400333146700167260ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_SRA_N_H) #define SIMDE_ARM_NEON_SRA_N_H #include "add.h" #include "shr_n.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vsra_n_s8(a, b, n) vsra_n_s8((a), (b), (n)) #else #define simde_vsra_n_s8(a, b, n) simde_vadd_s8((a), simde_vshr_n_s8((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsra_n_s8 #define vsra_n_s8(a, b, n) simde_vsra_n_s8((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vsra_n_s16(a, b, n) vsra_n_s16((a), (b), (n)) #else #define simde_vsra_n_s16(a, b, n) simde_vadd_s16((a), simde_vshr_n_s16((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsra_n_s16 #define vsra_n_s16(a, b, n) simde_vsra_n_s16((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vsra_n_s32(a, b, n) vsra_n_s32((a), (b), (n)) #else #define simde_vsra_n_s32(a, b, n) simde_vadd_s32((a), simde_vshr_n_s32((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsra_n_s32 #define vsra_n_s32(a, b, n) simde_vsra_n_s32((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vsra_n_s64(a, b, n) vsra_n_s64((a), (b), (n)) #else #define simde_vsra_n_s64(a, b, n) simde_vadd_s64((a), simde_vshr_n_s64((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsra_n_s64 #define vsra_n_s64(a, b, n) simde_vsra_n_s64((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vsra_n_u8(a, b, n) vsra_n_u8((a), (b), (n)) #else #define simde_vsra_n_u8(a, b, n) simde_vadd_u8((a), simde_vshr_n_u8((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsra_n_u8 #define vsra_n_u8(a, b, n) simde_vsra_n_u8((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define 
simde_vsra_n_u16(a, b, n) vsra_n_u16((a), (b), (n)) #else #define simde_vsra_n_u16(a, b, n) simde_vadd_u16((a), simde_vshr_n_u16((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsra_n_u16 #define vsra_n_u16(a, b, n) simde_vsra_n_u16((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vsra_n_u32(a, b, n) vsra_n_u32((a), (b), (n)) #else #define simde_vsra_n_u32(a, b, n) simde_vadd_u32((a), simde_vshr_n_u32((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsra_n_u32 #define vsra_n_u32(a, b, n) simde_vsra_n_u32((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vsra_n_u64(a, b, n) vsra_n_u64((a), (b), (n)) #else #define simde_vsra_n_u64(a, b, n) simde_vadd_u64((a), simde_vshr_n_u64((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsra_n_u64 #define vsra_n_u64(a, b, n) simde_vsra_n_u64((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vsraq_n_s8(a, b, n) vsraq_n_s8((a), (b), (n)) #else #define simde_vsraq_n_s8(a, b, n) simde_vaddq_s8((a), simde_vshrq_n_s8((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsraq_n_s8 #define vsraq_n_s8(a, b, n) simde_vsraq_n_s8((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vsraq_n_s16(a, b, n) vsraq_n_s16((a), (b), (n)) #else #define simde_vsraq_n_s16(a, b, n) simde_vaddq_s16((a), simde_vshrq_n_s16((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsraq_n_s16 #define vsraq_n_s16(a, b, n) simde_vsraq_n_s16((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vsraq_n_s32(a, b, n) vsraq_n_s32((a), (b), (n)) #else #define simde_vsraq_n_s32(a, b, n) simde_vaddq_s32((a), simde_vshrq_n_s32((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsraq_n_s32 #define vsraq_n_s32(a, b, n) simde_vsraq_n_s32((a), (b), (n)) #endif #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vsraq_n_s64(a, b, n) vsraq_n_s64((a), (b), (n)) #else #define simde_vsraq_n_s64(a, b, n) simde_vaddq_s64((a), simde_vshrq_n_s64((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsraq_n_s64 #define vsraq_n_s64(a, b, n) simde_vsraq_n_s64((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vsraq_n_u8(a, b, n) vsraq_n_u8((a), (b), (n)) #else #define simde_vsraq_n_u8(a, b, n) simde_vaddq_u8((a), simde_vshrq_n_u8((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsraq_n_u8 #define vsraq_n_u8(a, b, n) simde_vsraq_n_u8((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vsraq_n_u16(a, b, n) vsraq_n_u16((a), (b), (n)) #else #define simde_vsraq_n_u16(a, b, n) simde_vaddq_u16((a), simde_vshrq_n_u16((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsraq_n_u16 #define vsraq_n_u16(a, b, n) simde_vsraq_n_u16((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vsraq_n_u32(a, b, n) vsraq_n_u32((a), (b), (n)) #else #define simde_vsraq_n_u32(a, b, n) simde_vaddq_u32((a), simde_vshrq_n_u32((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsraq_n_u32 #define vsraq_n_u32(a, b, n) simde_vsraq_n_u32((a), (b), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_vsraq_n_u64(a, b, n) vsraq_n_u64((a), (b), (n)) #else #define simde_vsraq_n_u64(a, b, n) simde_vaddq_u64((a), simde_vshrq_n_u64((b), (n))) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsraq_n_u64 #define vsraq_n_u64(a, b, n) simde_vsraq_n_u64((a), (b), (n)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_SRA_N_H) */ simde-0.7.2/simde/arm/neon/st1.h000066400000000000000000000247141400333146700163310ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any 
person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_ST1_H) #define SIMDE_ARM_NEON_ST1_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float32x2_t val) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) vst1_f32(ptr, val); #else simde_float32x2_private val_ = simde_float32x2_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1_f32 #define vst1_f32(a, b) simde_vst1_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(1)], simde_float64x1_t val) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) vst1_f64(ptr, val); #else simde_float64x1_private val_ = simde_float64x1_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vst1_f64 #define vst1_f64(a, b) simde_vst1_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_int8x8_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1_s8(ptr, val); #else simde_int8x8_private val_ = simde_int8x8_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1_s8 #define vst1_s8(a, b) simde_vst1_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int16x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1_s16(ptr, val); #else simde_int16x4_private val_ = simde_int16x4_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1_s16 #define vst1_s16(a, b) simde_vst1_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int32x2_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) 
vst1_s32(ptr, val); #else simde_int32x2_private val_ = simde_int32x2_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1_s32 #define vst1_s32(a, b) simde_vst1_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(1)], simde_int64x1_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1_s64(ptr, val); #else simde_int64x1_private val_ = simde_int64x1_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1_s64 #define vst1_s64(a, b) simde_vst1_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_uint8x8_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1_u8(ptr, val); #else simde_uint8x8_private val_ = simde_uint8x8_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1_u8 #define vst1_u8(a, b) simde_vst1_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint16x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1_u16(ptr, val); #else simde_uint16x4_private val_ = simde_uint16x4_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1_u16 #define vst1_u16(a, b) simde_vst1_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint32x2_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1_u32(ptr, val); #else simde_uint32x2_private val_ = simde_uint32x2_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1_u32 #define vst1_u32(a, b) simde_vst1_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(1)], simde_uint64x1_t 
val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1_u64(ptr, val); #else simde_uint64x1_private val_ = simde_uint64x1_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1_u64 #define vst1_u64(a, b) simde_vst1_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_f32(ptr, val); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && 0 vec_st(val, 0, ptr); #else simde_float32x4_private val_ = simde_float32x4_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1q_f32 #define vst1q_f32(a, b) simde_vst1q_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float64x2_t val) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) vst1q_f64(ptr, val); #else simde_float64x2_private val_ = simde_float64x2_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vst1q_f64 #define vst1q_f64(a, b) simde_vst1q_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_int8x16_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_s8(ptr, val); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && 0 vec_st(val, 0, ptr); #else simde_int8x16_private val_ = simde_int8x16_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1q_s8 #define vst1q_s8(a, b) simde_vst1q_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_int16x8_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_s16(ptr, val); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && 0 vec_st(val, 0, ptr); #else simde_int16x8_private val_ = 
simde_int16x8_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1q_s16 #define vst1q_s16(a, b) simde_vst1q_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int32x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_s32(ptr, val); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && 0 vec_st(val, 0, ptr); #else simde_int32x4_private val_ = simde_int32x4_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1q_s32 #define vst1q_s32(a, b) simde_vst1q_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int64x2_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_s64(ptr, val); #else simde_int64x2_private val_ = simde_int64x2_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1q_s64 #define vst1q_s64(a, b) simde_vst1q_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_uint8x16_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_u8(ptr, val); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && 0 vec_st(val, 0, ptr); #else simde_uint8x16_private val_ = simde_uint8x16_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1q_u8 #define vst1q_u8(a, b) simde_vst1q_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_uint16x8_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_u16(ptr, val); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && 0 vec_st(val, 0, ptr); #else simde_uint16x8_private val_ = simde_uint16x8_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1q_u16 #define vst1q_u16(a, b) simde_vst1q_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint32x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_u32(ptr, val); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && 0 vec_st(val, 0, ptr); #else simde_uint32x4_private val_ = simde_uint32x4_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1q_u32 #define vst1q_u32(a, b) simde_vst1q_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint64x2_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_u64(ptr, val); #else simde_uint64x2_private val_ = simde_uint64x2_to_private(val); simde_memcpy(ptr, &val_, sizeof(val_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1q_u64 #define vst1q_u64(a, b) simde_vst1q_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ST1_H) */ simde-0.7.2/simde/arm/neon/st1_lane.h000066400000000000000000000302421400333146700173210ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_ST1_LANE_H) #define SIMDE_ARM_NEON_ST1_LANE_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_lane_f32(simde_float32_t *ptr, simde_float32x2_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) SIMDE_CONSTIFY_2_NO_RESULT_(vst1_lane_f32, HEDLEY_UNREACHABLE(), lane, ptr, val); #else simde_float32x2_private val_ = simde_float32x2_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1_lane_f32 #define vst1_lane_f32(a, b, c) simde_vst1_lane_f32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_lane_f64(simde_float64_t *ptr, simde_float64x1_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) (void) lane; vst1_lane_f64(ptr, val, 0); #else simde_float64x1_private val_ = simde_float64x1_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vst1_lane_f64 #define vst1_lane_f64(a, b, c) simde_vst1_lane_f64((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_lane_s8(int8_t *ptr, simde_int8x8_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_8_NO_RESULT_(vst1_lane_s8, 
HEDLEY_UNREACHABLE(), lane, ptr, val); #else simde_int8x8_private val_ = simde_int8x8_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1_lane_s8 #define vst1_lane_s8(a, b, c) simde_vst1_lane_s8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_lane_s16(int16_t *ptr, simde_int16x4_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_4_NO_RESULT_(vst1_lane_s16, HEDLEY_UNREACHABLE(), lane, ptr, val); #else simde_int16x4_private val_ = simde_int16x4_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1_lane_s16 #define vst1_lane_s16(a, b, c) simde_vst1_lane_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_lane_s32(int32_t *ptr, simde_int32x2_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_2_NO_RESULT_(vst1_lane_s32, HEDLEY_UNREACHABLE(), lane, ptr, val); #else simde_int32x2_private val_ = simde_int32x2_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1_lane_s32 #define vst1_lane_s32(a, b, c) simde_vst1_lane_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_lane_s64(int64_t *ptr, simde_int64x1_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) (void) lane; vst1_lane_s64(ptr, val, 0); #else simde_int64x1_private val_ = simde_int64x1_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1_lane_s64 #define vst1_lane_s64(a, b, c) simde_vst1_lane_s64((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_lane_u8(uint8_t *ptr, simde_uint8x8_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) 
SIMDE_CONSTIFY_8_NO_RESULT_(vst1_lane_u8, HEDLEY_UNREACHABLE(), lane, ptr, val); #else simde_uint8x8_private val_ = simde_uint8x8_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1_lane_u8 #define vst1_lane_u8(a, b, c) simde_vst1_lane_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_lane_u16(uint16_t *ptr, simde_uint16x4_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_4_NO_RESULT_(vst1_lane_u16, HEDLEY_UNREACHABLE(), lane, ptr, val); #else simde_uint16x4_private val_ = simde_uint16x4_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1_lane_u16 #define vst1_lane_u16(a, b, c) simde_vst1_lane_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_lane_u32(uint32_t *ptr, simde_uint32x2_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_2_NO_RESULT_(vst1_lane_u32, HEDLEY_UNREACHABLE(), lane, ptr, val); #else simde_uint32x2_private val_ = simde_uint32x2_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1_lane_u32 #define vst1_lane_u32(a, b, c) simde_vst1_lane_u32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_lane_u64(uint64_t *ptr, simde_uint64x1_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) (void) lane; vst1_lane_u64(ptr, val, 0); #else simde_uint64x1_private val_ = simde_uint64x1_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1_lane_u64 #define vst1_lane_u64(a, b, c) simde_vst1_lane_u64((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_lane_f32(simde_float32_t *ptr, simde_float32x4_t val, const int lane) 
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_4_NO_RESULT_(vst1q_lane_f32, HEDLEY_UNREACHABLE(), lane, ptr, val); #else simde_float32x4_private val_ = simde_float32x4_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1q_lane_f32 #define vst1q_lane_f32(a, b, c) simde_vst1q_lane_f32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_lane_f64(simde_float64_t *ptr, simde_float64x2_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) SIMDE_CONSTIFY_2_NO_RESULT_(vst1q_lane_f64, HEDLEY_UNREACHABLE(), lane, ptr, val); #else simde_float64x2_private val_ = simde_float64x2_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vst1q_lane_f64 #define vst1q_lane_f64(a, b, c) simde_vst1q_lane_f64((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_lane_s8(int8_t *ptr, simde_int8x16_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_16_NO_RESULT_(vst1q_lane_s8, HEDLEY_UNREACHABLE(), lane, ptr, val); #else simde_int8x16_private val_ = simde_int8x16_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1q_lane_s8 #define vst1q_lane_s8(a, b, c) simde_vst1q_lane_s8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_lane_s16(int16_t *ptr, simde_int16x8_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_8_NO_RESULT_(vst1q_lane_s16, HEDLEY_UNREACHABLE(), lane, ptr, val); #else simde_int16x8_private val_ = simde_int16x8_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1q_lane_s16 #define vst1q_lane_s16(a, b, c) simde_vst1q_lane_s16((a), (b), (c)) 
#endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_lane_s32(int32_t *ptr, simde_int32x4_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_4_NO_RESULT_(vst1q_lane_s32, HEDLEY_UNREACHABLE(), lane, ptr, val); #else simde_int32x4_private val_ = simde_int32x4_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1q_lane_s32 #define vst1q_lane_s32(a, b, c) simde_vst1q_lane_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_lane_s64(int64_t *ptr, simde_int64x2_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_2_NO_RESULT_(vst1q_lane_s64, HEDLEY_UNREACHABLE(), lane, ptr, val); #else simde_int64x2_private val_ = simde_int64x2_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1q_lane_s64 #define vst1q_lane_s64(a, b, c) simde_vst1q_lane_s64((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_lane_u8(uint8_t *ptr, simde_uint8x16_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_16_NO_RESULT_(vst1q_lane_u8, HEDLEY_UNREACHABLE(), lane, ptr, val); #else simde_uint8x16_private val_ = simde_uint8x16_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1q_lane_u8 #define vst1q_lane_u8(a, b, c) simde_vst1q_lane_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_lane_u16(uint16_t *ptr, simde_uint16x8_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_8_NO_RESULT_(vst1q_lane_u16, HEDLEY_UNREACHABLE(), lane, ptr, val); #else simde_uint16x8_private val_ = simde_uint16x8_to_private(val); *ptr = val_.values[lane]; #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1q_lane_u16 #define vst1q_lane_u16(a, b, c) simde_vst1q_lane_u16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_lane_u32(uint32_t *ptr, simde_uint32x4_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_4_NO_RESULT_(vst1q_lane_u32, HEDLEY_UNREACHABLE(), lane, ptr, val); #else simde_uint32x4_private val_ = simde_uint32x4_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1q_lane_u32 #define vst1q_lane_u32(a, b, c) simde_vst1q_lane_u32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_lane_u64(uint64_t *ptr, simde_uint64x2_t val, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_CONSTIFY_2_NO_RESULT_(vst1q_lane_u64, HEDLEY_UNREACHABLE(), lane, ptr, val); #else simde_uint64x2_private val_ = simde_uint64x2_to_private(val); *ptr = val_.values[lane]; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst1q_lane_u64 #define vst1q_lane_u64(a, b, c) simde_vst1q_lane_u64((a), (b), (c)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ST1_LANE_H) */ simde-0.7.2/simde/arm/neon/st3.h000066400000000000000000000342771400333146700163400ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial 
portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher */ #if !defined(SIMDE_ARM_NEON_ST3_H) #define SIMDE_ARM_NEON_ST3_H #include "types.h" #include "st1.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_FUNCTION_ATTRIBUTES void simde_vst3_f32(simde_float32_t *ptr, simde_float32x2x3_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst3_f32(ptr, val); #else simde_float32_t buf[6]; simde_float32x2_private a_[3] = { simde_float32x2_to_private(val.val[0]), simde_float32x2_to_private(val.val[1]), simde_float32x2_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst3_f32 #define vst3_f32(a, b) simde_vst3_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3_f64(simde_float64_t *ptr, simde_float64x1x3_t val) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) vst3_f64(ptr, val); #else simde_float64_t buf[3]; simde_float64x1_private a_[3] = { simde_float64x1_to_private(val.val[0]), simde_float64x1_to_private(val.val[1]), simde_float64x1_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vst3_f64 #define vst3_f64(a, b) 
simde_vst3_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3_s8(int8_t *ptr, simde_int8x8x3_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst3_s8(ptr, val); #else int8_t buf[24]; simde_int8x8_private a_[3] = { simde_int8x8_to_private(val.val[0]), simde_int8x8_to_private(val.val[1]), simde_int8x8_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst3_s8 #define vst3_s8(a, b) simde_vst3_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3_s16(int16_t *ptr, simde_int16x4x3_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst3_s16(ptr, val); #else int16_t buf[12]; simde_int16x4_private a_[3] = { simde_int16x4_to_private(val.val[0]), simde_int16x4_to_private(val.val[1]), simde_int16x4_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst3_s16 #define vst3_s16(a, b) simde_vst3_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3_s32(int32_t *ptr, simde_int32x2x3_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst3_s32(ptr, val); #else int32_t buf[6]; simde_int32x2_private a_[3] = { simde_int32x2_to_private(val.val[0]), simde_int32x2_to_private(val.val[1]), simde_int32x2_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst3_s32 #define vst3_s32(a, b) simde_vst3_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3_s64(int64_t *ptr, simde_int64x1x3_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst3_s64(ptr, val); #else int64_t buf[3]; 
simde_int64x1_private a_[3] = { simde_int64x1_to_private(val.val[0]), simde_int64x1_to_private(val.val[1]), simde_int64x1_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vst3_s64 #define vst3_s64(a, b) simde_vst3_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3_u8(uint8_t *ptr, simde_uint8x8x3_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst3_u8(ptr, val); #else uint8_t buf[24]; simde_uint8x8_private a_[3] = { simde_uint8x8_to_private(val.val[0]), simde_uint8x8_to_private(val.val[1]), simde_uint8x8_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst3_u8 #define vst3_u8(a, b) simde_vst3_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3_u16(uint16_t *ptr, simde_uint16x4x3_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst3_u16(ptr, val); #else uint16_t buf[12]; simde_uint16x4_private a_[3] = { simde_uint16x4_to_private(val.val[0]), simde_uint16x4_to_private(val.val[1]), simde_uint16x4_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst3_u16 #define vst3_u16(a, b) simde_vst3_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3_u32(uint32_t *ptr, simde_uint32x2x3_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst3_u32(ptr, val); #else uint32_t buf[6]; simde_uint32x2_private a_[3] = { simde_uint32x2_to_private(val.val[0]), simde_uint32x2_to_private(val.val[1]), simde_uint32x2_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / 
sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst3_u32 #define vst3_u32(a, b) simde_vst3_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3_u64(uint64_t *ptr, simde_uint64x1x3_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst3_u64(ptr, val); #else uint64_t buf[3]; simde_uint64x1_private a_[3] = { simde_uint64x1_to_private(val.val[0]), simde_uint64x1_to_private(val.val[1]), simde_uint64x1_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vst3_u64 #define vst3_u64(a, b) simde_vst3_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3q_f32(simde_float32_t *ptr, simde_float32x4x3_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst3q_f32(ptr, val); #else simde_float32_t buf[12]; simde_float32x4_private a_[3] = { simde_float32x4_to_private(val.val[0]), simde_float32x4_to_private(val.val[1]), simde_float32x4_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst3q_f32 #define vst3q_f32(a, b) simde_vst3q_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3q_f64(simde_float64_t *ptr, simde_float64x2x3_t val) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) vst3q_f64(ptr, val); #else simde_float64_t buf[6]; simde_float64x2_private a_[3] = { simde_float64x2_to_private(val.val[0]), simde_float64x2_to_private(val.val[1]), simde_float64x2_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vst3q_f64 #define vst3q_f64(a, b) simde_vst3q_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3q_s8(int8_t *ptr, simde_int8x16x3_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst3q_s8(ptr, val); #else int8_t buf[48]; simde_int8x16_private a_[3] = { simde_int8x16_to_private(val.val[0]), simde_int8x16_to_private(val.val[1]), simde_int8x16_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst3q_s8 #define vst3q_s8(a, b) simde_vst3q_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3q_s16(int16_t *ptr, simde_int16x8x3_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst3q_s16(ptr, val); #else int16_t buf[24]; simde_int16x8_private a_[3] = { simde_int16x8_to_private(val.val[0]), simde_int16x8_to_private(val.val[1]), simde_int16x8_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst3q_s16 #define vst3q_s16(a, b) simde_vst3q_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3q_s32(int32_t *ptr, simde_int32x4x3_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst3q_s32(ptr, val); #else int32_t buf[12]; simde_int32x4_private a_[3] = { simde_int32x4_to_private(val.val[0]), simde_int32x4_to_private(val.val[1]), simde_int32x4_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst3q_s32 #define vst3q_s32(a, b) simde_vst3q_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3q_s64(int64_t *ptr, 
simde_int64x2x3_t val) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) vst3q_s64(ptr, val); #else int64_t buf[6]; simde_int64x2_private a_[3] = { simde_int64x2_to_private(val.val[0]), simde_int64x2_to_private(val.val[1]), simde_int64x2_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vst3q_s64 #define vst3q_s64(a, b) simde_vst3q_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3q_u8(uint8_t *ptr, simde_uint8x16x3_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst3q_u8(ptr, val); #else uint8_t buf[48]; simde_uint8x16_private a_[3] = { simde_uint8x16_to_private(val.val[0]), simde_uint8x16_to_private(val.val[1]), simde_uint8x16_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst3q_u8 #define vst3q_u8(a, b) simde_vst3q_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3q_u16(uint16_t *ptr, simde_uint16x8x3_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst3q_u16(ptr, val); #else uint16_t buf[24]; simde_uint16x8_private a_[3] = { simde_uint16x8_to_private(val.val[0]), simde_uint16x8_to_private(val.val[1]), simde_uint16x8_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst3q_u16 #define vst3q_u16(a, b) simde_vst3q_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3q_u32(uint32_t *ptr, simde_uint32x4x3_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst3q_u32(ptr, val); #else uint32_t buf[12]; simde_uint32x4_private a_[3] = { simde_uint32x4_to_private(val.val[0]), 
simde_uint32x4_to_private(val.val[1]), simde_uint32x4_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst3q_u32 #define vst3q_u32(a, b) simde_vst3q_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst3q_u64(uint64_t *ptr, simde_uint64x2x3_t val) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) vst3q_u64(ptr, val); #else uint64_t buf[6]; simde_uint64x2_private a_[3] = { simde_uint64x2_to_private(val.val[0]), simde_uint64x2_to_private(val.val[1]), simde_uint64x2_to_private(val.val[2]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { buf[i] = a_[i % 3].values[i / 3]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vst3q_u64 #define vst3q_u64(a, b) simde_vst3q_u64((a), (b)) #endif #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ST3_H) */ simde-0.7.2/simde/arm/neon/st4.h000066400000000000000000000372221400333146700163320ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher */ #if !defined(SIMDE_ARM_NEON_ST4_H) #define SIMDE_ARM_NEON_ST4_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_FUNCTION_ATTRIBUTES void simde_vst4_f32(simde_float32_t *ptr, simde_float32x2x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst4_f32(ptr, val); #else simde_float32_t buf[8]; simde_float32x2_private a_[4] = { simde_float32x2_to_private(val.val[0]), simde_float32x2_to_private(val.val[1]), simde_float32x2_to_private(val.val[2]), simde_float32x2_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst4_f32 #define vst4_f32(a, b) simde_vst4_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst4_f64(simde_float64_t *ptr, simde_float64x1x4_t val) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) vst4_f64(ptr, val); #else simde_float64_t buf[4]; simde_float64x1_private a_[4] = { simde_float64x1_to_private(val.val[0]), simde_float64x1_to_private(val.val[1]), simde_float64x1_to_private(val.val[2]), simde_float64x1_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef 
vst4_f64 #define vst4_f64(a, b) simde_vst4_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst4_s8(int8_t *ptr, simde_int8x8x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst4_s8(ptr, val); #else int8_t buf[32]; simde_int8x8_private a_[4] = { simde_int8x8_to_private(val.val[0]), simde_int8x8_to_private(val.val[1]), simde_int8x8_to_private(val.val[2]), simde_int8x8_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst4_s8 #define vst4_s8(a, b) simde_vst4_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst4_s16(int16_t *ptr, simde_int16x4x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst4_s16(ptr, val); #else int16_t buf[16]; simde_int16x4_private a_[4] = { simde_int16x4_to_private(val.val[0]), simde_int16x4_to_private(val.val[1]), simde_int16x4_to_private(val.val[2]), simde_int16x4_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst4_s16 #define vst4_s16(a, b) simde_vst4_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst4_s32(int32_t *ptr, simde_int32x2x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst4_s32(ptr, val); #else int32_t buf[8]; simde_int32x2_private a_[4] = { simde_int32x2_to_private(val.val[0]), simde_int32x2_to_private(val.val[1]), simde_int32x2_to_private(val.val[2]), simde_int32x2_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst4_s32 #define vst4_s32(a, b) simde_vst4_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void 
simde_vst4_s64(int64_t *ptr, simde_int64x1x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst4_s64(ptr, val); #else int64_t buf[4]; simde_int64x1_private a_[4] = { simde_int64x1_to_private(val.val[0]), simde_int64x1_to_private(val.val[1]), simde_int64x1_to_private(val.val[2]), simde_int64x1_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vst4_s64 #define vst4_s64(a, b) simde_vst4_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst4_u8(uint8_t *ptr, simde_uint8x8x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst4_u8(ptr, val); #else uint8_t buf[32]; simde_uint8x8_private a_[4] = { simde_uint8x8_to_private(val.val[0]), simde_uint8x8_to_private(val.val[1]), simde_uint8x8_to_private(val.val[2]), simde_uint8x8_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst4_u8 #define vst4_u8(a, b) simde_vst4_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst4_u16(uint16_t *ptr, simde_uint16x4x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst4_u16(ptr, val); #else uint16_t buf[16]; simde_uint16x4_private a_[4] = { simde_uint16x4_to_private(val.val[0]), simde_uint16x4_to_private(val.val[1]), simde_uint16x4_to_private(val.val[2]), simde_uint16x4_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst4_u16 #define vst4_u16(a, b) simde_vst4_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst4_u32(uint32_t *ptr, simde_uint32x2x4_t val) { #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst4_u32(ptr, val); #else uint32_t buf[8]; simde_uint32x2_private a_[4] = { simde_uint32x2_to_private(val.val[0]), simde_uint32x2_to_private(val.val[1]), simde_uint32x2_to_private(val.val[2]), simde_uint32x2_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst4_u32 #define vst4_u32(a, b) simde_vst4_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst4_u64(uint64_t *ptr, simde_uint64x1x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst4_u64(ptr, val); #else uint64_t buf[4]; simde_uint64x1_private a_[4] = { simde_uint64x1_to_private(val.val[0]), simde_uint64x1_to_private(val.val[1]), simde_uint64x1_to_private(val.val[2]), simde_uint64x1_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vst4_u64 #define vst4_u64(a, b) simde_vst4_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst4q_f32(simde_float32_t *ptr, simde_float32x4x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst4q_f32(ptr, val); #else simde_float32_t buf[16]; simde_float32x4_private a_[4] = { simde_float32x4_to_private(val.val[0]), simde_float32x4_to_private(val.val[1]), simde_float32x4_to_private(val.val[2]), simde_float32x4_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst4q_f32 #define vst4q_f32(a, b) simde_vst4q_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst4q_f64(simde_float64_t *ptr, simde_float64x2x4_t val) { #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) vst4q_f64(ptr, val); #else simde_float64_t buf[8]; simde_float64x2_private a_[4] = { simde_float64x2_to_private(val.val[0]), simde_float64x2_to_private(val.val[1]), simde_float64x2_to_private(val.val[2]), simde_float64x2_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vst4q_f64 #define vst4q_f64(a, b) simde_vst4q_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst4q_s8(int8_t *ptr, simde_int8x16x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst4q_s8(ptr, val); #else int8_t buf[64]; simde_int8x16_private a_[4] = { simde_int8x16_to_private(val.val[0]), simde_int8x16_to_private(val.val[1]), simde_int8x16_to_private(val.val[2]), simde_int8x16_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst4q_s8 #define vst4q_s8(a, b) simde_vst4q_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst4q_s16(int16_t *ptr, simde_int16x8x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst4q_s16(ptr, val); #else int16_t buf[32]; simde_int16x8_private a_[4] = { simde_int16x8_to_private(val.val[0]), simde_int16x8_to_private(val.val[1]), simde_int16x8_to_private(val.val[2]), simde_int16x8_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst4q_s16 #define vst4q_s16(a, b) simde_vst4q_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst4q_s32(int32_t *ptr, simde_int32x4x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst4q_s32(ptr, val); #else 
int32_t buf[16]; simde_int32x4_private a_[4] = { simde_int32x4_to_private(val.val[0]), simde_int32x4_to_private(val.val[1]), simde_int32x4_to_private(val.val[2]), simde_int32x4_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst4q_s32 #define vst4q_s32(a, b) simde_vst4q_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst4q_s64(int64_t *ptr, simde_int64x2x4_t val) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) vst4q_s64(ptr, val); #else int64_t buf[8]; simde_int64x2_private a_[4] = { simde_int64x2_to_private(val.val[0]), simde_int64x2_to_private(val.val[1]), simde_int64x2_to_private(val.val[2]), simde_int64x2_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vst4q_s64 #define vst4q_s64(a, b) simde_vst4q_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst4q_u8(uint8_t *ptr, simde_uint8x16x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst4q_u8(ptr, val); #else uint8_t buf[64]; simde_uint8x16_private a_[4] = { simde_uint8x16_to_private(val.val[0]), simde_uint8x16_to_private(val.val[1]), simde_uint8x16_to_private(val.val[2]), simde_uint8x16_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst4q_u8 #define vst4q_u8(a, b) simde_vst4q_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst4q_u16(uint16_t *ptr, simde_uint16x8x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst4q_u16(ptr, val); #else uint16_t buf[32]; simde_uint16x8_private a_[4] = { 
simde_uint16x8_to_private(val.val[0]), simde_uint16x8_to_private(val.val[1]), simde_uint16x8_to_private(val.val[2]), simde_uint16x8_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst4q_u16 #define vst4q_u16(a, b) simde_vst4q_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst4q_u32(uint32_t *ptr, simde_uint32x4x4_t val) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst4q_u32(ptr, val); #else uint32_t buf[16]; simde_uint32x4_private a_[4] = { simde_uint32x4_to_private(val.val[0]), simde_uint32x4_to_private(val.val[1]), simde_uint32x4_to_private(val.val[2]), simde_uint32x4_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vst4q_u32 #define vst4q_u32(a, b) simde_vst4q_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_vst4q_u64(uint64_t *ptr, simde_uint64x2x4_t val) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) vst4q_u64(ptr, val); #else uint64_t buf[8]; simde_uint64x2_private a_[4] = { simde_uint64x2_to_private(val.val[0]), simde_uint64x2_to_private(val.val[1]), simde_uint64x2_to_private(val.val[2]), simde_uint64x2_to_private(val.val[3]) }; for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { buf[i] = a_[i % 4].values[i / 4]; } simde_memcpy(ptr, buf, sizeof(buf)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vst4q_u64 #define vst4q_u64(a, b) simde_vst4q_u64((a), (b)) #endif #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ST4_H) */ simde-0.7.2/simde/arm/neon/sub.h000066400000000000000000000443251400333146700164130ustar00rootroot00000000000000/* 
SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_SUB_H) #define SIMDE_ARM_NEON_SUB_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vsub_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsub_f32(a, b); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsub_f32 #define vsub_f32(a, b) simde_vsub_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x1_t simde_vsub_f64(simde_float64x1_t a, simde_float64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vsub_f64(a, b); #else simde_float64x1_private r_, a_ = simde_float64x1_to_private(a), b_ = simde_float64x1_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_float64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vsub_f64 #define vsub_f64(a, b) simde_vsub_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vsub_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsub_s8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_sub_pi8(a, b); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < 
(sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsub_s8 #define vsub_s8(a, b) simde_vsub_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vsub_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsub_s16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_sub_pi16(a, b); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsub_s16 #define vsub_s16(a, b) simde_vsub_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vsub_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsub_s32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_sub_pi32(a, b); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsub_s32 #define vsub_s32(a, b) simde_vsub_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vsub_s64(simde_int64x1_t a, simde_int64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsub_s64(a, b); #else simde_int64x1_private r_, a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b); #if 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsub_s64 #define vsub_s64(a, b) simde_vsub_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vsub_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsub_u8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_sub_pi8(a, b); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsub_u8 #define vsub_u8(a, b) simde_vsub_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vsub_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsub_u16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_sub_pi16(a, b); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsub_u16 #define vsub_u16(a, b) simde_vsub_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vsub_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return 
vsub_u32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_sub_pi32(a, b); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsub_u32 #define vsub_u32(a, b) simde_vsub_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vsub_u64(simde_uint64x1_t a, simde_uint64x1_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsub_u64(a, b); #else simde_uint64x1_private r_, a_ = simde_uint64x1_to_private(a), b_ = simde_uint64x1_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsub_u64 #define vsub_u64(a, b) simde_vsub_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vsubq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubq_f32(a, b); #elif defined(SIMDE_X86_SSE_NATIVE) return _mm_sub_ps(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(float) a_ , b_, r_; a_ = a; b_ = b; r_ = vec_sub(a_, b_); return r_; #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_sub(a, b); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = 
a_.values[i] - b_.values[i]; } #endif return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubq_f32 #define vsubq_f32(a, b) simde_vsubq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vsubq_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vsubq_f64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_sub_pd(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_sub(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_sub(a, b); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vsubq_f64 #define vsubq_f64(a, b) simde_vsubq_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vsubq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubq_s8(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_sub_epi8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_sub(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_sub(a, b); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubq_s8 #define vsubq_s8(a, b) simde_vsubq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde_int16x8_t simde_vsubq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubq_s16(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_sub_epi16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_sub(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_sub(a, b); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubq_s16 #define vsubq_s16(a, b) simde_vsubq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vsubq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubq_s32(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_sub_epi32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_sub(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_sub(a, b); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubq_s32 #define vsubq_s32(a, b) simde_vsubq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vsubq_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubq_s64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_sub_epi64(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return 
vec_sub(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i64x2_sub(a, b); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubq_s64 #define vsubq_s64(a, b) simde_vsubq_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vsubq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubq_u8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_sub(a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubq_u8 #define vsubq_u8(a, b) simde_vsubq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vsubq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubq_u16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_sub(a, b); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef 
vsubq_u16 #define vsubq_u16(a, b) simde_vsubq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vsubq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubq_u32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_sub(a, b); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubq_u32 #define vsubq_u32(a, b) simde_vsubq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vsubq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubq_u64(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_sub(a, b); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.values = a_.values - b_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubq_u64 #define vsubq_u64(a, b) simde_vsubq_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_SUB_H) */ simde-0.7.2/simde/arm/neon/subl.h000066400000000000000000000075101400333146700165620ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without 
limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_SUBL_H) #define SIMDE_ARM_NEON_SUBL_H #include "sub.h" #include "movl.h" #include "movl_high.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vsubl_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubl_s8(a, b); #else return simde_vsubq_s16(simde_vmovl_s8(a), simde_vmovl_s8(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubl_s8 #define vsubl_s8(a, b) simde_vsubl_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vsubl_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubl_s16(a, b); #else return simde_vsubq_s32(simde_vmovl_s16(a), simde_vmovl_s16(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubl_s16 #define vsubl_s16(a, b) simde_vsubl_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vsubl_s32(simde_int32x2_t a, simde_int32x2_t b) { #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubl_s32(a, b); #else return simde_vsubq_s64(simde_vmovl_s32(a), simde_vmovl_s32(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubl_s32 #define vsubl_s32(a, b) simde_vsubl_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vsubl_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubl_u8(a, b); #else return simde_vsubq_u16(simde_vmovl_u8(a), simde_vmovl_u8(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubl_u8 #define vsubl_u8(a, b) simde_vsubl_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vsubl_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubl_u16(a, b); #else return simde_vsubq_u32(simde_vmovl_u16(a), simde_vmovl_u16(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubl_u16 #define vsubl_u16(a, b) simde_vsubl_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vsubl_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubl_u32(a, b); #else return simde_vsubq_u64(simde_vmovl_u32(a), simde_vmovl_u32(b)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubl_u32 #define vsubl_u32(a, b) simde_vsubl_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_SUBL_H) */ simde-0.7.2/simde/arm/neon/subw.h000066400000000000000000000163721400333146700166030ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the 
Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_SUBW_H) #define SIMDE_ARM_NEON_SUBW_H #include "types.h" #include "sub.h" #include "movl.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vsubw_s8(simde_int16x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubw_s8(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vsubq_s16(a, simde_vmovl_s8(b)); #else simde_int16x8_private r_; simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_int8x8_private b_ = simde_int8x8_to_private(b); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, b_.values); r_.values -= a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubw_s8 #define vsubw_s8(a, b) simde_vsubw_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vsubw_s16(simde_int32x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubw_s16(a, b); #elif 
SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vsubq_s32(a, simde_vmovl_s16(b)); #else simde_int32x4_private r_; simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_int16x4_private b_ = simde_int16x4_to_private(b); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, b_.values); r_.values -= a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubw_s16 #define vsubw_s16(a, b) simde_vsubw_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vsubw_s32(simde_int64x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubw_s32(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vsubq_s64(a, simde_vmovl_s32(b)); #else simde_int64x2_private r_; simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_int32x2_private b_ = simde_int32x2_to_private(b); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, b_.values); r_.values -= a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubw_s32 #define vsubw_s32(a, b) simde_vsubw_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vsubw_u8(simde_uint16x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubw_u8(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vsubq_u16(a, simde_vmovl_u8(b)); #else simde_uint16x8_private r_; simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_uint8x8_private b_ = 
simde_uint8x8_to_private(b); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, b_.values); r_.values -= a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubw_u8 #define vsubw_u8(a, b) simde_vsubw_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vsubw_u16(simde_uint32x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubw_u16(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vsubq_u32(a, simde_vmovl_u16(b)); #else simde_uint32x4_private r_; simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_uint16x4_private b_ = simde_uint16x4_to_private(b); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, b_.values); r_.values -= a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubw_u16 #define vsubw_u16(a, b) simde_vsubw_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vsubw_u32(simde_uint64x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsubw_u32(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vsubq_u64(a, simde_vmovl_u32(b)); #else simde_uint64x2_private r_; simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_uint32x2_private b_ = simde_uint32x2_to_private(b); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, b_.values); 
r_.values -= a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i]; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vsubw_u32 #define vsubw_u32(a, b) simde_vsubw_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_SUBW_H) */ simde-0.7.2/simde/arm/neon/subw_high.h000066400000000000000000000175511400333146700176020ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_SUBW_HIGH_H) #define SIMDE_ARM_NEON_SUBW_HIGH_H #include "types.h" #include "movl.h" #include "sub.h" #include "get_high.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vsubw_high_s8(simde_int16x8_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vsubw_high_s8(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vsubq_s16(a, simde_vmovl_s8(simde_vget_high_s8(b))); #else simde_int16x8_private r_; simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_int8x16_private b_ = simde_int8x16_to_private(b); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, b_.values); r_.values -= a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vsubw_high_s8 #define vsubw_high_s8(a, b) simde_vsubw_high_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vsubw_high_s16(simde_int32x4_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vsubw_high_s16(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vsubq_s32(a, simde_vmovl_s16(simde_vget_high_s16(b))); #else simde_int32x4_private r_; simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_int16x8_private b_ = simde_int16x8_to_private(b); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, b_.values); r_.values -= a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - 
b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vsubw_high_s16 #define vsubw_high_s16(a, b) simde_vsubw_high_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vsubw_high_s32(simde_int64x2_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vsubw_high_s32(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vsubq_s64(a, simde_vmovl_s32(simde_vget_high_s32(b))); #else simde_int64x2_private r_; simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_int32x4_private b_ = simde_int32x4_to_private(b); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, b_.values); r_.values -= a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vsubw_high_s32 #define vsubw_high_s32(a, b) simde_vsubw_high_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vsubw_high_u8(simde_uint16x8_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vsubw_high_u8(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vsubq_u16(a, simde_vmovl_u8(simde_vget_high_u8(b))); #else simde_uint16x8_private r_; simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_uint8x16_private b_ = simde_uint8x16_to_private(b); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, b_.values); r_.values -= a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - 
b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vsubw_high_u8 #define vsubw_high_u8(a, b) simde_vsubw_high_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vsubw_high_u16(simde_uint32x4_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vsubw_high_u16(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vsubq_u32(a, simde_vmovl_u16(simde_vget_high_u16(b))); #else simde_uint32x4_private r_; simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_uint16x8_private b_ = simde_uint16x8_to_private(b); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, b_.values); r_.values -= a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vsubw_high_u16 #define vsubw_high_u16(a, b) simde_vsubw_high_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vsubw_high_u32(simde_uint64x2_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vsubw_high_u32(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_vsubq_u64(a, simde_vmovl_u32(simde_vget_high_u32(b))); #else simde_uint64x2_private r_; simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_uint32x4_private b_ = simde_uint32x4_to_private(b); #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.values, b_.values); r_.values -= a_.values; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = 
a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vsubw_high_u32 #define vsubw_high_u32(a, b) simde_vsubw_high_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_SUBW_HIGH_H) */ simde-0.7.2/simde/arm/neon/tbl.h000066400000000000000000000171051400333146700163770ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_TBL_H) #define SIMDE_ARM_NEON_TBL_H #include "reinterpret.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vtbl1_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtbl1_u8(a, b); #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_shuffle_pi8(a, _mm_or_si64(b, _mm_cmpgt_pi8(b, _mm_set1_pi8(7)))); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] < 8) ? a_.values[b_.values[i]] : 0; } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtbl1_u8 #define vtbl1_u8(a, b) simde_vtbl1_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vtbl1_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtbl1_s8(a, b); #else return simde_vreinterpret_s8_u8(simde_vtbl1_u8(simde_vreinterpret_u8_s8(a), simde_vreinterpret_u8_s8(b))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtbl1_s8 #define vtbl1_s8(a, b) simde_vtbl1_s8((a), (b)) #endif #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vtbl2_u8(simde_uint8x8x2_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtbl2_u8(a, b); #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m128i a128 = _mm_set_epi64(a.val[1], a.val[0]); __m128i b128 = _mm_set1_epi64(b); __m128i r128 = _mm_shuffle_epi8(a128, _mm_or_si128(b128, _mm_cmpgt_epi8(b128, _mm_set1_epi8(15)))); return _mm_movepi64_pi64(r128); #else simde_uint8x8_private r_, a_[2] = { simde_uint8x8_to_private(a.val[0]), simde_uint8x8_to_private(a.val[1]) 
}, b_ = simde_uint8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] < 16) ? a_[b_.values[i] / 8].values[b_.values[i] & 7] : 0; } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtbl2_u8 #define vtbl2_u8(a, b) simde_vtbl2_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vtbl2_s8(simde_int8x8x2_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtbl2_s8(a, b); #else simde_uint8x8x2_t a_; simde_memcpy(&a_, &a, sizeof(a_)); return simde_vreinterpret_s8_u8(simde_vtbl2_u8(a_, simde_vreinterpret_u8_s8(b))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtbl2_s8 #define vtbl2_s8(a, b) simde_vtbl2_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vtbl3_u8(simde_uint8x8x3_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtbl3_u8(a, b); #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m128i b128 = _mm_set1_epi64(b); b128 = _mm_or_si128(b128, _mm_cmpgt_epi8(b128, _mm_set1_epi8(23))); __m128i r128_01 = _mm_shuffle_epi8(_mm_set_epi64(a.val[1], a.val[0]), b128); __m128i r128_2 = _mm_shuffle_epi8(_mm_set1_epi64(a.val[2]), b128); __m128i r128 = _mm_blendv_epi8(r128_01, r128_2, _mm_slli_epi32(b128, 3)); return _mm_movepi64_pi64(r128); #else simde_uint8x8_private r_, a_[3] = { simde_uint8x8_to_private(a.val[0]), simde_uint8x8_to_private(a.val[1]), simde_uint8x8_to_private(a.val[2]) }, b_ = simde_uint8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] < 24) ? 
a_[b_.values[i] / 8].values[b_.values[i] & 7] : 0; } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtbl3_u8 #define vtbl3_u8(a, b) simde_vtbl3_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vtbl3_s8(simde_int8x8x3_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtbl3_s8(a, b); #else simde_uint8x8x3_t a_; simde_memcpy(&a_, &a, sizeof(a_)); return simde_vreinterpret_s8_u8(simde_vtbl3_u8(a_, simde_vreinterpret_u8_s8(b))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtbl3_s8 #define vtbl3_s8(a, b) simde_vtbl3_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vtbl4_u8(simde_uint8x8x4_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtbl4_u8(a, b); #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m128i b128 = _mm_set1_epi64(b); b128 = _mm_or_si128(b128, _mm_cmpgt_epi8(b128, _mm_set1_epi8(31))); __m128i r128_01 = _mm_shuffle_epi8(_mm_set_epi64(a.val[1], a.val[0]), b128); __m128i r128_23 = _mm_shuffle_epi8(_mm_set_epi64(a.val[3], a.val[2]), b128); __m128i r128 = _mm_blendv_epi8(r128_01, r128_23, _mm_slli_epi32(b128, 3)); return _mm_movepi64_pi64(r128); #else simde_uint8x8_private r_, a_[4] = { simde_uint8x8_to_private(a.val[0]), simde_uint8x8_to_private(a.val[1]), simde_uint8x8_to_private(a.val[2]), simde_uint8x8_to_private(a.val[3]) }, b_ = simde_uint8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (b_.values[i] < 32) ? 
a_[b_.values[i] / 8].values[b_.values[i] & 7] : 0; } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtbl4_u8 #define vtbl4_u8(a, b) simde_vtbl4_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vtbl4_s8(simde_int8x8x4_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtbl4_s8(a, b); #else simde_uint8x8x4_t a_; simde_memcpy(&a_, &a, sizeof(a_)); return simde_vreinterpret_s8_u8(simde_vtbl4_u8(a_, simde_vreinterpret_u8_s8(b))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtbl4_s8 #define vtbl4_s8(a, b) simde_vtbl4_s8((a), (b)) #endif #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_TBL_H) */ simde-0.7.2/simde/arm/neon/tbx.h000066400000000000000000000216411400333146700164130ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_TBX_H) #define SIMDE_ARM_NEON_TBX_H #include "reinterpret.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vtbx1_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtbx1_u8(a, b, c); #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m128i a128 = _mm_set1_epi64(a); __m128i b128 = _mm_set1_epi64(b); __m128i c128 = _mm_set1_epi64(c); c128 = _mm_or_si128(c128, _mm_cmpgt_epi8(c128, _mm_set1_epi8(7))); __m128i r128 = _mm_shuffle_epi8(b128, c128); r128 = _mm_blendv_epi8(r128, a128, c128); return _mm_movepi64_pi64(r128); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b), c_ = simde_uint8x8_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (c_.values[i] < 8) ? 
b_.values[c_.values[i]] : a_.values[i]; } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtbx1_u8 #define vtbx1_u8(a, b, c) simde_vtbx1_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vtbx1_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtbx1_s8(a, b, c); #else return simde_vreinterpret_s8_u8(simde_vtbx1_u8(simde_vreinterpret_u8_s8(a), simde_vreinterpret_u8_s8(b), simde_vreinterpret_u8_s8(c))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtbx1_s8 #define vtbx1_s8(a, b, c) simde_vtbx1_s8((a), (b), (c)) #endif #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vtbx2_u8(simde_uint8x8_t a, simde_uint8x8x2_t b, simde_uint8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtbx2_u8(a, b, c); #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m128i a128 = _mm_set1_epi64(a); __m128i b128 = _mm_set_epi64(b.val[1], b.val[0]); __m128i c128 = _mm_set1_epi64(c); c128 = _mm_or_si128(c128, _mm_cmpgt_epi8(c128, _mm_set1_epi8(15))); __m128i r128 = _mm_shuffle_epi8(b128, c128); r128 = _mm_blendv_epi8(r128, a128, c128); return _mm_movepi64_pi64(r128); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_[2] = { simde_uint8x8_to_private(b.val[0]), simde_uint8x8_to_private(b.val[1]) }, c_ = simde_uint8x8_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (c_.values[i] < 16) ? 
b_[c_.values[i] / 8].values[c_.values[i] & 7] : a_.values[i]; } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtbx2_u8 #define vtbx2_u8(a, b, c) simde_vtbx2_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vtbx2_s8(simde_int8x8_t a, simde_int8x8x2_t b, simde_int8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtbx2_s8(a, b, c); #else simde_uint8x8x2_t b_; simde_memcpy(&b_, &b, sizeof(b_)); return simde_vreinterpret_s8_u8(simde_vtbx2_u8(simde_vreinterpret_u8_s8(a), b_, simde_vreinterpret_u8_s8(c))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtbx2_s8 #define vtbx2_s8(a, b, c) simde_vtbx2_s8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vtbx3_u8(simde_uint8x8_t a, simde_uint8x8x3_t b, simde_uint8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtbx3_u8(a, b, c); #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m128i a128 = _mm_set1_epi64(a); __m128i c128 = _mm_set1_epi64(c); c128 = _mm_or_si128(c128, _mm_cmpgt_epi8(c128, _mm_set1_epi8(23))); __m128i r128_01 = _mm_shuffle_epi8(_mm_set_epi64(b.val[1], b.val[0]), c128); __m128i r128_2 = _mm_shuffle_epi8(_mm_set1_epi64(b.val[2]), c128); __m128i r128 = _mm_blendv_epi8(r128_01, r128_2, _mm_slli_epi32(c128, 3)); r128 = _mm_blendv_epi8(r128, a128, c128); return _mm_movepi64_pi64(r128); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_[3] = { simde_uint8x8_to_private(b.val[0]), simde_uint8x8_to_private(b.val[1]), simde_uint8x8_to_private(b.val[2]) }, c_ = simde_uint8x8_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (c_.values[i] < 24) ? 
b_[c_.values[i] / 8].values[c_.values[i] & 7] : a_.values[i]; } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtbx3_u8 #define vtbx3_u8(a, b, c) simde_vtbx3_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vtbx3_s8(simde_int8x8_t a, simde_int8x8x3_t b, simde_int8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtbx3_s8(a, b, c); #else simde_uint8x8x3_t b_; simde_memcpy(&b_, &b, sizeof(b_)); return simde_vreinterpret_s8_u8(simde_vtbx3_u8(simde_vreinterpret_u8_s8(a), b_, simde_vreinterpret_u8_s8(c))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtbx3_s8 #define vtbx3_s8(a, b, c) simde_vtbx3_s8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vtbx4_u8(simde_uint8x8_t a, simde_uint8x8x4_t b, simde_uint8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtbx4_u8(a, b, c); #elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) __m128i a128 = _mm_set1_epi64(a); __m128i c128 = _mm_set1_epi64(c); c128 = _mm_or_si128(c128, _mm_cmpgt_epi8(c128, _mm_set1_epi8(31))); __m128i r128_01 = _mm_shuffle_epi8(_mm_set_epi64(b.val[1], b.val[0]), c128); __m128i r128_23 = _mm_shuffle_epi8(_mm_set_epi64(b.val[3], b.val[2]), c128); __m128i r128 = _mm_blendv_epi8(r128_01, r128_23, _mm_slli_epi32(c128, 3)); r128 = _mm_blendv_epi8(r128, a128, c128); return _mm_movepi64_pi64(r128); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_[4] = { simde_uint8x8_to_private(b.val[0]), simde_uint8x8_to_private(b.val[1]), simde_uint8x8_to_private(b.val[2]), simde_uint8x8_to_private(b.val[3]) }, c_ = simde_uint8x8_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = (c_.values[i] < 32) ? 
b_[c_.values[i] / 8].values[c_.values[i] & 7] : a_.values[i]; } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtbx4_u8 #define vtbx4_u8(a, b, c) simde_vtbx4_u8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vtbx4_s8(simde_int8x8_t a, simde_int8x8x4_t b, simde_int8x8_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtbx4_s8(a, b, c); #else simde_uint8x8x4_t b_; simde_memcpy(&b_, &b, sizeof(b_)); return simde_vreinterpret_s8_u8(simde_vtbx4_u8(simde_vreinterpret_u8_s8(a), b_, simde_vreinterpret_u8_s8(c))); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtbx4_s8 #define vtbx4_s8(a, b, c) simde_vtbx4_s8((a), (b), (c)) #endif #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_TBX_H) */ simde-0.7.2/simde/arm/neon/trn.h000066400000000000000000000164131400333146700164220ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_TRN_H) && !defined(SIMDE_BUG_INTEL_857088) #define SIMDE_ARM_NEON_TRN_H #include "types.h" #include "trn1.h" #include "trn2.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2x2_t simde_vtrn_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtrn_f32(a, b); #else simde_float32x2x2_t r = { { simde_vtrn1_f32(a, b), simde_vtrn2_f32(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtrn_f32 #define vtrn_f32(a, b) simde_vtrn_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8x2_t simde_vtrn_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtrn_s8(a, b); #else simde_int8x8x2_t r = { { simde_vtrn1_s8(a, b), simde_vtrn2_s8(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtrn_s8 #define vtrn_s8(a, b) simde_vtrn_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4x2_t simde_vtrn_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtrn_s16(a, b); #else simde_int16x4x2_t r = { { simde_vtrn1_s16(a, b), simde_vtrn2_s16(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtrn_s16 #define vtrn_s16(a, b) simde_vtrn_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2x2_t simde_vtrn_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtrn_s32(a, b); #else simde_int32x2x2_t r = { { simde_vtrn1_s32(a, b), simde_vtrn2_s32(a, b) 
} }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtrn_s32 #define vtrn_s32(a, b) simde_vtrn_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8x2_t simde_vtrn_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtrn_u8(a, b); #else simde_uint8x8x2_t r = { { simde_vtrn1_u8(a, b), simde_vtrn2_u8(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtrn_u8 #define vtrn_u8(a, b) simde_vtrn_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4x2_t simde_vtrn_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtrn_u16(a, b); #else simde_uint16x4x2_t r = { { simde_vtrn1_u16(a, b), simde_vtrn2_u16(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtrn_u16 #define vtrn_u16(a, b) simde_vtrn_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2x2_t simde_vtrn_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtrn_u32(a, b); #else simde_uint32x2x2_t r = { { simde_vtrn1_u32(a, b), simde_vtrn2_u32(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtrn_u32 #define vtrn_u32(a, b) simde_vtrn_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4x2_t simde_vtrnq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtrnq_f32(a, b); #else simde_float32x4x2_t r = { { simde_vtrn1q_f32(a, b), simde_vtrn2q_f32(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtrnq_f32 #define vtrnq_f32(a, b) simde_vtrnq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16x2_t simde_vtrnq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtrnq_s8(a, b); #else simde_int8x16x2_t r = { { simde_vtrn1q_s8(a, b), simde_vtrn2q_s8(a, b) } }; return r; #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtrnq_s8 #define vtrnq_s8(a, b) simde_vtrnq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8x2_t simde_vtrnq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtrnq_s16(a, b); #else simde_int16x8x2_t r = { { simde_vtrn1q_s16(a, b), simde_vtrn2q_s16(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtrnq_s16 #define vtrnq_s16(a, b) simde_vtrnq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4x2_t simde_vtrnq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtrnq_s32(a, b); #else simde_int32x4x2_t r = { { simde_vtrn1q_s32(a, b), simde_vtrn2q_s32(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtrnq_s32 #define vtrnq_s32(a, b) simde_vtrnq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16x2_t simde_vtrnq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtrnq_u8(a, b); #else simde_uint8x16x2_t r = { { simde_vtrn1q_u8(a, b), simde_vtrn2q_u8(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtrnq_u8 #define vtrnq_u8(a, b) simde_vtrnq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8x2_t simde_vtrnq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtrnq_u16(a, b); #else simde_uint16x8x2_t r = { { simde_vtrn1q_u16(a, b), simde_vtrn2q_u16(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtrnq_u16 #define vtrnq_u16(a, b) simde_vtrnq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4x2_t simde_vtrnq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtrnq_u32(a, b); #else simde_uint32x4x2_t r = { { simde_vtrn1q_u32(a, b), simde_vtrn2q_u32(a, b) } }; return r; #endif } #if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtrnq_u32 #define vtrnq_u32(a, b) simde_vtrnq_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_TRN_H) */ simde-0.7.2/simde/arm/neon/trn1.h000066400000000000000000000341741400333146700165070ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_TRN1_H) #define SIMDE_ARM_NEON_TRN1_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vtrn1_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn1_f32(a, b); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx]; r_.values[idx | 1] = b_.values[idx]; } return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn1_f32 #define vtrn1_f32(a, b) simde_vtrn1_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vtrn1_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn1_s8(a, b); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx]; r_.values[idx | 1] = b_.values[idx]; } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn1_s8 #define vtrn1_s8(a, b) simde_vtrn1_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vtrn1_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn1_s16(a, b); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < 
halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx]; r_.values[idx | 1] = b_.values[idx]; } return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn1_s16 #define vtrn1_s16(a, b) simde_vtrn1_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vtrn1_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn1_s32(a, b); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx]; r_.values[idx | 1] = b_.values[idx]; } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn1_s32 #define vtrn1_s32(a, b) simde_vtrn1_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vtrn1_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn1_u8(a, b); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx]; r_.values[idx | 1] = b_.values[idx]; } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn1_u8 #define vtrn1_u8(a, b) simde_vtrn1_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vtrn1_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn1_u16(a, b); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; 
SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx]; r_.values[idx | 1] = b_.values[idx]; } return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn1_u16 #define vtrn1_u16(a, b) simde_vtrn1_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vtrn1_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn1_u32(a, b); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx]; r_.values[idx | 1] = b_.values[idx]; } return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn1_u32 #define vtrn1_u32(a, b) simde_vtrn1_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vtrn1q_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn1q_f32(a, b); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx]; r_.values[idx | 1] = b_.values[idx]; } return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn1q_f32 #define vtrn1q_f32(a, b) simde_vtrn1q_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vtrn1q_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn1q_f64(a, b); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); 
const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx]; r_.values[idx | 1] = b_.values[idx]; } return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn1q_f64 #define vtrn1q_f64(a, b) simde_vtrn1q_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vtrn1q_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn1q_s8(a, b); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx]; r_.values[idx | 1] = b_.values[idx]; } return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn1q_s8 #define vtrn1q_s8(a, b) simde_vtrn1q_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vtrn1q_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn1q_s16(a, b); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx]; r_.values[idx | 1] = b_.values[idx]; } return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn1q_s16 #define vtrn1q_s16(a, b) simde_vtrn1q_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vtrn1q_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn1q_s32(a, b); #else simde_int32x4_private r_, a_ = 
simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx]; r_.values[idx | 1] = b_.values[idx]; } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn1q_s32 #define vtrn1q_s32(a, b) simde_vtrn1q_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vtrn1q_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn1q_s64(a, b); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx]; r_.values[idx | 1] = b_.values[idx]; } return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn1q_s64 #define vtrn1q_s64(a, b) simde_vtrn1q_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vtrn1q_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn1q_u8(a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx]; r_.values[idx | 1] = b_.values[idx]; } return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn1q_u8 #define vtrn1q_u8(a, b) simde_vtrn1q_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vtrn1q_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return 
vtrn1q_u16(a, b); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx]; r_.values[idx | 1] = b_.values[idx]; } return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn1q_u16 #define vtrn1q_u16(a, b) simde_vtrn1q_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vtrn1q_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn1q_u32(a, b); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx]; r_.values[idx | 1] = b_.values[idx]; } return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn1q_u32 #define vtrn1q_u32(a, b) simde_vtrn1q_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vtrn1q_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn1q_u64(a, b); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx]; r_.values[idx | 1] = b_.values[idx]; } return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn1q_u64 #define vtrn1q_u64(a, b) simde_vtrn1q_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* 
!defined(SIMDE_ARM_NEON_TRN1_H) */ simde-0.7.2/simde/arm/neon/trn2.h000066400000000000000000000344031400333146700165030ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_TRN2_H) #define SIMDE_ARM_NEON_TRN2_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vtrn2_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn2_f32(a, b); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx | 1]; r_.values[idx | 1] = b_.values[idx | 1]; } return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn2_f32 #define vtrn2_f32(a, b) simde_vtrn2_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vtrn2_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn2_s8(a, b); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx | 1]; r_.values[idx | 1] = b_.values[idx | 1]; } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn2_s8 #define vtrn2_s8(a, b) simde_vtrn2_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vtrn2_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn2_s16(a, b); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; 
i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx | 1]; r_.values[idx | 1] = b_.values[idx | 1]; } return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn2_s16 #define vtrn2_s16(a, b) simde_vtrn2_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vtrn2_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn2_s32(a, b); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx | 1]; r_.values[idx | 1] = b_.values[idx | 1]; } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn2_s32 #define vtrn2_s32(a, b) simde_vtrn2_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vtrn2_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn2_u8(a, b); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx | 1]; r_.values[idx | 1] = b_.values[idx | 1]; } return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn2_u8 #define vtrn2_u8(a, b) simde_vtrn2_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vtrn2_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn2_u16(a, b); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); const size_t halfway_point = sizeof(r_.values) / 
sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx | 1]; r_.values[idx | 1] = b_.values[idx | 1]; } return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn2_u16 #define vtrn2_u16(a, b) simde_vtrn2_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vtrn2_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn2_u32(a, b); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx | 1]; r_.values[idx | 1] = b_.values[idx | 1]; } return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn2_u32 #define vtrn2_u32(a, b) simde_vtrn2_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vtrn2q_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn2q_f32(a, b); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx | 1]; r_.values[idx | 1] = b_.values[idx | 1]; } return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn2q_f32 #define vtrn2q_f32(a, b) simde_vtrn2q_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vtrn2q_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn2q_f64(a, b); #else simde_float64x2_private r_, a_ = 
simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx | 1]; r_.values[idx | 1] = b_.values[idx | 1]; } return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn2q_f64 #define vtrn2q_f64(a, b) simde_vtrn2q_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vtrn2q_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn2q_s8(a, b); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx | 1]; r_.values[idx | 1] = b_.values[idx | 1]; } return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn2q_s8 #define vtrn2q_s8(a, b) simde_vtrn2q_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vtrn2q_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn2q_s16(a, b); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx | 1]; r_.values[idx | 1] = b_.values[idx | 1]; } return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn2q_s16 #define vtrn2q_s16(a, b) simde_vtrn2q_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vtrn2q_s32(simde_int32x4_t a, simde_int32x4_t b) { #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn2q_s32(a, b); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx | 1]; r_.values[idx | 1] = b_.values[idx | 1]; } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn2q_s32 #define vtrn2q_s32(a, b) simde_vtrn2q_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vtrn2q_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn2q_s64(a, b); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx | 1]; r_.values[idx | 1] = b_.values[idx | 1]; } return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn2q_s64 #define vtrn2q_s64(a, b) simde_vtrn2q_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vtrn2q_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn2q_u8(a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx | 1]; r_.values[idx | 1] = b_.values[idx | 1]; } return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn2q_u8 #define vtrn2q_u8(a, b) simde_vtrn2q_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde_uint16x8_t simde_vtrn2q_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn2q_u16(a, b); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx | 1]; r_.values[idx | 1] = b_.values[idx | 1]; } return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn2q_u16 #define vtrn2q_u16(a, b) simde_vtrn2q_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vtrn2q_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn2q_u32(a, b); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx | 1]; r_.values[idx | 1] = b_.values[idx | 1]; } return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtrn2q_u32 #define vtrn2q_u32(a, b) simde_vtrn2q_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vtrn2q_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtrn2q_u64(a, b); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[idx] = a_.values[idx | 1]; r_.values[idx | 1] = b_.values[idx | 1]; } return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef 
vtrn2q_u64 #define vtrn2q_u64(a, b) simde_vtrn2q_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_TRN2_H) */ simde-0.7.2/simde/arm/neon/tst.h000066400000000000000000000422201400333146700164240ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_ARM_NEON_TST_H) #define SIMDE_ARM_NEON_TST_H #include "and.h" #include "ceqz.h" #include "cgt.h" #include "combine.h" #include "dup_n.h" #include "get_low.h" #include "mvn.h" #include "reinterpret.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vtstq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtstq_s8(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_ne(wasm_v128_and(a, b), wasm_i8x16_splat(0)); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmvnq_u8(simde_vceqzq_s8(simde_vandq_s8(a, b))); #else simde_int8x16_private a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); simde_uint8x16_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT8_MAX : 0; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtstq_s8 #define vtstq_s8(a, b) simde_vtstq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vtstq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtstq_s16(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_ne(wasm_v128_and(a, b), wasm_i16x8_splat(0)); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmvnq_u16(simde_vceqzq_s16(simde_vandq_s16(a, b))); #else simde_int16x8_private a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); simde_uint16x8_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT16_MAX : 0; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtstq_s16 #define vtstq_s16(a, b) simde_vtstq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vtstq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtstq_s32(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_ne(wasm_v128_and(a, b), wasm_i32x4_splat(0)); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmvnq_u32(simde_vceqzq_s32(simde_vandq_s32(a, b))); #else simde_int32x4_private a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); simde_uint32x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT32_MAX : 0; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtstq_s32 #define vtstq_s32(a, b) simde_vtstq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vtstq_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtstq_s64(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vceqzq_u64(simde_vceqzq_s64(simde_vandq_s64(a, b))); #else simde_int64x2_private a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); simde_uint64x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT64_MAX : 0; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtstq_s64 #define vtstq_s64(a, b) simde_vtstq_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vtstq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtstq_u8(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_ne(wasm_v128_and(a, b), wasm_i8x16_splat(0)); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmvnq_u8(simde_vceqzq_u8(simde_vandq_u8(a, b))); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT8_MAX : 0; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtstq_u8 #define vtstq_u8(a, b) simde_vtstq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vtstq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtstq_u16(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i16x8_ne(wasm_v128_and(a, b), wasm_i16x8_splat(0)); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmvnq_u16(simde_vceqzq_u16(simde_vandq_u16(a, b))); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT16_MAX : 0; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtstq_u16 #define vtstq_u16(a, b) simde_vtstq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vtstq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtstq_u32(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i32x4_ne(wasm_v128_and(a, b), wasm_i32x4_splat(0)); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmvnq_u32(simde_vceqzq_u32(simde_vandq_u32(a, b))); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT32_MAX : 0; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtstq_u32 #define vtstq_u32(a, b) simde_vtstq_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vtstq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtstq_u64(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vceqzq_u64(simde_vceqzq_u64(simde_vandq_u64(a, b))); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT64_MAX : 0; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtstq_u64 #define vtstq_u64(a, b) simde_vtstq_u64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vtst_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtst_s8(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmvn_u8(simde_vceqz_s8(simde_vand_s8(a, b))); #else simde_int8x8_private a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); simde_uint8x8_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT8_MAX : 0; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtst_s8 #define vtst_s8(a, b) simde_vtst_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vtst_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtst_s16(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmvn_u16(simde_vceqz_s16(simde_vand_s16(a, b))); #else simde_int16x4_private a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); simde_uint16x4_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT16_MAX : 0; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtst_s16 #define vtst_s16(a, b) simde_vtst_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vtst_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtst_s32(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmvn_u32(simde_vceqz_s32(simde_vand_s32(a, b))); #else simde_int32x2_private a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); simde_uint32x2_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT32_MAX : 0; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtst_s32 #define vtst_s32(a, b) simde_vtst_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vtst_s64(simde_int64x1_t a, simde_int64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtst_s64(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vceqz_u64(simde_vceqz_s64(simde_vand_s64(a, b))); #else simde_int64x1_private a_ = simde_int64x1_to_private(a), b_ = simde_int64x1_to_private(b); simde_uint64x1_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT64_MAX : 0; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtst_s64 #define vtst_s64(a, b) simde_vtst_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vtst_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtst_u8(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmvn_u8(simde_vceqz_u8(simde_vand_u8(a, b))); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT8_MAX : 0; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtst_u8 #define vtst_u8(a, b) simde_vtst_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vtst_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtst_u16(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmvn_u16(simde_vceqz_u16(simde_vand_u16(a, b))); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT16_MAX : 0; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtst_u16 #define vtst_u16(a, b) simde_vtst_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vtst_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vtst_u32(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vmvn_u32(simde_vceqz_u32(simde_vand_u32(a, b))); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT32_MAX : 0; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vtst_u32 #define vtst_u32(a, b) simde_vtst_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vtst_u64(simde_uint64x1_t a, simde_uint64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vtst_u64(a, b); #elif SIMDE_NATURAL_VECTOR_SIZE > 0 return simde_vceqz_u64(simde_vceqz_u64(simde_vand_u64(a, b))); #else simde_uint64x1_private r_, a_ = simde_uint64x1_to_private(a), b_ = simde_uint64x1_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT64_MAX : 0; } #endif return simde_uint64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vtst_u64 #define vtst_u64(a, b) simde_vtst_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_TST_H) */ simde-0.7.2/simde/arm/neon/types.h000066400000000000000000000637251400333146700167730ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_TYPES_H) #define SIMDE_ARM_NEON_TYPES_H #include "../../simde-common.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ #if defined(SIMDE_VECTOR_SUBSCRIPT) #define SIMDE_ARM_NEON_TYPE_INT_DEFINE_(Element_Type_Name, Element_Count, Alignment) \ typedef struct simde_##Element_Type_Name##x##Element_Count##_private { \ SIMDE_ALIGN_TO(Alignment) Element_Type_Name##_t values SIMDE_VECTOR(sizeof(Element_Type_Name##_t) * Element_Count); \ } simde_##Element_Type_Name##x##Element_Count##_private; #define SIMDE_ARM_NEON_TYPE_FLOAT_DEFINE_(Element_Size, Element_Count, Alignment) \ typedef struct simde_float##Element_Size##x##Element_Count##_private { \ SIMDE_ALIGN_TO(Alignment) simde_float##Element_Size values SIMDE_VECTOR(sizeof(simde_float##Element_Size) * Element_Count); \ } simde_float##Element_Size##x##Element_Count##_private; #else #define SIMDE_ARM_NEON_TYPE_INT_DEFINE_(Element_Type_Name, Element_Count, Alignment) \ typedef struct simde_##Element_Type_Name##x##Element_Count##_private { \ SIMDE_ALIGN_TO(Alignment) Element_Type_Name##_t values[Element_Count]; \ } simde_##Element_Type_Name##x##Element_Count##_private; #define SIMDE_ARM_NEON_TYPE_FLOAT_DEFINE_(Element_Size, Element_Count, Alignment) \ typedef struct simde_float##Element_Size##x##Element_Count##_private { \ SIMDE_ALIGN_TO(Alignment) simde_float##Element_Size values[Element_Count]; \ } simde_float##Element_Size##x##Element_Count##_private; #endif 
SIMDE_ARM_NEON_TYPE_INT_DEFINE_( int8, 8, SIMDE_ALIGN_8_) SIMDE_ARM_NEON_TYPE_INT_DEFINE_( int16, 4, SIMDE_ALIGN_8_) SIMDE_ARM_NEON_TYPE_INT_DEFINE_( int32, 2, SIMDE_ALIGN_8_) SIMDE_ARM_NEON_TYPE_INT_DEFINE_( int64, 1, SIMDE_ALIGN_8_) SIMDE_ARM_NEON_TYPE_INT_DEFINE_( uint8, 8, SIMDE_ALIGN_8_) SIMDE_ARM_NEON_TYPE_INT_DEFINE_( uint16, 4, SIMDE_ALIGN_8_) SIMDE_ARM_NEON_TYPE_INT_DEFINE_( uint32, 2, SIMDE_ALIGN_8_) SIMDE_ARM_NEON_TYPE_INT_DEFINE_( uint64, 1, SIMDE_ALIGN_8_) SIMDE_ARM_NEON_TYPE_INT_DEFINE_( int8, 16, SIMDE_ALIGN_16_) SIMDE_ARM_NEON_TYPE_INT_DEFINE_( int16, 8, SIMDE_ALIGN_16_) SIMDE_ARM_NEON_TYPE_INT_DEFINE_( int32, 4, SIMDE_ALIGN_16_) SIMDE_ARM_NEON_TYPE_INT_DEFINE_( int64, 2, SIMDE_ALIGN_16_) SIMDE_ARM_NEON_TYPE_INT_DEFINE_( uint8, 16, SIMDE_ALIGN_16_) SIMDE_ARM_NEON_TYPE_INT_DEFINE_( uint16, 8, SIMDE_ALIGN_16_) SIMDE_ARM_NEON_TYPE_INT_DEFINE_( uint32, 4, SIMDE_ALIGN_16_) SIMDE_ARM_NEON_TYPE_INT_DEFINE_( uint64, 2, SIMDE_ALIGN_16_) SIMDE_ARM_NEON_TYPE_FLOAT_DEFINE_(32, 2, SIMDE_ALIGN_8_) SIMDE_ARM_NEON_TYPE_FLOAT_DEFINE_(64, 1, SIMDE_ALIGN_8_) SIMDE_ARM_NEON_TYPE_FLOAT_DEFINE_(32, 4, SIMDE_ALIGN_16_) SIMDE_ARM_NEON_TYPE_FLOAT_DEFINE_(64, 2, SIMDE_ALIGN_16_) #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) typedef float32_t simde_float32_t; typedef int8x8_t simde_int8x8_t; typedef int16x4_t simde_int16x4_t; typedef int32x2_t simde_int32x2_t; typedef int64x1_t simde_int64x1_t; typedef uint8x8_t simde_uint8x8_t; typedef uint16x4_t simde_uint16x4_t; typedef uint32x2_t simde_uint32x2_t; typedef uint64x1_t simde_uint64x1_t; typedef float32x2_t simde_float32x2_t; typedef int8x16_t simde_int8x16_t; typedef int16x8_t simde_int16x8_t; typedef int32x4_t simde_int32x4_t; typedef int64x2_t simde_int64x2_t; typedef uint8x16_t simde_uint8x16_t; typedef uint16x8_t simde_uint16x8_t; typedef uint32x4_t simde_uint32x4_t; typedef uint64x2_t simde_uint64x2_t; typedef float32x4_t simde_float32x4_t; typedef int8x8x2_t simde_int8x8x2_t; typedef int16x4x2_t simde_int16x4x2_t; typedef 
int32x2x2_t simde_int32x2x2_t; typedef int64x1x2_t simde_int64x1x2_t; typedef uint8x8x2_t simde_uint8x8x2_t; typedef uint16x4x2_t simde_uint16x4x2_t; typedef uint32x2x2_t simde_uint32x2x2_t; typedef uint64x1x2_t simde_uint64x1x2_t; typedef float32x2x2_t simde_float32x2x2_t; typedef int8x16x2_t simde_int8x16x2_t; typedef int16x8x2_t simde_int16x8x2_t; typedef int32x4x2_t simde_int32x4x2_t; typedef int64x2x2_t simde_int64x2x2_t; typedef uint8x16x2_t simde_uint8x16x2_t; typedef uint16x8x2_t simde_uint16x8x2_t; typedef uint32x4x2_t simde_uint32x4x2_t; typedef uint64x2x2_t simde_uint64x2x2_t; typedef float32x4x2_t simde_float32x4x2_t; typedef int8x8x3_t simde_int8x8x3_t; typedef int16x4x3_t simde_int16x4x3_t; typedef int32x2x3_t simde_int32x2x3_t; typedef int64x1x3_t simde_int64x1x3_t; typedef uint8x8x3_t simde_uint8x8x3_t; typedef uint16x4x3_t simde_uint16x4x3_t; typedef uint32x2x3_t simde_uint32x2x3_t; typedef uint64x1x3_t simde_uint64x1x3_t; typedef float32x2x3_t simde_float32x2x3_t; typedef int8x16x3_t simde_int8x16x3_t; typedef int16x8x3_t simde_int16x8x3_t; typedef int32x4x3_t simde_int32x4x3_t; typedef int64x2x3_t simde_int64x2x3_t; typedef uint8x16x3_t simde_uint8x16x3_t; typedef uint16x8x3_t simde_uint16x8x3_t; typedef uint32x4x3_t simde_uint32x4x3_t; typedef uint64x2x3_t simde_uint64x2x3_t; typedef float32x4x3_t simde_float32x4x3_t; typedef int8x8x4_t simde_int8x8x4_t; typedef int16x4x4_t simde_int16x4x4_t; typedef int32x2x4_t simde_int32x2x4_t; typedef int64x1x4_t simde_int64x1x4_t; typedef uint8x8x4_t simde_uint8x8x4_t; typedef uint16x4x4_t simde_uint16x4x4_t; typedef uint32x2x4_t simde_uint32x2x4_t; typedef uint64x1x4_t simde_uint64x1x4_t; typedef float32x2x4_t simde_float32x2x4_t; typedef int8x16x4_t simde_int8x16x4_t; typedef int16x8x4_t simde_int16x8x4_t; typedef int32x4x4_t simde_int32x4x4_t; typedef int64x2x4_t simde_int64x2x4_t; typedef uint8x16x4_t simde_uint8x16x4_t; typedef uint16x8x4_t simde_uint16x8x4_t; typedef uint32x4x4_t simde_uint32x4x4_t; 
typedef uint64x2x4_t simde_uint64x2x4_t; typedef float32x4x4_t simde_float32x4x4_t; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) typedef float64_t simde_float64_t; typedef float64x1_t simde_float64x1_t; typedef float64x2_t simde_float64x2_t; typedef float64x1x2_t simde_float64x1x2_t; typedef float64x2x2_t simde_float64x2x2_t; typedef float64x1x3_t simde_float64x1x3_t; typedef float64x2x3_t simde_float64x2x3_t; typedef float64x1x4_t simde_float64x1x4_t; typedef float64x2x4_t simde_float64x2x4_t; #else #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1 #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2 #define SIMDE_ARM_NEON_NEED_PORTABLE_F64 #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN #endif #elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) #define SIMDE_ARM_NEON_NEED_PORTABLE_F32 #define SIMDE_ARM_NEON_NEED_PORTABLE_F64 #define SIMDE_ARM_NEON_NEED_PORTABLE_VXN #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN #if defined(SIMDE_X86_MMX_NATIVE) typedef __m64 simde_int8x8_t; typedef __m64 simde_int16x4_t; typedef __m64 simde_int32x2_t; typedef __m64 simde_int64x1_t; typedef __m64 simde_uint8x8_t; typedef __m64 simde_uint16x4_t; typedef __m64 simde_uint32x2_t; typedef __m64 simde_uint64x1_t; typedef __m64 simde_float32x2_t; typedef __m64 simde_float64x1_t; #else #define SIMDE_ARM_NEON_NEED_PORTABLE_I8X8 #define SIMDE_ARM_NEON_NEED_PORTABLE_I16X4 #define SIMDE_ARM_NEON_NEED_PORTABLE_I32X2 #define SIMDE_ARM_NEON_NEED_PORTABLE_I64X1 #define SIMDE_ARM_NEON_NEED_PORTABLE_U8X8 #define SIMDE_ARM_NEON_NEED_PORTABLE_U16X4 #define SIMDE_ARM_NEON_NEED_PORTABLE_U32X2 #define SIMDE_ARM_NEON_NEED_PORTABLE_U64X1 #define SIMDE_ARM_NEON_NEED_PORTABLE_F32X2 #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1 #endif #if defined(SIMDE_X86_SSE_NATIVE) typedef __m128 simde_float32x4_t; #else #define SIMDE_ARM_NEON_NEED_PORTABLE_F32X4 #endif #if defined(SIMDE_X86_SSE2_NATIVE) typedef __m128i simde_int8x16_t; typedef __m128i 
simde_int16x8_t; typedef __m128i simde_int32x4_t; typedef __m128i simde_int64x2_t; typedef __m128i simde_uint8x16_t; typedef __m128i simde_uint16x8_t; typedef __m128i simde_uint32x4_t; typedef __m128i simde_uint64x2_t; typedef __m128d simde_float64x2_t; #else #define SIMDE_ARM_NEON_NEED_PORTABLE_I8X16 #define SIMDE_ARM_NEON_NEED_PORTABLE_I16X8 #define SIMDE_ARM_NEON_NEED_PORTABLE_I32X4 #define SIMDE_ARM_NEON_NEED_PORTABLE_I64X2 #define SIMDE_ARM_NEON_NEED_PORTABLE_U8X16 #define SIMDE_ARM_NEON_NEED_PORTABLE_U16X8 #define SIMDE_ARM_NEON_NEED_PORTABLE_U32X4 #define SIMDE_ARM_NEON_NEED_PORTABLE_U64X2 #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2 #endif #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define SIMDE_ARM_NEON_NEED_PORTABLE_F32 #define SIMDE_ARM_NEON_NEED_PORTABLE_F64 #define SIMDE_ARM_NEON_NEED_PORTABLE_64BIT #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN #define SIMDE_ARM_NEON_NEED_PORTABLE_VXN typedef v128_t simde_int8x16_t; typedef v128_t simde_int16x8_t; typedef v128_t simde_int32x4_t; typedef v128_t simde_int64x2_t; typedef v128_t simde_uint8x16_t; typedef v128_t simde_uint16x8_t; typedef v128_t simde_uint32x4_t; typedef v128_t simde_uint64x2_t; typedef v128_t simde_float32x4_t; typedef v128_t simde_float64x2_t; #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define SIMDE_ARM_NEON_NEED_PORTABLE_F32 #define SIMDE_ARM_NEON_NEED_PORTABLE_F64 #define SIMDE_ARM_NEON_NEED_PORTABLE_64BIT #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN #define SIMDE_ARM_NEON_NEED_PORTABLE_VXN typedef SIMDE_POWER_ALTIVEC_VECTOR(signed char) simde_int8x16_t; typedef SIMDE_POWER_ALTIVEC_VECTOR(signed short) simde_int16x8_t; typedef SIMDE_POWER_ALTIVEC_VECTOR(signed int) simde_int32x4_t; typedef SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) simde_uint8x16_t; typedef SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) simde_uint16x8_t; typedef SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) simde_uint32x4_t; typedef 
SIMDE_POWER_ALTIVEC_VECTOR(float) simde_float32x4_t; #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) typedef SIMDE_POWER_ALTIVEC_VECTOR(signed long long) simde_int64x2_t; typedef SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) simde_uint64x2_t; typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde_float64x2_t; #else #define SIMDE_ARM_NEON_NEED_PORTABLE_I64X2 #define SIMDE_ARM_NEON_NEED_PORTABLE_U64X2 #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2 #endif #else #define SIMDE_ARM_NEON_NEED_PORTABLE_F32 #define SIMDE_ARM_NEON_NEED_PORTABLE_F64 #define SIMDE_ARM_NEON_NEED_PORTABLE_64BIT #define SIMDE_ARM_NEON_NEED_PORTABLE_128BIT #define SIMDE_ARM_NEON_NEED_PORTABLE_VXN #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I8X8) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) typedef simde_int8x8_private simde_int8x8_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I16X4) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) typedef simde_int16x4_private simde_int16x4_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I32X2) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) typedef simde_int32x2_private simde_int32x2_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I64X1) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) typedef simde_int64x1_private simde_int64x1_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U8X8) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) typedef simde_uint8x8_private simde_uint8x8_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U16X4) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) typedef simde_uint16x4_private simde_uint16x4_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U32X2) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) typedef simde_uint32x2_private simde_uint32x2_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U64X1) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) typedef simde_uint64x1_private simde_uint64x1_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F32X2) 
|| defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) typedef simde_float32x2_private simde_float32x2_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F64X1) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) typedef simde_float64x1_private simde_float64x1_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I8X16) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) typedef simde_int8x16_private simde_int8x16_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I16X8) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) typedef simde_int16x8_private simde_int16x8_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I32X4) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) typedef simde_int32x4_private simde_int32x4_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I64X2) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) typedef simde_int64x2_private simde_int64x2_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U8X16) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) typedef simde_uint8x16_private simde_uint8x16_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U16X8) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) typedef simde_uint16x8_private simde_uint16x8_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U32X4) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) typedef simde_uint32x4_private simde_uint32x4_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U64X2) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) typedef simde_uint64x2_private simde_uint64x2_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F32X4) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) typedef simde_float32x4_private simde_float32x4_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F64X2) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) typedef simde_float64x2_private simde_float64x2_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F32) typedef simde_float32 simde_float32_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F64) typedef simde_float64 simde_float64_t; #endif #if 
defined(SIMDE_ARM_NEON_NEED_PORTABLE_VXN) && !defined(SIMDE_BUG_INTEL_857088) typedef struct simde_int8x8x2_t { simde_int8x8_t val[2]; } simde_int8x8x2_t; typedef struct simde_int16x4x2_t { simde_int16x4_t val[2]; } simde_int16x4x2_t; typedef struct simde_int32x2x2_t { simde_int32x2_t val[2]; } simde_int32x2x2_t; typedef struct simde_int64x1x2_t { simde_int64x1_t val[2]; } simde_int64x1x2_t; typedef struct simde_uint8x8x2_t { simde_uint8x8_t val[2]; } simde_uint8x8x2_t; typedef struct simde_uint16x4x2_t { simde_uint16x4_t val[2]; } simde_uint16x4x2_t; typedef struct simde_uint32x2x2_t { simde_uint32x2_t val[2]; } simde_uint32x2x2_t; typedef struct simde_uint64x1x2_t { simde_uint64x1_t val[2]; } simde_uint64x1x2_t; typedef struct simde_float32x2x2_t { simde_float32x2_t val[2]; } simde_float32x2x2_t; typedef struct simde_int8x16x2_t { simde_int8x16_t val[2]; } simde_int8x16x2_t; typedef struct simde_int16x8x2_t { simde_int16x8_t val[2]; } simde_int16x8x2_t; typedef struct simde_int32x4x2_t { simde_int32x4_t val[2]; } simde_int32x4x2_t; typedef struct simde_int64x2x2_t { simde_int64x2_t val[2]; } simde_int64x2x2_t; typedef struct simde_uint8x16x2_t { simde_uint8x16_t val[2]; } simde_uint8x16x2_t; typedef struct simde_uint16x8x2_t { simde_uint16x8_t val[2]; } simde_uint16x8x2_t; typedef struct simde_uint32x4x2_t { simde_uint32x4_t val[2]; } simde_uint32x4x2_t; typedef struct simde_uint64x2x2_t { simde_uint64x2_t val[2]; } simde_uint64x2x2_t; typedef struct simde_float32x4x2_t { simde_float32x4_t val[2]; } simde_float32x4x2_t; typedef struct simde_int8x8x3_t { simde_int8x8_t val[3]; } simde_int8x8x3_t; typedef struct simde_int16x4x3_t { simde_int16x4_t val[3]; } simde_int16x4x3_t; typedef struct simde_int32x2x3_t { simde_int32x2_t val[3]; } simde_int32x2x3_t; typedef struct simde_int64x1x3_t { simde_int64x1_t val[3]; } simde_int64x1x3_t; typedef struct simde_uint8x8x3_t { simde_uint8x8_t val[3]; } simde_uint8x8x3_t; typedef struct simde_uint16x4x3_t { simde_uint16x4_t 
val[3]; } simde_uint16x4x3_t; typedef struct simde_uint32x2x3_t { simde_uint32x2_t val[3]; } simde_uint32x2x3_t; typedef struct simde_uint64x1x3_t { simde_uint64x1_t val[3]; } simde_uint64x1x3_t; typedef struct simde_float32x2x3_t { simde_float32x2_t val[3]; } simde_float32x2x3_t; typedef struct simde_int8x16x3_t { simde_int8x16_t val[3]; } simde_int8x16x3_t; typedef struct simde_int16x8x3_t { simde_int16x8_t val[3]; } simde_int16x8x3_t; typedef struct simde_int32x4x3_t { simde_int32x4_t val[3]; } simde_int32x4x3_t; typedef struct simde_int64x2x3_t { simde_int64x2_t val[3]; } simde_int64x2x3_t; typedef struct simde_uint8x16x3_t { simde_uint8x16_t val[3]; } simde_uint8x16x3_t; typedef struct simde_uint16x8x3_t { simde_uint16x8_t val[3]; } simde_uint16x8x3_t; typedef struct simde_uint32x4x3_t { simde_uint32x4_t val[3]; } simde_uint32x4x3_t; typedef struct simde_uint64x2x3_t { simde_uint64x2_t val[3]; } simde_uint64x2x3_t; typedef struct simde_float32x4x3_t { simde_float32x4_t val[3]; } simde_float32x4x3_t; typedef struct simde_int8x8x4_t { simde_int8x8_t val[4]; } simde_int8x8x4_t; typedef struct simde_int16x4x4_t { simde_int16x4_t val[4]; } simde_int16x4x4_t; typedef struct simde_int32x2x4_t { simde_int32x2_t val[4]; } simde_int32x2x4_t; typedef struct simde_int64x1x4_t { simde_int64x1_t val[4]; } simde_int64x1x4_t; typedef struct simde_uint8x8x4_t { simde_uint8x8_t val[4]; } simde_uint8x8x4_t; typedef struct simde_uint16x4x4_t { simde_uint16x4_t val[4]; } simde_uint16x4x4_t; typedef struct simde_uint32x2x4_t { simde_uint32x2_t val[4]; } simde_uint32x2x4_t; typedef struct simde_uint64x1x4_t { simde_uint64x1_t val[4]; } simde_uint64x1x4_t; typedef struct simde_float32x2x4_t { simde_float32x2_t val[4]; } simde_float32x2x4_t; typedef struct simde_int8x16x4_t { simde_int8x16_t val[4]; } simde_int8x16x4_t; typedef struct simde_int16x8x4_t { simde_int16x8_t val[4]; } simde_int16x8x4_t; typedef struct simde_int32x4x4_t { simde_int32x4_t val[4]; } simde_int32x4x4_t; typedef 
struct simde_int64x2x4_t { simde_int64x2_t val[4]; } simde_int64x2x4_t; typedef struct simde_uint8x16x4_t { simde_uint8x16_t val[4]; } simde_uint8x16x4_t; typedef struct simde_uint16x8x4_t { simde_uint16x8_t val[4]; } simde_uint16x8x4_t; typedef struct simde_uint32x4x4_t { simde_uint32x4_t val[4]; } simde_uint32x4x4_t; typedef struct simde_uint64x2x4_t { simde_uint64x2_t val[4]; } simde_uint64x2x4_t; typedef struct simde_float32x4x4_t { simde_float32x4_t val[4]; } simde_float32x4x4_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN) typedef struct simde_float64x1x2_t { simde_float64x1_t val[2]; } simde_float64x1x2_t; typedef struct simde_float64x1x3_t { simde_float64x1_t val[3]; } simde_float64x1x3_t; typedef struct simde_float64x1x4_t { simde_float64x1_t val[4]; } simde_float64x1x4_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN) typedef struct simde_float64x2x2_t { simde_float64x2_t val[2]; } simde_float64x2x2_t; typedef struct simde_float64x2x3_t { simde_float64x2_t val[3]; } simde_float64x2x3_t; typedef struct simde_float64x2x4_t { simde_float64x2_t val[4]; } simde_float64x2x4_t; #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) typedef simde_float32_t float32_t; typedef simde_int8x8_t int8x8_t; typedef simde_int16x4_t int16x4_t; typedef simde_int32x2_t int32x2_t; typedef simde_int64x1_t int64x1_t; typedef simde_uint8x8_t uint8x8_t; typedef simde_uint16x4_t uint16x4_t; typedef simde_uint32x2_t uint32x2_t; typedef simde_uint64x1_t uint64x1_t; typedef simde_float32x2_t float32x2_t; typedef simde_int8x16_t int8x16_t; typedef simde_int16x8_t int16x8_t; typedef simde_int32x4_t int32x4_t; typedef simde_int64x2_t int64x2_t; typedef simde_uint8x16_t uint8x16_t; typedef simde_uint16x8_t uint16x8_t; typedef simde_uint32x4_t uint32x4_t; typedef simde_uint64x2_t uint64x2_t; typedef simde_float32x4_t float32x4_t; typedef simde_int8x8x2_t int8x8x2_t; typedef simde_int16x4x2_t int16x4x2_t; typedef simde_int32x2x2_t int32x2x2_t; typedef 
simde_int64x1x2_t int64x1x2_t; typedef simde_uint8x8x2_t uint8x8x2_t; typedef simde_uint16x4x2_t uint16x4x2_t; typedef simde_uint32x2x2_t uint32x2x2_t; typedef simde_uint64x1x2_t uint64x1x2_t; typedef simde_float32x2x2_t float32x2x2_t; typedef simde_int8x16x2_t int8x16x2_t; typedef simde_int16x8x2_t int16x8x2_t; typedef simde_int32x4x2_t int32x4x2_t; typedef simde_int64x2x2_t int64x2x2_t; typedef simde_uint8x16x2_t uint8x16x2_t; typedef simde_uint16x8x2_t uint16x8x2_t; typedef simde_uint32x4x2_t uint32x4x2_t; typedef simde_uint64x2x2_t uint64x2x2_t; typedef simde_float32x4x2_t float32x4x2_t; typedef simde_int8x8x3_t int8x8x3_t; typedef simde_int16x4x3_t int16x4x3_t; typedef simde_int32x2x3_t int32x2x3_t; typedef simde_int64x1x3_t int64x1x3_t; typedef simde_uint8x8x3_t uint8x8x3_t; typedef simde_uint16x4x3_t uint16x4x3_t; typedef simde_uint32x2x3_t uint32x2x3_t; typedef simde_uint64x1x3_t uint64x1x3_t; typedef simde_float32x2x3_t float32x2x3_t; typedef simde_int8x16x3_t int8x16x3_t; typedef simde_int16x8x3_t int16x8x3_t; typedef simde_int32x4x3_t int32x4x3_t; typedef simde_int64x2x3_t int64x2x3_t; typedef simde_uint8x16x3_t uint8x16x3_t; typedef simde_uint16x8x3_t uint16x8x3_t; typedef simde_uint32x4x3_t uint32x4x3_t; typedef simde_uint64x2x3_t uint64x2x3_t; typedef simde_float32x4x3_t float32x4x3_t; typedef simde_int8x8x4_t int8x8x4_t; typedef simde_int16x4x4_t int16x4x4_t; typedef simde_int32x2x4_t int32x2x4_t; typedef simde_int64x1x4_t int64x1x4_t; typedef simde_uint8x8x4_t uint8x8x4_t; typedef simde_uint16x4x4_t uint16x4x4_t; typedef simde_uint32x2x4_t uint32x2x4_t; typedef simde_uint64x1x4_t uint64x1x4_t; typedef simde_float32x2x4_t float32x2x4_t; typedef simde_int8x16x4_t int8x16x4_t; typedef simde_int16x8x4_t int16x8x4_t; typedef simde_int32x4x4_t int32x4x4_t; typedef simde_int64x2x4_t int64x2x4_t; typedef simde_uint8x16x4_t uint8x16x4_t; typedef simde_uint16x8x4_t uint16x8x4_t; typedef simde_uint32x4x4_t uint32x4x4_t; typedef simde_uint64x2x4_t uint64x2x4_t; 
typedef simde_float32x4x4_t float32x4x4_t; #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) typedef simde_float64_t float64_t; typedef simde_float64x1_t float64x1_t; typedef simde_float64x2_t float64x2_t; typedef simde_float64x1x2_t float64x1x2_t; typedef simde_float64x2x2_t float64x2x2_t; typedef simde_float64x1x3_t float64x1x3_t; typedef simde_float64x2x3_t float64x2x3_t; typedef simde_float64x1x4_t float64x1x4_t; typedef simde_float64x2x4_t float64x2x4_t; #endif #define SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(T) \ SIMDE_FUNCTION_ATTRIBUTES simde_##T##_private simde_##T##_to_private (simde_##T##_t value) { simde_##T##_private to; simde_memcpy(&to, &value, sizeof(to)); return to; } \ SIMDE_FUNCTION_ATTRIBUTES simde_##T##_t simde_##T##_from_private (simde_##T##_private value) { simde_##T##_t to; simde_memcpy(&to, &value, sizeof(to)); return to; } SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int8x8) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int16x4) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int32x2) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int64x1) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint8x8) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint16x4) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint32x2) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint64x1) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float32x2) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float64x1) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int8x16) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int16x8) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int32x4) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int64x2) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint8x16) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint16x8) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint32x4) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint64x2) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float32x4) SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float64x2) SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* SIMDE_ARM_NEON_TYPES_H */ 
simde-0.7.2/simde/arm/neon/uqadd.h000066400000000000000000000233771400333146700167240ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_ARM_NEON_UQADD_H) #define SIMDE_ARM_NEON_UQADD_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES int8_t simde_vuqaddb_s8(int8_t a, uint8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #if defined(SIMDE_BUG_CLANG_GIT_4EC445B8) return vuqaddb_s8(a, HEDLEY_STATIC_CAST(int8_t, b)); #else return vuqaddb_s8(a, b); #endif #else int16_t r_ = HEDLEY_STATIC_CAST(int16_t, a) + HEDLEY_STATIC_CAST(int16_t, b); return (r_ < INT8_MIN) ? INT8_MIN : ((r_ > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, r_)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuqaddb_s8 #define vuqaddb_s8(a, b) simde_vuqaddb_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_vuqaddh_s16(int16_t a, uint16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #if defined(SIMDE_BUG_CLANG_GIT_4EC445B8) return vuqaddh_s16(a, HEDLEY_STATIC_CAST(int16_t, b)); #else return vuqaddh_s16(a, b); #endif #else int32_t r_ = HEDLEY_STATIC_CAST(int32_t, a) + HEDLEY_STATIC_CAST(int32_t, b); return (r_ < INT16_MIN) ? INT16_MIN : ((r_ > INT16_MAX) ? INT16_MAX : HEDLEY_STATIC_CAST(int16_t, r_)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuqaddh_s16 #define vuqaddh_s16(a, b) simde_vuqaddh_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vuqadds_s32(int32_t a, uint32_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #if defined(SIMDE_BUG_CLANG_GIT_4EC445B8) return vuqadds_s32(a, HEDLEY_STATIC_CAST(int32_t, b)); #else return vuqadds_s32(a, b); #endif #else int64_t r_ = HEDLEY_STATIC_CAST(int64_t, a) + HEDLEY_STATIC_CAST(int64_t, b); return (r_ < INT32_MIN) ? INT32_MIN : ((r_ > INT32_MAX) ? INT32_MAX : HEDLEY_STATIC_CAST(int32_t, r_)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuqadds_s32 #define vuqadds_s32(a, b) simde_vuqadds_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_vuqaddd_s64(int64_t a, uint64_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #if defined(SIMDE_BUG_CLANG_GIT_4EC445B8) return vuqaddd_s64(a, HEDLEY_STATIC_CAST(int64_t, b)); #else return vuqaddd_s64(a, b); #endif #else /* TODO: I suspect there is room for improvement here. This is * just the first thing that worked, and I don't feel like messing * with it now. */ int64_t r; if (a < 0) { uint64_t na = HEDLEY_STATIC_CAST(uint64_t, -a); if (na > b) { uint64_t t = na - b; r = (t > (HEDLEY_STATIC_CAST(uint64_t, INT64_MAX) + 1)) ? 
INT64_MIN : -HEDLEY_STATIC_CAST(int64_t, t); } else { uint64_t t = b - na; r = (t > (HEDLEY_STATIC_CAST(uint64_t, INT64_MAX) )) ? INT64_MAX : HEDLEY_STATIC_CAST(int64_t, t); } } else { uint64_t ua = HEDLEY_STATIC_CAST(uint64_t, a); r = ((INT64_MAX - ua) < b) ? INT64_MAX : HEDLEY_STATIC_CAST(int64_t, ua + b); } return r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuqaddd_s64 #define vuqaddd_s64(a, b) simde_vuqaddd_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vuqadd_s8(simde_int8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuqadd_s8(a, b); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a); simde_uint8x8_private b_ = simde_uint8x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vuqaddb_s8(a_.values[i], b_.values[i]); } return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuqadd_s8 #define vuqadd_s8(a, b) simde_vuqadd_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vuqadd_s16(simde_int16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuqadd_s16(a, b); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a); simde_uint16x4_private b_ = simde_uint16x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vuqaddh_s16(a_.values[i], b_.values[i]); } return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuqadd_s16 #define vuqadd_s16(a, b) simde_vuqadd_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vuqadd_s32(simde_int32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuqadd_s32(a, b); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a); simde_uint32x2_private b_ = simde_uint32x2_to_private(b); 
SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vuqadds_s32(a_.values[i], b_.values[i]); } return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuqadd_s32 #define vuqadd_s32(a, b) simde_vuqadd_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x1_t simde_vuqadd_s64(simde_int64x1_t a, simde_uint64x1_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuqadd_s64(a, b); #else simde_int64x1_private r_, a_ = simde_int64x1_to_private(a); simde_uint64x1_private b_ = simde_uint64x1_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vuqaddd_s64(a_.values[i], b_.values[i]); } return simde_int64x1_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuqadd_s64 #define vuqadd_s64(a, b) simde_vuqadd_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vuqaddq_s8(simde_int8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuqaddq_s8(a, b); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a); simde_uint8x16_private b_ = simde_uint8x16_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vuqaddb_s8(a_.values[i], b_.values[i]); } return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuqaddq_s8 #define vuqaddq_s8(a, b) simde_vuqaddq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vuqaddq_s16(simde_int16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuqaddq_s16(a, b); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a); simde_uint16x8_private b_ = simde_uint16x8_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = 
simde_vuqaddh_s16(a_.values[i], b_.values[i]); } return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuqaddq_s16 #define vuqaddq_s16(a, b) simde_vuqaddq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vuqaddq_s32(simde_int32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuqaddq_s32(a, b); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a); simde_uint32x4_private b_ = simde_uint32x4_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vuqadds_s32(a_.values[i], b_.values[i]); } return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuqaddq_s32 #define vuqaddq_s32(a, b) simde_vuqaddq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vuqaddq_s64(simde_int64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuqaddq_s64(a, b); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a); simde_uint64x2_private b_ = simde_uint64x2_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vuqaddd_s64(a_.values[i], b_.values[i]); } return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuqaddq_s64 #define vuqaddq_s64(a, b) simde_vuqaddq_s64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_UQADD_H) */ simde-0.7.2/simde/arm/neon/uzp.h000066400000000000000000000164131400333146700164350ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, 
publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_UZP_H) && !defined(SIMDE_BUG_INTEL_857088) #define SIMDE_ARM_NEON_UZP_H #include "types.h" #include "uzp1.h" #include "uzp2.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2x2_t simde_vuzp_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vuzp_f32(a, b); #else simde_float32x2x2_t r = { { simde_vuzp1_f32(a, b), simde_vuzp2_f32(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vuzp_f32 #define vuzp_f32(a, b) simde_vuzp_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8x2_t simde_vuzp_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vuzp_s8(a, b); #else simde_int8x8x2_t r = { { simde_vuzp1_s8(a, b), simde_vuzp2_s8(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vuzp_s8 #define vuzp_s8(a, b) simde_vuzp_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4x2_t simde_vuzp_s16(simde_int16x4_t a, simde_int16x4_t b) { #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vuzp_s16(a, b); #else simde_int16x4x2_t r = { { simde_vuzp1_s16(a, b), simde_vuzp2_s16(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vuzp_s16 #define vuzp_s16(a, b) simde_vuzp_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2x2_t simde_vuzp_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vuzp_s32(a, b); #else simde_int32x2x2_t r = { { simde_vuzp1_s32(a, b), simde_vuzp2_s32(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vuzp_s32 #define vuzp_s32(a, b) simde_vuzp_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8x2_t simde_vuzp_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vuzp_u8(a, b); #else simde_uint8x8x2_t r = { { simde_vuzp1_u8(a, b), simde_vuzp2_u8(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vuzp_u8 #define vuzp_u8(a, b) simde_vuzp_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4x2_t simde_vuzp_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vuzp_u16(a, b); #else simde_uint16x4x2_t r = { { simde_vuzp1_u16(a, b), simde_vuzp2_u16(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vuzp_u16 #define vuzp_u16(a, b) simde_vuzp_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2x2_t simde_vuzp_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vuzp_u32(a, b); #else simde_uint32x2x2_t r = { { simde_vuzp1_u32(a, b), simde_vuzp2_u32(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vuzp_u32 #define vuzp_u32(a, b) simde_vuzp_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4x2_t simde_vuzpq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) 
return vuzpq_f32(a, b); #else simde_float32x4x2_t r = { { simde_vuzp1q_f32(a, b), simde_vuzp2q_f32(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vuzpq_f32 #define vuzpq_f32(a, b) simde_vuzpq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16x2_t simde_vuzpq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vuzpq_s8(a, b); #else simde_int8x16x2_t r = { { simde_vuzp1q_s8(a, b), simde_vuzp2q_s8(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vuzpq_s8 #define vuzpq_s8(a, b) simde_vuzpq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8x2_t simde_vuzpq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vuzpq_s16(a, b); #else simde_int16x8x2_t r = { { simde_vuzp1q_s16(a, b), simde_vuzp2q_s16(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vuzpq_s16 #define vuzpq_s16(a, b) simde_vuzpq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4x2_t simde_vuzpq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vuzpq_s32(a, b); #else simde_int32x4x2_t r = { { simde_vuzp1q_s32(a, b), simde_vuzp2q_s32(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vuzpq_s32 #define vuzpq_s32(a, b) simde_vuzpq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16x2_t simde_vuzpq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vuzpq_u8(a, b); #else simde_uint8x16x2_t r = { { simde_vuzp1q_u8(a, b), simde_vuzp2q_u8(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vuzpq_u8 #define vuzpq_u8(a, b) simde_vuzpq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8x2_t simde_vuzpq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return 
vuzpq_u16(a, b); #else simde_uint16x8x2_t r = { { simde_vuzp1q_u16(a, b), simde_vuzp2q_u16(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vuzpq_u16 #define vuzpq_u16(a, b) simde_vuzpq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4x2_t simde_vuzpq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vuzpq_u32(a, b); #else simde_uint32x4x2_t r = { { simde_vuzp1q_u32(a, b), simde_vuzp2q_u32(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vuzpq_u32 #define vuzpq_u32(a, b) simde_vuzpq_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_UZP_H) */ simde-0.7.2/simde/arm/neon/uzp1.h000066400000000000000000000505721400333146700165220ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_UZP1_H) #define SIMDE_ARM_NEON_UZP1_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vuzp1_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp1_f32(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x2x2_t t = vuzp_f32(a, b); return t.val[0]; #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 0, 2); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx]; r_.values[i + halfway_point] = b_.values[idx]; } #endif return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp1_f32 #define vuzp1_f32(a, b) simde_vuzp1_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vuzp1_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp1_s8(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int8x8x2_t t = vuzp_s8(a, b); return t.val[0]; #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 0, 2, 4, 6, 8, 10, 12, 14); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx]; r_.values[i + halfway_point] = b_.values[idx]; } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) 
#undef vuzp1_s8 #define vuzp1_s8(a, b) simde_vuzp1_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vuzp1_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp1_s16(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int16x4x2_t t = vuzp_s16(a, b); return t.val[0]; #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 0, 2, 4, 6); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx]; r_.values[i + halfway_point] = b_.values[idx]; } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp1_s16 #define vuzp1_s16(a, b) simde_vuzp1_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vuzp1_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp1_s32(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int32x2x2_t t = vuzp_s32(a, b); return t.val[0]; #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 0, 2); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx]; r_.values[i + halfway_point] = b_.values[idx]; } #endif return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp1_s32 #define vuzp1_s32(a, b) simde_vuzp1_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vuzp1_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp1_u8(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint8x8x2_t t = vuzp_u8(a, b); return t.val[0]; #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 0, 2, 4, 6, 8, 10, 12, 14); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx]; r_.values[i + halfway_point] = b_.values[idx]; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp1_u8 #define vuzp1_u8(a, b) simde_vuzp1_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vuzp1_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp1_u16(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint16x4x2_t t = vuzp_u16(a, b); return t.val[0]; #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 0, 2, 4, 6); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx]; r_.values[i + halfway_point] = b_.values[idx]; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp1_u16 #define vuzp1_u16(a, b) simde_vuzp1_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vuzp1_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp1_u32(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x2x2_t t = vuzp_u32(a, b); return t.val[0]; #else 
simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 0, 2); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx]; r_.values[i + halfway_point] = b_.values[idx]; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp1_u32 #define vuzp1_u32(a, b) simde_vuzp1_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vuzp1q_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp1q_f32(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x4x2_t t = vuzpq_f32(a, b); return t.val[0]; #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v32x4_shuffle(a, b, 0, 2, 4, 6); #elif defined(SIMDE_X86_SSE_NATIVE) return _mm_shuffle_ps(a, b, 0x88); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 0, 2, 4, 6); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx]; r_.values[i + halfway_point] = b_.values[idx]; } #endif return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp1q_f32 #define vuzp1q_f32(a, b) simde_vuzp1q_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vuzp1q_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp1q_f64(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v64x2_shuffle(a, b, 0, 2); #elif 
defined(SIMDE_X86_SSE2_NATIVE) return _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(a), _mm_castpd_ps(b))); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 0, 2); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx]; r_.values[i + halfway_point] = b_.values[idx]; } #endif return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp1q_f64 #define vuzp1q_f64(a, b) simde_vuzp1q_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vuzp1q_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp1q_s8(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int8x16x2_t t = vuzpq_s8(a, b); return t.val[0]; #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v8x16_shuffle(a, b, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx]; r_.values[i + halfway_point] = b_.values[idx]; } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp1q_s8 #define vuzp1q_s8(a, b) simde_vuzp1q_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vuzp1q_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return 
vuzp1q_s16(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int16x8x2_t t = vuzpq_s16(a, b); return t.val[0]; #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v16x8_shuffle(a, b, 0, 2, 4, 6, 8, 10, 12, 14); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 0, 2, 4, 6, 8, 10, 12, 14); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx]; r_.values[i + halfway_point] = b_.values[idx]; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp1q_s16 #define vuzp1q_s16(a, b) simde_vuzp1q_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vuzp1q_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp1q_s32(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int32x4x2_t t = vuzpq_s32(a, b); return t.val[0]; #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v32x4_shuffle(a, b, 0, 2, 4, 6); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), 0x88)); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 0, 2, 4, 6); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx]; r_.values[i + halfway_point] = b_.values[idx]; } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp1q_s32 #define vuzp1q_s32(a, b) simde_vuzp1q_s32((a), (b)) 
#endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vuzp1q_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp1q_s64(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v64x2_shuffle(a, b, 0, 2); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b))); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 0, 2); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx]; r_.values[i + halfway_point] = b_.values[idx]; } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp1q_s64 #define vuzp1q_s64(a, b) simde_vuzp1q_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vuzp1q_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp1q_u8(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint8x16x2_t t = vuzpq_u8(a, b); return t.val[0]; #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v8x16_shuffle(a, b, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx]; r_.values[i + halfway_point] = b_.values[idx]; } #endif return simde_uint8x16_from_private(r_); #endif } #if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp1q_u8 #define vuzp1q_u8(a, b) simde_vuzp1q_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vuzp1q_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp1q_u16(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint16x8x2_t t = vuzpq_u16(a, b); return t.val[0]; #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v16x8_shuffle(a, b, 0, 2, 4, 6, 8, 10, 12, 14); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 0, 2, 4, 6, 8, 10, 12, 14); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx]; r_.values[i + halfway_point] = b_.values[idx]; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp1q_u16 #define vuzp1q_u16(a, b) simde_vuzp1q_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vuzp1q_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp1q_u32(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4x2_t t = vuzpq_u32(a, b); return t.val[0]; #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v32x4_shuffle(a, b, 0, 2, 4, 6); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), 0x88)); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 0, 2, 4, 6); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const 
size_t idx = i << 1; r_.values[ i ] = a_.values[idx]; r_.values[i + halfway_point] = b_.values[idx]; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp1q_u32 #define vuzp1q_u32(a, b) simde_vuzp1q_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vuzp1q_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp1q_u64(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v64x2_shuffle(a, b, 0, 2); #elif defined(SIMDE_X86_SSE2_NATIVE) /* _mm_movelh_ps?!?! SSE is weird. */ return _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b))); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 0, 2); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx]; r_.values[i + halfway_point] = b_.values[idx]; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp1q_u64 #define vuzp1q_u64(a, b) simde_vuzp1q_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_UZP1_H) */ simde-0.7.2/simde/arm/neon/uzp2.h000066400000000000000000000510561400333146700165210ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to 
the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_UZP2_H) #define SIMDE_ARM_NEON_UZP2_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vuzp2_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp2_f32(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x2x2_t t = vuzp_f32(a, b); return t.val[1]; #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 1, 3); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx | 1]; r_.values[i + halfway_point] = b_.values[idx | 1]; } #endif return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp2_f32 #define vuzp2_f32(a, b) simde_vuzp2_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vuzp2_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp2_s8(a, b); #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) int8x8x2_t t = vuzp_s8(a, b); return t.val[1]; #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 1, 3, 5, 7, 9, 11, 13, 15); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx | 1]; r_.values[i + halfway_point] = b_.values[idx | 1]; } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp2_s8 #define vuzp2_s8(a, b) simde_vuzp2_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vuzp2_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp2_s16(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int16x4x2_t t = vuzp_s16(a, b); return t.val[1]; #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 1, 3, 5, 7); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx | 1]; r_.values[i + halfway_point] = b_.values[idx | 1]; } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp2_s16 #define vuzp2_s16(a, b) simde_vuzp2_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vuzp2_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp2_s32(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int32x2x2_t t = vuzp_s32(a, b); return t.val[1]; #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = 
simde_int32x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 1, 3); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx | 1]; r_.values[i + halfway_point] = b_.values[idx | 1]; } #endif return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp2_s32 #define vuzp2_s32(a, b) simde_vuzp2_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vuzp2_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp2_u8(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint8x8x2_t t = vuzp_u8(a, b); return t.val[1]; #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 1, 3, 5, 7, 9, 11, 13, 15); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx | 1]; r_.values[i + halfway_point] = b_.values[idx | 1]; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp2_u8 #define vuzp2_u8(a, b) simde_vuzp2_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vuzp2_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp2_u16(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint16x4x2_t t = vuzp_u16(a, b); return t.val[1]; #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 1, 3, 5, 7); #else const size_t 
halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx | 1]; r_.values[i + halfway_point] = b_.values[idx | 1]; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp2_u16 #define vuzp2_u16(a, b) simde_vuzp2_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vuzp2_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp2_u32(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x2x2_t t = vuzp_u32(a, b); return t.val[1]; #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 1, 3); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx | 1]; r_.values[i + halfway_point] = b_.values[idx | 1]; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp2_u32 #define vuzp2_u32(a, b) simde_vuzp2_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vuzp2q_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp2q_f32(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x4x2_t t = vuzpq_f32(a, b); return t.val[1]; #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v32x4_shuffle(a, b, 1, 3, 5, 7); #elif defined(SIMDE_X86_SSE_NATIVE) return _mm_shuffle_ps(a, b, 0xdd); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 1, 3, 5, 7); #else const 
size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx | 1]; r_.values[i + halfway_point] = b_.values[idx | 1]; } #endif return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp2q_f32 #define vuzp2q_f32(a, b) simde_vuzp2q_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vuzp2q_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp2q_f64(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v64x2_shuffle(a, b, 1, 3); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpackhi_pd(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_mergel(a, b); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 1, 3); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx | 1]; r_.values[i + halfway_point] = b_.values[idx | 1]; } #endif return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp2q_f64 #define vuzp2q_f64(a, b) simde_vuzp2q_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vuzp2q_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp2q_s8(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int8x16x2_t t = vuzpq_s8(a, b); return t.val[1]; #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v8x16_shuffle(a, b, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); #if 
defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx | 1]; r_.values[i + halfway_point] = b_.values[idx | 1]; } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp2q_s8 #define vuzp2q_s8(a, b) simde_vuzp2q_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vuzp2q_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp2q_s16(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int16x8x2_t t = vuzpq_s16(a, b); return t.val[1]; #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v16x8_shuffle(a, b, 1, 3, 5, 7, 9, 11, 13, 15); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 1, 3, 5, 7, 9, 11, 13, 15); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx | 1]; r_.values[i + halfway_point] = b_.values[idx | 1]; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp2q_s16 #define vuzp2q_s16(a, b) simde_vuzp2q_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vuzp2q_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp2q_s32(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int32x4x2_t t = vuzpq_s32(a, b); return t.val[1]; #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v32x4_shuffle(a, b, 1, 3, 5, 7); #elif 
defined(SIMDE_X86_SSE2_NATIVE) return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), 0xdd)); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 1, 3, 5, 7); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx | 1]; r_.values[i + halfway_point] = b_.values[idx | 1]; } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp2q_s32 #define vuzp2q_s32(a, b) simde_vuzp2q_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vuzp2q_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp2q_s64(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v64x2_shuffle(a, b, 1, 3); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpackhi_epi64(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_mergel(a, b); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 1, 3); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx | 1]; r_.values[i + halfway_point] = b_.values[idx | 1]; } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp2q_s64 #define vuzp2q_s64(a, b) simde_vuzp2q_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vuzp2q_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp2q_u8(a, b); #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint8x16x2_t t = vuzpq_u8(a, b); return t.val[1]; #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v8x16_shuffle(a, b, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx | 1]; r_.values[i + halfway_point] = b_.values[idx | 1]; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp2q_u8 #define vuzp2q_u8(a, b) simde_vuzp2q_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vuzp2q_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp2q_u16(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint16x8x2_t t = vuzpq_u16(a, b); return t.val[1]; #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v16x8_shuffle(a, b, 1, 3, 5, 7, 9, 11, 13, 15); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 1, 3, 5, 7, 9, 11, 13, 15); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx | 1]; r_.values[i + halfway_point] = b_.values[idx | 1]; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp2q_u16 #define vuzp2q_u16(a, b) simde_vuzp2q_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde_uint32x4_t simde_vuzp2q_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp2q_u32(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4x2_t t = vuzpq_u32(a, b); return t.val[1]; #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v32x4_shuffle(a, b, 1, 3, 5, 7); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), 0xdd)); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 1, 3, 5, 7); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx | 1]; r_.values[i + halfway_point] = b_.values[idx | 1]; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp2q_u32 #define vuzp2q_u32(a, b) simde_vuzp2q_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vuzp2q_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vuzp2q_u64(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v64x2_shuffle(a, b, 1, 3); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpackhi_epi64(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_mergel(a, b); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 1, 3); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { const size_t idx = i << 1; r_.values[ i ] = a_.values[idx | 1]; r_.values[i + halfway_point] = b_.values[idx | 1]; } #endif return 
simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vuzp2q_u64 #define vuzp2q_u64(a, b) simde_vuzp2q_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_UZP2_H) */ simde-0.7.2/simde/arm/neon/zip.h000066400000000000000000000164131400333146700164210ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_ZIP_H) && !defined(SIMDE_BUG_INTEL_857088) #define SIMDE_ARM_NEON_ZIP_H #include "types.h" #include "zip1.h" #include "zip2.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2x2_t simde_vzip_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vzip_f32(a, b); #else simde_float32x2x2_t r = { { simde_vzip1_f32(a, b), simde_vzip2_f32(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vzip_f32 #define vzip_f32(a, b) simde_vzip_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8x2_t simde_vzip_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vzip_s8(a, b); #else simde_int8x8x2_t r = { { simde_vzip1_s8(a, b), simde_vzip2_s8(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vzip_s8 #define vzip_s8(a, b) simde_vzip_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4x2_t simde_vzip_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vzip_s16(a, b); #else simde_int16x4x2_t r = { { simde_vzip1_s16(a, b), simde_vzip2_s16(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vzip_s16 #define vzip_s16(a, b) simde_vzip_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2x2_t simde_vzip_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vzip_s32(a, b); #else simde_int32x2x2_t r = { { simde_vzip1_s32(a, b), simde_vzip2_s32(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vzip_s32 #define vzip_s32(a, b) simde_vzip_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8x2_t simde_vzip_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vzip_u8(a, b); #else simde_uint8x8x2_t r = { { simde_vzip1_u8(a, b), simde_vzip2_u8(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vzip_u8 #define vzip_u8(a, b) simde_vzip_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4x2_t simde_vzip_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vzip_u16(a, b); #else simde_uint16x4x2_t r = { { simde_vzip1_u16(a, b), simde_vzip2_u16(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vzip_u16 #define vzip_u16(a, b) simde_vzip_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2x2_t simde_vzip_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vzip_u32(a, b); #else simde_uint32x2x2_t r = { { simde_vzip1_u32(a, b), simde_vzip2_u32(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vzip_u32 #define vzip_u32(a, b) simde_vzip_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4x2_t simde_vzipq_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vzipq_f32(a, b); #else simde_float32x4x2_t r = { { simde_vzip1q_f32(a, b), simde_vzip2q_f32(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vzipq_f32 #define vzipq_f32(a, b) simde_vzipq_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16x2_t simde_vzipq_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vzipq_s8(a, b); #else simde_int8x16x2_t r = { { simde_vzip1q_s8(a, b), simde_vzip2q_s8(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vzipq_s8 #define vzipq_s8(a, b) simde_vzipq_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8x2_t simde_vzipq_s16(simde_int16x8_t a, simde_int16x8_t b) { #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vzipq_s16(a, b); #else simde_int16x8x2_t r = { { simde_vzip1q_s16(a, b), simde_vzip2q_s16(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vzipq_s16 #define vzipq_s16(a, b) simde_vzipq_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4x2_t simde_vzipq_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vzipq_s32(a, b); #else simde_int32x4x2_t r = { { simde_vzip1q_s32(a, b), simde_vzip2q_s32(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vzipq_s32 #define vzipq_s32(a, b) simde_vzipq_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16x2_t simde_vzipq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vzipq_u8(a, b); #else simde_uint8x16x2_t r = { { simde_vzip1q_u8(a, b), simde_vzip2q_u8(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vzipq_u8 #define vzipq_u8(a, b) simde_vzipq_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8x2_t simde_vzipq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vzipq_u16(a, b); #else simde_uint16x8x2_t r = { { simde_vzip1q_u16(a, b), simde_vzip2q_u16(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vzipq_u16 #define vzipq_u16(a, b) simde_vzipq_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4x2_t simde_vzipq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vzipq_u32(a, b); #else simde_uint32x4x2_t r = { { simde_vzip1q_u32(a, b), simde_vzip2q_u32(a, b) } }; return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vzipq_u32 #define vzipq_u32(a, b) simde_vzipq_u32((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ZIP_H) */ 
simde-0.7.2/simde/arm/neon/zip1.h000066400000000000000000000464401400333146700165050ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_ZIP1_H) #define SIMDE_ARM_NEON_ZIP1_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vzip1_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip1_f32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_unpacklo_pi32(a, b); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 0, 2); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[2 * i ] = a_.values[i]; r_.values[2 * i + 1] = b_.values[i]; } #endif return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip1_f32 #define vzip1_f32(a, b) simde_vzip1_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vzip1_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip1_s8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_unpacklo_pi8(a, b); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 0, 8, 1, 9, 2, 10, 3, 11); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[2 * i ] = a_.values[i]; r_.values[2 * i + 1] = b_.values[i]; } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip1_s8 #define vzip1_s8(a, b) simde_vzip1_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t 
simde_vzip1_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip1_s16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_unpacklo_pi16(a, b); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 0, 4, 1, 5); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[2 * i ] = a_.values[i]; r_.values[2 * i + 1] = b_.values[i]; } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip1_s16 #define vzip1_s16(a, b) simde_vzip1_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vzip1_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip1_s32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_unpacklo_pi32(a, b); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 0, 2); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[2 * i ] = a_.values[i]; r_.values[2 * i + 1] = b_.values[i]; } #endif return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip1_s32 #define vzip1_s32(a, b) simde_vzip1_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vzip1_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip1_u8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_unpacklo_pi8(a, b); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); #if 
defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 0, 8, 1, 9, 2, 10, 3, 11); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[2 * i ] = a_.values[i]; r_.values[2 * i + 1] = b_.values[i]; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip1_u8 #define vzip1_u8(a, b) simde_vzip1_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vzip1_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip1_u16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_unpacklo_pi16(a, b); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 0, 4, 1, 5); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[2 * i ] = a_.values[i]; r_.values[2 * i + 1] = b_.values[i]; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip1_u16 #define vzip1_u16(a, b) simde_vzip1_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vzip1_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip1_u32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_unpacklo_pi32(a, b); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 0, 2); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[2 * i ] = a_.values[i]; 
r_.values[2 * i + 1] = b_.values[i]; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip1_u32 #define vzip1_u32(a, b) simde_vzip1_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vzip1q_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip1q_f32(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v32x4_shuffle(a, b, 0, 4, 1, 5); #elif defined(SIMDE_X86_SSE_NATIVE) return _mm_unpacklo_ps(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_mergeh(a, b); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 0, 4, 1, 5); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[2 * i ] = a_.values[i]; r_.values[2 * i + 1] = b_.values[i]; } #endif return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip1q_f32 #define vzip1q_f32(a, b) simde_vzip1q_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vzip1q_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip1q_f64(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v64x2_shuffle(a, b, 0, 2); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_pd(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_mergeh(a, b); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), b_ = simde_float64x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 0, 2); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[2 
* i ] = a_.values[i]; r_.values[2 * i + 1] = b_.values[i]; } #endif return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip1q_f64 #define vzip1q_f64(a, b) simde_vzip1q_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vzip1q_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip1q_s8(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_epi8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_mergeh(a, b); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[2 * i ] = a_.values[i]; r_.values[2 * i + 1] = b_.values[i]; } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip1q_s8 #define vzip1q_s8(a, b) simde_vzip1q_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vzip1q_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip1q_s16(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_epi16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_mergeh(a, b); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 0, 8, 1, 9, 2, 10, 3, 11); #else const size_t 
halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[2 * i ] = a_.values[i]; r_.values[2 * i + 1] = b_.values[i]; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip1q_s16 #define vzip1q_s16(a, b) simde_vzip1q_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vzip1q_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip1q_s32(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v32x4_shuffle(a, b, 0, 4, 1, 5); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_epi32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_mergeh(a, b); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 0, 4, 1, 5); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[2 * i ] = a_.values[i]; r_.values[2 * i + 1] = b_.values[i]; } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip1q_s32 #define vzip1q_s32(a, b) simde_vzip1q_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vzip1q_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip1q_s64(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v64x2_shuffle(a, b, 0, 2); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_epi64(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_mergeh(a, b); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 0, 2); #else const 
size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[2 * i ] = a_.values[i]; r_.values[2 * i + 1] = b_.values[i]; } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip1q_s64 #define vzip1q_s64(a, b) simde_vzip1q_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vzip1q_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip1q_u8(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_epi8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_mergeh(a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[2 * i ] = a_.values[i]; r_.values[2 * i + 1] = b_.values[i]; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip1q_u8 #define vzip1q_u8(a, b) simde_vzip1q_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vzip1q_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip1q_u16(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_epi16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_mergeh(a, b); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); 
#if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 0, 8, 1, 9, 2, 10, 3, 11); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[2 * i ] = a_.values[i]; r_.values[2 * i + 1] = b_.values[i]; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip1q_u16 #define vzip1q_u16(a, b) simde_vzip1q_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vzip1q_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip1q_u32(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v32x4_shuffle(a, b, 0, 4, 1, 5); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_epi32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_mergeh(a, b); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 0, 4, 1, 5); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[2 * i ] = a_.values[i]; r_.values[2 * i + 1] = b_.values[i]; } #endif return simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip1q_u32 #define vzip1q_u32(a, b) simde_vzip1q_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vzip1q_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip1q_u64(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v64x2_shuffle(a, b, 0, 2); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_epi64(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_mergeh(a, b); #else simde_uint64x2_private r_, a_ = 
simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 0, 2); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[2 * i ] = a_.values[i]; r_.values[2 * i + 1] = b_.values[i]; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip1q_u64 #define vzip1q_u64(a, b) simde_vzip1q_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ZIP1_H) */ simde-0.7.2/simde/arm/neon/zip2.h000066400000000000000000000476501400333146700165120ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) */ #if !defined(SIMDE_ARM_NEON_ZIP2_H) #define SIMDE_ARM_NEON_ZIP2_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vzip2_f32(simde_float32x2_t a, simde_float32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip2_f32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_unpackhi_pi32(a, b); #else simde_float32x2_private r_, a_ = simde_float32x2_to_private(a), b_ = simde_float32x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 1, 3); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[(2 * i) ] = a_.values[halfway_point + i]; r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; } #endif return simde_float32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip2_f32 #define vzip2_f32(a, b) simde_vzip2_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x8_t simde_vzip2_s8(simde_int8x8_t a, simde_int8x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip2_s8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_unpackhi_pi8(a, b); #else simde_int8x8_private r_, a_ = simde_int8x8_to_private(a), b_ = simde_int8x8_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 4, 12, 5, 13, 6, 14, 7, 15); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[(2 * i) ] = a_.values[halfway_point + i]; r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; } #endif return simde_int8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip2_s8 #define vzip2_s8(a, b) 
simde_vzip2_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vzip2_s16(simde_int16x4_t a, simde_int16x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip2_s16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_unpackhi_pi16(a, b); #else simde_int16x4_private r_, a_ = simde_int16x4_to_private(a), b_ = simde_int16x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 2, 6, 3, 7); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[(2 * i) ] = a_.values[halfway_point + i]; r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; } #endif return simde_int16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip2_s16 #define vzip2_s16(a, b) simde_vzip2_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vzip2_s32(simde_int32x2_t a, simde_int32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip2_s32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_unpackhi_pi32(a, b); #else simde_int32x2_private r_, a_ = simde_int32x2_to_private(a), b_ = simde_int32x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 1, 3); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[(2 * i) ] = a_.values[halfway_point + i]; r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; } #endif return simde_int32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip2_s32 #define vzip2_s32(a, b) simde_vzip2_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x8_t simde_vzip2_u8(simde_uint8x8_t a, simde_uint8x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip2_u8(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return 
_mm_unpackhi_pi8(a, b); #else simde_uint8x8_private r_, a_ = simde_uint8x8_to_private(a), b_ = simde_uint8x8_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 4, 12, 5, 13, 6, 14, 7, 15); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[(2 * i) ] = a_.values[halfway_point + i]; r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; } #endif return simde_uint8x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip2_u8 #define vzip2_u8(a, b) simde_vzip2_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x4_t simde_vzip2_u16(simde_uint16x4_t a, simde_uint16x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip2_u16(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_unpackhi_pi16(a, b); #else simde_uint16x4_private r_, a_ = simde_uint16x4_to_private(a), b_ = simde_uint16x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 2, 6, 3, 7); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[(2 * i) ] = a_.values[halfway_point + i]; r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; } #endif return simde_uint16x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip2_u16 #define vzip2_u16(a, b) simde_vzip2_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vzip2_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip2_u32(a, b); #elif defined(SIMDE_X86_MMX_NATIVE) return _mm_unpackhi_pi32(a, b); #else simde_uint32x2_private r_, a_ = simde_uint32x2_to_private(a), b_ = simde_uint32x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, 
b_.values, 1, 3); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[(2 * i) ] = a_.values[halfway_point + i]; r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; } #endif return simde_uint32x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip2_u32 #define vzip2_u32(a, b) simde_vzip2_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vzip2q_f32(simde_float32x4_t a, simde_float32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip2q_f32(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v32x4_shuffle(a, b, 2, 6, 3, 7); #elif defined(SIMDE_X86_SSE_NATIVE) return _mm_unpackhi_ps(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_mergel(a, b); #else simde_float32x4_private r_, a_ = simde_float32x4_to_private(a), b_ = simde_float32x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 2, 6, 3, 7); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[(2 * i) ] = a_.values[halfway_point + i]; r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; } #endif return simde_float32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip2q_f32 #define vzip2q_f32(a, b) simde_vzip2q_f32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64x2_t simde_vzip2q_f64(simde_float64x2_t a, simde_float64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip2q_f64(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v64x2_shuffle(a, b, 1, 3); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpackhi_pd(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_mergel(a, b); #else simde_float64x2_private r_, a_ = simde_float64x2_to_private(a), b_ = 
simde_float64x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 1, 3); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[(2 * i) ] = a_.values[halfway_point + i]; r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; } #endif return simde_float64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip2q_f64 #define vzip2q_f64(a, b) simde_vzip2q_f64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int8x16_t simde_vzip2q_s8(simde_int8x16_t a, simde_int8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip2q_s8(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpackhi_epi8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_mergel(a, b); #else simde_int8x16_private r_, a_ = simde_int8x16_to_private(a), b_ = simde_int8x16_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[(2 * i) ] = a_.values[halfway_point + i]; r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; } #endif return simde_int8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip2q_s8 #define vzip2q_s8(a, b) simde_vzip2q_s8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int16x8_t simde_vzip2q_s16(simde_int16x8_t a, simde_int16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip2q_s16(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15); #elif 
defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpackhi_epi16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_mergel(a, b); #else simde_int16x8_private r_, a_ = simde_int16x8_to_private(a), b_ = simde_int16x8_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 4, 12, 5, 13, 6, 14, 7, 15); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[(2 * i) ] = a_.values[halfway_point + i]; r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; } #endif return simde_int16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip2q_s16 #define vzip2q_s16(a, b) simde_vzip2q_s16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vzip2q_s32(simde_int32x4_t a, simde_int32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip2q_s32(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v32x4_shuffle(a, b, 2, 6, 3, 7); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpackhi_epi32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_mergel(a, b); #else simde_int32x4_private r_, a_ = simde_int32x4_to_private(a), b_ = simde_int32x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 2, 6, 3, 7); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[(2 * i) ] = a_.values[halfway_point + i]; r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; } #endif return simde_int32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip2q_s32 #define vzip2q_s32(a, b) simde_vzip2q_s32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vzip2q_s64(simde_int64x2_t a, simde_int64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return 
vzip2q_s64(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v64x2_shuffle(a, b, 1, 3); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpackhi_epi64(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_mergel(a, b); #else simde_int64x2_private r_, a_ = simde_int64x2_to_private(a), b_ = simde_int64x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 1, 3); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[(2 * i) ] = a_.values[halfway_point + i]; r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; } #endif return simde_int64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip2q_s64 #define vzip2q_s64(a, b) simde_vzip2q_s64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16_t simde_vzip2q_u8(simde_uint8x16_t a, simde_uint8x16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip2q_u8(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpackhi_epi8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_mergel(a, b); #else simde_uint8x16_private r_, a_ = simde_uint8x16_to_private(a), b_ = simde_uint8x16_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[(2 * i) ] = a_.values[halfway_point + i]; r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; } #endif return simde_uint8x16_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip2q_u8 #define vzip2q_u8(a, b) 
simde_vzip2q_u8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8_t simde_vzip2q_u16(simde_uint16x8_t a, simde_uint16x8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip2q_u16(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpackhi_epi16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_mergel(a, b); #else simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a), b_ = simde_uint16x8_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 4, 12, 5, 13, 6, 14, 7, 15); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[(2 * i) ] = a_.values[halfway_point + i]; r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; } #endif return simde_uint16x8_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip2q_u16 #define vzip2q_u16(a, b) simde_vzip2q_u16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vzip2q_u32(simde_uint32x4_t a, simde_uint32x4_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip2q_u32(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v32x4_shuffle(a, b, 2, 6, 3, 7); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpackhi_epi32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_mergel(a, b); #else simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a), b_ = simde_uint32x4_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 2, 6, 3, 7); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[(2 * i) ] = a_.values[halfway_point + i]; r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; } #endif return 
simde_uint32x4_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip2q_u32 #define vzip2q_u32(a, b) simde_vzip2q_u32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vzip2q_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vzip2q_u64(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v64x2_shuffle(a, b, 1, 3); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpackhi_epi64(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_mergel(a, b); #else simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a), b_ = simde_uint64x2_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 1, 3); #else const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; SIMDE_VECTORIZE for (size_t i = 0 ; i < halfway_point ; i++) { r_.values[(2 * i) ] = a_.values[halfway_point + i]; r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; } #endif return simde_uint64x2_from_private(r_); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vzip2q_u64 #define vzip2q_u64(a, b) simde_vzip2q_u64((a), (b)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_ARM_NEON_ZIP2_H) */ simde-0.7.2/simde/check.h000066400000000000000000000231661400333146700151610ustar00rootroot00000000000000/* Check (assertions) * Portable Snippets - https://gitub.com/nemequ/portable-snippets * Created by Evan Nemerson * * To the extent possible under law, the authors have waived all * copyright and related or neighboring rights to this code. 
For * details, see the Creative Commons Zero 1.0 Universal license at * https://creativecommons.org/publicdomain/zero/1.0/ * * SPDX-License-Identifier: CC0-1.0 */ #if !defined(SIMDE_CHECK_H) #define SIMDE_CHECK_H #if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) # define SIMDE_NDEBUG 1 #endif #include "hedley.h" #include "simde-diagnostic.h" #include #if !defined(_WIN32) # define SIMDE_SIZE_MODIFIER "z" # define SIMDE_CHAR_MODIFIER "hh" # define SIMDE_SHORT_MODIFIER "h" #else # if defined(_M_X64) || defined(__amd64__) # define SIMDE_SIZE_MODIFIER "I64" # else # define SIMDE_SIZE_MODIFIER "" # endif # define SIMDE_CHAR_MODIFIER "" # define SIMDE_SHORT_MODIFIER "" #endif #if defined(_MSC_VER) && (_MSC_VER >= 1500) # define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) # define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) #else # define SIMDE_PUSH_DISABLE_MSVC_C4127_ # define SIMDE_POP_DISABLE_MSVC_C4127_ #endif #if !defined(simde_errorf) # if defined(__has_include) # if __has_include() # include # endif # elif defined(SIMDE_STDC_HOSTED) # if SIMDE_STDC_HOSTED == 1 # include # endif # elif defined(__STDC_HOSTED__) # if __STDC_HOSTETD__ == 1 # include # endif # endif # include "debug-trap.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ # if defined(EOF) # define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) # else # define simde_errorf(format, ...) (simde_trap()) # endif HEDLEY_DIAGNOSTIC_POP #endif #define simde_error(msg) simde_errorf("%s", msg) #if defined(SIMDE_NDEBUG) || \ (defined(__cplusplus) && (__cplusplus < 201103L)) || \ (defined(__STDC__) && (__STDC__ < 199901L)) # if defined(SIMDE_CHECK_FAIL_DEFINED) # define simde_assert(expr) # else # if defined(HEDLEY_ASSUME) # define simde_assert(expr) HEDLEY_ASSUME(expr) # elif HEDLEY_GCC_VERSION_CHECK(4,5,0) # define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) # elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) # define simde_assert(expr) __assume(expr) # else # define simde_assert(expr) # endif # endif # define simde_assert_true(expr) simde_assert(expr) # define simde_assert_false(expr) simde_assert(!(expr)) # define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) # define simde_assert_double_equal(a, b, precision) # define simde_assert_string_equal(a, b) # define simde_assert_string_not_equal(a, b) # define simde_assert_memory_equal(size, a, b) # define simde_assert_memory_not_equal(size, a, b) #else # define simde_assert(expr) \ do { \ if (!HEDLEY_LIKELY(expr)) { \ simde_error("assertion failed: " #expr "\n"); \ } \ SIMDE_PUSH_DISABLE_MSVC_C4127_ \ } while (0) \ SIMDE_POP_DISABLE_MSVC_C4127_ # define simde_assert_true(expr) \ do { \ if (!HEDLEY_LIKELY(expr)) { \ simde_error("assertion failed: " #expr " is not true\n"); \ } \ SIMDE_PUSH_DISABLE_MSVC_C4127_ \ } while (0) \ SIMDE_POP_DISABLE_MSVC_C4127_ # define simde_assert_false(expr) \ do { \ if (!HEDLEY_LIKELY(!(expr))) { \ simde_error("assertion failed: " #expr " is not false\n"); \ } \ SIMDE_PUSH_DISABLE_MSVC_C4127_ \ } while (0) \ SIMDE_POP_DISABLE_MSVC_C4127_ # define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ do { \ T simde_tmp_a_ = (a); \ T simde_tmp_b_ = (b); \ if (!(simde_tmp_a_ op simde_tmp_b_)) { \ simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ } \ SIMDE_PUSH_DISABLE_MSVC_C4127_ \ } while (0) \ SIMDE_POP_DISABLE_MSVC_C4127_ # define simde_assert_double_equal(a, b, precision) \ do { \ const double simde_tmp_a_ = (a); \ const double simde_tmp_b_ = (b); \ const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? 
\ -(simde_tmp_a_ - simde_tmp_b_) : \ (simde_tmp_a_ - simde_tmp_b_); \ if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." #precision "g)\n", \ #a, #b, simde_tmp_a_, simde_tmp_b_); \ } \ SIMDE_PUSH_DISABLE_MSVC_C4127_ \ } while (0) \ SIMDE_POP_DISABLE_MSVC_C4127_ # include # define simde_assert_string_equal(a, b) \ do { \ const char* simde_tmp_a_ = a; \ const char* simde_tmp_b_ = b; \ if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ #a, #b, simde_tmp_a_, simde_tmp_b_); \ } \ SIMDE_PUSH_DISABLE_MSVC_C4127_ \ } while (0) \ SIMDE_POP_DISABLE_MSVC_C4127_ # define simde_assert_string_not_equal(a, b) \ do { \ const char* simde_tmp_a_ = a; \ const char* simde_tmp_b_ = b; \ if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ #a, #b, simde_tmp_a_, simde_tmp_b_); \ } \ SIMDE_PUSH_DISABLE_MSVC_C4127_ \ } while (0) \ SIMDE_POP_DISABLE_MSVC_C4127_ # define simde_assert_memory_equal(size, a, b) \ do { \ const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ const size_t simde_tmp_size_ = (size); \ if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ size_t simde_tmp_pos_; \ for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ #a, #b, simde_tmp_pos_); \ break; \ } \ } \ } \ SIMDE_PUSH_DISABLE_MSVC_C4127_ \ } while (0) \ SIMDE_POP_DISABLE_MSVC_C4127_ # define simde_assert_memory_not_equal(size, a, b) \ do { \ const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ 
const size_t simde_tmp_size_ = (size); \ if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ #a, #b, simde_tmp_size_); \ } \ SIMDE_PUSH_DISABLE_MSVC_C4127_ \ } while (0) \ SIMDE_POP_DISABLE_MSVC_C4127_ #endif #define simde_assert_type(T, fmt, a, op, b) \ simde_assert_type_full("", "", T, fmt, a, op, b) #define simde_assert_char(a, op, b) \ simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) #define simde_assert_uchar(a, op, b) \ simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) #define simde_assert_short(a, op, b) \ simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) #define simde_assert_ushort(a, op, b) \ simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) #define simde_assert_int(a, op, b) \ simde_assert_type(int, "d", a, op, b) #define simde_assert_uint(a, op, b) \ simde_assert_type(unsigned int, "u", a, op, b) #define simde_assert_long(a, op, b) \ simde_assert_type(long int, "ld", a, op, b) #define simde_assert_ulong(a, op, b) \ simde_assert_type(unsigned long int, "lu", a, op, b) #define simde_assert_llong(a, op, b) \ simde_assert_type(long long int, "lld", a, op, b) #define simde_assert_ullong(a, op, b) \ simde_assert_type(unsigned long long int, "llu", a, op, b) #define simde_assert_size(a, op, b) \ simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) #define simde_assert_float(a, op, b) \ simde_assert_type(float, "f", a, op, b) #define simde_assert_double(a, op, b) \ simde_assert_type(double, "g", a, op, b) #define simde_assert_ptr(a, op, b) \ simde_assert_type(const void*, "p", a, op, b) #define simde_assert_int8(a, op, b) \ simde_assert_type(int8_t, PRIi8, a, op, b) #define simde_assert_uint8(a, op, b) \ simde_assert_type(uint8_t, PRIu8, a, op, b) #define simde_assert_int16(a, op, b) \ simde_assert_type(int16_t, PRIi16, a, op, b) 
#define simde_assert_uint16(a, op, b) \ simde_assert_type(uint16_t, PRIu16, a, op, b) #define simde_assert_int32(a, op, b) \ simde_assert_type(int32_t, PRIi32, a, op, b) #define simde_assert_uint32(a, op, b) \ simde_assert_type(uint32_t, PRIu32, a, op, b) #define simde_assert_int64(a, op, b) \ simde_assert_type(int64_t, PRIi64, a, op, b) #define simde_assert_uint64(a, op, b) \ simde_assert_type(uint64_t, PRIu64, a, op, b) #define simde_assert_ptr_equal(a, b) \ simde_assert_ptr(a, ==, b) #define simde_assert_ptr_not_equal(a, b) \ simde_assert_ptr(a, !=, b) #define simde_assert_null(ptr) \ simde_assert_ptr(ptr, ==, NULL) #define simde_assert_not_null(ptr) \ simde_assert_ptr(ptr, !=, NULL) #define simde_assert_ptr_null(ptr) \ simde_assert_ptr(ptr, ==, NULL) #define simde_assert_ptr_not_null(ptr) \ simde_assert_ptr(ptr, !=, NULL) #endif /* !defined(SIMDE_CHECK_H) */ simde-0.7.2/simde/debug-trap.h000066400000000000000000000060571400333146700161360ustar00rootroot00000000000000/* Debugging assertions and traps * Portable Snippets - https://gitub.com/nemequ/portable-snippets * Created by Evan Nemerson * * To the extent possible under law, the authors have waived all * copyright and related or neighboring rights to this code. 
For * details, see the Creative Commons Zero 1.0 Universal license at * https://creativecommons.org/publicdomain/zero/1.0/ * * SPDX-License-Identifier: CC0-1.0 */ #if !defined(SIMDE_DEBUG_TRAP_H) #define SIMDE_DEBUG_TRAP_H #if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) # define SIMDE_NDEBUG 1 #endif #if defined(__has_builtin) && !defined(__ibmxl__) # if __has_builtin(__builtin_debugtrap) # define simde_trap() __builtin_debugtrap() # elif __has_builtin(__debugbreak) # define simde_trap() __debugbreak() # endif #endif #if !defined(simde_trap) # if defined(_MSC_VER) || defined(__INTEL_COMPILER) # define simde_trap() __debugbreak() # elif defined(__ARMCC_VERSION) # define simde_trap() __breakpoint(42) # elif defined(__ibmxl__) || defined(__xlC__) # include # define simde_trap() __trap(42) # elif defined(__DMC__) && defined(_M_IX86) static inline void simde_trap(void) { __asm int 3h; } # elif defined(__i386__) || defined(__x86_64__) static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } # elif defined(__thumb__) static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } # elif defined(__aarch64__) static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } # elif defined(__arm__) static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } # elif defined (__alpha__) && !defined(__osf__) static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } # elif defined(_54_) static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } # elif defined(_55_) static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } # elif defined(_64P_) static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } # elif defined(_6x_) static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } # elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && 
defined(__GNUC__) # define simde_trap() __builtin_trap() # else # include # if defined(SIGTRAP) # define simde_trap() raise(SIGTRAP) # else # define simde_trap() raise(SIGABRT) # endif # endif #endif #if defined(HEDLEY_LIKELY) # define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) #elif defined(__GNUC__) && (__GNUC__ >= 3) # define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) #else # define SIMDE_DBG_LIKELY(expr) (!!(expr)) #endif #if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) # define simde_dbg_assert(expr) do { \ if (!SIMDE_DBG_LIKELY(expr)) { \ simde_trap(); \ } \ } while (0) #else # define simde_dbg_assert(expr) #endif #endif /* !defined(SIMDE_DEBUG_TRAP_H) */ simde-0.7.2/simde/hedley.h000066400000000000000000002207051400333146700153540ustar00rootroot00000000000000/* Hedley - https://nemequ.github.io/hedley * Created by Evan Nemerson * * To the extent possible under law, the author(s) have dedicated all * copyright and related and neighboring rights to this software to * the public domain worldwide. This software is distributed without * any warranty. * * For details, see . 
* SPDX-License-Identifier: CC0-1.0 */ #if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 14) #if defined(HEDLEY_VERSION) # undef HEDLEY_VERSION #endif #define HEDLEY_VERSION 14 #if defined(HEDLEY_STRINGIFY_EX) # undef HEDLEY_STRINGIFY_EX #endif #define HEDLEY_STRINGIFY_EX(x) #x #if defined(HEDLEY_STRINGIFY) # undef HEDLEY_STRINGIFY #endif #define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) #if defined(HEDLEY_CONCAT_EX) # undef HEDLEY_CONCAT_EX #endif #define HEDLEY_CONCAT_EX(a,b) a##b #if defined(HEDLEY_CONCAT) # undef HEDLEY_CONCAT #endif #define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) #if defined(HEDLEY_CONCAT3_EX) # undef HEDLEY_CONCAT3_EX #endif #define HEDLEY_CONCAT3_EX(a,b,c) a##b##c #if defined(HEDLEY_CONCAT3) # undef HEDLEY_CONCAT3 #endif #define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) #if defined(HEDLEY_VERSION_ENCODE) # undef HEDLEY_VERSION_ENCODE #endif #define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) #if defined(HEDLEY_VERSION_DECODE_MAJOR) # undef HEDLEY_VERSION_DECODE_MAJOR #endif #define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) #if defined(HEDLEY_VERSION_DECODE_MINOR) # undef HEDLEY_VERSION_DECODE_MINOR #endif #define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) #if defined(HEDLEY_VERSION_DECODE_REVISION) # undef HEDLEY_VERSION_DECODE_REVISION #endif #define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) #if defined(HEDLEY_GNUC_VERSION) # undef HEDLEY_GNUC_VERSION #endif #if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) # define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) #elif defined(__GNUC__) # define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) #endif #if defined(HEDLEY_GNUC_VERSION_CHECK) # undef HEDLEY_GNUC_VERSION_CHECK #endif #if defined(HEDLEY_GNUC_VERSION) # define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= 
HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_MSVC_VERSION) # undef HEDLEY_MSVC_VERSION #endif #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) # define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) #elif defined(_MSC_FULL_VER) && !defined(__ICL) # define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) #elif defined(_MSC_VER) && !defined(__ICL) # define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) #endif #if defined(HEDLEY_MSVC_VERSION_CHECK) # undef HEDLEY_MSVC_VERSION_CHECK #endif #if !defined(HEDLEY_MSVC_VERSION) # define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) #elif defined(_MSC_VER) && (_MSC_VER >= 1400) # define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) #elif defined(_MSC_VER) && (_MSC_VER >= 1200) # define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) #else # define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) #endif #if defined(HEDLEY_INTEL_VERSION) # undef HEDLEY_INTEL_VERSION #endif #if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) # define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) #elif defined(__INTEL_COMPILER) && !defined(__ICL) # define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) #endif #if defined(HEDLEY_INTEL_VERSION_CHECK) # undef HEDLEY_INTEL_VERSION_CHECK #endif #if defined(HEDLEY_INTEL_VERSION) # define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= 
HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_INTEL_CL_VERSION) # undef HEDLEY_INTEL_CL_VERSION #endif #if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) # define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) #endif #if defined(HEDLEY_INTEL_CL_VERSION_CHECK) # undef HEDLEY_INTEL_CL_VERSION_CHECK #endif #if defined(HEDLEY_INTEL_CL_VERSION) # define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_PGI_VERSION) # undef HEDLEY_PGI_VERSION #endif #if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) # define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) #endif #if defined(HEDLEY_PGI_VERSION_CHECK) # undef HEDLEY_PGI_VERSION_CHECK #endif #if defined(HEDLEY_PGI_VERSION) # define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_SUNPRO_VERSION) # undef HEDLEY_SUNPRO_VERSION #endif #if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) # define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) #elif defined(__SUNPRO_C) # define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) #elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) # define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), 
(__SUNPRO_CC & 0xf) * 10) #elif defined(__SUNPRO_CC) # define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) #endif #if defined(HEDLEY_SUNPRO_VERSION_CHECK) # undef HEDLEY_SUNPRO_VERSION_CHECK #endif #if defined(HEDLEY_SUNPRO_VERSION) # define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_EMSCRIPTEN_VERSION) # undef HEDLEY_EMSCRIPTEN_VERSION #endif #if defined(__EMSCRIPTEN__) # define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) #endif #if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) # undef HEDLEY_EMSCRIPTEN_VERSION_CHECK #endif #if defined(HEDLEY_EMSCRIPTEN_VERSION) # define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_ARM_VERSION) # undef HEDLEY_ARM_VERSION #endif #if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) # define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) #elif defined(__CC_ARM) && defined(__ARMCC_VERSION) # define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) #endif #if defined(HEDLEY_ARM_VERSION_CHECK) # undef HEDLEY_ARM_VERSION_CHECK #endif #if defined(HEDLEY_ARM_VERSION) # define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_IBM_VERSION) # undef HEDLEY_IBM_VERSION #endif #if defined(__ibmxl__) # define HEDLEY_IBM_VERSION 
HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) #elif defined(__xlC__) && defined(__xlC_ver__) # define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) #elif defined(__xlC__) # define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) #endif #if defined(HEDLEY_IBM_VERSION_CHECK) # undef HEDLEY_IBM_VERSION_CHECK #endif #if defined(HEDLEY_IBM_VERSION) # define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_TI_VERSION) # undef HEDLEY_TI_VERSION #endif #if \ defined(__TI_COMPILER_VERSION__) && \ ( \ defined(__TMS470__) || defined(__TI_ARM__) || \ defined(__MSP430__) || \ defined(__TMS320C2000__) \ ) # if (__TI_COMPILER_VERSION__ >= 16000000) # define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) # endif #endif #if defined(HEDLEY_TI_VERSION_CHECK) # undef HEDLEY_TI_VERSION_CHECK #endif #if defined(HEDLEY_TI_VERSION) # define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_TI_CL2000_VERSION) # undef HEDLEY_TI_CL2000_VERSION #endif #if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) # define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) #endif #if defined(HEDLEY_TI_CL2000_VERSION_CHECK) # undef HEDLEY_TI_CL2000_VERSION_CHECK #endif #if defined(HEDLEY_TI_CL2000_VERSION) # define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define 
HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_TI_CL430_VERSION) # undef HEDLEY_TI_CL430_VERSION #endif #if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) # define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) #endif #if defined(HEDLEY_TI_CL430_VERSION_CHECK) # undef HEDLEY_TI_CL430_VERSION_CHECK #endif #if defined(HEDLEY_TI_CL430_VERSION) # define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_TI_ARMCL_VERSION) # undef HEDLEY_TI_ARMCL_VERSION #endif #if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) # define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) #endif #if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) # undef HEDLEY_TI_ARMCL_VERSION_CHECK #endif #if defined(HEDLEY_TI_ARMCL_VERSION) # define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_TI_CL6X_VERSION) # undef HEDLEY_TI_CL6X_VERSION #endif #if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) # define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) #endif #if defined(HEDLEY_TI_CL6X_VERSION_CHECK) # undef HEDLEY_TI_CL6X_VERSION_CHECK #endif #if defined(HEDLEY_TI_CL6X_VERSION) # define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) 
#endif #if defined(HEDLEY_TI_CL7X_VERSION) # undef HEDLEY_TI_CL7X_VERSION #endif #if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) # define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) #endif #if defined(HEDLEY_TI_CL7X_VERSION_CHECK) # undef HEDLEY_TI_CL7X_VERSION_CHECK #endif #if defined(HEDLEY_TI_CL7X_VERSION) # define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_TI_CLPRU_VERSION) # undef HEDLEY_TI_CLPRU_VERSION #endif #if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) # define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) #endif #if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) # undef HEDLEY_TI_CLPRU_VERSION_CHECK #endif #if defined(HEDLEY_TI_CLPRU_VERSION) # define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_CRAY_VERSION) # undef HEDLEY_CRAY_VERSION #endif #if defined(_CRAYC) # if defined(_RELEASE_PATCHLEVEL) # define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) # else # define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) # endif #endif #if defined(HEDLEY_CRAY_VERSION_CHECK) # undef HEDLEY_CRAY_VERSION_CHECK #endif #if defined(HEDLEY_CRAY_VERSION) # define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_IAR_VERSION) # undef HEDLEY_IAR_VERSION #endif #if 
defined(__IAR_SYSTEMS_ICC__) # if __VER__ > 1000 # define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) # else # define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(VER / 100, __VER__ % 100, 0) # endif #endif #if defined(HEDLEY_IAR_VERSION_CHECK) # undef HEDLEY_IAR_VERSION_CHECK #endif #if defined(HEDLEY_IAR_VERSION) # define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_TINYC_VERSION) # undef HEDLEY_TINYC_VERSION #endif #if defined(__TINYC__) # define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) #endif #if defined(HEDLEY_TINYC_VERSION_CHECK) # undef HEDLEY_TINYC_VERSION_CHECK #endif #if defined(HEDLEY_TINYC_VERSION) # define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_DMC_VERSION) # undef HEDLEY_DMC_VERSION #endif #if defined(__DMC__) # define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) #endif #if defined(HEDLEY_DMC_VERSION_CHECK) # undef HEDLEY_DMC_VERSION_CHECK #endif #if defined(HEDLEY_DMC_VERSION) # define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_COMPCERT_VERSION) # undef HEDLEY_COMPCERT_VERSION #endif #if defined(__COMPCERT_VERSION__) # define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) #endif #if defined(HEDLEY_COMPCERT_VERSION_CHECK) # undef HEDLEY_COMPCERT_VERSION_CHECK #endif #if defined(HEDLEY_COMPCERT_VERSION) # define 
HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_PELLES_VERSION) # undef HEDLEY_PELLES_VERSION #endif #if defined(__POCC__) # define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) #endif #if defined(HEDLEY_PELLES_VERSION_CHECK) # undef HEDLEY_PELLES_VERSION_CHECK #endif #if defined(HEDLEY_PELLES_VERSION) # define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_GCC_VERSION) # undef HEDLEY_GCC_VERSION #endif #if \ defined(HEDLEY_GNUC_VERSION) && \ !defined(__clang__) && \ !defined(HEDLEY_INTEL_VERSION) && \ !defined(HEDLEY_PGI_VERSION) && \ !defined(HEDLEY_ARM_VERSION) && \ !defined(HEDLEY_TI_VERSION) && \ !defined(HEDLEY_TI_ARMCL_VERSION) && \ !defined(HEDLEY_TI_CL430_VERSION) && \ !defined(HEDLEY_TI_CL2000_VERSION) && \ !defined(HEDLEY_TI_CL6X_VERSION) && \ !defined(HEDLEY_TI_CL7X_VERSION) && \ !defined(HEDLEY_TI_CLPRU_VERSION) && \ !defined(__COMPCERT__) # define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION #endif #if defined(HEDLEY_GCC_VERSION_CHECK) # undef HEDLEY_GCC_VERSION_CHECK #endif #if defined(HEDLEY_GCC_VERSION) # define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_HAS_ATTRIBUTE) # undef HEDLEY_HAS_ATTRIBUTE #endif #if defined(__has_attribute) # define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) #else # define HEDLEY_HAS_ATTRIBUTE(attribute) (0) #endif #if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) # undef HEDLEY_GNUC_HAS_ATTRIBUTE #endif #if defined(__has_attribute) # define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) 
__has_attribute(attribute) #else # define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_GCC_HAS_ATTRIBUTE) # undef HEDLEY_GCC_HAS_ATTRIBUTE #endif #if defined(__has_attribute) # define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) __has_attribute(attribute) #else # define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_HAS_CPP_ATTRIBUTE) # undef HEDLEY_HAS_CPP_ATTRIBUTE #endif #if \ defined(__has_cpp_attribute) && \ defined(__cplusplus) && \ (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) # define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) #else # define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) #endif #if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) # undef HEDLEY_HAS_CPP_ATTRIBUTE_NS #endif #if !defined(__cplusplus) || !defined(__has_cpp_attribute) # define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) #elif \ !defined(HEDLEY_PGI_VERSION) && \ !defined(HEDLEY_IAR_VERSION) && \ (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) # define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) #else # define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) #endif #if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) # undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE #endif #if defined(__has_cpp_attribute) && defined(__cplusplus) # define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) #else # define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) # undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE #endif #if defined(__has_cpp_attribute) && defined(__cplusplus) # define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) 
__has_cpp_attribute(attribute) #else # define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_HAS_BUILTIN) # undef HEDLEY_HAS_BUILTIN #endif #if defined(__has_builtin) # define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) #else # define HEDLEY_HAS_BUILTIN(builtin) (0) #endif #if defined(HEDLEY_GNUC_HAS_BUILTIN) # undef HEDLEY_GNUC_HAS_BUILTIN #endif #if defined(__has_builtin) # define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) #else # define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_GCC_HAS_BUILTIN) # undef HEDLEY_GCC_HAS_BUILTIN #endif #if defined(__has_builtin) # define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) #else # define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_HAS_FEATURE) # undef HEDLEY_HAS_FEATURE #endif #if defined(__has_feature) # define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) #else # define HEDLEY_HAS_FEATURE(feature) (0) #endif #if defined(HEDLEY_GNUC_HAS_FEATURE) # undef HEDLEY_GNUC_HAS_FEATURE #endif #if defined(__has_feature) # define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) #else # define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_GCC_HAS_FEATURE) # undef HEDLEY_GCC_HAS_FEATURE #endif #if defined(__has_feature) # define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) #else # define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_HAS_EXTENSION) # undef HEDLEY_HAS_EXTENSION #endif #if defined(__has_extension) # define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) #else # define HEDLEY_HAS_EXTENSION(extension) (0) #endif 
#if defined(HEDLEY_GNUC_HAS_EXTENSION) # undef HEDLEY_GNUC_HAS_EXTENSION #endif #if defined(__has_extension) # define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) #else # define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_GCC_HAS_EXTENSION) # undef HEDLEY_GCC_HAS_EXTENSION #endif #if defined(__has_extension) # define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) #else # define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) # undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE #endif #if defined(__has_declspec_attribute) # define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) #else # define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) #endif #if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) # undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE #endif #if defined(__has_declspec_attribute) # define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) #else # define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) # undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE #endif #if defined(__has_declspec_attribute) # define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) #else # define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_HAS_WARNING) # undef HEDLEY_HAS_WARNING #endif #if defined(__has_warning) # define HEDLEY_HAS_WARNING(warning) __has_warning(warning) #else # define HEDLEY_HAS_WARNING(warning) (0) #endif #if defined(HEDLEY_GNUC_HAS_WARNING) # undef HEDLEY_GNUC_HAS_WARNING #endif #if defined(__has_warning) # define 
HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) #else # define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_GCC_HAS_WARNING) # undef HEDLEY_GCC_HAS_WARNING #endif #if defined(__has_warning) # define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) #else # define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) #endif #if \ (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ defined(__clang__) || \ HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) # define HEDLEY_PRAGMA(value) _Pragma(#value) #elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) # define HEDLEY_PRAGMA(value) __pragma(value) #else # define HEDLEY_PRAGMA(value) #endif #if defined(HEDLEY_DIAGNOSTIC_PUSH) # undef HEDLEY_DIAGNOSTIC_PUSH #endif #if defined(HEDLEY_DIAGNOSTIC_POP) # undef HEDLEY_DIAGNOSTIC_POP #endif #if defined(__clang__) # define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") # define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") #elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) # define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") # define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") #elif HEDLEY_GCC_VERSION_CHECK(4,6,0) # define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") # define 
HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") #elif \ HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) # define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) #elif HEDLEY_ARM_VERSION_CHECK(5,6,0) # define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") # define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") #elif \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) # define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") # define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") #elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) # define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") # define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") #else # define HEDLEY_DIAGNOSTIC_PUSH # define HEDLEY_DIAGNOSTIC_POP #endif /* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for HEDLEY INTERNAL USE ONLY. API subject to change without notice. 
*/ #if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) # undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ #endif #if defined(__cplusplus) # if HEDLEY_HAS_WARNING("-Wc++98-compat") # if HEDLEY_HAS_WARNING("-Wc++17-extensions") # if HEDLEY_HAS_WARNING("-Wc++1z-extensions") # define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ xpr \ HEDLEY_DIAGNOSTIC_POP # else # define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ xpr \ HEDLEY_DIAGNOSTIC_POP # endif # else # define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ xpr \ HEDLEY_DIAGNOSTIC_POP # endif # endif #endif #if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) # define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x #endif #if defined(HEDLEY_CONST_CAST) # undef HEDLEY_CONST_CAST #endif #if defined(__cplusplus) # define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) #elif \ HEDLEY_HAS_WARNING("-Wcast-qual") || \ HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) # define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ HEDLEY_DIAGNOSTIC_PUSH \ HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ ((T) (expr)); \ HEDLEY_DIAGNOSTIC_POP \ })) #else # define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) #endif #if defined(HEDLEY_REINTERPRET_CAST) # undef HEDLEY_REINTERPRET_CAST #endif #if defined(__cplusplus) # define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) #else # define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) #endif #if defined(HEDLEY_STATIC_CAST) # undef HEDLEY_STATIC_CAST #endif #if defined(__cplusplus) # define HEDLEY_STATIC_CAST(T, expr) 
(static_cast(expr)) #else # define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) #endif #if defined(HEDLEY_CPP_CAST) # undef HEDLEY_CPP_CAST #endif #if defined(__cplusplus) # if HEDLEY_HAS_WARNING("-Wold-style-cast") # define HEDLEY_CPP_CAST(T, expr) \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ ((T) (expr)) \ HEDLEY_DIAGNOSTIC_POP # elif HEDLEY_IAR_VERSION_CHECK(8,3,0) # define HEDLEY_CPP_CAST(T, expr) \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("diag_suppress=Pe137") \ HEDLEY_DIAGNOSTIC_POP # else # define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) # endif #else # define HEDLEY_CPP_CAST(T, expr) (expr) #endif #if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) # undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED #endif #if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") #elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") #elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) #elif HEDLEY_PGI_VERSION_CHECK(20,7,0) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") #elif HEDLEY_PGI_VERSION_CHECK(17,10,0) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") #elif HEDLEY_GCC_VERSION_CHECK(4,3,0) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") #elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) #elif \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ 
(HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") #elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") #elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") #elif HEDLEY_IAR_VERSION_CHECK(8,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") #elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") #else # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED #endif #if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) # undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS #endif #if HEDLEY_HAS_WARNING("-Wunknown-pragmas") # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") #elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") #elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) #elif HEDLEY_PGI_VERSION_CHECK(17,10,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") #elif HEDLEY_GCC_VERSION_CHECK(4,3,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") #elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) #elif \ 
HEDLEY_TI_VERSION_CHECK(16,9,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") #elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") #elif HEDLEY_IAR_VERSION_CHECK(8,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") #else # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS #endif #if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) # undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES #endif #if HEDLEY_HAS_WARNING("-Wunknown-attributes") # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") #elif HEDLEY_GCC_VERSION_CHECK(4,6,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") #elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") #elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) #elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) #elif HEDLEY_PGI_VERSION_CHECK(20,7,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") #elif HEDLEY_PGI_VERSION_CHECK(17,10,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") #elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") #elif \ HEDLEY_TI_VERSION_CHECK(18,1,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) # define 
HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") #elif HEDLEY_IAR_VERSION_CHECK(8,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") #else # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES #endif #if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) # undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL #endif #if HEDLEY_HAS_WARNING("-Wcast-qual") # define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") #elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") #elif HEDLEY_GCC_VERSION_CHECK(3,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") #else # define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL #endif #if defined(HEDLEY_DEPRECATED) # undef HEDLEY_DEPRECATED #endif #if defined(HEDLEY_DEPRECATED_FOR) # undef HEDLEY_DEPRECATED_FOR #endif #if \ HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) # define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) #elif \ HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) || \ HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ HEDLEY_TI_VERSION_CHECK(18,1,0) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) # define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) # define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) #elif defined(__cplusplus) && (__cplusplus >= 201402L) # define HEDLEY_DEPRECATED(since) 
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) # define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) #elif \ HEDLEY_HAS_ATTRIBUTE(deprecated) || \ HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) # define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) # define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) #elif \ HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_DEPRECATED(since) __declspec(deprecated) # define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) #elif HEDLEY_IAR_VERSION_CHECK(8,0,0) # define HEDLEY_DEPRECATED(since) _Pragma("deprecated") # define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") #else # define HEDLEY_DEPRECATED(since) # define HEDLEY_DEPRECATED_FOR(since, replacement) #endif #if defined(HEDLEY_UNAVAILABLE) # undef HEDLEY_UNAVAILABLE #endif #if \ HEDLEY_HAS_ATTRIBUTE(warning) || \ HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) # define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) #else # define HEDLEY_UNAVAILABLE(available_since) #endif 
#if defined(HEDLEY_WARN_UNUSED_RESULT) # undef HEDLEY_WARN_UNUSED_RESULT #endif #if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) # undef HEDLEY_WARN_UNUSED_RESULT_MSG #endif #if \ HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ HEDLEY_PGI_VERSION_CHECK(17,10,0) # define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) # define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) #elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) # define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) # define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) #elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) # define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) # define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) #elif defined(_Check_return_) /* SAL */ # define HEDLEY_WARN_UNUSED_RESULT _Check_return_ # define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ #else # define HEDLEY_WARN_UNUSED_RESULT # define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) #endif #if defined(HEDLEY_SENTINEL) # undef HEDLEY_SENTINEL #endif #if \ 
HEDLEY_HAS_ATTRIBUTE(sentinel) || \ HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(5,4,0) # define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) #else # define HEDLEY_SENTINEL(position) #endif #if defined(HEDLEY_NO_RETURN) # undef HEDLEY_NO_RETURN #endif #if HEDLEY_IAR_VERSION_CHECK(8,0,0) # define HEDLEY_NO_RETURN __noreturn #elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) # define HEDLEY_NO_RETURN __attribute__((__noreturn__)) #elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L # define HEDLEY_NO_RETURN _Noreturn #elif defined(__cplusplus) && (__cplusplus >= 201103L) # define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) #elif \ HEDLEY_HAS_ATTRIBUTE(noreturn) || \ HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) # define HEDLEY_NO_RETURN __attribute__((__noreturn__)) #elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) # define HEDLEY_NO_RETURN _Pragma("does_not_return") #elif \ HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_NO_RETURN __declspec(noreturn) #elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) # define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") #elif 
HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) # define HEDLEY_NO_RETURN __attribute((noreturn)) #elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) # define HEDLEY_NO_RETURN __declspec(noreturn) #else # define HEDLEY_NO_RETURN #endif #if defined(HEDLEY_NO_ESCAPE) # undef HEDLEY_NO_ESCAPE #endif #if HEDLEY_HAS_ATTRIBUTE(noescape) # define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) #else # define HEDLEY_NO_ESCAPE #endif #if defined(HEDLEY_UNREACHABLE) # undef HEDLEY_UNREACHABLE #endif #if defined(HEDLEY_UNREACHABLE_RETURN) # undef HEDLEY_UNREACHABLE_RETURN #endif #if defined(HEDLEY_ASSUME) # undef HEDLEY_ASSUME #endif #if \ HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_ASSUME(expr) __assume(expr) #elif HEDLEY_HAS_BUILTIN(__builtin_assume) # define HEDLEY_ASSUME(expr) __builtin_assume(expr) #elif \ HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) # if defined(__cplusplus) # define HEDLEY_ASSUME(expr) std::_nassert(expr) # else # define HEDLEY_ASSUME(expr) _nassert(expr) # endif #endif #if \ (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,5) # define HEDLEY_UNREACHABLE() __builtin_unreachable() #elif defined(HEDLEY_ASSUME) # define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) #endif #if !defined(HEDLEY_ASSUME) # if defined(HEDLEY_UNREACHABLE) # define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 
1 : (HEDLEY_UNREACHABLE(), 1))) # else # define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) # endif #endif #if defined(HEDLEY_UNREACHABLE) # if \ HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) # define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) # else # define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() # endif #else # define HEDLEY_UNREACHABLE_RETURN(value) return (value) #endif #if !defined(HEDLEY_UNREACHABLE) # define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) #endif HEDLEY_DIAGNOSTIC_PUSH #if HEDLEY_HAS_WARNING("-Wpedantic") # pragma clang diagnostic ignored "-Wpedantic" #endif #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) # pragma clang diagnostic ignored "-Wc++98-compat-pedantic" #endif #if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) # if defined(__clang__) # pragma clang diagnostic ignored "-Wvariadic-macros" # elif defined(HEDLEY_GCC_VERSION) # pragma GCC diagnostic ignored "-Wvariadic-macros" # endif #endif #if defined(HEDLEY_NON_NULL) # undef HEDLEY_NON_NULL #endif #if \ HEDLEY_HAS_ATTRIBUTE(nonnull) || \ HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) # define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) #else # define HEDLEY_NON_NULL(...) 
#endif HEDLEY_DIAGNOSTIC_POP #if defined(HEDLEY_PRINTF_FORMAT) # undef HEDLEY_PRINTF_FORMAT #endif #if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) # define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) #elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) # define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) #elif \ HEDLEY_HAS_ATTRIBUTE(format) || \ HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) # define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) #elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) # define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) #else # define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) #endif #if defined(HEDLEY_CONSTEXPR) # undef HEDLEY_CONSTEXPR #endif #if defined(__cplusplus) # if __cplusplus >= 201103L # define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) # endif #endif #if !defined(HEDLEY_CONSTEXPR) # define HEDLEY_CONSTEXPR #endif 
#if defined(HEDLEY_PREDICT) # undef HEDLEY_PREDICT #endif #if defined(HEDLEY_LIKELY) # undef HEDLEY_LIKELY #endif #if defined(HEDLEY_UNLIKELY) # undef HEDLEY_UNLIKELY #endif #if defined(HEDLEY_UNPREDICTABLE) # undef HEDLEY_UNPREDICTABLE #endif #if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) # define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) #endif #if \ (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION)) || \ HEDLEY_GCC_VERSION_CHECK(9,0,0) # define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) # define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) # define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) # define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) # define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) #elif \ (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ HEDLEY_CRAY_VERSION_CHECK(8,1,0) # define HEDLEY_PREDICT(expr, expected, probability) \ (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) # define HEDLEY_PREDICT_TRUE(expr, probability) \ (__extension__ ({ \ double hedley_probability_ = (probability); \ ((hedley_probability_ >= 0.9) ? 
__builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ })) # define HEDLEY_PREDICT_FALSE(expr, probability) \ (__extension__ ({ \ double hedley_probability_ = (probability); \ ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ })) # define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) # define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) #else # define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) # define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) # define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) # define HEDLEY_LIKELY(expr) (!!(expr)) # define HEDLEY_UNLIKELY(expr) (!!(expr)) #endif #if !defined(HEDLEY_UNPREDICTABLE) # define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) #endif #if defined(HEDLEY_MALLOC) # undef HEDLEY_MALLOC #endif #if \ HEDLEY_HAS_ATTRIBUTE(malloc) || \ HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) # define HEDLEY_MALLOC __attribute__((__malloc__)) #elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) # define HEDLEY_MALLOC _Pragma("returns_new_memory") #elif \ 
HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_MALLOC __declspec(restrict) #else # define HEDLEY_MALLOC #endif #if defined(HEDLEY_PURE) # undef HEDLEY_PURE #endif #if \ HEDLEY_HAS_ATTRIBUTE(pure) || \ HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ HEDLEY_PGI_VERSION_CHECK(17,10,0) # define HEDLEY_PURE __attribute__((__pure__)) #elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) # define HEDLEY_PURE _Pragma("does_not_write_global_data") #elif defined(__cplusplus) && \ ( \ HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ ) # define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") #else # define HEDLEY_PURE #endif #if defined(HEDLEY_CONST) # undef HEDLEY_CONST #endif #if \ HEDLEY_HAS_ATTRIBUTE(const) || \ HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && 
defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ HEDLEY_PGI_VERSION_CHECK(17,10,0) # define HEDLEY_CONST __attribute__((__const__)) #elif \ HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) # define HEDLEY_CONST _Pragma("no_side_effect") #else # define HEDLEY_CONST HEDLEY_PURE #endif #if defined(HEDLEY_RESTRICT) # undef HEDLEY_RESTRICT #endif #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) # define HEDLEY_RESTRICT restrict #elif \ HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ defined(__clang__) # define HEDLEY_RESTRICT __restrict #elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) # define HEDLEY_RESTRICT _Restrict #else # define HEDLEY_RESTRICT #endif #if defined(HEDLEY_INLINE) # undef HEDLEY_INLINE #endif #if \ (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ (defined(__cplusplus) && (__cplusplus >= 199711L)) # define HEDLEY_INLINE inline #elif \ defined(HEDLEY_GCC_VERSION) || \ HEDLEY_ARM_VERSION_CHECK(6,2,0) # define HEDLEY_INLINE __inline__ #elif \ HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ 
HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) # define HEDLEY_INLINE __inline #else # define HEDLEY_INLINE #endif #if defined(HEDLEY_ALWAYS_INLINE) # undef HEDLEY_ALWAYS_INLINE #endif #if \ HEDLEY_HAS_ATTRIBUTE(always_inline) || \ HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) # define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE #elif \ HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_ALWAYS_INLINE __forceinline #elif defined(__cplusplus) && \ ( \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ ) # define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") #elif HEDLEY_IAR_VERSION_CHECK(8,0,0) # define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") #else # define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE #endif #if 
defined(HEDLEY_NEVER_INLINE) # undef HEDLEY_NEVER_INLINE #endif #if \ HEDLEY_HAS_ATTRIBUTE(noinline) || \ HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) # define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) #elif \ HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_NEVER_INLINE __declspec(noinline) #elif HEDLEY_PGI_VERSION_CHECK(10,2,0) # define HEDLEY_NEVER_INLINE _Pragma("noinline") #elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) # define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") #elif HEDLEY_IAR_VERSION_CHECK(8,0,0) # define HEDLEY_NEVER_INLINE _Pragma("inline=never") #elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) # define HEDLEY_NEVER_INLINE __attribute((noinline)) #elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) # define HEDLEY_NEVER_INLINE __declspec(noinline) #else # define HEDLEY_NEVER_INLINE #endif #if defined(HEDLEY_PRIVATE) # undef HEDLEY_PRIVATE #endif #if defined(HEDLEY_PUBLIC) # undef HEDLEY_PUBLIC #endif #if defined(HEDLEY_IMPORT) # undef HEDLEY_IMPORT #endif #if defined(_WIN32) || defined(__CYGWIN__) # define HEDLEY_PRIVATE # define HEDLEY_PUBLIC __declspec(dllexport) # define HEDLEY_IMPORT __declspec(dllimport) #else # if \ 
HEDLEY_HAS_ATTRIBUTE(visibility) || \ HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ ( \ defined(__TI_EABI__) && \ ( \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ ) \ ) # define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) # define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) # else # define HEDLEY_PRIVATE # define HEDLEY_PUBLIC # endif # define HEDLEY_IMPORT extern #endif #if defined(HEDLEY_NO_THROW) # undef HEDLEY_NO_THROW #endif #if \ HEDLEY_HAS_ATTRIBUTE(nothrow) || \ HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) # define HEDLEY_NO_THROW __attribute__((__nothrow__)) #elif \ HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) # define HEDLEY_NO_THROW __declspec(nothrow) #else # define HEDLEY_NO_THROW #endif #if defined(HEDLEY_FALL_THROUGH) # undef HEDLEY_FALL_THROUGH #endif #if \ HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ HEDLEY_GCC_VERSION_CHECK(7,0,0) # define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) #elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) # define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) #elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) # define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) #elif defined(__fallthrough) /* SAL */ # define HEDLEY_FALL_THROUGH __fallthrough #else # define HEDLEY_FALL_THROUGH #endif #if defined(HEDLEY_RETURNS_NON_NULL) # undef HEDLEY_RETURNS_NON_NULL #endif #if \ HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ HEDLEY_GCC_VERSION_CHECK(4,9,0) # define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) #elif defined(_Ret_notnull_) /* SAL */ # define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ #else # 
define HEDLEY_RETURNS_NON_NULL #endif #if defined(HEDLEY_ARRAY_PARAM) # undef HEDLEY_ARRAY_PARAM #endif #if \ defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ !defined(__STDC_NO_VLA__) && \ !defined(__cplusplus) && \ !defined(HEDLEY_PGI_VERSION) && \ !defined(HEDLEY_TINYC_VERSION) # define HEDLEY_ARRAY_PARAM(name) (name) #else # define HEDLEY_ARRAY_PARAM(name) #endif #if defined(HEDLEY_IS_CONSTANT) # undef HEDLEY_IS_CONSTANT #endif #if defined(HEDLEY_REQUIRE_CONSTEXPR) # undef HEDLEY_REQUIRE_CONSTEXPR #endif /* HEDLEY_IS_CONSTEXPR_ is for HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ #if defined(HEDLEY_IS_CONSTEXPR_) # undef HEDLEY_IS_CONSTEXPR_ #endif #if \ HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ HEDLEY_CRAY_VERSION_CHECK(8,1,0) # define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) #endif /* C only: each HEDLEY_IS_CONSTEXPR_ variant below multiplies expr by zero, casts the result to void*, and inspects the type (or pointee size) of a conditional expression whose other arm is a pointer to a different type. */ #if !defined(__cplusplus) # if \ HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ HEDLEY_TINYC_VERSION_CHECK(0,9,24) # if defined(__INTPTR_TYPE__) # define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) # else /* No compiler-provided __INTPTR_TYPE__: take intptr_t from <stdint.h>. */ # include <stdint.h> # define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? 
(void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) # endif # elif \ ( \ defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ !defined(HEDLEY_SUNPRO_VERSION) && \ !defined(HEDLEY_PGI_VERSION) && \ !defined(HEDLEY_IAR_VERSION)) || \ HEDLEY_HAS_EXTENSION(c_generic_selections) || \ HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ HEDLEY_ARM_VERSION_CHECK(5,3,0) # if defined(__INTPTR_TYPE__) # define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) # else /* No compiler-provided __INTPTR_TYPE__: take intptr_t from <stdint.h>. The operand must be ((expr) * 0), exactly as in the __INTPTR_TYPE__ branch; the previous text, ((intptr_t) * 0), never referenced expr. */ # include <stdint.h> # define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) # endif # elif \ defined(HEDLEY_GCC_VERSION) || \ defined(HEDLEY_INTEL_VERSION) || \ defined(HEDLEY_TINYC_VERSION) || \ defined(HEDLEY_TI_ARMCL_VERSION) || \ HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ defined(HEDLEY_TI_CL2000_VERSION) || \ defined(HEDLEY_TI_CL6X_VERSION) || \ defined(HEDLEY_TI_CL7X_VERSION) || \ defined(HEDLEY_TI_CLPRU_VERSION) || \ defined(__clang__) # define HEDLEY_IS_CONSTEXPR_(expr) ( \ sizeof(void) != \ sizeof(*( \ 1 ? \ ((void*) ((expr) * 0L) ) : \ ((struct { char v[sizeof(void) * 2]; } *) 1) \ ) \ ) \ ) # endif #endif #if defined(HEDLEY_IS_CONSTEXPR_) # if !defined(HEDLEY_IS_CONSTANT) # define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) # endif # define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) #else # if !defined(HEDLEY_IS_CONSTANT) # define HEDLEY_IS_CONSTANT(expr) (0) # endif # define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) #endif #if defined(HEDLEY_BEGIN_C_DECLS) # undef HEDLEY_BEGIN_C_DECLS #endif #if defined(HEDLEY_END_C_DECLS) # undef HEDLEY_END_C_DECLS #endif #if defined(HEDLEY_C_DECL) # undef HEDLEY_C_DECL #endif #if defined(__cplusplus) # define HEDLEY_BEGIN_C_DECLS extern "C" { # define HEDLEY_END_C_DECLS } # define HEDLEY_C_DECL extern "C" #else # define HEDLEY_BEGIN_C_DECLS # define HEDLEY_END_C_DECLS # define HEDLEY_C_DECL #endif #if defined(HEDLEY_STATIC_ASSERT) # undef HEDLEY_STATIC_ASSERT #endif #if \ !defined(__cplusplus) && ( \ (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ defined(_Static_assert) \ ) # define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) #elif \ (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) #else # define HEDLEY_STATIC_ASSERT(expr, message) #endif #if defined(HEDLEY_NULL) # undef HEDLEY_NULL #endif #if defined(__cplusplus) # if __cplusplus >= 201103L # define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) # elif defined(NULL) # define HEDLEY_NULL NULL # else # define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) # endif #elif defined(NULL) # define HEDLEY_NULL NULL #else # define HEDLEY_NULL ((void*) 0) #endif #if defined(HEDLEY_MESSAGE) # undef HEDLEY_MESSAGE #endif #if HEDLEY_HAS_WARNING("-Wunknown-pragmas") # define HEDLEY_MESSAGE(msg) \ HEDLEY_DIAGNOSTIC_PUSH \ HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ HEDLEY_PRAGMA(message msg) \ HEDLEY_DIAGNOSTIC_POP #elif \ 
HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) # define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) #elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) # define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) #elif HEDLEY_IAR_VERSION_CHECK(8,0,0) # define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) #elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) # define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) #else # define HEDLEY_MESSAGE(msg) #endif #if defined(HEDLEY_WARNING) # undef HEDLEY_WARNING #endif #if HEDLEY_HAS_WARNING("-Wunknown-pragmas") # define HEDLEY_WARNING(msg) \ HEDLEY_DIAGNOSTIC_PUSH \ HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ HEDLEY_PRAGMA(clang warning msg) \ HEDLEY_DIAGNOSTIC_POP #elif \ HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) # define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) #elif \ HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) #else # define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) #endif #if defined(HEDLEY_REQUIRE) # undef HEDLEY_REQUIRE #endif #if defined(HEDLEY_REQUIRE_MSG) # undef HEDLEY_REQUIRE_MSG #endif #if HEDLEY_HAS_ATTRIBUTE(diagnose_if) # if HEDLEY_HAS_WARNING("-Wgcc-compat") # define HEDLEY_REQUIRE(expr) \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ __attribute__((diagnose_if(!(expr), #expr, "error"))) \ HEDLEY_DIAGNOSTIC_POP # define HEDLEY_REQUIRE_MSG(expr,msg) \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ __attribute__((diagnose_if(!(expr), msg, "error"))) \ HEDLEY_DIAGNOSTIC_POP # else # define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) # define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) # endif #else # define HEDLEY_REQUIRE(expr) # define HEDLEY_REQUIRE_MSG(expr,msg) #endif #if defined(HEDLEY_FLAGS) # undef 
HEDLEY_FLAGS #endif #if HEDLEY_HAS_ATTRIBUTE(flag_enum) # define HEDLEY_FLAGS __attribute__((__flag_enum__)) #else # define HEDLEY_FLAGS #endif #if defined(HEDLEY_FLAGS_CAST) # undef HEDLEY_FLAGS_CAST #endif #if HEDLEY_INTEL_VERSION_CHECK(19,0,0) # define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("warning(disable:188)") \ ((T) (expr)); \ HEDLEY_DIAGNOSTIC_POP \ })) #else # define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) #endif #if defined(HEDLEY_EMPTY_BASES) # undef HEDLEY_EMPTY_BASES #endif #if \ (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_EMPTY_BASES __declspec(empty_bases) #else # define HEDLEY_EMPTY_BASES #endif /* Remaining macros are deprecated. Each HEDLEY_CLANG_HAS_* macro below simply forwards to the HEDLEY_HAS_* macro of the same name, and each is #undef'd first if already defined. */ #if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) # undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK #endif #if defined(__clang__) # define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) #else # define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) # undef HEDLEY_CLANG_HAS_ATTRIBUTE #endif #define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) #if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) # undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE #endif #define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) #if defined(HEDLEY_CLANG_HAS_BUILTIN) # undef HEDLEY_CLANG_HAS_BUILTIN #endif #define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) #if defined(HEDLEY_CLANG_HAS_FEATURE) # undef HEDLEY_CLANG_HAS_FEATURE #endif #define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) #if defined(HEDLEY_CLANG_HAS_EXTENSION) # undef HEDLEY_CLANG_HAS_EXTENSION #endif #define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) #if defined(HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE) # undef 
HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE #endif #define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) #if defined(HEDLEY_CLANG_HAS_WARNING) # undef HEDLEY_CLANG_HAS_WARNING #endif #define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) #endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ simde-0.7.2/simde/simde-align.h000066400000000000000000000430511400333146700162700ustar00rootroot00000000000000/* Alignment * Created by Evan Nemerson * * To the extent possible under law, the authors have waived all * copyright and related or neighboring rights to this code. For * details, see the Creative Commons Zero 1.0 Universal license at * <https://creativecommons.org/publicdomain/zero/1.0/> * * SPDX-License-Identifier: CC0-1.0 * ********************************************************************** * * This is a portability layer which should help iron out some * differences across various compilers, as well as various versions of * C and C++. * * It was originally developed for SIMD Everywhere * (<https://github.com/simd-everywhere/simde>), but since its only * dependency is Hedley (<https://nemequ.github.io/hedley>, also CC0) * it can easily be used in other projects, so please feel free to do * so. * * If you do use this in your project, please keep a link to SIMDe in * your code to remind you where to report any bugs and/or check for * updated versions. * * # API Overview * * The API has several parts, and most macros have a few variations. * There are APIs for declaring aligned fields/variables, optimization * hints, and run-time alignment checks. * * Briefly, macros ending with "_TO" take numeric values and are great * when you know the value you would like to use. Macros ending with * "_LIKE", on the other hand, accept a type and are used when you want * to use the alignment of a type instead of hardcoding a value. * * Documentation for each section of the API is inline. * * True to form, MSVC is the main problem and imposes several * limitations on the effectiveness of the APIs. 
Detailed descriptions * of the limitations of each macro are inline, but in general: * * * On C11+ or C++11+ code written using this API will work. The * ASSUME macros may or may not generate a hint to the compiler, but * that is only an optimization issue and will not actually cause * failures. * * If you're using pretty much any compiler other than MSVC, * everything should basically work as well as in C11/C++11. */ #if !defined(SIMDE_ALIGN_H) #define SIMDE_ALIGN_H #include "hedley.h" /* I know this seems a little silly, but some non-hosted compilers * don't have stddef.h, so we try to accomodate them. */ #if !defined(SIMDE_ALIGN_SIZE_T_) #if defined(__SIZE_TYPE__) #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ #elif defined(__SIZE_T_TYPE__) #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ #elif defined(__cplusplus) #include #define SIMDE_ALIGN_SIZE_T_ size_t #else #include #define SIMDE_ALIGN_SIZE_T_ size_t #endif #endif #if !defined(SIMDE_ALIGN_INTPTR_T_) #if defined(__INTPTR_TYPE__) #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ #elif defined(__PTRDIFF_TYPE__) #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ #elif defined(__PTRDIFF_T_TYPE__) #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ #elif defined(__cplusplus) #include #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t #else #include #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t #endif #endif #if defined(SIMDE_ALIGN_DEBUG) #if defined(__cplusplus) #include #else #include #endif #endif /* SIMDE_ALIGN_OF(Type) * * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. * It isn't defined everywhere (only when the compiler has some alignof- * like feature we can use to implement it), but it should work in most * modern compilers, as well as C11 and C++11. * * If we can't find an implementation for SIMDE_ALIGN_OF then the macro * will not be defined, so if you can handle that situation sensibly * you may need to sprinkle some ifdefs into your code. 
*/ /* Note: the HEDLEY_HAS_FEATURE(c_alignof) and HEDLEY_HAS_FEATURE(cxx_alignof) tests below are short-circuited by `0 &&`, so only the language-version checks select _Alignof / alignof. */ #if \ (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ (0 && HEDLEY_HAS_FEATURE(c_alignof)) #define SIMDE_ALIGN_OF(Type) _Alignof(Type) #elif \ (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) #define SIMDE_ALIGN_OF(Type) alignof(Type) #elif \ HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ defined(__IBM__ALIGNOF__) || \ defined(__clang__) #define SIMDE_ALIGN_OF(Type) __alignof__(Type) #elif \ HEDLEY_IAR_VERSION_CHECK(8,40,0) #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) #elif \ HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Probably goes back much further, but MS takes down their old docs. * If you can verify that this works in earlier versions please let * me know! */ #define SIMDE_ALIGN_OF(Type) __alignof(Type) #endif /* SIMDE_ALIGN_MAXIMUM: * * This is the maximum alignment that the compiler supports. You can * define the value prior to including SIMDe if necessary, but in that * case *please* submit an issue so we can add the platform to the * detection code. * * Most compilers are okay with types which are aligned beyond what * they think is the maximum, as long as the alignment is a power * of two. Older versions of MSVC are the exception, so we need to cap * the alignment requests at values that the implementation supports. * * XL C/C++ will accept values larger than 16 (which is the alignment * of an AltiVec vector), but will not reliably align to the larger * value, so we cap the value at 16 there. 
* * If the compiler accepts any power-of-two value within reason then * this macro should be left undefined, and the SIMDE_ALIGN_CAP * macro will just return the value passed to it. */ #if !defined(SIMDE_ALIGN_MAXIMUM) #if defined(HEDLEY_MSVC_VERSION) #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) // Visual studio 2017 and newer does not need a max #else #if defined(_M_IX86) || defined(_M_AMD64) #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) /* VS 2010 is really a guess based on Wikipedia; if anyone can * test with old VS versions I'd really appreciate it. */ #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 #else #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 #endif #elif defined(_M_ARM) || defined(_M_ARM64) #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 #endif #endif #elif defined(HEDLEY_IBM_VERSION) #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 #endif #endif /* You can mostly ignore these; they're intended for internal use. * If you do need to use them please let me know; if they fulfill * a common use case I'll probably drop the trailing underscore * and make them part of the public API. 
*/ #if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 #define SIMDE_ALIGN_64_ 64 #define SIMDE_ALIGN_32_ 32 #define SIMDE_ALIGN_16_ 16 #define SIMDE_ALIGN_8_ 8 #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 #define SIMDE_ALIGN_64_ 32 #define SIMDE_ALIGN_32_ 32 #define SIMDE_ALIGN_16_ 16 #define SIMDE_ALIGN_8_ 8 #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 #define SIMDE_ALIGN_64_ 16 #define SIMDE_ALIGN_32_ 16 #define SIMDE_ALIGN_16_ 16 #define SIMDE_ALIGN_8_ 8 #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 #define SIMDE_ALIGN_64_ 8 #define SIMDE_ALIGN_32_ 8 #define SIMDE_ALIGN_16_ 8 #define SIMDE_ALIGN_8_ 8 #else #error Max alignment expected to be >= 8 #endif #else #define SIMDE_ALIGN_64_ 64 #define SIMDE_ALIGN_32_ 32 #define SIMDE_ALIGN_16_ 16 #define SIMDE_ALIGN_8_ 8 #endif /** * SIMDE_ALIGN_CAP(Alignment) * * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. */ #if defined(SIMDE_ALIGN_MAXIMUM) #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) #else #define SIMDE_ALIGN_CAP(Alignment) (Alignment) #endif /* SIMDE_ALIGN_TO(Alignment) * * SIMDE_ALIGN_TO is used to declare types or variables. It basically * maps to the align attribute in most compilers, the align declspec * in MSVC, or _Alignas/alignas in C11/C++11. * * Example: * * struct i32x4 { * SIMDE_ALIGN_TO(16) int32_t values[4]; * } * * Limitations: * * MSVC requires that the Alignment parameter be numeric; you can't do * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is * unfortunate because that's really how the LIKE macros are * implemented, and I am not aware of a way to get anything like this * to work without using the C11/C++11 keywords. * * It also means that we can't use SIMDE_ALIGN_CAP to limit the * alignment to the value specified, which MSVC also requires, so on * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. 
* They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, * but should be safe to use on MSVC. * * All this is to say that, if you want your code to work on MSVC, you * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of * SIMDE_ALIGN_TO(8/16/32/64). */ #if \ HEDLEY_HAS_ATTRIBUTE(aligned) || \ HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) #elif \ (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) #elif \ (defined(__cplusplus) && (__cplusplus >= 201103L)) #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) #elif \ defined(HEDLEY_MSVC_VERSION) #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); * the alignment passed to the declspec has to be an integer. */ #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE #endif #define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) #define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) #define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) #define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) /* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) * * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's * std::assume_aligned, or __builtin_assume_aligned. It tells the * compiler to assume that the provided pointer is aligned to an * `Alignment`-byte boundary. 
* * If you define SIMDE_ALIGN_DEBUG prior to including this header then * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. We don't * integrate with NDEBUG in this header, but it may be a good idea to * put something like this in your code: * * #if !defined(NDEBUG) * #define SIMDE_ALIGN_DEBUG * #endif * #include <.../simde-align.h> */ #if \ HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ HEDLEY_GCC_VERSION_CHECK(4,7,0) #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) #elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ __assume_aligned(simde_assume_aligned_t_, Alignment); \ simde_assume_aligned_t_; \ })) #elif defined(__cplusplus) && (__cplusplus > 201703L) #include #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) #else #if defined(__cplusplus) template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) #else HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) #endif { HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); return ptr; } #if defined(__cplusplus) #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) #else #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) #endif #endif #if !defined(SIMDE_ALIGN_DEBUG) #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) #else #include #if defined(__cplusplus) template static HEDLEY_ALWAYS_INLINE T* 
simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) #else static HEDLEY_ALWAYS_INLINE void* simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) #endif { if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); } return ptr; } #if defined(__cplusplus) #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) #else #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) #endif #endif /* SIMDE_ALIGN_LIKE(Type) * SIMDE_ALIGN_LIKE_#(Type) * * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros * except instead of an integer they take a type; basically, it's just * a more convenient way to do something like: * * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) * * The versions with a numeric suffix will fall back on using a numeric * value in the event we can't use SIMDE_ALIGN_OF(Type). This is * mainly for MSVC, where __declspec(align()) can't handle anything * other than hard-coded numeric values. 
*/
#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE)
  /* SIMDE_ALIGN_OF can be fed to SIMDE_ALIGN_TO, so every variant
   * (suffixed or not) simply uses the type's own alignment. */
  #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type))
  #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type)
  #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type)
  #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type)
  #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type)
#else
  /* Fallback: the Type argument is ignored and the numeric suffix is
   * used instead.  Note that no unsuffixed SIMDE_ALIGN_LIKE is defined
   * in this branch. */
  #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64
  #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32
  #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16
  #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8
#endif

/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type)
 *
 * This is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a
 * type instead of a numeric value.  It is only defined when both
 * SIMDE_ALIGN_OF and SIMDE_ALIGN_ASSUME_TO are available. */
#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO)
  #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type))
#endif

/* SIMDE_ALIGN_CAST(Type, Pointer)
 *
 * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try
 * to silence warnings that some compilers may produce if you try
 * to assign to a type with increased alignment requirements.
 *
 * Note that it does *not* actually attempt to tell the compiler that
 * the pointer is aligned like the destination should be; that's the
 * job of the next macro.  This macro is necessary for stupid APIs
 * like _mm_loadu_si128 where the input is a __m128i* but the function
 * is specifically for data which isn't necessarily aligned to
 * _Alignof(__m128i).
*/ #if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ HEDLEY_DIAGNOSTIC_POP \ simde_r_; \ })) #else #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) #endif /* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) * * This is sort of like a combination of a reinterpret_cast and a * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell * the compiler that the pointer is aligned like the specified type * and casts the pointer to the specified type while suppressing any * warnings from the compiler about casting to a type with greater * alignment requirements. */ #define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) #endif /* !defined(SIMDE_ALIGN_H) */ simde-0.7.2/simde/simde-arch.h000066400000000000000000000402311400333146700161100ustar00rootroot00000000000000/* Architecture detection * Created by Evan Nemerson * * To the extent possible under law, the authors have waived all * copyright and related or neighboring rights to this code. For * details, see the Creative Commons Zero 1.0 Universal license at * * * SPDX-License-Identifier: CC0-1.0 * * Different compilers define different preprocessor macros for the * same architecture. This is an attempt to provide a single * interface which is usable on any compiler. * * In general, a macro named SIMDE_ARCH_* is defined for each * architecture the CPU supports. When there are multiple possible * versions, we try to define the macro to the target version. For * example, if you want to check for i586+, you could do something * like: * * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) * ... 
* #endif * * You could also just check that SIMDE_ARCH_X86 >= 5 without checking * if it's defined first, but some compilers may emit a warning about * an undefined macro being used (e.g., GCC with -Wundef). * * This was originally created for SIMDe * (hence the prefix), but this * header has no dependencies and may be used anywhere. It is * originally based on information from * , though it * has been enhanced with additional information. * * If you improve this file, or find a bug, please file the issue at * . If you copy this into * your project, even if you change the prefix, please keep the links * to SIMDe intact so others know where to report issues, submit * enhancements, and find the latest version. */ #if !defined(SIMDE_ARCH_H) #define SIMDE_ARCH_H /* Alpha */ #if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) # if defined(__alpha_ev6__) # define SIMDE_ARCH_ALPHA 6 # elif defined(__alpha_ev5__) # define SIMDE_ARCH_ALPHA 5 # elif defined(__alpha_ev4__) # define SIMDE_ARCH_ALPHA 4 # else # define SIMDE_ARCH_ALPHA 1 # endif #endif #if defined(SIMDE_ARCH_ALPHA) # define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) #else # define SIMDE_ARCH_ALPHA_CHECK(version) (0) #endif /* Atmel AVR */ #if defined(__AVR_ARCH__) # define SIMDE_ARCH_AVR __AVR_ARCH__ #endif /* AMD64 / x86_64 */ #if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) # define SIMDE_ARCH_AMD64 1000 #endif /* ARM */ #if defined(__ARM_ARCH_8A__) # define SIMDE_ARCH_ARM 82 #elif defined(__ARM_ARCH_8R__) # define SIMDE_ARCH_ARM 81 #elif defined(__ARM_ARCH_8__) # define SIMDE_ARCH_ARM 80 #elif defined(__ARM_ARCH_7S__) # define SIMDE_ARCH_ARM 74 #elif defined(__ARM_ARCH_7M__) # define SIMDE_ARCH_ARM 73 #elif defined(__ARM_ARCH_7R__) # define SIMDE_ARCH_ARM 72 #elif defined(__ARM_ARCH_7A__) # define SIMDE_ARCH_ARM 71 #elif defined(__ARM_ARCH_7__) # define SIMDE_ARCH_ARM 70 #elif defined(__ARM_ARCH) # 
define SIMDE_ARCH_ARM (__ARM_ARCH * 10) #elif defined(_M_ARM) # define SIMDE_ARCH_ARM (_M_ARM * 10) #elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) # define SIMDE_ARCH_ARM 1 #endif #if defined(SIMDE_ARCH_ARM ) # define SIMDE_ARCH_ARM_CHECK(version) ((version) <= SIMDE_ARCH_ARM) #else # define SIMDE_ARCH_ARM_CHECK(version) (0) #endif /* AArch64 */ #if defined(__aarch64__) || defined(_M_ARM64) # define SIMDE_ARCH_AARCH64 1000 #endif #if defined(SIMDE_ARCH_AARCH64) # define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) #else # define SIMDE_ARCH_AARCH64_CHECK(version) (0) #endif /* ARM SIMD ISA extensions */ #if defined(__ARM_NEON) # if defined(SIMDE_ARCH_AARCH64) # define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_AARCH64 # elif defined(SIMDE_ARCH_ARM) # define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM # endif #endif #if defined(__ARM_FEATURE_SVE) # define SIMDE_ARCH_ARM_SVE #endif /* Blackfin */ #if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) # define SIMDE_ARCH_BLACKFIN 1 #endif /* CRIS */ #if defined(__CRIS_arch_version) # define SIMDE_ARCH_CRIS __CRIS_arch_version #elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) # define SIMDE_ARCH_CRIS 1 #endif /* Convex */ #if defined(__convex_c38__) # define SIMDE_ARCH_CONVEX 38 #elif defined(__convex_c34__) # define SIMDE_ARCH_CONVEX 34 #elif defined(__convex_c32__) # define SIMDE_ARCH_CONVEX 32 #elif defined(__convex_c2__) # define SIMDE_ARCH_CONVEX 2 #elif defined(__convex__) # define SIMDE_ARCH_CONVEX 1 #endif #if defined(SIMDE_ARCH_CONVEX) # define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) #else # define SIMDE_ARCH_CONVEX_CHECK(version) (0) #endif /* Adapteva Epiphany */ #if defined(__epiphany__) # define SIMDE_ARCH_EPIPHANY 1 #endif /* Fujitsu FR-V */ #if defined(__frv__) # define SIMDE_ARCH_FRV 1 #endif /* H8/300 */ #if defined(__H8300__) # define SIMDE_ARCH_H8300 
#endif /* HP/PA / PA-RISC */ #if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) # define SIMDE_ARCH_HPPA 20 #elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) # define SIMDE_ARCH_HPPA 11 #elif defined(_PA_RISC1_0) # define SIMDE_ARCH_HPPA 10 #elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) # define SIMDE_ARCH_HPPA 1 #endif #if defined(SIMDE_ARCH_HPPA) # define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) #else # define SIMDE_ARCH_HPPA_CHECK(version) (0) #endif /* x86 */ #if defined(_M_IX86) # define SIMDE_ARCH_X86 (_M_IX86 / 100) #elif defined(__I86__) # define SIMDE_ARCH_X86 __I86__ #elif defined(i686) || defined(__i686) || defined(__i686__) # define SIMDE_ARCH_X86 6 #elif defined(i586) || defined(__i586) || defined(__i586__) # define SIMDE_ARCH_X86 5 #elif defined(i486) || defined(__i486) || defined(__i486__) # define SIMDE_ARCH_X86 4 #elif defined(i386) || defined(__i386) || defined(__i386__) # define SIMDE_ARCH_X86 3 #elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) # define SIMDE_ARCH_X86 3 #endif #if defined(SIMDE_ARCH_X86) # define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) #else # define SIMDE_ARCH_X86_CHECK(version) (0) #endif /* SIMD ISA extensions for x86/x86_64 */ #if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) # if defined(_M_IX86_FP) # define SIMDE_ARCH_X86_MMX # if (_M_IX86_FP >= 1) # define SIMDE_ARCH_X86_SSE 1 # endif # if (_M_IX86_FP >= 2) # define SIMDE_ARCH_X86_SSE2 1 # endif # elif defined(_M_X64) # define SIMDE_ARCH_X86_SSE 1 # define SIMDE_ARCH_X86_SSE2 1 # else # if defined(__MMX__) # define SIMDE_ARCH_X86_MMX 1 # endif # if defined(__SSE__) # define SIMDE_ARCH_X86_SSE 1 # endif # if defined(__SSE2__) # define SIMDE_ARCH_X86_SSE2 1 # endif # endif # if defined(__SSE3__) # define SIMDE_ARCH_X86_SSE3 1 # endif # if defined(__SSSE3__) # define SIMDE_ARCH_X86_SSSE3 1 # endif # if defined(__SSE4_1__) # 
define SIMDE_ARCH_X86_SSE4_1 1 # endif # if defined(__SSE4_2__) # define SIMDE_ARCH_X86_SSE4_2 1 # endif # if defined(__XOP__) # define SIMDE_ARCH_X86_XOP 1 # endif # if defined(__AVX__) # define SIMDE_ARCH_X86_AVX 1 # if !defined(SIMDE_ARCH_X86_SSE3) # define SIMDE_ARCH_X86_SSE3 1 # endif # if !defined(SIMDE_ARCH_X86_SSE4_1) # define SIMDE_ARCH_X86_SSE4_1 1 # endif # if !defined(SIMDE_ARCH_X86_SSE4_1) # define SIMDE_ARCH_X86_SSE4_2 1 # endif # endif # if defined(__AVX2__) # define SIMDE_ARCH_X86_AVX2 1 # endif # if defined(__FMA__) # define SIMDE_ARCH_X86_FMA 1 # if !defined(SIMDE_ARCH_X86_AVX) # define SIMDE_ARCH_X86_AVX 1 # endif # endif # if defined(__AVX512VP2INTERSECT__) # define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 # endif # if defined(__AVX512VBMI__) # define SIMDE_ARCH_X86_AVX512VBMI 1 # endif # if defined(__AVX512BW__) # define SIMDE_ARCH_X86_AVX512BW 1 # endif # if defined(__AVX512CD__) # define SIMDE_ARCH_X86_AVX512CD 1 # endif # if defined(__AVX512DQ__) # define SIMDE_ARCH_X86_AVX512DQ 1 # endif # if defined(__AVX512F__) # define SIMDE_ARCH_X86_AVX512F 1 # endif # if defined(__AVX512VL__) # define SIMDE_ARCH_X86_AVX512VL 1 # endif # if defined(__GFNI__) # define SIMDE_ARCH_X86_GFNI 1 # endif # if defined(__PCLMUL__) # define SIMDE_ARCH_X86_PCLMUL 1 # endif # if defined(__VPCLMULQDQ__) # define SIMDE_ARCH_X86_VPCLMULQDQ 1 # endif #endif /* Itanium */ #if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) # define SIMDE_ARCH_IA64 1 #endif /* Renesas M32R */ #if defined(__m32r__) || defined(__M32R__) # define SIMDE_ARCH_M32R #endif /* Motorola 68000 */ #if defined(__mc68060__) || defined(__MC68060__) # define SIMDE_ARCH_M68K 68060 #elif defined(__mc68040__) || defined(__MC68040__) # define SIMDE_ARCH_M68K 68040 #elif defined(__mc68030__) || defined(__MC68030__) # define SIMDE_ARCH_M68K 68030 #elif defined(__mc68020__) || defined(__MC68020__) # define SIMDE_ARCH_M68K 68020 #elif 
defined(__mc68010__) || defined(__MC68010__) # define SIMDE_ARCH_M68K 68010 #elif defined(__mc68000__) || defined(__MC68000__) # define SIMDE_ARCH_M68K 68000 #endif #if defined(SIMDE_ARCH_M68K) # define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) #else # define SIMDE_ARCH_M68K_CHECK(version) (0) #endif /* Xilinx MicroBlaze */ #if defined(__MICROBLAZE__) || defined(__microblaze__) # define SIMDE_ARCH_MICROBLAZE #endif /* MIPS */ #if defined(_MIPS_ISA_MIPS64R2) # define SIMDE_ARCH_MIPS 642 #elif defined(_MIPS_ISA_MIPS64) # define SIMDE_ARCH_MIPS 640 #elif defined(_MIPS_ISA_MIPS32R2) # define SIMDE_ARCH_MIPS 322 #elif defined(_MIPS_ISA_MIPS32) # define SIMDE_ARCH_MIPS 320 #elif defined(_MIPS_ISA_MIPS4) # define SIMDE_ARCH_MIPS 4 #elif defined(_MIPS_ISA_MIPS3) # define SIMDE_ARCH_MIPS 3 #elif defined(_MIPS_ISA_MIPS2) # define SIMDE_ARCH_MIPS 2 #elif defined(_MIPS_ISA_MIPS1) # define SIMDE_ARCH_MIPS 1 #elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) # define SIMDE_ARCH_MIPS 1 #endif #if defined(SIMDE_ARCH_MIPS) # define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) #else # define SIMDE_ARCH_MIPS_CHECK(version) (0) #endif #if defined(__mips_loongson_mmi) # define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 #endif /* Matsushita MN10300 */ #if defined(__MN10300__) || defined(__mn10300__) # define SIMDE_ARCH_MN10300 1 #endif /* POWER */ #if defined(_M_PPC) # define SIMDE_ARCH_POWER _M_PPC #elif defined(_ARCH_PWR9) # define SIMDE_ARCH_POWER 900 #elif defined(_ARCH_PWR8) # define SIMDE_ARCH_POWER 800 #elif defined(_ARCH_PWR7) # define SIMDE_ARCH_POWER 700 #elif defined(_ARCH_PWR6) # define SIMDE_ARCH_POWER 600 #elif defined(_ARCH_PWR5) # define SIMDE_ARCH_POWER 500 #elif defined(_ARCH_PWR4) # define SIMDE_ARCH_POWER 400 #elif defined(_ARCH_440) || defined(__ppc440__) # define SIMDE_ARCH_POWER 440 #elif defined(_ARCH_450) || defined(__ppc450__) # define SIMDE_ARCH_POWER 450 #elif defined(_ARCH_601) || defined(__ppc601__) # define 
SIMDE_ARCH_POWER 601 #elif defined(_ARCH_603) || defined(__ppc603__) # define SIMDE_ARCH_POWER 603 #elif defined(_ARCH_604) || defined(__ppc604__) # define SIMDE_ARCH_POWER 604 #elif defined(_ARCH_605) || defined(__ppc605__) # define SIMDE_ARCH_POWER 605 #elif defined(_ARCH_620) || defined(__ppc620__) # define SIMDE_ARCH_POWER 620 #elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) # define SIMDE_ARCH_POWER 1 #endif #if defined(SIMDE_ARCH_POWER) #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) #else #define SIMDE_ARCH_POWER_CHECK(version) (0) #endif #if defined(__ALTIVEC__) # define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER #endif #if defined(SIMDE_ARCH_POWER) #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) #else #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) #endif /* SPARC */ #if defined(__sparc_v9__) || defined(__sparcv9) # define SIMDE_ARCH_SPARC 9 #elif defined(__sparc_v8__) || defined(__sparcv8) # define SIMDE_ARCH_SPARC 8 #elif defined(__sparc_v7__) || defined(__sparcv7) # define SIMDE_ARCH_SPARC 7 #elif defined(__sparc_v6__) || defined(__sparcv6) # define SIMDE_ARCH_SPARC 6 #elif defined(__sparc_v5__) || defined(__sparcv5) # define SIMDE_ARCH_SPARC 5 #elif defined(__sparc_v4__) || defined(__sparcv4) # define SIMDE_ARCH_SPARC 4 #elif defined(__sparc_v3__) || defined(__sparcv3) # define SIMDE_ARCH_SPARC 3 #elif defined(__sparc_v2__) || defined(__sparcv2) # define SIMDE_ARCH_SPARC 2 #elif defined(__sparc_v1__) || defined(__sparcv1) # define SIMDE_ARCH_SPARC 1 #elif defined(__sparc__) || defined(__sparc) # define SIMDE_ARCH_SPARC 1 #endif #if defined(SIMDE_ARCH_SPARC) #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) #else #define SIMDE_ARCH_SPARC_CHECK(version) (0) #endif /* SuperH */ #if defined(__sh5__) || defined(__SH5__) # define SIMDE_ARCH_SUPERH 5 #elif defined(__sh4__) || 
defined(__SH4__) # define SIMDE_ARCH_SUPERH 4 #elif defined(__sh3__) || defined(__SH3__) # define SIMDE_ARCH_SUPERH 3 #elif defined(__sh2__) || defined(__SH2__) # define SIMDE_ARCH_SUPERH 2 #elif defined(__sh1__) || defined(__SH1__) # define SIMDE_ARCH_SUPERH 1 #elif defined(__sh__) || defined(__SH__) # define SIMDE_ARCH_SUPERH 1 #endif /* IBM System z */ #if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) # define SIMDE_ARCH_SYSTEMZ #endif /* TMS320 DSP */ #if defined(_TMS320C6740) || defined(__TMS320C6740__) # define SIMDE_ARCH_TMS320 6740 #elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) # define SIMDE_ARCH_TMS320 6701 #elif defined(_TMS320C6700) || defined(__TMS320C6700__) # define SIMDE_ARCH_TMS320 6700 #elif defined(_TMS320C6600) || defined(__TMS320C6600__) # define SIMDE_ARCH_TMS320 6600 #elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) # define SIMDE_ARCH_TMS320 6401 #elif defined(_TMS320C6400) || defined(__TMS320C6400__) # define SIMDE_ARCH_TMS320 6400 #elif defined(_TMS320C6200) || defined(__TMS320C6200__) # define SIMDE_ARCH_TMS320 6200 #elif defined(_TMS320C55X) || defined(__TMS320C55X__) # define SIMDE_ARCH_TMS320 550 #elif defined(_TMS320C54X) || defined(__TMS320C54X__) # define SIMDE_ARCH_TMS320 540 #elif defined(_TMS320C28X) || defined(__TMS320C28X__) # define SIMDE_ARCH_TMS320 280 #endif #if defined(SIMDE_ARCH_TMS320) #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) #else #define SIMDE_ARCH_TMS320_CHECK(version) (0) #endif /* WebAssembly */ #if defined(__wasm__) # define SIMDE_ARCH_WASM 1 #endif #if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) # define SIMDE_ARCH_WASM_SIMD128 #endif /* Xtensa */ #if defined(__xtensa__) || defined(__XTENSA__) # define SIMDE_ARCH_XTENSA 1 #endif #endif /* !defined(SIMDE_ARCH_H) */ 
simde-0.7.2/simde/simde-common.h000066400000000000000000001030431400333146700164640ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2017-2020 Evan Nemerson */ #if !defined(SIMDE_COMMON_H) #define SIMDE_COMMON_H #include "hedley.h" #define SIMDE_VERSION_MAJOR 0 #define SIMDE_VERSION_MINOR 7 #define SIMDE_VERSION_MICRO 2 #define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) // Also update meson.build in the root directory of the repository #include #include #include "simde-detect-clang.h" #include "simde-arch.h" #include "simde-features.h" #include "simde-diagnostic.h" #include "simde-math.h" #include "simde-constify.h" #include "simde-align.h" /* In some situations, SIMDe has to make large performance sacrifices * for small increases in how faithfully it reproduces an API, but * only a relatively small number of users will actually need the API * to be completely accurate. The SIMDE_FAST_* options can be used to * disable these trade-offs. * * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to * enable some optimizations. Using -ffast-math and/or * -ffinite-math-only will also enable the relevant options. If you * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. */ /* Most programs avoid NaNs by never passing values which can result in * a NaN; for example, if you only pass non-negative values to the sqrt * functions, it won't generate a NaN. On some platforms, similar * functions handle NaNs differently; for example, the _mm_min_ps SSE * function will return 0.0 if you pass it (0.0, NaN), but the NEON * vminq_f32 function will return NaN. Making them behave like one * another is expensive; it requires generating a mask of all lanes * with NaNs, then performing the operation (e.g., vminq_f32), then * blending together the result with another vector using the mask. 
* * If you don't want SIMDe to worry about the differences between how * NaNs are handled on the two platforms, define this (or pass * -ffinite-math-only) */ #if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) #define SIMDE_FAST_MATH #endif #if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) #if defined(SIMDE_FAST_MATH) #define SIMDE_FAST_NANS #elif defined(__FINITE_MATH_ONLY__) #if __FINITE_MATH_ONLY__ #define SIMDE_FAST_NANS #endif #endif #endif /* Many functions are defined as using the current rounding mode * (i.e., the SIMD version of fegetround()) when converting to * an integer. For example, _mm_cvtpd_epi32. Unfortunately, * on some platforms (such as ARMv8+ where round-to-nearest is * always used, regardless of the FPSCR register) this means we * have to first query the current rounding mode, then choose * the proper function (rounnd , ceil, floor, etc.) */ #if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) #define SIMDE_FAST_ROUND_MODE #endif /* This controls how ties are rounded. For example, does 10.5 round to * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for * example) doesn't support it and it must be emulated (which is rather * slow). If you're okay with just using the default for whatever arch * you're on, you should definitely define this. * * Note that we don't use this macro to avoid correct implementations * in functions which are explicitly about rounding (such as vrnd* on * NEON, _mm_round_* on x86, etc.); it is only used for code where * rounding is a component in another function, and even then it isn't * usually a problem since such functions will use the current rounding * mode. 
*/ #if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) #define SIMDE_FAST_ROUND_TIES #endif /* For functions which convert from one type to another (mostly from * floating point to integer types), sometimes we need to do a range * check and potentially return a different result if the value * falls outside that range. Skipping this check can provide a * performance boost, at the expense of faithfulness to the API we're * emulating. */ #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) #define SIMDE_FAST_CONVERSION_RANGE #endif #if \ HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ HEDLEY_CRAY_VERSION_CHECK(8,1,0) #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) #elif defined(__cplusplus) && (__cplusplus > 201703L) #include #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) #endif #if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) #if defined(SIMDE_CHECK_CONSTANT_) && \ SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") #else #define SIMDE_REQUIRE_CONSTANT(arg) #endif #else #define SIMDE_REQUIRE_CONSTANT(arg) #endif #define SIMDE_REQUIRE_RANGE(arg, min, max) \ HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") #define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ SIMDE_REQUIRE_CONSTANT(arg) \ SIMDE_REQUIRE_RANGE(arg, min, max) /* A copy of 
HEDLEY_STATIC_ASSERT, except we don't define an empty * fallback if we can't find an implementation; instead we have to * check if SIMDE_STATIC_ASSERT is defined before using it. */ #if \ !defined(__cplusplus) && ( \ (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ HEDLEY_HAS_FEATURE(c_static_assert) || \ HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ defined(_Static_assert) \ ) # define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) #elif \ (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ HEDLEY_MSVC_VERSION_CHECK(16,0,0) # define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) #endif #if \ (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) # define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) #else # define SIMDE_MAY_ALIAS #endif /* Lots of compilers support GCC-style vector extensions, but many don't support all the features. Define different macros depending on support for * SIMDE_VECTOR - Declaring a vector. * SIMDE_VECTOR_OPS - basic operations (binary and unary). * SIMDE_VECTOR_NEGATE - negating a vector * SIMDE_VECTOR_SCALAR - For binary operators, the second argument can be a scalar, in which case the result is as if that scalar had been broadcast to all lanes of a vector. * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for extracting/inserting a single element.= SIMDE_VECTOR can be assumed if any others are defined, the others are independent. 
*/ #if !defined(SIMDE_NO_VECTOR) # if \ HEDLEY_GCC_VERSION_CHECK(4,8,0) # define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) # define SIMDE_VECTOR_OPS # define SIMDE_VECTOR_NEGATE # define SIMDE_VECTOR_SCALAR # define SIMDE_VECTOR_SUBSCRIPT # elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) # define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) # define SIMDE_VECTOR_OPS # define SIMDE_VECTOR_NEGATE /* ICC only supports SIMDE_VECTOR_SCALAR for constants */ # define SIMDE_VECTOR_SUBSCRIPT # elif \ HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) # define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) # define SIMDE_VECTOR_OPS # elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) # define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) # elif HEDLEY_HAS_ATTRIBUTE(vector_size) # define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) # define SIMDE_VECTOR_OPS # define SIMDE_VECTOR_NEGATE # define SIMDE_VECTOR_SUBSCRIPT # if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) # define SIMDE_VECTOR_SCALAR # endif # endif /* GCC and clang have built-in functions to handle shuffling and converting of vectors, but the implementations are slightly different. This macro is just an abstraction over them. Note that elem_size is in bits but vec_size is in bytes. */ # if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) HEDLEY_DIAGNOSTIC_PUSH /* We don't care about -Wvariadic-macros; all compilers that support * shufflevector/shuffle support them. */ # if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") # pragma clang diagnostic ignored "-Wc++98-compat-pedantic" # endif # if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) # pragma GCC diagnostic ignored "-Wvariadic-macros" # endif # if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) # define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) # elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) # define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ __builtin_shuffle(a, b, simde_shuffle_); \ })) # endif HEDLEY_DIAGNOSTIC_POP # endif /* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT but the code needs to be refactored a bit to take advantage. */ # if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) # if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) # if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ # define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ __typeof__(from) from_ = (from); \ ((void) from_); \ __builtin_convertvector(from_, __typeof__(to)); \ }))) # else # define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) # endif # endif # endif #endif /* Since we currently require SUBSCRIPT before using a vector in a union, we define these as dependencies of SUBSCRIPT. They are likely to disappear in the future, once SIMDe learns how to make use of vectors without using the union members. Do not use them in your code unless you're okay with it breaking when SIMDe changes. 
*/ #if defined(SIMDE_VECTOR_SUBSCRIPT) # if defined(SIMDE_VECTOR_OPS) # define SIMDE_VECTOR_SUBSCRIPT_OPS # endif # if defined(SIMDE_VECTOR_SCALAR) # define SIMDE_VECTOR_SUBSCRIPT_SCALAR # endif #endif #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) # define SIMDE_ENABLE_OPENMP #endif #if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) # define SIMDE_ENABLE_CILKPLUS #endif #if defined(SIMDE_ENABLE_OPENMP) # define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) # define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) # if defined(__clang__) # define SIMDE_VECTORIZE_REDUCTION(r) \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ HEDLEY_PRAGMA(omp simd reduction(r)) \ HEDLEY_DIAGNOSTIC_POP # else # define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) # endif # define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) #elif defined(SIMDE_ENABLE_CILKPLUS) # define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) # define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) # define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) # define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) #elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) # define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) # define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) # define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE # define SIMDE_VECTORIZE_ALIGNED(a) #elif HEDLEY_GCC_VERSION_CHECK(4,9,0) # define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) # define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE # define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE # define SIMDE_VECTORIZE_ALIGNED(a) #elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) # define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) # define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE # define 
SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE # define SIMDE_VECTORIZE_ALIGNED(a) #else # define SIMDE_VECTORIZE # define SIMDE_VECTORIZE_SAFELEN(l) # define SIMDE_VECTORIZE_REDUCTION(r) # define SIMDE_VECTORIZE_ALIGNED(a) #endif #define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) /* Intended for checking coverage, you should never use this in production. */ #if defined(SIMDE_NO_INLINE) # define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static #else # define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static #endif #if \ HEDLEY_HAS_ATTRIBUTE(unused) || \ HEDLEY_GCC_VERSION_CHECK(2,95,0) # define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) #else # define SIMDE_FUNCTION_POSSIBLY_UNUSED_ #endif #if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") # define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") #else # define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED #endif #if defined(_MSC_VER) # define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS # define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS #else # define SIMDE_BEGIN_DECLS_ \ HEDLEY_DIAGNOSTIC_PUSH \ SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED \ HEDLEY_BEGIN_C_DECLS # define SIMDE_END_DECLS_ \ HEDLEY_END_C_DECLS \ HEDLEY_DIAGNOSTIC_POP #endif #if defined(__SIZEOF_INT128__) # define SIMDE_HAVE_INT128_ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ typedef __int128 simde_int128; typedef unsigned __int128 simde_uint128; HEDLEY_DIAGNOSTIC_POP #endif #if !defined(SIMDE_ENDIAN_LITTLE) # define SIMDE_ENDIAN_LITTLE 1234 #endif #if !defined(SIMDE_ENDIAN_BIG) # define SIMDE_ENDIAN_BIG 4321 #endif #if !defined(SIMDE_ENDIAN_ORDER) /* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. 
*/ # if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE # elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG /* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ # elif defined(_BIG_ENDIAN) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG # elif defined(_LITTLE_ENDIAN) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE /* We know the endianness of some common architectures. Common * architectures not listed (ARM, POWER, MIPS, etc.) here are * bi-endian. */ # elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE # elif defined(__s390x__) || defined(__zarch__) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG /* Looks like we'll have to rely on the platform. If we're missing a * platform, please let us know. */ # elif defined(_WIN32) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE # elif defined(sun) || defined(__sun) /* Solaris */ # include # if defined(_LITTLE_ENDIAN) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE # elif defined(_BIG_ENDIAN) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG # endif # elif defined(__APPLE__) # include # if defined(__LITTLE_ENDIAN__) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE # elif defined(__BIG_ENDIAN__) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG # endif # elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) # include # if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE # elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG # endif # elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) # include # if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && 
(__BYTE_ORDER == __LITTLE_ENDIAN) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE # elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG # endif # endif #endif #if \ HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) #define simde_bswap64(v) __builtin_bswap64(v) #elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) #define simde_bswap64(v) _byteswap_uint64(v) #else SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_bswap64(uint64_t v) { return ((v & (((uint64_t) 0xff) << 56)) >> 56) | ((v & (((uint64_t) 0xff) << 48)) >> 40) | ((v & (((uint64_t) 0xff) << 40)) >> 24) | ((v & (((uint64_t) 0xff) << 32)) >> 8) | ((v & (((uint64_t) 0xff) << 24)) << 8) | ((v & (((uint64_t) 0xff) << 16)) << 24) | ((v & (((uint64_t) 0xff) << 8)) << 40) | ((v & (((uint64_t) 0xff) )) << 56); } #endif #if !defined(SIMDE_ENDIAN_ORDER) # error Unknown byte order; please file a bug #else # if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE # define simde_endian_bswap64_be(value) simde_bswap64(value) # define simde_endian_bswap64_le(value) (value) # elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG # define simde_endian_bswap64_be(value) (value) # define simde_endian_bswap64_le(value) simde_bswap64(value) # endif #endif /* TODO: we should at least make an attempt to detect the correct types for simde_float32/float64 instead of just assuming float and double. 
*/

/* simde_float32: 32-bit floating point type.  Defaults to float; define
 * SIMDE_FLOAT32_TYPE beforehand to override.  SIMDE_FLOAT32_C(value)
 * produces a constant of that type (an `f' suffix for the default, a
 * cast for a user-supplied type). */
#if !defined(SIMDE_FLOAT32_TYPE)
#  define SIMDE_FLOAT32_TYPE float
#  define SIMDE_FLOAT32_C(value) value##f
#else
#  define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value)
#endif
typedef SIMDE_FLOAT32_TYPE simde_float32;

/* simde_float64: 64-bit floating point type.  Defaults to double; define
 * SIMDE_FLOAT64_TYPE beforehand to override.  SIMDE_FLOAT64_C(value)
 * produces a constant of that type. */
#if !defined(SIMDE_FLOAT64_TYPE)
#  define SIMDE_FLOAT64_TYPE double
#  define SIMDE_FLOAT64_C(value) value
#else
#  define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value)
#endif
typedef SIMDE_FLOAT64_TYPE simde_float64;

/* SIMDE_CONVERT_FTOI(T, v): cast v to type T.  When the compiler knows
 * -Wbad-function-cast the cast is wrapped in a diagnostic push/pop that
 * silences that warning; otherwise it is a plain C cast. */
#if HEDLEY_HAS_WARNING("-Wbad-function-cast")
#  define SIMDE_CONVERT_FTOI(T,v) \
    HEDLEY_DIAGNOSTIC_PUSH \
    _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \
    HEDLEY_STATIC_CAST(T, (v)) \
    HEDLEY_DIAGNOSTIC_POP
#else
#  define SIMDE_CONVERT_FTOI(T,v) ((T) (v))
#endif

/* SIMDE_CHECKED_REINTERPRET_CAST / SIMDE_CHECKED_STATIC_CAST(to, from, value):
 * in C11 mode, _Generic only accepts `value' if its type is `to' (passed
 * through unchanged) or `from' (cast to `to'); any other type has no
 * matching association.  Outside C11 these fall back to the unchecked
 * Hedley casts. */
/* TODO: detect compilers which support this outside of C11 mode */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
  #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value)))))
  #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value)))))
#else
  #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value)
  #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value)
#endif

/* SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL: pragma that silences
 * -Wfloat-equal (clang or GCC spelling), or nothing when neither is
 * detected. */
#if HEDLEY_HAS_WARNING("-Wfloat-equal")
#  define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"")
#elif HEDLEY_GCC_VERSION_CHECK(3,0,0)
#  define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
#else
#  define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL
#endif

/* Some functions can trade accuracy for speed.  For those functions
   you can control the trade-off using this macro.
Possible values: 0: prefer speed 1: reasonable trade-offs 2: prefer accuracy */ #if !defined(SIMDE_ACCURACY_PREFERENCE) # define SIMDE_ACCURACY_PREFERENCE 1 #endif #if defined(__STDC_HOSTED__) # define SIMDE_STDC_HOSTED __STDC_HOSTED__ #else # if \ defined(HEDLEY_PGI_VERSION) || \ defined(HEDLEY_MSVC_VERSION) # define SIMDE_STDC_HOSTED 1 # else # define SIMDE_STDC_HOSTED 0 # endif #endif /* Try to deal with environments without a standard library. */ #if !defined(simde_memcpy) #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) #endif #endif #if !defined(simde_memset) #if HEDLEY_HAS_BUILTIN(__builtin_memset) #define simde_memset(s, c, n) __builtin_memset(s, c, n) #endif #endif #if !defined(simde_memcmp) #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) #endif #endif #if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) #if !defined(SIMDE_NO_STRING_H) #if defined(__has_include) #if !__has_include() #define SIMDE_NO_STRING_H #endif #elif (SIMDE_STDC_HOSTED == 0) #define SIMDE_NO_STRING_H #endif #endif #if !defined(SIMDE_NO_STRING_H) #include #if !defined(simde_memcpy) #define simde_memcpy(dest, src, n) memcpy(dest, src, n) #endif #if !defined(simde_memset) #define simde_memset(s, c, n) memset(s, c, n) #endif #if !defined(simde_memcmp) #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) #endif #else /* These are meant to be portable, not fast. If you're hitting them you * should think about providing your own (by defining the simde_memcpy * macro prior to including any SIMDe files) or submitting a patch to * SIMDe so we can detect your system-provided memcpy/memset, like by * adding your compiler to the checks for __builtin_memcpy and/or * __builtin_memset. 
*/ #if !defined(simde_memcpy) SIMDE_FUNCTION_ATTRIBUTES void simde_memcpy_(void* dest, const void* src, size_t len) { char* dest_ = HEDLEY_STATIC_CAST(char*, dest); char* src_ = HEDLEY_STATIC_CAST(const char*, src); for (size_t i = 0 ; i < len ; i++) { dest_[i] = src_[i]; } } #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) #endif #if !defined(simde_memset) SIMDE_FUNCTION_ATTRIBUTES void simde_memset_(void* s, int c, size_t len) { char* s_ = HEDLEY_STATIC_CAST(char*, s); char c_ = HEDLEY_STATIC_CAST(char, c); for (size_t i = 0 ; i < len ; i++) { s_[i] = c_[i]; } } #define simde_memset(s, c, n) simde_memset_(s, c, n) #endif #if !defined(simde_memcmp) SIMDE_FUCTION_ATTRIBUTES int simde_memcmp_(const void *s1, const void *s2, size_t n) { unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); for (size_t i = 0 ; i < len ; i++) { if (s1_[i] != s2_[i]) { return (int) (s1_[i] - s2_[i]); } } return 0; } #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) #endif #endif #endif #if defined(FE_ALL_EXCEPT) #define SIMDE_HAVE_FENV_H #elif defined(__has_include) #if __has_include() #include #define SIMDE_HAVE_FENV_H #endif #elif SIMDE_STDC_HOSTED == 1 #include #define SIMDE_HAVE_FENV_H #endif #if defined(EXIT_FAILURE) #define SIMDE_HAVE_STDLIB_H #elif defined(__has_include) #if __has_include() #include #define SIMDE_HAVE_STDLIB_H #endif #elif SIMDE_STDC_HOSTED == 1 #include #define SIMDE_HAVE_STDLIB_H #endif #if defined(__has_include) # if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() # include # elif __has_include() # include # endif # if __has_include() # include # endif #elif SIMDE_STDC_HOSTED == 1 # include # include #endif #include "check.h" /* GCC/clang have a bunch of functionality in builtins which we would * like to access, but the suffixes indicate whether the operate on * int, long, or long long, not fixed width types (e.g., int32_t). 
* we use these macros to attempt to map from fixed-width to the * names GCC uses. Note that you should still cast the input(s) and * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if * types are the same size they may not be compatible according to the * compiler. For example, on x86 long and long lonsg are generally * both 64 bits, but platforms vary on whether an int64_t is mapped * to a long or long long. */ #include HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ #if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) #define SIMDE_BUILTIN_SUFFIX_8_ #define SIMDE_BUILTIN_TYPE_8_ int #elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) #define SIMDE_BUILTIN_SUFFIX_8_ l #define SIMDE_BUILTIN_TYPE_8_ long #elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) #define SIMDE_BUILTIN_SUFFIX_8_ ll #define SIMDE_BUILTIN_TYPE_8_ long long #endif #if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) #define SIMDE_BUILTIN_SUFFIX_16_ #define SIMDE_BUILTIN_TYPE_16_ int #elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) #define SIMDE_BUILTIN_SUFFIX_16_ l #define SIMDE_BUILTIN_TYPE_16_ long #elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) #define SIMDE_BUILTIN_SUFFIX_16_ ll #define SIMDE_BUILTIN_TYPE_16_ long long #endif #if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) #define SIMDE_BUILTIN_SUFFIX_32_ #define SIMDE_BUILTIN_TYPE_32_ int #elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) #define SIMDE_BUILTIN_SUFFIX_32_ l #define SIMDE_BUILTIN_TYPE_32_ long #elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) #define SIMDE_BUILTIN_SUFFIX_32_ ll #define SIMDE_BUILTIN_TYPE_32_ long long #endif #if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) #define SIMDE_BUILTIN_SUFFIX_64_ #define SIMDE_BUILTIN_TYPE_64_ int #elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) #define SIMDE_BUILTIN_SUFFIX_64_ l #define SIMDE_BUILTIN_TYPE_64_ long #elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) #define 
SIMDE_BUILTIN_SUFFIX_64_ ll #define SIMDE_BUILTIN_TYPE_64_ long long #endif #if defined(SIMDE_BUILTIN_SUFFIX_8_) #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) #else #define SIMDE_BUILTIN_HAS_8_(name) 0 #endif #if defined(SIMDE_BUILTIN_SUFFIX_16_) #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) #else #define SIMDE_BUILTIN_HAS_16_(name) 0 #endif #if defined(SIMDE_BUILTIN_SUFFIX_32_) #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) #else #define SIMDE_BUILTIN_HAS_32_(name) 0 #endif #if defined(SIMDE_BUILTIN_SUFFIX_64_) #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) #else #define SIMDE_BUILTIN_HAS_64_(name) 0 #endif HEDLEY_DIAGNOSTIC_POP /* Sometimes we run into problems with specific versions of compilers which make the native versions unusable for us. Often this is due to missing functions, sometimes buggy implementations, etc. These macros are how we check for specific bugs. As they are fixed we'll start only defining them for problematic compiler versions. 
*/ #if !defined(SIMDE_IGNORE_COMPILER_BUGS) # if defined(HEDLEY_GCC_VERSION) # if !HEDLEY_GCC_VERSION_CHECK(4,9,0) # define SIMDE_BUG_GCC_REV_208793 # endif # if !HEDLEY_GCC_VERSION_CHECK(5,0,0) # define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ # endif # if !HEDLEY_GCC_VERSION_CHECK(4,6,0) # define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ # endif # if !HEDLEY_GCC_VERSION_CHECK(8,0,0) # define SIMDE_BUG_GCC_REV_247851 # endif # if !HEDLEY_GCC_VERSION_CHECK(10,0,0) # define SIMDE_BUG_GCC_REV_274313 # define SIMDE_BUG_GCC_91341 # endif # if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) # define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR # endif # if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) # define SIMDE_BUG_GCC_BAD_VEXT_REV32 # endif # if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) # define SIMDE_BUG_GCC_94482 # endif # if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_SYSTEMZ) # define SIMDE_BUG_GCC_53784 # endif # if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) # if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ # define SIMDE_BUG_GCC_95144 # endif # if !HEDLEY_GCC_VERSION_CHECK(11,0,0) # define SIMDE_BUG_GCC_95483 # endif # define SIMDE_BUG_GCC_98521 # endif # if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) # define SIMDE_BUG_GCC_94488 # endif # if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) # define SIMDE_BUG_GCC_REV_264019 # endif # if defined(SIMDE_ARCH_ARM) # define SIMDE_BUG_GCC_95399 # define SIMDE_BUG_GCC_95471 # elif defined(SIMDE_ARCH_POWER) # define SIMDE_BUG_GCC_95227 # define SIMDE_BUG_GCC_95782 # elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) # if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) # define SIMDE_BUG_GCC_96174 # endif # endif # define SIMDE_BUG_GCC_95399 # elif defined(__clang__) # if defined(SIMDE_ARCH_AARCH64) # define 
SIMDE_BUG_CLANG_45541 # define SIMDE_BUG_CLANG_46844 # define SIMDE_BUG_CLANG_48257 # if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) # define SIMDE_BUG_CLANG_BAD_VI64_OPS # endif # if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) # define SIMDE_BUG_CLANG_GIT_4EC445B8 # endif # endif # if defined(SIMDE_ARCH_POWER) # define SIMDE_BUG_CLANG_46770 # endif # if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) # define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT # endif # if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) # if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) # define SIMDE_BUG_CLANG_45931 # endif # if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) # define SIMDE_BUG_CLANG_44589 # endif # define SIMDE_BUG_CLANG_48673 # endif # define SIMDE_BUG_CLANG_45959 # elif defined(HEDLEY_MSVC_VERSION) # if defined(SIMDE_ARCH_X86) # define SIMDE_BUG_MSVC_ROUND_EXTRACT # endif # elif defined(HEDLEY_INTEL_VERSION) # define SIMDE_BUG_INTEL_857088 # endif #endif /* GCC and Clang both have the same issue: * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 * https://bugs.llvm.org/show_bug.cgi?id=45931 * This is just an easy way to work around it. 
*/ #if \ (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ HEDLEY_GCC_VERSION_CHECK(4,3,0) # define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ HEDLEY_DIAGNOSTIC_PUSH \ HEDLEY_DIAGNOSTIC_POP \ _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ HEDLEY_DIAGNOSTIC_PUSH \ simde_bug_ignore_sign_conversion_v_; \ })) #else # define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) #endif #endif /* !defined(SIMDE_COMMON_H) */ simde-0.7.2/simde/simde-complex.h000066400000000000000000000122111400333146700166370ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020-2021 Evan Nemerson */ /* Support for complex math. * * We try to avoid inculding (in C++ mode) since it pulls in * a *lot* of code. 
Unfortunately this only works for GNU modes (i.e., * -std=gnu++14 not -std=c++14) unless you pass -fext-numeric-literals, * but there is no way (AFAICT) to detect that flag so we have to rely * on __STRICT_ANSI__ to instead detect GNU mode. * * This header is separate from simde-math.h since there is a good * chance it will pull in , and most of the time we don't need * complex math (on x86 only SVML uses it). */ #if !defined(SIMDE_COMPLEX_H) #define SIMDE_COMPLEX_H 1 #include "simde-math.h" #if ( \ HEDLEY_HAS_BUILTIN(__builtin_creal) || \ HEDLEY_GCC_VERSION_CHECK(4,7,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) \ ) && (!defined(__cplusplus) || !defined(__STRICT_ANSI__)) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ typedef __complex__ float simde_cfloat32; typedef __complex__ double simde_cfloat64; HEDLEY_DIAGNOSTIC_POP #define SIMDE_MATH_CMPLX(x, y) (HEDLEY_STATIC_CAST(double, x) + HEDLEY_STATIC_CAST(double, y) * (__extension__ 1.0j)) #define SIMDE_MATH_CMPLXF(x, y) (HEDLEY_STATIC_CAST(float, x) + HEDLEY_STATIC_CAST(float, y) * (__extension__ 1.0fj)) #if !defined(simde_math_creal) #define simde_math_crealf(z) __builtin_crealf(z) #endif #if !defined(simde_math_crealf) #define simde_math_creal(z) __builtin_creal(z) #endif #if !defined(simde_math_cimag) #define simde_math_cimagf(z) __builtin_cimagf(z) #endif #if !defined(simde_math_cimagf) #define simde_math_cimag(z) __builtin_cimag(z) #endif #if !defined(simde_math_cexp) #define simde_math_cexp(z) __builtin_cexp(z) #endif #if !defined(simde_math_cexpf) #define simde_math_cexpf(z) __builtin_cexpf(z) #endif #elif !defined(__cplusplus) #include #if !defined(HEDLEY_MSVC_VERSION) typedef float _Complex simde_cfloat32; typedef double _Complex simde_cfloat64; #else typedef _Fcomplex simde_cfloat32; typedef _Dcomplex simde_cfloat64; #endif #if defined(HEDLEY_MSVC_VERSION) #define SIMDE_MATH_CMPLX(x, y) ((simde_cfloat64) { (x), (y) }) #define SIMDE_MATH_CMPLXF(x, y) ((simde_cfloat32) { (x), (y) }) #elif 
defined(CMPLX) && defined(CMPLXF) #define SIMDE_MATH_CMPLX(x, y) CMPLX(x, y) #define SIMDE_MATH_CMPLXF(x, y) CMPLXF(x, y) #else #define SIMDE_MATH_CMPLX(x, y) (HEDLEY_STATIC_CAST(double, x) + HEDLEY_STATIC_CAST(double, y) * I) #define SIMDE_MATH_CMPLXF(x, y) (HEDLEY_STATIC_CAST(float, x) + HEDLEY_STATIC_CAST(float, y) * I) #endif #if !defined(simde_math_creal) #define simde_math_creal(z) creal(z) #endif #if !defined(simde_math_crealf) #define simde_math_crealf(z) crealf(z) #endif #if !defined(simde_math_cimag) #define simde_math_cimag(z) cimag(z) #endif #if !defined(simde_math_cimagf) #define simde_math_cimagf(z) cimagf(z) #endif #if !defined(simde_math_cexp) #define simde_math_cexp(z) cexp(z) #endif #if !defined(simde_math_cexpf) #define simde_math_cexpf(z) cexpf(z) #endif #else HEDLEY_DIAGNOSTIC_PUSH #if defined(HEDLEY_MSVC_VERSION) #pragma warning(disable:4530) #endif #include HEDLEY_DIAGNOSTIC_POP typedef std::complex simde_cfloat32; typedef std::complex simde_cfloat64; #define SIMDE_MATH_CMPLX(x, y) (std::complex(x, y)) #define SIMDE_MATH_CMPLXF(x, y) (std::complex(x, y)) #if !defined(simde_math_creal) #define simde_math_creal(z) ((z).real()) #endif #if !defined(simde_math_crealf) #define simde_math_crealf(z) ((z).real()) #endif #if !defined(simde_math_cimag) #define simde_math_cimag(z) ((z).imag()) #endif #if !defined(simde_math_cimagf) #define simde_math_cimagf(z) ((z).imag()) #endif #if !defined(simde_math_cexp) #define simde_math_cexp(z) std::exp(z) #endif #if !defined(simde_math_cexpf) #define simde_math_cexpf(z) std::exp(z) #endif #endif #endif /* !defined(SIMDE_COMPLEX_H) */ simde-0.7.2/simde/simde-constify.h000066400000000000000000000442311400333146700170350ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the 
rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ /* Constify macros. For internal use only. * * These are used to make it possible to call a function which takes * an Integer Constant Expression (ICE) using a compile time constant. * Technically it would also be possible to use a value not trivially * known by the compiler, but there would be a significant performance * hit (a switch statement is used). * * The basic idea is pretty simple; we just emit a do while loop which * contains a switch with a case for every possible value of the * constant. * * As long as the value you pass to the function is constant, pretty * much any compiler shouldn't have a problem generating exactly the * same code as if you had used an ICE.
* * This is intended to be used in the SIMDe implementations of * functions the compilers require to be an ICE, but the other benefit * is that if we also disable the warnings from * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests * to use non-ICE parameters */ #if !defined(SIMDE_CONSTIFY_H) #define SIMDE_CONSTIFY_H #include "simde-diagnostic.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ #define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ do { \ switch(imm) { \ case 0: result = func_name(__VA_ARGS__, 0); break; \ case 1: result = func_name(__VA_ARGS__, 1); break; \ default: result = default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ do { \ switch(imm) { \ case 0: result = func_name(__VA_ARGS__, 0); break; \ case 1: result = func_name(__VA_ARGS__, 1); break; \ case 2: result = func_name(__VA_ARGS__, 2); break; \ case 3: result = func_name(__VA_ARGS__, 3); break; \ default: result = default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ do { \ switch(imm) { \ case 0: result = func_name(__VA_ARGS__, 0); break; \ case 1: result = func_name(__VA_ARGS__, 1); break; \ case 2: result = func_name(__VA_ARGS__, 2); break; \ case 3: result = func_name(__VA_ARGS__, 3); break; \ case 4: result = func_name(__VA_ARGS__, 4); break; \ case 5: result = func_name(__VA_ARGS__, 5); break; \ case 6: result = func_name(__VA_ARGS__, 6); break; \ case 7: result = func_name(__VA_ARGS__, 7); break; \ default: result = default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ do { \ switch(imm) { \ case 0: result = func_name(__VA_ARGS__, 0); break; \ case 1: result = func_name(__VA_ARGS__, 1); break; \ case 2: result = func_name(__VA_ARGS__, 2); break; \ case 3: result = func_name(__VA_ARGS__, 3); break; \ case 4: result = func_name(__VA_ARGS__, 4); break; \ case 5: result = func_name(__VA_ARGS__, 5); break; \ case 6: result = func_name(__VA_ARGS__, 6); break; \ case 7: result = func_name(__VA_ARGS__, 7); break; \ case 8: result = func_name(__VA_ARGS__, 8); break; \ case 9: result = func_name(__VA_ARGS__, 9); break; \ case 10: result = func_name(__VA_ARGS__, 10); break; \ case 11: result = func_name(__VA_ARGS__, 11); break; \ case 12: result = func_name(__VA_ARGS__, 12); break; \ case 13: result = func_name(__VA_ARGS__, 13); break; \ case 14: result = func_name(__VA_ARGS__, 14); break; \ case 15: result = func_name(__VA_ARGS__, 15); break; \ default: result = default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) 
\ do { \ switch(imm) { \ case 0: result = func_name(__VA_ARGS__, 0); break; \ case 1: result = func_name(__VA_ARGS__, 1); break; \ case 2: result = func_name(__VA_ARGS__, 2); break; \ case 3: result = func_name(__VA_ARGS__, 3); break; \ case 4: result = func_name(__VA_ARGS__, 4); break; \ case 5: result = func_name(__VA_ARGS__, 5); break; \ case 6: result = func_name(__VA_ARGS__, 6); break; \ case 7: result = func_name(__VA_ARGS__, 7); break; \ case 8: result = func_name(__VA_ARGS__, 8); break; \ case 9: result = func_name(__VA_ARGS__, 9); break; \ case 10: result = func_name(__VA_ARGS__, 10); break; \ case 11: result = func_name(__VA_ARGS__, 11); break; \ case 12: result = func_name(__VA_ARGS__, 12); break; \ case 13: result = func_name(__VA_ARGS__, 13); break; \ case 14: result = func_name(__VA_ARGS__, 14); break; \ case 15: result = func_name(__VA_ARGS__, 15); break; \ case 16: result = func_name(__VA_ARGS__, 16); break; \ case 17: result = func_name(__VA_ARGS__, 17); break; \ case 18: result = func_name(__VA_ARGS__, 18); break; \ case 19: result = func_name(__VA_ARGS__, 19); break; \ case 20: result = func_name(__VA_ARGS__, 20); break; \ case 21: result = func_name(__VA_ARGS__, 21); break; \ case 22: result = func_name(__VA_ARGS__, 22); break; \ case 23: result = func_name(__VA_ARGS__, 23); break; \ case 24: result = func_name(__VA_ARGS__, 24); break; \ case 25: result = func_name(__VA_ARGS__, 25); break; \ case 26: result = func_name(__VA_ARGS__, 26); break; \ case 27: result = func_name(__VA_ARGS__, 27); break; \ case 28: result = func_name(__VA_ARGS__, 28); break; \ case 29: result = func_name(__VA_ARGS__, 29); break; \ case 30: result = func_name(__VA_ARGS__, 30); break; \ case 31: result = func_name(__VA_ARGS__, 31); break; \ default: result = default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ do { \ switch(imm) { \ case 0: result = func_name(__VA_ARGS__, 0); break; \ case 1: result = func_name(__VA_ARGS__, 1); break; \ case 2: result = func_name(__VA_ARGS__, 2); break; \ case 3: result = func_name(__VA_ARGS__, 3); break; \ case 4: result = func_name(__VA_ARGS__, 4); break; \ case 5: result = func_name(__VA_ARGS__, 5); break; \ case 6: result = func_name(__VA_ARGS__, 6); break; \ case 7: result = func_name(__VA_ARGS__, 7); break; \ case 8: result = func_name(__VA_ARGS__, 8); break; \ case 9: result = func_name(__VA_ARGS__, 9); break; \ case 10: result = func_name(__VA_ARGS__, 10); break; \ case 11: result = func_name(__VA_ARGS__, 11); break; \ case 12: result = func_name(__VA_ARGS__, 12); break; \ case 13: result = func_name(__VA_ARGS__, 13); break; \ case 14: result = func_name(__VA_ARGS__, 14); break; \ case 15: result = func_name(__VA_ARGS__, 15); break; \ case 16: result = func_name(__VA_ARGS__, 16); break; \ case 17: result = func_name(__VA_ARGS__, 17); break; \ case 18: result = func_name(__VA_ARGS__, 18); break; \ case 19: result = func_name(__VA_ARGS__, 19); break; \ case 20: result = func_name(__VA_ARGS__, 20); break; \ case 21: result = func_name(__VA_ARGS__, 21); break; \ case 22: result = func_name(__VA_ARGS__, 22); break; \ case 23: result = func_name(__VA_ARGS__, 23); break; \ case 24: result = func_name(__VA_ARGS__, 24); break; \ case 25: result = func_name(__VA_ARGS__, 25); break; \ case 26: result = func_name(__VA_ARGS__, 26); break; \ case 27: result = func_name(__VA_ARGS__, 27); break; \ case 28: result = func_name(__VA_ARGS__, 28); break; \ case 29: result = func_name(__VA_ARGS__, 29); break; \ case 30: result = func_name(__VA_ARGS__, 30); break; \ case 31: result = func_name(__VA_ARGS__, 31); break; \ case 32: result = func_name(__VA_ARGS__, 32); break; \ case 33: result = func_name(__VA_ARGS__, 33); break; \ case 34: result = func_name(__VA_ARGS__, 34); break; \ case 35: result = func_name(__VA_ARGS__, 35); break; \ case 36: 
result = func_name(__VA_ARGS__, 36); break; \ case 37: result = func_name(__VA_ARGS__, 37); break; \ case 38: result = func_name(__VA_ARGS__, 38); break; \ case 39: result = func_name(__VA_ARGS__, 39); break; \ case 40: result = func_name(__VA_ARGS__, 40); break; \ case 41: result = func_name(__VA_ARGS__, 41); break; \ case 42: result = func_name(__VA_ARGS__, 42); break; \ case 43: result = func_name(__VA_ARGS__, 43); break; \ case 44: result = func_name(__VA_ARGS__, 44); break; \ case 45: result = func_name(__VA_ARGS__, 45); break; \ case 46: result = func_name(__VA_ARGS__, 46); break; \ case 47: result = func_name(__VA_ARGS__, 47); break; \ case 48: result = func_name(__VA_ARGS__, 48); break; \ case 49: result = func_name(__VA_ARGS__, 49); break; \ case 50: result = func_name(__VA_ARGS__, 50); break; \ case 51: result = func_name(__VA_ARGS__, 51); break; \ case 52: result = func_name(__VA_ARGS__, 52); break; \ case 53: result = func_name(__VA_ARGS__, 53); break; \ case 54: result = func_name(__VA_ARGS__, 54); break; \ case 55: result = func_name(__VA_ARGS__, 55); break; \ case 56: result = func_name(__VA_ARGS__, 56); break; \ case 57: result = func_name(__VA_ARGS__, 57); break; \ case 58: result = func_name(__VA_ARGS__, 58); break; \ case 59: result = func_name(__VA_ARGS__, 59); break; \ case 60: result = func_name(__VA_ARGS__, 60); break; \ case 61: result = func_name(__VA_ARGS__, 61); break; \ case 62: result = func_name(__VA_ARGS__, 62); break; \ case 63: result = func_name(__VA_ARGS__, 63); break; \ default: result = default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ do { \ switch(imm) { \ case 0: func_name(__VA_ARGS__, 0); break; \ case 1: func_name(__VA_ARGS__, 1); break; \ default: default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) 
\ do { \ switch(imm) { \ case 0: func_name(__VA_ARGS__, 0); break; \ case 1: func_name(__VA_ARGS__, 1); break; \ case 2: func_name(__VA_ARGS__, 2); break; \ case 3: func_name(__VA_ARGS__, 3); break; \ default: default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ do { \ switch(imm) { \ case 0: func_name(__VA_ARGS__, 0); break; \ case 1: func_name(__VA_ARGS__, 1); break; \ case 2: func_name(__VA_ARGS__, 2); break; \ case 3: func_name(__VA_ARGS__, 3); break; \ case 4: func_name(__VA_ARGS__, 4); break; \ case 5: func_name(__VA_ARGS__, 5); break; \ case 6: func_name(__VA_ARGS__, 6); break; \ case 7: func_name(__VA_ARGS__, 7); break; \ default: default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ do { \ switch(imm) { \ case 0: func_name(__VA_ARGS__, 0); break; \ case 1: func_name(__VA_ARGS__, 1); break; \ case 2: func_name(__VA_ARGS__, 2); break; \ case 3: func_name(__VA_ARGS__, 3); break; \ case 4: func_name(__VA_ARGS__, 4); break; \ case 5: func_name(__VA_ARGS__, 5); break; \ case 6: func_name(__VA_ARGS__, 6); break; \ case 7: func_name(__VA_ARGS__, 7); break; \ case 8: func_name(__VA_ARGS__, 8); break; \ case 9: func_name(__VA_ARGS__, 9); break; \ case 10: func_name(__VA_ARGS__, 10); break; \ case 11: func_name(__VA_ARGS__, 11); break; \ case 12: func_name(__VA_ARGS__, 12); break; \ case 13: func_name(__VA_ARGS__, 13); break; \ case 14: func_name(__VA_ARGS__, 14); break; \ case 15: func_name(__VA_ARGS__, 15); break; \ default: default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ do { \ switch(imm) { \ case 0: func_name(__VA_ARGS__, 0); break; \ case 1: func_name(__VA_ARGS__, 1); break; \ case 2: func_name(__VA_ARGS__, 2); break; \ case 3: func_name(__VA_ARGS__, 3); break; \ case 4: func_name(__VA_ARGS__, 4); break; \ case 5: func_name(__VA_ARGS__, 5); break; \ case 6: func_name(__VA_ARGS__, 6); break; \ case 7: func_name(__VA_ARGS__, 7); break; \ case 8: func_name(__VA_ARGS__, 8); break; \ case 9: func_name(__VA_ARGS__, 9); break; \ case 10: func_name(__VA_ARGS__, 10); break; \ case 11: func_name(__VA_ARGS__, 11); break; \ case 12: func_name(__VA_ARGS__, 12); break; \ case 13: func_name(__VA_ARGS__, 13); break; \ case 14: func_name(__VA_ARGS__, 14); break; \ case 15: func_name(__VA_ARGS__, 15); break; \ case 16: func_name(__VA_ARGS__, 16); break; \ case 17: func_name(__VA_ARGS__, 17); break; \ case 18: func_name(__VA_ARGS__, 18); break; \ case 19: func_name(__VA_ARGS__, 19); break; \ case 20: func_name(__VA_ARGS__, 20); break; \ case 21: func_name(__VA_ARGS__, 21); break; \ case 22: func_name(__VA_ARGS__, 22); break; \ case 23: func_name(__VA_ARGS__, 23); break; \ case 24: func_name(__VA_ARGS__, 24); break; \ case 25: func_name(__VA_ARGS__, 25); break; \ case 26: func_name(__VA_ARGS__, 26); break; \ case 27: func_name(__VA_ARGS__, 27); break; \ case 28: func_name(__VA_ARGS__, 28); break; \ case 29: func_name(__VA_ARGS__, 29); break; \ case 30: func_name(__VA_ARGS__, 30); break; \ case 31: func_name(__VA_ARGS__, 31); break; \ default: default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) 
\ do { \ switch(imm) { \ case 0: func_name(__VA_ARGS__, 0); break; \ case 1: func_name(__VA_ARGS__, 1); break; \ case 2: func_name(__VA_ARGS__, 2); break; \ case 3: func_name(__VA_ARGS__, 3); break; \ case 4: func_name(__VA_ARGS__, 4); break; \ case 5: func_name(__VA_ARGS__, 5); break; \ case 6: func_name(__VA_ARGS__, 6); break; \ case 7: func_name(__VA_ARGS__, 7); break; \ case 8: func_name(__VA_ARGS__, 8); break; \ case 9: func_name(__VA_ARGS__, 9); break; \ case 10: func_name(__VA_ARGS__, 10); break; \ case 11: func_name(__VA_ARGS__, 11); break; \ case 12: func_name(__VA_ARGS__, 12); break; \ case 13: func_name(__VA_ARGS__, 13); break; \ case 14: func_name(__VA_ARGS__, 14); break; \ case 15: func_name(__VA_ARGS__, 15); break; \ case 16: func_name(__VA_ARGS__, 16); break; \ case 17: func_name(__VA_ARGS__, 17); break; \ case 18: func_name(__VA_ARGS__, 18); break; \ case 19: func_name(__VA_ARGS__, 19); break; \ case 20: func_name(__VA_ARGS__, 20); break; \ case 21: func_name(__VA_ARGS__, 21); break; \ case 22: func_name(__VA_ARGS__, 22); break; \ case 23: func_name(__VA_ARGS__, 23); break; \ case 24: func_name(__VA_ARGS__, 24); break; \ case 25: func_name(__VA_ARGS__, 25); break; \ case 26: func_name(__VA_ARGS__, 26); break; \ case 27: func_name(__VA_ARGS__, 27); break; \ case 28: func_name(__VA_ARGS__, 28); break; \ case 29: func_name(__VA_ARGS__, 29); break; \ case 30: func_name(__VA_ARGS__, 30); break; \ case 31: func_name(__VA_ARGS__, 31); break; \ case 32: func_name(__VA_ARGS__, 32); break; \ case 33: func_name(__VA_ARGS__, 33); break; \ case 34: func_name(__VA_ARGS__, 34); break; \ case 35: func_name(__VA_ARGS__, 35); break; \ case 36: func_name(__VA_ARGS__, 36); break; \ case 37: func_name(__VA_ARGS__, 37); break; \ case 38: func_name(__VA_ARGS__, 38); break; \ case 39: func_name(__VA_ARGS__, 39); break; \ case 40: func_name(__VA_ARGS__, 40); break; \ case 41: func_name(__VA_ARGS__, 41); break; \ case 42: func_name(__VA_ARGS__, 42); break; \ case 43: 
func_name(__VA_ARGS__, 43); break; \ case 44: func_name(__VA_ARGS__, 44); break; \ case 45: func_name(__VA_ARGS__, 45); break; \ case 46: func_name(__VA_ARGS__, 46); break; \ case 47: func_name(__VA_ARGS__, 47); break; \ case 48: func_name(__VA_ARGS__, 48); break; \ case 49: func_name(__VA_ARGS__, 49); break; \ case 50: func_name(__VA_ARGS__, 50); break; \ case 51: func_name(__VA_ARGS__, 51); break; \ case 52: func_name(__VA_ARGS__, 52); break; \ case 53: func_name(__VA_ARGS__, 53); break; \ case 54: func_name(__VA_ARGS__, 54); break; \ case 55: func_name(__VA_ARGS__, 55); break; \ case 56: func_name(__VA_ARGS__, 56); break; \ case 57: func_name(__VA_ARGS__, 57); break; \ case 58: func_name(__VA_ARGS__, 58); break; \ case 59: func_name(__VA_ARGS__, 59); break; \ case 60: func_name(__VA_ARGS__, 60); break; \ case 61: func_name(__VA_ARGS__, 61); break; \ case 62: func_name(__VA_ARGS__, 62); break; \ case 63: func_name(__VA_ARGS__, 63); break; \ default: default_case; break; \ } \ } while (0) HEDLEY_DIAGNOSTIC_POP #endif simde-0.7.2/simde/simde-detect-clang.h000066400000000000000000000117111400333146700175260ustar00rootroot00000000000000/* Detect Clang Version * Created by Evan Nemerson * * To the extent possible under law, the author(s) have dedicated all * copyright and related and neighboring rights to this software to * the public domain worldwide. This software is distributed without * any warranty. * * For details, see . * SPDX-License-Identifier: CC0-1.0 */ /* This file was originally part of SIMDe * (). You're free to do with it as * you please, but I do have a few small requests: * * * If you make improvements, please submit them back to SIMDe * (at ) so others can * benefit from them. * * Please keep a link to SIMDe intact so people know where to submit * improvements. * * If you expose it publicly, please change the SIMDE_ prefix to * something specific to your project. 
* * The version numbers clang exposes (in the __clang_major__, * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. * Vendors such as Apple will define these values to their version * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but * __clang_major__ and __clang_minor__ are defined to 4 and 0 * respectively, instead of 3 and 1. * * The solution is *usually* to use clang's feature detection macros * (https://clang.llvm.org/docs/LanguageExtensions.html#feature-checking-macros) * to determine if the feature you're interested in is available. This * generally works well, and it should probably be the first thing you * try. Unfortunately, it's not possible to check for everything. In * particular, there is no feature check for compiler bugs. * * This file just uses the feature checking macros to detect features * added in specific versions of clang to identify which version of * clang the compiler is based on. * * Right now it only goes back to 3.6, but I'm happy to accept patches * to go back further. And, of course, newer versions are welcome if * they're not already present, and if you find a way to detect a point * release that would be great, too! */ #if !defined(SIMDE_DETECT_CLANG_H) #define SIMDE_DETECT_CLANG_H 1 /* Attempt to detect the upstream clang version number. I usually only * worry about major version numbers (at least for 4.0+), but if you * need more resolution I'm happy to accept patches that are able to * detect minor versions as well. That said, you'll probably have a * hard time with detection since AFAIK most minor releases don't add * anything we can detect.
* * SIMDE_DETECT_CLANG_VERSION is encoded as (major * 10000) + (minor * 100); * for example, clang 3.6 is detected as 30600. The checks run from * newest to oldest so the first match wins, and the value falls back * to 1 when none of them match. */ #if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) # if __has_warning("-Wformat-insufficient-args") # define SIMDE_DETECT_CLANG_VERSION 120000 # elif __has_warning("-Wimplicit-const-int-float-conversion") # define SIMDE_DETECT_CLANG_VERSION 110000 # elif __has_warning("-Wmisleading-indentation") # define SIMDE_DETECT_CLANG_VERSION 100000 # elif defined(__FILE_NAME__) # define SIMDE_DETECT_CLANG_VERSION 90000 # elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) # define SIMDE_DETECT_CLANG_VERSION 80000 # elif __has_warning("-Wc++98-compat-extra-semi") # define SIMDE_DETECT_CLANG_VERSION 70000 # elif __has_warning("-Wpragma-pack") # define SIMDE_DETECT_CLANG_VERSION 60000 # elif __has_warning("-Wbitfield-enum-conversion") # define SIMDE_DETECT_CLANG_VERSION 50000 # elif __has_attribute(diagnose_if) # define SIMDE_DETECT_CLANG_VERSION 40000 # elif __has_warning("-Wcast-calling-convention") # define SIMDE_DETECT_CLANG_VERSION 30900 # elif __has_warning("-WCL4") # define SIMDE_DETECT_CLANG_VERSION 30800 # elif __has_warning("-WIndependentClass-attribute") # define SIMDE_DETECT_CLANG_VERSION 30700 # elif __has_warning("-Wambiguous-ellipsis") # define SIMDE_DETECT_CLANG_VERSION 30600 # else # define SIMDE_DETECT_CLANG_VERSION 1 # endif #endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ /* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty * straightforward; it returns true if the compiler is a derivative * of clang >= the specified version. * * Since this file is often (primarily?) useful for working around bugs * it is also helpful to have a macro which returns true if and only if the * compiler is a version of clang *older* than the specified version to * make it a bit easier to ifdef regions to add code for older versions, * such as pragmas to disable a specific warning.
*/ #if defined(SIMDE_DETECT_CLANG_VERSION) # define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) # define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) #else # define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) # define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) #endif #endif /* !defined(SIMDE_DETECT_CLANG_H) */ simde-0.7.2/simde/simde-diagnostic.h000066400000000000000000000465001400333146700173240ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2017-2020 Evan Nemerson */ /* SIMDe targets a very wide range of standards and compilers, and our * goal is to compile cleanly even with extremely aggressive warnings * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) 
* treated as errors. * * While our preference is to resolve the underlying issue a given * diagnostic is warning us about, sometimes that's not possible. * Fixing a warning in one compiler may cause problems in another. * Sometimes a warning doesn't really apply to us (false positives), * and sometimes adhering to a warning would mean dropping a feature * we *know* the compiler supports since we have tested specifically * for the compiler or feature. * * When practical, warnings are only disabled for specific code. For * a list of warnings which are disabled by default in all SIMDe code, * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the * warning stack when SIMDe is done parsing, so code which includes * SIMDe is not deprived of these warnings. */ #if !defined(SIMDE_DIAGNOSTIC_H) #define SIMDE_DIAGNOSTIC_H #include "hedley.h" #include "simde-detect-clang.h" /* This is only to help us implement functions like _mm_undefined_ps. * Any previous definition is discarded first so the macro always * reflects the checks below. */ #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ #endif #if HEDLEY_HAS_WARNING("-Wuninitialized") #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") #elif HEDLEY_GCC_VERSION_CHECK(4,2,0) #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") #elif HEDLEY_PGI_VERSION_CHECK(19,10,0) #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") #elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") #elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") #elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
_Pragma("error_messages(off,unassigned)") #elif \ HEDLEY_TI_VERSION_CHECK(16,9,9) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") #elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") #elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) #endif /* Note: the chain above has no #else branch, so * SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ is left undefined when none * of its checks match. */ /* GCC emits a lot of "notes" about the ABI being different for things * in newer versions of GCC. We don't really care because all our * functions are inlined and don't generate ABI. */ #if HEDLEY_GCC_VERSION_CHECK(7,0,0) #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ #endif /* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() * after each MMX function before any floating point instructions. * Some compilers warn about functions which use MMX functions but * don't call _mm_empty(). However, since SIMDe is implementing the * MMX API we shouldn't be calling _mm_empty(); we leave it to the * caller to invoke simde_mm_empty(). */ #if HEDLEY_INTEL_VERSION_CHECK(19,0,0) #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") #elif defined(HEDLEY_MSVC_VERSION) #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) #else #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ #endif /* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they * emit a diagnostic if you use #pragma simd instead of * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to * compile with -qopenmp or -qopenmp-simd and define * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback.
*/ #if HEDLEY_INTEL_VERSION_CHECK(18,0,0) #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") #else #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ #endif /* MSVC emits a diagnostic when we call a function (like * simde_mm_set_epi32) while initializing a struct. We currently do * this a *lot* in the tests. */ #if \ defined(HEDLEY_MSVC_VERSION) #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) #else #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ #endif /* This warning needs a lot of work. It is triggered if all you do is * pass the value to memcpy/__builtin_memcpy, or if you initialize a * member of the union, even if that member takes up the entire union. * Last tested with clang-10, hopefully things will improve in the * future; if clang fixes this I'd love to enable it. */ #if \ HEDLEY_HAS_WARNING("-Wconditional-uninitialized") #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ #endif /* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which * may well be false. However, SIMDe uses these operations exclusively * for things like _mm_cmpeq_ps, for which we really do want to check * for equality (or inequality). * * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro * which just wraps a check in some code to disable this diagnostic I'd * be happy to accept it. */ #if \ HEDLEY_HAS_WARNING("-Wfloat-equal") || \ HEDLEY_GCC_VERSION_CHECK(3,0,0) #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ #endif /* This is because we use HEDLEY_STATIC_ASSERT for static assertions.
* If Hedley can't find an implementation it will preprocess to * nothing, which means there will be a trailing semi-colon. */ #if HEDLEY_HAS_WARNING("-Wextra-semi") #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") #elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ #endif /* We do use a few variadic macros, which technically aren't available * until C99 and C++11, but every compiler I'm aware of has supported * them for much longer. That said, usage is isolated to the test * suite and compilers known to support them. */ #if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") #endif #else #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ #endif /* emscripten requires us to use a __wasm_unimplemented_simd128__ macro * before we can access certain SIMD intrinsics, but this diagnostic * warns about it being a reserved name. It is a reserved name, but * it's reserved for the compiler and we are using it to convey * information to the compiler. * * This is also used when enabling native aliases since we don't get to * choose the macro names. */ #if HEDLEY_HAS_WARNING("-Wdouble-promotion") #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ #endif /* clang 3.8 warns about the packed attribute being unnecessary when * used in the _mm_loadu_* functions. 
That *may* be true for version * 3.8, but for later versions it is crucial in order to make unaligned * access safe. */ #if HEDLEY_HAS_WARNING("-Wpacked") #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ #endif /* Triggered when assigning a float to a double implicitly. We use * explicit casts in SIMDe; this is only used in the test suite. */ #if HEDLEY_HAS_WARNING("-Wdouble-promotion") #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ #endif /* Several compilers treat conformant array parameters (CAPs) as VLAs. We * test to make sure we're in C mode (C++ doesn't support CAPs), and * that the version of the standard supports CAPs. We also reject * some buggy compilers like MSVC (the logic is in Hedley if you want * to take a look), but with certain warnings enabled some compilers * still like to emit a diagnostic.
*/ #if HEDLEY_HAS_WARNING("-Wvla") #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") #elif HEDLEY_GCC_VERSION_CHECK(4,3,0) #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ #endif /* Silences clang's -Wused-but-marked-unused when the compiler knows * the warning; expands to nothing otherwise. */ #if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ #endif /* Silences unused-function diagnostics: -Wunused-function on clang * and GCC, warning 4505 on MSVC. */ #if HEDLEY_HAS_WARNING("-Wunused-function") #define SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ _Pragma("clang diagnostic ignored \"-Wunused-function\"") #elif HEDLEY_GCC_VERSION_CHECK(3,4,0) #define SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ _Pragma("GCC diagnostic ignored \"-Wunused-function\"") #elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ #define SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ __pragma(warning(disable:4505)) #else #define SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ #endif /* Silences clang's -Wpass-failed when the compiler knows the warning; * expands to nothing otherwise. */ #if HEDLEY_HAS_WARNING("-Wpass-failed") #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ #endif /* Silences padding diagnostics: -Wpadded on clang, warning 4324 on * MSVC. */ #if HEDLEY_HAS_WARNING("-Wpadded") #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") #elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) #else #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ #endif /* Silences clang's -Wzero-as-null-pointer-constant when the compiler * knows the warning; expands to nothing otherwise. */ #if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ #endif /* Silences clang's -Wold-style-cast when the compiler knows the * warning; expands to nothing otherwise. */ #if HEDLEY_HAS_WARNING("-Wold-style-cast") #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") #else
#define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ #endif /* Silences -Wcast-function-type; used when the compiler reports the warning or is GCC >= 8.0.0, empty otherwise. */ #if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ #endif /* clang will emit this warning when we use C99 extensions when not in * C99 mode, even though it does support this. In such cases we check * the compiler and version first, so we know it's not a problem. */ #if HEDLEY_HAS_WARNING("-Wc99-extensions") #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ #endif /* Only applied to GCC >= 4.6.0 and < 6.4.0 when compiling C++; see * https://github.com/simd-everywhere/simde/issues/277 */ #if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ #endif /* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS * to silence, but you have to do that before including anything and * that would require reordering includes. */ #if defined(_MSC_VER) #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) #else #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ #endif /* Some compilers, such as clang, may use `long long` for 64-bit * integers, but `long long` triggers a diagnostic with * -Wc++98-compat-pedantic which says 'long long' is incompatible with * C++98. 
*/ #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") #if HEDLEY_HAS_WARNING("-Wc++11-long-long") #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") #endif #else #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ #endif /* Same problem as above */ #if HEDLEY_HAS_WARNING("-Wc++11-long-long") #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ #endif /* emscripten emits this whenever stdin/stdout/stderr is used in a * macro. */ #if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ #endif /* Clang uses C11 generic selections to implement some AltiVec * functions, which triggers this diagnostic when not compiling * in C11 mode */ #if HEDLEY_HAS_WARNING("-Wc11-extensions") #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ #endif /* Clang sometimes triggers this warning in macros in the AltiVec and * NEON headers, or due to missing functions. */ #if HEDLEY_HAS_WARNING("-Wvector-conversion") #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") /* For NEON, the situation with -Wvector-conversion in clang < 10 is * bad enough that we just disable the warning altogether. 
*/ #if defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ #endif #else #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ #endif #if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ #endif /* Prior to 5.0, clang didn't support disabling diagnostics in * statement exprs. As a result, some macros we use don't * properly silence warnings. */ #if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") #elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") #elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ #endif /* SLEEF triggers this a *lot* in their headers */ #if HEDLEY_HAS_WARNING("-Wignored-qualifiers") #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") #elif HEDLEY_GCC_VERSION_CHECK(4,3,0) #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ #endif /* GCC emits this under some circumstances when using __int128 */ #if HEDLEY_GCC_VERSION_CHECK(4,8,0) #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ #endif /* MSVC doesn't like (__assume(0), code) and will warn about code being * unreachable, but we want it 
there because not all compilers * understand the unreachable macro and will complain if it is missing. * I'm planning on adding a new macro to Hedley to handle this a bit * more elegantly, but until then... */ #if defined(HEDLEY_MSVC_VERSION) #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) #else #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ #endif /* This is a false positive from GCC in a few places. */ #if HEDLEY_GCC_VERSION_CHECK(4,7,0) #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ #endif #if defined(SIMDE_ENABLE_NATIVE_ALIASES) #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ #else #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ #endif #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ \ SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ #endif /* !defined(SIMDE_DIAGNOSTIC_H) */ simde-0.7.2/simde/simde-features.h000066400000000000000000000435641400333146700170250ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and 
associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ /* simde-arch.h is used to determine which features are available according to the compiler. 
However, we want to make it possible to forcibly enable or disable APIs */ #if !defined(SIMDE_FEATURES_H) #define SIMDE_FEATURES_H #include "simde-arch.h" #include "simde-diagnostic.h" #if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_SVML) #define SIMDE_X86_SVML_NATIVE #endif #endif #if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE #endif #endif #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512VBMI) #define SIMDE_X86_AVX512VBMI_NATIVE #endif #endif #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512CD) #define SIMDE_X86_AVX512CD_NATIVE #endif #endif #if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512DQ) #define SIMDE_X86_AVX512DQ_NATIVE #endif #endif #if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512VL) #define SIMDE_X86_AVX512VL_NATIVE #endif #endif #if 
defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512BW) #define SIMDE_X86_AVX512BW_NATIVE #endif #endif #if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512F) #define SIMDE_X86_AVX512F_NATIVE #endif #endif #if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) #define SIMDE_X86_AVX2_NATIVE #endif #if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_FMA) #define SIMDE_X86_FMA_NATIVE #endif #endif #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) #define SIMDE_X86_AVX_NATIVE #endif #if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX2) #define SIMDE_X86_AVX2_NATIVE #endif #endif #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) #define SIMDE_X86_AVX_NATIVE #endif #if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX) #define SIMDE_X86_AVX_NATIVE #endif #endif #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) #define SIMDE_X86_SSE4_2_NATIVE #endif #if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_XOP) #define SIMDE_X86_XOP_NATIVE #endif #endif #if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) #define SIMDE_X86_SSE4_2_NATIVE #endif #if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if 
defined(SIMDE_ARCH_X86_SSE4_2) #define SIMDE_X86_SSE4_2_NATIVE #endif #endif #if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) #define SIMDE_X86_SSE4_1_NATIVE #endif #if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_SSE4_1) #define SIMDE_X86_SSE4_1_NATIVE #endif #endif #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) #define SIMDE_X86_SSSE3_NATIVE #endif #if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_SSSE3) #define SIMDE_X86_SSSE3_NATIVE #endif #endif #if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) #define SIMDE_X86_SSE3_NATIVE #endif #if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_SSE3) #define SIMDE_X86_SSE3_NATIVE #endif #endif #if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) #define SIMDE_X86_SSE2_NATIVE #endif #if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_SSE2) #define SIMDE_X86_SSE2_NATIVE #endif #endif #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) #define SIMDE_X86_SSE_NATIVE #endif #if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_SSE) #define SIMDE_X86_SSE_NATIVE #endif #endif #if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_MMX) #define SIMDE_X86_MMX_NATIVE #endif #endif #if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_GFNI) #define SIMDE_X86_GFNI_NATIVE #endif #endif #if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if 
defined(SIMDE_ARCH_X86_PCLMUL) #define SIMDE_X86_PCLMUL_NATIVE #endif #endif #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) #define SIMDE_X86_VPCLMULQDQ_NATIVE #endif #endif #if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(__INTEL_COMPILER) #define SIMDE_X86_SVML_NATIVE #endif #endif #if defined(HEDLEY_MSVC_VERSION) #pragma warning(push) #pragma warning(disable:4799) #endif #if \ defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) #include #elif defined(SIMDE_X86_SSE4_2_NATIVE) #include #elif defined(SIMDE_X86_SSE4_1_NATIVE) #include #elif defined(SIMDE_X86_SSSE3_NATIVE) #include #elif defined(SIMDE_X86_SSE3_NATIVE) #include #elif defined(SIMDE_X86_SSE2_NATIVE) #include #elif defined(SIMDE_X86_SSE_NATIVE) #include #elif defined(SIMDE_X86_MMX_NATIVE) #include #endif #if defined(SIMDE_X86_XOP_NATIVE) #if defined(_MSC_VER) #include #else #include #endif #endif #if defined(HEDLEY_MSVC_VERSION) #pragma warning(pop) #endif #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(80) #define SIMDE_ARM_NEON_A64V8_NATIVE #endif #endif #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) #define SIMDE_ARM_NEON_A32V8_NATIVE #endif #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(80) && (__ARM_NEON_FP & 0x02) #define SIMDE_ARM_NEON_A32V8_NATIVE #endif #endif #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define SIMDE_ARM_NEON_A32V7_NATIVE #endif #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if 
defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(70) #define SIMDE_ARM_NEON_A32V7_NATIVE #endif #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #include #endif #if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_ARM_SVE) #define SIMDE_ARM_SVE_NATIVE #include #endif #endif #if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_WASM_SIMD128) #define SIMDE_WASM_SIMD128_NATIVE #endif #endif #if defined(SIMDE_WASM_SIMD128_NATIVE) #if !defined(__wasm_unimplemented_simd128__) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ #define __wasm_unimplemented_simd128__ HEDLEY_DIAGNOSTIC_POP #endif #include #endif #if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) #define SIMDE_POWER_ALTIVEC_P9_NATIVE #endif #endif #if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) #define SIMDE_POWER_ALTIVEC_P8_NATIVE #endif #if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) #define SIMDE_POWER_ALTIVEC_P8_NATIVE #endif #endif #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) #define SIMDE_POWER_ALTIVEC_P7_NATIVE #endif #if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) #define SIMDE_POWER_ALTIVEC_P7_NATIVE #endif #endif #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) #define SIMDE_POWER_ALTIVEC_P6_NATIVE #endif #if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) #define SIMDE_POWER_ALTIVEC_P6_NATIVE #endif #endif #if 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) #define SIMDE_POWER_ALTIVEC_P5_NATIVE #endif #if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) #define SIMDE_POWER_ALTIVEC_P5_NATIVE #endif #endif #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) /* AltiVec conflicts with lots of stuff. The bool keyword conflicts * with the bool keyword in C++ and the bool macro in C99+ (defined * in stdbool.h). The vector keyword conflicts with std::vector in * C++ if you are `using std;`. * * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` * instead, but altivec.h will unconditionally define * `vector`/`bool`/`pixel` so we need to work around that. * * Unfortunately this means that if your code uses AltiVec directly * it may break. If this is the case you'll want to define * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even * better, port your code to use the double-underscore versions. */ #if defined(bool) #undef bool #endif #include #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) #if defined(vector) #undef vector #endif #if defined(pixel) #undef pixel #endif #if defined(bool) #undef bool #endif #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ /* Use these intsead of vector/pixel/bool in SIMDe. 
*/ #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T #define SIMDE_POWER_ALTIVEC_PIXEL __pixel #define SIMDE_POWER_ALTIVEC_BOOL __bool /* Re-define bool if we're using stdbool.h */ #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) #define bool _Bool #endif #endif #if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 #endif #endif #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) #include #endif /* This is used to determine whether or not to fall back on a vector * function in an earlier ISA extensions, as well as whether * we expected any attempts at vectorization to be fruitful or if we * expect to always be running serial code. */ #if !defined(SIMDE_NATURAL_VECTOR_SIZE) #if defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_NATURAL_VECTOR_SIZE (512) #elif defined(SIMDE_X86_AVX_NATIVE) #define SIMDE_NATURAL_VECTOR_SIZE (256) #elif \ defined(SIMDE_X86_SSE_NATIVE) || \ defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ defined(SIMDE_WASM_SIMD128_NATIVE) || \ defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) #define SIMDE_NATURAL_VECTOR_SIZE (128) #endif #if !defined(SIMDE_NATURAL_VECTOR_SIZE) #define SIMDE_NATURAL_VECTOR_SIZE (0) #endif #endif #define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) #define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) /* Native aliases */ #if defined(SIMDE_ENABLE_NATIVE_ALIASES) #if !defined(SIMDE_X86_MMX_NATIVE) #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_SSE_NATIVE) #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_SSE2_NATIVE) #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_SSE3_NATIVE) #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES #endif #if 
!defined(SIMDE_X86_SSSE3_NATIVE) #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_SSE4_1_NATIVE) #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_SSE4_2_NATIVE) #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX_NATIVE) #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX2_NATIVE) #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_FMA_NATIVE) #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX512VL_NATIVE) #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX512BW_NATIVE) #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX512DQ_NATIVE) #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX512CD_NATIVE) #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_GFNI_NATIVE) #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_PCLMUL_NATIVE) #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES #endif #endif /* Are floating point values stored using IEEE 754? Knowing * this at during preprocessing is a bit tricky, mostly because what * we're curious about is how values are stored and not whether the * implementation is fully conformant in terms of rounding, NaN * handling, etc. 
* * For example, if you use -ffast-math or -Ofast on * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 * support is not advertised (by defining __STDC_IEC_559__). * * However, what we care about is whether it is safe to assume that * floating point values are stored in IEEE 754 format, in which case * we can provide faster implementations of some functions. * * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- * so we just assume IEEE 754 for now. There is a test which verifies * this, if that test fails sowewhere please let us know and we'll add * an exception for that platform. Meanwhile, you can define * SIMDE_NO_IEEE754_STORAGE. */ #if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) #define SIMDE_IEEE754_STORAGE #endif #endif /* !defined(SIMDE_FEATURES_H) */ simde-0.7.2/simde/simde-math.h000066400000000000000000001520041400333146700161260ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2017-2020 Evan Nemerson */ /* Attempt to find math functions. Functions may be in , * , compiler built-ins/intrinsics, or platform/architecture * specific headers. In some cases, especially those not built in to * libm, we may need to define our own implementations. */ #if !defined(SIMDE_MATH_H) #define SIMDE_MATH_H 1 #include "hedley.h" #include "simde-features.h" #include #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #include #endif HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS /* SLEEF support * https://sleef.org/ * * If you include prior to including SIMDe, SIMDe will use * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to * including SIMDe to force the issue. * * Note that SLEEF does requires linking to libsleef. * * By default, SIMDe will use the 1 ULP functions, but if you use * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. This is * only the case for the simde_math_* functions; for code in other * SIMDe headers which calls SLEEF directly we may use functions with * greater error if the API we're implementing is less precise (for * example, SVML guarantees 4 ULP, so we will generally use the 3.5 * ULP functions from SLEEF). 
*/ #if !defined(SIMDE_MATH_SLEEF_DISABLE) #if defined(__SLEEF_H__) #define SIMDE_MATH_SLEEF_ENABLE #endif #endif #if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ #include HEDLEY_DIAGNOSTIC_POP #endif #if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) #if defined(SLEEF_VERSION_MAJOR) #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #endif #else #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) #endif #if defined(__has_builtin) #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) #elif \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_GCC_VERSION_CHECK(4,4,0) #define SIMDE_MATH_BUILTIN_LIBM(func) (1) #else #define SIMDE_MATH_BUILTIN_LIBM(func) (0) #endif #if defined(HUGE_VAL) /* Looks like or has already been included. */ /* The math.h from libc++ (yes, the C header from the C++ standard * library) will define an isnan function, but not an isnan macro * like the C standard requires. So we detect the header guards * macro libc++ uses. 
*/ #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) #define SIMDE_MATH_HAVE_MATH_H #elif defined(__cplusplus) #define SIMDE_MATH_HAVE_CMATH #endif #elif defined(__has_include) #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() #define SIMDE_MATH_HAVE_CMATH #include #elif __has_include() #define SIMDE_MATH_HAVE_MATH_H #include #elif !defined(SIMDE_MATH_NO_LIBM) #define SIMDE_MATH_NO_LIBM #endif #elif !defined(SIMDE_MATH_NO_LIBM) #if defined(__cplusplus) && (__cplusplus >= 201103L) #define SIMDE_MATH_HAVE_CMATH HEDLEY_DIAGNOSTIC_PUSH #if defined(HEDLEY_MSVC_VERSION) /* VS 14 emits this diagnostic about noexcept being used on a * function, which we can't do anything about. */ #pragma warning(disable:4996) #endif #include HEDLEY_DIAGNOSTIC_POP #else #define SIMDE_MATH_HAVE_MATH_H #include #endif #endif #if !defined(SIMDE_MATH_INFINITY) #if \ HEDLEY_HAS_BUILTIN(__builtin_inf) || \ HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_CRAY_VERSION_CHECK(8,1,0) #define SIMDE_MATH_INFINITY (__builtin_inf()) #elif defined(INFINITY) #define SIMDE_MATH_INFINITY INFINITY #endif #endif #if !defined(SIMDE_INFINITYF) #if \ HEDLEY_HAS_BUILTIN(__builtin_inff) || \ HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) #define SIMDE_MATH_INFINITYF (__builtin_inff()) #elif defined(INFINITYF) #define SIMDE_MATH_INFINITYF INFINITYF #elif defined(SIMDE_MATH_INFINITY) #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) #endif #endif #if !defined(SIMDE_MATH_NAN) #if \ HEDLEY_HAS_BUILTIN(__builtin_nan) || \ HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) #define SIMDE_MATH_NAN (__builtin_nan("")) #elif defined(NAN) #define 
SIMDE_MATH_NAN NAN #endif #endif #if !defined(SIMDE_NANF) #if \ HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_CRAY_VERSION_CHECK(8,1,0) #define SIMDE_MATH_NANF (__builtin_nanf("")) #elif defined(NANF) #define SIMDE_MATH_NANF NANF #elif defined(SIMDE_MATH_NAN) #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) #endif #endif #if !defined(SIMDE_MATH_PI) #if defined(M_PI) #define SIMDE_MATH_PI M_PI #else #define SIMDE_MATH_PI 3.14159265358979323846 #endif #endif #if !defined(SIMDE_MATH_PIF) #if defined(M_PI) #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) #else #define SIMDE_MATH_PIF 3.14159265358979323846f #endif #endif #if !defined(SIMDE_MATH_PI_OVER_180) #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 #endif #if !defined(SIMDE_MATH_PI_OVER_180F) #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f #endif #if !defined(SIMDE_MATH_180_OVER_PI) #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 #endif #if !defined(SIMDE_MATH_180_OVER_PIF) #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f #endif #if !defined(SIMDE_MATH_FLT_MIN) #if defined(FLT_MIN) #define SIMDE_MATH_FLT_MIN FLT_MIN #elif defined(__FLT_MIN__) #define SIMDE_MATH_FLT_MIN __FLT_MIN__ #elif defined(__cplusplus) #include #define SIMDE_MATH_FLT_MIN FLT_MIN #else #include #define SIMDE_MATH_FLT_MIN FLT_MIN #endif #endif #if !defined(SIMDE_MATH_DBL_MIN) #if defined(DBL_MIN) #define SIMDE_MATH_DBL_MIN DBL_MIN #elif defined(__DBL_MIN__) #define SIMDE_MATH_DBL_MIN __DBL_MIN__ #elif defined(__cplusplus) #include #define SIMDE_MATH_DBL_MIN DBL_MIN #else #include #define SIMDE_MATH_DBL_MIN DBL_MIN #endif #endif /*** Classification macros from C99 ***/ #if !defined(simde_math_isinf) #if 
SIMDE_MATH_BUILTIN_LIBM(isinf)
    #define simde_math_isinf(v) __builtin_isinf(v)
  #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_isinf(v) isinf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_isinf(v) std::isinf(v)
  #endif
#endif

/* Float variant: builtin first, then the overloaded std:: function, and
 * as a last resort the double wrapper applied to a widened argument. */
#if !defined(simde_math_isinff)
  #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \
      HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
      HEDLEY_ARM_VERSION_CHECK(4,1,0)
    #define simde_math_isinff(v) __builtin_isinff(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_isinff(v) std::isinf(v)
  #elif defined(simde_math_isinf)
    #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v))
  #endif
#endif

#if !defined(simde_math_isnan)
  #if SIMDE_MATH_BUILTIN_LIBM(isnan)
    #define simde_math_isnan(v) __builtin_isnan(v)
  #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_isnan(v) isnan(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_isnan(v) std::isnan(v)
  #endif
#endif

/* Float variant; same fallback order as simde_math_isinff. */
#if !defined(simde_math_isnanf)
  #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \
      HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
      HEDLEY_ARM_VERSION_CHECK(4,1,0)
    /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */
    #define simde_math_isnanf(v) __builtin_isnanf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_isnanf(v) std::isnan(v)
  #elif defined(simde_math_isnan)
    #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v))
  #endif
#endif

#if !defined(simde_math_isnormal)
  #if SIMDE_MATH_BUILTIN_LIBM(isnormal)
    #define simde_math_isnormal(v) __builtin_isnormal(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_isnormal(v) isnormal(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_isnormal(v) std::isnormal(v)
  #endif
#endif

/* Float variant: tries a dedicated float builtin/macro before falling
 * back to the type-generic isnormal forms. */
#if !defined(simde_math_isnormalf)
  #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf)
    #define simde_math_isnormalf(v) __builtin_isnormalf(v)
  #elif SIMDE_MATH_BUILTIN_LIBM(isnormal)
    #define simde_math_isnormalf(v) __builtin_isnormal(v)
  #elif defined(isnormalf)
    #define simde_math_isnormalf(v) isnormalf(v)
  #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_isnormalf(v) isnormal(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_isnormalf(v) std::isnormal(v)
  #elif defined(simde_math_isnormal)
    #define simde_math_isnormalf(v) simde_math_isnormal(v)
  #endif
#endif

/*** Manipulation functions ***/

/* The builtin is skipped when HEDLEY_IBM_VERSION is defined, even if
 * HEDLEY_HAS_BUILTIN reports it as available. */
#if !defined(simde_math_nextafter)
  #if \
      (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \
      HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
      HEDLEY_GCC_VERSION_CHECK(3,4,0) || \
      HEDLEY_INTEL_VERSION_CHECK(13,0,0)
    #define simde_math_nextafter(x, y) __builtin_nextafter(x, y)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_nextafter(x, y) std::nextafter(x, y)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_nextafter(x, y) nextafter(x, y)
  #endif
#endif

#if !defined(simde_math_nextafterf)
  #if \
      (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \
      HEDLEY_ARM_VERSION_CHECK(4,1,0) || \
      HEDLEY_GCC_VERSION_CHECK(3,4,0) || \
      HEDLEY_INTEL_VERSION_CHECK(13,0,0)
    #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_nextafterf(x, y) std::nextafter(x, y)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_nextafterf(x, y) nextafterf(x, y)
  #endif
#endif

/*** Functions from C99 ***/

/* Each wrapper below is only defined if the user has not already
 * defined it.  Selection order: compiler builtin when
 * SIMDE_MATH_BUILTIN_LIBM(name) is true, then the std:: function when
 * SIMDE_MATH_HAVE_CMATH is defined, then the plain C function when
 * SIMDE_MATH_HAVE_MATH_H is defined.  If none applies the wrapper is
 * left undefined. */

#if !defined(simde_math_abs)
  #if SIMDE_MATH_BUILTIN_LIBM(abs)
    #define simde_math_abs(v) __builtin_abs(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_abs(v) std::abs(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_abs(v) abs(v)
  #endif
#endif

#if !defined(simde_math_fabsf)
  #if SIMDE_MATH_BUILTIN_LIBM(fabsf)
    #define simde_math_fabsf(v) __builtin_fabsf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_fabsf(v) std::abs(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_fabsf(v) fabsf(v)
  #endif
#endif

#if !defined(simde_math_acos)
  #if SIMDE_MATH_BUILTIN_LIBM(acos)
    #define
simde_math_acos(v) __builtin_acos(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_acos(v) std::acos(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_acos(v) acos(v)
  #endif
#endif

/* All wrappers in this run follow one pattern: user override first
 * (outer #if), then compiler builtin, then std:: (C++), then the C
 * library function.  The float ("f") wrappers map to the overloaded
 * std:: function in C++ unless noted otherwise. */

#if !defined(simde_math_acosf)
  #if SIMDE_MATH_BUILTIN_LIBM(acosf)
    #define simde_math_acosf(v) __builtin_acosf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_acosf(v) std::acos(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_acosf(v) acosf(v)
  #endif
#endif

#if !defined(simde_math_acosh)
  #if SIMDE_MATH_BUILTIN_LIBM(acosh)
    #define simde_math_acosh(v) __builtin_acosh(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_acosh(v) std::acosh(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_acosh(v) acosh(v)
  #endif
#endif

#if !defined(simde_math_acoshf)
  #if SIMDE_MATH_BUILTIN_LIBM(acoshf)
    #define simde_math_acoshf(v) __builtin_acoshf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_acoshf(v) std::acosh(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_acoshf(v) acoshf(v)
  #endif
#endif

#if !defined(simde_math_asin)
  #if SIMDE_MATH_BUILTIN_LIBM(asin)
    #define simde_math_asin(v) __builtin_asin(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_asin(v) std::asin(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_asin(v) asin(v)
  #endif
#endif

#if !defined(simde_math_asinf)
  #if SIMDE_MATH_BUILTIN_LIBM(asinf)
    #define simde_math_asinf(v) __builtin_asinf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_asinf(v) std::asin(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_asinf(v) asinf(v)
  #endif
#endif

#if !defined(simde_math_asinh)
  #if SIMDE_MATH_BUILTIN_LIBM(asinh)
    #define simde_math_asinh(v) __builtin_asinh(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_asinh(v) std::asinh(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_asinh(v) asinh(v)
  #endif
#endif

#if !defined(simde_math_asinhf)
  #if SIMDE_MATH_BUILTIN_LIBM(asinhf)
    #define simde_math_asinhf(v) __builtin_asinhf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_asinhf(v) std::asinh(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_asinhf(v) asinhf(v)
  #endif
#endif

#if !defined(simde_math_atan)
  #if SIMDE_MATH_BUILTIN_LIBM(atan)
    #define simde_math_atan(v) __builtin_atan(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_atan(v) std::atan(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_atan(v) atan(v)
  #endif
#endif

/* Two-argument arctangent; arguments are passed through as (y, x). */
#if !defined(simde_math_atan2)
  #if SIMDE_MATH_BUILTIN_LIBM(atan2)
    #define simde_math_atan2(y, x) __builtin_atan2(y, x)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_atan2(y, x) std::atan2(y, x)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_atan2(y, x) atan2(y, x)
  #endif
#endif

#if !defined(simde_math_atan2f)
  #if SIMDE_MATH_BUILTIN_LIBM(atan2f)
    #define simde_math_atan2f(y, x) __builtin_atan2f(y, x)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_atan2f(y, x) std::atan2(y, x)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_atan2f(y, x) atan2f(y, x)
  #endif
#endif

#if !defined(simde_math_atanf)
  #if SIMDE_MATH_BUILTIN_LIBM(atanf)
    #define simde_math_atanf(v) __builtin_atanf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_atanf(v) std::atan(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_atanf(v) atanf(v)
  #endif
#endif

#if !defined(simde_math_atanh)
  #if SIMDE_MATH_BUILTIN_LIBM(atanh)
    #define simde_math_atanh(v) __builtin_atanh(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_atanh(v) std::atanh(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_atanh(v) atanh(v)
  #endif
#endif

#if !defined(simde_math_atanhf)
  #if SIMDE_MATH_BUILTIN_LIBM(atanhf)
    #define simde_math_atanhf(v) __builtin_atanhf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_atanhf(v) std::atanh(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_atanhf(v) atanhf(v)
  #endif
#endif

#if !defined(simde_math_cbrt)
  #if SIMDE_MATH_BUILTIN_LIBM(cbrt)
    #define simde_math_cbrt(v) __builtin_cbrt(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_cbrt(v) std::cbrt(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_cbrt(v) cbrt(v)
  #endif
#endif

#if !defined(simde_math_cbrtf)
  #if SIMDE_MATH_BUILTIN_LIBM(cbrtf)
    #define simde_math_cbrtf(v) __builtin_cbrtf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_cbrtf(v) std::cbrt(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_cbrtf(v) cbrtf(v)
  #endif
#endif

#if !defined(simde_math_ceil)
  #if SIMDE_MATH_BUILTIN_LIBM(ceil)
    #define simde_math_ceil(v) __builtin_ceil(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_ceil(v) std::ceil(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_ceil(v) ceil(v)
  #endif
#endif

#if !defined(simde_math_ceilf)
  #if SIMDE_MATH_BUILTIN_LIBM(ceilf)
    #define simde_math_ceilf(v) __builtin_ceilf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_ceilf(v) std::ceil(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_ceilf(v) ceilf(v)
  #endif
#endif

#if !defined(simde_math_copysign)
  #if SIMDE_MATH_BUILTIN_LIBM(copysign)
    #define simde_math_copysign(x, y) __builtin_copysign(x, y)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_copysign(x, y) std::copysign(x, y)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_copysign(x, y) copysign(x, y)
  #endif
#endif

/* Unlike the other float wrappers, the C++ branch here names the
 * suffixed function std::copysignf rather than the overload. */
#if !defined(simde_math_copysignf)
  #if SIMDE_MATH_BUILTIN_LIBM(copysignf)
    #define simde_math_copysignf(x, y) __builtin_copysignf(x, y)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_copysignf(x, y) std::copysignf(x, y)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_copysignf(x, y) copysignf(x, y)
  #endif
#endif

#if !defined(simde_math_cos)
  #if SIMDE_MATH_BUILTIN_LIBM(cos)
    #define simde_math_cos(v) __builtin_cos(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_cos(v) std::cos(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_cos(v) cosf(v)
  #endif
#endif

/* When SIMDE_MATH_SLEEF_ENABLE is defined, cosf is routed to SLEEF;
 * SIMDE_ACCURACY_PREFERENCE < 1 picks the _u35 entry point, otherwise
 * the _u10 one is used. */
#if !defined(simde_math_cosf)
  #if defined(SIMDE_MATH_SLEEF_ENABLE)
    #if SIMDE_ACCURACY_PREFERENCE < 1
      #define simde_math_cosf(v) Sleef_cosf_u35(v)
    #else
      #define simde_math_cosf(v) Sleef_cosf_u10(v)
    #endif
  #elif SIMDE_MATH_BUILTIN_LIBM(cosf)
    #define simde_math_cosf(v) __builtin_cosf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_cosf(v) std::cos(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_cosf(v) cosf(v)
  #endif
#endif

#if !defined(simde_math_cosh)
  #if SIMDE_MATH_BUILTIN_LIBM(cosh)
    #define simde_math_cosh(v) __builtin_cosh(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_cosh(v) std::cosh(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_cosh(v) cosh(v)
  #endif
#endif

#if !defined(simde_math_coshf)
  #if SIMDE_MATH_BUILTIN_LIBM(coshf)
    #define simde_math_coshf(v) __builtin_coshf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_coshf(v) std::cosh(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_coshf(v) coshf(v)
  #endif
#endif

#if !defined(simde_math_erf)
  #if SIMDE_MATH_BUILTIN_LIBM(erf)
    #define simde_math_erf(v) __builtin_erf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_erf(v) std::erf(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_erf(v) erf(v)
  #endif
#endif

#if !defined(simde_math_erff)
  #if SIMDE_MATH_BUILTIN_LIBM(erff)
    #define simde_math_erff(v) __builtin_erff(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_erff(v) std::erf(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_erff(v) erff(v)
  #endif
#endif

#if !defined(simde_math_erfc)
  #if SIMDE_MATH_BUILTIN_LIBM(erfc)
    #define simde_math_erfc(v) __builtin_erfc(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_erfc(v) std::erfc(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_erfc(v) erfc(v)
  #endif
#endif

#if !defined(simde_math_erfcf)
  #if SIMDE_MATH_BUILTIN_LIBM(erfcf)
    #define simde_math_erfcf(v) __builtin_erfcf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_erfcf(v) std::erfc(v)
  #elif
defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_erfcf(v) erfcf(v) #endif #endif #if !defined(simde_math_exp) #if SIMDE_MATH_BUILTIN_LIBM(exp) #define simde_math_exp(v) __builtin_exp(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_exp(v) std::exp(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_exp(v) exp(v) #endif #endif #if !defined(simde_math_expf) #if SIMDE_MATH_BUILTIN_LIBM(expf) #define simde_math_expf(v) __builtin_expf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_expf(v) std::exp(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_expf(v) expf(v) #endif #endif #if !defined(simde_math_expm1) #if SIMDE_MATH_BUILTIN_LIBM(expm1) #define simde_math_expm1(v) __builtin_expm1(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_expm1(v) std::expm1(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_expm1(v) expm1(v) #endif #endif #if !defined(simde_math_expm1f) #if SIMDE_MATH_BUILTIN_LIBM(expm1f) #define simde_math_expm1f(v) __builtin_expm1f(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_expm1f(v) std::expm1(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_expm1f(v) expm1f(v) #endif #endif #if !defined(simde_math_exp2) #if SIMDE_MATH_BUILTIN_LIBM(exp2) #define simde_math_exp2(v) __builtin_exp2(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_exp2(v) std::exp2(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_exp2(v) exp2(v) #endif #endif #if !defined(simde_math_exp2f) #if SIMDE_MATH_BUILTIN_LIBM(exp2f) #define simde_math_exp2f(v) __builtin_exp2f(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_exp2f(v) std::exp2(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_exp2f(v) exp2f(v) #endif #endif #if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) # define simde_math_exp10(v) __builtin_exp10(v) #else # define simde_math_exp10(v) pow(10.0, (v)) #endif #if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) # 
define simde_math_exp10f(v) __builtin_exp10f(v) #else # define simde_math_exp10f(v) powf(10.0f, (v)) #endif #if !defined(simde_math_fabs) #if SIMDE_MATH_BUILTIN_LIBM(fabs) #define simde_math_fabs(v) __builtin_fabs(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_fabs(v) std::fabs(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_fabs(v) fabs(v) #endif #endif #if !defined(simde_math_fabsf) #if SIMDE_MATH_BUILTIN_LIBM(fabsf) #define simde_math_fabsf(v) __builtin_fabsf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_fabsf(v) std::fabs(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_fabsf(v) fabsf(v) #endif #endif #if !defined(simde_math_floor) #if SIMDE_MATH_BUILTIN_LIBM(floor) #define simde_math_floor(v) __builtin_floor(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_floor(v) std::floor(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_floor(v) floor(v) #endif #endif #if !defined(simde_math_floorf) #if SIMDE_MATH_BUILTIN_LIBM(floorf) #define simde_math_floorf(v) __builtin_floorf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_floorf(v) std::floor(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_floorf(v) floorf(v) #endif #endif #if !defined(simde_math_fma) #if SIMDE_MATH_BUILTIN_LIBM(fma) #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_fma(x, y, z) std::fma(x, y, z) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_fma(x, y, z) fma(x, y, z) #endif #endif #if !defined(simde_math_fmaf) #if SIMDE_MATH_BUILTIN_LIBM(fmaf) #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_fmaf(x, y, z) std::fma(x, y, z) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_fmaf(x, y, z) fmaf(x, y, z) #endif #endif #if !defined(simde_math_fmax) #if SIMDE_MATH_BUILTIN_LIBM(fmax) #define simde_math_fmax(x, y, z) __builtin_fmax(x, y, z) #elif defined(SIMDE_MATH_HAVE_CMATH) 
#define simde_math_fmax(x, y, z) std::fmax(x, y, z) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_fmax(x, y, z) fmax(x, y, z) #endif #endif #if !defined(simde_math_fmaxf) #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) #define simde_math_fmaxf(x, y, z) __builtin_fmaxf(x, y, z) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_fmaxf(x, y, z) std::fmax(x, y, z) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_fmaxf(x, y, z) fmaxf(x, y, z) #endif #endif #if !defined(simde_math_hypot) #if SIMDE_MATH_BUILTIN_LIBM(hypot) #define simde_math_hypot(y, x) __builtin_hypot(y, x) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_hypot(y, x) std::hypot(y, x) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_hypot(y, x) hypot(y, x) #endif #endif #if !defined(simde_math_hypotf) #if SIMDE_MATH_BUILTIN_LIBM(hypotf) #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_hypotf(y, x) std::hypot(y, x) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_hypotf(y, x) hypotf(y, x) #endif #endif #if !defined(simde_math_log) #if SIMDE_MATH_BUILTIN_LIBM(log) #define simde_math_log(v) __builtin_log(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_log(v) std::log(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_log(v) log(v) #endif #endif #if !defined(simde_math_logf) #if SIMDE_MATH_BUILTIN_LIBM(logf) #define simde_math_logf(v) __builtin_logf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_logf(v) std::log(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_logf(v) logf(v) #endif #endif #if !defined(simde_math_logb) #if SIMDE_MATH_BUILTIN_LIBM(logb) #define simde_math_logb(v) __builtin_logb(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_logb(v) std::logb(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_logb(v) logb(v) #endif #endif #if !defined(simde_math_logbf) #if SIMDE_MATH_BUILTIN_LIBM(logbf) #define simde_math_logbf(v) __builtin_logbf(v) 
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_logbf(v) std::logb(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_logbf(v) logbf(v)
  #endif
#endif

/* Same selection order for every wrapper in this run: user override,
 * compiler builtin, std:: function (C++), C library function. */

#if !defined(simde_math_log1p)
  #if SIMDE_MATH_BUILTIN_LIBM(log1p)
    #define simde_math_log1p(v) __builtin_log1p(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_log1p(v) std::log1p(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_log1p(v) log1p(v)
  #endif
#endif

#if !defined(simde_math_log1pf)
  #if SIMDE_MATH_BUILTIN_LIBM(log1pf)
    #define simde_math_log1pf(v) __builtin_log1pf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_log1pf(v) std::log1p(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_log1pf(v) log1pf(v)
  #endif
#endif

#if !defined(simde_math_log2)
  #if SIMDE_MATH_BUILTIN_LIBM(log2)
    #define simde_math_log2(v) __builtin_log2(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_log2(v) std::log2(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_log2(v) log2(v)
  #endif
#endif

#if !defined(simde_math_log2f)
  #if SIMDE_MATH_BUILTIN_LIBM(log2f)
    #define simde_math_log2f(v) __builtin_log2f(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_log2f(v) std::log2(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_log2f(v) log2f(v)
  #endif
#endif

#if !defined(simde_math_log10)
  #if SIMDE_MATH_BUILTIN_LIBM(log10)
    #define simde_math_log10(v) __builtin_log10(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_log10(v) std::log10(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_log10(v) log10(v)
  #endif
#endif

#if !defined(simde_math_log10f)
  #if SIMDE_MATH_BUILTIN_LIBM(log10f)
    #define simde_math_log10f(v) __builtin_log10f(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_log10f(v) std::log10(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_log10f(v) log10f(v)
  #endif
#endif

/* modf: the second argument (iptr) is forwarded unchanged to the
 * underlying function. */
#if !defined(simde_math_modf)
  #if SIMDE_MATH_BUILTIN_LIBM(modf)
    #define simde_math_modf(x, iptr) __builtin_modf(x, iptr)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_modf(x, iptr) std::modf(x, iptr)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_modf(x, iptr) modf(x, iptr)
  #endif
#endif

#if !defined(simde_math_modff)
  #if SIMDE_MATH_BUILTIN_LIBM(modff)
    #define simde_math_modff(x, iptr) __builtin_modff(x, iptr)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_modff(x, iptr) std::modf(x, iptr)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_modff(x, iptr) modff(x, iptr)
  #endif
#endif

#if !defined(simde_math_nearbyint)
  #if SIMDE_MATH_BUILTIN_LIBM(nearbyint)
    #define simde_math_nearbyint(v) __builtin_nearbyint(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_nearbyint(v) std::nearbyint(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_nearbyint(v) nearbyint(v)
  #endif
#endif

#if !defined(simde_math_nearbyintf)
  #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf)
    #define simde_math_nearbyintf(v) __builtin_nearbyintf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_nearbyintf(v) std::nearbyint(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_nearbyintf(v) nearbyintf(v)
  #endif
#endif

/* pow: note the wrapper's parameter names are (y, x); they are passed
 * to the underlying function in that same order. */
#if !defined(simde_math_pow)
  #if SIMDE_MATH_BUILTIN_LIBM(pow)
    #define simde_math_pow(y, x) __builtin_pow(y, x)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_pow(y, x) std::pow(y, x)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_pow(y, x) pow(y, x)
  #endif
#endif

#if !defined(simde_math_powf)
  #if SIMDE_MATH_BUILTIN_LIBM(powf)
    #define simde_math_powf(y, x) __builtin_powf(y, x)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_powf(y, x) std::pow(y, x)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_powf(y, x) powf(y, x)
  #endif
#endif

#if !defined(simde_math_rint)
  #if SIMDE_MATH_BUILTIN_LIBM(rint)
    #define simde_math_rint(v) __builtin_rint(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_rint(v) std::rint(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_rint(v) rint(v)
  #endif
#endif

#if
!defined(simde_math_rintf) #if SIMDE_MATH_BUILTIN_LIBM(rintf) #define simde_math_rintf(v) __builtin_rintf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_rintf(v) std::rint(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_rintf(v) rintf(v) #endif #endif #if !defined(simde_math_round) #if SIMDE_MATH_BUILTIN_LIBM(round) #define simde_math_round(v) __builtin_round(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_round(v) std::round(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_round(v) round(v) #endif #endif #if !defined(simde_math_roundf) #if SIMDE_MATH_BUILTIN_LIBM(roundf) #define simde_math_roundf(v) __builtin_roundf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_roundf(v) std::round(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_roundf(v) roundf(v) #endif #endif #if !defined(simde_math_roundeven) #if \ HEDLEY_HAS_BUILTIN(__builtin_roundeven) || \ HEDLEY_GCC_VERSION_CHECK(10,0,0) #define simde_math_roundeven(v) __builtin_roundeven(v) #elif defined(simde_math_round) && defined(simde_math_fabs) static HEDLEY_INLINE double simde_math_roundeven(double v) { double rounded = simde_math_round(v); double diff = rounded - v; if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { rounded = v - diff; } return rounded; } #define simde_math_roundeven simde_math_roundeven #endif #endif #if !defined(simde_math_roundevenf) #if \ HEDLEY_HAS_BUILTIN(__builtin_roundevenf) || \ HEDLEY_GCC_VERSION_CHECK(10,0,0) #define simde_math_roundevenf(v) __builtin_roundevenf(v) #elif defined(simde_math_roundf) && defined(simde_math_fabsf) static HEDLEY_INLINE float simde_math_roundevenf(float v) { float rounded = simde_math_roundf(v); float diff = rounded - v; if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { rounded = v - diff; } return rounded; } #define simde_math_roundevenf simde_math_roundevenf #endif #endif #if 
!defined(simde_math_sin)
  #if SIMDE_MATH_BUILTIN_LIBM(sin)
    #define simde_math_sin(v) __builtin_sin(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_sin(v) std::sin(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_sin(v) sin(v)
  #endif
#endif

/* Same selection order for every wrapper in this run: user override,
 * compiler builtin, std:: function (C++), C library function. */

#if !defined(simde_math_sinf)
  #if SIMDE_MATH_BUILTIN_LIBM(sinf)
    #define simde_math_sinf(v) __builtin_sinf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_sinf(v) std::sin(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_sinf(v) sinf(v)
  #endif
#endif

#if !defined(simde_math_sinh)
  #if SIMDE_MATH_BUILTIN_LIBM(sinh)
    #define simde_math_sinh(v) __builtin_sinh(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_sinh(v) std::sinh(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_sinh(v) sinh(v)
  #endif
#endif

#if !defined(simde_math_sinhf)
  #if SIMDE_MATH_BUILTIN_LIBM(sinhf)
    #define simde_math_sinhf(v) __builtin_sinhf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_sinhf(v) std::sinh(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_sinhf(v) sinhf(v)
  #endif
#endif

#if !defined(simde_math_sqrt)
  #if SIMDE_MATH_BUILTIN_LIBM(sqrt)
    #define simde_math_sqrt(v) __builtin_sqrt(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_sqrt(v) std::sqrt(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_sqrt(v) sqrt(v)
  #endif
#endif

#if !defined(simde_math_sqrtf)
  #if SIMDE_MATH_BUILTIN_LIBM(sqrtf)
    #define simde_math_sqrtf(v) __builtin_sqrtf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_sqrtf(v) std::sqrt(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_sqrtf(v) sqrtf(v)
  #endif
#endif

#if !defined(simde_math_tan)
  #if SIMDE_MATH_BUILTIN_LIBM(tan)
    #define simde_math_tan(v) __builtin_tan(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_tan(v) std::tan(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_tan(v) tan(v)
  #endif
#endif

#if !defined(simde_math_tanf)
  #if SIMDE_MATH_BUILTIN_LIBM(tanf)
    #define simde_math_tanf(v) __builtin_tanf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_tanf(v) std::tan(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_tanf(v) tanf(v)
  #endif
#endif

#if !defined(simde_math_tanh)
  #if SIMDE_MATH_BUILTIN_LIBM(tanh)
    #define simde_math_tanh(v) __builtin_tanh(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_tanh(v) std::tanh(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_tanh(v) tanh(v)
  #endif
#endif

#if !defined(simde_math_tanhf)
  #if SIMDE_MATH_BUILTIN_LIBM(tanhf)
    #define simde_math_tanhf(v) __builtin_tanhf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_tanhf(v) std::tanh(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_tanhf(v) tanhf(v)
  #endif
#endif

#if !defined(simde_math_trunc)
  #if SIMDE_MATH_BUILTIN_LIBM(trunc)
    #define simde_math_trunc(v) __builtin_trunc(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_trunc(v) std::trunc(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_trunc(v) trunc(v)
  #endif
#endif

#if !defined(simde_math_truncf)
  #if SIMDE_MATH_BUILTIN_LIBM(truncf)
    #define simde_math_truncf(v) __builtin_truncf(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_math_truncf(v) std::trunc(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_math_truncf(v) truncf(v)
  #endif
#endif

/*** Additional functions not in libm ***/

/* simde_math_cdfnorm(x): normal cumulative distribution approximation
 * (double).  Only compiled when the fabs, sqrt and exp wrappers exist.
 * The function-like name is also defined as a macro so that
 * `defined(simde_math_cdfnorm)` can be used as a feature test. */
#if defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp)
  static HEDLEY_INLINE
  double
  simde_math_cdfnorm(double x) {
    /* https://www.johndcook.com/blog/cpp_phi/
    * Public Domain */
    static const double a1 = 0.254829592;
    static const double a2 = -0.284496736;
    static const double a3 = 1.421413741;
    static const double a4 = -1.453152027;
    static const double a5 = 1.061405429;
    static const double p = 0.3275911;

    /* Remember the sign, then work on |x| / sqrt(2). */
    const int sign = x < 0;
    x = simde_math_fabs(x) / simde_math_sqrt(2.0);

    /* A&S formula 7.1.26 */
    double t = 1.0 / (1.0 + p * x);
    double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x);

    /* Re-apply the sign of the original argument. */
    return 0.5 * (1.0 + (sign ? -y : y));
  }
  #define simde_math_cdfnorm simde_math_cdfnorm
#endif

/* simde_math_cdfnormf(x): single-precision counterpart of
 * simde_math_cdfnorm, using the float wrappers and float constants. */
#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf)
  static HEDLEY_INLINE
  float
  simde_math_cdfnormf(float x) {
    /* https://www.johndcook.com/blog/cpp_phi/
    * Public Domain */
    static const float a1 = 0.254829592f;
    static const float a2 = -0.284496736f;
    static const float a3 = 1.421413741f;
    static const float a4 = -1.453152027f;
    static const float a5 = 1.061405429f;
    static const float p = 0.3275911f;

    const int sign = x < 0;
    x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f);

    /* A&S formula 7.1.26 */
    float t = 1.0f / (1.0f + p * x);
    float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x);

    return 0.5f * (1.0f + (sign ? -y : y));
  }
  #define simde_math_cdfnormf simde_math_cdfnormf
#endif

/* The functions that follow compare floating-point values with ==, so
 * the float-equal diagnostic is disabled for them. */
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_

/* simde_math_cdfnorminv(p): inverse of the normal CDF (double).
 * Returns 0.0 when p is outside [0, 1], -infinity for p == 0 and
 * +infinity for p == 1; otherwise one of three rational approximations
 * is used depending on whether p is below `low`, above `high`, or in
 * between. */
#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt)
  /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/
  static HEDLEY_INLINE
  double
  simde_math_cdfnorminv(double p) {
    static const double a[] = {
      -3.969683028665376e+01,
       2.209460984245205e+02,
      -2.759285104469687e+02,
       1.383577518672690e+02,
      -3.066479806614716e+01,
       2.506628277459239e+00
    };

    static const double b[] = {
      -5.447609879822406e+01,
       1.615858368580409e+02,
      -1.556989798598866e+02,
       6.680131188771972e+01,
      -1.328068155288572e+01
    };

    static const double c[] = {
      -7.784894002430293e-03,
      -3.223964580411365e-01,
      -2.400758277161838e+00,
      -2.549732539343734e+00,
       4.374664141464968e+00,
       2.938163982698783e+00
    };

    static const double d[] = {
      7.784695709041462e-03,
      3.224671290700398e-01,
      2.445134137142996e+00,
      3.754408661907416e+00
    };

    /* Boundaries between the tail and central approximations. */
    static const double low  = 0.02425;
    static const double high = 0.97575;
    double q, r;

    if (p < 0 || p > 1) {
      return 0.0;
    } else if (p == 0) {
      return -SIMDE_MATH_INFINITY;
    } else if (p == 1) {
      return
SIMDE_MATH_INFINITY;
    } else if (p < low) {
      /* Lower tail. */
      q = simde_math_sqrt(-2.0 * simde_math_log(p));
      return
        (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) /
        (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1));
    } else if (p > high) {
      /* Upper tail: same rational form evaluated at 1 - p, negated. */
      q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p));
      return
        -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) /
         (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1));
    } else {
      /* Central region. */
      q = p - 0.5;
      r = q * q;
      return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) *
        q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1);
    }
  }
  #define simde_math_cdfnorminv simde_math_cdfnorminv
#endif

/* simde_math_cdfnorminvf(p): single-precision counterpart of
 * simde_math_cdfnorminv.  Returns 0.0f when p is outside [0, 1],
 * -infinity for p == 0 and +infinity for p == 1; otherwise picks the
 * lower-tail, upper-tail or central rational approximation. */
#if !defined(simde_math_cdfnorminvf) && defined(simde_math_logf) && defined(simde_math_sqrtf)
  static HEDLEY_INLINE
  float
  simde_math_cdfnorminvf(float p) {
    static const float a[] = {
      -3.969683028665376e+01f,
       2.209460984245205e+02f,
      -2.759285104469687e+02f,
       1.383577518672690e+02f,
      -3.066479806614716e+01f,
       2.506628277459239e+00f
    };
    static const float b[] = {
      -5.447609879822406e+01f,
       1.615858368580409e+02f,
      -1.556989798598866e+02f,
       6.680131188771972e+01f,
      -1.328068155288572e+01f
    };
    static const float c[] = {
      -7.784894002430293e-03f,
      -3.223964580411365e-01f,
      -2.400758277161838e+00f,
      -2.549732539343734e+00f,
       4.374664141464968e+00f,
       2.938163982698783e+00f
    };
    static const float d[] = {
      7.784695709041462e-03f,
      3.224671290700398e-01f,
      2.445134137142996e+00f,
      3.754408661907416e+00f
    };
    static const float low  = 0.02425f;
    static const float high = 0.97575f;
    float q, r;

    if (p < 0 || p > 1) {
      return 0.0f;
    } else if (p == 0) {
      return -SIMDE_MATH_INFINITYF;
    } else if (p == 1) {
      return SIMDE_MATH_INFINITYF;
    } else if (p < low) {
      q = simde_math_sqrtf(-2.0f * simde_math_logf(p));
      return
        (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) /
        (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1));
    } else if (p > high) {
      q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p));
      return
        -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) /
         (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1));
    } else {
      q = p - 0.5f;
      r = q * q;
      return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) *
         q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1);
    }
  }
  #define simde_math_cdfnorminvf simde_math_cdfnorminvf
#endif

#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt)
  static HEDLEY_INLINE
  double
  simde_math_erfinv(double x) {
    /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c
     *
     * The original answer on SO uses a constant of 0.147, but in my
     * testing 0.14829094707965850830078125 gives a lower average absolute error
     * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803).
     * That said, if your goal is to minimize the *maximum* absolute
     * error, 0.15449436008930206298828125 provides significantly better
     * results; 0.0009250640869140625000000000 vs ~ 0.005.
*/ double tt1, tt2, lnx; double sgn = simde_math_copysign(1.0, x); x = (1.0 - x) * (1.0 + x); lnx = simde_math_log(x); tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; tt2 = (1.0 / 0.14829094707965850830078125) * lnx; return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); } #define simde_math_erfinv simde_math_erfinv #endif #if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) static HEDLEY_INLINE float simde_math_erfinvf(float x) { float tt1, tt2, lnx; float sgn = simde_math_copysignf(1.0f, x); x = (1.0f - x) * (1.0f + x); lnx = simde_math_logf(x); tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); } #define simde_math_erfinvf simde_math_erfinvf #endif #if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) static HEDLEY_INLINE double simde_math_erfcinv(double x) { if(x >= 0.0625 && x < 2.0) { return simde_math_erfinv(1.0 - x); } else if (x < 0.0625 && x >= 1.0e-100) { double p[6] = { 0.1550470003116, 1.382719649631, 0.690969348887, -1.128081391617, 0.680544246825, -0.16444156791 }; double q[3] = { 0.155024849822, 1.385228141995, 1.000000000000 }; const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / (q[0] + t * (q[1] + t * (q[2]))); } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { double p[4] = { 0.00980456202915, 0.363667889171, 0.97302949837, -0.5374947401 }; double q[3] = { 0.00980451277802, 0.363699971544, 1.000000000000 }; const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / (q[0] + t * (q[1] + t * (q[2]))); } else if (!simde_math_isnormal(x)) { return SIMDE_MATH_INFINITY; } else { return 
-SIMDE_MATH_INFINITY; } } #define simde_math_erfcinv simde_math_erfcinv #endif #if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) static HEDLEY_INLINE float simde_math_erfcinvf(float x) { if(x >= 0.0625f && x < 2.0f) { return simde_math_erfinvf(1.0f - x); } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { static const float p[6] = { 0.1550470003116f, 1.382719649631f, 0.690969348887f, -1.128081391617f, 0.680544246825f -0.164441567910f }; static const float q[3] = { 0.155024849822f, 1.385228141995f, 1.000000000000f }; const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / (q[0] + t * (q[1] + t * (q[2]))); } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { static const float p[4] = { 0.00980456202915f, 0.36366788917100f, 0.97302949837000f, -0.5374947401000f }; static const float q[3] = { 0.00980451277802f, 0.36369997154400f, 1.00000000000000f }; const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / (q[0] + t * (q[1] + t * (q[2]))); } else { return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; } } #define simde_math_erfcinvf simde_math_erfcinvf #endif HEDLEY_DIAGNOSTIC_POP static HEDLEY_INLINE double simde_math_rad2deg(double radians) { return radians * SIMDE_MATH_180_OVER_PI; } static HEDLEY_INLINE float simde_math_rad2degf(float radians) { return radians * SIMDE_MATH_180_OVER_PIF; } static HEDLEY_INLINE double simde_math_deg2rad(double degrees) { return degrees * SIMDE_MATH_PI_OVER_180; } static HEDLEY_INLINE float simde_math_deg2radf(float degrees) { return degrees * (SIMDE_MATH_PI_OVER_180F); } /*** Saturated arithmetic ***/ static HEDLEY_INLINE int8_t simde_math_adds_i8(int8_t a, int8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqaddb_s8(a, b); #else uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); uint8_t r_ = a_ + b_; a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { r_ = a_; } return HEDLEY_STATIC_CAST(int8_t, r_); #endif } static HEDLEY_INLINE int16_t simde_math_adds_i16(int16_t a, int16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqaddh_s16(a, b); #else uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); uint16_t r_ = a_ + b_; a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { r_ = a_; } return HEDLEY_STATIC_CAST(int16_t, r_); #endif } static HEDLEY_INLINE int32_t simde_math_adds_i32(int32_t a, int32_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqadds_s32(a, b); #else uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); uint32_t r_ = a_ + b_; a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { r_ = a_; } return HEDLEY_STATIC_CAST(int32_t, r_); #endif } static HEDLEY_INLINE int64_t simde_math_adds_i64(int64_t a, int64_t b) { #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqaddd_s64(a, b); #else uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); uint64_t r_ = a_ + b_; a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { r_ = a_; } return HEDLEY_STATIC_CAST(int64_t, r_); #endif } static HEDLEY_INLINE uint8_t simde_math_adds_u8(uint8_t a, uint8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqaddb_u8(a, b); #else uint8_t r = a + b; r |= -(r < a); return r; #endif } static HEDLEY_INLINE uint16_t simde_math_adds_u16(uint16_t a, uint16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqaddh_u16(a, b); #else uint16_t r = a + b; r |= -(r < a); return r; #endif } static HEDLEY_INLINE uint32_t simde_math_adds_u32(uint32_t a, uint32_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqadds_u32(a, b); #else uint32_t r = a + b; r |= -(r < a); return r; #endif } static HEDLEY_INLINE uint64_t simde_math_adds_u64(uint64_t a, uint64_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqaddd_u64(a, b); #else uint64_t r = a + b; r |= -(r < a); return r; #endif } static HEDLEY_INLINE int8_t simde_math_subs_i8(int8_t a, int8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqsubb_s8(a, b); #else uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); uint8_t r_ = a_ - b_; a_ = (a_ >> 7) + INT8_MAX; if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { r_ = a_; } return HEDLEY_STATIC_CAST(int8_t, r_); #endif } static HEDLEY_INLINE int16_t simde_math_subs_i16(int16_t a, int16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqsubh_s16(a, b); #else uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); uint16_t r_ = a_ - b_; a_ = (a_ >> 15) + INT16_MAX; if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { r_ = a_; } return HEDLEY_STATIC_CAST(int16_t, r_); #endif } static HEDLEY_INLINE 
int32_t simde_math_subs_i32(int32_t a, int32_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqsubs_s32(a, b); #else uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); uint32_t r_ = a_ - b_; a_ = (a_ >> 31) + INT32_MAX; if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { r_ = a_; } return HEDLEY_STATIC_CAST(int32_t, r_); #endif } static HEDLEY_INLINE int64_t simde_math_subs_i64(int64_t a, int64_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqsubd_s64(a, b); #else uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); uint64_t r_ = a_ - b_; a_ = (a_ >> 63) + INT64_MAX; if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { r_ = a_; } return HEDLEY_STATIC_CAST(int64_t, r_); #endif } static HEDLEY_INLINE uint8_t simde_math_subs_u8(uint8_t a, uint8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqsubb_u8(a, b); #else uint8_t res = a - b; res &= -(res <= a); return res; #endif } static HEDLEY_INLINE uint16_t simde_math_subs_u16(uint16_t a, uint16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqsubh_u16(a, b); #else uint16_t res = a - b; res &= -(res <= a); return res; #endif } static HEDLEY_INLINE uint32_t simde_math_subs_u32(uint32_t a, uint32_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqsubs_u32(a, b); #else uint32_t res = a - b; res &= -(res <= a); return res; #endif } static HEDLEY_INLINE uint64_t simde_math_subs_u64(uint64_t a, uint64_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqsubd_u64(a, b); #else uint64_t res = a - b; res &= -(res <= a); return res; #endif } HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_MATH_H) */ simde-0.7.2/simde/x86/000077500000000000000000000000001400333146700143505ustar00rootroot00000000000000simde-0.7.2/simde/x86/avx.h000066400000000000000000006005531400333146700153300ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any 
person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2018-2020 Evan Nemerson * 2020 Michael R. 
Crusoe */ #include "sse.h" #if !defined(SIMDE_X86_AVX_H) #define SIMDE_X86_AVX_H #include "sse4.2.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ typedef union { #if defined(SIMDE_VECTOR_SUBSCRIPT) SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; #if defined(SIMDE_HAVE_INT128_) SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; #endif SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; #else SIMDE_ALIGN_TO_32 int8_t i8[32]; SIMDE_ALIGN_TO_32 int16_t i16[16]; SIMDE_ALIGN_TO_32 int32_t i32[8]; SIMDE_ALIGN_TO_32 int64_t i64[4]; SIMDE_ALIGN_TO_32 uint8_t u8[32]; SIMDE_ALIGN_TO_32 uint16_t u16[16]; SIMDE_ALIGN_TO_32 uint32_t u32[8]; SIMDE_ALIGN_TO_32 uint64_t u64[4]; SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; #if defined(SIMDE_HAVE_INT128_) SIMDE_ALIGN_TO_32 simde_int128 i128[2]; SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; #endif SIMDE_ALIGN_TO_32 simde_float32 f32[8]; SIMDE_ALIGN_TO_32 simde_float64 f64[4]; #endif SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; SIMDE_ALIGN_TO_32 simde__m128 m128[2]; #if defined(SIMDE_X86_AVX_NATIVE) SIMDE_ALIGN_TO_32 __m256 n; #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; #endif #endif } simde__m256_private; typedef union { #if defined(SIMDE_VECTOR_SUBSCRIPT) SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; #if defined(SIMDE_HAVE_INT128_) SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; #endif SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; #else SIMDE_ALIGN_TO_32 int8_t i8[32]; SIMDE_ALIGN_TO_32 int16_t i16[16]; SIMDE_ALIGN_TO_32 int32_t i32[8]; 
SIMDE_ALIGN_TO_32 int64_t i64[4]; SIMDE_ALIGN_TO_32 uint8_t u8[32]; SIMDE_ALIGN_TO_32 uint16_t u16[16]; SIMDE_ALIGN_TO_32 uint32_t u32[8]; SIMDE_ALIGN_TO_32 uint64_t u64[4]; #if defined(SIMDE_HAVE_INT128_) SIMDE_ALIGN_TO_32 simde_int128 i128[2]; SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; #endif SIMDE_ALIGN_TO_32 simde_float32 f32[8]; SIMDE_ALIGN_TO_32 simde_float64 f64[4]; SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; #endif SIMDE_ALIGN_TO_32 simde__m128d_private m128d_private[2]; SIMDE_ALIGN_TO_32 simde__m128d m128d[2]; #if defined(SIMDE_X86_AVX_NATIVE) SIMDE_ALIGN_TO_32 __m256d n; #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; #endif #endif } simde__m256d_private; typedef union { #if defined(SIMDE_VECTOR_SUBSCRIPT) SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 
uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; #if defined(SIMDE_HAVE_INT128_) SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; #endif SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; #else SIMDE_ALIGN_TO_32 int8_t i8[32]; SIMDE_ALIGN_TO_32 int16_t i16[16]; SIMDE_ALIGN_TO_32 int32_t i32[8]; SIMDE_ALIGN_TO_32 int64_t i64[4]; SIMDE_ALIGN_TO_32 uint8_t u8[32]; SIMDE_ALIGN_TO_32 uint16_t u16[16]; SIMDE_ALIGN_TO_32 uint32_t u32[8]; SIMDE_ALIGN_TO_32 uint64_t u64[4]; SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; #if defined(SIMDE_HAVE_INT128_) SIMDE_ALIGN_TO_32 simde_int128 i128[2]; SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; #endif SIMDE_ALIGN_TO_32 simde_float32 f32[8]; SIMDE_ALIGN_TO_32 simde_float64 f64[4]; #endif SIMDE_ALIGN_TO_32 simde__m128i_private m128i_private[2]; SIMDE_ALIGN_TO_32 simde__m128i m128i[2]; #if defined(SIMDE_X86_AVX_NATIVE) SIMDE_ALIGN_TO_32 __m256i n; #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) SIMDE_ALIGN_TO_16 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; #endif #endif } simde__m256i_private; #if defined(SIMDE_X86_AVX_NATIVE) typedef __m256 simde__m256; typedef __m256i simde__m256i; typedef __m256d simde__m256d; #elif defined(SIMDE_VECTOR_SUBSCRIPT) typedef simde_float32 simde__m256 SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; typedef int_fast32_t simde__m256i SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; typedef simde_float64 simde__m256d SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; #else typedef simde__m256_private simde__m256; typedef simde__m256i_private simde__m256i; typedef simde__m256d_private simde__m256d; #endif #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #if !defined(HEDLEY_INTEL_VERSION) typedef simde__m256 __m256; typedef simde__m256i __m256i; typedef simde__m256d __m256d; #else #define __m256 simde__m256 #define __m256i simde__m256i #define __m256d simde__m256d #endif #endif HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256), "simde__m256 size incorrect"); HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256_private), "simde__m256_private size incorrect"); HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i), "simde__m256i size incorrect"); HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i_private), "simde__m256i_private size incorrect"); HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d), "simde__m256d size incorrect"); HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d_private), "simde__m256d_private size incorrect"); #if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256) == 32, "simde__m256 is not 32-byte aligned"); HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256_private) == 32, "simde__m256_private is not 32-byte aligned"); HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i) == 32, "simde__m256i is not 32-byte aligned"); 
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i_private) == 32, "simde__m256i_private is not 32-byte aligned"); HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d) == 32, "simde__m256d is not 32-byte aligned"); HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d_private) == 32, "simde__m256d_private is not 32-byte aligned"); #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde__m256_from_private(simde__m256_private v) { simde__m256 r; simde_memcpy(&r, &v, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m256_private simde__m256_to_private(simde__m256 v) { simde__m256_private r; simde_memcpy(&r, &v, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde__m256i_from_private(simde__m256i_private v) { simde__m256i r; simde_memcpy(&r, &v, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m256i_private simde__m256i_to_private(simde__m256i v) { simde__m256i_private r; simde_memcpy(&r, &v, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde__m256d_from_private(simde__m256d_private v) { simde__m256d r; simde_memcpy(&r, &v, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m256d_private simde__m256d_to_private(simde__m256d v) { simde__m256d_private r; simde_memcpy(&r, &v, sizeof(r)); return r; } #define SIMDE_CMP_EQ_OQ 0 #define SIMDE_CMP_LT_OS 1 #define SIMDE_CMP_LE_OS 2 #define SIMDE_CMP_UNORD_Q 3 #define SIMDE_CMP_NEQ_UQ 4 #define SIMDE_CMP_NLT_US 5 #define SIMDE_CMP_NLE_US 6 #define SIMDE_CMP_ORD_Q 7 #define SIMDE_CMP_EQ_UQ 8 #define SIMDE_CMP_NGE_US 9 #define SIMDE_CMP_NGT_US 10 #define SIMDE_CMP_FALSE_OQ 11 #define SIMDE_CMP_NEQ_OQ 12 #define SIMDE_CMP_GE_OS 13 #define SIMDE_CMP_GT_OS 14 #define SIMDE_CMP_TRUE_UQ 15 #define SIMDE_CMP_EQ_OS 16 #define SIMDE_CMP_LT_OQ 17 #define SIMDE_CMP_LE_OQ 18 #define SIMDE_CMP_UNORD_S 19 #define SIMDE_CMP_NEQ_US 20 #define SIMDE_CMP_NLT_UQ 21 #define SIMDE_CMP_NLE_UQ 22 #define SIMDE_CMP_ORD_S 23 #define SIMDE_CMP_EQ_US 24 #define SIMDE_CMP_NGE_UQ 25 #define SIMDE_CMP_NGT_UQ 26 #define 
SIMDE_CMP_FALSE_OS 27 #define SIMDE_CMP_NEQ_OS 28 #define SIMDE_CMP_GE_OQ 29 #define SIMDE_CMP_GT_OQ 30 #define SIMDE_CMP_TRUE_US 31 #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) && !defined(_CMP_EQ_OQ) #define _CMP_EQ_OQ SIMDE_CMP_EQ_OQ #define _CMP_LT_OS SIMDE_CMP_LT_OS #define _CMP_LE_OS SIMDE_CMP_LE_OS #define _CMP_UNORD_Q SIMDE_CMP_UNORD_Q #define _CMP_NEQ_UQ SIMDE_CMP_NEQ_UQ #define _CMP_NLT_US SIMDE_CMP_NLT_US #define _CMP_NLE_US SIMDE_CMP_NLE_US #define _CMP_ORD_Q SIMDE_CMP_ORD_Q #define _CMP_EQ_UQ SIMDE_CMP_EQ_UQ #define _CMP_NGE_US SIMDE_CMP_NGE_US #define _CMP_NGT_US SIMDE_CMP_NGT_US #define _CMP_FALSE_OQ SIMDE_CMP_FALSE_OQ #define _CMP_NEQ_OQ SIMDE_CMP_NEQ_OQ #define _CMP_GE_OS SIMDE_CMP_GE_OS #define _CMP_GT_OS SIMDE_CMP_GT_OS #define _CMP_TRUE_UQ SIMDE_CMP_TRUE_UQ #define _CMP_EQ_OS SIMDE_CMP_EQ_OS #define _CMP_LT_OQ SIMDE_CMP_LT_OQ #define _CMP_LE_OQ SIMDE_CMP_LE_OQ #define _CMP_UNORD_S SIMDE_CMP_UNORD_S #define _CMP_NEQ_US SIMDE_CMP_NEQ_US #define _CMP_NLT_UQ SIMDE_CMP_NLT_UQ #define _CMP_NLE_UQ SIMDE_CMP_NLE_UQ #define _CMP_ORD_S SIMDE_CMP_ORD_S #define _CMP_EQ_US SIMDE_CMP_EQ_US #define _CMP_NGE_UQ SIMDE_CMP_NGE_UQ #define _CMP_NGT_UQ SIMDE_CMP_NGT_UQ #define _CMP_FALSE_OS SIMDE_CMP_FALSE_OS #define _CMP_NEQ_OS SIMDE_CMP_NEQ_OS #define _CMP_GE_OQ SIMDE_CMP_GE_OQ #define _CMP_GT_OQ SIMDE_CMP_GT_OQ #define _CMP_TRUE_US SIMDE_CMP_TRUE_US #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_castps_pd (simde__m256 a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_castps_pd(a); #else return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_castps_pd #define _mm256_castps_pd(a) simde_mm256_castps_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_castps_si256 (simde__m256 a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_castps_si256(a); #else return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef 
_mm256_castps_si256 #define _mm256_castps_si256(a) simde_mm256_castps_si256(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_castsi256_pd (simde__m256i a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_castsi256_pd(a); #else return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_castsi256_pd #define _mm256_castsi256_pd(a) simde_mm256_castsi256_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_castsi256_ps (simde__m256i a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_castsi256_ps(a); #else return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_castsi256_ps #define _mm256_castsi256_ps(a) simde_mm256_castsi256_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_castpd_ps (simde__m256d a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_castpd_ps(a); #else return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_castpd_ps #define _mm256_castpd_ps(a) simde_mm256_castpd_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_castpd_si256 (simde__m256d a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_castpd_si256(a); #else return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_castpd_si256 #define _mm256_castpd_si256(a) simde_mm256_castpd_si256(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_setzero_si256 (void) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_setzero_si256(); #else simde__m256i_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_setzero_si128(); r_.m128i[1] = simde_mm_setzero_si128(); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = 0; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) 
#undef _mm256_setzero_si256 #define _mm256_setzero_si256() simde_mm256_setzero_si256() #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_setzero_ps (void) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_setzero_ps(); #else return simde_mm256_castsi256_ps(simde_mm256_setzero_si256()); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_setzero_ps #define _mm256_setzero_ps() simde_mm256_setzero_ps() #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_setzero_pd (void) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_setzero_pd(); #else return simde_mm256_castsi256_pd(simde_mm256_setzero_si256()); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_setzero_pd #define _mm256_setzero_pd() simde_mm256_setzero_pd() #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_x_mm256_not_ps(simde__m256 a) { simde__m256_private r_, a_ = simde__m256_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = ~a_.i32; #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]); r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = ~(a_.i32[i]); } #endif return simde__m256_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_x_mm256_select_ps(simde__m256 a, simde__m256 b, simde__m256 mask) { /* This function is for when you want to blend two elements together * according to a mask. It is similar to _mm256_blendv_ps, except that * it is undefined whether the blend is based on the highest bit in * each lane (like blendv) or just bitwise operations. This allows * us to implement the function efficiently everywhere. * * Basically, you promise that all the lanes in mask are either 0 or * ~0. 
*/ #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_blendv_ps(a, b, mask); #else simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b), mask_ = simde__m256_to_private(mask); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); } #endif return simde__m256_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_x_mm256_not_pd(simde__m256d a) { simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = ~a_.i64; #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]); r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = ~(a_.i64[i]); } #endif return simde__m256d_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_x_mm256_select_pd(simde__m256d a, simde__m256d b, simde__m256d mask) { /* This function is for when you want to blend two elements together * according to a mask. It is similar to _mm256_blendv_pd, except that * it is undefined whether the blend is based on the highest bit in * each lane (like blendv) or just bitwise operations. This allows * us to implement the function efficiently everywhere. * * Basically, you promise that all the lanes in mask are either 0 or * ~0. 
*/ #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_blendv_pd(a, b, mask); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b), mask_ = simde__m256d_to_private(mask); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); } #endif return simde__m256d_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_x_mm256_setone_si256 (void) { simde__m256i_private r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) __typeof__(r_.i32f) rv = { 0, }; r_.i32f = ~rv; #elif defined(SIMDE_X86_AVX2_NATIVE) __m256i t = _mm256_setzero_si256(); r_.n = _mm256_cmpeq_epi32(t, t); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); } #endif return simde__m256i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_x_mm256_setone_ps (void) { return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256()); } SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_x_mm256_setone_pd (void) { return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256()); } SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, 
e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0); #else simde__m256i_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_set_epi8( e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0); r_.m128i[1] = simde_mm_set_epi8( e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16); #else r_.i8[ 0] = e0; r_.i8[ 1] = e1; r_.i8[ 2] = e2; r_.i8[ 3] = e3; r_.i8[ 4] = e4; r_.i8[ 5] = e5; r_.i8[ 6] = e6; r_.i8[ 7] = e7; r_.i8[ 8] = e8; r_.i8[ 9] = e9; r_.i8[10] = e10; r_.i8[11] = e11; r_.i8[12] = e12; r_.i8[13] = e13; r_.i8[14] = e14; r_.i8[15] = e15; r_.i8[16] = e16; r_.i8[17] = e17; r_.i8[18] = e18; r_.i8[19] = e19; r_.i8[20] = e20; r_.i8[21] = e21; r_.i8[22] = e22; r_.i8[23] = e23; r_.i8[24] = e24; r_.i8[25] = e25; r_.i8[26] = e26; r_.i8[27] = e27; r_.i8[28] = e28; r_.i8[29] = e29; r_.i8[30] = e30; r_.i8[31] = e31; #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_set_epi8 #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0); #else simde__m256i_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_set_epi16( e7, e6, e5, e4, e3, e2, e1, e0); r_.m128i[1] = simde_mm_set_epi16(e15, e14, 
e13, e12, e11, e10, e9, e8); #else r_.i16[ 0] = e0; r_.i16[ 1] = e1; r_.i16[ 2] = e2; r_.i16[ 3] = e3; r_.i16[ 4] = e4; r_.i16[ 5] = e5; r_.i16[ 6] = e6; r_.i16[ 7] = e7; r_.i16[ 8] = e8; r_.i16[ 9] = e9; r_.i16[10] = e10; r_.i16[11] = e11; r_.i16[12] = e12; r_.i16[13] = e13; r_.i16[14] = e14; r_.i16[15] = e15; #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_set_epi16 #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0); #else simde__m256i_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0); r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4); #else r_.i32[ 0] = e0; r_.i32[ 1] = e1; r_.i32[ 2] = e2; r_.i32[ 3] = e3; r_.i32[ 4] = e4; r_.i32[ 5] = e5; r_.i32[ 6] = e6; r_.i32[ 7] = e7; #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_set_epi32 #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_set_epi64x(e3, e2, e1, e0); #else simde__m256i_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_set_epi64x(e1, e0); r_.m128i[1] = simde_mm_set_epi64x(e3, e2); #else r_.i64[0] = e0; r_.i64[1] = e1; r_.i64[2] = e2; r_.i64[3] = e3; #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_set_epi64x #define 
_mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { simde__m256i_private r_; r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; r_.u8[16] = e16; r_.u8[17] = e17; r_.u8[18] = e18; r_.u8[19] = e19; r_.u8[20] = e20; r_.u8[20] = e20; r_.u8[21] = e21; r_.u8[22] = e22; r_.u8[23] = e23; r_.u8[24] = e24; r_.u8[25] = e25; r_.u8[26] = e26; r_.u8[27] = e27; r_.u8[28] = e28; r_.u8[29] = e29; r_.u8[30] = e30; r_.u8[31] = e31; return simde__m256i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { simde__m256i_private r_; r_.u16[ 0] = e0; r_.u16[ 1] = e1; r_.u16[ 2] = e2; r_.u16[ 3] = e3; r_.u16[ 4] = e4; r_.u16[ 5] = e5; r_.u16[ 6] = e6; r_.u16[ 7] = e7; r_.u16[ 8] = e8; r_.u16[ 9] = e9; r_.u16[10] = e10; r_.u16[11] = e11; r_.u16[12] = e12; r_.u16[13] = e13; r_.u16[14] = e14; r_.u16[15] = e15; return simde__m256i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { #if defined(SIMDE_X86_AVX_NATIVE) 
return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4), HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); #else simde__m256i_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4)); #else r_.u32[ 0] = e0; r_.u32[ 1] = e1; r_.u32[ 2] = e2; r_.u32[ 3] = e3; r_.u32[ 4] = e4; r_.u32[ 5] = e5; r_.u32[ 6] = e6; r_.u32[ 7] = e7; #endif return simde__m256i_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_x_mm256_set_epu64x (uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { simde__m256i_private r_; r_.u64[0] = e0; r_.u64[1] = e1; r_.u64[2] = e2; r_.u64[3] = e3; return simde__m256i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0); #else simde__m256_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0); r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4); #else r_.f32[0] = e0; r_.f32[1] = e1; r_.f32[2] = e2; r_.f32[3] = e3; r_.f32[4] = e4; r_.f32[5] = e5; r_.f32[6] = e6; r_.f32[7] = e7; #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_set_ps #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d 
simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_set_pd(e3, e2, e1, e0); #else simde__m256d_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128d[0] = simde_mm_set_pd(e1, e0); r_.m128d[1] = simde_mm_set_pd(e3, e2); #else r_.f64[0] = e0; r_.f64[1] = e1; r_.f64[2] = e2; r_.f64[3] = e3; #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_set_pd #define _mm256_set_pd(e3, e2, e1, e0) \ simde_mm256_set_pd(e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1); #else simde__m256_private r_; simde__m128_private e1_ = simde__m128_to_private(e1), e0_ = simde__m128_to_private(e0); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128_private[0] = e0_; r_.m128_private[1] = e1_; #elif defined(SIMDE_HAVE_INT128_) r_.i128[0] = e0_.i128[0]; r_.i128[1] = e1_.i128[0]; #else r_.i64[0] = e0_.i64[0]; r_.i64[1] = e0_.i64[1]; r_.i64[2] = e1_.i64[0]; r_.i64[3] = e1_.i64[1]; #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_set_m128 #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1); #else simde__m256d_private r_; simde__m128d_private e1_ = simde__m128d_to_private(e1), e0_ = simde__m128d_to_private(e0); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128d_private[0] = e0_; r_.m128d_private[1] = e1_; #else r_.i64[0] = e0_.i64[0]; r_.i64[1] = e0_.i64[1]; r_.i64[2] = e1_.i64[0]; r_.i64[3] = e1_.i64[1]; #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef 
_mm256_set_m128d #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1); #else simde__m256i_private r_; simde__m128i_private e1_ = simde__m128i_to_private(e1), e0_ = simde__m128i_to_private(e0); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i_private[0] = e0_; r_.m128i_private[1] = e1_; #else r_.i64[0] = e0_.i64[0]; r_.i64[1] = e0_.i64[1]; r_.i64[2] = e1_.i64[0]; r_.i64[3] = e1_.i64[1]; #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_set_m128i #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_set1_epi8 (int8_t a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_set1_epi8(a); #else simde__m256i_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_set1_epi8(a); r_.m128i[1] = simde_mm_set1_epi8(a); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_set1_epi8 #define _mm256_set1_epi8(a) simde_mm256_set1_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_set1_epi16 (int16_t a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_set1_epi16(a); #else simde__m256i_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_set1_epi16(a); r_.m128i[1] = simde_mm_set1_epi16(a); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_set1_epi16 #define _mm256_set1_epi16(a) simde_mm256_set1_epi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i 
simde_mm256_set1_epi32 (int32_t a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_set1_epi32(a); #else simde__m256i_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_set1_epi32(a); r_.m128i[1] = simde_mm_set1_epi32(a); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_set1_epi32 #define _mm256_set1_epi32(a) simde_mm256_set1_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_set1_epi64x (int64_t a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_set1_epi64x(a); #else simde__m256i_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_set1_epi64x(a); r_.m128i[1] = simde_mm_set1_epi64x(a); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_set1_epi64x #define _mm256_set1_epi64x(a) simde_mm256_set1_epi64x(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_set1_ps (simde_float32 a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_set1_ps(a); #else simde__m256_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128[0] = simde_mm_set1_ps(a); r_.m128[1] = simde_mm_set1_ps(a); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a; } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_set1_ps #define _mm256_set1_ps(a) simde_mm256_set1_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_set1_pd (simde_float64 a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_set1_pd(a); #else simde__m256d_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128d[0] = simde_mm_set1_pd(a); r_.m128d[1] = simde_mm_set1_pd(a); #else SIMDE_VECTORIZE 
for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a; } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_set1_pd #define _mm256_set1_pd(a) simde_mm256_set1_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_x_mm256_deinterleaveeven_epi16 (simde__m256i a, simde__m256i b) { simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30); #else const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; for (size_t i = 0 ; i < quarter_point ; i++) { r_.i16[i] = a_.i16[2 * i]; r_.i16[i + quarter_point] = b_.i16[2 * i]; r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i]; r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i]; } #endif return simde__m256i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_x_mm256_deinterleaveodd_epi16 (simde__m256i a, simde__m256i b) { simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31); #else const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; for (size_t i = 0 ; i < quarter_point ; i++) { 
r_.i16[i] = a_.i16[2 * i + 1]; r_.i16[i + quarter_point] = b_.i16[2 * i + 1]; r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i + 1]; r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i + 1]; } #endif return simde__m256i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_x_mm256_deinterleaveeven_epi32 (simde__m256i a, simde__m256i b) { simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 2, 8, 10, 4, 6, 12, 14); #else const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; for (size_t i = 0 ; i < quarter_point ; i++) { r_.i32[i] = a_.i32[2 * i]; r_.i32[i + quarter_point] = b_.i32[2 * i]; r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i]; r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i]; } #endif return simde__m256i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_x_mm256_deinterleaveodd_epi32 (simde__m256i a, simde__m256i b) { simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 1, 3, 9, 11, 5, 7, 13, 15); #else const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; for (size_t i = 0 ; i < quarter_point ; i++) { r_.i32[i] = a_.i32[2 * i + 1]; r_.i32[i + quarter_point] = b_.i32[2 * 
i + 1]; r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i + 1]; r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i + 1]; } #endif return simde__m256i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_x_mm256_deinterleaveeven_ps (simde__m256 a, simde__m256 b) { simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128[0] = simde_x_mm_deinterleaveeven_ps(a_.m128[0], b_.m128[0]); r_.m128[1] = simde_x_mm_deinterleaveeven_ps(a_.m128[1], b_.m128[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 2, 8, 10, 4, 6, 12, 14); #else const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; for (size_t i = 0 ; i < quarter_point ; i++) { r_.f32[i] = a_.f32[2 * i]; r_.f32[i + quarter_point] = b_.f32[2 * i]; r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i]; r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i]; } #endif return simde__m256_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_x_mm256_deinterleaveodd_ps (simde__m256 a, simde__m256 b) { simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128[0] = simde_x_mm_deinterleaveodd_ps(a_.m128[0], b_.m128[0]); r_.m128[1] = simde_x_mm_deinterleaveodd_ps(a_.m128[1], b_.m128[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 1, 3, 9, 11, 5, 7, 13, 15); #else const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; for (size_t i = 0 ; i < quarter_point ; i++) { r_.f32[i] = a_.f32[2 * i + 1]; r_.f32[i + quarter_point] = b_.f32[2 * i + 1]; r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i + 1]; r_.f32[halfway_point + i + quarter_point] 
= b_.f32[halfway_point + 2 * i + 1]; } #endif return simde__m256_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_x_mm256_deinterleaveeven_pd (simde__m256d a, simde__m256d b) { simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128d[0] = simde_x_mm_deinterleaveeven_pd(a_.m128d[0], b_.m128d[0]); r_.m128d[1] = simde_x_mm_deinterleaveeven_pd(a_.m128d[1], b_.m128d[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); #else const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; for (size_t i = 0 ; i < quarter_point ; i++) { r_.f64[i] = a_.f64[2 * i]; r_.f64[i + quarter_point] = b_.f64[2 * i]; r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i]; r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i]; } #endif return simde__m256d_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_x_mm256_deinterleaveodd_pd (simde__m256d a, simde__m256d b) { simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128d[0] = simde_x_mm_deinterleaveodd_pd(a_.m128d[0], b_.m128d[0]); r_.m128d[1] = simde_x_mm_deinterleaveodd_pd(a_.m128d[1], b_.m128d[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); #else const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; for (size_t i = 0 ; i < quarter_point ; i++) { r_.f64[i] = a_.f64[2 * i + 1]; r_.f64[i + quarter_point] = b_.f64[2 * i + 1]; r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i + 1]; r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i + 1]; } #endif return simde__m256d_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES 
simde__m256 simde_x_mm256_abs_ps(simde__m256 a) { simde__m256_private r_, a_ = simde__m256_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_fabsf(a_.f32[i]); } return simde__m256_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_x_mm256_abs_pd(simde__m256d a) { simde__m256d_private r_, a_ = simde__m256d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_fabs(a_.f64[i]); } return simde__m256d_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_add_ps (simde__m256 a, simde__m256 b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_add_ps(a, b); #else simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128[0] = simde_mm_add_ps(a_.m128[0], b_.m128[0]); r_.m128[1] = simde_mm_add_ps(a_.m128[1], b_.m128[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = a_.f32 + b_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[i] + b_.f32[i]; } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_add_ps #define _mm256_add_ps(a, b) simde_mm256_add_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_hadd_ps (simde__m256 a, simde__m256 b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_hadd_ps(a, b); #else return simde_mm256_add_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_hadd_ps #define _mm256_hadd_ps(a, b) simde_mm256_hadd_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_add_pd (simde__m256d a, simde__m256d b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_add_pd(a, b); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = 
simde__m256d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128d[0] = simde_mm_add_pd(a_.m128d[0], b_.m128d[0]); r_.m128d[1] = simde_mm_add_pd(a_.m128d[1], b_.m128d[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = a_.f64 + b_.f64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[i] + b_.f64[i]; } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_add_pd #define _mm256_add_pd(a, b) simde_mm256_add_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_hadd_pd (simde__m256d a, simde__m256d b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_hadd_pd(a, b); #else return simde_mm256_add_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_hadd_pd #define _mm256_hadd_pd(a, b) simde_mm256_hadd_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_addsub_ps (simde__m256 a, simde__m256 b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_addsub_ps(a, b); #else simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128[0] = simde_mm_addsub_ps(a_.m128[0], b_.m128[0]); r_.m128[1] = simde_mm_addsub_ps(a_.m128[1], b_.m128[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1]; } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_addsub_ps #define _mm256_addsub_ps(a, b) simde_mm256_addsub_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_addsub_pd (simde__m256d a, simde__m256d b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_addsub_pd(a, b); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = 
simde__m256d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128d[0] = simde_mm_addsub_pd(a_.m128d[0], b_.m128d[0]); r_.m128d[1] = simde_mm_addsub_pd(a_.m128d[1], b_.m128d[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1]; } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_addsub_pd #define _mm256_addsub_pd(a, b) simde_mm256_addsub_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_and_ps (simde__m256 a, simde__m256 b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_and_ps(a, b); #else simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128[0] = simde_mm_and_ps(a_.m128[0], b_.m128[0]); r_.m128[1] = simde_mm_and_ps(a_.m128[1], b_.m128[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f & b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_and_ps #define _mm256_and_ps(a, b) simde_mm256_and_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_and_pd (simde__m256d a, simde__m256d b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_and_pd(a, b); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128d[0] = simde_mm_and_pd(a_.m128d[0], b_.m128d[0]); r_.m128d[1] = simde_mm_and_pd(a_.m128d[1], b_.m128d[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f & b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; } #endif return simde__m256d_from_private(r_); 
#endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_and_pd #define _mm256_and_pd(a, b) simde_mm256_and_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_andnot_ps (simde__m256 a, simde__m256 b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_andnot_ps(a, b); #else simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128[0] = simde_mm_andnot_ps(a_.m128[0], b_.m128[0]); r_.m128[1] = simde_mm_andnot_ps(a_.m128[1], b_.m128[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = ~a_.i32f & b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_andnot_ps #define _mm256_andnot_ps(a, b) simde_mm256_andnot_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_andnot_pd (simde__m256d a, simde__m256d b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_andnot_pd(a, b); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128d[0] = simde_mm_andnot_pd(a_.m128d[0], b_.m128d[0]); r_.m128d[1] = simde_mm_andnot_pd(a_.m128d[1], b_.m128d[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = ~a_.i32f & b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_andnot_pd #define _mm256_andnot_pd(a, b) simde_mm256_andnot_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_blend_ps (simde__m256 a, simde__m256 b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); 
SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
    /* Bit i of imm8 picks the source for element i: 1 -> b, 0 -> a. */
    r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i];
  }

  return simde__m256_from_private(r_);
}
/* Macro overrides for simde_mm256_blend_ps:
 *  - with native AVX the call is forwarded straight to _mm256_blend_ps;
 *  - when the natural vector size is at most 128 bits, the blend is done per
 *    128-bit half: bits 4..7 of imm8 control the upper half and bits 0..3
 *    control the lower half. */
#if defined(SIMDE_X86_AVX_NATIVE)
#  define simde_mm256_blend_ps(a, b, imm8) _mm256_blend_ps(a, b, imm8)
#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)
#  define simde_mm256_blend_ps(a, b, imm8) \
      simde_mm256_set_m128( \
          simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8) >> 4), \
          simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8) & 0x0F))
#endif
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_blend_ps
  #define _mm256_blend_ps(a, b, imm8) simde_mm256_blend_ps(a, b, imm8)
#endif

/* Blends two vectors of four doubles under control of the low four bits of
 * imm8: bit i selects b's element i when set, a's element i otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_blend_pd (simde__m256d a, simde__m256d b, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) {
  simde__m256d_private
    r_,
    a_ = simde__m256d_to_private(a),
    b_ = simde__m256d_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
    r_.f64[i] = ((imm8 >> i) & 1) ?
b_.f64[i] : a_.f64[i];
  }

  return simde__m256d_from_private(r_);
}
/* Macro overrides for simde_mm256_blend_pd:
 *  - with native AVX the call is forwarded straight to _mm256_blend_pd;
 *  - when the natural vector size is at most 128 bits, the blend is done per
 *    128-bit half: bits 2..3 of imm8 control the upper half and bits 0..1
 *    control the lower half. */
#if defined(SIMDE_X86_AVX_NATIVE)
#  define simde_mm256_blend_pd(a, b, imm8) _mm256_blend_pd(a, b, imm8)
#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)
#  define simde_mm256_blend_pd(a, b, imm8) \
      simde_mm256_set_m128d( \
          simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8) >> 2), \
          simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8) & 3))
#endif
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_blend_pd
  #define _mm256_blend_pd(a, b, imm8) simde_mm256_blend_pd(a, b, imm8)
#endif

/* Variable blend of two vectors of eight floats: for each element, the most
 * significant bit of the corresponding mask element selects b when set and
 * a otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_blendv_ps (simde__m256 a, simde__m256 b, simde__m256 mask) {
  #if defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_blendv_ps(a, b, mask);
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a),
      b_ = simde__m256_to_private(b),
      mask_ = simde__m256_to_private(mask);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      r_.m128[0] = simde_mm_blendv_ps(a_.m128[0], b_.m128[0], mask_.m128[0]);
      r_.m128[1] = simde_mm_blendv_ps(a_.m128[1], b_.m128[1], mask_.m128[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        /* Only bit 31 of the mask element is tested. */
        r_.f32[i] = (mask_.u32[i] & (UINT32_C(1) << 31)) ?
b_.f32[i] : a_.f32[i]; } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_blendv_ps #define _mm256_blendv_ps(a, b, imm8) simde_mm256_blendv_ps(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_blendv_pd (simde__m256d a, simde__m256d b, simde__m256d mask) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_blendv_pd(a, b, mask); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b), mask_ = simde__m256d_to_private(mask); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128d[0] = simde_mm_blendv_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); r_.m128d[1] = simde_mm_blendv_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.f64[i] = (mask_.u64[i] & (UINT64_C(1) << 63)) ? b_.f64[i] : a_.f64[i]; } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_blendv_pd #define _mm256_blendv_pd(a, b, imm8) simde_mm256_blendv_pd(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_broadcast_pd (simde__m128d const * mem_addr) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_broadcast_pd(mem_addr); #else simde__m256d_private r_; simde__m128d tmp = simde_mm_loadu_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, mem_addr)); r_.m128d[0] = tmp; r_.m128d[1] = tmp; return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_broadcast_pd #define _mm256_broadcast_pd(mem_addr) simde_mm256_broadcast_pd(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_broadcast_ps (simde__m128 const * mem_addr) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_broadcast_ps(mem_addr); #else simde__m256_private r_; simde__m128 tmp = simde_mm_loadu_ps(HEDLEY_REINTERPRET_CAST(simde_float32 const*, mem_addr)); r_.m128[0] = tmp; r_.m128[1] = tmp; return 
simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_broadcast_ps #define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_broadcast_sd (simde_float64 const * a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_broadcast_sd(a); #else return simde_mm256_set1_pd(*a); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_broadcast_sd #define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_broadcast_ss (simde_float32 const * a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm_broadcast_ss(a); #else return simde_mm_set1_ps(*a); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm_broadcast_ss #define _mm_broadcast_ss(mem_addr) simde_mm_broadcast_ss(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_broadcast_ss (simde_float32 const * a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_broadcast_ss(a); #else return simde_mm256_set1_ps(*a); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_broadcast_ss #define _mm256_broadcast_ss(mem_addr) simde_mm256_broadcast_ss(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_castpd128_pd256 (simde__m128d a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_castpd128_pd256(a); #else simde__m256d_private r_; simde__m128d_private a_ = simde__m128d_to_private(a); r_.m128d_private[0] = a_; return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_castpd128_pd256 #define _mm256_castpd128_pd256(a) simde_mm256_castpd128_pd256(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm256_castpd256_pd128 (simde__m256d a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_castpd256_pd128(a); #else simde__m256d_private a_ = 
simde__m256d_to_private(a); return a_.m128d[0]; #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_castpd256_pd128 #define _mm256_castpd256_pd128(a) simde_mm256_castpd256_pd128(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_castps128_ps256 (simde__m128 a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_castps128_ps256(a); #else simde__m256_private r_; simde__m128_private a_ = simde__m128_to_private(a); r_.m128_private[0] = a_; return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_castps128_ps256 #define _mm256_castps128_ps256(a) simde_mm256_castps128_ps256(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm256_castps256_ps128 (simde__m256 a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_castps256_ps128(a); #else simde__m256_private a_ = simde__m256_to_private(a); return a_.m128[0]; #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_castps256_ps128 #define _mm256_castps256_ps128(a) simde_mm256_castps256_ps128(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_castsi128_si256 (simde__m128i a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_castsi128_si256(a); #else simde__m256i_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); r_.m128i_private[0] = a_; return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_castsi128_si256 #define _mm256_castsi128_si256(a) simde_mm256_castsi128_si256(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm256_castsi256_si128 (simde__m256i a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_castsi256_si128(a); #else simde__m256i_private a_ = simde__m256i_to_private(a); return a_.m128i[0]; #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_castsi256_si128 #define _mm256_castsi256_si128(a) simde_mm256_castsi256_si128(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_round_ps (simde__m256 a, const int 
rounding) { simde__m256_private r_, a_ = simde__m256_to_private(a); switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { #if defined(simde_math_nearbyintf) case SIMDE_MM_FROUND_CUR_DIRECTION: for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); } break; #endif #if defined(simde_math_roundf) case SIMDE_MM_FROUND_TO_NEAREST_INT: for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_roundf(a_.f32[i]); } break; #endif #if defined(simde_math_floorf) case SIMDE_MM_FROUND_TO_NEG_INF: for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_floorf(a_.f32[i]); } break; #endif #if defined(simde_math_ceilf) case SIMDE_MM_FROUND_TO_POS_INF: for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_ceilf(a_.f32[i]); } break; #endif #if defined(simde_math_truncf) case SIMDE_MM_FROUND_TO_ZERO: for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_truncf(a_.f32[i]); } break; #endif default: HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_ps()); } return simde__m256_from_private(r_); } #if defined(SIMDE_X86_AVX_NATIVE) # define simde_mm256_round_ps(a, rounding) _mm256_round_ps(a, rounding) #endif #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_round_ps #define _mm256_round_ps(a, rounding) simde_mm256_round_ps(a, rounding) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_round_pd (simde__m256d a, const int rounding) { simde__m256d_private r_, a_ = simde__m256d_to_private(a); switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { #if defined(simde_math_nearbyint) case SIMDE_MM_FROUND_CUR_DIRECTION: for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_nearbyint(a_.f64[i]); } break; #endif #if defined(simde_math_round) case SIMDE_MM_FROUND_TO_NEAREST_INT: for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { 
r_.f64[i] = simde_math_round(a_.f64[i]); } break; #endif #if defined(simde_math_floor) case SIMDE_MM_FROUND_TO_NEG_INF: for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_floor(a_.f64[i]); } break; #endif #if defined(simde_math_ceil) case SIMDE_MM_FROUND_TO_POS_INF: for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_ceil(a_.f64[i]); } break; #endif #if defined(simde_math_trunc) case SIMDE_MM_FROUND_TO_ZERO: for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_trunc(a_.f64[i]); } break; #endif default: HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_pd()); } return simde__m256d_from_private(r_); } #if defined(SIMDE_X86_AVX_NATIVE) # define simde_mm256_round_pd(a, rounding) _mm256_round_pd(a, rounding) #endif #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_round_pd #define _mm256_round_pd(a, rounding) simde_mm256_round_pd(a, rounding) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_ceil_pd (simde__m256d a) { return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_ceil_pd #define _mm256_ceil_pd(a) simde_mm256_ceil_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_ceil_ps (simde__m256 a) { return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_ceil_ps #define _mm256_ceil_ps(a) simde_mm256_ceil_ps(a) #endif HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL /* This implementation does not support signaling NaNs (yet?) 
*/ SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmp_pd (simde__m128d a, simde__m128d b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { switch (imm8) { case SIMDE_CMP_EQ_OQ: case SIMDE_CMP_EQ_UQ: case SIMDE_CMP_EQ_OS: case SIMDE_CMP_EQ_US: return simde_mm_cmpeq_pd(a, b); break; case SIMDE_CMP_LT_OS: case SIMDE_CMP_NGE_US: case SIMDE_CMP_LT_OQ: case SIMDE_CMP_NGE_UQ: return simde_mm_cmplt_pd(a, b); break; case SIMDE_CMP_LE_OS: case SIMDE_CMP_NGT_US: case SIMDE_CMP_LE_OQ: case SIMDE_CMP_NGT_UQ: return simde_mm_cmple_pd(a, b); break; case SIMDE_CMP_NEQ_UQ: case SIMDE_CMP_NEQ_OQ: case SIMDE_CMP_NEQ_US: case SIMDE_CMP_NEQ_OS: return simde_mm_cmpneq_pd(a, b); break; case SIMDE_CMP_NLT_US: case SIMDE_CMP_GE_OS: case SIMDE_CMP_NLT_UQ: case SIMDE_CMP_GE_OQ: return simde_mm_cmpge_pd(a, b); break; case SIMDE_CMP_NLE_US: case SIMDE_CMP_GT_OS: case SIMDE_CMP_NLE_UQ: case SIMDE_CMP_GT_OQ: return simde_mm_cmpgt_pd(a, b); break; case SIMDE_CMP_FALSE_OQ: case SIMDE_CMP_FALSE_OS: return simde_mm_setzero_pd(); break; case SIMDE_CMP_TRUE_UQ: case SIMDE_CMP_TRUE_US: return simde_x_mm_setone_pd(); break; case SIMDE_CMP_UNORD_Q: case SIMDE_CMP_UNORD_S: return simde_mm_cmpunord_pd(a, b); break; case SIMDE_CMP_ORD_Q: case SIMDE_CMP_ORD_S: return simde_mm_cmpord_pd(a, b); break; } HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_pd()); } #if defined(SIMDE_X86_AVX_NATIVE) && (!defined(__clang__) || !defined(__AVX512F__)) # define simde_mm_cmp_pd(a, b, imm8) _mm_cmp_pd(a, b, imm8) #endif #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm_cmp_pd #define _mm_cmp_pd(a, b, imm8) simde_mm_cmp_pd(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmp_ps (simde__m128 a, simde__m128 b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { switch (imm8) { case SIMDE_CMP_EQ_OQ: case SIMDE_CMP_EQ_UQ: case SIMDE_CMP_EQ_OS: case SIMDE_CMP_EQ_US: return simde_mm_cmpeq_ps(a, b); break; case SIMDE_CMP_LT_OS: case SIMDE_CMP_NGE_US: case SIMDE_CMP_LT_OQ: case 
SIMDE_CMP_NGE_UQ: return simde_mm_cmplt_ps(a, b); break; case SIMDE_CMP_LE_OS: case SIMDE_CMP_NGT_US: case SIMDE_CMP_LE_OQ: case SIMDE_CMP_NGT_UQ: return simde_mm_cmple_ps(a, b); break; case SIMDE_CMP_NEQ_UQ: case SIMDE_CMP_NEQ_OQ: case SIMDE_CMP_NEQ_US: case SIMDE_CMP_NEQ_OS: return simde_mm_cmpneq_ps(a, b); break; case SIMDE_CMP_NLT_US: case SIMDE_CMP_GE_OS: case SIMDE_CMP_NLT_UQ: case SIMDE_CMP_GE_OQ: return simde_mm_cmpge_ps(a, b); break; case SIMDE_CMP_NLE_US: case SIMDE_CMP_GT_OS: case SIMDE_CMP_NLE_UQ: case SIMDE_CMP_GT_OQ: return simde_mm_cmpgt_ps(a, b); break; case SIMDE_CMP_FALSE_OQ: case SIMDE_CMP_FALSE_OS: return simde_mm_setzero_ps(); break; case SIMDE_CMP_TRUE_UQ: case SIMDE_CMP_TRUE_US: return simde_x_mm_setone_ps(); break; case SIMDE_CMP_UNORD_Q: case SIMDE_CMP_UNORD_S: return simde_mm_cmpunord_ps(a, b); break; case SIMDE_CMP_ORD_Q: case SIMDE_CMP_ORD_S: return simde_mm_cmpord_ps(a, b); break; } HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_ps()); } /* Prior to 9.0 clang has problems with _mm{,256}_cmp_{ps,pd} for all four of the true/false comparisons, but only when AVX-512 is enabled. __FILE_NAME__ was added in 9.0, so that's what we use to check for clang 9 since the version macros are unreliable. */ #if defined(SIMDE_X86_AVX_NATIVE) && (!defined(__clang__) || !defined(__AVX512F__)) # define simde_mm_cmp_ps(a, b, imm8) _mm_cmp_ps(a, b, imm8) #endif #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm_cmp_ps #define _mm_cmp_ps(a, b, imm8) simde_mm_cmp_ps(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmp_sd (simde__m128d a, simde__m128d b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); switch (imm8) { case SIMDE_CMP_EQ_OQ: r_.u64[0] = (a_.f64[0] == b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_LT_OS: r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_LE_OS: r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_UNORD_Q: #if defined(simde_math_isnan) r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? ~UINT64_C(0) : UINT64_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_UQ: r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NLT_US: r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NLE_US: r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_ORD_Q: #if defined(simde_math_isnan) r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? ~UINT64_C(0) : UINT64_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_UQ: r_.u64[0] = (a_.f64[0] == b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NGE_US: r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NGT_US: r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_FALSE_OQ: r_.u64[0] = UINT64_C(0); break; case SIMDE_CMP_NEQ_OQ: r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_GE_OS: r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_GT_OS: r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_TRUE_UQ: r_.u64[0] = ~UINT64_C(0); break; case SIMDE_CMP_EQ_OS: r_.u64[0] = (a_.f64[0] == b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_LT_OQ: r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_LE_OQ: r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_UNORD_S: #if defined(simde_math_isnan) r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_US: r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NLT_UQ: r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NLE_UQ: r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_ORD_S: #if defined(simde_math_isnan) r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? UINT64_C(0) : ~UINT64_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_US: r_.u64[0] = (a_.f64[0] == b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NGE_UQ: r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NGT_UQ: r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_FALSE_OS: r_.u64[0] = UINT64_C(0); break; case SIMDE_CMP_NEQ_OS: r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_GE_OQ: r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_GT_OQ: r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_TRUE_US: r_.u64[0] = ~UINT64_C(0); break; } r_.u64[1] = a_.u64[1]; return simde__m128d_from_private(r_); } #if defined(SIMDE_X86_AVX_NATIVE) # define simde_mm_cmp_sd(a, b, imm8) _mm_cmp_sd(a, b, imm8) #endif #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm_cmp_sd #define _mm_cmp_sd(a, b, imm8) simde_mm_cmp_sd(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmp_ss (simde__m128 a, simde__m128 b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); switch (imm8) { case SIMDE_CMP_EQ_OQ: r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_LT_OS: r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_LE_OS: r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_UNORD_Q: #if defined(simde_math_isnanf) r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? ~UINT32_C(0) : UINT32_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_UQ: r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NLT_US: r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NLE_US: r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_ORD_Q: #if defined(simde_math_isnanf) r_.u32[0] = (!simde_math_isnanf(a_.f32[0]) && !simde_math_isnanf(b_.f32[0])) ? ~UINT32_C(0) : UINT32_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_UQ: r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NGE_US: r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NGT_US: r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_FALSE_OQ: r_.u32[0] = UINT32_C(0); break; case SIMDE_CMP_NEQ_OQ: r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_GE_OS: r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_GT_OS: r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_TRUE_UQ: r_.u32[0] = ~UINT32_C(0); break; case SIMDE_CMP_EQ_OS: r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_LT_OQ: r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_LE_OQ: r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_UNORD_S: #if defined(simde_math_isnanf) r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_US: r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NLT_UQ: r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NLE_UQ: r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_ORD_S: #if defined(simde_math_isnanf) r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? UINT32_C(0) : ~UINT32_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_US: r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NGE_UQ: r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NGT_UQ: r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_FALSE_OS: r_.u32[0] = UINT32_C(0); break; case SIMDE_CMP_NEQ_OS: r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_GE_OQ: r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_GT_OQ: r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_TRUE_US: r_.u32[0] = ~UINT32_C(0); break; } r_.u32[1] = a_.u32[1]; r_.u32[2] = a_.u32[2]; r_.u32[3] = a_.u32[3]; return simde__m128_from_private(r_); } #if defined(SIMDE_X86_AVX_NATIVE) # define simde_mm_cmp_ss(a, b, imm8) _mm_cmp_ss(a, b, imm8) #endif #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm_cmp_ss #define _mm_cmp_ss(a, b, imm8) simde_mm_cmp_ss(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_cmp_pd (simde__m256d a, simde__m256d b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) switch (imm8) { case SIMDE_CMP_EQ_OQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); break; case SIMDE_CMP_LT_OS: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); break; case SIMDE_CMP_LE_OS: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); break; case SIMDE_CMP_UNORD_Q: #if defined(simde_math_isnan) for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); } #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_UQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); break; case SIMDE_CMP_NLT_US: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); break; case SIMDE_CMP_NLE_US: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); break; case SIMDE_CMP_ORD_Q: #if defined(simde_math_isnan) for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? 
~UINT64_C(0) : UINT64_C(0); } #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_UQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); break; case SIMDE_CMP_NGE_US: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); break; case SIMDE_CMP_NGT_US: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); break; case SIMDE_CMP_FALSE_OQ: r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); break; case SIMDE_CMP_NEQ_OQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); break; case SIMDE_CMP_GE_OS: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); break; case SIMDE_CMP_GT_OS: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); break; case SIMDE_CMP_TRUE_UQ: r_ = simde__m256d_to_private(simde_x_mm256_setone_pd()); break; case SIMDE_CMP_EQ_OS: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); break; case SIMDE_CMP_LT_OQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); break; case SIMDE_CMP_LE_OQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); break; case SIMDE_CMP_UNORD_S: #if defined(simde_math_isnan) for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); } #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_US: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); break; case SIMDE_CMP_NLT_UQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); break; case SIMDE_CMP_NLE_UQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); break; case SIMDE_CMP_ORD_S: #if defined(simde_math_isnan) for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? 
UINT64_C(0) : ~UINT64_C(0); } #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_US: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); break; case SIMDE_CMP_NGE_UQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); break; case SIMDE_CMP_NGT_UQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); break; case SIMDE_CMP_FALSE_OS: r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); break; case SIMDE_CMP_NEQ_OS: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); break; case SIMDE_CMP_GE_OQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); break; case SIMDE_CMP_GT_OQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); break; case SIMDE_CMP_TRUE_US: r_ = simde__m256d_to_private(simde_x_mm256_setone_pd()); break; default: HEDLEY_UNREACHABLE(); break; } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { switch (imm8) { case SIMDE_CMP_EQ_OQ: r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_LT_OS: r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_LE_OS: r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_UNORD_Q: r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NEQ_UQ: r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NLT_US: r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NLE_US: r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_ORD_Q: #if defined(simde_math_isnan) r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_UQ: r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NGE_US: r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NGT_US: r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_FALSE_OQ: r_.u64[i] = UINT64_C(0); break; case SIMDE_CMP_NEQ_OQ: r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_GE_OS: r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_GT_OS: r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_TRUE_UQ: r_.u64[i] = ~UINT64_C(0); break; case SIMDE_CMP_EQ_OS: r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_LT_OQ: r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_LE_OQ: r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_UNORD_S: #if defined(simde_math_isnan) r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_US: r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NLT_UQ: r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NLE_UQ: r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_ORD_S: #if defined(simde_math_isnan) r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? UINT64_C(0) : ~UINT64_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_US: r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NGE_UQ: r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NGT_UQ: r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_FALSE_OS: r_.u64[i] = UINT64_C(0); break; case SIMDE_CMP_NEQ_OS: r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_GE_OQ: r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_GT_OQ: r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_TRUE_US: r_.u64[i] = ~UINT64_C(0); break; default: HEDLEY_UNREACHABLE(); break; } } #endif return simde__m256d_from_private(r_); } #if defined(SIMDE_X86_AVX_NATIVE) && (!defined(__clang__) || !defined(__AVX512F__)) # define simde_mm256_cmp_pd(a, b, imm8) _mm256_cmp_pd(a, b, imm8) #endif #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_cmp_pd #define _mm256_cmp_pd(a, b, imm8) simde_mm256_cmp_pd(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_cmp_ps (simde__m256 a, simde__m256 b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) switch (imm8) { case SIMDE_CMP_EQ_OQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); break; case SIMDE_CMP_LT_OS: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); break; case SIMDE_CMP_LE_OS: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); break; case SIMDE_CMP_UNORD_Q: #if defined(simde_math_isnanf) for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? 
~UINT32_C(0) : UINT32_C(0); } #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_UQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); break; case SIMDE_CMP_NLT_US: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); break; case SIMDE_CMP_NLE_US: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); break; case SIMDE_CMP_ORD_Q: #if defined(simde_math_isnanf) for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (!simde_math_isnanf(a_.f32[i]) && !simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); } #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_UQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); break; case SIMDE_CMP_NGE_US: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); break; case SIMDE_CMP_NGT_US: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); break; case SIMDE_CMP_FALSE_OQ: r_ = simde__m256_to_private(simde_mm256_setzero_ps()); break; case SIMDE_CMP_NEQ_OQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); break; case SIMDE_CMP_GE_OS: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); break; case SIMDE_CMP_GT_OS: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); break; case SIMDE_CMP_TRUE_UQ: r_ = simde__m256_to_private(simde_x_mm256_setone_ps()); break; case SIMDE_CMP_EQ_OS: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); break; case SIMDE_CMP_LT_OQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); break; case SIMDE_CMP_LE_OQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); break; case SIMDE_CMP_UNORD_S: #if defined(simde_math_isnanf) for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? 
~UINT32_C(0) : UINT32_C(0); } #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_US: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); break; case SIMDE_CMP_NLT_UQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); break; case SIMDE_CMP_NLE_UQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); break; case SIMDE_CMP_ORD_S: #if defined(simde_math_isnanf) for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); } #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_US: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); break; case SIMDE_CMP_NGE_UQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); break; case SIMDE_CMP_NGT_UQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); break; case SIMDE_CMP_FALSE_OS: r_ = simde__m256_to_private(simde_mm256_setzero_ps()); break; case SIMDE_CMP_NEQ_OS: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); break; case SIMDE_CMP_GE_OQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); break; case SIMDE_CMP_GT_OQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); break; case SIMDE_CMP_TRUE_US: r_ = simde__m256_to_private(simde_x_mm256_setone_ps()); break; default: HEDLEY_UNREACHABLE(); break; } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { switch (imm8) { case SIMDE_CMP_EQ_OQ: r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_LT_OS: r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_LE_OS: r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_UNORD_Q: #if defined(simde_math_isnanf) r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? 
~UINT32_C(0) : UINT32_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_UQ: r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NLT_US: r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NLE_US: r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_ORD_Q: #if defined(simde_math_isnanf) r_.u32[i] = (!simde_math_isnanf(a_.f32[i]) && !simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_UQ: r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NGE_US: r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NGT_US: r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_FALSE_OQ: r_.u32[i] = UINT32_C(0); break; case SIMDE_CMP_NEQ_OQ: r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_GE_OS: r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_GT_OS: r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_TRUE_UQ: r_.u32[i] = ~UINT32_C(0); break; case SIMDE_CMP_EQ_OS: r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_LT_OQ: r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_LE_OQ: r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_UNORD_S: #if defined(simde_math_isnanf) r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_US: r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NLT_UQ: r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NLE_UQ: r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_ORD_S: #if defined(simde_math_isnanf) r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_US: r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NGE_UQ: r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NGT_UQ: r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_FALSE_OS: r_.u32[i] = UINT32_C(0); break; case SIMDE_CMP_NEQ_OS: r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_GE_OQ: r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_GT_OQ: r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_TRUE_US: r_.u32[i] = ~UINT32_C(0); break; default: HEDLEY_UNREACHABLE(); break; } } #endif return simde__m256_from_private(r_); } #if defined(SIMDE_X86_AVX_NATIVE) && (!defined(__clang__) || !defined(__AVX512F__)) # define simde_mm256_cmp_ps(a, b, imm8) _mm256_cmp_ps(a, b, imm8) #endif #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_cmp_ps #define _mm256_cmp_ps(a, b, imm8) simde_mm256_cmp_ps(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_x_mm256_copysign_ps(simde__m256 dest, simde__m256 src) { simde__m256_private r_, dest_ = simde__m256_to_private(dest), src_ = simde__m256_to_private(src); #if defined(simde_math_copysignf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); } #else simde__m256 sgnbit = simde_mm256_xor_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm256_set1_ps(-SIMDE_FLOAT32_C(0.0))); return simde_mm256_xor_ps(simde_mm256_and_ps(sgnbit, src), simde_mm256_andnot_ps(sgnbit, dest)); #endif return simde__m256_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES 
simde__m256d simde_x_mm256_copysign_pd(simde__m256d dest, simde__m256d src) { simde__m256d_private r_, dest_ = simde__m256d_to_private(dest), src_ = simde__m256d_to_private(src); #if defined(simde_math_copysign) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); } #else simde__m256d sgnbit = simde_mm256_xor_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm256_set1_pd(-SIMDE_FLOAT64_C(0.0))); return simde_mm256_xor_pd(simde_mm256_and_pd(sgnbit, src), simde_mm256_andnot_pd(sgnbit, dest)); #endif return simde__m256d_from_private(r_); } HEDLEY_DIAGNOSTIC_POP /* -Wfloat-equal */ SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_cvtepi32_pd (simde__m128i a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_cvtepi32_pd(a); #else simde__m256d_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i32[i]); } return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_cvtepi32_pd #define _mm256_cvtepi32_pd(a) simde_mm256_cvtepi32_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_cvtepi32_ps (simde__m256i a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_cvtepi32_ps(a); #else simde__m256_private r_; simde__m256i_private a_ = simde__m256i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]); } return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_cvtepi32_ps #define _mm256_cvtepi32_ps(a) simde_mm256_cvtepi32_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm256_cvtpd_epi32 (simde__m256d a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_cvtpd_epi32(a); #else simde__m128i_private r_; 
simde__m256d_private a_ = simde__m256d_to_private(a);

    #if defined(simde_math_nearbyint)
      /* Round each double with simde_math_nearbyint, then convert to int32. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) {
        r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyint(a_.f64[i]));
      }
    #else
      /* No portable implementation without simde_math_nearbyint. */
      HEDLEY_UNREACHABLE();
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cvtpd_epi32
  #define _mm256_cvtpd_epi32(a) simde_mm256_cvtpd_epi32(a)
#endif

/* Narrow each double-precision lane of `a` to a single-precision lane. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm256_cvtpd_ps (simde__m256d a) {
  #if defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_cvtpd_ps(a);
  #else
    simde__m128_private r_;
    simde__m256d_private a_ = simde__m256d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cvtpd_ps
  #define _mm256_cvtpd_ps(a) simde_mm256_cvtpd_ps(a)
#endif

/* Convert each single-precision lane of `a` to int32 after rounding it with
 * simde_math_nearbyintf. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_cvtps_epi32 (simde__m256 a) {
  #if defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_cvtps_epi32(a);
  #else
    simde__m256i_private r_;
    simde__m256_private a_ = simde__m256_to_private(a);

    #if defined(simde_math_nearbyintf)
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {
        r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyintf(a_.f32[i]));
      }
    #else
      /* No portable implementation without simde_math_nearbyintf. */
      HEDLEY_UNREACHABLE();
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cvtps_epi32
  #define _mm256_cvtps_epi32(a) simde_mm256_cvtps_epi32(a)
#endif

/* Widen each single-precision lane of `a` to a double-precision lane. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_cvtps_pd (simde__m128 a) {
  #if defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_cvtps_pd(a);
  #else
    simde__m256d_private r_;
    simde__m128_private a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {
      r_.f64[i] =
HEDLEY_STATIC_CAST(double, a_.f32[i]);
    }

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cvtps_pd
  #define _mm256_cvtps_pd(a) simde_mm256_cvtps_pd(a)
#endif

/* Return the lowest double-precision lane of `a`.  The native intrinsic is
 * only used on compiler versions that pass the checks below. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float64
simde_mm256_cvtsd_f64 (simde__m256d a) {
  #if defined(SIMDE_X86_AVX_NATIVE) && ( \
      SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \
      HEDLEY_GCC_VERSION_CHECK(7,0,0) || \
      HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
      HEDLEY_MSVC_VERSION_CHECK(19,14,0))
    return _mm256_cvtsd_f64(a);
  #else
    simde__m256d_private a_ = simde__m256d_to_private(a);
    return a_.f64[0];
  #endif
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cvtsd_f64
  #define _mm256_cvtsd_f64(a) simde_mm256_cvtsd_f64(a)
#endif

/* Return the lowest 32-bit integer lane of `a`.
 * NOTE(review): unlike the two sibling accessors there is no GCC version check
 * here, so GCC always takes the portable path; confirm this is intentional. */
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_mm256_cvtsi256_si32 (simde__m256i a) {
  #if defined(SIMDE_X86_AVX_NATIVE) && ( \
      SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \
      HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
      HEDLEY_MSVC_VERSION_CHECK(19,14,0))
    return _mm256_cvtsi256_si32(a);
  #else
    simde__m256i_private a_ = simde__m256i_to_private(a);
    return a_.i32[0];
  #endif
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cvtsi256_si32
  #define _mm256_cvtsi256_si32(a) simde_mm256_cvtsi256_si32(a)
#endif

/* Return the lowest single-precision lane of `a`. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float32
simde_mm256_cvtss_f32 (simde__m256 a) {
  #if defined(SIMDE_X86_AVX_NATIVE) && ( \
      SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \
      HEDLEY_GCC_VERSION_CHECK(7,0,0) || \
      HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \
      HEDLEY_MSVC_VERSION_CHECK(19,14,0))
    return _mm256_cvtss_f32(a);
  #else
    simde__m256_private a_ = simde__m256_to_private(a);
    return a_.f32[0];
  #endif
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cvtss_f32
  #define _mm256_cvtss_f32(a) simde_mm256_cvtss_f32(a)
#endif

/* Convert each double-precision lane of `a` to int32 with truncation. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm256_cvttpd_epi32 (simde__m256d a) {
  #if defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_cvttpd_epi32(a);
  #else
    simde__m128i_private r_;
    simde__m256d_private a_ =
simde__m256d_to_private(a);

    #if defined(simde_math_trunc)
      /* Truncate toward zero with simde_math_trunc, then convert to int32. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) {
        r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_trunc(a_.f64[i]));
      }
    #else
      /* No portable implementation without simde_math_trunc. */
      HEDLEY_UNREACHABLE();
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cvttpd_epi32
  #define _mm256_cvttpd_epi32(a) simde_mm256_cvttpd_epi32(a)
#endif

/* Convert each single-precision lane of `a` to int32 after truncating it with
 * simde_math_truncf. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_cvttps_epi32 (simde__m256 a) {
  #if defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_cvttps_epi32(a);
  #else
    simde__m256i_private r_;
    simde__m256_private a_ = simde__m256_to_private(a);

    #if defined(simde_math_truncf)
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {
        r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_truncf(a_.f32[i]));
      }
    #else
      /* No portable implementation without simde_math_truncf. */
      HEDLEY_UNREACHABLE();
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_cvttps_epi32
  #define _mm256_cvttps_epi32(a) simde_mm256_cvttps_epi32(a)
#endif

/* Lane-wise single-precision division a / b.  The fallback picks, in order:
 * two 128-bit halves, a whole-vector operator, or a scalar loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_div_ps (simde__m256 a, simde__m256 b) {
  #if defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_div_ps(a, b);
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a),
      b_ = simde__m256_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      r_.m128[0] = simde_mm_div_ps(a_.m128[0], b_.m128[0]);
      r_.m128[1] = simde_mm_div_ps(a_.m128[1], b_.m128[1]);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.f32 = a_.f32 / b_.f32;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = a_.f32[i] / b_.f32[i];
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_div_ps
  #define _mm256_div_ps(a, b) simde_mm256_div_ps(a, b)
#endif

/* Lane-wise double-precision division a / b. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_div_pd (simde__m256d a, simde__m256d b) {
  #if defined(SIMDE_X86_AVX_NATIVE)
    return
_mm256_div_pd(a, b);
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a),
      b_ = simde__m256d_to_private(b);

    /* Same strategy order as the _ps variant: halves, vector operator, loop. */
    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      r_.m128d[0] = simde_mm_div_pd(a_.m128d[0], b_.m128d[0]);
      r_.m128d[1] = simde_mm_div_pd(a_.m128d[1], b_.m128d[1]);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.f64 = a_.f64 / b_.f64;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = a_.f64[i] / b_.f64[i];
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_div_pd
  #define _mm256_div_pd(a, b) simde_mm256_div_pd(a, b)
#endif

/* Return the 128-bit half of `a` selected by imm8 (0 or 1, per the
 * SIMDE_REQUIRE_CONSTANT_RANGE annotation). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm256_extractf128_pd (simde__m256d a, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) {
  simde__m256d_private a_ = simde__m256d_to_private(a);
  return a_.m128d[imm8];
}
/* With native AVX the macro below replaces the function at call sites. */
#if defined(SIMDE_X86_AVX_NATIVE)
#  define simde_mm256_extractf128_pd(a, imm8) _mm256_extractf128_pd(a, imm8)
#endif
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_extractf128_pd
  #define _mm256_extractf128_pd(a, imm8) simde_mm256_extractf128_pd(a, imm8)
#endif

/* Single-precision counterpart: return half imm8 (0 or 1) of `a`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm256_extractf128_ps (simde__m256 a, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) {
  simde__m256_private a_ = simde__m256_to_private(a);
  return a_.m128[imm8];
}
#if defined(SIMDE_X86_AVX_NATIVE)
#  define simde_mm256_extractf128_ps(a, imm8) _mm256_extractf128_ps(a, imm8)
#endif
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_extractf128_ps
  #define _mm256_extractf128_ps(a, imm8) simde_mm256_extractf128_ps(a, imm8)
#endif

/* Integer counterpart: return half imm8 (0 or 1) of `a`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm256_extractf128_si256 (simde__m256i a, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) {
  simde__m256i_private a_ = simde__m256i_to_private(a);
  return a_.m128i[imm8];
}
#if defined(SIMDE_X86_AVX_NATIVE)
#  define simde_mm256_extractf128_si256(a, imm8) _mm256_extractf128_si256(a, imm8)
#endif
#if
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_extractf128_si256
  #define _mm256_extractf128_si256(a, imm8) simde_mm256_extractf128_si256(a, imm8)
#endif

/* Floor of each double lane, implemented as a round toward negative infinity. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_floor_pd (simde__m256d a) {
  return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF);
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_floor_pd
  #define _mm256_floor_pd(a) simde_mm256_floor_pd(a)
#endif

/* Floor of each float lane, implemented as a round toward negative infinity. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_floor_ps (simde__m256 a) {
  return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF);
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_floor_ps
  #define _mm256_floor_ps(a) simde_mm256_floor_ps(a)
#endif

/* Return a copy of `a` with 8-bit lane `index` (0..31) replaced by `i`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_insert_epi8 (simde__m256i a, int8_t i, const int index)
    SIMDE_REQUIRE_RANGE(index, 0, 31) {
  simde__m256i_private a_ = simde__m256i_to_private(a);

  a_.i8[index] = i;

  return simde__m256i_from_private(a_);
}
#if defined(SIMDE_X86_AVX_NATIVE)
  #define simde_mm256_insert_epi8(a, i, index) _mm256_insert_epi8(a, i, index)
#endif
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_insert_epi8
  #define _mm256_insert_epi8(a, i, index) simde_mm256_insert_epi8(a, i, index)
#endif

/* Return a copy of `a` with 16-bit lane `index` (0..15) replaced by `i`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_insert_epi16 (simde__m256i a, int16_t i, const int index)
    SIMDE_REQUIRE_RANGE(index, 0, 15) {
  simde__m256i_private a_ = simde__m256i_to_private(a);

  a_.i16[index] = i;

  return simde__m256i_from_private(a_);
}
#if defined(SIMDE_X86_AVX_NATIVE)
  #define simde_mm256_insert_epi16(a, i, index) _mm256_insert_epi16(a, i, index)
#endif
/* The alias names its third macro parameter imm8 where the siblings use
 * index; this is only a naming difference. */
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_insert_epi16
  #define _mm256_insert_epi16(a, i, imm8) simde_mm256_insert_epi16(a, i, imm8)
#endif

/* Return a copy of `a` with 32-bit lane `index` (0..7) replaced by `i`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_insert_epi32 (simde__m256i a, int32_t i, const int index)
    SIMDE_REQUIRE_RANGE(index, 0, 7) {
  simde__m256i_private a_ = simde__m256i_to_private(a);

  a_.i32[index] = i;

  return
simde__m256i_from_private(a_);
}
#if defined(SIMDE_X86_AVX_NATIVE)
  #define simde_mm256_insert_epi32(a, i, index) _mm256_insert_epi32(a, i, index)
#endif
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_insert_epi32
  #define _mm256_insert_epi32(a, i, index) simde_mm256_insert_epi32(a, i, index)
#endif

/* Return a copy of `a` with 64-bit lane `index` (0..3) replaced by `i`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index)
    SIMDE_REQUIRE_RANGE(index, 0, 3) {
  simde__m256i_private a_ = simde__m256i_to_private(a);

  a_.i64[index] = i;

  return simde__m256i_from_private(a_);
}
/* Native form requires AMD64, a recent enough MSVC (if MSVC), and the clang
 * version check below.
 * NOTE(review): the clang check is not guarded by a "clang only" test, so it
 * presumably disables the native path on non-clang compilers; confirm. */
#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \
    (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \
    SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0)
  #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index)
#endif
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_insert_epi64
  #define _mm256_insert_epi64(a, i, index) simde_mm256_insert_epi64(a, i, index)
#endif

/* Return a copy of `a` with its 128-bit half imm8 (0 or 1) replaced by `b`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d simde_mm256_insertf128_pd(simde__m256d a, simde__m128d b, int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) {
  simde__m256d_private a_ = simde__m256d_to_private(a);
  simde__m128d_private b_ = simde__m128d_to_private(b);

  a_.m128d_private[imm8] = b_;

  return simde__m256d_from_private(a_);
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_insertf128_pd
  #define _mm256_insertf128_pd(a, b, imm8) simde_mm256_insertf128_pd(a, b, imm8)
#endif

/* Single-precision counterpart: replace half imm8 (0 or 1) of `a` with `b`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256 simde_mm256_insertf128_ps(simde__m256 a, simde__m128 b, int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) {
  simde__m256_private a_ = simde__m256_to_private(a);
  simde__m128_private b_ = simde__m128_to_private(b);

  a_.m128_private[imm8] = b_;

  return simde__m256_from_private(a_);
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_insertf128_ps
  #define _mm256_insertf128_ps(a, b, imm8) simde_mm256_insertf128_ps(a, b, imm8)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) {
  /* Integer counterpart: replace half imm8 (0 or 1) of `a` with `b`. */
  simde__m256i_private a_ = simde__m256i_to_private(a);
  simde__m128i_private b_ = simde__m128i_to_private(b);

  a_.m128i_private[imm8] = b_;

  return simde__m256i_from_private(a_);
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_insertf128_si256
  #define _mm256_insertf128_si256(a, b, imm8) simde_mm256_insertf128_si256(a, b, imm8)
#endif

/* Dot product: natively a single intrinsic; otherwise simde_mm_dp_ps is
 * applied to each 128-bit half and the halves are recombined (high half is
 * the first argument of simde_mm256_set_m128). */
#if defined(SIMDE_X86_AVX_NATIVE)
#  define simde_mm256_dp_ps(a, b, imm8) _mm256_dp_ps(a, b, imm8)
#else
#  define simde_mm256_dp_ps(a, b, imm8) \
    simde_mm256_set_m128( \
      simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), imm8), \
      simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), imm8))
#endif
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_dp_ps
  #define _mm256_dp_ps(a, b, imm8) simde_mm256_dp_ps(a, b, imm8)
#endif

/* Return 32-bit lane `index` (0..7) of `a`. */
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_mm256_extract_epi32 (simde__m256i a, const int index)
    SIMDE_REQUIRE_RANGE(index, 0, 7) {
  simde__m256i_private a_ = simde__m256i_to_private(a);
  return a_.i32[index];
}
#if defined(SIMDE_X86_AVX_NATIVE)
  #define simde_mm256_extract_epi32(a, index) _mm256_extract_epi32(a, index)
#endif
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_extract_epi32
  #define _mm256_extract_epi32(a, index) simde_mm256_extract_epi32(a, index)
#endif

/* Return 64-bit lane `index` (0..3) of `a`. */
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_mm256_extract_epi64 (simde__m256i a, const int index)
    SIMDE_REQUIRE_RANGE(index, 0, 3) {
  simde__m256i_private a_ = simde__m256i_to_private(a);
  return a_.i64[index];
}
/* Native form is limited to AMD64 and, on MSVC, to version 19.20 or newer. */
#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64)
  #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)
    #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index)
  #endif
#endif
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_extract_epi64
  #define _mm256_extract_epi64(a,
index) simde_mm256_extract_epi64(a, index) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_lddqu_si256 (simde__m256i const * mem_addr) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_loadu_si256(mem_addr); #else simde__m256i r; simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); return r; #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_lddqu_si256 #define _mm256_lddqu_si256(a) simde_mm256_lddqu_si256(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_load_pd (const double mem_addr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_load_pd(mem_addr); #else simde__m256d r; simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), sizeof(r)); return r; #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_load_pd #define _mm256_load_pd(a) simde_mm256_load_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_load_ps (const float mem_addr[HEDLEY_ARRAY_PARAM(8)]) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_load_ps(mem_addr); #else simde__m256 r; simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), sizeof(r)); return r; #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_load_ps #define _mm256_load_ps(a) simde_mm256_load_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_load_si256 (simde__m256i const * mem_addr) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_load_si256(mem_addr); #else simde__m256i r; simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); return r; #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_load_si256 #define _mm256_load_si256(a) simde_mm256_load_si256(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_loadu_pd (const double a[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_loadu_pd(a); #else simde__m256d r; simde_memcpy(&r, a, sizeof(r)); return r; #endif } #if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_loadu_pd
  #define _mm256_loadu_pd(a) simde_mm256_loadu_pd(a)
#endif

/* Unaligned load of eight floats; plain byte copy in the fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_loadu_ps (const float a[HEDLEY_ARRAY_PARAM(8)]) {
  #if defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_loadu_ps(a);
  #else
    simde__m256 r;
    simde_memcpy(&r, a, sizeof(r));
    return r;
  #endif
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_loadu_ps
  #define _mm256_loadu_ps(a) simde_mm256_loadu_ps(a)
#endif

/* Unaligned 256-bit integer load.  Uses the AVX-512VL+BW intrinsic when both
 * are native and SIMDE_BUG_GCC_95483 is not defined, else the plain AVX
 * unaligned load, else a byte copy. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_loadu_epi8(void const * mem_addr) {
  #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_BUG_GCC_95483)
    return _mm256_loadu_epi8(mem_addr);
  #elif defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr));
  #else
    simde__m256i r;
    simde_memcpy(&r, mem_addr, sizeof(r));
    return r;
  #endif
}
/* simde_x_ spelling forwards to the function above. */
#define simde_x_mm256_loadu_epi8(mem_addr) simde_mm256_loadu_epi8(mem_addr)
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && defined(SIMDE_BUG_GCC_95483))
  #undef _mm256_loadu_epi8
  #define _mm256_loadu_epi8(a) simde_mm256_loadu_epi8(a)
#endif

/* Unaligned 256-bit integer load; same dispatch as the epi8 variant. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_loadu_epi16(void const * mem_addr) {
  #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_BUG_GCC_95483)
    return _mm256_loadu_epi16(mem_addr);
  #elif defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr));
  #else
    simde__m256i r;
    simde_memcpy(&r, mem_addr, sizeof(r));
    return r;
  #endif
}
#define simde_x_mm256_loadu_epi16(mem_addr) simde_mm256_loadu_epi16(mem_addr)
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && defined(SIMDE_BUG_GCC_95483))
  #undef _mm256_loadu_epi16
  #define _mm256_loadu_epi16(a)
simde_mm256_loadu_epi16(a)
#endif

/* Unaligned 256-bit integer load.  Uses the AVX-512VL intrinsic when native
 * and SIMDE_BUG_GCC_95483 is not defined, else the plain AVX unaligned load,
 * else a byte copy. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_loadu_epi32(void const * mem_addr) {
  #if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483)
    return _mm256_loadu_epi32(mem_addr);
  #elif defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr));
  #else
    simde__m256i r;
    simde_memcpy(&r, mem_addr, sizeof(r));
    return r;
  #endif
}
/* simde_x_ spelling forwards to the function above. */
#define simde_x_mm256_loadu_epi32(mem_addr) simde_mm256_loadu_epi32(mem_addr)
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && defined(SIMDE_BUG_GCC_95483))
  #undef _mm256_loadu_epi32
  #define _mm256_loadu_epi32(a) simde_mm256_loadu_epi32(a)
#endif

/* Unaligned 256-bit integer load; same dispatch as the epi32 variant. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_loadu_epi64(void const * mem_addr) {
  #if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483)
    return _mm256_loadu_epi64(mem_addr);
  #elif defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr));
  #else
    simde__m256i r;
    simde_memcpy(&r, mem_addr, sizeof(r));
    return r;
  #endif
}
#define simde_x_mm256_loadu_epi64(mem_addr) simde_mm256_loadu_epi64(mem_addr)
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && defined(SIMDE_BUG_GCC_95483))
  #undef _mm256_loadu_epi64
  #define _mm256_loadu_epi64(a) simde_mm256_loadu_epi64(a)
#endif

/* Unaligned 256-bit integer load from an untyped pointer. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_loadu_si256 (void const * mem_addr) {
  #if defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_loadu_si256(SIMDE_ALIGN_CAST(const __m256i*, mem_addr));
  #else
    simde__m256i r;
    simde_memcpy(&r, mem_addr, sizeof(r));
    return r;
  #endif
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_loadu_si256
  #define _mm256_loadu_si256(mem_addr) simde_mm256_loadu_si256(mem_addr)
#endif

/* Build a 256-bit vector from two independent unaligned 128-bit loads:
 * loaddr fills the low half and hiaddr the high half. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_loadu2_m128 (const float hiaddr[HEDLEY_ARRAY_PARAM(4)], const float loaddr[HEDLEY_ARRAY_PARAM(4)]) {
  #if
defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) return _mm256_loadu2_m128(hiaddr, loaddr); #else return simde_mm256_insertf128_ps(simde_mm256_castps128_ps256(simde_mm_loadu_ps(loaddr)), simde_mm_loadu_ps(hiaddr), 1); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_loadu2_m128 #define _mm256_loadu2_m128(hiaddr, loaddr) simde_mm256_loadu2_m128(hiaddr, loaddr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_loadu2_m128d (const double hiaddr[HEDLEY_ARRAY_PARAM(2)], const double loaddr[HEDLEY_ARRAY_PARAM(2)]) { #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) return _mm256_loadu2_m128d(hiaddr, loaddr); #else return simde_mm256_insertf128_pd(simde_mm256_castpd128_pd256(simde_mm_loadu_pd(loaddr)), simde_mm_loadu_pd(hiaddr), 1); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_loadu2_m128d #define _mm256_loadu2_m128d(hiaddr, loaddr) simde_mm256_loadu2_m128d(hiaddr, loaddr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_loadu2_m128i (const simde__m128i* hiaddr, const simde__m128i* loaddr) { #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) return _mm256_loadu2_m128i(hiaddr, loaddr); #else return simde_mm256_insertf128_si256(simde_mm256_castsi128_si256(simde_mm_loadu_si128(loaddr)), simde_mm_loadu_si128(hiaddr), 1); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_loadu2_m128i #define _mm256_loadu2_m128i(hiaddr, loaddr) simde_mm256_loadu2_m128i(hiaddr, loaddr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm_maskload_pd(mem_addr, mask); #else simde__m128d_private mem_ = simde__m128d_to_private(simde_mm_loadu_pd(mem_addr)), r_; simde__m128i_private mask_ = simde__m128i_to_private(mask); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vandq_s64(mem_.neon_i64, vshrq_n_s64(mask_.neon_i64, 63)); 
#else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.i64[i] = mem_.i64[i] & (mask_.i64[i] >> 63); } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm_maskload_pd #define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_maskload_pd(mem_addr, mask); #else simde__m256d_private r_; simde__m256i_private mask_ = simde__m256i_to_private(mask); r_ = simde__m256d_to_private(simde_mm256_loadu_pd(mem_addr)); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.i64[i] &= mask_.i64[i] >> 63; } return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_maskload_pd #define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm_maskload_ps(mem_addr, mask); #else simde__m128_private mem_ = simde__m128_to_private(simde_mm_loadu_ps(mem_addr)), r_; simde__m128i_private mask_ = simde__m128i_to_private(mask); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vandq_s32(mem_.neon_i32, vshrq_n_s32(mask_.neon_i32, 31)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = mem_.i32[i] & (mask_.i32[i] >> 31); } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm_maskload_ps #define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_maskload_ps(mem_addr, mask); #else simde__m256_private r_; simde__m256i_private mask_ = simde__m256i_to_private(mask); r_ = simde__m256_to_private(simde_mm256_loadu_ps(mem_addr)); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.i32[i] &= mask_.i32[i] >> 31; } return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_maskload_ps #define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128d a) { #if defined(SIMDE_X86_AVX_NATIVE) _mm_maskstore_pd(mem_addr, mask, a); #else simde__m128i_private mask_ = simde__m128i_to_private(mask); simde__m128d_private a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { if (mask_.u64[i] >> 63) mem_addr[i] = a_.f64[i]; } #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm_maskstore_pd #define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256d a) { #if defined(SIMDE_X86_AVX_NATIVE) _mm256_maskstore_pd(mem_addr, mask, a); #else simde__m256i_private mask_ = simde__m256i_to_private(mask); simde__m256d_private a_ = simde__m256d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { if (mask_.u64[i] & (UINT64_C(1) << 63)) mem_addr[i] = a_.f64[i]; } #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_maskstore_pd #define 
_mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a)
#endif

/* Masked store of four floats: mem_addr[i] is written only when bit 31 of
 * mask lane i is set. */
SIMDE_FUNCTION_ATTRIBUTES
void
simde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128 a) {
  #if defined(SIMDE_X86_AVX_NATIVE)
    _mm_maskstore_ps(mem_addr, mask, a);
  #else
    simde__m128i_private mask_ = simde__m128i_to_private(mask);
    simde__m128_private a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {
      if (mask_.u32[i] & (UINT32_C(1) << 31))
        mem_addr[i] = a_.f32[i];
    }
  #endif
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm_maskstore_ps
  #define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a)
#endif

/* Masked store of eight floats: mem_addr[i] is written only when bit 31 of
 * mask lane i is set. */
SIMDE_FUNCTION_ATTRIBUTES
void
simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256 a) {
  #if defined(SIMDE_X86_AVX_NATIVE)
    _mm256_maskstore_ps(mem_addr, mask, a);
  #else
    simde__m256i_private mask_ = simde__m256i_to_private(mask);
    simde__m256_private a_ = simde__m256_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {
      if (mask_.u32[i] & (UINT32_C(1) << 31))
        mem_addr[i] = a_.f32[i];
    }
  #endif
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_maskstore_ps
  #define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a)
#endif

/* Lane-wise single-precision minimum.  The scalar fallback returns b's lane
 * whenever (a < b) is false, which includes the case where either is NaN. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_min_ps (simde__m256 a, simde__m256 b) {
  #if defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_min_ps(a, b);
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a),
      b_ = simde__m256_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      r_.m128[0] = simde_mm_min_ps(a_.m128[0], b_.m128[0]);
      r_.m128[1] = simde_mm_min_ps(a_.m128[1], b_.m128[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i];
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_min_ps
  #define _mm256_min_ps(a, b) simde_mm256_min_ps(a, b)
#endif

/* Lane-wise double-precision minimum.  The scalar fallback returns b's lane
 * whenever (a < b) is false, which includes the case where either is NaN. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_min_pd (simde__m256d a, simde__m256d b) {
  #if defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_min_pd(a, b);
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a),
      b_ = simde__m256d_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      r_.m128d[0] = simde_mm_min_pd(a_.m128d[0], b_.m128d[0]);
      r_.m128d[1] = simde_mm_min_pd(a_.m128d[1], b_.m128d[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i];
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
  #undef _mm256_min_pd
  #define _mm256_min_pd(a, b) simde_mm256_min_pd(a, b)
#endif

/* Lane-wise single-precision maximum.  The scalar fallback returns b's lane
 * whenever (a > b) is false, which includes the case where either is NaN. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_max_ps (simde__m256 a, simde__m256 b) {
  #if defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_max_ps(a, b);
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a),
      b_ = simde__m256_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      r_.m128[0] = simde_mm_max_ps(a_.m128[0], b_.m128[0]);
      r_.m128[1] = simde_mm_max_ps(a_.m128[1], b_.m128[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = (a_.f32[i] > b_.f32[i]) ?
a_.f32[i] : b_.f32[i]; } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_max_ps #define _mm256_max_ps(a, b) simde_mm256_max_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_max_pd (simde__m256d a, simde__m256d b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_max_pd(a, b); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128d[0] = simde_mm_max_pd(a_.m128d[0], b_.m128d[0]); r_.m128d[1] = simde_mm_max_pd(a_.m128d[1], b_.m128d[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_max_pd #define _mm256_max_pd(a, b) simde_mm256_max_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_movedup_pd (simde__m256d a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_movedup_pd(a); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, a_.f64, 0, 0, 2, 2); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { r_.f64[i] = r_.f64[i + 1] = a_.f64[i]; } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_movedup_pd #define _mm256_movedup_pd(a) simde_mm256_movedup_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_movehdup_ps (simde__m256 a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_movehdup_ps(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 1, 1, 3, 3, 5, 5, 7, 7); #else SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 
2) { r_.f32[i - 1] = r_.f32[i] = a_.f32[i]; } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_movehdup_ps #define _mm256_movehdup_ps(a) simde_mm256_movehdup_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_moveldup_ps (simde__m256 a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_moveldup_ps(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 0, 0, 2, 2, 4, 4, 6, 6); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { r_.f32[i] = r_.f32[i + 1] = a_.f32[i]; } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_moveldup_ps #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm256_movemask_ps (simde__m256 a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_movemask_ps(a); #else simde__m256_private a_ = simde__m256_to_private(a); int r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { r |= (a_.u32[i] >> 31) << i; } return r; #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_movemask_ps #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm256_movemask_pd (simde__m256d a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_movemask_pd(a); #else simde__m256d_private a_ = simde__m256d_to_private(a); int r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { r |= (a_.u64[i] >> 63) << i; } return r; #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_movemask_pd #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_mul_ps (simde__m256 a, simde__m256 b) { #if 
defined(SIMDE_X86_AVX_NATIVE) return _mm256_mul_ps(a, b); #else simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]); r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = a_.f32 * b_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[i] * b_.f32[i]; } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_mul_ps #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_mul_pd (simde__m256d a, simde__m256d b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_mul_pd(a, b); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]); r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = a_.f64 * b_.f64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[i] * b_.f64[i]; } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_mul_pd #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_or_ps (simde__m256 a, simde__m256 b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_or_ps(a, b); #else simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]); r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f | b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / 
sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] | b_.u32[i]; } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_or_ps #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_or_pd (simde__m256d a, simde__m256d b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_or_pd(a, b); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]); r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f | b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = a_.u64[i] | b_.u64[i]; } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_or_pd #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_permute_ps (simde__m256 a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m256_private r_, a_ = simde__m256_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3]; } return simde__m256_from_private(r_); } #if defined(SIMDE_X86_AVX_NATIVE) # define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8) #endif #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_permute_ps #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_permute_pd (simde__m256d a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { simde__m256d_private r_, a_ = simde__m256d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i 
& 2)]; } return simde__m256d_from_private(r_); } #if defined(SIMDE_X86_AVX_NATIVE) # define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8) #endif #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_permute_pd #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_permute_ps (simde__m128 a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3]; } return simde__m128_from_private(r_); } #if defined(SIMDE_X86_AVX_NATIVE) # define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8) #endif #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm_permute_ps #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_permute_pd (simde__m128d a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; } return simde__m128d_from_private(r_); } #if defined(SIMDE_X86_AVX_NATIVE) # define simde_mm_permute_pd(a, imm8) _mm_permute_pd(a, imm8) #endif #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm_permute_pd #define _mm_permute_pd(a, imm8) simde_mm_permute_pd(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_permutevar_ps (simde__m128 a, simde__m128i b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm_permutevar_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a); simde__m128i_private b_ = simde__m128i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[b_.i32[i] & 3]; } return simde__m128_from_private(r_); #endif } #if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm_permutevar_ps #define _mm_permutevar_ps(a, b) simde_mm_permutevar_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_permutevar_pd (simde__m128d a, simde__m128i b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm_permutevar_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); simde__m128i_private b_ = simde__m128i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[(b_.i64[i] & 2) >> 1]; } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm_permutevar_pd #define _mm_permutevar_pd(a, b) simde_mm_permutevar_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_permutevar_ps (simde__m256 a, simde__m256i b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_permutevar_ps(a, b); #else simde__m256_private r_, a_ = simde__m256_to_private(a); simde__m256i_private b_ = simde__m256i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[(b_.i32[i] & 3) + (i & 4)]; } return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_permutevar_ps #define _mm256_permutevar_ps(a, b) simde_mm256_permutevar_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_permutevar_pd (simde__m256d a, simde__m256i b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_permutevar_pd(a, b); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); simde__m256i_private b_ = simde__m256i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[((b_.i64[i] & 2) >> 1) + (i & 2)]; } return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_permutevar_pd #define _mm256_permutevar_pd(a, b) simde_mm256_permutevar_pd(a, b) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_permute2f128_ps (simde__m256 a, simde__m256 b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); r_.m128_private[0] = (imm8 & 0x08) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x02) ? b_.m128_private[(imm8 ) & 1] : a_.m128_private[(imm8 ) & 1]); r_.m128_private[1] = (imm8 & 0x80) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x20) ? b_.m128_private[(imm8 >> 4) & 1] : a_.m128_private[(imm8 >> 4) & 1]); return simde__m256_from_private(r_); } #if defined(SIMDE_X86_AVX_NATIVE) # define simde_mm256_permute2f128_ps(a, b, imm8) _mm256_permute2f128_ps(a, b, imm8) #endif #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_permute2f128_ps #define _mm256_permute2f128_ps(a, b, imm8) simde_mm256_permute2f128_ps(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_permute2f128_pd (simde__m256d a, simde__m256d b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); r_.m128d_private[0] = (imm8 & 0x08) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x02) ? b_.m128d_private[(imm8 ) & 1] : a_.m128d_private[(imm8 ) & 1]); r_.m128d_private[1] = (imm8 & 0x80) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x20) ? 
b_.m128d_private[(imm8 >> 4) & 1] : a_.m128d_private[(imm8 >> 4) & 1]); return simde__m256d_from_private(r_); } #if defined(SIMDE_X86_AVX_NATIVE) # define simde_mm256_permute2f128_pd(a, b, imm8) _mm256_permute2f128_pd(a, b, imm8) #endif #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_permute2f128_pd #define _mm256_permute2f128_pd(a, b, imm8) simde_mm256_permute2f128_pd(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_permute2f128_si256 (simde__m256i a, simde__m256i b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX_NATIVE) # define simde_mm256_permute2f128_si128(a, b, imm8) _mm256_permute2f128_si128(a, b, imm8) #endif #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_permute2f128_si256 #define _mm256_permute2f128_si256(a, b, imm8) simde_mm256_permute2f128_si256(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_rcp_ps (simde__m256 a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_rcp_ps(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]); r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i]; } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_rcp_ps #define _mm256_rcp_ps(a) simde_mm256_rcp_ps(a) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_rsqrt_ps (simde__m256 a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_rsqrt_ps(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if defined(simde_math_sqrtf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); } #else HEDLEY_UNREACHABLE(); #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_rsqrt_ps #define _mm256_rsqrt_ps(a) simde_mm256_rsqrt_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_setr_epi8 ( int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_setr_epi8( e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0); #else return simde_mm256_set_epi8( e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_setr_epi8 #define _mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ simde_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_setr_epi16 ( int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t 
e9, int16_t e8, int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_setr_epi16( e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0); #else return simde_mm256_set_epi16( e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_setr_epi16 #define _mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ simde_mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_setr_epi32 ( int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0); #else return simde_mm256_set_epi32(e0, e1, e2, e3, e4, e5, e6, e7); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_setr_epi32 #define _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ simde_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_setr_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_setr_epi64x(e3, e2, e1, e0); #else return simde_mm256_set_epi64x(e0, e1, e2, e3); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_setr_epi64x #define _mm256_setr_epi64x(e3, e2, e1, e0) \ simde_mm256_setr_epi64x(e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_setr_ps ( simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0); #else return simde_mm256_set_ps(e0, e1, e2, e3, e4, e5, e6, e7); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) 
#undef _mm256_setr_ps #define _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ simde_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_setr_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_setr_pd(e3, e2, e1, e0); #else return simde_mm256_set_pd(e0, e1, e2, e3); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_setr_pd #define _mm256_setr_pd(e3, e2, e1, e0) \ simde_mm256_setr_pd(e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_setr_m128 (simde__m128 lo, simde__m128 hi) { #if defined(SIMDE_X86_AVX_NATIVE) && \ !defined(SIMDE_BUG_GCC_REV_247851) && \ SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) return _mm256_setr_m128(lo, hi); #else return simde_mm256_set_m128(hi, lo); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_setr_m128 #define _mm256_setr_m128(lo, hi) \ simde_mm256_setr_m128(lo, hi) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_setr_m128d (simde__m128d lo, simde__m128d hi) { #if defined(SIMDE_X86_AVX_NATIVE) && \ !defined(SIMDE_BUG_GCC_REV_247851) && \ SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) return _mm256_setr_m128d(lo, hi); #else return simde_mm256_set_m128d(hi, lo); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_setr_m128d #define _mm256_setr_m128d(lo, hi) \ simde_mm256_setr_m128d(lo, hi) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_setr_m128i (simde__m128i lo, simde__m128i hi) { #if defined(SIMDE_X86_AVX_NATIVE) && \ !defined(SIMDE_BUG_GCC_REV_247851) && \ SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) return _mm256_setr_m128i(lo, hi); #else return simde_mm256_set_m128i(hi, lo); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_setr_m128i #define _mm256_setr_m128i(lo, hi) \ simde_mm256_setr_m128i(lo, hi) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_shuffle_ps (simde__m256 a, simde__m256 b, 
const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); r_.f32[0] = a_.m128_private[0].f32[(imm8 >> 0) & 3]; r_.f32[1] = a_.m128_private[0].f32[(imm8 >> 2) & 3]; r_.f32[2] = b_.m128_private[0].f32[(imm8 >> 4) & 3]; r_.f32[3] = b_.m128_private[0].f32[(imm8 >> 6) & 3]; r_.f32[4] = a_.m128_private[1].f32[(imm8 >> 0) & 3]; r_.f32[5] = a_.m128_private[1].f32[(imm8 >> 2) & 3]; r_.f32[6] = b_.m128_private[1].f32[(imm8 >> 4) & 3]; r_.f32[7] = b_.m128_private[1].f32[(imm8 >> 6) & 3]; return simde__m256_from_private(r_); } #if defined(SIMDE_X86_AVX_NATIVE) #define simde_mm256_shuffle_ps(a, b, imm8) _mm256_shuffle_ps(a, b, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) #define simde_mm256_shuffle_ps(a, b, imm8) \ simde_mm256_set_m128( \ simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8)), \ simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8))) #elif defined(SIMDE_SHUFFLE_VECTOR_) #define simde_mm256_shuffle_ps(a, b, imm8) \ SIMDE_SHUFFLE_VECTOR_(32, 32, a, b, \ (((imm8) >> 0) & 3) + 0, \ (((imm8) >> 2) & 3) + 0, \ (((imm8) >> 4) & 3) + 8, \ (((imm8) >> 6) & 3) + 8, \ (((imm8) >> 0) & 3) + 4, \ (((imm8) >> 2) & 3) + 4, \ (((imm8) >> 4) & 3) + 12, \ (((imm8) >> 6) & 3) + 12) #endif #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_shuffle_ps #define _mm256_shuffle_ps(a, b, imm8) simde_mm256_shuffle_ps(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); r_.f64[0] = a_.f64[((imm8 ) & 1) ]; r_.f64[1] = b_.f64[((imm8 >> 1) & 1) ]; r_.f64[2] = a_.f64[((imm8 >> 2) & 1) | 2]; r_.f64[3] = b_.f64[((imm8 >> 3) & 1) | 2]; return simde__m256d_from_private(r_); } #if 
defined(SIMDE_X86_AVX_NATIVE) #define simde_mm256_shuffle_pd(a, b, imm8) _mm256_shuffle_pd(a, b, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) #define simde_mm256_shuffle_pd(a, b, imm8) \ simde_mm256_set_m128d( \ simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 0) & 3), \ simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 2) & 3)) #elif defined(SIMDE_SHUFFLE_VECTOR_) #define simde_mm256_shuffle_pd(a, b, imm8) \ SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \ (((imm8) >> 0) & 1) + 0, \ (((imm8) >> 1) & 1) + 4, \ (((imm8) >> 2) & 1) + 2, \ (((imm8) >> 3) & 1) + 6) #endif #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_shuffle_pd #define _mm256_shuffle_pd(a, b, imm8) simde_mm256_shuffle_pd(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_sqrt_ps (simde__m256 a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_sqrt_ps(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128[0] = simde_mm_sqrt_ps(a_.m128[0]); r_.m128[1] = simde_mm_sqrt_ps(a_.m128[1]); #elif defined(simde_math_sqrtf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_sqrtf(a_.f32[i]); } #else HEDLEY_UNREACHABLE(); #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_sqrt_ps #define _mm256_sqrt_ps(a) simde_mm256_sqrt_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_sqrt_pd (simde__m256d a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_sqrt_pd(a); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128d[0] = simde_mm_sqrt_pd(a_.m128d[0]); r_.m128d[1] = simde_mm_sqrt_pd(a_.m128d[1]); #elif defined(simde_math_sqrt) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_sqrt(a_.f64[i]); } #else 
HEDLEY_UNREACHABLE(); #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_sqrt_pd #define _mm256_sqrt_pd(a) simde_mm256_sqrt_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) { #if defined(SIMDE_X86_AVX_NATIVE) _mm256_store_ps(mem_addr, a); #else simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_store_ps #define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) { #if defined(SIMDE_X86_AVX_NATIVE) _mm256_store_pd(mem_addr, a); #else simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_store_pd #define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm256_store_si256 (simde__m256i* mem_addr, simde__m256i a) { #if defined(SIMDE_X86_AVX_NATIVE) _mm256_store_si256(mem_addr, a); #else simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_store_si256 #define _mm256_store_si256(mem_addr, a) simde_mm256_store_si256(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) { #if defined(SIMDE_X86_AVX_NATIVE) _mm256_storeu_ps(mem_addr, a); #else simde_memcpy(mem_addr, &a, sizeof(a)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_storeu_ps #define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) { 
#if defined(SIMDE_X86_AVX_NATIVE) _mm256_storeu_pd(mem_addr, a); #else simde_memcpy(mem_addr, &a, sizeof(a)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_storeu_pd #define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm256_storeu_si256 (void* mem_addr, simde__m256i a) { #if defined(SIMDE_X86_AVX_NATIVE) _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a); #else simde_memcpy(mem_addr, &a, sizeof(a)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_storeu_si256 #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) { #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) _mm256_storeu2_m128(hi_addr, lo_addr, a); #else simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a)); simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_storeu2_m128 #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) { #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) _mm256_storeu2_m128d(hi_addr, lo_addr, a); #else simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a)); simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_storeu2_m128d #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) { #if defined(SIMDE_X86_AVX_NATIVE) && 
!defined(SIMDE_BUG_GCC_91341) _mm256_storeu2_m128i(hi_addr, lo_addr, a); #else simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a)); simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_storeu2_m128i #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) { #if defined(SIMDE_X86_AVX_NATIVE) _mm256_stream_ps(mem_addr, a); #else simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_stream_ps #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) { #if defined(SIMDE_X86_AVX_NATIVE) _mm256_stream_pd(mem_addr, a); #else simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_stream_pd #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) { #if defined(SIMDE_X86_AVX_NATIVE) _mm256_stream_si256(mem_addr, a); #else simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_stream_si256 #define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_sub_ps (simde__m256 a, simde__m256 b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_sub_ps(a, b); #else simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) 
r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]); r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = a_.f32 - b_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[i] - b_.f32[i]; } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_sub_ps #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_hsub_ps (simde__m256 a, simde__m256 b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_hsub_ps(a, b); #else return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_hsub_ps #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_sub_pd (simde__m256d a, simde__m256d b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_sub_pd(a, b); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]); r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = a_.f64 - b_.f64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[i] - b_.f64[i]; } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_sub_pd #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_hsub_pd (simde__m256d a, simde__m256d b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_hsub_pd(a, b); #else return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); #endif } #if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_hsub_pd #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b) #endif #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_undefined_ps (void) { simde__m256_private r_; #if \ defined(SIMDE_X86_AVX_NATIVE) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) r_.n = _mm256_undefined_ps(); #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) r_ = simde__m256_to_private(simde_mm256_setzero_ps()); #endif return simde__m256_from_private(r_); } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_undefined_ps #define _mm256_undefined_ps() simde_mm256_undefined_ps() #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_undefined_pd (void) { simde__m256d_private r_; #if \ defined(SIMDE_X86_AVX_NATIVE) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) r_.n = _mm256_undefined_pd(); #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); #endif return simde__m256d_from_private(r_); } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_undefined_pd #define _mm256_undefined_pd() simde_mm256_undefined_pd() #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_undefined_si256 (void) { simde__m256i_private r_; #if \ defined(SIMDE_X86_AVX_NATIVE) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) r_.n = _mm256_undefined_si256(); #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); #endif return simde__m256i_from_private(r_); } #if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_undefined_si256 #define _mm256_undefined_si256() simde_mm256_undefined_si256() #endif #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) HEDLEY_DIAGNOSTIC_POP #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_xor_ps (simde__m256 a, simde__m256 b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_xor_ps(a, b); #else simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128[0] = simde_mm_xor_ps(a_.m128[0], b_.m128[0]); r_.m128[1] = simde_mm_xor_ps(a_.m128[1], b_.m128[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f ^ b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] ^ b_.u32[i]; } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_xor_ps #define _mm256_xor_ps(a, b) simde_mm256_xor_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_xor_pd (simde__m256d a, simde__m256d b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_xor_pd(a, b); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128d[0] = simde_mm_xor_pd(a_.m128d[0], b_.m128d[0]); r_.m128d[1] = simde_mm_xor_pd(a_.m128d[1], b_.m128d[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f ^ b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = a_.u64[i] ^ b_.u64[i]; } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_xor_pd #define _mm256_xor_pd(a, b) simde_mm256_xor_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_x_mm256_xorsign_ps(simde__m256 dest, simde__m256 src) { return simde_mm256_xor_ps(simde_mm256_and_ps(simde_mm256_set1_ps(-0.0f), src), dest); } 
SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_x_mm256_xorsign_pd(simde__m256d dest, simde__m256d src) { return simde_mm256_xor_pd(simde_mm256_and_pd(simde_mm256_set1_pd(-0.0), src), dest); } SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_x_mm256_negate_ps(simde__m256 a) { #if defined(SIMDE_X86_AVX_NATIVE) return simde_mm256_xor_ps(a,_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0))); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if defined(SIMDE_VECTOR_NEGATE) r_.f32 = -a_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = -a_.f32[i]; } #endif return simde__m256_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_x_mm256_negate_pd(simde__m256d a) { #if defined(SIMDE_X86_AVX2_NATIVE) return simde_mm256_xor_pd(a, _mm256_set1_pd(SIMDE_FLOAT64_C(-0.0))); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if defined(SIMDE_VECTOR_NEGATE) r_.f64 = -a_.f64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = -a_.f64[i]; } #endif return simde__m256d_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_unpackhi_ps (simde__m256 a, simde__m256 b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_unpackhi_ps(a, b); #else simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 2, 10, 3, 11, 6, 14, 7, 15); #else r_.f32[0] = a_.f32[2]; r_.f32[1] = b_.f32[2]; r_.f32[2] = a_.f32[3]; r_.f32[3] = b_.f32[3]; r_.f32[4] = a_.f32[6]; r_.f32[5] = b_.f32[6]; r_.f32[6] = a_.f32[7]; r_.f32[7] = b_.f32[7]; #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_unpackhi_ps #define _mm256_unpackhi_ps(a, b) simde_mm256_unpackhi_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_unpackhi_pd (simde__m256d a, simde__m256d b) { #if 
defined(SIMDE_X86_AVX_NATIVE) return _mm256_unpackhi_pd(a, b); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); #else r_.f64[0] = a_.f64[1]; r_.f64[1] = b_.f64[1]; r_.f64[2] = a_.f64[3]; r_.f64[3] = b_.f64[3]; #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_unpackhi_pd #define _mm256_unpackhi_pd(a, b) simde_mm256_unpackhi_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_unpacklo_ps (simde__m256 a, simde__m256 b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_unpacklo_ps(a, b); #else simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 8, 1, 9, 4, 12, 5, 13); #else r_.f32[0] = a_.f32[0]; r_.f32[1] = b_.f32[0]; r_.f32[2] = a_.f32[1]; r_.f32[3] = b_.f32[1]; r_.f32[4] = a_.f32[4]; r_.f32[5] = b_.f32[4]; r_.f32[6] = a_.f32[5]; r_.f32[7] = b_.f32[5]; #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_unpacklo_ps #define _mm256_unpacklo_ps(a, b) simde_mm256_unpacklo_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_unpacklo_pd (simde__m256d a, simde__m256d b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_unpacklo_pd(a, b); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); #else r_.f64[0] = a_.f64[0]; r_.f64[1] = b_.f64[0]; r_.f64[2] = a_.f64[2]; r_.f64[3] = b_.f64[2]; #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_unpacklo_pd #define _mm256_unpacklo_pd(a, b) simde_mm256_unpacklo_pd(a, b) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_zextps128_ps256 (simde__m128 a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_insertf128_ps(_mm256_setzero_ps(), a, 0); #else simde__m256_private r_; r_.m128_private[0] = simde__m128_to_private(a); r_.m128_private[1] = simde__m128_to_private(simde_mm_setzero_ps()); return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_zextps128_ps256 #define _mm256_zextps128_ps256(a) simde_mm256_zextps128_ps256(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_zextpd128_pd256 (simde__m128d a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_insertf128_pd(_mm256_setzero_pd(), a, 0); #else simde__m256d_private r_; r_.m128d_private[0] = simde__m128d_to_private(a); r_.m128d_private[1] = simde__m128d_to_private(simde_mm_setzero_pd()); return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_zextpd128_pd256 #define _mm256_zextpd128_pd256(a) simde_mm256_zextpd128_pd256(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_zextsi128_si256 (simde__m128i a) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_insertf128_si256(_mm256_setzero_si256(), a, 0); #else simde__m256i_private r_; r_.m128i_private[0] = simde__m128i_to_private(a); r_.m128i_private[1] = simde__m128i_to_private(simde_mm_setzero_si128()); return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_zextsi128_si256 #define _mm256_zextsi128_si256(a) simde_mm256_zextsi128_si256(a) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_testc_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm_testc_ps(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_WASM_SIMD128_NATIVE) v128_t m = wasm_u32x4_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 31); m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); m = 
wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); return wasm_i32x4_extract_lane(m, 0); #else uint_fast32_t r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { r |= ~a_.u32[i] & b_.u32[i]; } return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); #endif #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm_testc_ps #define _mm_testc_ps(a, b) simde_mm_testc_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_testc_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm_testc_pd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_WASM_SIMD128_NATIVE) v128_t m = wasm_u64x2_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 63); return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); #else uint_fast64_t r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { r |= ~a_.u64[i] & b_.u64[i]; } return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); #endif #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm_testc_pd #define _mm_testc_pd(a, b) simde_mm_testc_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm256_testc_ps (simde__m256 a, simde__m256 b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_testc_ps(a, b); #else uint_fast32_t r = 0; simde__m256_private a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { r |= ~a_.u32[i] & b_.u32[i]; } return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_testc_ps #define _mm256_testc_ps(a, b) simde_mm256_testc_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm256_testc_pd (simde__m256d a, simde__m256d b) { #if defined(SIMDE_X86_AVX_NATIVE) return 
_mm256_testc_pd(a, b); #else uint_fast64_t r = 0; simde__m256d_private a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { r |= ~a_.u64[i] & b_.u64[i]; } return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_testc_pd #define _mm256_testc_pd(a, b) simde_mm256_testc_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm256_testc_si256 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_testc_si256(a, b); #else int_fast32_t r = 0; simde__m256i_private a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { r |= ~a_.i32f[i] & b_.i32f[i]; } return HEDLEY_STATIC_CAST(int, !r); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_testc_si256 #define _mm256_testc_si256(a, b) simde_mm256_testc_si256(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_testz_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm_testz_ps(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_WASM_SIMD128_NATIVE) v128_t m = wasm_u32x4_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 31); m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); return wasm_i32x4_extract_lane(m, 0); #else uint_fast32_t r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { r |= a_.u32[i] & b_.u32[i]; } return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); #endif #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm_testz_ps #define _mm_testz_ps(a, b) simde_mm_testz_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_testz_pd (simde__m128d a, 
simde__m128d b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm_testz_pd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_WASM_SIMD128_NATIVE) v128_t m = wasm_u64x2_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 63); return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); #else uint_fast64_t r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { r |= a_.u64[i] & b_.u64[i]; } return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); #endif #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm_testz_pd #define _mm_testz_pd(a, b) simde_mm_testz_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm256_testz_ps (simde__m256 a, simde__m256 b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_testz_ps(a, b); #else uint_fast32_t r = 0; simde__m256_private a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { r |= a_.u32[i] & b_.u32[i]; } return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_testz_ps #define _mm256_testz_ps(a, b) simde_mm256_testz_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm256_testz_pd (simde__m256d a, simde__m256d b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_testz_pd(a, b); #else uint_fast64_t r = 0; simde__m256d_private a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { r |= a_.u64[i] & b_.u64[i]; } return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_testz_pd #define _mm256_testz_pd(a, b) simde_mm256_testz_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm256_testz_si256 (simde__m256i a, 
simde__m256i b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_testz_si256(a, b); #else int_fast32_t r = 0; simde__m256i_private a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r = simde_mm_testz_si128(a_.m128i[0], b_.m128i[0]) && simde_mm_testz_si128(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { r |= a_.i32f[i] & b_.i32f[i]; } r = !r; #endif return HEDLEY_STATIC_CAST(int, r); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_testz_si256 #define _mm256_testz_si256(a, b) simde_mm256_testz_si256(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_testnzc_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm_testnzc_ps(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_WASM_SIMD128_NATIVE) v128_t m = wasm_u32x4_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 31); v128_t m2 = wasm_u32x4_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 31); m = wasm_v128_or(m, simde_mm_movehl_ps(m, m)); m2 = wasm_v128_or(m2, simde_mm_movehl_ps(m2, m2)); m = wasm_v128_or(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); m2 = wasm_v128_or(m2, simde_mm_shuffle_epi32(m2, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); return wasm_i32x4_extract_lane(m, 0) & wasm_i32x4_extract_lane(m2, 0); #else uint32_t rz = 0, rc = 0; for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { rc |= ~a_.u32[i] & b_.u32[i]; rz |= a_.u32[i] & b_.u32[i]; } return (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); #endif #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm_testnzc_ps #define _mm_testnzc_ps(a, b) simde_mm_testnzc_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_testnzc_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm_testnzc_pd(a, b); #else 
simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_WASM_SIMD128_NATIVE) v128_t m = wasm_u64x2_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 63); v128_t m2 = wasm_u64x2_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 63); return HEDLEY_STATIC_CAST(int, (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) & (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1))); #else uint64_t rc = 0, rz = 0; for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { rc |= ~a_.u64[i] & b_.u64[i]; rz |= a_.u64[i] & b_.u64[i]; } return (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); #endif #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm_testnzc_pd #define _mm_testnzc_pd(a, b) simde_mm_testnzc_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm256_testnzc_ps (simde__m256 a, simde__m256 b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_testnzc_ps(a, b); #else uint32_t rc = 0, rz = 0; simde__m256_private a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { rc |= ~a_.u32[i] & b_.u32[i]; rz |= a_.u32[i] & b_.u32[i]; } return (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_testnzc_ps #define _mm256_testnzc_ps(a, b) simde_mm256_testnzc_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm256_testnzc_pd (simde__m256d a, simde__m256d b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_testnzc_pd(a, b); #else uint64_t rc = 0, rz = 0; simde__m256d_private a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { rc |= ~a_.u64[i] & b_.u64[i]; rz |= a_.u64[i] & b_.u64[i]; } return (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); #endif } #if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_testnzc_pd #define _mm256_testnzc_pd(a, b) simde_mm256_testnzc_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm256_testnzc_si256 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_testnzc_si256(a, b); #else int32_t rc = 0, rz = 0; simde__m256i_private a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { rc |= ~a_.i32f[i] & b_.i32f[i]; rz |= a_.i32f[i] & b_.i32f[i]; } return !!(rc & rz); #endif } #if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) #undef _mm256_testnzc_si256 #define _mm256_testnzc_si256(a, b) simde_mm256_testnzc_si256(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX_H) */ simde-0.7.2/simde/x86/avx2.h000066400000000000000000005736201400333146700154160ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 *   2018-2020 Evan Nemerson
 *   2019-2020 Michael R. Crusoe
 *   2020      Himanshi Mathur
 *   2020      Hidayat Khan
 */

#if !defined(SIMDE_X86_AVX2_H)
#define SIMDE_X86_AVX2_H

#include "avx.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* Absolute value of each 8-bit lane of a.  The portable path either splits
 * the work into two 128-bit halves or negates negative lanes one by one. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_abs_epi8 (simde__m256i a) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_abs_epi8(a);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_abs_epi8(a_.m128i[0]);
      r_.m128i[1] = simde_mm_abs_epi8(a_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_abs_epi8
  #define _mm256_abs_epi8(a) simde_mm256_abs_epi8(a)
#endif

/* Absolute value of each 16-bit lane of a. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_abs_epi16 (simde__m256i a) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_abs_epi16(a);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_abs_epi16(a_.m128i[0]);
      r_.m128i[1] = simde_mm_abs_epi16(a_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? -a_.i16[i] : a_.i16[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_abs_epi16
  #define _mm256_abs_epi16(a) simde_mm256_abs_epi16(a)
#endif

/* Absolute value of each 32-bit lane of a. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_abs_epi32(simde__m256i a) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_abs_epi32(a);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_abs_epi32(a_.m128i[0]);
      r_.m128i[1] = simde_mm_abs_epi32(a_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) {
        /* NOTE(review): -a_.i32[i] is evaluated in int32_t, so INT32_MIN
         * would overflow here -- confirm whether callers rely on that lane. */
        r_.i32[i] = (a_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_abs_epi32
  #define _mm256_abs_epi32(a) simde_mm256_abs_epi32(a)
#endif

/* Lane-wise a + b on 8-bit lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_add_epi8 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_add_epi8(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_add_epi8(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_add_epi8(a_.m128i[1], b_.m128i[1]);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i8 = a_.i8 + b_.i8;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = a_.i8[i] + b_.i8[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_add_epi8
  #define _mm256_add_epi8(a, b) simde_mm256_add_epi8(a, b)
#endif

/* Lane-wise a + b on 16-bit lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_add_epi16 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_add_epi16(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_add_epi16(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_add_epi16(a_.m128i[1], b_.m128i[1]);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i16 = a_.i16 + b_.i16;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = a_.i16[i] + b_.i16[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_add_epi16
  #define _mm256_add_epi16(a, b) simde_mm256_add_epi16(a, b)
#endif

/* Horizontal add on 16-bit lanes: the portable path adds the
 * deinterleave-even and deinterleave-odd results of (a, b). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_hadd_epi16 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_hadd_epi16(a, b);
  #else
    return simde_mm256_add_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_hadd_epi16
  #define _mm256_hadd_epi16(a, b) simde_mm256_hadd_epi16(a, b)
#endif

/* Lane-wise a + b on 32-bit lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_add_epi32 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_add_epi32(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_add_epi32(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_add_epi32(a_.m128i[1], b_.m128i[1]);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32 = a_.i32 + b_.i32;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.i32[i] = a_.i32[i] + b_.i32[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_add_epi32
  #define _mm256_add_epi32(a, b) simde_mm256_add_epi32(a, b)
#endif

/* Horizontal add on 32-bit lanes: the portable path adds the
 * deinterleave-even and deinterleave-odd results of (a, b). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_hadd_epi32 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_hadd_epi32(a, b);
  #else
    return simde_mm256_add_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_hadd_epi32
  #define _mm256_hadd_epi32(a, b) simde_mm256_hadd_epi32(a, b)
#endif

/* Lane-wise a + b on 64-bit lanes.  The vector-operator path is skipped when
 * SIMDE_BUG_CLANG_BAD_VI64_OPS is defined. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_add_epi64 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_add_epi64(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_add_epi64(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_add_epi64(a_.m128i[1], b_.m128i[1]);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS)
      r_.i64 = a_.i64 + b_.i64;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.i64[i] = a_.i64[i] + b_.i64[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_add_epi64
  #define _mm256_add_epi64(a, b) simde_mm256_add_epi64(a, b)
#endif

/* Per 128-bit lane, byte-shifts the concatenation (a lane : b lane) right by
 * count bytes and keeps the low 16 bytes.  Result byte i of a lane reads
 * source position count + i: positions 0..15 come from b, 16..31 from a, and
 * anything above 31 yields zero. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_alignr_epi8 (simde__m256i a, simde__m256i b, int count)
    SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) {
  simde__m256i_private
    r_,
    a_ = simde__m256i_to_private(a),
    b_ = simde__m256i_to_private(b);

  /* Every source position would be past byte 31, so the result is all zero. */
  if (HEDLEY_UNLIKELY(count > 31))
    return simde_mm256_setzero_si256();

  /* h walks the two 128-bit lanes, i the bytes within a lane. */
  for (size_t h = 0 ; h < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; h++) {
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) {
      const int srcpos = count + HEDLEY_STATIC_CAST(int, i);
      if (srcpos > 31) {
        r_.m128i_private[h].i8[i] = 0;
      } else if (srcpos > 15) {
        r_.m128i_private[h].i8[i] = a_.m128i_private[h].i8[(srcpos) & 15];
      } else {
        r_.m128i_private[h].i8[i] = b_.m128i_private[h].i8[srcpos];
      }
    }
  }

  return simde__m256i_from_private(r_);
}
/* When a faster implementation exists, a same-named macro replaces calls to
 * the function above. */
#if defined(SIMDE_X86_AVX2_NATIVE)
#  define simde_mm256_alignr_epi8(a, b, count) _mm256_alignr_epi8(a, b, count)
#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)
#  define simde_mm256_alignr_epi8(a, b, count) \
      simde_mm256_set_m128i( \
          simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (count)), \
          simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (count)))
#endif
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_alignr_epi8
  #define _mm256_alignr_epi8(a, b, count) simde_mm256_alignr_epi8(a, b, (count))
#endif

/* Bitwise a & b over the whole 256-bit value. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_and_si256 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_and_si256(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_and_si128(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_and_si128(a_.m128i[1], b_.m128i[1]);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = a_.i32f & b_.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.i64[i] = a_.i64[i] & b_.i64[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_and_si256
  #define _mm256_and_si256(a, b) simde_mm256_and_si256(a, b)
#endif

/* Bitwise (~a) & b over the whole 256-bit value; note that it is the first
 * operand that is complemented. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_andnot_si256 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_andnot_si256(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_andnot_si128(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_andnot_si128(a_.m128i[1], b_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
        r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_andnot_si256
#define _mm256_andnot_si256(a, b) simde_mm256_andnot_si256(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_adds_epi8 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_adds_epi8(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_adds_epi8(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_adds_epi8(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_adds_epi8 #define _mm256_adds_epi8(a, b) simde_mm256_adds_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_adds_epi16(simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_adds_epi16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_adds_epi16(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_adds_epi16(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_adds_epi16 #define _mm256_adds_epi16(a, b) simde_mm256_adds_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_hadds_epi16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_hadds_epi16(a, b); #else return simde_mm256_adds_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_hadds_epi16 #define _mm256_hadds_epi16(a, b) 
simde_mm256_hadds_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_adds_epu8 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_adds_epu8(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_adds_epu8(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_adds_epu8(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_adds_epu8 #define _mm256_adds_epu8(a, b) simde_mm256_adds_epu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_adds_epu16(simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_adds_epu16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_adds_epu16(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_adds_epu16(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_adds_epu16 #define _mm256_adds_epu16(a, b) simde_mm256_adds_epu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_avg_epu8 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_avg_epu8(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; } return simde__m256i_from_private(r_); #endif } #if 
defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_avg_epu8 #define _mm256_avg_epu8(a, b) simde_mm256_avg_epu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_avg_epu16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_avg_epu16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_avg_epu16 #define _mm256_avg_epu16(a, b) simde_mm256_avg_epu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_blend_epi32(simde__m128i a, simde__m128i b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = ((imm8 >> i) & 1) ? b_.i32[i] : a_.i32[i]; } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm_blend_epi32(a, b, imm8) _mm_blend_epi32(a, b, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) # define simde_mm_blend_epi32(a, b, imm8) \ simde_mm_castps_si128(simde_mm_blend_ps(simde_mm_castsi128_ps(a), simde_mm_castsi128_ps(b), (imm8))) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_blend_epi32 #define _mm_blend_epi32(a, b, imm8) simde_mm_blend_epi32(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_blend_epi16(simde__m256i a, simde__m256i b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = ((imm8 >> i%8) & 1) ? 
b_.i16[i] : a_.i16[i];
  }

  return simde__m256i_from_private(r_);
}
/* Macro overrides for simde_mm256_blend_epi16: the native intrinsic when
 * AVX2 is available, otherwise (natural vector size <= 128 bits) the 128-bit
 * blend applied to each half with the same imm8. */
#if defined(SIMDE_X86_AVX2_NATIVE)
#  define simde_mm256_blend_epi16(a, b, imm8) _mm256_blend_epi16(a, b, imm8)
#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)
#  define simde_mm256_blend_epi16(a, b, imm8) \
      simde_mm256_set_m128i( \
          simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8)), \
          simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8)))
#endif
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_blend_epi16
  #define _mm256_blend_epi16(a, b, imm8) simde_mm256_blend_epi16(a, b, imm8)
#endif

/* Per-lane select between a and b (32-bit lanes): bit i of imm8 set picks
 * b's lane i, clear picks a's lane i. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_blend_epi32(simde__m256i a, simde__m256i b, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) {
  simde__m256i_private
    r_,
    a_ = simde__m256i_to_private(a),
    b_ = simde__m256i_to_private(b);

  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
    r_.i32[i] = ((imm8 >> i) & 1) ?
b_.i32[i] : a_.i32[i]; } return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_blend_epi32(a, b, imm8) _mm256_blend_epi32(a, b, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) # define simde_mm256_blend_epi32(a, b, imm8) \ simde_mm256_set_m128i( \ simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8) >> 4), \ simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8) & 0x0F)) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_blend_epi32 #define _mm256_blend_epi32(a, b, imm8) simde_mm256_blend_epi32(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_blendv_epi8(simde__m256i a, simde__m256i b, simde__m256i mask) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_blendv_epi8(a, b, mask); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b), mask_ = simde__m256i_to_private(mask); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_blendv_epi8(a_.m128i[0], b_.m128i[0], mask_.m128i[0]); r_.m128i[1] = simde_mm_blendv_epi8(a_.m128i[1], b_.m128i[1], mask_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { if (mask_.u8[i] & 0x80) { r_.u8[i] = b_.u8[i]; } else { r_.u8[i] = a_.u8[i]; } } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_blendv_epi8(a, b, imm8) _mm256_blendv_epi8(a, b, imm8) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_blendv_epi8 #define _mm256_blendv_epi8(a, b, mask) simde_mm256_blendv_epi8(a, b, mask) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_broadcastb_epi8 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm_broadcastb_epi8(a); #else simde__m128i_private r_; simde__m128i_private a_= simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / 
sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[0]; } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_broadcastb_epi8 #define _mm_broadcastb_epi8(a) simde_mm_broadcastb_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_broadcastb_epi8 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_broadcastb_epi8(a); #else simde__m256i_private r_; simde__m128i_private a_= simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[0]; } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_broadcastb_epi8 #define _mm256_broadcastb_epi8(a) simde_mm256_broadcastb_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_broadcastw_epi16 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm_broadcastw_epi16(a); #else simde__m128i_private r_; simde__m128i_private a_= simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[0]; } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_broadcastw_epi16 #define _mm_broadcastw_epi16(a) simde_mm_broadcastw_epi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_broadcastw_epi16 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_broadcastw_epi16(a); #else simde__m256i_private r_; simde__m128i_private a_= simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[0]; } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_broadcastw_epi16 #define _mm256_broadcastw_epi16(a) simde_mm256_broadcastw_epi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_broadcastd_epi32 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) 
return _mm_broadcastd_epi32(a); #else simde__m128i_private r_; simde__m128i_private a_= simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[0]; } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_broadcastd_epi32 #define _mm_broadcastd_epi32(a) simde_mm_broadcastd_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_broadcastd_epi32 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_broadcastd_epi32(a); #else simde__m256i_private r_; simde__m128i_private a_= simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[0]; } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_broadcastd_epi32 #define _mm256_broadcastd_epi32(a) simde_mm256_broadcastd_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_broadcastq_epi64 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm_broadcastq_epi64(a); #else simde__m128i_private r_; simde__m128i_private a_= simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[0]; } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_broadcastq_epi64 #define _mm_broadcastq_epi64(a) simde_mm_broadcastq_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_broadcastq_epi64 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_broadcastq_epi64(a); #else simde__m256i_private r_; simde__m128i_private a_= simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[0]; } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_broadcastq_epi64 
#define _mm256_broadcastq_epi64(a) simde_mm256_broadcastq_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_broadcastss_ps (simde__m128 a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm_broadcastss_ps(a); #else simde__m128_private r_; simde__m128_private a_= simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[0]; } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_broadcastss_ps #define _mm_broadcastss_ps(a) simde_mm_broadcastss_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_broadcastss_ps (simde__m128 a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_broadcastss_ps(a); #else simde__m256_private r_; simde__m128_private a_= simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[0]; } return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_broadcastss_ps #define _mm256_broadcastss_ps(a) simde_mm256_broadcastss_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_broadcastsd_pd (simde__m128d a) { return simde_mm_movedup_pd(a); } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_broadcastsd_pd #define _mm_broadcastsd_pd(a) simde_mm_broadcastsd_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_broadcastsd_pd (simde__m128d a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_broadcastsd_pd(a); #else simde__m256d_private r_; simde__m128d_private a_= simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[0]; } return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_broadcastsd_pd #define _mm256_broadcastsd_pd(a) simde_mm256_broadcastsd_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i 
simde_mm256_broadcastsi128_si256 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) return _mm256_broadcastsi128_si256(a); #else simde__m256i_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i_private[0] = a_; r_.m128i_private[1] = a_; #else r_.i64[0] = a_.i64[0]; r_.i64[1] = a_.i64[1]; r_.i64[2] = a_.i64[0]; r_.i64[3] = a_.i64[1]; #endif return simde__m256i_from_private(r_); #endif } #define simde_mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_broadcastsi128_si256 #define _mm256_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) #undef _mm_broadcastsi128_si256 #define _mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_bslli_epi128 (simde__m256i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m256i_private r_, a_ = simde__m256i_to_private(a); const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); SIMDE_VECTORIZE for (int i = 0 ; i < ssize ; i++) { const int e = i - imm8; if(i >= (ssize/2)) { if(e >= (ssize/2) && e < ssize) r_.i8[i] = a_.i8[e]; else r_.i8[i] = 0; } else{ if(e >= 0 && e < (ssize/2)) r_.i8[i] = a_.i8[e]; else r_.i8[i] = 0; } } return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) #define simde_mm256_bslli_epi128(a, imm8) _mm256_bslli_epi128(a, imm8) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_bslli_epi128 #define _mm256_bslli_epi128(a, imm8) simde_mm256_bslli_epi128(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_bsrli_epi128 (simde__m256i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m256i_private r_, a_ = 
simde__m256i_to_private(a); const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); SIMDE_VECTORIZE for (int i = 0 ; i < ssize ; i++) { const int e = i + imm8; if(i < (ssize/2)) { if(e >= 0 && e < (ssize/2)) r_.i8[i] = a_.i8[e]; else r_.i8[i] = 0; } else{ if(e >= (ssize/2) && e < ssize) r_.i8[i] = a_.i8[e]; else r_.i8[i] = 0; } } return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) #define simde_mm256_bsrli_epi128(a, imm8) _mm256_bsrli_epi128(a, imm8) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_bsrli_epi128 #define _mm256_bsrli_epi128(a, imm8) simde_mm256_bsrli_epi128(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cmpeq_epi8 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cmpeq_epi8(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cmpeq_epi8 #define _mm256_cmpeq_epi8(a, b) simde_mm256_cmpeq_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cmpeq_epi16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cmpeq_epi16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_cmpeq_epi16(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_cmpeq_epi16(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cmpeq_epi16 #define _mm256_cmpeq_epi16(a, b) simde_mm256_cmpeq_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cmpeq_epi32 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cmpeq_epi32(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_cmpeq_epi32(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_cmpeq_epi32(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cmpeq_epi32 #define _mm256_cmpeq_epi32(a, b) simde_mm256_cmpeq_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cmpeq_epi64 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cmpeq_epi64(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_cmpeq_epi64(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_cmpeq_epi64(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cmpeq_epi64 #define _mm256_cmpeq_epi64(a, b) simde_mm256_cmpeq_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cmpgt_epi8 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cmpgt_epi8(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_cmpgt_epi8(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_cmpgt_epi8(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = HEDLEY_STATIC_CAST(__typeof__(r_.i8), a_.i8 > b_.i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cmpgt_epi8 #define _mm256_cmpgt_epi8(a, b) simde_mm256_cmpgt_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cmpgt_epi16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cmpgt_epi16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_cmpgt_epi16(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_cmpgt_epi16(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = a_.i16 > b_.i16; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cmpgt_epi16 #define _mm256_cmpgt_epi16(a, b) simde_mm256_cmpgt_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cmpgt_epi32 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cmpgt_epi32(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_cmpgt_epi32(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_cmpgt_epi32(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), a_.i32 > b_.i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cmpgt_epi32 #define _mm256_cmpgt_epi32(a, b) simde_mm256_cmpgt_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cmpgt_epi64 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cmpgt_epi64(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_cmpgt_epi64(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_cmpgt_epi64(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cmpgt_epi64 #define _mm256_cmpgt_epi64(a, b) simde_mm256_cmpgt_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cvtepi8_epi16 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cvtepi8_epi16(a); #else simde__m256i_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.i16, a_.i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i8[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cvtepi8_epi16 #define _mm256_cvtepi8_epi16(a) simde_mm256_cvtepi8_epi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cvtepi8_epi32 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cvtepi8_epi32(a); #else simde__m256i_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); #if 
defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i8[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cvtepi8_epi32 #define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cvtepi8_epi64 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cvtepi8_epi64(a); #else simde__m256i_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i8[i]; } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cvtepi8_epi64 #define _mm256_cvtepi8_epi64(a) simde_mm256_cvtepi8_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cvtepi16_epi32 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cvtepi16_epi32(a); #else simde__m256i_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.i32, a_.i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i16[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cvtepi16_epi32 #define _mm256_cvtepi16_epi32(a) simde_mm256_cvtepi16_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cvtepi16_epi64 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cvtepi16_epi64(a); #else simde__m256i_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] 
= a_.i16[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cvtepi16_epi64 #define _mm256_cvtepi16_epi64(a) simde_mm256_cvtepi16_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cvtepi32_epi64 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cvtepi32_epi64(a); #else simde__m256i_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.i64, a_.i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i32[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cvtepi32_epi64 #define _mm256_cvtepi32_epi64(a) simde_mm256_cvtepi32_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cvtepu8_epi16 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cvtepu8_epi16(a); #else simde__m256i_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.i16, a_.u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.u8[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cvtepu8_epi16 #define _mm256_cvtepu8_epi16(a) simde_mm256_cvtepu8_epi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cvtepu8_epi32 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cvtepu8_epi32(a); #else simde__m256i_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.u8[i]; } #endif return simde__m256i_from_private(r_); #endif } #if 
defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cvtepu8_epi32 #define _mm256_cvtepu8_epi32(a) simde_mm256_cvtepu8_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cvtepu8_epi64 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cvtepu8_epi64(a); #else simde__m256i_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.u8[i]; } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cvtepu8_epi64 #define _mm256_cvtepu8_epi64(a) simde_mm256_cvtepu8_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cvtepu16_epi32 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cvtepu16_epi32(a); #else simde__m256i_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.i32, a_.u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.u16[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cvtepu16_epi32 #define _mm256_cvtepu16_epi32(a) simde_mm256_cvtepu16_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cvtepu16_epi64 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cvtepu16_epi64(a); #else simde__m256i_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.u16[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cvtepu16_epi64 #define _mm256_cvtepu16_epi64(a) simde_mm256_cvtepu16_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i 
simde_mm256_cvtepu32_epi64 (simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_cvtepu32_epi64(a); #else simde__m256i_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.i64, a_.u32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.u32[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_cvtepu32_epi64 #define _mm256_cvtepu32_epi64(a) simde_mm256_cvtepu32_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm256_extract_epi8 (simde__m256i a, const int index) SIMDE_REQUIRE_RANGE(index, 0, 31){ simde__m256i_private a_ = simde__m256i_to_private(a); return a_.i8[index]; } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_extract_epi8(a, index) _mm256_extract_epi8(a, index) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_extract_epi8 #define _mm256_extract_epi8(a, index) simde_mm256_extract_epi8(a, index) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm256_extract_epi16 (simde__m256i a, const int index) SIMDE_REQUIRE_RANGE(index, 0, 15) { simde__m256i_private a_ = simde__m256i_to_private(a); return a_.i16[index]; } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_extract_epi16(a, index) _mm256_extract_epi16(a, index) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_extract_epi16 #define _mm256_extract_epi16(a, index) simde_mm256_extract_epi16(a, index) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm256_extracti128_si256 (simde__m256i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { simde__m256i_private a_ = simde__m256i_to_private(a); return a_.m128i[imm8]; } #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_extracti128_si256(a, imm8) _mm256_extracti128_si256(a, imm8) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_extracti128_si256 #define 
_mm256_extracti128_si256(a, imm8) simde_mm256_extracti128_si256(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_i32gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m128i_private vindex_ = simde__m128i_to_private(vindex), r_; const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); int32_t dst; simde_memcpy(&dst, src, sizeof(dst)); r_.i32[i] = dst; } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm_i32gather_epi32(base_addr, vindex, scale) _mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_i32_gather_epi32 #define _mm_i32gather_epi32(base_addr, vindex, scale) simde_mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_i32gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m128i_private vindex_ = simde__m128i_to_private(vindex), src_ = simde__m128i_to_private(src), mask_ = simde__m128i_to_private(mask), r_; const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { if ((mask_.i32[i] >> 31) & 1) { const uint8_t* src1 = addr + 
(HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); int32_t dst; simde_memcpy(&dst, src1, sizeof(dst)); r_.i32[i] = dst; } else { r_.i32[i] = src_.i32[i]; } } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_mask_i32gather_epi32 #define _mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_i32gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m256i_private vindex_ = simde__m256i_to_private(vindex), r_; const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); int32_t dst; simde_memcpy(&dst, src, sizeof(dst)); r_.i32[i] = dst; } return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_i32gather_epi32(base_addr, vindex, scale) _mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_i32_gather_epi32 #define _mm256_i32gather_epi32(base_addr, vindex, scale) simde_mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mask_i32gather_epi32(simde__m256i src, const int32_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m256i_private vindex_ = simde__m256i_to_private(vindex), src_ = simde__m256i_to_private(src), mask_ = simde__m256i_to_private(mask), r_; const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { if ((mask_.i32[i] >> 31) & 1) { const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); int32_t dst; simde_memcpy(&dst, src1, sizeof(dst)); r_.i32[i] = dst; } else { r_.i32[i] = src_.i32[i]; } } return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_i32gather_epi32 #define _mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_i64gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m128i_private vindex_ = simde__m128i_to_private(vindex), r_ = simde__m128i_to_private(simde_mm_setzero_si128()); const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 
; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); int32_t dst; simde_memcpy(&dst, src, sizeof(dst)); r_.i32[i] = dst; } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm_i64gather_epi32(base_addr, vindex, scale) _mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_i64_gather_epi32 #define _mm_i64gather_epi32(base_addr, vindex, scale) simde_mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m128i_private vindex_ = simde__m128i_to_private(vindex), src_ = simde__m128i_to_private(src), mask_ = simde__m128i_to_private(mask), r_ = simde__m128i_to_private(simde_mm_setzero_si128()); const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { if ((mask_.i32[i] >> 31) & 1) { const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); int32_t dst; simde_memcpy(&dst, src1, sizeof(dst)); r_.i32[i] = dst; } else { r_.i32[i] = src_.i32[i]; } } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) #endif #if 
defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_mask_i64gather_epi32 #define _mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm256_i64gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m256i_private vindex_ = simde__m256i_to_private(vindex); simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); int32_t dst; simde_memcpy(&dst, src, sizeof(dst)); r_.i32[i] = dst; } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_i64gather_epi32(base_addr, vindex, scale) _mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_i64_gather_epi32 #define _mm256_i64gather_epi32(base_addr, vindex, scale) simde_mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm256_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m256i vindex, simde__m128i mask, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m256i_private vindex_ = simde__m256i_to_private(vindex); simde__m128i_private 
src_ = simde__m128i_to_private(src), mask_ = simde__m128i_to_private(mask), r_ = simde__m128i_to_private(simde_mm_setzero_si128()); const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { if ((mask_.i32[i] >> 31) & 1) { const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); int32_t dst; simde_memcpy(&dst, src1, sizeof(dst)); r_.i32[i] = dst; } else { r_.i32[i] = src_.i32[i]; } } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_i64gather_epi32 #define _mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m128i_private vindex_ = simde__m128i_to_private(vindex), r_; const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); int64_t dst; simde_memcpy(&dst, src, sizeof(dst)); r_.i64[i] = dst; } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) #define simde_mm_i32gather_epi64(base_addr, 
vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) #else #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) #endif #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_i32_gather_epi64 #define _mm_i32gather_epi64(base_addr, vindex, scale) simde_mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_i32gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m128i_private vindex_ = simde__m128i_to_private(vindex), src_ = simde__m128i_to_private(src), mask_ = simde__m128i_to_private(mask), r_; const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { if ((mask_.i64[i] >> 63) & 1) { const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); int64_t dst; simde_memcpy(&dst, src1, sizeof(dst)); r_.i64[i] = dst; } else { r_.i64[i] = src_.i64[i]; } } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) #else #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) #endif #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_mask_i32gather_epi64 #define 
_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m128i_private vindex_ = simde__m128i_to_private(vindex); simde__m256i_private r_; const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); int64_t dst; simde_memcpy(&dst, src, sizeof(dst)); r_.i64[i] = dst; } return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) #else #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) #endif #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_i32_gather_epi64 #define _mm256_i32gather_epi64(base_addr, vindex, scale) simde_mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mask_i32gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m128i vindex, simde__m256i mask, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m256i_private src_ = simde__m256i_to_private(src), mask_ = 
simde__m256i_to_private(mask), r_; simde__m128i_private vindex_ = simde__m128i_to_private(vindex); const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { if ((mask_.i64[i] >> 63) & 1) { const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); int64_t dst; simde_memcpy(&dst, src1, sizeof(dst)); r_.i64[i] = dst; } else { r_.i64[i] = src_.i64[i]; } } return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) #else #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) #endif #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_i32gather_epi64 #define _mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_i64gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m128i_private vindex_ = simde__m128i_to_private(vindex), r_ = simde__m128i_to_private(simde_mm_setzero_si128()); const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); 
int64_t dst; simde_memcpy(&dst, src, sizeof(dst)); r_.i64[i] = dst; } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) #else #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) #endif #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_i64_gather_epi64 #define _mm_i64gather_epi64(base_addr, vindex, scale) simde_mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_i64gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m128i_private vindex_ = simde__m128i_to_private(vindex), src_ = simde__m128i_to_private(src), mask_ = simde__m128i_to_private(mask), r_ = simde__m128i_to_private(simde_mm_setzero_si128()); const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { if ((mask_.i64[i] >> 63) & 1) { const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); int64_t dst; simde_memcpy(&dst, src1, sizeof(dst)); r_.i64[i] = dst; } else { r_.i64[i] = src_.i64[i]; } } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) #else #define 
simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) #endif #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_mask_i64gather_epi64 #define _mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_i64gather_epi64(const int64_t* base_addr, simde__m256i vindex, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m256i_private vindex_ = simde__m256i_to_private(vindex), r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); int64_t dst; simde_memcpy(&dst, src, sizeof(dst)); r_.i64[i] = dst; } return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) #define simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) #else #define simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) #endif #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_i64_gather_epi64 #define _mm256_i64gather_epi64(base_addr, vindex, scale) simde_mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mask_i64gather_epi64(simde__m256i src, 
const int64_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m256i_private vindex_ = simde__m256i_to_private(vindex), src_ = simde__m256i_to_private(src), mask_ = simde__m256i_to_private(mask), r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { if ((mask_.i64[i] >> 63) & 1) { const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); int64_t dst; simde_memcpy(&dst, src1, sizeof(dst)); r_.i64[i] = dst; } else { r_.i64[i] = src_.i64[i]; } } return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) #else #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) #endif #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_i64gather_epi64 #define _mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_i32gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m128i_private vindex_ = 
simde__m128i_to_private(vindex); simde__m128_private r_; const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); simde_float32 dst; simde_memcpy(&dst, src, sizeof(dst)); r_.f32[i] = dst; } return simde__m128_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm_i32gather_ps(base_addr, vindex, scale) _mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_i32_gather_ps #define _mm_i32gather_ps(base_addr, vindex, scale) simde_mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_mask_i32gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m128i_private vindex_ = simde__m128i_to_private(vindex); simde__m128_private src_ = simde__m128_to_private(src), mask_ = simde__m128_to_private(mask), r_; const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { if ((mask_.i32[i] >> 31) & 1) { const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); simde_float32 dst; simde_memcpy(&dst, src1, sizeof(dst)); r_.f32[i] = dst; } else { r_.f32[i] = src_.f32[i]; } } return simde__m128_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) 
_mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_mask_i32gather_ps #define _mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_i32gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m256i_private vindex_ = simde__m256i_to_private(vindex); simde__m256_private r_; const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); simde_float32 dst; simde_memcpy(&dst, src, sizeof(dst)); r_.f32[i] = dst; } return simde__m256_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_i32gather_ps(base_addr, vindex, scale) _mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_i32_gather_ps #define _mm256_i32gather_ps(base_addr, vindex, scale) simde_mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_mask_i32gather_ps(simde__m256 src, const simde_float32* base_addr, simde__m256i vindex, simde__m256 mask, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal 
to 8") { simde__m256i_private vindex_ = simde__m256i_to_private(vindex); simde__m256_private src_ = simde__m256_to_private(src), mask_ = simde__m256_to_private(mask), r_; const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { if ((mask_.i32[i] >> 31) & 1) { const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); simde_float32 dst; simde_memcpy(&dst, src1, sizeof(dst)); r_.f32[i] = dst; } else { r_.f32[i] = src_.f32[i]; } } return simde__m256_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_i32gather_ps #define _mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_i64gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m128i_private vindex_ = simde__m128i_to_private(vindex); simde__m128_private r_ = simde__m128_to_private(simde_mm_setzero_ps()); const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); simde_float32 dst; simde_memcpy(&dst, src, sizeof(dst)); r_.f32[i] = dst; } return 
simde__m128_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm_i64gather_ps(base_addr, vindex, scale) _mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_i64_gather_ps #define _mm_i64gather_ps(base_addr, vindex, scale) simde_mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m128i_private vindex_ = simde__m128i_to_private(vindex); simde__m128_private src_ = simde__m128_to_private(src), mask_ = simde__m128_to_private(mask), r_ = simde__m128_to_private(simde_mm_setzero_ps()); const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { if ((mask_.i32[i] >> 31) & 1) { const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); simde_float32 dst; simde_memcpy(&dst, src1, sizeof(dst)); r_.f32[i] = dst; } else { r_.f32[i] = src_.f32[i]; } } return simde__m128_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, float32_t const*, base_addr), vindex, mask, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_mask_i64gather_ps #define _mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), 
vindex, mask, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm256_i64gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m256i_private vindex_ = simde__m256i_to_private(vindex); simde__m128_private r_ = simde__m128_to_private(simde_mm_setzero_ps()); const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); simde_float32 dst; simde_memcpy(&dst, src, sizeof(dst)); r_.f32[i] = dst; } return simde__m128_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_i64gather_ps(base_addr, vindex, scale) _mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_i64_gather_ps #define _mm256_i64gather_ps(base_addr, vindex, scale) simde_mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm256_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m256i vindex, simde__m128 mask, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m256i_private vindex_ = simde__m256i_to_private(vindex); simde__m128_private src_ = simde__m128_to_private(src), mask_ = simde__m128_to_private(mask), r_ = simde__m128_to_private(simde_mm_setzero_ps()); const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < 
(sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { if ((mask_.i32[i] >> 31) & 1) { const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); simde_float32 dst; simde_memcpy(&dst, src1, sizeof(dst)); r_.f32[i] = dst; } else { r_.f32[i] = src_.f32[i]; } } return simde__m128_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_i64gather_ps #define _mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m128i_private vindex_ = simde__m128i_to_private(vindex); simde__m128d_private r_; const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); simde_float64 dst; simde_memcpy(&dst, src, sizeof(dst)); r_.f64[i] = dst; } return simde__m128d_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm_i32gather_pd(base_addr, vindex, scale) _mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_i32_gather_pd #define _mm_i32gather_pd(base_addr, vindex, scale) 
simde_mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_mask_i32gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m128i_private vindex_ = simde__m128i_to_private(vindex); simde__m128d_private src_ = simde__m128d_to_private(src), mask_ = simde__m128d_to_private(mask), r_; const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { if ((mask_.i64[i] >> 63) & 1) { const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); simde_float64 dst; simde_memcpy(&dst, src1, sizeof(dst)); r_.f64[i] = dst; } else { r_.f64[i] = src_.f64[i]; } } return simde__m128d_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_mask_i32gather_pd #define _mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m128i_private vindex_ = simde__m128i_to_private(vindex); simde__m256d_private r_; const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, 
base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); simde_float64 dst; simde_memcpy(&dst, src, sizeof(dst)); r_.f64[i] = dst; } return simde__m256d_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_i32gather_pd(base_addr, vindex, scale) _mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_i32_gather_pd #define _mm256_i32gather_pd(base_addr, vindex, scale) simde_mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_mask_i32gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m128i vindex, simde__m256d mask, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m256d_private src_ = simde__m256d_to_private(src), mask_ = simde__m256d_to_private(mask), r_; simde__m128i_private vindex_ = simde__m128i_to_private(vindex); const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { if ((mask_.i64[i] >> 63) & 1) { const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); simde_float64 dst; simde_memcpy(&dst, src1, sizeof(dst)); r_.f64[i] = dst; } else { r_.f64[i] = src_.f64[i]; } } return simde__m256d_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) #endif #if 
defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_i32gather_pd #define _mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_i64gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m128i_private vindex_ = simde__m128i_to_private(vindex); simde__m128d_private r_ = simde__m128d_to_private(simde_mm_setzero_pd()); const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); simde_float64 dst; simde_memcpy(&dst, src, sizeof(dst)); r_.f64[i] = dst; } return simde__m128d_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm_i64gather_pd(base_addr, vindex, scale) _mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_i64_gather_pd #define _mm_i64gather_pd(base_addr, vindex, scale) simde_mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_mask_i64gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m128i_private vindex_ = simde__m128i_to_private(vindex); simde__m128d_private src_ = simde__m128d_to_private(src), mask_ = 
simde__m128d_to_private(mask), r_ = simde__m128d_to_private(simde_mm_setzero_pd()); const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { if ((mask_.i64[i] >> 63) & 1) { const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); simde_float64 dst; simde_memcpy(&dst, src1, sizeof(dst)); r_.f64[i] = dst; } else { r_.f64[i] = src_.f64[i]; } } return simde__m128d_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_mask_i64gather_pd #define _mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_i64gather_pd(const simde_float64* base_addr, simde__m256i vindex, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m256i_private vindex_ = simde__m256i_to_private(vindex); simde__m256d_private r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); simde_float64 dst; simde_memcpy(&dst, src, sizeof(dst)); r_.f64[i] = dst; } return simde__m256d_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_i64gather_pd(base_addr, vindex, scale) 
_mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_i64_gather_pd #define _mm256_i64gather_pd(base_addr, vindex, scale) simde_mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_mask_i64gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m256i vindex, simde__m256d mask, const int32_t scale) SIMDE_REQUIRE_CONSTANT(scale) HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { simde__m256i_private vindex_ = simde__m256i_to_private(vindex); simde__m256d_private src_ = simde__m256d_to_private(src), mask_ = simde__m256d_to_private(mask), r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { if ((mask_.i64[i] >> 63) & 1) { const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); simde_float64 dst; simde_memcpy(&dst, src1, sizeof(dst)); r_.f64[i] = dst; } else { r_.f64[i] = src_.f64[i]; } } return simde__m256d_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_i64gather_pd #define _mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_inserti128_si256(simde__m256i a, simde__m128i b, const int imm8) 
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { simde__m256i_private a_ = simde__m256i_to_private(a); simde__m128i_private b_ = simde__m128i_to_private(b); a_.m128i_private[ imm8 & 1 ] = b_; return simde__m256i_from_private(a_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_inserti128_si256(a, b, imm8) _mm256_inserti128_si256(a, b, imm8) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_inserti128_si256 #define _mm256_inserti128_si256(a, b, imm8) simde_mm256_inserti128_si256(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_madd_epi16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_madd_epi16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_madd_epi16(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_madd_epi16(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_madd_epi16 #define _mm256_madd_epi16(a, b) simde_mm256_madd_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_maddubs_epi16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_maddubs_epi16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_maddubs_epi16(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_maddubs_epi16(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { const int idx = HEDLEY_STATIC_CAST(int, i) << 1; int32_t ts = (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + 
(HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_maddubs_epi16 #define _mm256_maddubs_epi16(a, b) simde_mm256_maddubs_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm_maskload_epi32(mem_addr, mask); #else simde__m128i_private mem_ = simde__m128i_to_private(simde_x_mm_loadu_epi32(mem_addr)), r_, mask_ = simde__m128i_to_private(mask); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vandq_s32(mem_.neon_i32, vshrq_n_s32(mask_.neon_i32, 31)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = mem_.i32[i] & (mask_.i32[i] >> 31); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_maskload_epi32 #define _mm_maskload_epi32(mem_addr, mask) simde_mm_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_maskload_epi32(mem_addr, mask); #else simde__m256i_private mask_ = simde__m256i_to_private(mask), r_ = simde__m256i_to_private(simde_x_mm256_loadu_epi32(mem_addr)); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] &= mask_.i32[i] >> 31; } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_maskload_epi32 #define _mm256_maskload_epi32(mem_addr, mask) simde_mm256_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask) 
#endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask); #else simde__m128i_private mem_ = simde__m128i_to_private(simde_x_mm_loadu_epi64((mem_addr))), r_, mask_ = simde__m128i_to_private(mask); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vandq_s64(mem_.neon_i64, vshrq_n_s64(mask_.neon_i64, 63)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = mem_.i64[i] & (mask_.i64[i] >> 63); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_maskload_epi64 #define _mm_maskload_epi64(mem_addr, mask) simde_mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask); #else simde__m256i_private mask_ = simde__m256i_to_private(mask), r_ = simde__m256i_to_private(simde_x_mm256_loadu_epi64((mem_addr))); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] &= mask_.i64[i] >> 63; } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_maskload_epi64 #define _mm256_maskload_epi64(mem_addr, mask) simde_mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) _mm_maskstore_epi32(mem_addr, mask, a); #else simde__m128i_private mask_ = simde__m128i_to_private(mask); simde__m128i_private a_ = 
simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { if (mask_.u32[i] & (UINT32_C(1) << 31)) mem_addr[i] = a_.i32[i]; } #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_maskstore_epi32 #define _mm_maskstore_epi32(mem_addr, mask, a) simde_mm_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm256_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256i a) { #if defined(SIMDE_X86_AVX2_NATIVE) _mm256_maskstore_epi32(mem_addr, mask, a); #else simde__m256i_private mask_ = simde__m256i_to_private(mask); simde__m256i_private a_ = simde__m256i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { if (mask_.u32[i] & (UINT32_C(1) << 31)) mem_addr[i] = a_.i32[i]; } #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_maskstore_epi32 #define _mm256_maskstore_epi32(mem_addr, mask, a) simde_mm256_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128i a) { #if defined(SIMDE_X86_AVX2_NATIVE) _mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a); #else simde__m128i_private mask_ = simde__m128i_to_private(mask); simde__m128i_private a_ = simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { if (mask_.u64[i] >> 63) mem_addr[i] = a_.i64[i]; } #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_maskstore_epi64 #define _mm_maskstore_epi64(mem_addr, mask, a) simde_mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm256_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256i a) { #if 
defined(SIMDE_X86_AVX2_NATIVE) _mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a); #else simde__m256i_private mask_ = simde__m256i_to_private(mask); simde__m256i_private a_ = simde__m256i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { if (mask_.u64[i] & (UINT64_C(1) << 63)) mem_addr[i] = a_.i64[i]; } #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_maskstore_epi64 #define _mm256_maskstore_epi64(mem_addr, mask, a) simde_mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_max_epi8 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI) return _mm256_max_epi8(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_max_epi8(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_max_epi8(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_max_epi8 #define _mm256_max_epi8(a, b) simde_mm256_max_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_max_epu8 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_max_epu8(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_max_epu8(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_max_epu8(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_max_epu8 #define _mm256_max_epu8(a, b) simde_mm256_max_epu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_max_epu16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_max_epu16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_max_epu16(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_max_epu16(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? a_.u16[i] : b_.u16[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_max_epu16 #define _mm256_max_epu16(a, b) simde_mm256_max_epu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_max_epu32 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_max_epu32(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_max_epu32(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_max_epu32(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? 
a_.u32[i] : b_.u32[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_max_epu32 #define _mm256_max_epu32(a, b) simde_mm256_max_epu32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_max_epi16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_max_epi16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_max_epi16(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_max_epi16(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_max_epi16 #define _mm256_max_epi16(a, b) simde_mm256_max_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_max_epi32 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_max_epi32(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_max_epi32(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_max_epi32(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_max_epi32 #define _mm256_max_epi32(a, b) simde_mm256_max_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_min_epi8 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI) return _mm256_min_epi8(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_min_epi8(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_min_epi8(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[i] < b_.i8[i] ? a_.i8[i] : b_.i8[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_min_epi8 #define _mm256_min_epi8(a, b) simde_mm256_min_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_min_epi16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_min_epi16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_min_epi16(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_min_epi16(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_min_epi16 #define _mm256_min_epi16(a, b) simde_mm256_min_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_min_epi32 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_min_epi32(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_min_epi32(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_min_epi32(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_min_epi32 #define _mm256_min_epi32(a, b) simde_mm256_min_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_min_epu8 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_min_epu8(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_min_epu8(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_min_epu8(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_min_epu8 #define _mm256_min_epu8(a, b) simde_mm256_min_epu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_min_epu16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_min_epu16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_min_epu16(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_min_epu16(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? a_.u16[i] : b_.u16[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_min_epu16 #define _mm256_min_epu16(a, b) simde_mm256_min_epu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_min_epu32 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_min_epu32(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_min_epu32(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_min_epu32(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? 
a_.u32[i] : b_.u32[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_min_epu32 #define _mm256_min_epu32(a, b) simde_mm256_min_epu32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm256_movemask_epi8 (simde__m256i a) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_movemask_epi8(a); #else simde__m256i_private a_ = simde__m256i_to_private(a); uint32_t r = 0; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { r |= HEDLEY_STATIC_CAST(uint32_t,simde_mm_movemask_epi8(a_.m128i[i])) << (16 * i); } #else r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { r |= HEDLEY_STATIC_CAST(uint32_t, (a_.u8[31 - i] >> 7)) << (31 - i); } #endif return HEDLEY_STATIC_CAST(int32_t, r); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_movemask_epi8 #define _mm256_movemask_epi8(a) simde_mm256_movemask_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mpsadbw_epu8 (simde__m256i a, simde__m256i b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); const int a_offset1 = imm8 & 4; const int b_offset1 = (imm8 & 3) << 2; const int a_offset2 = (imm8 >> 3) & 4; const int b_offset2 = ((imm8 >> 3) & 3) << 2; #if defined(simde_math_abs) const int halfway_point = HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0])) ) / 2; for (int i = 0 ; i < halfway_point ; i++) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 0] - b_.u8[b_offset1 + 0]))) + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 1] - b_.u8[b_offset1 + 1]))) + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 2] - b_.u8[b_offset1 + 2]))) + HEDLEY_STATIC_CAST(uint16_t, 
simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 3] - b_.u8[b_offset1 + 3]))); r_.u16[halfway_point + i] = HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 0] - b_.u8[2 * halfway_point + b_offset2 + 0]))) + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 1] - b_.u8[2 * halfway_point + b_offset2 + 1]))) + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 2] - b_.u8[2 * halfway_point + b_offset2 + 2]))) + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 3] - b_.u8[2 * halfway_point + b_offset2 + 3]))); } #else HEDLEY_UNREACHABLE(); #endif return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) && SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) #define simde_mm256_mpsadbw_epu8(a, b, imm8) _mm256_mpsadbw_epu8(a, b, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) #define simde_mm256_mpsadbw_epu8(a, b, imm8) \ simde_mm256_set_m128i( \ simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8 >> 3)), \ simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8))) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_mpsadbw_epu8 #define _mm256_mpsadbw_epu8(a, b, imm8) simde_mm256_mpsadbw_epu8(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mul_epi32 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_mul_epi32(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_mul_epi32(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_mul_epi32(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { 
r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) # define _mm256_mul_epi32(a, b) simde_mm256_mul_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mul_epu32 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_mul_epu32(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_mul_epu32(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_mul_epu32(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) # define _mm256_mul_epu32(a, b) simde_mm256_mul_epu32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mulhi_epi16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_mulhi_epi16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) # define _mm256_mulhi_epi16(a, b) simde_mm256_mulhi_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mulhi_epu16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_mulhi_epu16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = 
simde__m256i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) # define _mm256_mulhi_epu16(a, b) simde_mm256_mulhi_epu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mulhrs_epi16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_mulhrs_epi16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) # define _mm256_mulhrs_epi16(a, b) simde_mm256_mulhrs_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mullo_epi16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_mullo_epi16(a, b); #else simde__m256i_private a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] * b_.i16[i]); } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_mullo_epi16 #define _mm256_mullo_epi16(a, b) simde_mm256_mullo_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mullo_epi32 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_mullo_epi32(a, b); #else simde__m256i_private a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < 
(sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] * b_.i32[i]); } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_mullo_epi32 #define _mm256_mullo_epi32(a, b) simde_mm256_mullo_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_x_mm256_mullo_epu32 (simde__m256i a, simde__m256i b) { simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u32 = a_.u32 * b_.u32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] * b_.u32[i]; } #endif return simde__m256i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_or_si256 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_or_si256(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_or_si128(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_or_si128(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f | b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_or_si256 #define _mm256_or_si256(a, b) simde_mm256_or_si256(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_packs_epi16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_packs_epi16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_packs_epi16(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_packs_epi16(a_.m128i[1], b_.m128i[1]); #else const size_t 
halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/2; const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/4; SIMDE_VECTORIZE for (size_t i = 0 ; i < quarter_point ; i++) { r_.i8[i] = (a_.i16[i] > INT8_MAX) ? INT8_MAX : ((a_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); r_.i8[i + quarter_point] = (b_.i16[i] > INT8_MAX) ? INT8_MAX : ((b_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[i])); r_.i8[halfway_point + i] = (a_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + i])); r_.i8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + i])); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_packs_epi16 #define _mm256_packs_epi16(a, b) simde_mm256_packs_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_packs_epi32 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_packs_epi32(a, b); #else simde__m256i_private r_, v_[] = { simde__m256i_to_private(a), simde__m256i_to_private(b) }; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_packs_epi32(v_[0].m128i[0], v_[1].m128i[0]); r_.m128i[1] = simde_mm_packs_epi32(v_[0].m128i[1], v_[1].m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { const int32_t v = v_[(i >> 2) & 1].i32[(i & 11) - ((i & 8) >> 1)]; r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (v > INT16_MAX) ? INT16_MAX : ((v < INT16_MIN) ? 
INT16_MIN : v)); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_packs_epi32 #define _mm256_packs_epi32(a, b) simde_mm256_packs_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_packus_epi16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_packus_epi16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_packus_epi16(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_packus_epi16(a_.m128i[1], b_.m128i[1]); #else const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2; const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4; SIMDE_VECTORIZE for (size_t i = 0 ; i < quarter_point ; i++) { r_.u8[i] = (a_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[i])); r_.u8[i + quarter_point] = (b_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[i])); r_.u8[halfway_point + i] = (a_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + i])); r_.u8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + i] < 0) ? 
UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + i])); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_packus_epi16 #define _mm256_packus_epi16(a, b) simde_mm256_packus_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_packus_epi32 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_packus_epi32(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_packus_epi32(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_packus_epi32(a_.m128i[1], b_.m128i[1]); #else const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; SIMDE_VECTORIZE for (size_t i = 0 ; i < quarter_point ; i++) { r_.u16[i] = (a_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i])); r_.u16[i + quarter_point] = (b_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i])); r_.u16[halfway_point + i] = (a_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + i])); r_.u16[halfway_point + i + quarter_point] = (b_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + i] < 0) ? 
UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point + i])); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_packus_epi32 #define _mm256_packus_epi32(a, b) simde_mm256_packus_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_permute2x128_si256 (simde__m256i a, simde__m256i b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_permute2x128_si256(a, b, imm8) _mm256_permute2x128_si256(a, b, imm8) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_permute2x128_si256 #define _mm256_permute2x128_si256(a, b, imm8) simde_mm256_permute2x128_si256(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_permute4x64_epi64 (simde__m256i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m256i_private r_, a_ = simde__m256i_to_private(a); r_.i64[0] = (imm8 & 0x02) ? a_.i64[((imm8 ) & 1)+2] : a_.i64[(imm8 ) & 1]; r_.i64[1] = (imm8 & 0x08) ? a_.i64[((imm8 >> 2 ) & 1)+2] : a_.i64[(imm8 >> 2 ) & 1]; r_.i64[2] = (imm8 & 0x20) ? a_.i64[((imm8 >> 4 ) & 1)+2] : a_.i64[(imm8 >> 4 ) & 1]; r_.i64[3] = (imm8 & 0x80) ? 
a_.i64[((imm8 >> 6 ) & 1)+2] : a_.i64[(imm8 >> 6 ) & 1]; return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_permute4x64_epi64(a, imm8) _mm256_permute4x64_epi64(a, imm8) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_permute4x64_epi64 #define _mm256_permute4x64_epi64(a, imm8) simde_mm256_permute4x64_epi64(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_permute4x64_pd (simde__m256d a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m256d_private r_, a_ = simde__m256d_to_private(a); r_.f64[0] = (imm8 & 0x02) ? a_.f64[((imm8 ) & 1)+2] : a_.f64[(imm8 ) & 1]; r_.f64[1] = (imm8 & 0x08) ? a_.f64[((imm8 >> 2 ) & 1)+2] : a_.f64[(imm8 >> 2 ) & 1]; r_.f64[2] = (imm8 & 0x20) ? a_.f64[((imm8 >> 4 ) & 1)+2] : a_.f64[(imm8 >> 4 ) & 1]; r_.f64[3] = (imm8 & 0x80) ? a_.f64[((imm8 >> 6 ) & 1)+2] : a_.f64[(imm8 >> 6 ) & 1]; return simde__m256d_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_permute4x64_pd(a, imm8) _mm256_permute4x64_pd(a, imm8) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_permute4x64_pd #define _mm256_permute4x64_pd(a, imm8) simde_mm256_permute4x64_pd(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_permutevar8x32_epi32 (simde__m256i a, simde__m256i idx) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_permutevar8x32_epi32(a, idx); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), idx_ = simde__m256i_to_private(idx); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[idx_.i32[i] & 7]; } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_permutevar8x32_epi32 #define _mm256_permutevar8x32_epi32(a, idx) simde_mm256_permutevar8x32_epi32(a, idx) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_permutevar8x32_ps (simde__m256 a, simde__m256i idx) { #if 
defined(SIMDE_X86_AVX2_NATIVE) return _mm256_permutevar8x32_ps(a, idx); #else simde__m256_private r_, a_ = simde__m256_to_private(a); simde__m256i_private idx_ = simde__m256i_to_private(idx); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[idx_.i32[i] & 7]; } return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_permutevar8x32_ps #define _mm256_permutevar8x32_ps(a, idx) simde_mm256_permutevar8x32_ps(a, idx) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_sad_epu8 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_sad_epu8(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_sad_epu8(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_sad_epu8(a_.m128i[1], b_.m128i[1]); #else for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { uint16_t tmp = 0; SIMDE_VECTORIZE_REDUCTION(+:tmp) for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 4) ; j++) { const size_t e = j + (i * 8); tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); } r_.i64[i] = tmp; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_sad_epu8 #define _mm256_sad_epu8(a, b) simde_mm256_sad_epu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_shuffle_epi8 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_shuffle_epi8(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_shuffle_epi8(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_shuffle_epi8(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; i++) { r_.u8[ i ] = (b_.u8[ i ] & 0x80) ? 0 : a_.u8[(b_.u8[ i ] & 0x0f) ]; r_.u8[i + 16] = (b_.u8[i + 16] & 0x80) ? 0 : a_.u8[(b_.u8[i + 16] & 0x0f) + 16]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_shuffle_epi8 #define _mm256_shuffle_epi8(a, b) simde_mm256_shuffle_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_shuffle_epi32 (simde__m256i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m256i_private r_, a_ = simde__m256i_to_private(a); for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) { r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; } for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) { r_.i32[i + 4] = a_.i32[((imm8 >> (i * 2)) & 3) + 4]; } return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_shuffle_epi32(a, imm8) _mm256_shuffle_epi32(a, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && !defined(__PGI) # define simde_mm256_shuffle_epi32(a, imm8) \ simde_mm256_set_m128i( \ simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 
0), (imm8))) #elif defined(SIMDE_SHUFFLE_VECTOR_) # define simde_mm256_shuffle_epi32(a, imm8) (__extension__ ({ \ const simde__m256i_private simde__tmp_a_ = simde__m256i_to_private(a); \ simde__m256i_from_private((simde__m256i_private) { .i32 = \ SIMDE_SHUFFLE_VECTOR_(32, 32, \ (simde__tmp_a_).i32, \ (simde__tmp_a_).i32, \ ((imm8) ) & 3, \ ((imm8) >> 2) & 3, \ ((imm8) >> 4) & 3, \ ((imm8) >> 6) & 3, \ (((imm8) ) & 3) + 4, \ (((imm8) >> 2) & 3) + 4, \ (((imm8) >> 4) & 3) + 4, \ (((imm8) >> 6) & 3) + 4) }); })) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_shuffle_epi32 #define _mm256_shuffle_epi32(a, imm8) simde_mm256_shuffle_epi32(a, imm8) #endif #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_shufflehi_epi16(a, imm8) _mm256_shufflehi_epi16(a, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) # define simde_mm256_shufflehi_epi16(a, imm8) \ simde_mm256_set_m128i( \ simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) #elif defined(SIMDE_SHUFFLE_VECTOR_) # define simde_mm256_shufflehi_epi16(a, imm8) (__extension__ ({ \ const simde__m256i_private simde__tmp_a_ = simde__m256i_to_private(a); \ simde__m256i_from_private((simde__m256i_private) { .i16 = \ SIMDE_SHUFFLE_VECTOR_(16, 32, \ (simde__tmp_a_).i16, \ (simde__tmp_a_).i16, \ 0, 1, 2, 3, \ (((imm8) ) & 3) + 4, \ (((imm8) >> 2) & 3) + 4, \ (((imm8) >> 4) & 3) + 4, \ (((imm8) >> 6) & 3) + 4, \ 8, 9, 10, 11, \ ((((imm8) ) & 3) + 8 + 4), \ ((((imm8) >> 2) & 3) + 8 + 4), \ ((((imm8) >> 4) & 3) + 8 + 4), \ ((((imm8) >> 6) & 3) + 8 + 4) \ ) }); })) #else # define simde_mm256_shufflehi_epi16(a, imm8) \ simde_mm256_set_m128i( \ simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \ simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), imm8)) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_shufflehi_epi16 #define _mm256_shufflehi_epi16(a, imm8) 
simde_mm256_shufflehi_epi16(a, imm8) #endif #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_shufflelo_epi16(a, imm8) _mm256_shufflelo_epi16(a, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) # define simde_mm256_shufflelo_epi16(a, imm8) \ simde_mm256_set_m128i( \ simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) #elif defined(SIMDE_SHUFFLE_VECTOR_) # define simde_mm256_shufflelo_epi16(a, imm8) (__extension__ ({ \ const simde__m256i_private simde__tmp_a_ = simde__m256i_to_private(a); \ simde__m256i_from_private((simde__m256i_private) { .i16 = \ SIMDE_SHUFFLE_VECTOR_(16, 32, \ (simde__tmp_a_).i16, \ (simde__tmp_a_).i16, \ (((imm8) ) & 3), \ (((imm8) >> 2) & 3), \ (((imm8) >> 4) & 3), \ (((imm8) >> 6) & 3), \ 4, 5, 6, 7, \ ((((imm8) ) & 3) + 8), \ ((((imm8) >> 2) & 3) + 8), \ ((((imm8) >> 4) & 3) + 8), \ ((((imm8) >> 6) & 3) + 8), \ 12, 13, 14, 15) }); })) #else # define simde_mm256_shufflelo_epi16(a, imm8) \ simde_mm256_set_m128i( \ simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \ simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), imm8)) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_shufflelo_epi16 #define _mm256_shufflelo_epi16(a, imm8) simde_mm256_shufflelo_epi16(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_sign_epi8 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_sign_epi8(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (b_.i8[i] < INT32_C(0)) ? 
-a_.i8[i] : a_.i8[i]; } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_sign_epi8 #define _mm256_sign_epi8(a, b) simde_mm256_sign_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_sign_epi16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_sign_epi16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (b_.i16[i] < INT32_C(0)) ? -a_.i16[i] : a_.i16[i]; } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_sign_epi16 #define _mm256_sign_epi16(a, b) simde_mm256_sign_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_sign_epi32(simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_sign_epi32(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { r_.i32[i] = (b_.i32[i] < INT32_C(0)) ? 
-a_.i32[i] : a_.i32[i]; } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_sign_epi32 #define _mm256_sign_epi32(a, b) simde_mm256_sign_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_sll_epi16 (simde__m256i a, simde__m128i count) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_sll_epi16(a, count); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_sll_epi16(a_.m128i[0], count); r_.m128i[1] = simde_mm_sll_epi16(a_.m128i[1], count); #else simde__m128i_private count_ = simde__m128i_to_private(count); uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); if (shift > 15) return simde_mm256_setzero_si256(); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, shift); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (shift)); } #endif #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_sll_epi16 #define _mm256_sll_epi16(a, count) simde_mm256_sll_epi16(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_sll_epi32 (simde__m256i a, simde__m128i count) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_sll_epi32(a, count); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_sll_epi32(a_.m128i[0], count); r_.m128i[1] = simde_mm_sll_epi32(a_.m128i[1], count); #else simde__m128i_private count_ = simde__m128i_to_private(count); uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); if (shift > 31) return simde_mm256_setzero_si256(); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, shift); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { 
r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << (shift)); } #endif #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_sll_epi32 #define _mm256_sll_epi32(a, count) simde_mm256_sll_epi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_sll_epi64 (simde__m256i a, simde__m128i count) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_sll_epi64(a, count); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_sll_epi64(a_.m128i[0], count); r_.m128i[1] = simde_mm_sll_epi64(a_.m128i[1], count); #else simde__m128i_private count_ = simde__m128i_to_private(count); uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); if (shift > 63) return simde_mm256_setzero_si256(); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, shift); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << (shift)); } #endif #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_sll_epi64 #define _mm256_sll_epi64(a, count) simde_mm256_sll_epi64(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_slli_epi16 (simde__m256i a, const int imm8) SIMDE_REQUIRE_RANGE(imm8, 0, 255) { /* Note: There is no consistency in how compilers handle values outside of the expected range, hence the discrepancy between what we allow and what Intel specifies. Some compilers will return 0, others seem to just mask off everything outside of the range. 
*/ simde__m256i_private r_, a_ = simde__m256i_to_private(a); #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); for (size_t i = 0 ; i < (sizeof(a_.altivec_i16) / sizeof(a_.altivec_i16[0])) ; i++) { r_.altivec_i16[i] = vec_sl(a_.altivec_i16[i], sv); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (imm8 & 0xff)); } #endif return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_slli_epi16(a, imm8) _mm256_slli_epi16(a, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) # define simde_mm256_slli_epi16(a, imm8) \ simde_mm256_set_m128i( \ simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_slli_epi16 #define _mm256_slli_epi16(a, imm8) simde_mm256_slli_epi16(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_slli_epi32 (simde__m256i a, const int imm8) SIMDE_REQUIRE_RANGE(imm8, 0, 255) { simde__m256i_private r_, a_ = simde__m256i_to_private(a); #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); for (size_t i = 0 ; i < (sizeof(a_.altivec_i32) / sizeof(a_.altivec_i32[0])) ; i++) { r_.altivec_i32[i] = vec_sl(a_.altivec_i32[i], sv); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, imm8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] << (imm8 & 0xff); } #endif return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_slli_epi32(a, imm8) 
_mm256_slli_epi32(a, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) # define simde_mm256_slli_epi32(a, imm8) \ simde_mm256_set_m128i( \ simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_slli_epi32 #define _mm256_slli_epi32(a, imm8) simde_mm256_slli_epi32(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_slli_epi64 (simde__m256i a, const int imm8) SIMDE_REQUIRE_RANGE(imm8, 0, 255) { simde__m256i_private r_, a_ = simde__m256i_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, imm8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[i] << (imm8 & 0xff); } #endif return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_slli_epi64(a, imm8) _mm256_slli_epi64(a, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) # define simde_mm256_slli_epi64(a, imm8) \ simde_mm256_set_m128i( \ simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_slli_epi64 #define _mm256_slli_epi64(a, imm8) simde_mm256_slli_epi64(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_slli_si256 (simde__m256i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m256i_private r_, a_ = simde__m256i_to_private(a); for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { const int e = HEDLEY_STATIC_CAST(int, i) - imm8; r_.m128i_private[h].i8[i] = (e >= 0) ? 
a_.m128i_private[h].i8[e] : 0; } } return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_slli_si256(a, imm8) _mm256_slli_si256(a, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && !defined(__PGI) # define simde_mm256_slli_si256(a, imm8) \ simde_mm256_set_m128i( \ simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) # define simde_mm256_slli_si256(a, imm8) \ simde_mm256_set_m128i( \ simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_slli_si256 #define _mm256_slli_si256(a, imm8) simde_mm256_slli_si256(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sllv_epi32 (simde__m128i a, simde__m128i b) { simde__m128i_private a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vshlq_u32(a_.neon_u32, vreinterpretq_s32_u32(b_.neon_u32)); r_.neon_u32 = vandq_u32(r_.neon_u32, vcltq_u32(b_.neon_u32, vdupq_n_u32(32))); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u32 = HEDLEY_STATIC_CAST(__typeof__(r_.u32), (b_.u32 < 32) & (a_.u32 << b_.u32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] << b_.u32[i]) : 0; } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm_sllv_epi32(a, b) _mm_sllv_epi32(a, b) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_sllv_epi32 #define _mm_sllv_epi32(a, b) simde_mm_sllv_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_sllv_epi32 (simde__m256i a, simde__m256i b) { simde__m256i_private a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b), r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_sllv_epi32(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_sllv_epi32(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u32 = HEDLEY_STATIC_CAST(__typeof__(r_.u32), (b_.u32 < 32) & (a_.u32 << b_.u32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] << b_.u32[i]) : 0; } #endif return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_sllv_epi32(a, b) _mm256_sllv_epi32(a, b) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_sllv_epi32 #define _mm256_sllv_epi32(a, b) simde_mm256_sllv_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sllv_epi64 (simde__m128i a, simde__m128i b) { simde__m128i_private a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), r_; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vshlq_u64(a_.neon_u64, vreinterpretq_s64_u64(b_.neon_u64)); r_.neon_u64 = vandq_u64(r_.neon_u64, vcltq_u64(b_.neon_u64, vdupq_n_u64(64))); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u64 = HEDLEY_STATIC_CAST(__typeof__(r_.u64), (b_.u64 < 64) & (a_.u64 << b_.u64)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = (b_.u64[i] < 64) ? 
(a_.u64[i] << b_.u64[i]) : 0; } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm_sllv_epi64(a, b) _mm_sllv_epi64(a, b) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_sllv_epi64 #define _mm_sllv_epi64(a, b) simde_mm_sllv_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_sllv_epi64 (simde__m256i a, simde__m256i b) { simde__m256i_private a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b), r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_sllv_epi64(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_sllv_epi64(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u64 = HEDLEY_STATIC_CAST(__typeof__(r_.u64), (b_.u64 < 64) & (a_.u64 << b_.u64)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] << b_.u64[i]) : 0; } #endif return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_sllv_epi64(a, b) _mm256_sllv_epi64(a, b) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_sllv_epi64 #define _mm256_sllv_epi64(a, b) simde_mm256_sllv_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_sra_epi16 (simde__m256i a, simde__m128i count) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_sra_epi16(a, count); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_sra_epi16(a_.m128i[0], count); r_.m128i[1] = simde_mm_sra_epi16(a_.m128i[1], count); #else simde__m128i_private count_ = simde__m128i_to_private(count); uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); if (shift > 15) shift = 15; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] >> shift; } 
#endif #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_sra_epi16 #define _mm256_sra_epi16(a, count) simde_mm256_sra_epi16(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_sra_epi32 (simde__m256i a, simde__m128i count) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_sra_epi32(a, count); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_sra_epi32(a_.m128i[0], count); r_.m128i[1] = simde_mm_sra_epi32(a_.m128i[1], count); #else simde__m128i_private count_ = simde__m128i_to_private(count); uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); if (shift > 31) shift = 31; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] >> shift; } #endif #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_sra_epi32 #define _mm256_sra_epi32(a, count) simde_mm256_sra_epi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_srai_epi16 (simde__m256i a, const int imm8) SIMDE_REQUIRE_RANGE(imm8, 0, 255) { simde__m256i_private r_, a_ = simde__m256i_to_private(a); unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); if (shift > 15) shift = 15; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] >> shift; } #endif return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_srai_epi16(a, imm8) _mm256_srai_epi16(a, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) # define simde_mm256_srai_epi16(a, imm8) \ simde_mm256_set_m128i( \ simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 1), 
(imm8)), \ simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_srai_epi16 #define _mm256_srai_epi16(a, imm8) simde_mm256_srai_epi16(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_srai_epi32 (simde__m256i a, const int imm8) SIMDE_REQUIRE_RANGE(imm8, 0, 255) { simde__m256i_private r_, a_ = simde__m256i_to_private(a); unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); if (shift > 31) shift = 31; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] >> shift; } #endif return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_srai_epi32(a, imm8) _mm256_srai_epi32(a, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) # define simde_mm256_srai_epi32(a, imm8) \ simde_mm256_set_m128i( \ simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_srai_epi32 #define _mm256_srai_epi32(a, imm8) simde_mm256_srai_epi32(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srav_epi32 (simde__m128i a, simde__m128i count) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm_srav_epi32(a, count); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), count_ = simde__m128i_to_private(count); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int32x4_t cnt = vreinterpretq_s32_u32(vminq_u32(count_.neon_u32, vdupq_n_u32(31))); r_.neon_i32 = vshlq_s32(a_.neon_i32, vnegq_s32(cnt)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); r_.i32[i] = a_.i32[i] >> HEDLEY_STATIC_CAST(int, shift > 31 ? 
31 : shift); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_srav_epi32 #define _mm_srav_epi32(a, count) simde_mm_srav_epi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_srav_epi32 (simde__m256i a, simde__m256i count) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_srav_epi32(a, count); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), count_ = simde__m256i_to_private(count); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_srav_epi32(a_.m128i[0], count_.m128i[0]); r_.m128i[1] = simde_mm_srav_epi32(a_.m128i[1], count_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); if (shift > 31) shift = 31; r_.i32[i] = a_.i32[i] >> shift; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_srav_epi32 #define _mm256_srav_epi32(a, count) simde_mm256_srav_epi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_srl_epi16 (simde__m256i a, simde__m128i count) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_srl_epi16(a, count); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_srl_epi16(a_.m128i[0], count); r_.m128i[1] = simde_mm_srl_epi16(a_.m128i[1], count); #else simde__m128i_private count_ = simde__m128i_to_private(count); uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 16 ? 
16 : count_.i64[0])); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(int16_t, shift); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.u16[i] = a_.u16[i] >> (shift); } #endif #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_srl_epi16 #define _mm256_srl_epi16(a, count) simde_mm256_srl_epi16(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_srl_epi32 (simde__m256i a, simde__m128i count) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_srl_epi32(a, count); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_srl_epi32(a_.m128i[0], count); r_.m128i[1] = simde_mm_srl_epi32(a_.m128i[1], count); #else simde__m128i_private count_ = simde__m128i_to_private(count); uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 32 ? 32 : count_.i64[0])); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u32 = a_.u32 >> HEDLEY_STATIC_CAST(int32_t, shift); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.u32[i] = a_.u32[i] >> (shift); } #endif #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_srl_epi32 #define _mm256_srl_epi32(a, count) simde_mm256_srl_epi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_srl_epi64 (simde__m256i a, simde__m128i count) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_srl_epi64(a, count); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_srl_epi64(a_.m128i[0], count); r_.m128i[1] = simde_mm_srl_epi64(a_.m128i[1], count); #else simde__m128i_private count_ = simde__m128i_to_private(count); uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 64 ? 
64 : count_.i64[0])); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u64 = a_.u64 >> HEDLEY_STATIC_CAST(int64_t, shift); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.u64[i] = a_.u64[i] >> (shift); } #endif #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_srl_epi64 #define _mm256_srl_epi64(a, count) simde_mm256_srl_epi64(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_srli_epi16 (simde__m256i a, const int imm8) SIMDE_REQUIRE_RANGE(imm8, 0, 255) { simde__m256i_private r_, a_ = simde__m256i_to_private(a); if (imm8 > 15) return simde_mm256_setzero_si256(); #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); for (size_t i = 0 ; i < (sizeof(a_.altivec_u16) / sizeof(a_.altivec_u16[0])) ; i++) { r_.altivec_u16[i] = vec_sr(a_.altivec_u16[i], sv); } #else if (HEDLEY_STATIC_CAST(unsigned int, imm8) > 15) { simde_memset(&r_, 0, sizeof(r_)); } else { #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(int16_t, imm8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = a_.u16[i] >> imm8; } #endif } #endif return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_srli_epi16(a, imm8) _mm256_srli_epi16(a, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) # define simde_mm256_srli_epi16(a, imm8) \ simde_mm256_set_m128i( \ simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_srli_epi16 #define _mm256_srli_epi16(a, imm8) simde_mm256_srli_epi16(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_srli_epi32 (simde__m256i a, const int imm8) SIMDE_REQUIRE_RANGE(imm8, 0, 255) { 
simde__m256i_private r_, a_ = simde__m256i_to_private(a); #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); for (size_t i = 0 ; i < (sizeof(a_.altivec_u32) / sizeof(a_.altivec_u32[0])) ; i++) { r_.altivec_u32[i] = vec_sr(a_.altivec_u32[i], sv); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u32 = a_.u32 >> HEDLEY_STATIC_CAST(int16_t, imm8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] >> imm8; } #endif return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_srli_epi32(a, imm8) _mm256_srli_epi32(a, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) # define simde_mm256_srli_epi32(a, imm8) \ simde_mm256_set_m128i( \ simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_srli_epi32 #define _mm256_srli_epi32(a, imm8) simde_mm256_srli_epi32(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_srli_epi64 (simde__m256i a, const int imm8) SIMDE_REQUIRE_RANGE(imm8, 0, 255) { simde__m256i_private r_, a_ = simde__m256i_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u64 = a_.u64 >> HEDLEY_STATIC_CAST(int32_t, imm8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = a_.u64[i] >> imm8; } #endif return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_srli_epi64(a, imm8) _mm256_srli_epi64(a, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) # define simde_mm256_srli_epi64(a, imm8) \ simde_mm256_set_m128i( \ simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_srli_epi64 
#define _mm256_srli_epi64(a, imm8) simde_mm256_srli_epi64(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_srli_si256 (simde__m256i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m256i_private r_, a_ = simde__m256i_to_private(a); for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { const int e = imm8 + HEDLEY_STATIC_CAST(int, i); r_.m128i_private[h].i8[i] = (e < 16) ? a_.m128i_private[h].i8[e] : 0; } } return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) # define simde_mm256_srli_si256(a, imm8) _mm256_srli_si256(a, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && !defined(__PGI) # define simde_mm256_srli_si256(a, imm8) \ simde_mm256_set_m128i( \ simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) # define simde_mm256_srli_si256(a, imm8) \ simde_mm256_set_m128i( \ simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_srli_si256 #define _mm256_srli_si256(a, imm8) simde_mm256_srli_si256(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srlv_epi32 (simde__m128i a, simde__m128i b) { simde__m128i_private a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u32 = HEDLEY_STATIC_CAST(__typeof__(r_.u32), (b_.u32 < 32) & (a_.u32 >> b_.u32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] >> b_.u32[i]) : 0; } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm_srlv_epi32(a, b) _mm_srlv_epi32(a, b) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_srlv_epi32 #define _mm_srlv_epi32(a, b) simde_mm_srlv_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_srlv_epi32 (simde__m256i a, simde__m256i b) { simde__m256i_private a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u32 = HEDLEY_STATIC_CAST(__typeof__(r_.u32), (b_.u32 < 32) & (a_.u32 >> b_.u32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] >> b_.u32[i]) : 0; } #endif return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_srlv_epi32(a, b) _mm256_srlv_epi32(a, b) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_srlv_epi32 #define _mm256_srlv_epi32(a, b) simde_mm256_srlv_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srlv_epi64 (simde__m128i a, simde__m128i b) { simde__m128i_private a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u64 = HEDLEY_STATIC_CAST(__typeof__(r_.u64), (b_.u64 < 64) & (a_.u64 >> b_.u64)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = (b_.u64[i] < 64) ? 
(a_.u64[i] >> b_.u64[i]) : 0; } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm_srlv_epi64(a, b) _mm_srlv_epi64(a, b) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm_srlv_epi64 #define _mm_srlv_epi64(a, b) simde_mm_srlv_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_srlv_epi64 (simde__m256i a, simde__m256i b) { simde__m256i_private a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u64 = HEDLEY_STATIC_CAST(__typeof__(r_.u64), (b_.u64 < 64) & (a_.u64 >> b_.u64)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; } #endif return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX2_NATIVE) #define simde_mm256_srlv_epi64(a, b) _mm256_srlv_epi64(a, b) #endif #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_srlv_epi64 #define _mm256_srlv_epi64(a, b) simde_mm256_srlv_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_stream_load_si256 (const simde__m256i* mem_addr) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_stream_load_si256(HEDLEY_CONST_CAST(simde__m256i*, mem_addr)); #else simde__m256i r; simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); return r; #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) # define _mm256_stream_load_si256(mem_addr) simde_mm256_stream_load_si256(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_sub_epi8 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_sub_epi8(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_sub_epi8(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_sub_epi8(a_.m128i[1], b_.m128i[1]); #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = a_.i8 - b_.i8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[i] - b_.i8[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_sub_epi8 #define _mm256_sub_epi8(a, b) simde_mm256_sub_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_sub_epi16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_sub_epi16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_sub_epi16(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_sub_epi16(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = a_.i16 - b_.i16; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] - b_.i16[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_sub_epi16 #define _mm256_sub_epi16(a, b) simde_mm256_sub_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_hsub_epi16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_hsub_epi16(a, b); #else return simde_mm256_sub_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_hsub_epi16 #define _mm256_hsub_epi16(a, b) simde_mm256_hsub_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_sub_epi32 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_sub_epi32(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_sub_epi32(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = 
simde_mm_sub_epi32(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 - b_.i32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] - b_.i32[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_sub_epi32 #define _mm256_sub_epi32(a, b) simde_mm256_sub_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_hsub_epi32 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_hsub_epi32(a, b); #else return simde_mm256_sub_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_hsub_epi32 #define _mm256_hsub_epi32(a, b) simde_mm256_hsub_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_sub_epi64 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_sub_epi64(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_sub_epi64(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_sub_epi64(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 - b_.i64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[i] - b_.i64[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_sub_epi64 #define _mm256_sub_epi64(a, b) simde_mm256_sub_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_x_mm256_sub_epu32 (simde__m256i a, simde__m256i b) { simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u32 = a_.u32 - b_.u32; #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = 
simde_x_mm_sub_epu32(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_x_mm_sub_epu32(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] - b_.u32[i]; } #endif return simde__m256i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_subs_epi8 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_subs_epi8(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_subs_epi8(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_subs_epi8(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_subs_epi8 #define _mm256_subs_epi8(a, b) simde_mm256_subs_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_subs_epi16(simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_subs_epi16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_subs_epi16(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_subs_epi16(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) #undef _mm256_subs_epi16 #define _mm256_subs_epi16(a, b) simde_mm256_subs_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_hsubs_epi16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) return _mm256_hsubs_epi16(a, b); #else return 
simde_mm256_subs_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_hsubs_epi16
  #define _mm256_hsubs_epi16(a, b) simde_mm256_hsubs_epi16(a, b)
#endif

/* Portable wrapper for _mm256_subs_epu8.
 *
 * With native AVX2 the intrinsic is called directly.  Otherwise the result
 * is built either from two simde_mm_subs_epu8 calls on the 128-bit halves
 * or lane by lane over the u8 elements with simde_math_subs_u8 (helper
 * defined elsewhere; presumably a saturating unsigned subtract -- confirm
 * in simde-math.h). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_subs_epu8 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_subs_epu8(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      /* Reuse the 128-bit implementation on each half. */
      r_.m128i[0] = simde_mm_subs_epu8(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_subs_epu8(a_.m128i[1], b_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
        r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]);
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_subs_epu8
  #define _mm256_subs_epu8(a, b) simde_mm256_subs_epu8(a, b)
#endif

/* Portable wrapper for _mm256_subs_epu16.
 *
 * Same structure as the 8-bit variant: native intrinsic when available,
 * otherwise two simde_mm_subs_epu16 calls on the 128-bit halves or a loop
 * over the u16 lanes using simde_math_subs_u16 (defined elsewhere). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_subs_epu16(simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_subs_epu16(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      /* Reuse the 128-bit implementation on each half. */
      r_.m128i[0] = simde_mm_subs_epu16(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_subs_epu16(a_.m128i[1], b_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]);
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_subs_epu16
  #define _mm256_subs_epu16(a, b) simde_mm256_subs_epu16(a, b)
#endif

/* SIMDe extension (no native counterpart is referenced here): returns 1
 * when every bit of `a` is set and 0 otherwise.
 *
 * All i32f lanes are AND-ed into an accumulator that starts as all-ones;
 * the accumulator is still all-ones only if no lane had a clear bit. */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_x_mm256_test_all_ones (simde__m256i a) {
  simde__m256i_private a_ = simde__m256i_to_private(a);
  int r;
  int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0);

  SIMDE_VECTORIZE_REDUCTION(&:r_)
  for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) {
    r_ &= a_.i32f[i];
  }

  r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0));

  return r;
}

/* Portable wrapper for _mm256_unpacklo_epi8.
 *
 * The fallback handles each 128-bit half separately with
 * simde_mm_unpacklo_epi8; the shuffle form selects a/b elements
 * alternately (0,32,1,33,... then 16,48,...), i.e. the low eight bytes of
 * each half of `a` and `b` interleaved. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_unpacklo_epi8 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_unpacklo_epi8(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_SHUFFLE_VECTOR_)
      r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8,
           0, 32,  1, 33,  2, 34,  3, 35,
           4, 36,  5, 37,  6, 38,  7, 39,
          16, 48, 17, 49, 18, 50, 19, 51,
          20, 52, 21, 53, 22, 54, 23, 55);
    #else
      r_.m128i[0] = simde_mm_unpacklo_epi8(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_unpacklo_epi8(a_.m128i[1], b_.m128i[1]);
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_unpacklo_epi8
  #define _mm256_unpacklo_epi8(a, b) simde_mm256_unpacklo_epi8(a, b)
#endif

/* Portable wrapper for _mm256_unpacklo_epi16: interleaves the low four
 * 16-bit elements of each 128-bit half of `a` and `b` (shuffle indices
 * 0,16,1,17,... then 8,24,...), or defers to simde_mm_unpacklo_epi16 per
 * half. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_unpacklo_epi16 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_unpacklo_epi16(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_SHUFFLE_VECTOR_)
      r_.i16 =SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16,
        0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27);
    #else
      r_.m128i[0] = simde_mm_unpacklo_epi16(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_unpacklo_epi16(a_.m128i[1], b_.m128i[1]);
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_unpacklo_epi16
  #define _mm256_unpacklo_epi16(a, b) simde_mm256_unpacklo_epi16(a, b)
#endif

/* Portable wrapper for _mm256_unpacklo_epi32: interleaves the low two
 * 32-bit elements of each 128-bit half of `a` and `b` (indices 0,8,1,9
 * then 4,12,5,13), or defers to simde_mm_unpacklo_epi32 per half. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_unpacklo_epi32 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_unpacklo_epi32(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_SHUFFLE_VECTOR_)
      r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32,
                                    0, 8, 1, 9, 4, 12, 5, 13);
    #else
      r_.m128i[0] = simde_mm_unpacklo_epi32(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_unpacklo_epi32(a_.m128i[1], b_.m128i[1]);
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_unpacklo_epi32
  #define _mm256_unpacklo_epi32(a, b) simde_mm256_unpacklo_epi32(a, b)
#endif

/* Portable wrapper for _mm256_unpacklo_epi64: pairs the low 64-bit element
 * of each 128-bit half of `a` with that of `b` (indices 0,4 then 2,6), or
 * defers to simde_mm_unpacklo_epi64 per half. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_unpacklo_epi64 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_unpacklo_epi64(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_SHUFFLE_VECTOR_)
      r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 0, 4, 2, 6);
    #else
      r_.m128i[0] = simde_mm_unpacklo_epi64(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_unpacklo_epi64(a_.m128i[1], b_.m128i[1]);
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_unpacklo_epi64
  #define _mm256_unpacklo_epi64(a, b) simde_mm256_unpacklo_epi64(a, b)
#endif

/* Portable wrapper for _mm256_unpackhi_epi8: interleaves the high eight
 * bytes of each 128-bit half of `a` and `b` (indices 8,40,... then
 * 24,56,...), or defers to simde_mm_unpackhi_epi8 per half. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_unpackhi_epi8 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_unpackhi_epi8(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_SHUFFLE_VECTOR_)
      r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8,
           8, 40,  9, 41, 10, 42, 11, 43,
          12, 44, 13, 45, 14, 46, 15, 47,
          24, 56, 25, 57, 26, 58, 27, 59,
          28, 60, 29, 61, 30, 62, 31, 63);
    #else
      r_.m128i[0] = simde_mm_unpackhi_epi8(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_unpackhi_epi8(a_.m128i[1], b_.m128i[1]);
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_unpackhi_epi8
  #define _mm256_unpackhi_epi8(a, b) simde_mm256_unpackhi_epi8(a, b)
#endif

/* Portable wrapper for _mm256_unpackhi_epi16: interleaves the high four
 * 16-bit elements of each 128-bit half of `a` and `b` (indices 4,20,...
 * then 12,28,...), or defers to simde_mm_unpackhi_epi16 per half. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_unpackhi_epi16 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_unpackhi_epi16(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_SHUFFLE_VECTOR_)
      r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16,
        4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31);
    #else
      r_.m128i[0] = simde_mm_unpackhi_epi16(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_unpackhi_epi16(a_.m128i[1], b_.m128i[1]);
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_unpackhi_epi16
  #define _mm256_unpackhi_epi16(a, b) simde_mm256_unpackhi_epi16(a, b)
#endif

/* Portable wrapper for _mm256_unpackhi_epi32: interleaves the high two
 * 32-bit elements of each 128-bit half of `a` and `b` (indices 2,10,3,11
 * then 6,14,7,15), or defers to simde_mm_unpackhi_epi32 per half. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_unpackhi_epi32 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_unpackhi_epi32(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_SHUFFLE_VECTOR_)
      r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32,
                                    2, 10, 3, 11, 6, 14, 7, 15);
    #else
      r_.m128i[0] = simde_mm_unpackhi_epi32(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_unpackhi_epi32(a_.m128i[1], b_.m128i[1]);
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_unpackhi_epi32
  #define _mm256_unpackhi_epi32(a, b) simde_mm256_unpackhi_epi32(a, b)
#endif

/* Portable wrapper for _mm256_unpackhi_epi64: pairs the high 64-bit element
 * of each 128-bit half of `a` with that of `b` (indices 1,5 then 3,7), or
 * defers to simde_mm_unpackhi_epi64 per half. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_unpackhi_epi64 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_unpackhi_epi64(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_SHUFFLE_VECTOR_)
      r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 1, 5, 3, 7);
    #else
      r_.m128i[0] = simde_mm_unpackhi_epi64(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_unpackhi_epi64(a_.m128i[1], b_.m128i[1]);
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_unpackhi_epi64
  #define _mm256_unpackhi_epi64(a, b) simde_mm256_unpackhi_epi64(a, b)
#endif

/* Portable wrapper for _mm256_xor_si256: bitwise XOR of the full 256 bits.
 *
 * Fallback order: two simde_mm_xor_si128 calls on the halves, a single
 * vector-extension `^` on the i32f view, or a scalar loop over the i64
 * lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_xor_si256 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    return _mm256_xor_si256(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]);
      r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = a_.i32f ^ b_.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.i64[i] = a_.i64[i] ^ b_.i64[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES)
  #undef _mm256_xor_si256
  #define _mm256_xor_si256(a, b) simde_mm256_xor_si256(a, b)
#endif

SIMDE_END_DECLS_

HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_X86_AVX2_H) */
simde-0.7.2/simde/x86/avx512.h000066400000000000000000000062331400333146700155530ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_X86_AVX512_H) #define SIMDE_X86_AVX512_H #include "avx512/types.h" #include "avx512/2intersect.h" #include "avx512/abs.h" #include "avx512/add.h" #include "avx512/adds.h" #include "avx512/and.h" #include "avx512/andnot.h" #include "avx512/avg.h" #include "avx512/blend.h" #include "avx512/broadcast.h" #include "avx512/cast.h" #include "avx512/cmp.h" #include "avx512/cmpeq.h" #include "avx512/cmpge.h" #include "avx512/cmpgt.h" #include "avx512/cmple.h" #include "avx512/cmplt.h" #include "avx512/copysign.h" #include "avx512/cvt.h" #include "avx512/cvts.h" #include "avx512/div.h" #include "avx512/extract.h" #include "avx512/fmadd.h" #include "avx512/fmsub.h" #include "avx512/fnmadd.h" #include "avx512/fnmsub.h" #include "avx512/insert.h" #include "avx512/kshift.h" #include "avx512/load.h" #include "avx512/loadu.h" #include "avx512/lzcnt.h" #include "avx512/madd.h" #include "avx512/maddubs.h" #include "avx512/max.h" #include "avx512/min.h" #include "avx512/mov.h" #include "avx512/mov_mask.h" #include "avx512/movm.h" #include "avx512/mul.h" #include "avx512/mulhi.h" #include "avx512/mulhrs.h" #include "avx512/mullo.h" #include "avx512/negate.h" #include "avx512/or.h" #include "avx512/packs.h" #include "avx512/packus.h" #include "avx512/permutexvar.h" #include "avx512/permutex2var.h" #include "avx512/sad.h" #include "avx512/set.h" #include "avx512/set1.h" #include "avx512/set4.h" #include "avx512/setr.h" #include "avx512/setr4.h" #include 
"avx512/setzero.h" #include "avx512/setone.h" #include "avx512/shuffle.h" #include "avx512/sll.h" #include "avx512/slli.h" #include "avx512/sllv.h" #include "avx512/sqrt.h" #include "avx512/sra.h" #include "avx512/srai.h" #include "avx512/srav.h" #include "avx512/srl.h" #include "avx512/srli.h" #include "avx512/srlv.h" #include "avx512/store.h" #include "avx512/storeu.h" #include "avx512/sub.h" #include "avx512/subs.h" #include "avx512/test.h" #include "avx512/unpacklo.h" #include "avx512/unpackhi.h" #include "avx512/xor.h" #include "avx512/xorsign.h" #endif simde-0.7.2/simde/x86/avx512/000077500000000000000000000000001400333146700153765ustar00rootroot00000000000000simde-0.7.2/simde/x86/avx512/2intersect.h000066400000000000000000000175321400333146700176410ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Ashleigh Newman-Jones */ #if !defined(SIMDE_X86_AVX512_2INTERSECT_H) #define SIMDE_X86_AVX512_2INTERSECT_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES void simde_mm_2intersect_epi32(simde__m128i a, simde__m128i b, simde__mmask8 *k1, simde__mmask8 *k2) { #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) _mm_2intersect_epi32(a, b, k1, k2); #else simde__m128i_private a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); simde__mmask8 k1_ = 0, k2_ = 0; for (size_t i = 0 ; i < sizeof(a_.i32) / sizeof(a_.i32[0]) ; i++) { #if defined(SIMDE_ENABLE_OPENMP) #pragma omp simd reduction(|:k1_) reduction(|:k2_) #else SIMDE_VECTORIZE #endif for (size_t j = 0 ; j < sizeof(b_.i32) / sizeof(b_.i32[0]) ; j++) { const int32_t m = a_.i32[i] == b_.i32[j]; k1_ |= m << i; k2_ |= m << j; } } *k1 = k1_; *k2 = k2_; #endif } #if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef __mm_2intersect_epi32 #define __mm_2intersect_epi32(a,b, k1, k2) simde_mm_2intersect_epi32(a, b, k1, k2) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_2intersect_epi64(simde__m128i a, simde__m128i b, simde__mmask8 *k1, simde__mmask8 *k2) { #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) _mm_2intersect_epi64(a, b, k1, k2); #else simde__m128i_private a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); simde__mmask8 k1_ = 0, k2_ = 0; for (size_t i = 0 ; i < sizeof(a_.i64) / sizeof(a_.i64[0]) ; i++) { #if defined(SIMDE_ENABLE_OPENMP) #pragma omp simd reduction(|:k1_) reduction(|:k2_) #else SIMDE_VECTORIZE #endif for (size_t j = 0 ; j < sizeof(b_.i64) / sizeof(b_.i64[0]) ; j++) { const int32_t m = a_.i64[i] == b_.i64[j]; k1_ |= m << i; k2_ |= m << j; } } *k1 = k1_; *k2 = k2_; #endif } #if 
defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef __mm_2intersect_epi64 #define __mm_2intersect_epi64(a,b, k1, k2) simde_mm_2intersect_epi64(a, b, k1, k2) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm256_2intersect_epi32(simde__m256i a, simde__m256i b, simde__mmask8 *k1, simde__mmask8 *k2) { #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) _mm256_2intersect_epi32(a, b, k1, k2); #else simde__m256i_private a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); simde__mmask8 k1_ = 0, k2_ = 0; for (size_t i = 0 ; i < sizeof(a_.i32) / sizeof(a_.i32[0]) ; i++) { #if defined(SIMDE_ENABLE_OPENMP) #pragma omp simd reduction(|:k1_) reduction(|:k2_) #else SIMDE_VECTORIZE #endif for (size_t j = 0 ; j < sizeof(b_.i32) / sizeof(b_.i32[0]) ; j++) { const int32_t m = a_.i32[i] == b_.i32[j]; k1_ |= m << i; k2_ |= m << j; } } *k1 = k1_; *k2 = k2_; #endif } #if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_2intersect_epi32 #define _mm256_2intersect_epi32(a,b, k1, k2) simde_mm256_2intersect_epi32(a, b, k1, k2) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm256_2intersect_epi64(simde__m256i a, simde__m256i b, simde__mmask8 *k1, simde__mmask8 *k2) { #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) _mm256_2intersect_epi64(a, b, k1, k2); #else simde__m256i_private a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); simde__mmask8 k1_ = 0, k2_ = 0; for (size_t i = 0 ; i < sizeof(a_.i64) / sizeof(a_.i64[0]) ; i++) { #if defined(SIMDE_ENABLE_OPENMP) #pragma omp simd reduction(|:k1_) reduction(|:k2_) #else SIMDE_VECTORIZE #endif for (size_t j = 0 ; j < sizeof(b_.i64) / sizeof(b_.i64[0]) ; j++) { const int32_t m = a_.i64[i] == b_.i64[j]; k1_ |= m << i; k2_ |= m << j; } } *k1 = k1_; *k2 = k2_; #endif } #if 
defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_2intersect_epi64 #define _mm256_2intersect_epi64(a,b, k1, k2) simde_mm256_2intersect_epi64(a, b, k1, k2) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm512_2intersect_epi32(simde__m512i a, simde__m512i b, simde__mmask16 *k1, simde__mmask16 *k2) { #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) _mm512_2intersect_epi32(a, b, k1, k2); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); simde__mmask16 k1_ = 0, k2_ = 0; for (size_t i = 0 ; i < sizeof(a_.i32) / sizeof(a_.i32[0]) ; i++) { #if defined(SIMDE_ENABLE_OPENMP) #pragma omp simd reduction(|:k1_) reduction(|:k2_) #else SIMDE_VECTORIZE #endif for (size_t j = 0 ; j < sizeof(b_.i32) / sizeof(b_.i32[0]) ; j++) { const int32_t m = a_.i32[i] == b_.i32[j]; k1_ |= m << i; k2_ |= m << j; } } *k1 = k1_; *k2 = k2_; #endif } #if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) #undef _mm512_2intersect_epi32 #define _mm512_2intersect_epi32(a, b, k1, k2) simde_mm512_2intersect_epi32(a, b, k1, k2) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm512_2intersect_epi64(simde__m512i a, simde__m512i b, simde__mmask8 *k1, simde__mmask8 *k2) { #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) _mm512_2intersect_epi64(a, b, k1, k2); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); simde__mmask8 k1_ = 0, k2_ = 0; for (size_t i = 0 ; i < sizeof(a_.i64) / sizeof(a_.i64[0]) ; i++) { #if defined(SIMDE_ENABLE_OPENMP) #pragma omp simd reduction(|:k1_) reduction(|:k2_) #else SIMDE_VECTORIZE #endif for (size_t j = 0 ; j < sizeof(b_.i64) / sizeof(b_.i64[0]) ; j++) { const int32_t m = a_.i64[i] == b_.i64[j]; k1_ |= m << i; k2_ |= m << j; } } *k1 = k1_; *k2 = k2_; #endif } #if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) #undef _mm512_2intersect_epi64 #define _mm512_2intersect_epi64(a, b, k1, k2) 
simde_mm512_2intersect_epi64(a, b, k1, k2) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_2INTERSECT_H) */ simde-0.7.2/simde/x86/avx512/abs.h000066400000000000000000000427721400333146700163300ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_ABS_H) #define SIMDE_X86_AVX512_ABS_H #include "types.h" #include "mov.h" #include "../avx2.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_abs_epi8(simde__m128i src, simde__mmask16 k, simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_mask_abs_epi8(src, k, a); #else return simde_mm_mask_mov_epi8(src, k, simde_mm_abs_epi8(a)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_abs_epi8 #define _mm_mask_abs_epi8(src, k, a) simde_mm_mask_abs_epi8(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_abs_epi8(simde__mmask16 k, simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_maskz_abs_epi8(k, a); #else return simde_mm_maskz_mov_epi8(k, simde_mm_abs_epi8(a)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_abs_epi8 #define _mm_maskz_abs_epi8(k, a) simde_mm_maskz_abs_epi8(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_abs_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_mask_abs_epi16(src, k, a); #else return simde_mm_mask_mov_epi16(src, k, simde_mm_abs_epi16(a)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_abs_epi16 #define _mm_mask_abs_epi16(src, k, a) simde_mm_mask_abs_epi16(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_abs_epi16(simde__mmask8 k, simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_maskz_abs_epi16(k, a); #else return simde_mm_maskz_mov_epi16(k, simde_mm_abs_epi16(a)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_abs_epi16 #define 
_mm_maskz_abs_epi16(k, a) simde_mm_maskz_abs_epi16(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_abs_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask_abs_epi32(src, k, a); #else return simde_mm_mask_mov_epi32(src, k, simde_mm_abs_epi32(a)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_abs_epi32 #define _mm_mask_abs_epi32(src, k, a) simde_mm_mask_abs_epi32(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_abs_epi32(simde__mmask8 k, simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_maskz_abs_epi32(k, a); #else return simde_mm_maskz_mov_epi32(k, simde_mm_abs_epi32(a)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_abs_epi32 #define _mm_maskz_abs_epi32(k, a) simde_mm_maskz_abs_epi32(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_abs_epi64(simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_abs_epi64(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i64 = vabsq_s64(a_.neon_i64); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) r_.altivec_i64 = vec_abs(a_.altivec_i64); #else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? 
-a_.i64[i] : a_.i64[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_abs_epi64 #define _mm_abs_epi64(a) simde_mm_abs_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_abs_epi64(simde__m128i src, simde__mmask8 k, simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask_abs_epi64(src, k, a); #else return simde_mm_mask_mov_epi64(src, k, simde_mm_abs_epi64(a)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_abs_epi64 #define _mm_mask_abs_epi64(src, k, a) simde_mm_mask_abs_epi64(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_abs_epi64(simde__mmask8 k, simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_maskz_abs_epi64(k, a); #else return simde_mm_maskz_mov_epi64(k, simde_mm_abs_epi64(a)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_abs_epi64 #define _mm_maskz_abs_epi64(k, a) simde_mm_maskz_abs_epi64(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_abs_epi64(simde__m256i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_abs_epi64(a); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { r_.m128i[i] = simde_mm_abs_epi64(a_.m128i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? 
-a_.i64[i] : a_.i64[i]; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_abs_epi64 #define _mm256_abs_epi64(a) simde_mm256_abs_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mask_abs_epi64(simde__m256i src, simde__mmask8 k, simde__m256i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_mask_abs_epi64(src, k, a); #else return simde_mm256_mask_mov_epi64(src, k, simde_mm256_abs_epi64(a)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_abs_epi64 #define _mm256_mask_abs_epi64(src, k, a) simde_mm256_mask_abs_epi64(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_maskz_abs_epi64(simde__mmask8 k, simde__m256i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_maskz_abs_epi64(k, a); #else return simde_mm256_maskz_mov_epi64(k, simde_mm256_abs_epi64(a)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_abs_epi64 #define _mm256_maskz_abs_epi64(k, a) simde_mm256_maskz_abs_epi64(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_abs_epi8 (simde__m512i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_abs_epi8(a); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_abs_epi8(a_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? 
-a_.i8[i] : a_.i8[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_abs_epi8 #define _mm512_abs_epi8(a) simde_mm512_abs_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_abs_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_abs_epi8(src, k, a); #else return simde_mm512_mask_mov_epi8(src, k, simde_mm512_abs_epi8(a)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_abs_epi8 #define _mm512_mask_abs_epi8(src, k, a) simde_mm512_mask_abs_epi8(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_abs_epi8 (simde__mmask64 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_abs_epi8(k, a); #else return simde_mm512_maskz_mov_epi8(k, simde_mm512_abs_epi8(a)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_abs_epi8 #define _mm512_maskz_abs_epi8(k, a) simde_mm512_maskz_abs_epi8(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_abs_epi16 (simde__m512i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_abs_epi16(a); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_abs_epi16(a_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? 
-a_.i16[i] : a_.i16[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_abs_epi16 #define _mm512_abs_epi16(a) simde_mm512_abs_epi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_abs_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_abs_epi16(src, k, a); #else return simde_mm512_mask_mov_epi16(src, k, simde_mm512_abs_epi16(a)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_abs_epi16 #define _mm512_mask_abs_epi16(src, k, a) simde_mm512_mask_abs_epi16(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_abs_epi16 (simde__mmask32 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_abs_epi16(k, a); #else return simde_mm512_maskz_mov_epi16(k, simde_mm512_abs_epi16(a)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_abs_epi16 #define _mm512_maskz_abs_epi16(k, a) simde_mm512_maskz_abs_epi16(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_abs_epi32(simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_abs_epi32(a); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_abs_epi32(a_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { r_.i32[i] = (a_.i32[i] < INT64_C(0)) ? 
-a_.i32[i] : a_.i32[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_abs_epi32 #define _mm512_abs_epi32(a) simde_mm512_abs_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_abs_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_abs_epi32(src, k, a); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_abs_epi32(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_abs_epi32 #define _mm512_mask_abs_epi32(src, k, a) simde_mm512_mask_abs_epi32(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_abs_epi32(simde__mmask16 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_abs_epi32(k, a); #else return simde_mm512_maskz_mov_epi32(k, simde_mm512_abs_epi32(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_abs_epi32 #define _mm512_maskz_abs_epi32(k, a) simde_mm512_maskz_abs_epi32(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_abs_epi64(simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_abs_epi64(a); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_abs_epi64(a_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? 
-a_.i64[i] : a_.i64[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_abs_epi64 #define _mm512_abs_epi64(a) simde_mm512_abs_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_abs_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_abs_epi64(src, k, a); #else return simde_mm512_mask_mov_epi64(src, k, simde_mm512_abs_epi64(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_abs_epi64 #define _mm512_mask_abs_epi64(src, k, a) simde_mm512_mask_abs_epi64(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_abs_epi64(simde__mmask8 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_abs_epi64(k, a); #else return simde_mm512_maskz_mov_epi64(k, simde_mm512_abs_epi64(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_abs_epi64 #define _mm512_maskz_abs_epi64(k, a) simde_mm512_maskz_abs_epi64(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_abs_ps(simde__m512 v2) { #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) return _mm512_abs_ps(v2); #else simde__m512_private r_, v2_ = simde__m512_to_private(v2); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { r_.m128_private[i].neon_f32 = vabsq_f32(v2_.m128_private[i].neon_f32); } #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { r_.m128_private[i].altivec_f32 = vec_abs(v2_.m128_private[i].altivec_f32); } #else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { r_.f32[i] = (v2_.f32[i] < INT64_C(0)) ? 
-v2_.f32[i] : v2_.f32[i]; } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_abs_ps #define _mm512_abs_ps(v2) simde_mm512_abs_ps(v2) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_abs_ps(simde__m512 src, simde__mmask16 k, simde__m512 v2) { #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) return _mm512_mask_abs_ps(src, k, v2); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_abs_ps(v2)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_abs_ps #define _mm512_mask_abs_ps(src, k, v2) simde_mm512_mask_abs_ps(src, k, v2) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_abs_pd(simde__m512d v2) { #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) return _mm512_abs_pd(v2); #else simde__m512d_private r_, v2_ = simde__m512d_to_private(v2); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { r_.m128d_private[i].neon_f64 = vabsq_f64(v2_.m128d_private[i].neon_f64); } #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { r_.m128d_private[i].altivec_f64 = vec_abs(v2_.m128d_private[i].altivec_f64); } #else SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { r_.f64[i] = (v2_.f64[i] < INT64_C(0)) ? 
-v2_.f64[i] : v2_.f64[i]; } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_abs_pd #define _mm512_abs_pd(v2) simde_mm512_abs_pd(v2) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_abs_pd(simde__m512d src, simde__mmask8 k, simde__m512d v2) { #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) return _mm512_mask_abs_pd(src, k, v2); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_abs_pd(v2)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_abs_pd #define _mm512_mask_abs_pd(src, k, v2) simde_mm512_mask_abs_pd(src, k, v2) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_ABS_H) */ simde-0.7.2/simde/x86/avx512/add.h000066400000000000000000000511511400333146700163020ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_ADD_H) #define SIMDE_X86_AVX512_ADD_H #include "types.h" #include "../avx2.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_add_epi8(simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_mask_add_epi8(src, k, a, b); #else return simde_mm_mask_mov_epi8(src, k, simde_mm_add_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_add_epi8 #define _mm_mask_add_epi8(src, k, a, b) simde_mm_mask_add_epi8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_add_epi8(simde__mmask16 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_maskz_add_epi8(k, a, b); #else return simde_mm_maskz_mov_epi8(k, simde_mm_add_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_add_epi8 #define _mm_maskz_add_epi8(k, a, b) simde_mm_maskz_add_epi8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_add_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_mask_add_epi16(src, k, a, b); #else return simde_mm_mask_mov_epi16(src, k, simde_mm_add_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_add_epi16 #define _mm_mask_add_epi16(src, k, a, b) simde_mm_mask_add_epi16(src, k, a, b) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_add_epi16(simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_maskz_add_epi16(k, a, b); #else return simde_mm_maskz_mov_epi16(k, simde_mm_add_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_add_epi16 #define _mm_maskz_add_epi16(k, a, b) simde_mm_maskz_add_epi16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_add_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask_add_epi32(src, k, a, b); #else return simde_mm_mask_mov_epi32(src, k, simde_mm_add_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_add_epi32 #define _mm_mask_add_epi32(src, k, a, b) simde_mm_mask_add_epi32(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_add_epi32(simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_maskz_add_epi32(k, a, b); #else return simde_mm_maskz_mov_epi32(k, simde_mm_add_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_add_epi32 #define _mm_maskz_add_epi32(k, a, b) simde_mm_maskz_add_epi32(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_add_epi64(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask_add_epi64(src, k, a, b); #else return simde_mm_mask_mov_epi64(src, k, simde_mm_add_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_add_epi64 #define _mm_mask_add_epi64(src, k, a, b) simde_mm_mask_add_epi64(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_add_epi64(simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_maskz_add_epi64(k, a, 
b); #else return simde_mm_maskz_mov_epi64(k, simde_mm_add_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_add_epi64 #define _mm_maskz_add_epi64(k, a, b) simde_mm_maskz_add_epi64(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_mask_add_ss(simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) return _mm_mask_add_ss(src, k, a, b); #elif 1 simde__m128_private src_ = simde__m128_to_private(src), a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b), r_ = simde__m128_to_private(a); r_.f32[0] = (k & 1) ? (a_.f32[0] + b_.f32[0]) : src_.f32[0]; return simde__m128_from_private(r_); #else return simde_mm_move_ss(a, simde_mm_mask_mov_ps(src, k, simde_mm_add_ps(a, b))); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm_mask_add_ss #define _mm_mask_add_ss(src, k, a, b) simde_mm_mask_add_ss(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_maskz_add_ss(simde__mmask8 k, simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) return _mm_maskz_add_ss(k, a, b); #elif 1 simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b), r_ = simde__m128_to_private(a); r_.f32[0] = (k & 1) ? 
(a_.f32[0] + b_.f32[0]) : 0.0f; return simde__m128_from_private(r_); #else return simde_mm_move_ss(a, simde_mm_maskz_mov_ps(k, simde_mm_add_ps(a, b))); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_add_ss #define _mm_maskz_add_ss(k, a, b) simde_mm_maskz_add_ss(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mask_add_epi16(simde__m256i src, simde__mmask16 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_mask_add_epi16(src, k, a, b); #else return simde_mm256_mask_mov_epi16(src, k, simde_mm256_add_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_add_epi16 #define _mm256_mask_add_epi16(src, k, a, b) simde_mm256_mask_add_epi16(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_maskz_add_epi16(simde__mmask16 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_maskz_add_epi16(k, a, b); #else return simde_mm256_maskz_mov_epi16(k, simde_mm256_add_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_add_epi16 #define _mm256_maskz_add_epi16(k, a, b) simde_mm256_maskz_add_epi16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mask_add_epi32(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_mask_add_epi32(src, k, a, b); #else return simde_mm256_mask_mov_epi32(src, k, simde_mm256_add_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_add_epi32 #define _mm256_mask_add_epi32(src, k, a, b) simde_mm256_mask_add_epi32(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_maskz_add_epi32(simde__mmask8 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_maskz_add_epi32(k, a, b); #else 
return simde_mm256_maskz_mov_epi32(k, simde_mm256_add_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_add_epi32 #define _mm256_maskz_add_epi32(k, a, b) simde_mm256_maskz_add_epi32(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mask_add_epi64(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_mask_add_epi64(src, k, a, b); #else return simde_mm256_mask_mov_epi64(src, k, simde_mm256_add_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_add_epi64 #define _mm256_mask_add_epi64(src, k, a, b) simde_mm256_mask_add_epi64(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_maskz_add_epi64(simde__mmask8 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_maskz_add_epi64(k, a, b); #else return simde_mm256_maskz_mov_epi64(k, simde_mm256_add_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_add_epi64 #define _mm256_maskz_add_epi64(k, a, b) simde_mm256_maskz_add_epi64(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_add_epi8 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_add_epi8(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = a_.i8 + b_.i8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_add_epi8(a_.m256i[i], b_.m256i[i]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_add_epi8 #define _mm512_add_epi8(a, b) simde_mm512_add_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_add_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { #if 
defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_add_epi8(src, k, a, b); #else return simde_mm512_mask_mov_epi8(src, k, simde_mm512_add_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_add_epi8 #define _mm512_mask_add_epi8(src, k, a, b) simde_mm512_mask_add_epi8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_add_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_add_epi8(k, a, b); #else return simde_mm512_maskz_mov_epi8(k, simde_mm512_add_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_add_epi8 #define _mm512_maskz_add_epi8(k, a, b) simde_mm512_maskz_add_epi8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_add_epi16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_add_epi16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = a_.i16 + b_.i16; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_add_epi16(a_.m256i[i], b_.m256i[i]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_add_epi16 #define _mm512_add_epi16(a, b) simde_mm512_add_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_add_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_add_epi16(src, k, a, b); #else return simde_mm512_mask_mov_epi16(src, k, simde_mm512_add_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_add_epi16 #define _mm512_mask_add_epi16(src, k, a, b) simde_mm512_mask_add_epi16(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i 
simde_mm512_maskz_add_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_add_epi16(k, a, b); #else return simde_mm512_maskz_mov_epi16(k, simde_mm512_add_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_add_epi16 #define _mm512_maskz_add_epi16(k, a, b) simde_mm512_maskz_add_epi16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_add_epi32 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_add_epi32(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_ARM_SVE_NATIVE) const size_t n = sizeof(a_.i32) / sizeof(a_.i32[0]); size_t i = 0; svbool_t pg = svwhilelt_b32(i, n); do { svint32_t va = svld1_s32(pg, &(a_.i32[i])), vb = svld1_s32(pg, &(b_.i32[i])); svst1_s32(pg, &(r_.i32[i]), svadd_s32_x(pg, va, vb)); i += svcntw(); pg = svwhilelt_b32(i, n); } while (svptest_any(svptrue_b32(), pg)); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_add_epi32(a_.m256i[i], b_.m256i[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 + b_.i32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_add_epi32(a_.m256i[i], b_.m256i[i]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_add_epi32 #define _mm512_add_epi32(a, b) simde_mm512_add_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_add_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_add_epi32(src, k, a, b); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_add_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef 
_mm512_mask_add_epi32 #define _mm512_mask_add_epi32(src, k, a, b) simde_mm512_mask_add_epi32(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_add_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_add_epi32(k, a, b); #else return simde_mm512_maskz_mov_epi32(k, simde_mm512_add_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_add_epi32 #define _mm512_maskz_add_epi32(k, a, b) simde_mm512_maskz_add_epi32(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_add_epi64 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_add_epi64(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_add_epi64(a_.m256i[i], b_.m256i[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) r_.i64 = a_.i64 + b_.i64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_add_epi64(a_.m256i[i], b_.m256i[i]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_add_epi64 #define _mm512_add_epi64(a, b) simde_mm512_add_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_add_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_add_epi64(src, k, a, b); #else return simde_mm512_mask_mov_epi64(src, k, simde_mm512_add_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_add_epi64 #define _mm512_mask_add_epi64(src, k, a, b) simde_mm512_mask_add_epi64(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i 
simde_mm512_maskz_add_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_add_epi64(k, a, b); #else return simde_mm512_maskz_mov_epi64(k, simde_mm512_add_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_add_epi64 #define _mm512_maskz_add_epi64(k, a, b) simde_mm512_maskz_add_epi64(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_add_ps (simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_add_ps(a, b); #else simde__m512_private r_, a_ = simde__m512_to_private(a), b_ = simde__m512_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = a_.f32 + b_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_add_ps(a_.m256[i], b_.m256[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_add_ps #define _mm512_add_ps(a, b) simde_mm512_add_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_add_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_add_ps(src, k, a, b); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_add_ps(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_add_ps #define _mm512_mask_add_ps(src, k, a, b) simde_mm512_mask_add_ps(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_maskz_add_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_add_ps(k, a, b); #else return simde_mm512_maskz_mov_ps(k, simde_mm512_add_ps(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_add_ps #define _mm512_maskz_add_ps(k, a, b) simde_mm512_maskz_add_ps(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_add_pd 
(simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_add_pd(a, b); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a), b_ = simde__m512d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = a_.f64 + b_.f64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_add_pd(a_.m256d[i], b_.m256d[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_add_pd #define _mm512_add_pd(a, b) simde_mm512_add_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_add_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_add_pd(src, k, a, b); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_add_pd(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_add_pd #define _mm512_mask_add_pd(src, k, a, b) simde_mm512_mask_add_pd(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_maskz_add_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_add_pd(k, a, b); #else return simde_mm512_maskz_mov_pd(k, simde_mm512_add_pd(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_add_pd #define _mm512_maskz_add_pd(k, a, b) simde_mm512_maskz_add_pd(k, a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_ADD_H) */ simde-0.7.2/simde/x86/avx512/adds.h000066400000000000000000000316121400333146700164650ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, 
publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #if !defined(SIMDE_X86_AVX512_ADDS_H) #define SIMDE_X86_AVX512_ADDS_H #include "types.h" #include "../avx2.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_adds_epi8(simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_mask_adds_epi8(src, k, a, b); #else return simde_mm_mask_mov_epi8(src, k, simde_mm_adds_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_adds_epi8 #define _mm_mask_adds_epi8(src, k, a, b) simde_mm_mask_adds_epi8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_adds_epi8(simde__mmask16 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_maskz_adds_epi8(k, a, b); #else return simde_mm_maskz_mov_epi8(k, simde_mm_adds_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_adds_epi8 #define _mm_maskz_adds_epi8(k, a, b) 
simde_mm_maskz_adds_epi8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_adds_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_mask_adds_epi16(src, k, a, b); #else return simde_mm_mask_mov_epi16(src, k, simde_mm_adds_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_adds_epi16 #define _mm_mask_adds_epi16(src, k, a, b) simde_mm_mask_adds_epi16(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_adds_epi16(simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_maskz_adds_epi16(k, a, b); #else return simde_mm_maskz_mov_epi16(k, simde_mm_adds_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_adds_epi16 #define _mm_maskz_adds_epi16(k, a, b) simde_mm_maskz_adds_epi16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mask_adds_epi8(simde__m256i src, simde__mmask32 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_mask_adds_epi8(src, k, a, b); #else return simde_mm256_mask_mov_epi8(src, k, simde_mm256_adds_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_adds_epi8 #define _mm256_mask_adds_epi8(src, k, a, b) simde_mm256_mask_adds_epi8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_maskz_adds_epi8(simde__mmask32 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_maskz_adds_epi8(k, a, b); #else return simde_mm256_maskz_mov_epi8(k, simde_mm256_adds_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_adds_epi8 #define _mm256_maskz_adds_epi8(k, a, b) 
simde_mm256_maskz_adds_epi8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mask_adds_epi16(simde__m256i src, simde__mmask16 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_mask_adds_epi16(src, k, a, b); #else return simde_mm256_mask_mov_epi16(src, k, simde_mm256_adds_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_adds_epi16 #define _mm256_mask_adds_epi16(src, k, a, b) simde_mm256_mask_adds_epi16(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_maskz_adds_epi16(simde__mmask16 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_maskz_adds_epi16(k, a, b); #else return simde_mm256_maskz_mov_epi16(k, simde_mm256_adds_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_adds_epi16 #define _mm256_maskz_adds_epi16(k, a, b) simde_mm256_maskz_adds_epi16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_adds_epi8 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_adds_epi8(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if !defined(HEDLEY_INTEL_VERSION) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_adds_epi8(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_adds_epi8 #define _mm512_adds_epi8(a, b) simde_mm512_adds_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_adds_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { #if 
defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_adds_epi8(src, k, a, b); #else return simde_mm512_mask_mov_epi8(src, k, simde_mm512_adds_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_adds_epi8 #define _mm512_mask_adds_epi8(src, k, a, b) simde_mm512_mask_adds_epi8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_adds_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_adds_epi8(k, a, b); #else return simde_mm512_maskz_mov_epi8(k, simde_mm512_adds_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_adds_epi8 #define _mm512_maskz_adds_epi8(k, a, b) simde_mm512_maskz_adds_epi8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_adds_epi16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_adds_epi16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if !defined(HEDLEY_INTEL_VERSION) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_adds_epi16(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_adds_epi16 #define _mm512_adds_epi16(a, b) simde_mm512_adds_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_adds_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_adds_epi16(src, k, a, b); #else return simde_mm512_mask_mov_epi16(src, k, simde_mm512_adds_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_adds_epi16 #define 
_mm512_mask_adds_epi16(src, k, a, b) simde_mm512_mask_adds_epi16(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_adds_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_adds_epi16(k, a, b); #else return simde_mm512_maskz_mov_epi16(k, simde_mm512_adds_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_adds_epi16 #define _mm512_maskz_adds_epi16(k, a, b) simde_mm512_maskz_adds_epi16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_adds_epu8 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_adds_epu8(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if !defined(HEDLEY_INTEL_VERSION) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { r_.m128i[i] = simde_mm_adds_epu8(a_.m128i[i], b_.m128i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_adds_epu8 #define _mm512_adds_epu8(a, b) simde_mm512_adds_epu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_adds_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_adds_epu8(src, k, a, b); #else return simde_mm512_mask_mov_epi8(src, k, simde_mm512_adds_epu8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_adds_epu8 #define _mm512_mask_adds_epu8(src, k, a, b) simde_mm512_mask_adds_epu8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_adds_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return 
_mm512_maskz_adds_epu8(k, a, b); #else return simde_mm512_maskz_mov_epi8(k, simde_mm512_adds_epu8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_adds_epu8 #define _mm512_maskz_adds_epu8(k, a, b) simde_mm512_maskz_adds_epu8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_adds_epu16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_adds_epu16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if !defined(HEDLEY_INTEL_VERSION) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_adds_epu16(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_adds_epu16 #define _mm512_adds_epu16(a, b) simde_mm512_adds_epu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_adds_epu16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_adds_epu16(src, k, a, b); #else return simde_mm512_mask_mov_epi16(src, k, simde_mm512_adds_epu16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_adds_epu16 #define _mm512_mask_adds_epu16(src, k, a, b) simde_mm512_mask_adds_epu16(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_adds_epu16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_adds_epu16(k, a, b); #else return simde_mm512_maskz_mov_epi16(k, simde_mm512_adds_epu16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_adds_epu16 #define _mm512_maskz_adds_epu16(k, a, b) 
simde_mm512_maskz_adds_epu16(k, a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_ADDS_H) */ simde-0.7.2/simde/x86/avx512/and.h000066400000000000000000000225421400333146700163160ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_AND_H) #define SIMDE_X86_AVX512_AND_H #include "types.h" #include "../avx2.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_and_pd (simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_and_pd(a, b); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a), b_ = simde__m512d_to_private(b); #if defined(SIMDE_X86_AVX_NATIVE) r_.m256d[0] = simde_mm256_and_pd(a_.m256d[0], b_.m256d[0]); r_.m256d[1] = simde_mm256_and_pd(a_.m256d[1], b_.m256d[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f & b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_and_pd #define _mm512_and_pd(a, b) simde_mm512_and_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_and_ps (simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_and_ps(a, b); #else simde__m512_private r_, a_ = simde__m512_to_private(a), b_ = simde__m512_to_private(b); #if defined(SIMDE_X86_AVX_NATIVE) r_.m256[0] = simde_mm256_and_ps(a_.m256[0], b_.m256[0]); r_.m256[1] = simde_mm256_and_ps(a_.m256[1], b_.m256[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f & b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_and_ps #define _mm512_and_ps(a, b) simde_mm512_and_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_and_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, 
simde__m512 b) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_mask_and_ps(src, k, a, b); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_and_ps(a, b)); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_and_ps #define _mm512_mask_and_ps(src, k, a, b) simde_mm512_mask_and_ps(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_maskz_and_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_maskz_and_ps(k, a, b); #else return simde_mm512_maskz_mov_ps(k, simde_mm512_and_ps(a, b)); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_and_ps #define _mm512_maskz_and_ps(k, a, b) simde_mm512_maskz_and_ps(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_and_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_mask_and_pd(src, k, a, b); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_and_pd(a, b)); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_and_pd #define _mm512_mask_and_pd(src, k, a, b) simde_mm512_mask_and_pd(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_maskz_and_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_maskz_and_pd(k, a, b); #else return simde_mm512_maskz_mov_pd(k, simde_mm512_and_pd(a, b)); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_and_pd #define _mm512_maskz_and_pd(k, a, b) simde_mm512_maskz_and_pd(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_and_epi32 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_and_epi32(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 & b_.i32; #else 
SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] & b_.i32[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_and_epi32 #define _mm512_and_epi32(a, b) simde_mm512_and_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_and_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_and_epi32(src, k, v2, v3); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_and_epi32(v2, v3)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_and_epi32 #define _mm512_mask_and_epi32(src, k, v2, v3) simde_mm512_mask_and_epi32(src, k, v2, v3) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_and_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_and_epi32(k, a, b); #else return simde_mm512_maskz_mov_epi32(k, simde_mm512_and_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_and_epi32 #define _mm512_maskz_and_epi32(k, a, b) simde_mm512_maskz_and_epi32(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_and_epi64 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_and_epi64(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 & b_.i64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[i] & b_.i64[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_and_epi64 #define _mm512_and_epi64(a, b) simde_mm512_and_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_and_epi64(simde__m512i src, 
simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_and_epi64(src, k, a, b); #else return simde_mm512_mask_mov_epi64(src, k, simde_mm512_and_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_and_epi64 #define _mm512_mask_and_epi64(src, k, a, b) simde_mm512_mask_and_epi64(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_and_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_and_epi64(k, a, b); #else return simde_mm512_maskz_mov_epi64(k, simde_mm512_and_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_and_epi64 #define _mm512_maskz_and_epi64(k, a, b) simde_mm512_maskz_and_epi64(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_and_si512 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_and_si512(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_X86_AVX2_NATIVE) r_.m256i[0] = simde_mm256_and_si256(a_.m256i[0], b_.m256i[0]); r_.m256i[1] = simde_mm256_and_si256(a_.m256i[1], b_.m256i[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f & b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] & b_.i32[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_and_si512 #define _mm512_and_si512(a, b) simde_mm512_and_si512(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_AND_H) */ simde-0.7.2/simde/x86/avx512/andnot.h000066400000000000000000000166141400333146700170420ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and 
associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 *   2020 Evan Nemerson
 *   2020 Himanshi Mathur
 */

#if !defined(SIMDE_X86_AVX512_ANDNOT_H)
#define SIMDE_X86_AVX512_ANDNOT_H

#include "types.h"
#include "../avx2.h"
#include "mov.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* The floating-point and-not operations below are function-like macros.
 * With native AVX-512DQ they expand to the intrinsic; otherwise the
 * operands are cast to simde__m512i, routed through the integer and-not
 * defined further down in this file, and the result is cast back.
 *
 * NOTE(review): the fallback expansions use simde_mm512_cast*() but this
 * header does not include "cast.h" itself -- presumably it arrives via
 * "mov.h"; confirm. */

/* simde_mm512_andnot_ps(a, b): single-precision and-not. */
#if defined(SIMDE_X86_AVX512DQ_NATIVE)
  #define simde_mm512_andnot_ps(a, b) _mm512_andnot_ps(a, b)
#else
  #define simde_mm512_andnot_ps(a, b) simde_mm512_castsi512_ps(simde_mm512_andnot_si512(simde_mm512_castps_si512(a), simde_mm512_castps_si512(b)))
#endif
#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES)
  #undef _mm512_andnot_ps
  #define _mm512_andnot_ps(a, b) simde_mm512_andnot_ps(a, b)
#endif

/* simde_mm512_mask_andnot_ps(src, k, a, b): merge-masked form; the
 * fallback goes through the 32-bit-lane masked integer and-not. */
#if defined(SIMDE_X86_AVX512DQ_NATIVE)
  #define simde_mm512_mask_andnot_ps(src, k, a, b) _mm512_mask_andnot_ps((src), (k), (a), (b))
#else
  #define simde_mm512_mask_andnot_ps(src, k, a, b) simde_mm512_castsi512_ps(simde_mm512_mask_andnot_epi32(simde_mm512_castps_si512(src), k, simde_mm512_castps_si512(a), simde_mm512_castps_si512(b)))
#endif
#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_andnot_ps
  #define _mm512_mask_andnot_ps(src, k, a, b) simde_mm512_mask_andnot_ps(src, k, a, b)
#endif

/* simde_mm512_maskz_andnot_ps(k, a, b): zero-masked form. */
#if defined(SIMDE_X86_AVX512DQ_NATIVE)
  #define simde_mm512_maskz_andnot_ps(k, a, b) _mm512_maskz_andnot_ps((k), (a), (b))
#else
  #define simde_mm512_maskz_andnot_ps(k, a, b) simde_mm512_castsi512_ps(simde_mm512_maskz_andnot_epi32(k, simde_mm512_castps_si512(a), simde_mm512_castps_si512(b)))
#endif
#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_andnot_ps
  #define _mm512_maskz_andnot_ps(k, a, b) simde_mm512_maskz_andnot_ps(k, a, b)
#endif

/* simde_mm512_andnot_pd(a, b): double-precision and-not. */
#if defined(SIMDE_X86_AVX512DQ_NATIVE)
  #define simde_mm512_andnot_pd(a, b) _mm512_andnot_pd(a, b)
#else
  #define simde_mm512_andnot_pd(a, b) simde_mm512_castsi512_pd(simde_mm512_andnot_si512(simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b)))
#endif
#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES)
  #undef _mm512_andnot_pd
  #define _mm512_andnot_pd(a, b) simde_mm512_andnot_pd(a, b)
#endif

/* simde_mm512_mask_andnot_pd(src, k, a, b): merge-masked form; the
 * fallback goes through the 64-bit-lane masked integer and-not. */
#if defined(SIMDE_X86_AVX512DQ_NATIVE)
  #define simde_mm512_mask_andnot_pd(src, k, a, b) _mm512_mask_andnot_pd((src), (k), (a), (b))
#else
  #define simde_mm512_mask_andnot_pd(src, k, a, b) simde_mm512_castsi512_pd(simde_mm512_mask_andnot_epi64(simde_mm512_castpd_si512(src), k, simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b)))
#endif
#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_andnot_pd
  #define _mm512_mask_andnot_pd(src, k, a, b) simde_mm512_mask_andnot_pd(src, k, a, b)
#endif

/* simde_mm512_maskz_andnot_pd(k, a, b): zero-masked form. */
#if defined(SIMDE_X86_AVX512DQ_NATIVE)
  #define simde_mm512_maskz_andnot_pd(k, a, b) _mm512_maskz_andnot_pd((k), (a), (b))
#else
  #define simde_mm512_maskz_andnot_pd(k, a, b) simde_mm512_castsi512_pd(simde_mm512_maskz_andnot_epi64(k, simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b)))
#endif
#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_andnot_pd
  #define _mm512_maskz_andnot_pd(k, a, b) simde_mm512_maskz_andnot_pd(k, a, b)
#endif

/* Computes (~a) & b over the full 512-bit integer vector.
 *
 * Note the operand order: the FIRST argument is the one that is
 * complemented.  Uses the AVX-512F intrinsic when native, two 256-bit
 * and-nots when AVX2 is native, and otherwise a loop over the i32f
 * lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_andnot_si512 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_andnot_si512(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_X86_AVX2_NATIVE)
      r_.m256i[0] = simde_mm256_andnot_si256(a_.m256i[0], b_.m256i[0]);
      r_.m256i[1] = simde_mm256_andnot_si256(a_.m256i[1], b_.m256i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
        r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i];
      }
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
/* The epi32 and epi64 spellings are plain aliases of the si512 form. */
#define simde_mm512_andnot_epi32(a, b) simde_mm512_andnot_si512(a, b)
#define simde_mm512_andnot_epi64(a, b) simde_mm512_andnot_si512(a, b)
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_andnot_si512
  #define _mm512_andnot_si512(a, b) simde_mm512_andnot_si512(a, b)
  #undef _mm512_andnot_epi32
  #define _mm512_andnot_epi32(a, b) simde_mm512_andnot_si512(a, b)
  #undef _mm512_andnot_epi64
  #define _mm512_andnot_epi64(a, b) simde_mm512_andnot_si512(a, b)
#endif

/* Merge-masked and-not with a 16-bit mask: the and-not result is merged
 * with src by simde_mm512_mask_mov_epi32(). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_andnot_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_andnot_epi32(src, k, a, b);
  #else
    return simde_mm512_mask_mov_epi32(src, k, simde_mm512_andnot_epi32(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_andnot_epi32
  #define _mm512_mask_andnot_epi32(src, k, a, b) simde_mm512_mask_andnot_epi32(src, k, a, b)
#endif

/* Zero-masked and-not with a 16-bit mask, via simde_mm512_maskz_mov_epi32(). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_maskz_andnot_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_maskz_andnot_epi32(k, a, b);
  #else
    return simde_mm512_maskz_mov_epi32(k, simde_mm512_andnot_epi32(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_andnot_epi32
  #define _mm512_maskz_andnot_epi32(k, a, b) simde_mm512_maskz_andnot_epi32(k, a, b)
#endif

/* Merge-masked and-not with an 8-bit mask: the and-not result is merged
 * with src by simde_mm512_mask_mov_epi64(). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_andnot_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_andnot_epi64(src, k, a, b);
  #else
    return simde_mm512_mask_mov_epi64(src, k, simde_mm512_andnot_epi64(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_andnot_epi64
  #define _mm512_mask_andnot_epi64(src, k, a, b) simde_mm512_mask_andnot_epi64(src, k, a, b)
#endif

/* Zero-masked and-not with an 8-bit mask, via simde_mm512_maskz_mov_epi64(). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_maskz_andnot_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_maskz_andnot_epi64(k, a, b);
  #else
    return simde_mm512_maskz_mov_epi64(k, simde_mm512_andnot_epi64(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_andnot_epi64
  #define _mm512_maskz_andnot_epi64(k, a, b) simde_mm512_maskz_andnot_epi64(k, a, b)
#endif

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_X86_AVX512_ANDNOT_H) */
simde-0.7.2/simde/x86/avx512/avg.h000066400000000000000000000211371400333146700163300ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_AVG_H) #define SIMDE_X86_AVX512_AVG_H #include "types.h" #include "mov.h" #include "../avx2.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_avg_epu8(simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_mask_avg_epu8(src, k, a, b); #else return simde_mm_mask_mov_epi8(src, k, simde_mm_avg_epu8(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_avg_epu8 #define _mm_mask_avg_epu8(src, k, a, b) simde_mm_mask_avg_epu8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_avg_epu8(simde__mmask16 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_maskz_avg_epu8(k, a, b); #else return simde_mm_maskz_mov_epi8(k, simde_mm_avg_epu8(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_avg_epu8 #define _mm_maskz_avg_epu8(k, a, b) simde_mm_maskz_avg_epu8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_avg_epu16(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_mask_avg_epu16(src, k, a, b); #else return simde_mm_mask_mov_epi16(src, k, 
simde_mm_avg_epu16(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_avg_epu16 #define _mm_mask_avg_epu16(src, k, a, b) simde_mm_mask_avg_epu16(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_avg_epu16(simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_maskz_avg_epu16(k, a, b); #else return simde_mm_maskz_mov_epi16(k, simde_mm_avg_epu16(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_avg_epu16 #define _mm_maskz_avg_epu16(k, a, b) simde_mm_maskz_avg_epu16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mask_avg_epu8(simde__m256i src, simde__mmask32 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_mask_avg_epu8(src, k, a, b); #else return simde_mm256_mask_mov_epi8(src, k, simde_mm256_avg_epu8(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_avg_epu8 #define _mm256_mask_avg_epu8(src, k, a, b) simde_mm256_mask_avg_epu8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_maskz_avg_epu8(simde__mmask32 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_maskz_avg_epu8(k, a, b); #else return simde_mm256_maskz_mov_epi8(k, simde_mm256_avg_epu8(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_avg_epu8 #define _mm256_maskz_avg_epu8(k, a, b) simde_mm256_maskz_avg_epu8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mask_avg_epu16(simde__m256i src, simde__mmask16 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_mask_avg_epu16(src, k, a, b); #else return simde_mm256_mask_mov_epi16(src, k, simde_mm256_avg_epu16(a, b)); 
#endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_avg_epu16 #define _mm256_mask_avg_epu16(src, k, a, b) simde_mm256_mask_avg_epu16(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_maskz_avg_epu16(simde__mmask16 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_maskz_avg_epu16(k, a, b); #else return simde_mm256_maskz_mov_epi16(k, simde_mm256_avg_epu16(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_avg_epu16 #define _mm256_maskz_avg_epu16(k, a, b) simde_mm256_maskz_avg_epu16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_avg_epu8 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_avg_epu8(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_avg_epu8 #define _mm512_avg_epu8(a, b) simde_mm512_avg_epu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_avg_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_avg_epu8(src, k, a, b); #else return simde_mm512_mask_mov_epi8(src, k, simde_mm512_avg_epu8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_avg_epu8 #define _mm512_mask_avg_epu8(src, k, a, b) simde_mm512_mask_avg_epu8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_avg_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_avg_epu8(k, a, b); #else return simde_mm512_maskz_mov_epi8(k, simde_mm512_avg_epu8(a, b)); #endif 
} #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_avg_epu8 #define _mm512_maskz_avg_epu8(k, a, b) simde_mm512_maskz_avg_epu8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_avg_epu16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_avg_epu16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_avg_epu16 #define _mm512_avg_epu16(a, b) simde_mm512_avg_epu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_avg_epu16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_avg_epu16(src, k, a, b); #else return simde_mm512_mask_mov_epi16(src, k, simde_mm512_avg_epu16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_avg_epu16 #define _mm512_mask_avg_epu16(src, k, a, b) simde_mm512_mask_avg_epu16(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_avg_epu16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_avg_epu16(k, a, b); #else return simde_mm512_maskz_mov_epi16(k, simde_mm512_avg_epu16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_avg_epu16 #define _mm512_maskz_avg_epu16(k, a, b) simde_mm512_maskz_avg_epu16(k, a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_AVG_H) */ simde-0.7.2/simde/x86/avx512/blend.h000066400000000000000000000224101400333146700166320ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a 
copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_BLEND_H) #define SIMDE_X86_AVX512_BLEND_H #include "types.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_blend_epi8(simde__mmask16 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_mask_blend_epi8(k, a, b); #else return simde_mm_mask_mov_epi8(a, k, b); #endif } #if defined(SIMDE_X86_AVX256BW_ENABLE_NATIVE_ALIASES) #undef _mm_mask_blend_epi8 #define _mm_mask_blend_epi8(k, a, b) simde_mm_mask_blend_epi8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_blend_epi16(simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_mask_blend_epi16(k, a, b); #else return simde_mm_mask_mov_epi16(a, k, b); #endif } #if 
defined(SIMDE_X86_AVX256BW_ENABLE_NATIVE_ALIASES) #undef _mm_mask_blend_epi16 #define _mm_mask_blend_epi16(k, a, b) simde_mm_mask_blend_epi16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_blend_epi32(simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask_blend_epi32(k, a, b); #else return simde_mm_mask_mov_epi32(a, k, b); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_blend_epi32 #define _mm_mask_blend_epi32(k, a, b) simde_mm_mask_blend_epi32(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_blend_epi64(simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask_blend_epi64(k, a, b); #else return simde_mm_mask_mov_epi64(a, k, b); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_blend_epi64 #define _mm_mask_blend_epi64(k, a, b) simde_mm_mask_blend_epi64(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_mask_blend_ps(simde__mmask8 k, simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask_blend_ps(k, a, b); #else return simde_mm_mask_mov_ps(a, k, b); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_blend_ps #define _mm_mask_blend_ps(k, a, b) simde_mm_mask_blend_ps(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_mask_blend_pd(simde__mmask8 k, simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask_blend_pd(k, a, b); #else return simde_mm_mask_mov_pd(a, k, b); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_blend_pd #define _mm_mask_blend_pd(k, a, b) simde_mm_mask_blend_pd(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mask_blend_epi8(simde__mmask32 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_mask_blend_epi8(k, a, b); 
#else return simde_mm256_mask_mov_epi8(a, k, b); #endif } #if defined(SIMDE_X86_AVX256BW_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_blend_epi8 #define _mm256_mask_blend_epi8(k, a, b) simde_mm256_mask_blend_epi8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mask_blend_epi16(simde__mmask16 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_mask_blend_epi16(k, a, b); #else return simde_mm256_mask_mov_epi16(a, k, b); #endif } #if defined(SIMDE_X86_AVX256BW_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_blend_epi16 #define _mm256_mask_blend_epi16(k, a, b) simde_mm256_mask_blend_epi16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mask_blend_epi32(simde__mmask8 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_mask_blend_epi32(k, a, b); #else return simde_mm256_mask_mov_epi32(a, k, b); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_blend_epi32 #define _mm256_mask_blend_epi32(k, a, b) simde_mm256_mask_blend_epi32(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mask_blend_epi64(simde__mmask8 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_mask_blend_epi64(k, a, b); #else return simde_mm256_mask_mov_epi64(a, k, b); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_blend_epi64 #define _mm256_mask_blend_epi64(k, a, b) simde_mm256_mask_blend_epi64(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_mask_blend_ps(simde__mmask8 k, simde__m256 a, simde__m256 b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_mask_blend_ps(k, a, b); #else return simde_mm256_mask_mov_ps(a, k, b); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_blend_ps #define _mm256_mask_blend_ps(k, a, b) simde_mm256_mask_blend_ps(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d 
simde_mm256_mask_blend_pd(simde__mmask8 k, simde__m256d a, simde__m256d b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_mask_blend_pd(k, a, b); #else return simde_mm256_mask_mov_pd(a, k, b); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_blend_pd #define _mm256_mask_blend_pd(k, a, b) simde_mm256_mask_blend_pd(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_blend_epi8(simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_blend_epi8(k, a, b); #else return simde_mm512_mask_mov_epi8(a, k, b); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_blend_epi8 #define _mm512_mask_blend_epi8(k, a, b) simde_mm512_mask_blend_epi8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_blend_epi16(simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_blend_epi16(k, a, b); #else return simde_mm512_mask_mov_epi16(a, k, b); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_blend_epi16 #define _mm512_mask_blend_epi16(k, a, b) simde_mm512_mask_blend_epi16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_blend_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_blend_epi32(k, a, b); #else return simde_mm512_mask_mov_epi32(a, k, b); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_blend_epi32 #define _mm512_mask_blend_epi32(k, a, b) simde_mm512_mask_blend_epi32(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_blend_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_blend_epi64(k, a, b); #else return simde_mm512_mask_mov_epi64(a, k, b); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_blend_epi64 
#define _mm512_mask_blend_epi64(k, a, b) simde_mm512_mask_blend_epi64(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_blend_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_blend_ps(k, a, b); #else return simde_mm512_mask_mov_ps(a, k, b); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_blend_ps #define _mm512_mask_blend_ps(k, a, b) simde_mm512_mask_blend_ps(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_blend_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_blend_pd(k, a, b); #else return simde_mm512_mask_mov_pd(a, k, b); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_blend_pd #define _mm512_mask_blend_pd(k, a, b) simde_mm512_mask_blend_pd(k, a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_BLEND_H) */ simde-0.7.2/simde/x86/avx512/broadcast.h000066400000000000000000000710301400333146700175120ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan * 2020 Christopher Moore */ #if !defined(SIMDE_X86_AVX512_BROADCAST_H) #define SIMDE_X86_AVX512_BROADCAST_H #include "types.h" #include "../avx2.h" #include "mov.h" #include "cast.h" #include "set1.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_broadcast_f32x2 (simde__m128 a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm256_broadcast_f32x2(a); #else simde__m256_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 1, 0, 1, 0, 1, 0, 1); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { r_.f32[ i ] = a_.f32[0]; r_.f32[i + 1] = a_.f32[1]; } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm256_broadcast_f32x2 #define _mm256_broadcast_f32x2(a) simde_mm256_broadcast_f32x2(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_mask_broadcast_f32x2(simde__m256 src, simde__mmask8 k, simde__m128 a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm256_mask_broadcast_f32x2(src, k, a); #else return simde_mm256_mask_mov_ps(src, k, simde_mm256_broadcast_f32x2(a)); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_broadcast_f32x2 #define _mm256_mask_broadcast_f32x2(src, k, a) simde_mm256_mask_broadcast_f32x2(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_maskz_broadcast_f32x2(simde__mmask8 
k, simde__m128 a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm256_maskz_broadcast_f32x2(k, a); #else return simde_mm256_maskz_mov_ps(k, simde_mm256_broadcast_f32x2(a)); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_broadcast_f32x2 #define _mm256_maskz_broadcast_f32x2(k, a) simde_mm256_maskz_broadcast_f32x2(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_broadcast_f32x2 (simde__m128 a) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_broadcast_f32x2(a); #else simde__m512_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { r_.f32[ i ] = a_.f32[0]; r_.f32[i + 1] = a_.f32[1]; } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_broadcast_f32x2 #define _mm512_broadcast_f32x2(a) simde_mm512_broadcast_f32x2(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_broadcast_f32x2(simde__m512 src, simde__mmask16 k, simde__m128 a) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_mask_broadcast_f32x2(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_broadcast_f32x2(a)); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_broadcast_f32x2 #define _mm512_mask_broadcast_f32x2(src, k, a) simde_mm512_mask_broadcast_f32x2(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_maskz_broadcast_f32x2(simde__mmask16 k, simde__m128 a) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_maskz_broadcast_f32x2(k, a); #else return simde_mm512_maskz_mov_ps(k, simde_mm512_broadcast_f32x2(a)); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef 
_mm512_maskz_broadcast_f32x2 #define _mm512_maskz_broadcast_f32x2(k, a) simde_mm512_maskz_broadcast_f32x2(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_broadcast_f32x8 (simde__m256 a) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_broadcast_f32x8(a); #else simde__m512_private r_; simde__m256_private a_ = simde__m256_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=8) { r_.f32[ i ] = a_.f32[0]; r_.f32[i + 1] = a_.f32[1]; r_.f32[i + 2] = a_.f32[2]; r_.f32[i + 3] = a_.f32[3]; r_.f32[i + 4] = a_.f32[4]; r_.f32[i + 5] = a_.f32[5]; r_.f32[i + 6] = a_.f32[6]; r_.f32[i + 7] = a_.f32[7]; } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_broadcast_f32x8 #define _mm512_broadcast_f32x8(a) simde_mm512_broadcast_f32x8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_broadcast_f32x8(simde__m512 src, simde__mmask16 k, simde__m256 a) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_mask_broadcast_f32x8(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_broadcast_f32x8(a)); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_broadcast_f32x8 #define _mm512_mask_broadcast_f32x8(src, k, a) simde_mm512_mask_broadcast_f32x8(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_maskz_broadcast_f32x8(simde__mmask16 k, simde__m256 a) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_maskz_broadcast_f32x8(k, a); #else return simde_mm512_maskz_mov_ps(k, simde_mm512_broadcast_f32x8(a)); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_broadcast_f32x8 #define _mm512_maskz_broadcast_f32x8(k, a) simde_mm512_maskz_broadcast_f32x8(k, a) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_broadcast_f64x2 (simde__m128d a) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_broadcast_f64x2(a); #else simde__m512d_private r_; simde__m128d_private a_ = simde__m128d_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) r_.f64 = __builtin_shufflevector(a_.f64, a_.f64, 0, 1, 0, 1, 0, 1, 0, 1); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { r_.f64[ i ] = a_.f64[0]; r_.f64[i + 1] = a_.f64[1]; } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_broadcast_f64x2 #define _mm512_broadcast_f64x2(a) simde_mm512_broadcast_f64x2(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_broadcast_f64x2(simde__m512d src, simde__mmask8 k, simde__m128d a) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_mask_broadcast_f64x2(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_broadcast_f64x2(a)); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_broadcast_f64x2 #define _mm512_mask_broadcast_f64x2(src, k, a) simde_mm512_mask_broadcast_f64x2(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_maskz_broadcast_f64x2(simde__mmask8 k, simde__m128d a) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_maskz_broadcast_f64x2(k, a); #else return simde_mm512_maskz_mov_pd(k, simde_mm512_broadcast_f64x2(a)); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_broadcast_f64x2 #define _mm512_maskz_broadcast_f64x2(k, a) simde_mm512_maskz_broadcast_f64x2(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_broadcast_f32x4 (simde__m128 a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_broadcast_f32x4(a); #else simde__m256_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128_private[0] = a_; r_.m128_private[1] = a_; #elif defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 1, 2, 3, 0, 1, 2, 3); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 4) { r_.f32[ i ] = a_.f32[0]; r_.f32[i + 1] = a_.f32[1]; r_.f32[i + 2] = a_.f32[2]; r_.f32[i + 3] = a_.f32[3]; } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm256_broadcast_f32x4 #define _mm256_broadcast_f32x4(a) simde_mm256_broadcast_f32x4(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_mask_broadcast_f32x4(simde__m256 src, simde__mmask8 k, simde__m128 a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_mask_broadcast_f32x4(src, k, a); #else return simde_mm256_mask_mov_ps(src, k, simde_mm256_broadcast_f32x4(a)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_broadcast_f32x4 #define _mm256_mask_broadcast_f32x4(src, k, a) simde_mm256_mask_broadcast_f32x4(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_maskz_broadcast_f32x4(simde__mmask8 k, simde__m128 a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_maskz_broadcast_f32x4(k, a); #else return simde_mm256_maskz_mov_ps(k, simde_mm256_broadcast_f32x4(a)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_broadcast_f32x4 #define _mm256_maskz_broadcast_f32x4(k, a) simde_mm256_maskz_broadcast_f32x4(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_broadcast_f64x2 (simde__m128d a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm256_broadcast_f64x2(a); #else simde__m256d_private r_; simde__m128d_private a_ = simde__m128d_to_private(a); /* I don't have a bug # for this, but when compiled with clang-10 without optimization on aarch64 * the __builtin_shufflevector 
version doesn't work correctly. clang 9 and 11 aren't a problem */ #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && \ (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION < 100000 || SIMDE_DETECT_CLANG_VERSION > 100000)) r_.f64 = __builtin_shufflevector(a_.f64, a_.f64, 0, 1, 0, 1); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { r_.f64[ i ] = a_.f64[0]; r_.f64[i + 1] = a_.f64[1]; } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_broadcast_f64x2 #define _mm256_broadcast_f64x2(a) simde_mm256_broadcast_f64x2(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_mask_broadcast_f64x2(simde__m256d src, simde__mmask8 k, simde__m128d a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm256_mask_broadcast_f64x2(src, k, a); #else return simde_mm256_mask_mov_pd(src, k, simde_mm256_broadcast_f64x2(a)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_broadcast_f64x2 #define _mm256_mask_broadcast_f64x2(src, k, a) simde_mm256_mask_broadcast_f64x2(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_maskz_broadcast_f64x2(simde__mmask8 k, simde__m128d a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm256_maskz_broadcast_f64x2(k, a); #else return simde_mm256_maskz_mov_pd(k, simde_mm256_broadcast_f64x2(a)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_broadcast_f64x2 #define _mm256_maskz_broadcast_f64x2(k, a) simde_mm256_maskz_broadcast_f64x2(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_broadcast_f32x4 (simde__m128 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_broadcast_f32x4(a); #else simde__m512_private r_; #if defined(SIMDE_X86_AVX2_NATIVE) r_.m256[1] = r_.m256[0] = 
simde_mm256_castsi256_ps(simde_mm256_broadcastsi128_si256(simde_mm_castps_si128(a))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = a; } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_broadcast_f32x4 #define _mm512_broadcast_f32x4(a) simde_mm512_broadcast_f32x4(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_broadcast_f32x4(simde__m512 src, simde__mmask16 k, simde__m128 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_broadcast_f32x4(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_broadcast_f32x4(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_broadcast_f32x4 #define _mm512_mask_broadcast_f32x4(src, k, a) simde_mm512_mask_broadcast_f32x4(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_maskz_broadcast_f32x4(simde__mmask16 k, simde__m128 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_broadcast_f32x4(k, a); #else return simde_mm512_maskz_mov_ps(k, simde_mm512_broadcast_f32x4(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_broadcast_f32x4 #define _mm512_maskz_broadcast_f32x4(k, a) simde_mm512_maskz_broadcast_f32x4(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_broadcast_f64x4 (simde__m256d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_broadcast_f64x4(a); #else simde__m512d_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = a; } return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_broadcast_f64x4 #define _mm512_broadcast_f64x4(a) simde_mm512_broadcast_f64x4(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_broadcast_f64x4(simde__m512d src, simde__mmask8 k, simde__m256d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) 
return _mm512_mask_broadcast_f64x4(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_broadcast_f64x4(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_broadcast_f64x4 #define _mm512_mask_broadcast_f64x4(src, k, a) simde_mm512_mask_broadcast_f64x4(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_maskz_broadcast_f64x4(simde__mmask8 k, simde__m256d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_broadcast_f64x4(k, a); #else return simde_mm512_maskz_mov_pd(k, simde_mm512_broadcast_f64x4(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_broadcast_f64x4 #define _mm512_maskz_broadcast_f64x4(k, a) simde_mm512_maskz_broadcast_f64x4(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_broadcast_i32x4 (simde__m128i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_broadcast_i32x4(a); #else simde__m512i_private r_; #if defined(SIMDE_X86_AVX2_NATIVE) r_.m256i[1] = r_.m256i[0] = simde_mm256_broadcastsi128_si256(a); #elif defined(SIMDE_X86_SSE2_NATIVE) r_.m128i[3] = r_.m128i[2] = r_.m128i[1] = r_.m128i[0] = a; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { r_.m128i[i] = a; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_broadcast_i32x4 #define _mm512_broadcast_i32x4(a) simde_mm512_broadcast_i32x4(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_broadcast_i32x4(simde__m512i src, simde__mmask16 k, simde__m128i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_broadcast_i32x4(src, k, a); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_broadcast_i32x4(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_broadcast_i32x4 #define _mm512_mask_broadcast_i32x4(src, k, a) simde_mm512_mask_broadcast_i32x4(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde__m512i simde_mm512_maskz_broadcast_i32x4(simde__mmask16 k, simde__m128i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_broadcast_i32x4(k, a); #else return simde_mm512_maskz_mov_epi32(k, simde_mm512_broadcast_i32x4(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_broadcast_i32x4 #define _mm512_maskz_broadcast_i32x4(k, a) simde_mm512_maskz_broadcast_i32x4(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_broadcast_i64x4 (simde__m256i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_broadcast_i64x4(a); #else simde__m512i_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = a; } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_broadcast_i64x4 #define _mm512_broadcast_i64x4(a) simde_mm512_broadcast_i64x4(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_broadcast_i64x4(simde__m512i src, simde__mmask8 k, simde__m256i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_broadcast_i64x4(src, k, a); #else return simde_mm512_mask_mov_epi64(src, k, simde_mm512_broadcast_i64x4(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_broadcast_i64x4 #define _mm512_mask_broadcast_i64x4(src, k, a) simde_mm512_mask_broadcast_i64x4(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_broadcast_i64x4(simde__mmask8 k, simde__m256i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_broadcast_i64x4(k, a); #else return simde_mm512_maskz_mov_epi64(k, simde_mm512_broadcast_i64x4(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_broadcast_i64x4 #define _mm512_maskz_broadcast_i64x4(k, a) simde_mm512_maskz_broadcast_i64x4(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_broadcastd_epi32 (simde__m128i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) 
return _mm512_broadcastd_epi32(a); #else simde__m512i_private r_; simde__m128i_private a_= simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[0]; } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_broadcastd_epi32 #define _mm512_broadcastd_epi32(a) simde_mm512_broadcastd_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_broadcastd_epi32(simde__m512i src, simde__mmask16 k, simde__m128i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_broadcastd_epi32(src, k, a); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_broadcastd_epi32(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_broadcastd_epi32 #define _mm512_mask_broadcastd_epi32(src, k, a) simde_mm512_mask_broadcastd_epi32(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_broadcastd_epi32(simde__mmask16 k, simde__m128i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_broadcastd_epi32(k, a); #else return simde_mm512_maskz_mov_epi32(k, simde_mm512_broadcastd_epi32(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_broadcastd_epi32 #define _mm512_maskz_broadcastd_epi32(k, a) simde_mm512_maskz_broadcastd_epi32(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_broadcastq_epi64 (simde__m128i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_broadcastq_epi64(a); #else simde__m512i_private r_; simde__m128i_private a_= simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[0]; } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_broadcastq_epi64 #define _mm512_broadcastq_epi64(a) simde_mm512_broadcastq_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i 
simde_mm512_mask_broadcastq_epi64(simde__m512i src, simde__mmask8 k, simde__m128i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_broadcastq_epi64(src, k, a); #else return simde_mm512_mask_mov_epi64(src, k, simde_mm512_broadcastq_epi64(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_broadcastq_epi64 #define _mm512_mask_broadcastq_epi64(src, k, a) simde_mm512_mask_broadcastq_epi64(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_broadcastq_epi64(simde__mmask8 k, simde__m128i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_broadcastq_epi64(k, a); #else return simde_mm512_maskz_mov_epi64(k, simde_mm512_broadcastq_epi64(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_broadcastq_epi64 #define _mm512_maskz_broadcastq_epi64(k, a) simde_mm512_maskz_broadcastq_epi64(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_broadcastss_ps (simde__m128 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_broadcastss_ps(a); #else simde__m512_private r_; simde__m128_private a_= simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[0]; } return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_broadcastss_ps #define _mm512_broadcastss_ps(a) simde_mm512_broadcastss_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_broadcastss_ps(simde__m512 src, simde__mmask16 k, simde__m128 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_broadcastss_ps(src, k, a); #else simde__m512_private src_ = simde__m512_to_private(src), r_; simde__m128_private a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = ((k >> i) & 1) ? 
a_.f32[0] : src_.f32[i]; } return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_broadcastss_ps #define _mm512_mask_broadcastss_ps(src, k, a) simde_mm512_mask_broadcastss_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_maskz_broadcastss_ps(simde__mmask16 k, simde__m128 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_broadcastss_ps(k, a); #else simde__m512_private r_; simde__m128_private a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = ((k >> i) & 1) ? a_.f32[0] : INT32_C(0); } return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_broadcastss_ps #define _mm512_maskz_broadcastss_ps(k, a) simde_mm512_maskz_broadcastss_ps(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_broadcastsd_pd (simde__m128d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_broadcastsd_pd(a); #else simde__m512d_private r_; simde__m128d_private a_= simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[0]; } return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_broadcastsd_pd #define _mm512_broadcastsd_pd(a) simde_mm512_broadcastsd_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_broadcastsd_pd(simde__m512d src, simde__mmask8 k, simde__m128d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_broadcastsd_pd(src, k, a); #else simde__m512d_private src_ = simde__m512d_to_private(src), r_; simde__m128d_private a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = ((k >> i) & 1) ? 
a_.f64[0] : src_.f64[i]; } return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_broadcastsd_pd #define _mm512_mask_broadcastsd_pd(src, k, a) simde_mm512_mask_broadcastsd_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_maskz_broadcastsd_pd(simde__mmask8 k, simde__m128d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_broadcastsd_pd(k, a); #else simde__m512d_private r_; simde__m128d_private a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = ((k >> i) & 1) ? a_.f64[0] : INT64_C(0); } return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_broadcastsd_pd #define _mm512_maskz_broadcastsd_pd(k, a) simde_mm512_maskz_broadcastsd_pd(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_broadcastb_epi8 (simde__m128i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_broadcastb_epi8(a); #else simde__m128i_private a_= simde__m128i_to_private(a); return simde_mm512_set1_epi8(a_.i8[0]); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_broadcastb_epi8 #define _mm512_broadcastb_epi8(a) simde_mm512_broadcastb_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_broadcastb_epi8 (simde__m512i src, simde__mmask64 k, simde__m128i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_broadcastb_epi8(src, k, a); #else return simde_mm512_mask_mov_epi8(src, k, simde_mm512_broadcastb_epi8(a)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_broadcastb_epi8 #define _mm512_mask_broadcastb_epi8(src, k, a) simde_mm512_mask_broadcastb_epi8(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_broadcastb_epi8 (simde__mmask64 k, simde__m128i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_broadcastb_epi8(k, a); 
#else return simde_mm512_maskz_mov_epi8(k, simde_mm512_broadcastb_epi8(a)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_broadcastb_epi8 #define _mm512_maskz_broadcastb_epi8(k, a) simde_mm512_maskz_broadcastb_epi8(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_broadcastw_epi16 (simde__m128i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_broadcastw_epi16(a); #else simde__m128i_private a_= simde__m128i_to_private(a); return simde_mm512_set1_epi16(a_.i16[0]); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_broadcastw_epi16 #define _mm512_broadcastw_epi16(a) simde_mm512_broadcastw_epi16(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_BROADCAST_H) */ simde-0.7.2/simde/x86/avx512/cast.h000066400000000000000000000216061400333146700165060ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur * 2020 Hidayat Khan * 2020 Christopher Moore */ #if !defined(SIMDE_X86_AVX512_CAST_H) #define SIMDE_X86_AVX512_CAST_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_castpd_ps (simde__m512d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_castpd_ps(a); #else simde__m512 r; simde_memcpy(&r, &a, sizeof(r)); return r; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_castpd_ps #define _mm512_castpd_ps(a) simde_mm512_castpd_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_castpd_si512 (simde__m512d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_castpd_si512(a); #else simde__m512i r; simde_memcpy(&r, &a, sizeof(r)); return r; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_castpd_si512 #define _mm512_castpd_si512(a) simde_mm512_castpd_si512(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_castps_pd (simde__m512 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_castps_pd(a); #else simde__m512d r; simde_memcpy(&r, &a, sizeof(r)); return r; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_castps_pd #define _mm512_castps_pd(a) simde_mm512_castps_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_castps_si512 (simde__m512 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_castps_si512(a); #else simde__m512i r; simde_memcpy(&r, &a, sizeof(r)); return r; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_castps_si512 #define _mm512_castps_si512(a) simde_mm512_castps_si512(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_castsi512_ps (simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_castsi512_ps(a); #else simde__m512 r; simde_memcpy(&r, &a, sizeof(r)); return r; #endif } #if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_castsi512_ps #define _mm512_castsi512_ps(a) simde_mm512_castsi512_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_castsi512_pd (simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_castsi512_pd(a); #else simde__m512d r; simde_memcpy(&r, &a, sizeof(r)); return r; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_castsi512_pd #define _mm512_castsi512_pd(a) simde_mm512_castsi512_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_castpd128_pd512 (simde__m128d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_castpd128_pd512(a); #else simde__m512d_private r_; r_.m128d[0] = a; return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_castpd128_pd512 #define _mm512_castpd128_pd512(a) simde_mm512_castpd128_pd512(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_castpd256_pd512 (simde__m256d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_castpd256_pd512(a); #else simde__m512d_private r_; r_.m256d[0] = a; return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_castpd256_pd512 #define _mm512_castpd256_pd512(a) simde_mm512_castpd256_pd512(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm512_castpd512_pd128 (simde__m512d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_castpd512_pd128(a); #else simde__m512d_private a_ = simde__m512d_to_private(a); return a_.m128d[0]; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_castpd512_pd128 #define _mm512_castpd512_pd128(a) simde_mm512_castpd512_pd128(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm512_castpd512_pd256 (simde__m512d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_castpd512_pd256(a); #else simde__m512d_private a_ = simde__m512d_to_private(a); return a_.m256d[0]; #endif } #if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_castpd512_pd256 #define _mm512_castpd512_pd256(a) simde_mm512_castpd512_pd256(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_castps128_ps512 (simde__m128 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_castps128_ps512(a); #else simde__m512_private r_; r_.m128[0] = a; return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_castps128_ps512 #define _mm512_castps128_ps512(a) simde_mm512_castps128_ps512(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_castps256_ps512 (simde__m256 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_castps256_ps512(a); #else simde__m512_private r_; r_.m256[0] = a; return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_castps256_ps512 #define _mm512_castps256_ps512(a) simde_mm512_castps256_ps512(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm512_castps512_ps128 (simde__m512 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_castps512_ps128(a); #else simde__m512_private a_ = simde__m512_to_private(a); return a_.m128[0]; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_castps512_ps128 #define _mm512_castps512_ps128(a) simde_mm512_castps512_ps128(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm512_castps512_ps256 (simde__m512 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_castps512_ps256(a); #else simde__m512_private a_ = simde__m512_to_private(a); return a_.m256[0]; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_castps512_ps256 #define _mm512_castps512_ps256(a) simde_mm512_castps512_ps256(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_castsi128_si512 (simde__m128i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_castsi128_si512(a); #else simde__m512i_private r_; r_.m128i[0] = a; return simde__m512i_from_private(r_); 
#endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_castsi128_si512 #define _mm512_castsi128_si512(a) simde_mm512_castsi128_si512(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_castsi256_si512 (simde__m256i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_castsi256_si512(a); #else simde__m512i_private r_; r_.m256i[0] = a; return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_castsi256_si512 #define _mm512_castsi256_si512(a) simde_mm512_castsi256_si512(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm512_castsi512_si128 (simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_castsi512_si128(a); #else simde__m512i_private a_ = simde__m512i_to_private(a); return a_.m128i[0]; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_castsi512_si128 #define _mm512_castsi512_si128(a) simde_mm512_castsi512_si128(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm512_castsi512_si256 (simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_castsi512_si256(a); #else simde__m512i_private a_ = simde__m512i_to_private(a); return a_.m256i[0]; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_castsi512_si256 #define _mm512_castsi512_si256(a) simde_mm512_castsi512_si256(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_CAST_H) */ simde-0.7.2/simde/x86/avx512/cmp.h000066400000000000000000000551011400333146700163300ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * 
furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #if !defined(SIMDE_X86_AVX512_CMP_H) #define SIMDE_X86_AVX512_CMP_H #include "types.h" #include "../avx2.h" #include "mov.h" #include "mov_mask.h" #include "setzero.h" #include "setone.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__mmask16 simde_mm512_cmp_ps_mask (simde__m512 a, simde__m512 b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { #if defined(SIMDE_X86_AVX512F_NATIVE) simde__mmask16 r; SIMDE_CONSTIFY_32_(_mm512_cmp_ps_mask, r, (HEDLEY_UNREACHABLE(), 0), imm8, a, b); return r; #else simde__m512_private r_, a_ = simde__m512_to_private(a), b_ = simde__m512_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) switch (imm8) { case SIMDE_CMP_EQ_OQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); break; case SIMDE_CMP_LT_OS: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); break; case SIMDE_CMP_LE_OS: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); break; case SIMDE_CMP_UNORD_Q: #if defined(simde_math_isnanf) for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? 
~UINT32_C(0) : UINT32_C(0); } #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_UQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); break; case SIMDE_CMP_NLT_US: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); break; case SIMDE_CMP_NLE_US: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); break; case SIMDE_CMP_ORD_Q: #if defined(simde_math_isnanf) for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (!simde_math_isnanf(a_.f32[i]) && !simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); } #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_UQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); break; case SIMDE_CMP_NGE_US: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); break; case SIMDE_CMP_NGT_US: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); break; case SIMDE_CMP_FALSE_OQ: r_ = simde__m512_to_private(simde_mm512_setzero_ps()); break; case SIMDE_CMP_NEQ_OQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); break; case SIMDE_CMP_GE_OS: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); break; case SIMDE_CMP_GT_OS: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); break; case SIMDE_CMP_TRUE_UQ: r_ = simde__m512_to_private(simde_x_mm512_setone_ps()); break; case SIMDE_CMP_EQ_OS: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); break; case SIMDE_CMP_LT_OQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); break; case SIMDE_CMP_LE_OQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); break; case SIMDE_CMP_UNORD_S: #if defined(simde_math_isnanf) for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? 
~UINT32_C(0) : UINT32_C(0); } #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_US: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); break; case SIMDE_CMP_NLT_UQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); break; case SIMDE_CMP_NLE_UQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); break; case SIMDE_CMP_ORD_S: #if defined(simde_math_isnanf) for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); } #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_US: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); break; case SIMDE_CMP_NGE_UQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); break; case SIMDE_CMP_NGT_UQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); break; case SIMDE_CMP_FALSE_OS: r_ = simde__m512_to_private(simde_mm512_setzero_ps()); break; case SIMDE_CMP_NEQ_OS: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); break; case SIMDE_CMP_GE_OQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); break; case SIMDE_CMP_GT_OQ: r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); break; case SIMDE_CMP_TRUE_US: r_ = simde__m512_to_private(simde_x_mm512_setone_ps()); break; default: HEDLEY_UNREACHABLE(); break; } #else /* defined(SIMDE_VECTOR_SUBSCRIPT_OPS) */ SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { switch (imm8) { case SIMDE_CMP_EQ_OQ: r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_LT_OS: r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_LE_OS: r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_UNORD_Q: #if defined(simde_math_isnanf) r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_UQ: r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NLT_US: r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NLE_US: r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_ORD_Q: #if defined(simde_math_isnanf) r_.u32[i] = (!simde_math_isnanf(a_.f32[i]) && !simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_UQ: r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NGE_US: r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NGT_US: r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_FALSE_OQ: r_.u32[i] = UINT32_C(0); break; case SIMDE_CMP_NEQ_OQ: r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_GE_OS: r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_GT_OS: r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_TRUE_UQ: r_.u32[i] = ~UINT32_C(0); break; case SIMDE_CMP_EQ_OS: r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_LT_OQ: r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_LE_OQ: r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_UNORD_S: #if defined(simde_math_isnanf) r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_US: r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NLT_UQ: r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NLE_UQ: r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_ORD_S: #if defined(simde_math_isnanf) r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_US: r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NGE_UQ: r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_NGT_UQ: r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_FALSE_OS: r_.u32[i] = UINT32_C(0); break; case SIMDE_CMP_NEQ_OS: r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_GE_OQ: r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_GT_OQ: r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); break; case SIMDE_CMP_TRUE_US: r_.u32[i] = ~UINT32_C(0); break; default: HEDLEY_UNREACHABLE(); break; } } #endif return simde_mm512_movepi32_mask(simde_mm512_castps_si512(simde__m512_from_private(r_))); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_cmp_ps_mask #define _mm512_cmp_ps_mask(a, b, imm8) simde_mm512_cmp_ps_mask((a), (b), (imm8)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask8 simde_mm512_cmp_pd_mask (simde__m512d a, simde__m512d b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { #if defined(SIMDE_X86_AVX512F_NATIVE) simde__mmask8 r; SIMDE_CONSTIFY_32_(_mm512_cmp_pd_mask, r, (HEDLEY_UNREACHABLE(), 0), imm8, a, b); return r; #else simde__m512d_private r_, a_ = simde__m512d_to_private(a), b_ = simde__m512d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) switch (imm8) { case SIMDE_CMP_EQ_OQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); break; case SIMDE_CMP_LT_OS: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); break; case SIMDE_CMP_LE_OS: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); break; case SIMDE_CMP_UNORD_Q: #if defined(simde_math_isnanf) for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (simde_math_isnanf(a_.f64[i]) || simde_math_isnanf(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); } #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_UQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); break; case SIMDE_CMP_NLT_US: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); break; case SIMDE_CMP_NLE_US: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); break; case SIMDE_CMP_ORD_Q: #if defined(simde_math_isnanf) for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (!simde_math_isnanf(a_.f64[i]) && !simde_math_isnanf(b_.f64[i])) ? 
~UINT64_C(0) : UINT64_C(0); } #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_UQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); break; case SIMDE_CMP_NGE_US: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); break; case SIMDE_CMP_NGT_US: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); break; case SIMDE_CMP_FALSE_OQ: r_ = simde__m512d_to_private(simde_mm512_setzero_pd()); break; case SIMDE_CMP_NEQ_OQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); break; case SIMDE_CMP_GE_OS: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); break; case SIMDE_CMP_GT_OS: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); break; case SIMDE_CMP_TRUE_UQ: r_ = simde__m512d_to_private(simde_x_mm512_setone_pd()); break; case SIMDE_CMP_EQ_OS: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); break; case SIMDE_CMP_LT_OQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); break; case SIMDE_CMP_LE_OQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); break; case SIMDE_CMP_UNORD_S: #if defined(simde_math_isnanf) for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (simde_math_isnanf(a_.f64[i]) || simde_math_isnanf(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); } #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_US: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); break; case SIMDE_CMP_NLT_UQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); break; case SIMDE_CMP_NLE_UQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); break; case SIMDE_CMP_ORD_S: #if defined(simde_math_isnanf) for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (simde_math_isnanf(a_.f64[i]) || simde_math_isnanf(b_.f64[i])) ? 
UINT64_C(0) : ~UINT64_C(0); } #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_US: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); break; case SIMDE_CMP_NGE_UQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); break; case SIMDE_CMP_NGT_UQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); break; case SIMDE_CMP_FALSE_OS: r_ = simde__m512d_to_private(simde_mm512_setzero_pd()); break; case SIMDE_CMP_NEQ_OS: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); break; case SIMDE_CMP_GE_OQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); break; case SIMDE_CMP_GT_OQ: r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); break; case SIMDE_CMP_TRUE_US: r_ = simde__m512d_to_private(simde_x_mm512_setone_pd()); break; default: HEDLEY_UNREACHABLE(); break; } #else /* defined(SIMDE_VECTOR_SUBSCRIPT_OPS) */ SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { switch (imm8) { case SIMDE_CMP_EQ_OQ: r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_LT_OS: r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_LE_OS: r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_UNORD_Q: #if defined(simde_math_isnanf) r_.u64[i] = (simde_math_isnanf(a_.f64[i]) || simde_math_isnanf(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_UQ: r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NLT_US: r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NLE_US: r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_ORD_Q: #if defined(simde_math_isnanf) r_.u64[i] = (!simde_math_isnanf(a_.f64[i]) && !simde_math_isnanf(b_.f64[i])) ? 
~UINT64_C(0) : UINT64_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_UQ: r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NGE_US: r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NGT_US: r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_FALSE_OQ: r_.u64[i] = UINT64_C(0); break; case SIMDE_CMP_NEQ_OQ: r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_GE_OS: r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_GT_OS: r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_TRUE_UQ: r_.u64[i] = ~UINT64_C(0); break; case SIMDE_CMP_EQ_OS: r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_LT_OQ: r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_LE_OQ: r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_UNORD_S: #if defined(simde_math_isnanf) r_.u64[i] = (simde_math_isnanf(a_.f64[i]) || simde_math_isnanf(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_NEQ_US: r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NLT_UQ: r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NLE_UQ: r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_ORD_S: #if defined(simde_math_isnanf) r_.u64[i] = (simde_math_isnanf(a_.f64[i]) || simde_math_isnanf(b_.f64[i])) ? UINT64_C(0) : ~UINT64_C(0); #else HEDLEY_UNREACHABLE(); #endif break; case SIMDE_CMP_EQ_US: r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NGE_UQ: r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_NGT_UQ: r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_FALSE_OS: r_.u64[i] = UINT64_C(0); break; case SIMDE_CMP_NEQ_OS: r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_GE_OQ: r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_GT_OQ: r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); break; case SIMDE_CMP_TRUE_US: r_.u64[i] = ~UINT64_C(0); break; default: HEDLEY_UNREACHABLE(); break; } } #endif return simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(r_))); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_cmp_pd_mask #define _mm512_cmp_pd_mask(a, b, imm8) simde_mm512_cmp_pd_mask((a), (b), (imm8)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_CMP_H) */ simde-0.7.2/simde/x86/avx512/cmpeq.h000066400000000000000000000135071400333146700166620ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #if !defined(SIMDE_X86_AVX512_CMPEQ_H) #define SIMDE_X86_AVX512_CMPEQ_H #include "types.h" #include "../avx2.h" #include "mov.h" #include "mov_mask.h" #include "cmp.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__mmask64 simde_mm512_cmpeq_epi8_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_cmpeq_epi8_mask(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); simde__mmask64 r; #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { const uint32_t t = HEDLEY_STATIC_CAST(uint32_t, simde_mm256_movemask_epi8(simde_mm256_cmpeq_epi8(a_.m256i[i], b_.m256i[i]))); r |= HEDLEY_STATIC_CAST(uint64_t, t) << (i * 32); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) simde__m512i_private tmp; tmp.i8 = HEDLEY_STATIC_CAST(__typeof__(tmp.i8), a_.i8 == b_.i8); r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); #else r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { r |= (a_.u8[i] == b_.u8[i]) ? 
(UINT64_C(1) << i) : 0; } #endif return r; #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_cmpeq_epi8_mask #define _mm512_cmpeq_epi8_mask(a, b) simde_mm512_cmpeq_epi8_mask(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask16 simde_mm512_cmpeq_epi32_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cmpeq_epi32_mask(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_cmpeq_epi32(a_.m256i[i], b_.m256i[i]); } return simde_mm512_movepi32_mask(simde__m512i_from_private(r_)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_cmpeq_epi32_mask #define _mm512_cmpeq_epi32_mask(a, b) simde_mm512_cmpeq_epi32_mask(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask16 simde_mm512_mask_cmpeq_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cmpeq_epi32_mask(k1, a, b); #else return simde_mm512_cmpeq_epi32_mask(a, b) & k1; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmpeq_epi32_mask #define _mm512_mask_cmpeq_epi32_mask(k1, a, b) simde_mm512_mask_cmpeq_epi32_mask(k1, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask8 simde_mm512_cmpeq_epi64_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cmpeq_epi64_mask(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_cmpeq_epi64(a_.m256i[i], b_.m256i[i]); } return simde_mm512_movepi64_mask(simde__m512i_from_private(r_)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_cmpeq_epi64_mask #define _mm512_cmpeq_epi64_mask(a, b) simde_mm512_cmpeq_epi64_mask(a, b) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde__mmask8 simde_mm512_mask_cmpeq_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cmpeq_epi64_mask(k1, a, b); #else return simde_mm512_cmpeq_epi64_mask(a, b) & k1; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmpeq_epi64_mask #define _mm512_mask_cmpeq_epi64_mask(k1, a, b) simde_mm512_mask_cmpeq_epi64_mask(k1, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask16 simde_mm512_cmpeq_ps_mask (simde__m512 a, simde__m512 b) { return simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_EQ_OQ); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_cmpeq_ps_mask #define _mm512_cmpeq_ps_mask(a, b) simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_EQ_OQ) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask8 simde_mm512_cmpeq_pd_mask (simde__m512d a, simde__m512d b) { return simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_EQ_OQ); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_cmpeq_pd_mask #define _mm512_cmpeq_pd_mask(a, b) simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_EQ_OQ) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_CMPEQ_H) */ simde-0.7.2/simde/x86/avx512/cmpge.h000066400000000000000000000065111400333146700166450ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_X86_AVX512_CMPGE_H) #define SIMDE_X86_AVX512_CMPGE_H #include "types.h" #include "mov.h" #include "mov_mask.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__mmask64 simde_mm512_cmpge_epi8_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_cmpge_epi8_mask(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); simde__mmask64 r = 0; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) simde__m512i_private tmp; tmp.i8 = HEDLEY_STATIC_CAST(__typeof__(tmp.i8), a_.i8 >= b_.i8); r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { r |= (a_.i8[i] >= b_.i8[i]) ? 
(UINT64_C(1) << i) : 0; } #endif return r; #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_cmpge_epi8_mask #define _mm512_cmpge_epi8_mask(a, b) simde_mm512_cmpge_epi8_mask(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask64 simde_mm512_cmpge_epu8_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_cmpge_epu8_mask(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); simde__mmask64 r = 0; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) simde__m512i_private tmp; tmp.i8 = HEDLEY_STATIC_CAST(__typeof__(tmp.i8), a_.u8 >= b_.u8); r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { r |= (a_.u8[i] >= b_.u8[i]) ? (UINT64_C(1) << i) : 0; } #endif return r; #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_cmpge_epu8_mask #define _mm512_cmpge_epu8_mask(a, b) simde_mm512_cmpge_epu8_mask(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_CMPGE_H) */ simde-0.7.2/simde/x86/avx512/cmpgt.h000066400000000000000000000141601400333146700166630ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_X86_AVX512_CMPGT_H) #define SIMDE_X86_AVX512_CMPGT_H #include "types.h" #include "../avx2.h" #include "mov.h" #include "mov_mask.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__mmask64 simde_mm512_cmpgt_epi8_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_cmpgt_epi8_mask(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); simde__mmask64 r; #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) && !defined(HEDLEY_INTEL_VERSION) r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { const uint32_t t = HEDLEY_STATIC_CAST(uint32_t, simde_mm256_movemask_epi8(simde_mm256_cmpgt_epi8(a_.m256i[i], b_.m256i[i]))); r |= HEDLEY_STATIC_CAST(uint64_t, t) << HEDLEY_STATIC_CAST(uint64_t, i * 32); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) simde__m512i_private tmp; tmp.i8 = HEDLEY_STATIC_CAST(__typeof__(tmp.i8), a_.i8 > b_.i8); r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); #else r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { r |= (a_.i8[i] > b_.i8[i]) ? 
(UINT64_C(1) << i) : 0; } #endif return r; #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_cmpgt_epi8_mask #define _mm512_cmpgt_epi8_mask(a, b) simde_mm512_cmpgt_epi8_mask(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask64 simde_mm512_cmpgt_epu8_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_cmpgt_epu8_mask(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); simde__mmask64 r = 0; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) simde__m512i_private tmp; tmp.i8 = HEDLEY_STATIC_CAST(__typeof__(tmp.i8), a_.u8 > b_.u8); r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { r |= (a_.u8[i] > b_.u8[i]) ? (UINT64_C(1) << i) : 0; } #endif return r; #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_cmpgt_epu8_mask #define _mm512_cmpgt_epu8_mask(a, b) simde_mm512_cmpgt_epu8_mask(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask16 simde_mm512_cmpgt_epi32_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cmpgt_epi32_mask(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_cmpgt_epi32(a_.m256i[i], b_.m256i[i]); } return simde_mm512_movepi32_mask(simde__m512i_from_private(r_)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_cmpgt_epi32_mask #define _mm512_cmpgt_epi32_mask(a, b) simde_mm512_cmpgt_epi32_mask(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask16 simde_mm512_mask_cmpgt_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cmpgt_epi32_mask(k1, a, b); #else return simde_mm512_cmpgt_epi32_mask(a, b) & k1; #endif } #if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmpgt_epi32_mask #define _mm512_mask_cmpgt_epi32_mask(k1, a, b) simde_mm512_mask_cmpgt_epi32_mask(k1, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask8 simde_mm512_cmpgt_epi64_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cmpgt_epi64_mask(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_cmpgt_epi64(a_.m256i[i], b_.m256i[i]); } return simde_mm512_movepi64_mask(simde__m512i_from_private(r_)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_cmpgt_epi64_mask #define _mm512_cmpgt_epi64_mask(a, b) simde_mm512_cmpgt_epi64_mask(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask8 simde_mm512_mask_cmpgt_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cmpgt_epi64_mask(k1, a, b); #else return simde_mm512_cmpgt_epi64_mask(a, b) & k1; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmpgt_epi64_mask #define _mm512_mask_cmpgt_epi64_mask(k1, a, b) simde_mm512_mask_cmpgt_epi64_mask(k1, a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_CMPGT_H) */ simde-0.7.2/simde/x86/avx512/cmple.h000066400000000000000000000064301400333146700166520ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above 
copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_X86_AVX512_CMPLE_H) #define SIMDE_X86_AVX512_CMPLE_H #include "types.h" #include "mov.h" #include "mov_mask.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__mmask64 simde_mm512_cmple_epi8_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_cmple_epi8_mask(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); simde__mmask64 r = 0; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) simde__m512i_private tmp; tmp.i8 = HEDLEY_STATIC_CAST(__typeof__(tmp.i8), a_.i8 <= b_.i8); r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { r |= (a_.i8[i] <= b_.i8[i]) ? 
(UINT64_C(1) << i) : 0; } #endif return r; #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_cmple_epi8_mask #define _mm512_cmple_epi8_mask(a, b) simde_mm512_cmple_epi8_mask(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask64 simde_mm512_cmple_epu8_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_cmple_epu8_mask(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); simde__mmask64 r = 0; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) simde__m512i_private tmp; tmp.i8 = HEDLEY_STATIC_CAST(__typeof__(tmp.i8), a_.u8 <= b_.u8); r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { r |= (a_.u8[i] <= b_.u8[i]) ? (UINT64_C(1) << i) : 0; } #endif return r; #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_cmple_epu8_mask #define _mm512_cmple_epu8_mask(a, b) simde_mm512_cmple_epu8_mask(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_CMPLE_H) */ simde-0.7.2/simde/x86/avx512/cmplt.h000066400000000000000000000076541400333146700167020ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_X86_AVX512_CMPLT_H) #define SIMDE_X86_AVX512_CMPLT_H #include "types.h" #include "mov.h" #include "cmp.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__mmask16 simde_mm512_cmplt_ps_mask (simde__m512 a, simde__m512 b) { return simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_LT_OQ); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_cmplt_ps_mask #define _mm512_cmplt_ps_mask(a, b) simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_LT_OQ) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask8 simde_mm512_cmplt_pd_mask (simde__m512d a, simde__m512d b) { return simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_LT_OQ); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_cmplt_pd_mask #define _mm512_cmplt_pd_mask(a, b) simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_LT_OQ) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask64 simde_mm512_cmplt_epi8_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_cmplt_epi8_mask(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); simde__mmask64 r = 0; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) simde__m512i_private tmp; tmp.i8 = HEDLEY_STATIC_CAST(__typeof__(tmp.i8), a_.i8 < b_.i8); r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { r |= (a_.i8[i] < b_.i8[i]) ? 
(UINT64_C(1) << i) : 0; } #endif return r; #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_cmplt_epi8_mask #define _mm512_cmplt_epi8_mask(a, b) simde_mm512_cmplt_epi8_mask(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask64 simde_mm512_cmplt_epu8_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_cmplt_epu8_mask(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); simde__mmask64 r = 0; #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) simde__m512i_private tmp; tmp.i8 = HEDLEY_STATIC_CAST(__typeof__(tmp.i8), a_.u8 < b_.u8); r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { r |= (a_.u8[i] < b_.u8[i]) ? (UINT64_C(1) << i) : 0; } #endif return r; #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_cmplt_epu8_mask #define _mm512_cmplt_epu8_mask(a, b) simde_mm512_cmplt_epu8_mask(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_CMPLT_H) */ simde-0.7.2/simde/x86/avx512/copysign.h000066400000000000000000000057001400333146700174040ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #if !defined(SIMDE_X86_AVX512_COPYSIGN_H) #define SIMDE_X86_AVX512_COPYSIGN_H #include "types.h" #include "mov.h" #include "and.h" #include "andnot.h" #include "xor.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_x_mm512_copysign_ps(simde__m512 dest, simde__m512 src) { simde__m512_private r_, dest_ = simde__m512_to_private(dest), src_ = simde__m512_to_private(src); #if defined(simde_math_copysignf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); } #else simde__m512 sgnbit = simde_mm512_xor_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm512_set1_ps(-SIMDE_FLOAT32_C(0.0))); return simde_mm512_xor_ps(simde_mm512_and_ps(sgnbit, src), simde_mm512_andnot_ps(sgnbit, dest)); #endif return simde__m512_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_x_mm512_copysign_pd(simde__m512d dest, simde__m512d src) { simde__m512d_private r_, dest_ = simde__m512d_to_private(dest), src_ = simde__m512d_to_private(src); #if defined(simde_math_copysign) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); } #else simde__m512d sgnbit = simde_mm512_xor_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm512_set1_pd(-SIMDE_FLOAT64_C(0.0))); return 
simde_mm512_xor_pd(simde_mm512_and_pd(sgnbit, src), simde_mm512_andnot_pd(sgnbit, dest)); #endif return simde__m512d_from_private(r_); } SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_COPYSIGN_H) */ simde-0.7.2/simde/x86/avx512/cvt.h000066400000000000000000000075671400333146700163620ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_CVT_H) #define SIMDE_X86_AVX512_CVT_H #include "types.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm512_cvtepi16_epi8 (simde__m512i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_cvtepi16_epi8(a); #else simde__m256i_private r_; simde__m512i_private a_ = simde__m512i_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.i8, a_.i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_cvtepi16_epi8 #define _mm512_cvtepi16_epi8(a) simde_mm512_cvtepi16_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm512_mask_cvtepi16_epi8 (simde__m256i src, simde__mmask32 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_cvtepi16_epi8(src, k, a); #else return simde_mm256_mask_mov_epi8(src, k, simde_mm512_cvtepi16_epi8(a)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cvtepi16_epi8 #define _mm512_mask_cvtepi16_epi8(src, k, a) simde_mm512_mask_cvtepi16_epi8(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm512_maskz_cvtepi16_epi8 (simde__mmask32 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_cvtepi16_epi8(k, a); #else return simde_mm256_maskz_mov_epi8(k, simde_mm512_cvtepi16_epi8(a)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_cvtepi16_epi8 #define _mm512_maskz_cvtepi16_epi8(k, a) simde_mm512_maskz_cvtepi16_epi8(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_cvtepi8_epi16 (simde__m256i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_cvtepi8_epi16(a); 
#else simde__m512i_private r_; simde__m256i_private a_ = simde__m256i_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.i16, a_.i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i8[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_cvtepi8_epi16 #define _mm512_cvtepi8_epi16(a) simde_mm512_cvtepi8_epi16(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_CVT_H) */ simde-0.7.2/simde/x86/avx512/cvts.h000066400000000000000000000556351400333146700165440ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_CVTS_H) #define SIMDE_X86_AVX512_CVTS_H #include "types.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtsepi16_epi8 (simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_cvtsepi16_epi8(a); #else simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); simde__m128i_private a_ = simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { r_.i8[i] = (a_.i16[i] < INT8_MIN) ? (INT8_MIN) : ((a_.i16[i] > INT8_MAX) ? (INT8_MAX) : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_cvtsepi16_epi8 #define _mm_cvtsepi16_epi8(a) simde_mm_cvtsepi16_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm256_cvtsepi16_epi8 (simde__m256i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_cvtsepi16_epi8(a); #else simde__m128i_private r_; simde__m256i_private a_ = simde__m256i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i16[i] < INT8_MIN) ? (INT8_MIN) : ((a_.i16[i] > INT8_MAX) ? 
(INT8_MAX) : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_cvtsepi16_epi8 #define _mm256_cvtsepi16_epi8(a) simde_mm256_cvtsepi16_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtsepi32_epi8 (simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_cvtsepi32_epi8(a); #else simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); simde__m128i_private a_ = simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { r_.i8[i] = (a_.i32[i] < INT8_MIN) ? (INT8_MIN) : ((a_.i32[i] > INT8_MAX) ? (INT8_MAX) : HEDLEY_STATIC_CAST(int8_t, a_.i32[i])); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_cvtsepi32_epi8 #define _mm_cvtsepi32_epi8(a) simde_mm_cvtsepi32_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm256_cvtsepi32_epi8 (simde__m256i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_cvtsepi32_epi8(a); #else simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); simde__m256i_private a_ = simde__m256i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { r_.i8[i] = (a_.i32[i] < INT8_MIN) ? (INT8_MIN) : ((a_.i32[i] > INT8_MAX) ? 
(INT8_MAX) : HEDLEY_STATIC_CAST(int8_t, a_.i32[i])); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_cvtsepi32_epi8 #define _mm256_cvtsepi32_epi8(a) simde_mm256_cvtsepi32_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtsepi32_epi16 (simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_cvtsepi32_epi16(a); #else simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); simde__m128i_private a_ = simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { r_.i16[i] = (a_.i32[i] < INT16_MIN) ? (INT16_MIN) : ((a_.i32[i] > INT16_MAX) ? (INT16_MAX) : HEDLEY_STATIC_CAST(int16_t, a_.i32[i])); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_cvtsepi32_epi16 #define _mm_cvtsepi32_epi16(a) simde_mm_cvtsepi32_epi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm256_cvtsepi32_epi16 (simde__m256i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_cvtsepi32_epi16(a); #else simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); simde__m256i_private a_ = simde__m256i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { r_.i16[i] = (a_.i32[i] < INT16_MIN) ? (INT16_MIN) : ((a_.i32[i] > INT16_MAX) ? 
(INT16_MAX) : HEDLEY_STATIC_CAST(int16_t, a_.i32[i])); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_cvtsepi32_epi16 #define _mm256_cvtsepi32_epi16(a) simde_mm256_cvtsepi32_epi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtsepi64_epi8 (simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_cvtsepi64_epi8(a); #else simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); simde__m128i_private a_ = simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { r_.i8[i] = (a_.i64[i] < INT8_MIN) ? (INT8_MIN) : ((a_.i64[i] > INT8_MAX) ? (INT8_MAX) : HEDLEY_STATIC_CAST(int8_t, a_.i64[i])); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_cvtsepi64_epi8 #define _mm_cvtsepi64_epi8(a) simde_mm_cvtsepi64_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm256_cvtsepi64_epi8 (simde__m256i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_cvtsepi64_epi8(a); #else simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); simde__m256i_private a_ = simde__m256i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { r_.i8[i] = (a_.i64[i] < INT8_MIN) ? (INT8_MIN) : ((a_.i64[i] > INT8_MAX) ? 
(INT8_MAX) : HEDLEY_STATIC_CAST(int8_t, a_.i64[i])); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_cvtsepi64_epi8 #define _mm256_cvtsepi64_epi8(a) simde_mm256_cvtsepi64_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm512_cvtsepi16_epi8 (simde__m512i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_cvtsepi16_epi8(a); #else simde__m256i_private r_; simde__m512i_private a_ = simde__m512i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i16[i] < INT8_MIN) ? (INT8_MIN) : ((a_.i16[i] > INT8_MAX) ? (INT8_MAX) : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_cvtsepi16_epi8 #define _mm512_cvtsepi16_epi8(a) simde_mm512_cvtsepi16_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm512_mask_cvtsepi16_epi8 (simde__m256i src, simde__mmask32 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_cvtsepi16_epi8(src, k, a); #else return simde_mm256_mask_mov_epi8(src, k, simde_mm512_cvtsepi16_epi8(a)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cvtsepi16_epi8 #define _mm512_mask_cvtsepi16_epi8(src, k, a) simde_mm512_mask_cvtsepi16_epi8(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm512_maskz_cvtsepi16_epi8 (simde__mmask32 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_cvtsepi16_epi8(k, a); #else return simde_mm256_maskz_mov_epi8(k, simde_mm512_cvtsepi16_epi8(a)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_cvtsepi16_epi8 #define _mm512_maskz_cvtsepi16_epi8(k, a) simde_mm512_maskz_cvtsepi16_epi8(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm512_cvtsepi32_epi8 (simde__m512i a) { #if 
defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cvtsepi32_epi8(a); #else simde__m128i_private r_; simde__m512i_private a_ = simde__m512i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { r_.i8[i] = (a_.i32[i] < INT8_MIN) ? (INT8_MIN) : ((a_.i32[i] > INT8_MAX) ? (INT8_MAX) : HEDLEY_STATIC_CAST(int8_t, a_.i32[i])); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_cvtsepi32_epi8 #define _mm512_cvtsepi32_epi8(a) simde_mm512_cvtsepi32_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm512_mask_cvtsepi32_epi8 (simde__m128i src, simde__mmask16 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cvtsepi32_epi8(src, k, a); #else simde__m128i_private r_; simde__m128i_private src_ = simde__m128i_to_private(src); simde__m512i_private a_ = simde__m512i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { r_.i8[i] = ((k>>i) &1 ) ? ((a_.i32[i] < INT8_MIN) ? (INT8_MIN) : ((a_.i32[i] > INT8_MAX) ? (INT8_MAX) : HEDLEY_STATIC_CAST(int8_t, a_.i32[i]))) : src_.i8[i] ; } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cvtsepi32_epi8 #define _mm512_mask_cvtsepi32_epi8(src, k, a) simde_mm512_mask_cvtsepi32_epi8(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm512_maskz_cvtsepi32_epi8 (simde__mmask16 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_cvtsepi32_epi8(k, a); #else simde__m128i_private r_; simde__m512i_private a_ = simde__m512i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { r_.i8[i] = ((k>>i) &1 ) ? ((a_.i32[i] < INT8_MIN) ? (INT8_MIN) : ((a_.i32[i] > INT8_MAX) ? 
(INT8_MAX) : HEDLEY_STATIC_CAST(int8_t, a_.i32[i]))) : INT8_C(0) ; } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_cvtsepi32_epi8 #define _mm512_maskz_cvtsepi32_epi8(k, a) simde_mm512_maskz_cvtsepi32_epi8(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm512_cvtsepi32_epi16 (simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cvtsepi32_epi16(a); #else simde__m256i_private r_; simde__m512i_private a_ = simde__m512i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { r_.i16[i] = (a_.i32[i] < INT16_MIN) ? (INT16_MIN) : ((a_.i32[i] > INT16_MAX) ? (INT16_MAX) : HEDLEY_STATIC_CAST(int16_t, a_.i32[i])); } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_cvtsepi32_epi16 #define _mm512_cvtsepi32_epi16(a) simde_mm512_cvtsepi32_epi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm512_mask_cvtsepi32_epi16 (simde__m256i src, simde__mmask16 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cvtsepi32_epi16(src, k, a); #else simde__m256i_private r_; simde__m256i_private src_ = simde__m256i_to_private(src); simde__m512i_private a_ = simde__m512i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { r_.i16[i] = ((k>>i) &1 ) ? ((a_.i32[i] < INT16_MIN) ? (INT16_MIN) : ((a_.i32[i] > INT16_MAX) ? 
(INT16_MAX) : HEDLEY_STATIC_CAST(int16_t, a_.i32[i]))) : src_.i16[i]; } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cvtsepi32_epi16 #define _mm512_mask_cvtsepi32_epi16(src, k, a) simde_mm512_mask_cvtsepi32_epi16(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm512_maskz_cvtsepi32_epi16 (simde__mmask16 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_cvtsepi32_epi16(k, a); #else simde__m256i_private r_; simde__m512i_private a_ = simde__m512i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { r_.i16[i] = ((k>>i) &1 ) ? ((a_.i32[i] < INT16_MIN) ? (INT16_MIN) : ((a_.i32[i] > INT16_MAX) ? (INT16_MAX) : HEDLEY_STATIC_CAST(int16_t, a_.i32[i]))) : INT16_C(0); } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_cvtsepi32_epi16 #define _mm512_maskz_cvtsepi32_epi16(k, a) simde_mm512_maskz_cvtsepi32_epi16(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm512_cvtsepi64_epi8 (simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cvtsepi64_epi8(a); #else simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); simde__m512i_private a_ = simde__m512i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { r_.i8[i] = (a_.i64[i] < INT8_MIN) ? (INT8_MIN) : ((a_.i64[i] > INT8_MAX) ? 
(INT8_MAX) : HEDLEY_STATIC_CAST(int8_t, a_.i64[i])); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_cvtsepi64_epi8 #define _mm512_cvtsepi64_epi8(a) simde_mm512_cvtsepi64_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm512_mask_cvtsepi64_epi8 (simde__m128i src, simde__mmask8 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cvtsepi64_epi8(src, k, a); #else simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); simde__m128i_private src_ = simde__m128i_to_private(src); simde__m512i_private a_ = simde__m512i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { r_.i8[i] = ((k>>i) &1 ) ? ((a_.i64[i] < INT8_MIN) ? (INT8_MIN) : ((a_.i64[i] > INT8_MAX) ? (INT8_MAX) : HEDLEY_STATIC_CAST(int8_t, a_.i64[i]))) : src_.i8[i]; } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cvtsepi64_epi8 #define _mm512_mask_cvtsepi64_epi8(src, k, a) simde_mm512_mask_cvtsepi64_epi8(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm512_maskz_cvtsepi64_epi8 (simde__mmask8 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_cvtsepi64_epi8(k, a); #else simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); simde__m512i_private a_ = simde__m512i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { r_.i8[i] = ((k>>i) &1 ) ? ((a_.i64[i] < INT8_MIN) ? (INT8_MIN) : ((a_.i64[i] > INT8_MAX) ? 
(INT8_MAX) : HEDLEY_STATIC_CAST(int8_t, a_.i64[i]))) : INT8_C(0); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_cvtsepi64_epi8 #define _mm512_maskz_cvtsepi64_epi8(k, a) simde_mm512_maskz_cvtsepi64_epi8(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm512_cvtsepi64_epi16 (simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cvtsepi64_epi16(a); #else simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); simde__m512i_private a_ = simde__m512i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { r_.i16[i] = (a_.i64[i] < INT16_MIN) ? (INT16_MIN) : ((a_.i64[i] > INT16_MAX) ? (INT16_MAX) : HEDLEY_STATIC_CAST(int16_t, a_.i64[i])); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_cvtsepi64_epi16 #define _mm512_cvtsepi64_epi16(a) simde_mm512_cvtsepi64_epi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm512_mask_cvtsepi64_epi16 (simde__m128i src, simde__mmask8 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cvtsepi64_epi16(src, k, a); #else simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); simde__m128i_private src_ = simde__m128i_to_private(src); simde__m512i_private a_ = simde__m512i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { r_.i16[i] = ((k>>i) & 1) ? ((a_.i64[i] < INT16_MIN) ? (INT16_MIN) : ((a_.i64[i] > INT16_MAX) ? 
(INT16_MAX) : HEDLEY_STATIC_CAST(int16_t, a_.i64[i]))) : src_.i16[i]; } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cvtsepi64_epi16 #define _mm512_mask_cvtsepi64_epi16(src, k, a) simde_mm512_mask_cvtsepi64_epi16(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm512_maskz_cvtsepi64_epi16 (simde__mmask8 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_cvtsepi64_epi16(k, a); #else simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); simde__m512i_private a_ = simde__m512i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { r_.i16[i] = ((k>>i) & 1) ? ((a_.i64[i] < INT16_MIN) ? (INT16_MIN) : ((a_.i64[i] > INT16_MAX) ? (INT16_MAX) : HEDLEY_STATIC_CAST(int16_t, a_.i64[i]))) : INT16_C(0); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_cvtsepi64_epi16 #define _mm512_maskz_cvtsepi64_epi16(k, a) simde_mm512_maskz_cvtsepi64_epi16(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm512_cvtsepi64_epi32 (simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cvtsepi64_epi32(a); #else simde__m256i_private r_; simde__m512i_private a_ = simde__m512i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { r_.i32[i] = (a_.i64[i] < INT32_MIN) ? (INT32_MIN) : ((a_.i64[i] > INT32_MAX) ? 
(INT32_MAX) : HEDLEY_STATIC_CAST(int32_t, a_.i64[i])); } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_cvtsepi64_epi32 #define _mm512_cvtsepi64_epi32(a) simde_mm512_cvtsepi64_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm512_mask_cvtsepi64_epi32 (simde__m256i src, simde__mmask8 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cvtsepi64_epi32(src, k, a); #else simde__m256i_private r_; simde__m256i_private src_ = simde__m256i_to_private(src); simde__m512i_private a_ = simde__m512i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { r_.i32[i] = ((k>>i) & 1) ? ((a_.i64[i] < INT32_MIN) ? (INT32_MIN) : ((a_.i64[i] > INT32_MAX) ? (INT32_MAX) : HEDLEY_STATIC_CAST(int32_t, a_.i64[i]))) : src_.i32[i]; } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cvtsepi64_epi32 #define _mm512_mask_cvtsepi64_epi32(src, k, a) simde_mm512_mask_cvtsepi64_epi32(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm512_maskz_cvtsepi64_epi32 (simde__mmask8 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_cvtsepi64_epi32(k, a); #else simde__m256i_private r_; simde__m512i_private a_ = simde__m512i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { r_.i32[i] = ((k>>i) & 1) ? ((a_.i64[i] < INT32_MIN) ? (INT32_MIN) : ((a_.i64[i] > INT32_MAX) ? 
(INT32_MAX) : HEDLEY_STATIC_CAST(int32_t, a_.i64[i]))) : INT32_C(0); } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_cvtsepi64_epi32 #define _mm512_maskz_cvtsepi64_epi32(k, a) simde_mm512_maskz_cvtsepi64_epi32(k, a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_CVTS_H) */ simde-0.7.2/simde/x86/avx512/div.h000066400000000000000000000122511400333146700163320ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_DIV_H) #define SIMDE_X86_AVX512_DIV_H #include "types.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_div_ps (simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_div_ps(a, b); #else simde__m512_private r_, a_ = simde__m512_to_private(a), b_ = simde__m512_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_div_ps(a_.m256[i], b_.m256[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = a_.f32 / b_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_div_ps(a_.m256[i], b_.m256[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_div_ps #define _mm512_div_ps(a, b) simde_mm512_div_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_div_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_div_ps(src, k, a, b); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_div_ps(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_div_ps #define _mm512_mask_div_ps(src, k, a, b) simde_mm512_mask_div_ps(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_maskz_div_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_div_ps(k, a, b); #else return simde_mm512_maskz_mov_ps(k, simde_mm512_div_ps(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_div_ps #define _mm512_maskz_div_ps(k, a, b) simde_mm512_maskz_div_ps(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde__m512d simde_mm512_div_pd (simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_div_pd(a, b); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a), b_ = simde__m512d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_div_pd(a_.m256d[i], b_.m256d[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = a_.f64 / b_.f64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_div_pd(a_.m256d[i], b_.m256d[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_div_pd #define _mm512_div_pd(a, b) simde_mm512_div_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_div_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_div_pd(src, k, a, b); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_div_pd(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_div_pd #define _mm512_mask_div_pd(src, k, a, b) simde_mm512_mask_div_pd(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_maskz_div_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_div_pd(k, a, b); #else return simde_mm512_maskz_mov_pd(k, simde_mm512_div_pd(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_div_pd #define _mm512_maskz_div_pd(k, a, b) simde_mm512_maskz_div_pd(k, a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_DIV_H) */ simde-0.7.2/simde/x86/avx512/extract.h000066400000000000000000000211521400333146700172220ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person 
* obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_X86_AVX512_EXTRACT_H) #define SIMDE_X86_AVX512_EXTRACT_H #include "types.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm512_extractf32x4_ps (simde__m512 a, int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { simde__m512_private a_ = simde__m512_to_private(a); /* GCC 6 generates an ICE */ #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(7,0,0) return a_.m128[imm8 & 3]; #else simde__m128_private r_; const size_t offset = HEDLEY_STATIC_CAST(size_t, imm8 & 3) * (sizeof(r_.f32) / sizeof(r_.f32[0])); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[i + offset]; } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define 
simde_mm512_extractf32x4_ps(a, imm8) _mm512_extractf32x4_ps(a, imm8) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_extractf32x4_ps #define _mm512_extractf32x4_ps(a, imm8) simde_mm512_extractf32x4_ps(a, imm8) #endif #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define simde_mm512_mask_extractf32x4_ps(src, k, a, imm8) _mm512_mask_extractf32x4_ps(src, k, a, imm8) #else #define simde_mm512_mask_extractf32x4_ps(src, k, a, imm8) simde_mm_mask_mov_ps(src, k, simde_mm512_extractf32x4_ps(a, imm8)) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_extractf32x4_ps #define _mm512_mask_extractf32x4_ps(src, k, a, imm8) simde_mm512_mask_extractf32x4_ps(src, k, a, imm8) #endif #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define simde_mm512_maskz_extractf32x4_ps(k, a, imm8) _mm512_maskz_extractf32x4_ps(k, a, imm8) #else #define simde_mm512_maskz_extractf32x4_ps(k, a, imm8) simde_mm_maskz_mov_ps(k, simde_mm512_extractf32x4_ps(a, imm8)) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_extractf32x4_ps #define _mm512_maskz_extractf32x4_ps(k, a, imm8) simde_mm512_maskz_extractf32x4_ps(k, a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm512_extractf64x4_pd (simde__m512d a, int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { simde__m512d_private a_ = simde__m512d_to_private(a); return a_.m256d[imm8 & 1]; } #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define simde_mm512_extractf64x4_pd(a, imm8) _mm512_extractf64x4_pd(a, imm8) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_extractf64x4_pd #define _mm512_extractf64x4_pd(a, imm8) simde_mm512_extractf64x4_pd(a, imm8) #endif #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) 
#define simde_mm512_mask_extractf64x4_pd(src, k, a, imm8) _mm512_mask_extractf64x4_pd(src, k, a, imm8) #else #define simde_mm512_mask_extractf64x4_pd(src, k, a, imm8) simde_mm256_mask_mov_pd(src, k, simde_mm512_extractf64x4_pd(a, imm8)) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_extractf64x4_pd #define _mm512_mask_extractf64x4_pd(src, k, a, imm8) simde_mm512_mask_extractf64x4_pd(src, k, a, imm8) #endif #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define simde_mm512_maskz_extractf64x4_pd(k, a, imm8) _mm512_maskz_extractf64x4_pd(k, a, imm8) #else #define simde_mm512_maskz_extractf64x4_pd(k, a, imm8) simde_mm256_maskz_mov_pd(k, simde_mm512_extractf64x4_pd(a, imm8)) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_extractf64x4_pd #define _mm512_maskz_extractf64x4_pd(k, a, imm8) simde_mm512_maskz_extractf64x4_pd(k, a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm512_extracti32x4_epi32 (simde__m512i a, int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { simde__m512i_private a_ = simde__m512i_to_private(a); return a_.m128i[imm8 & 3]; } #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define simde_mm512_extracti32x4_epi32(a, imm8) _mm512_extracti32x4_epi32(a, imm8) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_extracti32x4_epi32 #define _mm512_extracti32x4_epi32(a, imm8) simde_mm512_extracti32x4_epi32(a, imm8) #endif #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define simde_mm512_mask_extracti32x4_epi32(src, k, a, imm8) _mm512_mask_extracti32x4_epi32(src, k, a, imm8) #else #define simde_mm512_mask_extracti32x4_epi32(src, k, a, imm8) simde_mm_mask_mov_epi32(src, k, simde_mm512_extracti32x4_epi32(a, imm8)) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef 
_mm512_mask_extracti32x4_epi32 #define _mm512_mask_extracti32x4_epi32(src, k, a, imm8) simde_mm512_mask_extracti32x4_epi32(src, k, a, imm8) #endif #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define simde_mm512_maskz_extracti32x4_epi32(k, a, imm8) _mm512_maskz_extracti32x4_epi32(k, a, imm8) #else #define simde_mm512_maskz_extracti32x4_epi32(k, a, imm8) simde_mm_maskz_mov_epi32(k, simde_mm512_extracti32x4_epi32(a, imm8)) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_extracti32x4_epi32 #define _mm512_maskz_extracti32x4_epi32(k, a, imm8) simde_mm512_maskz_extracti32x4_epi32(k, a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm512_extracti64x4_epi64 (simde__m512i a, int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { simde__m512i_private a_ = simde__m512i_to_private(a); return a_.m256i[imm8 & 1]; } #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define simde_mm512_extracti64x4_epi64(a, imm8) _mm512_extracti64x4_epi64(a, imm8) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_extracti64x4_epi64 #define _mm512_extracti64x4_epi64(a, imm8) simde_mm512_extracti64x4_epi64(a, imm8) #endif #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define simde_mm512_mask_extracti64x4_epi64(src, k, a, imm8) _mm512_mask_extracti64x4_epi64(src, k, a, imm8) #else #define simde_mm512_mask_extracti64x4_epi64(src, k, a, imm8) simde_mm256_mask_mov_epi64(src, k, simde_mm512_extracti64x4_epi64(a, imm8)) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_extracti64x4_epi64 #define _mm512_mask_extracti64x4_epi64(src, k, a, imm8) simde_mm512_mask_extracti64x4_epi64(src, k, a, imm8) #endif #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define 
simde_mm512_maskz_extracti64x4_epi64(k, a, imm8) _mm512_maskz_extracti64x4_epi64(k, a, imm8) #else #define simde_mm512_maskz_extracti64x4_epi64(k, a, imm8) simde_mm256_maskz_mov_epi64(k, simde_mm512_extracti64x4_epi64(a, imm8)) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_extracti64x4_epi64 #define _mm512_maskz_extracti64x4_epi64(k, a, imm8) simde_mm512_maskz_extracti64x4_epi64(k, a, imm8) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_EXTRACT_H) */ simde-0.7.2/simde/x86/avx512/fmadd.h000066400000000000000000000107231400333146700166250ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #if !defined(SIMDE_X86_AVX512_FMADD_H) #define SIMDE_X86_AVX512_FMADD_H #include "types.h" #include "mov.h" #include "../fma.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_fmadd_ps (simde__m512 a, simde__m512 b, simde__m512 c) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_fmadd_ps(a, b, c); #else simde__m512_private r_, a_ = simde__m512_to_private(a), b_ = simde__m512_to_private(b), c_ = simde__m512_to_private(c); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_fmadd_ps(a_.m256[i], b_.m256[i], c_.m256[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = (a_.f32 * b_.f32) + c_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = (a_.f32[i] * b_.f32[i]) + c_.f32[i]; } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_fmadd_ps #define _mm512_fmadd_ps(a, b, c) simde_mm512_fmadd_ps(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_fmadd_ps(simde__m512 a, simde__mmask16 k, simde__m512 b, simde__m512 c) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_fmadd_ps(a, k, b, c); #else return simde_mm512_mask_mov_ps(a, k, simde_mm512_fmadd_ps(a, b, c)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_fmadd_ps #define _mm512_mask_fmadd_ps(a, k, b, c) simde_mm512_mask_fmadd_ps(a, k, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_maskz_fmadd_ps(simde__mmask16 k, simde__m512 a, simde__m512 b, simde__m512 c) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_fmadd_ps(k, a, b, c); #else return simde_mm512_maskz_mov_ps(k, simde_mm512_fmadd_ps(a, b, c)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef 
_mm512_maskz_fmadd_ps #define _mm512_maskz_fmadd_ps(k, a, b, c) simde_mm512_maskz_fmadd_ps(k, a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_fmadd_pd (simde__m512d a, simde__m512d b, simde__m512d c) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_fmadd_pd(a, b, c); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a), b_ = simde__m512d_to_private(b), c_ = simde__m512d_to_private(c); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_fmadd_pd(a_.m256d[i], b_.m256d[i], c_.m256d[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = (a_.f64 * b_.f64) + c_.f64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = (a_.f64[i] * b_.f64[i]) + c_.f64[i]; } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_fmadd_pd #define _mm512_fmadd_pd(a, b, c) simde_mm512_fmadd_pd(a, b, c) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_FMADD_H) */ simde-0.7.2/simde/x86/avx512/fmsub.h000066400000000000000000000070211400333146700166630ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 kitegi */ #if !defined(SIMDE_X86_AVX512_FMSUB_H) #define SIMDE_X86_AVX512_FMSUB_H #include "types.h" #include "mov.h" #include "../fma.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_fmsub_ps (simde__m512 a, simde__m512 b, simde__m512 c) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_fmsub_ps(a, b, c); #else simde__m512_private r_, a_ = simde__m512_to_private(a), b_ = simde__m512_to_private(b), c_ = simde__m512_to_private(c); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_fmsub_ps(a_.m256[i], b_.m256[i], c_.m256[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = (a_.f32 * b_.f32) - c_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = (a_.f32[i] * b_.f32[i]) - c_.f32[i]; } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_fmsub_ps #define _mm512_fmsub_ps(a, b, c) simde_mm512_fmsub_ps(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_fmsub_pd (simde__m512d a, simde__m512d b, simde__m512d c) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_fmsub_pd(a, b, c); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a), b_ = simde__m512d_to_private(b), c_ = simde__m512d_to_private(c); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for 
(size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_fmsub_pd(a_.m256d[i], b_.m256d[i], c_.m256d[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = (a_.f64 * b_.f64) - c_.f64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = (a_.f64[i] * b_.f64[i]) - c_.f64[i]; } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_fmsub_pd #define _mm512_fmsub_pd(a, b, c) simde_mm512_fmsub_pd(a, b, c) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_FMSUB_H) */ simde-0.7.2/simde/x86/avx512/fnmadd.h000066400000000000000000000070441400333146700170050ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 kitegi */ #if !defined(SIMDE_X86_AVX512_FNMADD_H) #define SIMDE_X86_AVX512_FNMADD_H #include "types.h" #include "mov.h" #include "../fma.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_fnmadd_ps (simde__m512 a, simde__m512 b, simde__m512 c) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_fnmadd_ps(a, b, c); #else simde__m512_private r_, a_ = simde__m512_to_private(a), b_ = simde__m512_to_private(b), c_ = simde__m512_to_private(c); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_fnmadd_ps(a_.m256[i], b_.m256[i], c_.m256[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = -(a_.f32 * b_.f32) + c_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i]; } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_fnmadd_ps #define _mm512_fnmadd_ps(a, b, c) simde_mm512_fnmadd_ps(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_fnmadd_pd (simde__m512d a, simde__m512d b, simde__m512d c) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_fnmadd_pd(a, b, c); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a), b_ = simde__m512d_to_private(b), c_ = simde__m512d_to_private(c); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_fnmadd_pd(a_.m256d[i], b_.m256d[i], c_.m256d[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = -(a_.f64 * b_.f64) + c_.f64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i]; } #endif return simde__m512d_from_private(r_); #endif } #if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_fnmadd_pd #define _mm512_fnmadd_pd(a, b, c) simde_mm512_fnmadd_pd(a, b, c) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_FNMADD_H) */ simde-0.7.2/simde/x86/avx512/fnmsub.h000066400000000000000000000070441400333146700170460ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 kitegi */ #if !defined(SIMDE_X86_AVX512_FNMSUB_H) #define SIMDE_X86_AVX512_FNMSUB_H #include "types.h" #include "mov.h" #include "../fma.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_fnmsub_ps (simde__m512 a, simde__m512 b, simde__m512 c) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_fnmsub_ps(a, b, c); #else simde__m512_private r_, a_ = simde__m512_to_private(a), b_ = simde__m512_to_private(b), c_ = simde__m512_to_private(c); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_fnmsub_ps(a_.m256[i], b_.m256[i], c_.m256[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = -(a_.f32 * b_.f32) - c_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i]; } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_fnmsub_ps #define _mm512_fnmsub_ps(a, b, c) simde_mm512_fnmsub_ps(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_fnmsub_pd (simde__m512d a, simde__m512d b, simde__m512d c) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_fnmsub_pd(a, b, c); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a), b_ = simde__m512d_to_private(b), c_ = simde__m512d_to_private(c); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_fnmsub_pd(a_.m256d[i], b_.m256d[i], c_.m256d[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = -(a_.f64 * b_.f64) - c_.f64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i]; } #endif return simde__m512d_from_private(r_); #endif } #if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_fnmsub_pd #define _mm512_fnmsub_pd(a, b, c) simde_mm512_fnmsub_pd(a, b, c) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_FNMSUB_H) */ simde-0.7.2/simde/x86/avx512/insert.h000066400000000000000000000171711400333146700170620ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_X86_AVX512_INSERT_H) #define SIMDE_X86_AVX512_INSERT_H #include "types.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_insertf32x4 (simde__m512 a, simde__m128 b, int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { simde__m512_private a_ = simde__m512_to_private(a); a_.m128[imm8 & 3] = b; return simde__m512_from_private(a_); } #if defined(SIMDE_X86_AVX512F_NATIVE) #define simde_mm512_insertf32x4(a, b, imm8) _mm512_insertf32x4(a, b, imm8) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_insertf32x4 #define _mm512_insertf32x4(a, b, imm8) simde_mm512_insertf32x4(a, b, imm8) #endif #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) #define simde_mm512_mask_insertf32x4(src, k, a, b, imm8) _mm512_mask_insertf32x4(src, k, a, b, imm8) #else #define simde_mm512_mask_insertf32x4(src, k, a, b, imm8) simde_mm512_mask_mov_ps(src, k, simde_mm512_insertf32x4(a, b, imm8)) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_insertf32x4 #define _mm512_mask_insertf32x4(src, k, a, b, imm8) simde_mm512_mask_insertf32x4(src, k, a, b, imm8) #endif #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) #define simde_mm512_maskz_insertf32x4(k, a, b, imm8) _mm512_maskz_insertf32x4(k, a, b, imm8) #else #define simde_mm512_maskz_insertf32x4(k, a, b, imm8) simde_mm512_maskz_mov_ps(k, simde_mm512_insertf32x4(a, b, imm8)) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_insertf32x4 #define _mm512_maskz_insertf32x4(k, a, b, imm8) simde_mm512_maskz_insertf32x4(k, a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_insertf64x4 (simde__m512d a, simde__m256d b, int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { 
simde__m512d_private a_ = simde__m512d_to_private(a); a_.m256d[imm8 & 1] = b; return simde__m512d_from_private(a_); } #if defined(SIMDE_X86_AVX512F_NATIVE) #define simde_mm512_insertf64x4(a, b, imm8) _mm512_insertf64x4(a, b, imm8) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_insertf64x4 #define _mm512_insertf64x4(a, b, imm8) simde_mm512_insertf64x4(a, b, imm8) #endif #if defined(SIMDE_X86_AVX512F_NATIVE) #define simde_mm512_mask_insertf64x4(src, k, a, b, imm8) _mm512_mask_insertf64x4(src, k, a, b, imm8) #else #define simde_mm512_mask_insertf64x4(src, k, a, b, imm8) simde_mm512_mask_mov_pd(src, k, simde_mm512_insertf64x4(a, b, imm8)) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_insertf64x4 #define _mm512_mask_insertf64x4(src, k, a, b, imm8) simde_mm512_mask_insertf64x4(src, k, a, b, imm8) #endif #if defined(SIMDE_X86_AVX512F_NATIVE) #define simde_mm512_maskz_insertf64x4(k, a, b, imm8) _mm512_maskz_insertf64x4(k, a, b, imm8) #else #define simde_mm512_maskz_insertf64x4(k, a, b, imm8) simde_mm512_maskz_mov_pd(k, simde_mm512_insertf64x4(a, b, imm8)) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_insertf64x4 #define _mm512_maskz_insertf64x4(k, a, b, imm8) simde_mm512_maskz_insertf64x4(k, a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_inserti32x4 (simde__m512i a, simde__m128i b, int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { simde__m512i_private a_ = simde__m512i_to_private(a); a_.m128i[imm8 & 3] = b; return simde__m512i_from_private(a_); } #if defined(SIMDE_X86_AVX512F_NATIVE) #define simde_mm512_inserti32x4(a, b, imm8) _mm512_inserti32x4(a, b, imm8) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_inserti32x4 #define _mm512_inserti32x4(a, b, imm8) simde_mm512_inserti32x4(a, b, imm8) #endif #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) #define 
simde_mm512_mask_inserti32x4(src, k, a, b, imm8) _mm512_mask_inserti32x4(src, k, a, b, imm8) #else #define simde_mm512_mask_inserti32x4(src, k, a, b, imm8) simde_mm512_mask_mov_epi32(src, k, simde_mm512_inserti32x4(a, b, imm8)) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_inserti32x4 #define _mm512_mask_inserti32x4(src, k, a, b, imm8) simde_mm512_mask_inserti32x4(src, k, a, b, imm8) #endif #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) #define simde_mm512_maskz_inserti32x4(k, a, b, imm8) _mm512_maskz_inserti32x4(k, a, b, imm8) #else #define simde_mm512_maskz_inserti32x4(k, a, b, imm8) simde_mm512_maskz_mov_epi32(k, simde_mm512_inserti32x4(a, b, imm8)) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_inserti32x4 #define _mm512_maskz_inserti32x4(k, a, b, imm8) simde_mm512_maskz_inserti32x4(k, a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_inserti64x4 (simde__m512i a, simde__m256i b, int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { simde__m512i_private a_ = simde__m512i_to_private(a); a_.m256i[imm8 & 1] = b; return simde__m512i_from_private(a_); } #if defined(SIMDE_X86_AVX512F_NATIVE) #define simde_mm512_inserti64x4(a, b, imm8) _mm512_inserti64x4(a, b, imm8) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_inserti64x4 #define _mm512_inserti64x4(a, b, imm8) simde_mm512_inserti64x4(a, b, imm8) #endif #if defined(SIMDE_X86_AVX512F_NATIVE) #define simde_mm512_mask_inserti64x4(src, k, a, b, imm8) _mm512_mask_inserti64x4(src, k, a, b, imm8) #else #define simde_mm512_mask_inserti64x4(src, k, a, b, imm8) simde_mm512_mask_mov_epi64(src, k, simde_mm512_inserti64x4(a, b, imm8)) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_inserti64x4 #define _mm512_mask_inserti64x4(src, k, a, b, imm8) simde_mm512_mask_inserti64x4(src, k, a, b, imm8) #endif #if 
defined(SIMDE_X86_AVX512F_NATIVE) #define simde_mm512_maskz_inserti64x4(k, a, b, imm8) _mm512_maskz_inserti64x4(k, a, b, imm8) #else #define simde_mm512_maskz_inserti64x4(k, a, b, imm8) simde_mm512_maskz_mov_epi64(k, simde_mm512_inserti64x4(a, b, imm8)) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_inserti64x4 #define _mm512_maskz_inserti64x4(k, a, b, imm8) simde_mm512_maskz_inserti64x4(k, a, b, imm8) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_INSERT_H) */ simde-0.7.2/simde/x86/avx512/kshift.h000066400000000000000000000134371400333146700170470ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_X86_AVX512_KSHIFT_H) #define SIMDE_X86_AVX512_KSHIFT_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__mmask16 simde_kshiftli_mask16 (simde__mmask16 a, unsigned int count) SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { return HEDLEY_STATIC_CAST(simde__mmask16, (count <= 15) ? (a << count) : 0); } #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define simde_kshiftli_mask16(a, count) _kshiftli_mask16(a, count) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _kshiftli_mask16 #define _kshiftli_mask16(a, count) simde_kshiftli_mask16(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask32 simde_kshiftli_mask32 (simde__mmask32 a, unsigned int count) SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { return (count <= 31) ? (a << count) : 0; } #if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define simde_kshiftli_mask32(a, count) _kshiftli_mask32(a, count) #endif #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _kshiftli_mask32 #define _kshiftli_mask32(a, count) simde_kshiftli_mask32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask64 simde_kshiftli_mask64 (simde__mmask64 a, unsigned int count) SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { return (count <= 63) ? 
(a << count) : 0; } #if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define simde_kshiftli_mask64(a, count) _kshiftli_mask64(a, count) #endif #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _kshiftli_mask64 #define _kshiftli_mask64(a, count) simde_kshiftli_mask64(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask8 simde_kshiftli_mask8 (simde__mmask8 a, unsigned int count) SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { return HEDLEY_STATIC_CAST(simde__mmask8, (count <= 7) ? (a << count) : 0); } #if defined(SIMDE_X86_AVX512DQ_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define simde_kshiftli_mask8(a, count) _kshiftli_mask8(a, count) #endif #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _kshiftli_mask8 #define _kshiftli_mask8(a, count) simde_kshiftli_mask8(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask16 simde_kshiftri_mask16 (simde__mmask16 a, unsigned int count) SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { return HEDLEY_STATIC_CAST(simde__mmask16, (count <= 15) ? (a >> count) : 0); } #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define simde_kshiftri_mask16(a, count) _kshiftri_mask16(a, count) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _kshiftri_mask16 #define _kshiftri_mask16(a, count) simde_kshiftri_mask16(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask32 simde_kshiftri_mask32 (simde__mmask32 a, unsigned int count) SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { return (count <= 31) ? 
(a >> count) : 0; } #if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define simde_kshiftri_mask32(a, count) _kshiftri_mask32(a, count) #endif #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _kshiftri_mask32 #define _kshiftri_mask32(a, count) simde_kshiftri_mask32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask64 simde_kshiftri_mask64 (simde__mmask64 a, unsigned int count) SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { return (count <= 63) ? (a >> count) : 0; } #if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define simde_kshiftri_mask64(a, count) _kshiftri_mask64(a, count) #endif #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _kshiftri_mask64 #define _kshiftri_mask64(a, count) simde_kshiftri_mask64(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask8 simde_kshiftri_mask8 (simde__mmask8 a, unsigned int count) SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { return HEDLEY_STATIC_CAST(simde__mmask8, (count <= 7) ? 
(a >> count) : 0); } #if defined(SIMDE_X86_AVX512DQ_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) #define simde_kshiftri_mask8(a, count) _kshiftri_mask8(a, count) #endif #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _kshiftri_mask8 #define _kshiftri_mask8(a, count) simde_kshiftri_mask8(a, count) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_KSHIFT_H) */ simde-0.7.2/simde/x86/avx512/load.h000066400000000000000000000047741400333146700165020ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_X86_AVX512_LOAD_H) #define SIMDE_X86_AVX512_LOAD_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_load_si512 (void const * mem_addr) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_load_si512(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512i)); #else simde__m512i r; simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512i), sizeof(r)); return r; #endif } #define simde_mm512_load_epi8(mem_addr) simde_mm512_load_si512(mem_addr) #define simde_mm512_load_epi16(mem_addr) simde_mm512_load_si512(mem_addr) #define simde_mm512_load_epi32(mem_addr) simde_mm512_load_si512(mem_addr) #define simde_mm512_load_epi64(mem_addr) simde_mm512_load_si512(mem_addr) #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_load_epi8 #undef _mm512_load_epi16 #undef _mm512_load_epi32 #undef _mm512_load_epi64 #undef _mm512_load_si512 #define _mm512_load_si512(a) simde_mm512_load_si512(a) #define _mm512_load_epi8(a) simde_mm512_load_si512(a) #define _mm512_load_epi16(a) simde_mm512_load_si512(a) #define _mm512_load_epi32(a) simde_mm512_load_si512(a) #define _mm512_load_epi64(a) simde_mm512_load_si512(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_LOAD_H) */ simde-0.7.2/simde/x86/avx512/loadu.h000066400000000000000000000073051400333146700166600ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and 
this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_X86_AVX512_LOADU_H) #define SIMDE_X86_AVX512_LOADU_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_loadu_ps (void const * mem_addr) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_loadu_ps(mem_addr); #else simde__m512 r; simde_memcpy(&r, mem_addr, sizeof(r)); return r; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_loadu_ps #define _mm512_loadu_ps(a) simde_mm512_loadu_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_loadu_pd (void const * mem_addr) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_loadu_pd(mem_addr); #else simde__m512d r; simde_memcpy(&r, mem_addr, sizeof(r)); return r; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_loadu_pd #define _mm512_loadu_pd(a) simde_mm512_loadu_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_loadu_si512 (void const * mem_addr) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_loadu_si512(HEDLEY_REINTERPRET_CAST(void const*, mem_addr)); #else simde__m512i r; #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_PACKED_ struct simde_mm512_loadu_si512_s { __typeof__(r) v; } __attribute__((__packed__, __may_alias__)); r = HEDLEY_REINTERPRET_CAST(const struct 
simde_mm512_loadu_si512_s *, mem_addr)->v; HEDLEY_DIAGNOSTIC_POP #else simde_memcpy(&r, mem_addr, sizeof(r)); #endif return r; #endif } #define simde_mm512_loadu_epi8(mem_addr) simde_mm512_loadu_si512(mem_addr) #define simde_mm512_loadu_epi16(mem_addr) simde_mm512_loadu_si512(mem_addr) #define simde_mm512_loadu_epi32(mem_addr) simde_mm512_loadu_si512(mem_addr) #define simde_mm512_loadu_epi64(mem_addr) simde_mm512_loadu_si512(mem_addr) #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_loadu_epi8 #undef _mm512_loadu_epi16 #define _mm512_loadu_epi8(a) simde_mm512_loadu_si512(a) #define _mm512_loadu_epi16(a) simde_mm512_loadu_si512(a) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_loadu_epi32 #undef _mm512_loadu_epi64 #undef _mm512_loadu_si512 #define _mm512_loadu_si512(a) simde_mm512_loadu_si512(a) #define _mm512_loadu_epi32(a) simde_mm512_loadu_si512(a) #define _mm512_loadu_epi64(a) simde_mm512_loadu_si512(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_LOADU_H) */ simde-0.7.2/simde/x86/avx512/lzcnt.h000066400000000000000000000164361400333146700167130ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_X86_AVX512_LZCNT_H) #define SIMDE_X86_AVX512_LZCNT_H #include "types.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ #if \ ( HEDLEY_HAS_BUILTIN(__builtin_clz) || \ HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) ) && \ defined(__INT_MAX__) && defined(__LONG_MAX__) && defined(__LONG_LONG_MAX__) && \ defined(__INT32_MAX__) && defined(__INT64_MAX__) #if __INT_MAX__ == __INT32_MAX__ #define simde_x_clz32(v) __builtin_clz(HEDLEY_STATIC_CAST(unsigned int, (v))) #elif __LONG_MAX__ == __INT32_MAX__ #define simde_x_clz32(v) __builtin_clzl(HEDLEY_STATIC_CAST(unsigned long, (v))) #elif __LONG_LONG_MAX__ == __INT32_MAX__ #define simde_x_clz32(v) __builtin_clzll(HEDLEY_STATIC_CAST(unsigned long long, (v))) #endif #if __INT_MAX__ == __INT64_MAX__ #define simde_x_clz64(v) __builtin_clz(HEDLEY_STATIC_CAST(unsigned int, (v))) #elif __LONG_MAX__ == __INT64_MAX__ #define simde_x_clz64(v) __builtin_clzl(HEDLEY_STATIC_CAST(unsigned long, (v))) #elif __LONG_LONG_MAX__ == __INT64_MAX__ #define simde_x_clz64(v) __builtin_clzll(HEDLEY_STATIC_CAST(unsigned long long, (v))) #endif #elif HEDLEY_MSVC_VERSION_CHECK(14,0,0) static int simde_x_clz32(uint32_t x) { unsigned long r; _BitScanReverse(&r, x); return 31 - HEDLEY_STATIC_CAST(int, r); } #define simde_x_clz32 simde_x_clz32 static int simde_x_clz64(uint64_t x) { unsigned long r; #if defined(_M_AMD64) || defined(_M_ARM64) _BitScanReverse64(&r, 
x); return 63 - HEDLEY_STATIC_CAST(int, r); #else uint32_t high = HEDLEY_STATIC_CAST(uint32_t, x >> 32); if (high != 0) return _BitScanReverse(&r, HEDLEY_STATIC_CAST(unsigned long, high)); else return _BitScanReverse(&r, HEDLEY_STATIC_CAST(unsigned long, x & ~UINT32_C(0))) + 32; #endif } #define simde_x_clz64 simde_x_clz64 #endif #if !defined(simde_x_clz32) || !defined(simde_x_clz64) static uint8_t simde_x_avx512cd_lz_lookup(const uint8_t value) { static const uint8_t lut[256] = { 7, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; return lut[value]; }; #if !defined(simde_x_clz32) static int simde_x_clz32(uint32_t x) { size_t s = sizeof(x) * 8; uint32_t r; while ((s -= 8) != 0) { r = x >> s; if (r != 0) return simde_x_avx512cd_lz_lookup(HEDLEY_STATIC_CAST(uint8_t, r)) + (((sizeof(x) - 1) * 8) - s); } if (x == 0) return (int) ((sizeof(x) * 8) - 1); else return simde_x_avx512cd_lz_lookup(HEDLEY_STATIC_CAST(uint8_t, x)) + ((sizeof(x) - 1) * 8); } #endif #if !defined(simde_x_clz64) static int simde_x_clz64(uint64_t x) { size_t s = sizeof(x) * 8; uint64_t r; while ((s -= 8) != 0) { r = x >> s; if (r != 0) return simde_x_avx512cd_lz_lookup(HEDLEY_STATIC_CAST(uint8_t, r)) + (((sizeof(x) - 1) * 8) - s); } if (x == 0) return (int) ((sizeof(x) * 8) - 1); else return 
simde_x_avx512cd_lz_lookup(HEDLEY_STATIC_CAST(uint8_t, x)) + ((sizeof(x) - 1) * 8); } #endif #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_lzcnt_epi32(simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) return _mm_lzcnt_epi32(a); #elif defined(SIMDE_X86_SSE2_NATIVE) /* https://stackoverflow.com/a/58827596/501126 */ a = _mm_andnot_si128(_mm_srli_epi32(a, 8), a); a = _mm_castps_si128(_mm_cvtepi32_ps(a)); a = _mm_srli_epi32(a, 23); a = _mm_subs_epu16(_mm_set1_epi32(158), a); a = _mm_min_epi16(a, _mm_set1_epi32(32)); return a; #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { r_.i32[i] = (HEDLEY_UNLIKELY(a_.i32[i] == 0) ? HEDLEY_STATIC_CAST(int32_t, sizeof(int32_t) * CHAR_BIT) : HEDLEY_STATIC_CAST(int32_t, simde_x_clz32(HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])))); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) #undef _mm_lzcnt_epi32 #define _mm_lzcnt_epi32(a) simde_mm_lzcnt_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_lzcnt_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) return _mm_mask_lzcnt_epi32(src, k, a); #else return simde_mm_mask_mov_epi32(src, k, simde_mm_lzcnt_epi32(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm_mask_lzcnt_epi32 #define _mm_mask_lzcnt_epi32(src, k, a) simde_mm_mask_lzcnt_epi32(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_lzcnt_epi32(simde__mmask8 k, simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) return _mm_maskz_lzcnt_epi32(k, a); #else return simde_mm_maskz_mov_epi32(k, simde_mm_lzcnt_epi32(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef 
_mm_maskz_lzcnt_epi32 #define _mm_maskz_lzcnt_epi32(k, a) simde_mm_maskz_lzcnt_epi32(k, a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_LZCNT_H) */ simde-0.7.2/simde/x86/avx512/madd.h000066400000000000000000000130241400333146700164540ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Ashleigh Newman-Jones */ #if !defined(SIMDE_X86_AVX512_MADD_H) #define SIMDE_X86_AVX512_MADD_H #include "types.h" #include "mov.h" #include "../avx2.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_madd_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask_madd_epi16(src, k, a, b); #else return simde_mm_mask_mov_epi32(src, k, simde_mm_madd_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_madd_epi16 #define _mm_mask_madd_epi16(a, b) simde_mm_mask_madd_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_madd_epi16 (simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_maskz_madd_epi16(k, a, b); #else return simde_mm_maskz_mov_epi32(k, simde_mm_madd_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_madd_epi16 #define _mm_maskz_madd_epi16(a, b) simde_mm_maskz_madd_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mask_madd_epi16 (simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_mask_madd_epi16(src, k, a, b); #else return simde_mm256_mask_mov_epi32(src, k, simde_mm256_madd_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_madd_epi16 #define _mm256_mask_madd_epi16(a, b) simde_mm256_mask_madd_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_maskz_madd_epi16 (simde__mmask8 k, simde__m256i a, 
simde__m256i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_maskz_madd_epi16(k, a, b); #else return simde_mm256_maskz_mov_epi32(k, simde_mm256_madd_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_madd_epi16 #define _mm256_maskz_madd_epi16(a, b) simde_mm256_maskz_madd_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_madd_epi16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_madd_epi16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_madd_epi16(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_madd_epi16 #define _mm512_madd_epi16(a, b) simde_mm512_madd_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_madd_epi16 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_madd_epi16(src, k, a, b); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_madd_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_madd_epi16 #define _mm512_mask_madd_epi16(a, b) simde_mm512_mask_madd_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_madd_epi16 (simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_madd_epi16(k, a, b); #else return simde_mm512_maskz_mov_epi32(k, simde_mm512_madd_epi16(a, b)); 
#endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_madd_epi16 #define _mm512_maskz_madd_epi16(a, b) simde_mm512_maskz_madd_epi16(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_MADD_H) */ simde-0.7.2/simde/x86/avx512/maddubs.h000066400000000000000000000137111400333146700171710ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Ashleigh Newman-Jones */ #if !defined(SIMDE_X86_AVX512_MADDUBS_H) #define SIMDE_X86_AVX512_MADDUBS_H #include "types.h" #include "mov.h" #include "../avx2.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_maddubs_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask_maddubs_epi16(src, k, a, b); #else return simde_mm_mask_mov_epi16(src, k, simde_mm_maddubs_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_maddubs_epi16 #define _mm_mask_maddubs_epi16(a, b) simde_mm_mask_maddubs_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_maddubs_epi16 (simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE ) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_maskz_maddubs_epi16(k, a, b); #else return simde_mm_maskz_mov_epi16(k, simde_mm_maddubs_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_maddubs_epi16 #define _mm_maskz_maddubs_epi16(a, b) simde_mm_maskz_maddubs_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mask_maddubs_epi16 (simde__m256i src, simde__mmask16 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_mask_maddubs_epi16(src, k, a, b); #else return simde_mm256_mask_mov_epi16(src, k, simde_mm256_maddubs_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_maddubs_epi16 #define _mm256_mask_maddubs_epi16(a, b) simde_mm256_mask_maddubs_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i 
simde_mm256_maskz_maddubs_epi16 (simde__mmask16 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_maskz_maddubs_epi16(k, a, b); #else return simde_mm256_maskz_mov_epi16(k, simde_mm256_maddubs_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_maddubs_epi16 #define _mm256_maskz_maddubs_epi16(a, b) simde_mm256_maskz_maddubs_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maddubs_epi16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maddubs_epi16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_maddubs_epi16(a_.m256i[i], b_.m256i[i]); } #else for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { const int idx = HEDLEY_STATIC_CAST(int, i) << 1; int32_t ts = (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maddubs_epi16 #define _mm512_maddubs_epi16(a, b) simde_mm512_maddubs_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_maddubs_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_maddubs_epi16(src, k, a, b); #else return simde_mm512_mask_mov_epi16(src, k, simde_mm512_maddubs_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_maddubs_epi16 #define _mm512_mask_maddubs_epi16(a, b) simde_mm512_mask_maddubs_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_maddubs_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_maddubs_epi16(k, a, b); #else return simde_mm512_maskz_mov_epi16(k, simde_mm512_maddubs_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_maddubs_epi16 #define _mm512_maskz_maddubs_epi16(a, b) simde_mm512_maskz_maddubs_epi16(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_MADDUBS_H) */ simde-0.7.2/simde/x86/avx512/max.h000066400000000000000000000442451400333146700163450ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all 
copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_MAX_H) #define SIMDE_X86_AVX512_MAX_H #include "types.h" #include "../avx2.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_max_epi8 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_max_epi8(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? 
a_.i8[i] : b_.i8[i]; } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) # define _mm512_max_epi8(a, b) simde_mm512_max_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_max_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_max_epi8(src, k, a, b); #else return simde_mm512_mask_mov_epi8(src, k, simde_mm512_max_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_max_epi8 #define _mm512_mask_max_epi8(src, k, a, b) simde_mm512_mask_max_epi8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_max_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_max_epi8(k, a, b); #else return simde_mm512_maskz_mov_epi8(k, simde_mm512_max_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_max_epi8 #define _mm512_maskz_max_epi8(k, a, b) simde_mm512_maskz_max_epi8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_max_epu8 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_max_epu8(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_max_epu8(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_max_epu8 #define _mm512_max_epu8(a, b) simde_mm512_max_epu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_max_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_max_epu8(src, k, a, b); #else return simde_mm512_mask_mov_epi8(src, k, simde_mm512_max_epu8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_max_epu8 #define _mm512_mask_max_epu8(src, k, a, b) simde_mm512_mask_max_epu8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_max_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_max_epu8(k, a, b); #else return simde_mm512_maskz_mov_epi8(k, simde_mm512_max_epu8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_max_epu8 #define _mm512_maskz_max_epu8(k, a, b) simde_mm512_maskz_max_epu8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_max_epi16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_max_epi16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) # define _mm512_max_epi16(a, b) simde_mm512_max_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_max_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_max_epi16(src, k, a, b); #else return simde_mm512_mask_mov_epi16(src, k, simde_mm512_max_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_max_epi16 #define _mm512_mask_max_epi16(src, k, a, b) simde_mm512_mask_max_epi16(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_max_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_max_epi16(k, a, b); #else return simde_mm512_maskz_mov_epi16(k, simde_mm512_max_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_max_epi16 #define _mm512_maskz_max_epi16(k, a, b) simde_mm512_maskz_max_epi16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_max_epu16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_max_epu16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_max_epu16(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? 
a_.u16[i] : b_.u16[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_max_epu16 #define _mm512_max_epu16(a, b) simde_mm512_max_epu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_max_epu16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_max_epu16(src, k, a, b); #else return simde_mm512_mask_mov_epi16(src, k, simde_mm512_max_epu16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_max_epu16 #define _mm512_mask_max_epu16(src, k, a, b) simde_mm512_mask_max_epu16(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_max_epu16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_max_epu16(k, a, b); #else return simde_mm512_maskz_mov_epi16(k, simde_mm512_max_epu16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_max_epu16 #define _mm512_maskz_max_epu16(k, a, b) simde_mm512_maskz_max_epu16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_max_epi32 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_max_epi32(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_X86_AVX2_NATIVE) r_.m256i[0] = simde_mm256_max_epi32(a_.m256i[0], b_.m256i[0]); r_.m256i[1] = simde_mm256_max_epi32(a_.m256i[1], b_.m256i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_max_epi32 #define _mm512_max_epi32(a, b) simde_mm512_max_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_max_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_max_epi32(src, k, a, b); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_max_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_max_epi32 #define _mm512_mask_max_epi32(src, k, a, b) simde_mm512_mask_max_epi32(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_max_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_max_epi32(k, a, b); #else return simde_mm512_maskz_mov_epi32(k, simde_mm512_max_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_max_epi32 #define _mm512_maskz_max_epi32(k, a, b) simde_mm512_maskz_max_epi32(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_max_epu32 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_max_epu32(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_X86_AVX2_NATIVE) r_.m256i[0] = simde_mm256_max_epu32(a_.m256i[0], b_.m256i[0]); r_.m256i[1] = simde_mm256_max_epu32(a_.m256i[1], b_.m256i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? 
a_.u32[i] : b_.u32[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_max_epu32 #define _mm512_max_epu32(a, b) simde_mm512_max_epu32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_max_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_max_epu32(src, k, a, b); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_max_epu32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_max_epu32 #define _mm512_mask_max_epu32(src, k, a, b) simde_mm512_mask_max_epu32(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_max_epu32(simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_max_epu32(k, a, b); #else return simde_mm512_maskz_mov_epi32(k, simde_mm512_max_epu32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_max_epu32 #define _mm512_maskz_max_epu32(k, a, b) simde_mm512_maskz_max_epu32(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_max_epi64 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_max_epi64(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[i] > b_.i64[i] ? 
a_.i64[i] : b_.i64[i]; } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_max_epi64 #define _mm512_max_epi64(a, b) simde_mm512_max_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_max_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_max_epi64(src, k, a, b); #else return simde_mm512_mask_mov_epi64(src, k, simde_mm512_max_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_max_epi64 #define _mm512_mask_max_epi64(src, k, a, b) simde_mm512_mask_max_epi64(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_max_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_max_epi64(k, a, b); #else return simde_mm512_maskz_mov_epi64(k, simde_mm512_max_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_max_epi64 #define _mm512_maskz_max_epi64(k, a, b) simde_mm512_maskz_max_epi64(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_max_epu64 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_max_epu64(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = (a_.u64[i] > b_.u64[i]) ? 
a_.u64[i] : b_.u64[i]; } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_max_epu64 #define _mm512_max_epu64(a, b) simde_mm512_max_epu64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_max_epu64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_max_epu64(src, k, a, b); #else return simde_mm512_mask_mov_epi64(src, k, simde_mm512_max_epu64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_max_epu64 #define _mm512_mask_max_epu64(src, k, a, b) simde_mm512_mask_max_epu64(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_max_epu64(simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_max_epu64(k, a, b); #else return simde_mm512_maskz_mov_epi64(k, simde_mm512_max_epu64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_max_epu64 #define _mm512_maskz_max_epu64(k, a, b) simde_mm512_maskz_max_epu64(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_max_ps (simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_max_ps(a, b); #else simde__m512_private r_, a_ = simde__m512_to_private(a), b_ = simde__m512_to_private(b); #if defined(SIMDE_X86_AVX2_NATIVE) r_.m256[0] = simde_mm256_max_ps(a_.m256[0], b_.m256[0]); r_.m256[1] = simde_mm256_max_ps(a_.m256[1], b_.m256[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[i] > b_.f32[i] ? 
a_.f32[i] : b_.f32[i]; } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_max_ps #define _mm512_max_ps(a, b) simde_mm512_max_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_max_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_max_ps(src, k, a, b); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_max_ps(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_max_ps #define _mm512_mask_max_ps(src, k, a, b) simde_mm512_mask_max_ps(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_maskz_max_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_max_ps(k, a, b); #else return simde_mm512_maskz_mov_ps(k, simde_mm512_max_ps(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_max_ps #define _mm512_maskz_max_ps(k, a, b) simde_mm512_maskz_max_ps(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_max_pd (simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_max_pd(a, b); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a), b_ = simde__m512d_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[i] > b_.f64[i] ? 
a_.f64[i] : b_.f64[i]; } return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_max_pd #define _mm512_max_pd(a, b) simde_mm512_max_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_max_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_max_pd(src, k, a, b); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_max_pd(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_max_pd #define _mm512_mask_max_pd(src, k, a, b) simde_mm512_mask_max_pd(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_maskz_max_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_max_pd(k, a, b); #else return simde_mm512_maskz_mov_pd(k, simde_mm512_max_pd(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_max_pd #define _mm512_maskz_max_pd(k, a, b) simde_mm512_maskz_max_pd(k, a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_MAX_H) */ simde-0.7.2/simde/x86/avx512/min.h000066400000000000000000000442471400333146700163450ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_MIN_H) #define SIMDE_X86_AVX512_MIN_H #include "types.h" #include "../avx2.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_min_epi8 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_min_epi8(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
a_.i8[i] : b_.i8[i]; } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) # define _mm512_min_epi8(a, b) simde_mm512_min_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_min_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_min_epi8(src, k, a, b); #else return simde_mm512_mask_mov_epi8(src, k, simde_mm512_min_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_min_epi8 #define _mm512_mask_min_epi8(src, k, a, b) simde_mm512_mask_min_epi8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_min_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_min_epi8(k, a, b); #else return simde_mm512_maskz_mov_epi8(k, simde_mm512_min_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_min_epi8 #define _mm512_maskz_min_epi8(k, a, b) simde_mm512_maskz_min_epi8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_min_epu8 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_min_epu8(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_min_epu8(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_min_epu8 #define _mm512_min_epu8(a, b) simde_mm512_min_epu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_min_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_min_epu8(src, k, a, b); #else return simde_mm512_mask_mov_epi8(src, k, simde_mm512_min_epu8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_min_epu8 #define _mm512_mask_min_epu8(src, k, a, b) simde_mm512_mask_min_epu8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_min_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_min_epu8(k, a, b); #else return simde_mm512_maskz_mov_epi8(k, simde_mm512_min_epu8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_min_epu8 #define _mm512_maskz_min_epu8(k, a, b) simde_mm512_maskz_min_epu8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_min_epi16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_min_epi16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) # define _mm512_min_epi16(a, b) simde_mm512_min_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_min_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_min_epi16(src, k, a, b); #else return simde_mm512_mask_mov_epi16(src, k, simde_mm512_min_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_min_epi16 #define _mm512_mask_min_epi16(src, k, a, b) simde_mm512_mask_min_epi16(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_min_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_min_epi16(k, a, b); #else return simde_mm512_maskz_mov_epi16(k, simde_mm512_min_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_min_epi16 #define _mm512_maskz_min_epi16(k, a, b) simde_mm512_maskz_min_epi16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_min_epu16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_min_epu16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_min_epu16(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? 
a_.u16[i] : b_.u16[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_min_epu16 #define _mm512_min_epu16(a, b) simde_mm512_min_epu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_min_epu16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_min_epu16(src, k, a, b); #else return simde_mm512_mask_mov_epi16(src, k, simde_mm512_min_epu16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_min_epu16 #define _mm512_mask_min_epu16(src, k, a, b) simde_mm512_mask_min_epu16(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_min_epu16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_min_epu16(k, a, b); #else return simde_mm512_maskz_mov_epi16(k, simde_mm512_min_epu16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_min_epu16 #define _mm512_maskz_min_epu16(k, a, b) simde_mm512_maskz_min_epu16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_min_epi32 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_min_epi32(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_X86_AVX2_NATIVE) r_.m256i[0] = simde_mm256_min_epi32(a_.m256i[0], b_.m256i[0]); r_.m256i[1] = simde_mm256_min_epi32(a_.m256i[1], b_.m256i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] < b_.i32[i] ? 
a_.i32[i] : b_.i32[i];
      }
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
/* Native-name alias for simde_mm512_min_epi32. */
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_min_epi32
  #define _mm512_min_epi32(a, b) simde_mm512_min_epi32(a, b)
#endif

/* Merge-masked epi32 minimum.
 *
 * Fallback: computes simde_mm512_min_epi32(a, b) and lets
 * simde_mm512_mask_mov_epi32 choose between that result and `src` under `k`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_min_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_min_epi32(src, k, a, b);
  #else
    return simde_mm512_mask_mov_epi32(src, k, simde_mm512_min_epi32(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_min_epi32
  #define _mm512_mask_min_epi32(src, k, a, b) simde_mm512_mask_min_epi32(src, k, a, b)
#endif

/* Zero-masked epi32 minimum.
 *
 * Fallback: computes simde_mm512_min_epi32(a, b) and passes it through
 * simde_mm512_maskz_mov_epi32 with mask `k`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_maskz_min_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_maskz_min_epi32(k, a, b);
  #else
    return simde_mm512_maskz_mov_epi32(k, simde_mm512_min_epi32(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_min_epi32
  #define _mm512_maskz_min_epi32(k, a, b) simde_mm512_maskz_min_epi32(k, a, b)
#endif

/* Element-wise minimum over the u32 view of two 512-bit vectors.
 *
 * Paths: the native AVX-512F intrinsic; two simde_mm256_min_epu32 calls on the
 * m256i halves when AVX2 is native; otherwise a per-element loop that keeps
 * a's element when it is strictly less than b's. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_min_epu32 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_min_epu32(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_X86_AVX2_NATIVE)
      r_.m256i[0] = simde_mm256_min_epu32(a_.m256i[0], b_.m256i[0]);
      r_.m256i[1] = simde_mm256_min_epu32(a_.m256i[1], b_.m256i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        r_.u32[i] = (a_.u32[i] < b_.u32[i]) ?
a_.u32[i] : b_.u32[i];
      }
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
/* Native-name alias for simde_mm512_min_epu32. */
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_min_epu32
  #define _mm512_min_epu32(a, b) simde_mm512_min_epu32(a, b)
#endif

/* Merge-masked epu32 minimum.
 *
 * Fallback: computes simde_mm512_min_epu32(a, b) and lets
 * simde_mm512_mask_mov_epi32 choose between that result and `src` under `k`.
 * The epi32 mov is reused for the blend; it only copies 32-bit elements. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_min_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_min_epu32(src, k, a, b);
  #else
    return simde_mm512_mask_mov_epi32(src, k, simde_mm512_min_epu32(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_min_epu32
  #define _mm512_mask_min_epu32(src, k, a, b) simde_mm512_mask_min_epu32(src, k, a, b)
#endif

/* Zero-masked epu32 minimum.
 *
 * Fallback: computes simde_mm512_min_epu32(a, b) and passes it through
 * simde_mm512_maskz_mov_epi32 with mask `k`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_maskz_min_epu32(simde__mmask16 k, simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_maskz_min_epu32(k, a, b);
  #else
    return simde_mm512_maskz_mov_epi32(k, simde_mm512_min_epu32(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_min_epu32
  #define _mm512_maskz_min_epu32(k, a, b) simde_mm512_maskz_min_epu32(k, a, b)
#endif

/* Element-wise minimum over the i64 view of two 512-bit vectors.
 *
 * Uses the native AVX-512F intrinsic when available; otherwise a single
 * per-element loop (there is no half-vector path in this function) that keeps
 * a's element when it is strictly less than b's. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_min_epi64 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_min_epi64(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
      r_.i64[i] = a_.i64[i] < b_.i64[i] ?
a_.i64[i] : b_.i64[i];
    }

    return simde__m512i_from_private(r_);
  #endif
}
/* Native-name alias for simde_mm512_min_epi64. */
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_min_epi64
  #define _mm512_min_epi64(a, b) simde_mm512_min_epi64(a, b)
#endif

/* Merge-masked epi64 minimum.
 *
 * Fallback: computes simde_mm512_min_epi64(a, b) and lets
 * simde_mm512_mask_mov_epi64 choose between that result and `src` under `k`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_min_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_min_epi64(src, k, a, b);
  #else
    return simde_mm512_mask_mov_epi64(src, k, simde_mm512_min_epi64(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_min_epi64
  #define _mm512_mask_min_epi64(src, k, a, b) simde_mm512_mask_min_epi64(src, k, a, b)
#endif

/* Zero-masked epi64 minimum.
 *
 * Fallback: computes simde_mm512_min_epi64(a, b) and passes it through
 * simde_mm512_maskz_mov_epi64 with mask `k`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_maskz_min_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_maskz_min_epi64(k, a, b);
  #else
    return simde_mm512_maskz_mov_epi64(k, simde_mm512_min_epi64(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_min_epi64
  #define _mm512_maskz_min_epi64(k, a, b) simde_mm512_maskz_min_epi64(k, a, b)
#endif

/* Element-wise minimum over the u64 view of two 512-bit vectors.
 *
 * Uses the native AVX-512F intrinsic when available; otherwise a single
 * per-element loop that keeps a's element when it is strictly less than b's. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_min_epu64 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_min_epu64(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
      r_.u64[i] = (a_.u64[i] < b_.u64[i]) ?
a_.u64[i] : b_.u64[i];
    }

    return simde__m512i_from_private(r_);
  #endif
}
/* Native-name alias for simde_mm512_min_epu64. */
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_min_epu64
  #define _mm512_min_epu64(a, b) simde_mm512_min_epu64(a, b)
#endif

/* Merge-masked epu64 minimum.
 *
 * Fallback: computes simde_mm512_min_epu64(a, b) and lets
 * simde_mm512_mask_mov_epi64 choose between that result and `src` under `k`.
 * The epi64 mov is reused for the blend; it only copies 64-bit elements. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_min_epu64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_min_epu64(src, k, a, b);
  #else
    return simde_mm512_mask_mov_epi64(src, k, simde_mm512_min_epu64(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_min_epu64
  #define _mm512_mask_min_epu64(src, k, a, b) simde_mm512_mask_min_epu64(src, k, a, b)
#endif

/* Zero-masked epu64 minimum.
 *
 * Fallback: computes simde_mm512_min_epu64(a, b) and passes it through
 * simde_mm512_maskz_mov_epi64 with mask `k`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_maskz_min_epu64(simde__mmask8 k, simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_maskz_min_epu64(k, a, b);
  #else
    return simde_mm512_maskz_mov_epi64(k, simde_mm512_min_epu64(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_min_epu64
  #define _mm512_maskz_min_epu64(k, a, b) simde_mm512_maskz_min_epu64(k, a, b)
#endif

/* Element-wise minimum over the f32 view of two 512-bit float vectors.
 *
 * Paths: the native AVX-512F intrinsic; two simde_mm256_min_ps calls on the
 * m256 halves when AVX2 is native; otherwise a per-element loop.  The loop
 * returns a's element only when `a < b` is true, so whenever that comparison
 * is false (equal values, or an unordered comparison) b's element is used. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_min_ps (simde__m512 a, simde__m512 b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_min_ps(a, b);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a),
      b_ = simde__m512_to_private(b);

    #if defined(SIMDE_X86_AVX2_NATIVE)
      r_.m256[0] = simde_mm256_min_ps(a_.m256[0], b_.m256[0]);
      r_.m256[1] = simde_mm256_min_ps(a_.m256[1], b_.m256[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = a_.f32[i] < b_.f32[i] ?
a_.f32[i] : b_.f32[i];
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
/* Native-name alias for simde_mm512_min_ps. */
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_min_ps
  #define _mm512_min_ps(a, b) simde_mm512_min_ps(a, b)
#endif

/* Merge-masked single-precision minimum.
 *
 * Fallback: computes simde_mm512_min_ps(a, b) and lets
 * simde_mm512_mask_mov_ps choose between that result and `src` under `k`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_min_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_min_ps(src, k, a, b);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_min_ps(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_min_ps
  #define _mm512_mask_min_ps(src, k, a, b) simde_mm512_mask_min_ps(src, k, a, b)
#endif

/* Zero-masked single-precision minimum.
 *
 * Fallback: computes simde_mm512_min_ps(a, b) and passes it through
 * simde_mm512_maskz_mov_ps with mask `k`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_maskz_min_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_maskz_min_ps(k, a, b);
  #else
    return simde_mm512_maskz_mov_ps(k, simde_mm512_min_ps(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_min_ps
  #define _mm512_maskz_min_ps(k, a, b) simde_mm512_maskz_min_ps(k, a, b)
#endif

/* Element-wise minimum over the f64 view of two 512-bit double vectors.
 *
 * Uses the native AVX-512F intrinsic when available; otherwise a single
 * per-element loop that returns a's element only when `a < b` is true and
 * b's element in every other case. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_min_pd (simde__m512d a, simde__m512d b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_min_pd(a, b);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a),
      b_ = simde__m512d_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = a_.f64[i] < b_.f64[i] ?
a_.f64[i] : b_.f64[i];
    }

    return simde__m512d_from_private(r_);
  #endif
}
/* Native-name alias for simde_mm512_min_pd. */
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_min_pd
  #define _mm512_min_pd(a, b) simde_mm512_min_pd(a, b)
#endif

/* Merge-masked double-precision minimum.
 *
 * Fallback: computes simde_mm512_min_pd(a, b) and lets
 * simde_mm512_mask_mov_pd choose between that result and `src` under `k`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_min_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_min_pd(src, k, a, b);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_min_pd(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_min_pd
  #define _mm512_mask_min_pd(src, k, a, b) simde_mm512_mask_min_pd(src, k, a, b)
#endif

/* Zero-masked double-precision minimum.
 *
 * Fallback: computes simde_mm512_min_pd(a, b) and passes it through
 * simde_mm512_maskz_mov_pd with mask `k`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_maskz_min_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_maskz_min_pd(k, a, b);
  #else
    return simde_mm512_maskz_mov_pd(k, simde_mm512_min_pd(a, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_min_pd
  #define _mm512_maskz_min_pd(k, a, b) simde_mm512_maskz_min_pd(k, a, b)
#endif

SIMDE_END_DECLS_

HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_X86_AVX512_MIN_H) */
/* NOTE(review): the token run that follows (path, octal fields, "ustar") is an
 * archive member header for simde/x86/avx512/mov.h embedded in this dump, not
 * C source; it is kept untouched. */
simde-0.7.2/simde/x86/avx512/mov.h000066400000000000000000000731011400333146700163520ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
*
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 * 2020 Evan Nemerson
 * 2020 Christopher Moore
 */

/* Include guard for the masked-move header. */
#if !defined(SIMDE_X86_AVX512_MOV_H)
#define SIMDE_X86_AVX512_MOV_H

#include "types.h"
#include "cast.h"
#include "set.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* Merge-masked move of 8-bit elements in a 128-bit vector.
 *
 * For each element index i, bit i of `k` decides the source: a set bit copies
 * a's element, a clear bit keeps src's element.  Forwards to the native
 * intrinsic when both AVX-512BW and AVX-512VL are native. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_mask_mov_epi8 (simde__m128i src, simde__mmask16 k, simde__m128i a) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_mask_mov_epi8(src, k, a);
  #else
    simde__m128i_private
      src_ = simde__m128i_to_private(src),
      a_ = simde__m128i_to_private(a),
      r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
      r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i];
    }

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_mask_mov_epi8
  #define _mm_mask_mov_epi8(src, k, a) simde_mm_mask_mov_epi8(src, k, a)
#endif

/* Merge-masked move of 16-bit elements in a 128-bit vector: bit i of `k`
 * selects a's element i when set and src's element i when clear. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_mask_mov_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i a) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_mask_mov_epi16(src, k, a);
  #else
    simde__m128i_private
      src_ = simde__m128i_to_private(src),
      a_ = simde__m128i_to_private(a),
      r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
      r_.i16[i] = ((k >> i) & 1) ?
a_.i16[i] : src_.i16[i];
    }

    return simde__m128i_from_private(r_);
  #endif
}
/* Native-name alias for simde_mm_mask_mov_epi16 (needs both the BW and VL
 * alias switches). */
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_mask_mov_epi16
  #define _mm_mask_mov_epi16(src, k, a) simde_mm_mask_mov_epi16(src, k, a)
#endif

/* Merge-masked move of 32-bit elements in a 128-bit vector.
 *
 * Bit i of `k` selects a's element i when set and src's element i when clear.
 * Only as many low bits of `k` as there are i32 elements are examined by the
 * fallback loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_mask_mov_epi32 (simde__m128i src, simde__mmask8 k, simde__m128i a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_mask_mov_epi32(src, k, a);
  #else
    simde__m128i_private
      src_ = simde__m128i_to_private(src),
      a_ = simde__m128i_to_private(a),
      r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
      r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i];
    }

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_mask_mov_epi32
  #define _mm_mask_mov_epi32(src, k, a) simde_mm_mask_mov_epi32(src, k, a)
#endif

/* Merge-masked move of 64-bit elements in a 128-bit vector: bit i of `k`
 * selects a's element i when set and src's element i when clear. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_mask_mov_epi64 (simde__m128i src, simde__mmask8 k, simde__m128i a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_mask_mov_epi64(src, k, a);
  #else
    simde__m128i_private
      src_ = simde__m128i_to_private(src),
      a_ = simde__m128i_to_private(a),
      r_;

    /* N.B. CM: No fallbacks as there are only two elements */
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
      r_.i64[i] = ((k >> i) & 1) ?
a_.i64[i] : src_.i64[i];
    }

    return simde__m128i_from_private(r_);
  #endif
}
/* Native-name alias for simde_mm_mask_mov_epi64. */
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_mask_mov_epi64
  #define _mm_mask_mov_epi64(src, k, a) simde_mm_mask_mov_epi64(src, k, a)
#endif

/* Merge-masked move of doubles in a 128-bit vector.
 *
 * Fallback: casts both inputs to the integer vector type, reuses
 * simde_mm_mask_mov_epi64 for the selection, and casts the result back. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_mask_mov_pd(simde__m128d src, simde__mmask8 k, simde__m128d a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_mask_mov_pd(src, k, a);
  #else
    return simde_mm_castsi128_pd(simde_mm_mask_mov_epi64(simde_mm_castpd_si128(src), k, simde_mm_castpd_si128(a)));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_mask_mov_pd
  #define _mm_mask_mov_pd(src, k, a) simde_mm_mask_mov_pd(src, k, a)
#endif

/* Merge-masked move of floats in a 128-bit vector.
 *
 * Fallback: casts both inputs to the integer vector type, reuses
 * simde_mm_mask_mov_epi32 for the selection, and casts the result back. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_mask_mov_ps (simde__m128 src, simde__mmask8 k, simde__m128 a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_mask_mov_ps(src, k, a);
  #else
    return simde_mm_castsi128_ps(simde_mm_mask_mov_epi32(simde_mm_castps_si128(src), k, simde_mm_castps_si128(a)));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_mask_mov_ps
  #define _mm_mask_mov_ps(src, k, a) simde_mm_mask_mov_ps(src, k, a)
#endif

/* Merge-masked move of 8-bit elements in a 256-bit vector.
 *
 * When SSSE3 is native the work is split across the two m128i halves: the low
 * half uses `k` truncated to a 16-bit mask and the high half uses `k >> 16`.
 * Otherwise a per-element loop tests bit i of `k` directly. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_mask_mov_epi8 (simde__m256i src, simde__mmask32 k, simde__m256i a) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask_mov_epi8(src, k, a);
  #else
    simde__m256i_private
      r_,
      src_ = simde__m256i_to_private(src),
      a_ = simde__m256i_to_private(a);

    #if defined(SIMDE_X86_SSSE3_NATIVE)
      r_.m128i[0] = simde_mm_mask_mov_epi8(src_.m128i[0], HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m128i[0]);
      r_.m128i[1] = simde_mm_mask_mov_epi8(src_.m128i[1], HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m128i[1]);
    #else
SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
/* Native-name alias for simde_mm256_mask_mov_epi8 (needs both the BW and VL
 * alias switches). */
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_mov_epi8
  #define _mm256_mask_mov_epi8(src, k, a) simde_mm256_mask_mov_epi8(src, k, a)
#endif

/* Merge-masked move of 16-bit elements in a 256-bit vector.
 *
 * When SSE2 is native the two m128i halves are handled by
 * simde_mm_mask_mov_epi16, the low half with `k` truncated to 8 bits and the
 * high half with `k >> 8`.  Otherwise a per-element loop tests bit i of `k`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_mask_mov_epi16 (simde__m256i src, simde__mmask16 k, simde__m256i a) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask_mov_epi16(src, k, a);
  #else
    simde__m256i_private
      src_ = simde__m256i_to_private(src),
      a_ = simde__m256i_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i[0] = simde_mm_mask_mov_epi16(src_.m128i[0], HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m128i[0]);
      r_.m128i[1] = simde_mm_mask_mov_epi16(src_.m128i[1], HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = ((k >> i) & 1) ?
a_.i16[i] : src_.i16[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
/* Native-name alias for simde_mm256_mask_mov_epi16. */
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_mov_epi16
  #define _mm256_mask_mov_epi16(src, k, a) simde_mm256_mask_mov_epi16(src, k, a)
#endif

/* Merge-masked move of 32-bit elements in a 256-bit vector.
 *
 * When SSE2 is native the two m128i halves are handled by
 * simde_mm_mask_mov_epi32: the low half receives `k` as-is and the high half
 * receives `k >> 4`.  Otherwise a per-element loop tests bit i of `k`.
 * NOTE(review): `k >> 4` is passed without the HEDLEY_STATIC_CAST that the
 * 8/16-bit siblings use, so it relies on implicit conversion back to
 * simde__mmask8 -- confirm this is warning-free on all supported compilers. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_mask_mov_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask_mov_epi32(src, k, a);
  #else
    simde__m256i_private
      src_ = simde__m256i_to_private(src),
      a_ = simde__m256i_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i[0] = simde_mm_mask_mov_epi32(src_.m128i[0], k , a_.m128i[0]);
      r_.m128i[1] = simde_mm_mask_mov_epi32(src_.m128i[1], k >> 4, a_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_mov_epi32
  #define _mm256_mask_mov_epi32(src, k, a) simde_mm256_mask_mov_epi32(src, k, a)
#endif

/* Merge-masked move of 64-bit elements in a 256-bit vector.
 *
 * When SSE2 is native the two m128i halves are handled by
 * simde_mm_mask_mov_epi64, the high half receiving `k >> 2`.  Otherwise a
 * per-element loop tests bit i of `k`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_mask_mov_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask_mov_epi64(src, k, a);
  #else
    simde__m256i_private
      src_ = simde__m256i_to_private(src),
      a_ = simde__m256i_to_private(a),
      r_;

    /* N.B. CM: This fallback may not be faster as there are only four elements */
    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m128i[0] = simde_mm_mask_mov_epi64(src_.m128i[0], k , a_.m128i[0]);
      r_.m128i[1] = simde_mm_mask_mov_epi64(src_.m128i[1], k >> 2, a_.m128i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.i64[i] = ((k >> i) & 1) ?
a_.i64[i] : src_.i64[i];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
/* Native-name alias for simde_mm256_mask_mov_epi64. */
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_mov_epi64
  #define _mm256_mask_mov_epi64(src, k, a) simde_mm256_mask_mov_epi64(src, k, a)
#endif

/* Merge-masked move of doubles in a 256-bit vector.
 *
 * Fallback: casts both inputs to the integer vector type, reuses
 * simde_mm256_mask_mov_epi64 for the selection, and casts the result back. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_mask_mov_pd (simde__m256d src, simde__mmask8 k, simde__m256d a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask_mov_pd(src, k, a);
  #else
    return simde_mm256_castsi256_pd(simde_mm256_mask_mov_epi64(simde_mm256_castpd_si256(src), k, simde_mm256_castpd_si256(a)));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_mov_pd
  #define _mm256_mask_mov_pd(src, k, a) simde_mm256_mask_mov_pd(src, k, a)
#endif

/* Merge-masked move of floats in a 256-bit vector.
 *
 * Fallback: casts both inputs to the integer vector type, reuses
 * simde_mm256_mask_mov_epi32 for the selection, and casts the result back. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_mask_mov_ps (simde__m256 src, simde__mmask8 k, simde__m256 a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask_mov_ps(src, k, a);
  #else
    return simde_mm256_castsi256_ps(simde_mm256_mask_mov_epi32(simde_mm256_castps_si256(src), k, simde_mm256_castps_si256(a)));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_mov_ps
  #define _mm256_mask_mov_ps(src, k, a) simde_mm256_mask_mov_ps(src, k, a)
#endif

/* Merge-masked move of 8-bit elements in a 512-bit vector.
 *
 * When SSSE3 is native the two m256i halves are handled by
 * simde_mm256_mask_mov_epi8, the low half with `k` truncated to 32 bits and
 * the high half with `k >> 32`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_mov_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE)
    return _mm512_mask_mov_epi8(src, k, a);
  #else
    simde__m512i_private
      src_ = simde__m512i_to_private(src),
      a_ = simde__m512i_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSSE3_NATIVE)
      r_.m256i[0] = simde_mm256_mask_mov_epi8(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask32, k ), a_.m256i[0]);
      r_.m256i[1] = simde_mm256_mask_mov_epi8(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask32, k
>> 32), a_.m256i[1]);
    #else
      /* Portable path: bit i of the mask picks a's element over src's. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i];
      }
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
/* Native-name alias for simde_mm512_mask_mov_epi8. */
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_mov_epi8
  #define _mm512_mask_mov_epi8(src, k, a) simde_mm512_mask_mov_epi8(src, k, a)
#endif

/* Merge-masked move of 16-bit elements in a 512-bit vector.
 *
 * When SSE2 is native the two m256i halves are handled by
 * simde_mm256_mask_mov_epi16, the low half with `k` truncated to 16 bits and
 * the high half with `k >> 16`.  Otherwise a per-element loop selects a's
 * element when bit i of `k` is set and src's element when it is clear. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_mov_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE)
    return _mm512_mask_mov_epi16(src, k, a);
  #else
    simde__m512i_private
      src_ = simde__m512i_to_private(src),
      a_ = simde__m512i_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m256i[0] = simde_mm256_mask_mov_epi16(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m256i[0]);
      r_.m256i[1] = simde_mm256_mask_mov_epi16(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m256i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : src_.i16[i];
      }
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_mov_epi16
  #define _mm512_mask_mov_epi16(src, k, a) simde_mm512_mask_mov_epi16(src, k, a)
#endif

/* Merge-masked move of 32-bit elements in a 512-bit vector.
 *
 * When SSE2 is native the two m256i halves are handled by
 * simde_mm256_mask_mov_epi32, the low half with `k` truncated to 8 bits and
 * the high half with `k >> 8`.  Otherwise a per-element loop tests bit i of
 * `k`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_mov_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_mov_epi32(src, k, a);
  #else
    simde__m512i_private
      src_ = simde__m512i_to_private(src),
      a_ = simde__m512i_to_private(a),
      r_;

    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m256i[0] = simde_mm256_mask_mov_epi32(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m256i[0]);
      r_.m256i[1] = simde_mm256_mask_mov_epi32(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m256i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.i32[i] = ((k >> i) & 1) ?
a_.i32[i] : src_.i32[i];
      }
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
/* Native-name alias for simde_mm512_mask_mov_epi32. */
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_mov_epi32
  #define _mm512_mask_mov_epi32(src, k, a) simde_mm512_mask_mov_epi32(src, k, a)
#endif

/* Merge-masked move of 64-bit elements in a 512-bit vector.
 *
 * When SSE2 is native the two m256i halves are handled by
 * simde_mm256_mask_mov_epi64: the low half receives `k` as-is and the high
 * half receives `k >> 4`.  Otherwise a per-element loop tests bit i of `k`.
 * NOTE(review): `k >> 4` is passed without the HEDLEY_STATIC_CAST used by the
 * 8/16/32-bit 512-bit siblings -- confirm the implicit conversion back to
 * simde__mmask8 is intended. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_mov_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_mov_epi64(src, k, a);
  #else
    simde__m512i_private
      src_ = simde__m512i_to_private(src),
      a_ = simde__m512i_to_private(a),
      r_;

    /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */
    #if defined(SIMDE_X86_SSE2_NATIVE)
      r_.m256i[0] = simde_mm256_mask_mov_epi64(src_.m256i[0], k , a_.m256i[0]);
      r_.m256i[1] = simde_mm256_mask_mov_epi64(src_.m256i[1], k >> 4, a_.m256i[1]);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : src_.i64[i];
      }
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_mov_epi64
  #define _mm512_mask_mov_epi64(src, k, a) simde_mm512_mask_mov_epi64(src, k, a)
#endif

/* Merge-masked move of doubles in a 512-bit vector.
 *
 * Fallback: casts both inputs to the integer vector type, reuses
 * simde_mm512_mask_mov_epi64 for the selection, and casts the result back. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_mov_pd (simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_mov_pd(src, k, a);
  #else
    return simde_mm512_castsi512_pd(simde_mm512_mask_mov_epi64(simde_mm512_castpd_si512(src), k, simde_mm512_castpd_si512(a)));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_mov_pd
  #define _mm512_mask_mov_pd(src, k, a) simde_mm512_mask_mov_pd(src, k, a)
#endif

/* Merge-masked move of floats in a 512-bit vector.
 *
 * Fallback: casts both inputs to the integer vector type, reuses
 * simde_mm512_mask_mov_epi32 for the selection, and casts the result back. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_mov_ps (simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_mov_ps(src, k, a);
  #else
    return simde_mm512_castsi512_ps(simde_mm512_mask_mov_epi32(simde_mm512_castps_si512(src), k,
simde_mm512_castps_si512(a)));
  #endif
}
/* Native-name alias for simde_mm512_mask_mov_ps. */
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_mov_ps
  #define _mm512_mask_mov_ps(src, k, a) simde_mm512_mask_mov_ps(src, k, a)
#endif

/* Zero-masked move of 8-bit elements in a 128-bit vector.
 *
 * For each element index i, a set bit i in `k` copies a's element; a clear
 * bit writes 0.  Forwards to the native intrinsic when both AVX-512BW and
 * AVX-512VL are native. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_maskz_mov_epi8 (simde__mmask16 k, simde__m128i a) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_maskz_mov_epi8(k, a);
  #else
    simde__m128i_private
      a_ = simde__m128i_to_private(a),
      r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
      r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : INT8_C(0);
    }

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_maskz_mov_epi8
  #define _mm_maskz_mov_epi8(k, a) simde_mm_maskz_mov_epi8(k, a)
#endif

/* Zero-masked move of 16-bit elements in a 128-bit vector: a set bit i in `k`
 * copies a's element i, a clear bit writes 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_maskz_mov_epi16 (simde__mmask8 k, simde__m128i a) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_maskz_mov_epi16(k, a);
  #else
    simde__m128i_private
      a_ = simde__m128i_to_private(a),
      r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
      r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : INT16_C(0);
    }

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_maskz_mov_epi16
  #define _mm_maskz_mov_epi16(k, a) simde_mm_maskz_mov_epi16(k, a)
#endif

/* Zero-masked move of 32-bit elements in a 128-bit vector: a set bit i in `k`
 * copies a's element i, a clear bit writes 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_maskz_mov_epi32 (simde__mmask8 k, simde__m128i a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_maskz_mov_epi32(k, a);
  #else
    simde__m128i_private
      a_ = simde__m128i_to_private(a),
      r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
      r_.i32[i] = ((k >> i) & 1) ?
a_.i32[i] : INT32_C(0);
    }

    return simde__m128i_from_private(r_);
  #endif
}
/* Native-name alias for simde_mm_maskz_mov_epi32. */
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_maskz_mov_epi32
  #define _mm_maskz_mov_epi32(k, a) simde_mm_maskz_mov_epi32(k, a)
#endif

/* Zero-masked move of 64-bit elements in a 128-bit vector: a set bit i in `k`
 * copies a's element i, a clear bit writes 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_maskz_mov_epi64 (simde__mmask8 k, simde__m128i a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_maskz_mov_epi64(k, a);
  #else
    simde__m128i_private
      a_ = simde__m128i_to_private(a),
      r_;

    /* N.B. CM: No fallbacks as there are only two elements */
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
      r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : INT64_C(0);
    }

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_maskz_mov_epi64
  #define _mm_maskz_mov_epi64(k, a) simde_mm_maskz_mov_epi64(k, a)
#endif

/* Zero-masked move of doubles in a 128-bit vector.
 *
 * Fallback: casts `a` to the integer vector type, reuses
 * simde_mm_maskz_mov_epi64, and casts the result back. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_maskz_mov_pd (simde__mmask8 k, simde__m128d a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_maskz_mov_pd(k, a);
  #else
    return simde_mm_castsi128_pd(simde_mm_maskz_mov_epi64(k, simde_mm_castpd_si128(a)));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_maskz_mov_pd
  #define _mm_maskz_mov_pd(k, a) simde_mm_maskz_mov_pd(k, a)
#endif

/* Zero-masked move of floats in a 128-bit vector.
 *
 * Fallback: casts `a` to the integer vector type, reuses
 * simde_mm_maskz_mov_epi32, and casts the result back. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_maskz_mov_ps (simde__mmask8 k, simde__m128 a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_maskz_mov_ps(k, a);
  #else
    return simde_mm_castsi128_ps(simde_mm_maskz_mov_epi32(k, simde_mm_castps_si128(a)));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_maskz_mov_ps
  #define _mm_maskz_mov_ps(k, a) simde_mm_maskz_mov_ps(k, a)
#endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_maskz_mov_epi8 (simde__mmask32 k, simde__m256i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_maskz_mov_epi8(k, a); #else simde__m256i_private a_ = simde__m256i_to_private(a), r_; #if defined(SIMDE_X86_SSSE3_NATIVE) r_.m128i[0] = simde_mm_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m128i[0]); r_.m128i[1] = simde_mm_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : INT8_C(0); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_mov_epi8 #define _mm256_maskz_mov_epi8(k, a) simde_mm256_maskz_mov_epi8(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_maskz_mov_epi16 (simde__mmask16 k, simde__m256i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_maskz_mov_epi16(k, a); #else simde__m256i_private a_ = simde__m256i_to_private(a), r_; #if defined(SIMDE_X86_SSE2_NATIVE) r_.m128i[0] = simde_mm_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m128i[0]); r_.m128i[1] = simde_mm_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : INT16_C(0); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_mov_epi16 #define _mm256_maskz_mov_epi16(k, a) simde_mm256_maskz_mov_epi16(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_maskz_mov_epi32 (simde__mmask8 k, simde__m256i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_maskz_mov_epi32(k, a); #else simde__m256i_private a_ = simde__m256i_to_private(a), r_; #if defined(SIMDE_X86_SSE2_NATIVE) r_.m128i[0] = simde_mm_maskz_mov_epi32(k , a_.m128i[0]); r_.m128i[1] = simde_mm_maskz_mov_epi32(k >> 4, a_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_mov_epi32 #define _mm256_maskz_mov_epi32(k, a) simde_mm256_maskz_mov_epi32(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_maskz_mov_epi64 (simde__mmask8 k, simde__m256i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_maskz_mov_epi64(k, a); #else simde__m256i_private a_ = simde__m256i_to_private(a), r_; /* N.B. CM: This fallback may not be faster as there are only four elements */ #if defined(SIMDE_X86_SSE2_NATIVE) r_.m128i[0] = simde_mm_maskz_mov_epi64(k , a_.m128i[0]); r_.m128i[1] = simde_mm_maskz_mov_epi64(k >> 2, a_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : INT64_C(0); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_mov_epi64 #define _mm256_maskz_mov_epi64(k, a) simde_mm256_maskz_mov_epi64(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_maskz_mov_pd (simde__mmask8 k, simde__m256d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_maskz_mov_pd(k, a); #else return simde_mm256_castsi256_pd(simde_mm256_maskz_mov_epi64(k, simde_mm256_castpd_si256(a))); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_mov_pd #define _mm256_maskz_mov_pd(k, a) simde_mm256_maskz_mov_pd(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_maskz_mov_ps (simde__mmask8 k, simde__m256 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_maskz_mov_ps(k, a); #else return simde_mm256_castsi256_ps(simde_mm256_maskz_mov_epi32(k, simde_mm256_castps_si256(a))); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_mov_ps #define _mm256_maskz_mov_ps(k, a) simde_mm256_maskz_mov_ps(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_mov_epi8 (simde__mmask64 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_mov_epi8(k, a); #else simde__m512i_private a_ = simde__m512i_to_private(a), r_; #if defined(SIMDE_X86_SSSE3_NATIVE) r_.m256i[0] = simde_mm256_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k ), a_.m256i[0]); r_.m256i[1] = simde_mm256_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k >> 32), a_.m256i[1]); #else for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = ((k >> i) & 1) ? 
a_.i8[i] : INT8_C(0); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_mov_epi8 #define _mm512_maskz_mov_epi8(k, a) simde_mm512_maskz_mov_epi8(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_mov_epi16 (simde__mmask32 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_mov_epi16(k, a); #else simde__m512i_private a_ = simde__m512i_to_private(a), r_; #if defined(SIMDE_X86_SSE2_NATIVE) r_.m256i[0] = simde_mm256_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m256i[0]); r_.m256i[1] = simde_mm256_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m256i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : INT16_C(0); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_mov_epi16 #define _mm512_maskz_mov_epi16(k, a) simde_mm512_maskz_mov_epi16(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_mov_epi32 (simde__mmask16 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_mov_epi32(k, a); #else simde__m512i_private a_ = simde__m512i_to_private(a), r_; #if defined(SIMDE_X86_SSE2_NATIVE) r_.m256i[0] = simde_mm256_maskz_mov_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m256i[0]); r_.m256i[1] = simde_mm256_maskz_mov_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m256i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = ((k >> i) & 1) ? 
a_.i32[i] : INT32_C(0); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_mov_epi32 #define _mm512_maskz_mov_epi32(k, a) simde_mm512_maskz_mov_epi32(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_mov_epi64 (simde__mmask8 k, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_mov_epi64(k, a); #else simde__m512i_private a_ = simde__m512i_to_private(a), r_; /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ #if defined(SIMDE_X86_SSE2_NATIVE) r_.m256i[0] = simde_mm256_maskz_mov_epi64(k , a_.m256i[0]); r_.m256i[1] = simde_mm256_maskz_mov_epi64(k >> 4, a_.m256i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : INT64_C(0); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_mov_epi64 #define _mm512_maskz_mov_epi64(k, a) simde_mm512_maskz_mov_epi64(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_maskz_mov_pd (simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_mov_pd(k, a); #else return simde_mm512_castsi512_pd(simde_mm512_maskz_mov_epi64(k, simde_mm512_castpd_si512(a))); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_mov_pd #define _mm512_maskz_mov_pd(k, a) simde_mm512_maskz_mov_pd(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_maskz_mov_ps (simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_mov_ps(k, a); #else return simde_mm512_castsi512_ps(simde_mm512_maskz_mov_epi32(k, simde_mm512_castps_si512(a))); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_mov_ps #define _mm512_maskz_mov_ps(k, a) simde_mm512_maskz_mov_ps(k, a) #endif SIMDE_END_DECLS_ 
HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_MOV_H) */ simde-0.7.2/simde/x86/avx512/mov_mask.h000066400000000000000000000272261400333146700173740ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_X86_AVX512_MOV_MASK_H) #define SIMDE_X86_AVX512_MOV_MASK_H #include "types.h" #include "../avx2.h" #include "cast.h" #include "set.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__mmask16 simde_mm_movepi8_mask (simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_movepi8_mask(a); #elif defined(SIMDE_X86_SSE2_NATIVE) return HEDLEY_STATIC_CAST(simde__mmask16, simde_mm_movemask_epi8(a)); #else simde__m128i_private a_ = simde__m128i_to_private(a); simde__mmask16 r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { r |= (a_.i8[i] < 0) ? (UINT64_C(1) << i) : 0; } return r; #endif } #if defined(SIMDE_X86_AVX256BW_ENABLE_NATIVE_ALIASES) #undef _mm_movepi8_mask #define _mm_movepi8_mask(a) simde_mm_movepi8_mask(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask8 simde_mm_movepi16_mask (simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_movepi16_mask(a); #elif defined(SIMDE_X86_SSE2_NATIVE) /* There is no 32-bit _mm_movemask_* function, so we use * _mm_movemask_epi8 then extract the odd bits. */ uint_fast16_t r = HEDLEY_STATIC_CAST(uint_fast16_t, simde_mm_movemask_epi8(a)); r = ( (r >> 1)) & UINT32_C(0x5555); r = (r | (r >> 1)) & UINT32_C(0x3333); r = (r | (r >> 2)) & UINT32_C(0x0f0f); r = (r | (r >> 4)) & UINT32_C(0x00ff); return HEDLEY_STATIC_CAST(simde__mmask8, r); #else simde__m128i_private a_ = simde__m128i_to_private(a); simde__mmask8 r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { r |= (a_.i16[i] < 0) ? 
(UINT32_C(1) << i) : 0; } return r; #endif } #if defined(SIMDE_X86_AVX256BW_ENABLE_NATIVE_ALIASES) #undef _mm_movepi16_mask #define _mm_movepi16_mask(a) simde_mm_movepi16_mask(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask8 simde_mm_movepi32_mask (simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm_movepi32_mask(a); #elif defined(SIMDE_X86_SSE2_NATIVE) return HEDLEY_STATIC_CAST(simde__mmask8, simde_mm_movemask_ps(simde_mm_castsi128_ps(a))); #else simde__m128i_private a_ = simde__m128i_to_private(a); simde__mmask8 r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { r |= (a_.i32[i] < 0) ? (UINT32_C(1) << i) : 0; } return r; #endif } #if defined(SIMDE_X86_AVX256DQ_ENABLE_NATIVE_ALIASES) #undef _mm_movepi32_mask #define _mm_movepi32_mask(a) simde_mm_movepi32_mask(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask8 simde_mm_movepi64_mask (simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm_movepi64_mask(a); #elif defined(SIMDE_X86_SSE2_NATIVE) return HEDLEY_STATIC_CAST(simde__mmask8, simde_mm_movemask_pd(simde_mm_castsi128_pd(a))); #else simde__m128i_private a_ = simde__m128i_to_private(a); simde__mmask8 r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { r |= (a_.i64[i] < 0) ? 
(UINT32_C(1) << i) : 0; } return r; #endif } #if defined(SIMDE_X86_AVX256DQ_ENABLE_NATIVE_ALIASES) #undef _mm_movepi64_mask #define _mm_movepi64_mask(a) simde_mm_movepi64_mask(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask32 simde_mm256_movepi8_mask (simde__m256i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_movepi8_mask(a); #else simde__m256i_private a_ = simde__m256i_to_private(a); simde__mmask32 r = 0; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { r |= HEDLEY_STATIC_CAST(simde__mmask32, simde_mm_movepi8_mask(a_.m128i[i])) << (i * 16); } #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { r |= (a_.i8[i] < 0) ? (UINT64_C(1) << i) : 0; } #endif return HEDLEY_STATIC_CAST(simde__mmask32, r); #endif } #if defined(SIMDE_X86_AVX256BW_ENABLE_NATIVE_ALIASES) #undef _mm256_movepi8_mask #define _mm256_movepi8_mask(a) simde_mm256_movepi8_mask(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask16 simde_mm256_movepi16_mask (simde__m256i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm256_movepi16_mask(a); #else simde__m256i_private a_ = simde__m256i_to_private(a); simde__mmask16 r = 0; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { r |= HEDLEY_STATIC_CAST(simde__mmask16, simde_mm_movepi16_mask(a_.m128i[i])) << (i * 8); } #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { r |= (a_.i16[i] < 0) ? 
(UINT32_C(1) << i) : 0; } #endif return r; #endif } #if defined(SIMDE_X86_AVX256BW_ENABLE_NATIVE_ALIASES) #undef _mm256_movepi16_mask #define _mm256_movepi16_mask(a) simde_mm256_movepi16_mask(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask8 simde_mm256_movepi32_mask (simde__m256i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm256_movepi32_mask(a); #else simde__m256i_private a_ = simde__m256i_to_private(a); simde__mmask8 r = 0; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { r |= HEDLEY_STATIC_CAST(simde__mmask16, simde_mm_movepi32_mask(a_.m128i[i])) << (i * 4); } #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { r |= (a_.i32[i] < 0) ? (UINT32_C(1) << i) : 0; } #endif return r; #endif } #if defined(SIMDE_X86_AVX256DQ_ENABLE_NATIVE_ALIASES) #undef _mm256_movepi32_mask #define _mm256_movepi32_mask(a) simde_mm256_movepi32_mask(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask8 simde_mm256_movepi64_mask (simde__m256i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm256_movepi64_mask(a); #else simde__m256i_private a_ = simde__m256i_to_private(a); simde__mmask8 r = 0; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { r |= HEDLEY_STATIC_CAST(simde__mmask8, simde_mm_movepi64_mask(a_.m128i[i])) << (i * 2); } #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { r |= (a_.i64[i] < 0) ? 
(UINT32_C(1) << i) : 0; } #endif return r; #endif } #if defined(SIMDE_X86_AVX256DQ_ENABLE_NATIVE_ALIASES) #undef _mm256_movepi64_mask #define _mm256_movepi64_mask(a) simde_mm256_movepi64_mask(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask64 simde_mm512_movepi8_mask (simde__m512i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_movepi8_mask(a); #else simde__m512i_private a_ = simde__m512i_to_private(a); simde__mmask64 r = 0; #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { r |= HEDLEY_STATIC_CAST(simde__mmask64, simde_mm256_movepi8_mask(a_.m256i[i])) << (i * 32); } #else r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { r |= (a_.i8[i] < 0) ? (UINT64_C(1) << i) : 0; } #endif return HEDLEY_STATIC_CAST(simde__mmask64, r); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_movepi8_mask #define _mm512_movepi8_mask(a) simde_mm512_movepi8_mask(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask32 simde_mm512_movepi16_mask (simde__m512i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_movepi16_mask(a); #else simde__m512i_private a_ = simde__m512i_to_private(a); simde__mmask32 r = 0; #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { r |= HEDLEY_STATIC_CAST(simde__mmask32, simde_mm256_movepi16_mask(a_.m256i[i])) << (i * 16); } #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { r |= (a_.i16[i] < 0) ? 
(UINT32_C(1) << i) : 0; } #endif return r; #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_movepi16_mask #define _mm512_movepi16_mask(a) simde_mm512_movepi16_mask(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask16 simde_mm512_movepi32_mask (simde__m512i a) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_movepi32_mask(a); #else simde__m512i_private a_ = simde__m512i_to_private(a); simde__mmask16 r = 0; #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { r |= HEDLEY_STATIC_CAST(simde__mmask16, simde_mm256_movepi32_mask(a_.m256i[i])) << (i * 8); } #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { r |= (a_.i32[i] < 0) ? (UINT32_C(1) << i) : 0; } #endif return r; #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_movepi32_mask #define _mm512_movepi32_mask(a) simde_mm512_movepi32_mask(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask8 simde_mm512_movepi64_mask (simde__m512i a) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_movepi64_mask(a); #else simde__m512i_private a_ = simde__m512i_to_private(a); simde__mmask8 r = 0; #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { r |= simde_mm256_movepi64_mask(a_.m256i[i]) << (i * 4); } #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { r |= (a_.i64[i] < 0) ? 
(UINT32_C(1) << i) : 0; } #endif return r; #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_movepi64_mask #define _mm512_movepi64_mask(a) simde_mm512_movepi64_mask(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_MOV_MASK_H) */ simde-0.7.2/simde/x86/avx512/movm.h000066400000000000000000000365431400333146700165400ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_X86_AVX512_MOVM_H) #define SIMDE_X86_AVX512_MOVM_H #include "types.h" #include "../avx2.h" #include "set.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_movm_epi8 (simde__mmask16 k) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_movm_epi8(k); #elif defined(SIMDE_X86_SSSE3_NATIVE) const simde__m128i zero = simde_mm_setzero_si128(); const simde__m128i bits = simde_mm_set_epi16(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80); const simde__m128i shuffle = simde_mm_set_epi8(15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0); simde__m128i r; r = simde_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k)); r = simde_mm_mullo_epi16(r, bits); r = simde_mm_shuffle_epi8(r, shuffle); r = simde_mm_cmpgt_epi8(zero, r); return r; #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) static const int8_t pos_data[] = { 7, 6, 5, 4, 3, 2, 1, 0 }; int8x8_t pos = vld1_s8(pos_data); r_.neon_i8 = vcombine_s8( vshr_n_s8(vshl_s8(vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, k)), pos), 7), vshr_n_s8(vshl_s8(vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, k >> 8)), pos), 7)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = ((k >> i) & 1) ? 
~INT8_C(0) : INT8_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_movm_epi8 #define _mm_movm_epi8(k) simde_mm_movm_epi8(k) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_movm_epi8 (simde__mmask32 k) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_movm_epi8(k); #elif defined(SIMDE_X86_AVX2_NATIVE) const simde__m256i zero = simde_mm256_setzero_si256(); const simde__m256i bits = simde_mm256_broadcastsi128_si256(simde_mm_set_epi16(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80)); const simde__m256i shuffle = simde_mm256_broadcastsi128_si256(simde_mm_set_epi8(15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0)); simde__m256i r; r = simde_mm256_set_m128i(_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k >> 16)), _mm_set1_epi16(HEDLEY_STATIC_CAST(short, k))); r = simde_mm256_mullo_epi16(r, bits); r = simde_mm256_shuffle_epi8(r, shuffle); r = simde_mm256_cmpgt_epi8(zero, r); return r; #else simde__m256i_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_movm_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k)); r_.m128i[1] = simde_mm_movm_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = ((k >> i) & 1) ? 
~INT8_C(0) : INT8_C(0); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_movm_epi8 #define _mm256_movm_epi8(k) simde_mm256_movm_epi8(k) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_movm_epi8 (simde__mmask64 k) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_movm_epi8(k); #else simde__m512i_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) r_.m256i[0] = simde_mm256_movm_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k)); r_.m256i[1] = simde_mm256_movm_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k >> 32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = ((k >> i) & 1) ? ~INT8_C(0) : INT8_C(0); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_movm_epi8 #define _mm512_movm_epi8(k) simde_mm512_movm_epi8(k) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_movm_epi16 (simde__mmask8 k) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_movm_epi16(k); #elif defined(SIMDE_X86_SSE2_NATIVE) const simde__m128i bits = simde_mm_set_epi16(0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, INT16_MIN /* 0x8000 */); simde__m128i r; r = simde_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k)); r = simde_mm_mullo_epi16(r, bits); r = simde_mm_srai_epi16(r, 15); return r; #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) static const int16_t pos_data[] = { 15, 14, 13, 12, 11, 10, 9, 8 }; const int16x8_t pos = vld1q_s16(pos_data); r_.neon_i16 = vshrq_n_s16(vshlq_s16(vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, k)), pos), 15); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = ((k >> i) & 1) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_movm_epi16 #define _mm_movm_epi16(k) simde_mm_movm_epi16(k) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_movm_epi16 (simde__mmask16 k) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_movm_epi16(k); #elif defined(SIMDE_X86_AVX2_NATIVE) const __m256i bits = _mm256_set_epi16(0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, INT16_MIN /* 0x8000 */); __m256i r; r = _mm256_set1_epi16(HEDLEY_STATIC_CAST(short, k)); r = _mm256_mullo_epi16(r, bits); r = _mm256_srai_epi16(r, 15); return r; #else simde__m256i_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_movm_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k)); r_.m128i[1] = simde_mm_movm_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = ((k >> i) & 1) ? ~INT16_C(0) : INT16_C(0); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_movm_epi16 #define _mm256_movm_epi16(k) simde_mm256_movm_epi16(k) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_movm_epi16 (simde__mmask32 k) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm512_movm_epi16(k); #else simde__m512i_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) r_.m256i[0] = simde_mm256_movm_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k)); r_.m256i[1] = simde_mm256_movm_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = ((k >> i) & 1) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_movm_epi16 #define _mm512_movm_epi16(k) simde_mm512_movm_epi16(k) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_movm_epi32 (simde__mmask8 k) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm_movm_epi32(k); #elif defined(SIMDE_X86_AVX2_NATIVE) const __m128i shifts = _mm_set_epi32(28, 29, 30, 31); __m128i r; r = _mm_set1_epi32(HEDLEY_STATIC_CAST(int, k)); r = _mm_sllv_epi32(r, shifts); r = _mm_srai_epi32(r, 31); return r; #elif defined(SIMDE_X86_SSE2_NATIVE) const simde__m128i bits = simde_mm_set_epi32(0x10000000, 0x20000000, 0x40000000, INT32_MIN /* 0x80000000 */); simde__m128i r; r = simde_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k)); r = simde_mm_mullo_epi16(r, bits); r = simde_mm_srai_epi32(r, 31); return r; #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) static const int32_t pos_data[] = { 31, 30, 29, 28 }; const int32x4_t pos = vld1q_s32(pos_data); r_.neon_i32 = vshrq_n_s32(vshlq_s32(vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, k)), pos), 31); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = ((k >> i) & 1) ? 
~INT32_C(0) : INT32_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_movm_epi32 #define _mm_movm_epi32(k) simde_mm_movm_epi32(k) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_movm_epi32 (simde__mmask8 k) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm256_movm_epi32(k); #elif defined(SIMDE_X86_AVX2_NATIVE) const __m256i shifts = _mm256_set_epi32(24, 25, 26, 27, 28, 29, 30, 31); __m256i r; r = _mm256_set1_epi32(HEDLEY_STATIC_CAST(int, k)); r = _mm256_sllv_epi32(r, shifts); r = _mm256_srai_epi32(r, 31); return r; #else simde__m256i_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_movm_epi32(k ); r_.m128i[1] = simde_mm_movm_epi32(k >> 4); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = ((k >> i) & 1) ? ~INT32_C(0) : INT32_C(0); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_movm_epi32 #define _mm256_movm_epi32(k) simde_mm256_movm_epi32(k) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_movm_epi32 (simde__mmask16 k) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_movm_epi32(k); #else simde__m512i_private r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) r_.m256i[0] = simde_mm256_movm_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k )); r_.m256i[1] = simde_mm256_movm_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = ((k >> i) & 1) ? 
~INT32_C(0) : INT32_C(0); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_movm_epi32 #define _mm512_movm_epi32(k) simde_mm512_movm_epi32(k) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_movm_epi64 (simde__mmask8 k) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_movm_epi64(k); /* N.B. CM: These fallbacks may not be faster as there are only two elements */ #elif defined(SIMDE_X86_AVX2_NATIVE) const __m128i shifts = _mm_set_epi32(30, 30, 31, 31); __m128i r; r = _mm_set1_epi32(HEDLEY_STATIC_CAST(int, k)); r = _mm_sllv_epi32(r, shifts); r = _mm_srai_epi32(r, 31); return r; #elif defined(SIMDE_X86_SSE2_NATIVE) const simde__m128i bits = simde_mm_set_epi32(0x40000000, 0x40000000, INT32_MIN /* 0x80000000 */, INT32_MIN /* 0x80000000 */); simde__m128i r; r = simde_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k)); r = simde_mm_mullo_epi16(r, bits); r = simde_mm_srai_epi32(r, 31); return r; #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) static const int64_t pos_data[] = { 63, 62 }; const int64x2_t pos = vld1q_s64(pos_data); r_.neon_i64 = vshrq_n_s64(vshlq_s64(vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, k)), pos), 63); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = ((k >> i) & 1) ? 
~INT64_C(0) : INT64_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_movm_epi64 #define _mm_movm_epi64(k) simde_mm_movm_epi64(k) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_movm_epi64 (simde__mmask8 k) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_movm_epi64(k); #elif defined(SIMDE_X86_AVX2_NATIVE) const __m256i shifts = _mm256_set_epi32(28, 28, 29, 29, 30, 30, 31, 31); __m256i r; r = _mm256_set1_epi32(HEDLEY_STATIC_CAST(int, k)); r = _mm256_sllv_epi32(r, shifts); r = _mm256_srai_epi32(r, 31); return r; #else simde__m256i_private r_; /* N.B. CM: This fallback may not be faster as there are only four elements */ #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_movm_epi64(k ); r_.m128i[1] = simde_mm_movm_epi64(k >> 2); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = ((k >> i) & 1) ? ~INT64_C(0) : INT64_C(0); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_movm_epi64 #define _mm256_movm_epi64(k) simde_mm256_movm_epi64(k) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_movm_epi64 (simde__mmask8 k) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_movm_epi64(k); #else simde__m512i_private r_; /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) r_.m256i[0] = simde_mm256_movm_epi64(k ); r_.m256i[1] = simde_mm256_movm_epi64(k >> 4); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = ((k >> i) & 1) ? 
~INT64_C(0) : INT64_C(0); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_movm_epi64 #define _mm512_movm_epi64(k) simde_mm512_movm_epi64(k) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_MOVM_H) */ simde-0.7.2/simde/x86/avx512/mul.h000066400000000000000000000216771400333146700163610ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_MUL_H) #define SIMDE_X86_AVX512_MUL_H #include "types.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mul_ps (simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mul_ps(a, b); #else simde__m512_private r_, a_ = simde__m512_to_private(a), b_ = simde__m512_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = a_.f32 * b_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_mul_ps(a_.m256[i], b_.m256[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mul_ps #define _mm512_mul_ps(a, b) simde_mm512_mul_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_mul_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_mul_ps(src, k, a, b); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_mul_ps(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_mul_ps #define _mm512_mask_mul_ps(src, k, a, b) simde_mm512_mask_mul_ps(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_maskz_mul_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_mul_ps(k, a, b); #else return simde_mm512_maskz_mov_ps(k, simde_mm512_mul_ps(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_mul_ps #define _mm512_maskz_mul_ps(k, a, b) simde_mm512_maskz_mul_ps(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mul_pd (simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mul_pd(a, b); #else simde__m512d_private r_, a_ = 
simde__m512d_to_private(a), b_ = simde__m512d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = a_.f64 * b_.f64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_mul_pd(a_.m256d[i], b_.m256d[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mul_pd #define _mm512_mul_pd(a, b) simde_mm512_mul_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_mul_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_mul_pd(src, k, a, b); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_mul_pd(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_mul_pd #define _mm512_mask_mul_pd(src, k, a, b) simde_mm512_mask_mul_pd(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_maskz_mul_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_mul_pd(k, a, b); #else return simde_mm512_maskz_mov_pd(k, simde_mm512_mul_pd(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_mul_pd #define _mm512_maskz_mul_pd(k, a, b) simde_mm512_maskz_mul_pd(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mul_epi32 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mul_epi32(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_SHUFFLE_VECTOR_) simde__m512i_private x; __typeof__(r_.i64) ta, tb; /* Get even numbered 32-bit values */ x.i32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.i32, b_.i32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); /* Cast to 64 bits */ SIMDE_CONVERT_VECTOR_(ta, x.m256i_private[0].i32); SIMDE_CONVERT_VECTOR_(tb, 
x.m256i_private[1].i32); r_.i64 = ta * tb; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i << 1]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i << 1]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mul_epi32 #define _mm512_mul_epi32(a, b) simde_mm512_mul_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_mul_epi32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_mul_epi32(src, k, a, b); #else return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_mul_epi32 #define _mm512_mask_mul_epi32(src, k, a, b) simde_mm512_mask_mul_epi32(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_mul_epi32(simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_mul_epi32(k, a, b); #else return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_mul_epi32 #define _mm512_maskz_mul_epi32(k, a, b) simde_mm512_maskz_mul_epi32(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mul_epu32 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mul_epu32(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_SHUFFLE_VECTOR_) simde__m512i_private x; __typeof__(r_.u64) ta, tb; x.u32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.u32, b_.u32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); SIMDE_CONVERT_VECTOR_(ta, x.m256i_private[0].u32); SIMDE_CONVERT_VECTOR_(tb, x.m256i_private[1].u32); r_.u64 = ta * tb; #else SIMDE_VECTORIZE 
for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i << 1]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i << 1]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mul_epu32 #define _mm512_mul_epu32(a, b) simde_mm512_mul_epu32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_mul_epu32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_mul_epu32(src, k, a, b); #else return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epu32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_mul_epu32 #define _mm512_mask_mul_epu32(src, k, a, b) simde_mm512_mask_mul_epu32(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_mul_epu32(simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_mul_epu32(k, a, b); #else return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epu32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_mul_epu32 #define _mm512_maskz_mul_epu32(k, a, b) simde_mm512_maskz_mul_epu32(k, a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_MUL_H) */ simde-0.7.2/simde/x86/avx512/mulhi.h000066400000000000000000000043521400333146700166710ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above 
copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_MULHI_H) #define SIMDE_X86_AVX512_MULHI_H #include "types.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mulhi_epi16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mulhi_epi16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mulhi_epi16 #define _mm512_mulhi_epi16(a, b) simde_mm512_mulhi_epi16(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_MULHI_H) */ simde-0.7.2/simde/x86/avx512/mulhrs.h000066400000000000000000000043401400333146700170620ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, 
including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_MULHRS_H) #define SIMDE_X86_AVX512_MULHRS_H #include "types.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mulhrs_epi16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mulhrs_epi16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mulhrs_epi16 #define _mm512_mulhrs_epi16(a, b) simde_mm512_mulhrs_epi16(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_MULHRS_H) */ 
simde-0.7.2/simde/x86/avx512/mullo.h000066400000000000000000000074021400333146700167020ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_MULLO_H) #define SIMDE_X86_AVX512_MULLO_H #include "types.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mullo_epi16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mullo_epi16(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] * b_.i16[i]); } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mullo_epi16 #define _mm512_mullo_epi16(a, b) simde_mm512_mullo_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mullo_epi32 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mullo_epi32(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] * b_.i32[i]); } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mullo_epi32 #define _mm512_mullo_epi32(a, b) simde_mm512_mullo_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_mullo_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_mullo_epi32(src, k, a, b); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_mullo_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_mullo_epi32 #define _mm512_mask_mullo_epi32(src, k, a, b) simde_mm512_mask_mullo_epi32(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde__m512i simde_mm512_maskz_mullo_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_mullo_epi32(k, a, b); #else return simde_mm512_maskz_mov_epi32(k, simde_mm512_mullo_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_mullo_epi32 #define _mm512_maskz_mullo_epi32(k, a, b) simde_mm512_maskz_mullo_epi32(k, a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_MULLO_H) */ simde-0.7.2/simde/x86/avx512/negate.h000066400000000000000000000051171400333146700170160ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #if !defined(SIMDE_X86_AVX512_NEGATE_H) #define SIMDE_X86_AVX512_NEGATE_H #include "types.h" #include "mov.h" #include "xor.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_x_mm512_negate_ps(simde__m512 a) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return simde_mm512_xor_ps(a,_mm512_set1_ps(SIMDE_FLOAT32_C(-0.0))); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if defined(SIMDE_VECTOR_NEGATE) r_.f32 = -a_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = -a_.f32[i]; } #endif return simde__m512_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_x_mm512_negate_pd(simde__m512d a) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return simde_mm512_xor_pd(a, _mm512_set1_pd(SIMDE_FLOAT64_C(-0.0))); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if defined(SIMDE_VECTOR_NEGATE) r_.f64 = -a_.f64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = -a_.f64[i]; } #endif return simde__m512d_from_private(r_); #endif } SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_NEGATE_H) */ simde-0.7.2/simde/x86/avx512/or.h000066400000000000000000000174711400333146700162010ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or 
substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_OR_H) #define SIMDE_X86_AVX512_OR_H #include "types.h" #include "../avx2.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_or_ps (simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_or_ps(a, b); #else simde__m512_private r_, a_ = simde__m512_to_private(a), b_ = simde__m512_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) r_.m256[0] = simde_mm256_or_ps(a_.m256[0], b_.m256[0]); r_.m256[1] = simde_mm256_or_ps(a_.m256[1], b_.m256[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) r_.i32f = a_.i32f | b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_or_ps #define _mm512_or_ps(a, b) simde_mm512_or_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_or_pd (simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_or_pd(a, b); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a), b_ = simde__m512d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) r_.m256d[0] = simde_mm256_or_pd(a_.m256d[0], b_.m256d[0]); r_.m256d[1] = 
simde_mm256_or_pd(a_.m256d[1], b_.m256d[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) r_.i32f = a_.i32f | b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_or_pd #define _mm512_or_pd(a, b) simde_mm512_or_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_or_epi32 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_or_epi32(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 | b_.i32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] | b_.i32[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_or_epi32 #define _mm512_or_epi32(a, b) simde_mm512_or_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_or_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_or_epi32(src, k, v2, v3); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_or_epi32(v2, v3)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_or_epi32 #define _mm512_mask_or_epi32(src, k, v2, v3) simde_mm512_mask_or_epi32(src, k, v2, v3) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_or_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_or_epi32(k, a, b); #else return simde_mm512_maskz_mov_epi32(k, simde_mm512_or_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_or_epi32 #define 
_mm512_maskz_or_epi32(k, a, b) simde_mm512_maskz_or_epi32(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_or_epi64 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_or_epi64(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_or_si256(a_.m256i[i], b_.m256i[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) r_.i32f = a_.i32f | b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_or_epi64 #define _mm512_or_epi64(a, b) simde_mm512_or_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_or_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_or_epi64(src, k, a, b); #else return simde_mm512_mask_mov_epi64(src, k, simde_mm512_or_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_or_epi64 #define _mm512_mask_or_epi64(src, k, a, b) simde_mm512_mask_or_epi64(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_or_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_or_epi64(k, a, b); #else return simde_mm512_maskz_mov_epi64(k, simde_mm512_or_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_or_epi64 #define _mm512_maskz_or_epi64(k, a, b) simde_mm512_maskz_or_epi64(k, a, b) #endif /* Bitwise OR of the two 512-bit integer vectors a and b. */ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_or_si512 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) 
return _mm512_or_si512(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_X86_AVX2_NATIVE) r_.m256i[0] = simde_mm256_or_si256(a_.m256i[0], b_.m256i[0]); r_.m256i[1] = simde_mm256_or_si256(a_.m256i[1], b_.m256i[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f | b_.i32f; #else /* Bound the loop by i32f, the member actually indexed (as the other i32f loops in this file do); the i32 lane count need not equal the i32f lane count, and using it could index past the end of i32f. */ SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_or_si512 #define _mm512_or_si512(a, b) simde_mm512_or_si512(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_OR_H) */ simde-0.7.2/simde/x86/avx512/packs.h000066400000000000000000000152431400333146700166550ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_PACKS_H) #define SIMDE_X86_AVX512_PACKS_H #include "types.h" #include "../avx2.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_packs_epi16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_packs_epi16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) r_.m256i[0] = simde_mm256_packs_epi16(a_.m256i[0], b_.m256i[0]); r_.m256i[1] = simde_mm256_packs_epi16(a_.m256i[1], b_.m256i[1]); #else const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2; const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4; const size_t octet_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 8; SIMDE_VECTORIZE for (size_t i = 0 ; i < octet_point ; i++) { r_.i8[i] = (a_.i16[i] > INT8_MAX) ? INT8_MAX : ((a_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); r_.i8[i + octet_point] = (b_.i16[i] > INT8_MAX) ? INT8_MAX : ((b_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[i])); r_.i8[quarter_point + i] = (a_.i16[octet_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[octet_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[octet_point + i])); r_.i8[quarter_point + i + octet_point] = (b_.i16[octet_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[octet_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[octet_point + i])); r_.i8[halfway_point + i] = (a_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + i])); r_.i8[halfway_point + i + octet_point] = (b_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + i] < INT8_MIN) ? 
INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + i])); r_.i8[halfway_point + quarter_point + i] = (a_.i16[quarter_point + octet_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + octet_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + octet_point + i])); r_.i8[halfway_point + quarter_point + i + octet_point] = (b_.i16[quarter_point + octet_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + octet_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + octet_point + i])); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_packs_epi16 #define _mm512_packs_epi16(a, b) simde_mm512_packs_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_packs_epi32 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_packs_epi32(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) r_.m256i[0] = simde_mm256_packs_epi32(a_.m256i[0], b_.m256i[0]); r_.m256i[1] = simde_mm256_packs_epi32(a_.m256i[1], b_.m256i[1]); #else const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; const size_t octet_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 8; SIMDE_VECTORIZE for (size_t i = 0 ; i < octet_point ; i++) { r_.i16[i] = (a_.i32[i] > INT16_MAX) ? INT16_MAX : ((a_.i32[i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a_.i32[i])); r_.i16[i + octet_point] = (b_.i32[i] > INT16_MAX) ? INT16_MAX : ((b_.i32[i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, b_.i32[i])); r_.i16[quarter_point + i] = (a_.i32[octet_point + i] > INT16_MAX) ? INT16_MAX : ((a_.i32[octet_point + i] < INT16_MIN) ? 
INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a_.i32[octet_point + i])); r_.i16[quarter_point + i + octet_point] = (b_.i32[octet_point + i] > INT16_MAX) ? INT16_MAX : ((b_.i32[octet_point + i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, b_.i32[octet_point + i])); r_.i16[halfway_point + i] = (a_.i32[quarter_point + i] > INT16_MAX) ? INT16_MAX : ((a_.i32[quarter_point +i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a_.i32[quarter_point + i])); r_.i16[halfway_point + i + octet_point] = (b_.i32[quarter_point + i] > INT16_MAX) ? INT16_MAX : ((b_.i32[quarter_point + i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, b_.i32[quarter_point +i])); r_.i16[halfway_point + quarter_point + i] = (a_.i32[quarter_point + octet_point + i] > INT16_MAX) ? INT16_MAX : ((a_.i32[quarter_point + octet_point + i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a_.i32[quarter_point + octet_point + i])); r_.i16[halfway_point + quarter_point + i + octet_point] = (b_.i32[quarter_point + octet_point + i] > INT16_MAX) ? INT16_MAX : ((b_.i32[quarter_point + octet_point + i] < INT16_MIN) ? 
INT16_MIN : HEDLEY_STATIC_CAST(int16_t, b_.i32[quarter_point + octet_point + i])); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_packs_epi32 #define _mm512_packs_epi32(a, b) simde_mm512_packs_epi32(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_PACKS_H) */ simde-0.7.2/simde/x86/avx512/packus.h000066400000000000000000000152611400333146700170420ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_PACKUS_H) #define SIMDE_X86_AVX512_PACKUS_H #include "types.h" #include "../avx2.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_packus_epi16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_packus_epi16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) r_.m256i[0] = simde_mm256_packus_epi16(a_.m256i[0], b_.m256i[0]); r_.m256i[1] = simde_mm256_packus_epi16(a_.m256i[1], b_.m256i[1]); #else const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2; const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4; const size_t octet_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 8; SIMDE_VECTORIZE for (size_t i = 0 ; i < octet_point ; i++) { r_.u8[i] = (a_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[i])); r_.u8[i + octet_point] = (b_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[i])); r_.u8[quarter_point + i] = (a_.i16[octet_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[octet_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[octet_point + i])); r_.u8[quarter_point + i + octet_point] = (b_.i16[octet_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[octet_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[octet_point + i])); r_.u8[halfway_point + i] = (a_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + i])); r_.u8[halfway_point + i + octet_point] = (b_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + i] < 0) ? 
UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + i])); r_.u8[halfway_point + quarter_point + i] = (a_.i16[quarter_point + octet_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + octet_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + octet_point + i])); r_.u8[halfway_point + quarter_point + i + octet_point] = (b_.i16[quarter_point + octet_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + octet_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + octet_point + i])); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_packus_epi16 #define _mm512_packus_epi16(a, b) simde_mm512_packus_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_packus_epi32 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_packus_epi32(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_packus_epi32(a_.m256i[i], b_.m256i[i]); } #else const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; const size_t octet_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 8; SIMDE_VECTORIZE for (size_t i = 0 ; i < octet_point ; i++) { r_.u16[i] = (a_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i])); r_.u16[i + octet_point] = (b_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i])); r_.u16[quarter_point + i] = (a_.i32[octet_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[octet_point + i] < 0) ? 
UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[octet_point + i])); r_.u16[quarter_point + i + octet_point] = (b_.i32[octet_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[octet_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[octet_point + i])); r_.u16[halfway_point + i] = (a_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point +i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + i])); r_.u16[halfway_point + i + octet_point] = (b_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point +i])); r_.u16[halfway_point + quarter_point + i] = (a_.i32[quarter_point + octet_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point + octet_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + octet_point + i])); r_.u16[halfway_point + quarter_point + i + octet_point] = (b_.i32[quarter_point + octet_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + octet_point + i] < 0) ? 
UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point + octet_point + i])); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_packus_epi32 #define _mm512_packus_epi32(a, b) simde_mm512_packus_epi32(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_PACKUS_H) */ simde-0.7.2/simde/x86/avx512/permutex2var.h000066400000000000000000002103111400333146700202110ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_X86_AVX512_PERMUTEX2VAR_H) #define SIMDE_X86_AVX512_PERMUTEX2VAR_H #include "types.h" #include "and.h" #include "andnot.h" #include "blend.h" #include "mov.h" #include "or.h" #include "set1.h" #include "slli.h" #include "srli.h" #include "test.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ /* The following generic code avoids many, nearly identical, repetitions of fairly complex code. * If the compiler optimizes well, in particular extracting invariant code from loops * and simplifying code involving constants passed as arguments, it should not be * significantly slower than specific code. * Note that when the original vector contains few elements, these implementations * may not be faster than portable code. */ #if defined(SIMDE_X86_SSSE3_NATIVE) || defined(SIMDE_ARM_NEON_A64V8_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_WASM_SIMD128_NATIVE) #define SIMDE_X_PERMUTEX2VAR_USE_GENERIC #endif #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_permutex2var128 (const simde__m128i *a, const simde__m128i idx, const simde__m128i *b, const unsigned int log2_index_size, const unsigned int log2_data_length) { const int idx_mask = (1 << (5 - log2_index_size + log2_data_length)) - 1; #if defined(SIMDE_X86_SSE3_NATIVE) __m128i ra, rb, t, test, select, index; const __m128i sixteen = _mm_set1_epi8(16); /* Avoid the mullo intrinsics which have high latency (and the 32-bit one requires SSE4.1) */ switch (log2_index_size) { default: /* Avoid uninitialized variable warning/error */ case 0: index = _mm_and_si128(idx, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, idx_mask))); break; case 1: index = _mm_and_si128(idx, _mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, idx_mask))); index = _mm_slli_epi32(index, 1); t = _mm_slli_epi32(index, 8); index = _mm_or_si128(index, t); index = _mm_add_epi16(index, 
_mm_set1_epi16(0x0100)); break; case 2: index = _mm_and_si128(idx, _mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, idx_mask))); index = _mm_slli_epi32(index, 2); t = _mm_slli_epi32(index, 8); index = _mm_or_si128(index, t); t = _mm_slli_epi32(index, 16); index = _mm_or_si128(index, t); index = _mm_add_epi32(index, _mm_set1_epi32(0x03020100)); break; } test = index; index = _mm_and_si128(index, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, (1 << (4 + log2_data_length)) - 1))); test = _mm_cmpgt_epi8(test, index); ra = _mm_shuffle_epi8(a[0], index); rb = _mm_shuffle_epi8(b[0], index); #if defined(SIMDE_X86_SSE4_1_NATIVE) SIMDE_VECTORIZE for (int i = 1 ; i < (1 << log2_data_length) ; i++) { select = _mm_cmplt_epi8(index, sixteen); index = _mm_sub_epi8(index, sixteen); ra = _mm_blendv_epi8(_mm_shuffle_epi8(a[i], index), ra, select); rb = _mm_blendv_epi8(_mm_shuffle_epi8(b[i], index), rb, select); } return _mm_blendv_epi8(ra, rb, test); #else SIMDE_VECTORIZE for (int i = 1 ; i < (1 << log2_data_length) ; i++) { select = _mm_cmplt_epi8(index, sixteen); index = _mm_sub_epi8(index, sixteen); ra = _mm_or_si128(_mm_andnot_si128(select, _mm_shuffle_epi8(a[i], index)), _mm_and_si128(select, ra)); rb = _mm_or_si128(_mm_andnot_si128(select, _mm_shuffle_epi8(b[i], index)), _mm_and_si128(select, rb)); } return _mm_or_si128(_mm_andnot_si128(test, ra), _mm_and_si128(test, rb)); #endif #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint8x16_t index, r; uint16x8_t index16; uint32x4_t index32; uint8x16x2_t table2_a, table2_b; uint8x16x4_t table4_a, table4_b; switch (log2_index_size) { case 0: index = vandq_u8(simde__m128i_to_neon_u8(idx), vdupq_n_u8(HEDLEY_STATIC_CAST(uint8_t, idx_mask))); break; case 1: index16 = vandq_u16(simde__m128i_to_neon_u16(idx), vdupq_n_u16(HEDLEY_STATIC_CAST(uint16_t, idx_mask))); index16 = vmulq_n_u16(index16, 0x0202); index16 = vaddq_u16(index16, vdupq_n_u16(0x0100)); index = vreinterpretq_u8_u16(index16); break; case 2: index32 = vandq_u32(simde__m128i_to_neon_u32(idx), 
vdupq_n_u32(HEDLEY_STATIC_CAST(uint32_t, idx_mask))); index32 = vmulq_n_u32(index32, 0x04040404); index32 = vaddq_u32(index32, vdupq_n_u32(0x03020100)); index = vreinterpretq_u8_u32(index32); break; } uint8x16_t mask = vdupq_n_u8(HEDLEY_STATIC_CAST(uint8_t, (1 << (4 + log2_data_length)) - 1)); switch (log2_data_length) { case 0: r = vqtbx1q_u8(vqtbl1q_u8(simde__m128i_to_neon_u8(b[0]), vandq_u8(index, mask)), simde__m128i_to_neon_u8(a[0]), index); break; case 1: table2_a.val[0] = simde__m128i_to_neon_u8(a[0]); table2_a.val[1] = simde__m128i_to_neon_u8(a[1]); table2_b.val[0] = simde__m128i_to_neon_u8(b[0]); table2_b.val[1] = simde__m128i_to_neon_u8(b[1]); r = vqtbx2q_u8(vqtbl2q_u8(table2_b, vandq_u8(index, mask)), table2_a, index); break; case 2: table4_a.val[0] = simde__m128i_to_neon_u8(a[0]); table4_a.val[1] = simde__m128i_to_neon_u8(a[1]); table4_a.val[2] = simde__m128i_to_neon_u8(a[2]); table4_a.val[3] = simde__m128i_to_neon_u8(a[3]); table4_b.val[0] = simde__m128i_to_neon_u8(b[0]); table4_b.val[1] = simde__m128i_to_neon_u8(b[1]); table4_b.val[2] = simde__m128i_to_neon_u8(b[2]); table4_b.val[3] = simde__m128i_to_neon_u8(b[3]); r = vqtbx4q_u8(vqtbl4q_u8(table4_b, vandq_u8(index, mask)), table4_a, index); break; } return simde__m128i_from_neon_u8(r); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r, ra, rb, t, index, s, thirty_two = vec_splats(HEDLEY_STATIC_CAST(uint8_t, 32)); SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) index16; SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) temp32, index32; SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL char) select, test; switch (log2_index_size) { default: /* Avoid uninitialized variable warning/error */ case 0: index = vec_and(simde__m128i_to_altivec_u8(idx), vec_splats(HEDLEY_STATIC_CAST(uint8_t, idx_mask))); break; case 1: index16 = simde__m128i_to_altivec_u16(idx); index16 = vec_and(index16, vec_splats(HEDLEY_STATIC_CAST(uint16_t, idx_mask))); index16 = vec_mladd(index16, 
vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0202)), vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0100))); index = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index16); break; case 2: index32 = simde__m128i_to_altivec_u32(idx); index32 = vec_and(index32, vec_splats(HEDLEY_STATIC_CAST(uint32_t, idx_mask))); /* Multiply index32 by 0x04040404; unfortunately vec_mul isn't available so (mis)use 16-bit vec_mladd */ temp32 = vec_sl(index32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 16))); index32 = vec_add(index32, temp32); index32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_mladd(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), index32), vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0404)), vec_splat_u16(0))); index32 = vec_add(index32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0x03020100))); index = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index32); break; } if (log2_data_length == 0) { r = vec_perm(simde__m128i_to_altivec_u8(a[0]), simde__m128i_to_altivec_u8(b[0]), HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index)); } else { s = index; index = vec_and(index, vec_splats(HEDLEY_STATIC_CAST(uint8_t, (1 << (4 + log2_data_length)) - 1))); test = vec_cmpgt(s, index); ra = vec_perm(simde__m128i_to_altivec_u8(a[0]), simde__m128i_to_altivec_u8(a[1]), index); rb = vec_perm(simde__m128i_to_altivec_u8(b[0]), simde__m128i_to_altivec_u8(b[1]), index); SIMDE_VECTORIZE for (int i = 2 ; i < (1 << log2_data_length) ; i += 2) { select = vec_cmplt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), index), HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), thirty_two)); index = vec_sub(index, thirty_two); t = vec_perm(simde__m128i_to_altivec_u8(a[i]), simde__m128i_to_altivec_u8(a[i + 1]), index); ra = vec_sel(t, ra, select); t = vec_perm(simde__m128i_to_altivec_u8(b[i]), simde__m128i_to_altivec_u8(b[i + 1]), index); rb = vec_sel(t, 
rb, select); } r = vec_sel(ra, rb, test); } return simde__m128i_from_altivec_u8(r); #elif defined(SIMDE_WASM_SIMD128_NATIVE) const v128_t sixteen = wasm_i8x16_splat(16); v128_t index = simde__m128i_to_wasm_v128(idx); switch (log2_index_size) { case 0: index = wasm_v128_and(index, wasm_i8x16_splat(HEDLEY_STATIC_CAST(int8_t, idx_mask))); break; case 1: index = wasm_v128_and(index, wasm_i16x8_splat(HEDLEY_STATIC_CAST(int16_t, idx_mask))); index = wasm_i16x8_mul(index, wasm_i16x8_splat(0x0202)); index = wasm_i16x8_add(index, wasm_i16x8_splat(0x0100)); break; case 2: index = wasm_v128_and(index, wasm_i32x4_splat(HEDLEY_STATIC_CAST(int32_t, idx_mask))); index = wasm_i32x4_mul(index, wasm_i32x4_splat(0x04040404)); index = wasm_i32x4_add(index, wasm_i32x4_splat(0x03020100)); break; } v128_t r = wasm_v8x16_swizzle(simde__m128i_to_wasm_v128(a[0]), index); SIMDE_VECTORIZE for (int i = 1 ; i < (1 << log2_data_length) ; i++) { index = wasm_i8x16_sub(index, sixteen); r = wasm_v128_or(r, wasm_v8x16_swizzle(simde__m128i_to_wasm_v128(a[i]), index)); } SIMDE_VECTORIZE for (int i = 0 ; i < (1 << log2_data_length) ; i++) { index = wasm_i8x16_sub(index, sixteen); r = wasm_v128_or(r, wasm_v8x16_swizzle(simde__m128i_to_wasm_v128(b[i]), index)); } return simde__m128i_from_wasm_v128(r); #endif } SIMDE_FUNCTION_ATTRIBUTES void simde_x_permutex2var (simde__m128i *r, const simde__m128i *a, const simde__m128i *idx, const simde__m128i *b, const unsigned int log2_index_size, const unsigned int log2_data_length) { SIMDE_VECTORIZE for (int i = 0 ; i < (1 << log2_data_length) ; i++) { r[i] = simde_x_permutex2var128(a, idx[i], b, log2_index_size, log2_data_length); } } #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_permutex2var_epi16 (simde__m128i a, simde__m128i idx, simde__m128i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_permutex2var_epi16(a, idx, b); #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) simde__m128i r; simde_x_permutex2var(&r, 
&a, &idx, &b, 1, 0); return r; #else simde__m128i_private a_ = simde__m128i_to_private(a), idx_ = simde__m128i_to_private(idx), b_ = simde__m128i_to_private(b), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = ((idx_.i16[i] & 8) ? b_ : a_).i16[idx_.i16[i] & 7]; } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_permutex2var_epi16 #define _mm_permutex2var_epi16(a, idx, b) simde_mm_permutex2var_epi16(a, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_permutex2var_epi16 (simde__m128i a, simde__mmask8 k, simde__m128i idx, simde__m128i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask_permutex2var_epi16(a, k, idx, b); #else return simde_mm_mask_mov_epi16(a, k, simde_mm_permutex2var_epi16(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_permutex2var_epi16 #define _mm_mask_permutex2var_epi16(a, k, idx, b) simde_mm_mask_permutex2var_epi16(a, k, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask2_permutex2var_epi16 (simde__m128i a, simde__m128i idx, simde__mmask8 k, simde__m128i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask2_permutex2var_epi16(a, idx, k, b); #else return simde_mm_mask_mov_epi16(idx, k, simde_mm_permutex2var_epi16(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask2_permutex2var_epi16 #define _mm_mask2_permutex2var_epi16(a, idx, k, b) simde_mm_mask2_permutex2var_epi16(a, idx, k, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_permutex2var_epi16 (simde__mmask8 k, simde__m128i a, simde__m128i idx, simde__m128i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && 
defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_maskz_permutex2var_epi16(k, a, idx, b); #else return simde_mm_maskz_mov_epi16(k, simde_mm_permutex2var_epi16(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_permutex2var_epi16 #define _mm_maskz_permutex2var_epi16(k, a, idx, b) simde_mm_maskz_permutex2var_epi16(k, a, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_permutex2var_epi32 (simde__m128i a, simde__m128i idx, simde__m128i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_permutex2var_epi32(a, idx, b); #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) /* This may not be faster than the portable version */ simde__m128i r; simde_x_permutex2var(&r, &a, &idx, &b, 2, 0); return r; #else simde__m128i_private a_ = simde__m128i_to_private(a), idx_ = simde__m128i_to_private(idx), b_ = simde__m128i_to_private(b), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = ((idx_.i32[i] & 4) ? 
b_ : a_).i32[idx_.i32[i] & 3]; } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_permutex2var_epi32 #define _mm_permutex2var_epi32(a, idx, b) simde_mm_permutex2var_epi32(a, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_permutex2var_epi32 (simde__m128i a, simde__mmask8 k, simde__m128i idx, simde__m128i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask_permutex2var_epi32(a, k, idx, b); #else return simde_mm_mask_mov_epi32(a, k, simde_mm_permutex2var_epi32(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_permutex2var_epi32 #define _mm_mask_permutex2var_epi32(a, k, idx, b) simde_mm_mask_permutex2var_epi32(a, k, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask2_permutex2var_epi32 (simde__m128i a, simde__m128i idx, simde__mmask8 k, simde__m128i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask2_permutex2var_epi32(a, idx, k, b); #else return simde_mm_mask_mov_epi32(idx, k, simde_mm_permutex2var_epi32(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask2_permutex2var_epi32 #define _mm_mask2_permutex2var_epi32(a, idx, k, b) simde_mm_mask2_permutex2var_epi32(a, idx, k, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_permutex2var_epi32 (simde__mmask8 k, simde__m128i a, simde__m128i idx, simde__m128i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_maskz_permutex2var_epi32(k, a, idx, b); #else return simde_mm_maskz_mov_epi32(k, simde_mm_permutex2var_epi32(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef 
_mm_maskz_permutex2var_epi32 #define _mm_maskz_permutex2var_epi32(k, a, idx, b) simde_mm_maskz_permutex2var_epi32(k, a, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_permutex2var_epi64 (simde__m128i a, simde__m128i idx, simde__m128i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_permutex2var_epi64(a, idx, b); #else simde__m128i_private a_ = simde__m128i_to_private(a), idx_ = simde__m128i_to_private(idx), b_ = simde__m128i_to_private(b), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = ((idx_.i64[i] & 2) ? b_ : a_).i64[idx_.i64[i] & 1]; } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_permutex2var_epi64 #define _mm_permutex2var_epi64(a, idx, b) simde_mm_permutex2var_epi64(a, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_permutex2var_epi64 (simde__m128i a, simde__mmask8 k, simde__m128i idx, simde__m128i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask_permutex2var_epi64(a, k, idx, b); #else return simde_mm_mask_mov_epi64(a, k, simde_mm_permutex2var_epi64(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_permutex2var_epi64 #define _mm_mask_permutex2var_epi64(a, k, idx, b) simde_mm_mask_permutex2var_epi64(a, k, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask2_permutex2var_epi64 (simde__m128i a, simde__m128i idx, simde__mmask8 k, simde__m128i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask2_permutex2var_epi64(a, idx, k, b); #else return simde_mm_mask_mov_epi64(idx, k, simde_mm_permutex2var_epi64(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask2_permutex2var_epi64 #define _mm_mask2_permutex2var_epi64(a, idx, k, b) simde_mm_mask2_permutex2var_epi64(a, idx, k, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_permutex2var_epi64 (simde__mmask8 k, simde__m128i a, simde__m128i idx, simde__m128i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_maskz_permutex2var_epi64(k, a, idx, b); #else return simde_mm_maskz_mov_epi64(k, simde_mm_permutex2var_epi64(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_permutex2var_epi64 #define _mm_maskz_permutex2var_epi64(k, a, idx, b) simde_mm_maskz_permutex2var_epi64(k, a, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_permutex2var_epi8 (simde__m128i a, simde__m128i idx, simde__m128i b) { #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_permutex2var_epi8(a, idx, b); #elif defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cvtepi32_epi8(_mm512_permutex2var_epi32(_mm512_cvtepu8_epi32(a), _mm512_cvtepu8_epi32(idx), _mm512_cvtepu8_epi32(b))); #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) simde__m128i r; simde_x_permutex2var(&r, &a, &idx, &b, 0, 0); return r; #else simde__m128i_private a_ = simde__m128i_to_private(a), idx_ = simde__m128i_to_private(idx), b_ = simde__m128i_to_private(b), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = ((idx_.i8[i] & 0x10) ? 
b_ : a_).i8[idx_.i8[i] & 0x0F]; } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_permutex2var_epi8 #define _mm_permutex2var_epi8(a, idx, b) simde_mm_permutex2var_epi8(a, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_permutex2var_epi8 (simde__m128i a, simde__mmask16 k, simde__m128i idx, simde__m128i b) { #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask_permutex2var_epi8(a, k, idx, b); #else return simde_mm_mask_mov_epi8(a, k, simde_mm_permutex2var_epi8(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_permutex2var_epi8 #define _mm_mask_permutex2var_epi8(a, k, idx, b) simde_mm_mask_permutex2var_epi8(a, k, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask2_permutex2var_epi8 (simde__m128i a, simde__m128i idx, simde__mmask16 k, simde__m128i b) { #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask2_permutex2var_epi8(a, idx, k, b); #else return simde_mm_mask_mov_epi8(idx, k, simde_mm_permutex2var_epi8(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask2_permutex2var_epi8 #define _mm_mask2_permutex2var_epi8(a, idx, k, b) simde_mm_mask2_permutex2var_epi8(a, idx, k, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_permutex2var_epi8 (simde__mmask16 k, simde__m128i a, simde__m128i idx, simde__m128i b) { #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_maskz_permutex2var_epi8(k, a, idx, b); #else return simde_mm_maskz_mov_epi8(k, simde_mm_permutex2var_epi8(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) 
#undef _mm_maskz_permutex2var_epi8 #define _mm_maskz_permutex2var_epi8(k, a, idx, b) simde_mm_maskz_permutex2var_epi8(k, a, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_permutex2var_pd (simde__m128d a, simde__m128i idx, simde__m128d b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_permutex2var_pd(a, idx, b); #else return simde_mm_castsi128_pd(simde_mm_permutex2var_epi64(simde_mm_castpd_si128(a), idx, simde_mm_castpd_si128(b))); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_permutex2var_pd #define _mm_permutex2var_pd(a, idx, b) simde_mm_permutex2var_pd(a, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_mask_permutex2var_pd (simde__m128d a, simde__mmask8 k, simde__m128i idx, simde__m128d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask_permutex2var_pd(a, k, idx, b); #else return simde_mm_mask_mov_pd(a, k, simde_mm_permutex2var_pd(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_permutex2var_pd #define _mm_mask_permutex2var_pd(a, k, idx, b) simde_mm_mask_permutex2var_pd(a, k, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_mask2_permutex2var_pd (simde__m128d a, simde__m128i idx, simde__mmask8 k, simde__m128d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask2_permutex2var_pd(a, idx, k, b); #else return simde_mm_mask_mov_pd(simde_mm_castsi128_pd(idx), k, simde_mm_permutex2var_pd(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask2_permutex2var_pd #define _mm_mask2_permutex2var_pd(a, idx, k, b) simde_mm_mask2_permutex2var_pd(a, idx, k, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_maskz_permutex2var_pd 
(simde__mmask8 k, simde__m128d a, simde__m128i idx, simde__m128d b) {
  /* Zero-masking double-precision variant: native when AVX-512F and
   * AVX-512VL are available, otherwise simde_mm_maskz_mov_pd applied to the
   * unmasked permute. */
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_maskz_permutex2var_pd(k, a, idx, b);
  #else
    return simde_mm_maskz_mov_pd(k, simde_mm_permutex2var_pd(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_maskz_permutex2var_pd
  #define _mm_maskz_permutex2var_pd(k, a, idx, b) simde_mm_maskz_permutex2var_pd(k, a, idx, b)
#endif

/* Two-source permute of the four floats in a 128-bit vector. The portable
 * path reinterprets `a` and `b` as integer vectors, runs
 * simde_mm_permutex2var_epi32 and casts the result back to float. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_permutex2var_ps (simde__m128 a, simde__m128i idx, simde__m128 b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_permutex2var_ps(a, idx, b);
  #else
    return simde_mm_castsi128_ps(simde_mm_permutex2var_epi32(simde_mm_castps_si128(a), idx, simde_mm_castps_si128(b)));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_permutex2var_ps
  #define _mm_permutex2var_ps(a, idx, b) simde_mm_permutex2var_ps(a, idx, b)
#endif

/* Write-masked form: the portable path forwards `a`, `k` and the unmasked
 * permute result to simde_mm_mask_mov_ps (defined elsewhere). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_mask_permutex2var_ps (simde__m128 a, simde__mmask8 k, simde__m128i idx, simde__m128 b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_mask_permutex2var_ps(a, k, idx, b);
  #else
    return simde_mm_mask_mov_ps(a, k, simde_mm_permutex2var_ps(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_mask_permutex2var_ps
  #define _mm_mask_permutex2var_ps(a, k, idx, b) simde_mm_mask_permutex2var_ps(a, k, idx, b)
#endif

/* "mask2" form: the portable path passes `idx`, reinterpreted as a float
 * vector, as the first argument of simde_mm_mask_mov_ps instead of `a`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_mask2_permutex2var_ps (simde__m128 a, simde__m128i idx, simde__mmask8 k, simde__m128 b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_mask2_permutex2var_ps(a, idx, k, b);
  #else
    return simde_mm_mask_mov_ps(simde_mm_castsi128_ps(idx),
k, simde_mm_permutex2var_ps(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_mask2_permutex2var_ps
  #define _mm_mask2_permutex2var_ps(a, idx, k, b) simde_mm_mask2_permutex2var_ps(a, idx, k, b)
#endif

/* Zero-masking form: the portable path forwards `k` and the unmasked permute
 * result to simde_mm_maskz_mov_ps (defined elsewhere). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_maskz_permutex2var_ps (simde__mmask8 k, simde__m128 a, simde__m128i idx, simde__m128 b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_maskz_permutex2var_ps(k, a, idx, b);
  #else
    return simde_mm_maskz_mov_ps(k, simde_mm_permutex2var_ps(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_maskz_permutex2var_ps
  #define _mm_maskz_permutex2var_ps(k, a, idx, b) simde_mm_maskz_permutex2var_ps(k, a, idx, b)
#endif

/* Two-source permute of 16-bit elements on 256-bit vectors.
 * Native when AVX-512BW and AVX-512VL are available. The AVX2 path below
 * builds the result from 32-bit permutes: it fetches, for every 16-bit
 * index, the 32-bit lane that contains the wanted element, then keeps the
 * low or high halfword of that lane depending on the index's lowest bit. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_permutex2var_epi16 (simde__m256i a, simde__m256i idx, simde__m256i b) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_permutex2var_epi16(a, idx, b);
  #elif defined(SIMDE_X86_AVX2_NATIVE)
    __m256i hilo, hilo2, hi, lo, idx2, ta, tb, select;
    const __m256i ones = _mm256_set1_epi16(1);

    /* Halve the index: 16-bit element n sits in 32-bit lane n / 2. This pass
     * serves the index stored in the low halfword of each 32-bit lane. */
    idx2 = _mm256_srli_epi32(idx, 1);
    ta = _mm256_permutevar8x32_epi32(a, idx2);
    tb = _mm256_permutevar8x32_epi32(b, idx2);
    /* Shift bit 3 of idx2 (bit 4 of the original index, the a/b selector)
     * into the top bit of each 32-bit lane for the blendv_ps below. */
    select = _mm256_slli_epi32(idx2, 28);
    hilo = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta),
                                                _mm256_castsi256_ps(tb),
                                                _mm256_castsi256_ps(select)));
    /* Second pass: same steps for the index stored in the high halfword. */
    idx2 = _mm256_srli_epi32(idx2, 16);
    ta = _mm256_permutevar8x32_epi32(a, idx2);
    tb = _mm256_permutevar8x32_epi32(b, idx2);
    select = _mm256_slli_epi32(idx2, 28);
    hilo2 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta),
                                                 _mm256_castsi256_ps(tb),
                                                 _mm256_castsi256_ps(select)));
    /* lo: low halfword of each fetched lane, placed at its result position;
     * hi: high halfword of each fetched lane, placed likewise. */
    lo = _mm256_blend_epi16(_mm256_slli_epi32(hilo2, 16), hilo, 0x55);
    hi = _mm256_blend_epi16(hilo2, _mm256_srli_epi32(hilo, 16), 0x55);

    select =
_mm256_cmpeq_epi16(_mm256_and_si256(idx, ones), ones); /* all-ones in 16-bit positions whose index is odd */
    return _mm256_blendv_epi8(lo, hi, select);
  #else
    simde__m256i_private
      a_ = simde__m256i_to_private(a),
      idx_ = simde__m256i_to_private(idx),
      b_ = simde__m256i_to_private(b),
      r_;

    #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC)
      /* NOTE(review): trailing (1, 1) presumably mean log2(element bytes) and
       * log2(number of 128-bit vectors) -- confirm against simde_x_permutex2var. */
      simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 1, 1);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        /* Bit 0x10 of the index selects b over a; the low four bits select the element. */
        r_.i16[i] = ((idx_.i16[i] & 0x10) ? b_ : a_).i16[idx_.i16[i] & 0x0F];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_permutex2var_epi16
  #define _mm256_permutex2var_epi16(a, idx, b) simde_mm256_permutex2var_epi16(a, idx, b)
#endif

/* Write-masked form: the portable path forwards `a`, `k` and the unmasked
 * permute result to simde_mm256_mask_mov_epi16 (defined elsewhere). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_mask_permutex2var_epi16 (simde__m256i a, simde__mmask16 k, simde__m256i idx, simde__m256i b) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask_permutex2var_epi16(a, k, idx, b);
  #else
    return simde_mm256_mask_mov_epi16(a, k, simde_mm256_permutex2var_epi16(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_permutex2var_epi16
  #define _mm256_mask_permutex2var_epi16(a, k, idx, b) simde_mm256_mask_permutex2var_epi16(a, k, idx, b)
#endif

/* "mask2" form: the portable path passes `idx` instead of `a` as the first
 * argument of simde_mm256_mask_mov_epi16. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_mask2_permutex2var_epi16 (simde__m256i a, simde__m256i idx, simde__mmask16 k, simde__m256i b) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask2_permutex2var_epi16(a, idx, k, b);
  #else
    return simde_mm256_mask_mov_epi16(idx, k, simde_mm256_permutex2var_epi16(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask2_permutex2var_epi16
  #define _mm256_mask2_permutex2var_epi16(a, idx, k, b) simde_mm256_mask2_permutex2var_epi16(a, idx, k, b)
#endif

/* Zero-masking form: the portable path forwards `k` and the unmasked permute
 * result to simde_mm256_maskz_mov_epi16 (defined elsewhere). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_maskz_permutex2var_epi16 (simde__mmask16 k, simde__m256i a, simde__m256i idx, simde__m256i b) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_maskz_permutex2var_epi16(k, a, idx, b);
  #else
    return simde_mm256_maskz_mov_epi16(k, simde_mm256_permutex2var_epi16(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_maskz_permutex2var_epi16
  #define _mm256_maskz_permutex2var_epi16(k, a, idx, b) simde_mm256_maskz_permutex2var_epi16(k, a, idx, b)
#endif

/* Two-source permute of 32-bit elements on 256-bit vectors.
 * Native when AVX-512F and AVX-512VL are available. The AVX2 path permutes
 * both sources with the same index and then blends them using index bit 3. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_permutex2var_epi32 (simde__m256i a, simde__m256i idx, simde__m256i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_permutex2var_epi32(a, idx, b);
  #elif defined(SIMDE_X86_AVX2_NATIVE)
    __m256i ta, tb, select;
    ta = _mm256_permutevar8x32_epi32(a, idx);
    tb = _mm256_permutevar8x32_epi32(b, idx);
    /* Shift index bit 3 (the a/b selector) into the top bit of each lane. */
    select = _mm256_slli_epi32(idx, 28);
    return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta),
                                                _mm256_castsi256_ps(tb),
                                                _mm256_castsi256_ps(select)));
  #else
    simde__m256i_private
      a_ = simde__m256i_to_private(a),
      idx_ = simde__m256i_to_private(idx),
      b_ = simde__m256i_to_private(b),
      r_;

    #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC)
      simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 2, 1);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        /* Bit 8 (value 8) of the index is the source selector. */
        r_.i32[i] = ((idx_.i32[i] & 8) ?
b_ : a_).i32[idx_.i32[i] & 7]; /* low three index bits pick the element within the chosen source */
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_permutex2var_epi32
  #define _mm256_permutex2var_epi32(a, idx, b) simde_mm256_permutex2var_epi32(a, idx, b)
#endif

/* Write-masked form: the portable path forwards `a`, `k` and the unmasked
 * permute result to simde_mm256_mask_mov_epi32 (defined elsewhere). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_mask_permutex2var_epi32 (simde__m256i a, simde__mmask8 k, simde__m256i idx, simde__m256i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask_permutex2var_epi32(a, k, idx, b);
  #else
    return simde_mm256_mask_mov_epi32(a, k, simde_mm256_permutex2var_epi32(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_permutex2var_epi32
  #define _mm256_mask_permutex2var_epi32(a, k, idx, b) simde_mm256_mask_permutex2var_epi32(a, k, idx, b)
#endif

/* "mask2" form: the portable path passes `idx` instead of `a` as the first
 * argument of simde_mm256_mask_mov_epi32. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_mask2_permutex2var_epi32 (simde__m256i a, simde__m256i idx, simde__mmask8 k, simde__m256i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask2_permutex2var_epi32(a, idx, k, b);
  #else
    return simde_mm256_mask_mov_epi32(idx, k, simde_mm256_permutex2var_epi32(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask2_permutex2var_epi32
  #define _mm256_mask2_permutex2var_epi32(a, idx, k, b) simde_mm256_mask2_permutex2var_epi32(a, idx, k, b)
#endif

/* Zero-masking form: the portable path forwards `k` and the unmasked permute
 * result to simde_mm256_maskz_mov_epi32 (defined elsewhere). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_maskz_permutex2var_epi32 (simde__mmask8 k, simde__m256i a, simde__m256i idx, simde__m256i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_maskz_permutex2var_epi32(k, a, idx, b);
  #else
    return simde_mm256_maskz_mov_epi32(k, simde_mm256_permutex2var_epi32(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_maskz_permutex2var_epi32
  #define _mm256_maskz_permutex2var_epi32(k, a, idx, b) simde_mm256_maskz_permutex2var_epi32(k, a, idx, b)
#endif

/* Two-source permute of 64-bit elements on 256-bit vectors.
 * Native when AVX-512F and AVX-512VL are available; otherwise a scalar loop
 * in which index bit 2 (value 4) selects b over a and the low two bits
 * select the element. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_permutex2var_epi64 (simde__m256i a, simde__m256i idx, simde__m256i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_permutex2var_epi64(a, idx, b);
  #else
    simde__m256i_private
      a_ = simde__m256i_to_private(a),
      idx_ = simde__m256i_to_private(idx),
      b_ = simde__m256i_to_private(b),
      r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
      r_.i64[i] = ((idx_.i64[i] & 4) ? b_ : a_).i64[idx_.i64[i] & 3];
    }

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_permutex2var_epi64
  #define _mm256_permutex2var_epi64(a, idx, b) simde_mm256_permutex2var_epi64(a, idx, b)
#endif

/* Write-masked form: the portable path forwards `a`, `k` and the unmasked
 * permute result to simde_mm256_mask_mov_epi64 (defined elsewhere). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_mask_permutex2var_epi64 (simde__m256i a, simde__mmask8 k, simde__m256i idx, simde__m256i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask_permutex2var_epi64(a, k, idx, b);
  #else
    return simde_mm256_mask_mov_epi64(a, k, simde_mm256_permutex2var_epi64(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_permutex2var_epi64
  #define _mm256_mask_permutex2var_epi64(a, k, idx, b) simde_mm256_mask_permutex2var_epi64(a, k, idx, b)
#endif

/* "mask2" form: the portable path passes `idx` instead of `a` as the first
 * argument of simde_mm256_mask_mov_epi64. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_mask2_permutex2var_epi64 (simde__m256i a, simde__m256i idx, simde__mmask8 k, simde__m256i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask2_permutex2var_epi64(a, idx, k, b);
  #else
    return
simde_mm256_mask_mov_epi64(idx, k, simde_mm256_permutex2var_epi64(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask2_permutex2var_epi64
  #define _mm256_mask2_permutex2var_epi64(a, idx, k, b) simde_mm256_mask2_permutex2var_epi64(a, idx, k, b)
#endif

/* Zero-masking form: the portable path forwards `k` and the unmasked permute
 * result to simde_mm256_maskz_mov_epi64 (defined elsewhere). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_maskz_permutex2var_epi64 (simde__mmask8 k, simde__m256i a, simde__m256i idx, simde__m256i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_maskz_permutex2var_epi64(k, a, idx, b);
  #else
    return simde_mm256_maskz_mov_epi64(k, simde_mm256_permutex2var_epi64(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_maskz_permutex2var_epi64
  #define _mm256_maskz_permutex2var_epi64(k, a, idx, b) simde_mm256_maskz_permutex2var_epi64(k, a, idx, b)
#endif

/* Two-source byte permute on 256-bit vectors.
 *   - native _mm256_permutex2var_epi8 with AVX-512VBMI + AVX-512VL;
 *   - with AVX-512BW, operands are widened with _mm512_cvtepu8_epi16,
 *     permuted with _mm512_permutex2var_epi16 and narrowed with
 *     _mm512_cvtepi16_epi8;
 *   - with AVX2, each 128-bit half of a and b is duplicated across a full
 *     register, shuffled with _mm256_shuffle_epi8, and the four candidates
 *     are merged with byte blends keyed on index bits 0x10 and 0x20. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_permutex2var_epi8 (simde__m256i a, simde__m256i idx, simde__m256i b) {
  #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_permutex2var_epi8(a, idx, b);
  #elif defined(SIMDE_X86_AVX512BW_NATIVE)
    return _mm512_cvtepi16_epi8(_mm512_permutex2var_epi16(_mm512_cvtepu8_epi16(a), _mm512_cvtepu8_epi16(idx), _mm512_cvtepu8_epi16(b)));
  #elif defined(SIMDE_X86_AVX2_NATIVE)
    __m256i t0, t1, index, select0x10, select0x20, a01, b01;
    /* 0x3F keeps the six index bits used here and clears bit 7 of every byte. */
    const __m256i mask = _mm256_set1_epi8(0x3F);
    /* Immediates pick 64-bit quarters (0,1,0,1) or (2,3,2,3): a0/b0 hold the
     * low 128 bits of the source in both halves, a1/b1 the high 128 bits. */
    const __m256i a0 = _mm256_permute4x64_epi64(a, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0));
    const __m256i a1 = _mm256_permute4x64_epi64(a, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0));
    const __m256i b0 = _mm256_permute4x64_epi64(b, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0));
    const __m256i b1 = _mm256_permute4x64_epi64(b, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0));
    index = _mm256_and_si256(idx, mask);
    t0 = _mm256_shuffle_epi8(a0, index);
    t1 = _mm256_shuffle_epi8(a1,
index);
    /* Shifting left by 3 moves index bit 0x10 into bit 7 of each byte, the
     * bit the byte blend keys on: chooses low vs. high 128-bit half. */
    select0x10 = _mm256_slli_epi64(index, 3);
    a01 = _mm256_blendv_epi8(t0, t1, select0x10);
    t0 = _mm256_shuffle_epi8(b0, index);
    t1 = _mm256_shuffle_epi8(b1, index);
    b01 = _mm256_blendv_epi8(t0, t1, select0x10);
    /* Shifting left by 2 moves index bit 0x20 into bit 7: chooses a vs. b. */
    select0x20 = _mm256_slli_epi64(index, 2);
    return _mm256_blendv_epi8(a01, b01, select0x20);
  #else
    simde__m256i_private
      a_ = simde__m256i_to_private(a),
      idx_ = simde__m256i_to_private(idx),
      b_ = simde__m256i_to_private(b),
      r_;

    #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC)
      simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 0, 1);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        /* Bit 0x20 of the index selects b over a; the low five bits select the byte. */
        r_.i8[i] = ((idx_.i8[i] & 0x20) ? b_ : a_).i8[idx_.i8[i] & 0x1F];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_permutex2var_epi8
  #define _mm256_permutex2var_epi8(a, idx, b) simde_mm256_permutex2var_epi8(a, idx, b)
#endif

/* Write-masked form: the portable path forwards `a`, `k` and the unmasked
 * permute result to simde_mm256_mask_mov_epi8 (defined elsewhere). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_mask_permutex2var_epi8 (simde__m256i a, simde__mmask32 k, simde__m256i idx, simde__m256i b) {
  #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask_permutex2var_epi8(a, k, idx, b);
  #else
    return simde_mm256_mask_mov_epi8(a, k, simde_mm256_permutex2var_epi8(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_permutex2var_epi8
  #define _mm256_mask_permutex2var_epi8(a, k, idx, b) simde_mm256_mask_permutex2var_epi8(a, k, idx, b)
#endif

/* "mask2" form: the portable path passes `idx` instead of `a` as the first
 * argument of simde_mm256_mask_mov_epi8. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_mask2_permutex2var_epi8 (simde__m256i a, simde__m256i idx, simde__mmask32 k, simde__m256i b) {
  #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask2_permutex2var_epi8(a, idx, k, b);
  #else
    return simde_mm256_mask_mov_epi8(idx, k, simde_mm256_permutex2var_epi8(a, idx, b));
#endif } #if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask2_permutex2var_epi8 #define _mm256_mask2_permutex2var_epi8(a, idx, k, b) simde_mm256_mask2_permutex2var_epi8(a, idx, k, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_maskz_permutex2var_epi8 (simde__mmask32 k, simde__m256i a, simde__m256i idx, simde__m256i b) { #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_maskz_permutex2var_epi8(k, a, idx, b); #else return simde_mm256_maskz_mov_epi8(k, simde_mm256_permutex2var_epi8(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_permutex2var_epi8 #define _mm256_maskz_permutex2var_epi8(k, a, idx, b) simde_mm256_maskz_permutex2var_epi8(k, a, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_permutex2var_pd (simde__m256d a, simde__m256i idx, simde__m256d b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_permutex2var_pd(a, idx, b); #else return simde_mm256_castsi256_pd(simde_mm256_permutex2var_epi64(simde_mm256_castpd_si256(a), idx, simde_mm256_castpd_si256(b))); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_permutex2var_pd #define _mm256_permutex2var_pd(a, idx, b) simde_mm256_permutex2var_pd(a, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_mask_permutex2var_pd (simde__m256d a, simde__mmask8 k, simde__m256i idx, simde__m256d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_mask_permutex2var_pd(a, k, idx, b); #else return simde_mm256_mask_mov_pd(a, k, simde_mm256_permutex2var_pd(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef 
_mm256_mask_permutex2var_pd #define _mm256_mask_permutex2var_pd(a, k, idx, b) simde_mm256_mask_permutex2var_pd(a, k, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_mask2_permutex2var_pd (simde__m256d a, simde__m256i idx, simde__mmask8 k, simde__m256d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_mask2_permutex2var_pd(a, idx, k, b); #else return simde_mm256_mask_mov_pd(simde_mm256_castsi256_pd(idx), k, simde_mm256_permutex2var_pd(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask2_permutex2var_pd #define _mm256_mask2_permutex2var_pd(a, idx, k, b) simde_mm256_mask2_permutex2var_pd(a, idx, k, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_maskz_permutex2var_pd (simde__mmask8 k, simde__m256d a, simde__m256i idx, simde__m256d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_maskz_permutex2var_pd(k, a, idx, b); #else return simde_mm256_maskz_mov_pd(k, simde_mm256_permutex2var_pd(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_permutex2var_pd #define _mm256_maskz_permutex2var_pd(k, a, idx, b) simde_mm256_maskz_permutex2var_pd(k, a, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_permutex2var_ps (simde__m256 a, simde__m256i idx, simde__m256 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_permutex2var_ps(a, idx, b); #else return simde_mm256_castsi256_ps(simde_mm256_permutex2var_epi32(simde_mm256_castps_si256(a), idx, simde_mm256_castps_si256(b))); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_permutex2var_ps #define _mm256_permutex2var_ps(a, idx, b) simde_mm256_permutex2var_ps(a, idx, b) #endif 
/* Write-masked 256-bit float permute: the portable path forwards `a`, `k`
 * and the unmasked permute result to simde_mm256_mask_mov_ps (defined
 * elsewhere). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_mask_permutex2var_ps (simde__m256 a, simde__mmask8 k, simde__m256i idx, simde__m256 b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask_permutex2var_ps(a, k, idx, b);
  #else
    return simde_mm256_mask_mov_ps(a, k, simde_mm256_permutex2var_ps(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_permutex2var_ps
  #define _mm256_mask_permutex2var_ps(a, k, idx, b) simde_mm256_mask_permutex2var_ps(a, k, idx, b)
#endif

/* "mask2" form: the portable path passes `idx`, reinterpreted as a float
 * vector, as the first argument of simde_mm256_mask_mov_ps instead of `a`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_mask2_permutex2var_ps (simde__m256 a, simde__m256i idx, simde__mmask8 k, simde__m256 b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask2_permutex2var_ps(a, idx, k, b);
  #else
    return simde_mm256_mask_mov_ps(simde_mm256_castsi256_ps(idx), k, simde_mm256_permutex2var_ps(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask2_permutex2var_ps
  #define _mm256_mask2_permutex2var_ps(a, idx, k, b) simde_mm256_mask2_permutex2var_ps(a, idx, k, b)
#endif

/* Zero-masking form: the portable path forwards `k` and the unmasked permute
 * result to simde_mm256_maskz_mov_ps (defined elsewhere). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_maskz_permutex2var_ps (simde__mmask8 k, simde__m256 a, simde__m256i idx, simde__m256 b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_maskz_permutex2var_ps(k, a, idx, b);
  #else
    return simde_mm256_maskz_mov_ps(k, simde_mm256_permutex2var_ps(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_maskz_permutex2var_ps
  #define _mm256_maskz_permutex2var_ps(k, a, idx, b) simde_mm256_maskz_permutex2var_ps(k, a, idx, b)
#endif

/* Two-source permute of 16-bit elements on 512-bit vectors.
 * Native when AVX-512BW is available. The AVX2 path handles one 256-bit
 * slice of idx per iteration: for each 16-bit index it fetches the containing
 * 32-bit lane from all four candidate registers (a/b x lower/upper 256 bits),
 * narrows to one with two blends, then keeps the low or high halfword. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_permutex2var_epi16 (simde__m512i a, simde__m512i idx, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE)
    return _mm512_permutex2var_epi16(a, idx, b);
  #else
    simde__m512i_private
      a_ = simde__m512i_to_private(a),
      idx_ = simde__m512i_to_private(idx),
      b_ = simde__m512i_to_private(b),
      r_;

    #if defined(SIMDE_X86_AVX2_NATIVE)
      __m256i hilo, hilo1, hilo2, hi, lo, idx1, idx2, ta, tb, select;
      const __m256i ones = _mm256_set1_epi16(1);

      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) {
        idx1 = idx_.m256i[i];
        /* Halve the index to address the 32-bit lane holding the element
         * (first pass: the index stored in the low halfword of each lane). */
        idx2 = _mm256_srli_epi32(idx1, 1);

        /* Bit 4 of idx2 (bit 5 of the original index, the a/b selector) is
         * moved to the top bit of each lane. */
        select = _mm256_slli_epi32(idx2, 27);
        ta = _mm256_permutevar8x32_epi32(a_.m256i[0], idx2);
        tb = _mm256_permutevar8x32_epi32(b_.m256i[0], idx2);
        hilo = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta),
                                                    _mm256_castsi256_ps(tb),
                                                    _mm256_castsi256_ps(select)));
        ta = _mm256_permutevar8x32_epi32(a_.m256i[1], idx2);
        tb = _mm256_permutevar8x32_epi32(b_.m256i[1], idx2);
        hilo1 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta),
                                                     _mm256_castsi256_ps(tb),
                                                     _mm256_castsi256_ps(select)));
        /* Doubling shifts left by one more bit: the top bit now holds bit 3
         * of idx2, which chooses between the lower and upper 256-bit halves. */
        select = _mm256_add_epi32(select, select);
        hilo1 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(hilo),
                                                     _mm256_castsi256_ps(hilo1),
                                                     _mm256_castsi256_ps(select)));

        /* Second pass: same steps for the index stored in the high halfword. */
        idx2 = _mm256_srli_epi32(idx2, 16);

        select = _mm256_slli_epi32(idx2, 27);
        ta = _mm256_permutevar8x32_epi32(a_.m256i[0], idx2);
        tb = _mm256_permutevar8x32_epi32(b_.m256i[0], idx2);
        hilo = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta),
                                                    _mm256_castsi256_ps(tb),
                                                    _mm256_castsi256_ps(select)));
        ta = _mm256_permutevar8x32_epi32(a_.m256i[1], idx2);
        tb = _mm256_permutevar8x32_epi32(b_.m256i[1], idx2);
        hilo2 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta),
                                                     _mm256_castsi256_ps(tb),
                                                     _mm256_castsi256_ps(select)));
        select = _mm256_add_epi32(select, select);
        hilo2 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(hilo),
                                                     _mm256_castsi256_ps(hilo2),
                                                     _mm256_castsi256_ps(select)));

        /* lo/hi: low and high halfword of each fetched lane, each placed at
         * the 16-bit position it will occupy in the result. */
        lo = _mm256_blend_epi16(_mm256_slli_epi32(hilo2, 16), hilo1, 0x55);
        hi =
_mm256_blend_epi16(hilo2, _mm256_srli_epi32(hilo1, 16), 0x55);

        /* Odd indices take the high halfword, even indices the low one. */
        select = _mm256_cmpeq_epi16(_mm256_and_si256(idx1, ones), ones);
        r_.m256i[i] = _mm256_blendv_epi8(lo, hi, select);
      }
    #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC)
      simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 1, 2);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        /* Bit 0x20 of the index selects b over a; the low five bits select the element. */
        r_.i16[i] = ((idx_.i16[i] & 0x20) ? b_ : a_).i16[idx_.i16[i] & 0x1F];
      }
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
/* NOTE(review): unlike the other 512-bit aliases that follow, this guard
 * also tests the AVX512VL alias macro -- confirm whether that is intended. */
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm512_permutex2var_epi16
  #define _mm512_permutex2var_epi16(a, idx, b) simde_mm512_permutex2var_epi16(a, idx, b)
#endif

/* Write-masked form: the portable path forwards `a`, `k` and the unmasked
 * permute result to simde_mm512_mask_mov_epi16 (defined elsewhere). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_permutex2var_epi16 (simde__m512i a, simde__mmask32 k, simde__m512i idx, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE)
    return _mm512_mask_permutex2var_epi16(a, k, idx, b);
  #else
    return simde_mm512_mask_mov_epi16(a, k, simde_mm512_permutex2var_epi16(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_permutex2var_epi16
  #define _mm512_mask_permutex2var_epi16(a, k, idx, b) simde_mm512_mask_permutex2var_epi16(a, k, idx, b)
#endif

/* "mask2" form: the portable path passes `idx` instead of `a` as the first
 * argument of simde_mm512_mask_mov_epi16. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask2_permutex2var_epi16 (simde__m512i a, simde__m512i idx, simde__mmask32 k, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE)
    return _mm512_mask2_permutex2var_epi16(a, idx, k, b);
  #else
    return simde_mm512_mask_mov_epi16(idx, k, simde_mm512_permutex2var_epi16(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask2_permutex2var_epi16
  #define _mm512_mask2_permutex2var_epi16(a, idx, k, b) simde_mm512_mask2_permutex2var_epi16(a, idx, k, b)
#endif

/* Zero-masking form: the portable path forwards `k` and the unmasked permute
 * result to simde_mm512_maskz_mov_epi16 (defined elsewhere). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_maskz_permutex2var_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i idx, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE)
    return _mm512_maskz_permutex2var_epi16(k, a, idx, b);
  #else
    return simde_mm512_maskz_mov_epi16(k, simde_mm512_permutex2var_epi16(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_permutex2var_epi16
  #define _mm512_maskz_permutex2var_epi16(k, a, idx, b) simde_mm512_maskz_permutex2var_epi16(k, a, idx, b)
#endif

/* Two-source permute of 32-bit elements on 512-bit vectors.
 * Native when AVX-512F is available. The AVX2 path handles one 256-bit slice
 * of idx per iteration: it permutes both 256-bit halves of a and of b with
 * the same index, uses index bit 3 to choose the half and index bit 4 to
 * choose between a and b. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_permutex2var_epi32 (simde__m512i a, simde__m512i idx, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_permutex2var_epi32(a, idx, b);
  #else
    simde__m512i_private
      a_ = simde__m512i_to_private(a),
      idx_ = simde__m512i_to_private(idx),
      b_ = simde__m512i_to_private(b),
      r_;

    #if defined(SIMDE_X86_AVX2_NATIVE)
      __m256i index, t0, t1, a01, b01, select;

      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) {
        index = idx_.m256i[i];
        t0 = _mm256_permutevar8x32_epi32(a_.m256i[0], index);
        t1 = _mm256_permutevar8x32_epi32(a_.m256i[1], index);
        /* Index bit 3 -> top bit of each lane: lower vs. upper 256-bit half. */
        select = _mm256_slli_epi32(index, 28);
        a01 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(t0),
                                                   _mm256_castsi256_ps(t1),
                                                   _mm256_castsi256_ps(select)));
        t0 = _mm256_permutevar8x32_epi32(b_.m256i[0], index);
        t1 = _mm256_permutevar8x32_epi32(b_.m256i[1], index);
        b01 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(t0),
                                                   _mm256_castsi256_ps(t1),
                                                   _mm256_castsi256_ps(select)));
        /* Index bit 4 -> top bit of each lane: a vs. b. */
        select = _mm256_slli_epi32(index, 27);
        r_.m256i[i] = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a01),
                                                           _mm256_castsi256_ps(b01),
                                                           _mm256_castsi256_ps(select)));
      }
    #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC)
      simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 2, 2);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        /* Bit 0x10 of the index is the source selector. */
        r_.i32[i] = ((idx_.i32[i] & 0x10) ?
b_ : a_).i32[idx_.i32[i] & 0x0F]; /* low four index bits pick the element within the chosen source */
      }
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_permutex2var_epi32
  #define _mm512_permutex2var_epi32(a, idx, b) simde_mm512_permutex2var_epi32(a, idx, b)
#endif

/* Write-masked form: native with AVX-512F; otherwise forwards `a`, `k` and
 * the unmasked permute result to simde_mm512_mask_mov_epi32 (defined
 * elsewhere). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_permutex2var_epi32 (simde__m512i a, simde__mmask16 k, simde__m512i idx, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_permutex2var_epi32(a, k, idx, b);
  #else
    return simde_mm512_mask_mov_epi32(a, k, simde_mm512_permutex2var_epi32(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_permutex2var_epi32
  #define _mm512_mask_permutex2var_epi32(a, k, idx, b) simde_mm512_mask_permutex2var_epi32(a, k, idx, b)
#endif

/* "mask2" form: the portable path passes `idx` instead of `a` as the first
 * argument of simde_mm512_mask_mov_epi32. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask2_permutex2var_epi32 (simde__m512i a, simde__m512i idx, simde__mmask16 k, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask2_permutex2var_epi32(a, idx, k, b);
  #else
    return simde_mm512_mask_mov_epi32(idx, k, simde_mm512_permutex2var_epi32(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask2_permutex2var_epi32
  #define _mm512_mask2_permutex2var_epi32(a, idx, k, b) simde_mm512_mask2_permutex2var_epi32(a, idx, k, b)
#endif

/* Zero-masking form: the portable path forwards `k` and the unmasked permute
 * result to simde_mm512_maskz_mov_epi32 (defined elsewhere). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_maskz_permutex2var_epi32 (simde__mmask16 k, simde__m512i a, simde__m512i idx, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_maskz_permutex2var_epi32(k, a, idx, b);
  #else
    return simde_mm512_maskz_mov_epi32(k, simde_mm512_permutex2var_epi32(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_permutex2var_epi32
  #define _mm512_maskz_permutex2var_epi32(k, a, idx, b) simde_mm512_maskz_permutex2var_epi32(k, a, idx, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_permutex2var_epi64 (simde__m512i a, simde__m512i idx,
simde__m512i b) {
  /* Two-source permute of 64-bit elements on 512-bit vectors: native with
   * AVX-512F, otherwise a scalar loop in which index bit 3 (value 8) selects
   * b over a and the low three bits select the element. */
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_permutex2var_epi64(a, idx, b);
  #else
    simde__m512i_private
      a_ = simde__m512i_to_private(a),
      idx_ = simde__m512i_to_private(idx),
      b_ = simde__m512i_to_private(b),
      r_;

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
      r_.i64[i] = ((idx_.i64[i] & 8) ? b_ : a_).i64[idx_.i64[i] & 7];
    }

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_permutex2var_epi64
  #define _mm512_permutex2var_epi64(a, idx, b) simde_mm512_permutex2var_epi64(a, idx, b)
#endif

/* Write-masked form: the portable path forwards `a`, `k` and the unmasked
 * permute result to simde_mm512_mask_mov_epi64 (defined elsewhere). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_permutex2var_epi64 (simde__m512i a, simde__mmask8 k, simde__m512i idx, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_permutex2var_epi64(a, k, idx, b);
  #else
    return simde_mm512_mask_mov_epi64(a, k, simde_mm512_permutex2var_epi64(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_permutex2var_epi64
  #define _mm512_mask_permutex2var_epi64(a, k, idx, b) simde_mm512_mask_permutex2var_epi64(a, k, idx, b)
#endif

/* "mask2" form: the portable path passes `idx` instead of `a` as the first
 * argument of simde_mm512_mask_mov_epi64. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask2_permutex2var_epi64 (simde__m512i a, simde__m512i idx, simde__mmask8 k, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask2_permutex2var_epi64(a, idx, k, b);
  #else
    return simde_mm512_mask_mov_epi64(idx, k, simde_mm512_permutex2var_epi64(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask2_permutex2var_epi64
  #define _mm512_mask2_permutex2var_epi64(a, idx, k, b) simde_mm512_mask2_permutex2var_epi64(a, idx, k, b)
#endif

/* Zero-masking form: the portable path forwards `k` and the unmasked permute
 * result to simde_mm512_maskz_mov_epi64 (defined elsewhere). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_maskz_permutex2var_epi64 (simde__mmask8 k, simde__m512i a, simde__m512i idx, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_maskz_permutex2var_epi64(k, a, idx, b);
  #else
    return simde_mm512_maskz_mov_epi64(k,
simde_mm512_permutex2var_epi64(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_permutex2var_epi64
  #define _mm512_maskz_permutex2var_epi64(k, a, idx, b) simde_mm512_maskz_permutex2var_epi64(k, a, idx, b)
#endif

/* Two-source permute of 8-bit lanes.
 *
 * Implementation tiers, in order of preference:
 *   1. native AVX-512 VBMI intrinsic;
 *   2. emulation built from two 16-bit permutes (AVX-512 BW);
 *   3. AVX2 shuffle/blend tree, a generic helper, or a scalar loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_permutex2var_epi8 (simde__m512i a, simde__m512i idx, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512VBMI_NATIVE)
    return _mm512_permutex2var_epi8(a, idx, b);
  /* NOTE(review): every intrinsic in this branch is a 512-bit F/BW one,
   * so the AVX512VL requirement looks unnecessary -- confirm. */
  #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    __m512i hilo, hi, lo, hi2, lo2, idx2;
    const __m512i ones = _mm512_set1_epi8(1);
    const __m512i low_bytes = _mm512_set1_epi16(0x00FF);

    /* Pass 1: resolve the even byte of every 16-bit lane.  Halving the
     * byte index yields the index of the 16-bit word that contains the
     * wanted byte; fetch that word. */
    idx2 = _mm512_srli_epi16(idx, 1);
    hilo = _mm512_permutex2var_epi16(a, idx2, b);
    /* One bit per byte: set when the byte index is odd, i.e. the high
     * byte of the fetched word is the one wanted. */
    __mmask64 mask = _mm512_test_epi8_mask(idx, ones);
    lo = _mm512_and_si512(hilo, low_bytes);
    hi = _mm512_srli_epi16(hilo, 8);

    /* Pass 2: same for the odd byte of every 16-bit lane (shift by
     * 8 + 1 brings the upper index byte down and halves it). */
    idx2 = _mm512_srli_epi16(idx, 9);
    hilo = _mm512_permutex2var_epi16(a, idx2, b);
    lo2 = _mm512_slli_epi16(hilo, 8);
    hi2 = _mm512_andnot_si512(low_bytes, hilo);

    /* `lo` holds the low-byte candidates for all positions, `hi` the
     * high-byte candidates; pick per byte by index parity. */
    lo = _mm512_or_si512(lo, lo2);
    hi = _mm512_or_si512(hi, hi2);

    return _mm512_mask_blend_epi8(mask, lo, hi);
  #else
    simde__m512i_private
      a_ = simde__m512i_to_private(a),
      idx_ = simde__m512i_to_private(idx),
      b_ = simde__m512i_to_private(b),
      r_;

    #if defined(SIMDE_X86_AVX2_NATIVE)
      __m256i t0, t1, index, select0x10, select0x20, select0x40, t01, t23, a0123, b0123;
      /* Clears bit 7 of each index byte before it is used as a shuffle
       * control. */
      const __m256i mask = _mm256_set1_epi8(0x7F);
      /* a0..a3 / b0..b3: each 128-bit quarter of a / b, duplicated into
       * both halves of a 256-bit register (the first immediate picks
       * qwords 0,1,0,1, the second qwords 2,3,2,3). */
      const __m256i a0 = _mm256_permute4x64_epi64(a_.m256i[0], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0));
      const __m256i a1 = _mm256_permute4x64_epi64(a_.m256i[0], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0));
      const __m256i a2 = _mm256_permute4x64_epi64(a_.m256i[1], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0));
      const __m256i a3 = _mm256_permute4x64_epi64(a_.m256i[1], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0));
      const __m256i b0 = _mm256_permute4x64_epi64(b_.m256i[0], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0));
      const __m256i b1 =
_mm256_permute4x64_epi64(b_.m256i[0], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0));
      const __m256i b2 = _mm256_permute4x64_epi64(b_.m256i[1], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0));
      const __m256i b3 = _mm256_permute4x64_epi64(b_.m256i[1], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0));

      /* For each 256-bit half of the result: look the index up in all
       * eight 16-byte quarters (four of a, four of b) and narrow the
       * candidates with a three-level blend tree keyed on index bits
       * 4, 5 and 6.  Each left shift moves the relevant index bit into
       * bit 7 of its byte, which is the bit the byte blend tests. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) {
        index = _mm256_and_si256(idx_.m256i[i], mask);
        /* Quarters 0/1 of a, chosen by index bit 4 (0x10). */
        t0 = _mm256_shuffle_epi8(a0, index);
        t1 = _mm256_shuffle_epi8(a1, index);
        select0x10 = _mm256_slli_epi64(index, 3);
        t01 = _mm256_blendv_epi8(t0, t1, select0x10);
        /* Quarters 2/3 of a, chosen by index bit 4. */
        t0 = _mm256_shuffle_epi8(a2, index);
        t1 = _mm256_shuffle_epi8(a3, index);
        t23 = _mm256_blendv_epi8(t0, t1, select0x10);
        /* Lower vs. upper half of a, chosen by index bit 5 (0x20). */
        select0x20 = _mm256_slli_epi64(index, 2);
        a0123 = _mm256_blendv_epi8(t01, t23, select0x20);
        /* Same tree for b. */
        t0 = _mm256_shuffle_epi8(b0, index);
        t1 = _mm256_shuffle_epi8(b1, index);
        t01 = _mm256_blendv_epi8(t0, t1, select0x10);
        t0 = _mm256_shuffle_epi8(b2, index);
        t1 = _mm256_shuffle_epi8(b3, index);
        t23 = _mm256_blendv_epi8(t0, t1, select0x10);
        b0123 = _mm256_blendv_epi8(t01, t23, select0x20);
        /* a vs. b, chosen by index bit 6 (0x40). */
        select0x40 = _mm256_slli_epi64(index, 1);
        r_.m256i[i] = _mm256_blendv_epi8(a0123, b0123, select0x40);
      }
    #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC)
      /* Generic helper defined elsewhere in this header; the meaning of
       * the trailing (0, 2) arguments is not visible here -- TODO
       * confirm against its definition. */
      simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 0, 2);
    #else
      /* Scalar reference: index bit 6 picks b over a, the low six bits
       * pick the byte inside the chosen source. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = ((idx_.i8[i] & 0x40) ?
b_ : a_).i8[idx_.i8[i] & 0x3F];
      }
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES)
  #undef _mm512_permutex2var_epi8
  #define _mm512_permutex2var_epi8(a, idx, b) simde_mm512_permutex2var_epi8(a, idx, b)
#endif

/* "mask" form of the two-source byte permute: native intrinsic under
 * SIMDE_X86_AVX512VBMI_NATIVE; otherwise the unmasked permute is passed
 * with `a` and the 64-bit mask `k` to simde_mm512_mask_mov_epi8
 * (defined elsewhere; presumably keeps the byte of `a` where the bit of
 * `k` is clear -- TODO confirm). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_permutex2var_epi8 (simde__m512i a, simde__mmask64 k, simde__m512i idx, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512VBMI_NATIVE)
    return _mm512_mask_permutex2var_epi8(a, k, idx, b);
  #else
    return simde_mm512_mask_mov_epi8(a, k, simde_mm512_permutex2var_epi8(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_permutex2var_epi8
  #define _mm512_mask_permutex2var_epi8(a, k, idx, b) simde_mm512_mask_permutex2var_epi8(a, k, idx, b)
#endif

/* "mask2" form: like the mask form, but `idx` (not `a`) is the first
 * argument handed to simde_mm512_mask_mov_epi8. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask2_permutex2var_epi8 (simde__m512i a, simde__m512i idx, simde__mmask64 k, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512VBMI_NATIVE)
    return _mm512_mask2_permutex2var_epi8(a, idx, k, b);
  #else
    return simde_mm512_mask_mov_epi8(idx, k, simde_mm512_permutex2var_epi8(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask2_permutex2var_epi8
  #define _mm512_mask2_permutex2var_epi8(a, idx, k, b) simde_mm512_mask2_permutex2var_epi8(a, idx, k, b)
#endif

/* "maskz" form: the unmasked permute result goes through
 * simde_mm512_maskz_mov_epi8(k, ...). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_maskz_permutex2var_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i idx, simde__m512i b) {
  #if defined(SIMDE_X86_AVX512VBMI_NATIVE)
    return _mm512_maskz_permutex2var_epi8(k, a, idx, b);
  #else
    return simde_mm512_maskz_mov_epi8(k, simde_mm512_permutex2var_epi8(a, idx, b));
  #endif
}
#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_permutex2var_epi8
  #define _mm512_maskz_permutex2var_epi8(k, a, idx, b) simde_mm512_maskz_permutex2var_epi8(k, a, idx, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_permutex2var_pd (simde__m512d a, simde__m512i idx,
simde__m512d b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_permutex2var_pd(a, idx, b); #else return simde_mm512_castsi512_pd(simde_mm512_permutex2var_epi64(simde_mm512_castpd_si512(a), idx, simde_mm512_castpd_si512(b))); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_permutex2var_pd #define _mm512_permutex2var_pd(a, idx, b) simde_mm512_permutex2var_pd(a, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_permutex2var_pd (simde__m512d a, simde__mmask8 k, simde__m512i idx, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_permutex2var_pd(a, k, idx, b); #else return simde_mm512_mask_mov_pd(a, k, simde_mm512_permutex2var_pd(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_permutex2var_pd #define _mm512_mask_permutex2var_pd(a, k, idx, b) simde_mm512_mask_permutex2var_pd(a, k, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask2_permutex2var_pd (simde__m512d a, simde__m512i idx, simde__mmask8 k, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask2_permutex2var_pd(a, idx, k, b); #else return simde_mm512_mask_mov_pd(simde_mm512_castsi512_pd(idx), k, simde_mm512_permutex2var_pd(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask2_permutex2var_pd #define _mm512_mask2_permutex2var_pd(a, idx, k, b) simde_mm512_mask2_permutex2var_pd(a, idx, k, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_maskz_permutex2var_pd (simde__mmask8 k, simde__m512d a, simde__m512i idx, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_permutex2var_pd(k, a, idx, b); #else return simde_mm512_maskz_mov_pd(k, simde_mm512_permutex2var_pd(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_permutex2var_pd #define _mm512_maskz_permutex2var_pd(k, a, idx, b) simde_mm512_maskz_permutex2var_pd(k, a, idx, b) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_permutex2var_ps (simde__m512 a, simde__m512i idx, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_permutex2var_ps(a, idx, b); #else return simde_mm512_castsi512_ps(simde_mm512_permutex2var_epi32(simde_mm512_castps_si512(a), idx, simde_mm512_castps_si512(b))); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_permutex2var_ps #define _mm512_permutex2var_ps(a, idx, b) simde_mm512_permutex2var_ps(a, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_permutex2var_ps (simde__m512 a, simde__mmask16 k, simde__m512i idx, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_permutex2var_ps(a, k, idx, b); #else return simde_mm512_mask_mov_ps(a, k, simde_mm512_permutex2var_ps(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_permutex2var_ps #define _mm512_mask_permutex2var_ps(a, k, idx, b) simde_mm512_mask_permutex2var_ps(a, k, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask2_permutex2var_ps (simde__m512 a, simde__m512i idx, simde__mmask16 k, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask2_permutex2var_ps(a, idx, k, b); #else return simde_mm512_mask_mov_ps(simde_mm512_castsi512_ps(idx), k, simde_mm512_permutex2var_ps(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask2_permutex2var_ps #define _mm512_mask2_permutex2var_ps(a, idx, k, b) simde_mm512_mask2_permutex2var_ps(a, idx, k, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_maskz_permutex2var_ps (simde__mmask16 k, simde__m512 a, simde__m512i idx, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_permutex2var_ps(k, a, idx, b); #else return simde_mm512_maskz_mov_ps(k, simde_mm512_permutex2var_ps(a, idx, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_permutex2var_ps #define 
_mm512_maskz_permutex2var_ps(k, a, idx, b) simde_mm512_maskz_permutex2var_ps(k, a, idx, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_PERMUTEX2VAR_H) */ simde-0.7.2/simde/x86/avx512/permutexvar.h000066400000000000000000001412171400333146700201370ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*
 * Copyright:
 *   2020      Evan Nemerson
 *   2020      Christopher Moore
 */

#if !defined(SIMDE_X86_AVX512_PERMUTEXVAR_H)
#define SIMDE_X86_AVX512_PERMUTEXVAR_H

#include "types.h"
#include "and.h"
#include "andnot.h"
#include "blend.h"
#include "mov.h"
#include "or.h"
#include "set1.h"
#include "slli.h"
#include "srli.h"
#include "test.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* Single-source permute of the eight 16-bit lanes of `a`: result lane
 * i is the lane of `a` named by the low three bits of idx lane i.
 *
 * The byte-shuffle based paths all use the same trick: a lane index n
 * is multiplied by 0x0202 (giving 2n in both bytes) and 0x0100 is
 * added, producing the byte-index pair (2n, 2n + 1) that addresses the
 * two bytes of 16-bit lane n. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_permutexvar_epi16 (simde__m128i idx, simde__m128i a) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_permutexvar_epi16(idx, a);
  #elif defined(SIMDE_X86_SSSE3_NATIVE)
    simde__m128i mask16 = simde_mm_set1_epi16(0x0007);
    simde__m128i shift16 = simde_mm_set1_epi16(0x0202);
    simde__m128i byte_index16 = simde_mm_set1_epi16(0x0100);
    simde__m128i index16 = simde_mm_and_si128(idx, mask16);
    index16 = simde_mm_mullo_epi16(index16, shift16);
    index16 = simde_mm_add_epi16(index16, byte_index16);
    return simde_mm_shuffle_epi8(a, index16);
  #else
    simde__m128i_private
      idx_ = simde__m128i_to_private(idx),
      a_ = simde__m128i_to_private(a),
      r_;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      uint16x8_t mask16 = vdupq_n_u16(0x0007);
      uint16x8_t byte_index16 = vdupq_n_u16(0x0100);
      uint16x8_t index16 = vandq_u16(idx_.neon_u16, mask16);
      index16 = vmulq_n_u16(index16, 0x0202);
      index16 = vaddq_u16(index16, byte_index16);
      r_.neon_u8 = vqtbl1q_u8(a_.neon_u8, vreinterpretq_u8_u16(index16));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) index16;
      index16 = vec_and(idx_.altivec_u16, vec_splat_u16(7));
      /* Multiply-add performs the *0x0202 + 0x0100 step in one call. */
      index16 = vec_mladd(index16, vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0202)), vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0100)));
      /* `a` is supplied as both halves of the permute table. */
      r_.altivec_u8 = vec_perm(a_.altivec_u8, a_.altivec_u8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index16));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      const v128_t mask16 = wasm_i16x8_splat(0x0007);
      const v128_t shift16 =
wasm_i16x8_splat(0x0202);
      const v128_t byte_index16 = wasm_i16x8_splat(0x0100);
      v128_t index16 = wasm_v128_and(idx_.wasm_v128, mask16);
      index16 = wasm_i16x8_mul(index16, shift16);
      index16 = wasm_i16x8_add(index16, byte_index16);
      r_.wasm_v128 = wasm_v8x16_swizzle(a_.wasm_v128, index16);
    #else
      /* Scalar reference: only the low three index bits are used. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = a_.i16[idx_.i16[i] & 0x07];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_permutexvar_epi16
  #define _mm_permutexvar_epi16(idx, a) simde_mm_permutexvar_epi16(idx, a)
#endif

/* "mask" form: native intrinsic when both AVX-512 BW and VL are native;
 * otherwise the unmasked permute is passed with `src` and `k` to
 * simde_mm_mask_mov_epi16 (defined elsewhere; presumably keeps the lane
 * of `src` where the bit of `k` is clear -- TODO confirm). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_mask_permutexvar_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i idx, simde__m128i a) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_mask_permutexvar_epi16(src, k, idx, a);
  #else
    return simde_mm_mask_mov_epi16(src, k, simde_mm_permutexvar_epi16(idx, a));
  #endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_mask_permutexvar_epi16
  #define _mm_mask_permutexvar_epi16(src, k, idx, a) simde_mm_mask_permutexvar_epi16(src, k, idx, a)
#endif

/* "maskz" form: the unmasked permute result goes through
 * simde_mm_maskz_mov_epi16(k, ...). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_maskz_permutexvar_epi16 (simde__mmask8 k, simde__m128i idx, simde__m128i a) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_maskz_permutexvar_epi16(k, idx, a);
  #else
    return simde_mm_maskz_mov_epi16(k, simde_mm_permutexvar_epi16(idx, a));
  #endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_maskz_permutexvar_epi16
  #define _mm_maskz_permutexvar_epi16(k, idx, a) simde_mm_maskz_permutexvar_epi16(k, idx, a)
#endif

/* Single-source permute of the sixteen bytes of `a`, selected by the
 * bytes of `idx`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_permutexvar_epi8 (simde__m128i idx, simde__m128i a) {
  #if
defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_permutexvar_epi8(idx, a);
  #elif defined(SIMDE_X86_SSSE3_NATIVE)
    /* Keep only the low four index bits, then byte-shuffle. */
    simde__m128i mask = simde_mm_set1_epi8(0x0F);
    simde__m128i index = simde_mm_and_si128(idx, mask);
    return simde_mm_shuffle_epi8(a, index);
  #else
    simde__m128i_private
      idx_ = simde__m128i_to_private(idx),
      a_ = simde__m128i_to_private(a),
      r_;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      uint8x16_t mask = vdupq_n_u8(0x0F);
      uint8x16_t index = vandq_u8(idx_.neon_u8, mask);
      r_.neon_u8 = vqtbl1q_u8(a_.neon_u8, index);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* idx is used unmasked here; `a` is supplied as both table
       * halves, so presumably any index bit above the low four cannot
       * change the selected byte -- confirm against vec_perm docs. */
      r_.altivec_u8 = vec_perm(a_.altivec_u8, a_.altivec_u8, idx_.altivec_u8);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      const v128_t mask = wasm_i8x16_splat(0x0F);
      v128_t index = wasm_v128_and(idx_.wasm_v128, mask);
      r_.wasm_v128 = wasm_v8x16_swizzle(a_.wasm_v128, index);
    #else
      /* Scalar reference: only the low four index bits are used. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = a_.i8[idx_.i8[i] & 0x0F];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_permutexvar_epi8
  #define _mm_permutexvar_epi8(idx, a) simde_mm_permutexvar_epi8(idx, a)
#endif

/* "mask" form: native intrinsic when both AVX-512 VBMI and VL are
 * native; otherwise the unmasked permute is passed with `src` and `k`
 * to simde_mm_mask_mov_epi8 (defined elsewhere; presumably keeps the
 * byte of `src` where the bit of `k` is clear -- TODO confirm). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_mask_permutexvar_epi8 (simde__m128i src, simde__mmask16 k, simde__m128i idx, simde__m128i a) {
  #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_mask_permutexvar_epi8(src, k, idx, a);
  #else
    return simde_mm_mask_mov_epi8(src, k, simde_mm_permutexvar_epi8(idx, a));
  #endif
}
#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_mask_permutexvar_epi8
  #define _mm_mask_permutexvar_epi8(src, k, idx, a) simde_mm_mask_permutexvar_epi8(src, k, idx, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_maskz_permutexvar_epi8 (simde__mmask16 k, simde__m128i idx, simde__m128i a)
{
  /* "maskz" form of the 128-bit byte permute: the unmasked permute
   * result goes through simde_mm_maskz_mov_epi8(k, ...) when the native
   * intrinsic is unavailable. */
  #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_maskz_permutexvar_epi8(k, idx, a);
  #else
    return simde_mm_maskz_mov_epi8(k, simde_mm_permutexvar_epi8(idx, a));
  #endif
}
#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_maskz_permutexvar_epi8
  #define _mm_maskz_permutexvar_epi8(k, idx, a) simde_mm_maskz_permutexvar_epi8(k, idx, a)
#endif

/* Single-source permute of the sixteen 16-bit lanes of a 256-bit
 * vector.  The scalar fallback further down uses the low four bits of
 * each index. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_permutexvar_epi16 (simde__m256i idx, simde__m256i a) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_permutexvar_epi16(idx, a);
  #elif defined(SIMDE_X86_AVX2_NATIVE)
    /* NOTE(review): the mask keeps five index bits although there are
     * only sixteen lanes; the extra bit does not appear to reach either
     * the shuffle (low four bits of the byte index) or the blend
     * selector (bit 4 of the byte index), so this looks equivalent to
     * 0x000F -- confirm. */
    simde__m256i mask16 = simde_mm256_set1_epi16(0x001F);
    simde__m256i shift16 = simde_mm256_set1_epi16(0x0202);
    simde__m256i byte_index16 = simde_mm256_set1_epi16(0x0100);
    simde__m256i index16 = simde_mm256_and_si256(idx, mask16);
    /* Lane index n -> byte index 2n in both bytes of the lane. */
    index16 = simde_mm256_mullo_epi16(index16, shift16);
    /* lo / hi: lower / upper 128 bits of `a` duplicated into both
     * halves, so the in-lane byte shuffle can reach either. */
    simde__m256i lo = simde_mm256_permute4x64_epi64(a, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0));
    simde__m256i hi = simde_mm256_permute4x64_epi64(a, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0));
    /* Move byte-index bit 4 (source is in the upper 128 bits) into
     * bit 7 of each byte for the byte blend; this must be taken before
     * 0x0100 is added below. */
    simde__m256i select = simde_mm256_slli_epi64(index16, 3);
    index16 = simde_mm256_add_epi16(index16, byte_index16);
    lo = simde_mm256_shuffle_epi8(lo, index16);
    hi = simde_mm256_shuffle_epi8(hi, index16);
    return simde_mm256_blendv_epi8(lo, hi, select);
  #else
    simde__m256i_private
      idx_ = simde__m256i_to_private(idx),
      a_ = simde__m256i_to_private(a),
      r_;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      /* Both 128-bit halves of `a` form one 32-byte lookup table. */
      uint8x16x2_t table = { { a_.m128i_private[0].neon_u8,
                               a_.m128i_private[1].neon_u8 } };
      uint16x8_t mask16 = vdupq_n_u16(0x000F);
      uint16x8_t byte_index16 = vdupq_n_u16(0x0100);

      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) {
        uint16x8_t index16 = vandq_u16(idx_.m128i_private[i].neon_u16, mask16);
        index16 = vmulq_n_u16(index16, 0x0202);
        index16 = vaddq_u16(index16,
byte_index16);
        r_.m128i_private[i].neon_u8 = vqtbl2q_u8(table, vreinterpretq_u8_u16(index16));
      }
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) index16, mask16, shift16, byte_index16;
      mask16 = vec_splat_u16(0x000F);
      shift16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0202));
      byte_index16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0100));
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) {
        index16 = vec_and(idx_.m128i_private[i].altivec_u16, mask16);
        /* lane index n -> byte pair (2n, 2n + 1) in one multiply-add */
        index16 = vec_mladd(index16, shift16, byte_index16);
        /* The two 128-bit halves of `a` are the two permute tables. */
        r_.m128i_private[i].altivec_u8 = vec_perm(a_.m128i_private[0].altivec_u8,
                                                  a_.m128i_private[1].altivec_u8,
                                                  HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index16));
      }
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      v128_t index, index16, r, t;
      const v128_t mask16 = wasm_i16x8_splat(0x000F);
      const v128_t shift16 = wasm_i16x8_splat(0x0202);
      const v128_t byte_index16 = wasm_i16x8_splat(0x0100);
      const v128_t sixteen = wasm_i8x16_splat(16);
      const v128_t a0 = a_.m128i_private[0].wasm_v128;
      const v128_t a1 = a_.m128i_private[1].wasm_v128;

      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) {
        index16 = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask16);
        index16 = wasm_i16x8_mul(index16, shift16);
        index = wasm_i16x8_add(index16, byte_index16);
        /* Look up in the lower half, then (after subtracting 16 from
         * every byte index) in the upper half, and OR the two results.
         * This presumably relies on the swizzle yielding 0 for an
         * out-of-range index -- confirm against the WASM SIMD spec. */
        r = wasm_v8x16_swizzle(a0, index);

        index = wasm_i8x16_sub(index, sixteen);
        t = wasm_v8x16_swizzle(a1, index);
        r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t);
      }
    #else
      /* Scalar reference: only the low four index bits are used. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = a_.i16[idx_.i16[i] & 0x0F];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_permutexvar_epi16
  #define _mm256_permutexvar_epi16(idx, a) simde_mm256_permutexvar_epi16(idx, a)
#endif

/* "mask" form: native intrinsic when both AVX-512 BW and VL are native;
 * otherwise the unmasked permute is passed with `src` and `k` to
 * simde_mm256_mask_mov_epi16 (defined elsewhere; presumably keeps the
 * lane of `src` where the bit of `k` is clear -- TODO confirm). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_mask_permutexvar_epi16 (simde__m256i src, simde__mmask16 k, simde__m256i idx, simde__m256i a) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask_permutexvar_epi16(src, k, idx, a);
  #else
    return simde_mm256_mask_mov_epi16(src, k, simde_mm256_permutexvar_epi16(idx, a));
  #endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_permutexvar_epi16
  #define _mm256_mask_permutexvar_epi16(src, k, idx, a) simde_mm256_mask_permutexvar_epi16(src, k, idx, a)
#endif

/* "maskz" form: the unmasked permute result goes through
 * simde_mm256_maskz_mov_epi16(k, ...). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_maskz_permutexvar_epi16 (simde__mmask16 k, simde__m256i idx, simde__m256i a) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_maskz_permutexvar_epi16(k, idx, a);
  #else
    return simde_mm256_maskz_mov_epi16(k, simde_mm256_permutexvar_epi16(idx, a));
  #endif
}
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_maskz_permutexvar_epi16
  #define _mm256_maskz_permutexvar_epi16(k, idx, a) simde_mm256_maskz_permutexvar_epi16(k, idx, a)
#endif

/* Single-source permute of the eight 32-bit lanes of a 256-bit vector.
 * Note the argument order: (idx, a) here versus (a, idx) for the AVX2
 * helper used in the second branch. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_permutexvar_epi32 (simde__m256i idx, simde__m256i a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_permutexvar_epi32(idx, a);
  #elif defined(SIMDE_X86_AVX2_NATIVE)
    return simde_mm256_permutevar8x32_epi32(a, idx);
  #else
    simde__m256i_private
      idx_ = simde__m256i_to_private(idx),
      a_ = simde__m256i_to_private(a),
      r_;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      /* Both 128-bit halves of `a` form one 32-byte lookup table. */
      uint8x16x2_t table = { { a_.m128i_private[0].neon_u8,
                               a_.m128i_private[1].neon_u8 } };
      uint32x4_t mask32 = vdupq_n_u32(0x00000007);
      /* Per-byte offsets 0,1,2,3 inside a 32-bit lane. */
      uint32x4_t byte_index32 = vdupq_n_u32(0x03020100);

      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) {
        uint32x4_t index32 =
vandq_u32(idx_.m128i_private[i].neon_u32, mask32);
        /* lane index n -> 4n in every byte, then add the byte offsets
         * to get byte indices 4n .. 4n + 3 */
        index32 = vmulq_n_u32(index32, 0x04040404);
        index32 = vaddq_u32(index32, byte_index32);
        r_.m128i_private[i].neon_u8 = vqtbl2q_u8(table, vreinterpretq_u8_u32(index32));
      }
    #else
      /* Scalar reference: only the low three index bits are used.  The
       * vectorize hint is withheld for the Intel compiler; the reason
       * is not stated in this file. */
      #if !defined(__INTEL_COMPILER)
        SIMDE_VECTORIZE
      #endif
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.i32[i] = a_.i32[idx_.i32[i] & 0x07];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_permutexvar_epi32
  #define _mm256_permutexvar_epi32(idx, a) simde_mm256_permutexvar_epi32(idx, a)
#endif

/* "mask" form: native intrinsic when both AVX-512 F and VL are native;
 * otherwise the unmasked permute is passed with `src` and `k` to
 * simde_mm256_mask_mov_epi32 (defined elsewhere; presumably keeps the
 * lane of `src` where the bit of `k` is clear -- TODO confirm). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_mask_permutexvar_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i idx, simde__m256i a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask_permutexvar_epi32(src, k, idx, a);
  #else
    return simde_mm256_mask_mov_epi32(src, k, simde_mm256_permutexvar_epi32(idx, a));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_permutexvar_epi32
  #define _mm256_mask_permutexvar_epi32(src, k, idx, a) simde_mm256_mask_permutexvar_epi32(src, k, idx, a)
#endif

/* "maskz" form: the unmasked permute result goes through
 * simde_mm256_maskz_mov_epi32(k, ...). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_maskz_permutexvar_epi32 (simde__mmask8 k, simde__m256i idx, simde__m256i a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_maskz_permutexvar_epi32(k, idx, a);
  #else
    return simde_mm256_maskz_mov_epi32(k, simde_mm256_permutexvar_epi32(idx, a));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_maskz_permutexvar_epi32
  #define _mm256_maskz_permutexvar_epi32(k, idx, a) simde_mm256_maskz_permutexvar_epi32(k, idx, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_permutexvar_epi64 (simde__m256i idx, simde__m256i a)
{
  /* Single-source permute of the four 64-bit lanes of a 256-bit
   * vector; only a native and a scalar implementation exist here. */
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_permutexvar_epi64(idx, a);
  #else
    simde__m256i_private
      idx_ = simde__m256i_to_private(idx),
      a_ = simde__m256i_to_private(a),
      r_;

    /* Only the low two index bits are used.  The vectorize hint is
     * withheld for the Intel compiler; the reason is not stated in
     * this file. */
    #if !defined(__INTEL_COMPILER)
      SIMDE_VECTORIZE
    #endif
    for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
      r_.i64[i] = a_.i64[idx_.i64[i] & 3];
    }

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_permutexvar_epi64
  #define _mm256_permutexvar_epi64(idx, a) simde_mm256_permutexvar_epi64(idx, a)
#endif

/* "mask" form: native intrinsic when both AVX-512 F and VL are native;
 * otherwise the unmasked permute is passed with `src` and `k` to
 * simde_mm256_mask_mov_epi64 (defined elsewhere; presumably keeps the
 * lane of `src` where the bit of `k` is clear -- TODO confirm). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_mask_permutexvar_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i idx, simde__m256i a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask_permutexvar_epi64(src, k, idx, a);
  #else
    return simde_mm256_mask_mov_epi64(src, k, simde_mm256_permutexvar_epi64(idx, a));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_permutexvar_epi64
  #define _mm256_mask_permutexvar_epi64(src, k, idx, a) simde_mm256_mask_permutexvar_epi64(src, k, idx, a)
#endif

/* "maskz" form: the unmasked permute result goes through
 * simde_mm256_maskz_mov_epi64(k, ...). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_maskz_permutexvar_epi64 (simde__mmask8 k, simde__m256i idx, simde__m256i a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_maskz_permutexvar_epi64(k, idx, a);
  #else
    return simde_mm256_maskz_mov_epi64(k, simde_mm256_permutexvar_epi64(idx, a));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_maskz_permutexvar_epi64
  #define _mm256_maskz_permutexvar_epi64(k, idx, a) simde_mm256_maskz_permutexvar_epi64(k, idx, a)
#endif

/* Single-source permute of the thirty-two bytes of a 256-bit vector,
 * selected by the bytes of `idx`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_permutexvar_epi8 (simde__m256i idx, simde__m256i a) {
  #if
defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_permutexvar_epi8(idx, a);
  #elif defined(SIMDE_X86_AVX2_NATIVE)
    simde__m256i mask = simde_mm256_set1_epi8(0x0F);
    /* lo / hi: lower / upper 128 bits of `a` duplicated into both
     * halves, so the in-lane byte shuffle can reach either. */
    simde__m256i lo = simde_mm256_permute4x64_epi64(a, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0));
    simde__m256i hi = simde_mm256_permute4x64_epi64(a, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0));
    /* Low four bits address a byte inside a 128-bit half ... */
    simde__m256i index = simde_mm256_and_si256(idx, mask);
    /* ... and bit 4, shifted into bit 7 of each byte, selects the
     * upper half in the byte blend. */
    simde__m256i select = simde_mm256_slli_epi64(idx, 3);
    lo = simde_mm256_shuffle_epi8(lo, index);
    hi = simde_mm256_shuffle_epi8(hi, index);
    return simde_mm256_blendv_epi8(lo, hi, select);
  #else
    simde__m256i_private
      idx_ = simde__m256i_to_private(idx),
      a_ = simde__m256i_to_private(a),
      r_;

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      /* Both 128-bit halves of `a` form one 32-byte lookup table. */
      uint8x16x2_t table = { { a_.m128i_private[0].neon_u8,
                               a_.m128i_private[1].neon_u8 } };
      uint8x16_t mask = vdupq_n_u8(0x1F);

      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) {
        r_.m128i_private[i].neon_u8 = vqtbl2q_u8(table, vandq_u8(idx_.m128i_private[i].neon_u8, mask));
      }
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* idx is used unmasked; presumably vec_perm itself only looks at
       * the low five bits of each index byte -- confirm. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) {
        r_.m128i_private[i].altivec_u8 = vec_perm(a_.m128i_private[0].altivec_u8, a_.m128i_private[1].altivec_u8, idx_.m128i_private[i].altivec_u8);
      }
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      v128_t index, r, t;
      const v128_t mask = wasm_i8x16_splat(0x1F);
      const v128_t sixteen = wasm_i8x16_splat(16);
      const v128_t a0 = a_.m128i_private[0].wasm_v128;
      const v128_t a1 = a_.m128i_private[1].wasm_v128;

      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) {
        index = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask);
        /* Look up in the lower half, then (after subtracting 16) in the
         * upper half, and OR the results; presumably relies on the
         * swizzle yielding 0 for out-of-range indices -- confirm. */
        r = wasm_v8x16_swizzle(a0, index);
        index = wasm_i8x16_sub(index, sixteen);
        t = wasm_v8x16_swizzle(a1, index);
        r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t);
      }
    #else
      SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        /* Scalar reference: only the low five index bits are used. */
        r_.i8[i] = a_.i8[idx_.i8[i] & 0x1F];
      }
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_permutexvar_epi8
  #define _mm256_permutexvar_epi8(idx, a) simde_mm256_permutexvar_epi8(idx, a)
#endif

/* "mask" form: native intrinsic when both AVX-512 VBMI and VL are
 * native; otherwise the unmasked permute is passed with `src` and `k`
 * to simde_mm256_mask_mov_epi8 (defined elsewhere; presumably keeps the
 * byte of `src` where the bit of `k` is clear -- TODO confirm). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_mask_permutexvar_epi8 (simde__m256i src, simde__mmask32 k, simde__m256i idx, simde__m256i a) {
  #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_mask_permutexvar_epi8(src, k, idx, a);
  #else
    return simde_mm256_mask_mov_epi8(src, k, simde_mm256_permutexvar_epi8(idx, a));
  #endif
}
#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_mask_permutexvar_epi8
  #define _mm256_mask_permutexvar_epi8(src, k, idx, a) simde_mm256_mask_permutexvar_epi8(src, k, idx, a)
#endif

/* "maskz" form: the unmasked permute result goes through
 * simde_mm256_maskz_mov_epi8(k, ...). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_maskz_permutexvar_epi8 (simde__mmask32 k, simde__m256i idx, simde__m256i a) {
  #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_maskz_permutexvar_epi8(k, idx, a);
  #else
    return simde_mm256_maskz_mov_epi8(k, simde_mm256_permutexvar_epi8(idx, a));
  #endif
}
#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm256_maskz_permutexvar_epi8
  #define _mm256_maskz_permutexvar_epi8(k, idx, a) simde_mm256_maskz_permutexvar_epi8(k, idx, a)
#endif

/* Single-source permute of double-precision lanes.  The fallback
 * reinterprets `a` as integers, runs the 64-bit integer permute and
 * reinterprets the result back. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_permutexvar_pd (simde__m256i idx, simde__m256d a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm256_permutexvar_pd(idx, a);
  #else
    return simde_mm256_castsi256_pd(simde_mm256_permutexvar_epi64(idx, simde_mm256_castpd_si256(a)));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) ||
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_permutexvar_pd #define _mm256_permutexvar_pd(idx, a) simde_mm256_permutexvar_pd(idx, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_mask_permutexvar_pd (simde__m256d src, simde__mmask8 k, simde__m256i idx, simde__m256d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_mask_permutexvar_pd(src, k, idx, a); #else return simde_mm256_mask_mov_pd(src, k, simde_mm256_permutexvar_pd(idx, a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_permutexvar_pd #define _mm256_mask_permutexvar_pd(src, k, idx, a) simde_mm256_mask_permutexvar_pd(src, k, idx, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_maskz_permutexvar_pd (simde__mmask8 k, simde__m256i idx, simde__m256d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_maskz_permutexvar_pd(k, idx, a); #else return simde_mm256_maskz_mov_pd(k, simde_mm256_permutexvar_pd(idx, a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_permutexvar_pd #define _mm256_maskz_permutexvar_pd(k, idx, a) simde_mm256_maskz_permutexvar_pd(k, idx, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_permutexvar_ps (simde__m256i idx, simde__m256 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_permutexvar_ps(idx, a); #elif defined(SIMDE_X86_AVX2_NATIVE) return simde_mm256_permutevar8x32_ps(a, idx); #else return simde_mm256_castsi256_ps(simde_mm256_permutexvar_epi32(idx, simde_mm256_castps_si256(a))); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_permutexvar_ps #define _mm256_permutexvar_ps(idx, a) simde_mm256_permutexvar_ps(idx, a) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde__m256 simde_mm256_mask_permutexvar_ps (simde__m256 src, simde__mmask8 k, simde__m256i idx, simde__m256 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_mask_permutexvar_ps(src, k, idx, a); #else return simde_mm256_mask_mov_ps(src, k, simde_mm256_permutexvar_ps(idx, a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_permutexvar_ps #define _mm256_mask_permutexvar_ps(src, k, idx, a) simde_mm256_mask_permutexvar_ps(src, k, idx, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_maskz_permutexvar_ps (simde__mmask8 k, simde__m256i idx, simde__m256 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_maskz_permutexvar_ps(k, idx, a); #else return simde_mm256_maskz_mov_ps(k, simde_mm256_permutexvar_ps(idx, a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_permutexvar_ps #define _mm256_maskz_permutexvar_ps(k, idx, a) simde_mm256_maskz_permutexvar_ps(k, idx, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_permutexvar_epi16 (simde__m512i idx, simde__m512i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_permutexvar_epi16(idx, a); #else simde__m512i_private idx_ = simde__m512i_to_private(idx), a_ = simde__m512i_to_private(a), r_; #if defined(SIMDE_X86_AVX2_NATIVE) simde__m256i t0, t1, index, select, a01, a23; simde__m256i mask = simde_mm256_set1_epi16(0x001F); simde__m256i shift = simde_mm256_set1_epi16(0x0202); simde__m256i byte_index = simde_mm256_set1_epi16(0x0100); simde__m256i a0 = simde_mm256_broadcastsi128_si256(a_.m128i[0]); simde__m256i a1 = simde_mm256_broadcastsi128_si256(a_.m128i[1]); simde__m256i a2 = simde_mm256_broadcastsi128_si256(a_.m128i[2]); simde__m256i a3 = simde_mm256_broadcastsi128_si256(a_.m128i[3]); SIMDE_VECTORIZE for (size_t i = 0 ; i < 
(sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { index = idx_.m256i[i]; index = simde_mm256_and_si256(index, mask); index = simde_mm256_mullo_epi16(index, shift); index = simde_mm256_add_epi16(index, byte_index); t0 = simde_mm256_shuffle_epi8(a0, index); t1 = simde_mm256_shuffle_epi8(a1, index); select = simde_mm256_slli_epi64(index, 3); a01 = simde_mm256_blendv_epi8(t0, t1, select); t0 = simde_mm256_shuffle_epi8(a2, index); t1 = simde_mm256_shuffle_epi8(a3, index); a23 = simde_mm256_blendv_epi8(t0, t1, select); select = simde_mm256_slli_epi64(index, 2); r_.m256i[i] = simde_mm256_blendv_epi8(a01, a23, select); } #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint8x16x4_t table = { { a_.m128i_private[0].neon_u8, a_.m128i_private[1].neon_u8, a_.m128i_private[2].neon_u8, a_.m128i_private[3].neon_u8 } }; uint16x8_t mask16 = vdupq_n_u16(0x001F); uint16x8_t byte_index16 = vdupq_n_u16(0x0100); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { uint16x8_t index16 = vandq_u16(idx_.m128i_private[i].neon_u16, mask16); index16 = vmulq_n_u16(index16, 0x0202); index16 = vaddq_u16(index16, byte_index16); r_.m128i_private[i].neon_u8 = vqtbl4q_u8(table, vreinterpretq_u8_u16(index16)); } #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) index16, mask16, shift16, byte_index16; SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) index, test, r01, r23; mask16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x001F)); shift16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0202)); byte_index16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0100)); test = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 0x20)); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { index16 = vec_and(idx_.m128i_private[i].altivec_u16, mask16); index16 = vec_mladd(index16, shift16, byte_index16); index = 
HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index16); r01 = vec_perm(a_.m128i_private[0].altivec_u8, a_.m128i_private[1].altivec_u8, index); r23 = vec_perm(a_.m128i_private[2].altivec_u8, a_.m128i_private[3].altivec_u8, index); r_.m128i_private[i].altivec_u8 = vec_sel(r01, r23, vec_cmpeq(vec_and(index, test), test)); } #elif defined(SIMDE_WASM_SIMD128_NATIVE) v128_t index, r, t; const v128_t mask = wasm_i16x8_splat(0x001F); const v128_t shift = wasm_i16x8_splat(0x0202); const v128_t byte_index = wasm_i16x8_splat(0x0100); const v128_t sixteen = wasm_i8x16_splat(16); const v128_t a0 = a_.m128i_private[0].wasm_v128; const v128_t a1 = a_.m128i_private[1].wasm_v128; const v128_t a2 = a_.m128i_private[2].wasm_v128; const v128_t a3 = a_.m128i_private[3].wasm_v128; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { index = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask); index = wasm_i16x8_mul(index, shift); index = wasm_i16x8_add(index, byte_index); r = wasm_v8x16_swizzle(a0, index); index = wasm_i8x16_sub(index, sixteen); t = wasm_v8x16_swizzle(a1, index); r = wasm_v128_or(r, t); index = wasm_i8x16_sub(index, sixteen); t = wasm_v8x16_swizzle(a2, index); r = wasm_v128_or(r, t); index = wasm_i8x16_sub(index, sixteen); t = wasm_v8x16_swizzle(a3, index); r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[idx_.i16[i] & 0x1F]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_permutexvar_epi16 #define _mm512_permutexvar_epi16(idx, a) simde_mm512_permutexvar_epi16(idx, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_permutexvar_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i idx, simde__m512i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_permutexvar_epi16(src, k, idx, 
a); #else return simde_mm512_mask_mov_epi16(src, k, simde_mm512_permutexvar_epi16(idx, a)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_permutexvar_epi16 #define _mm512_mask_permutexvar_epi16(src, k, idx, a) simde_mm512_mask_permutexvar_epi16(src, k, idx, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_permutexvar_epi16 (simde__mmask32 k, simde__m512i idx, simde__m512i a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_permutexvar_epi16(k, idx, a); #else return simde_mm512_maskz_mov_epi16(k, simde_mm512_permutexvar_epi16(idx, a)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_permutexvar_epi16 #define _mm512_maskz_permutexvar_epi16(k, idx, a) simde_mm512_maskz_permutexvar_epi16(k, idx, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_permutexvar_epi32 (simde__m512i idx, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_permutexvar_epi32(idx, a); #else simde__m512i_private idx_ = simde__m512i_to_private(idx), a_ = simde__m512i_to_private(a), r_; #if defined(SIMDE_X86_AVX2_NATIVE) simde__m256i index, r0, r1, select; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { index = idx_.m256i[i]; r0 = simde_mm256_permutevar8x32_epi32(a_.m256i[0], index); r1 = simde_mm256_permutevar8x32_epi32(a_.m256i[1], index); select = simde_mm256_slli_epi32(index, 28); r_.m256i[i] = simde_mm256_castps_si256(simde_mm256_blendv_ps(simde_mm256_castsi256_ps(r0), simde_mm256_castsi256_ps(r1), simde_mm256_castsi256_ps(select))); } #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint8x16x4_t table = { { a_.m128i_private[0].neon_u8, a_.m128i_private[1].neon_u8, a_.m128i_private[2].neon_u8, a_.m128i_private[3].neon_u8 } }; uint32x4_t mask32 = vdupq_n_u32(0x0000000F); uint32x4_t byte_index32 = vdupq_n_u32(0x03020100); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) 
; i++) { uint32x4_t index32 = vandq_u32(idx_.m128i_private[i].neon_u32, mask32); index32 = vmulq_n_u32(index32, 0x04040404); index32 = vaddq_u32(index32, byte_index32); r_.m128i_private[i].neon_u8 = vqtbl4q_u8(table, vreinterpretq_u8_u32(index32)); } #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) index32, mask32, byte_index32, temp32, sixteen; SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) zero, shift; SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) index, test, r01, r23; mask32 = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0x0000000F)); byte_index32 = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0x03020100)); zero = vec_splat_u16(0); shift = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0404)); sixteen = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 16)); test = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 0x20)); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { index32 = vec_and(idx_.m128i_private[i].altivec_u32, mask32); /* Multiply index32 by 0x04040404; unfortunately vec_mul isn't available so (mis)use 16-bit vec_mladd */ temp32 = vec_sl(index32, sixteen); index32 = vec_add(index32, temp32); index32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_mladd(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), index32), shift, zero)); index32 = vec_add(index32, byte_index32); index = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index32); r01 = vec_perm(a_.m128i_private[0].altivec_u8, a_.m128i_private[1].altivec_u8, index); r23 = vec_perm(a_.m128i_private[2].altivec_u8, a_.m128i_private[3].altivec_u8, index); r_.m128i_private[i].altivec_u8 = vec_sel(r01, r23, vec_cmpeq(vec_and(index, test), test)); } #elif defined(SIMDE_WASM_SIMD128_NATIVE) v128_t index, r, t; const v128_t mask = wasm_i32x4_splat(0x0000000F); const v128_t shift = wasm_i32x4_splat(0x04040404); const v128_t byte_index = wasm_i32x4_splat(0x03020100); const 
v128_t sixteen = wasm_i8x16_splat(16); const v128_t a0 = a_.m128i_private[0].wasm_v128; const v128_t a1 = a_.m128i_private[1].wasm_v128; const v128_t a2 = a_.m128i_private[2].wasm_v128; const v128_t a3 = a_.m128i_private[3].wasm_v128; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { index = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask); index = wasm_i32x4_mul(index, shift); index = wasm_i32x4_add(index, byte_index); r = wasm_v8x16_swizzle(a0, index); index = wasm_i8x16_sub(index, sixteen); t = wasm_v8x16_swizzle(a1, index); r = wasm_v128_or(r, t); index = wasm_i8x16_sub(index, sixteen); t = wasm_v8x16_swizzle(a2, index); r = wasm_v128_or(r, t); index = wasm_i8x16_sub(index, sixteen); t = wasm_v8x16_swizzle(a3, index); r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t); } #else #if !defined(__INTEL_COMPILER) SIMDE_VECTORIZE #endif for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[idx_.i32[i] & 0x0F]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_permutexvar_epi32 #define _mm512_permutexvar_epi32(idx, a) simde_mm512_permutexvar_epi32(idx, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_permutexvar_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i idx, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_permutexvar_epi32(src, k, idx, a); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_permutexvar_epi32(idx, a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_permutexvar_epi32 #define _mm512_mask_permutexvar_epi32(src, k, idx, a) simde_mm512_mask_permutexvar_epi32(src, k, idx, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_permutexvar_epi32 (simde__mmask16 k, simde__m512i idx, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_permutexvar_epi32(k, idx, a); 
#else return simde_mm512_maskz_mov_epi32(k, simde_mm512_permutexvar_epi32(idx, a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_permutexvar_epi32 #define _mm512_maskz_permutexvar_epi32(k, idx, a) simde_mm512_maskz_permutexvar_epi32(k, idx, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_permutexvar_epi64 (simde__m512i idx, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_permutexvar_epi64(idx, a); #else simde__m512i_private idx_ = simde__m512i_to_private(idx), a_ = simde__m512i_to_private(a), r_; #if !defined(__INTEL_COMPILER) SIMDE_VECTORIZE #endif for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[idx_.i64[i] & 7]; } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_permutexvar_epi64 #define _mm512_permutexvar_epi64(idx, a) simde_mm512_permutexvar_epi64(idx, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_permutexvar_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i idx, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_permutexvar_epi64(src, k, idx, a); #else return simde_mm512_mask_mov_epi64(src, k, simde_mm512_permutexvar_epi64(idx, a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_permutexvar_epi64 #define _mm512_mask_permutexvar_epi64(src, k, idx, a) simde_mm512_mask_permutexvar_epi64(src, k, idx, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_permutexvar_epi64 (simde__mmask8 k, simde__m512i idx, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_permutexvar_epi64(k, idx, a); #else return simde_mm512_maskz_mov_epi64(k, simde_mm512_permutexvar_epi64(idx, a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_permutexvar_epi64 #define _mm512_maskz_permutexvar_epi64(k, idx, a) simde_mm512_maskz_permutexvar_epi64(k, idx, a) #endif 
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
/* Byte-granular permute of a 512-bit vector: output byte i is the byte of
 * `a` selected by the low six bits of byte i of `idx` (see the portable
 * loop at the end, which is the reference behavior).  Several
 * target-specific implementations are tried before that loop. */
simde_mm512_permutexvar_epi8 (simde__m512i idx, simde__m512i a) {
  #if defined(SIMDE_X86_AVX512VBMI_NATIVE)
    return _mm512_permutexvar_epi8(idx, a);
  #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
    /* Emulate the byte permute with two 16-bit permutes.  Every 16-bit
     * lane of idx carries two byte selectors; (selector >> 1) names the
     * source word and bit 0 of the selector chooses that word's low or
     * high byte. */
    simde__m512i hilo, hi, lo, hi2, lo2, idx2;
    simde__m512i ones = simde_mm512_set1_epi8(1);
    simde__m512i low_bytes = simde_mm512_set1_epi16(0x00FF);

    /* Even output bytes: word index comes from the low selector byte. */
    idx2 = simde_mm512_srli_epi16(idx, 1);
    hilo = simde_mm512_permutexvar_epi16(idx2, a);
    /* mask bit set <=> the byte selector is odd (wants the high byte). */
    simde__mmask64 mask = simde_mm512_test_epi8_mask(idx, ones);
    lo = simde_mm512_and_si512(hilo, low_bytes);
    hi = simde_mm512_srli_epi16(hilo, 8);

    /* Odd output bytes: word index comes from the high selector byte. */
    idx2 = simde_mm512_srli_epi16(idx, 9);
    hilo = simde_mm512_permutexvar_epi16(idx2, a);
    lo2 = simde_mm512_slli_epi16(hilo, 8);
    hi2 = simde_mm512_andnot_si512(low_bytes, hilo);

    /* lo / hi now hold, for every output byte position, the low / high
     * byte of the word that position selected. */
    lo = simde_mm512_or_si512(lo, lo2);
    hi = simde_mm512_or_si512(hi, hi2);

    return simde_mm512_mask_blend_epi8(mask, lo, hi);
  #else
    simde__m512i_private
      idx_ = simde__m512i_to_private(idx),
      a_ = simde__m512i_to_private(a),
      r_;

    #if defined(SIMDE_X86_AVX2_NATIVE)
      /* Shuffle within each of the four 128-bit chunks of `a` (each
       * broadcast to both halves of a 256-bit register), then use index
       * bits 4 and 5 to pick the chunk via two levels of byte blends. */
      simde__m256i t0, t1, index, select, a01, a23;
      simde__m256i mask = simde_mm256_set1_epi8(0x3F);
      simde__m256i a0 = simde_mm256_broadcastsi128_si256(a_.m128i[0]);
      simde__m256i a1 = simde_mm256_broadcastsi128_si256(a_.m128i[1]);
      simde__m256i a2 = simde_mm256_broadcastsi128_si256(a_.m128i[2]);
      simde__m256i a3 = simde_mm256_broadcastsi128_si256(a_.m128i[3]);

      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) {
        index = idx_.m256i[i];
        index = simde_mm256_and_si256(index, mask);
        /* Move index bit 4 into bit 7 of each byte for the blend. */
        select = simde_mm256_slli_epi64(index, 3);
        t0 = simde_mm256_shuffle_epi8(a0, index);
        t1 = simde_mm256_shuffle_epi8(a1, index);
        a01 = simde_mm256_blendv_epi8(t0, t1, select);
        t0 = simde_mm256_shuffle_epi8(a2, index);
        t1 = simde_mm256_shuffle_epi8(a3, index);
        a23 = simde_mm256_blendv_epi8(t0, t1, select);
        /* Move index bit 5 into bit 7: chooses chunks 0/1 vs 2/3. */
        select = simde_mm256_slli_epi64(index, 2);
        r_.m256i[i] = simde_mm256_blendv_epi8(a01, a23, select);
      }
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      /* All 64 source bytes form one four-register lookup table. */
      uint8x16x4_t table = { { a_.m128i_private[0].neon_u8,
                               a_.m128i_private[1].neon_u8,
                               a_.m128i_private[2].neon_u8,
                               a_.m128i_private[3].neon_u8 } };
      uint8x16_t mask = vdupq_n_u8(0x3F);

      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) {
        r_.m128i_private[i].neon_u8 = vqtbl4q_u8(table, vandq_u8(idx_.m128i_private[i].neon_u8, mask));
      }
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      /* Two 32-byte permutes (chunks 0-1 and chunks 2-3); index bit 0x20
       * then selects between them.  NOTE(review): idx is not masked here,
       * so this presumably relies on vec_perm ignoring the upper index
       * bits -- confirm. */
      SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) test, r01, r23;
      test = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 0x20));
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) {
        r01 = vec_perm(a_.m128i_private[0].altivec_u8, a_.m128i_private[1].altivec_u8, idx_.m128i_private[i].altivec_u8);
        r23 = vec_perm(a_.m128i_private[2].altivec_u8, a_.m128i_private[3].altivec_u8, idx_.m128i_private[i].altivec_u8);
        r_.m128i_private[i].altivec_u8 = vec_sel(r01, r23, vec_cmpeq(vec_and(idx_.m128i_private[i].altivec_u8, test), test));
      }
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      /* One swizzle per 16-byte chunk; the index is rebased by 16 between
       * chunks and the four partial results are OR-ed together.
       * NOTE(review): this depends on swizzle yielding zero for lanes
       * whose index is outside 0..15 -- confirm against the WASM SIMD
       * specification. */
      v128_t index, r, t;
      const v128_t mask = wasm_i8x16_splat(0x3F);
      const v128_t sixteen = wasm_i8x16_splat(16);
      const v128_t a0 = a_.m128i_private[0].wasm_v128;
      const v128_t a1 = a_.m128i_private[1].wasm_v128;
      const v128_t a2 = a_.m128i_private[2].wasm_v128;
      const v128_t a3 = a_.m128i_private[3].wasm_v128;

      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) {
        index = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask);
        r = wasm_v8x16_swizzle(a0, index);

        index = wasm_i8x16_sub(index, sixteen);
        t = wasm_v8x16_swizzle(a1, index);
        r = wasm_v128_or(r, t);

        index = wasm_i8x16_sub(index, sixteen);
        t = wasm_v8x16_swizzle(a2, index);
        r = wasm_v128_or(r, t);

        index = wasm_i8x16_sub(index, sixteen);
        t = wasm_v8x16_swizzle(a3, index);
        r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t);
      }
    #else
      /* Portable reference: low six bits of each selector pick the byte. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = a_.i8[idx_.i8[i] & 0x3F];
      }
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES)
  #undef _mm512_permutexvar_epi8
  #define _mm512_permutexvar_epi8(idx, a) simde_mm512_permutexvar_epi8(idx, a)
#endif

/* Byte permute of `a` by `idx`, combined with `src` under mask `k` through
 * simde_mm512_mask_mov_epi8. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_permutexvar_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i idx, simde__m512i a) {
  #if defined(SIMDE_X86_AVX512VBMI_NATIVE)
    return _mm512_mask_permutexvar_epi8(src, k, idx, a);
  #else
    return simde_mm512_mask_mov_epi8(src, k, simde_mm512_permutexvar_epi8(idx, a));
  #endif
}
#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_permutexvar_epi8
  #define _mm512_mask_permutexvar_epi8(src, k, idx, a) simde_mm512_mask_permutexvar_epi8(src, k, idx, a)
#endif

/* Byte permute of `a` by `idx`, passed through simde_mm512_maskz_mov_epi8
 * with mask `k`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_maskz_permutexvar_epi8 (simde__mmask64 k, simde__m512i idx, simde__m512i a) {
  #if defined(SIMDE_X86_AVX512VBMI_NATIVE)
    return _mm512_maskz_permutexvar_epi8(k, idx, a);
  #else
    return simde_mm512_maskz_mov_epi8(k, simde_mm512_permutexvar_epi8(idx, a));
  #endif
}
#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES)
  #undef _mm512_maskz_permutexvar_epi8
  #define _mm512_maskz_permutexvar_epi8(k, idx, a) simde_mm512_maskz_permutexvar_epi8(k, idx, a)
#endif

/* Double-precision permute: reinterprets `a` as 64-bit integers, applies
 * the 64-bit integer permute, and casts the result back. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_permutexvar_pd (simde__m512i idx, simde__m512d a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_permutexvar_pd(idx, a);
  #else
    return simde_mm512_castsi512_pd(simde_mm512_permutexvar_epi64(idx, simde_mm512_castpd_si512(a)));
  #endif
}
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
  #undef _mm512_permutexvar_pd
  #define _mm512_permutexvar_pd(idx, a) simde_mm512_permutexvar_pd(idx, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_permutexvar_pd (simde__m512d src, simde__mmask8 k, simde__m512i idx, simde__m512d a) {
  #if defined(SIMDE_X86_AVX512F_NATIVE)
    return
_mm512_mask_permutexvar_pd(src, k, idx, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_permutexvar_pd(idx, a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_permutexvar_pd #define _mm512_mask_permutexvar_pd(src, k, idx, a) simde_mm512_mask_permutexvar_pd(src, k, idx, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_maskz_permutexvar_pd (simde__mmask8 k, simde__m512i idx, simde__m512d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_permutexvar_pd(k, idx, a); #else return simde_mm512_maskz_mov_pd(k, simde_mm512_permutexvar_pd(idx, a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_permutexvar_pd #define _mm512_maskz_permutexvar_pd(k, idx, a) simde_mm512_maskz_permutexvar_pd(k, idx, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_permutexvar_ps (simde__m512i idx, simde__m512 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_permutexvar_ps(idx, a); #else return simde_mm512_castsi512_ps(simde_mm512_permutexvar_epi32(idx, simde_mm512_castps_si512(a))); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_permutexvar_ps #define _mm512_permutexvar_ps(idx, a) simde_mm512_permutexvar_ps(idx, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_permutexvar_ps (simde__m512 src, simde__mmask16 k, simde__m512i idx, simde__m512 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_permutexvar_ps(src, k, idx, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_permutexvar_ps(idx, a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_permutexvar_ps #define _mm512_mask_permutexvar_ps(src, k, idx, a) simde_mm512_mask_permutexvar_ps(src, k, idx, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_maskz_permutexvar_ps (simde__mmask16 k, simde__m512i idx, simde__m512 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_permutexvar_ps(k, idx, a); #else 
return simde_mm512_maskz_mov_ps(k, simde_mm512_permutexvar_ps(idx, a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_permutexvar_ps #define _mm512_maskz_permutexvar_ps(k, idx, a) simde_mm512_maskz_permutexvar_ps(k, idx, a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_PERMUTEXVAR_H) */ simde-0.7.2/simde/x86/avx512/sad.h000066400000000000000000000051161400333146700163210ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_SAD_H) #define SIMDE_X86_AVX512_SAD_H #include "types.h" #include "../avx2.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_sad_epu8 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_sad_epu8(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_sad_epu8(a_.m256i[i], b_.m256i[i]); } #else for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { uint16_t tmp = 0; SIMDE_VECTORIZE_REDUCTION(+:tmp) for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 8) ; j++) { const size_t e = j + (i * 8); tmp += (a_.u8[e] > b_.u8[e]) ? (a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); } r_.i64[i] = tmp; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_sad_epu8 #define _mm512_sad_epu8(a, b) simde_mm512_sad_epu8(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SAD_H) */ simde-0.7.2/simde/x86/avx512/set.h000066400000000000000000000353051400333146700163500ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or 
substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_X86_AVX512_SET_H) #define SIMDE_X86_AVX512_SET_H #include "types.h" #include "load.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_set_epi16 (int16_t e31, int16_t e30, int16_t e29, int16_t e28, int16_t e27, int16_t e26, int16_t e25, int16_t e24, int16_t e23, int16_t e22, int16_t e21, int16_t e20, int16_t e19, int16_t e18, int16_t e17, int16_t e16, int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { simde__m512i_private r_; r_.i16[ 0] = e0; r_.i16[ 1] = e1; r_.i16[ 2] = e2; r_.i16[ 3] = e3; r_.i16[ 4] = e4; r_.i16[ 5] = e5; r_.i16[ 6] = e6; r_.i16[ 7] = e7; r_.i16[ 8] = e8; r_.i16[ 9] = e9; r_.i16[10] = e10; r_.i16[11] = e11; r_.i16[12] = e12; r_.i16[13] = e13; r_.i16[14] = e14; r_.i16[15] = e15; r_.i16[16] = e16; r_.i16[17] = e17; r_.i16[18] = e18; r_.i16[19] = e19; r_.i16[20] = e20; r_.i16[21] = e21; r_.i16[22] = e22; r_.i16[23] = e23; r_.i16[24] = e24; r_.i16[25] = e25; r_.i16[26] = e26; r_.i16[27] = e27; r_.i16[28] = e28; r_.i16[29] = e29; r_.i16[30] = e30; r_.i16[31] = e31; return simde__m512i_from_private(r_); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_set_epi16 #define _mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, 
e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_set_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8, int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { simde__m512i_private r_; r_.i32[ 0] = e0; r_.i32[ 1] = e1; r_.i32[ 2] = e2; r_.i32[ 3] = e3; r_.i32[ 4] = e4; r_.i32[ 5] = e5; r_.i32[ 6] = e6; r_.i32[ 7] = e7; r_.i32[ 8] = e8; r_.i32[ 9] = e9; r_.i32[10] = e10; r_.i32[11] = e11; r_.i32[12] = e12; r_.i32[13] = e13; r_.i32[14] = e14; r_.i32[15] = e15; return simde__m512i_from_private(r_); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_set_epi32 #define _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_set_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) { simde__m512i_private r_; r_.i64[0] = e0; r_.i64[1] = e1; r_.i64[2] = e2; r_.i64[3] = e3; r_.i64[4] = e4; r_.i64[5] = e5; r_.i64[6] = e6; r_.i64[7] = e7; return simde__m512i_from_private(r_); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_set_epi64 #define _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_x_mm512_set_epu8 (uint8_t e63, uint8_t e62, uint8_t e61, uint8_t e60, uint8_t e59, uint8_t e58, uint8_t e57, uint8_t e56, uint8_t e55, uint8_t e54, uint8_t e53, uint8_t e52, uint8_t e51, uint8_t e50, uint8_t e49, uint8_t e48, uint8_t e47, uint8_t e46, 
uint8_t e45, uint8_t e44, uint8_t e43, uint8_t e42, uint8_t e41, uint8_t e40, uint8_t e39, uint8_t e38, uint8_t e37, uint8_t e36, uint8_t e35, uint8_t e34, uint8_t e33, uint8_t e32, uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { simde__m512i_private r_; r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; r_.u8[16] = e16; r_.u8[17] = e17; r_.u8[18] = e18; r_.u8[19] = e19; r_.u8[20] = e20; r_.u8[21] = e21; r_.u8[22] = e22; r_.u8[23] = e23; r_.u8[24] = e24; r_.u8[25] = e25; r_.u8[26] = e26; r_.u8[27] = e27; r_.u8[28] = e28; r_.u8[29] = e29; r_.u8[30] = e30; r_.u8[31] = e31; r_.u8[32] = e32; r_.u8[33] = e33; r_.u8[34] = e34; r_.u8[35] = e35; r_.u8[36] = e36; r_.u8[37] = e37; r_.u8[38] = e38; r_.u8[39] = e39; r_.u8[40] = e40; r_.u8[41] = e41; r_.u8[42] = e42; r_.u8[43] = e43; r_.u8[44] = e44; r_.u8[45] = e45; r_.u8[46] = e46; r_.u8[47] = e47; r_.u8[48] = e48; r_.u8[49] = e49; r_.u8[50] = e50; r_.u8[51] = e51; r_.u8[52] = e52; r_.u8[53] = e53; r_.u8[54] = e54; r_.u8[55] = e55; r_.u8[56] = e56; r_.u8[57] = e57; r_.u8[58] = e58; r_.u8[59] = e59; r_.u8[60] = e60; r_.u8[61] = e61; r_.u8[62] = e62; r_.u8[63] = e63; return simde__m512i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_x_mm512_set_epu16 (uint16_t e31, uint16_t e30, uint16_t e29, uint16_t e28, uint16_t e27, uint16_t e26, uint16_t e25, uint16_t e24, uint16_t e23, uint16_t e22, uint16_t e21, uint16_t e20, uint16_t e19, uint16_t e18, 
uint16_t e17, uint16_t e16, uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { simde__m512i_private r_; r_.u16[ 0] = e0; r_.u16[ 1] = e1; r_.u16[ 2] = e2; r_.u16[ 3] = e3; r_.u16[ 4] = e4; r_.u16[ 5] = e5; r_.u16[ 6] = e6; r_.u16[ 7] = e7; r_.u16[ 8] = e8; r_.u16[ 9] = e9; r_.u16[10] = e10; r_.u16[11] = e11; r_.u16[12] = e12; r_.u16[13] = e13; r_.u16[14] = e14; r_.u16[15] = e15; r_.u16[16] = e16; r_.u16[17] = e17; r_.u16[18] = e18; r_.u16[19] = e19; r_.u16[20] = e20; r_.u16[21] = e21; r_.u16[22] = e22; r_.u16[23] = e23; r_.u16[24] = e24; r_.u16[25] = e25; r_.u16[26] = e26; r_.u16[27] = e27; r_.u16[28] = e28; r_.u16[29] = e29; r_.u16[30] = e30; r_.u16[31] = e31; return simde__m512i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_x_mm512_set_epu32 (uint32_t e15, uint32_t e14, uint32_t e13, uint32_t e12, uint32_t e11, uint32_t e10, uint32_t e9, uint32_t e8, uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { simde__m512i_private r_; r_.u32[ 0] = e0; r_.u32[ 1] = e1; r_.u32[ 2] = e2; r_.u32[ 3] = e3; r_.u32[ 4] = e4; r_.u32[ 5] = e5; r_.u32[ 6] = e6; r_.u32[ 7] = e7; r_.u32[ 8] = e8; r_.u32[ 9] = e9; r_.u32[10] = e10; r_.u32[11] = e11; r_.u32[12] = e12; r_.u32[13] = e13; r_.u32[14] = e14; r_.u32[15] = e15; return simde__m512i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_x_mm512_set_epu64 (uint64_t e7, uint64_t e6, uint64_t e5, uint64_t e4, uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { simde__m512i_private r_; r_.u64[ 0] = e0; r_.u64[ 1] = e1; r_.u64[ 2] = e2; r_.u64[ 3] = e3; r_.u64[ 4] = e4; r_.u64[ 5] = e5; r_.u64[ 6] = e6; r_.u64[ 7] = e7; return simde__m512i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_set_epi8 (int8_t e63, int8_t e62, int8_t e61, int8_t e60, int8_t e59, int8_t 
e58, int8_t e57, int8_t e56, int8_t e55, int8_t e54, int8_t e53, int8_t e52, int8_t e51, int8_t e50, int8_t e49, int8_t e48, int8_t e47, int8_t e46, int8_t e45, int8_t e44, int8_t e43, int8_t e42, int8_t e41, int8_t e40, int8_t e39, int8_t e38, int8_t e37, int8_t e36, int8_t e35, int8_t e34, int8_t e33, int8_t e32, int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { simde__m512i_private r_; r_.i8[ 0] = e0; r_.i8[ 1] = e1; r_.i8[ 2] = e2; r_.i8[ 3] = e3; r_.i8[ 4] = e4; r_.i8[ 5] = e5; r_.i8[ 6] = e6; r_.i8[ 7] = e7; r_.i8[ 8] = e8; r_.i8[ 9] = e9; r_.i8[10] = e10; r_.i8[11] = e11; r_.i8[12] = e12; r_.i8[13] = e13; r_.i8[14] = e14; r_.i8[15] = e15; r_.i8[16] = e16; r_.i8[17] = e17; r_.i8[18] = e18; r_.i8[19] = e19; r_.i8[20] = e20; r_.i8[21] = e21; r_.i8[22] = e22; r_.i8[23] = e23; r_.i8[24] = e24; r_.i8[25] = e25; r_.i8[26] = e26; r_.i8[27] = e27; r_.i8[28] = e28; r_.i8[29] = e29; r_.i8[30] = e30; r_.i8[31] = e31; r_.i8[32] = e32; r_.i8[33] = e33; r_.i8[34] = e34; r_.i8[35] = e35; r_.i8[36] = e36; r_.i8[37] = e37; r_.i8[38] = e38; r_.i8[39] = e39; r_.i8[40] = e40; r_.i8[41] = e41; r_.i8[42] = e42; r_.i8[43] = e43; r_.i8[44] = e44; r_.i8[45] = e45; r_.i8[46] = e46; r_.i8[47] = e47; r_.i8[48] = e48; r_.i8[49] = e49; r_.i8[50] = e50; r_.i8[51] = e51; r_.i8[52] = e52; r_.i8[53] = e53; r_.i8[54] = e54; r_.i8[55] = e55; r_.i8[56] = e56; r_.i8[57] = e57; r_.i8[58] = e58; r_.i8[59] = e59; r_.i8[60] = e60; r_.i8[61] = e61; r_.i8[62] = e62; r_.i8[63] = e63; return simde__m512i_from_private(r_); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_set_epi8 #define _mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, 
e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_x_mm512_set_m128i (simde__m128i a, simde__m128i b, simde__m128i c, simde__m128i d) { #if defined(SIMDE_X86_AVX512F_NATIVE) SIMDE_ALIGN_LIKE_16(simde__m128i) simde__m128i v[] = { d, c, b, a }; return simde_mm512_load_si512(HEDLEY_STATIC_CAST(__m512i *, HEDLEY_STATIC_CAST(void *, v))); #else simde__m512i_private r_; r_.m128i[0] = d; r_.m128i[1] = c; r_.m128i[2] = b; r_.m128i[3] = a; return simde__m512i_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_x_mm512_set_m256i (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) SIMDE_ALIGN_LIKE_32(simde__m256i) simde__m256i v[] = { b, a }; return simde_mm512_load_si512(HEDLEY_STATIC_CAST(__m512i *, HEDLEY_STATIC_CAST(void *, v))); #else simde__m512i_private r_; r_.m256i[0] = b; r_.m256i[1] = a; return simde__m512i_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_set_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12, simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8, simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { simde__m512_private r_; r_.f32[ 0] = e0; r_.f32[ 1] = e1; r_.f32[ 2] = e2; r_.f32[ 3] = e3; r_.f32[ 4] = e4; r_.f32[ 5] = e5; r_.f32[ 6] = e6; r_.f32[ 7] = e7; r_.f32[ 8] = e8; 
r_.f32[ 9] = e9; r_.f32[10] = e10; r_.f32[11] = e11; r_.f32[12] = e12; r_.f32[13] = e13; r_.f32[14] = e14; r_.f32[15] = e15; return simde__m512_from_private(r_); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_set_ps #define _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_set_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { simde__m512d_private r_; r_.f64[0] = e0; r_.f64[1] = e1; r_.f64[2] = e2; r_.f64[3] = e3; r_.f64[4] = e4; r_.f64[5] = e5; r_.f64[6] = e6; r_.f64[7] = e7; return simde__m512d_from_private(r_); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_set_pd #define _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SET_H) */ simde-0.7.2/simde/x86/avx512/set1.h000066400000000000000000000217711400333146700164330ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #if !defined(SIMDE_X86_AVX512_SET1_H) #define SIMDE_X86_AVX512_SET1_H #include "types.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_set1_epi8 (int8_t a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_set1_epi8(a); #else simde__m512i_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a; } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_set1_epi8 #define _mm512_set1_epi8(a) simde_mm512_set1_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_set1_epi8(simde__m512i src, simde__mmask64 k, int8_t a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_set1_epi8(src, k, a); #else return simde_mm512_mask_mov_epi8(src, k, simde_mm512_set1_epi8(a)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_set1_epi8 #define _mm512_mask_set1_epi8(src, k, a) simde_mm512_mask_set1_epi8(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_set1_epi8(simde__mmask64 k, int8_t a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_set1_epi8(k, a); #else return simde_mm512_maskz_mov_epi8(k, simde_mm512_set1_epi8(a)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_set1_epi8 #define _mm512_maskz_set1_epi8(k, a) 
simde_mm512_maskz_set1_epi8(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_set1_epi16 (int16_t a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_set1_epi16(a); #else simde__m512i_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a; } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_set1_epi16 #define _mm512_set1_epi16(a) simde_mm512_set1_epi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_set1_epi16(simde__m512i src, simde__mmask32 k, int16_t a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_set1_epi16(src, k, a); #else return simde_mm512_mask_mov_epi16(src, k, simde_mm512_set1_epi16(a)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_set1_epi16 #define _mm512_mask_set1_epi16(src, k, a) simde_mm512_mask_set1_epi16(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_set1_epi16(simde__mmask32 k, int16_t a) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_set1_epi16(k, a); #else return simde_mm512_maskz_mov_epi16(k, simde_mm512_set1_epi16(a)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_set1_epi16 #define _mm512_maskz_set1_epi16(k, a) simde_mm512_maskz_set1_epi16(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_set1_epi32 (int32_t a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_set1_epi32(a); #else simde__m512i_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a; } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_set1_epi32 #define _mm512_set1_epi32(a) simde_mm512_set1_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_set1_epi32(simde__m512i src, simde__mmask16 k, int32_t a) { #if defined(SIMDE_X86_AVX512F_NATIVE) 
return _mm512_mask_set1_epi32(src, k, a); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_set1_epi32(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_set1_epi32 #define _mm512_mask_set1_epi32(src, k, a) simde_mm512_mask_set1_epi32(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_set1_epi32(simde__mmask16 k, int32_t a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_set1_epi32(k, a); #else return simde_mm512_maskz_mov_epi32(k, simde_mm512_set1_epi32(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_set1_epi32 #define _mm512_maskz_set1_epi32(k, a) simde_mm512_maskz_set1_epi32(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_set1_epi64 (int64_t a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_set1_epi64(a); #else simde__m512i_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a; } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_set1_epi64 #define _mm512_set1_epi64(a) simde_mm512_set1_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_set1_epi64(simde__m512i src, simde__mmask8 k, int64_t a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_set1_epi64(src, k, a); #else return simde_mm512_mask_mov_epi64(src, k, simde_mm512_set1_epi64(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_set1_epi64 #define _mm512_mask_set1_epi64(src, k, a) simde_mm512_mask_set1_epi64(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_set1_epi64(simde__mmask8 k, int64_t a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_set1_epi64(k, a); #else return simde_mm512_maskz_mov_epi64(k, simde_mm512_set1_epi64(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_set1_epi64 #define 
_mm512_maskz_set1_epi64(k, a) simde_mm512_maskz_set1_epi64(k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_x_mm512_set1_epu8 (uint8_t a) { simde__m512i_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = a; } return simde__m512i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_x_mm512_set1_epu16 (uint16_t a) { simde__m512i_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = a; } return simde__m512i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_x_mm512_set1_epu32 (uint32_t a) { simde__m512i_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a; } return simde__m512i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_x_mm512_set1_epu64 (uint64_t a) { simde__m512i_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = a; } return simde__m512i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_set1_ps (simde_float32 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_set1_ps(a); #else simde__m512_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a; } return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_set1_ps #define _mm512_set1_ps(a) simde_mm512_set1_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_set1_pd (simde_float64 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_set1_pd(a); #else simde__m512d_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a; } return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_set1_pd #define _mm512_set1_pd(a) simde_mm512_set1_pd(a) #endif SIMDE_END_DECLS_ 
HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SET1_H) */ simde-0.7.2/simde/x86/avx512/set4.h000066400000000000000000000071711400333146700164340ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_SET4_H) #define SIMDE_X86_AVX512_SET4_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_set4_epi32 (int32_t d, int32_t c, int32_t b, int32_t a) { simde__m512i_private r_; r_.i32[ 0] = a; r_.i32[ 1] = b; r_.i32[ 2] = c; r_.i32[ 3] = d; r_.i32[ 4] = a; r_.i32[ 5] = b; r_.i32[ 6] = c; r_.i32[ 7] = d; r_.i32[ 8] = a; r_.i32[ 9] = b; r_.i32[10] = c; r_.i32[11] = d; r_.i32[12] = a; r_.i32[13] = b; r_.i32[14] = c; r_.i32[15] = d; return simde__m512i_from_private(r_); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_set4_epi32 #define _mm512_set4_epi32(d,c,b,a) simde_mm512_set4_epi32(d,c,b,a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_set4_epi64 (int64_t d, int64_t c, int64_t b, int64_t a) { simde__m512i_private r_; r_.i64[0] = a; r_.i64[1] = b; r_.i64[2] = c; r_.i64[3] = d; r_.i64[4] = a; r_.i64[5] = b; r_.i64[6] = c; r_.i64[7] = d; return simde__m512i_from_private(r_); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_set4_epi64 #define _mm512_set4_epi64(d,c,b,a) simde_mm512_set4_epi64(d,c,b,a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_set4_ps (simde_float32 d, simde_float32 c, simde_float32 b, simde_float32 a) { simde__m512_private r_; r_.f32[ 0] = a; r_.f32[ 1] = b; r_.f32[ 2] = c; r_.f32[ 3] = d; r_.f32[ 4] = a; r_.f32[ 5] = b; r_.f32[ 6] = c; r_.f32[ 7] = d; r_.f32[ 8] = a; r_.f32[ 9] = b; r_.f32[10] = c; r_.f32[11] = d; r_.f32[12] = a; r_.f32[13] = b; r_.f32[14] = c; r_.f32[15] = d; return simde__m512_from_private(r_); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_set4_ps #define _mm512_set4_ps(d,c,b,a) simde_mm512_set4_ps(d,c,b,a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_set4_pd (simde_float64 d, simde_float64 c, simde_float64 b, simde_float64 a) { simde__m512d_private r_; 
r_.f64[0] = a; r_.f64[1] = b; r_.f64[2] = c; r_.f64[3] = d; r_.f64[4] = a; r_.f64[5] = b; r_.f64[6] = c; r_.f64[7] = d; return simde__m512d_from_private(r_); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_set4_pd #define _mm512_set4_pd(d,c,b,a) simde_mm512_set4_pd(d,c,b,a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SET4_H) */ simde-0.7.2/simde/x86/avx512/setone.h000066400000000000000000000040701400333146700170450ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_X86_AVX512_SETONE_H) #define SIMDE_X86_AVX512_SETONE_H #include "types.h" #include "cast.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_x_mm512_setone_si512(void) { simde__m512i_private r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); } return simde__m512i_from_private(r_); } #define simde_x_mm512_setone_epi32() simde_x_mm512_setone_si512() SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_x_mm512_setone_ps(void) { return simde_mm512_castsi512_ps(simde_x_mm512_setone_si512()); } SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_x_mm512_setone_pd(void) { return simde_mm512_castsi512_pd(simde_x_mm512_setone_si512()); } SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SETONE_H) */ simde-0.7.2/simde/x86/avx512/setr.h000066400000000000000000000111631400333146700165260ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #if !defined(SIMDE_X86_AVX512_SETR_H) #define SIMDE_X86_AVX512_SETR_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_setr_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8, int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { simde__m512i_private r_; r_.i32[ 0] = e15; r_.i32[ 1] = e14; r_.i32[ 2] = e13; r_.i32[ 3] = e12; r_.i32[ 4] = e11; r_.i32[ 5] = e10; r_.i32[ 6] = e9; r_.i32[ 7] = e8; r_.i32[ 8] = e7; r_.i32[ 9] = e6; r_.i32[10] = e5; r_.i32[11] = e4; r_.i32[12] = e3; r_.i32[13] = e2; r_.i32[14] = e1; r_.i32[15] = e0; return simde__m512i_from_private(r_); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_setr_epi32 #define _mm512_setr_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_setr_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) { simde__m512i_private r_; r_.i64[0] = e7; r_.i64[1] = e6; r_.i64[2] = e5; r_.i64[3] = e4; r_.i64[4] = e3; r_.i64[5] = e2; r_.i64[6] = e1; r_.i64[7] = e0; return simde__m512i_from_private(r_); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_setr_epi64 #define _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_setr_ps (simde_float32 
e15, simde_float32 e14, simde_float32 e13, simde_float32 e12, simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8, simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { simde__m512_private r_; r_.f32[ 0] = e15; r_.f32[ 1] = e14; r_.f32[ 2] = e13; r_.f32[ 3] = e12; r_.f32[ 4] = e11; r_.f32[ 5] = e10; r_.f32[ 6] = e9; r_.f32[ 7] = e8; r_.f32[ 8] = e7; r_.f32[ 9] = e6; r_.f32[10] = e5; r_.f32[11] = e4; r_.f32[12] = e3; r_.f32[13] = e2; r_.f32[14] = e1; r_.f32[15] = e0; return simde__m512_from_private(r_); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_setr_ps #define _mm512_setr_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_setr_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { simde__m512d_private r_; r_.f64[0] = e7; r_.f64[1] = e6; r_.f64[2] = e5; r_.f64[3] = e4; r_.f64[4] = e3; r_.f64[5] = e2; r_.f64[6] = e1; r_.f64[7] = e0; return simde__m512d_from_private(r_); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_setr_pd #define _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SETR_H) */ simde-0.7.2/simde/x86/avx512/setr4.h000066400000000000000000000072271400333146700166200ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, 
distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #if !defined(SIMDE_X86_AVX512_SETR4_H) #define SIMDE_X86_AVX512_SETR4_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_setr4_epi32 (int32_t d, int32_t c, int32_t b, int32_t a) { simde__m512i_private r_; r_.i32[ 0] = d; r_.i32[ 1] = c; r_.i32[ 2] = b; r_.i32[ 3] = a; r_.i32[ 4] = d; r_.i32[ 5] = c; r_.i32[ 6] = b; r_.i32[ 7] = a; r_.i32[ 8] = d; r_.i32[ 9] = c; r_.i32[10] = b; r_.i32[11] = a; r_.i32[12] = d; r_.i32[13] = c; r_.i32[14] = b; r_.i32[15] = a; return simde__m512i_from_private(r_); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_setr4_epi32 #define _mm512_setr4_epi32(d,c,b,a) simde_mm512_setr4_epi32(d,c,b,a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_setr4_epi64 (int64_t d, int64_t c, int64_t b, int64_t a) { simde__m512i_private r_; r_.i64[0] = d; r_.i64[1] = c; r_.i64[2] = b; r_.i64[3] = a; r_.i64[4] = d; r_.i64[5] = c; r_.i64[6] = b; r_.i64[7] = a; return simde__m512i_from_private(r_); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_setr4_epi64 #define 
_mm512_setr4_epi64(d,c,b,a) simde_mm512_setr4_epi64(d,c,b,a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_setr4_ps (simde_float32 d, simde_float32 c, simde_float32 b, simde_float32 a) { simde__m512_private r_; r_.f32[ 0] = d; r_.f32[ 1] = c; r_.f32[ 2] = b; r_.f32[ 3] = a; r_.f32[ 4] = d; r_.f32[ 5] = c; r_.f32[ 6] = b; r_.f32[ 7] = a; r_.f32[ 8] = d; r_.f32[ 9] = c; r_.f32[10] = b; r_.f32[11] = a; r_.f32[12] = d; r_.f32[13] = c; r_.f32[14] = b; r_.f32[15] = a; return simde__m512_from_private(r_); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_setr4_ps #define _mm512_setr4_ps(d,c,b,a) simde_mm512_setr4_ps(d,c,b,a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_setr4_pd (simde_float64 d, simde_float64 c, simde_float64 b, simde_float64 a) { simde__m512d_private r_; r_.f64[0] = d; r_.f64[1] = c; r_.f64[2] = b; r_.f64[3] = a; r_.f64[4] = d; r_.f64[5] = c; r_.f64[6] = b; r_.f64[7] = a; return simde__m512d_from_private(r_); } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_setr4_pd #define _mm512_setr4_pd(d,c,b,a) simde_mm512_setr4_pd(d,c,b,a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SETR4_H) */ simde-0.7.2/simde/x86/avx512/setzero.h000066400000000000000000000055541400333146700172530ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur * 2020 Hidayat Khan * 2020 Christopher Moore */ #if !defined(SIMDE_X86_AVX512_SETZERO_H) #define SIMDE_X86_AVX512_SETZERO_H #include "types.h" #include "cast.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_setzero_si512(void) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_setzero_si512(); #else simde__m512i r; simde_memset(&r, 0, sizeof(r)); return r; #endif } #define simde_mm512_setzero_epi32() simde_mm512_setzero_si512() #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_setzero_si512 #define _mm512_setzero_si512() simde_mm512_setzero_si512() #undef _mm512_setzero_epi32 #define _mm512_setzero_epi32() simde_mm512_setzero_si512() #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_setzero_ps(void) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_setzero_ps(); #else return simde_mm512_castsi512_ps(simde_mm512_setzero_si512()); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_setzero_si512 #define _mm512_setzero_si512() simde_mm512_setzero_si512() #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_setzero_pd(void) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_setzero_pd(); #else return simde_mm512_castsi512_pd(simde_mm512_setzero_si512()); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_setzero_si512 #define _mm512_setzero_si512() 
simde_mm512_setzero_si512() #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SETZERO_H) */ simde-0.7.2/simde/x86/avx512/shuffle.h000066400000000000000000000172741400333146700172160ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #if !defined(SIMDE_X86_AVX512_SHUFFLE_H) #define SIMDE_X86_AVX512_SHUFFLE_H #include "types.h" #include "../avx2.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_shuffle_epi8 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_shuffle_epi8(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_shuffle_epi8(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (b_.u8[i] & 0x80) ? 0 : a_.u8[(b_.u8[i] & 0x0f) + (i & 0x30)]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_shuffle_epi8 #define _mm512_shuffle_epi8(a, b) simde_mm512_shuffle_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_shuffle_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_shuffle_epi8(src, k, a, b); #else return simde_mm512_mask_mov_epi8(src, k, simde_mm512_shuffle_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_shuffle_epi8 #define _mm512_mask_shuffle_epi8(src, k, a, b) simde_mm512_mask_shuffle_epi8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_shuffle_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_shuffle_epi8(k, a, b); #else return simde_mm512_maskz_mov_epi8(k, simde_mm512_shuffle_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_shuffle_epi8 #define 
_mm512_maskz_shuffle_epi8(k, a, b) simde_mm512_maskz_shuffle_epi8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_shuffle_i32x4 (simde__m256i a, simde__m256i b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); r_.m128i[0] = a_.m128i[ imm8 & 1]; r_.m128i[1] = b_.m128i[(imm8 >> 1) & 1]; return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) #define simde_mm256_shuffle_i32x4(a, b, imm8) _mm256_shuffle_i32x4(a, b, imm8) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_shuffle_i32x4 #define _mm256_shuffle_i32x4(a, b, imm8) simde_mm256_shuffle_i32x4(a, b, imm8) #endif #define simde_mm256_maskz_shuffle_i32x4(k, a, b, imm8) simde_mm256_maskz_mov_epi32(k, simde_mm256_shuffle_i32x4(a, b, imm8)) #define simde_mm256_mask_shuffle_i32x4(src, k, a, b, imm8) simde_mm256_mask_mov_epi32(src, k, simde_mm256_shuffle_i32x4(a, b, imm8)) #define simde_mm256_shuffle_f32x4(a, b, imm8) simde_mm256_castsi256_ps(simde_mm256_shuffle_i32x4(simde_mm256_castps_si256(a), simde_mm256_castps_si256(b), imm8)) #define simde_mm256_maskz_shuffle_f32x4(k, a, b, imm8) simde_mm256_maskz_mov_ps(k, simde_mm256_shuffle_f32x4(a, b, imm8)) #define simde_mm256_mask_shuffle_f32x4(src, k, a, b, imm8) simde_mm256_mask_mov_ps(src, k, simde_mm256_shuffle_f32x4(a, b, imm8)) #define simde_mm256_shuffle_i64x2(a, b, imm8) simde_mm256_shuffle_i32x4(a, b, imm8) #define simde_mm256_maskz_shuffle_i64x2(k, a, b, imm8) simde_mm256_maskz_mov_epi64(k, simde_mm256_shuffle_i64x2(a, b, imm8)) #define simde_mm256_mask_shuffle_i64x2(src, k, a, b, imm8) simde_mm256_mask_mov_epi64(src, k, simde_mm256_shuffle_i64x2(a, b, imm8)) #define simde_mm256_shuffle_f64x2(a, b, imm8) simde_mm256_castsi256_pd(simde_mm256_shuffle_i64x2(simde_mm256_castpd_si256(a), simde_mm256_castpd_si256(b), 
imm8)) #define simde_mm256_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm256_maskz_mov_pd(k, simde_mm256_shuffle_f64x2(a, b, imm8)) #define simde_mm256_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm256_mask_mov_pd(src, k, simde_mm256_shuffle_f64x2(a, b, imm8)) SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_shuffle_i32x4 (simde__m512i a, simde__m512i b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); r_.m128i[0] = a_.m128i[ imm8 & 3]; r_.m128i[1] = a_.m128i[(imm8 >> 2) & 3]; r_.m128i[2] = b_.m128i[(imm8 >> 4) & 3]; r_.m128i[3] = b_.m128i[(imm8 >> 6) & 3]; return simde__m512i_from_private(r_); } #if defined(SIMDE_X86_AVX512F_NATIVE) #define simde_mm512_shuffle_i32x4(a, b, imm8) _mm512_shuffle_i32x4(a, b, imm8) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_shuffle_i32x4 #define _mm512_shuffle_i32x4(a, b, imm8) simde_mm512_shuffle_i32x4(a, b, imm8) #endif #define simde_mm512_maskz_shuffle_i32x4(k, a, b, imm8) simde_mm512_maskz_mov_epi32(k, simde_mm512_shuffle_i32x4(a, b, imm8)) #define simde_mm512_mask_shuffle_i32x4(src, k, a, b, imm8) simde_mm512_mask_mov_epi32(src, k, simde_mm512_shuffle_i32x4(a, b, imm8)) #define simde_mm512_shuffle_f32x4(a, b, imm8) simde_mm512_castsi512_ps(simde_mm512_shuffle_i32x4(simde_mm512_castps_si512(a), simde_mm512_castps_si512(b), imm8)) #define simde_mm512_maskz_shuffle_f32x4(k, a, b, imm8) simde_mm512_maskz_mov_ps(k, simde_mm512_shuffle_f32x4(a, b, imm8)) #define simde_mm512_mask_shuffle_f32x4(src, k, a, b, imm8) simde_mm512_mask_mov_ps(src, k, simde_mm512_shuffle_f32x4(a, b, imm8)) #define simde_mm512_shuffle_i64x2(a, b, imm8) simde_mm512_shuffle_i32x4(a, b, imm8) #define simde_mm512_maskz_shuffle_i64x2(k, a, b, imm8) simde_mm512_maskz_mov_epi64(k, simde_mm512_shuffle_i64x2(a, b, imm8)) #define simde_mm512_mask_shuffle_i64x2(src, k, a, b, imm8) simde_mm512_mask_mov_epi64(src, k, 
simde_mm512_shuffle_i64x2(a, b, imm8)) #define simde_mm512_shuffle_f64x2(a, b, imm8) simde_mm512_castsi512_pd(simde_mm512_shuffle_i64x2(simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b), imm8)) #define simde_mm512_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm512_maskz_mov_pd(k, simde_mm512_shuffle_f64x2(a, b, imm8)) #define simde_mm512_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm512_mask_mov_pd(src, k, simde_mm512_shuffle_f64x2(a, b, imm8)) SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SHUFFLE_H) */ simde-0.7.2/simde/x86/avx512/sll.h000066400000000000000000000177511400333146700163540ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_SLL_H) #define SIMDE_X86_AVX512_SLL_H #include "types.h" #include "../avx2.h" #include "mov.h" #include "setzero.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_sll_epi16 (simde__m512i a, simde__m128i count) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_sll_epi16(a, count); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_sll_epi16(a_.m256i[i], count); } #else simde__m128i_private count_ = simde__m128i_to_private(count); uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); if (shift > 15) return simde_mm512_setzero_si512(); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, shift); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (shift)); } #endif #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_sll_epi16 #define _mm512_sll_epi16(a, count) simde_mm512_sll_epi16(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_sll_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m128i count) { #if defined(SIMDE_X86_AVX51BW_NATIVE) return _mm512_mask_sll_epi16(src, k, a, count); #else return simde_mm512_mask_mov_epi16(src, k, simde_mm512_sll_epi16(a, count)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_sll_epi16 #define _mm512_mask_sll_epi16(src, k, a, count) simde_mm512_mask_sll_epi16(src, k, a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_sll_epi16 (simde__mmask32 k, simde__m512i a, simde__m128i count) { #if 
defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_sll_epi16(k, a, count); #else return simde_mm512_maskz_mov_epi16(k, simde_mm512_sll_epi16(a, count)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_sll_epi16 #define _mm512_maskz_sll_epi16(src, k, a, count) simde_mm512_maskz_sll_epi16(src, k, a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_sll_epi32 (simde__m512i a, simde__m128i count) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_sll_epi32(a, count); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_sll_epi32(a_.m256i[i], count); } #else simde__m128i_private count_ = simde__m128i_to_private(count); uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); if (shift > 31) return simde_mm512_setzero_si512(); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, shift); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << (shift)); } #endif #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_sll_epi32 #define _mm512_sll_epi32(a, count) simde_mm512_sll_epi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_sll_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_sll_epi32(src, k, a, b); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_sll_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_sll_epi32 #define _mm512_mask_sll_epi32(src, k, a, b) simde_mm512_mask_sll_epi32(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_sll_epi32(simde__mmask16 k, simde__m512i a, 
simde__m128i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_sll_epi32(k, a, b); #else return simde_mm512_maskz_mov_epi32(k, simde_mm512_sll_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_sll_epi32 #define _mm512_maskz_sll_epi32(k, a, b) simde_mm512_maskz_sll_epi32(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_sll_epi64 (simde__m512i a, simde__m128i count) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_sll_epi64(a, count); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_sll_epi64(a_.m256i[i], count); } #else simde__m128i_private count_ = simde__m128i_to_private(count); uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); if (shift > 63) return simde_mm512_setzero_si512(); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, shift); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << (shift)); } #endif #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_sll_epi64 #define _mm512_sll_epi64(a, count) simde_mm512_sll_epi64(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_sll_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_sll_epi64(src, k, a, b); #else return simde_mm512_mask_mov_epi64(src, k, simde_mm512_sll_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_sll_epi64 #define _mm512_mask_sll_epi64(src, k, a, b) simde_mm512_mask_sll_epi64(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_sll_epi64(simde__mmask8 k, simde__m512i a, simde__m128i b) { 
#if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_sll_epi64(k, a, b); #else return simde_mm512_maskz_mov_epi64(k, simde_mm512_sll_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_sll_epi64 #define _mm512_maskz_sll_epi64(k, a, b) simde_mm512_maskz_sll_epi64(k, a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SLL_H) */ simde-0.7.2/simde/x86/avx512/slli.h000066400000000000000000000152451400333146700165210ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan * 2020 Christopher Moore */ #if !defined(SIMDE_X86_AVX512_SLLI_H) #define SIMDE_X86_AVX512_SLLI_H #include "types.h" #include "../avx2.h" #include "mov.h" #include "setzero.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_slli_epi16 (simde__m512i a, const unsigned int imm8) SIMDE_REQUIRE_RANGE(imm8, 0, 255) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) simde__m512i r; SIMDE_CONSTIFY_16_(_mm512_slli_epi16, r, simde_mm512_setzero_si512(), imm8, a); return r; #elif defined(SIMDE_X86_AVX512BW_NATIVE) return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_slli_epi16(a, imm8)); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) if(imm8 < 16) r_.i16 = HEDLEY_STATIC_CAST(__typeof__(r_.i16), (a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8))); else return simde_mm512_setzero_si512(); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (imm8 < 16) ? HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (imm8 & 0xff)) : 0; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_slli_epi16 #define _mm512_slli_epi16(a, imm8) simde_mm512_slli_epi16(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_slli_epi32 (simde__m512i a, unsigned int imm8) { /* I guess the restriction was added in 6.4, back-ported to 5.5, then * removed (fixed) in 7? 
*/ #if defined(SIMDE_X86_AVX512F_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) simde__m512i r; SIMDE_CONSTIFY_32_(_mm512_slli_epi32, r, simde_mm512_setzero_si512(), imm8, a); return r; #elif defined(SIMDE_X86_AVX512F_NATIVE) return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_slli_epi32(a, imm8)); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a); /* The Intel Intrinsics Guide says that only the 8 LSBits of imm8 are * used. In this case we should do "imm8 &= 0xff". However in * practice all bits are used. */ if (imm8 > 31) { simde_memset(&r_, 0, sizeof(r_)); } else { #if defined(SIMDE_X86_AVX2_NATIVE) r_.m256i[0] = simde_mm256_slli_epi32(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); r_.m256i[1] = simde_mm256_slli_epi32(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); #elif defined(SIMDE_X86_SSE2_NATIVE) r_.m128i[0] = simde_mm_slli_epi32(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); r_.m128i[1] = simde_mm_slli_epi32(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); r_.m128i[2] = simde_mm_slli_epi32(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); r_.m128i[3] = simde_mm_slli_epi32(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u32 = a_.u32 << imm8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] << imm8; } #endif } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_slli_epi32 #define _mm512_slli_epi32(a, imm8) simde_mm512_slli_epi32(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_slli_epi64 (simde__m512i a, unsigned int imm8) { #if defined(SIMDE_X86_AVX512F_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) simde__m512i r; SIMDE_CONSTIFY_64_(_mm512_slli_epi64, r, simde_mm512_setzero_si512(), imm8, a); return r; #elif 
defined(SIMDE_X86_AVX512F_NATIVE) return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_slli_epi64(a, imm8)); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a); /* The Intel Intrinsics Guide says that only the 8 LSBits of imm8 are * used. In this case we should do "imm8 &= 0xff". However in * practice all bits are used. */ if (imm8 > 63) { simde_memset(&r_, 0, sizeof(r_)); } else { #if defined(SIMDE_X86_AVX2_NATIVE) r_.m256i[0] = simde_mm256_slli_epi64(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); r_.m256i[1] = simde_mm256_slli_epi64(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); #elif defined(SIMDE_X86_SSE2_NATIVE) r_.m128i[0] = simde_mm_slli_epi64(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); r_.m128i[1] = simde_mm_slli_epi64(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); r_.m128i[2] = simde_mm_slli_epi64(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); r_.m128i[3] = simde_mm_slli_epi64(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u64 = a_.u64 << imm8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = a_.u64[i] << imm8; } #endif } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_slli_epi64 #define _mm512_slli_epi64(a, imm8) simde_mm512_slli_epi64(a, imm8) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SLLI_H) */ simde-0.7.2/simde/x86/avx512/sllv.h000066400000000000000000000044701400333146700165340ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject 
to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_SLLV_H) #define SIMDE_X86_AVX512_SLLV_H #include "types.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_sllv_epi16 (simde__m512i a, simde__m512i b) { simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u16 = HEDLEY_STATIC_CAST(__typeof__(r_.u16), (b_.u16 < 16) & (a_.u16 << b_.u16)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (b_.u16[i] < 16) ? 
HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << b_.u16[i])) : 0; } #endif return simde__m512i_from_private(r_); } #if defined(SIMDE_X86_AVX512BW_NATIVE) #define simde_mm512_sllv_epi16(a, b) _mm512_sllv_epi16(a, b) #endif #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_sllv_epi16 #define _mm512_sllv_epi16(a, b) simde_mm512_sllv_epi16(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SLLV_H) */ simde-0.7.2/simde/x86/avx512/sqrt.h000066400000000000000000000076111400333146700165450ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #if !defined(SIMDE_X86_AVX512_SQRT_H) #define SIMDE_X86_AVX512_SQRT_H #include "types.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_sqrt_ps (simde__m512 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_sqrt_ps(a); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if defined(SIMDE_X86_AVX_NATIVE) r_.m256[0] = simde_mm256_sqrt_ps(a_.m256[0]); r_.m256[1] = simde_mm256_sqrt_ps(a_.m256[1]); #elif defined(simde_math_sqrtf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_sqrtf(a_.f32[i]); } #else HEDLEY_UNREACHABLE(); #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) # define _mm512_sqrt_ps(a) simde_mm512_sqrt_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_sqrt_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_sqrt_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_sqrt_ps(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_sqrt_ps #define _mm512_mask_sqrt_ps(src, k, a) simde_mm512_mask_sqrt_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_sqrt_pd (simde__m512d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_sqrt_pd(a); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if defined(SIMDE_X86_AVX_NATIVE) r_.m256d[0] = simde_mm256_sqrt_pd(a_.m256d[0]); r_.m256d[1] = simde_mm256_sqrt_pd(a_.m256d[1]); #elif defined(simde_math_sqrt) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_sqrt(a_.f64[i]); } #else HEDLEY_UNREACHABLE(); #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) # define 
_mm512_sqrt_pd(a) simde_mm512_sqrt_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_sqrt_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_sqrt_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_sqrt_pd(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_sqrt_pd #define _mm512_mask_sqrt_pd(src, k, a) simde_mm512_mask_sqrt_pd(src, k, a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SQRT_H) */ simde-0.7.2/simde/x86/avx512/sra.h000066400000000000000000000051741400333146700163430ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_SRA_H) #define SIMDE_X86_AVX512_SRA_H #include "types.h" #include "../avx2.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_sra_epi16 (simde__m512i a, simde__m128i count) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_sra_epi16(a, count); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_sra_epi16(a_.m256i[i], count); } #else simde__m128i_private count_ = simde__m128i_to_private(count); uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); if (shift > 15) shift = 15; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] >> shift; } #endif #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_sra_epi16 #define _mm512_sra_epi16(a, count) simde_mm512_sra_epi16(a, count) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SRA_H) */ simde-0.7.2/simde/x86/avx512/srai.h000066400000000000000000000044441400333146700165130ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission 
notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_SRAI_H) #define SIMDE_X86_AVX512_SRAI_H #include "types.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_srai_epi16 (simde__m512i a, const int imm8) { simde__m512i_private r_, a_ = simde__m512i_to_private(a); unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); if (shift > 15) shift = 15; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] >> shift; } #endif return simde__m512i_from_private(r_); } #if defined(SIMDE_X86_AVX512BW_NATIVE) # define simde_mm512_srai_epi16(a, imm8) _mm512_srai_epi16(a, imm8) #endif #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_srai_epi16 #define _mm512_srai_epi16(a, imm8) simde_mm512_srai_epi16(a, imm8) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SRAI_H) */ simde-0.7.2/simde/x86/avx512/srav.h000066400000000000000000000043371400333146700165310ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software 
without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_SRAV_H) #define SIMDE_X86_AVX512_SRAV_H #include "types.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_srav_epi16 (simde__m512i a, simde__m512i count) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_srav_epi16(a, count); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), count_ = simde__m512i_to_private(count); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i16[i]); if (shift > 16) shift = 15; r_.i16[i] = a_.i16[i] >> shift; } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_srav_epi16 #define _mm512_srav_epi16(a, count) simde_mm512_srav_epi16(a, count) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SRAV_H) */ 
simde-0.7.2/simde/x86/avx512/srl.h000066400000000000000000000154201400333146700163510ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_SRL_H) #define SIMDE_X86_AVX512_SRL_H #include "types.h" #include "../avx2.h" #include "mov.h" #include "setzero.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_srl_epi16 (simde__m512i a, simde__m128i count) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_srl_epi16(a, count); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_srl_epi16(a_.m256i[i], count); } #else simde__m128i_private count_ = simde__m128i_to_private(count); if (HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]) > 15) return simde_mm512_setzero_si512(); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u16 = a_.u16 >> count_.i64[0]; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.u16[i] = a_.u16[i] >> count_.i64[0]; } #endif #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_srl_epi16 #define _mm512_srl_epi16(a, count) simde_mm512_srl_epi16(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_srl_epi32 (simde__m512i a, simde__m128i count) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_srl_epi32(a, count); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_srl_epi32(a_.m256i[i], count); } #else simde__m128i_private count_ = simde__m128i_to_private(count); if (HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]) > 31) return simde_mm512_setzero_si512(); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u32 = a_.u32 >> count_.i64[0]; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) 
; i++) { r_.u32[i] = a_.u32[i] >> count_.i64[0]; } #endif #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_srl_epi32 #define _mm512_srl_epi32(a, count) simde_mm512_srl_epi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_srl_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_srl_epi32(src, k, a, b); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_srl_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_srl_epi32 #define _mm512_mask_srl_epi32(src, k, a, b) simde_mm512_mask_srl_epi32(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_srl_epi32(simde__mmask16 k, simde__m512i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_srl_epi32(k, a, b); #else return simde_mm512_maskz_mov_epi32(k, simde_mm512_srl_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_srl_epi32 #define _mm512_maskz_srl_epi32(k, a, b) simde_mm512_maskz_srl_epi32(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_srl_epi64 (simde__m512i a, simde__m128i count) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_srl_epi64(a, count); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_srl_epi64(a_.m256i[i], count); } #else simde__m128i_private count_ = simde__m128i_to_private(count); if (HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]) > 63) return simde_mm512_setzero_si512(); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u64 = a_.u64 >> count_.i64[0]; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.u64[i] = a_.u64[i] >> count_.i64[0]; } #endif #endif return 
simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_srl_epi64 #define _mm512_srl_epi64(a, count) simde_mm512_srl_epi64(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_srl_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_srl_epi64(src, k, a, b); #else return simde_mm512_mask_mov_epi64(src, k, simde_mm512_srl_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_srl_epi64 #define _mm512_mask_srl_epi64(src, k, a, b) simde_mm512_mask_srl_epi64(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_srl_epi64(simde__mmask8 k, simde__m512i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_srl_epi64(k, a, b); #else return simde_mm512_maskz_mov_epi64(k, simde_mm512_srl_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_srl_epi64 #define _mm512_maskz_srl_epi64(k, a, b) simde_mm512_maskz_srl_epi64(k, a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SRL_H) */ simde-0.7.2/simde/x86/avx512/srli.h000066400000000000000000000147121400333146700165250ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_SRLI_H) #define SIMDE_X86_AVX512_SRLI_H #include "types.h" #include "../avx2.h" #include "mov.h" #include "setzero.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_srli_epi16 (simde__m512i a, const unsigned int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { #if defined(SIMDE_X86_AVX512BW_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) simde__m512i r; SIMDE_CONSTIFY_16_(_mm512_srli_epi16, r, simde_mm512_setzero_si512(), imm8, a); return r; #elif defined(SIMDE_X86_AVX512BW_NATIVE) return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_srli_epi16(a, imm8)); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a); if (HEDLEY_STATIC_CAST(unsigned int, imm8) > 15) return simde_mm512_setzero_si512(); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(int16_t, imm8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = a_.u16[i] >> imm8; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_NATIVE) #define simde_mm512_srli_epi16(a, imm8) _mm512_srli_epi16(a, imm8) #endif #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_srli_epi16 #define _mm512_srli_epi16(a, imm8) simde_mm512_srli_epi16(a, imm8) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_srli_epi32 (simde__m512i a, unsigned int imm8) { #if defined(SIMDE_X86_AVX512F_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) simde__m512i r; SIMDE_CONSTIFY_32_(_mm512_srli_epi32, r, simde_mm512_setzero_si512(), imm8, a); return r; #elif defined(SIMDE_X86_AVX512F_NATIVE) return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_srli_epi32(a, imm8)); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a); #if defined(SIMDE_X86_AVX2_NATIVE) r_.m256i[0] = simde_mm256_srli_epi32(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); r_.m256i[1] = simde_mm256_srli_epi32(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); #elif defined(SIMDE_X86_SSE2_NATIVE) r_.m128i[0] = simde_mm_srli_epi32(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); r_.m128i[1] = simde_mm_srli_epi32(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); r_.m128i[2] = simde_mm_srli_epi32(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); r_.m128i[3] = simde_mm_srli_epi32(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); #else if (imm8 > 31) { simde_memset(&r_, 0, sizeof(r_)); } else { #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u32 = a_.u32 >> imm8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] >> imm8; } #endif } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_srli_epi32 #define _mm512_srli_epi32(a, imm8) simde_mm512_srli_epi32(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_srli_epi64 (simde__m512i a, unsigned int imm8) { #if defined(SIMDE_X86_AVX512F_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) simde__m512i r; SIMDE_CONSTIFY_64_(_mm512_srli_epi64, r, simde_mm512_setzero_si512(), imm8, a); return r; #elif defined(SIMDE_X86_AVX512F_NATIVE) return 
SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_srli_epi64(a, imm8)); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a); #if defined(SIMDE_X86_AVX2_NATIVE) r_.m256i[0] = simde_mm256_srli_epi64(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); r_.m256i[1] = simde_mm256_srli_epi64(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); #elif defined(SIMDE_X86_SSE2_NATIVE) r_.m128i[0] = simde_mm_srli_epi64(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); r_.m128i[1] = simde_mm_srli_epi64(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); r_.m128i[2] = simde_mm_srli_epi64(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); r_.m128i[3] = simde_mm_srli_epi64(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); #else /* The Intel Intrinsics Guide says that only the 8 LSBits of imm8 are * used. In this case we should do "imm8 &= 0xff" here. However in * practice all bits are used. */ if (imm8 > 63) { simde_memset(&r_, 0, sizeof(r_)); } else { #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u64 = a_.u64 >> imm8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = a_.u64[i] >> imm8; } #endif } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_srli_epi64 #define _mm512_srli_epi64(a, imm8) simde_mm512_srli_epi64(a, imm8) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SRLI_H) */ simde-0.7.2/simde/x86/avx512/srlv.h000066400000000000000000000223641400333146700165440ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following 
conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_SRLV_H) #define SIMDE_X86_AVX512_SRLV_H #include "types.h" #include "../avx2.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srlv_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX256VL_NATIVE) && defined(SIMDE_X86_AVX256BW_NATIVE) return _mm_srlv_epi16(a, b); #else simde__m128i_private a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), r_; #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u16 = HEDLEY_STATIC_CAST(__typeof__(r_.u16), (b_.u16 < 16) & (a_.u16 >> b_.u16)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (b_.u16[i] < 16) ? 
(a_.u16[i] >> b_.u16[i]) : 0; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm_srlv_epi16 #define _mm_srlv_epi16(a, b) simde_mm_srlv_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_srlv_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_mask_srlv_epi16(src, k, a, b); #else return simde_mm_mask_mov_epi16(src, k, simde_mm_srlv_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_srlv_epi16 #define _mm_mask_srlv_epi16(src, k, a, b) simde_mm_mask_srlv_epi16(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_srlv_epi16(simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) return _mm_maskz_srlv_epi16(k, a, b); #else return simde_mm_maskz_mov_epi16(k, simde_mm_srlv_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_srlv_epi16 #define _mm_maskz_srlv_epi16(k, a, b) simde_mm_maskz_srlv_epi16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_srlv_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask_srlv_epi32(src, k, a, b); #else return simde_mm_mask_mov_epi32(src, k, simde_mm_srlv_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_srlv_epi32 #define _mm_mask_srlv_epi32(src, k, a, b) simde_mm_mask_srlv_epi32(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_srlv_epi32(simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_maskz_srlv_epi32(k, a, b); #else return simde_mm_maskz_mov_epi32(k, simde_mm_srlv_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef 
_mm_maskz_srlv_epi32 #define _mm_maskz_srlv_epi32(k, a, b) simde_mm_maskz_srlv_epi32(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_srlv_epi64(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask_srlv_epi64(src, k, a, b); #else return simde_mm_mask_mov_epi64(src, k, simde_mm_srlv_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_srlv_epi64 #define _mm_mask_srlv_epi64(src, k, a, b) simde_mm_mask_srlv_epi64(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_srlv_epi64(simde__mmask8 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_maskz_srlv_epi64(k, a, b); #else return simde_mm_maskz_mov_epi64(k, simde_mm_srlv_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_srlv_epi64 #define _mm_maskz_srlv_epi64(k, a, b) simde_mm_maskz_srlv_epi64(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_srlv_epi16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX256VL_NATIVE) && defined(SIMDE_X86_AVX256BW_NATIVE) return _mm256_srlv_epi16(a, b); #else simde__m256i_private a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b), r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { r_.m128i[i] = simde_mm_srlv_epi16(a_.m128i[i], b_.m128i[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u16 = HEDLEY_STATIC_CAST(__typeof__(r_.u16), (b_.u16 < 16) & (a_.u16 >> b_.u16)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (b_.u16[i] < 16) ? 
(a_.u16[i] >> b_.u16[i]) : 0; } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm256_srlv_epi16 #define _mm256_srlv_epi16(a, b) simde_mm256_srlv_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_srlv_epi16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_srlv_epi16(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b), r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_srlv_epi16(a_.m256i[i], b_.m256i[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u16 = HEDLEY_STATIC_CAST(__typeof__(r_.u16), (b_.u16 < 16) & (a_.u16 >> b_.u16)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (b_.u16[i] < 16) ? (a_.u16[i] >> b_.u16[i]) : 0; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_srlv_epi16 #define _mm512_srlv_epi16(a, b) simde_mm512_srlv_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_srlv_epi32 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_srlv_epi32(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b), r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_srlv_epi32(a_.m256i[i], b_.m256i[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u32 = HEDLEY_STATIC_CAST(__typeof__(r_.u32), (b_.u32 < 32) & (a_.u32 >> b_.u32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] >> b_.u32[i]) : 0; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_srlv_epi32 #define _mm512_srlv_epi32(a, b) simde_mm512_srlv_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_srlv_epi64 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_srlv_epi64(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b), r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_srlv_epi64(a_.m256i[i], b_.m256i[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u64 = HEDLEY_STATIC_CAST(__typeof__(r_.u64), (b_.u64 < 64) & (a_.u64 >> b_.u64)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_srlv_epi64 #define _mm512_srlv_epi64(a, b) simde_mm512_srlv_epi64(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SRLV_H) */ simde-0.7.2/simde/x86/avx512/store.h000066400000000000000000000066631400333146700167160ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_X86_AVX512_STORE_H) #define SIMDE_X86_AVX512_STORE_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES void simde_mm512_store_ps (void * mem_addr, simde__m512 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) _mm512_store_ps(mem_addr, a); #else simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512), &a, sizeof(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_store_ps #define _mm512_store_ps(mem_addr, a) simde_mm512_store_ps(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm512_store_pd (void * mem_addr, simde__m512d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) _mm512_store_pd(mem_addr, a); #else simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512d), &a, sizeof(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_store_pd #define _mm512_store_pd(mem_addr, a) simde_mm512_store_pd(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm512_store_si512 (void * mem_addr, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) _mm512_store_si512(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a); #else simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512i), &a, sizeof(a)); #endif } #define simde_mm512_store_epi8(mem_addr, a) simde_mm512_store_si512(mem_addr, a) #define simde_mm512_store_epi16(mem_addr, a) simde_mm512_store_si512(mem_addr, a) #define simde_mm512_store_epi32(mem_addr, a) 
simde_mm512_store_si512(mem_addr, a) #define simde_mm512_store_epi64(mem_addr, a) simde_mm512_store_si512(mem_addr, a) #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_store_epi8 #undef _mm512_store_epi16 #undef _mm512_store_epi32 #undef _mm512_store_epi64 #undef _mm512_store_si512 #define _mm512_store_si512(mem_addr, a) simde_mm512_store_si512(mem_addr, a) #define _mm512_store_epi8(mem_addr, a) simde_mm512_store_si512(mem_addr, a) #define _mm512_store_epi16(mem_addr, a) simde_mm512_store_si512(mem_addr, a) #define _mm512_store_epi32(mem_addr, a) simde_mm512_store_si512(mem_addr, a) #define _mm512_store_epi64(mem_addr, a) simde_mm512_store_si512(mem_addr, a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_STORE_H) */ simde-0.7.2/simde/x86/avx512/storeu.h000066400000000000000000000065451400333146700171020ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_X86_AVX512_STOREU_H) #define SIMDE_X86_AVX512_STOREU_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES void simde_mm512_storeu_ps (void * mem_addr, simde__m512 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) _mm512_storeu_ps(mem_addr, a); #else simde_memcpy(mem_addr, &a, sizeof(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_storeu_ps #define _mm512_storeu_ps(mem_addr, a) simde_mm512_storeu_ps(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm512_storeu_pd (void * mem_addr, simde__m512d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) _mm512_storeu_pd(mem_addr, a); #else simde_memcpy(mem_addr, &a, sizeof(a)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_storeu_pd #define _mm512_storeu_pd(mem_addr, a) simde_mm512_storeu_pd(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm512_storeu_si512 (void * mem_addr, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) _mm512_storeu_si512(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a); #else simde_memcpy(mem_addr, &a, sizeof(a)); #endif } #define simde_mm512_storeu_epi8(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) #define simde_mm512_storeu_epi16(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) #define simde_mm512_storeu_epi32(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) #define simde_mm512_storeu_epi64(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_storeu_epi8 #undef _mm512_storeu_epi16 #undef _mm512_storeu_epi32 #undef _mm512_storeu_epi64 #undef _mm512_storeu_si512 #define _mm512_storeu_si512(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) #define _mm512_storeu_epi8(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) #define _mm512_storeu_epi16(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) #define _mm512_storeu_epi32(mem_addr, a) 
simde_mm512_storeu_si512(mem_addr, a) #define _mm512_storeu_epi64(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_STOREU_H) */ simde-0.7.2/simde/x86/avx512/sub.h000066400000000000000000000252101400333146700163400ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_SUB_H) #define SIMDE_X86_AVX512_SUB_H #include "types.h" #include "../avx2.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_sub_epi8 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_sub_epi8(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = a_.i8 - b_.i8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_sub_epi8(a_.m256i[i], b_.m256i[i]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_sub_epi8 #define _mm512_sub_epi8(a, b) simde_mm512_sub_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_sub_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_sub_epi8(src, k, a, b); #else return simde_mm512_mask_mov_epi8(src, k, simde_mm512_sub_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_sub_epi8 #define _mm512_mask_sub_epi8(src, k, a, b) simde_mm512_mask_sub_epi8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_sub_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_sub_epi8(k, a, b); #else return simde_mm512_maskz_mov_epi8(k, simde_mm512_sub_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_sub_epi8 #define _mm512_maskz_sub_epi8(k, a, b) simde_mm512_maskz_sub_epi8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_sub_epi16 (simde__m512i a, simde__m512i b) { #if 
defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_sub_epi16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = a_.i16 - b_.i16; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_sub_epi16(a_.m256i[i], b_.m256i[i]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_sub_epi16 #define _mm512_sub_epi16(a, b) simde_mm512_sub_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_sub_epi32 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_sub_epi32(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 - b_.i32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_sub_epi32(a_.m256i[i], b_.m256i[i]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_sub_epi32 #define _mm512_sub_epi32(a, b) simde_mm512_sub_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_sub_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_sub_epi32(src, k, a, b); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_sub_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_sub_epi32 #define _mm512_mask_sub_epi32(src, k, a, b) simde_mm512_mask_sub_epi32(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_sub_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_sub_epi32(k, a, b); #else return simde_mm512_maskz_mov_epi32(k, 
simde_mm512_sub_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_sub_epi32 #define _mm512_maskz_sub_epi32(k, a, b) simde_mm512_maskz_sub_epi32(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_sub_epi64 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_sub_epi64(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 - b_.i64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_sub_epi64(a_.m256i[i], b_.m256i[i]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_sub_epi64 #define _mm512_sub_epi64(a, b) simde_mm512_sub_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_sub_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_sub_epi64(src, k, a, b); #else return simde_mm512_mask_mov_epi64(src, k, simde_mm512_sub_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_sub_epi64 #define _mm512_mask_sub_epi64(src, k, a, b) simde_mm512_mask_sub_epi64(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_sub_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_sub_epi64(k, a, b); #else return simde_mm512_maskz_mov_epi64(k, simde_mm512_sub_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_sub_epi64 #define _mm512_maskz_sub_epi64(k, a, b) simde_mm512_maskz_sub_epi64(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_sub_ps (simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_sub_ps(a, b); #else simde__m512_private 
r_, a_ = simde__m512_to_private(a), b_ = simde__m512_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = a_.f32 - b_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_sub_ps(a_.m256[i], b_.m256[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_sub_ps #define _mm512_sub_ps(a, b) simde_mm512_sub_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_sub_ps (simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_sub_ps(src, k, a, b); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_sub_ps(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_sub_ps #define _mm512_mask_sub_ps(src, k, a, b) simde_mm512_mask_sub_ps(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_maskz_sub_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_sub_ps(k, a, b); #else return simde_mm512_maskz_mov_ps(k, simde_mm512_sub_ps(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_sub_ps #define _mm512_maskz_sub_ps(k, a, b) simde_mm512_maskz_sub_ps(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_sub_pd (simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_sub_pd(a, b); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a), b_ = simde__m512d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = a_.f64 - b_.f64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_sub_pd(a_.m256d[i], b_.m256d[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_sub_pd #define _mm512_sub_pd(a, b) 
simde_mm512_sub_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_sub_pd (simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_sub_pd(src, k, a, b); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_sub_pd(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_sub_pd #define _mm512_mask_sub_pd(src, k, a, b) simde_mm512_mask_sub_pd(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_maskz_sub_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_sub_pd(k, a, b); #else return simde_mm512_maskz_mov_pd(k, simde_mm512_sub_pd(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_sub_pd #define _mm512_maskz_sub_pd(k, a, b) simde_mm512_maskz_sub_pd(k, a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SUB_H) */ simde-0.7.2/simde/x86/avx512/subs.h000066400000000000000000000157031400333146700165310ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_SUBS_H) #define SIMDE_X86_AVX512_SUBS_H #include "types.h" #include "../avx2.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_subs_epi8 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_subs_epi8(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if !defined(HEDLEY_INTEL_VERSION) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_subs_epi8(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_subs_epi8 #define _mm512_subs_epi8(a, b) simde_mm512_subs_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_subs_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_subs_epi8(src, k, a, b); #else return simde_mm512_mask_mov_epi8(src, k, simde_mm512_subs_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_subs_epi8 #define _mm512_mask_subs_epi8(src, k, a, b) simde_mm512_mask_subs_epi8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_subs_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_subs_epi8(k, a, 
b); #else return simde_mm512_maskz_mov_epi8(k, simde_mm512_subs_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_subs_epi8 #define _mm512_maskz_subs_epi8(k, a, b) simde_mm512_maskz_subs_epi8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_subs_epi16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_subs_epi16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if !defined(HEDLEY_INTEL_VERSION) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_subs_epi16(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_subs_epi16 #define _mm512_subs_epi16(a, b) simde_mm512_subs_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_subs_epu8 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_subs_epu8(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if !defined(HEDLEY_INTEL_VERSION) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { r_.m128i[i] = simde_mm_subs_epu8(a_.m128i[i], b_.m128i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_subs_epu8 #define _mm512_subs_epu8(a, b) simde_mm512_subs_epu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_subs_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { #if 
defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_subs_epu8(src, k, a, b); #else return simde_mm512_mask_mov_epi8(src, k, simde_mm512_subs_epu8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_subs_epu8 #define _mm512_mask_subs_epu8(src, k, a, b) simde_mm512_mask_subs_epu8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_subs_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_subs_epu8(k, a, b); #else return simde_mm512_maskz_mov_epi8(k, simde_mm512_subs_epu8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_subs_epu8 #define _mm512_maskz_subs_epu8(k, a, b) simde_mm512_maskz_subs_epu8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_subs_epu16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_subs_epu16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if !defined(HEDLEY_INTEL_VERSION) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_subs_epu16(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_subs_epu16 #define _mm512_subs_epu16(a, b) simde_mm512_subs_epu16(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_SUBS_H) */ simde-0.7.2/simde/x86/avx512/test.h000066400000000000000000000142311400333146700165270ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the 
Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan * 2020 Christopher Moore */ #if !defined(SIMDE_X86_AVX512_TEST_H) #define SIMDE_X86_AVX512_TEST_H #include "types.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__mmask32 simde_mm512_test_epi16_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_test_epi16_mask(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); simde__mmask32 r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { r |= HEDLEY_STATIC_CAST(simde__mmask32, !!(a_.i16[i] & b_.i16[i]) << i); } return r; #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_test_epi16_mask #define _mm512_test_epi16_mask(a, b) simde_mm512_test_epi16_mask(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask16 simde_mm512_test_epi32_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_test_epi32_mask(a, b); #else simde__m512i_private 
a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); simde__mmask16 r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { r |= HEDLEY_STATIC_CAST(simde__mmask16, !!(a_.i32[i] & b_.i32[i]) << i); } return r; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_test_epi32_mask #define _mm512_test_epi32_mask(a, b) simde_mm512_test_epi32_mask(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask8 simde_mm512_test_epi64_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_test_epi64_mask(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); simde__mmask8 r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { r |= HEDLEY_STATIC_CAST(simde__mmask8, !!(a_.i64[i] & b_.i64[i]) << i); } return r; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_test_epi64_mask #define _mm512_test_epi64_mask(a, b) simde_mm512_test_epi64_mask(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask64 simde_mm512_test_epi8_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_test_epi8_mask(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); simde__mmask64 r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { r |= HEDLEY_STATIC_CAST(simde__mmask64, HEDLEY_STATIC_CAST(uint64_t, !!(a_.i8[i] & b_.i8[i])) << i); } return r; #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_test_epi8_mask #define _mm512_test_epi8_mask(a, b) simde_mm512_test_epi8_mask(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask32 simde_mm512_mask_test_epi16_mask (simde__mmask32 k1, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_test_epi16_mask(k1, a, b); #else return 
simde_mm512_test_epi16_mask(a, b) & k1; #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_test_epi16_mask #define _mm512_mask_test_epi16_mask(k1, a, b) simde_mm512_mask_test_epi16_mask(k1, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask16 simde_mm512_mask_test_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_test_epi32_mask(k1, a, b); #else return simde_mm512_test_epi32_mask(a, b) & k1; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_test_epi32_mask #define _mm512_mask_test_epi32_mask(k1, a, b) simde_mm512_mask_test_epi32_mask(k1, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask8 simde_mm512_mask_test_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_test_epi64_mask(k1, a, b); #else return simde_mm512_test_epi64_mask(a, b) & k1; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_test_epi64_mask #define _mm512_mask_test_epi64_mask(k1, a, b) simde_mm512_mask_test_epi64_mask(k1, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask64 simde_mm512_mask_test_epi8_mask (simde__mmask64 k1, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_test_epi8_mask(k1, a, b); #else return simde_mm512_test_epi8_mask(a, b) & k1; #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_test_epi8_mask #define _mm512_mask_test_epi8_mask(k1, a, b) simde_mm512_mask_test_epi8_mask(k1, a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_TEST_H) */ simde-0.7.2/simde/x86/avx512/types.h000066400000000000000000000426211400333146700167200ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal 
in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #if !defined(SIMDE_X86_AVX512_TYPES_H) #define SIMDE_X86_AVX512_TYPES_H #include "../avx.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ /* The problem is that Microsoft doesn't support 64-byte aligned parameters, except for * __m512/__m512i/__m512d. Since our private union has an __m512 member it will be 64-byte * aligned even if we reduce the alignment requirements of other members. * * Even if we're on x86 and use the native AVX-512 types for arguments/return values, the * to/from private functions will break, and I'm not willing to change their APIs to use * pointers (which would also require more verbose code on the caller side) just to make * MSVC happy. * * If you want to use AVX-512 in SIMDe, you'll need to either upgrade to MSVC 2017 or later, * or upgrade to a different compiler (clang-cl, perhaps?). If you have an idea of how to * fix this without requiring API changes (except transparently through macros), patches * are welcome. 
*/ # if defined(HEDLEY_MSVC_VERSION) && !HEDLEY_MSVC_VERSION_CHECK(19,10,0) # if defined(SIMDE_X86_AVX512F_NATIVE) # undef SIMDE_X86_AVX512F_NATIVE # pragma message("Native AVX-512 support requires MSVC 2017 or later. See comment above (in code) for details.") # endif # define SIMDE_AVX512_ALIGN SIMDE_ALIGN_TO_32 # else # define SIMDE_AVX512_ALIGN SIMDE_ALIGN_TO_64 # endif typedef union { #if defined(SIMDE_VECTOR_SUBSCRIPT) SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; #if defined(SIMDE_HAVE_INT128_) SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; #endif SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; #else SIMDE_AVX512_ALIGN int8_t i8[64]; SIMDE_AVX512_ALIGN int16_t i16[32]; SIMDE_AVX512_ALIGN int32_t i32[16]; SIMDE_AVX512_ALIGN int64_t i64[8]; SIMDE_AVX512_ALIGN uint8_t u8[64]; SIMDE_AVX512_ALIGN uint16_t u16[32]; SIMDE_AVX512_ALIGN uint32_t u32[16]; SIMDE_AVX512_ALIGN uint64_t u64[8]; SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; #if defined(SIMDE_HAVE_INT128_) SIMDE_AVX512_ALIGN simde_int128 i128[4]; SIMDE_AVX512_ALIGN simde_uint128 u128[4]; #endif SIMDE_AVX512_ALIGN simde_float32 
f32[16]; SIMDE_AVX512_ALIGN simde_float64 f64[8]; #endif SIMDE_AVX512_ALIGN simde__m128_private m128_private[4]; SIMDE_AVX512_ALIGN simde__m128 m128[4]; SIMDE_AVX512_ALIGN simde__m256_private m256_private[2]; SIMDE_AVX512_ALIGN simde__m256 m256[2]; #if defined(SIMDE_X86_AVX512F_NATIVE) SIMDE_AVX512_ALIGN __m512 n; #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; #endif #endif } simde__m512_private; typedef union { #if defined(SIMDE_VECTOR_SUBSCRIPT) SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; #if defined(SIMDE_HAVE_INT128_) SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; #endif SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) 
SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; #else SIMDE_AVX512_ALIGN int8_t i8[64]; SIMDE_AVX512_ALIGN int16_t i16[32]; SIMDE_AVX512_ALIGN int32_t i32[16]; SIMDE_AVX512_ALIGN int64_t i64[8]; SIMDE_AVX512_ALIGN uint8_t u8[64]; SIMDE_AVX512_ALIGN uint16_t u16[32]; SIMDE_AVX512_ALIGN uint32_t u32[16]; SIMDE_AVX512_ALIGN uint64_t u64[8]; #if defined(SIMDE_HAVE_INT128_) SIMDE_AVX512_ALIGN simde_int128 i128[4]; SIMDE_AVX512_ALIGN simde_uint128 u128[4]; #endif SIMDE_AVX512_ALIGN simde_float32 f32[16]; SIMDE_AVX512_ALIGN simde_float64 f64[8]; SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; #endif SIMDE_AVX512_ALIGN simde__m128d_private m128d_private[4]; SIMDE_AVX512_ALIGN simde__m128d m128d[4]; SIMDE_AVX512_ALIGN simde__m256d_private m256d_private[2]; SIMDE_AVX512_ALIGN simde__m256d m256d[2]; #if defined(SIMDE_X86_AVX512F_NATIVE) SIMDE_AVX512_ALIGN __m512d n; #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; #endif #endif } 
simde__m512d_private; typedef union { #if defined(SIMDE_VECTOR_SUBSCRIPT) SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; #if defined(SIMDE_HAVE_INT128_) SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; #endif SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; #else SIMDE_AVX512_ALIGN int8_t i8[64]; SIMDE_AVX512_ALIGN int16_t i16[32]; SIMDE_AVX512_ALIGN int32_t i32[16]; SIMDE_AVX512_ALIGN int64_t i64[8]; SIMDE_AVX512_ALIGN uint8_t u8[64]; SIMDE_AVX512_ALIGN uint16_t u16[32]; SIMDE_AVX512_ALIGN uint32_t u32[16]; SIMDE_AVX512_ALIGN uint64_t u64[8]; SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; #if defined(SIMDE_HAVE_INT128_) SIMDE_AVX512_ALIGN simde_int128 i128[4]; SIMDE_AVX512_ALIGN simde_uint128 u128[4]; #endif SIMDE_AVX512_ALIGN simde_float32 f32[16]; SIMDE_AVX512_ALIGN simde_float64 f64[8]; #endif SIMDE_AVX512_ALIGN simde__m128i_private m128i_private[4]; SIMDE_AVX512_ALIGN simde__m128i m128i[4]; SIMDE_AVX512_ALIGN simde__m256i_private m256i_private[2]; SIMDE_AVX512_ALIGN simde__m256i m256i[2]; #if defined(SIMDE_X86_AVX512F_NATIVE) SIMDE_AVX512_ALIGN __m512i n; #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; #endif #endif } simde__m512i_private; /* Intel uses the same header (immintrin.h) for everything AVX and * later. If native aliases are enabled, and the machine has native * support for AVX, immintrin.h will already have been included, which * means simde__m512* will already have been defined. So, even * if the machine doesn't support AVX512F we need to use the native * type; it has already been defined. * * However, we also can't just assume that including immintrin.h does * actually define these. It could be a compiler which supports AVX * but not AVX512F, such as GCC < 4.9 or VS < 2017. That's why we * check to see if _MM_CMPINT_GE is defined; it's part of AVX512F, * so we assume that if it's present AVX-512F has already been * declared. * * Note that the choice of _MM_CMPINT_GE is deliberate; while GCC * uses the preprocessor to define all the _MM_CMPINT_* members, * in most compilers they are simply normal enum members. However, * all compilers I've looked at use an object-like macro for * _MM_CMPINT_GE, which is defined to _MM_CMPINT_NLT. _MM_CMPINT_NLT * is included in case a compiler does the reverse, though I haven't * run into one which does.
* * As for the ICC check, unlike other compilers, merely using the * AVX-512 types causes ICC to generate AVX-512 instructions. */ #if (defined(_MM_CMPINT_GE) || defined(_MM_CMPINT_NLT)) && (defined(SIMDE_X86_AVX512F_NATIVE) || !defined(HEDLEY_INTEL_VERSION)) typedef __m512 simde__m512; typedef __m512i simde__m512i; typedef __m512d simde__m512d; typedef __mmask8 simde__mmask8; typedef __mmask16 simde__mmask16; #else #if defined(SIMDE_VECTOR_SUBSCRIPT) typedef simde_float32 simde__m512 SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; typedef int_fast32_t simde__m512i SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; typedef simde_float64 simde__m512d SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; #else typedef simde__m512_private simde__m512; typedef simde__m512i_private simde__m512i; typedef simde__m512d_private simde__m512d; #endif typedef uint8_t simde__mmask8; typedef uint16_t simde__mmask16; #endif /* These are really part of AVX-512VL / AVX-512BW (in GCC __mmask32 is * in avx512vlintrin.h and __mmask64 is in avx512bwintrin.h, in clang * both are in avx512bwintrin.h), not AVX-512F. However, we don't have * a good (not-compiler-specific) way to detect if these headers have * been included. In compilers which support AVX-512F but not * AVX-512BW/VL (e.g., GCC 4.9) we need typedefs since __mmask{32,64} * won't exist. * * AFAICT __mmask{32,64} are always just typedefs to uint{32,64}_t * in all compilers, so it's safe to use these instead of typedefs to * __mmask{32,64}. If you run into a problem with this please file an * issue and we'll try to figure out a work-around.
*/ typedef uint32_t simde__mmask32; typedef uint64_t simde__mmask64; #if !defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) #if !defined(HEDLEY_INTEL_VERSION) typedef simde__m512 __m512; typedef simde__m512i __m512i; typedef simde__m512d __m512d; #else #define __m512 simde__m512 #define __m512i simde__m512i #define __m512d simde__m512d #endif #endif HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512), "simde__m512 size incorrect"); HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512_private), "simde__m512_private size incorrect"); HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i), "simde__m512i size incorrect"); HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i_private), "simde__m512i_private size incorrect"); HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d), "simde__m512d size incorrect"); HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d_private), "simde__m512d_private size incorrect"); #if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512) == 32, "simde__m512 is not 32-byte aligned"); HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512_private) == 32, "simde__m512_private is not 32-byte aligned"); HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i) == 32, "simde__m512i is not 32-byte aligned"); HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i_private) == 32, "simde__m512i_private is not 32-byte aligned"); HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d) == 32, "simde__m512d is not 32-byte aligned"); HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d_private) == 32, "simde__m512d_private is not 32-byte aligned"); #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde__m512_from_private(simde__m512_private v) { simde__m512 r; simde_memcpy(&r, &v, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m512_private simde__m512_to_private(simde__m512 v) { simde__m512_private r; simde_memcpy(&r, &v, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde__m512i_from_private(simde__m512i_private v) { 
simde__m512i r; simde_memcpy(&r, &v, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m512i_private simde__m512i_to_private(simde__m512i v) { simde__m512i_private r; simde_memcpy(&r, &v, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde__m512d_from_private(simde__m512d_private v) { simde__m512d r; simde_memcpy(&r, &v, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m512d_private simde__m512d_to_private(simde__m512d v) { simde__m512d_private r; simde_memcpy(&r, &v, sizeof(r)); return r; } SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_TYPES_H) */ simde-0.7.2/simde/x86/avx512/unpackhi.h000066400000000000000000000321221400333146700173510ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_UNPACKHI_H) #define SIMDE_X86_AVX512_UNPACKHI_H #include "types.h" #include "../avx2.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_unpackhi_epi8 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_unpackhi_epi8(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 64, a_.i8, b_.i8, 8, 72, 9, 73, 10, 74, 11, 75, 12, 76, 13, 77, 14, 78, 15, 79, 24, 88, 25, 89, 26, 90, 27, 91, 28, 92, 29, 93, 30, 94, 31, 95, 40, 104, 41, 105, 42, 106, 43, 107, 44, 108, 45, 109, 46, 110, 47, 111, 56, 120, 57, 121, 58, 122, 59, 123, 60, 124, 61, 125, 62, 126, 63, 127); #else r_.m256i[0] = simde_mm256_unpackhi_epi8(a_.m256i[0], b_.m256i[0]); r_.m256i[1] = simde_mm256_unpackhi_epi8(a_.m256i[1], b_.m256i[1]); #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_unpackhi_epi8 #define _mm512_unpackhi_epi8(a, b) simde_mm512_unpackhi_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_unpackhi_epi8(simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_unpackhi_epi8(src, k, a, b); #else return simde_mm512_mask_mov_epi8(src, k, simde_mm512_unpackhi_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_unpackhi_epi8 #define _mm512_mask_unpackhi_epi8(src, k, a, b) simde_mm512_mask_unpackhi_epi8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_unpackhi_epi8(simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_unpackhi_epi8(k, a, b); #else return 
simde_mm512_maskz_mov_epi8(k, simde_mm512_unpackhi_epi8(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_unpackhi_epi8 #define _mm512_maskz_unpackhi_epi8(k, a, b) simde_mm512_maskz_unpackhi_epi8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_unpackhi_epi16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_unpackhi_epi16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.i16 =SIMDE_SHUFFLE_VECTOR_(16, 64, a_.i16, b_.i16, 4, 36, 5, 37, 6, 38, 7, 39, 12, 44, 13, 45, 14, 46, 15, 47, 20, 52, 21, 53, 22, 54, 23, 55, 28, 60, 29, 61, 30, 62, 31, 63); #else r_.m256i[0] = simde_mm256_unpackhi_epi16(a_.m256i[0], b_.m256i[0]); r_.m256i[1] = simde_mm256_unpackhi_epi16(a_.m256i[1], b_.m256i[1]); #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_unpackhi_epi16 #define _mm512_unpackhi_epi16(a, b) simde_mm512_unpackhi_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_unpackhi_epi16(simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_mask_unpackhi_epi16(src, k, a, b); #else return simde_mm512_mask_mov_epi16(src, k, simde_mm512_unpackhi_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_unpackhi_epi16 #define _mm512_mask_unpackhi_epi16(src, k, a, b) simde_mm512_mask_unpackhi_epi16(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_unpackhi_epi16(simde__mmask32 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_maskz_unpackhi_epi16(k, a, b); #else return simde_mm512_maskz_mov_epi16(k, simde_mm512_unpackhi_epi16(a, b)); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_unpackhi_epi16 #define 
_mm512_maskz_unpackhi_epi16(k, a, b) simde_mm512_maskz_unpackhi_epi16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_unpackhi_epi32 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_unpackhi_epi32(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.i32, b_.i32, 2, 18, 3 , 19, 6, 22, 7, 23, 10, 26, 11, 27, 14, 30, 15, 31); #else r_.m256i[0] = simde_mm256_unpackhi_epi32(a_.m256i[0], b_.m256i[0]); r_.m256i[1] = simde_mm256_unpackhi_epi32(a_.m256i[1], b_.m256i[1]); #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_unpackhi_epi32 #define _mm512_unpackhi_epi32(a, b) simde_mm512_unpackhi_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_unpackhi_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_unpackhi_epi32(src, k, a, b); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_unpackhi_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_unpackhi_epi32 #define _mm512_mask_unpackhi_epi32(src, k, a, b) simde_mm512_mask_unpackhi_epi32(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_unpackhi_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_unpackhi_epi32(k, a, b); #else return simde_mm512_maskz_mov_epi32(k, simde_mm512_unpackhi_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_unpackhi_epi32 #define _mm512_maskz_unpackhi_epi32(k, a, b) simde_mm512_maskz_unpackhi_epi32(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_unpackhi_epi64 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return 
_mm512_unpackhi_epi64(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 64, a_.i64, b_.i64, 1, 9, 3, 11, 5, 13, 7, 15); #else r_.m256i[0] = simde_mm256_unpackhi_epi64(a_.m256i[0], b_.m256i[0]); r_.m256i[1] = simde_mm256_unpackhi_epi64(a_.m256i[1], b_.m256i[1]); #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_unpackhi_epi64 #define _mm512_unpackhi_epi64(a, b) simde_mm512_unpackhi_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_unpackhi_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_unpackhi_epi64(src, k, a, b); #else return simde_mm512_mask_mov_epi64(src, k, simde_mm512_unpackhi_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_unpackhi_epi64 #define _mm512_mask_unpackhi_epi64(src, k, a, b) simde_mm512_mask_unpackhi_epi64(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_unpackhi_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_unpackhi_epi64(k, a, b); #else return simde_mm512_maskz_mov_epi64(k, simde_mm512_unpackhi_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_unpackhi_epi64 #define _mm512_maskz_unpackhi_epi64(k, a, b) simde_mm512_maskz_unpackhi_epi64(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_unpackhi_ps (simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_unpackhi_ps(a, b); #else simde__m512_private r_, a_ = simde__m512_to_private(a), b_ = simde__m512_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.f32, b_.f32, 2, 18, 3 , 19, 6, 22, 7, 23, 10, 26, 11, 27, 14, 30, 15, 31); #else 
r_.m256[0] = simde_mm256_unpackhi_ps(a_.m256[0], b_.m256[0]); r_.m256[1] = simde_mm256_unpackhi_ps(a_.m256[1], b_.m256[1]); #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_unpackhi_ps #define _mm512_unpackhi_ps(a, b) simde_mm512_unpackhi_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_unpackhi_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_unpackhi_ps(src, k, a, b); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_unpackhi_ps(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_unpackhi_ps #define _mm512_mask_unpackhi_ps(src, k, a, b) simde_mm512_mask_unpackhi_ps(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_maskz_unpackhi_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_unpackhi_ps(k, a, b); #else return simde_mm512_maskz_mov_ps(k, simde_mm512_unpackhi_ps(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_unpackhi_ps #define _mm512_maskz_unpackhi_ps(k, a, b) simde_mm512_maskz_unpackhi_ps(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_unpackhi_pd (simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_unpackhi_pd(a, b); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a), b_ = simde__m512d_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 64, a_.f64, b_.f64, 1, 9, 3, 11, 5, 13, 7, 15); #else r_.m256d[0] = simde_mm256_unpackhi_pd(a_.m256d[0], b_.m256d[0]); r_.m256d[1] = simde_mm256_unpackhi_pd(a_.m256d[1], b_.m256d[1]); #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_unpackhi_pd #define _mm512_unpackhi_pd(a, b) simde_mm512_unpackhi_pd(a, b) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_unpackhi_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_unpackhi_pd(src, k, a, b); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_unpackhi_pd(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_unpackhi_pd #define _mm512_mask_unpackhi_pd(src, k, a, b) simde_mm512_mask_unpackhi_pd(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_maskz_unpackhi_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_unpackhi_pd(k, a, b); #else return simde_mm512_maskz_mov_pd(k, simde_mm512_unpackhi_pd(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_unpackhi_pd #define _mm512_maskz_unpackhi_pd(k, a, b) simde_mm512_maskz_unpackhi_pd(k, a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_UNPACKHI_H) */ simde-0.7.2/simde/x86/avx512/unpacklo.h000066400000000000000000000074561400333146700173770ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_UNPACKLO_H) #define SIMDE_X86_AVX512_UNPACKLO_H #include "types.h" #include "../avx2.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_unpacklo_epi8 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_unpacklo_epi8(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 64, a_.i8, b_.i8, 0, 64, 1, 65, 2, 66, 3, 67, 4, 68, 5, 69, 6, 70, 7, 71, 16, 80, 17, 81, 18, 82, 19, 83, 20, 84, 21, 85, 22, 86, 23, 87, 32, 96, 33, 97, 34, 98, 35, 99, 36, 100, 37, 101, 38, 102, 39, 103, 48, 112, 49, 113, 50, 114, 51, 115, 52, 116, 53, 117, 54, 118, 55, 119); #else r_.m256i[0] = simde_mm256_unpacklo_epi8(a_.m256i[0], b_.m256i[0]); r_.m256i[1] = simde_mm256_unpacklo_epi8(a_.m256i[1], b_.m256i[1]); #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_unpacklo_epi8 #define _mm512_unpacklo_epi8(a, b) simde_mm512_unpacklo_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_unpacklo_epi16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_unpacklo_epi16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_SHUFFLE_VECTOR_) r_.i16 =SIMDE_SHUFFLE_VECTOR_(16, 64, a_.i16, b_.i16, 0, 32, 1, 33, 2, 34, 3, 35, 8, 40, 9, 41, 10, 42, 11, 43, 16, 48, 17, 49, 18, 50, 19, 51, 24, 56, 25, 57, 26, 58, 27, 59); #else 
r_.m256i[0] = simde_mm256_unpacklo_epi16(a_.m256i[0], b_.m256i[0]); r_.m256i[1] = simde_mm256_unpacklo_epi16(a_.m256i[1], b_.m256i[1]); #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_unpacklo_epi16 #define _mm512_unpacklo_epi16(a, b) simde_mm512_unpacklo_epi16(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_UNPACKLO_H) */ simde-0.7.2/simde/x86/avx512/xor.h000066400000000000000000000207261400333146700163660ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_AVX512_XOR_H) #define SIMDE_X86_AVX512_XOR_H #include "types.h" #include "../avx2.h" #include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_xor_ps (simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_xor_ps(a, b); #else simde__m512_private r_, a_ = simde__m512_to_private(a), b_ = simde__m512_to_private(b); /* TODO: generate reduced case to give to Intel */ #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) && !defined(HEDLEY_INTEL_VERSION) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_xor_ps(a_.m256[i], b_.m256[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) r_.i32f = a_.i32f ^ b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_xor_ps #define _mm512_xor_ps(a, b) simde_mm512_xor_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_xor_pd (simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_AVX512DQ_NATIVE) return _mm512_xor_pd(a, b); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a), b_ = simde__m512d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) r_.m256d[0] = simde_mm256_xor_pd(a_.m256d[0], b_.m256d[0]); r_.m256d[1] = simde_mm256_xor_pd(a_.m256d[1], b_.m256d[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) r_.i32f = a_.i32f ^ b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) 
#undef _mm512_xor_pd #define _mm512_xor_pd(a, b) simde_mm512_xor_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_xor_epi32 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_xor_epi32(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_xor_si256(a_.m256i[i], b_.m256i[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 ^ b_.i32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] ^ b_.i32[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_xor_epi32 #define _mm512_xor_epi32(a, b) simde_mm512_xor_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_xor_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_xor_epi32(src, k, v2, v3); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_xor_epi32(v2, v3)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_xor_epi32 #define _mm512_mask_xor_epi32(src, k, v2, v3) simde_mm512_mask_xor_epi32(src, k, v2, v3) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_xor_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_xor_epi32(k, a, b); #else return simde_mm512_maskz_mov_epi32(k, simde_mm512_xor_epi32(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_xor_epi32 #define _mm512_maskz_xor_epi32(k, a, b) simde_mm512_maskz_xor_epi32(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_xor_epi64 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return 
_mm512_xor_epi64(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_xor_si256(a_.m256i[i], b_.m256i[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) r_.i32f = a_.i32f ^ b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_xor_epi64 #define _mm512_xor_epi64(a, b) simde_mm512_xor_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_xor_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_xor_epi64(src, k, a, b); #else return simde_mm512_mask_mov_epi64(src, k, simde_mm512_xor_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_xor_epi64 #define _mm512_mask_xor_epi64(src, k, a, b) simde_mm512_mask_xor_epi64(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_xor_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_xor_epi64(k, a, b); #else return simde_mm512_maskz_mov_epi64(k, simde_mm512_xor_epi64(a, b)); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_xor_epi64 #define _mm512_maskz_xor_epi64(k, a, b) simde_mm512_maskz_xor_epi64(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_xor_si512 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_xor_si512(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_X86_AVX2_NATIVE) r_.m256i[0] = 
simde_mm256_xor_si256(a_.m256i[0], b_.m256i[0]); r_.m256i[1] = simde_mm256_xor_si256(a_.m256i[1], b_.m256i[1]); #elif defined(SIMDE_X86_SSE2_NATIVE) r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]); r_.m128i[2] = simde_mm_xor_si128(a_.m128i[2], b_.m128i[2]); r_.m128i[3] = simde_mm_xor_si128(a_.m128i[3], b_.m128i[3]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f ^ b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; } #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_xor_si512 #define _mm512_xor_si512(a, b) simde_mm512_xor_si512(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_XOR_H) */ simde-0.7.2/simde/x86/avx512/xorsign.h000066400000000000000000000045721400333146700172500ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ /* This is a SIMDe extension which is not part of AVX-512. It exists * because a lot of numerical methods in SIMDe have algoriths which do * something like: * * float sgn = input < 0 ? -1 : 1; * ... * return res * sgn; * * Which can be replaced with a much more efficient call to xorsign: * * return simde_x_mm512_xorsign_ps(res, input); * * While this was originally intended for use in SIMDe, please feel * free to use it in your code. */ #if !defined(SIMDE_X86_AVX512_XORSIGN_H) #define SIMDE_X86_AVX512_XORSIGN_H #include "types.h" #include "mov.h" #include "and.h" #include "xor.h" #include "set1.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_x_mm512_xorsign_ps(simde__m512 dest, simde__m512 src) { return simde_mm512_xor_ps(simde_mm512_and_ps(simde_mm512_set1_ps(-0.0f), src), dest); } SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_x_mm512_xorsign_pd(simde__m512d dest, simde__m512d src) { return simde_mm512_xor_pd(simde_mm512_and_pd(simde_mm512_set1_pd(-0.0), src), dest); } SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_AVX512_XORSIGN_H) */ simde-0.7.2/simde/x86/clmul.h000066400000000000000000000364501400333146700156450ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the 
Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2016 Thomas Pornin */ /* The portable version is based on the implementation in BearSSL, * which is MIT licensed, constant-time / branch-free, and documented * at https://www.bearssl.org/constanttime.html (specifically, we use * the implementation from ghash_ctmul64.c). */ #if !defined(SIMDE_X86_CLMUL_H) #define SIMDE_X86_CLMUL_H #include "avx512/set.h" #include "avx512/setzero.h" #if !defined(SIMDE_X86_PCLMUL_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) # define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES #endif HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_x_clmul_u64(uint64_t x, uint64_t y) { uint64_t x0, x1, x2, x3; uint64_t y0, y1, y2, y3; uint64_t z0, z1, z2, z3; x0 = x & UINT64_C(0x1111111111111111); x1 = x & UINT64_C(0x2222222222222222); x2 = x & UINT64_C(0x4444444444444444); x3 = x & UINT64_C(0x8888888888888888); y0 = y & UINT64_C(0x1111111111111111); y1 = y & UINT64_C(0x2222222222222222); y2 = y & UINT64_C(0x4444444444444444); y3 = y & UINT64_C(0x8888888888888888); z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1); z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2); z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3); z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0); z0 &= 
UINT64_C(0x1111111111111111); z1 &= UINT64_C(0x2222222222222222); z2 &= UINT64_C(0x4444444444444444); z3 &= UINT64_C(0x8888888888888888); return z0 | z1 | z2 | z3; } static uint64_t simde_x_bitreverse_u64(uint64_t v) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint8x8_t bytes = vreinterpret_u8_u64(vmov_n_u64(v)); bytes = vrbit_u8(bytes); bytes = vrev64_u8(bytes); return vget_lane_u64(vreinterpret_u64_u8(bytes), 0); #elif defined(SIMDE_X86_GFNI_NATIVE) /* I don't think there is (or likely will ever be) a CPU with GFNI * but not pclmulq, but this may be useful for things other than * _mm_clmulepi64_si128. */ __m128i vec = _mm_cvtsi64_si128(HEDLEY_STATIC_CAST(int64_t, v)); /* Reverse bits within each byte */ vec = _mm_gf2p8affine_epi64_epi8(vec, _mm_cvtsi64_si128(HEDLEY_STATIC_CAST(int64_t, UINT64_C(0x8040201008040201))), 0); /* Reverse bytes */ #if defined(SIMDE_X86_SSSE3_NATIVE) vec = _mm_shuffle_epi8(vec, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)); #else vec = _mm_or_si128(_mm_slli_epi16(vec, 8), _mm_srli_epi16(vec, 8)); vec = _mm_shufflelo_epi16(vec, _MM_SHUFFLE(0, 1, 2, 3)); vec = _mm_shufflehi_epi16(vec, _MM_SHUFFLE(0, 1, 2, 3)); #endif return HEDLEY_STATIC_CAST(uint64_t, _mm_cvtsi128_si64(vec)); #elif HEDLEY_HAS_BUILTIN(__builtin_bitreverse64) return __builtin_bitreverse64(v); #else v = ((v >> 1) & UINT64_C(0x5555555555555555)) | ((v & UINT64_C(0x5555555555555555)) << 1); v = ((v >> 2) & UINT64_C(0x3333333333333333)) | ((v & UINT64_C(0x3333333333333333)) << 2); v = ((v >> 4) & UINT64_C(0x0F0F0F0F0F0F0F0F)) | ((v & UINT64_C(0x0F0F0F0F0F0F0F0F)) << 4); v = ((v >> 8) & UINT64_C(0x00FF00FF00FF00FF)) | ((v & UINT64_C(0x00FF00FF00FF00FF)) << 8); v = ((v >> 16) & UINT64_C(0x0000FFFF0000FFFF)) | ((v & UINT64_C(0x0000FFFF0000FFFF)) << 16); return (v >> 32) | (v << 32); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_clmulepi64_si128 (simde__m128i a, simde__m128i b, const int imm8) SIMDE_REQUIRE_CONSTANT(imm8) { simde__m128i_private a_ = 
simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), r_; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_AES) uint64x1_t A = ((imm8) & 0x01) ? vget_high_u64(a_.neon_u64) : vget_low_u64(a_.neon_u64); uint64x1_t B = ((imm8) & 0x10) ? vget_high_u64(b_.neon_u64) : vget_low_u64(b_.neon_u64); #if defined(SIMDE_BUG_CLANG_48257) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ #endif poly64_t A_ = vget_lane_p64(vreinterpret_p64_u64(A), 0); poly64_t B_ = vget_lane_p64(vreinterpret_p64_u64(B), 0); #if defined(SIMDE_BUG_CLANG_48257) HEDLEY_DIAGNOSTIC_POP #endif poly128_t R = vmull_p64(A_, B_); r_.neon_u64 = vreinterpretq_u64_p128(R); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) #if defined(SIMDE_SHUFFLE_VECTOR_) switch (imm8 & 0x11) { case 0x00: b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 0, 0); a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 0, 0); break; case 0x01: b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 0, 0); a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 1, 1); break; case 0x10: b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 1, 1); a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 0, 0); break; case 0x11: b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 1, 1); a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 1, 1); break; } #else { const uint64_t A = a_.u64[(imm8 ) & 1]; const uint64_t B = b_.u64[(imm8 >> 4) & 1]; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { a_.u64[i] = A; b_.u64[i] = B; } } #endif simde__m128i_private reversed_; { #if defined(SIMDE_SHUFFLE_VECTOR_) reversed_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, b_.u64, 1, 3); #else reversed_.u64[0] = a_.u64[1]; reversed_.u64[1] = b_.u64[1]; #endif SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(reversed_.u64) / sizeof(reversed_.u64[0])) ; i++) { reversed_.u64[i] = simde_x_bitreverse_u64(reversed_.u64[i]); } } #if defined(SIMDE_SHUFFLE_VECTOR_) a_.u64 = 
SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, reversed_.u64, 0, 2); b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, reversed_.u64, 1, 3); #else a_.u64[1] = reversed_.u64[0]; b_.u64[1] = reversed_.u64[1]; #endif SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(reversed_.u64) / sizeof(reversed_.u64[0])) ; i++) { r_.u64[i] = simde_x_clmul_u64(a_.u64[i], b_.u64[i]); } r_.u64[1] = simde_x_bitreverse_u64(r_.u64[1]) >> 1; #else r_.u64[0] = simde_x_clmul_u64( a_.u64[imm8 & 1], b_.u64[(imm8 >> 4) & 1]); r_.u64[1] = simde_x_bitreverse_u64(simde_x_clmul_u64(simde_x_bitreverse_u64(a_.u64[imm8 & 1]), simde_x_bitreverse_u64(b_.u64[(imm8 >> 4) & 1]))) >> 1; #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_PCLMUL_NATIVE) #define simde_mm_clmulepi64_si128(a, b, imm8) _mm_clmulepi64_si128(a, b, imm8) #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_AES) #define simde_mm_clmulepi64_si128(a, b, imm8) \ simde__m128i_from_neon_u64( \ vreinterpretq_u64_p128( \ vmull_p64( \ vgetq_lane_p64(vreinterpretq_p64_u64(simde__m128i_to_neon_u64(a)), (imm8 ) & 1), \ vgetq_lane_p64(vreinterpretq_p64_u64(simde__m128i_to_neon_u64(b)), (imm8 >> 4) & 1) \ ) \ ) \ ) #endif #if defined(SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES) #undef _mm_clmulepi64_si128 #define _mm_clmulepi64_si128(a, b, imm8) simde_mm_clmulepi64_si128(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_clmulepi64_epi128 (simde__m256i a, simde__m256i b, const int imm8) SIMDE_REQUIRE_CONSTANT(imm8) { simde__m256i_private a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b), r_; #if defined(SIMDE_X86_PCLMUL_NATIVE) switch (imm8 & 0x11) { case 0x00: r_.m128i[0] = _mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x00); r_.m128i[1] = _mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x00); break; case 0x01: r_.m128i[0] = _mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x01); r_.m128i[1] = _mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x01); break; case 0x10: r_.m128i[0] = 
_mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x10); r_.m128i[1] = _mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x10); break; case 0x11: r_.m128i[0] = _mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x11); r_.m128i[1] = _mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x11); break; } #else simde__m128i_private a_lo_, b_lo_, r_lo_, a_hi_, b_hi_, r_hi_; #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION) switch (imm8 & 0x01) { case 0x00: a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 0, 2); break; case 0x01: a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 1, 3); break; } switch (imm8 & 0x10) { case 0x00: b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 0, 2); break; case 0x10: b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 1, 3); break; } #else a_lo_.u64[0] = a_.u64[((imm8 >> 0) & 1) + 0]; a_lo_.u64[1] = a_.u64[((imm8 >> 0) & 1) + 2]; b_lo_.u64[0] = b_.u64[((imm8 >> 4) & 1) + 0]; b_lo_.u64[1] = b_.u64[((imm8 >> 4) & 1) + 2]; #endif SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_hi_.u64) / sizeof(r_hi_.u64[0])) ; i++) { a_hi_.u64[i] = simde_x_bitreverse_u64(a_lo_.u64[i]); b_hi_.u64[i] = simde_x_bitreverse_u64(b_lo_.u64[i]); r_lo_.u64[i] = simde_x_clmul_u64(a_lo_.u64[i], b_lo_.u64[i]); r_hi_.u64[i] = simde_x_clmul_u64(a_hi_.u64[i], b_hi_.u64[i]); r_hi_.u64[i] = simde_x_bitreverse_u64(r_hi_.u64[i]) >> 1; } #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION) r_.u64 = __builtin_shufflevector(r_lo_.u64, r_hi_.u64, 0, 2, 1, 3); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_ = simde__m256i_to_private(simde_mm256_set_m128i(simde__m128i_from_private(r_hi_), simde__m128i_from_private(r_lo_))); r_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 32, r_.u64, r_.u64, 0, 2, 1, 3); #else r_.u64[0] = r_lo_.u64[0]; r_.u64[1] = r_hi_.u64[0]; r_.u64[2] = r_lo_.u64[1]; r_.u64[3] = r_hi_.u64[1]; #endif #endif return simde__m256i_from_private(r_); } #if defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) #define 
simde_mm256_clmulepi64_epi128(a, b, imm8) _mm256_clmulepi64_epi128(a, b, imm8) #endif #if defined(SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES) #undef _mm256_clmulepi64_epi128 #define _mm256_clmulepi64_epi128(a, b, imm8) simde_mm256_clmulepi64_epi128(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_clmulepi64_epi128 (simde__m512i a, simde__m512i b, const int imm8) SIMDE_REQUIRE_CONSTANT(imm8) { simde__m512i_private a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b), r_; #if defined(HEDLEY_MSVC_VERSION) r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); #endif #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) switch (imm8 & 0x11) { case 0x00: r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x00); r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x00); break; case 0x01: r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x01); r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x01); break; case 0x10: r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x10); r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x10); break; case 0x11: r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x11); r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x11); break; } #else simde__m256i_private a_lo_, b_lo_, r_lo_, a_hi_, b_hi_, r_hi_; #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION) switch (imm8 & 0x01) { case 0x00: a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 0, 2, 4, 6); break; case 0x01: a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 1, 3, 5, 7); break; } switch (imm8 & 0x10) { case 0x00: b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 0, 2, 4, 6); break; case 0x10: b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 1, 3, 5, 7); break; } #else a_lo_.u64[0] = a_.u64[((imm8 >> 0) & 1) + 0]; a_lo_.u64[1] = a_.u64[((imm8 >> 0) & 1) + 2]; a_lo_.u64[2] 
= a_.u64[((imm8 >> 0) & 1) + 4]; a_lo_.u64[3] = a_.u64[((imm8 >> 0) & 1) + 6]; b_lo_.u64[0] = b_.u64[((imm8 >> 4) & 1) + 0]; b_lo_.u64[1] = b_.u64[((imm8 >> 4) & 1) + 2]; b_lo_.u64[2] = b_.u64[((imm8 >> 4) & 1) + 4]; b_lo_.u64[3] = b_.u64[((imm8 >> 4) & 1) + 6]; #endif SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_hi_.u64) / sizeof(r_hi_.u64[0])) ; i++) { a_hi_.u64[i] = simde_x_bitreverse_u64(a_lo_.u64[i]); b_hi_.u64[i] = simde_x_bitreverse_u64(b_lo_.u64[i]); r_lo_.u64[i] = simde_x_clmul_u64(a_lo_.u64[i], b_lo_.u64[i]); r_hi_.u64[i] = simde_x_clmul_u64(a_hi_.u64[i], b_hi_.u64[i]); r_hi_.u64[i] = simde_x_bitreverse_u64(r_hi_.u64[i]) >> 1; } #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION) r_.u64 = __builtin_shufflevector(r_lo_.u64, r_hi_.u64, 0, 4, 1, 5, 2, 6, 3, 7); #else r_.u64[0] = r_lo_.u64[0]; r_.u64[1] = r_hi_.u64[0]; r_.u64[2] = r_lo_.u64[1]; r_.u64[3] = r_hi_.u64[1]; r_.u64[4] = r_lo_.u64[2]; r_.u64[5] = r_hi_.u64[2]; r_.u64[6] = r_lo_.u64[3]; r_.u64[7] = r_hi_.u64[3]; #endif #endif return simde__m512i_from_private(r_); } #if defined(SIMDE_X86_VPCLMULQDQ_NATIVE) #define simde_mm512_clmulepi64_epi128(a, b, imm8) _mm512_clmulepi64_epi128(a, b, imm8) #endif #if defined(SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES) #undef _mm512_clmulepi64_epi128 #define _mm512_clmulepi64_epi128(a, b, imm8) simde_mm512_clmulepi64_epi128(a, b, imm8) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_CLMUL_H) */ simde-0.7.2/simde/x86/fma.h000066400000000000000000000515311400333146700152710ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the 
Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2019 Evan Nemerson */ #if !defined(SIMDE_X86_FMA_H) #define SIMDE_X86_FMA_H #include "avx.h" #if !defined(SIMDE_X86_FMA_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) # define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES #endif HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_fmadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fmadd_pd(a, b, c); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b), c_ = simde__m128d_to_private(c), r_; #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f64 = vec_madd(a_.altivec_f64, b_.altivec_f64, c_.altivec_f64); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vmlaq_f64(c_.neon_f64, b_.neon_f64, a_.neon_f64); #elif defined(simde_math_fma) && (defined(__FP_FAST_FMA) || defined(FP_FAST_FMA)) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_fma(a_.f64[i], b_.f64[i], c_.f64[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = (a_.f64[i] * b_.f64[i]) + c_.f64[i]; } #endif return simde__m128d_from_private(r_); #endif } #if 
defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fmadd_pd #define _mm_fmadd_pd(a, b, c) simde_mm_fmadd_pd(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_fmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm256_fmadd_pd(a, b, c); #else return simde_mm256_add_pd(simde_mm256_mul_pd(a, b), c); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm256_fmadd_pd #define _mm256_fmadd_pd(a, b, c) simde_mm256_fmadd_pd(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_fmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fmadd_ps(a, b, c); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b), c_ = simde__m128_to_private(c), r_; #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_fmaf(a_.f32[i], b_.f32[i], c_.f32[i]); } #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f32 = vec_madd(a_.altivec_f32, b_.altivec_f32, c_.altivec_f32); #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) r_.neon_f32 = vmlaq_f32(c_.neon_f32, b_.neon_f32, a_.neon_f32); #elif defined(simde_math_fmaf) && (defined(__FP_FAST_FMAF) || defined(FP_FAST_FMAF)) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_fmaf(a_.f32[i], b_.f32[i], c_.f32[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = (a_.f32[i] * b_.f32[i]) + c_.f32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fmadd_ps #define _mm_fmadd_ps(a, b, c) simde_mm_fmadd_ps(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_fmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm256_fmadd_ps(a, b, c); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) 
simde__m256_private a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b), c_ = simde__m256_to_private(c), r_; for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_fmadd_ps(a_.m128[i], b_.m128[i], c_.m128[i]); } return simde__m256_from_private(r_); #else return simde_mm256_add_ps(simde_mm256_mul_ps(a, b), c); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm256_fmadd_ps #define _mm256_fmadd_ps(a, b, c) simde_mm256_fmadd_ps(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_fmadd_sd (simde__m128d a, simde__m128d b, simde__m128d c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fmadd_sd(a, b, c); #else return simde_mm_add_sd(simde_mm_mul_sd(a, b), c); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fmadd_sd #define _mm_fmadd_sd(a, b, c) simde_mm_fmadd_sd(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_fmadd_ss (simde__m128 a, simde__m128 b, simde__m128 c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fmadd_ss(a, b, c); #else return simde_mm_add_ss(simde_mm_mul_ss(a, b), c); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fmadd_ss #define _mm_fmadd_ss(a, b, c) simde_mm_fmadd_ss(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_fmaddsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fmaddsub_pd(a, b, c); #else return simde_mm_addsub_pd(simde_mm_mul_pd(a, b), c); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fmaddsub_pd #define _mm_fmaddsub_pd(a, b, c) simde_mm_fmaddsub_pd(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_fmaddsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm256_fmaddsub_pd(a, b, c); #else return simde_mm256_addsub_pd(simde_mm256_mul_pd(a, b), c); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm256_fmaddsub_pd #define _mm256_fmaddsub_pd(a, 
b, c) simde_mm256_fmaddsub_pd(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_fmaddsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fmaddsub_ps(a, b, c); #else return simde_mm_addsub_ps(simde_mm_mul_ps(a, b), c); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fmaddsub_ps #define _mm_fmaddsub_ps(a, b, c) simde_mm_fmaddsub_ps(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_fmaddsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm256_fmaddsub_ps(a, b, c); #else return simde_mm256_addsub_ps(simde_mm256_mul_ps(a, b), c); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm256_fmaddsub_ps #define _mm256_fmaddsub_ps(a, b, c) simde_mm256_fmaddsub_ps(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_fmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fmsub_pd(a, b, c); #else return simde_mm_sub_pd(simde_mm_mul_pd(a, b), c); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fmsub_pd #define _mm_fmsub_pd(a, b, c) simde_mm_fmsub_pd(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_fmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm256_fmsub_pd(a, b, c); #else return simde_mm256_sub_pd(simde_mm256_mul_pd(a, b), c); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm256_fmsub_pd #define _mm256_fmsub_pd(a, b, c) simde_mm256_fmsub_pd(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_fmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fmsub_ps(a, b, c); #else return simde_mm_sub_ps(simde_mm_mul_ps(a, b), c); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fmsub_ps #define _mm_fmsub_ps(a, b, c) simde_mm_fmsub_ps(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 
simde_mm256_fmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm256_fmsub_ps(a, b, c); #else return simde_mm256_sub_ps(simde_mm256_mul_ps(a, b), c); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm256_fmsub_ps #define _mm256_fmsub_ps(a, b, c) simde_mm256_fmsub_ps(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_fmsub_sd (simde__m128d a, simde__m128d b, simde__m128d c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fmsub_sd(a, b, c); #else return simde_mm_sub_sd(simde_mm_mul_sd(a, b), c); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fmsub_sd #define _mm_fmsub_sd(a, b, c) simde_mm_fmsub_sd(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_fmsub_ss (simde__m128 a, simde__m128 b, simde__m128 c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fmsub_ss(a, b, c); #else return simde_mm_sub_ss(simde_mm_mul_ss(a, b), c); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fmsub_ss #define _mm_fmsub_ss(a, b, c) simde_mm_fmsub_ss(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_fmsubadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fmsubadd_pd(a, b, c); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b), c_ = simde__m128d_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { r_.f64[ i ] = (a_.f64[ i ] * b_.f64[ i ]) + c_.f64[ i ]; r_.f64[i + 1] = (a_.f64[i + 1] * b_.f64[i + 1]) - c_.f64[i + 1]; } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fmsubadd_pd #define _mm_fmsubadd_pd(a, b, c) simde_mm_fmsubadd_pd(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_fmsubadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm256_fmsubadd_pd(a, b, c); #else 
simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b), c_ = simde__m256d_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { r_.f64[ i ] = (a_.f64[ i ] * b_.f64[ i ]) + c_.f64[ i ]; r_.f64[i + 1] = (a_.f64[i + 1] * b_.f64[i + 1]) - c_.f64[i + 1]; } return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm256_fmsubadd_pd #define _mm256_fmsubadd_pd(a, b, c) simde_mm256_fmsubadd_pd(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_fmsubadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fmsubadd_ps(a, b, c); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b), c_ = simde__m128_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { r_.f32[ i ] = (a_.f32[ i ] * b_.f32[ i ]) + c_.f32[ i ]; r_.f32[i + 1] = (a_.f32[i + 1] * b_.f32[i + 1]) - c_.f32[i + 1]; } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fmsubadd_ps #define _mm_fmsubadd_ps(a, b, c) simde_mm_fmsubadd_ps(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_fmsubadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm256_fmsubadd_ps(a, b, c); #else simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b), c_ = simde__m256_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { r_.f32[ i ] = (a_.f32[ i ] * b_.f32[ i ]) + c_.f32[ i ]; r_.f32[i + 1] = (a_.f32[i + 1] * b_.f32[i + 1]) - c_.f32[i + 1]; } return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm256_fmsubadd_ps #define _mm256_fmsubadd_ps(a, b, c) simde_mm256_fmsubadd_ps(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d 
simde_mm_fnmadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fnmadd_pd(a, b, c); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b), c_ = simde__m128d_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i]; } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fnmadd_pd #define _mm_fnmadd_pd(a, b, c) simde_mm_fnmadd_pd(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_fnmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm256_fnmadd_pd(a, b, c); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b), c_ = simde__m256d_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i]; } return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm256_fnmadd_pd #define _mm256_fnmadd_pd(a, b, c) simde_mm256_fnmadd_pd(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_fnmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fnmadd_ps(a, b, c); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b), c_ = simde__m128_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i]; } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fnmadd_ps #define _mm_fnmadd_ps(a, b, c) simde_mm_fnmadd_ps(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_fnmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) { #if defined(SIMDE_X86_FMA_NATIVE) return 
_mm256_fnmadd_ps(a, b, c); #else simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b), c_ = simde__m256_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i]; } return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm256_fnmadd_ps #define _mm256_fnmadd_ps(a, b, c) simde_mm256_fnmadd_ps(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_fnmadd_sd (simde__m128d a, simde__m128d b, simde__m128d c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fnmadd_sd(a, b, c); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b), c_ = simde__m128d_to_private(c); r_ = a_; r_.f64[0] = -(a_.f64[0] * b_.f64[0]) + c_.f64[0]; return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fnmadd_sd #define _mm_fnmadd_sd(a, b, c) simde_mm_fnmadd_sd(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_fnmadd_ss (simde__m128 a, simde__m128 b, simde__m128 c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fnmadd_ss(a, b, c); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b), c_ = simde__m128_to_private(c); r_ = a_; r_.f32[0] = -(a_.f32[0] * b_.f32[0]) + c_.f32[0]; return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fnmadd_ss #define _mm_fnmadd_ss(a, b, c) simde_mm_fnmadd_ss(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_fnmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fnmsub_pd(a, b, c); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b), c_ = simde__m128d_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - 
c_.f64[i]; } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fnmsub_pd #define _mm_fnmsub_pd(a, b, c) simde_mm_fnmsub_pd(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_fnmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm256_fnmsub_pd(a, b, c); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b), c_ = simde__m256d_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i]; } return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm256_fnmsub_pd #define _mm256_fnmsub_pd(a, b, c) simde_mm256_fnmsub_pd(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_fnmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fnmsub_ps(a, b, c); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b), c_ = simde__m128_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i]; } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fnmsub_ps #define _mm_fnmsub_ps(a, b, c) simde_mm_fnmsub_ps(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_fnmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm256_fnmsub_ps(a, b, c); #else simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b), c_ = simde__m256_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i]; } return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef 
_mm256_fnmsub_ps #define _mm256_fnmsub_ps(a, b, c) simde_mm256_fnmsub_ps(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_fnmsub_sd (simde__m128d a, simde__m128d b, simde__m128d c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fnmsub_sd(a, b, c); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b), c_ = simde__m128d_to_private(c); r_ = a_; r_.f64[0] = -(a_.f64[0] * b_.f64[0]) - c_.f64[0]; return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fnmsub_sd #define _mm_fnmsub_sd(a, b, c) simde_mm_fnmsub_sd(a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_fnmsub_ss (simde__m128 a, simde__m128 b, simde__m128 c) { #if defined(SIMDE_X86_FMA_NATIVE) return _mm_fnmsub_ss(a, b, c); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b), c_ = simde__m128_to_private(c); r_ = simde__m128_to_private(a); r_.f32[0] = -(a_.f32[0] * b_.f32[0]) - c_.f32[0]; return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) #undef _mm_fnmsub_ss #define _mm_fnmsub_ss(a, b, c) simde_mm_fnmsub_ss(a, b, c) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_FMA_H) */ simde-0.7.2/simde/x86/gfni.h000066400000000000000000000763111400333146700154540ustar00rootroot00000000000000/* Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Christopher Moore * 2020 Evan Nemerson */ #if !defined(SIMDE_X86_GFNI_H) #define SIMDE_X86_GFNI_H #include "avx512/add.h" #include "avx512/and.h" #include "avx512/cmpeq.h" #include "avx512/cmpge.h" #include "avx512/cmpgt.h" #include "avx512/broadcast.h" #include "avx512/permutex2var.h" #include "avx512/mov.h" #include "avx512/mov_mask.h" #include "avx512/set.h" #include "avx512/set1.h" #include "avx512/setzero.h" #include "avx512/shuffle.h" #include "avx512/srli.h" #include "avx512/xor.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ /* In all the *gf2p8affine* intrinsics the argument b must be a compile-time constant so we must use macros and simde_x_mm* helper functions */ /* N.B. 
The _mm*gf2p8affineinv_epi64_epi8 and _mm*gf2p8mul_epi8 intrinsics are for a Field Generator Polynomial (FGP) (aka reduction polynomial) of 0x11B */ /* Only the _mm*gf2p8affine_epi64_epi8 intrinsics do not assume this specific FGP */ /* Computing the inverse of an GF element is expensive so use this LUT for an FGP of 0x11B */ static const union { uint8_t u8[256]; simde__m128i m128i[16]; } simde_x_gf2p8inverse_lut = { { 0x00, 0x01, 0x8d, 0xf6, 0xcb, 0x52, 0x7b, 0xd1, 0xe8, 0x4f, 0x29, 0xc0, 0xb0, 0xe1, 0xe5, 0xc7, 0x74, 0xb4, 0xaa, 0x4b, 0x99, 0x2b, 0x60, 0x5f, 0x58, 0x3f, 0xfd, 0xcc, 0xff, 0x40, 0xee, 0xb2, 0x3a, 0x6e, 0x5a, 0xf1, 0x55, 0x4d, 0xa8, 0xc9, 0xc1, 0x0a, 0x98, 0x15, 0x30, 0x44, 0xa2, 0xc2, 0x2c, 0x45, 0x92, 0x6c, 0xf3, 0x39, 0x66, 0x42, 0xf2, 0x35, 0x20, 0x6f, 0x77, 0xbb, 0x59, 0x19, 0x1d, 0xfe, 0x37, 0x67, 0x2d, 0x31, 0xf5, 0x69, 0xa7, 0x64, 0xab, 0x13, 0x54, 0x25, 0xe9, 0x09, 0xed, 0x5c, 0x05, 0xca, 0x4c, 0x24, 0x87, 0xbf, 0x18, 0x3e, 0x22, 0xf0, 0x51, 0xec, 0x61, 0x17, 0x16, 0x5e, 0xaf, 0xd3, 0x49, 0xa6, 0x36, 0x43, 0xf4, 0x47, 0x91, 0xdf, 0x33, 0x93, 0x21, 0x3b, 0x79, 0xb7, 0x97, 0x85, 0x10, 0xb5, 0xba, 0x3c, 0xb6, 0x70, 0xd0, 0x06, 0xa1, 0xfa, 0x81, 0x82, 0x83, 0x7e, 0x7f, 0x80, 0x96, 0x73, 0xbe, 0x56, 0x9b, 0x9e, 0x95, 0xd9, 0xf7, 0x02, 0xb9, 0xa4, 0xde, 0x6a, 0x32, 0x6d, 0xd8, 0x8a, 0x84, 0x72, 0x2a, 0x14, 0x9f, 0x88, 0xf9, 0xdc, 0x89, 0x9a, 0xfb, 0x7c, 0x2e, 0xc3, 0x8f, 0xb8, 0x65, 0x48, 0x26, 0xc8, 0x12, 0x4a, 0xce, 0xe7, 0xd2, 0x62, 0x0c, 0xe0, 0x1f, 0xef, 0x11, 0x75, 0x78, 0x71, 0xa5, 0x8e, 0x76, 0x3d, 0xbd, 0xbc, 0x86, 0x57, 0x0b, 0x28, 0x2f, 0xa3, 0xda, 0xd4, 0xe4, 0x0f, 0xa9, 0x27, 0x53, 0x04, 0x1b, 0xfc, 0xac, 0xe6, 0x7a, 0x07, 0xae, 0x63, 0xc5, 0xdb, 0xe2, 0xea, 0x94, 0x8b, 0xc4, 0xd5, 0x9d, 0xf8, 0x90, 0x6b, 0xb1, 0x0d, 0xd6, 0xeb, 0xc6, 0x0e, 0xcf, 0xad, 0x08, 0x4e, 0xd7, 0xe3, 0x5d, 0x50, 0x1e, 0xb3, 0x5b, 0x23, 0x38, 0x34, 0x68, 0x46, 0x03, 0x8c, 0xdd, 0x9c, 0x7d, 0xa0, 0xcd, 0x1a, 0x41, 0x1c } }; SIMDE_FUNCTION_ATTRIBUTES 
/* Multiply every byte of `x` by an 8x8 bit matrix over GF(2).
 *
 * Each 64-bit lane of `A` holds one matrix, applied to the eight bytes
 * of the matching 64-bit lane of `x`.  In the portable path below,
 * bit k of a result byte is the parity of (byte (7 - k) of the matrix
 * lane AND the input byte). */
simde__m128i
simde_x_mm_gf2p8matrix_multiply_epi64_epi8 (simde__m128i x, simde__m128i A) {
  #if defined(SIMDE_X86_SSSE3_NATIVE)
    simde__m128i r, a, p;
    /* 0xFE / 0xFD are -2 / -3 as signed bytes; simde_mm_sign_epi8
     * below turns them into shuffle indices 2 / 3 wherever the byte of
     * x is negative (top bit set). */
    const simde__m128i byte_select = simde_x_mm_set_epu64x(UINT64_C(0xFDFDFDFDFDFDFDFD), UINT64_C(0xFEFEFEFEFEFEFEFE));
    const simde__m128i zero = simde_mm_setzero_si128();

    /* Reverse the byte order inside each 64-bit lane of A. */
    a = simde_mm_shuffle_epi8(A, simde_x_mm_set_epu64x(UINT64_C(0x08090A0B0C0D0E0F), UINT64_C(0x0001020304050607)));
    r = zero;

    #if !defined(__INTEL_COMPILER)
      SIMDE_VECTORIZE
    #endif
    /* One pass per bit position, starting from the top bit; a and x
     * are shifted left by one (via a + a, x + x) after each pass. */
    for (int i = 0 ; i < 8 ; i++) {
      /* The 16 top bits of a land in 16-bit element 1, i.e. bytes 2
       * (low lane) and 3 (high lane) of p. */
      p = simde_mm_insert_epi16(zero, simde_mm_movemask_epi8(a), 1);
      /* Pick byte 2 or 3 where the current top bit of x is set; other
       * positions contribute zero. */
      p = simde_mm_shuffle_epi8(p, simde_mm_sign_epi8(byte_select, x));
      r = simde_mm_xor_si128(r, p);
      a = simde_mm_add_epi8(a, a);
      x = simde_mm_add_epi8(x, x);
    }

    return r;
  #else
    simde__m128i_private
      r_,
      x_ = simde__m128i_to_private(x),
      A_ = simde__m128i_to_private(A);

    const uint64_t ones = UINT64_C(0x0101010101010101);
    const uint64_t mask = UINT64_C(0x0102040810204080);
    uint64_t q;

    #if !defined(__INTEL_COMPILER)
      SIMDE_VECTORIZE
    #endif
    for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
      /* Matrix for the 64-bit lane that byte i belongs to.
       * NOTE(review): simde_endian_bswap64_le presumably normalises to
       * little-endian byte order -- confirm against its definition. */
      q = simde_endian_bswap64_le(A_.u64[i / 8]);
      /* AND every matrix byte with the input byte (replicated 8x). */
      q &= HEDLEY_STATIC_CAST(uint64_t, x_.u8[i]) * ones;
      /* Fold each byte so its bit 0 holds that byte's parity. */
      q ^= q >> 4;
      q ^= q >> 2;
      q ^= q >> 1;
      q &= ones;
      /* Expand each parity bit to a full 0x00 / 0xFF byte ... */
      q *= 255;
      /* ... keep a different single bit per byte (byte 0 -> bit 7,
       * byte 7 -> bit 0) ... */
      q &= mask;
      /* ... and OR all eight bytes together into the low byte. */
      q |= q >> 32;
      q |= q >> 16;
      q |= q >> 8;
      r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, q);
    }

    return simde__m128i_from_private(r_);
  #endif
}

/* 256-bit variant of simde_x_mm_gf2p8matrix_multiply_epi64_epi8: same
 * per-64-bit-lane matrix product over four lanes.  Without AVX2 it
 * processes the two 128-bit halves with the 128-bit function. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_x_mm256_gf2p8matrix_multiply_epi64_epi8 (simde__m256i x, simde__m256i A) {
  #if defined(SIMDE_X86_AVX2_NATIVE)
    simde__m256i r, a, p;
    /* Shuffle indices selecting byte 0..3 of the 32-bit movemask, one
     * index value per 64-bit lane. */
    const simde__m256i byte_select = simde_x_mm256_set_epu64x(UINT64_C(0x0303030303030303), UINT64_C(0x0202020202020202),
                                                              UINT64_C(0x0101010101010101), UINT64_C(0x0000000000000000));

    /* Reverse the byte order inside each 64-bit lane of A. */
    a = simde_mm256_shuffle_epi8(A, simde_mm256_broadcastsi128_si256(simde_x_mm_set_epu64x(UINT64_C(0x08090A0B0C0D0E0F), UINT64_C(0x0001020304050607))));
    r = simde_mm256_setzero_si256();

    #if !defined(__INTEL_COMPILER)
      SIMDE_VECTORIZE
    #endif
    for (int i = 0 ; i < 8 ; i++) {
      /* Replicate the 32 top bits of a into every 32-bit element, then
       * give each 64-bit lane its own mask byte. */
      p = simde_mm256_set1_epi32(simde_mm256_movemask_epi8(a));
      p = simde_mm256_shuffle_epi8(p, byte_select);
      /* Candidate r ^ p is accepted only in bytes selected by x (the
       * blend is controlled by x). */
      p = simde_mm256_xor_si256(r, p);
      r = simde_mm256_blendv_epi8(r, p, x);
      a = simde_mm256_add_epi8(a, a);
      x = simde_mm256_add_epi8(x, x);
    }

    return r;
  #else
    simde__m256i_private
      r_,
      x_ = simde__m256i_to_private(x),
      A_ = simde__m256i_to_private(A);

    #if !defined(__INTEL_COMPILER)
      SIMDE_VECTORIZE
    #endif
    for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
      r_.m128i[i] = simde_x_mm_gf2p8matrix_multiply_epi64_epi8(x_.m128i[i], A_.m128i[i]);
    }

    return simde__m256i_from_private(r_);
  #endif
}

/* 512-bit variant: same per-64-bit-lane matrix product over eight
 * lanes.  Without AVX-512BW it processes the two 256-bit halves with
 * the 256-bit function. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_x_mm512_gf2p8matrix_multiply_epi64_epi8 (simde__m512i x, simde__m512i A) {
  #if defined(SIMDE_X86_AVX512BW_NATIVE)
    simde__m512i r, a, p;
    /* Shuffle indices selecting byte 0..7 of the 64-bit mask, one
     * index value per 64-bit lane. */
    const simde__m512i byte_select = simde_x_mm512_set_epu64(UINT64_C(0x0707070707070707), UINT64_C(0x0606060606060606), UINT64_C(0x0505050505050505), UINT64_C(0x0404040404040404),
                                                             UINT64_C(0x0303030303030303), UINT64_C(0x0202020202020202), UINT64_C(0x0101010101010101), UINT64_C(0X0000000000000000));

    /* Reverse the byte order inside each 64-bit lane of A. */
    a = simde_mm512_shuffle_epi8(A, simde_mm512_broadcast_i32x4(simde_x_mm_set_epu64x(UINT64_C(0x08090A0B0C0D0E0F), UINT64_C(0x0001020304050607))));
    r = simde_mm512_setzero_si512();

    #if !defined(__INTEL_COMPILER)
      SIMDE_VECTORIZE
    #endif
    for (int i = 0 ; i < 8 ; i++) {
      /* Replicate the 64-bit byte mask of a into every 64-bit lane. */
      p = simde_mm512_set1_epi64(HEDLEY_STATIC_CAST(int64_t, simde_mm512_movepi8_mask(a)));
      /* Zero-masked shuffle: only bytes selected by the mask taken
       * from x receive their lane's mask byte. */
      p = simde_mm512_maskz_shuffle_epi8(simde_mm512_movepi8_mask(x), p, byte_select);
      r = simde_mm512_xor_si512(r, p);
      a = simde_mm512_add_epi8(a, a);
      x = simde_mm512_add_epi8(x, x);
    }

    return r;
  #else
    simde__m512i_private
      r_,
      x_ = simde__m512i_to_private(x),
      A_ = simde__m512i_to_private(A);

    #if !defined(__INTEL_COMPILER)
      SIMDE_VECTORIZE
    #endif
    for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
      r_.m256i[i] = simde_x_mm256_gf2p8matrix_multiply_epi64_epi8(x_.m256i[i], A_.m256i[i]);
    }

    return simde__m512i_from_private(r_);
#endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_gf2p8inverse_epi8 (simde__m128i x) { #if defined(SIMDE_X86_SSE4_1_NATIVE) /* N.B. CM: this fallback may not be faster */ simde__m128i r, u, t, test; const simde__m128i sixteens = simde_mm_set1_epi8(16); const simde__m128i masked_x = simde_mm_and_si128(x, simde_mm_set1_epi8(0x0F)); test = simde_mm_set1_epi8(INT8_MIN /* 0x80 */); x = simde_mm_xor_si128(x, test); r = simde_mm_shuffle_epi8(simde_x_gf2p8inverse_lut.m128i[0], masked_x); #if !defined(__INTEL_COMPILER) SIMDE_VECTORIZE #endif for (int i = 1 ; i < 16 ; i++) { t = simde_mm_shuffle_epi8(simde_x_gf2p8inverse_lut.m128i[i], masked_x); test = simde_mm_add_epi8(test, sixteens); u = simde_mm_cmplt_epi8(x, test); r = simde_mm_blendv_epi8(t, r, u); } return r; #else simde__m128i_private r_, x_ = simde__m128i_to_private(x); #if !defined(__INTEL_COMPILER) SIMDE_VECTORIZE #endif for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = simde_x_gf2p8inverse_lut.u8[x_.u8[i]]; } return simde__m128i_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_x_mm256_gf2p8inverse_epi8 (simde__m256i x) { #if defined(SIMDE_X86_AVX2_NATIVE) /* N.B. 
CM: this fallback may not be faster */ simde__m256i r, u, t, test; const simde__m256i sixteens = simde_mm256_set1_epi8(16); const simde__m256i masked_x = simde_mm256_and_si256(x, simde_mm256_set1_epi8(0x0F)); test = simde_mm256_set1_epi8(INT8_MIN /* 0x80 */); x = simde_mm256_xor_si256(x, test); r = simde_mm256_shuffle_epi8(simde_mm256_broadcastsi128_si256(simde_x_gf2p8inverse_lut.m128i[0]), masked_x); #if !defined(__INTEL_COMPILER) SIMDE_VECTORIZE #endif for (int i = 1 ; i < 16 ; i++) { t = simde_mm256_shuffle_epi8(simde_mm256_broadcastsi128_si256(simde_x_gf2p8inverse_lut.m128i[i]), masked_x); test = simde_mm256_add_epi8(test, sixteens); u = simde_mm256_cmpgt_epi8(test, x); r = simde_mm256_blendv_epi8(t, r, u); } return r; #else simde__m256i_private r_, x_ = simde__m256i_to_private(x); #if !defined(__INTEL_COMPILER) SIMDE_VECTORIZE #endif for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { r_.m128i[i] = simde_x_mm_gf2p8inverse_epi8(x_.m128i[i]); } return simde__m256i_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_x_mm512_gf2p8inverse_epi8 (simde__m512i x) { /* N.B. CM: TODO: later add VBMI version using just two _mm512_permutex2var_epi8 and friends */ /* But except for Cannon Lake all processors with VBMI also have GFNI */ #if defined(SIMDE_X86_AVX512BW_NATIVE) /* N.B. 
CM: this fallback may not be faster */ simde__m512i r, test; const simde__m512i sixteens = simde_mm512_set1_epi8(16); const simde__m512i masked_x = simde_mm512_and_si512(x, simde_mm512_set1_epi8(0x0F)); r = simde_mm512_shuffle_epi8(simde_mm512_broadcast_i32x4(simde_x_gf2p8inverse_lut.m128i[0]), masked_x); test = sixteens; #if !defined(__INTEL_COMPILER) SIMDE_VECTORIZE #endif for (int i = 1 ; i < 16 ; i++) { r = simde_mm512_mask_shuffle_epi8(r, simde_mm512_cmpge_epu8_mask(x, test), simde_mm512_broadcast_i32x4(simde_x_gf2p8inverse_lut.m128i[i]), masked_x); test = simde_mm512_add_epi8(test, sixteens); } return r; #else simde__m512i_private r_, x_ = simde__m512i_to_private(x); #if !defined(__INTEL_COMPILER) SIMDE_VECTORIZE #endif for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_x_mm256_gf2p8inverse_epi8(x_.m256i[i]); } return simde__m512i_from_private(r_); #endif } #define simde_x_mm_gf2p8matrix_multiply_inverse_epi64_epi8(x, A) simde_x_mm_gf2p8matrix_multiply_epi64_epi8(simde_x_mm_gf2p8inverse_epi8(x), A) #define simde_x_mm256_gf2p8matrix_multiply_inverse_epi64_epi8(x, A) simde_x_mm256_gf2p8matrix_multiply_epi64_epi8(simde_x_mm256_gf2p8inverse_epi8(x), A) #define simde_x_mm512_gf2p8matrix_multiply_inverse_epi64_epi8(x, A) simde_x_mm512_gf2p8matrix_multiply_epi64_epi8(simde_x_mm512_gf2p8inverse_epi8(x), A) SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_gf2p8affine_epi64_epi8 (simde__m128i x, simde__m128i A, int b) SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { return simde_mm_xor_si128(simde_x_mm_gf2p8matrix_multiply_epi64_epi8(x, A), simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); } #if defined(SIMDE_X86_GFNI_NATIVE) #define simde_mm_gf2p8affine_epi64_epi8(x, A, b) _mm_gf2p8affine_epi64_epi8(x, A, b) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm_gf2p8affine_epi64_epi8 #define _mm_gf2p8affine_epi64_epi8(x, A, b) simde_mm_gf2p8affine_epi64_epi8(x, A, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i 
simde_mm256_gf2p8affine_epi64_epi8 (simde__m256i x, simde__m256i A, int b) SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { return simde_mm256_xor_si256(simde_x_mm256_gf2p8matrix_multiply_epi64_epi8(x, A), simde_mm256_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); } #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) #define simde_mm256_gf2p8affine_epi64_epi8(x, A, b) _mm256_gf2p8affine_epi64_epi8(x, A, b) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm256_gf2p8affine_epi64_epi8 #define _mm256_gf2p8affine_epi64_epi8(x, A, b) simde_mm256_gf2p8affine_epi64_epi8(x, A, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_gf2p8affine_epi64_epi8 (simde__m512i x, simde__m512i A, int b) SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { return simde_mm512_xor_si512(simde_x_mm512_gf2p8matrix_multiply_epi64_epi8(x, A), simde_mm512_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); } #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) #define simde_mm512_gf2p8affine_epi64_epi8(x, A, b) _mm512_gf2p8affine_epi64_epi8(x, A, b) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm512_gf2p8affine_epi64_epi8 #define _mm512_gf2p8affine_epi64_epi8(x, A, b) simde_mm512_gf2p8affine_epi64_epi8(x, A, b) #endif #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) #define simde_mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) _mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) #else #define simde_mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm_mask_mov_epi8(src, k, simde_mm_gf2p8affine_epi64_epi8(x, A, b)) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm_mask_gf2p8affine_epi64_epi8 #define _mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) #endif #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) #define simde_mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) _mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) #else #define 
simde_mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm256_mask_mov_epi8(src, k, simde_mm256_gf2p8affine_epi64_epi8(x, A, b)) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_gf2p8affine_epi64_epi8 #define _mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) #endif #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) #define simde_mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) _mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) #else #define simde_mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm512_mask_mov_epi8(src, k, simde_mm512_gf2p8affine_epi64_epi8(x, A, b)) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_gf2p8affine_epi64_epi8 #define _mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) #endif #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) #define simde_mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b) _mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b) #else #define simde_mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm_maskz_mov_epi8(k, simde_mm_gf2p8affine_epi64_epi8(x, A, b)) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_gf2p8affine_epi64_epi8 #define _mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b) #endif #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) #define simde_mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b) _mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b) #else #define simde_mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm256_maskz_mov_epi8(k, simde_mm256_gf2p8affine_epi64_epi8(x, A, b)) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_gf2p8affine_epi64_epi8 #define _mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b) #endif #if 
defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) #define simde_mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b) _mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b) #else #define simde_mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm512_maskz_mov_epi8(k, simde_mm512_gf2p8affine_epi64_epi8(x, A, b)) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_gf2p8affine_epi64_epi8 #define _mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_gf2p8affineinv_epi64_epi8 (simde__m128i x, simde__m128i A, int b) SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { return simde_mm_xor_si128(simde_x_mm_gf2p8matrix_multiply_inverse_epi64_epi8(x, A), simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); } #if defined(SIMDE_X86_GFNI_NATIVE) #define simde_mm_gf2p8affineinv_epi64_epi8(x, A, b) _mm_gf2p8affineinv_epi64_epi8(x, A, b) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm_gf2p8affineinv_epi64_epi8 #define _mm_gf2p8affineinv_epi64_epi8(x, A, b) simde_mm_gf2p8affineinv_epi64_epi8(x, A, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_gf2p8affineinv_epi64_epi8 (simde__m256i x, simde__m256i A, int b) SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { return simde_mm256_xor_si256(simde_x_mm256_gf2p8matrix_multiply_inverse_epi64_epi8(x, A), simde_mm256_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); } #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) #define simde_mm256_gf2p8affineinv_epi64_epi8(x, A, b) _mm256_gf2p8affineinv_epi64_epi8(x, A, b) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm256_gf2p8affineinv_epi64_epi8 #define _mm256_gf2p8affineinv_epi64_epi8(x, A, b) simde_mm256_gf2p8affineinv_epi64_epi8(x, A, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_gf2p8affineinv_epi64_epi8 (simde__m512i x, simde__m512i A, int b) SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { return 
simde_mm512_xor_si512(simde_x_mm512_gf2p8matrix_multiply_inverse_epi64_epi8(x, A), simde_mm512_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); } #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) #define simde_mm512_gf2p8affineinv_epi64_epi8(x, A, b) _mm512_gf2p8affineinv_epi64_epi8(x, A, b) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm512_gf2p8affineinv_epi64_epi8 #define _mm512_gf2p8affineinv_epi64_epi8(x, A, b) simde_mm512_gf2p8affineinv_epi64_epi8(x, A, b) #endif #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) #define simde_mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) _mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) #else #define simde_mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm_mask_mov_epi8(src, k, simde_mm_gf2p8affineinv_epi64_epi8(x, A, b)) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm_mask_gf2p8affineinv_epi64_epi8 #define _mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) #endif #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) #define simde_mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) _mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) #else #define simde_mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm256_mask_mov_epi8(src, k, simde_mm256_gf2p8affineinv_epi64_epi8(x, A, b)) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_gf2p8affineinv_epi64_epi8 #define _mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) #endif #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) #define simde_mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) _mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) #else #define simde_mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm512_mask_mov_epi8(src, k, simde_mm512_gf2p8affineinv_epi64_epi8(x, 
A, b)) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_gf2p8affineinv_epi64_epi8 #define _mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) #endif #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) #define simde_mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) _mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) #else #define simde_mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm_maskz_mov_epi8(k, simde_mm_gf2p8affineinv_epi64_epi8(x, A, b)) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_gf2p8affineinv_epi64_epi8 #define _mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) #endif #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) #define simde_mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) _mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) #else #define simde_mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm256_maskz_mov_epi8(k, simde_mm256_gf2p8affineinv_epi64_epi8(x, A, b)) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_gf2p8affineinv_epi64_epi8 #define _mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) #endif #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) #define simde_mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) _mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) #else #define simde_mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm512_maskz_mov_epi8(k, simde_mm512_gf2p8affineinv_epi64_epi8(x, A, b)) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_gf2p8affineinv_epi64_epi8 #define _mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_gf2p8mul_epi8 (simde__m128i a, simde__m128i b) { #if 
defined(SIMDE_X86_SSE2_NATIVE) simde__m128i r, t; const simde__m128i zero = simde_mm_setzero_si128(); const simde__m128i ones = simde_mm_set1_epi8(0x01); /* The field generator polynomial is 0x11B but we drop the 0x100 bit */ const simde__m128i fgp = simde_mm_set1_epi8(0x1B); r = zero; #if !defined(__INTEL_COMPILER) SIMDE_VECTORIZE #endif for (int i = 0 ; i < 8 ; i++) { t = simde_mm_and_si128(b, ones); t = simde_mm_cmpeq_epi8(t, ones); t = simde_mm_and_si128(a, t); r = simde_mm_xor_si128(r, t); t = simde_mm_cmpgt_epi8(zero, a); t = simde_mm_and_si128(fgp, t); a = simde_mm_add_epi8(a, a); a = simde_mm_xor_si128(a, t); b = simde_mm_srli_epi64(b, 1); } return r; #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); /* The field generator polynomial is 0x11B but we drop the 0x100 bit */ const uint8_t fgp = 0x1B; #if !defined(__INTEL_COMPILER) SIMDE_VECTORIZE #endif for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = 0; while ((a_.u8[i] != 0) && (b_.u8[i] != 0)) { if (b_.u8[i] & 1) r_.u8[i] ^= a_.u8[i]; if (a_.u8[i] & 0x80) a_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.u8[i] << 1) ^ fgp); else a_.u8[i] <<= 1; b_.u8[i] >>= 1; } } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_GFNI_NATIVE) #define simde_mm_gf2p8mul_epi8(a, b) _mm_gf2p8mul_epi8(a, b) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm_gf2p8mul_epi8 #define _mm_gf2p8mul_epi8(a, b) simde_mm_gf2p8mul_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_gf2p8mul_epi8 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_AVX2_NATIVE) simde__m256i r, t; const simde__m256i zero = simde_mm256_setzero_si256(); const simde__m256i ones = simde_mm256_set1_epi8(0x01); /* The field generator polynomial is 0x11B but we drop the 0x100 bit */ const simde__m256i fgp = simde_mm256_set1_epi8(0x1B); r = zero; #if !defined(__INTEL_COMPILER) SIMDE_VECTORIZE #endif for (int i = 0 ; i < 8 ; i++) { t = 
simde_mm256_and_si256(b, ones); t = simde_mm256_cmpeq_epi8(t, ones); t = simde_mm256_and_si256(a, t); r = simde_mm256_xor_si256(r, t); t = simde_mm256_cmpgt_epi8(zero, a); t = simde_mm256_and_si256(fgp, t); a = simde_mm256_add_epi8(a, a); a = simde_mm256_xor_si256(a, t); b = simde_mm256_srli_epi64(b, 1); } return r; #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if !defined(__INTEL_COMPILER) SIMDE_VECTORIZE #endif for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { r_.m128i[i] = simde_mm_gf2p8mul_epi8(a_.m128i[i], b_.m128i[i]); } return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) #define simde_mm256_gf2p8mul_epi8(a, b) _mm256_gf2p8mul_epi8(a, b) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm256_gf2p8mul_epi8 #define _mm256_gf2p8mul_epi8(a, b) simde_mm256_gf2p8mul_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_gf2p8mul_epi8 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) simde__m512i r, t; simde__mmask64 m; const simde__m512i zero = simde_mm512_setzero_si512(); const simde__m512i ones = simde_mm512_set1_epi8(0x01); /* The field generator polynomial is 0x11B but we drop the 0x100 bit */ const simde__m512i fgp = simde_mm512_set1_epi8(0x1B); r = zero; #if !defined(__INTEL_COMPILER) SIMDE_VECTORIZE #endif for (int i = 0 ; i < 8 ; i++) { t = simde_mm512_and_si512(b, ones); m = simde_mm512_cmpeq_epi8_mask(t, ones); t = simde_mm512_maskz_mov_epi8(m, a); r = simde_mm512_xor_si512(r, t); m = simde_mm512_cmpgt_epi8_mask(zero, a); a = simde_mm512_add_epi8(a, a); t = simde_mm512_maskz_mov_epi8(m, fgp); a = simde_mm512_xor_si512(a, t); b = simde_mm512_srli_epi64(b, 1); } return r; #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if !defined(__INTEL_COMPILER) SIMDE_VECTORIZE #endif for (size_t i = 0 ; i < (sizeof(r_.m128i) / 
sizeof(r_.m128i[0])) ; i++) { r_.m128i[i] = simde_mm_gf2p8mul_epi8(a_.m128i[i], b_.m128i[i]); } return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) #define simde_mm512_gf2p8mul_epi8(a, b) _mm512_gf2p8mul_epi8(a, b) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm512_gf2p8mul_epi8 #define _mm512_gf2p8mul_epi8(a, b) simde_mm512_gf2p8mul_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mask_gf2p8mul_epi8 (simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_mask_gf2p8mul_epi8(src, k, a, b); #else return simde_mm_mask_mov_epi8(src, k, simde_mm_gf2p8mul_epi8(a, b)); #endif } #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm_mask_gf2p8mul_epi8 #define _mm_mask_gf2p8mul_epi8(src, k, a, b) simde_mm_mask_gf2p8mul_epi8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_mask_gf2p8mul_epi8 (simde__m256i src, simde__mmask32 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_mask_gf2p8mul_epi8(src, k, a, b); #else return simde_mm256_mask_mov_epi8(src, k, simde_mm256_gf2p8mul_epi8(a, b)); #endif } #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_gf2p8mul_epi8 #define _mm256_mask_gf2p8mul_epi8(src, k, a, b) simde_mm256_mask_gf2p8mul_epi8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_gf2p8mul_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_gf2p8mul_epi8(src, k, a, b); #else return simde_mm512_mask_mov_epi8(src, k, simde_mm512_gf2p8mul_epi8(a, b)); #endif } #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_gf2p8mul_epi8 #define _mm512_mask_gf2p8mul_epi8(src, k, a, b) 
simde_mm512_mask_gf2p8mul_epi8(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maskz_gf2p8mul_epi8 (simde__mmask16 k, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_maskz_gf2p8mul_epi8(k, a, b); #else return simde_mm_maskz_mov_epi8(k, simde_mm_gf2p8mul_epi8(a, b)); #endif } #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_gf2p8mul_epi8 #define _mm_maskz_gf2p8mul_epi8(k, a, b) simde_mm_maskz_gf2p8mul_epi8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_maskz_gf2p8mul_epi8 (simde__mmask32 k, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_maskz_gf2p8mul_epi8(k, a, b); #else return simde_mm256_maskz_mov_epi8(k, simde_mm256_gf2p8mul_epi8(a, b)); #endif } #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_gf2p8mul_epi8 #define _mm256_maskz_gf2p8mul_epi8(k, a, b) simde_mm256_maskz_gf2p8mul_epi8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_gf2p8mul_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_gf2p8mul_epi8(k, a, b); #else return simde_mm512_maskz_mov_epi8(k, simde_mm512_gf2p8mul_epi8(a, b)); #endif } #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_gf2p8mul_epi8 #define _mm512_maskz_gf2p8mul_epi8(k, a, b) simde_mm512_maskz_gf2p8mul_epi8(k, a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_GFNI_H) */ simde-0.7.2/simde/x86/mmx.h000066400000000000000000002254721400333146700153360ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights 
to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2017-2020 Evan Nemerson */ #if !defined(SIMDE_X86_MMX_H) #define SIMDE_X86_MMX_H #include "../simde-common.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS #if defined(SIMDE_X86_MMX_NATIVE) #define SIMDE_X86_MMX_USE_NATIVE_TYPE #elif defined(SIMDE_X86_SSE_NATIVE) #define SIMDE_X86_MMX_USE_NATIVE_TYPE #endif #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) #include #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #include #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) #include #endif #include #include SIMDE_BEGIN_DECLS_ typedef union { #if defined(SIMDE_VECTOR_SUBSCRIPT) SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 
int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; #else SIMDE_ALIGN_TO_8 int8_t i8[8]; SIMDE_ALIGN_TO_8 int16_t i16[4]; SIMDE_ALIGN_TO_8 int32_t i32[2]; SIMDE_ALIGN_TO_8 int64_t i64[1]; SIMDE_ALIGN_TO_8 uint8_t u8[8]; SIMDE_ALIGN_TO_8 uint16_t u16[4]; SIMDE_ALIGN_TO_8 uint32_t u32[2]; SIMDE_ALIGN_TO_8 uint64_t u64[1]; SIMDE_ALIGN_TO_8 simde_float32 f32[2]; SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; #endif #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) __m64 n; #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int8x8_t neon_i8; int16x4_t neon_i16; int32x2_t neon_i32; int64x1_t neon_i64; uint8x8_t neon_u8; uint16x4_t neon_u16; uint32x2_t neon_u32; uint64x1_t neon_u64; float32x2_t neon_f32; #endif #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) int8x8_t mmi_i8; int16x4_t mmi_i16; int32x2_t mmi_i32; int64_t mmi_i64; uint8x8_t mmi_u8; uint16x4_t mmi_u16; uint32x2_t mmi_u32; uint64_t mmi_u64; #endif } simde__m64_private; #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) typedef __m64 simde__m64; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) typedef int32x2_t simde__m64; #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) typedef int32x2_t simde__m64; #elif defined(SIMDE_VECTOR_SUBSCRIPT) typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; #else typedef simde__m64_private simde__m64; #endif #if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES typedef simde__m64 __m64; #endif HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); #if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, 
"simde__m64_private is not 8-byte aligned"); #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde__m64_from_private(simde__m64_private v) { simde__m64 r; simde_memcpy(&r, &v, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m64_private simde__m64_to_private(simde__m64 v) { simde__m64_private r; simde_memcpy(&r, &v, sizeof(r)); return r; } #define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ SIMDE_FUNCTION_ATTRIBUTES \ simde__##simde_type \ simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ simde__##simde_type##_private r_; \ r_.isax##_##fragment = value; \ return simde__##simde_type##_from_private(r_); \ } \ \ SIMDE_FUNCTION_ATTRIBUTES \ source_type \ simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ return r_.isax##_##fragment; \ } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) #endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) 
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) #endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_add_pi8(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = a_.i8 + b_.i8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[i] + b_.i8[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) # define _m_paddb(a, b) simde_m_paddb(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_add_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = a_.i16 + b_.i16; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] + b_.i16[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_add_pi16(a, b) simde_mm_add_pi16(a, 
b) # define _m_paddw(a, b) simde_mm_add_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_add_pi32(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 + b_.i32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] + b_.i32[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) # define _m_paddd(a, b) simde_mm_add_pi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_adds_pi8(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { r_.i8[i] = INT8_MAX; } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { r_.i8[i] = INT8_MIN; } else { r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); } } #endif return simde__m64_from_private(r_); #endif } #define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) # define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde__m64 simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_adds_pu8(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); if (x > UINT8_MAX) r_.u8[i] = UINT8_MAX; else r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) # define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_adds_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { r_.i16[i] = INT16_MAX; } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { r_.i16[i] = SHRT_MIN; } else { r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); } } #endif return simde__m64_from_private(r_); #endif } #define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) # 
define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_adds_pu16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { const uint32_t x = a_.u16[i] + b_.u16[i]; if (x > UINT16_MAX) r_.u16[i] = UINT16_MAX; else r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) # define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_and_si64 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_and_si64(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 & b_.i64; #else r_.i64[0] = a_.i64[0] & b_.i64[0]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_pand(a, b) simde_mm_and_si64(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_and_si64(a, b) simde_mm_and_si64(a, b) # define _m_pand(a, b) simde_mm_and_si64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_andnot_si64(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private 
/* Remainder of simde_mm_andnot_si64: every branch below yields (~a) & b. */
    b_ = simde__m64_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* Operands are passed in the opposite order from the other branches. */
      r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32);
    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
      r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = ~a_.i32f & b_.i32f;
    #else
      r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]);
    #endif

    return simde__m64_from_private(r_);
  #endif
}
#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b)
#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b)
# define _m_pandn(a, b) simde_mm_andnot_si64(a, b)
#endif

/* Lane-wise equality test of the 8-bit lanes of `a` and `b`.  In the
 * portable fallback each result lane is all ones (~0) when the two lanes are
 * equal and zero otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m64
simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) {
  #if defined(SIMDE_X86_MMX_NATIVE)
    return _mm_cmpeq_pi8(a, b);
  #else
    simde__m64_private r_;
    simde__m64_private a_ = simde__m64_to_private(a);
    simde__m64_private b_ = simde__m64_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* The NEON comparison result is stored through the unsigned view. */
      r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8);
    #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
      r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = (a_.i8[i] == b_.i8[i]) ?
~INT8_C(0) : INT8_C(0); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) # define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpeq_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) # define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpeq_pi32(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) # define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpgt_pi8(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) # define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpgt_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) # define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpgt_pi32(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) # define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_mm_cvtm64_si64 (simde__m64 a) { #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) return _mm_cvtm64_si64(a); #else simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) HEDLEY_DIAGNOSTIC_PUSH #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) #pragma clang diagnostic ignored "-Wvector-conversion" #endif return vget_lane_s64(a_.neon_i64, 0); HEDLEY_DIAGNOSTIC_POP #else return a_.i64[0]; #endif #endif } #define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) # define _m_to_int64(a) simde_mm_cvtm64_si64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 
simde_mm_cvtsi32_si64 (int32_t a) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtsi32_si64(a); #else simde__m64_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) const int32_t av[sizeof(r_.neon_i32) / sizeof(r_.neon_i32[0])] = { a, 0 }; r_.neon_i32 = vld1_s32(av); #else r_.i32[0] = a; r_.i32[1] = 0; #endif return simde__m64_from_private(r_); #endif } #define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) # define _m_from_int(a) simde_mm_cvtsi32_si64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cvtsi64_m64 (int64_t a) { #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) return _mm_cvtsi64_m64(a); #else simde__m64_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vld1_s64(&a); #else r_.i64[0] = a; #endif return simde__m64_from_private(r_); #endif } #define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) # define _m_from_int64(a) simde_mm_cvtsi64_m64(a) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_cvtsi64_si32 (simde__m64 a) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtsi64_si32(a); #else simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) HEDLEY_DIAGNOSTIC_PUSH #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) #pragma clang diagnostic ignored "-Wvector-conversion" #endif return vget_lane_s32(a_.neon_i32, 0); HEDLEY_DIAGNOSTIC_POP #else return a_.i32[0]; #endif #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_empty (void) { #if defined(SIMDE_X86_MMX_NATIVE) _mm_empty(); #else /* noop */ #endif } #define simde_m_empty() simde_mm_empty() #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_empty() 
simde_mm_empty() # define _m_empty() simde_mm_empty() #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_madd_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) # define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_mulhi_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); const uint16x4_t t3 = vmovn_u32(t2); r_.neon_u16 = t3; #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) # 
define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_mullo_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); r_.neon_u16 = t2; #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) # define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_or_si64 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_or_si64(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 | b_.i64; #else r_.i64[0] = a_.i64[0] | b_.i64[0]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_por(a, b) simde_mm_or_si64(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_or_si64(a, b) simde_mm_or_si64(a, b) # define _m_por(a, b) simde_mm_or_si64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_packs_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = 
simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { if (a_.i16[i] < INT8_MIN) { r_.i8[i] = INT8_MIN; } else if (a_.i16[i] > INT8_MAX) { r_.i8[i] = INT8_MAX; } else { r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); } } SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { if (b_.i16[i] < INT8_MIN) { r_.i8[i + 4] = INT8_MIN; } else if (b_.i16[i] > INT8_MAX) { r_.i8[i + 4] = INT8_MAX; } else { r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); } } #endif return simde__m64_from_private(r_); #endif } #define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) # define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_packs_pi32(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { if (a_.i32[i] < SHRT_MIN) { r_.i16[i] = SHRT_MIN; } else if (a_.i32[i] > INT16_MAX) { r_.i16[i] = INT16_MAX; } else { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); } } SIMDE_VECTORIZE for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { if (b_.i32[i] < SHRT_MIN) { r_.i16[i + 2] = SHRT_MIN; } else if (b_.i32[i] > INT16_MAX) { r_.i16[i + 2] = INT16_MAX; } 
else { r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); } } #endif return simde__m64_from_private(r_); #endif } #define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) # define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_packs_pu16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); /* Set elements which are < 0 to 0 */ const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); /* Vector with all s16 elements set to UINT8_MAX */ const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); /* Elements which are within the acceptable range */ const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); /* Final values as 16-bit integers */ const int16x8_t values = vorrq_s16(le_max, gt_max); r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { if (a_.i16[i] > UINT8_MAX) { r_.u8[i] = UINT8_MAX; } else if (a_.i16[i] < 0) { r_.u8[i] = 0; } else { r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); } } SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { if (b_.i16[i] > UINT8_MAX) { r_.u8[i + 4] = UINT8_MAX; } else if (b_.i16[i] < 0) { r_.u8[i + 4] = 0; } else { r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); } } #endif return simde__m64_from_private(r_); #endif } #define 
simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) # define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); #else simde__m64_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; r_.neon_i8 = vld1_s8(v); #else r_.i8[0] = e0; r_.i8[1] = e1; r_.i8[2] = e2; r_.i8[3] = e3; r_.i8[4] = e4; r_.i8[5] = e5; r_.i8[6] = e6; r_.i8[7] = e7; #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { simde__m64_private r_; #if defined(SIMDE_X86_MMX_NATIVE) r_.n = _mm_set_pi8( HEDLEY_STATIC_CAST(int8_t, e7), HEDLEY_STATIC_CAST(int8_t, e6), HEDLEY_STATIC_CAST(int8_t, e5), HEDLEY_STATIC_CAST(int8_t, e4), HEDLEY_STATIC_CAST(int8_t, e3), HEDLEY_STATIC_CAST(int8_t, e2), HEDLEY_STATIC_CAST(int8_t, e1), HEDLEY_STATIC_CAST(int8_t, e0)); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; r_.neon_u8 = vld1_u8(v); #else r_.u8[0] = e0; r_.u8[1] = e1; r_.u8[2] = e2; r_.u8[3] = e3; r_.u8[4] = e4; r_.u8[5] = e5; r_.u8[6] = e6; r_.u8[7] = e7; #endif return simde__m64_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_set_pi16(e3, e2, e1, e0); #else simde__m64_private r_; #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; r_.neon_i16 = vld1_s16(v); #else r_.i16[0] = e0; r_.i16[1] = e1; r_.i16[2] = e2; r_.i16[3] = e3; #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { simde__m64_private r_; #if defined(SIMDE_X86_MMX_NATIVE) r_.n = _mm_set_pi16( HEDLEY_STATIC_CAST(int16_t, e3), HEDLEY_STATIC_CAST(int16_t, e2), HEDLEY_STATIC_CAST(int16_t, e1), HEDLEY_STATIC_CAST(int16_t, e0) ); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; r_.neon_u16 = vld1_u16(v); #else r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; #endif return simde__m64_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { simde__m64_private r_; #if defined(SIMDE_X86_MMX_NATIVE) r_.n = _mm_set_pi32( HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; r_.neon_u32 = vld1_u32(v); #else r_.u32[0] = e0; r_.u32[1] = e1; #endif return simde__m64_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_set_pi32 (int32_t e1, int32_t e0) { simde__m64_private r_; #if defined(SIMDE_X86_MMX_NATIVE) r_.n = _mm_set_pi32(e1, e0); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; r_.neon_i32 = vld1_s32(v); #else r_.i32[0] = e0; r_.i32[1] = e1; #endif return simde__m64_from_private(r_); } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_set_pi64 (int64_t e0) { 
simde__m64_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; r_.neon_i64 = vld1_s64(v); #else r_.i64[0] = e0; #endif return simde__m64_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { simde__m64_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; r_.neon_f32 = vld1_f32(v); #else r_.f32[0] = e0; r_.f32[1] = e1; #endif return simde__m64_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_set1_pi8 (int8_t a) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_set1_pi8(a); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) simde__m64_private r_; r_.neon_i8 = vmov_n_s8(a); return simde__m64_from_private(r_); #else return simde_mm_set_pi8(a, a, a, a, a, a, a, a); #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_set1_pi8(a) simde_mm_set1_pi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_set1_pi16 (int16_t a) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_set1_pi16(a); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) simde__m64_private r_; r_.neon_i16 = vmov_n_s16(a); return simde__m64_from_private(r_); #else return simde_mm_set_pi16(a, a, a, a); #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_set1_pi16(a) simde_mm_set1_pi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_set1_pi32 (int32_t a) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_set1_pi32(a); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) simde__m64_private r_; r_.neon_i32 = vmov_n_s32(a); return simde__m64_from_private(r_); #else return simde_mm_set_pi32(a, a); #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_set1_pi32(a) simde_mm_set1_pi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { #if defined(SIMDE_X86_MMX_NATIVE) return 
_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); #else return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_setr_pi16(e3, e2, e1, e0); #else return simde_mm_set_pi16(e0, e1, e2, e3); #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_setr_pi32 (int32_t e1, int32_t e0) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_setr_pi32(e1, e0); #else return simde_mm_set_pi32(e0, e1); #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_setzero_si64 (void) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_setzero_si64(); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) simde__m64_private r_; r_.neon_u32 = vmov_n_u32(0); return simde__m64_from_private(r_); #else return simde_mm_set_pi32(0, 0); #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_setzero_si64() simde_mm_setzero_si64() #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_load_si64 (const void* mem_addr) { simde__m64 r; simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_loadu_si64 (const void* mem_addr) { simde__m64 r; simde_memcpy(&r, mem_addr, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES void simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); } SIMDE_FUNCTION_ATTRIBUTES void simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { simde_memcpy(mem_addr, 
&value, sizeof(value)); } SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_setone_si64 (void) { return simde_mm_set1_pi32(~INT32_C(0)); } SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_sll_pi16(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private count_ = simde__m64_to_private(count); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) HEDLEY_DIAGNOSTIC_PUSH #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) #pragma clang diagnostic ignored "-Wvector-conversion" #endif r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); HEDLEY_DIAGNOSTIC_POP #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) return simde_mm_setzero_si64(); r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 << count_.u64[0]; #else if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { simde_memset(&r_, 0, sizeof(r_)); return simde__m64_from_private(r_); } SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) # define _m_psllw(a, count) simde_mm_sll_pi16(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_sll_pi32(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private count_ = simde__m64_to_private(count); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) 
HEDLEY_DIAGNOSTIC_PUSH #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) #pragma clang diagnostic ignored "-Wvector-conversion" #endif r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); HEDLEY_DIAGNOSTIC_POP #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i32 = a_.i32 << count_.u64[0]; #else if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { simde_memset(&r_, 0, sizeof(r_)); return simde__m64_from_private(r_); } SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] << count_.u64[0]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) # define _m_pslld(a, count) simde_mm_sll_pi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_slli_pi16 (simde__m64 a, int count) { #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) return _mm_slli_pi16(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) if (HEDLEY_UNLIKELY(count > 15)) return simde_mm_setzero_si64(); r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 << count; #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = psllh_s(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) #if 
defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) # define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_slli_pi32 (simde__m64 a, int count) { #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) return _mm_slli_pi32(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i32 = a_.i32 << count; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] << count; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) # define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_slli_si64 (simde__m64 a, int count) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_slli_si64(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i64 = a_.i64 << count; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); #else r_.u64[0] = a_.u64[0] << count; #endif return simde__m64_from_private(r_); #endif } #define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) # define _m_psllqi(a, count) simde_mm_slli_si64(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_sll_si64(a, count); #else 
simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private count_ = simde__m64_to_private(count); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 << count_.i64; #else if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { simde_memset(&r_, 0, sizeof(r_)); return simde__m64_from_private(r_); } r_.u64[0] = a_.u64[0] << count_.u64[0]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) # define _m_psllq(a, count) simde_mm_sll_si64(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_srl_pi16 (simde__m64 a, simde__m64 count) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_srl_pi16(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private count_ = simde__m64_to_private(count); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) return simde_mm_setzero_si64(); r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u16 = a_.u16 >> count_.u64[0]; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); #else if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { simde_memset(&r_, 0, sizeof(r_)); return simde__m64_from_private(r_); } SIMDE_VECTORIZE for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { r_.u16[i] = a_.u16[i] >> count_.u64[0]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) # define _m_psrlw(a, count) 
simde_mm_srl_pi16(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_srl_pi32(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private count_ = simde__m64_to_private(count); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u32 = a_.u32 >> count_.u64[0]; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); #else if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { simde_memset(&r_, 0, sizeof(r_)); return simde__m64_from_private(r_); } SIMDE_VECTORIZE for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { r_.u32[i] = a_.u32[i] >> count_.u64[0]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) # define _m_psrld(a, count) simde_mm_srl_pi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_srli_pi16 (simde__m64 a, int count) { #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) return _mm_srli_pi16(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u16 = a_.u16 >> count; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = a_.u16[i] >> count; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) # define _m_psrlwi(a, count) simde_mm_srli_pi16(a, 
count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_srli_pi32 (simde__m64 a, int count) { #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) return _mm_srli_pi32(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u32 = a_.u32 >> count; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] >> count; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) # define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_srli_si64 (simde__m64 a, int count) { #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) return _mm_srli_si64(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u64 = a_.u64 >> count; #else r_.u64[0] = a_.u64[0] >> count; #endif return simde__m64_from_private(r_); #endif } #define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) # define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_srl_si64(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private count_ = simde__m64_to_private(count); #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u64 = a_.u64 >> count_.u64; #else if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { simde_memset(&r_, 0, sizeof(r_)); return simde__m64_from_private(r_); } r_.u64[0] = a_.u64[0] >> count_.u64[0]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) # define _m_psrlq(a, count) simde_mm_srl_si64(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_srai_pi16 (simde__m64 a, int count) { #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) return _mm_srai_pi16(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 >> (count & 0xff); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = psrah_s(a_.mmi_i16, count); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] >> (count & 0xff); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) # define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_srai_pi32 (simde__m64 a, int count) { #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) return _mm_srai_pi32(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i32 = a_.i32 >> (count & 0xff); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vshl_s32(a_.neon_i32, 
vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = psraw_s(a_.mmi_i32, count); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] >> (count & 0xff); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) # define _m_psradi(a, count) simde_mm_srai_pi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_sra_pi16(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private count_ = simde__m64_to_private(count); const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 >> cnt; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] >> cnt; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) # define _m_psraw(a, count) simde_mm_sra_pi16(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_sra_pi32(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private count_ = simde__m64_to_private(count); const int32_t cnt = (count_.u64[0] > 31) ? 
31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i32 = a_.i32 >> cnt; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] >> cnt; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) # define _m_psrad(a, count) simde_mm_sra_pi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_sub_pi8(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = a_.i8 - b_.i8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[i] - b_.i8[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) # define _m_psubb(a, b) simde_mm_sub_pi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_sub_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = 
psubh_s(a_.mmi_i16, b_.mmi_i16); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = a_.i16 - b_.i16; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] - b_.i16[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) # define _m_psubw(a, b) simde_mm_sub_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_sub_pi32(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 - b_.i32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] - b_.i32[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) # define _m_psubd(a, b) simde_mm_sub_pi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_subs_pi8(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + 
(b_.i8[i]))) { r_.i8[i] = INT8_MIN; } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { r_.i8[i] = INT8_MAX; } else { r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); } } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) # define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_subs_pu8(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { const int32_t x = a_.u8[i] - b_.u8[i]; if (x < 0) { r_.u8[i] = 0; } else if (x > UINT8_MAX) { r_.u8[i] = UINT8_MAX; } else { r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); } } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) # define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_subs_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { if (((b_.i16[i]) > 0 && 
(a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { r_.i16[i] = SHRT_MIN; } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { r_.i16[i] = INT16_MAX; } else { r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); } } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) # define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_subs_pu16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { const int x = a_.u16[i] - b_.u16[i]; if (x < 0) { r_.u16[i] = 0; } else if (x > UINT16_MAX) { r_.u16[i] = UINT16_MAX; } else { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); } } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) # define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_unpackhi_pi8(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); #elif 
defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); #else r_.i8[0] = a_.i8[4]; r_.i8[1] = b_.i8[4]; r_.i8[2] = a_.i8[5]; r_.i8[3] = b_.i8[5]; r_.i8[4] = a_.i8[6]; r_.i8[5] = b_.i8[6]; r_.i8[6] = a_.i8[7]; r_.i8[7] = b_.i8[7]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) # define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_unpackhi_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); #else r_.i16[0] = a_.i16[2]; r_.i16[1] = b_.i16[2]; r_.i16[2] = a_.i16[3]; r_.i16[3] = b_.i16[3]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) # define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_unpackhi_pi32(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); #elif 
defined(SIMDE_SHUFFLE_VECTOR_) r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); #else r_.i32[0] = a_.i32[1]; r_.i32[1] = b_.i32[1]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) # define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_unpacklo_pi8(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); #else r_.i8[0] = a_.i8[0]; r_.i8[1] = b_.i8[0]; r_.i8[2] = a_.i8[1]; r_.i8[3] = b_.i8[1]; r_.i8[4] = a_.i8[2]; r_.i8[5] = b_.i8[2]; r_.i8[6] = a_.i8[3]; r_.i8[7] = b_.i8[3]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) # define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_unpacklo_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i16 = 
SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); #else r_.i16[0] = a_.i16[0]; r_.i16[1] = b_.i16[0]; r_.i16[2] = a_.i16[1]; r_.i16[3] = b_.i16[1]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) # define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_unpacklo_pi32(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); #else r_.i32[0] = a_.i32[0]; r_.i32[1] = b_.i32[0]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) # define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_xor_si64(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f ^ b_.i32f; #else r_.u64[0] = a_.u64[0] ^ b_.u64[0]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define 
_mm_xor_si64(a, b) simde_mm_xor_si64(a, b) # define _m_pxor(a, b) simde_mm_xor_si64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_m_to_int (simde__m64 a) { #if defined(SIMDE_X86_MMX_NATIVE) return _m_to_int(a); #else simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) HEDLEY_DIAGNOSTIC_PUSH #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) #pragma clang diagnostic ignored "-Wvector-conversion" #endif return vget_lane_s32(a_.neon_i32, 0); HEDLEY_DIAGNOSTIC_POP #else return a_.i32[0]; #endif #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _m_to_int(a) simde_m_to_int(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_MMX_H) */ simde-0.7.2/simde/x86/sse.h000066400000000000000000004402511400333146700153210ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2017-2020 Evan Nemerson * 2015-2017 John W. Ratcliff * 2015 Brandon Rowlett * 2015 Ken Fast */ #if !defined(SIMDE_X86_SSE_H) #define SIMDE_X86_SSE_H #include "mmx.h" #if defined(_WIN32) #include #endif HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ typedef union { #if defined(SIMDE_VECTOR_SUBSCRIPT) SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; #if defined(SIMDE_HAVE_INT128_) SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; #endif SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; #else SIMDE_ALIGN_TO_16 int8_t i8[16]; SIMDE_ALIGN_TO_16 int16_t i16[8]; SIMDE_ALIGN_TO_16 int32_t i32[4]; SIMDE_ALIGN_TO_16 int64_t i64[2]; SIMDE_ALIGN_TO_16 uint8_t u8[16]; SIMDE_ALIGN_TO_16 uint16_t u16[8]; SIMDE_ALIGN_TO_16 uint32_t u32[4]; SIMDE_ALIGN_TO_16 uint64_t u64[2]; #if defined(SIMDE_HAVE_INT128_) SIMDE_ALIGN_TO_16 simde_int128 i128[1]; SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; #endif SIMDE_ALIGN_TO_16 simde_float32 f32[4]; SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; #endif SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; SIMDE_ALIGN_TO_16 simde__m64 m64[2]; #if defined(SIMDE_X86_SSE_NATIVE) SIMDE_ALIGN_TO_16 __m128 n; #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_ALIGN_TO_16 int8x16_t neon_i8; SIMDE_ALIGN_TO_16 int16x8_t neon_i16; SIMDE_ALIGN_TO_16 int32x4_t neon_i32; SIMDE_ALIGN_TO_16 int64x2_t neon_i64; SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; SIMDE_ALIGN_TO_16 float32x4_t neon_f32; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) SIMDE_ALIGN_TO_16 float64x2_t neon_f64; #endif #elif defined(SIMDE_WASM_SIMD128_NATIVE) SIMDE_ALIGN_TO_16 v128_t wasm_v128; #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; #endif #endif } simde__m128_private; #if defined(SIMDE_X86_SSE_NATIVE) typedef __m128 simde__m128; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) typedef float32x4_t simde__m128; #elif defined(SIMDE_WASM_SIMD128_NATIVE) typedef v128_t simde__m128; #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; #elif defined(SIMDE_VECTOR_SUBSCRIPT) typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; #else typedef simde__m128_private simde__m128; #endif #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) typedef simde__m128 __m128; #endif HEDLEY_STATIC_ASSERT(16 == 
sizeof(simde__m128), "simde__m128 size incorrect"); HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); #if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde__m128_from_private(simde__m128_private v) { simde__m128 r; simde_memcpy(&r, &v, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m128_private simde__m128_to_private(simde__m128 v) { simde__m128_private r; simde_memcpy(&r, &v, sizeof(r)); return r; } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) #endif #endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) 
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) #if defined(SIMDE_BUG_GCC_95782) SIMDE_FUNCTION_ATTRIBUTES SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128_to_altivec_f32(simde__m128 value) { simde__m128_private r_ = simde__m128_to_private(value); return r_.altivec_f32; } SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { simde__m128_private r_; r_.altivec_f32 = value; return simde__m128_from_private(r_); } #else SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) #endif #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) #endif #elif defined(SIMDE_WASM_SIMD128_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); #endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ enum { #if defined(SIMDE_X86_SSE_NATIVE) SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, SIMDE_MM_ROUND_UP = _MM_ROUND_UP, SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO #else SIMDE_MM_ROUND_NEAREST = 0x0000, SIMDE_MM_ROUND_DOWN = 0x2000, SIMDE_MM_ROUND_UP = 0x4000, SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 #endif }; #if defined(_MM_FROUND_TO_NEAREST_INT) # define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT # define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF # define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF # define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO # define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION # define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC # define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC #else # define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 # 
define SIMDE_MM_FROUND_TO_NEG_INF 0x01 # define SIMDE_MM_FROUND_TO_POS_INF 0x02 # define SIMDE_MM_FROUND_TO_ZERO 0x03 # define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 # define SIMDE_MM_FROUND_RAISE_EXC 0x00 # define SIMDE_MM_FROUND_NO_EXC 0x08 #endif #define SIMDE_MM_FROUND_NINT \ (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) #define SIMDE_MM_FROUND_FLOOR \ (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) #define SIMDE_MM_FROUND_CEIL \ (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) #define SIMDE_MM_FROUND_TRUNC \ (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) #define SIMDE_MM_FROUND_RINT \ (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) #define SIMDE_MM_FROUND_NEARBYINT \ (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) # define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT # define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF # define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF # define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO # define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION # define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC # define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT # define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR # define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL # define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC # define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT # define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT #endif #if defined(_MM_EXCEPT_INVALID) # define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID #else # define SIMDE_MM_EXCEPT_INVALID (0x0001) #endif #if defined(_MM_EXCEPT_DENORM) # define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM #else # define SIMDE_MM_EXCEPT_DENORM (0x0002) #endif #if defined(_MM_EXCEPT_DIV_ZERO) # define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO #else # define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) #endif #if defined(_MM_EXCEPT_OVERFLOW) # define 
SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW #else # define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) #endif #if defined(_MM_EXCEPT_UNDERFLOW) # define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW #else # define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) #endif #if defined(_MM_EXCEPT_INEXACT) # define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT #else # define SIMDE_MM_EXCEPT_INEXACT (0x0020) #endif #if defined(_MM_EXCEPT_MASK) # define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK #else # define SIMDE_MM_EXCEPT_MASK \ (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) #endif #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT #define _MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK #endif #if defined(_MM_MASK_INVALID) # define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID #else # define SIMDE_MM_MASK_INVALID (0x0080) #endif #if defined(_MM_MASK_DENORM) # define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM #else # define SIMDE_MM_MASK_DENORM (0x0100) #endif #if defined(_MM_MASK_DIV_ZERO) # define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO #else # define SIMDE_MM_MASK_DIV_ZERO (0x0200) #endif #if defined(_MM_MASK_OVERFLOW) # define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW #else # define SIMDE_MM_MASK_OVERFLOW (0x0400) #endif #if defined(_MM_MASK_UNDERFLOW) # define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW #else # define SIMDE_MM_MASK_UNDERFLOW (0x0800) #endif #if defined(_MM_MASK_INEXACT) # define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT #else # define SIMDE_MM_MASK_INEXACT (0x1000) #endif #if defined(_MM_MASK_MASK) # define SIMDE_MM_MASK_MASK _MM_MASK_MASK #else # define SIMDE_MM_MASK_MASK \ 
(SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) #endif #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT #define _MM_MASK_MASK SIMDE_MM_MASK_MASK #endif #if defined(_MM_FLUSH_ZERO_MASK) # define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK #else # define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) #endif #if defined(_MM_FLUSH_ZERO_ON) # define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON #else # define SIMDE_MM_FLUSH_ZERO_ON (0x8000) #endif #if defined(_MM_FLUSH_ZERO_OFF) # define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF #else # define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) #endif #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF #endif SIMDE_FUNCTION_ATTRIBUTES unsigned int SIMDE_MM_GET_ROUNDING_MODE(void) { #if defined(SIMDE_X86_SSE_NATIVE) return _MM_GET_ROUNDING_MODE(); #elif defined(SIMDE_HAVE_FENV_H) unsigned int vfe_mode; switch (fegetround()) { #if defined(FE_TONEAREST) case FE_TONEAREST: vfe_mode = SIMDE_MM_ROUND_NEAREST; break; #endif #if defined(FE_TOWARDZERO) case FE_TOWARDZERO: vfe_mode = SIMDE_MM_ROUND_DOWN; break; #endif #if defined(FE_UPWARD) case FE_UPWARD: vfe_mode = SIMDE_MM_ROUND_UP; break; #endif #if defined(FE_DOWNWARD) case FE_DOWNWARD: vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; break; #endif default: vfe_mode = SIMDE_MM_ROUND_NEAREST; break; } return vfe_mode; #else return SIMDE_MM_ROUND_NEAREST; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() #endif 
SIMDE_FUNCTION_ATTRIBUTES void SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) { #if defined(SIMDE_X86_SSE_NATIVE) _MM_SET_ROUNDING_MODE(a); #elif defined(SIMDE_HAVE_FENV_H) int fe_mode = FE_TONEAREST; switch (a) { #if defined(FE_TONEAREST) case SIMDE_MM_ROUND_NEAREST: fe_mode = FE_TONEAREST; break; #endif #if defined(FE_TOWARDZERO) case SIMDE_MM_ROUND_TOWARD_ZERO: fe_mode = FE_TOWARDZERO; break; #endif #if defined(FE_DOWNWARD) case SIMDE_MM_ROUND_DOWN: fe_mode = FE_DOWNWARD; break; #endif #if defined(FE_UPWARD) case SIMDE_MM_ROUND_UP: fe_mode = FE_UPWARD; break; #endif default: return; } fesetround(fe_mode); #else (void) a; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; #else return SIMDE_MM_FLUSH_ZERO_OFF; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) #endif SIMDE_FUNCTION_ATTRIBUTES void SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { #if defined(SIMDE_X86_SSE_NATIVE) _MM_SET_FLUSH_ZERO_MODE(a); #else (void) a; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_mm_getcsr (void) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_getcsr(); #else return SIMDE_MM_GET_ROUNDING_MODE(); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _mm_getcsr() simde_mm_getcsr() #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_setcsr (uint32_t a) { #if defined(SIMDE_X86_SSE_NATIVE) _mm_setcsr(a); #else SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _mm_setcsr(a) simde_mm_setcsr(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_x_mm_round_ps (simde__m128 a, int rounding, 
int lax_rounding) SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { simde__m128_private r_, a_ = simde__m128_to_private(a); (void) lax_rounding; /* For architectures which lack a current direction SIMD instruction. * * Note that NEON actually has a current rounding mode instruction, * but in ARMv8+ the rounding mode is ignored and nearest is always * used, so we treat ARMv7 as having a rounding mode but ARMv8 as * not. */ #if \ defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ defined(SIMDE_ARM_NEON_A32V8) if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; #endif switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { case SIMDE_MM_FROUND_CUR_DIRECTION: #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) r_.neon_f32 = vrndiq_f32(a_.neon_f32); #elif defined(simde_math_nearbyintf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); } #else HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); #endif break; case SIMDE_MM_FROUND_TO_NEAREST_INT: #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) r_.neon_f32 = vrndnq_f32(a_.neon_f32); #elif defined(simde_math_roundevenf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_roundevenf(a_.f32[i]); } #else HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); #endif break; case SIMDE_MM_FROUND_TO_NEG_INF: #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); #elif 
defined(SIMDE_ARM_NEON_A32V8_NATIVE) r_.neon_f32 = vrndmq_f32(a_.neon_f32); #elif defined(simde_math_floorf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_floorf(a_.f32[i]); } #else HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); #endif break; case SIMDE_MM_FROUND_TO_POS_INF: #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) r_.neon_f32 = vrndpq_f32(a_.neon_f32); #elif defined(simde_math_ceilf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_ceilf(a_.f32[i]); } #else HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); #endif break; case SIMDE_MM_FROUND_TO_ZERO: #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) r_.neon_f32 = vrndq_f32(a_.neon_f32); #elif defined(simde_math_truncf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_truncf(a_.f32[i]); } #else HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); #endif break; default: HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); } return simde__m128_from_private(r_); } #if defined(SIMDE_X86_SSE4_1_NATIVE) #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) #else #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) #endif #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_set_ps(e3, e2, e1, e0); #else simde__m128_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) 
SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; r_.neon_f32 = vld1q_f32(data); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); #else r_.f32[0] = e0; r_.f32[1] = e1; r_.f32[2] = e2; r_.f32[3] = e3; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_set_ps1 (simde_float32 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_set_ps1(a); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vdupq_n_f32(a); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) (void) a; return vec_splats(a); #else return simde_mm_set_ps(a, a, a, a); #endif } #define simde_mm_set1_ps(a) simde_mm_set_ps1(a) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_set_ps1(a) simde_mm_set_ps1(a) # define _mm_set1_ps(a) simde_mm_set1_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_move_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_move_ss(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) m = { 16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; r_.altivec_f32 = vec_perm(a_.altivec_f32, b_.altivec_f32, m); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); #else r_.f32[0] = b_.f32[0]; r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # 
define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_add_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_add_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = a_.f32 + b_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[i] + b_.f32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_add_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_add_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); // the upper values in the result must be the remnants of . 
r_.neon_f32 = vaddq_f32(a_.neon_f32, value); #else r_.f32[0] = a_.f32[0] + b_.f32[0]; r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_and_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_and_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 & b_.i32; #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] & b_.i32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_andnot_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = ~a_.i32 & b_.i32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; } #endif return simde__m128_from_private(r_); 
#endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_xor_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_xor_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f ^ b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] ^ b_.u32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_or_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_or_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f | b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] | b_.u32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_x_mm_not_ps(simde__m128 a) { #if 
defined(SIMDE_X86_AVX512VL_NATIVE) __m128i ai = _mm_castps_si128(a); return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); #elif defined(SIMDE_X86_SSE2_NATIVE) /* Note: we use ints instead of floats because we don't want cmpeq * to return false for (NaN, NaN) */ __m128i ai = _mm_castps_si128(a); return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vmvnq_s32(a_.neon_i32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = ~a_.i32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = ~(a_.i32[i]); } #endif return simde__m128_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { /* This function is for when you want to blend two elements together * according to a mask. It is similar to _mm_blendv_ps, except that * it is undefined whether the blend is based on the highest bit in * each lane (like blendv) or just bitwise operations. This allows * us to implement the function efficiently everywhere. * * Basically, you promise that all the lanes in mask are either 0 or * ~0. 
*/ #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_blendv_ps(a, b, mask); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b), mask_ = simde__m128_to_private(mask); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); } #endif return simde__m128_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_avg_pu16(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) uint32_t wa SIMDE_VECTOR(16); uint32_t wb SIMDE_VECTOR(16); uint32_t wr SIMDE_VECTOR(16); SIMDE_CONVERT_VECTOR_(wa, a_.u16); SIMDE_CONVERT_VECTOR_(wb, b_.u16); wr = (wa + wb + 1) >> 1; SIMDE_CONVERT_VECTOR_(r_.u16, wr); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) # define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde__m64 simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_avg_pu8(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) uint16_t wa SIMDE_VECTOR(16); uint16_t wb SIMDE_VECTOR(16); uint16_t wr SIMDE_VECTOR(16); SIMDE_CONVERT_VECTOR_(wa, a_.u8); SIMDE_CONVERT_VECTOR_(wb, b_.u8); wr = (wa + wb + 1) >> 1; SIMDE_CONVERT_VECTOR_(r_.u8, wr); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) # define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_x_mm_abs_ps(simde__m128 a) { #if defined(SIMDE_X86_AVX512F_NATIVE) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,1,0)) return _mm512_castps512_ps128(_mm512_abs_ps(_mm512_castps128_ps512(a))); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vabsq_f32(a_.neon_f32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = vec_abs(a_.altivec_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_fabsf(a_.f32[i]); } #endif return simde__m128_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmpeq_ps(a, b); #else simde__m128_private r_, a_ = 
simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmpeq_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = a_.u32[i]; } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmpge_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) return _mm_cmpge_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = a_.u32[i]; } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmpgt_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) return _mm_cmpgt_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = a_.u32[i]; } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmple_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmple_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = a_.u32[i]; } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmplt_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmplt_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = a_.u32[i]; } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmpneq_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && SIMDE_ARCH_POWER_CHECK(900) && !defined(HEDLEY_IBM_VERSION) /* vec_cmpne(SIMDE_POWER_ALTIVEC_VECTOR(float), SIMDE_POWER_ALTIVEC_VECTOR(float)) is missing from XL C/C++ v16.1.1, though the documentation (table 89 on page 432 of the IBM XL C/C++ for Linux Compiler Reference, Version 16.1.1) shows that it should be present. Both GCC and clang support it. */ r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpne(a_.altivec_f32, b_.altivec_f32)); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmpneq_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = a_.u32[i]; } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { return simde_mm_cmplt_ps(a, b); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { return simde_mm_cmplt_ss(a, b); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { return simde_mm_cmple_ps(a, b); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { return simde_mm_cmple_ss(a, b); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { return simde_mm_cmpgt_ps(a, b); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # 
define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { return simde_mm_cmpgt_ss(a, b); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { return simde_mm_cmpge_ps(a, b); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { return simde_mm_cmpge_ss(a, b); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmpord_ps(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) /* Note: NEON does not have ordered compare builtin Need to compare a eq a and b eq b to check for NaN Do AND of results to get final */ uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); r_.neon_u32 = vandq_u32(ceqaa, ceqbb); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); #elif defined(simde_math_isnanf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || 
simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); } #else HEDLEY_UNREACHABLE(); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmpunord_ps(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); #elif defined(simde_math_isnanf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? 
~UINT32_C(0) : UINT32_C(0); } #else HEDLEY_UNREACHABLE(); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) return _mm_cmpunord_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(simde_math_isnanf) r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? ~UINT32_C(0) : UINT32_C(0); SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i]; } #else HEDLEY_UNREACHABLE(); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_comieq_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); #else return a_.f32[0] == b_.f32[0]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comige_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_comige_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = 
simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); #else return a_.f32[0] >= b_.f32[0]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_comigt_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); #else return a_.f32[0] > b_.f32[0]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comile_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_comile_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); #else return a_.f32[0] <= b_.f32[0]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define 
_mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_comilt_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); #else return a_.f32[0] < b_.f32[0]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_comineq_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); #else return a_.f32[0] != b_.f32[0]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { simde__m128_private r_, dest_ = simde__m128_to_private(dest), src_ = simde__m128_to_private(src); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) const v128_t sign_pos = wasm_f32x4_splat(-0.0f); r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) #if !defined(HEDLEY_IBM_VERSION) r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); #else r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); #endif #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); r_.altivec_f32 = vec_sel(dest_.altivec_f32, src_.altivec_f32, sign_pos); #elif defined(SIMDE_IEEE754_STORAGE) (void) src_; (void) dest_; simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); } #endif return simde__m128_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); } SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvt_pi2ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); #elif defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); r_.m64_private[1] = a_.m64_private[1]; #else r_.f32[0] = (simde_float32) b_.i32[0]; r_.f32[1] = (simde_float32) b_.i32[1]; r_.i32[2] = a_.i32[2]; r_.i32[3] = a_.i32[3]; #endif return simde__m128_from_private(r_); #endif } #if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cvt_ps2pi (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvt_ps2pi(a); #else simde__m64_private r_; simde__m128_private a_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); #else a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cvt_si2ss(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); #else r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); r_.i32[1] = a_.i32[1]; r_.i32[2] = a_.i32[2]; r_.i32[3] = a_.i32[3]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_cvt_ss2si (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cvt_ss2si(a); #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) return 
vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); #else simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); #if !defined(SIMDE_FAST_CONVERSION_RANGE) return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; #else return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtpi16_ps (simde__m64 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtpi16_ps(a); #else simde__m128_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); #elif defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { simde_float32 v = a_.i16[i]; r_.f32[i] = v; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtpi32_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); #elif defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); r_.m64_private[1] = a_.m64_private[1]; #else r_.f32[0] = (simde_float32) b_.i32[0]; r_.f32[1] = (simde_float32) b_.i32[1]; r_.i32[2] = a_.i32[2]; r_.i32[3] = a_.i32[3]; #endif return 
simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtpi32x2_ps(a, b); #else simde__m128_private r_; simde__m64_private a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); #elif defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); #else r_.f32[0] = (simde_float32) a_.i32[0]; r_.f32[1] = (simde_float32) a_.i32[1]; r_.f32[2] = (simde_float32) b_.i32[0]; r_.f32[3] = (simde_float32) b_.i32[1]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtpi8_ps (simde__m64 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtpi8_ps(a); #else simde__m128_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); #else r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cvtps_pi16 (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtps_pi16(a); #else simde__m64_private r_; 
simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cvtps_pi32 (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtps_pi32(a); #else simde__m64_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { simde_float32 v = simde_math_roundf(a_.f32[i]); #if !defined(SIMDE_FAST_CONVERSION_RANGE) r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; #else r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); #endif } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cvtps_pi8 (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtps_pi8(a); #else simde__m64_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to * i16, combine with an all-zero vector of i16 (which will become the upper * half), narrow to i8. */ float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) r_.i8[i] = INT8_MAX; else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) r_.i8[i] = INT8_MIN; else r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); } /* Note: the upper half is undefined */ #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtpu16_ps (simde__m64 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtpu16_ps(a); #else simde__m128_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); #elif defined(SIMDE_CONVERT_VECTOR_) 
SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = (simde_float32) a_.u16[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtpu8_ps (simde__m64 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtpu8_ps(a); #else simde__m128_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cvtsi32_ss(a, b); #else simde__m128_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); #else r_ = a_; r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) #if !defined(__PGI) return _mm_cvtsi64_ss(a, b); #else return _mm_cvtsi64x_ss(a, b); #endif #else simde__m128_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = 
vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); #else r_ = a_; r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32 simde_mm_cvtss_f32 (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cvtss_f32(a); #else simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vgetq_lane_f32(a_.neon_f32, 0); #else return a_.f32[0]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_cvtss_si32 (simde__m128 a) { return simde_mm_cvt_ss2si(a); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_mm_cvtss_si64 (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) #if !defined(__PGI) return _mm_cvtss_si64(a); #else return _mm_cvtss_si64x(a); #endif #else simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); #else return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cvtt_ps2pi (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtt_ps2pi(a); #else simde__m64_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / 
sizeof(r_.f32[0])) ; i++) { simde_float32 v = a_.f32[i]; #if !defined(SIMDE_FAST_CONVERSION_RANGE) r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; #else r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); #endif } #endif return simde__m64_from_private(r_); #endif } #define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) # define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_cvtt_ss2si (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cvtt_ss2si(a); #else simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); #else simde_float32 v = a_.f32[0]; #if !defined(SIMDE_FAST_CONVERSION_RANGE) return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; #else return SIMDE_CONVERT_FTOI(int32_t, v); #endif #endif #endif } #define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) # define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_mm_cvttss_si64 (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) #if defined(__PGI) return _mm_cvttss_si64x(a); #else return _mm_cvttss_si64(a); #endif #else simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); #else return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmpord_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(simde_math_isnanf) r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? 
UINT32_C(0) : ~UINT32_C(0); SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = a_.u32[i]; } #else HEDLEY_UNREACHABLE(); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_div_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_div_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = a_.f32 / b_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[i] / b_.f32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_div_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_div_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32_t value = vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); #else r_.f32[0] = a_.f32[0] / b_.f32[0]; 
SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_mm_extract_pi16 (simde__m64 a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { simde__m64_private a_ = simde__m64_to_private(a); return a_.i16[imm8]; } #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) # if defined(SIMDE_BUG_CLANG_44589) # define simde_mm_extract_pi16(a, imm8) ( \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") \ HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16((a), (imm8))) \ HEDLEY_DIAGNOSTIC_POP \ ) # else # define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) # endif #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) # define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) #endif #define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) # define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { simde__m64_private r_, a_ = simde__m64_to_private(a); r_.i64[0] = a_.i64[0]; r_.i16[imm8] = i; return simde__m64_from_private(r_); } #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) # if defined(SIMDE_BUG_CLANG_44589) # define ssimde_mm_insert_pi16(a, i, imm8) ( \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") \ (_mm_insert_pi16((a), (i), (imm8))) \ HEDLEY_DIAGNOSTIC_POP \ ) # else # define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, 
i, imm8) # endif #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) # define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) #endif #define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) # define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_load_ps(mem_addr); #else simde__m128_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vld1q_f32(mem_addr); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f32 = vec_vsx_ld(0, mem_addr); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = vec_ld(0, mem_addr); #else simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_load1_ps (simde_float32 const* mem_addr) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_load_ps1(mem_addr); #else simde__m128_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vld1q_dup_f32(mem_addr); #else r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); #endif return simde__m128_from_private(r_); #endif } #define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) # define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_load_ss (simde_float32 const* mem_addr) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_load_ss(mem_addr); #else simde__m128_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) 
r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); #else r_.f32[0] = *mem_addr; r_.i32[1] = 0; r_.i32[2] = 0; r_.i32[3] = 0; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); #else simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); r_.f32[0] = a_.f32[0]; r_.f32[1] = a_.f32[1]; r_.f32[2] = b_.f32[0]; r_.f32[3] = b_.f32[1]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #if HEDLEY_HAS_WARNING("-Wold-style-cast") #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) #else #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) #endif #endif /* The SSE documentation says that there are no alignment requirements for mem_addr. Unfortunately they used the __m64 type for the argument which is supposed to be 8-byte aligned, so some compilers (like clang with -Wcast-align) will generate a warning if you try to cast, say, a simde_float32* to a simde__m64* for this function. I think the choice of argument type is unfortunate, but I do think we need to stick to it here. 
If there is demand I can always add something like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcombine_f32(vld1_f32( HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); #else simde__m64_private b_; simde_memcpy(&b_, mem_addr, sizeof(b_)); r_.i32[0] = b_.i32[0]; r_.i32[1] = b_.i32[1]; r_.i32[2] = a_.i32[2]; r_.i32[3] = a_.i32[3]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #if HEDLEY_HAS_WARNING("-Wold-style-cast") #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) #else #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) #endif #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_loadr_ps(mem_addr); #else simde__m128_private r_, v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vrev64q_f32(v_.neon_f32); r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) r_.altivec_f32 = vec_reve(v_.altivec_f32); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); #else r_.f32[0] = v_.f32[3]; r_.f32[1] = v_.f32[2]; r_.f32[2] = v_.f32[1]; r_.f32[3] = v_.f32[0]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 
simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_loadu_ps(mem_addr); #else simde__m128_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_load(mem_addr); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) r_.altivec_f32 = vec_vsx_ld(0, mem_addr); #else simde_memcpy(&r_, mem_addr, sizeof(r_)); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); #else simde__m64_private a_ = simde__m64_to_private(a), mask_ = simde__m64_to_private(mask); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) if (mask_.i8[i] < 0) mem_addr[i] = a_.i8[i]; #endif } #define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) # define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_max_pi16(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < 
(sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) # define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_max_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_max_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_NANS) r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_max_pu8(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) # define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_max_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_max_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32_t value = vgetq_lane_f32(maxq_f32(a_.neon_f32, b_.neon_f32), 0); r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); #else r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? 
a_.f32[0] : b_.f32[0]; r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_min_pi16(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) # define _m_pminsw(a, b) simde_mm_min_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_min_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_min_ps(a, b); #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) return simde__m128_from_neon_f32(vminq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_FAST_NANS) r_.wasm_v128 = wasm_f32x4_min(a_.wasm_v128, b_.wasm_v128); #else r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128)); #endif return simde__m128_from_private(r_); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_FAST_NANS) r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); #else r_.altivec_f32 = vec_sel(b_.altivec_f32, 
a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); #endif return simde__m128_from_private(r_); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) simde__m128 mask = simde_mm_cmplt_ps(a, b); return simde_mm_or_ps(simde_mm_and_ps(mask, a), simde_mm_andnot_ps(mask, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_min_pu8(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) # define _m_pminub(a, b) simde_mm_min_pu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_min_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_min_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); #else r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? 
a_.f32[0] : b_.f32[0]; r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_movehl_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x2_t a32 = vget_high_f32(a_.neon_f32); float32x2_t b32 = vget_high_f32(b_.neon_f32); r_.neon_f32 = vcombine_f32(b32, a32); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_mergel(b_.altivec_i64, a_.altivec_i64)); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); #else r_.f32[0] = b_.f32[2]; r_.f32[1] = b_.f32[3]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_movelh_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x2_t a10 = vget_low_f32(a_.neon_f32); float32x2_t b10 = vget_low_f32(b_.neon_f32); r_.neon_f32 = vcombine_f32(a10, b10); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_mergeh(a_.altivec_i64, b_.altivec_i64)); #else r_.f32[0] = a_.f32[0]; r_.f32[1] = a_.f32[1]; r_.f32[2] = b_.f32[0]; r_.f32[3] = b_.f32[1]; #endif 
return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_movemask_pi8 (simde__m64 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_movemask_pi8(a); #else simde__m64_private a_ = simde__m64_to_private(a); int r = 0; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint8x8_t input = a_.neon_u8; const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; const uint8x8_t mask_and = vdup_n_u8(0x80); const int8x8_t mask_shift = vld1_s8(xr); const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); uint8x8_t lo = mask_result; r = vaddv_u8(lo); #else const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < nmemb ; i++) { r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); } #endif return r; #endif } #define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) # define _m_pmovmskb(a) simde_mm_movemask_pi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_movemask_ps (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_movemask_ps(a); #else int r = 0; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) static const int32_t shift_amount[] = { 0, 1, 2, 3 }; const int32x4_t shift = vld1q_s32(shift_amount); uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31); return HEDLEY_STATIC_CAST(int, vaddvq_u32(vshlq_u32(tmp, shift))); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) // Shift out everything but the sign bits with a 32-bit unsigned shift right. uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); // Merge the two pairs together with a 64-bit unsigned shift right + add. 
uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); // Extract the result. return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; } #endif return r; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_mul_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_mul_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = a_.f32 * b_.f32; #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[i] * b_.f32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_mul_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_mul_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); r_.f32[0] = a_.f32[0] * b_.f32[0]; r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 
simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_mulhi_pu16(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); const uint32x4_t t2 = vshrq_n_u32(t1, 16); const uint16x4_t t3 = vmovn_u32(t2); r_.neon_u16 = t3; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) # define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) #endif #if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) #else #define SIMDE_MM_HINT_NTA 0 #define SIMDE_MM_HINT_T0 1 #define SIMDE_MM_HINT_T1 2 #define SIMDE_MM_HINT_T2 3 #define SIMDE_MM_HINT_ENTA 4 #define SIMDE_MM_HINT_ET0 5 #define SIMDE_MM_HINT_ET1 6 #define SIMDE_MM_HINT_ET2 7 #endif #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) HEDLEY_DIAGNOSTIC_PUSH #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") #endif #undef _MM_HINT_NTA #define _MM_HINT_NTA SIMDE_MM_HINT_NTA #undef 
_MM_HINT_T0 #define _MM_HINT_T0 SIMDE_MM_HINT_T0 #undef _MM_HINT_T1 #define _MM_HINT_T1 SIMDE_MM_HINT_T1 #undef _MM_HINT_T2 #define _MM_HINT_T2 SIMDE_MM_HINT_T2 #undef _MM_HINT_ETNA #define _MM_HINT_ETNA SIMDE_MM_HINT_ETNA #undef _MM_HINT_ET0 #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 #undef _MM_HINT_ET1 #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 #undef _MM_HINT_ET1 #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 HEDLEY_DIAGNOSTIC_POP #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_prefetch (char const* p, int i) { #if defined(HEDLEY_GCC_VERSION) __builtin_prefetch(p); #else (void) p; #endif (void) i; } #if defined(SIMDE_X86_SSE_NATIVE) #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ #define simde_mm_prefetch(p, i) \ (__extension__({ \ HEDLEY_DIAGNOSTIC_PUSH \ HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ _mm_prefetch((p), (i)); \ HEDLEY_DIAGNOSTIC_POP \ })) #else #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) #endif #endif #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_x_mm_negate_ps(simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) r_.altivec_f32 = vec_neg(a_.altivec_f32); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vnegq_f32(a_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) r_.altivec_f32 = vec_neg(a_.altivec_f32); #elif defined(SIMDE_VECTOR_NEGATE) r_.f32 = -a_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = -a_.f32[i]; } #endif return simde__m128_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128 
simde_mm_rcp_ps (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_rcp_ps(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x4_t recip = vrecpeq_f32(a_.neon_f32); #if SIMDE_ACCURACY_PREFERENCE > 0 for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); } #endif r_.neon_f32 = recip; #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = vec_re(a_.altivec_f32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.f32 = 1.0f / a_.f32; #elif defined(SIMDE_IEEE754_STORAGE) /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { int32_t ix; simde_float32 fx = a_.f32[i]; simde_memcpy(&ix, &fx, sizeof(ix)); int32_t x = INT32_C(0x7EF311C3) - ix; simde_float32 temp; simde_memcpy(&temp, &x, sizeof(temp)); r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = 1.0f / a_.f32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_rcp_ss (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_rcp_ss(a); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); #else simde__m128_private r_, a_ = simde__m128_to_private(a); r_.f32[0] = 1.0f / a_.f32[0]; r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde__m128 simde_mm_rsqrt_ps (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_rsqrt_ps(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); #elif defined(SIMDE_IEEE754_STORAGE) /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf Pages 100 - 103 */ SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { #if SIMDE_ACCURACY_PREFERENCE <= 0 r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); #else simde_float32 x = a_.f32[i]; simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; int32_t ix; simde_memcpy(&ix, &x, sizeof(ix)); #if SIMDE_ACCURACY_PREFERENCE == 1 ix = INT32_C(0x5F375A82) - (ix >> 1); #else ix = INT32_C(0x5F37599E) - (ix >> 1); #endif simde_memcpy(&x, &ix, sizeof(x)); #if SIMDE_ACCURACY_PREFERENCE >= 2 x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); #endif x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); r_.f32[i] = x; #endif } #elif defined(simde_math_sqrtf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); } #else HEDLEY_UNREACHABLE(); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_rsqrt_ss (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_rsqrt_ss(a); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); #elif defined(SIMDE_IEEE754_STORAGE) { #if SIMDE_ACCURACY_PREFERENCE <= 0 r_.i32[0] = 
INT32_C(0x5F37624F) - (a_.i32[0] >> 1); #else simde_float32 x = a_.f32[0]; simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; int32_t ix; simde_memcpy(&ix, &x, sizeof(ix)); #if SIMDE_ACCURACY_PREFERENCE == 1 ix = INT32_C(0x5F375A82) - (ix >> 1); #else ix = INT32_C(0x5F37599E) - (ix >> 1); #endif simde_memcpy(&x, &ix, sizeof(x)); #if SIMDE_ACCURACY_PREFERENCE >= 2 x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); #endif x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); r_.f32[0] = x; #endif } r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; #elif defined(simde_math_sqrtf) r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; #else HEDLEY_UNREACHABLE(); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_sad_pu8(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint16x4_t t = vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)); uint16_t r0 = t[0] + t[1] + t[2] + t[3]; r_.neon_u16 = vset_lane_u16(r0, vdup_n_u16(0), 0); #else uint16_t sum = 0; #if defined(SIMDE_HAVE_STDLIB_H) SIMDE_VECTORIZE_REDUCTION(+:sum) for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { sum += HEDLEY_STATIC_CAST(uint8_t, abs(a_.u8[i] - b_.u8[i])); } r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); r_.i16[1] = 0; r_.i16[2] = 0; r_.i16[3] = 0; #else HEDLEY_UNREACHABLE(); #endif #endif return simde__m64_from_private(r_); #endif } #define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) # define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde__m128 simde_mm_set_ss (simde_float32 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_set_ss(a); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); #else return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_set_ss(a) simde_mm_set_ss(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_setr_ps(e3, e2, e1, e0); #else return simde_mm_set_ps(e0, e1, e2, e3); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_setzero_ps (void) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_setzero_ps(); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_splats(SIMDE_FLOAT32_C(0.0)); #else simde__m128 r; simde_memset(&r, 0, sizeof(r)); return r; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_setzero_ps() simde_mm_setzero_ps() #endif #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_undefined_ps (void) { simde__m128_private r_; #if defined(SIMDE_HAVE_UNDEFINED128) r_.n = _mm_undefined_ps(); #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) r_ = simde__m128_to_private(simde_mm_setzero_ps()); #endif return simde__m128_from_private(r_); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_undefined_ps() simde_mm_undefined_ps() #endif #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) HEDLEY_DIAGNOSTIC_POP #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_x_mm_setone_ps (void) { simde__m128 t = 
simde_mm_setzero_ps(); return simde_mm_cmpeq_ps(t, t); } SIMDE_FUNCTION_ATTRIBUTES void simde_mm_sfence (void) { /* TODO: Use Hedley. */ #if defined(SIMDE_X86_SSE_NATIVE) _mm_sfence(); #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) __atomic_thread_fence(__ATOMIC_SEQ_CST); #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) __atomic_thread_fence(__ATOMIC_SEQ_CST); #else atomic_thread_fence(memory_order_seq_cst); #endif #elif defined(_MSC_VER) MemoryBarrier(); #elif HEDLEY_HAS_EXTENSION(c_atomic) __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) __sync_synchronize(); #elif defined(_OPENMP) #pragma omp critical(simde_mm_sfence_) { } #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_sfence() simde_mm_sfence() #endif #define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) #endif #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) # define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) #elif defined(SIMDE_SHUFFLE_VECTOR_) # define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ const simde__m64_private simde__tmp_a_ = simde__m64_to_private(a); \ simde__m64_from_private((simde__m64_private) { .i16 = \ SIMDE_SHUFFLE_VECTOR_(16, 8, \ (simde__tmp_a_).i16, \ (simde__tmp_a_).i16, \ (((imm8) ) & 3), \ (((imm8) >> 2) & 3), \ (((imm8) >> 4) & 3), \ (((imm8) >> 6) & 3)) }); })) #else SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); for (size_t i = 0 ; i < 
sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; } HEDLEY_DIAGNOSTIC_PUSH #if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") # pragma clang diagnostic ignored "-Wconditional-uninitialized" #endif return simde__m64_from_private(r_); HEDLEY_DIAGNOSTIC_POP } #endif #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) # define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) #else # define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) #endif #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) # define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) #endif #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) # define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_shuffle_ps(a, b, imm8) \ __extension__({ \ float32x4_t ret; \ ret = vmovq_n_f32( \ vgetq_lane_f32(a, (imm8) & (0x3))); \ ret = vsetq_lane_f32( \ vgetq_lane_f32(a, ((imm8) >> 2) & 0x3), \ ret, 1); \ ret = vsetq_lane_f32( \ vgetq_lane_f32(b, ((imm8) >> 4) & 0x3), \ ret, 2); \ ret = vsetq_lane_f32( \ vgetq_lane_f32(b, ((imm8) >> 6) & 0x3), \ ret, 3); \ }) #elif defined(SIMDE_SHUFFLE_VECTOR_) # define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ simde__m128_from_private((simde__m128_private) { .f32 = \ SIMDE_SHUFFLE_VECTOR_(32, 16, \ simde__m128_to_private(a).f32, \ simde__m128_to_private(b).f32, \ (((imm8) ) & 3), \ (((imm8) >> 2) & 3), \ (((imm8) >> 4) & 3) + 4, \ (((imm8) >> 6) & 3) + 4) }); })) #else SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; return 
simde__m128_from_private(r_); } #endif #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_sqrt_ps (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_sqrt_ps(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f32 = vsqrtq_f32(a_.neon_f32); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x4_t est = vrsqrteq_f32(a_.neon_f32); for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); } r_.neon_f32 = vmulq_f32(a_.neon_f32, est); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f32 = vec_sqrt(a_.altivec_f32); #elif defined(simde_math_sqrt) SIMDE_VECTORIZE for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { r_.f32[i] = simde_math_sqrtf(a_.f32[i]); } #else HEDLEY_UNREACHABLE(); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_sqrt_ss (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_sqrt_ss(a); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32_t value = vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); #elif defined(simde_math_sqrtf) r_.f32[0] = simde_math_sqrtf(a_.f32[0]); r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; #else HEDLEY_UNREACHABLE(); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) 
#endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) _mm_store_ps(mem_addr, a); #else simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_f32(mem_addr, a_.neon_f32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) vec_st(a_.altivec_f32, 0, mem_addr); #elif defined(SIMDE_WASM_SIMD128_NATIVE) wasm_v128_store(mem_addr, a_.wasm_v128); #else simde_memcpy(mem_addr, &a_, sizeof(a)); #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); #if defined(SIMDE_X86_SSE_NATIVE) _mm_store_ps1(mem_addr_, a); #else simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) wasm_v128_store(mem_addr_, wasm_v32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); #elif defined(SIMDE_SHUFFLE_VECTOR_) simde__m128_private tmp_; tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); simde_mm_store_ps(mem_addr_, tmp_.f32); #else SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { mem_addr_[i] = a_.f32[0]; } #endif #endif } #define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) # define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, 
simde_float32*, mem_addr), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) _mm_store_ss(mem_addr, a); #else simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_lane_f32(mem_addr, a_.neon_f32, 0); #else *mem_addr = a_.f32[0]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); #else simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32)); #else simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); #else simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) dest_->neon_f32 = vget_low_f32(a_.neon_f32); #else dest_->f32[0] = a_.f32[0]; dest_->f32[1] = a_.f32[1]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) _mm_storer_ps(mem_addr, a); #else simde__m128_private a_ = simde__m128_to_private(a); #if 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x4_t tmp = vrev64q_f32(a_.neon_f32); vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); #elif defined(SIMDE_SHUFFLE_VECTOR_) a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); #else SIMDE_VECTORIZE_ALIGNED(mem_addr:16) for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; } #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) _mm_storeu_ps(mem_addr, a); #else simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_f32(mem_addr, a_.neon_f32); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) vec_vsx_st(a_.altivec_f32, 0, mem_addr); #else simde_memcpy(mem_addr, &a_, sizeof(a_)); #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_sub_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_sub_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = a_.f32 - b_.f32; #else 
SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[i] - b_.f32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_sub_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_sub_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); r_.f32[0] = a_.f32[0] - b_.f32[0]; r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_ucomieq_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f32[0] == b_.f32[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f32[0] == b_.f32[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_ucomige_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = 
simde__m128_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f32[0] >= b_.f32[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f32[0] >= b_.f32[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_ucomigt_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f32[0] > b_.f32[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f32[0] > b_.f32[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_ucomile_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f32[0] <= b_.f32[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f32[0] <= b_.f32[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_ucomilt_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f32[0] < b_.f32[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f32[0] < b_.f32[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_ucomineq_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); r 
= !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f32[0] != b_.f32[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f32[0] != b_.f32[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) #endif #if defined(SIMDE_X86_SSE_NATIVE) # if defined(__has_builtin) # if __has_builtin(__builtin_ia32_undef128) # define SIMDE_HAVE_UNDEFINED128 # endif # elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) # define SIMDE_HAVE_UNDEFINED128 # endif #endif #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_unpackhi_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x2_t a1 = vget_high_f32(a_.neon_f32); float32x2_t b1 = vget_high_f32(b_.neon_f32); float32x2x2_t result = vzip_f32(a1, b1); r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); #else r_.f32[0] = a_.f32[2]; r_.f32[1] = b_.f32[2]; r_.f32[2] = a_.f32[3]; r_.f32[3] = b_.f32[3]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_unpacklo_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x2_t a1 = vget_low_f32(a_.neon_f32); float32x2_t b1 = vget_low_f32(b_.neon_f32); float32x2x2_t result = vzip_f32(a1, b1); r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); #else r_.f32[0] = a_.f32[0]; r_.f32[1] = b_.f32[0]; r_.f32[2] = a_.f32[1]; r_.f32[3] = b_.f32[1]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); #else simde__m64_private* dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); #else dest->i64[0] = a_.i64[0]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) _mm_stream_ps(mem_addr, a); #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) simde__m128_private a_ = simde__m128_to_private(a); __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr)); #else simde_mm_store_ps(mem_addr, a); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, 
mem_addr), (a)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ do { \ float32x4x2_t ROW01 = vtrnq_f32(row0, row1); \ float32x4x2_t ROW23 = vtrnq_f32(row2, row3); \ row0 = vcombine_f32(vget_low_f32(ROW01.val[0]), \ vget_low_f32(ROW23.val[0])); \ row1 = vcombine_f32(vget_low_f32(ROW01.val[1]), \ vget_low_f32(ROW23.val[1])); \ row2 = vcombine_f32(vget_high_f32(ROW01.val[0]), \ vget_high_f32(ROW23.val[0])); \ row3 = vcombine_f32(vget_high_f32(ROW01.val[1]), \ vget_high_f32(ROW23.val[1])); \ } while (0) #else #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ do { \ simde__m128 tmp3, tmp2, tmp1, tmp0; \ tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ row0 = simde_mm_movelh_ps(tmp0, tmp2); \ row1 = simde_mm_movehl_ps(tmp2, tmp0); \ row2 = simde_mm_movelh_ps(tmp1, tmp3); \ row3 = simde_mm_movehl_ps(tmp3, tmp1); \ } while (0) #endif #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_SSE_H) */ simde-0.7.2/simde/x86/sse2.h000066400000000000000000007355721400333146700154200ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2017-2020 Evan Nemerson * 2015-2017 John W. Ratcliff * 2015 Brandon Rowlett * 2015 Ken Fast * 2017 Hasindu Gamaarachchi * 2018 Jeff Daily */ #if !defined(SIMDE_X86_SSE2_H) #define SIMDE_X86_SSE2_H #include "sse.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ typedef union { #if defined(SIMDE_VECTOR_SUBSCRIPT) SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; #if defined(SIMDE_HAVE_INT128_) SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; #endif SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; #else SIMDE_ALIGN_TO_16 int8_t i8[16]; SIMDE_ALIGN_TO_16 int16_t i16[8]; SIMDE_ALIGN_TO_16 int32_t i32[4]; SIMDE_ALIGN_TO_16 int64_t i64[2]; SIMDE_ALIGN_TO_16 uint8_t u8[16]; 
SIMDE_ALIGN_TO_16 uint16_t u16[8]; SIMDE_ALIGN_TO_16 uint32_t u32[4]; SIMDE_ALIGN_TO_16 uint64_t u64[2]; #if defined(SIMDE_HAVE_INT128_) SIMDE_ALIGN_TO_16 simde_int128 i128[1]; SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; #endif SIMDE_ALIGN_TO_16 simde_float32 f32[4]; SIMDE_ALIGN_TO_16 simde_float64 f64[2]; SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; #endif SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; SIMDE_ALIGN_TO_16 simde__m64 m64[2]; #if defined(SIMDE_X86_SSE2_NATIVE) SIMDE_ALIGN_TO_16 __m128i n; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_ALIGN_TO_16 int8x16_t neon_i8; SIMDE_ALIGN_TO_16 int16x8_t neon_i16; SIMDE_ALIGN_TO_16 int32x4_t neon_i32; SIMDE_ALIGN_TO_16 int64x2_t neon_i64; SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; SIMDE_ALIGN_TO_16 float32x4_t neon_f32; #if defined(SIMDE_ARCH_AARCH64) SIMDE_ALIGN_TO_16 float64x2_t neon_f64; #endif #elif defined(SIMDE_WASM_SIMD128_NATIVE) SIMDE_ALIGN_TO_16 v128_t wasm_v128; #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; #if defined(__UINT_FAST32_TYPE__) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; #else SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; #endif SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; #if defined(__UINT_FAST32_TYPE__) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) 
altivec_u32f;
    #else
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f;
    #endif
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32;
    #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64;
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64;
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64;
    #endif
  #endif
} simde__m128i_private;

/* Private, lane-addressable representation of a 128-bit vector of doubles.
 *
 * Every member overlays the same 16 bytes (checked by the static
 * assertions that follow the public typedefs), so an implementation can
 * write through one view and read through another.  Besides the fixed
 * width integer and floating point lanes there are views as two
 * simde__m64 halves and, depending on the target, as native vector
 * types. */
typedef union {
  #if defined(SIMDE_VECTOR_SUBSCRIPT)
    /* Each view is declared with SIMDE_VECTOR(16) rather than as an array. */
    SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
  #else
    /* Plain arrays; the element counts are chosen so each view is 16 bytes. */
    SIMDE_ALIGN_TO_16 int8_t i8[16];
    SIMDE_ALIGN_TO_16 int16_t i16[8];
    SIMDE_ALIGN_TO_16 int32_t i32[4];
    SIMDE_ALIGN_TO_16 int64_t i64[2];
    SIMDE_ALIGN_TO_16 uint8_t u8[16];
    SIMDE_ALIGN_TO_16 uint16_t u16[8];
    SIMDE_ALIGN_TO_16 uint32_t u32[4];
    SIMDE_ALIGN_TO_16 uint64_t u64[2];
    SIMDE_ALIGN_TO_16 simde_float32 f32[4];
    SIMDE_ALIGN_TO_16 simde_float64 f64[2];
    /* The "fast" views have as many lanes as fit in 16 bytes. */
    SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)];
    SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];
  #endif

  /* The same 16 bytes seen as two 64-bit MMX-style values. */
  SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2];
  SIMDE_ALIGN_TO_16 simde__m64 m64[2];

  /* Native vector views; at most one family is present per target. */
  #if defined(SIMDE_X86_SSE2_NATIVE)
    SIMDE_ALIGN_TO_16 __m128d n;
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    SIMDE_ALIGN_TO_16 int8x16_t neon_i8;
    SIMDE_ALIGN_TO_16 int16x8_t neon_i16;
    SIMDE_ALIGN_TO_16 int32x4_t neon_i32;
    SIMDE_ALIGN_TO_16 int64x2_t neon_i64;
    SIMDE_ALIGN_TO_16 uint8x16_t neon_u8;
    SIMDE_ALIGN_TO_16 uint16x8_t neon_u16;
    SIMDE_ALIGN_TO_16 uint32x4_t neon_u32;
    SIMDE_ALIGN_TO_16 uint64x2_t neon_u64;
    SIMDE_ALIGN_TO_16 float32x4_t neon_f32;
    /* The float64x2_t view is only declared for AArch64 builds. */
    #if defined(SIMDE_ARCH_AARCH64)
      SIMDE_ALIGN_TO_16 float64x2_t neon_f64;
    #endif
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    SIMDE_ALIGN_TO_16 v128_t wasm_v128;
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32;
    /* Use the compiler's fast 32-bit type when it is known and P7 is
     * available, otherwise fall back to a plain int vector. */
    #if defined(__INT_FAST32_TYPE__) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f;
    #else
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f;
    #endif
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32;
    #if defined(__UINT_FAST32_TYPE__) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f;
    #else
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f;
    #endif
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32;
    /* 64-bit integer and double views are only declared for P7 and later. */
    #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64;
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64;
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64;
    #endif
  #endif
} simde__m128d_private;

/* Public vector types: use the platform's native type where one exists. */
#if defined(SIMDE_X86_SSE2_NATIVE)
  typedef __m128i simde__m128i;
  typedef __m128d simde__m128d;
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
typedef int64x2_t simde__m128i; # if defined(SIMDE_ARCH_AARCH64) typedef float64x2_t simde__m128d; # elif defined(SIMDE_VECTOR_SUBSCRIPT) typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; # else typedef simde__m128d_private simde__m128d; # endif #elif defined(SIMDE_WASM_SIMD128_NATIVE) typedef v128_t simde__m128i; typedef v128_t simde__m128d; #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; #else typedef simde__m128d_private simde__m128d; #endif #elif defined(SIMDE_VECTOR_SUBSCRIPT) typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; #else typedef simde__m128i_private simde__m128i; typedef simde__m128d_private simde__m128d; #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) typedef simde__m128i __m128i; typedef simde__m128d __m128d; #endif HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); #if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde__m128i_from_private(simde__m128i_private v) { simde__m128i r; 
simde_memcpy(&r, &v, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m128i_private simde__m128i_to_private(simde__m128i v) { simde__m128i_private r; simde_memcpy(&r, &v, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde__m128d_from_private(simde__m128d_private v) { simde__m128d r; simde_memcpy(&r, &v, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m128d_private simde__m128d_to_private(simde__m128d v) { simde__m128d_private r; simde_memcpy(&r, &v, sizeof(r)); return r; } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) #endif #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) 
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) #endif #endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32) #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) #endif #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) #if defined(SIMDE_BUG_GCC_95782) 
SIMDE_FUNCTION_ATTRIBUTES SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d_to_altivec_f64(simde__m128d value) { simde__m128d_private r_ = simde__m128d_to_private(value); return r_.altivec_f64; } SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { simde__m128d_private r_; r_.altivec_f64 = value; return simde__m128d_from_private(r_); } #else SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) #endif #endif #elif defined(SIMDE_WASM_SIMD128_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); #endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set_pd(e1, e0); #else simde__m128d_private r_; #if defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_make(e0, e1); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; r_.neon_f64 = vld1q_f64(data); #else r_.f64[0] = e0; r_.f64[1] = e1; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_set1_pd (simde_float64 a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set1_pd(a); #else simde__m128d_private r_; #if defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_splat(a); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vdupq_n_f64(a); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.f64[i] = a; } #endif return simde__m128d_from_private(r_); #endif } #define simde_mm_set_pd1(a) simde_mm_set1_pd(a) #if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set1_pd(a) simde_mm_set1_pd(a) #define _mm_set_pd1(a) simde_mm_set1_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_x_mm_abs_pd(simde__m128d a) { #if defined(SIMDE_X86_AVX512F_NATIVE) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,4,0)) return _mm512_castpd512_pd128(_mm512_abs_pd(_mm512_castpd128_pd512(a))); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) r_.neon_f32 = vabsq_f32(a_.neon_f32); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f32 = vec_abs(a_.altivec_f32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_fabs(a_.f64[i]); } #endif return simde__m128d_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_x_mm_not_pd(simde__m128d a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) __m128i ai = _mm_castpd_si128(a); return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vmvnq_s32(a_.neon_i32); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = ~a_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = ~(a_.i32f[i]); } #endif return simde__m128d_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { /* This function is for when you want to blend two elements together * according to a mask. 
It is similar to _mm_blendv_pd, except that * it is undefined whether the blend is based on the highest bit in * each lane (like blendv) or just bitwise operations. This allows * us to implement the function efficiently everywhere. * * Basically, you promise that all the lanes in mask are either 0 or * ~0. */ #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_blendv_pd(a, b, mask); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b), mask_ = simde__m128d_to_private(mask); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); } #endif return simde__m128d_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_add_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = a_.i8 + b_.i8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[i] + b_.i8[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_add_epi16(a, b); #else 
simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = a_.i16 + b_.i16; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] + b_.i16[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_add_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 + b_.i32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] + b_.i32[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_add_epi64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vaddq_s64(a_.neon_i64, 
b_.neon_i64); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 + b_.i64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[i] + b_.i64[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_add_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_add_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = a_.f64 + b_.f64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[i] + b_.f64[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_move_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_move_sd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) #if 
defined(HEDLEY_IBM_VERSION) r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); #else r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); #endif #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); #else r_.f64[0] = b_.f64[0]; r_.f64[1] = a_.f64[1]; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_add_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_add_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); r_.f64[0] = a_.f64[0] + b_.f64[0]; r_.f64[1] = a_.f64[1]; return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_add_si64 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_add_si64(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); #else r_.i64[0] = a_.i64[0] + b_.i64[0]; #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_adds_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) 
r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i8x16_add_saturate(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { const int_fast16_t tmp = HEDLEY_STATIC_CAST(int_fast16_t, a_.i8[i]) + HEDLEY_STATIC_CAST(int_fast16_t, b_.i8[i]); r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, ((tmp < INT8_MAX) ? ((tmp > INT8_MIN) ? tmp : INT8_MIN) : INT8_MAX)); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_adds_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_add_saturate(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { const int_fast32_t tmp = HEDLEY_STATIC_CAST(int_fast32_t, a_.i16[i]) + HEDLEY_STATIC_CAST(int_fast32_t, b_.i16[i]); r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((tmp < INT16_MAX) ? ((tmp > INT16_MIN) ? 
tmp : INT16_MIN) : INT16_MAX)); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_adds_epu8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u8x16_add_saturate(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = ((UINT8_MAX - a_.u8[i]) > b_.u8[i]) ? (a_.u8[i] + b_.u8[i]) : UINT8_MAX; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_adds_epu16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u16x8_add_saturate(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = ((UINT16_MAX - a_.u16[i]) > b_.u16[i]) ? 
(a_.u16[i] + b_.u16[i]) : UINT16_MAX; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_and_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_and_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f & b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_and_si128 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_and_si128(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f & b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_andnot_pd 
(simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = ~a_.i32f & b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = ~a_.u64[i] & b_.u64[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = ~a_.i32f & b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_xor_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_xor_pd(a, b); #else simde__m128d_private r_, a_ = 
simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f ^ b_.i32f; #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_avg_epu8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) uint16_t wa SIMDE_VECTOR(32); uint16_t wb SIMDE_VECTOR(32); uint16_t wr SIMDE_VECTOR(32); SIMDE_CONVERT_VECTOR_(wa, a_.u8); SIMDE_CONVERT_VECTOR_(wb, b_.u8); wr = (wa + wb + 1) >> 1; SIMDE_CONVERT_VECTOR_(r_.u8, wr); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_avg_epu16(a, b); #else simde__m128i_private r_, a_ = 
simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) uint32_t wa SIMDE_VECTOR(32); uint32_t wb SIMDE_VECTOR(32); uint32_t wr SIMDE_VECTOR(32); SIMDE_CONVERT_VECTOR_(wa, a_.u16); SIMDE_CONVERT_VECTOR_(wb, b_.u16); wr = (wa + wb + 1) >> 1; SIMDE_CONVERT_VECTOR_(r_.u16, wr); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_setzero_si128 (void) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_setzero_si128(); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vdupq_n_s32(0); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); #elif defined(SIMDE_VECTOR_SUBSCRIPT) r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = 0; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_setzero_si128() (simde_mm_setzero_si128()) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_bslli_si128 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m128i_private r_, a_ = 
simde__m128i_to_private(a); if (HEDLEY_UNLIKELY((imm8 & ~15))) { return simde_mm_setzero_si128(); } #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) r_.altivec_i8 = #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) vec_slo #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ vec_sro #endif (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) r_.u128[0] = a_.u128[0] << (imm8 * 8); #else r_ = simde__m128i_to_private(simde_mm_setzero_si128()); for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[i - imm8]; } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) #define simde_mm_bslli_si128(a, imm8) \ simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? 
(vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) #elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ const simde__m128i_private simde__tmp_a_ = simde__m128i_to_private(a); \ const simde__m128i_private simde__tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ simde__m128i_private simde__tmp_r_; \ if (HEDLEY_UNLIKELY(imm8 > 15)) { \ simde__tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ } else { \ simde__tmp_r_.i8 = \ SIMDE_SHUFFLE_VECTOR_(8, 16, \ simde__tmp_z_.i8, \ (simde__tmp_a_).i8, \ HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ } \ simde__m128i_from_private(simde__tmp_r_); })) #endif #define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_bsrli_si128 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m128i_private r_, a_ = simde__m128i_to_private(a); if (HEDLEY_UNLIKELY((imm8 & ~15))) { return simde_mm_setzero_si128(); } #if 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) r_.altivec_i8 = #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) vec_sro #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ vec_slo #endif (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { const int e = HEDLEY_STATIC_CAST(int, i) + imm8; r_.i8[i] = (e < 16) ? a_.i8[e] : 0; } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) #define simde_mm_bsrli_si128(a, imm8) \ simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? imm8 : (imm8 & 15)))) #elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ const simde__m128i_private simde__tmp_a_ = simde__m128i_to_private(a); \ const simde__m128i_private simde__tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ simde__m128i_private simde__tmp_r_ = simde__m128i_to_private(a); \ if (HEDLEY_UNLIKELY(imm8 > 15)) { \ simde__tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ } else { \ simde__tmp_r_.i8 = \ SIMDE_SHUFFLE_VECTOR_(8, 16, \ simde__tmp_z_.i8, \ (simde__tmp_a_).i8, \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ HEDLEY_STATIC_CAST(int8_t, 
(imm8 + 26) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ } \ simde__m128i_from_private(simde__tmp_r_); })) #endif #define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_clflush (void const* p) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_clflush(p); #else (void) p; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_clflush(a, b) simde_mm_clflush() #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comieq_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_comieq_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); #else return a_.f64[0] == b_.f64[0]; #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comige_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_comige_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); #else return a_.f64[0] >= b_.f64[0]; #endif #endif } 
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_comigt_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); #else return a_.f64[0] > b_.f64[0]; #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comile_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_comile_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); #else return a_.f64[0] <= b_.f64[0]; #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_comilt_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); #else return a_.f64[0] < b_.f64[0]; #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_comilt_sd(a, 
b) simde_mm_comilt_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_comineq_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); #else return a_.f64[0] != b_.f64[0]; #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { simde__m128d_private r_, dest_ = simde__m128d_to_private(dest), src_ = simde__m128d_to_private(src); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); #else simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); uint64_t u64_nz; simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); uint64x2_t sign_pos = vdupq_n_u64(u64_nz); #endif r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) #if !defined(HEDLEY_IBM_VERSION) r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); #else r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); #endif #elif defined(simde_math_copysign) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); } #else simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); #endif return simde__m128d_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { return 
simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); } SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_castpd_ps (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_castpd_ps(a); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_f32_f64(a); #else simde__m128 r; simde_memcpy(&r, &a, sizeof(a)); return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_castpd_si128 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_castpd_si128(a); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_s64_f64(a); #else simde__m128i r; simde_memcpy(&r, &a, sizeof(a)); return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_castps_pd (simde__m128 a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_castps_pd(a); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_f64_f32(a); #else simde__m128d r; simde_memcpy(&r, &a, sizeof(a)); return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_castps_pd(a) simde_mm_castps_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_castps_si128 (simde__m128 a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_castps_si128(a); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); #else simde__m128i r; simde_memcpy(&r, &a, sizeof(a)); return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_castps_si128(a) simde_mm_castps_si128(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_castsi128_pd (simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_castsi128_pd(a); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vreinterpretq_f64_s64(a); #else simde__m128d r; simde_memcpy(&r, &a, sizeof(a)); return r; #endif } #if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_castsi128_ps (simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_castsi128_ps(a); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); #else simde__m128 r; simde_memcpy(&r, &a, sizeof(a)); return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpeq_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = HEDLEY_STATIC_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpeq_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = (a_.i16 == b_.i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpeq_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpeq_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vceqq_s64(b_.neon_i64, a_.neon_i64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpeq_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; r_.u64[1] = a_.u64[1]; return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpneq_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpneq_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); r_.u64[1] = a_.u64[1]; return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmplt_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = HEDLEY_STATIC_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmplt_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = HEDLEY_STATIC_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmplt_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmplt_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmplt_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); r_.u64[1] = a_.u64[1]; return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmple_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmple_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); r_.u64[1] = a_.u64[1]; return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpgt_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = HEDLEY_STATIC_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpgt_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = HEDLEY_STATIC_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpgt_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpgt_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f64 = HEDLEY_STATIC_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) return _mm_cmpgt_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); r_.u64[1] = a_.u64[1]; return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpge_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f64 = HEDLEY_STATIC_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) return _mm_cmpge_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); r_.u64[1] = a_.u64[1]; return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpngt_pd(a, b); #else return simde_mm_cmple_pd(a, b); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) return _mm_cmpngt_sd(a, b); #else return simde_mm_cmple_sd(a, b); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpnge_pd(a, b); #else return simde_mm_cmplt_pd(a, b); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) return _mm_cmpnge_sd(a, b); #else return simde_mm_cmplt_sd(a, b); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpnlt_pd(a, b); #else return simde_mm_cmpge_pd(a, b); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpnlt_sd(a, b); #else return simde_mm_cmpge_sd(a, 
b); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpnle_pd(a, b); #else return simde_mm_cmpgt_pd(a, b); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpnle_sd(a, b); #else return simde_mm_cmpgt_sd(a, b); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpord_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) /* Note: NEON does not have ordered compare builtin Need to compare a eq a and b eq b to check for NaN Do AND of results to get final */ uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); r_.neon_u64 = vandq_u64(ceqaa, ceqbb); #elif defined(simde_math_isnan) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? 
~UINT64_C(0) : UINT64_C(0); } #else HEDLEY_UNREACHABLE(); #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64 simde_mm_cvtsd_f64 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) return _mm_cvtsd_f64(a); #else simde__m128d_private a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); #else return a_.f64[0]; #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpord_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(simde_math_isnan) r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); r_.u64[1] = a_.u64[1]; #else HEDLEY_UNREACHABLE(); #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpunord_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); #elif defined(simde_math_isnan) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); } #else HEDLEY_UNREACHABLE(); #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpunord_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(simde_math_isnan) r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); r_.u64[1] = a_.u64[1]; #else HEDLEY_UNREACHABLE(); #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cvtepi32_pd (simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtepi32_pd(a); #else simde__m128d_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = (simde_float64) a_.i32[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtepi32_ps (simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtepi32_ps(a); #else simde__m128_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) HEDLEY_DIAGNOSTIC_PUSH #if HEDLEY_HAS_WARNING("-Wc11-extensions") #pragma clang diagnostic ignored "-Wc11-extensions" #endif r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); HEDLEY_DIAGNOSTIC_POP #elif defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = (simde_float32) a_.i32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cvtpd_pi32 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) && 
defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtpd_pi32(a); #else simde__m64_private r_; simde__m128d_private a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { simde_float64 v = simde_math_round(a_.f64[i]); #if defined(SIMDE_FAST_CONVERSION_RANGE) r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); #else r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; #endif } return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtpd_epi32 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtpd_epi32(a); #else simde__m128i_private r_; r_.m64[0] = simde_mm_cvtpd_pi32(a); r_.m64[1] = simde_mm_setzero_si64(); return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtpd_ps (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtpd_ps(a); #else simde__m128_private r_; simde__m128d_private a_ = simde__m128d_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.f64); r_.m64_private[1] = simde__m64_to_private(simde_mm_setzero_si64()); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f32 = vreinterpretq_f32_f64(vcombine_f64(vreinterpret_f64_f32(vcvtx_f32_f64(a_.neon_f64)), vdup_n_f64(0))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { r_.f32[i] = (simde_float32) a_.f64[i]; } simde_memset(&(r_.m64_private[1]), 0, sizeof(r_.m64_private[1])); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cvtpi32_pd (simde__m64 a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtpi32_pd(a); #else simde__m128d_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = (simde_float64) a_.i32[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtps_epi32 (simde__m128 a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtps_epi32(a); #else simde__m128i_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); HEDLEY_DIAGNOSTIC_POP #else a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { simde_float32 v = simde_math_roundf(a_.f32[i]); #if defined(SIMDE_FAST_CONVERSION_RANGE) r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); #else r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; #endif } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cvtps_pd (simde__m128 a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtps_pd(a); #else simde__m128d_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f32[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_cvtsd_si32 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtsd_si32(a); #else simde__m128d_private a_ = simde__m128d_to_private(a); simde_float64 v = simde_math_round(a_.f64[0]); #if defined(SIMDE_FAST_CONVERSION_RANGE) return SIMDE_CONVERT_FTOI(int32_t, v); #else return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_mm_cvtsd_si64 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) #if defined(__PGI) return _mm_cvtsd_si64x(a); #else return _mm_cvtsd_si64(a); #endif #else simde__m128d_private a_ = simde__m128d_to_private(a); return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); #endif } #define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtsd_ss(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a); simde__m128d_private b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); #else r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_x_mm_cvtsi128_si16 (simde__m128i a) { simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vgetq_lane_s16(a_.neon_i16, 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #if defined(SIMDE_BUG_GCC_95227) (void) a_; #endif return vec_extract(a_.altivec_i16, 0); #else return a_.i16[0]; #endif } SIMDE_FUNCTION_ATTRIBUTES 
int32_t simde_mm_cvtsi128_si32 (simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtsi128_si32(a); #else simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vgetq_lane_s32(a_.neon_i32, 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #if defined(SIMDE_BUG_GCC_95227) (void) a_; #endif return vec_extract(a_.altivec_i32, 0); #else return a_.i32[0]; #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_mm_cvtsi128_si64 (simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) #if defined(__PGI) return _mm_cvtsi128_si64x(a); #else return _mm_cvtsi128_si64(a); #endif #else simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vgetq_lane_s64(a_.neon_i64, 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); #endif return a_.i64[0]; #endif } #define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtsi32_sd(a, b); #else simde__m128d_private r_; simde__m128d_private a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_AMD64) r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); #else r_.f64[0] = 
HEDLEY_STATIC_CAST(simde_float64, b); r_.i64[1] = a_.i64[1]; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_cvtsi16_si128 (int16_t a) { simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); #else r_.i16[0] = a; r_.i16[1] = 0; r_.i16[2] = 0; r_.i16[3] = 0; r_.i16[4] = 0; r_.i16[5] = 0; r_.i16[6] = 0; r_.i16[7] = 0; #endif return simde__m128i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtsi32_si128 (int32_t a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtsi32_si128(a); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); #else r_.i32[0] = a; r_.i32[1] = 0; r_.i32[2] = 0; r_.i32[3] = 0; #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) #if !defined(__PGI) return _mm_cvtsi64_sd(a, b); #else return _mm_cvtsi64x_sd(a, b); #endif #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); #else r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); r_.f64[1] = a_.f64[1]; #endif return simde__m128d_from_private(r_); #endif } #define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, 
b) #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtsi64_si128 (int64_t a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) #if !defined(__PGI) return _mm_cvtsi64_si128(a); #else return _mm_cvtsi64x_si128(a); #endif #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i64x2_make(a, 0); #else r_.i64[0] = a; r_.i64[1] = 0; #endif return simde__m128i_from_private(r_); #endif } #define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtss_sd(a, b); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); #else simde__m128d_private a_ = simde__m128d_to_private(a); simde__m128_private b_ = simde__m128_to_private(b); a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); return simde__m128d_from_private(a_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cvttpd_pi32 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvttpd_pi32(a); #else simde__m64_private r_; simde__m128d_private a_ = simde__m128d_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); #else for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { 
simde_float64 v = a_.f64[i]; #if defined(SIMDE_FAST_CONVERSION_RANGE) r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); #else r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; #endif } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvttpd_epi32 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvttpd_epi32(a); #else simde__m128i_private r_; r_.m64[0] = simde_mm_cvttpd_pi32(a); r_.m64[1] = simde_mm_setzero_si64(); return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvttps_epi32 (simde__m128 a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvttps_epi32(a); #else simde__m128i_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); #elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); #else for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { simde_float32 v = a_.f32[i]; #if defined(SIMDE_FAST_CONVERSION_RANGE) r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); #else r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; #endif } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_cvttsd_si32 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvttsd_si32(a); #else simde__m128d_private a_ = simde__m128d_to_private(a); simde_float64 v = a_.f64[0]; #if defined(SIMDE_FAST_CONVERSION_RANGE) return SIMDE_CONVERT_FTOI(int32_t, v); #else return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_mm_cvttsd_si64 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) #if !defined(__PGI) return _mm_cvttsd_si64(a); #else return _mm_cvttsd_si64x(a); #endif #else simde__m128d_private a_ = simde__m128d_to_private(a); return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); #endif } #define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_div_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_div_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = a_.f64 / b_.f64; #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[i] / 
b_.f64[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_div_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_div_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); #else r_.f64[0] = a_.f64[0] / b_.f64[0]; r_.f64[1] = a_.f64[1]; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_extract_epi16 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { uint16_t r; simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #if defined(SIMDE_BUG_GCC_95227) (void) a_; (void) imm8; #endif r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); #else r = a_.u16[imm8 & 7]; #endif return HEDLEY_STATIC_CAST(int32_t, r); } #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { 
simde__m128i_private a_ = simde__m128i_to_private(a); a_.i16[imm8 & 7] = i; return simde__m128i_from_private(a_); } #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_load_pd(mem_addr); #else simde__m128d_private r_; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vld1q_f64(mem_addr); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); #else simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_load1_pd (simde_float64 const* mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_load1_pd(mem_addr); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return simde__m128d_from_wasm_v128(wasm_v64x2_load_splat(mem_addr)); #else return simde_mm_set1_pd(*mem_addr); #endif } #define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_load_sd (simde_float64 const* mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) 
return _mm_load_sd(mem_addr); #else simde__m128d_private r_; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); #else r_.f64[0] = *mem_addr; r_.u64[1] = UINT64_C(0); #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_load_si128 (simde__m128i const* mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); #else simde__m128i_private r_; #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); #else simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadh_pd(a, mem_addr); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); #else simde_float64 t; simde_memcpy(&t, mem_addr, sizeof(t)); r_.f64[0] = a_.f64[0]; r_.f64[1] = t; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadl_epi64(mem_addr); #else 
simde__m128i_private r_; int64_t value; simde_memcpy(&value, mem_addr, sizeof(value)); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); #else r_.i64[0] = value; r_.i64[1] = 0; #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadl_pd(a, mem_addr); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vcombine_f64(vld1_f64( HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); #else r_.f64[0] = *mem_addr; r_.u64[1] = a_.u64[1]; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadr_pd(mem_addr); #else simde__m128d_private r_; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vld1q_f64(mem_addr); r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); #elif defined(SIMDE_WASM_SIMD128_NATIVE) v128_t tmp = wasm_v128_load(mem_addr); r_.wasm_v128 = wasm_v64x2_shuffle(tmp, tmp, 1, 0); #else r_.f64[0] = mem_addr[1]; r_.f64[1] = mem_addr[0]; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d 
simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) {
  /* Loads two float64 values from mem_addr. The generic path copies the
   * bytes with simde_memcpy instead of dereferencing a vector pointer. */
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_loadu_pd(mem_addr);
  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vld1q_f64(mem_addr);
  #else
    simde__m128d_private r_;
    simde_memcpy(&r_, mem_addr, sizeof(r_));
    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr)
#endif

/* Loads sizeof(simde__m128i_private) bytes from mem_addr into a 128-bit
 * integer vector. The AVX-512 intrinsic is used only when both VL and BW are
 * native and SIMDE_BUG_GCC_95483 is not defined; otherwise the load falls
 * back to _mm_loadu_si128, a NEON byte load, or simde_memcpy. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_loadu_epi8(void const * mem_addr) {
  #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_BUG_GCC_95483)
    return _mm_loadu_epi8(mem_addr);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr));
  #else
    simde__m128i_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr));
    #else
      simde_memcpy(&r_, mem_addr, sizeof(r_));
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
/* simde_x_ spelling kept as an alias of the function above. */
#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr)
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && defined(SIMDE_BUG_GCC_95483))
  #undef _mm_loadu_epi8
  #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a)
#endif

/* Same load as simde_mm_loadu_epi8; on NEON the bytes are loaded with
 * vld1q_s8 and reinterpreted as 16-bit lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_loadu_epi16(void const * mem_addr) {
  #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_BUG_GCC_95483)
    return _mm_loadu_epi16(mem_addr);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr));
  #else
    simde__m128i_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)));
    #else
      simde_memcpy(&r_, mem_addr, sizeof(r_));
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr)
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && defined(SIMDE_BUG_GCC_95483))
  #undef _mm_loadu_epi16
  #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a)
#endif

/* 32-bit-lane variant; the native intrinsic only requires AVX512VL here. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_loadu_epi32(void const * mem_addr) {
  #if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483)
    return _mm_loadu_epi32(mem_addr);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr));
  #else
    simde__m128i_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)));
    #else
      simde_memcpy(&r_, mem_addr, sizeof(r_));
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr)
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && defined(SIMDE_BUG_GCC_95483))
  #undef _mm_loadu_epi32
  #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a)
#endif

/* 64-bit-lane variant of the same load. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_loadu_epi64(void const * mem_addr) {
  #if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483)
    return _mm_loadu_epi64(mem_addr);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr));
  #else
    simde__m128i_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)));
    #else
      simde_memcpy(&r_, mem_addr, sizeof(r_));
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr)
#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && defined(SIMDE_BUG_GCC_95483))
  #undef _mm_loadu_epi64
  #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a)
#endif
SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_loadu_si128 (void const* mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); #else simde__m128i_private r_; #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_PACKED_ struct simde_mm_loadu_si128_s { __typeof__(r_) v; } __attribute__((__packed__, __may_alias__)); r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; HEDLEY_DIAGNOSTIC_POP #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); #else simde_memcpy(&r_, mem_addr, sizeof(r_)); #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_madd_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); r_.neon_i32 = vpaddq_s32(pl, ph); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); r_.neon_i32 = vcombine_s32(rl, rh); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) static const SIMDE_POWER_ALTIVEC_VECTOR(int) tz = { 0, 0, 0, 0 }; r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, tz); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { r_.i32[i / 2] = (a_.i16[i] * 
b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); #else simde__m128i_private a_ = simde__m128i_to_private(a), mask_ = simde__m128i_to_private(mask); for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { if (mask_.u8[i] & 0x80) { mem_addr[i] = a_.i8[i]; } } #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_movemask_epi8 (simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ return _mm_movemask_epi8(a); #else int32_t r = 0; simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint8x16_t input = a_.neon_u8; const int8_t xr[16] = {-7, -6, -5, -4, -3, -2, -1, 0, -7, -6, -5, -4, -3, -2, -1, 0}; const uint8x16_t mask_and = vdupq_n_u8(0x80); const int8x16_t mask_shift = vld1q_s8(xr); const uint8x16_t mask_result = vshlq_u8(vandq_u8(input, mask_and), mask_shift); uint8x8_t lo = vget_low_u8(mask_result); uint8x8_t hi = vget_high_u8(mask_result); r = vaddv_u8(lo) + (vaddv_u8(hi) << 8); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) // Use increasingly wide shifts+adds to collect the sign bits // together. // Since the widening shifts would be rather confusing to follow in little endian, everything // will be illustrated in big endian order instead. 
This has a different result - the bits // would actually be reversed on a big endian machine. // Starting input (only half the elements are shown): // 89 ff 1d c0 00 10 99 33 uint8x16_t input = a_.neon_u8; // Shift out everything but the sign bits with an unsigned shift right. // // Bytes of the vector:: // 89 ff 1d c0 00 10 99 33 // \ \ \ \ \ \ \ \ high_bits = (uint16x4_t)(input >> 7) // | | | | | | | | // 01 01 00 01 00 00 01 00 // // Bits of first important lane(s): // 10001001 (89) // \______ // | // 00000001 (01) uint16x8_t high_bits = vreinterpretq_u16_u8(vshrq_n_u8(input, 7)); // Merge the even lanes together with a 16-bit unsigned shift right + add. // 'xx' represents garbage data which will be ignored in the final result. // In the important bytes, the add functions like a binary OR. // // 01 01 00 01 00 00 01 00 // \_ | \_ | \_ | \_ | paired16 = (uint32x4_t)(input + (input >> 7)) // \| \| \| \| // xx 03 xx 01 xx 00 xx 02 // // 00000001 00000001 (01 01) // \_______ | // \| // xxxxxxxx xxxxxx11 (xx 03) uint32x4_t paired16 = vreinterpretq_u32_u16(vsraq_n_u16(high_bits, high_bits, 7)); // Repeat with a wider 32-bit shift + add. // xx 03 xx 01 xx 00 xx 02 // \____ | \____ | paired32 = (uint64x1_t)(paired16 + (paired16 >> 14)) // \| \| // xx xx xx 0d xx xx xx 02 // // 00000011 00000001 (03 01) // \\_____ || // '----.\|| // xxxxxxxx xxxx1101 (xx 0d) uint64x2_t paired32 = vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 14)); // Last, an even wider 64-bit shift + add to get our result in the low 8 bit lanes. // xx xx xx 0d xx xx xx 02 // \_________ | paired64 = (uint8x8_t)(paired32 + (paired32 >> 28)) // \| // xx xx xx xx xx xx xx d2 // // 00001101 00000010 (0d 02) // \ \___ | | // '---. \| | // xxxxxxxx 11010010 (xx d2) uint8x16_t paired64 = vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 28)); // Extract the low 8 bits from each 64-bit lane with 2 8-bit extracts. 
// xx xx xx xx xx xx xx d2 // || return paired64[0] // d2 // Note: Little endian would return the correct value 4b (01001011) instead. r = vgetq_lane_u8(paired64, 0) | (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u8(paired64, 8)) << 8); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { r |= (a_.u8[15 - i] >> 7) << (15 - i); } #endif return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_movemask_pd (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_movemask_pd(a); #else int32_t r = 0; simde__m128d_private a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) static const int64_t shift_amount[] = { 0, 1 }; const int64x2_t shift = vld1q_s64(shift_amount); uint64x2_t tmp = vshrq_n_u64(a_.neon_u64, 63); return HEDLEY_STATIC_CAST(int32_t, vaddvq_u64(vshlq_u64(tmp, shift))); #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { r |= (a_.u64[i] >> 63) << i; } #endif return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_movepi64_pi64 
(simde__m128i a) {
  /* Returns the low 64-bit lane of a as a 64-bit vector. */
  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_movepi64_pi64(a);
  #else
    simde__m64_private r_;
    simde__m128i_private a_ = simde__m128i_to_private(a);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_i64 = vget_low_s64(a_.neon_i64);
    #else
      r_.i64[0] = a_.i64[0];
    #endif

    return simde__m64_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a)
#endif

/* Widens a 64-bit vector to 128 bits: lane 0 receives a, lane 1 is zero. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_movpi64_epi64 (simde__m64 a) {
  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_movpi64_epi64(a);
  #else
    simde__m128i_private r_;
    simde__m64_private a_ = simde__m64_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0));
    #else
      r_.i64[0] = a_.i64[0];
      r_.i64[1] = 0;
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a)
#endif

/* Lane-wise minimum of the eight signed 16-bit lanes of a and b. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_min_epi16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_min_epi16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = (a_.i16[i] < b_.i16[i]) ?
a_.i16[i] : b_.i16[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b)
#endif

/* Lane-wise minimum of the sixteen unsigned 8-bit lanes of a and b. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_min_epu8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_min_epu8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
        r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b)
#endif

/* Lane-wise minimum of the two float64 lanes of a and b. The portable path
 * yields b whenever (a < b) is false, which includes unordered (NaN)
 * comparisons. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_min_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_min_pd(a, b);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
      r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = (a_.f64[i] < b_.f64[i]) ?
a_.f64[i] : b_.f64[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_min_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_min_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); #else r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; r_.f64[1] = a_.f64[1]; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_max_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b)
#endif

/* Lane-wise maximum of the sixteen unsigned 8-bit lanes of a and b. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_max_epu8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_max_epu8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
        r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b)
#endif

/* Lane-wise maximum of the two float64 lanes of a and b. The portable path
 * yields b whenever (a > b) is false, which includes unordered (NaN)
 * comparisons. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_max_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_max_pd(a, b);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = (a_.f64[i] > b_.f64[i]) ?
a_.f64[i] : b_.f64[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_max_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_max_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); #else r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? a_.f64[0] : b_.f64[0]; r_.f64[1] = a_.f64[1]; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_move_epi64 (simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_move_epi64(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); #else r_.i64[0] = a_.i64[0]; r_.i64[1] = 0; #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_move_epi64(a) simde_mm_move_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_mul_epu32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x2_t a_lo = vmovn_u64(a_.neon_u64); uint32x2_t b_lo = vmovn_u64(b_.neon_u64); r_.neon_u64 = vmull_u32(a_lo, b_lo); #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) __typeof__(a_.u32) z = { 0, }; a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, 
a_.u32, z, 0, 4, 2, 6); b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 * b_.i64; #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vmulq_s64(a_.neon_f64, b_.neon_f64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[i] * b_.i64[i]; } #endif return simde__m128i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 % b_.i64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[i] % b_.i64[i]; } #endif return simde__m128i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_mul_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_mul_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = a_.f64 * b_.f64; #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = 
wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[i] * b_.f64[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_mul_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_mul_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); #else r_.f64[0] = a_.f64[0] * b_.f64[0]; r_.f64[1] = a_.f64[1]; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) return _mm_mul_su32(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); #else r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_mulhi_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = 
simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* Widening multiply of each half, then keep the odd 16-bit elements
       * of the 32-bit products (vuzp2q / vuzpq .val[1]). */
      int16x4_t a3210 = vget_low_s16(a_.neon_i16);
      int16x4_t b3210 = vget_low_s16(b_.neon_i16);
      int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */
      #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
        int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16);
        r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654));
      #else
        int16x4_t a7654 = vget_high_s16(a_.neon_i16);
        int16x4_t b7654 = vget_high_s16(b_.neon_i16);
        int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */
        uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654));
        r_.neon_u16 = rv.val[1];
      #endif
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        /* The int32 product is cast to uint32_t before the shift. */
        r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16));
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b)
#endif

/* Multiplies the unsigned 16-bit lanes of a and b and keeps the high 16
 * bits of each 32-bit product. The native intrinsic is skipped on PGI. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
    return _mm_mulhi_epu16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      uint16x4_t a3210 = vget_low_u16(a_.neon_u16);
      uint16x4_t b3210 = vget_low_u16(b_.neon_u16);
      uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */
      #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
        uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16);
        r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654));
      #else
        uint16x4_t a7654 = vget_high_u16(a_.neon_u16);
        uint16x4_t b7654 = vget_high_u16(b_.neon_u16);
        uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */
        uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654));
        r_.neon_u16 = neon_r.val[1];
      #endif
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16);
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b)
#endif

/* Multiplies the 16-bit lanes of a and b and keeps the low 16 bits of each
 * product. The portable path multiplies as uint32_t and truncates. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_mullo_epi16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      (void) a_;
      (void) b_;
      r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]));
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b)
#endif

/* Bitwise OR of two float64 vectors, performed on their integer view. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_or_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_or_pd(a, b);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = a_.i32f | b_.i32f;
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
        r_.i32f[i] = a_.i32f[i] |
b_.i32f[i];
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_or_pd(a, b) simde_mm_or_pd(a, b)
#endif

/* Bitwise OR of two 128-bit integer vectors. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_or_si128 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_or_si128(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = a_.i32f | b_.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
        r_.i32f[i] = a_.i32f[i] | b_.i32f[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_or_si128(a, b) simde_mm_or_si128(a, b)
#endif

/* Narrows the signed 16-bit lanes of a (result bytes 0..7) and b (result
 * bytes 8..15) to signed 8-bit, clamping to [INT8_MIN, INT8_MAX]. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_packs_epi16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i8[i]     = (a_.i16[i] > INT8_MAX) ? INT8_MAX : ((a_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[i]));
        r_.i8[i + 8] = (b_.i16[i] > INT8_MAX) ? INT8_MAX : ((b_.i16[i] < INT8_MIN) ?
INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[i]));
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b)
#endif

/* Narrows the signed 32-bit lanes of a (result lanes 0..3) and b (result
 * lanes 4..7) to signed 16-bit, clamping to [INT16_MIN, INT16_MAX]. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_packs_epi32(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.i16[i]     = (a_.i32[i] > INT16_MAX) ? INT16_MAX : ((a_.i32[i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a_.i32[i]));
        r_.i16[i + 4] = (b_.i32[i] > INT16_MAX) ? INT16_MAX : ((b_.i32[i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, b_.i32[i]));
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b)
#endif

/* Narrows the signed 16-bit lanes of a (result bytes 0..7) and b (result
 * bytes 8..15) to unsigned 8-bit, clamping to [0, UINT8_MAX]. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_packus_epi16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 = vcombine_u8(vqmovun_s16(a_.neon_i16), vqmovun_s16(b_.neon_i16));
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.u8[i]     = (a_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]));
        r_.u8[i + 8] = (b_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[i] < 0) ?
UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]));
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b)
#endif

/* Calls _mm_pause() when SSE2 is native; on every other target this
 * function has an empty body. */
SIMDE_FUNCTION_ATTRIBUTES
void
simde_mm_pause (void) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    _mm_pause();
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_pause() (simde_mm_pause())
#endif

/* Sum of absolute differences: for each 8-byte half of a and b, adds up
 * |a[e] - b[e]| over the unsigned bytes and stores the sum in the
 * corresponding 64-bit lane of the result. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_sad_epu8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* Absolute differences, then successive pairwise widening adds. */
      const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8));
      r_.neon_u64 = vcombine_u64(
        vpaddl_u32(vpaddl_u16(vget_low_u16(t))),
        vpaddl_u32(vpaddl_u16(vget_high_u16(t))));
    #else
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        uint16_t tmp = 0;
        SIMDE_VECTORIZE_REDUCTION(+:tmp)
        for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) {
          const size_t e = j + (i * 8);
          tmp += (a_.u8[e] > b_.u8[e]) ?
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]);
        }
        r_.i64[i] = tmp;
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b)
#endif

/* Builds a vector from sixteen 8-bit values. Arguments are listed from the
 * highest lane (e15) down to the lowest (e0); eN is stored in lane N. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12,
                   int8_t e11, int8_t e10, int8_t e9, int8_t e8,
                   int8_t e7, int8_t e6, int8_t e5, int8_t e4,
                   int8_t e3, int8_t e2, int8_t e1, int8_t e0) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_set_epi8(
      e15, e14, e13, e12, e11, e10, e9, e8,
      e7, e6, e5, e4, e3, e2, e1, e0);
  #else
    simde__m128i_private r_;

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i8x16_make(
        e0, e1, e2, e3, e4, e5, e6, e7,
        e8, e9, e10, e11, e12, e13, e14, e15);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = {
        e0, e1, e2, e3,
        e4, e5, e6, e7,
        e8, e9, e10, e11,
        e12, e13, e14, e15};
      r_.neon_i8 = vld1q_s8(data);
    #else
      r_.i8[ 0] = e0;
      r_.i8[ 1] = e1;
      r_.i8[ 2] = e2;
      r_.i8[ 3] = e3;
      r_.i8[ 4] = e4;
      r_.i8[ 5] = e5;
      r_.i8[ 6] = e6;
      r_.i8[ 7] = e7;
      r_.i8[ 8] = e8;
      r_.i8[ 9] = e9;
      r_.i8[10] = e10;
      r_.i8[11] = e11;
      r_.i8[12] = e12;
      r_.i8[13] = e13;
      r_.i8[14] = e14;
      r_.i8[15] = e15;
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)
#endif

/* Builds a vector from eight 16-bit values; eN is stored in lane N. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4,
                    int16_t e3, int16_t e2, int16_t e1, int16_t e0) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0);
  #else
    simde__m128i_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 };
      r_.neon_i16 = vld1q_s16(data);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7);
    #else
      r_.i16[0] = e0;
      r_.i16[1] = e1;
      r_.i16[2] = e2;
      r_.i16[3] = e3;
      r_.i16[4] = e4;
      r_.i16[5] = e5;
      r_.i16[6] = e6;
      r_.i16[7] = e7;
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0)
#endif

/* Reads one 16-bit value from mem_addr with simde_memcpy and passes it to
 * simde_x_mm_cvtsi16_si128. The native intrinsic is used only on the
 * compiler versions listed in the guard below. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_loadu_si16 (void const* mem_addr) {
  #if defined(SIMDE_X86_SSE2_NATIVE) && ( \
      SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \
      HEDLEY_GCC_VERSION_CHECK(11,0,0) || \
      HEDLEY_INTEL_VERSION_CHECK(20,21,1))
    return _mm_loadu_si16(mem_addr);
  #else
    int16_t val;
    simde_memcpy(&val, mem_addr, sizeof(val));
    return simde_x_mm_cvtsi16_si128(val);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr)
#endif

/* Builds a vector from four 32-bit values; eN is stored in lane N. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_set_epi32(e3, e2, e1, e0);
  #else
    simde__m128i_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 };
      r_.neon_i32 = vld1q_s32(data);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3);
    #else
      r_.i32[0] = e0;
      r_.i32[1] = e1;
      r_.i32[2] = e2;
      r_.i32[3] = e3;
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0)
#endif

/* Reads one 32-bit value from mem_addr with simde_memcpy and passes it to
 * simde_mm_cvtsi32_si128. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_loadu_si32 (void const* mem_addr) {
  #if defined(SIMDE_X86_SSE2_NATIVE) && ( \
      SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \
      HEDLEY_GCC_VERSION_CHECK(11,0,0) || \
      HEDLEY_INTEL_VERSION_CHECK(20,21,1))
    return _mm_loadu_si32(mem_addr);
  #else
    int32_t val;
    simde_memcpy(&val, mem_addr, sizeof(val));
    return simde_mm_cvtsi32_si128(val);
#endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_set_epi64(e1, e0); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); #else r_.m64[0] = e0; r_.m64[1] = e1; #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_set_epi64x (int64_t e1, int64_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) return _mm_set_epi64x(e1, e0); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; r_.neon_i64 = vld1q_s64(data); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i64x2_make(e0, e1); #else r_.i64[0] = e0; r_.i64[1] = e1; #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_loadu_si64 (void const* mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(20,21,1)) return _mm_loadu_si64(mem_addr); #else int64_t val; simde_memcpy(&val, mem_addr, sizeof(val)); return simde_mm_cvtsi64_si128(val); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, uint8_t e11, uint8_t 
e10, uint8_t e9, uint8_t e8, uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set_epi8( HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), HEDLEY_STATIC_CAST(char, e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15}; r_.neon_u8 = vld1q_u8(data); #else r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; #endif return simde__m128i_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set_epi16( HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; r_.neon_u16 = vld1q_u16(data); #else r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; 
r_.u16[7] = e7; #endif return simde__m128i_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set_epi32( HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; r_.neon_u32 = vld1q_u32(data); #else r_.u32[0] = e0; r_.u32[1] = e1; r_.u32[2] = e2; r_.u32[3] = e3; #endif return simde__m128i_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; r_.neon_u64 = vld1q_u64(data); #else r_.u64[0] = e0; r_.u64[1] = e1; #endif return simde__m128i_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_set_sd (simde_float64 a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set_sd(a); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); #else return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set_sd(a) simde_mm_set_sd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_set1_epi8 (int8_t a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set1_epi8(a); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vdupq_n_s8(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i8x16_splat(a); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) r_.altivec_i8 = 
vec_splats(HEDLEY_STATIC_CAST(signed char, a)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_set1_epi16 (int16_t a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set1_epi16(a); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vdupq_n_s16(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_splat(a); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_set1_epi32 (int32_t a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set1_epi32(a); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vdupq_n_s32(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_splat(a); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_set1_epi64x (int64_t a) { #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) return _mm_set1_epi64x(a); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = 
vdupq_n_s64(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i64x2_splat(a); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_set1_epi64 (simde__m64 a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_set1_epi64(a); #else simde__m64_private a_ = simde__m64_to_private(a); return simde_mm_set1_epi64x(a_.i64[0]); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_set1_epu8 (uint8_t value) { #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); #else return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_set1_epu16 (uint16_t value) { #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); #else return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_set1_epu32 (uint32_t value) { #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); #else return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_set1_epu64 (uint64_t value) { #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); #else return 
simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_setr_epi8( e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0); #else return simde_mm_set_epi8( e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); #else return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_setr_epi32(e3, e2, e1, e0); #else return simde_mm_set_epi32(e0, e1, e2, e3); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_setr_epi32(e3, e2, e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_setr_epi64(e1, e0); #else return simde_mm_set_epi64(e0, e1); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), 
(e0))) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_setr_pd(e1, e0); #else return simde_mm_set_pd(e0, e1); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_setzero_pd (void) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_setzero_pd(); #else return simde_mm_castsi128_pd(simde_mm_setzero_si128()); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_setzero_pd() simde_mm_setzero_pd() #endif #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_undefined_pd (void) { simde__m128d_private r_; #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) r_.n = _mm_undefined_pd(); #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) r_ = simde__m128d_to_private(simde_mm_setzero_pd()); #endif return simde__m128d_from_private(r_); } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_undefined_pd() simde_mm_undefined_pd() #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_undefined_si128 (void) { simde__m128i_private r_; #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) r_.n = _mm_undefined_si128(); #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) r_ = simde__m128i_to_private(simde_mm_setzero_si128()); #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_undefined_si128() (simde_mm_undefined_si128()) #endif #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) HEDLEY_DIAGNOSTIC_POP #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_x_mm_setone_pd (void) { return simde_mm_castps_pd(simde_x_mm_setone_ps()); } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_setone_si128 (void) { return 
simde_mm_castps_si128(simde_x_mm_setone_ps()); } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m128i_private r_, a_ = simde__m128i_to_private(a); for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_shuffle_epi32(a, imm8) \ __extension__({ \ int32x4_t ret; \ ret = vmovq_n_s32( \ vgetq_lane_s32(vreinterpretq_s32_s64(a), (imm8) & (0x3))); \ ret = vsetq_lane_s32( \ vgetq_lane_s32(vreinterpretq_s32_s64(a), ((imm8) >> 2) & 0x3), \ ret, 1); \ ret = vsetq_lane_s32( \ vgetq_lane_s32(vreinterpretq_s32_s64(a), ((imm8) >> 4) & 0x3), \ ret, 2); \ ret = vsetq_lane_s32( \ vgetq_lane_s32(vreinterpretq_s32_s64(a), ((imm8) >> 6) & 0x3), \ ret, 3); \ vreinterpretq_s64_s32(ret); \ }) #elif defined(SIMDE_SHUFFLE_VECTOR_) #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ const simde__m128i_private simde__tmp_a_ = simde__m128i_to_private(a); \ simde__m128i_from_private((simde__m128i_private) { .i32 = \ SIMDE_SHUFFLE_VECTOR_(32, 16, \ (simde__tmp_a_).i32, \ (simde__tmp_a_).i32, \ ((imm8) ) & 3, \ ((imm8) >> 2) & 3, \ ((imm8) >> 4) & 3, \ ((imm8) >> 6) & 3) }); })) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; r_.f64[1] = ((imm8 & 2) == 0) ? 
b_.f64[0] : b_.f64[1]; return simde__m128d_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) #elif defined(SIMDE_SHUFFLE_VECTOR_) #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ simde__m128d_from_private((simde__m128d_private) { .f64 = \ SIMDE_SHUFFLE_VECTOR_(64, 16, \ simde__m128d_to_private(a).f64, \ simde__m128d_to_private(b).f64, \ (((imm8) ) & 1), \ (((imm8) >> 1) & 1) + 2) }); })) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m128i_private r_, a_ = simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { r_.i16[i] = a_.i16[i]; } for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_shufflehi_epi16(a, imm8) \ __extension__({ \ int16x8_t ret = vreinterpretq_s16_s64(a); \ int16x4_t highBits = vget_high_s16(ret); \ ret = vsetq_lane_s16(vget_lane_s16(highBits, (imm8) & (0x3)), ret, 4); \ ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm8) >> 2) & 0x3), ret, \ 5); \ ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm8) >> 4) & 0x3), ret, \ 6); \ ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm8) >> 6) & 0x3), ret, \ 7); \ vreinterpretq_s64_s16(ret); \ }) #elif defined(SIMDE_SHUFFLE_VECTOR_) #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ const simde__m128i_private simde__tmp_a_ = simde__m128i_to_private(a); \ 
simde__m128i_from_private((simde__m128i_private) { .i16 = \ SIMDE_SHUFFLE_VECTOR_(16, 16, \ (simde__tmp_a_).i16, \ (simde__tmp_a_).i16, \ 0, 1, 2, 3, \ (((imm8) ) & 3) + 4, \ (((imm8) >> 2) & 3) + 4, \ (((imm8) >> 4) & 3) + 4, \ (((imm8) >> 6) & 3) + 4) }); })) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m128i_private r_, a_ = simde__m128i_to_private(a); for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; } SIMDE_VECTORIZE for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i]; } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_shufflelo_epi16(a, imm8) \ __extension__({ \ int16x8_t ret = vreinterpretq_s16_s64(a); \ int16x4_t lowBits = vget_low_s16(ret); \ ret = vsetq_lane_s16(vget_lane_s16(lowBits, (imm8) & (0x3)), ret, 0); \ ret = vsetq_lane_s16(vget_lane_s16(lowBits, ((imm8) >> 2) & 0x3), ret, \ 1); \ ret = vsetq_lane_s16(vget_lane_s16(lowBits, ((imm8) >> 4) & 0x3), ret, \ 2); \ ret = vsetq_lane_s16(vget_lane_s16(lowBits, ((imm8) >> 6) & 0x3), ret, \ 3); \ vreinterpretq_s64_s16(ret); \ }) #elif defined(SIMDE_SHUFFLE_VECTOR_) #define simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ const simde__m128i_private simde__tmp_a_ = simde__m128i_to_private(a); \ simde__m128i_from_private((simde__m128i_private) { .i16 = \ SIMDE_SHUFFLE_VECTOR_(16, 16, \ (simde__tmp_a_).i16, \ (simde__tmp_a_).i16, \ (((imm8) ) & 3), \ (((imm8) >> 2) & 3), \ (((imm8) >> 4) & 3), \ (((imm8) >> 6) & 3), \ 4, 5, 6, 7) }); })) #endif #if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sll_epi16(a, count); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), count_ = simde__m128i_to_private(count); if (count_.u64[0] > 15) return simde_mm_setzero_si128(); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u16 = (a_.u16 << count_.u64[0]); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sll_epi32(a, count); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), count_ = simde__m128i_to_private(count); if (count_.u64[0] > 31) return simde_mm_setzero_si128(); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u32 = (a_.u32 << count_.u64[0]); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? 
wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sll_epi64(a, count); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), count_ = simde__m128i_to_private(count); if (count_.u64[0] > 63) return simde_mm_setzero_si128(); const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, s) : wasm_i64x2_const(0,0); #else #if !defined(SIMDE_BUG_GCC_94488) SIMDE_VECTORIZE #endif for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = a_.u64[i] << s; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_sqrt_pd (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sqrt_pd(a); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vsqrtq_f64(a_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); #elif defined(simde_math_sqrt) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_sqrt(a_.f64[i]); } #else HEDLEY_UNREACHABLE(); #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sqrt_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(simde_math_sqrt) r_.f64[0] = simde_math_sqrt(b_.f64[0]); r_.f64[1] = a_.f64[1]; #else HEDLEY_UNREACHABLE(); #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_srl_epi16(a, count); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), count_ = simde__m128i_to_private(count); const int cnt 
= HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = a_.u16[i] >> cnt; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_srl_epi32(a, count); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), count_ = simde__m128i_to_private(count); const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 32 : count_.i64[0])); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, cnt); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] >> cnt; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_srl_epi64(a, count); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), count_ = simde__m128i_to_private(count); const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 
64 : count_.i64[0])); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, cnt); #else #if !defined(SIMDE_BUG_GCC_94488) SIMDE_VECTORIZE #endif for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = a_.u64[i] >> cnt; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srai_epi16 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { /* MSVC requires a range of (0, 255). */ simde__m128i_private r_, a_ = simde__m128i_to_private(a); const int cnt = (imm8 & ~15) ? 15 : imm8; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, cnt); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] >> cnt; } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srai_epi32 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { /* MSVC requires a range of (0, 255). */ simde__m128i_private r_, a_ = simde__m128i_to_private(a); const int cnt = (imm8 & ~31) ? 
31 : imm8; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, cnt); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] >> cnt; } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sra_epi16(a, count); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), count_ = simde__m128i_to_private(count); const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, cnt); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] >> cnt; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) return _mm_sra_epi32(a, count); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), count_ = simde__m128i_to_private(count); const int cnt = count_.u64[0] > 31 ? 
31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, cnt); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] >> cnt; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_slli_epi16 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { if (HEDLEY_UNLIKELY((imm8 > 15))) { return simde_mm_setzero_si128(); } simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 << (imm8 & 0xff); #else const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_slli_epi16(a, imm8) \ (__extension__ ({ \ simde__m128i ret; \ if ((imm8) <= 0) { \ ret = a; \ } else if ((imm8) > 15) { \ ret = simde_mm_setzero_si128(); \ } else { \ ret = simde__m128i_from_neon_i16( \ vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15))); \ } \ ret; \ })) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_slli_epi16(a, imm8) \ ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) #define simde_mm_slli_epi16(a, imm8) \ ((imm8 & ~15) ? 
simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_slli_epi32 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { if (HEDLEY_UNLIKELY((imm8 > 31))) { return simde_mm_setzero_si128(); } simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i32 = a_.i32 << imm8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] << (imm8 & 0xff); } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_slli_epi32(a, imm8) \ (__extension__ ({ \ simde__m128i ret; \ if ((imm8) <= 0) { \ ret = a; \ } else if ((imm8) > 31) { \ ret = simde_mm_setzero_si128(); \ } else { \ ret = simde__m128i_from_neon_i32( \ vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31))); \ } \ ret; \ })) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_slli_epi32(a, imm8) \ ((imm8 < 32) ? 
wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) #define simde_mm_slli_epi32(a, imm8) \ (__extension__ ({ \ simde__m128i ret; \ if ((imm8) <= 0) { \ ret = a; \ } else if ((imm8) > 31) { \ ret = simde_mm_setzero_si128(); \ } else { \ ret = simde__m128i_from_altivec_i32( \ vec_sl(simde__m128i_to_altivec_i32(a), \ vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ } \ ret; \ })) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_slli_epi64 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { if (HEDLEY_UNLIKELY((imm8 > 63))) { return simde_mm_setzero_si128(); } simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i64 = a_.i64 << imm8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[i] << (imm8 & 0xff); } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_slli_epi64(a, imm8) \ (__extension__ ({ \ simde__m128i ret; \ if ((imm8) <= 0) { \ ret = a; \ } else if ((imm8) > 63) { \ ret = simde_mm_setzero_si128(); \ } else { \ ret = simde__m128i_from_neon_i64( \ vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63))); \ } \ ret; \ })) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_slli_epi64(a, imm8) \ ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srli_epi16 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { if (HEDLEY_UNLIKELY((imm8 > 15))) { return simde_mm_setzero_si128(); } simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u16 = a_.u16 >> imm8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_srli_epi16(a, imm8) \ (__extension__ ({ \ simde__m128i ret; \ if ((imm8) <= 0) { \ ret = a; \ } else if ((imm8) > 15) { \ ret = simde_mm_setzero_si128(); \ } else { \ ret = simde__m128i_from_neon_u16( \ vshrq_n_u16(simde__m128i_to_neon_u16(a), (((imm8) & 15) | (((imm8) & 15) == 0)))); \ } \ ret; \ })) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_srli_epi16(a, imm8) \ ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) #define simde_mm_srli_epi16(a, imm8) \ ((imm8 & ~15) ? 
simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srli_epi32 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { if (HEDLEY_UNLIKELY((imm8 > 31))) { return simde_mm_setzero_si128(); } simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u32 = a_.u32 >> (imm8 & 0xff); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_srli_epi32(a, imm8) \ (__extension__ ({ \ simde__m128i ret; \ if ((imm8) <= 0) { \ ret = a; \ } else if ((imm8) > 31) { \ ret = simde_mm_setzero_si128(); \ } else { \ ret = simde__m128i_from_neon_u32( \ vshrq_n_u32(simde__m128i_to_neon_u32(a), (((imm8) & 31) | (((imm8) & 31) == 0)))); \ } \ ret; \ })) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_srli_epi32(a, imm8) \ ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) #define simde_mm_srli_epi32(a, imm8) \ (__extension__ ({ \ simde__m128i ret; \ if ((imm8) <= 0) { \ ret = a; \ } else if ((imm8) > 31) { \ ret = simde_mm_setzero_si128(); \ } else { \ ret = simde__m128i_from_altivec_i32( \ vec_sr(simde__m128i_to_altivec_i32(a), \ vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ } \ ret; \ })) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srli_epi64 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m128i_private r_, a_ = simde__m128i_to_private(a); if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) return simde_mm_setzero_si128(); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); #else #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) r_.u64 = a_.u64 >> imm8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.u64[i] = a_.u64[i] >> imm8; } #endif #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_srli_epi64(a, imm8) \ (__extension__ ({ \ simde__m128i ret; \ if ((imm8) <= 0) { \ ret = a; \ } else if ((imm8) > 63) { \ ret = simde_mm_setzero_si128(); \ } else { \ ret = simde__m128i_from_neon_u64( \ vshrq_n_u64(simde__m128i_to_neon_u64(a), (((imm8) & 63) | (((imm8) & 63) == 0)))); \ } \ ret; \ })) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_srli_epi64(a, imm8) \ ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_store_pd(mem_addr, a); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); #else simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_store1_pd(mem_addr, a); #else simde__m128d_private a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); #else mem_addr[0] = a_.f64[0]; mem_addr[1] = a_.f64[0]; #endif #endif } #define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_store_sd(mem_addr, a); #else simde__m128d_private a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); simde_memcpy(mem_addr, &v, sizeof(v)); #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); #else simde_float64 v = a_.f64[0]; simde_memcpy(mem_addr, &v, sizeof(simde_float64)); #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); #else simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); #else simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_storeh_pd(mem_addr, a); #else simde__m128d_private a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); #else *mem_addr = a_.f64[1]; #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); #else simde__m128i_private a_ = simde__m128i_to_private(a); int64_t tmp; /* memcpy to prevent aliasing, tmp because we can't take the * address of a vector element. 
*/ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) tmp = vgetq_lane_s64(a_.neon_i64, 0); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) #if defined(SIMDE_BUG_GCC_95227) (void) a_; #endif tmp = vec_extract(a_.altivec_i64, 0); #else tmp = a_.i64[0]; #endif simde_memcpy(mem_addr, &tmp, sizeof(tmp)); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_storel_pd(mem_addr, a); #else simde__m128d_private a_ = simde__m128d_to_private(a); simde_float64 tmp; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) tmp = vgetq_lane_f64(a_.neon_f64, 0); #else tmp = a_.f64[0]; #endif simde_memcpy(mem_addr, &tmp, sizeof(tmp)); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_storer_pd(mem_addr, a); #else simde__m128d_private a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); #elif defined(SIMDE_SHUFFLE_VECTOR_) a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); #else mem_addr[0] = a_.f64[1]; mem_addr[1] = a_.f64[0]; #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_storeu_pd(mem_addr, a); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); #else 
simde_memcpy(mem_addr, &a, sizeof(a)); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); #else simde_memcpy(mem_addr, &a, sizeof(a)); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(20,21,1)) _mm_storeu_si16(mem_addr, a); #else int16_t val = simde_x_mm_cvtsi128_si16(a); simde_memcpy(mem_addr, &val, sizeof(val)); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(20,21,1)) _mm_storeu_si32(mem_addr, a); #else int32_t val = simde_mm_cvtsi128_si32(a); simde_memcpy(mem_addr, &val, sizeof(val)); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(20,21,1)) _mm_storeu_si64(mem_addr, a); #else int64_t val = simde_mm_cvtsi128_si64(a); simde_memcpy(mem_addr, &val, sizeof(val)); #endif } #if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_stream_pd(mem_addr, a); #else simde_memcpy(mem_addr, &a, sizeof(a)); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); #else simde_memcpy(mem_addr, &a, sizeof(a)); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_stream_si32(mem_addr, a); #else *mem_addr = a; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); #else *mem_addr = a; #endif } #define simde_mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(mem_addr, a) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) 
{ #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sub_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = a_.i8 - b_.i8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[i] - b_.i8[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sub_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = a_.i16 - b_.i16; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] - b_.i16[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sub_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 - b_.i32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] - b_.i32[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sub_epi32(a, b) 
simde_mm_sub_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sub_epi64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 - b_.i64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[i] - b_.i64[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u32 = a_.u32 - b_.u32; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] - b_.u32[i]; } #endif return simde__m128i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_sub_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sub_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = a_.f64 - b_.f64; #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[i] - b_.f64[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) 
#define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_sub_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sub_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); r_.f64[0] = a_.f64[0] - b_.f64[0]; r_.f64[1] = a_.f64[1]; return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_sub_si64(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 - b_.i64; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); #else r_.i64[0] = a_.i64[0] - b_.i64[0]; #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_subs_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i8x16_sub_saturate(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { r_.i8[i] = INT8_MIN; } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { r_.i8[i] = INT8_MAX; } else { r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); } } #endif 
return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_subs_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_sub_saturate(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { if (((b_.i16[i]) > 0 && (a_.i16[i]) < INT16_MIN + (b_.i16[i]))) { r_.i16[i] = INT16_MIN; } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { r_.i16[i] = INT16_MAX; } else { r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); } } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_subs_epu8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u8x16_sub_saturate(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { const int32_t x = a_.u8[i] - b_.u8[i]; if (x < 0) { r_.u8[i] = 0; } else if (x > UINT8_MAX) { r_.u8[i] = UINT8_MAX; } else { r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); } } #endif return simde__m128i_from_private(r_); #endif } #if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_subs_epu16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u16x8_sub_saturate(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { const int32_t x = a_.u16[i] - b_.u16[i]; if (x < 0) { r_.u16[i] = 0; } else if (x > UINT16_MAX) { r_.u16[i] = UINT16_MAX; } else { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); } } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_ucomieq_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f64[0] == b_.f64[0]; if 
(HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f64[0] == b_.f64[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_ucomige_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f64[0] >= b_.f64[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f64[0] >= b_.f64[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_ucomigt_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); #elif 
defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f64[0] > b_.f64[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f64[0] > b_.f64[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_ucomile_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f64[0] <= b_.f64[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f64[0] <= b_.f64[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_ucomilt_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); r = 
!!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f64[0] < b_.f64[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f64[0] < b_.f64[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_ucomineq_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f64[0] != b_.f64[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f64[0] != b_.f64[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) #endif #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ #endif #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) HEDLEY_DIAGNOSTIC_POP #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_lfence (void) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_lfence(); #else simde_mm_sfence(); #endif } #if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_lfence() simde_mm_lfence()
#endif

/* Full memory fence. Without native SSE2 this delegates to
 * simde_mm_sfence(). */
SIMDE_FUNCTION_ATTRIBUTES
void
simde_mm_mfence (void) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    _mm_mfence();
  #else
    simde_mm_sfence();
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_mfence() simde_mm_mfence()
#endif

/* Interleave the upper halves of `a` and `b`, 8 bits at a time:
 * r[2*i] = a[i + half], r[2*i + 1] = b[i + half]. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_unpackhi_epi8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* Take the high 64 bits of each input, zip them, and recombine. */
      int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16));
      int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16));
      int8x8x2_t result = vzip_s8(a1, b1);
      r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]);
    #elif defined(SIMDE_SHUFFLE_VECTOR_)
      r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) {
        r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)];
        r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b)
#endif

/* Interleave the upper halves of `a` and `b`, 16 bits at a time. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_unpackhi_epi16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      int16x4_t a1 = vget_high_s16(a_.neon_i16);
      int16x4_t b1 = vget_high_s16(b_.neon_i16);
      int16x4x2_t result
= vzip_s16(a1, b1);
      r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]);
    #elif defined(SIMDE_SHUFFLE_VECTOR_)
      r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) {
        r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)];
        r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b)
#endif

/* Interleave the upper halves of `a` and `b`, 32 bits at a time:
 * r[2*i] = a[i + half], r[2*i + 1] = b[i + half]. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_unpackhi_epi32(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* Take the high 64 bits of each input, zip them, and recombine. */
      int32x2_t a1 = vget_high_s32(a_.neon_i32);
      int32x2_t b1 = vget_high_s32(b_.neon_i32);
      int32x2x2_t result = vzip_s32(a1, b1);
      r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]);
    #elif defined(SIMDE_SHUFFLE_VECTOR_)
      r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) {
        r_.i32[(i * 2)] = a_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)];
        r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b)
#endif

/* Combine the upper 64-bit lane of `a` with the upper 64-bit lane of `b`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_unpackhi_epi64(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if
defined(SIMDE_ARM_NEON_A32V7_NATIVE) int64x1_t a_h = vget_high_s64(a_.neon_i64); int64x1_t b_h = vget_high_s64(b_.neon_i64); r_.neon_i64 = vcombine_s64(a_h, b_h); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpackhi_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) float64x1_t a_l = vget_high_f64(a_.f64); float64x1_t b_l = vget_high_f64(b_.f64); r_.neon_f64 = vcombine_f64(a_l, b_l); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_unpacklo_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* Take the low 64 bits of each input, zip them, and recombine. */
      int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16));
      int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16));
      int8x8x2_t result = vzip_s8(a1, b1);
      r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]);
    #elif defined(SIMDE_SHUFFLE_VECTOR_)
      r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) {
        r_.i8[(i * 2)] = a_.i8[i];
        r_.i8[(i * 2) + 1] = b_.i8[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b)
#endif

/* Interleave the lower halves of `a` and `b`, 16 bits at a time:
 * r[2*i] = a[i], r[2*i + 1] = b[i]. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_unpacklo_epi16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      int16x4_t a1 = vget_low_s16(a_.neon_i16);
      int16x4_t b1 = vget_low_s16(b_.neon_i16);
      int16x4x2_t result = vzip_s16(a1, b1);
      r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]);
    #elif defined(SIMDE_SHUFFLE_VECTOR_)
      r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) {
        r_.i16[(i * 2)] = a_.i16[i];
        r_.i16[(i * 2) + 1] = b_.i16[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b)
#endif

/* Interleave the lower halves of `a` and `b`, 32 bits at a time. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return
_mm_unpacklo_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int32x2_t a1 = vget_low_s32(a_.neon_i32); int32x2_t b1 = vget_low_s32(b_.neon_i32); int32x2x2_t result = vzip_s32(a1, b1); r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { r_.i32[(i * 2)] = a_.i32[i]; r_.i32[(i * 2) + 1] = b_.i32[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_epi64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int64x1_t a_l = vget_low_s64(a_.i64); int64x1_t b_l = vget_low_s64(b_.i64); r_.neon_i64 = vcombine_s64(a_l, b_l); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { r_.i64[(i * 2)] = a_.i64[i]; r_.i64[(i * 2) + 1] = b_.i64[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) float64x1_t a_l = vget_low_f64(a_.f64); float64x1_t b_l = vget_low_f64(b_.f64); r_.neon_f64 = vcombine_f64(a_l, b_l); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { r_.f64[(i * 2)] = a_.f64[i]; r_.f64[(i * 2) + 1] = b_.f64[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_x_mm_negate_pd(simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) r_.altivec_f64 = vec_neg(a_.altivec_f64); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vnegq_f64(a_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); #elif defined(SIMDE_VECTOR_NEGATE) r_.f64 = -a_.f64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = -a_.f64[i]; } #endif return simde__m128d_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_xor_si128(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f ^ b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = 
a_.i32f[i] ^ b_.i32f[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b)
#endif

/* Bitwise NOT of `a`. With AVX-512VL this is a single ternary-logic
 * operation using truth table 0x55. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_x_mm_not_si128 (simde__m128i a) {
  #if defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_ternarylogic_epi32(a, a, a, 0x55);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vmvnq_s32(a_.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_not(a_.wasm_v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = ~a_.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
        r_.i32f[i] = ~(a_.i32f[i]);
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}

/* Build a 2-bit shuffle selector: `x` goes in bit 1, `y` in bit 0. */
#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y))
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y)
#endif

SIMDE_END_DECLS_

HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_X86_SSE2_H) */
/* NOTE(review): the token on the next line is tar-archive member-header
 * residue (path, mode, size, "ustar" magic), not C source; the sse3.h member
 * begins right after it. */
simde-0.7.2/simde/x86/sse3.h000066400000000000000000000374051400333146700154070ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
*
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 * 2017-2020 Evan Nemerson
 */

#if !defined(SIMDE_X86_SSE3_H)
#define SIMDE_X86_SSE3_H

#include "sse2.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* Gather the even-indexed 16-bit lanes: the first half of the result holds
 * a[0], a[2], ... and the second half holds b[0], b[2], ... */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) {
  simde__m128i_private
    r_,
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16);
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    /* vuzpq yields both de-interleaved halves; val[0] is the even one. */
    int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16);
    r_.neon_i16 = t.val[0];
  #elif defined(SIMDE_SHUFFLE_VECTOR_)
    r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14);
  #else
    const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2;
    for(size_t i = 0 ; i < halfway_point ; i++) {
      r_.i16[i] = a_.i16[2 * i];
      r_.i16[i + halfway_point] = b_.i16[2 * i];
    }
  #endif

  return simde__m128i_from_private(r_);
}

/* Gather the odd-indexed 16-bit lanes: the first half of the result holds
 * a[1], a[3], ... and the second half holds b[1], b[3], ... */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) {
  simde__m128i_private
    r_,
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16);
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16);
    r_.neon_i16 = t.val[1];
  #elif defined(SIMDE_SHUFFLE_VECTOR_)
    r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15);
  #else
    const size_t
halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2;
    for(size_t i = 0 ; i < halfway_point ; i++) {
      r_.i16[i] = a_.i16[2 * i + 1];
      r_.i16[i + halfway_point] = b_.i16[2 * i + 1];
    }
  #endif

  return simde__m128i_from_private(r_);
}

/* Gather the even-indexed 32-bit lanes: the first half of the result comes
 * from `a`, the second half from `b`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_x_mm_deinterleaveeven_epi32 (simde__m128i a, simde__m128i b) {
  simde__m128i_private
    r_,
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32);
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32);
    r_.neon_i32 = t.val[0];
  #elif defined(SIMDE_SHUFFLE_VECTOR_)
    r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6);
  #else
    const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2;
    for(size_t i = 0 ; i < halfway_point ; i++) {
      r_.i32[i] = a_.i32[2 * i];
      r_.i32[i + halfway_point] = b_.i32[2 * i];
    }
  #endif

  return simde__m128i_from_private(r_);
}

/* Gather the odd-indexed 32-bit lanes: the first half of the result comes
 * from `a`, the second half from `b`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) {
  simde__m128i_private
    r_,
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32);
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32);
    r_.neon_i32 = t.val[1];
  #elif defined(SIMDE_SHUFFLE_VECTOR_)
    r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7);
  #else
    const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2;
    for(size_t i = 0 ; i < halfway_point ; i++) {
      r_.i32[i] = a_.i32[2 * i + 1];
      r_.i32[i + halfway_point] = b_.i32[2 * i + 1];
    }
  #endif

  return simde__m128i_from_private(r_);
}

/* Gather the even-indexed float lanes: the first half of the result comes
 * from `a`, the second half from `b`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) {
  simde__m128_private
    r_,
    a_ = simde__m128_to_private(a),
    b_ = simde__m128_to_private(b);

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r_.neon_f32
= vuzp1q_f32(a_.neon_f32, b_.neon_f32);
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32);
    r_.neon_f32 = t.val[0];
  #elif defined(SIMDE_SHUFFLE_VECTOR_)
    r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6);
  #else
    const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2;
    for(size_t i = 0 ; i < halfway_point ; i++) {
      r_.f32[i] = a_.f32[2 * i];
      r_.f32[i + halfway_point] = b_.f32[2 * i];
    }
  #endif

  return simde__m128_from_private(r_);
}

/* Gather the odd-indexed float lanes: the first half of the result comes
 * from `a`, the second half from `b`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) {
  simde__m128_private
    r_,
    a_ = simde__m128_to_private(a),
    b_ = simde__m128_to_private(b);

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32);
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32);
    r_.neon_f32 = t.val[1];
  #elif defined(SIMDE_SHUFFLE_VECTOR_)
    r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7);
  #else
    const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2;
    for(size_t i = 0 ; i < halfway_point ; i++) {
      r_.f32[i] = a_.f32[2 * i + 1];
      r_.f32[i + halfway_point] = b_.f32[2 * i + 1];
    }
  #endif

  return simde__m128_from_private(r_);
}

/* Gather the even-indexed double lanes: result = { a[0], b[0] }. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) {
  simde__m128d_private
    r_,
    a_ = simde__m128d_to_private(a),
    b_ = simde__m128d_to_private(b);

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64);
  #elif defined(SIMDE_SHUFFLE_VECTOR_)
    r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2);
  #else
    const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2;
    for(size_t i = 0 ; i < halfway_point ; i++) {
      r_.f64[i] = a_.f64[2 * i];
      r_.f64[i + halfway_point] = b_.f64[2 * i];
    }
  #endif

  return simde__m128d_from_private(r_);
}

/* Gather the odd-indexed double lanes: result = { a[1], b[1] }. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) {
simde__m128d_private
    r_,
    a_ = simde__m128d_to_private(a),
    b_ = simde__m128d_to_private(b);

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64);
  #elif defined(SIMDE_SHUFFLE_VECTOR_)
    r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3);
  #else
    const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2;
    for(size_t i = 0 ; i < halfway_point ; i++) {
      r_.f64[i] = a_.f64[2 * i + 1];
      r_.f64[i + halfway_point] = b_.f64[2 * i + 1];
    }
  #endif

  return simde__m128d_from_private(r_);
}

/* Alternately subtract and add double lanes:
 * r[0] = a[0] - b[0], r[1] = a[1] + b[1]. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_addsub_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE3_NATIVE)
    return _mm_addsub_pd(a, b);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      /* Compute both the difference and the sum, then take the low lane of
       * the difference and the high lane of the sum. This branch returns the
       * NEON vector directly instead of going through r_. */
      float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64);
      float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64);
      return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra));
    #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_)
      r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3);
    #else
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) {
        r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ];
        r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i];
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES)
# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b)
#endif

/* Alternately subtract and add float lanes: even lanes hold a - b, odd lanes
 * hold a + b. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_addsub_ps (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SSE3_NATIVE)
    return _mm_addsub_ps(a, b);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32);
      float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32);
      return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra);
    #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
&& defined(SIMDE_SHUFFLE_VECTOR_)
      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7);
    #else
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) {
        r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ];
        r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i];
      }
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES)
# define _mm_addsub_ps(a, b) simde_mm_addsub_ps(a, b)
#endif

/* Horizontal add of doubles. The portable path adds the even-lane gather to
 * the odd-lane gather, giving { a[0] + a[1], b[0] + b[1] }. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_hadd_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE3_NATIVE)
    return _mm_hadd_pd(a, b);
  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b)));
  #else
    return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b));
  #endif
}
#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES)
# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b)
#endif

/* Horizontal add of floats: adds each adjacent pair of lanes, with the pairs
 * from `a` in the first half of the result and those from `b` in the second. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_hadd_ps (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SSE3_NATIVE)
    return _mm_hadd_ps(a, b);
  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)));
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    /* De-interleave into even/odd lanes, then add the two halves. */
    float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b));
    return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1]));
  #else
    return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b));
  #endif
}
#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES)
# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b)
#endif

/* Horizontal subtract of doubles: even-lane gather minus odd-lane gather,
 * giving { a[0] - a[1], b[0] - b[1] }. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_hsub_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE3_NATIVE)
    return _mm_hsub_pd(a, b);
  #else
    return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b));
  #endif
}
#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES)
#
define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE3_NATIVE) return _mm_hsub_ps(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); #else return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); #endif } #if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) # define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { #if defined(SIMDE_X86_SSE3_NATIVE) return _mm_lddqu_si128(mem_addr); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); #else simde_memcpy(&r_, mem_addr, sizeof(r_)); #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) # define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_loaddup_pd (simde_float64 const* mem_addr) { #if defined(SIMDE_X86_SSE3_NATIVE) return _mm_loaddup_pd(mem_addr); #else simde__m128d_private r_; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vdupq_n_f64(*mem_addr); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); #else r_.f64[0] = *mem_addr; r_.f64[1] = *mem_addr; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) # define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_movedup_pd (simde__m128d a) { #if defined(SIMDE_X86_SSE3_NATIVE) return _mm_movedup_pd(a); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); #else r_.f64[0] = a_.f64[0]; r_.f64[1] = a_.f64[0]; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) # define _mm_movedup_pd(a) simde_mm_movedup_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_movehdup_ps (simde__m128 a) { #if defined(SIMDE_X86_SSE3_NATIVE) return _mm_movehdup_ps(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); #else r_.f32[0] = a_.f32[1]; r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[3]; r_.f32[3] = a_.f32[3]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) # define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_moveldup_ps (simde__m128 a) { #if defined(SIMDE__SSE3_NATIVE) return _mm_moveldup_ps(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); #else r_.f32[0] = a_.f32[0]; r_.f32[1] = a_.f32[0]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[2]; 
#endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) # define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_SSE3_H) */ simde-0.7.2/simde/x86/sse4.1.h000066400000000000000000002236471400333146700155540ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2017-2020 Evan Nemerson */ #include "sse.h" #if !defined(SIMDE_X86_SSE4_1_H) #define SIMDE_X86_SSE4_1_H #include "ssse3.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) # define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE4_1_NATIVE) # define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) # define simde_mm_blend_epi16(a, b, imm8) \ (__extension__ ({ \ const uint16_t _mask[8] = { \ ((imm8) & (1 << 0)) ? 0xFFFF : 0x0000, \ ((imm8) & (1 << 1)) ? 0xFFFF : 0x0000, \ ((imm8) & (1 << 2)) ? 0xFFFF : 0x0000, \ ((imm8) & (1 << 3)) ? 0xFFFF : 0x0000, \ ((imm8) & (1 << 4)) ? 0xFFFF : 0x0000, \ ((imm8) & (1 << 5)) ? 0xFFFF : 0x0000, \ ((imm8) & (1 << 6)) ? 0xFFFF : 0x0000, \ ((imm8) & (1 << 7)) ? 0xFFFF : 0x0000 \ }; \ uint16x8_t _mask_vec = vld1q_u16(_mask); \ simde__m128i_from_neon_u16(vbslq_u16(_mask_vec, simde__m128i_to_neon_u16(b), simde__m128i_to_neon_u16(a))); \ })) #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) # define simde_mm_blend_epi16(a, b, imm8) \ (__extension__ ({ \ const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) _mask = { \ ((imm8) & (1 << 0)) ? 0xFFFF : 0x0000, \ ((imm8) & (1 << 1)) ? 0xFFFF : 0x0000, \ ((imm8) & (1 << 2)) ? 0xFFFF : 0x0000, \ ((imm8) & (1 << 3)) ? 0xFFFF : 0x0000, \ ((imm8) & (1 << 4)) ? 0xFFFF : 0x0000, \ ((imm8) & (1 << 5)) ? 0xFFFF : 0x0000, \ ((imm8) & (1 << 6)) ? 0xFFFF : 0x0000, \ ((imm8) & (1 << 7)) ? 
0xFFFF : 0x0000 \ }; \ simde__m128i_from_altivec_u16(vec_sel(simde__m128i_to_altivec_u16(a), simde__m128i_to_altivec_u16(b), _mask)); \ })) #endif #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_blend_epi16 #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = ((imm8 >> i) & 1) ? b_.f64[i] : a_.f64[i]; } return simde__m128d_from_private(r_); } #if defined(SIMDE_X86_SSE4_1_NATIVE) # define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) # define simde_mm_blend_pd(a, b, imm8) \ (__extension__ ({ \ const uint64_t _mask[2] = { \ ((imm8) & (1 << 0)) ? UINT64_MAX : 0, \ ((imm8) & (1 << 1)) ? UINT64_MAX : 0 \ }; \ uint64x2_t _mask_vec = vld1q_u64(_mask); \ simde__m128d_from_neon_u64(vbslq_u64(_mask_vec, simde__m128d_to_neon_u64(b), simde__m128d_to_neon_u64(a))); \ })) #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) # define simde_mm_blend_pd(a, b, imm8) \ (__extension__ ({ \ const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) _mask = { \ ((imm8) & (1 << 0)) ? UINT64_MAX : 0, \ ((imm8) & (1 << 1)) ? 
UINT64_MAX : 0 \ }; \ simde__m128d_from_altivec_f64(vec_sel(simde__m128d_to_altivec_f64(a), simde__m128d_to_altivec_f64(b), _mask)); \ })) #endif #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_blend_pd #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; } return simde__m128_from_private(r_); } #if defined(SIMDE_X86_SSE4_1_NATIVE) # define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) # define simde_mm_blend_ps(a, b, imm8) \ (__extension__ ({ \ const uint32_t _mask[4] = { \ ((imm8) & (1 << 0)) ? UINT32_MAX : 0, \ ((imm8) & (1 << 1)) ? UINT32_MAX : 0, \ ((imm8) & (1 << 2)) ? UINT32_MAX : 0, \ ((imm8) & (1 << 3)) ? UINT32_MAX : 0 \ }; \ uint32x4_t _mask_vec = vld1q_u32(_mask); \ simde__m128_from_neon_f32(vbslq_f32(_mask_vec, simde__m128_to_neon_f32(b), simde__m128_to_neon_f32(a))); \ })) #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) # define simde_mm_blend_ps(a, b, imm8) \ (__extension__ ({ \ const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) _mask = { \ ((imm8) & (1 << 0)) ? UINT32_MAX : 0, \ ((imm8) & (1 << 1)) ? UINT32_MAX : 0, \ ((imm8) & (1 << 2)) ? UINT32_MAX : 0, \ ((imm8) & (1 << 3)) ? 
UINT32_MAX : 0 \ }; \ simde__m128_from_altivec_f32(vec_sel(simde__m128_to_altivec_f32(a), simde__m128_to_altivec_f32(b), _mask)); \ })) #endif #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_blend_ps #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_blendv_epi8(a, b, mask); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), mask_ = simde__m128i_to_private(mask); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) /* Use a signed shift right to create a mask with the sign bit */ mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); #elif defined(SIMDE_WASM_SIMD128_NATIVE) v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ #if defined(HEDLEY_INTEL_VERSION_CHECK) __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; mask_.i8 = HEDLEY_STATIC_CAST(__typeof__(mask_.i8), mask_.i8 < z); #else mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; #endif r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { int8_t m = mask_.i8[i] >> 7; r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_blendv_epi8 #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i 
simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { #if defined(SIMDE_X86_SSE2_NATIVE) mask = simde_mm_srai_epi16(mask, 15); return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), mask_ = simde__m128i_to_private(mask); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) #if defined(HEDLEY_INTEL_VERSION_CHECK) __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; mask_.i16 = mask_.i16 < z; #else mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; #endif r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { int16_t m = mask_.i16[i] >> 15; r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); } #endif return simde__m128i_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), mask_ = simde__m128i_to_private(mask); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), 
wasm_v128_andnot(a_.wasm_v128, m)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) #if defined(HEDLEY_INTEL_VERSION_CHECK) __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; mask_.i32 = HEDLEY_STATIC_CAST(__typeof__(mask_.i32), mask_.i32 < z); #else mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; #endif r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { int32_t m = mask_.i32[i] >> 31; r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); } #endif return simde__m128i_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), mask_ = simde__m128i_to_private(mask); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) mask_.u64 = vcltq_s64(mask_.i64, vdupq_n_s64(UINT64_C(0))); r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) /* Using int due to clang bug #46770 */ SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) #if defined(HEDLEY_INTEL_VERSION_CHECK) __typeof__(mask_.i64) z = { 0, 0 }; mask_.i64 = 
HEDLEY_STATIC_CAST(__typeof__(mask_.i64), mask_.i64 < z); #else mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; #endif r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { int64_t m = mask_.i64[i] >> 63; r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); } #endif return simde__m128i_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_blendv_pd(a, b, mask); #else return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_blendv_pd #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_blendv_ps(a, b, mask); #else return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_blendv_ps #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_round_pd (simde__m128d a, int rounding) SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { simde__m128d_private r_, a_ = simde__m128d_to_private(a); /* For architectures which lack a current direction SIMD instruction. 
*/ #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; #endif switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { case SIMDE_MM_FROUND_CUR_DIRECTION: #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vrndiq_f64(a_.neon_f64); #elif defined(simde_math_nearbyint) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_nearbyint(a_.f64[i]); } #else HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); #endif break; case SIMDE_MM_FROUND_TO_NEAREST_INT: #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vrndaq_f64(a_.neon_f64); #elif defined(simde_math_roundeven) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_roundeven(a_.f64[i]); } #else HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); #endif break; case SIMDE_MM_FROUND_TO_NEG_INF: #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_floor(a_.altivec_f64)); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vrndmq_f64(a_.neon_f64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_floor(a_.f64[i]); } #endif break; case SIMDE_MM_FROUND_TO_POS_INF: #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vrndpq_f64(a_.neon_f64); #elif defined(simde_math_ceil) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) 
/ sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_ceil(a_.f64[i]); } #else HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); #endif break; case SIMDE_MM_FROUND_TO_ZERO: #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vrndq_f64(a_.neon_f64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_trunc(a_.f64[i]); } #endif break; default: HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); } return simde__m128d_from_private(r_); } #if defined(SIMDE_X86_SSE4_1_NATIVE) #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) #endif #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_round_pd #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_ceil_pd (simde__m128d a) { return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_ceil_pd #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_ceil_ps (simde__m128 a) { return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_ceil_ps #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_ceil_sd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(simde_math_ceilf) r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); #else HEDLEY_UNREACHABLE(); #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_ceil_sd #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_ceil_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(simde_math_ceilf) r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); #else HEDLEY_UNREACHABLE(); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_ceil_ss #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_cmpeq_epi64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); uint32x4_t swapped = vrev64q_u32(cmp); r_.neon_u32 = vandq_u32(cmp, swapped); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? 
~UINT64_C(0) : UINT64_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_cmpeq_epi64 #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtepi8_epi16 (simde__m128i a) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_cvtepi8_epi16(a); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ r_.neon_i16 = s16x8; #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, -1, 0, -1, 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7)); r_.i16 >>= 8; #elif defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i8[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_cvtepi8_epi16 #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtepi8_epi32 (simde__m128i a) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_cvtepi8_epi32(a); #elif defined(SIMDE_X86_SSE2_NATIVE) __m128i tmp = _mm_unpacklo_epi8(a, a); tmp = _mm_unpacklo_epi16(tmp, tmp); return _mm_srai_epi32(tmp, 24); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ 
r_.neon_i32 = s32x4; #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, -1, -1, -1, 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3)); r_.i32 >>= 24; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i8[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_cvtepi8_epi32 #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtepi8_epi64 (simde__m128i a) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_cvtepi8_epi64(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ r_.neon_i64 = s64x2; #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) /* Disabled on x86 due to lack of 64-bit arithmetic shift until * until AVX-512 (at which point we would be using the native * _mm_cvtepi_epi64 anyways). 
*/ r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 1)); r_.i64 >>= 56; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i8[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_cvtepi8_epi64 #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtepu8_epi16 (simde__m128i a) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_cvtepu8_epi16(a); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_epi8(a, _mm_setzero_si128()); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ r_.neon_u16 = u16x8; #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) __typeof__(r_.i8) z = { 0, }; r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23)); #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.u8[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_cvtepu8_epi16 #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtepu8_epi32 (simde__m128i a) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_cvtepu8_epi32(a); #elif defined(SIMDE_X86_SSSE3_NATIVE) __m128i s = _mm_set_epi8( 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80, 0x02, 0x80, 
0x80, 0x80, 0x01, 0x80, 0x80, 0x80, 0x00); return _mm_shuffle_epi8(a, s); #elif defined(SIMDE_X86_SSE2_NATIVE) __m128i z = _mm_setzero_si128(); return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ r_.neon_u32 = u32x4; #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) __typeof__(r_.i8) z = { 0, }; r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, 0, 17, 18, 19, 1, 21, 22, 23, 2, 25, 26, 27, 3, 29, 30, 31)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.u8[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_cvtepu8_epi32 #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtepu8_epi64 (simde__m128i a) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_cvtepu8_epi64(a); #elif defined(SIMDE_X86_SSSE3_NATIVE) __m128i s = _mm_set_epi8( 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00); return _mm_shuffle_epi8(a, s); #elif defined(SIMDE_X86_SSE2_NATIVE) __m128i z = _mm_setzero_si128(); return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ 
r_.neon_u64 = u64x2; #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) __typeof__(r_.i8) z = { 0, }; r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, 0, 17, 18, 19, 20, 21, 22, 23, 1, 25, 26, 27, 28, 29, 30, 31)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.u8[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_cvtepu8_epi64 #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtepi16_epi32 (simde__m128i a) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_cvtepi16_epi32(a); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); r_.i32 >>= 16; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i16[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_cvtepi16_epi32 #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtepu16_epi32 (simde__m128i a) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_cvtepu16_epi32(a); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_epi16(a, _mm_setzero_si128()); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); #elif 
defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) __typeof__(r_.u16) z = { 0, }; r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, 0, 9, 1, 11, 2, 13, 3, 15)); #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.u16[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_cvtepu16_epi32 #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtepu16_epi64 (simde__m128i a) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_cvtepu16_epi64(a); #elif defined(SIMDE_X86_SSE2_NATIVE) __m128i z = _mm_setzero_si128(); return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ r_.neon_u64 = u64x2; #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) __typeof__(r_.u16) z = { 0, }; r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, 0, 9, 10, 11, 1, 13, 14, 15)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.u16[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_cvtepu16_epi64 #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtepi16_epi64 (simde__m128i a) { #if 
defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_cvtepi16_epi64(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ r_.neon_i64 = s64x2; #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 9, 10, 0, 12, 13, 14, 1)); r_.i64 >>= 48; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i16[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_cvtepi16_epi64 #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtepi32_epi64 (simde__m128i a) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_cvtepi32_epi64(a); #elif defined(SIMDE_X86_SSE2_NATIVE) __m128i tmp = _mm_shuffle_epi32(a, 0x50); tmp = _mm_srai_epi32(tmp, 31); tmp = _mm_shuffle_epi32(tmp, 0xed); return _mm_unpacklo_epi32(a, tmp); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); r_.i64 >>= 32; #elif defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i32[i]; } 
#endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_cvtepi32_epi64
  #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a)
#endif

/* Zero-extend the two lowest unsigned 32-bit lanes of `a` to two 64-bit
 * lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cvtepu32_epi64 (simde__m128i a) {
  #if defined(SIMDE_X86_SSE4_1_NATIVE)
    return _mm_cvtepu32_epi64(a);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    /* Interleaving with zero supplies the upper 32 bits of each result. */
    return _mm_unpacklo_epi32(a, _mm_setzero_si128());
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32));
    #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)
      /* Interleave a_.u32[0], a_.u32[1] with lanes taken from a zero vector. */
      __typeof__(r_.u32) z = { 0, };
      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6));
    #elif defined(SIMDE_CONVERT_VECTOR_)
      SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.i64[i] = a_.u32[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_cvtepu32_epi64
  #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a)
#endif

/* Conditional dot product of two double-precision vectors; the function
 * body continues past the end of this span. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {
  simde__m128d_private
    r_,
    a_ = simde__m128d_to_private(a),
    b_ = simde__m128d_to_private(b);

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64);

    switch (imm8) {
      case 0xff:
        /* Both products summed, sum written to both lanes. */
        r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1));
        break;
      case 0x13:
        /* Only the low product is used; it is broadcast to both lanes. */
        r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0);
        break;
      default:
        { /* imm8 is a compile-time constant, so this all becomes just a load */
          uint64_t mask_data[] = {
            (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0),
            (imm8 & (1 << 5)) ?
~UINT64_C(0) : UINT64_C(0),
          };
          r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64)));
        }

        /* Horizontal add of the (masked) products, broadcast to all lanes. */
        r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64));

        { /* Keep the sum only in the lanes selected by imm8 bits 0 and 1. */
          uint64_t mask_data[] = {
            (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0),
            (imm8 & 2) ? ~UINT64_C(0) : UINT64_C(0)
          };
          r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64)));
        }
        break;
    }
  #else
    /* Portable path: bits 4..5 of imm8 select which products are summed,
     * bits 0..1 select which result lanes receive the sum (others get 0.0). */
    simde_float64 sum = SIMDE_FLOAT64_C(0.0);

    SIMDE_VECTORIZE_REDUCTION(+:sum)
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0;
    }

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0;
    }
  #endif

  return simde__m128d_from_private(r_);
}
/* When SSE4.1 is available the function above is shadowed by the native
 * intrinsic. */
#if defined(SIMDE_X86_SSE4_1_NATIVE)
#  define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8)
#endif
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_dp_pd
  #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8)
#endif

/* Conditional dot product of two single-precision vectors; the function
 * body continues past the end of this span. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {
  simde__m128_private
    r_,
    a_ = simde__m128_to_private(a),
    b_ = simde__m128_to_private(b);

  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32);

    switch (imm8) {
      case 0xff:
        /* All four products summed, sum written to every lane. */
        r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32));
        break;
      case 0x7f:
        /* Lane 3 is excluded from the sum; sum written to every lane. */
        r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3);
        r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32));
        break;
      default:
        {
          {
            uint32_t mask_data[] = {
              (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0),
              (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0),
              (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0),
              (imm8 & (1 << 7)) ?
~UINT32_C(0) : UINT32_C(0)
            };
            r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32)));
          }

          /* Horizontal add of the (masked) products, broadcast to all lanes. */
          r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32));

          { /* Keep the sum only in the lanes selected by imm8 bits 0..3. */
            uint32_t mask_data[] = {
              (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0),
              (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0),
              (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0),
              (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0)
            };
            r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32)));
          }
        }
        break;
    }
  #else
    /* Portable path: bits 4..7 of imm8 select which products are summed,
     * bits 0..3 select which result lanes receive the sum (others get 0.0). */
    simde_float32 sum = SIMDE_FLOAT32_C(0.0);

    SIMDE_VECTORIZE_REDUCTION(+:sum)
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0);
    }

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = ((imm8 >> i) & 1) ? sum : SIMDE_FLOAT32_C(0.0);
    }
  #endif

  return simde__m128_from_private(r_);
}
/* When SSE4.1 is available the function above is shadowed by the native
 * intrinsic. */
#if defined(SIMDE_X86_SSE4_1_NATIVE)
#  define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8)
#endif
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_dp_ps
  #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8)
#endif

/* Drop any earlier macro definition so the function below can be declared. */
#if defined(simde_mm_extract_epi8)
#  undef simde_mm_extract_epi8
#endif
/* Return the 8-bit lane of `a` selected by imm8 (0..15) as an int8_t. */
SIMDE_FUNCTION_ATTRIBUTES
int8_t
simde_mm_extract_epi8 (simde__m128i a, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15)  {
  simde__m128i_private
    a_ = simde__m128i_to_private(a);

  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    #if defined(SIMDE_BUG_GCC_95227)
      /* Explicitly mark both as used when this compiler-bug macro is set. */
      (void) a_;
      (void) imm8;
    #endif
    return vec_extract(a_.altivec_i8, imm8);
  #else
    /* The index is masked so it always stays inside the 16-element array. */
    return a_.i8[imm8 & 15];
  #endif
}
/* Prefer a native single-instruction form when one is available. */
#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8)
#  define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8))
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#  define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_private(a).neon_i8, imm8)
#endif
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef
_mm_extract_epi8
  /* The alias casts the int8_t result to int. */
  #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8))
#endif

/* Drop any earlier macro definition so the function below can be declared. */
#if defined(simde_mm_extract_epi32)
#  undef simde_mm_extract_epi32
#endif
/* Return the 32-bit lane of `a` selected by imm8 (0..3). */
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_mm_extract_epi32 (simde__m128i a, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3)  {
  simde__m128i_private
    a_ = simde__m128i_to_private(a);

  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    #if defined(SIMDE_BUG_GCC_95227)
      (void) a_;
      (void) imm8;
    #endif
    return vec_extract(a_.altivec_i32, imm8);
  #else
    /* The index is masked so it always stays inside the 4-element array. */
    return a_.i32[imm8 & 3];
  #endif
}
/* Prefer a native lane-extract form when one is available. */
#if defined(SIMDE_X86_SSE4_1_NATIVE)
#  define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8)
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#  define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_private(a).neon_i32, imm8)
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
#  define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_private(a).altivec_i32, imm8))
#endif
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_extract_epi32
  #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8)
#endif

/* Drop any earlier macro definition so the function below can be declared. */
#if defined(simde_mm_extract_epi64)
#  undef simde_mm_extract_epi64
#endif
/* Return the 64-bit lane of `a` selected by imm8 (0..1). */
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_mm_extract_epi64 (simde__m128i a, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1)  {
  simde__m128i_private
    a_ = simde__m128i_to_private(a);

  #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    #if defined(SIMDE_BUG_GCC_95227)
      (void) a_;
      (void) imm8;
    #endif
    return vec_extract(a_.altivec_i64, imm8);
  #else
    /* The index is masked so it always stays inside the 2-element array. */
    return a_.i64[imm8 & 1];
  #endif
}
/* The native x86 form is only used when SIMDE_ARCH_AMD64 is also defined. */
#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64)
#  define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8)
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
#  define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_private(a).neon_i64, imm8)
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
#  define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t,
vec_extract(simde__m128i_to_private(a).altivec_i64, imm8))
#endif
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_extract_epi64
  #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8)
#endif

/* Drop any earlier macro definition so the function below can be declared. */
#if defined(simde_mm_extract_ps)
#  undef simde_mm_extract_ps
#endif
/* Return the 32-bit lane of `a` selected by imm8 (0..3), read through the
 * integer view of the vector (the result is an int32_t, not a float). */
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_mm_extract_ps (simde__m128 a, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3)  {
  simde__m128_private
    a_ = simde__m128_to_private(a);

  return a_.i32[imm8 & 3];
}
#if defined(SIMDE_X86_SSE4_1_NATIVE)
  #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8)
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_private(a).neon_i32, imm8)
#endif
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_extract_ps
  #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8)
#endif

/* Floor of every double lane; delegates to simde_mm_round_pd with the
 * round-toward-negative-infinity mode. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_floor_pd (simde__m128d a) {
  return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF);
}
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_floor_pd
  #define _mm_floor_pd(a) simde_mm_floor_pd(a)
#endif

/* Floor of every float lane; delegates to simde_mm_round_ps with the
 * round-toward-negative-infinity mode. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_floor_ps (simde__m128 a) {
  return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF);
}
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_floor_ps
  #define _mm_floor_ps(a) simde_mm_floor_ps(a)
#endif

/* Result lane 0 is floor(b[0]); lane 1 is copied from `a`. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_floor_sd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE4_1_NATIVE)
    return _mm_floor_sd(a, b);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(simde_math_floor)
      r_.f64[0] = simde_math_floor(b_.f64[0]);
      r_.f64[1] = a_.f64[1];
    #else
      /* No floor() implementation is available in this configuration. */
      HEDLEY_UNREACHABLE();
    #endif

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_floor_sd
  #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_floor_ss (simde__m128 a, simde__m128 b) {
  /* Result lane 0 is floor(b[0]); the remaining lanes are copied from `a`. */
  #if defined(SIMDE_X86_SSE4_1_NATIVE)
    return _mm_floor_ss(a, b);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
    /* Floor the whole of `b`, then merge its lane 0 into `a`. */
    return simde_mm_move_ss(a, simde_mm_floor_ps(b));
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    #if defined(simde_math_floorf)
      r_.f32[0] = simde_math_floorf(b_.f32[0]);
      for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = a_.f32[i];
      }
    #else
      /* No floorf() implementation is available in this configuration. */
      HEDLEY_UNREACHABLE();
    #endif

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_floor_ss
  #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b)
#endif

/* Return a copy of `a` whose 8-bit lane imm8 (0..15) is replaced by `i`
 * converted to int8_t. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15)  {
  simde__m128i_private
    r_ = simde__m128i_to_private(a);

  r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i);

  return simde__m128i_from_private(r_);
}
#if defined(SIMDE_X86_SSE4_1_NATIVE)
  /* clang-3.8 returns an incompatible type, so we need the cast. MSVC
   * can't handle the cast ("error C2440: 'type cast': cannot convert
   * from '__m128i' to '__m128i'").
*/ #if defined(__clang__) #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_STATIC_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) #else #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) #endif #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) # define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_private(a).i8, imm8)) #endif #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_insert_epi8 #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { simde__m128i_private r_ = simde__m128i_to_private(a); r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE4_1_NATIVE) #if defined(__clang__) #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_STATIC_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) #else #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) #endif #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) # define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_private(a).i32, imm8)) #endif #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_insert_epi32 #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { #if defined(SIMDE_BUG_GCC_94482) simde__m128i_private a_ = simde__m128i_to_private(a); switch(imm8) { case 0: return simde_mm_set_epi64x(a_.i64[1], i); break; case 1: return simde_mm_set_epi64x(i, a_.i64[0]); break; default: HEDLEY_UNREACHABLE(); break; } #else simde__m128i_private r_ = simde__m128i_to_private(a); r_.i64[imm8] = i; return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) # define 
simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) # define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_private(a).i64, imm8)) #endif #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_insert_epi64 #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); a_.f32[0] = b_.f32[(imm8 >> 6) & 3]; a_.f32[(imm8 >> 4) & 3] = a_.f32[0]; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = (imm8 >> i) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; } return simde__m128_from_private(r_); } #if defined(SIMDE_X86_SSE4_1_NATIVE) # define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) #endif #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_insert_ps #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) return _mm_max_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[i] > b_.i8[i] ? 
a_.i8[i] : b_.i8[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_max_epi8
  #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b)
#endif

/* Lane-wise maximum of signed 32-bit integers. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_max_epi32 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI)
    return _mm_max_epi32(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32);
    #else
      /* Portable fallback: compare each pair of lanes. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.i32[i] = a_.i32[i] > b_.i32[i] ? a_.i32[i] : b_.i32[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_max_epi32
  #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b)
#endif

/* Lane-wise maximum of unsigned 16-bit integers. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_max_epu16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE4_1_NATIVE)
    return _mm_max_epu16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        r_.u16[i] = a_.u16[i] > b_.u16[i] ?
a_.u16[i] : b_.u16[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_max_epu16
  #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b)
#endif

/* Lane-wise maximum of unsigned 32-bit integers. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_max_epu32 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE4_1_NATIVE)
    return _mm_max_epu32(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32);
    #else
      /* Portable fallback: compare each pair of lanes. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_max_epu32
  #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b)
#endif

/* Lane-wise minimum of signed 8-bit integers. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_min_epi8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI)
    return _mm_min_epi8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = a_.i8[i] < b_.i8[i] ?
a_.i8[i] : b_.i8[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_min_epi8
  #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b)
#endif

/* Lane-wise minimum of signed 32-bit integers. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_min_epi32 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI)
    return _mm_min_epi32(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128);
    #else
      /* Portable fallback: compare each pair of lanes. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_min_epi32
  #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b)
#endif

/* Lane-wise minimum of unsigned 16-bit integers. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_min_epu16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE4_1_NATIVE)
    return _mm_min_epu16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        r_.u16[i] = a_.u16[i] < b_.u16[i] ?
a_.u16[i] : b_.u16[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_min_epu16
  #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b)
#endif

/* Lane-wise minimum of unsigned 32-bit integers. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_min_epu32 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE4_1_NATIVE)
    return _mm_min_epu32(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128);
    #else
      /* Portable fallback: compare each pair of lanes. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        r_.u32[i] = a_.u32[i] < b_.u32[i] ? a_.u32[i] : b_.u32[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_min_epu32
  #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b)
#endif

/* Find the smallest unsigned 16-bit lane of `a`.  The result holds the
 * minimum in u16[0] and its lane index in u16[1]; all other lanes are zero
 * because r_ starts from simde_mm_setzero_si128(). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_minpos_epu16 (simde__m128i a) {
  #if defined(SIMDE_X86_SSE4_1_NATIVE)
    return _mm_minpos_epu16(a);
  #else
    simde__m128i_private
      r_ = simde__m128i_to_private(simde_mm_setzero_si128()),
      a_ = simde__m128i_to_private(a);

    r_.u16[0] = UINT16_MAX;
    for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
      /* Strict '<' keeps the first (lowest-index) occurrence on ties; if
       * every lane equals UINT16_MAX the index stays at its initial 0. */
      if (a_.u16[i] < r_.u16[0]) {
        r_.u16[0] = a_.u16[i];
        r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i);
      }
    }

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_minpos_epu16
  #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a)
#endif

/* Eight sums of absolute differences between sliding 4-byte windows of `a`
 * and one fixed 4-byte group of `b`; the loop continues past this span. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {
  simde__m128i_private
    r_,
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);

  /* imm8 bit 2 selects the starting byte in `a` (0 or 4); bits 0..1 select
   * the 4-byte group of `b` (byte offset 0, 4, 8 or 12). */
  const int a_offset = imm8 & 4;
  const int b_offset = (imm8 & 3) << 2;

#if defined(simde_math_abs)
  for (int i = 0 ; i
< HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) {
    /* Sum of |a[a_offset + i + k] - b[b_offset + k]| for k = 0..3. */
    r_.u16[i] =
      HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) +
      HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) +
      HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) +
      HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3])));
  }
#else
  /* No abs() implementation is available in this configuration. */
  HEDLEY_UNREACHABLE();
#endif

  return simde__m128i_from_private(r_);
}
/* When SSE4.1 is available the function above is shadowed by the native
 * intrinsic. */
#if defined(SIMDE_X86_SSE4_1_NATIVE)
#  define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8)
#endif
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_mpsadbw_epu8
  #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8)
#endif

/* Multiply the signed 32-bit values in lanes 0 and 2 of `a` and `b`,
 * producing two signed 64-bit products. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE4_1_NATIVE)
    return _mm_mul_epi32(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      // vmull_s32 upcasts instead of masking, so we downcast.
int32x2_t a_lo = vmovn_s64(a_.neon_i64);
      int32x2_t b_lo = vmovn_s64(b_.neon_i64);
      r_.neon_i64 = vmull_s32(a_lo, b_lo);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      /* Extract lanes 0 and 2 and multiply them as 64-bit scalars. */
      r_.wasm_v128 = wasm_i64x2_make(
        wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)),
        wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2)));
    #else
      /* Portable fallback: widen both operands before multiplying so the
       * product is computed in 64 bits. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.i64[i] =
          HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) *
          HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]);
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_mul_epi32
  #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b)
#endif

/* Lane-wise 32-bit multiply keeping the low 32 bits of each product. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE4_1_NATIVE)
    return _mm_mullo_epi32(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      (void) a_;
      (void) b_;
      r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128);
    #else
      /* Multiply in 64 bits, then mask to the low 32 bits; the result is
       * stored through the unsigned view of the vector. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff));
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_mullo_epi32
  #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b)
#endif

/* SIMDe extension (simde_x_ prefix): lane-wise multiply of unsigned 32-bit
 * integers; the body continues past the end of this span. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) {
  simde__m128i_private
    r_,
    a_ = simde__m128i_to_private(a),
    b_ =
simde__m128i_to_private(b);

    /* Unsigned 32-bit lane-wise multiply through whichever vector facility
     * is available. */
    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u32 = a_.u32 * b_.u32;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        r_.u32[i] = a_.u32[i] * b_.u32[i];
      }
    #endif

  return simde__m128i_from_private(r_);
}

/* Pack the signed 32-bit lanes of `a` (result lanes 0..3) and `b` (result
 * lanes 4..7) into unsigned 16-bit lanes with saturation to [0, UINT16_MAX]. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE4_1_NATIVE)
    return _mm_packus_epi32(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* Clamp negatives to zero first, then use the unsigned saturating
       * narrow for the upper bound. */
      const int32x4_t z = vdupq_n_s32(0);
      r_.neon_u16 = vcombine_u16(
          vqmovn_u32(vreinterpretq_u32_s32(vmaxq_s32(z, a_.neon_i32))),
          vqmovn_u32(vreinterpretq_u32_s32(vmaxq_s32(z, b_.neon_i32))));
    #else
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.u16[i + 0] = (a_.i32[i] < 0) ? UINT16_C(0) : ((a_.i32[i] > UINT16_MAX) ? (UINT16_MAX) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i]));
        r_.u16[i + 4] = (b_.i32[i] < 0) ? UINT16_C(0) : ((b_.i32[i] > UINT16_MAX) ?
(UINT16_MAX) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i]));
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_packus_epi32
  #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b)
#endif

/* Round lane 0 of `b` according to `rounding` and return it in lane 0 of a
 * copy of `a`; lane 1 of `a` is passed through unchanged.  The
 * SIMDE_MM_FROUND_NO_EXC flag is ignored when selecting the mode. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding)
    SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) {
  simde__m128d_private
    r_ = simde__m128d_to_private(a),
    b_ = simde__m128d_to_private(b);

  /* Each case is only compiled in when the matching math function exists;
   * otherwise that mode falls through to `default`. */
  switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
    #if defined(simde_math_nearbyint)
      case SIMDE_MM_FROUND_TO_NEAREST_INT:
      case SIMDE_MM_FROUND_CUR_DIRECTION:
        r_.f64[0] = simde_math_nearbyint(b_.f64[0]);
        break;
    #endif

    #if defined(simde_math_floor)
      case SIMDE_MM_FROUND_TO_NEG_INF:
        r_.f64[0] = simde_math_floor(b_.f64[0]);
        break;
    #endif

    #if defined(simde_math_ceil)
      case SIMDE_MM_FROUND_TO_POS_INF:
        r_.f64[0] = simde_math_ceil(b_.f64[0]);
        break;
    #endif

    #if defined(simde_math_trunc)
      case SIMDE_MM_FROUND_TO_ZERO:
        r_.f64[0] = simde_math_trunc(b_.f64[0]);
        break;
    #endif

    default:
      HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
  }

  return simde__m128d_from_private(r_);
}
/* Prefer the native intrinsic, or the packed rounding routine plus a lane
 * merge when 128-bit vectors are natural on the target. */
#if defined(SIMDE_X86_SSE4_1_NATIVE)
#  define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding)
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
#  define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding))
#endif
#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES)
  #undef _mm_round_sd
  #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding)
#endif

/* Single-precision counterpart of simde_mm_round_sd: rounds lane 0 of `b`
 * into lane 0 of a copy of `a`.  The body continues past this span. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding)
    SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) {
  simde__m128_private
    r_ = simde__m128_to_private(a),
    b_ = simde__m128_to_private(b);

  switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
    #if defined(simde_math_nearbyintf)
      case SIMDE_MM_FROUND_TO_NEAREST_INT:
      case SIMDE_MM_FROUND_CUR_DIRECTION:
        r_.f32[0] = simde_math_nearbyintf(b_.f32[0]);
        break;
#endif #if defined(simde_math_floorf) case SIMDE_MM_FROUND_TO_NEG_INF: r_.f32[0] = simde_math_floorf(b_.f32[0]); break; #endif #if defined(simde_math_ceilf) case SIMDE_MM_FROUND_TO_POS_INF: r_.f32[0] = simde_math_ceilf(b_.f32[0]); break; #endif #if defined(simde_math_truncf) case SIMDE_MM_FROUND_TO_ZERO: r_.f32[0] = simde_math_truncf(b_.f32[0]); break; #endif default: HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); } return simde__m128_from_private(r_); } #if defined(SIMDE_X86_SSE4_1_NATIVE) # define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) # define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss(a, simde_mm_round_ps(b, rounding)) #endif #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_round_ss #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vreinterpretq_s64_s32(vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr))); #else return *mem_addr; #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_stream_load_si128 #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_test_all_ones (simde__m128i a) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_test_all_ones(a); #else simde__m128i_private a_ = simde__m128i_to_private(a); int r; #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); #else int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); SIMDE_VECTORIZE_REDUCTION(&:r_) for (size_t i = 0 ; i < (sizeof(a_.i32f) / 
sizeof(a_.i32f[0])) ; i++) { r_ &= a_.i32f[i]; } r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); #endif return r; #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_test_all_ones #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_test_all_zeros(a, mask); #else simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); int r; #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); #else int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); SIMDE_VECTORIZE_REDUCTION(|:r_) for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { r_ |= tmp_.i32f[i]; } r = !r_; #endif return r; #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_test_all_zeros #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_test_mix_ones_zeros(a, mask); #else simde__m128i_private a_ = simde__m128i_to_private(a), mask_ = simde__m128i_to_private(mask); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); #else for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) return 1; return 0; #endif #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_test_mix_ones_zeros 
#define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_testc_si128(a, b); #else simde__m128i_private a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int64x2_t s64 = vandq_s64(~a_.neon_i64, b_.neon_i64); return !(vgetq_lane_s64(s64, 0) & vgetq_lane_s64(s64, 1)); #else int_fast32_t r = 0; SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { r |= ~a_.i32f[i] & b_.i32f[i]; } return HEDLEY_STATIC_CAST(int, !r); #endif #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_testc_si128 #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_testnzc_si128(a, b); #else simde__m128i_private a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int64x2_t s640 = vandq_s64(a_.neon_i64, b_.neon_i64); int64x2_t s641 = vandq_s64(~a_.neon_i64, b_.neon_i64); return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); #else for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) return 1; } return 0; #endif #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_testnzc_si128 #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_testz_si128(a, b); #else simde__m128i_private a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int64x2_t s64 = 
vandq_s64(a_.neon_i64, b_.neon_i64); return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); #else for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { if ((a_.u64[i] & b_.u64[i]) == 0) return 1; } #endif return 0; #endif } #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #undef _mm_testz_si128 #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_SSE4_1_H) */ simde-0.7.2/simde/x86/sse4.2.h000066400000000000000000000271511400333146700155450ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2017 Evan Nemerson * 2020 Hidayat Khan */ #if !defined(SIMDE_X86_SSE4_2_H) #define SIMDE_X86_SSE4_2_H #include "sse4.1.h" #if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) #include #endif HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ #if defined(SIMDE_X86_SSE4_2_NATIVE) #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK #else #define SIMDE_SIDD_UBYTE_OPS 0x00 #define SIMDE_SIDD_UWORD_OPS 0x01 #define SIMDE_SIDD_SBYTE_OPS 0x02 #define SIMDE_SIDD_SWORD_OPS 0x03 #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 #define SIMDE_SIDD_CMP_RANGES 0x04 #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 #define SIMDE_SIDD_BIT_MASK 0x00 #define SIMDE_SIDD_UNIT_MASK 0x40 #endif #if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS #define 
_SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH #define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY #define _SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { #if !defined(HEDLEY_PGI_VERSION) /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ (void) a; (void) b; #endif (void) la; (void) lb; return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); } #if defined(SIMDE_X86_SSE4_2_NATIVE) #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) #endif #if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) #undef _mm_cmpestrs #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { #if !defined(HEDLEY_PGI_VERSION) /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ (void) a; (void) b; #endif (void) la; (void) lb; return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 
16 : 8)) - 1); } #if defined(SIMDE_X86_SSE4_2_NATIVE) #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) #endif #if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) #undef _mm_cmpestrz #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE4_2_NATIVE) && 0 return _mm_cmpgt_epi64(a, b); #elif defined(SIMDE_X86_SSE2_NATIVE) /* https://stackoverflow.com/a/65175746/501126 */ __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) /* https://stackoverflow.com/a/65223269/501126 */ r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) #undef _mm_cmpgt_epi64 #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_cmpistrs_8_(simde__m128i a) { simde__m128i_private a_= simde__m128i_to_private(a); const int upper_bound = (128 / 8) - 1; int a_invalid = 0; SIMDE_VECTORIZE for (int i = 0 ; i <= upper_bound ; i++) { if(!a_.i8[i]) a_invalid = 1; } return a_invalid; } SIMDE_FUNCTION_ATTRIBUTES int simde_mm_cmpistrs_16_(simde__m128i a) { simde__m128i_private a_= simde__m128i_to_private(a); const int upper_bound = (128 / 16) - 1; int a_invalid = 0; SIMDE_VECTORIZE for (int i = 0 ; i <= upper_bound ; i++) { if(!a_.i16[i]) a_invalid = 1; } return a_invalid; } #if defined(SIMDE_X86_SSE4_2_NATIVE) #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) #else #define simde_mm_cmpistrs(a, b, imm8) \ (((imm8) & SIMDE_SIDD_UWORD_OPS) \ ? simde_mm_cmpistrs_16_((a)) \ : simde_mm_cmpistrs_8_((a))) #endif #if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) #undef _mm_cmpistrs #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_cmpistrz_8_(simde__m128i b) { simde__m128i_private b_= simde__m128i_to_private(b); const int upper_bound = (128 / 8) - 1; int b_invalid = 0; SIMDE_VECTORIZE for (int i = 0 ; i <= upper_bound ; i++) { if(!b_.i8[i]) b_invalid = 1; } return b_invalid; } SIMDE_FUNCTION_ATTRIBUTES int simde_mm_cmpistrz_16_(simde__m128i b) { simde__m128i_private b_= simde__m128i_to_private(b); const int upper_bound = (128 / 16) - 1; int b_invalid = 0; SIMDE_VECTORIZE for (int i = 0 ; i <= upper_bound ; i++) { if(!b_.i16[i]) b_invalid = 1; } return b_invalid; } #if defined(SIMDE_X86_SSE4_2_NATIVE) #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) #else #define simde_mm_cmpistrz(a, b, imm8) \ (((imm8) & SIMDE_SIDD_UWORD_OPS) \ ? 
simde_mm_cmpistrz_16_((b)) \ : simde_mm_cmpistrz_8_((b))) #endif #if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) #undef _mm_cmpistrz #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { #if defined(SIMDE_X86_SSE4_2_NATIVE) return _mm_crc32_u8(prevcrc, v); #else #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) return __crc32cb(prevcrc, v); #else uint32_t crc = prevcrc; crc ^= v; for(int bit = 0 ; bit < 8 ; bit++) { if (crc & 1) crc = (crc >> 1) ^ UINT32_C(0x82f63b78); else crc = (crc >> 1); } return crc; #endif #endif } #if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { #if defined(SIMDE_X86_SSE4_2_NATIVE) return _mm_crc32_u16(prevcrc, v); #else #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) return __crc32ch(prevcrc, v); #else uint32_t crc = prevcrc; crc = simde_mm_crc32_u8(crc, v & 0xff); crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); return crc; #endif #endif } #if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { #if defined(SIMDE_X86_SSE4_2_NATIVE) return _mm_crc32_u32(prevcrc, v); #else #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) return __crc32cw(prevcrc, v); #else uint32_t crc = prevcrc; crc = simde_mm_crc32_u16(crc, v & 0xffff); crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); return crc; #endif #endif } #if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) #endif SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { #if defined(SIMDE_X86_SSE4_2_NATIVE) && 
defined(SIMDE_ARCH_AMD64) return _mm_crc32_u64(prevcrc, v); #else #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) return __crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); #else uint64_t crc = prevcrc; crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); return crc; #endif #endif } #if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_SSE4_2_H) */ simde-0.7.2/simde/x86/ssse3.h000066400000000000000000001052621400333146700155670ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2017-2020 Evan Nemerson */ #if !defined(SIMDE_X86_SSSE3_H) #define SIMDE_X86_SSSE3_H #include "sse3.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_abs_epi8 (simde__m128i a) { #if defined(SIMDE_X86_SSSE3_NATIVE) return _mm_abs_epi8(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vabsq_s8(a_.neon_i8); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i8 = vec_abs(a_.altivec_i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_abs_epi8(a) simde_mm_abs_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_abs_epi16 (simde__m128i a) { #if defined(SIMDE_X86_SSSE3_NATIVE) return _mm_abs_epi16(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vabsq_s16(a_.neon_i16); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i16 = vec_abs(a_.altivec_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? 
(- a_.i16[i]) : a_.i16[i]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_abs_epi16(a) simde_mm_abs_epi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_abs_epi32 (simde__m128i a) { #if defined(SIMDE_X86_SSSE3_NATIVE) return _mm_abs_epi32(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vabsq_s32(a_.neon_i32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_abs(a_.altivec_i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { #if defined(_MSC_VER) HEDLEY_DIAGNOSTIC_PUSH #pragma warning(disable:4146) #endif r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); #if defined(_MSC_VER) HEDLEY_DIAGNOSTIC_POP #endif } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_abs_epi32(a) simde_mm_abs_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_abs_pi8 (simde__m64 a) { #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_abs_pi8(a); #else simde__m64_private r_, a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vabs_s8(a_.neon_i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? 
(- a_.i8[i]) : a_.i8[i]); } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_abs_pi8(a) simde_mm_abs_pi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_abs_pi16 (simde__m64 a) { #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_abs_pi16(a); #else simde__m64_private r_, a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vabs_s16(a_.neon_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_abs_pi16(a) simde_mm_abs_pi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_abs_pi32 (simde__m64 a) { #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_abs_pi32(a); #else simde__m64_private r_, a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vabs_s32(a_.neon_i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? 
(- a_.i32[i]) : a_.i32[i]); } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_abs_pi32(a) simde_mm_abs_pi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); if (HEDLEY_UNLIKELY(count > 31)) return simde_mm_setzero_si128(); for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { const int srcpos = count + HEDLEY_STATIC_CAST(int, i); if (srcpos > 31) { r_.i8[i] = 0; } else if (srcpos > 15) { r_.i8[i] = a_.i8[(srcpos) & 15]; } else { r_.i8[i] = b_.i8[srcpos]; } } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSSE3_NATIVE) #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_alignr_epi8(a, b, count) \ ( \ ((count) > 31) \ ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ : ( \ ((count) > 15) \ ? 
(simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) #endif #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) SIMDE_REQUIRE_CONSTANT(count) { simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); if (HEDLEY_UNLIKELY(count > 15)) return simde_mm_setzero_si64(); for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { const int srcpos = count + HEDLEY_STATIC_CAST(int, i); if (srcpos > 15) { r_.i8[i] = 0; } else if (srcpos > 7) { r_.i8[i] = a_.i8[(srcpos) & 7]; } else { r_.i8[i] = b_.i8[srcpos]; } } return simde__m64_from_private(r_); } #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) # define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_alignr_pi8(a, b, count) \ ( \ ((count) > 15) \ ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ : ( \ ((count) > 7) \ ? 
(simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) #endif #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSSE3_NATIVE) return _mm_shuffle_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) /* Mask out the bits we're not interested in. vtbl will result in 0 * for any values outside of [0, 15], so if the high bit is set it * will return 0, just like in SSSE3. */ b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); /* Convert a from an int8x16_t to an int8x8x2_t */ int8x8x2_t i; i.val[0] = vget_low_s8(a_.neon_i8); i.val[1] = vget_high_s8(a_.neon_i8); /* Table lookups */ int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); r_.neon_i8 = vcombine_s8(l, h); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) /* This is a bit ugly because of the casts and the awful type * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), 
msb_mask)); #else for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_shuffle_pi8(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); #else for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSSE3_NATIVE) return _mm_hadd_epi16(a, b); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); #else return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSSE3_NATIVE) return _mm_hadd_epi32(a, b); #elif 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); #else return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_hadd_pi16(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); #else r_.i16[0] = a_.i16[0] + a_.i16[1]; r_.i16[1] = a_.i16[2] + a_.i16[3]; r_.i16[2] = b_.i16[0] + b_.i16[1]; r_.i16[3] = b_.i16[2] + b_.i16[3]; #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_hadd_pi32(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) 
int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); #else r_.i32[0] = a_.i32[0] + a_.i32[1]; r_.i32[1] = b_.i32[0] + b_.i32[1]; #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSSE3_NATIVE) return _mm_hadds_epi16(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); #else return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_hadds_pi16(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); #else for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSSE3_NATIVE) return _mm_hsub_epi16(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); #else return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSSE3_NATIVE) return _mm_hsub_epi32(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); #else return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_hsub_pi16(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int16x4x2_t t = 
vuzp_s16(a_.neon_i16, b_.neon_i16); r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); #else r_.i16[0] = a_.i16[0] - a_.i16[1]; r_.i16[1] = a_.i16[2] - a_.i16[3]; r_.i16[2] = b_.i16[0] - b_.i16[1]; r_.i16[3] = b_.i16[2] - b_.i16[3]; #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_hsub_pi32(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); #else r_.i32[0] = a_.i32[0] - a_.i32[1]; r_.i32[1] = b_.i32[0] - b_.i32[1]; #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSSE3_NATIVE) return _mm_hsubs_epi16(a, b); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); #else return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_hsubs_epi16(a, b) 
simde_mm_hsubs_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_hsubs_pi16(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); #else for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSSE3_NATIVE) return _mm_maddubs_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) /* Zero extend a */ int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); /* Sign extend by shifting left then shifting right. 
*/ int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); /* multiply */ int16x8_t prod1 = vmulq_s16(a_even, b_even); int16x8_t prod2 = vmulq_s16(a_odd, b_odd); /* saturated add */ r_.neon_i16 = vqaddq_s16(prod1, prod2); #else for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { const int idx = HEDLEY_STATIC_CAST(int, i) << 1; int32_t ts = (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_maddubs_pi16(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); int16x8_t bi = vmovl_s8(b_.neon_i8); int16x8_t p = vmulq_s16(ai, bi); int16x4_t l = vget_low_s16(p); int16x4_t h = vget_high_s16(p); r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); #else for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { const int idx = HEDLEY_STATIC_CAST(int, i) << 1; int32_t ts = (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSSE3_NATIVE) return _mm_mulhrs_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) /* Multiply */ int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); /* Rounding narrowing shift right * narrow = (int16_t)((mul + 16384) >> 15); */ int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); /* Join together */ r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_mulhrs_pi16(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) /* Multiply */ int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); /* Rounding narrowing shift right * narrow = (int16_t)((mul + 16384) >> 15); */ int16x4_t narrow = vrshrn_n_s32(mul, 15); /* Join together */ r_.neon_i16 = narrow; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / 
sizeof(r_.i16[0])) ; i++) { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSSE3_NATIVE) return _mm_sign_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); uint8x16_t bnz_mask; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) bnz_mask = vceqzq_s8(b_.neon_i8); #else bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); #endif bnz_mask = vmvnq_u8(bnz_mask); r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? 
(a_.i8[i]) : INT8_C(0)); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSSE3_NATIVE) return _mm_sign_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); uint16x8_t bnz_mask; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) bnz_mask = vceqzq_s16(b_.neon_i16); #else bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); #endif bnz_mask = vmvnq_u16(bnz_mask); r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSSE3_NATIVE) return _mm_sign_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); uint32x4_t bnz_mask; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) bnz_mask = vceqzq_s32(b_.neon_i32); #else bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); #endif bnz_mask = vmvnq_u32(bnz_mask); r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); #else for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? 
(a_.i32[i]) : INT32_C(0)); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_sign_pi8(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); uint8x8_t bnz_mask; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) bnz_mask = vceqz_s8(b_.neon_i8); #else bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); #endif bnz_mask = vmvn_u8(bnz_mask); r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? 
(a_.i8[i]) : INT8_C(0)); } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_sign_pi16(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); uint16x4_t bnz_mask; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) bnz_mask = vceqz_s16(b_.neon_i16); #else bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); #endif bnz_mask = vmvn_u16(bnz_mask); r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] > 0) ? 
(a_.i16[i]) : INT16_C(0)); } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_sign_pi32(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); uint32x2_t bnz_mask; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) bnz_mask = vceqz_s32(b_.neon_i32); #else bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); #endif bnz_mask = vmvn_u32(bnz_mask); r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); #else for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? 
(a_.i32[i]) : INT32_C(0)); } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) # define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_SSSE3_H) */ simde-0.7.2/simde/x86/svml.h000066400000000000000000014114341400333146700155120ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #if !defined(SIMDE_X86_SVML_H) #define SIMDE_X86_SVML_H #include "fma.h" #include "avx2.h" #include "avx512/abs.h" #include "avx512/add.h" #include "avx512/cmp.h" #include "avx512/copysign.h" #include "avx512/xorsign.h" #include "avx512/div.h" #include "avx512/fmadd.h" #include "avx512/mov.h" #include "avx512/mul.h" #include "avx512/negate.h" #include "avx512/or.h" #include "avx512/set1.h" #include "avx512/setone.h" #include "avx512/setzero.h" #include "avx512/sqrt.h" #include "avx512/sub.h" #include "../simde-complex.h" #if !defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) # define SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES #endif HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) # define SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES #endif

/* Applies acos to every 32-bit float lane of a 128-bit vector.  With
 * Sleef, the u10 routine is chosen when SIMDE_ACCURACY_PREFERENCE > 1,
 * the u35 routine otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_acos_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_acos_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_acosf4_u10(a);
    #else
      return Sleef_acosf4_u35(a);
    #endif
  #else
    simde__m128_private out_;
    simde__m128_private in_ = simde__m128_to_private(a);
    const size_t lanes = sizeof(out_.f32) / sizeof(out_.f32[0]);

    SIMDE_VECTORIZE
    for (size_t lane = 0 ; lane < lanes ; lane++) {
      out_.f32[lane] = simde_math_acosf(in_.f32[lane]);
    }

    return simde__m128_from_private(out_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_acos_ps
  #define _mm_acos_ps(a) simde_mm_acos_ps(a)
#endif

/* Applies acos to every 64-bit float lane of a 128-bit vector. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_acos_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_acos_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_acosd2_u10(a);
    #else
      return Sleef_acosd2_u35(a);
    #endif
  #else
    simde__m128d_private out_;
    simde__m128d_private in_ = simde__m128d_to_private(a);
    const size_t lanes = sizeof(out_.f64) / sizeof(out_.f64[0]);

    SIMDE_VECTORIZE
    for (size_t lane = 0 ; lane < lanes ; lane++) {
      out_.f64[lane] = simde_math_acos(in_.f64[lane]);
    }

    return simde__m128d_from_private(out_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_acos_pd
  #define _mm_acos_pd(a) simde_mm_acos_pd(a)
#endif

/* 256-bit single-precision acos; processed as 128-bit halves when the
 * natural vector size is at most 128 bits, lane by lane otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_acos_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_acos_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_acosf8_u10(a);
    #else
      return Sleef_acosf8_u35(a);
    #endif
  #else
    simde__m256_private out_;
    simde__m256_private in_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      const size_t parts = sizeof(out_.m128) / sizeof(out_.m128[0]);
      for (size_t part = 0 ; part < parts ; part++) {
        out_.m128[part] = simde_mm_acos_ps(in_.m128[part]);
      }
    #else
      const size_t lanes = sizeof(out_.f32) / sizeof(out_.f32[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lanes ; lane++) {
        out_.f32[lane] = simde_math_acosf(in_.f32[lane]);
      }
    #endif

    return simde__m256_from_private(out_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_acos_ps
  #define _mm256_acos_ps(a) simde_mm256_acos_ps(a)
#endif

/* 256-bit double-precision acos. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_acos_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_acos_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_acosd4_u10(a);
    #else
      return Sleef_acosd4_u35(a);
    #endif
  #else
    simde__m256d_private out_;
    simde__m256d_private in_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      const size_t parts = sizeof(out_.m128d) / sizeof(out_.m128d[0]);
      for (size_t part = 0 ; part < parts ; part++) {
        out_.m128d[part] = simde_mm_acos_pd(in_.m128d[part]);
      }
    #else
      const size_t lanes = sizeof(out_.f64) / sizeof(out_.f64[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lanes ; lane++) {
        out_.f64[lane] = simde_math_acos(in_.f64[lane]);
      }
    #endif

    return simde__m256d_from_private(out_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_acos_pd
  #define _mm256_acos_pd(a) simde_mm256_acos_pd(a)
#endif

/* 512-bit single-precision acos; processed as 256-bit halves when the
 * natural vector size is at most 256 bits. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_acos_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_acos_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_acosf16_u10(a);
    #else
      return Sleef_acosf16_u35(a);
    #endif
  #else
    simde__m512_private out_;
    simde__m512_private in_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      const size_t parts = sizeof(out_.m256) / sizeof(out_.m256[0]);
      for (size_t part = 0 ; part < parts ; part++) {
        out_.m256[part] = simde_mm256_acos_ps(in_.m256[part]);
      }
    #else
      const size_t lanes = sizeof(out_.f32) / sizeof(out_.f32[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lanes ; lane++) {
        out_.f32[lane] = simde_math_acosf(in_.f32[lane]);
      }
    #endif

    return simde__m512_from_private(out_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_acos_ps
  #define _mm512_acos_ps(a) simde_mm512_acos_ps(a)
#endif

/* 512-bit double-precision acos. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_acos_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_acos_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_acosd8_u10(a);
    #else
      return Sleef_acosd8_u35(a);
    #endif
  #else
    simde__m512d_private out_;
    simde__m512d_private in_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      const size_t parts = sizeof(out_.m256d) / sizeof(out_.m256d[0]);
      for (size_t part = 0 ; part < parts ; part++) {
        out_.m256d[part] = simde_mm256_acos_pd(in_.m256d[part]);
      }
    #else
      const size_t lanes = sizeof(out_.f64) / sizeof(out_.f64[0]);
      SIMDE_VECTORIZE
      for (size_t lane = 0 ; lane < lanes ; lane++) {
        out_.f64[lane] = simde_math_acos(in_.f64[lane]);
      }
    #endif

    return simde__m512d_from_private(out_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_acos_pd
  #define _mm512_acos_pd(a) simde_mm512_acos_pd(a)
#endif

/* simde_mm512_mask_acos_ps continues on the next line; kept token-for-token. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_acos_ps(simde__m512 src,
simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_acos_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_acos_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_acos_ps #define _mm512_mask_acos_ps(src, k, a) simde_mm512_mask_acos_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_acos_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_acos_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_acos_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_acos_pd #define _mm512_mask_acos_pd(src, k, a) simde_mm512_mask_acos_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_acosh_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_acosh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_acoshf4_u10(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_acoshf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_acosh_ps #define _mm_acosh_ps(a) simde_mm_acosh_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_acosh_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_acosh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_acoshd2_u10(a); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_acosh(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_acosh_pd #define _mm_acosh_pd(a) simde_mm_acosh_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_acosh_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_acosh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_acoshf8_u10(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_acosh_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_acoshf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_acosh_ps #define _mm256_acosh_ps(a) simde_mm256_acosh_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_acosh_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_acosh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_acoshd4_u10(a); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_acosh_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_acosh(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_acosh_pd #define _mm256_acosh_pd(a) simde_mm256_acosh_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_acosh_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_acosh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) 
return Sleef_acoshf16_u10(a); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_acosh_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_acoshf(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_acosh_ps #define _mm512_acosh_ps(a) simde_mm512_acosh_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_acosh_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_acosh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_acoshd8_u10(a); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_acosh_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_acosh(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_acosh_pd #define _mm512_acosh_pd(a) simde_mm512_acosh_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_acosh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_acosh_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_acosh_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_acosh_ps #define _mm512_mask_acosh_ps(src, k, a) simde_mm512_mask_acosh_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_acosh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_acosh_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_acosh_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_acosh_pd #define _mm512_mask_acosh_pd(src, k, a) simde_mm512_mask_acosh_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_asin_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_asin_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_asinf4_u10(a); #else return Sleef_asinf4_u35(a); #endif #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_asinf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_asin_ps #define _mm_asin_ps(a) simde_mm_asin_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_asin_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_asin_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_asind2_u10(a); #else return Sleef_asind2_u35(a); #endif #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_asin(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_asin_pd #define _mm_asin_pd(a) simde_mm_asin_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_asin_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_asin_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) #if 
SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_asinf8_u10(a); #else return Sleef_asinf8_u35(a); #endif #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_asin_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_asinf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_asin_ps #define _mm256_asin_ps(a) simde_mm256_asin_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_asin_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_asin_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_asind4_u10(a); #else return Sleef_asind4_u35(a); #endif #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_asin_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_asin(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_asin_pd #define _mm256_asin_pd(a) simde_mm256_asin_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_asin_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_asin_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_asinf16_u10(a); #else return Sleef_asinf16_u35(a); #endif #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < 
(sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_asin_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_asinf(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_asin_ps #define _mm512_asin_ps(a) simde_mm512_asin_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_asin_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_asin_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_asind8_u10(a); #else return Sleef_asind8_u35(a); #endif #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_asin_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_asin(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_asin_pd #define _mm512_asin_pd(a) simde_mm512_asin_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_asin_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_asin_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_asin_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_asin_ps #define _mm512_mask_asin_ps(src, k, a) simde_mm512_mask_asin_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_asin_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_asin_pd(src, k, 
a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_asin_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_asin_pd #define _mm512_mask_asin_pd(src, k, a) simde_mm512_mask_asin_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_asinh_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_asinh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_asinhf4_u10(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_asinhf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_asinh_ps #define _mm_asinh_ps(a) simde_mm_asinh_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_asinh_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_asinh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_asinhd2_u10(a); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_asinh(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_asinh_pd #define _mm_asinh_pd(a) simde_mm_asinh_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_asinh_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_asinh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_asinhf8_u10(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_asinh_ps(a_.m128[i]); } #else 
SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_asinhf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_asinh_ps #define _mm256_asinh_ps(a) simde_mm256_asinh_ps(a) #endif
/* simde_mm256_asinh_pd: inverse hyperbolic sine of each f64 lane of `a`.
 * Dispatch: _mm256_asinh_pd (SVML + AVX native) -> Sleef_asinhd4_u10 (SLEEF on
 * native AVX) -> portable path: forward each m128d element to simde_mm_asinh_pd
 * when SIMDE_NATURAL_VECTOR_SIZE_LE(128), otherwise a SIMDE_VECTORIZE loop
 * calling simde_math_asinh per lane.
 * Aliased to _mm256_asinh_pd when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_asinh_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_asinh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_asinhd4_u10(a); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_asinh_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_asinh(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_asinh_pd #define _mm256_asinh_pd(a) simde_mm256_asinh_pd(a) #endif
/* simde_mm512_asinh_ps: inverse hyperbolic sine of each f32 lane of `a`.
 * Dispatch: _mm512_asinh_ps (SVML + AVX512F native) -> Sleef_asinhf16_u10 (SLEEF
 * on native AVX512F) -> portable path: forward each m256 element to
 * simde_mm256_asinh_ps when SIMDE_NATURAL_VECTOR_SIZE_LE(256), otherwise loop
 * over lanes with simde_math_asinhf.
 * Aliased to _mm512_asinh_ps when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_asinh_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_asinh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_asinhf16_u10(a); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_asinh_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_asinhf(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_asinh_ps #define _mm512_asinh_ps(a) simde_mm512_asinh_ps(a) #endif
SIMDE_FUNCTION_ATTRIBUTES simde__m512d
/* simde_mm512_asinh_pd: inverse hyperbolic sine of each f64 lane of `a`.
 * Dispatch: _mm512_asinh_pd (SVML + AVX512F native) -> Sleef_asinhd8_u10 (SLEEF
 * on native AVX512F) -> portable path: forward each m256d element to
 * simde_mm256_asinh_pd when SIMDE_NATURAL_VECTOR_SIZE_LE(256), otherwise a
 * SIMDE_VECTORIZE loop calling simde_math_asinh per lane.
 * Aliased to _mm512_asinh_pd when native aliases are enabled. */
simde_mm512_asinh_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_asinh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_asinhd8_u10(a); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_asinh_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_asinh(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_asinh_pd #define _mm512_asinh_pd(a) simde_mm512_asinh_pd(a) #endif
/* simde_mm512_mask_asinh_ps: masked form of simde_mm512_asinh_ps.
 * Native _mm512_mask_asinh_ps when SVML and AVX512F are native; otherwise
 * simde_mm512_mask_mov_ps(src, k, simde_mm512_asinh_ps(a)).
 * Aliased to _mm512_mask_asinh_ps when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_asinh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_asinh_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_asinh_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_asinh_ps #define _mm512_mask_asinh_ps(src, k, a) simde_mm512_mask_asinh_ps(src, k, a) #endif
/* simde_mm512_mask_asinh_pd: masked form of simde_mm512_asinh_pd.
 * Native _mm512_mask_asinh_pd when SVML and AVX512F are native; otherwise
 * simde_mm512_mask_mov_pd(src, k, simde_mm512_asinh_pd(a)).
 * Aliased to _mm512_mask_asinh_pd when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_asinh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_asinh_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_asinh_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_asinh_pd #define _mm512_mask_asinh_pd(src, k, a) simde_mm512_mask_asinh_pd(src, k, a) #endif
/* simde_mm_atan_ps: arctangent of each f32 lane of `a`.
 * Dispatch: _mm_atan_ps (SVML + SSE native) -> SLEEF on native SSE
 * (Sleef_atanf4_u10 when SIMDE_ACCURACY_PREFERENCE > 1, else Sleef_atanf4_u35)
 * -> SIMDE_VECTORIZE loop calling simde_math_atanf per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_atan_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_atan_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE
> 1 return Sleef_atanf4_u10(a); #else return Sleef_atanf4_u35(a); #endif #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_atanf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_atan_ps #define _mm_atan_ps(a) simde_mm_atan_ps(a) #endif
/* simde_mm_atan_pd: arctangent of each f64 lane of `a`.
 * Dispatch: _mm_atan_pd (SVML + SSE native) -> SLEEF on native SSE
 * (Sleef_atand2_u10 when SIMDE_ACCURACY_PREFERENCE > 1, else Sleef_atand2_u35)
 * -> SIMDE_VECTORIZE loop calling simde_math_atan per lane.
 * Aliased to _mm_atan_pd when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_atan_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_atan_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_atand2_u10(a); #else return Sleef_atand2_u35(a); #endif #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_atan(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_atan_pd #define _mm_atan_pd(a) simde_mm_atan_pd(a) #endif
/* simde_mm256_atan_ps: arctangent of each f32 lane of `a`.
 * Dispatch: _mm256_atan_ps (SVML + AVX native) -> SLEEF on native AVX
 * (Sleef_atanf8_u10 when SIMDE_ACCURACY_PREFERENCE > 1, else Sleef_atanf8_u35)
 * -> portable path: forward each m128 element to simde_mm_atan_ps when
 * SIMDE_NATURAL_VECTOR_SIZE_LE(128), otherwise loop over lanes with
 * simde_math_atanf.
 * Aliased to _mm256_atan_ps when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_atan_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_atan_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_atanf8_u10(a); #else return Sleef_atanf8_u35(a); #endif #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_atan_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_atanf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_atan_ps #define _mm256_atan_ps(a) simde_mm256_atan_ps(a)
#endif
/* simde_mm256_atan_pd: arctangent of each f64 lane of `a`.
 * Dispatch: _mm256_atan_pd (SVML + AVX native) -> SLEEF on native AVX
 * (Sleef_atand4_u10 when SIMDE_ACCURACY_PREFERENCE > 1, else Sleef_atand4_u35)
 * -> portable path: forward each m128d element to simde_mm_atan_pd when
 * SIMDE_NATURAL_VECTOR_SIZE_LE(128), otherwise loop over lanes with
 * simde_math_atan.
 * Aliased to _mm256_atan_pd when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_atan_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_atan_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_atand4_u10(a); #else return Sleef_atand4_u35(a); #endif #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_atan_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_atan(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_atan_pd #define _mm256_atan_pd(a) simde_mm256_atan_pd(a) #endif
/* simde_mm512_atan_ps: arctangent of each f32 lane of `a`.
 * Dispatch: _mm512_atan_ps (SVML + AVX512F native) -> SLEEF on native AVX512F
 * (Sleef_atanf16_u10 when SIMDE_ACCURACY_PREFERENCE > 1, else Sleef_atanf16_u35)
 * -> portable path: forward each m256 element to simde_mm256_atan_ps when
 * SIMDE_NATURAL_VECTOR_SIZE_LE(256), otherwise loop over lanes with
 * simde_math_atanf.
 * Aliased to _mm512_atan_ps when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_atan_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_atan_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_atanf16_u10(a); #else return Sleef_atanf16_u35(a); #endif #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_atan_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_atanf(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_atan_ps #define _mm512_atan_ps(a) simde_mm512_atan_ps(a) #endif
/* simde_mm512_atan_pd: arctangent of each f64 lane of `a`.
 * Dispatch: _mm512_atan_pd (SVML + AVX512F native) -> SLEEF on native AVX512F
 * (Sleef_atand8_u10 / Sleef_atand8_u35 by SIMDE_ACCURACY_PREFERENCE > 1)
 * -> portable path via simde_mm256_atan_pd per m256d element or a per-lane
 * simde_math_atan loop. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_atan_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_atan_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE)
&& defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_atand8_u10(a); #else return Sleef_atand8_u35(a); #endif #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_atan_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_atan(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_atan_pd #define _mm512_atan_pd(a) simde_mm512_atan_pd(a) #endif
/* simde_mm512_mask_atan_ps: masked form of simde_mm512_atan_ps.
 * Native _mm512_mask_atan_ps when SVML and AVX512F are native; otherwise
 * simde_mm512_mask_mov_ps(src, k, simde_mm512_atan_ps(a)).
 * Aliased to _mm512_mask_atan_ps when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_atan_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_atan_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_atan_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_atan_ps #define _mm512_mask_atan_ps(src, k, a) simde_mm512_mask_atan_ps(src, k, a) #endif
/* simde_mm512_mask_atan_pd: masked form of simde_mm512_atan_pd.
 * Native _mm512_mask_atan_pd when SVML and AVX512F are native; otherwise
 * simde_mm512_mask_mov_pd(src, k, simde_mm512_atan_pd(a)).
 * Aliased to _mm512_mask_atan_pd when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_atan_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_atan_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_atan_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_atan_pd #define _mm512_mask_atan_pd(src, k, a) simde_mm512_mask_atan_pd(src, k, a) #endif
/* simde_mm_atan2_ps: two-argument arctangent, lane by lane, with `a` passed as
 * the first argument and `b` as the second.
 * Dispatch: _mm_atan2_ps (SVML + SSE native) -> SLEEF on native SSE
 * (Sleef_atan2f4_u10 / Sleef_atan2f4_u35 by SIMDE_ACCURACY_PREFERENCE > 1)
 * -> SIMDE_VECTORIZE loop calling simde_math_atan2f(a lane, b lane). */
SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_atan2_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_atan2_ps(a, b); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_atan2f4_u10(a, b); #else return Sleef_atan2f4_u35(a, b); #endif #else
simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_atan2_ps #define _mm_atan2_ps(a, b) simde_mm_atan2_ps(a, b) #endif
/* simde_mm_atan2_pd: two-argument arctangent of each f64 lane pair
 * (`a` lane first, `b` lane second).
 * Dispatch: _mm_atan2_pd (SVML + SSE native) -> SLEEF on native SSE
 * (Sleef_atan2d2_u10 when SIMDE_ACCURACY_PREFERENCE > 1, else Sleef_atan2d2_u35)
 * -> SIMDE_VECTORIZE loop calling simde_math_atan2 per lane.
 * Aliased to _mm_atan2_pd when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_atan2_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_atan2_pd(a, b); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_atan2d2_u10(a, b); #else return Sleef_atan2d2_u35(a, b); #endif #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_atan2_pd #define _mm_atan2_pd(a, b) simde_mm_atan2_pd(a, b) #endif
/* simde_mm256_atan2_ps: two-argument arctangent of each f32 lane pair
 * (`a` lane first, `b` lane second).
 * Dispatch: _mm256_atan2_ps (SVML + AVX native) -> SLEEF on native AVX
 * (Sleef_atan2f8_u10 / Sleef_atan2f8_u35 by SIMDE_ACCURACY_PREFERENCE > 1)
 * -> portable path: forward each m128 element pair to simde_mm_atan2_ps when
 * SIMDE_NATURAL_VECTOR_SIZE_LE(128), otherwise loop over lanes with
 * simde_math_atan2f. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_atan2_ps (simde__m256 a, simde__m256 b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_atan2_ps(a, b); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_atan2f8_u10(a, b); #else return Sleef_atan2f8_u35(a, b); #endif #else simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_atan2_ps(a_.m128[i], b_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]); } #endif return
simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_atan2_ps #define _mm256_atan2_ps(a, b) simde_mm256_atan2_ps(a, b) #endif
/* simde_mm256_atan2_pd: two-argument arctangent of each f64 lane pair
 * (`a` lane first, `b` lane second).
 * Dispatch: _mm256_atan2_pd (SVML + AVX native) -> SLEEF on native AVX
 * (Sleef_atan2d4_u10 when SIMDE_ACCURACY_PREFERENCE > 1, else Sleef_atan2d4_u35)
 * -> portable path: forward each m128d element pair to simde_mm_atan2_pd when
 * SIMDE_NATURAL_VECTOR_SIZE_LE(128), otherwise loop over lanes with
 * simde_math_atan2.
 * Aliased to _mm256_atan2_pd when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_atan2_pd (simde__m256d a, simde__m256d b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_atan2_pd(a, b); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_atan2d4_u10(a, b); #else return Sleef_atan2d4_u35(a, b); #endif #else simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_atan2_pd(a_.m128d[i], b_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_atan2_pd #define _mm256_atan2_pd(a, b) simde_mm256_atan2_pd(a, b) #endif
/* simde_mm512_atan2_ps: two-argument arctangent of each f32 lane pair
 * (`a` lane first, `b` lane second).
 * Dispatch: _mm512_atan2_ps (SVML + AVX512F native) -> SLEEF on native AVX512F
 * (Sleef_atan2f16_u10 / Sleef_atan2f16_u35 by SIMDE_ACCURACY_PREFERENCE > 1)
 * -> portable path: forward each m256 element pair to simde_mm256_atan2_ps when
 * SIMDE_NATURAL_VECTOR_SIZE_LE(256), otherwise loop over lanes with
 * simde_math_atan2f. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_atan2_ps (simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_atan2_ps(a, b); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_atan2f16_u10(a, b); #else return Sleef_atan2f16_u35(a, b); #endif #else simde__m512_private r_, a_ = simde__m512_to_private(a), b_ = simde__m512_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_atan2_ps(a_.m256[i], b_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]); } #endif return simde__m512_from_private(r_);
#endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_atan2_ps #define _mm512_atan2_ps(a, b) simde_mm512_atan2_ps(a, b) #endif
/* simde_mm512_atan2_pd: two-argument arctangent of each f64 lane pair
 * (`a` lane first, `b` lane second).
 * Dispatch: _mm512_atan2_pd (SVML + AVX512F native) -> SLEEF on native AVX512F
 * (Sleef_atan2d8_u10 when SIMDE_ACCURACY_PREFERENCE > 1, else Sleef_atan2d8_u35)
 * -> portable path: forward each m256d element pair to simde_mm256_atan2_pd
 * when SIMDE_NATURAL_VECTOR_SIZE_LE(256), otherwise loop over lanes with
 * simde_math_atan2.
 * Aliased to _mm512_atan2_pd when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_atan2_pd (simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_atan2_pd(a, b); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_atan2d8_u10(a, b); #else return Sleef_atan2d8_u35(a, b); #endif #else simde__m512d_private r_, a_ = simde__m512d_to_private(a), b_ = simde__m512d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_atan2_pd(a_.m256d[i], b_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_atan2_pd #define _mm512_atan2_pd(a, b) simde_mm512_atan2_pd(a, b) #endif
/* simde_mm512_mask_atan2_ps: masked form of simde_mm512_atan2_ps.
 * Native _mm512_mask_atan2_ps when SVML and AVX512F are native; otherwise
 * simde_mm512_mask_mov_ps(src, k, simde_mm512_atan2_ps(a, b)).
 * Aliased to _mm512_mask_atan2_ps when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_atan2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_atan2_ps(src, k, a, b); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_atan2_ps(a, b)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_atan2_ps #define _mm512_mask_atan2_ps(src, k, a, b) simde_mm512_mask_atan2_ps(src, k, a, b) #endif
/* simde_mm512_mask_atan2_pd: masked form of simde_mm512_atan2_pd.
 * Native _mm512_mask_atan2_pd when SVML and AVX512F are native; otherwise
 * simde_mm512_mask_mov_pd(src, k, simde_mm512_atan2_pd(a, b)). */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_atan2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_atan2_pd(src, k, a, b); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_atan2_pd(a, b)); #endif } #if
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_atan2_pd #define _mm512_mask_atan2_pd(src, k, a, b) simde_mm512_mask_atan2_pd(src, k, a, b) #endif
/* simde_mm_atanh_ps: inverse hyperbolic tangent of each f32 lane of `a`.
 * Dispatch: _mm_atanh_ps (SVML + SSE native) -> Sleef_atanhf4_u10 (SLEEF on
 * native SSE) -> SIMDE_VECTORIZE loop calling simde_math_atanhf per lane.
 * Aliased to _mm_atanh_ps when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_atanh_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_atanh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_atanhf4_u10(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_atanhf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_atanh_ps #define _mm_atanh_ps(a) simde_mm_atanh_ps(a) #endif
/* simde_mm_atanh_pd: inverse hyperbolic tangent of each f64 lane of `a`.
 * Dispatch: _mm_atanh_pd (SVML + SSE native) -> Sleef_atanhd2_u10 (SLEEF on
 * native SSE) -> SIMDE_VECTORIZE loop calling simde_math_atanh per lane.
 * Aliased to _mm_atanh_pd when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_atanh_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_atanh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_atanhd2_u10(a); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_atanh(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_atanh_pd #define _mm_atanh_pd(a) simde_mm_atanh_pd(a) #endif
/* simde_mm256_atanh_ps: inverse hyperbolic tangent of each f32 lane of `a`.
 * Dispatch: _mm256_atanh_ps (SVML + AVX native) -> Sleef_atanhf8_u10 (SLEEF on
 * native AVX) -> portable path: forward each m128 element to simde_mm_atanh_ps
 * when SIMDE_NATURAL_VECTOR_SIZE_LE(128), otherwise loop over lanes with
 * simde_math_atanhf. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_atanh_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_atanh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_atanhf8_u10(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_atanh_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ;
i++) { r_.f32[i] = simde_math_atanhf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_atanh_ps #define _mm256_atanh_ps(a) simde_mm256_atanh_ps(a) #endif
/* simde_mm256_atanh_pd: inverse hyperbolic tangent of each f64 lane of `a`.
 * Dispatch: _mm256_atanh_pd (SVML + AVX native) -> Sleef_atanhd4_u10 (SLEEF on
 * native AVX) -> portable path: forward each m128d element to simde_mm_atanh_pd
 * when SIMDE_NATURAL_VECTOR_SIZE_LE(128), otherwise loop over lanes with
 * simde_math_atanh.
 * Aliased to _mm256_atanh_pd when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_atanh_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_atanh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_atanhd4_u10(a); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_atanh_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_atanh(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_atanh_pd #define _mm256_atanh_pd(a) simde_mm256_atanh_pd(a) #endif
/* simde_mm512_atanh_ps: inverse hyperbolic tangent of each f32 lane of `a`.
 * Dispatch: _mm512_atanh_ps (SVML + AVX512F native) -> Sleef_atanhf16_u10 (SLEEF
 * on native AVX512F) -> portable path: forward each m256 element to
 * simde_mm256_atanh_ps when SIMDE_NATURAL_VECTOR_SIZE_LE(256), otherwise loop
 * over lanes with simde_math_atanhf.
 * Aliased to _mm512_atanh_ps when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_atanh_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_atanh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_atanhf16_u10(a); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_atanh_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_atanhf(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_atanh_ps #define _mm512_atanh_ps(a) simde_mm512_atanh_ps(a) #endif
/* simde_mm512_atanh_pd: inverse hyperbolic tangent of each f64 lane of `a`.
 * Dispatch: _mm512_atanh_pd (SVML + AVX512F native) -> Sleef_atanhd8_u10 (SLEEF
 * on native AVX512F) -> portable path via simde_mm256_atanh_pd per m256d element
 * or a per-lane simde_math_atanh loop. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_atanh_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) &&
defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_atanh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_atanhd8_u10(a); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_atanh_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_atanh(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_atanh_pd #define _mm512_atanh_pd(a) simde_mm512_atanh_pd(a) #endif
/* simde_mm512_mask_atanh_ps: masked form of simde_mm512_atanh_ps.
 * Native _mm512_mask_atanh_ps when SVML and AVX512F are native; otherwise
 * simde_mm512_mask_mov_ps(src, k, simde_mm512_atanh_ps(a)).
 * Aliased to _mm512_mask_atanh_ps when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_atanh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_atanh_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_atanh_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_atanh_ps #define _mm512_mask_atanh_ps(src, k, a) simde_mm512_mask_atanh_ps(src, k, a) #endif
/* simde_mm512_mask_atanh_pd: masked form of simde_mm512_atanh_pd.
 * Native _mm512_mask_atanh_pd when SVML and AVX512F are native; otherwise
 * simde_mm512_mask_mov_pd(src, k, simde_mm512_atanh_pd(a)).
 * Aliased to _mm512_mask_atanh_pd when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_atanh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_atanh_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_atanh_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_atanh_pd #define _mm512_mask_atanh_pd(src, k, a) simde_mm512_mask_atanh_pd(src, k, a) #endif
/* simde_mm_cbrt_ps: cube root of each f32 lane of `a`.
 * Dispatch: _mm_cbrt_ps (SVML + SSE native) -> Sleef_cbrtf4_u10 (SLEEF on native
 * SSE) -> SIMDE_VECTORIZE loop calling simde_math_cbrtf per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cbrt_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_cbrt_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_cbrtf4_u10(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_cbrtf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_cbrt_ps #define _mm_cbrt_ps(a) simde_mm_cbrt_ps(a) #endif
/* simde_mm_cbrt_pd: cube root of each f64 lane of `a`.
 * Dispatch: _mm_cbrt_pd (SVML + SSE native) -> Sleef_cbrtd2_u10 (SLEEF on native
 * SSE) -> SIMDE_VECTORIZE loop calling simde_math_cbrt per lane.
 * Aliased to _mm_cbrt_pd when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cbrt_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_cbrt_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_cbrtd2_u10(a); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_cbrt(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_cbrt_pd #define _mm_cbrt_pd(a) simde_mm_cbrt_pd(a) #endif
/* simde_mm256_cbrt_ps: cube root of each f32 lane of `a`.
 * Dispatch: _mm256_cbrt_ps (SVML + AVX native) -> Sleef_cbrtf8_u10 (SLEEF on
 * native AVX) -> portable path: forward each m128 element to simde_mm_cbrt_ps
 * when SIMDE_NATURAL_VECTOR_SIZE_LE(128), otherwise loop over lanes with
 * simde_math_cbrtf.
 * Aliased to _mm256_cbrt_ps when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_cbrt_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_cbrt_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_cbrtf8_u10(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_cbrt_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_cbrtf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_cbrt_ps #define _mm256_cbrt_ps(a) simde_mm256_cbrt_ps(a) #endif
/* simde_mm256_cbrt_pd: cube root of each f64 lane of `a`.
 * Dispatch: _mm256_cbrt_pd (SVML + AVX native) -> Sleef_cbrtd4_u10 (SLEEF on
 * native AVX) -> portable path via simde_mm_cbrt_pd per m128d element or a
 * per-lane simde_math_cbrt loop. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_cbrt_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_cbrt_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_cbrtd4_u10(a); #else
simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_cbrt_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_cbrt(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_cbrt_pd #define _mm256_cbrt_pd(a) simde_mm256_cbrt_pd(a) #endif
/* simde_mm512_cbrt_ps: cube root of each f32 lane of `a`.
 * Dispatch: _mm512_cbrt_ps (SVML + AVX512F native) -> Sleef_cbrtf16_u10 (SLEEF on
 * native AVX512F) -> portable path: forward each m256 element to
 * simde_mm256_cbrt_ps when SIMDE_NATURAL_VECTOR_SIZE_LE(256), otherwise loop
 * over lanes with simde_math_cbrtf.
 * Aliased to _mm512_cbrt_ps when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_cbrt_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cbrt_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_cbrtf16_u10(a); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_cbrt_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_cbrtf(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_cbrt_ps #define _mm512_cbrt_ps(a) simde_mm512_cbrt_ps(a) #endif
/* simde_mm512_cbrt_pd: cube root of each f64 lane of `a`.
 * Dispatch: _mm512_cbrt_pd (SVML + AVX512F native) -> Sleef_cbrtd8_u10 (SLEEF on
 * native AVX512F) -> portable path: forward each m256d element to
 * simde_mm256_cbrt_pd when SIMDE_NATURAL_VECTOR_SIZE_LE(256), otherwise loop
 * over lanes with simde_math_cbrt. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_cbrt_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cbrt_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_cbrtd8_u10(a); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_cbrt_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_cbrt(a_.f64[i]); } #endif
return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_cbrt_pd #define _mm512_cbrt_pd(a) simde_mm512_cbrt_pd(a) #endif
/* simde_mm512_mask_cbrt_ps: masked form of simde_mm512_cbrt_ps.
 * Native _mm512_mask_cbrt_ps when SVML and AVX512F are native; otherwise
 * simde_mm512_mask_mov_ps(src, k, simde_mm512_cbrt_ps(a)).
 * Aliased to _mm512_mask_cbrt_ps when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_cbrt_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cbrt_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_cbrt_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cbrt_ps #define _mm512_mask_cbrt_ps(src, k, a) simde_mm512_mask_cbrt_ps(src, k, a) #endif
/* simde_mm512_mask_cbrt_pd: masked form of simde_mm512_cbrt_pd.
 * Native _mm512_mask_cbrt_pd when SVML and AVX512F are native; otherwise
 * simde_mm512_mask_mov_pd(src, k, simde_mm512_cbrt_pd(a)).
 * Aliased to _mm512_mask_cbrt_pd when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_cbrt_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cbrt_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_cbrt_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cbrt_pd #define _mm512_mask_cbrt_pd(src, k, a) simde_mm512_mask_cbrt_pd(src, k, a) #endif
/* simde_mm_cexp_ps: complex exponential over adjacent f32 lane pairs.
 * Native _mm_cexp_ps when SVML and SSE are native (there is no SLEEF branch).
 * Portable path steps two lanes at a time: lanes i and i+1 are combined with
 * SIMDE_MATH_CMPLXF (presumably as real and imaginary parts), passed through
 * simde_math_cexpf, and the result's real part is written to lane i and its
 * imaginary part to lane i+1.
 * Aliased to _mm_cexp_ps when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cexp_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_cexp_ps(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { simde_cfloat32 val = simde_math_cexpf(SIMDE_MATH_CMPLXF(a_.f32[i], a_.f32[i+1])); r_.f32[ i ] = simde_math_crealf(val); r_.f32[i + 1] = simde_math_cimagf(val); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_cexp_ps #define _mm_cexp_ps(a) simde_mm_cexp_ps(a) #endif
/* simde_mm256_cexp_ps: complex exponential over adjacent f32 lane pairs of a
 * simde__m256. Native _mm256_cexp_ps when SVML and AVX are native; otherwise the
 * same pairwise simde_math_cexpf loop (step of 2 lanes), real part to lane i and
 * imaginary part to lane i+1. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_cexp_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_cexp_ps(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); SIMDE_VECTORIZE for
(size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { simde_cfloat32 val = simde_math_cexpf(SIMDE_MATH_CMPLXF(a_.f32[i], a_.f32[i+1])); r_.f32[ i ] = simde_math_crealf(val); r_.f32[i + 1] = simde_math_cimagf(val); } return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_cexp_ps #define _mm256_cexp_ps(a) simde_mm256_cexp_ps(a) #endif
/* simde_mm_cos_ps: cosine of each f32 lane of `a`.
 * Dispatch: _mm_cos_ps (SVML + SSE native) -> SLEEF on native SSE
 * (Sleef_cosf4_u10 when SIMDE_ACCURACY_PREFERENCE > 1, else Sleef_cosf4_u35)
 * -> SIMDE_VECTORIZE loop calling simde_math_cosf per lane.
 * Aliased to _mm_cos_ps when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cos_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_cos_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_cosf4_u10(a); #else return Sleef_cosf4_u35(a); #endif #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_cosf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_cos_ps #define _mm_cos_ps(a) simde_mm_cos_ps(a) #endif
/* simde_mm_cos_pd: cosine of each f64 lane of `a`.
 * Dispatch: _mm_cos_pd (SVML + SSE native) -> SLEEF on native SSE
 * (Sleef_cosd2_u10 when SIMDE_ACCURACY_PREFERENCE > 1, else Sleef_cosd2_u35)
 * -> SIMDE_VECTORIZE loop calling simde_math_cos per lane.
 * Aliased to _mm_cos_pd when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cos_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_cos_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_cosd2_u10(a); #else return Sleef_cosd2_u35(a); #endif #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_cos(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_cos_pd #define _mm_cos_pd(a) simde_mm_cos_pd(a) #endif
/* simde_mm256_cos_ps: cosine of each f32 lane of `a`.
 * Dispatch: _mm256_cos_ps (SVML + AVX native) -> SLEEF on native AVX
 * (Sleef_cosf8_u10 / Sleef_cosf8_u35 by SIMDE_ACCURACY_PREFERENCE > 1)
 * -> portable path via simde_mm_cos_ps per m128 element or a per-lane
 * simde_math_cosf loop. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_cos_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_cos_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) &&
defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_cosf8_u10(a); #else return Sleef_cosf8_u35(a); #endif #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_cos_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_cosf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_cos_ps #define _mm256_cos_ps(a) simde_mm256_cos_ps(a) #endif
/* simde_mm256_cos_pd: cosine of each f64 lane of `a`.
 * Dispatch: _mm256_cos_pd (SVML + AVX native) -> SLEEF on native AVX
 * (Sleef_cosd4_u10 when SIMDE_ACCURACY_PREFERENCE > 1, else Sleef_cosd4_u35)
 * -> portable path: forward each m128d element to simde_mm_cos_pd when
 * SIMDE_NATURAL_VECTOR_SIZE_LE(128), otherwise loop over lanes with
 * simde_math_cos.
 * Aliased to _mm256_cos_pd when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_cos_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_cos_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_cosd4_u10(a); #else return Sleef_cosd4_u35(a); #endif #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_cos_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_cos(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_cos_pd #define _mm256_cos_pd(a) simde_mm256_cos_pd(a) #endif
/* simde_mm512_cos_ps: cosine of each f32 lane of `a`.
 * Dispatch: _mm512_cos_ps (SVML + AVX512F native) -> SLEEF on native AVX512F
 * (Sleef_cosf16_u10 / Sleef_cosf16_u35 by SIMDE_ACCURACY_PREFERENCE > 1)
 * -> portable path: forward each m256 element to simde_mm256_cos_ps when
 * SIMDE_NATURAL_VECTOR_SIZE_LE(256), otherwise loop over lanes with
 * simde_math_cosf. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_cos_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cos_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_cosf16_u10(a); #else return Sleef_cosf16_u35(a); #endif #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t
i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_cos_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_cosf(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_cos_ps #define _mm512_cos_ps(a) simde_mm512_cos_ps(a) #endif
/* simde_mm512_cos_pd: cosine of each f64 lane of `a`.
 * Dispatch: _mm512_cos_pd (SVML + AVX512F native) -> SLEEF on native AVX512F
 * (Sleef_cosd8_u10 when SIMDE_ACCURACY_PREFERENCE > 1, else Sleef_cosd8_u35)
 * -> portable path: forward each m256d element to simde_mm256_cos_pd when
 * SIMDE_NATURAL_VECTOR_SIZE_LE(256), otherwise loop over lanes with
 * simde_math_cos.
 * Aliased to _mm512_cos_pd when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_cos_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cos_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_cosd8_u10(a); #else return Sleef_cosd8_u35(a); #endif #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_cos_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_cos(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_cos_pd #define _mm512_cos_pd(a) simde_mm512_cos_pd(a) #endif
/* simde_mm512_mask_cos_ps: masked form of simde_mm512_cos_ps.
 * Native _mm512_mask_cos_ps when SVML and AVX512F are native; otherwise
 * simde_mm512_mask_mov_ps(src, k, simde_mm512_cos_ps(a)).
 * Aliased to _mm512_mask_cos_ps when native aliases are enabled. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_cos_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cos_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_cos_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cos_ps #define _mm512_mask_cos_ps(src, k, a) simde_mm512_mask_cos_ps(src, k, a) #endif
/* simde_mm512_mask_cos_pd: masked form of simde_mm512_cos_pd.
 * Native _mm512_mask_cos_pd when SVML and AVX512F are native; otherwise
 * simde_mm512_mask_mov_pd(src, k, simde_mm512_cos_pd(a)). */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_cos_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cos_pd(src, k, a); #else
return simde_mm512_mask_mov_pd(src, k, simde_mm512_cos_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cos_pd #define _mm512_mask_cos_pd(src, k, a) simde_mm512_mask_cos_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_x_mm_deg2rad_ps(simde__m128 a) { #if SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_mm_mul_ps(a, simde_mm_set1_ps(SIMDE_MATH_PI_OVER_180F)); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vmulq_n_f32(a_.neon_i32, SIMDE_MATH_PI_OVER_180F); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F; #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) const __typeof__(r_.f32) tmp = { SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F }; r_.f32 = a_.f32 * tmp; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_deg2radf(a_.f32[i]); } #endif return simde__m128_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_x_mm_deg2rad_pd(simde__m128d a) { #if SIMDE_NATURAL_VECTOR_SIZE_GE(128) return simde_mm_mul_pd(a, simde_mm_set1_pd(SIMDE_MATH_PI_OVER_180)); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vmulq_n_f64(a_.neon_i64, SIMDE_MATH_PI_OVER_180); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180; #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 }; r_.f64 = a_.f64 * tmp; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_deg2rad(a_.f64[i]); } #endif return simde__m128d_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_x_mm256_deg2rad_ps(simde__m256 a) { #if 
SIMDE_NATURAL_VECTOR_SIZE_GE(256) return simde_mm256_mul_ps(a, simde_mm256_set1_ps(SIMDE_MATH_PI_OVER_180F)); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_x_mm_deg2rad_ps(a_.m128[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F; #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) const __typeof__(r_.f32) tmp = { SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F }; r_.f32 = a_.f32 * tmp; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_deg2radf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_x_mm256_deg2rad_pd(simde__m256d a) { #if SIMDE_NATURAL_VECTOR_SIZE_GE(256) return simde_mm256_mul_pd(a, simde_mm256_set1_pd(SIMDE_MATH_PI_OVER_180)); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_x_mm_deg2rad_pd(a_.m128d[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180; #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 }; r_.f64 = a_.f64 * tmp; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_deg2rad(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_x_mm512_deg2rad_ps(simde__m512 a) { #if SIMDE_NATURAL_VECTOR_SIZE_GE(512) return 
simde_mm512_mul_ps(a, simde_mm512_set1_ps(SIMDE_MATH_PI_OVER_180F)); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_x_mm256_deg2rad_ps(a_.m256[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F; #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) const __typeof__(r_.f32) tmp = { SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F }; r_.f32 = a_.f32 * tmp; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_deg2radf(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_x_mm512_deg2rad_pd(simde__m512d a) { #if SIMDE_NATURAL_VECTOR_SIZE_GE(512) return simde_mm512_mul_pd(a, simde_mm512_set1_pd(SIMDE_MATH_PI_OVER_180)); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_x_mm256_deg2rad_pd(a_.m256d[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180; #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 }; r_.f64 = a_.f64 * tmp; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < 
(sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_deg2rad(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cosd_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_cosd_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_cosf4_u10(simde_x_mm_deg2rad_ps(a)); #else return Sleef_cosf4_u35(simde_x_mm_deg2rad_ps(a)); #endif #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i])); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_cosd_ps #define _mm_cosd_ps(a) simde_mm_cosd_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cosd_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_cosd_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_cosd2_u10(simde_x_mm_deg2rad_pd(a)); #else return Sleef_cosd2_u35(simde_x_mm_deg2rad_pd(a)); #endif #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i])); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_cosd_pd #define _mm_cosd_pd(a) simde_mm_cosd_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_cosd_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_cosd_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_cosf8_u10(simde_x_mm256_deg2rad_ps(a)); 
#else return Sleef_cosf8_u35(simde_x_mm256_deg2rad_ps(a)); #endif #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_cosd_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i])); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_cosd_ps #define _mm256_cosd_ps(a) simde_mm256_cosd_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_cosd_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_cosd_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_cosd4_u10(simde_x_mm256_deg2rad_pd(a)); #else return Sleef_cosd4_u35(simde_x_mm256_deg2rad_pd(a)); #endif #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_cosd_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i])); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_cosd_pd #define _mm256_cosd_pd(a) simde_mm256_cosd_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_cosd_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cosd_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_cosf16_u10(simde_x_mm512_deg2rad_ps(a)); #else return Sleef_cosf16_u35(simde_x_mm512_deg2rad_ps(a)); #endif #else 
simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_cosd_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i])); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_cosd_ps #define _mm512_cosd_ps(a) simde_mm512_cosd_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_cosd_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cosd_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_cosd8_u10(simde_x_mm512_deg2rad_pd(a)); #else return Sleef_cosd8_u35(simde_x_mm512_deg2rad_pd(a)); #endif #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_cosd_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i])); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_cosd_pd #define _mm512_cosd_pd(a) simde_mm512_cosd_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_cosd_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cosd_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_cosd_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cosd_ps #define _mm512_mask_cosd_ps(src, k, a) simde_mm512_mask_cosd_ps(src, k, a) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_cosd_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cosd_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_cosd_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cosd_pd #define _mm512_mask_cosd_pd(src, k, a) simde_mm512_mask_cosd_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cosh_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_cosh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_coshf4_u10(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_coshf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_cosh_ps #define _mm_cosh_ps(a) simde_mm_cosh_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cosh_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_cosh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_coshd2_u10(a); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_cosh(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_cosh_pd #define _mm_cosh_pd(a) simde_mm_cosh_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_cosh_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_cosh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_coshf8_u10(a); #else 
simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_cosh_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_coshf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_cosh_ps #define _mm256_cosh_ps(a) simde_mm256_cosh_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_cosh_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_cosh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_coshd4_u10(a); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_cosh_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_cosh(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_cosh_pd #define _mm256_cosh_pd(a) simde_mm256_cosh_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_cosh_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cosh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_coshf16_u10(a); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_cosh_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_coshf(a_.f32[i]); } #endif return 
simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_cosh_ps #define _mm512_cosh_ps(a) simde_mm512_cosh_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_cosh_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cosh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_coshd8_u10(a); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_cosh_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_cosh(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_cosh_pd #define _mm512_cosh_pd(a) simde_mm512_cosh_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_cosh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cosh_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_cosh_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cosh_ps #define _mm512_mask_cosh_ps(src, k, a) simde_mm512_mask_cosh_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_cosh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cosh_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_cosh_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cosh_pd #define _mm512_mask_cosh_pd(src, k, a) simde_mm512_mask_cosh_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_div_epi8 (simde__m128i a, 
simde__m128i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) return _mm_div_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = a_.i8 / b_.i8; #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i8x4_div(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[i] / b_.i8[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_div_epi8 #define _mm_div_epi8(a, b) simde_mm_div_epi8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_div_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) return _mm_div_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = a_.i16 / b_.i16; #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x4_div(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] / b_.i16[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_div_epi16 #define _mm_div_epi16(a, b) simde_mm_div_epi16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_div_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) return _mm_div_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 / b_.i32; #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_div(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { 
r_.i32[i] = a_.i32[i] / b_.i32[i]; } #endif return simde__m128i_from_private(r_); #endif } #define simde_mm_idiv_epi32(a, b) simde_mm_div_epi32(a, b) #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_div_epi32 #define _mm_div_epi32(a, b) simde_mm_div_epi32(a, b) #undef _mm_idiv_epi32 #define _mm_idiv_epi32(a, b) simde_mm_div_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_div_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) return _mm_div_epi64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 / b_.i64; #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i64x4_div(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[i] / b_.i64[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_div_epi64 #define _mm_div_epi64(a, b) simde_mm_div_epi64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_div_epu8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) return _mm_div_epu8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u8 = a_.u8 / b_.u8; #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u8x16_div(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = a_.u8[i] / b_.u8[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_div_epu8 #define _mm_div_epu8(a, b) simde_mm_div_epu8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_div_epu16 (simde__m128i a, simde__m128i b) { #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) return _mm_div_epu16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u16 = a_.u16 / b_.u16; #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u16x16_div(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = a_.u16[i] / b_.u16[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_div_epu16 #define _mm_div_epu16(a, b) simde_mm_div_epu16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_div_epu32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) return _mm_div_epu32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u32 = a_.u32 / b_.u32; #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u32x16_div(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] / b_.u32[i]; } #endif return simde__m128i_from_private(r_); #endif } #define simde_mm_udiv_epi32(a, b) simde_mm_div_epu32(a, b) #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_div_epu32 #define _mm_div_epu32(a, b) simde_mm_div_epu32(a, b) #undef _mm_udiv_epi32 #define _mm_udiv_epi32(a, b) simde_mm_div_epu32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_div_epu64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) return _mm_div_epu64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u64 = a_.u64 / b_.u64; #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = 
wasm_u64x16_div(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = a_.u64[i] / b_.u64[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_div_epu64 #define _mm_div_epu64(a, b) simde_mm_div_epu64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_div_epi8 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_div_epi8(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = a_.i8 / b_.i8; #else #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { r_.m128i[i] = simde_mm_div_epi8(a_.m128i[i], b_.m128i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[i] / b_.i8[i]; } #endif #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_div_epi8 #define _mm256_div_epi8(a, b) simde_mm256_div_epi8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_div_epi16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_div_epi16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = a_.i16 / b_.i16; #else #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { r_.m128i[i] = simde_mm_div_epi16(a_.m128i[i], b_.m128i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] / b_.i16[i]; } #endif #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) 
#undef _mm256_div_epi16 #define _mm256_div_epi16(a, b) simde_mm256_div_epi16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_div_epi32 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_div_epi32(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 / b_.i32; #else #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { r_.m128i[i] = simde_mm_div_epi32(a_.m128i[i], b_.m128i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] / b_.i32[i]; } #endif #endif return simde__m256i_from_private(r_); #endif } #define simde_mm256_idiv_epi32(a, b) simde_mm256_div_epi32(a, b) #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_div_epi32 #define _mm256_div_epi32(a, b) simde_mm256_div_epi32(a, b) #undef _mm256_idiv_epi32 #define _mm256_idiv_epi32(a, b) simde_mm256_div_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_div_epi64 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_div_epi64(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 / b_.i64; #else #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { r_.m128i[i] = simde_mm_div_epi64(a_.m128i[i], b_.m128i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[i] / b_.i64[i]; } #endif #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_div_epi64 #define _mm256_div_epi64(a, b) simde_mm256_div_epi64((a), (b)) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_div_epu8 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_div_epu8(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u8 = a_.u8 / b_.u8; #else #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { r_.m128i[i] = simde_mm_div_epu8(a_.m128i[i], b_.m128i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = a_.u8[i] / b_.u8[i]; } #endif #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_div_epu8 #define _mm256_div_epu8(a, b) simde_mm256_div_epu8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_div_epu16 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_div_epu16(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u16 = a_.u16 / b_.u16; #else #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { r_.m128i[i] = simde_mm_div_epu16(a_.m128i[i], b_.m128i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = a_.u16[i] / b_.u16[i]; } #endif #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_div_epu16 #define _mm256_div_epu16(a, b) simde_mm256_div_epu16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_div_epu32 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_div_epu32(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = 
simde__m256i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u32 = a_.u32 / b_.u32; #else #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { r_.m128i[i] = simde_mm_div_epu32(a_.m128i[i], b_.m128i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] / b_.u32[i]; } #endif #endif return simde__m256i_from_private(r_); #endif } #define simde_mm256_udiv_epi32(a, b) simde_mm256_div_epu32(a, b) #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_div_epu32 #define _mm256_div_epu32(a, b) simde_mm256_div_epu32(a, b) #undef _mm256_udiv_epi32 #define _mm256_udiv_epi32(a, b) simde_mm256_div_epu32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_div_epu64 (simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_div_epu64(a, b); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u64 = a_.u64 / b_.u64; #else #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { r_.m128i[i] = simde_mm_div_epu64(a_.m128i[i], b_.m128i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = a_.u64[i] / b_.u64[i]; } #endif #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_div_epu64 #define _mm256_div_epu64(a, b) simde_mm256_div_epu64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_div_epi8 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_div_epi8(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = a_.i8 / b_.i8; #else #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_div_epi8(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[i] / b_.i8[i]; } #endif #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_div_epi8 #define _mm512_div_epi8(a, b) simde_mm512_div_epi8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_div_epi16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_div_epi16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = a_.i16 / b_.i16; #else #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_div_epi16(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] / b_.i16[i]; } #endif #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_div_epi16 #define _mm512_div_epi16(a, b) simde_mm512_div_epi16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_div_epi32 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_div_epi32(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 / b_.i32; #else #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_div_epi32(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / 
sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] / b_.i32[i]; } #endif #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_div_epi32 #define _mm512_div_epi32(a, b) simde_mm512_div_epi32((a), (b)) #endif
/* Masked i32 division: uses _mm512_mask_div_epi32 when SVML and AVX-512F are native; otherwise computes simde_mm512_div_epi32(a, b) and combines it with src under mask k via simde_mm512_mask_mov_epi32. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_div_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_div_epi32(src, k, a, b); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_div_epi32(a, b)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_div_epi32 #define _mm512_mask_div_epi32(src, k, a, b) simde_mm512_mask_div_epi32(src, k, a, b) #endif
/* Divides each i64 lane of a by the matching lane of b. Uses _mm512_div_epi64 when SVML and AVX-512F are native; otherwise vector-subscript division, simde_mm256_div_epi64 on each m256i sub-vector, or a per-lane scalar loop. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_div_epi64 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_div_epi64(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 / b_.i64; #else #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_div_epi64(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[i] / b_.i64[i]; } #endif #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_div_epi64 #define _mm512_div_epi64(a, b) simde_mm512_div_epi64((a), (b)) #endif
/* Divides each u8 lane of a by the matching lane of b. Uses _mm512_div_epu8 when SVML and AVX-512F are native; otherwise vector-subscript division, simde_mm256_div_epu8 on each m256i sub-vector, or a per-lane scalar loop. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_div_epu8 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_div_epu8(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u8
= a_.u8 / b_.u8; #else #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_div_epu8(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = a_.u8[i] / b_.u8[i]; } #endif #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_div_epu8 #define _mm512_div_epu8(a, b) simde_mm512_div_epu8((a), (b)) #endif
/* Divides each u16 lane of a by the matching lane of b. Uses _mm512_div_epu16 when SVML and AVX-512F are native; otherwise vector-subscript division, simde_mm256_div_epu16 on each m256i sub-vector, or a per-lane scalar loop. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_div_epu16 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_div_epu16(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u16 = a_.u16 / b_.u16; #else #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_div_epu16(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = a_.u16[i] / b_.u16[i]; } #endif #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_div_epu16 #define _mm512_div_epu16(a, b) simde_mm512_div_epu16((a), (b)) #endif
/* Divides each u32 lane of a by the matching lane of b. Uses _mm512_div_epu32 when SVML and AVX-512F are native; otherwise vector-subscript division, simde_mm256_div_epu32 on each m256i sub-vector, or a per-lane scalar loop. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_div_epu32 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_div_epu32(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u32 = a_.u32 / b_.u32; #else #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_div_epu32(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i <
(sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] / b_.u32[i]; } #endif #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_div_epu32 #define _mm512_div_epu32(a, b) simde_mm512_div_epu32((a), (b)) #endif
/* Masked u32 division: uses _mm512_mask_div_epu32 when SVML and AVX-512F are native; otherwise computes simde_mm512_div_epu32(a, b) and combines it with src under mask k via simde_mm512_mask_mov_epi32. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_div_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_div_epu32(src, k, a, b); #else return simde_mm512_mask_mov_epi32(src, k, simde_mm512_div_epu32(a, b)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_div_epu32 #define _mm512_mask_div_epu32(src, k, a, b) simde_mm512_mask_div_epu32(src, k, a, b) #endif
/* Divides each u64 lane of a by the matching lane of b. Uses _mm512_div_epu64 when SVML and AVX-512F are native; otherwise vector-subscript division, simde_mm256_div_epu64 on each m256i sub-vector, or a per-lane scalar loop. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_div_epu64 (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_div_epu64(a, b); #else simde__m512i_private r_, a_ = simde__m512i_to_private(a), b_ = simde__m512i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u64 = a_.u64 / b_.u64; #else #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { r_.m256i[i] = simde_mm256_div_epu64(a_.m256i[i], b_.m256i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = a_.u64[i] / b_.u64[i]; } #endif #endif return simde__m512i_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_div_epu64 #define _mm512_div_epu64(a, b) simde_mm512_div_epu64((a), (b)) #endif
/* Applies erf to each f32 lane of a. Uses _mm_erf_ps when SVML and SSE are native, Sleef_erff4_u10 when Sleef is enabled with native SSE, otherwise simde_math_erff per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_erf_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_erf_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_erff4_u10(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a);
SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_erff(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_erf_ps #define _mm_erf_ps(a) simde_mm_erf_ps(a) #endif
/* Applies erf to each f64 lane of a. Uses _mm_erf_pd when SVML and SSE are native, Sleef_erfd2_u10 when Sleef is enabled with native SSE, otherwise simde_math_erf per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_erf_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_erf_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_erfd2_u10(a); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_erf(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_erf_pd #define _mm_erf_pd(a) simde_mm_erf_pd(a) #endif
/* Applies erf to each f32 lane of a. Uses _mm256_erf_ps when SVML and AVX are native, Sleef_erff8_u10 when Sleef is enabled with native AVX; otherwise simde_mm_erf_ps on each m128 sub-vector or simde_math_erff per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_erf_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_erf_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_erff8_u10(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_erf_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_erff(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_erf_ps #define _mm256_erf_ps(a) simde_mm256_erf_ps(a) #endif
/* Applies erf to each f64 lane of a. Uses _mm256_erf_pd when SVML and AVX are native, Sleef_erfd4_u10 when Sleef is enabled with native AVX; otherwise simde_mm_erf_pd on each m128d sub-vector or simde_math_erf per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_erf_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_erf_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_erfd4_u10(a); #else simde__m256d_private
r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_erf_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_erf(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_erf_pd #define _mm256_erf_pd(a) simde_mm256_erf_pd(a) #endif
/* Applies erf to each f32 lane of a. Uses _mm512_erf_ps when SVML and AVX-512F are native, Sleef_erff16_u10 when Sleef is enabled with native AVX-512F; otherwise simde_mm256_erf_ps on each m256 sub-vector or simde_math_erff per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_erf_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_erf_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_erff16_u10(a); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_erf_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_erff(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_erf_ps #define _mm512_erf_ps(a) simde_mm512_erf_ps(a) #endif
/* Applies erf to each f64 lane of a. Uses _mm512_erf_pd when SVML and AVX-512F are native, Sleef_erfd8_u10 when Sleef is enabled with native AVX-512F; otherwise simde_mm256_erf_pd on each m256d sub-vector or simde_math_erf per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_erf_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_erf_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_erfd8_u10(a); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_erf_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_erf(a_.f64[i]); } #endif return simde__m512d_from_private(r_);
#endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_erf_pd #define _mm512_erf_pd(a) simde_mm512_erf_pd(a) #endif
/* Masked erf (f32): uses _mm512_mask_erf_ps when SVML and AVX-512F are native; otherwise computes simde_mm512_erf_ps(a) and combines it with src under mask k via simde_mm512_mask_mov_ps. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_erf_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_erf_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_erf_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_erf_ps #define _mm512_mask_erf_ps(src, k, a) simde_mm512_mask_erf_ps(src, k, a) #endif
/* Masked erf (f64): uses _mm512_mask_erf_pd when SVML and AVX-512F are native; otherwise computes simde_mm512_erf_pd(a) and combines it with src under mask k via simde_mm512_mask_mov_pd. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_erf_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_erf_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_erf_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_erf_pd #define _mm512_mask_erf_pd(src, k, a) simde_mm512_mask_erf_pd(src, k, a) #endif
/* Applies erfc to each f32 lane of a. Uses _mm_erfc_ps when SVML and SSE are native, Sleef_erfcf4_u15 when Sleef is enabled with native SSE, otherwise simde_math_erfcf per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_erfc_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_erfc_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_erfcf4_u15(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_erfcf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_erfc_ps #define _mm_erfc_ps(a) simde_mm_erfc_ps(a) #endif
/* Applies erfc to each f64 lane of a. Uses _mm_erfc_pd when SVML and SSE are native, Sleef_erfcd2_u15 when Sleef is enabled with native SSE, otherwise simde_math_erfc per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_erfc_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_erfc_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_erfcd2_u15(a); #else simde__m128d_private r_, a_ =
simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_erfc(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_erfc_pd #define _mm_erfc_pd(a) simde_mm_erfc_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_erfc_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_erfc_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_erfcf8_u15(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_erfc_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_erfcf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_erfc_ps #define _mm256_erfc_ps(a) simde_mm256_erfc_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_erfc_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_erfc_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_erfcd4_u15(a); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_erfc_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_erfc(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_erfc_pd #define _mm256_erfc_pd(a) simde_mm256_erfc_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_erfc_ps 
(simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_erfc_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_erfcf16_u15(a); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_erfc_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_erfcf(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_erfc_ps #define _mm512_erfc_ps(a) simde_mm512_erfc_ps(a) #endif
/* Applies erfc to each f64 lane of a. Uses _mm512_erfc_pd when SVML and AVX-512F are native, Sleef_erfcd8_u15 when Sleef is enabled with native AVX-512F; otherwise simde_mm256_erfc_pd on each m256d sub-vector or simde_math_erfc per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_erfc_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_erfc_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_erfcd8_u15(a); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_erfc_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_erfc(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_erfc_pd #define _mm512_erfc_pd(a) simde_mm512_erfc_pd(a) #endif
/* Masked erfc (f32): uses _mm512_mask_erfc_ps when SVML and AVX-512F are native; otherwise computes simde_mm512_erfc_ps(a) and combines it with src under mask k via simde_mm512_mask_mov_ps. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_erfc_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_erfc_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfc_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_erfc_ps #define _mm512_mask_erfc_ps(src, k, a)
simde_mm512_mask_erfc_ps(src, k, a) #endif
/* Masked erfc (f64): uses _mm512_mask_erfc_pd when SVML and AVX-512F are native; otherwise computes simde_mm512_erfc_pd(a) and combines it with src under mask k via simde_mm512_mask_mov_pd. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_erfc_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_erfc_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfc_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_erfc_pd #define _mm512_mask_erfc_pd(src, k, a) simde_mm512_mask_erfc_pd(src, k, a) #endif
/* Applies exp to each f32 lane of a. Uses _mm_exp_ps when SVML and SSE are native, Sleef_expf4_u10 when Sleef is enabled with native SSE, otherwise simde_math_expf per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_exp_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_exp_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_expf4_u10(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_expf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_exp_ps #define _mm_exp_ps(a) simde_mm_exp_ps(a) #endif
/* Applies exp to each f64 lane of a. Uses _mm_exp_pd when SVML and SSE are native, Sleef_expd2_u10 when Sleef is enabled with native SSE, otherwise simde_math_exp per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_exp_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_exp_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_expd2_u10(a); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_exp(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_exp_pd #define _mm_exp_pd(a) simde_mm_exp_pd(a) #endif
/* Applies exp to each f32 lane of a. Uses _mm256_exp_ps when SVML and AVX are native, Sleef_expf8_u10 when Sleef is enabled with native AVX; otherwise simde_mm_exp_ps on each m128 sub-vector or simde_math_expf per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_exp_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_exp_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return
Sleef_expf8_u10(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_exp_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_expf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_exp_ps #define _mm256_exp_ps(a) simde_mm256_exp_ps(a) #endif
/* Applies exp to each f64 lane of a. Uses _mm256_exp_pd when SVML and AVX are native, Sleef_expd4_u10 when Sleef is enabled with native AVX; otherwise simde_mm_exp_pd on each m128d sub-vector or simde_math_exp per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_exp_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_exp_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_expd4_u10(a); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_exp_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_exp(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_exp_pd #define _mm256_exp_pd(a) simde_mm256_exp_pd(a) #endif
/* Applies exp to each f32 lane of a. Uses _mm512_exp_ps when SVML and AVX-512F are native, Sleef_expf16_u10 when Sleef is enabled with native AVX-512F; otherwise simde_mm256_exp_ps on each m256 sub-vector or simde_math_expf per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_exp_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_exp_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_expf16_u10(a); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_exp_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_expf(a_.f32[i]); } #endif return
simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_exp_ps #define _mm512_exp_ps(a) simde_mm512_exp_ps(a) #endif
/* Applies exp to each f64 lane of a. Uses _mm512_exp_pd when SVML and AVX-512F are native, Sleef_expd8_u10 when Sleef is enabled with native AVX-512F; otherwise simde_mm256_exp_pd on each m256d sub-vector or simde_math_exp per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_exp_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_exp_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_expd8_u10(a); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_exp_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_exp(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_exp_pd #define _mm512_exp_pd(a) simde_mm512_exp_pd(a) #endif
/* Masked exp (f32): uses _mm512_mask_exp_ps when SVML and AVX-512F are native; otherwise computes simde_mm512_exp_ps(a) and combines it with src under mask k via simde_mm512_mask_mov_ps. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_exp_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_exp_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_exp_ps #define _mm512_mask_exp_ps(src, k, a) simde_mm512_mask_exp_ps(src, k, a) #endif
/* Masked exp (f64): uses _mm512_mask_exp_pd when SVML and AVX-512F are native; otherwise computes simde_mm512_exp_pd(a) and combines it with src under mask k via simde_mm512_mask_mov_pd. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_exp_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_exp_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_exp_pd #define _mm512_mask_exp_pd(src, k, a) simde_mm512_mask_exp_pd(src, k, a) #endif
/* Applies expm1 to each f32 lane of a. Uses _mm_expm1_ps when SVML and SSE are native, Sleef_expm1f4_u10 when Sleef is enabled with native SSE, otherwise simde_math_expm1f per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_expm1_ps (simde__m128 a) { #if
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_expm1_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_expm1f4_u10(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_expm1f(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_expm1_ps #define _mm_expm1_ps(a) simde_mm_expm1_ps(a) #endif
/* Applies expm1 to each f64 lane of a. Uses _mm_expm1_pd when SVML and SSE are native, Sleef_expm1d2_u10 when Sleef is enabled with native SSE, otherwise simde_math_expm1 per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_expm1_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_expm1_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_expm1d2_u10(a); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_expm1(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_expm1_pd #define _mm_expm1_pd(a) simde_mm_expm1_pd(a) #endif
/* Applies expm1 to each f32 lane of a. Uses _mm256_expm1_ps when SVML and AVX are native, Sleef_expm1f8_u10 when Sleef is enabled with native AVX; otherwise simde_mm_expm1_ps on each m128 sub-vector or simde_math_expm1f per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_expm1_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_expm1_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_expm1f8_u10(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_expm1_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_expm1f(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_expm1_ps #define _mm256_expm1_ps(a) simde_mm256_expm1_ps(a) #endif
/* Applies expm1 to each f64 lane of a. Uses _mm256_expm1_pd when SVML and AVX are native, Sleef_expm1d4_u10 when Sleef is enabled with native AVX; otherwise simde_mm_expm1_pd on each m128d sub-vector or simde_math_expm1 per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_expm1_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_expm1_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_expm1d4_u10(a); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_expm1_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_expm1(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_expm1_pd #define _mm256_expm1_pd(a) simde_mm256_expm1_pd(a) #endif
/* Applies expm1 to each f32 lane of a. Uses _mm512_expm1_ps when SVML and AVX-512F are native, Sleef_expm1f16_u10 when Sleef is enabled with native AVX-512F; otherwise simde_mm256_expm1_ps on each m256 sub-vector or simde_math_expm1f per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_expm1_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_expm1_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_expm1f16_u10(a); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_expm1_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_expm1f(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_expm1_ps #define _mm512_expm1_ps(a) simde_mm512_expm1_ps(a) #endif
/* Applies expm1 to each f64 lane of a. Uses _mm512_expm1_pd when SVML and AVX-512F are native, Sleef_expm1d8_u10 when Sleef is enabled with native AVX-512F; otherwise simde_mm256_expm1_pd on each m256d sub-vector or simde_math_expm1 per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_expm1_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_expm1_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_expm1d8_u10(a); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if
SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_expm1_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_expm1(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_expm1_pd #define _mm512_expm1_pd(a) simde_mm512_expm1_pd(a) #endif
/* Masked expm1 (f32): uses _mm512_mask_expm1_ps when SVML and AVX-512F are native; otherwise computes simde_mm512_expm1_ps(a) and combines it with src under mask k via simde_mm512_mask_mov_ps. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_expm1_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_expm1_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_expm1_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_expm1_ps #define _mm512_mask_expm1_ps(src, k, a) simde_mm512_mask_expm1_ps(src, k, a) #endif
/* Masked expm1 (f64): uses _mm512_mask_expm1_pd when SVML and AVX-512F are native; otherwise computes simde_mm512_expm1_pd(a) and combines it with src under mask k via simde_mm512_mask_mov_pd. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_expm1_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_expm1_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_expm1_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_expm1_pd #define _mm512_mask_expm1_pd(src, k, a) simde_mm512_mask_expm1_pd(src, k, a) #endif
/* Applies exp2 to each f32 lane of a. Uses _mm_exp2_ps when SVML and SSE are native, Sleef_exp2f4_u10 when Sleef is enabled with native SSE, otherwise simde_math_exp2f per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_exp2_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_exp2_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_exp2f4_u10(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_exp2f(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_exp2_ps #define
_mm_exp2_ps(a) simde_mm_exp2_ps(a) #endif
/* Applies exp2 to each f64 lane of a. Uses _mm_exp2_pd when SVML and SSE are native, Sleef_exp2d2_u10 when Sleef is enabled with native SSE, otherwise simde_math_exp2 per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_exp2_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_exp2_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_exp2d2_u10(a); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_exp2(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_exp2_pd #define _mm_exp2_pd(a) simde_mm_exp2_pd(a) #endif
/* Applies exp2 to each f32 lane of a. Uses _mm256_exp2_ps when SVML and AVX are native, Sleef_exp2f8_u10 when Sleef is enabled with native AVX; otherwise simde_mm_exp2_ps on each m128 sub-vector or simde_math_exp2f per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_exp2_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_exp2_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_exp2f8_u10(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_exp2_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_exp2f(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_exp2_ps #define _mm256_exp2_ps(a) simde_mm256_exp2_ps(a) #endif
/* Applies exp2 to each f64 lane of a. Uses _mm256_exp2_pd when SVML and AVX are native, Sleef_exp2d4_u10 when Sleef is enabled with native AVX; otherwise simde_mm_exp2_pd on each m128d sub-vector or simde_math_exp2 per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_exp2_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_exp2_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_exp2d4_u10(a); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_exp2_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i
= 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_exp2(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_exp2_pd #define _mm256_exp2_pd(a) simde_mm256_exp2_pd(a) #endif
/* Applies exp2 to each f32 lane of a. Uses _mm512_exp2_ps when SVML and AVX-512F are native, Sleef_exp2f16_u10 when Sleef is enabled with native AVX-512F; otherwise simde_mm256_exp2_ps on each m256 sub-vector or simde_math_exp2f per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_exp2_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_exp2_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_exp2f16_u10(a); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_exp2_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_exp2f(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_exp2_ps #define _mm512_exp2_ps(a) simde_mm512_exp2_ps(a) #endif
/* Applies exp2 to each f64 lane of a. Uses _mm512_exp2_pd when SVML and AVX-512F are native, Sleef_exp2d8_u10 when Sleef is enabled with native AVX-512F; otherwise simde_mm256_exp2_pd on each m256d sub-vector or simde_math_exp2 per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_exp2_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_exp2_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_exp2d8_u10(a); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_exp2_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_exp2(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_exp2_pd #define _mm512_exp2_pd(a) simde_mm512_exp2_pd(a) #endif
/* Masked exp2 (f32): uses _mm512_mask_exp2_ps when SVML and AVX-512F are native; otherwise computes simde_mm512_exp2_ps(a) and combines it with src under mask k via simde_mm512_mask_mov_ps. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_exp2_ps(simde__m512 src,
simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_exp2_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp2_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_exp2_ps #define _mm512_mask_exp2_ps(src, k, a) simde_mm512_mask_exp2_ps(src, k, a) #endif
/* Masked exp2 (f64): uses _mm512_mask_exp2_pd when SVML and AVX-512F are native; otherwise computes simde_mm512_exp2_pd(a) and combines it with src under mask k via simde_mm512_mask_mov_pd. */
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_exp2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_exp2_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp2_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_exp2_pd #define _mm512_mask_exp2_pd(src, k, a) simde_mm512_mask_exp2_pd(src, k, a) #endif
/* Applies exp10 to each f32 lane of a. Uses _mm_exp10_ps when SVML and SSE are native, Sleef_exp10f4_u10 when Sleef is enabled with native SSE, otherwise simde_math_exp10f per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_exp10_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_exp10_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_exp10f4_u10(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_exp10f(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_exp10_ps #define _mm_exp10_ps(a) simde_mm_exp10_ps(a) #endif
/* Applies exp10 to each f64 lane of a. Uses _mm_exp10_pd when SVML and SSE are native, Sleef_exp10d2_u10 when Sleef is enabled with native SSE, otherwise simde_math_exp10 per lane. */
SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_exp10_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_exp10_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_exp10d2_u10(a); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_exp10(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_exp10_pd #define _mm_exp10_pd(a) simde_mm_exp10_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_exp10_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_exp10_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_exp10f8_u10(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_exp10_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_exp10f(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_exp10_ps #define _mm256_exp10_ps(a) simde_mm256_exp10_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_exp10_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_exp10_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_exp10d4_u10(a); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_exp10_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_exp10(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_exp10_pd #define _mm256_exp10_pd(a) simde_mm256_exp10_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_exp10_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_exp10_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) 
return Sleef_exp10f16_u10(a); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_exp10_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_exp10f(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_exp10_ps #define _mm512_exp10_ps(a) simde_mm512_exp10_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_exp10_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_exp10_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_exp10d8_u10(a); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_exp10_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_exp10(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_exp10_pd #define _mm512_exp10_pd(a) simde_mm512_exp10_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_exp10_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_exp10_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp10_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_exp10_ps #define _mm512_mask_exp10_ps(src, k, a) simde_mm512_mask_exp10_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_exp10_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_exp10_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp10_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_exp10_pd #define _mm512_mask_exp10_pd(src, k, a) simde_mm512_mask_exp10_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cdfnorm_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_cdfnorm_ps(a); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) /* https://www.johndcook.com/blog/cpp_phi/ */ const simde__m128 a1 = simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.254829592)); const simde__m128 a2 = simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.284496736)); const simde__m128 a3 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.421413741)); const simde__m128 a4 = simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.453152027)); const simde__m128 a5 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.061405429)); const simde__m128 p = simde_mm_set1_ps(SIMDE_FLOAT32_C(0.3275911)); const simde__m128 one = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)); /* simde_math_fabsf(x) / sqrtf(2.0) */ const simde__m128 x = simde_mm_div_ps(simde_x_mm_abs_ps(a), simde_mm_sqrt_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)))); /* 1.0 / (1.0 + p * x) */ const simde__m128 t = simde_mm_div_ps(one, simde_mm_add_ps(one, simde_mm_mul_ps(p, x))); /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ simde__m128 y = simde_mm_mul_ps(a5, t); y = simde_mm_add_ps(y, a4); y = simde_mm_mul_ps(y, t); y = simde_mm_add_ps(y, a3); y = simde_mm_mul_ps(y, t); y = simde_mm_add_ps(y, a2); y = simde_mm_mul_ps(y, t); y = simde_mm_add_ps(y, a1); y = simde_mm_mul_ps(y, t); y = simde_mm_mul_ps(y, simde_mm_exp_ps(simde_mm_mul_ps(x, simde_x_mm_negate_ps(x)))); y = simde_mm_sub_ps(one, y); /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ return simde_mm_mul_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm_add_ps(one, simde_x_mm_xorsign_ps(y, a))); #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_cdfnormf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_cdfnorm_ps #define _mm_cdfnorm_ps(a) simde_mm_cdfnorm_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cdfnorm_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_cdfnorm_pd(a); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) /* https://www.johndcook.com/blog/cpp_phi/ */ const simde__m128d a1 = simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.254829592)); const simde__m128d a2 = simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.284496736)); const simde__m128d a3 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.421413741)); const simde__m128d a4 = simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.453152027)); const simde__m128d a5 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.061405429)); const simde__m128d p = simde_mm_set1_pd(SIMDE_FLOAT64_C(0.6475911)); const simde__m128d one = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)); /* simde_math_fabs(x) / sqrt(2.0) */ const simde__m128d x = simde_mm_div_pd(simde_x_mm_abs_pd(a), simde_mm_sqrt_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0)))); /* 1.0 / (1.0 + p * x) */ const simde__m128d t = simde_mm_div_pd(one, simde_mm_add_pd(one, simde_mm_mul_pd(p, x))); /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ simde__m128d y = simde_mm_mul_pd(a5, t); y = simde_mm_add_pd(y, a4); y = simde_mm_mul_pd(y, t); y = simde_mm_add_pd(y, a3); y = simde_mm_mul_pd(y, t); y = simde_mm_add_pd(y, a2); y = simde_mm_mul_pd(y, t); y = simde_mm_add_pd(y, a1); y = simde_mm_mul_pd(y, t); y = simde_mm_mul_pd(y, simde_mm_exp_pd(simde_mm_mul_pd(x, simde_x_mm_negate_pd(x)))); y = simde_mm_sub_pd(one, y); /* 0.5 * (1.0 + ((a < 
0.0) ? -y : y)) */ return simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm_add_pd(one, simde_x_mm_xorsign_pd(y, a))); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_cdfnorm(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_cdfnorm_pd #define _mm_cdfnorm_pd(a) simde_mm_cdfnorm_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_cdfnorm_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_cdfnorm_ps(a); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) /* https://www.johndcook.com/blog/cpp_phi/ */ const simde__m256 a1 = simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.254829592)); const simde__m256 a2 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.284496736)); const simde__m256 a3 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.421413741)); const simde__m256 a4 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.453152027)); const simde__m256 a5 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.061405429)); const simde__m256 p = simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.3275911)); const simde__m256 one = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)); /* simde_math_fabsf(x) / sqrtf(2.0) */ const simde__m256 x = simde_mm256_div_ps(simde_x_mm256_abs_ps(a), simde_mm256_sqrt_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)))); /* 1.0 / (1.0 + p * x) */ const simde__m256 t = simde_mm256_div_ps(one, simde_mm256_add_ps(one, simde_mm256_mul_ps(p, x))); /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ simde__m256 y = simde_mm256_mul_ps(a5, t); y = simde_mm256_add_ps(y, a4); y = simde_mm256_mul_ps(y, t); y = simde_mm256_add_ps(y, a3); y = simde_mm256_mul_ps(y, t); y = simde_mm256_add_ps(y, a2); y = simde_mm256_mul_ps(y, t); y = simde_mm256_add_ps(y, a1); y = simde_mm256_mul_ps(y, t); y = simde_mm256_mul_ps(y, 
simde_mm256_exp_ps(simde_mm256_mul_ps(x, simde_x_mm256_negate_ps(x)))); y = simde_mm256_sub_ps(one, y); /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) */ return simde_mm256_mul_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm256_add_ps(one, simde_x_mm256_xorsign_ps(y, a))); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_cdfnorm_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_cdfnormf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_cdfnorm_ps #define _mm256_cdfnorm_ps(a) simde_mm256_cdfnorm_ps(a) #endif /* simde_mm256_cdfnorm_pd: standard normal CDF of each double lane of a. Calls _mm256_cdfnorm_pd when SVML and AVX are native; otherwise, when SIMDE_NATURAL_VECTOR_SIZE > 0, evaluates 0.5 * (1 + sign(a) * erf(|a| / sqrt(2))) with the polynomial erf approximation from the URL below; otherwise falls back to the 128-bit implementation or to simde_math_cdfnorm per lane. */ SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_cdfnorm_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_cdfnorm_pd(a); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) /* https://www.johndcook.com/blog/cpp_phi/ */ const simde__m256d a1 = simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.254829592)); const simde__m256d a2 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.284496736)); const simde__m256d a3 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.421413741)); const simde__m256d a4 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.453152027)); const simde__m256d a5 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.061405429)); /* p must be 0.3275911, the value the _ps variant uses; the a1..a5 coefficients were fitted for this p, so any other value yields a wrong CDF. */ const simde__m256d p = simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.3275911)); const simde__m256d one = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)); /* simde_math_fabs(x) / sqrt(2.0) */ const simde__m256d x = simde_mm256_div_pd(simde_x_mm256_abs_pd(a), simde_mm256_sqrt_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0)))); /* 1.0 / (1.0 + p * x) */ const simde__m256d t = simde_mm256_div_pd(one, simde_mm256_add_pd(one, simde_mm256_mul_pd(p, x))); /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ simde__m256d y = 
simde_mm256_mul_pd(a5, t); y = simde_mm256_add_pd(y, a4); y = simde_mm256_mul_pd(y, t); y = simde_mm256_add_pd(y, a3); y = simde_mm256_mul_pd(y, t); y = simde_mm256_add_pd(y, a2); y = simde_mm256_mul_pd(y, t); y = simde_mm256_add_pd(y, a1); y = simde_mm256_mul_pd(y, t); y = simde_mm256_mul_pd(y, simde_mm256_exp_pd(simde_mm256_mul_pd(x, simde_x_mm256_negate_pd(x)))); y = simde_mm256_sub_pd(one, y); /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) */ return simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm256_add_pd(one, simde_x_mm256_xorsign_pd(y, a))); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); /* Split into 128-bit halves only when the natural vector size is at most 128 bits, the same threshold the other 256-bit functions in this file use. */ #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_cdfnorm_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_cdfnorm(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_cdfnorm_pd #define _mm256_cdfnorm_pd(a) simde_mm256_cdfnorm_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_cdfnorm_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cdfnorm_ps(a); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) /* https://www.johndcook.com/blog/cpp_phi/ */ const simde__m512 a1 = simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.254829592)); const simde__m512 a2 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.284496736)); const simde__m512 a3 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.421413741)); const simde__m512 a4 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.453152027)); const simde__m512 a5 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.061405429)); const simde__m512 p = simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.3275911)); const simde__m512 one = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)); /* simde_math_fabsf(x) / sqrtf(2.0) */ const simde__m512 x = 
simde_mm512_div_ps(simde_mm512_abs_ps(a), simde_mm512_sqrt_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0)))); /* 1.0 / (1.0 + p * x) */ const simde__m512 t = simde_mm512_div_ps(one, simde_mm512_add_ps(one, simde_mm512_mul_ps(p, x))); /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ simde__m512 y = simde_mm512_mul_ps(a5, t); y = simde_mm512_add_ps(y, a4); y = simde_mm512_mul_ps(y, t); y = simde_mm512_add_ps(y, a3); y = simde_mm512_mul_ps(y, t); y = simde_mm512_add_ps(y, a2); y = simde_mm512_mul_ps(y, t); y = simde_mm512_add_ps(y, a1); y = simde_mm512_mul_ps(y, t); y = simde_mm512_mul_ps(y, simde_mm512_exp_ps(simde_mm512_mul_ps(x, simde_x_mm512_negate_ps(x)))); y = simde_mm512_sub_ps(one, y); /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) */ return simde_mm512_mul_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm512_add_ps(one, simde_x_mm512_xorsign_ps(y, a))); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_cdfnorm_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_cdfnormf(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_cdfnorm_ps #define _mm512_cdfnorm_ps(a) simde_mm512_cdfnorm_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_cdfnorm_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cdfnorm_pd(a); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) /* https://www.johndcook.com/blog/cpp_phi/ */ const simde__m512d a1 = simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.254829592)); const simde__m512d a2 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.284496736)); const simde__m512d a3 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.421413741)); const simde__m512d a4 = 
simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.453152027)); const simde__m512d a5 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.061405429)); /* p must be 0.3275911, the value the _ps variant uses; the a1..a5 coefficients were fitted for this p, so any other value yields a wrong CDF. */ const simde__m512d p = simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.3275911)); const simde__m512d one = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)); /* simde_math_fabs(x) / sqrt(2.0) */ const simde__m512d x = simde_mm512_div_pd(simde_mm512_abs_pd(a), simde_mm512_sqrt_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0)))); /* 1.0 / (1.0 + p * x) */ const simde__m512d t = simde_mm512_div_pd(one, simde_mm512_add_pd(one, simde_mm512_mul_pd(p, x))); /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ simde__m512d y = simde_mm512_mul_pd(a5, t); y = simde_mm512_add_pd(y, a4); y = simde_mm512_mul_pd(y, t); y = simde_mm512_add_pd(y, a3); y = simde_mm512_mul_pd(y, t); y = simde_mm512_add_pd(y, a2); y = simde_mm512_mul_pd(y, t); y = simde_mm512_add_pd(y, a1); y = simde_mm512_mul_pd(y, t); y = simde_mm512_mul_pd(y, simde_mm512_exp_pd(simde_mm512_mul_pd(x, simde_x_mm512_negate_pd(x)))); y = simde_mm512_sub_pd(one, y); /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ return simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm512_add_pd(one, simde_x_mm512_xorsign_pd(y, a))); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_cdfnorm_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_cdfnorm(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_cdfnorm_pd #define _mm512_cdfnorm_pd(a) simde_mm512_cdfnorm_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_cdfnorm_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cdfnorm_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_cdfnorm_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cdfnorm_ps #define _mm512_mask_cdfnorm_ps(src, k, a) simde_mm512_mask_cdfnorm_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_cdfnorm_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cdfnorm_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_cdfnorm_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cdfnorm_pd #define _mm512_mask_cdfnorm_pd(src, k, a) simde_mm512_mask_cdfnorm_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_idivrem_epi32 (simde__m128i* mem_addr, simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) return _mm_idivrem_epi32(HEDLEY_REINTERPRET_CAST(__m128i*, mem_addr), a, b); #else simde__m128i r; r = simde_mm_div_epi32(a, b); *mem_addr = 
simde_mm_sub_epi32(a, simde_mm_mullo_epi32(r, b)); return r; #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_idivrem_epi32 #define _mm_idivrem_epi32(mem_addr, a, b) simde_mm_idivrem_epi32((mem_addr),(a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_idivrem_epi32 (simde__m256i* mem_addr, simde__m256i a, simde__m256i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_idivrem_epi32(HEDLEY_REINTERPRET_CAST(__m256i*, mem_addr), a, b); #else simde__m256i r; r = simde_mm256_div_epi32(a, b); *mem_addr = simde_mm256_sub_epi32(a, simde_mm256_mullo_epi32(r, b)); return r; #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_idivrem_epi32 #define _mm256_idivrem_epi32(mem_addr, a, b) simde_mm256_idivrem_epi32((mem_addr),(a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_hypot_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_hypot_ps(a, b); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_hypotf4_u05(a, b); #else return Sleef_hypotf4_u35(a, b); #endif #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_hypot_ps #define _mm_hypot_ps(a, b) simde_mm_hypot_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_hypot_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_hypot_pd(a, b); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_hypotd2_u05(a, b); #else return Sleef_hypotd2_u35(a, b); #endif #else 
simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_hypot_pd #define _mm_hypot_pd(a, b) simde_mm_hypot_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_hypot_ps (simde__m256 a, simde__m256 b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_hypot_ps(a, b); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_hypotf8_u05(a, b); #else return Sleef_hypotf8_u35(a, b); #endif #else simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_hypot_ps(a_.m128[i], b_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_hypot_ps #define _mm256_hypot_ps(a, b) simde_mm256_hypot_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_hypot_pd (simde__m256d a, simde__m256d b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_hypot_pd(a, b); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_hypotd4_u05(a, b); #else return Sleef_hypotd4_u35(a, b); #endif #else simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = 
simde_mm_hypot_pd(a_.m128d[i], b_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_hypot_pd #define _mm256_hypot_pd(a, b) simde_mm256_hypot_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_hypot_ps (simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_hypot_ps(a, b); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_hypotf16_u05(a, b); #else return Sleef_hypotf16_u35(a, b); #endif #else simde__m512_private r_, a_ = simde__m512_to_private(a), b_ = simde__m512_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_hypot_ps(a_.m256[i], b_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_hypot_ps #define _mm512_hypot_ps(a, b) simde_mm512_hypot_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_hypot_pd (simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_hypot_pd(a, b); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_hypotd8_u05(a, b); #else return Sleef_hypotd8_u35(a, b); #endif #else simde__m512d_private r_, a_ = simde__m512d_to_private(a), b_ = simde__m512d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = 
simde_mm256_hypot_pd(a_.m256d[i], b_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_hypot_pd #define _mm512_hypot_pd(a, b) simde_mm512_hypot_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_hypot_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_hypot_ps(src, k, a, b); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_hypot_ps(a, b)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_hypot_ps #define _mm512_mask_hypot_ps(src, k, a, b) simde_mm512_mask_hypot_ps(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_hypot_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_hypot_pd(src, k, a, b); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_hypot_pd(a, b)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_hypot_pd #define _mm512_mask_hypot_pd(src, k, a, b) simde_mm512_mask_hypot_pd(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_invcbrt_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_invcbrt_ps(a); #else return simde_mm_rcp_ps(simde_mm_cbrt_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_invcbrt_ps #define _mm_invcbrt_ps(a) simde_mm_invcbrt_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_invcbrt_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_invcbrt_pd(a); #else return simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), 
simde_mm_cbrt_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_invcbrt_pd #define _mm_invcbrt_pd(a) simde_mm_invcbrt_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_invcbrt_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_invcbrt_ps(a); #else return simde_mm256_rcp_ps(simde_mm256_cbrt_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_invcbrt_ps #define _mm256_invcbrt_ps(a) simde_mm256_invcbrt_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_invcbrt_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_invcbrt_pd(a); #else return simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm256_cbrt_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_invcbrt_pd #define _mm256_invcbrt_pd(a) simde_mm256_invcbrt_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_invsqrt_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_invsqrt_ps(a); #else return simde_mm_rcp_ps(simde_mm_sqrt_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_invsqrt_ps #define _mm_invsqrt_ps(a) simde_mm_invsqrt_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_invsqrt_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_invsqrt_pd(a); #else return simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm_sqrt_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_invsqrt_pd #define _mm_invsqrt_pd(a) simde_mm_invsqrt_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_invsqrt_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_invsqrt_ps(a); #else return simde_mm256_rcp_ps(simde_mm256_sqrt_ps(a)); #endif } #if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_invsqrt_ps #define _mm256_invsqrt_ps(a) simde_mm256_invsqrt_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_invsqrt_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_invsqrt_pd(a); #else return simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm256_sqrt_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_invsqrt_pd #define _mm256_invsqrt_pd(a) simde_mm256_invsqrt_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_invsqrt_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_invsqrt_ps(a); #else return simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), simde_mm512_sqrt_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_invsqrt_ps #define _mm512_invsqrt_ps(a) simde_mm512_invsqrt_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_invsqrt_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_invsqrt_pd(a); #else return simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm512_sqrt_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_invsqrt_pd #define _mm512_invsqrt_pd(a) simde_mm512_invsqrt_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_invsqrt_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_invsqrt_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_invsqrt_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_invsqrt_ps #define _mm512_mask_invsqrt_ps(src, k, a) simde_mm512_mask_invsqrt_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_invsqrt_pd(simde__m512d src, simde__mmask8 k, 
simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_invsqrt_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_invsqrt_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_invsqrt_pd #define _mm512_mask_invsqrt_pd(src, k, a) simde_mm512_mask_invsqrt_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_log_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_log_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_logf4_u10(a); #else return Sleef_logf4_u35(a); #endif #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_logf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_log_ps #define _mm_log_ps(a) simde_mm_log_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_log_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_log_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_logd2_u10(a); #else return Sleef_logd2_u35(a); #endif #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_log(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_log_pd #define _mm_log_pd(a) simde_mm_log_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_log_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_log_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) 
#if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_logf8_u10(a); #else return Sleef_logf8_u35(a); #endif #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_log_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_logf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_log_ps #define _mm256_log_ps(a) simde_mm256_log_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_log_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_log_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_logd4_u10(a); #else return Sleef_logd4_u35(a); #endif #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_log_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_log(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_log_pd #define _mm256_log_pd(a) simde_mm256_log_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_log_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_log_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_logf16_u10(a); #else return Sleef_logf16_u35(a); #endif #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / 
sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_log_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_logf(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_log_ps #define _mm512_log_ps(a) simde_mm512_log_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_log_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_log_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_logd8_u10(a); #else return Sleef_logd8_u35(a); #endif #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_log_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_log(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_log_pd #define _mm512_log_pd(a) simde_mm512_log_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_log_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_log_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_log_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_log_ps #define _mm512_mask_log_ps(src, k, a) simde_mm512_mask_log_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_log_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_log_pd(src, k, a); #else return 
simde_mm512_mask_mov_pd(src, k, simde_mm512_log_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_log_pd #define _mm512_mask_log_pd(src, k, a) simde_mm512_mask_log_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cdfnorminv_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_cdfnorminv_ps(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) simde__m128 matched, retval = simde_mm_setzero_ps(); { /* if (a < 0 || a > 1) */ matched = simde_mm_or_ps(simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))), simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)))); /* We don't actually need to do anything here since we initialize * retval to 0.0. */ } { /* else if (a == 0) */ simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))); mask = simde_mm_andnot_ps(matched, mask); matched = simde_mm_or_ps(matched, mask); simde__m128 res = simde_mm_set1_ps(-SIMDE_MATH_INFINITYF); retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); } { /* else if (a == 1) */ simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0))); mask = simde_mm_andnot_ps(matched, mask); matched = simde_mm_or_ps(matched, mask); simde__m128 res = simde_mm_set1_ps(SIMDE_MATH_INFINITYF); retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); } { /* Remaining conditions. * * Including the else case in this complicates things a lot, but * we're using cheap operations to get rid of expensive multiply * and add functions. This should be a small improvement on SSE * prior to 4.1. On SSE 4.1 we can use _mm_blendv_ps which is * very fast and this becomes a huge win. NEON, AltiVec, and * WASM also have blend operations, so this should be a big win * there, too. 
*/ /* else if (a < 0.02425) */ simde__m128 mask_lo = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.02425))); /* else if (a > 0.97575) */ simde__m128 mask_hi = simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.97575))); simde__m128 mask = simde_mm_or_ps(mask_lo, mask_hi); matched = simde_mm_or_ps(matched, mask); /* else */ simde__m128 mask_el = simde_x_mm_not_ps(matched); mask = simde_mm_or_ps(mask, mask_el); /* r = a - 0.5f */ simde__m128 r = simde_mm_sub_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5))); /* lo: q = a * hi: q = (1.0 - a) */ simde__m128 q = simde_mm_and_ps(mask_lo, a); q = simde_mm_or_ps(q, simde_mm_and_ps(mask_hi, simde_mm_sub_ps(simde_mm_set1_ps(1.0f), a))); /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */ q = simde_mm_log_ps(q); q = simde_mm_mul_ps(q, simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.0))); q = simde_mm_sqrt_ps(q); /* el: q = r * r */ q = simde_x_mm_select_ps(q, simde_mm_mul_ps(r, r), mask_el); /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */ /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */ /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ simde__m128 numerator = simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)), mask_el); numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)), mask_el)); numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)), mask_el)); numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), 
simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)), mask_el)); numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)), mask_el)); numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)), mask_el)); { simde__m128 multiplier; multiplier = simde_mm_and_ps(mask_lo, simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.0))); multiplier = simde_mm_or_ps(multiplier, simde_mm_and_ps(mask_hi, simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.0)))); multiplier = simde_mm_or_ps(multiplier, simde_mm_and_ps(mask_el, r)); numerator = simde_mm_mul_ps(numerator, multiplier); } /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ simde__m128 denominator = simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)), mask_el); denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)), mask_el)); denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)), mask_el)); denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)), mask_el)); denominator = simde_mm_fmadd_ps(denominator, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.0)), q, mask_el), simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.0)), 
simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)), mask_el)); denominator = simde_mm_fmadd_ps(denominator, q, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0))); /* res = numerator / denominator; */ simde__m128 res = simde_mm_div_ps(numerator, denominator); retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); } return retval; #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_cdfnorminvf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_cdfnorminv_ps #define _mm_cdfnorminv_ps(a) simde_mm_cdfnorminv_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cdfnorminv_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_cdfnorminv_pd(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) simde__m128d matched, retval = simde_mm_setzero_pd(); { /* if (a < 0 || a > 1) */ matched = simde_mm_or_pd(simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))), simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)))); /* We don't actually need to do anything here since we initialize * retval to 0.0. */ } { /* else if (a == 0) */ simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))); mask = simde_mm_andnot_pd(matched, mask); matched = simde_mm_or_pd(matched, mask); simde__m128d res = simde_mm_set1_pd(-SIMDE_MATH_INFINITY); retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); } { /* else if (a == 1) */ simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0))); mask = simde_mm_andnot_pd(matched, mask); matched = simde_mm_or_pd(matched, mask); simde__m128d res = simde_mm_set1_pd(SIMDE_MATH_INFINITY); retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); } { /* Remaining conditions. 
* * Including the else case in this complicates things a lot, but * we're using cheap operations to get rid of expensive multiply * and add functions. This should be a small improvement on SSE * prior to 4.1. On SSE 4.1 we can use _mm_blendv_pd which is * very fast and this becomes a huge win. NEON, AltiVec, and * WASM also have blend operations, so this should be a big win * there, too. */ /* else if (a < 0.02425) */ simde__m128d mask_lo = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.02425))); /* else if (a > 0.97575) */ simde__m128d mask_hi = simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.97575))); simde__m128d mask = simde_mm_or_pd(mask_lo, mask_hi); matched = simde_mm_or_pd(matched, mask); /* else */ simde__m128d mask_el = simde_x_mm_not_pd(matched); mask = simde_mm_or_pd(mask, mask_el); /* r = a - 0.5 */ simde__m128d r = simde_mm_sub_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5))); /* lo: q = a * hi: q = (1.0 - a) */ simde__m128d q = simde_mm_and_pd(mask_lo, a); q = simde_mm_or_pd(q, simde_mm_and_pd(mask_hi, simde_mm_sub_pd(simde_mm_set1_pd(1.0), a))); /* q = simde_math_sqrt(-2.0 * simde_math_log(q)) */ q = simde_mm_log_pd(q); q = simde_mm_mul_pd(q, simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.0))); q = simde_mm_sqrt_pd(q); /* el: q = r * r */ q = simde_x_mm_select_pd(q, simde_mm_mul_pd(r, r), mask_el); /* lo: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0); */ /* hi: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0); */ /* el: double numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ simde__m128d numerator = simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)), mask_el); numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), 
simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)), mask_el)); numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)), mask_el)); numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)), mask_el)); numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)), mask_el)); numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)), mask_el)); { simde__m128d multiplier; multiplier = simde_mm_and_pd(mask_lo, simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.0))); multiplier = simde_mm_or_pd(multiplier, simde_mm_and_pd(mask_hi, simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.0)))); multiplier = simde_mm_or_pd(multiplier, simde_mm_and_pd(mask_el, r)); numerator = simde_mm_mul_pd(numerator, multiplier); } /* lo/hi: double denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ /* el: double denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ simde__m128d denominator = simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)), mask_el); denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)), mask_el)); denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)), mask_el)); denominator = 
simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)), mask_el)); denominator = simde_mm_fmadd_pd(denominator, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.0)), q, mask_el), simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)), mask_el)); denominator = simde_mm_fmadd_pd(denominator, q, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0))); /* res = numerator / denominator; */ simde__m128d res = simde_mm_div_pd(numerator, denominator); retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); } return retval; #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_cdfnorminv(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_cdfnorminv_pd #define _mm_cdfnorminv_pd(a) simde_mm_cdfnorminv_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_cdfnorminv_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_cdfnorminv_ps(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256) simde__m256 matched, retval = simde_mm256_setzero_ps(); { /* if (a < 0 || a > 1) */ matched = simde_mm256_or_ps(simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ), simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_GT_OQ)); /* We don't actually need to do anything here since we initialize * retval to 0.0. 
*/ } { /* else if (a == 0) */ simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); mask = simde_mm256_andnot_ps(matched, mask); matched = simde_mm256_or_ps(matched, mask); simde__m256 res = simde_mm256_set1_ps(-SIMDE_MATH_INFINITYF); retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); } { /* else if (a == 1) */ simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_EQ_OQ); mask = simde_mm256_andnot_ps(matched, mask); matched = simde_mm256_or_ps(matched, mask); simde__m256 res = simde_mm256_set1_ps(SIMDE_MATH_INFINITYF); retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); } { /* Remaining conditions. * * Including the else case in this complicates things a lot, but * we're using cheap operations to get rid of expensive multiply * and add functions. This should be a small improvement on SSE * prior to 4.1. On SSE 4.1 we can use _mm256_blendv_ps which is * very fast and this becomes a huge win. NEON, AltiVec, and * WASM also have blend operations, so this should be a big win * there, too. 
*/ /* else if (a < 0.02425) */ simde__m256 mask_lo = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.02425)), SIMDE_CMP_LT_OQ); /* else if (a > 0.97575) */ simde__m256 mask_hi = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.97575)), SIMDE_CMP_GT_OQ); simde__m256 mask = simde_mm256_or_ps(mask_lo, mask_hi); matched = simde_mm256_or_ps(matched, mask); /* else */ simde__m256 mask_el = simde_x_mm256_not_ps(matched); mask = simde_mm256_or_ps(mask, mask_el); /* r = a - 0.5f */ simde__m256 r = simde_mm256_sub_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5))); /* lo: q = a * hi: q = (1.0 - a) */ simde__m256 q = simde_mm256_and_ps(mask_lo, a); q = simde_mm256_or_ps(q, simde_mm256_and_ps(mask_hi, simde_mm256_sub_ps(simde_mm256_set1_ps(1.0f), a))); /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */ q = simde_mm256_log_ps(q); q = simde_mm256_mul_ps(q, simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.0))); q = simde_mm256_sqrt_ps(q); /* el: q = r * r */ q = simde_x_mm256_select_ps(q, simde_mm256_mul_ps(r, r), mask_el); /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */ /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */ /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ simde__m256 numerator = simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)), mask_el); numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)), mask_el)); numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)), mask_el)); 
numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)), mask_el)); numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)), mask_el)); numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)), mask_el)); { simde__m256 multiplier; multiplier = simde_mm256_and_ps(mask_lo, simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.0))); multiplier = simde_mm256_or_ps(multiplier, simde_mm256_and_ps(mask_hi, simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.0)))); multiplier = simde_mm256_or_ps(multiplier, simde_mm256_and_ps(mask_el, r)); numerator = simde_mm256_mul_ps(numerator, multiplier); } /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ simde__m256 denominator = simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)), mask_el); denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)), mask_el)); denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)), mask_el)); denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)), 
mask_el)); denominator = simde_mm256_fmadd_ps(denominator, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.0)), q, mask_el), simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)), mask_el)); denominator = simde_mm256_fmadd_ps(denominator, q, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0))); /* res = numerator / denominator; */ simde__m256 res = simde_mm256_div_ps(numerator, denominator); retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); } return retval; #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_cdfnorminv_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_cdfnorminvf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_cdfnorminv_ps #define _mm256_cdfnorminv_ps(a) simde_mm256_cdfnorminv_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_cdfnorminv_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_cdfnorminv_pd(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256) simde__m256d matched, retval = simde_mm256_setzero_pd(); { /* if (a < 0 || a > 1) */ matched = simde_mm256_or_pd(simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ), simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_GT_OQ)); /* We don't actually need to do anything here since we initialize * retval to 0.0. 
*/ } { /* else if (a == 0) */ simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); mask = simde_mm256_andnot_pd(matched, mask); matched = simde_mm256_or_pd(matched, mask); simde__m256d res = simde_mm256_set1_pd(-SIMDE_MATH_INFINITY); retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); } { /* else if (a == 1) */ simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_EQ_OQ); mask = simde_mm256_andnot_pd(matched, mask); matched = simde_mm256_or_pd(matched, mask); simde__m256d res = simde_mm256_set1_pd(SIMDE_MATH_INFINITY); retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); } { /* Remaining conditions. * * Including the else case in this complicates things a lot, but * we're using cheap operations to get rid of expensive multiply * and add functions. This should be a small improvement on SSE * prior to 4.1. On SSE 4.1 we can use _mm256_blendv_pd which is * very fast and this becomes a huge win. NEON, AltiVec, and * WASM also have blend operations, so this should be a big win * there, too. 
*/ /* else if (a < 0.02425) */ simde__m256d mask_lo = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.02425)), SIMDE_CMP_LT_OQ); /* else if (a > 0.97575) */ simde__m256d mask_hi = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.97575)), SIMDE_CMP_GT_OQ); simde__m256d mask = simde_mm256_or_pd(mask_lo, mask_hi); matched = simde_mm256_or_pd(matched, mask); /* else */ simde__m256d mask_el = simde_x_mm256_not_pd(matched); mask = simde_mm256_or_pd(mask, mask_el); /* r = a - 0.5 */ simde__m256d r = simde_mm256_sub_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5))); /* lo: q = a * hi: q = (1.0 - a) */ simde__m256d q = simde_mm256_and_pd(mask_lo, a); q = simde_mm256_or_pd(q, simde_mm256_and_pd(mask_hi, simde_mm256_sub_pd(simde_mm256_set1_pd(1.0), a))); /* q = simde_math_sqrt(-2.0 * simde_math_log(q)) */ q = simde_mm256_log_pd(q); q = simde_mm256_mul_pd(q, simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.0))); q = simde_mm256_sqrt_pd(q); /* el: q = r * r */ q = simde_x_mm256_select_pd(q, simde_mm256_mul_pd(r, r), mask_el); /* lo: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0); */ /* hi: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0); */ /* el: double numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ simde__m256d numerator = simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)), mask_el); numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)), mask_el)); numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)), mask_el)); 
numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)), mask_el)); numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)), mask_el)); numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)), mask_el)); { simde__m256d multiplier; multiplier = simde_mm256_and_pd(mask_lo, simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.0))); multiplier = simde_mm256_or_pd(multiplier, simde_mm256_and_pd(mask_hi, simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.0)))); multiplier = simde_mm256_or_pd(multiplier, simde_mm256_and_pd(mask_el, r)); numerator = simde_mm256_mul_pd(numerator, multiplier); } /* lo/hi: double denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ /* el: double denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ simde__m256d denominator = simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)), mask_el); denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)), mask_el)); denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)), mask_el)); denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 
6.680131188771972e+01)), mask_el)); denominator = simde_mm256_fmadd_pd(denominator, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.0)), q, mask_el), simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.0)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)), mask_el)); denominator = simde_mm256_fmadd_pd(denominator, q, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0))); /* res = numerator / denominator; */ simde__m256d res = simde_mm256_div_pd(numerator, denominator); retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); } return retval; #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_cdfnorminv_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_cdfnorminv(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_cdfnorminv_pd #define _mm256_cdfnorminv_pd(a) simde_mm256_cdfnorminv_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_cdfnorminv_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cdfnorminv_ps(a); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) simde__m512_private r_, a_ = simde__m512_to_private(a); for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_cdfnorminv_ps(a_.m256[i]); } return simde__m512_from_private(r_); #else simde__m512 retval = simde_mm512_setzero_ps(); simde__mmask16 matched; { /* if (a < 0 || a > 1) */ matched = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ); matched |= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_GT_OQ); /* We don't actually need to do anything here since we initialize * retval to 0.0. 
*/
    }

    { /* else if (a == 0) */
      simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ);
      matched |= mask;

      retval = simde_mm512_mask_mov_ps(retval, mask, simde_mm512_set1_ps(-SIMDE_MATH_INFINITYF));
    }

    { /* else if (a == 1) */
      /* Compare against 1.0 here; testing 0.0 a second time would
       * overwrite the -infinity stored for a == 0 with +infinity and
       * leave a == 1 lanes unmatched. */
      simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_EQ_OQ);
      matched |= mask;

      retval = simde_mm512_mask_mov_ps(retval, mask, simde_mm512_set1_ps(SIMDE_MATH_INFINITYF));
    }

    {
      /* Lanes already claimed above (a < 0, a > 1, a == 0, a == 1)
       * must not be selected by the lo/hi tests.  The final masked
       * divide writes every lane in mask_lo | mask_hi | mask_el, so
       * without this the 0.0 / +/-infinity results stored in retval
       * would be overwritten. */
      const simde__mmask16 unclaimed = ~matched;

      /* else if (a < 0.02425) */
      simde__mmask16 mask_lo = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.02425)), SIMDE_CMP_LT_OQ) & unclaimed;
      /* else if (a > 0.97575) */
      simde__mmask16 mask_hi = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.97575)), SIMDE_CMP_GT_OQ) & unclaimed;

      simde__mmask16 mask = mask_lo | mask_hi;
      matched = matched | mask;

      /* else */
      simde__mmask16 mask_el = ~matched;
      mask = mask | mask_el;

      /* r = a - 0.5f */
      simde__m512 r = simde_mm512_sub_ps(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)));

      /* lo: q = a
       * hi: q = (1.0 - a) */
      simde__m512 q = simde_mm512_maskz_mov_ps(mask_lo, a);
      q = simde_mm512_mask_sub_ps(q, mask_hi, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a);

      /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */
      q = simde_mm512_log_ps(q);
      q = simde_mm512_mul_ps(q, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.0)));
      q = simde_mm512_sqrt_ps(q);

      /* el: q = r * r */
      q = simde_mm512_mask_mul_ps(q, mask_el, r, r);

      /* Each masked move below keeps the lo/hi coefficient (first
       * argument) and substitutes the "else" coefficient in mask_el
       * lanes. */
      /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */
      /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */
      /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */
      simde__m512 numerator = simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)));
      numerator
= simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)))); numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)))); numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)))); numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)))); numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)))); { simde__m512 multiplier; multiplier = simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.0)); multiplier = simde_mm512_mask_mov_ps(multiplier, mask_hi, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.0))); multiplier = simde_mm512_mask_mov_ps(multiplier, mask_el, r); numerator = simde_mm512_mul_ps(numerator, multiplier); } /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ simde__m512 denominator = simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01))); denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)))); denominator = simde_mm512_fmadd_ps(denominator, q, 
simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)))); denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)))); denominator = simde_mm512_fmadd_ps(denominator, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.0)), mask_el, q), simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.0)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)))); denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0))); /* res = numerator / denominator; */ retval = simde_mm512_mask_div_ps(retval, mask_lo | mask_hi | mask_el, numerator, denominator); } return retval; #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_cdfnorminv_ps #define _mm512_cdfnorminv_ps(a) simde_mm512_cdfnorminv_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_cdfnorminv_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cdfnorminv_pd(a); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) simde__m512d_private r_, a_ = simde__m512d_to_private(a); for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_cdfnorminv_pd(a_.m256d[i]); } return simde__m512d_from_private(r_); #else simde__m512d retval = simde_mm512_setzero_pd(); simde__mmask8 matched; { /* if (a < 0 || a > 1) */ matched = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ); matched |= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_GT_OQ); /* We don't actually need to do anything here since we initialize * retval to 0.0. 
*/ } { /* else if (a == 0) */ simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); matched |= mask; retval = simde_mm512_mask_mov_pd(retval, mask, simde_mm512_set1_pd(-SIMDE_MATH_INFINITY)); } { /* else if (a == 1) */ simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); matched |= mask; retval = simde_mm512_mask_mov_pd(retval, mask, simde_mm512_set1_pd(SIMDE_MATH_INFINITY)); } { /* else if (a < 0.02425) */ simde__mmask8 mask_lo = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.02425)), SIMDE_CMP_LT_OQ); /* else if (a > 0.97575) */ simde__mmask8 mask_hi = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.97575)), SIMDE_CMP_GT_OQ); simde__mmask8 mask = mask_lo | mask_hi; matched = matched | mask; /* else */ simde__mmask8 mask_el = ~matched; mask = mask | mask_el; /* r = a - 0.5f */ simde__m512d r = simde_mm512_sub_pd(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5))); /* lo: q = a * hi: q = (1.0 - a) */ simde__m512d q = a; q = simde_mm512_mask_sub_pd(q, mask_hi, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a); /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */ q = simde_mm512_log_pd(q); q = simde_mm512_mul_pd(q, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.0))); q = simde_mm512_sqrt_pd(q); /* el: q = r * r */ q = simde_mm512_mask_mul_pd(q, mask_el, r, r); /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */ /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */ /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ simde__m512d numerator = simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01))); numerator = simde_mm512_fmadd_pd(numerator, q, 
simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)))); numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)))); numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)))); numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)))); numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)))); { simde__m512d multiplier; multiplier = simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.0)); multiplier = simde_mm512_mask_mov_pd(multiplier, mask_hi, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.0))); multiplier = simde_mm512_mask_mov_pd(multiplier, mask_el, r); numerator = simde_mm512_mul_pd(numerator, multiplier); } /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ simde__m512d denominator = simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01))); denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)))); denominator = simde_mm512_fmadd_pd(denominator, q, 
simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)))); denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)))); denominator = simde_mm512_fmadd_pd(denominator, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.0)), mask_el, q), simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.0)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)))); denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0))); /* res = numerator / denominator; */ retval = simde_mm512_mask_div_pd(retval, mask_lo | mask_hi | mask_el, numerator, denominator); } return retval; #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_cdfnorminv_pd #define _mm512_cdfnorminv_pd(a) simde_mm512_cdfnorminv_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_cdfnorminv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cdfnorminv_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_cdfnorminv_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cdfnorminv_ps #define _mm512_mask_cdfnorminv_ps(src, k, a) simde_mm512_mask_cdfnorminv_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_cdfnorminv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_cdfnorminv_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_cdfnorminv_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cdfnorminv_pd #define _mm512_mask_cdfnorminv_pd(src, k, a) 
simde_mm512_mask_cdfnorminv_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_erfinv_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_erfinv_ps(a); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c */ simde__m128 one = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)); simde__m128 lnx = simde_mm_log_ps(simde_mm_mul_ps(simde_mm_sub_ps(one, a), simde_mm_add_ps(one, a))); simde__m128 tt1 = simde_mm_mul_ps(simde_mm_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.147))); tt1 = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1); tt1 = simde_mm_add_ps(tt1, simde_mm_mul_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx)); simde__m128 tt2 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147)); tt2 = simde_mm_mul_ps(tt2, lnx); simde__m128 r = simde_mm_mul_ps(tt1, tt1); r = simde_mm_sub_ps(r, tt2); r = simde_mm_sqrt_ps(r); r = simde_mm_add_ps(simde_x_mm_negate_ps(tt1), r); r = simde_mm_sqrt_ps(r); return simde_x_mm_xorsign_ps(r, a); #else simde__m128_private a_ = simde__m128_to_private(a), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_erfinvf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_erfinv_ps #define _mm_erfinv_ps(a) simde_mm_erfinv_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_erfinv_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_erfinv_pd(a); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) simde__m128d one = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)); simde__m128d lnx = simde_mm_log_pd(simde_mm_mul_pd(simde_mm_sub_pd(one, a), simde_mm_add_pd(one, a))); simde__m128d tt1 = simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_MATH_PI), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.147))); tt1 = 
simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1); tt1 = simde_mm_add_pd(tt1, simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx)); simde__m128d tt2 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147)); tt2 = simde_mm_mul_pd(tt2, lnx); simde__m128d r = simde_mm_mul_pd(tt1, tt1); r = simde_mm_sub_pd(r, tt2); r = simde_mm_sqrt_pd(r); r = simde_mm_add_pd(simde_x_mm_negate_pd(tt1), r); r = simde_mm_sqrt_pd(r); return simde_x_mm_xorsign_pd(r, a); #else simde__m128d_private a_ = simde__m128d_to_private(a), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_erfinv(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_erfinv_pd #define _mm_erfinv_pd(a) simde_mm_erfinv_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_erfinv_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_erfinv_ps(a); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) simde__m256 one = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)); simde__m256 sgn = simde_x_mm256_copysign_ps(one, a); a = simde_mm256_mul_ps(simde_mm256_sub_ps(one, a), simde_mm256_add_ps(one, a)); simde__m256 lnx = simde_mm256_log_ps(a); simde__m256 tt1 = simde_mm256_mul_ps(simde_mm256_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.147))); tt1 = simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1); tt1 = simde_mm256_add_ps(tt1, simde_mm256_mul_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx)); simde__m256 tt2 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147)); tt2 = simde_mm256_mul_ps(tt2, lnx); simde__m256 r = simde_mm256_mul_ps(tt1, tt1); r = simde_mm256_sub_ps(r, tt2); r = simde_mm256_sqrt_ps(r); r = simde_mm256_add_ps(simde_x_mm256_negate_ps(tt1), r); r = simde_mm256_sqrt_ps(r); return simde_mm256_mul_ps(sgn, r); #else simde__m256_private a_ = 
simde__m256_to_private(a), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_erfinvf(a_.f32[i]); } return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_erfinv_ps #define _mm256_erfinv_ps(a) simde_mm256_erfinv_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_erfinv_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_erfinv_pd(a); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) simde__m256d one = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)); simde__m256d sgn = simde_x_mm256_copysign_pd(one, a); a = simde_mm256_mul_pd(simde_mm256_sub_pd(one, a), simde_mm256_add_pd(one, a)); simde__m256d lnx = simde_mm256_log_pd(a); simde__m256d tt1 = simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_MATH_PI), simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.147))); tt1 = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1); tt1 = simde_mm256_add_pd(tt1, simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx)); simde__m256d tt2 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147)); tt2 = simde_mm256_mul_pd(tt2, lnx); simde__m256d r = simde_mm256_mul_pd(tt1, tt1); r = simde_mm256_sub_pd(r, tt2); r = simde_mm256_sqrt_pd(r); r = simde_mm256_add_pd(simde_x_mm256_negate_pd(tt1), r); r = simde_mm256_sqrt_pd(r); return simde_mm256_mul_pd(sgn, r); #else simde__m256d_private a_ = simde__m256d_to_private(a), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_erfinv(a_.f64[i]); } return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_erfinv_pd #define _mm256_erfinv_pd(a) simde_mm256_erfinv_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_erfinv_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_erfinv_ps(a); #elif 
(SIMDE_NATURAL_VECTOR_SIZE > 0) simde__m512 one = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)); simde__m512 sgn = simde_x_mm512_copysign_ps(one, a); a = simde_mm512_mul_ps(simde_mm512_sub_ps(one, a), simde_mm512_add_ps(one, a)); simde__m512 lnx = simde_mm512_log_ps(a); simde__m512 tt1 = simde_mm512_mul_ps(simde_mm512_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.147))); tt1 = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1); tt1 = simde_mm512_add_ps(tt1, simde_mm512_mul_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx)); simde__m512 tt2 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147)); tt2 = simde_mm512_mul_ps(tt2, lnx); simde__m512 r = simde_mm512_mul_ps(tt1, tt1); r = simde_mm512_sub_ps(r, tt2); r = simde_mm512_sqrt_ps(r); r = simde_mm512_add_ps(simde_x_mm512_negate_ps(tt1), r); r = simde_mm512_sqrt_ps(r); return simde_mm512_mul_ps(sgn, r); #else simde__m512_private a_ = simde__m512_to_private(a), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_erfinvf(a_.f32[i]); } return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_erfinv_ps #define _mm512_erfinv_ps(a) simde_mm512_erfinv_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_erfinv_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_erfinv_pd(a); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) simde__m512d one = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)); simde__m512d sgn = simde_x_mm512_copysign_pd(one, a); a = simde_mm512_mul_pd(simde_mm512_sub_pd(one, a), simde_mm512_add_pd(one, a)); simde__m512d lnx = simde_mm512_log_pd(a); simde__m512d tt1 = simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_MATH_PI), simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.147))); tt1 = simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1); tt1 = simde_mm512_add_pd(tt1, 
simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx)); simde__m512d tt2 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147)); tt2 = simde_mm512_mul_pd(tt2, lnx); simde__m512d r = simde_mm512_mul_pd(tt1, tt1); r = simde_mm512_sub_pd(r, tt2); r = simde_mm512_sqrt_pd(r); r = simde_mm512_add_pd(simde_x_mm512_negate_pd(tt1), r); r = simde_mm512_sqrt_pd(r); return simde_mm512_mul_pd(sgn, r); #else simde__m512d_private a_ = simde__m512d_to_private(a), r_; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_erfinv(a_.f64[i]); } return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_erfinv_pd #define _mm512_erfinv_pd(a) simde_mm512_erfinv_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_erfinv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_erfinv_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfinv_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_erfinv_ps #define _mm512_mask_erfinv_ps(src, k, a) simde_mm512_mask_erfinv_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_erfinv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_erfinv_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfinv_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_erfinv_pd #define _mm512_mask_erfinv_pd(src, k, a) simde_mm512_mask_erfinv_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_erfcinv_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_erfcinv_ps(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) simde__m128 matched, retval = 
simde_mm_setzero_ps(); { /* if (a < 2.0f && a > 0.0625f) */ matched = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0))); matched = simde_mm_and_ps(matched, simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0625)))); if (!simde_mm_test_all_zeros(simde_mm_castps_si128(matched), simde_x_mm_setone_si128())) { retval = simde_mm_erfinv_ps(simde_mm_sub_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), a)); } if (simde_mm_test_all_ones(simde_mm_castps_si128(matched))) { return retval; } } { /* else if (a < 0.0625f && a > 0.0f) */ simde__m128 mask = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0625))); mask = simde_mm_and_ps(mask, simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0)))); mask = simde_mm_andnot_ps(matched, mask); if (!simde_mm_test_all_zeros(simde_mm_castps_si128(mask), simde_x_mm_setone_si128())) { matched = simde_mm_or_ps(matched, mask); /* t = 1/(sqrt(-log(a))) */ simde__m128 t = simde_x_mm_negate_ps(simde_mm_log_ps(a)); t = simde_mm_sqrt_ps(t); t = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), t); const simde__m128 p[] = { simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.680544246825)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.164441567910)) }; const simde__m128 q[] = { simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.000000000000)) }; /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ simde__m128 numerator = simde_mm_fmadd_ps(p[5], t, p[4]); numerator = simde_mm_fmadd_ps(numerator, t, p[3]); numerator = simde_mm_fmadd_ps(numerator, t, p[2]); numerator = simde_mm_fmadd_ps(numerator, t, p[1]); numerator = simde_mm_add_ps(numerator, simde_mm_div_ps(p[0], t)); /* float denominator = (q[0] + t * (q[1] + t * 
(q[2]))) */ simde__m128 denominator = simde_mm_fmadd_ps(q[2], t, q[1]); denominator = simde_mm_fmadd_ps(denominator, t, q[0]); simde__m128 res = simde_mm_div_ps(numerator, denominator); retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); } } { /* else if (a < 0.0f) */ simde__m128 mask = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))); mask = simde_mm_andnot_ps(matched, mask); if (!simde_mm_test_all_zeros(simde_mm_castps_si128(mask), simde_x_mm_setone_si128())) { matched = simde_mm_or_ps(matched, mask); /* t = 1/(sqrt(-log(a))) */ simde__m128 t = simde_x_mm_negate_ps(simde_mm_log_ps(a)); t = simde_mm_sqrt_ps(t); t = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), t); const simde__m128 p[] = { simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.97302949837000)), simde_mm_set1_ps(SIMDE_FLOAT32_C( -0.5374947401000)) }; const simde__m128 q[] = { simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000)) }; /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */ simde__m128 numerator = simde_mm_fmadd_ps(p[3], t, p[2]); numerator = simde_mm_fmadd_ps(numerator, t, p[1]); numerator = simde_mm_add_ps(numerator, simde_mm_div_ps(p[0], t)); /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ simde__m128 denominator = simde_mm_fmadd_ps(q[2], t, q[1]); denominator = simde_mm_fmadd_ps(denominator, t, q[0]); simde__m128 res = simde_mm_div_ps(numerator, denominator); retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); if (simde_mm_test_all_ones(simde_mm_castps_si128(matched))) { return retval; } } } { /* else if (a == 0.0f) */ simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))); mask = simde_mm_andnot_ps(matched, mask); matched = simde_mm_or_ps(matched, mask); simde__m128 res = 
simde_mm_set1_ps(SIMDE_MATH_INFINITYF); retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); } { /* else */ /* (a >= 2.0f) */ retval = simde_mm_or_ps(retval, simde_mm_andnot_ps(matched, simde_mm_set1_ps(-SIMDE_MATH_INFINITYF))); } return retval; #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_erfcinvf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_erfcinv_ps #define _mm_erfcinv_ps(a) simde_mm_erfcinv_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_erfcinv_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_erfcinv_pd(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) simde__m128d matched, retval = simde_mm_setzero_pd(); { /* if (a < 2.0 && a > 0.0625) */ matched = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0))); matched = simde_mm_and_pd(matched, simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0625)))); if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(matched), simde_x_mm_setone_si128())) { retval = simde_mm_erfinv_pd(simde_mm_sub_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), a)); } if (simde_mm_test_all_ones(simde_mm_castpd_si128(matched))) { return retval; } } { /* else if (a < 0.0625 && a > 0.0) */ simde__m128d mask = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0625))); mask = simde_mm_and_pd(mask, simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0)))); mask = simde_mm_andnot_pd(matched, mask); if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(mask), simde_x_mm_setone_si128())) { matched = simde_mm_or_pd(matched, mask); /* t = 1/(sqrt(-log(a))) */ simde__m128d t = simde_x_mm_negate_pd(simde_mm_log_pd(a)); t = simde_mm_sqrt_pd(t); t = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), t); const simde__m128d p[] = { simde_mm_set1_pd(SIMDE_FLOAT64_C( 
0.1550470003116)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.164441567910)) }; const simde__m128d q[] = { simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.000000000000)) }; /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ simde__m128d numerator = simde_mm_fmadd_pd(p[5], t, p[4]); numerator = simde_mm_fmadd_pd(numerator, t, p[3]); numerator = simde_mm_fmadd_pd(numerator, t, p[2]); numerator = simde_mm_fmadd_pd(numerator, t, p[1]); numerator = simde_mm_add_pd(numerator, simde_mm_div_pd(p[0], t)); /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ simde__m128d denominator = simde_mm_fmadd_pd(q[2], t, q[1]); denominator = simde_mm_fmadd_pd(denominator, t, q[0]); simde__m128d res = simde_mm_div_pd(numerator, denominator); retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); } } { /* else if (a < 0.0) */ simde__m128d mask = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))); mask = simde_mm_andnot_pd(matched, mask); if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(mask), simde_x_mm_setone_si128())) { matched = simde_mm_or_pd(matched, mask); /* t = 1/(sqrt(-log(a))) */ simde__m128d t = simde_x_mm_negate_pd(simde_mm_log_pd(a)); t = simde_mm_sqrt_pd(t); t = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), t); const simde__m128d p[] = { simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.97302949837000)), simde_mm_set1_pd(SIMDE_FLOAT64_C( -0.5374947401000)) }; const simde__m128d q[] = { simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.00980451277802)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.36369997154400)), 
simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.00000000000000)) }; /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */ simde__m128d numerator = simde_mm_fmadd_pd(p[3], t, p[2]); numerator = simde_mm_fmadd_pd(numerator, t, p[1]); numerator = simde_mm_add_pd(numerator, simde_mm_div_pd(p[0], t)); /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ simde__m128d denominator = simde_mm_fmadd_pd(q[2], t, q[1]); denominator = simde_mm_fmadd_pd(denominator, t, q[0]); simde__m128d res = simde_mm_div_pd(numerator, denominator); retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); if (simde_mm_test_all_ones(simde_mm_castpd_si128(matched))) { return retval; } } } { /* else if (a == 0.0) */ simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))); mask = simde_mm_andnot_pd(matched, mask); matched = simde_mm_or_pd(matched, mask); simde__m128d res = simde_mm_set1_pd(SIMDE_MATH_INFINITY); retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); } { /* else */ /* (a >= 2.0) */ retval = simde_mm_or_pd(retval, simde_mm_andnot_pd(matched, simde_mm_set1_pd(-SIMDE_MATH_INFINITY))); } return retval; #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_erfcinv(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_erfcinv_pd #define _mm_erfcinv_pd(a) simde_mm_erfcinv_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_erfcinv_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_erfcinv_ps(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256) simde__m256 matched, retval = simde_mm256_setzero_ps(); { /* if (a < 2.0f && a > 0.0625f) */ matched = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)), SIMDE_CMP_LT_OQ); matched = simde_mm256_and_ps(matched, simde_mm256_cmp_ps(a, 
simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_GT_OQ)); if (!simde_mm256_testz_ps(matched, matched)) { retval = simde_mm256_erfinv_ps(simde_mm256_sub_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), a)); } if (simde_x_mm256_test_all_ones(simde_mm256_castps_si256(matched))) { return retval; } } { /* else if (a < 0.0625f && a > 0.0f) */ simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_LT_OQ); mask = simde_mm256_and_ps(mask, simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_GT_OQ)); mask = simde_mm256_andnot_ps(matched, mask); if (!simde_mm256_testz_ps(mask, mask)) { matched = simde_mm256_or_ps(matched, mask); /* t = 1/(sqrt(-log(a))) */ simde__m256 t = simde_x_mm256_negate_ps(simde_mm256_log_ps(a)); t = simde_mm256_sqrt_ps(t); t = simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), t); const simde__m256 p[] = { simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.680544246825)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.16444156791)) }; const simde__m256 q[] = { simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.000000000000)) }; /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ simde__m256 numerator = simde_mm256_fmadd_ps(p[5], t, p[4]); numerator = simde_mm256_fmadd_ps(numerator, t, p[3]); numerator = simde_mm256_fmadd_ps(numerator, t, p[2]); numerator = simde_mm256_fmadd_ps(numerator, t, p[1]); numerator = simde_mm256_add_ps(numerator, simde_mm256_div_ps(p[0], t)); /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ simde__m256 denominator = simde_mm256_fmadd_ps(q[2], t, q[1]); denominator = simde_mm256_fmadd_ps(denominator, t, 
q[0]); simde__m256 res = simde_mm256_div_ps(numerator, denominator); retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); } } { /* else if (a < 0.0f) */ simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ); mask = simde_mm256_andnot_ps(matched, mask); if (!simde_mm256_testz_ps(mask, mask)) { matched = simde_mm256_or_ps(matched, mask); /* t = 1/(sqrt(-log(a))) */ simde__m256 t = simde_x_mm256_negate_ps(simde_mm256_log_ps(a)); t = simde_mm256_sqrt_ps(t); t = simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), t); const simde__m256 p[] = { simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.97302949837000)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.5374947401000)) }; const simde__m256 q[] = { simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000)) }; /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */ simde__m256 numerator = simde_mm256_fmadd_ps(p[3], t, p[2]); numerator = simde_mm256_fmadd_ps(numerator, t, p[1]); numerator = simde_mm256_add_ps(numerator, simde_mm256_div_ps(p[0], t)); /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ simde__m256 denominator = simde_mm256_fmadd_ps(q[2], t, q[1]); denominator = simde_mm256_fmadd_ps(denominator, t, q[0]); simde__m256 res = simde_mm256_div_ps(numerator, denominator); retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); if (simde_x_mm256_test_all_ones(simde_mm256_castps_si256(matched))) { return retval; } } } { /* else if (a == 0.0f) */ simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); mask = simde_mm256_andnot_ps(matched, mask); matched = simde_mm256_or_ps(matched, mask); simde__m256 res = simde_mm256_set1_ps(SIMDE_MATH_INFINITYF); retval = 
simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); } { /* else */ /* (a >= 2.0f) */ retval = simde_mm256_or_ps(retval, simde_mm256_andnot_ps(matched, simde_mm256_set1_ps(-SIMDE_MATH_INFINITYF))); } return retval; #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_erfcinv_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_erfcinvf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_erfcinv_ps #define _mm256_erfcinv_ps(a) simde_mm256_erfcinv_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_erfcinv_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_erfcinv_pd(a); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256) simde__m256d matched, retval = simde_mm256_setzero_pd(); { /* if (a < 2.0 && a > 0.0625) */ matched = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0)), SIMDE_CMP_LT_OQ); matched = simde_mm256_and_pd(matched, simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_GT_OQ)); if (!simde_mm256_testz_pd(matched, matched)) { retval = simde_mm256_erfinv_pd(simde_mm256_sub_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), a)); } if (simde_x_mm256_test_all_ones(simde_mm256_castpd_si256(matched))) { return retval; } } { /* else if (a < 0.0625 && a > 0.0) */ simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_LT_OQ); mask = simde_mm256_and_pd(mask, simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_GT_OQ)); mask = simde_mm256_andnot_pd(matched, mask); if (!simde_mm256_testz_pd(mask, mask)) { matched = simde_mm256_or_pd(matched, mask); /* t = 1/(sqrt(-log(a))) */ simde__m256d t = 
simde_x_mm256_negate_pd(simde_mm256_log_pd(a)); t = simde_mm256_sqrt_pd(t); t = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), t); const simde__m256d p[] = { simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.1550470003116)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.16444156791)) }; const simde__m256d q[] = { simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.000000000000)) }; /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ simde__m256d numerator = simde_mm256_fmadd_pd(p[5], t, p[4]); numerator = simde_mm256_fmadd_pd(numerator, t, p[3]); numerator = simde_mm256_fmadd_pd(numerator, t, p[2]); numerator = simde_mm256_fmadd_pd(numerator, t, p[1]); numerator = simde_mm256_add_pd(numerator, simde_mm256_div_pd(p[0], t)); /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ simde__m256d denominator = simde_mm256_fmadd_pd(q[2], t, q[1]); denominator = simde_mm256_fmadd_pd(denominator, t, q[0]); simde__m256d res = simde_mm256_div_pd(numerator, denominator); retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); } } { /* else if (a < 0.0) */ simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ); mask = simde_mm256_andnot_pd(matched, mask); if (!simde_mm256_testz_pd(mask, mask)) { matched = simde_mm256_or_pd(matched, mask); /* t = 1/(sqrt(-log(a))) */ simde__m256d t = simde_x_mm256_negate_pd(simde_mm256_log_pd(a)); t = simde_mm256_sqrt_pd(t); t = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), t); const simde__m256d p[] = { simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)), 
simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.97302949837000)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.5374947401000)) }; const simde__m256d q[] = { simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.00980451277802)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.36369997154400)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.00000000000000)) }; /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */ simde__m256d numerator = simde_mm256_fmadd_pd(p[3], t, p[2]); numerator = simde_mm256_fmadd_pd(numerator, t, p[1]); numerator = simde_mm256_add_pd(numerator, simde_mm256_div_pd(p[0], t)); /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ simde__m256d denominator = simde_mm256_fmadd_pd(q[2], t, q[1]); denominator = simde_mm256_fmadd_pd(denominator, t, q[0]); simde__m256d res = simde_mm256_div_pd(numerator, denominator); retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); if (simde_x_mm256_test_all_ones(simde_mm256_castpd_si256(matched))) { return retval; } } } { /* else if (a == 0.0) */ simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); mask = simde_mm256_andnot_pd(matched, mask); matched = simde_mm256_or_pd(matched, mask); simde__m256d res = simde_mm256_set1_pd(SIMDE_MATH_INFINITY); retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); } { /* else */ /* (a >= 2.0) */ retval = simde_mm256_or_pd(retval, simde_mm256_andnot_pd(matched, simde_mm256_set1_pd(-SIMDE_MATH_INFINITY))); } return retval; #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_erfcinv_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_erfcinv(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_erfcinv_pd #define _mm256_erfcinv_pd(a) 
simde_mm256_erfcinv_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_erfcinv_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_erfcinv_ps(a); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && (!defined(SIMDE_ARCH_ARM) || defined(SIMDE_ARCH_AARCH64)) /* The results on Arm are *slightly* off, which causes problems for * the edge cases; for example, if you pass 2.0 sqrt will be called * with a value of -0.0 instead of 0.0, resulting in a NaN. */ simde__m512_private r_, a_ = simde__m512_to_private(a); for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_erfcinv_ps(a_.m256[i]); } return simde__m512_from_private(r_); #else simde__m512 retval = simde_mm512_setzero_ps(); simde__mmask16 matched; { /* if (a < 2.0f && a > 0.0625f) */ matched = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0)), SIMDE_CMP_LT_OQ); matched &= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_GT_OQ); if (matched != 0) { retval = simde_mm512_erfinv_ps(simde_mm512_sub_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a)); } if (matched == 1) { return retval; } } { /* else if (a < 0.0625f && a > 0.0f) */ simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_LT_OQ); mask &= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_GT_OQ); mask = ~matched & mask; if (mask != 0) { matched = matched | mask; /* t = 1/(sqrt(-log(a))) */ simde__m512 t = simde_x_mm512_negate_ps(simde_mm512_log_ps(a)); t = simde_mm512_sqrt_ps(t); t = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), t); const simde__m512 p[] = { simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)), simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)), simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)), simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)), simde_mm512_set1_ps(SIMDE_FLOAT32_C( 
0.680544246825)), simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.16444156791)) }; const simde__m512 q[] = { simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)), simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)), simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.000000000000)) }; /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ simde__m512 numerator = simde_mm512_fmadd_ps(p[5], t, p[4]); numerator = simde_mm512_fmadd_ps(numerator, t, p[3]); numerator = simde_mm512_fmadd_ps(numerator, t, p[2]); numerator = simde_mm512_fmadd_ps(numerator, t, p[1]); numerator = simde_mm512_add_ps(numerator, simde_mm512_div_ps(p[0], t)); /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ simde__m512 denominator = simde_mm512_fmadd_ps(q[2], t, q[1]); denominator = simde_mm512_fmadd_ps(denominator, t, q[0]); simde__m512 res = simde_mm512_div_ps(numerator, denominator); retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(mask, res)); } } { /* else if (a < 0.0f) */ simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ); mask = ~matched & mask; if (mask != 0) { matched = matched | mask; /* t = 1/(sqrt(-log(a))) */ simde__m512 t = simde_x_mm512_negate_ps(simde_mm512_log_ps(a)); t = simde_mm512_sqrt_ps(t); t = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), t); const simde__m512 p[] = { simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)), simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)), simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.97302949837000)), simde_mm512_set1_ps(SIMDE_FLOAT32_C( -0.5374947401000)) }; const simde__m512 q[] = { simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)), simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)), simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000)) }; /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */ simde__m512 numerator = simde_mm512_fmadd_ps(p[3], t, p[2]); numerator = simde_mm512_fmadd_ps(numerator, t, 
p[1]); numerator = simde_mm512_add_ps(numerator, simde_mm512_div_ps(p[0], t)); /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ simde__m512 denominator = simde_mm512_fmadd_ps(q[2], t, q[1]); denominator = simde_mm512_fmadd_ps(denominator, t, q[0]); simde__m512 res = simde_mm512_div_ps(numerator, denominator); retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(mask, res)); if (matched == 1) { return retval; } } } { /* else if (a == 0.0f) */ simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); mask = ~matched & mask; matched = matched | mask; simde__m512 res = simde_mm512_set1_ps(SIMDE_MATH_INFINITYF); retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(mask, res)); } { /* else */ /* (a >= 2.0f) */ retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(~matched, simde_mm512_set1_ps(-SIMDE_MATH_INFINITYF))); } return retval; #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_erfcinv_ps #define _mm512_erfcinv_ps(a) simde_mm512_erfcinv_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_erfcinv_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_erfcinv_pd(a); #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) simde__m512d_private r_, a_ = simde__m512d_to_private(a); for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_erfcinv_pd(a_.m256d[i]); } return simde__m512d_from_private(r_); #else simde__m512d retval = simde_mm512_setzero_pd(); simde__mmask8 matched; { /* if (a < 2.0f && a > 0.0625f) */ matched = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0)), SIMDE_CMP_LT_OQ); matched &= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_GT_OQ); if (matched != 0) { retval = simde_mm512_erfinv_pd(simde_mm512_sub_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a)); } if (matched == 1) { return retval; } } { /* else if 
(a < 0.0625f && a > 0.0f) */ simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_LT_OQ); mask &= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_GT_OQ); mask = ~matched & mask; if (mask != 0) { matched = matched | mask; /* t = 1/(sqrt(-log(a))) */ simde__m512d t = simde_x_mm512_negate_pd(simde_mm512_log_pd(a)); t = simde_mm512_sqrt_pd(t); t = simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), t); const simde__m512d p[] = { simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.1550470003116)), simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)), simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)), simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)), simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)), simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.16444156791)) }; const simde__m512d q[] = { simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)), simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)), simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.000000000000)) }; /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ simde__m512d numerator = simde_mm512_fmadd_pd(p[5], t, p[4]); numerator = simde_mm512_fmadd_pd(numerator, t, p[3]); numerator = simde_mm512_fmadd_pd(numerator, t, p[2]); numerator = simde_mm512_fmadd_pd(numerator, t, p[1]); numerator = simde_mm512_add_pd(numerator, simde_mm512_div_pd(p[0], t)); /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ simde__m512d denominator = simde_mm512_fmadd_pd(q[2], t, q[1]); denominator = simde_mm512_fmadd_pd(denominator, t, q[0]); simde__m512d res = simde_mm512_div_pd(numerator, denominator); retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(mask, res)); } } { /* else if (a < 0.0f) */ simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ); mask = ~matched & mask; if (mask != 0) { matched = matched | mask; /* t = 1/(sqrt(-log(a))) */ simde__m512d 
t = simde_x_mm512_negate_pd(simde_mm512_log_pd(a)); t = simde_mm512_sqrt_pd(t); t = simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), t); const simde__m512d p[] = { simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)), simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)), simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.97302949837000)), simde_mm512_set1_pd(SIMDE_FLOAT64_C( -0.5374947401000)) }; const simde__m512d q[] = { simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.00980451277802)), simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.36369997154400)), simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.00000000000000)) }; /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */ simde__m512d numerator = simde_mm512_fmadd_pd(p[3], t, p[2]); numerator = simde_mm512_fmadd_pd(numerator, t, p[1]); numerator = simde_mm512_add_pd(numerator, simde_mm512_div_pd(p[0], t)); /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ simde__m512d denominator = simde_mm512_fmadd_pd(q[2], t, q[1]); denominator = simde_mm512_fmadd_pd(denominator, t, q[0]); simde__m512d res = simde_mm512_div_pd(numerator, denominator); retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(mask, res)); if (matched == 1) { return retval; } } } { /* else if (a == 0.0f) */ simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); mask = ~matched & mask; matched = matched | mask; simde__m512d res = simde_mm512_set1_pd(SIMDE_MATH_INFINITY); retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(mask, res)); } { /* else */ /* (a >= 2.0f) */ retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(~matched, simde_mm512_set1_pd(-SIMDE_MATH_INFINITY))); } return retval; #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_erfcinv_pd #define _mm512_erfcinv_pd(a) simde_mm512_erfcinv_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_erfcinv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_erfcinv_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfcinv_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_erfcinv_ps #define _mm512_mask_erfcinv_ps(src, k, a) simde_mm512_mask_erfcinv_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_erfcinv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_erfcinv_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfcinv_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_erfcinv_pd #define _mm512_mask_erfcinv_pd(src, k, a) simde_mm512_mask_erfcinv_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_logb_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_logb_ps(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_logbf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_logb_ps #define _mm_logb_ps(a) simde_mm_logb_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_logb_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_logb_pd(a); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_logb(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_logb_pd #define _mm_logb_pd(a) simde_mm_logb_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_logb_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) 
&& defined(SIMDE_X86_AVX_NATIVE) return _mm256_logb_ps(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_logb_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_logbf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_logb_ps #define _mm256_logb_ps(a) simde_mm256_logb_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_logb_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_logb_pd(a); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_logb_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_logb(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_logb_pd #define _mm256_logb_pd(a) simde_mm256_logb_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_logb_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_logb_ps(a); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_logb_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_logbf(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_logb_ps #define _mm512_logb_ps(a) 
simde_mm512_logb_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_logb_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_logb_pd(a); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_logb_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_logb(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_logb_pd #define _mm512_logb_pd(a) simde_mm512_logb_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_logb_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_logb_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_logb_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_logb_ps #define _mm512_mask_logb_ps(src, k, a) simde_mm512_mask_logb_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_logb_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_logb_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_logb_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_logb_pd #define _mm512_mask_logb_pd(src, k, a) simde_mm512_mask_logb_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_log2_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_log2_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1) return 
Sleef_log2f4_u35(a); #else return Sleef_log2f4_u10(a); #endif #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_log2f(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_log2_ps #define _mm_log2_ps(a) simde_mm_log2_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_log2_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_log2_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1) return Sleef_log2d2_u35(a); #else return Sleef_log2d2_u10(a); #endif #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_log2(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_log2_pd #define _mm_log2_pd(a) simde_mm_log2_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_log2_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_log2_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1) return Sleef_log2f8_u35(a); #else return Sleef_log2f8_u10(a); #endif #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_log2_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_log2f(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_log2_ps #define _mm256_log2_ps(a) simde_mm256_log2_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_log2_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_log2_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1) return Sleef_log2d4_u35(a); #else return Sleef_log2d4_u10(a); #endif #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_log2_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_log2(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_log2_pd #define _mm256_log2_pd(a) simde_mm256_log2_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_log2_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_log2_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1) return Sleef_log2f16_u35(a); #else return Sleef_log2f16_u10(a); #endif #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_log2_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_log2f(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_log2_ps #define _mm512_log2_ps(a) simde_mm512_log2_ps(a) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_log2_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_log2_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1) return Sleef_log2d8_u35(a); #else return Sleef_log2d8_u10(a); #endif #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_log2_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_log2(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_log2_pd #define _mm512_log2_pd(a) simde_mm512_log2_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_log2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_log2_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_log2_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_log2_ps #define _mm512_mask_log2_ps(src, k, a) simde_mm512_mask_log2_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_log2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_log2_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_log2_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_log2_pd #define _mm512_mask_log2_pd(src, k, a) simde_mm512_mask_log2_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_log1p_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && 
defined(SIMDE_X86_SSE_NATIVE) return _mm_log1p_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_log1pf4_u10(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_log1pf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_log1p_ps #define _mm_log1p_ps(a) simde_mm_log1p_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_log1p_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_log1p_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_log1pd2_u10(a); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_log1p(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_log1p_pd #define _mm_log1p_pd(a) simde_mm_log1p_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_log1p_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_log1p_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_log1pf8_u10(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_log1p_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_log1pf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_log1p_ps #define _mm256_log1p_ps(a) simde_mm256_log1p_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d 
simde_mm256_log1p_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_log1p_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_log1pd4_u10(a); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_log1p_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_log1p(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_log1p_pd #define _mm256_log1p_pd(a) simde_mm256_log1p_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_log1p_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_log1p_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_log1pf16_u10(a); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_log1p_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_log1pf(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_log1p_ps #define _mm512_log1p_ps(a) simde_mm512_log1p_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_log1p_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_log1p_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_log1pd8_u10(a); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t 
i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_log1p_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_log1p(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_log1p_pd #define _mm512_log1p_pd(a) simde_mm512_log1p_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_log1p_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_log1p_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_log1p_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_log1p_ps #define _mm512_mask_log1p_ps(src, k, a) simde_mm512_mask_log1p_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_log1p_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_log1p_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_log1p_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_log1p_pd #define _mm512_mask_log1p_pd(src, k, a) simde_mm512_mask_log1p_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_log10_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_log10_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_log10f4_u10(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_log10f(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_log10_ps #define _mm_log10_ps(a) simde_mm_log10_ps(a) 
#endif

/* log10 over the f64 lanes of a 128-bit vector.  Uses native _mm_log10_pd
 * (SVML + SSE native), else Sleef_log10d2_u10 (SLEEF enabled + SSE native),
 * else a per-lane loop calling simde_math_log10. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_log10_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_log10_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_log10d2_u10(a);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_log10(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
/* With native aliases enabled, the Intel name is redirected to the SIMDe
 * implementation. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_log10_pd
  #define _mm_log10_pd(a) simde_mm_log10_pd(a)
#endif

/* log10 over the f32 lanes of a 256-bit vector.
 *
 * Dispatch order: native _mm256_log10_ps (SVML + AVX native), then
 * Sleef_log10f8_u10 (SLEEF enabled + AVX native), then a portable fallback
 * that either delegates each m128 element to simde_mm_log10_ps (when
 * SIMDE_NATURAL_VECTOR_SIZE_LE(128)) or loops over lanes with
 * simde_math_log10f. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_log10_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_log10_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_log10f8_u10(a);
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_log10_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_log10f(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_log10_ps
  #define _mm256_log10_ps(a) simde_mm256_log10_ps(a)
#endif

/* log10 over the f64 lanes of a 256-bit vector.  Same structure as the _ps
 * variant: native _mm256_log10_pd, then Sleef_log10d4_u10, then per-m128d
 * delegation to simde_mm_log10_pd or a per-lane simde_math_log10 loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_log10_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_log10_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_log10d4_u10(a);
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_log10_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_log10(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_log10_pd
  #define _mm256_log10_pd(a) simde_mm256_log10_pd(a)
#endif

/* log10 over the f32 lanes of a 512-bit vector.  Native _mm512_log10_ps
 * (SVML + AVX-512F native), then Sleef_log10f16_u10, then per-m256 delegation
 * to simde_mm256_log10_ps (when SIMDE_NATURAL_VECTOR_SIZE_LE(256)) or a
 * per-lane simde_math_log10f loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_log10_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_log10_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_log10f16_u10(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_log10_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_log10f(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_log10_ps
  #define _mm512_log10_ps(a) simde_mm512_log10_ps(a)
#endif

/* log10 over the f64 lanes of a 512-bit vector.  Native _mm512_log10_pd, then
 * Sleef_log10d8_u10, then per-m256d delegation to simde_mm256_log10_pd or a
 * per-lane simde_math_log10 loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_log10_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_log10_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_log10d8_u10(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_log10_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_log10(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_log10_pd
  #define _mm512_log10_pd(a) simde_mm512_log10_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_log10_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  /* Masked log10 (f32): native _mm512_mask_log10_ps when SVML and AVX-512F
   * are native; otherwise compute simde_mm512_log10_ps(a) for the whole
   * vector and pass it with `src` and `k` to simde_mm512_mask_mov_ps.
   * NOTE(review): presumably lanes whose bit in `k` is clear keep `src` --
   * confirm against simde_mm512_mask_mov_ps. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_log10_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_log10_ps(a));
  #endif
}
/* With native aliases enabled, the Intel name is redirected to the SIMDe
 * implementation. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_log10_ps
  #define _mm512_mask_log10_ps(src, k, a) simde_mm512_mask_log10_ps(src, k, a)
#endif

/* Masked log10 (f64): same shape as the _ps variant, using an 8-bit mask type,
 * simde_mm512_log10_pd and simde_mm512_mask_mov_pd. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_log10_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_log10_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_log10_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_log10_pd
  #define _mm512_mask_log10_pd(src, k, a) simde_mm512_mask_log10_pd(src, k, a)
#endif

/* nearbyint over the f32 lanes of a 512-bit vector.  Native
 * _mm512_nearbyint_ps when SVML and AVX-512F are native; otherwise a per-lane
 * loop calling simde_math_nearbyintf.  There is no SLEEF branch and no
 * narrower-vector delegation for this operation. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_nearbyint_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_nearbyint_ps(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_nearbyintf(a_.f32[i]);
    }

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_nearbyint_ps
  #define _mm512_nearbyint_ps(a) simde_mm512_nearbyint_ps(a)
#endif

/* nearbyint over the f64 lanes of a 512-bit vector.  Native
 * _mm512_nearbyint_pd, else a per-lane loop calling simde_math_nearbyint. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_nearbyint_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_nearbyint_pd(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_nearbyint(a_.f64[i]);
    }

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_nearbyint_pd
  #define
_mm512_nearbyint_pd(a) simde_mm512_nearbyint_pd(a)
#endif

/* Masked nearbyint (f32): native _mm512_mask_nearbyint_ps when SVML and
 * AVX-512F are native; otherwise simde_mm512_nearbyint_ps(a) is computed for
 * the whole vector and passed with `src` and `k` to simde_mm512_mask_mov_ps.
 * NOTE(review): presumably lanes whose bit in `k` is clear keep `src` --
 * confirm against simde_mm512_mask_mov_ps. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_nearbyint_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_nearbyint_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_nearbyint_ps(a));
  #endif
}
/* With native aliases enabled, the Intel name is redirected to the SIMDe
 * implementation. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_nearbyint_ps
  #define _mm512_mask_nearbyint_ps(src, k, a) simde_mm512_mask_nearbyint_ps(src, k, a)
#endif

/* Masked nearbyint (f64): same shape as the _ps variant, using an 8-bit mask
 * type, simde_mm512_nearbyint_pd and simde_mm512_mask_mov_pd. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_nearbyint_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_nearbyint_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_nearbyint_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_nearbyint_pd
  #define _mm512_mask_nearbyint_pd(src, k, a) simde_mm512_mask_nearbyint_pd(src, k, a)
#endif

/* Lane-wise power, f32 x 4: r[i] = pow(a[i], b[i]).  Native _mm_pow_ps
 * (SVML + SSE native), else Sleef_powf4_u10 (SLEEF enabled + SSE native),
 * else a per-lane loop calling simde_math_powf. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_pow_ps (simde__m128 a, simde__m128 b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_pow_ps(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_powf4_u10(a, b);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a),
      b_ = simde__m128_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_pow_ps
  #define _mm_pow_ps(a, b) simde_mm_pow_ps(a, b)
#endif

/* Lane-wise power over the f64 lanes of a 128-bit vector.  Native _mm_pow_pd,
 * else Sleef_powd2_u10, else a per-lane simde_math_pow loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_pow_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_pow_pd(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_powd2_u10(a, b);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_pow_pd
  #define _mm_pow_pd(a, b) simde_mm_pow_pd(a, b)
#endif

/* Lane-wise power over the f32 lanes of a 256-bit vector.  Native
 * _mm256_pow_ps (SVML + AVX native), else Sleef_powf8_u10, else a per-lane
 * simde_math_powf loop.  Unlike the log functions, the fallback here always
 * loops over scalar lanes and never delegates to the 128-bit function. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_pow_ps (simde__m256 a, simde__m256 b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_pow_ps(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_powf8_u10(a, b);
  #else
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a),
      b_ = simde__m256_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]);
    }

    return simde__m256_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_pow_ps
  #define _mm256_pow_ps(a, b) simde_mm256_pow_ps(a, b)
#endif

/* Lane-wise power over the f64 lanes of a 256-bit vector.  Native
 * _mm256_pow_pd, else Sleef_powd4_u10, else a per-lane simde_math_pow loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_pow_pd (simde__m256d a, simde__m256d b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_pow_pd(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_powd4_u10(a, b);
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a),
      b_ = simde__m256d_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]);
    }

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_pow_pd
  #define _mm256_pow_pd(a, b) simde_mm256_pow_pd(a, b)
#endif

/* Lane-wise power over the f32 lanes of a 512-bit vector.  Native
 * _mm512_pow_ps (SVML + AVX-512F native), else Sleef_powf16_u10, else a
 * per-lane simde_math_powf loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_pow_ps (simde__m512 a, simde__m512 b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_pow_ps(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_powf16_u10(a, b);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a),
      b_ = simde__m512_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]);
    }

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_pow_ps
  #define _mm512_pow_ps(a, b) simde_mm512_pow_ps(a, b)
#endif

/* Lane-wise power over the f64 lanes of a 512-bit vector.  Native
 * _mm512_pow_pd, else Sleef_powd8_u10, else a per-lane simde_math_pow loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_pow_pd (simde__m512d a, simde__m512d b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_pow_pd(a, b);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_powd8_u10(a, b);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a),
      b_ = simde__m512d_to_private(b);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]);
    }

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_pow_pd
  #define _mm512_pow_pd(a, b) simde_mm512_pow_pd(a, b)
#endif

/* Masked power (f32): native _mm512_mask_pow_ps, or simde_mm512_pow_ps(a, b)
 * passed with `src` and `k` to simde_mm512_mask_mov_ps. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_pow_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_pow_ps(src, k, a, b);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_pow_ps(a, b));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_pow_ps
  #define _mm512_mask_pow_ps(src, k, a, b) simde_mm512_mask_pow_ps(src, k, a, b)
#endif

/* Masked power (f64): native _mm512_mask_pow_pd when SVML and AVX-512F are
 * native; the portable branch starts below. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_pow_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_pow_pd(src, k, a, b);
  #else
    return
simde_mm512_mask_mov_pd(src, k, simde_mm512_pow_pd(a, b)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_pow_pd #define _mm512_mask_pow_pd(src, k, a, b) simde_mm512_mask_pow_pd(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_clog_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_clog_ps(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); simde__m128_private pow_res_ = simde__m128_to_private(simde_mm_pow_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)))); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { r_.f32[ i ] = simde_math_logf(simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i+1])); r_.f32[i + 1] = simde_math_atan2f(a_.f32[i + 1], a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_clog_ps #define _mm_clog_ps(a) simde_mm_clog_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_clog_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm256_clog_ps(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); simde__m256_private pow_res_ = simde__m256_to_private(simde_mm256_pow_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)))); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { r_.f32[ i ] = simde_math_logf(simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i + 1])); r_.f32[i + 1] = simde_math_atan2f(a_.f32[i + 1], a_.f32[i]); } return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_clog_ps #define _mm256_clog_ps(a) simde_mm256_clog_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_csqrt_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_csqrt_ps(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); simde__m128 pow_res= 
simde_mm_pow_ps(a,simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0))); simde__m128_private pow_res_=simde__m128_to_private(pow_res); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { simde_float32 sign = simde_math_copysignf(SIMDE_FLOAT32_C(1.0), a_.f32[i + 1]); simde_float32 temp = simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i+1]); r_.f32[ i ] = simde_math_sqrtf(( a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0)); r_.f32[i + 1] = sign * simde_math_sqrtf((-a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0)); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_csqrt_ps #define _mm_csqrt_ps(a) simde_mm_csqrt_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_csqrt_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm256_csqrt_ps(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); simde__m256 pow_res= simde_mm256_pow_ps(a,simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0))); simde__m256_private pow_res_=simde__m256_to_private(pow_res); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { simde_float32 sign = simde_math_copysignf(SIMDE_FLOAT32_C(1.0), a_.f32[i + 1]); simde_float32 temp = simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i+1]); r_.f32[ i ] = simde_math_sqrtf(( a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0)); r_.f32[i + 1] = sign * simde_math_sqrtf((-a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0)); } return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_csqrt_ps #define _mm256_csqrt_ps(a) simde_mm256_csqrt_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_rem_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) return _mm_rem_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = a_.i8 
% b_.i8;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = a_.i8[i] % b_.i8[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
/* With native aliases enabled, the Intel name is redirected to the SIMDe
 * implementation. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_rem_epi8
  #define _mm_rem_epi8(a, b) simde_mm_rem_epi8((a), (b))
#endif

/* Lane-wise remainder over the `i16` lanes of a 128-bit integer vector:
 * r[i] = a[i] % b[i].
 *
 * Uses native _mm_rem_epi16 when SIMDE_X86_SVML_NATIVE and
 * SIMDE_X86_SSE2_NATIVE are defined.  Otherwise the remainder is taken with
 * the C `%` operator, either on the whole vector member
 * (SIMDE_VECTOR_SUBSCRIPT_OPS) or lane by lane.
 *
 * NOTE(review): in C, `%` with a zero right operand is undefined behaviour, so
 * the portable paths have no defined result for a zero lane in `b` -- confirm
 * callers never pass one.  The same applies to every rem function below. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_rem_epi16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_rem_epi16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i16 = a_.i16 % b_.i16;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = a_.i16[i] % b_.i16[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_rem_epi16
  #define _mm_rem_epi16(a, b) simde_mm_rem_epi16((a), (b))
#endif

/* Lane-wise remainder over the `i32` lanes of a 128-bit integer vector.
 * Native _mm_rem_epi32, else vector `%` or a per-lane loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_rem_epi32 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_rem_epi32(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32 = a_.i32 % b_.i32;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.i32[i] = a_.i32[i] % b_.i32[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
/* simde_mm_irem_epi32 is a second name for simde_mm_rem_epi32; with native
 * aliases enabled both _mm_rem_epi32 and _mm_irem_epi32 map to it. */
#define simde_mm_irem_epi32(a, b) simde_mm_rem_epi32(a, b)
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_rem_epi32
  #define _mm_rem_epi32(a, b) simde_mm_rem_epi32(a, b)
  #undef _mm_irem_epi32
  #define _mm_irem_epi32(a, b) simde_mm_rem_epi32(a, b)
#endif

/* Lane-wise remainder over the `i64` lanes of a 128-bit integer vector.
 * Native _mm_rem_epi64, else vector `%` or a per-lane loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_rem_epi64 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_rem_epi64(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i64 = a_.i64 % b_.i64;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.i64[i] = a_.i64[i] % b_.i64[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_rem_epi64
  #define _mm_rem_epi64(a, b) simde_mm_rem_epi64((a), (b))
#endif

/* Lane-wise remainder over the `u8` lanes of a 128-bit integer vector.
 * Native _mm_rem_epu8, else vector `%` or a per-lane loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_rem_epu8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_rem_epu8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u8 = a_.u8 % b_.u8;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
        r_.u8[i] = a_.u8[i] % b_.u8[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_rem_epu8
  #define _mm_rem_epu8(a, b) simde_mm_rem_epu8((a), (b))
#endif

/* Lane-wise remainder over the `u16` lanes of a 128-bit integer vector.
 * Native _mm_rem_epu16, else vector `%` or a per-lane loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_rem_epu16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_rem_epu16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u16 = a_.u16 % b_.u16;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        r_.u16[i] = a_.u16[i] % b_.u16[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_rem_epu16
  #define _mm_rem_epu16(a, b) simde_mm_rem_epu16((a), (b))
#endif

/* Lane-wise remainder over the `u32` lanes of a 128-bit integer vector.
 * Native _mm_rem_epu32, else vector `%` or a per-lane loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_rem_epu32 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_rem_epu32(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u32 = a_.u32 % b_.u32;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
        r_.u32[i] = a_.u32[i] % b_.u32[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
/* simde_mm_urem_epi32 is a second name for simde_mm_rem_epu32; with native
 * aliases enabled both _mm_rem_epu32 and _mm_urem_epi32 map to it. */
#define simde_mm_urem_epi32(a, b) simde_mm_rem_epu32(a, b)
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_rem_epu32
  #define _mm_rem_epu32(a, b) simde_mm_rem_epu32(a, b)
  #undef _mm_urem_epi32
  #define _mm_urem_epi32(a, b) simde_mm_rem_epu32(a, b)
#endif

/* Lane-wise remainder over the `u64` lanes of a 128-bit integer vector.
 * Native _mm_rem_epu64, else vector `%` or a per-lane loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_rem_epu64 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_rem_epu64(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u64 = a_.u64 % b_.u64;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
        r_.u64[i] = a_.u64[i] % b_.u64[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_rem_epu64
  #define _mm_rem_epu64(a, b) simde_mm_rem_epu64((a), (b))
#endif

/* Lane-wise remainder over the `i8` lanes of a 256-bit integer vector.
 *
 * Native _mm256_rem_epi8 when SVML and AVX are native.  Otherwise: vector `%`
 * when SIMDE_VECTOR_SUBSCRIPT_OPS is defined; else per-m128i delegation to
 * simde_mm_rem_epi8 when SIMDE_NATURAL_VECTOR_SIZE_LE(128); else a per-lane
 * loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_rem_epi8 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_rem_epi8(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i8 = a_.i8 % b_.i8;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
          r_.m128i[i] = simde_mm_rem_epi8(a_.m128i[i], b_.m128i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
          r_.i8[i] = a_.i8[i] % b_.i8[i];
        }
      #endif
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_rem_epi8
  #define _mm256_rem_epi8(a, b) simde_mm256_rem_epi8((a), (b))
#endif

/* Lane-wise remainder over the `i16` lanes of a 256-bit integer vector:
 * r[i] = a[i] % b[i].
 *
 * Native _mm256_rem_epi16 when SVML and AVX are native.  Otherwise: vector
 * `%` when SIMDE_VECTOR_SUBSCRIPT_OPS is defined; else per-m128i delegation to
 * simde_mm_rem_epi16 when SIMDE_NATURAL_VECTOR_SIZE_LE(128); else a per-lane
 * loop.
 *
 * NOTE(review): the portable paths use C `%`, for which a zero right operand
 * is undefined behaviour -- confirm callers never pass a zero lane in `b`.
 * The same applies to every rem function below. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_rem_epi16 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_rem_epi16(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i16 = a_.i16 % b_.i16;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
          r_.m128i[i] = simde_mm_rem_epi16(a_.m128i[i], b_.m128i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
          r_.i16[i] = a_.i16[i] % b_.i16[i];
        }
      #endif
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
/* With native aliases enabled, the Intel name is redirected to the SIMDe
 * implementation. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_rem_epi16
  #define _mm256_rem_epi16(a, b) simde_mm256_rem_epi16((a), (b))
#endif

/* Lane-wise remainder over the `i32` lanes of a 256-bit integer vector.
 * Same structure as simde_mm256_rem_epi16, delegating to simde_mm_rem_epi32. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_rem_epi32 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_rem_epi32(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32 = a_.i32 % b_.i32;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
          r_.m128i[i] = simde_mm_rem_epi32(a_.m128i[i], b_.m128i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
          r_.i32[i] = a_.i32[i] % b_.i32[i];
        }
      #endif
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
/* simde_mm256_irem_epi32 is a second name for simde_mm256_rem_epi32; with
 * native aliases enabled both Intel names map to it. */
#define simde_mm256_irem_epi32(a, b) simde_mm256_rem_epi32(a, b)
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_rem_epi32
  #define _mm256_rem_epi32(a, b) simde_mm256_rem_epi32(a, b)
  #undef _mm256_irem_epi32
  #define _mm256_irem_epi32(a, b) simde_mm256_rem_epi32(a, b)
#endif

/* Lane-wise remainder over the `i64` lanes of a 256-bit integer vector.
 * Same structure, delegating to simde_mm_rem_epi64. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_rem_epi64 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_rem_epi64(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i64 = a_.i64 % b_.i64;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
          r_.m128i[i] = simde_mm_rem_epi64(a_.m128i[i], b_.m128i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
          r_.i64[i] = a_.i64[i] % b_.i64[i];
        }
      #endif
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_rem_epi64
  #define _mm256_rem_epi64(a, b) simde_mm256_rem_epi64((a), (b))
#endif

/* Lane-wise remainder over the `u8` lanes of a 256-bit integer vector.
 * Same structure, delegating to simde_mm_rem_epu8. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_rem_epu8 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_rem_epu8(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u8 = a_.u8 % b_.u8;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
          r_.m128i[i] = simde_mm_rem_epu8(a_.m128i[i], b_.m128i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
          r_.u8[i] = a_.u8[i] % b_.u8[i];
        }
      #endif
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_rem_epu8
  #define _mm256_rem_epu8(a, b) simde_mm256_rem_epu8((a), (b))
#endif

/* Lane-wise remainder over the `u16` lanes of a 256-bit integer vector.
 * Same structure, delegating to simde_mm_rem_epu16. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_rem_epu16 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_rem_epu16(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u16 = a_.u16 % b_.u16;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
          r_.m128i[i] = simde_mm_rem_epu16(a_.m128i[i], b_.m128i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
          r_.u16[i] = a_.u16[i] % b_.u16[i];
        }
      #endif
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_rem_epu16
  #define _mm256_rem_epu16(a, b) simde_mm256_rem_epu16((a), (b))
#endif

/* Lane-wise remainder over the `u32` lanes of a 256-bit integer vector.
 * Same structure, delegating to simde_mm_rem_epu32. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_rem_epu32 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_rem_epu32(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u32 = a_.u32 % b_.u32;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
          r_.m128i[i] = simde_mm_rem_epu32(a_.m128i[i], b_.m128i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
          r_.u32[i] = a_.u32[i] % b_.u32[i];
        }
      #endif
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
/* simde_mm256_urem_epi32 is a second name for simde_mm256_rem_epu32; with
 * native aliases enabled both Intel names map to it. */
#define simde_mm256_urem_epi32(a, b) simde_mm256_rem_epu32(a, b)
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_rem_epu32
  #define _mm256_rem_epu32(a, b) simde_mm256_rem_epu32(a, b)
  #undef _mm256_urem_epi32
  #define _mm256_urem_epi32(a, b) simde_mm256_rem_epu32(a, b)
#endif

/* Lane-wise remainder over the `u64` lanes of a 256-bit integer vector.
 * Same structure, delegating to simde_mm_rem_epu64. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_rem_epu64 (simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_rem_epu64(a, b);
  #else
    simde__m256i_private
      r_,
      a_ = simde__m256i_to_private(a),
      b_ = simde__m256i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u64 = a_.u64 % b_.u64;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
        for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) {
          r_.m128i[i] = simde_mm_rem_epu64(a_.m128i[i], b_.m128i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
          r_.u64[i] = a_.u64[i] % b_.u64[i];
        }
      #endif
    #endif

    return simde__m256i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_rem_epu64
  #define _mm256_rem_epu64(a, b) simde_mm256_rem_epu64((a), (b))
#endif

/* Lane-wise remainder over the `i8` lanes of a 512-bit integer vector.
 *
 * Native _mm512_rem_epi8 when SVML and AVX-512F are native.  Otherwise:
 * vector `%` when SIMDE_VECTOR_SUBSCRIPT_OPS is defined; else per-m256i
 * delegation to simde_mm256_rem_epi8 when SIMDE_NATURAL_VECTOR_SIZE_LE(256);
 * else a per-lane loop. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_rem_epi8 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rem_epi8(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i8 = a_.i8 % b_.i8;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
          r_.m256i[i] = simde_mm256_rem_epi8(a_.m256i[i], b_.m256i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
          r_.i8[i] = a_.i8[i] % b_.i8[i];
        }
      #endif
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rem_epi8
  #define _mm512_rem_epi8(a, b) simde_mm512_rem_epi8((a), (b))
#endif

/* Lane-wise remainder over the `i16` lanes of a 512-bit integer vector.
 * Same structure as simde_mm512_rem_epi8, delegating to
 * simde_mm256_rem_epi16. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_rem_epi16 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rem_epi16(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i16 = a_.i16 % b_.i16;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
          r_.m256i[i] = simde_mm256_rem_epi16(a_.m256i[i], b_.m256i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i <
(sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
          r_.i16[i] = a_.i16[i] % b_.i16[i];
        }
      #endif
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
/* With native aliases enabled, the Intel name is redirected to the SIMDe
 * implementation. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rem_epi16
  #define _mm512_rem_epi16(a, b) simde_mm512_rem_epi16((a), (b))
#endif

/* Lane-wise remainder over the `i32` lanes of a 512-bit integer vector:
 * r[i] = a[i] % b[i].
 *
 * Native _mm512_rem_epi32 when SVML and AVX-512F are native.  Otherwise:
 * vector `%` when SIMDE_VECTOR_SUBSCRIPT_OPS is defined; else per-m256i
 * delegation to simde_mm256_rem_epi32 when SIMDE_NATURAL_VECTOR_SIZE_LE(256);
 * else a per-lane loop.
 *
 * NOTE(review): the portable paths use C `%`, for which a zero right operand
 * is undefined behaviour -- confirm callers never pass a zero lane in `b`.
 * The same applies to every rem function below. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_rem_epi32 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rem_epi32(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32 = a_.i32 % b_.i32;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
          r_.m256i[i] = simde_mm256_rem_epi32(a_.m256i[i], b_.m256i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
          r_.i32[i] = a_.i32[i] % b_.i32[i];
        }
      #endif
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rem_epi32
  #define _mm512_rem_epi32(a, b) simde_mm512_rem_epi32((a), (b))
#endif

/* Masked remainder over `i32` lanes: native _mm512_mask_rem_epi32, or
 * simde_mm512_rem_epi32(a, b) computed for the whole vector and passed with
 * `src` and `k` to simde_mm512_mask_mov_epi32.
 * NOTE(review): because the portable path computes every lane before masking,
 * a zero divisor in a lane that `k` presumably masks out is still evaluated
 * -- confirm this is acceptable. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_rem_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_rem_epi32(src, k, a, b);
  #else
    return simde_mm512_mask_mov_epi32(src, k, simde_mm512_rem_epi32(a, b));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_rem_epi32
  #define _mm512_mask_rem_epi32(src, k, a, b) simde_mm512_mask_rem_epi32(src, k, a, b)
#endif

/* Lane-wise remainder over the `i64` lanes of a 512-bit integer vector.
 * Same structure, delegating to simde_mm256_rem_epi64. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_rem_epi64 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rem_epi64(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i64 = a_.i64 % b_.i64;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
          r_.m256i[i] = simde_mm256_rem_epi64(a_.m256i[i], b_.m256i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
          r_.i64[i] = a_.i64[i] % b_.i64[i];
        }
      #endif
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rem_epi64
  #define _mm512_rem_epi64(a, b) simde_mm512_rem_epi64((a), (b))
#endif

/* Lane-wise remainder over the `u8` lanes of a 512-bit integer vector.
 * Same structure, delegating to simde_mm256_rem_epu8. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_rem_epu8 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rem_epu8(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u8 = a_.u8 % b_.u8;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
          r_.m256i[i] = simde_mm256_rem_epu8(a_.m256i[i], b_.m256i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
          r_.u8[i] = a_.u8[i] % b_.u8[i];
        }
      #endif
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rem_epu8
  #define _mm512_rem_epu8(a, b) simde_mm512_rem_epu8((a), (b))
#endif

/* Lane-wise remainder over the `u16` lanes of a 512-bit integer vector.
 * Same structure, delegating to simde_mm256_rem_epu16. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_rem_epu16 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rem_epu16(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u16 = a_.u16 % b_.u16;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
          r_.m256i[i] = simde_mm256_rem_epu16(a_.m256i[i], b_.m256i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
          r_.u16[i] = a_.u16[i] % b_.u16[i];
        }
      #endif
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rem_epu16
  #define _mm512_rem_epu16(a, b) simde_mm512_rem_epu16((a), (b))
#endif

/* Lane-wise remainder over the `u32` lanes of a 512-bit integer vector.
 * Same structure, delegating to simde_mm256_rem_epu32. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_rem_epu32 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rem_epu32(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u32 = a_.u32 % b_.u32;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
          r_.m256i[i] = simde_mm256_rem_epu32(a_.m256i[i], b_.m256i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
          r_.u32[i] = a_.u32[i] % b_.u32[i];
        }
      #endif
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rem_epu32
  #define _mm512_rem_epu32(a, b) simde_mm512_rem_epu32((a), (b))
#endif

/* Masked remainder over `u32` lanes: native _mm512_mask_rem_epu32, or
 * simde_mm512_rem_epu32(a, b) passed with `src` and `k` to
 * simde_mm512_mask_mov_epi32 (the same mask-move helper as the signed
 * variant). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_mask_rem_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_rem_epu32(src, k, a, b);
  #else
    return simde_mm512_mask_mov_epi32(src, k, simde_mm512_rem_epu32(a, b));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_rem_epu32
  #define _mm512_mask_rem_epu32(src, k, a, b) simde_mm512_mask_rem_epu32(src, k, a, b)
#endif

/* Lane-wise remainder over the `u64` lanes of a 512-bit integer vector.
 * Same structure, delegating to simde_mm256_rem_epu64. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_rem_epu64 (simde__m512i a, simde__m512i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_rem_epu64(a, b);
  #else
    simde__m512i_private
      r_,
      a_ = simde__m512i_to_private(a),
      b_ = simde__m512i_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.u64 = a_.u64 % b_.u64;
    #else
      #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
        for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) {
          r_.m256i[i] = simde_mm256_rem_epu64(a_.m256i[i], b_.m256i[i]);
        }
      #else
        SIMDE_VECTORIZE
        for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
          r_.u64[i] = a_.u64[i] % b_.u64[i];
        }
      #endif
    #endif

    return simde__m512i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_rem_epu64
  #define _mm512_rem_epu64(a, b) simde_mm512_rem_epu64((a), (b))
#endif

/* Reciprocal of each f32 lane of a 512-bit vector.  Native _mm512_recip_ps
 * when SVML and AVX-512F are native; otherwise computed as a full division
 * 1.0 / a via simde_mm512_div_ps (not an approximation instruction). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_recip_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_recip_ps(a);
  #else
    return simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_recip_ps
  #define _mm512_recip_ps(a) simde_mm512_recip_ps(a)
#endif

/* Reciprocal of each f64 lane of a 512-bit vector.  Native _mm512_recip_pd,
 * otherwise 1.0 / a via simde_mm512_div_pd. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_recip_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_recip_pd(a);
  #else
    return simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_recip_pd
  #define _mm512_recip_pd(a) simde_mm512_recip_pd(a)
#endif

/* Masked reciprocal (f32): native _mm512_mask_recip_ps, or
 * simde_mm512_recip_ps(a) passed with `src` and `k` to
 * simde_mm512_mask_mov_ps. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_recip_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_recip_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_recip_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_recip_ps
  #define _mm512_mask_recip_ps(src, k, a) simde_mm512_mask_recip_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_recip_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_recip_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_recip_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_recip_pd #define _mm512_mask_recip_pd(src, k, a) simde_mm512_mask_recip_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_rint_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_rint_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_rintf16(a); #else simde__m512_private r_, a_ = simde__m512_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_rintf(a_.f32[i]); } return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_rint_ps #define _mm512_rint_ps(a) simde_mm512_rint_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_rint_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_rint_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_rintd8(a); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_rint(a_.f64[i]); } return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_rint_pd #define _mm512_rint_pd(a) simde_mm512_rint_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_rint_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_rint_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_rint_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef 
_mm512_mask_rint_ps #define _mm512_mask_rint_ps(src, k, a) simde_mm512_mask_rint_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_rint_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_rint_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_rint_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_rint_pd #define _mm512_mask_rint_pd(src, k, a) simde_mm512_mask_rint_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_sin_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_sin_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_sinf4_u10(a); #else return Sleef_sinf4_u35(a); #endif #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_sinf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_sin_ps #define _mm_sin_ps(a) simde_mm_sin_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_sin_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_sin_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_sind2_u10(a); #else return Sleef_sind2_u35(a); #endif #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_sin(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_sin_pd #define _mm_sin_pd(a) simde_mm_sin_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 
simde_mm256_sin_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_sin_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_sinf8_u10(a); #else return Sleef_sinf8_u35(a); #endif #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_sin_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_sinf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_sin_ps #define _mm256_sin_ps(a) simde_mm256_sin_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_sin_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_sin_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_sind4_u10(a); #else return Sleef_sind4_u35(a); #endif #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_sin_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_sin(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_sin_pd #define _mm256_sin_pd(a) simde_mm256_sin_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_sin_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_sin_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return 
Sleef_sinf16_u10(a); #else return Sleef_sinf16_u35(a); #endif #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_sin_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_sinf(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_sin_ps #define _mm512_sin_ps(a) simde_mm512_sin_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_sin_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_sin_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_sind8_u10(a); #else return Sleef_sind8_u35(a); #endif #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_sin_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_sin(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_sin_pd #define _mm512_sin_pd(a) simde_mm512_sin_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_sin_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_sin_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_sin_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_sin_ps #define _mm512_mask_sin_ps(src, k, a) simde_mm512_mask_sin_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d 
simde_mm512_mask_sin_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_sin_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_sin_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_sin_pd #define _mm512_mask_sin_pd(src, k, a) simde_mm512_mask_sin_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_sincos_ps (simde__m128* mem_addr, simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_sincos_ps(HEDLEY_REINTERPRET_CAST(__m128*, mem_addr), a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) Sleef___m128_2 temp; #if SIMDE_ACCURACY_PREFERENCE > 1 temp = Sleef_sincosf4_u10(a); #else temp = Sleef_sincosf4_u35(a); #endif *mem_addr = temp.y; return temp.x; #else simde__m128 r; r = simde_mm_sin_ps(a); *mem_addr = simde_mm_cos_ps(a); return r; #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_sincos_ps #define _mm_sincos_ps(mem_addr, a) simde_mm_sincos_ps((mem_addr),(a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_sincos_pd (simde__m128d* mem_addr, simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_sincos_pd(HEDLEY_REINTERPRET_CAST(__m128d*, mem_addr), a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) Sleef___m128d_2 temp; #if SIMDE_ACCURACY_PREFERENCE > 1 temp = Sleef_sincosd2_u10(a); #else temp = Sleef_sincosd2_u35(a); #endif *mem_addr = temp.y; return temp.x; #else simde__m128d r; r = simde_mm_sin_pd(a); *mem_addr = simde_mm_cos_pd(a); return r; #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_sincos_pd #define _mm_sincos_pd(mem_addr, a) simde_mm_sincos_pd((mem_addr),(a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_sincos_ps (simde__m256* mem_addr, simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && 
defined(SIMDE_X86_AVX_NATIVE) return _mm256_sincos_ps(HEDLEY_REINTERPRET_CAST(__m256*, mem_addr), a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) Sleef___m256_2 temp; #if SIMDE_ACCURACY_PREFERENCE > 1 temp = Sleef_sincosf8_u10(a); #else temp = Sleef_sincosf8_u35(a); #endif *mem_addr = temp.y; return temp.x; #else simde__m256 r; r = simde_mm256_sin_ps(a); *mem_addr = simde_mm256_cos_ps(a); return r; #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_sincos_ps #define _mm256_sincos_ps(mem_addr, a) simde_mm256_sincos_ps((mem_addr),(a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_sincos_pd (simde__m256d* mem_addr, simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_sincos_pd(HEDLEY_REINTERPRET_CAST(__m256d*, mem_addr), a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) Sleef___m256d_2 temp; #if SIMDE_ACCURACY_PREFERENCE > 1 temp = Sleef_sincosd4_u10(a); #else temp = Sleef_sincosd4_u35(a); #endif *mem_addr = temp.y; return temp.x; #else simde__m256d r; r = simde_mm256_sin_pd(a); *mem_addr = simde_mm256_cos_pd(a); return r; #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_sincos_pd #define _mm256_sincos_pd(mem_addr, a) simde_mm256_sincos_pd((mem_addr),(a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_sincos_ps (simde__m512* mem_addr, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_sincos_ps(HEDLEY_REINTERPRET_CAST(__m512*, mem_addr), a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) Sleef___m512_2 temp; #if SIMDE_ACCURACY_PREFERENCE > 1 temp = Sleef_sincosf16_u10(a); #else temp = Sleef_sincosf16_u35(a); #endif *mem_addr = temp.y; return temp.x; #else simde__m512 r; r = simde_mm512_sin_ps(a); *mem_addr = simde_mm512_cos_ps(a); return r; #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_sincos_ps 
#define _mm512_sincos_ps(mem_addr, a) simde_mm512_sincos_ps((mem_addr),(a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_sincos_pd (simde__m512d* mem_addr, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_sincos_pd(HEDLEY_REINTERPRET_CAST(__m512d*, mem_addr), a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) Sleef___m512d_2 temp; #if SIMDE_ACCURACY_PREFERENCE > 1 temp = Sleef_sincosd8_u10(a); #else temp = Sleef_sincosd8_u35(a); #endif *mem_addr = temp.y; return temp.x; #else simde__m512d r; r = simde_mm512_sin_pd(a); *mem_addr = simde_mm512_cos_pd(a); return r; #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_sincos_pd #define _mm512_sincos_pd(mem_addr, a) simde_mm512_sincos_pd((mem_addr),(a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_sincos_ps(simde__m512* mem_addr, simde__m512 sin_src, simde__m512 cos_src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a); #else simde__m512 cos_res, sin_res; sin_res = simde_mm512_sincos_ps(&cos_res, a); *mem_addr = simde_mm512_mask_mov_ps(cos_src, k, cos_res); return simde_mm512_mask_mov_ps(sin_src, k, sin_res); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_sincos_ps #define _mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a) simde_mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_sincos_pd(simde__m512d* mem_addr, simde__m512d sin_src, simde__m512d cos_src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a); #else simde__m512d cos_res, sin_res; sin_res = simde_mm512_sincos_pd(&cos_res, a); *mem_addr = simde_mm512_mask_mov_pd(cos_src, k, cos_res); return 
simde_mm512_mask_mov_pd(sin_src, k, sin_res); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_sincos_pd #define _mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a) simde_mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_sind_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_sind_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_sinf4_u10(simde_x_mm_deg2rad_ps(a)); #else return Sleef_sinf4_u35(simde_x_mm_deg2rad_ps(a)); #endif #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i])); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_sind_ps #define _mm_sind_ps(a) simde_mm_sind_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_sind_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_sind_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_sind2_u10(simde_x_mm_deg2rad_pd(a)); #else return Sleef_sind2_u35(simde_x_mm_deg2rad_pd(a)); #endif #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i])); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_sind_pd #define _mm_sind_pd(a) simde_mm_sind_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_sind_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_sind_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && 
defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_sinf8_u10(simde_x_mm256_deg2rad_ps(a)); #else return Sleef_sinf8_u35(simde_x_mm256_deg2rad_ps(a)); #endif #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_sind_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i])); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_sind_ps #define _mm256_sind_ps(a) simde_mm256_sind_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_sind_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_sind_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_sind4_u10(simde_x_mm256_deg2rad_pd(a)); #else return Sleef_sind4_u35(simde_x_mm256_deg2rad_pd(a)); #endif #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_sind_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i])); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_sind_pd #define _mm256_sind_pd(a) simde_mm256_sind_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_sind_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_sind_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return 
Sleef_sinf16_u10(simde_x_mm512_deg2rad_ps(a)); #else return Sleef_sinf16_u35(simde_x_mm512_deg2rad_ps(a)); #endif #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_sind_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i])); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_sind_ps #define _mm512_sind_ps(a) simde_mm512_sind_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_sind_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_sind_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_sind8_u10(simde_x_mm512_deg2rad_pd(a)); #else return Sleef_sind8_u35(simde_x_mm512_deg2rad_pd(a)); #endif #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_sind_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i])); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_sind_pd #define _mm512_sind_pd(a) simde_mm512_sind_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_sind_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_sind_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_sind_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef 
_mm512_mask_sind_ps #define _mm512_mask_sind_ps(src, k, a) simde_mm512_mask_sind_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_sind_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_sind_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_sind_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_sind_pd #define _mm512_mask_sind_pd(src, k, a) simde_mm512_mask_sind_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_sinh_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_sinh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_sinhf4_u10(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_sinhf(a_.f32[i]); } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_sinh_ps #define _mm_sinh_ps(a) simde_mm_sinh_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_sinh_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_sinh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_sinhd2_u10(a); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_sinh(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_sinh_pd #define _mm_sinh_pd(a) simde_mm_sinh_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_sinh_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_sinh_ps(a); #elif 
defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_sinhf8_u10(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_sinh_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_sinhf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_sinh_ps #define _mm256_sinh_ps(a) simde_mm256_sinh_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_sinh_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_sinh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_sinhd4_u10(a); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_sinh_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_sinh(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_sinh_pd #define _mm256_sinh_pd(a) simde_mm256_sinh_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_sinh_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_sinh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_sinhf16_u10(a); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_sinh_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) 
/ sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_sinhf(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_sinh_ps #define _mm512_sinh_ps(a) simde_mm512_sinh_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_sinh_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_sinh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_sinhd8_u10(a); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_sinh_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_sinh(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_sinh_pd #define _mm512_sinh_pd(a) simde_mm512_sinh_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_sinh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_sinh_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_sinh_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_sinh_ps #define _mm512_mask_sinh_ps(src, k, a) simde_mm512_mask_sinh_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_sinh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_sinh_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_sinh_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_sinh_pd #define _mm512_mask_sinh_pd(src, k, a) simde_mm512_mask_sinh_pd(src, k, a) 
#endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_svml_ceil_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_svml_ceil_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_ceilf4(a); #else return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_svml_ceil_ps #define _mm_svml_ceil_ps(a) simde_mm_svml_ceil_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_svml_ceil_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_svml_ceil_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_ceild2(a); #else return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_svml_ceil_pd #define _mm_svml_ceil_pd(a) simde_mm_svml_ceil_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_svml_ceil_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_svml_ceil_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_ceilf8(a); #else return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_svml_ceil_ps #define _mm256_svml_ceil_ps(a) simde_mm256_svml_ceil_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_svml_ceil_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_svml_ceil_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_ceild4(a); #else return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_svml_ceil_pd #define _mm256_svml_ceil_pd(a) simde_mm256_svml_ceil_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 
simde_mm512_ceil_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_ceil_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_ceilf16(a); #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = simde_mm256_ceil_ps(a_.m256[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_ceilf(a_.f32[i]); } #endif return simde__m512_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_ceil_ps #define _mm512_ceil_ps(a) simde_mm512_ceil_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_ceil_pd (simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_ceil_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) return Sleef_ceild8(a); #else simde__m512d_private r_, a_ = simde__m512d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { r_.m256d[i] = simde_mm256_ceil_pd(a_.m256d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_ceil(a_.f64[i]); } #endif return simde__m512d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_ceil_pd #define _mm512_ceil_pd(a) simde_mm512_ceil_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_ceil_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_ceil_ps(src, k, a); #else return simde_mm512_mask_mov_ps(src, k, simde_mm512_ceil_ps(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_ceil_ps #define 
_mm512_mask_ceil_ps(src, k, a) simde_mm512_mask_ceil_ps(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_ceil_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_ceil_pd(src, k, a); #else return simde_mm512_mask_mov_pd(src, k, simde_mm512_ceil_pd(a)); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_ceil_pd #define _mm512_mask_ceil_pd(src, k, a) simde_mm512_mask_ceil_pd(src, k, a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_svml_floor_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_svml_floor_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_floorf4(a); #else return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_svml_floor_ps #define _mm_svml_floor_ps(a) simde_mm_svml_floor_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_svml_floor_pd (simde__m128d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_svml_floor_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) return Sleef_floord2(a); #else return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_svml_floor_pd #define _mm_svml_floor_pd(a) simde_mm_svml_floor_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_svml_floor_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_svml_floor_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_floorf8(a); #else return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_svml_floor_ps #define _mm256_svml_floor_ps(a) simde_mm256_svml_floor_ps(a) 
#endif

/* Floor of a simde__m256d.
 * Dispatch order: native SVML (needs AVX), then Sleef_floord4 (needs AVX),
 * then simde_mm256_round_pd with SIMDE_MM_FROUND_TO_NEG_INF. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_svml_floor_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_svml_floor_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_floord4(a);
  #else
    return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF);
  #endif
}
/* When native aliases are enabled, the Intel-style name is redefined to call
 * the simde_ implementation above. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_floor_pd
  #define _mm256_svml_floor_pd(a) simde_mm256_svml_floor_pd(a)
#endif

/* Floor of a simde__m512.
 * Dispatch order: native SVML (needs AVX-512F), then Sleef_floorf16, then a
 * portable path on the private representation. The portable path applies
 * simde_mm256_floor_ps to each m256 member when the natural vector size is
 * at most 256, and otherwise applies simde_math_floorf to each f32 element. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_floor_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_floor_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_floorf16(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_floor_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_floorf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_floor_ps
  #define _mm512_floor_ps(a) simde_mm512_floor_ps(a)
#endif

/* Floor of a simde__m512d.
 * Same dispatch as simde_mm512_floor_ps, using Sleef_floord8,
 * simde_mm256_floor_pd on each m256d member, or simde_math_floor on each
 * f64 element. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_floor_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_floor_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_floord8(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_floor_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_floor(a_.f64[i]);
      }
    #endif

    return
simde__m512d_from_private(r_);
  #endif
}
/* When native aliases are enabled, the Intel-style name is redefined to call
 * the simde_ implementation. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_floor_pd
  #define _mm512_floor_pd(a) simde_mm512_floor_pd(a)
#endif

/* Masked floor on a simde__m512.
 * Native when SVML and AVX-512F are available; otherwise the result of
 * simde_mm512_floor_ps(a) is passed with src and k to
 * simde_mm512_mask_mov_ps. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_floor_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_floor_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_floor_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_floor_ps
  #define _mm512_mask_floor_ps(src, k, a) simde_mm512_mask_floor_ps(src, k, a)
#endif

/* Masked floor on a simde__m512d; same structure as the _ps variant, using
 * simde_mm512_floor_pd and simde_mm512_mask_mov_pd. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_floor_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_floor_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_floor_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_floor_pd
  #define _mm512_mask_floor_pd(src, k, a) simde_mm512_mask_floor_pd(src, k, a)
#endif

/* Round of a simde__m128.
 * Dispatch order: native SVML (needs SSE), then Sleef_roundf4, then
 * simde_math_roundf applied to every f32 element of the private
 * representation. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_svml_round_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_round_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_roundf4(a);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_roundf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_round_ps
  #define _mm_svml_round_ps(a) simde_mm_svml_round_ps(a)
#endif

/* Round of a simde__m128d; native SVML first, then a Sleef branch that
 * continues on the following source line. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_svml_round_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_round_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) &&
defined(SIMDE_X86_SSE_NATIVE) return Sleef_roundd2(a); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_round(a_.f64[i]); } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm_svml_round_pd #define _mm_svml_round_pd(a) simde_mm_svml_round_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_svml_round_ps (simde__m256 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_svml_round_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_roundf8(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_svml_round_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_roundf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_svml_round_ps #define _mm256_svml_round_ps(a) simde_mm256_svml_round_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_svml_round_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_svml_round_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) return Sleef_roundd4(a); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_svml_round_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_round(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_round_pd
  #define _mm256_svml_round_pd(a) simde_mm256_svml_round_pd(a)
#endif

/* Round of a simde__m512d.
 * Dispatch order: native SVML (needs AVX-512F), then Sleef_roundd8, then a
 * portable path. The portable path delegates to simde_mm256_svml_round_pd on
 * each m256d member when the natural vector size is at most 256, and
 * otherwise applies simde_math_round to each f64 element. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_svml_round_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_svml_round_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_roundd8(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_svml_round_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_round(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
/* When native aliases are enabled, the Intel-style name is redefined to call
 * the simde_ implementation above. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_svml_round_pd
  #define _mm512_svml_round_pd(a) simde_mm512_svml_round_pd(a)
#endif

/* Masked round on a simde__m512d.
 * Native when SVML and AVX-512F are available; otherwise the result of
 * simde_mm512_svml_round_pd(a) is passed with src and k to
 * simde_mm512_mask_mov_pd. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_svml_round_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_svml_round_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_svml_round_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_svml_round_pd
  #define _mm512_mask_svml_round_pd(src, k, a) simde_mm512_mask_svml_round_pd(src, k, a)
#endif

/* Square root of a simde__m128.
 * Dispatch order: native SVML (needs SSE), then Sleef_sqrtf4, then
 * simde_mm_sqrt_ps. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_svml_sqrt_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_sqrt_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_sqrtf4(a);
  #else
    return simde_mm_sqrt_ps(a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_sqrt_ps
  #define _mm_svml_sqrt_ps(a) simde_mm_svml_sqrt_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_svml_sqrt_pd (simde__m128d a) {
  /* Square root of a simde__m128d: native SVML (needs SSE), then
   * Sleef_sqrtd2, then simde_mm_sqrt_pd. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_svml_sqrt_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_sqrtd2(a);
  #else
    return simde_mm_sqrt_pd(a);
  #endif
}
/* When native aliases are enabled, the Intel-style name is redefined to call
 * the simde_ implementation above. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_svml_sqrt_pd
  #define _mm_svml_sqrt_pd(a) simde_mm_svml_sqrt_pd(a)
#endif

/* Square root of a simde__m256.
 * Dispatch order: native SVML (needs AVX), then Sleef_sqrtf8, then
 * simde_mm256_sqrt_ps. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_svml_sqrt_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_svml_sqrt_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_sqrtf8(a);
  #else
    return simde_mm256_sqrt_ps(a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_sqrt_ps
  #define _mm256_svml_sqrt_ps(a) simde_mm256_svml_sqrt_ps(a)
#endif

/* Square root of a simde__m256d.
 * Dispatch order: native SVML (needs AVX), then Sleef_sqrtd4, then
 * simde_mm256_sqrt_pd. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_svml_sqrt_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_svml_sqrt_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_sqrtd4(a);
  #else
    return simde_mm256_sqrt_pd(a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_svml_sqrt_pd
  #define _mm256_svml_sqrt_pd(a) simde_mm256_svml_sqrt_pd(a)
#endif

/* Square root of a simde__m512.
 * Dispatch order: native SVML (needs AVX-512F), then Sleef_sqrtf16, then
 * simde_mm512_sqrt_ps. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_svml_sqrt_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_svml_sqrt_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_sqrtf16(a);
  #else
    return simde_mm512_sqrt_ps(a);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_svml_sqrt_ps
  #define _mm512_svml_sqrt_ps(a) simde_mm512_svml_sqrt_ps(a)
#endif

/* Square root of a simde__m512d; the body continues on the following source
 * line. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_svml_sqrt_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return
_mm512_svml_sqrt_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_sqrtd8(a);
  #else
    return simde_mm512_sqrt_pd(a);
  #endif
}
/* When native aliases are enabled, the Intel-style name is redefined to call
 * the simde_ implementation. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_svml_sqrt_pd
  #define _mm512_svml_sqrt_pd(a) simde_mm512_svml_sqrt_pd(a)
#endif

/* Tangent of a simde__m128.
 * Dispatch order: native SVML (needs SSE); then Sleef, choosing the _u10
 * variant when SIMDE_ACCURACY_PREFERENCE > 1 and the _u35 variant otherwise;
 * then simde_math_tanf on each f32 element of the private representation. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_tan_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_tan_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tanf4_u10(a);
    #else
      return Sleef_tanf4_u35(a);
    #endif
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_tanf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_tan_ps
  #define _mm_tan_ps(a) simde_mm_tan_ps(a)
#endif

/* Tangent of a simde__m128d; same dispatch as simde_mm_tan_ps, using
 * Sleef_tand2_u10 / Sleef_tand2_u35 and simde_math_tan on each f64 element. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_tan_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_tan_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tand2_u10(a);
    #else
      return Sleef_tand2_u35(a);
    #endif
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_tan(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_tan_pd
  #define _mm_tan_pd(a) simde_mm_tan_pd(a)
#endif

/* Tangent of a simde__m256; the Sleef and portable branches continue on the
 * following source line. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_tan_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_tan_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tanf8_u10(a);
    #else
return Sleef_tanf8_u35(a); #endif #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_tan_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_tanf(a_.f32[i]); } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_tan_ps #define _mm256_tan_ps(a) simde_mm256_tan_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_tan_pd (simde__m256d a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_tan_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_tand4_u10(a); #else return Sleef_tand4_u35(a); #endif #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_tan_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_tan(a_.f64[i]); } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) #undef _mm256_tan_pd #define _mm256_tan_pd(a) simde_mm256_tan_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_tan_ps (simde__m512 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_tan_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_tanf16_u10(a); #else return Sleef_tanf16_u35(a); #endif #else simde__m512_private r_, a_ = simde__m512_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { r_.m256[i] = 
simde_mm256_tan_ps(a_.m256[i]);
      }
    #else
      /* Element-wise path: simde_math_tanf on each f32 element. */
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_tanf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
/* When native aliases are enabled, the Intel-style name is redefined to call
 * the simde_ implementation. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_tan_ps
  #define _mm512_tan_ps(a) simde_mm512_tan_ps(a)
#endif

/* Tangent of a simde__m512d.
 * Dispatch order: native SVML (needs AVX-512F); then Sleef_tand8_u10 when
 * SIMDE_ACCURACY_PREFERENCE > 1, else Sleef_tand8_u35; then a portable path
 * that delegates to simde_mm256_tan_pd on each m256d member when the natural
 * vector size is at most 256, and otherwise applies simde_math_tan to each
 * f64 element. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_tan_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_tan_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tand8_u10(a);
    #else
      return Sleef_tand8_u35(a);
    #endif
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_tan_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_tan(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_tan_pd
  #define _mm512_tan_pd(a) simde_mm512_tan_pd(a)
#endif

/* Masked tangent on a simde__m512.
 * Native when SVML and AVX-512F are available; otherwise the result of
 * simde_mm512_tan_ps(a) is passed with src and k to
 * simde_mm512_mask_mov_ps. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_tan_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_tan_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_tan_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_tan_ps
  #define _mm512_mask_tan_ps(src, k, a) simde_mm512_mask_tan_ps(src, k, a)
#endif

/* Masked tangent on a simde__m512d; same structure as the _ps variant, using
 * simde_mm512_tan_pd and simde_mm512_mask_mov_pd. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_tan_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_tan_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_tan_pd(a));
  #endif
}
#if
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_tan_pd
  #define _mm512_mask_tan_pd(src, k, a) simde_mm512_mask_tan_pd(src, k, a)
#endif

/* Tangent of a simde__m128 whose input goes through a deg2rad conversion.
 * Dispatch order: native SVML (needs SSE); then Sleef tan (_u10 when
 * SIMDE_ACCURACY_PREFERENCE > 1, else _u35) applied to
 * simde_x_mm_deg2rad_ps(a); then simde_math_tanf(simde_math_deg2radf(x)) on
 * each f32 element. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_tand_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_tand_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tanf4_u10(simde_x_mm_deg2rad_ps(a));
    #else
      return Sleef_tanf4_u35(simde_x_mm_deg2rad_ps(a));
    #endif
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_tanf(simde_math_deg2radf(a_.f32[i]));
    }

    return simde__m128_from_private(r_);
  #endif
}
/* When native aliases are enabled, the Intel-style name is redefined to call
 * the simde_ implementation above. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_tand_ps
  #define _mm_tand_ps(a) simde_mm_tand_ps(a)
#endif

/* Double-precision counterpart of simde_mm_tand_ps: uses
 * simde_x_mm_deg2rad_pd with Sleef_tand2_u10 / Sleef_tand2_u35, or
 * simde_math_tan(simde_math_deg2rad(x)) on each f64 element. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_tand_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_tand_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tand2_u10(simde_x_mm_deg2rad_pd(a));
    #else
      return Sleef_tand2_u35(simde_x_mm_deg2rad_pd(a));
    #endif
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_tan(simde_math_deg2rad(a_.f64[i]));
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_tand_pd
  #define _mm_tand_pd(a) simde_mm_tand_pd(a)
#endif

/* 256-bit single-precision tand; the Sleef and portable branches continue on
 * the following source line. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_tand_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_tand_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return
Sleef_tanf8_u10(simde_x_mm256_deg2rad_ps(a));
    #else
      return Sleef_tanf8_u35(simde_x_mm256_deg2rad_ps(a));
    #endif
  #else
    /* Portable path of simde_mm256_tand_ps: delegate to simde_mm_tand_ps on
     * each m128 member when the natural vector size is at most 128, else
     * apply simde_math_tanf(simde_math_deg2radf(x)) to each f32 element. */
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_tand_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_tanf(simde_math_deg2radf(a_.f32[i]));
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
/* When native aliases are enabled, the Intel-style name is redefined to call
 * the simde_ implementation. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_tand_ps
  #define _mm256_tand_ps(a) simde_mm256_tand_ps(a)
#endif

/* 256-bit double-precision tand.
 * Dispatch order: native SVML (needs AVX); then Sleef_tand4_u10 or
 * Sleef_tand4_u35 (by SIMDE_ACCURACY_PREFERENCE) applied to
 * simde_x_mm256_deg2rad_pd(a); then a portable path that delegates to
 * simde_mm_tand_pd on each m128d member when the natural vector size is at
 * most 128, and otherwise applies simde_math_tan(simde_math_deg2rad(x)) to
 * each f64 element. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_tand_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_tand_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tand4_u10(simde_x_mm256_deg2rad_pd(a));
    #else
      return Sleef_tand4_u35(simde_x_mm256_deg2rad_pd(a));
    #endif
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_tand_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_tan(simde_math_deg2rad(a_.f64[i]));
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_tand_pd
  #define _mm256_tand_pd(a) simde_mm256_tand_pd(a)
#endif

/* 512-bit single-precision tand; the _u35 Sleef branch and the portable
 * path continue on the following source line. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_tand_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_tand_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tanf16_u10(simde_x_mm512_deg2rad_ps(a));
    #else
      return
Sleef_tanf16_u35(simde_x_mm512_deg2rad_ps(a));
    #endif
  #else
    /* Portable path of simde_mm512_tand_ps: delegate to simde_mm256_tand_ps
     * on each m256 member when the natural vector size is at most 256, else
     * apply simde_math_tanf(simde_math_deg2radf(x)) to each f32 element. */
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_tand_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_tanf(simde_math_deg2radf(a_.f32[i]));
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
/* When native aliases are enabled, the Intel-style name is redefined to call
 * the simde_ implementation. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_tand_ps
  #define _mm512_tand_ps(a) simde_mm512_tand_ps(a)
#endif

/* 512-bit double-precision tand.
 * Dispatch order: native SVML (needs AVX-512F); then Sleef_tand8_u10 or
 * Sleef_tand8_u35 (by SIMDE_ACCURACY_PREFERENCE) applied to
 * simde_x_mm512_deg2rad_pd(a); then a portable path that delegates to
 * simde_mm256_tand_pd on each m256d member when the natural vector size is
 * at most 256, and otherwise applies simde_math_tan(simde_math_deg2rad(x))
 * to each f64 element. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_tand_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_tand_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    #if SIMDE_ACCURACY_PREFERENCE > 1
      return Sleef_tand8_u10(simde_x_mm512_deg2rad_pd(a));
    #else
      return Sleef_tand8_u35(simde_x_mm512_deg2rad_pd(a));
    #endif
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_tand_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_tan(simde_math_deg2rad(a_.f64[i]));
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_tand_pd
  #define _mm512_tand_pd(a) simde_mm512_tand_pd(a)
#endif

/* Masked tand on a simde__m512.
 * Native when SVML and AVX-512F are available; otherwise the result of
 * simde_mm512_tand_ps(a) is passed with src and k to
 * simde_mm512_mask_mov_ps. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_tand_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_tand_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_tand_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_tand_ps
  #define _mm512_mask_tand_ps(src, k, a)
simde_mm512_mask_tand_ps(src, k, a)
#endif

/* Masked tand on a simde__m512d.
 * Native when SVML and AVX-512F are available; otherwise the result of
 * simde_mm512_tand_pd(a) is passed with src and k to
 * simde_mm512_mask_mov_pd. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_tand_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_tand_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_tand_pd(a));
  #endif
}
/* When native aliases are enabled, the Intel-style name is redefined to call
 * the simde_ implementation above. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_tand_pd
  #define _mm512_mask_tand_pd(src, k, a) simde_mm512_mask_tand_pd(src, k, a)
#endif

/* Hyperbolic tangent of a simde__m128.
 * Dispatch order: native SVML (needs SSE), then Sleef_tanhf4_u10, then
 * simde_math_tanhf on each f32 element of the private representation. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_tanh_ps (simde__m128 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_tanh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_tanhf4_u10(a);
  #else
    simde__m128_private
      r_,
      a_ = simde__m128_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
      r_.f32[i] = simde_math_tanhf(a_.f32[i]);
    }

    return simde__m128_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_tanh_ps
  #define _mm_tanh_ps(a) simde_mm_tanh_ps(a)
#endif

/* Hyperbolic tangent of a simde__m128d; same dispatch using
 * Sleef_tanhd2_u10 and simde_math_tanh on each f64 element. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_tanh_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_tanh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_tanhd2_u10(a);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_tanh(a_.f64[i]);
    }

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_tanh_pd
  #define _mm_tanh_pd(a) simde_mm_tanh_pd(a)
#endif

/* Hyperbolic tangent of a simde__m256; the Sleef and portable branches
 * continue on the following source line. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_tanh_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_tanh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
return Sleef_tanhf8_u10(a);
  #else
    /* Portable path of simde_mm256_tanh_ps: delegate to simde_mm_tanh_ps on
     * each m128 member when the natural vector size is at most 128, else
     * apply simde_math_tanhf to each f32 element. */
    simde__m256_private
      r_,
      a_ = simde__m256_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
        r_.m128[i] = simde_mm_tanh_ps(a_.m128[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_tanhf(a_.f32[i]);
      }
    #endif

    return simde__m256_from_private(r_);
  #endif
}
/* When native aliases are enabled, the Intel-style name is redefined to call
 * the simde_ implementation. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_tanh_ps
  #define _mm256_tanh_ps(a) simde_mm256_tanh_ps(a)
#endif

/* Hyperbolic tangent of a simde__m256d.
 * Dispatch order: native SVML (needs AVX), then Sleef_tanhd4_u10, then a
 * portable path that delegates to simde_mm_tanh_pd on each m128d member
 * when the natural vector size is at most 128, and otherwise applies
 * simde_math_tanh to each f64 element. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_tanh_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_tanh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_tanhd4_u10(a);
  #else
    simde__m256d_private
      r_,
      a_ = simde__m256d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
      for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
        r_.m128d[i] = simde_mm_tanh_pd(a_.m128d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_tanh(a_.f64[i]);
      }
    #endif

    return simde__m256d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_tanh_pd
  #define _mm256_tanh_pd(a) simde_mm256_tanh_pd(a)
#endif

/* Hyperbolic tangent of a simde__m512.
 * Dispatch order: native SVML (needs AVX-512F), then Sleef_tanhf16_u10, then
 * a portable path that delegates to simde_mm256_tanh_ps on each m256 member
 * when the natural vector size is at most 256; the element-wise branch
 * continues on the following source line. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_tanh_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_tanh_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_tanhf16_u10(a);
  #else
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_tanh_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] =
simde_math_tanhf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
/* When native aliases are enabled, the Intel-style name is redefined to call
 * the simde_ implementation. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_tanh_ps
  #define _mm512_tanh_ps(a) simde_mm512_tanh_ps(a)
#endif

/* Hyperbolic tangent of a simde__m512d.
 * Dispatch order: native SVML (needs AVX-512F), then Sleef_tanhd8_u10, then
 * a portable path that delegates to simde_mm256_tanh_pd on each m256d member
 * when the natural vector size is at most 256, and otherwise applies
 * simde_math_tanh to each f64 element. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_tanh_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_tanh_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_tanhd8_u10(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_tanh_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_tanh(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_tanh_pd
  #define _mm512_tanh_pd(a) simde_mm512_tanh_pd(a)
#endif

/* Masked tanh on a simde__m512.
 * Native when SVML and AVX-512F are available; otherwise the result of
 * simde_mm512_tanh_ps(a) is passed with src and k to
 * simde_mm512_mask_mov_ps. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_tanh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_tanh_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_tanh_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_tanh_ps
  #define _mm512_mask_tanh_ps(src, k, a) simde_mm512_mask_tanh_ps(src, k, a)
#endif

/* Masked tanh on a simde__m512d; same structure as the _ps variant, using
 * simde_mm512_tanh_pd and simde_mm512_mask_mov_pd. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_tanh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_tanh_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_tanh_pd(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_tanh_pd
  #define _mm512_mask_tanh_pd(src, k, a) simde_mm512_mask_tanh_pd(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_trunc_ps (simde__m128 a) {
  /* Truncation of a simde__m128: native SVML (needs SSE), then
   * Sleef_truncf4, then simde_mm_round_ps with SIMDE_MM_FROUND_TO_ZERO. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_trunc_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_truncf4(a);
  #else
    return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_ZERO);
  #endif
}
/* When native aliases are enabled, the Intel-style name is redefined to call
 * the simde_ implementation above. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_trunc_ps
  #define _mm_trunc_ps(a) simde_mm_trunc_ps(a)
#endif

/* Truncation of a simde__m128d: native SVML, then Sleef_truncd2, then
 * simde_mm_round_pd with SIMDE_MM_FROUND_TO_ZERO. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_trunc_pd (simde__m128d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE)
    return _mm_trunc_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE)
    return Sleef_truncd2(a);
  #else
    return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_ZERO);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_trunc_pd
  #define _mm_trunc_pd(a) simde_mm_trunc_pd(a)
#endif

/* Truncation of a simde__m256: native SVML (needs AVX), then Sleef_truncf8,
 * then simde_mm256_round_ps with SIMDE_MM_FROUND_TO_ZERO. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256
simde_mm256_trunc_ps (simde__m256 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_trunc_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_truncf8(a);
  #else
    return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_ZERO);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_trunc_ps
  #define _mm256_trunc_ps(a) simde_mm256_trunc_ps(a)
#endif

/* Truncation of a simde__m256d: native SVML (needs AVX), then Sleef_truncd4,
 * then simde_mm256_round_pd with SIMDE_MM_FROUND_TO_ZERO. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256d
simde_mm256_trunc_pd (simde__m256d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_trunc_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE)
    return Sleef_truncd4(a);
  #else
    return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_ZERO);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_trunc_pd
  #define _mm256_trunc_pd(a) simde_mm256_trunc_pd(a)
#endif

/* Truncation of a simde__m512; the body continues on the following source
 * line. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_trunc_ps (simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return
_mm512_trunc_ps(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_truncf16(a);
  #else
    /* Portable path of simde_mm512_trunc_ps: delegate to
     * simde_mm256_trunc_ps on each m256 member when the natural vector size
     * is at most 256, else apply simde_math_truncf to each f32 element. */
    simde__m512_private
      r_,
      a_ = simde__m512_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) {
        r_.m256[i] = simde_mm256_trunc_ps(a_.m256[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
        r_.f32[i] = simde_math_truncf(a_.f32[i]);
      }
    #endif

    return simde__m512_from_private(r_);
  #endif
}
/* When native aliases are enabled, the Intel-style name is redefined to call
 * the simde_ implementation. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_trunc_ps
  #define _mm512_trunc_ps(a) simde_mm512_trunc_ps(a)
#endif

/* Truncation of a simde__m512d.
 * Dispatch order: native SVML (needs AVX-512F), then Sleef_truncd8, then a
 * portable path that delegates to simde_mm256_trunc_pd on each m256d member
 * when the natural vector size is at most 256, and otherwise applies
 * simde_math_trunc to each f64 element. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_trunc_pd (simde__m512d a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_trunc_pd(a);
  #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return Sleef_truncd8(a);
  #else
    simde__m512d_private
      r_,
      a_ = simde__m512d_to_private(a);

    #if SIMDE_NATURAL_VECTOR_SIZE_LE(256)
      for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) {
        r_.m256d[i] = simde_mm256_trunc_pd(a_.m256d[i]);
      }
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_trunc(a_.f64[i]);
      }
    #endif

    return simde__m512d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_trunc_pd
  #define _mm512_trunc_pd(a) simde_mm512_trunc_pd(a)
#endif

/* Masked truncation on a simde__m512.
 * Native when SVML and AVX-512F are available; otherwise the result of
 * simde_mm512_trunc_ps(a) is passed with src and k to
 * simde_mm512_mask_mov_ps. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m512
simde_mm512_mask_trunc_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_trunc_ps(src, k, a);
  #else
    return simde_mm512_mask_mov_ps(src, k, simde_mm512_trunc_ps(a));
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_trunc_ps
  #define _mm512_mask_trunc_ps(src, k, a) simde_mm512_mask_trunc_ps(src, k, a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m512d
simde_mm512_mask_trunc_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) {
  /* Masked truncation on a simde__m512d: native when SVML and AVX-512F are
   * available; otherwise simde_mm512_trunc_pd(a) is passed with src and k to
   * simde_mm512_mask_mov_pd. */
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE)
    return _mm512_mask_trunc_pd(src, k, a);
  #else
    return simde_mm512_mask_mov_pd(src, k, simde_mm512_trunc_pd(a));
  #endif
}
/* When native aliases are enabled, the Intel-style name is redefined to call
 * the simde_ implementation above. */
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm512_mask_trunc_pd
  #define _mm512_mask_trunc_pd(src, k, a) simde_mm512_mask_trunc_pd(src, k, a)
#endif

/* Combined unsigned divide and remainder on simde__m128i.
 * Returns the quotient and stores the remainder through mem_addr.
 * Native when SVML and SSE2 are available. Otherwise the quotient r comes
 * from simde_mm_div_epu32(a, b) and the remainder is computed as
 * a - r * b using simde_x_mm_mullo_epu32 and simde_x_mm_sub_epu32.
 * mem_addr is dereferenced unconditionally, so it must be a valid pointer. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_udivrem_epi32 (simde__m128i * mem_addr, simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_udivrem_epi32(mem_addr, a, b);
  #else
    simde__m128i r;

    r = simde_mm_div_epu32(a, b);
    *mem_addr = simde_x_mm_sub_epu32(a, simde_x_mm_mullo_epu32(r, b));

    return r;
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm_udivrem_epi32
  #define _mm_udivrem_epi32(mem_addr, a, b) simde_mm_udivrem_epi32((mem_addr),(a), (b))
#endif

/* 256-bit counterpart of simde_mm_udivrem_epi32.
 * The native branch (SVML and AVX) reinterprets mem_addr as __m256i* before
 * calling _mm256_udivrem_epi32; the portable branch uses
 * simde_mm256_div_epu32, simde_x_mm256_mullo_epu32 and
 * simde_x_mm256_sub_epu32 in the same quotient/remainder scheme. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m256i
simde_mm256_udivrem_epi32 (simde__m256i* mem_addr, simde__m256i a, simde__m256i b) {
  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
    return _mm256_udivrem_epi32(HEDLEY_REINTERPRET_CAST(__m256i*, mem_addr), a, b);
  #else
    simde__m256i r;

    r = simde_mm256_div_epu32(a, b);
    *mem_addr = simde_x_mm256_sub_epu32(a, simde_x_mm256_mullo_epu32(r, b));

    return r;
  #endif
}
#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
  #undef _mm256_udivrem_epi32
  #define _mm256_udivrem_epi32(mem_addr, a, b) simde_mm256_udivrem_epi32((mem_addr),(a), (b))
#endif

SIMDE_END_DECLS_

HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_X86_SVML_H) */
simde-0.7.2/simde/x86/xop.h000066400000000000000000003400711400333146700153340ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including
without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 *   2020      Evan Nemerson
 */

#if !defined(SIMDE_X86_XOP_H)
#define SIMDE_X86_XOP_H

#include "avx2.h"

/* Native aliases for XOP are only enabled when XOP itself is not native and
 * the user asked for aliases globally. */
#if !defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES)
# define SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES
#endif

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* Bitwise select: every result bit comes from a where the corresponding bit
 * of c is set and from b where it is clear, i.e. (c & a) | (~c & b).
 * Dispatch order: XOP _mm_cmov_si128; AVX-512VL ternary logic with
 * immediate 0xe4; SSE2 and/andnot/or; then per-architecture selects on the
 * private representation (NEON vbslq_s8, WASM bitselect, AltiVec vec_sel).
 * Further fallbacks continue on the following source line. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cmov_si128 (simde__m128i a, simde__m128i b, simde__m128i c) {
  #if defined(SIMDE_X86_XOP_NATIVE)
    return _mm_cmov_si128(a, b, c);
  #elif defined(SIMDE_X86_AVX512VL_NATIVE)
    return _mm_ternarylogic_epi32(a, b, c, 0xe4);
  #elif defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_or_si128(_mm_and_si128(c, a), _mm_andnot_si128(c, b));
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b),
      c_ = simde__m128i_to_private(c);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i8 = vbslq_s8(c_.neon_u8, a_.neon_i8, b_.neon_i8);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, c_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      r_.altivec_i32 = vec_sel(b_.altivec_i32, a_.altivec_i32, c_.altivec_u32);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = (c_.i32f & a_.i32f) | (~c_.i32f & b_.i32f); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = (c_.i32f[i] & a_.i32f[i]) | (~c_.i32f[i] & b_.i32f[i]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_cmov_si128(a, b, c) simde_mm_cmov_si128((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256i simde_mm256_cmov_si256 (simde__m256i a, simde__m256i b, simde__m256i c) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_98521) return _mm256_cmov_si256(a, b, c); #elif defined(SIMDE_X86_AVX512VL_NATIVE) return _mm256_ternarylogic_epi32(a, b, c, 0xe4); #elif defined(SIMDE_X86_AVX2_NATIVE) return _mm256_or_si256(_mm256_and_si256(c, a), _mm256_andnot_si256(c, b)); #else simde__m256i_private r_, a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b), c_ = simde__m256i_to_private(c); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { r_.m128i[i] = simde_mm_cmov_si128(a_.m128i[i], b_.m128i[i], c_.m128i[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = (c_.i32f[i] & a_.i32f[i]) | (~c_.i32f[i] & b_.i32f[i]); } #endif return simde__m256i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm256_cmov_si256(a, b, c) simde_mm256_cmov_si256((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comeq_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ) return _mm_com_epi8(a, b, _MM_PCOMCTRL_EQ); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comeq_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vceqq_s8(a_.neon_i8, 
b_.neon_i8); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 == b_.i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comeq_epi8(a, b) simde_mm_comeq_epi8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comeq_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ) return _mm_com_epi16(a, b, _MM_PCOMCTRL_EQ); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comeq_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vceqq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 == b_.i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comeq_epi16(a, b) simde_mm_comeq_epi16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comeq_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ) return _mm_com_epi32(a, b, _MM_PCOMCTRL_EQ); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comeq_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vceqq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comeq_epi32(a, b) simde_mm_comeq_epi32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comeq_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ) return _mm_com_epi64(a, b, _MM_PCOMCTRL_EQ); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comeq_epi64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vceqq_s64(a_.neon_i64, b_.neon_i64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comeq_epi64(a, b) simde_mm_comeq_epi64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comeq_epu8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ) return _mm_com_epu8(a, b, _MM_PCOMCTRL_EQ); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comeq_epu8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vceqq_u8(a_.neon_u8, b_.neon_u8); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 == b_.u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] == b_.u8[i]) ? ~INT8_C(0) : INT8_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comeq_epu8(a, b) simde_mm_comeq_epu8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comeq_epu16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ) return _mm_com_epu16(a, b, _MM_PCOMCTRL_EQ); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comeq_epu16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vceqq_u16(a_.neon_u16, b_.neon_u16); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 == b_.u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (a_.u16[i] == b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comeq_epu16(a, b) simde_mm_comeq_epu16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comeq_epu32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ) return _mm_com_epu32(a, b, _MM_PCOMCTRL_EQ); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comeq_epu32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vceqq_u32(a_.neon_u32, b_.neon_u32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 == b_.u32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = (a_.u32[i] == b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comeq_epu32(a, b) simde_mm_comeq_epu32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comeq_epu64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ) return _mm_com_epu64(a, b, _MM_PCOMCTRL_EQ); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comeq_epu64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 == b_.u64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? 
~INT64_C(0) : INT64_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comeq_epu64(a, b) simde_mm_comeq_epu64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comge_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE) return _mm_com_epi8(a, b, _MM_PCOMCTRL_GE); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comge_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vcgeq_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 >= b_.i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i8[i] >= b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comge_epi8(a, b) simde_mm_comge_epi8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comge_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE) return _mm_com_epi16(a, b, _MM_PCOMCTRL_GE); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comge_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vcgeq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 >= b_.i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] >= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comge_epi16(a, b) simde_mm_comge_epi16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comge_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE) return _mm_com_epi32(a, b, _MM_PCOMCTRL_GE); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comge_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcgeq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 >= b_.i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (a_.i32[i] >= b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comge_epi32(a, b) simde_mm_comge_epi32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comge_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE) return _mm_com_epi64(a, b, _MM_PCOMCTRL_GE); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comge_epi64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vcgeq_s64(a_.neon_i64, b_.neon_i64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 >= b_.i64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = (a_.i64[i] >= b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comge_epi64(a, b) simde_mm_comge_epi64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comge_epu8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE) return _mm_com_epu8(a, b, _MM_PCOMCTRL_GE); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comge_epu8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vcgeq_u8(a_.neon_u8, b_.neon_u8); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 >= b_.u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] >= b_.u8[i]) ? ~INT8_C(0) : INT8_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comge_epu8(a, b) simde_mm_comge_epu8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comge_epu16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE) return _mm_com_epu16(a, b, _MM_PCOMCTRL_GE); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comge_epu16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vcgeq_u16(a_.neon_u16, b_.neon_u16); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 >= b_.u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (a_.u16[i] >= b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comge_epu16(a, b) simde_mm_comge_epu16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comge_epu32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE) return _mm_com_epu32(a, b, _MM_PCOMCTRL_GE); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comge_epu32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcgeq_u32(a_.neon_u32, b_.neon_u32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 >= b_.u32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = (a_.u32[i] >= b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comge_epu32(a, b) simde_mm_comge_epu32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comge_epu64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE) return _mm_com_epu64(a, b, _MM_PCOMCTRL_GE); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comge_epu64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vcgeq_u64(a_.neon_u64, b_.neon_u64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 >= b_.u64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = (a_.u64[i] >= b_.u64[i]) ? 
~INT64_C(0) : INT64_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comge_epu64(a, b) simde_mm_comge_epu64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comgt_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT) return _mm_com_epi8(a, b, _MM_PCOMCTRL_GT); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comgt_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 > b_.i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comgt_epi8(a, b) simde_mm_comgt_epi8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comgt_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT) return _mm_com_epi16(a, b, _MM_PCOMCTRL_GT); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comgt_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 > b_.i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comgt_epi16(a, b) simde_mm_comgt_epi16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comgt_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT) return _mm_com_epi32(a, b, _MM_PCOMCTRL_GT); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comgt_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 > b_.i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comgt_epi32(a, b) simde_mm_comgt_epi32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comgt_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT) return _mm_com_epi64(a, b, _MM_PCOMCTRL_GT); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comgt_epi64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comgt_epi64(a, b) simde_mm_comgt_epi64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comgt_epu8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT) return _mm_com_epu8(a, b, _MM_PCOMCTRL_GT); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comgt_epu8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vcgtq_u8(a_.neon_u8, b_.neon_u8); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 > b_.u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? ~INT8_C(0) : INT8_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comgt_epu8(a, b) simde_mm_comgt_epu8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comgt_epu16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT) return _mm_com_epu16(a, b, _MM_PCOMCTRL_GT); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comgt_epu16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vcgtq_u16(a_.neon_u16, b_.neon_u16); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 > b_.u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comgt_epu16(a, b) simde_mm_comgt_epu16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comgt_epu32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT) return _mm_com_epu32(a, b, _MM_PCOMCTRL_GT); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comgt_epu32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcgtq_u32(a_.neon_u32, b_.neon_u32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 > b_.u32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comgt_epu32(a, b) simde_mm_comgt_epu32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comgt_epu64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT) return _mm_com_epu64(a, b, _MM_PCOMCTRL_GT); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comgt_epu64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vcgtq_u64(a_.neon_u64, b_.neon_u64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 > b_.u64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = (a_.u64[i] > b_.u64[i]) ? 
~INT64_C(0) : INT64_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comgt_epu64(a, b) simde_mm_comgt_epu64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comle_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE) return _mm_com_epi8(a, b, _MM_PCOMCTRL_LE); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comle_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vcleq_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 <= b_.i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i8[i] <= b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comle_epi8(a, b) simde_mm_comle_epi8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comle_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE) return _mm_com_epi16(a, b, _MM_PCOMCTRL_LE); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comle_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 <= b_.i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] <= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comle_epi16(a, b) simde_mm_comle_epi16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comle_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE) return _mm_com_epi32(a, b, _MM_PCOMCTRL_LE); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comle_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcleq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 <= b_.i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (a_.i32[i] <= b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comle_epi32(a, b) simde_mm_comle_epi32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comle_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE) return _mm_com_epi64(a, b, _MM_PCOMCTRL_LE); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comle_epi64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vcleq_s64(a_.neon_i64, b_.neon_i64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 <= b_.i64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = (a_.i64[i] <= b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comle_epi64(a, b) simde_mm_comle_epi64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comle_epu8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE) return _mm_com_epu8(a, b, _MM_PCOMCTRL_LE); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comle_epu8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vcleq_u8(a_.neon_u8, b_.neon_u8); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 <= b_.u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] <= b_.u8[i]) ? ~INT8_C(0) : INT8_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comle_epu8(a, b) simde_mm_comle_epu8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comle_epu16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE) return _mm_com_epu16(a, b, _MM_PCOMCTRL_LE); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comle_epu16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vcleq_u16(a_.neon_u16, b_.neon_u16); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 <= b_.u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (a_.u16[i] <= b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comle_epu16(a, b) simde_mm_comle_epu16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comle_epu32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE) return _mm_com_epu32(a, b, _MM_PCOMCTRL_LE); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comle_epu32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcleq_u32(a_.neon_u32, b_.neon_u32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 <= b_.u32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = (a_.u32[i] <= b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comle_epu32(a, b) simde_mm_comle_epu32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comle_epu64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE) return _mm_com_epu64(a, b, _MM_PCOMCTRL_LE); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comle_epu64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vcleq_u64(a_.neon_u64, b_.neon_u64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 <= b_.u64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = (a_.u64[i] <= b_.u64[i]) ? 
~INT64_C(0) : INT64_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comle_epu64(a, b) simde_mm_comle_epu64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comlt_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) return _mm_com_epi8(a, b, _MM_PCOMCTRL_LT); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comlt_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 < b_.i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comlt_epi8(a, b) simde_mm_comlt_epi8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comlt_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) return _mm_com_epi16(a, b, _MM_PCOMCTRL_LT); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comlt_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 < b_.i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comlt_epi16(a, b) simde_mm_comlt_epi16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comlt_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) return _mm_com_epi32(a, b, _MM_PCOMCTRL_LT); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comlt_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 < b_.i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comlt_epi32(a, b) simde_mm_comlt_epi32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comlt_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) return _mm_com_epi64(a, b, _MM_PCOMCTRL_LT); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comlt_epi64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vcltq_s64(a_.neon_i64, b_.neon_i64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 < b_.i64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = (a_.i64[i] < b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comlt_epi64(a, b) simde_mm_comlt_epi64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comlt_epu8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) return _mm_com_epu8(a, b, _MM_PCOMCTRL_LT); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comlt_epu8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vcltq_u8(a_.neon_u8, b_.neon_u8); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 < b_.u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? ~INT8_C(0) : INT8_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comlt_epu8(a, b) simde_mm_comlt_epu8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comlt_epu16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) return _mm_com_epu16(a, b, _MM_PCOMCTRL_LT); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comlt_epu16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vcltq_u16(a_.neon_u16, b_.neon_u16); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 < b_.u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comlt_epu16(a, b) simde_mm_comlt_epu16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comlt_epu32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) return _mm_com_epu32(a, b, _MM_PCOMCTRL_LT); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comlt_epu32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcltq_u32(a_.neon_u32, b_.neon_u32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 < b_.u32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comlt_epu32(a, b) simde_mm_comlt_epu32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comlt_epu64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) return _mm_com_epu64(a, b, _MM_PCOMCTRL_LT); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comlt_epu64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vcltq_u64(a_.neon_u64, b_.neon_u64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 < b_.u64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = (a_.u64[i] < b_.u64[i]) ? 
~INT64_C(0) : INT64_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comlt_epu64(a, b) simde_mm_comlt_epu64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comneq_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) return _mm_com_epi8(a, b, _MM_PCOMCTRL_NEQ); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comneq_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vmvnq_u8(vceqq_s8(a_.neon_i8, b_.neon_i8)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 != b_.i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i8[i] != b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comneq_epi8(a, b) simde_mm_comneq_epi8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comneq_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) return _mm_com_epi16(a, b, _MM_PCOMCTRL_NEQ); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comneq_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vmvnq_u16(vceqq_s16(a_.neon_i16, b_.neon_i16)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 != b_.i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] != b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comneq_epi16(a, b) simde_mm_comneq_epi16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comneq_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) return _mm_com_epi32(a, b, _MM_PCOMCTRL_NEQ); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comneq_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vmvnq_u32(vceqq_s32(a_.neon_i32, b_.neon_i32)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 != b_.i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (a_.i32[i] != b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comneq_epi32(a, b) simde_mm_comneq_epi32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comneq_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) return _mm_com_epi64(a, b, _MM_PCOMCTRL_NEQ); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comneq_epi64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_s64(a_.neon_i64, b_.neon_i64))); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 != b_.i64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = (a_.i64[i] != b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comneq_epi64(a, b) simde_mm_comneq_epi64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comneq_epu8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) return _mm_com_epu8(a, b, _MM_PCOMCTRL_NEQ); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comneq_epu8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vmvnq_u8(vceqq_u8(a_.neon_u8, b_.neon_u8)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 != b_.u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] != b_.u8[i]) ? ~INT8_C(0) : INT8_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comneq_epu8(a, b) simde_mm_comneq_epu8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comneq_epu16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) return _mm_com_epu16(a, b, _MM_PCOMCTRL_NEQ); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comneq_epu16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vmvnq_u16(vceqq_u16(a_.neon_u16, b_.neon_u16)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 != b_.u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (a_.u16[i] != b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comneq_epu16(a, b) simde_mm_comneq_epu16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comneq_epu32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) return _mm_com_epu32(a, b, _MM_PCOMCTRL_NEQ); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comneq_epu32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vmvnq_u32(vceqq_u32(a_.neon_u32, b_.neon_u32)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 != b_.u32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = (a_.u32[i] != b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comneq_epu32(a, b) simde_mm_comneq_epu32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comneq_epu64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) return _mm_com_epu64(a, b, _MM_PCOMCTRL_NEQ); #elif defined(SIMDE_X86_XOP_NATIVE) return _mm_comneq_epu64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_u64(a_.neon_u64, b_.neon_u64))); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 != b_.u64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = (a_.u64[i] != b_.u64[i]) ? 
~INT64_C(0) : INT64_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comneq_epu64(a, b) simde_mm_comneq_epu64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comfalse_epi8 (simde__m128i a, simde__m128i b) { (void) a; (void) b; return simde_mm_setzero_si128(); } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comfalse_epi8(a, b) simde_mm_comfalse_epi8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comfalse_epi16 (simde__m128i a, simde__m128i b) { (void) a; (void) b; return simde_mm_setzero_si128(); } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comfalse_epi16(a, b) simde_mm_comfalse_epi16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comfalse_epi32 (simde__m128i a, simde__m128i b) { (void) a; (void) b; return simde_mm_setzero_si128(); } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comfalse_epi32(a, b) simde_mm_comfalse_epi32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comfalse_epi64 (simde__m128i a, simde__m128i b) { (void) a; (void) b; return simde_mm_setzero_si128(); } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comfalse_epi64(a, b) simde_mm_comfalse_epi64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comfalse_epu8 (simde__m128i a, simde__m128i b) { (void) a; (void) b; return simde_mm_setzero_si128(); } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comfalse_epu8(a, b) simde_mm_comfalse_epu8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comfalse_epu16 (simde__m128i a, simde__m128i b) { (void) a; (void) b; return simde_mm_setzero_si128(); } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comfalse_epu16(a, b) simde_mm_comfalse_epu16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comfalse_epu32 (simde__m128i a, simde__m128i b) { (void) a; (void) b; return 
simde_mm_setzero_si128(); } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comfalse_epu32(a, b) simde_mm_comfalse_epu32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comfalse_epu64 (simde__m128i a, simde__m128i b) { (void) a; (void) b; return simde_mm_setzero_si128(); } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comfalse_epu64(a, b) simde_mm_comfalse_epu64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comtrue_epi8 (simde__m128i a, simde__m128i b) { (void) a; (void) b; return simde_x_mm_setone_si128(); } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comtrue_epi8(a, b) simde_mm_comtrue_epi8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comtrue_epi16 (simde__m128i a, simde__m128i b) { (void) a; (void) b; return simde_x_mm_setone_si128(); } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comtrue_epi16(a, b) simde_mm_comtrue_epi16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comtrue_epi32 (simde__m128i a, simde__m128i b) { (void) a; (void) b; return simde_x_mm_setone_si128(); } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comtrue_epi32(a, b) simde_mm_comtrue_epi32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comtrue_epi64 (simde__m128i a, simde__m128i b) { (void) a; (void) b; return simde_x_mm_setone_si128(); } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comtrue_epi64(a, b) simde_mm_comtrue_epi64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comtrue_epu8 (simde__m128i a, simde__m128i b) { (void) a; (void) b; return simde_x_mm_setone_si128(); } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comtrue_epu8(a, b) simde_mm_comtrue_epu8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comtrue_epu16 (simde__m128i a, simde__m128i b) { (void) a; (void) b; return simde_x_mm_setone_si128(); } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) 
#define _mm_comtrue_epu16(a, b) simde_mm_comtrue_epu16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comtrue_epu32 (simde__m128i a, simde__m128i b) { (void) a; (void) b; return simde_x_mm_setone_si128(); } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comtrue_epu32(a, b) simde_mm_comtrue_epu32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_comtrue_epu64 (simde__m128i a, simde__m128i b) { (void) a; (void) b; return simde_x_mm_setone_si128(); } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_comtrue_epu64(a, b) simde_mm_comtrue_epu64((a), (b)) #endif #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) #define SIMDE_X86_XOP_HAVE_COM_ 1 #define SIMDE_MM_PCOMCTRL_LT _MM_PCOMCTRL_LT #define SIMDE_MM_PCOMCTRL_LE _MM_PCOMCTRL_LE #define SIMDE_MM_PCOMCTRL_GT _MM_PCOMCTRL_GT #define SIMDE_MM_PCOMCTRL_GE _MM_PCOMCTRL_GE #define SIMDE_MM_PCOMCTRL_EQ _MM_PCOMCTRL_EQ #define SIMDE_MM_PCOMCTRL_NEQ _MM_PCOMCTRL_NEQ #define SIMDE_MM_PCOMCTRL_FALSE _MM_PCOMCTRL_FALSE #define SIMDE_MM_PCOMCTRL_TRUE _MM_PCOMCTRL_TRUE #else #define SIMDE_MM_PCOMCTRL_LT 0 #define SIMDE_MM_PCOMCTRL_LE 1 #define SIMDE_MM_PCOMCTRL_GT 2 #define SIMDE_MM_PCOMCTRL_GE 3 #define SIMDE_MM_PCOMCTRL_EQ 4 #define SIMDE_MM_PCOMCTRL_NEQ 5 #define SIMDE_MM_PCOMCTRL_FALSE 6 #define SIMDE_MM_PCOMCTRL_TRUE 7 #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _MM_PCOMCTRL_LT SIMDE_MM_PCOMCTRL_LT #define _MM_PCOMCTRL_LE SIMDE_MM_PCOMCTRL_LE #define _MM_PCOMCTRL_GT SIMDE_MM_PCOMCTRL_GT #define _MM_PCOMCTRL_GE SIMDE_MM_PCOMCTRL_GE #define _MM_PCOMCTRL_EQ SIMDE_MM_PCOMCTRL_EQ #define _MM_PCOMCTRL_NEQ SIMDE_MM_PCOMCTRL_NEQ #define _MM_PCOMCTRL_FALSE SIMDE_MM_PCOMCTRL_FALSE #define _MM_PCOMCTRL_TRUE SIMDE_MM_PCOMCTRL_TRUE #endif #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_com_epi8 (simde__m128i a, simde__m128i b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { switch (imm8) { case SIMDE_MM_PCOMCTRL_LT: return 
simde_mm_comlt_epi8(a, b); case SIMDE_MM_PCOMCTRL_LE: return simde_mm_comle_epi8(a, b); case SIMDE_MM_PCOMCTRL_GT: return simde_mm_comgt_epi8(a, b); case SIMDE_MM_PCOMCTRL_GE: return simde_mm_comge_epi8(a, b); case SIMDE_MM_PCOMCTRL_EQ: return simde_mm_comeq_epi8(a, b); case SIMDE_MM_PCOMCTRL_NEQ: return simde_mm_comneq_epi8(a, b); case SIMDE_MM_PCOMCTRL_FALSE: return simde_mm_comfalse_epi8(a, b); case SIMDE_MM_PCOMCTRL_TRUE: return simde_mm_comtrue_epi8(a, b); default: HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); } } #if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) #define simde_mm_com_epi8(a, b, imm8) _mm_com_epi8((a), (b), (imm8)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_com_epi8(a, b, imm8) simde_mm_com_epi8((a), (b), (imm8)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_com_epi16 (simde__m128i a, simde__m128i b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { switch (imm8) { case SIMDE_MM_PCOMCTRL_LT: return simde_mm_comlt_epi16(a, b); case SIMDE_MM_PCOMCTRL_LE: return simde_mm_comle_epi16(a, b); case SIMDE_MM_PCOMCTRL_GT: return simde_mm_comgt_epi16(a, b); case SIMDE_MM_PCOMCTRL_GE: return simde_mm_comge_epi16(a, b); case SIMDE_MM_PCOMCTRL_EQ: return simde_mm_comeq_epi16(a, b); case SIMDE_MM_PCOMCTRL_NEQ: return simde_mm_comneq_epi16(a, b); case SIMDE_MM_PCOMCTRL_FALSE: return simde_mm_comfalse_epi16(a, b); case SIMDE_MM_PCOMCTRL_TRUE: return simde_mm_comtrue_epi16(a, b); default: HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); } } #if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) #define simde_mm_com_epi16(a, b, imm8) _mm_com_epi16((a), (b), (imm8)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_com_epi16(a, b, imm8) simde_mm_com_epi16((a), (b), (imm8)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_com_epi32 (simde__m128i a, simde__m128i b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { switch (imm8) { case 
SIMDE_MM_PCOMCTRL_LT: return simde_mm_comlt_epi32(a, b); case SIMDE_MM_PCOMCTRL_LE: return simde_mm_comle_epi32(a, b); case SIMDE_MM_PCOMCTRL_GT: return simde_mm_comgt_epi32(a, b); case SIMDE_MM_PCOMCTRL_GE: return simde_mm_comge_epi32(a, b); case SIMDE_MM_PCOMCTRL_EQ: return simde_mm_comeq_epi32(a, b); case SIMDE_MM_PCOMCTRL_NEQ: return simde_mm_comneq_epi32(a, b); case SIMDE_MM_PCOMCTRL_FALSE: return simde_mm_comfalse_epi32(a, b); case SIMDE_MM_PCOMCTRL_TRUE: return simde_mm_comtrue_epi32(a, b); default: HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); } } #if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) #define simde_mm_com_epi32(a, b, imm8) _mm_com_epi32((a), (b), (imm8)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_com_epi32(a, b, imm8) simde_mm_com_epi32((a), (b), (imm8)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_com_epi64 (simde__m128i a, simde__m128i b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { switch (imm8) { case SIMDE_MM_PCOMCTRL_LT: return simde_mm_comlt_epi64(a, b); case SIMDE_MM_PCOMCTRL_LE: return simde_mm_comle_epi64(a, b); case SIMDE_MM_PCOMCTRL_GT: return simde_mm_comgt_epi64(a, b); case SIMDE_MM_PCOMCTRL_GE: return simde_mm_comge_epi64(a, b); case SIMDE_MM_PCOMCTRL_EQ: return simde_mm_comeq_epi64(a, b); case SIMDE_MM_PCOMCTRL_NEQ: return simde_mm_comneq_epi64(a, b); case SIMDE_MM_PCOMCTRL_FALSE: return simde_mm_comfalse_epi64(a, b); case SIMDE_MM_PCOMCTRL_TRUE: return simde_mm_comtrue_epi64(a, b); default: HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); } } #if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) #define simde_mm_com_epi64(a, b, imm8) _mm_com_epi64((a), (b), (imm8)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_com_epi64(a, b, imm8) simde_mm_com_epi64((a), (b), (imm8)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_com_epu8 (simde__m128i a, simde__m128i b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 
0, 7) { switch (imm8) { case SIMDE_MM_PCOMCTRL_LT: return simde_mm_comlt_epu8(a, b); case SIMDE_MM_PCOMCTRL_LE: return simde_mm_comle_epu8(a, b); case SIMDE_MM_PCOMCTRL_GT: return simde_mm_comgt_epu8(a, b); case SIMDE_MM_PCOMCTRL_GE: return simde_mm_comge_epu8(a, b); case SIMDE_MM_PCOMCTRL_EQ: return simde_mm_comeq_epu8(a, b); case SIMDE_MM_PCOMCTRL_NEQ: return simde_mm_comneq_epu8(a, b); case SIMDE_MM_PCOMCTRL_FALSE: return simde_mm_comfalse_epu8(a, b); case SIMDE_MM_PCOMCTRL_TRUE: return simde_mm_comtrue_epu8(a, b); default: HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); } } #if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) #define simde_mm_com_epu8(a, b, imm8) _mm_com_epu8((a), (b), (imm8)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_com_epu8(a, b, imm8) simde_mm_com_epu8((a), (b), (imm8)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_com_epu16 (simde__m128i a, simde__m128i b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { switch (imm8) { case SIMDE_MM_PCOMCTRL_LT: return simde_mm_comlt_epu16(a, b); case SIMDE_MM_PCOMCTRL_LE: return simde_mm_comle_epu16(a, b); case SIMDE_MM_PCOMCTRL_GT: return simde_mm_comgt_epu16(a, b); case SIMDE_MM_PCOMCTRL_GE: return simde_mm_comge_epu16(a, b); case SIMDE_MM_PCOMCTRL_EQ: return simde_mm_comeq_epu16(a, b); case SIMDE_MM_PCOMCTRL_NEQ: return simde_mm_comneq_epu16(a, b); case SIMDE_MM_PCOMCTRL_FALSE: return simde_mm_comfalse_epu16(a, b); case SIMDE_MM_PCOMCTRL_TRUE: return simde_mm_comtrue_epu16(a, b); default: HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); } } #if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) #define simde_mm_com_epu16(a, b, imm8) _mm_com_epu16((a), (b), (imm8)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_com_epu16(a, b, imm8) simde_mm_com_epu16((a), (b), (imm8)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_com_epu32 (simde__m128i a, simde__m128i b, const int imm8) 
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { switch (imm8) { case SIMDE_MM_PCOMCTRL_LT: return simde_mm_comlt_epu32(a, b); case SIMDE_MM_PCOMCTRL_LE: return simde_mm_comle_epu32(a, b); case SIMDE_MM_PCOMCTRL_GT: return simde_mm_comgt_epu32(a, b); case SIMDE_MM_PCOMCTRL_GE: return simde_mm_comge_epu32(a, b); case SIMDE_MM_PCOMCTRL_EQ: return simde_mm_comeq_epu32(a, b); case SIMDE_MM_PCOMCTRL_NEQ: return simde_mm_comneq_epu32(a, b); case SIMDE_MM_PCOMCTRL_FALSE: return simde_mm_comfalse_epu32(a, b); case SIMDE_MM_PCOMCTRL_TRUE: return simde_mm_comtrue_epu32(a, b); default: HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); } } #if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) #define simde_mm_com_epu32(a, b, imm8) _mm_com_epu32((a), (b), (imm8)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_com_epu32(a, b, imm8) simde_mm_com_epu32((a), (b), (imm8)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_com_epu64 (simde__m128i a, simde__m128i b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { switch (imm8) { case SIMDE_MM_PCOMCTRL_LT: return simde_mm_comlt_epu64(a, b); case SIMDE_MM_PCOMCTRL_LE: return simde_mm_comle_epu64(a, b); case SIMDE_MM_PCOMCTRL_GT: return simde_mm_comgt_epu64(a, b); case SIMDE_MM_PCOMCTRL_GE: return simde_mm_comge_epu64(a, b); case SIMDE_MM_PCOMCTRL_EQ: return simde_mm_comeq_epu64(a, b); case SIMDE_MM_PCOMCTRL_NEQ: return simde_mm_comneq_epu64(a, b); case SIMDE_MM_PCOMCTRL_FALSE: return simde_mm_comfalse_epu64(a, b); case SIMDE_MM_PCOMCTRL_TRUE: return simde_mm_comtrue_epu64(a, b); default: HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); } } #if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) #define simde_mm_com_epu64(a, b, imm8) _mm_com_epu64((a), (b), (imm8)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_com_epu64(a, b, imm8) simde_mm_com_epu64((a), (b), (imm8)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_frcz_ps (simde__m128 a) { #if 
defined(SIMDE_X86_XOP_NATIVE) return _mm_frcz_ps(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { #if defined(simde_math_modff) simde_float32 integral; r_.f32[i] = simde_math_modff(a_.f32[i], &integral); #else r_.f32[i] = (a_.f32[i] / 1.0f); #endif } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_frcz_ps(a) simde_mm_frcz_ps((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_frcz_pd (simde__m128d a) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_frcz_pd(a); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { #if defined(simde_math_modf) simde_float64 integral; r_.f64[i] = simde_math_modf(a_.f64[i], &integral); #else r_.f64[i] = (a_.f64[i] / 1.0f); #endif } return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_frcz_ps(a) simde_mm_frcz_ps((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_frcz_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_BUG_CLANG_48673) return _mm_frcz_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(simde_math_modff) simde_float32 integral; a_.f32[0] = simde_math_modff(b_.f32[0], &integral); #else a_.f32[0] = (b_.f32[0] / 1.0f); #endif return simde__m128_from_private(a_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_frcz_ss(a, b) simde_mm_frcz_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_frcz_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_BUG_CLANG_48673) return _mm_frcz_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(simde_math_modf) simde_float64 integral; 
a_.f64[0] = simde_math_modf(b_.f64[0], &integral); #else a_.f64[0] = (b_.f64[0] / 1.0f); #endif return simde__m128d_from_private(a_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_frcz_sd(a, b) simde_mm_frcz_sd((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_frcz_ps (simde__m256 a) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm256_frcz_ps(a); #else simde__m256_private r_, a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_frcz_ps(a_.m128[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { #if defined(simde_math_modff) simde_float32 integral; r_.f32[i] = simde_math_modff(a_.f32[i], &integral); #else r_.f32[i] = (a_.f32[i] / 1.0f); #endif } #endif return simde__m256_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm256_frcz_ps(a) simde_mm256_frcz_ps((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_frcz_pd (simde__m256d a) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm256_frcz_pd(a); #else simde__m256d_private r_, a_ = simde__m256d_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_frcz_pd(a_.m128d[i]); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { #if defined(simde_math_modf) simde_float64 integral; r_.f64[i] = simde_math_modf(a_.f64[i], &integral); #else r_.f64[i] = (a_.f64[i] / 1.0f); #endif } #endif return simde__m256d_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm256_frcz_ps(a) simde_mm256_frcz_ps((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_haddw_epi8 (simde__m128i a) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_haddw_epi8(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vpaddlq_s8(a_.neon_i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[i * 2]) + HEDLEY_STATIC_CAST(int16_t, a_.i8[(i * 2) + 1]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_haddw_epi8(a) simde_mm_haddw_epi8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_haddw_epu8 (simde__m128i a) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_haddw_epu8(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vpaddlq_u8(a_.neon_u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u8[i * 2]) + HEDLEY_STATIC_CAST(uint16_t, a_.u8[(i * 2) + 1]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_haddw_epu8(a) simde_mm_haddw_epu8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_haddd_epi8 (simde__m128i a) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_haddd_epi8(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vpaddlq_s16(vpaddlq_s8(a_.neon_i8)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i8[(i * 4) ]) + HEDLEY_STATIC_CAST(int32_t, a_.i8[(i * 4) + 1]) + HEDLEY_STATIC_CAST(int32_t, a_.i8[(i * 4) + 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i8[(i * 4) + 3]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_haddd_epi8(a) simde_mm_haddd_epi8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_haddd_epi16 (simde__m128i a) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_haddd_epi16(a); #else 
simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vpaddlq_s16(a_.neon_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) ]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_haddd_epi8(a) simde_mm_haddd_epi8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_haddd_epu8 (simde__m128i a) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_haddd_epu8(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vpaddlq_u16(vpaddlq_u8(a_.neon_u8)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u8[(i * 4) ]) + HEDLEY_STATIC_CAST(uint32_t, a_.u8[(i * 4) + 1]) + HEDLEY_STATIC_CAST(uint32_t, a_.u8[(i * 4) + 2]) + HEDLEY_STATIC_CAST(uint32_t, a_.u8[(i * 4) + 3]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_haddd_epu8(a) simde_mm_haddd_epu8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_haddd_epu16 (simde__m128i a) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_haddd_epu16(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vpaddlq_u16(a_.neon_u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u16[(i * 2) ]) + HEDLEY_STATIC_CAST(uint32_t, a_.u16[(i * 2) + 1]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_haddd_epu8(a) simde_mm_haddd_epu8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_haddq_epi8 (simde__m128i a) { #if 
defined(SIMDE_X86_XOP_NATIVE) return _mm_haddq_epi8(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vpaddlq_s32(vpaddlq_s16(vpaddlq_s8(a_.neon_i8))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) ]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 1]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 2]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 3]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 4]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 5]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 6]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 7]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_haddq_epi8(a) simde_mm_haddq_epi8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_haddq_epi16 (simde__m128i a) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_haddq_epi16(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vpaddlq_s32(vpaddlq_s16(a_.neon_i16)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i16[(i * 4) ]) + HEDLEY_STATIC_CAST(int64_t, a_.i16[(i * 4) + 1]) + HEDLEY_STATIC_CAST(int64_t, a_.i16[(i * 4) + 2]) + HEDLEY_STATIC_CAST(int64_t, a_.i16[(i * 4) + 3]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_haddq_epi16(a) simde_mm_haddq_epi16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_haddq_epi32 (simde__m128i a) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_haddq_epi32(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vpaddlq_s32(a_.neon_i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < 
(sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) ]) + HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 1]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_haddq_epi32(a) simde_mm_haddq_epi32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_haddq_epu8 (simde__m128i a) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_haddq_epu8(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u64 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(a_.neon_u8))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) ]) + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 1]) + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 2]) + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 3]) + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 4]) + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 5]) + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 6]) + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 7]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_haddq_epu8(a) simde_mm_haddq_epu8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_haddq_epu16 (simde__m128i a) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_haddq_epu16(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u64 = vpaddlq_u32(vpaddlq_u16(a_.neon_u16)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u16[(i * 4) ]) + HEDLEY_STATIC_CAST(uint64_t, a_.u16[(i * 4) + 1]) + HEDLEY_STATIC_CAST(uint64_t, a_.u16[(i * 4) + 2]) + HEDLEY_STATIC_CAST(uint64_t, a_.u16[(i * 4) + 3]); } #endif return simde__m128i_from_private(r_); #endif } #if 
defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_haddq_epu16(a) simde_mm_haddq_epu16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_haddq_epu32 (simde__m128i a) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_haddq_epu32(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u64 = vpaddlq_u32(a_.neon_u32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[(i * 2) ]) + HEDLEY_STATIC_CAST(uint64_t, a_.u32[(i * 2) + 1]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_haddq_epu32(a) simde_mm_haddq_epu32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_hsubw_epi8 (simde__m128i a) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_hsubw_epi8(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[i * 2]) - HEDLEY_STATIC_CAST(int16_t, a_.i8[(i * 2) + 1]); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_hsubw_epi8(a) simde_mm_hsubw_epi8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_hsubd_epi16 (simde__m128i a) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_hsubd_epi16(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) ]) - HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_hsubd_epi8(a) simde_mm_hsubd_epi8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_hsubq_epi32 (simde__m128i a) { #if defined(SIMDE_X86_XOP_NATIVE) return 
_mm_hsubq_epi32(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) ]) - HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 1]); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_hsubq_epi32(a) simde_mm_hsubq_epi32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_macc_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_macc_epi16(a, b, c); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), c_ = simde__m128i_to_private(c); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vmlaq_s16(c_.neon_i16, a_.neon_i16, b_.neon_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] * b_.i16[i]) + c_.i16[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_macc_epi16(a, b, c) simde_mm_macc_epi16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_macc_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_macc_epi32(a, b, c); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), c_ = simde__m128i_to_private(c); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vmlaq_s32(c_.neon_i32, a_.neon_i32, b_.neon_i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (a_.i32[i] * b_.i32[i]) + c_.i32[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_macc_epi32(a, b, c) simde_mm_macc_epi32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maccd_epi16 (simde__m128i a, simde__m128i b, 
simde__m128i c) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_maccd_epi16(a, b, c); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), c_ = simde__m128i_to_private(c); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) int16x8_t even = vuzp1q_s16(a_.neon_i16, b_.neon_i16); int32x4_t a_even = vmovl_s16(vget_low_s16(even)); int32x4_t b_even = vmovl_high_s16(even); r_.neon_i32 = vmlaq_s32(c_.neon_i32, a_even, b_even); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2])) + c_.i32[i]; } #endif return simde__m128i_from_private(r_); #endif }
#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)
  #define _mm_maccd_epi16(a, b, c) simde_mm_maccd_epi16((a), (b), (c))
#endif

/* Multiply the even-indexed (low) signed 32-bit lane of each 64-bit element
 * of `a` and `b`, widen the product to 64 bits and add the corresponding
 * 64-bit lane of `c` (no saturation). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_macclo_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) {
  #if defined(SIMDE_X86_XOP_NATIVE)
    return _mm_macclo_epi32(a, b, c);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b),
      c_ = simde__m128i_to_private(c);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      /* even = { a[0], a[2], b[0], b[2] } */
      int32x4_t even = vuzp1q_s32(a_.neon_i32, b_.neon_i32);
      r_.neon_i64 = vaddq_s64(vmull_s32(vget_low_s32(even), vget_high_s32(even)), c_.neon_i64);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
        /* The 32x32 product always fits in 64 bits. */
        const int64_t prod =
          HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 0]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[(i * 2) + 0]);
        /* Accumulate on the unsigned view so that overflow wraps (as the
         * NEON path's vaddq_s64 does) instead of being undefined. */
        r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, prod) + c_.u64[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
/* The alias must carry this function's own name; it was previously spelled
 * _mm_macclo_epi16 and expanded to a simde_mm_macclo_epi16 that is not
 * defined next to it. */
#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES)
  #define _mm_macclo_epi32(a, b, c) simde_mm_macclo_epi32((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_macchi_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_macchi_epi32(a, b, c); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ =
simde__m128i_to_private(b), c_ = simde__m128i_to_private(c); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) int32x4_t even = vuzp2q_s32(a_.neon_i32, b_.neon_i32); r_.neon_i64 = vaddq_s64(vmull_s32(vget_low_s32(even), vget_high_s32(even)), c_.neon_i64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = (HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 1]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[(i * 2) + 1])) + c_.i64[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_macchi_epi16(a, b, c) simde_mm_macchi_epi16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maccs_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_maccs_epi16(a, b, c); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), c_ = simde__m128i_to_private(c); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) int32x4_t c_lo = vmovl_s16(vget_low_s16(c_.i16)); int32x4_t c_hi = vmovl_high_s16(c_.i16); int32x4_t lo = vmlal_s16(c_lo, vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); int32x4_t hi = vmlal_high_s16(c_hi, a_.neon_i16, b_.neon_i16); r_.neon_i16 = vcombine_s16(vqmovn_s32(lo), vqmovn_s32(hi)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { int32_t tmp = HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i]); tmp += c_.i16[i]; if (tmp > INT16_MAX) r_.i16[i] = INT16_MAX; else if (tmp < INT16_MIN) r_.i16[i] = INT16_MIN; else r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, tmp); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_maccs_epi16(a, b, c) simde_mm_maccs_epi16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maccs_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_maccs_epi32(a, 
b, c); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), c_ = simde__m128i_to_private(c); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) int64x2_t c_lo = vmovl_s32(vget_low_s32(c_.i32)); int64x2_t c_hi = vmovl_high_s32(c_.i32); int64x2_t lo = vmlal_s32(c_lo, vget_low_s32(a_.neon_i32), vget_low_s32(b_.neon_i32)); int64x2_t hi = vmlal_high_s32(c_hi, a_.neon_i32, b_.neon_i32); r_.neon_i32 = vcombine_s32(vqmovn_s64(lo), vqmovn_s64(hi)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]); tmp += HEDLEY_STATIC_CAST(int64_t, c_.i32[i]); if (tmp > INT32_MAX) r_.i32[i] = INT32_MAX; else if (tmp < INT32_MIN) r_.i32[i] = INT32_MIN; else r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, tmp); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_maccs_epi32(a, b, c) simde_mm_maccs_epi32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maccsd_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_maccsd_epi16(a, b, c); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), c_ = simde__m128i_to_private(c); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) int16x8_t even = vuzp1q_s16(a_.neon_i16, b_.neon_i16); r_.neon_i32 = vqaddq_s32(vmull_s16(vget_low_s16(even), vget_high_s16(even)), c_.neon_i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { int32_t prod = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]); r_.i32[i] = simde_math_adds_i32(prod, c_.i32[i]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_maccsd_epi16(a, b, c) simde_mm_maccsd_epi16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde__m128i simde_mm_maccslo_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_maccslo_epi32(a, b, c); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), c_ = simde__m128i_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 0]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[(i * 2) + 0]); r_.i64[i] = simde_math_adds_i64(tmp, c_.i64[i]); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_maccslo_epi16(a, b, c) simde_mm_maccslo_epi16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maccshi_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_maccshi_epi32(a, b, c); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), c_ = simde__m128i_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 1]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[(i * 2) + 1]); r_.i64[i] = simde_math_adds_i64(tmp, c_.i64[i]); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_maccshi_epi16(a, b, c) simde_mm_maccshi_epi16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maddd_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_maddd_epi16(a, b, c); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), c_ = simde__m128i_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (a_.i16[(i * 2) + 0] * b_.i16[(i * 2) + 0]) + (a_.i16[(i * 2) + 1] * b_.i16[(i * 2) + 1]); r_.i32[i] += c_.i32[i]; } return 
simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_maddd_epi16(a, b, c) simde_mm_maddd_epi16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_maddsd_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_maddsd_epi16(a, b, c); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), c_ = simde__m128i_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { /* The AMD64 Architecture Programmer's Manual says that "the" * addition is saturated; I'm not sure whether that means * the pairwise addition or the accumulate, or both. */ r_.i32[i] = (a_.i16[(i * 2) + 0] * b_.i16[(i * 2) + 0]) + (a_.i16[(i * 2) + 1] * b_.i16[(i * 2) + 1]); r_.i32[i] = simde_math_adds_i32(r_.i32[i], c_.i32[i]); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_maddsd_epi16(a, b, c) simde_mm_maddsd_epi16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sha_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_sha_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vshlq_s8(a_.neon_i8, b_.neon_i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { if (b_.i8[i] < 0) { r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i8[i] >> -b_.i8[i]); } else { r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i8[i] << b_.i8[i]); } } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_sha_epi8(a, b) simde_mm_sha_epi8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sha_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_sha_epi16(a, b); #else simde__m128i_private 
r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vshlq_s16(a_.neon_i16, b_.neon_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { if (b_.i16[i] < 0) { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] >> -b_.i16[i]); } else { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << b_.i16[i]); } } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_sha_epi16(a, b) simde_mm_sha_epi16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sha_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_sha_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vshlq_s32(a_.neon_i32, b_.neon_i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { if (b_.i32[i] < 0) { r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] >> -b_.i32[i]); } else { r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << b_.i32[i]); } } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_sha_epi32(a, b) simde_mm_sha_epi32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sha_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_sha_epi64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vshlq_s64(a_.neon_i64, b_.neon_i64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { if (b_.i64[i] < 0) { r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] >> -b_.i64[i]); } else { r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << b_.i64[i]); } } #endif return simde__m128i_from_private(r_); 
#endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_sha_epi64(a, b) simde_mm_sha_epi64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_shl_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_shl_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vshlq_u8(a_.neon_u8, b_.neon_i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { if (HEDLEY_UNLIKELY(b_.i8[i] < -7 || b_.i8[i] > 7)) { r_.u8[i] = 0; } else { if (b_.i8[i] < 0) { r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i] >> -b_.i8[i]); } else { r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i] << b_.i8[i]); } } } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_shl_epi8(a, b) simde_mm_shl_epi8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_shl_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_shl_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vshlq_u16(a_.neon_u16, b_.neon_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { if (HEDLEY_UNLIKELY(b_.i16[i] < -15 || b_.i16[i] > 15)) { r_.u16[i] = 0; } else { if (b_.i16[i] < 0) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] >> -b_.i16[i]); } else { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << b_.i16[i]); } } } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_shl_epi16(a, b) simde_mm_shl_epi16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_shl_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_shl_epi32(a, b); #else simde__m128i_private r_, a_ = 
simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vshlq_u32(a_.neon_u32, b_.neon_i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { if (HEDLEY_UNLIKELY(b_.i32[i] < -31 || b_.i32[i] > 31)) { r_.u32[i] = 0; } else { if (b_.i32[i] < 0) { r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i] >> -b_.i32[i]); } else { r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i] << b_.i32[i]); } } } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_shl_epi32(a, b) simde_mm_shl_epi32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_shl_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_shl_epi64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u64 = vshlq_u64(a_.neon_u64, b_.neon_i64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { if (HEDLEY_UNLIKELY(b_.i64[i] < -63 || b_.i64[i] > 63)) { r_.u64[i] = 0; } else { if (b_.i64[i] < 0) { r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u64[i] >> -b_.i64[i]); } else { r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u64[i] << b_.i64[i]); } } } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_shl_epi64(a, b) simde_mm_shl_epi64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_rot_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_rot_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (b_.i8[i] < 0) ? 
HEDLEY_STATIC_CAST(uint8_t, ((a_.u8[i] >> -b_.i8[i]) | (a_.u8[i] << ( b_.i8[i] & 7)))) : HEDLEY_STATIC_CAST(uint8_t, ((a_.u8[i] << b_.i8[i]) | (a_.u8[i] >> (-b_.i8[i] & 7)))); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_rot_epi8(a, b) simde_mm_rot_epi8((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_rot_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_rot_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (b_.i16[i] < 0) ? HEDLEY_STATIC_CAST(uint16_t, ((a_.u16[i] >> -b_.i16[i]) | (a_.u16[i] << ( b_.i16[i] & 15)))) : HEDLEY_STATIC_CAST(uint16_t, ((a_.u16[i] << b_.i16[i]) | (a_.u16[i] >> (-b_.i16[i] & 15)))); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_rot_epi16(a, b) simde_mm_rot_epi16((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_rot_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_rot_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = (b_.i32[i] < 0) ? 
HEDLEY_STATIC_CAST(uint32_t, ((a_.u32[i] >> -b_.i32[i]) | (a_.u32[i] << ( b_.i32[i] & 31)))) : HEDLEY_STATIC_CAST(uint32_t, ((a_.u32[i] << b_.i32[i]) | (a_.u32[i] >> (-b_.i32[i] & 31)))); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_rot_epi32(a, b) simde_mm_rot_epi32((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_rot_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_rot_epi64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = (b_.i64[i] < 0) ? HEDLEY_STATIC_CAST(uint64_t, ((a_.u64[i] >> -b_.i64[i]) | (a_.u64[i] << ( b_.i64[i] & 63)))) : HEDLEY_STATIC_CAST(uint64_t, ((a_.u64[i] << b_.i64[i]) | (a_.u64[i] >> (-b_.i64[i] & 63)))); } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_rot_epi64(a, b) simde_mm_rot_epi64((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_roti_epi8 (simde__m128i a, const int count) { simde__m128i_private r_, a_ = simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (count < 0) ? 
HEDLEY_STATIC_CAST(uint8_t, ((a_.u8[i] >> -count) | (a_.u8[i] << ( count & 7)))) : HEDLEY_STATIC_CAST(uint8_t, ((a_.u8[i] << count) | (a_.u8[i] >> (-count & 7)))); } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_XOP_NATIVE) #define simde_mm_roti_epi8(a, count) _mm_roti_epi8((a), (count)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_roti_epi8(a, b) simde_mm_roti_epi8((a), (count)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_roti_epi16 (simde__m128i a, const int count) { simde__m128i_private r_, a_ = simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (count < 0) ? HEDLEY_STATIC_CAST(uint16_t, ((a_.u16[i] >> -count) | (a_.u16[i] << ( count & 15)))) : HEDLEY_STATIC_CAST(uint16_t, ((a_.u16[i] << count) | (a_.u16[i] >> (-count & 15)))); } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_XOP_NATIVE) #define simde_mm_roti_epi16(a, count) _mm_roti_epi16((a), (count)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_roti_epi16(a, count) simde_mm_roti_epi16((a), (count)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_roti_epi32 (simde__m128i a, const int count) { simde__m128i_private r_, a_ = simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = (count < 0) ? 
HEDLEY_STATIC_CAST(uint32_t, ((a_.u32[i] >> -count) | (a_.u32[i] << ( count & 31)))) : HEDLEY_STATIC_CAST(uint32_t, ((a_.u32[i] << count) | (a_.u32[i] >> (-count & 31)))); } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_XOP_NATIVE) #define simde_mm_roti_epi32(a, count) _mm_roti_epi32((a), (count)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_roti_epi32(a, count) simde_mm_roti_epi32((a), (count)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_roti_epi64 (simde__m128i a, const int count) { simde__m128i_private r_, a_ = simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = (count < 0) ? HEDLEY_STATIC_CAST(uint64_t, ((a_.u64[i] >> -count) | (a_.u64[i] << ( count & 63)))) : HEDLEY_STATIC_CAST(uint64_t, ((a_.u64[i] << count) | (a_.u64[i] >> (-count & 63)))); } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_XOP_NATIVE) #define simde_mm_roti_epi64(a, count) _mm_roti_epi64((a), (count)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_roti_epi64(a, count) simde_mm_roti_epi64((a), (count)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_perm_epi8 (simde__m128i a, simde__m128i b, simde__m128i c) { #if defined(SIMDE_X86_XOP_NATIVE) return _mm_perm_epi8(a, b, c); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), c_ = simde__m128i_to_private(c); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { int8_t src = (c_.u8[i] & 0x10) ? 
b_.i8[c_.u8[i] & 0xf] : a_.i8[c_.u8[i] & 0xf]; switch (c_.u8[i] & 0xc0) { case 0x40: #if HEDLEY_HAS_BUILTIN(__builtin_bitreverse8) && !defined(HEDLEY_IBM_VERSION) src = HEDLEY_STATIC_CAST(int8_t, __builtin_bitreverse8(HEDLEY_STATIC_CAST(uint8_t, src))); #else src = HEDLEY_STATIC_CAST(int8_t, ((HEDLEY_STATIC_CAST(uint8_t, src) * UINT64_C(0x80200802)) & UINT64_C(0x0884422110)) * UINT64_C(0x0101010101) >> 32); #endif break; case 0x80: src = 0; break; case 0xc0: src >>= 7; break; } r_.i8[i] = (c_.u8[i] & 0x20) ? ~src : src; } return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_perm_epi8(a, b, c) simde_mm_perm_epi8((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_permute2_ps (simde__m128 a, simde__m128 b, simde__m128i c, const int imm8) { simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); simde__m128i_private c_ = simde__m128i_to_private(c); const int m2z = imm8 & 0x03; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { const int sel = c_.i32[i] & 0x07; const int m = c_.i32[i] & 0x08; switch (m | m2z) { case 0xa: case 0x3: r_.i32[i] = 0; break; default: r_.i32[i] = (sel > 3) ? 
b_.i32[sel - 4] : a_.i32[sel]; break; } } return simde__m128_from_private(r_); } #if defined(SIMDE_X86_XOP_NATIVE) #define simde_mm_permute2_ps(a, b, c, imm8) _mm_permute2_ps((a), (b), (c), (imm8)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_permute2_ps(a, b, c, imm8) simde_mm_permute2_ps((a), (b), (c), (imm8)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_permute2_pd (simde__m128d a, simde__m128d b, simde__m128i c, const int imm8) { simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); simde__m128i_private c_ = simde__m128i_to_private(c); const int m2z = imm8 & 0x03; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { const int sel = (c_.i64[i] & 0x06) >> 1; const int m = c_.i64[i] & 0x08; switch (m | m2z) { case 0x0a: case 0x03: r_.i64[i] = 0; break; default: r_.i64[i] = (sel > 1) ? b_.i64[sel - 2] : a_.i64[sel]; break; } } return simde__m128d_from_private(r_); } #if defined(SIMDE_X86_XOP_NATIVE) #define simde_mm_permute2_pd(a, b, c, imm8) _mm_permute2_pd((a), (b), (c), (imm8)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm_permute2_pd(a, b, c, imm8) simde_mm_permute2_pd((a), (b), (c), (imm8)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_permute2_ps (simde__m256 a, simde__m256 b, simde__m256i c, const int imm8) { simde__m256_private r_, a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); simde__m256i_private c_ = simde__m256i_to_private(c); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { r_.m128[i] = simde_mm_permute2_ps(a_.m128[i], b_.m128[i], c_.m128i[i], imm8); } #else const int m2z = imm8 & 0x03; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { const int sel = c_.i32[i] & 0x07; const int m = c_.i32[i] & 0x08; switch (m | m2z) { case 0xa: case 0x3: r_.i32[i] = 0; break; default: r_.i32[i] = (sel > 3) ? 
b_.i32[sel + (HEDLEY_STATIC_CAST(int, i) & 4) - 4] : a_.i32[sel + (HEDLEY_STATIC_CAST(int, i) & 4)]; break; } } #endif return simde__m256_from_private(r_); } #if defined(SIMDE_X86_XOP_NATIVE) #define simde_mm256_permute2_ps(a, b, c, imm8) _mm256_permute2_ps((a), (b), (c), (imm8)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm256_permute2_ps(a, b, c, imm8) simde_mm256_permute2_ps((a), (b), (c), (imm8)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m256d simde_mm256_permute2_pd (simde__m256d a, simde__m256d b, simde__m256i c, const int imm8) { simde__m256d_private r_, a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); simde__m256i_private c_ = simde__m256i_to_private(c); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { r_.m128d[i] = simde_mm_permute2_pd(a_.m128d[i], b_.m128d[i], c_.m128i[i], imm8); } #else const int m2z = imm8 & 0x03; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { const int sel = (c_.i64[i] & 0x06) >> 1; const int m = c_.i64[i] & 0x08; switch (m | m2z) { case 0x0a: case 0x03: r_.i64[i] = 0; break; default: r_.i64[i] = (sel > 1) ? 
b_.i64[sel + (HEDLEY_STATIC_CAST(int, i) & 2) - 2] : a_.i64[sel + (HEDLEY_STATIC_CAST(int, i) & 2)]; break; } } #endif return simde__m256d_from_private(r_); } #if defined(SIMDE_X86_XOP_NATIVE) #define simde_mm256_permute2_pd(a, b, c, imm8) _mm256_permute2_pd((a), (b), (c), (imm8)) #endif #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) #define _mm256_permute2_pd(a, b, c, imm8) simde_mm256_permute2_pd((a), (b), (c), (imm8)) #endif HEDLEY_DIAGNOSTIC_POP SIMDE_END_DECLS_ #endif /* !defined(SIMDE_X86_XOP_H) */ simde-0.7.2/test/000077500000000000000000000000001400333146700136015ustar00rootroot00000000000000simde-0.7.2/test/.gitignore000066400000000000000000000000641400333146700155710ustar00rootroot00000000000000/munit.o /test-*.o /test-native /test-emul /iig.xml simde-0.7.2/test/CMakeLists.txt000066400000000000000000000125021400333146700163410ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.0) project(simde-tests) set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake) include (ExtraWarningFlags) enable_testing() option(BUILD_CPP_TESTS "Build C++ tests" ON) if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/munit/munit.c") find_program(GIT git) if(GIT) execute_process(COMMAND ${GIT} submodule update --init --recursive) else() message (FATAL_ERROR "It looks like you don't have submodules checked out. 
Please run `git submodule update --init --recursive'") endif() endif() if(CMAKE_BUILD_TYPE STREQUAL "") set(CMAKE_BUILD_TYPE "Debug") elseif(CMAKE_BUILD_TYPE STREQUAL "Coverage") set(orig_req_libs "${CMAKE_REQUIRED_LIBRARIES}") set(CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES};--coverage") check_c_compiler_flag("--coverage" CFLAG___coverage) set(CMAKE_REQUIRED_LIBRARIES "${orig_req_libs}") if(CFLAG___coverage) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --coverage") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_C_FLAGS} --coverage") add_definitions("-DSIMDE_NO_INLINE") else() set(CMAKE_BUILD_TYPE "Debug") endif() endif() add_library(munit STATIC munit/munit.c) include(CheckSymbolExists) check_symbol_exists(clock_gettime "time.h" CLOCK_GETTIME_RES) if(CLOCK_GETTIME_RES) set(CLOCK_GETTIME_EXISTS yes) else() set(orig_req_libs "${CMAKE_REQUIRED_LIBRARIES}") set(CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES};rt") check_symbol_exists(clock_gettime "time.h" CLOCK_GETTIME_LIBRT_RES) if(CLOCK_GETTIME_LIBRT_RES) set(CLOCK_GETTIME_EXISTS yes) set(CLOCK_GETTIME_LIBRARY "rt") endif() set(CMAKE_REQUIRED_LIBRARIES "${orig_req_libs}") unset(orig_req_libs) endif() check_symbol_exists(fegetround "fenv.h" FEGETROUND_EXISTS) if(NOT FEGETROUND_EXISTS) unset(FEGETROUND_EXISTS CACHE) list(APPEND CMAKE_REQUIRED_LIBRARIES m) check_symbol_exists(fegetround "fenv.h" FEGETROUND_EXISTS) if(FEGETROUND_EXISTS) set(NEED_LIBM True) else() message(FATAL_ERROR "Unable to find fegetround") endif() endif(NOT FEGETROUND_EXISTS) set_property(TARGET munit PROPERTY C_STANDARD "99") if("${CLOCK_GETTIME_EXISTS}") target_compile_definitions(munit PRIVATE "MUNIT_ALLOW_CLOCK_GETTIME") target_link_libraries(munit "${CLOCK_GETTIME_LIBRARY}") endif() if("${OPENMP_SIMD_FLAGS}" STREQUAL "") foreach(omp_simd_flag "-fopenmp-simd" "-qopenmp-simd") string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" omp_simd_flag_name "CFLAG_${omp_simd_flag}") check_c_compiler_flag("${omp_simd_flag}" "${omp_simd_flag_name}") 
if(${omp_simd_flag_name}) set(OPENMP_SIMD_FLAGS "-DSIMDE_ENABLE_OPENMP ${omp_simd_flag}") break() endif() endforeach() endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_SIMD_FLAGS}") if("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel") add_definitions(-DSIMDE_FAST_MATH) endif() aux_source_directory("arm/neon" ARM_NEON_SOURCES_C) list(REMOVE_ITEM ARM_NEON_SOURCES_C "arm/neon/run-tests.c") list(REMOVE_ITEM ARM_NEON_SOURCES_C "arm/neon/skel.c") list(REMOVE_ITEM ARM_NEON_SOURCES_C "arm/neon/skel-single.c") list(REMOVE_ITEM ARM_NEON_SOURCES_C "arm/neon/skel-triple.c") aux_source_directory("x86/avx512" X86_AVX512_SOURCES_C) list(REMOVE_ITEM X86_AVX512_SOURCES_C "x86/avx512/run-tests.c") list(REMOVE_ITEM X86_AVX512_SOURCES_C "x86/avx512/skel.c") aux_source_directory("x86" X86_SOURCES_C) list(REMOVE_ITEM X86_SOURCES_C "x86/skel.c") list(REMOVE_ITEM X86_SOURCES_C "x86/run-tests.c") set(TEST_SOURCES_C ${X86_SOURCES_C} ${X86_AVX512_SOURCES_C} ${ARM_NEON_SOURCES_C}) set(TEST_RUNNER_SOURCES run-tests.c x86/run-tests.c x86/avx512/run-tests.c arm/run-tests.c arm/neon/run-tests.c) add_executable(run-tests ${TEST_RUNNER_SOURCES}) set_property(TARGET run-tests PROPERTY C_STANDARD "99") target_add_compiler_flags(run-tests "-Wno-psabi") target_link_libraries(run-tests munit) target_add_compiler_flags (munit "-w") if(NEED_LIBM) target_link_libraries(run-tests m) endif(NEED_LIBM) set(TEST_SOURCES_CPP) if(BUILD_CPP_TESTS) foreach(csource ${TEST_SOURCES_C}) configure_file("${csource}" "${CMAKE_CURRENT_BINARY_DIR}/${csource}pp") list(APPEND TEST_SOURCES_CPP "${CMAKE_CURRENT_BINARY_DIR}/${csource}pp") get_filename_component(DIR "${csource}" DIRECTORY) set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${csource}pp" APPEND PROPERTY COMPILE_FLAGS " -I${CMAKE_CURRENT_SOURCE_DIR}/${DIR}") endforeach() add_definitions(-DSIMDE_BUILD_CPP_TESTS) endif(BUILD_CPP_TESTS) foreach(native native emul) add_library(simde-test-${native} STATIC ${TEST_SOURCES_C} ${TEST_SOURCES_CPP}) 
target_include_directories(simde-test-${native} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..") set_property(TARGET simde-test-${native} PROPERTY C_STANDARD "99") target_link_libraries(run-tests simde-test-${native}) target_add_compiler_flags(simde-test-${native} "-Wno-psabi") endforeach(native native emul) target_compile_definitions(simde-test-emul PRIVATE SIMDE_NO_NATIVE) foreach(src ${TEST_SOURCES_C}) string(REGEX REPLACE "^(.+)\\.c$" "/\\1" TEST_NAME "${src}") add_test(NAME "${TEST_NAME}" COMMAND $ "${TEST_NAME}") endforeach(src ${TEST_SOURCES_C}) message(WARNING "CMake support is deprecated; please use Meson instead. CMake is only present " "for compilers which Meson doesn't yet support (e.g., xlc) and platforms where " "difficult to run an up-to-date copy of Meson (e.g., Ubuntu 12.04).") simde-0.7.2/test/arm/000077500000000000000000000000001400333146700143605ustar00rootroot00000000000000simde-0.7.2/test/arm/declare-suites.h000066400000000000000000000000371400333146700174420ustar00rootroot00000000000000SIMDE_TEST_DECLARE_SUITE(neon) simde-0.7.2/test/arm/meson.build000066400000000000000000000000171400333146700165200ustar00rootroot00000000000000subdir('neon') simde-0.7.2/test/arm/neon/000077500000000000000000000000001400333146700153175ustar00rootroot00000000000000simde-0.7.2/test/arm/neon/aba.c000066400000000000000000001750401400333146700162150ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN aba #include "test-neon.h" #include "../../../simde/arm/neon/aba.h" /* https://community.intel.com/t5/Intel-C-Compiler/ICC-generates-incorrect-code/td-p/1199261 */ #if defined(HEDLEY_INTEL_VERSION) # define TEST_SIMDE_VABD_NO_TEST_32 #endif static int test_simde_vaba_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; int8_t c[8]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 63), -INT8_C( 30), INT8_C( 29), -INT8_C( 57), -INT8_C( 97), -INT8_C( 25), -INT8_C( 66), -INT8_C( 96) }, { -INT8_C( 103), INT8_C( 34), INT8_C( 89), INT8_C( 7), -INT8_C( 
11), INT8_C( 119), -INT8_C( 68), INT8_C( 2) }, { INT8_C( 18), INT8_C( 15), INT8_C( 41), -INT8_C( 14), -INT8_C( 24), INT8_MIN, -INT8_C( 66), -INT8_C( 116) }, { INT8_C( 58), -INT8_C( 11), INT8_C( 77), -INT8_C( 36), -INT8_C( 84), -INT8_C( 34), -INT8_C( 64), INT8_C( 22) } }, { { INT8_C( 10), -INT8_C( 95), -INT8_C( 46), INT8_C( 88), INT8_C( 46), INT8_C( 34), INT8_C( 80), -INT8_C( 17) }, { INT8_C( 4), INT8_C( 109), -INT8_C( 73), -INT8_C( 93), INT8_C( 85), INT8_C( 117), INT8_C( 67), -INT8_C( 18) }, { -INT8_C( 104), -INT8_C( 99), -INT8_C( 10), -INT8_C( 115), INT8_C( 20), -INT8_C( 78), -INT8_C( 113), INT8_C( 39) }, { INT8_C( 118), INT8_C( 113), INT8_C( 17), INT8_C( 110), INT8_C( 111), -INT8_C( 27), INT8_C( 4), INT8_C( 40) } }, { { -INT8_C( 62), -INT8_C( 71), INT8_C( 25), -INT8_C( 86), INT8_C( 57), -INT8_C( 40), INT8_C( 55), INT8_C( 67) }, { INT8_C( 121), INT8_C( 9), -INT8_C( 101), -INT8_C( 89), INT8_C( 43), -INT8_C( 20), -INT8_C( 105), INT8_C( 47) }, { INT8_C( 89), INT8_C( 78), -INT8_C( 45), -INT8_C( 82), -INT8_C( 61), INT8_C( 22), -INT8_C( 99), INT8_C( 91) }, { -INT8_C( 30), -INT8_C( 2), INT8_C( 81), -INT8_C( 79), -INT8_C( 95), INT8_C( 2), INT8_C( 61), INT8_C( 111) } }, { { -INT8_C( 77), -INT8_C( 109), -INT8_C( 23), -INT8_C( 56), INT8_C( 69), INT8_C( 120), -INT8_C( 17), INT8_C( 7) }, { INT8_C( 49), INT8_C( 8), -INT8_C( 78), INT8_C( 106), -INT8_C( 32), -INT8_C( 23), -INT8_C( 83), INT8_C( 89) }, { -INT8_C( 14), INT8_C( 73), INT8_C( 1), INT8_C( 29), INT8_C( 53), -INT8_C( 104), INT8_C( 76), -INT8_C( 114) }, { -INT8_C( 14), -INT8_C( 44), INT8_C( 56), INT8_C( 21), -INT8_C( 102), -INT8_C( 55), -INT8_C( 114), -INT8_C( 46) } }, { { -INT8_C( 26), INT8_C( 31), INT8_C( 61), -INT8_C( 87), INT8_C( 54), -INT8_C( 38), INT8_C( 5), -INT8_C( 23) }, { INT8_C( 109), -INT8_C( 18), -INT8_C( 79), -INT8_C( 78), INT8_C( 102), -INT8_C( 96), -INT8_C( 70), -INT8_C( 104) }, { -INT8_C( 87), INT8_C( 108), INT8_C( 2), -INT8_C( 119), INT8_C( 85), -INT8_C( 80), -INT8_C( 29), INT8_C( 71) }, { -INT8_C( 86), 
-INT8_C( 99), -INT8_C( 114), -INT8_C( 46), INT8_C( 71), -INT8_C( 22), INT8_C( 46), -INT8_C( 104) } }, { { -INT8_C( 7), -INT8_C( 28), INT8_C( 100), INT8_C( 46), INT8_C( 124), -INT8_C( 80), -INT8_C( 68), INT8_C( 98) }, { -INT8_C( 48), -INT8_C( 7), INT8_C( 11), INT8_C( 6), -INT8_C( 45), INT8_C( 16), -INT8_C( 17), INT8_C( 64) }, { -INT8_C( 2), -INT8_C( 95), -INT8_C( 13), INT8_C( 101), INT8_C( 65), -INT8_C( 83), -INT8_C( 3), -INT8_C( 22) }, { INT8_C( 39), INT8_C( 60), INT8_C( 124), -INT8_C( 115), -INT8_C( 22), INT8_C( 19), -INT8_C( 54), -INT8_C( 72) } }, { { INT8_C( 25), -INT8_C( 1), INT8_C( 116), INT8_C( 110), -INT8_C( 81), INT8_C( 87), -INT8_C( 75), -INT8_C( 88) }, { INT8_C( 59), INT8_C( 25), -INT8_C( 42), -INT8_C( 73), -INT8_C( 55), -INT8_C( 109), INT8_C( 25), -INT8_C( 103) }, { -INT8_C( 116), INT8_C( 36), -INT8_C( 97), INT8_C( 96), INT8_C( 53), -INT8_C( 113), -INT8_C( 96), INT8_C( 51) }, { -INT8_C( 56), INT8_C( 10), -INT8_C( 85), INT8_C( 23), INT8_C( 27), INT8_C( 91), INT8_C( 46), INT8_C( 66) } }, { { INT8_C( 48), -INT8_C( 109), -INT8_C( 104), INT8_C( 113), INT8_C( 64), -INT8_C( 107), INT8_C( 92), INT8_C( 89) }, { -INT8_C( 107), -INT8_C( 48), -INT8_C( 57), INT8_C( 68), INT8_C( 39), INT8_C( 124), -INT8_C( 19), INT8_C( 98) }, { -INT8_C( 107), -INT8_C( 61), INT8_C( 25), INT8_C( 95), INT8_C( 86), INT8_C( 50), -INT8_C( 8), -INT8_C( 29) }, { INT8_C( 48), -INT8_C( 96), -INT8_C( 22), -INT8_C( 116), INT8_C( 111), -INT8_C( 33), INT8_C( 103), -INT8_C( 40) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t c = simde_vld1_s8(test_vec[i].c); simde_int8x8_t r = simde_vaba_s8(a, b, c); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); 
simde_int8x8_t c = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vaba_s8(a, b, c); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaba_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int16_t c[4]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 17843), INT16_C( 24491), -INT16_C( 27890), -INT16_C( 20171) }, { -INT16_C( 23451), -INT16_C( 7916), INT16_C( 14616), INT16_C( 19459) }, { INT16_C( 27799), -INT16_C( 19103), -INT16_C( 26200), INT16_C( 3764) }, { -INT16_C( 32129), -INT16_C( 29858), INT16_C( 12926), -INT16_C( 4476) } }, { { -INT16_C( 27609), -INT16_C( 31965), -INT16_C( 14129), INT16_C( 7408) }, { -INT16_C( 25470), -INT16_C( 28549), -INT16_C( 20433), -INT16_C( 27326) }, { INT16_C( 22101), INT16_C( 28022), INT16_C( 31120), INT16_C( 10169) }, { INT16_C( 19962), INT16_C( 24606), -INT16_C( 28112), -INT16_C( 20633) } }, { { INT16_C( 6886), -INT16_C( 28964), -INT16_C( 28237), -INT16_C( 9571) }, { -INT16_C( 16347), -INT16_C( 2978), INT16_C( 20104), INT16_C( 2576) }, { -INT16_C( 29718), INT16_C( 6811), -INT16_C( 8901), -INT16_C( 28497) }, { INT16_C( 20257), -INT16_C( 19175), INT16_C( 768), INT16_C( 21502) } }, { { INT16_C( 9523), -INT16_C( 15363), -INT16_C( 18785), -INT16_C( 31253) }, { -INT16_C( 14383), -INT16_C( 31725), -INT16_C( 20392), INT16_C( 32095) }, { -INT16_C( 17040), -INT16_C( 1935), -INT16_C( 32501), -INT16_C( 2557) }, { INT16_C( 12180), INT16_C( 14427), -INT16_C( 6676), INT16_C( 3399) } }, { { -INT16_C( 25076), INT16_C( 18448), -INT16_C( 16517), -INT16_C( 20776) }, { -INT16_C( 10524), -INT16_C( 31886), INT16_C( 23948), INT16_C( 23816) }, { INT16_C( 7204), INT16_C( 32226), INT16_C( 16844), INT16_C( 15866) }, { -INT16_C( 7348), INT16_C( 17024), -INT16_C( 9413), 
-INT16_C( 12826) } }, { { INT16_C( 27902), INT16_C( 2357), INT16_C( 14573), -INT16_C( 1281) }, { INT16_C( 4054), INT16_C( 20802), INT16_C( 6862), -INT16_C( 19712) }, { INT16_C( 29424), INT16_C( 32054), INT16_C( 16335), -INT16_C( 3110) }, { -INT16_C( 12264), INT16_C( 13609), INT16_C( 24046), INT16_C( 15321) } }, { { -INT16_C( 17317), INT16_C( 10096), INT16_C( 27645), -INT16_C( 1180) }, { -INT16_C( 25897), -INT16_C( 15355), INT16_C( 1234), -INT16_C( 22082) }, { INT16_C( 20), -INT16_C( 7430), -INT16_C( 1509), INT16_C( 2965) }, { INT16_C( 8600), INT16_C( 18021), INT16_C( 30388), INT16_C( 23867) } }, { { -INT16_C( 13204), INT16_C( 15240), INT16_C( 25355), INT16_C( 26159) }, { -INT16_C( 24801), INT16_C( 7565), -INT16_C( 3574), -INT16_C( 7912) }, { INT16_C( 7564), INT16_C( 24230), INT16_C( 25634), INT16_C( 13831) }, { INT16_C( 19161), INT16_C( 31905), -INT16_C( 10973), -INT16_C( 17634) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t c = simde_vld1_s16(test_vec[i].c); simde_int16x4_t r = simde_vaba_s16(a, b, c); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int16x4_t c = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vaba_s16(a, b, c); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaba_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int32_t c[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 850247872), 
INT32_C( 228627924) }, { -INT32_C( 1002407850), -INT32_C( 1479470080) }, { INT32_C( 156520649), INT32_C( 1948288923) }, { INT32_C( 308680627), -INT32_C( 638580369) } }, { { INT32_C( 1567609108), -INT32_C( 1636072610) }, { -INT32_C( 1771254078), -INT32_C( 1163654045) }, { -INT32_C( 2072058748), -INT32_C( 1037348615) }, { INT32_C( 1868413778), -INT32_C( 1509767180) } }, { { INT32_C( 1003192480), INT32_C( 464579591) }, { INT32_C( 477700029), INT32_C( 1706751138) }, { INT32_C( 653993666), -INT32_C( 1210015950) }, { INT32_C( 1179486117), -INT32_C( 913620617) } }, { { INT32_C( 2101042820), INT32_C( 1312778158) }, { -INT32_C( 292942873), -INT32_C( 1274397961) }, { -INT32_C( 70220967), INT32_C( 979405687) }, { -INT32_C( 1971202570), -INT32_C( 728385490) } }, { { -INT32_C( 463446863), INT32_C( 2140881147) }, { INT32_C( 1308415646), INT32_C( 614153277) }, { INT32_C( 1041442119), -INT32_C( 1192092321) }, { -INT32_C( 196473336), -INT32_C( 347840551) } }, { { INT32_C( 397722528), -INT32_C( 11463602) }, { INT32_C( 1826861424), -INT32_C( 1863614735) }, { -INT32_C( 1830950827), INT32_C( 1807185956) }, { -INT32_C( 239432517), -INT32_C( 635630207) } }, { { -INT32_C( 39204194), -INT32_C( 2018075673) }, { -INT32_C( 1398904226), -INT32_C( 273878914) }, { -INT32_C( 1822715999), INT32_C( 1663256078) }, { INT32_C( 384607579), -INT32_C( 80940681) } }, { { INT32_C( 1391788078), INT32_C( 381529208) }, { INT32_C( 1561618038), INT32_C( 1625606658) }, { -INT32_C( 1307737292), INT32_C( 346143091) }, { -INT32_C( 33823888), INT32_C( 1660992775) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t c = simde_vld1_s32(test_vec[i].c); simde_int32x2_t r = simde_vaba_s32(a, b, c); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = 
simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int32x2_t c = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vaba_s32(a, b, c); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaba_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; uint8_t c[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 17), UINT8_C(246), UINT8_C(124), UINT8_C( 27), UINT8_C( 1), UINT8_C( 92), UINT8_C(248), UINT8_C(208) }, { UINT8_C(212), UINT8_C(211), UINT8_C(252), UINT8_C( 10), UINT8_C( 66), UINT8_C( 76), UINT8_C( 57), UINT8_C(253) }, { UINT8_C( 40), UINT8_C(175), UINT8_C(209), UINT8_C(209), UINT8_C(140), UINT8_C(241), UINT8_C(147), UINT8_C(212) }, { UINT8_C(189), UINT8_C( 26), UINT8_C(167), UINT8_C(226), UINT8_C( 75), UINT8_C( 1), UINT8_C( 82), UINT8_C(249) } }, { { UINT8_C( 17), UINT8_C(152), UINT8_C(248), UINT8_C( 62), UINT8_C( 2), UINT8_C(241), UINT8_C( 63), UINT8_C( 19) }, { UINT8_C(231), UINT8_C(188), UINT8_C( 46), UINT8_C(233), UINT8_C( 24), UINT8_C( 39), UINT8_C(185), UINT8_C(236) }, { UINT8_C(250), UINT8_C(181), UINT8_C(247), UINT8_C( 60), UINT8_C( 1), UINT8_C( 48), UINT8_C( 58), UINT8_C( 41) }, { UINT8_C( 36), UINT8_C(159), UINT8_C(193), UINT8_C(235), UINT8_C( 25), UINT8_C(250), UINT8_C(190), UINT8_C(214) } }, { { UINT8_C(223), UINT8_C( 11), UINT8_C(251), UINT8_C(108), UINT8_C(252), UINT8_C(142), UINT8_C( 64), UINT8_C( 13) }, { UINT8_C( 38), UINT8_C( 57), UINT8_C( 75), UINT8_C( 40), UINT8_C( 42), UINT8_C(139), UINT8_C( 60), UINT8_C( 18) }, { UINT8_C( 71), UINT8_C(106), UINT8_C(251), UINT8_C( 95), UINT8_C(145), UINT8_C(180), UINT8_C( 76), UINT8_C(140) }, { UINT8_C( 0), UINT8_C( 60), UINT8_C(171), UINT8_C(163), UINT8_C( 99), UINT8_C(183), 
UINT8_C( 80), UINT8_C(135) } }, { { UINT8_C(105), UINT8_C( 67), UINT8_C(200), UINT8_C(106), UINT8_C(115), UINT8_C( 2), UINT8_C(148), UINT8_C( 82) }, { UINT8_C( 13), UINT8_C(143), UINT8_C(190), UINT8_C( 9), UINT8_C( 29), UINT8_MAX, UINT8_C( 23), UINT8_C( 67) }, { UINT8_C( 56), UINT8_C( 98), UINT8_C(107), UINT8_C( 98), UINT8_C(237), UINT8_C(167), UINT8_C(116), UINT8_C( 52) }, { UINT8_C(148), UINT8_C(112), UINT8_C( 27), UINT8_C(195), UINT8_C( 67), UINT8_C( 90), UINT8_C(241), UINT8_C( 97) } }, { { UINT8_C( 18), UINT8_C(111), UINT8_C(148), UINT8_C(163), UINT8_C( 35), UINT8_C(224), UINT8_C( 47), UINT8_C(140) }, { UINT8_C( 35), UINT8_C(248), UINT8_C(247), UINT8_C(150), UINT8_C(250), UINT8_C(139), UINT8_C(232), UINT8_C( 8) }, { UINT8_C( 26), UINT8_C(167), UINT8_C( 17), UINT8_C( 55), UINT8_C(166), UINT8_C( 40), UINT8_C(122), UINT8_C(222) }, { UINT8_C( 27), UINT8_C(192), UINT8_C(122), UINT8_C( 2), UINT8_C(119), UINT8_C( 67), UINT8_C(157), UINT8_C( 98) } }, { { UINT8_C(139), UINT8_C(229), UINT8_C( 64), UINT8_C(120), UINT8_C(141), UINT8_C(181), UINT8_C(173), UINT8_C(159) }, { UINT8_C( 36), UINT8_C( 65), UINT8_C( 66), UINT8_C( 72), UINT8_C( 33), UINT8_C(114), UINT8_C(212), UINT8_C( 68) }, { UINT8_C(106), UINT8_C(203), UINT8_C(218), UINT8_C(100), UINT8_C( 86), UINT8_C(194), UINT8_C(108), UINT8_C(112) }, { UINT8_C(209), UINT8_C(111), UINT8_C(216), UINT8_C(148), UINT8_C(194), UINT8_C( 5), UINT8_C( 21), UINT8_C(203) } }, { { UINT8_C(105), UINT8_C(126), UINT8_C(167), UINT8_C( 15), UINT8_C(166), UINT8_C( 33), UINT8_C(237), UINT8_C( 49) }, { UINT8_C( 7), UINT8_C( 46), UINT8_C(170), UINT8_C(148), UINT8_C(227), UINT8_C( 87), UINT8_C( 51), UINT8_C( 7) }, { UINT8_C(152), UINT8_C(117), UINT8_C( 79), UINT8_C(185), UINT8_C(231), UINT8_C( 36), UINT8_C(253), UINT8_C( 81) }, { UINT8_C(250), UINT8_C(197), UINT8_C( 2), UINT8_C( 52), UINT8_C(170), UINT8_C( 84), UINT8_C(183), UINT8_C(123) } }, { { UINT8_C(239), UINT8_C(215), UINT8_C(182), UINT8_C( 70), UINT8_C(153), UINT8_C( 34), UINT8_C(182), 
UINT8_C( 3) }, { UINT8_C(160), UINT8_C( 94), UINT8_C( 18), UINT8_C( 71), UINT8_C(127), UINT8_C( 0), UINT8_C(120), UINT8_C(134) }, { UINT8_C( 46), UINT8_C( 34), UINT8_C( 26), UINT8_C( 17), UINT8_C(121), UINT8_C( 77), UINT8_C( 24), UINT8_C( 17) }, { UINT8_C( 97), UINT8_C( 19), UINT8_C(190), UINT8_C(124), UINT8_C(159), UINT8_C(111), UINT8_C( 22), UINT8_C(120) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t c = simde_vld1_u8(test_vec[i].c); simde_uint8x8_t r = simde_vaba_u8(a, b, c); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t c = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vaba_u8(a, b, c); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaba_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t c[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(26819), UINT16_C(43722), UINT16_C(51084), UINT16_C(31740) }, { UINT16_C(45726), UINT16_C(14529), UINT16_C(30932), UINT16_C(30011) }, { UINT16_C(19926), UINT16_C(21948), UINT16_C(13389), UINT16_C(31708) }, { UINT16_C(52619), UINT16_C(51141), UINT16_C( 3091), UINT16_C(33437) } }, { { UINT16_C(63063), UINT16_C(53388), UINT16_C(42308), UINT16_C( 2018) }, { UINT16_C(44045), UINT16_C(39345), UINT16_C(44404), UINT16_C( 4628) }, { UINT16_C(54879), UINT16_C(13386), UINT16_C(34126), UINT16_C( 9385) }, { UINT16_C( 8361), UINT16_C(13811), UINT16_C(52586), UINT16_C( 
6775) } }, { { UINT16_C(26067), UINT16_C( 8313), UINT16_C(21913), UINT16_C(61596) }, { UINT16_C(10316), UINT16_C(37057), UINT16_C(41933), UINT16_C(55959) }, { UINT16_C(18511), UINT16_C(50035), UINT16_C(35062), UINT16_C(21974) }, { UINT16_C(34262), UINT16_C(21291), UINT16_C(28784), UINT16_C(30045) } }, { { UINT16_C( 8286), UINT16_C(44169), UINT16_C(12966), UINT16_C(31184) }, { UINT16_C(18839), UINT16_C(12697), UINT16_C(13727), UINT16_C(60193) }, { UINT16_C(57950), UINT16_C(11131), UINT16_C( 4741), UINT16_C(54534) }, { UINT16_C(47397), UINT16_C(45735), UINT16_C(21952), UINT16_C(36843) } }, { { UINT16_C(31066), UINT16_C(20632), UINT16_C(28161), UINT16_C(24486) }, { UINT16_C(12175), UINT16_C(13579), UINT16_C(56162), UINT16_C(63918) }, { UINT16_C(18213), UINT16_C(50218), UINT16_C(19581), UINT16_C(56239) }, { UINT16_C(37104), UINT16_C(57271), UINT16_C(64742), UINT16_C(32165) } }, { { UINT16_C(10798), UINT16_C(46086), UINT16_C( 3132), UINT16_C(38537) }, { UINT16_C( 8582), UINT16_C(34791), UINT16_C(36240), UINT16_C( 8167) }, { UINT16_C(62140), UINT16_C( 7764), UINT16_C( 718), UINT16_C(62232) }, { UINT16_C(64356), UINT16_C( 7577), UINT16_C(38654), UINT16_C(27066) } }, { { UINT16_C(16969), UINT16_C(50871), UINT16_C(26254), UINT16_C(48545) }, { UINT16_C(43152), UINT16_C(52337), UINT16_C(64180), UINT16_C(14946) }, { UINT16_C(18715), UINT16_C(43970), UINT16_C(43478), UINT16_C(37834) }, { UINT16_C(41406), UINT16_C(59238), UINT16_C(46956), UINT16_C( 5897) } }, { { UINT16_C( 7835), UINT16_C(27057), UINT16_C(51488), UINT16_C(27228) }, { UINT16_C( 4876), UINT16_C(39472), UINT16_C(53881), UINT16_C( 2391) }, { UINT16_C(51322), UINT16_C(11989), UINT16_C(14530), UINT16_C(56937) }, { UINT16_C(54281), UINT16_C(54540), UINT16_C(25303), UINT16_C(16238) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t c = simde_vld1_u16(test_vec[i].c); 
simde_uint16x4_t r = simde_vaba_u16(a, b, c); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t c = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t r = simde_vaba_u16(a, b, c); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaba_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t c[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(1485384577), UINT32_C(1877693652) }, { UINT32_C(2480512114), UINT32_C(1929196902) }, { UINT32_C(3255577929), UINT32_C(2043438335) }, { UINT32_C(2260450392), UINT32_C(1991935085) } }, { { UINT32_C(4020805932), UINT32_C(1540166105) }, { UINT32_C( 280188476), UINT32_C( 494902954) }, { UINT32_C(2695911482), UINT32_C(3608325518) }, { UINT32_C(2141561642), UINT32_C( 358621373) } }, { { UINT32_C(3667468250), UINT32_C(2941478275) }, { UINT32_C(3768515335), UINT32_C(1211853580) }, { UINT32_C(1817767618), UINT32_C(3347699852) }, { UINT32_C(1323248671), UINT32_C( 782357251) } }, { { UINT32_C(3194435888), UINT32_C(3247799014) }, { UINT32_C( 479932313), UINT32_C(2613833364) }, { UINT32_C(4135348970), UINT32_C(2537535445) }, { UINT32_C(2554885249), UINT32_C(3324096933) } }, { { UINT32_C( 839161766), UINT32_C(2700709231) }, { UINT32_C(2908643783), UINT32_C(1953428699) }, { UINT32_C(3079670051), UINT32_C(3797113848) }, { UINT32_C(1010188034), UINT32_C( 249427084) } }, { { UINT32_C(2614677445), UINT32_C( 741480326) }, { UINT32_C( 509556399), UINT32_C(2344507588) }, { UINT32_C(2486705593), UINT32_C( 872982289) }, { UINT32_C( 
296859343), UINT32_C(2213005625) } }, { { UINT32_C(2834012336), UINT32_C(3112845044) }, { UINT32_C(2488558349), UINT32_C( 700483450) }, { UINT32_C(2168987581), UINT32_C( 822871672) }, { UINT32_C(3153583104), UINT32_C(3235233266) } }, { { UINT32_C( 885409059), UINT32_C(2624114412) }, { UINT32_C(1531270247), UINT32_C(2685718418) }, { UINT32_C(2905893170), UINT32_C(2916545776) }, { UINT32_C(2260031982), UINT32_C(2854941770) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t c = simde_vld1_u32(test_vec[i].c); simde_uint32x2_t r = simde_vaba_u32(a, b, c); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t c = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t r = simde_vaba_u32(a, b, c); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vabaq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t b[16]; int8_t c[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 74), INT8_C( 104), -INT8_C( 55), INT8_C( 64), -INT8_C( 42), -INT8_C( 57), INT8_C( 12), -INT8_C( 72), -INT8_C( 63), INT8_C( 99), -INT8_C( 106), -INT8_C( 48), INT8_C( 72), INT8_C( 62), INT8_C( 20), INT8_C( 88) }, { INT8_C( 87), -INT8_C( 93), INT8_C( 118), -INT8_C( 53), -INT8_C( 18), INT8_C( 24), INT8_C( 78), -INT8_C( 62), -INT8_C( 33), -INT8_C( 104), INT8_C( 27), INT8_C( 88), INT8_C( 68), INT8_C( 102), INT8_C( 102), -INT8_C( 114) }, { -INT8_C( 50), INT8_C( 47), -INT8_C( 50), 
-INT8_C( 92), -INT8_C( 9), -INT8_C( 37), INT8_C( 93), -INT8_C( 72), INT8_C( 62), -INT8_C( 13), -INT8_C( 120), -INT8_C( 122), INT8_C( 50), -INT8_C( 100), -INT8_C( 33), -INT8_C( 119) }, { -INT8_C( 45), -INT8_C( 12), INT8_C( 113), INT8_C( 103), -INT8_C( 33), INT8_C( 4), INT8_C( 27), -INT8_C( 62), INT8_C( 32), -INT8_C( 66), INT8_C( 41), -INT8_C( 94), INT8_C( 90), INT8_C( 8), -INT8_C( 101), INT8_C( 93) } }, { { INT8_C( 63), INT8_C( 85), INT8_C( 84), INT8_C( 45), INT8_C( 110), -INT8_C( 94), -INT8_C( 16), INT8_C( 77), INT8_C( 59), INT8_C( 11), -INT8_C( 91), INT8_MAX, INT8_C( 113), INT8_C( 12), INT8_C( 13), INT8_C( 63) }, { INT8_C( 59), -INT8_C( 36), -INT8_C( 28), INT8_C( 50), -INT8_C( 73), INT8_C( 65), -INT8_C( 22), -INT8_C( 11), INT8_C( 52), INT8_C( 115), INT8_C( 124), INT8_C( 102), INT8_C( 15), INT8_C( 91), -INT8_C( 17), INT8_C( 79) }, { -INT8_C( 80), INT8_C( 67), INT8_C( 124), INT8_C( 30), -INT8_C( 26), INT8_C( 108), INT8_C( 107), INT8_C( 33), INT8_C( 119), INT8_C( 17), -INT8_C( 96), -INT8_C( 24), INT8_C( 29), -INT8_C( 83), INT8_C( 40), INT8_C( 88) }, { -INT8_C( 54), -INT8_C( 68), -INT8_C( 20), INT8_C( 65), -INT8_C( 99), -INT8_C( 51), INT8_C( 113), INT8_C( 121), INT8_C( 126), INT8_C( 109), -INT8_C( 127), -INT8_C( 3), INT8_MAX, -INT8_C( 70), INT8_C( 70), INT8_C( 72) } }, { { -INT8_C( 119), INT8_C( 12), -INT8_C( 117), INT8_C( 64), INT8_C( 77), INT8_C( 117), INT8_C( 54), -INT8_C( 127), -INT8_C( 24), -INT8_C( 78), -INT8_C( 24), -INT8_C( 8), INT8_C( 13), -INT8_C( 41), INT8_C( 71), -INT8_C( 67) }, { INT8_C( 27), -INT8_C( 61), -INT8_C( 36), INT8_C( 1), INT8_C( 48), INT8_C( 71), INT8_C( 34), -INT8_C( 89), INT8_C( 88), -INT8_C( 62), -INT8_C( 112), INT8_C( 117), INT8_C( 111), -INT8_C( 72), -INT8_C( 50), -INT8_C( 7) }, { -INT8_C( 60), INT8_C( 89), INT8_C( 57), INT8_C( 17), -INT8_C( 50), INT8_C( 111), -INT8_C( 110), -INT8_C( 73), INT8_C( 33), INT8_C( 122), -INT8_C( 81), INT8_C( 46), INT8_C( 82), -INT8_C( 10), -INT8_C( 20), INT8_C( 109) }, { -INT8_C( 32), -INT8_C( 94), -INT8_C( 
24), INT8_C( 80), -INT8_C( 81), -INT8_C( 99), -INT8_C( 58), -INT8_C( 111), INT8_C( 31), INT8_C( 106), INT8_C( 7), INT8_C( 63), INT8_C( 42), INT8_C( 21), INT8_C( 101), INT8_C( 49) } }, { { -INT8_C( 71), -INT8_C( 56), INT8_C( 110), -INT8_C( 23), INT8_C( 15), -INT8_C( 112), -INT8_C( 111), INT8_C( 104), INT8_C( 82), INT8_C( 33), -INT8_C( 35), -INT8_C( 63), -INT8_C( 39), -INT8_C( 85), -INT8_C( 70), -INT8_C( 99) }, { INT8_C( 4), -INT8_C( 12), -INT8_C( 82), -INT8_C( 45), INT8_C( 99), INT8_C( 64), -INT8_C( 118), -INT8_C( 123), -INT8_C( 69), INT8_C( 57), -INT8_C( 77), INT8_C( 13), INT8_C( 47), -INT8_C( 97), INT8_C( 122), -INT8_C( 24) }, { INT8_C( 103), -INT8_C( 24), -INT8_C( 46), INT8_C( 119), INT8_C( 120), INT8_C( 99), -INT8_C( 33), -INT8_C( 54), -INT8_C( 124), -INT8_C( 68), -INT8_C( 117), INT8_C( 93), INT8_C( 104), INT8_C( 70), -INT8_C( 6), INT8_C( 108) }, { INT8_C( 28), -INT8_C( 44), -INT8_C( 110), -INT8_C( 115), INT8_C( 36), -INT8_C( 77), -INT8_C( 26), -INT8_C( 83), -INT8_C( 119), -INT8_C( 98), INT8_C( 5), INT8_C( 17), INT8_C( 18), INT8_C( 82), INT8_C( 58), INT8_C( 33) } }, { { INT8_C( 58), -INT8_C( 88), INT8_C( 63), -INT8_C( 99), -INT8_C( 24), -INT8_C( 55), INT8_C( 34), -INT8_C( 93), INT8_C( 2), -INT8_C( 42), -INT8_C( 80), INT8_C( 49), INT8_C( 117), INT8_C( 42), INT8_C( 26), -INT8_C( 35) }, { INT8_C( 18), -INT8_C( 20), INT8_C( 84), -INT8_C( 118), INT8_C( 79), INT8_C( 51), INT8_C( 84), -INT8_C( 45), -INT8_C( 17), -INT8_C( 32), INT8_C( 48), INT8_C( 87), INT8_C( 38), INT8_C( 42), -INT8_C( 60), INT8_C( 96) }, { -INT8_C( 46), INT8_C( 3), -INT8_C( 3), -INT8_C( 70), -INT8_C( 51), INT8_C( 32), INT8_C( 94), -INT8_C( 49), -INT8_C( 10), INT8_C( 14), INT8_C( 1), INT8_C( 107), INT8_C( 57), INT8_C( 27), INT8_C( 72), INT8_C( 75) }, { INT8_C( 122), -INT8_C( 65), -INT8_C( 106), -INT8_C( 51), INT8_C( 106), -INT8_C( 36), INT8_C( 44), -INT8_C( 89), INT8_C( 9), INT8_C( 4), -INT8_C( 33), INT8_C( 69), -INT8_C( 120), INT8_C( 57), -INT8_C( 98), -INT8_C( 14) } }, { { INT8_C( 7), -INT8_C( 100), 
-INT8_C( 42), INT8_C( 86), -INT8_C( 49), INT8_C( 42), INT8_C( 41), -INT8_C( 65), INT8_C( 10), INT8_C( 89), INT8_C( 22), INT8_C( 48), -INT8_C( 125), -INT8_C( 38), -INT8_C( 112), INT8_C( 85) }, { -INT8_C( 34), -INT8_C( 114), INT8_C( 15), -INT8_C( 85), -INT8_C( 82), INT8_C( 109), INT8_C( 122), -INT8_C( 92), INT8_C( 124), INT8_C( 123), INT8_C( 15), -INT8_C( 75), -INT8_C( 106), INT8_C( 88), INT8_C( 0), -INT8_C( 99) }, { -INT8_C( 12), -INT8_C( 42), -INT8_C( 13), -INT8_C( 60), INT8_C( 1), INT8_C( 28), -INT8_C( 125), INT8_C( 11), INT8_C( 117), -INT8_C( 103), INT8_C( 60), -INT8_C( 8), INT8_C( 116), -INT8_C( 52), INT8_C( 77), INT8_C( 82) }, { INT8_C( 29), -INT8_C( 28), -INT8_C( 14), INT8_C( 111), INT8_C( 34), INT8_C( 123), INT8_C( 32), INT8_C( 38), INT8_C( 17), INT8_C( 59), INT8_C( 67), INT8_C( 115), INT8_C( 97), INT8_C( 102), -INT8_C( 35), INT8_C( 10) } }, { { INT8_C( 90), INT8_C( 93), -INT8_C( 3), INT8_C( 8), -INT8_C( 54), INT8_C( 119), -INT8_C( 84), INT8_C( 70), -INT8_C( 13), -INT8_C( 68), -INT8_C( 5), -INT8_C( 119), INT8_C( 20), -INT8_C( 4), INT8_C( 39), INT8_C( 8) }, { -INT8_C( 46), INT8_C( 26), -INT8_C( 52), -INT8_C( 45), INT8_C( 55), INT8_C( 79), -INT8_C( 33), -INT8_C( 84), -INT8_C( 23), INT8_C( 27), -INT8_C( 91), INT8_C( 93), -INT8_C( 25), -INT8_C( 14), -INT8_C( 81), INT8_C( 66) }, { INT8_C( 79), -INT8_C( 84), INT8_C( 74), INT8_C( 26), INT8_C( 35), -INT8_C( 9), INT8_C( 96), INT8_C( 22), -INT8_C( 77), INT8_C( 92), -INT8_C( 96), -INT8_C( 57), INT8_C( 88), -INT8_C( 57), -INT8_C( 49), INT8_C( 42) }, { -INT8_C( 41), -INT8_C( 53), INT8_C( 123), INT8_C( 79), -INT8_C( 34), -INT8_C( 49), INT8_C( 45), -INT8_C( 80), INT8_C( 41), -INT8_C( 3), INT8_C( 0), INT8_C( 31), -INT8_C( 123), INT8_C( 39), INT8_C( 71), INT8_C( 32) } }, { { -INT8_C( 31), -INT8_C( 100), -INT8_C( 2), INT8_C( 24), -INT8_C( 21), -INT8_C( 35), -INT8_C( 59), -INT8_C( 44), -INT8_C( 8), INT8_C( 106), INT8_C( 49), -INT8_C( 33), INT8_C( 92), -INT8_C( 32), INT8_C( 33), -INT8_C( 84) }, { -INT8_C( 116), INT8_C( 108), 
-INT8_C( 58), -INT8_C( 80), INT8_C( 99), INT8_C( 38), -INT8_C( 58), INT8_C( 22), -INT8_C( 126), INT8_C( 102), -INT8_C( 35), -INT8_C( 38), INT8_C( 45), -INT8_C( 84), INT8_C( 5), INT8_C( 15) }, { INT8_C( 72), INT8_C( 3), INT8_C( 39), INT8_C( 52), -INT8_C( 32), -INT8_C( 20), INT8_C( 8), -INT8_C( 40), INT8_C( 86), INT8_C( 58), -INT8_C( 73), -INT8_C( 77), INT8_C( 26), -INT8_C( 39), INT8_C( 95), -INT8_C( 89) }, { -INT8_C( 99), INT8_C( 5), INT8_C( 95), -INT8_C( 100), INT8_C( 110), INT8_C( 23), INT8_C( 7), INT8_C( 18), -INT8_C( 52), -INT8_C( 106), INT8_C( 87), INT8_C( 6), INT8_C( 111), INT8_C( 13), INT8_C( 123), INT8_C( 20) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t c = simde_vld1q_s8(test_vec[i].c); simde_int8x16_t r = simde_vabaq_s8(a, b, c); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int8x16_t c = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vabaq_s8(a, b, c); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vabaq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; int16_t c[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 30977), INT16_C( 12491), INT16_C( 2221), -INT16_C( 1303), -INT16_C( 21084), INT16_C( 23557), -INT16_C( 31169), -INT16_C( 24084) }, { INT16_C( 24241), -INT16_C( 32243), -INT16_C( 7993), INT16_C( 14763), -INT16_C( 29538), -INT16_C( 2390), -INT16_C( 29998), -INT16_C( 11061) }, { 
-INT16_C( 27132), -INT16_C( 20220), -INT16_C( 4449), INT16_C( 17323), -INT16_C( 20325), -INT16_C( 9569), -INT16_C( 29897), -INT16_C( 6021) }, { INT16_C( 16814), INT16_C( 24514), INT16_C( 5765), INT16_C( 1257), -INT16_C( 11871), INT16_C( 30736), -INT16_C( 31068), -INT16_C( 19044) } }, { { -INT16_C( 30487), -INT16_C( 20118), INT16_C( 5480), INT16_C( 2026), -INT16_C( 27487), INT16_C( 29949), -INT16_C( 14306), INT16_C( 8776) }, { INT16_C( 19551), -INT16_C( 300), INT16_C( 32570), -INT16_C( 10943), -INT16_C( 8144), INT16_C( 26544), INT16_C( 11116), INT16_C( 21839) }, { -INT16_C( 17996), INT16_C( 7174), -INT16_C( 3890), INT16_C( 28707), INT16_C( 8580), -INT16_C( 23580), INT16_C( 11497), INT16_C( 18629) }, { INT16_C( 7060), -INT16_C( 12644), -INT16_C( 23596), -INT16_C( 23860), -INT16_C( 10763), INT16_C( 14537), -INT16_C( 13925), INT16_C( 11986) } }, { { -INT16_C( 26248), -INT16_C( 19642), -INT16_C( 30951), INT16_C( 18824), INT16_C( 14440), -INT16_C( 11088), -INT16_C( 156), INT16_C( 6185) }, { INT16_C( 12472), -INT16_C( 31180), INT16_C( 22560), -INT16_C( 23050), -INT16_C( 9607), INT16_C( 25160), INT16_C( 3334), INT16_C( 32683) }, { -INT16_C( 3673), -INT16_C( 16334), -INT16_C( 17799), -INT16_C( 7927), -INT16_C( 17933), INT16_C( 22453), -INT16_C( 8520), INT16_C( 28783) }, { -INT16_C( 10103), -INT16_C( 4796), INT16_C( 9408), -INT16_C( 31589), INT16_C( 22766), -INT16_C( 8381), INT16_C( 11698), INT16_C( 10085) } }, { { -INT16_C( 23794), INT16_C( 12278), -INT16_C( 4613), INT16_C( 29908), INT16_C( 7367), -INT16_C( 12585), -INT16_C( 32215), -INT16_C( 12211) }, { INT16_C( 32627), -INT16_C( 4976), -INT16_C( 26311), INT16_C( 11469), -INT16_C( 32174), INT16_C( 2691), -INT16_C( 3487), INT16_C( 28538) }, { INT16_C( 29078), -INT16_C( 28258), INT16_C( 29278), INT16_C( 9478), -INT16_C( 8818), -INT16_C( 18189), INT16_C( 16479), -INT16_C( 11640) }, { -INT16_C( 20245), -INT16_C( 29976), -INT16_C( 14560), INT16_C( 31899), INT16_C( 30723), INT16_C( 8295), -INT16_C( 12249), INT16_C( 27967) } }, { 
{ INT16_C( 6591), -INT16_C( 1601), -INT16_C( 29518), INT16_C( 1317), -INT16_C( 22257), INT16_C( 28687), -INT16_C( 30053), INT16_C( 12767) }, { INT16_C( 32507), INT16_C( 22979), -INT16_C( 13840), INT16_C( 32638), INT16_C( 29350), INT16_C( 1335), -INT16_C( 16462), INT16_C( 29399) }, { -INT16_C( 26920), -INT16_C( 29845), -INT16_C( 28637), INT16_C( 12944), -INT16_C( 24775), -INT16_C( 10846), -INT16_C( 32471), INT16_C( 9222) }, { INT16_C( 482), -INT16_C( 14313), -INT16_C( 14721), INT16_C( 21011), INT16_C( 31868), -INT16_C( 24668), -INT16_C( 14044), -INT16_C( 32592) } }, { { -INT16_C( 13825), -INT16_C( 3971), -INT16_C( 878), INT16_C( 14447), -INT16_C( 22930), INT16_C( 8253), INT16_C( 5477), INT16_C( 16018) }, { -INT16_C( 597), -INT16_C( 12599), INT16_C( 22926), -INT16_C( 14592), -INT16_C( 23816), INT16_C( 8860), -INT16_C( 23772), INT16_C( 9030) }, { -INT16_C( 15252), -INT16_C( 237), -INT16_C( 32064), INT16_C( 11831), INT16_C( 29992), -INT16_C( 29106), -INT16_C( 7798), INT16_C( 13772) }, { INT16_C( 830), INT16_C( 8391), -INT16_C( 11424), -INT16_C( 24666), INT16_C( 30878), -INT16_C( 19317), INT16_C( 21451), INT16_C( 20760) } }, { { -INT16_C( 27170), INT16_C( 27652), INT16_C( 1262), -INT16_C( 6604), -INT16_C( 12121), -INT16_C( 13560), INT16_C( 20339), -INT16_C( 7954) }, { INT16_C( 531), -INT16_C( 11297), INT16_C( 5764), -INT16_C( 21247), INT16_C( 20363), INT16_C( 5435), INT16_C( 1840), INT16_C( 3915) }, { INT16_C( 20380), -INT16_C( 30085), -INT16_C( 20653), -INT16_C( 1424), INT16_C( 31104), -INT16_C( 3131), -INT16_C( 19256), -INT16_C( 9261) }, { -INT16_C( 7321), -INT16_C( 19096), INT16_C( 27679), INT16_C( 13219), -INT16_C( 1380), -INT16_C( 4994), -INT16_C( 24101), INT16_C( 5222) } }, { { -INT16_C( 19786), INT16_C( 15022), -INT16_C( 20535), INT16_C( 21735), INT16_C( 8958), INT16_C( 12138), -INT16_C( 19159), -INT16_C( 15042) }, { -INT16_C( 18172), INT16_C( 22351), -INT16_C( 16279), -INT16_C( 5806), INT16_C( 5945), INT16_C( 476), -INT16_C( 20277), -INT16_C( 32292) }, { 
-INT16_C( 30110), INT16_C( 11196), -INT16_C( 23751), INT16_C( 14208), -INT16_C( 5434), -INT16_C( 4250), -INT16_C( 23393), -INT16_C( 23627) }, { -INT16_C( 7848), INT16_C( 26177), -INT16_C( 13063), -INT16_C( 23787), INT16_C( 20337), INT16_C( 16864), -INT16_C( 16043), -INT16_C( 6377) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t c = simde_vld1q_s16(test_vec[i].c); simde_int16x8_t r = simde_vabaq_s16(a, b, c); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int16x8_t c = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vabaq_s16(a, b, c); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #if !defined(TEST_SIMDE_VABD_NO_TEST_32) static int test_simde_vabaq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; int32_t c[4]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 939916194), -INT32_C( 38777660), INT32_C( 805211236), -INT32_C( 1615734212) }, { -INT32_C( 1647678108), -INT32_C( 690664944), -INT32_C( 741983436), INT32_C( 1047952352) }, { INT32_C( 1141207423), INT32_C( 557954493), INT32_C( 2119254081), INT32_C( 2132607514) }, { INT32_C( 1848969337), INT32_C( 1209841777), -INT32_C( 628518543), -INT32_C( 531079050) } }, { { INT32_C( 2132600687), INT32_C( 1716973874), INT32_C( 221912109), INT32_C( 374059159) }, { -INT32_C( 547729375), INT32_C( 1174445061), -INT32_C( 154906148), -INT32_C( 1032461997) }, { -INT32_C( 79523127), -INT32_C( 
1335781245), INT32_C( 1270717364), INT32_C( 1835075660) }, { -INT32_C( 1694160361), -INT32_C( 67767116), INT32_C( 1647535621), -INT32_C( 1053370480) } }, { { INT32_C( 1565310040), INT32_C( 883182936), -INT32_C( 232101730), INT32_C( 330604618) }, { -INT32_C( 1257310670), INT32_C( 1114009486), INT32_C( 1468867339), -INT32_C( 2067468500) }, { INT32_C( 65147307), -INT32_C( 63469986), INT32_C( 955146734), INT32_C( 860594945) }, { -INT32_C( 1407199279), INT32_C( 2060662408), INT32_C( 281618875), -INT32_C( 1036299233) } }, { { INT32_C( 686381465), -INT32_C( 731230263), -INT32_C( 1641285518), -INT32_C( 1843138585) }, { INT32_C( 1586824448), INT32_C( 2036059275), INT32_C( 800213293), -INT32_C( 2040332820) }, { INT32_C( 548293463), INT32_C( 234100890), -INT32_C( 139780336), INT32_C( 260689422) }, { INT32_C( 1724912450), INT32_C( 1070728122), -INT32_C( 701291889), INT32_C( 457883657) } }, { { INT32_C( 1584209619), INT32_C( 416860394), -INT32_C( 28866030), -INT32_C( 561731193) }, { -INT32_C( 1879166219), INT32_C( 1537012299), INT32_C( 542328849), -INT32_C( 365962218) }, { -INT32_C( 448226053), INT32_C( 2013077605), INT32_C( 846546090), -INT32_C( 485426450) }, { -INT32_C( 1279817511), INT32_C( 892925700), INT32_C( 275351211), -INT32_C( 442266961) } }, { { INT32_C( 2003963692), INT32_C( 332599041), INT32_C( 1832068695), -INT32_C( 44604926) }, { INT32_C( 1675862270), INT32_C( 1809506496), INT32_C( 329076773), INT32_C( 1995877706) }, { -INT32_C( 1091671876), -INT32_C( 825114249), -INT32_C( 381942553), INT32_C( 1692898150) }, { INT32_C( 476530542), -INT32_C( 1327747510), -INT32_C( 1751879275), INT32_C( 258374630) } }, { { -INT32_C( 188167629), -INT32_C( 815816022), INT32_C( 1021508850), INT32_C( 1723062441) }, { -INT32_C( 1205558975), INT32_C( 1233646946), INT32_C( 1597162489), -INT32_C( 1966925482) }, { -INT32_C( 1904309277), INT32_C( 543022382), -INT32_C( 2107817767), INT32_C( 1508380696) }, { INT32_C( 510582673), -INT32_C( 125191458), INT32_C( 431521810), INT32_C( 903401323) } 
}, { { INT32_C( 319950001), -INT32_C( 94594815), -INT32_C( 1302753700), -INT32_C( 1958994520) }, { -INT32_C( 702956888), INT32_C( 1895266199), -INT32_C( 806136649), INT32_C( 355064676) }, { -INT32_C( 383239192), INT32_C( 820282580), -INT32_C( 1159578350), INT32_C( 54927194) }, { INT32_C( 639667697), INT32_C( 980388804), -INT32_C( 949311999), -INT32_C( 1658857038) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t c = simde_vld1q_s32(test_vec[i].c); simde_int32x4_t r = simde_vabaq_s32(a, b, c); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int32x4_t c = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vabaq_s32(a, b, c); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #endif static int test_simde_vabaq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t b[16]; uint8_t c[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(216), UINT8_C( 11), UINT8_C( 40), UINT8_C(228), UINT8_C(187), UINT8_C(225), UINT8_C( 32), UINT8_C(216), UINT8_C( 90), UINT8_C(133), UINT8_C(229), UINT8_C( 30), UINT8_C(182), UINT8_C( 36), UINT8_C(239), UINT8_C(187) }, { UINT8_C(110), UINT8_C( 19), UINT8_C( 78), UINT8_C(165), UINT8_C(167), UINT8_C(104), UINT8_C(134), UINT8_C( 18), UINT8_C( 94), UINT8_C(254), UINT8_C(151), UINT8_C( 42), UINT8_C(254), UINT8_C(121), UINT8_C(219), UINT8_C(215) }, { UINT8_C(132), UINT8_C( 3), UINT8_C(187), UINT8_C( 63), UINT8_C(228), 
UINT8_C(220), UINT8_C( 24), UINT8_C( 62), UINT8_C( 97), UINT8_C(253), UINT8_C( 93), UINT8_C( 23), UINT8_C( 33), UINT8_C( 76), UINT8_C(211), UINT8_C(144) }, { UINT8_C(238), UINT8_C( 27), UINT8_C(149), UINT8_C( 74), UINT8_C(248), UINT8_C( 85), UINT8_C(142), UINT8_C( 4), UINT8_C( 93), UINT8_C(134), UINT8_C( 31), UINT8_C( 49), UINT8_C(147), UINT8_C( 81), UINT8_C(247), UINT8_C( 2) } }, { { UINT8_C( 95), UINT8_C( 33), UINT8_C( 53), UINT8_C( 6), UINT8_C(138), UINT8_C(187), UINT8_C( 24), UINT8_C(232), UINT8_C(186), UINT8_C(175), UINT8_C( 18), UINT8_C(184), UINT8_C( 40), UINT8_C(237), UINT8_C(143), UINT8_C(173) }, { UINT8_C(240), UINT8_C( 75), UINT8_C(236), UINT8_C(212), UINT8_C( 39), UINT8_C( 4), UINT8_C( 18), UINT8_C(136), UINT8_C( 2), UINT8_C(111), UINT8_C(160), UINT8_C( 35), UINT8_C(187), UINT8_C(115), UINT8_C(179), UINT8_C( 27) }, { UINT8_C(148), UINT8_C(232), UINT8_C( 33), UINT8_C( 30), UINT8_C(164), UINT8_C( 58), UINT8_C( 6), UINT8_C( 94), UINT8_C(233), UINT8_C( 24), UINT8_C( 22), UINT8_C( 18), UINT8_C( 5), UINT8_C(166), UINT8_C(191), UINT8_C(245) }, { UINT8_C(187), UINT8_C(190), UINT8_C( 0), UINT8_C(188), UINT8_C( 7), UINT8_C(241), UINT8_C( 36), UINT8_C( 18), UINT8_C(161), UINT8_C( 6), UINT8_C(156), UINT8_C(201), UINT8_C(222), UINT8_C( 32), UINT8_C(155), UINT8_C(135) } }, { { UINT8_C(241), UINT8_C(171), UINT8_C(201), UINT8_C( 24), UINT8_C(176), UINT8_C(220), UINT8_C(160), UINT8_C(178), UINT8_C( 75), UINT8_C( 64), UINT8_C(213), UINT8_C( 7), UINT8_C(179), UINT8_C(137), UINT8_C( 34), UINT8_C( 72) }, { UINT8_C(113), UINT8_C( 67), UINT8_C(102), UINT8_C( 21), UINT8_C(125), UINT8_C(109), UINT8_C(115), UINT8_C(103), UINT8_C(133), UINT8_C(138), UINT8_C(121), UINT8_C(139), UINT8_C( 48), UINT8_C( 56), UINT8_C(128), UINT8_C( 33) }, { UINT8_C(227), UINT8_C( 74), UINT8_C( 57), UINT8_C(147), UINT8_C( 38), UINT8_C(217), UINT8_C( 69), UINT8_C(113), UINT8_C( 26), UINT8_C( 27), UINT8_C(120), UINT8_C(205), UINT8_C(164), UINT8_C(154), UINT8_C( 21), UINT8_C( 21) }, { UINT8_C( 99), 
UINT8_C(178), UINT8_C(246), UINT8_C(150), UINT8_C( 7), UINT8_C( 72), UINT8_C(206), UINT8_C(188), UINT8_C(182), UINT8_C(175), UINT8_C(214), UINT8_C( 73), UINT8_C( 39), UINT8_C(235), UINT8_C(141), UINT8_C( 84) } }, { { UINT8_C(222), UINT8_C(124), UINT8_C( 43), UINT8_C( 91), UINT8_C(233), UINT8_C(158), UINT8_C(194), UINT8_C(110), UINT8_C( 40), UINT8_C( 59), UINT8_C(249), UINT8_C( 88), UINT8_C(115), UINT8_C(122), UINT8_C(121), UINT8_C( 87) }, { UINT8_C(196), UINT8_C(178), UINT8_C(234), UINT8_C(234), UINT8_C(140), UINT8_C( 48), UINT8_C( 91), UINT8_C(166), UINT8_C( 75), UINT8_C(212), UINT8_C(115), UINT8_C(239), UINT8_C(110), UINT8_C(137), UINT8_C( 4), UINT8_C( 76) }, { UINT8_C( 5), UINT8_C( 47), UINT8_C(168), UINT8_C(238), UINT8_C(206), UINT8_C(106), UINT8_C( 92), UINT8_C(246), UINT8_C(166), UINT8_C( 86), UINT8_C( 79), UINT8_C( 25), UINT8_C(208), UINT8_C(200), UINT8_C(112), UINT8_C(148) }, { UINT8_C(157), UINT8_MAX, UINT8_C(109), UINT8_C( 95), UINT8_C( 43), UINT8_C(216), UINT8_C(195), UINT8_C(190), UINT8_C(131), UINT8_C(185), UINT8_C( 29), UINT8_C( 46), UINT8_C(213), UINT8_C(185), UINT8_C(229), UINT8_C(159) } }, { { UINT8_C(123), UINT8_C( 91), UINT8_C(126), UINT8_C( 7), UINT8_C(139), UINT8_C(217), UINT8_C(173), UINT8_C(214), UINT8_C(173), UINT8_C( 32), UINT8_C(197), UINT8_C( 28), UINT8_C(169), UINT8_C(201), UINT8_C(104), UINT8_C(174) }, { UINT8_C(249), UINT8_C( 16), UINT8_C(156), UINT8_C(199), UINT8_C(123), UINT8_C(249), UINT8_C(189), UINT8_C( 33), UINT8_C( 79), UINT8_C( 12), UINT8_C( 58), UINT8_C( 31), UINT8_C(213), UINT8_C(171), UINT8_C(179), UINT8_C( 80) }, { UINT8_C( 6), UINT8_C( 49), UINT8_C( 87), UINT8_C(145), UINT8_C( 10), UINT8_C( 4), UINT8_C(103), UINT8_C(184), UINT8_C( 36), UINT8_C( 44), UINT8_C(212), UINT8_C(206), UINT8_C(245), UINT8_C( 60), UINT8_C(124), UINT8_C(238) }, { UINT8_C(110), UINT8_C(124), UINT8_C(195), UINT8_C( 61), UINT8_C(252), UINT8_C(206), UINT8_C( 3), UINT8_C(109), UINT8_C(216), UINT8_C( 64), UINT8_C( 95), UINT8_C(203), UINT8_C(201), UINT8_C( 
56), UINT8_C(159), UINT8_C( 76) } }, { { UINT8_C( 77), UINT8_C( 25), UINT8_C(181), UINT8_C(200), UINT8_C( 18), UINT8_C(115), UINT8_C(233), UINT8_C( 97), UINT8_C(127), UINT8_C( 35), UINT8_C(128), UINT8_C( 84), UINT8_C(206), UINT8_C( 51), UINT8_C(164), UINT8_C(212) }, { UINT8_C(100), UINT8_C(251), UINT8_C(101), UINT8_C(110), UINT8_MAX, UINT8_C(204), UINT8_C( 38), UINT8_C( 36), UINT8_C(248), UINT8_C(250), UINT8_C(242), UINT8_C(238), UINT8_C( 55), UINT8_C(110), UINT8_C(220), UINT8_C(132) }, { UINT8_C(135), UINT8_C(146), UINT8_C( 76), UINT8_C(153), UINT8_C( 5), UINT8_C( 53), UINT8_C(250), UINT8_C(132), UINT8_C( 88), UINT8_C(122), UINT8_C(217), UINT8_C( 39), UINT8_C(173), UINT8_C(125), UINT8_C(251), UINT8_C( 17) }, { UINT8_C(112), UINT8_C(130), UINT8_C(206), UINT8_C(243), UINT8_C( 12), UINT8_C( 10), UINT8_C(189), UINT8_C(193), UINT8_C( 31), UINT8_C(163), UINT8_C(153), UINT8_C( 27), UINT8_C( 68), UINT8_C( 66), UINT8_C(195), UINT8_C( 71) } }, { { UINT8_C(121), UINT8_C( 97), UINT8_C(128), UINT8_C(120), UINT8_C( 45), UINT8_C(166), UINT8_C(156), UINT8_C( 38), UINT8_C(161), UINT8_C(142), UINT8_C( 20), UINT8_C(216), UINT8_C(253), UINT8_C(240), UINT8_C( 92), UINT8_C(132) }, { UINT8_C(130), UINT8_C(168), UINT8_C( 30), UINT8_C(135), UINT8_C(221), UINT8_C( 24), UINT8_C( 12), UINT8_C( 53), UINT8_C(147), UINT8_C(229), UINT8_C( 92), UINT8_C( 64), UINT8_C( 98), UINT8_C( 88), UINT8_C( 82), UINT8_C(219) }, { UINT8_C(185), UINT8_C(210), UINT8_C( 84), UINT8_C(230), UINT8_C(120), UINT8_C(240), UINT8_C( 12), UINT8_C( 25), UINT8_C(127), UINT8_C( 32), UINT8_C(241), UINT8_C(124), UINT8_C( 17), UINT8_C( 77), UINT8_C( 0), UINT8_C(147) }, { UINT8_C(176), UINT8_C(139), UINT8_C(182), UINT8_C(215), UINT8_C(146), UINT8_C(126), UINT8_C(156), UINT8_C( 66), UINT8_C(181), UINT8_C( 83), UINT8_C(169), UINT8_C( 20), UINT8_C( 78), UINT8_C(251), UINT8_C(174), UINT8_C(204) } }, { { UINT8_C(245), UINT8_C( 30), UINT8_C( 27), UINT8_C(210), UINT8_C( 55), UINT8_C( 39), UINT8_C( 8), UINT8_C(202), UINT8_C( 12), 
UINT8_C(100), UINT8_C( 10), UINT8_C(110), UINT8_C(188), UINT8_C( 92), UINT8_C( 74), UINT8_C(117) }, { UINT8_C( 46), UINT8_C(158), UINT8_C( 92), UINT8_C(167), UINT8_C(142), UINT8_C(104), UINT8_C(192), UINT8_C( 13), UINT8_C(137), UINT8_C(178), UINT8_C(137), UINT8_C(154), UINT8_MAX, UINT8_C(138), UINT8_C( 45), UINT8_C(245) }, { UINT8_C(168), UINT8_C( 72), UINT8_C(199), UINT8_C(223), UINT8_C(111), UINT8_C(207), UINT8_C(169), UINT8_C(123), UINT8_C( 52), UINT8_C(180), UINT8_C(234), UINT8_C(240), UINT8_C( 16), UINT8_C( 52), UINT8_C(102), UINT8_C( 63) }, { UINT8_C(111), UINT8_C(116), UINT8_C(134), UINT8_C( 10), UINT8_C( 86), UINT8_C(142), UINT8_C( 31), UINT8_C( 56), UINT8_C( 97), UINT8_C(102), UINT8_C(107), UINT8_C(196), UINT8_C(171), UINT8_C(178), UINT8_C(131), UINT8_C( 43) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t c = simde_vld1q_u8(test_vec[i].c); simde_uint8x16_t r = simde_vabaq_u8(a, b, c); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t c = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vabaq_u8(a, b, c); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vabaq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; uint16_t c[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(49874), UINT16_C(24806), UINT16_C(42538), UINT16_C(45934), UINT16_C(63320), UINT16_C(22605), UINT16_C(31617), 
UINT16_C(10829) }, { UINT16_C( 5315), UINT16_C(13065), UINT16_C(46052), UINT16_C( 6318), UINT16_C(39015), UINT16_C(30472), UINT16_C(28364), UINT16_C(40630) }, { UINT16_C(39984), UINT16_C(23551), UINT16_C(27971), UINT16_C(39694), UINT16_C(23652), UINT16_C(59123), UINT16_C(16599), UINT16_C(39440) }, { UINT16_C(19007), UINT16_C(35292), UINT16_C(60619), UINT16_C(13774), UINT16_C(13147), UINT16_C(51256), UINT16_C(43382), UINT16_C(12019) } }, { { UINT16_C( 6485), UINT16_C(14797), UINT16_C(31948), UINT16_C(13137), UINT16_C(22804), UINT16_C(57771), UINT16_C(25032), UINT16_C(63615) }, { UINT16_C(32510), UINT16_C(16723), UINT16_C(25323), UINT16_C(20700), UINT16_C(53438), UINT16_C(38198), UINT16_C(17936), UINT16_C(25903) }, { UINT16_C(64863), UINT16_C(11422), UINT16_C(61305), UINT16_C(36191), UINT16_C( 2633), UINT16_C( 4462), UINT16_C(61036), UINT16_C(27145) }, { UINT16_C(38838), UINT16_C(20098), UINT16_C( 2394), UINT16_C(28628), UINT16_C( 8073), UINT16_C(25971), UINT16_C( 2596), UINT16_C(64857) } }, { { UINT16_C(23916), UINT16_C(22699), UINT16_C(34751), UINT16_C(32168), UINT16_C(56919), UINT16_C(26642), UINT16_C(16676), UINT16_C(33741) }, { UINT16_C(27710), UINT16_C(47023), UINT16_C( 3931), UINT16_C(42053), UINT16_C(45849), UINT16_C(34229), UINT16_C(49057), UINT16_C( 3823) }, { UINT16_C(39452), UINT16_C(56166), UINT16_C( 3618), UINT16_C(31064), UINT16_C(27372), UINT16_C( 4321), UINT16_C(44971), UINT16_C(60051) }, { UINT16_C(35658), UINT16_C(31842), UINT16_C(35064), UINT16_C(43157), UINT16_C( 9860), UINT16_C(56550), UINT16_C(20762), UINT16_C(24433) } }, { { UINT16_C(17179), UINT16_C(30369), UINT16_C(58962), UINT16_C(27419), UINT16_C(53402), UINT16_C(15345), UINT16_C(57487), UINT16_C(43849) }, { UINT16_C(44923), UINT16_C(40326), UINT16_C(57021), UINT16_C(43286), UINT16_C(63560), UINT16_C(62649), UINT16_C(19879), UINT16_C(49886) }, { UINT16_C(32656), UINT16_C(57912), UINT16_C(21350), UINT16_C( 77), UINT16_C(15908), UINT16_C(45883), UINT16_C(34079), UINT16_C(39519) }, { 
UINT16_C(29446), UINT16_C(47955), UINT16_C(29097), UINT16_C( 5092), UINT16_C(35518), UINT16_C(32111), UINT16_C( 6151), UINT16_C(54216) } }, { { UINT16_C(58676), UINT16_C(62007), UINT16_C(19908), UINT16_C( 3227), UINT16_C(21829), UINT16_C(60416), UINT16_C(56994), UINT16_C(12974) }, { UINT16_C(59230), UINT16_C(50196), UINT16_C(24890), UINT16_C(24260), UINT16_C(65440), UINT16_C(48914), UINT16_C(29060), UINT16_C(47449) }, { UINT16_C(36950), UINT16_C( 6827), UINT16_C(18141), UINT16_C( 8999), UINT16_C(10139), UINT16_C(15631), UINT16_C(48646), UINT16_C(25711) }, { UINT16_C(15420), UINT16_C(39840), UINT16_C(26657), UINT16_C(18488), UINT16_C(11594), UINT16_C(28163), UINT16_C(11044), UINT16_C(34712) } }, { { UINT16_C(33701), UINT16_C(57128), UINT16_C(60645), UINT16_C(34110), UINT16_C(20715), UINT16_C(28740), UINT16_C(40385), UINT16_C( 5929) }, { UINT16_C(54317), UINT16_C( 2610), UINT16_C(22810), UINT16_C(46637), UINT16_C(15744), UINT16_C(34547), UINT16_C(25595), UINT16_C(41194) }, { UINT16_C( 4838), UINT16_C(52095), UINT16_C(48638), UINT16_C(59984), UINT16_C(37901), UINT16_C(52826), UINT16_C(33585), UINT16_C(24294) }, { UINT16_C(17644), UINT16_C(41077), UINT16_C(20937), UINT16_C(47457), UINT16_C(42872), UINT16_C(47019), UINT16_C(48375), UINT16_C(22829) } }, { { UINT16_C( 6231), UINT16_C(29033), UINT16_C(38513), UINT16_C(61735), UINT16_C( 7123), UINT16_C(52856), UINT16_C(25214), UINT16_C(25710) }, { UINT16_C(61045), UINT16_C(29488), UINT16_C(32939), UINT16_C(47453), UINT16_C(46869), UINT16_C(18055), UINT16_C(27962), UINT16_C(37285) }, { UINT16_C( 3717), UINT16_C(62979), UINT16_C(10916), UINT16_C(30952), UINT16_C(24645), UINT16_C(49990), UINT16_C(46530), UINT16_C(14120) }, { UINT16_C(63559), UINT16_C(62524), UINT16_C(60536), UINT16_C(12700), UINT16_C(29347), UINT16_C(19255), UINT16_C(43782), UINT16_C(48875) } }, { { UINT16_C(22691), UINT16_C(20139), UINT16_C( 2264), UINT16_C(60679), UINT16_C(36800), UINT16_C(64052), UINT16_C(55804), UINT16_C(33420) }, { UINT16_C(36839), 
UINT16_C(35704), UINT16_C(24761), UINT16_C(65283), UINT16_C(19136), UINT16_C(33730), UINT16_C(60159), UINT16_C(41658) }, { UINT16_C(25922), UINT16_C( 7152), UINT16_C(63598), UINT16_C(11784), UINT16_C(15495), UINT16_C(33576), UINT16_C(46101), UINT16_C(64517) }, { UINT16_C(33608), UINT16_C(48691), UINT16_C(41101), UINT16_C(48642), UINT16_C(40441), UINT16_C(64206), UINT16_C( 4326), UINT16_C(56279) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t c = simde_vld1q_u16(test_vec[i].c); simde_uint16x8_t r = simde_vabaq_u16(a, b, c); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t c = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t r = simde_vabaq_u16(a, b, c); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #if !defined(TEST_SIMDE_VABD_NO_TEST_32) static int test_simde_vabaq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t c[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(4253580867), UINT32_C(2684128222), UINT32_C(3559046869), UINT32_C(3950435497) }, { UINT32_C(2953209666), UINT32_C(3873312607), UINT32_C(1634272843), UINT32_C(4267536315) }, { UINT32_C(3422283245), UINT32_C(1181415281), UINT32_C(1595641014), UINT32_C(2873790825) }, { UINT32_C( 427687150), UINT32_C(1081058252), UINT32_C(3597678698), UINT32_C(1049213691) } }, { { UINT32_C(1465602552), UINT32_C(2872916320), UINT32_C(4195133247), UINT32_C( 66677270) }, 
{ UINT32_C(3234788431), UINT32_C(2718382572), UINT32_C( 771826373), UINT32_C(2899921843) }, { UINT32_C(4228068508), UINT32_C(2913485165), UINT32_C(4272403688), UINT32_C(1845600286) }, { UINT32_C(2458882629), UINT32_C(3068018913), UINT32_C(3400743266), UINT32_C(1120998827) } }, { { UINT32_C(2167328661), UINT32_C(3458413832), UINT32_C( 201073751), UINT32_C( 213374575) }, { UINT32_C(2013837834), UINT32_C(3810898171), UINT32_C(2212613221), UINT32_C( 49406573) }, { UINT32_C(3129155762), UINT32_C(2911413845), UINT32_C( 968394186), UINT32_C(1699049307) }, { UINT32_C(3282646589), UINT32_C( 62930862), UINT32_C(1445292786), UINT32_C(1863017309) } }, { { UINT32_C( 635260457), UINT32_C(1661469438), UINT32_C(1021831887), UINT32_C(2118047948) }, { UINT32_C(1312408056), UINT32_C( 838582631), UINT32_C(2708124486), UINT32_C(1258795042) }, { UINT32_C(4235257086), UINT32_C(3059775975), UINT32_C( 804407139), UINT32_C( 414068767) }, { UINT32_C(3558109487), UINT32_C(3882662782), UINT32_C(2925549234), UINT32_C(2962774223) } }, { { UINT32_C(1483139057), UINT32_C(4018758056), UINT32_C( 915469076), UINT32_C(2709624739) }, { UINT32_C(1671361148), UINT32_C(3457810027), UINT32_C(1694305093), UINT32_C( 746367803) }, { UINT32_C( 998564498), UINT32_C(1462373699), UINT32_C(2760751616), UINT32_C(3460632402) }, { UINT32_C(2155935707), UINT32_C(1719227088), UINT32_C(1981915599), UINT32_C(1128922042) } }, { { UINT32_C(1815208705), UINT32_C( 641354465), UINT32_C(2425042773), UINT32_C(1975257059) }, { UINT32_C( 766525674), UINT32_C(1300617805), UINT32_C(3891335829), UINT32_C( 582301473) }, { UINT32_C(4237223450), UINT32_C(2233649200), UINT32_C(3809848576), UINT32_C(2673398197) }, { UINT32_C( 990939185), UINT32_C(1574385860), UINT32_C(2506530026), UINT32_C(4066353783) } }, { { UINT32_C(1590429969), UINT32_C(2024493539), UINT32_C(2237635940), UINT32_C(4003992788) }, { UINT32_C( 719992570), UINT32_C(4289662463), UINT32_C(1877132474), UINT32_C(2785950357) }, { UINT32_C( 654629699), UINT32_C(2426384428), 
UINT32_C( 555155277), UINT32_C( 235912723) }, { UINT32_C(1655792840), UINT32_C(3887771574), UINT32_C(3559613137), UINT32_C(2259063126) } }, { { UINT32_C(4080597748), UINT32_C(3253921799), UINT32_C(1110561964), UINT32_C(1390952207) }, { UINT32_C(1199172890), UINT32_C(3939965341), UINT32_C( 722201880), UINT32_C(2688096939) }, { UINT32_C( 462647828), UINT32_C( 115181146), UINT32_C(1766329946), UINT32_C(1757163853) }, { UINT32_C( 522155514), UINT32_C(2783738698), UINT32_C(2154690030), UINT32_C(2321885293) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t c = simde_vld1q_u32(test_vec[i].c); simde_uint32x4_t r = simde_vabaq_u32(a, b, c); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t c = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t r = simde_vabaq_u32(a, b, c); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #endif SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vaba_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vaba_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vaba_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vaba_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vaba_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vabaq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vabaq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vabaq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vabaq_u16) #if !defined(TEST_SIMDE_VABD_NO_TEST_32) SIMDE_TEST_FUNC_LIST_ENTRY(vabaq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vabaq_u32) #endif SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" 
simde-0.7.2/test/arm/neon/abd.c000066400000000000000000001774631400333146700162330ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN abd #include "test-neon.h" #include "../../../simde/arm/neon/abd.h" /* https://community.intel.com/t5/Intel-C-Compiler/ICC-generates-incorrect-code/td-p/1199261 */ #if defined(HEDLEY_INTEL_VERSION) # define TEST_SIMDE_VABD_NO_TEST_32 #endif static int test_simde_vabds_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a; simde_float32 b; simde_float32 r; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 0.52), SIMDE_MATH_NANF }, { SIMDE_FLOAT32_C( 705.02), SIMDE_MATH_NANF, SIMDE_MATH_NANF }, { SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_MATH_NANF }, #endif { SIMDE_FLOAT32_C( 531.62), SIMDE_FLOAT32_C( -312.36), SIMDE_FLOAT32_C( 843.98) }, { SIMDE_FLOAT32_C( -308.11), SIMDE_FLOAT32_C( 992.42), SIMDE_FLOAT32_C( 1300.53) }, { SIMDE_FLOAT32_C( -79.01), SIMDE_FLOAT32_C( 594.71), SIMDE_FLOAT32_C( 673.72) }, { SIMDE_FLOAT32_C( -32.15), SIMDE_FLOAT32_C( 114.85), SIMDE_FLOAT32_C( 147.00) }, { SIMDE_FLOAT32_C( 290.28), SIMDE_FLOAT32_C( 100.33), SIMDE_FLOAT32_C( 189.95) }, { SIMDE_FLOAT32_C( -384.37), SIMDE_FLOAT32_C( -271.65), SIMDE_FLOAT32_C( 112.72) }, { SIMDE_FLOAT32_C( 810.73), SIMDE_FLOAT32_C( -621.11), SIMDE_FLOAT32_C( 1431.84) }, { SIMDE_FLOAT32_C( -383.51), SIMDE_FLOAT32_C( 354.85), SIMDE_FLOAT32_C( 738.36) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32_t r = simde_vabds_f32(test_vec[i].a, test_vec[i].b); simde_assert_equal_f32(r, test_vec[i].r, 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32_t a = simde_test_codegen_random_f32(-1000.0f, 1000.0f); simde_float32_t b = simde_test_codegen_random_f32(-1000.0f, 1000.0f); simde_float32_t r = simde_vabds_f32(a, b); simde_test_codegen_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_codegen_write_f32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vabdd_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a; simde_float64 b; simde_float64 r; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 0.52), SIMDE_MATH_NAN }, { SIMDE_FLOAT64_C( 705.02), SIMDE_MATH_NAN, SIMDE_MATH_NAN }, { SIMDE_MATH_NAN, SIMDE_MATH_NAN, SIMDE_MATH_NAN }, #endif { SIMDE_FLOAT64_C( 779.86), SIMDE_FLOAT64_C( 261.91), SIMDE_FLOAT64_C( 517.95) }, { SIMDE_FLOAT64_C( 645.51), SIMDE_FLOAT64_C( 275.82), SIMDE_FLOAT64_C( 369.69) }, { SIMDE_FLOAT64_C( 482.58), SIMDE_FLOAT64_C( -380.95), SIMDE_FLOAT64_C( 863.53) }, { SIMDE_FLOAT64_C( 670.99), SIMDE_FLOAT64_C( 240.67), SIMDE_FLOAT64_C( 430.32) }, { SIMDE_FLOAT64_C( -761.70), SIMDE_FLOAT64_C( -217.28), SIMDE_FLOAT64_C( 544.42) }, { SIMDE_FLOAT64_C( -965.81), SIMDE_FLOAT64_C( -145.22), SIMDE_FLOAT64_C( 820.59) }, { SIMDE_FLOAT64_C( 857.99), SIMDE_FLOAT64_C( -115.73), SIMDE_FLOAT64_C( 973.72) }, { SIMDE_FLOAT64_C( -75.93), SIMDE_FLOAT64_C( 547.42), SIMDE_FLOAT64_C( 623.35) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64_t r = simde_vabdd_f64(test_vec[i].a, test_vec[i].b); simde_assert_equal_f64(r, test_vec[i].r, 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64_t a = simde_test_codegen_random_f64(-1000.0, 1000.0); simde_float64_t b = simde_test_codegen_random_f64(-1000.0, 1000.0); simde_float64_t r = simde_vabdd_f64(a, b); simde_test_codegen_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_f64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vabd_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -378.92), SIMDE_FLOAT32_C( -620.80) }, { SIMDE_FLOAT32_C( 
475.93), SIMDE_FLOAT32_C( 308.98) }, { SIMDE_FLOAT32_C( 854.85), SIMDE_FLOAT32_C( 929.78) } }, { { SIMDE_FLOAT32_C( 159.45), SIMDE_FLOAT32_C( -785.12) }, { SIMDE_FLOAT32_C( -745.18), SIMDE_FLOAT32_C( -374.75) }, { SIMDE_FLOAT32_C( 904.63), SIMDE_FLOAT32_C( 410.37) } }, { { SIMDE_FLOAT32_C( -423.39), SIMDE_FLOAT32_C( 68.89) }, { SIMDE_FLOAT32_C( 583.57), SIMDE_FLOAT32_C( 386.20) }, { SIMDE_FLOAT32_C( 1006.96), SIMDE_FLOAT32_C( 317.31) } }, { { SIMDE_FLOAT32_C( -153.15), SIMDE_FLOAT32_C( -120.40) }, { SIMDE_FLOAT32_C( 555.60), SIMDE_FLOAT32_C( -702.70) }, { SIMDE_FLOAT32_C( 708.75), SIMDE_FLOAT32_C( 582.30) } }, { { SIMDE_FLOAT32_C( 380.05), SIMDE_FLOAT32_C( 448.15) }, { SIMDE_FLOAT32_C( 736.37), SIMDE_FLOAT32_C( -447.93) }, { SIMDE_FLOAT32_C( 356.32), SIMDE_FLOAT32_C( 896.08) } }, { { SIMDE_FLOAT32_C( -910.48), SIMDE_FLOAT32_C( -966.37) }, { SIMDE_FLOAT32_C( -441.23), SIMDE_FLOAT32_C( -21.68) }, { SIMDE_FLOAT32_C( 469.25), SIMDE_FLOAT32_C( 944.69) } }, { { SIMDE_FLOAT32_C( -684.24), SIMDE_FLOAT32_C( -42.78) }, { SIMDE_FLOAT32_C( 785.07), SIMDE_FLOAT32_C( -505.67) }, { SIMDE_FLOAT32_C( 1469.31), SIMDE_FLOAT32_C( 462.89) } }, { { SIMDE_FLOAT32_C( 169.60), SIMDE_FLOAT32_C( 497.90) }, { SIMDE_FLOAT32_C( 36.47), SIMDE_FLOAT32_C( 790.68) }, { SIMDE_FLOAT32_C( 133.13), SIMDE_FLOAT32_C( 292.78) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vabd_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vabd_f32(a, b); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, b, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vabd_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; simde_float64 b[1]; simde_float64 r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( 877.10) }, { SIMDE_FLOAT64_C( -487.60) }, { SIMDE_FLOAT64_C( 1364.70) } }, { { SIMDE_FLOAT64_C( 99.66) }, { SIMDE_FLOAT64_C( 36.55) }, { SIMDE_FLOAT64_C( 63.11) } }, { { SIMDE_FLOAT64_C( -272.72) }, { SIMDE_FLOAT64_C( 354.48) }, { SIMDE_FLOAT64_C( 627.20) } }, { { SIMDE_FLOAT64_C( 661.80) }, { SIMDE_FLOAT64_C( 303.89) }, { SIMDE_FLOAT64_C( 357.91) } }, { { SIMDE_FLOAT64_C( -576.63) }, { SIMDE_FLOAT64_C( 245.37) }, { SIMDE_FLOAT64_C( 822.00) } }, { { SIMDE_FLOAT64_C( -309.90) }, { SIMDE_FLOAT64_C( 270.22) }, { SIMDE_FLOAT64_C( 580.12) } }, { { SIMDE_FLOAT64_C( -875.02) }, { SIMDE_FLOAT64_C( -754.31) }, { SIMDE_FLOAT64_C( 120.71) } }, { { SIMDE_FLOAT64_C( 567.52) }, { SIMDE_FLOAT64_C( 505.03) }, { SIMDE_FLOAT64_C( 62.49) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_float64x1_t r = simde_vabd_f64(a, b); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t b = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t r = simde_vabd_f64(a, b); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vabd_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { INT8_MAX, INT8_MAX, INT8_C( 10), INT8_C( 10), 
INT8_C( 103), INT8_C( 22), -INT8_C( 16), -INT8_C( 30) }, { -INT8_C( 1), -INT8_C( 2), INT8_C( 20), -INT8_C( 10), -INT8_C( 111), INT8_C( 6), -INT8_C( 68), INT8_C( 126) }, { INT8_MIN, -INT8_C( 127), INT8_C( 10), INT8_C( 20), -INT8_C( 42), INT8_C( 16), INT8_C( 52), -INT8_C( 100) } }, { { INT8_C( 18), INT8_C( 60), INT8_C( 4), INT8_C( 117), INT8_C( 103), INT8_C( 22), -INT8_C( 16), -INT8_C( 30) }, { INT8_C( 87), -INT8_C( 83), -INT8_C( 104), -INT8_C( 48), -INT8_C( 111), INT8_C( 6), -INT8_C( 68), INT8_C( 126) }, { INT8_C( 69), -INT8_C( 113), INT8_C( 108), -INT8_C( 91), -INT8_C( 42), INT8_C( 16), INT8_C( 52), -INT8_C( 100) } }, { { -INT8_C( 87), INT8_C( 114), INT8_C( 114), INT8_C( 23), -INT8_C( 5), INT8_C( 68), -INT8_C( 72), -INT8_C( 50) }, { INT8_C( 90), -INT8_C( 66), -INT8_C( 84), INT8_C( 51), -INT8_C( 90), -INT8_C( 91), INT8_C( 49), -INT8_C( 72) }, { -INT8_C( 79), -INT8_C( 76), -INT8_C( 58), INT8_C( 28), INT8_C( 85), -INT8_C( 97), INT8_C( 121), INT8_C( 22) } }, { { -INT8_C( 31), INT8_C( 53), INT8_C( 45), INT8_C( 72), INT8_C( 75), INT8_C( 29), INT8_C( 42), -INT8_C( 94) }, { -INT8_C( 53), -INT8_C( 62), INT8_C( 115), INT8_C( 92), -INT8_C( 55), INT8_C( 47), -INT8_C( 38), INT8_C( 114) }, { INT8_C( 22), INT8_C( 115), INT8_C( 70), INT8_C( 20), -INT8_C( 126), INT8_C( 18), INT8_C( 80), -INT8_C( 48) } }, { { -INT8_C( 94), INT8_C( 77), -INT8_C( 118), -INT8_C( 99), -INT8_C( 111), INT8_C( 66), INT8_C( 107), -INT8_C( 21) }, { INT8_C( 0), INT8_C( 24), INT8_C( 30), -INT8_C( 90), -INT8_C( 67), INT8_C( 79), INT8_C( 95), -INT8_C( 97) }, { INT8_C( 94), INT8_C( 53), -INT8_C( 108), INT8_C( 9), INT8_C( 44), INT8_C( 13), INT8_C( 12), INT8_C( 76) } }, { { -INT8_C( 123), -INT8_C( 116), -INT8_C( 25), -INT8_C( 48), -INT8_C( 86), INT8_C( 18), INT8_C( 115), INT8_C( 117) }, { -INT8_C( 44), -INT8_C( 26), -INT8_C( 47), -INT8_C( 99), INT8_C( 21), -INT8_C( 85), INT8_C( 16), -INT8_C( 73) }, { INT8_C( 79), INT8_C( 90), INT8_C( 22), INT8_C( 51), INT8_C( 107), INT8_C( 103), INT8_C( 99), -INT8_C( 66) } }, { { 
-INT8_C( 8), -INT8_C( 102), INT8_C( 84), -INT8_C( 119), -INT8_C( 36), -INT8_C( 64), INT8_C( 116), -INT8_C( 35) }, { -INT8_C( 40), -INT8_C( 109), -INT8_C( 125), -INT8_C( 107), -INT8_C( 30), -INT8_C( 30), INT8_C( 52), INT8_C( 103) }, { INT8_C( 32), INT8_C( 7), -INT8_C( 47), INT8_C( 12), INT8_C( 6), INT8_C( 34), INT8_C( 64), -INT8_C( 118) } }, { { INT8_C( 111), INT8_C( 28), INT8_C( 56), INT8_C( 25), INT8_C( 46), -INT8_C( 85), -INT8_C( 114), INT8_C( 2) }, { -INT8_C( 111), INT8_C( 95), -INT8_C( 96), -INT8_C( 90), INT8_C( 10), -INT8_C( 80), INT8_C( 94), INT8_C( 3) }, { -INT8_C( 34), INT8_C( 67), -INT8_C( 104), INT8_C( 115), INT8_C( 36), INT8_C( 5), -INT8_C( 48), INT8_C( 1) } }, { { INT8_C( 74), -INT8_C( 78), -INT8_C( 116), INT8_C( 38), INT8_C( 114), INT8_C( 1), INT8_C( 3), INT8_C( 74) }, { -INT8_C( 108), -INT8_C( 121), -INT8_C( 32), INT8_C( 118), INT8_C( 105), INT8_C( 20), -INT8_C( 34), -INT8_C( 40) }, { -INT8_C( 74), INT8_C( 43), INT8_C( 84), INT8_C( 80), INT8_C( 9), INT8_C( 19), INT8_C( 37), INT8_C( 114) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vabd_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vabd_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vabd_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 32445), -INT16_C( 19967), -INT16_C( 6025), INT16_C( 10556) }, { INT16_C( 8506), -INT16_C( 5656), 
-INT16_C( 12592), INT16_C( 28423) }, { INT16_C( 23939), INT16_C( 14311), INT16_C( 6567), INT16_C( 17867) } }, { { INT16_C( 28389), -INT16_C( 10835), INT16_C( 977), INT16_C( 7110) }, { INT16_C( 21821), -INT16_C( 4751), INT16_C( 1025), -INT16_C( 16711) }, { INT16_C( 6568), INT16_C( 6084), INT16_C( 48), INT16_C( 23821) } }, { { -INT16_C( 17790), -INT16_C( 1680), -INT16_C( 21341), -INT16_C( 8926) }, { INT16_C( 2765), -INT16_C( 25146), -INT16_C( 12584), -INT16_C( 17140) }, { INT16_C( 20555), INT16_C( 23466), INT16_C( 8757), INT16_C( 8214) } }, { { -INT16_C( 18116), INT16_C( 3475), INT16_C( 22972), -INT16_C( 1496) }, { -INT16_C( 25938), -INT16_C( 20505), -INT16_C( 24162), INT16_C( 8302) }, { INT16_C( 7822), INT16_C( 23980), -INT16_C( 18402), INT16_C( 9798) } }, { { -INT16_C( 8613), -INT16_C( 487), INT16_C( 15243), INT16_C( 22748) }, { -INT16_C( 23995), INT16_C( 7926), INT16_C( 624), -INT16_C( 21285) }, { INT16_C( 15382), INT16_C( 8413), INT16_C( 14619), -INT16_C( 21503) } }, { { INT16_C( 28348), INT16_C( 30905), -INT16_C( 7481), INT16_C( 30322) }, { INT16_C( 23164), INT16_C( 6693), -INT16_C( 27653), INT16_C( 22074) }, { INT16_C( 5184), INT16_C( 24212), INT16_C( 20172), INT16_C( 8248) } }, { { INT16_C( 21362), -INT16_C( 683), INT16_C( 12686), -INT16_C( 11435) }, { INT16_C( 19411), INT16_C( 17649), -INT16_C( 12978), INT16_C( 2800) }, { INT16_C( 1951), INT16_C( 18332), INT16_C( 25664), INT16_C( 14235) } }, { { -INT16_C( 21957), INT16_C( 898), -INT16_C( 2676), INT16_C( 2169) }, { -INT16_C( 25009), INT16_C( 18978), INT16_C( 23602), -INT16_C( 23392) }, { INT16_C( 3052), INT16_C( 18080), INT16_C( 26278), INT16_C( 25561) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vabd_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; 
i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vabd_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #if !defined(TEST_SIMDE_VABD_NO_TEST_32) static int test_simde_vabd_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 1034024367), -INT32_C( 99551706) }, { -INT32_C( 1874984382), INT32_C( 177876687) }, { -INT32_C( 1385958547), INT32_C( 277428393) } }, { { INT32_C( 1678580952), INT32_C( 1617724945) }, { INT32_C( 1470795301), -INT32_C( 1711584278) }, { INT32_C( 207785651), -INT32_C( 965658073) } }, { { INT32_C( 1742117952), -INT32_C( 731781230) }, { -INT32_C( 1201365015), -INT32_C( 1497170227) }, { -INT32_C( 1351484329), INT32_C( 765388997) } }, { { INT32_C( 738906139), INT32_C( 2072868694) }, { -INT32_C( 254658811), -INT32_C( 1014379134) }, { INT32_C( 993564950), -INT32_C( 1207719468) } }, { { -INT32_C( 64331671), INT32_C( 818973511) }, { -INT32_C( 135776982), INT32_C( 1318955571) }, { INT32_C( 71445311), INT32_C( 499982060) } }, { { -INT32_C( 780425094), INT32_C( 608962591) }, { -INT32_C( 1038868673), INT32_C( 1451597548) }, { INT32_C( 258443579), INT32_C( 842634957) } }, { { INT32_C( 1163046910), INT32_C( 1685398074) }, { -INT32_C( 1956946601), -INT32_C( 2099644153) }, { -INT32_C( 1174973785), -INT32_C( 509925069) } }, { { -INT32_C( 1068280671), -INT32_C( 1662738596) }, { -INT32_C( 1419839042), -INT32_C( 1795038313) }, { INT32_C( 351558371), INT32_C( 132299717) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vabd_s32(a, b); 
simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vabd_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #endif static int test_simde_vabd_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(146), UINT8_C( 83), UINT8_C(218), UINT8_C(204), UINT8_C(117), UINT8_C( 79), UINT8_C( 48), UINT8_C(205) }, { UINT8_C(172), UINT8_C(139), UINT8_C( 88), UINT8_C(179), UINT8_C(132), UINT8_C( 49), UINT8_C( 53), UINT8_C( 37) }, { UINT8_C( 26), UINT8_C( 56), UINT8_C(130), UINT8_C( 25), UINT8_C( 15), UINT8_C( 30), UINT8_C( 5), UINT8_C(168) } }, { { UINT8_C(134), UINT8_C(136), UINT8_C(229), UINT8_C(226), UINT8_C( 40), UINT8_C(202), UINT8_C(126), UINT8_C(230) }, { UINT8_C(195), UINT8_C(220), UINT8_C(145), UINT8_C( 90), UINT8_C(191), UINT8_C(146), UINT8_C(239), UINT8_C( 81) }, { UINT8_C( 61), UINT8_C( 84), UINT8_C( 84), UINT8_C(136), UINT8_C(151), UINT8_C( 56), UINT8_C(113), UINT8_C(149) } }, { { UINT8_C(229), UINT8_C(201), UINT8_C( 29), UINT8_C( 91), UINT8_C( 24), UINT8_C( 77), UINT8_C( 40), UINT8_C(196) }, { UINT8_C(217), UINT8_C(128), UINT8_C(119), UINT8_C( 93), UINT8_C(177), UINT8_C(173), UINT8_C(131), UINT8_C( 55) }, { UINT8_C( 12), UINT8_C( 73), UINT8_C( 90), UINT8_C( 2), UINT8_C(153), UINT8_C( 96), UINT8_C( 91), UINT8_C(141) } }, { { UINT8_C( 53), UINT8_C(104), UINT8_C( 26), UINT8_C( 93), UINT8_C( 50), UINT8_C(152), UINT8_C( 68), UINT8_C(245) }, { UINT8_C(117), UINT8_C(213), UINT8_C( 79), UINT8_C( 52), UINT8_C(104), UINT8_C( 62), UINT8_C(134), UINT8_C( 77) }, { UINT8_C( 64), UINT8_C(109), UINT8_C( 53), 
UINT8_C( 41), UINT8_C( 54), UINT8_C( 90), UINT8_C( 66), UINT8_C(168) } }, { { UINT8_C( 7), UINT8_C(163), UINT8_C(168), UINT8_C( 31), UINT8_C(241), UINT8_C(208), UINT8_C(227), UINT8_C(202) }, { UINT8_C( 80), UINT8_C( 91), UINT8_C( 39), UINT8_C( 2), UINT8_C( 8), UINT8_C(170), UINT8_C( 57), UINT8_C( 61) }, { UINT8_C( 73), UINT8_C( 72), UINT8_C(129), UINT8_C( 29), UINT8_C(233), UINT8_C( 38), UINT8_C(170), UINT8_C(141) } }, { { UINT8_C( 19), UINT8_C( 83), UINT8_C(155), UINT8_C( 69), UINT8_C(236), UINT8_C(223), UINT8_C( 59), UINT8_C( 97) }, { UINT8_C(180), UINT8_C(138), UINT8_C(149), UINT8_C( 28), UINT8_C(201), UINT8_C( 27), UINT8_C(106), UINT8_C(208) }, { UINT8_C(161), UINT8_C( 55), UINT8_C( 6), UINT8_C( 41), UINT8_C( 35), UINT8_C(196), UINT8_C( 47), UINT8_C(111) } }, { { UINT8_C(191), UINT8_C( 18), UINT8_C(240), UINT8_C(176), UINT8_C(227), UINT8_C(211), UINT8_C(122), UINT8_C( 51) }, { UINT8_C( 46), UINT8_C(161), UINT8_C( 53), UINT8_C( 54), UINT8_C( 76), UINT8_C(111), UINT8_C(116), UINT8_C( 95) }, { UINT8_C(145), UINT8_C(143), UINT8_C(187), UINT8_C(122), UINT8_C(151), UINT8_C(100), UINT8_C( 6), UINT8_C( 44) } }, { { UINT8_C(194), UINT8_C( 15), UINT8_C(164), UINT8_C(174), UINT8_C(238), UINT8_C(223), UINT8_C( 15), UINT8_C(162) }, { UINT8_C(106), UINT8_C(165), UINT8_C(191), UINT8_C( 51), UINT8_C(192), UINT8_C( 41), UINT8_C( 3), UINT8_C(127) }, { UINT8_C( 88), UINT8_C(150), UINT8_C( 27), UINT8_C(123), UINT8_C( 46), UINT8_C(182), UINT8_C( 12), UINT8_C( 35) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vabd_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vabd_u8(a, b); 
simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vabd_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(62267), UINT16_C( 7727), UINT16_C(43463), UINT16_C(62802) }, { UINT16_C(34635), UINT16_C(38700), UINT16_C(41206), UINT16_C(47606) }, { UINT16_C(27632), UINT16_C(30973), UINT16_C( 2257), UINT16_C(15196) } }, { { UINT16_C(39599), UINT16_C(40295), UINT16_C(30586), UINT16_C(58431) }, { UINT16_C(65052), UINT16_C(56343), UINT16_C( 6695), UINT16_C(25436) }, { UINT16_C(25453), UINT16_C(16048), UINT16_C(23891), UINT16_C(32995) } }, { { UINT16_C(35598), UINT16_C(54657), UINT16_C(54069), UINT16_C(32970) }, { UINT16_C(63067), UINT16_C(20759), UINT16_C( 3478), UINT16_C(17674) }, { UINT16_C(27469), UINT16_C(33898), UINT16_C(50591), UINT16_C(15296) } }, { { UINT16_C(29351), UINT16_C( 8674), UINT16_C( 8937), UINT16_C( 1285) }, { UINT16_C( 7200), UINT16_C(18657), UINT16_C(15671), UINT16_C(17835) }, { UINT16_C(22151), UINT16_C( 9983), UINT16_C( 6734), UINT16_C(16550) } }, { { UINT16_C(11465), UINT16_C(65050), UINT16_C(58368), UINT16_C(23422) }, { UINT16_C(38363), UINT16_C(29100), UINT16_C(47010), UINT16_C(18871) }, { UINT16_C(26898), UINT16_C(35950), UINT16_C(11358), UINT16_C( 4551) } }, { { UINT16_C(39209), UINT16_C( 4715), UINT16_C(28859), UINT16_C(56343) }, { UINT16_C(63629), UINT16_C(50212), UINT16_C(53046), UINT16_C(65289) }, { UINT16_C(24420), UINT16_C(45497), UINT16_C(24187), UINT16_C( 8946) } }, { { UINT16_C( 9211), UINT16_C(64509), UINT16_C(31495), UINT16_C(57942) }, { UINT16_C( 784), UINT16_C(45652), UINT16_C( 3002), UINT16_C(58363) }, { UINT16_C( 8427), UINT16_C(18857), UINT16_C(28493), UINT16_C( 421) } }, { { UINT16_C(26276), UINT16_C(24821), UINT16_C( 3287), UINT16_C(25660) }, { UINT16_C(24580), 
UINT16_C(14888), UINT16_C(12591), UINT16_C(10809) }, { UINT16_C( 1696), UINT16_C( 9933), UINT16_C( 9304), UINT16_C(14851) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vabd_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t r = simde_vabd_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #if !defined(TEST_SIMDE_VABD_NO_TEST_32) static int test_simde_vabd_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(1529230932), UINT32_C(3242097841) }, { UINT32_C( 963875455), UINT32_C(1092382621) }, { UINT32_C( 565355477), UINT32_C(2149715220) } }, { { UINT32_C(2896237013), UINT32_C( 571530525) }, { UINT32_C(1817983037), UINT32_C(3180828265) }, { UINT32_C(1078253976), UINT32_C(2609297740) } }, { { UINT32_C(2115616204), UINT32_C(3107936057) }, { UINT32_C(2264052713), UINT32_C(4157017890) }, { UINT32_C( 148436509), UINT32_C(1049081833) } }, { { UINT32_C(1050962208), UINT32_C(2220930118) }, { UINT32_C(1458617581), UINT32_C( 521439058) }, { UINT32_C( 407655373), UINT32_C(1699491060) } }, { { UINT32_C(2124229956), UINT32_C(1832377476) }, { UINT32_C(2985503119), UINT32_C(1504295480) }, { UINT32_C( 861273163), UINT32_C( 328081996) } }, { { UINT32_C(1788300579), UINT32_C(4008638209) }, { UINT32_C( 105242291), UINT32_C(2854574438) }, { UINT32_C(1683058288), UINT32_C(1154063771) } }, { { UINT32_C( 170443398), UINT32_C( 779575198) }, { 
UINT32_C(3252644489), UINT32_C(1209698340) }, { UINT32_C(3082201091), UINT32_C( 430123142) } }, { { UINT32_C(3618812373), UINT32_C(1556455592) }, { UINT32_C(3831630462), UINT32_C(3918497635) }, { UINT32_C( 212818089), UINT32_C(2362042043) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vabd_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t r = simde_vabd_u32(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #endif static int test_simde_vabdq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 965.83), SIMDE_FLOAT32_C( -857.34), SIMDE_FLOAT32_C( -560.13), SIMDE_FLOAT32_C( -970.93) }, { SIMDE_FLOAT32_C( -821.69), SIMDE_FLOAT32_C( -283.01), SIMDE_FLOAT32_C( -858.66), SIMDE_FLOAT32_C( -780.80) }, { SIMDE_FLOAT32_C( 1787.52), SIMDE_FLOAT32_C( 574.33), SIMDE_FLOAT32_C( 298.53), SIMDE_FLOAT32_C( 190.13) } }, { { SIMDE_FLOAT32_C( 655.57), SIMDE_FLOAT32_C( 38.23), SIMDE_FLOAT32_C( 268.19), SIMDE_FLOAT32_C( 578.23) }, { SIMDE_FLOAT32_C( 673.43), SIMDE_FLOAT32_C( 933.63), SIMDE_FLOAT32_C( -513.03), SIMDE_FLOAT32_C( 540.19) }, { SIMDE_FLOAT32_C( 17.86), SIMDE_FLOAT32_C( 895.40), SIMDE_FLOAT32_C( 781.22), SIMDE_FLOAT32_C( 38.04) } }, { { SIMDE_FLOAT32_C( 559.01), SIMDE_FLOAT32_C( -436.66), SIMDE_FLOAT32_C( -201.42), SIMDE_FLOAT32_C( -191.00) }, { SIMDE_FLOAT32_C( 509.73), SIMDE_FLOAT32_C( 509.25), 
SIMDE_FLOAT32_C( 60.09), SIMDE_FLOAT32_C( 510.96) }, { SIMDE_FLOAT32_C( 49.28), SIMDE_FLOAT32_C( 945.91), SIMDE_FLOAT32_C( 261.51), SIMDE_FLOAT32_C( 701.96) } }, { { SIMDE_FLOAT32_C( -928.29), SIMDE_FLOAT32_C( -315.21), SIMDE_FLOAT32_C( 729.47), SIMDE_FLOAT32_C( -38.60) }, { SIMDE_FLOAT32_C( -141.89), SIMDE_FLOAT32_C( -957.64), SIMDE_FLOAT32_C( -323.78), SIMDE_FLOAT32_C( -176.06) }, { SIMDE_FLOAT32_C( 786.40), SIMDE_FLOAT32_C( 642.43), SIMDE_FLOAT32_C( 1053.25), SIMDE_FLOAT32_C( 137.46) } }, { { SIMDE_FLOAT32_C( -814.98), SIMDE_FLOAT32_C( 116.09), SIMDE_FLOAT32_C( -146.99), SIMDE_FLOAT32_C( -636.67) }, { SIMDE_FLOAT32_C( 833.08), SIMDE_FLOAT32_C( -5.65), SIMDE_FLOAT32_C( -417.47), SIMDE_FLOAT32_C( 488.65) }, { SIMDE_FLOAT32_C( 1648.06), SIMDE_FLOAT32_C( 121.74), SIMDE_FLOAT32_C( 270.48), SIMDE_FLOAT32_C( 1125.32) } }, { { SIMDE_FLOAT32_C( -967.42), SIMDE_FLOAT32_C( 850.72), SIMDE_FLOAT32_C( 66.88), SIMDE_FLOAT32_C( 706.02) }, { SIMDE_FLOAT32_C( 784.35), SIMDE_FLOAT32_C( 553.85), SIMDE_FLOAT32_C( 246.21), SIMDE_FLOAT32_C( 343.36) }, { SIMDE_FLOAT32_C( 1751.77), SIMDE_FLOAT32_C( 296.87), SIMDE_FLOAT32_C( 179.33), SIMDE_FLOAT32_C( 362.66) } }, { { SIMDE_FLOAT32_C( -882.81), SIMDE_FLOAT32_C( -955.21), SIMDE_FLOAT32_C( -847.64), SIMDE_FLOAT32_C( 626.92) }, { SIMDE_FLOAT32_C( 554.03), SIMDE_FLOAT32_C( 212.45), SIMDE_FLOAT32_C( 137.88), SIMDE_FLOAT32_C( 625.74) }, { SIMDE_FLOAT32_C( 1436.84), SIMDE_FLOAT32_C( 1167.66), SIMDE_FLOAT32_C( 985.52), SIMDE_FLOAT32_C( 1.18) } }, { { SIMDE_FLOAT32_C( 897.23), SIMDE_FLOAT32_C( -132.65), SIMDE_FLOAT32_C( -412.85), SIMDE_FLOAT32_C( -244.66) }, { SIMDE_FLOAT32_C( -90.29), SIMDE_FLOAT32_C( 263.37), SIMDE_FLOAT32_C( 579.28), SIMDE_FLOAT32_C( 94.73) }, { SIMDE_FLOAT32_C( 987.52), SIMDE_FLOAT32_C( 396.02), SIMDE_FLOAT32_C( 992.13), SIMDE_FLOAT32_C( 339.39) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = 
simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vabdq_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vabdq_f32(a, b); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vabdq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -620.54), SIMDE_FLOAT64_C( -567.71) }, { SIMDE_FLOAT64_C( 458.06), SIMDE_FLOAT64_C( -787.46) }, { SIMDE_FLOAT64_C( 1078.60), SIMDE_FLOAT64_C( 219.75) } }, { { SIMDE_FLOAT64_C( 426.64), SIMDE_FLOAT64_C( -959.41) }, { SIMDE_FLOAT64_C( 701.18), SIMDE_FLOAT64_C( 459.23) }, { SIMDE_FLOAT64_C( 274.54), SIMDE_FLOAT64_C( 1418.64) } }, { { SIMDE_FLOAT64_C( 891.32), SIMDE_FLOAT64_C( -231.93) }, { SIMDE_FLOAT64_C( 165.24), SIMDE_FLOAT64_C( 675.67) }, { SIMDE_FLOAT64_C( 726.08), SIMDE_FLOAT64_C( 907.60) } }, { { SIMDE_FLOAT64_C( -678.09), SIMDE_FLOAT64_C( -588.55) }, { SIMDE_FLOAT64_C( 19.03), SIMDE_FLOAT64_C( -560.89) }, { SIMDE_FLOAT64_C( 697.12), SIMDE_FLOAT64_C( 27.66) } }, { { SIMDE_FLOAT64_C( -543.76), SIMDE_FLOAT64_C( 171.38) }, { SIMDE_FLOAT64_C( -933.97), SIMDE_FLOAT64_C( -989.73) }, { SIMDE_FLOAT64_C( 390.21), SIMDE_FLOAT64_C( 1161.11) } }, { { SIMDE_FLOAT64_C( -616.17), SIMDE_FLOAT64_C( 203.91) }, { SIMDE_FLOAT64_C( 636.01), SIMDE_FLOAT64_C( -718.93) }, { SIMDE_FLOAT64_C( 1252.18), SIMDE_FLOAT64_C( 922.84) } }, { { SIMDE_FLOAT64_C( -928.74), SIMDE_FLOAT64_C( -776.84) }, { SIMDE_FLOAT64_C( 36.41), SIMDE_FLOAT64_C( -19.03) }, { SIMDE_FLOAT64_C( 965.15), 
SIMDE_FLOAT64_C( 757.81) } }, { { SIMDE_FLOAT64_C( 486.53), SIMDE_FLOAT64_C( -384.31) }, { SIMDE_FLOAT64_C( -924.30), SIMDE_FLOAT64_C( 865.99) }, { SIMDE_FLOAT64_C( 1410.83), SIMDE_FLOAT64_C( 1250.30) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vabdq_f64(a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t b = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t r = simde_vabdq_f64(a, b); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vabdq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 96), INT8_C( 5), INT8_C( 65), -INT8_C( 34), INT8_C( 54), -INT8_C( 32), INT8_C( 28), INT8_C( 7), -INT8_C( 121), -INT8_C( 62), -INT8_C( 84), -INT8_C( 15), -INT8_C( 127), -INT8_C( 16), INT8_C( 29), INT8_C( 20) }, { -INT8_C( 54), INT8_C( 16), -INT8_C( 97), -INT8_C( 2), INT8_C( 54), INT8_C( 104), -INT8_C( 54), -INT8_C( 118), -INT8_C( 56), INT8_C( 113), INT8_C( 40), INT8_C( 25), INT8_C( 54), INT8_C( 121), -INT8_C( 127), -INT8_C( 106) }, { -INT8_C( 106), INT8_C( 11), -INT8_C( 94), INT8_C( 32), INT8_C( 0), -INT8_C( 120), INT8_C( 82), INT8_C( 125), INT8_C( 65), -INT8_C( 81), INT8_C( 124), INT8_C( 40), -INT8_C( 75), -INT8_C( 119), -INT8_C( 100), INT8_C( 126) } }, { { INT8_C( 126), -INT8_C( 62), INT8_C( 116), -INT8_C( 76), -INT8_C( 94), -INT8_C( 112), -INT8_C( 68), INT8_C( 41), INT8_C( 82), INT8_C( 104), INT8_C( 26), -INT8_C( 45), INT8_C( 88), INT8_C( 
56), -INT8_C( 24), INT8_C( 35) }, { INT8_C( 72), -INT8_C( 121), INT8_C( 33), INT8_C( 126), -INT8_C( 17), -INT8_C( 21), INT8_C( 8), -INT8_C( 72), INT8_C( 92), INT8_C( 49), -INT8_C( 47), -INT8_C( 110), -INT8_C( 86), INT8_C( 82), INT8_C( 40), INT8_C( 41) }, { INT8_C( 54), INT8_C( 59), INT8_C( 83), -INT8_C( 54), INT8_C( 77), INT8_C( 91), INT8_C( 76), INT8_C( 113), INT8_C( 10), INT8_C( 55), INT8_C( 73), INT8_C( 65), -INT8_C( 82), INT8_C( 26), INT8_C( 64), INT8_C( 6) } }, { { INT8_C( 20), -INT8_C( 100), -INT8_C( 35), -INT8_C( 74), INT8_C( 44), -INT8_C( 103), -INT8_C( 33), INT8_C( 126), INT8_C( 1), -INT8_C( 7), INT8_C( 82), INT8_C( 90), INT8_C( 49), INT8_C( 58), INT8_C( 125), INT8_C( 121) }, { -INT8_C( 63), -INT8_C( 98), -INT8_C( 8), -INT8_C( 80), -INT8_C( 119), INT8_C( 0), INT8_C( 104), -INT8_C( 27), INT8_C( 49), INT8_C( 57), INT8_C( 119), -INT8_C( 36), -INT8_C( 117), -INT8_C( 97), INT8_C( 5), -INT8_C( 97) }, { INT8_C( 83), INT8_C( 2), INT8_C( 27), INT8_C( 6), -INT8_C( 93), INT8_C( 103), -INT8_C( 119), -INT8_C( 103), INT8_C( 48), INT8_C( 64), INT8_C( 37), INT8_C( 126), -INT8_C( 90), -INT8_C( 101), INT8_C( 120), -INT8_C( 38) } }, { { INT8_C( 59), -INT8_C( 30), INT8_C( 85), INT8_C( 103), INT8_C( 124), INT8_C( 52), -INT8_C( 27), INT8_C( 125), INT8_C( 46), INT8_C( 55), -INT8_C( 41), INT8_C( 95), INT8_C( 113), INT8_C( 84), -INT8_C( 39), INT8_C( 50) }, { -INT8_C( 14), -INT8_C( 47), -INT8_C( 29), INT8_C( 123), -INT8_C( 47), INT8_C( 75), INT8_C( 96), INT8_C( 3), -INT8_C( 123), -INT8_C( 41), -INT8_C( 33), INT8_C( 16), INT8_C( 118), -INT8_C( 28), -INT8_C( 80), -INT8_C( 79) }, { INT8_C( 73), INT8_C( 17), INT8_C( 114), INT8_C( 20), -INT8_C( 85), INT8_C( 23), INT8_C( 123), INT8_C( 122), -INT8_C( 87), INT8_C( 96), INT8_C( 8), INT8_C( 79), INT8_C( 5), INT8_C( 112), INT8_C( 41), -INT8_C( 127) } }, { { -INT8_C( 58), INT8_C( 5), INT8_C( 24), INT8_C( 66), INT8_C( 58), -INT8_C( 2), -INT8_C( 64), INT8_C( 104), INT8_C( 53), -INT8_C( 105), -INT8_C( 57), -INT8_C( 89), -INT8_C( 20), -INT8_C( 
96), -INT8_C( 39), -INT8_C( 34) }, { INT8_C( 113), -INT8_C( 68), INT8_C( 90), INT8_C( 67), INT8_C( 8), -INT8_C( 70), INT8_C( 70), -INT8_C( 115), -INT8_C( 110), INT8_C( 37), -INT8_C( 99), INT8_C( 8), INT8_C( 9), INT8_C( 77), -INT8_C( 70), -INT8_C( 49) }, { -INT8_C( 85), INT8_C( 73), INT8_C( 66), INT8_C( 1), INT8_C( 50), INT8_C( 68), -INT8_C( 122), -INT8_C( 37), -INT8_C( 93), -INT8_C( 114), INT8_C( 42), INT8_C( 97), INT8_C( 29), -INT8_C( 83), INT8_C( 31), INT8_C( 15) } }, { { INT8_C( 83), -INT8_C( 46), INT8_C( 18), -INT8_C( 115), -INT8_C( 48), -INT8_C( 46), -INT8_C( 11), INT8_C( 6), INT8_C( 105), -INT8_C( 68), -INT8_C( 83), INT8_C( 85), INT8_C( 93), -INT8_C( 122), INT8_C( 52), -INT8_C( 50) }, { INT8_C( 67), -INT8_C( 114), INT8_C( 17), INT8_C( 75), INT8_C( 72), INT8_C( 87), -INT8_C( 40), -INT8_C( 38), INT8_C( 124), INT8_C( 117), -INT8_C( 29), -INT8_C( 123), -INT8_C( 61), -INT8_C( 99), INT8_C( 85), INT8_C( 22) }, { INT8_C( 16), INT8_C( 68), INT8_C( 1), -INT8_C( 66), INT8_C( 120), -INT8_C( 123), INT8_C( 29), INT8_C( 44), INT8_C( 19), -INT8_C( 71), INT8_C( 54), -INT8_C( 48), -INT8_C( 102), INT8_C( 23), INT8_C( 33), INT8_C( 72) } }, { { INT8_C( 111), INT8_C( 103), -INT8_C( 93), INT8_C( 64), INT8_C( 57), -INT8_C( 104), INT8_C( 70), -INT8_C( 94), INT8_C( 84), -INT8_C( 13), -INT8_C( 8), -INT8_C( 79), INT8_C( 121), INT8_C( 44), INT8_MIN, -INT8_C( 68) }, { -INT8_C( 70), -INT8_C( 111), INT8_C( 7), INT8_C( 2), -INT8_C( 23), -INT8_C( 33), -INT8_C( 35), INT8_C( 101), INT8_C( 85), -INT8_C( 64), -INT8_C( 21), INT8_C( 24), INT8_C( 93), INT8_C( 64), INT8_C( 46), -INT8_C( 52) }, { -INT8_C( 75), -INT8_C( 42), INT8_C( 100), INT8_C( 62), INT8_C( 80), INT8_C( 71), INT8_C( 105), -INT8_C( 61), INT8_C( 1), INT8_C( 51), INT8_C( 13), INT8_C( 103), INT8_C( 28), INT8_C( 20), -INT8_C( 82), INT8_C( 16) } }, { { -INT8_C( 89), -INT8_C( 47), INT8_C( 12), -INT8_C( 32), INT8_C( 105), INT8_C( 82), -INT8_C( 126), -INT8_C( 67), INT8_C( 69), INT8_C( 122), INT8_C( 111), -INT8_C( 65), -INT8_C( 90), -INT8_C( 
17), INT8_C( 123), INT8_C( 96) }, { INT8_MIN, -INT8_C( 125), INT8_C( 99), INT8_C( 105), INT8_C( 98), INT8_C( 64), -INT8_C( 49), -INT8_C( 73), INT8_C( 0), -INT8_C( 70), -INT8_C( 49), INT8_C( 93), -INT8_C( 6), -INT8_C( 3), INT8_C( 41), -INT8_C( 95) }, { INT8_C( 39), INT8_C( 78), INT8_C( 87), -INT8_C( 119), INT8_C( 7), INT8_C( 18), INT8_C( 77), INT8_C( 6), INT8_C( 69), -INT8_C( 64), -INT8_C( 96), -INT8_C( 98), INT8_C( 84), INT8_C( 14), INT8_C( 82), -INT8_C( 65) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vabdq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vabdq_s8(a, b); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vabdq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 14030), INT16_C( 14209), INT16_C( 904), -INT16_C( 12555), INT16_C( 25726), INT16_C( 9357), INT16_C( 2131), -INT16_C( 11387) }, { -INT16_C( 6005), -INT16_C( 4547), INT16_C( 3112), INT16_C( 10405), INT16_C( 30150), -INT16_C( 16251), -INT16_C( 20878), INT16_C( 16737) }, { INT16_C( 20035), INT16_C( 18756), INT16_C( 2208), INT16_C( 22960), INT16_C( 4424), INT16_C( 25608), INT16_C( 23009), INT16_C( 28124) } }, { { -INT16_C( 7452), INT16_C( 28024), INT16_C( 28133), INT16_C( 25403), -INT16_C( 14127), INT16_C( 9352), INT16_C( 3536), INT16_C( 23800) }, { INT16_C( 13813), INT16_C( 7498), -INT16_C( 4287), INT16_C( 1861), -INT16_C( 13724), 
-INT16_C( 10297), INT16_C( 10360), INT16_C( 23832) }, { INT16_C( 21265), INT16_C( 20526), INT16_C( 32420), INT16_C( 23542), INT16_C( 403), INT16_C( 19649), INT16_C( 6824), INT16_C( 32) } }, { { -INT16_C( 28662), -INT16_C( 4150), INT16_C( 1534), -INT16_C( 12461), -INT16_C( 9267), -INT16_C( 25100), -INT16_C( 4888), -INT16_C( 8711) }, { INT16_C( 17185), INT16_C( 25338), INT16_C( 16179), -INT16_C( 26775), INT16_C( 12297), -INT16_C( 32402), -INT16_C( 31144), INT16_C( 25310) }, { -INT16_C( 19689), INT16_C( 29488), INT16_C( 14645), INT16_C( 14314), INT16_C( 21564), INT16_C( 7302), INT16_C( 26256), -INT16_C( 31515) } }, { { -INT16_C( 22505), INT16_C( 5457), -INT16_C( 23379), INT16_C( 31460), -INT16_C( 10113), INT16_C( 26392), INT16_C( 4548), -INT16_C( 6844) }, { INT16_C( 15957), -INT16_C( 30649), -INT16_C( 20355), -INT16_C( 31201), -INT16_C( 28960), INT16_C( 14344), -INT16_C( 6636), INT16_C( 11162) }, { -INT16_C( 27074), -INT16_C( 29430), INT16_C( 3024), -INT16_C( 2875), INT16_C( 18847), INT16_C( 12048), INT16_C( 11184), INT16_C( 18006) } }, { { -INT16_C( 4977), INT16_C( 15424), INT16_C( 9616), INT16_C( 4279), -INT16_C( 12291), -INT16_C( 15753), -INT16_C( 17184), INT16_C( 13735) }, { -INT16_C( 4102), INT16_C( 30909), -INT16_C( 8801), -INT16_C( 32514), INT16_C( 1643), INT16_C( 32696), INT16_C( 21485), INT16_C( 31915) }, { INT16_C( 875), INT16_C( 15485), INT16_C( 18417), -INT16_C( 28743), INT16_C( 13934), -INT16_C( 17087), -INT16_C( 26867), INT16_C( 18180) } }, { { -INT16_C( 5313), -INT16_C( 12360), INT16_C( 28432), INT16_C( 3807), INT16_C( 22334), INT16_C( 8144), INT16_C( 30483), INT16_C( 3412) }, { INT16_C( 4710), INT16_C( 1669), -INT16_C( 31505), INT16_C( 23174), INT16_C( 16010), INT16_C( 30681), -INT16_C( 31599), -INT16_C( 12045) }, { INT16_C( 10023), INT16_C( 14029), -INT16_C( 5599), INT16_C( 19367), INT16_C( 6324), INT16_C( 22537), -INT16_C( 3454), INT16_C( 15457) } }, { { -INT16_C( 21392), -INT16_C( 32608), INT16_C( 32539), INT16_C( 23182), INT16_C( 24278), -INT16_C( 
5767), -INT16_C( 12842), INT16_C( 15607) }, { INT16_C( 31967), -INT16_C( 12734), -INT16_C( 14336), -INT16_C( 29912), INT16_C( 519), -INT16_C( 26622), -INT16_C( 2426), -INT16_C( 2455) }, { -INT16_C( 12177), INT16_C( 19874), -INT16_C( 18661), -INT16_C( 12442), INT16_C( 23759), INT16_C( 20855), INT16_C( 10416), INT16_C( 18062) } }, { { INT16_C( 2466), -INT16_C( 17033), INT16_C( 1416), INT16_C( 24343), -INT16_C( 28572), INT16_C( 14920), INT16_C( 16222), INT16_C( 15734) }, { -INT16_C( 17988), -INT16_C( 17396), INT16_C( 13441), -INT16_C( 30649), INT16_C( 18998), -INT16_C( 17119), -INT16_C( 30144), -INT16_C( 7501) }, { INT16_C( 20454), INT16_C( 363), INT16_C( 12025), -INT16_C( 10544), -INT16_C( 17966), INT16_C( 32039), -INT16_C( 19170), INT16_C( 23235) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vabdq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vabdq_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #if !defined(TEST_SIMDE_VABD_NO_TEST_32) static int test_simde_vabdq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 463415955), -INT32_C( 1803897040), -INT32_C( 1513176249), -INT32_C( 1092402174) }, { INT32_C( 2138828797), INT32_C( 1510457891), INT32_C( 1276585996), INT32_C( 1160694450) }, { INT32_C( 1675412842), -INT32_C( 980612365), -INT32_C( 1505205051), -INT32_C( 2041870672) } }, { { INT32_C( 627166965), 
-INT32_C( 860234875), -INT32_C( 1586329698), -INT32_C( 916498997) }, { INT32_C( 1732827716), -INT32_C( 1446948963), INT32_C( 720820344), -INT32_C( 1737481053) }, { INT32_C( 1105660751), INT32_C( 586714088), -INT32_C( 1987817254), INT32_C( 820982056) } }, { { INT32_C( 2008928754), INT32_C( 1262778028), -INT32_C( 924010755), INT32_C( 1334921995) }, { -INT32_C( 1011427034), -INT32_C( 1586726871), -INT32_C( 204774832), INT32_C( 2039167879) }, { -INT32_C( 1274611508), -INT32_C( 1445462397), INT32_C( 719235923), INT32_C( 704245884) } }, { { -INT32_C( 1175369716), -INT32_C( 1157352002), -INT32_C( 159059733), INT32_C( 1631917371) }, { INT32_C( 405076975), -INT32_C( 994471565), INT32_C( 2058847475), -INT32_C( 856407104) }, { INT32_C( 1580446691), INT32_C( 162880437), -INT32_C( 2077060088), -INT32_C( 1806642821) } }, { { INT32_C( 1250289035), INT32_C( 84248858), -INT32_C( 1241806471), -INT32_C( 1911144289) }, { -INT32_C( 1348060357), -INT32_C( 1082957876), -INT32_C( 1556469021), -INT32_C( 110088595) }, { -INT32_C( 1696617904), INT32_C( 1167206734), INT32_C( 314662550), INT32_C( 1801055694) } }, { { INT32_C( 759428371), -INT32_C( 130922369), INT32_C( 1907174866), -INT32_C( 1459633043) }, { -INT32_C( 883448321), -INT32_C( 410334716), INT32_C( 1653327093), INT32_C( 89914354) }, { INT32_C( 1642876692), INT32_C( 279412347), INT32_C( 253847773), INT32_C( 1549547397) } }, { { INT32_C( 1865588464), -INT32_C( 1184340761), -INT32_C( 30796399), -INT32_C( 660133415) }, { -INT32_C( 761004338), -INT32_C( 1095160119), -INT32_C( 450870030), INT32_C( 820673599) }, { -INT32_C( 1668374494), INT32_C( 89180642), INT32_C( 420073631), INT32_C( 1480807014) } }, { { INT32_C( 27204890), INT32_C( 330958721), -INT32_C( 166599651), -INT32_C( 607143923) }, { -INT32_C( 2136116553), -INT32_C( 1824626784), -INT32_C( 344432981), -INT32_C( 182754598) }, { -INT32_C( 2131645853), -INT32_C( 2139381791), INT32_C( 177833330), INT32_C( 424389325) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vabdq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vabdq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #endif static int test_simde_vabdq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(127), UINT8_C(186), UINT8_C(246), UINT8_C( 1), UINT8_C(194), UINT8_C(177), UINT8_C( 20), UINT8_C(223), UINT8_C(149), UINT8_C( 37), UINT8_C(213), UINT8_C(163), UINT8_C(222), UINT8_C(164), UINT8_C(126), UINT8_C(149) }, { UINT8_C( 23), UINT8_C( 44), UINT8_C( 21), UINT8_C(183), UINT8_C(147), UINT8_C( 83), UINT8_C( 74), UINT8_C( 62), UINT8_C(177), UINT8_C(194), UINT8_C( 41), UINT8_C(140), UINT8_C( 37), UINT8_C( 68), UINT8_C(129), UINT8_C(164) }, { UINT8_C(104), UINT8_C(142), UINT8_C(225), UINT8_C(182), UINT8_C( 47), UINT8_C( 94), UINT8_C( 54), UINT8_C(161), UINT8_C( 28), UINT8_C(157), UINT8_C(172), UINT8_C( 23), UINT8_C(185), UINT8_C( 96), UINT8_C( 3), UINT8_C( 15) } }, { { UINT8_MAX, UINT8_C(119), UINT8_C(165), UINT8_C(193), UINT8_C( 40), UINT8_C(185), UINT8_C(160), UINT8_C(190), UINT8_C(223), UINT8_C(117), UINT8_C( 97), UINT8_C(189), UINT8_C( 26), UINT8_C(223), UINT8_C( 82), UINT8_C( 49) }, { UINT8_C( 11), UINT8_C(103), UINT8_C(232), UINT8_C(158), UINT8_C(186), UINT8_C( 51), UINT8_C(221), UINT8_C(107), UINT8_C(245), UINT8_C( 6), UINT8_C(247), UINT8_C( 26), UINT8_C( 75), UINT8_C(120), UINT8_C(191), UINT8_C( 74) }, { 
UINT8_C(244), UINT8_C( 16), UINT8_C( 67), UINT8_C( 35), UINT8_C(146), UINT8_C(134), UINT8_C( 61), UINT8_C( 83), UINT8_C( 22), UINT8_C(111), UINT8_C(150), UINT8_C(163), UINT8_C( 49), UINT8_C(103), UINT8_C(109), UINT8_C( 25) } }, { { UINT8_C(240), UINT8_C(100), UINT8_C( 11), UINT8_C( 24), UINT8_C( 30), UINT8_C(171), UINT8_C(214), UINT8_C(253), UINT8_C( 32), UINT8_C( 55), UINT8_C(186), UINT8_C( 58), UINT8_C( 23), UINT8_C( 12), UINT8_C(107), UINT8_C( 34) }, { UINT8_C(115), UINT8_C( 84), UINT8_C(193), UINT8_C( 45), UINT8_C(135), UINT8_C(158), UINT8_C(152), UINT8_C(124), UINT8_C(164), UINT8_C(144), UINT8_C(151), UINT8_C(239), UINT8_C( 8), UINT8_C( 86), UINT8_C( 57), UINT8_C(248) }, { UINT8_C(125), UINT8_C( 16), UINT8_C(182), UINT8_C( 21), UINT8_C(105), UINT8_C( 13), UINT8_C( 62), UINT8_C(129), UINT8_C(132), UINT8_C( 89), UINT8_C( 35), UINT8_C(181), UINT8_C( 15), UINT8_C( 74), UINT8_C( 50), UINT8_C(214) } }, { { UINT8_C(186), UINT8_C( 68), UINT8_C( 17), UINT8_C(216), UINT8_C(239), UINT8_C(231), UINT8_C(213), UINT8_C( 16), UINT8_C( 31), UINT8_C(143), UINT8_C( 74), UINT8_C( 54), UINT8_C(155), UINT8_C(182), UINT8_C( 88), UINT8_C( 14) }, { UINT8_C( 10), UINT8_C( 25), UINT8_C( 59), UINT8_C(145), UINT8_C(183), UINT8_C(212), UINT8_C( 13), UINT8_C( 92), UINT8_C(100), UINT8_C(164), UINT8_C( 75), UINT8_C(108), UINT8_C(250), UINT8_C(133), UINT8_C(101), UINT8_C(181) }, { UINT8_C(176), UINT8_C( 43), UINT8_C( 42), UINT8_C( 71), UINT8_C( 56), UINT8_C( 19), UINT8_C(200), UINT8_C( 76), UINT8_C( 69), UINT8_C( 21), UINT8_C( 1), UINT8_C( 54), UINT8_C( 95), UINT8_C( 49), UINT8_C( 13), UINT8_C(167) } }, { { UINT8_C(201), UINT8_C(118), UINT8_C(141), UINT8_C(185), UINT8_C( 93), UINT8_C( 99), UINT8_C(201), UINT8_C(124), UINT8_C(242), UINT8_C( 19), UINT8_C(178), UINT8_C(142), UINT8_C(201), UINT8_C( 11), UINT8_C(156), UINT8_C(211) }, { UINT8_C( 36), UINT8_C(216), UINT8_C(100), UINT8_C(220), UINT8_C(172), UINT8_C(114), UINT8_C( 56), UINT8_C( 16), UINT8_C( 22), UINT8_C(131), UINT8_C(124), UINT8_C( 
17), UINT8_C( 8), UINT8_C(225), UINT8_C(198), UINT8_C(210) }, { UINT8_C(165), UINT8_C( 98), UINT8_C( 41), UINT8_C( 35), UINT8_C( 79), UINT8_C( 15), UINT8_C(145), UINT8_C(108), UINT8_C(220), UINT8_C(112), UINT8_C( 54), UINT8_C(125), UINT8_C(193), UINT8_C(214), UINT8_C( 42), UINT8_C( 1) } }, { { UINT8_C( 87), UINT8_C( 83), UINT8_C(139), UINT8_C(181), UINT8_C(182), UINT8_C( 84), UINT8_C( 49), UINT8_C(169), UINT8_C(103), UINT8_C(228), UINT8_C( 55), UINT8_C( 49), UINT8_C(239), UINT8_C(211), UINT8_C( 4), UINT8_C( 19) }, { UINT8_C(171), UINT8_C(105), UINT8_C(239), UINT8_C( 87), UINT8_C(219), UINT8_C( 39), UINT8_C(103), UINT8_C(241), UINT8_C(171), UINT8_C(228), UINT8_C( 2), UINT8_C(179), UINT8_C(197), UINT8_C(200), UINT8_C(133), UINT8_C( 29) }, { UINT8_C( 84), UINT8_C( 22), UINT8_C(100), UINT8_C( 94), UINT8_C( 37), UINT8_C( 45), UINT8_C( 54), UINT8_C( 72), UINT8_C( 68), UINT8_C( 0), UINT8_C( 53), UINT8_C(130), UINT8_C( 42), UINT8_C( 11), UINT8_C(129), UINT8_C( 10) } }, { { UINT8_C( 28), UINT8_C( 16), UINT8_C(210), UINT8_C(210), UINT8_C(100), UINT8_C( 3), UINT8_C(123), UINT8_C(204), UINT8_C(231), UINT8_C(178), UINT8_C(253), UINT8_C(214), UINT8_C(134), UINT8_C( 1), UINT8_C(234), UINT8_C( 49) }, { UINT8_C(106), UINT8_C(217), UINT8_C(137), UINT8_C( 69), UINT8_C( 1), UINT8_C(240), UINT8_C( 55), UINT8_C(172), UINT8_C(212), UINT8_C( 57), UINT8_C( 95), UINT8_C(154), UINT8_C( 2), UINT8_C(229), UINT8_C(183), UINT8_C( 30) }, { UINT8_C( 78), UINT8_C(201), UINT8_C( 73), UINT8_C(141), UINT8_C( 99), UINT8_C(237), UINT8_C( 68), UINT8_C( 32), UINT8_C( 19), UINT8_C(121), UINT8_C(158), UINT8_C( 60), UINT8_C(132), UINT8_C(228), UINT8_C( 51), UINT8_C( 19) } }, { { UINT8_C(245), UINT8_C(137), UINT8_C(240), UINT8_C( 90), UINT8_C(140), UINT8_C(108), UINT8_C( 38), UINT8_C(116), UINT8_C( 30), UINT8_C( 35), UINT8_C( 74), UINT8_C(164), UINT8_C( 36), UINT8_C( 52), UINT8_C(214), UINT8_C(143) }, { UINT8_C( 14), UINT8_C( 95), UINT8_C(212), UINT8_C( 15), UINT8_C( 79), UINT8_C( 11), UINT8_C(187), UINT8_C( 
36), UINT8_C( 69), UINT8_C( 26), UINT8_C(190), UINT8_C( 71), UINT8_MAX, UINT8_C(117), UINT8_C(101), UINT8_C(245) }, { UINT8_C(231), UINT8_C( 42), UINT8_C( 28), UINT8_C( 75), UINT8_C( 61), UINT8_C( 97), UINT8_C(149), UINT8_C( 80), UINT8_C( 39), UINT8_C( 9), UINT8_C(116), UINT8_C( 93), UINT8_C(219), UINT8_C( 65), UINT8_C(113), UINT8_C(102) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vabdq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vabdq_u8(a, b); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vabdq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(22014), UINT16_C(35407), UINT16_C(30145), UINT16_C(57598), UINT16_C(18840), UINT16_C(48260), UINT16_C(23165), UINT16_C(35659) }, { UINT16_C( 8377), UINT16_C( 2458), UINT16_C(21803), UINT16_C(28717), UINT16_C(60272), UINT16_C(28599), UINT16_C( 7264), UINT16_C(24164) }, { UINT16_C(13637), UINT16_C(32949), UINT16_C( 8342), UINT16_C(28881), UINT16_C(41432), UINT16_C(19661), UINT16_C(15901), UINT16_C(11495) } }, { { UINT16_C(45938), UINT16_C(13288), UINT16_C(59176), UINT16_C(49171), UINT16_C(38960), UINT16_C(44413), UINT16_C(51442), UINT16_C(44089) }, { UINT16_C(54248), UINT16_C( 5301), UINT16_C(57897), UINT16_C(39300), UINT16_C(15565), UINT16_C(11528), UINT16_C(27992), UINT16_C(51851) }, { UINT16_C( 8310), UINT16_C( 7987), UINT16_C( 1279), 
UINT16_C( 9871), UINT16_C(23395), UINT16_C(32885), UINT16_C(23450), UINT16_C( 7762) } }, { { UINT16_C(29472), UINT16_C(18942), UINT16_C( 4442), UINT16_C(35337), UINT16_C(34473), UINT16_C(39992), UINT16_C(29007), UINT16_C(14152) }, { UINT16_C(64836), UINT16_C(27979), UINT16_C(53471), UINT16_C(44038), UINT16_C( 3852), UINT16_C(25817), UINT16_C(25724), UINT16_C(39983) }, { UINT16_C(35364), UINT16_C( 9037), UINT16_C(49029), UINT16_C( 8701), UINT16_C(30621), UINT16_C(14175), UINT16_C( 3283), UINT16_C(25831) } }, { { UINT16_C(11735), UINT16_C(13029), UINT16_C(61246), UINT16_C(59580), UINT16_C(62581), UINT16_C(50308), UINT16_C(52325), UINT16_C(43772) }, { UINT16_C(18377), UINT16_C(43031), UINT16_C( 7703), UINT16_C( 9044), UINT16_C(11565), UINT16_C(43400), UINT16_C(46993), UINT16_C(26693) }, { UINT16_C( 6642), UINT16_C(30002), UINT16_C(53543), UINT16_C(50536), UINT16_C(51016), UINT16_C( 6908), UINT16_C( 5332), UINT16_C(17079) } }, { { UINT16_C(11236), UINT16_C( 8858), UINT16_C(22298), UINT16_C(36618), UINT16_C(36427), UINT16_C(45396), UINT16_C(20570), UINT16_C( 9051) }, { UINT16_C(29335), UINT16_C(45003), UINT16_C( 8080), UINT16_C(48594), UINT16_C(23116), UINT16_C(56678), UINT16_C(44049), UINT16_C(62790) }, { UINT16_C(18099), UINT16_C(36145), UINT16_C(14218), UINT16_C(11976), UINT16_C(13311), UINT16_C(11282), UINT16_C(23479), UINT16_C(53739) } }, { { UINT16_C(57559), UINT16_C(61720), UINT16_C( 8759), UINT16_C(33664), UINT16_C(54449), UINT16_C( 2868), UINT16_C(36644), UINT16_C(48175) }, { UINT16_C(64001), UINT16_C(37483), UINT16_C(15642), UINT16_C(26191), UINT16_C(46744), UINT16_C(43332), UINT16_C(35426), UINT16_C(14751) }, { UINT16_C( 6442), UINT16_C(24237), UINT16_C( 6883), UINT16_C( 7473), UINT16_C( 7705), UINT16_C(40464), UINT16_C( 1218), UINT16_C(33424) } }, { { UINT16_C(46954), UINT16_C(41514), UINT16_C(43737), UINT16_C(35365), UINT16_C(22911), UINT16_C(41878), UINT16_C(50664), UINT16_C(59743) }, { UINT16_C(51903), UINT16_C(55675), UINT16_C(51976), UINT16_C(41024), 
UINT16_C(33921), UINT16_C(58185), UINT16_C(59406), UINT16_C(30748) }, { UINT16_C( 4949), UINT16_C(14161), UINT16_C( 8239), UINT16_C( 5659), UINT16_C(11010), UINT16_C(16307), UINT16_C( 8742), UINT16_C(28995) } }, { { UINT16_C(18079), UINT16_C(31002), UINT16_C(16368), UINT16_C(28419), UINT16_C(39320), UINT16_C(32787), UINT16_C(29278), UINT16_C( 7786) }, { UINT16_C(58685), UINT16_C(17911), UINT16_C(14256), UINT16_C(12773), UINT16_C(11963), UINT16_C(51476), UINT16_C(12311), UINT16_C(46658) }, { UINT16_C(40606), UINT16_C(13091), UINT16_C( 2112), UINT16_C(15646), UINT16_C(27357), UINT16_C(18689), UINT16_C(16967), UINT16_C(38872) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vabdq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t r = simde_vabdq_u16(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #if !defined(TEST_SIMDE_VABD_NO_TEST_32) static int test_simde_vabdq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(1731157110), UINT32_C( 886453148), UINT32_C( 733342156), UINT32_C(2571706204) }, { UINT32_C(3051241476), UINT32_C( 870761336), UINT32_C( 150862833), UINT32_C(2730442539) }, { UINT32_C(1320084366), UINT32_C( 15691812), UINT32_C( 582479323), UINT32_C( 158736335) } }, { { UINT32_C( 923397787), UINT32_C(4000112417), UINT32_C( 622404041), UINT32_C(1153327680) }, { UINT32_C( 452566178), UINT32_C(1347346527), UINT32_C( 
106515419), UINT32_C( 631773322) }, { UINT32_C( 470831609), UINT32_C(2652765890), UINT32_C( 515888622), UINT32_C( 521554358) } }, { { UINT32_C( 677228806), UINT32_C(1511442833), UINT32_C( 712978410), UINT32_C( 862862737) }, { UINT32_C( 944662745), UINT32_C( 596155464), UINT32_C(1898570215), UINT32_C( 9884409) }, { UINT32_C( 267433939), UINT32_C( 915287369), UINT32_C(1185591805), UINT32_C( 852978328) } }, { { UINT32_C( 338228099), UINT32_C(2792242876), UINT32_C(4275105133), UINT32_C( 53559082) }, { UINT32_C(4013653927), UINT32_C( 34784283), UINT32_C(2675129253), UINT32_C(2443119117) }, { UINT32_C(3675425828), UINT32_C(2757458593), UINT32_C(1599975880), UINT32_C(2389560035) } }, { { UINT32_C(3131426813), UINT32_C(1918899205), UINT32_C( 745550081), UINT32_C( 388997488) }, { UINT32_C(1007053601), UINT32_C(3560904751), UINT32_C(1634972243), UINT32_C(3136426684) }, { UINT32_C(2124373212), UINT32_C(1642005546), UINT32_C( 889422162), UINT32_C(2747429196) } }, { { UINT32_C(3732183001), UINT32_C(2907755691), UINT32_C(1977204741), UINT32_C(2206992482) }, { UINT32_C(2730463859), UINT32_C(4269276842), UINT32_C(1835002545), UINT32_C(3592901117) }, { UINT32_C(1001719142), UINT32_C(1361521151), UINT32_C( 142202196), UINT32_C(1385908635) } }, { { UINT32_C(2494929896), UINT32_C(1967195504), UINT32_C( 686496454), UINT32_C(2527819554) }, { UINT32_C(3023596042), UINT32_C( 447917929), UINT32_C(2542211482), UINT32_C(1265479522) }, { UINT32_C( 528666146), UINT32_C(1519277575), UINT32_C(1855715028), UINT32_C(1262340032) } }, { { UINT32_C(3135185482), UINT32_C(3996131368), UINT32_C(1544952634), UINT32_C(2633154962) }, { UINT32_C(2488347435), UINT32_C(1957561306), UINT32_C(1997223445), UINT32_C( 801274341) }, { UINT32_C( 646838047), UINT32_C(2038570062), UINT32_C( 452270811), UINT32_C(1831880621) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); 
simde_uint32x4_t r = simde_vabdq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t r = simde_vabdq_u32(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #endif SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vabds_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vabdd_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vabd_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vabd_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vabd_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vabd_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vabd_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vabd_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vabdq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vabdq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vabdq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vabdq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vabdq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vabdq_u16) #if !defined(TEST_SIMDE_VABD_NO_TEST_32) SIMDE_TEST_FUNC_LIST_ENTRY(vabd_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vabd_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vabdq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vabdq_s32) #endif SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/abdl.c000066400000000000000000000466331400333146700164010ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN abdl #include "test-neon.h" #include "../../../simde/arm/neon/abdl.h" static int test_simde_vabdl_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; int16_t r[8]; } test_vec[] = { { { -INT8_C( 21), -INT8_C( 78), -INT8_C( 101), INT8_C( 60), -INT8_C( 91), -INT8_C( 107), INT8_C( 51), -INT8_C( 56) }, { INT8_C( 53), -INT8_C( 119), -INT8_C( 75), INT8_C( 20), -INT8_C( 57), INT8_C( 101), INT8_C( 65), -INT8_C( 75) }, { INT16_C( 74), INT16_C( 41), 
INT16_C( 26), INT16_C( 40), INT16_C( 34), INT16_C( 208), INT16_C( 14), INT16_C( 19) } }, { { -INT8_C( 94), INT8_C( 96), -INT8_C( 70), INT8_C( 27), -INT8_C( 76), INT8_C( 78), -INT8_C( 11), INT8_C( 12) }, { -INT8_C( 6), INT8_C( 118), INT8_C( 100), INT8_C( 92), -INT8_C( 108), INT8_C( 120), -INT8_C( 37), INT8_MAX }, { INT16_C( 88), INT16_C( 22), INT16_C( 170), INT16_C( 65), INT16_C( 32), INT16_C( 42), INT16_C( 26), INT16_C( 115) } }, { { INT8_C( 43), INT8_C( 118), -INT8_C( 69), -INT8_C( 48), INT8_C( 11), -INT8_C( 17), -INT8_C( 103), INT8_C( 65) }, { INT8_C( 120), INT8_C( 78), INT8_C( 85), INT8_C( 64), -INT8_C( 76), -INT8_C( 106), -INT8_C( 11), INT8_C( 86) }, { INT16_C( 77), INT16_C( 40), INT16_C( 154), INT16_C( 112), INT16_C( 87), INT16_C( 89), INT16_C( 92), INT16_C( 21) } }, { { -INT8_C( 10), -INT8_C( 81), INT8_C( 113), -INT8_C( 86), -INT8_C( 3), INT8_C( 102), -INT8_C( 74), -INT8_C( 9) }, { -INT8_C( 35), INT8_C( 26), INT8_C( 83), INT8_C( 113), -INT8_C( 109), INT8_C( 46), -INT8_C( 16), -INT8_C( 66) }, { INT16_C( 25), INT16_C( 107), INT16_C( 30), INT16_C( 199), INT16_C( 106), INT16_C( 56), INT16_C( 58), INT16_C( 57) } }, { { -INT8_C( 92), -INT8_C( 85), -INT8_C( 114), -INT8_C( 80), -INT8_C( 102), INT8_C( 39), -INT8_C( 15), INT8_C( 19) }, { INT8_C( 118), INT8_C( 70), INT8_C( 83), INT8_C( 42), -INT8_C( 35), INT8_C( 72), INT8_MIN, -INT8_C( 45) }, { INT16_C( 210), INT16_C( 155), INT16_C( 197), INT16_C( 122), INT16_C( 67), INT16_C( 33), INT16_C( 113), INT16_C( 64) } }, { { -INT8_C( 8), -INT8_C( 14), INT8_C( 126), -INT8_C( 11), INT8_C( 88), INT8_C( 52), -INT8_C( 19), INT8_C( 53) }, { INT8_C( 79), INT8_C( 64), -INT8_C( 90), -INT8_C( 30), INT8_C( 111), -INT8_C( 106), -INT8_C( 96), INT8_C( 19) }, { INT16_C( 87), INT16_C( 78), INT16_C( 216), INT16_C( 19), INT16_C( 23), INT16_C( 158), INT16_C( 77), INT16_C( 34) } }, { { INT8_C( 66), INT8_C( 46), -INT8_C( 61), -INT8_C( 36), INT8_C( 86), -INT8_C( 76), -INT8_C( 17), -INT8_C( 52) }, { -INT8_C( 5), INT8_C( 66), -INT8_C( 10), -INT8_C( 
40), -INT8_C( 117), INT8_C( 118), -INT8_C( 85), -INT8_C( 125) }, { INT16_C( 71), INT16_C( 20), INT16_C( 51), INT16_C( 4), INT16_C( 203), INT16_C( 194), INT16_C( 68), INT16_C( 73) } }, { { INT8_C( 104), INT8_C( 41), INT8_C( 120), -INT8_C( 63), INT8_C( 94), INT8_C( 101), -INT8_C( 10), -INT8_C( 83) }, { -INT8_C( 90), -INT8_C( 99), -INT8_C( 113), INT8_C( 21), INT8_C( 51), INT8_C( 47), INT8_C( 40), INT8_C( 117) }, { INT16_C( 194), INT16_C( 140), INT16_C( 233), INT16_C( 84), INT16_C( 43), INT16_C( 54), INT16_C( 50), INT16_C( 200) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int16x8_t r = simde_vabdl_s8(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int16x8_t r = simde_vabdl_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vabdl_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; int32_t r[8]; } test_vec[] = { { { -INT16_C( 21822), -INT16_C( 5449), -INT16_C( 14106), -INT16_C( 17373) }, { -INT16_C( 20174), -INT16_C( 26812), INT16_C( 3323), INT16_C( 12091) }, { INT32_C( 1648), INT32_C( 21363), INT32_C( 17429), INT32_C( 29464) } }, { { -INT16_C( 29016), -INT16_C( 25436), INT16_C( 32702), INT16_C( 7743) }, { INT16_C( 18778), -INT16_C( 10383), -INT16_C( 21633), INT16_C( 16951) }, { INT32_C( 47794), INT32_C( 15053), INT32_C( 54335), INT32_C( 9208) } }, { { -INT16_C( 4522), INT16_C( 15404), INT16_C( 20406), -INT16_C( 5639) }, { INT16_C( 15617), -INT16_C( 896), -INT16_C( 17591), -INT16_C( 3797) }, { 
INT32_C( 20139), INT32_C( 16300), INT32_C( 37997), INT32_C( 1842) } }, { { -INT16_C( 12470), INT16_C( 2189), -INT16_C( 13234), -INT16_C( 22234) }, { -INT16_C( 26859), -INT16_C( 27264), -INT16_C( 18621), -INT16_C( 26153) }, { INT32_C( 14389), INT32_C( 29453), INT32_C( 5387), INT32_C( 3919) } }, { { INT16_C( 933), INT16_C( 23509), -INT16_C( 12717), INT16_C( 21572) }, { -INT16_C( 15093), INT16_C( 21584), INT16_C( 31616), -INT16_C( 13755) }, { INT32_C( 16026), INT32_C( 1925), INT32_C( 44333), INT32_C( 35327) } }, { { -INT16_C( 11702), -INT16_C( 26157), -INT16_C( 1634), -INT16_C( 19390) }, { -INT16_C( 15727), -INT16_C( 11191), INT16_C( 8313), INT16_C( 7789) }, { INT32_C( 4025), INT32_C( 14966), INT32_C( 9947), INT32_C( 27179) } }, { { INT16_C( 16931), INT16_C( 30329), -INT16_C( 16879), INT16_C( 7370) }, { INT16_C( 6787), INT16_C( 881), -INT16_C( 18795), -INT16_C( 7986) }, { INT32_C( 10144), INT32_C( 29448), INT32_C( 1916), INT32_C( 15356) } }, { { -INT16_C( 24183), INT16_C( 10105), -INT16_C( 17510), INT16_C( 11227) }, { INT16_C( 9341), -INT16_C( 2305), INT16_C( 27716), INT16_C( 26644) }, { INT32_C( 33524), INT32_C( 12410), INT32_C( 45226), INT32_C( 15417) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int32x4_t r = simde_vabdl_s16(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int32x4_t r = simde_vabdl_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vabdl_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { 
int32_t a[8]; int32_t b[8]; int64_t r[8]; } test_vec[] = { { { -INT32_C( 1059156561), -INT32_C( 824399541) }, { INT32_C( 1506954691), -INT32_C( 1925603324) }, { INT64_C( 2566111252), INT64_C( 1101203783) } }, { { -INT32_C( 608914879), -INT32_C( 368603027) }, { -INT32_C( 102758732), INT32_C( 576844915) }, { INT64_C( 506156147), INT64_C( 945447942) } }, { { -INT32_C( 840810623), -INT32_C( 1399079192) }, { INT32_C( 268791052), INT32_C( 1318927885) }, { INT64_C( 1109601675), INT64_C( 2718007077) } }, { { INT32_C( 1563054576), -INT32_C( 1773719071) }, { -INT32_C( 1433458889), -INT32_C( 1664290789) }, { INT64_C( 2996513465), INT64_C( 109428282) } }, { { INT32_C( 409579055), INT32_C( 2042889581) }, { -INT32_C( 2138453646), -INT32_C( 137484793) }, { INT64_C( 2548032701), INT64_C( 2180374374) } }, { { INT32_C( 1498740855), INT32_C( 1643092777) }, { -INT32_C( 586449214), -INT32_C( 1652959122) }, { INT64_C( 2085190069), INT64_C( 3296051899) } }, { { -INT32_C( 206183546), INT32_C( 1517058536) }, { INT32_C( 1239086402), -INT32_C( 1824478949) }, { INT64_C( 1445269948), INT64_C( 3341537485) } }, { { -INT32_C( 873687903), -INT32_C( 248718545) }, { -INT32_C( 942786727), -INT32_C( 1771812849) }, { INT64_C( 69098824), INT64_C( 1523094304) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int64x2_t r = simde_vabdl_s32(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int64x2_t r = simde_vabdl_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int 
test_simde_vabdl_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT8_C(114), UINT8_C(157), UINT8_C(216), UINT8_C( 82), UINT8_C(129), UINT8_C( 62), UINT8_C(248), UINT8_C(156) }, { UINT8_C(177), UINT8_C( 48), UINT8_C( 68), UINT8_C(194), UINT8_C( 65), UINT8_C( 76), UINT8_C(174), UINT8_C(178) }, { UINT16_C( 63), UINT16_C( 109), UINT16_C( 148), UINT16_C( 112), UINT16_C( 64), UINT16_C( 14), UINT16_C( 74), UINT16_C( 22) } }, { { UINT8_C(191), UINT8_C(212), UINT8_C(174), UINT8_C(236), UINT8_C( 55), UINT8_C(141), UINT8_C( 55), UINT8_C(218) }, { UINT8_C( 20), UINT8_C( 68), UINT8_C( 19), UINT8_C(122), UINT8_C( 38), UINT8_C( 29), UINT8_C(194), UINT8_C(152) }, { UINT16_C( 171), UINT16_C( 144), UINT16_C( 155), UINT16_C( 114), UINT16_C( 17), UINT16_C( 112), UINT16_C( 139), UINT16_C( 66) } }, { { UINT8_C(187), UINT8_C(155), UINT8_C(234), UINT8_C( 60), UINT8_C(217), UINT8_C(226), UINT8_C(216), UINT8_C(139) }, { UINT8_C( 18), UINT8_C( 28), UINT8_C( 77), UINT8_C( 84), UINT8_C(104), UINT8_C(251), UINT8_C( 6), UINT8_C( 39) }, { UINT16_C( 169), UINT16_C( 127), UINT16_C( 157), UINT16_C( 24), UINT16_C( 113), UINT16_C( 25), UINT16_C( 210), UINT16_C( 100) } }, { { UINT8_C(207), UINT8_C(181), UINT8_C( 20), UINT8_C( 6), UINT8_C( 66), UINT8_C( 75), UINT8_C(224), UINT8_C( 87) }, { UINT8_C(143), UINT8_C(243), UINT8_C(209), UINT8_C(181), UINT8_C( 17), UINT8_C(147), UINT8_C( 77), UINT8_C(204) }, { UINT16_C( 64), UINT16_C( 62), UINT16_C( 189), UINT16_C( 175), UINT16_C( 49), UINT16_C( 72), UINT16_C( 147), UINT16_C( 117) } }, { { UINT8_C( 46), UINT8_C( 56), UINT8_C( 8), UINT8_C( 8), UINT8_C( 26), UINT8_C(225), UINT8_C(147), UINT8_C( 45) }, { UINT8_C(253), UINT8_C(224), UINT8_C(129), UINT8_C(102), UINT8_C(220), UINT8_C(135), UINT8_C(141), UINT8_C(171) }, { UINT16_C( 207), UINT16_C( 168), UINT16_C( 121), UINT16_C( 94), UINT16_C( 194), UINT16_C( 90), UINT16_C( 6), UINT16_C( 126) } }, { { UINT8_C( 60), UINT8_C(161), UINT8_C(178), 
UINT8_C(127), UINT8_C(236), UINT8_C(146), UINT8_C(214), UINT8_C(123) }, { UINT8_C(134), UINT8_C(167), UINT8_C( 48), UINT8_C(151), UINT8_C( 58), UINT8_C(126), UINT8_C( 99), UINT8_C(105) }, { UINT16_C( 74), UINT16_C( 6), UINT16_C( 130), UINT16_C( 24), UINT16_C( 178), UINT16_C( 20), UINT16_C( 115), UINT16_C( 18) } }, { { UINT8_C(182), UINT8_C(107), UINT8_C(113), UINT8_C(208), UINT8_C( 76), UINT8_C( 4), UINT8_C(253), UINT8_C( 74) }, { UINT8_C(228), UINT8_C(126), UINT8_C(176), UINT8_C(192), UINT8_C( 6), UINT8_C( 61), UINT8_C(108), UINT8_C( 66) }, { UINT16_C( 46), UINT16_C( 19), UINT16_C( 63), UINT16_C( 16), UINT16_C( 70), UINT16_C( 57), UINT16_C( 145), UINT16_C( 8) } }, { { UINT8_C(223), UINT8_C( 30), UINT8_C(193), UINT8_C(203), UINT8_C(176), UINT8_C(151), UINT8_C( 71), UINT8_C( 54) }, { UINT8_C( 62), UINT8_C(119), UINT8_C(205), UINT8_C(121), UINT8_C(245), UINT8_C( 48), UINT8_C(226), UINT8_C(171) }, { UINT16_C( 161), UINT16_C( 89), UINT16_C( 12), UINT16_C( 82), UINT16_C( 69), UINT16_C( 103), UINT16_C( 155), UINT16_C( 117) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint16x8_t r = simde_vabdl_u8(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint16x8_t r = simde_vabdl_u8(a, b); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vabdl_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; uint32_t r[8]; } test_vec[] = { { { UINT16_C(23875), UINT16_C(19232), UINT16_C(54709), UINT16_C(16394) }, { 
UINT16_C(53548), UINT16_C(54723), UINT16_C( 9886), UINT16_C(53583) }, { UINT32_C( 29673), UINT32_C( 35491), UINT32_C( 44823), UINT32_C( 37189) } }, { { UINT16_C( 3348), UINT16_C(14900), UINT16_C(47184), UINT16_C(20429) }, { UINT16_C(29100), UINT16_C(28740), UINT16_C(20361), UINT16_C(52445) }, { UINT32_C( 25752), UINT32_C( 13840), UINT32_C( 26823), UINT32_C( 32016) } }, { { UINT16_C(64940), UINT16_C(24856), UINT16_C( 8914), UINT16_C(65185) }, { UINT16_C(25843), UINT16_C(37331), UINT16_C( 9099), UINT16_C(40803) }, { UINT32_C( 39097), UINT32_C( 12475), UINT32_C( 185), UINT32_C( 24382) } }, { { UINT16_C(38704), UINT16_C(32985), UINT16_C(42832), UINT16_C(64719) }, { UINT16_C( 5144), UINT16_C(41580), UINT16_C(18787), UINT16_C( 3950) }, { UINT32_C( 33560), UINT32_C( 8595), UINT32_C( 24045), UINT32_C( 60769) } }, { { UINT16_C(34374), UINT16_C( 6512), UINT16_C( 4520), UINT16_C(39703) }, { UINT16_C(60278), UINT16_C( 301), UINT16_C(36878), UINT16_C(16032) }, { UINT32_C( 25904), UINT32_C( 6211), UINT32_C( 32358), UINT32_C( 23671) } }, { { UINT16_C(31015), UINT16_C(30655), UINT16_C(36384), UINT16_C(14707) }, { UINT16_C(57506), UINT16_C( 1499), UINT16_C(18729), UINT16_C(28692) }, { UINT32_C( 26491), UINT32_C( 29156), UINT32_C( 17655), UINT32_C( 13985) } }, { { UINT16_C(34000), UINT16_C(30857), UINT16_C(41110), UINT16_C( 3092) }, { UINT16_C(16779), UINT16_C(39181), UINT16_C(44497), UINT16_C(63704) }, { UINT32_C( 17221), UINT32_C( 8324), UINT32_C( 3387), UINT32_C( 60612) } }, { { UINT16_C(38694), UINT16_C(18288), UINT16_C(58149), UINT16_C(51328) }, { UINT16_C(23491), UINT16_C(60877), UINT16_C(58020), UINT16_C(29789) }, { UINT32_C( 15203), UINT32_C( 42589), UINT32_C( 129), UINT32_C( 21539) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint32x4_t r = simde_vabdl_u16(a, b); simde_test_arm_neon_assert_equal_u32x4(r, 
simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint32x4_t r = simde_vabdl_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vabdl_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[8]; uint32_t b[8]; uint64_t r[8]; } test_vec[] = { { { UINT32_C(4243449446), UINT32_C( 302514566) }, { UINT32_C( 329979202), UINT32_C(3909845954) }, { UINT64_C( 3913470244), UINT64_C( 3607331388) } }, { { UINT32_C(1076919066), UINT32_C( 570994783) }, { UINT32_C(2937050379), UINT32_C( 505703607) }, { UINT64_C( 1860131313), UINT64_C( 65291176) } }, { { UINT32_C(3642364242), UINT32_C(1424696082) }, { UINT32_C(4217869880), UINT32_C( 887386650) }, { UINT64_C( 575505638), UINT64_C( 537309432) } }, { { UINT32_C(1299453166), UINT32_C(3480190148) }, { UINT32_C( 159285074), UINT32_C(1042785003) }, { UINT64_C( 1140168092), UINT64_C( 2437405145) } }, { { UINT32_C(3306635955), UINT32_C(2635661925) }, { UINT32_C(2996338840), UINT32_C(3790044403) }, { UINT64_C( 310297115), UINT64_C( 1154382478) } }, { { UINT32_C(1412324240), UINT32_C( 706977240) }, { UINT32_C( 137601564), UINT32_C(4165360452) }, { UINT64_C( 1274722676), UINT64_C( 3458383212) } }, { { UINT32_C( 45964701), UINT32_C(4154447711) }, { UINT32_C(1252669527), UINT32_C(1160483252) }, { UINT64_C( 1206704826), UINT64_C( 2993964459) } }, { { UINT32_C(3298384364), UINT32_C( 334413303) }, { UINT32_C(2736464479), UINT32_C( 446390653) }, { UINT64_C( 561919885), UINT64_C( 111977350) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); 
simde_uint64x2_t r = simde_vabdl_u32(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint64x2_t r = simde_vabdl_u32(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vabdl_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vabdl_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vabdl_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vabdl_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vabdl_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vabdl_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/abs.c000066400000000000000000000632721400333146700162420ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN abs #include "test-neon.h" #include "../../../simde/arm/neon/abs.h" static int test_simde_vabsd_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a; int64_t r; } test_vec[] = { { INT64_C( 276336164582230812), INT64_C( 276336164582230812) }, { -INT64_C( 7462378710667308762), INT64_C( 7462378710667308762) }, { -INT64_C( 3808596764535413647), INT64_C( 3808596764535413647) }, { -INT64_C( 1744909785833839419), INT64_C( 1744909785833839419) }, { INT64_C( 3578898303845682272), INT64_C( 3578898303845682272) }, { -INT64_C( 6462880738838780002), INT64_C( 6462880738838780002) }, { INT64_C( 7176398727321188705), INT64_C( 7176398727321188705) }, { INT64_C( 1281167105828772884), INT64_C( 1281167105828772884) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int64_t r = simde_vabsd_s64(test_vec[i].a); simde_assert_equal_i64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int64_t a = 
simde_test_codegen_random_i64(); int64_t r = simde_vabsd_s64(a); simde_test_codegen_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vabs_f32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { simde_float32 a[2]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 240.57), SIMDE_FLOAT32_C( 605.25) }, { SIMDE_FLOAT32_C( 240.57), SIMDE_FLOAT32_C( 605.25) } }, { { SIMDE_FLOAT32_C( -896.34), SIMDE_FLOAT32_C( -983.98) }, { SIMDE_FLOAT32_C( 896.34), SIMDE_FLOAT32_C( 983.98) } }, { { SIMDE_FLOAT32_C( 435.53), SIMDE_FLOAT32_C( -284.04) }, { SIMDE_FLOAT32_C( 435.53), SIMDE_FLOAT32_C( 284.04) } }, { { SIMDE_FLOAT32_C( 659.95), SIMDE_FLOAT32_C( -108.19) }, { SIMDE_FLOAT32_C( 659.95), SIMDE_FLOAT32_C( 108.19) } }, { { SIMDE_FLOAT32_C( -501.15), SIMDE_FLOAT32_C( 711.09) }, { SIMDE_FLOAT32_C( 501.15), SIMDE_FLOAT32_C( 711.09) } }, { { SIMDE_FLOAT32_C( 552.72), SIMDE_FLOAT32_C( -497.32) }, { SIMDE_FLOAT32_C( 552.72), SIMDE_FLOAT32_C( 497.32) } }, { { SIMDE_FLOAT32_C( 568.44), SIMDE_FLOAT32_C( -896.02) }, { SIMDE_FLOAT32_C( 568.44), SIMDE_FLOAT32_C( 896.02) } }, { { SIMDE_FLOAT32_C( 997.23), SIMDE_FLOAT32_C( -515.53) }, { SIMDE_FLOAT32_C( 997.23), SIMDE_FLOAT32_C( 515.53) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t r = simde_vabs_f32(a); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vabs_f64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { simde_float64 a[1]; simde_float64 r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( 526.66) }, { SIMDE_FLOAT64_C( 526.66) } }, { { SIMDE_FLOAT64_C( 52.02) }, { SIMDE_FLOAT64_C( 52.02) } }, { { SIMDE_FLOAT64_C( 169.50) }, { SIMDE_FLOAT64_C( 169.50) } }, { { SIMDE_FLOAT64_C( 466.20) }, { SIMDE_FLOAT64_C( 466.20) } }, { { SIMDE_FLOAT64_C( -862.24) }, { SIMDE_FLOAT64_C( 862.24) } }, { 
{ SIMDE_FLOAT64_C( 411.84) }, { SIMDE_FLOAT64_C( 411.84) } }, { { SIMDE_FLOAT64_C( -921.62) }, { SIMDE_FLOAT64_C( 921.62) } }, { { SIMDE_FLOAT64_C( -55.03) }, { SIMDE_FLOAT64_C( 55.03) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t r = simde_vabs_f64(a); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vabs_s8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { int8_t a[8]; int8_t r[8]; } test_vec[] = { { { INT8_MIN, -INT8_C( 18), INT8_C( 117), -INT8_C( 22), INT8_C( 13), -INT8_C( 62), -INT8_C( 94), INT8_C( 57) }, { INT8_MIN, INT8_C( 18), INT8_C( 117), INT8_C( 22), INT8_C( 13), INT8_C( 62), INT8_C( 94), INT8_C( 57) } }, { { INT8_C( 32), INT8_C( 12), INT8_MIN, INT8_C( 3), -INT8_C( 50), INT8_C( 38), -INT8_C( 120), INT8_C( 34) }, { INT8_C( 32), INT8_C( 12), INT8_MIN, INT8_C( 3), INT8_C( 50), INT8_C( 38), INT8_C( 120), INT8_C( 34) } }, { { INT8_C( 7), INT8_C( 100), -INT8_C( 64), -INT8_C( 52), -INT8_C( 66), -INT8_C( 82), -INT8_C( 16), INT8_C( 44) }, { INT8_C( 7), INT8_C( 100), INT8_C( 64), INT8_C( 52), INT8_C( 66), INT8_C( 82), INT8_C( 16), INT8_C( 44) } }, { { INT8_C( 62), -INT8_C( 64), INT8_C( 55), INT8_C( 87), -INT8_C( 99), INT8_C( 82), -INT8_C( 13), -INT8_C( 62) }, { INT8_C( 62), INT8_C( 64), INT8_C( 55), INT8_C( 87), INT8_C( 99), INT8_C( 82), INT8_C( 13), INT8_C( 62) } }, { { INT8_C( 64), INT8_C( 105), -INT8_C( 84), INT8_C( 77), INT8_C( 43), INT8_C( 78), -INT8_C( 121), INT8_C( 75) }, { INT8_C( 64), INT8_C( 105), INT8_C( 84), INT8_C( 77), INT8_C( 43), INT8_C( 78), INT8_C( 121), INT8_C( 75) } }, { { INT8_C( 90), INT8_C( 7), INT8_C( 78), INT8_C( 40), INT8_C( 45), -INT8_C( 41), INT8_C( 75), INT8_C( 52) }, { INT8_C( 90), INT8_C( 7), INT8_C( 78), INT8_C( 40), INT8_C( 45), INT8_C( 41), INT8_C( 75), INT8_C( 52) } }, { { INT8_C( 59), INT8_C( 11), INT8_C( 0), -INT8_C( 7), -INT8_C( 71), -INT8_C( 15), INT8_C( 38), 
-INT8_C( 8) }, { INT8_C( 59), INT8_C( 11), INT8_C( 0), INT8_C( 7), INT8_C( 71), INT8_C( 15), INT8_C( 38), INT8_C( 8) } }, { { -INT8_C( 79), INT8_C( 93), INT8_C( 79), INT8_C( 78), -INT8_C( 81), INT8_C( 67), INT8_C( 16), -INT8_C( 16) }, { INT8_C( 79), INT8_C( 93), INT8_C( 79), INT8_C( 78), INT8_C( 81), INT8_C( 67), INT8_C( 16), INT8_C( 16) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t r = simde_vabs_s8(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vabs_s16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { int16_t a[4]; int16_t r[4]; } test_vec[] = { { { INT16_MIN, -INT16_C( 26113), -INT16_C( 28950), -INT16_C( 11718) }, { INT16_MIN, INT16_C( 26113), INT16_C( 28950), INT16_C( 11718) } }, { { INT16_C( 26788), -INT16_C( 29463), -INT16_C( 17823), -INT16_C( 31905) }, { INT16_C( 26788), INT16_C( 29463), INT16_C( 17823), INT16_C( 31905) } }, { { -INT16_C( 14514), INT16_C( 29487), INT16_C( 22021), -INT16_C( 9957) }, { INT16_C( 14514), INT16_C( 29487), INT16_C( 22021), INT16_C( 9957) } }, { { -INT16_C( 19800), INT16_C( 18730), -INT16_C( 30130), -INT16_C( 9884) }, { INT16_C( 19800), INT16_C( 18730), INT16_C( 30130), INT16_C( 9884) } }, { { INT16_C( 25580), -INT16_C( 10638), -INT16_C( 21263), -INT16_C( 26968) }, { INT16_C( 25580), INT16_C( 10638), INT16_C( 21263), INT16_C( 26968) } }, { { -INT16_C( 28395), INT16_C( 30242), -INT16_C( 32437), -INT16_C( 26118) }, { INT16_C( 28395), INT16_C( 30242), INT16_C( 32437), INT16_C( 26118) } }, { { INT16_C( 10569), INT16_C( 19980), INT16_C( 10368), INT16_C( 10279) }, { INT16_C( 10569), INT16_C( 19980), INT16_C( 10368), INT16_C( 10279) } }, { { INT16_C( 20954), INT16_C( 10354), -INT16_C( 10533), -INT16_C( 14591) }, { INT16_C( 20954), INT16_C( 10354), INT16_C( 10533), INT16_C( 14591) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = 
simde_vld1_s16(test_vec[i].a); simde_int16x4_t r = simde_vabs_s16(a); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vabs_s32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { int32_t a[2]; int32_t r[2]; } test_vec[] = { { { INT32_MIN, INT32_C( 958986776) }, { INT32_MIN, INT32_C( 958986776) } }, { { INT32_C( 1449993369), -INT32_C( 502149650) }, { INT32_C( 1449993369), INT32_C( 502149650) } }, { { -INT32_C( 149790075), INT32_C( 349615921) }, { INT32_C( 149790075), INT32_C( 349615921) } }, { { INT32_C( 872241755), -INT32_C( 866869240) }, { INT32_C( 872241755), INT32_C( 866869240) } }, { { INT32_C( 1757820240), -INT32_C( 928846289) }, { INT32_C( 1757820240), INT32_C( 928846289) } }, { { INT32_C( 18812691), INT32_C( 1642344668) }, { INT32_C( 18812691), INT32_C( 1642344668) } }, { { -INT32_C( 1017579886), INT32_C( 148385709) }, { INT32_C( 1017579886), INT32_C( 148385709) } }, { { -INT32_C( 1841506935), -INT32_C( 983658379) }, { INT32_C( 1841506935), INT32_C( 983658379) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t r = simde_vabs_s32(a); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vabs_s64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { int64_t a[1]; int64_t r[1]; } test_vec[] = { { { INT64_MIN }, { INT64_MIN } }, { { -INT64_C( 9123335531813202818) }, { INT64_C( 9123335531813202818) } }, { { -INT64_C( 969261482036724268) }, { INT64_C( 969261482036724268) } }, { { -INT64_C( 1747836245580099480) }, { INT64_C( 1747836245580099480) } }, { { -INT64_C( 3930266661464934217) }, { INT64_C( 3930266661464934217) } }, { { INT64_C( 4765662101956266231) }, { INT64_C( 4765662101956266231) } }, { { -INT64_C( 4374945379510722939) }, { INT64_C( 4374945379510722939) } }, { { INT64_C( 1548238122455985763) }, { INT64_C( 1548238122455985763) } } }; for (size_t i = 0 ; 
i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t r = simde_vabs_s64(a); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; } static int test_simde_vabsq_f32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { simde_float32 a[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 462.30), SIMDE_FLOAT32_C( 260.51), SIMDE_FLOAT32_C( -298.75), SIMDE_FLOAT32_C( -90.05) }, { SIMDE_FLOAT32_C( 462.30), SIMDE_FLOAT32_C( 260.51), SIMDE_FLOAT32_C( 298.75), SIMDE_FLOAT32_C( 90.05) } }, { { SIMDE_FLOAT32_C( 349.12), SIMDE_FLOAT32_C( 972.04), SIMDE_FLOAT32_C( 3.43), SIMDE_FLOAT32_C( -624.53) }, { SIMDE_FLOAT32_C( 349.12), SIMDE_FLOAT32_C( 972.04), SIMDE_FLOAT32_C( 3.43), SIMDE_FLOAT32_C( 624.53) } }, { { SIMDE_FLOAT32_C( 653.64), SIMDE_FLOAT32_C( 788.52), SIMDE_FLOAT32_C( 687.80), SIMDE_FLOAT32_C( 756.52) }, { SIMDE_FLOAT32_C( 653.64), SIMDE_FLOAT32_C( 788.52), SIMDE_FLOAT32_C( 687.80), SIMDE_FLOAT32_C( 756.52) } }, { { SIMDE_FLOAT32_C( 163.46), SIMDE_FLOAT32_C( -327.92), SIMDE_FLOAT32_C( 337.82), SIMDE_FLOAT32_C( 394.49) }, { SIMDE_FLOAT32_C( 163.46), SIMDE_FLOAT32_C( 327.92), SIMDE_FLOAT32_C( 337.82), SIMDE_FLOAT32_C( 394.49) } }, { { SIMDE_FLOAT32_C( 977.72), SIMDE_FLOAT32_C( 437.45), SIMDE_FLOAT32_C( 808.81), SIMDE_FLOAT32_C( 750.24) }, { SIMDE_FLOAT32_C( 977.72), SIMDE_FLOAT32_C( 437.45), SIMDE_FLOAT32_C( 808.81), SIMDE_FLOAT32_C( 750.24) } }, { { SIMDE_FLOAT32_C( 458.56), SIMDE_FLOAT32_C( -377.89), SIMDE_FLOAT32_C( 748.70), SIMDE_FLOAT32_C( -697.44) }, { SIMDE_FLOAT32_C( 458.56), SIMDE_FLOAT32_C( 377.89), SIMDE_FLOAT32_C( 748.70), SIMDE_FLOAT32_C( 697.44) } }, { { SIMDE_FLOAT32_C( -708.39), SIMDE_FLOAT32_C( 955.88), SIMDE_FLOAT32_C( 31.34), SIMDE_FLOAT32_C( -575.79) }, { SIMDE_FLOAT32_C( 708.39), SIMDE_FLOAT32_C( 955.88), SIMDE_FLOAT32_C( 31.34), SIMDE_FLOAT32_C( 575.79) } }, { { SIMDE_FLOAT32_C( -270.33), SIMDE_FLOAT32_C( 422.59), SIMDE_FLOAT32_C( 310.75), 
SIMDE_FLOAT32_C( -808.03) }, { SIMDE_FLOAT32_C( 270.33), SIMDE_FLOAT32_C( 422.59), SIMDE_FLOAT32_C( 310.75), SIMDE_FLOAT32_C( 808.03) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t r = simde_vabsq_f32(a); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vabsq_f64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { simde_float64 a[2]; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 142.01), SIMDE_FLOAT64_C( 892.18) }, { SIMDE_FLOAT64_C( 142.01), SIMDE_FLOAT64_C( 892.18) } }, { { SIMDE_FLOAT64_C( 77.64), SIMDE_FLOAT64_C( 556.98) }, { SIMDE_FLOAT64_C( 77.64), SIMDE_FLOAT64_C( 556.98) } }, { { SIMDE_FLOAT64_C( 117.55), SIMDE_FLOAT64_C( 172.99) }, { SIMDE_FLOAT64_C( 117.55), SIMDE_FLOAT64_C( 172.99) } }, { { SIMDE_FLOAT64_C( -701.87), SIMDE_FLOAT64_C( -246.57) }, { SIMDE_FLOAT64_C( 701.87), SIMDE_FLOAT64_C( 246.57) } }, { { SIMDE_FLOAT64_C( -27.40), SIMDE_FLOAT64_C( 39.73) }, { SIMDE_FLOAT64_C( 27.40), SIMDE_FLOAT64_C( 39.73) } }, { { SIMDE_FLOAT64_C( -558.82), SIMDE_FLOAT64_C( -689.88) }, { SIMDE_FLOAT64_C( 558.82), SIMDE_FLOAT64_C( 689.88) } }, { { SIMDE_FLOAT64_C( -566.76), SIMDE_FLOAT64_C( -58.12) }, { SIMDE_FLOAT64_C( 566.76), SIMDE_FLOAT64_C( 58.12) } }, { { SIMDE_FLOAT64_C( -718.46), SIMDE_FLOAT64_C( 966.37) }, { SIMDE_FLOAT64_C( 718.46), SIMDE_FLOAT64_C( 966.37) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t r = simde_vabsq_f64(a); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vabsq_s8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { int8_t a[16]; int8_t r[16]; } test_vec[] = { { { INT8_MIN, -INT8_C( 34), -INT8_C( 36), INT8_C( 16), INT8_C( 115), INT8_C( 7), -INT8_C( 82), INT8_C( 67), -INT8_C( 76), INT8_C( 28), INT8_C( 
123), -INT8_C( 110), -INT8_C( 10), -INT8_C( 106), INT8_C( 1), -INT8_C( 51) }, { INT8_MIN, INT8_C( 34), INT8_C( 36), INT8_C( 16), INT8_C( 115), INT8_C( 7), INT8_C( 82), INT8_C( 67), INT8_C( 76), INT8_C( 28), INT8_C( 123), INT8_C( 110), INT8_C( 10), INT8_C( 106), INT8_C( 1), INT8_C( 51) } }, { { -INT8_C( 101), -INT8_C( 9), INT8_C( 83), INT8_C( 73), INT8_C( 58), INT8_C( 30), INT8_C( 16), INT8_C( 111), INT8_C( 33), INT8_C( 15), INT8_C( 47), -INT8_C( 5), INT8_C( 74), INT8_C( 49), -INT8_C( 64), -INT8_C( 105) }, { INT8_C( 101), INT8_C( 9), INT8_C( 83), INT8_C( 73), INT8_C( 58), INT8_C( 30), INT8_C( 16), INT8_C( 111), INT8_C( 33), INT8_C( 15), INT8_C( 47), INT8_C( 5), INT8_C( 74), INT8_C( 49), INT8_C( 64), INT8_C( 105) } }, { { INT8_C( 16), -INT8_C( 100), -INT8_C( 89), -INT8_C( 125), -INT8_C( 93), INT8_C( 85), -INT8_C( 57), INT8_C( 88), INT8_C( 114), INT8_C( 66), -INT8_C( 22), INT8_C( 104), -INT8_C( 40), -INT8_C( 21), INT8_C( 53), INT8_C( 116) }, { INT8_C( 16), INT8_C( 100), INT8_C( 89), INT8_C( 125), INT8_C( 93), INT8_C( 85), INT8_C( 57), INT8_C( 88), INT8_C( 114), INT8_C( 66), INT8_C( 22), INT8_C( 104), INT8_C( 40), INT8_C( 21), INT8_C( 53), INT8_C( 116) } }, { { -INT8_C( 30), -INT8_C( 120), -INT8_C( 67), INT8_C( 29), -INT8_C( 90), -INT8_C( 51), -INT8_C( 116), -INT8_C( 57), -INT8_C( 36), -INT8_C( 68), -INT8_C( 62), INT8_C( 38), -INT8_C( 19), -INT8_C( 126), -INT8_C( 67), -INT8_C( 3) }, { INT8_C( 30), INT8_C( 120), INT8_C( 67), INT8_C( 29), INT8_C( 90), INT8_C( 51), INT8_C( 116), INT8_C( 57), INT8_C( 36), INT8_C( 68), INT8_C( 62), INT8_C( 38), INT8_C( 19), INT8_C( 126), INT8_C( 67), INT8_C( 3) } }, { { INT8_C( 30), INT8_C( 100), -INT8_C( 127), -INT8_C( 62), -INT8_C( 71), INT8_C( 72), INT8_C( 26), INT8_C( 43), -INT8_C( 118), INT8_C( 4), -INT8_C( 109), INT8_C( 98), -INT8_C( 17), -INT8_C( 56), -INT8_C( 42), -INT8_C( 46) }, { INT8_C( 30), INT8_C( 100), INT8_MAX, INT8_C( 62), INT8_C( 71), INT8_C( 72), INT8_C( 26), INT8_C( 43), INT8_C( 118), INT8_C( 4), INT8_C( 109), INT8_C( 
98), INT8_C( 17), INT8_C( 56), INT8_C( 42), INT8_C( 46) } }, { { INT8_C( 80), -INT8_C( 109), -INT8_C( 17), -INT8_C( 9), INT8_C( 96), INT8_C( 123), -INT8_C( 66), INT8_C( 60), INT8_C( 55), -INT8_C( 127), INT8_C( 98), INT8_C( 37), INT8_C( 3), INT8_C( 31), INT8_C( 34), INT8_C( 34) }, { INT8_C( 80), INT8_C( 109), INT8_C( 17), INT8_C( 9), INT8_C( 96), INT8_C( 123), INT8_C( 66), INT8_C( 60), INT8_C( 55), INT8_MAX, INT8_C( 98), INT8_C( 37), INT8_C( 3), INT8_C( 31), INT8_C( 34), INT8_C( 34) } }, { { -INT8_C( 125), -INT8_C( 93), -INT8_C( 28), INT8_C( 61), -INT8_C( 21), -INT8_C( 2), INT8_C( 104), INT8_C( 117), INT8_C( 2), -INT8_C( 4), -INT8_C( 40), -INT8_C( 15), -INT8_C( 60), -INT8_C( 82), -INT8_C( 61), INT8_C( 21) }, { INT8_C( 125), INT8_C( 93), INT8_C( 28), INT8_C( 61), INT8_C( 21), INT8_C( 2), INT8_C( 104), INT8_C( 117), INT8_C( 2), INT8_C( 4), INT8_C( 40), INT8_C( 15), INT8_C( 60), INT8_C( 82), INT8_C( 61), INT8_C( 21) } }, { { INT8_C( 66), -INT8_C( 78), INT8_C( 12), -INT8_C( 94), INT8_C( 46), -INT8_C( 54), -INT8_C( 33), INT8_C( 101), INT8_C( 75), INT8_C( 65), -INT8_C( 118), INT8_C( 79), INT8_C( 97), -INT8_C( 83), INT8_C( 113), -INT8_C( 28) }, { INT8_C( 66), INT8_C( 78), INT8_C( 12), INT8_C( 94), INT8_C( 46), INT8_C( 54), INT8_C( 33), INT8_C( 101), INT8_C( 75), INT8_C( 65), INT8_C( 118), INT8_C( 79), INT8_C( 97), INT8_C( 83), INT8_C( 113), INT8_C( 28) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t r = simde_vabsq_s8(a); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vabsq_s16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { int16_t a[8]; int16_t r[8]; } test_vec[] = { { { INT16_MIN, INT16_C( 8680), -INT16_C( 9431), -INT16_C( 30617), -INT16_C( 25759), INT16_C( 691), INT16_C( 1725), INT16_C( 11273) }, { INT16_MIN, INT16_C( 8680), INT16_C( 9431), INT16_C( 30617), INT16_C( 25759), INT16_C( 691), INT16_C( 1725), 
INT16_C( 11273) } }, { { -INT16_C( 5989), INT16_C( 4088), INT16_C( 16904), -INT16_C( 13230), INT16_C( 21746), INT16_C( 20255), INT16_C( 15195), -INT16_C( 19725) }, { INT16_C( 5989), INT16_C( 4088), INT16_C( 16904), INT16_C( 13230), INT16_C( 21746), INT16_C( 20255), INT16_C( 15195), INT16_C( 19725) } }, { { -INT16_C( 9465), INT16_C( 12756), INT16_C( 15286), INT16_C( 6329), INT16_C( 27863), -INT16_C( 27622), INT16_C( 9331), INT16_C( 3776) }, { INT16_C( 9465), INT16_C( 12756), INT16_C( 15286), INT16_C( 6329), INT16_C( 27863), INT16_C( 27622), INT16_C( 9331), INT16_C( 3776) } }, { { -INT16_C( 18420), INT16_C( 5149), INT16_C( 28922), -INT16_C( 4895), INT16_C( 196), INT16_C( 8252), INT16_C( 12091), INT16_C( 17106) }, { INT16_C( 18420), INT16_C( 5149), INT16_C( 28922), INT16_C( 4895), INT16_C( 196), INT16_C( 8252), INT16_C( 12091), INT16_C( 17106) } }, { { -INT16_C( 23029), -INT16_C( 16013), INT16_C( 11746), -INT16_C( 17959), -INT16_C( 2919), INT16_C( 3149), INT16_C( 3352), INT16_C( 9242) }, { INT16_C( 23029), INT16_C( 16013), INT16_C( 11746), INT16_C( 17959), INT16_C( 2919), INT16_C( 3149), INT16_C( 3352), INT16_C( 9242) } }, { { INT16_C( 14533), -INT16_C( 16583), INT16_C( 6824), INT16_C( 27819), -INT16_C( 6374), INT16_C( 21900), INT16_C( 24343), INT16_C( 8855) }, { INT16_C( 14533), INT16_C( 16583), INT16_C( 6824), INT16_C( 27819), INT16_C( 6374), INT16_C( 21900), INT16_C( 24343), INT16_C( 8855) } }, { { INT16_C( 2821), -INT16_C( 6173), -INT16_C( 17096), -INT16_C( 11872), -INT16_C( 4687), -INT16_C( 13858), -INT16_C( 1798), -INT16_C( 16403) }, { INT16_C( 2821), INT16_C( 6173), INT16_C( 17096), INT16_C( 11872), INT16_C( 4687), INT16_C( 13858), INT16_C( 1798), INT16_C( 16403) } }, { { INT16_C( 9776), -INT16_C( 10114), INT16_C( 10816), INT16_C( 23109), -INT16_C( 12015), INT16_C( 10415), INT16_C( 18224), INT16_C( 13898) }, { INT16_C( 9776), INT16_C( 10114), INT16_C( 10816), INT16_C( 23109), INT16_C( 12015), INT16_C( 10415), INT16_C( 18224), INT16_C( 13898) } } }; for (size_t 
i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t r = simde_vabsq_s16(a); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vabsq_s32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { int32_t a[4]; int32_t r[4]; } test_vec[] = { { { INT32_MIN, INT32_C( 2027380581), INT32_C( 1316751660), INT32_C( 2019026169) }, { INT32_MIN, INT32_C( 2027380581), INT32_C( 1316751660), INT32_C( 2019026169) } }, { { -INT32_C( 919386617), -INT32_C( 297545572), INT32_C( 1283066671), INT32_C( 1868282067) }, { INT32_C( 919386617), INT32_C( 297545572), INT32_C( 1283066671), INT32_C( 1868282067) } }, { { -INT32_C( 1989187036), -INT32_C( 167688246), INT32_C( 1229225296), INT32_C( 1757518689) }, { INT32_C( 1989187036), INT32_C( 167688246), INT32_C( 1229225296), INT32_C( 1757518689) } }, { { INT32_C( 2100425953), -INT32_C( 194218555), INT32_C( 1396762240), -INT32_C( 1027433314) }, { INT32_C( 2100425953), INT32_C( 194218555), INT32_C( 1396762240), INT32_C( 1027433314) } }, { { -INT32_C( 800378362), -INT32_C( 909751175), INT32_C( 705825481), -INT32_C( 2020354907) }, { INT32_C( 800378362), INT32_C( 909751175), INT32_C( 705825481), INT32_C( 2020354907) } }, { { -INT32_C( 1929067320), -INT32_C( 1165922247), -INT32_C( 183647657), INT32_C( 1689833566) }, { INT32_C( 1929067320), INT32_C( 1165922247), INT32_C( 183647657), INT32_C( 1689833566) } }, { { INT32_C( 2067071746), INT32_C( 423951184), -INT32_C( 1421584890), -INT32_C( 214771926) }, { INT32_C( 2067071746), INT32_C( 423951184), INT32_C( 1421584890), INT32_C( 214771926) } }, { { -INT32_C( 713017445), -INT32_C( 24182105), INT32_C( 586456260), INT32_C( 1854319724) }, { INT32_C( 713017445), INT32_C( 24182105), INT32_C( 586456260), INT32_C( 1854319724) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t r = 
simde_vabsq_s32(a); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vabsq_s64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { int64_t a[2]; int64_t r[2]; } test_vec[] = { { { INT64_MIN, -INT64_C( 2815808507348841371) }, { INT64_MIN, INT64_C( 2815808507348841371) } }, { { -INT64_C( 6379743846319689735), INT64_C( 6714660166886377872) }, { INT64_C( 6379743846319689735), INT64_C( 6714660166886377872) } }, { { INT64_C( 6202666085961610503), INT64_C( 4486453037390956375) }, { INT64_C( 6202666085961610503), INT64_C( 4486453037390956375) } }, { { -INT64_C( 8218894509259856764), -INT64_C( 784383972760114201) }, { INT64_C( 8218894509259856764), INT64_C( 784383972760114201) } }, { { -INT64_C( 2789077807162968316), INT64_C( 990379092211954748) }, { INT64_C( 2789077807162968316), INT64_C( 990379092211954748) } }, { { INT64_C( 7887851912833787884), -INT64_C( 1597006666687211790) }, { INT64_C( 7887851912833787884), INT64_C( 1597006666687211790) } }, { { INT64_C( 7412125822168790657), -INT64_C( 7623567593992412515) }, { INT64_C( 7412125822168790657), INT64_C( 7623567593992412515) } }, { { -INT64_C( 4287435838607816052), INT64_C( 8600604211809725581) }, { INT64_C( 4287435838607816052), INT64_C( 8600604211809725581) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t r = simde_vabsq_s64(a); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vabsd_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vabs_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vabs_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vabs_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vabs_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vabs_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vabs_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vabsq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vabsq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vabsq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vabsq_s16) 
SIMDE_TEST_FUNC_LIST_ENTRY(vabsq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vabsq_s64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/add.c000066400000000000000000001743201400333146700162220ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN add #include "test-neon.h" #include "../../../simde/arm/neon/add.h" static int test_simde_vaddd_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a; int64_t b; int64_t r; } test_vec[] = { { -INT64_C( 5376176558586261779), -INT64_C( 8975014256303024679), INT64_C( 4095553258820265158) }, { INT64_C( 4193233660194686354), INT64_C( 8392010986138408549), -INT64_C( 5861499427376456713) }, { -INT64_C( 498686658264423049), -INT64_C( 235223563850804956), -INT64_C( 733910222115228005) }, { -INT64_C( 7287872900825752874), -INT64_C( 5631607047233613604), INT64_C( 5527264125650185138) }, { INT64_C( 4017813484947424424), -INT64_C( 3919207051327069898), INT64_C( 98606433620354526) }, { -INT64_C( 3110218261649903353), INT64_C( 1009950248106779417), -INT64_C( 2100268013543123936) }, { INT64_C( 8007005661493138900), -INT64_C( 4253741394523353286), INT64_C( 3753264266969785614) }, { -INT64_C( 4311396562388294819), INT64_C( 5057573658044097211), INT64_C( 746177095655802392) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int64_t r = simde_vaddd_s64(test_vec[i].a, test_vec[i].b); simde_assert_equal_i64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int64_t a = simde_test_codegen_random_i64(); int64_t b = simde_test_codegen_random_i64(); int64_t r = simde_vaddd_s64(a, b); simde_test_codegen_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddd_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a; uint64_t b; uint64_t r; } test_vec[] = { { 
UINT64_C(15770000197539399473), UINT64_C(17536172067039799771), UINT64_C(14859428190869647628) }, { UINT64_C(17353057886393621858), UINT64_C( 3930588065693821396), UINT64_C( 2836901878377891638) }, { UINT64_C(16195923950262401352), UINT64_C( 3237477675399957348), UINT64_C( 986657551952807084) }, { UINT64_C( 8959642801591104818), UINT64_C(17451817626807527669), UINT64_C( 7964716354689080871) }, { UINT64_C(12821359242969338377), UINT64_C( 7809127654626125417), UINT64_C( 2183742823885912178) }, { UINT64_C(11092975718023534798), UINT64_C( 1022251167223362963), UINT64_C(12115226885246897761) }, { UINT64_C( 1266930590655711894), UINT64_C(12040272284058140784), UINT64_C(13307202874713852678) }, { UINT64_C(12139750857182669372), UINT64_C( 7482753672160808133), UINT64_C( 1175760455633925889) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint64_t r = simde_vaddd_u64(test_vec[i].a, test_vec[i].b); simde_assert_equal_u64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a = simde_test_codegen_random_u64(); uint64_t b = simde_test_codegen_random_u64(); uint64_t r = simde_vaddd_u64(a, b); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vadd_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -645.65), SIMDE_FLOAT32_C( 153.94) }, { SIMDE_FLOAT32_C( 112.49), SIMDE_FLOAT32_C( -617.76) }, { SIMDE_FLOAT32_C( -533.16), SIMDE_FLOAT32_C( -463.81) } }, { { SIMDE_FLOAT32_C( -544.46), SIMDE_FLOAT32_C( -10.20) }, { SIMDE_FLOAT32_C( -53.36), SIMDE_FLOAT32_C( -475.60) }, { SIMDE_FLOAT32_C( -597.83), SIMDE_FLOAT32_C( -485.80) } }, { { SIMDE_FLOAT32_C( 567.70), SIMDE_FLOAT32_C( 173.84) }, { SIMDE_FLOAT32_C( 812.81), SIMDE_FLOAT32_C( 166.74) }, { 
SIMDE_FLOAT32_C( 1380.51), SIMDE_FLOAT32_C( 340.59) } }, { { SIMDE_FLOAT32_C( -854.69), SIMDE_FLOAT32_C( -924.50) }, { SIMDE_FLOAT32_C( -17.33), SIMDE_FLOAT32_C( -405.57) }, { SIMDE_FLOAT32_C( -872.02), SIMDE_FLOAT32_C( -1330.07) } }, { { SIMDE_FLOAT32_C( 507.20), SIMDE_FLOAT32_C( -886.90) }, { SIMDE_FLOAT32_C( 991.64), SIMDE_FLOAT32_C( 755.28) }, { SIMDE_FLOAT32_C( 1498.84), SIMDE_FLOAT32_C( -131.62) } }, { { SIMDE_FLOAT32_C( -419.88), SIMDE_FLOAT32_C( 108.98) }, { SIMDE_FLOAT32_C( 853.76), SIMDE_FLOAT32_C( 233.89) }, { SIMDE_FLOAT32_C( 433.88), SIMDE_FLOAT32_C( 342.87) } }, { { SIMDE_FLOAT32_C( -196.90), SIMDE_FLOAT32_C( 244.58) }, { SIMDE_FLOAT32_C( 298.75), SIMDE_FLOAT32_C( 867.12) }, { SIMDE_FLOAT32_C( 101.85), SIMDE_FLOAT32_C( 1111.70) } }, { { SIMDE_FLOAT32_C( 32.51), SIMDE_FLOAT32_C( -749.49) }, { SIMDE_FLOAT32_C( -783.09), SIMDE_FLOAT32_C( 386.86) }, { SIMDE_FLOAT32_C( -750.58), SIMDE_FLOAT32_C( -362.63) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a, b, r; a = simde_vld1_f32(test_vec[i].a); b = simde_vld1_f32(test_vec[i].b); r = simde_vadd_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vadd_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[1]; simde_float64 b[1]; simde_float64 r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( 404.45) }, { SIMDE_FLOAT64_C( 329.40) }, { SIMDE_FLOAT64_C( 733.85) } }, { { SIMDE_FLOAT64_C( 769.10) }, { SIMDE_FLOAT64_C( 859.99) }, { SIMDE_FLOAT64_C( 1629.09) } }, { { SIMDE_FLOAT64_C( -680.80) }, { SIMDE_FLOAT64_C( -284.26) }, { SIMDE_FLOAT64_C( -965.06) } }, { { SIMDE_FLOAT64_C( -615.61) }, { SIMDE_FLOAT64_C( 886.90) }, { SIMDE_FLOAT64_C( 271.29) } }, { { SIMDE_FLOAT64_C( 889.58) }, { SIMDE_FLOAT64_C( -802.80) }, { SIMDE_FLOAT64_C( 86.78) } }, { { SIMDE_FLOAT64_C( 53.64) }, { SIMDE_FLOAT64_C( -965.11) }, { SIMDE_FLOAT64_C( -911.46) } }, { { SIMDE_FLOAT64_C( -727.30) }, { SIMDE_FLOAT64_C( -963.68) }, 
{ SIMDE_FLOAT64_C( -1690.98) } }, { { SIMDE_FLOAT64_C( -370.68) }, { SIMDE_FLOAT64_C( 779.90) }, { SIMDE_FLOAT64_C( 409.22) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_float64x1_t r = simde_vadd_f64(a, b); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vadd_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 51), INT8_C( 8), -INT8_C( 16), -INT8_C( 79), -INT8_C( 121), -INT8_C( 101), -INT8_C( 47), INT8_C( 115) }, { -INT8_C( 103), INT8_C( 43), -INT8_C( 76), -INT8_C( 98), -INT8_C( 75), INT8_C( 79), -INT8_C( 61), INT8_C( 120) }, { INT8_C( 102), INT8_C( 51), -INT8_C( 92), INT8_C( 79), INT8_C( 60), -INT8_C( 22), -INT8_C( 108), -INT8_C( 21) } }, { { INT8_C( 28), -INT8_C( 124), -INT8_C( 17), -INT8_C( 117), -INT8_C( 124), -INT8_C( 22), INT8_C( 125), INT8_C( 109) }, { INT8_C( 14), -INT8_C( 125), INT8_C( 88), INT8_C( 115), -INT8_C( 23), INT8_C( 119), -INT8_C( 31), -INT8_C( 73) }, { INT8_C( 42), INT8_C( 7), INT8_C( 71), -INT8_C( 2), INT8_C( 109), INT8_C( 97), INT8_C( 94), INT8_C( 36) } }, { { INT8_MAX, -INT8_C( 47), INT8_C( 104), INT8_C( 6), INT8_C( 109), INT8_C( 57), INT8_C( 121), INT8_C( 6) }, { INT8_C( 100), INT8_C( 45), -INT8_C( 92), INT8_C( 25), INT8_C( 125), INT8_C( 104), -INT8_C( 111), -INT8_C( 103) }, { -INT8_C( 29), -INT8_C( 2), INT8_C( 12), INT8_C( 31), -INT8_C( 22), -INT8_C( 95), INT8_C( 10), -INT8_C( 97) } }, { { -INT8_C( 20), INT8_MIN, INT8_C( 37), INT8_C( 112), INT8_C( 106), -INT8_C( 94), -INT8_C( 35), INT8_C( 120) }, { INT8_C( 37), INT8_C( 54), -INT8_C( 20), INT8_C( 14), -INT8_C( 83), -INT8_C( 51), -INT8_C( 59), INT8_C( 44) }, { INT8_C( 17), -INT8_C( 74), INT8_C( 17), INT8_C( 126), INT8_C( 23), INT8_C( 111), -INT8_C( 94), -INT8_C( 92) } }, { { -INT8_C( 98), INT8_C( 45), INT8_C( 51), INT8_C( 11), 
INT8_C( 102), -INT8_C( 84), INT8_C( 17), -INT8_C( 53) }, { -INT8_C( 38), -INT8_C( 74), -INT8_C( 28), INT8_C( 87), INT8_C( 30), INT8_C( 118), -INT8_C( 16), INT8_C( 10) }, { INT8_C( 120), -INT8_C( 29), INT8_C( 23), INT8_C( 98), -INT8_C( 124), INT8_C( 34), INT8_C( 1), -INT8_C( 43) } }, { { -INT8_C( 10), INT8_C( 21), INT8_C( 122), INT8_C( 97), -INT8_C( 73), INT8_C( 88), -INT8_C( 39), -INT8_C( 36) }, { -INT8_C( 114), -INT8_C( 59), -INT8_C( 21), INT8_C( 59), -INT8_C( 110), -INT8_C( 80), INT8_C( 103), INT8_C( 49) }, { -INT8_C( 124), -INT8_C( 38), INT8_C( 101), -INT8_C( 100), INT8_C( 73), INT8_C( 8), INT8_C( 64), INT8_C( 13) } }, { { -INT8_C( 34), -INT8_C( 102), INT8_C( 60), INT8_C( 68), INT8_C( 71), INT8_C( 78), INT8_C( 15), INT8_C( 33) }, { INT8_C( 4), -INT8_C( 12), INT8_C( 120), INT8_C( 34), INT8_C( 106), INT8_C( 104), INT8_C( 44), INT8_C( 96) }, { -INT8_C( 30), -INT8_C( 114), -INT8_C( 76), INT8_C( 102), -INT8_C( 79), -INT8_C( 74), INT8_C( 59), -INT8_C( 127) } }, { { INT8_C( 126), -INT8_C( 90), -INT8_C( 63), INT8_C( 53), -INT8_C( 2), -INT8_C( 101), INT8_C( 18), -INT8_C( 116) }, { INT8_C( 96), -INT8_C( 3), -INT8_C( 57), -INT8_C( 13), -INT8_C( 83), INT8_C( 47), INT8_C( 36), -INT8_C( 117) }, { -INT8_C( 34), -INT8_C( 93), -INT8_C( 120), INT8_C( 40), -INT8_C( 85), -INT8_C( 54), INT8_C( 54), INT8_C( 23) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vadd_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vadd_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 24777), INT16_C( 4304), -INT16_C( 8274), -INT16_C( 19919) }, { -INT16_C( 22061), INT16_C( 15828), INT16_C( 18), -INT16_C( 28514) }, { INT16_C( 2716), INT16_C( 20132), -INT16_C( 8256), INT16_C( 17103) } }, { { INT16_C( 24487), -INT16_C( 
23099), -INT16_C( 10246), INT16_C( 23346) }, { -INT16_C( 1580), -INT16_C( 32178), INT16_C( 29224), -INT16_C( 3571) }, { INT16_C( 22907), INT16_C( 10259), INT16_C( 18978), INT16_C( 19775) } }, { { -INT16_C( 8750), -INT16_C( 32510), INT16_C( 13501), -INT16_C( 28621) }, { INT16_C( 2269), -INT16_C( 4146), INT16_C( 27656), -INT16_C( 20609) }, { -INT16_C( 6481), INT16_C( 28880), -INT16_C( 24379), INT16_C( 16306) } }, { { INT16_C( 17867), -INT16_C( 14763), -INT16_C( 30948), -INT16_C( 3807) }, { INT16_C( 28544), -INT16_C( 22157), -INT16_C( 32543), -INT16_C( 19557) }, { -INT16_C( 19125), INT16_C( 28616), INT16_C( 2045), -INT16_C( 23364) } }, { { -INT16_C( 25250), INT16_C( 6964), INT16_C( 26833), -INT16_C( 20565) }, { INT16_C( 31088), INT16_C( 30878), INT16_C( 7909), -INT16_C( 20184) }, { INT16_C( 5838), -INT16_C( 27694), -INT16_C( 30794), INT16_C( 24787) } }, { { INT16_C( 32099), INT16_C( 32631), -INT16_C( 26620), -INT16_C( 31632) }, { -INT16_C( 7417), -INT16_C( 6099), -INT16_C( 14236), -INT16_C( 15717) }, { INT16_C( 24682), INT16_C( 26532), INT16_C( 24680), INT16_C( 18187) } }, { { -INT16_C( 12186), INT16_C( 14301), -INT16_C( 30664), -INT16_C( 22298) }, { -INT16_C( 31486), -INT16_C( 6368), INT16_C( 18595), INT16_C( 1688) }, { INT16_C( 21864), INT16_C( 7933), -INT16_C( 12069), -INT16_C( 20610) } }, { { INT16_C( 4037), -INT16_C( 13947), -INT16_C( 2393), -INT16_C( 20914) }, { INT16_C( 31705), INT16_C( 15766), INT16_C( 12868), -INT16_C( 21761) }, { -INT16_C( 29794), INT16_C( 1819), INT16_C( 10475), INT16_C( 22861) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vadd_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vadd_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 987880450), INT32_C( 
1742915685) }, { -INT32_C( 263323059), INT32_C( 284616523) }, { INT32_C( 724557391), INT32_C( 2027532208) } }, { { -INT32_C( 1629848586), INT32_C( 1263282289) }, { -INT32_C( 410459229), INT32_C( 395413525) }, { -INT32_C( 2040307815), INT32_C( 1658695814) } }, { { -INT32_C( 917408924), -INT32_C( 2010107077) }, { -INT32_C( 2139586763), INT32_C( 1553034854) }, { INT32_C( 1237971609), -INT32_C( 457072223) } }, { { INT32_C( 1543138281), INT32_C( 916866963) }, { INT32_C( 1058942506), INT32_C( 458665910) }, { -INT32_C( 1692886509), INT32_C( 1375532873) } }, { { INT32_C( 1575266082), INT32_C( 266671578) }, { -INT32_C( 91202156), -INT32_C( 1252646453) }, { INT32_C( 1484063926), -INT32_C( 985974875) } }, { { INT32_C( 521163148), -INT32_C( 1034570088) }, { -INT32_C( 1694403612), INT32_C( 1169577763) }, { -INT32_C( 1173240464), INT32_C( 135007675) } }, { { -INT32_C( 660366594), INT32_C( 1139247279) }, { -INT32_C( 1321371674), INT32_C( 610702488) }, { -INT32_C( 1981738268), INT32_C( 1749949767) } }, { { INT32_C( 2101573349), INT32_C( 289380652) }, { INT32_C( 799817740), -INT32_C( 1787469161) }, { -INT32_C( 1393576207), -INT32_C( 1498088509) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vadd_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vadd_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; int64_t b[1]; int64_t r[1]; } test_vec[] = { { { -INT64_C( 8723660904129029892) }, { -INT64_C( 6446446838156874292) }, { INT64_C( 3276636331423647432) } }, { { INT64_C( 8237754855438142485) }, { INT64_C( 6759364693269282466) }, { -INT64_C( 3449624525002126665) } }, { { INT64_C( 6581439727009348398) }, { INT64_C( 4723230566403575077) }, { -INT64_C( 7142073780296628141) } }, { { -INT64_C( 5481490900376703211) }, { -INT64_C( 3201572641363423533) }, { 
-INT64_C( 8683063541740126744) } }, { { -INT64_C( 5124823067756684555) }, { -INT64_C( 3892366966611835) }, { -INT64_C( 5128715434723296390) } }, { { INT64_C( 2134538319889593818) }, { INT64_C( 4354666682578472181) }, { INT64_C( 6489205002468065999) } }, { { -INT64_C( 8158375143962242948) }, { INT64_C( 7722040105862391106) }, { -INT64_C( 436335038099851842) } }, { { INT64_C( 76268774866003111) }, { -INT64_C( 4682326055126867838) }, { -INT64_C( 4606057280260864727) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r = simde_vadd_s64(a, b); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; } static int test_simde_vadd_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(145), UINT8_C(172), UINT8_C(192), UINT8_C(101), UINT8_C(103), UINT8_C( 94), UINT8_C(186), UINT8_C(229) }, { UINT8_C(202), UINT8_C( 17), UINT8_C(115), UINT8_C( 16), UINT8_C(110), UINT8_C( 20), UINT8_C(220), UINT8_C(201) }, { UINT8_C( 91), UINT8_C(189), UINT8_C( 51), UINT8_C(117), UINT8_C(213), UINT8_C(114), UINT8_C(150), UINT8_C(174) } }, { { UINT8_C(228), UINT8_C(100), UINT8_C(144), UINT8_C(122), UINT8_C( 55), UINT8_C(227), UINT8_C(226), UINT8_C(153) }, { UINT8_C( 24), UINT8_C(198), UINT8_C(165), UINT8_C( 23), UINT8_C(173), UINT8_C( 87), UINT8_C( 39), UINT8_C( 62) }, { UINT8_C(252), UINT8_C( 42), UINT8_C( 53), UINT8_C(145), UINT8_C(228), UINT8_C( 58), UINT8_C( 9), UINT8_C(215) } }, { { UINT8_C( 3), UINT8_C(231), UINT8_C(163), UINT8_C(106), UINT8_C( 69), UINT8_C( 93), UINT8_C( 80), UINT8_C( 15) }, { UINT8_C(110), UINT8_C(195), UINT8_C( 31), UINT8_C(221), UINT8_C(216), UINT8_C(251), UINT8_C(166), UINT8_C(188) }, { UINT8_C(113), UINT8_C(170), UINT8_C(194), UINT8_C( 71), UINT8_C( 29), UINT8_C( 88), UINT8_C(246), UINT8_C(203) } }, { { UINT8_C( 96), UINT8_C( 55), 
UINT8_C( 55), UINT8_C(151), UINT8_C( 26), UINT8_C( 25), UINT8_C( 48), UINT8_C( 50) }, { UINT8_C(223), UINT8_C(213), UINT8_C( 74), UINT8_C(140), UINT8_C( 45), UINT8_C(113), UINT8_C(202), UINT8_C( 48) }, { UINT8_C( 63), UINT8_C( 12), UINT8_C(129), UINT8_C( 35), UINT8_C( 71), UINT8_C(138), UINT8_C(250), UINT8_C( 98) } }, { { UINT8_C( 88), UINT8_C(109), UINT8_C(155), UINT8_C(157), UINT8_C(202), UINT8_C(235), UINT8_C(172), UINT8_C( 56) }, { UINT8_C(174), UINT8_C(203), UINT8_C( 21), UINT8_C(134), UINT8_C(198), UINT8_C(188), UINT8_C( 67), UINT8_C( 38) }, { UINT8_C( 6), UINT8_C( 56), UINT8_C(176), UINT8_C( 35), UINT8_C(144), UINT8_C(167), UINT8_C(239), UINT8_C( 94) } }, { { UINT8_C(243), UINT8_C(122), UINT8_C(189), UINT8_C( 13), UINT8_C(147), UINT8_C(238), UINT8_C( 64), UINT8_C(114) }, { UINT8_C(195), UINT8_C(138), UINT8_C(254), UINT8_C(240), UINT8_C(251), UINT8_C(200), UINT8_C( 33), UINT8_C( 83) }, { UINT8_C(182), UINT8_C( 4), UINT8_C(187), UINT8_C(253), UINT8_C(142), UINT8_C(182), UINT8_C( 97), UINT8_C(197) } }, { { UINT8_C( 53), UINT8_C(188), UINT8_C(240), UINT8_MAX, UINT8_C(167), UINT8_C(156), UINT8_C( 55), UINT8_C( 85) }, { UINT8_C(103), UINT8_C( 77), UINT8_C(220), UINT8_C( 45), UINT8_C( 9), UINT8_C( 31), UINT8_C( 84), UINT8_C(252) }, { UINT8_C(156), UINT8_C( 9), UINT8_C(204), UINT8_C( 44), UINT8_C(176), UINT8_C(187), UINT8_C(139), UINT8_C( 81) } }, { { UINT8_C(153), UINT8_C( 17), UINT8_C( 9), UINT8_C( 44), UINT8_MAX, UINT8_C( 73), UINT8_C(158), UINT8_C(195) }, { UINT8_C(211), UINT8_C(156), UINT8_C(179), UINT8_C(206), UINT8_C(100), UINT8_C(212), UINT8_C( 33), UINT8_C(153) }, { UINT8_C(108), UINT8_C(173), UINT8_C(188), UINT8_C(250), UINT8_C( 99), UINT8_C( 29), UINT8_C(191), UINT8_C( 92) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vadd_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, 
simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vadd_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C( 4496), UINT16_C(14232), UINT16_C(53165), UINT16_C( 5261) }, { UINT16_C(26908), UINT16_C( 9538), UINT16_C(38536), UINT16_C( 8481) }, { UINT16_C(31404), UINT16_C(23770), UINT16_C(26165), UINT16_C(13742) } }, { { UINT16_C(11175), UINT16_C(42829), UINT16_C(60276), UINT16_C(18538) }, { UINT16_C( 7559), UINT16_C(60182), UINT16_C(14578), UINT16_C(33412) }, { UINT16_C(18734), UINT16_C(37475), UINT16_C( 9318), UINT16_C(51950) } }, { { UINT16_C( 7241), UINT16_C(63418), UINT16_C(18411), UINT16_C( 2059) }, { UINT16_C(19888), UINT16_C(14381), UINT16_C(20451), UINT16_C(35673) }, { UINT16_C(27129), UINT16_C(12263), UINT16_C(38862), UINT16_C(37732) } }, { { UINT16_C(42618), UINT16_C(60978), UINT16_C(40081), UINT16_C( 6198) }, { UINT16_C(19897), UINT16_C(43779), UINT16_C(34693), UINT16_C(52782) }, { UINT16_C(62515), UINT16_C(39221), UINT16_C( 9238), UINT16_C(58980) } }, { { UINT16_C(59555), UINT16_C(36549), UINT16_C(53551), UINT16_C(57238) }, { UINT16_C(50206), UINT16_C( 535), UINT16_C(28691), UINT16_C(36237) }, { UINT16_C(44225), UINT16_C(37084), UINT16_C(16706), UINT16_C(27939) } }, { { UINT16_C(48918), UINT16_C(42875), UINT16_C(45659), UINT16_C( 5311) }, { UINT16_C(49919), UINT16_C(33984), UINT16_C(61001), UINT16_C(60498) }, { UINT16_C(33301), UINT16_C(11323), UINT16_C(41124), UINT16_C( 273) } }, { { UINT16_C( 6358), UINT16_C( 1402), UINT16_C( 4585), UINT16_C( 2020) }, { UINT16_C(64469), UINT16_C(59401), UINT16_C(38507), UINT16_C(33141) }, { UINT16_C( 5291), UINT16_C(60803), UINT16_C(43092), UINT16_C(35161) } }, { { UINT16_C(61525), UINT16_C(45096), UINT16_C(59298), UINT16_C(41413) }, { UINT16_C(34217), UINT16_C(61989), UINT16_C(30835), UINT16_C(18910) }, { UINT16_C(30206), UINT16_C(41549), UINT16_C(24597), UINT16_C(60323) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; 
i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vadd_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vadd_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(2035177616), UINT32_C(1048588905) }, { UINT32_C(2552662573), UINT32_C(1981389600) }, { UINT32_C( 292872893), UINT32_C(3029978505) } }, { { UINT32_C( 774259084), UINT32_C(3520129832) }, { UINT32_C(3821270384), UINT32_C(4247560557) }, { UINT32_C( 300562172), UINT32_C(3472723093) } }, { { UINT32_C(1668709113), UINT32_C(3651270572) }, { UINT32_C(2708588673), UINT32_C(4011297379) }, { UINT32_C( 82330490), UINT32_C(3367600655) } }, { { UINT32_C(4078845643), UINT32_C(2596597289) }, { UINT32_C(1367181283), UINT32_C( 575580712) }, { UINT32_C(1151059630), UINT32_C(3172178001) } }, { { UINT32_C(3515204900), UINT32_C(1034561212) }, { UINT32_C(1390288110), UINT32_C(1916925606) }, { UINT32_C( 610525714), UINT32_C(2951486818) } }, { { UINT32_C(1566924596), UINT32_C( 838281805) }, { UINT32_C(3665982897), UINT32_C(1140641823) }, { UINT32_C( 937940197), UINT32_C(1978923628) } }, { { UINT32_C(1360298389), UINT32_C(2525937575) }, { UINT32_C(2179493339), UINT32_C(2549295459) }, { UINT32_C(3539791728), UINT32_C( 780265738) } }, { { UINT32_C(3606337929), UINT32_C( 872934531) }, { UINT32_C(2148436321), UINT32_C(4022536794) }, { UINT32_C(1459806954), UINT32_C( 600504029) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vadd_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vadd_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } test_vec[] = { { { 
UINT64_C( 8271370438949591179) }, { UINT64_C( 7149156384669675836) }, { UINT64_C(15420526823619267015) } }, { { UINT64_C( 8716507174114306880) }, { UINT64_C(11246819847481853643) }, { UINT64_C( 1516582947886608907) } }, { { UINT64_C( 7033664003445314963) }, { UINT64_C(13224599815624691785) }, { UINT64_C( 1811519745360455132) } }, { { UINT64_C(14839766502604456290) }, { UINT64_C( 4045366772834167159) }, { UINT64_C( 438389201729071833) } }, { { UINT64_C(15340385730390389596) }, { UINT64_C( 9132594273923359887) }, { UINT64_C( 6026235930604197867) } }, { { UINT64_C( 2310322047046596415) }, { UINT64_C( 7327580934766068434) }, { UINT64_C( 9637902981812664849) } }, { { UINT64_C(11681996663360525247) }, { UINT64_C(15296307448611608238) }, { UINT64_C( 8531560038262581869) } }, { { UINT64_C(18043431286987964317) }, { UINT64_C(11211186651486359335) }, { UINT64_C(10807873864764772036) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_vadd_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; } static int test_simde_vaddq_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -476.88), SIMDE_FLOAT32_C( -38.10), SIMDE_FLOAT32_C( 469.39), SIMDE_FLOAT32_C( -664.80) }, { SIMDE_FLOAT32_C( 181.43), SIMDE_FLOAT32_C( -154.03), SIMDE_FLOAT32_C( -345.56), SIMDE_FLOAT32_C( -102.63) }, { SIMDE_FLOAT32_C( -295.46), SIMDE_FLOAT32_C( -192.12), SIMDE_FLOAT32_C( 123.83), SIMDE_FLOAT32_C( -767.44) } }, { { SIMDE_FLOAT32_C( -103.40), SIMDE_FLOAT32_C( 661.42), SIMDE_FLOAT32_C( -336.65), SIMDE_FLOAT32_C( 536.30) }, { SIMDE_FLOAT32_C( 318.30), SIMDE_FLOAT32_C( -387.41), SIMDE_FLOAT32_C( 435.33), SIMDE_FLOAT32_C( -961.83) }, { SIMDE_FLOAT32_C( 214.90), SIMDE_FLOAT32_C( 274.01), SIMDE_FLOAT32_C( 98.69), SIMDE_FLOAT32_C( 
-425.53) } }, { { SIMDE_FLOAT32_C( 203.44), SIMDE_FLOAT32_C( -916.52), SIMDE_FLOAT32_C( -867.20), SIMDE_FLOAT32_C( 66.23) }, { SIMDE_FLOAT32_C( -667.27), SIMDE_FLOAT32_C( -968.66), SIMDE_FLOAT32_C( 101.74), SIMDE_FLOAT32_C( 202.05) }, { SIMDE_FLOAT32_C( -463.83), SIMDE_FLOAT32_C( -1885.18), SIMDE_FLOAT32_C( -765.47), SIMDE_FLOAT32_C( 268.28) } }, { { SIMDE_FLOAT32_C( -612.77), SIMDE_FLOAT32_C( -920.83), SIMDE_FLOAT32_C( 82.14), SIMDE_FLOAT32_C( -930.15) }, { SIMDE_FLOAT32_C( -763.40), SIMDE_FLOAT32_C( -818.04), SIMDE_FLOAT32_C( -97.96), SIMDE_FLOAT32_C( -240.29) }, { SIMDE_FLOAT32_C( -1376.18), SIMDE_FLOAT32_C( -1738.88), SIMDE_FLOAT32_C( -15.82), SIMDE_FLOAT32_C( -1170.43) } }, { { SIMDE_FLOAT32_C( 143.86), SIMDE_FLOAT32_C( -628.57), SIMDE_FLOAT32_C( 94.91), SIMDE_FLOAT32_C( -674.71) }, { SIMDE_FLOAT32_C( 217.40), SIMDE_FLOAT32_C( 749.35), SIMDE_FLOAT32_C( 222.65), SIMDE_FLOAT32_C( -886.00) }, { SIMDE_FLOAT32_C( 361.26), SIMDE_FLOAT32_C( 120.77), SIMDE_FLOAT32_C( 317.56), SIMDE_FLOAT32_C( -1560.71) } }, { { SIMDE_FLOAT32_C( 410.77), SIMDE_FLOAT32_C( 886.01), SIMDE_FLOAT32_C( 650.30), SIMDE_FLOAT32_C( -270.93) }, { SIMDE_FLOAT32_C( -501.41), SIMDE_FLOAT32_C( 85.64), SIMDE_FLOAT32_C( -232.77), SIMDE_FLOAT32_C( 702.03) }, { SIMDE_FLOAT32_C( -90.64), SIMDE_FLOAT32_C( 971.64), SIMDE_FLOAT32_C( 417.54), SIMDE_FLOAT32_C( 431.10) } }, { { SIMDE_FLOAT32_C( 169.12), SIMDE_FLOAT32_C( -99.97), SIMDE_FLOAT32_C( -231.73), SIMDE_FLOAT32_C( 501.86) }, { SIMDE_FLOAT32_C( -68.63), SIMDE_FLOAT32_C( 870.00), SIMDE_FLOAT32_C( -296.10), SIMDE_FLOAT32_C( 318.59) }, { SIMDE_FLOAT32_C( 100.49), SIMDE_FLOAT32_C( 770.03), SIMDE_FLOAT32_C( -527.83), SIMDE_FLOAT32_C( 820.45) } }, { { SIMDE_FLOAT32_C( 949.17), SIMDE_FLOAT32_C( 786.05), SIMDE_FLOAT32_C( 388.45), SIMDE_FLOAT32_C( -814.23) }, { SIMDE_FLOAT32_C( 968.00), SIMDE_FLOAT32_C( -709.52), SIMDE_FLOAT32_C( -54.52), SIMDE_FLOAT32_C( 111.86) }, { SIMDE_FLOAT32_C( 1917.17), SIMDE_FLOAT32_C( 76.53), SIMDE_FLOAT32_C( 333.93), SIMDE_FLOAT32_C( 
-702.37) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vaddq_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vaddq_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -338.09), SIMDE_FLOAT64_C( -959.61) }, { SIMDE_FLOAT64_C( 437.15), SIMDE_FLOAT64_C( 879.31) }, { SIMDE_FLOAT64_C( 99.06), SIMDE_FLOAT64_C( -80.30) } }, { { SIMDE_FLOAT64_C( 789.73), SIMDE_FLOAT64_C( -340.20) }, { SIMDE_FLOAT64_C( 993.31), SIMDE_FLOAT64_C( 200.50) }, { SIMDE_FLOAT64_C( 1783.05), SIMDE_FLOAT64_C( -139.70) } }, { { SIMDE_FLOAT64_C( -454.19), SIMDE_FLOAT64_C( 643.62) }, { SIMDE_FLOAT64_C( 929.57), SIMDE_FLOAT64_C( 44.40) }, { SIMDE_FLOAT64_C( 475.38), SIMDE_FLOAT64_C( 688.02) } }, { { SIMDE_FLOAT64_C( -270.75), SIMDE_FLOAT64_C( -303.20) }, { SIMDE_FLOAT64_C( -253.57), SIMDE_FLOAT64_C( 898.38) }, { SIMDE_FLOAT64_C( -524.31), SIMDE_FLOAT64_C( 595.18) } }, { { SIMDE_FLOAT64_C( 596.83), SIMDE_FLOAT64_C( 514.70) }, { SIMDE_FLOAT64_C( 400.23), SIMDE_FLOAT64_C( -471.80) }, { SIMDE_FLOAT64_C( 997.06), SIMDE_FLOAT64_C( 42.90) } }, { { SIMDE_FLOAT64_C( 384.71), SIMDE_FLOAT64_C( -895.87) }, { SIMDE_FLOAT64_C( 846.79), SIMDE_FLOAT64_C( 333.88) }, { SIMDE_FLOAT64_C( 1231.50), SIMDE_FLOAT64_C( -561.99) } }, { { SIMDE_FLOAT64_C( 890.18), SIMDE_FLOAT64_C( 235.24) }, { SIMDE_FLOAT64_C( 519.64), SIMDE_FLOAT64_C( 858.18) }, { SIMDE_FLOAT64_C( 1409.82), SIMDE_FLOAT64_C( 1093.42) } }, { { SIMDE_FLOAT64_C( 525.73), SIMDE_FLOAT64_C( -534.88) }, { SIMDE_FLOAT64_C( -29.96), SIMDE_FLOAT64_C( -812.36) }, { SIMDE_FLOAT64_C( 495.77), SIMDE_FLOAT64_C( -1347.24) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = 
simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vaddq_f64(a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vaddq_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 111), -INT8_C( 97), -INT8_C( 90), INT8_C( 69), INT8_C( 113), INT8_C( 56), -INT8_C( 115), INT8_C( 112), INT8_C( 53), -INT8_C( 107), -INT8_C( 44), INT8_C( 82), -INT8_C( 51), -INT8_C( 96), -INT8_C( 21), -INT8_C( 97) }, { -INT8_C( 99), INT8_C( 22), INT8_C( 102), INT8_MIN, INT8_C( 111), INT8_C( 102), INT8_C( 52), -INT8_C( 31), INT8_C( 89), INT8_C( 28), -INT8_C( 108), -INT8_C( 49), INT8_C( 112), -INT8_C( 116), INT8_C( 125), -INT8_C( 33) }, { INT8_C( 12), -INT8_C( 75), INT8_C( 12), -INT8_C( 59), -INT8_C( 32), -INT8_C( 98), -INT8_C( 63), INT8_C( 81), -INT8_C( 114), -INT8_C( 79), INT8_C( 104), INT8_C( 33), INT8_C( 61), INT8_C( 44), INT8_C( 104), INT8_C( 126) } }, { { INT8_C( 43), INT8_C( 35), INT8_C( 36), -INT8_C( 100), INT8_C( 92), -INT8_C( 78), INT8_C( 12), -INT8_C( 111), INT8_C( 71), -INT8_C( 32), -INT8_C( 28), INT8_C( 20), -INT8_C( 127), -INT8_C( 49), -INT8_C( 77), INT8_C( 30) }, { -INT8_C( 26), INT8_C( 25), -INT8_C( 97), INT8_C( 85), INT8_MIN, -INT8_C( 45), INT8_C( 54), -INT8_C( 39), -INT8_C( 17), -INT8_C( 54), -INT8_C( 88), INT8_C( 95), INT8_C( 86), INT8_C( 37), INT8_C( 63), -INT8_C( 126) }, { INT8_C( 17), INT8_C( 60), -INT8_C( 61), -INT8_C( 15), -INT8_C( 36), -INT8_C( 123), INT8_C( 66), INT8_C( 106), INT8_C( 54), -INT8_C( 86), -INT8_C( 116), INT8_C( 115), -INT8_C( 41), -INT8_C( 12), -INT8_C( 14), -INT8_C( 96) } }, { { INT8_C( 73), INT8_C( 99), INT8_C( 30), -INT8_C( 91), INT8_C( 21), INT8_C( 43), INT8_C( 54), INT8_C( 92), INT8_C( 11), INT8_C( 26), INT8_C( 112), -INT8_C( 116), -INT8_C( 22), INT8_C( 35), -INT8_C( 85), -INT8_C( 48) }, { INT8_C( 61), INT8_C( 74), INT8_C( 37), -INT8_C( 67), INT8_C( 29), INT8_C( 91), 
-INT8_C( 106), INT8_C( 12), INT8_C( 37), INT8_C( 62), INT8_C( 108), INT8_C( 124), INT8_C( 99), -INT8_C( 85), -INT8_C( 2), -INT8_C( 84) }, { -INT8_C( 122), -INT8_C( 83), INT8_C( 67), INT8_C( 98), INT8_C( 50), -INT8_C( 122), -INT8_C( 52), INT8_C( 104), INT8_C( 48), INT8_C( 88), -INT8_C( 36), INT8_C( 8), INT8_C( 77), -INT8_C( 50), -INT8_C( 87), INT8_C( 124) } }, { { INT8_C( 14), INT8_C( 28), INT8_C( 81), INT8_C( 36), INT8_C( 71), -INT8_C( 120), INT8_MIN, INT8_C( 83), -INT8_C( 94), -INT8_C( 15), -INT8_C( 33), -INT8_C( 116), INT8_C( 20), -INT8_C( 118), INT8_C( 92), INT8_C( 81) }, { -INT8_C( 44), -INT8_C( 127), INT8_C( 14), -INT8_C( 15), -INT8_C( 36), -INT8_C( 92), -INT8_C( 2), INT8_C( 2), -INT8_C( 30), INT8_C( 106), INT8_C( 126), INT8_C( 70), INT8_C( 21), INT8_C( 124), -INT8_C( 14), INT8_C( 35) }, { -INT8_C( 30), -INT8_C( 99), INT8_C( 95), INT8_C( 21), INT8_C( 35), INT8_C( 44), INT8_C( 126), INT8_C( 85), -INT8_C( 124), INT8_C( 91), INT8_C( 93), -INT8_C( 46), INT8_C( 41), INT8_C( 6), INT8_C( 78), INT8_C( 116) } }, { { -INT8_C( 104), INT8_C( 68), INT8_C( 71), -INT8_C( 32), -INT8_C( 52), -INT8_C( 56), INT8_C( 51), INT8_C( 110), -INT8_C( 71), INT8_C( 18), -INT8_C( 5), -INT8_C( 51), -INT8_C( 99), INT8_C( 87), INT8_C( 31), INT8_C( 113) }, { -INT8_C( 39), INT8_C( 45), INT8_C( 99), -INT8_C( 75), -INT8_C( 46), INT8_C( 97), -INT8_C( 73), -INT8_C( 76), -INT8_C( 53), INT8_C( 53), -INT8_C( 6), -INT8_C( 32), -INT8_C( 79), -INT8_C( 19), INT8_C( 3), INT8_C( 74) }, { INT8_C( 113), INT8_C( 113), -INT8_C( 86), -INT8_C( 107), -INT8_C( 98), INT8_C( 41), -INT8_C( 22), INT8_C( 34), -INT8_C( 124), INT8_C( 71), -INT8_C( 11), -INT8_C( 83), INT8_C( 78), INT8_C( 68), INT8_C( 34), -INT8_C( 69) } }, { { INT8_C( 49), INT8_C( 75), INT8_C( 42), -INT8_C( 3), INT8_C( 19), INT8_C( 93), INT8_C( 107), -INT8_C( 52), INT8_C( 111), INT8_C( 102), -INT8_C( 103), INT8_C( 12), -INT8_C( 66), -INT8_C( 72), INT8_C( 126), -INT8_C( 105) }, { -INT8_C( 26), -INT8_C( 31), INT8_C( 76), -INT8_C( 72), INT8_C( 66), INT8_C( 
4), INT8_C( 108), INT8_C( 13), INT8_C( 57), INT8_C( 103), -INT8_C( 19), -INT8_C( 21), INT8_C( 84), -INT8_C( 16), INT8_C( 53), -INT8_C( 123) }, { INT8_C( 23), INT8_C( 44), INT8_C( 118), -INT8_C( 75), INT8_C( 85), INT8_C( 97), -INT8_C( 41), -INT8_C( 39), -INT8_C( 88), -INT8_C( 51), -INT8_C( 122), -INT8_C( 9), INT8_C( 18), -INT8_C( 88), -INT8_C( 77), INT8_C( 28) } }, { { INT8_C( 59), INT8_C( 95), -INT8_C( 126), INT8_C( 78), -INT8_C( 68), -INT8_C( 19), INT8_C( 26), INT8_C( 43), INT8_C( 84), -INT8_C( 76), INT8_C( 56), INT8_C( 18), INT8_C( 108), -INT8_C( 74), -INT8_C( 87), INT8_C( 82) }, { -INT8_C( 105), -INT8_C( 11), INT8_C( 10), -INT8_C( 39), -INT8_C( 7), INT8_C( 119), -INT8_C( 26), INT8_C( 51), -INT8_C( 34), -INT8_C( 45), INT8_C( 30), INT8_C( 50), -INT8_C( 61), INT8_C( 83), -INT8_C( 73), -INT8_C( 1) }, { -INT8_C( 46), INT8_C( 84), -INT8_C( 116), INT8_C( 39), -INT8_C( 75), INT8_C( 100), INT8_C( 0), INT8_C( 94), INT8_C( 50), -INT8_C( 121), INT8_C( 86), INT8_C( 68), INT8_C( 47), INT8_C( 9), INT8_C( 96), INT8_C( 81) } }, { { -INT8_C( 78), INT8_C( 57), INT8_C( 77), INT8_C( 110), INT8_C( 38), INT8_C( 104), -INT8_C( 103), INT8_C( 122), INT8_C( 28), -INT8_C( 47), -INT8_C( 116), -INT8_C( 120), -INT8_C( 121), INT8_C( 53), -INT8_C( 37), INT8_C( 30) }, { INT8_C( 43), -INT8_C( 27), -INT8_C( 9), INT8_C( 36), INT8_C( 92), -INT8_C( 35), INT8_C( 87), INT8_C( 58), -INT8_C( 80), INT8_C( 117), INT8_C( 108), INT8_C( 116), -INT8_C( 56), INT8_C( 35), INT8_C( 115), INT8_C( 122) }, { -INT8_C( 35), INT8_C( 30), INT8_C( 68), -INT8_C( 110), -INT8_C( 126), INT8_C( 69), -INT8_C( 16), -INT8_C( 76), -INT8_C( 52), INT8_C( 70), -INT8_C( 8), -INT8_C( 4), INT8_C( 79), INT8_C( 88), INT8_C( 78), -INT8_C( 104) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vaddq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } 
return 0; } static int test_simde_vaddq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 9359), INT16_C( 19853), -INT16_C( 5258), INT16_C( 25982), INT16_C( 13488), -INT16_C( 9621), -INT16_C( 8264), -INT16_C( 14382) }, { -INT16_C( 29038), INT16_C( 16526), -INT16_C( 8207), -INT16_C( 10419), -INT16_C( 27770), INT16_C( 8648), -INT16_C( 9930), -INT16_C( 22287) }, { INT16_C( 27139), -INT16_C( 29157), -INT16_C( 13465), INT16_C( 15563), -INT16_C( 14282), -INT16_C( 973), -INT16_C( 18194), INT16_C( 28867) } }, { { INT16_C( 32436), INT16_C( 10997), INT16_C( 29545), INT16_C( 6799), -INT16_C( 1112), INT16_C( 24820), -INT16_C( 14630), INT16_C( 27688) }, { -INT16_C( 18860), INT16_C( 17836), -INT16_C( 1643), INT16_C( 6940), -INT16_C( 7028), -INT16_C( 15556), INT16_C( 11710), INT16_C( 29291) }, { INT16_C( 13576), INT16_C( 28833), INT16_C( 27902), INT16_C( 13739), -INT16_C( 8140), INT16_C( 9264), -INT16_C( 2920), -INT16_C( 8557) } }, { { INT16_C( 24747), INT16_C( 5533), INT16_C( 11476), INT16_C( 31791), INT16_C( 8999), INT16_C( 476), INT16_C( 1257), INT16_C( 15981) }, { INT16_C( 6587), INT16_C( 20611), -INT16_C( 24557), -INT16_C( 24724), -INT16_C( 22396), INT16_C( 16994), -INT16_C( 12842), -INT16_C( 32331) }, { INT16_C( 31334), INT16_C( 26144), -INT16_C( 13081), INT16_C( 7067), -INT16_C( 13397), INT16_C( 17470), -INT16_C( 11585), -INT16_C( 16350) } }, { { INT16_C( 21038), INT16_C( 662), -INT16_C( 14978), -INT16_C( 22914), INT16_C( 23272), -INT16_C( 11609), INT16_C( 5471), INT16_C( 6672) }, { -INT16_C( 27858), INT16_C( 16746), -INT16_C( 10701), -INT16_C( 18207), INT16_C( 17279), INT16_C( 22010), -INT16_C( 20719), INT16_C( 16342) }, { -INT16_C( 6820), INT16_C( 17408), -INT16_C( 25679), INT16_C( 24415), -INT16_C( 24985), INT16_C( 10401), -INT16_C( 15248), INT16_C( 23014) } }, { { INT16_C( 27905), -INT16_C( 32703), -INT16_C( 16590), INT16_C( 6950), -INT16_C( 13031), INT16_C( 30957), -INT16_C( 542), INT16_C( 4498) }, { 
-INT16_C( 624), -INT16_C( 15278), INT16_C( 13267), INT16_C( 21116), INT16_C( 30327), -INT16_C( 30553), INT16_C( 32294), INT16_C( 10183) }, { INT16_C( 27281), INT16_C( 17555), -INT16_C( 3323), INT16_C( 28066), INT16_C( 17296), INT16_C( 404), INT16_C( 31752), INT16_C( 14681) } }, { { INT16_C( 2283), INT16_C( 7591), -INT16_C( 12857), -INT16_C( 8136), INT16_C( 9627), INT16_C( 32089), -INT16_C( 5342), -INT16_C( 19570) }, { -INT16_C( 7704), -INT16_C( 17289), -INT16_C( 3308), -INT16_C( 29938), -INT16_C( 18839), -INT16_C( 28909), -INT16_C( 9676), INT16_C( 8119) }, { -INT16_C( 5421), -INT16_C( 9698), -INT16_C( 16165), INT16_C( 27462), -INT16_C( 9212), INT16_C( 3180), -INT16_C( 15018), -INT16_C( 11451) } }, { { INT16_C( 24290), -INT16_C( 22212), INT16_C( 29996), -INT16_C( 14454), -INT16_C( 7270), -INT16_C( 17084), -INT16_C( 11314), -INT16_C( 18576) }, { -INT16_C( 6220), -INT16_C( 14221), -INT16_C( 32294), INT16_C( 17236), INT16_C( 26423), INT16_C( 27603), -INT16_C( 30142), INT16_C( 9354) }, { INT16_C( 18070), INT16_C( 29103), -INT16_C( 2298), INT16_C( 2782), INT16_C( 19153), INT16_C( 10519), INT16_C( 24080), -INT16_C( 9222) } }, { { -INT16_C( 14360), INT16_C( 5326), INT16_C( 22588), -INT16_C( 10533), INT16_C( 8251), INT16_C( 2451), INT16_C( 1011), -INT16_C( 22592) }, { INT16_C( 13290), -INT16_C( 15249), -INT16_C( 15435), -INT16_C( 5112), -INT16_C( 9429), INT16_C( 27992), -INT16_C( 7579), INT16_C( 19857) }, { -INT16_C( 1070), -INT16_C( 9923), INT16_C( 7153), -INT16_C( 15645), -INT16_C( 1178), INT16_C( 30443), -INT16_C( 6568), -INT16_C( 2735) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vaddq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vaddq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { 
{ { -INT32_C( 446537815), -INT32_C( 222544457), INT32_C( 1358712669), INT32_C( 1039645779) }, { -INT32_C( 1526568976), INT32_C( 1435568682), INT32_C( 1254287845), INT32_C( 1972851916) }, { -INT32_C( 1973106791), INT32_C( 1213024225), -INT32_C( 1681966782), -INT32_C( 1282469601) } }, { { INT32_C( 1801189811), -INT32_C( 1805838537), -INT32_C( 1176151706), INT32_C( 116907030) }, { INT32_C( 1856764227), -INT32_C( 389858301), -INT32_C( 231569882), -INT32_C( 1922577958) }, { -INT32_C( 637013258), INT32_C( 2099270458), -INT32_C( 1407721588), -INT32_C( 1805670928) } }, { { -INT32_C( 84360509), INT32_C( 1083070169), -INT32_C( 973507665), -INT32_C( 1815351216) }, { -INT32_C( 335448343), -INT32_C( 640367181), INT32_C( 634062411), -INT32_C( 1817038128) }, { -INT32_C( 419808852), INT32_C( 442702988), -INT32_C( 339445254), INT32_C( 662577952) } }, { { -INT32_C( 829576203), -INT32_C( 1341252863), -INT32_C( 545912689), -INT32_C( 495828488) }, { INT32_C( 1808692408), -INT32_C( 2075876551), INT32_C( 2041122729), INT32_C( 906779457) }, { INT32_C( 979116205), INT32_C( 877837882), INT32_C( 1495210040), INT32_C( 410950969) } }, { { INT32_C( 117807366), INT32_C( 1152914357), INT32_C( 337849883), INT32_C( 670471535) }, { INT32_C( 1116914697), INT32_C( 298243687), INT32_C( 663384037), -INT32_C( 782395445) }, { INT32_C( 1234722063), INT32_C( 1451158044), INT32_C( 1001233920), -INT32_C( 111923910) } }, { { -INT32_C( 438738384), -INT32_C( 1859546762), INT32_C( 782585023), -INT32_C( 346645534) }, { -INT32_C( 953227168), -INT32_C( 1529285441), INT32_C( 801858404), INT32_C( 704653818) }, { -INT32_C( 1391965552), INT32_C( 906135093), INT32_C( 1584443427), INT32_C( 358008284) } }, { { INT32_C( 17881483), INT32_C( 697514346), INT32_C( 1750612102), INT32_C( 877899476) }, { INT32_C( 1442546070), -INT32_C( 621095818), INT32_C( 822723895), INT32_C( 2052786670) }, { INT32_C( 1460427553), INT32_C( 76418528), -INT32_C( 1721631299), -INT32_C( 1364281150) } }, { { INT32_C( 1299934179), INT32_C( 729222821), 
INT32_C( 462671687), INT32_C( 324003453) }, { -INT32_C( 563525016), INT32_C( 1438147358), INT32_C( 394707240), -INT32_C( 1382948150) }, { INT32_C( 736409163), -INT32_C( 2127597117), INT32_C( 857378927), -INT32_C( 1058944697) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vaddq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vaddq_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { INT64_C( 7069932454407769165), -INT64_C( 13004371797364671) }, { -INT64_C( 4140332404967654890), INT64_C( 4669791854137250134) }, { INT64_C( 2929600049440114275), INT64_C( 4656787482339885463) } }, { { INT64_C( 9004754196787087738), INT64_C( 6311525774842819585) }, { INT64_C( 6893064936395404358), INT64_C( 8024092871902594599) }, { -INT64_C( 2548924940527059520), -INT64_C( 4111125426964137432) } }, { { -INT64_C( 2319920201253745130), INT64_C( 6415393146933806960) }, { -INT64_C( 3248530561039559550), -INT64_C( 3544254053793109396) }, { -INT64_C( 5568450762293304680), INT64_C( 2871139093140697564) } }, { { INT64_C( 4725381914672747733), -INT64_C( 7178462820614140940) }, { -INT64_C( 1222143994085532274), INT64_C( 1441468807256274812) }, { INT64_C( 3503237920587215459), -INT64_C( 5736994013357866128) } }, { { INT64_C( 145904156942017013), -INT64_C( 9215373147744044954) }, { -INT64_C( 3352152200456879353), -INT64_C( 2302176223951467846) }, { -INT64_C( 3206248043514862340), INT64_C( 6929194702014038816) } }, { { -INT64_C( 7550095452228168409), -INT64_C( 5469846406055902556) }, { INT64_C( 5664886855523291254), -INT64_C( 3436716378960715133) }, { -INT64_C( 1885208596704877155), -INT64_C( 8906562785016617689) } }, { { INT64_C( 4967820718097494825), -INT64_C( 5629233198243575680) }, { INT64_C( 6531657446270508995), 
INT64_C( 7725668155930504637) }, { -INT64_C( 6947265909341547796), INT64_C( 2096434957686928957) } }, { { -INT64_C( 293377758463705846), INT64_C( 9127117198251772094) }, { INT64_C( 25185705384028199), INT64_C( 4761604672234922343) }, { -INT64_C( 268192053079677647), -INT64_C( 4558022203222857179) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_vaddq_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vaddq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(219), UINT8_C(217), UINT8_C(235), UINT8_C( 86), UINT8_C(143), UINT8_C(217), UINT8_C( 81), UINT8_C( 77), UINT8_C(161), UINT8_C(120), UINT8_C( 70), UINT8_C( 92), UINT8_C(128), UINT8_C(240), UINT8_C(218), UINT8_C(167) }, { UINT8_C( 24), UINT8_C(175), UINT8_C(166), UINT8_C( 91), UINT8_C( 42), UINT8_MAX, UINT8_C( 92), UINT8_C(145), UINT8_C( 32), UINT8_C(195), UINT8_C( 58), UINT8_C( 88), UINT8_C( 97), UINT8_C( 78), UINT8_C(154), UINT8_C( 60) }, { UINT8_C(243), UINT8_C(136), UINT8_C(145), UINT8_C(177), UINT8_C(185), UINT8_C(216), UINT8_C(173), UINT8_C(222), UINT8_C(193), UINT8_C( 59), UINT8_C(128), UINT8_C(180), UINT8_C(225), UINT8_C( 62), UINT8_C(116), UINT8_C(227) } }, { { UINT8_C( 39), UINT8_C(134), UINT8_C(146), UINT8_C(182), UINT8_C( 95), UINT8_C(227), UINT8_C( 3), UINT8_C( 0), UINT8_C( 91), UINT8_C( 73), UINT8_C( 93), UINT8_C(220), UINT8_C( 58), UINT8_C( 55), UINT8_C(131), UINT8_C( 82) }, { UINT8_C(231), UINT8_C( 41), UINT8_C(174), UINT8_C( 17), UINT8_C( 40), UINT8_C( 10), UINT8_C(162), UINT8_C( 73), UINT8_C(205), UINT8_C(221), UINT8_C(161), UINT8_C( 46), UINT8_C( 43), UINT8_C( 60), UINT8_C(106), UINT8_C( 83) }, { UINT8_C( 14), UINT8_C(175), UINT8_C( 64), UINT8_C(199), UINT8_C(135), UINT8_C(237), UINT8_C(165), UINT8_C( 73), 
UINT8_C( 40), UINT8_C( 38), UINT8_C(254), UINT8_C( 10), UINT8_C(101), UINT8_C(115), UINT8_C(237), UINT8_C(165) } }, { { UINT8_C(194), UINT8_C(252), UINT8_C( 9), UINT8_C( 33), UINT8_C(223), UINT8_C( 13), UINT8_C( 33), UINT8_C( 58), UINT8_C( 86), UINT8_C(126), UINT8_C( 22), UINT8_C(144), UINT8_C(182), UINT8_C(154), UINT8_C(227), UINT8_C(157) }, { UINT8_C(195), UINT8_C(145), UINT8_C(174), UINT8_C(236), UINT8_C(155), UINT8_C( 80), UINT8_C( 53), UINT8_C(104), UINT8_C( 45), UINT8_C(214), UINT8_C(150), UINT8_C( 89), UINT8_C( 18), UINT8_C( 0), UINT8_C(172), UINT8_C(212) }, { UINT8_C(133), UINT8_C(141), UINT8_C(183), UINT8_C( 13), UINT8_C(122), UINT8_C( 93), UINT8_C( 86), UINT8_C(162), UINT8_C(131), UINT8_C( 84), UINT8_C(172), UINT8_C(233), UINT8_C(200), UINT8_C(154), UINT8_C(143), UINT8_C(113) } }, { { UINT8_C(252), UINT8_C(181), UINT8_C(245), UINT8_C(219), UINT8_C(194), UINT8_C( 23), UINT8_C( 21), UINT8_C( 25), UINT8_C(149), UINT8_C( 44), UINT8_C(169), UINT8_C( 75), UINT8_C(198), UINT8_C(140), UINT8_C(232), UINT8_C(137) }, { UINT8_C( 29), UINT8_C(150), UINT8_C(117), UINT8_C(184), UINT8_C(231), UINT8_C(170), UINT8_C( 32), UINT8_C( 20), UINT8_C(129), UINT8_C(182), UINT8_C(109), UINT8_C(147), UINT8_C(182), UINT8_C( 25), UINT8_C(104), UINT8_C(178) }, { UINT8_C( 25), UINT8_C( 75), UINT8_C(106), UINT8_C(147), UINT8_C(169), UINT8_C(193), UINT8_C( 53), UINT8_C( 45), UINT8_C( 22), UINT8_C(226), UINT8_C( 22), UINT8_C(222), UINT8_C(124), UINT8_C(165), UINT8_C( 80), UINT8_C( 59) } }, { { UINT8_C(207), UINT8_C( 93), UINT8_C(141), UINT8_C(145), UINT8_C(116), UINT8_C(163), UINT8_C(170), UINT8_C( 10), UINT8_C(207), UINT8_C( 84), UINT8_C( 85), UINT8_C(149), UINT8_C(224), UINT8_C( 62), UINT8_C( 30), UINT8_C(254) }, { UINT8_C(212), UINT8_C(148), UINT8_C(182), UINT8_C(187), UINT8_C( 62), UINT8_C(215), UINT8_C(208), UINT8_C(191), UINT8_C(141), UINT8_C( 61), UINT8_C( 83), UINT8_C( 68), UINT8_C( 87), UINT8_C(187), UINT8_C(246), UINT8_C( 38) }, { UINT8_C(163), UINT8_C(241), UINT8_C( 67), 
UINT8_C( 76), UINT8_C(178), UINT8_C(122), UINT8_C(122), UINT8_C(201), UINT8_C( 92), UINT8_C(145), UINT8_C(168), UINT8_C(217), UINT8_C( 55), UINT8_C(249), UINT8_C( 20), UINT8_C( 36) } }, { { UINT8_C( 24), UINT8_C(132), UINT8_C(183), UINT8_C(141), UINT8_C( 39), UINT8_C( 98), UINT8_C(151), UINT8_C(246), UINT8_C(182), UINT8_C(236), UINT8_C(139), UINT8_C(150), UINT8_C( 42), UINT8_C(169), UINT8_C(148), UINT8_MAX }, { UINT8_C( 61), UINT8_C( 75), UINT8_C(186), UINT8_C(124), UINT8_C( 34), UINT8_C(138), UINT8_C( 59), UINT8_C(175), UINT8_C(200), UINT8_C(142), UINT8_C(243), UINT8_C( 31), UINT8_C( 73), UINT8_C(234), UINT8_C( 69), UINT8_C( 98) }, { UINT8_C( 85), UINT8_C(207), UINT8_C(113), UINT8_C( 9), UINT8_C( 73), UINT8_C(236), UINT8_C(210), UINT8_C(165), UINT8_C(126), UINT8_C(122), UINT8_C(126), UINT8_C(181), UINT8_C(115), UINT8_C(147), UINT8_C(217), UINT8_C( 97) } }, { { UINT8_C(110), UINT8_C(252), UINT8_C(239), UINT8_C(149), UINT8_C( 94), UINT8_C(134), UINT8_C(139), UINT8_C( 20), UINT8_C(114), UINT8_C( 22), UINT8_C(171), UINT8_C(157), UINT8_C(191), UINT8_C( 63), UINT8_C(156), UINT8_C(253) }, { UINT8_C(138), UINT8_C( 86), UINT8_C(121), UINT8_C(172), UINT8_C(225), UINT8_C(180), UINT8_C( 92), UINT8_C(169), UINT8_C( 67), UINT8_C( 79), UINT8_C(200), UINT8_C(140), UINT8_C( 57), UINT8_C( 13), UINT8_C(238), UINT8_C(167) }, { UINT8_C(248), UINT8_C( 82), UINT8_C(104), UINT8_C( 65), UINT8_C( 63), UINT8_C( 58), UINT8_C(231), UINT8_C(189), UINT8_C(181), UINT8_C(101), UINT8_C(115), UINT8_C( 41), UINT8_C(248), UINT8_C( 76), UINT8_C(138), UINT8_C(164) } }, { { UINT8_C( 9), UINT8_C(221), UINT8_C( 60), UINT8_C(104), UINT8_C( 99), UINT8_C(199), UINT8_C(124), UINT8_C(214), UINT8_C(221), UINT8_C( 39), UINT8_C(115), UINT8_C(157), UINT8_C(103), UINT8_C( 15), UINT8_C(154), UINT8_C(241) }, { UINT8_C(101), UINT8_C( 19), UINT8_C(158), UINT8_C( 70), UINT8_C(199), UINT8_C(250), UINT8_C(239), UINT8_C( 10), UINT8_C( 73), UINT8_C(183), UINT8_C(151), UINT8_C(131), UINT8_C(196), UINT8_C(133), UINT8_C( 42), 
UINT8_C(206) }, { UINT8_C(110), UINT8_C(240), UINT8_C(218), UINT8_C(174), UINT8_C( 42), UINT8_C(193), UINT8_C(107), UINT8_C(224), UINT8_C( 38), UINT8_C(222), UINT8_C( 10), UINT8_C( 32), UINT8_C( 43), UINT8_C(148), UINT8_C(196), UINT8_C(191) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vaddq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vaddq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(26467), UINT16_C(50742), UINT16_C(45614), UINT16_C( 3228), UINT16_C( 4058), UINT16_C(16809), UINT16_C(17182), UINT16_C(33842) }, { UINT16_C(53334), UINT16_C( 7626), UINT16_C(47818), UINT16_C( 5160), UINT16_C(49009), UINT16_C(13975), UINT16_C(49476), UINT16_C(42756) }, { UINT16_C(14265), UINT16_C(58368), UINT16_C(27896), UINT16_C( 8388), UINT16_C(53067), UINT16_C(30784), UINT16_C( 1122), UINT16_C(11062) } }, { { UINT16_C(14888), UINT16_C(22382), UINT16_C( 2796), UINT16_C(50787), UINT16_C( 3098), UINT16_C(14343), UINT16_C(14927), UINT16_C(42428) }, { UINT16_C(34570), UINT16_C(54722), UINT16_C(59969), UINT16_C(45801), UINT16_C(32937), UINT16_C(61160), UINT16_C(60481), UINT16_C(27285) }, { UINT16_C(49458), UINT16_C(11568), UINT16_C(62765), UINT16_C(31052), UINT16_C(36035), UINT16_C( 9967), UINT16_C( 9872), UINT16_C( 4177) } }, { { UINT16_C( 806), UINT16_C( 5057), UINT16_C( 9230), UINT16_C(10457), UINT16_C(57648), UINT16_C(32608), UINT16_C( 7451), UINT16_C( 9508) }, { UINT16_C(59044), UINT16_C(58874), UINT16_C(58321), UINT16_C(31383), UINT16_C(32867), UINT16_C(42344), UINT16_C(65132), UINT16_C(37647) }, { UINT16_C(59850), UINT16_C(63931), UINT16_C( 2015), UINT16_C(41840), UINT16_C(24979), UINT16_C( 9416), UINT16_C( 7047), UINT16_C(47155) } }, { { UINT16_C(53249), UINT16_C( 4006), 
UINT16_C(32756), UINT16_C( 9271), UINT16_C(39008), UINT16_C(31651), UINT16_C(51125), UINT16_C(22945) }, { UINT16_C(39853), UINT16_C(32318), UINT16_C(54655), UINT16_C(58105), UINT16_C(24917), UINT16_C(49799), UINT16_C(38495), UINT16_C(24917) }, { UINT16_C(27566), UINT16_C(36324), UINT16_C(21875), UINT16_C( 1840), UINT16_C(63925), UINT16_C(15914), UINT16_C(24084), UINT16_C(47862) } }, { { UINT16_C(64358), UINT16_C(23152), UINT16_C(43130), UINT16_C(56190), UINT16_C( 8512), UINT16_C(62806), UINT16_C(63464), UINT16_C(38478) }, { UINT16_C(35987), UINT16_C( 4628), UINT16_C( 3425), UINT16_C(47092), UINT16_C(31855), UINT16_C(52857), UINT16_C(52754), UINT16_C(31023) }, { UINT16_C(34809), UINT16_C(27780), UINT16_C(46555), UINT16_C(37746), UINT16_C(40367), UINT16_C(50127), UINT16_C(50682), UINT16_C( 3965) } }, { { UINT16_C(41161), UINT16_C(17363), UINT16_C(21064), UINT16_C(34846), UINT16_C(30067), UINT16_C(23677), UINT16_C(52076), UINT16_C(65522) }, { UINT16_C( 1623), UINT16_C(47121), UINT16_C( 1556), UINT16_C(33647), UINT16_C(59522), UINT16_C(37969), UINT16_C(33206), UINT16_C(32525) }, { UINT16_C(42784), UINT16_C(64484), UINT16_C(22620), UINT16_C( 2957), UINT16_C(24053), UINT16_C(61646), UINT16_C(19746), UINT16_C(32511) } }, { { UINT16_C(57633), UINT16_C(27075), UINT16_C(57651), UINT16_C(42737), UINT16_C(28246), UINT16_C(49922), UINT16_C(62521), UINT16_C(37058) }, { UINT16_C(54523), UINT16_C( 3912), UINT16_C(47322), UINT16_C(23698), UINT16_C(58272), UINT16_C(22512), UINT16_C(65124), UINT16_C(34262) }, { UINT16_C(46620), UINT16_C(30987), UINT16_C(39437), UINT16_C( 899), UINT16_C(20982), UINT16_C( 6898), UINT16_C(62109), UINT16_C( 5784) } }, { { UINT16_C(39391), UINT16_C( 4846), UINT16_C(57211), UINT16_C(53688), UINT16_C(47949), UINT16_C(34452), UINT16_C(22447), UINT16_C(43542) }, { UINT16_C(24363), UINT16_C( 1465), UINT16_C(19223), UINT16_C(46945), UINT16_C(20783), UINT16_C(37646), UINT16_C(58703), UINT16_C(11801) }, { UINT16_C(63754), UINT16_C( 6311), UINT16_C(10898), 
UINT16_C(35097), UINT16_C( 3196), UINT16_C( 6562), UINT16_C(15614), UINT16_C(55343) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vaddq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vaddq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(4181723006), UINT32_C( 885783015), UINT32_C(1673224116), UINT32_C(3775844790) }, { UINT32_C(1206306608), UINT32_C(1124026131), UINT32_C(3906276761), UINT32_C(1897393906) }, { UINT32_C(1093062318), UINT32_C(2009809146), UINT32_C(1284533581), UINT32_C(1378271400) } }, { { UINT32_C(3714734070), UINT32_C( 68236624), UINT32_C(1265159317), UINT32_C(3459085982) }, { UINT32_C(1343623997), UINT32_C(4103214427), UINT32_C( 366766114), UINT32_C(1283912534) }, { UINT32_C( 763390771), UINT32_C(4171451051), UINT32_C(1631925431), UINT32_C( 448031220) } }, { { UINT32_C(2603216971), UINT32_C(3147840294), UINT32_C(2768635911), UINT32_C(3144954750) }, { UINT32_C(2718730823), UINT32_C(3247873695), UINT32_C(1574334982), UINT32_C(2980666470) }, { UINT32_C(1026980498), UINT32_C(2100746693), UINT32_C( 48003597), UINT32_C(1830653924) } }, { { UINT32_C(1934414669), UINT32_C( 355396622), UINT32_C(1924871412), UINT32_C(2939039592) }, { UINT32_C(1481718457), UINT32_C(3743016920), UINT32_C(3208441945), UINT32_C(2574312780) }, { UINT32_C(3416133126), UINT32_C(4098413542), UINT32_C( 838346061), UINT32_C(1218385076) } }, { { UINT32_C(3322723768), UINT32_C(2665233065), UINT32_C(3608188783), UINT32_C(2139504326) }, { UINT32_C(1373099896), UINT32_C( 389083326), UINT32_C( 769092832), UINT32_C( 180766545) }, { UINT32_C( 400856368), UINT32_C(3054316391), UINT32_C( 82314319), UINT32_C(2320270871) } }, { { UINT32_C(2932921092), UINT32_C(2085399565), UINT32_C( 156458051), 
UINT32_C( 327735707) }, { UINT32_C(1852071856), UINT32_C( 814126160), UINT32_C(1381850368), UINT32_C(2841388196) }, { UINT32_C( 490025652), UINT32_C(2899525725), UINT32_C(1538308419), UINT32_C(3169123903) } }, { { UINT32_C( 72822007), UINT32_C( 478258137), UINT32_C(2586236159), UINT32_C(1588506286) }, { UINT32_C(1590497806), UINT32_C(2811122599), UINT32_C(1425665200), UINT32_C( 134042896) }, { UINT32_C(1663319813), UINT32_C(3289380736), UINT32_C(4011901359), UINT32_C(1722549182) } }, { { UINT32_C(1527469186), UINT32_C(4151807223), UINT32_C( 261201249), UINT32_C(1517109068) }, { UINT32_C(4189600338), UINT32_C(1033913997), UINT32_C(1116903986), UINT32_C(1900646383) }, { UINT32_C(1422102228), UINT32_C( 890753924), UINT32_C(1378105235), UINT32_C(3417755451) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vaddq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vaddq_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 4814986688458413540), UINT64_C(17692320368709166305) }, { UINT64_C(13313924206382956537), UINT64_C(14594553297499805225) }, { UINT64_C(18128910894841370077), UINT64_C(13840129592499419914) } }, { { UINT64_C( 8988473036196107929), UINT64_C(14372448548797616092) }, { UINT64_C( 1994835094183764848), UINT64_C( 9807079594316311145) }, { UINT64_C(10983308130379872777), UINT64_C( 5732784069404375621) } }, { { UINT64_C( 1381409056111181588), UINT64_C( 8531232789823149263) }, { UINT64_C( 3492853524787409771), UINT64_C( 9168371629288304069) }, { UINT64_C( 4874262580898591359), UINT64_C(17699604419111453332) } }, { { UINT64_C(14961619866231914328), UINT64_C(12081417531889038382) }, { UINT64_C(17832984800169039493), UINT64_C(17011757792814168874) }, { 
UINT64_C(14347860592691402205), UINT64_C(10646431250993655640) } }, { { UINT64_C(14696404044792677158), UINT64_C(13201361342813856081) }, { UINT64_C(16277837227336350142), UINT64_C(11455340474931777913) }, { UINT64_C(12527497198419475684), UINT64_C( 6209957744036082378) } }, { { UINT64_C(16372757905063310543), UINT64_C(11169953870697739545) }, { UINT64_C(18335686276445570782), UINT64_C( 5051355942773124099) }, { UINT64_C(16261700107799329709), UINT64_C(16221309813470863644) } }, { { UINT64_C(12749384425783090810), UINT64_C(16328080845921083167) }, { UINT64_C(10919518894275568390), UINT64_C( 1555480866372949464) }, { UINT64_C( 5222159246349107584), UINT64_C(17883561712294032631) } }, { { UINT64_C( 5859481300311754763), UINT64_C( 6004676323984625006) }, { UINT64_C(10246929086205053448), UINT64_C( 9200137657917523965) }, { UINT64_C(16106410386516808211), UINT64_C(15204813981902148971) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vaddq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vaddd_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vaddd_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vadd_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vadd_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vadd_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vadd_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vadd_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vadd_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vadd_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vadd_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vadd_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vadd_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vaddq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vaddq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vaddq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vaddq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vaddq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vaddq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vaddq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vaddq_u16) 
SIMDE_TEST_FUNC_LIST_ENTRY(vaddq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vaddq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/addl.c000066400000000000000000000411341400333146700163720ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN addl #include "test-neon.h" #include static int test_simde_vaddl_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int16_t r[8]; } test_vec[] = { { { -INT8_C( 101), INT8_C( 70), INT8_C( 78), INT8_C( 35), INT8_C( 52), -INT8_C( 115), -INT8_C( 75), INT8_C( 9) }, { -INT8_C( 7), -INT8_C( 86), INT8_C( 92), INT8_C( 121), INT8_C( 108), INT8_C( 119), INT8_C( 71), -INT8_C( 69) }, { -INT16_C( 108), -INT16_C( 16), INT16_C( 170), INT16_C( 156), INT16_C( 160), INT16_C( 4), -INT16_C( 4), -INT16_C( 60) } }, { { INT8_C( 84), -INT8_C( 56), INT8_C( 30), INT8_C( 53), INT8_C( 70), INT8_C( 95), INT8_C( 116), INT8_C( 23) }, { INT8_C( 117), INT8_C( 73), INT8_C( 119), -INT8_C( 6), -INT8_C( 59), -INT8_C( 46), INT8_C( 109), INT8_C( 96) }, { INT16_C( 201), INT16_C( 17), INT16_C( 149), INT16_C( 47), INT16_C( 11), INT16_C( 49), INT16_C( 225), INT16_C( 119) } }, { { INT8_C( 24), -INT8_C( 69), -INT8_C( 124), INT8_C( 76), INT8_C( 72), INT8_C( 57), INT8_C( 86), INT8_C( 65) }, { -INT8_C( 28), -INT8_C( 78), -INT8_C( 70), INT8_C( 80), INT8_C( 42), INT8_C( 2), INT8_C( 11), INT8_C( 126) }, { -INT16_C( 4), -INT16_C( 147), -INT16_C( 194), INT16_C( 156), INT16_C( 114), INT16_C( 59), INT16_C( 97), INT16_C( 191) } }, { { -INT8_C( 54), INT8_C( 42), -INT8_C( 77), INT8_C( 16), -INT8_C( 119), INT8_C( 40), INT8_C( 39), -INT8_C( 2) }, { INT8_C( 113), -INT8_C( 97), -INT8_C( 8), INT8_C( 55), INT8_C( 113), INT8_C( 101), -INT8_C( 105), -INT8_C( 119) }, { INT16_C( 59), -INT16_C( 55), -INT16_C( 85), INT16_C( 71), -INT16_C( 6), INT16_C( 141), -INT16_C( 66), -INT16_C( 121) } }, { { INT8_C( 32), INT8_C( 27), -INT8_C( 42), INT8_C( 105), INT8_C( 85), INT8_C( 44), -INT8_C( 86), INT8_C( 57) }, { -INT8_C( 34), INT8_C( 101), -INT8_C( 119), 
INT8_C( 8), INT8_C( 103), -INT8_C( 108), -INT8_C( 122), INT8_C( 49) }, { -INT16_C( 2), INT16_C( 128), -INT16_C( 161), INT16_C( 113), INT16_C( 188), -INT16_C( 64), -INT16_C( 208), INT16_C( 106) } }, { { -INT8_C( 66), INT8_C( 58), INT8_C( 65), INT8_C( 71), INT8_C( 98), INT8_C( 105), INT8_C( 69), -INT8_C( 45) }, { INT8_C( 8), INT8_C( 62), INT8_C( 10), INT8_C( 121), -INT8_C( 93), -INT8_C( 94), INT8_C( 2), -INT8_C( 60) }, { -INT16_C( 58), INT16_C( 120), INT16_C( 75), INT16_C( 192), INT16_C( 5), INT16_C( 11), INT16_C( 71), -INT16_C( 105) } }, { { -INT8_C( 67), -INT8_C( 40), INT8_C( 45), INT8_C( 18), INT8_C( 4), -INT8_C( 41), INT8_C( 75), -INT8_C( 29) }, { INT8_C( 60), -INT8_C( 44), -INT8_C( 21), -INT8_C( 93), INT8_C( 105), INT8_C( 114), -INT8_C( 44), INT8_C( 39) }, { -INT16_C( 7), -INT16_C( 84), INT16_C( 24), -INT16_C( 75), INT16_C( 109), INT16_C( 73), INT16_C( 31), INT16_C( 10) } }, { { -INT8_C( 84), INT8_C( 22), INT8_C( 111), INT8_C( 14), INT8_MAX, -INT8_C( 76), -INT8_C( 31), -INT8_C( 121) }, { -INT8_C( 14), -INT8_C( 20), INT8_C( 0), -INT8_C( 106), -INT8_C( 114), INT8_C( 2), INT8_C( 90), INT8_C( 75) }, { -INT16_C( 98), INT16_C( 2), INT16_C( 111), -INT16_C( 92), INT16_C( 13), -INT16_C( 74), INT16_C( 59), -INT16_C( 46) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int16x8_t r = simde_vaddl_s8(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vaddl_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int32_t r[4]; } test_vec[] = { { { INT16_C( 6959), -INT16_C( 24020), INT16_C( 30648), INT16_C( 17726) }, { -INT16_C( 1768), -INT16_C( 3483), INT16_C( 13258), -INT16_C( 4684) }, { INT32_C( 5191), -INT32_C( 27503), INT32_C( 43906), INT32_C( 13042) } }, { { -INT16_C( 22668), INT16_C( 32106), -INT16_C( 15225), -INT16_C( 12164) }, { -INT16_C( 18961), -INT16_C( 
23688), -INT16_C( 23438), -INT16_C( 24172) }, { -INT32_C( 41629), INT32_C( 8418), -INT32_C( 38663), -INT32_C( 36336) } }, { { -INT16_C( 16193), INT16_C( 30532), -INT16_C( 32200), INT16_C( 20669) }, { INT16_C( 8828), INT16_C( 17987), -INT16_C( 2219), -INT16_C( 14029) }, { -INT32_C( 7365), INT32_C( 48519), -INT32_C( 34419), INT32_C( 6640) } }, { { -INT16_C( 25186), INT16_C( 9543), -INT16_C( 15519), INT16_C( 20981) }, { INT16_C( 28280), -INT16_C( 5388), -INT16_C( 30702), -INT16_C( 11893) }, { INT32_C( 3094), INT32_C( 4155), -INT32_C( 46221), INT32_C( 9088) } }, { { -INT16_C( 12471), -INT16_C( 32440), INT16_C( 1362), -INT16_C( 12591) }, { INT16_C( 5160), INT16_C( 32020), INT16_C( 18188), -INT16_C( 21945) }, { -INT32_C( 7311), -INT32_C( 420), INT32_C( 19550), -INT32_C( 34536) } }, { { -INT16_C( 28956), INT16_C( 18128), -INT16_C( 15023), -INT16_C( 13929) }, { -INT16_C( 29901), INT16_C( 17843), INT16_C( 15891), INT16_C( 23574) }, { -INT32_C( 58857), INT32_C( 35971), INT32_C( 868), INT32_C( 9645) } }, { { INT16_C( 24334), INT16_C( 24797), -INT16_C( 20636), -INT16_C( 29650) }, { INT16_C( 17091), -INT16_C( 12534), INT16_C( 20873), INT16_C( 28026) }, { INT32_C( 41425), INT32_C( 12263), INT32_C( 237), -INT32_C( 1624) } }, { { INT16_C( 19167), INT16_C( 12467), INT16_C( 18959), INT16_C( 17401) }, { -INT16_C( 21291), -INT16_C( 5752), -INT16_C( 24598), -INT16_C( 1979) }, { -INT32_C( 2124), INT32_C( 6715), -INT32_C( 5639), INT32_C( 15422) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int32x4_t r = simde_vaddl_s16(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vaddl_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int64_t r[2]; } test_vec[] = { { { INT32_C( 1494235381), -INT32_C( 1219392167) }, { INT32_C( 1829530619), INT32_C( 1470485446) }, { INT64_C( 
3323766000), INT64_C( 251093279) } }, { { -INT32_C( 366091336), INT32_C( 1956650092) }, { INT32_C( 1470883480), -INT32_C( 2032977263) }, { INT64_C( 1104792144), -INT64_C( 76327171) } }, { { -INT32_C( 874519694), INT32_C( 1803694192) }, { INT32_C( 1658359452), INT32_C( 448364130) }, { INT64_C( 783839758), INT64_C( 2252058322) } }, { { -INT32_C( 855316895), -INT32_C( 1690196733) }, { INT32_C( 485682315), -INT32_C( 1717385689) }, { -INT64_C( 369634580), -INT64_C( 3407582422) } }, { { INT32_C( 442860201), INT32_C( 1317398450) }, { -INT32_C( 659530122), INT32_C( 1039296988) }, { -INT64_C( 216669921), INT64_C( 2356695438) } }, { { INT32_C( 1393292880), INT32_C( 669928603) }, { INT32_C( 1615061049), INT32_C( 1358554790) }, { INT64_C( 3008353929), INT64_C( 2028483393) } }, { { INT32_C( 443178600), -INT32_C( 1133908154) }, { INT32_C( 697571661), -INT32_C( 748190077) }, { INT64_C( 1140750261), -INT64_C( 1882098231) } }, { { INT32_C( 405172860), -INT32_C( 146860866) }, { -INT32_C( 1688763660), -INT32_C( 789884568) }, { -INT64_C( 1283590800), -INT64_C( 936745434) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int64x2_t r = simde_vaddl_s32(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vaddl_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT8_C(240), UINT8_C(249), UINT8_C( 84), UINT8_C(134), UINT8_C( 92), UINT8_C( 31), UINT8_C(197), UINT8_C( 42) }, { UINT8_C(120), UINT8_C(138), UINT8_C(245), UINT8_C( 38), UINT8_C(147), UINT8_C(110), UINT8_C(210), UINT8_C(229) }, { UINT16_C( 360), UINT16_C( 387), UINT16_C( 329), UINT16_C( 172), UINT16_C( 239), UINT16_C( 141), UINT16_C( 407), UINT16_C( 271) } }, { { UINT8_C(239), UINT8_C(203), UINT8_C( 21), UINT8_C( 81), UINT8_C(116), UINT8_C( 90), UINT8_C( 56), UINT8_C(221) }, { 
UINT8_C( 68), UINT8_C(222), UINT8_C( 15), UINT8_C(211), UINT8_C(100), UINT8_C( 8), UINT8_C(220), UINT8_C( 85) }, { UINT16_C( 307), UINT16_C( 425), UINT16_C( 36), UINT16_C( 292), UINT16_C( 216), UINT16_C( 98), UINT16_C( 276), UINT16_C( 306) } }, { { UINT8_C( 2), UINT8_C( 48), UINT8_C(219), UINT8_C( 94), UINT8_C( 79), UINT8_C(161), UINT8_C(137), UINT8_C(199) }, { UINT8_C( 43), UINT8_C(126), UINT8_C(237), UINT8_C(190), UINT8_C(237), UINT8_C(192), UINT8_C(163), UINT8_C(220) }, { UINT16_C( 45), UINT16_C( 174), UINT16_C( 456), UINT16_C( 284), UINT16_C( 316), UINT16_C( 353), UINT16_C( 300), UINT16_C( 419) } }, { { UINT8_C(139), UINT8_C(184), UINT8_C( 45), UINT8_MAX, UINT8_C( 18), UINT8_C(102), UINT8_C(220), UINT8_C( 86) }, { UINT8_C( 68), UINT8_C(236), UINT8_C( 41), UINT8_C(168), UINT8_C(244), UINT8_C( 5), UINT8_C(253), UINT8_C(246) }, { UINT16_C( 207), UINT16_C( 420), UINT16_C( 86), UINT16_C( 423), UINT16_C( 262), UINT16_C( 107), UINT16_C( 473), UINT16_C( 332) } }, { { UINT8_C( 54), UINT8_C(217), UINT8_C( 85), UINT8_C(133), UINT8_C(122), UINT8_C(222), UINT8_C( 77), UINT8_C(165) }, { UINT8_C( 92), UINT8_C( 58), UINT8_C(100), UINT8_C( 73), UINT8_C(250), UINT8_C( 7), UINT8_C( 38), UINT8_C(133) }, { UINT16_C( 146), UINT16_C( 275), UINT16_C( 185), UINT16_C( 206), UINT16_C( 372), UINT16_C( 229), UINT16_C( 115), UINT16_C( 298) } }, { { UINT8_C(192), UINT8_C( 83), UINT8_C(132), UINT8_C(210), UINT8_C(185), UINT8_C( 97), UINT8_C( 41), UINT8_C(253) }, { UINT8_C( 77), UINT8_C( 82), UINT8_C(166), UINT8_C( 65), UINT8_C( 88), UINT8_C(163), UINT8_C( 56), UINT8_C(142) }, { UINT16_C( 269), UINT16_C( 165), UINT16_C( 298), UINT16_C( 275), UINT16_C( 273), UINT16_C( 260), UINT16_C( 97), UINT16_C( 395) } }, { { UINT8_C(124), UINT8_C(141), UINT8_C( 19), UINT8_C(246), UINT8_C(107), UINT8_C( 96), UINT8_C(156), UINT8_C(199) }, { UINT8_C(155), UINT8_C( 0), UINT8_C( 17), UINT8_C(149), UINT8_C( 7), UINT8_C( 55), UINT8_C( 27), UINT8_C(199) }, { UINT16_C( 279), UINT16_C( 141), UINT16_C( 36), UINT16_C( 
395), UINT16_C( 114), UINT16_C( 151), UINT16_C( 183), UINT16_C( 398) } }, { { UINT8_C(138), UINT8_C(159), UINT8_C(154), UINT8_C( 68), UINT8_C( 0), UINT8_C(195), UINT8_C( 65), UINT8_C( 77) }, { UINT8_C( 21), UINT8_C(231), UINT8_C(143), UINT8_C(109), UINT8_C(139), UINT8_C(199), UINT8_C(251), UINT8_C( 7) }, { UINT16_C( 159), UINT16_C( 390), UINT16_C( 297), UINT16_C( 177), UINT16_C( 139), UINT16_C( 394), UINT16_C( 316), UINT16_C( 84) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint16x8_t r = simde_vaddl_u8(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vaddl_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT16_C(57527), UINT16_C(64774), UINT16_C(46352), UINT16_C(39096) }, { UINT16_C(39401), UINT16_C(22114), UINT16_C(21871), UINT16_C(51426) }, { UINT32_C( 96928), UINT32_C( 86888), UINT32_C( 68223), UINT32_C( 90522) } }, { { UINT16_C( 1994), UINT16_C(58923), UINT16_C(31609), UINT16_C(59535) }, { UINT16_C(58560), UINT16_C( 5121), UINT16_C(47672), UINT16_C(61477) }, { UINT32_C( 60554), UINT32_C( 64044), UINT32_C( 79281), UINT32_C( 121012) } }, { { UINT16_C(11418), UINT16_C(43757), UINT16_C(42721), UINT16_C(52034) }, { UINT16_C(42047), UINT16_C(44833), UINT16_C( 1017), UINT16_C(50295) }, { UINT32_C( 53465), UINT32_C( 88590), UINT32_C( 43738), UINT32_C( 102329) } }, { { UINT16_C(41482), UINT16_C(33706), UINT16_C(14621), UINT16_C(56684) }, { UINT16_C(27933), UINT16_C(22002), UINT16_C( 5927), UINT16_C(49477) }, { UINT32_C( 69415), UINT32_C( 55708), UINT32_C( 20548), UINT32_C( 106161) } }, { { UINT16_C(13123), UINT16_C( 9579), UINT16_C(44505), UINT16_C( 6384) }, { UINT16_C( 4434), UINT16_C(19399), UINT16_C(16149), UINT16_C( 7951) }, { UINT32_C( 17557), UINT32_C( 28978), UINT32_C( 60654), UINT32_C( 14335) } }, 
{ { UINT16_C(47585), UINT16_C(65443), UINT16_C( 4082), UINT16_C( 4060) }, { UINT16_C(52860), UINT16_C(41829), UINT16_C(43750), UINT16_C(10596) }, { UINT32_C( 100445), UINT32_C( 107272), UINT32_C( 47832), UINT32_C( 14656) } }, { { UINT16_C(53213), UINT16_C(46670), UINT16_C(15996), UINT16_C(52943) }, { UINT16_C(38480), UINT16_C(25882), UINT16_C(10709), UINT16_C(46980) }, { UINT32_C( 91693), UINT32_C( 72552), UINT32_C( 26705), UINT32_C( 99923) } }, { { UINT16_C(10211), UINT16_C(54710), UINT16_C(37430), UINT16_C(45797) }, { UINT16_C(19041), UINT16_C(18261), UINT16_C(47604), UINT16_C(53872) }, { UINT32_C( 29252), UINT32_C( 72971), UINT32_C( 85034), UINT32_C( 99669) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint32x4_t r = simde_vaddl_u16(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vaddl_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT32_C(2652809081), UINT32_C(1638988185) }, { UINT32_C(3786702705), UINT32_C(4014799475) }, { UINT64_C( 6439511786), UINT64_C( 5653787660) } }, { { UINT32_C( 549286435), UINT32_C(3132943308) }, { UINT32_C(4193740714), UINT32_C( 171232401) }, { UINT64_C( 4743027149), UINT64_C( 3304175709) } }, { { UINT32_C( 229134963), UINT32_C(3161348171) }, { UINT32_C(1268589271), UINT32_C(1060825628) }, { UINT64_C( 1497724234), UINT64_C( 4222173799) } }, { { UINT32_C( 677443676), UINT32_C(2397183203) }, { UINT32_C( 344447363), UINT32_C( 421444518) }, { UINT64_C( 1021891039), UINT64_C( 2818627721) } }, { { UINT32_C(1478936333), UINT32_C(4145386783) }, { UINT32_C(3561140919), UINT32_C(4162026652) }, { UINT64_C( 5040077252), UINT64_C( 8307413435) } }, { { UINT32_C(1478587252), UINT32_C( 317064079) }, { UINT32_C(2200333789), UINT32_C( 916210985) }, { UINT64_C( 3678921041), UINT64_C( 
1233275064) } }, { { UINT32_C( 730841868), UINT32_C( 253928536) }, { UINT32_C(4091765846), UINT32_C(1441527777) }, { UINT64_C( 4822607714), UINT64_C( 1695456313) } }, { { UINT32_C(4205644906), UINT32_C(3977024272) }, { UINT32_C( 711996161), UINT32_C(2220887160) }, { UINT64_C( 4917641067), UINT64_C( 6197911432) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint64x2_t r = simde_vaddl_u32(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vaddl_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vaddl_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vaddl_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vaddl_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vaddl_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vaddl_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/addl_high.c000066400000000000000000000603241400333146700173730ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN addl_high #include "test-neon.h" #include static int test_simde_vaddl_high_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int16_t r[8]; } test_vec[] = { { { INT8_C( 28), INT8_C( 57), INT8_C( 116), -INT8_C( 26), -INT8_C( 59), INT8_MIN, INT8_C( 27), INT8_C( 25), INT8_C( 118), -INT8_C( 21), -INT8_C( 106), INT8_C( 96), INT8_C( 91), -INT8_C( 73), -INT8_C( 21), -INT8_C( 93) }, { INT8_C( 19), -INT8_C( 18), -INT8_C( 14), INT8_C( 50), -INT8_C( 30), INT8_C( 108), -INT8_C( 16), INT8_C( 6), -INT8_C( 106), INT8_C( 51), -INT8_C( 85), INT8_C( 60), INT8_C( 49), -INT8_C( 12), -INT8_C( 61), INT8_C( 78) }, { INT16_C( 12), INT16_C( 30), -INT16_C( 191), INT16_C( 156), INT16_C( 140), -INT16_C( 85), -INT16_C( 82), -INT16_C( 15) } }, { { INT8_C( 46), INT8_C( 55), INT8_C( 52), -INT8_C( 13), -INT8_C( 72), INT8_C( 79), INT8_C( 13), INT8_C( 46), INT8_C( 59), -INT8_C( 93), -INT8_C( 114), -INT8_C( 106), 
INT8_C( 91), INT8_C( 122), INT8_C( 57), INT8_C( 110) }, { INT8_C( 104), INT8_C( 44), -INT8_C( 96), INT8_C( 74), -INT8_C( 104), -INT8_C( 111), INT8_C( 80), INT8_C( 47), -INT8_C( 60), -INT8_C( 5), INT8_C( 107), -INT8_C( 10), -INT8_C( 16), INT8_C( 46), INT8_C( 68), INT8_C( 30) }, { -INT16_C( 1), -INT16_C( 98), -INT16_C( 7), -INT16_C( 116), INT16_C( 75), INT16_C( 168), INT16_C( 125), INT16_C( 140) } }, { { INT8_C( 102), INT8_C( 120), INT8_C( 17), INT8_C( 30), -INT8_C( 57), INT8_C( 30), INT8_C( 76), INT8_C( 2), -INT8_C( 62), -INT8_C( 37), -INT8_C( 104), INT8_C( 29), INT8_C( 85), -INT8_C( 46), -INT8_C( 117), -INT8_C( 67) }, { -INT8_C( 2), INT8_C( 44), INT8_C( 7), -INT8_C( 106), -INT8_C( 67), INT8_C( 87), -INT8_C( 59), -INT8_C( 127), INT8_C( 83), INT8_C( 49), INT8_C( 119), INT8_C( 67), INT8_C( 95), -INT8_C( 69), INT8_C( 97), -INT8_C( 59) }, { INT16_C( 21), INT16_C( 12), INT16_C( 15), INT16_C( 96), INT16_C( 180), -INT16_C( 115), -INT16_C( 20), -INT16_C( 126) } }, { { INT8_C( 51), INT8_C( 114), -INT8_C( 29), -INT8_C( 5), -INT8_C( 111), INT8_C( 48), -INT8_C( 3), INT8_C( 83), INT8_C( 11), -INT8_C( 106), INT8_C( 112), INT8_C( 96), INT8_C( 104), -INT8_C( 5), INT8_C( 29), INT8_C( 102) }, { INT8_C( 39), INT8_C( 36), -INT8_C( 4), -INT8_C( 28), INT8_C( 123), -INT8_C( 62), INT8_C( 102), -INT8_C( 50), -INT8_C( 13), -INT8_C( 35), INT8_C( 17), INT8_C( 82), -INT8_C( 103), INT8_C( 114), INT8_C( 24), -INT8_C( 52) }, { -INT16_C( 2), -INT16_C( 141), INT16_C( 129), INT16_C( 178), INT16_C( 1), INT16_C( 109), INT16_C( 53), INT16_C( 50) } }, { { -INT8_C( 27), -INT8_C( 5), -INT8_C( 57), INT8_C( 118), INT8_C( 43), -INT8_C( 59), -INT8_C( 55), INT8_C( 54), INT8_C( 91), INT8_C( 57), -INT8_C( 106), -INT8_C( 61), INT8_C( 52), -INT8_C( 77), INT8_C( 41), INT8_C( 92) }, { -INT8_C( 41), INT8_C( 37), INT8_C( 64), INT8_C( 83), -INT8_C( 25), -INT8_C( 90), INT8_C( 33), -INT8_C( 38), -INT8_C( 124), INT8_C( 51), INT8_C( 45), INT8_C( 29), -INT8_C( 91), INT8_C( 69), -INT8_C( 23), -INT8_C( 118) }, { -INT16_C( 33), 
INT16_C( 108), -INT16_C( 61), -INT16_C( 32), -INT16_C( 39), -INT16_C( 8), INT16_C( 18), -INT16_C( 26) } }, { { INT8_C( 64), -INT8_C( 79), INT8_C( 0), INT8_C( 108), INT8_C( 118), -INT8_C( 55), -INT8_C( 94), -INT8_C( 47), INT8_C( 2), INT8_C( 57), -INT8_C( 108), INT8_C( 55), -INT8_C( 20), -INT8_C( 67), -INT8_C( 109), -INT8_C( 60) }, { -INT8_C( 30), -INT8_C( 45), INT8_C( 23), -INT8_C( 54), INT8_C( 122), INT8_C( 56), -INT8_C( 92), -INT8_C( 2), INT8_C( 107), -INT8_C( 47), INT8_C( 27), INT8_C( 17), INT8_C( 22), INT8_C( 4), -INT8_C( 101), INT8_C( 87) }, { INT16_C( 109), INT16_C( 10), -INT16_C( 81), INT16_C( 72), INT16_C( 2), -INT16_C( 63), -INT16_C( 210), INT16_C( 27) } }, { { -INT8_C( 75), -INT8_C( 100), -INT8_C( 61), INT8_C( 43), INT8_C( 101), INT8_C( 101), -INT8_C( 4), INT8_C( 104), -INT8_C( 98), -INT8_C( 112), -INT8_C( 97), -INT8_C( 117), INT8_C( 77), INT8_C( 50), INT8_C( 79), INT8_C( 48) }, { INT8_C( 5), INT8_C( 102), -INT8_C( 6), INT8_MAX, -INT8_C( 98), -INT8_C( 98), INT8_C( 125), INT8_C( 10), INT8_C( 112), -INT8_C( 104), INT8_C( 27), -INT8_C( 122), -INT8_C( 99), -INT8_C( 74), -INT8_C( 35), INT8_C( 82) }, { INT16_C( 14), -INT16_C( 216), -INT16_C( 70), -INT16_C( 239), -INT16_C( 22), -INT16_C( 24), INT16_C( 44), INT16_C( 130) } }, { { INT8_C( 82), -INT8_C( 96), INT8_C( 126), -INT8_C( 72), INT8_C( 6), INT8_C( 122), INT8_C( 32), -INT8_C( 92), INT8_C( 11), -INT8_C( 65), INT8_C( 47), INT8_C( 88), -INT8_C( 15), INT8_C( 126), -INT8_C( 120), -INT8_C( 10) }, { -INT8_C( 28), -INT8_C( 126), INT8_C( 118), -INT8_C( 125), INT8_C( 33), -INT8_C( 13), -INT8_C( 115), -INT8_C( 111), -INT8_C( 116), -INT8_C( 88), INT8_C( 23), INT8_C( 41), INT8_C( 94), -INT8_C( 11), INT8_C( 123), -INT8_C( 79) }, { -INT16_C( 105), -INT16_C( 153), INT16_C( 70), INT16_C( 129), INT16_C( 79), INT16_C( 115), INT16_C( 3), -INT16_C( 89) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = 
simde_vld1q_s8(test_vec[i].b); simde_int16x8_t r = simde_vaddl_high_s8(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vaddl_high_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int32_t r[4]; } test_vec[] = { { { INT16_C( 7144), -INT16_C( 16648), INT16_C( 24563), INT16_C( 13384), INT16_C( 30230), INT16_C( 9142), INT16_C( 26163), INT16_C( 11375) }, { -INT16_C( 20538), -INT16_C( 4117), INT16_C( 6207), INT16_C( 12335), -INT16_C( 11259), INT16_C( 30845), INT16_C( 23245), -INT16_C( 19018) }, { INT32_C( 18971), INT32_C( 39987), INT32_C( 49408), -INT32_C( 7643) } }, { { -INT16_C( 20618), INT16_C( 26995), -INT16_C( 17394), INT16_C( 9629), INT16_C( 21298), INT16_C( 25928), -INT16_C( 18502), -INT16_C( 32623) }, { INT16_C( 31847), -INT16_C( 22929), -INT16_C( 24939), -INT16_C( 25898), INT16_C( 21618), INT16_C( 16146), -INT16_C( 13906), INT16_C( 9461) }, { INT32_C( 42916), INT32_C( 42074), -INT32_C( 32408), -INT32_C( 23162) } }, { { INT16_C( 26744), -INT16_C( 31090), INT16_C( 11044), INT16_C( 22187), -INT16_C( 2945), INT16_C( 14779), INT16_C( 19883), INT16_C( 4793) }, { INT16_C( 10441), INT16_C( 24249), -INT16_C( 28730), INT16_C( 14584), INT16_C( 3043), -INT16_C( 28040), INT16_C( 28116), INT16_C( 19638) }, { INT32_C( 98), -INT32_C( 13261), INT32_C( 47999), INT32_C( 24431) } }, { { INT16_C( 17621), -INT16_C( 1326), INT16_C( 32368), -INT16_C( 4272), INT16_C( 3186), INT16_C( 7464), -INT16_C( 7847), INT16_C( 8752) }, { -INT16_C( 5879), -INT16_C( 12415), INT16_C( 31096), INT16_C( 23559), INT16_C( 32644), INT16_C( 22766), -INT16_C( 23316), -INT16_C( 15708) }, { INT32_C( 35830), INT32_C( 30230), -INT32_C( 31163), -INT32_C( 6956) } }, { { INT16_C( 30697), INT16_C( 22972), INT16_C( 3317), INT16_C( 26440), INT16_C( 28696), INT16_C( 29060), -INT16_C( 19375), INT16_C( 23188) }, { INT16_C( 5533), INT16_C( 5673), INT16_C( 12430), INT16_C( 4978), INT16_C( 24752), -INT16_C( 25493), INT16_C( 4100), 
-INT16_C( 4770) }, { INT32_C( 53448), INT32_C( 3567), -INT32_C( 15275), INT32_C( 18418) } }, { { INT16_C( 6791), INT16_C( 31814), -INT16_C( 29145), INT16_C( 16355), INT16_C( 26622), INT16_C( 20401), INT16_C( 17692), -INT16_C( 18007) }, { -INT16_C( 11686), -INT16_C( 5937), INT16_C( 16643), -INT16_C( 19461), INT16_C( 26529), -INT16_C( 22961), -INT16_C( 20873), -INT16_C( 365) }, { INT32_C( 53151), -INT32_C( 2560), -INT32_C( 3181), -INT32_C( 18372) } }, { { -INT16_C( 9528), -INT16_C( 4230), INT16_C( 23912), INT16_C( 26415), -INT16_C( 7996), -INT16_C( 8010), INT16_C( 24613), INT16_C( 32666) }, { INT16_C( 26930), INT16_C( 13671), INT16_C( 25515), INT16_C( 19688), INT16_C( 14538), INT16_C( 16882), -INT16_C( 31002), -INT16_C( 20929) }, { INT32_C( 6542), INT32_C( 8872), -INT32_C( 6389), INT32_C( 11737) } }, { { -INT16_C( 18080), -INT16_C( 14178), -INT16_C( 13034), -INT16_C( 9681), -INT16_C( 6483), -INT16_C( 11589), INT16_C( 21830), INT16_C( 30801) }, { -INT16_C( 18242), INT16_C( 27054), -INT16_C( 27109), -INT16_C( 6730), -INT16_C( 22322), -INT16_C( 19418), INT16_C( 25902), -INT16_C( 29085) }, { -INT32_C( 28805), -INT32_C( 31007), INT32_C( 47732), INT32_C( 1716) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int32x4_t r = simde_vaddl_high_s16(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vaddl_high_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int64_t r[2]; } test_vec[] = { { { INT32_C( 1740465003), -INT32_C( 1432883366), -INT32_C( 1818865170), INT32_C( 652150671) }, { INT32_C( 199936655), -INT32_C( 1706197280), -INT32_C( 1657142648), -INT32_C( 304589439) }, { -INT64_C( 3476007818), INT64_C( 347561232) } }, { { INT32_C( 324310456), INT32_C( 1941826692), -INT32_C( 872000707), -INT32_C( 369957286) }, { -INT32_C( 1863000912), -INT32_C( 
483770022), -INT32_C( 981441469), INT32_C( 1924291001) }, { -INT64_C( 1853442176), INT64_C( 1554333715) } }, { { INT32_C( 1938097902), INT32_C( 803619570), -INT32_C( 268702571), -INT32_C( 2099712302) }, { INT32_C( 621989066), INT32_C( 1342716941), INT32_C( 1511360673), -INT32_C( 791885855) }, { INT64_C( 1242658102), -INT64_C( 2891598157) } }, { { -INT32_C( 1069329970), INT32_C( 686827923), -INT32_C( 401085675), -INT32_C( 1536495655) }, { -INT32_C( 926319173), INT32_C( 1511641529), INT32_C( 1001729625), -INT32_C( 1005878794) }, { INT64_C( 600643950), -INT64_C( 2542374449) } }, { { INT32_C( 1719946963), -INT32_C( 1936755593), INT32_C( 963946080), INT32_C( 1373495190) }, { INT32_C( 354068060), -INT32_C( 781175945), INT32_C( 1460413793), INT32_C( 2031818662) }, { INT64_C( 2424359873), INT64_C( 3405313852) } }, { { -INT32_C( 589258651), INT32_C( 1953001236), -INT32_C( 1414603499), INT32_C( 419269564) }, { -INT32_C( 1456662734), -INT32_C( 1418027702), INT32_C( 1761838786), INT32_C( 48373405) }, { INT64_C( 347235287), INT64_C( 467642969) } }, { { -INT32_C( 740375874), INT32_C( 1195853617), -INT32_C( 520948444), -INT32_C( 1275531391) }, { INT32_C( 1348216070), -INT32_C( 2047027261), -INT32_C( 68223139), -INT32_C( 587345635) }, { -INT64_C( 589171583), -INT64_C( 1862877026) } }, { { -INT32_C( 978330477), INT32_C( 1192031779), INT32_C( 1831337708), -INT32_C( 199221266) }, { INT32_C( 121994308), -INT32_C( 1316142765), INT32_C( 1571584832), -INT32_C( 533092020) }, { INT64_C( 3402922540), -INT64_C( 732313286) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int64x2_t r = simde_vaddl_high_s32(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vaddl_high_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint16_t r[8]; } test_vec[] = { { { UINT8_C( 93), 
UINT8_C( 0), UINT8_C( 39), UINT8_C( 27), UINT8_C(115), UINT8_C(216), UINT8_C( 74), UINT8_C(133), UINT8_C( 58), UINT8_C(177), UINT8_C( 0), UINT8_C( 47), UINT8_C(214), UINT8_C(147), UINT8_C(155), UINT8_C(151) }, { UINT8_C(136), UINT8_C(233), UINT8_C( 56), UINT8_C(198), UINT8_C(203), UINT8_C( 2), UINT8_C(239), UINT8_C( 15), UINT8_C(147), UINT8_C(119), UINT8_C(111), UINT8_C( 53), UINT8_C( 37), UINT8_C(230), UINT8_C( 19), UINT8_C(130) }, { UINT16_C( 205), UINT16_C( 296), UINT16_C( 111), UINT16_C( 100), UINT16_C( 251), UINT16_C( 377), UINT16_C( 174), UINT16_C( 281) } }, { { UINT8_C(231), UINT8_C( 58), UINT8_C(158), UINT8_C( 90), UINT8_C( 18), UINT8_C(232), UINT8_C(223), UINT8_C( 77), UINT8_C(153), UINT8_C(223), UINT8_C(124), UINT8_C(112), UINT8_C(114), UINT8_C( 23), UINT8_C( 7), UINT8_C(251) }, { UINT8_C( 0), UINT8_C( 64), UINT8_C(193), UINT8_C(204), UINT8_C( 66), UINT8_C(176), UINT8_C(219), UINT8_C(214), UINT8_C( 39), UINT8_C( 75), UINT8_C( 11), UINT8_C( 77), UINT8_C( 49), UINT8_C( 30), UINT8_C(207), UINT8_C( 24) }, { UINT16_C( 192), UINT16_C( 298), UINT16_C( 135), UINT16_C( 189), UINT16_C( 163), UINT16_C( 53), UINT16_C( 214), UINT16_C( 275) } }, { { UINT8_C( 89), UINT8_C(109), UINT8_C(114), UINT8_C(107), UINT8_C( 86), UINT8_C( 82), UINT8_C(184), UINT8_C(239), UINT8_C( 49), UINT8_C( 52), UINT8_C( 95), UINT8_C(164), UINT8_C( 76), UINT8_C(103), UINT8_C(159), UINT8_C( 76) }, { UINT8_C(167), UINT8_C( 96), UINT8_C( 24), UINT8_C(233), UINT8_C( 16), UINT8_C(244), UINT8_C(191), UINT8_C( 55), UINT8_C( 63), UINT8_C(203), UINT8_C(132), UINT8_C(112), UINT8_C(233), UINT8_C( 84), UINT8_C(137), UINT8_C( 66) }, { UINT16_C( 112), UINT16_C( 255), UINT16_C( 227), UINT16_C( 276), UINT16_C( 309), UINT16_C( 187), UINT16_C( 296), UINT16_C( 142) } }, { { UINT8_C(193), UINT8_C(251), UINT8_C(174), UINT8_C( 23), UINT8_C( 77), UINT8_C(102), UINT8_C( 7), UINT8_C(127), UINT8_C(155), UINT8_C(102), UINT8_C( 35), UINT8_C(231), UINT8_C(205), UINT8_C(194), UINT8_C( 51), UINT8_C(116) }, { UINT8_C( 34), 
UINT8_C( 76), UINT8_C( 94), UINT8_C( 50), UINT8_C( 64), UINT8_C( 29), UINT8_C(105), UINT8_C(127), UINT8_C(232), UINT8_C(238), UINT8_C(239), UINT8_C(210), UINT8_C( 66), UINT8_C(120), UINT8_C( 20), UINT8_C( 3) }, { UINT16_C( 387), UINT16_C( 340), UINT16_C( 274), UINT16_C( 441), UINT16_C( 271), UINT16_C( 314), UINT16_C( 71), UINT16_C( 119) } }, { { UINT8_C(116), UINT8_C(194), UINT8_C( 27), UINT8_C(193), UINT8_C( 41), UINT8_C( 34), UINT8_C( 64), UINT8_C(196), UINT8_C(136), UINT8_C( 99), UINT8_C(171), UINT8_C( 86), UINT8_C( 37), UINT8_C(222), UINT8_C(202), UINT8_C( 71) }, { UINT8_C( 42), UINT8_C( 40), UINT8_C(121), UINT8_C(106), UINT8_C( 70), UINT8_C(227), UINT8_C(233), UINT8_C( 46), UINT8_C(209), UINT8_C(217), UINT8_C( 0), UINT8_C( 19), UINT8_C( 81), UINT8_C( 21), UINT8_C( 22), UINT8_C(197) }, { UINT16_C( 345), UINT16_C( 316), UINT16_C( 171), UINT16_C( 105), UINT16_C( 118), UINT16_C( 243), UINT16_C( 224), UINT16_C( 268) } }, { { UINT8_C(215), UINT8_C( 49), UINT8_C(135), UINT8_C( 0), UINT8_C( 83), UINT8_C(199), UINT8_C(196), UINT8_C(220), UINT8_C( 43), UINT8_C(111), UINT8_C( 50), UINT8_C( 80), UINT8_C( 78), UINT8_C(252), UINT8_C(152), UINT8_C(120) }, { UINT8_C( 37), UINT8_C( 17), UINT8_C(227), UINT8_C(107), UINT8_C(244), UINT8_C(204), UINT8_C(153), UINT8_C(197), UINT8_C(165), UINT8_C(154), UINT8_C(216), UINT8_C(247), UINT8_C(175), UINT8_C(239), UINT8_C(188), UINT8_C(134) }, { UINT16_C( 208), UINT16_C( 265), UINT16_C( 266), UINT16_C( 327), UINT16_C( 253), UINT16_C( 491), UINT16_C( 340), UINT16_C( 254) } }, { { UINT8_C( 32), UINT8_C( 67), UINT8_C(135), UINT8_C(116), UINT8_C( 11), UINT8_C( 75), UINT8_C( 80), UINT8_C( 54), UINT8_C(187), UINT8_C(130), UINT8_C(134), UINT8_C( 9), UINT8_C(126), UINT8_C( 30), UINT8_C(129), UINT8_C(163) }, { UINT8_C( 48), UINT8_C(100), UINT8_C( 14), UINT8_C( 36), UINT8_C( 49), UINT8_C(168), UINT8_C(234), UINT8_C(214), UINT8_C( 66), UINT8_C(194), UINT8_C(205), UINT8_C(241), UINT8_C(177), UINT8_C(138), UINT8_C(119), UINT8_C(210) }, { UINT16_C( 
253), UINT16_C( 324), UINT16_C( 339), UINT16_C( 250), UINT16_C( 303), UINT16_C( 168), UINT16_C( 248), UINT16_C( 373) } }, { { UINT8_C(205), UINT8_C(254), UINT8_C( 70), UINT8_C(216), UINT8_C( 74), UINT8_C(150), UINT8_C( 14), UINT8_C( 5), UINT8_C( 24), UINT8_C(149), UINT8_C( 14), UINT8_C(150), UINT8_C(179), UINT8_C(143), UINT8_C( 58), UINT8_C(227) }, { UINT8_C(244), UINT8_C( 72), UINT8_C( 8), UINT8_C( 37), UINT8_C(240), UINT8_C(242), UINT8_C(251), UINT8_C( 50), UINT8_C(180), UINT8_C(201), UINT8_C( 35), UINT8_C(102), UINT8_C( 83), UINT8_C(155), UINT8_C( 56), UINT8_C( 32) }, { UINT16_C( 204), UINT16_C( 350), UINT16_C( 49), UINT16_C( 252), UINT16_C( 262), UINT16_C( 298), UINT16_C( 114), UINT16_C( 259) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint16x8_t r = simde_vaddl_high_u8(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vaddl_high_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint32_t r[4]; } test_vec[] = { { { UINT16_C(43232), UINT16_C(36530), UINT16_C(39899), UINT16_C(62799), UINT16_C(56662), UINT16_C(50000), UINT16_C(17537), UINT16_C(41309) }, { UINT16_C(27727), UINT16_C( 2079), UINT16_C(39125), UINT16_C(32488), UINT16_C( 2495), UINT16_C(36689), UINT16_C(27430), UINT16_C( 1731) }, { UINT32_C( 59157), UINT32_C( 86689), UINT32_C( 44967), UINT32_C( 43040) } }, { { UINT16_C(29971), UINT16_C(61077), UINT16_C(58384), UINT16_C(26595), UINT16_C(13505), UINT16_C(17194), UINT16_C(34680), UINT16_C(51172) }, { UINT16_C( 1011), UINT16_C(51407), UINT16_C(47003), UINT16_C(23111), UINT16_C(39104), UINT16_C(59370), UINT16_C(44291), UINT16_C( 5869) }, { UINT32_C( 52609), UINT32_C( 76564), UINT32_C( 78971), UINT32_C( 57041) } }, { { UINT16_C(33314), UINT16_C(12804), UINT16_C(59238), UINT16_C(10393), UINT16_C(49947), UINT16_C(37739), 
UINT16_C(20299), UINT16_C(15962) }, { UINT16_C(10578), UINT16_C(60679), UINT16_C(20193), UINT16_C(41287), UINT16_C(12774), UINT16_C(59784), UINT16_C(30430), UINT16_C( 255) }, { UINT32_C( 62721), UINT32_C( 97523), UINT32_C( 50729), UINT32_C( 16217) } }, { { UINT16_C( 1016), UINT16_C(24371), UINT16_C(52458), UINT16_C( 1671), UINT16_C(62096), UINT16_C(56217), UINT16_C(62529), UINT16_C(37657) }, { UINT16_C( 8221), UINT16_C(65152), UINT16_C(51054), UINT16_C(21664), UINT16_C(10489), UINT16_C(55101), UINT16_C(15518), UINT16_C(38872) }, { UINT32_C( 72585), UINT32_C( 111318), UINT32_C( 78047), UINT32_C( 76529) } }, { { UINT16_C( 2879), UINT16_C(10998), UINT16_C(32215), UINT16_C(26416), UINT16_C(51567), UINT16_C(45122), UINT16_C(23741), UINT16_C(56131) }, { UINT16_C(50044), UINT16_C(60377), UINT16_C(31114), UINT16_C(33599), UINT16_C(32162), UINT16_C(16475), UINT16_C(13241), UINT16_C(63959) }, { UINT32_C( 83729), UINT32_C( 61597), UINT32_C( 36982), UINT32_C( 120090) } }, { { UINT16_C(52542), UINT16_C( 5411), UINT16_C(21322), UINT16_C(47485), UINT16_C(48924), UINT16_C(55913), UINT16_C(44059), UINT16_C(39093) }, { UINT16_C(36463), UINT16_C(64131), UINT16_C(49672), UINT16_C(43645), UINT16_C(55359), UINT16_C(63978), UINT16_C(49675), UINT16_C(18930) }, { UINT32_C( 104283), UINT32_C( 119891), UINT32_C( 93734), UINT32_C( 58023) } }, { { UINT16_C( 5519), UINT16_C(55903), UINT16_C(56424), UINT16_C(33939), UINT16_C(64923), UINT16_C(46942), UINT16_C( 5033), UINT16_C( 6479) }, { UINT16_C(53922), UINT16_C(43539), UINT16_C(37012), UINT16_C(54356), UINT16_C(15977), UINT16_C(29901), UINT16_C(48896), UINT16_C(37054) }, { UINT32_C( 80900), UINT32_C( 76843), UINT32_C( 53929), UINT32_C( 43533) } }, { { UINT16_C( 7636), UINT16_C(15466), UINT16_C(65017), UINT16_C(38080), UINT16_C( 8186), UINT16_C(42059), UINT16_C(39474), UINT16_C(54461) }, { UINT16_C(53356), UINT16_C( 382), UINT16_C(53856), UINT16_C(51669), UINT16_C(41489), UINT16_C( 4414), UINT16_C(64609), UINT16_C(13729) }, { UINT32_C( 49675), 
UINT32_C( 46473), UINT32_C( 104083), UINT32_C( 68190) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint32x4_t r = simde_vaddl_high_u16(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vaddl_high_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint64_t r[2]; } test_vec[] = { { { UINT32_C(3588152856), UINT32_C(3931789934), UINT32_C(3351288904), UINT32_C(2406829796) }, { UINT32_C(3539332684), UINT32_C(2548929043), UINT32_C(3124332951), UINT32_C(3615191487) }, { UINT64_C( 6475621855), UINT64_C( 6022021283) } }, { { UINT32_C(3198966351), UINT32_C(4255647412), UINT32_C(2261018786), UINT32_C( 51788471) }, { UINT32_C(1003817768), UINT32_C( 886227613), UINT32_C( 183438411), UINT32_C(3454102141) }, { UINT64_C( 2444457197), UINT64_C( 3505890612) } }, { { UINT32_C(2039188932), UINT32_C( 896938899), UINT32_C(1388067483), UINT32_C(2639647348) }, { UINT32_C(2060987101), UINT32_C( 951036908), UINT32_C( 876781239), UINT32_C(3439404040) }, { UINT64_C( 2264848722), UINT64_C( 6079051388) } }, { { UINT32_C(1162251441), UINT32_C(1534770367), UINT32_C(1806513910), UINT32_C(3842507528) }, { UINT32_C( 442556461), UINT32_C(1112674187), UINT32_C(3044512941), UINT32_C(1786935480) }, { UINT64_C( 4851026851), UINT64_C( 5629443008) } }, { { UINT32_C(3299854341), UINT32_C(2065639812), UINT32_C(1759956320), UINT32_C(4249808592) }, { UINT32_C(1511501518), UINT32_C(1788635581), UINT32_C(3055490046), UINT32_C(2434835084) }, { UINT64_C( 4815446366), UINT64_C( 6684643676) } }, { { UINT32_C(4015378282), UINT32_C(1500149241), UINT32_C( 314658882), UINT32_C( 202313534) }, { UINT32_C(2053515197), UINT32_C(2397307792), UINT32_C(2722432022), UINT32_C( 271803814) }, { UINT64_C( 3037090904), UINT64_C( 474117348) } }, { { UINT32_C( 788498741), UINT32_C(1082616318), 
UINT32_C(4149364921), UINT32_C( 352608600) }, { UINT32_C( 428894856), UINT32_C(2225566829), UINT32_C( 505867384), UINT32_C(2268027474) }, { UINT64_C( 4655232305), UINT64_C( 2620636074) } }, { { UINT32_C(3786747619), UINT32_C(1361132696), UINT32_C(3695801220), UINT32_C(1576160724) }, { UINT32_C( 628523703), UINT32_C(1873354230), UINT32_C(1552797450), UINT32_C( 216251433) }, { UINT64_C( 5248598670), UINT64_C( 1792412157) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint64x2_t r = simde_vaddl_high_u32(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vaddl_high_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vaddl_high_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vaddl_high_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vaddl_high_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vaddl_high_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vaddl_high_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/addlv.c000066400000000000000000000575121400333146700165670ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN addlv #include "test-neon.h" #include "../../../simde/arm/neon/addlv.h" static int test_simde_vaddlv_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int16_t r; } test_vec[] = { { { -INT8_C( 78), INT8_C( 109), INT8_C( 126), INT8_C( 107), -INT8_C( 63), -INT8_C( 93), -INT8_C( 101), INT8_C( 9) }, INT16_C( 16) }, { { INT8_C( 77), -INT8_C( 96), -INT8_C( 92), -INT8_C( 35), -INT8_C( 11), INT8_C( 95), INT8_C( 28), -INT8_C( 74) }, -INT16_C( 108) }, { { INT8_C( 20), -INT8_C( 83), INT8_C( 94), -INT8_C( 114), -INT8_C( 15), INT8_C( 30), INT8_C( 119), -INT8_C( 22) }, INT16_C( 29) }, { { -INT8_C( 61), INT8_C( 72), INT8_C( 20), INT8_C( 67), INT8_C( 26), -INT8_C( 4), -INT8_C( 83), -INT8_C( 52) }, -INT16_C( 15) }, { { INT8_C( 106), INT8_C( 43), INT8_C( 
55), INT8_C( 43), -INT8_C( 50), -INT8_C( 46), INT8_C( 52), INT8_C( 27) }, INT16_C( 230) }, { { INT8_C( 115), -INT8_C( 40), -INT8_C( 8), INT8_C( 104), INT8_C( 56), INT8_C( 20), INT8_C( 30), INT8_C( 76) }, INT16_C( 353) }, { { -INT8_C( 63), INT8_C( 125), -INT8_C( 38), -INT8_C( 77), -INT8_C( 101), INT8_C( 82), -INT8_C( 99), INT8_C( 94) }, -INT16_C( 77) }, { { -INT8_C( 102), -INT8_C( 78), -INT8_C( 94), -INT8_C( 76), -INT8_C( 82), INT8_C( 79), INT8_MIN, INT8_C( 24) }, -INT16_C( 457) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); int16_t r = simde_vaddlv_s8(a); simde_assert_equal_i16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); int16_t r = simde_vaddlv_s8(a); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddlv_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int32_t r; } test_vec[] = { { { -INT16_C( 19550), INT16_C( 29884), -INT16_C( 7291), -INT16_C( 2131) }, INT32_C( 912) }, { { INT16_C( 13054), -INT16_C( 2007), -INT16_C( 13158), INT16_C( 16493) }, INT32_C( 14382) }, { { -INT16_C( 7107), INT16_C( 30925), INT16_C( 14119), -INT16_C( 31939) }, INT32_C( 5998) }, { { INT16_C( 24111), INT16_C( 13090), INT16_C( 16713), -INT16_C( 4995) }, INT32_C( 48919) }, { { INT16_C( 14836), INT16_C( 31072), INT16_C( 3612), INT16_C( 7024) }, INT32_C( 56544) }, { { -INT16_C( 26048), -INT16_C( 9453), -INT16_C( 32410), -INT16_C( 23525) }, -INT32_C( 91436) }, { { -INT16_C( 6043), -INT16_C( 29668), INT16_C( 22815), INT16_C( 20239) }, INT32_C( 7343) }, { { INT16_C( 12728), INT16_C( 386), INT16_C( 114), INT16_C( 26605) }, INT32_C( 39833) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); int32_t r = 
simde_vaddlv_s16(a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); int32_t r = simde_vaddlv_s16(a); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddlv_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int64_t r; } test_vec[] = { { { INT32_C( 1457540665), -INT32_C( 1670295204) }, -INT64_C( 212754539) }, { { INT32_C( 1366787307), INT32_C( 1794478853) }, INT64_C( 3161266160) }, { { -INT32_C( 1678306693), INT32_C( 602539627) }, -INT64_C( 1075767066) }, { { -INT32_C( 1440388040), -INT32_C( 1508830612) }, -INT64_C( 2949218652) }, { { -INT32_C( 1124273568), INT32_C( 777612611) }, -INT64_C( 346660957) }, { { -INT32_C( 142618383), -INT32_C( 547261085) }, -INT64_C( 689879468) }, { { -INT32_C( 226862969), -INT32_C( 1760140193) }, -INT64_C( 1987003162) }, { { INT32_C( 1027685328), -INT32_C( 1360833715) }, -INT64_C( 333148387) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); int64_t r = simde_vaddlv_s32(a); simde_assert_equal_i64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); int64_t r = simde_vaddlv_s32(a); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddlv_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint16_t r; } test_vec[] = { { { UINT8_C(107), UINT8_C( 47), UINT8_C(217), UINT8_C(215), UINT8_C(241), UINT8_C( 96), UINT8_C( 52), UINT8_C(181) }, UINT16_C( 1156) }, { { UINT8_C( 98), UINT8_C( 32), UINT8_C(246), UINT8_C( 32), UINT8_C(105), UINT8_C(238), UINT8_C(215), UINT8_C( 33) }, 
UINT16_C( 999) }, { { UINT8_C(133), UINT8_C( 3), UINT8_C(237), UINT8_C(178), UINT8_C( 19), UINT8_C(160), UINT8_C(131), UINT8_C( 32) }, UINT16_C( 893) }, { { UINT8_C( 15), UINT8_C(239), UINT8_C( 87), UINT8_C( 27), UINT8_C( 41), UINT8_C(191), UINT8_C( 83), UINT8_C(148) }, UINT16_C( 831) }, { { UINT8_C(238), UINT8_C( 44), UINT8_C(108), UINT8_C(224), UINT8_C(141), UINT8_C(160), UINT8_C(149), UINT8_C(239) }, UINT16_C( 1303) }, { { UINT8_C(193), UINT8_C(139), UINT8_C( 15), UINT8_C( 42), UINT8_C(122), UINT8_C(231), UINT8_C( 75), UINT8_MAX }, UINT16_C( 1072) }, { { UINT8_C(234), UINT8_C( 56), UINT8_C(178), UINT8_C(253), UINT8_C(216), UINT8_C( 53), UINT8_C( 29), UINT8_C(231) }, UINT16_C( 1250) }, { { UINT8_C( 37), UINT8_C(116), UINT8_C( 3), UINT8_C( 78), UINT8_C( 52), UINT8_C( 86), UINT8_C(226), UINT8_C( 34) }, UINT16_C( 632) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); uint16_t r = simde_vaddlv_u8(a); simde_assert_equal_u16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); uint16_t r = simde_vaddlv_u8(a); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddlv_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint32_t r; } test_vec[] = { { { UINT16_C(19874), UINT16_C(36903), UINT16_C(35701), UINT16_C(56826) }, UINT32_C( 149304) }, { { UINT16_C(45372), UINT16_C(61062), UINT16_C(47626), UINT16_C(36771) }, UINT32_C( 190831) }, { { UINT16_C( 3731), UINT16_C(47398), UINT16_C(48710), UINT16_C(58267) }, UINT32_C( 158106) }, { { UINT16_C(53737), UINT16_C( 6609), UINT16_C(64523), UINT16_C(44340) }, UINT32_C( 169209) }, { { UINT16_C(23369), UINT16_C(48957), UINT16_C(14566), UINT16_C( 9116) }, UINT32_C( 96008) }, { { UINT16_C( 8937), UINT16_C(62225), UINT16_C(46300), 
UINT16_C(28802) }, UINT32_C( 146264) }, { { UINT16_C(43202), UINT16_C( 2345), UINT16_C(50534), UINT16_C(20460) }, UINT32_C( 116541) }, { { UINT16_C(48534), UINT16_C(41320), UINT16_C(40121), UINT16_C( 846) }, UINT32_C( 130821) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); uint32_t r = simde_vaddlv_u16(a); simde_assert_equal_u32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); uint32_t r = simde_vaddlv_u16(a); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddlv_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint64_t r; } test_vec[] = { { { UINT32_C(3737292024), UINT32_C(2902548164) }, UINT64_C( 6639840188) }, { { UINT32_C(1570771584), UINT32_C(2311922375) }, UINT64_C( 3882693959) }, { { UINT32_C( 814937802), UINT32_C(1384087227) }, UINT64_C( 2199025029) }, { { UINT32_C(4126402364), UINT32_C(2096644740) }, UINT64_C( 6223047104) }, { { UINT32_C(2455419598), UINT32_C(2571066392) }, UINT64_C( 5026485990) }, { { UINT32_C( 905371502), UINT32_C(3418342145) }, UINT64_C( 4323713647) }, { { UINT32_C(1979404729), UINT32_C( 214399696) }, UINT64_C( 2193804425) }, { { UINT32_C(3842095713), UINT32_C(3395418876) }, UINT64_C( 7237514589) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); uint64_t r = simde_vaddlv_u32(a); simde_assert_equal_u64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); uint64_t r = simde_vaddlv_u32(a); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int 
test_simde_vaddlvq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int16_t r; } test_vec[] = { { { INT8_C( 26), INT8_C( 76), INT8_C( 48), -INT8_C( 60), INT8_C( 4), INT8_C( 76), INT8_C( 1), INT8_C( 45), INT8_C( 109), -INT8_C( 21), -INT8_C( 125), INT8_C( 90), INT8_MAX, INT8_C( 31), -INT8_C( 75), INT8_C( 124) }, INT16_C( 476) }, { { -INT8_C( 32), -INT8_C( 51), INT8_C( 109), INT8_C( 25), -INT8_C( 97), INT8_C( 120), INT8_C( 95), INT8_C( 30), -INT8_C( 66), INT8_C( 10), -INT8_C( 97), INT8_C( 92), INT8_C( 119), INT8_C( 111), -INT8_C( 40), -INT8_C( 111) }, INT16_C( 217) }, { { -INT8_C( 69), INT8_C( 8), INT8_C( 86), -INT8_C( 65), INT8_C( 85), INT8_C( 87), -INT8_C( 19), -INT8_C( 62), INT8_C( 66), INT8_C( 112), INT8_C( 29), -INT8_C( 63), -INT8_C( 113), -INT8_C( 46), INT8_C( 62), INT8_C( 112) }, INT16_C( 210) }, { { -INT8_C( 97), -INT8_C( 85), -INT8_C( 119), INT8_C( 63), INT8_C( 35), -INT8_C( 23), INT8_C( 93), -INT8_C( 30), -INT8_C( 13), -INT8_C( 4), INT8_C( 62), INT8_C( 106), INT8_C( 107), INT8_C( 22), -INT8_C( 4), INT8_C( 38) }, INT16_C( 151) }, { { INT8_C( 30), INT8_C( 82), -INT8_C( 26), INT8_C( 115), -INT8_C( 87), -INT8_C( 45), INT8_C( 54), -INT8_C( 21), INT8_C( 67), INT8_C( 83), -INT8_C( 84), -INT8_C( 46), INT8_C( 37), -INT8_C( 22), INT8_C( 66), -INT8_C( 60) }, INT16_C( 143) }, { { -INT8_C( 107), -INT8_C( 52), INT8_C( 3), -INT8_C( 71), -INT8_C( 75), INT8_C( 96), -INT8_C( 101), -INT8_C( 88), INT8_C( 93), -INT8_C( 39), INT8_C( 19), -INT8_C( 56), -INT8_C( 17), INT8_C( 15), -INT8_C( 17), INT8_C( 13) }, -INT16_C( 384) }, { { INT8_C( 97), -INT8_C( 43), -INT8_C( 127), INT8_C( 10), -INT8_C( 88), -INT8_C( 73), -INT8_C( 11), -INT8_C( 21), INT8_C( 10), -INT8_C( 95), -INT8_C( 67), INT8_C( 47), -INT8_C( 116), INT8_C( 0), -INT8_C( 13), INT8_C( 33) }, -INT16_C( 457) }, { { -INT8_C( 52), -INT8_C( 9), -INT8_C( 38), -INT8_C( 127), INT8_C( 87), INT8_C( 117), INT8_C( 41), -INT8_C( 76), INT8_C( 78), INT8_C( 60), INT8_C( 125), INT8_C( 61), INT8_C( 75), INT8_C( 108), 
INT8_C( 75), -INT8_C( 84) }, INT16_C( 441) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); int16_t r = simde_vaddlvq_s8(a); simde_assert_equal_i16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); int16_t r = simde_vaddlvq_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddlvq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int32_t r; } test_vec[] = { { { -INT16_C( 13247), -INT16_C( 5706), -INT16_C( 21629), -INT16_C( 29228), -INT16_C( 28339), -INT16_C( 9796), -INT16_C( 20591), INT16_C( 24058) }, -INT32_C( 104478) }, { { -INT16_C( 10842), -INT16_C( 290), INT16_C( 2122), -INT16_C( 26190), INT16_C( 12100), -INT16_C( 28458), INT16_C( 8603), -INT16_C( 9156) }, -INT32_C( 52111) }, { { -INT16_C( 3091), INT16_C( 28869), -INT16_C( 26210), -INT16_C( 5123), -INT16_C( 18133), -INT16_C( 17212), -INT16_C( 16535), INT16_C( 3866) }, -INT32_C( 53569) }, { { -INT16_C( 1900), -INT16_C( 8691), -INT16_C( 16384), INT16_C( 17783), INT16_C( 20207), -INT16_C( 29739), INT16_C( 4463), INT16_C( 23911) }, INT32_C( 9650) }, { { INT16_C( 11524), -INT16_C( 23603), -INT16_C( 13370), -INT16_C( 3698), INT16_C( 21380), -INT16_C( 4690), -INT16_C( 14318), -INT16_C( 22787) }, -INT32_C( 49562) }, { { INT16_C( 2752), -INT16_C( 15996), -INT16_C( 822), -INT16_C( 17914), -INT16_C( 9398), -INT16_C( 18107), -INT16_C( 21268), -INT16_C( 3818) }, -INT32_C( 84571) }, { { -INT16_C( 6951), -INT16_C( 24428), INT16_C( 8879), INT16_C( 13201), INT16_C( 16245), -INT16_C( 30943), INT16_C( 7687), -INT16_C( 14291) }, -INT32_C( 30601) }, { { -INT16_C( 19928), -INT16_C( 3191), -INT16_C( 28754), -INT16_C( 1875), -INT16_C( 3478), INT16_C( 22193), -INT16_C( 14178), INT16_C( 30791) }, -INT32_C( 18420) } 
}; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); int32_t r = simde_vaddlvq_s16(a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); int32_t r = simde_vaddlvq_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddlvq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int64_t r; } test_vec[] = { { { INT32_C( 1528355756), INT32_C( 1938729470), -INT32_C( 251940887), -INT32_C( 155703091) }, INT64_C( 3059441248) }, { { -INT32_C( 1997979174), INT32_C( 981505744), INT32_C( 647049864), -INT32_C( 1499539206) }, -INT64_C( 1868962772) }, { { -INT32_C( 1308510540), INT32_C( 1227198304), INT32_C( 205070399), INT32_C( 587395657) }, INT64_C( 711153820) }, { { INT32_C( 78441267), INT32_C( 155069569), INT32_C( 1479593822), INT32_C( 1560202920) }, INT64_C( 3273307578) }, { { -INT32_C( 452001915), -INT32_C( 835832945), -INT32_C( 1646631084), -INT32_C( 1916740263) }, -INT64_C( 4851206207) }, { { INT32_C( 1251044552), -INT32_C( 145502311), INT32_C( 1196458911), -INT32_C( 677163438) }, INT64_C( 1624837714) }, { { -INT32_C( 574836402), INT32_C( 950790884), -INT32_C( 1412070063), INT32_C( 725128802) }, -INT64_C( 310986779) }, { { -INT32_C( 1686779646), INT32_C( 932432024), -INT32_C( 1635851444), INT32_C( 2138382641) }, -INT64_C( 251816425) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); int64_t r = simde_vaddlvq_s32(a); simde_assert_equal_i64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); int64_t r = simde_vaddlvq_s32(a); simde_test_arm_neon_write_i32x4(2, a, 
SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddlvq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint16_t r; } test_vec[] = { { { UINT8_C(210), UINT8_C( 49), UINT8_C( 92), UINT8_C(183), UINT8_C( 27), UINT8_C( 7), UINT8_C(239), UINT8_C(108), UINT8_C(141), UINT8_C(197), UINT8_C( 23), UINT8_C(239), UINT8_C( 91), UINT8_C( 79), UINT8_C( 26), UINT8_C( 93) }, UINT16_C( 1804) }, { { UINT8_C( 24), UINT8_C(143), UINT8_C(249), UINT8_C(177), UINT8_C( 88), UINT8_C(140), UINT8_C(232), UINT8_C(164), UINT8_C(111), UINT8_C(103), UINT8_C( 66), UINT8_C(160), UINT8_C(136), UINT8_C(183), UINT8_C( 32), UINT8_C( 91) }, UINT16_C( 2099) }, { { UINT8_C(232), UINT8_C(124), UINT8_C( 18), UINT8_C( 3), UINT8_C(132), UINT8_C( 1), UINT8_C(111), UINT8_C( 17), UINT8_C(198), UINT8_C(135), UINT8_C( 0), UINT8_C( 33), UINT8_C(214), UINT8_C( 27), UINT8_C(127), UINT8_C(239) }, UINT16_C( 1611) }, { { UINT8_C(170), UINT8_C(120), UINT8_C(160), UINT8_C( 2), UINT8_C( 4), UINT8_C(136), UINT8_C(166), UINT8_C(115), UINT8_C(239), UINT8_C(232), UINT8_C( 19), UINT8_C(120), UINT8_C(159), UINT8_C( 51), UINT8_C(211), UINT8_C(135) }, UINT16_C( 2039) }, { { UINT8_C(176), UINT8_C(229), UINT8_C(138), UINT8_C( 52), UINT8_C(230), UINT8_C(250), UINT8_C( 69), UINT8_C(173), UINT8_C(129), UINT8_C( 69), UINT8_C(206), UINT8_C( 87), UINT8_C( 96), UINT8_C( 77), UINT8_C( 70), UINT8_C( 11) }, UINT16_C( 2062) }, { { UINT8_C(197), UINT8_C(230), UINT8_C( 13), UINT8_C(201), UINT8_C(111), UINT8_C(180), UINT8_C( 60), UINT8_C( 94), UINT8_C(156), UINT8_C( 80), UINT8_C(214), UINT8_C( 60), UINT8_C(131), UINT8_C(169), UINT8_C(195), UINT8_C( 51) }, UINT16_C( 2142) }, { { UINT8_C(142), UINT8_C( 78), UINT8_C(103), UINT8_C(117), UINT8_C( 72), UINT8_C(172), UINT8_C( 34), UINT8_C(201), UINT8_C(242), UINT8_C(240), UINT8_C( 32), UINT8_C( 82), UINT8_C( 62), UINT8_C(103), UINT8_C( 93), UINT8_C( 3) }, UINT16_C( 1776) }, { { UINT8_C( 77), 
UINT8_C(107), UINT8_C(205), UINT8_C(188), UINT8_C( 31), UINT8_C( 9), UINT8_C( 27), UINT8_C(187), UINT8_C( 89), UINT8_C(241), UINT8_C(247), UINT8_C(221), UINT8_C(155), UINT8_C(187), UINT8_C( 16), UINT8_C( 41) }, UINT16_C( 2028) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); uint16_t r = simde_vaddlvq_u8(a); simde_assert_equal_u16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); uint16_t r = simde_vaddlvq_u8(a); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddlvq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint32_t r; } test_vec[] = { { { UINT16_C(30729), UINT16_C(20894), UINT16_C(49188), UINT16_C( 5658), UINT16_C(15025), UINT16_C(61289), UINT16_C(50849), UINT16_C(61426) }, UINT32_C( 295058) }, { { UINT16_C(48945), UINT16_C(20651), UINT16_C(50889), UINT16_C( 8716), UINT16_C( 952), UINT16_C(21503), UINT16_C( 4286), UINT16_C(51068) }, UINT32_C( 207010) }, { { UINT16_C( 7048), UINT16_C(44056), UINT16_C(13019), UINT16_C(36035), UINT16_C(11373), UINT16_C( 3707), UINT16_C(28402), UINT16_C( 9469) }, UINT32_C( 153109) }, { { UINT16_C(43309), UINT16_C(63092), UINT16_C(32879), UINT16_C(10009), UINT16_C( 6276), UINT16_C(17018), UINT16_C(63272), UINT16_C(45066) }, UINT32_C( 280921) }, { { UINT16_C( 8722), UINT16_C(60765), UINT16_C( 8277), UINT16_C(49786), UINT16_C(62796), UINT16_C(16080), UINT16_C(52835), UINT16_C(37218) }, UINT32_C( 296479) }, { { UINT16_C(55159), UINT16_C(59015), UINT16_C(41047), UINT16_C(56078), UINT16_C(35001), UINT16_C(57630), UINT16_C(10367), UINT16_C(37266) }, UINT32_C( 351563) }, { { UINT16_C(61258), UINT16_C(40831), UINT16_C(63759), UINT16_C(23393), UINT16_C(13038), UINT16_C(21145), UINT16_C(64512), UINT16_C(30691) }, 
UINT32_C( 318627) }, { { UINT16_C(27347), UINT16_C(10845), UINT16_C(27403), UINT16_C(50182), UINT16_C( 9460), UINT16_C(29605), UINT16_C(14156), UINT16_C(38405) }, UINT32_C( 207403) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); uint32_t r = simde_vaddlvq_u16(a); simde_assert_equal_u32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); uint32_t r = simde_vaddlvq_u16(a); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddlvq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint64_t r; } test_vec[] = { { { UINT32_C( 892765222), UINT32_C(1804638077), UINT32_C(3384617673), UINT32_C(4181762086) }, UINT64_C( 10263783058) }, { { UINT32_C( 371432971), UINT32_C(4258933001), UINT32_C(2574352205), UINT32_C(3710940855) }, UINT64_C( 10915659032) }, { { UINT32_C(1997760250), UINT32_C(3353519101), UINT32_C(4086341837), UINT32_C(1273811264) }, UINT64_C( 10711432452) }, { { UINT32_C(2019627119), UINT32_C(2272672569), UINT32_C(1914759099), UINT32_C(1464815709) }, UINT64_C( 7671874496) }, { { UINT32_C(3033424566), UINT32_C(3548098566), UINT32_C(2445740880), UINT32_C(1272755164) }, UINT64_C( 10300019176) }, { { UINT32_C(4257496771), UINT32_C( 881080953), UINT32_C(2124850209), UINT32_C(2882926325) }, UINT64_C( 10146354258) }, { { UINT32_C(1583326040), UINT32_C(2754796115), UINT32_C(3258317286), UINT32_C(1879970220) }, UINT64_C( 9476409661) }, { { UINT32_C(3379417679), UINT32_C( 771617036), UINT32_C(2326504597), UINT32_C(4063658138) }, UINT64_C( 10541197450) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); uint64_t r = simde_vaddlvq_u32(a); simde_assert_equal_u64(r, test_vec[i].r); } 
return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); uint64_t r = simde_vaddlvq_u32(a); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vaddlv_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vaddlv_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vaddlv_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vaddlv_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vaddlv_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vaddlv_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vaddlvq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vaddlvq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vaddlvq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vaddlvq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vaddlvq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vaddlvq_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/addv.c000066400000000000000000000771331400333146700164140ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN addv #include "test-neon.h" #include "../../../simde/arm/neon/addv.h" static int test_simde_vaddv_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t a[2]; simde_float32_t r; } test_vec[] = { { { SIMDE_FLOAT32_C( -235.05), SIMDE_FLOAT32_C( -904.04) }, SIMDE_FLOAT32_C( -1139.09) }, { { SIMDE_FLOAT32_C( 272.68), SIMDE_FLOAT32_C( 622.28) }, SIMDE_FLOAT32_C( 894.96) }, { { SIMDE_FLOAT32_C( -525.30), SIMDE_FLOAT32_C( -528.82) }, SIMDE_FLOAT32_C( -1054.12) }, { { SIMDE_FLOAT32_C( 29.65), SIMDE_FLOAT32_C( -351.94) }, SIMDE_FLOAT32_C( -322.29) }, { { SIMDE_FLOAT32_C( 961.57), SIMDE_FLOAT32_C( 223.98) }, SIMDE_FLOAT32_C( 1185.55) }, { { SIMDE_FLOAT32_C( -664.36), SIMDE_FLOAT32_C( 112.82) }, SIMDE_FLOAT32_C( -551.54) }, { { SIMDE_FLOAT32_C( -605.37), SIMDE_FLOAT32_C( -434.90) }, SIMDE_FLOAT32_C( -1040.27) }, { { SIMDE_FLOAT32_C( -227.61), SIMDE_FLOAT32_C( -769.47) }, SIMDE_FLOAT32_C( -997.08) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) 
{ simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32_t r = simde_vaddv_f32(a); simde_assert_equal_f32(r, test_vec[i].r, 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32_t r = simde_vaddv_f32(a); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddv_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t r; } test_vec[] = { { { -INT8_C( 38), -INT8_C( 113), -INT8_C( 89), INT8_C( 100), INT8_C( 121), INT8_C( 62), INT8_C( 96), INT8_C( 114) }, -INT8_C( 3) }, { { INT8_C( 54), INT8_C( 22), INT8_C( 118), INT8_C( 46), INT8_C( 72), INT8_C( 40), INT8_C( 123), -INT8_C( 76) }, -INT8_C( 113) }, { { -INT8_C( 45), INT8_C( 36), INT8_C( 32), -INT8_C( 99), INT8_C( 35), INT8_C( 22), -INT8_C( 20), INT8_C( 18) }, -INT8_C( 21) }, { { INT8_C( 24), INT8_C( 119), INT8_C( 80), -INT8_C( 27), INT8_C( 57), INT8_C( 66), INT8_C( 123), INT8_C( 20) }, -INT8_C( 50) }, { { -INT8_C( 46), INT8_C( 35), INT8_C( 120), INT8_C( 75), INT8_C( 97), -INT8_C( 40), -INT8_C( 67), -INT8_C( 104) }, INT8_C( 70) }, { { -INT8_C( 17), INT8_C( 51), -INT8_C( 58), INT8_C( 55), INT8_C( 91), INT8_C( 65), -INT8_C( 20), INT8_C( 46) }, -INT8_C( 43) }, { { INT8_C( 102), INT8_C( 12), -INT8_C( 52), -INT8_C( 119), INT8_C( 34), -INT8_C( 72), -INT8_C( 100), INT8_C( 58) }, INT8_C( 119) }, { { INT8_C( 48), -INT8_C( 20), INT8_C( 31), INT8_C( 105), INT8_C( 47), -INT8_C( 101), INT8_C( 125), INT8_C( 1) }, -INT8_C( 20) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); int8_t r = simde_vaddv_s8(a); simde_assert_equal_i8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); int8_t r = simde_vaddv_s8(a); 
simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddv_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t r; } test_vec[] = { { { -INT16_C( 2626), INT16_C( 8012), INT16_C( 2510), -INT16_C( 16969) }, -INT16_C( 9073) }, { { INT16_C( 32060), -INT16_C( 26380), -INT16_C( 8001), INT16_C( 9670) }, INT16_C( 7349) }, { { -INT16_C( 27923), INT16_C( 4014), INT16_C( 19019), INT16_C( 31562) }, INT16_C( 26672) }, { { INT16_C( 26935), INT16_C( 26340), INT16_C( 25092), -INT16_C( 15769) }, -INT16_C( 2938) }, { { -INT16_C( 19625), INT16_C( 9698), -INT16_C( 26180), -INT16_C( 1822) }, INT16_C( 27607) }, { { -INT16_C( 10473), -INT16_C( 10608), INT16_C( 22455), -INT16_C( 23301) }, -INT16_C( 21927) }, { { -INT16_C( 22039), INT16_C( 13492), -INT16_C( 268), INT16_C( 11183) }, INT16_C( 2368) }, { { -INT16_C( 27545), INT16_C( 27793), -INT16_C( 1802), INT16_C( 19758) }, INT16_C( 18204) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); int16_t r = simde_vaddv_s16(a); simde_assert_equal_i16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); int16_t r = simde_vaddv_s16(a); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddv_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t r; } test_vec[] = { { { INT32_C( 1735594155), -INT32_C( 1050716758) }, INT32_C( 684877397) }, { { -INT32_C( 459804628), INT32_C( 814256711) }, INT32_C( 354452083) }, { { INT32_C( 795163707), -INT32_C( 1571154886) }, -INT32_C( 775991179) }, { { -INT32_C( 1643189336), -INT32_C( 1897120541) }, INT32_C( 754657419) }, { { -INT32_C( 134914227), -INT32_C( 508013132) 
}, -INT32_C( 642927359) }, { { -INT32_C( 1933226171), INT32_C( 482102753) }, -INT32_C( 1451123418) }, { { -INT32_C( 1001643638), -INT32_C( 563698122) }, -INT32_C( 1565341760) }, { { INT32_C( 1971156114), -INT32_C( 33265231) }, INT32_C( 1937890883) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); int32_t r = simde_vaddv_s32(a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); int32_t r = simde_vaddv_s32(a); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddv_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t r; } test_vec[] = { { { UINT8_C(200), UINT8_C(249), UINT8_C(245), UINT8_C(124), UINT8_C( 78), UINT8_C(173), UINT8_C( 93), UINT8_C(147) }, UINT8_C( 29) }, { { UINT8_C(252), UINT8_C( 34), UINT8_C( 31), UINT8_C(221), UINT8_C(112), UINT8_C(220), UINT8_C(249), UINT8_C(250) }, UINT8_C( 89) }, { { UINT8_C(253), UINT8_C( 69), UINT8_C(190), UINT8_C( 51), UINT8_C(236), UINT8_C( 37), UINT8_C( 18), UINT8_C(126) }, UINT8_C(212) }, { { UINT8_C(153), UINT8_C(143), UINT8_C(243), UINT8_C( 74), UINT8_C(248), UINT8_C(247), UINT8_C( 72), UINT8_C(192) }, UINT8_C( 92) }, { { UINT8_C(241), UINT8_C( 61), UINT8_C( 60), UINT8_C( 63), UINT8_C(234), UINT8_C(154), UINT8_C(211), UINT8_C(230) }, UINT8_C(230) }, { { UINT8_C(188), UINT8_C(242), UINT8_C(195), UINT8_C( 44), UINT8_C(206), UINT8_C(189), UINT8_C( 38), UINT8_C(204) }, UINT8_C( 26) }, { { UINT8_C( 2), UINT8_C(229), UINT8_MAX, UINT8_C(238), UINT8_C( 10), UINT8_C( 17), UINT8_C(108), UINT8_C(163) }, UINT8_C(254) }, { { UINT8_C(160), UINT8_C( 96), UINT8_C(238), UINT8_C(152), UINT8_C( 87), UINT8_C( 54), UINT8_C( 88), UINT8_C( 72) }, UINT8_C(179) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); uint8_t r = simde_vaddv_u8(a); simde_assert_equal_u8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); uint8_t r = simde_vaddv_u8(a); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddv_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t r; } test_vec[] = { { { UINT16_C(38260), UINT16_C(24200), UINT16_C(23343), UINT16_C(60229) }, UINT16_C(14960) }, { { UINT16_C( 2125), UINT16_C( 7192), UINT16_C(16069), UINT16_C(51432) }, UINT16_C(11282) }, { { UINT16_C(59171), UINT16_C(11702), UINT16_C( 9209), UINT16_C(39377) }, UINT16_C(53923) }, { { UINT16_C(49027), UINT16_C(55858), UINT16_C(35573), UINT16_C(26915) }, UINT16_C(36301) }, { { UINT16_C(43807), UINT16_C(20168), UINT16_C( 3334), UINT16_C(21306) }, UINT16_C(23079) }, { { UINT16_C(21013), UINT16_C(56175), UINT16_C(22416), UINT16_C(46243) }, UINT16_C(14775) }, { { UINT16_C(22847), UINT16_C(14561), UINT16_C(45692), UINT16_C(65489) }, UINT16_C(17517) }, { { UINT16_C( 881), UINT16_C(26586), UINT16_C(64910), UINT16_C(44496) }, UINT16_C( 5801) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); uint16_t r = simde_vaddv_u16(a); simde_assert_equal_u16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); uint16_t r = simde_vaddv_u16(a); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddv_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t r; } test_vec[] = { { { UINT32_C(2935789736), 
UINT32_C(3137418917) }, UINT32_C(1778241357) }, { { UINT32_C( 412512648), UINT32_C( 130824648) }, UINT32_C( 543337296) }, { { UINT32_C( 255831698), UINT32_C(3524137312) }, UINT32_C(3779969010) }, { { UINT32_C(2721703956), UINT32_C(2370832869) }, UINT32_C( 797569529) }, { { UINT32_C(1195068578), UINT32_C( 167918978) }, UINT32_C(1362987556) }, { { UINT32_C(1981978798), UINT32_C(1686040529) }, UINT32_C(3668019327) }, { { UINT32_C(4252220829), UINT32_C(3822027214) }, UINT32_C(3779280747) }, { { UINT32_C(1334118506), UINT32_C(3034436882) }, UINT32_C( 73588092) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); uint32_t r = simde_vaddv_u32(a); simde_assert_equal_u32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); uint32_t r = simde_vaddv_u32(a); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddvq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t a[4]; simde_float32_t r; } test_vec[] = { { { SIMDE_FLOAT32_C( -654.47), SIMDE_FLOAT32_C( -946.98), SIMDE_FLOAT32_C( -606.16), SIMDE_FLOAT32_C( 426.64) }, SIMDE_FLOAT32_C( -1780.97) }, { { SIMDE_FLOAT32_C( 868.73), SIMDE_FLOAT32_C( -655.12), SIMDE_FLOAT32_C( -15.45), SIMDE_FLOAT32_C( 495.29) }, SIMDE_FLOAT32_C( 693.45) }, { { SIMDE_FLOAT32_C( -337.86), SIMDE_FLOAT32_C( -244.89), SIMDE_FLOAT32_C( 98.86), SIMDE_FLOAT32_C( 743.11) }, SIMDE_FLOAT32_C( 259.22) }, { { SIMDE_FLOAT32_C( -546.07), SIMDE_FLOAT32_C( 748.90), SIMDE_FLOAT32_C( -734.79), SIMDE_FLOAT32_C( 726.97) }, SIMDE_FLOAT32_C( 195.01) }, { { SIMDE_FLOAT32_C( -248.73), SIMDE_FLOAT32_C( 463.88), SIMDE_FLOAT32_C( 495.61), SIMDE_FLOAT32_C( -608.95) }, SIMDE_FLOAT32_C( 101.81) }, { { SIMDE_FLOAT32_C( 490.96), SIMDE_FLOAT32_C( 76.01), SIMDE_FLOAT32_C( 
-900.59), SIMDE_FLOAT32_C( 34.68) }, SIMDE_FLOAT32_C( -298.94) }, { { SIMDE_FLOAT32_C( 663.94), SIMDE_FLOAT32_C( 660.23), SIMDE_FLOAT32_C( 682.64), SIMDE_FLOAT32_C( -644.47) }, SIMDE_FLOAT32_C( 1362.34) }, { { SIMDE_FLOAT32_C( -909.10), SIMDE_FLOAT32_C( 23.60), SIMDE_FLOAT32_C( -382.13), SIMDE_FLOAT32_C( -563.57) }, SIMDE_FLOAT32_C( -1831.20) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32_t r = simde_vaddvq_f32(a); simde_assert_equal_f32(r, test_vec[i].r, 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32_t r = simde_vaddvq_f32(a); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddvq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64_t a[4]; simde_float64_t r; } test_vec[] = { { { SIMDE_FLOAT64_C( -439.88), SIMDE_FLOAT64_C( 593.77) }, SIMDE_FLOAT64_C( 153.89) }, { { SIMDE_FLOAT64_C( 555.36), SIMDE_FLOAT64_C( -41.81) }, SIMDE_FLOAT64_C( 513.55) }, { { SIMDE_FLOAT64_C( -578.35), SIMDE_FLOAT64_C( -756.91) }, SIMDE_FLOAT64_C( -1335.26) }, { { SIMDE_FLOAT64_C( 564.48), SIMDE_FLOAT64_C( -445.40) }, SIMDE_FLOAT64_C( 119.08) }, { { SIMDE_FLOAT64_C( 762.82), SIMDE_FLOAT64_C( -204.54) }, SIMDE_FLOAT64_C( 558.28) }, { { SIMDE_FLOAT64_C( 674.09), SIMDE_FLOAT64_C( 291.13) }, SIMDE_FLOAT64_C( 965.22) }, { { SIMDE_FLOAT64_C( -929.06), SIMDE_FLOAT64_C( 684.47) }, SIMDE_FLOAT64_C( -244.59) }, { { SIMDE_FLOAT64_C( -72.52), SIMDE_FLOAT64_C( 177.81) }, SIMDE_FLOAT64_C( 105.29) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64_t r = simde_vaddvq_f64(a); simde_assert_equal_f64(r, test_vec[i].r, 1); } return 0; #else fputc('\n', stdout); for 
(int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64_t r = simde_vaddvq_f64(a); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddvq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t r; } test_vec[] = { { { INT8_C( 16), INT8_C( 76), INT8_C( 79), INT8_C( 9), -INT8_C( 28), INT8_C( 2), -INT8_C( 67), -INT8_C( 45), INT8_C( 62), -INT8_C( 12), INT8_C( 22), -INT8_C( 28), INT8_C( 114), -INT8_C( 59), INT8_MIN, -INT8_C( 11) }, INT8_C( 2) }, { { INT8_C( 88), INT8_C( 56), -INT8_C( 101), -INT8_C( 108), -INT8_C( 120), -INT8_C( 73), INT8_C( 121), INT8_C( 8), -INT8_C( 108), INT8_C( 83), -INT8_C( 27), INT8_C( 113), INT8_C( 120), -INT8_C( 59), -INT8_C( 31), -INT8_C( 120) }, INT8_C( 98) }, { { INT8_C( 17), INT8_C( 48), -INT8_C( 111), -INT8_C( 11), INT8_C( 50), INT8_C( 79), -INT8_C( 55), INT8_C( 112), INT8_C( 67), -INT8_C( 33), INT8_C( 84), -INT8_C( 75), -INT8_C( 92), -INT8_C( 44), -INT8_C( 85), -INT8_C( 3) }, -INT8_C( 52) }, { { INT8_C( 13), INT8_C( 70), -INT8_C( 111), -INT8_C( 107), -INT8_C( 3), INT8_C( 11), -INT8_C( 99), -INT8_C( 111), INT8_C( 94), -INT8_C( 125), INT8_C( 2), -INT8_C( 42), INT8_C( 72), -INT8_C( 29), INT8_C( 94), INT8_C( 89) }, INT8_C( 74) }, { { INT8_C( 20), -INT8_C( 17), INT8_C( 79), INT8_C( 70), INT8_C( 62), INT8_C( 24), -INT8_C( 73), -INT8_C( 126), -INT8_C( 9), INT8_C( 11), INT8_C( 55), -INT8_C( 101), -INT8_C( 32), -INT8_C( 30), -INT8_C( 104), -INT8_C( 19) }, INT8_C( 66) }, { { INT8_C( 40), INT8_C( 42), -INT8_C( 126), INT8_C( 37), INT8_C( 53), INT8_C( 32), -INT8_C( 74), -INT8_C( 109), -INT8_C( 93), -INT8_C( 72), INT8_C( 105), -INT8_C( 21), -INT8_C( 100), -INT8_C( 57), INT8_C( 68), -INT8_C( 80) }, -INT8_C( 99) }, { { -INT8_C( 74), -INT8_C( 109), -INT8_C( 10), -INT8_C( 11), -INT8_C( 85), -INT8_C( 83), INT8_C( 119), -INT8_C( 94), -INT8_C( 71), -INT8_C( 82), 
INT8_C( 62), -INT8_C( 103), -INT8_C( 111), -INT8_C( 42), -INT8_C( 122), -INT8_C( 71) }, -INT8_C( 119) }, { { INT8_C( 0), INT8_C( 8), -INT8_C( 33), INT8_C( 53), INT8_C( 40), -INT8_C( 107), -INT8_C( 56), -INT8_C( 53), INT8_C( 78), INT8_C( 49), -INT8_C( 74), -INT8_C( 22), -INT8_C( 8), -INT8_C( 5), -INT8_C( 102), -INT8_C( 81) }, -INT8_C( 57) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); int8_t r = simde_vaddvq_s8(a); simde_assert_equal_i8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); int8_t r = simde_vaddvq_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddvq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t r; } test_vec[] = { { { -INT16_C( 3794), -INT16_C( 24556), INT16_C( 21790), INT16_C( 24512), INT16_C( 18139), INT16_C( 27136), INT16_C( 20420), -INT16_C( 27241) }, -INT16_C( 9130) }, { { -INT16_C( 10776), -INT16_C( 29256), INT16_C( 21382), INT16_C( 1930), INT16_C( 27942), INT16_C( 13973), -INT16_C( 28904), INT16_C( 17957) }, INT16_C( 14248) }, { { INT16_C( 14720), -INT16_C( 24858), -INT16_C( 22898), INT16_C( 27134), -INT16_C( 275), -INT16_C( 20013), INT16_C( 27214), INT16_C( 13894) }, INT16_C( 14918) }, { { -INT16_C( 448), -INT16_C( 14652), INT16_C( 20049), INT16_C( 30669), INT16_C( 25275), -INT16_C( 11091), -INT16_C( 11535), INT16_C( 28954) }, INT16_C( 1685) }, { { INT16_C( 268), -INT16_C( 26097), INT16_C( 3495), -INT16_C( 27644), -INT16_C( 10484), INT16_C( 23109), -INT16_C( 29886), -INT16_C( 32112) }, INT16_C( 31721) }, { { INT16_C( 21642), -INT16_C( 9400), INT16_C( 5538), INT16_C( 24147), INT16_C( 119), INT16_C( 26674), INT16_C( 19667), -INT16_C( 8231) }, INT16_C( 14620) }, { { -INT16_C( 6067), -INT16_C( 2695), INT16_C( 
32246), INT16_C( 649), -INT16_C( 12459), -INT16_C( 26788), -INT16_C( 5030), -INT16_C( 7143) }, -INT16_C( 27287) }, { { INT16_C( 24897), -INT16_C( 7232), INT16_C( 4982), -INT16_C( 4799), INT16_C( 29459), -INT16_C( 6571), INT16_C( 11968), INT16_C( 3525) }, -INT16_C( 9307) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); int16_t r = simde_vaddvq_s16(a); simde_assert_equal_i16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); int16_t r = simde_vaddvq_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddvq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t r; } test_vec[] = { { { INT32_C( 409309617), INT32_C( 703009121), INT32_C( 1805496451), INT32_C( 168549695) }, -INT32_C( 1208602412) }, { { -INT32_C( 1162156857), INT32_C( 605475000), -INT32_C( 1732960282), -INT32_C( 349401798) }, INT32_C( 1655923359) }, { { INT32_C( 2130940190), INT32_C( 631827106), -INT32_C( 644856422), -INT32_C( 354116829) }, INT32_C( 1763794045) }, { { INT32_C( 950312575), INT32_C( 1432140399), INT32_C( 334303705), -INT32_C( 1107420770) }, INT32_C( 1609335909) }, { { INT32_C( 1279001002), -INT32_C( 2039290389), INT32_C( 1298072106), INT32_C( 490161054) }, INT32_C( 1027943773) }, { { INT32_C( 1364581346), INT32_C( 1873195414), INT32_C( 1636013250), INT32_C( 1478394285) }, INT32_C( 2057216999) }, { { INT32_C( 1856264835), INT32_C( 1794381375), -INT32_C( 1212722151), INT32_C( 2043998103) }, INT32_C( 186954866) }, { { INT32_C( 1623861962), -INT32_C( 1630506533), -INT32_C( 1291889915), INT32_C( 1460280788) }, INT32_C( 161746302) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); int32_t r = 
simde_vaddvq_s32(a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); int32_t r = simde_vaddvq_s32(a); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddvq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[8]; int64_t r; } test_vec[] = { { { -INT64_C( 3175282498865185383), -INT64_C( 6685163521795333907) }, INT64_C( 8586298053049032326) }, { { INT64_C( 4743869010420501684), INT64_C( 3259920673574755233) }, INT64_C( 8003789683995256917) }, { { -INT64_C( 1439710424467513906), INT64_C( 4491054795359323006) }, INT64_C( 3051344370891809100) }, { { -INT64_C( 6235463451047990149), INT64_C( 3894372779454748490) }, -INT64_C( 2341090671593241659) }, { { -INT64_C( 2927499906595145100), INT64_C( 9095823207579489649) }, INT64_C( 6168323300984344549) }, { { INT64_C( 1006603734084785223), INT64_C( 6746945194881143043) }, INT64_C( 7753548928965928266) }, { { -INT64_C( 1630249770841798095), INT64_C( 740594489963636933) }, -INT64_C( 889655280878161162) }, { { INT64_C( 3135157520517692338), -INT64_C( 7644798362971640413) }, -INT64_C( 4509640842453948075) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); int64_t r = simde_vaddvq_s64(a); simde_assert_equal_i64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); int64_t r = simde_vaddvq_s64(a); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddvq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t r; } test_vec[] = { { { UINT8_C( 32), UINT8_C( 30), UINT8_C(162), 
UINT8_C(114), UINT8_C( 14), UINT8_C(107), UINT8_C(223), UINT8_C(214), UINT8_C(123), UINT8_C(157), UINT8_C(124), UINT8_C( 64), UINT8_C(165), UINT8_C( 90), UINT8_C( 91), UINT8_C( 44) }, UINT8_C(218) }, { { UINT8_C( 60), UINT8_C( 50), UINT8_C(179), UINT8_C(217), UINT8_C( 36), UINT8_C( 73), UINT8_C(187), UINT8_C(120), UINT8_C(132), UINT8_C(216), UINT8_C( 31), UINT8_C(214), UINT8_C( 17), UINT8_C( 72), UINT8_C(168), UINT8_C( 49) }, UINT8_C( 29) }, { { UINT8_C(102), UINT8_C( 74), UINT8_C(163), UINT8_C(116), UINT8_C(181), UINT8_C(131), UINT8_C( 74), UINT8_C( 48), UINT8_C( 32), UINT8_C(198), UINT8_C(113), UINT8_C(197), UINT8_C( 33), UINT8_C(204), UINT8_C(242), UINT8_C( 93) }, UINT8_C(209) }, { { UINT8_MAX, UINT8_C(165), UINT8_C( 54), UINT8_C( 35), UINT8_C(238), UINT8_C(241), UINT8_C(155), UINT8_C(115), UINT8_C(201), UINT8_C(186), UINT8_C( 73), UINT8_C(218), UINT8_C( 2), UINT8_C(242), UINT8_C( 12), UINT8_C(105) }, UINT8_C(249) }, { { UINT8_C( 60), UINT8_C(175), UINT8_C(221), UINT8_C(242), UINT8_C( 50), UINT8_C( 40), UINT8_C( 34), UINT8_C( 82), UINT8_C(238), UINT8_C(147), UINT8_C( 24), UINT8_C( 15), UINT8_C( 96), UINT8_C( 10), UINT8_C(109), UINT8_C( 95) }, UINT8_C(102) }, { { UINT8_C(175), UINT8_C(163), UINT8_C(130), UINT8_C(157), UINT8_C(149), UINT8_C( 29), UINT8_C( 16), UINT8_C( 94), UINT8_C(215), UINT8_C( 90), UINT8_C( 57), UINT8_C(217), UINT8_C( 76), UINT8_C( 69), UINT8_C( 66), UINT8_C(136) }, UINT8_C( 47) }, { { UINT8_C(244), UINT8_C( 32), UINT8_C(122), UINT8_C( 39), UINT8_C( 72), UINT8_C(157), UINT8_C(121), UINT8_C( 54), UINT8_C( 48), UINT8_C(145), UINT8_C( 70), UINT8_C(144), UINT8_C(155), UINT8_C(179), UINT8_C(239), UINT8_C( 74) }, UINT8_C(103) }, { { UINT8_C( 86), UINT8_C(113), UINT8_C(232), UINT8_C(235), UINT8_C(142), UINT8_C(248), UINT8_C( 74), UINT8_C(101), UINT8_C( 82), UINT8_C(131), UINT8_C( 63), UINT8_C(158), UINT8_C(200), UINT8_C(129), UINT8_C( 39), UINT8_C(188) }, UINT8_C(173) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); uint8_t r = simde_vaddvq_u8(a); simde_assert_equal_u8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); uint8_t r = simde_vaddvq_u8(a); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddvq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t r; } test_vec[] = { { { UINT16_C(41377), UINT16_C(59875), UINT16_C(23870), UINT16_C(28448), UINT16_C(26350), UINT16_C(35583), UINT16_C(61209), UINT16_C(28628) }, UINT16_C(43196) }, { { UINT16_C(48224), UINT16_C(61275), UINT16_C(42421), UINT16_C( 1876), UINT16_C(37672), UINT16_C(61606), UINT16_C(52501), UINT16_C(46764) }, UINT16_C(24659) }, { { UINT16_C(36974), UINT16_C(44448), UINT16_C(49389), UINT16_C(56092), UINT16_C( 6950), UINT16_C(16229), UINT16_C(14858), UINT16_C(27566) }, UINT16_C(55898) }, { { UINT16_C( 2550), UINT16_C(43866), UINT16_C(44718), UINT16_C(54963), UINT16_C(22850), UINT16_C(22470), UINT16_C(29478), UINT16_C(37901) }, UINT16_C(62188) }, { { UINT16_C(44291), UINT16_C(61505), UINT16_C(23917), UINT16_C(37835), UINT16_C(12665), UINT16_C(33746), UINT16_C(33131), UINT16_C(25070) }, UINT16_C(10016) }, { { UINT16_C(18570), UINT16_C(14605), UINT16_C(49399), UINT16_C(14607), UINT16_C(54809), UINT16_C(16272), UINT16_C(40265), UINT16_C(19667) }, UINT16_C(31586) }, { { UINT16_C( 5451), UINT16_C(47164), UINT16_C( 1906), UINT16_C(60236), UINT16_C( 7736), UINT16_C(41839), UINT16_C(23967), UINT16_C(10757) }, UINT16_C( 2448) }, { { UINT16_C( 4774), UINT16_C(40291), UINT16_C(29394), UINT16_C(60374), UINT16_C(26184), UINT16_C(37162), UINT16_C(64771), UINT16_C(20189) }, UINT16_C(20995) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); uint16_t r = 
simde_vaddvq_u16(a); simde_assert_equal_u16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); uint16_t r = simde_vaddvq_u16(a); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddvq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t r; } test_vec[] = { { { UINT32_C(2231834898), UINT32_C(1500533537), UINT32_C( 301850481), UINT32_C(3812295229) }, UINT32_C(3551546849) }, { { UINT32_C(3867188756), UINT32_C(1506891280), UINT32_C(3219848124), UINT32_C( 185518328) }, UINT32_C( 189511896) }, { { UINT32_C( 42997217), UINT32_C(3646685288), UINT32_C( 501897696), UINT32_C(1862280539) }, UINT32_C(1758893444) }, { { UINT32_C(3562373315), UINT32_C(2452432598), UINT32_C( 424744737), UINT32_C(3240386527) }, UINT32_C(1090002585) }, { { UINT32_C(3703813236), UINT32_C(2511740853), UINT32_C(3551699064), UINT32_C(2302849734) }, UINT32_C(3480168295) }, { { UINT32_C( 140351282), UINT32_C(3734670013), UINT32_C(2180574114), UINT32_C(3208780875) }, UINT32_C( 674441692) }, { { UINT32_C(2258372305), UINT32_C(2652590629), UINT32_C(3094466034), UINT32_C(2973938815) }, UINT32_C(2389433191) }, { { UINT32_C( 163159883), UINT32_C(3437712170), UINT32_C(2303582014), UINT32_C(3444150524) }, UINT32_C( 758669999) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); uint32_t r = simde_vaddvq_u32(a); simde_assert_equal_u32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); uint32_t r = simde_vaddvq_u32(a); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddvq_u64 
(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[8]; uint64_t r; } test_vec[] = { { { UINT64_C( 2979533814127388054), UINT64_C(14585791949184355131) }, UINT64_C(17565325763311743185) }, { { UINT64_C(13094703515321967811), UINT64_C( 3270607939043264410) }, UINT64_C(16365311454365232221) }, { { UINT64_C( 6903246528981808749), UINT64_C(10905049591847694093) }, UINT64_C(17808296120829502842) }, { { UINT64_C( 9211337766653405864), UINT64_C( 651185335746040901) }, UINT64_C( 9862523102399446765) }, { { UINT64_C( 4662485875117912368), UINT64_C(18056095645387509034) }, UINT64_C( 4271837446795869786) }, { { UINT64_C(11291068898576308557), UINT64_C( 8189938041221596931) }, UINT64_C( 1034262866088353872) }, { { UINT64_C(18007214092850648434), UINT64_C( 2563546366726353735) }, UINT64_C( 2124016385867450553) }, { { UINT64_C(11499667444815211940), UINT64_C( 252455085538152398) }, UINT64_C(11752122530353364338) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); uint64_t r = simde_vaddvq_u64(a); simde_assert_equal_u64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); uint64_t r = simde_vaddvq_u64(a); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vaddv_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vaddv_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vaddv_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vaddv_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vaddv_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vaddv_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vaddv_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vaddvq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vaddvq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vaddvq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vaddvq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vaddvq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vaddvq_s64) 
SIMDE_TEST_FUNC_LIST_ENTRY(vaddvq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vaddvq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vaddvq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vaddvq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/addw.c000066400000000000000000000431601400333146700164060ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN addw #include "test-neon.h" #include "../../../simde/arm/neon/addw.h" static int test_simde_vaddw_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int8_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 1399), -INT16_C( 17500), -INT16_C( 22993), -INT16_C( 21510), INT16_C( 23499), INT16_C( 6907), INT16_C( 18650), -INT16_C( 25560) }, { -INT8_C( 17), INT8_C( 18), -INT8_C( 39), -INT8_C( 77), INT8_C( 69), INT8_C( 53), INT8_C( 99), INT8_C( 113) }, { INT16_C( 1382), -INT16_C( 17482), -INT16_C( 23032), -INT16_C( 21587), INT16_C( 23568), INT16_C( 6960), INT16_C( 18749), -INT16_C( 25447) } }, { { INT16_C( 2134), -INT16_C( 30371), INT16_C( 8145), INT16_C( 18599), INT16_C( 19236), INT16_C( 21252), -INT16_C( 271), -INT16_C( 16898) }, { INT8_C( 89), -INT8_C( 7), -INT8_C( 41), INT8_C( 51), INT8_C( 65), INT8_C( 0), -INT8_C( 49), INT8_C( 49) }, { INT16_C( 2223), -INT16_C( 30378), INT16_C( 8104), INT16_C( 18650), INT16_C( 19301), INT16_C( 21252), -INT16_C( 320), -INT16_C( 16849) } }, { { -INT16_C( 22254), INT16_C( 22500), INT16_C( 18398), INT16_C( 13768), INT16_C( 9807), INT16_C( 8638), INT16_C( 25925), INT16_C( 27241) }, { -INT8_C( 80), INT8_C( 109), -INT8_C( 67), -INT8_C( 94), INT8_C( 108), -INT8_C( 68), INT8_C( 95), -INT8_C( 59) }, { -INT16_C( 22334), INT16_C( 22609), INT16_C( 18331), INT16_C( 13674), INT16_C( 9915), INT16_C( 8570), INT16_C( 26020), INT16_C( 27182) } }, { { INT16_C( 14005), -INT16_C( 2055), -INT16_C( 14282), INT16_C( 18472), INT16_C( 3185), INT16_C( 20639), INT16_C( 26707), -INT16_C( 23931) }, { -INT8_C( 114), INT8_C( 67), -INT8_C( 61), -INT8_C( 45), -INT8_C( 87), INT8_C( 45), INT8_C( 61), INT8_C( 89) }, { 
INT16_C( 13891), -INT16_C( 1988), -INT16_C( 14343), INT16_C( 18427), INT16_C( 3098), INT16_C( 20684), INT16_C( 26768), -INT16_C( 23842) } }, { { -INT16_C( 1126), INT16_C( 1787), INT16_C( 23223), INT16_C( 27852), -INT16_C( 14959), -INT16_C( 14493), -INT16_C( 29811), -INT16_C( 240) }, { -INT8_C( 105), -INT8_C( 81), INT8_C( 79), -INT8_C( 22), INT8_C( 23), -INT8_C( 44), -INT8_C( 115), -INT8_C( 91) }, { -INT16_C( 1231), INT16_C( 1706), INT16_C( 23302), INT16_C( 27830), -INT16_C( 14936), -INT16_C( 14537), -INT16_C( 29926), -INT16_C( 331) } }, { { INT16_C( 20503), -INT16_C( 16263), -INT16_C( 18819), INT16_C( 6170), INT16_C( 5553), INT16_C( 26654), -INT16_C( 5520), INT16_C( 469) }, { -INT8_C( 81), INT8_C( 56), -INT8_C( 56), INT8_C( 61), -INT8_C( 60), -INT8_C( 40), INT8_C( 60), INT8_C( 91) }, { INT16_C( 20422), -INT16_C( 16207), -INT16_C( 18875), INT16_C( 6231), INT16_C( 5493), INT16_C( 26614), -INT16_C( 5460), INT16_C( 560) } }, { { -INT16_C( 29816), -INT16_C( 24762), -INT16_C( 11425), INT16_C( 30277), -INT16_C( 16861), -INT16_C( 24265), INT16_C( 20852), INT16_C( 9913) }, { INT8_C( 102), -INT8_C( 41), -INT8_C( 114), -INT8_C( 42), -INT8_C( 62), INT8_C( 99), -INT8_C( 41), INT8_C( 113) }, { -INT16_C( 29714), -INT16_C( 24803), -INT16_C( 11539), INT16_C( 30235), -INT16_C( 16923), -INT16_C( 24166), INT16_C( 20811), INT16_C( 10026) } }, { { -INT16_C( 24420), INT16_C( 24750), -INT16_C( 5512), INT16_C( 187), INT16_C( 373), -INT16_C( 11104), -INT16_C( 6700), -INT16_C( 1973) }, { -INT8_C( 93), -INT8_C( 126), -INT8_C( 103), INT8_C( 23), -INT8_C( 45), INT8_C( 82), INT8_C( 61), INT8_C( 57) }, { -INT16_C( 24513), INT16_C( 24624), -INT16_C( 5615), INT16_C( 210), INT16_C( 328), -INT16_C( 11022), -INT16_C( 6639), -INT16_C( 1916) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int16x8_t r = simde_vaddw_s8(a, b); simde_test_arm_neon_assert_equal_i16x8(r, 
simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vaddw_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int16_t b[4]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 1388212854), -INT32_C( 137385825), -INT32_C( 2114199091), -INT32_C( 1388281654) }, { -INT16_C( 16021), INT16_C( 14313), INT16_C( 1415), INT16_C( 13855) }, { -INT32_C( 1388228875), -INT32_C( 137371512), -INT32_C( 2114197676), -INT32_C( 1388267799) } }, { { INT32_C( 1434567342), INT32_C( 335417737), INT32_C( 1052852126), -INT32_C( 1271557913) }, { INT16_C( 12401), INT16_C( 15157), INT16_C( 30129), INT16_C( 7401) }, { INT32_C( 1434579743), INT32_C( 335432894), INT32_C( 1052882255), -INT32_C( 1271550512) } }, { { -INT32_C( 1101802954), -INT32_C( 2030799912), -INT32_C( 1092913867), INT32_C( 617798022) }, { -INT16_C( 27880), -INT16_C( 158), -INT16_C( 26845), -INT16_C( 27469) }, { -INT32_C( 1101830834), -INT32_C( 2030800070), -INT32_C( 1092940712), INT32_C( 617770553) } }, { { INT32_C( 2043734216), -INT32_C( 1802127010), INT32_C( 1666378123), -INT32_C( 1846917540) }, { -INT16_C( 14917), INT16_C( 16719), INT16_C( 8607), -INT16_C( 18586) }, { INT32_C( 2043719299), -INT32_C( 1802110291), INT32_C( 1666386730), -INT32_C( 1846936126) } }, { { -INT32_C( 675821388), INT32_C( 678193760), -INT32_C( 1314833325), -INT32_C( 2142947595) }, { -INT16_C( 26593), INT16_C( 31716), -INT16_C( 12578), -INT16_C( 26100) }, { -INT32_C( 675847981), INT32_C( 678225476), -INT32_C( 1314845903), -INT32_C( 2142973695) } }, { { INT32_C( 853236883), INT32_C( 854212989), INT32_C( 1779015946), INT32_C( 1586656523) }, { INT16_C( 13233), -INT16_C( 23025), INT16_C( 21865), -INT16_C( 30425) }, { INT32_C( 853250116), INT32_C( 854189964), INT32_C( 1779037811), INT32_C( 1586626098) } }, { { -INT32_C( 888927251), INT32_C( 1818563033), -INT32_C( 358661779), -INT32_C( 1944286846) }, { INT16_C( 9770), INT16_C( 13814), -INT16_C( 30565), INT16_C( 19860) }, { -INT32_C( 888917481), INT32_C( 1818576847), -INT32_C( 358692344), -INT32_C( 
1944266986) } }, { { INT32_C( 636724155), -INT32_C( 441574664), -INT32_C( 21908955), INT32_C( 812324547) }, { INT16_C( 2647), -INT16_C( 9701), INT16_C( 14227), -INT16_C( 17050) }, { INT32_C( 636726802), -INT32_C( 441584365), -INT32_C( 21894728), INT32_C( 812307497) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int32x4_t r = simde_vaddw_s16(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vaddw_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int32_t b[2]; int64_t r[2]; } test_vec[] = { { { -INT64_C( 35941828180514250), -INT64_C( 3275167048482511539) }, { INT32_C( 1164837346), INT32_C( 810152381) }, { -INT64_C( 35941827015676904), -INT64_C( 3275167047672359158) } }, { { INT64_C( 7520367280355497394), -INT64_C( 5568442117108591200) }, { INT32_C( 2092554896), INT32_C( 21972255) }, { INT64_C( 7520367282448052290), -INT64_C( 5568442117086618945) } }, { { INT64_C( 6932324803500490054), -INT64_C( 7385863836118137883) }, { -INT32_C( 1655510216), INT32_C( 1716456406) }, { INT64_C( 6932324801844979838), -INT64_C( 7385863834401681477) } }, { { -INT64_C( 7780757470541838107), -INT64_C( 4468190007372788497) }, { INT32_C( 148488975), INT32_C( 513891046) }, { -INT64_C( 7780757470393349132), -INT64_C( 4468190006858897451) } }, { { -INT64_C( 1981007695762885563), -INT64_C( 8721521294389451500) }, { INT32_C( 371429178), INT32_C( 1809326171) }, { -INT64_C( 1981007695391456385), -INT64_C( 8721521292580125329) } }, { { INT64_C( 5901990452037661155), -INT64_C( 4328821606770170871) }, { -INT32_C( 333064875), -INT32_C( 26010428) }, { INT64_C( 5901990451704596280), -INT64_C( 4328821606796181299) } }, { { -INT64_C( 7317806549163469141), INT64_C( 286907640752432542) }, { -INT32_C( 1626642192), -INT32_C( 1402734761) }, { -INT64_C( 7317806550790111333), INT64_C( 
286907639349697781) } }, { { INT64_C( 5732814751622858957), INT64_C( 3527663835220976802) }, { -INT32_C( 1495639892), -INT32_C( 1800809052) }, { INT64_C( 5732814750127219065), INT64_C( 3527663833420167750) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int64x2_t r = simde_vaddw_s32(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vaddw_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint8_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(10834), UINT16_C(14742), UINT16_C(18794), UINT16_C(53447), UINT16_C(64764), UINT16_C(23788), UINT16_C( 4099), UINT16_C(38745) }, { UINT8_C(208), UINT8_C(229), UINT8_C( 91), UINT8_C(123), UINT8_C( 81), UINT8_C(202), UINT8_C( 74), UINT8_C( 0) }, { UINT16_C(11042), UINT16_C(14971), UINT16_C(18885), UINT16_C(53570), UINT16_C(64845), UINT16_C(23990), UINT16_C( 4173), UINT16_C(38745) } }, { { UINT16_C( 1758), UINT16_C(13153), UINT16_C(29583), UINT16_C(57738), UINT16_C( 8349), UINT16_C( 1818), UINT16_C(57962), UINT16_C(26327) }, { UINT8_C(222), UINT8_C(195), UINT8_C(194), UINT8_C(225), UINT8_C(211), UINT8_C( 28), UINT8_C(120), UINT8_C(164) }, { UINT16_C( 1980), UINT16_C(13348), UINT16_C(29777), UINT16_C(57963), UINT16_C( 8560), UINT16_C( 1846), UINT16_C(58082), UINT16_C(26491) } }, { { UINT16_C(54017), UINT16_C(21023), UINT16_C(27038), UINT16_C(31827), UINT16_C(46191), UINT16_C(65199), UINT16_C(14631), UINT16_C(50400) }, { UINT8_C( 90), UINT8_C(250), UINT8_C(203), UINT8_C(196), UINT8_C(220), UINT8_C(162), UINT8_C( 42), UINT8_C(186) }, { UINT16_C(54107), UINT16_C(21273), UINT16_C(27241), UINT16_C(32023), UINT16_C(46411), UINT16_C(65361), UINT16_C(14673), UINT16_C(50586) } }, { { UINT16_C(60518), UINT16_C(14747), UINT16_C( 4872), UINT16_C( 2781), UINT16_C(64999), UINT16_C(34140), UINT16_C(44902), UINT16_C(54785) }, { 
UINT8_C( 99), UINT8_C(177), UINT8_C(212), UINT8_C(138), UINT8_C(234), UINT8_C(180), UINT8_C( 78), UINT8_C( 68) }, { UINT16_C(60617), UINT16_C(14924), UINT16_C( 5084), UINT16_C( 2919), UINT16_C(65233), UINT16_C(34320), UINT16_C(44980), UINT16_C(54853) } }, { { UINT16_C( 6575), UINT16_C(35592), UINT16_C(12988), UINT16_C( 8774), UINT16_C(57631), UINT16_C(10075), UINT16_C(14837), UINT16_C(56369) }, { UINT8_C( 54), UINT8_C(142), UINT8_C( 97), UINT8_C(156), UINT8_C( 61), UINT8_C( 98), UINT8_C(114), UINT8_C(161) }, { UINT16_C( 6629), UINT16_C(35734), UINT16_C(13085), UINT16_C( 8930), UINT16_C(57692), UINT16_C(10173), UINT16_C(14951), UINT16_C(56530) } }, { { UINT16_C(18195), UINT16_C(65067), UINT16_C(31483), UINT16_C(43586), UINT16_C(19347), UINT16_C(20278), UINT16_C(31869), UINT16_C(40049) }, { UINT8_C( 93), UINT8_C(205), UINT8_C(196), UINT8_C( 82), UINT8_C( 6), UINT8_C(245), UINT8_C( 46), UINT8_C( 60) }, { UINT16_C(18288), UINT16_C(65272), UINT16_C(31679), UINT16_C(43668), UINT16_C(19353), UINT16_C(20523), UINT16_C(31915), UINT16_C(40109) } }, { { UINT16_C(36739), UINT16_C(49624), UINT16_C(19442), UINT16_C( 1378), UINT16_C(36242), UINT16_C(36099), UINT16_C(17927), UINT16_C(39736) }, { UINT8_C(145), UINT8_C(110), UINT8_C(234), UINT8_C( 14), UINT8_C(234), UINT8_C( 92), UINT8_C(171), UINT8_C( 71) }, { UINT16_C(36884), UINT16_C(49734), UINT16_C(19676), UINT16_C( 1392), UINT16_C(36476), UINT16_C(36191), UINT16_C(18098), UINT16_C(39807) } }, { { UINT16_C(28457), UINT16_C(12186), UINT16_C(51300), UINT16_C(59499), UINT16_C(17240), UINT16_C(19113), UINT16_C( 2958), UINT16_C( 8271) }, { UINT8_C(152), UINT8_C( 83), UINT8_C(174), UINT8_C(160), UINT8_C(153), UINT8_C(230), UINT8_C( 59), UINT8_C( 42) }, { UINT16_C(28609), UINT16_C(12269), UINT16_C(51474), UINT16_C(59659), UINT16_C(17393), UINT16_C(19343), UINT16_C( 3017), UINT16_C( 8313) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); 
simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint16x8_t r = simde_vaddw_u8(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vaddw_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint16_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C( 125494402), UINT32_C(3611639350), UINT32_C(3234195889), UINT32_C( 286950810) }, { UINT16_C(45973), UINT16_C(17753), UINT16_C(40022), UINT16_C(16530) }, { UINT32_C( 125540375), UINT32_C(3611657103), UINT32_C(3234235911), UINT32_C( 286967340) } }, { { UINT32_C(4019197649), UINT32_C(1265827017), UINT32_C( 374598880), UINT32_C(3689846826) }, { UINT16_C(45953), UINT16_C( 7068), UINT16_C(46649), UINT16_C(52780) }, { UINT32_C(4019243602), UINT32_C(1265834085), UINT32_C( 374645529), UINT32_C(3689899606) } }, { { UINT32_C(3222504809), UINT32_C(4076905762), UINT32_C(2246218171), UINT32_C(1842369677) }, { UINT16_C( 9024), UINT16_C(27267), UINT16_C(29115), UINT16_C(15430) }, { UINT32_C(3222513833), UINT32_C(4076933029), UINT32_C(2246247286), UINT32_C(1842385107) } }, { { UINT32_C(1582817829), UINT32_C( 36471704), UINT32_C( 734150409), UINT32_C(2686370532) }, { UINT16_C( 83), UINT16_C(57381), UINT16_C(62805), UINT16_C(38221) }, { UINT32_C(1582817912), UINT32_C( 36529085), UINT32_C( 734213214), UINT32_C(2686408753) } }, { { UINT32_C(3556823321), UINT32_C(1729185346), UINT32_C(3234162728), UINT32_C(4123193836) }, { UINT16_C(33840), UINT16_C( 5408), UINT16_C(15943), UINT16_C(39605) }, { UINT32_C(3556857161), UINT32_C(1729190754), UINT32_C(3234178671), UINT32_C(4123233441) } }, { { UINT32_C(2474367550), UINT32_C(3895052495), UINT32_C(3703384473), UINT32_C(2537803375) }, { UINT16_C( 2358), UINT16_C( 8791), UINT16_C( 6906), UINT16_C(11031) }, { UINT32_C(2474369908), UINT32_C(3895061286), UINT32_C(3703391379), UINT32_C(2537814406) } }, { { UINT32_C(3846191006), UINT32_C(3028350325), UINT32_C(2655517647), UINT32_C(1569157315) }, { UINT16_C(17561), UINT16_C( 2105), 
UINT16_C(31762), UINT16_C(18591) }, { UINT32_C(3846208567), UINT32_C(3028352430), UINT32_C(2655549409), UINT32_C(1569175906) } }, { { UINT32_C(2154559365), UINT32_C(2947252753), UINT32_C( 798354362), UINT32_C(2950895072) }, { UINT16_C(11024), UINT16_C(54093), UINT16_C(54427), UINT16_C(13616) }, { UINT32_C(2154570389), UINT32_C(2947306846), UINT32_C( 798408789), UINT32_C(2950908688) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint32x4_t r = simde_vaddw_u16(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vaddw_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint32_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(18246032550608320754), UINT64_C( 5491972490029369669) }, { UINT32_C(2534732259), UINT32_C(4282430487) }, { UINT64_C(18246032553143053013), UINT64_C( 5491972494311800156) } }, { { UINT64_C( 6488175683242448873), UINT64_C( 8558374424049239001) }, { UINT32_C(1597976701), UINT32_C(2074834583) }, { UINT64_C( 6488175684840425574), UINT64_C( 8558374426124073584) } }, { { UINT64_C( 6664856254073192296), UINT64_C(17829150720575962538) }, { UINT32_C(1405070114), UINT32_C(2647229471) }, { UINT64_C( 6664856255478262410), UINT64_C(17829150723223192009) } }, { { UINT64_C(16701501908321044661), UINT64_C(15507834351980567142) }, { UINT32_C(2162756194), UINT32_C( 175593192) }, { UINT64_C(16701501910483800855), UINT64_C(15507834352156160334) } }, { { UINT64_C( 8123974462495078145), UINT64_C(14381546540155910703) }, { UINT32_C(2342583390), UINT32_C(1130551009) }, { UINT64_C( 8123974464837661535), UINT64_C(14381546541286461712) } }, { { UINT64_C(11876901113150262496), UINT64_C( 9772111181212103025) }, { UINT32_C(2654362173), UINT32_C(1030081503) }, { UINT64_C(11876901115804624669), UINT64_C( 9772111182242184528) } }, { { UINT64_C(13613022525382002119), 
UINT64_C( 6538762566695759479) }, { UINT32_C(3800531850), UINT32_C(1130964230) }, { UINT64_C(13613022529182533969), UINT64_C( 6538762567826723709) } }, { { UINT64_C( 3646867225230548863), UINT64_C( 640855369439733067) }, { UINT32_C(1579313525), UINT32_C(1287184578) }, { UINT64_C( 3646867226809862388), UINT64_C( 640855370726917645) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint64x2_t r = simde_vaddw_u32(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vaddw_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vaddw_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vaddw_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vaddw_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vaddw_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vaddw_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/addw_high.c000066400000000000000000000604751400333146700174150ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN addw_high #include "test-neon.h" #include "../../../simde/arm/neon/addw_high.h" static int test_simde_vaddw_high_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int8_t b[16]; int16_t r[8]; } test_vec[] = { { { INT16_C( 23541), INT16_C( 10181), -INT16_C( 30920), INT16_C( 29810), -INT16_C( 14369), INT16_C( 8626), -INT16_C( 8621), INT16_C( 19588) }, { INT8_C( 120), -INT8_C( 90), -INT8_C( 45), -INT8_C( 11), INT8_C( 119), -INT8_C( 3), INT8_C( 3), INT8_C( 41), -INT8_C( 7), -INT8_C( 7), INT8_C( 1), -INT8_C( 118), INT8_C( 56), INT8_C( 21), INT8_C( 0), INT8_C( 45) }, { INT16_C( 23534), INT16_C( 10174), -INT16_C( 30919), INT16_C( 29692), -INT16_C( 14313), INT16_C( 8647), -INT16_C( 8621), INT16_C( 19633) } }, { { -INT16_C( 14992), -INT16_C( 22187), -INT16_C( 14516), INT16_C( 11037), -INT16_C( 12401), -INT16_C( 7604), -INT16_C( 11858), INT16_C( 9774) }, { INT8_C( 
119), INT8_C( 1), INT8_C( 27), -INT8_C( 18), -INT8_C( 1), INT8_C( 31), INT8_C( 24), -INT8_C( 8), INT8_C( 24), INT8_C( 25), -INT8_C( 126), INT8_C( 80), INT8_C( 46), -INT8_C( 126), INT8_C( 126), -INT8_C( 97) }, { -INT16_C( 14968), -INT16_C( 22162), -INT16_C( 14642), INT16_C( 11117), -INT16_C( 12355), -INT16_C( 7730), -INT16_C( 11732), INT16_C( 9677) } }, { { -INT16_C( 11449), -INT16_C( 27832), INT16_C( 26010), INT16_C( 10687), INT16_C( 2869), -INT16_C( 7412), INT16_C( 15068), INT16_C( 21257) }, { INT8_C( 60), INT8_C( 36), INT8_C( 66), INT8_C( 59), INT8_C( 67), INT8_C( 90), INT8_C( 51), INT8_C( 92), INT8_C( 115), -INT8_C( 75), -INT8_C( 84), -INT8_C( 94), INT8_C( 55), INT8_C( 42), INT8_C( 65), INT8_C( 126) }, { -INT16_C( 11334), -INT16_C( 27907), INT16_C( 25926), INT16_C( 10593), INT16_C( 2924), -INT16_C( 7370), INT16_C( 15133), INT16_C( 21383) } }, { { -INT16_C( 30211), -INT16_C( 26607), -INT16_C( 12050), INT16_C( 9153), -INT16_C( 12836), -INT16_C( 18426), INT16_C( 3848), INT16_C( 17420) }, { INT8_C( 52), INT8_C( 78), INT8_MAX, INT8_C( 119), -INT8_C( 88), -INT8_C( 78), -INT8_C( 45), INT8_C( 27), INT8_C( 103), INT8_MIN, -INT8_C( 67), -INT8_C( 98), -INT8_C( 86), -INT8_C( 2), INT8_C( 28), -INT8_C( 88) }, { -INT16_C( 30108), -INT16_C( 26735), -INT16_C( 12117), INT16_C( 9055), -INT16_C( 12922), -INT16_C( 18428), INT16_C( 3876), INT16_C( 17332) } }, { { INT16_C( 11655), INT16_C( 30272), INT16_C( 510), -INT16_C( 9575), -INT16_C( 24369), -INT16_C( 10350), -INT16_C( 24913), -INT16_C( 7397) }, { -INT8_C( 20), -INT8_C( 102), INT8_C( 91), -INT8_C( 108), INT8_C( 76), INT8_C( 46), -INT8_C( 80), -INT8_C( 77), -INT8_C( 82), INT8_C( 109), INT8_C( 81), INT8_C( 89), INT8_C( 108), INT8_C( 109), INT8_C( 1), -INT8_C( 13) }, { INT16_C( 11573), INT16_C( 30381), INT16_C( 591), -INT16_C( 9486), -INT16_C( 24261), -INT16_C( 10241), -INT16_C( 24912), -INT16_C( 7410) } }, { { INT16_C( 16794), -INT16_C( 26519), INT16_C( 834), INT16_C( 4466), INT16_C( 1443), INT16_C( 21224), INT16_C( 931), -INT16_C( 
28618) }, { -INT8_C( 99), -INT8_C( 111), INT8_C( 36), -INT8_C( 23), -INT8_C( 65), -INT8_C( 44), -INT8_C( 100), INT8_C( 110), INT8_C( 66), -INT8_C( 19), -INT8_C( 57), -INT8_C( 82), INT8_C( 90), -INT8_C( 56), -INT8_C( 95), -INT8_C( 11) }, { INT16_C( 16860), -INT16_C( 26538), INT16_C( 777), INT16_C( 4384), INT16_C( 1533), INT16_C( 21168), INT16_C( 836), -INT16_C( 28629) } }, { { INT16_C( 2825), INT16_C( 19341), INT16_C( 14), -INT16_C( 20131), INT16_C( 17669), -INT16_C( 22525), INT16_C( 14665), -INT16_C( 6600) }, { -INT8_C( 54), INT8_C( 93), -INT8_C( 48), -INT8_C( 118), INT8_C( 49), INT8_C( 108), -INT8_C( 8), INT8_C( 115), INT8_C( 90), -INT8_C( 65), INT8_C( 33), -INT8_C( 76), -INT8_C( 121), -INT8_C( 61), -INT8_C( 87), -INT8_C( 112) }, { INT16_C( 2915), INT16_C( 19276), INT16_C( 47), -INT16_C( 20207), INT16_C( 17548), -INT16_C( 22586), INT16_C( 14578), -INT16_C( 6712) } }, { { INT16_C( 14286), -INT16_C( 8997), INT16_C( 14391), INT16_C( 15501), -INT16_C( 28546), -INT16_C( 14364), INT16_C( 7626), -INT16_C( 27475) }, { INT8_C( 122), INT8_C( 125), INT8_C( 30), -INT8_C( 85), -INT8_C( 22), INT8_C( 22), INT8_C( 31), INT8_C( 68), -INT8_C( 43), INT8_C( 64), -INT8_C( 8), INT8_C( 92), INT8_C( 3), -INT8_C( 94), -INT8_C( 20), -INT8_C( 47) }, { INT16_C( 14243), -INT16_C( 8933), INT16_C( 14383), INT16_C( 15593), -INT16_C( 28543), -INT16_C( 14458), INT16_C( 7606), -INT16_C( 27522) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int16x8_t r = simde_vaddw_high_s8(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int16x8_t r = simde_vaddw_high_s8(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); 
simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddw_high_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int16_t b[8]; int32_t r[4]; } test_vec[] = { { { INT32_C( 2039093453), INT32_C( 519935452), -INT32_C( 433741944), -INT32_C( 762657082) }, { INT16_C( 22083), -INT16_C( 32736), -INT16_C( 31413), -INT16_C( 20010), INT16_C( 27699), INT16_C( 24145), INT16_C( 28574), INT16_C( 27522) }, { INT32_C( 2039121152), INT32_C( 519959597), -INT32_C( 433713370), -INT32_C( 762629560) } }, { { INT32_C( 1675890055), INT32_C( 713154978), INT32_C( 1175496320), -INT32_C( 1340564883) }, { INT16_C( 14832), INT16_C( 15152), INT16_C( 1726), -INT16_C( 3348), INT16_C( 15986), INT16_C( 4432), -INT16_C( 11347), INT16_C( 13436) }, { INT32_C( 1675906041), INT32_C( 713159410), INT32_C( 1175484973), -INT32_C( 1340551447) } }, { { -INT32_C( 2104008224), -INT32_C( 1012066238), INT32_C( 722058686), INT32_C( 1222320728) }, { INT16_C( 2907), INT16_C( 6532), INT16_C( 28689), -INT16_C( 31733), INT16_C( 23726), INT16_C( 23445), INT16_C( 4399), INT16_C( 3983) }, { -INT32_C( 2103984498), -INT32_C( 1012042793), INT32_C( 722063085), INT32_C( 1222324711) } }, { { -INT32_C( 1248778638), -INT32_C( 42451394), INT32_C( 1411940860), -INT32_C( 23329629) }, { INT16_C( 8207), INT16_C( 8472), INT16_C( 9105), INT16_C( 16293), INT16_C( 14975), -INT16_C( 20837), INT16_C( 10827), -INT16_C( 16707) }, { -INT32_C( 1248763663), -INT32_C( 42472231), INT32_C( 1411951687), -INT32_C( 23346336) } }, { { -INT32_C( 1888268463), -INT32_C( 1987253363), INT32_C( 282965356), -INT32_C( 938575175) }, { INT16_C( 9882), INT16_C( 11241), -INT16_C( 29110), -INT16_C( 13973), INT16_C( 1736), INT16_C( 5240), INT16_C( 13616), -INT16_C( 32302) }, { -INT32_C( 1888266727), -INT32_C( 1987248123), INT32_C( 282978972), -INT32_C( 938607477) } }, { { INT32_C( 303121796), -INT32_C( 1667523280), INT32_C( 
195852626), -INT32_C( 1915438093) }, { -INT16_C( 16927), INT16_C( 11193), INT16_C( 9292), INT16_C( 5365), INT16_C( 27946), INT16_C( 23080), -INT16_C( 1374), INT16_C( 10204) }, { INT32_C( 303149742), -INT32_C( 1667500200), INT32_C( 195851252), -INT32_C( 1915427889) } }, { { INT32_C( 1866067263), -INT32_C( 586361718), INT32_C( 1088993357), INT32_C( 1439612019) }, { -INT16_C( 30854), -INT16_C( 14720), INT16_C( 30123), -INT16_C( 10790), INT16_C( 994), -INT16_C( 31441), INT16_C( 3069), INT16_C( 15788) }, { INT32_C( 1866068257), -INT32_C( 586393159), INT32_C( 1088996426), INT32_C( 1439627807) } }, { { -INT32_C( 2085820936), INT32_C( 123779257), -INT32_C( 465090447), INT32_C( 2134447365) }, { -INT16_C( 17764), INT16_C( 18245), INT16_C( 7983), INT16_C( 4636), INT16_C( 19490), INT16_C( 8343), INT16_C( 17239), INT16_C( 20573) }, { -INT32_C( 2085801446), INT32_C( 123787600), -INT32_C( 465073208), INT32_C( 2134467938) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int32x4_t r = simde_vaddw_high_s16(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int32x4_t r = simde_vaddw_high_s16(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddw_high_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int32_t b[4]; int64_t r[2]; } test_vec[] = { { { -INT64_C( 5849238384416326824), INT64_C( 1277337534084417282) }, { INT32_C( 718341893), INT32_C( 1336516965), -INT32_C( 1497986143), -INT32_C( 1836582855) }, { -INT64_C( 
5849238385914312967), INT64_C( 1277337532247834427) } }, { { INT64_C( 2688498497482969075), -INT64_C( 5167571939423768705) }, { -INT32_C( 2048779745), INT32_C( 1574210491), INT32_C( 1342409238), INT32_C( 1977780866) }, { INT64_C( 2688498498825378313), -INT64_C( 5167571937445987839) } }, { { INT64_C( 9052628869669357145), INT64_C( 4071532346031210517) }, { -INT32_C( 742563305), INT32_C( 70291949), -INT32_C( 1655426277), INT32_C( 370292413) }, { INT64_C( 9052628868013930868), INT64_C( 4071532346401502930) } }, { { -INT64_C( 6757876032032593732), INT64_C( 109470578694089779) }, { INT32_C( 986988876), -INT32_C( 314702638), -INT32_C( 192245193), -INT32_C( 2079679288) }, { -INT64_C( 6757876032224838925), INT64_C( 109470576614410491) } }, { { INT64_C( 284983309224615108), -INT64_C( 7688256677085225374) }, { -INT32_C( 422633196), INT32_C( 1557335333), INT32_C( 1733320095), -INT32_C( 1108583687) }, { INT64_C( 284983310957935203), -INT64_C( 7688256678193809061) } }, { { INT64_C( 344528265312873462), -INT64_C( 126208633337800863) }, { -INT32_C( 471593026), -INT32_C( 1136675044), INT32_C( 220434196), -INT32_C( 540405783) }, { INT64_C( 344528265533307658), -INT64_C( 126208633878206646) } }, { { INT64_C( 4386626828125919546), -INT64_C( 8400015066188599164) }, { INT32_C( 1366184501), INT32_C( 504212745), INT32_C( 623587644), INT32_C( 2063922752) }, { INT64_C( 4386626828749507190), -INT64_C( 8400015064124676412) } }, { { -INT64_C( 7284793306056775473), INT64_C( 2936196268991040871) }, { -INT32_C( 763744824), INT32_C( 384862170), -INT32_C( 113566792), -INT32_C( 529252591) }, { -INT64_C( 7284793306170342265), INT64_C( 2936196268461788280) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int64x2_t r = simde_vaddw_high_s32(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for 
(int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int64x2_t r = simde_vaddw_high_s32(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddw_high_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint8_t b[16]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(52199), UINT16_C(64907), UINT16_C(29186), UINT16_C(27031), UINT16_C(41023), UINT16_C(13213), UINT16_C(23574), UINT16_C(57179) }, { UINT8_C(137), UINT8_C(213), UINT8_C(177), UINT8_C( 99), UINT8_C( 93), UINT8_C(161), UINT8_C(121), UINT8_C( 21), UINT8_C(188), UINT8_C(180), UINT8_C( 14), UINT8_C(206), UINT8_C(243), UINT8_C(130), UINT8_C(174), UINT8_C(218) }, { UINT16_C(52387), UINT16_C(65087), UINT16_C(29200), UINT16_C(27237), UINT16_C(41266), UINT16_C(13343), UINT16_C(23748), UINT16_C(57397) } }, { { UINT16_C(14669), UINT16_C(20439), UINT16_C(28331), UINT16_C(60344), UINT16_C(21774), UINT16_C( 9502), UINT16_C(31153), UINT16_C(14852) }, { UINT8_C( 79), UINT8_C(181), UINT8_C(157), UINT8_C(172), UINT8_C( 86), UINT8_C( 22), UINT8_C(193), UINT8_C( 18), UINT8_C(202), UINT8_C(208), UINT8_C(224), UINT8_C(189), UINT8_C( 82), UINT8_C(143), UINT8_C(152), UINT8_C(160) }, { UINT16_C(14871), UINT16_C(20647), UINT16_C(28555), UINT16_C(60533), UINT16_C(21856), UINT16_C( 9645), UINT16_C(31305), UINT16_C(15012) } }, { { UINT16_C(28616), UINT16_C(29935), UINT16_C(43230), UINT16_C(60511), UINT16_C(32253), UINT16_C(44817), UINT16_C( 5622), UINT16_C(17897) }, { UINT8_C(202), UINT8_C(135), UINT8_C(241), UINT8_C( 32), UINT8_C(157), UINT8_C(179), UINT8_C( 51), UINT8_C(104), UINT8_C(131), UINT8_C( 19), UINT8_C( 37), UINT8_C(213), UINT8_C(162), UINT8_C(189), UINT8_C(117), UINT8_C(107) }, { UINT16_C(28747), UINT16_C(29954), 
UINT16_C(43267), UINT16_C(60724), UINT16_C(32415), UINT16_C(45006), UINT16_C( 5739), UINT16_C(18004) } }, { { UINT16_C(25901), UINT16_C( 3039), UINT16_C(15885), UINT16_C( 2807), UINT16_C( 2491), UINT16_C(45497), UINT16_C(41758), UINT16_C(59895) }, { UINT8_C( 42), UINT8_C(232), UINT8_C( 9), UINT8_C(199), UINT8_C(155), UINT8_C( 60), UINT8_C( 47), UINT8_C( 30), UINT8_C( 80), UINT8_C( 85), UINT8_C(244), UINT8_C(242), UINT8_C( 18), UINT8_C(105), UINT8_C( 93), UINT8_C( 63) }, { UINT16_C(25981), UINT16_C( 3124), UINT16_C(16129), UINT16_C( 3049), UINT16_C( 2509), UINT16_C(45602), UINT16_C(41851), UINT16_C(59958) } }, { { UINT16_C(15566), UINT16_C(56138), UINT16_C(17018), UINT16_C(13798), UINT16_C(40779), UINT16_C(27111), UINT16_C(56898), UINT16_C(27730) }, { UINT8_C(198), UINT8_C( 92), UINT8_C( 52), UINT8_C( 98), UINT8_C(152), UINT8_C( 99), UINT8_C(128), UINT8_C(232), UINT8_C(184), UINT8_C(116), UINT8_C(219), UINT8_C(203), UINT8_C(222), UINT8_C( 56), UINT8_C( 10), UINT8_C(172) }, { UINT16_C(15750), UINT16_C(56254), UINT16_C(17237), UINT16_C(14001), UINT16_C(41001), UINT16_C(27167), UINT16_C(56908), UINT16_C(27902) } }, { { UINT16_C(21877), UINT16_C(61320), UINT16_C(28311), UINT16_C(57893), UINT16_C( 3085), UINT16_C(20555), UINT16_C(40682), UINT16_C(45244) }, { UINT8_C(250), UINT8_C(240), UINT8_C( 18), UINT8_C(146), UINT8_C( 84), UINT8_C(147), UINT8_C(123), UINT8_C( 12), UINT8_C( 7), UINT8_C( 86), UINT8_C(215), UINT8_C(229), UINT8_C(142), UINT8_C(226), UINT8_C(146), UINT8_C( 3) }, { UINT16_C(21884), UINT16_C(61406), UINT16_C(28526), UINT16_C(58122), UINT16_C( 3227), UINT16_C(20781), UINT16_C(40828), UINT16_C(45247) } }, { { UINT16_C( 6711), UINT16_C(52979), UINT16_C( 6280), UINT16_C(38320), UINT16_C(64292), UINT16_C( 3813), UINT16_C(41625), UINT16_C(37822) }, { UINT8_C(146), UINT8_C(209), UINT8_C( 38), UINT8_C(230), UINT8_C(100), UINT8_C(161), UINT8_C(243), UINT8_C(107), UINT8_C(247), UINT8_C(202), UINT8_C( 81), UINT8_C(133), UINT8_C(172), UINT8_C(227), UINT8_C(137), 
UINT8_C(227) }, { UINT16_C( 6958), UINT16_C(53181), UINT16_C( 6361), UINT16_C(38453), UINT16_C(64464), UINT16_C( 4040), UINT16_C(41762), UINT16_C(38049) } }, { { UINT16_C(31997), UINT16_C(34225), UINT16_C(24980), UINT16_C(47130), UINT16_C( 93), UINT16_C(63174), UINT16_C(33954), UINT16_C(13450) }, { UINT8_C( 85), UINT8_C(176), UINT8_C( 27), UINT8_C(185), UINT8_C( 81), UINT8_C( 14), UINT8_C( 37), UINT8_C( 72), UINT8_C(216), UINT8_C(118), UINT8_C(205), UINT8_C(133), UINT8_C( 89), UINT8_C( 86), UINT8_C(104), UINT8_C( 86) }, { UINT16_C(32213), UINT16_C(34343), UINT16_C(25185), UINT16_C(47263), UINT16_C( 182), UINT16_C(63260), UINT16_C(34058), UINT16_C(13536) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint16x8_t r = simde_vaddw_high_u8(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint16x8_t r = simde_vaddw_high_u8(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddw_high_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint16_t b[8]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(1725635282), UINT32_C(3625907579), UINT32_C(2546984181), UINT32_C(3201063273) }, { UINT16_C(59145), UINT16_C(23160), UINT16_C(40437), UINT16_C(52642), UINT16_C(28435), UINT16_C(27730), UINT16_C(48070), UINT16_C(39106) }, { UINT32_C(1725663717), UINT32_C(3625935309), UINT32_C(2547032251), UINT32_C(3201102379) } }, { { UINT32_C(1358929365), UINT32_C(2284395922), UINT32_C(1797257218), UINT32_C(1512696657) }, { 
UINT16_C(41426), UINT16_C(51124), UINT16_C(22078), UINT16_C(20885), UINT16_C(59333), UINT16_C(35773), UINT16_C(32674), UINT16_C(30500) }, { UINT32_C(1358988698), UINT32_C(2284431695), UINT32_C(1797289892), UINT32_C(1512727157) } }, { { UINT32_C(2949128988), UINT32_C(1110962496), UINT32_C( 984438505), UINT32_C( 345298754) }, { UINT16_C(18552), UINT16_C(47068), UINT16_C(29086), UINT16_C(25352), UINT16_C(50776), UINT16_C(64495), UINT16_C( 4933), UINT16_C(25202) }, { UINT32_C(2949179764), UINT32_C(1111026991), UINT32_C( 984443438), UINT32_C( 345323956) } }, { { UINT32_C(1980840502), UINT32_C( 347686955), UINT32_C(3763234462), UINT32_C(3069567549) }, { UINT16_C(53546), UINT16_C(51309), UINT16_C(30018), UINT16_C(39468), UINT16_C( 6971), UINT16_C(33173), UINT16_C( 2094), UINT16_C(25827) }, { UINT32_C(1980847473), UINT32_C( 347720128), UINT32_C(3763236556), UINT32_C(3069593376) } }, { { UINT32_C(1859843138), UINT32_C(3665990460), UINT32_C( 935055866), UINT32_C(3740119219) }, { UINT16_C(23169), UINT16_C(50086), UINT16_C(53968), UINT16_C( 2909), UINT16_C(62445), UINT16_C( 7052), UINT16_C(28667), UINT16_C(15743) }, { UINT32_C(1859905583), UINT32_C(3665997512), UINT32_C( 935084533), UINT32_C(3740134962) } }, { { UINT32_C(2678807139), UINT32_C(3883544301), UINT32_C(2988389887), UINT32_C(1720716517) }, { UINT16_C(14183), UINT16_C(14121), UINT16_C(34313), UINT16_C(63298), UINT16_C(53113), UINT16_C(29714), UINT16_C(37438), UINT16_C(41650) }, { UINT32_C(2678860252), UINT32_C(3883574015), UINT32_C(2988427325), UINT32_C(1720758167) } }, { { UINT32_C(3644939756), UINT32_C(2327952267), UINT32_C(3577602288), UINT32_C(1396428268) }, { UINT16_C(25604), UINT16_C( 3722), UINT16_C(52715), UINT16_C(25605), UINT16_C( 6044), UINT16_C(56025), UINT16_C(35753), UINT16_C(38268) }, { UINT32_C(3644945800), UINT32_C(2328008292), UINT32_C(3577638041), UINT32_C(1396466536) } }, { { UINT32_C(1953480424), UINT32_C(1795043449), UINT32_C(4232002320), UINT32_C( 223378185) }, { UINT16_C(56031), 
UINT16_C(51739), UINT16_C( 8359), UINT16_C(17199), UINT16_C( 2104), UINT16_C(57630), UINT16_C(39571), UINT16_C(31607) }, { UINT32_C(1953482528), UINT32_C(1795101079), UINT32_C(4232041891), UINT32_C( 223409792) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint32x4_t r = simde_vaddw_high_u16(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint32x4_t r = simde_vaddw_high_u16(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vaddw_high_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint32_t b[4]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 2755338853839529560), UINT64_C(15438465924354702121) }, { UINT32_C(4104149837), UINT32_C(3023622012), UINT32_C(1788171991), UINT32_C(1239813360) }, { UINT64_C( 2755338855627701551), UINT64_C(15438465925594515481) } }, { { UINT64_C(17090694131083892210), UINT64_C( 1197781320401375698) }, { UINT32_C( 923090875), UINT32_C(3874176271), UINT32_C(2203156627), UINT32_C(2144089997) }, { UINT64_C(17090694133287048837), UINT64_C( 1197781322545465695) } }, { { UINT64_C( 1278378250145949452), UINT64_C(17860002049292950535) }, { UINT32_C(3341738168), UINT32_C(2964199709), UINT32_C( 641007513), UINT32_C(1118175286) }, { UINT64_C( 1278378250786956965), UINT64_C(17860002050411125821) } }, { { UINT64_C(17021301895468297960), UINT64_C( 4890788845655101868) }, { UINT32_C(2416577907), UINT32_C(3208755238), UINT32_C(3991303607), UINT32_C(1563462773) }, { 
UINT64_C(17021301899459601567), UINT64_C( 4890788847218564641) } }, { { UINT64_C(13729992102635651770), UINT64_C( 5159583470156399053) }, { UINT32_C(3101140370), UINT32_C( 360126557), UINT32_C( 50486669), UINT32_C(2740990697) }, { UINT64_C(13729992102686138439), UINT64_C( 5159583472897389750) } }, { { UINT64_C( 7873924513363321973), UINT64_C( 155760600566524686) }, { UINT32_C(1639579652), UINT32_C(2792763673), UINT32_C(2024372623), UINT32_C( 538708651) }, { UINT64_C( 7873924515387694596), UINT64_C( 155760601105233337) } }, { { UINT64_C( 4106135152804454126), UINT64_C( 2542637266243338455) }, { UINT32_C(1568932676), UINT32_C(3288595253), UINT32_C( 524070260), UINT32_C(2772457655) }, { UINT64_C( 4106135153328524386), UINT64_C( 2542637269015796110) } }, { { UINT64_C(12328938046261356470), UINT64_C(13488628841000940783) }, { UINT32_C(1964553792), UINT32_C( 624499889), UINT32_C(2151970249), UINT32_C(2217051342) }, { UINT64_C(12328938048413326719), UINT64_C(13488628843217992125) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint64x2_t r = simde_vaddw_high_u32(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint64x2_t r = simde_vaddw_high_u32(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vaddw_high_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vaddw_high_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vaddw_high_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vaddw_high_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vaddw_high_u16) 
SIMDE_TEST_FUNC_LIST_ENTRY(vaddw_high_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/and.c000066400000000000000000001434411400333146700162340ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN and #include "test-neon.h" #include "../../../simde/arm/neon/and.h" static int test_simde_vand_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 20), -INT8_C( 27), -INT8_C( 113), INT8_C( 29), INT8_C( 103), INT8_C( 9), -INT8_C( 114), INT8_C( 56) }, { -INT8_C( 67), INT8_C( 79), INT8_C( 119), -INT8_C( 97), INT8_C( 47), INT8_C( 81), -INT8_C( 48), INT8_C( 83) }, { INT8_C( 20), INT8_C( 69), INT8_C( 7), INT8_C( 29), INT8_C( 39), INT8_C( 1), INT8_MIN, INT8_C( 16) } }, { { INT8_C( 49), INT8_C( 9), -INT8_C( 33), INT8_C( 41), -INT8_C( 35), INT8_C( 105), -INT8_C( 38), INT8_C( 92) }, { -INT8_C( 11), -INT8_C( 35), -INT8_C( 64), -INT8_C( 71), INT8_C( 47), -INT8_C( 42), -INT8_C( 74), INT8_C( 67) }, { INT8_C( 49), INT8_C( 9), -INT8_C( 64), INT8_C( 41), INT8_C( 13), INT8_C( 64), -INT8_C( 110), INT8_C( 64) } }, { { -INT8_C( 69), INT8_C( 69), INT8_C( 96), INT8_C( 34), INT8_C( 78), -INT8_C( 18), INT8_C( 90), INT8_C( 11) }, { INT8_C( 61), -INT8_C( 46), -INT8_C( 86), INT8_C( 108), INT8_C( 35), INT8_C( 123), -INT8_C( 64), INT8_C( 84) }, { INT8_C( 57), INT8_C( 64), INT8_C( 32), INT8_C( 32), INT8_C( 2), INT8_C( 106), INT8_C( 64), INT8_C( 0) } }, { { -INT8_C( 124), -INT8_C( 97), INT8_C( 126), INT8_C( 97), INT8_C( 8), INT8_C( 88), -INT8_C( 67), -INT8_C( 3) }, { INT8_C( 53), INT8_C( 125), -INT8_C( 73), INT8_C( 101), INT8_C( 83), INT8_C( 109), -INT8_C( 88), INT8_C( 15) }, { INT8_C( 4), INT8_C( 29), INT8_C( 54), INT8_C( 97), INT8_C( 0), INT8_C( 72), -INT8_C( 88), INT8_C( 13) } }, { { -INT8_C( 78), INT8_C( 9), INT8_C( 49), INT8_C( 1), -INT8_C( 9), -INT8_C( 116), INT8_C( 12), INT8_C( 53) }, { INT8_C( 94), -INT8_C( 73), -INT8_C( 95), -INT8_C( 127), INT8_C( 50), INT8_C( 97), -INT8_C( 42), -INT8_C( 74) }, { 
INT8_C( 18), INT8_C( 1), INT8_C( 33), INT8_C( 1), INT8_C( 50), INT8_C( 0), INT8_C( 4), INT8_C( 52) } }, { { INT8_C( 1), INT8_C( 84), INT8_C( 23), INT8_C( 9), -INT8_C( 84), -INT8_C( 44), INT8_C( 7), -INT8_C( 31) }, { INT8_C( 82), -INT8_C( 66), INT8_C( 70), -INT8_C( 91), INT8_C( 43), -INT8_C( 17), -INT8_C( 76), -INT8_C( 35) }, { INT8_C( 0), INT8_C( 20), INT8_C( 6), INT8_C( 1), INT8_C( 40), -INT8_C( 60), INT8_C( 4), -INT8_C( 63) } }, { { -INT8_C( 8), -INT8_C( 26), -INT8_C( 34), -INT8_C( 17), INT8_C( 114), -INT8_C( 21), INT8_C( 36), -INT8_C( 48) }, { -INT8_C( 94), -INT8_C( 58), INT8_C( 81), -INT8_C( 44), INT8_C( 39), INT8_C( 39), -INT8_C( 118), INT8_C( 40) }, { -INT8_C( 96), -INT8_C( 58), INT8_C( 80), -INT8_C( 60), INT8_C( 34), INT8_C( 35), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 123), -INT8_C( 95), INT8_C( 50), INT8_C( 39), INT8_C( 117), INT8_C( 57), INT8_C( 9), -INT8_C( 57) }, { -INT8_C( 9), INT8_C( 79), INT8_C( 109), INT8_C( 34), INT8_C( 62), INT8_C( 33), -INT8_C( 1), INT8_C( 54) }, { INT8_C( 115), INT8_C( 1), INT8_C( 32), INT8_C( 34), INT8_C( 52), INT8_C( 33), INT8_C( 9), INT8_C( 6) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vand_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vand_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 8050), INT16_C( 12298), INT16_C( 9128), -INT16_C( 13417) }, { INT16_C( 29717), INT16_C( 1908), INT16_C( 23523), -INT16_C( 22120) }, { INT16_C( 24580), INT16_C( 0), INT16_C( 928), -INT16_C( 30320) } }, { { -INT16_C( 24520), INT16_C( 23713), INT16_C( 7198), -INT16_C( 10071) }, { -INT16_C( 5627), -INT16_C( 9529), -INT16_C( 12877), INT16_C( 17102) }, { -INT16_C( 24576), INT16_C( 22657), INT16_C( 3090), INT16_C( 16520) } }, { { -INT16_C( 9811), INT16_C( 
22130), INT16_C( 2556), INT16_C( 4385) }, { -INT16_C( 27010), INT16_C( 24856), -INT16_C( 19983), INT16_C( 10507) }, { -INT16_C( 28628), INT16_C( 16400), INT16_C( 496), INT16_C( 257) } }, { { -INT16_C( 21423), INT16_C( 28550), INT16_C( 12233), -INT16_C( 12729) }, { INT16_C( 3609), -INT16_C( 12888), INT16_C( 30427), -INT16_C( 30449) }, { INT16_C( 3089), INT16_C( 19840), INT16_C( 9929), -INT16_C( 30713) } }, { { -INT16_C( 32433), INT16_C( 19679), INT16_C( 138), INT16_C( 2141) }, { INT16_C( 30358), -INT16_C( 30614), INT16_C( 29991), INT16_C( 30897) }, { INT16_C( 6), INT16_C( 2122), INT16_C( 2), INT16_C( 2065) } }, { { INT16_C( 14113), -INT16_C( 5401), INT16_C( 12134), -INT16_C( 32584) }, { INT16_C( 24637), INT16_C( 6477), INT16_C( 23767), INT16_C( 9890) }, { INT16_C( 8225), INT16_C( 2117), INT16_C( 3142), INT16_C( 160) } }, { { -INT16_C( 32291), INT16_C( 26482), -INT16_C( 12159), INT16_C( 6256) }, { -INT16_C( 9658), INT16_C( 28064), INT16_C( 20815), INT16_C( 28901) }, { -INT16_C( 32700), INT16_C( 25888), INT16_C( 20481), INT16_C( 4192) } }, { { -INT16_C( 13175), -INT16_C( 4261), INT16_C( 5115), INT16_C( 14703) }, { -INT16_C( 17292), INT16_C( 19282), -INT16_C( 3048), -INT16_C( 2703) }, { -INT16_C( 29696), INT16_C( 19282), INT16_C( 4120), INT16_C( 12641) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vand_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vand_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 2141526501), -INT32_C( 1272924119) }, { INT32_C( 780249120), -INT32_C( 1757745507) }, { INT32_C( 40960), -INT32_C( 1809799159) } }, { { -INT32_C( 2127995682), INT32_C( 770592686) }, { -INT32_C( 1719305170), -INT32_C( 698477125) }, { -INT32_C( 2130617330), INT32_C( 72222122) } 
}, { { INT32_C( 626505980), -INT32_C( 1864796304) }, { -INT32_C( 1111598304), INT32_C( 659880265) }, { INT32_C( 622204960), INT32_C( 5271872) } }, { { INT32_C( 78151253), -INT32_C( 63858739) }, { -INT32_C( 829049325), -INT32_C( 928648244) }, { INT32_C( 75511313), -INT32_C( 937323572) } }, { { INT32_C( 485358764), -INT32_C( 1817328013) }, { INT32_C( 1800497953), -INT32_C( 1164794524) }, { INT32_C( 138504224), -INT32_C( 1837071264) } }, { { -INT32_C( 239191261), -INT32_C( 437391406) }, { INT32_C( 1907589797), INT32_C( 574183542) }, { INT32_C( 1907491361), INT32_C( 539576402) } }, { { -INT32_C( 935451052), INT32_C( 240905197) }, { -INT32_C( 1149653929), INT32_C( 1970605138) }, { -INT32_C( 2009586604), INT32_C( 72419392) } }, { { INT32_C( 426128199), -INT32_C( 922856670) }, { INT32_C( 1278849494), INT32_C( 1584296714) }, { INT32_C( 136327494), INT32_C( 1215189762) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vand_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vand_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; int64_t b[1]; int64_t r[1]; } test_vec[] = { { { INT64_C( 807394475972528543) }, { INT64_C( 594141505336878048) }, { INT64_C( 591186017644478848) } }, { { -INT64_C( 8372368850300909537) }, { -INT64_C( 8736313779588373280) }, { -INT64_C( 9024579357292576768) } }, { { INT64_C( 572871018235541225) }, { -INT64_C( 3568996398074241053) }, { INT64_C( 463906115105756897) } }, { { -INT64_C( 317422293695611863) }, { INT64_C( 2998068317509771310) }, { INT64_C( 2997218119849713704) } }, { { -INT64_C( 3950776287104948483) }, { -INT64_C( 2854522746623668508) }, { -INT64_C( 4026217353836493084) } }, { { INT64_C( 6306583366756261486) }, { INT64_C( 5676233526956788316) }, { INT64_C( 5081195077962041932) } }, { { -INT64_C( 
4441217883490232146) }, { -INT64_C( 5227350641175259155) }, { -INT64_C( 9055445508946751316) } }, { { INT64_C( 3609290772723095958) }, { INT64_C( 4138689903570354896) }, { INT64_C( 3460599049529327760) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r = simde_vand_s64(a, b); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; } static int test_simde_vand_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(133), UINT8_C( 80), UINT8_C( 15), UINT8_C( 44), UINT8_C( 61), UINT8_C(139), UINT8_C(144), UINT8_C( 96) }, { UINT8_C(132), UINT8_C( 43), UINT8_C( 20), UINT8_C( 6), UINT8_C(133), UINT8_C( 67), UINT8_C( 89), UINT8_C(197) }, { UINT8_C(132), UINT8_C( 0), UINT8_C( 4), UINT8_C( 4), UINT8_C( 5), UINT8_C( 3), UINT8_C( 16), UINT8_C( 64) } }, { { UINT8_C(139), UINT8_C(204), UINT8_C(203), UINT8_C( 81), UINT8_C(207), UINT8_C(212), UINT8_C( 98), UINT8_C( 41) }, { UINT8_C(237), UINT8_C(207), UINT8_C( 36), UINT8_C(127), UINT8_C(146), UINT8_C(155), UINT8_C(180), UINT8_C( 24) }, { UINT8_C(137), UINT8_C(204), UINT8_C( 0), UINT8_C( 81), UINT8_C(130), UINT8_C(144), UINT8_C( 32), UINT8_C( 8) } }, { { UINT8_C(236), UINT8_C(195), UINT8_C( 68), UINT8_C( 41), UINT8_C( 78), UINT8_C(213), UINT8_C(138), UINT8_C(211) }, { UINT8_C( 0), UINT8_C(158), UINT8_C(217), UINT8_C(133), UINT8_C(225), UINT8_C( 51), UINT8_C( 74), UINT8_C(108) }, { UINT8_C( 0), UINT8_C(130), UINT8_C( 64), UINT8_C( 1), UINT8_C( 64), UINT8_C( 17), UINT8_C( 10), UINT8_C( 64) } }, { { UINT8_MAX, UINT8_C( 21), UINT8_C(189), UINT8_C(206), UINT8_C(234), UINT8_C( 31), UINT8_C(247), UINT8_C(215) }, { UINT8_C(239), UINT8_C( 28), UINT8_C( 86), UINT8_C(129), UINT8_C(183), UINT8_C( 10), UINT8_C(153), UINT8_C(163) }, { UINT8_C(239), UINT8_C( 20), UINT8_C( 20), UINT8_C(128), UINT8_C(162), UINT8_C( 
10), UINT8_C(145), UINT8_C(131) } }, { { UINT8_C(205), UINT8_C(222), UINT8_C(205), UINT8_C( 27), UINT8_C(179), UINT8_C( 87), UINT8_C(238), UINT8_C(179) }, { UINT8_C(245), UINT8_C(200), UINT8_C( 56), UINT8_C(214), UINT8_C(251), UINT8_C(130), UINT8_C( 66), UINT8_C(250) }, { UINT8_C(197), UINT8_C(200), UINT8_C( 8), UINT8_C( 18), UINT8_C(179), UINT8_C( 2), UINT8_C( 66), UINT8_C(178) } }, { { UINT8_C(152), UINT8_C( 0), UINT8_C(200), UINT8_C(130), UINT8_C( 31), UINT8_C(192), UINT8_C( 89), UINT8_C( 14) }, { UINT8_C(220), UINT8_C(175), UINT8_C(144), UINT8_C(147), UINT8_C(185), UINT8_C( 41), UINT8_C( 55), UINT8_C(134) }, { UINT8_C(152), UINT8_C( 0), UINT8_C(128), UINT8_C(130), UINT8_C( 25), UINT8_C( 0), UINT8_C( 17), UINT8_C( 6) } }, { { UINT8_C( 7), UINT8_C( 4), UINT8_C(161), UINT8_C(186), UINT8_C( 91), UINT8_C(144), UINT8_C(109), UINT8_C( 80) }, { UINT8_C( 88), UINT8_C(165), UINT8_C( 38), UINT8_C( 83), UINT8_C( 40), UINT8_C(104), UINT8_C( 77), UINT8_C(192) }, { UINT8_C( 0), UINT8_C( 4), UINT8_C( 32), UINT8_C( 18), UINT8_C( 8), UINT8_C( 0), UINT8_C( 77), UINT8_C( 64) } }, { { UINT8_C(104), UINT8_C( 21), UINT8_C( 66), UINT8_C(136), UINT8_C(213), UINT8_C(155), UINT8_C(150), UINT8_C(177) }, { UINT8_C( 74), UINT8_C( 38), UINT8_C( 69), UINT8_C( 3), UINT8_C( 80), UINT8_C(124), UINT8_C(137), UINT8_C( 87) }, { UINT8_C( 72), UINT8_C( 4), UINT8_C( 64), UINT8_C( 0), UINT8_C( 80), UINT8_C( 24), UINT8_C(128), UINT8_C( 17) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vand_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vand_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(50812), UINT16_C(16903), UINT16_C(62815), UINT16_C(20680) }, { UINT16_C(26826), UINT16_C(61270), UINT16_C(43655), 
UINT16_C(46605) }, { UINT16_C(16456), UINT16_C(16902), UINT16_C(40967), UINT16_C( 4104) } }, { { UINT16_C(38137), UINT16_C( 8365), UINT16_C( 5439), UINT16_C(24413) }, { UINT16_C(56762), UINT16_C(34220), UINT16_C(16109), UINT16_C(27019) }, { UINT16_C(38072), UINT16_C( 172), UINT16_C( 5165), UINT16_C(18697) } }, { { UINT16_C(37380), UINT16_C(25515), UINT16_C(29831), UINT16_C(20916) }, { UINT16_C( 2780), UINT16_C(25408), UINT16_C(20148), UINT16_C(44569) }, { UINT16_C( 516), UINT16_C(25344), UINT16_C(17540), UINT16_C( 16) } }, { { UINT16_C(50914), UINT16_C( 8654), UINT16_C(11228), UINT16_C(38528) }, { UINT16_C(11528), UINT16_C(62747), UINT16_C(42603), UINT16_C(28511) }, { UINT16_C( 1024), UINT16_C( 8458), UINT16_C( 8776), UINT16_C( 1536) } }, { { UINT16_C( 2616), UINT16_C(49107), UINT16_C(34686), UINT16_C(23057) }, { UINT16_C(20881), UINT16_C(18110), UINT16_C(55199), UINT16_C(33268) }, { UINT16_C( 16), UINT16_C( 1682), UINT16_C(34590), UINT16_C( 16) } }, { { UINT16_C(49822), UINT16_C(31394), UINT16_C( 9197), UINT16_C(62736) }, { UINT16_C(11088), UINT16_C(48106), UINT16_C(18898), UINT16_C( 2602) }, { UINT16_C( 528), UINT16_C(15010), UINT16_C( 448), UINT16_C( 0) } }, { { UINT16_C(64852), UINT16_C(53962), UINT16_C(56196), UINT16_C( 5677) }, { UINT16_C(60204), UINT16_C(52316), UINT16_C(20674), UINT16_C(24653) }, { UINT16_C(59652), UINT16_C(49224), UINT16_C(20608), UINT16_C( 13) } }, { { UINT16_C(61458), UINT16_C(65498), UINT16_C(59923), UINT16_C(25588) }, { UINT16_C(56854), UINT16_C(59422), UINT16_C(18472), UINT16_C(31986) }, { UINT16_C(53266), UINT16_C(59418), UINT16_C(18432), UINT16_C(24816) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vand_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vand_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { 
uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(2917061730), UINT32_C(2148387771) }, { UINT32_C(2754112333), UINT32_C( 761295916) }, { UINT32_C(2752004160), UINT32_C( 16424) } }, { { UINT32_C(1254698587), UINT32_C(1646806080) }, { UINT32_C( 516581298), UINT32_C( 271576749) }, { UINT32_C( 180889106), UINT32_C( 2638848) } }, { { UINT32_C(2109541826), UINT32_C( 654232281) }, { UINT32_C(1657415222), UINT32_C(4136577691) }, { UINT32_C(1619526658), UINT32_C( 646843033) } }, { { UINT32_C(2571130969), UINT32_C(1341876637) }, { UINT32_C(2104411600), UINT32_C(1972215219) }, { UINT32_C( 423641168), UINT32_C(1166608785) } }, { { UINT32_C(2230536875), UINT32_C(1269494037) }, { UINT32_C(2997712151), UINT32_C(4171840671) }, { UINT32_C(2158051331), UINT32_C(1218981909) } }, { { UINT32_C( 831646100), UINT32_C( 578915410) }, { UINT32_C( 77655889), UINT32_C( 930753932) }, { UINT32_C( 8448272), UINT32_C( 570428416) } }, { { UINT32_C(2377870712), UINT32_C(1977116254) }, { UINT32_C(2049476059), UINT32_C(1433653697) }, { UINT32_C( 136840536), UINT32_C(1431322688) } }, { { UINT32_C( 226952378), UINT32_C(3794733201) }, { UINT32_C(2213007351), UINT32_C(1975214589) }, { UINT32_C( 25625778), UINT32_C(1613430929) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vand_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vand_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C( 8703332882612638225) }, { UINT64_C( 2245985253790906599) }, { UINT64_C( 1731725454804013057) } }, { { UINT64_C( 3669248827220894596) }, { UINT64_C(16650248547897175472) }, { UINT64_C( 2450380779709342080) } }, { { UINT64_C( 9619316748342078434) }, { UINT64_C( 5941927748976735909) }, { UINT64_C( 
32833126715695776) } }, { { UINT64_C(17630300909072185882) }, { UINT64_C( 8940891669190675030) }, { UINT64_C( 8358796357797677586) } }, { { UINT64_C( 5601634229709634201) }, { UINT64_C(11067341544177764470) }, { UINT64_C( 690458285450627088) } }, { { UINT64_C(15405333827829241234) }, { UINT64_C( 3304822528098313927) }, { UINT64_C( 416605042737546370) } }, { { UINT64_C(15349711599961399056) }, { UINT64_C( 5963805770450118826) }, { UINT64_C( 5764925557443659776) } }, { { UINT64_C( 6420429453816026566) }, { UINT64_C( 4817328995344980761) }, { UINT64_C( 4618602023945244928) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_vand_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; } static int test_simde_vandq_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { -INT8_C( 30), INT8_C( 5), -INT8_C( 37), -INT8_C( 20), -INT8_C( 83), INT8_C( 25), -INT8_C( 80), INT8_C( 43), -INT8_C( 60), INT8_C( 67), INT8_C( 86), -INT8_C( 68), -INT8_C( 54), -INT8_C( 75), -INT8_C( 102), -INT8_C( 53) }, { -INT8_C( 21), INT8_C( 69), INT8_C( 71), INT8_C( 118), INT8_C( 38), -INT8_C( 95), -INT8_C( 19), -INT8_C( 73), -INT8_C( 66), INT8_C( 41), INT8_C( 44), INT8_C( 65), -INT8_C( 73), INT8_C( 82), -INT8_C( 18), -INT8_C( 103) }, { -INT8_C( 30), INT8_C( 5), INT8_C( 67), INT8_C( 100), INT8_C( 36), INT8_C( 1), -INT8_C( 96), INT8_C( 35), -INT8_C( 124), INT8_C( 1), INT8_C( 4), INT8_C( 0), -INT8_C( 126), INT8_C( 16), -INT8_C( 118), -INT8_C( 119) } }, { { INT8_C( 87), -INT8_C( 54), -INT8_C( 122), INT8_C( 4), -INT8_C( 29), INT8_C( 54), INT8_C( 48), -INT8_C( 89), INT8_C( 121), -INT8_C( 122), INT8_C( 99), INT8_C( 67), INT8_C( 60), -INT8_C( 3), INT8_C( 15), INT8_C( 39) }, { INT8_C( 66), INT8_C( 86), -INT8_C( 99), INT8_C( 105), -INT8_C( 9), -INT8_C( 117), INT8_C( 
32), -INT8_C( 75), -INT8_C( 76), INT8_C( 76), -INT8_C( 10), INT8_C( 107), -INT8_C( 98), -INT8_C( 27), INT8_C( 5), -INT8_C( 10) }, { INT8_C( 66), INT8_C( 66), -INT8_C( 124), INT8_C( 0), -INT8_C( 29), INT8_C( 2), INT8_C( 32), -INT8_C( 91), INT8_C( 48), INT8_C( 4), INT8_C( 98), INT8_C( 67), INT8_C( 28), -INT8_C( 27), INT8_C( 5), INT8_C( 38) } }, { { -INT8_C( 81), -INT8_C( 117), -INT8_C( 6), -INT8_C( 110), -INT8_C( 63), INT8_C( 42), INT8_C( 57), INT8_C( 58), -INT8_C( 79), -INT8_C( 99), INT8_C( 126), -INT8_C( 19), -INT8_C( 102), -INT8_C( 115), INT8_C( 20), -INT8_C( 35) }, { -INT8_C( 29), -INT8_C( 78), INT8_C( 70), -INT8_C( 38), INT8_C( 61), INT8_C( 102), -INT8_C( 113), -INT8_C( 15), -INT8_C( 78), -INT8_C( 122), INT8_C( 93), INT8_C( 80), INT8_C( 107), INT8_C( 98), INT8_C( 70), INT8_C( 26) }, { -INT8_C( 93), -INT8_C( 126), INT8_C( 66), -INT8_C( 110), INT8_C( 1), INT8_C( 34), INT8_C( 9), INT8_C( 48), -INT8_C( 80), -INT8_C( 124), INT8_C( 92), INT8_C( 64), INT8_C( 10), INT8_C( 0), INT8_C( 4), INT8_C( 24) } }, { { -INT8_C( 19), INT8_C( 65), -INT8_C( 84), -INT8_C( 82), INT8_C( 107), -INT8_C( 27), -INT8_C( 24), INT8_C( 28), -INT8_C( 126), INT8_C( 102), INT8_C( 9), INT8_C( 29), -INT8_C( 13), INT8_C( 30), -INT8_C( 6), -INT8_C( 42) }, { -INT8_C( 48), INT8_C( 64), -INT8_C( 80), INT8_C( 13), -INT8_C( 90), INT8_C( 64), -INT8_C( 2), INT8_C( 88), -INT8_C( 58), INT8_C( 91), -INT8_C( 88), INT8_C( 49), -INT8_C( 67), -INT8_C( 17), INT8_C( 75), -INT8_C( 86) }, { -INT8_C( 64), INT8_C( 64), -INT8_C( 96), INT8_C( 12), INT8_C( 34), INT8_C( 64), -INT8_C( 24), INT8_C( 24), -INT8_C( 126), INT8_C( 66), INT8_C( 8), INT8_C( 17), -INT8_C( 79), INT8_C( 14), INT8_C( 74), -INT8_C( 126) } }, { { INT8_C( 48), -INT8_C( 9), INT8_C( 88), -INT8_C( 101), -INT8_C( 36), INT8_C( 65), -INT8_C( 72), INT8_C( 95), -INT8_C( 89), -INT8_C( 63), INT8_C( 124), -INT8_C( 101), -INT8_C( 33), INT8_C( 118), INT8_C( 113), -INT8_C( 81) }, { -INT8_C( 74), INT8_C( 34), -INT8_C( 68), INT8_C( 92), INT8_C( 98), -INT8_C( 69), -INT8_C( 
76), INT8_C( 40), INT8_C( 22), INT8_C( 92), INT8_C( 89), -INT8_C( 44), INT8_C( 75), -INT8_C( 92), INT8_C( 126), INT8_C( 123) }, { INT8_C( 48), INT8_C( 34), INT8_C( 24), INT8_C( 24), INT8_C( 64), INT8_C( 1), -INT8_C( 80), INT8_C( 8), INT8_C( 6), INT8_C( 64), INT8_C( 88), -INT8_C( 112), INT8_C( 75), INT8_C( 36), INT8_C( 112), INT8_C( 43) } }, { { -INT8_C( 101), -INT8_C( 41), INT8_C( 23), INT8_C( 119), INT8_C( 24), -INT8_C( 49), -INT8_C( 42), -INT8_C( 65), -INT8_C( 112), INT8_C( 82), INT8_C( 90), INT8_C( 112), -INT8_C( 56), -INT8_C( 52), INT8_C( 31), INT8_C( 126) }, { -INT8_C( 18), -INT8_C( 36), -INT8_C( 38), INT8_C( 80), -INT8_C( 105), -INT8_C( 114), INT8_C( 120), -INT8_C( 83), -INT8_C( 21), -INT8_C( 47), -INT8_C( 127), INT8_C( 54), INT8_C( 117), INT8_C( 0), -INT8_C( 78), INT8_C( 16) }, { -INT8_C( 118), -INT8_C( 44), INT8_C( 18), INT8_C( 80), INT8_C( 16), -INT8_C( 114), INT8_C( 80), -INT8_C( 83), INT8_MIN, INT8_C( 80), INT8_C( 0), INT8_C( 48), INT8_C( 64), INT8_C( 0), INT8_C( 18), INT8_C( 16) } }, { { -INT8_C( 41), -INT8_C( 55), -INT8_C( 121), -INT8_C( 17), -INT8_C( 104), INT8_C( 94), -INT8_C( 82), INT8_C( 40), -INT8_C( 80), INT8_C( 9), -INT8_C( 104), INT8_C( 121), -INT8_C( 43), -INT8_C( 72), -INT8_C( 9), -INT8_C( 61) }, { -INT8_C( 108), -INT8_C( 46), INT8_C( 19), INT8_C( 43), INT8_C( 96), -INT8_C( 117), -INT8_C( 40), INT8_C( 75), INT8_C( 92), INT8_C( 90), -INT8_C( 126), -INT8_C( 47), INT8_C( 90), INT8_C( 52), -INT8_C( 31), INT8_C( 49) }, { -INT8_C( 108), -INT8_C( 64), INT8_C( 3), INT8_C( 43), INT8_C( 0), INT8_C( 10), -INT8_C( 120), INT8_C( 8), INT8_C( 16), INT8_C( 8), INT8_MIN, INT8_C( 81), INT8_C( 80), INT8_C( 48), -INT8_C( 31), INT8_C( 1) } }, { { -INT8_C( 3), INT8_C( 104), INT8_C( 32), -INT8_C( 107), -INT8_C( 58), -INT8_C( 50), -INT8_C( 67), INT8_C( 119), -INT8_C( 41), INT8_C( 86), -INT8_C( 16), -INT8_C( 84), INT8_C( 14), -INT8_C( 25), INT8_C( 111), -INT8_C( 94) }, { -INT8_C( 71), -INT8_C( 126), -INT8_C( 51), INT8_C( 26), INT8_C( 13), -INT8_C( 91), INT8_C( 101), 
INT8_C( 105), -INT8_C( 1), -INT8_C( 25), INT8_C( 58), INT8_C( 89), INT8_C( 27), INT8_C( 27), -INT8_C( 118), INT8_C( 24) }, { -INT8_C( 71), INT8_C( 0), INT8_C( 0), INT8_C( 16), INT8_C( 4), -INT8_C( 124), INT8_C( 37), INT8_C( 97), -INT8_C( 41), INT8_C( 70), INT8_C( 48), INT8_C( 8), INT8_C( 10), INT8_C( 3), INT8_C( 10), INT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vandq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vandq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 26169), -INT16_C( 7830), INT16_C( 2962), -INT16_C( 1871), INT16_C( 6073), INT16_C( 16182), -INT16_C( 5833), INT16_C( 22880) }, { -INT16_C( 13541), INT16_C( 27717), INT16_C( 26998), INT16_C( 17851), -INT16_C( 12177), -INT16_C( 18011), -INT16_C( 6035), INT16_C( 13323) }, { -INT16_C( 30461), INT16_C( 24640), INT16_C( 2322), INT16_C( 16561), INT16_C( 4137), INT16_C( 14628), -INT16_C( 6107), INT16_C( 4096) } }, { { INT16_C( 30081), INT16_C( 4886), -INT16_C( 14463), INT16_C( 14859), INT16_C( 17118), INT16_C( 5498), -INT16_C( 9685), INT16_C( 18031) }, { -INT16_C( 19291), INT16_C( 7090), INT16_C( 28190), -INT16_C( 29343), INT16_C( 1598), -INT16_C( 21690), INT16_C( 21231), INT16_C( 28896) }, { INT16_C( 13441), INT16_C( 4882), INT16_C( 17920), INT16_C( 2049), INT16_C( 542), INT16_C( 322), INT16_C( 21035), INT16_C( 16480) } }, { { -INT16_C( 2361), INT16_C( 18564), -INT16_C( 28739), -INT16_C( 25469), -INT16_C( 559), -INT16_C( 847), INT16_C( 8407), INT16_C( 32067) }, { -INT16_C( 2603), -INT16_C( 3176), -INT16_C( 1693), -INT16_C( 23936), -INT16_C( 14848), -INT16_C( 4275), INT16_C( 11544), -INT16_C( 8097) }, { -INT16_C( 2875), INT16_C( 16512), -INT16_C( 30431), -INT16_C( 32640), -INT16_C( 15360), -INT16_C( 
5119), INT16_C( 8208), INT16_C( 24643) } }, { { -INT16_C( 7389), -INT16_C( 7896), -INT16_C( 21645), INT16_C( 17533), INT16_C( 11944), -INT16_C( 32703), -INT16_C( 31665), INT16_C( 9469) }, { -INT16_C( 27271), -INT16_C( 8937), -INT16_C( 26737), -INT16_C( 28801), -INT16_C( 13219), INT16_C( 30334), -INT16_C( 8710), INT16_C( 7510) }, { -INT16_C( 32479), -INT16_C( 16128), -INT16_C( 31997), INT16_C( 1149), INT16_C( 3080), INT16_C( 64), -INT16_C( 31670), INT16_C( 1108) } }, { { INT16_C( 32449), INT16_C( 13566), INT16_C( 31530), -INT16_C( 11656), -INT16_C( 18006), -INT16_C( 1710), INT16_C( 20285), -INT16_C( 18659) }, { INT16_C( 13541), INT16_C( 29844), INT16_C( 5067), INT16_C( 10243), -INT16_C( 32289), -INT16_C( 9826), -INT16_C( 2978), INT16_C( 8183) }, { INT16_C( 13505), INT16_C( 13460), INT16_C( 4874), INT16_C( 0), -INT16_C( 32374), -INT16_C( 9966), INT16_C( 17436), INT16_C( 5909) } }, { { -INT16_C( 2701), -INT16_C( 25261), -INT16_C( 13199), INT16_C( 7023), -INT16_C( 15739), -INT16_C( 15596), INT16_C( 12561), -INT16_C( 2438) }, { INT16_C( 3685), INT16_C( 12394), INT16_C( 27937), INT16_C( 88), -INT16_C( 2066), INT16_C( 19930), -INT16_C( 11797), INT16_C( 24172) }, { INT16_C( 1121), INT16_C( 4162), INT16_C( 19489), INT16_C( 72), -INT16_C( 15740), INT16_C( 16656), INT16_C( 4353), INT16_C( 22120) } }, { { -INT16_C( 16186), INT16_C( 14331), INT16_C( 27532), INT16_C( 4434), INT16_C( 26157), INT16_C( 16084), INT16_C( 20119), -INT16_C( 971) }, { -INT16_C( 24740), INT16_C( 32044), -INT16_C( 31475), -INT16_C( 1154), INT16_C( 22652), INT16_C( 26440), -INT16_C( 19159), -INT16_C( 4154) }, { -INT16_C( 32700), INT16_C( 13608), INT16_C( 268), INT16_C( 4434), INT16_C( 16428), INT16_C( 9792), INT16_C( 1025), -INT16_C( 5116) } }, { { -INT16_C( 16011), INT16_C( 295), INT16_C( 31020), INT16_C( 22802), -INT16_C( 6176), INT16_C( 30616), -INT16_C( 13003), -INT16_C( 28044) }, { -INT16_C( 24468), INT16_C( 30991), -INT16_C( 29403), -INT16_C( 24203), -INT16_C( 16923), INT16_C( 3593), -INT16_C( 
12430), -INT16_C( 6146) }, { -INT16_C( 32668), INT16_C( 263), INT16_C( 2340), INT16_C( 272), -INT16_C( 23072), INT16_C( 1544), -INT16_C( 13008), -INT16_C( 32140) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vandq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vandq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 1875313777), INT32_C( 55691321), INT32_C( 2069893365), INT32_C( 1333800119) }, { INT32_C( 1559786409), INT32_C( 423268261), INT32_C( 17750977), INT32_C( 1330314944) }, { INT32_C( 272135049), INT32_C( 17860641), INT32_C( 16781505), INT32_C( 1325411456) } }, { { INT32_C( 870286073), INT32_C( 1077293130), -INT32_C( 138701248), INT32_C( 1816542147) }, { INT32_C( 1422475182), -INT32_C( 1871903794), -INT32_C( 1617855521), INT32_C( 1928256633) }, { INT32_C( 281608872), INT32_C( 2359370), -INT32_C( 1752100288), INT32_C( 1615206465) } }, { { -INT32_C( 1448751778), INT32_C( 1038736637), INT32_C( 892642418), -INT32_C( 1901954081) }, { -INT32_C( 1998427206), INT32_C( 1293438830), INT32_C( 1139583690), -INT32_C( 457844090) }, { -INT32_C( 2002761446), INT32_C( 218647660), INT32_C( 19177538), -INT32_C( 2069865850) } }, { { -INT32_C( 1534239833), -INT32_C( 1444841929), -INT32_C( 86043110), INT32_C( 1267237265) }, { INT32_C( 1523804908), -INT32_C( 2086081351), INT32_C( 482776214), INT32_C( 369130351) }, { INT32_C( 8473252), -INT32_C( 2120195023), INT32_C( 415634450), INT32_C( 33554689) } }, { { INT32_C( 230395606), INT32_C( 532126724), INT32_C( 1125750450), INT32_C( 59678999) }, { -INT32_C( 1000513013), -INT32_C( 465107634), INT32_C( 150998426), INT32_C( 1595867272) }, { INT32_C( 68747778), INT32_C( 67568644), INT32_C( 16778386), INT32_C( 51249152) } }, { { -INT32_C( 
1821582706), INT32_C( 699540343), -INT32_C( 798110791), INT32_C( 2010381164) }, { -INT32_C( 1422184099), -INT32_C( 795901386), INT32_C( 400134031), INT32_C( 511113616) }, { -INT32_C( 2094526452), INT32_C( 8520246), INT32_C( 273255305), INT32_C( 374536448) } }, { { INT32_C( 1253172179), -INT32_C( 1066114298), -INT32_C( 1701781202), INT32_C( 974218460) }, { -INT32_C( 874165099), INT32_C( 1570534862), -INT32_C( 1804241660), INT32_C( 1119087470) }, { INT32_C( 1252081809), INT32_C( 1075077382), -INT32_C( 1877974780), INT32_C( 34693196) } }, { { -INT32_C( 712219442), -INT32_C( 158007096), -INT32_C( 1097783839), INT32_C( 519611017) }, { -INT32_C( 1108681233), INT32_C( 1461421650), INT32_C( 1793822971), INT32_C( 1252826747) }, { -INT32_C( 1786231602), INT32_C( 1443954752), INT32_C( 713097441), INT32_C( 178815497) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vandq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vandq_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { -INT64_C( 4303703866289850050), -INT64_C( 2671035006846493008) }, { INT64_C( 2708714866595918025), INT64_C( 718144693475718264) }, { INT64_C( 289928024609026056), INT64_C( 641218414341264432) } }, { { INT64_C( 2646998879941183701), INT64_C( 7047910868629455909) }, { -INT64_C( 3090360644743064795), INT64_C( 4638148425744650121) }, { INT64_C( 296117537962196997), INT64_C( 4633642180416898049) } }, { { INT64_C( 5396988171449278028), -INT64_C( 8795513164860019720) }, { -INT64_C( 8190754677193953502), -INT64_C( 5741174301392740185) }, { INT64_C( 739874586727288320), -INT64_C( 9200853626348494688) } }, { { INT64_C( 8633213016863705951), INT64_C( 4640949648730033793) }, { INT64_C( 2502053587228549926), INT64_C( 4289830226683599732) 
}, { INT64_C( 2488542787615621894), INT64_C( 141960480623104) } }, { { -INT64_C( 3527569414960449260), INT64_C( 3529159692359111325) }, { INT64_C( 4798608279944805115), INT64_C( 7766355678831824560) }, { INT64_C( 4758057739708250128), INT64_C( 2360449165568311952) } }, { { -INT64_C( 5568125800508586340), INT64_C( 9104140459729251101) }, { INT64_C( 8660859452568941345), -INT64_C( 8350293899435815845) }, { INT64_C( 3472285212265972224), INT64_C( 871520745501771801) } }, { { INT64_C( 4978784537157246289), INT64_C( 981720861753836957) }, { INT64_C( 8986982677265290360), -INT64_C( 2740920278234252166) }, { INT64_C( 4906707116912546896), INT64_C( 690811628579226648) } }, { { INT64_C( 4455846327967772264), -INT64_C( 9162857078147103779) }, { INT64_C( 8732332341634126482), INT64_C( 1267440763426528566) }, { INT64_C( 4109069156656169472), INT64_C( 42460976704012564) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_vandq_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vandq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(149), UINT8_C(208), UINT8_C(159), UINT8_C( 24), UINT8_C(108), UINT8_C(171), UINT8_C( 77), UINT8_C(141), UINT8_C(144), UINT8_C( 49), UINT8_C(185), UINT8_C(125), UINT8_C( 75), UINT8_C(155), UINT8_C( 87), UINT8_C( 62) }, { UINT8_C(235), UINT8_C(237), UINT8_C( 83), UINT8_C(165), UINT8_C( 95), UINT8_C( 71), UINT8_C(165), UINT8_C(241), UINT8_C(229), UINT8_C( 46), UINT8_C(171), UINT8_C(216), UINT8_C( 35), UINT8_C( 53), UINT8_C(145), UINT8_C(185) }, { UINT8_C(129), UINT8_C(192), UINT8_C( 19), UINT8_C( 0), UINT8_C( 76), UINT8_C( 3), UINT8_C( 5), UINT8_C(129), UINT8_C(128), UINT8_C( 32), UINT8_C(169), UINT8_C( 88), UINT8_C( 3), UINT8_C( 17), UINT8_C( 17), UINT8_C( 56) } }, 
{ { UINT8_C( 5), UINT8_C( 48), UINT8_C(209), UINT8_C(114), UINT8_C(219), UINT8_C( 31), UINT8_MAX, UINT8_C(107), UINT8_C( 80), UINT8_C(184), UINT8_C(232), UINT8_C(156), UINT8_C( 83), UINT8_C( 63), UINT8_C(218), UINT8_C( 63) }, { UINT8_C( 45), UINT8_C( 45), UINT8_C(228), UINT8_C(140), UINT8_C(116), UINT8_C(138), UINT8_C(125), UINT8_C( 89), UINT8_C(184), UINT8_C( 41), UINT8_C( 49), UINT8_C(219), UINT8_C( 94), UINT8_C(194), UINT8_C(148), UINT8_C( 99) }, { UINT8_C( 5), UINT8_C( 32), UINT8_C(192), UINT8_C( 0), UINT8_C( 80), UINT8_C( 10), UINT8_C(125), UINT8_C( 73), UINT8_C( 16), UINT8_C( 40), UINT8_C( 32), UINT8_C(152), UINT8_C( 82), UINT8_C( 2), UINT8_C(144), UINT8_C( 35) } }, { { UINT8_C(242), UINT8_C(102), UINT8_C(213), UINT8_C(205), UINT8_C(133), UINT8_C(213), UINT8_C( 56), UINT8_C(213), UINT8_C(141), UINT8_C( 32), UINT8_C(113), UINT8_C(225), UINT8_C( 95), UINT8_C( 75), UINT8_C( 32), UINT8_C(140) }, { UINT8_C(120), UINT8_C( 4), UINT8_C( 24), UINT8_C(236), UINT8_C(142), UINT8_C(150), UINT8_C( 69), UINT8_C( 70), UINT8_C(191), UINT8_C(118), UINT8_C( 34), UINT8_C( 29), UINT8_C( 56), UINT8_C(182), UINT8_C(128), UINT8_C( 42) }, { UINT8_C(112), UINT8_C( 4), UINT8_C( 16), UINT8_C(204), UINT8_C(132), UINT8_C(148), UINT8_C( 0), UINT8_C( 68), UINT8_C(141), UINT8_C( 32), UINT8_C( 32), UINT8_C( 1), UINT8_C( 24), UINT8_C( 2), UINT8_C( 0), UINT8_C( 8) } }, { { UINT8_C( 28), UINT8_C( 86), UINT8_C(247), UINT8_C(161), UINT8_C( 43), UINT8_C( 47), UINT8_C(119), UINT8_C(184), UINT8_C( 79), UINT8_C(232), UINT8_C(153), UINT8_C(175), UINT8_C( 52), UINT8_C(185), UINT8_C( 59), UINT8_C(172) }, { UINT8_C(190), UINT8_C( 84), UINT8_C(153), UINT8_C( 76), UINT8_C(234), UINT8_C(222), UINT8_C(147), UINT8_C(169), UINT8_C( 85), UINT8_C(181), UINT8_C(198), UINT8_C(141), UINT8_C(107), UINT8_C( 70), UINT8_C(184), UINT8_C(136) }, { UINT8_C( 28), UINT8_C( 84), UINT8_C(145), UINT8_C( 0), UINT8_C( 42), UINT8_C( 14), UINT8_C( 19), UINT8_C(168), UINT8_C( 69), UINT8_C(160), UINT8_C(128), UINT8_C(141), UINT8_C( 
32), UINT8_C( 0), UINT8_C( 56), UINT8_C(136) } }, { { UINT8_C(156), UINT8_C(175), UINT8_C( 41), UINT8_C(199), UINT8_C(223), UINT8_C(160), UINT8_C(128), UINT8_C( 46), UINT8_C(137), UINT8_C( 25), UINT8_C(221), UINT8_C(189), UINT8_C(211), UINT8_C( 25), UINT8_C(105), UINT8_C(145) }, { UINT8_C(109), UINT8_C( 2), UINT8_C(221), UINT8_C( 87), UINT8_C(225), UINT8_C(112), UINT8_C( 0), UINT8_C( 54), UINT8_C( 37), UINT8_C(198), UINT8_C(195), UINT8_C(145), UINT8_C( 12), UINT8_C(123), UINT8_C( 25), UINT8_C(169) }, { UINT8_C( 12), UINT8_C( 2), UINT8_C( 9), UINT8_C( 71), UINT8_C(193), UINT8_C( 32), UINT8_C( 0), UINT8_C( 38), UINT8_C( 1), UINT8_C( 0), UINT8_C(193), UINT8_C(145), UINT8_C( 0), UINT8_C( 25), UINT8_C( 9), UINT8_C(129) } }, { { UINT8_C( 43), UINT8_C( 66), UINT8_C(112), UINT8_C( 10), UINT8_C(227), UINT8_C(240), UINT8_C( 56), UINT8_C(108), UINT8_C( 10), UINT8_C( 22), UINT8_C( 41), UINT8_C(221), UINT8_C( 47), UINT8_C(146), UINT8_C(110), UINT8_C(156) }, { UINT8_C(149), UINT8_C( 75), UINT8_C(243), UINT8_C(118), UINT8_C(188), UINT8_C(243), UINT8_C(172), UINT8_C(225), UINT8_C(185), UINT8_C(111), UINT8_C(114), UINT8_C(197), UINT8_C(235), UINT8_C(139), UINT8_C(110), UINT8_C( 22) }, { UINT8_C( 1), UINT8_C( 66), UINT8_C(112), UINT8_C( 2), UINT8_C(160), UINT8_C(240), UINT8_C( 40), UINT8_C( 96), UINT8_C( 8), UINT8_C( 6), UINT8_C( 32), UINT8_C(197), UINT8_C( 43), UINT8_C(130), UINT8_C(110), UINT8_C( 20) } }, { { UINT8_C(206), UINT8_C(223), UINT8_C( 32), UINT8_C(177), UINT8_C(207), UINT8_C( 88), UINT8_C( 29), UINT8_C(217), UINT8_C(110), UINT8_C( 70), UINT8_C(182), UINT8_C(157), UINT8_C(216), UINT8_C( 36), UINT8_C( 57), UINT8_C(109) }, { UINT8_C(112), UINT8_C( 44), UINT8_C(227), UINT8_C( 44), UINT8_C( 31), UINT8_C(143), UINT8_C( 13), UINT8_C(216), UINT8_MAX, UINT8_C(128), UINT8_C(158), UINT8_C(234), UINT8_C( 11), UINT8_C( 12), UINT8_C( 0), UINT8_C(217) }, { UINT8_C( 64), UINT8_C( 12), UINT8_C( 32), UINT8_C( 32), UINT8_C( 15), UINT8_C( 8), UINT8_C( 13), UINT8_C(216), UINT8_C(110), 
UINT8_C( 0), UINT8_C(150), UINT8_C(136), UINT8_C( 8), UINT8_C( 4), UINT8_C( 0), UINT8_C( 73) } }, { { UINT8_C(235), UINT8_C( 32), UINT8_C(138), UINT8_C(187), UINT8_C(120), UINT8_C(167), UINT8_C(148), UINT8_C(231), UINT8_C(237), UINT8_C( 75), UINT8_C(132), UINT8_C(198), UINT8_C(111), UINT8_C(190), UINT8_C( 51), UINT8_C(223) }, { UINT8_C(234), UINT8_C( 23), UINT8_C( 11), UINT8_C( 10), UINT8_C(166), UINT8_C( 25), UINT8_C(226), UINT8_C(165), UINT8_C(153), UINT8_C(128), UINT8_C(143), UINT8_C(164), UINT8_C(141), UINT8_C(143), UINT8_C(126), UINT8_C(120) }, { UINT8_C(234), UINT8_C( 0), UINT8_C( 10), UINT8_C( 10), UINT8_C( 32), UINT8_C( 1), UINT8_C(128), UINT8_C(165), UINT8_C(137), UINT8_C( 0), UINT8_C(132), UINT8_C(132), UINT8_C( 13), UINT8_C(142), UINT8_C( 50), UINT8_C( 88) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vandq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vandq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(14144), UINT16_C(21032), UINT16_C(65343), UINT16_C(36385), UINT16_C(62489), UINT16_C(21831), UINT16_C(29758), UINT16_C(19117) }, { UINT16_C( 7953), UINT16_C(17828), UINT16_C(15693), UINT16_C(42403), UINT16_C(51393), UINT16_C(33131), UINT16_C(22501), UINT16_C( 9567) }, { UINT16_C( 5888), UINT16_C(16416), UINT16_C(15629), UINT16_C(33825), UINT16_C(49153), UINT16_C( 323), UINT16_C(21540), UINT16_C( 13) } }, { { UINT16_C(34703), UINT16_C(52855), UINT16_C(39047), UINT16_C(41053), UINT16_C(42125), UINT16_C(52214), UINT16_C(41753), UINT16_C(10773) }, { UINT16_C(47554), UINT16_C( 3952), UINT16_C( 5111), UINT16_C(47284), UINT16_C( 8412), UINT16_C(49465), UINT16_C(39287), UINT16_C( 1766) }, { UINT16_C(33154), UINT16_C( 3696), UINT16_C( 4231), UINT16_C(40980), 
UINT16_C( 8332), UINT16_C(49456), UINT16_C(33041), UINT16_C( 516) } }, { { UINT16_C(24096), UINT16_C(42965), UINT16_C(13046), UINT16_C(33608), UINT16_C(16086), UINT16_C(61262), UINT16_C(25825), UINT16_C(41754) }, { UINT16_C(35357), UINT16_C( 5298), UINT16_C(26269), UINT16_C(31180), UINT16_C( 1670), UINT16_C(65082), UINT16_C( 8607), UINT16_C(48900) }, { UINT16_C( 2560), UINT16_C( 1168), UINT16_C( 8852), UINT16_C( 328), UINT16_C( 1670), UINT16_C(60938), UINT16_C( 8321), UINT16_C(41728) } }, { { UINT16_C(55679), UINT16_C(30055), UINT16_C(44811), UINT16_C(58105), UINT16_C(18413), UINT16_C(52945), UINT16_C(60331), UINT16_C(51569) }, { UINT16_C( 9077), UINT16_C( 5085), UINT16_C(43657), UINT16_C( 4236), UINT16_C(51120), UINT16_C(20238), UINT16_C( 4840), UINT16_C(26382) }, { UINT16_C( 373), UINT16_C( 4421), UINT16_C(43529), UINT16_C( 136), UINT16_C(18336), UINT16_C(19968), UINT16_C( 680), UINT16_C(16640) } }, { { UINT16_C(30188), UINT16_C(63452), UINT16_C(54564), UINT16_C( 4569), UINT16_C(43805), UINT16_C(51423), UINT16_C(20630), UINT16_C( 3217) }, { UINT16_C(28531), UINT16_C(64799), UINT16_C(43801), UINT16_C(51469), UINT16_C( 7026), UINT16_C(23064), UINT16_C( 9773), UINT16_C( 6593) }, { UINT16_C(25952), UINT16_C(62748), UINT16_C(33024), UINT16_C( 265), UINT16_C( 2832), UINT16_C(18456), UINT16_C( 4), UINT16_C( 2177) } }, { { UINT16_C(40604), UINT16_C(49169), UINT16_C(60019), UINT16_C(37074), UINT16_C(45461), UINT16_C(11353), UINT16_C(59906), UINT16_C(30008) }, { UINT16_C(22361), UINT16_C(29298), UINT16_C(32514), UINT16_C(30011), UINT16_C(21402), UINT16_C(51407), UINT16_C(37242), UINT16_C( 5857) }, { UINT16_C( 5656), UINT16_C(16400), UINT16_C(27138), UINT16_C( 4114), UINT16_C( 4496), UINT16_C( 2121), UINT16_C(32770), UINT16_C( 5152) } }, { { UINT16_C(61999), UINT16_C(41686), UINT16_C(43229), UINT16_C(29235), UINT16_C(35930), UINT16_C(23710), UINT16_C(54902), UINT16_C(53457) }, { UINT16_C(17453), UINT16_C(12354), UINT16_C(32451), UINT16_C(24229), UINT16_C(29905), 
UINT16_C(19238), UINT16_C( 1797), UINT16_C(13409) }, { UINT16_C(16429), UINT16_C( 8258), UINT16_C(10433), UINT16_C(21025), UINT16_C( 1104), UINT16_C(18438), UINT16_C( 1540), UINT16_C( 4161) } }, { { UINT16_C(14586), UINT16_C(55255), UINT16_C( 2784), UINT16_C(14921), UINT16_C(59542), UINT16_C( 3222), UINT16_C(26814), UINT16_C(60636) }, { UINT16_C( 8108), UINT16_C(28444), UINT16_C(49565), UINT16_C(28365), UINT16_C(62261), UINT16_C(15290), UINT16_C( 7163), UINT16_C(62831) }, { UINT16_C( 6312), UINT16_C(18196), UINT16_C( 128), UINT16_C(10825), UINT16_C(57364), UINT16_C( 2194), UINT16_C( 2234), UINT16_C(58444) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vandq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vandq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(4293273589), UINT32_C(3685031970), UINT32_C( 352705382), UINT32_C(3156153161) }, { UINT32_C(4180779521), UINT32_C(1424260668), UINT32_C(3210431843), UINT32_C(2472031390) }, { UINT32_C(4179632641), UINT32_C(1352933408), UINT32_C( 352405858), UINT32_C(2417496072) } }, { { UINT32_C(2106736475), UINT32_C(3310893151), UINT32_C(1574657299), UINT32_C(1897527920) }, { UINT32_C(3647622045), UINT32_C( 741232329), UINT32_C( 988514716), UINT32_C( 416171197) }, { UINT32_C(1493306137), UINT32_C( 67635273), UINT32_C( 415959312), UINT32_C( 268976176) } }, { { UINT32_C(3801440387), UINT32_C(2896686744), UINT32_C(3154739787), UINT32_C( 439165565) }, { UINT32_C( 938711149), UINT32_C(2187534822), UINT32_C(1740459946), UINT32_C( 377457555) }, { UINT32_C( 579928065), UINT32_C(2149785728), UINT32_C( 604570122), UINT32_C( 304939537) } }, { { UINT32_C(2230851051), UINT32_C(1311809283), UINT32_C(2668247330), UINT32_C(3384359003) }, { 
UINT32_C(3053497552), UINT32_C(2017027022), UINT32_C(1172371122), UINT32_C(1817927553) }, { UINT32_C(2214593728), UINT32_C(1211106050), UINT32_C( 83898402), UINT32_C(1209604097) } }, { { UINT32_C(2012238708), UINT32_C( 365306099), UINT32_C(3048525913), UINT32_C(3632164104) }, { UINT32_C(3884940825), UINT32_C(2489370849), UINT32_C(1071202494), UINT32_C( 346764703) }, { UINT32_C(1736462864), UINT32_C( 339738849), UINT32_C( 898646040), UINT32_C( 271197448) } }, { { UINT32_C(2072747144), UINT32_C( 378556860), UINT32_C( 717964322), UINT32_C(3405990321) }, { UINT32_C(2830275271), UINT32_C( 406590042), UINT32_C(4065793618), UINT32_C(3540386379) }, { UINT32_C( 679645312), UINT32_C( 269488152), UINT32_C( 574817282), UINT32_C(3271688193) } }, { { UINT32_C(1531941278), UINT32_C( 91348963), UINT32_C(3576642596), UINT32_C(1285567109) }, { UINT32_C( 519328708), UINT32_C(3090559333), UINT32_C(2460650823), UINT32_C( 778416272) }, { UINT32_C( 440668548), UINT32_C( 3150177), UINT32_C(2418674692), UINT32_C( 203436160) } }, { { UINT32_C( 613004353), UINT32_C(3089758868), UINT32_C(3146602806), UINT32_C(1342647947) }, { UINT32_C(3866033281), UINT32_C(1956554029), UINT32_C(3255191602), UINT32_C( 988900344) }, { UINT32_C( 604550145), UINT32_C( 805871620), UINT32_C(2181318706), UINT32_C( 268511880) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vandq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vandq_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 7109514916206232913), UINT64_C(10343282227606388255) }, { UINT64_C(13806481744594689539), UINT64_C( 1434929476295376452) }, { UINT64_C( 2488819499475682305), UINT64_C( 254629577833496068) } }, { { UINT64_C(16983846929493703209), UINT64_C( 
9560580713027717308) }, { UINT64_C(16581777337322056031), UINT64_C( 9142413165197685004) }, { UINT64_C(16290098746414606345), UINT64_C( 333266376929460236) } }, { { UINT64_C( 8129420312540280090), UINT64_C(12271812326554749257) }, { UINT64_C( 6696332920632181349), UINT64_C(16099254613146428824) }, { UINT64_C( 5818650787282681856), UINT64_C( 9965349149790978312) } }, { { UINT64_C( 297966258262194943), UINT64_C( 6040484805161693663) }, { UINT64_C(11369980492630449438), UINT64_C(18181866214073758638) }, { UINT64_C( 288793326679824414), UINT64_C( 5787148529890406798) } }, { { UINT64_C(10227905302983808817), UINT64_C(17925324712174927490) }, { UINT64_C( 1588135039376694888), UINT64_C(13555232943576290339) }, { UINT64_C( 288248133360948768), UINT64_C(13259028904787678210) } }, { { UINT64_C( 2319240140203543795), UINT64_C(15719906249242895958) }, { UINT64_C(16280321058098613873), UINT64_C( 5200848691728120038) }, { UINT64_C( 2319090606074268785), UINT64_C( 5199406114201952326) } }, { { UINT64_C(14099389917174552044), UINT64_C( 21140926159338495) }, { UINT64_C(14576121077305021353), UINT64_C( 6331331462427265857) }, { UINT64_C(13981434268808221096), UINT64_C( 20556516926098241) } }, { { UINT64_C(16027622640665675581), UINT64_C( 2619014137364046024) }, { UINT64_C( 4313838188608827694), UINT64_C(13754466608369113119) }, { UINT64_C( 1895317090072658220), UINT64_C( 2612239520812236808) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vandq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vand_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vand_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vand_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vand_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vand_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vand_u16) 
SIMDE_TEST_FUNC_LIST_ENTRY(vand_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vand_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vandq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vandq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vandq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vandq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vandq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vandq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vandq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vandq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/bic.c000066400000000000000000001624201400333146700162250ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN bic #include "test-neon.h" #include "../../../simde/arm/neon/bic.h" static int test_simde_vbic_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 62), INT8_C( 9), INT8_C( 113), INT8_C( 100), -INT8_C( 96), -INT8_C( 44), INT8_C( 44), -INT8_C( 52) }, { -INT8_C( 99), -INT8_C( 76), INT8_C( 33), -INT8_C( 61), -INT8_C( 16), -INT8_C( 42), INT8_C( 93), INT8_C( 44) }, { INT8_C( 66), INT8_C( 9), INT8_C( 80), INT8_C( 36), INT8_C( 0), INT8_C( 0), INT8_C( 32), -INT8_C( 64) } }, { { INT8_C( 1), -INT8_C( 113), INT8_C( 51), -INT8_C( 4), -INT8_C( 32), INT8_C( 91), INT8_C( 93), -INT8_C( 121) }, { INT8_C( 52), INT8_C( 72), INT8_C( 55), -INT8_C( 13), -INT8_C( 124), INT8_C( 83), -INT8_C( 76), INT8_C( 70) }, { INT8_C( 1), -INT8_C( 121), INT8_C( 0), INT8_C( 12), INT8_C( 96), INT8_C( 8), INT8_C( 73), -INT8_C( 127) } }, { { INT8_C( 92), INT8_C( 37), -INT8_C( 85), -INT8_C( 3), -INT8_C( 6), -INT8_C( 41), -INT8_C( 55), -INT8_C( 105) }, { -INT8_C( 117), -INT8_C( 22), INT8_C( 91), INT8_C( 123), -INT8_C( 64), -INT8_C( 72), -INT8_C( 89), -INT8_C( 63) }, { INT8_C( 84), INT8_C( 5), -INT8_C( 96), -INT8_C( 124), INT8_C( 58), INT8_C( 71), INT8_C( 72), INT8_C( 22) } }, { { INT8_C( 71), -INT8_C( 38), -INT8_C( 66), INT8_C( 39), INT8_C( 54), INT8_C( 27), -INT8_C( 81), INT8_C( 106) }, { INT8_C( 99), -INT8_C( 26), INT8_C( 93), -INT8_C( 25), INT8_C( 58), INT8_C( 18), 
INT8_C( 45), -INT8_C( 106) }, { INT8_C( 4), INT8_C( 24), -INT8_C( 94), INT8_C( 0), INT8_C( 4), INT8_C( 9), -INT8_C( 126), INT8_C( 104) } }, { { INT8_C( 55), -INT8_C( 40), -INT8_C( 109), INT8_C( 49), -INT8_C( 81), INT8_C( 92), -INT8_C( 55), INT8_C( 58) }, { INT8_C( 71), INT8_C( 36), -INT8_C( 75), INT8_C( 7), -INT8_C( 36), INT8_C( 93), -INT8_C( 55), INT8_C( 36) }, { INT8_C( 48), -INT8_C( 40), INT8_C( 2), INT8_C( 48), INT8_C( 35), INT8_C( 0), INT8_C( 0), INT8_C( 26) } }, { { INT8_C( 55), -INT8_C( 121), INT8_C( 75), INT8_C( 109), -INT8_C( 94), -INT8_C( 6), -INT8_C( 40), INT8_C( 5) }, { -INT8_C( 31), INT8_C( 53), -INT8_C( 20), INT8_C( 27), INT8_C( 71), INT8_C( 25), -INT8_C( 79), INT8_MAX }, { INT8_C( 22), -INT8_C( 126), INT8_C( 3), INT8_C( 100), -INT8_C( 96), -INT8_C( 30), INT8_C( 72), INT8_C( 0) } }, { { -INT8_C( 14), INT8_C( 69), -INT8_C( 80), -INT8_C( 95), -INT8_C( 95), INT8_C( 121), -INT8_C( 36), -INT8_C( 24) }, { -INT8_C( 99), -INT8_C( 111), -INT8_C( 16), INT8_C( 122), -INT8_C( 18), -INT8_C( 71), -INT8_C( 98), INT8_C( 38) }, { INT8_C( 98), INT8_C( 68), INT8_C( 0), -INT8_C( 127), INT8_C( 1), INT8_C( 64), INT8_C( 64), -INT8_C( 56) } }, { { INT8_C( 64), -INT8_C( 23), -INT8_C( 109), -INT8_C( 30), -INT8_C( 28), INT8_C( 107), -INT8_C( 25), -INT8_C( 59) }, { -INT8_C( 95), -INT8_C( 45), -INT8_C( 32), -INT8_C( 24), -INT8_C( 20), -INT8_C( 111), INT8_C( 103), -INT8_C( 34) }, { INT8_C( 64), INT8_C( 40), INT8_C( 19), INT8_C( 2), INT8_C( 0), INT8_C( 106), INT8_MIN, INT8_C( 1) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vbic_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vbic_s8(a, b); 
simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vbic_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 18838), -INT16_C( 1368), -INT16_C( 3988), -INT16_C( 30957) }, { -INT16_C( 22454), INT16_C( 21529), INT16_C( 11667), INT16_C( 21102) }, { INT16_C( 16788), -INT16_C( 21856), -INT16_C( 12180), -INT16_C( 31471) } }, { { INT16_C( 2977), -INT16_C( 11559), INT16_C( 6524), -INT16_C( 19665) }, { -INT16_C( 26774), INT16_C( 5880), INT16_C( 11737), INT16_C( 28443) }, { INT16_C( 2177), -INT16_C( 16383), INT16_C( 4132), -INT16_C( 28636) } }, { { -INT16_C( 15498), -INT16_C( 7575), INT16_C( 32180), -INT16_C( 406) }, { -INT16_C( 31963), -INT16_C( 18349), -INT16_C( 15952), INT16_C( 20746) }, { INT16_C( 16466), INT16_C( 16936), INT16_C( 15364), -INT16_C( 20896) } }, { { -INT16_C( 7219), INT16_C( 18723), INT16_C( 21244), INT16_C( 26620) }, { -INT16_C( 2583), -INT16_C( 15747), -INT16_C( 26590), -INT16_C( 26575) }, { INT16_C( 516), INT16_C( 2306), INT16_C( 17116), INT16_C( 26572) } }, { { -INT16_C( 25764), INT16_C( 4219), -INT16_C( 6888), INT16_C( 15630) }, { INT16_C( 24936), INT16_C( 6389), INT16_C( 35), -INT16_C( 3991) }, { -INT16_C( 26092), INT16_C( 10), -INT16_C( 6888), INT16_C( 3334) } }, { { -INT16_C( 29469), -INT16_C( 8135), INT16_C( 13790), -INT16_C( 14521) }, { -INT16_C( 15318), INT16_C( 19594), -INT16_C( 17571), -INT16_C( 17947) }, { INT16_C( 2241), -INT16_C( 24527), INT16_C( 1154), INT16_C( 17922) } }, { { INT16_C( 24662), INT16_C( 28361), -INT16_C( 10427), -INT16_C( 21077) }, { -INT16_C( 24263), INT16_C( 23749), INT16_C( 11937), -INT16_C( 31668) }, { INT16_C( 16454), INT16_C( 8712), -INT16_C( 11964), INT16_C( 10659) } }, { { -INT16_C( 31302), -INT16_C( 26524), -INT16_C( 21574), -INT16_C( 6817) }, { -INT16_C( 
5776), -INT16_C( 13007), INT16_C( 5797), -INT16_C( 1146) }, { INT16_C( 1162), INT16_C( 4164), -INT16_C( 22246), INT16_C( 1113) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vbic_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vbic_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vbic_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 1150660746), INT32_C( 1600656678) }, { INT32_C( 1471884726), INT32_C( 366741339) }, { -INT32_C( 1472183744), INT32_C( 1243615268) } }, { { INT32_C( 1202536588), INT32_C( 1546391020) }, { -INT32_C( 1691787786), -INT32_C( 359157900) }, { INT32_C( 1149501448), INT32_C( 338165896) } }, { { INT32_C( 614859262), -INT32_C( 846983658) }, { -INT32_C( 1759232196), -INT32_C( 743636921) }, { INT32_C( 545390786), INT32_C( 201330192) } }, { { INT32_C( 756701761), INT32_C( 1586054759) }, { INT32_C( 419017380), INT32_C( 1594003553) }, { INT32_C( 620906561), INT32_C( 8996358) } }, { { -INT32_C( 1467766639), -INT32_C( 210434121) }, { -INT32_C( 1903519417), -INT32_C( 614385766) }, { INT32_C( 536944784), INT32_C( 538181669) } }, { { -INT32_C( 116884334), INT32_C( 1717014978) }, { -INT32_C( 1535225533), INT32_C( 1929609697) }, { INT32_C( 1493183632), INT32_C( 72617986) } }, { { -INT32_C( 535067095), -INT32_C( 707555186) }, { -INT32_C( 1016898007), INT32_C( 664716437) }, { INT32_C( 
538476544), -INT32_C( 801042422) } }, { { INT32_C( 52536896), INT32_C( 2053732407) }, { -INT32_C( 1424037687), -INT32_C( 1843519127) }, { INT32_C( 2164224), INT32_C( 1751210006) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vbic_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vbic_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vbic_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int64_t b[1]; int64_t r[1]; } test_vec[] = { { { -INT64_C( 1005632261771282008) }, { INT64_C( 8314018961812778660) }, { -INT64_C( 9220554224704614136) } }, { { -INT64_C( 4274514201328319749) }, { -INT64_C( 7655118891740312633) }, { INT64_C( 4624161154700148792) } }, { { INT64_C( 2414728180700755654) }, { -INT64_C( 8653177897548564412) }, { INT64_C( 2306479010277754498) } }, { { INT64_C( 503953729577967423) }, { -INT64_C( 4416629378076152270) }, { INT64_C( 309065039712096525) } }, { { -INT64_C( 7363997450764189904) }, { INT64_C( 2603073874137458699) }, { -INT64_C( 7367886722027137232) } }, { { INT64_C( 8331197793186011481) }, { -INT64_C( 33698039489964598) }, { INT64_C( 6218838174337041) } }, { { -INT64_C( 5137066015528789562) }, { INT64_C( 1777638445839074722) }, { -INT64_C( 6911889632088287164) } }, { { INT64_C( 3530063880953792841) }, { -INT64_C( 7065163928733053148) }, { INT64_C( 2309220850802360393) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r = simde_vbic_s64(a, b); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); simde_int64x1_t b = simde_test_arm_neon_random_i64x1(); simde_int64x1_t r = simde_vbic_s64(a, b); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vbic_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 61), UINT8_C(245), UINT8_C(159), UINT8_C(215), UINT8_C( 64), UINT8_C( 72), UINT8_C(123), UINT8_C( 15) }, { UINT8_C( 58), UINT8_C(163), UINT8_C(124), UINT8_C( 94), UINT8_C( 7), UINT8_C( 32), UINT8_C(188), UINT8_C(107) }, { UINT8_C( 5), UINT8_C( 84), UINT8_C(131), UINT8_C(129), UINT8_C( 64), UINT8_C( 72), UINT8_C( 67), UINT8_C( 4) } }, { { UINT8_C( 63), UINT8_C( 97), UINT8_C( 14), UINT8_C(228), UINT8_C(197), UINT8_C( 10), UINT8_C(226), UINT8_C(203) }, { UINT8_C( 13), UINT8_C(222), UINT8_C(120), UINT8_C(213), UINT8_C(244), UINT8_C(243), UINT8_C( 39), UINT8_C( 50) }, { UINT8_C( 50), UINT8_C( 33), UINT8_C( 6), UINT8_C( 32), UINT8_C( 1), UINT8_C( 8), UINT8_C(192), UINT8_C(201) } }, { { UINT8_C(232), UINT8_C(199), UINT8_C( 9), UINT8_C( 41), UINT8_C( 15), UINT8_C(132), UINT8_C( 56), UINT8_C( 73) }, { UINT8_C( 39), UINT8_C(180), UINT8_C(167), UINT8_C( 46), UINT8_C(213), UINT8_C( 99), UINT8_C(153), UINT8_C( 20) }, { UINT8_C(200), UINT8_C( 67), UINT8_C( 8), UINT8_C( 1), UINT8_C( 10), UINT8_C(132), UINT8_C( 32), UINT8_C( 73) } }, { { UINT8_C(197), UINT8_C(167), UINT8_C(249), UINT8_C(138), UINT8_C(177), UINT8_C(219), UINT8_C( 86), UINT8_C(191) }, { 
UINT8_C(186), UINT8_C(206), UINT8_C(148), UINT8_C(174), UINT8_C(193), UINT8_C(188), UINT8_C(224), UINT8_C(169) }, { UINT8_C( 69), UINT8_C( 33), UINT8_C(105), UINT8_C( 0), UINT8_C( 48), UINT8_C( 67), UINT8_C( 22), UINT8_C( 22) } }, { { UINT8_C(131), UINT8_C(233), UINT8_C(210), UINT8_C(146), UINT8_C(109), UINT8_C( 11), UINT8_C(219), UINT8_C(148) }, { UINT8_C(191), UINT8_C(130), UINT8_C(194), UINT8_C(148), UINT8_C(229), UINT8_C( 91), UINT8_C(169), UINT8_C(170) }, { UINT8_C( 0), UINT8_C(105), UINT8_C( 16), UINT8_C( 2), UINT8_C( 8), UINT8_C( 0), UINT8_C( 82), UINT8_C( 20) } }, { { UINT8_C( 3), UINT8_C(162), UINT8_C( 53), UINT8_C(180), UINT8_C(125), UINT8_C(139), UINT8_C(115), UINT8_C( 55) }, { UINT8_C( 89), UINT8_C( 8), UINT8_C(230), UINT8_C( 26), UINT8_C(196), UINT8_C(198), UINT8_C(195), UINT8_C( 71) }, { UINT8_C( 2), UINT8_C(162), UINT8_C( 17), UINT8_C(164), UINT8_C( 57), UINT8_C( 9), UINT8_C( 48), UINT8_C( 48) } }, { { UINT8_C(176), UINT8_C(150), UINT8_C(217), UINT8_C( 29), UINT8_C(161), UINT8_C(180), UINT8_C(178), UINT8_C( 96) }, { UINT8_C( 54), UINT8_C(116), UINT8_C(245), UINT8_C( 27), UINT8_C(208), UINT8_C(158), UINT8_C(198), UINT8_C(211) }, { UINT8_C(128), UINT8_C(130), UINT8_C( 8), UINT8_C( 4), UINT8_C( 33), UINT8_C( 32), UINT8_C( 48), UINT8_C( 32) } }, { { UINT8_C( 64), UINT8_C(251), UINT8_C(135), UINT8_C(189), UINT8_C(134), UINT8_C(251), UINT8_C(245), UINT8_C(223) }, { UINT8_C( 3), UINT8_C(219), UINT8_C(249), UINT8_C(199), UINT8_C(161), UINT8_C(188), UINT8_C( 14), UINT8_C( 81) }, { UINT8_C( 64), UINT8_C( 32), UINT8_C( 6), UINT8_C( 56), UINT8_C( 6), UINT8_C( 67), UINT8_C(241), UINT8_C(142) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vbic_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { 
simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vbic_u8(a, b); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vbic_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(59218), UINT16_C(62319), UINT16_C( 8603), UINT16_C(53588) }, { UINT16_C(18837), UINT16_C(26092), UINT16_C(45799), UINT16_C(10040) }, { UINT16_C(42562), UINT16_C(37379), UINT16_C( 280), UINT16_C(53316) } }, { { UINT16_C(49325), UINT16_C(13284), UINT16_C(55739), UINT16_C(48658) }, { UINT16_C( 2996), UINT16_C(22149), UINT16_C(37832), UINT16_C( 6823) }, { UINT16_C(49161), UINT16_C( 8544), UINT16_C(18483), UINT16_C(42000) } }, { { UINT16_C( 5754), UINT16_C( 5390), UINT16_C(25143), UINT16_C(52710) }, { UINT16_C(53931), UINT16_C(37426), UINT16_C(27525), UINT16_C(12985) }, { UINT16_C( 1104), UINT16_C( 1292), UINT16_C( 50), UINT16_C(52550) } }, { { UINT16_C(40235), UINT16_C(58982), UINT16_C(30839), UINT16_C(11172) }, { UINT16_C(10628), UINT16_C(19585), UINT16_C(10684), UINT16_C(13926) }, { UINT16_C(37931), UINT16_C(41574), UINT16_C(20547), UINT16_C( 2432) } }, { { UINT16_C(29759), UINT16_C(30539), UINT16_C(12758), UINT16_C(33092) }, { UINT16_C(30211), UINT16_C(34835), UINT16_C(52449), UINT16_C( 3259) }, { UINT16_C( 60), UINT16_C(30536), UINT16_C(12566), UINT16_C(33092) } }, { { UINT16_C( 8554), UINT16_C(57842), UINT16_C(38553), UINT16_C( 7436) }, { UINT16_C(36543), UINT16_C(31593), UINT16_C(53431), UINT16_C(63153) }, { UINT16_C( 8512), UINT16_C(32914), UINT16_C( 1544), UINT16_C( 2316) } }, { { UINT16_C(64580), UINT16_C( 7021), UINT16_C(45357), UINT16_C(12700) }, { UINT16_C(45096), UINT16_C( 2489), UINT16_C(29820), UINT16_C(58902) }, { UINT16_C(19524), 
UINT16_C( 4676), UINT16_C(33025), UINT16_C( 4488) } }, { { UINT16_C( 2197), UINT16_C(12231), UINT16_C(54431), UINT16_C(24140) }, { UINT16_C(46690), UINT16_C( 6618), UINT16_C(35718), UINT16_C(51727) }, { UINT16_C( 2197), UINT16_C( 9733), UINT16_C(21529), UINT16_C( 5184) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vbic_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t r = simde_vbic_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vbic_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(3051715976), UINT32_C(1457947182) }, { UINT32_C(2925568050), UINT32_C(2861921812) }, { UINT32_C( 293952904), UINT32_C(1415741482) } }, { { UINT32_C( 500784254), UINT32_C(2457609520) }, { UINT32_C(1638618843), UINT32_C(1764539361) }, { UINT32_C( 475007012), UINT32_C(2454717456) } }, { { UINT32_C(1713312056), UINT32_C(3317499283) }, { UINT32_C(3128171941), UINT32_C( 291768723) }, { UINT32_C(1141571608), UINT32_C(3298362368) } }, { { UINT32_C(2519678309), UINT32_C(1042852706) }, { UINT32_C(3802125313), UINT32_C(3343698831) }, { UINT32_C( 337652068), UINT32_C( 941629536) } }, { { UINT32_C(1882024925), UINT32_C( 372697712) }, { UINT32_C(2597366279), UINT32_C( 413938867) }, { UINT32_C(1613578712), UINT32_C( 101894720) } }, { { UINT32_C(3551452017), UINT32_C(2266093446) }, { UINT32_C( 980005291), UINT32_C(1493284220) 
}, { UINT32_C(3246803536), UINT32_C(2249212546) } }, { { UINT32_C(2445946400), UINT32_C( 547815449) }, { UINT32_C(1572566954), UINT32_C( 477456299) }, { UINT32_C(2151680000), UINT32_C( 545390608) } }, { { UINT32_C(3371115586), UINT32_C(2790195707) }, { UINT32_C( 786479282), UINT32_C(2391335278) }, { UINT32_C(3222209600), UINT32_C( 541524113) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vbic_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t r = simde_vbic_u32(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vbic_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C(18179279427020476943) }, { UINT64_C(12467316631323345981) }, { UINT64_C( 5785155265054003714) } }, { { UINT64_C(12075211586440393970) }, { UINT64_C( 7239920495273276284) }, { UINT64_C( 9476277995005052034) } }, { { UINT64_C(11312432443049154735) }, { UINT64_C( 1725310925415995097) }, { UINT64_C( 9803303234419853350) } }, { { UINT64_C(12040822386820736637) }, { UINT64_C(18213694116147097356) }, { UINT64_C( 223368132537090161) } }, { { UINT64_C( 44749186997768611) }, { UINT64_C( 3066448514927305553) }, { UINT64_C( 4724610066632866) } }, { { UINT64_C( 3994330218673050524) }, { UINT64_C(12533056015108210597) }, { UINT64_C( 1297214955384537112) } }, { { UINT64_C( 4094983614992739245) }, { UINT64_C(16953867103456073841) }, { UINT64_C( 
1194651959831946124) } }, { { UINT64_C( 4697853219910641103) }, { UINT64_C(15336482033135503903) }, { UINT64_C( 81065449659311552) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_vbic_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t b = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t r = simde_vbic_u64(a, b); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vbicq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 87), -INT8_C( 97), -INT8_C( 71), INT8_C( 4), INT8_C( 24), INT8_C( 110), -INT8_C( 67), -INT8_C( 70), INT8_C( 52), INT8_C( 13), INT8_C( 65), -INT8_C( 13), INT8_C( 125), -INT8_C( 20), INT8_C( 11), -INT8_C( 57) }, { -INT8_C( 11), -INT8_C( 69), INT8_C( 81), INT8_C( 91), INT8_C( 9), INT8_C( 78), -INT8_C( 117), INT8_C( 45), -INT8_C( 35), -INT8_C( 127), INT8_C( 33), -INT8_C( 76), -INT8_C( 1), INT8_C( 20), INT8_C( 81), INT8_C( 86) }, { INT8_C( 2), INT8_C( 4), -INT8_C( 88), INT8_C( 4), INT8_C( 16), INT8_C( 32), INT8_C( 52), -INT8_C( 110), INT8_C( 32), INT8_C( 12), INT8_C( 64), INT8_C( 67), INT8_C( 0), -INT8_C( 24), INT8_C( 10), -INT8_C( 127) } }, { { -INT8_C( 77), INT8_C( 10), INT8_C( 91), -INT8_C( 53), INT8_C( 120), INT8_C( 24), -INT8_C( 122), -INT8_C( 84), INT8_C( 37), -INT8_C( 57), -INT8_C( 97), -INT8_C( 93), -INT8_C( 76), -INT8_C( 86), INT8_C( 106), -INT8_C( 87) }, { INT8_C( 101), -INT8_C( 69), INT8_C( 4), INT8_C( 111), INT8_C( 9), -INT8_C( 113), -INT8_C( 
100), -INT8_C( 26), INT8_C( 16), -INT8_C( 67), -INT8_C( 102), INT8_C( 15), -INT8_C( 47), -INT8_C( 21), INT8_C( 102), -INT8_C( 124) }, { -INT8_C( 110), INT8_C( 0), INT8_C( 91), INT8_MIN, INT8_C( 112), INT8_C( 16), INT8_C( 2), INT8_C( 8), INT8_C( 37), INT8_C( 66), INT8_C( 5), -INT8_C( 96), INT8_C( 36), INT8_C( 0), INT8_C( 8), INT8_C( 41) } }, { { -INT8_C( 11), -INT8_C( 63), INT8_C( 80), INT8_C( 110), -INT8_C( 39), -INT8_C( 42), INT8_C( 26), -INT8_C( 2), -INT8_C( 99), -INT8_C( 70), -INT8_C( 95), INT8_C( 81), INT8_C( 100), INT8_C( 11), -INT8_C( 5), -INT8_C( 54) }, { -INT8_C( 58), -INT8_C( 1), INT8_C( 57), -INT8_C( 49), -INT8_C( 113), -INT8_C( 43), -INT8_C( 74), -INT8_C( 97), -INT8_C( 109), INT8_C( 80), -INT8_C( 81), INT8_C( 100), INT8_C( 60), INT8_C( 21), -INT8_C( 23), INT8_C( 49) }, { INT8_C( 49), INT8_C( 0), INT8_C( 64), INT8_C( 32), INT8_C( 80), INT8_C( 2), INT8_C( 8), INT8_C( 96), INT8_C( 12), -INT8_C( 86), INT8_C( 0), INT8_C( 17), INT8_C( 64), INT8_C( 10), INT8_C( 18), -INT8_C( 54) } }, { { -INT8_C( 42), INT8_C( 57), -INT8_C( 97), -INT8_C( 81), INT8_C( 15), -INT8_C( 70), -INT8_C( 83), -INT8_C( 84), INT8_C( 116), INT8_C( 79), -INT8_C( 2), -INT8_C( 40), INT8_C( 90), -INT8_C( 7), -INT8_C( 94), INT8_C( 33) }, { -INT8_C( 8), -INT8_C( 37), -INT8_C( 16), -INT8_C( 121), -INT8_C( 79), -INT8_C( 90), INT8_C( 39), INT8_C( 68), -INT8_C( 9), -INT8_C( 42), -INT8_C( 88), INT8_C( 51), -INT8_C( 21), -INT8_C( 111), INT8_C( 100), -INT8_C( 63) }, { INT8_C( 6), INT8_C( 32), INT8_C( 15), INT8_C( 40), INT8_C( 14), INT8_C( 24), -INT8_C( 120), -INT8_C( 88), INT8_C( 0), INT8_C( 9), INT8_C( 86), -INT8_C( 56), INT8_C( 16), INT8_C( 104), -INT8_C( 126), INT8_C( 32) } }, { { -INT8_C( 54), INT8_C( 4), INT8_C( 112), -INT8_C( 39), -INT8_C( 66), INT8_C( 29), -INT8_C( 122), INT8_C( 50), INT8_C( 108), -INT8_C( 124), INT8_C( 10), -INT8_C( 57), INT8_C( 125), -INT8_C( 83), -INT8_C( 24), INT8_C( 117) }, { -INT8_C( 120), -INT8_C( 40), -INT8_C( 3), INT8_C( 57), INT8_MAX, INT8_C( 36), INT8_C( 125), INT8_C( 
118), -INT8_C( 6), INT8_C( 38), -INT8_C( 87), -INT8_C( 27), -INT8_C( 73), INT8_C( 13), -INT8_C( 90), -INT8_C( 126) }, { INT8_C( 66), INT8_C( 4), INT8_C( 0), -INT8_C( 64), INT8_MIN, INT8_C( 25), -INT8_C( 126), INT8_C( 0), INT8_C( 4), INT8_MIN, INT8_C( 2), INT8_C( 2), INT8_C( 72), -INT8_C( 96), INT8_C( 72), INT8_C( 117) } }, { { INT8_C( 17), INT8_C( 22), INT8_C( 91), -INT8_C( 49), INT8_C( 51), -INT8_C( 31), INT8_C( 1), -INT8_C( 96), INT8_C( 101), INT8_C( 12), INT8_C( 103), -INT8_C( 30), -INT8_C( 71), INT8_C( 79), INT8_C( 88), INT8_C( 65) }, { INT8_C( 39), INT8_C( 85), INT8_C( 123), -INT8_C( 90), INT8_C( 121), -INT8_C( 8), INT8_C( 28), INT8_C( 115), INT8_C( 30), -INT8_C( 59), INT8_C( 88), -INT8_C( 42), -INT8_C( 45), -INT8_C( 2), INT8_C( 88), -INT8_C( 28) }, { INT8_C( 16), INT8_C( 2), INT8_C( 0), INT8_C( 73), INT8_C( 2), INT8_C( 1), INT8_C( 1), INT8_MIN, INT8_C( 97), INT8_C( 8), INT8_C( 39), INT8_C( 32), INT8_C( 40), INT8_C( 1), INT8_C( 0), INT8_C( 1) } }, { { INT8_C( 20), -INT8_C( 77), -INT8_C( 76), INT8_C( 71), -INT8_C( 107), -INT8_C( 75), -INT8_C( 25), -INT8_C( 6), -INT8_C( 63), INT8_C( 78), -INT8_C( 35), INT8_C( 122), -INT8_C( 99), INT8_C( 53), -INT8_C( 68), -INT8_C( 59) }, { -INT8_C( 118), INT8_C( 55), INT8_C( 107), INT8_C( 3), INT8_C( 47), -INT8_C( 120), INT8_C( 118), INT8_C( 78), INT8_C( 77), -INT8_C( 50), INT8_C( 36), INT8_C( 32), -INT8_C( 52), INT8_C( 124), INT8_C( 5), -INT8_C( 32) }, { INT8_C( 20), INT8_MIN, -INT8_C( 108), INT8_C( 68), -INT8_C( 112), INT8_C( 53), -INT8_C( 127), -INT8_C( 80), INT8_MIN, INT8_C( 0), -INT8_C( 39), INT8_C( 90), INT8_C( 17), INT8_C( 1), -INT8_C( 72), INT8_C( 5) } }, { { INT8_C( 47), -INT8_C( 71), INT8_C( 39), -INT8_C( 60), INT8_C( 110), INT8_C( 15), -INT8_C( 65), INT8_C( 48), INT8_C( 93), -INT8_C( 100), -INT8_C( 86), -INT8_C( 5), -INT8_C( 47), INT8_C( 102), -INT8_C( 64), INT8_C( 91) }, { -INT8_C( 99), INT8_C( 43), INT8_C( 94), -INT8_C( 51), -INT8_C( 77), -INT8_C( 44), INT8_C( 27), INT8_C( 1), -INT8_C( 94), INT8_C( 63), INT8_C( 33), 
INT8_C( 110), -INT8_C( 69), INT8_C( 38), INT8_C( 78), -INT8_C( 22) }, { INT8_C( 34), -INT8_C( 112), INT8_C( 33), INT8_C( 0), INT8_C( 76), INT8_C( 11), -INT8_C( 92), INT8_C( 48), INT8_C( 93), INT8_MIN, -INT8_C( 118), -INT8_C( 111), INT8_C( 64), INT8_C( 64), INT8_MIN, INT8_C( 17) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vbicq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vbicq_s8(a, b); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vbicq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 30175), INT16_C( 20143), INT16_C( 28292), -INT16_C( 7554), INT16_C( 10250), -INT16_C( 9251), -INT16_C( 25201), INT16_C( 11318) }, { -INT16_C( 27448), INT16_C( 31993), INT16_C( 5224), INT16_C( 2685), -INT16_C( 25005), INT16_C( 3704), -INT16_C( 14651), -INT16_C( 23303) }, { INT16_C( 24855), INT16_C( 518), INT16_C( 27268), -INT16_C( 8190), INT16_C( 8200), -INT16_C( 11899), INT16_C( 6410), INT16_C( 2054) } }, { { -INT16_C( 22469), -INT16_C( 16142), INT16_C( 28694), INT16_C( 8354), INT16_C( 32665), INT16_C( 10491), INT16_C( 12572), -INT16_C( 7084) }, { INT16_C( 20165), INT16_C( 11616), -INT16_C( 8862), -INT16_C( 18889), -INT16_C( 20612), INT16_C( 16836), -INT16_C( 17035), -INT16_C( 20251) }, { -INT16_C( 24518), -INT16_C( 16238), INT16_C( 8212), INT16_C( 128), INT16_C( 20609), INT16_C( 10299), INT16_C( 8), INT16_C( 
17424) } }, { { -INT16_C( 10139), INT16_C( 31600), INT16_C( 4680), -INT16_C( 7781), -INT16_C( 26991), -INT16_C( 21239), INT16_C( 24263), -INT16_C( 29550) }, { -INT16_C( 3412), INT16_C( 3769), -INT16_C( 3888), INT16_C( 19652), -INT16_C( 30305), INT16_C( 5261), INT16_C( 29254), -INT16_C( 21307) }, { INT16_C( 2113), INT16_C( 28992), INT16_C( 520), -INT16_C( 24293), INT16_C( 5632), -INT16_C( 22272), INT16_C( 3201), INT16_C( 18) } }, { { INT16_C( 13642), -INT16_C( 27865), -INT16_C( 15544), -INT16_C( 9868), INT16_C( 32345), INT16_C( 8583), INT16_C( 6620), -INT16_C( 30547) }, { INT16_C( 26379), -INT16_C( 9322), INT16_C( 23383), -INT16_C( 2265), -INT16_C( 19228), INT16_C( 10763), -INT16_C( 12249), INT16_C( 29142) }, { INT16_C( 4160), INT16_C( 33), -INT16_C( 32760), INT16_C( 2128), INT16_C( 18969), INT16_C( 388), INT16_C( 2520), -INT16_C( 30679) } }, { { -INT16_C( 506), INT16_C( 19972), INT16_C( 31169), INT16_C( 6695), -INT16_C( 20745), -INT16_C( 11461), -INT16_C( 5689), -INT16_C( 11429) }, { -INT16_C( 3760), -INT16_C( 22610), -INT16_C( 10676), INT16_C( 12446), -INT16_C( 21878), -INT16_C( 20133), INT16_C( 12666), -INT16_C( 32733) }, { INT16_C( 3590), INT16_C( 18432), INT16_C( 10625), INT16_C( 2593), INT16_C( 1141), INT16_C( 16928), -INT16_C( 14203), INT16_C( 21336) } }, { { INT16_C( 10031), -INT16_C( 3890), -INT16_C( 2400), -INT16_C( 26869), INT16_C( 18084), INT16_C( 27754), -INT16_C( 15057), INT16_C( 32575) }, { -INT16_C( 4681), INT16_C( 807), -INT16_C( 14909), INT16_C( 20020), -INT16_C( 28817), -INT16_C( 5377), INT16_C( 8896), -INT16_C( 3990) }, { INT16_C( 520), -INT16_C( 3896), INT16_C( 12832), -INT16_C( 28405), INT16_C( 16512), INT16_C( 1024), -INT16_C( 15057), INT16_C( 3861) } }, { { INT16_C( 14666), -INT16_C( 5408), -INT16_C( 5329), -INT16_C( 11390), -INT16_C( 5070), INT16_C( 24895), INT16_C( 32434), INT16_C( 27105) }, { INT16_C( 2156), INT16_C( 12140), -INT16_C( 24371), INT16_C( 15741), INT16_C( 32047), -INT16_C( 4057), -INT16_C( 28257), -INT16_C( 5664) }, { INT16_C( 
12546), -INT16_C( 16256), INT16_C( 19234), -INT16_C( 15742), -INT16_C( 32752), INT16_C( 280), INT16_C( 28192), INT16_C( 1) } }, { { -INT16_C( 16182), -INT16_C( 1580), INT16_C( 22188), -INT16_C( 8499), INT16_C( 3138), -INT16_C( 3009), INT16_C( 8331), -INT16_C( 2211) }, { -INT16_C( 13784), -INT16_C( 2522), -INT16_C( 23446), -INT16_C( 26061), INT16_C( 23073), -INT16_C( 16246), INT16_C( 27371), -INT16_C( 18774) }, { INT16_C( 194), INT16_C( 2512), INT16_C( 21124), INT16_C( 17612), INT16_C( 1090), INT16_C( 13365), INT16_C( 0), INT16_C( 16725) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vbicq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vbicq_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vbicq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 693141974), INT32_C( 380927188), INT32_C( 336327817), INT32_C( 1024157716) }, { -INT32_C( 1657589454), -INT32_C( 164141355), -INT32_C( 1414020672), INT32_C( 1432445227) }, { INT32_C( 1116491272), INT32_C( 8394752), INT32_C( 336081929), INT32_C( 671746068) } }, { { -INT32_C( 1288957473), INT32_C( 382328973), -INT32_C( 383068972), INT32_C( 1864774973) }, { INT32_C( 1007442279), INT32_C( 2134066111), INT32_C( 791341572), INT32_C( 713395275) }, { -INT32_C( 2095054696), INT32_C( 13148160), -INT32_C( 1073736496), INT32_C( 1159868724) } }, { { INT32_C( 
719171997), INT32_C( 1715578513), -INT32_C( 1202754693), INT32_C( 136803745) }, { -INT32_C( 1924909874), INT32_C( 2081191799), -INT32_C( 1398065567), INT32_C( 1607872706) }, { INT32_C( 580485393), INT32_C( 37847168), INT32_C( 272910618), INT32_C( 2180385) } }, { { INT32_C( 1938469857), -INT32_C( 707146918), -INT32_C( 678614986), INT32_C( 1809822877) }, { INT32_C( 1626875112), -INT32_C( 35912549), -INT32_C( 39221446), -INT32_C( 1721990984) }, { INT32_C( 318935809), INT32_C( 117568), INT32_C( 33826820), INT32_C( 1652765701) } }, { { -INT32_C( 1928534477), -INT32_C( 396171855), -INT32_C( 1430262003), -INT32_C( 1944739932) }, { INT32_C( 1592528323), INT32_C( 1281083409), INT32_C( 139003216), -INT32_C( 1197365883) }, { -INT32_C( 2130648528), -INT32_C( 1608505952), -INT32_C( 1565070835), INT32_C( 68426272) } }, { { INT32_C( 1028042380), -INT32_C( 1591367533), INT32_C( 1011606936), INT32_C( 1204314500) }, { -INT32_C( 2136623762), -INT32_C( 842268291), -INT32_C( 1948969722), INT32_C( 1178826682) }, { INT32_C( 1027738240), INT32_C( 539076738), INT32_C( 873128088), INT32_C( 25690116) } }, { { -INT32_C( 1199273691), -INT32_C( 900093646), INT32_C( 302425486), INT32_C( 1952042502) }, { INT32_C( 32833411), INT32_C( 114212864), -INT32_C( 1869503275), INT32_C( 1087820827) }, { -INT32_C( 1207959516), -INT32_C( 938399438), INT32_C( 33947914), INT32_C( 873007620) } }, { { -INT32_C( 1862772130), -INT32_C( 1839574524), -INT32_C( 39493385), -INT32_C( 1301152210) }, { -INT32_C( 38574339), -INT32_C( 66879194), INT32_C( 1082954789), -INT32_C( 947887511) }, { INT32_C( 4724738), INT32_C( 39342592), -INT32_C( 1121885998), INT32_C( 812751878) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vbicq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 
; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vbicq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vbicq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { -INT64_C( 4444012918604334659), -INT64_C( 796801820583266287) }, { INT64_C( 5456661970280424598), INT64_C( 7258127664946103690) }, { -INT64_C( 9204793824029353687), -INT64_C( 8052101538401503215) } }, { { -INT64_C( 3237391306650742395), -INT64_C( 7467808198727904911) }, { -INT64_C( 3595545663819657304), INT64_C( 4062101397569644115) }, { INT64_C( 1225103902074998789), -INT64_C( 9223367638807064288) } }, { { INT64_C( 8007853128983020940), -INT64_C( 2303814167882615700) }, { -INT64_C( 4713077020254315388), INT64_C( 6942608699457886319) }, { INT64_C( 4692771238685212936), -INT64_C( 9221643431464393728) } }, { { -INT64_C( 1653999200210263394), -INT64_C( 7197492071771872508) }, { INT64_C( 352965863342984379), INT64_C( 6163597989662531156) }, { -INT64_C( 1654509463869447676), -INT64_C( 8641281750316064512) } }, { { INT64_C( 993040457400399969), INT64_C( 4221392921263939552) }, { INT64_C( 5461535899326151218), -INT64_C( 3318593889820741148) }, { INT64_C( 289558620969959489), INT64_C( 3027547110263754240) } }, { { -INT64_C( 4590726168966077900), INT64_C( 3192576021848757142) }, { -INT64_C( 2843249158965121207), INT64_C( 3454654514130648752) }, { INT64_C( 18084814499037236), INT64_C( 3951645134749958) } }, { { -INT64_C( 5915559374965923769), INT64_C( 3994711485002653769) }, { -INT64_C( 3131126388082907969), -INT64_C( 6041044099464429313) }, { INT64_C( 2982427575332667456), INT64_C( 1391630469765898240) } }, { { -INT64_C( 7761186173886425722), 
-INT64_C( 2630022896049189798) }, { -INT64_C( 530550670868154452), INT64_C( 300645505253414938) }, { INT64_C( 308672505199001602), -INT64_C( 2630031813639666624) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_vbicq_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); simde_int64x2_t r = simde_vbicq_s64(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vbicq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C( 6), UINT8_C(195), UINT8_C(160), UINT8_C(106), UINT8_C( 80), UINT8_C(102), UINT8_C( 27), UINT8_C( 38), UINT8_C(175), UINT8_C(183), UINT8_C(232), UINT8_C(244), UINT8_C(131), UINT8_C(155), UINT8_C(128), UINT8_C( 86) }, { UINT8_C( 16), UINT8_C(100), UINT8_C(232), UINT8_C(249), UINT8_C(152), UINT8_C( 38), UINT8_C( 57), UINT8_C( 75), UINT8_C(125), UINT8_C(193), UINT8_C(180), UINT8_C(146), UINT8_C(196), UINT8_C(103), UINT8_C( 2), UINT8_C(202) }, { UINT8_C( 6), UINT8_C(131), UINT8_C( 0), UINT8_C( 2), UINT8_C( 64), UINT8_C( 64), UINT8_C( 2), UINT8_C( 36), UINT8_C(130), UINT8_C( 54), UINT8_C( 72), UINT8_C(100), UINT8_C( 3), UINT8_C(152), UINT8_C(128), UINT8_C( 20) } }, { { UINT8_C( 42), UINT8_C(163), UINT8_C( 52), UINT8_C(122), UINT8_C( 9), UINT8_C( 80), UINT8_C(160), UINT8_C(184), UINT8_C( 7), UINT8_C(136), UINT8_C(173), UINT8_C(138), UINT8_C( 36), UINT8_C( 45), UINT8_C(225), UINT8_C( 52) }, { UINT8_C(145), UINT8_C(201), 
UINT8_C( 45), UINT8_C( 42), UINT8_C(239), UINT8_C(102), UINT8_C(117), UINT8_C(109), UINT8_C( 40), UINT8_C( 41), UINT8_MAX, UINT8_C(236), UINT8_C(144), UINT8_C( 2), UINT8_C(182), UINT8_C(186) }, { UINT8_C( 42), UINT8_C( 34), UINT8_C( 16), UINT8_C( 80), UINT8_C( 0), UINT8_C( 16), UINT8_C(128), UINT8_C(144), UINT8_C( 7), UINT8_C(128), UINT8_C( 0), UINT8_C( 2), UINT8_C( 36), UINT8_C( 45), UINT8_C( 65), UINT8_C( 4) } }, { { UINT8_C(165), UINT8_C(234), UINT8_C( 53), UINT8_C(174), UINT8_C( 58), UINT8_C(213), UINT8_C(103), UINT8_C( 65), UINT8_C( 94), UINT8_C( 20), UINT8_C(204), UINT8_C(130), UINT8_C( 65), UINT8_C(173), UINT8_C(182), UINT8_C(211) }, { UINT8_C(118), UINT8_C(227), UINT8_C(253), UINT8_C(102), UINT8_C( 74), UINT8_C(114), UINT8_C(211), UINT8_C(114), UINT8_C(155), UINT8_C(210), UINT8_C( 94), UINT8_C( 44), UINT8_C(212), UINT8_C( 20), UINT8_C(230), UINT8_C(121) }, { UINT8_C(129), UINT8_C( 8), UINT8_C( 0), UINT8_C(136), UINT8_C( 48), UINT8_C(133), UINT8_C( 36), UINT8_C( 1), UINT8_C( 68), UINT8_C( 4), UINT8_C(128), UINT8_C(130), UINT8_C( 1), UINT8_C(169), UINT8_C( 16), UINT8_C(130) } }, { { UINT8_C(254), UINT8_C( 27), UINT8_C( 40), UINT8_C( 57), UINT8_C(241), UINT8_C(143), UINT8_C(122), UINT8_C( 79), UINT8_C(163), UINT8_C( 70), UINT8_C(209), UINT8_C(228), UINT8_C(243), UINT8_C(135), UINT8_C(183), UINT8_C(106) }, { UINT8_C(106), UINT8_C(180), UINT8_C(208), UINT8_C(180), UINT8_C( 38), UINT8_C(163), UINT8_C( 38), UINT8_C(194), UINT8_C(117), UINT8_C(132), UINT8_C(238), UINT8_C( 74), UINT8_C(152), UINT8_C(212), UINT8_C(195), UINT8_C(151) }, { UINT8_C(148), UINT8_C( 11), UINT8_C( 40), UINT8_C( 9), UINT8_C(209), UINT8_C( 12), UINT8_C( 88), UINT8_C( 13), UINT8_C(130), UINT8_C( 66), UINT8_C( 17), UINT8_C(164), UINT8_C( 99), UINT8_C( 3), UINT8_C( 52), UINT8_C(104) } }, { { UINT8_C(240), UINT8_C(235), UINT8_C(208), UINT8_C(225), UINT8_C(122), UINT8_C( 74), UINT8_C( 48), UINT8_C( 29), UINT8_C(145), UINT8_C( 1), UINT8_C( 2), UINT8_C(132), UINT8_C(136), UINT8_C(185), UINT8_C(238), 
UINT8_C(242) }, { UINT8_C(110), UINT8_C(190), UINT8_C(167), UINT8_C(148), UINT8_C( 97), UINT8_C(205), UINT8_C( 86), UINT8_C(215), UINT8_C( 82), UINT8_C( 68), UINT8_C( 33), UINT8_C(234), UINT8_C( 25), UINT8_C(228), UINT8_C(129), UINT8_C( 9) }, { UINT8_C(144), UINT8_C( 65), UINT8_C( 80), UINT8_C( 97), UINT8_C( 26), UINT8_C( 2), UINT8_C( 32), UINT8_C( 8), UINT8_C(129), UINT8_C( 1), UINT8_C( 2), UINT8_C( 4), UINT8_C(128), UINT8_C( 25), UINT8_C(110), UINT8_C(242) } }, { { UINT8_C(208), UINT8_C( 81), UINT8_C(234), UINT8_C( 74), UINT8_C(156), UINT8_C( 26), UINT8_C(104), UINT8_C( 45), UINT8_C( 27), UINT8_C(106), UINT8_C(177), UINT8_C(163), UINT8_C( 35), UINT8_C(160), UINT8_C(149), UINT8_C(145) }, { UINT8_C( 94), UINT8_C( 60), UINT8_C( 38), UINT8_C(192), UINT8_C( 10), UINT8_C(124), UINT8_C(151), UINT8_C( 92), UINT8_C(193), UINT8_C(184), UINT8_C( 70), UINT8_C(218), UINT8_C(156), UINT8_C(200), UINT8_C(227), UINT8_C(108) }, { UINT8_C(128), UINT8_C( 65), UINT8_C(200), UINT8_C( 10), UINT8_C(148), UINT8_C( 2), UINT8_C(104), UINT8_C( 33), UINT8_C( 26), UINT8_C( 66), UINT8_C(177), UINT8_C( 33), UINT8_C( 35), UINT8_C( 32), UINT8_C( 20), UINT8_C(145) } }, { { UINT8_C( 25), UINT8_C(205), UINT8_C(183), UINT8_C(181), UINT8_C(231), UINT8_C( 31), UINT8_C(226), UINT8_C( 2), UINT8_C(137), UINT8_C(148), UINT8_C(165), UINT8_C(172), UINT8_C( 52), UINT8_C( 58), UINT8_C( 62), UINT8_C(146) }, { UINT8_C(119), UINT8_C(100), UINT8_C( 82), UINT8_C(129), UINT8_C(224), UINT8_C(233), UINT8_C(221), UINT8_C(161), UINT8_C(161), UINT8_C( 35), UINT8_C(123), UINT8_C( 62), UINT8_C(235), UINT8_C( 94), UINT8_C(170), UINT8_C( 5) }, { UINT8_C( 8), UINT8_C(137), UINT8_C(165), UINT8_C( 52), UINT8_C( 7), UINT8_C( 22), UINT8_C( 34), UINT8_C( 2), UINT8_C( 8), UINT8_C(148), UINT8_C(132), UINT8_C(128), UINT8_C( 20), UINT8_C( 32), UINT8_C( 20), UINT8_C(146) } }, { { UINT8_C( 43), UINT8_C( 97), UINT8_C(186), UINT8_C( 18), UINT8_C(128), UINT8_C(157), UINT8_C( 20), UINT8_C( 9), UINT8_C( 49), UINT8_C(185), UINT8_C(182), 
UINT8_C(101), UINT8_C(244), UINT8_C(244), UINT8_C(247), UINT8_C(107) }, { UINT8_C( 88), UINT8_C( 74), UINT8_C(236), UINT8_C( 56), UINT8_C( 51), UINT8_C(201), UINT8_C(218), UINT8_C(213), UINT8_C(236), UINT8_C( 85), UINT8_C( 19), UINT8_C(216), UINT8_C(180), UINT8_C(189), UINT8_C(221), UINT8_C(223) }, { UINT8_C( 35), UINT8_C( 33), UINT8_C( 18), UINT8_C( 2), UINT8_C(128), UINT8_C( 20), UINT8_C( 4), UINT8_C( 8), UINT8_C( 17), UINT8_C(168), UINT8_C(164), UINT8_C( 37), UINT8_C( 64), UINT8_C( 64), UINT8_C( 34), UINT8_C( 32) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vbicq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vbicq_u8(a, b); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vbicq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(38687), UINT16_C(40946), UINT16_C( 1588), UINT16_C(26025), UINT16_C(24512), UINT16_C(46282), UINT16_C(49747), UINT16_C(43807) }, { UINT16_C( 2828), UINT16_C(16355), UINT16_C(48596), UINT16_C(49172), UINT16_C(10003), UINT16_C(51096), UINT16_C(30181), UINT16_C( 1190) }, { UINT16_C(37907), UINT16_C(32784), UINT16_C( 544), UINT16_C( 9641), UINT16_C(22720), UINT16_C(12354), UINT16_C(33298), UINT16_C(43801) } }, { { UINT16_C(38925), UINT16_C(16803), UINT16_C(19615), UINT16_C(24487), UINT16_C(29099), UINT16_C(65043), UINT16_C(12851), UINT16_C(16297) }, { UINT16_C(36157), 
UINT16_C( 4479), UINT16_C(37706), UINT16_C(24017), UINT16_C(27323), UINT16_C(40996), UINT16_C(52191), UINT16_C(60580) }, { UINT16_C( 4096), UINT16_C(16512), UINT16_C(19605), UINT16_C( 550), UINT16_C( 4352), UINT16_C(24083), UINT16_C(12320), UINT16_C( 4873) } }, { { UINT16_C(18275), UINT16_C( 558), UINT16_C(54676), UINT16_C(16225), UINT16_C(29766), UINT16_C(31294), UINT16_C(59302), UINT16_C(58297) }, { UINT16_C(14452), UINT16_C(49140), UINT16_C(50892), UINT16_C(34588), UINT16_C(16688), UINT16_C( 3879), UINT16_C(51980), UINT16_C(28668) }, { UINT16_C(18179), UINT16_C( 10), UINT16_C( 4368), UINT16_C(14433), UINT16_C(13382), UINT16_C(28696), UINT16_C( 9378), UINT16_C(32769) } }, { { UINT16_C(10770), UINT16_C(42610), UINT16_C(54271), UINT16_C(17894), UINT16_C( 9288), UINT16_C(61119), UINT16_C(30987), UINT16_C(32978) }, { UINT16_C(50865), UINT16_C(32063), UINT16_C(23436), UINT16_C(48132), UINT16_C(11164), UINT16_C(43212), UINT16_C(51446), UINT16_C( 2328) }, { UINT16_C(10242), UINT16_C(33344), UINT16_C(32883), UINT16_C(16866), UINT16_C( 1088), UINT16_C(17971), UINT16_C(12553), UINT16_C(32962) } }, { { UINT16_C(35570), UINT16_C(61871), UINT16_C(38237), UINT16_C(42294), UINT16_C(63161), UINT16_C(50580), UINT16_C(26223), UINT16_C( 8261) }, { UINT16_C(33836), UINT16_C(47518), UINT16_C(41695), UINT16_C(31861), UINT16_C(16846), UINT16_C(50212), UINT16_C(15369), UINT16_C(64461) }, { UINT16_C( 2770), UINT16_C(16417), UINT16_C( 5376), UINT16_C(33026), UINT16_C(46641), UINT16_C( 400), UINT16_C(16998), UINT16_C( 0) } }, { { UINT16_C(32198), UINT16_C( 9452), UINT16_C( 8978), UINT16_C(52425), UINT16_C(23833), UINT16_C(34961), UINT16_C(54979), UINT16_C(61608) }, { UINT16_C(18010), UINT16_C(14761), UINT16_C( 7913), UINT16_C(47029), UINT16_C(55904), UINT16_C(27003), UINT16_C(18710), UINT16_C(56677) }, { UINT16_C(14724), UINT16_C( 1092), UINT16_C( 8466), UINT16_C(18504), UINT16_C( 1305), UINT16_C(32896), UINT16_C(38593), UINT16_C( 8328) } }, { { UINT16_C(20934), UINT16_C(55297), 
UINT16_C(51828), UINT16_C(36260), UINT16_C(13608), UINT16_C(60181), UINT16_C(48651), UINT16_C(26075) }, { UINT16_C(33796), UINT16_C(60831), UINT16_C(21667), UINT16_C( 932), UINT16_C( 8238), UINT16_C(17772), UINT16_C(53609), UINT16_C(12066) }, { UINT16_C(20930), UINT16_C( 4096), UINT16_C(35412), UINT16_C(35840), UINT16_C( 5376), UINT16_C(43537), UINT16_C(11778), UINT16_C(16601) } }, { { UINT16_C( 8995), UINT16_C(38663), UINT16_C(44269), UINT16_C( 5413), UINT16_C(15073), UINT16_C(60673), UINT16_C(56568), UINT16_C(64850) }, { UINT16_C(61793), UINT16_C( 1258), UINT16_C(36678), UINT16_C(29703), UINT16_C(29615), UINT16_C( 6329), UINT16_C(56133), UINT16_C(26695) }, { UINT16_C( 514), UINT16_C(37637), UINT16_C( 8361), UINT16_C( 288), UINT16_C( 2112), UINT16_C(58624), UINT16_C( 1208), UINT16_C(38160) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vbicq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t r = simde_vbicq_u16(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vbicq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(3976154878), UINT32_C(3691062522), UINT32_C(1472791135), UINT32_C(1079253983) }, { UINT32_C(1396981517), UINT32_C(2110213070), UINT32_C( 60129726), UINT32_C(1533795420) }, { UINT32_C(2897953010), UINT32_C(2147492912), UINT32_C(1414005313), UINT32_C( 1311619) } }, { { UINT32_C( 
625437482), UINT32_C(3993061519), UINT32_C( 709282379), UINT32_C(4067072741) }, { UINT32_C(2806361817), UINT32_C(3072593401), UINT32_C(3938171278), UINT32_C(3225757333) }, { UINT32_C( 147746), UINT32_C(1208041478), UINT32_C( 4473409), UINT32_C( 841652320) } }, { { UINT32_C( 568691857), UINT32_C( 537913045), UINT32_C(2504676784), UINT32_C(3381179632) }, { UINT32_C(1534184802), UINT32_C(1746048474), UINT32_C(3830697295), UINT32_C(2242156788) }, { UINT32_C( 545521809), UINT32_C( 877061), UINT32_C( 285741232), UINT32_C(1208493056) } }, { { UINT32_C(4205218085), UINT32_C( 521844335), UINT32_C(4222968843), UINT32_C(2059746584) }, { UINT32_C(3855955466), UINT32_C( 441313227), UINT32_C(2852102325), UINT32_C(1580114745) }, { UINT32_C( 438470949), UINT32_C( 85069860), UINT32_C(1375749130), UINT32_C( 549526528) } }, { { UINT32_C(2623067437), UINT32_C(2528932491), UINT32_C(4002574806), UINT32_C(3093845934) }, { UINT32_C(1486699917), UINT32_C(3648252708), UINT32_C(3296883339), UINT32_C(1109569813) }, { UINT32_C(2218836000), UINT32_C( 109842571), UINT32_C( 705691988), UINT32_C(3091744426) } }, { { UINT32_C( 299858566), UINT32_C(3265764332), UINT32_C(3132111116), UINT32_C( 494016656) }, { UINT32_C(2054557781), UINT32_C(2253646331), UINT32_C(1884018267), UINT32_C( 229862791) }, { UINT32_C( 25782914), UINT32_C(1084494340), UINT32_C(2326800644), UINT32_C( 272633872) } }, { { UINT32_C(3558773480), UINT32_C( 966247725), UINT32_C(2415085567), UINT32_C(3048039776) }, { UINT32_C(1882137461), UINT32_C(1744274188), UINT32_C(3772203609), UINT32_C(2548927151) }, { UINT32_C(2215678088), UINT32_C( 402670625), UINT32_C( 253756838), UINT32_C( 536896832) } }, { { UINT32_C(1231817500), UINT32_C(3481404368), UINT32_C(2858382666), UINT32_C(1348472027) }, { UINT32_C(1002540847), UINT32_C(1805826066), UINT32_C(2840295930), UINT32_C( 541145092) }, { UINT32_C(1076625424), UINT32_C(2214593472), UINT32_C( 34866176), UINT32_C(1344275675) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vbicq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t r = simde_vbicq_u32(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vbicq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(18078553570196237635), UINT64_C( 9046893060057023074) }, { UINT64_C( 5165534210969236), UINT64_C(15778620327605198548) }, { UINT64_C(18077911455335883075), UINT64_C( 2667543062192128034) } }, { { UINT64_C(12773001631632351896), UINT64_C( 9358616280878212884) }, { UINT64_C(14398354302523971786), UINT64_C(18305328625565531032) }, { UINT64_C( 3459539721204949520), UINT64_C( 135222923226022916) } }, { { UINT64_C(15923189826326034426), UINT64_C( 3304832213169727600) }, { UINT64_C(15596474861214369207), UINT64_C(11722325196785782858) }, { UINT64_C( 327074508143915592), UINT64_C( 959564977322649648) } }, { { UINT64_C(17054258213189055495), UINT64_C(14005673716370500557) }, { UINT64_C( 8450123078937893932), UINT64_C( 7221754397098069230) }, { UINT64_C( 9847157232331104259), UINT64_C( 9387230064850543361) } }, { { UINT64_C(15362024655052612551), UINT64_C(14670586423812717004) }, { UINT64_C(15087196070036833728), UINT64_C( 2272242312436811208) }, { UINT64_C( 292874747627184647), UINT64_C(13839637005044447236) } }, { { UINT64_C( 7611807526646103236), UINT64_C(14733200779913439243) }, { UINT64_C(17478777101190934237), UINT64_C( 
6778937351991654812) }, { UINT64_C( 658229873193521152), UINT64_C( 9251589920637124611) } }, { { UINT64_C( 1396621698356901211), UINT64_C( 6218403981459104116) }, { UINT64_C(13186441964294636515), UINT64_C( 1196195757836342689) }, { UINT64_C( 72136762904310808), UINT64_C( 5063172196752728148) } }, { { UINT64_C( 310302592033407795), UINT64_C( 2727722607947615499) }, { UINT64_C( 4082004640880849109), UINT64_C( 4686853095696846918) }, { UINT64_C( 308577938604294946), UINT64_C( 2652850253405097225) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vbicq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t r = simde_vbicq_u64(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vbic_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vbic_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vbic_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vbic_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vbic_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vbic_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vbic_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vbic_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vbicq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vbicq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vbicq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vbicq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vbicq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vbicq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vbicq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vbicq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" 
simde-0.7.2/test/arm/neon/bsl.c000066400000000000000000002274731400333146700162620ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN bsl #include "test-neon.h" #include "../../../simde/arm/neon/bsl.h" static int test_simde_vbsl_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; simde_float32 b[2]; simde_float32 c[2]; simde_float32 r[2]; } test_vec[] = { { { UINT32_C(2674418426), UINT32_C(3777353220) }, { SIMDE_FLOAT32_C( 405.65), SIMDE_FLOAT32_C( 927.47) }, { SIMDE_FLOAT32_C( 560.08), SIMDE_FLOAT32_C( 314.53) }, { SIMDE_FLOAT32_C( 204.28), SIMDE_FLOAT32_C( 94.92) } }, { { UINT32_C(2013377052), UINT32_C(3455611233) }, { SIMDE_FLOAT32_C( -536.59), SIMDE_FLOAT32_C( 460.49) }, { SIMDE_FLOAT32_C( 342.29), SIMDE_FLOAT32_C( 371.78) }, { SIMDE_FLOAT32_C( 340.29), SIMDE_FLOAT32_C( 451.40) } }, { { UINT32_C(3025790568), UINT32_C(2349671826) }, { SIMDE_FLOAT32_C( 788.79), SIMDE_FLOAT32_C( -926.62) }, { SIMDE_FLOAT32_C( -581.67), SIMDE_FLOAT32_C( 594.40) }, { SIMDE_FLOAT32_C( 772.66), SIMDE_FLOAT32_C( -598.96) } }, { { UINT32_C(3060552012), UINT32_C(4153264603) }, { SIMDE_FLOAT32_C( -460.00), SIMDE_FLOAT32_C( -480.26) }, { SIMDE_FLOAT32_C( -404.52), SIMDE_FLOAT32_C( 864.59) }, { SIMDE_FLOAT32_C( -460.02), SIMDE_FLOAT32_C( -416.29) } }, { { UINT32_C(1772709632), UINT32_C(1083407319) }, { SIMDE_FLOAT32_C( -513.91), SIMDE_FLOAT32_C( -140.44) }, { SIMDE_FLOAT32_C( -53.49), SIMDE_FLOAT32_C( -77.23) }, { SIMDE_FLOAT32_C( -53.24), SIMDE_FLOAT32_C( -34.11) } }, { { UINT32_C(2558525713), UINT32_C(3273960567) }, { SIMDE_FLOAT32_C( -353.14), SIMDE_FLOAT32_C( -890.91) }, { SIMDE_FLOAT32_C( 522.94), SIMDE_FLOAT32_C( 387.01) }, { SIMDE_FLOAT32_C( -1045.91), SIMDE_FLOAT32_C( -6.18) } }, { { UINT32_C(3587114586), UINT32_C(2494709396) }, { SIMDE_FLOAT32_C( -250.58), SIMDE_FLOAT32_C( 789.87) }, { SIMDE_FLOAT32_C( -999.31), SIMDE_FLOAT32_C( 725.63) }, { SIMDE_FLOAT32_C( -15.68), SIMDE_FLOAT32_C( 533.86) } }, { { UINT32_C( 951947479), UINT32_C(2368516476) }, { SIMDE_FLOAT32_C( 
-889.18), SIMDE_FLOAT32_C( 581.78) }, { SIMDE_FLOAT32_C( 860.56), SIMDE_FLOAT32_C( 106.82) }, { SIMDE_FLOAT32_C( 888.55), SIMDE_FLOAT32_C( 26777.97) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t c = simde_vld1_f32(test_vec[i].c); simde_float32x2_t r = simde_vbsl_f32(a, b, c); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vbsl_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; simde_float64 b[1]; simde_float64 c[1]; simde_float64 r[1]; } test_vec[] = { { { UINT64_C(11051150815757923623) }, { SIMDE_FLOAT64_C( -588.66) }, { SIMDE_FLOAT64_C( -849.95) }, { SIMDE_FLOAT64_C( -577.69) } }, { { UINT64_C( 3459855493406248467) }, { SIMDE_FLOAT64_C( -254.41) }, { SIMDE_FLOAT64_C( -342.68) }, { SIMDE_FLOAT64_C( -380.68) } }, { { UINT64_C( 8499033952931220346) }, { SIMDE_FLOAT64_C( 598.37) }, { SIMDE_FLOAT64_C( -678.68) }, { SIMDE_FLOAT64_C( -758.93) } }, { { UINT64_C( 4453775121523365047) }, { SIMDE_FLOAT64_C( -983.29) }, { SIMDE_FLOAT64_C( -133.31) }, { SIMDE_FLOAT64_C( -3932.65) } }, { { UINT64_C( 2741489458384578945) }, { SIMDE_FLOAT64_C( -672.98) }, { SIMDE_FLOAT64_C( -225.94) }, { SIMDE_FLOAT64_C( -168.19) } }, { { UINT64_C( 649913727035663599) }, { SIMDE_FLOAT64_C( 665.07) }, { SIMDE_FLOAT64_C( -836.34) }, { SIMDE_FLOAT64_C( -984.34) } }, { { UINT64_C( 3156572937688261107) }, { SIMDE_FLOAT64_C( -862.48) }, { SIMDE_FLOAT64_C( 875.24) }, { SIMDE_FLOAT64_C( 879.50) } }, { { UINT64_C( 6490021655321781721) }, { SIMDE_FLOAT64_C( -306.99) }, { SIMDE_FLOAT64_C( 894.38) }, { SIMDE_FLOAT64_C( 1784.82) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_float64x1_t c = simde_vld1_f64(test_vec[i].c); simde_float64x1_t r 
= simde_vbsl_f64(a, b, c); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vbsl_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; int8_t b[8]; int8_t c[8]; int8_t r[8]; } test_vec[] = { { { UINT8_C(121), UINT8_C( 28), UINT8_C( 45), UINT8_C(151), UINT8_C(120), UINT8_C( 7), UINT8_C( 82), UINT8_C( 78) }, { INT8_C( 121), -INT8_C( 36), -INT8_C( 63), INT8_C( 101), INT8_C( 97), -INT8_C( 69), INT8_C( 84), INT8_C( 14) }, { -INT8_C( 55), INT8_C( 67), -INT8_C( 91), INT8_C( 27), -INT8_C( 125), -INT8_C( 19), -INT8_C( 21), -INT8_C( 34) }, { -INT8_C( 7), INT8_C( 95), -INT8_C( 127), INT8_C( 13), -INT8_C( 29), -INT8_C( 21), -INT8_C( 7), -INT8_C( 98) } }, { { UINT8_C(174), UINT8_C( 64), UINT8_C(203), UINT8_C( 94), UINT8_C(160), UINT8_C( 0), UINT8_C(116), UINT8_C( 25) }, { INT8_C( 28), -INT8_C( 95), -INT8_C( 79), -INT8_C( 108), -INT8_C( 88), INT8_C( 3), -INT8_C( 30), INT8_C( 34) }, { -INT8_C( 33), -INT8_C( 92), -INT8_C( 121), INT8_C( 64), INT8_C( 95), -INT8_C( 37), INT8_C( 79), INT8_C( 40) }, { INT8_C( 93), -INT8_C( 92), -INT8_C( 123), INT8_C( 20), -INT8_C( 1), -INT8_C( 37), INT8_C( 107), INT8_C( 32) } }, { { UINT8_C( 30), UINT8_C(244), UINT8_C( 67), UINT8_C(161), UINT8_C(226), UINT8_C( 47), UINT8_C(127), UINT8_C(144) }, { INT8_C( 111), INT8_C( 74), -INT8_C( 17), INT8_C( 15), INT8_C( 75), INT8_C( 99), INT8_C( 41), INT8_C( 103) }, { INT8_C( 5), -INT8_C( 38), -INT8_C( 4), -INT8_C( 83), -INT8_C( 35), -INT8_C( 34), -INT8_C( 49), -INT8_C( 68) }, { INT8_C( 15), INT8_C( 74), -INT8_C( 1), INT8_C( 13), INT8_C( 95), -INT8_C( 13), -INT8_C( 87), INT8_C( 44) } }, { { UINT8_C(130), UINT8_C( 86), UINT8_C(252), UINT8_C(225), UINT8_C( 49), UINT8_C( 75), UINT8_C( 10), UINT8_C( 79) }, { INT8_C( 64), INT8_C( 77), -INT8_C( 15), INT8_C( 34), INT8_C( 124), INT8_C( 112), -INT8_C( 78), -INT8_C( 20) }, { -INT8_C( 69), -INT8_C( 95), -INT8_C( 5), INT8_C( 6), INT8_C( 5), INT8_C( 36), INT8_C( 109), INT8_C( 10) }, { INT8_C( 57), -INT8_C( 27), 
-INT8_C( 13), INT8_C( 38), INT8_C( 52), INT8_C( 100), INT8_C( 103), INT8_C( 76) } }, { { UINT8_C(254), UINT8_C(105), UINT8_C(183), UINT8_C(219), UINT8_C( 72), UINT8_C(135), UINT8_C(151), UINT8_C(202) }, { -INT8_C( 35), -INT8_C( 108), -INT8_C( 84), INT8_C( 15), -INT8_C( 33), -INT8_C( 74), INT8_C( 94), INT8_C( 31) }, { INT8_C( 3), INT8_C( 79), INT8_C( 65), INT8_MIN, -INT8_C( 64), -INT8_C( 12), INT8_C( 108), INT8_C( 123) }, { -INT8_C( 35), INT8_C( 6), -INT8_C( 28), INT8_C( 11), -INT8_C( 56), -INT8_C( 10), INT8_C( 126), INT8_C( 59) } }, { { UINT8_C(149), UINT8_C(103), UINT8_C(129), UINT8_C(154), UINT8_C(140), UINT8_C(238), UINT8_C(164), UINT8_C(138) }, { INT8_C( 88), INT8_C( 92), INT8_C( 102), -INT8_C( 96), -INT8_C( 29), -INT8_C( 3), INT8_C( 106), -INT8_C( 64) }, { -INT8_C( 111), INT8_C( 22), -INT8_C( 49), INT8_C( 113), -INT8_C( 52), INT8_C( 46), -INT8_C( 112), -INT8_C( 48) }, { INT8_C( 16), INT8_C( 84), INT8_C( 78), -INT8_C( 31), -INT8_C( 64), -INT8_C( 20), INT8_C( 48), -INT8_C( 48) } }, { { UINT8_C(125), UINT8_C(210), UINT8_C( 80), UINT8_C( 61), UINT8_C(198), UINT8_C(188), UINT8_C(184), UINT8_C( 91) }, { INT8_C( 35), INT8_C( 57), -INT8_C( 10), -INT8_C( 81), INT8_C( 40), -INT8_C( 102), INT8_C( 58), INT8_MIN }, { -INT8_C( 10), -INT8_C( 96), INT8_C( 32), -INT8_C( 39), -INT8_C( 99), -INT8_C( 118), -INT8_C( 102), INT8_C( 47) }, { -INT8_C( 93), INT8_C( 48), INT8_C( 112), -INT8_C( 19), INT8_C( 25), -INT8_C( 102), INT8_C( 58), INT8_C( 36) } }, { { UINT8_C(161), UINT8_C(105), UINT8_C(160), UINT8_C(109), UINT8_C(151), UINT8_C( 48), UINT8_C( 61), UINT8_C( 21) }, { INT8_C( 2), -INT8_C( 115), INT8_C( 82), -INT8_C( 56), INT8_C( 73), INT8_C( 11), INT8_C( 36), INT8_C( 109) }, { INT8_C( 68), INT8_C( 26), INT8_C( 28), INT8_C( 108), -INT8_C( 76), INT8_C( 86), -INT8_C( 20), -INT8_C( 85) }, { INT8_C( 68), INT8_C( 27), INT8_C( 28), INT8_C( 72), INT8_C( 33), INT8_C( 70), -INT8_C( 28), -INT8_C( 81) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t c = simde_vld1_s8(test_vec[i].c); simde_int8x8_t r = simde_vbsl_s8(a, b, c); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vbsl_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; int16_t b[4]; int16_t c[4]; int16_t r[4]; } test_vec[] = { { { UINT16_C(24105), UINT16_C(62072), UINT16_C(14929), UINT16_C(19853) }, { INT16_C( 18703), INT16_C( 2059), -INT16_C( 29657), -INT16_C( 14313) }, { INT16_C( 1857), -INT16_C( 14232), -INT16_C( 10103), -INT16_C( 1165) }, { INT16_C( 18761), INT16_C( 2056), -INT16_C( 14199), -INT16_C( 1417) } }, { { UINT16_C(64683), UINT16_C(25681), UINT16_C(27627), UINT16_C( 5351) }, { INT16_C( 24521), INT16_C( 6662), -INT16_C( 27751), -INT16_C( 22169) }, { INT16_C( 29404), INT16_C( 945), -INT16_C( 14082), INT16_C( 16332) }, { INT16_C( 24285), INT16_C( 928), -INT16_C( 31843), INT16_C( 11119) } }, { { UINT16_C(13519), UINT16_C(22791), UINT16_C(31245), UINT16_C(47188) }, { -INT16_C( 23178), INT16_C( 24860), INT16_C( 784), -INT16_C( 9610) }, { INT16_C( 31843), -INT16_C( 780), INT16_C( 23568), -INT16_C( 4955) }, { INT16_C( 27750), -INT16_C( 6668), INT16_C( 1552), -INT16_C( 8971) } }, { { UINT16_C(22222), UINT16_C(52720), UINT16_C(48158), UINT16_C(60940) }, { INT16_C( 5360), -INT16_C( 697), -INT16_C( 25714), INT16_C( 1462) }, { -INT16_C( 11712), INT16_C( 20582), -INT16_C( 9002), INT16_C( 14634) }, { -INT16_C( 27456), -INT16_C( 8890), -INT16_C( 10034), INT16_C( 5414) } }, { { UINT16_C( 8025), UINT16_C(26933), UINT16_C(56187), UINT16_C(18773) }, { INT16_C( 17713), INT16_C( 20502), INT16_C( 8961), -INT16_C( 3522) }, { -INT16_C( 31433), -INT16_C( 14865), -INT16_C( 23264), INT16_C( 24778) }, { -INT16_C( 31433), -INT16_C( 15138), INT16_C( 9985), INT16_C( 24734) } }, { { UINT16_C(12664), UINT16_C(20144), UINT16_C(56077), UINT16_C(26247) }, { -INT16_C( 17158), INT16_C( 30159), INT16_C( 
9623), -INT16_C( 13890) }, { -INT16_C( 10902), INT16_C( 27673), INT16_C( 22520), INT16_C( 12126) }, { -INT16_C( 2950), INT16_C( 25737), INT16_C( 1525), INT16_C( 18910) } }, { { UINT16_C(19932), UINT16_C(64756), UINT16_C(49139), UINT16_C(27484) }, { INT16_C( 3312), -INT16_C( 583), INT16_C( 16615), -INT16_C( 7836) }, { INT16_C( 13308), -INT16_C( 27562), INT16_C( 5464), -INT16_C( 15523) }, { INT16_C( 16112), -INT16_C( 846), INT16_C( 235), -INT16_C( 7867) } }, { { UINT16_C(30442), UINT16_C(57903), UINT16_C(36301), UINT16_C(43281) }, { INT16_C( 1498), -INT16_C( 12891), INT16_C( 452), -INT16_C( 19400) }, { -INT16_C( 3827), -INT16_C( 2638), INT16_C( 5681), INT16_C( 11990) }, { -INT16_C( 31281), -INT16_C( 10827), INT16_C( 5108), -INT16_C( 22826) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t c = simde_vld1_s16(test_vec[i].c); simde_int16x4_t r = simde_vbsl_s16(a, b, c); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vbsl_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; int32_t b[2]; int32_t c[2]; int32_t r[2]; } test_vec[] = { { { UINT32_C(1530270266), UINT32_C(1664922840) }, { INT32_C( 283414752), -INT32_C( 644297687) }, { -INT32_C( 890719696), -INT32_C( 1368609695) }, { -INT32_C( 1863536608), -INT32_C( 849834967) } }, { { UINT32_C(1917411085), UINT32_C(1617853222) }, { INT32_C( 1723638925), INT32_C( 969537625) }, { -INT32_C( 1320571512), -INT32_C( 1433738630) }, { -INT32_C( 485972851), -INT32_C( 1429669800) } }, { { UINT32_C(4118049428), UINT32_C( 732225822) }, { INT32_C( 1721625920), -INT32_C( 154727320) }, { INT32_C( 157057968), INT32_C( 54666619) }, { INT32_C( 1813831968), INT32_C( 583402601) } }, { { UINT32_C(1303678163), UINT32_C( 49757806) }, { -INT32_C( 822580048), -INT32_C( 1913021363) }, { -INT32_C( 218916983), INT32_C( 1407761059) }, { 
-INT32_C( 17621096), INT32_C( 1375313101) } }, { { UINT32_C(3093120061), UINT32_C(1018929001) }, { -INT32_C( 1719046357), INT32_C( 1570537901) }, { INT32_C( 975934701), -INT32_C( 1178131152) }, { -INT32_C( 1708403479), -INT32_C( 1646485191) } }, { { UINT32_C(1605089980), UINT32_C(2998047605) }, { INT32_C( 1097535447), -INT32_C( 629332305) }, { INT32_C( 1131677590), INT32_C( 1973489800) }, { INT32_C( 1098846102), -INT32_C( 684653907) } }, { { UINT32_C(3568290980), UINT32_C(2911729393) }, { -INT32_C( 1525925584), -INT32_C( 1537687604) }, { INT32_C( 2112209870), INT32_C( 2136498921) }, { -INT32_C( 1387525270), -INT32_C( 161990968) } }, { { UINT32_C(4056140905), UINT32_C(2154194140) }, { INT32_C( 575935792), -INT32_C( 1127226997) }, { -INT32_C( 413017062), INT32_C( 1770764699) }, { INT32_C( 643830834), -INT32_C( 372246133) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t c = simde_vld1_s32(test_vec[i].c); simde_int32x2_t r = simde_vbsl_s32(a, b, c); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vbsl_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; int64_t b[1]; int64_t c[1]; int64_t r[1]; } test_vec[] = { { { UINT64_C( 9620680368057188459) }, { -INT64_C( 3689875764262471268) }, { -INT64_C( 9004170336378258797) }, { -INT64_C( 8751963524792583528) } }, { { UINT64_C( 5744368692421788381) }, { INT64_C( 8786609585886222232) }, { INT64_C( 4656177399195942727) }, { INT64_C( 5311442486467149722) } }, { { UINT64_C( 5609318480735350411) }, { INT64_C( 3855942873821709648) }, { -INT64_C( 2036313555189412250) }, { -INT64_C( 6366524597640117148) } }, { { UINT64_C(11473855825243865935) }, { INT64_C( 2071316957582556929) }, { -INT64_C( 8420935098164046512) }, { INT64_C( 2034157141122684689) } }, { { UINT64_C(11574685700528718370) }, { -INT64_C( 6034023422833698929) }, 
{ INT64_C( 5854639938047128918) }, { -INT64_C( 1072183263522621610) } }, { { UINT64_C( 9701142154830647537) }, { -INT64_C( 8496632253797601501) }, { -INT64_C( 2336401143595421380) }, { -INT64_C( 2660659904217079507) } }, { { UINT64_C( 9148854339857723556) }, { -INT64_C( 4918997769423407406) }, { INT64_C( 2461575523855237284) }, { INT64_C( 4232264231970418816) } }, { { UINT64_C( 7665017462878459639) }, { INT64_C( 8527291724535390796) }, { -INT64_C( 4283090230389707972) }, { -INT64_C( 1812869665887753396) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t c = simde_vld1_s64(test_vec[i].c); simde_int64x1_t r = simde_vbsl_s64(a, b, c); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; } static int test_simde_vbsl_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t b[8]; uint8_t c[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(180), UINT8_C( 0), UINT8_C(193), UINT8_C( 74), UINT8_C( 97), UINT8_C(212), UINT8_C( 25), UINT8_C(176) }, { UINT8_C(139), UINT8_C(110), UINT8_C( 75), UINT8_C( 82), UINT8_C( 56), UINT8_C(156), UINT8_C(198), UINT8_C( 81) }, { UINT8_C(226), UINT8_C(123), UINT8_C(246), UINT8_C( 96), UINT8_C(177), UINT8_C( 52), UINT8_C(179), UINT8_C( 94) }, { UINT8_C(194), UINT8_C(123), UINT8_C(119), UINT8_C( 98), UINT8_C(176), UINT8_C(180), UINT8_C(162), UINT8_C( 94) } }, { { UINT8_C(197), UINT8_C(122), UINT8_C(231), UINT8_C(180), UINT8_C(179), UINT8_C(166), UINT8_C( 87), UINT8_C(103) }, { UINT8_C(166), UINT8_C( 24), UINT8_C(178), UINT8_C( 8), UINT8_C(236), UINT8_C(203), UINT8_C(184), UINT8_C(119) }, { UINT8_C( 57), UINT8_C( 3), UINT8_C(201), UINT8_C(114), UINT8_C(159), UINT8_C(143), UINT8_C(195), UINT8_C(130) }, { UINT8_C(188), UINT8_C( 25), UINT8_C(170), UINT8_C( 66), UINT8_C(172), UINT8_C(139), UINT8_C(144), UINT8_C(231) } }, { { UINT8_C( 11), UINT8_C(186), UINT8_C(226), 
UINT8_C(188), UINT8_C(238), UINT8_C(149), UINT8_C( 27), UINT8_C(180) }, { UINT8_C( 15), UINT8_C( 2), UINT8_C(104), UINT8_C(195), UINT8_C(168), UINT8_C(191), UINT8_C( 42), UINT8_C( 79) }, { UINT8_C(216), UINT8_C(220), UINT8_C( 87), UINT8_C(196), UINT8_C(168), UINT8_C( 15), UINT8_C( 60), UINT8_C(225) }, { UINT8_C(219), UINT8_C( 70), UINT8_C(117), UINT8_C(192), UINT8_C(168), UINT8_C(159), UINT8_C( 46), UINT8_C( 69) } }, { { UINT8_C( 18), UINT8_C( 5), UINT8_C( 83), UINT8_C(178), UINT8_C(149), UINT8_C( 23), UINT8_C( 52), UINT8_C(160) }, { UINT8_C(209), UINT8_C( 22), UINT8_C( 92), UINT8_C(191), UINT8_C(172), UINT8_C(119), UINT8_C(115), UINT8_C(187) }, { UINT8_C(122), UINT8_C(219), UINT8_C(126), UINT8_C( 34), UINT8_C(155), UINT8_C(169), UINT8_C(113), UINT8_C(115) }, { UINT8_C(120), UINT8_C(222), UINT8_C(124), UINT8_C(178), UINT8_C(142), UINT8_C(191), UINT8_C(113), UINT8_C(243) } }, { { UINT8_C(133), UINT8_C(200), UINT8_C( 55), UINT8_C( 45), UINT8_C(215), UINT8_C(115), UINT8_C( 15), UINT8_C(234) }, { UINT8_C(121), UINT8_C( 98), UINT8_C(156), UINT8_C( 14), UINT8_C(121), UINT8_C(208), UINT8_C(174), UINT8_C( 74) }, { UINT8_C(230), UINT8_C( 10), UINT8_C( 10), UINT8_C(146), UINT8_C(130), UINT8_C(125), UINT8_C( 78), UINT8_C(252) }, { UINT8_C( 99), UINT8_C( 66), UINT8_C( 28), UINT8_C(158), UINT8_C( 81), UINT8_C( 92), UINT8_C( 78), UINT8_C( 94) } }, { { UINT8_C( 89), UINT8_C(204), UINT8_C( 30), UINT8_C(244), UINT8_C(117), UINT8_C(144), UINT8_C(103), UINT8_C(251) }, { UINT8_C( 88), UINT8_C(158), UINT8_C( 40), UINT8_C( 48), UINT8_C( 18), UINT8_C( 55), UINT8_C( 26), UINT8_C(139) }, { UINT8_C(154), UINT8_C(182), UINT8_C(153), UINT8_C( 19), UINT8_C(134), UINT8_C( 71), UINT8_C( 94), UINT8_C(108) }, { UINT8_C(218), UINT8_C(190), UINT8_C(137), UINT8_C( 51), UINT8_C(146), UINT8_C( 87), UINT8_C( 26), UINT8_C(143) } }, { { UINT8_C( 81), UINT8_C(104), UINT8_MAX, UINT8_C(211), UINT8_C(229), UINT8_C( 77), UINT8_C(207), UINT8_C( 62) }, { UINT8_C( 25), UINT8_C(238), UINT8_C( 50), UINT8_C(143), 
UINT8_C(126), UINT8_C(153), UINT8_C(138), UINT8_C(214) }, { UINT8_C( 56), UINT8_C(178), UINT8_C( 6), UINT8_C( 74), UINT8_C(234), UINT8_C( 32), UINT8_C(213), UINT8_C(132) }, { UINT8_C( 57), UINT8_C(250), UINT8_C( 50), UINT8_C(139), UINT8_C(110), UINT8_C( 41), UINT8_C(154), UINT8_C(150) } }, { { UINT8_C(214), UINT8_C(110), UINT8_C(151), UINT8_C( 92), UINT8_C(181), UINT8_C(245), UINT8_C(201), UINT8_C( 6) }, { UINT8_C( 93), UINT8_C(200), UINT8_C(218), UINT8_C( 67), UINT8_C( 21), UINT8_C(169), UINT8_C(129), UINT8_C( 46) }, { UINT8_C(151), UINT8_C(180), UINT8_C(189), UINT8_C( 21), UINT8_C( 77), UINT8_C( 71), UINT8_C(236), UINT8_C(133) }, { UINT8_C( 85), UINT8_C(216), UINT8_C(186), UINT8_C( 65), UINT8_C( 93), UINT8_C(163), UINT8_C(165), UINT8_C(135) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t c = simde_vld1_u8(test_vec[i].c); simde_uint8x8_t r = simde_vbsl_u8(a, b, c); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vbsl_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t c[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(53761), UINT16_C(11743), UINT16_C(59878), UINT16_C(59435) }, { UINT16_C(57726), UINT16_C(48484), UINT16_C(45929), UINT16_C(50279) }, { UINT16_C(17564), UINT16_C(43054), UINT16_C( 9881), UINT16_C(30175) }, { UINT16_C(50332), UINT16_C(44388), UINT16_C(42873), UINT16_C(54775) } }, { { UINT16_C(13387), UINT16_C(12838), UINT16_C(50137), UINT16_C(56103) }, { UINT16_C( 1941), UINT16_C(31752), UINT16_C(13296), UINT16_C(28516) }, { UINT16_C(51220), UINT16_C(32300), UINT16_C(38011), UINT16_C( 5954) }, { UINT16_C(52245), UINT16_C(31752), UINT16_C( 6130), UINT16_C(20324) } }, { { UINT16_C(29144), UINT16_C(29375), UINT16_C(40599), UINT16_C(58343) }, { UINT16_C( 3539), UINT16_C(44053), UINT16_C(15568), UINT16_C(25991) }, { 
UINT16_C(36675), UINT16_C(13537), UINT16_C(17859), UINT16_C(55203) }, { UINT16_C(36819), UINT16_C( 9301), UINT16_C(24016), UINT16_C(30087) } }, { { UINT16_C(53005), UINT16_C(34901), UINT16_C(39011), UINT16_C(15520) }, { UINT16_C(24329), UINT16_C(41134), UINT16_C(38398), UINT16_C(53635) }, { UINT16_C(39074), UINT16_C(29309), UINT16_C( 1493), UINT16_C( 6359) }, { UINT16_C(24491), UINT16_C(61996), UINT16_C(38390), UINT16_C( 4311) } }, { { UINT16_C(47508), UINT16_C(22348), UINT16_C(61438), UINT16_C( 3119) }, { UINT16_C(33983), UINT16_C( 8852), UINT16_C(13340), UINT16_C( 9566) }, { UINT16_C( 3220), UINT16_C(37574), UINT16_C(18849), UINT16_C(17251) }, { UINT16_C(33940), UINT16_C(33414), UINT16_C( 9245), UINT16_C(18254) } }, { { UINT16_C(57570), UINT16_C(47029), UINT16_C(36325), UINT16_C(31439) }, { UINT16_C( 7238), UINT16_C(17617), UINT16_C( 11), UINT16_C(51792) }, { UINT16_C(58757), UINT16_C(41453), UINT16_C(19225), UINT16_C(44487) }, { UINT16_C( 1351), UINT16_C( 1241), UINT16_C(16921), UINT16_C(53056) } }, { { UINT16_C(36184), UINT16_C(63807), UINT16_C(41686), UINT16_C(47165) }, { UINT16_C(62083), UINT16_C(26735), UINT16_C(16255), UINT16_C(50658) }, { UINT16_C(46171), UINT16_C(26122), UINT16_C(23220), UINT16_C(14641) }, { UINT16_C(45059), UINT16_C(28207), UINT16_C(31350), UINT16_C(33056) } }, { { UINT16_C( 7743), UINT16_C(23003), UINT16_C(41577), UINT16_C(49414) }, { UINT16_C(17967), UINT16_C( 1467), UINT16_C(63720), UINT16_C(27582) }, { UINT16_C(11754), UINT16_C(27348), UINT16_C(46700), UINT16_C(50991) }, { UINT16_C(10223), UINT16_C( 9119), UINT16_C(46188), UINT16_C(18223) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t c = simde_vld1_u16(test_vec[i].c); simde_uint16x4_t r = simde_vbsl_u16(a, b, c); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vbsl_u32 
(SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t c[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C( 432062503), UINT32_C(1205184263) }, { UINT32_C( 818040885), UINT32_C(3381211908) }, { UINT32_C( 956542168), UINT32_C(2355800916) }, { UINT32_C( 818113789), UINT32_C(3383438164) } }, { { UINT32_C(4190297053), UINT32_C(3328249247) }, { UINT32_C(3588170190), UINT32_C(4246582472) }, { UINT32_C( 154001156), UINT32_C(3956455187) }, { UINT32_C(3522102732), UINT32_C(3985880712) } }, { { UINT32_C(3072644451), UINT32_C(1564708480) }, { UINT32_C( 139921001), UINT32_C(3788421139) }, { UINT32_C(2729946585), UINT32_C(1704973409) }, { UINT32_C( 9907449), UINT32_C(1641994337) } }, { { UINT32_C(3329150387), UINT32_C(3887153284) }, { UINT32_C(2510214421), UINT32_C(3438535011) }, { UINT32_C(4224994023), UINT32_C(3688669698) }, { UINT32_C(3181299541), UINT32_C(3707625986) } }, { { UINT32_C(2961019727), UINT32_C( 454434151) }, { UINT32_C(1860273386), UINT32_C(3663041477) }, { UINT32_C(3429954408), UINT32_C(3164103636) }, { UINT32_C(1818353770), UINT32_C(3197924309) } }, { { UINT32_C(2948033965), UINT32_C(1602917135) }, { UINT32_C(2383349799), UINT32_C( 262743333) }, { UINT32_C(1870498730), UINT32_C(2252984862) }, { UINT32_C(3461319207), UINT32_C(2412331285) } }, { { UINT32_C(2572335557), UINT32_C(3377851164) }, { UINT32_C(1735920728), UINT32_C(3351643040) }, { UINT32_C( 810931723), UINT32_C(2772434939) }, { UINT32_C( 559238730), UINT32_C(3849197539) } }, { { UINT32_C(2819931274), UINT32_C(1395547790) }, { UINT32_C( 887914775), UINT32_C(3304931948) }, { UINT32_C(3995825742), UINT32_C(2226516601) }, { UINT32_C(1714405958), UINT32_C(3300778621) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t c = simde_vld1_u32(test_vec[i].c); simde_uint32x2_t r = simde_vbsl_u32(a, b, c); simde_test_arm_neon_assert_equal_u32x2(r, 
simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vbsl_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; uint64_t b[1]; uint64_t c[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C(14528356940023175243) }, { UINT64_C(17194180434942818272) }, { UINT64_C( 7586136997276299551) }, { UINT64_C(16779928735372194132) } }, { { UINT64_C(11436712391308833254) }, { UINT64_C(15593945675035042037) }, { UINT64_C( 4879995600925107401) }, { UINT64_C(15647882750434305261) } }, { { UINT64_C( 1746683290664817516) }, { UINT64_C(15409676213281850088) }, { UINT64_C(12167280193671793930) }, { UINT64_C(12743685970628677226) } }, { { UINT64_C( 2118711289033487872) }, { UINT64_C(17747046816302536447) }, { UINT64_C(17394041849491144112) }, { UINT64_C(17600678980047904688) } }, { { UINT64_C(13016193149737162778) }, { UINT64_C( 4617103595531670853) }, { UINT64_C(16274244551423624150) }, { UINT64_C( 4709427112947291076) } }, { { UINT64_C( 3625913475667069816) }, { UINT64_C( 6347486207960368557) }, { UINT64_C( 316203249634832354) }, { UINT64_C( 1455477651332326826) } }, { { UINT64_C( 682226695321212261) }, { UINT64_C( 7644783113438554061) }, { UINT64_C(17468022663372620542) }, { UINT64_C(18023304074852354015) } }, { { UINT64_C( 9152573495301290449) }, { UINT64_C( 8928540943956962382) }, { UINT64_C( 619938784361861236) }, { UINT64_C( 8906446650004007012) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t c = simde_vld1_u64(test_vec[i].c); simde_uint64x1_t r = simde_vbsl_u64(a, b, c); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; } static int test_simde_vbslq_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; simde_float32 b[4]; simde_float32 c[4]; simde_float32 r[4]; } test_vec[] = { { { UINT32_C(2050512422), UINT32_C(1891785275), UINT32_C(3601774981), UINT32_C( 108127079) }, { 
SIMDE_FLOAT32_C( 726.27), SIMDE_FLOAT32_C( -285.01), SIMDE_FLOAT32_C( 100.31), SIMDE_FLOAT32_C( -361.34) }, { SIMDE_FLOAT32_C( 922.89), SIMDE_FLOAT32_C( 266.21), SIMDE_FLOAT32_C( 203.26), SIMDE_FLOAT32_C( 423.67) }, { SIMDE_FLOAT32_C( 986.89), SIMDE_FLOAT32_C( 270.01), SIMDE_FLOAT32_C( 403.71), SIMDE_FLOAT32_C( 357.43) } }, { { UINT32_C(3614509257), UINT32_C( 280160490), UINT32_C(2039147070), UINT32_C(3404352837) }, { SIMDE_FLOAT32_C( -48.71), SIMDE_FLOAT32_C( -96.93), SIMDE_FLOAT32_C( 765.30), SIMDE_FLOAT32_C( -365.42) }, { SIMDE_FLOAT32_C( 51.72), SIMDE_FLOAT32_C( 310.81), SIMDE_FLOAT32_C( 707.75), SIMDE_FLOAT32_C( -222.02) }, { SIMDE_FLOAT32_C( -51.72), SIMDE_FLOAT32_C( 275.62), SIMDE_FLOAT32_C( 745.28), SIMDE_FLOAT32_C( -364.04) } }, { { UINT32_C(1311624492), UINT32_C(2429832391), UINT32_C(1046955348), UINT32_C(1800345901) }, { SIMDE_FLOAT32_C( 173.43), SIMDE_FLOAT32_C( 396.70), SIMDE_FLOAT32_C( -158.88), SIMDE_FLOAT32_C( 270.13) }, { SIMDE_FLOAT32_C( 511.35), SIMDE_FLOAT32_C( -236.38), SIMDE_FLOAT32_C( -423.74), SIMDE_FLOAT32_C( -537.35) }, { SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 472.82), SIMDE_FLOAT32_C( -301.72), SIMDE_FLOAT32_C(-34646.40) } }, { { UINT32_C(3818566759), UINT32_C(4063432390), UINT32_C(2722128091), UINT32_C( 3347628) }, { SIMDE_FLOAT32_C( 968.99), SIMDE_FLOAT32_C( 614.68), SIMDE_FLOAT32_C( -913.19), SIMDE_FLOAT32_C( 634.92) }, { SIMDE_FLOAT32_C( -20.17), SIMDE_FLOAT32_C( -778.48), SIMDE_FLOAT32_C( 528.43), SIMDE_FLOAT32_C( -846.74) }, { SIMDE_FLOAT32_C( 2.80), SIMDE_FLOAT32_C( 838.42), SIMDE_FLOAT32_C( -785.42), SIMDE_FLOAT32_C( -842.94) } }, { { UINT32_C(2382485607), UINT32_C( 247141543), UINT32_C(2415023561), UINT32_C(1065493860) }, { SIMDE_FLOAT32_C( 927.69), SIMDE_FLOAT32_C( 298.66), SIMDE_FLOAT32_C( -205.27), SIMDE_FLOAT32_C( 460.63) }, { SIMDE_FLOAT32_C( -702.09), SIMDE_FLOAT32_C( -280.47), SIMDE_FLOAT32_C( 122.07), SIMDE_FLOAT32_C( -733.10) }, { SIMDE_FLOAT32_C( 703.21), SIMDE_FLOAT32_C( -298.47), SIMDE_FLOAT32_C( -196.40), 
SIMDE_FLOAT32_C( -366.52) } }, { { UINT32_C(1673032367), UINT32_C(1729866496), UINT32_C( 720706691), UINT32_C(4198084657) }, { SIMDE_FLOAT32_C( -540.17), SIMDE_FLOAT32_C( 251.09), SIMDE_FLOAT32_C( 225.13), SIMDE_FLOAT32_C( 954.24) }, { SIMDE_FLOAT32_C( -734.26), SIMDE_FLOAT32_C( -103.09), SIMDE_FLOAT32_C( 61.68), SIMDE_FLOAT32_C( -806.57) }, { SIMDE_FLOAT32_C( -542.04), SIMDE_FLOAT32_C( -446.06), SIMDE_FLOAT32_C( 56.91), SIMDE_FLOAT32_C( 930.07) } }, { { UINT32_C(2768866254), UINT32_C(1356217504), UINT32_C(2931003054), UINT32_C(3172323130) }, { SIMDE_FLOAT32_C( -417.88), SIMDE_FLOAT32_C( -357.81), SIMDE_FLOAT32_C( -174.77), SIMDE_FLOAT32_C( 472.44) }, { SIMDE_FLOAT32_C( -587.42), SIMDE_FLOAT32_C( -441.94), SIMDE_FLOAT32_C( -296.02), SIMDE_FLOAT32_C( -127.59) }, { SIMDE_FLOAT32_C( -9.18), SIMDE_FLOAT32_C( -309.79), SIMDE_FLOAT32_C( -166.52), SIMDE_FLOAT32_C( 476.31) } }, { { UINT32_C(2620104780), UINT32_C(2037634475), UINT32_C( 18775136), UINT32_C(1716647096) }, { SIMDE_FLOAT32_C( -62.34), SIMDE_FLOAT32_C( 454.95), SIMDE_FLOAT32_C( -901.11), SIMDE_FLOAT32_C( 30.12) }, { SIMDE_FLOAT32_C( 146.40), SIMDE_FLOAT32_C( -437.09), SIMDE_FLOAT32_C( -77.62), SIMDE_FLOAT32_C( 728.52) }, { SIMDE_FLOAT32_C( -185.27), SIMDE_FLOAT32_C( -470.72), SIMDE_FLOAT32_C( -64.63), SIMDE_FLOAT32_C( 3.86) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t c = simde_vld1q_f32(test_vec[i].c); simde_float32x4_t r = simde_vbslq_f32(a, b, c); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vbslq_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; simde_float64 b[2]; simde_float64 c[2]; simde_float64 r[2]; } test_vec[] = { { { UINT64_C(13764238683935597897), UINT64_C(16898862026458326254) }, { SIMDE_FLOAT64_C( -774.89), SIMDE_FLOAT64_C( -747.47) }, { SIMDE_FLOAT64_C( -332.23), 
SIMDE_FLOAT64_C( 521.67) }, { SIMDE_FLOAT64_C( -266.47), SIMDE_FLOAT64_C( -651.66) } }, { { UINT64_C(13223275955338830799), UINT64_C( 7046989072648393554) }, { SIMDE_FLOAT64_C( 994.41), SIMDE_FLOAT64_C( 822.45) }, { SIMDE_FLOAT64_C( -294.88), SIMDE_FLOAT64_C( -105.01) }, { SIMDE_FLOAT64_C( 75509.48), SIMDE_FLOAT64_C( -1640.90) } }, { { UINT64_C(11175800480316490419), UINT64_C( 9412372685746910188) }, { SIMDE_FLOAT64_C( 757.22), SIMDE_FLOAT64_C( -233.80) }, { SIMDE_FLOAT64_C( -54.26), SIMDE_FLOAT64_C( 966.12) }, { SIMDE_FLOAT64_C( 38.32), SIMDE_FLOAT64_C( -3.65) } }, { { UINT64_C(13983593746892684661), UINT64_C( 316639219210923907) }, { SIMDE_FLOAT64_C( -380.48), SIMDE_FLOAT64_C( 397.51) }, { SIMDE_FLOAT64_C( -87.86), SIMDE_FLOAT64_C( -48.58) }, { SIMDE_FLOAT64_C( -95.11), SIMDE_FLOAT64_C( -198.32) } }, { { UINT64_C(11225398681450588207), UINT64_C(16926446589552624066) }, { SIMDE_FLOAT64_C( 635.25), SIMDE_FLOAT64_C( 381.12) }, { SIMDE_FLOAT64_C( -832.41), SIMDE_FLOAT64_C( -124.32) }, { SIMDE_FLOAT64_C( 592.41), SIMDE_FLOAT64_C( 509.34) } }, { { UINT64_C(18076222544372871973), UINT64_C( 4870150017706326817) }, { SIMDE_FLOAT64_C( -197.71), SIMDE_FLOAT64_C( -523.67) }, { SIMDE_FLOAT64_C( 628.41), SIMDE_FLOAT64_C( -93.61) }, { SIMDE_FLOAT64_C( -49.31), SIMDE_FLOAT64_C( -8890.06) } }, { { UINT64_C( 2218233479757940575), UINT64_C(14533229422664185268) }, { SIMDE_FLOAT64_C( 537.41), SIMDE_FLOAT64_C( -64.67) }, { SIMDE_FLOAT64_C( -883.30), SIMDE_FLOAT64_C( -802.50) }, { SIMDE_FLOAT64_C( -625.43), SIMDE_FLOAT64_C( -6.30) } }, { { UINT64_C( 2444631349764358662), UINT64_C( 3147906818953694066) }, { SIMDE_FLOAT64_C( 483.18), SIMDE_FLOAT64_C( 409.15) }, { SIMDE_FLOAT64_C( -524.40), SIMDE_FLOAT64_C( 755.41) }, { SIMDE_FLOAT64_C( -227.58), SIMDE_FLOAT64_C( 12.79) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t c = 
simde_vld1q_f64(test_vec[i].c); simde_float64x2_t r = simde_vbslq_f64(a, b, c); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vbslq_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; int8_t b[16]; int8_t c[16]; int8_t r[16]; } test_vec[] = { { { UINT8_C(136), UINT8_C(230), UINT8_C( 97), UINT8_C( 0), UINT8_C( 20), UINT8_C( 50), UINT8_C(138), UINT8_C(231), UINT8_MAX, UINT8_C( 18), UINT8_C(190), UINT8_C(185), UINT8_C(181), UINT8_C( 25), UINT8_C( 53), UINT8_C( 6) }, { INT8_C( 60), -INT8_C( 30), INT8_C( 93), -INT8_C( 89), -INT8_C( 3), INT8_C( 121), INT8_C( 83), -INT8_C( 123), INT8_C( 102), -INT8_C( 115), -INT8_C( 97), -INT8_C( 39), -INT8_C( 104), INT8_C( 82), -INT8_C( 24), INT8_C( 32) }, { INT8_C( 56), INT8_C( 73), INT8_C( 32), INT8_C( 76), INT8_C( 123), -INT8_C( 85), INT8_C( 51), INT8_C( 122), -INT8_C( 67), -INT8_C( 15), INT8_C( 51), INT8_C( 114), INT8_C( 11), INT8_C( 104), INT8_C( 120), INT8_C( 71) }, { INT8_C( 56), -INT8_C( 21), INT8_C( 65), INT8_C( 76), INT8_MAX, -INT8_C( 71), INT8_C( 51), -INT8_C( 99), INT8_C( 102), -INT8_C( 31), -INT8_C( 97), -INT8_C( 37), -INT8_C( 102), INT8_C( 112), INT8_C( 104), INT8_C( 65) } }, { { UINT8_C( 74), UINT8_C(214), UINT8_C(238), UINT8_C( 71), UINT8_C( 79), UINT8_C( 65), UINT8_C(204), UINT8_C(181), UINT8_C(207), UINT8_C(107), UINT8_C(142), UINT8_C(103), UINT8_C(189), UINT8_C(118), UINT8_C(136), UINT8_C(245) }, { -INT8_C( 65), -INT8_C( 88), INT8_C( 65), INT8_C( 58), INT8_C( 83), INT8_C( 117), -INT8_C( 76), INT8_C( 17), INT8_C( 102), -INT8_C( 25), -INT8_C( 125), INT8_C( 113), INT8_C( 79), -INT8_C( 4), -INT8_C( 72), -INT8_C( 103) }, { -INT8_C( 46), -INT8_C( 90), -INT8_C( 32), INT8_C( 33), -INT8_C( 24), -INT8_C( 84), -INT8_C( 42), -INT8_C( 73), INT8_C( 23), INT8_C( 100), INT8_C( 30), -INT8_C( 44), -INT8_C( 37), -INT8_C( 90), -INT8_C( 55), -INT8_C( 102) }, { -INT8_C( 102), -INT8_C( 96), INT8_C( 64), INT8_C( 34), -INT8_C( 29), -INT8_C( 19), -INT8_C( 106), INT8_C( 19), 
INT8_C( 86), INT8_C( 103), -INT8_C( 110), -INT8_C( 15), INT8_C( 79), -INT8_C( 12), -INT8_C( 55), -INT8_C( 101) } }, { { UINT8_C( 79), UINT8_C( 11), UINT8_C(213), UINT8_C(162), UINT8_C(128), UINT8_C(137), UINT8_C(179), UINT8_C(230), UINT8_C(113), UINT8_C( 55), UINT8_C( 88), UINT8_C(192), UINT8_C( 51), UINT8_C( 16), UINT8_C( 90), UINT8_C( 5) }, { -INT8_C( 73), INT8_C( 58), INT8_C( 38), -INT8_C( 97), -INT8_C( 25), -INT8_C( 4), INT8_C( 86), -INT8_C( 2), INT8_C( 96), INT8_C( 116), -INT8_C( 45), INT8_C( 59), INT8_C( 27), -INT8_C( 100), -INT8_C( 42), INT8_C( 106) }, { -INT8_C( 89), -INT8_C( 85), INT8_C( 12), INT8_C( 39), INT8_C( 52), -INT8_C( 64), INT8_C( 14), -INT8_C( 91), -INT8_C( 9), INT8_C( 102), INT8_C( 102), INT8_C( 42), INT8_C( 118), -INT8_C( 64), INT8_C( 47), INT8_C( 45) }, { -INT8_C( 89), -INT8_C( 86), INT8_C( 12), -INT8_C( 121), -INT8_C( 76), -INT8_C( 56), INT8_C( 30), -INT8_C( 25), -INT8_C( 26), INT8_C( 116), INT8_C( 118), INT8_C( 42), INT8_C( 87), -INT8_C( 48), INT8_C( 119), INT8_C( 40) } }, { { UINT8_C(250), UINT8_C( 85), UINT8_C(204), UINT8_C(225), UINT8_C( 81), UINT8_C( 34), UINT8_C(224), UINT8_C(177), UINT8_C(151), UINT8_C(179), UINT8_C(237), UINT8_C(178), UINT8_C( 79), UINT8_C(195), UINT8_C( 28), UINT8_C(247) }, { INT8_C( 110), INT8_C( 40), INT8_C( 30), -INT8_C( 94), -INT8_C( 24), INT8_C( 44), INT8_C( 72), -INT8_C( 33), -INT8_C( 110), -INT8_C( 82), INT8_C( 9), INT8_C( 9), INT8_C( 110), INT8_C( 56), INT8_C( 54), INT8_C( 104) }, { -INT8_C( 115), INT8_C( 3), INT8_C( 74), -INT8_C( 34), INT8_C( 37), INT8_C( 42), -INT8_C( 112), -INT8_C( 68), -INT8_C( 35), INT8_C( 125), INT8_C( 110), INT8_C( 44), INT8_C( 64), -INT8_C( 118), INT8_C( 35), -INT8_C( 82) }, { INT8_C( 111), INT8_C( 2), INT8_C( 14), -INT8_C( 66), INT8_C( 100), INT8_C( 40), INT8_C( 80), -INT8_C( 99), -INT8_C( 38), -INT8_C( 18), INT8_C( 11), INT8_C( 12), INT8_C( 78), INT8_C( 8), INT8_C( 55), INT8_C( 104) } }, { { UINT8_C(179), UINT8_C( 66), UINT8_C( 80), UINT8_C(155), UINT8_C(110), UINT8_C(152), 
UINT8_C(123), UINT8_C( 1), UINT8_C( 70), UINT8_C(132), UINT8_C( 10), UINT8_C(180), UINT8_C(189), UINT8_C( 64), UINT8_C( 29), UINT8_C( 74) }, { INT8_C( 67), INT8_C( 103), INT8_C( 41), INT8_C( 105), -INT8_C( 111), -INT8_C( 71), INT8_C( 37), INT8_C( 110), INT8_C( 54), -INT8_C( 108), -INT8_C( 102), INT8_C( 118), INT8_C( 30), -INT8_C( 66), INT8_C( 36), -INT8_C( 47) }, { INT8_C( 0), INT8_C( 116), INT8_C( 109), INT8_C( 110), INT8_C( 13), -INT8_C( 24), INT8_C( 111), INT8_C( 83), INT8_C( 108), INT8_C( 121), INT8_C( 8), INT8_C( 41), -INT8_C( 70), INT8_C( 37), INT8_C( 116), -INT8_C( 3) }, { INT8_C( 3), INT8_C( 118), INT8_C( 45), INT8_C( 109), INT8_C( 1), -INT8_C( 8), INT8_C( 37), INT8_C( 82), INT8_C( 46), -INT8_C( 3), INT8_C( 10), INT8_C( 61), INT8_C( 30), INT8_C( 37), INT8_C( 100), -INT8_C( 11) } }, { { UINT8_C(140), UINT8_C(157), UINT8_C(102), UINT8_C( 29), UINT8_C( 86), UINT8_C(140), UINT8_C(139), UINT8_C(140), UINT8_C( 32), UINT8_C( 37), UINT8_C( 2), UINT8_C( 62), UINT8_C(227), UINT8_C( 38), UINT8_C( 16), UINT8_C(227) }, { -INT8_C( 102), INT8_C( 125), INT8_C( 82), -INT8_C( 89), INT8_C( 101), -INT8_C( 63), -INT8_C( 5), -INT8_C( 47), INT8_C( 59), INT8_C( 3), -INT8_C( 5), -INT8_C( 11), INT8_C( 40), INT8_C( 111), -INT8_C( 14), -INT8_C( 76) }, { INT8_C( 12), INT8_C( 89), -INT8_C( 47), INT8_C( 98), -INT8_C( 27), INT8_C( 92), -INT8_C( 18), INT8_C( 5), -INT8_C( 127), -INT8_C( 16), INT8_C( 67), INT8_C( 101), INT8_C( 22), INT8_C( 83), INT8_C( 72), -INT8_C( 80) }, { -INT8_C( 120), INT8_C( 93), -INT8_C( 45), INT8_C( 103), -INT8_C( 27), -INT8_C( 48), -INT8_C( 17), -INT8_C( 127), -INT8_C( 95), -INT8_C( 47), INT8_C( 67), INT8_C( 117), INT8_C( 52), INT8_C( 119), INT8_C( 88), -INT8_C( 80) } }, { { UINT8_C(208), UINT8_C(154), UINT8_C( 88), UINT8_C( 53), UINT8_C( 92), UINT8_C( 83), UINT8_C( 7), UINT8_C(151), UINT8_C( 86), UINT8_C( 2), UINT8_C(140), UINT8_C(126), UINT8_C(113), UINT8_C(126), UINT8_C( 50), UINT8_C(125) }, { -INT8_C( 41), INT8_C( 3), -INT8_C( 33), -INT8_C( 68), INT8_C( 95), 
-INT8_C( 51), -INT8_C( 63), -INT8_C( 32), -INT8_C( 67), INT8_C( 5), INT8_C( 69), -INT8_C( 45), INT8_C( 88), -INT8_C( 114), -INT8_C( 125), INT8_C( 41) }, { INT8_C( 40), -INT8_C( 37), INT8_C( 94), -INT8_C( 124), INT8_C( 46), INT8_C( 101), INT8_C( 27), -INT8_C( 124), INT8_C( 103), -INT8_C( 89), INT8_C( 2), -INT8_C( 40), INT8_C( 38), INT8_C( 52), INT8_C( 85), -INT8_C( 3) }, { -INT8_C( 8), INT8_C( 67), INT8_C( 94), -INT8_C( 76), INT8_C( 126), INT8_C( 101), INT8_C( 25), INT8_MIN, INT8_C( 53), -INT8_C( 91), INT8_C( 6), -INT8_C( 46), INT8_C( 86), INT8_C( 14), INT8_C( 71), -INT8_C( 87) } }, { { UINT8_C( 55), UINT8_C( 52), UINT8_C(186), UINT8_C(150), UINT8_C( 1), UINT8_C(123), UINT8_C(119), UINT8_C(190), UINT8_C(128), UINT8_C(188), UINT8_C(145), UINT8_C(217), UINT8_C( 74), UINT8_C( 21), UINT8_C( 2), UINT8_C(115) }, { -INT8_C( 16), INT8_C( 96), -INT8_C( 9), INT8_C( 31), -INT8_C( 58), INT8_C( 19), -INT8_C( 93), INT8_C( 45), -INT8_C( 70), -INT8_C( 90), INT8_C( 6), -INT8_C( 32), -INT8_C( 38), INT8_C( 91), -INT8_C( 34), INT8_C( 18) }, { -INT8_C( 112), -INT8_C( 104), -INT8_C( 88), -INT8_C( 111), INT8_C( 19), INT8_C( 31), INT8_C( 80), -INT8_C( 108), -INT8_C( 36), -INT8_C( 31), INT8_C( 109), INT8_C( 38), -INT8_C( 10), INT8_C( 111), -INT8_C( 103), -INT8_C( 25) }, { -INT8_C( 80), -INT8_C( 88), -INT8_C( 78), INT8_C( 23), INT8_C( 18), INT8_C( 23), INT8_C( 35), INT8_C( 44), -INT8_C( 36), -INT8_C( 27), INT8_C( 108), -INT8_C( 26), -INT8_C( 2), INT8_C( 123), -INT8_C( 101), -INT8_C( 106) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t c = simde_vld1q_s8(test_vec[i].c); simde_int8x16_t r = simde_vbslq_s8(a, b, c); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vbslq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; int16_t b[8]; int16_t c[8]; int16_t r[8]; } test_vec[] = { { { 
UINT16_C(29959), UINT16_C( 5999), UINT16_C(39015), UINT16_C(20558), UINT16_C(58240), UINT16_C(60934), UINT16_C(14982), UINT16_C(23147) }, { INT16_C( 25631), INT16_C( 21395), -INT16_C( 29489), -INT16_C( 5080), INT16_C( 13097), -INT16_C( 13303), -INT16_C( 17379), INT16_C( 9417) }, { INT16_C( 14385), -INT16_C( 26564), -INT16_C( 30000), INT16_C( 20712), -INT16_C( 4498), -INT16_C( 3010), -INT16_C( 21976), INT16_C( 18254) }, { INT16_C( 27703), -INT16_C( 25837), -INT16_C( 29993), INT16_C( 16552), INT16_C( 12142), -INT16_C( 9160), -INT16_C( 18388), INT16_C( 1357) } }, { { UINT16_C(57870), UINT16_C(56730), UINT16_C(49774), UINT16_C(38857), UINT16_C(54005), UINT16_C( 4707), UINT16_C(11407), UINT16_C(49207) }, { INT16_C( 29540), INT16_C( 13657), INT16_C( 16893), INT16_C( 27525), -INT16_C( 15312), INT16_C( 22623), -INT16_C( 20882), INT16_C( 31904) }, { INT16_C( 14992), -INT16_C( 422), INT16_C( 9213), -INT16_C( 3434), -INT16_C( 1546), -INT16_C( 31483), INT16_C( 15398), -INT16_C( 30139) }, { INT16_C( 31380), INT16_C( 14168), INT16_C( 25085), INT16_C( 25495), -INT16_C( 5838), -INT16_C( 27321), INT16_C( 15406), INT16_C( 19040) } }, { { UINT16_C(40623), UINT16_C(44223), UINT16_C(17888), UINT16_C( 4120), UINT16_C(30473), UINT16_C(30568), UINT16_C( 2085), UINT16_C(46579) }, { INT16_C( 19779), INT16_C( 16564), INT16_C( 19057), INT16_C( 26418), INT16_C( 14147), INT16_C( 27116), INT16_C( 12659), INT16_C( 8948) }, { -INT16_C( 19504), -INT16_C( 20273), -INT16_C( 6152), INT16_C( 448), INT16_C( 10334), -INT16_C( 31624), INT16_C( 27697), INT16_C( 29753) }, { INT16_C( 11603), INT16_C( 4340), -INT16_C( 7560), INT16_C( 464), INT16_C( 16215), -INT16_C( 7816), INT16_C( 25649), INT16_C( 24824) } }, { { UINT16_C(60857), UINT16_C(10932), UINT16_C(58935), UINT16_C(31633), UINT16_C(32030), UINT16_C(37348), UINT16_C(55471), UINT16_C(32692) }, { -INT16_C( 31860), -INT16_C( 31697), -INT16_C( 4246), -INT16_C( 14202), -INT16_C( 489), INT16_C( 18508), -INT16_C( 31126), INT16_C( 9404) }, { INT16_C( 28787), 
-INT16_C( 21682), -INT16_C( 8105), INT16_C( 29990), INT16_C( 2653), INT16_C( 3078), -INT16_C( 17693), INT16_C( 28555) }, { -INT16_C( 28214), -INT16_C( 32402), -INT16_C( 6558), INT16_C( 19622), INT16_C( 32343), INT16_C( 3142), -INT16_C( 23958), INT16_C( 9407) } }, { { UINT16_C(47677), UINT16_C(42995), UINT16_C(31145), UINT16_C(49520), UINT16_C(48248), UINT16_C(57865), UINT16_C(50754), UINT16_C(46598) }, { INT16_C( 21814), -INT16_C( 29343), -INT16_C( 30923), -INT16_C( 28158), INT16_C( 2449), INT16_C( 29855), INT16_C( 10947), INT16_C( 483) }, { -INT16_C( 10267), -INT16_C( 29016), INT16_C( 6224), -INT16_C( 14257), INT16_C( 22997), INT16_C( 6059), -INT16_C( 20193), INT16_C( 21965) }, { INT16_C( 22004), -INT16_C( 29335), INT16_C( 369), -INT16_C( 30705), INT16_C( 18837), INT16_C( 30123), INT16_C( 13151), INT16_C( 16843) } }, { { UINT16_C(11782), UINT16_C(15331), UINT16_C(58805), UINT16_C(18382), UINT16_C(28142), UINT16_C(45755), UINT16_C(40855), UINT16_C(31923) }, { INT16_C( 23414), -INT16_C( 14837), INT16_C( 23156), INT16_C( 18831), INT16_C( 15027), -INT16_C( 11680), INT16_C( 12011), -INT16_C( 3544) }, { INT16_C( 2908), INT16_C( 4653), -INT16_C( 1040), -INT16_C( 8359), INT16_C( 5224), INT16_C( 145), INT16_C( 17587), INT16_C( 10620) }, { INT16_C( 2910), INT16_C( 527), INT16_C( 23156), -INT16_C( 9825), INT16_C( 14498), -INT16_C( 28128), INT16_C( 20131), INT16_C( 29036) } }, { { UINT16_C(34719), UINT16_C( 5104), UINT16_C(32738), UINT16_C(38236), UINT16_C(48569), UINT16_C(42088), UINT16_C(37099), UINT16_C(18326) }, { -INT16_C( 15205), -INT16_C( 29863), -INT16_C( 19777), INT16_C( 10346), -INT16_C( 1081), INT16_C( 31272), -INT16_C( 23489), -INT16_C( 8284) }, { -INT16_C( 27604), INT16_C( 3826), INT16_C( 20243), -INT16_C( 13149), INT16_C( 2828), -INT16_C( 2192), INT16_C( 1947), INT16_C( 13886) }, { -INT16_C( 27461), INT16_C( 3922), INT16_C( 12979), INT16_C( 18667), -INT16_C( 17531), INT16_C( 29496), -INT16_C( 30917), INT16_C( 30636) } }, { { UINT16_C(39115), UINT16_C(35522), 
UINT16_C(11338), UINT16_C( 4530), UINT16_C(55848), UINT16_C(26508), UINT16_C(12415), UINT16_C(43846) }, { INT16_C( 14788), -INT16_C( 10311), INT16_C( 23688), -INT16_C( 27485), INT16_C( 4968), INT16_C( 907), -INT16_C( 14054), -INT16_C( 6854) }, { -INT16_C( 927), -INT16_C( 21392), INT16_C( 8744), INT16_C( 20669), INT16_C( 18941), INT16_C( 31928), -INT16_C( 391), INT16_C( 15655) }, { INT16_C( 31968), -INT16_C( 22864), INT16_C( 3624), INT16_C( 20655), INT16_C( 5117), INT16_C( 7096), -INT16_C( 12774), -INT16_C( 19165) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t c = simde_vld1q_s16(test_vec[i].c); simde_int16x8_t r = simde_vbslq_s16(a, b, c); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vbslq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; int32_t b[4]; int32_t c[4]; int32_t r[4]; } test_vec[] = { { { UINT32_C( 166371924), UINT32_C(3163181267), UINT32_C( 825615355), UINT32_C(4176949107) }, { -INT32_C( 45381750), INT32_C( 849081286), -INT32_C( 1238516410), INT32_C( 1893146651) }, { -INT32_C( 1787182654), INT32_C( 206636049), INT32_C( 1514047463), INT32_C( 1079129526) }, { -INT32_C( 1654930558), INT32_C( 819680450), INT32_C( 2049942854), INT32_C( 1893150871) } }, { { UINT32_C(2185076156), UINT32_C(3619018896), UINT32_C(2978865814), UINT32_C(3290522626) }, { INT32_C( 928619301), -INT32_C( 2042385505), -INT32_C( 387940046), INT32_C( 1932014518) }, { INT32_C( 1643472336), -INT32_C( 751261123), -INT32_C( 1887058547), INT32_C( 1330947626) }, { INT32_C( 1675229540), -INT32_C( 2046186819), -INT32_C( 1367309029), INT32_C( 1265934890) } }, { { UINT32_C(3766922561), UINT32_C(2321992280), UINT32_C( 41043787), UINT32_C(1249221242) }, { INT32_C( 1034644223), -INT32_C( 1575951595), -INT32_C( 751725143), INT32_C( 2099414332) }, { -INT32_C( 1956730573), 
-INT32_C( 1105804173), -INT32_C( 2050979829), INT32_C( 584070435) }, { INT32_C( 735717491), -INT32_C( 1240414669), -INT32_C( 2018472695), INT32_C( 1755358521) } }, { { UINT32_C(3042933664), UINT32_C( 139948127), UINT32_C(1104906245), UINT32_C(1103101198) }, { INT32_C( 432807334), -INT32_C( 304553247), -INT32_C( 1905092501), INT32_C( 1857045198) }, { INT32_C( 488837310), -INT32_C( 2061141376), INT32_C( 281477122), -INT32_C( 1538160899) }, { INT32_C( 426514878), -INT32_C( 1922012479), INT32_C( 274106371), -INT32_C( 454016257) } }, { { UINT32_C(2226986659), UINT32_C(1802605824), UINT32_C(4244235310), UINT32_C(3832195622) }, { INT32_C( 973180346), INT32_C( 163522055), INT32_C( 605718054), -INT32_C( 1345819892) }, { -INT32_C( 1976269431), INT32_C( 1240835355), -INT32_C( 1337593975), INT32_C( 1402253209) }, { INT32_C( 167873962), INT32_C( 162866203), INT32_C( 605851559), -INT32_C( 1210306659) } }, { { UINT32_C(1133352508), UINT32_C(3813428412), UINT32_C(3741804243), UINT32_C(1536151506) }, { INT32_C( 1877328724), -INT32_C( 222766232), INT32_C( 1671626186), -INT32_C( 390711636) }, { -INT32_C( 1993653300), INT32_C( 1651275919), -INT32_C( 1337822242), -INT32_C( 1777544894) }, { -INT32_C( 878197804), -INT32_C( 500664277), INT32_C( 1665299918), -INT32_C( 863627648) } }, { { UINT32_C(4228313492), UINT32_C(2532228812), UINT32_C(1761185980), UINT32_C(2471604167) }, { -INT32_C( 2112062222), -INT32_C( 739997452), INT32_C( 1032005371), -INT32_C( 1949003785) }, { INT32_C( 1300814465), INT32_C( 1424258712), -INT32_C( 826417913), INT32_C( 2137067149) }, { -INT32_C( 2121500015), -INT32_C( 756758316), -INT32_C( 1366995525), -INT32_C( 277835825) } }, { { UINT32_C(2130804362), UINT32_C( 38987270), UINT32_C( 71292172), UINT32_C(3868136293) }, { -INT32_C( 2043471891), -INT32_C( 1780869234), -INT32_C( 2107402251), INT32_C( 805422245) }, { INT32_C( 1236206402), -INT32_C( 179633687), INT32_C( 1006209750), -INT32_C( 1960736354) }, { INT32_C( 112138184), -INT32_C( 178583569), INT32_C( 
1004773334), INT32_C( 690063551) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t c = simde_vld1q_s32(test_vec[i].c); simde_int32x4_t r = simde_vbslq_s32(a, b, c); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vbslq_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; int64_t b[2]; int64_t c[2]; int64_t r[2]; } test_vec[] = { { { UINT64_C( 7795818849416578166), UINT64_C(14966646166231909917) }, { -INT64_C( 1068368472424484962), INT64_C( 91235875088826701) }, { INT64_C( 8539869812187597094), -INT64_C( 8218959441254434116) }, { INT64_C( 8260715013321461014), INT64_C( 91306241623177389) } }, { { UINT64_C(10037573859184794066), UINT64_C( 445386730649992609) }, { INT64_C( 2823088921630323806), INT64_C( 1357614561922374746) }, { -INT64_C( 8225057344842806511), -INT64_C( 7107550753161811081) }, { INT64_C( 549080619647078995), -INT64_C( 7253403027809575338) } }, { { UINT64_C( 8299431632794057116), UINT64_C(13668588338187957598) }, { INT64_C( 5645798521896181471), -INT64_C( 7516787075213618222) }, { -INT64_C( 2087194652701817989), INT64_C( 4083130274454553920) }, { -INT64_C( 4464813712291849473), -INT64_C( 7662274460797005486) } }, { { UINT64_C( 8031428440031016527), UINT64_C(10881312548301380208) }, { -INT64_C( 6153267932422234317), -INT64_C( 7094786458336682760) }, { INT64_C( 6558431740862697207), -INT64_C( 1655133656428673708) }, { INT64_C( 4184182062128864947), -INT64_C( 213981810549875340) } }, { { UINT64_C( 6880441842545013607), UINT64_C( 5495863055540556633) }, { -INT64_C( 7489797051327116856), INT64_C( 290781913680499819) }, { INT64_C( 4686086571514309443), -INT64_C( 4898175920371025562) }, { INT64_C( 1732874077065414976), -INT64_C( 5475509944796465041) } }, { { UINT64_C( 4040712711685793591), UINT64_C( 7994841704351406930) }, { -INT64_C( 
9136079489626224371), -INT64_C( 8824449092932591655) }, { -INT64_C( 3949916946509531094), -INT64_C( 664091926299375826) }, { -INT64_C( 4522140434435614451), -INT64_C( 7743434016418128004) } }, { { UINT64_C(15713377964977283188), UINT64_C(13336337789699444033) }, { INT64_C( 6154414252077618217), INT64_C( 4559745962363265127) }, { -INT64_C( 7623221616342781352), INT64_C( 3852329258121026349) }, { INT64_C( 6063184672427610664), INT64_C( 4424286953917616749) } }, { { UINT64_C( 7910921318392145485), UINT64_C( 5289483196180560221) }, { INT64_C( 4909441564937500471), -INT64_C( 3678465080099256654) }, { INT64_C( 7066158834674354333), INT64_C( 701986301675294339) }, { INT64_C( 5048841978297511573), INT64_C( 5257376916863960722) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t c = simde_vld1q_s64(test_vec[i].c); simde_int64x2_t r = simde_vbslq_s64(a, b, c); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vbslq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t c[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(167), UINT8_C(219), UINT8_C(174), UINT8_C( 9), UINT8_C( 98), UINT8_C(152), UINT8_C(237), UINT8_C(192), UINT8_C(158), UINT8_C( 47), UINT8_C( 49), UINT8_C( 36), UINT8_C(247), UINT8_C(160), UINT8_C( 91), UINT8_C( 79) }, { UINT8_C(128), UINT8_C(172), UINT8_C(173), UINT8_C(235), UINT8_C(222), UINT8_C(206), UINT8_C(217), UINT8_C(159), UINT8_C( 71), UINT8_C(212), UINT8_C(238), UINT8_C(215), UINT8_C(120), UINT8_C(220), UINT8_C(145), UINT8_C( 32) }, { UINT8_C(184), UINT8_C( 63), UINT8_C( 41), UINT8_C( 26), UINT8_C(216), UINT8_C( 22), UINT8_C(218), UINT8_C(118), UINT8_C( 69), UINT8_C( 12), UINT8_C(154), UINT8_C( 60), UINT8_C(172), UINT8_C(245), UINT8_C(139), UINT8_C( 45) }, { UINT8_C(152), UINT8_C(172), UINT8_C(173), UINT8_C( 27), 
UINT8_C(218), UINT8_C(142), UINT8_C(219), UINT8_C(182), UINT8_C( 71), UINT8_C( 4), UINT8_C(170), UINT8_C( 28), UINT8_C(120), UINT8_C(213), UINT8_C(145), UINT8_C( 32) } }, { { UINT8_C(161), UINT8_C( 57), UINT8_C( 24), UINT8_C(128), UINT8_C( 7), UINT8_C(241), UINT8_C( 31), UINT8_C( 79), UINT8_C(198), UINT8_C( 13), UINT8_C( 38), UINT8_C( 62), UINT8_C(234), UINT8_C(183), UINT8_C( 94), UINT8_C(162) }, { UINT8_C(246), UINT8_C(135), UINT8_C(188), UINT8_C(206), UINT8_C(158), UINT8_C(150), UINT8_C( 68), UINT8_C(227), UINT8_C(162), UINT8_C(222), UINT8_C( 32), UINT8_C( 79), UINT8_C(212), UINT8_C(171), UINT8_C(124), UINT8_C(117) }, { UINT8_C(228), UINT8_C(148), UINT8_C(245), UINT8_C(236), UINT8_C(134), UINT8_C( 21), UINT8_C( 59), UINT8_C( 76), UINT8_C( 34), UINT8_C( 97), UINT8_C(138), UINT8_C( 12), UINT8_C( 24), UINT8_C(233), UINT8_C(174), UINT8_C( 14) }, { UINT8_C(228), UINT8_C(133), UINT8_C(253), UINT8_C(236), UINT8_C(134), UINT8_C(148), UINT8_C( 36), UINT8_C( 67), UINT8_C(162), UINT8_C(108), UINT8_C(168), UINT8_C( 14), UINT8_C(208), UINT8_C(235), UINT8_C(252), UINT8_C( 44) } }, { { UINT8_C(112), UINT8_C(106), UINT8_C(221), UINT8_C( 14), UINT8_C( 1), UINT8_C( 33), UINT8_C(242), UINT8_C(163), UINT8_C( 0), UINT8_C( 18), UINT8_C(242), UINT8_C(212), UINT8_C(189), UINT8_C(110), UINT8_C( 73), UINT8_C(162) }, { UINT8_C( 3), UINT8_C( 63), UINT8_C(142), UINT8_C(137), UINT8_C( 84), UINT8_C(201), UINT8_C(213), UINT8_C(118), UINT8_C( 42), UINT8_C( 95), UINT8_C(131), UINT8_C( 66), UINT8_C( 72), UINT8_C( 49), UINT8_C( 80), UINT8_C(185) }, { UINT8_C(156), UINT8_C( 45), UINT8_C(199), UINT8_C(157), UINT8_C( 79), UINT8_C(185), UINT8_C( 64), UINT8_C( 79), UINT8_C(203), UINT8_C( 51), UINT8_C( 35), UINT8_C(137), UINT8_C(161), UINT8_C(108), UINT8_C( 43), UINT8_C(164) }, { UINT8_C(140), UINT8_C( 47), UINT8_C(142), UINT8_C(153), UINT8_C( 78), UINT8_C(153), UINT8_C(208), UINT8_C(110), UINT8_C(203), UINT8_C( 51), UINT8_C(131), UINT8_C( 73), UINT8_C( 8), UINT8_C( 32), UINT8_C( 98), UINT8_C(164) } }, { 
{ UINT8_C(171), UINT8_C(185), UINT8_C( 45), UINT8_MAX, UINT8_C(130), UINT8_C( 2), UINT8_C(118), UINT8_C(172), UINT8_C( 98), UINT8_C(249), UINT8_C(238), UINT8_C(170), UINT8_C( 42), UINT8_C( 62), UINT8_C( 99), UINT8_C(198) }, { UINT8_C(108), UINT8_C( 43), UINT8_C( 99), UINT8_C(187), UINT8_C(228), UINT8_C(164), UINT8_C( 10), UINT8_C(176), UINT8_C(215), UINT8_C( 45), UINT8_C( 57), UINT8_C(120), UINT8_C(153), UINT8_C(100), UINT8_C( 29), UINT8_C( 69) }, { UINT8_C( 29), UINT8_C( 74), UINT8_C( 68), UINT8_C(159), UINT8_C( 77), UINT8_C(186), UINT8_C( 75), UINT8_C(175), UINT8_C(179), UINT8_C( 57), UINT8_C( 89), UINT8_C(222), UINT8_C(119), UINT8_C(189), UINT8_C(164), UINT8_C(227) }, { UINT8_C( 60), UINT8_C(107), UINT8_C( 97), UINT8_C(187), UINT8_C(205), UINT8_C(184), UINT8_C( 11), UINT8_C(163), UINT8_C(211), UINT8_C( 41), UINT8_C( 57), UINT8_C(124), UINT8_C( 93), UINT8_C(165), UINT8_C(133), UINT8_C(101) } }, { { UINT8_C(232), UINT8_C( 8), UINT8_C(158), UINT8_C(204), UINT8_C(172), UINT8_C(168), UINT8_C(124), UINT8_C(131), UINT8_C(213), UINT8_C(181), UINT8_C(251), UINT8_C(111), UINT8_C( 25), UINT8_C( 24), UINT8_C(180), UINT8_C( 54) }, { UINT8_C( 99), UINT8_C(248), UINT8_C(213), UINT8_C(176), UINT8_C(179), UINT8_C( 32), UINT8_C( 95), UINT8_C(102), UINT8_C( 89), UINT8_C(184), UINT8_C( 68), UINT8_C(209), UINT8_C(117), UINT8_C(233), UINT8_C(180), UINT8_C( 93) }, { UINT8_C(241), UINT8_C( 83), UINT8_C( 42), UINT8_C(157), UINT8_C(251), UINT8_C(166), UINT8_C( 32), UINT8_C(209), UINT8_C( 92), UINT8_C( 27), UINT8_C( 64), UINT8_C(117), UINT8_C( 52), UINT8_C(244), UINT8_C(172), UINT8_C(151) }, { UINT8_C(113), UINT8_C( 91), UINT8_C(180), UINT8_C(145), UINT8_C(243), UINT8_C( 38), UINT8_C( 92), UINT8_C( 82), UINT8_C( 89), UINT8_C(186), UINT8_C( 64), UINT8_C( 81), UINT8_C( 53), UINT8_C(236), UINT8_C(188), UINT8_C(149) } }, { { UINT8_C(236), UINT8_C(129), UINT8_C( 71), UINT8_C(159), UINT8_C(162), UINT8_C(166), UINT8_C( 6), UINT8_C(251), UINT8_C( 94), UINT8_C( 74), UINT8_C(204), UINT8_C(212), 
UINT8_C( 51), UINT8_C(129), UINT8_C( 49), UINT8_C( 36) }, { UINT8_C(212), UINT8_C( 91), UINT8_C(193), UINT8_C(207), UINT8_C( 2), UINT8_C(225), UINT8_C(160), UINT8_C( 94), UINT8_C(253), UINT8_C(224), UINT8_C(211), UINT8_C( 49), UINT8_C(212), UINT8_C(127), UINT8_C(200), UINT8_C(193) }, { UINT8_C( 1), UINT8_C( 15), UINT8_C( 96), UINT8_C(163), UINT8_C(181), UINT8_C(102), UINT8_C(158), UINT8_C( 19), UINT8_C(177), UINT8_C(107), UINT8_C(231), UINT8_C(228), UINT8_C(236), UINT8_C( 25), UINT8_C( 9), UINT8_C(192) }, { UINT8_C(197), UINT8_C( 15), UINT8_C( 97), UINT8_C(175), UINT8_C( 23), UINT8_C(224), UINT8_C(152), UINT8_C( 90), UINT8_C(253), UINT8_C( 97), UINT8_C(227), UINT8_C( 48), UINT8_C(220), UINT8_C( 25), UINT8_C( 8), UINT8_C(192) } }, { { UINT8_C(116), UINT8_C(202), UINT8_C(143), UINT8_C(118), UINT8_C(172), UINT8_C( 48), UINT8_C(212), UINT8_C(169), UINT8_C( 16), UINT8_C(168), UINT8_C(218), UINT8_C(229), UINT8_C( 39), UINT8_C(162), UINT8_C(166), UINT8_C( 40) }, { UINT8_C(177), UINT8_C( 6), UINT8_C(203), UINT8_C(102), UINT8_C(109), UINT8_C(106), UINT8_C(121), UINT8_C( 30), UINT8_C(213), UINT8_C( 97), UINT8_C( 2), UINT8_C(193), UINT8_C(122), UINT8_C( 11), UINT8_C(129), UINT8_C(238) }, { UINT8_C(214), UINT8_C( 16), UINT8_C(101), UINT8_C(130), UINT8_C( 64), UINT8_C( 57), UINT8_C( 43), UINT8_C( 81), UINT8_C(225), UINT8_C( 5), UINT8_C( 54), UINT8_C( 9), UINT8_C(167), UINT8_C(220), UINT8_C( 49), UINT8_C( 88) }, { UINT8_C(178), UINT8_C( 18), UINT8_C(235), UINT8_C(230), UINT8_C(108), UINT8_C( 41), UINT8_C(123), UINT8_C( 88), UINT8_C(241), UINT8_C( 37), UINT8_C( 38), UINT8_C(201), UINT8_C(162), UINT8_C( 94), UINT8_C(145), UINT8_C(120) } }, { { UINT8_C(226), UINT8_C(253), UINT8_C(190), UINT8_C( 79), UINT8_C(103), UINT8_C( 55), UINT8_C(109), UINT8_C( 60), UINT8_C(152), UINT8_C(112), UINT8_C(253), UINT8_C( 18), UINT8_C(123), UINT8_C(126), UINT8_C( 1), UINT8_C( 81) }, { UINT8_C(142), UINT8_C(102), UINT8_C(211), UINT8_C(207), UINT8_C(159), UINT8_C(254), UINT8_C( 32), UINT8_C(129), 
UINT8_C( 3), UINT8_C( 86), UINT8_C(138), UINT8_C(170), UINT8_C( 50), UINT8_C(187), UINT8_C( 2), UINT8_C( 20) }, { UINT8_C(184), UINT8_C(192), UINT8_C(100), UINT8_C( 31), UINT8_C(248), UINT8_C(209), UINT8_C( 91), UINT8_C(144), UINT8_C( 65), UINT8_C( 88), UINT8_C(163), UINT8_C(189), UINT8_C(214), UINT8_C(164), UINT8_C( 14), UINT8_C(101) }, { UINT8_C(154), UINT8_C(100), UINT8_C(210), UINT8_C( 95), UINT8_C(159), UINT8_C(246), UINT8_C( 50), UINT8_C(128), UINT8_C( 65), UINT8_C( 88), UINT8_C(138), UINT8_C(175), UINT8_C(182), UINT8_C(186), UINT8_C( 14), UINT8_C( 52) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t c = simde_vld1q_u8(test_vec[i].c); simde_uint8x16_t r = simde_vbslq_u8(a, b, c); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vbslq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t c[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(46061), UINT16_C(22274), UINT16_C(54714), UINT16_C(34948), UINT16_C(27294), UINT16_C(41079), UINT16_C(30227), UINT16_C(13653) }, { UINT16_C(23669), UINT16_C( 7091), UINT16_C(40486), UINT16_C( 9309), UINT16_C(42650), UINT16_C(59973), UINT16_C(25501), UINT16_C(35358) }, { UINT16_C( 8214), UINT16_C(53473), UINT16_C(26358), UINT16_C(37976), UINT16_C(53200), UINT16_C(58420), UINT16_C(35398), UINT16_C(47897) }, { UINT16_C( 4215), UINT16_C(37859), UINT16_C(46694), UINT16_C( 5212), UINT16_C(42970), UINT16_C(58437), UINT16_C(59989), UINT16_C(35356) } }, { { UINT16_C(52454), UINT16_C( 3542), UINT16_C(13163), UINT16_C( 1329), UINT16_C(30425), UINT16_C(30447), UINT16_C( 3802), UINT16_C(61440) }, { UINT16_C(57902), UINT16_C( 9409), UINT16_C( 6472), UINT16_C( 6328), UINT16_C(60905), UINT16_C(12284), UINT16_C( 5495), UINT16_C(24042) }, { UINT16_C(49378), UINT16_C(19818), UINT16_C(40180), UINT16_C(52562), 
UINT16_C(16914), UINT16_C(60484), UINT16_C(17488), UINT16_C(32477) }, { UINT16_C(49190), UINT16_C(17640), UINT16_C(40412), UINT16_C(51314), UINT16_C(25803), UINT16_C(44780), UINT16_C(17490), UINT16_C(24285) } }, { { UINT16_C(40486), UINT16_C(28323), UINT16_C(23479), UINT16_C(41095), UINT16_C(33608), UINT16_C(49103), UINT16_C(47513), UINT16_C(31517) }, { UINT16_C(34682), UINT16_C(28360), UINT16_C( 6691), UINT16_C(13883), UINT16_C(32604), UINT16_C(44066), UINT16_C(65476), UINT16_C(59947) }, { UINT16_C(52893), UINT16_C(21849), UINT16_C(57385), UINT16_C(29429), UINT16_C(50531), UINT16_C(64561), UINT16_C(20094), UINT16_C(63607) }, { UINT16_C(50875), UINT16_C(32728), UINT16_C(47659), UINT16_C(29299), UINT16_C(18283), UINT16_C(60466), UINT16_C(65510), UINT16_C(60011) } }, { { UINT16_C(16342), UINT16_C(63846), UINT16_C(41562), UINT16_C(46639), UINT16_C(21025), UINT16_C(58723), UINT16_C(36433), UINT16_C(61392) }, { UINT16_C(10588), UINT16_C(34116), UINT16_C(14601), UINT16_C(27895), UINT16_C(10750), UINT16_C(32105), UINT16_C(57463), UINT16_C(19829) }, { UINT16_C(56352), UINT16_C(31303), UINT16_C(30334), UINT16_C(40752), UINT16_C(37832), UINT16_C( 6789), UINT16_C(21793), UINT16_C(32009) }, { UINT16_C(59764), UINT16_C(33605), UINT16_C(29740), UINT16_C(11575), UINT16_C(33256), UINT16_C(32741), UINT16_C(53617), UINT16_C(23897) } }, { { UINT16_C(19838), UINT16_C(34563), UINT16_C(64134), UINT16_C(34291), UINT16_C(23587), UINT16_C(39682), UINT16_C(30525), UINT16_C(24040) }, { UINT16_C(12115), UINT16_C(53719), UINT16_C( 1958), UINT16_C(28273), UINT16_C(63131), UINT16_C(48264), UINT16_C(37195), UINT16_C(51514) }, { UINT16_C(15838), UINT16_C(25936), UINT16_C(17207), UINT16_C(23530), UINT16_C(60576), UINT16_C(56822), UINT16_C(56931), UINT16_C(46906) }, { UINT16_C(15826), UINT16_C(57683), UINT16_C( 951), UINT16_C(24185), UINT16_C(62595), UINT16_C(56564), UINT16_C(39243), UINT16_C(60218) } }, { { UINT16_C( 4366), UINT16_C(46216), UINT16_C(63768), UINT16_C(45858), UINT16_C(44015), 
UINT16_C(14960), UINT16_C(43580), UINT16_C( 6915) }, { UINT16_C(21479), UINT16_C( 7808), UINT16_C(27287), UINT16_C(14201), UINT16_C(28502), UINT16_C(47380), UINT16_C(20046), UINT16_C(23664) }, { UINT16_C(63839), UINT16_C(30480), UINT16_C(13042), UINT16_C(57899), UINT16_C(39901), UINT16_C( 6684), UINT16_C( 8261), UINT16_C(11317) }, { UINT16_C(63831), UINT16_C(22416), UINT16_C(27378), UINT16_C(29481), UINT16_C(15190), UINT16_C(14364), UINT16_C( 2637), UINT16_C(15412) } }, { { UINT16_C(46451), UINT16_C( 2634), UINT16_C(50207), UINT16_C(30017), UINT16_C(21811), UINT16_C(33070), UINT16_C(40867), UINT16_C( 733) }, { UINT16_C(60824), UINT16_C(35450), UINT16_C(42272), UINT16_C(64876), UINT16_C(35136), UINT16_C(34071), UINT16_C(19625), UINT16_C( 7345) }, { UINT16_C(64257), UINT16_C( 8231), UINT16_C(26815), UINT16_C(62357), UINT16_C(50366), UINT16_C(24948), UINT16_C(21091), UINT16_C(64356) }, { UINT16_C(61200), UINT16_C(10863), UINT16_C(44192), UINT16_C(63444), UINT16_C(33164), UINT16_C(57686), UINT16_C(19681), UINT16_C(63921) } }, { { UINT16_C(56895), UINT16_C(24453), UINT16_C(62083), UINT16_C(50013), UINT16_C(29819), UINT16_C( 9288), UINT16_C(63937), UINT16_C(49728) }, { UINT16_C(26612), UINT16_C(46307), UINT16_C(30928), UINT16_C(36519), UINT16_C( 6972), UINT16_C(40943), UINT16_C(21357), UINT16_C(44442) }, { UINT16_C( 8241), UINT16_C(46092), UINT16_C(26898), UINT16_C(36215), UINT16_C(49118), UINT16_C(40881), UINT16_C(61880), UINT16_C(44385) }, { UINT16_C(26164), UINT16_C(46217), UINT16_C(31120), UINT16_C(36391), UINT16_C(39868), UINT16_C(40953), UINT16_C(20857), UINT16_C(44321) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t c = simde_vld1q_u16(test_vec[i].c); simde_uint16x8_t r = simde_vbslq_u16(a, b, c); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int 
test_simde_vbslq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t c[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(2860368209), UINT32_C( 846600018), UINT32_C(1097333921), UINT32_C(3276145535) }, { UINT32_C( 238363688), UINT32_C(4256380213), UINT32_C(2983547187), UINT32_C(1925569056) }, { UINT32_C(3994829467), UINT32_C(4213215834), UINT32_C( 222070926), UINT32_C(3033629323) }, { UINT32_C(1312096906), UINT32_C(4180841752), UINT32_C( 224219183), UINT32_C(1993704096) } }, { { UINT32_C(3703703206), UINT32_C(1859745339), UINT32_C(3894390471), UINT32_C( 609936776) }, { UINT32_C(2165470759), UINT32_C(2541498888), UINT32_C(1185200314), UINT32_C(3791287611) }, { UINT32_C(3082665084), UINT32_C(4196832818), UINT32_C(3454158149), UINT32_C(1391541290) }, { UINT32_C(2738863742), UINT32_C(2524885512), UINT32_C(1172629890), UINT32_C(1929084202) } }, { { UINT32_C(3134391218), UINT32_C(4031860790), UINT32_C(1127675400), UINT32_C(3877974123) }, { UINT32_C( 530572013), UINT32_C(3189359993), UINT32_C( 898366218), UINT32_C(3917970743) }, { UINT32_C(3064224384), UINT32_C(3014129066), UINT32_C(1475796459), UINT32_C(4215151374) }, { UINT32_C( 515332768), UINT32_C(3015173560), UINT32_C( 365100011), UINT32_C(4179573543) } }, { { UINT32_C(1998249470), UINT32_C(2905945250), UINT32_C(1742913583), UINT32_C(3192940861) }, { UINT32_C(1853158595), UINT32_C(3575716842), UINT32_C( 120330233), UINT32_C( 822242099) }, { UINT32_C(3953663048), UINT32_C(2157501776), UINT32_C(3689380509), UINT32_C(2795059171) }, { UINT32_C(4004566210), UINT32_C(2242501106), UINT32_C(2670017209), UINT32_C( 814317555) } }, { { UINT32_C( 370412844), UINT32_C( 585839913), UINT32_C(2150176845), UINT32_C(3417385859) }, { UINT32_C(2562087239), UINT32_C(3541585462), UINT32_C(2880372680), UINT32_C(1649559350) }, { UINT32_C(2121819733), UINT32_C(3919602844), UINT32_C(4285122940), UINT32_C(1003100916) }, { UINT32_C(2021419861), UINT32_C(3406316724), UINT32_C(4285061496), UINT32_C(1918505846) } }, { { 
UINT32_C(2849210739), UINT32_C(2558323663), UINT32_C( 558050282), UINT32_C(3347289458) }, { UINT32_C(2554723580), UINT32_C(3699500384), UINT32_C(2732321454), UINT32_C(2011079940) }, { UINT32_C(4129337638), UINT32_C(2274270365), UINT32_C( 984142279), UINT32_C(1661021287) }, { UINT32_C(3730944116), UINT32_C(2676159824), UINT32_C( 988543663), UINT32_C(1736617221) } }, { { UINT32_C(2298169128), UINT32_C(3680795692), UINT32_C(1786593126), UINT32_C( 199318500) }, { UINT32_C(2852192525), UINT32_C(1697746845), UINT32_C(3349142112), UINT32_C( 774545414) }, { UINT32_C( 347481575), UINT32_C( 133110433), UINT32_C(1030843481), UINT32_C(3578286792) }, { UINT32_C(2617582031), UINT32_C(1168838285), UINT32_C(1461541497), UINT32_C(3727163404) } }, { { UINT32_C(4051650899), UINT32_C( 961982680), UINT32_C(2415981962), UINT32_C(2109614997) }, { UINT32_C(4069618768), UINT32_C(3891888270), UINT32_C(3022351340), UINT32_C( 294219197) }, { UINT32_C(2399275191), UINT32_C(1137203385), UINT32_C(3805595981), UINT32_C(1147114228) }, { UINT32_C(4262543604), UINT32_C(1675151529), UINT32_C(4074006989), UINT32_C( 298433013) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t c = simde_vld1q_u32(test_vec[i].c); simde_uint32x4_t r = simde_vbslq_u32(a, b, c); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vbslq_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t b[2]; uint64_t c[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 3591810382812026283), UINT64_C(16405937380248416137) }, { UINT64_C(11295520154613915977), UINT64_C( 2687092596342288586) }, { UINT64_C(10466732805528575117), UINT64_C( 2792016118067119293) }, { UINT64_C(10430880479856511245), UINT64_C( 2673514040022294716) } }, { { UINT64_C( 5815810152152417884), UINT64_C( 5402170822885590360) }, { UINT64_C( 
8670164562858372537), UINT64_C( 8710828213637800160) }, { UINT64_C( 5453936056050125530), UINT64_C(15677897956199875467) }, { UINT64_C( 6561900733515582618), UINT64_C(15700414826102440643) } }, { { UINT64_C( 7617817368925498755), UINT64_C(18041148664896162764) }, { UINT64_C(13857868751251894291), UINT64_C( 560307298380639949) }, { UINT64_C( 1764562557693419657), UINT64_C(13844451922523693832) }, { UINT64_C( 5789685480717017099), UINT64_C( 172929698024237772) } }, { { UINT64_C( 5928170346728815439), UINT64_C( 8291133416724713093) }, { UINT64_C( 6541763587432928869), UINT64_C( 8580099633672538534) }, { UINT64_C( 8500334219694430239), UINT64_C(11667858224492459662) }, { UINT64_C( 8643326235096633941), UINT64_C(17581080137045276814) } }, { { UINT64_C( 1986681675388777189), UINT64_C(15457621344735073509) }, { UINT64_C( 5745874869769961561), UINT64_C(12720051885852540072) }, { UINT64_C(15832257110833879334), UINT64_C(18310349266614756749) }, { UINT64_C(14678763632229583171), UINT64_C(13303612972326073768) } }, { { UINT64_C(12825130274157729517), UINT64_C(11365708282709001280) }, { UINT64_C(12305505563839144948), UINT64_C( 556487138878663957) }, { UINT64_C( 4997501370106183841), UINT64_C( 38856792151748725) }, { UINT64_C(16485058153964208868), UINT64_C( 412376349718983733) } }, { { UINT64_C(14444644566615901103), UINT64_C( 1188453801116525660) }, { UINT64_C( 6522642610181976892), UINT64_C(10452805128511018500) }, { UINT64_C( 365304665020889493), UINT64_C( 915913873276310582) }, { UINT64_C( 5549935201716288316), UINT64_C( 2058095493828826150) } }, { { UINT64_C( 9758687427988123438), UINT64_C( 2025395558509098408) }, { UINT64_C( 9245176770987391135), UINT64_C(17431423991996139787) }, { UINT64_C(12705905830843921738), UINT64_C( 3515717642809502503) }, { UINT64_C(12708439114211752014), UINT64_C( 3515291040853510927) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = 
simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t c = simde_vld1q_u64(test_vec[i].c); simde_uint64x2_t r = simde_vbslq_u64(a, b, c); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vbsl_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vbsl_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vbsl_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vbsl_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vbsl_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vbsl_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vbsl_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vbsl_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vbsl_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vbsl_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vbslq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vbslq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vbslq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vbslq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vbslq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vbslq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vbslq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vbslq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vbslq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vbslq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/cagt.c000066400000000000000000000320071400333146700164030ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN cagt #include "test-neon.h" #include "../../../simde/arm/neon/cagt.h" static int test_simde_vcagts_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a; simde_float32 b; uint32_t r; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 0.52), UINT32_C( 0) }, { SIMDE_FLOAT32_C( 705.02), SIMDE_MATH_NANF, UINT32_C( 0) }, { SIMDE_MATH_NANF, SIMDE_MATH_NANF, UINT32_C( 0) }, #endif { SIMDE_FLOAT32_C( 8.79), SIMDE_FLOAT32_C( 792.83), UINT32_C( 0) }, { SIMDE_FLOAT32_C( -399.97), SIMDE_FLOAT32_C( -256.84), UINT32_MAX }, { SIMDE_FLOAT32_C( 231.75), SIMDE_FLOAT32_C( -411.54), UINT32_C( 0) }, { SIMDE_FLOAT32_C( 864.59), SIMDE_FLOAT32_C( -881.95), UINT32_C( 0) }, { SIMDE_FLOAT32_C( -814.20), SIMDE_FLOAT32_C( 479.81), UINT32_MAX }, { SIMDE_FLOAT32_C( 263.32), 
SIMDE_FLOAT32_C( -797.51), UINT32_C( 0) }, { SIMDE_FLOAT32_C( 321.47), SIMDE_FLOAT32_C( -74.97), UINT32_MAX }, { SIMDE_FLOAT32_C( -57.92), SIMDE_FLOAT32_C( 535.57), UINT32_C( 0) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint32_t r = simde_vcagts_f32(test_vec[i].a, test_vec[i].b); simde_assert_equal_u32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32_t a = simde_test_codegen_random_f32(-1000.0f, 1000.0f); simde_float32_t b = simde_test_codegen_random_f32(-1000.0f, 1000.0f); uint32_t r = simde_vcagts_f32(a, b); simde_test_codegen_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcagtd_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a; simde_float64 b; uint64_t r; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 0.52), UINT64_C( 0) }, { SIMDE_FLOAT64_C( 705.02), SIMDE_MATH_NAN, UINT64_C( 0) }, { SIMDE_MATH_NAN, SIMDE_MATH_NAN, UINT64_C( 0) }, #endif { SIMDE_FLOAT64_C( -111.66), SIMDE_FLOAT64_C( -149.68), UINT64_C( 0) }, { SIMDE_FLOAT64_C( -365.17), SIMDE_FLOAT64_C( -219.70), UINT64_MAX }, { SIMDE_FLOAT64_C( -45.32), SIMDE_FLOAT64_C( 606.55), UINT64_C( 0) }, { SIMDE_FLOAT64_C( -324.50), SIMDE_FLOAT64_C( -332.43), UINT64_C( 0) }, { SIMDE_FLOAT64_C( 611.77), SIMDE_FLOAT64_C( 425.54), UINT64_MAX }, { SIMDE_FLOAT64_C( 910.11), SIMDE_FLOAT64_C( 648.44), UINT64_MAX }, { SIMDE_FLOAT64_C( 572.56), SIMDE_FLOAT64_C( -409.05), UINT64_MAX }, { SIMDE_FLOAT64_C( 265.81), SIMDE_FLOAT64_C( -418.65), UINT64_C( 0) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint64_t r = simde_vcagtd_f64(test_vec[i].a, test_vec[i].b); simde_assert_equal_u64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64_t a = 
simde_test_codegen_random_f64(-1000.0, 1000.0); simde_float64_t b = simde_test_codegen_random_f64(-1000.0, 1000.0); uint64_t r = simde_vcagtd_f64(a, b); simde_test_codegen_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcagt_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; simde_float32 b[2]; uint32_t r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 311.69), SIMDE_FLOAT32_C( -932.68) }, { SIMDE_FLOAT32_C( 98.33), SIMDE_FLOAT32_C( -552.98) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 959.61), SIMDE_FLOAT32_C( 617.75) }, { SIMDE_FLOAT32_C( -197.11), SIMDE_FLOAT32_C( 562.98) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 468.98), SIMDE_FLOAT32_C( -916.49) }, { SIMDE_FLOAT32_C( 965.35), SIMDE_FLOAT32_C( 700.25) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -647.13), SIMDE_FLOAT32_C( -147.35) }, { SIMDE_FLOAT32_C( -117.68), SIMDE_FLOAT32_C( -241.37) }, { UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -664.10), SIMDE_FLOAT32_C( -976.12) }, { SIMDE_FLOAT32_C( 874.22), SIMDE_FLOAT32_C( -12.94) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 25.04), SIMDE_FLOAT32_C( -125.75) }, { SIMDE_FLOAT32_C( 212.15), SIMDE_FLOAT32_C( 782.89) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 561.17), SIMDE_FLOAT32_C( 217.87) }, { SIMDE_FLOAT32_C( -238.74), SIMDE_FLOAT32_C( 679.32) }, { UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -965.46), SIMDE_FLOAT32_C( -738.96) }, { SIMDE_FLOAT32_C( -711.74), SIMDE_FLOAT32_C( 346.23) }, { UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_uint32x2_t r = simde_vcagt_f32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static 
int test_simde_vcagt_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[1]; simde_float64 b[1]; uint64_t r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( 85.26) }, { SIMDE_FLOAT64_C( 122.65) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -500.89) }, { SIMDE_FLOAT64_C( 936.69) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 594.89) }, { SIMDE_FLOAT64_C( 788.77) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 543.70) }, { SIMDE_FLOAT64_C( -150.09) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( -875.02) }, { SIMDE_FLOAT64_C( 442.69) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 673.76) }, { SIMDE_FLOAT64_C( 217.24) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 789.39) }, { SIMDE_FLOAT64_C( 718.78) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( -511.44) }, { SIMDE_FLOAT64_C( 752.01) }, { UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_uint64x1_t r = simde_vcagt_f64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; } static int test_simde_vcagtq_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 b[4]; uint32_t r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 880.25), SIMDE_FLOAT32_C( 497.37), SIMDE_FLOAT32_C( 188.18), SIMDE_FLOAT32_C( -214.92) }, { SIMDE_FLOAT32_C( -292.63), SIMDE_FLOAT32_C( 165.21), SIMDE_FLOAT32_C( -507.32), SIMDE_FLOAT32_C( -554.07) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 21.58), SIMDE_FLOAT32_C( -187.66), SIMDE_FLOAT32_C( 52.34), SIMDE_FLOAT32_C( 522.72) }, { SIMDE_FLOAT32_C( 805.10), SIMDE_FLOAT32_C( -357.26), SIMDE_FLOAT32_C( 451.59), SIMDE_FLOAT32_C( 744.08) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 113.67), SIMDE_FLOAT32_C( 334.71), SIMDE_FLOAT32_C( 489.01), SIMDE_FLOAT32_C( 347.72) }, { SIMDE_FLOAT32_C( -991.50), SIMDE_FLOAT32_C( -625.74), SIMDE_FLOAT32_C( -356.50), 
SIMDE_FLOAT32_C( 848.94) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 90.46), SIMDE_FLOAT32_C( 858.14), SIMDE_FLOAT32_C( -123.29), SIMDE_FLOAT32_C( -917.86) }, { SIMDE_FLOAT32_C( -788.14), SIMDE_FLOAT32_C( 739.22), SIMDE_FLOAT32_C( 572.18), SIMDE_FLOAT32_C( -907.90) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 236.59), SIMDE_FLOAT32_C( -239.64), SIMDE_FLOAT32_C( -122.81), SIMDE_FLOAT32_C( 943.97) }, { SIMDE_FLOAT32_C( 925.57), SIMDE_FLOAT32_C( 369.86), SIMDE_FLOAT32_C( -610.11), SIMDE_FLOAT32_C( -52.85) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -817.80), SIMDE_FLOAT32_C( 442.23), SIMDE_FLOAT32_C( -530.12), SIMDE_FLOAT32_C( 987.30) }, { SIMDE_FLOAT32_C( -915.03), SIMDE_FLOAT32_C( 921.46), SIMDE_FLOAT32_C( 731.38), SIMDE_FLOAT32_C( 198.64) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 256.18), SIMDE_FLOAT32_C( 220.39), SIMDE_FLOAT32_C( -453.64), SIMDE_FLOAT32_C( 264.67) }, { SIMDE_FLOAT32_C( 594.64), SIMDE_FLOAT32_C( 189.87), SIMDE_FLOAT32_C( 113.62), SIMDE_FLOAT32_C( -314.89) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 48.01), SIMDE_FLOAT32_C( 990.32), SIMDE_FLOAT32_C( -232.76), SIMDE_FLOAT32_C( 259.86) }, { SIMDE_FLOAT32_C( 729.55), SIMDE_FLOAT32_C( -660.58), SIMDE_FLOAT32_C( 351.97), SIMDE_FLOAT32_C( -33.86) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_uint32x4_t r = simde_vcagtq_f32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vcagtq_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; simde_float64 b[2]; uint64_t r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 230.31), SIMDE_FLOAT64_C( -618.28) 
}, { SIMDE_FLOAT64_C( 180.85), SIMDE_FLOAT64_C( 444.53) }, { UINT64_MAX, UINT64_MAX } }, { { SIMDE_FLOAT64_C( 217.53), SIMDE_FLOAT64_C( -615.67) }, { SIMDE_FLOAT64_C( 629.35), SIMDE_FLOAT64_C( -484.75) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( 170.44), SIMDE_FLOAT64_C( -454.09) }, { SIMDE_FLOAT64_C( 330.58), SIMDE_FLOAT64_C( 520.13) }, { UINT64_C( 0), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -764.76), SIMDE_FLOAT64_C( -650.22) }, { SIMDE_FLOAT64_C( -78.50), SIMDE_FLOAT64_C( 683.38) }, { UINT64_MAX, UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -812.10), SIMDE_FLOAT64_C( 401.95) }, { SIMDE_FLOAT64_C( -416.07), SIMDE_FLOAT64_C( 983.29) }, { UINT64_MAX, UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -496.16), SIMDE_FLOAT64_C( 249.85) }, { SIMDE_FLOAT64_C( 57.13), SIMDE_FLOAT64_C( -909.73) }, { UINT64_MAX, UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -537.53), SIMDE_FLOAT64_C( 707.06) }, { SIMDE_FLOAT64_C( -45.84), SIMDE_FLOAT64_C( -807.07) }, { UINT64_MAX, UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -27.41), SIMDE_FLOAT64_C( 231.88) }, { SIMDE_FLOAT64_C( -442.67), SIMDE_FLOAT64_C( -797.10) }, { UINT64_C( 0), UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_uint64x2_t r = simde_vcagtq_f64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vcagts_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcagtd_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcagt_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcagt_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcagtq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcagtq_f64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/ceq.c000066400000000000000000002426051400333146700162440ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN ceq #include "test-neon.h" #include "../../../simde/arm/neon/ceq.h" static int 
test_simde_vceqs_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a; simde_float32 b; uint32_t r; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 3.14), 0 }, { SIMDE_FLOAT32_C( 1.62), SIMDE_MATH_NANF, 0 }, { SIMDE_MATH_NANF, SIMDE_MATH_NANF, 0 }, #endif { SIMDE_FLOAT32_C( -56.05), SIMDE_FLOAT32_C( -39.63), UINT32_C( 0) }, { SIMDE_FLOAT32_C( 841.90), SIMDE_FLOAT32_C( 841.90), UINT32_MAX }, { SIMDE_FLOAT32_C( -705.13), SIMDE_FLOAT32_C( -696.24), UINT32_C( 0) }, { SIMDE_FLOAT32_C( -60.94), SIMDE_FLOAT32_C( -60.94), UINT32_MAX }, { SIMDE_FLOAT32_C( 769.23), SIMDE_FLOAT32_C( -998.79), UINT32_C( 0) }, { SIMDE_FLOAT32_C( -11.03), SIMDE_FLOAT32_C( -11.03), UINT32_MAX }, { SIMDE_FLOAT32_C( 173.25), SIMDE_FLOAT32_C( 724.98), UINT32_C( 0) }, { SIMDE_FLOAT32_C( 709.16), SIMDE_FLOAT32_C( 709.16), UINT32_MAX } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint32_t r = simde_vceqs_f32(test_vec[i].a, test_vec[i].b); simde_assert_equal_u32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32_t a = simde_test_codegen_random_f32(-1000.0f, 1000.0f); simde_float32_t b = (i & 1) ? 
a : simde_test_codegen_random_f32(-1000.0f, 1000.0f); uint32_t r = simde_vceqs_f32(a, b); simde_test_codegen_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceqd_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a; simde_float64 b; uint64_t r; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 3.14), 0 }, { SIMDE_FLOAT64_C( 1.62), SIMDE_MATH_NAN, 0 }, { SIMDE_MATH_NAN, SIMDE_MATH_NAN, 0 }, #endif { SIMDE_FLOAT64_C( -577.65), SIMDE_FLOAT64_C( -703.85), UINT64_C( 0) }, { SIMDE_FLOAT64_C( 325.12), SIMDE_FLOAT64_C( 325.12), UINT64_MAX }, { SIMDE_FLOAT64_C( -527.91), SIMDE_FLOAT64_C( -305.80), UINT64_C( 0) }, { SIMDE_FLOAT64_C( -646.92), SIMDE_FLOAT64_C( -646.92), UINT64_MAX }, { SIMDE_FLOAT64_C( 438.65), SIMDE_FLOAT64_C( -673.26), UINT64_C( 0) }, { SIMDE_FLOAT64_C( -155.60), SIMDE_FLOAT64_C( -155.60), UINT64_MAX }, { SIMDE_FLOAT64_C( -252.03), SIMDE_FLOAT64_C( -719.74), UINT64_C( 0) }, { SIMDE_FLOAT64_C( 6.27), SIMDE_FLOAT64_C( 6.27), UINT64_MAX }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint64_t r = simde_vceqd_f64(test_vec[i].a, test_vec[i].b); simde_assert_equal_u64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64_t a = simde_test_codegen_random_f64(-1000.0, 1000.0); simde_float64_t b = (i & 1) ? 
a : simde_test_codegen_random_f64(-1000.0, 1000.0); uint64_t r = simde_vceqd_f64(a, b); simde_test_codegen_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceqd_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a; int64_t b; uint64_t r; } test_vec[] = { { -INT64_C( 3883316718465380623), -INT64_C( 6525049216541933883), UINT64_C( 0) }, { INT64_C( 144722424870567505), INT64_C( 144722424870567505), UINT64_MAX }, { -INT64_C( 4864164897981339177), INT64_C( 9142243167308417297), UINT64_C( 0) }, { -INT64_C( 6739643088005172605), -INT64_C( 6739643088005172605), UINT64_MAX }, { -INT64_C( 1746559855114065800), -INT64_C( 4942034653442889758), UINT64_C( 0) }, { INT64_C( 6601627071088235366), INT64_C( 6601627071088235366), UINT64_MAX }, { -INT64_C( 7451588071995398749), INT64_C( 7531456216826416525), UINT64_C( 0) }, { INT64_C( 2677530438617359042), INT64_C( 2677530438617359042), UINT64_MAX }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint64_t r = simde_vceqd_s64(test_vec[i].a, test_vec[i].b); simde_assert_equal_u64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int64_t a = simde_test_codegen_random_i64(); int64_t b = (i & 1) ? 
a : simde_test_codegen_random_i64(); uint64_t r = simde_vceqd_s64(a, b); simde_test_codegen_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceqd_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a; uint64_t b; uint64_t r; } test_vec[] = { { UINT64_C( 5604696383346243987), UINT64_C( 2792187346781598727), UINT64_C( 0) }, { UINT64_C(17677270875201804388), UINT64_C(17677270875201804388), UINT64_MAX }, { UINT64_C( 8831712695312082859), UINT64_C(13653696593774630020), UINT64_C( 0) }, { UINT64_C(17825427177876012193), UINT64_C(17825427177876012193), UINT64_MAX }, { UINT64_C( 1721557500519588795), UINT64_C( 6267904372928235163), UINT64_C( 0) }, { UINT64_C( 3411758805221171686), UINT64_C( 3411758805221171686), UINT64_MAX }, { UINT64_C(11261191358522146236), UINT64_C(17829590792684579308), UINT64_C( 0) }, { UINT64_C(16779414965382148533), UINT64_C(16779414965382148533), UINT64_MAX }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint64_t r = simde_vceqd_u64(test_vec[i].a, test_vec[i].b); simde_assert_equal_u64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a = simde_test_codegen_random_u64(); uint64_t b = (i & 1) ? 
a : simde_test_codegen_random_u64(); uint64_t r = simde_vceqd_u64(a, b); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b[2]; uint32_t r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 83.80), SIMDE_FLOAT32_C( 475.91) }, { SIMDE_FLOAT32_C( 83.80), SIMDE_FLOAT32_C( 475.91) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 315.96), SIMDE_FLOAT32_C( 49.13) }, { SIMDE_FLOAT32_C( 315.96), SIMDE_FLOAT32_C( 49.13) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 275.23), SIMDE_FLOAT32_C( -410.87) }, { SIMDE_FLOAT32_C( 275.23), SIMDE_FLOAT32_C( 115.66) }, { UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 336.04), SIMDE_FLOAT32_C( -887.79) }, { SIMDE_FLOAT32_C( 336.04), SIMDE_FLOAT32_C( 388.16) }, { UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 612.02), SIMDE_FLOAT32_C( 443.88) }, { SIMDE_FLOAT32_C( 612.02), SIMDE_FLOAT32_C( 786.43) }, { UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -252.76), SIMDE_FLOAT32_C( 831.05) }, { SIMDE_FLOAT32_C( 465.97), SIMDE_FLOAT32_C( 831.05) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -686.12), SIMDE_FLOAT32_C( 807.98) }, { SIMDE_FLOAT32_C( -686.12), SIMDE_FLOAT32_C( 169.08) }, { UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -388.06), SIMDE_FLOAT32_C( 887.17) }, { SIMDE_FLOAT32_C( -568.13), SIMDE_FLOAT32_C( 887.17) }, { UINT32_C( 0), UINT32_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_uint32x2_t r = simde_vceq_f32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32 a_[2], 
b_[2]; simde_test_codegen_random_vf32(sizeof(a_) / sizeof(a_[0]), a_, -1000.0, 1000.0); simde_test_codegen_random_vf32(sizeof(b_) / sizeof(b_[0]), b_, -1000.0, 1000.0); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint32x2_t r = simde_vceq_f32(simde_vld1_f32(a_), simde_vld1_f32(b_)); simde_test_codegen_write_vf32(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vf32(2, sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; simde_float64 b[1]; uint64_t r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( 885.81) }, { SIMDE_FLOAT64_C( 885.81) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 40.20) }, { SIMDE_FLOAT64_C( 40.20) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( -943.53) }, { SIMDE_FLOAT64_C( -943.53) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 766.97) }, { SIMDE_FLOAT64_C( 766.97) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 492.60) }, { SIMDE_FLOAT64_C( -737.06) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 465.88) }, { SIMDE_FLOAT64_C( 465.88) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( -665.29) }, { SIMDE_FLOAT64_C( -902.43) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -382.78) }, { SIMDE_FLOAT64_C( -377.85) }, { UINT64_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_uint64x1_t r = simde_vceq_f64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64 a_[1], b_[1]; simde_test_codegen_random_vf64(sizeof(a_) / sizeof(a_[0]), a_, -1000.0, 1000.0); simde_test_codegen_random_vf64(sizeof(b_) / sizeof(b_[0]), b_, -1000.0, 
1000.0); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint64x1_t r = simde_vceq_f64(simde_vld1_f64(a_), simde_vld1_f64(b_)); simde_test_codegen_write_vf64(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vf64(2, sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; uint8_t r[8]; } test_vec[] = { { { -INT8_C( 6), -INT8_C( 44), INT8_C( 28), INT8_C( 95), -INT8_C( 61), INT8_C( 16), INT8_C( 5), -INT8_C( 50) }, { -INT8_C( 6), -INT8_C( 88), INT8_C( 27), INT8_C( 22), -INT8_C( 61), INT8_C( 34), -INT8_C( 63), -INT8_C( 8) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 114), -INT8_C( 100), -INT8_C( 127), INT8_C( 77), -INT8_C( 6), -INT8_C( 19), -INT8_C( 116), -INT8_C( 77) }, { -INT8_C( 63), -INT8_C( 88), -INT8_C( 127), INT8_C( 19), -INT8_C( 71), -INT8_C( 122), -INT8_C( 31), -INT8_C( 77) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { -INT8_C( 104), -INT8_C( 76), -INT8_C( 104), -INT8_C( 83), -INT8_C( 97), -INT8_C( 5), INT8_C( 119), INT8_C( 96) }, { -INT8_C( 104), -INT8_C( 78), -INT8_C( 104), -INT8_C( 110), -INT8_C( 97), INT8_C( 37), -INT8_C( 77), INT8_C( 96) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 122), INT8_C( 3), -INT8_C( 37), INT8_C( 110), -INT8_C( 46), -INT8_C( 119), INT8_C( 113), INT8_C( 100) }, { INT8_C( 122), -INT8_C( 42), INT8_C( 18), INT8_C( 82), -INT8_C( 46), -INT8_C( 119), -INT8_C( 99), INT8_C( 106) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { -INT8_C( 20), INT8_C( 100), 
-INT8_C( 81), INT8_C( 122), INT8_C( 1), INT8_C( 48), -INT8_C( 33), INT8_C( 81) }, { -INT8_C( 20), INT8_C( 100), -INT8_C( 65), INT8_C( 122), INT8_C( 41), INT8_C( 48), -INT8_C( 33), -INT8_C( 93) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { -INT8_C( 55), -INT8_C( 4), -INT8_C( 112), INT8_C( 122), -INT8_C( 27), -INT8_C( 6), -INT8_C( 53), -INT8_C( 46) }, { INT8_C( 120), -INT8_C( 4), INT8_C( 113), INT8_C( 122), -INT8_C( 27), -INT8_C( 6), -INT8_C( 53), -INT8_C( 47) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 29), INT8_C( 33), -INT8_C( 101), -INT8_C( 106), -INT8_C( 76), -INT8_C( 34), INT8_C( 76), INT8_C( 126) }, { INT8_C( 29), -INT8_C( 35), -INT8_C( 94), INT8_C( 12), -INT8_C( 88), -INT8_C( 17), -INT8_C( 34), INT8_C( 32) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 123), -INT8_C( 18), INT8_C( 47), INT8_C( 48), INT8_C( 25), INT8_C( 24), -INT8_C( 82), INT8_C( 6) }, { INT8_C( 123), INT8_C( 10), -INT8_C( 53), INT8_C( 48), -INT8_C( 23), INT8_C( 24), -INT8_C( 82), INT8_C( 6) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_uint8x8_t r = simde_vceq_s8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int8_t a_[8], b_[8]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint8x8_t r = 
simde_vceq_s8(simde_vld1_s8(a_), simde_vld1_s8(b_)); simde_test_codegen_write_vi8(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vi8(2, sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; uint16_t r[4]; } test_vec[] = { { { -INT16_C( 10359), INT16_C( 9925), INT16_C( 3010), INT16_C( 26232) }, { INT16_C( 851), INT16_C( 9925), -INT16_C( 16151), -INT16_C( 25263) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { INT16_C( 24614), -INT16_C( 1678), INT16_C( 3577), -INT16_C( 17548) }, { INT16_C( 8550), -INT16_C( 4287), INT16_C( 3577), -INT16_C( 17548) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { -INT16_C( 5626), INT16_C( 11378), -INT16_C( 6838), INT16_C( 17189) }, { -INT16_C( 5626), INT16_C( 11378), -INT16_C( 6838), INT16_C( 17189) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { -INT16_C( 28630), INT16_C( 23584), -INT16_C( 32611), -INT16_C( 18978) }, { -INT16_C( 147), INT16_C( 23584), -INT16_C( 19739), -INT16_C( 29899) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { INT16_C( 29173), INT16_C( 8094), -INT16_C( 21759), -INT16_C( 32478) }, { INT16_C( 29173), INT16_C( 8094), -INT16_C( 21759), -INT16_C( 25021) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 1238), INT16_C( 27477), INT16_C( 7144), -INT16_C( 17774) }, { INT16_C( 1238), INT16_C( 27477), INT16_C( 7144), -INT16_C( 19893) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { -INT16_C( 11809), -INT16_C( 8164), INT16_C( 13638), -INT16_C( 22401) }, { -INT16_C( 11648), -INT16_C( 24256), INT16_C( 13638), INT16_C( 27816) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 31806), INT16_C( 9889), -INT16_C( 18467), INT16_C( 22809) }, { INT16_C( 2906), INT16_C( 
14747), -INT16_C( 18467), INT16_C( 22809) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_uint16x4_t r = simde_vceq_s16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int16_t a_[4], b_[4]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint16x4_t r = simde_vceq_s16(simde_vld1_s16(a_), simde_vld1_s16(b_)); simde_test_codegen_write_vi16(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vi16(2, sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; uint32_t r[2]; } test_vec[] = { { { INT32_C( 1874778135), INT32_C( 443140088) }, { INT32_C( 126021491), INT32_C( 207635942) }, { UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 854773045), INT32_C( 2091071115) }, { INT32_C( 53628719), -INT32_C( 1394914834) }, { UINT32_C( 0), UINT32_C( 0) } }, { { -INT32_C( 1710993738), -INT32_C( 135368431) }, { -INT32_C( 1710993738), INT32_C( 1432828258) }, { UINT32_MAX, UINT32_C( 0) } }, { { -INT32_C( 1167126645), INT32_C( 1287561566) }, { -INT32_C( 1560700438), INT32_C( 1556311405) }, { UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 1967686665), -INT32_C( 216149872) }, { INT32_C( 1967686665), -INT32_C( 427502729) }, { UINT32_MAX, UINT32_C( 0) } }, { { INT32_C( 1898924935), INT32_C( 1999338411) }, { -INT32_C( 
374417771), INT32_C( 1999338411) }, { UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 1476217289), INT32_C( 1537377324) }, { INT32_C( 1476217289), INT32_C( 1537377324) }, { UINT32_MAX, UINT32_MAX } }, { { INT32_C( 416942211), -INT32_C( 1996388130) }, { INT32_C( 33565700), -INT32_C( 1776400409) }, { UINT32_C( 0), UINT32_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_uint32x2_t r = simde_vceq_s32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int32_t a_[2], b_[2]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint32x2_t r = simde_vceq_s32(simde_vld1_s32(a_), simde_vld1_s32(b_)); simde_test_codegen_write_vi32(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vi32(2, sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int64_t b[1]; uint64_t r[1]; } test_vec[] = { { { INT64_C( 8535823101435549350) }, { -INT64_C( 68493279294832619) }, { UINT64_C( 0) } }, { { -INT64_C( 6450899593806032554) }, { -INT64_C( 8184543586398563143) }, { UINT64_C( 0) } }, { { INT64_C( 644778802918027581) }, { INT64_C( 9064137101271018600) }, { UINT64_C( 0) } }, { { INT64_C( 2793435887488105527) }, { -INT64_C( 4849783837811557109) }, { UINT64_C( 0) } }, { { INT64_C( 6774498698231718756) }, { -INT64_C( 6318717217871057330) }, { UINT64_C( 0) } }, { { INT64_C( 3230373444996287677) }, 
{ INT64_C( 3230373444996287677) }, { UINT64_MAX } }, { { INT64_C( 8905651318291695689) }, { INT64_C( 8905651318291695689) }, { UINT64_MAX } }, { { INT64_C( 3813610299838181612) }, { INT64_C( 3813610299838181612) }, { UINT64_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_uint64x1_t r = simde_vceq_s64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int64_t a_[1], b_[1]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint64x1_t r = simde_vceq_s64(simde_vld1_s64(a_), simde_vld1_s64(b_)); simde_test_codegen_write_vi64(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vi64(2, sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(228), UINT8_C(114), UINT8_C( 18), UINT8_C(209), UINT8_C(222), UINT8_C(238), UINT8_C(135), UINT8_C( 83) }, { UINT8_C(120), UINT8_C( 62), UINT8_C( 18), UINT8_C( 17), UINT8_C(222), UINT8_C( 31), UINT8_C(135), UINT8_C( 19) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C(102), UINT8_C( 52), UINT8_C(143), UINT8_C( 47), UINT8_C(218), UINT8_C(224), UINT8_C(199), UINT8_C(190) }, { UINT8_C(178), UINT8_C( 52), UINT8_C(143), UINT8_C(116), UINT8_C( 34), UINT8_C(224), UINT8_C(199), UINT8_C(155) }, { UINT8_C( 0), 
UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C( 42), UINT8_C( 74), UINT8_C(211), UINT8_C( 7), UINT8_C(134), UINT8_C(117), UINT8_C(195), UINT8_C(236) }, { UINT8_C( 42), UINT8_C( 20), UINT8_C( 28), UINT8_C( 4), UINT8_C( 83), UINT8_C(117), UINT8_C(195), UINT8_C( 5) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C( 27), UINT8_C(189), UINT8_C(145), UINT8_C( 19), UINT8_C( 70), UINT8_C( 91), UINT8_C( 25), UINT8_C(138) }, { UINT8_C(166), UINT8_C(189), UINT8_C(145), UINT8_C( 44), UINT8_C( 95), UINT8_C( 22), UINT8_C( 25), UINT8_C(137) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C(163), UINT8_C(253), UINT8_C( 32), UINT8_C(224), UINT8_C(206), UINT8_C( 87), UINT8_C( 38), UINT8_C(116) }, { UINT8_C(154), UINT8_C(114), UINT8_C(109), UINT8_C(224), UINT8_C(206), UINT8_C( 87), UINT8_C(107), UINT8_C(116) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { UINT8_C( 60), UINT8_C( 13), UINT8_C(187), UINT8_C(123), UINT8_C(218), UINT8_C(121), UINT8_C(213), UINT8_C( 46) }, { UINT8_C( 60), UINT8_C( 13), UINT8_C( 83), UINT8_C(123), UINT8_C(148), UINT8_C(121), UINT8_C(213), UINT8_C( 46) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(169), UINT8_C(206), UINT8_C( 15), UINT8_C(131), UINT8_C(193), UINT8_C( 31), UINT8_C( 1), UINT8_C(118) }, { UINT8_C(169), UINT8_C(130), UINT8_C( 15), UINT8_C(131), UINT8_C(193), UINT8_C(252), UINT8_C( 1), UINT8_C(253) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C(145), UINT8_C(217), UINT8_C( 82), UINT8_C(245), UINT8_C( 83), UINT8_C(159), UINT8_C(107), UINT8_C(197) }, { UINT8_C(109), UINT8_C(253), UINT8_C( 82), UINT8_C(245), UINT8_C( 28), UINT8_C( 24), UINT8_C(107), UINT8_C(197) }, { 
UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vceq_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint8_t a_[8], b_[8]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint8x8_t r = simde_vceq_u8(simde_vld1_u8(a_), simde_vld1_u8(b_)); simde_test_codegen_write_vu8(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vu8(2, sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(29979), UINT16_C(15451), UINT16_C(55565), UINT16_C( 2788) }, { UINT16_C(29979), UINT16_C(15451), UINT16_C(55565), UINT16_C(52687) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C(18039), UINT16_C( 7331), UINT16_C(33155), UINT16_C( 8027) }, { UINT16_C(60406), UINT16_C(58586), UINT16_C(28391), UINT16_C(26862) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C( 9069), UINT16_C(21606), UINT16_C(21866), UINT16_C(60672) }, { UINT16_C( 9069), UINT16_C(58546), UINT16_C(21866), UINT16_C(60672) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { UINT16_C( 2674), UINT16_C(12209), UINT16_C(62680), UINT16_C(25495) }, { UINT16_C( 2674), UINT16_C(47757), 
UINT16_C(62680), UINT16_C(32526) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C(10068), UINT16_C( 7713), UINT16_C(31613), UINT16_C(62541) }, { UINT16_C(47319), UINT16_C( 7713), UINT16_C(53919), UINT16_C(62541) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(48094), UINT16_C(12953), UINT16_C( 5346), UINT16_C( 838) }, { UINT16_C(48094), UINT16_C(12953), UINT16_C( 5346), UINT16_C(24560) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C(16567), UINT16_C(42750), UINT16_C(53927), UINT16_C(65389) }, { UINT16_C(16567), UINT16_C(42750), UINT16_C(40229), UINT16_C(65389) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(25574), UINT16_C(20571), UINT16_C(19375), UINT16_C(16048) }, { UINT16_C(25574), UINT16_C(15012), UINT16_C(10552), UINT16_C(57134) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vceq_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint16_t a_[4], b_[4]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint16x4_t r = simde_vceq_u16(simde_vld1_u16(a_), simde_vld1_u16(b_)); simde_test_codegen_write_vu16(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vu16(2, sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 
1 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(4068292116), UINT32_C(1657391483) }, { UINT32_C(2579237544), UINT32_C(3419376549) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(3471013382), UINT32_C(1572021813) }, { UINT32_C(3471013382), UINT32_C( 755365807) }, { UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(3576563242), UINT32_C( 591969003) }, { UINT32_C(3576563242), UINT32_C( 591969003) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C(1339722057), UINT32_C(2253574819) }, { UINT32_C(2682004405), UINT32_C(2253574819) }, { UINT32_C( 0), UINT32_MAX } }, { { UINT32_C( 817654875), UINT32_C(3300460009) }, { UINT32_C( 817654875), UINT32_C(3300460009) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C( 929592194), UINT32_C(2855199800) }, { UINT32_C(2260674582), UINT32_C(2855199800) }, { UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(2792773707), UINT32_C(2933273541) }, { UINT32_C(2373095480), UINT32_C(2969629769) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 198181621), UINT32_C( 318789789) }, { UINT32_C(2819473524), UINT32_C( 318789789) }, { UINT32_C( 0), UINT32_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vceq_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint32_t a_[2], b_[2]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint32x2_t r = simde_vceq_u32(simde_vld1_u32(a_), simde_vld1_u32(b_)); simde_test_codegen_write_vu32(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); 
simde_test_codegen_write_vu32(2, sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C( 462129490676742370) }, { UINT64_C( 3396253101468774017) }, { UINT64_C( 0) } }, { { UINT64_C(15200919614915212372) }, { UINT64_C(15200919614915212372) }, { UINT64_MAX } }, { { UINT64_C(16437398615333454713) }, { UINT64_C(16437398615333454713) }, { UINT64_MAX } }, { { UINT64_C( 1324451836855196340) }, { UINT64_C( 1324451836855196340) }, { UINT64_MAX } }, { { UINT64_C( 4023702131190414723) }, { UINT64_C( 4659676053606091433) }, { UINT64_C( 0) } }, { { UINT64_C(10209850915128622795) }, { UINT64_C(10209850915128622795) }, { UINT64_MAX } }, { { UINT64_C( 2190433362220569284) }, { UINT64_C(11845426152445055938) }, { UINT64_C( 0) } }, { { UINT64_C( 6586157150835866609) }, { UINT64_C( 6586157150835866609) }, { UINT64_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_vceq_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a_[1], b_[1]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint64x1_t r = simde_vceq_u64(simde_vld1_u64(a_), simde_vld1_u64(b_)); simde_test_codegen_write_vu64(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vu64(2, sizeof(b_) / sizeof(b_[0]), b_, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceqq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 b[4]; uint32_t r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 151.95), SIMDE_FLOAT32_C( 801.55), SIMDE_FLOAT32_C( 470.19), SIMDE_FLOAT32_C( 494.12) }, { SIMDE_FLOAT32_C( -614.47), SIMDE_FLOAT32_C( 801.55), SIMDE_FLOAT32_C( 128.44), SIMDE_FLOAT32_C( 494.12) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -211.99), SIMDE_FLOAT32_C( 401.96), SIMDE_FLOAT32_C( -113.21), SIMDE_FLOAT32_C( 64.60) }, { SIMDE_FLOAT32_C( -211.99), SIMDE_FLOAT32_C( 68.77), SIMDE_FLOAT32_C( -113.21), SIMDE_FLOAT32_C( 64.60) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 843.56), SIMDE_FLOAT32_C( 416.97), SIMDE_FLOAT32_C( 36.69), SIMDE_FLOAT32_C( -30.43) }, { SIMDE_FLOAT32_C( 54.01), SIMDE_FLOAT32_C( 416.97), SIMDE_FLOAT32_C( 36.69), SIMDE_FLOAT32_C( -794.04) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 264.61), SIMDE_FLOAT32_C( -269.86), SIMDE_FLOAT32_C( -951.46), SIMDE_FLOAT32_C( -765.17) }, { SIMDE_FLOAT32_C( 264.61), SIMDE_FLOAT32_C( 341.73), SIMDE_FLOAT32_C( -453.78), SIMDE_FLOAT32_C( -765.17) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -863.16), SIMDE_FLOAT32_C( 964.02), SIMDE_FLOAT32_C( -403.70), SIMDE_FLOAT32_C( 980.39) }, { SIMDE_FLOAT32_C( -863.16), SIMDE_FLOAT32_C( -832.51), SIMDE_FLOAT32_C( -252.53), SIMDE_FLOAT32_C( 980.39) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -506.48), SIMDE_FLOAT32_C( 986.65), SIMDE_FLOAT32_C( 541.36), SIMDE_FLOAT32_C( -198.05) }, { SIMDE_FLOAT32_C( -506.48), SIMDE_FLOAT32_C( 852.79), SIMDE_FLOAT32_C( 541.36), SIMDE_FLOAT32_C( -198.05) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( -68.37), SIMDE_FLOAT32_C( -248.88), 
SIMDE_FLOAT32_C( -917.64), SIMDE_FLOAT32_C( -455.58) }, { SIMDE_FLOAT32_C( -886.61), SIMDE_FLOAT32_C( 159.60), SIMDE_FLOAT32_C( 207.02), SIMDE_FLOAT32_C( -455.58) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 970.82), SIMDE_FLOAT32_C( -809.19), SIMDE_FLOAT32_C( 240.85), SIMDE_FLOAT32_C( -38.07) }, { SIMDE_FLOAT32_C( 332.11), SIMDE_FLOAT32_C( -809.19), SIMDE_FLOAT32_C( 7.05), SIMDE_FLOAT32_C( -849.83) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_uint32x4_t r = simde_vceqq_f32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32 a_[4], b_[4]; simde_test_codegen_random_vf32(sizeof(a_) / sizeof(a_[0]), a_, -1000.0, 1000.0); simde_test_codegen_random_vf32(sizeof(b_) / sizeof(b_[0]), b_, -1000.0, 1000.0); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint32x4_t r = simde_vceqq_f32(simde_vld1q_f32(a_), simde_vld1q_f32(b_)); simde_test_codegen_write_vf32(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vf32(2, sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceqq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 b[2]; uint64_t r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 211.07), SIMDE_FLOAT64_C( -652.68) }, { SIMDE_FLOAT64_C( -764.13), SIMDE_FLOAT64_C( -652.68) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( -650.75), SIMDE_FLOAT64_C( -819.92) }, { SIMDE_FLOAT64_C( -825.40), SIMDE_FLOAT64_C( -295.90) }, { UINT64_C( 0), UINT64_C( 0) } 
}, { { SIMDE_FLOAT64_C( 934.40), SIMDE_FLOAT64_C( -562.53) }, { SIMDE_FLOAT64_C( 934.40), SIMDE_FLOAT64_C( 790.23) }, { UINT64_MAX, UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -625.41), SIMDE_FLOAT64_C( 22.91) }, { SIMDE_FLOAT64_C( -625.41), SIMDE_FLOAT64_C( 22.91) }, { UINT64_MAX, UINT64_MAX } }, { { SIMDE_FLOAT64_C( 523.75), SIMDE_FLOAT64_C( 311.54) }, { SIMDE_FLOAT64_C( 286.95), SIMDE_FLOAT64_C( 311.54) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( 41.17), SIMDE_FLOAT64_C( 277.04) }, { SIMDE_FLOAT64_C( 630.41), SIMDE_FLOAT64_C( 277.04) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( -701.97), SIMDE_FLOAT64_C( -163.42) }, { SIMDE_FLOAT64_C( -701.97), SIMDE_FLOAT64_C( 980.64) }, { UINT64_MAX, UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 144.09), SIMDE_FLOAT64_C( -754.88) }, { SIMDE_FLOAT64_C( -879.09), SIMDE_FLOAT64_C( 78.48) }, { UINT64_C( 0), UINT64_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_uint64x2_t r = simde_vceqq_f64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64 a_[2], b_[2]; simde_test_codegen_random_vf64(sizeof(a_) / sizeof(a_[0]), a_, -1000.0, 1000.0); simde_test_codegen_random_vf64(sizeof(b_) / sizeof(b_[0]), b_, -1000.0, 1000.0); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint64x2_t r = simde_vceqq_f64(simde_vld1q_f64(a_), simde_vld1q_f64(b_)); simde_test_codegen_write_vf64(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vf64(2, sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceqq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const 
struct { int8_t a[16]; int8_t b[16]; uint8_t r[16]; } test_vec[] = { { { -INT8_C( 9), -INT8_C( 9), -INT8_C( 98), INT8_C( 88), INT8_C( 87), -INT8_C( 10), -INT8_C( 23), -INT8_C( 78), -INT8_C( 51), -INT8_C( 101), INT8_C( 84), -INT8_C( 61), -INT8_C( 126), INT8_C( 45), INT8_C( 125), INT8_C( 85) }, { INT8_C( 33), -INT8_C( 9), INT8_C( 48), -INT8_C( 50), -INT8_C( 123), -INT8_C( 10), INT8_C( 109), -INT8_C( 12), INT8_C( 61), INT8_C( 6), INT8_C( 84), -INT8_C( 65), INT8_C( 94), INT8_C( 46), INT8_C( 97), INT8_C( 85) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { -INT8_C( 32), -INT8_C( 24), INT8_C( 19), -INT8_C( 51), INT8_MAX, INT8_MIN, -INT8_C( 121), -INT8_C( 63), INT8_C( 56), INT8_C( 4), INT8_MIN, INT8_C( 92), -INT8_C( 2), -INT8_C( 31), -INT8_C( 95), -INT8_C( 17) }, { -INT8_C( 32), -INT8_C( 24), -INT8_C( 110), -INT8_C( 51), INT8_MAX, INT8_C( 66), -INT8_C( 121), -INT8_C( 79), INT8_C( 56), INT8_C( 4), INT8_C( 101), INT8_C( 92), -INT8_C( 2), -INT8_C( 61), -INT8_C( 95), -INT8_C( 17) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { -INT8_C( 62), -INT8_C( 93), INT8_C( 65), INT8_C( 6), -INT8_C( 27), -INT8_C( 40), -INT8_C( 14), -INT8_C( 27), -INT8_C( 17), INT8_C( 87), INT8_C( 98), -INT8_C( 24), INT8_C( 26), INT8_C( 27), -INT8_C( 62), INT8_C( 107) }, { -INT8_C( 48), -INT8_C( 123), INT8_C( 65), INT8_C( 6), INT8_C( 85), -INT8_C( 40), -INT8_C( 62), -INT8_C( 27), -INT8_C( 17), INT8_C( 98), INT8_C( 98), -INT8_C( 24), INT8_C( 61), INT8_C( 115), -INT8_C( 62), -INT8_C( 1) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 
44), INT8_C( 38), -INT8_C( 124), -INT8_C( 62), INT8_C( 42), INT8_C( 70), -INT8_C( 118), -INT8_C( 27), INT8_C( 102), -INT8_C( 127), -INT8_C( 118), -INT8_C( 26), -INT8_C( 105), -INT8_C( 60), -INT8_C( 27), INT8_C( 50) }, { INT8_C( 44), INT8_C( 38), -INT8_C( 69), -INT8_C( 62), INT8_C( 90), -INT8_C( 44), -INT8_C( 118), -INT8_C( 27), INT8_C( 102), -INT8_C( 127), -INT8_C( 118), INT8_C( 20), -INT8_C( 105), -INT8_C( 101), -INT8_C( 109), INT8_C( 50) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { -INT8_C( 7), -INT8_C( 36), -INT8_C( 26), INT8_C( 83), -INT8_C( 44), INT8_C( 112), -INT8_C( 76), -INT8_C( 112), -INT8_C( 15), -INT8_C( 62), -INT8_C( 91), -INT8_C( 120), INT8_C( 94), INT8_C( 56), -INT8_C( 70), INT8_C( 109) }, { INT8_C( 79), -INT8_C( 36), INT8_C( 99), -INT8_C( 83), -INT8_C( 44), -INT8_C( 108), -INT8_C( 76), INT8_C( 4), -INT8_C( 57), -INT8_C( 95), -INT8_C( 34), -INT8_C( 66), INT8_C( 116), INT8_C( 120), -INT8_C( 31), INT8_C( 109) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 71), -INT8_C( 57), INT8_C( 99), -INT8_C( 64), INT8_C( 91), INT8_C( 23), -INT8_C( 23), INT8_C( 12), -INT8_C( 12), -INT8_C( 103), -INT8_C( 32), INT8_C( 90), INT8_C( 118), -INT8_C( 63), -INT8_C( 102), INT8_C( 75) }, { -INT8_C( 119), INT8_C( 90), INT8_C( 61), -INT8_C( 64), INT8_C( 82), -INT8_C( 58), -INT8_C( 23), INT8_C( 12), -INT8_C( 12), -INT8_C( 103), INT8_C( 37), INT8_C( 90), INT8_C( 4), -INT8_C( 119), INT8_C( 15), INT8_C( 75) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 100), -INT8_C( 16), -INT8_C( 72), 
-INT8_C( 101), INT8_C( 65), -INT8_C( 95), -INT8_C( 62), -INT8_C( 7), INT8_C( 58), INT8_C( 78), -INT8_C( 13), INT8_C( 62), INT8_C( 88), INT8_C( 49), INT8_C( 75), -INT8_C( 64) }, { INT8_C( 17), -INT8_C( 16), INT8_C( 107), -INT8_C( 101), INT8_C( 118), INT8_C( 56), -INT8_C( 34), -INT8_C( 7), -INT8_C( 27), INT8_C( 78), -INT8_C( 13), INT8_C( 84), INT8_C( 88), INT8_C( 49), INT8_C( 75), -INT8_C( 68) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { -INT8_C( 105), -INT8_C( 72), -INT8_C( 61), INT8_C( 30), -INT8_C( 30), -INT8_C( 39), -INT8_C( 3), -INT8_C( 2), -INT8_C( 106), -INT8_C( 7), INT8_C( 42), INT8_MAX, INT8_C( 42), INT8_C( 118), INT8_C( 59), INT8_C( 88) }, { INT8_C( 121), -INT8_C( 83), -INT8_C( 61), INT8_C( 30), -INT8_C( 30), -INT8_C( 27), -INT8_C( 3), -INT8_C( 2), -INT8_C( 106), INT8_C( 26), -INT8_C( 118), -INT8_C( 26), -INT8_C( 63), -INT8_C( 41), INT8_C( 70), INT8_C( 88) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_uint8x16_t r = simde_vceqq_s8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int8_t a_[16], b_[16]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint8x16_t r = simde_vceqq_s8(simde_vld1q_s8(a_), simde_vld1q_s8(b_)); 
simde_test_codegen_write_vi8(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vi8(2, sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceqq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; uint16_t r[8]; } test_vec[] = { { { INT16_C( 30689), INT16_C( 17219), INT16_C( 24342), -INT16_C( 6278), -INT16_C( 13469), -INT16_C( 11550), -INT16_C( 32354), INT16_C( 32558) }, { INT16_C( 2591), INT16_C( 17219), INT16_C( 24342), INT16_C( 11098), -INT16_C( 31655), -INT16_C( 11550), -INT16_C( 32354), INT16_C( 32558) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { INT16_C( 13658), -INT16_C( 1829), INT16_C( 1817), INT16_C( 14349), INT16_C( 20497), INT16_C( 2181), INT16_C( 17067), INT16_C( 19767) }, { INT16_C( 13658), -INT16_C( 1829), INT16_C( 2742), -INT16_C( 20873), -INT16_C( 11205), INT16_C( 2181), INT16_C( 17067), INT16_C( 19767) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { INT16_C( 8203), -INT16_C( 17794), INT16_C( 9266), INT16_C( 20931), -INT16_C( 24826), -INT16_C( 17335), -INT16_C( 26633), -INT16_C( 7061) }, { -INT16_C( 3947), INT16_C( 16876), INT16_C( 9266), -INT16_C( 31090), INT16_C( 15459), -INT16_C( 22030), -INT16_C( 26633), INT16_C( 512) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 6571), -INT16_C( 23607), -INT16_C( 13904), INT16_C( 26789), INT16_C( 25160), -INT16_C( 24352), INT16_C( 5859), INT16_C( 750) }, { INT16_C( 6571), -INT16_C( 23607), -INT16_C( 13904), INT16_C( 26789), INT16_C( 25160), -INT16_C( 26601), INT16_C( 5859), INT16_C( 750) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { -INT16_C( 593), 
-INT16_C( 28062), -INT16_C( 19368), INT16_C( 1263), -INT16_C( 17971), -INT16_C( 11555), INT16_C( 19586), -INT16_C( 5697) }, { -INT16_C( 593), -INT16_C( 28062), INT16_C( 20756), INT16_C( 29588), INT16_C( 24402), -INT16_C( 11555), -INT16_C( 6441), -INT16_C( 5697) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { -INT16_C( 11424), INT16_C( 11397), INT16_C( 13343), -INT16_C( 12627), -INT16_C( 12910), -INT16_C( 27533), -INT16_C( 2719), -INT16_C( 10908) }, { -INT16_C( 27052), INT16_C( 11397), INT16_C( 17788), INT16_C( 19221), -INT16_C( 12910), -INT16_C( 27533), -INT16_C( 24716), -INT16_C( 10908) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { -INT16_C( 16011), -INT16_C( 10660), -INT16_C( 8314), INT16_C( 2698), INT16_C( 6801), INT16_C( 30346), INT16_C( 19284), -INT16_C( 2675) }, { -INT16_C( 6632), -INT16_C( 29317), -INT16_C( 8314), -INT16_C( 1950), INT16_C( 6801), INT16_C( 30346), -INT16_C( 5249), -INT16_C( 2675) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { INT16_C( 13584), -INT16_C( 28925), INT16_C( 17902), INT16_C( 1952), INT16_C( 20602), -INT16_C( 19820), -INT16_C( 25947), INT16_C( 2597) }, { INT16_C( 13584), -INT16_C( 28925), -INT16_C( 28640), -INT16_C( 13180), INT16_C( 20602), INT16_C( 31278), -INT16_C( 25947), INT16_C( 2597) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_uint16x8_t r = simde_vceqq_s16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int16_t a_[8], b_[8]; simde_test_codegen_random_memory(sizeof(a_), 
HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint16x8_t r = simde_vceqq_s16(simde_vld1q_s16(a_), simde_vld1q_s16(b_)); simde_test_codegen_write_vi16(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vi16(2, sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceqq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; uint32_t r[4]; } test_vec[] = { { { INT32_C( 68585909), -INT32_C( 304121191), -INT32_C( 1443602637), -INT32_C( 449347537) }, { -INT32_C( 6934970), INT32_C( 1969593714), -INT32_C( 1443602637), -INT32_C( 449347537) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { INT32_C( 1418774728), -INT32_C( 350283357), INT32_C( 1677692505), INT32_C( 1864817917) }, { -INT32_C( 1377530759), INT32_C( 441899499), INT32_C( 1677692505), -INT32_C( 1543051044) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { -INT32_C( 1858135888), INT32_C( 1238200173), INT32_C( 149803864), INT32_C( 442295914) }, { INT32_C( 1769004816), INT32_C( 1238200173), INT32_C( 149803864), INT32_C( 442295914) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 2084019423), -INT32_C( 1403118783), INT32_C( 1324392016), INT32_C( 1513331513) }, { INT32_C( 964466913), -INT32_C( 700356244), INT32_C( 1324392016), INT32_C( 1513331513) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { INT32_C( 1366407681), -INT32_C( 1851760296), -INT32_C( 85142532), -INT32_C( 1686595537) }, { INT32_C( 1366407681), -INT32_C( 1851760296), -INT32_C( 85142532), -INT32_C( 432420401) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { INT32_C( 1880168978), INT32_C( 
1531164812), -INT32_C( 948229947), -INT32_C( 1280339551) }, { INT32_C( 1238153293), INT32_C( 1531164812), -INT32_C( 2025685887), -INT32_C( 1280339551) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { -INT32_C( 1271016210), -INT32_C( 1683589639), -INT32_C( 1286712057), INT32_C( 84963862) }, { INT32_C( 523468189), -INT32_C( 1683589639), -INT32_C( 1286712057), INT32_C( 84963862) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 1679457388), INT32_C( 328874951), -INT32_C( 302752090), -INT32_C( 1338987093) }, { -INT32_C( 1679457388), -INT32_C( 280087335), -INT32_C( 302752090), -INT32_C( 1338987093) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_uint32x4_t r = simde_vceqq_s32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int32_t a_[4], b_[4]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint32x4_t r = simde_vceqq_s32(simde_vld1q_s32(a_), simde_vld1q_s32(b_)); simde_test_codegen_write_vi32(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vi32(2, sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceqq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t b[2]; uint64_t r[2]; } test_vec[] = { { { INT64_C( 122496343496134342), -INT64_C( 8871796291912087289) }, { -INT64_C( 1619632953150757040), -INT64_C( 
1315802993512906350) }, { UINT64_C( 0), UINT64_C( 0) } }, { { INT64_C( 2083288838247884909), INT64_C( 652781822731857462) }, { INT64_C( 8036705605673239169), INT64_C( 652781822731857462) }, { UINT64_C( 0), UINT64_MAX } }, { { -INT64_C( 5840191344206578756), -INT64_C( 4478792894918717845) }, { -INT64_C( 5840191344206578756), -INT64_C( 4478792894918717845) }, { UINT64_MAX, UINT64_MAX } }, { { INT64_C( 2165879478655069652), -INT64_C( 8949288324055110036) }, { INT64_C( 240926292617560533), -INT64_C( 8949288324055110036) }, { UINT64_C( 0), UINT64_MAX } }, { { -INT64_C( 4084512031659993958), INT64_C( 681759254808292176) }, { INT64_C( 8087071077733255670), INT64_C( 681759254808292176) }, { UINT64_C( 0), UINT64_MAX } }, { { -INT64_C( 135823057901789263), -INT64_C( 4749785724592454897) }, { INT64_C( 6527389508598676334), -INT64_C( 4749785724592454897) }, { UINT64_C( 0), UINT64_MAX } }, { { INT64_C( 946894890669630655), INT64_C( 7671867306212125913) }, { INT64_C( 6188038121830254663), INT64_C( 7671867306212125913) }, { UINT64_C( 0), UINT64_MAX } }, { { INT64_C( 2558880696443476662), -INT64_C( 3803050708524976813) }, { INT64_C( 2558880696443476662), -INT64_C( 2390307941852566042) }, { UINT64_MAX, UINT64_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_uint64x2_t r = simde_vceqq_s64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int64_t a_[2], b_[2]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint64x2_t r = simde_vceqq_s64(simde_vld1q_s64(a_), simde_vld1q_s64(b_)); 
simde_test_codegen_write_vi64(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vi64(2, sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceqq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(113), UINT8_C( 65), UINT8_C( 13), UINT8_C( 19), UINT8_C(188), UINT8_C( 30), UINT8_C(127), UINT8_C( 46), UINT8_C( 5), UINT8_C(161), UINT8_C(152), UINT8_C(201), UINT8_C(221), UINT8_C( 11), UINT8_C(234), UINT8_C(187) }, { UINT8_C(113), UINT8_C( 65), UINT8_C(119), UINT8_C(215), UINT8_C( 83), UINT8_C( 30), UINT8_C( 66), UINT8_C( 46), UINT8_C( 5), UINT8_C(160), UINT8_C( 36), UINT8_C( 28), UINT8_C(221), UINT8_C(166), UINT8_C(124), UINT8_C( 52) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_MAX, UINT8_C(125), UINT8_C(194), UINT8_C( 32), UINT8_C(198), UINT8_C( 9), UINT8_C( 43), UINT8_C( 47), UINT8_C(164), UINT8_MAX, UINT8_C(121), UINT8_C(240), UINT8_C( 75), UINT8_C( 56), UINT8_C(182), UINT8_C( 97) }, { UINT8_C(194), UINT8_C(254), UINT8_C( 51), UINT8_C( 32), UINT8_C(198), UINT8_C( 9), UINT8_C( 43), UINT8_C( 47), UINT8_C(119), UINT8_MAX, UINT8_C(121), UINT8_C(240), UINT8_C(189), UINT8_C(127), UINT8_C(218), UINT8_C(189) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(124), UINT8_C( 21), UINT8_C(254), UINT8_C(157), UINT8_C( 30), UINT8_C( 54), UINT8_C(113), UINT8_C(139), UINT8_C( 57), UINT8_C(235), UINT8_C(133), UINT8_C(247), UINT8_C(106), UINT8_C( 96), UINT8_C( 67), UINT8_C(103) }, { UINT8_C(252), UINT8_C(196), UINT8_C(254), 
UINT8_C(157), UINT8_C( 85), UINT8_C( 54), UINT8_C(227), UINT8_C(139), UINT8_C( 41), UINT8_C(170), UINT8_C( 12), UINT8_C( 85), UINT8_C( 40), UINT8_C(239), UINT8_C( 67), UINT8_C(165) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C( 94), UINT8_C( 20), UINT8_C( 55), UINT8_C(234), UINT8_C(211), UINT8_C(240), UINT8_C(176), UINT8_C( 23), UINT8_C(197), UINT8_C( 33), UINT8_C(198), UINT8_C(238), UINT8_C( 16), UINT8_C( 12), UINT8_C(102), UINT8_C( 17) }, { UINT8_C( 94), UINT8_C(122), UINT8_C( 55), UINT8_C(234), UINT8_C(211), UINT8_C(240), UINT8_C(176), UINT8_C( 23), UINT8_C( 46), UINT8_C(108), UINT8_C(198), UINT8_C(204), UINT8_C(221), UINT8_C(219), UINT8_C(102), UINT8_C( 17) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 27), UINT8_C( 44), UINT8_C( 48), UINT8_C( 78), UINT8_C( 33), UINT8_C(221), UINT8_C(101), UINT8_C(241), UINT8_C(232), UINT8_C(193), UINT8_C( 22), UINT8_C(213), UINT8_C( 7), UINT8_C(199), UINT8_C(214), UINT8_C( 59) }, { UINT8_C( 27), UINT8_C(113), UINT8_C( 48), UINT8_C(213), UINT8_C( 33), UINT8_C(221), UINT8_C( 84), UINT8_C(241), UINT8_C( 83), UINT8_C(193), UINT8_C(210), UINT8_C(213), UINT8_C(193), UINT8_C(199), UINT8_C(182), UINT8_C( 59) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { UINT8_C(242), UINT8_C(121), UINT8_C(222), UINT8_C(131), UINT8_C(104), UINT8_C(252), UINT8_C(105), UINT8_C(131), UINT8_C(243), UINT8_C( 19), UINT8_C(164), UINT8_C(180), UINT8_C(148), UINT8_C(188), UINT8_C(240), UINT8_C(135) }, { UINT8_C(134), UINT8_C(121), UINT8_C(161), UINT8_C(131), UINT8_C(104), 
UINT8_C(252), UINT8_C(105), UINT8_C(131), UINT8_C(109), UINT8_C( 19), UINT8_C(164), UINT8_C( 91), UINT8_C(148), UINT8_C(188), UINT8_C(100), UINT8_C(135) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { UINT8_C(179), UINT8_C( 71), UINT8_C( 1), UINT8_C(239), UINT8_C( 67), UINT8_C(163), UINT8_C(145), UINT8_C( 69), UINT8_C(121), UINT8_C( 23), UINT8_C( 11), UINT8_C( 13), UINT8_C(211), UINT8_MAX, UINT8_C(148), UINT8_C( 67) }, { UINT8_C(179), UINT8_C( 47), UINT8_C( 1), UINT8_C( 40), UINT8_C(206), UINT8_C(163), UINT8_C(145), UINT8_C( 69), UINT8_C(146), UINT8_C(175), UINT8_C( 89), UINT8_C(133), UINT8_C(188), UINT8_MAX, UINT8_C(254), UINT8_C( 67) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { UINT8_C( 28), UINT8_C( 5), UINT8_C(232), UINT8_C(124), UINT8_C(171), UINT8_C( 35), UINT8_C(189), UINT8_C(103), UINT8_C(105), UINT8_C( 59), UINT8_C( 35), UINT8_C(143), UINT8_C(182), UINT8_C( 95), UINT8_C(137), UINT8_C( 96) }, { UINT8_C( 28), UINT8_C( 5), UINT8_C(232), UINT8_C(124), UINT8_C(171), UINT8_C( 35), UINT8_C( 85), UINT8_C(103), UINT8_C(105), UINT8_C( 59), UINT8_C(247), UINT8_C( 32), UINT8_C(182), UINT8_C( 95), UINT8_C(137), UINT8_C( 96) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vceqq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 
8 ; i++) { uint8_t a_[16], b_[16]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint8x16_t r = simde_vceqq_u8(simde_vld1q_u8(a_), simde_vld1q_u8(b_)); simde_test_codegen_write_vu8(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vu8(2, sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceqq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(44578), UINT16_C(17527), UINT16_C(33032), UINT16_C(22568), UINT16_C(39320), UINT16_C(43487), UINT16_C( 5405), UINT16_C(39565) }, { UINT16_C(44578), UINT16_C(17527), UINT16_C(33032), UINT16_C(49551), UINT16_C(39320), UINT16_C(33295), UINT16_C(53936), UINT16_C(39565) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(27399), UINT16_C(49584), UINT16_C(15701), UINT16_C(41884), UINT16_C(20596), UINT16_C(45799), UINT16_C( 9582), UINT16_C(11635) }, { UINT16_C(33552), UINT16_C(49584), UINT16_C(15701), UINT16_C(25947), UINT16_C(20596), UINT16_C(39065), UINT16_C( 9582), UINT16_C(30039) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C(39753), UINT16_C(56856), UINT16_C(35858), UINT16_C( 8716), UINT16_C(48143), UINT16_C(25827), UINT16_C(56130), UINT16_C(35901) }, { UINT16_C(25231), UINT16_C(64774), UINT16_C(23943), UINT16_C( 6003), UINT16_C( 3100), UINT16_C(55080), UINT16_C(56130), UINT16_C(35901) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, 
{ { UINT16_C(36402), UINT16_C(11349), UINT16_C(11812), UINT16_C(39591), UINT16_C( 4740), UINT16_C(26147), UINT16_C(52872), UINT16_C(47959) }, { UINT16_C(18649), UINT16_C( 7152), UINT16_C(11812), UINT16_C(39591), UINT16_C( 4740), UINT16_C(26147), UINT16_C(52872), UINT16_C(47959) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(41049), UINT16_C(57919), UINT16_C(30379), UINT16_C(33847), UINT16_C(10175), UINT16_C( 3828), UINT16_C(31237), UINT16_C( 9819) }, { UINT16_C(41049), UINT16_C(57919), UINT16_C(38510), UINT16_C(52125), UINT16_C(33859), UINT16_C( 3828), UINT16_C(31237), UINT16_C( 9819) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C( 5837), UINT16_C(54009), UINT16_C(21648), UINT16_C(51921), UINT16_C( 4584), UINT16_C(12013), UINT16_C(18855), UINT16_C(59937) }, { UINT16_C( 5837), UINT16_C(54009), UINT16_C(21648), UINT16_C(59896), UINT16_C(15366), UINT16_C(12013), UINT16_C(30391), UINT16_C(22805) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C( 930), UINT16_C(22855), UINT16_C(34511), UINT16_C(40134), UINT16_C(49052), UINT16_C(11374), UINT16_C(26131), UINT16_C( 6421) }, { UINT16_C( 930), UINT16_C(22855), UINT16_C(23673), UINT16_C( 6578), UINT16_C( 1083), UINT16_C(25129), UINT16_C( 807), UINT16_C(23408) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(63865), UINT16_C(41002), UINT16_C(39676), UINT16_C(34299), UINT16_C(12349), UINT16_C(52445), UINT16_C(53550), UINT16_C(60100) }, { UINT16_C(63865), UINT16_C(41002), UINT16_C(39676), UINT16_C(34299), UINT16_C(12912), UINT16_C(52445), UINT16_C(53550), UINT16_C(60100) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vceqq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint16_t a_[8], b_[8]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint16x8_t r = simde_vceqq_u16(simde_vld1q_u16(a_), simde_vld1q_u16(b_)); simde_test_codegen_write_vu16(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vu16(2, sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceqq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(3263161029), UINT32_C( 101765053), UINT32_C(1816709255), UINT32_C(3169297405) }, { UINT32_C(3263161029), UINT32_C( 27782043), UINT32_C(2243996519), UINT32_C(3169297405) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(2907166278), UINT32_C(2117207937), UINT32_C(1826343079), UINT32_C(3235830277) }, { UINT32_C(2907166278), UINT32_C(2117207937), UINT32_C(1698306474), UINT32_C(3235830277) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(3422007588), UINT32_C( 607638393), UINT32_C(1953130030), UINT32_C(3173101884) }, { UINT32_C(1648055224), UINT32_C(1908962924), UINT32_C(3677464144), UINT32_C( 685716228) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(3615632297), UINT32_C(3444305297), UINT32_C(1468689566), UINT32_C(3343075357) }, { UINT32_C(2359066940), 
UINT32_C( 745065768), UINT32_C(3847505665), UINT32_C(3343075357) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(1372842675), UINT32_C(3096611995), UINT32_C(1598120483), UINT32_C( 200072931) }, { UINT32_C(2889307307), UINT32_C(3096611995), UINT32_C(1216337364), UINT32_C(1313742491) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(2725529726), UINT32_C(4017755219), UINT32_C( 977127474), UINT32_C(3706507585) }, { UINT32_C( 831880797), UINT32_C(4017755219), UINT32_C( 977127474), UINT32_C(1738137065) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(1465781187), UINT32_C(2184020377), UINT32_C( 639009737), UINT32_C(2538992595) }, { UINT32_C(4064530880), UINT32_C(2184020377), UINT32_C(2229978718), UINT32_C(2538992595) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(2178929186), UINT32_C(1812318616), UINT32_C(3009344243), UINT32_C(3115512078) }, { UINT32_C(2178929186), UINT32_C(1812318616), UINT32_C(3137559229), UINT32_C(3115512078) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vceqq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint32_t a_[4], b_[4]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint32x4_t r = simde_vceqq_u32(simde_vld1q_u32(a_), simde_vld1q_u32(b_)); simde_test_codegen_write_vu32(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vu32(2, 
sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vceqq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 5192497038293950529), UINT64_C( 8988655146390972230) }, { UINT64_C( 7654138344029671543), UINT64_C( 8988655146390972230) }, { UINT64_C( 0), UINT64_MAX } }, { { UINT64_C(18144684993376973743), UINT64_C(11614340388918583195) }, { UINT64_C(18144684993376973743), UINT64_C(11129125334456797487) }, { UINT64_MAX, UINT64_C( 0) } }, { { UINT64_C( 3127155335287534526), UINT64_C(16538076718182468773) }, { UINT64_C(15278101899158077231), UINT64_C(16538076718182468773) }, { UINT64_C( 0), UINT64_MAX } }, { { UINT64_C(17870817187791419502), UINT64_C( 8595970968818886908) }, { UINT64_C(17870817187791419502), UINT64_C( 8595970968818886908) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C(15626440220401441863), UINT64_C(11761352064860019587) }, { UINT64_C(14003244560131081260), UINT64_C( 2326113439418180100) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(13483908073591550313), UINT64_C(13996729257435331014) }, { UINT64_C(13483908073591550313), UINT64_C(13996729257435331014) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 966342067865953351), UINT64_C(12496042449812497273) }, { UINT64_C(16683983017028644208), UINT64_C(12496042449812497273) }, { UINT64_C( 0), UINT64_MAX } }, { { UINT64_C(10354616983512965509), UINT64_C( 1816336881116977332) }, { UINT64_C(10354616983512965509), UINT64_C( 1816336881116977332) }, { UINT64_MAX, UINT64_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vceqq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i 
= 0 ; i < 8 ; i++) { uint64_t a_[2], b_[2]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < (sizeof(a_) / sizeof(a_[0])) ; j++) { if (simde_test_codegen_random_i8() & 1) { a_[j] = b_[j]; } } simde_uint64x2_t r = simde_vceqq_u64(simde_vld1q_u64(a_), simde_vld1q_u64(b_)); simde_test_codegen_write_vu64(2, sizeof(a_) / sizeof(a_[0]), a_, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vu64(2, sizeof(b_) / sizeof(b_[0]), b_, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vceqs_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vceqd_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vceqd_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vceqd_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vceq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vceq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vceq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vceq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vceq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vceq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vceq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vceq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vceq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vceq_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vceqq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vceqq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vceqq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vceqq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vceqq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vceqq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vceqq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vceqq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vceqq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vceqq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/ceqz.c000066400000000000000000001213301400333146700164250ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN ceqz #include "test-neon.h" #include #include static int test_simde_vceqz_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; uint32_t r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 
0.00), SIMDE_FLOAT32_C( 0.00) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -327.33) }, { UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -721.62), SIMDE_FLOAT32_C( 916.22) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( 0.00) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 995.55), SIMDE_FLOAT32_C( 510.58) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -157.84) }, { UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) }, { UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_uint32x2_t r = simde_vceqz_f32(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vceqz_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[1]; uint64_t r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( -60.46) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -469.42) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -384.64) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -89.50) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 584.35) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -326.83) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 994.82) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 158.46) }, { UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_uint64x1_t r = simde_vceqz_f64(a); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; } static int test_simde_vceqz_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; uint8_t r[8]; } test_vec[] = { { { INT8_C( 0), -INT8_C( 94), INT8_C( 67), INT8_C( 58), INT8_C( 0), INT8_C( 42), INT8_C( 0), INT8_C( 18) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), 
UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 111), -INT8_C( 68), INT8_C( 92), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 10), INT8_C( 0) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 18), -INT8_C( 63), INT8_C( 0), INT8_C( 0), -INT8_C( 21), INT8_C( 0), INT8_C( 0), INT8_C( 0) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { INT8_C( 0), INT8_C( 0), INT8_C( 74), -INT8_C( 107), INT8_C( 100), INT8_C( 65), -INT8_C( 46), INT8_C( 46) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 7), -INT8_C( 65), INT8_C( 60), -INT8_C( 81) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 0), -INT8_C( 27), INT8_C( 0), INT8_C( 0), INT8_C( 38), INT8_C( 0), INT8_C( 0), INT8_C( 0) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { INT8_C( 0), INT8_C( 0), -INT8_C( 95), -INT8_C( 90), INT8_C( 0), -INT8_C( 34), INT8_C( 86), INT8_C( 0) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 117), -INT8_C( 13), INT8_C( 95), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 124), INT8_C( 103) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_uint8x8_t r = simde_vceqz_s8(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vceqz_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; uint16_t r[4]; } test_vec[] = { { { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, 
UINT16_MAX } }, { { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 17987) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 0), -INT16_C( 20705), INT16_C( 22988), INT16_C( 0) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { -INT16_C( 31482), INT16_C( 0), INT16_C( 0), INT16_C( 3854) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 14307) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 6294), INT16_C( 0), INT16_C( 0), INT16_C( 0) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { INT16_C( 0), INT16_C( 0), INT16_C( 30481), INT16_C( 0) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_uint16x4_t r = simde_vceqz_s16(a); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vceqz_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; uint32_t r[2]; } test_vec[] = { { { INT32_C( 0), INT32_C( 0) }, { UINT32_MAX, UINT32_MAX } }, { { INT32_C( 2044598776), -INT32_C( 1438022561) }, { UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 0), -INT32_C( 1360634674) }, { UINT32_MAX, UINT32_C( 0) } }, { { INT32_C( 0), -INT32_C( 283258467) }, { UINT32_MAX, UINT32_C( 0) } }, { { INT32_C( 0), INT32_C( 0) }, { UINT32_MAX, UINT32_MAX } }, { { INT32_C( 0), INT32_C( 0) }, { UINT32_MAX, UINT32_MAX } }, { { INT32_C( 159454426), INT32_C( 232464368) }, { UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 2054051767), INT32_C( 681421949) }, { UINT32_C( 0), UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_uint32x2_t r = simde_vceqz_s32(a); 
simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vceqz_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; uint64_t r[1]; } test_vec[] = { { { INT64_C( 0) }, { UINT64_MAX } }, { { INT64_C( 3454178962668484182) }, { UINT64_C( 0) } }, { { INT64_C( 0) }, { UINT64_MAX } }, { { INT64_C( 4224425884300989908) }, { UINT64_C( 0) } }, { { -INT64_C( 7039012478961771578) }, { UINT64_C( 0) } }, { { -INT64_C( 6037402715912565499) }, { UINT64_C( 0) } }, { { INT64_C( 4197757843192524821) }, { UINT64_C( 0) } }, { { INT64_C( 0) }, { UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_uint64x1_t r = simde_vceqz_s64(a); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; } static int test_simde_vceqz_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(149), UINT8_C( 43), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 12), UINT8_C( 0) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { UINT8_C(140), UINT8_C(107), UINT8_C( 23), UINT8_C(216), UINT8_C( 0), UINT8_C( 0), UINT8_C(209), UINT8_C(115) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 63), UINT8_C(114), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(225), UINT8_C( 0) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(164), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(187), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(178) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, 
UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C(177), UINT8_C( 0), UINT8_C(134), UINT8_C( 85), UINT8_C(228), UINT8_C(147), UINT8_C( 20), UINT8_C( 0) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { UINT8_C( 0), UINT8_C( 0), UINT8_C( 91), UINT8_C(165), UINT8_C( 0), UINT8_C( 0), UINT8_C( 88), UINT8_C(198) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(116), UINT8_C(226), UINT8_C( 91), UINT8_C( 88), UINT8_C(117), UINT8_C(111), UINT8_C( 0), UINT8_C( 0) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t r = simde_vceqz_u8(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vceqz_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C( 3710), UINT16_C( 0), UINT16_C(34330), UINT16_C(30937) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C( 0), UINT16_C( 0), UINT16_C(28771), UINT16_C( 0) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(15055), UINT16_C( 0), UINT16_C( 0), UINT16_C(59617) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C(29424), UINT16_C( 0), UINT16_C(62941), UINT16_C( 0) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(12449), UINT16_C( 195), UINT16_C(45749), UINT16_C(34158) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C( 0), UINT16_C(31294), UINT16_C( 0), UINT16_C( 0) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { UINT16_C(44577), UINT16_C(16684), UINT16_C(34528), UINT16_C(33221) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(61498), UINT16_C(10225), 
UINT16_C(22388), UINT16_C( 0) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t r = simde_vceqz_u16(a); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vceqz_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C( 644870060), UINT32_C(1756296162) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 722655423), UINT32_C( 0) }, { UINT32_C( 0), UINT32_MAX } }, { { UINT32_C( 0), UINT32_C( 0) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C( 0), UINT32_C( 171113308) }, { UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C( 0), UINT32_C( 0) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C(3870260215), UINT32_C( 0) }, { UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(3524516793), UINT32_C( 0) }, { UINT32_C( 0), UINT32_MAX } }, { { UINT32_C( 0), UINT32_C(1719902769) }, { UINT32_MAX, UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t r = simde_vceqz_u32(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vceqz_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C( 0) }, { UINT64_MAX } }, { { UINT64_C( 0) }, { UINT64_MAX } }, { { UINT64_C(10791983333082230527) }, { UINT64_C( 0) } }, { { UINT64_C( 0) }, { UINT64_MAX } }, { { UINT64_C( 0) }, { UINT64_MAX } }, { { UINT64_C( 7298863471407900586) }, { UINT64_C( 0) } }, { { UINT64_C(13426132532232202525) }, { UINT64_C( 0) } }, { { UINT64_C( 0) }, { UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t r = simde_vceqz_u64(a); simde_test_arm_neon_assert_equal_u64x1(r, 
simde_vld1_u64(test_vec[i].r)); } return 0; } static int test_simde_vceqzq_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; uint32_t r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -765.96), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -158.27) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -383.93), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -833.58), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -556.99) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -216.78), SIMDE_FLOAT32_C( -123.11), SIMDE_FLOAT32_C( 0.00) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 892.89), SIMDE_FLOAT32_C( 0.00) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -117.11), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -727.26) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -788.65), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_uint32x4_t r = simde_vceqzq_f32(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vceqzq_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; uint64_t r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 962.42) }, { UINT64_MAX, UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) }, { 
UINT64_MAX, UINT64_MAX } }, { { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 85.69) }, { UINT64_MAX, UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 551.75), SIMDE_FLOAT64_C( 0.00) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) }, { UINT64_MAX, UINT64_MAX } }, { { SIMDE_FLOAT64_C( 36.19), SIMDE_FLOAT64_C( -43.32) }, { UINT64_C( 0), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -973.94), SIMDE_FLOAT64_C( -254.74) }, { UINT64_C( 0), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 779.02), SIMDE_FLOAT64_C( 185.84) }, { UINT64_C( 0), UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_uint64x2_t r = simde_vceqzq_f64(a); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } static int test_simde_vceqzq_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; uint8_t r[16]; } test_vec[] = { { { -INT8_C( 28), INT8_C( 118), INT8_C( 0), INT8_C( 65), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 51), -INT8_C( 106), INT8_C( 0), -INT8_C( 3), INT8_C( 0), INT8_C( 32), INT8_C( 0), INT8_C( 0) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { INT8_C( 0), INT8_C( 100), -INT8_C( 19), INT8_C( 99), INT8_C( 0), INT8_C( 0), INT8_C( 104), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 64), -INT8_C( 39), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { -INT8_C( 46), -INT8_C( 44), INT8_C( 0), -INT8_C( 31), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 20), INT8_C( 0), INT8_C( 0), -INT8_C( 58), -INT8_C( 122), INT8_C( 0), -INT8_C( 36), -INT8_C( 114), INT8_C( 0) }, { UINT8_C( 0), UINT8_C( 0), 
UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { -INT8_C( 62), -INT8_C( 123), INT8_C( 0), INT8_C( 0), INT8_C( 68), INT8_C( 0), INT8_C( 68), INT8_C( 0), -INT8_C( 66), INT8_C( 10), INT8_C( 0), INT8_C( 0), -INT8_C( 25), INT8_C( 0), INT8_C( 5), INT8_C( 0) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 0), INT8_C( 43), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 75), INT8_C( 0), INT8_C( 0), -INT8_C( 88), INT8_C( 0), INT8_C( 37), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 83) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 0), INT8_C( 0), INT8_C( 92), -INT8_C( 66), -INT8_C( 67), INT8_C( 0), INT8_C( 0), -INT8_C( 5), INT8_C( 0), INT8_C( 32), INT8_C( 32), -INT8_C( 29), INT8_C( 69), INT8_C( 0), INT8_C( 54), -INT8_C( 7) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 0), INT8_C( 0), -INT8_C( 109), INT8_C( 0), -INT8_C( 126), -INT8_C( 109), INT8_C( 0), INT8_C( 0), -INT8_C( 76), INT8_C( 0), INT8_C( 0), -INT8_C( 7), INT8_MAX, -INT8_C( 21), INT8_C( 0), INT8_C( 0) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { -INT8_C( 39), INT8_C( 0), -INT8_C( 98), INT8_C( 91), INT8_C( 0), -INT8_C( 104), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 99), INT8_C( 50), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 89) 
}, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_uint8x16_t r = simde_vceqzq_s8(a); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vceqzq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; uint16_t r[8]; } test_vec[] = { { { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 24335), -INT16_C( 21286), INT16_C( 0), INT16_C( 0), INT16_C( 31836) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 0), INT16_C( 17565), INT16_C( 24523), INT16_C( 14671), INT16_C( 0), -INT16_C( 17008), -INT16_C( 24709), INT16_C( 0) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { -INT16_C( 32471), -INT16_C( 10245), -INT16_C( 5524), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 8174), INT16_C( 0) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { -INT16_C( 20265), -INT16_C( 15545), INT16_C( 0), INT16_C( 27834), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 17760) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { -INT16_C( 27239), INT16_C( 29471), INT16_C( 0), INT16_C( 22936), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 11770) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 9873), -INT16_C( 7958), -INT16_C( 20602), INT16_C( 0), INT16_C( 0), -INT16_C( 14702), INT16_C( 10900), INT16_C( 0) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, 
{ { -INT16_C( 604), INT16_C( 6091), INT16_C( 15454), -INT16_C( 4273), INT16_C( 0), -INT16_C( 5936), INT16_C( 0), INT16_C( 0) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { -INT16_C( 31919), INT16_C( 25758), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_uint16x8_t r = simde_vceqzq_s16(a); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vceqzq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; uint32_t r[4]; } test_vec[] = { { { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 741828223), -INT32_C( 1231377400), INT32_C( 1319317648), INT32_C( 0) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 0), INT32_C( 858956050), INT32_C( 0), INT32_C( 0) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { INT32_C( 0), INT32_C( 0), INT32_C( 1164997533), INT32_C( 0) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 0), -INT32_C( 564635233), INT32_C( 0), INT32_C( 586009733) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { -INT32_C( 183776264), INT32_C( 0), INT32_C( 0), INT32_C( 0) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { INT32_C( 0), INT32_C( 0), -INT32_C( 1036285494), INT32_C( 0) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 1438651394), -INT32_C( 1538764752), INT32_C( 0), INT32_C( 0) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); 
simde_uint32x4_t r = simde_vceqzq_s32(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vceqzq_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; uint64_t r[2]; } test_vec[] = { { { INT64_C( 0), -INT64_C( 1979153583605383721) }, { UINT64_MAX, UINT64_C( 0) } }, { { -INT64_C( 5377894213596184894), INT64_C( 3556417410291188140) }, { UINT64_C( 0), UINT64_C( 0) } }, { { INT64_C( 0), INT64_C( 3867993108857557819) }, { UINT64_MAX, UINT64_C( 0) } }, { { -INT64_C( 8754853928386430442), INT64_C( 0) }, { UINT64_C( 0), UINT64_MAX } }, { { INT64_C( 0), -INT64_C( 7029379564668574246) }, { UINT64_MAX, UINT64_C( 0) } }, { { -INT64_C( 2748201011641982176), INT64_C( 1808857410149651866) }, { UINT64_C( 0), UINT64_C( 0) } }, { { INT64_C( 0), -INT64_C( 4758189311056880975) }, { UINT64_MAX, UINT64_C( 0) } }, { { -INT64_C( 4405967909247061718), -INT64_C( 5912680786139810763) }, { UINT64_C( 0), UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_uint64x2_t r = simde_vceqzq_s64(a); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } static int test_simde_vceqzq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C( 0), UINT8_C(254), UINT8_C( 0), UINT8_C( 0), UINT8_C(153), UINT8_C( 0), UINT8_C( 0), UINT8_C(123), UINT8_C( 0), UINT8_C( 80), UINT8_C(211), UINT8_C(166), UINT8_C(186), UINT8_C( 5), UINT8_C( 21), UINT8_C( 0) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { UINT8_C(246), UINT8_C(225), UINT8_C( 8), UINT8_C(144), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 56), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(141), UINT8_C(131) }, { UINT8_C( 0), 
UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 0), UINT8_C( 0), UINT8_C( 6), UINT8_C( 60), UINT8_C( 36), UINT8_C( 0), UINT8_C(116), UINT8_C(231), UINT8_C(240), UINT8_C( 0), UINT8_C( 0), UINT8_C(212), UINT8_C( 71), UINT8_C(241), UINT8_C( 87), UINT8_C( 0) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { UINT8_C( 0), UINT8_C( 0), UINT8_C( 13), UINT8_C(123), UINT8_C( 65), UINT8_C(130), UINT8_C( 98), UINT8_C( 0), UINT8_C(213), UINT8_C( 0), UINT8_C( 0), UINT8_C( 28), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(194) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C( 0), UINT8_C( 39), UINT8_C(195), UINT8_C( 0), UINT8_C(169), UINT8_C( 37), UINT8_C(218), UINT8_C(126), UINT8_C(235), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 63), UINT8_C( 23), UINT8_C( 0), UINT8_C( 0) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { UINT8_C(125), UINT8_C( 0), UINT8_C( 96), UINT8_C( 0), UINT8_C(245), UINT8_C( 0), UINT8_C( 0), UINT8_C(224), UINT8_C( 27), UINT8_C( 62), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(193), UINT8_C(229) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 0), UINT8_C( 0), UINT8_C( 39), UINT8_C( 0), UINT8_C( 97), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 10), 
UINT8_C( 0), UINT8_C(215), UINT8_C( 0), UINT8_C( 48), UINT8_C(152), UINT8_C( 0), UINT8_C( 0) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 61), UINT8_C( 0), UINT8_C( 96), UINT8_C( 0), UINT8_C( 0), UINT8_C( 40), UINT8_C( 26), UINT8_C( 0), UINT8_C( 0), UINT8_C(241), UINT8_C(172), UINT8_C( 0), UINT8_C(137), UINT8_C( 0), UINT8_C( 87), UINT8_C( 0) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t r = simde_vceqzq_u8(a); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vceqzq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C( 2628), UINT16_C( 0), UINT16_C( 0), UINT16_C(52529), UINT16_C(42347), UINT16_C(60537), UINT16_C( 0), UINT16_C( 0) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(40765), UINT16_C(35889), UINT16_C(34127), UINT16_C(32871), UINT16_C( 0) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(42025), UINT16_C( 7467), UINT16_C(38458), UINT16_C( 0), UINT16_C( 9850), UINT16_C(54806), UINT16_C(21582), UINT16_C(32629) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(55585), UINT16_C( 0), UINT16_C( 0), UINT16_C(46249), UINT16_C( 0), UINT16_C( 0), UINT16_C(59499), UINT16_C( 0) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), 
UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(57589), UINT16_C(15068), UINT16_C(63287), UINT16_C( 0), UINT16_C(37584), UINT16_C(23742), UINT16_C(26560), UINT16_C(37648) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(18507), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(10408), UINT16_C(19561), UINT16_C( 0) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(53896), UINT16_C(21130), UINT16_C( 5820), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(40560), UINT16_C( 0) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(18507), UINT16_C( 0), UINT16_C( 0), UINT16_C(30421), UINT16_C( 0), UINT16_C( 0), UINT16_C(62069), UINT16_C( 0) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t r = simde_vceqzq_u16(a); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vceqzq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C( 0), UINT32_C( 0), UINT32_C(1525584464), UINT32_C(3368923743) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 0), UINT32_C(2610273753), UINT32_C(1719119292), UINT32_C( 0) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(2023245336), UINT32_C( 0), UINT32_C( 862246070), UINT32_C(3238986728) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(1455608914), UINT32_C(2095263119), UINT32_C( 0), UINT32_C(2051297665) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C( 0), UINT32_C( 788705166), 
UINT32_C(2794151764), UINT32_C(3271314995) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(2650470171), UINT32_C( 0), UINT32_C( 0), UINT32_C(1482492874) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C( 0), UINT32_C(3702763873), UINT32_C( 0), UINT32_C( 843646441) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C( 0), UINT32_C(1673625513), UINT32_C( 0), UINT32_C(3180275292) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t r = simde_vceqzq_u32(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vceqzq_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(11054128522423255960), UINT64_C( 0) }, { UINT64_C( 0), UINT64_MAX } }, { { UINT64_C(18174782665337011255), UINT64_C(17375868560037170376) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 0), UINT64_C( 0) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 9091372480335400579), UINT64_C( 0) }, { UINT64_C( 0), UINT64_MAX } }, { { UINT64_C( 4304431729282197337), UINT64_C(12656370081252662593) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 0), UINT64_C(12455881121136436021) }, { UINT64_MAX, UINT64_C( 0) } }, { { UINT64_C(16621911418554417429), UINT64_C( 0) }, { UINT64_C( 0), UINT64_MAX } }, { { UINT64_C( 0), UINT64_C( 0) }, { UINT64_MAX, UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t r = simde_vceqzq_u64(a); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vceqz_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vceqz_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vceqz_s8) 
SIMDE_TEST_FUNC_LIST_ENTRY(vceqz_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vceqz_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vceqz_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vceqz_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vceqz_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vceqz_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vceqz_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vceqzq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vceqzq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vceqzq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vceqzq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vceqzq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vceqzq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vceqzq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vceqzq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vceqzq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vceqzq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/cge.c000066400000000000000000002272621400333146700162340ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN cge #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/cge.h" #else #include "../../../simde/arm/neon.h" #endif static int test_simde_vcge_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b[2]; uint32_t r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -762.79), SIMDE_FLOAT32_C( 278.49) }, { SIMDE_FLOAT32_C( 573.76), SIMDE_FLOAT32_C( 673.25) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 768.48), SIMDE_FLOAT32_C( 971.82) }, { SIMDE_FLOAT32_C( 811.98), SIMDE_FLOAT32_C( -395.22) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -590.44), SIMDE_FLOAT32_C( 152.23) }, { SIMDE_FLOAT32_C( -590.44), SIMDE_FLOAT32_C( 152.23) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 824.17), SIMDE_FLOAT32_C( 743.68) }, { SIMDE_FLOAT32_C( -356.79), SIMDE_FLOAT32_C( 242.15) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 141.44), SIMDE_FLOAT32_C( -148.44) }, { SIMDE_FLOAT32_C( -47.53), SIMDE_FLOAT32_C( -906.36) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( -221.36), SIMDE_FLOAT32_C( -647.60) }, { SIMDE_FLOAT32_C( 
150.33), SIMDE_FLOAT32_C( -647.60) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -973.68), SIMDE_FLOAT32_C( 554.40) }, { SIMDE_FLOAT32_C( -973.68), SIMDE_FLOAT32_C( -148.92) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 829.76), SIMDE_FLOAT32_C( -725.20) }, { SIMDE_FLOAT32_C( 365.78), SIMDE_FLOAT32_C( -760.67) }, { UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_uint32x2_t r = simde_vcge_f32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_private a_ = simde_float32x2_to_private(simde_test_arm_neon_random_f32x2(-1000.0, 1000.0)); simde_float32x2_private b_ = simde_float32x2_to_private(simde_test_arm_neon_random_f32x2(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_float32x2_t a = simde_float32x2_from_private(a_); simde_float32x2_t b = simde_float32x2_from_private(b_); simde_uint32x2_t r = simde_vcge_f32(a, b); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcge_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; simde_float64 b[1]; uint64_t r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( 130.39) }, { SIMDE_FLOAT64_C( 996.42) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 35.94) }, { SIMDE_FLOAT64_C( 35.94) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( -578.02) }, { SIMDE_FLOAT64_C( 669.25) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 105.77) }, { SIMDE_FLOAT64_C( 591.27) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -682.67) }, { SIMDE_FLOAT64_C( -705.46) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 880.87) }, { SIMDE_FLOAT64_C( -97.20) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 338.18) }, { SIMDE_FLOAT64_C( 925.14) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 626.85) }, { SIMDE_FLOAT64_C( -403.18) }, { UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_uint64x1_t r = simde_vcge_f64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_private a_ = simde_float64x1_to_private(simde_test_arm_neon_random_f64x1(-1000.0, 1000.0)); simde_float64x1_private b_ = simde_float64x1_to_private(simde_test_arm_neon_random_f64x1(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_float64x1_t a = simde_float64x1_from_private(a_); simde_float64x1_t b = simde_float64x1_from_private(b_); simde_uint64x1_t r = simde_vcge_f64(a, b); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcge_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; uint8_t r[8]; } test_vec[] = { { { INT8_MIN, -INT8_C( 59), -INT8_C( 67), INT8_C( 75), INT8_C( 83), -INT8_C( 69), INT8_C( 29), -INT8_C( 34) }, { -INT8_C( 98), INT8_C( 50), INT8_C( 122), INT8_C( 18), -INT8_C( 65), -INT8_C( 69), INT8_C( 29), -INT8_C( 121) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { -INT8_C( 51), -INT8_C( 84), INT8_C( 110), INT8_C( 104), -INT8_C( 93), -INT8_C( 118), INT8_C( 26), INT8_C( 35) }, { -INT8_C( 24), -INT8_C( 115), INT8_C( 110), INT8_C( 59), INT8_C( 65), -INT8_C( 118), INT8_C( 26), -INT8_C( 33) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { -INT8_C( 41), INT8_C( 95), INT8_C( 90), INT8_C( 86), INT8_C( 120), -INT8_C( 126), -INT8_C( 11), INT8_C( 69) }, { INT8_C( 46), -INT8_C( 60), -INT8_C( 82), -INT8_C( 46), -INT8_C( 25), INT8_C( 126), -INT8_C( 11), -INT8_C( 48) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { -INT8_C( 70), INT8_C( 29), INT8_C( 37), INT8_C( 0), INT8_C( 44), INT8_C( 40), -INT8_C( 1), INT8_C( 3) }, { -INT8_C( 120), INT8_C( 89), INT8_C( 90), INT8_C( 0), -INT8_C( 36), -INT8_C( 92), INT8_C( 69), INT8_C( 10) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 54), INT8_C( 43), -INT8_C( 56), 
INT8_C( 36), INT8_C( 81), -INT8_C( 12), -INT8_C( 50), INT8_C( 11) }, { INT8_C( 17), -INT8_C( 13), INT8_C( 20), INT8_C( 62), INT8_C( 28), INT8_C( 19), INT8_C( 65), -INT8_C( 92) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { -INT8_C( 35), INT8_C( 48), -INT8_C( 8), INT8_C( 78), INT8_C( 2), INT8_C( 24), -INT8_C( 53), -INT8_C( 103) }, { INT8_C( 68), -INT8_C( 109), INT8_C( 92), -INT8_C( 107), -INT8_C( 121), INT8_C( 42), -INT8_C( 96), -INT8_C( 103) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { -INT8_C( 76), -INT8_C( 127), -INT8_C( 106), INT8_C( 3), INT8_C( 107), -INT8_C( 47), INT8_C( 59), -INT8_C( 15) }, { INT8_C( 1), -INT8_C( 107), -INT8_C( 106), INT8_C( 3), -INT8_C( 83), INT8_C( 97), INT8_C( 59), -INT8_C( 15) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { -INT8_C( 37), -INT8_C( 20), INT8_C( 24), -INT8_C( 30), INT8_C( 5), -INT8_C( 10), -INT8_C( 40), INT8_C( 41) }, { INT8_C( 119), INT8_C( 86), -INT8_C( 83), -INT8_C( 30), INT8_C( 39), INT8_C( 73), INT8_C( 42), INT8_C( 41) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_uint8x8_t r = simde_vcge_s8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_private a_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); simde_int8x8_private b_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int8x8_t a = simde_int8x8_from_private(a_); simde_int8x8_t b = simde_int8x8_from_private(b_); simde_uint8x8_t r = simde_vcge_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcge_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; uint16_t r[4]; } test_vec[] = { { { INT16_C( 19213), -INT16_C( 23976), -INT16_C( 13884), INT16_C( 9880) }, { INT16_C( 19213), -INT16_C( 22464), -INT16_C( 32203), INT16_C( 9880) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { INT16_C( 18524), INT16_C( 14362), INT16_C( 2421), -INT16_C( 13240) }, { -INT16_C( 29395), -INT16_C( 31403), -INT16_C( 21161), INT16_C( 6951) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { -INT16_C( 29148), INT16_C( 27173), -INT16_C( 15344), INT16_C( 20607) }, { -INT16_C( 8434), INT16_C( 27173), INT16_C( 16167), -INT16_C( 25182) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { -INT16_C( 16776), -INT16_C( 12037), -INT16_C( 27961), -INT16_C( 7445) }, { INT16_C( 14843), INT16_C( 8038), -INT16_C( 27961), -INT16_C( 10376) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { INT16_C( 19927), -INT16_C( 305), INT16_C( 29324), -INT16_C( 10853) }, { INT16_C( 1117), -INT16_C( 10932), INT16_C( 18370), INT16_C( 11941) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { -INT16_C( 28427), -INT16_C( 28283), -INT16_C( 760), INT16_C( 24424) }, { -INT16_C( 28427), -INT16_C( 13116), -INT16_C( 27427), INT16_C( 27083) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { -INT16_C( 29845), INT16_C( 11576), INT16_C( 10209), -INT16_C( 5430) }, { INT16_C( 27503), INT16_C( 14754), INT16_C( 10209), 
-INT16_C( 5430) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { INT16_C( 3778), -INT16_C( 31989), -INT16_C( 20062), -INT16_C( 22519) }, { INT16_C( 18456), -INT16_C( 31989), INT16_C( 17363), -INT16_C( 23120) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_uint16x4_t r = simde_vcge_s16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_private a_ = simde_int16x4_to_private(simde_test_arm_neon_random_i16x4()); simde_int16x4_private b_ = simde_int16x4_to_private(simde_test_arm_neon_random_i16x4()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int16x4_t a = simde_int16x4_from_private(a_); simde_int16x4_t b = simde_int16x4_from_private(b_); simde_uint16x4_t r = simde_vcge_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcge_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; uint32_t r[2]; } test_vec[] = { { { -INT32_C( 829143814), INT32_C( 1777284011) }, { -INT32_C( 829143814), INT32_C( 1777284011) }, { UINT32_MAX, UINT32_MAX } }, { { INT32_C( 1630841126), -INT32_C( 1815256670) }, { INT32_C( 1763409610), -INT32_C( 1815256670) }, { UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 951596606), INT32_C( 621169786) }, { -INT32_C( 561056129), INT32_C( 797488379) }, { UINT32_MAX, UINT32_C( 0) } }, { { -INT32_C( 1092706572), -INT32_C( 1087899363) }, { -INT32_C( 
1705839269), INT32_C( 1473286559) }, { UINT32_MAX, UINT32_C( 0) } }, { { -INT32_C( 957249103), -INT32_C( 1056834106) }, { -INT32_C( 957249103), INT32_C( 694543690) }, { UINT32_MAX, UINT32_C( 0) } }, { { -INT32_C( 289937627), INT32_C( 1595380928) }, { -INT32_C( 289937627), -INT32_C( 1598137807) }, { UINT32_MAX, UINT32_MAX } }, { { INT32_C( 1313243037), INT32_C( 1393899272) }, { -INT32_C( 1082361316), INT32_C( 1032494844) }, { UINT32_MAX, UINT32_MAX } }, { { INT32_C( 971875259), INT32_C( 1290101019) }, { INT32_C( 971875259), INT32_C( 1857587752) }, { UINT32_MAX, UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_uint32x2_t r = simde_vcge_s32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_private a_ = simde_int32x2_to_private(simde_test_arm_neon_random_i32x2()); simde_int32x2_private b_ = simde_int32x2_to_private(simde_test_arm_neon_random_i32x2()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int32x2_t a = simde_int32x2_from_private(a_); simde_int32x2_t b = simde_int32x2_from_private(b_); simde_uint32x2_t r = simde_vcge_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcge_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int64_t b[1]; uint64_t r[1]; } test_vec[] = { { { INT64_C( 8640190357508052970) }, { INT64_C( 5528585362751356760) }, { UINT64_MAX } }, { { -INT64_C( 3033591299633767839) }, { -INT64_C( 7667411631510427440) }, { UINT64_MAX } }, { { INT64_C( 4177766084966535903) }, { INT64_C( 4177766084966535903) }, { UINT64_MAX } }, { { -INT64_C( 6092879107822443257) }, { -INT64_C( 6092879107822443257) }, { UINT64_MAX } }, { { -INT64_C( 3695200531164032468) }, { INT64_C( 1346358656504223643) }, { UINT64_C( 0) } }, { { -INT64_C( 5449008510585538877) }, { -INT64_C( 5449008510585538877) }, { UINT64_MAX } }, { { INT64_C( 7410667899739727175) }, { -INT64_C( 2022569260477257840) }, { UINT64_MAX } }, { { -INT64_C( 921434190897626108) }, { -INT64_C( 1061286504771364963) }, { UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_uint64x1_t r = simde_vcge_s64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_private a_ = simde_int64x1_to_private(simde_test_arm_neon_random_i64x1()); simde_int64x1_private b_ = simde_int64x1_to_private(simde_test_arm_neon_random_i64x1()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int64x1_t a = simde_int64x1_from_private(a_); simde_int64x1_t b = simde_int64x1_from_private(b_); simde_uint64x1_t r = simde_vcge_s64(a, b); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcge_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 62), UINT8_C(185), UINT8_C(176), UINT8_C(170), UINT8_C(140), UINT8_C( 91), UINT8_C( 69), UINT8_C(245) }, { UINT8_C(115), UINT8_C(185), UINT8_C(176), UINT8_C(170), UINT8_C(140), UINT8_C(156), UINT8_C( 73), UINT8_C(113) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { UINT8_C(127), UINT8_C( 90), UINT8_C( 82), UINT8_C( 94), UINT8_C(181), UINT8_C(116), UINT8_C(134), UINT8_C( 40) }, { UINT8_C(162), UINT8_C( 90), UINT8_C( 82), UINT8_C( 35), UINT8_C(181), UINT8_C(151), UINT8_C( 24), UINT8_C( 40) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 49), UINT8_C( 25), UINT8_C( 97), UINT8_C( 73), UINT8_C(132), UINT8_C( 28), UINT8_C( 96), UINT8_C( 23) }, { UINT8_C(112), UINT8_C( 1), UINT8_C( 97), UINT8_C( 73), UINT8_C(117), UINT8_C(231), UINT8_C( 96), UINT8_C( 23) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 27), UINT8_C(240), UINT8_C(119), UINT8_C(128), UINT8_C( 11), UINT8_C(198), UINT8_C(249), UINT8_C( 61) }, { UINT8_C(223), UINT8_C(171), UINT8_C(141), UINT8_C( 99), UINT8_C(200), UINT8_C(221), UINT8_C(103), UINT8_C( 56) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { UINT8_C(247), 
UINT8_C(165), UINT8_C(232), UINT8_C( 2), UINT8_C(181), UINT8_C(141), UINT8_C(120), UINT8_C( 20) }, { UINT8_C(247), UINT8_C(239), UINT8_C(148), UINT8_C( 2), UINT8_C(181), UINT8_C(141), UINT8_C( 63), UINT8_C(149) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C( 40), UINT8_C(177), UINT8_C(220), UINT8_C(216), UINT8_C(155), UINT8_C(132), UINT8_C(201), UINT8_C( 46) }, { UINT8_C(236), UINT8_C(177), UINT8_C( 12), UINT8_C(229), UINT8_C(184), UINT8_C(132), UINT8_C(249), UINT8_C(175) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(189), UINT8_C(182), UINT8_C( 84), UINT8_C(226), UINT8_C( 22), UINT8_C(141), UINT8_C( 17), UINT8_MAX }, { UINT8_C( 71), UINT8_C(204), UINT8_C( 22), UINT8_C(226), UINT8_C( 19), UINT8_C(223), UINT8_C( 17), UINT8_MAX }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { UINT8_C(178), UINT8_C( 45), UINT8_C( 61), UINT8_C(200), UINT8_C(154), UINT8_C(175), UINT8_C(216), UINT8_C( 88) }, { UINT8_C(178), UINT8_C( 45), UINT8_C(191), UINT8_C(200), UINT8_C(186), UINT8_C(175), UINT8_C( 6), UINT8_C( 2) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vcge_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_private a_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); simde_uint8x8_private b_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint8x8_t a = simde_uint8x8_from_private(a_); simde_uint8x8_t b = simde_uint8x8_from_private(b_); simde_uint8x8_t r = simde_vcge_u8(a, b); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcge_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C( 2642), UINT16_C(12638), UINT16_C( 2321), UINT16_C(53075) }, { UINT16_C(13907), UINT16_C(49252), UINT16_C(65171), UINT16_C(14608) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(36937), UINT16_C(10202), UINT16_C( 4220), UINT16_C(12453) }, { UINT16_C(56533), UINT16_C(10202), UINT16_C(14822), UINT16_C(63321) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(11234), UINT16_C(30038), UINT16_C(26153), UINT16_C(21423) }, { UINT16_C(12745), UINT16_C( 4854), UINT16_C(26049), UINT16_C(15992) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(18682), UINT16_C(57458), UINT16_C(45002), UINT16_C(50136) }, { UINT16_C(40823), UINT16_C(22873), UINT16_C(45002), UINT16_C(62671) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C(15791), UINT16_C(28913), UINT16_C(27298), UINT16_C( 6062) }, { UINT16_C( 7303), UINT16_C(33122), UINT16_C(54373), UINT16_C(58978) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C( 985), UINT16_C(41839), UINT16_C(16050), UINT16_C(49116) }, { UINT16_C(57276), UINT16_C(27558), UINT16_C(38940), UINT16_C(49116) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(14503), UINT16_C(32961), UINT16_C(12703), UINT16_C(20771) }, { UINT16_C(40103), UINT16_C(32961), UINT16_C(12703), 
UINT16_C(20771) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(49050), UINT16_C(42467), UINT16_C(29527), UINT16_C(22901) }, { UINT16_C(19710), UINT16_C(42467), UINT16_C(61060), UINT16_C(37297) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vcge_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_private a_ = simde_uint16x4_to_private(simde_test_arm_neon_random_u16x4()); simde_uint16x4_private b_ = simde_uint16x4_to_private(simde_test_arm_neon_random_u16x4()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint16x4_t a = simde_uint16x4_from_private(a_); simde_uint16x4_t b = simde_uint16x4_from_private(b_); simde_uint16x4_t r = simde_vcge_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcge_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C( 569449259), UINT32_C( 284840429) }, { UINT32_C( 862297363), UINT32_C( 984793204) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(3508313849), UINT32_C(4018812414) }, { UINT32_C(1862940867), UINT32_C(4018812414) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C(1401376832), UINT32_C(1653009646) }, { UINT32_C( 312228012), UINT32_C( 733058575) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C(1156147693), UINT32_C(4192123131) }, { 
UINT32_C(1156147693), UINT32_C(1754540512) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C(2138489879), UINT32_C(1259569468) }, { UINT32_C(2138489879), UINT32_C( 490807918) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C(3307313992), UINT32_C(3104251864) }, { UINT32_C(3307313992), UINT32_C( 63534366) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C(2622524036), UINT32_C(3488133017) }, { UINT32_C(2044353768), UINT32_C(3488133017) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C( 913996270), UINT32_C( 437369546) }, { UINT32_C(2268513552), UINT32_C( 437369546) }, { UINT32_C( 0), UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vcge_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_private a_ = simde_uint32x2_to_private(simde_test_arm_neon_random_u32x2()); simde_uint32x2_private b_ = simde_uint32x2_to_private(simde_test_arm_neon_random_u32x2()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint32x2_t a = simde_uint32x2_from_private(a_); simde_uint32x2_t b = simde_uint32x2_from_private(b_); simde_uint32x2_t r = simde_vcge_u32(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcge_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C(15188592653948299680) }, { UINT64_C(15188592653948299680) }, { UINT64_MAX } }, { { UINT64_C( 6204103708661670688) }, { UINT64_C( 6536367965351154177) }, { UINT64_C( 0) } }, { { UINT64_C( 1365467413574693313) }, { UINT64_C(13030847014366772973) }, { UINT64_C( 0) } }, { { UINT64_C( 8955896582142371742) }, { UINT64_C( 6753901549624578627) }, { UINT64_MAX } }, { { UINT64_C( 2548699820776245226) }, { UINT64_C( 2548699820776245226) }, { UINT64_MAX } }, { { UINT64_C( 8700755168802594176) }, { UINT64_C( 8700755168802594176) }, { UINT64_MAX } }, { { UINT64_C( 3667811991632179057) }, { UINT64_C( 3667811991632179057) }, { UINT64_MAX } }, { { UINT64_C(16236877168053407557) }, { UINT64_C(14927669731982511924) }, { UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_vcge_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_private a_ = simde_uint64x1_to_private(simde_test_arm_neon_random_u64x1()); simde_uint64x1_private b_ = simde_uint64x1_to_private(simde_test_arm_neon_random_u64x1()); /* Make some equal which is unlikely to happen by 
accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint64x1_t a = simde_uint64x1_from_private(a_); simde_uint64x1_t b = simde_uint64x1_from_private(b_); simde_uint64x1_t r = simde_vcge_u64(a, b); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgeq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 b[4]; uint32_t r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -645.35), SIMDE_FLOAT32_C( 906.68), SIMDE_FLOAT32_C( 619.69), SIMDE_FLOAT32_C( 579.13) }, { SIMDE_FLOAT32_C( -888.20), SIMDE_FLOAT32_C( 906.68), SIMDE_FLOAT32_C( -502.35), SIMDE_FLOAT32_C( 579.13) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 247.32), SIMDE_FLOAT32_C( 296.45), SIMDE_FLOAT32_C( 752.07), SIMDE_FLOAT32_C( 130.33) }, { SIMDE_FLOAT32_C( 35.16), SIMDE_FLOAT32_C( 1.82), SIMDE_FLOAT32_C( 752.07), SIMDE_FLOAT32_C( -668.20) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 180.85), SIMDE_FLOAT32_C( 267.89), SIMDE_FLOAT32_C( 986.47), SIMDE_FLOAT32_C( 541.82) }, { SIMDE_FLOAT32_C( 880.72), SIMDE_FLOAT32_C( -827.97), SIMDE_FLOAT32_C( 567.27), SIMDE_FLOAT32_C( -764.64) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 93.64), SIMDE_FLOAT32_C( 529.98), SIMDE_FLOAT32_C( 399.13), SIMDE_FLOAT32_C( 278.11) }, { SIMDE_FLOAT32_C( -969.21), SIMDE_FLOAT32_C( -931.35), SIMDE_FLOAT32_C( 399.13), SIMDE_FLOAT32_C( 278.11) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 175.35), SIMDE_FLOAT32_C( -839.50), SIMDE_FLOAT32_C( -267.94), SIMDE_FLOAT32_C( -643.81) }, { SIMDE_FLOAT32_C( 175.35), SIMDE_FLOAT32_C( 52.39), SIMDE_FLOAT32_C( 966.24), SIMDE_FLOAT32_C( -643.81) }, { 
UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -875.26), SIMDE_FLOAT32_C( 465.28), SIMDE_FLOAT32_C( -563.64), SIMDE_FLOAT32_C( -189.26) }, { SIMDE_FLOAT32_C( -347.77), SIMDE_FLOAT32_C( 468.68), SIMDE_FLOAT32_C( 608.55), SIMDE_FLOAT32_C( 745.87) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -945.88), SIMDE_FLOAT32_C( -284.00), SIMDE_FLOAT32_C( -692.44), SIMDE_FLOAT32_C( 92.60) }, { SIMDE_FLOAT32_C( -840.71), SIMDE_FLOAT32_C( -284.00), SIMDE_FLOAT32_C( 819.48), SIMDE_FLOAT32_C( 92.60) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 603.93), SIMDE_FLOAT32_C( -693.95), SIMDE_FLOAT32_C( -874.81), SIMDE_FLOAT32_C( 462.62) }, { SIMDE_FLOAT32_C( 337.88), SIMDE_FLOAT32_C( -693.95), SIMDE_FLOAT32_C( -874.81), SIMDE_FLOAT32_C( 462.62) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_uint32x4_t r = simde_vcgeq_f32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_private a_ = simde_float32x4_to_private(simde_test_arm_neon_random_f32x4(-1000.0, 1000.0)); simde_float32x4_private b_ = simde_float32x4_to_private(simde_test_arm_neon_random_f32x4(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_float32x4_t a = simde_float32x4_from_private(a_); simde_float32x4_t b = simde_float32x4_from_private(b_); simde_uint32x4_t r = simde_vcgeq_f32(a, b); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgeq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 b[2]; uint64_t r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 109.93), SIMDE_FLOAT64_C( -385.27) }, { SIMDE_FLOAT64_C( 133.89), SIMDE_FLOAT64_C( 181.08) }, { UINT64_C( 0), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 530.41), SIMDE_FLOAT64_C( -444.63) }, { SIMDE_FLOAT64_C( 530.41), SIMDE_FLOAT64_C( -444.63) }, { UINT64_MAX, UINT64_MAX } }, { { SIMDE_FLOAT64_C( -131.75), SIMDE_FLOAT64_C( 898.49) }, { SIMDE_FLOAT64_C( 524.05), SIMDE_FLOAT64_C( 898.49) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( -9.19), SIMDE_FLOAT64_C( -76.59) }, { SIMDE_FLOAT64_C( -9.19), SIMDE_FLOAT64_C( -76.59) }, { UINT64_MAX, UINT64_MAX } }, { { SIMDE_FLOAT64_C( 751.31), SIMDE_FLOAT64_C( 63.65) }, { SIMDE_FLOAT64_C( 403.25), SIMDE_FLOAT64_C( -261.55) }, { UINT64_MAX, UINT64_MAX } }, { { SIMDE_FLOAT64_C( -195.23), SIMDE_FLOAT64_C( 820.28) }, { SIMDE_FLOAT64_C( 857.78), SIMDE_FLOAT64_C( 938.65) }, { UINT64_C( 0), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 569.69), SIMDE_FLOAT64_C( 100.11) }, { SIMDE_FLOAT64_C( 861.54), SIMDE_FLOAT64_C( 100.11) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( 58.64), SIMDE_FLOAT64_C( -798.26) }, { SIMDE_FLOAT64_C( 58.64), SIMDE_FLOAT64_C( -185.24) }, { UINT64_MAX, UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = 
simde_vld1q_f64(test_vec[i].b); simde_uint64x2_t r = simde_vcgeq_f64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_private a_ = simde_float64x2_to_private(simde_test_arm_neon_random_f64x2(-1000.0, 1000.0)); simde_float64x2_private b_ = simde_float64x2_to_private(simde_test_arm_neon_random_f64x2(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_float64x2_t a = simde_float64x2_from_private(a_); simde_float64x2_t b = simde_float64x2_from_private(b_); simde_uint64x2_t r = simde_vcgeq_f64(a, b); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgeq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t b[16]; uint8_t r[16]; } test_vec[] = { { { -INT8_C( 109), -INT8_C( 41), -INT8_C( 32), INT8_C( 96), -INT8_C( 7), -INT8_C( 37), -INT8_C( 8), -INT8_C( 122), INT8_C( 44), INT8_C( 13), -INT8_C( 104), INT8_C( 3), -INT8_C( 92), -INT8_C( 42), -INT8_C( 53), INT8_C( 65) }, { -INT8_C( 109), INT8_C( 22), -INT8_C( 32), INT8_C( 96), -INT8_C( 11), -INT8_C( 37), -INT8_C( 8), -INT8_C( 122), -INT8_C( 26), -INT8_C( 67), -INT8_C( 74), INT8_C( 3), -INT8_C( 92), -INT8_C( 31), -INT8_C( 53), INT8_C( 65) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { INT8_C( 49), -INT8_C( 72), INT8_C( 57), -INT8_C( 82), -INT8_C( 109), INT8_C( 49), -INT8_C( 84), INT8_C( 121), -INT8_C( 9), INT8_C( 98), INT8_C( 12), -INT8_C( 110), -INT8_C( 8), -INT8_C( 29), -INT8_C( 44), 
INT8_C( 41) }, { INT8_C( 69), -INT8_C( 104), -INT8_C( 82), -INT8_C( 82), -INT8_C( 73), -INT8_C( 122), INT8_C( 67), -INT8_C( 28), -INT8_C( 9), -INT8_C( 34), INT8_C( 12), INT8_C( 61), -INT8_C( 8), -INT8_C( 29), INT8_C( 23), INT8_C( 41) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 111), INT8_C( 76), INT8_C( 96), INT8_C( 39), -INT8_C( 46), -INT8_C( 93), INT8_C( 11), -INT8_C( 55), -INT8_C( 127), INT8_C( 100), INT8_C( 7), -INT8_C( 2), -INT8_C( 6), INT8_C( 30), -INT8_C( 93), -INT8_C( 106) }, { INT8_C( 110), -INT8_C( 14), -INT8_C( 60), -INT8_C( 16), -INT8_C( 18), INT8_C( 108), INT8_C( 97), INT8_C( 77), -INT8_C( 111), INT8_C( 100), -INT8_C( 17), -INT8_C( 2), INT8_C( 59), -INT8_C( 115), -INT8_C( 80), -INT8_C( 86) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 67), INT8_C( 72), -INT8_C( 7), -INT8_C( 71), -INT8_C( 75), INT8_C( 90), -INT8_C( 18), INT8_C( 70), -INT8_C( 66), INT8_C( 110), INT8_C( 69), -INT8_C( 7), -INT8_C( 5), -INT8_C( 11), INT8_C( 19), -INT8_C( 44) }, { INT8_C( 6), INT8_C( 117), INT8_MAX, -INT8_C( 71), INT8_C( 81), -INT8_C( 12), -INT8_C( 18), INT8_C( 69), INT8_C( 112), -INT8_C( 100), INT8_C( 51), INT8_C( 10), -INT8_C( 19), -INT8_C( 73), INT8_C( 19), INT8_C( 48) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 53), -INT8_C( 98), -INT8_C( 11), -INT8_C( 121), -INT8_C( 110), INT8_C( 101), -INT8_C( 52), INT8_C( 48), -INT8_C( 91), -INT8_C( 1), INT8_C( 122), -INT8_C( 38), -INT8_C( 74), INT8_C( 32), INT8_C( 22), -INT8_C( 55) }, { INT8_C( 44), -INT8_C( 1), INT8_C( 106), -INT8_C( 110), 
-INT8_C( 32), INT8_C( 101), -INT8_C( 74), INT8_C( 48), -INT8_C( 91), -INT8_C( 45), INT8_C( 122), -INT8_C( 38), -INT8_C( 108), -INT8_C( 103), INT8_C( 22), -INT8_C( 55) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { -INT8_C( 10), -INT8_C( 15), -INT8_C( 71), INT8_C( 85), INT8_C( 86), INT8_C( 101), -INT8_C( 123), INT8_C( 41), INT8_C( 63), -INT8_C( 1), -INT8_C( 43), -INT8_C( 41), -INT8_C( 104), -INT8_C( 126), INT8_C( 97), INT8_C( 20) }, { -INT8_C( 10), -INT8_C( 15), -INT8_C( 103), -INT8_C( 28), INT8_C( 14), INT8_C( 101), INT8_C( 81), INT8_C( 41), INT8_C( 63), INT8_C( 42), -INT8_C( 5), INT8_C( 58), -INT8_C( 96), -INT8_C( 126), INT8_C( 97), INT8_C( 20) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { INT8_C( 56), -INT8_C( 17), INT8_C( 96), INT8_C( 70), INT8_C( 84), -INT8_C( 79), -INT8_C( 94), -INT8_C( 108), -INT8_C( 36), INT8_C( 106), INT8_C( 93), INT8_C( 11), -INT8_C( 20), INT8_C( 47), -INT8_C( 112), -INT8_C( 58) }, { INT8_C( 73), -INT8_C( 6), INT8_C( 40), -INT8_C( 44), -INT8_C( 24), -INT8_C( 20), -INT8_C( 94), -INT8_C( 42), -INT8_C( 122), INT8_C( 71), INT8_C( 93), INT8_C( 11), -INT8_C( 114), -INT8_C( 77), -INT8_C( 122), -INT8_C( 58) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { INT8_C( 122), -INT8_C( 18), -INT8_C( 91), INT8_C( 99), INT8_C( 71), INT8_C( 71), INT8_C( 57), -INT8_C( 51), -INT8_C( 59), -INT8_C( 106), -INT8_C( 40), INT8_C( 76), INT8_C( 73), INT8_C( 84), INT8_C( 29), -INT8_C( 21) }, { INT8_C( 69), -INT8_C( 18), -INT8_C( 30), -INT8_C( 35), INT8_C( 106), INT8_C( 108), INT8_C( 81), INT8_C( 80), -INT8_C( 59), INT8_C( 65), 
INT8_C( 34), INT8_C( 76), -INT8_C( 63), INT8_C( 84), INT8_C( 29), INT8_C( 60) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_uint8x16_t r = simde_vcgeq_s8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_private a_ = simde_int8x16_to_private(simde_test_arm_neon_random_i8x16()); simde_int8x16_private b_ = simde_int8x16_to_private(simde_test_arm_neon_random_i8x16()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int8x16_t a = simde_int8x16_from_private(a_); simde_int8x16_t b = simde_int8x16_from_private(b_); simde_uint8x16_t r = simde_vcgeq_s8(a, b); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgeq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; uint16_t r[8]; } test_vec[] = { { { INT16_C( 17214), -INT16_C( 27485), -INT16_C( 3533), INT16_C( 14277), INT16_C( 16736), -INT16_C( 26586), INT16_C( 16877), -INT16_C( 1335) }, { INT16_C( 17214), INT16_C( 27925), INT16_C( 25546), INT16_C( 15187), INT16_C( 6524), INT16_C( 24375), INT16_C( 21212), -INT16_C( 1335) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { -INT16_C( 26219), -INT16_C( 32169), INT16_C( 6874), INT16_C( 6484), 
INT16_C( 27230), INT16_C( 10374), -INT16_C( 22869), INT16_C( 18787) }, { -INT16_C( 25869), -INT16_C( 12376), INT16_C( 29421), -INT16_C( 2359), INT16_C( 22494), INT16_C( 15667), -INT16_C( 22869), INT16_C( 16892) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { -INT16_C( 18046), INT16_C( 20469), INT16_C( 22675), -INT16_C( 31080), INT16_C( 16627), -INT16_C( 8107), INT16_C( 15933), -INT16_C( 28202) }, { INT16_C( 2421), INT16_C( 8654), -INT16_C( 13392), -INT16_C( 3998), INT16_C( 9503), -INT16_C( 29430), INT16_C( 15933), -INT16_C( 16550) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 26087), -INT16_C( 8336), -INT16_C( 13757), INT16_C( 15006), -INT16_C( 1702), -INT16_C( 15931), -INT16_C( 10925), -INT16_C( 7174) }, { INT16_C( 1441), -INT16_C( 8336), -INT16_C( 13757), INT16_C( 15006), -INT16_C( 21223), -INT16_C( 15931), -INT16_C( 10925), INT16_C( 14939) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { -INT16_C( 11785), -INT16_C( 17524), -INT16_C( 31155), -INT16_C( 4194), INT16_C( 3979), -INT16_C( 12594), INT16_C( 21626), -INT16_C( 3319) }, { -INT16_C( 12775), INT16_C( 28084), INT16_C( 4003), -INT16_C( 8537), INT16_C( 31717), INT16_C( 3229), INT16_C( 21626), INT16_C( 29070) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 12506), -INT16_C( 19206), INT16_C( 925), -INT16_C( 18777), INT16_C( 24324), -INT16_C( 23074), INT16_C( 16682), INT16_C( 1489) }, { -INT16_C( 4026), -INT16_C( 16292), -INT16_C( 5563), INT16_C( 27442), INT16_C( 24324), -INT16_C( 23074), INT16_C( 16682), INT16_C( 1489) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { -INT16_C( 5957), INT16_C( 30310), INT16_C( 27059), -INT16_C( 25989), -INT16_C( 11687), -INT16_C( 
24902), INT16_C( 28308), -INT16_C( 16119) }, { -INT16_C( 6069), INT16_C( 30310), INT16_C( 14121), -INT16_C( 25989), INT16_C( 13314), -INT16_C( 11863), INT16_C( 28308), INT16_C( 20338) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 17947), INT16_C( 30952), INT16_C( 7348), -INT16_C( 2503), -INT16_C( 24614), -INT16_C( 27627), -INT16_C( 6186), -INT16_C( 9827) }, { INT16_C( 17947), -INT16_C( 20566), INT16_C( 7348), INT16_C( 3070), INT16_C( 29604), -INT16_C( 27627), INT16_C( 6493), INT16_C( 6622) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_uint16x8_t r = simde_vcgeq_s16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_private a_ = simde_int16x8_to_private(simde_test_arm_neon_random_i16x8()); simde_int16x8_private b_ = simde_int16x8_to_private(simde_test_arm_neon_random_i16x8()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int16x8_t a = simde_int16x8_from_private(a_); simde_int16x8_t b = simde_int16x8_from_private(b_); simde_uint16x8_t r = simde_vcgeq_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgeq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; uint32_t r[4]; } test_vec[] = { { { -INT32_C( 1532700866), INT32_C( 326959941), -INT32_C( 1401836417), -INT32_C( 433567921) }, { -INT32_C( 98180169), INT32_C( 326959941), -INT32_C( 1401836417), -INT32_C( 643440230) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 1473525636), -INT32_C( 1025524110), INT32_C( 1314168982), INT32_C( 817286634) }, { INT32_C( 1329804496), INT32_C( 2080093408), INT32_C( 1314168982), INT32_C( 1714475498) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { INT32_C( 533589530), -INT32_C( 522422794), INT32_C( 319853123), -INT32_C( 1486662714) }, { -INT32_C( 1641914872), -INT32_C( 370379009), -INT32_C( 531686073), -INT32_C( 1830068616) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { INT32_C( 1831737894), -INT32_C( 410700768), INT32_C( 692205788), INT32_C( 1160622150) }, { INT32_C( 1831737894), -INT32_C( 1353875913), INT32_C( 692205788), INT32_C( 544011505) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { INT32_C( 1267275460), INT32_C( 1171986430), -INT32_C( 309720889), -INT32_C( 43809991) }, { INT32_C( 319399735), -INT32_C( 784512032), -INT32_C( 1041127105), -INT32_C( 43809991) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 857922556), -INT32_C( 1946395055), INT32_C( 1470632992), INT32_C( 1360253261) }, { INT32_C( 997042428), -INT32_C( 1946395055), 
INT32_C( 2072535265), INT32_C( 1360253261) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 2020726615), -INT32_C( 1496376853), -INT32_C( 951960885), -INT32_C( 1003480211) }, { -INT32_C( 272630003), -INT32_C( 1402321057), INT32_C( 251559193), -INT32_C( 1003480211) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { -INT32_C( 2038166341), -INT32_C( 980723112), INT32_C( 1753922421), -INT32_C( 1351541680) }, { INT32_C( 2086460259), -INT32_C( 980723112), INT32_C( 1753922421), INT32_C( 407225949) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_uint32x4_t r = simde_vcgeq_s32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_private a_ = simde_int32x4_to_private(simde_test_arm_neon_random_i32x4()); simde_int32x4_private b_ = simde_int32x4_to_private(simde_test_arm_neon_random_i32x4()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int32x4_t a = simde_int32x4_from_private(a_); simde_int32x4_t b = simde_int32x4_from_private(b_); simde_uint32x4_t r = simde_vcgeq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgeq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t b[2]; uint64_t r[2]; } test_vec[] = { { { -INT64_C( 6749822669995771752), -INT64_C( 3818101802221279798) }, { -INT64_C( 6305717332762332197), INT64_C( 7175660046940033834) }, { UINT64_C( 0), UINT64_C( 0) } }, { { INT64_C( 9028077517965231098), INT64_C( 7747673213297553935) }, { INT64_C( 9028077517965231098), INT64_C( 1061606484645582218) }, { UINT64_MAX, UINT64_MAX } }, { { -INT64_C( 5052448151738273918), INT64_C( 6997175910299228985) }, { -INT64_C( 5052448151738273918), INT64_C( 6997175910299228985) }, { UINT64_MAX, UINT64_MAX } }, { { INT64_C( 4769728672330431475), INT64_C( 2814510728028042588) }, { INT64_C( 4769728672330431475), -INT64_C( 6557647891631334546) }, { UINT64_MAX, UINT64_MAX } }, { { INT64_C( 832768351716102019), -INT64_C( 8268287527640553738) }, { -INT64_C( 5636969100111038727), -INT64_C( 1595533866079108353) }, { UINT64_MAX, UINT64_C( 0) } }, { { -INT64_C( 8590531313175455935), INT64_C( 6501900541919899210) }, { -INT64_C( 8590531313175455935), INT64_C( 6501900541919899210) }, { UINT64_MAX, UINT64_MAX } }, { { -INT64_C( 44138923088680662), INT64_C( 6471204320746746648) }, { -INT64_C( 5884813147232659619), -INT64_C( 1137101979037570025) }, { UINT64_MAX, UINT64_MAX } }, { { INT64_C( 1343769224458206736), INT64_C( 8143583866422456588) }, { -INT64_C( 8958862038161701909), INT64_C( 8143583866422456588) }, { UINT64_MAX, UINT64_MAX } }, }; for (size_t i = 0 
; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_uint64x2_t r = simde_vcgeq_s64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_private a_ = simde_int64x2_to_private(simde_test_arm_neon_random_i64x2()); simde_int64x2_private b_ = simde_int64x2_to_private(simde_test_arm_neon_random_i64x2()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int64x2_t a = simde_int64x2_from_private(a_); simde_int64x2_t b = simde_int64x2_from_private(b_); simde_uint64x2_t r = simde_vcgeq_s64(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgeq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C( 98), UINT8_C( 69), UINT8_C( 0), UINT8_C(252), UINT8_C( 36), UINT8_C(131), UINT8_C( 30), UINT8_C(216), UINT8_C( 60), UINT8_C( 12), UINT8_C(190), UINT8_C(154), UINT8_C(230), UINT8_C(105), UINT8_C(205), UINT8_C(119) }, { UINT8_C( 98), UINT8_C(214), UINT8_C( 49), UINT8_C( 72), UINT8_C( 36), UINT8_C( 54), UINT8_C(211), UINT8_C(157), UINT8_C(199), UINT8_C( 12), UINT8_C(178), UINT8_C(154), UINT8_C(160), UINT8_C(105), UINT8_C(205), UINT8_C( 70) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(193), UINT8_C(217), UINT8_C(110), UINT8_C(114), UINT8_C( 36), UINT8_C(103), UINT8_MAX, UINT8_C(235), 
UINT8_C(176), UINT8_C( 8), UINT8_C(134), UINT8_C( 80), UINT8_C(159), UINT8_C( 83), UINT8_C(150), UINT8_C(180) }, { UINT8_C( 33), UINT8_C(217), UINT8_C(110), UINT8_C(114), UINT8_C( 58), UINT8_C(103), UINT8_MAX, UINT8_C(153), UINT8_C( 30), UINT8_C( 8), UINT8_C(223), UINT8_C( 68), UINT8_C(243), UINT8_C(156), UINT8_C(205), UINT8_C(180) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { UINT8_C(224), UINT8_C(172), UINT8_C( 6), UINT8_C(236), UINT8_C( 19), UINT8_C( 5), UINT8_C(133), UINT8_C( 49), UINT8_C( 14), UINT8_C(101), UINT8_C(118), UINT8_C( 1), UINT8_C( 1), UINT8_C(236), UINT8_C(252), UINT8_C(140) }, { UINT8_C(224), UINT8_C( 78), UINT8_C( 58), UINT8_C( 34), UINT8_C(107), UINT8_C(212), UINT8_C( 20), UINT8_C(189), UINT8_C(244), UINT8_C( 86), UINT8_C(174), UINT8_C(104), UINT8_C( 47), UINT8_C(236), UINT8_C(252), UINT8_C(225) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C( 27), UINT8_C(226), UINT8_C(153), UINT8_C(185), UINT8_C( 53), UINT8_C( 1), UINT8_C(207), UINT8_C( 73), UINT8_C( 4), UINT8_C(231), UINT8_C(167), UINT8_C( 51), UINT8_C(221), UINT8_C(173), UINT8_C( 21), UINT8_C(118) }, { UINT8_C(176), UINT8_C(226), UINT8_C( 35), UINT8_C(185), UINT8_C( 53), UINT8_C( 1), UINT8_C(207), UINT8_C(237), UINT8_C( 85), UINT8_C(231), UINT8_C(167), UINT8_C(236), UINT8_C(179), UINT8_C(236), UINT8_C( 99), UINT8_C(206) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(217), UINT8_C( 95), UINT8_C(210), UINT8_C( 15), UINT8_C(246), UINT8_C(161), UINT8_C(251), UINT8_C( 85), UINT8_C(130), UINT8_C(163), UINT8_C( 65), 
UINT8_C(235), UINT8_C(144), UINT8_C(165), UINT8_C( 10), UINT8_C(252) }, { UINT8_C(162), UINT8_C( 95), UINT8_C(189), UINT8_C( 76), UINT8_C(246), UINT8_C(199), UINT8_C(251), UINT8_C(127), UINT8_C(130), UINT8_C(221), UINT8_C(229), UINT8_C(235), UINT8_C(212), UINT8_C(193), UINT8_C( 4), UINT8_C(173) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { UINT8_C(181), UINT8_C( 2), UINT8_C( 68), UINT8_C(156), UINT8_C(110), UINT8_C( 91), UINT8_C( 27), UINT8_C(240), UINT8_C(179), UINT8_C( 0), UINT8_C(219), UINT8_C(152), UINT8_C(193), UINT8_C(223), UINT8_C( 80), UINT8_C(130) }, { UINT8_C(181), UINT8_C( 2), UINT8_C( 68), UINT8_C( 45), UINT8_C(187), UINT8_C( 91), UINT8_C( 45), UINT8_C( 23), UINT8_C(179), UINT8_C(105), UINT8_C( 4), UINT8_C(177), UINT8_C(175), UINT8_C(237), UINT8_C( 80), UINT8_C( 85) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { UINT8_C(224), UINT8_C(148), UINT8_C( 31), UINT8_C(130), UINT8_C(239), UINT8_C(188), UINT8_C(153), UINT8_C( 51), UINT8_C( 37), UINT8_C(107), UINT8_C( 83), UINT8_C(140), UINT8_C(138), UINT8_C(164), UINT8_C(238), UINT8_C( 30) }, { UINT8_C(224), UINT8_C( 27), UINT8_C( 31), UINT8_C( 4), UINT8_C( 39), UINT8_C( 17), UINT8_C(235), UINT8_C( 51), UINT8_C(223), UINT8_C(107), UINT8_C( 0), UINT8_C(140), UINT8_C( 48), UINT8_C( 80), UINT8_C(238), UINT8_C(247) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C(122), UINT8_C(120), UINT8_C(221), UINT8_C(161), UINT8_C( 91), UINT8_C( 83), UINT8_C(212), UINT8_C(104), UINT8_C( 28), UINT8_C(212), UINT8_C( 69), UINT8_C(216), UINT8_C( 37), UINT8_C(227), UINT8_C(216), 
UINT8_C( 9) }, { UINT8_C( 97), UINT8_C( 72), UINT8_C(221), UINT8_C(155), UINT8_C( 91), UINT8_C( 83), UINT8_C(251), UINT8_C( 11), UINT8_C( 28), UINT8_C( 48), UINT8_C( 69), UINT8_C(138), UINT8_C(143), UINT8_C(158), UINT8_C(216), UINT8_C( 9) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vcgeq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_private a_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); simde_uint8x16_private b_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint8x16_t a = simde_uint8x16_from_private(a_); simde_uint8x16_t b = simde_uint8x16_from_private(b_); simde_uint8x16_t r = simde_vcgeq_u8(a, b); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgeq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(45482), UINT16_C(64499), UINT16_C(25037), UINT16_C(12580), UINT16_C(59654), UINT16_C(35892), UINT16_C( 2294), UINT16_C(31274) }, { UINT16_C(64286), UINT16_C(46359), UINT16_C(19027), UINT16_C(18590), UINT16_C( 9601), UINT16_C(44740), UINT16_C(52996), UINT16_C(44680) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(46007), UINT16_C(22200), UINT16_C(39355), UINT16_C(17668), UINT16_C(16277), UINT16_C(59534), UINT16_C( 4732), UINT16_C( 2608) }, { UINT16_C(62802), UINT16_C(22200), UINT16_C(16836), UINT16_C(17668), UINT16_C(44732), UINT16_C(39315), UINT16_C( 4732), UINT16_C(13435) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C(59721), UINT16_C(54017), UINT16_C(12565), UINT16_C(39969), UINT16_C(38438), UINT16_C( 5661), UINT16_C(49879), UINT16_C(37680) }, { UINT16_C(50288), UINT16_C(60716), UINT16_C(43222), UINT16_C(39969), UINT16_C( 914), UINT16_C( 5661), UINT16_C(30477), UINT16_C(22063) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(39837), UINT16_C(29811), UINT16_C(46191), UINT16_C(52487), UINT16_C(13416), UINT16_C(18245), UINT16_C(56284), UINT16_C( 4047) }, { UINT16_C(63454), 
UINT16_C(60293), UINT16_C(46191), UINT16_C(53058), UINT16_C(27621), UINT16_C(18245), UINT16_C( 9074), UINT16_C( 4047) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(41919), UINT16_C(39821), UINT16_C(15878), UINT16_C(23817), UINT16_C(64385), UINT16_C(52808), UINT16_C(35395), UINT16_C(10398) }, { UINT16_C(58358), UINT16_C(26735), UINT16_C(15878), UINT16_C(50296), UINT16_C(64385), UINT16_C(26847), UINT16_C(51334), UINT16_C(17847) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(36728), UINT16_C(48091), UINT16_C(31001), UINT16_C(15031), UINT16_C(21084), UINT16_C(25464), UINT16_C(61585), UINT16_C( 4647) }, { UINT16_C( 2027), UINT16_C(29307), UINT16_C(13007), UINT16_C(15031), UINT16_C(39031), UINT16_C( 9251), UINT16_C(27266), UINT16_C(64049) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C(39660), UINT16_C(32068), UINT16_C(27786), UINT16_C(30095), UINT16_C( 2675), UINT16_C(17127), UINT16_C(14083), UINT16_C(46204) }, { UINT16_C(40759), UINT16_C(47577), UINT16_C( 2569), UINT16_C( 691), UINT16_C(26902), UINT16_C(39701), UINT16_C(14083), UINT16_C(61308) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C(47110), UINT16_C(17378), UINT16_C( 2133), UINT16_C(36599), UINT16_C(53501), UINT16_C( 1863), UINT16_C(21181), UINT16_C(50070) }, { UINT16_C( 7780), UINT16_C(26507), UINT16_C( 2133), UINT16_C( 9814), UINT16_C(49865), UINT16_C(63105), UINT16_C(21181), UINT16_C(50070) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = 
simde_vcgeq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_private a_ = simde_uint16x8_to_private(simde_test_arm_neon_random_u16x8()); simde_uint16x8_private b_ = simde_uint16x8_to_private(simde_test_arm_neon_random_u16x8()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint16x8_t a = simde_uint16x8_from_private(a_); simde_uint16x8_t b = simde_uint16x8_from_private(b_); simde_uint16x8_t r = simde_vcgeq_u16(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgeq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(2162069647), UINT32_C(3754241403), UINT32_C(3789104306), UINT32_C(2344517141) }, { UINT32_C(1058982851), UINT32_C(1223602262), UINT32_C(3789104306), UINT32_C(3709876814) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C( 858071851), UINT32_C(1863283233), UINT32_C(3484233484), UINT32_C(1488824877) }, { UINT32_C( 765066619), UINT32_C(1863283233), UINT32_C(2001547181), UINT32_C(1488824877) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C( 803876226), UINT32_C( 625846571), UINT32_C(4034288757), UINT32_C(2048790361) }, { UINT32_C( 803876226), UINT32_C(2309371740), UINT32_C(2950784225), UINT32_C(3096013895) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(2763555887), UINT32_C( 966078432), UINT32_C(2063199762), UINT32_C(1022336480) }, { UINT32_C(3066402516), UINT32_C(1097180922), UINT32_C(2063199762), UINT32_C(2752327796) }, { UINT32_C( 0), 
UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C( 699665687), UINT32_C(1895006352), UINT32_C(4071416606), UINT32_C(1940418937) }, { UINT32_C( 699665687), UINT32_C(1889775100), UINT32_C(2434052317), UINT32_C(1780833654) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(2436600179), UINT32_C(1680915182), UINT32_C(1389319703), UINT32_C(3139267656) }, { UINT32_C(2678472385), UINT32_C(1680915182), UINT32_C(1389319703), UINT32_C(3139267656) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(2589904960), UINT32_C( 889962236), UINT32_C(2490082770), UINT32_C(3526562275) }, { UINT32_C(3811992523), UINT32_C( 37029305), UINT32_C(1874678386), UINT32_C(3957394090) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(2578869031), UINT32_C( 426014261), UINT32_C(1407948679), UINT32_C(3023446523) }, { UINT32_C(2578869031), UINT32_C(1728607421), UINT32_C(1481828734), UINT32_C( 657381161) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vcgeq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_private a_ = simde_uint32x4_to_private(simde_test_arm_neon_random_u32x4()); simde_uint32x4_private b_ = simde_uint32x4_to_private(simde_test_arm_neon_random_u32x4()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint32x4_t a = simde_uint32x4_from_private(a_); simde_uint32x4_t b = simde_uint32x4_from_private(b_); simde_uint32x4_t r = simde_vcgeq_u32(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgeq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(14562937709581382826), UINT64_C(14699337621818676983) }, { UINT64_C(14562937709581382826), UINT64_C(14699337621818676983) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 5459681504497828234), UINT64_C(14236187640464043906) }, { UINT64_C( 2432150606451133712), UINT64_C(14236187640464043906) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C(14476163272494907547), UINT64_C(10735033187440786193) }, { UINT64_C(15985759095115223721), UINT64_C( 8346033058204750676) }, { UINT64_C( 0), UINT64_MAX } }, { { UINT64_C(10714954279935368403), UINT64_C( 7414258787975201239) }, { UINT64_C(16866699637832619814), UINT64_C( 3581921216148728434) }, { UINT64_C( 0), UINT64_MAX } }, { { UINT64_C( 8400006579740446870), UINT64_C(15176171529956307873) }, { UINT64_C( 8400006579740446870), UINT64_C( 763894644646866966) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C(13328124129486869158), UINT64_C(16401399820575060595) }, { UINT64_C(11678300727327993742), UINT64_C(16401399820575060595) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 6642394988694951079), UINT64_C(16397272341211680939) }, { UINT64_C( 6716273130845450993), UINT64_C(18205208125254365450) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(16454700178904663451), UINT64_C( 1942922326156028726) }, { UINT64_C(18396398711276686247), UINT64_C( 4185001424853443275) }, { UINT64_C( 0), 
UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vcgeq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_private a_ = simde_uint64x2_to_private(simde_test_arm_neon_random_u64x2()); simde_uint64x2_private b_ = simde_uint64x2_to_private(simde_test_arm_neon_random_u64x2()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint64x2_t a = simde_uint64x2_from_private(a_); simde_uint64x2_t b = simde_uint64x2_from_private(b_); simde_uint64x2_t r = simde_vcgeq_u64(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vcge_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcge_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcge_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vcge_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcge_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcge_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcge_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vcge_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vcge_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vcge_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgeq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgeq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgeq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vcgeq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcgeq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgeq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgeq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vcgeq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vcgeq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgeq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" 
simde-0.7.2/test/arm/neon/cgez.c000066400000000000000000000767721400333146700164360ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN cgez #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/cgez.h" #else #include "../../../simde/arm/neon.h" #endif static int test_simde_vcgez_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; uint32_t r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -133.39), SIMDE_FLOAT32_C( 805.67) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -910.74), SIMDE_FLOAT32_C( 710.63) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 230.14) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 994.82), SIMDE_FLOAT32_C( -639.98) }, { UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -48.27), SIMDE_FLOAT32_C( 0.00) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 98.24), SIMDE_FLOAT32_C( 0.00) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( -63.24), SIMDE_FLOAT32_C( 224.59) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 43.88) }, { UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_uint32x2_t r = simde_vcgez_f32(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_private a_ = simde_float32x2_to_private(simde_test_arm_neon_random_f32x2(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0.0; } simde_float32x2_t a = simde_float32x2_from_private(a_); simde_uint32x2_t r = simde_vcgez_f32(a); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgez_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; uint64_t r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( 625.39) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 786.84) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 381.01) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 0.00) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( -664.71) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 389.46) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( -675.33) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 539.16) }, { UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_uint64x1_t r = simde_vcgez_f64(a); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_private a_ = simde_float64x1_to_private(simde_test_arm_neon_random_f64x1(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0.0; } simde_float64x1_t a = simde_float64x1_from_private(a_); simde_uint64x1_t r = simde_vcgez_f64(a); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgez_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; uint8_t r[8]; } test_vec[] = { { { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 64), INT8_C( 0), -INT8_C( 105), -INT8_C( 79) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 55), INT8_C( 117), -INT8_C( 108), -INT8_C( 70), INT8_C( 111), INT8_C( 3), INT8_C( 0), -INT8_C( 7) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 98), -INT8_C( 116), INT8_C( 0), INT8_C( 34), INT8_C( 115), INT8_C( 0), -INT8_C( 45), INT8_C( 47) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { -INT8_C( 28), INT8_C( 5), INT8_C( 22), INT8_C( 84), INT8_C( 0), -INT8_C( 92), INT8_C( 0), INT8_C( 77) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { INT8_C( 0), -INT8_C( 123), -INT8_C( 85), -INT8_C( 5), INT8_C( 15), INT8_C( 126), INT8_C( 42), -INT8_C( 63) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 118), INT8_C( 0), INT8_C( 0), INT8_MAX, INT8_C( 115), INT8_C( 3), -INT8_C( 52), INT8_C( 0) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { -INT8_C( 21), INT8_C( 0), INT8_C( 89), INT8_C( 0), -INT8_C( 91), -INT8_C( 125), INT8_C( 0), INT8_C( 0) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { INT8_C( 0), INT8_C( 
41), INT8_C( 0), INT8_C( 0), INT8_C( 45), -INT8_C( 17), INT8_C( 0), INT8_C( 0) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_uint8x8_t r = simde_vcgez_s8(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_private a_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int8x8_t a = simde_int8x8_from_private(a_); simde_uint8x8_t r = simde_vcgez_s8(a); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgez_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; uint16_t r[4]; } test_vec[] = { { { INT16_C( 0), -INT16_C( 6705), INT16_C( 23697), -INT16_C( 17451) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 23201), -INT16_C( 17829), INT16_C( 0), -INT16_C( 7222) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 0), -INT16_C( 30718), -INT16_C( 29104), INT16_C( 0) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { -INT16_C( 13514), -INT16_C( 26536), INT16_C( 11746), -INT16_C( 31865) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 2133), INT16_C( 0), -INT16_C( 19825), INT16_C( 7246) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { INT16_C( 0), -INT16_C( 5394), -INT16_C( 6774), -INT16_C( 16249) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { -INT16_C( 8180), -INT16_C( 27882), INT16_C( 21699), INT16_C( 6165) }, { UINT16_C( 0), 
UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { INT16_C( 0), -INT16_C( 17912), INT16_C( 0), -INT16_C( 1385) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_uint16x4_t r = simde_vcgez_s16(a); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_private a_ = simde_int16x4_to_private(simde_test_arm_neon_random_i16x4()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int16x4_t a = simde_int16x4_from_private(a_); simde_uint16x4_t r = simde_vcgez_s16(a); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgez_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; uint32_t r[2]; } test_vec[] = { { { INT32_C( 0), -INT32_C( 190514591) }, { UINT32_MAX, UINT32_C( 0) } }, { { INT32_C( 0), -INT32_C( 1205804542) }, { UINT32_MAX, UINT32_C( 0) } }, { { INT32_C( 2075374757), INT32_C( 0) }, { UINT32_MAX, UINT32_MAX } }, { { INT32_C( 434605318), INT32_C( 1427327409) }, { UINT32_MAX, UINT32_MAX } }, { { INT32_C( 0), INT32_C( 1377343026) }, { UINT32_MAX, UINT32_MAX } }, { { INT32_C( 1046803595), INT32_C( 1317271152) }, { UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 135390449), INT32_C( 0) }, { UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 0), INT32_C( 0) }, { UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_uint32x2_t r = simde_vcgez_s32(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); 
for (int i = 0 ; i < 8 ; i++) { simde_int32x2_private a_ = simde_int32x2_to_private(simde_test_arm_neon_random_i32x2()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int32x2_t a = simde_int32x2_from_private(a_); simde_uint32x2_t r = simde_vcgez_s32(a); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgez_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; uint64_t r[1]; } test_vec[] = { { { INT64_C( 0) }, { UINT64_MAX } }, { { INT64_C( 7153887103871360744) }, { UINT64_MAX } }, { { -INT64_C( 264543239347917471) }, { UINT64_C( 0) } }, { { INT64_C( 0) }, { UINT64_MAX } }, { { -INT64_C( 3851070515776987902) }, { UINT64_C( 0) } }, { { -INT64_C( 4491853047625019068) }, { UINT64_C( 0) } }, { { INT64_C( 0) }, { UINT64_MAX } }, { { -INT64_C( 3025692413433232025) }, { UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_uint64x1_t r = simde_vcgez_s64(a); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_private a_ = simde_int64x1_to_private(simde_test_arm_neon_random_i64x1()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int64x1_t a = simde_int64x1_from_private(a_); simde_uint64x1_t r = simde_vcgez_s64(a); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgezq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; uint32_t r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -369.77), SIMDE_FLOAT32_C( 0.00) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -41.03), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 522.70), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 98.50), SIMDE_FLOAT32_C( 454.13), SIMDE_FLOAT32_C( 842.39), SIMDE_FLOAT32_C( 35.69) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 843.79), SIMDE_FLOAT32_C( -966.00), SIMDE_FLOAT32_C( 39.60) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 717.22), SIMDE_FLOAT32_C( 165.47), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 907.07) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -252.33), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 684.67) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 57.60), SIMDE_FLOAT32_C( 844.67), SIMDE_FLOAT32_C( -357.97), SIMDE_FLOAT32_C( -478.05) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_uint32x4_t r = simde_vcgezq_f32(a); 
simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_private a_ = simde_float32x4_to_private(simde_test_arm_neon_random_f32x4(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0.0; } simde_float32x4_t a = simde_float32x4_from_private(a_); simde_uint32x4_t r = simde_vcgezq_f32(a); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgezq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; uint64_t r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 254.72), SIMDE_FLOAT64_C( 721.02) }, { UINT64_MAX, UINT64_MAX } }, { { SIMDE_FLOAT64_C( -171.93), SIMDE_FLOAT64_C( 0.00) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 341.16) }, { UINT64_MAX, UINT64_MAX } }, { { SIMDE_FLOAT64_C( 18.66), SIMDE_FLOAT64_C( 0.00) }, { UINT64_MAX, UINT64_MAX } }, { { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) }, { UINT64_MAX, UINT64_MAX } }, { { SIMDE_FLOAT64_C( -339.50), SIMDE_FLOAT64_C( 60.07) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( 298.41), SIMDE_FLOAT64_C( -244.46) }, { UINT64_MAX, UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -617.84), SIMDE_FLOAT64_C( -233.87) }, { UINT64_C( 0), UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_uint64x2_t r = simde_vcgezq_f64(a); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_private a_ = simde_float64x2_to_private(simde_test_arm_neon_random_f64x2(-1000.0, 1000.0)); /* Make some equal which is 
unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0.0; } simde_float64x2_t a = simde_float64x2_from_private(a_); simde_uint64x2_t r = simde_vcgezq_f64(a); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgezq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; uint8_t r[16]; } test_vec[] = { { { -INT8_C( 124), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 29), INT8_C( 49), INT8_C( 0), -INT8_C( 127), -INT8_C( 73), INT8_C( 45), -INT8_C( 117), INT8_C( 37), -INT8_C( 101), INT8_C( 10), INT8_C( 23), INT8_C( 93) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { -INT8_C( 106), INT8_C( 0), -INT8_C( 86), INT8_C( 0), -INT8_C( 5), INT8_C( 0), INT8_C( 53), -INT8_C( 78), INT8_C( 0), -INT8_C( 64), -INT8_C( 40), INT8_C( 95), -INT8_C( 54), -INT8_C( 17), -INT8_C( 68), INT8_C( 0) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 36), -INT8_C( 86), -INT8_C( 57), INT8_C( 31), INT8_C( 64), INT8_C( 0), -INT8_C( 46), INT8_C( 0), INT8_C( 0), -INT8_C( 86), INT8_C( 100), -INT8_C( 122), -INT8_C( 103), INT8_C( 32), INT8_C( 93), -INT8_C( 121) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { -INT8_C( 70), -INT8_C( 108), INT8_C( 60), -INT8_C( 5), -INT8_C( 112), INT8_C( 14), -INT8_C( 1), INT8_C( 76), INT8_C( 0), INT8_C( 0), -INT8_C( 46), INT8_C( 0), -INT8_C( 124), INT8_C( 47), INT8_C( 0), INT8_C( 
1) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 58), -INT8_C( 64), -INT8_C( 34), INT8_C( 0), INT8_C( 35), -INT8_C( 80), INT8_C( 0), -INT8_C( 89), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 72) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 0), INT8_C( 40), -INT8_C( 91), INT8_C( 0), -INT8_C( 24), -INT8_C( 125), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 32), -INT8_C( 77), INT8_C( 0), -INT8_C( 3), INT8_C( 92), INT8_C( 0), INT8_C( 0) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { -INT8_C( 26), -INT8_C( 5), INT8_C( 0), -INT8_C( 50), INT8_C( 126), INT8_C( 71), INT8_C( 0), -INT8_C( 78), INT8_C( 39), -INT8_C( 114), -INT8_C( 59), INT8_C( 36), -INT8_C( 22), -INT8_C( 112), -INT8_C( 109), INT8_C( 0) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 0), -INT8_C( 75), -INT8_C( 92), -INT8_C( 20), -INT8_C( 4), INT8_MAX, INT8_C( 0), INT8_C( 36), INT8_C( 13), INT8_C( 99), INT8_C( 0), INT8_C( 0), -INT8_C( 13), INT8_C( 0), INT8_C( 0), INT8_C( 0) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_uint8x16_t r = 
simde_vcgezq_s8(a); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_private a_ = simde_int8x16_to_private(simde_test_arm_neon_random_i8x16()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int8x16_t a = simde_int8x16_from_private(a_); simde_uint8x16_t r = simde_vcgezq_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgezq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; uint16_t r[8]; } test_vec[] = { { { -INT16_C( 15226), INT16_C( 0), -INT16_C( 1431), INT16_C( 0), INT16_C( 6849), INT16_C( 0), -INT16_C( 8036), -INT16_C( 15990) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { INT16_C( 0), INT16_C( 1445), INT16_C( 0), INT16_C( 0), -INT16_C( 3775), -INT16_C( 21848), INT16_C( 14315), -INT16_C( 21071) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 892), -INT16_C( 26999), -INT16_C( 21484), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 12800) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { -INT16_C( 19489), INT16_C( 4382), -INT16_C( 13092), INT16_C( 0), INT16_C( 29391), -INT16_C( 7185), INT16_C( 0), INT16_C( 0) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { INT16_C( 12686), INT16_C( 28368), -INT16_C( 6977), INT16_C( 0), INT16_C( 0), INT16_C( 29616), INT16_C( 0), INT16_C( 22732) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } 
}, { { INT16_C( 0), INT16_C( 11980), INT16_C( 0), -INT16_C( 21865), INT16_C( 26450), INT16_C( 4376), -INT16_C( 18613), -INT16_C( 7504) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { INT16_C( 0), INT16_C( 22064), -INT16_C( 32658), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 16253) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { INT16_C( 23905), INT16_C( 23484), INT16_C( 0), INT16_C( 23649), -INT16_C( 27998), INT16_C( 4275), -INT16_C( 19694), INT16_C( 0) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_uint16x8_t r = simde_vcgezq_s16(a); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_private a_ = simde_int16x8_to_private(simde_test_arm_neon_random_i16x8()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int16x8_t a = simde_int16x8_from_private(a_); simde_uint16x8_t r = simde_vcgezq_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgezq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; uint32_t r[4]; } test_vec[] = { { { INT32_C( 0), INT32_C( 1643825484), INT32_C( 612391850), INT32_C( 0) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { INT32_C( 0), INT32_C( 1593239142), INT32_C( 2135441628), -INT32_C( 1324263067) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { INT32_C( 1676776038), INT32_C( 0), INT32_C( 0), INT32_C( 0) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 1927125720), -INT32_C( 532794434), -INT32_C( 1532145602), -INT32_C( 2146984222) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 1817723155), -INT32_C( 399816415), INT32_C( 1502300209), INT32_C( 0) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { INT32_C( 0), INT32_C( 81185431), -INT32_C( 1086427221), INT32_C( 0) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 1298875807), INT32_C( 0), -INT32_C( 353237898), INT32_C( 0) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 921360262), INT32_C( 1292375405), INT32_C( 207602284), -INT32_C( 94778873) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_uint32x4_t r = simde_vcgezq_s32(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_private a_ = 
simde_int32x4_to_private(simde_test_arm_neon_random_i32x4()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int32x4_t a = simde_int32x4_from_private(a_); simde_uint32x4_t r = simde_vcgezq_s32(a); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgezq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; uint64_t r[2]; } test_vec[] = { { { INT64_C( 0), INT64_C( 0) }, { UINT64_MAX, UINT64_MAX } }, { { -INT64_C( 6109154277218944666), -INT64_C( 7057256154138836602) }, { UINT64_C( 0), UINT64_C( 0) } }, { { INT64_C( 5359920317727548829), INT64_C( 5134123335401800481) }, { UINT64_MAX, UINT64_MAX } }, { { INT64_C( 3092872602263862579), -INT64_C( 2393269223996982099) }, { UINT64_MAX, UINT64_C( 0) } }, { { INT64_C( 0), -INT64_C( 7911487429770654735) }, { UINT64_MAX, UINT64_C( 0) } }, { { -INT64_C( 5369868132634761559), INT64_C( 7133338072004117451) }, { UINT64_C( 0), UINT64_MAX } }, { { INT64_C( 7397759754347610690), -INT64_C( 1242815026868658465) }, { UINT64_MAX, UINT64_C( 0) } }, { { INT64_C( 0), -INT64_C( 5757137311778680288) }, { UINT64_MAX, UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_uint64x2_t r = simde_vcgezq_s64(a); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_private a_ = simde_int64x2_to_private(simde_test_arm_neon_random_i64x2()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int64x2_t a = simde_int64x2_from_private(a_); simde_uint64x2_t r = simde_vcgezq_s64(a); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vcgez_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgez_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgez_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vcgez_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcgez_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgez_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgezq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgezq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgezq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vcgezq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcgezq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgezq_s64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/cgt.c000066400000000000000000002273121400333146700162470ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN cgt #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/cgt.h" #else #include "../../../simde/arm/neon.h" #endif static int test_simde_vcgt_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b[2]; uint32_t r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -594.36), SIMDE_FLOAT32_C( -430.71) }, { SIMDE_FLOAT32_C( 66.10), SIMDE_FLOAT32_C( 318.22) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 895.57), SIMDE_FLOAT32_C( -28.91) }, { SIMDE_FLOAT32_C( 945.51), SIMDE_FLOAT32_C( -28.91) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -484.25), SIMDE_FLOAT32_C( -208.16) }, { SIMDE_FLOAT32_C( 869.60), SIMDE_FLOAT32_C( -208.16) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -169.59), SIMDE_FLOAT32_C( 957.55) }, { SIMDE_FLOAT32_C( -169.59), SIMDE_FLOAT32_C( 126.36) }, { UINT32_C( 0), UINT32_MAX } }, { { 
SIMDE_FLOAT32_C( 853.75), SIMDE_FLOAT32_C( -413.58) }, { SIMDE_FLOAT32_C( 784.06), SIMDE_FLOAT32_C( -413.58) }, { UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -413.85), SIMDE_FLOAT32_C( 334.07) }, { SIMDE_FLOAT32_C( -413.85), SIMDE_FLOAT32_C( -621.67) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -664.52), SIMDE_FLOAT32_C( -452.14) }, { SIMDE_FLOAT32_C( -360.16), SIMDE_FLOAT32_C( -719.02) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -452.99), SIMDE_FLOAT32_C( -583.39) }, { SIMDE_FLOAT32_C( 29.10), SIMDE_FLOAT32_C( -583.39) }, { UINT32_C( 0), UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_uint32x2_t r = simde_vcgt_f32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_private a_ = simde_float32x2_to_private(simde_test_arm_neon_random_f32x2(-1000.0, 1000.0)); simde_float32x2_private b_ = simde_float32x2_to_private(simde_test_arm_neon_random_f32x2(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_float32x2_t a = simde_float32x2_from_private(a_); simde_float32x2_t b = simde_float32x2_from_private(b_); simde_uint32x2_t r = simde_vcgt_f32(a, b); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgt_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; simde_float64 b[1]; uint64_t r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( -24.01) }, { SIMDE_FLOAT64_C( -24.01) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -765.41) }, { SIMDE_FLOAT64_C( -856.24) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( -723.94) }, { SIMDE_FLOAT64_C( 639.04) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -780.40) }, { SIMDE_FLOAT64_C( -780.40) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 140.87) }, { SIMDE_FLOAT64_C( 140.87) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -29.24) }, { SIMDE_FLOAT64_C( -30.09) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( -911.69) }, { SIMDE_FLOAT64_C( -396.28) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 20.33) }, { SIMDE_FLOAT64_C( 20.33) }, { UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_uint64x1_t r = simde_vcgt_f64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_private a_ = simde_float64x1_to_private(simde_test_arm_neon_random_f64x1(-1000.0, 1000.0)); simde_float64x1_private b_ = simde_float64x1_to_private(simde_test_arm_neon_random_f64x1(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_float64x1_t a = simde_float64x1_from_private(a_); simde_float64x1_t b = simde_float64x1_from_private(b_); simde_uint64x1_t r = simde_vcgt_f64(a, b); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgt_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; uint8_t r[8]; } test_vec[] = { { { -INT8_C( 20), INT8_C( 80), -INT8_C( 110), INT8_MIN, -INT8_C( 123), -INT8_C( 56), -INT8_C( 33), INT8_C( 11) }, { -INT8_C( 39), INT8_C( 19), -INT8_C( 53), -INT8_C( 86), -INT8_C( 123), -INT8_C( 23), -INT8_C( 33), -INT8_C( 82) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { -INT8_C( 2), INT8_C( 61), -INT8_C( 85), INT8_C( 50), INT8_C( 5), -INT8_C( 19), -INT8_C( 85), -INT8_C( 79) }, { -INT8_C( 38), INT8_C( 61), INT8_C( 49), INT8_C( 2), INT8_C( 5), -INT8_C( 19), INT8_C( 13), -INT8_C( 34) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 59), -INT8_C( 118), INT8_C( 122), INT8_C( 54), INT8_C( 74), -INT8_C( 67), -INT8_C( 34), INT8_C( 72) }, { INT8_C( 113), -INT8_C( 118), INT8_C( 122), INT8_C( 54), INT8_C( 19), INT8_C( 37), -INT8_C( 25), -INT8_C( 19) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { INT8_MIN, -INT8_C( 49), -INT8_C( 115), -INT8_C( 103), INT8_C( 40), -INT8_C( 62), INT8_C( 17), -INT8_C( 103) }, { INT8_MIN, INT8_C( 107), INT8_C( 97), -INT8_C( 54), INT8_C( 40), INT8_C( 64), INT8_C( 19), -INT8_C( 103) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { -INT8_C( 50), 
-INT8_C( 69), INT8_C( 125), -INT8_C( 55), -INT8_C( 72), -INT8_C( 61), INT8_C( 59), -INT8_C( 113) }, { -INT8_C( 110), INT8_C( 105), INT8_C( 40), -INT8_C( 55), INT8_C( 43), INT8_C( 57), INT8_C( 59), -INT8_C( 85) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 22), INT8_C( 52), -INT8_C( 124), -INT8_C( 55), -INT8_C( 22), INT8_C( 105), -INT8_C( 33), -INT8_C( 79) }, { INT8_C( 10), INT8_C( 92), -INT8_C( 115), -INT8_C( 61), INT8_C( 31), INT8_C( 105), INT8_C( 82), -INT8_C( 79) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 82), INT8_C( 30), -INT8_C( 121), -INT8_C( 87), -INT8_C( 89), -INT8_C( 118), -INT8_C( 42), -INT8_C( 75) }, { -INT8_C( 66), INT8_C( 91), -INT8_C( 121), -INT8_C( 87), -INT8_C( 86), INT8_C( 102), INT8_C( 98), -INT8_C( 75) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { -INT8_C( 18), INT8_C( 116), INT8_C( 39), -INT8_C( 9), -INT8_C( 2), INT8_C( 26), INT8_C( 84), INT8_C( 20) }, { -INT8_C( 18), INT8_C( 116), INT8_C( 68), -INT8_C( 106), -INT8_C( 2), INT8_C( 26), INT8_C( 84), -INT8_C( 68) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_uint8x8_t r = simde_vcgt_s8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_private a_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); simde_int8x8_private b_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int8x8_t a = simde_int8x8_from_private(a_); simde_int8x8_t b = simde_int8x8_from_private(b_); simde_uint8x8_t r = simde_vcgt_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgt_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; uint16_t r[4]; } test_vec[] = { { { -INT16_C( 28683), INT16_C( 18521), INT16_C( 16553), -INT16_C( 1209) }, { INT16_C( 26074), INT16_C( 26696), INT16_C( 16553), -INT16_C( 1209) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { INT16_C( 31423), INT16_C( 8947), INT16_C( 30730), -INT16_C( 18190) }, { INT16_C( 31423), -INT16_C( 19425), INT16_C( 30730), INT16_C( 765) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { INT16_C( 12592), -INT16_C( 9732), -INT16_C( 5087), INT16_C( 12500) }, { INT16_C( 32559), -INT16_C( 15368), -INT16_C( 5087), INT16_C( 12517) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { INT16_C( 18406), INT16_C( 23582), -INT16_C( 13959), -INT16_C( 5393) }, { INT16_C( 18406), INT16_C( 5837), -INT16_C( 13959), -INT16_C( 5393) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { INT16_C( 4930), INT16_C( 25599), -INT16_C( 6913), -INT16_C( 21868) }, { INT16_C( 31932), INT16_C( 3860), INT16_C( 12932), INT16_C( 1387) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { -INT16_C( 28676), INT16_C( 15901), INT16_C( 14804), -INT16_C( 7819) }, { -INT16_C( 28676), INT16_C( 15901), INT16_C( 7330), -INT16_C( 24158) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { INT16_C( 31233), INT16_C( 4884), INT16_C( 24198), -INT16_C( 8132) }, { INT16_C( 31233), INT16_C( 4884), INT16_C( 
24198), INT16_C( 23197) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { INT16_C( 22690), INT16_C( 23602), INT16_C( 29556), INT16_C( 9107) }, { INT16_C( 12713), INT16_C( 23602), -INT16_C( 367), INT16_C( 9107) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_uint16x4_t r = simde_vcgt_s16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_private a_ = simde_int16x4_to_private(simde_test_arm_neon_random_i16x4()); simde_int16x4_private b_ = simde_int16x4_to_private(simde_test_arm_neon_random_i16x4()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int16x4_t a = simde_int16x4_from_private(a_); simde_int16x4_t b = simde_int16x4_from_private(b_); simde_uint16x4_t r = simde_vcgt_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgt_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; uint32_t r[2]; } test_vec[] = { { { INT32_C( 1272032729), INT32_C( 1940613686) }, { -INT32_C( 1339384298), -INT32_C( 71380318) }, { UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 510544776), INT32_C( 47620657) }, { INT32_C( 701037432), INT32_C( 47620657) }, { UINT32_C( 0), UINT32_C( 0) } }, { { -INT32_C( 2105501588), -INT32_C( 1020376014) }, { INT32_C( 2060775804), -INT32_C( 1020376014) }, { UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 541360295), -INT32_C( 1405936852) }, { -INT32_C( 
1683021906), -INT32_C( 1405936852) }, { UINT32_MAX, UINT32_C( 0) } }, { { -INT32_C( 750530473), INT32_C( 1846632234) }, { -INT32_C( 1850246045), INT32_C( 1846632234) }, { UINT32_MAX, UINT32_C( 0) } }, { { INT32_C( 691652730), -INT32_C( 1519249566) }, { -INT32_C( 1581189117), -INT32_C( 1519249566) }, { UINT32_MAX, UINT32_C( 0) } }, { { INT32_C( 522664124), -INT32_C( 558833740) }, { -INT32_C( 1119043266), -INT32_C( 2021275826) }, { UINT32_MAX, UINT32_MAX } }, { { INT32_C( 1921037423), -INT32_C( 511938631) }, { INT32_C( 1135838808), -INT32_C( 511938631) }, { UINT32_MAX, UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_uint32x2_t r = simde_vcgt_s32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_private a_ = simde_int32x2_to_private(simde_test_arm_neon_random_i32x2()); simde_int32x2_private b_ = simde_int32x2_to_private(simde_test_arm_neon_random_i32x2()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int32x2_t a = simde_int32x2_from_private(a_); simde_int32x2_t b = simde_int32x2_from_private(b_); simde_uint32x2_t r = simde_vcgt_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgt_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int64_t b[1]; uint64_t r[1]; } test_vec[] = { { { -INT64_C( 3823520135268495771) }, { -INT64_C( 2454514891205621436) }, { UINT64_C( 0) } }, { { -INT64_C( 1386457369042881231) }, { -INT64_C( 1386457369042881231) }, { UINT64_C( 0) } }, { { INT64_C( 3521105073255325347) }, { INT64_C( 6733137012515181439) }, { UINT64_C( 0) } }, { { INT64_C( 4316921532226472852) }, { -INT64_C( 3347124278221125850) }, { UINT64_MAX } }, { { INT64_C( 7678865423090072096) }, { INT64_C( 7513120585129590846) }, { UINT64_MAX } }, { { INT64_C( 8171360295472179766) }, { -INT64_C( 6550794157104719511) }, { UINT64_MAX } }, { { -INT64_C( 3628699006713524461) }, { INT64_C( 1544264537413518068) }, { UINT64_C( 0) } }, { { -INT64_C( 6001437256871250036) }, { -INT64_C( 6001437256871250036) }, { UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_uint64x1_t r = simde_vcgt_s64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_private a_ = simde_int64x1_to_private(simde_test_arm_neon_random_i64x1()); simde_int64x1_private b_ = simde_int64x1_to_private(simde_test_arm_neon_random_i64x1()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int64x1_t a = simde_int64x1_from_private(a_); simde_int64x1_t b = simde_int64x1_from_private(b_); simde_uint64x1_t r = simde_vcgt_s64(a, b); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgt_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 61), UINT8_C(241), UINT8_C(132), UINT8_C(244), UINT8_C(134), UINT8_C(100), UINT8_C(175), UINT8_C( 82) }, { UINT8_C( 61), UINT8_C(241), UINT8_C(132), UINT8_C(186), UINT8_C(143), UINT8_C(100), UINT8_C(114), UINT8_C( 93) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C(246), UINT8_C(181), UINT8_C( 4), UINT8_C(125), UINT8_C(193), UINT8_C( 76), UINT8_C(189), UINT8_C(157) }, { UINT8_C(246), UINT8_C( 2), UINT8_C(252), UINT8_C(125), UINT8_C( 95), UINT8_C(171), UINT8_C(207), UINT8_C(157) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(166), UINT8_C(170), UINT8_C(110), UINT8_C(218), UINT8_C(246), UINT8_C(100), UINT8_C(165), UINT8_C(236) }, { UINT8_C( 25), UINT8_C(170), UINT8_C(110), UINT8_C(218), UINT8_C(246), UINT8_C( 43), UINT8_C(226), UINT8_C(236) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(140), UINT8_C(130), UINT8_C(113), UINT8_C( 67), UINT8_C( 95), UINT8_C( 22), UINT8_C(217), UINT8_C(242) }, { UINT8_C( 68), UINT8_C(251), UINT8_C(113), UINT8_C(195), UINT8_C( 95), UINT8_C( 22), UINT8_C(250), UINT8_C(121) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } 
}, { { UINT8_C( 20), UINT8_C( 12), UINT8_C( 20), UINT8_C( 71), UINT8_C( 69), UINT8_C(121), UINT8_C( 57), UINT8_C(209) }, { UINT8_C(251), UINT8_C( 20), UINT8_C( 20), UINT8_C( 71), UINT8_C(238), UINT8_C(237), UINT8_C( 57), UINT8_C( 50) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { UINT8_C( 88), UINT8_C(252), UINT8_C( 56), UINT8_C(235), UINT8_C( 74), UINT8_C(219), UINT8_C(253), UINT8_C(113) }, { UINT8_C(231), UINT8_C(252), UINT8_C(253), UINT8_C( 44), UINT8_C(117), UINT8_C(192), UINT8_C(253), UINT8_C(113) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(156), UINT8_C(235), UINT8_C( 31), UINT8_C( 93), UINT8_C(242), UINT8_C(241), UINT8_C(223), UINT8_C(217) }, { UINT8_C( 5), UINT8_C( 23), UINT8_C( 31), UINT8_C( 80), UINT8_C(242), UINT8_C(204), UINT8_C(175), UINT8_C(217) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C( 21), UINT8_C(103), UINT8_C( 4), UINT8_C( 21), UINT8_C( 30), UINT8_C(250), UINT8_C(253), UINT8_C(245) }, { UINT8_C(229), UINT8_C( 45), UINT8_C( 83), UINT8_C(193), UINT8_C( 30), UINT8_C( 50), UINT8_C(244), UINT8_C( 35) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vcgt_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_private a_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); simde_uint8x8_private b_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint8x8_t a = simde_uint8x8_from_private(a_); simde_uint8x8_t b = simde_uint8x8_from_private(b_); simde_uint8x8_t r = simde_vcgt_u8(a, b); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgt_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(62314), UINT16_C(34517), UINT16_C(26000), UINT16_C(17193) }, { UINT16_C(62314), UINT16_C(17113), UINT16_C(63969), UINT16_C(44359) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(49883), UINT16_C(62505), UINT16_C(16661), UINT16_C(46714) }, { UINT16_C( 8635), UINT16_C(62505), UINT16_C(65062), UINT16_C(46714) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(11660), UINT16_C(26765), UINT16_C(22731), UINT16_C( 37) }, { UINT16_C(11660), UINT16_C(26765), UINT16_C(62959), UINT16_C( 1070) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(16643), UINT16_C(39505), UINT16_C(24608), UINT16_C(33608) }, { UINT16_C(16643), UINT16_C(39505), UINT16_C(25108), UINT16_C(57106) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(20518), UINT16_C(21678), UINT16_C(56418), UINT16_C(39000) }, { UINT16_C(20518), UINT16_C(37257), UINT16_C(28529), UINT16_C(37154) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { UINT16_C(26284), UINT16_C( 7369), UINT16_C(30701), UINT16_C(33439) }, { UINT16_C(32695), UINT16_C( 7369), UINT16_C(30701), UINT16_C(20336) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(28952), UINT16_C(35339), UINT16_C(11744), UINT16_C(24324) }, { UINT16_C(12440), UINT16_C(17538), 
UINT16_C(61335), UINT16_C(24324) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(43811), UINT16_C(38128), UINT16_C(45858), UINT16_C(39198) }, { UINT16_C(18299), UINT16_C(38128), UINT16_C(64441), UINT16_C(39198) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vcgt_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_private a_ = simde_uint16x4_to_private(simde_test_arm_neon_random_u16x4()); simde_uint16x4_private b_ = simde_uint16x4_to_private(simde_test_arm_neon_random_u16x4()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint16x4_t a = simde_uint16x4_from_private(a_); simde_uint16x4_t b = simde_uint16x4_from_private(b_); simde_uint16x4_t r = simde_vcgt_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgt_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C( 508093940), UINT32_C(3751220310) }, { UINT32_C(3608123784), UINT32_C(3751220310) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(1058673663), UINT32_C( 337578444) }, { UINT32_C( 974431602), UINT32_C( 337578444) }, { UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(2494528012), UINT32_C(1768668691) }, { UINT32_C(1313342162), UINT32_C(3026274202) }, { UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(3432578980), 
UINT32_C(2229246904) }, { UINT32_C(3432578980), UINT32_C(1598465200) }, { UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(3656687698), UINT32_C(1077807782) }, { UINT32_C(3656687698), UINT32_C(3219859256) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(2612728163), UINT32_C( 742381674) }, { UINT32_C(2612728163), UINT32_C( 742381674) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 228543809), UINT32_C( 414841585) }, { UINT32_C( 228543809), UINT32_C( 414841585) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 476911187), UINT32_C(1104640982) }, { UINT32_C( 476911187), UINT32_C( 539206907) }, { UINT32_C( 0), UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vcgt_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_private a_ = simde_uint32x2_to_private(simde_test_arm_neon_random_u32x2()); simde_uint32x2_private b_ = simde_uint32x2_to_private(simde_test_arm_neon_random_u32x2()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint32x2_t a = simde_uint32x2_from_private(a_); simde_uint32x2_t b = simde_uint32x2_from_private(b_); simde_uint32x2_t r = simde_vcgt_u32(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgt_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C(10149447861172126575) }, { UINT64_C(14973029270786375072) }, { UINT64_C( 0) } }, { { UINT64_C( 188873460352155308) }, { UINT64_C(15677533420196890137) }, { UINT64_C( 0) } }, { { UINT64_C( 3685917520124110800) }, { UINT64_C(13765571949920383634) }, { UINT64_C( 0) } }, { { UINT64_C( 5874391744743614816) }, { UINT64_C(15891911110410574633) }, { UINT64_C( 0) } }, { { UINT64_C(17952937805671436476) }, { UINT64_C(17952937805671436476) }, { UINT64_C( 0) } }, { { UINT64_C(10504581769340920869) }, { UINT64_C(16663533674779229056) }, { UINT64_C( 0) } }, { { UINT64_C(13618227777779952285) }, { UINT64_C(16914102683313556847) }, { UINT64_C( 0) } }, { { UINT64_C(16927016630319800422) }, { UINT64_C(15186205456682811618) }, { UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_vcgt_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_private a_ = simde_uint64x1_to_private(simde_test_arm_neon_random_u64x1()); simde_uint64x1_private b_ = simde_uint64x1_to_private(simde_test_arm_neon_random_u64x1()); /* Make some equal which is unlikely to happen 
by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint64x1_t a = simde_uint64x1_from_private(a_); simde_uint64x1_t b = simde_uint64x1_from_private(b_); simde_uint64x1_t r = simde_vcgt_u64(a, b); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 b[4]; uint32_t r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -259.46), SIMDE_FLOAT32_C( -774.65), SIMDE_FLOAT32_C( 628.16), SIMDE_FLOAT32_C( -992.75) }, { SIMDE_FLOAT32_C( -259.46), SIMDE_FLOAT32_C( -774.65), SIMDE_FLOAT32_C( 628.16), SIMDE_FLOAT32_C( -707.60) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 127.08), SIMDE_FLOAT32_C( 51.79), SIMDE_FLOAT32_C( 966.55), SIMDE_FLOAT32_C( -31.51) }, { SIMDE_FLOAT32_C( 36.37), SIMDE_FLOAT32_C( 73.30), SIMDE_FLOAT32_C( 646.26), SIMDE_FLOAT32_C( -31.51) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 466.70), SIMDE_FLOAT32_C( 282.12), SIMDE_FLOAT32_C( -570.04), SIMDE_FLOAT32_C( -866.16) }, { SIMDE_FLOAT32_C( -215.26), SIMDE_FLOAT32_C( -475.25), SIMDE_FLOAT32_C( -570.04), SIMDE_FLOAT32_C( -866.16) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -574.22), SIMDE_FLOAT32_C( -2.51), SIMDE_FLOAT32_C( -310.68), SIMDE_FLOAT32_C( -504.85) }, { SIMDE_FLOAT32_C( -574.22), SIMDE_FLOAT32_C( -2.51), SIMDE_FLOAT32_C( -697.28), SIMDE_FLOAT32_C( 552.86) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 342.57), SIMDE_FLOAT32_C( 504.54), SIMDE_FLOAT32_C( 219.24), SIMDE_FLOAT32_C( -326.34) }, { SIMDE_FLOAT32_C( -190.21), SIMDE_FLOAT32_C( 504.54), SIMDE_FLOAT32_C( 219.24), 
SIMDE_FLOAT32_C( -723.52) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -280.95), SIMDE_FLOAT32_C( -30.91), SIMDE_FLOAT32_C( 705.25), SIMDE_FLOAT32_C( -225.48) }, { SIMDE_FLOAT32_C( -240.87), SIMDE_FLOAT32_C( 846.33), SIMDE_FLOAT32_C( -227.65), SIMDE_FLOAT32_C( -225.48) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 841.66), SIMDE_FLOAT32_C( 572.39), SIMDE_FLOAT32_C( 594.22), SIMDE_FLOAT32_C( -491.55) }, { SIMDE_FLOAT32_C( 841.66), SIMDE_FLOAT32_C( 110.40), SIMDE_FLOAT32_C( 594.11), SIMDE_FLOAT32_C( 184.23) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 152.78), SIMDE_FLOAT32_C( 77.13), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 939.45) }, { SIMDE_FLOAT32_C( 271.42), SIMDE_FLOAT32_C( 898.23), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 990.47) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_uint32x4_t r = simde_vcgtq_f32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_private a_ = simde_float32x4_to_private(simde_test_arm_neon_random_f32x4(-1000.0, 1000.0)); simde_float32x4_private b_ = simde_float32x4_to_private(simde_test_arm_neon_random_f32x4(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_float32x4_t a = simde_float32x4_from_private(a_); simde_float32x4_t b = simde_float32x4_from_private(b_); simde_uint32x4_t r = simde_vcgtq_f32(a, b); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 b[2]; uint64_t r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -768.50), SIMDE_FLOAT64_C( 422.43) }, { SIMDE_FLOAT64_C( -768.50), SIMDE_FLOAT64_C( 422.43) }, { UINT64_C( 0), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -374.33), SIMDE_FLOAT64_C( 705.75) }, { SIMDE_FLOAT64_C( -374.33), SIMDE_FLOAT64_C( 418.62) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( 918.51), SIMDE_FLOAT64_C( 204.70) }, { SIMDE_FLOAT64_C( 951.02), SIMDE_FLOAT64_C( 727.05) }, { UINT64_C( 0), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 838.32), SIMDE_FLOAT64_C( 700.06) }, { SIMDE_FLOAT64_C( 987.36), SIMDE_FLOAT64_C( -654.73) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( 461.26), SIMDE_FLOAT64_C( -727.20) }, { SIMDE_FLOAT64_C( 461.26), SIMDE_FLOAT64_C( -199.48) }, { UINT64_C( 0), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 305.36), SIMDE_FLOAT64_C( 231.22) }, { SIMDE_FLOAT64_C( 305.36), SIMDE_FLOAT64_C( -216.90) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( 305.13), SIMDE_FLOAT64_C( -986.91) }, { SIMDE_FLOAT64_C( 240.23), SIMDE_FLOAT64_C( 930.80) }, { UINT64_MAX, UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 601.87), SIMDE_FLOAT64_C( 350.21) }, { SIMDE_FLOAT64_C( -113.14), SIMDE_FLOAT64_C( 552.89) }, { UINT64_MAX, UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); 
simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_uint64x2_t r = simde_vcgtq_f64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_private a_ = simde_float64x2_to_private(simde_test_arm_neon_random_f64x2(-1000.0, 1000.0)); simde_float64x2_private b_ = simde_float64x2_to_private(simde_test_arm_neon_random_f64x2(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_float64x2_t a = simde_float64x2_from_private(a_); simde_float64x2_t b = simde_float64x2_from_private(b_); simde_uint64x2_t r = simde_vcgtq_f64(a, b); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t b[16]; uint8_t r[16]; } test_vec[] = { { { -INT8_C( 5), INT8_C( 60), -INT8_C( 67), -INT8_C( 35), INT8_C( 16), -INT8_C( 24), INT8_C( 30), INT8_C( 95), -INT8_C( 46), -INT8_C( 88), -INT8_C( 97), -INT8_C( 108), -INT8_C( 22), -INT8_C( 109), -INT8_C( 32), INT8_C( 19) }, { -INT8_C( 5), INT8_C( 98), -INT8_C( 119), -INT8_C( 49), INT8_C( 63), INT8_C( 120), INT8_C( 30), -INT8_C( 127), INT8_C( 47), INT8_C( 6), -INT8_C( 30), -INT8_C( 108), -INT8_C( 42), INT8_C( 17), INT8_C( 101), -INT8_C( 63) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 39), INT8_C( 20), -INT8_C( 63), -INT8_C( 95), -INT8_C( 37), -INT8_C( 40), INT8_C( 34), INT8_C( 11), -INT8_C( 34), INT8_C( 5), -INT8_C( 97), -INT8_C( 76), 
INT8_C( 22), INT8_C( 5), INT8_C( 118), INT8_C( 99) }, { INT8_C( 39), INT8_C( 20), -INT8_C( 63), INT8_C( 50), INT8_C( 73), INT8_C( 126), INT8_C( 15), INT8_C( 38), -INT8_C( 38), INT8_C( 47), -INT8_C( 19), -INT8_C( 54), INT8_C( 47), -INT8_C( 57), -INT8_C( 75), -INT8_C( 110) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { -INT8_C( 35), -INT8_C( 107), INT8_C( 16), INT8_C( 37), INT8_C( 19), INT8_C( 31), -INT8_C( 44), -INT8_C( 18), INT8_C( 79), INT8_C( 36), -INT8_C( 72), INT8_C( 51), -INT8_C( 119), INT8_C( 58), INT8_C( 16), -INT8_C( 76) }, { -INT8_C( 35), INT8_C( 68), -INT8_C( 69), INT8_C( 37), -INT8_C( 102), -INT8_C( 52), INT8_C( 74), -INT8_C( 11), INT8_C( 125), INT8_C( 36), INT8_C( 102), INT8_C( 51), INT8_C( 116), INT8_C( 58), INT8_C( 17), -INT8_C( 39) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 98), INT8_C( 62), -INT8_C( 6), -INT8_C( 4), -INT8_C( 29), INT8_C( 69), -INT8_C( 15), INT8_C( 96), -INT8_C( 95), -INT8_C( 92), -INT8_C( 108), -INT8_C( 103), -INT8_C( 110), -INT8_C( 91), INT8_C( 110), INT8_C( 37) }, { -INT8_C( 58), INT8_C( 62), INT8_C( 69), INT8_C( 6), -INT8_C( 103), INT8_C( 23), -INT8_C( 106), -INT8_C( 73), -INT8_C( 95), -INT8_C( 92), INT8_C( 94), -INT8_C( 103), -INT8_C( 62), -INT8_C( 71), INT8_C( 110), INT8_C( 37) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { -INT8_C( 22), INT8_C( 92), INT8_C( 25), -INT8_C( 124), -INT8_C( 69), -INT8_C( 81), -INT8_C( 110), INT8_C( 92), INT8_C( 83), -INT8_C( 103), -INT8_C( 11), INT8_C( 21), INT8_C( 52), INT8_C( 52), -INT8_C( 17), 
INT8_C( 34) }, { -INT8_C( 52), INT8_C( 92), -INT8_C( 43), INT8_C( 122), INT8_C( 111), -INT8_C( 23), -INT8_C( 110), -INT8_C( 39), -INT8_C( 112), -INT8_C( 121), -INT8_C( 42), -INT8_C( 35), INT8_C( 52), INT8_C( 52), -INT8_C( 17), INT8_C( 30) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 15), -INT8_C( 99), INT8_C( 117), INT8_C( 37), -INT8_C( 86), INT8_C( 33), -INT8_C( 1), -INT8_C( 15), INT8_C( 73), -INT8_C( 43), INT8_C( 23), -INT8_C( 111), INT8_C( 9), INT8_C( 7), -INT8_C( 5), INT8_C( 2) }, { INT8_C( 15), -INT8_C( 99), INT8_C( 117), -INT8_C( 57), INT8_C( 123), INT8_C( 101), -INT8_C( 47), -INT8_C( 15), INT8_C( 73), -INT8_C( 15), -INT8_C( 70), -INT8_C( 111), INT8_C( 76), -INT8_C( 90), -INT8_C( 90), INT8_C( 2) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 120), INT8_C( 93), INT8_C( 64), -INT8_C( 50), -INT8_C( 62), INT8_C( 4), -INT8_C( 52), -INT8_C( 99), INT8_C( 3), -INT8_C( 89), -INT8_C( 94), INT8_C( 79), INT8_C( 64), INT8_C( 67), INT8_C( 74), -INT8_C( 89) }, { INT8_C( 120), INT8_C( 122), -INT8_C( 72), -INT8_C( 50), -INT8_C( 95), INT8_C( 4), -INT8_C( 52), -INT8_C( 99), INT8_C( 103), -INT8_C( 89), -INT8_C( 94), -INT8_C( 47), INT8_C( 125), -INT8_C( 118), INT8_C( 74), -INT8_C( 15) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { -INT8_C( 68), -INT8_C( 6), -INT8_C( 38), INT8_C( 93), -INT8_C( 2), -INT8_C( 89), -INT8_C( 6), INT8_C( 101), INT8_C( 78), -INT8_C( 100), INT8_C( 54), -INT8_C( 52), INT8_C( 38), -INT8_C( 127), -INT8_C( 67), INT8_C( 13) }, { INT8_C( 12), -INT8_C( 100), 
-INT8_C( 74), -INT8_C( 87), INT8_C( 92), INT8_C( 107), INT8_C( 73), -INT8_C( 74), -INT8_C( 68), INT8_C( 57), INT8_C( 80), INT8_C( 81), INT8_C( 123), -INT8_C( 110), INT8_C( 93), INT8_C( 55) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_uint8x16_t r = simde_vcgtq_s8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_private a_ = simde_int8x16_to_private(simde_test_arm_neon_random_i8x16()); simde_int8x16_private b_ = simde_int8x16_to_private(simde_test_arm_neon_random_i8x16()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int8x16_t a = simde_int8x16_from_private(a_); simde_int8x16_t b = simde_int8x16_from_private(b_); simde_uint8x16_t r = simde_vcgtq_s8(a, b); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; uint16_t r[8]; } test_vec[] = { { { INT16_C( 31254), -INT16_C( 3947), INT16_C( 22993), -INT16_C( 590), INT16_C( 14528), -INT16_C( 28835), INT16_C( 23061), -INT16_C( 6038) }, { INT16_C( 23834), -INT16_C( 3947), -INT16_C( 11322), -INT16_C( 18532), INT16_C( 14528), INT16_C( 5233), INT16_C( 23061), INT16_C( 11175) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), 
UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { INT16_C( 391), -INT16_C( 19446), -INT16_C( 32485), -INT16_C( 3867), INT16_C( 12754), -INT16_C( 26416), INT16_C( 14703), -INT16_C( 2305) }, { -INT16_C( 15964), -INT16_C( 17703), -INT16_C( 32485), -INT16_C( 3867), INT16_C( 20950), -INT16_C( 31594), INT16_C( 14703), -INT16_C( 2305) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { -INT16_C( 20615), INT16_C( 32232), INT16_C( 14363), -INT16_C( 16574), INT16_C( 7417), INT16_C( 5497), INT16_C( 24477), -INT16_C( 32306) }, { -INT16_C( 25680), INT16_C( 8183), -INT16_C( 2092), INT16_C( 3861), -INT16_C( 16384), INT16_C( 32270), -INT16_C( 4856), -INT16_C( 32306) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { -INT16_C( 4003), -INT16_C( 806), INT16_C( 847), -INT16_C( 147), INT16_C( 25758), INT16_C( 29214), INT16_C( 13403), INT16_C( 6712) }, { -INT16_C( 28428), -INT16_C( 806), -INT16_C( 22147), INT16_C( 6781), INT16_C( 31840), INT16_C( 20433), INT16_C( 18621), INT16_C( 6712) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { -INT16_C( 22811), -INT16_C( 32048), -INT16_C( 13350), INT16_C( 18076), INT16_C( 30555), INT16_C( 30926), INT16_C( 22065), -INT16_C( 32525) }, { -INT16_C( 15163), -INT16_C( 32048), INT16_C( 2061), INT16_C( 18076), -INT16_C( 20417), INT16_C( 30926), INT16_C( 22065), INT16_C( 5712) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { INT16_C( 29292), -INT16_C( 29542), -INT16_C( 29253), -INT16_C( 14699), -INT16_C( 8879), -INT16_C( 4962), -INT16_C( 24859), INT16_C( 9380) }, { INT16_C( 29518), INT16_C( 32669), -INT16_C( 4663), -INT16_C( 14699), -INT16_C( 4729), -INT16_C( 4962), INT16_C( 17633), INT16_C( 19885) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 
0), UINT16_C( 0), UINT16_C( 0) } }, { { -INT16_C( 13599), -INT16_C( 15669), INT16_C( 30990), -INT16_C( 15088), INT16_C( 27548), -INT16_C( 27338), -INT16_C( 2984), -INT16_C( 8404) }, { -INT16_C( 13599), -INT16_C( 15669), INT16_C( 30990), -INT16_C( 15088), -INT16_C( 5440), -INT16_C( 27338), INT16_C( 10449), -INT16_C( 27205) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { -INT16_C( 5257), -INT16_C( 12438), -INT16_C( 26913), -INT16_C( 16210), INT16_C( 8515), INT16_C( 28290), -INT16_C( 27917), -INT16_C( 19661) }, { INT16_C( 27260), INT16_C( 19784), INT16_C( 914), -INT16_C( 21022), INT16_C( 8515), -INT16_C( 21670), -INT16_C( 6355), -INT16_C( 23120) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_uint16x8_t r = simde_vcgtq_s16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_private a_ = simde_int16x8_to_private(simde_test_arm_neon_random_i16x8()); simde_int16x8_private b_ = simde_int16x8_to_private(simde_test_arm_neon_random_i16x8()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int16x8_t a = simde_int16x8_from_private(a_); simde_int16x8_t b = simde_int16x8_from_private(b_); simde_uint16x8_t r = simde_vcgtq_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; uint32_t r[4]; } test_vec[] = { { { INT32_C( 492300188), -INT32_C( 1956120182), -INT32_C( 1955357078), -INT32_C( 1876357680) }, { INT32_C( 2138637092), INT32_C( 976653345), INT32_C( 1546898711), -INT32_C( 908456404) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { -INT32_C( 1777644245), INT32_C( 1606736946), -INT32_C( 1353954677), INT32_C( 1865336142) }, { INT32_C( 1755931985), INT32_C( 1606736946), -INT32_C( 550985750), -INT32_C( 60879151) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 1946465512), INT32_C( 1277382910), INT32_C( 1723552277), -INT32_C( 372284233) }, { INT32_C( 726176577), INT32_C( 51081266), -INT32_C( 620729729), -INT32_C( 1138519340) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { INT32_C( 2123049471), INT32_C( 1576160678), INT32_C( 1680047133), -INT32_C( 2028040363) }, { INT32_C( 2123049471), INT32_C( 1733921683), INT32_C( 1680047133), INT32_C( 447304867) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 1785485389), -INT32_C( 1356074662), -INT32_C( 2143860352), INT32_C( 100582002) }, { INT32_C( 1785485389), INT32_C( 2026803412), -INT32_C( 1936558108), INT32_C( 970602930) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 1075469148), -INT32_C( 2066961198), -INT32_C( 1212242203), -INT32_C( 1567830835) }, { INT32_C( 
1075469148), -INT32_C( 2066961198), -INT32_C( 1212242203), -INT32_C( 1578880981) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { -INT32_C( 1475342381), INT32_C( 1388998533), -INT32_C( 1745594821), -INT32_C( 813977860) }, { -INT32_C( 1610046533), INT32_C( 1951974985), -INT32_C( 988398345), -INT32_C( 813977860) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 915751419), INT32_C( 1238272663), INT32_C( 1405986200), -INT32_C( 1829448375) }, { -INT32_C( 1828238180), -INT32_C( 2074600056), -INT32_C( 1990994741), INT32_C( 1523370591) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_uint32x4_t r = simde_vcgtq_s32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_private a_ = simde_int32x4_to_private(simde_test_arm_neon_random_i32x4()); simde_int32x4_private b_ = simde_int32x4_to_private(simde_test_arm_neon_random_i32x4()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int32x4_t a = simde_int32x4_from_private(a_); simde_int32x4_t b = simde_int32x4_from_private(b_); simde_uint32x4_t r = simde_vcgtq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t b[2]; uint64_t r[2]; } test_vec[] = { { { -INT64_C( 502189528637050139), INT64_C( 4939785098016854380) }, { INT64_C( 4830541158354021746), INT64_C( 4939785098016854380) }, { UINT64_C( 0), UINT64_C( 0) } }, { { INT64_C( 7537734782517829395), INT64_C( 4887792816035170112) }, { -INT64_C( 4674675188186846458), -INT64_C( 2819539494663585767) }, { UINT64_MAX, UINT64_MAX } }, { { INT64_C( 1601627469969290137), -INT64_C( 3511530056025063164) }, { INT64_C( 1601627469969290137), -INT64_C( 3920686018976416059) }, { UINT64_C( 0), UINT64_MAX } }, { { -INT64_C( 2941252623903311016), INT64_C( 622335687481439083) }, { INT64_C( 9002111178501477557), -INT64_C( 1938790237255187312) }, { UINT64_C( 0), UINT64_MAX } }, { { INT64_C( 5671339170748503635), -INT64_C( 1924727269611686317) }, { -INT64_C( 7377069698328644371), INT64_C( 7297631376879201416) }, { UINT64_MAX, UINT64_C( 0) } }, { { INT64_C( 22929999772477670), -INT64_C( 4077467447542804373) }, { -INT64_C( 3057186358420727097), -INT64_C( 1146248527329188783) }, { UINT64_MAX, UINT64_C( 0) } }, { { INT64_C( 1883303849582922475), -INT64_C( 3182209124792343854) }, { INT64_C( 1883303849582922475), -INT64_C( 6358821967087326098) }, { UINT64_C( 0), UINT64_MAX } }, { { INT64_C( 2761376618429519235), -INT64_C( 6174449215856279780) }, { INT64_C( 5000006545298775061), -INT64_C( 6174449215856279780) }, { UINT64_C( 0), UINT64_C( 0) } }, }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_uint64x2_t r = simde_vcgtq_s64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_private a_ = simde_int64x2_to_private(simde_test_arm_neon_random_i64x2()); simde_int64x2_private b_ = simde_int64x2_to_private(simde_test_arm_neon_random_i64x2()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int64x2_t a = simde_int64x2_from_private(a_); simde_int64x2_t b = simde_int64x2_from_private(b_); simde_uint64x2_t r = simde_vcgtq_s64(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(254), UINT8_C(227), UINT8_C(203), UINT8_C(182), UINT8_C( 87), UINT8_C( 85), UINT8_C( 43), UINT8_C( 21), UINT8_C( 50), UINT8_C(171), UINT8_C(226), UINT8_C( 87), UINT8_C( 42), UINT8_C( 18), UINT8_C(241), UINT8_C( 41) }, { UINT8_MAX, UINT8_C( 57), UINT8_C(226), UINT8_C(182), UINT8_C(191), UINT8_C(155), UINT8_C( 33), UINT8_C(147), UINT8_C( 44), UINT8_C(101), UINT8_C(226), UINT8_C(119), UINT8_C( 42), UINT8_C( 18), UINT8_C(156), UINT8_C( 41) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C(113), UINT8_C( 73), UINT8_C( 0), UINT8_C( 74), UINT8_C(228), UINT8_C( 94), 
UINT8_C(196), UINT8_C( 17), UINT8_C(135), UINT8_C(129), UINT8_C(136), UINT8_C( 80), UINT8_C(185), UINT8_C( 37), UINT8_C(218), UINT8_C(174) }, { UINT8_C(141), UINT8_C(100), UINT8_C(251), UINT8_C( 74), UINT8_C( 26), UINT8_C( 94), UINT8_C( 58), UINT8_C(123), UINT8_C( 6), UINT8_C(129), UINT8_C(134), UINT8_C( 80), UINT8_C(186), UINT8_C(237), UINT8_C(154), UINT8_C( 43) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 51), UINT8_C( 42), UINT8_C(140), UINT8_C(206), UINT8_C(207), UINT8_C(235), UINT8_C(200), UINT8_C(142), UINT8_C(107), UINT8_C( 4), UINT8_C( 22), UINT8_C( 38), UINT8_C(211), UINT8_C(120), UINT8_C( 82), UINT8_C(113) }, { UINT8_C( 18), UINT8_C(174), UINT8_C(140), UINT8_C(206), UINT8_C(207), UINT8_C(183), UINT8_C( 16), UINT8_C(150), UINT8_C(107), UINT8_C( 4), UINT8_C( 22), UINT8_C( 68), UINT8_C(211), UINT8_C( 68), UINT8_C(170), UINT8_C( 6) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { UINT8_C(217), UINT8_C(162), UINT8_C(188), UINT8_C(168), UINT8_C( 90), UINT8_C(130), UINT8_C( 62), UINT8_C(197), UINT8_C(209), UINT8_C(225), UINT8_C( 10), UINT8_C(164), UINT8_C(153), UINT8_C( 61), UINT8_C(175), UINT8_C(229) }, { UINT8_C( 15), UINT8_C(253), UINT8_C(254), UINT8_C( 85), UINT8_C( 24), UINT8_C(130), UINT8_C( 8), UINT8_C(129), UINT8_C(229), UINT8_C(225), UINT8_C( 38), UINT8_C(193), UINT8_C( 12), UINT8_C( 61), UINT8_C(175), UINT8_C(229) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 83), UINT8_C(191), UINT8_C(105), UINT8_C( 58), UINT8_C( 87), UINT8_C( 42), UINT8_C(237), UINT8_C( 
39), UINT8_C(113), UINT8_C( 19), UINT8_C(232), UINT8_C( 47), UINT8_C( 69), UINT8_C(151), UINT8_C(253), UINT8_C(153) }, { UINT8_C( 2), UINT8_C(139), UINT8_C(105), UINT8_C( 58), UINT8_C( 87), UINT8_C(104), UINT8_C( 66), UINT8_C(120), UINT8_C(113), UINT8_C(239), UINT8_C( 50), UINT8_C( 47), UINT8_C( 69), UINT8_C(244), UINT8_C(252), UINT8_C(153) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C(244), UINT8_C(239), UINT8_C(217), UINT8_C( 75), UINT8_C( 87), UINT8_C( 28), UINT8_C(176), UINT8_C(228), UINT8_C(182), UINT8_C( 28), UINT8_C(248), UINT8_C( 80), UINT8_C(234), UINT8_C(244), UINT8_C(242), UINT8_C(121) }, { UINT8_C( 18), UINT8_C(238), UINT8_C(147), UINT8_C( 91), UINT8_C(223), UINT8_C(176), UINT8_C(176), UINT8_C(228), UINT8_C(182), UINT8_C( 28), UINT8_C( 58), UINT8_C( 83), UINT8_C(133), UINT8_C(191), UINT8_C(242), UINT8_C(121) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 19), UINT8_C(155), UINT8_C(141), UINT8_C(250), UINT8_C( 75), UINT8_C(217), UINT8_C(223), UINT8_C(191), UINT8_C(245), UINT8_C( 25), UINT8_C( 84), UINT8_C(122), UINT8_C(216), UINT8_C( 48), UINT8_C(243), UINT8_C(135) }, { UINT8_C( 19), UINT8_C(183), UINT8_C(141), UINT8_C(251), UINT8_C( 64), UINT8_C( 92), UINT8_C(238), UINT8_C(191), UINT8_C( 35), UINT8_C( 49), UINT8_C( 40), UINT8_C(222), UINT8_C( 94), UINT8_C( 48), UINT8_C(171), UINT8_C(121) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { UINT8_C(199), UINT8_C( 88), UINT8_C(193), UINT8_C(227), UINT8_C(180), UINT8_C(175), UINT8_C(133), UINT8_C(215), UINT8_C(225), 
UINT8_C(238), UINT8_C(181), UINT8_C( 56), UINT8_C( 30), UINT8_C( 97), UINT8_C(254), UINT8_C(233) }, { UINT8_C( 53), UINT8_C( 45), UINT8_C( 0), UINT8_C(227), UINT8_C(128), UINT8_C( 24), UINT8_C(133), UINT8_C(236), UINT8_C(132), UINT8_C(162), UINT8_C( 48), UINT8_C( 56), UINT8_C(177), UINT8_C(252), UINT8_C(254), UINT8_C(120) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vcgtq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_private a_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); simde_uint8x16_private b_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint8x16_t a = simde_uint8x16_from_private(a_); simde_uint8x16_t b = simde_uint8x16_from_private(b_); simde_uint8x16_t r = simde_vcgtq_u8(a, b); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C( 2446), UINT16_C(26386), UINT16_C( 2990), UINT16_C(37360), UINT16_C(47703), UINT16_C( 533), UINT16_C(57400), UINT16_C( 4735) }, { UINT16_C( 4132), UINT16_C(20851), UINT16_C(42315), UINT16_C(35847), UINT16_C(47703), UINT16_C(31280), UINT16_C(57400), UINT16_C(50806) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(15989), UINT16_C(12681), UINT16_C(22979), UINT16_C(15095), UINT16_C( 9352), UINT16_C(46392), UINT16_C(16311), UINT16_C( 3649) }, { UINT16_C(29177), UINT16_C(12681), UINT16_C(65361), UINT16_C(15095), UINT16_C( 9352), UINT16_C( 7377), UINT16_C(63810), UINT16_C(47091) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C( 6642), UINT16_C(43334), UINT16_C(55117), UINT16_C(21175), UINT16_C(16632), UINT16_C(18820), UINT16_C(25020), UINT16_C(51331) }, { UINT16_C(21664), UINT16_C(58084), UINT16_C(55117), UINT16_C(34201), UINT16_C(56229), UINT16_C(52351), UINT16_C(25020), UINT16_C(44637) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(43599), UINT16_C(55427), UINT16_C(57589), UINT16_C(50519), UINT16_C(15170), UINT16_C(36775), UINT16_C(16402), UINT16_C(15055) }, { 
UINT16_C(37916), UINT16_C(55427), UINT16_C(57589), UINT16_C(28550), UINT16_C(56707), UINT16_C(44355), UINT16_C(27115), UINT16_C(15055) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(42331), UINT16_C(28410), UINT16_C( 3814), UINT16_C( 549), UINT16_C(43426), UINT16_C(38874), UINT16_C(24713), UINT16_C( 3335) }, { UINT16_C(19005), UINT16_C(10426), UINT16_C(35507), UINT16_C(50787), UINT16_C(11460), UINT16_C(60926), UINT16_C(64589), UINT16_C(43095) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(25273), UINT16_C(28314), UINT16_C(51551), UINT16_C( 379), UINT16_C(14059), UINT16_C(31369), UINT16_C(36032), UINT16_C(33892) }, { UINT16_C(25273), UINT16_C( 1649), UINT16_C(51551), UINT16_C( 430), UINT16_C(50458), UINT16_C(31369), UINT16_C( 4865), UINT16_C(59005) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C(25862), UINT16_C(50753), UINT16_C(42482), UINT16_C(31243), UINT16_C(48135), UINT16_C(26289), UINT16_C(24453), UINT16_C(40807) }, { UINT16_C(61476), UINT16_C( 9498), UINT16_C(38659), UINT16_C(31243), UINT16_C(24750), UINT16_C(26293), UINT16_C(61744), UINT16_C(13833) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(42242), UINT16_C(60140), UINT16_C(50180), UINT16_C(10535), UINT16_C(16821), UINT16_C(47182), UINT16_C(23256), UINT16_C(25892) }, { UINT16_C(59578), UINT16_C(60140), UINT16_C(62938), UINT16_C(12576), UINT16_C( 7231), UINT16_C(11898), UINT16_C(28258), UINT16_C(25892) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = 
simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vcgtq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_private a_ = simde_uint16x8_to_private(simde_test_arm_neon_random_u16x8()); simde_uint16x8_private b_ = simde_uint16x8_to_private(simde_test_arm_neon_random_u16x8()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint16x8_t a = simde_uint16x8_from_private(a_); simde_uint16x8_t b = simde_uint16x8_from_private(b_); simde_uint16x8_t r = simde_vcgtq_u16(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C( 473723137), UINT32_C(3865908623), UINT32_C(2105010025), UINT32_C(1194363102) }, { UINT32_C( 473723137), UINT32_C(3865908623), UINT32_C(1956114873), UINT32_C( 411778070) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { UINT32_C(2827839550), UINT32_C( 519361544), UINT32_C(2373277068), UINT32_C(1453961671) }, { UINT32_C(2151421638), UINT32_C( 519361544), UINT32_C(3325460240), UINT32_C( 779632623) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(2519071497), UINT32_C( 388209232), UINT32_C( 309185868), UINT32_C(3952257763) }, { UINT32_C(2383054462), UINT32_C(4115939334), UINT32_C( 656657535), UINT32_C(3659463121) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { UINT32_C( 693933277), UINT32_C(1144834657), UINT32_C(4063284852), UINT32_C(1535130197) }, { UINT32_C(4199601274), UINT32_C(1898017952), 
UINT32_C(1649098605), UINT32_C(1604828290) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(1106625997), UINT32_C(3895666067), UINT32_C(4027940512), UINT32_C(1335991938) }, { UINT32_C(1972958472), UINT32_C(3067601972), UINT32_C(4027940512), UINT32_C(1335991938) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 467649724), UINT32_C(4294255480), UINT32_C(3227309148), UINT32_C(4063543230) }, { UINT32_C(1135086755), UINT32_C( 204717706), UINT32_C(3227309148), UINT32_C( 666365291) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(1881749689), UINT32_C(1765106686), UINT32_C(3014812687), UINT32_C(4210449264) }, { UINT32_C(1963403801), UINT32_C(1765106686), UINT32_C( 26274896), UINT32_C(1009505155) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { UINT32_C(3016547492), UINT32_C( 224821130), UINT32_C(4088094170), UINT32_C(2305488869) }, { UINT32_C(1710202389), UINT32_C( 224821130), UINT32_C(1565167954), UINT32_C(2305488869) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vcgtq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_private a_ = simde_uint32x4_to_private(simde_test_arm_neon_random_u32x4()); simde_uint32x4_private b_ = simde_uint32x4_to_private(simde_test_arm_neon_random_u32x4()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint32x4_t a = simde_uint32x4_from_private(a_); simde_uint32x4_t b = simde_uint32x4_from_private(b_); simde_uint32x4_t r = simde_vcgtq_u32(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 4984667592699295586), UINT64_C(15811667397857361308) }, { UINT64_C( 2339370879973895867), UINT64_C(15914396696525011477) }, { UINT64_MAX, UINT64_C( 0) } }, { { UINT64_C(17703454356444598334), UINT64_C( 1902071959979066185) }, { UINT64_C(17703454356444598334), UINT64_C(16535179364514408836) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(15213933129178520905), UINT64_C(17198566065116191608) }, { UINT64_C(15213933129178520905), UINT64_C(18321098540334786591) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 7391236093137010105), UINT64_C( 8149620001519388879) }, { UINT64_C( 1479519886084451389), UINT64_C( 5612301609669465831) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C(10745059411321893956), UINT64_C(17681158274966770843) }, { UINT64_C(12075888877328785271), UINT64_C(13588399630576331348) }, { UINT64_C( 0), UINT64_MAX } }, { { UINT64_C(17223061392421060754), UINT64_C( 256844536222304592) }, { UINT64_C(13276475547068630142), UINT64_C( 373604650733713338) }, { UINT64_MAX, UINT64_C( 0) } }, { { UINT64_C(15817235441742767541), UINT64_C( 8349911497263694028) }, { UINT64_C( 3306683102762830829), UINT64_C( 1179745581916837631) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 4290543828840351433), UINT64_C( 4204393115711491950) }, { UINT64_C(14368431464027790381), UINT64_C(11143108144020046107) }, { UINT64_C( 0), 
UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vcgtq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_private a_ = simde_uint64x2_to_private(simde_test_arm_neon_random_u64x2()); simde_uint64x2_private b_ = simde_uint64x2_to_private(simde_test_arm_neon_random_u64x2()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint64x2_t a = simde_uint64x2_from_private(a_); simde_uint64x2_t b = simde_uint64x2_from_private(b_); simde_uint64x2_t r = simde_vcgtq_u64(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vcgt_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgt_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgt_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vcgt_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcgt_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgt_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgt_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vcgt_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vcgt_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgt_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" 
simde-0.7.2/test/arm/neon/cgtz.c000066400000000000000000000770141400333146700164430ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN cgtz #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/cgtz.h" #else #include "../../../simde/arm/neon.h" #endif static int test_simde_vcgtz_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; uint32_t r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 857.35), SIMDE_FLOAT32_C( 0.00) }, { UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 563.25), SIMDE_FLOAT32_C( -646.04) }, { UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -282.49), SIMDE_FLOAT32_C( 166.97) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 481.87), SIMDE_FLOAT32_C( 349.86) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 616.03), SIMDE_FLOAT32_C( 0.00) }, { UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -725.00), SIMDE_FLOAT32_C( 0.00) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 440.03), SIMDE_FLOAT32_C( 616.59) }, { UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_uint32x2_t r = simde_vcgtz_f32(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_private a_ = simde_float32x2_to_private(simde_test_arm_neon_random_f32x2(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0.0; } simde_float32x2_t a = simde_float32x2_from_private(a_); simde_uint32x2_t r = simde_vcgtz_f32(a); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtz_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; uint64_t r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( -862.57) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 383.63) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( -377.66) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -519.74) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 357.37) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 0.00) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -484.99) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -884.47) }, { UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_uint64x1_t r = simde_vcgtz_f64(a); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_private a_ = simde_float64x1_to_private(simde_test_arm_neon_random_f64x1(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0.0; } simde_float64x1_t a = simde_float64x1_from_private(a_); simde_uint64x1_t r = simde_vcgtz_f64(a); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtz_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; uint8_t r[8]; } test_vec[] = { { { INT8_C( 15), -INT8_C( 54), INT8_C( 73), INT8_C( 90), INT8_C( 0), -INT8_C( 125), INT8_C( 0), INT8_C( 0) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { -INT8_C( 43), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 31) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 0), -INT8_C( 7), INT8_C( 0), -INT8_C( 125), INT8_C( 124), -INT8_C( 127), INT8_C( 27), -INT8_C( 17) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { -INT8_C( 25), INT8_C( 116), INT8_C( 39), -INT8_C( 32), INT8_C( 0), -INT8_C( 124), INT8_C( 0), INT8_C( 0) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 29), INT8_C( 0), INT8_C( 72), INT8_C( 0), -INT8_C( 114), INT8_C( 0), INT8_C( 0), INT8_C( 0) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { -INT8_C( 124), -INT8_C( 100), INT8_C( 51), INT8_C( 30), INT8_C( 0), -INT8_C( 11), INT8_MAX, INT8_C( 0) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 32), INT8_C( 0), -INT8_C( 50), -INT8_C( 82), INT8_C( 0), INT8_C( 86), INT8_C( 106), -INT8_C( 97) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, 
UINT8_C( 0) } }, { { INT8_C( 0), INT8_C( 0), INT8_C( 17), -INT8_C( 29), INT8_C( 0), -INT8_C( 112), -INT8_C( 84), INT8_C( 0) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_uint8x8_t r = simde_vcgtz_s8(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_private a_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int8x8_t a = simde_int8x8_from_private(a_); simde_uint8x8_t r = simde_vcgtz_s8(a); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtz_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; uint16_t r[4]; } test_vec[] = { { { INT16_C( 0), INT16_C( 7845), INT16_C( 0), INT16_C( 5355) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { -INT16_C( 19595), INT16_C( 15250), -INT16_C( 24818), INT16_C( 24104) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { INT16_C( 26221), INT16_C( 6909), INT16_C( 24749), -INT16_C( 16175) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { INT16_C( 2518), INT16_C( 0), INT16_C( 0), INT16_C( 0) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { INT16_C( 23682), -INT16_C( 19310), INT16_C( 29139), INT16_C( 30817) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { INT16_C( 9010), -INT16_C( 3056), INT16_C( 0), 
INT16_C( 12741) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { -INT16_C( 13646), INT16_C( 0), INT16_C( 0), INT16_C( 28329) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_uint16x4_t r = simde_vcgtz_s16(a); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_private a_ = simde_int16x4_to_private(simde_test_arm_neon_random_i16x4()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int16x4_t a = simde_int16x4_from_private(a_); simde_uint16x4_t r = simde_vcgtz_s16(a); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtz_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; uint32_t r[2]; } test_vec[] = { { { -INT32_C( 435877116), -INT32_C( 718669983) }, { UINT32_C( 0), UINT32_C( 0) } }, { { -INT32_C( 1100467344), INT32_C( 0) }, { UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 0), INT32_C( 374803573) }, { UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 1937880686), -INT32_C( 982850916) }, { UINT32_MAX, UINT32_C( 0) } }, { { INT32_C( 308225961), INT32_C( 489722853) }, { UINT32_MAX, UINT32_MAX } }, { { INT32_C( 0), -INT32_C( 621839229) }, { UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 0), INT32_C( 0) }, { UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 1754358537), -INT32_C( 1493404043) }, { UINT32_MAX, UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_uint32x2_t r = simde_vcgtz_s32(a); simde_test_arm_neon_assert_equal_u32x2(r, 
simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_private a_ = simde_int32x2_to_private(simde_test_arm_neon_random_i32x2()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int32x2_t a = simde_int32x2_from_private(a_); simde_uint32x2_t r = simde_vcgtz_s32(a); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtz_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; uint64_t r[1]; } test_vec[] = { { { INT64_C( 637165716263353288) }, { UINT64_MAX } }, { { INT64_C( 0) }, { UINT64_C( 0) } }, { { -INT64_C( 2096199424180815067) }, { UINT64_C( 0) } }, { { INT64_C( 0) }, { UINT64_C( 0) } }, { { INT64_C( 3839009869349841013) }, { UINT64_MAX } }, { { -INT64_C( 8324557890790649979) }, { UINT64_C( 0) } }, { { -INT64_C( 3120066588333083577) }, { UINT64_C( 0) } }, { { INT64_C( 0) }, { UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_uint64x1_t r = simde_vcgtz_s64(a); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_private a_ = simde_int64x1_to_private(simde_test_arm_neon_random_i64x1()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int64x1_t a = simde_int64x1_from_private(a_); simde_uint64x1_t r = simde_vcgtz_s64(a); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtzq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; uint32_t r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 610.80), SIMDE_FLOAT32_C( -604.76), SIMDE_FLOAT32_C( 570.85), SIMDE_FLOAT32_C( 181.79) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( -781.68), SIMDE_FLOAT32_C( 808.07), SIMDE_FLOAT32_C( -280.25), SIMDE_FLOAT32_C( 389.37) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -848.94) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 883.90), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -623.70) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -968.78), SIMDE_FLOAT32_C( -829.47), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 43.73) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 64.92), SIMDE_FLOAT32_C( 0.00) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 931.44), SIMDE_FLOAT32_C( -717.64), SIMDE_FLOAT32_C( 921.07) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -374.65), SIMDE_FLOAT32_C( 314.40), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -187.17) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_uint32x4_t r 
= simde_vcgtzq_f32(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_private a_ = simde_float32x4_to_private(simde_test_arm_neon_random_f32x4(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0.0; } simde_float32x4_t a = simde_float32x4_from_private(a_); simde_uint32x4_t r = simde_vcgtzq_f32(a); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtzq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; uint64_t r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -919.27), SIMDE_FLOAT64_C( 975.16) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -36.67) }, { UINT64_C( 0), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 540.59), SIMDE_FLOAT64_C( 0.00) }, { UINT64_MAX, UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 839.91) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( 293.38), SIMDE_FLOAT64_C( 0.00) }, { UINT64_MAX, UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -105.76) }, { UINT64_C( 0), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -155.06), SIMDE_FLOAT64_C( 0.00) }, { UINT64_C( 0), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -532.89), SIMDE_FLOAT64_C( 0.00) }, { UINT64_C( 0), UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_uint64x2_t r = simde_vcgtzq_f64(a); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_private a_ = simde_float64x2_to_private(simde_test_arm_neon_random_f64x2(-1000.0, 1000.0)); 
/* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0.0; } simde_float64x2_t a = simde_float64x2_from_private(a_); simde_uint64x2_t r = simde_vcgtzq_f64(a); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtzq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; uint8_t r[16]; } test_vec[] = { { { -INT8_C( 34), -INT8_C( 103), INT8_C( 113), INT8_C( 58), INT8_C( 32), -INT8_C( 105), INT8_C( 28), -INT8_C( 43), INT8_C( 0), -INT8_C( 67), INT8_C( 24), INT8_C( 0), -INT8_C( 15), INT8_C( 0), INT8_C( 0), INT8_C( 0) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 85), INT8_C( 0), INT8_C( 0), INT8_C( 118), INT8_C( 0), INT8_C( 99), INT8_C( 75), INT8_C( 0), INT8_C( 32), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 113), -INT8_C( 28), INT8_C( 121), INT8_C( 0) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { -INT8_C( 72), INT8_C( 0), INT8_C( 0), INT8_C( 9), INT8_C( 0), INT8_C( 0), -INT8_C( 122), -INT8_C( 64), INT8_C( 122), INT8_C( 0), -INT8_C( 46), INT8_C( 10), INT8_C( 62), INT8_C( 75), INT8_C( 0), -INT8_C( 5) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 27), INT8_C( 14), INT8_C( 0), -INT8_C( 69), INT8_C( 37), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 87), -INT8_C( 112), -INT8_C( 104), 
INT8_C( 0), INT8_C( 0), -INT8_C( 106) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 80), INT8_C( 0), INT8_C( 0), INT8_C( 117), INT8_C( 0), -INT8_C( 15), INT8_C( 21), INT8_C( 0), INT8_C( 62), -INT8_C( 66), INT8_C( 88), INT8_C( 0), INT8_C( 82), INT8_C( 0), INT8_C( 108), INT8_C( 0) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 0), INT8_C( 41), INT8_C( 0), -INT8_C( 117), INT8_C( 26), -INT8_C( 74), INT8_C( 83), INT8_C( 88), INT8_C( 0), -INT8_C( 85), INT8_C( 46), -INT8_C( 57), -INT8_C( 114), INT8_C( 0), -INT8_C( 81), -INT8_C( 18) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 52), INT8_C( 79), INT8_C( 0), INT8_C( 78), INT8_C( 5), INT8_C( 0), INT8_C( 0), INT8_C( 122), -INT8_C( 17), -INT8_C( 44), INT8_C( 65), INT8_C( 125), INT8_C( 0), -INT8_C( 16), INT8_C( 0), -INT8_C( 67) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 16), INT8_C( 0), -INT8_C( 84), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 61), INT8_C( 38), -INT8_C( 47), INT8_C( 64), -INT8_C( 107), INT8_C( 0), -INT8_C( 85), INT8_C( 82), INT8_C( 34) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_uint8x16_t r = simde_vcgtzq_s8(a); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_private a_ = simde_int8x16_to_private(simde_test_arm_neon_random_i8x16()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int8x16_t a = simde_int8x16_from_private(a_); simde_uint8x16_t r = simde_vcgtzq_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtzq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; uint16_t r[8]; } test_vec[] = { { { -INT16_C( 13787), INT16_C( 10471), INT16_C( 31422), INT16_C( 25101), INT16_C( 7709), INT16_C( 12583), INT16_C( 0), INT16_C( 5018) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { INT16_C( 13969), -INT16_C( 22918), INT16_C( 14574), INT16_C( 4890), INT16_C( 258), -INT16_C( 16325), INT16_C( 18812), INT16_C( 0) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 14880), INT16_C( 0), INT16_C( 0), -INT16_C( 28010), INT16_C( 4584), -INT16_C( 10696), INT16_C( 21321), INT16_C( 19433) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { INT16_C( 13433), INT16_C( 0), -INT16_C( 18791), INT16_C( 0), -INT16_C( 24592), -INT16_C( 3774), -INT16_C( 10159), INT16_C( 14724) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { INT16_C( 0), -INT16_C( 29900), INT16_C( 0), INT16_C( 12896), INT16_C( 0), INT16_C( 27757), -INT16_C( 18989), INT16_C( 0) 
}, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { INT16_C( 0), INT16_C( 1371), INT16_C( 0), INT16_C( 873), -INT16_C( 25245), INT16_C( 0), -INT16_C( 4292), INT16_C( 3919) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { INT16_C( 22279), INT16_C( 18026), INT16_C( 0), -INT16_C( 31377), -INT16_C( 13515), INT16_C( 6794), -INT16_C( 3164), INT16_C( 2077) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { INT16_C( 22319), -INT16_C( 24443), INT16_C( 10231), -INT16_C( 411), -INT16_C( 12418), INT16_C( 0), -INT16_C( 19433), INT16_C( 0) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_uint16x8_t r = simde_vcgtzq_s16(a); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_private a_ = simde_int16x8_to_private(simde_test_arm_neon_random_i16x8()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int16x8_t a = simde_int16x8_from_private(a_); simde_uint16x8_t r = simde_vcgtzq_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtzq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; uint32_t r[4]; } test_vec[] = { { { INT32_C( 0), INT32_C( 551830307), INT32_C( 0), INT32_C( 0) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 0), -INT32_C( 2099009646), -INT32_C( 383656512), INT32_C( 0) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 0), -INT32_C( 1902038387), INT32_C( 2002197176), INT32_C( 0) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { INT32_C( 0), -INT32_C( 1228072830), INT32_C( 1882166330), INT32_C( 0) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { INT32_C( 678303692), INT32_C( 0), INT32_C( 0), -INT32_C( 1645960934) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { -INT32_C( 362511803), -INT32_C( 836995953), -INT32_C( 238691291), INT32_C( 0) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 0), -INT32_C( 1972289328), INT32_C( 931055089), -INT32_C( 702422457) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { INT32_C( 242051610), INT32_C( 614696862), INT32_C( 1925385656), INT32_C( 222764860) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_uint32x4_t r = simde_vcgtzq_s32(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_private a_ = 
simde_int32x4_to_private(simde_test_arm_neon_random_i32x4()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int32x4_t a = simde_int32x4_from_private(a_); simde_uint32x4_t r = simde_vcgtzq_s32(a); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcgtzq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; uint64_t r[2]; } test_vec[] = { { { -INT64_C( 2722594793879081836), -INT64_C( 7336936062086546730) }, { UINT64_C( 0), UINT64_C( 0) } }, { { -INT64_C( 5132795313325816595), INT64_C( 1914458200832322093) }, { UINT64_C( 0), UINT64_MAX } }, { { -INT64_C( 464587147821614976), INT64_C( 0) }, { UINT64_C( 0), UINT64_C( 0) } }, { { -INT64_C( 356795487879775814), -INT64_C( 8887970811507008019) }, { UINT64_C( 0), UINT64_C( 0) } }, { { -INT64_C( 2556735944492090887), INT64_C( 0) }, { UINT64_C( 0), UINT64_C( 0) } }, { { -INT64_C( 2440868277195989500), INT64_C( 0) }, { UINT64_C( 0), UINT64_C( 0) } }, { { INT64_C( 0), INT64_C( 2747886199500617458) }, { UINT64_C( 0), UINT64_MAX } }, { { INT64_C( 3323549104968795506), INT64_C( 3818980872865146362) }, { UINT64_MAX, UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_uint64x2_t r = simde_vcgtzq_s64(a); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_private a_ = simde_int64x2_to_private(simde_test_arm_neon_random_i64x2()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int64x2_t a = simde_int64x2_from_private(a_); simde_uint64x2_t r = simde_vcgtzq_s64(a); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vcgtz_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtz_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtz_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtz_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtz_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtz_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzq_s64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/cle.c000066400000000000000000002272631400333146700162420ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN cle #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/cle.h" #else #include "../../../simde/arm/neon.h" #endif static int test_simde_vcle_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b[2]; uint32_t r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 791.10), SIMDE_FLOAT32_C( 796.86) }, { SIMDE_FLOAT32_C( 791.10), SIMDE_FLOAT32_C( 796.86) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( -191.39), SIMDE_FLOAT32_C( -320.84) }, { SIMDE_FLOAT32_C( 186.39), SIMDE_FLOAT32_C( -320.84) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 401.94), SIMDE_FLOAT32_C( -653.44) }, { SIMDE_FLOAT32_C( -526.24), SIMDE_FLOAT32_C( 298.37) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 452.25), SIMDE_FLOAT32_C( 434.09) }, { SIMDE_FLOAT32_C( -83.69), SIMDE_FLOAT32_C( 224.54) }, { UINT32_C( 0), UINT32_C( 0) } }, { { 
SIMDE_FLOAT32_C( 828.36), SIMDE_FLOAT32_C( 113.27) }, { SIMDE_FLOAT32_C( -845.96), SIMDE_FLOAT32_C( 459.19) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 705.27), SIMDE_FLOAT32_C( -712.20) }, { SIMDE_FLOAT32_C( 705.27), SIMDE_FLOAT32_C( 140.77) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 640.57), SIMDE_FLOAT32_C( -173.03) }, { SIMDE_FLOAT32_C( 839.60), SIMDE_FLOAT32_C( -173.03) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( -38.70), SIMDE_FLOAT32_C( -25.63) }, { SIMDE_FLOAT32_C( -38.70), SIMDE_FLOAT32_C( 357.76) }, { UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_uint32x2_t r = simde_vcle_f32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_private a_ = simde_float32x2_to_private(simde_test_arm_neon_random_f32x2(-1000.0, 1000.0)); simde_float32x2_private b_ = simde_float32x2_to_private(simde_test_arm_neon_random_f32x2(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_float32x2_t a = simde_float32x2_from_private(a_); simde_float32x2_t b = simde_float32x2_from_private(b_); simde_uint32x2_t r = simde_vcle_f32(a, b); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcle_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; simde_float64 b[1]; uint64_t r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( -975.50) }, { SIMDE_FLOAT64_C( 222.47) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 986.68) }, { SIMDE_FLOAT64_C( 986.68) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 852.29) }, { SIMDE_FLOAT64_C( 445.48) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -413.29) }, { SIMDE_FLOAT64_C( 220.80) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 33.63) }, { SIMDE_FLOAT64_C( -513.00) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -232.79) }, { SIMDE_FLOAT64_C( -316.14) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -848.20) }, { SIMDE_FLOAT64_C( -848.20) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 855.66) }, { SIMDE_FLOAT64_C( 630.93) }, { UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_uint64x1_t r = simde_vcle_f64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_private a_ = simde_float64x1_to_private(simde_test_arm_neon_random_f64x1(-1000.0, 1000.0)); simde_float64x1_private b_ = simde_float64x1_to_private(simde_test_arm_neon_random_f64x1(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_float64x1_t a = simde_float64x1_from_private(a_); simde_float64x1_t b = simde_float64x1_from_private(b_); simde_uint64x1_t r = simde_vcle_f64(a, b); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcle_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; uint8_t r[8]; } test_vec[] = { { { -INT8_C( 100), INT8_C( 76), INT8_C( 123), -INT8_C( 90), INT8_C( 77), INT8_C( 7), INT8_C( 31), -INT8_C( 9) }, { -INT8_C( 100), INT8_C( 76), -INT8_C( 104), -INT8_C( 125), INT8_C( 74), INT8_C( 7), INT8_C( 62), -INT8_C( 74) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 48), -INT8_C( 56), -INT8_C( 4), INT8_C( 49), INT8_C( 88), INT8_C( 74), INT8_C( 116), -INT8_C( 12) }, { INT8_C( 48), -INT8_C( 14), -INT8_C( 107), INT8_C( 125), INT8_C( 88), -INT8_C( 76), INT8_C( 116), -INT8_C( 12) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { INT8_C( 84), INT8_C( 26), -INT8_C( 64), INT8_C( 55), INT8_C( 61), INT8_C( 40), -INT8_C( 96), INT8_C( 111) }, { -INT8_C( 15), INT8_C( 26), -INT8_C( 96), -INT8_C( 79), INT8_C( 100), INT8_C( 23), -INT8_C( 96), -INT8_C( 107) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { -INT8_C( 108), -INT8_C( 49), INT8_C( 52), -INT8_C( 88), -INT8_C( 94), INT8_C( 47), -INT8_C( 71), -INT8_C( 38) }, { INT8_C( 55), INT8_C( 121), INT8_C( 17), INT8_C( 117), -INT8_C( 94), INT8_C( 47), -INT8_C( 28), -INT8_C( 109) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 46), INT8_C( 32), 
INT8_C( 8), INT8_C( 2), -INT8_C( 36), -INT8_C( 46), -INT8_C( 20), -INT8_C( 115) }, { INT8_C( 46), INT8_C( 32), INT8_C( 25), -INT8_C( 76), INT8_C( 86), -INT8_C( 46), -INT8_C( 114), -INT8_C( 115) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 95), -INT8_C( 59), -INT8_C( 58), INT8_C( 59), -INT8_C( 87), -INT8_C( 79), -INT8_C( 83), -INT8_C( 61) }, { INT8_C( 95), -INT8_C( 75), -INT8_C( 59), INT8_C( 59), INT8_C( 21), -INT8_C( 79), -INT8_C( 84), INT8_C( 67) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 21), -INT8_C( 45), INT8_C( 16), -INT8_C( 12), -INT8_C( 98), -INT8_C( 67), -INT8_C( 126), INT8_C( 59) }, { INT8_C( 21), -INT8_C( 45), INT8_C( 16), -INT8_C( 65), -INT8_C( 36), -INT8_C( 67), -INT8_C( 126), INT8_C( 59) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { -INT8_C( 24), -INT8_C( 61), INT8_C( 97), -INT8_C( 1), -INT8_C( 126), INT8_C( 110), INT8_C( 8), INT8_C( 109) }, { INT8_C( 96), INT8_C( 49), INT8_C( 97), -INT8_C( 1), -INT8_C( 126), INT8_C( 110), INT8_C( 8), -INT8_C( 105) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_uint8x8_t r = simde_vcle_s8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_private a_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); simde_int8x8_private b_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int8x8_t a = simde_int8x8_from_private(a_); simde_int8x8_t b = simde_int8x8_from_private(b_); simde_uint8x8_t r = simde_vcle_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcle_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; uint16_t r[4]; } test_vec[] = { { { -INT16_C( 28046), INT16_C( 20567), -INT16_C( 9917), INT16_C( 11469) }, { -INT16_C( 19367), INT16_C( 18908), INT16_C( 10036), -INT16_C( 3500) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 6763), INT16_C( 15162), -INT16_C( 26685), -INT16_C( 10020) }, { INT16_C( 794), -INT16_C( 29559), -INT16_C( 8043), -INT16_C( 10020) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { INT16_C( 31190), -INT16_C( 28069), -INT16_C( 20728), INT16_C( 22148) }, { INT16_C( 31190), INT16_C( 16835), -INT16_C( 621), INT16_C( 22141) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 25501), INT16_C( 12858), INT16_C( 5699), -INT16_C( 1014) }, { INT16_C( 3775), INT16_C( 7438), INT16_C( 27375), -INT16_C( 2129) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 28744), INT16_C( 16177), -INT16_C( 20978), -INT16_C( 23914) }, { INT16_C( 28744), -INT16_C( 6831), -INT16_C( 29741), INT16_C( 5655) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { INT16_C( 8239), INT16_C( 8062), -INT16_C( 15244), -INT16_C( 23530) }, { INT16_C( 25696), INT16_C( 3219), -INT16_C( 15244), -INT16_C( 32180) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 19927), INT16_C( 1797), -INT16_C( 31891), -INT16_C( 23749) }, { INT16_C( 19927), INT16_C( 1797), -INT16_C( 31891), 
-INT16_C( 2010) },
      { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } },
    { { INT16_C( 12192), -INT16_C( 25055), -INT16_C( 16334), INT16_C( 26263) },
      { -INT16_C( 17332), -INT16_C( 25055), -INT16_C( 16334), INT16_C( 13507) },
      { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_int16x4_t a = simde_vld1_s16(test_vec[i].a);
    simde_int16x4_t b = simde_vld1_s16(test_vec[i].b);
    simde_uint16x4_t r = simde_vcle_s16(a, b);
    simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r));
  }

  return 0;
#else
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_int16x4_private a_ = simde_int16x4_to_private(simde_test_arm_neon_random_i16x4());
    simde_int16x4_private b_ = simde_int16x4_to_private(simde_test_arm_neon_random_i16x4());

    /* Make some equal which is unlikely to happen by accident. */
    for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) {
      if (rand() < (RAND_MAX / 3))
        a_.values[j] = b_.values[j];
    }

    simde_int16x4_t a = simde_int16x4_from_private(a_);
    simde_int16x4_t b = simde_int16x4_from_private(b_);
    simde_uint16x4_t r = simde_vcle_s16(a, b);

    simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Tests simde_vcle_s32 on 2-lane int32_t vectors.
 *
 * Enabled branch (#if 1): for each of the 8 hard-coded test_vec entries, loads
 * a and b with simde_vld1_s32, calls simde_vcle_s32(a, b) and compares the
 * result with the stored r through simde_test_arm_neon_assert_equal_u32x2;
 * returns 0 once the loop finishes.  In every entry r is UINT32_MAX in the
 * lanes where a <= b and 0 in the others.
 *
 * Disabled branch (#else): draws 8 random a/b pairs, copies b into a for about
 * one lane in three so that equal lanes occur, hands a, b and the computed r
 * to the simde_test_arm_neon_write_* helpers, and returns 1.
 * NOTE(review): the write helpers presumably print fresh test_vec entries --
 * confirm against the test harness. */
static int
test_simde_vcle_s32 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    int32_t a[2];
    int32_t b[2];
    uint32_t r[2];
  } test_vec[] = {
    { { -INT32_C( 1899112561), -INT32_C( 2083733649) },
      { -INT32_C( 1426571674), -INT32_C( 8485340) },
      { UINT32_MAX, UINT32_MAX } },
    { { -INT32_C( 860232052), INT32_C( 1455351675) },
      { INT32_C( 62249150), -INT32_C( 2099558987) },
      { UINT32_MAX, UINT32_C( 0) } },
    { { -INT32_C( 1462919358), INT32_C( 2113244612) },
      { INT32_C( 251777099), INT32_C( 2113244612) },
      { UINT32_MAX, UINT32_MAX } },
    { { INT32_C( 583472996), INT32_C( 422865227) },
      { -INT32_C( 1792605758), INT32_C( 422865227) },
      { UINT32_C( 0), UINT32_MAX } },
    { { INT32_C( 705979358), -INT32_C( 147253965) },
      { -INT32_C( 59494494), INT32_C( 1729353888) },
      { UINT32_C( 0), UINT32_MAX } },
    { { INT32_C( 1267101753), INT32_C( 1832636450) },
      { INT32_C( 1267101753), -INT32_C( 1766955646) },
      { UINT32_MAX, UINT32_C( 0) } },
    { { INT32_C( 1306118315), -INT32_C( 1756803337) },
      { INT32_C( 570383654), INT32_C( 155378528) },
      { UINT32_C( 0), UINT32_MAX } },
    { { INT32_C( 315803097), -INT32_C( 954861330) },
      { INT32_C( 1280117949), -INT32_C( 954861330) },
      { UINT32_MAX, UINT32_MAX } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_int32x2_t a = simde_vld1_s32(test_vec[i].a);
    simde_int32x2_t b = simde_vld1_s32(test_vec[i].b);
    simde_uint32x2_t r = simde_vcle_s32(a, b);
    simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r));
  }

  return 0;
#else
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_int32x2_private a_ = simde_int32x2_to_private(simde_test_arm_neon_random_i32x2());
    simde_int32x2_private b_ = simde_int32x2_to_private(simde_test_arm_neon_random_i32x2());

    /* Make some equal which is unlikely to happen by accident.
*/
    for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) {
      if (rand() < (RAND_MAX / 3))
        a_.values[j] = b_.values[j];
    }

    simde_int32x2_t a = simde_int32x2_from_private(a_);
    simde_int32x2_t b = simde_int32x2_from_private(b_);
    simde_uint32x2_t r = simde_vcle_s32(a, b);

    simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Tests simde_vcle_s64 on 1-lane int64_t vectors.
 *
 * Enabled branch (#if 1): for each of the 8 hard-coded test_vec entries, loads
 * a and b with simde_vld1_s64, calls simde_vcle_s64(a, b) and compares the
 * result with the stored r through simde_test_arm_neon_assert_equal_u64x1;
 * returns 0 once the loop finishes.  In every entry r is UINT64_MAX when
 * a <= b and 0 otherwise.
 *
 * Disabled branch (#else): draws 8 random a/b pairs, copies b into a with a
 * probability of about one in three so that equal inputs occur, hands a, b
 * and the computed r to the simde_test_arm_neon_write_* helpers, and
 * returns 1.
 * NOTE(review): the write helpers presumably print fresh test_vec entries --
 * confirm against the test harness. */
static int
test_simde_vcle_s64 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    int64_t a[1];
    int64_t b[1];
    uint64_t r[1];
  } test_vec[] = {
    { { INT64_C( 8637217986458171406) },
      { INT64_C( 7326912451601565845) },
      { UINT64_C( 0) } },
    { { INT64_C( 6706107946540577436) },
      { INT64_C( 8712933937820291348) },
      { UINT64_MAX } },
    { { INT64_C( 646593032519645204) },
      { INT64_C( 2273665174117615427) },
      { UINT64_MAX } },
    { { INT64_C( 6595974795757143079) },
      { -INT64_C( 5082613164841186952) },
      { UINT64_C( 0) } },
    { { -INT64_C( 5146632559931370274) },
      { -INT64_C( 5146632559931370274) },
      { UINT64_MAX } },
    { { INT64_C( 2241975906344229579) },
      { -INT64_C( 4514700189641056645) },
      { UINT64_C( 0) } },
    { { INT64_C( 7922964899174052536) },
      { INT64_C( 8849835638844418162) },
      { UINT64_MAX } },
    { { -INT64_C( 2477221896963707100) },
      { -INT64_C( 1502892239977859308) },
      { UINT64_MAX } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_int64x1_t a = simde_vld1_s64(test_vec[i].a);
    simde_int64x1_t b = simde_vld1_s64(test_vec[i].b);
    simde_uint64x1_t r = simde_vcle_s64(a, b);
    simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r));
  }

  return 0;
#else
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_int64x1_private a_ = simde_int64x1_to_private(simde_test_arm_neon_random_i64x1());
    simde_int64x1_private b_ = simde_int64x1_to_private(simde_test_arm_neon_random_i64x1());

    /* Make some equal which is unlikely to happen by accident.
*/
    for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) {
      if (rand() < (RAND_MAX / 3))
        a_.values[j] = b_.values[j];
    }

    simde_int64x1_t a = simde_int64x1_from_private(a_);
    simde_int64x1_t b = simde_int64x1_from_private(b_);
    simde_uint64x1_t r = simde_vcle_s64(a, b);

    simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Tests simde_vcle_u8 on 8-lane uint8_t vectors.
 *
 * Enabled branch (#if 1): for each of the 8 hard-coded test_vec entries, loads
 * a and b with simde_vld1_u8, calls simde_vcle_u8(a, b) and compares the
 * result with the stored r through simde_test_arm_neon_assert_equal_u8x8;
 * returns 0 once the loop finishes.  In every entry r is UINT8_MAX in the
 * lanes where a <= b (unsigned comparison) and 0 in the others.
 *
 * Disabled branch (#else): draws 8 random a/b pairs, copies b into a for about
 * one lane in three so that equal lanes occur, hands a, b and the computed r
 * to the simde_test_arm_neon_write_* helpers, and returns 1.
 * NOTE(review): the write helpers presumably print fresh test_vec entries --
 * confirm against the test harness. */
static int
test_simde_vcle_u8 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    uint8_t a[8];
    uint8_t b[8];
    uint8_t r[8];
  } test_vec[] = {
    { { UINT8_C(192), UINT8_C( 53), UINT8_C(203), UINT8_C( 45), UINT8_C( 48), UINT8_C(108), UINT8_C(220), UINT8_C( 89) },
      { UINT8_C(196), UINT8_MAX, UINT8_C(126), UINT8_C(235), UINT8_C(101), UINT8_C( 94), UINT8_C( 93), UINT8_C(247) },
      { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX } },
    { { UINT8_C(249), UINT8_C( 38), UINT8_C(221), UINT8_C( 5), UINT8_C(109), UINT8_C(160), UINT8_C( 54), UINT8_C(211) },
      { UINT8_C(213), UINT8_C( 1), UINT8_C( 0), UINT8_C( 5), UINT8_C(109), UINT8_C(221), UINT8_C( 94), UINT8_C( 50) },
      { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } },
    { { UINT8_C( 11), UINT8_C(179), UINT8_C( 44), UINT8_C( 80), UINT8_C( 92), UINT8_C(169), UINT8_C( 45), UINT8_C( 85) },
      { UINT8_C(178), UINT8_C( 10), UINT8_C(115), UINT8_C(197), UINT8_C(170), UINT8_C(169), UINT8_C(153), UINT8_C(127) },
      { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } },
    { { UINT8_C(191), UINT8_C(103), UINT8_C( 62), UINT8_C(250), UINT8_C(226), UINT8_C(203), UINT8_C( 48), UINT8_C(189) },
      { UINT8_C(127), UINT8_C(126), UINT8_C( 62), UINT8_C(219), UINT8_C( 10), UINT8_C(107), UINT8_C( 48), UINT8_C(189) },
      { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } },
    { { UINT8_C(192), UINT8_C( 36), UINT8_C( 15), UINT8_C( 43), UINT8_C(221), UINT8_C( 89), UINT8_C(126), UINT8_C(199) },
      { UINT8_C(192), UINT8_C( 17), UINT8_C(193), UINT8_C(163), UINT8_C(221), UINT8_C( 19), UINT8_C(144), UINT8_C( 92) },
      { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } },
    { { UINT8_C( 11), UINT8_C( 93), UINT8_C(208), UINT8_C( 87), UINT8_C(182), UINT8_C( 85), UINT8_C( 78), UINT8_C(171) },
      { UINT8_C(149), UINT8_C( 93), UINT8_C(215), UINT8_C(156), UINT8_C(182), UINT8_C( 85), UINT8_C( 99), UINT8_C(119) },
      { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } },
    { { UINT8_C(121), UINT8_C( 51), UINT8_C(100), UINT8_C(179), UINT8_C(164), UINT8_C(189), UINT8_C( 99), UINT8_C( 73) },
      { UINT8_C(152), UINT8_C( 51), UINT8_C(160), UINT8_C(142), UINT8_C(164), UINT8_C(239), UINT8_C( 58), UINT8_C( 57) },
      { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } },
    { { UINT8_C( 94), UINT8_C(148), UINT8_C( 16), UINT8_C(150), UINT8_C( 62), UINT8_C(205), UINT8_C( 13), UINT8_C(183) },
      { UINT8_C(134), UINT8_C(195), UINT8_C(106), UINT8_C(196), UINT8_C(128), UINT8_C(205), UINT8_C( 13), UINT8_C( 24) },
      { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a);
    simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b);
    simde_uint8x8_t r = simde_vcle_u8(a, b);
    simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r));
  }

  return 0;
#else
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_uint8x8_private a_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8());
    simde_uint8x8_private b_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8());

    /* Make some equal which is unlikely to happen by accident.
*/
    for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) {
      if (rand() < (RAND_MAX / 3))
        a_.values[j] = b_.values[j];
    }

    simde_uint8x8_t a = simde_uint8x8_from_private(a_);
    simde_uint8x8_t b = simde_uint8x8_from_private(b_);
    simde_uint8x8_t r = simde_vcle_u8(a, b);

    simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Tests simde_vcle_u16 on 4-lane uint16_t vectors.
 *
 * Enabled branch (#if 1): for each of the 8 hard-coded test_vec entries, loads
 * a and b with simde_vld1_u16, calls simde_vcle_u16(a, b) and compares the
 * result with the stored r through simde_test_arm_neon_assert_equal_u16x4;
 * returns 0 once the loop finishes.  In every entry r is UINT16_MAX in the
 * lanes where a <= b (unsigned comparison) and 0 in the others.
 *
 * Disabled branch (#else): draws 8 random a/b pairs, copies b into a for about
 * one lane in three so that equal lanes occur, hands a, b and the computed r
 * to the simde_test_arm_neon_write_* helpers, and returns 1.
 * NOTE(review): the write helpers presumably print fresh test_vec entries --
 * confirm against the test harness. */
static int
test_simde_vcle_u16 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    uint16_t a[4];
    uint16_t b[4];
    uint16_t r[4];
  } test_vec[] = {
    { { UINT16_C(15609), UINT16_C(41202), UINT16_C(28612), UINT16_C(11885) },
      { UINT16_C(38027), UINT16_C(32348), UINT16_C(58641), UINT16_C(31115) },
      { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } },
    { { UINT16_C(25440), UINT16_C(54415), UINT16_C(51022), UINT16_C(25593) },
      { UINT16_C(25440), UINT16_C(22787), UINT16_C(62879), UINT16_C(25593) },
      { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } },
    { { UINT16_C(61179), UINT16_C(35880), UINT16_C(46853), UINT16_C(13190) },
      { UINT16_C(35022), UINT16_C(35880), UINT16_C(46853), UINT16_C(21600) },
      { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } },
    { { UINT16_C( 7833), UINT16_C(38000), UINT16_C(12828), UINT16_C(57248) },
      { UINT16_C( 7833), UINT16_C(38000), UINT16_C(56844), UINT16_C(57248) },
      { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } },
    { { UINT16_C(15022), UINT16_C(46130), UINT16_C(37618), UINT16_C(28936) },
      { UINT16_C(11192), UINT16_C(16976), UINT16_C(35154), UINT16_C(28523) },
      { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } },
    { { UINT16_C( 5040), UINT16_C( 8681), UINT16_C(35135), UINT16_C( 5888) },
      { UINT16_C( 5040), UINT16_C(24253), UINT16_C(61261), UINT16_C(16146) },
      { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } },
    { { UINT16_C(63454), UINT16_C(62619), UINT16_C(59274), UINT16_C(17927) },
      { UINT16_C(63454), UINT16_C(62619), UINT16_C(33880),
UINT16_C(38677) },
      { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } },
    { { UINT16_C(26185), UINT16_C(30236), UINT16_C(12122), UINT16_C(56246) },
      { UINT16_C(26185), UINT16_C(36629), UINT16_C(37223), UINT16_C(61992) },
      { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a);
    simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b);
    simde_uint16x4_t r = simde_vcle_u16(a, b);
    simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r));
  }

  return 0;
#else
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_uint16x4_private a_ = simde_uint16x4_to_private(simde_test_arm_neon_random_u16x4());
    simde_uint16x4_private b_ = simde_uint16x4_to_private(simde_test_arm_neon_random_u16x4());

    /* Make some equal which is unlikely to happen by accident. */
    for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) {
      if (rand() < (RAND_MAX / 3))
        a_.values[j] = b_.values[j];
    }

    simde_uint16x4_t a = simde_uint16x4_from_private(a_);
    simde_uint16x4_t b = simde_uint16x4_from_private(b_);
    simde_uint16x4_t r = simde_vcle_u16(a, b);

    simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Tests simde_vcle_u32 on 2-lane uint32_t vectors.
 *
 * Enabled branch (#if 1): for each of the 8 hard-coded test_vec entries, loads
 * a and b with simde_vld1_u32, calls simde_vcle_u32(a, b) and compares the
 * result with the stored r through simde_test_arm_neon_assert_equal_u32x2;
 * returns 0 once the loop finishes.  In every entry r is UINT32_MAX in the
 * lanes where a <= b (unsigned comparison) and 0 in the others.
 *
 * Disabled branch (#else): draws 8 random a/b pairs, copies b into a for about
 * one lane in three so that equal lanes occur, hands a, b and the computed r
 * to the simde_test_arm_neon_write_* helpers, and returns 1.
 * NOTE(review): the write helpers presumably print fresh test_vec entries --
 * confirm against the test harness. */
static int
test_simde_vcle_u32 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    uint32_t a[2];
    uint32_t b[2];
    uint32_t r[2];
  } test_vec[] = {
    { { UINT32_C(3076618288), UINT32_C( 170876675) },
      { UINT32_C(1243736868), UINT32_C(3393524517) },
      { UINT32_C( 0), UINT32_MAX } },
    { { UINT32_C( 403272314), UINT32_C(3921604021) },
      { UINT32_C( 403272314), UINT32_C(1704016132) },
      { UINT32_MAX, UINT32_C( 0) } },
    { { UINT32_C(3919454404), UINT32_C( 808697611) },
      { UINT32_C(4277828050), UINT32_C(1259427829) },
      { UINT32_MAX, UINT32_MAX } },
    { { UINT32_C(3014666041), UINT32_C(2399389937) },
      { UINT32_C(3808648435), UINT32_C(2399389937) },
      { UINT32_MAX, UINT32_MAX } },
    { { UINT32_C( 557769807), UINT32_C(1394620765) },
      { UINT32_C(4187894160), UINT32_C(2820724728) },
      { UINT32_MAX, UINT32_MAX } },
    { { UINT32_C(1415009889), UINT32_C(2738379698) },
      { UINT32_C(3560127267), UINT32_C(2293793354) },
      { UINT32_MAX, UINT32_C( 0) } },
    { { UINT32_C(1382663105), UINT32_C(3981117429) },
      { UINT32_C(1771400159), UINT32_C(2092955172) },
      { UINT32_MAX, UINT32_C( 0) } },
    { { UINT32_C(4195714007), UINT32_C( 87118492) },
      { UINT32_C(1855030986), UINT32_C( 87118492) },
      { UINT32_C( 0), UINT32_MAX } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a);
    simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b);
    simde_uint32x2_t r = simde_vcle_u32(a, b);
    simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r));
  }

  return 0;
#else
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_uint32x2_private a_ = simde_uint32x2_to_private(simde_test_arm_neon_random_u32x2());
    simde_uint32x2_private b_ = simde_uint32x2_to_private(simde_test_arm_neon_random_u32x2());

    /* Make some equal which is unlikely to happen by accident.
*/
    for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) {
      if (rand() < (RAND_MAX / 3))
        a_.values[j] = b_.values[j];
    }

    simde_uint32x2_t a = simde_uint32x2_from_private(a_);
    simde_uint32x2_t b = simde_uint32x2_from_private(b_);
    simde_uint32x2_t r = simde_vcle_u32(a, b);

    simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Tests simde_vcle_u64 on 1-lane uint64_t vectors.
 *
 * Enabled branch (#if 1): for each of the 8 hard-coded test_vec entries, loads
 * a and b with simde_vld1_u64, calls simde_vcle_u64(a, b) and compares the
 * result with the stored r through simde_test_arm_neon_assert_equal_u64x1;
 * returns 0 once the loop finishes.  In every entry r is UINT64_MAX when
 * a <= b (unsigned comparison) and 0 otherwise.
 *
 * Disabled branch (#else): draws 8 random a/b pairs, copies b into a with a
 * probability of about one in three so that equal inputs occur, hands a, b
 * and the computed r to the simde_test_arm_neon_write_* helpers, and
 * returns 1.
 * NOTE(review): the write helpers presumably print fresh test_vec entries --
 * confirm against the test harness. */
static int
test_simde_vcle_u64 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    uint64_t a[1];
    uint64_t b[1];
    uint64_t r[1];
  } test_vec[] = {
    { { UINT64_C(17342515446514902425) },
      { UINT64_C(17342515446514902425) },
      { UINT64_MAX } },
    { { UINT64_C( 3327773999065374941) },
      { UINT64_C( 300137521445377989) },
      { UINT64_C( 0) } },
    { { UINT64_C( 7235656385619221034) },
      { UINT64_C(16482773610270905397) },
      { UINT64_MAX } },
    { { UINT64_C( 7613065674013128417) },
      { UINT64_C( 120471459712082775) },
      { UINT64_C( 0) } },
    { { UINT64_C(14955085576460449004) },
      { UINT64_C(14955085576460449004) },
      { UINT64_MAX } },
    { { UINT64_C(15304861135706240155) },
      { UINT64_C(12909976860436499532) },
      { UINT64_C( 0) } },
    { { UINT64_C(12171332214848831173) },
      { UINT64_C( 9589066105881297644) },
      { UINT64_C( 0) } },
    { { UINT64_C(14169459702986888117) },
      { UINT64_C(14169459702986888117) },
      { UINT64_MAX } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a);
    simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b);
    simde_uint64x1_t r = simde_vcle_u64(a, b);
    simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r));
  }

  return 0;
#else
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_uint64x1_private a_ = simde_uint64x1_to_private(simde_test_arm_neon_random_u64x1());
    simde_uint64x1_private b_ = simde_uint64x1_to_private(simde_test_arm_neon_random_u64x1());

    /* Make some equal which is unlikely to happen by
accident. */
    for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) {
      if (rand() < (RAND_MAX / 3))
        a_.values[j] = b_.values[j];
    }

    simde_uint64x1_t a = simde_uint64x1_from_private(a_);
    simde_uint64x1_t b = simde_uint64x1_from_private(b_);
    simde_uint64x1_t r = simde_vcle_u64(a, b);

    simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Tests simde_vcleq_f32 on 4-lane simde_float32 vectors.
 *
 * Enabled branch (#if 1): for each of the 8 hard-coded test_vec entries, loads
 * a and b with simde_vld1q_f32, calls simde_vcleq_f32(a, b) and compares the
 * result with the stored r through simde_test_arm_neon_assert_equal_u32x4;
 * returns 0 once the loop finishes.  In every entry r is UINT32_MAX in the
 * lanes where a <= b and 0 in the others.
 *
 * Disabled branch (#else): draws 8 random a/b pairs (the random helper is
 * called with the arguments -1000.0 and 1000.0), copies b into a for about
 * one lane in three so that equal lanes occur, hands a, b and the computed r
 * to the simde_test_arm_neon_write_* helpers, and returns 1.
 * NOTE(review): the write helpers presumably print fresh test_vec entries --
 * confirm against the test harness. */
static int
test_simde_vcleq_f32 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    simde_float32 a[4];
    simde_float32 b[4];
    uint32_t r[4];
  } test_vec[] = {
    { { SIMDE_FLOAT32_C( -668.90), SIMDE_FLOAT32_C( 928.96), SIMDE_FLOAT32_C( 525.27), SIMDE_FLOAT32_C( 244.82) },
      { SIMDE_FLOAT32_C( 811.79), SIMDE_FLOAT32_C( 687.37), SIMDE_FLOAT32_C( 277.72), SIMDE_FLOAT32_C( 244.82) },
      { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } },
    { { SIMDE_FLOAT32_C( 696.43), SIMDE_FLOAT32_C( -857.84), SIMDE_FLOAT32_C( 965.32), SIMDE_FLOAT32_C( 876.96) },
      { SIMDE_FLOAT32_C( -631.72), SIMDE_FLOAT32_C( -857.84), SIMDE_FLOAT32_C( -893.87), SIMDE_FLOAT32_C( 876.96) },
      { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } },
    { { SIMDE_FLOAT32_C( -771.07), SIMDE_FLOAT32_C( 967.77), SIMDE_FLOAT32_C( 429.01), SIMDE_FLOAT32_C( 260.39) },
      { SIMDE_FLOAT32_C( 702.86), SIMDE_FLOAT32_C( 636.28), SIMDE_FLOAT32_C( 429.01), SIMDE_FLOAT32_C( -966.04) },
      { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } },
    { { SIMDE_FLOAT32_C( -358.35), SIMDE_FLOAT32_C( 497.18), SIMDE_FLOAT32_C( -378.16), SIMDE_FLOAT32_C( 420.17) },
      { SIMDE_FLOAT32_C( -508.71), SIMDE_FLOAT32_C( -602.79), SIMDE_FLOAT32_C( 676.13), SIMDE_FLOAT32_C( -812.29) },
      { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } },
    { { SIMDE_FLOAT32_C( -521.08), SIMDE_FLOAT32_C( -618.79), SIMDE_FLOAT32_C( 530.55), SIMDE_FLOAT32_C( -292.15) },
      { SIMDE_FLOAT32_C( -521.08), SIMDE_FLOAT32_C( -618.79), SIMDE_FLOAT32_C( 223.55), SIMDE_FLOAT32_C( -292.15) },
      { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } },
    { { SIMDE_FLOAT32_C( -808.90), SIMDE_FLOAT32_C( 397.25), SIMDE_FLOAT32_C( 133.37), SIMDE_FLOAT32_C( -6.83) },
      { SIMDE_FLOAT32_C( -648.47), SIMDE_FLOAT32_C( -694.75), SIMDE_FLOAT32_C( 133.37), SIMDE_FLOAT32_C( -6.83) },
      { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } },
    { { SIMDE_FLOAT32_C( 730.94), SIMDE_FLOAT32_C( -785.96), SIMDE_FLOAT32_C( 91.33), SIMDE_FLOAT32_C( 514.55) },
      { SIMDE_FLOAT32_C( 730.94), SIMDE_FLOAT32_C( -785.96), SIMDE_FLOAT32_C( 91.33), SIMDE_FLOAT32_C( 514.55) },
      { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } },
    { { SIMDE_FLOAT32_C( -626.37), SIMDE_FLOAT32_C( 818.81), SIMDE_FLOAT32_C( 403.89), SIMDE_FLOAT32_C( 352.06) },
      { SIMDE_FLOAT32_C( -626.37), SIMDE_FLOAT32_C( 207.79), SIMDE_FLOAT32_C( 403.89), SIMDE_FLOAT32_C( -435.27) },
      { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a);
    simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b);
    simde_uint32x4_t r = simde_vcleq_f32(a, b);
    simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r));
  }

  return 0;
#else
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_float32x4_private a_ = simde_float32x4_to_private(simde_test_arm_neon_random_f32x4(-1000.0, 1000.0));
    simde_float32x4_private b_ = simde_float32x4_to_private(simde_test_arm_neon_random_f32x4(-1000.0, 1000.0));

    /* Make some equal which is unlikely to happen by accident.
*/
    for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) {
      if (rand() < (RAND_MAX / 3))
        a_.values[j] = b_.values[j];
    }

    simde_float32x4_t a = simde_float32x4_from_private(a_);
    simde_float32x4_t b = simde_float32x4_from_private(b_);
    simde_uint32x4_t r = simde_vcleq_f32(a, b);

    simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Tests simde_vcleq_f64 on 2-lane simde_float64 vectors.
 *
 * Enabled branch (#if 1): for each of the 8 hard-coded test_vec entries, loads
 * a and b with simde_vld1q_f64, calls simde_vcleq_f64(a, b) and compares the
 * result with the stored r through simde_test_arm_neon_assert_equal_u64x2;
 * returns 0 once the loop finishes.  In every entry r is UINT64_MAX in the
 * lanes where a <= b and 0 in the others.
 *
 * Disabled branch (#else): draws 8 random a/b pairs (the random helper is
 * called with the arguments -1000.0 and 1000.0), copies b into a for about
 * one lane in three so that equal lanes occur, hands a, b and the computed r
 * to the simde_test_arm_neon_write_* helpers, and returns 1.
 * NOTE(review): the write helpers presumably print fresh test_vec entries --
 * confirm against the test harness. */
static int
test_simde_vcleq_f64 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    simde_float64 a[2];
    simde_float64 b[2];
    uint64_t r[2];
  } test_vec[] = {
    { { SIMDE_FLOAT64_C( 680.17), SIMDE_FLOAT64_C( -515.06) },
      { SIMDE_FLOAT64_C( 718.41), SIMDE_FLOAT64_C( -515.06) },
      { UINT64_MAX, UINT64_MAX } },
    { { SIMDE_FLOAT64_C( 113.48), SIMDE_FLOAT64_C( -153.24) },
      { SIMDE_FLOAT64_C( -711.61), SIMDE_FLOAT64_C( -153.24) },
      { UINT64_C( 0), UINT64_MAX } },
    { { SIMDE_FLOAT64_C( -988.69), SIMDE_FLOAT64_C( 305.46) },
      { SIMDE_FLOAT64_C( 833.43), SIMDE_FLOAT64_C( 305.46) },
      { UINT64_MAX, UINT64_MAX } },
    { { SIMDE_FLOAT64_C( 993.85), SIMDE_FLOAT64_C( -613.57) },
      { SIMDE_FLOAT64_C( -18.63), SIMDE_FLOAT64_C( -132.48) },
      { UINT64_C( 0), UINT64_MAX } },
    { { SIMDE_FLOAT64_C( 720.10), SIMDE_FLOAT64_C( -120.39) },
      { SIMDE_FLOAT64_C( 800.59), SIMDE_FLOAT64_C( -120.39) },
      { UINT64_MAX, UINT64_MAX } },
    { { SIMDE_FLOAT64_C( 850.21), SIMDE_FLOAT64_C( 642.71) },
      { SIMDE_FLOAT64_C( 850.21), SIMDE_FLOAT64_C( -236.33) },
      { UINT64_MAX, UINT64_C( 0) } },
    { { SIMDE_FLOAT64_C( -275.07), SIMDE_FLOAT64_C( 271.93) },
      { SIMDE_FLOAT64_C( -275.07), SIMDE_FLOAT64_C( 271.93) },
      { UINT64_MAX, UINT64_MAX } },
    { { SIMDE_FLOAT64_C( 897.28), SIMDE_FLOAT64_C( -900.81) },
      { SIMDE_FLOAT64_C( 83.69), SIMDE_FLOAT64_C( 730.71) },
      { UINT64_C( 0), UINT64_MAX } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a);
    simde_float64x2_t b =
simde_vld1q_f64(test_vec[i].b);
    simde_uint64x2_t r = simde_vcleq_f64(a, b);
    simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r));
  }

  return 0;
#else
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_float64x2_private a_ = simde_float64x2_to_private(simde_test_arm_neon_random_f64x2(-1000.0, 1000.0));
    simde_float64x2_private b_ = simde_float64x2_to_private(simde_test_arm_neon_random_f64x2(-1000.0, 1000.0));

    /* Make some equal which is unlikely to happen by accident. */
    for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) {
      if (rand() < (RAND_MAX / 3))
        a_.values[j] = b_.values[j];
    }

    simde_float64x2_t a = simde_float64x2_from_private(a_);
    simde_float64x2_t b = simde_float64x2_from_private(b_);
    simde_uint64x2_t r = simde_vcleq_f64(a, b);

    simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Tests simde_vcleq_s8 on 16-lane int8_t vectors.
 *
 * Enabled branch (#if 1): for each of the 8 hard-coded test_vec entries, loads
 * a and b with simde_vld1q_s8, calls simde_vcleq_s8(a, b) and compares the
 * result with the stored r through simde_test_arm_neon_assert_equal_u8x16;
 * returns 0 once the loop finishes.  In every entry r is UINT8_MAX in the
 * lanes where a <= b (signed comparison) and 0 in the others.
 *
 * Disabled branch (#else): draws 8 random a/b pairs, copies b into a for about
 * one lane in three so that equal lanes occur, hands a, b and the computed r
 * to the simde_test_arm_neon_write_* helpers, and returns 1.
 * NOTE(review): the write helpers presumably print fresh test_vec entries --
 * confirm against the test harness. */
static int
test_simde_vcleq_s8 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    int8_t a[16];
    int8_t b[16];
    uint8_t r[16];
  } test_vec[] = {
    { { INT8_C( 121), INT8_C( 94), INT8_C( 116), INT8_C( 97), -INT8_C( 79), -INT8_C( 19), -INT8_C( 113), INT8_C( 103), -INT8_C( 39), INT8_C( 1), INT8_C( 117), INT8_C( 6), INT8_C( 123), INT8_C( 85), INT8_C( 36), -INT8_C( 83) },
      { -INT8_C( 14), INT8_C( 94), -INT8_C( 68), INT8_C( 97), -INT8_C( 79), -INT8_C( 19), -INT8_C( 34), INT8_C( 103), -INT8_C( 112), -INT8_C( 52), INT8_C( 117), INT8_C( 6), -INT8_C( 40), -INT8_C( 119), INT8_C( 36), INT8_C( 81) },
      { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } },
    { { -INT8_C( 19), -INT8_C( 104), INT8_C( 93), -INT8_C( 98), -INT8_C( 123), INT8_C( 59), INT8_C( 52), INT8_C( 21), INT8_C( 7), INT8_C( 123), INT8_C( 27), -INT8_C( 33), -INT8_C( 84), INT8_C( 63), -INT8_C( 105), INT8_C( 93) },
      { -INT8_C( 40), INT8_C( 83), -INT8_C( 118), -INT8_C( 103), INT8_C( 4), -INT8_C( 33), INT8_C( 52), -INT8_C( 73), -INT8_C( 109), INT8_C( 29), -INT8_C( 26), -INT8_C( 100), -INT8_C( 84), -INT8_C( 62), -INT8_C( 105), -INT8_C( 102) },
      { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } },
    { { -INT8_C( 101), -INT8_C( 92), INT8_C( 56), -INT8_C( 97), -INT8_C( 124), -INT8_C( 11), -INT8_C( 90), -INT8_C( 120), INT8_C( 18), INT8_C( 61), -INT8_C( 77), -INT8_C( 65), -INT8_C( 1), INT8_C( 74), INT8_C( 89), -INT8_C( 96) },
      { INT8_C( 63), -INT8_C( 111), INT8_C( 56), INT8_C( 111), -INT8_C( 49), INT8_C( 44), -INT8_C( 90), -INT8_C( 120), INT8_C( 59), -INT8_C( 67), INT8_C( 69), -INT8_C( 118), INT8_C( 5), INT8_C( 95), -INT8_C( 79), -INT8_C( 96) },
      { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } },
    { { INT8_C( 35), INT8_C( 100), INT8_C( 74), -INT8_C( 14), -INT8_C( 112), -INT8_C( 15), INT8_C( 90), -INT8_C( 53), -INT8_C( 82), -INT8_C( 64), -INT8_C( 46), -INT8_C( 76), INT8_C( 32), -INT8_C( 2), INT8_C( 21), INT8_C( 36) },
      { INT8_C( 121), -INT8_C( 108), -INT8_C( 84), -INT8_C( 32), INT8_C( 43), INT8_C( 75), INT8_C( 90), -INT8_C( 1), -INT8_C( 99), -INT8_C( 110), -INT8_C( 46), INT8_C( 57), INT8_C( 36), -INT8_C( 2), INT8_C( 21), INT8_C( 71) },
      { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } },
    { { -INT8_C( 5), INT8_C( 101), -INT8_C( 19), -INT8_C( 57), -INT8_C( 69), -INT8_C( 49), INT8_C( 74), INT8_C( 77), INT8_C( 98), -INT8_C( 103), -INT8_C( 122), -INT8_C( 122), INT8_C( 5), -INT8_C( 101), INT8_C( 111), -INT8_C( 6) },
      { -INT8_C( 5), INT8_C( 6), -INT8_C( 19), INT8_C( 75), -INT8_C( 69), -INT8_C( 85), INT8_C( 74), INT8_C( 48), -INT8_C( 65), -INT8_C( 3), -INT8_C( 59), -INT8_C( 38), INT8_C( 5), INT8_C( 126), INT8_C( 111), -INT8_C( 95) },
      { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } },
    { { -INT8_C( 31), -INT8_C( 97), INT8_C( 20), INT8_C( 43), -INT8_C( 13), -INT8_C( 109), INT8_C( 91), -INT8_C( 78), -INT8_C( 112), INT8_C( 73), -INT8_C( 8), INT8_C( 92), -INT8_C( 98), -INT8_C( 4), INT8_C( 54), INT8_C( 34) },
      { -INT8_C( 31), -INT8_C( 97), INT8_C( 20), -INT8_C( 106), -INT8_C( 50), -INT8_C( 12), -INT8_C( 84), -INT8_C( 105), INT8_C( 90), INT8_C( 73), -INT8_C( 8), INT8_C( 92), -INT8_C( 78), INT8_C( 84), INT8_C( 89), INT8_C( 34) },
      { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } },
    { { INT8_C( 48), INT8_C( 91), INT8_C( 15), -INT8_C( 1), -INT8_C( 17), -INT8_C( 16), -INT8_C( 106), -INT8_C( 52), INT8_C( 57), -INT8_C( 9), -INT8_C( 90), -INT8_C( 19), -INT8_C( 119), INT8_C( 26), INT8_C( 14), -INT8_C( 71) },
      { -INT8_C( 94), INT8_C( 91), INT8_C( 15), -INT8_C( 41), INT8_C( 4), INT8_C( 82), -INT8_C( 100), -INT8_C( 52), INT8_C( 33), -INT8_C( 9), INT8_C( 51), -INT8_C( 19), -INT8_C( 119), INT8_C( 26), -INT8_C( 101), -INT8_C( 71) },
      { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } },
    { { INT8_C( 92), INT8_C( 80), -INT8_C( 106), -INT8_C( 7), -INT8_C( 94), -INT8_C( 27), INT8_C( 45), INT8_C( 43), -INT8_C( 37), INT8_C( 96), -INT8_C( 20), -INT8_C( 116), INT8_C( 122), INT8_C( 76), INT8_C( 69), -INT8_C( 112) },
      { INT8_C( 42), -INT8_C( 2), -INT8_C( 106), -INT8_C( 7), INT8_C( 77), -INT8_C( 27), INT8_C( 0), INT8_C( 43), -INT8_C( 37), -INT8_C(
13), -INT8_C( 20), -INT8_C( 47), -INT8_C( 12), INT8_C( 44), INT8_C( 105), INT8_C( 81) },
      { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a);
    simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b);
    simde_uint8x16_t r = simde_vcleq_s8(a, b);
    simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r));
  }

  return 0;
#else
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_int8x16_private a_ = simde_int8x16_to_private(simde_test_arm_neon_random_i8x16());
    simde_int8x16_private b_ = simde_int8x16_to_private(simde_test_arm_neon_random_i8x16());

    /* Make some equal which is unlikely to happen by accident. */
    for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) {
      if (rand() < (RAND_MAX / 3))
        a_.values[j] = b_.values[j];
    }

    simde_int8x16_t a = simde_int8x16_from_private(a_);
    simde_int8x16_t b = simde_int8x16_from_private(b_);
    simde_uint8x16_t r = simde_vcleq_s8(a, b);

    simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Tests simde_vcleq_s16 on 8-lane int16_t vectors.
 *
 * Enabled branch (#if 1): for each of the 8 hard-coded test_vec entries, loads
 * a and b with simde_vld1q_s16, calls simde_vcleq_s16(a, b) and compares the
 * result with the stored r through simde_test_arm_neon_assert_equal_u16x8;
 * returns 0 once the loop finishes.  In every entry r is UINT16_MAX in the
 * lanes where a <= b (signed comparison) and 0 in the others.
 *
 * Disabled branch (#else): draws 8 random a/b pairs, copies b into a for about
 * one lane in three so that equal lanes occur, hands a, b and the computed r
 * to the simde_test_arm_neon_write_* helpers, and returns 1.
 * NOTE(review): the write helpers presumably print fresh test_vec entries --
 * confirm against the test harness. */
static int
test_simde_vcleq_s16 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    int16_t a[8];
    int16_t b[8];
    uint16_t r[8];
  } test_vec[] = {
    { { -INT16_C( 12376), INT16_C( 989), -INT16_C( 1994), -INT16_C( 15026), INT16_C( 5482), INT16_C( 30632), INT16_C( 13645), INT16_C( 30025) },
      { -INT16_C( 30204), -INT16_C( 16382), -INT16_C( 1994), INT16_C( 28933), INT16_C( 5482), -INT16_C( 13335), -INT16_C( 23603), INT16_C( 30025) },
      { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } },
    { { -INT16_C( 21413), -INT16_C( 29059), -INT16_C( 17695), -INT16_C( 6579), INT16_C( 20292), INT16_C( 31398), -INT16_C( 21689), -INT16_C( 19733) },
      { -INT16_C( 10815), -INT16_C( 29059), -INT16_C( 14728), -INT16_C( 5373), INT16_C( 31725), INT16_C( 41), INT16_C( 11585), -INT16_C( 25430) },
      { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } },
    { { INT16_C( 18145), INT16_C( 12), -INT16_C( 18753), -INT16_C( 26467), INT16_C( 22277), INT16_C( 2131), INT16_C( 17438), INT16_C( 2921) },
      { -INT16_C( 27969), INT16_C( 12), -INT16_C( 18753), -INT16_C( 26467), -INT16_C( 7550), INT16_C( 2131), -INT16_C( 3212), INT16_C( 21970) },
      { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } },
    { { INT16_C( 8367), INT16_C( 5669), -INT16_C( 1947), INT16_C( 9177), -INT16_C( 6713), -INT16_C( 31197), -INT16_C( 16229), -INT16_C( 3931) },
      { INT16_C( 29346), INT16_C( 5669), -INT16_C( 1947), -INT16_C( 24725), -INT16_C( 5873), INT16_C( 22475), -INT16_C( 21951), -INT16_C( 3931) },
      { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } },
    { { INT16_C( 30076), -INT16_C( 31559), INT16_C( 4405), -INT16_C( 10187), INT16_C( 23171), INT16_C( 29943), INT16_C( 18889), INT16_C( 25224) },
      { INT16_C( 21571), -INT16_C( 31559), INT16_C( 24318), -INT16_C( 13964), INT16_C( 13007), INT16_C( 29943), INT16_C( 18889), INT16_C( 17888) },
      { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } },
    { { -INT16_C( 17684), INT16_C( 19452), -INT16_C( 9964), -INT16_C( 3183), INT16_C( 23085), -INT16_C( 27930), INT16_C( 20409), INT16_C( 27898) },
      { -INT16_C( 4990), INT16_C( 19452), -INT16_C( 9163), -INT16_C( 3183), -INT16_C( 4434), -INT16_C( 27930), -INT16_C( 19840), INT16_C( 27898) },
      { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } },
    { { -INT16_C( 19801), INT16_C( 24700), INT16_C( 28673), -INT16_C( 31768), -INT16_C( 7076), -INT16_C( 28209), -INT16_C( 8142), INT16_C( 28292) },
      { INT16_C( 27470), -INT16_C( 12799), -INT16_C( 1251), -INT16_C( 30406), -INT16_C( 6587), INT16_C( 26889), -INT16_C( 8142), -INT16_C( 9799) },
      { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } },
    { { INT16_C( 12930), -INT16_C( 18978), INT16_C( 6214), -INT16_C( 27596), -INT16_C( 14131), INT16_C( 29496), -INT16_C( 25552), INT16_C( 29993) },
      { INT16_C( 12930), -INT16_C( 18978), -INT16_C( 26606), -INT16_C( 23410), -INT16_C( 14131), INT16_C( 29496), INT16_C( 20457), -INT16_C( 4234) },
      { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a);
    simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b);
    simde_uint16x8_t r = simde_vcleq_s16(a, b);
    simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r));
  }

  return 0;
#else
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_int16x8_private a_ = simde_int16x8_to_private(simde_test_arm_neon_random_i16x8());
    simde_int16x8_private b_ = simde_int16x8_to_private(simde_test_arm_neon_random_i16x8());

    /* Make some equal which is unlikely to happen by accident.
*/
    for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) {
      if (rand() < (RAND_MAX / 3))
        a_.values[j] = b_.values[j];
    }

    simde_int16x8_t a = simde_int16x8_from_private(a_);
    simde_int16x8_t b = simde_int16x8_from_private(b_);
    simde_uint16x8_t r = simde_vcleq_s16(a, b);

    simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Tests simde_vcleq_s32 on 4-lane int32_t vectors.
 *
 * Enabled branch (#if 1): for each of the 8 hard-coded test_vec entries, loads
 * a and b with simde_vld1q_s32, calls simde_vcleq_s32(a, b) and compares the
 * result with the stored r through simde_test_arm_neon_assert_equal_u32x4;
 * returns 0 once the loop finishes.  In every entry r is UINT32_MAX in the
 * lanes where a <= b (signed comparison) and 0 in the others.
 *
 * Disabled branch (#else): draws 8 random a/b pairs, copies b into a for about
 * one lane in three so that equal lanes occur, hands a, b and the computed r
 * to the simde_test_arm_neon_write_* helpers, and returns 1.
 * NOTE(review): the write helpers presumably print fresh test_vec entries --
 * confirm against the test harness. */
static int
test_simde_vcleq_s32 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    int32_t a[4];
    int32_t b[4];
    uint32_t r[4];
  } test_vec[] = {
    { { -INT32_C( 1229097861), -INT32_C( 1438720539), -INT32_C( 1267867935), -INT32_C( 898897228) },
      { -INT32_C( 1887772614), -INT32_C( 833866438), -INT32_C( 565620803), INT32_C( 653947819) },
      { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_MAX } },
    { { INT32_C( 1987517076), -INT32_C( 1389697799), INT32_C( 142054862), -INT32_C( 1348996491) },
      { -INT32_C( 461446361), -INT32_C( 574437582), INT32_C( 570670138), INT32_C( 149938292) },
      { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_MAX } },
    { { -INT32_C( 508897369), -INT32_C( 1306519491), INT32_C( 828552970), -INT32_C( 854204262) },
      { -INT32_C( 508897369), INT32_C( 134459028), -INT32_C( 2012155250), -INT32_C( 1568895144) },
      { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } },
    { { INT32_C( 1076353589), INT32_C( 745638289), INT32_C( 284788585), -INT32_C( 218979490) },
      { -INT32_C( 537201327), INT32_C( 1080560616), -INT32_C( 773593958), INT32_C( 910597377) },
      { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } },
    { { INT32_C( 1630922999), -INT32_C( 848220561), INT32_C( 599811026), -INT32_C( 578552347) },
      { INT32_C( 1602317253), INT32_C( 1345414223), INT32_C( 2055632642), -INT32_C( 578552347) },
      { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_MAX } },
    { { -INT32_C( 1779764589), INT32_C( 1826480403), INT32_C( 1638785180), -INT32_C( 1698615733) },
      { -INT32_C( 1779764589), INT32_C( 1326411881), INT32_C( 1378652781), INT32_C( 866544203) },
      { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } },
    { { INT32_C( 1444064233), -INT32_C( 190364247), INT32_C( 126773364), -INT32_C( 1013155750) },
      { INT32_C( 1444064233), -INT32_C( 1985397187), -INT32_C( 1027846232), -INT32_C( 1041324009) },
      { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } },
    { { INT32_C( 1074834892), INT32_C( 1726878031), INT32_C( 724032953), -INT32_C( 866761330) },
      { INT32_C( 743833476), INT32_C( 1726878031), INT32_C( 724032953), INT32_C( 809971300) },
      { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_MAX } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a);
    simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b);
    simde_uint32x4_t r = simde_vcleq_s32(a, b);
    simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r));
  }

  return 0;
#else
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_int32x4_private a_ = simde_int32x4_to_private(simde_test_arm_neon_random_i32x4());
    simde_int32x4_private b_ = simde_int32x4_to_private(simde_test_arm_neon_random_i32x4());

    /* Make some equal which is unlikely to happen by accident.
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int32x4_t a = simde_int32x4_from_private(a_); simde_int32x4_t b = simde_int32x4_from_private(b_); simde_uint32x4_t r = simde_vcleq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcleq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t b[2]; uint64_t r[2]; } test_vec[] = { { { INT64_C( 1875168629702030407), -INT64_C( 1585750502122760845) }, { INT64_C( 1154389533265603449), -INT64_C( 5056224679567743638) }, { UINT64_C( 0), UINT64_C( 0) } }, { { -INT64_C( 4677112269914828356), INT64_C( 802626250274728057) }, { INT64_C( 3150738932241681806), -INT64_C( 6912465892998042863) }, { UINT64_MAX, UINT64_C( 0) } }, { { -INT64_C( 8666382967555742), -INT64_C( 9215193599598120020) }, { INT64_C( 9189381345098103458), INT64_C( 7881448929588112162) }, { UINT64_MAX, UINT64_MAX } }, { { INT64_C( 4178212053687777766), -INT64_C( 7496719103440436358) }, { INT64_C( 232014033692395846), -INT64_C( 7496719103440436358) }, { UINT64_C( 0), UINT64_MAX } }, { { INT64_C( 4653644453121343583), INT64_C( 6586356358846238713) }, { INT64_C( 9203419145484562237), INT64_C( 7004173850729278199) }, { UINT64_MAX, UINT64_MAX } }, { { INT64_C( 8304329466640869147), INT64_C( 6665286354400067540) }, { INT64_C( 8304329466640869147), INT64_C( 6665286354400067540) }, { UINT64_MAX, UINT64_MAX } }, { { INT64_C( 7324496898826677287), INT64_C( 1947053861784541097) }, { INT64_C( 7324496898826677287), INT64_C( 5720880642963490271) }, { UINT64_MAX, UINT64_MAX } }, { { -INT64_C( 3451453209207438179), INT64_C( 7172065285969757399) }, { INT64_C( 5520034412839479216), INT64_C( 3082938850326232440) }, { UINT64_MAX, UINT64_C( 0) } }, }; for (size_t i = 0 ; i 
< (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_uint64x2_t r = simde_vcleq_s64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_private a_ = simde_int64x2_to_private(simde_test_arm_neon_random_i64x2()); simde_int64x2_private b_ = simde_int64x2_to_private(simde_test_arm_neon_random_i64x2()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int64x2_t a = simde_int64x2_from_private(a_); simde_int64x2_t b = simde_int64x2_from_private(b_); simde_uint64x2_t r = simde_vcleq_s64(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcleq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(235), UINT8_C(181), UINT8_C(209), UINT8_C( 50), UINT8_C(176), UINT8_C( 16), UINT8_C(247), UINT8_C( 5), UINT8_C(145), UINT8_C( 67), UINT8_C( 27), UINT8_C(161), UINT8_C( 21), UINT8_C( 85), UINT8_C(225), UINT8_C(136) }, { UINT8_C(235), UINT8_C(141), UINT8_C(202), UINT8_C(238), UINT8_C(250), UINT8_C( 16), UINT8_C(247), UINT8_C( 5), UINT8_C( 85), UINT8_C(166), UINT8_C( 27), UINT8_C(128), UINT8_C( 21), UINT8_C(188), UINT8_C(228), UINT8_C(202) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(217), UINT8_C( 62), UINT8_C(221), UINT8_C(211), UINT8_C( 78), UINT8_C(212), UINT8_C(189), UINT8_C(163), 
UINT8_C(123), UINT8_C(244), UINT8_C( 35), UINT8_C(144), UINT8_C(176), UINT8_C( 46), UINT8_C( 15), UINT8_C( 34) }, { UINT8_C(189), UINT8_C( 87), UINT8_C( 67), UINT8_C(244), UINT8_C(241), UINT8_C(109), UINT8_C(189), UINT8_C(205), UINT8_C( 28), UINT8_C( 40), UINT8_C(185), UINT8_C( 32), UINT8_C(115), UINT8_C( 46), UINT8_C( 15), UINT8_C( 76) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(125), UINT8_C( 0), UINT8_C( 59), UINT8_C(110), UINT8_C(111), UINT8_C(248), UINT8_C(162), UINT8_C(139), UINT8_C( 43), UINT8_C(110), UINT8_C(130), UINT8_C(181), UINT8_C( 35), UINT8_C(186), UINT8_C(224), UINT8_C( 17) }, { UINT8_C(166), UINT8_C( 0), UINT8_C( 75), UINT8_C(103), UINT8_C(249), UINT8_C(169), UINT8_C(162), UINT8_C(229), UINT8_C( 43), UINT8_C(110), UINT8_C(130), UINT8_C(181), UINT8_C(148), UINT8_C( 65), UINT8_C(251), UINT8_C( 17) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { UINT8_C(196), UINT8_C(176), UINT8_C( 94), UINT8_C(155), UINT8_C( 89), UINT8_C(157), UINT8_C(163), UINT8_C(133), UINT8_C(136), UINT8_C( 37), UINT8_C( 58), UINT8_C( 45), UINT8_C(103), UINT8_C( 53), UINT8_C(104), UINT8_C(171) }, { UINT8_C(108), UINT8_C(232), UINT8_C( 94), UINT8_C(155), UINT8_C(164), UINT8_C(157), UINT8_C(234), UINT8_C( 86), UINT8_C(136), UINT8_C(206), UINT8_C( 43), UINT8_C( 45), UINT8_C(146), UINT8_C(144), UINT8_C(120), UINT8_C( 86) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C(235), UINT8_C( 74), UINT8_C(122), UINT8_C(143), UINT8_C(234), UINT8_C(127), UINT8_C( 6), UINT8_C(114), UINT8_C( 77), UINT8_C( 16), UINT8_C(159), 
UINT8_C(223), UINT8_C(162), UINT8_C( 15), UINT8_C(121), UINT8_C(224) }, { UINT8_C( 67), UINT8_C( 74), UINT8_C(122), UINT8_C(195), UINT8_C( 1), UINT8_C(152), UINT8_C( 6), UINT8_C(221), UINT8_C(241), UINT8_C(160), UINT8_C( 33), UINT8_C(127), UINT8_C(162), UINT8_C( 15), UINT8_C(121), UINT8_C(141) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C( 58), UINT8_C( 14), UINT8_C(119), UINT8_C( 59), UINT8_C(108), UINT8_C(125), UINT8_C( 6), UINT8_C(127), UINT8_MAX, UINT8_C( 58), UINT8_C( 22), UINT8_C(191), UINT8_C( 73), UINT8_C(143), UINT8_C( 77), UINT8_C(166) }, { UINT8_C(157), UINT8_C(106), UINT8_C(237), UINT8_C( 43), UINT8_C(108), UINT8_C(166), UINT8_C( 6), UINT8_C(127), UINT8_MAX, UINT8_C(192), UINT8_C( 51), UINT8_C(112), UINT8_C(177), UINT8_C(199), UINT8_C( 36), UINT8_C(235) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { UINT8_C(172), UINT8_C( 86), UINT8_C(129), UINT8_C( 24), UINT8_C( 75), UINT8_C(245), UINT8_C(152), UINT8_C(197), UINT8_C( 72), UINT8_C(112), UINT8_C(137), UINT8_C(249), UINT8_C(146), UINT8_C(145), UINT8_C(228), UINT8_C(103) }, { UINT8_C( 45), UINT8_C( 11), UINT8_C(226), UINT8_C( 69), UINT8_C( 75), UINT8_C(245), UINT8_C(123), UINT8_C(197), UINT8_C( 31), UINT8_C(112), UINT8_C(137), UINT8_C(216), UINT8_C(178), UINT8_C(242), UINT8_C( 46), UINT8_C( 94) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(104), UINT8_C( 37), UINT8_C(179), UINT8_C(183), UINT8_C(170), UINT8_C( 46), UINT8_C( 55), UINT8_C( 13), UINT8_C(158), UINT8_C( 2), UINT8_C(120), UINT8_C( 80), UINT8_C(245), UINT8_C(208), UINT8_C(174), 
UINT8_C( 62) }, { UINT8_C(128), UINT8_C( 37), UINT8_C(132), UINT8_C(183), UINT8_C( 51), UINT8_C(198), UINT8_C( 55), UINT8_C( 13), UINT8_C(118), UINT8_C(175), UINT8_C(120), UINT8_C(183), UINT8_C( 12), UINT8_C( 75), UINT8_C( 37), UINT8_C(117) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vcleq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_private a_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); simde_uint8x16_private b_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint8x16_t a = simde_uint8x16_from_private(a_); simde_uint8x16_t b = simde_uint8x16_from_private(b_); simde_uint8x16_t r = simde_vcleq_u8(a, b); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcleq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(22479), UINT16_C(48311), UINT16_C(59303), UINT16_C( 1712), UINT16_C(52292), UINT16_C( 1671), UINT16_C(39280), UINT16_C(38684) }, { UINT16_C(21616), UINT16_C( 4985), UINT16_C(42330), UINT16_C( 1712), UINT16_C(52292), UINT16_C(22407), UINT16_C( 945), UINT16_C(33083) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(18980), UINT16_C(38204), UINT16_C(23011), UINT16_C(21292), UINT16_C(42669), UINT16_C( 1895), UINT16_C( 5963), UINT16_C( 7461) }, { UINT16_C(38371), UINT16_C(38375), UINT16_C( 9113), UINT16_C(62230), UINT16_C(21269), UINT16_C(61429), UINT16_C(47352), UINT16_C( 7461) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(41348), UINT16_C(53359), UINT16_C(37965), UINT16_C(39776), UINT16_C(25077), UINT16_C(44080), UINT16_C(18026), UINT16_C(32927) }, { UINT16_C(38041), UINT16_C(37487), UINT16_C(37965), UINT16_C(20655), UINT16_C(25077), UINT16_C(45110), UINT16_C(28735), UINT16_C(50200) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(38203), UINT16_C(42692), UINT16_C(25564), UINT16_C(29990), UINT16_C(38392), UINT16_C(17671), UINT16_C(46633), UINT16_C(56971) }, { UINT16_C(51991), 
UINT16_C(22478), UINT16_C(58940), UINT16_C(19739), UINT16_C(44910), UINT16_C(29463), UINT16_C(31907), UINT16_C(56971) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(27199), UINT16_C(26863), UINT16_C(43116), UINT16_C(32475), UINT16_C(21840), UINT16_C(35983), UINT16_C(52746), UINT16_C(43737) }, { UINT16_C(61529), UINT16_C(64541), UINT16_C(43116), UINT16_C(32475), UINT16_C(24567), UINT16_C(43371), UINT16_C(52746), UINT16_C(18772) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C( 8846), UINT16_C(51876), UINT16_C(32204), UINT16_C(52144), UINT16_C(37230), UINT16_C(55842), UINT16_C(47259), UINT16_C(10833) }, { UINT16_C(50269), UINT16_C(26586), UINT16_C(11922), UINT16_C(52144), UINT16_C(25201), UINT16_C(14629), UINT16_C(47259), UINT16_C(10833) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { UINT16_C(61434), UINT16_C(13499), UINT16_C( 5356), UINT16_C(18788), UINT16_C(16088), UINT16_C(27312), UINT16_C(24806), UINT16_C(57778) }, { UINT16_C(23491), UINT16_C(24343), UINT16_C(26899), UINT16_C(60809), UINT16_C(32094), UINT16_C(53652), UINT16_C(24806), UINT16_C(57778) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(14008), UINT16_C(42675), UINT16_C(64151), UINT16_C(23042), UINT16_C(40147), UINT16_C(27065), UINT16_C(17027), UINT16_C(57687) }, { UINT16_C(60352), UINT16_C(42675), UINT16_C(25931), UINT16_C(39815), UINT16_C(40147), UINT16_C(21975), UINT16_C(23574), UINT16_C(52911) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = 
simde_vcleq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_private a_ = simde_uint16x8_to_private(simde_test_arm_neon_random_u16x8()); simde_uint16x8_private b_ = simde_uint16x8_to_private(simde_test_arm_neon_random_u16x8()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint16x8_t a = simde_uint16x8_from_private(a_); simde_uint16x8_t b = simde_uint16x8_from_private(b_); simde_uint16x8_t r = simde_vcleq_u16(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcleq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(2381386769), UINT32_C(3176466564), UINT32_C( 571499645), UINT32_C(2801129909) }, { UINT32_C( 974744019), UINT32_C(3176466564), UINT32_C(2645856096), UINT32_C(3366276535) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(2953658930), UINT32_C(1557274022), UINT32_C(3389179895), UINT32_C(3036945201) }, { UINT32_C(2205374755), UINT32_C(2669684712), UINT32_C(3597190772), UINT32_C(2406333788) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(2382500247), UINT32_C( 161785772), UINT32_C(1687687524), UINT32_C(2680566455) }, { UINT32_C(3460294233), UINT32_C( 161785772), UINT32_C(1687687524), UINT32_C( 571201419) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C( 835324434), UINT32_C( 577597079), UINT32_C( 348011195), UINT32_C(3454205216) }, { UINT32_C( 148276900), UINT32_C( 577597079), UINT32_C( 759527493), UINT32_C(2548954757) }, { UINT32_C( 0), 
UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(3979565736), UINT32_C(1478129874), UINT32_C(3848598081), UINT32_C( 283982457) }, { UINT32_C(3979565736), UINT32_C(1478129874), UINT32_C(1861159022), UINT32_C( 14006463) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(2688045874), UINT32_C(2009426174), UINT32_C(1267184547), UINT32_C(3745650239) }, { UINT32_C(2688045874), UINT32_C( 453912411), UINT32_C(3575375072), UINT32_C(3745650239) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { UINT32_C(1368884654), UINT32_C(1167924792), UINT32_C( 187029209), UINT32_C(2242665769) }, { UINT32_C(1705032325), UINT32_C(3728391070), UINT32_C(4089281745), UINT32_C(1897102530) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C( 277882431), UINT32_C(1605215285), UINT32_C(3419805642), UINT32_C(3001844756) }, { UINT32_C( 277882431), UINT32_C(1694780835), UINT32_C(3419805642), UINT32_C(2332400066) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vcleq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_private a_ = simde_uint32x4_to_private(simde_test_arm_neon_random_u32x4()); simde_uint32x4_private b_ = simde_uint32x4_to_private(simde_test_arm_neon_random_u32x4()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint32x4_t a = simde_uint32x4_from_private(a_); simde_uint32x4_t b = simde_uint32x4_from_private(b_); simde_uint32x4_t r = simde_vcleq_u32(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcleq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 7031255301956804898), UINT64_C( 1474450428294761441) }, { UINT64_C(14583465103520464217), UINT64_C(12298299828163413800) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 3994920236002142288), UINT64_C( 216978030592215567) }, { UINT64_C(17908294585149505949), UINT64_C( 752740750983040994) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 6431661480201425578), UINT64_C(17079525600081545155) }, { UINT64_C( 6431661480201425578), UINT64_C(17526685962885593961) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 5292068948174131764), UINT64_C(17385868151557196591) }, { UINT64_C( 5292068948174131764), UINT64_C(13525532790163338768) }, { UINT64_MAX, UINT64_C( 0) } }, { { UINT64_C( 7981490243528583350), UINT64_C(15931039649777347335) }, { UINT64_C( 5477147589066124571), UINT64_C(15931039649777347335) }, { UINT64_C( 0), UINT64_MAX } }, { { UINT64_C( 4288175162100042517), UINT64_C(16470940607178522469) }, { UINT64_C( 4288175162100042517), UINT64_C( 1969053279587903843) }, { UINT64_MAX, UINT64_C( 0) } }, { { UINT64_C(17673668134669041266), UINT64_C( 108811382467493027) }, { UINT64_C(18146705304218835370), UINT64_C(16932089260167093638) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C(10894539657245827952), UINT64_C( 4324253423461110374) }, { UINT64_C(14498721430600439298), UINT64_C( 626635421977587112) }, { UINT64_MAX, UINT64_C( 0) } 
}, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vcleq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_private a_ = simde_uint64x2_to_private(simde_test_arm_neon_random_u64x2()); simde_uint64x2_private b_ = simde_uint64x2_to_private(simde_test_arm_neon_random_u64x2()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint64x2_t a = simde_uint64x2_from_private(a_); simde_uint64x2_t b = simde_uint64x2_from_private(b_); simde_uint64x2_t r = simde_vcleq_u64(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vcle_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcle_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcle_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vcle_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcle_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcle_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcle_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vcle_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vcle_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vcle_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vcleq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcleq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcleq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vcleq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcleq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcleq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcleq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vcleq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vcleq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vcleq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" 
simde-0.7.2/test/arm/neon/clez.c000066400000000000000000000767721400333146700164430ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN clez #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/clez.h" #else #include "../../../simde/arm/neon.h" #endif static int test_simde_vclez_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; uint32_t r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 70.04), SIMDE_FLOAT32_C( -657.90) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -994.42), SIMDE_FLOAT32_C( -790.82) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( -581.92), SIMDE_FLOAT32_C( 0.00) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 327.30), SIMDE_FLOAT32_C( -480.97) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 126.99) }, { UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -661.81) }, { UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_uint32x2_t r = simde_vclez_f32(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_private a_ = simde_float32x2_to_private(simde_test_arm_neon_random_f32x2(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0.0; } simde_float32x2_t a = simde_float32x2_from_private(a_); simde_uint32x2_t r = simde_vclez_f32(a); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclez_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; uint64_t r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( -670.22) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 0.00) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 680.93) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 487.28) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -782.87) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 0.00) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 891.09) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -949.37) }, { UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_uint64x1_t r = simde_vclez_f64(a); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_private a_ = simde_float64x1_to_private(simde_test_arm_neon_random_f64x1(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0.0; } simde_float64x1_t a = simde_float64x1_from_private(a_); simde_uint64x1_t r = simde_vclez_f64(a); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclez_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; uint8_t r[8]; } test_vec[] = { { { -INT8_C( 120), -INT8_C( 118), INT8_C( 0), -INT8_C( 103), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 81) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 0), -INT8_C( 109), INT8_C( 0), INT8_C( 1), -INT8_C( 61), INT8_C( 0), -INT8_C( 50), INT8_C( 0) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { INT8_C( 0), -INT8_C( 107), -INT8_C( 99), INT8_C( 0), -INT8_C( 65), -INT8_C( 127), INT8_C( 77), -INT8_C( 91) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 32), -INT8_C( 41), -INT8_C( 90), INT8_C( 0), INT8_C( 64), INT8_C( 0), INT8_C( 107), INT8_C( 0) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 117), INT8_C( 0), INT8_MIN, INT8_C( 53), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { INT8_C( 76), -INT8_C( 31), INT8_C( 28), -INT8_C( 116), INT8_C( 86), -INT8_C( 121), -INT8_C( 6), INT8_C( 0) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { INT8_C( 94), INT8_C( 0), -INT8_C( 116), INT8_C( 0), INT8_C( 0), INT8_C( 102), INT8_C( 0), INT8_C( 109) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 0), INT8_C( 92), 
INT8_C( 0), INT8_C( 119), -INT8_C( 29), INT8_C( 86), INT8_C( 0), INT8_C( 83) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_uint8x8_t r = simde_vclez_s8(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_private a_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int8x8_t a = simde_int8x8_from_private(a_); simde_uint8x8_t r = simde_vclez_s8(a); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclez_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; uint16_t r[4]; } test_vec[] = { { { -INT16_C( 19507), -INT16_C( 22607), INT16_C( 0), -INT16_C( 18447) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { INT16_C( 27590), INT16_C( 6171), INT16_C( 105), INT16_C( 0) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { -INT16_C( 14201), -INT16_C( 23807), -INT16_C( 18034), INT16_C( 0) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { -INT16_C( 2888), -INT16_C( 29360), -INT16_C( 26029), INT16_C( 6831) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 3019), -INT16_C( 15175), INT16_C( 11615), INT16_C( 0) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { INT16_C( 0), INT16_C( 0), -INT16_C( 7376), -INT16_C( 6111) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { INT16_C( 9483), INT16_C( 0), INT16_C( 30448), INT16_C( 0) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), 
UINT16_MAX } }, { { -INT16_C( 8346), INT16_C( 23751), INT16_C( 20544), INT16_C( 0) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_uint16x4_t r = simde_vclez_s16(a); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_private a_ = simde_int16x4_to_private(simde_test_arm_neon_random_i16x4()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int16x4_t a = simde_int16x4_from_private(a_); simde_uint16x4_t r = simde_vclez_s16(a); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclez_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; uint32_t r[2]; } test_vec[] = { { { -INT32_C( 1560005487), INT32_C( 0) }, { UINT32_MAX, UINT32_MAX } }, { { INT32_C( 0), INT32_C( 0) }, { UINT32_MAX, UINT32_MAX } }, { { INT32_C( 1211310087), -INT32_C( 2032582295) }, { UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 0), -INT32_C( 2036276554) }, { UINT32_MAX, UINT32_MAX } }, { { INT32_C( 0), INT32_C( 0) }, { UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 1144024440), -INT32_C( 1258538534) }, { UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 795864664), INT32_C( 112346726) }, { UINT32_MAX, UINT32_C( 0) } }, { { INT32_C( 1496076036), -INT32_C( 1751501023) }, { UINT32_C( 0), UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_uint32x2_t r = simde_vclez_s32(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i 
= 0 ; i < 8 ; i++) { simde_int32x2_private a_ = simde_int32x2_to_private(simde_test_arm_neon_random_i32x2()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int32x2_t a = simde_int32x2_from_private(a_); simde_uint32x2_t r = simde_vclez_s32(a); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclez_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; uint64_t r[1]; } test_vec[] = { { { -INT64_C( 1608322800724244625) }, { UINT64_MAX } }, { { INT64_C( 3916001074733946563) }, { UINT64_C( 0) } }, { { INT64_C( 0) }, { UINT64_MAX } }, { { INT64_C( 0) }, { UINT64_MAX } }, { { INT64_C( 8851094662317928534) }, { UINT64_C( 0) } }, { { INT64_C( 5705103693734990403) }, { UINT64_C( 0) } }, { { INT64_C( 1524843960183154149) }, { UINT64_C( 0) } }, { { -INT64_C( 1091930860044790496) }, { UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_uint64x1_t r = simde_vclez_s64(a); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_private a_ = simde_int64x1_to_private(simde_test_arm_neon_random_i64x1()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int64x1_t a = simde_int64x1_from_private(a_); simde_uint64x1_t r = simde_vclez_s64(a); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclezq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; uint32_t r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 221.89), SIMDE_FLOAT32_C( 155.65), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 633.13) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 746.23), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( -515.28), SIMDE_FLOAT32_C( 795.42), SIMDE_FLOAT32_C( 99.50), SIMDE_FLOAT32_C( 505.39) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -866.96), SIMDE_FLOAT32_C( -299.73), SIMDE_FLOAT32_C( 234.99) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -414.75), SIMDE_FLOAT32_C( 514.89), SIMDE_FLOAT32_C( 0.00) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 730.51), SIMDE_FLOAT32_C( 656.06), SIMDE_FLOAT32_C( 193.33), SIMDE_FLOAT32_C( 0.00) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 486.26), SIMDE_FLOAT32_C( 910.33), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 406.26) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 440.42), SIMDE_FLOAT32_C( 247.26), SIMDE_FLOAT32_C( 185.90), SIMDE_FLOAT32_C( -899.71) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_uint32x4_t r = 
simde_vclezq_f32(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_private a_ = simde_float32x4_to_private(simde_test_arm_neon_random_f32x4(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0.0; } simde_float32x4_t a = simde_float32x4_from_private(a_); simde_uint32x4_t r = simde_vclezq_f32(a); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclezq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; uint64_t r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -407.08), SIMDE_FLOAT64_C( 0.00) }, { UINT64_MAX, UINT64_MAX } }, { { SIMDE_FLOAT64_C( 119.73), SIMDE_FLOAT64_C( 73.59) }, { UINT64_C( 0), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 337.70), SIMDE_FLOAT64_C( 0.00) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( 629.77), SIMDE_FLOAT64_C( 0.00) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( 105.66), SIMDE_FLOAT64_C( -580.38) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( -14.53), SIMDE_FLOAT64_C( 483.80) }, { UINT64_MAX, UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 424.55), SIMDE_FLOAT64_C( -35.62) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( -74.95), SIMDE_FLOAT64_C( -62.26) }, { UINT64_MAX, UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_uint64x2_t r = simde_vclezq_f64(a); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_private a_ = simde_float64x2_to_private(simde_test_arm_neon_random_f64x2(-1000.0, 1000.0)); /* Make 
some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0.0; } simde_float64x2_t a = simde_float64x2_from_private(a_); simde_uint64x2_t r = simde_vclezq_f64(a); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclezq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; uint8_t r[16]; } test_vec[] = { { { INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 29), -INT8_C( 99), -INT8_C( 82), -INT8_C( 65), INT8_C( 0), INT8_C( 0), -INT8_C( 122), INT8_C( 15), -INT8_C( 10), INT8_C( 3), INT8_C( 0), INT8_C( 0), -INT8_C( 56) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { -INT8_C( 124), -INT8_C( 70), -INT8_C( 3), INT8_C( 34), INT8_C( 0), INT8_C( 0), -INT8_C( 116), INT8_C( 0), INT8_C( 67), -INT8_C( 101), INT8_C( 102), INT8_C( 70), -INT8_C( 115), INT8_C( 0), INT8_C( 14), -INT8_C( 54) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 112), INT8_C( 0), INT8_C( 49), INT8_C( 0), INT8_C( 0), -INT8_C( 67), INT8_C( 0), INT8_C( 99), INT8_C( 88), INT8_C( 0), -INT8_C( 86), -INT8_C( 26), INT8_C( 114), INT8_C( 0), -INT8_C( 80), INT8_C( 0) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { INT8_C( 0), INT8_C( 89), INT8_C( 95), -INT8_C( 3), INT8_C( 0), -INT8_C( 89), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 10), INT8_C( 85), -INT8_C( 55), -INT8_C( 61), INT8_C( 0), INT8_C( 56), INT8_C( 75) }, { 
UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 0), INT8_C( 47), INT8_MIN, -INT8_C( 52), INT8_C( 0), -INT8_C( 32), INT8_C( 59), INT8_C( 0), -INT8_C( 21), -INT8_C( 112), -INT8_C( 10), -INT8_C( 82), -INT8_C( 107), INT8_C( 46), INT8_C( 0), -INT8_C( 37) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { INT8_C( 40), INT8_C( 22), INT8_C( 96), -INT8_C( 2), -INT8_C( 10), -INT8_C( 101), INT8_C( 0), INT8_C( 0), INT8_C( 44), INT8_C( 33), -INT8_C( 113), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 99), -INT8_C( 9) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { INT8_C( 0), -INT8_C( 56), INT8_C( 53), INT8_C( 42), INT8_C( 0), INT8_C( 96), INT8_C( 0), -INT8_C( 112), -INT8_C( 126), -INT8_C( 101), INT8_C( 81), -INT8_C( 46), INT8_C( 36), -INT8_C( 18), -INT8_C( 55), -INT8_C( 123) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { INT8_C( 102), INT8_C( 14), INT8_C( 51), -INT8_C( 54), INT8_C( 0), INT8_C( 63), INT8_C( 0), -INT8_C( 16), -INT8_C( 38), -INT8_C( 84), -INT8_C( 62), -INT8_C( 2), INT8_C( 0), INT8_C( 0), -INT8_C( 125), INT8_C( 79) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_uint8x16_t r = simde_vclezq_s8(a); 
simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_private a_ = simde_int8x16_to_private(simde_test_arm_neon_random_i8x16()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int8x16_t a = simde_int8x16_from_private(a_); simde_uint8x16_t r = simde_vclezq_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclezq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; uint16_t r[8]; } test_vec[] = { { { INT16_C( 1992), -INT16_C( 10455), INT16_C( 15346), INT16_C( 5013), -INT16_C( 17142), -INT16_C( 32561), INT16_C( 21763), INT16_C( 9992) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 12143), -INT16_C( 17788), INT16_C( 30631), INT16_C( 0), INT16_C( 138) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { -INT16_C( 29080), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 12857), INT16_C( 0) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { -INT16_C( 346), INT16_C( 0), -INT16_C( 7546), -INT16_C( 4393), -INT16_C( 7824), -INT16_C( 32420), -INT16_C( 22024), INT16_C( 1181) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { -INT16_C( 13690), INT16_C( 20138), INT16_C( 10263), -INT16_C( 16881), INT16_C( 0), -INT16_C( 21485), INT16_C( 0), INT16_C( 8091) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { 
-INT16_C( 7914), -INT16_C( 17584), INT16_C( 22748), INT16_C( 25576), -INT16_C( 27870), INT16_C( 15025), INT16_C( 0), INT16_C( 0) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { INT16_C( 0), -INT16_C( 15994), INT16_C( 20281), INT16_C( 20310), INT16_C( 0), INT16_C( 3082), -INT16_C( 3074), INT16_C( 8559) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { -INT16_C( 20642), INT16_C( 0), INT16_C( 1240), -INT16_C( 2027), -INT16_C( 25856), INT16_C( 14777), INT16_C( 0), INT16_C( 6792) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_uint16x8_t r = simde_vclezq_s16(a); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_private a_ = simde_int16x8_to_private(simde_test_arm_neon_random_i16x8()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int16x8_t a = simde_int16x8_from_private(a_); simde_uint16x8_t r = simde_vclezq_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclezq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; uint32_t r[4]; } test_vec[] = { { { -INT32_C( 860221835), INT32_C( 2143068941), INT32_C( 0), INT32_C( 0) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { INT32_C( 210922921), INT32_C( 0), -INT32_C( 1389508553), -INT32_C( 478550058) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { INT32_C( 0), INT32_C( 2045744066), INT32_C( 0), INT32_C( 0) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { INT32_C( 891287902), INT32_C( 1796774112), INT32_C( 0), INT32_C( 1045017212) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { -INT32_C( 521356587), -INT32_C( 1985142705), INT32_C( 368802230), INT32_C( 0) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 0), -INT32_C( 526840677), INT32_C( 446976325), INT32_C( 1526370315) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 0), -INT32_C( 1394227462), INT32_C( 103588648), INT32_C( 677506957) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 0), -INT32_C( 290367938), INT32_C( 0), INT32_C( 0) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_uint32x4_t r = simde_vclezq_s32(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_private a_ = simde_int32x4_to_private(simde_test_arm_neon_random_i32x4()); /* 
Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int32x4_t a = simde_int32x4_from_private(a_); simde_uint32x4_t r = simde_vclezq_s32(a); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclezq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; uint64_t r[2]; } test_vec[] = { { { INT64_C( 3936110928391657468), INT64_C( 3422569291316624941) }, { UINT64_C( 0), UINT64_C( 0) } }, { { INT64_C( 0), INT64_C( 7752757755250222179) }, { UINT64_MAX, UINT64_C( 0) } }, { { INT64_C( 613874270280738635), -INT64_C( 5059614506910022572) }, { UINT64_C( 0), UINT64_MAX } }, { { -INT64_C( 1735777232729757626), -INT64_C( 807660422876200805) }, { UINT64_MAX, UINT64_MAX } }, { { INT64_C( 0), -INT64_C( 2781527010755628526) }, { UINT64_MAX, UINT64_MAX } }, { { INT64_C( 0), INT64_C( 0) }, { UINT64_MAX, UINT64_MAX } }, { { -INT64_C( 4031249581053433282), -INT64_C( 4363211058523835097) }, { UINT64_MAX, UINT64_MAX } }, { { INT64_C( 0), INT64_C( 0) }, { UINT64_MAX, UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_uint64x2_t r = simde_vclezq_s64(a); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_private a_ = simde_int64x2_to_private(simde_test_arm_neon_random_i64x2()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = 0; } simde_int64x2_t a = simde_int64x2_from_private(a_); simde_uint64x2_t r = simde_vclezq_s64(a); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vclez_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vclez_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vclez_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vclez_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vclez_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vclez_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vclezq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vclezq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vclezq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vclezq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vclezq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vclezq_s64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/cls.c000066400000000000000000001122001400333146700162400ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN cls #include "test-neon.h" #include "../../../simde/arm/neon/cls.h" #include "../../../simde/arm/neon/and.h" #include "../../../simde/arm/neon/dup_n.h" #include "../../../simde/arm/neon/neg.h" #include "../../../simde/arm/neon/set_lane.h" #include "../../../simde/arm/neon/shl.h" static int test_simde_vcls_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 0), -INT8_C( 1), -INT8_C( 2), INT8_C( 6), -INT8_C( 1), -INT8_C( 4), INT8_C( 8), -INT8_C( 1) }, { INT8_C( 7), INT8_C( 7), INT8_C( 6), INT8_C( 4), INT8_C( 7), INT8_C( 5), INT8_C( 3), INT8_C( 7) } }, { { -INT8_C( 14), -INT8_C( 4), -INT8_C( 1), -INT8_C( 1), -INT8_C( 3), -INT8_C( 97), INT8_C( 0), INT8_C( 14) }, { INT8_C( 3), INT8_C( 5), INT8_C( 7), INT8_C( 7), INT8_C( 5), INT8_C( 0), INT8_C( 7), INT8_C( 3) } }, { { INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 3), INT8_C( 12), -INT8_C( 37), -INT8_C( 48), -INT8_C( 59) }, 
{ INT8_C( 6), INT8_C( 7), INT8_C( 7), INT8_C( 5), INT8_C( 3), INT8_C( 1), INT8_C( 1), INT8_C( 1) } }, { { INT8_C( 17), INT8_C( 24), -INT8_C( 7), -INT8_C( 2), INT8_C( 0), -INT8_C( 13), INT8_C( 0), -INT8_C( 1) }, { INT8_C( 2), INT8_C( 2), INT8_C( 4), INT8_C( 6), INT8_C( 7), INT8_C( 3), INT8_C( 7), INT8_C( 7) } }, { { -INT8_C( 20), INT8_C( 0), INT8_C( 15), INT8_C( 0), -INT8_C( 37), INT8_C( 4), -INT8_C( 1), -INT8_C( 16) }, { INT8_C( 2), INT8_C( 7), INT8_C( 3), INT8_C( 7), INT8_C( 1), INT8_C( 4), INT8_C( 7), INT8_C( 3) } }, { { -INT8_C( 2), INT8_C( 0), INT8_C( 0), -INT8_C( 102), -INT8_C( 1), INT8_C( 1), INT8_C( 0), -INT8_C( 19) }, { INT8_C( 6), INT8_C( 7), INT8_C( 7), INT8_C( 0), INT8_C( 7), INT8_C( 6), INT8_C( 7), INT8_C( 2) } }, { { -INT8_C( 4), -INT8_C( 27), -INT8_C( 1), -INT8_C( 7), INT8_C( 0), INT8_C( 0), INT8_C( 3), -INT8_C( 3) }, { INT8_C( 5), INT8_C( 2), INT8_C( 7), INT8_C( 4), INT8_C( 7), INT8_C( 7), INT8_C( 5), INT8_C( 5) } }, { { INT8_C( 2), -INT8_C( 2), -INT8_C( 12), -INT8_C( 1), INT8_C( 1), INT8_C( 3), -INT8_C( 3), INT8_C( 1) }, { INT8_C( 5), INT8_C( 6), INT8_C( 3), INT8_C( 7), INT8_C( 6), INT8_C( 5), INT8_C( 5), INT8_C( 6) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t r = simde_vcls_s8(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); a = simde_vshl_s8(a, simde_vneg_s8(simde_vand_s8(simde_test_arm_neon_random_i8x8(), simde_vdup_n_s8(7)))); if (i == 0) { a = simde_vset_lane_s8( INT8_C(0), a, 0); a = simde_vset_lane_s8(-INT8_C(1), a, 1); } simde_int8x8_t r = simde_vcls_s8(a); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcls_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t 
a[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 0), -INT16_C( 1), -INT16_C( 4), -INT16_C( 980) }, { INT16_C( 15), INT16_C( 15), INT16_C( 13), INT16_C( 5) } }, { { INT16_C( 439), -INT16_C( 1580), -INT16_C( 4), -INT16_C( 19564) }, { INT16_C( 6), INT16_C( 4), INT16_C( 13), INT16_C( 0) } }, { { -INT16_C( 358), -INT16_C( 8), -INT16_C( 8990), -INT16_C( 8693) }, { INT16_C( 6), INT16_C( 12), INT16_C( 1), INT16_C( 1) } }, { { INT16_C( 776), INT16_C( 0), -INT16_C( 6837), -INT16_C( 1) }, { INT16_C( 5), INT16_C( 15), INT16_C( 2), INT16_C( 15) } }, { { -INT16_C( 56), INT16_C( 0), INT16_C( 13221), INT16_C( 0) }, { INT16_C( 9), INT16_C( 15), INT16_C( 1), INT16_C( 15) } }, { { -INT16_C( 1), -INT16_C( 3), -INT16_C( 1), -INT16_C( 170) }, { INT16_C( 15), INT16_C( 13), INT16_C( 15), INT16_C( 7) } }, { { -INT16_C( 16), -INT16_C( 7), INT16_C( 1), INT16_C( 16) }, { INT16_C( 11), INT16_C( 12), INT16_C( 14), INT16_C( 10) } }, { { -INT16_C( 2), -INT16_C( 1), -INT16_C( 1), -INT16_C( 15) }, { INT16_C( 14), INT16_C( 15), INT16_C( 15), INT16_C( 11) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t r = simde_vcls_s16(a); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); a = simde_vshl_s16(a, simde_vneg_s16(simde_vand_s16(simde_test_arm_neon_random_i16x4(), simde_vdup_n_s16(15)))); if (i == 0) { a = simde_vset_lane_s16( INT16_C(0), a, 0); a = simde_vset_lane_s16(-INT16_C(1), a, 1); } simde_int16x4_t r = simde_vcls_s16(a); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcls_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 0), -INT32_C( 1) }, { INT32_C( 31), 
INT32_C( 31) } }, { { INT32_C( 5692), INT32_C( 8382156) }, { INT32_C( 18), INT32_C( 8) } }, { { -INT32_C( 3780), -INT32_C( 72139) }, { INT32_C( 19), INT32_C( 14) } }, { { -INT32_C( 39), -INT32_C( 28205650) }, { INT32_C( 25), INT32_C( 6) } }, { { -INT32_C( 16), -INT32_C( 80820) }, { INT32_C( 27), INT32_C( 14) } }, { { INT32_C( 18993), -INT32_C( 287849) }, { INT32_C( 16), INT32_C( 12) } }, { { INT32_C( 137743468), -INT32_C( 5881742) }, { INT32_C( 3), INT32_C( 8) } }, { { -INT32_C( 5524402), -INT32_C( 25465) }, { INT32_C( 8), INT32_C( 16) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t r = simde_vcls_s32(a); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); a = simde_vshl_s32(a, simde_vneg_s32(simde_vand_s32(simde_test_arm_neon_random_i32x2(), simde_vdup_n_s32(31)))); if (i == 0) { a = simde_vset_lane_s32( INT32_C(0), a, 0); a = simde_vset_lane_s32(-INT32_C(1), a, 1); } simde_int32x2_t r = simde_vcls_s32(a); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* clang and GCC are both missing the unsigned versions of these functions. 
See https://bugs.llvm.org/show_bug.cgi?id=47407 */ #if !defined(SIMDE_NATIVE_ALIASES_TESTING) static int test_simde_vcls_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; int8_t r[8]; } test_vec[] = { { { UINT8_C( 0), UINT8_MAX, UINT8_C(113), UINT8_C( 36), UINT8_C( 14), UINT8_C( 0), UINT8_C( 3), UINT8_C( 31) }, { INT8_C( 7), INT8_C( 7), INT8_C( 0), INT8_C( 1), INT8_C( 3), INT8_C( 7), INT8_C( 5), INT8_C( 2) } }, { { UINT8_C( 60), UINT8_C( 30), UINT8_C( 10), UINT8_C( 63), UINT8_C( 17), UINT8_C( 22), UINT8_C( 9), UINT8_C( 4) }, { INT8_C( 1), INT8_C( 2), INT8_C( 3), INT8_C( 1), INT8_C( 2), INT8_C( 2), INT8_C( 3), INT8_C( 4) } }, { { UINT8_C( 17), UINT8_C( 0), UINT8_C( 4), UINT8_C( 5), UINT8_C( 10), UINT8_C(162), UINT8_C(180), UINT8_C( 34) }, { INT8_C( 2), INT8_C( 7), INT8_C( 4), INT8_C( 4), INT8_C( 3), INT8_C( 0), INT8_C( 0), INT8_C( 1) } }, { { UINT8_C( 0), UINT8_C( 2), UINT8_C( 41), UINT8_C( 21), UINT8_C( 1), UINT8_C( 6), UINT8_C( 57), UINT8_C( 0) }, { INT8_C( 7), INT8_C( 5), INT8_C( 1), INT8_C( 2), INT8_C( 6), INT8_C( 4), INT8_C( 1), INT8_C( 7) } }, { { UINT8_C( 39), UINT8_C( 47), UINT8_C( 1), UINT8_C( 41), UINT8_C( 0), UINT8_C( 1), UINT8_C( 14), UINT8_C( 77) }, { INT8_C( 1), INT8_C( 1), INT8_C( 6), INT8_C( 1), INT8_C( 7), INT8_C( 6), INT8_C( 3), INT8_C( 0) } }, { { UINT8_C(127), UINT8_C( 47), UINT8_C( 12), UINT8_C( 93), UINT8_C( 1), UINT8_C( 15), UINT8_C( 3), UINT8_C( 26) }, { INT8_C( 0), INT8_C( 1), INT8_C( 3), INT8_C( 0), INT8_C( 6), INT8_C( 3), INT8_C( 5), INT8_C( 2) } }, { { UINT8_C( 1), UINT8_C( 27), UINT8_C( 4), UINT8_C(162), UINT8_C( 29), UINT8_C( 3), UINT8_C( 15), UINT8_C( 84) }, { INT8_C( 6), INT8_C( 2), INT8_C( 4), INT8_C( 0), INT8_C( 2), INT8_C( 5), INT8_C( 3), INT8_C( 0) } }, { { UINT8_C( 0), UINT8_C( 41), UINT8_C( 59), UINT8_C( 3), UINT8_C( 2), UINT8_C(181), UINT8_C( 45), UINT8_C(121) }, { INT8_C( 7), INT8_C( 1), INT8_C( 1), INT8_C( 5), INT8_C( 5), INT8_C( 0), INT8_C( 1), INT8_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) 
/ sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_int8x8_t r = simde_vcls_u8(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); a = simde_vshl_u8(a, simde_vneg_s8(simde_vand_s8(simde_test_arm_neon_random_i8x8(), simde_vdup_n_s8(7)))); if (i == 0) { a = simde_vset_lane_u8(UINT8_C(0), a, 0); a = simde_vset_lane_u8( UINT8_MAX, a, 1); } simde_int8x8_t r = simde_vcls_u8(a); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcls_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; int16_t r[4]; } test_vec[] = { { { UINT16_C( 0), UINT16_MAX, UINT16_C( 460), UINT16_C( 0) }, { INT16_C( 15), INT16_C( 15), INT16_C( 6), INT16_C( 15) } }, { { UINT16_C( 496), UINT16_C( 15), UINT16_C( 6837), UINT16_C( 1259) }, { INT16_C( 6), INT16_C( 11), INT16_C( 2), INT16_C( 4) } }, { { UINT16_C( 15), UINT16_C( 63), UINT16_C( 6), UINT16_C( 79) }, { INT16_C( 11), INT16_C( 9), INT16_C( 12), INT16_C( 8) } }, { { UINT16_C( 402), UINT16_C( 1467), UINT16_C( 4), UINT16_C( 4) }, { INT16_C( 6), INT16_C( 4), INT16_C( 12), INT16_C( 12) } }, { { UINT16_C( 28), UINT16_C( 755), UINT16_C( 3), UINT16_C( 14) }, { INT16_C( 10), INT16_C( 5), INT16_C( 13), INT16_C( 11) } }, { { UINT16_C( 38), UINT16_C( 821), UINT16_C( 57), UINT16_C( 1759) }, { INT16_C( 9), INT16_C( 5), INT16_C( 9), INT16_C( 4) } }, { { UINT16_C( 2122), UINT16_C( 480), UINT16_C( 402), UINT16_C( 27) }, { INT16_C( 3), INT16_C( 6), INT16_C( 6), INT16_C( 10) } }, { { UINT16_C( 2), UINT16_C( 239), UINT16_C( 7107), UINT16_C( 89) }, { INT16_C( 13), INT16_C( 7), INT16_C( 2), INT16_C( 8) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_int16x4_t 
r = simde_vcls_u16(a); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); a = simde_vshl_u16(a, simde_vneg_s16(simde_vand_s16(simde_test_arm_neon_random_i16x4(), simde_vdup_n_s16(15)))); if (i == 0) { a = simde_vset_lane_u16(UINT16_C(0), a, 0); a = simde_vset_lane_u16( UINT16_MAX, a, 1); } simde_int16x4_t r = simde_vcls_u16(a); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcls_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; int32_t r[2]; } test_vec[] = { { { UINT32_C( 0), UINT32_MAX }, { INT32_C( 31), INT32_C( 31) } }, { { UINT32_C( 1), UINT32_C( 78) }, { INT32_C( 30), INT32_C( 24) } }, { { UINT32_C( 102847535), UINT32_C( 2594) }, { INT32_C( 4), INT32_C( 19) } }, { { UINT32_C( 2630), UINT32_C( 493) }, { INT32_C( 19), INT32_C( 22) } }, { { UINT32_C( 187), UINT32_C( 1733) }, { INT32_C( 23), INT32_C( 20) } }, { { UINT32_C( 11704), UINT32_C( 3651) }, { INT32_C( 17), INT32_C( 19) } }, { { UINT32_C( 7030603), UINT32_C( 245915039) }, { INT32_C( 8), INT32_C( 3) } }, { { UINT32_C( 25), UINT32_C( 2041698) }, { INT32_C( 26), INT32_C( 10) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_int32x2_t r = simde_vcls_u32(a); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); a = simde_vshl_u32(a, simde_vneg_s32(simde_vand_s32(simde_test_arm_neon_random_i32x2(), simde_vdup_n_s32(31)))); if (i == 0) { a = simde_vset_lane_u32(UINT32_C(0), a, 0); a = simde_vset_lane_u32( UINT32_MAX, a, 1); } simde_int32x2_t r = simde_vcls_u32(a); 
simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #endif static int test_simde_vclsq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 0), -INT8_C( 1), -INT8_C( 9), INT8_C( 4), -INT8_C( 36), INT8_C( 0), -INT8_C( 123), -INT8_C( 16), -INT8_C( 11), INT8_C( 12), INT8_C( 3), -INT8_C( 5), INT8_C( 47), INT8_C( 0), -INT8_C( 2), -INT8_C( 1) }, { INT8_C( 7), INT8_C( 7), INT8_C( 3), INT8_C( 4), INT8_C( 1), INT8_C( 7), INT8_C( 0), INT8_C( 3), INT8_C( 3), INT8_C( 3), INT8_C( 5), INT8_C( 4), INT8_C( 1), INT8_C( 7), INT8_C( 6), INT8_C( 7) } }, { { INT8_C( 0), -INT8_C( 1), INT8_C( 4), INT8_C( 44), INT8_C( 0), -INT8_C( 6), -INT8_C( 5), -INT8_C( 18), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 20), INT8_C( 6), INT8_C( 12) }, { INT8_C( 7), INT8_C( 7), INT8_C( 4), INT8_C( 1), INT8_C( 7), INT8_C( 4), INT8_C( 4), INT8_C( 2), INT8_C( 7), INT8_C( 7), INT8_C( 6), INT8_C( 7), INT8_C( 7), INT8_C( 2), INT8_C( 4), INT8_C( 3) } }, { { -INT8_C( 1), -INT8_C( 2), INT8_C( 3), INT8_C( 1), INT8_C( 18), INT8_C( 0), -INT8_C( 2), INT8_C( 44), INT8_C( 6), INT8_C( 6), -INT8_C( 106), INT8_C( 24), -INT8_C( 4), -INT8_C( 4), -INT8_C( 1), -INT8_C( 1) }, { INT8_C( 7), INT8_C( 6), INT8_C( 5), INT8_C( 6), INT8_C( 2), INT8_C( 7), INT8_C( 6), INT8_C( 1), INT8_C( 4), INT8_C( 4), INT8_C( 0), INT8_C( 2), INT8_C( 5), INT8_C( 5), INT8_C( 7), INT8_C( 7) } }, { { INT8_C( 9), INT8_C( 0), -INT8_C( 1), INT8_C( 11), INT8_C( 3), -INT8_C( 15), -INT8_C( 5), INT8_C( 0), -INT8_C( 2), INT8_C( 4), -INT8_C( 96), INT8_C( 7), INT8_C( 23), INT8_C( 1), INT8_C( 0), INT8_C( 4) }, { INT8_C( 3), INT8_C( 7), INT8_C( 7), INT8_C( 3), INT8_C( 5), INT8_C( 3), INT8_C( 4), INT8_C( 7), INT8_C( 6), INT8_C( 4), INT8_C( 0), INT8_C( 4), INT8_C( 2), INT8_C( 6), INT8_C( 7), INT8_C( 4) } }, { { -INT8_C( 44), -INT8_C( 2), INT8_C( 0), -INT8_C( 4), INT8_C( 5), -INT8_C( 2), 
INT8_C( 15), -INT8_C( 1), INT8_C( 0), -INT8_C( 32), INT8_C( 0), INT8_C( 85), INT8_C( 35), INT8_C( 29), -INT8_C( 7), -INT8_C( 4) }, { INT8_C( 1), INT8_C( 6), INT8_C( 7), INT8_C( 5), INT8_C( 4), INT8_C( 6), INT8_C( 3), INT8_C( 7), INT8_C( 7), INT8_C( 2), INT8_C( 7), INT8_C( 0), INT8_C( 1), INT8_C( 2), INT8_C( 4), INT8_C( 5) } }, { { INT8_C( 2), INT8_C( 21), INT8_C( 12), INT8_C( 1), INT8_C( 2), -INT8_C( 8), -INT8_C( 2), -INT8_C( 4), INT8_C( 1), -INT8_C( 1), -INT8_C( 2), INT8_C( 1), INT8_C( 32), INT8_C( 58), INT8_C( 12), -INT8_C( 1) }, { INT8_C( 5), INT8_C( 2), INT8_C( 3), INT8_C( 6), INT8_C( 5), INT8_C( 4), INT8_C( 6), INT8_C( 5), INT8_C( 6), INT8_C( 7), INT8_C( 6), INT8_C( 6), INT8_C( 1), INT8_C( 1), INT8_C( 3), INT8_C( 7) } }, { { -INT8_C( 1), -INT8_C( 2), INT8_C( 8), INT8_C( 2), INT8_C( 5), INT8_C( 0), -INT8_C( 101), INT8_C( 88), INT8_C( 0), INT8_C( 3), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 7), INT8_C( 3), INT8_C( 7) }, { INT8_C( 7), INT8_C( 6), INT8_C( 3), INT8_C( 5), INT8_C( 4), INT8_C( 7), INT8_C( 0), INT8_C( 0), INT8_C( 7), INT8_C( 5), INT8_C( 7), INT8_C( 7), INT8_C( 7), INT8_C( 4), INT8_C( 5), INT8_C( 4) } }, { { -INT8_C( 104), -INT8_C( 17), -INT8_C( 28), -INT8_C( 11), -INT8_C( 14), INT8_C( 11), INT8_C( 0), -INT8_C( 18), -INT8_C( 93), -INT8_C( 15), INT8_C( 0), -INT8_C( 2), -INT8_C( 9), INT8_C( 12), -INT8_C( 13), INT8_C( 15) }, { INT8_C( 0), INT8_C( 2), INT8_C( 2), INT8_C( 3), INT8_C( 3), INT8_C( 3), INT8_C( 7), INT8_C( 2), INT8_C( 0), INT8_C( 3), INT8_C( 7), INT8_C( 6), INT8_C( 3), INT8_C( 3), INT8_C( 3), INT8_C( 3) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t r = simde_vclsq_s8(a); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); a = simde_vshlq_s8(a, 
simde_vnegq_s8(simde_vandq_s8(simde_test_arm_neon_random_i8x16(), simde_vdupq_n_s8(7)))); if (i == 0) { a = simde_vsetq_lane_s8( INT8_C(0), a, 0); a = simde_vsetq_lane_s8(-INT8_C(1), a, 1); } simde_int8x16_t r = simde_vclsq_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclsq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 11), INT16_C( 1), -INT16_C( 8), INT16_C( 69), INT16_C( 0) }, { INT16_C( 15), INT16_C( 15), INT16_C( 15), INT16_C( 11), INT16_C( 14), INT16_C( 12), INT16_C( 8), INT16_C( 15) } }, { { INT16_C( 25), INT16_C( 39), INT16_C( 412), INT16_C( 550), INT16_C( 0), INT16_C( 1971), -INT16_C( 82), INT16_C( 530) }, { INT16_C( 10), INT16_C( 9), INT16_C( 6), INT16_C( 5), INT16_C( 15), INT16_C( 4), INT16_C( 8), INT16_C( 5) } }, { { -INT16_C( 7), -INT16_C( 3665), -INT16_C( 4), INT16_C( 1), -INT16_C( 20), -INT16_C( 2), -INT16_C( 1), -INT16_C( 43) }, { INT16_C( 12), INT16_C( 3), INT16_C( 13), INT16_C( 14), INT16_C( 10), INT16_C( 14), INT16_C( 15), INT16_C( 9) } }, { { -INT16_C( 6), INT16_C( 109), -INT16_C( 7304), INT16_C( 1198), INT16_C( 134), INT16_C( 13), INT16_C( 0), INT16_C( 11) }, { INT16_C( 12), INT16_C( 8), INT16_C( 2), INT16_C( 4), INT16_C( 7), INT16_C( 11), INT16_C( 15), INT16_C( 11) } }, { { INT16_C( 28), -INT16_C( 14), INT16_C( 27), INT16_C( 271), -INT16_C( 5), INT16_C( 257), -INT16_C( 3548), -INT16_C( 59) }, { INT16_C( 10), INT16_C( 11), INT16_C( 10), INT16_C( 6), INT16_C( 12), INT16_C( 6), INT16_C( 3), INT16_C( 9) } }, { { INT16_C( 0), -INT16_C( 364), -INT16_C( 1), INT16_C( 15), INT16_C( 12), -INT16_C( 651), -INT16_C( 1), INT16_C( 0) }, { INT16_C( 15), INT16_C( 6), INT16_C( 15), INT16_C( 11), INT16_C( 11), INT16_C( 5), INT16_C( 15), INT16_C( 15) } }, { { INT16_C( 0), INT16_C( 4449), INT16_C( 28), -INT16_C( 5416), 
-INT16_C( 8), INT16_C( 2848), INT16_C( 9525), INT16_C( 6) }, { INT16_C( 15), INT16_C( 2), INT16_C( 10), INT16_C( 2), INT16_C( 12), INT16_C( 3), INT16_C( 1), INT16_C( 12) } }, { { -INT16_C( 793), INT16_C( 107), INT16_C( 776), INT16_C( 7301), -INT16_C( 26590), -INT16_C( 15), -INT16_C( 804), INT16_C( 19) }, { INT16_C( 5), INT16_C( 8), INT16_C( 5), INT16_C( 2), INT16_C( 0), INT16_C( 11), INT16_C( 5), INT16_C( 10) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t r = simde_vclsq_s16(a); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); a = simde_vshlq_s16(a, simde_vnegq_s16(simde_vandq_s16(simde_test_arm_neon_random_i16x8(), simde_vdupq_n_s16(15)))); if (i == 0) { a = simde_vsetq_lane_s16( INT16_C(0), a, 0); a = simde_vsetq_lane_s16(-INT16_C(1), a, 1); } simde_int16x8_t r = simde_vclsq_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclsq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 0), -INT32_C( 1), -INT32_C( 24), INT32_C( 1190162929) }, { INT32_C( 31), INT32_C( 31), INT32_C( 26), INT32_C( 0) } }, { { -INT32_C( 32305), -INT32_C( 7), INT32_C( 0), -INT32_C( 23) }, { INT32_C( 16), INT32_C( 28), INT32_C( 31), INT32_C( 26) } }, { { INT32_C( 11961030), -INT32_C( 12), INT32_C( 16145), INT32_C( 501969) }, { INT32_C( 7), INT32_C( 27), INT32_C( 17), INT32_C( 12) } }, { { -INT32_C( 241), INT32_C( 2208371), INT32_C( 2033), -INT32_C( 472613) }, { INT32_C( 23), INT32_C( 9), INT32_C( 20), INT32_C( 12) } }, { { -INT32_C( 16), INT32_C( 367204), -INT32_C( 8202558), -INT32_C( 9760908) }, { INT32_C( 27), INT32_C( 12), INT32_C( 8), INT32_C( 7) 
} }, { { INT32_C( 461), -INT32_C( 129), -INT32_C( 1296), -INT32_C( 24948) }, { INT32_C( 22), INT32_C( 23), INT32_C( 20), INT32_C( 16) } }, { { -INT32_C( 1), INT32_C( 55140), INT32_C( 58632), -INT32_C( 466226226) }, { INT32_C( 31), INT32_C( 15), INT32_C( 15), INT32_C( 2) } }, { { -INT32_C( 1373374), INT32_C( 542555820), INT32_C( 1355), -INT32_C( 11555) }, { INT32_C( 10), INT32_C( 1), INT32_C( 20), INT32_C( 17) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t r = simde_vclsq_s32(a); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); a = simde_vshlq_s32(a, simde_vnegq_s32(simde_vandq_s32(simde_test_arm_neon_random_i32x4(), simde_vdupq_n_s32(31)))); if (i == 0) { a = simde_vsetq_lane_s32( INT32_C(0), a, 0); a = simde_vsetq_lane_s32(-INT32_C(1), a, 1); } simde_int32x4_t r = simde_vclsq_s32(a); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #if !defined(SIMDE_NATIVE_ALIASES_TESTING) static int test_simde_vclsq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; int8_t r[16]; } test_vec[] = { { { UINT8_C( 0), UINT8_MAX, UINT8_C( 7), UINT8_C( 18), UINT8_C( 91), UINT8_C( 73), UINT8_C( 44), UINT8_C( 10), UINT8_C(226), UINT8_C( 13), UINT8_C( 52), UINT8_C( 18), UINT8_C( 1), UINT8_C( 1), UINT8_C( 59), UINT8_C( 6) }, { INT8_C( 7), INT8_C( 7), INT8_C( 4), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 3), INT8_C( 2), INT8_C( 3), INT8_C( 1), INT8_C( 2), INT8_C( 6), INT8_C( 6), INT8_C( 1), INT8_C( 4) } }, { { UINT8_C( 14), UINT8_C( 79), UINT8_C( 18), UINT8_C( 4), UINT8_C(115), UINT8_C( 2), UINT8_C( 35), UINT8_C(100), UINT8_C( 2), UINT8_C( 3), UINT8_C( 2), UINT8_C( 37), UINT8_C( 20), UINT8_C( 0), UINT8_C( 
3), UINT8_C( 28) }, { INT8_C( 3), INT8_C( 0), INT8_C( 2), INT8_C( 4), INT8_C( 0), INT8_C( 5), INT8_C( 1), INT8_C( 0), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 1), INT8_C( 2), INT8_C( 7), INT8_C( 5), INT8_C( 2) } }, { { UINT8_C(102), UINT8_C( 4), UINT8_C( 21), UINT8_C( 44), UINT8_C( 56), UINT8_C( 15), UINT8_C( 7), UINT8_C( 55), UINT8_C( 11), UINT8_C( 1), UINT8_C( 51), UINT8_C( 22), UINT8_C( 4), UINT8_C(100), UINT8_C( 2), UINT8_C( 63) }, { INT8_C( 0), INT8_C( 4), INT8_C( 2), INT8_C( 1), INT8_C( 1), INT8_C( 3), INT8_C( 4), INT8_C( 1), INT8_C( 3), INT8_C( 6), INT8_C( 1), INT8_C( 2), INT8_C( 4), INT8_C( 0), INT8_C( 5), INT8_C( 1) } }, { { UINT8_C( 1), UINT8_C( 6), UINT8_C( 0), UINT8_C( 94), UINT8_C( 89), UINT8_C( 16), UINT8_C(150), UINT8_C( 95), UINT8_C( 2), UINT8_C( 12), UINT8_C( 6), UINT8_C(164), UINT8_C( 2), UINT8_C( 0), UINT8_C( 8), UINT8_C( 59) }, { INT8_C( 6), INT8_C( 4), INT8_C( 7), INT8_C( 0), INT8_C( 0), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 5), INT8_C( 3), INT8_C( 4), INT8_C( 0), INT8_C( 5), INT8_C( 7), INT8_C( 3), INT8_C( 1) } }, { { UINT8_C( 0), UINT8_C( 57), UINT8_C( 3), UINT8_C( 1), UINT8_C( 3), UINT8_C( 3), UINT8_C( 75), UINT8_C(123), UINT8_C( 45), UINT8_C( 0), UINT8_C( 15), UINT8_C( 4), UINT8_C( 7), UINT8_C( 8), UINT8_C( 0), UINT8_C( 5) }, { INT8_C( 7), INT8_C( 1), INT8_C( 5), INT8_C( 6), INT8_C( 5), INT8_C( 5), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 7), INT8_C( 3), INT8_C( 4), INT8_C( 4), INT8_C( 3), INT8_C( 7), INT8_C( 4) } }, { { UINT8_C( 13), UINT8_C( 21), UINT8_C( 5), UINT8_C(105), UINT8_C( 79), UINT8_C( 12), UINT8_C( 39), UINT8_C( 62), UINT8_C( 50), UINT8_C( 27), UINT8_C( 0), UINT8_C( 52), UINT8_C( 5), UINT8_C( 3), UINT8_C( 1), UINT8_C( 1) }, { INT8_C( 3), INT8_C( 2), INT8_C( 4), INT8_C( 0), INT8_C( 0), INT8_C( 3), INT8_C( 1), INT8_C( 1), INT8_C( 1), INT8_C( 2), INT8_C( 7), INT8_C( 1), INT8_C( 4), INT8_C( 5), INT8_C( 6), INT8_C( 6) } }, { { UINT8_C( 5), UINT8_C( 26), UINT8_C( 5), UINT8_C( 22), UINT8_C( 1), UINT8_C( 0), UINT8_C( 5), 
UINT8_C( 48), UINT8_C( 58), UINT8_C( 2), UINT8_C( 3), UINT8_C( 0), UINT8_C( 4), UINT8_C( 22), UINT8_C( 2), UINT8_C( 25) }, { INT8_C( 4), INT8_C( 2), INT8_C( 4), INT8_C( 2), INT8_C( 6), INT8_C( 7), INT8_C( 4), INT8_C( 1), INT8_C( 1), INT8_C( 5), INT8_C( 5), INT8_C( 7), INT8_C( 4), INT8_C( 2), INT8_C( 5), INT8_C( 2) } }, { { UINT8_C( 13), UINT8_C( 16), UINT8_C( 12), UINT8_C( 6), UINT8_C( 0), UINT8_C( 3), UINT8_C( 6), UINT8_C(158), UINT8_C( 30), UINT8_C(125), UINT8_C( 1), UINT8_C( 0), UINT8_C( 5), UINT8_C( 3), UINT8_C(124), UINT8_C( 3) }, { INT8_C( 3), INT8_C( 2), INT8_C( 3), INT8_C( 4), INT8_C( 7), INT8_C( 5), INT8_C( 4), INT8_C( 0), INT8_C( 2), INT8_C( 0), INT8_C( 6), INT8_C( 7), INT8_C( 4), INT8_C( 5), INT8_C( 0), INT8_C( 5) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_int8x16_t r = simde_vclsq_u8(a); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); a = simde_vshlq_u8(a, simde_vnegq_s8(simde_vandq_s8(simde_test_arm_neon_random_i8x16(), simde_vdupq_n_s8(7)))); if (i == 0) { a = simde_vsetq_lane_u8(UINT8_C(0), a, 0); a = simde_vsetq_lane_u8( UINT8_MAX, a, 1); } simde_int8x16_t r = simde_vclsq_u8(a); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclsq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; int16_t r[8]; } test_vec[] = { { { UINT16_C( 0), UINT16_MAX, UINT16_C( 3478), UINT16_C( 2), UINT16_C(48946), UINT16_C( 7341), UINT16_C(29212), UINT16_C( 15) }, { INT16_C( 15), INT16_C( 15), INT16_C( 3), INT16_C( 13), INT16_C( 0), INT16_C( 2), INT16_C( 0), INT16_C( 11) } }, { { UINT16_C(14806), UINT16_C( 11), UINT16_C( 3), UINT16_C(10114), UINT16_C( 438), UINT16_C( 5958), UINT16_C( 
0), UINT16_C( 0) }, { INT16_C( 1), INT16_C( 11), INT16_C( 13), INT16_C( 1), INT16_C( 6), INT16_C( 2), INT16_C( 15), INT16_C( 15) } }, { { UINT16_C(16922), UINT16_C( 5), UINT16_C( 57), UINT16_C( 3), UINT16_C(13716), UINT16_C( 164), UINT16_C( 0), UINT16_C( 5) }, { INT16_C( 0), INT16_C( 12), INT16_C( 9), INT16_C( 13), INT16_C( 1), INT16_C( 7), INT16_C( 15), INT16_C( 12) } }, { { UINT16_C( 23), UINT16_C( 89), UINT16_C( 2792), UINT16_C( 2343), UINT16_C( 127), UINT16_C( 260), UINT16_C( 935), UINT16_C( 0) }, { INT16_C( 10), INT16_C( 8), INT16_C( 3), INT16_C( 3), INT16_C( 8), INT16_C( 6), INT16_C( 5), INT16_C( 15) } }, { { UINT16_C( 60), UINT16_C( 2113), UINT16_C( 3), UINT16_C( 3860), UINT16_C(13951), UINT16_C(29946), UINT16_C( 3), UINT16_C( 474) }, { INT16_C( 9), INT16_C( 3), INT16_C( 13), INT16_C( 3), INT16_C( 1), INT16_C( 0), INT16_C( 13), INT16_C( 6) } }, { { UINT16_C( 1), UINT16_C( 62), UINT16_C( 0), UINT16_C( 1979), UINT16_C( 109), UINT16_C( 14), UINT16_C( 7464), UINT16_C( 3008) }, { INT16_C( 14), INT16_C( 9), INT16_C( 15), INT16_C( 4), INT16_C( 8), INT16_C( 11), INT16_C( 2), INT16_C( 3) } }, { { UINT16_C( 72), UINT16_C( 2), UINT16_C( 135), UINT16_C( 25), UINT16_C( 248), UINT16_C( 1679), UINT16_C( 3838), UINT16_C( 0) }, { INT16_C( 8), INT16_C( 13), INT16_C( 7), INT16_C( 10), INT16_C( 7), INT16_C( 4), INT16_C( 3), INT16_C( 15) } }, { { UINT16_C( 2263), UINT16_C( 0), UINT16_C( 11), UINT16_C( 31), UINT16_C( 3), UINT16_C(35599), UINT16_C( 313), UINT16_C( 4514) }, { INT16_C( 3), INT16_C( 15), INT16_C( 11), INT16_C( 10), INT16_C( 13), INT16_C( 0), INT16_C( 6), INT16_C( 2) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_int16x8_t r = simde_vclsq_u16(a); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); a = simde_vshlq_u16(a, 
simde_vnegq_s16(simde_vandq_s16(simde_test_arm_neon_random_i16x8(), simde_vdupq_n_s16(15)))); if (i == 0) { a = simde_vsetq_lane_u16(UINT16_C(0), a, 0); a = simde_vsetq_lane_u16( UINT16_MAX, a, 1); } simde_int16x8_t r = simde_vclsq_u16(a); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclsq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; int32_t r[4]; } test_vec[] = { { { UINT32_C( 0), UINT32_MAX, UINT32_C( 1556457), UINT32_C( 848) }, { INT32_C( 31), INT32_C( 31), INT32_C( 10), INT32_C( 21) } }, { { UINT32_C( 2905984), UINT32_C( 449098526), UINT32_C( 102184), UINT32_C( 7) }, { INT32_C( 9), INT32_C( 2), INT32_C( 14), INT32_C( 28) } }, { { UINT32_C( 0), UINT32_C( 98276), UINT32_C( 2), UINT32_C( 176368497) }, { INT32_C( 31), INT32_C( 14), INT32_C( 29), INT32_C( 3) } }, { { UINT32_C( 1941758), UINT32_C( 314), UINT32_C( 1267), UINT32_C( 74738014) }, { INT32_C( 10), INT32_C( 22), INT32_C( 20), INT32_C( 4) } }, { { UINT32_C( 21888), UINT32_C( 68176), UINT32_C( 84), UINT32_C( 571158) }, { INT32_C( 16), INT32_C( 14), INT32_C( 24), INT32_C( 11) } }, { { UINT32_C( 109194), UINT32_C( 97702763), UINT32_C( 203919), UINT32_C( 224015726) }, { INT32_C( 14), INT32_C( 4), INT32_C( 13), INT32_C( 3) } }, { { UINT32_C( 960), UINT32_C(1498788316), UINT32_C( 807), UINT32_C( 212) }, { INT32_C( 21), INT32_C( 0), INT32_C( 21), INT32_C( 23) } }, { { UINT32_C( 52), UINT32_C( 31), UINT32_C( 508), UINT32_C( 1422) }, { INT32_C( 25), INT32_C( 26), INT32_C( 22), INT32_C( 20) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_int32x4_t r = simde_vclsq_u32(a); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); a = 
simde_vshlq_u32(a, simde_vnegq_s32(simde_vandq_s32(simde_test_arm_neon_random_i32x4(), simde_vdupq_n_s32(31)))); if (i == 0) { a = simde_vsetq_lane_u32(UINT32_C(0), a, 0); a = simde_vsetq_lane_u32( UINT32_MAX, a, 1); } simde_int32x4_t r = simde_vclsq_u32(a); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #endif SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vcls_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vcls_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcls_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vclsq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vclsq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vclsq_s32) #if !defined(SIMDE_NATIVE_ALIASES_TESTING) SIMDE_TEST_FUNC_LIST_ENTRY(vcls_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vcls_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vcls_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vclsq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vclsq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vclsq_u32) #endif SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/clt.c000066400000000000000000002273161400333146700162600ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN clt #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/clt.h" #else #include "../../../simde/arm/neon.h" #endif static int test_simde_vclt_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b[2]; uint32_t r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -878.13), SIMDE_FLOAT32_C( 253.04) }, { SIMDE_FLOAT32_C( -138.52), SIMDE_FLOAT32_C( 520.27) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 216.41), SIMDE_FLOAT32_C( -277.45) }, { SIMDE_FLOAT32_C( 216.41), SIMDE_FLOAT32_C( -277.45) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -520.65), SIMDE_FLOAT32_C( 69.98) }, { SIMDE_FLOAT32_C( -520.65), SIMDE_FLOAT32_C( -666.76) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -131.97), SIMDE_FLOAT32_C( -253.40) }, { SIMDE_FLOAT32_C( 
-131.97), SIMDE_FLOAT32_C( -253.40) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -732.59), SIMDE_FLOAT32_C( 16.82) }, { SIMDE_FLOAT32_C( -576.18), SIMDE_FLOAT32_C( -841.89) }, { UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 59.37), SIMDE_FLOAT32_C( -991.99) }, { SIMDE_FLOAT32_C( 59.37), SIMDE_FLOAT32_C( -991.99) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -436.42), SIMDE_FLOAT32_C( 163.54) }, { SIMDE_FLOAT32_C( -436.42), SIMDE_FLOAT32_C( 163.54) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 442.39), SIMDE_FLOAT32_C( 921.75) }, { SIMDE_FLOAT32_C( 696.88), SIMDE_FLOAT32_C( 921.75) }, { UINT32_MAX, UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_uint32x2_t r = simde_vclt_f32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_private a_ = simde_float32x2_to_private(simde_test_arm_neon_random_f32x2(-1000.0, 1000.0)); simde_float32x2_private b_ = simde_float32x2_to_private(simde_test_arm_neon_random_f32x2(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_float32x2_t a = simde_float32x2_from_private(a_); simde_float32x2_t b = simde_float32x2_from_private(b_); simde_uint32x2_t r = simde_vclt_f32(a, b); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclt_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; simde_float64 b[1]; uint64_t r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( 904.65) }, { SIMDE_FLOAT64_C( -973.46) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -826.64) }, { SIMDE_FLOAT64_C( 314.00) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 685.96) }, { SIMDE_FLOAT64_C( 685.96) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -419.20) }, { SIMDE_FLOAT64_C( 659.33) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 424.09) }, { SIMDE_FLOAT64_C( -15.82) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 280.79) }, { SIMDE_FLOAT64_C( -594.35) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -595.16) }, { SIMDE_FLOAT64_C( 567.93) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( -45.66) }, { SIMDE_FLOAT64_C( -45.66) }, { UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_uint64x1_t r = simde_vclt_f64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_private a_ = simde_float64x1_to_private(simde_test_arm_neon_random_f64x1(-1000.0, 1000.0)); simde_float64x1_private b_ = simde_float64x1_to_private(simde_test_arm_neon_random_f64x1(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_float64x1_t a = simde_float64x1_from_private(a_); simde_float64x1_t b = simde_float64x1_from_private(b_); simde_uint64x1_t r = simde_vclt_f64(a, b); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclt_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; uint8_t r[8]; } test_vec[] = { { { -INT8_C( 75), INT8_C( 17), -INT8_C( 55), -INT8_C( 95), -INT8_C( 17), -INT8_C( 75), -INT8_C( 40), -INT8_C( 6) }, { -INT8_C( 105), -INT8_C( 44), INT8_C( 60), -INT8_C( 63), INT8_C( 11), -INT8_C( 25), -INT8_C( 41), INT8_C( 126) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 70), INT8_C( 102), INT8_C( 48), -INT8_C( 1), INT8_C( 28), INT8_C( 37), INT8_C( 32), -INT8_C( 25) }, { INT8_C( 54), INT8_C( 102), -INT8_C( 120), INT8_C( 38), INT8_C( 28), INT8_C( 96), INT8_C( 32), -INT8_C( 77) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { -INT8_C( 125), INT8_C( 42), -INT8_C( 56), INT8_C( 89), -INT8_C( 126), INT8_C( 101), -INT8_C( 6), -INT8_C( 122) }, { -INT8_C( 41), INT8_C( 42), -INT8_C( 56), INT8_C( 9), INT8_C( 79), INT8_C( 101), -INT8_C( 16), -INT8_C( 122) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 40), INT8_C( 16), INT8_C( 78), INT8_C( 107), INT8_C( 42), INT8_C( 12), -INT8_C( 62), -INT8_C( 32) }, { INT8_C( 119), -INT8_C( 44), INT8_C( 57), -INT8_C( 7), INT8_C( 42), INT8_C( 51), -INT8_C( 62), INT8_C( 2) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 115), 
-INT8_C( 33), -INT8_C( 92), INT8_C( 76), -INT8_C( 85), INT8_C( 109), INT8_C( 90), -INT8_C( 45) }, { INT8_C( 80), -INT8_C( 88), INT8_C( 62), -INT8_C( 83), -INT8_C( 75), INT8_C( 109), -INT8_C( 115), INT8_C( 44) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 115), INT8_C( 121), INT8_C( 52), -INT8_C( 63), INT8_C( 4), -INT8_C( 114), INT8_C( 32), -INT8_C( 25) }, { INT8_C( 22), -INT8_C( 60), INT8_C( 52), -INT8_C( 63), INT8_C( 4), -INT8_C( 114), -INT8_C( 108), INT8_C( 85) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { -INT8_C( 43), INT8_C( 61), -INT8_C( 18), INT8_C( 73), -INT8_C( 29), INT8_C( 92), INT8_C( 48), -INT8_C( 104) }, { -INT8_C( 43), -INT8_C( 85), -INT8_C( 4), INT8_C( 73), -INT8_C( 29), INT8_C( 28), INT8_C( 48), -INT8_C( 7) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 107), INT8_C( 60), INT8_C( 22), INT8_C( 99), INT8_C( 47), INT8_C( 23), INT8_C( 41), INT8_C( 32) }, { INT8_C( 107), -INT8_C( 45), INT8_C( 112), -INT8_C( 111), INT8_C( 47), INT8_C( 23), INT8_C( 41), INT8_C( 4) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_uint8x8_t r = simde_vclt_s8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_private a_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); simde_int8x8_private b_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int8x8_t a = simde_int8x8_from_private(a_); simde_int8x8_t b = simde_int8x8_from_private(b_); simde_uint8x8_t r = simde_vclt_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclt_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; uint16_t r[4]; } test_vec[] = { { { INT16_C( 17736), -INT16_C( 24384), -INT16_C( 8064), -INT16_C( 5581) }, { INT16_C( 11197), INT16_C( 2549), INT16_C( 17328), -INT16_C( 5581) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 27109), -INT16_C( 3589), -INT16_C( 1655), -INT16_C( 10712) }, { INT16_C( 17454), INT16_C( 30265), -INT16_C( 1655), INT16_C( 2327) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { -INT16_C( 24584), -INT16_C( 22112), -INT16_C( 11038), INT16_C( 7571) }, { -INT16_C( 11601), -INT16_C( 27576), INT16_C( 17212), INT16_C( 18053) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { INT16_C( 11994), INT16_C( 31734), INT16_C( 3407), INT16_C( 10628) }, { INT16_C( 11994), -INT16_C( 11328), INT16_C( 24781), -INT16_C( 20356) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { -INT16_C( 32596), -INT16_C( 25046), -INT16_C( 423), -INT16_C( 21917) }, { -INT16_C( 32596), -INT16_C( 25046), INT16_C( 8406), INT16_C( 9497) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { INT16_C( 3788), -INT16_C( 26150), INT16_C( 22127), -INT16_C( 23735) }, { INT16_C( 5989), INT16_C( 18311), INT16_C( 45), -INT16_C( 31132) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { INT16_C( 23368), INT16_C( 7752), INT16_C( 24955), INT16_C( 3940) }, { -INT16_C( 27906), -INT16_C( 13649), -INT16_C( 30048), 
INT16_C( 3940) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { -INT16_C( 22087), -INT16_C( 3699), -INT16_C( 3782), INT16_C( 14456) }, { -INT16_C( 22087), INT16_C( 482), INT16_C( 10756), INT16_C( 32543) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_uint16x4_t r = simde_vclt_s16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_private a_ = simde_int16x4_to_private(simde_test_arm_neon_random_i16x4()); simde_int16x4_private b_ = simde_int16x4_to_private(simde_test_arm_neon_random_i16x4()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int16x4_t a = simde_int16x4_from_private(a_); simde_int16x4_t b = simde_int16x4_from_private(b_); simde_uint16x4_t r = simde_vclt_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclt_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; uint32_t r[2]; } test_vec[] = { { { INT32_C( 14356390), INT32_C( 2091570202) }, { -INT32_C( 800985278), INT32_C( 1231471845) }, { UINT32_C( 0), UINT32_C( 0) } }, { { -INT32_C( 1191761604), -INT32_C( 40964062) }, { INT32_C( 2118768955), -INT32_C( 40964062) }, { UINT32_MAX, UINT32_C( 0) } }, { { -INT32_C( 1984411094), INT32_C( 1860396681) }, { -INT32_C( 1984411094), -INT32_C( 1028459317) }, { UINT32_C( 0), UINT32_C( 0) } }, { { -INT32_C( 1856319914), -INT32_C( 921721689) }, { -INT32_C( 
2067358316), INT32_C( 1780389702) }, { UINT32_C( 0), UINT32_MAX } }, { { -INT32_C( 99250480), -INT32_C( 1551577640) }, { INT32_C( 1835415233), INT32_C( 876330201) }, { UINT32_MAX, UINT32_MAX } }, { { INT32_C( 1823235288), -INT32_C( 901481547) }, { -INT32_C( 1247146486), -INT32_C( 901481547) }, { UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 1505819032), INT32_C( 164046384) }, { INT32_C( 792527586), -INT32_C( 1836378651) }, { UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 264597765), -INT32_C( 447047899) }, { INT32_C( 1627326955), -INT32_C( 447047899) }, { UINT32_MAX, UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_uint32x2_t r = simde_vclt_s32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_private a_ = simde_int32x2_to_private(simde_test_arm_neon_random_i32x2()); simde_int32x2_private b_ = simde_int32x2_to_private(simde_test_arm_neon_random_i32x2()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int32x2_t a = simde_int32x2_from_private(a_); simde_int32x2_t b = simde_int32x2_from_private(b_); simde_uint32x2_t r = simde_vclt_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclt_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int64_t b[1]; uint64_t r[1]; } test_vec[] = { { { -INT64_C( 5127036871807353823) }, { -INT64_C( 5127036871807353823) }, { UINT64_C( 0) } }, { { -INT64_C( 2830046651551003050) }, { INT64_C( 1306622463292470730) }, { UINT64_MAX } }, { { INT64_C( 1880994240672666602) }, { INT64_C( 1880994240672666602) }, { UINT64_C( 0) } }, { { INT64_C( 2101844766198716) }, { INT64_C( 2101844766198716) }, { UINT64_C( 0) } }, { { -INT64_C( 3213452295226323034) }, { -INT64_C( 3213452295226323034) }, { UINT64_C( 0) } }, { { -INT64_C( 7302683616743827769) }, { INT64_C( 7731053581688718469) }, { UINT64_MAX } }, { { INT64_C( 7803816816053276538) }, { -INT64_C( 6942222641965927046) }, { UINT64_C( 0) } }, { { -INT64_C( 5643674020643342615) }, { -INT64_C( 5643674020643342615) }, { UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_uint64x1_t r = simde_vclt_s64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_private a_ = simde_int64x1_to_private(simde_test_arm_neon_random_i64x1()); simde_int64x1_private b_ = simde_int64x1_to_private(simde_test_arm_neon_random_i64x1()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int64x1_t a = simde_int64x1_from_private(a_); simde_int64x1_t b = simde_int64x1_from_private(b_); simde_uint64x1_t r = simde_vclt_s64(a, b); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclt_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(150), UINT8_C(209), UINT8_C(111), UINT8_C(161), UINT8_C(192), UINT8_C( 67), UINT8_C(170), UINT8_C(177) }, { UINT8_C(137), UINT8_C(209), UINT8_C(233), UINT8_C(129), UINT8_C( 30), UINT8_C(216), UINT8_C( 20), UINT8_C(179) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { UINT8_C(220), UINT8_C( 94), UINT8_C( 34), UINT8_C(120), UINT8_C(106), UINT8_C( 75), UINT8_C(238), UINT8_C( 0) }, { UINT8_C(192), UINT8_C( 94), UINT8_C(161), UINT8_C(129), UINT8_C(161), UINT8_C( 75), UINT8_C( 50), UINT8_C( 43) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { UINT8_C( 37), UINT8_C( 19), UINT8_C(197), UINT8_C( 59), UINT8_C(139), UINT8_C(245), UINT8_C(241), UINT8_C(177) }, { UINT8_C( 16), UINT8_C( 19), UINT8_C( 41), UINT8_C(122), UINT8_C(139), UINT8_C( 24), UINT8_C(123), UINT8_C( 76) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(234), UINT8_C(238), UINT8_C(191), UINT8_C( 13), UINT8_C(174), UINT8_C(174), UINT8_C(112), UINT8_C(211) }, { UINT8_C(234), UINT8_C(189), UINT8_C( 15), UINT8_C(191), UINT8_C(178), UINT8_C( 0), UINT8_C(112), UINT8_C(194) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, 
{ { UINT8_C(212), UINT8_C(128), UINT8_C( 63), UINT8_C( 60), UINT8_C(182), UINT8_C(214), UINT8_C(242), UINT8_C(208) }, { UINT8_C(112), UINT8_C(128), UINT8_C(222), UINT8_C( 31), UINT8_C( 46), UINT8_C(214), UINT8_C(242), UINT8_C( 24) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 85), UINT8_C( 67), UINT8_C(179), UINT8_C(148), UINT8_C(251), UINT8_C(157), UINT8_C(188), UINT8_C(238) }, { UINT8_C( 85), UINT8_C(252), UINT8_C( 11), UINT8_C( 11), UINT8_C(126), UINT8_C(203), UINT8_C(220), UINT8_C(238) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C( 1), UINT8_C(106), UINT8_C(104), UINT8_C( 3), UINT8_C(178), UINT8_C(230), UINT8_C(124), UINT8_C(149) }, { UINT8_C(178), UINT8_C(202), UINT8_C( 41), UINT8_C(173), UINT8_C(104), UINT8_C(230), UINT8_C(124), UINT8_C(189) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { UINT8_C( 94), UINT8_C(121), UINT8_C( 24), UINT8_C(238), UINT8_C(232), UINT8_C(170), UINT8_C( 91), UINT8_C( 93) }, { UINT8_C( 20), UINT8_C(121), UINT8_C( 96), UINT8_C(198), UINT8_C(232), UINT8_C(120), UINT8_C( 91), UINT8_C(155) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vclt_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_private a_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); simde_uint8x8_private b_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint8x8_t a = simde_uint8x8_from_private(a_); simde_uint8x8_t b = simde_uint8x8_from_private(b_); simde_uint8x8_t r = simde_vclt_u8(a, b); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclt_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(32822), UINT16_C(20124), UINT16_C(15649), UINT16_C(53596) }, { UINT16_C(33867), UINT16_C(46145), UINT16_C(15649), UINT16_C(53596) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(29074), UINT16_C(23284), UINT16_C( 3098), UINT16_C(26002) }, { UINT16_C(30500), UINT16_C(23284), UINT16_C(37367), UINT16_C(44200) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { UINT16_C(62210), UINT16_C(63235), UINT16_C(13360), UINT16_C(65013) }, { UINT16_C(10596), UINT16_C(63235), UINT16_C( 1435), UINT16_C(46573) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(19383), UINT16_C(47515), UINT16_C(14496), UINT16_C(28637) }, { UINT16_C(19383), UINT16_C(47515), UINT16_C(29503), UINT16_C(28637) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C(28668), UINT16_C(26043), UINT16_C(61556), UINT16_C(34381) }, { UINT16_C(26479), UINT16_C(26043), UINT16_C(19318), UINT16_C( 5714) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(10807), UINT16_C(30452), UINT16_C(53917), UINT16_C(17894) }, { UINT16_C(21157), UINT16_C(41298), UINT16_C(21954), UINT16_C(13881) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(30957), UINT16_C(25627), UINT16_C(30515), UINT16_C(18298) }, { UINT16_C( 2393), UINT16_C(37250), 
UINT16_C(30515), UINT16_C(53255) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(26688), UINT16_C( 655), UINT16_C(54381), UINT16_C( 824) }, { UINT16_C(62798), UINT16_C(15545), UINT16_C(54381), UINT16_C(12448) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vclt_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_private a_ = simde_uint16x4_to_private(simde_test_arm_neon_random_u16x4()); simde_uint16x4_private b_ = simde_uint16x4_to_private(simde_test_arm_neon_random_u16x4()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint16x4_t a = simde_uint16x4_from_private(a_); simde_uint16x4_t b = simde_uint16x4_from_private(b_); simde_uint16x4_t r = simde_vclt_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclt_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(1006538243), UINT32_C(3270930982) }, { UINT32_C(1006538243), UINT32_C(3849160586) }, { UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(1689880060), UINT32_C(2592713952) }, { UINT32_C(1689880060), UINT32_C(2005775138) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 896308987), UINT32_C(2267575548) }, { UINT32_C( 896308987), UINT32_C( 425907012) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(3252436165), UINT32_C(3677308005) 
}, { UINT32_C(3243946568), UINT32_C(3677308005) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(2773410290), UINT32_C( 897523696) }, { UINT32_C(2773410290), UINT32_C(4292940579) }, { UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(3716545173), UINT32_C(2914073850) }, { UINT32_C(1458425380), UINT32_C(2914073850) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(3201697484), UINT32_C( 191112424) }, { UINT32_C( 218842424), UINT32_C(1829216488) }, { UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(2022799857), UINT32_C(4051590427) }, { UINT32_C(2022799857), UINT32_C(4051590427) }, { UINT32_C( 0), UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vclt_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_private a_ = simde_uint32x2_to_private(simde_test_arm_neon_random_u32x2()); simde_uint32x2_private b_ = simde_uint32x2_to_private(simde_test_arm_neon_random_u32x2()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint32x2_t a = simde_uint32x2_from_private(a_); simde_uint32x2_t b = simde_uint32x2_from_private(b_); simde_uint32x2_t r = simde_vclt_u32(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclt_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C(10070407729176073235) }, { UINT64_C( 7732343939302363799) }, { UINT64_C( 0) } }, { { UINT64_C(12628703139761999735) }, { UINT64_C(12329520246920030529) }, { UINT64_C( 0) } }, { { UINT64_C(17536455851440219275) }, { UINT64_C( 8151027840960181848) }, { UINT64_C( 0) } }, { { UINT64_C( 6959487189882112425) }, { UINT64_C(13600525177902459928) }, { UINT64_MAX } }, { { UINT64_C( 2278690975057472266) }, { UINT64_C(13616360460761907503) }, { UINT64_MAX } }, { { UINT64_C( 2520162804715748145) }, { UINT64_C( 2520162804715748145) }, { UINT64_C( 0) } }, { { UINT64_C( 235123552249151842) }, { UINT64_C( 2844834271792461472) }, { UINT64_MAX } }, { { UINT64_C(12488752737624128753) }, { UINT64_C(12268281653311768549) }, { UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_vclt_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_private a_ = simde_uint64x1_to_private(simde_test_arm_neon_random_u64x1()); simde_uint64x1_private b_ = simde_uint64x1_to_private(simde_test_arm_neon_random_u64x1()); /* Make some equal which is unlikely to happen by 
accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint64x1_t a = simde_uint64x1_from_private(a_); simde_uint64x1_t b = simde_uint64x1_from_private(b_); simde_uint64x1_t r = simde_vclt_u64(a, b); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcltq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 b[4]; uint32_t r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 371.77), SIMDE_FLOAT32_C( -175.93), SIMDE_FLOAT32_C( 59.24), SIMDE_FLOAT32_C( 306.18) }, { SIMDE_FLOAT32_C( 371.77), SIMDE_FLOAT32_C( 169.92), SIMDE_FLOAT32_C( 324.63), SIMDE_FLOAT32_C( -35.16) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -350.88), SIMDE_FLOAT32_C( -974.36), SIMDE_FLOAT32_C( 869.28), SIMDE_FLOAT32_C( 571.35) }, { SIMDE_FLOAT32_C( -726.73), SIMDE_FLOAT32_C( -430.55), SIMDE_FLOAT32_C( 869.28), SIMDE_FLOAT32_C( -89.77) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 348.81), SIMDE_FLOAT32_C( -407.57), SIMDE_FLOAT32_C( 461.12), SIMDE_FLOAT32_C( -973.57) }, { SIMDE_FLOAT32_C( -318.44), SIMDE_FLOAT32_C( -380.83), SIMDE_FLOAT32_C( -918.31), SIMDE_FLOAT32_C( -593.83) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -689.15), SIMDE_FLOAT32_C( 36.98), SIMDE_FLOAT32_C( 779.85), SIMDE_FLOAT32_C( -534.89) }, { SIMDE_FLOAT32_C( -173.57), SIMDE_FLOAT32_C( 1.89), SIMDE_FLOAT32_C( 942.20), SIMDE_FLOAT32_C( 475.55) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 705.44), SIMDE_FLOAT32_C( -83.83), SIMDE_FLOAT32_C( 878.57), SIMDE_FLOAT32_C( -104.10) }, { SIMDE_FLOAT32_C( -306.85), SIMDE_FLOAT32_C( -435.11), SIMDE_FLOAT32_C( 878.57), SIMDE_FLOAT32_C( 
-958.04) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 958.86), SIMDE_FLOAT32_C( -849.92), SIMDE_FLOAT32_C( -782.88), SIMDE_FLOAT32_C( 402.11) }, { SIMDE_FLOAT32_C( 291.01), SIMDE_FLOAT32_C( 957.42), SIMDE_FLOAT32_C( -782.88), SIMDE_FLOAT32_C( 601.87) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -1.14), SIMDE_FLOAT32_C( -990.83), SIMDE_FLOAT32_C( 296.38), SIMDE_FLOAT32_C( -973.62) }, { SIMDE_FLOAT32_C( 145.16), SIMDE_FLOAT32_C( -656.73), SIMDE_FLOAT32_C( 327.18), SIMDE_FLOAT32_C( -149.41) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( -35.24), SIMDE_FLOAT32_C( 625.06), SIMDE_FLOAT32_C( 99.32), SIMDE_FLOAT32_C( 260.43) }, { SIMDE_FLOAT32_C( -35.24), SIMDE_FLOAT32_C( -937.06), SIMDE_FLOAT32_C( 99.32), SIMDE_FLOAT32_C( -76.38) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_uint32x4_t r = simde_vcltq_f32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_private a_ = simde_float32x4_to_private(simde_test_arm_neon_random_f32x4(-1000.0, 1000.0)); simde_float32x4_private b_ = simde_float32x4_to_private(simde_test_arm_neon_random_f32x4(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_float32x4_t a = simde_float32x4_from_private(a_); simde_float32x4_t b = simde_float32x4_from_private(b_); simde_uint32x4_t r = simde_vcltq_f32(a, b); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcltq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 b[2]; uint64_t r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -541.42), SIMDE_FLOAT64_C( 483.82) }, { SIMDE_FLOAT64_C( 242.04), SIMDE_FLOAT64_C( -298.53) }, { UINT64_MAX, UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 586.91), SIMDE_FLOAT64_C( -140.81) }, { SIMDE_FLOAT64_C( -913.70), SIMDE_FLOAT64_C( -140.81) }, { UINT64_C( 0), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 257.61), SIMDE_FLOAT64_C( 978.00) }, { SIMDE_FLOAT64_C( 491.76), SIMDE_FLOAT64_C( -666.72) }, { UINT64_MAX, UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 302.40), SIMDE_FLOAT64_C( -658.81) }, { SIMDE_FLOAT64_C( -712.21), SIMDE_FLOAT64_C( -658.81) }, { UINT64_C( 0), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 285.48), SIMDE_FLOAT64_C( 825.49) }, { SIMDE_FLOAT64_C( -902.92), SIMDE_FLOAT64_C( 825.49) }, { UINT64_C( 0), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -855.34), SIMDE_FLOAT64_C( 506.60) }, { SIMDE_FLOAT64_C( -855.34), SIMDE_FLOAT64_C( -668.79) }, { UINT64_C( 0), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 494.95), SIMDE_FLOAT64_C( 794.97) }, { SIMDE_FLOAT64_C( 494.95), SIMDE_FLOAT64_C( -961.51) }, { UINT64_C( 0), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 17.44), SIMDE_FLOAT64_C( 697.82) }, { SIMDE_FLOAT64_C( 17.44), SIMDE_FLOAT64_C( 697.82) }, { UINT64_C( 0), UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); 
simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_uint64x2_t r = simde_vcltq_f64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_private a_ = simde_float64x2_to_private(simde_test_arm_neon_random_f64x2(-1000.0, 1000.0)); simde_float64x2_private b_ = simde_float64x2_to_private(simde_test_arm_neon_random_f64x2(-1000.0, 1000.0)); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_float64x2_t a = simde_float64x2_from_private(a_); simde_float64x2_t b = simde_float64x2_from_private(b_); simde_uint64x2_t r = simde_vcltq_f64(a, b); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcltq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t b[16]; uint8_t r[16]; } test_vec[] = { { { INT8_C( 62), INT8_C( 110), INT8_C( 77), -INT8_C( 106), INT8_C( 45), -INT8_C( 95), INT8_C( 68), INT8_C( 99), -INT8_C( 107), -INT8_C( 48), -INT8_C( 91), -INT8_C( 79), INT8_C( 1), -INT8_C( 16), -INT8_C( 83), INT8_C( 10) }, { INT8_C( 94), INT8_C( 12), INT8_C( 68), -INT8_C( 59), -INT8_C( 39), -INT8_C( 89), INT8_C( 68), INT8_C( 99), -INT8_C( 69), -INT8_C( 48), -INT8_C( 33), -INT8_C( 79), -INT8_C( 53), -INT8_C( 16), -INT8_C( 88), INT8_C( 10) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { -INT8_C( 61), -INT8_C( 83), -INT8_C( 102), -INT8_C( 100), INT8_C( 84), -INT8_C( 33), INT8_C( 33), INT8_C( 15), -INT8_C( 81), -INT8_C( 33), -INT8_C( 63), INT8_C( 122), 
-INT8_C( 30), INT8_C( 105), -INT8_C( 124), INT8_C( 45) }, { INT8_C( 95), INT8_C( 37), -INT8_C( 72), -INT8_C( 11), -INT8_C( 33), -INT8_C( 89), INT8_C( 33), INT8_C( 7), INT8_C( 59), INT8_C( 43), INT8_C( 48), -INT8_C( 79), -INT8_C( 30), -INT8_C( 103), -INT8_C( 122), -INT8_C( 91) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { -INT8_C( 45), INT8_C( 65), INT8_C( 101), -INT8_C( 78), INT8_C( 126), INT8_C( 16), -INT8_C( 71), -INT8_C( 92), INT8_C( 119), -INT8_C( 23), INT8_C( 106), INT8_C( 89), -INT8_C( 126), -INT8_C( 15), -INT8_C( 1), -INT8_C( 55) }, { INT8_C( 18), INT8_C( 65), INT8_C( 101), INT8_C( 18), -INT8_C( 125), INT8_C( 16), -INT8_C( 63), -INT8_C( 92), INT8_C( 124), -INT8_C( 22), -INT8_C( 107), INT8_C( 81), -INT8_C( 104), -INT8_C( 76), -INT8_C( 122), INT8_C( 107) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 12), INT8_C( 112), INT8_C( 65), INT8_MAX, INT8_MIN, INT8_C( 2), INT8_C( 51), -INT8_C( 4), -INT8_C( 19), -INT8_C( 66), -INT8_C( 26), -INT8_C( 123), INT8_C( 125), -INT8_C( 44), -INT8_C( 15), INT8_C( 8) }, { -INT8_C( 124), INT8_C( 14), INT8_C( 17), INT8_MAX, -INT8_C( 27), -INT8_C( 45), -INT8_C( 14), -INT8_C( 92), -INT8_C( 1), -INT8_C( 66), -INT8_C( 26), INT8_C( 29), -INT8_C( 119), -INT8_C( 14), INT8_C( 76), -INT8_C( 107) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { INT8_MIN, -INT8_C( 74), INT8_C( 5), -INT8_C( 25), -INT8_C( 119), -INT8_C( 119), -INT8_C( 114), INT8_C( 111), -INT8_C( 73), INT8_C( 114), INT8_C( 12), -INT8_C( 72), INT8_C( 100), -INT8_C( 14), INT8_C( 62), -INT8_C( 57) }, { 
INT8_MIN, -INT8_C( 9), -INT8_C( 86), INT8_C( 17), INT8_C( 78), -INT8_C( 119), -INT8_C( 114), INT8_C( 111), -INT8_C( 73), -INT8_C( 111), INT8_C( 12), -INT8_C( 72), -INT8_C( 123), -INT8_C( 78), INT8_C( 62), -INT8_C( 120) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { -INT8_C( 9), INT8_C( 66), -INT8_C( 91), INT8_C( 69), -INT8_C( 51), INT8_C( 72), -INT8_C( 76), -INT8_C( 124), INT8_C( 105), -INT8_C( 63), INT8_C( 126), INT8_C( 125), INT8_C( 115), INT8_C( 122), INT8_C( 17), -INT8_C( 37) }, { -INT8_C( 67), INT8_C( 66), -INT8_C( 51), -INT8_C( 9), INT8_C( 61), INT8_C( 72), -INT8_C( 26), -INT8_C( 85), INT8_C( 105), INT8_C( 19), INT8_C( 126), INT8_C( 125), INT8_C( 19), INT8_C( 23), INT8_C( 17), INT8_C( 10) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 13), -INT8_C( 31), INT8_C( 15), INT8_C( 75), -INT8_C( 124), -INT8_C( 2), -INT8_C( 10), INT8_C( 74), -INT8_C( 93), INT8_C( 116), INT8_C( 16), INT8_C( 37), -INT8_C( 116), INT8_C( 33), INT8_C( 39), INT8_C( 65) }, { -INT8_C( 41), INT8_MIN, INT8_C( 15), -INT8_C( 63), -INT8_C( 124), -INT8_C( 69), INT8_C( 111), INT8_C( 74), -INT8_C( 93), INT8_C( 104), -INT8_C( 126), INT8_C( 6), INT8_C( 52), -INT8_C( 106), INT8_C( 39), INT8_C( 65) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 68), INT8_C( 121), INT8_C( 0), -INT8_C( 87), INT8_C( 52), INT8_C( 111), -INT8_C( 13), INT8_C( 84), -INT8_C( 94), INT8_C( 70), -INT8_C( 34), INT8_C( 12), INT8_C( 12), INT8_C( 5), INT8_C( 77), -INT8_C( 124) }, { INT8_C( 68), -INT8_C( 38), INT8_C( 38), -INT8_C( 
126), INT8_C( 93), INT8_C( 92), -INT8_C( 47), INT8_C( 84), -INT8_C( 94), INT8_C( 70), -INT8_C( 40), INT8_C( 9), -INT8_C( 21), INT8_C( 66), INT8_C( 72), INT8_C( 16) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_uint8x16_t r = simde_vcltq_s8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_private a_ = simde_int8x16_to_private(simde_test_arm_neon_random_i8x16()); simde_int8x16_private b_ = simde_int8x16_to_private(simde_test_arm_neon_random_i8x16()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int8x16_t a = simde_int8x16_from_private(a_); simde_int8x16_t b = simde_int8x16_from_private(b_); simde_uint8x16_t r = simde_vcltq_s8(a, b); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcltq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; uint16_t r[8]; } test_vec[] = { { { INT16_C( 7364), -INT16_C( 12784), -INT16_C( 5799), INT16_C( 3417), INT16_C( 4291), INT16_C( 2542), -INT16_C( 15910), INT16_C( 9015) }, { -INT16_C( 27981), INT16_C( 29466), -INT16_C( 32414), INT16_C( 3417), -INT16_C( 30798), INT16_C( 2542), INT16_C( 21087), INT16_C( 9015) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), 
UINT16_MAX, UINT16_C( 0) } }, { { -INT16_C( 18337), -INT16_C( 8153), INT16_C( 14202), INT16_C( 11729), -INT16_C( 5175), INT16_C( 11169), -INT16_C( 28861), INT16_C( 7736) }, { INT16_C( 9857), -INT16_C( 8153), INT16_C( 24441), -INT16_C( 6141), -INT16_C( 2650), -INT16_C( 10320), -INT16_C( 28861), -INT16_C( 23861) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { -INT16_C( 28521), INT16_C( 898), -INT16_C( 3711), -INT16_C( 13879), INT16_C( 18913), INT16_C( 23275), INT16_C( 31057), INT16_C( 20290) }, { -INT16_C( 3357), INT16_C( 10022), -INT16_C( 3711), -INT16_C( 13879), -INT16_C( 23224), -INT16_C( 10614), INT16_C( 31057), -INT16_C( 5843) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { -INT16_C( 30376), INT16_C( 166), -INT16_C( 5768), INT16_C( 23375), INT16_C( 30171), INT16_C( 23938), INT16_C( 25894), -INT16_C( 20954) }, { -INT16_C( 20239), INT16_C( 17028), -INT16_C( 20182), INT16_C( 13099), INT16_C( 6241), -INT16_C( 13369), INT16_C( 25894), INT16_C( 32279) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { INT16_C( 17732), -INT16_C( 21793), INT16_C( 1425), -INT16_C( 32167), -INT16_C( 8778), -INT16_C( 7996), INT16_C( 22778), -INT16_C( 4077) }, { -INT16_C( 9720), INT16_C( 11963), -INT16_C( 11456), INT16_C( 12205), INT16_C( 11409), INT16_C( 14486), INT16_C( 22778), INT16_C( 16058) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { INT16_C( 10356), INT16_C( 28662), -INT16_C( 20351), INT16_C( 7853), INT16_C( 23075), INT16_C( 25499), -INT16_C( 552), -INT16_C( 1985) }, { INT16_C( 10356), INT16_C( 28662), -INT16_C( 20351), INT16_C( 7853), -INT16_C( 27062), -INT16_C( 5555), -INT16_C( 552), -INT16_C( 1985) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), 
UINT16_C( 0) } }, { { -INT16_C( 8033), INT16_C( 10341), -INT16_C( 10456), -INT16_C( 25206), INT16_MIN, -INT16_C( 32500), -INT16_C( 18128), INT16_C( 31647) }, { -INT16_C( 5040), INT16_C( 10341), -INT16_C( 23318), INT16_C( 23584), -INT16_C( 14567), -INT16_C( 10444), INT16_C( 30732), -INT16_C( 21575) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 23875), -INT16_C( 20138), -INT16_C( 2333), INT16_C( 12026), -INT16_C( 28190), -INT16_C( 19537), INT16_C( 31798), -INT16_C( 3191) }, { INT16_C( 23875), INT16_C( 20263), -INT16_C( 7979), INT16_C( 12026), INT16_C( 29150), -INT16_C( 19537), -INT16_C( 12942), -INT16_C( 3191) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_uint16x8_t r = simde_vcltq_s16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_private a_ = simde_int16x8_to_private(simde_test_arm_neon_random_i16x8()); simde_int16x8_private b_ = simde_int16x8_to_private(simde_test_arm_neon_random_i16x8()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int16x8_t a = simde_int16x8_from_private(a_); simde_int16x8_t b = simde_int16x8_from_private(b_); simde_uint16x8_t r = simde_vcltq_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcltq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; uint32_t r[4]; } test_vec[] = { { { -INT32_C( 50670728), INT32_C( 1362797516), INT32_C( 1200625608), -INT32_C( 361853940) }, { -INT32_C( 50670728), INT32_C( 1362797516), -INT32_C( 1506030104), INT32_C( 386108489) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 674165600), INT32_C( 393264650), -INT32_C( 1040064950), INT32_C( 2126445489) }, { -INT32_C( 2016413281), INT32_C( 254675654), -INT32_C( 1373228650), -INT32_C( 1479440313) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 127746237), INT32_C( 1735163167), INT32_C( 944670617), INT32_C( 1203772800) }, { -INT32_C( 1101599193), INT32_C( 1735163167), -INT32_C( 2129773307), -INT32_C( 116924868) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { -INT32_C( 619853502), -INT32_C( 787195055), -INT32_C( 1625762696), -INT32_C( 513970238) }, { -INT32_C( 246887700), INT32_C( 1148344071), INT32_C( 1413314869), INT32_C( 1603354141) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { INT32_C( 1898598393), -INT32_C( 451919325), INT32_C( 116949247), INT32_C( 1066340152) }, { -INT32_C( 1685913242), -INT32_C( 1729117829), INT32_C( 116949247), INT32_C( 392114974) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 2106235339), INT32_C( 1058689031), INT32_C( 1795631081), INT32_C( 643891883) }, { -INT32_C( 1027714877), 
-INT32_C( 272058671), INT32_C( 1795631081), INT32_C( 2081460144) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 1248728043), INT32_C( 1745939901), -INT32_C( 1299287569), -INT32_C( 294614781) }, { -INT32_C( 327009277), INT32_C( 341346660), INT32_C( 1099982872), -INT32_C( 294614781) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { -INT32_C( 1144895284), INT32_C( 1332880972), -INT32_C( 1626480228), -INT32_C( 2121494755) }, { INT32_C( 831972376), INT32_C( 1332880972), INT32_C( 1295901360), -INT32_C( 1918404927) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_uint32x4_t r = simde_vcltq_s32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_private a_ = simde_int32x4_to_private(simde_test_arm_neon_random_i32x4()); simde_int32x4_private b_ = simde_int32x4_to_private(simde_test_arm_neon_random_i32x4()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int32x4_t a = simde_int32x4_from_private(a_); simde_int32x4_t b = simde_int32x4_from_private(b_); simde_uint32x4_t r = simde_vcltq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcltq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t b[2]; uint64_t r[2]; } test_vec[] = { { { -INT64_C( 3211926684988837804), -INT64_C( 8421230730182920338) }, { -INT64_C( 6076564325867935792), -INT64_C( 8398288375644386122) }, { UINT64_C( 0), UINT64_MAX } }, { { INT64_C( 1621145505625566236), INT64_C( 2592856562407949106) }, { INT64_C( 266431008299543418), -INT64_C( 7998385834714928779) }, { UINT64_C( 0), UINT64_C( 0) } }, { { -INT64_C( 6531430259948998375), -INT64_C( 5865573933921008497) }, { -INT64_C( 3202157712809819149), -INT64_C( 5865573933921008497) }, { UINT64_MAX, UINT64_C( 0) } }, { { -INT64_C( 4806800495864986554), INT64_C( 4567700026947357617) }, { INT64_C( 8265831110094800419), INT64_C( 4567700026947357617) }, { UINT64_MAX, UINT64_C( 0) } }, { { INT64_C( 3477571270109817056), INT64_C( 8832634092017089677) }, { INT64_C( 6739159238684913028), -INT64_C( 6977881495849959529) }, { UINT64_MAX, UINT64_C( 0) } }, { { INT64_C( 74737460574692298), INT64_C( 437789709730836818) }, { INT64_C( 2780949744972968593), -INT64_C( 4595858595526191386) }, { UINT64_MAX, UINT64_C( 0) } }, { { -INT64_C( 8807342457093705268), -INT64_C( 8379951819801218578) }, { -INT64_C( 6452993155498851451), -INT64_C( 804951188618319764) }, { UINT64_MAX, UINT64_MAX } }, { { INT64_C( 270920930229985738), -INT64_C( 217590579952747901) }, { INT64_C( 7705030636722512915), -INT64_C( 3114355646575143576) }, { UINT64_MAX, UINT64_C( 0) } }, }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_uint64x2_t r = simde_vcltq_s64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_private a_ = simde_int64x2_to_private(simde_test_arm_neon_random_i64x2()); simde_int64x2_private b_ = simde_int64x2_to_private(simde_test_arm_neon_random_i64x2()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_int64x2_t a = simde_int64x2_from_private(a_); simde_int64x2_t b = simde_int64x2_from_private(b_); simde_uint64x2_t r = simde_vcltq_s64(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcltq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(150), UINT8_C( 53), UINT8_C(140), UINT8_C( 98), UINT8_C(179), UINT8_C(153), UINT8_C( 56), UINT8_C( 98), UINT8_C(170), UINT8_C(230), UINT8_C(115), UINT8_C(204), UINT8_C(248), UINT8_C( 8), UINT8_C(101), UINT8_C( 51) }, { UINT8_C( 67), UINT8_C( 35), UINT8_C(126), UINT8_C(122), UINT8_C(205), UINT8_C(161), UINT8_C(164), UINT8_C( 98), UINT8_C( 20), UINT8_C(230), UINT8_C(115), UINT8_C(204), UINT8_C(231), UINT8_C(118), UINT8_C(137), UINT8_C(126) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(186), UINT8_C(154), UINT8_C(112), UINT8_C(225), UINT8_C( 59), UINT8_C( 20), 
UINT8_C(193), UINT8_C( 79), UINT8_C(203), UINT8_C(182), UINT8_C( 48), UINT8_C(226), UINT8_C( 44), UINT8_C(165), UINT8_C(115), UINT8_C(215) }, { UINT8_C(186), UINT8_C( 64), UINT8_C( 53), UINT8_C(104), UINT8_C( 88), UINT8_C( 34), UINT8_C(193), UINT8_C( 72), UINT8_C(203), UINT8_C( 76), UINT8_C( 48), UINT8_C(125), UINT8_C( 60), UINT8_C( 76), UINT8_C(115), UINT8_C( 80) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 37), UINT8_C( 32), UINT8_C( 84), UINT8_C( 28), UINT8_C(134), UINT8_C( 22), UINT8_C( 20), UINT8_C(190), UINT8_C( 98), UINT8_C( 68), UINT8_C( 21), UINT8_C(201), UINT8_C( 22), UINT8_C( 66), UINT8_C(239), UINT8_C(138) }, { UINT8_C( 37), UINT8_C( 32), UINT8_C(153), UINT8_C( 28), UINT8_C(148), UINT8_C( 10), UINT8_C( 14), UINT8_C(190), UINT8_C(151), UINT8_C(226), UINT8_C( 21), UINT8_C(201), UINT8_C( 22), UINT8_C( 68), UINT8_C(181), UINT8_C(138) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 48), UINT8_C( 80), UINT8_C(210), UINT8_C(196), UINT8_C( 90), UINT8_C(224), UINT8_C(130), UINT8_C(241), UINT8_C(195), UINT8_C( 59), UINT8_C(186), UINT8_C(217), UINT8_C(220), UINT8_C(111), UINT8_C(100), UINT8_C(132) }, { UINT8_C(121), UINT8_C(186), UINT8_C(178), UINT8_C(152), UINT8_C( 37), UINT8_C( 49), UINT8_C( 26), UINT8_C(212), UINT8_C(128), UINT8_C( 59), UINT8_C( 20), UINT8_C( 16), UINT8_C( 75), UINT8_C(203), UINT8_C(198), UINT8_C(123) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C( 51), UINT8_C(108), UINT8_C(158), UINT8_C( 88), UINT8_C(157), UINT8_C(190), 
UINT8_C( 44), UINT8_C( 29), UINT8_C(249), UINT8_C( 64), UINT8_C( 46), UINT8_C(187), UINT8_C( 12), UINT8_C(244), UINT8_C(198), UINT8_C( 1) }, { UINT8_C(140), UINT8_C( 0), UINT8_C(158), UINT8_C( 4), UINT8_C(194), UINT8_C( 6), UINT8_C( 64), UINT8_C( 28), UINT8_C( 41), UINT8_C( 85), UINT8_C( 82), UINT8_C(187), UINT8_C(206), UINT8_C( 12), UINT8_C(198), UINT8_C( 1) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 38), UINT8_C(153), UINT8_C(240), UINT8_C(232), UINT8_C(160), UINT8_C( 35), UINT8_C(252), UINT8_C(201), UINT8_C(122), UINT8_C( 98), UINT8_C(132), UINT8_C( 72), UINT8_C( 98), UINT8_C( 74), UINT8_C( 74), UINT8_C(175) }, { UINT8_C(179), UINT8_C(164), UINT8_C(240), UINT8_C(219), UINT8_C( 42), UINT8_C( 35), UINT8_C(252), UINT8_C(241), UINT8_C(132), UINT8_C( 98), UINT8_C(196), UINT8_C(217), UINT8_C(136), UINT8_C(191), UINT8_C(186), UINT8_C(175) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C(118), UINT8_C( 75), UINT8_C( 30), UINT8_C(190), UINT8_C( 66), UINT8_C( 26), UINT8_C(146), UINT8_C(198), UINT8_C(124), UINT8_C( 86), UINT8_C(159), UINT8_C( 0), UINT8_C( 87), UINT8_C( 94), UINT8_C( 67), UINT8_C(111) }, { UINT8_C(249), UINT8_C( 75), UINT8_C(104), UINT8_C(190), UINT8_C(231), UINT8_C( 42), UINT8_C(253), UINT8_C(218), UINT8_C(112), UINT8_C(133), UINT8_C( 47), UINT8_C( 0), UINT8_C( 87), UINT8_C( 94), UINT8_C( 67), UINT8_C(205) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { UINT8_C(253), UINT8_C( 67), UINT8_C(139), UINT8_C(229), UINT8_C(109), UINT8_C( 52), UINT8_C(191), UINT8_C(163), UINT8_C( 
56), UINT8_C(238), UINT8_C(221), UINT8_C( 17), UINT8_C( 76), UINT8_C( 33), UINT8_C(223), UINT8_C(166) }, { UINT8_C(131), UINT8_C( 77), UINT8_C(139), UINT8_MAX, UINT8_C( 77), UINT8_C( 18), UINT8_C(248), UINT8_C(163), UINT8_C( 56), UINT8_C(246), UINT8_C( 16), UINT8_C(184), UINT8_C(168), UINT8_C(235), UINT8_C( 49), UINT8_C(166) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vcltq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_private a_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); simde_uint8x16_private b_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint8x16_t a = simde_uint8x16_from_private(a_); simde_uint8x16_t b = simde_uint8x16_from_private(b_); simde_uint8x16_t r = simde_vcltq_u8(a, b); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcltq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(12035), UINT16_C( 296), UINT16_C( 6322), UINT16_C(36251), UINT16_C(38592), UINT16_C( 2034), UINT16_C(31190), UINT16_C(45710) }, { UINT16_C(53605), UINT16_C( 296), UINT16_C(50084), UINT16_C(36251), UINT16_C(43731), UINT16_C(27102), UINT16_C( 2753), UINT16_C(50261) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(25811), UINT16_C(38065), UINT16_C( 1647), UINT16_C(43353), UINT16_C(45652), UINT16_C(18837), UINT16_C(10613), UINT16_C(18566) }, { UINT16_C(25811), UINT16_C(38065), UINT16_C( 1647), UINT16_C(43353), UINT16_C(14620), UINT16_C(18837), UINT16_C(20900), UINT16_C(42295) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { UINT16_C(16824), UINT16_C(11772), UINT16_C(51684), UINT16_C(58541), UINT16_C( 9958), UINT16_C(41739), UINT16_C(48688), UINT16_C(18942) }, { UINT16_C(37732), UINT16_C( 2194), UINT16_C(51684), UINT16_C(58541), UINT16_C(10741), UINT16_C(41739), UINT16_C(48688), UINT16_C(59558) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(31154), UINT16_C(57184), UINT16_C(24484), UINT16_C( 2088), UINT16_C(47858), UINT16_C(55056), UINT16_C(48516), UINT16_C(31163) }, { UINT16_C(50918), 
UINT16_C( 5660), UINT16_C(50052), UINT16_C(34047), UINT16_C( 5477), UINT16_C(35565), UINT16_C(38048), UINT16_C(21141) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(51476), UINT16_C(14560), UINT16_C(55686), UINT16_C(14218), UINT16_C(48234), UINT16_C(49128), UINT16_C(33520), UINT16_C(22183) }, { UINT16_C(38295), UINT16_C(14560), UINT16_C(29993), UINT16_C(14218), UINT16_C(48234), UINT16_C(49128), UINT16_C(41493), UINT16_C(10502) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C(57856), UINT16_C(61631), UINT16_C(52199), UINT16_C(21178), UINT16_C( 9980), UINT16_C( 9524), UINT16_C(41806), UINT16_C( 1628) }, { UINT16_C(17787), UINT16_C(37061), UINT16_C(52199), UINT16_C(21178), UINT16_C(31727), UINT16_C(60484), UINT16_C(41806), UINT16_C(20104) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(61212), UINT16_C(27320), UINT16_C(11483), UINT16_C(22093), UINT16_C(63111), UINT16_C(22758), UINT16_C(41181), UINT16_C(10208) }, { UINT16_C(61212), UINT16_C(27320), UINT16_C(16531), UINT16_C( 6584), UINT16_C(63111), UINT16_C(13828), UINT16_C(60283), UINT16_C(10208) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C( 8141), UINT16_C(28459), UINT16_C( 3293), UINT16_C(56182), UINT16_C(11841), UINT16_C(39367), UINT16_C(65471), UINT16_C(36072) }, { UINT16_C(61940), UINT16_C(28459), UINT16_C( 3293), UINT16_C(58518), UINT16_C( 1212), UINT16_C(39367), UINT16_C(65471), UINT16_C(36072) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); 
simde_uint16x8_t r = simde_vcltq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_private a_ = simde_uint16x8_to_private(simde_test_arm_neon_random_u16x8()); simde_uint16x8_private b_ = simde_uint16x8_to_private(simde_test_arm_neon_random_u16x8()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint16x8_t a = simde_uint16x8_from_private(a_); simde_uint16x8_t b = simde_uint16x8_from_private(b_); simde_uint16x8_t r = simde_vcltq_u16(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcltq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(3571304076), UINT32_C(2472274095), UINT32_C(3889123896), UINT32_C(2667893652) }, { UINT32_C( 599103569), UINT32_C(3098582936), UINT32_C(2104870284), UINT32_C(2098284017) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 240037334), UINT32_C(2482380287), UINT32_C(1110637297), UINT32_C(1717869711) }, { UINT32_C( 333584006), UINT32_C(3432011739), UINT32_C(2840240541), UINT32_C(1717869711) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(3252382416), UINT32_C(4144172389), UINT32_C(2065852917), UINT32_C(1519256703) }, { UINT32_C(4180090460), UINT32_C(1336046015), UINT32_C(1454704141), UINT32_C(2292066744) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(2079346054), UINT32_C( 905384630), UINT32_C(1938785559), UINT32_C(1668069028) }, { UINT32_C( 884084263), UINT32_C(3431622420), UINT32_C( 626337937), UINT32_C( 
673095329) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 953588257), UINT32_C(3299567136), UINT32_C(3841267193), UINT32_C(1166448676) }, { UINT32_C(3523543873), UINT32_C(3455539755), UINT32_C(3841267193), UINT32_C(1166448676) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(4137100541), UINT32_C(1574603833), UINT32_C(1520278296), UINT32_C(2141855677) }, { UINT32_C(4137100541), UINT32_C(1574603833), UINT32_C(1470259236), UINT32_C(2141855677) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 418021632), UINT32_C( 354745432), UINT32_C( 663068458), UINT32_C(2431316880) }, { UINT32_C(3445290920), UINT32_C( 354745432), UINT32_C(1670696732), UINT32_C(2431316880) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(2253200220), UINT32_C(2108543674), UINT32_C(2541406959), UINT32_C( 426038465) }, { UINT32_C(3962472912), UINT32_C(3897607000), UINT32_C( 712587958), UINT32_C(1790714382) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vcltq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_private a_ = simde_uint32x4_to_private(simde_test_arm_neon_random_u32x4()); simde_uint32x4_private b_ = simde_uint32x4_to_private(simde_test_arm_neon_random_u32x4()); /* Make some equal which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint32x4_t a = simde_uint32x4_from_private(a_); simde_uint32x4_t b = simde_uint32x4_from_private(b_); simde_uint32x4_t r = simde_vcltq_u32(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcltq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 5951566415767268674), UINT64_C(14751997874589376704) }, { UINT64_C( 3314177480145134518), UINT64_C( 7678354966161073507) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(10733899104166349758), UINT64_C( 4544198551152891648) }, { UINT64_C( 5903945259160945099), UINT64_C( 4544198551152891648) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(14335256087741015275), UINT64_C( 5077609231872586398) }, { UINT64_C(14335256087741015275), UINT64_C( 5077609231872586398) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(16884082501088323537), UINT64_C(18183726781811778603) }, { UINT64_C( 6571733001915241168), UINT64_C(18183726781811778603) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 4917418667771907895), UINT64_C( 4857833735103647733) }, { UINT64_C( 3451256541622579149), UINT64_C( 4857833735103647733) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 9585677928860157643), UINT64_C(12138993750656537711) }, { UINT64_C(17533621864373092446), UINT64_C( 7817473392221707838) }, { UINT64_MAX, UINT64_C( 0) } }, { { UINT64_C( 7806607306692823205), UINT64_C(15729646765192443980) }, { UINT64_C( 7806607306692823205), UINT64_C(15729646765192443980) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 4142313535404413272), UINT64_C(15000715663249176140) }, { UINT64_C( 4577556720329631799), UINT64_C( 2146717038535436937) }, { 
UINT64_MAX, UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vcltq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_private a_ = simde_uint64x2_to_private(simde_test_arm_neon_random_u64x2()); simde_uint64x2_private b_ = simde_uint64x2_to_private(simde_test_arm_neon_random_u64x2()); /* Make some equal which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = b_.values[j]; } simde_uint64x2_t a = simde_uint64x2_from_private(a_); simde_uint64x2_t b = simde_uint64x2_from_private(b_); simde_uint64x2_t r = simde_vcltq_u64(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vclt_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vclt_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vclt_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vclt_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vclt_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vclt_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vclt_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vclt_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vclt_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vclt_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vcltq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcltq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcltq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vcltq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcltq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcltq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcltq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vcltq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vcltq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vcltq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" 
simde-0.7.2/test/arm/neon/cltz.c000066400000000000000000000576331400333146700164550ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN cltz #include "test-neon.h" #include "../../../simde/arm/neon/cltz.h" #include "../../../simde/arm/neon/reinterpret.h" static int test_simde_vcltz_f32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { simde_float32 a[2]; uint32_t r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -600.28), SIMDE_FLOAT32_C( -155.01) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 941.01), SIMDE_FLOAT32_C( 717.45) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 22.41), SIMDE_FLOAT32_C( -542.54) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -400.37), SIMDE_FLOAT32_C( -711.08) }, { UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( 591.87), SIMDE_FLOAT32_C( -579.22) }, { UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -322.37), SIMDE_FLOAT32_C( 93.44) }, { UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -455.16), SIMDE_FLOAT32_C( 540.07) }, { UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 983.37), SIMDE_FLOAT32_C( 113.16) }, { UINT32_C( 0), UINT32_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_uint32x2_t r = simde_vcltz_f32(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vcltz_f64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { simde_float64 a[1]; uint64_t r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( -383.68) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( -437.89) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 762.24) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -529.26) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 863.53) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -770.66) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( -387.78) }, { UINT64_MAX } }, { { SIMDE_FLOAT64_C( 354.63) }, { UINT64_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_uint64x1_t r = simde_vcltz_f64(a); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; } static int test_simde_vcltz_s8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { int8_t a[8]; uint8_t r[8]; } test_vec[] = { { { INT8_C( 123), INT8_C( 25), INT8_C( 43), -INT8_C( 6), INT8_C( 91), INT8_C( 15), -INT8_C( 4), -INT8_C( 78) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { INT8_C( 9), -INT8_C( 67), INT8_C( 14), INT8_C( 50), INT8_MAX, -INT8_C( 98), INT8_C( 122), INT8_C( 87) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { -INT8_C( 59), -INT8_C( 40), INT8_C( 23), -INT8_C( 123), INT8_C( 0), INT8_C( 81), -INT8_C( 111), -INT8_C( 11) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { INT8_C( 78), -INT8_C( 120), -INT8_C( 45), INT8_C( 104), INT8_C( 116), -INT8_C( 31), -INT8_C( 94), -INT8_C( 17) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { -INT8_C( 6), -INT8_C( 51), -INT8_C( 23), INT8_C( 86), -INT8_C( 36), -INT8_C( 27), INT8_C( 8), -INT8_C( 27) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { -INT8_C( 94), INT8_C( 22), INT8_C( 24), INT8_C( 33), -INT8_C( 76), -INT8_C( 110), INT8_C( 120), INT8_C( 121) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 106), -INT8_C( 113), -INT8_C( 2), INT8_C( 106), -INT8_C( 32), -INT8_C( 112), INT8_C( 95), INT8_C( 47) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 24), INT8_C( 51), -INT8_C( 105), -INT8_C( 116), INT8_C( 20), INT8_C( 57), INT8_C( 123), INT8_C( 14) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), 
UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_uint8x8_t r = simde_vcltz_s8(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vcltz_s16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { int16_t a[4]; uint16_t r[4]; } test_vec[] = { { { -INT16_C( 3899), -INT16_C( 2710), -INT16_C( 12036), INT16_C( 14864) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 15179), -INT16_C( 3300), -INT16_C( 24965), -INT16_C( 23761) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { INT16_C( 27808), -INT16_C( 10552), -INT16_C( 3829), -INT16_C( 15215) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { -INT16_C( 18422), -INT16_C( 27002), INT16_C( 16021), INT16_C( 23504) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { INT16_C( 14894), INT16_C( 10832), INT16_C( 24586), INT16_C( 21860) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { -INT16_C( 32613), INT16_C( 5704), INT16_C( 30494), -INT16_C( 16455) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { -INT16_C( 32285), -INT16_C( 4203), INT16_C( 9843), INT16_C( 32179) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { INT16_C( 14814), INT16_C( 29715), -INT16_C( 7305), -INT16_C( 23089) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_uint16x4_t r = simde_vcltz_s16(a); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vcltz_s32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { int32_t a[2]; uint32_t r[2]; } test_vec[] = { { { INT32_C( 887751135), -INT32_C( 1880768565) }, { UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 1143275059), -INT32_C( 106841879) }, { 
UINT32_C( 0), UINT32_MAX } }, { { -INT32_C( 1429312649), INT32_C( 192520880) }, { UINT32_MAX, UINT32_C( 0) } }, { { -INT32_C( 1228640489), -INT32_C( 1630219841) }, { UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 1630290221), -INT32_C( 1322403714) }, { UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 1594535498), -INT32_C( 2120641014) }, { UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 1322555391), INT32_C( 566076426) }, { UINT32_MAX, UINT32_C( 0) } }, { { -INT32_C( 757628653), INT32_C( 628206418) }, { UINT32_MAX, UINT32_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_uint32x2_t r = simde_vcltz_s32(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vcltz_s64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { int64_t a[1]; uint64_t r[1]; } test_vec[] = { { { -INT64_C( 8326563804285590750) }, { UINT64_MAX } }, { { -INT64_C( 3208424567556038596) }, { UINT64_MAX } }, { { INT64_C( 2329024188526806682) }, { UINT64_C( 0) } }, { { -INT64_C( 8809616594128501407) }, { UINT64_MAX } }, { { INT64_C( 106393993243962263) }, { UINT64_C( 0) } }, { { -INT64_C( 6536608672518002276) }, { UINT64_MAX } }, { { INT64_C( 8384819676651960083) }, { UINT64_C( 0) } }, { { INT64_C( 9028095364871221859) }, { UINT64_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_uint64x1_t r = simde_vcltz_s64(a); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; } static int test_simde_vcltzq_f32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { simde_float32 a[4]; uint32_t r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -442.45), SIMDE_FLOAT32_C( 552.37), SIMDE_FLOAT32_C( -139.46), SIMDE_FLOAT32_C( 880.73) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -535.61), SIMDE_FLOAT32_C( -314.97), SIMDE_FLOAT32_C( -324.37), 
SIMDE_FLOAT32_C( -863.63) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { SIMDE_FLOAT32_C( -142.67), SIMDE_FLOAT32_C( -456.23), SIMDE_FLOAT32_C( 330.18), SIMDE_FLOAT32_C( -127.74) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 725.38), SIMDE_FLOAT32_C( -382.66), SIMDE_FLOAT32_C( 959.27), SIMDE_FLOAT32_C( -336.93) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 249.07), SIMDE_FLOAT32_C( 860.93), SIMDE_FLOAT32_C( 46.17), SIMDE_FLOAT32_C( 812.32) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -849.19), SIMDE_FLOAT32_C( -336.50), SIMDE_FLOAT32_C( 322.45), SIMDE_FLOAT32_C( -511.47) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( -105.46), SIMDE_FLOAT32_C( 857.83), SIMDE_FLOAT32_C( 267.60), SIMDE_FLOAT32_C( -602.28) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { SIMDE_FLOAT32_C( 768.23), SIMDE_FLOAT32_C( -752.53), SIMDE_FLOAT32_C( -518.24), SIMDE_FLOAT32_C( -674.22) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_uint32x4_t r = simde_vcltzq_f32(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vcltzq_f64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { simde_float64 a[2]; uint64_t r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -80.27), SIMDE_FLOAT64_C( 326.82) }, { UINT64_MAX, UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -489.46), SIMDE_FLOAT64_C( -457.68) }, { UINT64_MAX, UINT64_MAX } }, { { SIMDE_FLOAT64_C( -441.47), SIMDE_FLOAT64_C( 2.14) }, { UINT64_MAX, UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -692.88), SIMDE_FLOAT64_C( 270.65) }, { UINT64_MAX, UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 806.25), SIMDE_FLOAT64_C( 705.39) }, { UINT64_C( 0), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -398.22), SIMDE_FLOAT64_C( 
-504.15) }, { UINT64_MAX, UINT64_MAX } }, { { SIMDE_FLOAT64_C( 952.43), SIMDE_FLOAT64_C( -573.18) }, { UINT64_C( 0), UINT64_MAX } }, { { SIMDE_FLOAT64_C( -443.24), SIMDE_FLOAT64_C( 861.65) }, { UINT64_MAX, UINT64_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_uint64x2_t r = simde_vcltzq_f64(a); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } static int test_simde_vcltzq_s8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { int8_t a[16]; uint8_t r[16]; } test_vec[] = { { { INT8_C( 120), INT8_C( 63), INT8_C( 43), -INT8_C( 2), -INT8_C( 79), INT8_C( 47), INT8_C( 121), INT8_C( 7), INT8_C( 59), INT8_MAX, -INT8_C( 81), -INT8_C( 20), -INT8_C( 61), -INT8_C( 30), INT8_C( 90), INT8_C( 60) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { -INT8_C( 87), INT8_C( 32), INT8_C( 104), -INT8_C( 78), INT8_C( 22), INT8_C( 85), -INT8_C( 93), -INT8_C( 97), INT8_C( 26), -INT8_C( 7), -INT8_C( 98), INT8_C( 56), -INT8_C( 40), INT8_C( 11), INT8_C( 103), INT8_C( 81) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 74), -INT8_C( 110), INT8_C( 79), -INT8_C( 5), -INT8_C( 63), -INT8_C( 55), INT8_C( 2), -INT8_C( 4), INT8_C( 72), -INT8_C( 79), -INT8_C( 23), INT8_C( 11), -INT8_C( 109), INT8_C( 67), INT8_C( 72), INT8_C( 60) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 99), -INT8_C( 80), -INT8_C( 18), INT8_C( 121), INT8_C( 5), -INT8_C( 111), INT8_C( 25), INT8_C( 31), -INT8_C( 118), 
-INT8_C( 73), INT8_C( 87), INT8_C( 99), -INT8_C( 61), -INT8_C( 65), -INT8_C( 76), INT8_C( 13) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 81), INT8_C( 3), INT8_C( 9), INT8_C( 19), -INT8_C( 52), INT8_C( 11), INT8_C( 15), INT8_C( 21), -INT8_C( 67), -INT8_C( 8), INT8_C( 32), INT8_C( 80), INT8_C( 60), INT8_C( 104), -INT8_C( 115), -INT8_C( 97) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { INT8_C( 24), INT8_C( 123), INT8_C( 25), INT8_C( 29), INT8_C( 13), INT8_C( 50), INT8_C( 61), -INT8_C( 105), -INT8_C( 23), -INT8_C( 108), -INT8_C( 6), -INT8_C( 84), INT8_C( 83), -INT8_C( 82), -INT8_C( 70), -INT8_C( 91) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { -INT8_C( 78), -INT8_C( 61), -INT8_C( 72), INT8_C( 126), -INT8_C( 50), -INT8_C( 57), -INT8_C( 109), -INT8_C( 117), -INT8_C( 64), -INT8_C( 76), -INT8_C( 36), -INT8_C( 4), INT8_C( 28), INT8_C( 105), -INT8_C( 101), INT8_C( 53) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { -INT8_C( 28), -INT8_C( 76), INT8_C( 82), -INT8_C( 15), -INT8_C( 26), -INT8_C( 113), -INT8_C( 119), -INT8_C( 48), INT8_C( 36), -INT8_C( 125), INT8_C( 124), INT8_C( 119), INT8_C( 50), INT8_C( 54), INT8_C( 28), -INT8_C( 28) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } } }; for (size_t i 
= 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_uint8x16_t r = simde_vcltzq_s8(a); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vcltzq_s16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { int16_t a[8]; uint16_t r[8]; } test_vec[] = { { { -INT16_C( 30173), INT16_C( 19444), -INT16_C( 28978), -INT16_C( 15017), INT16_C( 17496), INT16_C( 7636), -INT16_C( 12918), INT16_C( 3110) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { -INT16_C( 17031), INT16_C( 23923), -INT16_C( 15294), INT16_C( 828), INT16_C( 7152), -INT16_C( 19786), INT16_C( 22798), INT16_C( 12797) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { -INT16_C( 3613), -INT16_C( 20099), -INT16_C( 11137), -INT16_C( 10378), INT16_C( 19224), -INT16_C( 23820), INT16_C( 6936), -INT16_C( 27986) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { INT16_C( 8920), INT16_C( 6895), INT16_C( 11238), -INT16_C( 10723), -INT16_C( 11450), INT16_C( 21896), -INT16_C( 31188), INT16_C( 3974) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 887), -INT16_C( 2111), INT16_C( 14295), -INT16_C( 4146), -INT16_C( 15486), -INT16_C( 25711), INT16_C( 16606), -INT16_C( 18899) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { INT16_C( 7266), INT16_C( 18640), -INT16_C( 4792), -INT16_C( 29153), -INT16_C( 22591), -INT16_C( 4637), INT16_C( 27181), -INT16_C( 23043) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { -INT16_C( 16787), INT16_C( 17820), INT16_C( 27381), INT16_C( 30772), -INT16_C( 14803), INT16_C( 2835), INT16_C( 
16390), INT16_C( 26817) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { -INT16_C( 28324), -INT16_C( 23376), -INT16_C( 12417), INT16_C( 16435), INT16_C( 5751), -INT16_C( 23507), INT16_C( 10880), -INT16_C( 4535) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_uint16x8_t r = simde_vcltzq_s16(a); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vcltzq_s32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { int32_t a[4]; uint32_t r[4]; } test_vec[] = { { { INT32_C( 104066264), INT32_C( 1548859983), -INT32_C( 983940808), INT32_C( 1490571068) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { -INT32_C( 222302292), -INT32_C( 1438018326), INT32_C( 756148753), -INT32_C( 1515649587) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 1085070577), -INT32_C( 811729513), INT32_C( 2073425726), -INT32_C( 221024954) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 1189450332), INT32_C( 955264550), -INT32_C( 446365160), -INT32_C( 108392953) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 2126956822), INT32_C( 1917965875), INT32_C( 351135437), -INT32_C( 1358512045) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 2046159698), INT32_C( 833742105), -INT32_C( 283765017), INT32_C( 250126628) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { INT32_C( 193995223), -INT32_C( 981605896), INT32_C( 450456263), INT32_C( 2093604906) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { -INT32_C( 436879669), -INT32_C( 1961449821), -INT32_C( 528863812), -INT32_C( 1511038258) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } } }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_uint32x4_t r = simde_vcltzq_s32(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vcltzq_s64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { int64_t a[2]; uint64_t r[2]; } test_vec[] = { { { -INT64_C( 4367700174058313457), -INT64_C( 7730538683494878666) }, { UINT64_MAX, UINT64_MAX } }, { { -INT64_C( 4741576537993497024), -INT64_C( 8245872675751211559) }, { UINT64_MAX, UINT64_MAX } }, { { INT64_C( 7856281297696352367), -INT64_C( 4329458411663726768) }, { UINT64_C( 0), UINT64_MAX } }, { { -INT64_C( 2313814442133127710), -INT64_C( 2844345970244083077) }, { UINT64_MAX, UINT64_MAX } }, { { -INT64_C( 5627489208313822654), INT64_C( 7589113511113448203) }, { UINT64_MAX, UINT64_C( 0) } }, { { -INT64_C( 552410599904063889), -INT64_C( 4948198726902374637) }, { UINT64_MAX, UINT64_MAX } }, { { -INT64_C( 6932135403521281863), INT64_C( 3956552763142165751) }, { UINT64_MAX, UINT64_C( 0) } }, { { -INT64_C( 378018798689814963), -INT64_C( 2029775254167571808) }, { UINT64_MAX, UINT64_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_uint64x2_t r = simde_vcltzq_s64(a); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vcltz_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcltz_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcltz_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vcltz_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcltz_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcltz_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcltzq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcltzq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcltzq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vcltzq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcltzq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcltzq_s64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" 
simde-0.7.2/test/arm/neon/clz.c000066400000000000000000001275331400333146700162660ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN clz #include "test-neon.h" #include "../../../simde/arm/neon/clz.h" #include "../../../simde/arm/neon/and.h" #include "../../../simde/arm/neon/dup_n.h" #include "../../../simde/arm/neon/neg.h" #include "../../../simde/arm/neon/set_lane.h" #include "../../../simde/arm/neon/shl.h" static int test_simde_x_vclzb_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a; int8_t r; } test_vec[] = { { INT8_C( 0), INT8_C( 8) }, { -INT8_C( 1), INT8_C( 0) }, { INT8_C( 44), INT8_C( 2) }, { INT8_C( 43), INT8_C( 2) }, { -INT8_C( 35), INT8_C( 0) }, { INT8_C( 27), INT8_C( 3) }, { INT8_C( 48), INT8_C( 2) }, { INT8_C( 79), INT8_C( 1) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int8_t r = simde_x_vclzb_s8(test_vec[i].a); simde_assert_equal_i8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int8_t a; switch (i) { case 0: a = INT8_C(0); break; case 1: a = INT8_MIN; break; default: a = simde_test_codegen_random_i8(); break; } int8_t r = simde_x_vclzb_s8(a); simde_test_codegen_write_i8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_x_vclzh_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a; int16_t r; } test_vec[] = { { INT16_C( 0), INT16_C( 16) }, { INT16_MIN, INT16_C( 0) }, { INT16_C( 26700), INT16_C( 1) }, { -INT16_C( 21932), INT16_C( 0) }, { -INT16_C( 30991), INT16_C( 0) }, { -INT16_C( 21858), INT16_C( 0) }, { -INT16_C( 15362), INT16_C( 0) }, { INT16_C( 14920), INT16_C( 2) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int16_t r = simde_x_vclzh_s16(test_vec[i].a); simde_assert_equal_i16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int16_t a; switch (i) { case 0: a = INT16_C(0); 
break; case 1: a = INT16_MIN; break; default: a = simde_test_codegen_random_i16(); break; } int16_t r = simde_x_vclzh_s16(a); simde_test_codegen_write_i16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_x_vclzs_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a; int32_t r; } test_vec[] = { { INT32_C( 0), INT32_C( 32) }, { INT32_MIN, INT32_C( 0) }, { INT32_C( 1347396202), INT32_C( 1) }, { -INT32_C( 239946416), INT32_C( 0) }, { -INT32_C( 1214188073), INT32_C( 0) }, { INT32_C( 1550140061), INT32_C( 1) }, { INT32_C( 1239879933), INT32_C( 1) }, { INT32_C( 1794325369), INT32_C( 1) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int32_t r = simde_x_vclzs_s32(test_vec[i].a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int32_t a; switch (i) { case 0: a = INT32_C(0); break; case 1: a = INT32_MIN; break; default: a = simde_test_codegen_random_i32(); break; } int32_t r = simde_x_vclzs_s32(a); simde_test_codegen_write_i32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_x_vclzb_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a; uint8_t r; } test_vec[] = { { UINT8_C( 0), UINT8_C( 8) }, { UINT8_MAX, UINT8_C( 0) }, { UINT8_C(243), UINT8_C( 0) }, { UINT8_C(128), UINT8_C( 0) }, { UINT8_C(240), UINT8_C( 0) }, { UINT8_C( 44), UINT8_C( 2) }, { UINT8_C(119), UINT8_C( 1) }, { UINT8_C(181), UINT8_C( 0) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint8_t r = simde_x_vclzb_u8(test_vec[i].a); simde_assert_equal_u8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint8_t a; switch (i) { case 0: a = UINT8_C(0); break; case 1: a = UINT8_MAX; break; default: a = simde_test_codegen_random_u8(); break; } uint8_t r = 
simde_x_vclzb_u8(a); simde_test_codegen_write_u8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_x_vclzh_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a; uint16_t r; } test_vec[] = { { UINT16_C( 0), UINT16_C( 16) }, { UINT16_MAX, UINT16_C( 0) }, { UINT16_C(47597), UINT16_C( 0) }, { UINT16_C(13283), UINT16_C( 2) }, { UINT16_C(30782), UINT16_C( 1) }, { UINT16_C(47971), UINT16_C( 0) }, { UINT16_C(10652), UINT16_C( 2) }, { UINT16_C(15587), UINT16_C( 2) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint16_t r = simde_x_vclzh_u16(test_vec[i].a); simde_assert_equal_u16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint16_t a; switch (i) { case 0: a = UINT16_C(0); break; case 1: a = UINT16_MAX; break; default: a = simde_test_codegen_random_u16(); break; } uint16_t r = simde_x_vclzh_u16(a); simde_test_codegen_write_u16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_x_vclzs_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a; uint32_t r; } test_vec[] = { { UINT32_C( 0), UINT32_C( 32) }, { UINT32_MAX, UINT32_C( 0) }, { UINT32_C( 216644308), UINT32_C( 4) }, { UINT32_C(3628548586), UINT32_C( 0) }, { UINT32_C(2274079924), UINT32_C( 0) }, { UINT32_C( 805797440), UINT32_C( 2) }, { UINT32_C(2565242795), UINT32_C( 0) }, { UINT32_C(2009844024), UINT32_C( 1) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint32_t r = simde_x_vclzs_u32(test_vec[i].a); simde_assert_equal_u32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint32_t a; switch (i) { case 0: a = UINT32_C(0); break; case 1: a = UINT32_MAX; break; default: a = simde_test_codegen_random_u32(); break; } uint32_t r = simde_x_vclzs_u32(a); 
simde_test_codegen_write_u32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclz_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 0), -INT8_C( 1), INT8_C( 26), -INT8_C( 5), -INT8_C( 62), INT8_C( 9), -INT8_C( 9), INT8_C( 26) }, { INT8_C( 8), INT8_C( 0), INT8_C( 3), INT8_C( 0), INT8_C( 0), INT8_C( 4), INT8_C( 0), INT8_C( 3) } }, { { -INT8_C( 87), INT8_C( 0), INT8_C( 2), -INT8_C( 4), -INT8_C( 15), -INT8_C( 3), -INT8_C( 2), INT8_C( 4) }, { INT8_C( 0), INT8_C( 8), INT8_C( 6), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 5) } }, { { INT8_C( 49), -INT8_C( 8), -INT8_C( 26), -INT8_C( 7), -INT8_C( 2), -INT8_C( 13), INT8_C( 5), INT8_C( 1) }, { INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 5), INT8_C( 7) } }, { { -INT8_C( 30), -INT8_C( 1), -INT8_C( 25), INT8_C( 30), -INT8_C( 5), INT8_C( 84), -INT8_C( 4), -INT8_C( 1) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 3), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 36), -INT8_C( 117), -INT8_C( 3), INT8_C( 1), INT8_C( 5), INT8_C( 3), INT8_C( 44), INT8_C( 15) }, { INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 7), INT8_C( 5), INT8_C( 6), INT8_C( 2), INT8_C( 4) } }, { { INT8_C( 0), INT8_C( 0), -INT8_C( 115), -INT8_C( 2), -INT8_C( 2), INT8_C( 1), -INT8_C( 18), INT8_C( 0) }, { INT8_C( 8), INT8_C( 8), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 7), INT8_C( 0), INT8_C( 8) } }, { { -INT8_C( 12), -INT8_C( 6), -INT8_C( 2), -INT8_C( 1), -INT8_C( 2), -INT8_C( 1), INT8_C( 58), INT8_C( 0) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 2), INT8_C( 8) } }, { { -INT8_C( 2), INT8_C( 0), INT8_C( 117), INT8_C( 0), INT8_C( 2), INT8_C( 12), INT8_C( 1), INT8_C( 0) }, { INT8_C( 0), INT8_C( 8), INT8_C( 1), INT8_C( 8), INT8_C( 6), INT8_C( 4), INT8_C( 7), INT8_C( 8) } } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t r = simde_vclz_s8(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); a = simde_vshl_s8(a, simde_vneg_s8(simde_vand_s8(simde_test_arm_neon_random_i8x8(), simde_vdup_n_s8(7)))); if (i == 0) { a = simde_vset_lane_s8( INT8_C(0), a, 0); a = simde_vset_lane_s8(-INT8_C(1), a, 1); } simde_int8x8_t r = simde_vclz_s8(a); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclz_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 0), -INT16_C( 1), INT16_C( 9072), -INT16_C( 590) }, { INT16_C( 16), INT16_C( 0), INT16_C( 2), INT16_C( 0) } }, { { INT16_C( 26912), INT16_C( 3), -INT16_C( 93), INT16_C( 15) }, { INT16_C( 1), INT16_C( 14), INT16_C( 0), INT16_C( 12) } }, { { INT16_C( 15), -INT16_C( 14146), -INT16_C( 191), -INT16_C( 4389) }, { INT16_C( 12), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 15217), INT16_C( 5), -INT16_C( 9323), INT16_C( 2408) }, { INT16_C( 0), INT16_C( 13), INT16_C( 0), INT16_C( 4) } }, { { -INT16_C( 7), -INT16_C( 224), INT16_C( 505), -INT16_C( 5) }, { INT16_C( 0), INT16_C( 0), INT16_C( 7), INT16_C( 0) } }, { { INT16_C( 1), -INT16_C( 254), INT16_C( 158), -INT16_C( 1) }, { INT16_C( 15), INT16_C( 0), INT16_C( 8), INT16_C( 0) } }, { { -INT16_C( 1), INT16_C( 2), -INT16_C( 2), -INT16_C( 59) }, { INT16_C( 0), INT16_C( 14), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 11275), INT16_C( 397), -INT16_C( 1009), INT16_C( 2750) }, { INT16_C( 0), INT16_C( 7), INT16_C( 0), INT16_C( 4) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); 
simde_int16x4_t r = simde_vclz_s16(a); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); a = simde_vshl_s16(a, simde_vneg_s16(simde_vand_s16(simde_test_arm_neon_random_i16x4(), simde_vdup_n_s16(15)))); if (i == 0) { a = simde_vset_lane_s16( INT16_C(0), a, 0); a = simde_vset_lane_s16(-INT16_C(1), a, 1); } simde_int16x4_t r = simde_vclz_s16(a); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclz_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 0), -INT32_C( 1) }, { INT32_C( 32), INT32_C( 0) } }, { { -INT32_C( 7795023), -INT32_C( 688) }, { INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 124434), INT32_C( 134) }, { INT32_C( 15), INT32_C( 24) } }, { { INT32_C( 14659), INT32_C( 0) }, { INT32_C( 18), INT32_C( 32) } }, { { -INT32_C( 4567066), -INT32_C( 1) }, { INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 5168), INT32_C( 23) }, { INT32_C( 19), INT32_C( 27) } }, { { INT32_C( 0), INT32_C( 2017326) }, { INT32_C( 32), INT32_C( 11) } }, { { INT32_C( 211567), INT32_C( 17387595) }, { INT32_C( 14), INT32_C( 7) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t r = simde_vclz_s32(a); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); a = simde_vshl_s32(a, simde_vneg_s32(simde_vand_s32(simde_test_arm_neon_random_i32x2(), simde_vdup_n_s32(31)))); if (i == 0) { a = simde_vset_lane_s32( INT32_C(0), a, 0); a = simde_vset_lane_s32(-INT32_C(1), a, 1); } simde_int32x2_t r = simde_vclz_s32(a); 
simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclz_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 0), UINT8_MAX, UINT8_C(216), UINT8_C( 23), UINT8_C( 1), UINT8_C( 14), UINT8_C( 35), UINT8_C( 1) }, { UINT8_C( 8), UINT8_C( 0), UINT8_C( 0), UINT8_C( 3), UINT8_C( 7), UINT8_C( 4), UINT8_C( 2), UINT8_C( 7) } }, { { UINT8_C( 6), UINT8_C(148), UINT8_C( 0), UINT8_C( 3), UINT8_C( 7), UINT8_C( 52), UINT8_C(222), UINT8_C( 2) }, { UINT8_C( 5), UINT8_C( 0), UINT8_C( 8), UINT8_C( 6), UINT8_C( 5), UINT8_C( 2), UINT8_C( 0), UINT8_C( 6) } }, { { UINT8_C( 50), UINT8_C( 30), UINT8_C( 44), UINT8_C( 2), UINT8_C( 1), UINT8_C( 3), UINT8_C( 2), UINT8_C( 13) }, { UINT8_C( 2), UINT8_C( 3), UINT8_C( 2), UINT8_C( 6), UINT8_C( 7), UINT8_C( 6), UINT8_C( 6), UINT8_C( 4) } }, { { UINT8_C(120), UINT8_C( 9), UINT8_C( 69), UINT8_C( 63), UINT8_C( 0), UINT8_C( 52), UINT8_C( 9), UINT8_C(110) }, { UINT8_C( 1), UINT8_C( 4), UINT8_C( 1), UINT8_C( 2), UINT8_C( 8), UINT8_C( 2), UINT8_C( 4), UINT8_C( 1) } }, { { UINT8_C(233), UINT8_C( 10), UINT8_C( 8), UINT8_C( 1), UINT8_C(229), UINT8_C( 9), UINT8_C( 92), UINT8_C( 19) }, { UINT8_C( 0), UINT8_C( 4), UINT8_C( 4), UINT8_C( 7), UINT8_C( 0), UINT8_C( 4), UINT8_C( 1), UINT8_C( 3) } }, { { UINT8_C( 35), UINT8_C( 5), UINT8_C( 2), UINT8_C(126), UINT8_C( 9), UINT8_C( 7), UINT8_C( 54), UINT8_C( 1) }, { UINT8_C( 2), UINT8_C( 5), UINT8_C( 6), UINT8_C( 1), UINT8_C( 4), UINT8_C( 5), UINT8_C( 2), UINT8_C( 7) } }, { { UINT8_C( 13), UINT8_C( 13), UINT8_C( 3), UINT8_C( 4), UINT8_C( 7), UINT8_C( 1), UINT8_C( 3), UINT8_C( 29) }, { UINT8_C( 4), UINT8_C( 4), UINT8_C( 6), UINT8_C( 5), UINT8_C( 5), UINT8_C( 7), UINT8_C( 6), UINT8_C( 3) } }, { { UINT8_C( 12), UINT8_C( 14), UINT8_C( 2), UINT8_C( 44), UINT8_C( 4), UINT8_C(113), UINT8_C( 1), UINT8_C( 2) }, { UINT8_C( 4), UINT8_C( 4), UINT8_C( 6), 
UINT8_C( 2), UINT8_C( 5), UINT8_C( 1), UINT8_C( 7), UINT8_C( 6) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t r = simde_vclz_u8(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); a = simde_vshl_u8(a, simde_vneg_s8(simde_vand_s8(simde_test_arm_neon_random_i8x8(), simde_vdup_n_s8(7)))); if (i == 0) { a = simde_vset_lane_u8(UINT8_C(0), a, 0); a = simde_vset_lane_u8( UINT8_MAX, a, 1); } simde_uint8x8_t r = simde_vclz_u8(a); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclz_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C(26770) }, { UINT16_C( 16), UINT16_C( 0), UINT16_C( 16), UINT16_C( 1) } }, { { UINT16_C( 6421), UINT16_C( 130), UINT16_C( 3843), UINT16_C( 205) }, { UINT16_C( 3), UINT16_C( 8), UINT16_C( 4), UINT16_C( 8) } }, { { UINT16_C( 1603), UINT16_C( 290), UINT16_C( 486), UINT16_C( 386) }, { UINT16_C( 5), UINT16_C( 7), UINT16_C( 7), UINT16_C( 7) } }, { { UINT16_C( 495), UINT16_C( 1), UINT16_C( 7), UINT16_C( 1608) }, { UINT16_C( 7), UINT16_C( 15), UINT16_C( 13), UINT16_C( 5) } }, { { UINT16_C( 1655), UINT16_C(24204), UINT16_C( 50), UINT16_C( 6) }, { UINT16_C( 5), UINT16_C( 1), UINT16_C( 10), UINT16_C( 13) } }, { { UINT16_C( 2), UINT16_C( 6868), UINT16_C(57838), UINT16_C( 413) }, { UINT16_C( 14), UINT16_C( 3), UINT16_C( 0), UINT16_C( 7) } }, { { UINT16_C( 128), UINT16_C( 94), UINT16_C( 204), UINT16_C( 51) }, { UINT16_C( 8), UINT16_C( 9), UINT16_C( 8), UINT16_C( 10) } }, { { UINT16_C( 1010), UINT16_C( 9), UINT16_C( 0), UINT16_C( 1) }, { UINT16_C( 6), UINT16_C( 12), UINT16_C( 16), UINT16_C( 
15) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t r = simde_vclz_u16(a); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); a = simde_vshl_u16(a, simde_vneg_s16(simde_vand_s16(simde_test_arm_neon_random_i16x4(), simde_vdup_n_s16(15)))); if (i == 0) { a = simde_vset_lane_u16(UINT16_C(0), a, 0); a = simde_vset_lane_u16( UINT16_MAX, a, 1); } simde_uint16x4_t r = simde_vclz_u16(a); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclz_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C( 0), UINT32_MAX }, { UINT32_C( 32), UINT32_C( 0) } }, { { UINT32_C(1423272567), UINT32_C( 3536156) }, { UINT32_C( 1), UINT32_C( 10) } }, { { UINT32_C( 1801), UINT32_C( 154636) }, { UINT32_C( 21), UINT32_C( 14) } }, { { UINT32_C( 149), UINT32_C( 322967635) }, { UINT32_C( 24), UINT32_C( 3) } }, { { UINT32_C( 1828682), UINT32_C(1728249100) }, { UINT32_C( 11), UINT32_C( 1) } }, { { UINT32_C( 10614514), UINT32_C( 38) }, { UINT32_C( 8), UINT32_C( 26) } }, { { UINT32_C( 519401), UINT32_C( 1505652) }, { UINT32_C( 13), UINT32_C( 11) } }, { { UINT32_C( 1945), UINT32_C( 266) }, { UINT32_C( 21), UINT32_C( 23) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t r = simde_vclz_u32(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); a = simde_vshl_u32(a, simde_vneg_s32(simde_vand_s32(simde_test_arm_neon_random_i32x2(), 
simde_vdup_n_s32(31)))); if (i == 0) { a = simde_vset_lane_u32(UINT32_C(0), a, 0); a = simde_vset_lane_u32( UINT32_MAX, a, 1); } simde_uint32x2_t r = simde_vclz_u32(a); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclzq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 0), -INT8_C( 1), INT8_C( 1), -INT8_C( 5), INT8_C( 11), -INT8_C( 1), INT8_C( 8), INT8_C( 54), INT8_C( 1), -INT8_C( 20), -INT8_C( 15), INT8_C( 26), INT8_C( 1), INT8_C( 0), -INT8_C( 99), -INT8_C( 7) }, { INT8_C( 8), INT8_C( 0), INT8_C( 7), INT8_C( 0), INT8_C( 4), INT8_C( 0), INT8_C( 4), INT8_C( 2), INT8_C( 7), INT8_C( 0), INT8_C( 0), INT8_C( 3), INT8_C( 7), INT8_C( 8), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 0), INT8_C( 15), INT8_C( 16), INT8_C( 23), INT8_C( 96), INT8_C( 5), -INT8_C( 4), -INT8_C( 1), INT8_C( 3), -INT8_C( 3), -INT8_C( 23), INT8_C( 44), INT8_C( 1), -INT8_C( 121), -INT8_C( 1), -INT8_C( 1) }, { INT8_C( 8), INT8_C( 4), INT8_C( 3), INT8_C( 3), INT8_C( 1), INT8_C( 5), INT8_C( 0), INT8_C( 0), INT8_C( 6), INT8_C( 0), INT8_C( 0), INT8_C( 2), INT8_C( 7), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 2), -INT8_C( 4), INT8_C( 6), -INT8_C( 1), -INT8_C( 9), -INT8_C( 2), -INT8_C( 3), INT8_C( 0), -INT8_C( 31), -INT8_C( 4), -INT8_C( 1), -INT8_C( 1), INT8_C( 1), INT8_C( 7), INT8_C( 3), INT8_MAX }, { INT8_C( 0), INT8_C( 0), INT8_C( 5), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 7), INT8_C( 5), INT8_C( 6), INT8_C( 1) } }, { { -INT8_C( 3), -INT8_C( 2), INT8_C( 68), -INT8_C( 6), -INT8_C( 22), -INT8_C( 1), INT8_C( 20), INT8_C( 1), -INT8_C( 15), -INT8_C( 1), -INT8_C( 4), -INT8_C( 84), INT8_C( 0), INT8_C( 37), INT8_C( 21), INT8_C( 0) }, { INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 3), INT8_C( 7), INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( 2), INT8_C( 3), INT8_C( 8) } }, { { -INT8_C( 23), -INT8_C( 62), INT8_C( 0), INT8_C( 5), INT8_C( 1), INT8_C( 115), -INT8_C( 63), INT8_C( 2), INT8_C( 0), INT8_C( 2), -INT8_C( 20), INT8_C( 2), -INT8_C( 1), -INT8_C( 5), INT8_C( 12), -INT8_C( 5) }, { INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( 5), INT8_C( 7), INT8_C( 1), INT8_C( 0), INT8_C( 6), INT8_C( 8), INT8_C( 6), INT8_C( 0), INT8_C( 6), INT8_C( 0), INT8_C( 0), INT8_C( 4), INT8_C( 0) } }, { { -INT8_C( 16), -INT8_C( 2), INT8_C( 2), INT8_C( 0), INT8_C( 0), -INT8_C( 8), INT8_C( 109), INT8_C( 0), -INT8_C( 17), INT8_C( 7), INT8_C( 23), INT8_C( 122), -INT8_C( 1), INT8_C( 8), INT8_C( 5), -INT8_C( 3) }, { INT8_C( 0), INT8_C( 0), INT8_C( 6), INT8_C( 8), INT8_C( 8), INT8_C( 0), INT8_C( 1), INT8_C( 8), INT8_C( 0), INT8_C( 5), INT8_C( 3), INT8_C( 1), INT8_C( 0), INT8_C( 4), INT8_C( 5), INT8_C( 0) } }, { { INT8_C( 12), INT8_C( 0), INT8_C( 29), INT8_C( 31), -INT8_C( 4), -INT8_C( 4), INT8_C( 0), -INT8_C( 4), INT8_C( 0), -INT8_C( 125), INT8_C( 1), -INT8_C( 1), -INT8_C( 7), -INT8_C( 54), -INT8_C( 76), INT8_C( 0) }, { INT8_C( 4), INT8_C( 8), INT8_C( 3), INT8_C( 3), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( 0), INT8_C( 8), INT8_C( 0), INT8_C( 7), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 8) } }, { { -INT8_C( 2), INT8_C( 1), INT8_C( 1), INT8_C( 58), INT8_C( 41), INT8_C( 7), INT8_C( 0), INT8_C( 1), -INT8_C( 1), INT8_C( 14), INT8_C( 41), -INT8_C( 56), INT8_C( 0), INT8_C( 1), INT8_C( 0), -INT8_C( 4) }, { INT8_C( 0), INT8_C( 7), INT8_C( 7), INT8_C( 2), INT8_C( 2), INT8_C( 5), INT8_C( 8), INT8_C( 7), INT8_C( 0), INT8_C( 4), INT8_C( 2), INT8_C( 0), INT8_C( 8), INT8_C( 7), INT8_C( 8), INT8_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t r = simde_vclzq_s8(a); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', 
stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); a = simde_vshlq_s8(a, simde_vnegq_s8(simde_vandq_s8(simde_test_arm_neon_random_i8x16(), simde_vdupq_n_s8(7)))); if (i == 0) { a = simde_vsetq_lane_s8( INT8_C(0), a, 0); a = simde_vsetq_lane_s8(-INT8_C(1), a, 1); } simde_int8x16_t r = simde_vclzq_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclzq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 0), -INT16_C( 1), INT16_C( 363), -INT16_C( 6), -INT16_C( 14), -INT16_C( 1226), -INT16_C( 297), -INT16_C( 3820) }, { INT16_C( 16), INT16_C( 0), INT16_C( 7), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 14), -INT16_C( 15), -INT16_C( 5), INT16_C( 3), -INT16_C( 463), INT16_C( 2), -INT16_C( 1), -INT16_C( 3686) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 14), INT16_C( 0), INT16_C( 14), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 31), INT16_C( 94), -INT16_C( 4149), -INT16_C( 3), -INT16_C( 1), -INT16_C( 921), -INT16_C( 8), -INT16_C( 1) }, { INT16_C( 11), INT16_C( 9), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 4538), -INT16_C( 3), -INT16_C( 6), INT16_C( 1), INT16_C( 226), INT16_C( 843), -INT16_C( 6), -INT16_C( 4618) }, { INT16_C( 3), INT16_C( 0), INT16_C( 0), INT16_C( 15), INT16_C( 8), INT16_C( 6), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 326), -INT16_C( 174), -INT16_C( 12), INT16_C( 15), -INT16_C( 223), -INT16_C( 2325), -INT16_C( 3828), -INT16_C( 5687) }, { INT16_C( 7), INT16_C( 0), INT16_C( 0), INT16_C( 12), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 1044), INT16_C( 8), INT16_C( 6), -INT16_C( 14693), INT16_C( 1199), INT16_C( 749), INT16_C( 52), INT16_C( 3096) }, { INT16_C( 5), INT16_C( 12), INT16_C( 13), INT16_C( 0), 
INT16_C( 5), INT16_C( 6), INT16_C( 10), INT16_C( 4) } }, { { INT16_C( 21944), -INT16_C( 12245), -INT16_C( 115), -INT16_C( 17817), INT16_C( 73), INT16_C( 1803), INT16_C( 4), -INT16_C( 8055) }, { INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 9), INT16_C( 5), INT16_C( 13), INT16_C( 0) } }, { { -INT16_C( 15), -INT16_C( 1), INT16_C( 60), -INT16_C( 10), -INT16_C( 2776), -INT16_C( 32), -INT16_C( 1), INT16_C( 648) }, { INT16_C( 0), INT16_C( 0), INT16_C( 10), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 6) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t r = simde_vclzq_s16(a); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); a = simde_vshlq_s16(a, simde_vnegq_s16(simde_vandq_s16(simde_test_arm_neon_random_i16x8(), simde_vdupq_n_s16(15)))); if (i == 0) { a = simde_vsetq_lane_s16( INT16_C(0), a, 0); a = simde_vsetq_lane_s16(-INT16_C(1), a, 1); } simde_int16x8_t r = simde_vclzq_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclzq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 0), -INT32_C( 1), -INT32_C( 2742371), INT32_C( 5307732) }, { INT32_C( 32), INT32_C( 0), INT32_C( 0), INT32_C( 9) } }, { { INT32_C( 0), INT32_C( 169886544), -INT32_C( 1), INT32_C( 33527206) }, { INT32_C( 32), INT32_C( 4), INT32_C( 0), INT32_C( 7) } }, { { -INT32_C( 14), INT32_C( 1), -INT32_C( 5141852), INT32_C( 166875) }, { INT32_C( 0), INT32_C( 31), INT32_C( 0), INT32_C( 14) } }, { { -INT32_C( 119), -INT32_C( 13104593), INT32_C( 29218601), INT32_C( 30162) }, { INT32_C( 0), INT32_C( 0), INT32_C( 7), INT32_C( 17) } }, { { 
INT32_C( 2), -INT32_C( 74), INT32_C( 2009), -INT32_C( 6734631) }, { INT32_C( 30), INT32_C( 0), INT32_C( 21), INT32_C( 0) } }, { { -INT32_C( 55), -INT32_C( 23134680), -INT32_C( 1), INT32_C( 22) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 27) } }, { { -INT32_C( 64880981), INT32_C( 1021439), INT32_C( 65), -INT32_C( 2) }, { INT32_C( 0), INT32_C( 12), INT32_C( 25), INT32_C( 0) } }, { { -INT32_C( 20), -INT32_C( 2791), INT32_C( 2), INT32_C( 115) }, { INT32_C( 0), INT32_C( 0), INT32_C( 30), INT32_C( 25) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t r = simde_vclzq_s32(a); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); a = simde_vshlq_s32(a, simde_vnegq_s32(simde_vandq_s32(simde_test_arm_neon_random_i32x4(), simde_vdupq_n_s32(31)))); if (i == 0) { a = simde_vsetq_lane_s32( INT32_C(0), a, 0); a = simde_vsetq_lane_s32(-INT32_C(1), a, 1); } simde_int32x4_t r = simde_vclzq_s32(a); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclzq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C( 0), UINT8_MAX, UINT8_C( 1), UINT8_C( 8), UINT8_C( 83), UINT8_C( 20), UINT8_C( 67), UINT8_C( 1), UINT8_C( 52), UINT8_C( 0), UINT8_C( 6), UINT8_C( 11), UINT8_C(211), UINT8_C( 7), UINT8_C( 14), UINT8_C( 1) }, { UINT8_C( 8), UINT8_C( 0), UINT8_C( 7), UINT8_C( 4), UINT8_C( 1), UINT8_C( 3), UINT8_C( 1), UINT8_C( 7), UINT8_C( 2), UINT8_C( 8), UINT8_C( 5), UINT8_C( 4), UINT8_C( 0), UINT8_C( 5), UINT8_C( 4), UINT8_C( 7) } }, { { UINT8_C( 3), UINT8_C( 81), UINT8_C( 12), UINT8_C( 0), UINT8_C( 51), UINT8_C( 14), UINT8_C( 3), UINT8_C( 10), UINT8_C( 8), UINT8_C( 
11), UINT8_C( 94), UINT8_C( 61), UINT8_C( 88), UINT8_C( 25), UINT8_C( 92), UINT8_C( 4) }, { UINT8_C( 6), UINT8_C( 1), UINT8_C( 4), UINT8_C( 8), UINT8_C( 2), UINT8_C( 4), UINT8_C( 6), UINT8_C( 4), UINT8_C( 4), UINT8_C( 4), UINT8_C( 1), UINT8_C( 2), UINT8_C( 1), UINT8_C( 3), UINT8_C( 1), UINT8_C( 5) } }, { { UINT8_C( 55), UINT8_C( 9), UINT8_C( 0), UINT8_C( 0), UINT8_C( 64), UINT8_C( 30), UINT8_C( 2), UINT8_C( 20), UINT8_C( 1), UINT8_C( 0), UINT8_C( 2), UINT8_C( 98), UINT8_C( 52), UINT8_C( 0), UINT8_C( 3), UINT8_C( 47) }, { UINT8_C( 2), UINT8_C( 4), UINT8_C( 8), UINT8_C( 8), UINT8_C( 1), UINT8_C( 3), UINT8_C( 6), UINT8_C( 3), UINT8_C( 7), UINT8_C( 8), UINT8_C( 6), UINT8_C( 1), UINT8_C( 2), UINT8_C( 8), UINT8_C( 6), UINT8_C( 2) } }, { { UINT8_C( 0), UINT8_C( 5), UINT8_C(102), UINT8_C(153), UINT8_C(180), UINT8_C( 0), UINT8_C( 7), UINT8_C(102), UINT8_C( 1), UINT8_C(104), UINT8_C(201), UINT8_C( 66), UINT8_C( 15), UINT8_C( 3), UINT8_C( 80), UINT8_C( 25) }, { UINT8_C( 8), UINT8_C( 5), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 8), UINT8_C( 5), UINT8_C( 1), UINT8_C( 7), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 4), UINT8_C( 6), UINT8_C( 1), UINT8_C( 3) } }, { { UINT8_C( 0), UINT8_C( 0), UINT8_C( 16), UINT8_C( 3), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 4), UINT8_C(110), UINT8_C( 6), UINT8_C( 0), UINT8_C( 53), UINT8_C( 52) }, { UINT8_C( 8), UINT8_C( 8), UINT8_C( 3), UINT8_C( 6), UINT8_C( 7), UINT8_C( 7), UINT8_C( 7), UINT8_C( 7), UINT8_C( 8), UINT8_C( 8), UINT8_C( 5), UINT8_C( 1), UINT8_C( 5), UINT8_C( 8), UINT8_C( 2), UINT8_C( 2) } }, { { UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 17), UINT8_C( 6), UINT8_C( 23), UINT8_C( 27), UINT8_C( 14), UINT8_C( 57), UINT8_C( 9), UINT8_C( 96), UINT8_C( 1), UINT8_C( 0), UINT8_C( 9), UINT8_C( 56) }, { UINT8_C( 7), UINT8_C( 8), UINT8_C( 7), UINT8_C( 7), UINT8_C( 3), UINT8_C( 5), UINT8_C( 3), UINT8_C( 3), UINT8_C( 4), UINT8_C( 2), UINT8_C( 4), UINT8_C( 1), UINT8_C( 
7), UINT8_C( 8), UINT8_C( 4), UINT8_C( 2) } }, { { UINT8_C( 19), UINT8_C( 8), UINT8_C( 8), UINT8_C( 6), UINT8_C( 42), UINT8_C( 0), UINT8_C(180), UINT8_C( 1), UINT8_C(124), UINT8_C( 15), UINT8_C( 1), UINT8_C( 3), UINT8_C( 7), UINT8_C( 17), UINT8_C(182), UINT8_C( 3) }, { UINT8_C( 3), UINT8_C( 4), UINT8_C( 4), UINT8_C( 5), UINT8_C( 2), UINT8_C( 8), UINT8_C( 0), UINT8_C( 7), UINT8_C( 1), UINT8_C( 4), UINT8_C( 7), UINT8_C( 6), UINT8_C( 5), UINT8_C( 3), UINT8_C( 0), UINT8_C( 6) } }, { { UINT8_C( 4), UINT8_C(163), UINT8_C( 5), UINT8_C( 7), UINT8_C( 1), UINT8_C( 1), UINT8_C( 44), UINT8_C( 0), UINT8_C( 28), UINT8_C( 84), UINT8_C( 0), UINT8_C( 8), UINT8_C( 3), UINT8_C( 3), UINT8_C( 21), UINT8_C( 0) }, { UINT8_C( 5), UINT8_C( 0), UINT8_C( 5), UINT8_C( 5), UINT8_C( 7), UINT8_C( 7), UINT8_C( 2), UINT8_C( 8), UINT8_C( 3), UINT8_C( 1), UINT8_C( 8), UINT8_C( 4), UINT8_C( 6), UINT8_C( 6), UINT8_C( 3), UINT8_C( 8) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t r = simde_vclzq_u8(a); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); a = simde_vshlq_u8(a, simde_vnegq_s8(simde_vandq_s8(simde_test_arm_neon_random_i8x16(), simde_vdupq_n_s8(7)))); if (i == 0) { a = simde_vsetq_lane_u8(UINT8_C(0), a, 0); a = simde_vsetq_lane_u8( UINT8_MAX, a, 1); } simde_uint8x16_t r = simde_vclzq_u8(a); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclzq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C( 0), UINT16_MAX, UINT16_C( 6985), UINT16_C( 29), UINT16_C( 14), UINT16_C( 0), UINT16_C( 115), UINT16_C( 282) }, { UINT16_C( 16), UINT16_C( 0), UINT16_C( 3), 
UINT16_C( 11), UINT16_C( 12), UINT16_C( 16), UINT16_C( 9), UINT16_C( 7) } }, { { UINT16_C(26256), UINT16_C( 5768), UINT16_C( 3), UINT16_C( 1821), UINT16_C( 9), UINT16_C( 6047), UINT16_C( 177), UINT16_C( 150) }, { UINT16_C( 1), UINT16_C( 3), UINT16_C( 14), UINT16_C( 5), UINT16_C( 12), UINT16_C( 3), UINT16_C( 8), UINT16_C( 8) } }, { { UINT16_C( 1), UINT16_C( 7339), UINT16_C( 0), UINT16_C( 118), UINT16_C( 123), UINT16_C( 1), UINT16_C( 248), UINT16_C( 1779) }, { UINT16_C( 15), UINT16_C( 3), UINT16_C( 16), UINT16_C( 9), UINT16_C( 9), UINT16_C( 15), UINT16_C( 8), UINT16_C( 5) } }, { { UINT16_C( 451), UINT16_C( 44), UINT16_C( 3245), UINT16_C(32974), UINT16_C( 0), UINT16_C( 12), UINT16_C( 416), UINT16_C( 1978) }, { UINT16_C( 7), UINT16_C( 10), UINT16_C( 4), UINT16_C( 0), UINT16_C( 16), UINT16_C( 12), UINT16_C( 7), UINT16_C( 5) } }, { { UINT16_C( 159), UINT16_C( 4), UINT16_C( 275), UINT16_C( 22), UINT16_C( 7), UINT16_C( 319), UINT16_C( 677), UINT16_C(34586) }, { UINT16_C( 8), UINT16_C( 13), UINT16_C( 7), UINT16_C( 11), UINT16_C( 13), UINT16_C( 7), UINT16_C( 6), UINT16_C( 0) } }, { { UINT16_C( 419), UINT16_C( 114), UINT16_C( 490), UINT16_C( 3), UINT16_C( 12), UINT16_C( 6178), UINT16_C( 0), UINT16_C( 1013) }, { UINT16_C( 7), UINT16_C( 9), UINT16_C( 7), UINT16_C( 14), UINT16_C( 12), UINT16_C( 3), UINT16_C( 16), UINT16_C( 6) } }, { { UINT16_C( 3257), UINT16_C( 0), UINT16_C( 51), UINT16_C( 1557), UINT16_C(52698), UINT16_C(18497), UINT16_C( 12), UINT16_C( 34) }, { UINT16_C( 4), UINT16_C( 16), UINT16_C( 10), UINT16_C( 5), UINT16_C( 0), UINT16_C( 1), UINT16_C( 12), UINT16_C( 10) } }, { { UINT16_C( 3452), UINT16_C( 90), UINT16_C( 61), UINT16_C(16486), UINT16_C( 2618), UINT16_C( 1696), UINT16_C( 39), UINT16_C( 9222) }, { UINT16_C( 4), UINT16_C( 9), UINT16_C( 10), UINT16_C( 1), UINT16_C( 4), UINT16_C( 5), UINT16_C( 10), UINT16_C( 2) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t r = 
simde_vclzq_u16(a); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); a = simde_vshlq_u16(a, simde_vnegq_s16(simde_vandq_s16(simde_test_arm_neon_random_i16x8(), simde_vdupq_n_s16(15)))); if (i == 0) { a = simde_vsetq_lane_u16(UINT16_C(0), a, 0); a = simde_vsetq_lane_u16( UINT16_MAX, a, 1); } simde_uint16x8_t r = simde_vclzq_u16(a); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vclzq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C( 0), UINT32_MAX, UINT32_C( 62094), UINT32_C( 456339214) }, { UINT32_C( 32), UINT32_C( 0), UINT32_C( 16), UINT32_C( 3) } }, { { UINT32_C( 4054), UINT32_C( 911429422), UINT32_C( 0), UINT32_C( 399294) }, { UINT32_C( 20), UINT32_C( 2), UINT32_C( 32), UINT32_C( 13) } }, { { UINT32_C( 7), UINT32_C( 11236), UINT32_C( 11), UINT32_C( 465108) }, { UINT32_C( 29), UINT32_C( 18), UINT32_C( 28), UINT32_C( 13) } }, { { UINT32_C( 2604114), UINT32_C( 3), UINT32_C( 237743), UINT32_C( 468898133) }, { UINT32_C( 10), UINT32_C( 30), UINT32_C( 14), UINT32_C( 3) } }, { { UINT32_C( 13205), UINT32_C( 6), UINT32_C( 984219941), UINT32_C( 10668962) }, { UINT32_C( 18), UINT32_C( 29), UINT32_C( 2), UINT32_C( 8) } }, { { UINT32_C( 750849), UINT32_C( 4307860), UINT32_C( 15522368), UINT32_C( 678060) }, { UINT32_C( 12), UINT32_C( 9), UINT32_C( 8), UINT32_C( 12) } }, { { UINT32_C( 3291), UINT32_C( 1005), UINT32_C( 14643902), UINT32_C( 7) }, { UINT32_C( 20), UINT32_C( 22), UINT32_C( 8), UINT32_C( 29) } }, { { UINT32_C( 5), UINT32_C( 188), UINT32_C( 4073), UINT32_C( 4) }, { UINT32_C( 29), UINT32_C( 24), UINT32_C( 20), UINT32_C( 29) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = 
simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t r = simde_vclzq_u32(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); a = simde_vshlq_u32(a, simde_vnegq_s32(simde_vandq_s32(simde_test_arm_neon_random_i32x4(), simde_vdupq_n_s32(31)))); if (i == 0) { a = simde_vsetq_lane_u32(UINT32_C(0), a, 0); a = simde_vsetq_lane_u32( UINT32_MAX, a, 1); } simde_uint32x4_t r = simde_vclzq_u32(a); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(x_vclzb_s8) SIMDE_TEST_FUNC_LIST_ENTRY(x_vclzh_s16) SIMDE_TEST_FUNC_LIST_ENTRY(x_vclzs_s32) SIMDE_TEST_FUNC_LIST_ENTRY(x_vclzb_u8) SIMDE_TEST_FUNC_LIST_ENTRY(x_vclzh_u16) SIMDE_TEST_FUNC_LIST_ENTRY(x_vclzs_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vclz_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vclz_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vclz_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vclz_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vclz_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vclz_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vclzq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vclzq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vclzq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vclzq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vclzq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vclzq_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/cnt.c000066400000000000000000000357621400333146700162640ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN cnt #include "test-neon.h" #include "../../../simde/arm/neon/cnt.h" static int test_simde_vcnt_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int8_t a[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 89), -INT8_C( 96), -INT8_C( 103), -INT8_C( 70), -INT8_C( 8), -INT8_C( 10), INT8_C( 37), -INT8_C( 116) }, { INT8_C( 4), INT8_C( 2), INT8_C( 4), INT8_C( 5), INT8_C( 5), INT8_C( 6), INT8_C( 3), INT8_C( 3) } 
}, { { -INT8_C( 66), INT8_C( 58), -INT8_C( 5), -INT8_C( 94), INT8_C( 15), INT8_C( 122), INT8_C( 95), -INT8_C( 35) }, { INT8_C( 6), INT8_C( 4), INT8_C( 7), INT8_C( 3), INT8_C( 4), INT8_C( 5), INT8_C( 6), INT8_C( 6) } }, { { -INT8_C( 102), -INT8_C( 68), INT8_C( 104), -INT8_C( 83), -INT8_C( 88), -INT8_C( 36), -INT8_C( 100), INT8_MIN }, { INT8_C( 4), INT8_C( 5), INT8_C( 3), INT8_C( 5), INT8_C( 3), INT8_C( 5), INT8_C( 4), INT8_C( 1) } }, { { -INT8_C( 32), -INT8_C( 127), INT8_C( 72), -INT8_C( 57), -INT8_C( 104), INT8_C( 77), INT8_C( 40), -INT8_C( 15) }, { INT8_C( 3), INT8_C( 2), INT8_C( 2), INT8_C( 5), INT8_C( 3), INT8_C( 4), INT8_C( 2), INT8_C( 5) } }, { { -INT8_C( 18), -INT8_C( 63), -INT8_C( 84), -INT8_C( 26), -INT8_C( 73), -INT8_C( 47), INT8_C( 114), INT8_C( 117) }, { INT8_C( 6), INT8_C( 3), INT8_C( 4), INT8_C( 5), INT8_C( 6), INT8_C( 4), INT8_C( 4), INT8_C( 5) } }, { { INT8_C( 11), INT8_C( 109), INT8_C( 23), INT8_C( 26), -INT8_C( 25), INT8_C( 118), -INT8_C( 9), -INT8_C( 127) }, { INT8_C( 3), INT8_C( 5), INT8_C( 4), INT8_C( 3), INT8_C( 6), INT8_C( 5), INT8_C( 7), INT8_C( 2) } }, { { INT8_C( 51), INT8_C( 96), INT8_C( 46), -INT8_C( 37), INT8_C( 60), -INT8_C( 54), INT8_C( 92), INT8_C( 28) }, { INT8_C( 4), INT8_C( 2), INT8_C( 4), INT8_C( 6), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 3) } }, { { INT8_C( 76), -INT8_C( 92), -INT8_C( 28), -INT8_C( 28), -INT8_C( 15), INT8_C( 12), -INT8_C( 43), -INT8_C( 33) }, { INT8_C( 3), INT8_C( 3), INT8_C( 4), INT8_C( 4), INT8_C( 5), INT8_C( 2), INT8_C( 5), INT8_C( 7) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t r = simde_vcnt_s8(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vcnt_s8(a); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); 
simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcnt_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint8_t a[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(211), UINT8_C(195), UINT8_C(252), UINT8_C( 99), UINT8_C( 1), UINT8_C(121), UINT8_C(110), UINT8_C( 12) }, { UINT8_C( 5), UINT8_C( 4), UINT8_C( 6), UINT8_C( 4), UINT8_C( 1), UINT8_C( 5), UINT8_C( 5), UINT8_C( 2) } }, { { UINT8_C(233), UINT8_C( 90), UINT8_C( 91), UINT8_C(128), UINT8_C(108), UINT8_C( 22), UINT8_C(193), UINT8_C( 8) }, { UINT8_C( 5), UINT8_C( 4), UINT8_C( 5), UINT8_C( 1), UINT8_C( 4), UINT8_C( 3), UINT8_C( 3), UINT8_C( 1) } }, { { UINT8_C( 3), UINT8_C(151), UINT8_C( 72), UINT8_C(113), UINT8_C(148), UINT8_C(174), UINT8_C( 75), UINT8_C( 39) }, { UINT8_C( 2), UINT8_C( 5), UINT8_C( 2), UINT8_C( 4), UINT8_C( 3), UINT8_C( 5), UINT8_C( 4), UINT8_C( 4) } }, { { UINT8_C( 94), UINT8_C(223), UINT8_C(243), UINT8_C(180), UINT8_C( 18), UINT8_C(161), UINT8_C(144), UINT8_C(230) }, { UINT8_C( 5), UINT8_C( 7), UINT8_C( 6), UINT8_C( 4), UINT8_C( 2), UINT8_C( 3), UINT8_C( 2), UINT8_C( 5) } }, { { UINT8_C(100), UINT8_C(140), UINT8_C( 73), UINT8_C(101), UINT8_C( 5), UINT8_C(183), UINT8_C(114), UINT8_C(238) }, { UINT8_C( 3), UINT8_C( 3), UINT8_C( 3), UINT8_C( 4), UINT8_C( 2), UINT8_C( 6), UINT8_C( 4), UINT8_C( 6) } }, { { UINT8_C( 17), UINT8_C(205), UINT8_C(111), UINT8_C(125), UINT8_C(227), UINT8_C( 48), UINT8_C(134), UINT8_C(231) }, { UINT8_C( 2), UINT8_C( 5), UINT8_C( 6), UINT8_C( 6), UINT8_C( 5), UINT8_C( 2), UINT8_C( 3), UINT8_C( 6) } }, { { UINT8_C(199), UINT8_C(206), UINT8_C( 88), UINT8_C( 91), UINT8_C(124), UINT8_C(164), UINT8_C(130), UINT8_C(219) }, { UINT8_C( 5), UINT8_C( 5), UINT8_C( 3), UINT8_C( 5), UINT8_C( 5), UINT8_C( 3), UINT8_C( 2), UINT8_C( 6) } }, { { UINT8_C(131), UINT8_C(118), UINT8_C(143), UINT8_C(150), UINT8_C( 23), UINT8_C( 32), UINT8_C(124), UINT8_C(124) }, { UINT8_C( 3), UINT8_C( 5), UINT8_C( 5), UINT8_C( 4), UINT8_C( 4), UINT8_C( 1), UINT8_C( 
5), UINT8_C( 5) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t r = simde_vcnt_u8(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vcnt_u8(a); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcntq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int8_t a[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 107), -INT8_C( 51), INT8_C( 29), -INT8_C( 40), -INT8_C( 93), -INT8_C( 73), -INT8_C( 64), -INT8_C( 89), INT8_C( 102), -INT8_C( 25), -INT8_C( 96), INT8_C( 26), -INT8_C( 18), INT8_C( 104), -INT8_C( 6), INT8_C( 33) }, { INT8_C( 5), INT8_C( 5), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 6), INT8_C( 2), INT8_C( 5), INT8_C( 4), INT8_C( 6), INT8_C( 2), INT8_C( 3), INT8_C( 6), INT8_C( 3), INT8_C( 6), INT8_C( 2) } }, { { -INT8_C( 26), -INT8_C( 30), -INT8_C( 93), -INT8_C( 86), -INT8_C( 51), INT8_C( 82), INT8_C( 56), INT8_C( 107), INT8_C( 22), -INT8_C( 112), INT8_C( 110), INT8_C( 62), -INT8_C( 26), INT8_C( 54), INT8_C( 45), INT8_C( 81) }, { INT8_C( 5), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 5), INT8_C( 3), INT8_C( 3), INT8_C( 5), INT8_C( 3), INT8_C( 2), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 4), INT8_C( 4), INT8_C( 3) } }, { { INT8_C( 3), INT8_C( 74), INT8_C( 41), -INT8_C( 89), INT8_C( 1), -INT8_C( 23), INT8_C( 78), INT8_C( 104), -INT8_C( 48), -INT8_C( 18), -INT8_C( 126), -INT8_C( 65), INT8_C( 87), INT8_C( 125), -INT8_C( 32), INT8_C( 61) }, { INT8_C( 2), INT8_C( 3), INT8_C( 3), INT8_C( 5), INT8_C( 1), INT8_C( 5), INT8_C( 4), INT8_C( 3), INT8_C( 3), INT8_C( 6), INT8_C( 2), INT8_C( 7), INT8_C( 5), INT8_C( 6), INT8_C( 3), INT8_C( 5) } }, { { INT8_C( 95), -INT8_C( 125), -INT8_C( 25), 
INT8_C( 44), -INT8_C( 43), INT8_C( 32), -INT8_C( 105), -INT8_C( 21), -INT8_C( 80), INT8_C( 6), INT8_C( 41), -INT8_C( 106), INT8_C( 60), INT8_C( 86), -INT8_C( 25), INT8_C( 64) }, { INT8_C( 6), INT8_C( 3), INT8_C( 6), INT8_C( 3), INT8_C( 5), INT8_C( 1), INT8_C( 5), INT8_C( 6), INT8_C( 3), INT8_C( 2), INT8_C( 3), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 6), INT8_C( 1) } }, { { -INT8_C( 95), INT8_C( 17), -INT8_C( 25), -INT8_C( 94), -INT8_C( 6), INT8_C( 53), INT8_C( 10), -INT8_C( 53), INT8_C( 35), -INT8_C( 115), -INT8_C( 118), INT8_C( 122), INT8_C( 10), INT8_C( 106), -INT8_C( 73), INT8_C( 105) }, { INT8_C( 3), INT8_C( 2), INT8_C( 6), INT8_C( 3), INT8_C( 6), INT8_C( 4), INT8_C( 2), INT8_C( 5), INT8_C( 3), INT8_C( 4), INT8_C( 3), INT8_C( 5), INT8_C( 2), INT8_C( 4), INT8_C( 6), INT8_C( 4) } }, { { -INT8_C( 19), -INT8_C( 97), -INT8_C( 106), -INT8_C( 61), -INT8_C( 65), INT8_C( 45), -INT8_C( 82), INT8_C( 111), INT8_C( 51), -INT8_C( 40), INT8_C( 5), INT8_C( 112), INT8_C( 46), -INT8_C( 20), -INT8_C( 80), -INT8_C( 49) }, { INT8_C( 6), INT8_C( 6), INT8_C( 4), INT8_C( 4), INT8_C( 7), INT8_C( 4), INT8_C( 5), INT8_C( 6), INT8_C( 4), INT8_C( 4), INT8_C( 2), INT8_C( 3), INT8_C( 4), INT8_C( 5), INT8_C( 3), INT8_C( 6) } }, { { -INT8_C( 3), -INT8_C( 105), INT8_C( 114), -INT8_C( 8), -INT8_C( 52), INT8_C( 124), -INT8_C( 61), -INT8_C( 17), INT8_C( 9), INT8_C( 77), INT8_C( 106), INT8_C( 19), -INT8_C( 73), INT8_C( 33), INT8_C( 125), -INT8_C( 92) }, { INT8_C( 7), INT8_C( 5), INT8_C( 4), INT8_C( 5), INT8_C( 4), INT8_C( 5), INT8_C( 4), INT8_C( 7), INT8_C( 2), INT8_C( 4), INT8_C( 4), INT8_C( 3), INT8_C( 6), INT8_C( 2), INT8_C( 6), INT8_C( 3) } }, { { -INT8_C( 64), INT8_C( 19), INT8_C( 103), INT8_MAX, INT8_C( 64), INT8_C( 22), -INT8_C( 18), INT8_C( 116), -INT8_C( 18), -INT8_C( 13), -INT8_C( 28), INT8_C( 28), -INT8_C( 32), -INT8_C( 108), -INT8_C( 20), -INT8_C( 35) }, { INT8_C( 2), INT8_C( 3), INT8_C( 5), INT8_C( 7), INT8_C( 1), INT8_C( 3), INT8_C( 6), INT8_C( 4), INT8_C( 6), INT8_C( 6), INT8_C( 
4), INT8_C( 3), INT8_C( 3), INT8_C( 3), INT8_C( 5), INT8_C( 6) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t r = simde_vcntq_s8(a); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vcntq_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcntq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint8_t a[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C( 43), UINT8_C( 94), UINT8_C(213), UINT8_C(247), UINT8_C(218), UINT8_C(152), UINT8_C(230), UINT8_C(228), UINT8_C(229), UINT8_C( 80), UINT8_C(247), UINT8_C(156), UINT8_C(114), UINT8_C(116), UINT8_C( 65), UINT8_C( 50) }, { UINT8_C( 4), UINT8_C( 5), UINT8_C( 5), UINT8_C( 7), UINT8_C( 5), UINT8_C( 3), UINT8_C( 5), UINT8_C( 4), UINT8_C( 5), UINT8_C( 2), UINT8_C( 7), UINT8_C( 4), UINT8_C( 4), UINT8_C( 4), UINT8_C( 2), UINT8_C( 3) } }, { { UINT8_C(135), UINT8_C(168), UINT8_C(178), UINT8_C(200), UINT8_C(190), UINT8_C(160), UINT8_C( 60), UINT8_C(172), UINT8_C(148), UINT8_C( 32), UINT8_C(201), UINT8_C(116), UINT8_C(180), UINT8_C(181), UINT8_C( 81), UINT8_C(223) }, { UINT8_C( 4), UINT8_C( 3), UINT8_C( 4), UINT8_C( 3), UINT8_C( 6), UINT8_C( 2), UINT8_C( 4), UINT8_C( 4), UINT8_C( 3), UINT8_C( 1), UINT8_C( 4), UINT8_C( 4), UINT8_C( 4), UINT8_C( 5), UINT8_C( 3), UINT8_C( 7) } }, { { UINT8_C( 19), UINT8_C( 39), UINT8_C(214), UINT8_C(237), UINT8_C(191), UINT8_C(188), UINT8_C(209), UINT8_C(165), UINT8_C( 13), UINT8_C(201), UINT8_C( 65), UINT8_C(127), UINT8_C( 61), UINT8_C(130), UINT8_C(177), UINT8_C(197) }, { UINT8_C( 3), UINT8_C( 4), UINT8_C( 5), UINT8_C( 6), UINT8_C( 7), UINT8_C( 5), UINT8_C( 4), UINT8_C( 4), UINT8_C( 3), UINT8_C( 4), 
UINT8_C( 2), UINT8_C( 7), UINT8_C( 5), UINT8_C( 2), UINT8_C( 4), UINT8_C( 4) } }, { { UINT8_C( 43), UINT8_C( 99), UINT8_C(141), UINT8_C(233), UINT8_C( 4), UINT8_C(201), UINT8_C(150), UINT8_C(152), UINT8_C(233), UINT8_C( 95), UINT8_C( 12), UINT8_C(157), UINT8_C( 20), UINT8_C( 93), UINT8_C(124), UINT8_C( 39) }, { UINT8_C( 4), UINT8_C( 4), UINT8_C( 4), UINT8_C( 5), UINT8_C( 1), UINT8_C( 4), UINT8_C( 4), UINT8_C( 3), UINT8_C( 5), UINT8_C( 6), UINT8_C( 2), UINT8_C( 5), UINT8_C( 2), UINT8_C( 5), UINT8_C( 5), UINT8_C( 4) } }, { { UINT8_C(132), UINT8_C( 82), UINT8_C( 20), UINT8_C( 68), UINT8_C( 14), UINT8_C(230), UINT8_C(233), UINT8_C( 27), UINT8_C(175), UINT8_C( 42), UINT8_C(154), UINT8_C(236), UINT8_C(173), UINT8_C( 76), UINT8_C(177), UINT8_C(216) }, { UINT8_C( 2), UINT8_C( 3), UINT8_C( 2), UINT8_C( 2), UINT8_C( 3), UINT8_C( 5), UINT8_C( 5), UINT8_C( 4), UINT8_C( 6), UINT8_C( 3), UINT8_C( 4), UINT8_C( 5), UINT8_C( 5), UINT8_C( 3), UINT8_C( 4), UINT8_C( 4) } }, { { UINT8_C(175), UINT8_C( 62), UINT8_C(193), UINT8_C(179), UINT8_C( 7), UINT8_C( 87), UINT8_C( 75), UINT8_C(240), UINT8_C(182), UINT8_C( 87), UINT8_C(141), UINT8_C(202), UINT8_C(181), UINT8_C( 9), UINT8_C(241), UINT8_C( 57) }, { UINT8_C( 6), UINT8_C( 5), UINT8_C( 3), UINT8_C( 5), UINT8_C( 3), UINT8_C( 5), UINT8_C( 4), UINT8_C( 4), UINT8_C( 5), UINT8_C( 5), UINT8_C( 4), UINT8_C( 4), UINT8_C( 5), UINT8_C( 2), UINT8_C( 5), UINT8_C( 4) } }, { { UINT8_C( 91), UINT8_C( 6), UINT8_C(125), UINT8_C(106), UINT8_C(236), UINT8_C(102), UINT8_C(133), UINT8_C(155), UINT8_C(145), UINT8_C( 32), UINT8_C(135), UINT8_C( 62), UINT8_C(108), UINT8_C( 57), UINT8_C( 22), UINT8_C( 27) }, { UINT8_C( 5), UINT8_C( 2), UINT8_C( 6), UINT8_C( 4), UINT8_C( 5), UINT8_C( 4), UINT8_C( 3), UINT8_C( 5), UINT8_C( 3), UINT8_C( 1), UINT8_C( 4), UINT8_C( 5), UINT8_C( 4), UINT8_C( 4), UINT8_C( 3), UINT8_C( 4) } }, { { UINT8_C(119), UINT8_C(215), UINT8_C(207), UINT8_C(127), UINT8_C( 47), UINT8_C( 26), UINT8_C(111), UINT8_C(229), UINT8_C(114), UINT8_C(253), 
UINT8_C(176), UINT8_C( 39), UINT8_C( 6), UINT8_C(161), UINT8_C( 96), UINT8_C( 98) }, { UINT8_C( 6), UINT8_C( 6), UINT8_C( 6), UINT8_C( 7), UINT8_C( 5), UINT8_C( 3), UINT8_C( 6), UINT8_C( 5), UINT8_C( 4), UINT8_C( 7), UINT8_C( 3), UINT8_C( 4), UINT8_C( 2), UINT8_C( 3), UINT8_C( 2), UINT8_C( 3) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t r = simde_vcntq_u8(a); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vcntq_u8(a); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vcnt_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vcnt_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vcntq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vcntq_u8) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/combine.c000066400000000000000000000777011400333146700171130ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN combine #include "test-neon.h" #include "../../../simde/arm/neon/combine.h" static int test_simde_vcombine_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -53.98), SIMDE_FLOAT32_C( -106.66) }, { SIMDE_FLOAT32_C( -75.22), SIMDE_FLOAT32_C( 554.40) }, { SIMDE_FLOAT32_C( -53.98), SIMDE_FLOAT32_C( -106.66), SIMDE_FLOAT32_C( -75.22), SIMDE_FLOAT32_C( 554.40) } }, { { SIMDE_FLOAT32_C( 653.50), SIMDE_FLOAT32_C( 983.28) }, { SIMDE_FLOAT32_C( 26.88), SIMDE_FLOAT32_C( 397.84) }, { SIMDE_FLOAT32_C( 653.50), SIMDE_FLOAT32_C( 983.28), SIMDE_FLOAT32_C( 26.88), SIMDE_FLOAT32_C( 397.84) } }, { { SIMDE_FLOAT32_C( -890.56), SIMDE_FLOAT32_C( 449.15) }, { 
SIMDE_FLOAT32_C( -645.05), SIMDE_FLOAT32_C( 760.45) }, { SIMDE_FLOAT32_C( -890.56), SIMDE_FLOAT32_C( 449.15), SIMDE_FLOAT32_C( -645.05), SIMDE_FLOAT32_C( 760.45) } }, { { SIMDE_FLOAT32_C( -595.29), SIMDE_FLOAT32_C( -601.12) }, { SIMDE_FLOAT32_C( 312.34), SIMDE_FLOAT32_C( -100.77) }, { SIMDE_FLOAT32_C( -595.29), SIMDE_FLOAT32_C( -601.12), SIMDE_FLOAT32_C( 312.34), SIMDE_FLOAT32_C( -100.77) } }, { { SIMDE_FLOAT32_C( -983.11), SIMDE_FLOAT32_C( 886.29) }, { SIMDE_FLOAT32_C( -371.66), SIMDE_FLOAT32_C( 376.19) }, { SIMDE_FLOAT32_C( -983.11), SIMDE_FLOAT32_C( 886.29), SIMDE_FLOAT32_C( -371.66), SIMDE_FLOAT32_C( 376.19) } }, { { SIMDE_FLOAT32_C( 421.16), SIMDE_FLOAT32_C( -941.17) }, { SIMDE_FLOAT32_C( -980.32), SIMDE_FLOAT32_C( 53.73) }, { SIMDE_FLOAT32_C( 421.16), SIMDE_FLOAT32_C( -941.17), SIMDE_FLOAT32_C( -980.32), SIMDE_FLOAT32_C( 53.73) } }, { { SIMDE_FLOAT32_C( -450.56), SIMDE_FLOAT32_C( -313.93) }, { SIMDE_FLOAT32_C( -837.06), SIMDE_FLOAT32_C( -183.56) }, { SIMDE_FLOAT32_C( -450.56), SIMDE_FLOAT32_C( -313.93), SIMDE_FLOAT32_C( -837.06), SIMDE_FLOAT32_C( -183.56) } }, { { SIMDE_FLOAT32_C( 591.39), SIMDE_FLOAT32_C( 270.71) }, { SIMDE_FLOAT32_C( -178.56), SIMDE_FLOAT32_C( -462.59) }, { SIMDE_FLOAT32_C( 591.39), SIMDE_FLOAT32_C( 270.71), SIMDE_FLOAT32_C( -178.56), SIMDE_FLOAT32_C( -462.59) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x4_t r = simde_vcombine_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vcombine_f32(a, b); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, b, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcombine_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; simde_float64 b[1]; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 425.89) }, { SIMDE_FLOAT64_C( 679.01) }, { SIMDE_FLOAT64_C( 425.89), SIMDE_FLOAT64_C( 679.01) } }, { { SIMDE_FLOAT64_C( 46.20) }, { SIMDE_FLOAT64_C( -599.79) }, { SIMDE_FLOAT64_C( 46.20), SIMDE_FLOAT64_C( -599.79) } }, { { SIMDE_FLOAT64_C( -92.67) }, { SIMDE_FLOAT64_C( 654.91) }, { SIMDE_FLOAT64_C( -92.67), SIMDE_FLOAT64_C( 654.91) } }, { { SIMDE_FLOAT64_C( 643.92) }, { SIMDE_FLOAT64_C( 238.58) }, { SIMDE_FLOAT64_C( 643.92), SIMDE_FLOAT64_C( 238.58) } }, { { SIMDE_FLOAT64_C( 715.59) }, { SIMDE_FLOAT64_C( -916.05) }, { SIMDE_FLOAT64_C( 715.59), SIMDE_FLOAT64_C( -916.05) } }, { { SIMDE_FLOAT64_C( -885.97) }, { SIMDE_FLOAT64_C( 112.56) }, { SIMDE_FLOAT64_C( -885.97), SIMDE_FLOAT64_C( 112.56) } }, { { SIMDE_FLOAT64_C( 319.99) }, { SIMDE_FLOAT64_C( -850.11) }, { SIMDE_FLOAT64_C( 319.99), SIMDE_FLOAT64_C( -850.11) } }, { { SIMDE_FLOAT64_C( -588.46) }, { SIMDE_FLOAT64_C( 182.96) }, { SIMDE_FLOAT64_C( -588.46), SIMDE_FLOAT64_C( 182.96) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_float64x2_t r = simde_vcombine_f64(a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t b = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x2_t r = simde_vcombine_f64(a, b); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x2(2, r, 
SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcombine_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; int8_t r[16]; } test_vec[] = { { { INT8_C( 68), -INT8_C( 50), -INT8_C( 26), INT8_C( 105), INT8_C( 81), INT8_C( 69), INT8_C( 3), INT8_C( 21) }, { INT8_C( 119), INT8_C( 72), INT8_C( 123), -INT8_C( 44), INT8_C( 61), INT8_C( 43), INT8_C( 109), -INT8_C( 34) }, { INT8_C( 68), -INT8_C( 50), -INT8_C( 26), INT8_C( 105), INT8_C( 81), INT8_C( 69), INT8_C( 3), INT8_C( 21), INT8_C( 119), INT8_C( 72), INT8_C( 123), -INT8_C( 44), INT8_C( 61), INT8_C( 43), INT8_C( 109), -INT8_C( 34) } }, { { INT8_C( 36), -INT8_C( 100), INT8_C( 109), -INT8_C( 72), -INT8_C( 41), -INT8_C( 75), INT8_C( 14), INT8_C( 114) }, { INT8_C( 110), INT8_C( 126), -INT8_C( 79), -INT8_C( 75), INT8_C( 23), -INT8_C( 2), -INT8_C( 9), INT8_C( 91) }, { INT8_C( 36), -INT8_C( 100), INT8_C( 109), -INT8_C( 72), -INT8_C( 41), -INT8_C( 75), INT8_C( 14), INT8_C( 114), INT8_C( 110), INT8_C( 126), -INT8_C( 79), -INT8_C( 75), INT8_C( 23), -INT8_C( 2), -INT8_C( 9), INT8_C( 91) } }, { { -INT8_C( 51), -INT8_C( 34), -INT8_C( 59), INT8_C( 30), INT8_C( 35), -INT8_C( 56), INT8_C( 51), -INT8_C( 102) }, { INT8_C( 16), -INT8_C( 82), INT8_C( 110), INT8_C( 77), -INT8_C( 38), -INT8_C( 37), INT8_C( 43), -INT8_C( 2) }, { -INT8_C( 51), -INT8_C( 34), -INT8_C( 59), INT8_C( 30), INT8_C( 35), -INT8_C( 56), INT8_C( 51), -INT8_C( 102), INT8_C( 16), -INT8_C( 82), INT8_C( 110), INT8_C( 77), -INT8_C( 38), -INT8_C( 37), INT8_C( 43), -INT8_C( 2) } }, { { INT8_C( 119), -INT8_C( 104), -INT8_C( 74), INT8_C( 79), INT8_C( 78), -INT8_C( 60), -INT8_C( 63), -INT8_C( 68) }, { INT8_C( 66), INT8_C( 114), INT8_C( 113), INT8_C( 90), INT8_C( 112), INT8_C( 105), -INT8_C( 75), INT8_C( 61) }, { INT8_C( 119), -INT8_C( 104), -INT8_C( 74), INT8_C( 79), INT8_C( 78), -INT8_C( 60), -INT8_C( 63), -INT8_C( 68), INT8_C( 66), INT8_C( 114), INT8_C( 113), INT8_C( 90), INT8_C( 112), INT8_C( 105), -INT8_C( 75), INT8_C( 61) } }, 
{ { INT8_C( 71), INT8_C( 122), INT8_C( 91), INT8_C( 106), INT8_C( 67), -INT8_C( 113), INT8_C( 4), INT8_C( 83) }, { INT8_C( 61), INT8_C( 114), -INT8_C( 95), INT8_C( 23), INT8_C( 77), -INT8_C( 52), INT8_C( 22), -INT8_C( 60) }, { INT8_C( 71), INT8_C( 122), INT8_C( 91), INT8_C( 106), INT8_C( 67), -INT8_C( 113), INT8_C( 4), INT8_C( 83), INT8_C( 61), INT8_C( 114), -INT8_C( 95), INT8_C( 23), INT8_C( 77), -INT8_C( 52), INT8_C( 22), -INT8_C( 60) } }, { { INT8_C( 101), -INT8_C( 52), INT8_C( 19), -INT8_C( 77), -INT8_C( 111), -INT8_C( 44), INT8_C( 111), -INT8_C( 45) }, { INT8_C( 70), -INT8_C( 31), INT8_C( 45), -INT8_C( 73), INT8_C( 74), -INT8_C( 29), -INT8_C( 12), -INT8_C( 111) }, { INT8_C( 101), -INT8_C( 52), INT8_C( 19), -INT8_C( 77), -INT8_C( 111), -INT8_C( 44), INT8_C( 111), -INT8_C( 45), INT8_C( 70), -INT8_C( 31), INT8_C( 45), -INT8_C( 73), INT8_C( 74), -INT8_C( 29), -INT8_C( 12), -INT8_C( 111) } }, { { INT8_C( 93), INT8_C( 80), -INT8_C( 5), -INT8_C( 96), -INT8_C( 33), -INT8_C( 1), -INT8_C( 12), INT8_C( 28) }, { INT8_C( 113), -INT8_C( 107), INT8_C( 52), -INT8_C( 66), INT8_C( 97), INT8_C( 74), -INT8_C( 126), -INT8_C( 58) }, { INT8_C( 93), INT8_C( 80), -INT8_C( 5), -INT8_C( 96), -INT8_C( 33), -INT8_C( 1), -INT8_C( 12), INT8_C( 28), INT8_C( 113), -INT8_C( 107), INT8_C( 52), -INT8_C( 66), INT8_C( 97), INT8_C( 74), -INT8_C( 126), -INT8_C( 58) } }, { { INT8_C( 22), -INT8_C( 106), INT8_C( 121), -INT8_C( 89), INT8_C( 106), -INT8_C( 23), INT8_C( 123), -INT8_C( 79) }, { -INT8_C( 54), -INT8_C( 88), INT8_C( 104), INT8_C( 20), -INT8_C( 117), INT8_C( 92), -INT8_C( 91), -INT8_C( 23) }, { INT8_C( 22), -INT8_C( 106), INT8_C( 121), -INT8_C( 89), INT8_C( 106), -INT8_C( 23), INT8_C( 123), -INT8_C( 79), -INT8_C( 54), -INT8_C( 88), INT8_C( 104), INT8_C( 20), -INT8_C( 117), INT8_C( 92), -INT8_C( 91), -INT8_C( 23) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); 
simde_int8x16_t r = simde_vcombine_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int8x16_t r = simde_vcombine_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcombine_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 30456), INT16_C( 24748), INT16_C( 23695), -INT16_C( 11654) }, { INT16_C( 15149), -INT16_C( 12216), -INT16_C( 26484), -INT16_C( 18819) }, { -INT16_C( 30456), INT16_C( 24748), INT16_C( 23695), -INT16_C( 11654), INT16_C( 15149), -INT16_C( 12216), -INT16_C( 26484), -INT16_C( 18819) } }, { { -INT16_C( 7855), INT16_C( 16407), -INT16_C( 20008), INT16_C( 7685) }, { -INT16_C( 2640), INT16_C( 15239), INT16_C( 11611), INT16_C( 25435) }, { -INT16_C( 7855), INT16_C( 16407), -INT16_C( 20008), INT16_C( 7685), -INT16_C( 2640), INT16_C( 15239), INT16_C( 11611), INT16_C( 25435) } }, { { INT16_C( 1974), INT16_C( 18115), INT16_C( 15715), -INT16_C( 28648) }, { INT16_C( 24696), INT16_C( 1120), -INT16_C( 8712), INT16_C( 18875) }, { INT16_C( 1974), INT16_C( 18115), INT16_C( 15715), -INT16_C( 28648), INT16_C( 24696), INT16_C( 1120), -INT16_C( 8712), INT16_C( 18875) } }, { { -INT16_C( 11585), -INT16_C( 26743), -INT16_C( 29053), INT16_C( 13493) }, { INT16_C( 15747), -INT16_C( 8593), -INT16_C( 13718), INT16_C( 8513) }, { -INT16_C( 11585), -INT16_C( 26743), -INT16_C( 29053), INT16_C( 13493), INT16_C( 15747), -INT16_C( 8593), -INT16_C( 13718), INT16_C( 8513) } }, { { INT16_C( 1233), INT16_C( 13671), INT16_C( 32577), -INT16_C( 17979) }, { INT16_C( 9951), -INT16_C( 10307), INT16_C( 30723), 
-INT16_C( 15840) }, { INT16_C( 1233), INT16_C( 13671), INT16_C( 32577), -INT16_C( 17979), INT16_C( 9951), -INT16_C( 10307), INT16_C( 30723), -INT16_C( 15840) } }, { { -INT16_C( 22197), -INT16_C( 12710), INT16_C( 3895), -INT16_C( 17918) }, { INT16_C( 29004), -INT16_C( 18536), -INT16_C( 9925), INT16_C( 3544) }, { -INT16_C( 22197), -INT16_C( 12710), INT16_C( 3895), -INT16_C( 17918), INT16_C( 29004), -INT16_C( 18536), -INT16_C( 9925), INT16_C( 3544) } }, { { INT16_C( 16349), INT16_C( 7746), INT16_C( 1982), -INT16_C( 25129) }, { -INT16_C( 27603), INT16_C( 12660), -INT16_C( 27635), INT16_C( 22771) }, { INT16_C( 16349), INT16_C( 7746), INT16_C( 1982), -INT16_C( 25129), -INT16_C( 27603), INT16_C( 12660), -INT16_C( 27635), INT16_C( 22771) } }, { { INT16_C( 19773), INT16_C( 29734), INT16_C( 10589), -INT16_C( 22226) }, { -INT16_C( 14694), -INT16_C( 10656), INT16_C( 14495), INT16_C( 31971) }, { INT16_C( 19773), INT16_C( 29734), INT16_C( 10589), -INT16_C( 22226), -INT16_C( 14694), -INT16_C( 10656), INT16_C( 14495), INT16_C( 31971) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x8_t r = simde_vcombine_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int16x8_t r = simde_vcombine_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcombine_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int32_t r[4]; } test_vec[] = { { { INT32_C( 479230270), -INT32_C( 919434613) }, { -INT32_C( 
1167602195), INT32_C( 292509305) }, { INT32_C( 479230270), -INT32_C( 919434613), -INT32_C( 1167602195), INT32_C( 292509305) } }, { { INT32_C( 396420651), -INT32_C( 521622852) }, { INT32_C( 567435341), INT32_C( 1151863558) }, { INT32_C( 396420651), -INT32_C( 521622852), INT32_C( 567435341), INT32_C( 1151863558) } }, { { INT32_C( 274741637), -INT32_C( 1311075389) }, { -INT32_C( 630505120), -INT32_C( 1024730473) }, { INT32_C( 274741637), -INT32_C( 1311075389), -INT32_C( 630505120), -INT32_C( 1024730473) } }, { { INT32_C( 2111409089), -INT32_C( 2091007435) }, { INT32_C( 681848610), -INT32_C( 1083421382) }, { INT32_C( 2111409089), -INT32_C( 2091007435), INT32_C( 681848610), -INT32_C( 1083421382) } }, { { INT32_C( 1238420870), -INT32_C( 1057314208) }, { -INT32_C( 2103810581), INT32_C( 21267776) }, { INT32_C( 1238420870), -INT32_C( 1057314208), -INT32_C( 2103810581), INT32_C( 21267776) } }, { { INT32_C( 1182670352), INT32_C( 46783456) }, { INT32_C( 1143631114), INT32_C( 1074042554) }, { INT32_C( 1182670352), INT32_C( 46783456), INT32_C( 1143631114), INT32_C( 1074042554) } }, { { -INT32_C( 1014311837), INT32_C( 1770292350) }, { INT32_C( 720051946), -INT32_C( 1272238172) }, { -INT32_C( 1014311837), INT32_C( 1770292350), INT32_C( 720051946), -INT32_C( 1272238172) } }, { { INT32_C( 771402061), -INT32_C( 1909472380) }, { -INT32_C( 338536143), INT32_C( 1395447536) }, { INT32_C( 771402061), -INT32_C( 1909472380), -INT32_C( 338536143), INT32_C( 1395447536) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x4_t r = simde_vcombine_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int32x4_t r = simde_vcombine_s32(a, b); 
simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcombine_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int64_t b[1]; int64_t r[2]; } test_vec[] = { { { INT64_C( 7042412965683835049) }, { INT64_C( 1117862416211221349) }, { INT64_C( 7042412965683835049), INT64_C( 1117862416211221349) } }, { { -INT64_C( 3901535941454286721) }, { -INT64_C( 1354626619370034422) }, { -INT64_C( 3901535941454286721), -INT64_C( 1354626619370034422) } }, { { INT64_C( 8556652449628906263) }, { -INT64_C( 5191846387337607439) }, { INT64_C( 8556652449628906263), -INT64_C( 5191846387337607439) } }, { { INT64_C( 2526541489756578955) }, { -INT64_C( 5228216683076765376) }, { INT64_C( 2526541489756578955), -INT64_C( 5228216683076765376) } }, { { INT64_C( 6093322338631486471) }, { INT64_C( 5659099228278286235) }, { INT64_C( 6093322338631486471), INT64_C( 5659099228278286235) } }, { { INT64_C( 1768126677572240204) }, { -INT64_C( 6542026434312078625) }, { INT64_C( 1768126677572240204), -INT64_C( 6542026434312078625) } }, { { -INT64_C( 4985399989866968294) }, { INT64_C( 8954869304345298740) }, { -INT64_C( 4985399989866968294), INT64_C( 8954869304345298740) } }, { { INT64_C( 6974505177732799450) }, { -INT64_C( 8239030459567217051) }, { INT64_C( 6974505177732799450), -INT64_C( 8239030459567217051) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x2_t r = simde_vcombine_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); simde_int64x1_t b = simde_test_arm_neon_random_i64x1(); simde_int64x2_t r 
= simde_vcombine_s64(a, b); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcombine_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(242), UINT8_C(197), UINT8_C(237), UINT8_C(208), UINT8_C( 62), UINT8_C(206), UINT8_C(169), UINT8_C(170) }, { UINT8_C( 26), UINT8_C( 39), UINT8_C(185), UINT8_C(112), UINT8_C(211), UINT8_C(125), UINT8_C(174), UINT8_C( 83) }, { UINT8_C(242), UINT8_C(197), UINT8_C(237), UINT8_C(208), UINT8_C( 62), UINT8_C(206), UINT8_C(169), UINT8_C(170), UINT8_C( 26), UINT8_C( 39), UINT8_C(185), UINT8_C(112), UINT8_C(211), UINT8_C(125), UINT8_C(174), UINT8_C( 83) } }, { { UINT8_C( 67), UINT8_C(254), UINT8_C( 36), UINT8_C(110), UINT8_C(206), UINT8_C( 98), UINT8_C(151), UINT8_C( 23) }, { UINT8_C(198), UINT8_C(171), UINT8_C( 20), UINT8_C(176), UINT8_C( 57), UINT8_C(103), UINT8_C(214), UINT8_C( 43) }, { UINT8_C( 67), UINT8_C(254), UINT8_C( 36), UINT8_C(110), UINT8_C(206), UINT8_C( 98), UINT8_C(151), UINT8_C( 23), UINT8_C(198), UINT8_C(171), UINT8_C( 20), UINT8_C(176), UINT8_C( 57), UINT8_C(103), UINT8_C(214), UINT8_C( 43) } }, { { UINT8_C( 44), UINT8_C(195), UINT8_C(252), UINT8_C(106), UINT8_C(145), UINT8_C(165), UINT8_C( 20), UINT8_C(171) }, { UINT8_C(205), UINT8_C(206), UINT8_C( 28), UINT8_C(160), UINT8_C( 75), UINT8_C(202), UINT8_C(243), UINT8_C(142) }, { UINT8_C( 44), UINT8_C(195), UINT8_C(252), UINT8_C(106), UINT8_C(145), UINT8_C(165), UINT8_C( 20), UINT8_C(171), UINT8_C(205), UINT8_C(206), UINT8_C( 28), UINT8_C(160), UINT8_C( 75), UINT8_C(202), UINT8_C(243), UINT8_C(142) } }, { { UINT8_C(200), UINT8_C( 24), UINT8_C(253), UINT8_C(151), UINT8_C(122), UINT8_C(148), UINT8_C(174), UINT8_C( 64) }, { UINT8_C( 64), UINT8_C(194), UINT8_C(241), UINT8_C(121), UINT8_C( 41), UINT8_C(199), 
UINT8_C(164), UINT8_C( 85) }, { UINT8_C(200), UINT8_C( 24), UINT8_C(253), UINT8_C(151), UINT8_C(122), UINT8_C(148), UINT8_C(174), UINT8_C( 64), UINT8_C( 64), UINT8_C(194), UINT8_C(241), UINT8_C(121), UINT8_C( 41), UINT8_C(199), UINT8_C(164), UINT8_C( 85) } }, { { UINT8_C(138), UINT8_C(160), UINT8_C(192), UINT8_C( 27), UINT8_C( 70), UINT8_C(212), UINT8_C(199), UINT8_C( 19) }, { UINT8_C(162), UINT8_C(227), UINT8_C(179), UINT8_C(238), UINT8_C(173), UINT8_C(167), UINT8_C(124), UINT8_C(118) }, { UINT8_C(138), UINT8_C(160), UINT8_C(192), UINT8_C( 27), UINT8_C( 70), UINT8_C(212), UINT8_C(199), UINT8_C( 19), UINT8_C(162), UINT8_C(227), UINT8_C(179), UINT8_C(238), UINT8_C(173), UINT8_C(167), UINT8_C(124), UINT8_C(118) } }, { { UINT8_C(191), UINT8_C(121), UINT8_C( 13), UINT8_C( 57), UINT8_C( 14), UINT8_C(187), UINT8_C(122), UINT8_C( 78) }, { UINT8_C(125), UINT8_C(107), UINT8_C(199), UINT8_C(166), UINT8_C( 50), UINT8_C(107), UINT8_C(251), UINT8_C(188) }, { UINT8_C(191), UINT8_C(121), UINT8_C( 13), UINT8_C( 57), UINT8_C( 14), UINT8_C(187), UINT8_C(122), UINT8_C( 78), UINT8_C(125), UINT8_C(107), UINT8_C(199), UINT8_C(166), UINT8_C( 50), UINT8_C(107), UINT8_C(251), UINT8_C(188) } }, { { UINT8_C( 12), UINT8_C(187), UINT8_C(215), UINT8_C( 82), UINT8_C(144), UINT8_C(158), UINT8_C(101), UINT8_C( 50) }, { UINT8_C(129), UINT8_C( 24), UINT8_C( 32), UINT8_C( 47), UINT8_C(191), UINT8_C(157), UINT8_C(165), UINT8_C(126) }, { UINT8_C( 12), UINT8_C(187), UINT8_C(215), UINT8_C( 82), UINT8_C(144), UINT8_C(158), UINT8_C(101), UINT8_C( 50), UINT8_C(129), UINT8_C( 24), UINT8_C( 32), UINT8_C( 47), UINT8_C(191), UINT8_C(157), UINT8_C(165), UINT8_C(126) } }, { { UINT8_C( 22), UINT8_C(178), UINT8_C(184), UINT8_C( 36), UINT8_C(109), UINT8_C( 50), UINT8_C(114), UINT8_C(234) }, { UINT8_C(157), UINT8_C( 57), UINT8_C(144), UINT8_C(207), UINT8_C(165), UINT8_C(139), UINT8_C(139), UINT8_C(177) }, { UINT8_C( 22), UINT8_C(178), UINT8_C(184), UINT8_C( 36), UINT8_C(109), UINT8_C( 50), UINT8_C(114), UINT8_C(234), 
UINT8_C(157), UINT8_C( 57), UINT8_C(144), UINT8_C(207), UINT8_C(165), UINT8_C(139), UINT8_C(139), UINT8_C(177) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x16_t r = simde_vcombine_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint8x16_t r = simde_vcombine_u8(a, b); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcombine_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[8]; } test_vec[] = { { { UINT16_C( 2850), UINT16_C(54644), UINT16_C(22390), UINT16_C(12151) }, { UINT16_C(36777), UINT16_C(47906), UINT16_C( 9646), UINT16_C(44426) }, { UINT16_C( 2850), UINT16_C(54644), UINT16_C(22390), UINT16_C(12151), UINT16_C(36777), UINT16_C(47906), UINT16_C( 9646), UINT16_C(44426) } }, { { UINT16_C(44944), UINT16_C(58267), UINT16_C(30973), UINT16_C(16694) }, { UINT16_C(54474), UINT16_C(20044), UINT16_C(55264), UINT16_C( 593) }, { UINT16_C(44944), UINT16_C(58267), UINT16_C(30973), UINT16_C(16694), UINT16_C(54474), UINT16_C(20044), UINT16_C(55264), UINT16_C( 593) } }, { { UINT16_C(50658), UINT16_C(22744), UINT16_C(20252), UINT16_C(50568) }, { UINT16_C(43743), UINT16_C(36225), UINT16_C( 3023), UINT16_C(24378) }, { UINT16_C(50658), UINT16_C(22744), UINT16_C(20252), UINT16_C(50568), UINT16_C(43743), UINT16_C(36225), UINT16_C( 3023), UINT16_C(24378) } }, { { UINT16_C(54714), UINT16_C(46914), UINT16_C(30797), UINT16_C( 6136) }, { UINT16_C(17741), UINT16_C(11622), UINT16_C(46876), 
UINT16_C(65328) }, { UINT16_C(54714), UINT16_C(46914), UINT16_C(30797), UINT16_C( 6136), UINT16_C(17741), UINT16_C(11622), UINT16_C(46876), UINT16_C(65328) } }, { { UINT16_C( 2172), UINT16_C(38999), UINT16_C(57175), UINT16_C(13918) }, { UINT16_C(57225), UINT16_C(22723), UINT16_C(65002), UINT16_C(42167) }, { UINT16_C( 2172), UINT16_C(38999), UINT16_C(57175), UINT16_C(13918), UINT16_C(57225), UINT16_C(22723), UINT16_C(65002), UINT16_C(42167) } }, { { UINT16_C(64210), UINT16_C( 8284), UINT16_C(21618), UINT16_C(48951) }, { UINT16_C(40345), UINT16_C(46829), UINT16_C( 7508), UINT16_C(53429) }, { UINT16_C(64210), UINT16_C( 8284), UINT16_C(21618), UINT16_C(48951), UINT16_C(40345), UINT16_C(46829), UINT16_C( 7508), UINT16_C(53429) } }, { { UINT16_C( 3109), UINT16_C(31849), UINT16_C(51180), UINT16_C(30131) }, { UINT16_C(30374), UINT16_C(37070), UINT16_C(34164), UINT16_C(17972) }, { UINT16_C( 3109), UINT16_C(31849), UINT16_C(51180), UINT16_C(30131), UINT16_C(30374), UINT16_C(37070), UINT16_C(34164), UINT16_C(17972) } }, { { UINT16_C(36991), UINT16_C(62054), UINT16_C(40677), UINT16_C(32433) }, { UINT16_C(40507), UINT16_C(36916), UINT16_C(59835), UINT16_C(57440) }, { UINT16_C(36991), UINT16_C(62054), UINT16_C(40677), UINT16_C(32433), UINT16_C(40507), UINT16_C(36916), UINT16_C(59835), UINT16_C(57440) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x8_t r = simde_vcombine_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint16x8_t r = simde_vcombine_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcombine_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(1573219985), UINT32_C(4091762574) }, { UINT32_C(2146127911), UINT32_C( 652211733) }, { UINT32_C(1573219985), UINT32_C(4091762574), UINT32_C(2146127911), UINT32_C( 652211733) } }, { { UINT32_C(1454558493), UINT32_C(1586285622) }, { UINT32_C(1784348730), UINT32_C(2848915480) }, { UINT32_C(1454558493), UINT32_C(1586285622), UINT32_C(1784348730), UINT32_C(2848915480) } }, { { UINT32_C(4228355182), UINT32_C( 317778667) }, { UINT32_C(1351801659), UINT32_C(4000805329) }, { UINT32_C(4228355182), UINT32_C( 317778667), UINT32_C(1351801659), UINT32_C(4000805329) } }, { { UINT32_C(1967401279), UINT32_C( 819187957) }, { UINT32_C(3919196113), UINT32_C(2643683631) }, { UINT32_C(1967401279), UINT32_C( 819187957), UINT32_C(3919196113), UINT32_C(2643683631) } }, { { UINT32_C(3902446333), UINT32_C(3220933252) }, { UINT32_C( 907054437), UINT32_C(1025869822) }, { UINT32_C(3902446333), UINT32_C(3220933252), UINT32_C( 907054437), UINT32_C(1025869822) } }, { { UINT32_C(2796775856), UINT32_C( 198608442) }, { UINT32_C(3841224885), UINT32_C(3598878681) }, { UINT32_C(2796775856), UINT32_C( 198608442), UINT32_C(3841224885), UINT32_C(3598878681) } }, { { UINT32_C(2797476897), UINT32_C( 191216038) }, { UINT32_C(1161917766), UINT32_C(2911004412) }, { UINT32_C(2797476897), UINT32_C( 191216038), UINT32_C(1161917766), UINT32_C(2911004412) } }, { { UINT32_C( 173225424), UINT32_C(1897212348) }, { UINT32_C(1918241177), UINT32_C(2991118481) }, { UINT32_C( 173225424), UINT32_C(1897212348), UINT32_C(1918241177), UINT32_C(2991118481) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x4_t r = simde_vcombine_u32(a, b); 
simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint32x4_t r = simde_vcombine_u32(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcombine_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(18153453259173778237) }, { UINT64_C(11400740053166720884) }, { UINT64_C(18153453259173778237), UINT64_C(11400740053166720884) } }, { { UINT64_C( 7651395941339862214) }, { UINT64_C( 3776799313776537214) }, { UINT64_C( 7651395941339862214), UINT64_C( 3776799313776537214) } }, { { UINT64_C(10898431973749169846) }, { UINT64_C( 2905810644555846153) }, { UINT64_C(10898431973749169846), UINT64_C( 2905810644555846153) } }, { { UINT64_C( 8616290951558053911) }, { UINT64_C(14001987062142871845) }, { UINT64_C( 8616290951558053911), UINT64_C(14001987062142871845) } }, { { UINT64_C( 8134984611907659051) }, { UINT64_C(14745199371509641495) }, { UINT64_C( 8134984611907659051), UINT64_C(14745199371509641495) } }, { { UINT64_C( 335668664436369464) }, { UINT64_C( 9619813438614162610) }, { UINT64_C( 335668664436369464), UINT64_C( 9619813438614162610) } }, { { UINT64_C(16405973300147422934) }, { UINT64_C( 9469942237931295467) }, { UINT64_C(16405973300147422934), UINT64_C( 9469942237931295467) } }, { { UINT64_C(11207591305728516262) }, { UINT64_C(17476457064037984936) }, { UINT64_C(11207591305728516262), UINT64_C(17476457064037984936) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = 
simde_vld1_u64(test_vec[i].b); simde_uint64x2_t r = simde_vcombine_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t b = simde_test_arm_neon_random_u64x1(); simde_uint64x2_t r = simde_vcombine_u64(a, b); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vcombine_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcombine_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcombine_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vcombine_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcombine_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcombine_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcombine_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vcombine_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vcombine_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vcombine_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/create.c000066400000000000000000000360611400333146700167340ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN create /* N.B. CM: The comparisons are done at a whole vector (64-bit) level to * avoid endian problems due to the reversal of the order of the vector * elements with NEON on big-endian systems. 
*/ #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/create.h" #else #include "../../../simde/arm/neon.h" #endif static int test_simde_vcreate_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a; uint64_t r[1]; } test_vec[] = { { UINT64_C(14132917921477899950), { UINT64_C(14132917921477899950) } }, { UINT64_C( 9768881841052706856), { UINT64_C( 9768881841052706856) } }, { UINT64_C(16325103149125810475), { UINT64_C(16325103149125810475) } }, { UINT64_C( 2241800239056659389), { UINT64_C( 2241800239056659389) } }, { UINT64_C(16892050861247466928), { UINT64_C(16892050861247466928) } }, { UINT64_C( 6292462352927236486), { UINT64_C( 6292462352927236486) } }, { UINT64_C(13564512221404632202), { UINT64_C(13564512221404632202) } }, { UINT64_C(13980988618246101366), { UINT64_C(13980988618246101366) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t r = simde_vcreate_s8(test_vec[i].a); simde_test_arm_neon_assert_equal_u64x1(simde_vreinterpret_u64_s8(r), simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a; simde_test_codegen_random_memory(sizeof(a), HEDLEY_REINTERPRET_CAST(uint8_t*, &a)); simde_int8x8_t r = simde_vcreate_s8(a); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, simde_vreinterpret_u64_s8(r), SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcreate_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a; uint64_t r[1]; } test_vec[] = { { UINT64_C(10194708065387003937), { UINT64_C(10194708065387003937) } }, { UINT64_C( 4926405416270135894), { UINT64_C( 4926405416270135894) } }, { UINT64_C(14233135747404150946), { UINT64_C(14233135747404150946) } }, { UINT64_C(13467007090084057788), { UINT64_C(13467007090084057788) } }, { UINT64_C( 5670577254878680876), { UINT64_C( 5670577254878680876) } }, { 
UINT64_C(14291510562562377841), { UINT64_C(14291510562562377841) } }, { UINT64_C( 4769192735922800093), { UINT64_C( 4769192735922800093) } }, { UINT64_C(16679089722896895781), { UINT64_C(16679089722896895781) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t r = simde_vcreate_s16(test_vec[i].a); simde_test_arm_neon_assert_equal_u64x1(simde_vreinterpret_u64_s16(r), simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a; simde_test_codegen_random_memory(sizeof(a), HEDLEY_REINTERPRET_CAST(uint8_t*, &a)); simde_int16x4_t r = simde_vcreate_s16(a); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, simde_vreinterpret_u64_s16(r), SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcreate_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a; uint64_t r[1]; } test_vec[] = { { UINT64_C( 2439430255293500298), { UINT64_C( 2439430255293500298) } }, { UINT64_C( 4265113451400979156), { UINT64_C( 4265113451400979156) } }, { UINT64_C( 4220290061209471777), { UINT64_C( 4220290061209471777) } }, { UINT64_C(15125839935251710912), { UINT64_C(15125839935251710912) } }, { UINT64_C( 8203352478617372114), { UINT64_C( 8203352478617372114) } }, { UINT64_C( 3629274651627509259), { UINT64_C( 3629274651627509259) } }, { UINT64_C( 1873250436141407570), { UINT64_C( 1873250436141407570) } }, { UINT64_C(12807094805541099426), { UINT64_C(12807094805541099426) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t r = simde_vcreate_s32(test_vec[i].a); simde_test_arm_neon_assert_equal_u64x1(simde_vreinterpret_u64_s32(r), simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a; simde_test_codegen_random_memory(sizeof(a), HEDLEY_REINTERPRET_CAST(uint8_t*, &a)); simde_int32x2_t r = simde_vcreate_s32(a); 
simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, simde_vreinterpret_u64_s32(r), SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcreate_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a; int64_t r[1]; } test_vec[] = { { UINT64_C( 5395799976582993883), { INT64_C( 5395799976582993883) } }, { UINT64_C( 4567289535980575384), { INT64_C( 4567289535980575384) } }, { UINT64_C(16941982940099198756), { -INT64_C( 1504761133610352860) } }, { UINT64_C(15415313526547754242), { -INT64_C( 3031430547161797374) } }, { UINT64_C(10110945050363508873), { -INT64_C( 8335799023346042743) } }, { UINT64_C( 3838853662046080100), { INT64_C( 3838853662046080100) } }, { UINT64_C(11762288014748589464), { -INT64_C( 6684456058960962152) } }, { UINT64_C( 2091664278160892727), { INT64_C( 2091664278160892727) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t r = simde_vcreate_s64(test_vec[i].a); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a; simde_test_codegen_random_memory(sizeof(a), HEDLEY_REINTERPRET_CAST(uint8_t*, &a)); simde_int64x1_t r = simde_vcreate_s64(a); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcreate_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a; uint64_t r[1]; } test_vec[] = { { UINT64_C( 9524461125136957834), { UINT64_C( 9524461125136957834) } }, { UINT64_C( 5681877711578043306), { UINT64_C( 5681877711578043306) } }, { UINT64_C( 2605432708141052716), { UINT64_C( 2605432708141052716) } }, { UINT64_C( 3195240861471437814), { UINT64_C( 3195240861471437814) } }, { UINT64_C( 9113757135495505160), { UINT64_C( 9113757135495505160) } }, { UINT64_C(10096533119189207844), { UINT64_C(10096533119189207844) } 
}, { UINT64_C( 6840162342001118346), { UINT64_C( 6840162342001118346) } }, { UINT64_C(10188299615527616150), { UINT64_C(10188299615527616150) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t r = simde_vcreate_u8(test_vec[i].a); simde_test_arm_neon_assert_equal_u64x1(simde_vreinterpret_u64_u8(r), simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a; simde_test_codegen_random_memory(sizeof(a), HEDLEY_REINTERPRET_CAST(uint8_t*, &a)); simde_uint8x8_t r = simde_vcreate_u8(a); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, simde_vreinterpret_u64_u8(r), SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcreate_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a; uint64_t r[1]; } test_vec[] = { { UINT64_C(10724469750478909377), { UINT64_C(10724469750478909377) } }, { UINT64_C( 2531916720399834289), { UINT64_C( 2531916720399834289) } }, { UINT64_C(13290330643019436200), { UINT64_C(13290330643019436200) } }, { UINT64_C( 5364678349457749890), { UINT64_C( 5364678349457749890) } }, { UINT64_C(15337281361360988890), { UINT64_C(15337281361360988890) } }, { UINT64_C(18072566328191100628), { UINT64_C(18072566328191100628) } }, { UINT64_C(13203823162098677815), { UINT64_C(13203823162098677815) } }, { UINT64_C(16911653288486692574), { UINT64_C(16911653288486692574) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t r = simde_vcreate_u16(test_vec[i].a); simde_test_arm_neon_assert_equal_u64x1(simde_vreinterpret_u64_u16(r), simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a; simde_test_codegen_random_memory(sizeof(a), HEDLEY_REINTERPRET_CAST(uint8_t*, &a)); simde_uint16x4_t r = simde_vcreate_u16(a); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); 
simde_test_arm_neon_write_u64x1(2, simde_vreinterpret_u64_u16(r), SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcreate_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a; uint64_t r[1]; } test_vec[] = { { UINT64_C(17456657123552764214), { UINT64_C(17456657123552764214) } }, { UINT64_C( 7637277477721962941), { UINT64_C( 7637277477721962941) } }, { UINT64_C( 5996866312583239863), { UINT64_C( 5996866312583239863) } }, { UINT64_C(15243824492585726451), { UINT64_C(15243824492585726451) } }, { UINT64_C(17486971808534147986), { UINT64_C(17486971808534147986) } }, { UINT64_C( 3061556536935998637), { UINT64_C( 3061556536935998637) } }, { UINT64_C(11395754859327227054), { UINT64_C(11395754859327227054) } }, { UINT64_C(11600010461574602123), { UINT64_C(11600010461574602123) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t r = simde_vcreate_u32(test_vec[i].a); simde_test_arm_neon_assert_equal_u64x1(simde_vreinterpret_u64_u32(r), simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a; simde_test_codegen_random_memory(sizeof(a), HEDLEY_REINTERPRET_CAST(uint8_t*, &a)); simde_uint32x2_t r = simde_vcreate_u32(a); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, simde_vreinterpret_u64_u32(r), SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcreate_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a; uint64_t r[1]; } test_vec[] = { { UINT64_C( 9581747805669338544), { UINT64_C( 9581747805669338544) } }, { UINT64_C( 2636195106661380572), { UINT64_C( 2636195106661380572) } }, { UINT64_C( 338359284805175101), { UINT64_C( 338359284805175101) } }, { UINT64_C(18243733878740660141), { UINT64_C(18243733878740660141) } }, { UINT64_C(10003026585137609329), { UINT64_C(10003026585137609329) } }, { UINT64_C(16155708883640647060), { UINT64_C(16155708883640647060) } 
}, { UINT64_C( 4404981749918038306), { UINT64_C( 4404981749918038306) } }, { UINT64_C( 7970543397987432786), { UINT64_C( 7970543397987432786) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t r = simde_vcreate_u64(test_vec[i].a); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a; simde_test_codegen_random_memory(sizeof(a), HEDLEY_REINTERPRET_CAST(uint8_t*, &a)); simde_uint64x1_t r = simde_vcreate_u64(a); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcreate_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a; uint64_t r[1]; } test_vec[] = { { UINT64_C(14121171741695809290), { UINT64_C(14121171741695809290) } }, { UINT64_C(14036155041624243569), { UINT64_C(14036155041624243569) } }, { UINT64_C( 4933549844381694689), { UINT64_C( 4933549844381694689) } }, { UINT64_C( 4721956654897595679), { UINT64_C( 4721956654897595679) } }, { UINT64_C( 4794791124775457096), { UINT64_C( 4794791124775457096) } }, { UINT64_C(14142503323827870106), { UINT64_C(14142503323827870106) } }, { UINT64_C(14093049577892322181), { UINT64_C(14093049577892322181) } }, { UINT64_C( 4932300799177322332), { UINT64_C( 4932300799177322332) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t r = simde_vcreate_f32(test_vec[i].a); simde_test_arm_neon_assert_equal_u64x1(simde_vreinterpret_u64_f32(r), simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a; simde_float32x2_t a_ = simde_test_arm_neon_random_f32x2(-1000.0, 1000.0); simde_memcpy(&a, &a_, sizeof(a)); simde_float32x2_t r = simde_vcreate_f32(a); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, 
simde_vreinterpret_u64_f32(r), SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcreate_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a; simde_float64_t r[1]; } test_vec[] = { { UINT64_C( 4647965680019345244), { SIMDE_FLOAT64_C( 540.52) } }, { UINT64_C( 4647039803267827630), { SIMDE_FLOAT64_C( 473.63) } }, { UINT64_C(13872857505826498150), { SIMDE_FLOAT64_C( -713.30) } }, { UINT64_C(13873545360300834816), { SIMDE_FLOAT64_C( -791.50) } }, { UINT64_C(13859231829969516626), { SIMDE_FLOAT64_C( -87.53) } }, { UINT64_C( 4646306736875356815), { SIMDE_FLOAT64_C( 431.96) } }, { UINT64_C(13868860209313485947), { SIMDE_FLOAT64_C( -385.43) } }, { UINT64_C( 4651482094126833336), { SIMDE_FLOAT64_C( 940.29) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t r = simde_vcreate_f64(test_vec[i].a); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a; simde_float64x1_t a_ = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_memcpy(&a, &a_, sizeof(a)); simde_float64x1_t r = simde_vcreate_f64(a); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vcreate_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vcreate_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcreate_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcreate_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcreate_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vcreate_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vcreate_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vcreate_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vcreate_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcreate_f64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/cvt.c000066400000000000000000001121371400333146700162640ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN cvt #include "test-neon.h" #include 
"../../../simde/arm/neon/cvt.h" static int test_simde_vcvts_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a; int32_t r; } test_vec[] = { { SIMDE_FLOAT32_C( 550.19), INT32_C( 550) }, { SIMDE_FLOAT32_C( -14.71), -INT32_C( 14) }, { SIMDE_FLOAT32_C( 735.91), INT32_C( 735) }, { SIMDE_FLOAT32_C( 355.60), INT32_C( 355) }, { SIMDE_FLOAT32_C( -850.41), -INT32_C( 850) }, { SIMDE_FLOAT32_C( -934.68), -INT32_C( 934) }, { SIMDE_FLOAT32_C( -125.28), -INT32_C( 125) }, { SIMDE_FLOAT32_C( 784.80), INT32_C( 784) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32 a = test_vec[i].a; int32_t r = simde_vcvts_s32_f32(a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32 a = simde_test_codegen_random_f32(-1000.0, 1000.0); int32_t r = simde_vcvts_s32_f32(a); simde_test_codegen_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvtd_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a; int64_t r; } test_vec[] = { { SIMDE_FLOAT64_C( -881.66), -INT64_C( 881) }, { SIMDE_FLOAT64_C( -469.33), -INT64_C( 469) }, { SIMDE_FLOAT64_C( 808.92), INT64_C( 808) }, { SIMDE_FLOAT64_C( 567.01), INT64_C( 567) }, { SIMDE_FLOAT64_C( -252.92), -INT64_C( 252) }, { SIMDE_FLOAT64_C( 379.60), INT64_C( 379) }, { SIMDE_FLOAT64_C( -471.75), -INT64_C( 471) }, { SIMDE_FLOAT64_C( 774.04), INT64_C( 774) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64 a = test_vec[i].a; int64_t r = simde_vcvtd_s64_f64(a); simde_assert_equal_i64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64 a = simde_test_codegen_random_f64(-1000.0, 1000.0); int64_t r = simde_vcvtd_s64_f64(a); simde_test_codegen_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i64(2, r, 
SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvts_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a; uint32_t r; } test_vec[] = { { SIMDE_FLOAT32_C( 843.42), UINT32_C( 843) }, { SIMDE_FLOAT32_C( 336.45), UINT32_C( 336) }, { SIMDE_FLOAT32_C( -150.13), UINT32_C( 0) }, { SIMDE_FLOAT32_C( 222.99), UINT32_C( 222) }, { SIMDE_FLOAT32_C( -200.28), UINT32_C( 0) }, { SIMDE_FLOAT32_C( 509.93), UINT32_C( 509) }, { SIMDE_FLOAT32_C( 630.54), UINT32_C( 630) }, { SIMDE_FLOAT32_C( 781.84), UINT32_C( 781) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32 a = test_vec[i].a; uint32_t r = simde_vcvts_u32_f32(a); simde_assert_equal_u32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32 a = simde_test_codegen_random_f32(-1000.0, 1000.0); uint32_t r = simde_vcvts_u32_f32(a); simde_test_codegen_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvtd_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a; uint64_t r; } test_vec[] = { { SIMDE_FLOAT64_C( -497.11), UINT64_C( 0) }, { SIMDE_FLOAT64_C( -710.38), UINT64_C( 0) }, { SIMDE_FLOAT64_C( -292.39), UINT64_C( 0) }, { SIMDE_FLOAT64_C( -747.52), UINT64_C( 0) }, { SIMDE_FLOAT64_C( -697.16), UINT64_C( 0) }, { SIMDE_FLOAT64_C( 134.76), UINT64_C( 134) }, { SIMDE_FLOAT64_C( 291.75), UINT64_C( 291) }, { SIMDE_FLOAT64_C( -298.90), UINT64_C( 0) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64 a = test_vec[i].a; uint64_t r = simde_vcvtd_u64_f64(a); simde_assert_equal_u64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64 a = simde_test_codegen_random_f64(-1000.0, 1000.0); uint64_t r = simde_vcvtd_u64_f64(a); simde_test_codegen_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST); 
simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvt_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; int32_t r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 396.15), SIMDE_FLOAT32_C( -246.90) }, { INT32_C( 396), -INT32_C( 246) } }, { { SIMDE_FLOAT32_C( 241.51), SIMDE_FLOAT32_C( 602.56) }, { INT32_C( 241), INT32_C( 602) } }, { { SIMDE_FLOAT32_C( -106.85), SIMDE_FLOAT32_C( -566.67) }, { -INT32_C( 106), -INT32_C( 566) } }, { { SIMDE_FLOAT32_C( 463.44), SIMDE_FLOAT32_C( 539.86) }, { INT32_C( 463), INT32_C( 539) } }, { { SIMDE_FLOAT32_C( -550.41), SIMDE_FLOAT32_C( 982.91) }, { -INT32_C( 550), INT32_C( 982) } }, { { SIMDE_FLOAT32_C( 499.92), SIMDE_FLOAT32_C( -727.55) }, { INT32_C( 499), -INT32_C( 727) } }, { { SIMDE_FLOAT32_C( -713.41), SIMDE_FLOAT32_C( 713.10) }, { -INT32_C( 713), INT32_C( 713) } }, { { SIMDE_FLOAT32_C( -998.69), SIMDE_FLOAT32_C( -409.99) }, { -INT32_C( 998), -INT32_C( 409) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_int32x2_t r = simde_vcvt_s32_f32(a); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0, 1000.0); simde_int32x2_t r = simde_vcvt_s32_f32(a); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvt_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; int64_t r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( -824.03) }, { -INT64_C( 824) } }, { { SIMDE_FLOAT64_C( -841.94) }, { -INT64_C( 841) } }, { { SIMDE_FLOAT64_C( -786.92) }, { -INT64_C( 786) } }, { { SIMDE_FLOAT64_C( 5.30) }, { INT64_C( 5) } }, { { SIMDE_FLOAT64_C( -36.80) }, { -INT64_C( 36) } }, { { 
SIMDE_FLOAT64_C( 375.47) }, { INT64_C( 375) } }, { { SIMDE_FLOAT64_C( -12.40) }, { -INT64_C( 12) } }, { { SIMDE_FLOAT64_C( 35.70) }, { INT64_C( 35) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_int64x1_t r = simde_vcvt_s64_f64(a); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_int64x1_t r = simde_vcvt_s64_f64(a); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvt_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; uint32_t r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 769.38), SIMDE_FLOAT32_C( -948.10) }, { UINT32_C( 769), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -50.28), SIMDE_FLOAT32_C( -280.11) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -758.47), SIMDE_FLOAT32_C( -0.89) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -929.81), SIMDE_FLOAT32_C( 722.65) }, { UINT32_C( 0), UINT32_C( 722) } }, { { SIMDE_FLOAT32_C( 920.72), SIMDE_FLOAT32_C( 126.70) }, { UINT32_C( 920), UINT32_C( 126) } }, { { SIMDE_FLOAT32_C( -812.69), SIMDE_FLOAT32_C( -253.03) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -104.04), SIMDE_FLOAT32_C( -971.51) }, { UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 953.10), SIMDE_FLOAT32_C( -600.37) }, { UINT32_C( 953), UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_uint32x2_t r = simde_vcvt_u32_f32(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = 
simde_test_arm_neon_random_f32x2(-1000.0, 1000.0); simde_uint32x2_t r = simde_vcvt_u32_f32(a); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvt_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; uint64_t r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( 382.15) }, { UINT64_C( 382) } }, { { SIMDE_FLOAT64_C( 800.28) }, { UINT64_C( 800) } }, { { SIMDE_FLOAT64_C( 154.09) }, { UINT64_C( 154) } }, { { SIMDE_FLOAT64_C( -615.70) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( 874.69) }, { UINT64_C( 874) } }, { { SIMDE_FLOAT64_C( 628.68) }, { UINT64_C( 628) } }, { { SIMDE_FLOAT64_C( -866.55) }, { UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -27.95) }, { UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_uint64x1_t r = simde_vcvt_u64_f64(a); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_uint64x1_t r = simde_vcvt_u64_f64(a); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvtq_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; int32_t r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 803.18), SIMDE_FLOAT32_C( -587.37), SIMDE_FLOAT32_C( 500.83), SIMDE_FLOAT32_C( -606.29) }, { INT32_C( 803), -INT32_C( 587), INT32_C( 500), -INT32_C( 606) } }, { { SIMDE_FLOAT32_C( 553.19), SIMDE_FLOAT32_C( -89.37), SIMDE_FLOAT32_C( -751.51), SIMDE_FLOAT32_C( 39.67) }, { INT32_C( 553), -INT32_C( 89), -INT32_C( 751), INT32_C( 39) } }, { { SIMDE_FLOAT32_C( 324.39), SIMDE_FLOAT32_C( 39.90), SIMDE_FLOAT32_C( 154.38), SIMDE_FLOAT32_C( 
-782.06) }, { INT32_C( 324), INT32_C( 39), INT32_C( 154), -INT32_C( 782) } }, { { SIMDE_FLOAT32_C( 683.78), SIMDE_FLOAT32_C( 860.43), SIMDE_FLOAT32_C( 258.08), SIMDE_FLOAT32_C( -431.46) }, { INT32_C( 683), INT32_C( 860), INT32_C( 258), -INT32_C( 431) } }, { { SIMDE_FLOAT32_C( 4.94), SIMDE_FLOAT32_C( -752.53), SIMDE_FLOAT32_C( 343.30), SIMDE_FLOAT32_C( -618.07) }, { INT32_C( 4), -INT32_C( 752), INT32_C( 343), -INT32_C( 618) } }, { { SIMDE_FLOAT32_C( -508.63), SIMDE_FLOAT32_C( 933.29), SIMDE_FLOAT32_C( 48.92), SIMDE_FLOAT32_C( 220.74) }, { -INT32_C( 508), INT32_C( 933), INT32_C( 48), INT32_C( 220) } }, { { SIMDE_FLOAT32_C( -447.64), SIMDE_FLOAT32_C( -181.80), SIMDE_FLOAT32_C( -962.01), SIMDE_FLOAT32_C( 914.94) }, { -INT32_C( 447), -INT32_C( 181), -INT32_C( 962), INT32_C( 914) } }, { { SIMDE_FLOAT32_C( -193.26), SIMDE_FLOAT32_C( 71.12), SIMDE_FLOAT32_C( 342.76), SIMDE_FLOAT32_C( -390.07) }, { -INT32_C( 193), INT32_C( 71), INT32_C( 342), -INT32_C( 390) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_int32x4_t r = simde_vcvtq_s32_f32(a); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0, 1000.0); simde_int32x4_t r = simde_vcvtq_s32_f32(a); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvtq_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; int64_t r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 7.90), SIMDE_FLOAT64_C( -570.72) }, { INT64_C( 7), -INT64_C( 570) } }, { { SIMDE_FLOAT64_C( -808.38), SIMDE_FLOAT64_C( 530.98) }, { -INT64_C( 808), INT64_C( 530) } }, { { SIMDE_FLOAT64_C( 801.81), SIMDE_FLOAT64_C( -148.95) }, { INT64_C( 801), -INT64_C( 148) } }, { { 
SIMDE_FLOAT64_C( 837.17), SIMDE_FLOAT64_C( 387.04) }, { INT64_C( 837), INT64_C( 387) } }, { { SIMDE_FLOAT64_C( 553.61), SIMDE_FLOAT64_C( 6.86) }, { INT64_C( 553), INT64_C( 6) } }, { { SIMDE_FLOAT64_C( -828.64), SIMDE_FLOAT64_C( -734.97) }, { -INT64_C( 828), -INT64_C( 734) } }, { { SIMDE_FLOAT64_C( 315.20), SIMDE_FLOAT64_C( -666.72) }, { INT64_C( 315), -INT64_C( 666) } }, { { SIMDE_FLOAT64_C( -735.89), SIMDE_FLOAT64_C( 55.10) }, { -INT64_C( 735), INT64_C( 55) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_int64x2_t r = simde_vcvtq_s64_f64(a); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_int64x2_t r = simde_vcvtq_s64_f64(a); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvtq_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; uint32_t r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 101.83), SIMDE_FLOAT32_C( 359.10), SIMDE_FLOAT32_C( 718.19), SIMDE_FLOAT32_C( -635.43) }, { UINT32_C( 101), UINT32_C( 359), UINT32_C( 718), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 426.17), SIMDE_FLOAT32_C( -916.71), SIMDE_FLOAT32_C( 598.15), SIMDE_FLOAT32_C( -323.30) }, { UINT32_C( 426), UINT32_C( 0), UINT32_C( 598), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 835.20), SIMDE_FLOAT32_C( 302.52), SIMDE_FLOAT32_C( -672.10), SIMDE_FLOAT32_C( -672.13) }, { UINT32_C( 835), UINT32_C( 302), UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -675.16), SIMDE_FLOAT32_C( -201.16), SIMDE_FLOAT32_C( 670.23), SIMDE_FLOAT32_C( 715.02) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 670), UINT32_C( 715) } }, { { SIMDE_FLOAT32_C( 135.28), SIMDE_FLOAT32_C( 243.93), SIMDE_FLOAT32_C( -51.68), 
SIMDE_FLOAT32_C( -899.58) }, { UINT32_C( 135), UINT32_C( 243), UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( 681.80), SIMDE_FLOAT32_C( -157.71), SIMDE_FLOAT32_C( 547.10), SIMDE_FLOAT32_C( 626.30) }, { UINT32_C( 681), UINT32_C( 0), UINT32_C( 547), UINT32_C( 626) } }, { { SIMDE_FLOAT32_C( 588.25), SIMDE_FLOAT32_C( -67.67), SIMDE_FLOAT32_C( -423.03), SIMDE_FLOAT32_C( -927.65) }, { UINT32_C( 588), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { SIMDE_FLOAT32_C( -830.78), SIMDE_FLOAT32_C( 270.29), SIMDE_FLOAT32_C( 703.79), SIMDE_FLOAT32_C( 271.05) }, { UINT32_C( 0), UINT32_C( 270), UINT32_C( 703), UINT32_C( 271) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_uint32x4_t r = simde_vcvtq_u32_f32(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0, 1000.0); simde_uint32x4_t r = simde_vcvtq_u32_f32(a); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvtq_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; uint64_t r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 770.83), SIMDE_FLOAT64_C( 372.43) }, { UINT64_C( 770), UINT64_C( 372) } }, { { SIMDE_FLOAT64_C( 28.55), SIMDE_FLOAT64_C( 399.88) }, { UINT64_C( 28), UINT64_C( 399) } }, { { SIMDE_FLOAT64_C( -588.00), SIMDE_FLOAT64_C( 915.76) }, { UINT64_C( 0), UINT64_C( 915) } }, { { SIMDE_FLOAT64_C( -114.69), SIMDE_FLOAT64_C( 92.85) }, { UINT64_C( 0), UINT64_C( 92) } }, { { SIMDE_FLOAT64_C( 423.51), SIMDE_FLOAT64_C( -79.29) }, { UINT64_C( 423), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -159.38), SIMDE_FLOAT64_C( 303.14) }, { UINT64_C( 0), UINT64_C( 303) } }, { { SIMDE_FLOAT64_C( 981.64), SIMDE_FLOAT64_C( -989.30) }, { UINT64_C( 
981), UINT64_C( 0) } }, { { SIMDE_FLOAT64_C( -536.33), SIMDE_FLOAT64_C( 469.05) }, { UINT64_C( 0), UINT64_C( 469) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_uint64x2_t r = simde_vcvtq_u64_f64(a); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_uint64x2_t r = simde_vcvtq_u64_f64(a); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvt_f32_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; simde_float32 r[2]; } test_vec[] = { { { -INT32_C( 1490815448), INT32_C( 440787632) }, { SIMDE_FLOAT32_C(-1490815488.00), SIMDE_FLOAT32_C(440787648.00) } }, { { INT32_C( 1033660046), INT32_C( 730210234) }, { SIMDE_FLOAT32_C(1033660032.00), SIMDE_FLOAT32_C(730210240.00) } }, { { -INT32_C( 389361289), INT32_C( 1176472002) }, { SIMDE_FLOAT32_C(-389361280.00), SIMDE_FLOAT32_C(1176472064.00) } }, { { -INT32_C( 1920118894), -INT32_C( 230240567) }, { SIMDE_FLOAT32_C(-1920118912.00), SIMDE_FLOAT32_C(-230240560.00) } }, { { INT32_C( 1956211140), -INT32_C( 644948405) }, { SIMDE_FLOAT32_C(1956211200.00), SIMDE_FLOAT32_C(-644948416.00) } }, { { -INT32_C( 32036028), -INT32_C( 1054237366) }, { SIMDE_FLOAT32_C(-32036028.00), SIMDE_FLOAT32_C(-1054237376.00) } }, { { INT32_C( 816444270), INT32_C( 242665596) }, { SIMDE_FLOAT32_C(816444288.00), SIMDE_FLOAT32_C(242665600.00) } }, { { -INT32_C( 509869032), -INT32_C( 1764498734) }, { SIMDE_FLOAT32_C(-509869024.00), SIMDE_FLOAT32_C(-1764498688.00) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_float32x2_t r = simde_vcvt_f32_s32(a); 
simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_float32x2_t r = simde_vcvt_f32_s32(a); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvt_f64_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; simde_float64 r[1]; } test_vec[] = { { { -INT64_C( 5637188979182257959) }, { SIMDE_FLOAT64_C(-5637188979182258176.00) } }, { { -INT64_C( 1615765083744039100) }, { SIMDE_FLOAT64_C(-1615765083744039168.00) } }, { { -INT64_C( 3391338634861752101) }, { SIMDE_FLOAT64_C(-3391338634861752320.00) } }, { { -INT64_C( 5913301266420841191) }, { SIMDE_FLOAT64_C(-5913301266420841472.00) } }, { { -INT64_C( 494825632698398733) }, { SIMDE_FLOAT64_C(-494825632698398720.00) } }, { { -INT64_C( 7406702033022478571) }, { SIMDE_FLOAT64_C(-7406702033022478336.00) } }, { { -INT64_C( 2579062017698220053) }, { SIMDE_FLOAT64_C(-2579062017698220032.00) } }, { { INT64_C( 151943772838646681) }, { SIMDE_FLOAT64_C(151943772838646688.00) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_float64x1_t r = simde_vcvt_f64_s64(a); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); simde_float64x1_t r = simde_vcvt_f64_s64(a); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvt_f32_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; simde_float32 r[2]; } test_vec[] = { { { UINT32_C(2587932456), UINT32_C( 295281173) }, { 
SIMDE_FLOAT32_C(2587932416.00), SIMDE_FLOAT32_C(295281184.00) } }, { { UINT32_C(3545609472), UINT32_C(2675448399) }, { SIMDE_FLOAT32_C(3545609472.00), SIMDE_FLOAT32_C(2675448320.00) } }, { { UINT32_C(2886471787), UINT32_C(3487249558) }, { SIMDE_FLOAT32_C(2886471680.00), SIMDE_FLOAT32_C(3487249664.00) } }, { { UINT32_C(4010341369), UINT32_C(2972864136) }, { SIMDE_FLOAT32_C(4010341376.00), SIMDE_FLOAT32_C(2972864256.00) } }, { { UINT32_C( 441152005), UINT32_C( 338486292) }, { SIMDE_FLOAT32_C(441152000.00), SIMDE_FLOAT32_C(338486304.00) } }, { { UINT32_C(3924328857), UINT32_C( 126378139) }, { SIMDE_FLOAT32_C(3924328960.00), SIMDE_FLOAT32_C(126378136.00) } }, { { UINT32_C( 129209456), UINT32_C(3252063944) }, { SIMDE_FLOAT32_C(129209456.00), SIMDE_FLOAT32_C(3252064000.00) } }, { { UINT32_C( 179363457), UINT32_C( 851173932) }, { SIMDE_FLOAT32_C(179363456.00), SIMDE_FLOAT32_C(851173952.00) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_float32x2_t r = simde_vcvt_f32_u32(a); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_float32x2_t r = simde_vcvt_f32_u32(a); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvt_f64_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[1]; simde_float64 r[1]; } test_vec[] = { { { UINT64_C( 8225907245979072970) }, { SIMDE_FLOAT64_C(8225907245979072512.00) } }, { { UINT64_C(16476598577281444883) }, { SIMDE_FLOAT64_C(16476598577281445888.00) } }, { { UINT64_C( 4343902165234378246) }, { SIMDE_FLOAT64_C(4343902165234378240.00) } }, { { UINT64_C(15500192606257303370) }, { SIMDE_FLOAT64_C(15500192606257303552.00) } }, { { UINT64_C( 6717319276498190264) }, { 
SIMDE_FLOAT64_C(6717319276498190336.00) } }, { { UINT64_C( 8350517506981939683) }, { SIMDE_FLOAT64_C(8350517506981939200.00) } }, { { UINT64_C(12151509346023160421) }, { SIMDE_FLOAT64_C(12151509346023159808.00) } }, { { UINT64_C( 3677480995464854564) }, { SIMDE_FLOAT64_C(3677480995464854528.00) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_float64x1_t r = simde_vcvt_f64_u64(a); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); simde_float64x1_t r = simde_vcvt_f64_u64(a); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvtq_f32_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; simde_float32 r[4]; } test_vec[] = { { { -INT32_C( 932389316), -INT32_C( 1088008327), -INT32_C( 1969616841), -INT32_C( 1510957523) }, { SIMDE_FLOAT32_C(-932389312.00), SIMDE_FLOAT32_C(-1088008320.00), SIMDE_FLOAT32_C(-1969616896.00), SIMDE_FLOAT32_C(-1510957568.00) } }, { { -INT32_C( 55262820), INT32_C( 574170805), INT32_C( 1250738434), INT32_C( 1268545039) }, { SIMDE_FLOAT32_C(-55262820.00), SIMDE_FLOAT32_C(574170816.00), SIMDE_FLOAT32_C(1250738432.00), SIMDE_FLOAT32_C(1268545024.00) } }, { { -INT32_C( 854325164), -INT32_C( 1886569896), INT32_C( 1880762178), INT32_C( 1645546181) }, { SIMDE_FLOAT32_C(-854325184.00), SIMDE_FLOAT32_C(-1886569856.00), SIMDE_FLOAT32_C(1880762240.00), SIMDE_FLOAT32_C(1645546240.00) } }, { { -INT32_C( 2141271605), -INT32_C( 224225040), INT32_C( 1748774489), -INT32_C( 122431324) }, { SIMDE_FLOAT32_C(-2141271552.00), SIMDE_FLOAT32_C(-224225040.00), SIMDE_FLOAT32_C(1748774528.00), SIMDE_FLOAT32_C(-122431328.00) } }, { { INT32_C( 969328609), INT32_C( 1153979138), INT32_C( 
1085596282), -INT32_C( 1214068244) }, { SIMDE_FLOAT32_C(969328640.00), SIMDE_FLOAT32_C(1153979136.00), SIMDE_FLOAT32_C(1085596288.00), SIMDE_FLOAT32_C(-1214068224.00) } }, { { -INT32_C( 2093481837), -INT32_C( 243934568), -INT32_C( 1386630647), INT32_C( 1806044554) }, { SIMDE_FLOAT32_C(-2093481856.00), SIMDE_FLOAT32_C(-243934560.00), SIMDE_FLOAT32_C(-1386630656.00), SIMDE_FLOAT32_C(1806044544.00) } }, { { -INT32_C( 693867308), INT32_C( 958098879), INT32_C( 1014615887), INT32_C( 754129817) }, { SIMDE_FLOAT32_C(-693867328.00), SIMDE_FLOAT32_C(958098880.00), SIMDE_FLOAT32_C(1014615872.00), SIMDE_FLOAT32_C(754129792.00) } }, { { -INT32_C( 1263588580), INT32_C( 262546438), INT32_C( 1639776214), -INT32_C( 506699252) }, { SIMDE_FLOAT32_C(-1263588608.00), SIMDE_FLOAT32_C(262546432.00), SIMDE_FLOAT32_C(1639776256.00), SIMDE_FLOAT32_C(-506699264.00) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_float32x4_t r = simde_vcvtq_f32_s32(a); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_float32x4_t r = simde_vcvtq_f32_s32(a); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvtq_f64_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; simde_float64 r[2]; } test_vec[] = { { { -INT64_C( 5437369519729810865), INT64_C( 7901797256341896009) }, { SIMDE_FLOAT64_C(-5437369519729810432.00), SIMDE_FLOAT64_C(7901797256341896192.00) } }, { { -INT64_C( 7839088714326884250), INT64_C( 640569667726039958) }, { SIMDE_FLOAT64_C(-7839088714326884352.00), SIMDE_FLOAT64_C(640569667726039936.00) } }, { { -INT64_C( 5074851512610989404), INT64_C( 3884241220501831100) }, { SIMDE_FLOAT64_C(-5074851512610989056.00), 
SIMDE_FLOAT64_C(3884241220501831168.00) } }, { { INT64_C( 4054477313972153432), INT64_C( 6809766693901170880) }, { SIMDE_FLOAT64_C(4054477313972153344.00), SIMDE_FLOAT64_C(6809766693901170688.00) } }, { { -INT64_C( 5392550408562575359), INT64_C( 779428067380435404) }, { SIMDE_FLOAT64_C(-5392550408562575360.00), SIMDE_FLOAT64_C(779428067380435456.00) } }, { { INT64_C( 2926214301938811585), INT64_C( 436886571722072056) }, { SIMDE_FLOAT64_C(2926214301938811392.00), SIMDE_FLOAT64_C(436886571722072064.00) } }, { { -INT64_C( 1636882415660610150), INT64_C( 5581745048800032442) }, { SIMDE_FLOAT64_C(-1636882415660610048.00), SIMDE_FLOAT64_C(5581745048800032768.00) } }, { { INT64_C( 3091806024566387289), INT64_C( 4987996807332923692) }, { SIMDE_FLOAT64_C(3091806024566387200.00), SIMDE_FLOAT64_C(4987996807332923392.00) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_float64x2_t r = simde_vcvtq_f64_s64(a); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_float64x2_t r = simde_vcvtq_f64_s64(a); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvtq_f32_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; simde_float32 r[4]; } test_vec[] = { { { UINT32_C(2742190227), UINT32_C(1279879163), UINT32_C(1560757930), UINT32_C(2555284292) }, { SIMDE_FLOAT32_C(2742190336.00), SIMDE_FLOAT32_C(1279879168.00), SIMDE_FLOAT32_C(1560757888.00), SIMDE_FLOAT32_C(2555284224.00) } }, { { UINT32_C( 314328964), UINT32_C(1716492923), UINT32_C(2449736314), UINT32_C(4182169446) }, { SIMDE_FLOAT32_C(314328960.00), SIMDE_FLOAT32_C(1716492928.00), SIMDE_FLOAT32_C(2449736192.00), SIMDE_FLOAT32_C(4182169344.00) } }, { 
{ UINT32_C(1386068055), UINT32_C(3382634015), UINT32_C(1881646636), UINT32_C(3037230385) }, { SIMDE_FLOAT32_C(1386068096.00), SIMDE_FLOAT32_C(3382633984.00), SIMDE_FLOAT32_C(1881646592.00), SIMDE_FLOAT32_C(3037230336.00) } }, { { UINT32_C( 952681917), UINT32_C(3667793759), UINT32_C(2137826073), UINT32_C(3514348154) }, { SIMDE_FLOAT32_C(952681920.00), SIMDE_FLOAT32_C(3667793664.00), SIMDE_FLOAT32_C(2137826048.00), SIMDE_FLOAT32_C(3514348032.00) } }, { { UINT32_C(2317620586), UINT32_C( 676578043), UINT32_C(2593684072), UINT32_C(2907677168) }, { SIMDE_FLOAT32_C(2317620480.00), SIMDE_FLOAT32_C(676578048.00), SIMDE_FLOAT32_C(2593683968.00), SIMDE_FLOAT32_C(2907677184.00) } }, { { UINT32_C(3320125286), UINT32_C(1201636142), UINT32_C(2697333798), UINT32_C( 695353278) }, { SIMDE_FLOAT32_C(3320125184.00), SIMDE_FLOAT32_C(1201636096.00), SIMDE_FLOAT32_C(2697333760.00), SIMDE_FLOAT32_C(695353280.00) } }, { { UINT32_C(1353946708), UINT32_C(3245868632), UINT32_C(1901793409), UINT32_C( 387885745) }, { SIMDE_FLOAT32_C(1353946752.00), SIMDE_FLOAT32_C(3245868544.00), SIMDE_FLOAT32_C(1901793408.00), SIMDE_FLOAT32_C(387885760.00) } }, { { UINT32_C(4041016258), UINT32_C(2906160262), UINT32_C(1196293768), UINT32_C(2456862525) }, { SIMDE_FLOAT32_C(4041016320.00), SIMDE_FLOAT32_C(2906160384.00), SIMDE_FLOAT32_C(1196293760.00), SIMDE_FLOAT32_C(2456862464.00) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_float32x4_t r = simde_vcvtq_f32_u32(a); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_float32x4_t r = simde_vcvtq_f32_u32(a); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vcvtq_f64_u64 (SIMDE_MUNIT_TEST_ARGS) 
{ #if 1 static const struct { uint64_t a[2]; simde_float64 r[2]; } test_vec[] = { { { UINT64_C(13683682593130234659), UINT64_C(11559544905560819710) }, { SIMDE_FLOAT64_C(13683682593130233856.00), SIMDE_FLOAT64_C(11559544905560819712.00) } }, { { UINT64_C( 8936905941469911567), UINT64_C( 3558395723382130100) }, { SIMDE_FLOAT64_C(8936905941469912064.00), SIMDE_FLOAT64_C(3558395723382130176.00) } }, { { UINT64_C( 5604675100166593081), UINT64_C(13208615328365603029) }, { SIMDE_FLOAT64_C(5604675100166593536.00), SIMDE_FLOAT64_C(13208615328365602816.00) } }, { { UINT64_C(12384439342403662536), UINT64_C( 9485622372482503779) }, { SIMDE_FLOAT64_C(12384439342403661824.00), SIMDE_FLOAT64_C(9485622372482504704.00) } }, { { UINT64_C( 3344579514495568845), UINT64_C(11717851499640679486) }, { SIMDE_FLOAT64_C(3344579514495568896.00), SIMDE_FLOAT64_C(11717851499640680448.00) } }, { { UINT64_C(13150759761589106141), UINT64_C( 5909789806474809654) }, { SIMDE_FLOAT64_C(13150759761589106688.00), SIMDE_FLOAT64_C(5909789806474809344.00) } }, { { UINT64_C( 2728575438831915349), UINT64_C(14033684711101675845) }, { SIMDE_FLOAT64_C(2728575438831915520.00), SIMDE_FLOAT64_C(14033684711101675520.00) } }, { { UINT64_C(15972040989054780831), UINT64_C(17275351436999118824) }, { SIMDE_FLOAT64_C(15972040989054781440.00), SIMDE_FLOAT64_C(17275351436999118848.00) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_float64x2_t r = simde_vcvtq_f64_u64(a); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_float64x2_t r = simde_vcvtq_f64_u64(a); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN 
SIMDE_TEST_FUNC_LIST_ENTRY(vcvts_s32_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtd_s64_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcvts_u32_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtd_u64_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_s32_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_s64_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_u32_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_u64_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_s32_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_s64_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_u32_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_u64_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_f32_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_f64_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_f32_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_f64_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_f32_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_f64_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_f32_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtq_f64_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/declare-suites.h000066400000000000000000000077721400333146700204160ustar00rootroot00000000000000SIMDE_TEST_DECLARE_SUITE(aba) SIMDE_TEST_DECLARE_SUITE(abd) SIMDE_TEST_DECLARE_SUITE(abdl) SIMDE_TEST_DECLARE_SUITE(abs) SIMDE_TEST_DECLARE_SUITE(add) SIMDE_TEST_DECLARE_SUITE(addl) SIMDE_TEST_DECLARE_SUITE(addl_high) SIMDE_TEST_DECLARE_SUITE(addv) SIMDE_TEST_DECLARE_SUITE(addlv) SIMDE_TEST_DECLARE_SUITE(addw) SIMDE_TEST_DECLARE_SUITE(addw_high) SIMDE_TEST_DECLARE_SUITE(and) SIMDE_TEST_DECLARE_SUITE(bic) SIMDE_TEST_DECLARE_SUITE(bsl) SIMDE_TEST_DECLARE_SUITE(cagt) SIMDE_TEST_DECLARE_SUITE(ceq) SIMDE_TEST_DECLARE_SUITE(ceqz) SIMDE_TEST_DECLARE_SUITE(cge) SIMDE_TEST_DECLARE_SUITE(cgez) SIMDE_TEST_DECLARE_SUITE(cgt) SIMDE_TEST_DECLARE_SUITE(cgtz) SIMDE_TEST_DECLARE_SUITE(cle) SIMDE_TEST_DECLARE_SUITE(clez) SIMDE_TEST_DECLARE_SUITE(cls) SIMDE_TEST_DECLARE_SUITE(clt) SIMDE_TEST_DECLARE_SUITE(cltz) SIMDE_TEST_DECLARE_SUITE(clz) SIMDE_TEST_DECLARE_SUITE(cnt) SIMDE_TEST_DECLARE_SUITE(cvt) SIMDE_TEST_DECLARE_SUITE(combine) SIMDE_TEST_DECLARE_SUITE(create) SIMDE_TEST_DECLARE_SUITE(dot) 
SIMDE_TEST_DECLARE_SUITE(dot_lane) SIMDE_TEST_DECLARE_SUITE(dup_n) SIMDE_TEST_DECLARE_SUITE(dup_lane) SIMDE_TEST_DECLARE_SUITE(eor) SIMDE_TEST_DECLARE_SUITE(ext) SIMDE_TEST_DECLARE_SUITE(get_high) SIMDE_TEST_DECLARE_SUITE(get_lane) SIMDE_TEST_DECLARE_SUITE(get_low) SIMDE_TEST_DECLARE_SUITE(hadd) SIMDE_TEST_DECLARE_SUITE(hsub) SIMDE_TEST_DECLARE_SUITE(max) SIMDE_TEST_DECLARE_SUITE(maxnm) SIMDE_TEST_DECLARE_SUITE(maxv) SIMDE_TEST_DECLARE_SUITE(min) SIMDE_TEST_DECLARE_SUITE(minnm) SIMDE_TEST_DECLARE_SUITE(minv) SIMDE_TEST_DECLARE_SUITE(mla) SIMDE_TEST_DECLARE_SUITE(mla_n) SIMDE_TEST_DECLARE_SUITE(mlal) SIMDE_TEST_DECLARE_SUITE(mlal_high) SIMDE_TEST_DECLARE_SUITE(mlal_n) SIMDE_TEST_DECLARE_SUITE(mls) SIMDE_TEST_DECLARE_SUITE(mlsl) SIMDE_TEST_DECLARE_SUITE(mlsl_high) SIMDE_TEST_DECLARE_SUITE(mlsl_n) SIMDE_TEST_DECLARE_SUITE(movl) SIMDE_TEST_DECLARE_SUITE(movl_high) SIMDE_TEST_DECLARE_SUITE(movn) SIMDE_TEST_DECLARE_SUITE(movn_high) SIMDE_TEST_DECLARE_SUITE(mul) SIMDE_TEST_DECLARE_SUITE(mul_lane) SIMDE_TEST_DECLARE_SUITE(mul_n) SIMDE_TEST_DECLARE_SUITE(mull) SIMDE_TEST_DECLARE_SUITE(mull_high) SIMDE_TEST_DECLARE_SUITE(mull_n) SIMDE_TEST_DECLARE_SUITE(mvn) SIMDE_TEST_DECLARE_SUITE(neg) SIMDE_TEST_DECLARE_SUITE(orn) SIMDE_TEST_DECLARE_SUITE(orr) SIMDE_TEST_DECLARE_SUITE(padal) SIMDE_TEST_DECLARE_SUITE(padd) SIMDE_TEST_DECLARE_SUITE(paddl) SIMDE_TEST_DECLARE_SUITE(pmax) SIMDE_TEST_DECLARE_SUITE(pmin) SIMDE_TEST_DECLARE_SUITE(qabs) SIMDE_TEST_DECLARE_SUITE(qadd) SIMDE_TEST_DECLARE_SUITE(qdmulh) SIMDE_TEST_DECLARE_SUITE(qdmull) SIMDE_TEST_DECLARE_SUITE(qrdmulh) SIMDE_TEST_DECLARE_SUITE(qrdmulh_n) SIMDE_TEST_DECLARE_SUITE(qmovn) SIMDE_TEST_DECLARE_SUITE(qmovn_high) SIMDE_TEST_DECLARE_SUITE(qmovun) SIMDE_TEST_DECLARE_SUITE(qneg) SIMDE_TEST_DECLARE_SUITE(qshl) SIMDE_TEST_DECLARE_SUITE(qsub) SIMDE_TEST_DECLARE_SUITE(qtbl) SIMDE_TEST_DECLARE_SUITE(qtbx) SIMDE_TEST_DECLARE_SUITE(rbit) SIMDE_TEST_DECLARE_SUITE(reinterpret) SIMDE_TEST_DECLARE_SUITE(rev16) 
SIMDE_TEST_DECLARE_SUITE(rev32) SIMDE_TEST_DECLARE_SUITE(rev64) SIMDE_TEST_DECLARE_SUITE(rhadd) SIMDE_TEST_DECLARE_SUITE(rnd) SIMDE_TEST_DECLARE_SUITE(rndi) SIMDE_TEST_DECLARE_SUITE(rndm) SIMDE_TEST_DECLARE_SUITE(rndn) SIMDE_TEST_DECLARE_SUITE(rndp) SIMDE_TEST_DECLARE_SUITE(rshl) SIMDE_TEST_DECLARE_SUITE(rshr_n) SIMDE_TEST_DECLARE_SUITE(rsra_n) SIMDE_TEST_DECLARE_SUITE(set_lane) SIMDE_TEST_DECLARE_SUITE(st1_lane) SIMDE_TEST_DECLARE_SUITE(st3) SIMDE_TEST_DECLARE_SUITE(st4) SIMDE_TEST_DECLARE_SUITE(shl) SIMDE_TEST_DECLARE_SUITE(shl_n) SIMDE_TEST_DECLARE_SUITE(shr_n) SIMDE_TEST_DECLARE_SUITE(sra_n) SIMDE_TEST_DECLARE_SUITE(sub) SIMDE_TEST_DECLARE_SUITE(subl) SIMDE_TEST_DECLARE_SUITE(subw) SIMDE_TEST_DECLARE_SUITE(subw_high) SIMDE_TEST_DECLARE_SUITE(tbl) SIMDE_TEST_DECLARE_SUITE(tbx) SIMDE_TEST_DECLARE_SUITE(trn) SIMDE_TEST_DECLARE_SUITE(trn1) SIMDE_TEST_DECLARE_SUITE(trn2) SIMDE_TEST_DECLARE_SUITE(tst) SIMDE_TEST_DECLARE_SUITE(uqadd) SIMDE_TEST_DECLARE_SUITE(uzp) SIMDE_TEST_DECLARE_SUITE(uzp1) SIMDE_TEST_DECLARE_SUITE(uzp2) SIMDE_TEST_DECLARE_SUITE(zip) SIMDE_TEST_DECLARE_SUITE(zip1) SIMDE_TEST_DECLARE_SUITE(zip2) simde-0.7.2/test/arm/neon/dot.c000066400000000000000000000532261400333146700162610ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN dot #include "test-neon.h" #include "../../../simde/arm/neon/dot.h" static int test_simde_vdot_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int8_t b[8]; int8_t c[8]; int32_t r[2]; } test_vec[] = { { { INT32_C( 1524632456), -INT32_C( 1298408979) }, { -INT8_C( 30), -INT8_C( 3), -INT8_C( 60), -INT8_C( 69), -INT8_C( 89), -INT8_C( 80), INT8_C( 120), INT8_C( 114) }, { INT8_C( 37), -INT8_C( 73), -INT8_C( 19), -INT8_C( 21), -INT8_C( 48), -INT8_C( 53), INT8_C( 61), INT8_C( 79) }, { INT32_C( 1524634154), -INT32_C( 1298384141) } }, { { -INT32_C( 1634669294), INT32_C( 1359806665) }, { INT8_C( 12), -INT8_C( 19), -INT8_C( 85), -INT8_C( 7), -INT8_C( 57), INT8_C( 70), -INT8_C( 84), -INT8_C( 87) }, { 
INT8_C( 68), INT8_C( 112), INT8_C( 100), -INT8_C( 21), INT8_C( 32), -INT8_C( 35), INT8_C( 94), INT8_C( 69) }, { -INT32_C( 1634678959), INT32_C( 1359788492) } }, { { INT32_C( 1680952212), INT32_C( 699624983) }, { INT8_C( 91), INT8_C( 68), -INT8_C( 57), INT8_C( 36), INT8_C( 68), -INT8_C( 44), INT8_C( 117), INT8_C( 80) }, { -INT8_C( 62), INT8_C( 32), INT8_C( 74), -INT8_C( 119), INT8_C( 103), -INT8_C( 10), INT8_C( 50), -INT8_C( 85) }, { INT32_C( 1680940244), INT32_C( 699631477) } }, { { -INT32_C( 2036951450), INT32_C( 147649651) }, { INT8_C( 64), -INT8_C( 3), INT8_C( 108), INT8_C( 87), INT8_C( 107), INT8_C( 32), INT8_MIN, -INT8_C( 58) }, { INT8_C( 100), INT8_C( 71), -INT8_C( 22), -INT8_C( 88), INT8_C( 28), INT8_C( 95), -INT8_C( 7), -INT8_C( 34) }, { -INT32_C( 2036955295), INT32_C( 147658555) } }, { { -INT32_C( 429440129), -INT32_C( 1617848007) }, { INT8_C( 47), INT8_C( 40), INT8_C( 37), -INT8_C( 93), INT8_C( 28), -INT8_C( 15), -INT8_C( 85), INT8_C( 92) }, { -INT8_C( 18), INT8_C( 23), -INT8_C( 77), INT8_C( 89), INT8_C( 55), INT8_C( 51), INT8_C( 31), -INT8_C( 101) }, { -INT32_C( 429451181), -INT32_C( 1617859159) } }, { { -INT32_C( 1757148805), -INT32_C( 394969752) }, { INT8_MIN, -INT8_C( 36), -INT8_C( 50), -INT8_C( 71), INT8_C( 117), INT8_C( 96), INT8_C( 88), -INT8_C( 92) }, { -INT8_C( 120), INT8_C( 125), INT8_C( 71), -INT8_C( 92), INT8_C( 111), -INT8_C( 14), INT8_C( 1), INT8_C( 93) }, { -INT32_C( 1757134963), -INT32_C( 394966577) } }, { { INT32_C( 1102558218), INT32_C( 1675482856) }, { -INT8_C( 32), INT8_C( 33), -INT8_C( 6), INT8_C( 72), INT8_C( 94), INT8_C( 111), INT8_C( 48), -INT8_C( 34) }, { INT8_C( 75), -INT8_C( 1), -INT8_C( 105), -INT8_C( 64), INT8_C( 95), -INT8_C( 17), INT8_C( 100), -INT8_C( 25) }, { INT32_C( 1102551807), INT32_C( 1675495549) } }, { { -INT32_C( 611603348), -INT32_C( 1472623458) }, { INT8_C( 65), -INT8_C( 16), -INT8_C( 22), INT8_C( 41), -INT8_C( 58), -INT8_C( 57), -INT8_C( 116), -INT8_C( 90) }, { -INT8_C( 24), -INT8_C( 122), -INT8_C( 17), INT8_C( 
70), -INT8_C( 11), INT8_C( 31), INT8_C( 36), INT8_C( 64) }, { -INT32_C( 611599712), -INT32_C( 1472634523) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t c = simde_vld1_s8(test_vec[i].c); simde_int32x2_t r = simde_vdot_s32(a, b, c); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int8x8_t c = simde_test_arm_neon_random_i8x8(); simde_int32x2_t r = simde_vdot_s32(a, b, c); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdot_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint8_t b[8]; uint8_t c[8]; uint32_t r[2]; } test_vec[] = { { { UINT32_C( 655185567), UINT32_C(3897459723) }, { UINT8_C( 18), UINT8_C(121), UINT8_C( 22), UINT8_C(155), UINT8_C(163), UINT8_C( 40), UINT8_C(139), UINT8_C(130) }, { UINT8_C(158), UINT8_C(120), UINT8_C( 42), UINT8_C(195), UINT8_C( 75), UINT8_C( 33), UINT8_C(233), UINT8_C(110) }, { UINT32_C( 655234080), UINT32_C(3897519955) } }, { { UINT32_C( 62151472), UINT32_C(2215003365) }, { UINT8_C(155), UINT8_C( 19), UINT8_C(171), UINT8_C(166), UINT8_C(152), UINT8_C(250), UINT8_C(142), UINT8_C(170) }, { UINT8_C(115), UINT8_C(165), UINT8_C( 70), UINT8_C( 22), UINT8_C(205), UINT8_C(209), UINT8_C(152), UINT8_C(107) }, { UINT32_C( 62188054), UINT32_C(2215126549) } }, { { UINT32_C(2486092361), UINT32_C( 335747299) }, { UINT8_C(115), UINT8_C(183), UINT8_C( 23), UINT8_C( 88), UINT8_C(251), UINT8_C( 29), UINT8_C(221), 
UINT8_C(150) }, { UINT8_C( 49), UINT8_C(136), UINT8_C( 60), UINT8_C(201), UINT8_C(130), UINT8_C(203), UINT8_C(115), UINT8_C(246) }, { UINT32_C(2486141952), UINT32_C( 335848131) } }, { { UINT32_C(1024244080), UINT32_C(3567887755) }, { UINT8_C(103), UINT8_C(215), UINT8_C(105), UINT8_C( 75), UINT8_C(239), UINT8_C(108), UINT8_C( 95), UINT8_C( 99) }, { UINT8_C( 35), UINT8_C(118), UINT8_C(187), UINT8_C( 30), UINT8_C(148), UINT8_C(152), UINT8_C(181), UINT8_C(197) }, { UINT32_C(1024294940), UINT32_C(3567976241) } }, { { UINT32_C(2744054049), UINT32_C( 748224956) }, { UINT8_C(187), UINT8_C(166), UINT8_C(106), UINT8_C( 70), UINT8_C( 75), UINT8_C( 19), UINT8_C( 26), UINT8_C(178) }, { UINT8_C(234), UINT8_C(131), UINT8_C(253), UINT8_C(218), UINT8_C(239), UINT8_C( 92), UINT8_C( 61), UINT8_C( 18) }, { UINT32_C(2744161631), UINT32_C( 748249419) } }, { { UINT32_C(1731328211), UINT32_C(2989287057) }, { UINT8_C(215), UINT8_C(186), UINT8_C( 85), UINT8_C(148), UINT8_C(187), UINT8_C(239), UINT8_C(192), UINT8_C(118) }, { UINT8_C(149), UINT8_C( 42), UINT8_C(188), UINT8_C(224), UINT8_C( 61), UINT8_C(215), UINT8_C(146), UINT8_C( 40) }, { UINT32_C(1731417190), UINT32_C(2989382601) } }, { { UINT32_C(1241682010), UINT32_C(3210493932) }, { UINT8_C( 55), UINT8_C(141), UINT8_C( 38), UINT8_C(200), UINT8_C(115), UINT8_C( 82), UINT8_C(122), UINT8_C( 75) }, { UINT8_C( 12), UINT8_C(208), UINT8_C(223), UINT8_C(200), UINT8_C(191), UINT8_C(159), UINT8_C( 62), UINT8_C( 84) }, { UINT32_C(1241760472), UINT32_C(3210542799) } }, { { UINT32_C( 120912842), UINT32_C( 741328594) }, { UINT8_C( 86), UINT8_C( 49), UINT8_C(118), UINT8_C( 67), UINT8_C(112), UINT8_C(211), UINT8_C( 2), UINT8_C(168) }, { UINT8_C( 96), UINT8_C( 41), UINT8_C(112), UINT8_C(212), UINT8_C(123), UINT8_C(235), UINT8_C( 31), UINT8_C(136) }, { UINT32_C( 120950527), UINT32_C( 741414865) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint8x8_t b = 
simde_vld1_u8(test_vec[i].b); simde_uint8x8_t c = simde_vld1_u8(test_vec[i].c); simde_uint32x2_t r = simde_vdot_u32(a, b, c); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t c = simde_test_arm_neon_random_u8x8(); simde_uint32x2_t r = simde_vdot_u32(a, b, c); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdotq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int8_t b[16]; int8_t c[16]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 1315274814), INT32_C( 1813013239), -INT32_C( 878005830), INT32_C( 996395424) }, { -INT8_C( 46), -INT8_C( 66), INT8_C( 35), -INT8_C( 112), -INT8_C( 26), -INT8_C( 23), -INT8_C( 99), -INT8_C( 11), -INT8_C( 89), INT8_C( 77), -INT8_C( 31), -INT8_C( 32), -INT8_C( 24), INT8_C( 93), -INT8_C( 104), -INT8_C( 86) }, { -INT8_C( 36), INT8_C( 50), INT8_C( 91), -INT8_C( 45), -INT8_C( 107), INT8_C( 107), INT8_C( 63), INT8_C( 79), INT8_C( 28), -INT8_C( 22), INT8_C( 26), -INT8_C( 68), -INT8_C( 77), INT8_C( 125), -INT8_C( 8), -INT8_C( 123) }, { -INT32_C( 1315268233), INT32_C( 1813006454), -INT32_C( 878008646), INT32_C( 996420307) } }, { { INT32_C( 555031355), -INT32_C( 1424575996), -INT32_C( 410257409), INT32_C( 831595604) }, { INT8_C( 86), -INT8_C( 19), INT8_C( 4), -INT8_C( 21), INT8_C( 88), INT8_C( 68), INT8_C( 58), INT8_C( 117), INT8_C( 46), INT8_C( 84), INT8_C( 49), -INT8_C( 31), -INT8_C( 47), INT8_C( 41), INT8_C( 102), INT8_C( 13) }, { INT8_C( 68), INT8_C( 123), INT8_C( 46), INT8_C( 72), INT8_C( 45), INT8_C( 69), -INT8_C( 13), INT8_C( 45), INT8_C( 60), 
INT8_MAX, INT8_C( 20), -INT8_C( 111), -INT8_C( 93), -INT8_C( 90), -INT8_C( 62), -INT8_C( 7) }, { INT32_C( 555033538), -INT32_C( 1424562833), -INT32_C( 410239560), INT32_C( 831589870) } }, { { -INT32_C( 337262957), INT32_C( 945823498), INT32_C( 1159303796), INT32_C( 5406651) }, { -INT8_C( 6), -INT8_C( 127), INT8_C( 72), INT8_C( 40), -INT8_C( 58), INT8_C( 60), INT8_C( 85), INT8_C( 2), -INT8_C( 69), INT8_C( 105), -INT8_C( 109), INT8_C( 94), INT8_C( 15), INT8_C( 85), INT8_C( 87), -INT8_C( 94) }, { INT8_C( 28), INT8_C( 60), -INT8_C( 114), INT8_C( 38), INT8_C( 92), -INT8_C( 18), INT8_C( 95), -INT8_C( 48), INT8_MIN, INT8_C( 120), INT8_C( 21), INT8_C( 60), -INT8_C( 8), INT8_C( 104), INT8_C( 60), -INT8_C( 14) }, { -INT32_C( 337277433), INT32_C( 945825061), INT32_C( 1159328579), INT32_C( 5421907) } }, { { -INT32_C( 1357216535), INT32_C( 2075226048), -INT32_C( 388413991), -INT32_C( 1232391782) }, { INT8_C( 109), INT8_C( 25), -INT8_C( 35), -INT8_C( 55), INT8_C( 7), INT8_C( 60), -INT8_C( 103), -INT8_C( 120), -INT8_C( 76), -INT8_C( 81), -INT8_C( 60), -INT8_C( 84), INT8_C( 23), INT8_C( 0), -INT8_C( 97), INT8_C( 0) }, { -INT8_C( 124), -INT8_C( 71), -INT8_C( 81), INT8_C( 69), INT8_C( 41), INT8_C( 96), -INT8_C( 64), INT8_C( 2), -INT8_C( 91), -INT8_C( 102), -INT8_C( 22), INT8_C( 64), -INT8_C( 53), INT8_C( 117), -INT8_C( 10), INT8_C( 56) }, { -INT32_C( 1357232786), INT32_C( 2075238447), -INT32_C( 388402869), -INT32_C( 1232392031) } }, { { -INT32_C( 1778199666), -INT32_C( 1004627185), INT32_C( 1634787914), INT32_C( 1717637090) }, { -INT8_C( 55), INT8_C( 16), -INT8_C( 85), -INT8_C( 14), INT8_C( 113), INT8_C( 108), -INT8_C( 12), INT8_C( 22), INT8_C( 6), -INT8_C( 34), INT8_C( 86), -INT8_C( 47), INT8_C( 84), INT8_C( 77), INT8_C( 9), -INT8_C( 30) }, { INT8_C( 32), INT8_C( 11), INT8_C( 120), INT8_C( 48), -INT8_C( 89), -INT8_C( 106), -INT8_C( 12), -INT8_C( 15), INT8_C( 120), INT8_C( 100), INT8_C( 83), INT8_C( 90), INT8_C( 116), -INT8_C( 76), -INT8_C( 63), INT8_C( 61) }, { -INT32_C( 
1778212122), -INT32_C( 1004648876), INT32_C( 1634788142), INT32_C( 1717638585) } }, { { INT32_C( 909077701), -INT32_C( 565435432), INT32_C( 1437573889), INT32_C( 272153072) }, { -INT8_C( 60), -INT8_C( 80), INT8_C( 64), INT8_C( 107), INT8_C( 71), INT8_C( 52), INT8_C( 93), -INT8_C( 65), -INT8_C( 103), -INT8_C( 80), INT8_C( 26), INT8_C( 13), INT8_C( 100), -INT8_C( 37), INT8_C( 74), INT8_C( 41) }, { INT8_C( 71), INT8_C( 121), INT8_C( 95), INT8_C( 32), -INT8_C( 100), -INT8_C( 84), -INT8_C( 2), -INT8_C( 99), INT8_C( 79), -INT8_C( 82), -INT8_C( 13), INT8_C( 63), INT8_C( 103), INT8_C( 43), INT8_C( 79), INT8_C( 43) }, { INT32_C( 909073265), -INT32_C( 565440651), INT32_C( 1437572793), INT32_C( 272169390) } }, { { INT32_C( 580358363), INT32_C( 1575154884), INT32_C( 141229220), INT32_C( 506639575) }, { INT8_C( 45), -INT8_C( 111), INT8_C( 62), -INT8_C( 55), INT8_C( 61), INT8_C( 61), INT8_C( 103), -INT8_C( 116), -INT8_C( 21), INT8_C( 90), -INT8_C( 53), INT8_C( 82), -INT8_C( 123), INT8_C( 27), INT8_C( 125), INT8_C( 96) }, { -INT8_C( 85), INT8_C( 20), -INT8_C( 125), INT8_C( 111), INT8_C( 8), INT8_C( 101), -INT8_C( 51), -INT8_C( 84), INT8_C( 97), INT8_C( 55), -INT8_C( 75), INT8_C( 56), -INT8_C( 20), -INT8_C( 25), INT8_C( 86), INT8_C( 25) }, { INT32_C( 580338463), INT32_C( 1575166024), INT32_C( 141240700), INT32_C( 506654510) } }, { { -INT32_C( 1226599048), -INT32_C( 1119728942), INT32_C( 688852644), -INT32_C( 729183191) }, { -INT8_C( 95), INT8_C( 12), INT8_C( 67), -INT8_C( 87), INT8_C( 113), INT8_C( 16), INT8_C( 86), -INT8_C( 46), INT8_C( 72), INT8_C( 11), INT8_C( 10), INT8_C( 52), -INT8_C( 14), INT8_C( 97), INT8_C( 77), INT8_C( 106) }, { -INT8_C( 10), INT8_C( 48), INT8_C( 32), -INT8_C( 56), INT8_C( 122), INT8_C( 99), -INT8_C( 123), INT8_C( 30), INT8_C( 113), -INT8_C( 108), INT8_C( 71), -INT8_C( 102), INT8_C( 32), -INT8_C( 47), INT8_C( 110), -INT8_C( 63) }, { -INT32_C( 1226590506), -INT32_C( 1119725530), INT32_C( 688854998), -INT32_C( 729186406) } }, }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t c = simde_vld1q_s8(test_vec[i].c); simde_int32x4_t r = simde_vdotq_s32(a, b, c); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int8x16_t c = simde_test_arm_neon_random_i8x16(); simde_int32x4_t r = simde_vdotq_s32(a, b, c); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdotq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint8_t b[16]; uint8_t c[16]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(3976885184), UINT32_C(3561508868), UINT32_C(3970367447), UINT32_C(3484184309) }, { UINT8_C(194), UINT8_MAX, UINT8_C(199), UINT8_C(164), UINT8_C(174), UINT8_C( 27), UINT8_C(152), UINT8_C(196), UINT8_C(143), UINT8_C(127), UINT8_C(127), UINT8_C(229), UINT8_C( 80), UINT8_C( 0), UINT8_C(146), UINT8_C( 16) }, { UINT8_C(116), UINT8_C(157), UINT8_C(254), UINT8_C(120), UINT8_C(237), UINT8_C( 70), UINT8_C( 76), UINT8_C(196), UINT8_C( 69), UINT8_C(243), UINT8_C(176), UINT8_C( 58), UINT8_C( 97), UINT8_C( 92), UINT8_C( 10), UINT8_C( 36) }, { UINT32_C(3977017949), UINT32_C(3561601964), UINT32_C(3970443809), UINT32_C(3484194105) } }, { { UINT32_C( 180932956), UINT32_C(2093965549), UINT32_C( 811749087), UINT32_C(3259036750) }, { UINT8_C(145), UINT8_C( 62), UINT8_C( 58), UINT8_C(126), UINT8_C(133), UINT8_C(135), UINT8_C( 67), UINT8_C(202), UINT8_C(122), UINT8_C(243), UINT8_C( 5), UINT8_C(219), UINT8_C( 80), UINT8_C( 15), UINT8_MAX, 
UINT8_C(172) }, { UINT8_C(224), UINT8_C(200), UINT8_C(182), UINT8_C(205), UINT8_C( 40), UINT8_C(133), UINT8_C( 74), UINT8_C( 8), UINT8_C(211), UINT8_C(172), UINT8_C( 56), UINT8_C( 34), UINT8_C(160), UINT8_C(120), UINT8_C(228), UINT8_C( 50) }, { UINT32_C( 181014222), UINT32_C(2093995398), UINT32_C( 811824351), UINT32_C(3259118090) } }, { { UINT32_C(1018175415), UINT32_C( 537326502), UINT32_C( 939199463), UINT32_C(4226022170) }, { UINT8_C(195), UINT8_C(153), UINT8_C(200), UINT8_C(235), UINT8_C( 31), UINT8_C( 18), UINT8_C(243), UINT8_C(242), UINT8_C(190), UINT8_C( 43), UINT8_C( 20), UINT8_C( 95), UINT8_C(164), UINT8_C(249), UINT8_C(145), UINT8_C( 91) }, { UINT8_C( 24), UINT8_C( 65), UINT8_C(151), UINT8_C(190), UINT8_C( 53), UINT8_C(157), UINT8_C(222), UINT8_C( 28), UINT8_C(169), UINT8_C(217), UINT8_C( 83), UINT8_C(195), UINT8_C(212), UINT8_C( 54), UINT8_C(190), UINT8_C(151) }, { UINT32_C(1018264890), UINT32_C( 537391693), UINT32_C( 939261089), UINT32_C(4226111675) } }, { { UINT32_C(4001597391), UINT32_C(1491170969), UINT32_C(1186461090), UINT32_C( 111233262) }, { UINT8_C(137), UINT8_C( 56), UINT8_C(196), UINT8_C(190), UINT8_C(213), UINT8_C(162), UINT8_C(218), UINT8_C(126), UINT8_C(124), UINT8_C( 45), UINT8_C( 66), UINT8_C( 80), UINT8_C( 99), UINT8_C( 0), UINT8_C(232), UINT8_C( 51) }, { UINT8_C(135), UINT8_C(107), UINT8_C( 33), UINT8_C( 33), UINT8_C(225), UINT8_C( 2), UINT8_C(121), UINT8_C(131), UINT8_C(248), UINT8_C( 48), UINT8_C(201), UINT8_C(230), UINT8_C(120), UINT8_C(106), UINT8_C(237), UINT8_C( 1) }, { UINT32_C(4001634616), UINT32_C(1491262102), UINT32_C(1186525668), UINT32_C( 111300177) } }, { { UINT32_C(2025894306), UINT32_C(3505822292), UINT32_C( 723531976), UINT32_C(3227387961) }, { UINT8_C(115), UINT8_C(128), UINT8_C(225), UINT8_C( 85), UINT8_C(130), UINT8_C( 90), UINT8_C(216), UINT8_C(122), UINT8_C(138), UINT8_C(162), UINT8_C( 97), UINT8_C( 2), UINT8_C( 12), UINT8_C( 78), UINT8_C( 4), UINT8_C(175) }, { UINT8_MAX, UINT8_C(196), UINT8_C( 39), UINT8_C( 83), 
UINT8_C( 94), UINT8_C( 29), UINT8_C( 35), UINT8_C( 38), UINT8_C( 86), UINT8_C( 68), UINT8_C( 82), UINT8_C(143), UINT8_C( 76), UINT8_C(176), UINT8_C( 79), UINT8_C(192) }, { UINT32_C(2025964549), UINT32_C(3505849318), UINT32_C( 723563100), UINT32_C(3227436517) } }, { { UINT32_C(3004510512), UINT32_C( 372108683), UINT32_C(2618855055), UINT32_C(3695910108) }, { UINT8_C(224), UINT8_C(114), UINT8_C( 47), UINT8_C( 63), UINT8_C(143), UINT8_C( 83), UINT8_C(101), UINT8_C(229), UINT8_C(151), UINT8_C(183), UINT8_C(116), UINT8_C(227), UINT8_C(104), UINT8_C(196), UINT8_C(163), UINT8_C(152) }, { UINT8_C(245), UINT8_C(184), UINT8_C( 75), UINT8_C(128), UINT8_C(166), UINT8_C(121), UINT8_C(150), UINT8_C( 53), UINT8_C( 7), UINT8_C(175), UINT8_C(209), UINT8_C(228), UINT8_C(203), UINT8_C( 28), UINT8_C(192), UINT8_C(172) }, { UINT32_C(3004597957), UINT32_C( 372169751), UINT32_C(2618964137), UINT32_C(3695994148) } }, { { UINT32_C( 518778766), UINT32_C(3640873026), UINT32_C(1891465224), UINT32_C( 822632508) }, { UINT8_C( 25), UINT8_C( 84), UINT8_C(177), UINT8_C(191), UINT8_C(205), UINT8_C( 72), UINT8_C(244), UINT8_C(212), UINT8_C(247), UINT8_C(198), UINT8_C(184), UINT8_C(194), UINT8_C(226), UINT8_C(120), UINT8_C(110), UINT8_C(113) }, { UINT8_C(104), UINT8_C( 89), UINT8_C(143), UINT8_C(170), UINT8_C(170), UINT8_C(146), UINT8_C(132), UINT8_C(178), UINT8_C( 10), UINT8_C( 65), UINT8_C( 34), UINT8_C( 70), UINT8_C(161), UINT8_C( 42), UINT8_C(119), UINT8_C(186) }, { UINT32_C( 518846623), UINT32_C(3640988332), UINT32_C(1891500400), UINT32_C( 822708042) } }, { { UINT32_C(1266231678), UINT32_C(1746955889), UINT32_C( 371906612), UINT32_C(3112671569) }, { UINT8_C(242), UINT8_C( 22), UINT8_C( 99), UINT8_C(156), UINT8_C(169), UINT8_C(231), UINT8_C( 78), UINT8_C(179), UINT8_C( 40), UINT8_C(112), UINT8_C(250), UINT8_C(202), UINT8_C(155), UINT8_C(113), UINT8_C(132), UINT8_C( 25) }, { UINT8_C(154), UINT8_C(254), UINT8_C(101), UINT8_C( 11), UINT8_C(108), UINT8_C(133), UINT8_C(115), UINT8_C(160), UINT8_C( 
93), UINT8_C(158), UINT8_C(182), UINT8_C(174), UINT8_C( 55), UINT8_C( 62), UINT8_C(103), UINT8_C( 41) }, { UINT32_C(1266286249), UINT32_C(1747042474), UINT32_C( 372008676), UINT32_C(3112701721) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t c = simde_vld1q_u8(test_vec[i].c); simde_uint32x4_t r = simde_vdotq_u32(a, b, c); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t c = simde_test_arm_neon_random_u8x16(); simde_uint32x4_t r = simde_vdotq_u32(a, b, c); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vdot_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vdot_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vdotq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vdotq_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/dot_lane.c000066400000000000000000000565731400333146700172700ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN dot_lane #include "test-neon.h" #include "../../../simde/arm/neon/dot_lane.h" static int test_simde_vdot_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t r_[2]; int8_t a[8]; int8_t b[8]; const int lane; int32_t r[2]; } test_vec[] = { { { -INT32_C( 253147047), -INT32_C( 1800313358) }, { INT8_C( 3), INT8_C( 41), -INT8_C( 81), -INT8_C( 37), -INT8_C( 11), -INT8_C( 115), -INT8_C( 125), -INT8_C( 7) }, { -INT8_C( 94), INT8_C( 14), -INT8_C( 74), -INT8_C( 94), -INT8_C( 39), 
-INT8_C( 125), -INT8_C( 38), INT8_C( 93) }, INT32_C( 0), { -INT32_C( 253137283), -INT32_C( 1800313358) } }, { { INT32_C( 732178250), INT32_C( 154436016) }, { -INT8_C( 55), INT8_C( 29), -INT8_C( 7), -INT8_C( 69), -INT8_C( 126), -INT8_C( 86), INT8_C( 80), -INT8_C( 122) }, { -INT8_C( 45), -INT8_C( 1), INT8_C( 97), -INT8_C( 56), -INT8_C( 116), -INT8_C( 28), -INT8_C( 63), INT8_C( 47) }, INT32_C( 1), { INT32_C( 732178250), INT32_C( 154442266) } }, { { -INT32_C( 858687501), INT32_C( 1160358906) }, { -INT8_C( 46), -INT8_C( 51), INT8_C( 112), -INT8_C( 126), INT8_C( 78), -INT8_C( 92), -INT8_C( 116), INT8_C( 24) }, { -INT8_C( 63), -INT8_C( 123), -INT8_C( 45), INT8_C( 68), INT8_C( 48), INT8_C( 35), -INT8_C( 54), INT8_C( 3) }, INT32_C( 0), { -INT32_C( 858691938), INT32_C( 1160358906) } }, { { -INT32_C( 1345574110), INT32_C( 64916752) }, { INT8_C( 5), -INT8_C( 81), -INT8_C( 49), -INT8_C( 1), INT8_C( 90), -INT8_C( 8), INT8_C( 68), INT8_C( 44) }, { -INT8_C( 59), -INT8_C( 76), -INT8_C( 82), INT8_C( 19), INT8_C( 88), INT8_C( 58), INT8_C( 43), INT8_C( 26) }, INT32_C( 1), { -INT32_C( 1345574110), INT32_C( 64928276) } }, { { -INT32_C( 262209600), INT32_C( 1173563426) }, { INT8_C( 83), -INT8_C( 65), -INT8_C( 12), INT8_C( 99), INT8_C( 77), -INT8_C( 46), INT8_C( 102), INT8_C( 82) }, { -INT8_C( 127), INT8_C( 53), INT8_C( 81), -INT8_C( 37), INT8_C( 45), -INT8_C( 106), INT8_C( 7), -INT8_C( 14) }, INT32_C( 0), { -INT32_C( 262228221), INT32_C( 1173563426) } }, { { -INT32_C( 1559841462), -INT32_C( 1329778192) }, { INT8_C( 48), INT8_C( 27), -INT8_C( 96), INT8_C( 83), INT8_C( 67), -INT8_C( 109), -INT8_C( 104), -INT8_C( 106) }, { INT8_C( 83), -INT8_C( 116), -INT8_C( 6), -INT8_C( 96), INT8_C( 94), INT8_C( 96), -INT8_C( 14), -INT8_C( 33) }, INT32_C( 1), { -INT32_C( 1559841462), -INT32_C( 1329777404) } }, { { -INT32_C( 1011203178), INT32_C( 615956953) }, { INT8_C( 118), -INT8_C( 68), -INT8_C( 57), INT8_C( 102), -INT8_C( 19), -INT8_C( 124), INT8_C( 22), INT8_C( 30) }, { -INT8_C( 97), -INT8_C( 74), 
INT8_C( 113), -INT8_C( 30), INT8_C( 74), INT8_C( 9), INT8_C( 120), -INT8_C( 99) }, INT32_C( 0), { -INT32_C( 1011219093), INT32_C( 615956953) } }, { { -INT32_C( 214076779), INT32_C( 1775382483) }, { INT8_C( 114), -INT8_C( 116), INT8_C( 44), INT8_C( 76), INT8_C( 77), -INT8_C( 30), INT8_C( 112), -INT8_C( 61) }, { -INT8_C( 98), INT8_C( 55), INT8_C( 42), -INT8_C( 116), -INT8_C( 69), INT8_C( 64), -INT8_C( 86), INT8_C( 90) }, INT32_C( 1), { -INT32_C( 214076779), INT32_C( 1775360128) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t r_ = simde_vld1_s32(test_vec[i].r_); simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int32x2_t r = simde_vdot_lane_s32(r_, a, b, test_vec[i].lane); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t r_ = simde_test_arm_neon_random_i32x2(); simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int32x2_t r = simde_vdot_lane_s32(r_, a, b, lanes[i]); simde_test_arm_neon_write_i32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdot_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t r_[2]; uint8_t a[8]; uint8_t b[8]; const int lane; uint32_t r[2]; } test_vec[] = { { { UINT32_C(2273751440), UINT32_C(3884166093) }, { UINT8_C(198), UINT8_C(167), UINT8_C( 73), UINT8_C(199), UINT8_C(230), UINT8_C(240), UINT8_C( 5), UINT8_C( 49) }, { UINT8_C(237), UINT8_C(126), UINT8_C( 64), UINT8_C( 41), UINT8_C(234), UINT8_C( 92), UINT8_C(119), 
UINT8_C(208) }, INT32_C( 0), { UINT32_C(2273832239), UINT32_C(3884166093) } }, { { UINT32_C(3927977934), UINT32_C(1271200699) }, { UINT8_C(172), UINT8_C( 74), UINT8_C(210), UINT8_C(122), UINT8_C(245), UINT8_C( 86), UINT8_C( 97), UINT8_C(187) }, { UINT8_C(253), UINT8_C(170), UINT8_C(130), UINT8_C(227), UINT8_C(155), UINT8_C(135), UINT8_C( 20), UINT8_C(136) }, INT32_C( 1), { UINT32_C(3927977934), UINT32_C(1271277656) } }, { { UINT32_C(4038153478), UINT32_C(2143300017) }, { UINT8_C( 88), UINT8_C(224), UINT8_C(105), UINT8_C( 19), UINT8_C(219), UINT8_C( 45), UINT8_C( 94), UINT8_C(136) }, { UINT8_C(119), UINT8_C( 48), UINT8_C( 2), UINT8_C(109), UINT8_C(134), UINT8_C( 99), UINT8_C( 40), UINT8_C(131) }, INT32_C( 0), { UINT32_C(4038176983), UINT32_C(2143300017) } }, { { UINT32_C(2842143502), UINT32_C( 942766898) }, { UINT8_C(208), UINT8_C(227), UINT8_C( 40), UINT8_C(130), UINT8_C( 12), UINT8_C(233), UINT8_C( 1), UINT8_C(100) }, { UINT8_C(201), UINT8_C(107), UINT8_C(119), UINT8_C(165), UINT8_C(152), UINT8_C(213), UINT8_C( 45), UINT8_C( 16) }, INT32_C( 1), { UINT32_C(2842143502), UINT32_C( 942819996) } }, { { UINT32_C(2357014277), UINT32_C(2685379986) }, { UINT8_C( 80), UINT8_C(118), UINT8_C( 73), UINT8_C(131), UINT8_C(242), UINT8_C(123), UINT8_C(187), UINT8_C(194) }, { UINT8_C( 94), UINT8_C(228), UINT8_C( 68), UINT8_C(106), UINT8_C(205), UINT8_C( 70), UINT8_C(206), UINT8_C(150) }, INT32_C( 0), { UINT32_C(2357067551), UINT32_C(2685379986) } }, { { UINT32_C(1228621233), UINT32_C( 525953050) }, { UINT8_C(151), UINT8_C(214), UINT8_C(171), UINT8_C( 42), UINT8_C(124), UINT8_C(187), UINT8_C(202), UINT8_C(204) }, { UINT8_C( 49), UINT8_C( 20), UINT8_C( 79), UINT8_C( 35), UINT8_C(143), UINT8_C( 11), UINT8_C(230), UINT8_C(237) }, INT32_C( 1), { UINT32_C(1228621233), UINT32_C( 526067647) } }, { { UINT32_C(3159829231), UINT32_C( 559031664) }, { UINT8_C(106), UINT8_C(142), UINT8_C(107), UINT8_C(132), UINT8_C(246), UINT8_C(196), UINT8_C(163), UINT8_C(142) }, { UINT8_C(155), UINT8_C( 79), 
UINT8_C(184), UINT8_C( 23), UINT8_C( 10), UINT8_C(130), UINT8_C(227), UINT8_C( 59) }, INT32_C( 0), { UINT32_C(3159879603), UINT32_C( 559031664) } }, { { UINT32_C( 626996118), UINT32_C( 756172094) }, { UINT8_C(111), UINT8_C(105), UINT8_C(233), UINT8_C(224), UINT8_C(142), UINT8_C( 59), UINT8_C( 1), UINT8_C(248) }, { UINT8_C(201), UINT8_C(108), UINT8_C(124), UINT8_C(192), UINT8_C( 49), UINT8_C( 32), UINT8_C( 78), UINT8_C(204) }, INT32_C( 1), { UINT32_C( 626996118), UINT32_C( 756231610) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t r_ = simde_vld1_u32(test_vec[i].r_); simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint32x2_t r = simde_vdot_lane_u32(r_, a, b, test_vec[i].lane); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t r_ = simde_test_arm_neon_random_u32x2(); simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint32x2_t r = simde_vdot_lane_u32(r_, a, b, lanes[i]); simde_test_arm_neon_write_u32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdot_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t r_[4]; int8_t a[16]; int8_t b[16]; const int lane; int32_t r[4]; } test_vec[] = { { { INT32_C( 1560176901), -INT32_C( 161233256), -INT32_C( 535959529), -INT32_C( 1508750539) }, { -INT8_C( 123), INT8_C( 76), INT8_C( 7), INT8_C( 87), INT8_C( 63), -INT8_C( 119), -INT8_C( 16), -INT8_C( 85), -INT8_C( 127), -INT8_C( 126), -INT8_C( 28), 
INT8_C( 81), -INT8_C( 117), INT8_C( 86), INT8_C( 102), -INT8_C( 112) }, { -INT8_C( 64), INT8_C( 100), -INT8_C( 19), INT8_C( 88), INT8_C( 42), INT8_C( 80), INT8_C( 79), INT8_C( 65), INT8_C( 56), INT8_C( 92), INT8_C( 33), INT8_C( 109), -INT8_C( 89), INT8_C( 51), INT8_C( 20), INT8_C( 44) }, INT32_C( 0), { INT32_C( 1560199896), -INT32_C( 161233256), -INT32_C( 535959529), -INT32_C( 1508750539) } }, { { -INT32_C( 1098638465), INT32_C( 644445349), -INT32_C( 2106110218), INT32_C( 1678957988) }, { INT8_C( 65), -INT8_C( 1), -INT8_C( 67), INT8_C( 107), INT8_C( 79), INT8_C( 12), -INT8_C( 83), -INT8_C( 121), INT8_C( 104), -INT8_C( 50), -INT8_C( 11), INT8_C( 15), INT8_C( 2), INT8_C( 9), INT8_C( 60), -INT8_C( 127) }, { INT8_C( 36), -INT8_C( 64), INT8_C( 64), -INT8_C( 55), INT8_C( 52), -INT8_C( 87), -INT8_C( 17), INT8_C( 43), -INT8_C( 9), INT8_C( 103), -INT8_C( 83), -INT8_C( 100), INT8_C( 68), -INT8_C( 65), INT8_C( 0), -INT8_C( 122) }, INT32_C( 1), { -INT32_C( 1098638465), INT32_C( 644444621), -INT32_C( 2106110218), INT32_C( 1678957988) } }, { { INT32_C( 250723775), INT32_C( 831954633), INT32_C( 1866566509), -INT32_C( 1192198764) }, { INT8_C( 61), INT8_C( 48), -INT8_C( 126), INT8_C( 113), -INT8_C( 38), INT8_C( 113), -INT8_C( 100), -INT8_C( 47), -INT8_C( 40), INT8_C( 73), INT8_C( 109), INT8_C( 29), INT8_C( 9), INT8_C( 110), -INT8_C( 93), -INT8_C( 56) }, { INT8_C( 43), -INT8_C( 108), -INT8_C( 42), -INT8_C( 11), INT8_C( 51), INT8_C( 108), INT8_C( 38), -INT8_C( 96), -INT8_C( 9), INT8_C( 103), INT8_C( 15), -INT8_C( 117), -INT8_C( 28), -INT8_C( 1), INT8_C( 68), INT8_C( 33) }, INT32_C( 2), { INT32_C( 250723775), INT32_C( 831954633), INT32_C( 1866572630), -INT32_C( 1192198764) } }, { { INT32_C( 177456688), INT32_C( 282799927), -INT32_C( 2110961287), -INT32_C( 498413385) }, { INT8_C( 100), INT8_C( 32), -INT8_C( 41), -INT8_C( 105), -INT8_C( 115), -INT8_C( 2), INT8_C( 55), -INT8_C( 124), INT8_C( 101), INT8_C( 70), INT8_C( 16), INT8_C( 74), INT8_C( 70), INT8_C( 84), INT8_C( 107), INT8_C( 118) 
}, { INT8_C( 26), -INT8_C( 2), INT8_MIN, INT8_C( 81), INT8_C( 46), INT8_C( 91), INT8_C( 97), -INT8_C( 89), -INT8_C( 92), -INT8_C( 114), INT8_C( 41), INT8_C( 91), INT8_C( 94), INT8_C( 115), INT8_C( 62), -INT8_C( 61) }, INT32_C( 3), { INT32_C( 177456688), INT32_C( 282799927), -INT32_C( 2110961287), -INT32_C( 498397709) } }, { { INT32_C( 542774675), INT32_C( 2040893971), INT32_C( 516142552), INT32_C( 596913673) }, { INT8_C( 45), INT8_C( 20), INT8_C( 116), INT8_C( 91), INT8_C( 112), -INT8_C( 42), INT8_C( 2), INT8_C( 20), INT8_C( 100), INT8_C( 43), INT8_C( 112), -INT8_C( 61), -INT8_C( 98), -INT8_C( 82), -INT8_C( 122), INT8_C( 49) }, { -INT8_C( 61), -INT8_C( 32), INT8_C( 82), -INT8_C( 41), INT8_C( 114), -INT8_C( 9), INT8_C( 80), INT8_C( 75), -INT8_C( 84), INT8_C( 19), INT8_C( 105), -INT8_C( 75), INT8_C( 65), -INT8_C( 2), -INT8_C( 40), INT8_C( 110) }, INT32_C( 0), { INT32_C( 542777071), INT32_C( 2040893971), INT32_C( 516142552), INT32_C( 596913673) } }, { { -INT32_C( 2100737006), -INT32_C( 2020095198), -INT32_C( 1807087626), INT32_C( 2026295477) }, { -INT8_C( 80), INT8_C( 24), INT8_C( 79), INT8_C( 35), INT8_C( 15), -INT8_C( 97), INT8_C( 110), -INT8_C( 69), -INT8_C( 78), -INT8_C( 41), INT8_C( 112), -INT8_C( 12), -INT8_C( 43), INT8_C( 72), INT8_C( 98), -INT8_C( 24) }, { -INT8_C( 108), INT8_C( 44), INT8_C( 106), -INT8_C( 73), -INT8_C( 9), INT8_C( 1), INT8_C( 62), -INT8_C( 18), INT8_C( 8), -INT8_C( 120), -INT8_C( 126), -INT8_C( 67), INT8_C( 88), INT8_C( 72), INT8_C( 54), INT8_C( 8) }, INT32_C( 1), { -INT32_C( 2100737006), -INT32_C( 2020087368), -INT32_C( 1807087626), INT32_C( 2026295477) } }, { { INT32_C( 1865123168), -INT32_C( 685074139), INT32_C( 1187748465), INT32_C( 1999515362) }, { INT8_C( 90), -INT8_C( 103), INT8_C( 46), INT8_C( 81), -INT8_C( 102), INT8_C( 108), INT8_C( 63), -INT8_C( 93), -INT8_C( 12), -INT8_C( 62), INT8_C( 96), INT8_C( 76), INT8_C( 10), -INT8_C( 106), INT8_C( 84), INT8_C( 107) }, { INT8_C( 28), INT8_MIN, -INT8_C( 38), INT8_C( 65), INT8_C( 25), INT8_C( 
5), INT8_C( 24), -INT8_C( 118), -INT8_C( 97), -INT8_C( 28), -INT8_C( 47), -INT8_C( 126), INT8_C( 18), -INT8_C( 1), -INT8_C( 7), INT8_C( 108) }, INT32_C( 2), { INT32_C( 1865123168), -INT32_C( 685074139), INT32_C( 1187737277), INT32_C( 1999515362) } }, { { INT32_C( 868034456), -INT32_C( 2015953517), -INT32_C( 908904769), -INT32_C( 382457907) }, { -INT8_C( 89), INT8_C( 15), INT8_C( 42), -INT8_C( 63), INT8_C( 20), INT8_C( 66), INT8_C( 75), -INT8_C( 77), INT8_C( 38), INT8_C( 28), INT8_C( 53), INT8_C( 56), INT8_C( 28), INT8_C( 46), -INT8_C( 92), -INT8_C( 76) }, { INT8_C( 85), INT8_C( 98), -INT8_C( 25), -INT8_C( 24), INT8_C( 95), -INT8_C( 67), INT8_C( 111), INT8_C( 30), -INT8_C( 12), INT8_C( 66), -INT8_C( 25), -INT8_C( 63), INT8_C( 106), INT8_C( 28), -INT8_C( 86), INT8_C( 17) }, INT32_C( 3), { INT32_C( 868034456), -INT32_C( 2015953517), -INT32_C( 908904769), -INT32_C( 382447031) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t r_ = simde_vld1q_s32(test_vec[i].r_); simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int32x4_t r = simde_vdot_laneq_s32(r_, a, b, test_vec[i].lane); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t r_ = simde_test_arm_neon_random_i32x4(); simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int32x4_t r = simde_vdot_laneq_s32(r_, a, b, lanes[i]); simde_test_arm_neon_write_i32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int 
test_simde_vdot_laneq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t r_[4]; uint8_t a[16]; uint8_t b[16]; const int lane; uint32_t r[4]; } test_vec[] = { { { UINT32_C(1243008842), UINT32_C(2042260212), UINT32_C(4021263272), UINT32_C( 707897681) }, { UINT8_C(214), UINT8_C( 0), UINT8_C(102), UINT8_C(248), UINT8_C(105), UINT8_C(136), UINT8_C(248), UINT8_C(231), UINT8_C( 35), UINT8_C(235), UINT8_C( 87), UINT8_C( 56), UINT8_C(130), UINT8_C(122), UINT8_C(173), UINT8_C(204) }, { UINT8_C( 73), UINT8_C(196), UINT8_C( 22), UINT8_C( 61), UINT8_C( 46), UINT8_C(209), UINT8_C(183), UINT8_C(215), UINT8_C(108), UINT8_C(102), UINT8_C(198), UINT8_C(189), UINT8_C( 16), UINT8_C(247), UINT8_C(231), UINT8_C(230) }, INT32_C( 0), { UINT32_C(1243041836), UINT32_C(2042260212), UINT32_C(4021263272), UINT32_C( 707897681) } }, { { UINT32_C(1625247223), UINT32_C(4165457877), UINT32_C(1144037058), UINT32_C(1628495384) }, { UINT8_C(162), UINT8_C( 39), UINT8_C(159), UINT8_C(208), UINT8_C(248), UINT8_C( 86), UINT8_C(167), UINT8_C(100), UINT8_C(188), UINT8_C(109), UINT8_C( 33), UINT8_C(204), UINT8_C(100), UINT8_C( 9), UINT8_C(179), UINT8_C( 91) }, { UINT8_C( 86), UINT8_C(146), UINT8_C(188), UINT8_C( 44), UINT8_C(105), UINT8_C( 3), UINT8_C( 36), UINT8_C( 43), UINT8_C(162), UINT8_C( 85), UINT8_C(111), UINT8_C(186), UINT8_C( 51), UINT8_C(127), UINT8_C( 28), UINT8_C(213) }, INT32_C( 1), { UINT32_C(1625247223), UINT32_C(4165494487), UINT32_C(1144037058), UINT32_C(1628495384) } }, { { UINT32_C(2661661606), UINT32_C(3439480081), UINT32_C( 530195642), UINT32_C(2205830445) }, { UINT8_C(223), UINT8_C( 54), UINT8_C(175), UINT8_C( 72), UINT8_C( 58), UINT8_C(212), UINT8_C(115), UINT8_C(220), UINT8_C( 41), UINT8_C(226), UINT8_C(150), UINT8_C( 92), UINT8_C( 97), UINT8_C(178), UINT8_C( 49), UINT8_C( 8) }, { UINT8_C(109), UINT8_C(214), UINT8_C(166), UINT8_C(126), UINT8_C( 35), UINT8_C(169), UINT8_C( 76), UINT8_C(222), UINT8_C(205), UINT8_C(230), UINT8_C(253), UINT8_C(250), UINT8_C( 51), 
UINT8_C(119), UINT8_C(125), UINT8_C( 18) }, INT32_C( 2), { UINT32_C(2661661606), UINT32_C(3439480081), UINT32_C( 530316977), UINT32_C(2205830445) } }, { { UINT32_C(3898224046), UINT32_C( 717540609), UINT32_C( 277240495), UINT32_C(2048440077) }, { UINT8_C(141), UINT8_C(191), UINT8_C(249), UINT8_C(177), UINT8_C(104), UINT8_C( 69), UINT8_C(143), UINT8_C( 53), UINT8_C( 43), UINT8_C(140), UINT8_C( 47), UINT8_C( 94), UINT8_C( 3), UINT8_C(172), UINT8_C(112), UINT8_C(177) }, { UINT8_C(217), UINT8_C(202), UINT8_C(153), UINT8_C(218), UINT8_C(151), UINT8_C( 93), UINT8_C( 4), UINT8_C( 70), UINT8_C(184), UINT8_C(138), UINT8_C( 86), UINT8_C(197), UINT8_C( 65), UINT8_C(111), UINT8_C( 63), UINT8_C(207) }, INT32_C( 3), { UINT32_C(3898224046), UINT32_C( 717540609), UINT32_C( 277240495), UINT32_C(2048503059) } }, { { UINT32_C(2524985390), UINT32_C(2831880061), UINT32_C(2651257499), UINT32_C(2152756902) }, { UINT8_C( 64), UINT8_C(233), UINT8_C( 90), UINT8_C(215), UINT8_C( 71), UINT8_C( 95), UINT8_C( 29), UINT8_MAX, UINT8_C(233), UINT8_C(116), UINT8_C(196), UINT8_C( 43), UINT8_C(227), UINT8_C( 3), UINT8_C(250), UINT8_C( 17) }, { UINT8_C( 60), UINT8_C(122), UINT8_C(167), UINT8_C(185), UINT8_C(137), UINT8_C(114), UINT8_C( 98), UINT8_C( 36), UINT8_C(108), UINT8_C(104), UINT8_C(194), UINT8_C( 18), UINT8_C(223), UINT8_C( 18), UINT8_C(146), UINT8_C( 31) }, INT32_C( 0), { UINT32_C(2525072461), UINT32_C(2831880061), UINT32_C(2651257499), UINT32_C(2152756902) } }, { { UINT32_C(1140321788), UINT32_C( 893523020), UINT32_C(1801455240), UINT32_C(1165777417) }, { UINT8_C(212), UINT8_C( 35), UINT8_MAX, UINT8_C( 93), UINT8_C(149), UINT8_C( 97), UINT8_C(129), UINT8_C( 1), UINT8_C(201), UINT8_C( 68), UINT8_C( 20), UINT8_C(168), UINT8_C( 86), UINT8_C(166), UINT8_C(200), UINT8_C( 82) }, { UINT8_C(147), UINT8_C(191), UINT8_C(149), UINT8_C(223), UINT8_C(211), UINT8_C(215), UINT8_C( 21), UINT8_C( 92), UINT8_C(221), UINT8_C(117), UINT8_C(199), UINT8_C(231), UINT8_C(208), UINT8_C( 68), UINT8_C( 44), 
UINT8_C(164) }, INT32_C( 1), { UINT32_C(1140321788), UINT32_C( 893578115), UINT32_C(1801455240), UINT32_C(1165777417) } }, { { UINT32_C(4244777831), UINT32_C(1459520396), UINT32_C( 519967431), UINT32_C(1282459321) }, { UINT8_C(133), UINT8_C( 6), UINT8_C( 44), UINT8_C( 89), UINT8_C(221), UINT8_C( 65), UINT8_C(181), UINT8_C(187), UINT8_C(182), UINT8_C(124), UINT8_C(162), UINT8_C(134), UINT8_C(192), UINT8_C(206), UINT8_C( 43), UINT8_C( 40) }, { UINT8_C(250), UINT8_C( 45), UINT8_C( 37), UINT8_C(134), UINT8_C(176), UINT8_C( 35), UINT8_C(220), UINT8_C(120), UINT8_C( 54), UINT8_C(219), UINT8_C(150), UINT8_C(239), UINT8_C(161), UINT8_C( 6), UINT8_C( 59), UINT8_C( 39) }, INT32_C( 2), { UINT32_C(4244777831), UINT32_C(1459520396), UINT32_C( 520060741), UINT32_C(1282459321) } }, { { UINT32_C(3934283532), UINT32_C(1604662696), UINT32_C(1927628721), UINT32_C( 261754901) }, { UINT8_C( 61), UINT8_C(191), UINT8_C(150), UINT8_C(238), UINT8_C(226), UINT8_C(114), UINT8_C(102), UINT8_C( 24), UINT8_C( 77), UINT8_C(252), UINT8_C( 7), UINT8_C(239), UINT8_C( 2), UINT8_C( 67), UINT8_C( 22), UINT8_C( 15) }, { UINT8_C(170), UINT8_C(150), UINT8_C(249), UINT8_C( 83), UINT8_C(203), UINT8_C(158), UINT8_C(178), UINT8_C(124), UINT8_C(229), UINT8_C(151), UINT8_C(238), UINT8_C(250), UINT8_C(168), UINT8_C(136), UINT8_C( 10), UINT8_C(229) }, INT32_C( 3), { UINT32_C(3934283532), UINT32_C(1604662696), UINT32_C(1927628721), UINT32_C( 261768004) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t r_ = simde_vld1q_u32(test_vec[i].r_); simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint32x4_t r = simde_vdot_laneq_u32(r_, a, b, test_vec[i].lane); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t r_ = simde_test_arm_neon_random_u32x4(); 
simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint32x4_t r = simde_vdot_laneq_u32(r_, a, b, lanes[i]); simde_test_arm_neon_write_u32x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vdot_lane_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vdot_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vdot_laneq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vdot_laneq_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/dup_lane.c000066400000000000000000002551271400333146700172660ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN dup_lane #include "test-neon.h" #include "../../../simde/arm/neon/dup_n.h" #include "../../../simde/arm/neon/dup_lane.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ static int test_simde_vdup_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float32 vec[2]; int lane; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -787.13), SIMDE_FLOAT32_C( -15.07) }, INT8_C( 1), { SIMDE_FLOAT32_C( -15.07), SIMDE_FLOAT32_C( -15.07) } }, { { SIMDE_FLOAT32_C( 46.30), SIMDE_FLOAT32_C( 346.17) }, INT8_C( 1), { SIMDE_FLOAT32_C( 346.17), SIMDE_FLOAT32_C( 346.17) } }, { { SIMDE_FLOAT32_C( -139.62), SIMDE_FLOAT32_C( 486.64) }, INT8_C( 0), { SIMDE_FLOAT32_C( -139.62), SIMDE_FLOAT32_C( -139.62) } }, { { SIMDE_FLOAT32_C( -65.92), SIMDE_FLOAT32_C( 539.38) }, INT8_C( 0), { SIMDE_FLOAT32_C( -65.92), SIMDE_FLOAT32_C( -65.92) } }, { { SIMDE_FLOAT32_C( -303.01), SIMDE_FLOAT32_C( 704.85) }, INT8_C( 0), { SIMDE_FLOAT32_C( -303.01), SIMDE_FLOAT32_C( -303.01) } }, { { SIMDE_FLOAT32_C( 135.71), SIMDE_FLOAT32_C( 169.99) }, INT8_C( 1), { SIMDE_FLOAT32_C( 
169.99), SIMDE_FLOAT32_C( 169.99) } }, { { SIMDE_FLOAT32_C( 262.13), SIMDE_FLOAT32_C( 264.53) }, INT8_C( 1), { SIMDE_FLOAT32_C( 264.53), SIMDE_FLOAT32_C( 264.53) } }, { { SIMDE_FLOAT32_C( 988.58), SIMDE_FLOAT32_C( -848.75) }, INT8_C( 0), { SIMDE_FLOAT32_C( 988.58), SIMDE_FLOAT32_C( 988.58) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t vec; simde_float32x2_t r; vec = simde_vld1_f32(test_vec[i].vec); SIMDE_CONSTIFY_2_(simde_vdup_lane_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(0.0f)), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t vec = simde_test_arm_neon_random_f32x2(-1000.0, 1000.0); int lane = simde_test_codegen_random_i8() & 1; simde_float32x2_t r; SIMDE_CONSTIFY_2_(simde_vdup_lane_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(0.0f)), lane, vec); simde_test_arm_neon_write_f32x2(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdup_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float64 vec[1]; simde_float64 r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( 101.54) }, { SIMDE_FLOAT64_C( 101.54) } }, { { SIMDE_FLOAT64_C( 798.61) }, { SIMDE_FLOAT64_C( 798.61) } }, { { SIMDE_FLOAT64_C( 273.92) }, { SIMDE_FLOAT64_C( 273.92) } }, { { SIMDE_FLOAT64_C( -17.48) }, { SIMDE_FLOAT64_C( -17.48) } }, { { SIMDE_FLOAT64_C( 458.09) }, { SIMDE_FLOAT64_C( 458.09) } }, { { SIMDE_FLOAT64_C( 541.19) }, { SIMDE_FLOAT64_C( 541.19) } }, { { SIMDE_FLOAT64_C( -316.84) }, { SIMDE_FLOAT64_C( -316.84) } }, { { SIMDE_FLOAT64_C( 934.37) }, { SIMDE_FLOAT64_C( 934.37) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t vec; simde_float64x1_t r; vec = 
simde_vld1_f64(test_vec[i].vec); r = simde_vdup_lane_f64(vec, 0); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t vec = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t r = simde_vdup_lane_f64(vec, 0); simde_test_arm_neon_write_f64x1(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdup_lane_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int8_t vec[8]; int lane; int8_t r[8]; } test_vec[] = { { { -INT8_C( 109), INT8_C( 86), -INT8_C( 70), -INT8_C( 70), INT8_C( 94), -INT8_C( 56), -INT8_C( 20), INT8_C( 18) }, INT8_C( 3), { -INT8_C( 70), -INT8_C( 70), -INT8_C( 70), -INT8_C( 70), -INT8_C( 70), -INT8_C( 70), -INT8_C( 70), -INT8_C( 70) } }, { { INT8_C( 28), -INT8_C( 32), INT8_C( 48), -INT8_C( 97), -INT8_C( 103), INT8_C( 50), INT8_C( 39), INT8_C( 80) }, INT8_C( 6), { INT8_C( 39), INT8_C( 39), INT8_C( 39), INT8_C( 39), INT8_C( 39), INT8_C( 39), INT8_C( 39), INT8_C( 39) } }, { { INT8_C( 125), INT8_C( 42), INT8_MIN, -INT8_C( 38), -INT8_C( 52), -INT8_C( 85), INT8_C( 12), -INT8_C( 25) }, INT8_C( 1), { INT8_C( 42), INT8_C( 42), INT8_C( 42), INT8_C( 42), INT8_C( 42), INT8_C( 42), INT8_C( 42), INT8_C( 42) } }, { { -INT8_C( 51), -INT8_C( 113), -INT8_C( 42), -INT8_C( 121), INT8_C( 34), INT8_C( 44), INT8_C( 65), -INT8_C( 36) }, INT8_C( 3), { -INT8_C( 121), -INT8_C( 121), -INT8_C( 121), -INT8_C( 121), -INT8_C( 121), -INT8_C( 121), -INT8_C( 121), -INT8_C( 121) } }, { { INT8_C( 9), -INT8_C( 55), -INT8_C( 99), -INT8_C( 59), -INT8_C( 27), INT8_C( 125), -INT8_C( 11), -INT8_C( 124) }, INT8_C( 6), { -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11) } }, { { INT8_C( 40), -INT8_C( 84), INT8_C( 102), INT8_C( 22), INT8_C( 41), -INT8_C( 111), -INT8_C( 105), INT8_C( 3) }, INT8_C( 5), { -INT8_C( 111), -INT8_C( 111), 
-INT8_C( 111), -INT8_C( 111), -INT8_C( 111), -INT8_C( 111), -INT8_C( 111), -INT8_C( 111) } }, { { INT8_C( 66), INT8_C( 16), INT8_C( 68), INT8_C( 91), -INT8_C( 35), -INT8_C( 44), INT8_C( 49), INT8_C( 100) }, INT8_C( 6), { INT8_C( 49), INT8_C( 49), INT8_C( 49), INT8_C( 49), INT8_C( 49), INT8_C( 49), INT8_C( 49), INT8_C( 49) } }, { { INT8_C( 94), -INT8_C( 91), -INT8_C( 45), -INT8_C( 23), -INT8_C( 81), -INT8_C( 100), -INT8_C( 122), INT8_C( 116) }, INT8_C( 1), { -INT8_C( 91), -INT8_C( 91), -INT8_C( 91), -INT8_C( 91), -INT8_C( 91), -INT8_C( 91), -INT8_C( 91), -INT8_C( 91) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t vec; simde_int8x8_t r; vec = simde_vld1_s8(test_vec[i].vec); SIMDE_CONSTIFY_8_(simde_vdup_lane_s8, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_s8(INT8_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t vec = simde_test_arm_neon_random_i8x8(); int lane = simde_test_codegen_random_i8() & 7; simde_int8x8_t r; SIMDE_CONSTIFY_8_(simde_vdup_lane_s8, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_s8(INT8_C(0))), lane, vec); simde_test_arm_neon_write_i8x8(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdup_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int16_t vec[4]; int lane; int16_t r[4]; } test_vec[] = { { { -INT16_C( 18698), -INT16_C( 28829), -INT16_C( 10148), INT16_C( 27234) }, INT8_C( 2), { -INT16_C( 10148), -INT16_C( 10148), -INT16_C( 10148), -INT16_C( 10148) } }, { { INT16_C( 10218), -INT16_C( 11581), INT16_C( 15139), INT16_C( 3770) }, INT8_C( 0), { INT16_C( 10218), INT16_C( 10218), INT16_C( 10218), INT16_C( 10218) } }, { { -INT16_C( 17031), -INT16_C( 15001), -INT16_C( 20844), -INT16_C( 10697) }, 
INT8_C( 0), { -INT16_C( 17031), -INT16_C( 17031), -INT16_C( 17031), -INT16_C( 17031) } }, { { INT16_C( 15543), INT16_C( 6027), INT16_C( 16691), -INT16_C( 15750) }, INT8_C( 1), { INT16_C( 6027), INT16_C( 6027), INT16_C( 6027), INT16_C( 6027) } }, { { INT16_C( 9555), INT16_C( 2311), INT16_C( 11791), -INT16_C( 7731) }, INT8_C( 1), { INT16_C( 2311), INT16_C( 2311), INT16_C( 2311), INT16_C( 2311) } }, { { -INT16_C( 25848), -INT16_C( 7073), INT16_C( 7444), -INT16_C( 9909) }, INT8_C( 1), { -INT16_C( 7073), -INT16_C( 7073), -INT16_C( 7073), -INT16_C( 7073) } }, { { INT16_C( 4345), INT16_C( 10632), -INT16_C( 15161), -INT16_C( 8524) }, INT8_C( 3), { -INT16_C( 8524), -INT16_C( 8524), -INT16_C( 8524), -INT16_C( 8524) } }, { { INT16_C( 23029), -INT16_C( 27974), -INT16_C( 8276), -INT16_C( 19047) }, INT8_C( 2), { -INT16_C( 8276), -INT16_C( 8276), -INT16_C( 8276), -INT16_C( 8276) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t vec; simde_int16x4_t r; vec = simde_vld1_s16(test_vec[i].vec); SIMDE_CONSTIFY_4_(simde_vdup_lane_s16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_s16(INT16_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t vec = simde_test_arm_neon_random_i16x4(); int lane = simde_test_codegen_random_i8() & 3; simde_int16x4_t r; SIMDE_CONSTIFY_4_(simde_vdup_lane_s16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_s16(INT8_C(0))), lane, vec); simde_test_arm_neon_write_i16x4(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdup_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int32_t vec[2]; int lane; int32_t r[2]; } test_vec[] = { { { -INT32_C( 1646161109), -INT32_C( 1747230745) }, INT8_C( 0), { -INT32_C( 
1646161109), -INT32_C( 1646161109) } }, { { -INT32_C( 1434884716), INT32_C( 398389744) }, INT8_C( 1), { INT32_C( 398389744), INT32_C( 398389744) } }, { { -INT32_C( 1849175508), INT32_C( 1020151922) }, INT8_C( 0), { -INT32_C( 1849175508), -INT32_C( 1849175508) } }, { { -INT32_C( 1012620287), INT32_C( 1822701775) }, INT8_C( 1), { INT32_C( 1822701775), INT32_C( 1822701775) } }, { { INT32_C( 1219905284), -INT32_C( 2050876197) }, INT8_C( 0), { INT32_C( 1219905284), INT32_C( 1219905284) } }, { { -INT32_C( 870104141), INT32_C( 26539632) }, INT8_C( 0), { -INT32_C( 870104141), -INT32_C( 870104141) } }, { { -INT32_C( 1801137956), -INT32_C( 1808185135) }, INT8_C( 1), { -INT32_C( 1808185135), -INT32_C( 1808185135) } }, { { -INT32_C( 1870382735), -INT32_C( 2058958019) }, INT8_C( 1), { -INT32_C( 2058958019), -INT32_C( 2058958019) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t vec; simde_int32x2_t r; vec = simde_vld1_s32(test_vec[i].vec); SIMDE_CONSTIFY_2_(simde_vdup_lane_s32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_s32(INT32_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t vec = simde_test_arm_neon_random_i32x2(); int lane = simde_test_codegen_random_i8() & 1; simde_int32x2_t r; SIMDE_CONSTIFY_2_(simde_vdup_lane_s32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_s32(INT8_C(0))), lane, vec); simde_test_arm_neon_write_i32x2(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdup_lane_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int64_t vec[1]; int64_t r[1]; } test_vec[] = { { { -INT64_C( 4066063288374638750) }, { -INT64_C( 4066063288374638750) } }, { { -INT64_C( 7732336477994252064) }, { -INT64_C( 7732336477994252064) 
} }, { { INT64_C( 5798089950005920716) }, { INT64_C( 5798089950005920716) } }, { { -INT64_C( 5770497274961656200) }, { -INT64_C( 5770497274961656200) } }, { { INT64_C( 7430289055526295386) }, { INT64_C( 7430289055526295386) } }, { { INT64_C( 9060392184859686968) }, { INT64_C( 9060392184859686968) } }, { { INT64_C( 497077724683344253) }, { INT64_C( 497077724683344253) } }, { { -INT64_C( 2234852464160771073) }, { -INT64_C( 2234852464160771073) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t vec; simde_int64x1_t r; vec = simde_vld1_s64(test_vec[i].vec); r = simde_vdup_lane_s64(vec, 0); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t vec = simde_test_arm_neon_random_i64x1(); simde_int64x1_t r = simde_vdup_lane_s64(vec, 0); simde_test_arm_neon_write_i64x1(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdup_lane_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint8_t vec[8]; int lane; uint8_t r[8]; } test_vec[] = { { { UINT8_C(225), UINT8_C( 70), UINT8_C(222), UINT8_C( 40), UINT8_C( 85), UINT8_C( 93), UINT8_C( 35), UINT8_C(172) }, UINT8_C( 6), { UINT8_C( 35), UINT8_C( 35), UINT8_C( 35), UINT8_C( 35), UINT8_C( 35), UINT8_C( 35), UINT8_C( 35), UINT8_C( 35) } }, { { UINT8_C( 14), UINT8_C(104), UINT8_C(219), UINT8_C(190), UINT8_C( 18), UINT8_C( 59), UINT8_C( 6), UINT8_C(151) }, UINT8_C( 5), { UINT8_C( 59), UINT8_C( 59), UINT8_C( 59), UINT8_C( 59), UINT8_C( 59), UINT8_C( 59), UINT8_C( 59), UINT8_C( 59) } }, { { UINT8_C( 86), UINT8_C(237), UINT8_C( 93), UINT8_C(114), UINT8_C(246), UINT8_C(234), UINT8_C(197), UINT8_C( 48) }, UINT8_C( 6), { UINT8_C(197), UINT8_C(197), UINT8_C(197), UINT8_C(197), UINT8_C(197), UINT8_C(197), UINT8_C(197), UINT8_C(197) } }, { { UINT8_C(140), UINT8_C(105), UINT8_C(114), UINT8_C( 55), UINT8_C( 74), 
UINT8_C(184), UINT8_C( 21), UINT8_C(115) }, UINT8_C( 5), { UINT8_C(184), UINT8_C(184), UINT8_C(184), UINT8_C(184), UINT8_C(184), UINT8_C(184), UINT8_C(184), UINT8_C(184) } }, { { UINT8_C(114), UINT8_C(150), UINT8_C(186), UINT8_C(200), UINT8_C(164), UINT8_C( 34), UINT8_C(164), UINT8_C( 98) }, UINT8_C( 4), { UINT8_C(164), UINT8_C(164), UINT8_C(164), UINT8_C(164), UINT8_C(164), UINT8_C(164), UINT8_C(164), UINT8_C(164) } }, { { UINT8_C(223), UINT8_C(104), UINT8_C(203), UINT8_C( 12), UINT8_C(190), UINT8_C(184), UINT8_C(105), UINT8_C( 48) }, UINT8_C( 7), { UINT8_C( 48), UINT8_C( 48), UINT8_C( 48), UINT8_C( 48), UINT8_C( 48), UINT8_C( 48), UINT8_C( 48), UINT8_C( 48) } }, { { UINT8_C( 84), UINT8_C(245), UINT8_C(223), UINT8_C(146), UINT8_C(130), UINT8_C( 73), UINT8_C( 4), UINT8_C(185) }, UINT8_C( 3), { UINT8_C(146), UINT8_C(146), UINT8_C(146), UINT8_C(146), UINT8_C(146), UINT8_C(146), UINT8_C(146), UINT8_C(146) } }, { { UINT8_C(188), UINT8_C(206), UINT8_C( 6), UINT8_C(202), UINT8_C( 64), UINT8_C(157), UINT8_C(132), UINT8_C( 8) }, UINT8_C( 1), { UINT8_C(206), UINT8_C(206), UINT8_C(206), UINT8_C(206), UINT8_C(206), UINT8_C(206), UINT8_C(206), UINT8_C(206) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t vec; simde_uint8x8_t r; vec = simde_vld1_u8(test_vec[i].vec); SIMDE_CONSTIFY_8_(simde_vdup_lane_u8, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_u8(UINT8_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t vec = simde_test_arm_neon_random_u8x8(); int lane = simde_test_codegen_random_u8() & 7; simde_uint8x8_t r; SIMDE_CONSTIFY_8_(simde_vdup_lane_u8, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_u8(UINT8_C(0))), lane, vec); simde_test_arm_neon_write_u8x8(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, HEDLEY_STATIC_CAST(uint8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdup_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint16_t vec[4]; int lane; uint16_t r[4]; } test_vec[] = { { { UINT16_C(39155), UINT16_C(17108), UINT16_C(35862), UINT16_C(33952) }, UINT8_C( 0), { UINT16_C(39155), UINT16_C(39155), UINT16_C(39155), UINT16_C(39155) } }, { { UINT16_C(25643), UINT16_C(65303), UINT16_C(24516), UINT16_C(25191) }, UINT8_C( 2), { UINT16_C(24516), UINT16_C(24516), UINT16_C(24516), UINT16_C(24516) } }, { { UINT16_C(41498), UINT16_C(10432), UINT16_C(25102), UINT16_C( 2461) }, UINT8_C( 2), { UINT16_C(25102), UINT16_C(25102), UINT16_C(25102), UINT16_C(25102) } }, { { UINT16_C(35666), UINT16_C(21432), UINT16_C(20862), UINT16_C(49191) }, UINT8_C( 3), { UINT16_C(49191), UINT16_C(49191), UINT16_C(49191), UINT16_C(49191) } }, { { UINT16_C(25011), UINT16_C(28908), UINT16_C(20620), UINT16_C(35719) }, UINT8_C( 1), { UINT16_C(28908), UINT16_C(28908), UINT16_C(28908), UINT16_C(28908) } }, { { UINT16_C(62439), UINT16_C(54647), UINT16_C( 6413), UINT16_C(13717) }, UINT8_C( 0), { UINT16_C(62439), UINT16_C(62439), UINT16_C(62439), UINT16_C(62439) } }, { { UINT16_C(54007), UINT16_C(11569), UINT16_C(48165), UINT16_C(30950) }, UINT8_C( 3), { UINT16_C(30950), UINT16_C(30950), UINT16_C(30950), UINT16_C(30950) } }, { { UINT16_C(41015), UINT16_C(40699), UINT16_C(23635), UINT16_C(50058) }, UINT8_C( 1), { UINT16_C(40699), UINT16_C(40699), UINT16_C(40699), UINT16_C(40699) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t vec; simde_uint16x4_t r; vec = simde_vld1_u16(test_vec[i].vec); SIMDE_CONSTIFY_4_(simde_vdup_lane_u16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_u16(UINT16_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t vec = simde_test_arm_neon_random_u16x4(); int lane 
= simde_test_codegen_random_u8() & 3; simde_uint16x4_t r; SIMDE_CONSTIFY_4_(simde_vdup_lane_u16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_u16(UINT8_C(0))), lane, vec); simde_test_arm_neon_write_u16x4(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, HEDLEY_STATIC_CAST(uint8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdup_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint32_t vec[2]; int lane; uint32_t r[2]; } test_vec[] = { { { UINT32_C(4034153435), UINT32_C( 124217138) }, UINT8_C( 0), { UINT32_C(4034153435), UINT32_C(4034153435) } }, { { UINT32_C(2846465153), UINT32_C(3235544211) }, UINT8_C( 1), { UINT32_C(3235544211), UINT32_C(3235544211) } }, { { UINT32_C(3524896407), UINT32_C(2093857245) }, UINT8_C( 1), { UINT32_C(2093857245), UINT32_C(2093857245) } }, { { UINT32_C( 332400170), UINT32_C(3515292641) }, UINT8_C( 1), { UINT32_C(3515292641), UINT32_C(3515292641) } }, { { UINT32_C(1666464239), UINT32_C(1661857978) }, UINT8_C( 1), { UINT32_C(1661857978), UINT32_C(1661857978) } }, { { UINT32_C( 709115273), UINT32_C(2789468884) }, UINT8_C( 0), { UINT32_C( 709115273), UINT32_C( 709115273) } }, { { UINT32_C( 172258557), UINT32_C(2983873182) }, UINT8_C( 0), { UINT32_C( 172258557), UINT32_C( 172258557) } }, { { UINT32_C(1140668662), UINT32_C(2342008359) }, UINT8_C( 0), { UINT32_C(1140668662), UINT32_C(1140668662) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t vec; simde_uint32x2_t r; vec = simde_vld1_u32(test_vec[i].vec); SIMDE_CONSTIFY_2_(simde_vdup_lane_u32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_u32(UINT32_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t vec = simde_test_arm_neon_random_u32x2(); int lane = simde_test_codegen_random_u8() & 1; simde_uint32x2_t 
r; SIMDE_CONSTIFY_2_(simde_vdup_lane_u32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_u32(UINT8_C(0))), lane, vec); simde_test_arm_neon_write_u32x2(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, HEDLEY_STATIC_CAST(uint8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdup_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint64_t vec[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C( 5426997122108201096) }, { UINT64_C( 5426997122108201096) } }, { { UINT64_C( 5080213220590762597) }, { UINT64_C( 5080213220590762597) } }, { { UINT64_C( 533322304534421141) }, { UINT64_C( 533322304534421141) } }, { { UINT64_C(14619170657803413946) }, { UINT64_C(14619170657803413946) } }, { { UINT64_C( 3151999422994724178) }, { UINT64_C( 3151999422994724178) } }, { { UINT64_C( 7902049161203633248) }, { UINT64_C( 7902049161203633248) } }, { { UINT64_C(12874740165647350485) }, { UINT64_C(12874740165647350485) } }, { { UINT64_C(16715255793253080045) }, { UINT64_C(16715255793253080045) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t vec; simde_uint64x1_t r; vec = simde_vld1_u64(test_vec[i].vec); r = simde_vdup_lane_u64(vec, 0); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_t vec = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t r = simde_vdup_lane_u64(vec, 0); simde_test_arm_neon_write_u64x1(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdup_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float32 vec[4]; int lane; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 757.85), SIMDE_FLOAT32_C( -809.44), SIMDE_FLOAT32_C( -135.68), SIMDE_FLOAT32_C( -140.81) }, INT8_C( 0), { SIMDE_FLOAT32_C( 757.85), 
SIMDE_FLOAT32_C( 757.85) } }, { { SIMDE_FLOAT32_C( -948.46), SIMDE_FLOAT32_C( 482.38), SIMDE_FLOAT32_C( 971.59), SIMDE_FLOAT32_C( -845.91) }, INT8_C( 0), { SIMDE_FLOAT32_C( -948.46), SIMDE_FLOAT32_C( -948.46) } }, { { SIMDE_FLOAT32_C( 656.44), SIMDE_FLOAT32_C( -574.13), SIMDE_FLOAT32_C( -306.48), SIMDE_FLOAT32_C( 302.94) }, INT8_C( 1), { SIMDE_FLOAT32_C( -574.13), SIMDE_FLOAT32_C( -574.13) } }, { { SIMDE_FLOAT32_C( -419.88), SIMDE_FLOAT32_C( 844.59), SIMDE_FLOAT32_C( -860.06), SIMDE_FLOAT32_C( 931.61) }, INT8_C( 2), { SIMDE_FLOAT32_C( -860.06), SIMDE_FLOAT32_C( -860.06) } }, { { SIMDE_FLOAT32_C( -45.34), SIMDE_FLOAT32_C( 773.65), SIMDE_FLOAT32_C( 451.92), SIMDE_FLOAT32_C( -733.20) }, INT8_C( 1), { SIMDE_FLOAT32_C( 773.65), SIMDE_FLOAT32_C( 773.65) } }, { { SIMDE_FLOAT32_C( 845.90), SIMDE_FLOAT32_C( -853.55), SIMDE_FLOAT32_C( -684.75), SIMDE_FLOAT32_C( 154.60) }, INT8_C( 2), { SIMDE_FLOAT32_C( -684.75), SIMDE_FLOAT32_C( -684.75) } }, { { SIMDE_FLOAT32_C( -651.35), SIMDE_FLOAT32_C( -87.55), SIMDE_FLOAT32_C( -854.44), SIMDE_FLOAT32_C( 212.97) }, INT8_C( 1), { SIMDE_FLOAT32_C( -87.55), SIMDE_FLOAT32_C( -87.55) } }, { { SIMDE_FLOAT32_C( -829.33), SIMDE_FLOAT32_C( 264.52), SIMDE_FLOAT32_C( 254.03), SIMDE_FLOAT32_C( -857.74) }, INT8_C( 3), { SIMDE_FLOAT32_C( -857.74), SIMDE_FLOAT32_C( -857.74) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t vec; simde_float32x2_t r; vec = simde_vld1q_f32(test_vec[i].vec); SIMDE_CONSTIFY_4_(simde_vdup_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(0.0f)), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t vec = simde_test_arm_neon_random_f32x4(-1000.0, 1000.0); int lane = simde_test_codegen_random_i8() & 3; simde_float32x2_t r; SIMDE_CONSTIFY_4_(simde_vdup_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(0.0f)), lane, vec); 
simde_test_arm_neon_write_f32x4(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdup_laneq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float64 vec[2]; int lane; simde_float64 r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( -964.14), SIMDE_FLOAT64_C( 302.49) }, INT8_C( 0), { SIMDE_FLOAT64_C( -964.14) } }, { { SIMDE_FLOAT64_C( 621.68), SIMDE_FLOAT64_C( 488.00) }, INT8_C( 0), { SIMDE_FLOAT64_C( 621.68) } }, { { SIMDE_FLOAT64_C( 950.70), SIMDE_FLOAT64_C( -572.96) }, INT8_C( 1), { SIMDE_FLOAT64_C( -572.96) } }, { { SIMDE_FLOAT64_C( 188.34), SIMDE_FLOAT64_C( -751.64) }, INT8_C( 1), { SIMDE_FLOAT64_C( -751.64) } }, { { SIMDE_FLOAT64_C( 97.41), SIMDE_FLOAT64_C( -734.30) }, INT8_C( 1), { SIMDE_FLOAT64_C( -734.30) } }, { { SIMDE_FLOAT64_C( 499.67), SIMDE_FLOAT64_C( -413.57) }, INT8_C( 1), { SIMDE_FLOAT64_C( -413.57) } }, { { SIMDE_FLOAT64_C( 766.87), SIMDE_FLOAT64_C( -42.42) }, INT8_C( 0), { SIMDE_FLOAT64_C( 766.87) } }, { { SIMDE_FLOAT64_C( -881.52), SIMDE_FLOAT64_C( -131.16) }, INT8_C( 1), { SIMDE_FLOAT64_C( -131.16) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t vec; simde_float64x1_t r; vec = simde_vld1q_f64(test_vec[i].vec); SIMDE_CONSTIFY_2_(simde_vdup_laneq_f64, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f64(0.0)), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t vec = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); int lane = simde_test_codegen_random_i8() & 1; simde_float64x1_t r; SIMDE_CONSTIFY_2_(simde_vdup_laneq_f64, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f64(0.0)), lane, vec); simde_test_arm_neon_write_f64x2(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, 
HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdup_laneq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int8_t vec[16]; int lane; int8_t r[8]; } test_vec[] = { { { -INT8_C( 122), -INT8_C( 99), -INT8_C( 68), -INT8_C( 72), INT8_C( 111), INT8_C( 103), -INT8_C( 37), -INT8_C( 99), -INT8_C( 22), INT8_C( 69), INT8_C( 111), -INT8_C( 94), INT8_C( 15), -INT8_C( 65), -INT8_C( 92), INT8_C( 54) }, INT8_C( 8), { -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22) } }, { { -INT8_C( 44), -INT8_C( 29), INT8_C( 30), INT8_C( 24), -INT8_C( 11), -INT8_C( 121), -INT8_C( 123), -INT8_C( 35), INT8_C( 114), -INT8_C( 117), INT8_C( 12), INT8_C( 84), INT8_C( 67), INT8_C( 69), -INT8_C( 38), -INT8_C( 32) }, INT8_C( 1), { -INT8_C( 29), -INT8_C( 29), -INT8_C( 29), -INT8_C( 29), -INT8_C( 29), -INT8_C( 29), -INT8_C( 29), -INT8_C( 29) } }, { { -INT8_C( 109), INT8_C( 79), INT8_C( 104), INT8_C( 110), -INT8_C( 20), INT8_C( 82), -INT8_C( 77), INT8_C( 91), -INT8_C( 11), -INT8_C( 61), INT8_C( 27), -INT8_C( 103), -INT8_C( 7), INT8_C( 51), INT8_C( 109), -INT8_C( 36) }, INT8_C( 1), { INT8_C( 79), INT8_C( 79), INT8_C( 79), INT8_C( 79), INT8_C( 79), INT8_C( 79), INT8_C( 79), INT8_C( 79) } }, { { -INT8_C( 122), -INT8_C( 46), -INT8_C( 40), INT8_C( 11), -INT8_C( 81), INT8_C( 74), -INT8_C( 105), -INT8_C( 69), -INT8_C( 98), -INT8_C( 38), INT8_C( 0), INT8_C( 121), -INT8_C( 69), INT8_C( 1), INT8_C( 12), INT8_C( 10) }, INT8_C( 9), { -INT8_C( 38), -INT8_C( 38), -INT8_C( 38), -INT8_C( 38), -INT8_C( 38), -INT8_C( 38), -INT8_C( 38), -INT8_C( 38) } }, { { INT8_C( 122), -INT8_C( 9), -INT8_C( 68), INT8_C( 46), INT8_C( 82), -INT8_C( 79), -INT8_C( 15), INT8_C( 109), INT8_C( 74), -INT8_C( 22), -INT8_C( 95), -INT8_C( 73), -INT8_C( 57), -INT8_C( 14), INT8_C( 61), -INT8_C( 103) }, INT8_C( 11), { -INT8_C( 73), -INT8_C( 73), -INT8_C( 73), -INT8_C( 73), 
-INT8_C( 73), -INT8_C( 73), -INT8_C( 73), -INT8_C( 73) } }, { { INT8_C( 73), INT8_C( 72), INT8_C( 21), -INT8_C( 32), INT8_C( 3), -INT8_C( 76), -INT8_C( 70), INT8_C( 4), INT8_C( 45), INT8_C( 117), INT8_C( 5), INT8_C( 57), INT8_MIN, INT8_C( 111), -INT8_C( 77), INT8_C( 119) }, INT8_C( 11), { INT8_C( 57), INT8_C( 57), INT8_C( 57), INT8_C( 57), INT8_C( 57), INT8_C( 57), INT8_C( 57), INT8_C( 57) } }, { { -INT8_C( 31), -INT8_C( 55), -INT8_C( 36), -INT8_C( 46), INT8_C( 55), INT8_C( 38), -INT8_C( 67), -INT8_C( 40), -INT8_C( 35), -INT8_C( 124), -INT8_C( 54), INT8_C( 27), INT8_C( 29), -INT8_C( 107), INT8_C( 100), INT8_C( 101) }, INT8_C( 11), { INT8_C( 27), INT8_C( 27), INT8_C( 27), INT8_C( 27), INT8_C( 27), INT8_C( 27), INT8_C( 27), INT8_C( 27) } }, { { INT8_C( 68), INT8_C( 104), INT8_C( 95), -INT8_C( 2), INT8_C( 108), -INT8_C( 116), INT8_C( 116), INT8_C( 114), -INT8_C( 59), -INT8_C( 12), -INT8_C( 31), INT8_C( 120), INT8_C( 107), INT8_C( 12), INT8_C( 90), INT8_C( 52) }, INT8_C( 8), { -INT8_C( 59), -INT8_C( 59), -INT8_C( 59), -INT8_C( 59), -INT8_C( 59), -INT8_C( 59), -INT8_C( 59), -INT8_C( 59) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t vec; simde_int8x8_t r; vec = simde_vld1q_s8(test_vec[i].vec); SIMDE_CONSTIFY_16_(simde_vdup_laneq_s8, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_s8(INT8_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t vec = simde_test_arm_neon_random_i8x16(); int lane = simde_test_codegen_random_i8() & 15; simde_int8x8_t r; SIMDE_CONSTIFY_16_(simde_vdup_laneq_s8, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_s8(INT8_C(0))), lane, vec); simde_test_arm_neon_write_i8x16(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } 
static int test_simde_vdup_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int16_t vec[8]; int lane; int16_t r[4]; } test_vec[] = { { { INT16_C( 23653), -INT16_C( 16842), -INT16_C( 10435), -INT16_C( 21176), INT16_C( 15511), -INT16_C( 31969), INT16_C( 1229), INT16_C( 13269) }, INT8_C( 7), { INT16_C( 13269), INT16_C( 13269), INT16_C( 13269), INT16_C( 13269) } }, { { -INT16_C( 16631), -INT16_C( 285), INT16_C( 1669), INT16_C( 6286), -INT16_C( 11777), -INT16_C( 29100), -INT16_C( 1421), -INT16_C( 12045) }, INT8_C( 0), { -INT16_C( 16631), -INT16_C( 16631), -INT16_C( 16631), -INT16_C( 16631) } }, { { INT16_C( 3506), -INT16_C( 1528), -INT16_C( 24645), -INT16_C( 9674), INT16_C( 802), -INT16_C( 2081), INT16_C( 3638), -INT16_C( 2560) }, INT8_C( 1), { -INT16_C( 1528), -INT16_C( 1528), -INT16_C( 1528), -INT16_C( 1528) } }, { { INT16_C( 31742), -INT16_C( 29449), -INT16_C( 2413), -INT16_C( 6051), -INT16_C( 11899), INT16_C( 30946), INT16_C( 4769), -INT16_C( 20950) }, INT8_C( 2), { -INT16_C( 2413), -INT16_C( 2413), -INT16_C( 2413), -INT16_C( 2413) } }, { { INT16_C( 26917), INT16_C( 23481), -INT16_C( 9404), INT16_C( 9055), -INT16_C( 27182), -INT16_C( 11727), INT16_C( 9099), INT16_C( 1744) }, INT8_C( 2), { -INT16_C( 9404), -INT16_C( 9404), -INT16_C( 9404), -INT16_C( 9404) } }, { { -INT16_C( 26019), -INT16_C( 17903), -INT16_C( 27006), INT16_C( 25739), INT16_C( 11278), INT16_C( 14710), -INT16_C( 28197), INT16_C( 17502) }, INT8_C( 2), { -INT16_C( 27006), -INT16_C( 27006), -INT16_C( 27006), -INT16_C( 27006) } }, { { -INT16_C( 30535), INT16_C( 6182), -INT16_C( 1877), -INT16_C( 8786), INT16_C( 14795), -INT16_C( 25856), INT16_C( 6720), -INT16_C( 9480) }, INT8_C( 3), { -INT16_C( 8786), -INT16_C( 8786), -INT16_C( 8786), -INT16_C( 8786) } }, { { INT16_C( 23731), INT16_C( 16065), -INT16_C( 12096), INT16_C( 13931), INT16_C( 17929), INT16_C( 26567), INT16_C( 4746), INT16_C( 4896) }, INT8_C( 0), { INT16_C( 23731), INT16_C( 23731), INT16_C( 23731), INT16_C( 23731) } } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t vec; simde_int16x4_t r; vec = simde_vld1q_s16(test_vec[i].vec); SIMDE_CONSTIFY_8_(simde_vdup_laneq_s16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_s16(INT16_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t vec = simde_test_arm_neon_random_i16x8(); int lane = simde_test_codegen_random_i8() & 7; simde_int16x4_t r; SIMDE_CONSTIFY_8_(simde_vdup_laneq_s16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_s16(INT8_C(0))), lane, vec); simde_test_arm_neon_write_i16x8(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdup_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int32_t vec[4]; int lane; int32_t r[2]; } test_vec[] = { { { -INT32_C( 416235975), -INT32_C( 1692337253), -INT32_C( 1883873129), -INT32_C( 1774001862) }, INT8_C( 3), { -INT32_C( 1774001862), -INT32_C( 1774001862) } }, { { -INT32_C( 327985535), INT32_C( 1412594829), INT32_C( 57064675), -INT32_C( 1908629809) }, INT8_C( 3), { -INT32_C( 1908629809), -INT32_C( 1908629809) } }, { { INT32_C( 1154099491), INT32_C( 2074370501), INT32_C( 861724657), -INT32_C( 860553355) }, INT8_C( 2), { INT32_C( 861724657), INT32_C( 861724657) } }, { { -INT32_C( 756131424), INT32_C( 344969645), INT32_C( 296902357), INT32_C( 372605420) }, INT8_C( 0), { -INT32_C( 756131424), -INT32_C( 756131424) } }, { { INT32_C( 497933177), -INT32_C( 1292067242), -INT32_C( 2035125550), -INT32_C( 1759042498) }, INT8_C( 3), { -INT32_C( 1759042498), -INT32_C( 1759042498) } }, { { -INT32_C( 1998306055), INT32_C( 199735640), INT32_C( 9229259), -INT32_C( 998647575) }, INT8_C( 2), { INT32_C( 9229259), INT32_C( 9229259) } }, { { -INT32_C( 1826350442), INT32_C( 2131097293), -INT32_C( 
1532804227), INT32_C( 513652442) }, INT8_C( 3), { INT32_C( 513652442), INT32_C( 513652442) } }, { { INT32_C( 223639334), INT32_C( 249569154), -INT32_C( 1729639905), -INT32_C( 1456509554) }, INT8_C( 1), { INT32_C( 249569154), INT32_C( 249569154) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t vec; simde_int32x2_t r; vec = simde_vld1q_s32(test_vec[i].vec); SIMDE_CONSTIFY_4_(simde_vdup_laneq_s32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_s32(INT32_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t vec = simde_test_arm_neon_random_i32x4(); int lane = simde_test_codegen_random_i8() & 3; simde_int32x2_t r; SIMDE_CONSTIFY_4_(simde_vdup_laneq_s32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_s32(INT32_C(0))), lane, vec); simde_test_arm_neon_write_i32x4(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdup_laneq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int64_t vec[2]; int lane; int64_t r[1]; } test_vec[] = { { { -INT64_C( 4867007410252740494), INT64_C( 6650439126861753290) }, INT8_C( 0), { -INT64_C( 4867007410252740494) } }, { { INT64_C( 6951225055928169884), INT64_C( 68363202768986995) }, INT8_C( 1), { INT64_C( 68363202768986995) } }, { { INT64_C( 4436684764484439282), -INT64_C( 3843432968721221438) }, INT8_C( 0), { INT64_C( 4436684764484439282) } }, { { INT64_C( 5042043137711416734), INT64_C( 9031044296322969711) }, INT8_C( 1), { INT64_C( 9031044296322969711) } }, { { INT64_C( 6558710019170682555), INT64_C( 688351337355483718) }, INT8_C( 1), { INT64_C( 688351337355483718) } }, { { -INT64_C( 536951092128346461), -INT64_C( 4625794981386355522) }, INT8_C( 0), { -INT64_C( 536951092128346461) } }, { { -INT64_C( 
2721629803226676710), -INT64_C( 5335538334588657980) }, INT8_C( 0), { -INT64_C( 2721629803226676710) } }, { { INT64_C( 3429603820960188560), INT64_C( 1468284878264963704) }, INT8_C( 1), { INT64_C( 1468284878264963704) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t vec; simde_int64x1_t r; vec = simde_vld1q_s64(test_vec[i].vec); SIMDE_CONSTIFY_2_(simde_vdup_laneq_s64, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_s64(INT64_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t vec = simde_test_arm_neon_random_i64x2(); int lane = simde_test_codegen_random_i8() & 1; simde_int64x1_t r; SIMDE_CONSTIFY_2_(simde_vdup_laneq_s64, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_s64(INT64_C(0))), lane, vec); simde_test_arm_neon_write_i64x2(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdup_laneq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint8_t vec[16]; int lane; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 77), UINT8_C(147), UINT8_C( 62), UINT8_C(115), UINT8_C(118), UINT8_C( 11), UINT8_C( 79), UINT8_C(238), UINT8_C(216), UINT8_C( 18), UINT8_C(104), UINT8_C(180), UINT8_C(173), UINT8_C(197), UINT8_C(248), UINT8_C(124) }, UINT8_C( 15), { UINT8_C(124), UINT8_C(124), UINT8_C(124), UINT8_C(124), UINT8_C(124), UINT8_C(124), UINT8_C(124), UINT8_C(124) } }, { { UINT8_C(115), UINT8_C(176), UINT8_C( 5), UINT8_C(172), UINT8_C(204), UINT8_C(177), UINT8_C(208), UINT8_C(120), UINT8_C(189), UINT8_C( 77), UINT8_C(208), UINT8_C( 85), UINT8_C( 21), UINT8_C( 46), UINT8_C(162), UINT8_C(168) }, UINT8_C( 12), { UINT8_C( 21), UINT8_C( 21), UINT8_C( 21), UINT8_C( 21), UINT8_C( 21), UINT8_C( 21), UINT8_C( 21), UINT8_C( 21) } }, { { UINT8_C( 22), 
UINT8_C( 30), UINT8_C(119), UINT8_C(101), UINT8_C( 12), UINT8_C( 79), UINT8_C(119), UINT8_C(116), UINT8_C( 4), UINT8_C( 37), UINT8_C( 57), UINT8_C(252), UINT8_C(161), UINT8_C(233), UINT8_C(111), UINT8_C( 81) }, UINT8_C( 14), { UINT8_C(111), UINT8_C(111), UINT8_C(111), UINT8_C(111), UINT8_C(111), UINT8_C(111), UINT8_C(111), UINT8_C(111) } }, { { UINT8_C( 28), UINT8_C( 30), UINT8_C(159), UINT8_C(236), UINT8_C(150), UINT8_C( 93), UINT8_C( 57), UINT8_C(102), UINT8_C(178), UINT8_C( 79), UINT8_C(148), UINT8_C( 85), UINT8_C(247), UINT8_C( 0), UINT8_C(107), UINT8_C( 22) }, UINT8_C( 7), { UINT8_C(102), UINT8_C(102), UINT8_C(102), UINT8_C(102), UINT8_C(102), UINT8_C(102), UINT8_C(102), UINT8_C(102) } }, { { UINT8_C(208), UINT8_C( 34), UINT8_C(199), UINT8_C( 72), UINT8_C(151), UINT8_C(203), UINT8_C(109), UINT8_C(208), UINT8_C(199), UINT8_C( 14), UINT8_C(185), UINT8_C( 55), UINT8_C( 96), UINT8_C(167), UINT8_C( 83), UINT8_C(126) }, UINT8_C( 7), { UINT8_C(208), UINT8_C(208), UINT8_C(208), UINT8_C(208), UINT8_C(208), UINT8_C(208), UINT8_C(208), UINT8_C(208) } }, { { UINT8_C( 63), UINT8_C( 20), UINT8_C(164), UINT8_C(120), UINT8_C(122), UINT8_C( 86), UINT8_C(199), UINT8_C( 14), UINT8_C(171), UINT8_C(191), UINT8_C( 14), UINT8_C( 22), UINT8_C(213), UINT8_C(133), UINT8_C(231), UINT8_C(247) }, UINT8_C( 12), { UINT8_C(213), UINT8_C(213), UINT8_C(213), UINT8_C(213), UINT8_C(213), UINT8_C(213), UINT8_C(213), UINT8_C(213) } }, { { UINT8_C( 47), UINT8_C(142), UINT8_C( 23), UINT8_C(156), UINT8_C( 95), UINT8_C(223), UINT8_C(170), UINT8_C( 24), UINT8_C( 22), UINT8_C( 10), UINT8_C(192), UINT8_C(105), UINT8_C(136), UINT8_C( 7), UINT8_C(168), UINT8_C(156) }, UINT8_C( 11), { UINT8_C(105), UINT8_C(105), UINT8_C(105), UINT8_C(105), UINT8_C(105), UINT8_C(105), UINT8_C(105), UINT8_C(105) } }, { { UINT8_C( 32), UINT8_C( 22), UINT8_C( 1), UINT8_C(232), UINT8_C( 36), UINT8_C(173), UINT8_C(167), UINT8_C( 50), UINT8_C(195), UINT8_C(124), UINT8_C(184), UINT8_C(170), UINT8_C(115), UINT8_C( 4), UINT8_C(217), 
UINT8_C( 2) }, UINT8_C( 12), { UINT8_C(115), UINT8_C(115), UINT8_C(115), UINT8_C(115), UINT8_C(115), UINT8_C(115), UINT8_C(115), UINT8_C(115) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t vec; simde_uint8x8_t r; vec = simde_vld1q_u8(test_vec[i].vec); SIMDE_CONSTIFY_16_(simde_vdup_laneq_u8, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_u8(UINT8_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t vec = simde_test_arm_neon_random_u8x16(); int lane = simde_test_codegen_random_u8() & 15; simde_uint8x8_t r; SIMDE_CONSTIFY_16_(simde_vdup_laneq_u8, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_u8(UINT8_C(0))), lane, vec); simde_test_arm_neon_write_u8x16(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, HEDLEY_STATIC_CAST(uint8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdup_laneq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint16_t vec[8]; int lane; uint16_t r[4]; } test_vec[] = { { { UINT16_C(24949), UINT16_C( 8443), UINT16_C( 4473), UINT16_C(14634), UINT16_C(45946), UINT16_C( 8768), UINT16_C(60239), UINT16_C(26178) }, UINT8_C( 5), { UINT16_C( 8768), UINT16_C( 8768), UINT16_C( 8768), UINT16_C( 8768) } }, { { UINT16_C(35370), UINT16_C(53658), UINT16_C(23997), UINT16_C(30029), UINT16_C(49416), UINT16_C(57721), UINT16_C(38339), UINT16_C( 9303) }, UINT8_C( 0), { UINT16_C(35370), UINT16_C(35370), UINT16_C(35370), UINT16_C(35370) } }, { { UINT16_C(40311), UINT16_C(41377), UINT16_C( 7127), UINT16_C( 5972), UINT16_C(42045), UINT16_C(32771), UINT16_C(61450), UINT16_C(38058) }, UINT8_C( 2), { UINT16_C( 7127), UINT16_C( 7127), UINT16_C( 7127), UINT16_C( 7127) } }, { { UINT16_C(20860), UINT16_C(51687), UINT16_C(61382), UINT16_C(16522), UINT16_C(19921), UINT16_C(10453), UINT16_C(26225), 
UINT16_C( 3999) }, UINT8_C( 7), { UINT16_C( 3999), UINT16_C( 3999), UINT16_C( 3999), UINT16_C( 3999) } }, { { UINT16_C(58944), UINT16_C(38179), UINT16_C(24829), UINT16_C( 57), UINT16_C(17376), UINT16_C(35824), UINT16_C(31447), UINT16_C(10503) }, UINT8_C( 2), { UINT16_C(24829), UINT16_C(24829), UINT16_C(24829), UINT16_C(24829) } }, { { UINT16_C(61392), UINT16_C(23377), UINT16_C( 8751), UINT16_C( 1448), UINT16_C( 6730), UINT16_C(59755), UINT16_C(29225), UINT16_C( 3882) }, UINT8_C( 5), { UINT16_C(59755), UINT16_C(59755), UINT16_C(59755), UINT16_C(59755) } }, { { UINT16_C( 3263), UINT16_C(63734), UINT16_C(54797), UINT16_C(64827), UINT16_C( 4705), UINT16_C(26744), UINT16_C(55867), UINT16_C(11065) }, UINT8_C( 3), { UINT16_C(64827), UINT16_C(64827), UINT16_C(64827), UINT16_C(64827) } }, { { UINT16_C(23188), UINT16_C(15438), UINT16_C(39007), UINT16_C(51798), UINT16_C(32642), UINT16_C(44093), UINT16_C(53902), UINT16_C(39787) }, UINT8_C( 0), { UINT16_C(23188), UINT16_C(23188), UINT16_C(23188), UINT16_C(23188) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t vec; simde_uint16x4_t r; vec = simde_vld1q_u16(test_vec[i].vec); SIMDE_CONSTIFY_8_(simde_vdup_laneq_u16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_u16(UINT16_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t vec = simde_test_arm_neon_random_u16x8(); int lane = simde_test_codegen_random_u8() & 7; simde_uint16x4_t r; SIMDE_CONSTIFY_8_(simde_vdup_laneq_u16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_u16(UINT16_C(0))), lane, vec); simde_test_arm_neon_write_u16x8(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, HEDLEY_STATIC_CAST(uint8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdup_laneq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 
struct { uint32_t vec[8]; int lane; uint32_t r[2]; } test_vec[] = { { { UINT32_C(2661263459), UINT32_C( 498073765), UINT32_C(2734156905), UINT32_C(1899373335) }, UINT8_C( 1), { UINT32_C( 498073765), UINT32_C( 498073765) } }, { { UINT32_C(3372863858), UINT32_C(3628632987), UINT32_C(2729170743), UINT32_C( 436564850) }, UINT8_C( 2), { UINT32_C(2729170743), UINT32_C(2729170743) } }, { { UINT32_C(1410580387), UINT32_C(3560996061), UINT32_C(1425495838), UINT32_C(2579917000) }, UINT8_C( 2), { UINT32_C(1425495838), UINT32_C(1425495838) } }, { { UINT32_C(3640472975), UINT32_C(3098490125), UINT32_C(3710656983), UINT32_C(4202708539) }, UINT8_C( 1), { UINT32_C(3098490125), UINT32_C(3098490125) } }, { { UINT32_C( 349034452), UINT32_C(2741758892), UINT32_C( 101463103), UINT32_C( 43351757) }, UINT8_C( 3), { UINT32_C( 43351757), UINT32_C( 43351757) } }, { { UINT32_C( 481300589), UINT32_C(4097673416), UINT32_C(3845355621), UINT32_C(1253737587) }, UINT8_C( 2), { UINT32_C(3845355621), UINT32_C(3845355621) } }, { { UINT32_C( 977139406), UINT32_C(2792258970), UINT32_C( 405027715), UINT32_C(1317445438) }, UINT8_C( 0), { UINT32_C( 977139406), UINT32_C( 977139406) } }, { { UINT32_C(3772257954), UINT32_C(1045970187), UINT32_C(3703753506), UINT32_C( 212538390) }, UINT8_C( 2), { UINT32_C(3703753506), UINT32_C(3703753506) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t vec; simde_uint32x2_t r; vec = simde_vld1q_u32(test_vec[i].vec); SIMDE_CONSTIFY_4_(simde_vdup_laneq_u32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_u32(UINT32_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t vec = simde_test_arm_neon_random_u32x4(); int lane = simde_test_codegen_random_u8() & 3; simde_uint32x2_t r; SIMDE_CONSTIFY_4_(simde_vdup_laneq_u32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_u32(UINT32_C(0))), lane, vec); 
simde_test_arm_neon_write_u32x4(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, HEDLEY_STATIC_CAST(uint8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdup_laneq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint64_t vec[2]; int lane; uint64_t r[1]; } test_vec[] = { { { UINT64_C( 8110510466717951717), UINT64_C( 3572304598247984234) }, UINT8_C( 0), { UINT64_C( 8110510466717951717) } }, { { UINT64_C( 4366124917414247539), UINT64_C( 6918834943037648244) }, UINT8_C( 0), { UINT64_C( 4366124917414247539) } }, { { UINT64_C( 3292744983505054807), UINT64_C( 4117915127713221663) }, UINT8_C( 0), { UINT64_C( 3292744983505054807) } }, { { UINT64_C( 4698457210627404784), UINT64_C(18068255124800990820) }, UINT8_C( 0), { UINT64_C( 4698457210627404784) } }, { { UINT64_C(15070002321331591077), UINT64_C( 1022264726452052249) }, UINT8_C( 0), { UINT64_C(15070002321331591077) } }, { { UINT64_C( 2583179698698190518), UINT64_C( 4114481506476959668) }, UINT8_C( 1), { UINT64_C( 4114481506476959668) } }, { { UINT64_C(16551956027336260976), UINT64_C( 1981680721021243291) }, UINT8_C( 1), { UINT64_C( 1981680721021243291) } }, { { UINT64_C(17756356862603385451), UINT64_C( 4845139579135167924) }, UINT8_C( 1), { UINT64_C( 4845139579135167924) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t vec; simde_uint64x1_t r; vec = simde_vld1q_u64(test_vec[i].vec); SIMDE_CONSTIFY_2_(simde_vdup_laneq_u64, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_u64(UINT64_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t vec = simde_test_arm_neon_random_u64x2(); int lane = simde_test_codegen_random_u8() & 1; simde_uint64x1_t r; SIMDE_CONSTIFY_2_(simde_vdup_laneq_u64, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_u64(UINT64_C(0))), 
lane, vec); simde_test_arm_neon_write_u64x2(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, HEDLEY_STATIC_CAST(uint8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdupq_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float32 vec[4]; int lane; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 841.63), SIMDE_FLOAT32_C( -49.45), SIMDE_FLOAT32_C( 261.67), SIMDE_FLOAT32_C( -350.53) }, INT8_C( 2), { SIMDE_FLOAT32_C( 261.67), SIMDE_FLOAT32_C( 261.67), SIMDE_FLOAT32_C( 261.67), SIMDE_FLOAT32_C( 261.67) } }, { { SIMDE_FLOAT32_C( 242.12), SIMDE_FLOAT32_C( 896.58), SIMDE_FLOAT32_C( -122.58), SIMDE_FLOAT32_C( -914.43) }, INT8_C( 3), { SIMDE_FLOAT32_C( -914.43), SIMDE_FLOAT32_C( -914.43), SIMDE_FLOAT32_C( -914.43), SIMDE_FLOAT32_C( -914.43) } }, { { SIMDE_FLOAT32_C( -903.27), SIMDE_FLOAT32_C( -11.67), SIMDE_FLOAT32_C( -137.96), SIMDE_FLOAT32_C( -521.79) }, INT8_C( 1), { SIMDE_FLOAT32_C( -11.67), SIMDE_FLOAT32_C( -11.67), SIMDE_FLOAT32_C( -11.67), SIMDE_FLOAT32_C( -11.67) } }, { { SIMDE_FLOAT32_C( -946.62), SIMDE_FLOAT32_C( -205.01), SIMDE_FLOAT32_C( -121.41), SIMDE_FLOAT32_C( 159.32) }, INT8_C( 0), { SIMDE_FLOAT32_C( -946.62), SIMDE_FLOAT32_C( -946.62), SIMDE_FLOAT32_C( -946.62), SIMDE_FLOAT32_C( -946.62) } }, { { SIMDE_FLOAT32_C( 334.52), SIMDE_FLOAT32_C( -142.22), SIMDE_FLOAT32_C( -498.68), SIMDE_FLOAT32_C( -830.99) }, INT8_C( 0), { SIMDE_FLOAT32_C( 334.52), SIMDE_FLOAT32_C( 334.52), SIMDE_FLOAT32_C( 334.52), SIMDE_FLOAT32_C( 334.52) } }, { { SIMDE_FLOAT32_C( 855.02), SIMDE_FLOAT32_C( -765.81), SIMDE_FLOAT32_C( -476.10), SIMDE_FLOAT32_C( 738.65) }, INT8_C( 2), { SIMDE_FLOAT32_C( -476.10), SIMDE_FLOAT32_C( -476.10), SIMDE_FLOAT32_C( -476.10), SIMDE_FLOAT32_C( -476.10) } }, { { SIMDE_FLOAT32_C( 85.00), SIMDE_FLOAT32_C( 580.28), SIMDE_FLOAT32_C( 330.74), SIMDE_FLOAT32_C( -653.33) }, INT8_C( 3), { SIMDE_FLOAT32_C( -653.33), SIMDE_FLOAT32_C( 
-653.33), SIMDE_FLOAT32_C( -653.33), SIMDE_FLOAT32_C( -653.33) } }, { { SIMDE_FLOAT32_C( -767.25), SIMDE_FLOAT32_C( 588.80), SIMDE_FLOAT32_C( -873.67), SIMDE_FLOAT32_C( 110.16) }, INT8_C( 3), { SIMDE_FLOAT32_C( 110.16), SIMDE_FLOAT32_C( 110.16), SIMDE_FLOAT32_C( 110.16), SIMDE_FLOAT32_C( 110.16) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t vec; simde_float32x4_t r; vec = simde_vld1q_f32(test_vec[i].vec); SIMDE_CONSTIFY_4_(simde_vdupq_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(0.0f)), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t vec = simde_test_arm_neon_random_f32x4(-1000.0, 1000.0); int lane = simde_test_codegen_random_i8() & 3; simde_float32x4_t r; SIMDE_CONSTIFY_4_(simde_vdupq_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(0.0f)), lane, vec); simde_test_arm_neon_write_f32x4(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdupq_laneq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { simde_float64 vec[2]; int lane; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -955.26), SIMDE_FLOAT64_C( -551.38) }, INT8_C( 0), { SIMDE_FLOAT64_C( -955.26), SIMDE_FLOAT64_C( -955.26) } }, { { SIMDE_FLOAT64_C( 53.53), SIMDE_FLOAT64_C( 970.21) }, INT8_C( 1), { SIMDE_FLOAT64_C( 970.21), SIMDE_FLOAT64_C( 970.21) } }, { { SIMDE_FLOAT64_C( -594.72), SIMDE_FLOAT64_C( 118.44) }, INT8_C( 1), { SIMDE_FLOAT64_C( 118.44), SIMDE_FLOAT64_C( 118.44) } }, { { SIMDE_FLOAT64_C( 499.85), SIMDE_FLOAT64_C( 930.54) }, INT8_C( 0), { SIMDE_FLOAT64_C( 499.85), SIMDE_FLOAT64_C( 499.85) } }, { { SIMDE_FLOAT64_C( 858.61), SIMDE_FLOAT64_C( 351.33) }, INT8_C( 1), { SIMDE_FLOAT64_C( 351.33), SIMDE_FLOAT64_C( 
351.33) } }, { { SIMDE_FLOAT64_C( 558.39), SIMDE_FLOAT64_C( -964.87) }, INT8_C( 0), { SIMDE_FLOAT64_C( 558.39), SIMDE_FLOAT64_C( 558.39) } }, { { SIMDE_FLOAT64_C( -858.49), SIMDE_FLOAT64_C( 914.99) }, INT8_C( 0), { SIMDE_FLOAT64_C( -858.49), SIMDE_FLOAT64_C( -858.49) } }, { { SIMDE_FLOAT64_C( 745.32), SIMDE_FLOAT64_C( -258.32) }, INT8_C( 1), { SIMDE_FLOAT64_C( -258.32), SIMDE_FLOAT64_C( -258.32) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t vec; simde_float64x2_t r; vec = simde_vld1q_f64(test_vec[i].vec); SIMDE_CONSTIFY_2_(simde_vdupq_laneq_f64, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f64(0.0)), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t vec = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); int lane = simde_test_codegen_random_i8() & 1; simde_float64x2_t r; SIMDE_CONSTIFY_2_(simde_vdupq_laneq_f64, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f64(0.0)), lane, vec); simde_test_arm_neon_write_f64x2(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdupq_laneq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int8_t vec[16]; int lane; int8_t r[16]; } test_vec[] = { { { INT8_C( 99), -INT8_C( 62), INT8_C( 37), INT8_C( 65), INT8_C( 113), -INT8_C( 6), INT8_C( 57), -INT8_C( 114), INT8_C( 89), -INT8_C( 29), INT8_C( 4), -INT8_C( 45), INT8_C( 8), -INT8_C( 71), INT8_C( 46), INT8_C( 87) }, INT8_C( 7), { -INT8_C( 114), -INT8_C( 114), -INT8_C( 114), -INT8_C( 114), -INT8_C( 114), -INT8_C( 114), -INT8_C( 114), -INT8_C( 114), -INT8_C( 114), -INT8_C( 114), -INT8_C( 114), -INT8_C( 114), -INT8_C( 114), -INT8_C( 114), -INT8_C( 114), -INT8_C( 114) } }, { { -INT8_C( 56), -INT8_C( 99), -INT8_C( 88), INT8_C( 89), 
-INT8_C( 4), -INT8_C( 7), INT8_C( 59), -INT8_C( 97), -INT8_C( 11), -INT8_C( 98), INT8_C( 84), -INT8_C( 108), INT8_C( 7), -INT8_C( 51), -INT8_C( 9), -INT8_C( 54) }, INT8_C( 2), { -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88) } }, { { INT8_C( 56), INT8_C( 59), -INT8_C( 20), INT8_C( 113), -INT8_C( 55), INT8_C( 69), INT8_C( 84), -INT8_C( 51), INT8_C( 25), INT8_C( 92), -INT8_C( 122), INT8_C( 71), -INT8_C( 77), -INT8_C( 35), INT8_C( 16), INT8_C( 81) }, INT8_C( 6), { INT8_C( 84), INT8_C( 84), INT8_C( 84), INT8_C( 84), INT8_C( 84), INT8_C( 84), INT8_C( 84), INT8_C( 84), INT8_C( 84), INT8_C( 84), INT8_C( 84), INT8_C( 84), INT8_C( 84), INT8_C( 84), INT8_C( 84), INT8_C( 84) } }, { { INT8_C( 105), INT8_C( 77), INT8_MAX, -INT8_C( 91), -INT8_C( 20), INT8_C( 116), INT8_C( 67), INT8_C( 65), INT8_C( 8), INT8_C( 74), INT8_C( 14), INT8_C( 0), INT8_C( 20), INT8_C( 0), INT8_C( 56), INT8_C( 80) }, INT8_C( 12), { INT8_C( 20), INT8_C( 20), INT8_C( 20), INT8_C( 20), INT8_C( 20), INT8_C( 20), INT8_C( 20), INT8_C( 20), INT8_C( 20), INT8_C( 20), INT8_C( 20), INT8_C( 20), INT8_C( 20), INT8_C( 20), INT8_C( 20), INT8_C( 20) } }, { { -INT8_C( 86), INT8_C( 25), INT8_C( 50), -INT8_C( 2), -INT8_C( 25), INT8_C( 75), INT8_C( 91), INT8_C( 109), -INT8_C( 110), INT8_C( 14), INT8_C( 75), -INT8_C( 94), INT8_C( 95), -INT8_C( 47), INT8_C( 12), -INT8_C( 83) }, INT8_C( 0), { -INT8_C( 86), -INT8_C( 86), -INT8_C( 86), -INT8_C( 86), -INT8_C( 86), -INT8_C( 86), -INT8_C( 86), -INT8_C( 86), -INT8_C( 86), -INT8_C( 86), -INT8_C( 86), -INT8_C( 86), -INT8_C( 86), -INT8_C( 86), -INT8_C( 86), -INT8_C( 86) } }, { { -INT8_C( 79), -INT8_C( 103), -INT8_C( 60), -INT8_C( 12), -INT8_C( 38), -INT8_C( 51), INT8_C( 62), -INT8_C( 24), -INT8_C( 51), INT8_C( 83), -INT8_C( 24), INT8_C( 5), -INT8_C( 93), -INT8_C( 43), -INT8_C( 81), -INT8_C( 68) }, INT8_C( 7), { 
-INT8_C( 24), -INT8_C( 24), -INT8_C( 24), -INT8_C( 24), -INT8_C( 24), -INT8_C( 24), -INT8_C( 24), -INT8_C( 24), -INT8_C( 24), -INT8_C( 24), -INT8_C( 24), -INT8_C( 24), -INT8_C( 24), -INT8_C( 24), -INT8_C( 24), -INT8_C( 24) } }, { { -INT8_C( 82), -INT8_C( 93), INT8_C( 82), INT8_C( 9), INT8_C( 17), -INT8_C( 28), INT8_C( 23), INT8_C( 92), -INT8_C( 121), INT8_C( 119), INT8_C( 45), -INT8_C( 109), INT8_C( 36), INT8_C( 125), INT8_C( 68), -INT8_C( 67) }, INT8_C( 1), { -INT8_C( 93), -INT8_C( 93), -INT8_C( 93), -INT8_C( 93), -INT8_C( 93), -INT8_C( 93), -INT8_C( 93), -INT8_C( 93), -INT8_C( 93), -INT8_C( 93), -INT8_C( 93), -INT8_C( 93), -INT8_C( 93), -INT8_C( 93), -INT8_C( 93), -INT8_C( 93) } }, { { INT8_C( 56), -INT8_C( 104), INT8_C( 14), INT8_C( 118), INT8_MIN, -INT8_C( 37), -INT8_C( 55), INT8_C( 105), -INT8_C( 31), INT8_C( 108), INT8_C( 62), -INT8_C( 112), INT8_C( 41), INT8_C( 69), INT8_C( 62), -INT8_C( 52) }, INT8_C( 7), { INT8_C( 105), INT8_C( 105), INT8_C( 105), INT8_C( 105), INT8_C( 105), INT8_C( 105), INT8_C( 105), INT8_C( 105), INT8_C( 105), INT8_C( 105), INT8_C( 105), INT8_C( 105), INT8_C( 105), INT8_C( 105), INT8_C( 105), INT8_C( 105) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t vec; simde_int8x16_t r; vec = simde_vld1q_s8(test_vec[i].vec); SIMDE_CONSTIFY_16_(simde_vdupq_laneq_s8, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_s8(INT8_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t vec = simde_test_arm_neon_random_i8x16(); int lane = simde_test_codegen_random_i8() & 15; simde_int8x16_t r; SIMDE_CONSTIFY_16_(simde_vdupq_laneq_s8, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_s8(INT8_C(0))), lane, vec); simde_test_arm_neon_write_i8x16(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdupq_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int16_t vec[8]; int lane; int16_t r[8]; } test_vec[] = { { { -INT16_C( 8889), INT16_C( 24443), INT16_C( 569), INT16_C( 26326), -INT16_C( 1387), -INT16_C( 9757), INT16_C( 9655), INT16_C( 20241) }, INT8_C( 3), { INT16_C( 26326), INT16_C( 26326), INT16_C( 26326), INT16_C( 26326), INT16_C( 26326), INT16_C( 26326), INT16_C( 26326), INT16_C( 26326) } }, { { -INT16_C( 12152), INT16_C( 20751), -INT16_C( 4039), INT16_C( 30654), -INT16_C( 6272), -INT16_C( 16452), INT16_C( 21427), -INT16_C( 28410) }, INT8_C( 6), { INT16_C( 21427), INT16_C( 21427), INT16_C( 21427), INT16_C( 21427), INT16_C( 21427), INT16_C( 21427), INT16_C( 21427), INT16_C( 21427) } }, { { -INT16_C( 13723), INT16_C( 15313), INT16_C( 26161), INT16_C( 5173), -INT16_C( 4800), INT16_C( 20793), INT16_C( 27964), INT16_C( 3289) }, INT8_C( 4), { -INT16_C( 4800), -INT16_C( 4800), -INT16_C( 4800), -INT16_C( 4800), -INT16_C( 4800), -INT16_C( 4800), -INT16_C( 4800), -INT16_C( 4800) } }, { { INT16_C( 17707), -INT16_C( 5780), -INT16_C( 4932), INT16_C( 30928), -INT16_C( 31829), -INT16_C( 19765), -INT16_C( 26092), -INT16_C( 8425) }, INT8_C( 3), { INT16_C( 30928), INT16_C( 30928), INT16_C( 30928), INT16_C( 30928), INT16_C( 30928), INT16_C( 30928), INT16_C( 30928), INT16_C( 30928) } }, { { INT16_C( 4179), -INT16_C( 30511), INT16_C( 4388), INT16_C( 24181), -INT16_C( 19869), INT16_C( 15563), INT16_C( 18366), INT16_C( 1127) }, INT8_C( 3), { INT16_C( 24181), INT16_C( 24181), INT16_C( 24181), INT16_C( 24181), INT16_C( 24181), INT16_C( 24181), INT16_C( 24181), INT16_C( 24181) } }, { { -INT16_C( 16304), INT16_C( 8351), INT16_C( 19257), INT16_C( 1188), -INT16_C( 18179), INT16_C( 5278), INT16_C( 2455), -INT16_C( 22681) }, INT8_C( 3), { INT16_C( 1188), INT16_C( 1188), INT16_C( 1188), INT16_C( 1188), INT16_C( 1188), INT16_C( 1188), INT16_C( 1188), INT16_C( 1188) } }, { { 
-INT16_C( 13072), INT16_C( 26092), INT16_C( 20266), -INT16_C( 2793), -INT16_C( 10612), -INT16_C( 3268), -INT16_C( 4134), -INT16_C( 26044) }, INT8_C( 6), { -INT16_C( 4134), -INT16_C( 4134), -INT16_C( 4134), -INT16_C( 4134), -INT16_C( 4134), -INT16_C( 4134), -INT16_C( 4134), -INT16_C( 4134) } }, { { -INT16_C( 11420), INT16_C( 2265), -INT16_C( 10536), INT16_C( 30401), INT16_C( 22763), INT16_C( 21120), INT16_C( 23296), -INT16_C( 13246) }, INT8_C( 7), { -INT16_C( 13246), -INT16_C( 13246), -INT16_C( 13246), -INT16_C( 13246), -INT16_C( 13246), -INT16_C( 13246), -INT16_C( 13246), -INT16_C( 13246) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t vec; simde_int16x8_t r; vec = simde_vld1q_s16(test_vec[i].vec); SIMDE_CONSTIFY_8_(simde_vdupq_laneq_s16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_s16(INT16_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t vec = simde_test_arm_neon_random_i16x8(); int lane = simde_test_codegen_random_i8() & 7; simde_int16x8_t r; SIMDE_CONSTIFY_8_(simde_vdupq_laneq_s16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_s16(INT8_C(0))), lane, vec); simde_test_arm_neon_write_i16x8(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdupq_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int32_t vec[4]; int lane; int32_t r[4]; } test_vec[] = { { { -INT32_C( 520769284), INT32_C( 1690121648), INT32_C( 2059620000), -INT32_C( 175160556) }, INT8_C( 1), { INT32_C( 1690121648), INT32_C( 1690121648), INT32_C( 1690121648), INT32_C( 1690121648) } }, { { INT32_C( 1145289614), INT32_C( 1995152945), INT32_C( 1451618795), INT32_C( 1381122466) }, INT8_C( 2), { INT32_C( 1451618795), INT32_C( 1451618795), 
INT32_C( 1451618795), INT32_C( 1451618795) } }, { { -INT32_C( 278199502), INT32_C( 725486439), INT32_C( 342774405), INT32_C( 44179271) }, INT8_C( 2), { INT32_C( 342774405), INT32_C( 342774405), INT32_C( 342774405), INT32_C( 342774405) } }, { { -INT32_C( 771738649), INT32_C( 801369002), -INT32_C( 1804573119), -INT32_C( 1144609096) }, INT8_C( 2), { -INT32_C( 1804573119), -INT32_C( 1804573119), -INT32_C( 1804573119), -INT32_C( 1804573119) } }, { { -INT32_C( 199416906), -INT32_C( 1136221362), INT32_C( 1524338103), -INT32_C( 1019134577) }, INT8_C( 1), { -INT32_C( 1136221362), -INT32_C( 1136221362), -INT32_C( 1136221362), -INT32_C( 1136221362) } }, { { -INT32_C( 701993709), INT32_C( 205286044), -INT32_C( 994904578), -INT32_C( 746928976) }, INT8_C( 2), { -INT32_C( 994904578), -INT32_C( 994904578), -INT32_C( 994904578), -INT32_C( 994904578) } }, { { -INT32_C( 1266343570), -INT32_C( 1203684131), -INT32_C( 671755882), INT32_C( 32125588) }, INT8_C( 3), { INT32_C( 32125588), INT32_C( 32125588), INT32_C( 32125588), INT32_C( 32125588) } }, { { -INT32_C( 37380671), INT32_C( 1542636457), INT32_C( 35693447), -INT32_C( 1737489802) }, INT8_C( 3), { -INT32_C( 1737489802), -INT32_C( 1737489802), -INT32_C( 1737489802), -INT32_C( 1737489802) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t vec; simde_int32x4_t r; vec = simde_vld1q_s32(test_vec[i].vec); SIMDE_CONSTIFY_4_(simde_vdupq_laneq_s32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_s32(INT32_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t vec = simde_test_arm_neon_random_i32x4(); int lane = simde_test_codegen_random_i8() & 3; simde_int32x4_t r; SIMDE_CONSTIFY_4_(simde_vdupq_laneq_s32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_s32(INT32_C(0))), lane, vec); simde_test_arm_neon_write_i32x4(2, vec, SIMDE_TEST_VEC_POS_FIRST); 
simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdupq_laneq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int64_t vec[2]; int lane; int64_t r[2]; } test_vec[] = { { { -INT64_C( 8011933124795626931), -INT64_C( 8152842150656023044) }, INT8_C( 0), { -INT64_C( 8011933124795626931), -INT64_C( 8011933124795626931) } }, { { INT64_C( 3685999757973111283), -INT64_C( 5386610080087765761) }, INT8_C( 1), { -INT64_C( 5386610080087765761), -INT64_C( 5386610080087765761) } }, { { -INT64_C( 2623861853923575070), INT64_C( 2913445673781549215) }, INT8_C( 1), { INT64_C( 2913445673781549215), INT64_C( 2913445673781549215) } }, { { INT64_C( 8091307841519511702), -INT64_C( 6617296750713834230) }, INT8_C( 0), { INT64_C( 8091307841519511702), INT64_C( 8091307841519511702) } }, { { INT64_C( 3479766334596526043), INT64_C( 4763859366259699991) }, INT8_C( 0), { INT64_C( 3479766334596526043), INT64_C( 3479766334596526043) } }, { { -INT64_C( 6333786973713146150), -INT64_C( 7553910520906202330) }, INT8_C( 0), { -INT64_C( 6333786973713146150), -INT64_C( 6333786973713146150) } }, { { -INT64_C( 6513945781748522339), -INT64_C( 7760827274264939690) }, INT8_C( 0), { -INT64_C( 6513945781748522339), -INT64_C( 6513945781748522339) } }, { { -INT64_C( 2932731163502779536), -INT64_C( 1373296300908956888) }, INT8_C( 1), { -INT64_C( 1373296300908956888), -INT64_C( 1373296300908956888) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t vec; simde_int64x2_t r; vec = simde_vld1q_s64(test_vec[i].vec); SIMDE_CONSTIFY_2_(simde_vdupq_laneq_s64, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_s64(INT64_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t vec = 
simde_test_arm_neon_random_i64x2(); int lane = simde_test_codegen_random_i8() & 1; simde_int64x2_t r; SIMDE_CONSTIFY_2_(simde_vdupq_laneq_s64, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_s64(INT64_C(0))), lane, vec); simde_test_arm_neon_write_i64x2(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdupq_laneq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint8_t vec[16]; int lane; uint8_t r[16]; } test_vec[] = { { { UINT8_C(217), UINT8_C(190), UINT8_C(155), UINT8_C(115), UINT8_C(100), UINT8_C(241), UINT8_C(230), UINT8_C(170), UINT8_C( 99), UINT8_C(156), UINT8_C(170), UINT8_C(175), UINT8_C( 49), UINT8_C(127), UINT8_C( 31), UINT8_C( 20) }, UINT8_C( 1), { UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190) } }, { { UINT8_C(168), UINT8_C(159), UINT8_C( 10), UINT8_C(244), UINT8_C(119), UINT8_C( 50), UINT8_C(168), UINT8_C(109), UINT8_C(134), UINT8_C(242), UINT8_C(127), UINT8_C(120), UINT8_C(223), UINT8_C( 74), UINT8_C( 81), UINT8_C(157) }, UINT8_C( 5), { UINT8_C( 50), UINT8_C( 50), UINT8_C( 50), UINT8_C( 50), UINT8_C( 50), UINT8_C( 50), UINT8_C( 50), UINT8_C( 50), UINT8_C( 50), UINT8_C( 50), UINT8_C( 50), UINT8_C( 50), UINT8_C( 50), UINT8_C( 50), UINT8_C( 50), UINT8_C( 50) } }, { { UINT8_C(196), UINT8_C( 1), UINT8_C(215), UINT8_C(171), UINT8_C(171), UINT8_C( 58), UINT8_C( 71), UINT8_C( 86), UINT8_C(234), UINT8_C(120), UINT8_C(213), UINT8_C( 9), UINT8_C(140), UINT8_C( 6), UINT8_C(178), UINT8_C( 44) }, UINT8_C( 0), { UINT8_C(196), UINT8_C(196), UINT8_C(196), UINT8_C(196), UINT8_C(196), UINT8_C(196), UINT8_C(196), UINT8_C(196), UINT8_C(196), UINT8_C(196), UINT8_C(196), UINT8_C(196), UINT8_C(196), UINT8_C(196), UINT8_C(196), 
UINT8_C(196) } }, { { UINT8_C(166), UINT8_C(163), UINT8_C( 67), UINT8_C( 78), UINT8_C( 16), UINT8_C(201), UINT8_C( 65), UINT8_C(143), UINT8_C( 65), UINT8_C( 32), UINT8_C(217), UINT8_C(147), UINT8_C(189), UINT8_C(190), UINT8_C( 87), UINT8_C(191) }, UINT8_C( 5), { UINT8_C(201), UINT8_C(201), UINT8_C(201), UINT8_C(201), UINT8_C(201), UINT8_C(201), UINT8_C(201), UINT8_C(201), UINT8_C(201), UINT8_C(201), UINT8_C(201), UINT8_C(201), UINT8_C(201), UINT8_C(201), UINT8_C(201), UINT8_C(201) } }, { { UINT8_C( 2), UINT8_C(106), UINT8_C(208), UINT8_C( 74), UINT8_C(192), UINT8_C(186), UINT8_C(194), UINT8_C(149), UINT8_C(195), UINT8_C( 79), UINT8_C(156), UINT8_C(117), UINT8_C(123), UINT8_C(172), UINT8_C( 28), UINT8_C( 30) }, UINT8_C( 15), { UINT8_C( 30), UINT8_C( 30), UINT8_C( 30), UINT8_C( 30), UINT8_C( 30), UINT8_C( 30), UINT8_C( 30), UINT8_C( 30), UINT8_C( 30), UINT8_C( 30), UINT8_C( 30), UINT8_C( 30), UINT8_C( 30), UINT8_C( 30), UINT8_C( 30), UINT8_C( 30) } }, { { UINT8_C(106), UINT8_C( 46), UINT8_C(185), UINT8_C(171), UINT8_C(189), UINT8_C(250), UINT8_C(203), UINT8_C(150), UINT8_C(141), UINT8_C(137), UINT8_C( 84), UINT8_C(229), UINT8_C( 72), UINT8_C(234), UINT8_C(231), UINT8_C(178) }, UINT8_C( 10), { UINT8_C( 84), UINT8_C( 84), UINT8_C( 84), UINT8_C( 84), UINT8_C( 84), UINT8_C( 84), UINT8_C( 84), UINT8_C( 84), UINT8_C( 84), UINT8_C( 84), UINT8_C( 84), UINT8_C( 84), UINT8_C( 84), UINT8_C( 84), UINT8_C( 84), UINT8_C( 84) } }, { { UINT8_C( 49), UINT8_C(115), UINT8_C(116), UINT8_C(244), UINT8_C( 8), UINT8_C( 55), UINT8_C( 67), UINT8_C(164), UINT8_C(173), UINT8_C(190), UINT8_C( 81), UINT8_C(201), UINT8_C(220), UINT8_C( 64), UINT8_C( 51), UINT8_C( 10) }, UINT8_C( 9), { UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190), UINT8_C(190) } }, { { UINT8_C(223), UINT8_C(199), UINT8_C(244), UINT8_C(170), UINT8_C( 93), UINT8_C(129), 
UINT8_C( 51), UINT8_C(177), UINT8_C(102), UINT8_C(123), UINT8_C(155), UINT8_C( 78), UINT8_C( 46), UINT8_C( 85), UINT8_C(127), UINT8_C(161) }, UINT8_C( 9), { UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123), UINT8_C(123) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t vec; simde_uint8x16_t r; vec = simde_vld1q_u8(test_vec[i].vec); SIMDE_CONSTIFY_16_(simde_vdupq_laneq_u8, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_u8(UINT8_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t vec = simde_test_arm_neon_random_u8x16(); int lane = simde_test_codegen_random_u8() & 15; simde_uint8x16_t r; SIMDE_CONSTIFY_16_(simde_vdupq_laneq_u8, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_u8(UINT8_C(0))), lane, vec); simde_test_arm_neon_write_u8x16(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, HEDLEY_STATIC_CAST(uint8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdupq_laneq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint16_t vec[8]; int lane; uint16_t r[8]; } test_vec[] = { { { UINT16_C(12576), UINT16_C(38673), UINT16_C(17857), UINT16_C(51906), UINT16_C(60712), UINT16_C(20918), UINT16_C(54013), UINT16_C(47959) }, UINT8_C( 1), { UINT16_C(38673), UINT16_C(38673), UINT16_C(38673), UINT16_C(38673), UINT16_C(38673), UINT16_C(38673), UINT16_C(38673), UINT16_C(38673) } }, { { UINT16_C(58637), UINT16_C(48422), UINT16_C(47041), UINT16_C(59773), UINT16_C(22510), UINT16_C(58503), UINT16_C(20626), UINT16_C(49924) }, UINT8_C( 2), { UINT16_C(47041), UINT16_C(47041), UINT16_C(47041), UINT16_C(47041), UINT16_C(47041), UINT16_C(47041), 
UINT16_C(47041), UINT16_C(47041) } }, { { UINT16_C(33947), UINT16_C(24231), UINT16_C(53326), UINT16_C( 1099), UINT16_C(18465), UINT16_C(31190), UINT16_C(53251), UINT16_C(59782) }, UINT8_C( 6), { UINT16_C(53251), UINT16_C(53251), UINT16_C(53251), UINT16_C(53251), UINT16_C(53251), UINT16_C(53251), UINT16_C(53251), UINT16_C(53251) } }, { { UINT16_C(43587), UINT16_C(49582), UINT16_C(40083), UINT16_C( 6680), UINT16_C(43648), UINT16_C(34154), UINT16_C(52334), UINT16_C(61984) }, UINT8_C( 4), { UINT16_C(43648), UINT16_C(43648), UINT16_C(43648), UINT16_C(43648), UINT16_C(43648), UINT16_C(43648), UINT16_C(43648), UINT16_C(43648) } }, { { UINT16_C(16766), UINT16_C(51524), UINT16_C(25925), UINT16_C( 7185), UINT16_C( 5598), UINT16_C(26092), UINT16_C(58110), UINT16_C(43176) }, UINT8_C( 0), { UINT16_C(16766), UINT16_C(16766), UINT16_C(16766), UINT16_C(16766), UINT16_C(16766), UINT16_C(16766), UINT16_C(16766), UINT16_C(16766) } }, { { UINT16_C(15209), UINT16_C(33325), UINT16_C(44373), UINT16_C(48940), UINT16_C(39474), UINT16_C(21388), UINT16_C( 141), UINT16_C(52945) }, UINT8_C( 4), { UINT16_C(39474), UINT16_C(39474), UINT16_C(39474), UINT16_C(39474), UINT16_C(39474), UINT16_C(39474), UINT16_C(39474), UINT16_C(39474) } }, { { UINT16_C( 5019), UINT16_C(44201), UINT16_C(34863), UINT16_C( 7105), UINT16_C(49133), UINT16_C(38398), UINT16_C(36455), UINT16_C(41727) }, UINT8_C( 3), { UINT16_C( 7105), UINT16_C( 7105), UINT16_C( 7105), UINT16_C( 7105), UINT16_C( 7105), UINT16_C( 7105), UINT16_C( 7105), UINT16_C( 7105) } }, { { UINT16_C(63361), UINT16_C(44393), UINT16_C(39863), UINT16_C(17224), UINT16_C(54766), UINT16_C(49219), UINT16_C(34723), UINT16_C(46683) }, UINT8_C( 0), { UINT16_C(63361), UINT16_C(63361), UINT16_C(63361), UINT16_C(63361), UINT16_C(63361), UINT16_C(63361), UINT16_C(63361), UINT16_C(63361) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t vec; simde_uint16x8_t r; vec = simde_vld1q_u16(test_vec[i].vec); 
SIMDE_CONSTIFY_8_(simde_vdupq_laneq_u16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_u16(UINT16_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t vec = simde_test_arm_neon_random_u16x8(); int lane = simde_test_codegen_random_u8() & 7; simde_uint16x8_t r; SIMDE_CONSTIFY_8_(simde_vdupq_laneq_u16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_u16(UINT16_C(0))), lane, vec); simde_test_arm_neon_write_u16x8(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, HEDLEY_STATIC_CAST(uint8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdupq_laneq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint32_t vec[4]; int lane; uint32_t r[4]; } test_vec[] = { { { UINT32_C(3384337927), UINT32_C(4287145217), UINT32_C( 982446139), UINT32_C(2327529874) }, UINT8_C( 2), { UINT32_C( 982446139), UINT32_C( 982446139), UINT32_C( 982446139), UINT32_C( 982446139) } }, { { UINT32_C(2957918568), UINT32_C(3347397764), UINT32_C(1464740092), UINT32_C(3311369951) }, UINT8_C( 3), { UINT32_C(3311369951), UINT32_C(3311369951), UINT32_C(3311369951), UINT32_C(3311369951) } }, { { UINT32_C(2967258664), UINT32_C(1419777990), UINT32_C( 211628881), UINT32_C(4269101245) }, UINT8_C( 2), { UINT32_C( 211628881), UINT32_C( 211628881), UINT32_C( 211628881), UINT32_C( 211628881) } }, { { UINT32_C(2883224101), UINT32_C(2547242825), UINT32_C(2367009326), UINT32_C(1052068983) }, UINT8_C( 1), { UINT32_C(2547242825), UINT32_C(2547242825), UINT32_C(2547242825), UINT32_C(2547242825) } }, { { UINT32_C( 104858726), UINT32_C(4114190936), UINT32_C( 323352222), UINT32_C(1983505396) }, UINT8_C( 2), { UINT32_C( 323352222), UINT32_C( 323352222), UINT32_C( 323352222), UINT32_C( 323352222) } }, { { UINT32_C(3080044516), UINT32_C(1818936150), UINT32_C( 112779601), UINT32_C( 594338079) }, UINT8_C( 
2), { UINT32_C( 112779601), UINT32_C( 112779601), UINT32_C( 112779601), UINT32_C( 112779601) } }, { { UINT32_C(2897509235), UINT32_C(3064156785), UINT32_C(2677708646), UINT32_C(3447937038) }, UINT8_C( 1), { UINT32_C(3064156785), UINT32_C(3064156785), UINT32_C(3064156785), UINT32_C(3064156785) } }, { { UINT32_C(2763007034), UINT32_C(1216741776), UINT32_C(1948951816), UINT32_C(1156009161) }, UINT8_C( 0), { UINT32_C(2763007034), UINT32_C(2763007034), UINT32_C(2763007034), UINT32_C(2763007034) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t vec; simde_uint32x4_t r; vec = simde_vld1q_u32(test_vec[i].vec); SIMDE_CONSTIFY_4_(simde_vdupq_laneq_u32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_u32(UINT32_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t vec = simde_test_arm_neon_random_u32x4(); int lane = simde_test_codegen_random_u8() & 3; simde_uint32x4_t r; SIMDE_CONSTIFY_4_(simde_vdupq_laneq_u32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_u32(UINT32_C(0))), lane, vec); simde_test_arm_neon_write_u32x4(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, HEDLEY_STATIC_CAST(uint8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vdupq_laneq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint64_t vec[2]; int lane; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 490532184354108820), UINT64_C(14849858543504251991) }, UINT8_C( 1), { UINT64_C(14849858543504251991), UINT64_C(14849858543504251991) } }, { { UINT64_C(15052443541152030393), UINT64_C(12174668995002936953) }, UINT8_C( 1), { UINT64_C(12174668995002936953), UINT64_C(12174668995002936953) } }, { { UINT64_C( 8707568158386885676), UINT64_C(12477432015999959385) }, UINT8_C( 1), { UINT64_C(12477432015999959385), 
UINT64_C(12477432015999959385) } }, { { UINT64_C( 4682118690185958247), UINT64_C(12230875630775692699) }, UINT8_C( 1), { UINT64_C(12230875630775692699), UINT64_C(12230875630775692699) } }, { { UINT64_C(18306794091455890358), UINT64_C( 6830403770862296378) }, UINT8_C( 0), { UINT64_C(18306794091455890358), UINT64_C(18306794091455890358) } }, { { UINT64_C( 2305295186427806230), UINT64_C(18379435529598636957) }, UINT8_C( 1), { UINT64_C(18379435529598636957), UINT64_C(18379435529598636957) } }, { { UINT64_C(17584683220473821853), UINT64_C(17625863477911098554) }, UINT8_C( 1), { UINT64_C(17625863477911098554), UINT64_C(17625863477911098554) } }, { { UINT64_C(13924499850132829868), UINT64_C(12883094807028726044) }, UINT8_C( 1), { UINT64_C(12883094807028726044), UINT64_C(12883094807028726044) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t vec; simde_uint64x2_t r; vec = simde_vld1q_u64(test_vec[i].vec); SIMDE_CONSTIFY_2_(simde_vdupq_laneq_u64, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_u64(UINT64_C(0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t vec = simde_test_arm_neon_random_u64x2(); int lane = simde_test_codegen_random_u8() & 1; simde_uint64x2_t r; SIMDE_CONSTIFY_2_(simde_vdupq_laneq_u64, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_u64(UINT64_C(0))), lane, vec); simde_test_arm_neon_write_u64x2(2, vec, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, HEDLEY_STATIC_CAST(uint8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } HEDLEY_DIAGNOSTIC_POP SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vdup_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_lane_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_lane_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_lane_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_lane_s32) 
SIMDE_TEST_FUNC_LIST_ENTRY(vdup_lane_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_lane_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_lane_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_laneq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_laneq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_laneq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_laneq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_laneq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_laneq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_laneq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_laneq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_laneq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_laneq_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_laneq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_laneq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_laneq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_laneq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_laneq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_laneq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_laneq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_laneq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_laneq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_laneq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/dup_n.c000066400000000000000000000725561400333146700166070ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN dup_n #include "test-neon.h" #include "../../../simde/arm/neon/dup_n.h" static int test_simde_vdup_n_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a; simde_float32 r[2]; } test_vec[] = { { SIMDE_FLOAT32_C( 891.26), { SIMDE_FLOAT32_C( 891.26), SIMDE_FLOAT32_C( 891.26) } }, { SIMDE_FLOAT32_C( -212.41), { SIMDE_FLOAT32_C( -212.41), SIMDE_FLOAT32_C( -212.41) } }, { SIMDE_FLOAT32_C( 467.72), { SIMDE_FLOAT32_C( 467.72), SIMDE_FLOAT32_C( 467.72) } }, { SIMDE_FLOAT32_C( -459.92), { SIMDE_FLOAT32_C( -459.92), SIMDE_FLOAT32_C( -459.92) } }, { SIMDE_FLOAT32_C( 178.65), { SIMDE_FLOAT32_C( 178.65), SIMDE_FLOAT32_C( 178.65) } }, { SIMDE_FLOAT32_C( 923.96), { SIMDE_FLOAT32_C( 923.96), SIMDE_FLOAT32_C( 923.96) } }, { 
SIMDE_FLOAT32_C( -720.22), { SIMDE_FLOAT32_C( -720.22), SIMDE_FLOAT32_C( -720.22) } }, { SIMDE_FLOAT32_C( -919.57), { SIMDE_FLOAT32_C( -919.57), SIMDE_FLOAT32_C( -919.57) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32 a; simde_float32x2_t r; a = test_vec[i].a; r = simde_vdup_n_f32(a); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vdup_n_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { double a; simde_float64 r[1]; } test_vec[] = { { SIMDE_FLOAT64_C( -118.53), { SIMDE_FLOAT64_C( -118.53) } }, { SIMDE_FLOAT64_C( -542.12), { SIMDE_FLOAT64_C( -542.12) } }, { SIMDE_FLOAT64_C( -633.36), { SIMDE_FLOAT64_C( -633.36) } }, { SIMDE_FLOAT64_C( -497.58), { SIMDE_FLOAT64_C( -497.58) } }, { SIMDE_FLOAT64_C( 313.94), { SIMDE_FLOAT64_C( 313.94) } }, { SIMDE_FLOAT64_C( 244.08), { SIMDE_FLOAT64_C( 244.08) } }, { SIMDE_FLOAT64_C( 138.92), { SIMDE_FLOAT64_C( 138.92) } }, { SIMDE_FLOAT64_C( -906.24), { SIMDE_FLOAT64_C( -906.24) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64 a = test_vec[i].a; simde_float64x1_t r = simde_vdup_n_f64(a); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vdup_n_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a; int8_t r[8]; } test_vec[] = { { -INT8_C( 125), { -INT8_C( 125), -INT8_C( 125), -INT8_C( 125), -INT8_C( 125), -INT8_C( 125), -INT8_C( 125), -INT8_C( 125), -INT8_C( 125) } }, { INT8_C( 51), { INT8_C( 51), INT8_C( 51), INT8_C( 51), INT8_C( 51), INT8_C( 51), INT8_C( 51), INT8_C( 51), INT8_C( 51) } }, { -INT8_C( 121), { -INT8_C( 121), -INT8_C( 121), -INT8_C( 121), -INT8_C( 121), -INT8_C( 121), -INT8_C( 121), -INT8_C( 121), -INT8_C( 121) } }, { -INT8_C( 82), { -INT8_C( 82), -INT8_C( 82), -INT8_C( 82), -INT8_C( 82), -INT8_C( 82), -INT8_C( 82), -INT8_C( 82), -INT8_C( 82) } }, { -INT8_C( 27), { -INT8_C( 27), -INT8_C( 27), -INT8_C( 27), -INT8_C( 
27), -INT8_C( 27), -INT8_C( 27), -INT8_C( 27), -INT8_C( 27) } }, { -INT8_C( 6), { -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6) } }, { -INT8_C( 22), { -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22) } }, { INT8_C( 103), { INT8_C( 103), INT8_C( 103), INT8_C( 103), INT8_C( 103), INT8_C( 103), INT8_C( 103), INT8_C( 103), INT8_C( 103) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int8_t a = test_vec[i].a; simde_int8x8_t r = simde_vdup_n_s8(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vdup_n_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a; int16_t r[4]; } test_vec[] = { { INT16_C( 14470), { INT16_C( 14470), INT16_C( 14470), INT16_C( 14470), INT16_C( 14470) } }, { -INT16_C( 25891), { -INT16_C( 25891), -INT16_C( 25891), -INT16_C( 25891), -INT16_C( 25891) } }, { -INT16_C( 19194), { -INT16_C( 19194), -INT16_C( 19194), -INT16_C( 19194), -INT16_C( 19194) } }, { INT16_C( 23205), { INT16_C( 23205), INT16_C( 23205), INT16_C( 23205), INT16_C( 23205) } }, { -INT16_C( 2042), { -INT16_C( 2042), -INT16_C( 2042), -INT16_C( 2042), -INT16_C( 2042) } }, { INT16_C( 18383), { INT16_C( 18383), INT16_C( 18383), INT16_C( 18383), INT16_C( 18383) } }, { INT16_C( 31162), { INT16_C( 31162), INT16_C( 31162), INT16_C( 31162), INT16_C( 31162) } }, { INT16_C( 13153), { INT16_C( 13153), INT16_C( 13153), INT16_C( 13153), INT16_C( 13153) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int16_t a = test_vec[i].a; simde_int16x4_t r = simde_vdup_n_s16(a); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vdup_n_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a; int32_t r[2]; } test_vec[] = { { -INT32_C( 1024619355), { -INT32_C( 1024619355), -INT32_C( 1024619355) } }, { -INT32_C( 1215280205), { 
-INT32_C( 1215280205), -INT32_C( 1215280205) } }, { INT32_C( 1111759973), { INT32_C( 1111759973), INT32_C( 1111759973) } }, { -INT32_C( 1199775095), { -INT32_C( 1199775095), -INT32_C( 1199775095) } }, { INT32_C( 633784935), { INT32_C( 633784935), INT32_C( 633784935) } }, { -INT32_C( 1048329552), { -INT32_C( 1048329552), -INT32_C( 1048329552) } }, { -INT32_C( 1215802644), { -INT32_C( 1215802644), -INT32_C( 1215802644) } }, { INT32_C( 357141359), { INT32_C( 357141359), INT32_C( 357141359) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int32_t a = test_vec[i].a; simde_int32x2_t r = simde_vdup_n_s32(a); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vdup_n_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a; int64_t r[1]; } test_vec[] = { { -INT64_C( 5115005979396968606), { -INT64_C( 5115005979396968606) } }, { INT64_C( 7983384083745204187), { INT64_C( 7983384083745204187) } }, { -INT64_C( 1545395142558030869), { -INT64_C( 1545395142558030869) } }, { -INT64_C( 8687466611181760880), { -INT64_C( 8687466611181760880) } }, { INT64_C( 3332119824696179291), { INT64_C( 3332119824696179291) } }, { -INT64_C( 365712898301861384), { -INT64_C( 365712898301861384) } }, { INT64_C( 6545642081325163422), { INT64_C( 6545642081325163422) } }, { -INT64_C( 8195148122753775720), { -INT64_C( 8195148122753775720) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int64_t a = test_vec[i].a; simde_int64x1_t r = simde_vdup_n_s64(a); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; } static int test_simde_vdup_n_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a; uint8_t r[16]; } test_vec[] = { { UINT8_C( 70), { UINT8_C( 70), UINT8_C( 70), UINT8_C( 70), UINT8_C( 70), UINT8_C( 70), UINT8_C( 70), UINT8_C( 70), UINT8_C( 70) } }, { UINT8_C( 47), { UINT8_C( 47), UINT8_C( 47), UINT8_C( 47), UINT8_C( 47), UINT8_C( 47), UINT8_C( 47), UINT8_C( 
47), UINT8_C( 47) } }, { UINT8_C(150), { UINT8_C(150), UINT8_C(150), UINT8_C(150), UINT8_C(150), UINT8_C(150), UINT8_C(150), UINT8_C(150), UINT8_C(150) } }, { UINT8_C( 51), { UINT8_C( 51), UINT8_C( 51), UINT8_C( 51), UINT8_C( 51), UINT8_C( 51), UINT8_C( 51), UINT8_C( 51), UINT8_C( 51) } }, { UINT8_C(213), { UINT8_C(213), UINT8_C(213), UINT8_C(213), UINT8_C(213), UINT8_C(213), UINT8_C(213), UINT8_C(213), UINT8_C(213) } }, { UINT8_C(253), { UINT8_C(253), UINT8_C(253), UINT8_C(253), UINT8_C(253), UINT8_C(253), UINT8_C(253), UINT8_C(253), UINT8_C(253) } }, { UINT8_C( 68), { UINT8_C( 68), UINT8_C( 68), UINT8_C( 68), UINT8_C( 68), UINT8_C( 68), UINT8_C( 68), UINT8_C( 68), UINT8_C( 68) } }, { UINT8_C( 71), { UINT8_C( 71), UINT8_C( 71), UINT8_C( 71), UINT8_C( 71), UINT8_C( 71), UINT8_C( 71), UINT8_C( 71), UINT8_C( 71) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint8_t a = test_vec[i].a; simde_uint8x8_t r = simde_vdup_n_u8(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vdup_n_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a; uint16_t r[4]; } test_vec[] = { { UINT16_C(51628), { UINT16_C(51628), UINT16_C(51628), UINT16_C(51628), UINT16_C(51628) } }, { UINT16_C(19470), { UINT16_C(19470), UINT16_C(19470), UINT16_C(19470), UINT16_C(19470) } }, { UINT16_C(18389), { UINT16_C(18389), UINT16_C(18389), UINT16_C(18389), UINT16_C(18389) } }, { UINT16_C( 116), { UINT16_C( 116), UINT16_C( 116), UINT16_C( 116), UINT16_C( 116) } }, { UINT16_C(44704), { UINT16_C(44704), UINT16_C(44704), UINT16_C(44704), UINT16_C(44704) } }, { UINT16_C(37574), { UINT16_C(37574), UINT16_C(37574), UINT16_C(37574), UINT16_C(37574) } }, { UINT16_C(18426), { UINT16_C(18426), UINT16_C(18426), UINT16_C(18426), UINT16_C(18426) } }, { UINT16_C(63921), { UINT16_C(63921), UINT16_C(63921), UINT16_C(63921), UINT16_C(63921) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint16_t a = 
test_vec[i].a; simde_uint16x4_t r = simde_vdup_n_u16(a); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vdup_n_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a; uint32_t r[2]; } test_vec[] = { { UINT32_C(3794667895), { UINT32_C(3794667895), UINT32_C(3794667895) } }, { UINT32_C(1726335693), { UINT32_C(1726335693), UINT32_C(1726335693) } }, { UINT32_C(2983766046), { UINT32_C(2983766046), UINT32_C(2983766046) } }, { UINT32_C(3005127747), { UINT32_C(3005127747), UINT32_C(3005127747) } }, { UINT32_C(2102894648), { UINT32_C(2102894648), UINT32_C(2102894648) } }, { UINT32_C(3216998892), { UINT32_C(3216998892), UINT32_C(3216998892) } }, { UINT32_C( 828608890), { UINT32_C( 828608890), UINT32_C( 828608890) } }, { UINT32_C( 884825021), { UINT32_C( 884825021), UINT32_C( 884825021) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint32_t a = test_vec[i].a; simde_uint32x2_t r = simde_vdup_n_u32(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vdup_n_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a; uint64_t r[1]; } test_vec[] = { { UINT64_C(13339791427449258007), { UINT64_C(13339791427449258007) } }, { UINT64_C( 7070510947546969075), { UINT64_C( 7070510947546969075) } }, { UINT64_C( 8999127261392807922), { UINT64_C( 8999127261392807922) } }, { UINT64_C(12929560324791906007), { UINT64_C(12929560324791906007) } }, { UINT64_C(14006218822742530566), { UINT64_C(14006218822742530566) } }, { UINT64_C(16134641465453407304), { UINT64_C(16134641465453407304) } }, { UINT64_C(17146709105050043369), { UINT64_C(17146709105050043369) } }, { UINT64_C( 3988202661257292296), { UINT64_C( 3988202661257292296) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint64_t a = test_vec[i].a; simde_uint64x1_t r = simde_vdup_n_u64(a); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } 
return 0; } static int test_simde_vdupq_n_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a; simde_float32 r[4]; } test_vec[] = { { SIMDE_FLOAT32_C( -253.37), { SIMDE_FLOAT32_C( -253.37), SIMDE_FLOAT32_C( -253.37), SIMDE_FLOAT32_C( -253.37), SIMDE_FLOAT32_C( -253.37) } }, { SIMDE_FLOAT32_C( 678.81), { SIMDE_FLOAT32_C( 678.81), SIMDE_FLOAT32_C( 678.81), SIMDE_FLOAT32_C( 678.81), SIMDE_FLOAT32_C( 678.81) } }, { SIMDE_FLOAT32_C( 287.86), { SIMDE_FLOAT32_C( 287.86), SIMDE_FLOAT32_C( 287.86), SIMDE_FLOAT32_C( 287.86), SIMDE_FLOAT32_C( 287.86) } }, { SIMDE_FLOAT32_C( -23.55), { SIMDE_FLOAT32_C( -23.55), SIMDE_FLOAT32_C( -23.55), SIMDE_FLOAT32_C( -23.55), SIMDE_FLOAT32_C( -23.55) } }, { SIMDE_FLOAT32_C( 255.34), { SIMDE_FLOAT32_C( 255.34), SIMDE_FLOAT32_C( 255.34), SIMDE_FLOAT32_C( 255.34), SIMDE_FLOAT32_C( 255.34) } }, { SIMDE_FLOAT32_C( -331.64), { SIMDE_FLOAT32_C( -331.64), SIMDE_FLOAT32_C( -331.64), SIMDE_FLOAT32_C( -331.64), SIMDE_FLOAT32_C( -331.64) } }, { SIMDE_FLOAT32_C( 507.10), { SIMDE_FLOAT32_C( 507.10), SIMDE_FLOAT32_C( 507.10), SIMDE_FLOAT32_C( 507.10), SIMDE_FLOAT32_C( 507.10) } }, { SIMDE_FLOAT32_C( 555.78), { SIMDE_FLOAT32_C( 555.78), SIMDE_FLOAT32_C( 555.78), SIMDE_FLOAT32_C( 555.78), SIMDE_FLOAT32_C( 555.78) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32 a = test_vec[i].a; simde_float32x4_t r = simde_vdupq_n_f32(a); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vdupq_n_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a; simde_float64 r[2]; } test_vec[] = { { SIMDE_FLOAT64_C( -118.41), { SIMDE_FLOAT64_C( -118.41), SIMDE_FLOAT64_C( -118.41) } }, { SIMDE_FLOAT64_C( -822.94), { SIMDE_FLOAT64_C( -822.94), SIMDE_FLOAT64_C( -822.94) } }, { SIMDE_FLOAT64_C( -264.09), { SIMDE_FLOAT64_C( -264.09), SIMDE_FLOAT64_C( -264.09) } }, { SIMDE_FLOAT64_C( -893.94), { SIMDE_FLOAT64_C( -893.94), SIMDE_FLOAT64_C( -893.94) } }, { SIMDE_FLOAT64_C( 
267.56), { SIMDE_FLOAT64_C( 267.56), SIMDE_FLOAT64_C( 267.56) } }, { SIMDE_FLOAT64_C( -972.95), { SIMDE_FLOAT64_C( -972.95), SIMDE_FLOAT64_C( -972.95) } }, { SIMDE_FLOAT64_C( -132.42), { SIMDE_FLOAT64_C( -132.42), SIMDE_FLOAT64_C( -132.42) } }, { SIMDE_FLOAT64_C( -34.49), { SIMDE_FLOAT64_C( -34.49), SIMDE_FLOAT64_C( -34.49) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64 a = test_vec[i].a; simde_float64x2_t r = simde_vdupq_n_f64(a); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vdupq_n_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a; int8_t r[16]; } test_vec[] = { { -INT8_C( 88), { -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88), -INT8_C( 88) } }, { INT8_C( 76), { INT8_C( 76), INT8_C( 76), INT8_C( 76), INT8_C( 76), INT8_C( 76), INT8_C( 76), INT8_C( 76), INT8_C( 76), INT8_C( 76), INT8_C( 76), INT8_C( 76), INT8_C( 76), INT8_C( 76), INT8_C( 76), INT8_C( 76), INT8_C( 76) } }, { -INT8_C( 22), { -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), -INT8_C( 22) } }, { -INT8_C( 37), { -INT8_C( 37), -INT8_C( 37), -INT8_C( 37), -INT8_C( 37), -INT8_C( 37), -INT8_C( 37), -INT8_C( 37), -INT8_C( 37), -INT8_C( 37), -INT8_C( 37), -INT8_C( 37), -INT8_C( 37), -INT8_C( 37), -INT8_C( 37), -INT8_C( 37), -INT8_C( 37) } }, { -INT8_C( 58), { -INT8_C( 58), -INT8_C( 58), -INT8_C( 58), -INT8_C( 58), -INT8_C( 58), -INT8_C( 58), -INT8_C( 58), -INT8_C( 58), -INT8_C( 58), -INT8_C( 58), -INT8_C( 58), -INT8_C( 58), -INT8_C( 58), -INT8_C( 58), -INT8_C( 58), -INT8_C( 58) } }, { INT8_C( 5), { INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), 
INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5) } }, { INT8_C( 39), { INT8_C( 39), INT8_C( 39), INT8_C( 39), INT8_C( 39), INT8_C( 39), INT8_C( 39), INT8_C( 39), INT8_C( 39), INT8_C( 39), INT8_C( 39), INT8_C( 39), INT8_C( 39), INT8_C( 39), INT8_C( 39), INT8_C( 39), INT8_C( 39) } }, { -INT8_C( 90), { -INT8_C( 90), -INT8_C( 90), -INT8_C( 90), -INT8_C( 90), -INT8_C( 90), -INT8_C( 90), -INT8_C( 90), -INT8_C( 90), -INT8_C( 90), -INT8_C( 90), -INT8_C( 90), -INT8_C( 90), -INT8_C( 90), -INT8_C( 90), -INT8_C( 90), -INT8_C( 90) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int8_t a = test_vec[i].a; simde_int8x16_t r = simde_vdupq_n_s8(a); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vdupq_n_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a; int16_t r[8]; } test_vec[] = { { -INT16_C( 12855), { -INT16_C( 12855), -INT16_C( 12855), -INT16_C( 12855), -INT16_C( 12855), -INT16_C( 12855), -INT16_C( 12855), -INT16_C( 12855), -INT16_C( 12855) } }, { -INT16_C( 7137), { -INT16_C( 7137), -INT16_C( 7137), -INT16_C( 7137), -INT16_C( 7137), -INT16_C( 7137), -INT16_C( 7137), -INT16_C( 7137), -INT16_C( 7137) } }, { -INT16_C( 1466), { -INT16_C( 1466), -INT16_C( 1466), -INT16_C( 1466), -INT16_C( 1466), -INT16_C( 1466), -INT16_C( 1466), -INT16_C( 1466), -INT16_C( 1466) } }, { -INT16_C( 32577), { -INT16_C( 32577), -INT16_C( 32577), -INT16_C( 32577), -INT16_C( 32577), -INT16_C( 32577), -INT16_C( 32577), -INT16_C( 32577), -INT16_C( 32577) } }, { INT16_C( 31960), { INT16_C( 31960), INT16_C( 31960), INT16_C( 31960), INT16_C( 31960), INT16_C( 31960), INT16_C( 31960), INT16_C( 31960), INT16_C( 31960) } }, { -INT16_C( 27505), { -INT16_C( 27505), -INT16_C( 27505), -INT16_C( 27505), -INT16_C( 27505), -INT16_C( 27505), -INT16_C( 27505), -INT16_C( 27505), -INT16_C( 27505) } }, { -INT16_C( 27913), { -INT16_C( 27913), -INT16_C( 27913), -INT16_C( 27913), 
-INT16_C( 27913), -INT16_C( 27913), -INT16_C( 27913), -INT16_C( 27913), -INT16_C( 27913) } }, { -INT16_C( 2168), { -INT16_C( 2168), -INT16_C( 2168), -INT16_C( 2168), -INT16_C( 2168), -INT16_C( 2168), -INT16_C( 2168), -INT16_C( 2168), -INT16_C( 2168) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int16_t a = test_vec[i].a; simde_int16x8_t r = simde_vdupq_n_s16(a); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vdupq_n_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a; int32_t r[4]; } test_vec[] = { { INT32_C( 1498606215), { INT32_C( 1498606215), INT32_C( 1498606215), INT32_C( 1498606215), INT32_C( 1498606215) } }, { INT32_C( 859053161), { INT32_C( 859053161), INT32_C( 859053161), INT32_C( 859053161), INT32_C( 859053161) } }, { -INT32_C( 1117116324), { -INT32_C( 1117116324), -INT32_C( 1117116324), -INT32_C( 1117116324), -INT32_C( 1117116324) } }, { INT32_C( 637922202), { INT32_C( 637922202), INT32_C( 637922202), INT32_C( 637922202), INT32_C( 637922202) } }, { INT32_C( 1777200653), { INT32_C( 1777200653), INT32_C( 1777200653), INT32_C( 1777200653), INT32_C( 1777200653) } }, { INT32_C( 1736753390), { INT32_C( 1736753390), INT32_C( 1736753390), INT32_C( 1736753390), INT32_C( 1736753390) } }, { INT32_C( 1743553983), { INT32_C( 1743553983), INT32_C( 1743553983), INT32_C( 1743553983), INT32_C( 1743553983) } }, { INT32_C( 833012906), { INT32_C( 833012906), INT32_C( 833012906), INT32_C( 833012906), INT32_C( 833012906) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int32_t a = test_vec[i].a; simde_int32x4_t r = simde_vdupq_n_s32(a); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vdupq_n_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a; int64_t r[2]; } test_vec[] = { { INT64_C( 3084521629620139883), { INT64_C( 3084521629620139883), INT64_C( 3084521629620139883) } }, { -INT64_C( 
3698824787540598387), { -INT64_C( 3698824787540598387), -INT64_C( 3698824787540598387) } }, { INT64_C( 5697069037127965299), { INT64_C( 5697069037127965299), INT64_C( 5697069037127965299) } }, { INT64_C( 8459596326044522619), { INT64_C( 8459596326044522619), INT64_C( 8459596326044522619) } }, { -INT64_C( 2000541241550771459), { -INT64_C( 2000541241550771459), -INT64_C( 2000541241550771459) } }, { INT64_C( 974893640165390152), { INT64_C( 974893640165390152), INT64_C( 974893640165390152) } }, { -INT64_C( 826418811364742626), { -INT64_C( 826418811364742626), -INT64_C( 826418811364742626) } }, { INT64_C( 2364107377413299280), { INT64_C( 2364107377413299280), INT64_C( 2364107377413299280) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int64_t a = test_vec[i].a; simde_int64x2_t r = simde_vdupq_n_s64(a); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vdupq_n_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a; uint8_t r[16]; } test_vec[] = { { UINT8_C( 88), { UINT8_C( 88), UINT8_C( 88), UINT8_C( 88), UINT8_C( 88), UINT8_C( 88), UINT8_C( 88), UINT8_C( 88), UINT8_C( 88), UINT8_C( 88), UINT8_C( 88), UINT8_C( 88), UINT8_C( 88), UINT8_C( 88), UINT8_C( 88), UINT8_C( 88), UINT8_C( 88) } }, { UINT8_C(137), { UINT8_C(137), UINT8_C(137), UINT8_C(137), UINT8_C(137), UINT8_C(137), UINT8_C(137), UINT8_C(137), UINT8_C(137), UINT8_C(137), UINT8_C(137), UINT8_C(137), UINT8_C(137), UINT8_C(137), UINT8_C(137), UINT8_C(137), UINT8_C(137) } }, { UINT8_C( 33), { UINT8_C( 33), UINT8_C( 33), UINT8_C( 33), UINT8_C( 33), UINT8_C( 33), UINT8_C( 33), UINT8_C( 33), UINT8_C( 33), UINT8_C( 33), UINT8_C( 33), UINT8_C( 33), UINT8_C( 33), UINT8_C( 33), UINT8_C( 33), UINT8_C( 33), UINT8_C( 33) } }, { UINT8_C(179), { UINT8_C(179), UINT8_C(179), UINT8_C(179), UINT8_C(179), UINT8_C(179), UINT8_C(179), UINT8_C(179), UINT8_C(179), UINT8_C(179), UINT8_C(179), UINT8_C(179), UINT8_C(179), UINT8_C(179), UINT8_C(179), 
UINT8_C(179), UINT8_C(179) } }, { UINT8_C( 68), { UINT8_C( 68), UINT8_C( 68), UINT8_C( 68), UINT8_C( 68), UINT8_C( 68), UINT8_C( 68), UINT8_C( 68), UINT8_C( 68), UINT8_C( 68), UINT8_C( 68), UINT8_C( 68), UINT8_C( 68), UINT8_C( 68), UINT8_C( 68), UINT8_C( 68), UINT8_C( 68) } }, { UINT8_C(153), { UINT8_C(153), UINT8_C(153), UINT8_C(153), UINT8_C(153), UINT8_C(153), UINT8_C(153), UINT8_C(153), UINT8_C(153), UINT8_C(153), UINT8_C(153), UINT8_C(153), UINT8_C(153), UINT8_C(153), UINT8_C(153), UINT8_C(153), UINT8_C(153) } }, { UINT8_C(195), { UINT8_C(195), UINT8_C(195), UINT8_C(195), UINT8_C(195), UINT8_C(195), UINT8_C(195), UINT8_C(195), UINT8_C(195), UINT8_C(195), UINT8_C(195), UINT8_C(195), UINT8_C(195), UINT8_C(195), UINT8_C(195), UINT8_C(195), UINT8_C(195) } }, { UINT8_C(221), { UINT8_C(221), UINT8_C(221), UINT8_C(221), UINT8_C(221), UINT8_C(221), UINT8_C(221), UINT8_C(221), UINT8_C(221), UINT8_C(221), UINT8_C(221), UINT8_C(221), UINT8_C(221), UINT8_C(221), UINT8_C(221), UINT8_C(221), UINT8_C(221) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint8_t a = test_vec[i].a; simde_uint8x16_t r = simde_vdupq_n_u8(a); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vdupq_n_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a; uint16_t r[8]; } test_vec[] = { { UINT16_C(15077), { UINT16_C(15077), UINT16_C(15077), UINT16_C(15077), UINT16_C(15077), UINT16_C(15077), UINT16_C(15077), UINT16_C(15077), UINT16_C(15077) } }, { UINT16_C(23766), { UINT16_C(23766), UINT16_C(23766), UINT16_C(23766), UINT16_C(23766), UINT16_C(23766), UINT16_C(23766), UINT16_C(23766), UINT16_C(23766) } }, { UINT16_C(62373), { UINT16_C(62373), UINT16_C(62373), UINT16_C(62373), UINT16_C(62373), UINT16_C(62373), UINT16_C(62373), UINT16_C(62373), UINT16_C(62373) } }, { UINT16_C( 9434), { UINT16_C( 9434), UINT16_C( 9434), UINT16_C( 9434), UINT16_C( 9434), UINT16_C( 9434), UINT16_C( 9434), UINT16_C( 9434), UINT16_C( 
9434) } }, { UINT16_C(13770), { UINT16_C(13770), UINT16_C(13770), UINT16_C(13770), UINT16_C(13770), UINT16_C(13770), UINT16_C(13770), UINT16_C(13770), UINT16_C(13770) } }, { UINT16_C(60300), { UINT16_C(60300), UINT16_C(60300), UINT16_C(60300), UINT16_C(60300), UINT16_C(60300), UINT16_C(60300), UINT16_C(60300), UINT16_C(60300) } }, { UINT16_C( 9690), { UINT16_C( 9690), UINT16_C( 9690), UINT16_C( 9690), UINT16_C( 9690), UINT16_C( 9690), UINT16_C( 9690), UINT16_C( 9690), UINT16_C( 9690) } }, { UINT16_C( 2267), { UINT16_C( 2267), UINT16_C( 2267), UINT16_C( 2267), UINT16_C( 2267), UINT16_C( 2267), UINT16_C( 2267), UINT16_C( 2267), UINT16_C( 2267) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint16_t a = test_vec[i].a; simde_uint16x8_t r = simde_vdupq_n_u16(a); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vdupq_n_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a; uint32_t r[4]; } test_vec[] = { { UINT32_C( 871626588), { UINT32_C( 871626588), UINT32_C( 871626588), UINT32_C( 871626588), UINT32_C( 871626588) } }, { UINT32_C(3690858582), { UINT32_C(3690858582), UINT32_C(3690858582), UINT32_C(3690858582), UINT32_C(3690858582) } }, { UINT32_C(2630828423), { UINT32_C(2630828423), UINT32_C(2630828423), UINT32_C(2630828423), UINT32_C(2630828423) } }, { UINT32_C(3967771416), { UINT32_C(3967771416), UINT32_C(3967771416), UINT32_C(3967771416), UINT32_C(3967771416) } }, { UINT32_C(1205362100), { UINT32_C(1205362100), UINT32_C(1205362100), UINT32_C(1205362100), UINT32_C(1205362100) } }, { UINT32_C(1010872729), { UINT32_C(1010872729), UINT32_C(1010872729), UINT32_C(1010872729), UINT32_C(1010872729) } }, { UINT32_C(3921730572), { UINT32_C(3921730572), UINT32_C(3921730572), UINT32_C(3921730572), UINT32_C(3921730572) } }, { UINT32_C(2097348897), { UINT32_C(2097348897), UINT32_C(2097348897), UINT32_C(2097348897), UINT32_C(2097348897) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { uint32_t a = test_vec[i].a; simde_uint32x4_t r = simde_vdupq_n_u32(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vdupq_n_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a; uint64_t r[2]; } test_vec[] = { { UINT64_C( 9197789892391848940), { UINT64_C( 9197789892391848940), UINT64_C( 9197789892391848940) } }, { UINT64_C( 4409839811323935032), { UINT64_C( 4409839811323935032), UINT64_C( 4409839811323935032) } }, { UINT64_C( 685931058055602176), { UINT64_C( 685931058055602176), UINT64_C( 685931058055602176) } }, { UINT64_C(12677749240105368630), { UINT64_C(12677749240105368630), UINT64_C(12677749240105368630) } }, { UINT64_C(17861042748132860468), { UINT64_C(17861042748132860468), UINT64_C(17861042748132860468) } }, { UINT64_C( 1950859198369969636), { UINT64_C( 1950859198369969636), UINT64_C( 1950859198369969636) } }, { UINT64_C( 102279644159074744), { UINT64_C( 102279644159074744), UINT64_C( 102279644159074744) } }, { UINT64_C(18324753029341470939), { UINT64_C(18324753029341470939), UINT64_C(18324753029341470939) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint64_t a = test_vec[i].a; simde_uint64x2_t r = simde_vdupq_n_u64(a); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vdup_n_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_n_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_n_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_n_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_n_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_n_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vdup_n_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_n_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_n_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_n_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_n_s32) 
SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_n_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_n_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_n_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vdupq_n_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/eor.c000066400000000000000000001434401400333146700162560ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN eor #include "test-neon.h" #include "../../../simde/arm/neon/eor.h" static int test_simde_veor_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 57), INT8_C( 76), INT8_C( 112), INT8_C( 126), INT8_C( 46), -INT8_C( 66), -INT8_C( 3), INT8_C( 47) }, { INT8_C( 8), -INT8_C( 105), -INT8_C( 32), -INT8_C( 33), INT8_C( 118), INT8_C( 37), INT8_C( 95), INT8_C( 95) }, { -INT8_C( 49), -INT8_C( 37), -INT8_C( 112), -INT8_C( 95), INT8_C( 88), -INT8_C( 101), -INT8_C( 94), INT8_C( 112) } }, { { -INT8_C( 52), -INT8_C( 65), INT8_C( 78), -INT8_C( 24), INT8_C( 60), -INT8_C( 59), -INT8_C( 102), -INT8_C( 44) }, { -INT8_C( 22), -INT8_C( 3), INT8_C( 118), -INT8_C( 6), INT8_C( 21), -INT8_C( 127), INT8_C( 40), -INT8_C( 36) }, { INT8_C( 38), INT8_C( 66), INT8_C( 56), INT8_C( 18), INT8_C( 41), INT8_C( 68), -INT8_C( 78), INT8_C( 8) } }, { { -INT8_C( 50), -INT8_C( 104), INT8_C( 90), -INT8_C( 4), INT8_C( 86), INT8_C( 88), INT8_C( 43), INT8_C( 94) }, { -INT8_C( 17), INT8_C( 11), INT8_C( 62), INT8_C( 101), INT8_C( 48), -INT8_C( 99), -INT8_C( 60), -INT8_C( 3) }, { INT8_C( 33), -INT8_C( 109), INT8_C( 100), -INT8_C( 103), INT8_C( 102), -INT8_C( 59), -INT8_C( 17), -INT8_C( 93) } }, { { INT8_C( 92), INT8_C( 19), -INT8_C( 27), -INT8_C( 103), -INT8_C( 40), INT8_MAX, INT8_C( 109), -INT8_C( 61) }, { INT8_C( 124), -INT8_C( 28), -INT8_C( 67), -INT8_C( 110), INT8_C( 101), -INT8_C( 26), INT8_C( 110), INT8_C( 51) }, { INT8_C( 32), -INT8_C( 9), INT8_C( 88), INT8_C( 11), -INT8_C( 67), -INT8_C( 103), INT8_C( 3), -INT8_C( 16) } }, { { INT8_C( 126), -INT8_C( 55), INT8_C( 
48), -INT8_C( 43), INT8_C( 33), INT8_C( 91), INT8_C( 51), INT8_C( 16) }, { INT8_C( 103), INT8_C( 113), INT8_C( 117), -INT8_C( 105), INT8_C( 14), INT8_C( 58), -INT8_C( 108), INT8_C( 107) }, { INT8_C( 25), -INT8_C( 72), INT8_C( 69), INT8_C( 66), INT8_C( 47), INT8_C( 97), -INT8_C( 89), INT8_C( 123) } }, { { INT8_C( 77), INT8_C( 122), INT8_C( 4), INT8_C( 37), -INT8_C( 7), INT8_C( 113), -INT8_C( 24), INT8_C( 118) }, { INT8_C( 85), -INT8_C( 90), INT8_C( 8), -INT8_C( 69), -INT8_C( 116), INT8_C( 118), -INT8_C( 18), INT8_C( 10) }, { INT8_C( 24), -INT8_C( 36), INT8_C( 12), -INT8_C( 98), INT8_C( 117), INT8_C( 7), INT8_C( 6), INT8_C( 124) } }, { { INT8_C( 63), INT8_C( 30), -INT8_C( 33), INT8_C( 96), INT8_C( 122), INT8_C( 19), INT8_C( 112), -INT8_C( 31) }, { -INT8_C( 124), -INT8_C( 26), INT8_C( 120), -INT8_C( 109), INT8_C( 32), INT8_C( 13), -INT8_C( 2), INT8_C( 109) }, { -INT8_C( 69), -INT8_C( 8), -INT8_C( 89), -INT8_C( 13), INT8_C( 90), INT8_C( 30), -INT8_C( 114), -INT8_C( 116) } }, { { -INT8_C( 121), INT8_C( 2), -INT8_C( 110), INT8_MIN, INT8_C( 115), INT8_C( 123), -INT8_C( 10), -INT8_C( 55) }, { INT8_C( 33), -INT8_C( 2), -INT8_C( 124), -INT8_C( 83), INT8_C( 117), INT8_C( 114), -INT8_C( 73), -INT8_C( 76) }, { -INT8_C( 90), -INT8_C( 4), INT8_C( 22), INT8_C( 45), INT8_C( 6), INT8_C( 9), INT8_C( 65), INT8_C( 125) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_veor_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_veor_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 7761), -INT16_C( 3055), INT16_C( 13277), INT16_C( 25055) }, { INT16_C( 10106), -INT16_C( 30479), -INT16_C( 20937), INT16_C( 24721) }, { INT16_C( 14635), INT16_C( 31968), -INT16_C( 25110), INT16_C( 334) } }, { { INT16_C( 10700), 
INT16_C( 30148), -INT16_C( 17695), INT16_C( 25566) }, { INT16_C( 15382), -INT16_C( 3115), -INT16_C( 27357), INT16_C( 29773) }, { INT16_C( 5594), -INT16_C( 31215), INT16_C( 12226), INT16_C( 6035) } }, { { INT16_C( 24499), -INT16_C( 28311), INT16_C( 18578), INT16_C( 3314) }, { -INT16_C( 7312), -INT16_C( 22635), INT16_C( 9874), INT16_C( 24071) }, { -INT16_C( 17213), INT16_C( 14076), INT16_C( 28160), INT16_C( 21237) } }, { { -INT16_C( 13489), INT16_C( 12755), -INT16_C( 20090), -INT16_C( 25452) }, { INT16_C( 27117), INT16_C( 4239), -INT16_C( 8705), -INT16_C( 19835) }, { -INT16_C( 23902), INT16_C( 8540), INT16_C( 27769), INT16_C( 11793) } }, { { -INT16_C( 4548), -INT16_C( 12733), INT16_C( 13878), -INT16_C( 22822) }, { INT16_C( 28441), -INT16_C( 21683), INT16_C( 21654), -INT16_C( 6903) }, { -INT16_C( 32475), INT16_C( 25870), INT16_C( 25248), INT16_C( 17363) } }, { { -INT16_C( 8928), -INT16_C( 23018), -INT16_C( 21618), INT16_C( 31810) }, { -INT16_C( 11756), INT16_C( 5004), INT16_C( 4527), -INT16_C( 5178) }, { INT16_C( 3892), -INT16_C( 19046), -INT16_C( 17887), -INT16_C( 26748) } }, { { INT16_C( 2559), INT16_C( 14009), -INT16_C( 27841), INT16_C( 23004) }, { INT16_C( 10755), -INT16_C( 26364), INT16_C( 3710), -INT16_C( 24962) }, { INT16_C( 9212), -INT16_C( 20547), -INT16_C( 25279), -INT16_C( 14430) } }, { { -INT16_C( 27157), INT16_C( 31044), -INT16_C( 30912), INT16_C( 21749) }, { -INT16_C( 32167), INT16_C( 2152), INT16_C( 11923), -INT16_C( 27661) }, { INT16_C( 6066), INT16_C( 28972), -INT16_C( 22061), -INT16_C( 14586) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_veor_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_veor_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 223173885), 
-INT32_C( 1105528766) }, { -INT32_C( 647648370), INT32_C( 936515285) }, { INT32_C( 735512717), -INT32_C( 1983320425) } }, { { INT32_C( 307579213), INT32_C( 1903436113) }, { INT32_C( 777768489), -INT32_C( 910509627) }, { INT32_C( 1007585124), -INT32_C( 1194420076) } }, { { -INT32_C( 1615106980), -INT32_C( 211954331) }, { INT32_C( 1439482496), INT32_C( 646749913) }, { -INT32_C( 898126116), -INT32_C( 707703876) } }, { { INT32_C( 960029159), INT32_C( 749382659) }, { INT32_C( 1079641467), INT32_C( 453579966) }, { INT32_C( 2036524188), INT32_C( 933476541) } }, { { -INT32_C( 440744832), INT32_C( 433592217) }, { -INT32_C( 1284594214), INT32_C( 735705923) }, { INT32_C( 1456759130), INT32_C( 838986970) } }, { { -INT32_C( 547089700), INT32_C( 957025982) }, { -INT32_C( 763730668), -INT32_C( 85097607) }, { INT32_C( 220100552), -INT32_C( 1008300601) } }, { { -INT32_C( 505436344), -INT32_C( 1711556417) }, { -INT32_C( 1605605027), INT32_C( 1103832420) }, { INT32_C( 1100205589), -INT32_C( 667902501) } }, { { -INT32_C( 165662921), INT32_C( 1378823230) }, { INT32_C( 186952081), INT32_C( 1963266605) }, { -INT32_C( 50035034), INT32_C( 657079827) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_veor_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_veor_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; int64_t b[1]; int64_t r[1]; } test_vec[] = { { { -INT64_C( 4828891895394799279) }, { -INT64_C( 978485463844695772) }, { INT64_C( 5663255367859937397) } }, { { -INT64_C( 2340679889570635955) }, { INT64_C( 3561912863977746482) }, { -INT64_C( 1231091376201633921) } }, { { INT64_C( 2416474760244209765) }, { INT64_C( 3960126324202680667) }, { INT64_C( 1692292520539298110) } }, { { -INT64_C( 6847313742192954864) }, { INT64_C( 2033041761997308452) }, { -INT64_C( 
4841469826250411980) } }, { { -INT64_C( 6226382278488406919) }, { -INT64_C( 7006577862129197661) }, { INT64_C( 3987041097380862426) } }, { { INT64_C( 5540466748793861076) }, { -INT64_C( 8086909024474151181) }, { -INT64_C( 4384757599176100569) } }, { { -INT64_C( 4268396355849441589) }, { -INT64_C( 1863060805330571271) }, { INT64_C( 2514847184329163058) } }, { { -INT64_C( 9134975086147019321) }, { -INT64_C( 2379888436905800878) }, { INT64_C( 6900355332590163605) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r = simde_veor_s64(a, b); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; } static int test_simde_veor_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(158), UINT8_C(246), UINT8_C( 5), UINT8_C(215), UINT8_C( 79), UINT8_C( 59), UINT8_C( 70), UINT8_C( 65) }, { UINT8_C(121), UINT8_C( 69), UINT8_C( 69), UINT8_C(120), UINT8_C( 84), UINT8_C(102), UINT8_C( 65), UINT8_C( 73) }, { UINT8_C(231), UINT8_C(179), UINT8_C( 64), UINT8_C(175), UINT8_C( 27), UINT8_C( 93), UINT8_C( 7), UINT8_C( 8) } }, { { UINT8_C(124), UINT8_C( 81), UINT8_C( 15), UINT8_C(239), UINT8_C(130), UINT8_C(246), UINT8_C( 22), UINT8_C( 98) }, { UINT8_C(136), UINT8_C(214), UINT8_C(239), UINT8_C(250), UINT8_C(198), UINT8_C( 97), UINT8_C( 56), UINT8_C(100) }, { UINT8_C(244), UINT8_C(135), UINT8_C(224), UINT8_C( 21), UINT8_C( 68), UINT8_C(151), UINT8_C( 46), UINT8_C( 6) } }, { { UINT8_C( 87), UINT8_C( 62), UINT8_C( 60), UINT8_C(166), UINT8_C(121), UINT8_C(130), UINT8_C(232), UINT8_C(242) }, { UINT8_C(199), UINT8_C( 45), UINT8_C(106), UINT8_C( 27), UINT8_C(147), UINT8_C(171), UINT8_C(101), UINT8_C( 15) }, { UINT8_C(144), UINT8_C( 19), UINT8_C( 86), UINT8_C(189), UINT8_C(234), UINT8_C( 41), UINT8_C(141), UINT8_C(253) } }, { { UINT8_C(253), UINT8_C(116), 
UINT8_MAX, UINT8_C(127), UINT8_C(106), UINT8_C( 21), UINT8_C(225), UINT8_C(243) }, { UINT8_C(235), UINT8_C(208), UINT8_C(237), UINT8_C(178), UINT8_C( 50), UINT8_C( 37), UINT8_C( 22), UINT8_C(137) }, { UINT8_C( 22), UINT8_C(164), UINT8_C( 18), UINT8_C(205), UINT8_C( 88), UINT8_C( 48), UINT8_C(247), UINT8_C(122) } }, { { UINT8_C( 99), UINT8_C( 82), UINT8_C( 48), UINT8_C(220), UINT8_C(212), UINT8_C( 24), UINT8_C(207), UINT8_C(156) }, { UINT8_C( 69), UINT8_C( 57), UINT8_C(183), UINT8_C(216), UINT8_C(229), UINT8_C( 28), UINT8_C(232), UINT8_C(226) }, { UINT8_C( 38), UINT8_C(107), UINT8_C(135), UINT8_C( 4), UINT8_C( 49), UINT8_C( 4), UINT8_C( 39), UINT8_C(126) } }, { { UINT8_C(145), UINT8_C(231), UINT8_C( 97), UINT8_C(251), UINT8_C(252), UINT8_C( 66), UINT8_C(238), UINT8_C(231) }, { UINT8_C( 19), UINT8_C(219), UINT8_C(153), UINT8_C( 69), UINT8_C( 1), UINT8_C(176), UINT8_C(206), UINT8_C(100) }, { UINT8_C(130), UINT8_C( 60), UINT8_C(248), UINT8_C(190), UINT8_C(253), UINT8_C(242), UINT8_C( 32), UINT8_C(131) } }, { { UINT8_C( 2), UINT8_C(254), UINT8_C( 65), UINT8_C(215), UINT8_C( 22), UINT8_C( 16), UINT8_C(115), UINT8_C( 91) }, { UINT8_C( 73), UINT8_C( 42), UINT8_C( 52), UINT8_C( 46), UINT8_C( 71), UINT8_C( 28), UINT8_C( 16), UINT8_C(216) }, { UINT8_C( 75), UINT8_C(212), UINT8_C(117), UINT8_C(249), UINT8_C( 81), UINT8_C( 12), UINT8_C( 99), UINT8_C(131) } }, { { UINT8_C( 3), UINT8_C(113), UINT8_C(211), UINT8_MAX, UINT8_C(180), UINT8_C(194), UINT8_C(230), UINT8_C(199) }, { UINT8_C(157), UINT8_C(128), UINT8_C( 12), UINT8_C(158), UINT8_C( 48), UINT8_C(218), UINT8_C( 3), UINT8_C( 50) }, { UINT8_C(158), UINT8_C(241), UINT8_C(223), UINT8_C( 97), UINT8_C(132), UINT8_C( 24), UINT8_C(229), UINT8_C(245) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_veor_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, 
simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_veor_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(44341), UINT16_C(23492), UINT16_C(26475), UINT16_C(49752) }, { UINT16_C( 6246), UINT16_C( 3973), UINT16_C(21664), UINT16_C(43892) }, { UINT16_C(46419), UINT16_C(21569), UINT16_C(13259), UINT16_C(26924) } }, { { UINT16_C(18060), UINT16_C(35603), UINT16_C(17216), UINT16_C(50117) }, { UINT16_C(54080), UINT16_C(21475), UINT16_C( 1737), UINT16_C(65206) }, { UINT16_C(38348), UINT16_C(55536), UINT16_C(17801), UINT16_C(15731) } }, { { UINT16_C(31412), UINT16_C( 8025), UINT16_C(45793), UINT16_C(18657) }, { UINT16_C(26314), UINT16_C(27223), UINT16_C(52155), UINT16_C(18198) }, { UINT16_C( 7294), UINT16_C(29966), UINT16_C(31066), UINT16_C( 4087) } }, { { UINT16_C(10513), UINT16_C(21202), UINT16_C(38764), UINT16_C(44053) }, { UINT16_C(63594), UINT16_C(13312), UINT16_C(46847), UINT16_C(45874) }, { UINT16_C(53627), UINT16_C(26322), UINT16_C( 8595), UINT16_C( 7975) } }, { { UINT16_C(35889), UINT16_C( 4818), UINT16_C(46142), UINT16_C( 2138) }, { UINT16_C(45338), UINT16_C(54643), UINT16_C(35196), UINT16_C(36381) }, { UINT16_C(15659), UINT16_C(51105), UINT16_C(15682), UINT16_C(34375) } }, { { UINT16_C(61362), UINT16_C( 7904), UINT16_C(62855), UINT16_C(61898) }, { UINT16_C(51950), UINT16_C(60709), UINT16_C(22657), UINT16_C(45728) }, { UINT16_C( 9564), UINT16_C(62405), UINT16_C(44294), UINT16_C(17258) } }, { { UINT16_C(29412), UINT16_C( 8900), UINT16_C( 7974), UINT16_C(16682) }, { UINT16_C(40400), UINT16_C(19734), UINT16_C(13094), UINT16_C(55515) }, { UINT16_C(61236), UINT16_C(28626), UINT16_C(11264), UINT16_C(39409) } }, { { UINT16_C(47907), UINT16_C(43766), UINT16_C(49584), UINT16_C(40603) }, { UINT16_C(49547), UINT16_C( 3211), UINT16_C(11033), UINT16_C(64958) }, { UINT16_C(31400), UINT16_C(42621), UINT16_C(60073), UINT16_C(25381) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_veor_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_veor_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C( 475910642), UINT32_C(2198092259) }, { UINT32_C(1779073253), UINT32_C( 710440248) }, { UINT32_C(1985433879), UINT32_C(2841398491) } }, { { UINT32_C(2529940306), UINT32_C(1981044553) }, { UINT32_C(4205813484), UINT32_C( 565923630) }, { UINT32_C(1818512830), UINT32_C(1471092839) } }, { { UINT32_C( 54335520), UINT32_C( 914833745) }, { UINT32_C(4204827074), UINT32_C(1478818054) }, { UINT32_C(4187851234), UINT32_C(1856223319) } }, { { UINT32_C( 384823244), UINT32_C( 864813894) }, { UINT32_C(3492625314), UINT32_C(2884757642) }, { UINT32_C(3334657134), UINT32_C(2558389196) } }, { { UINT32_C(1387146752), UINT32_C( 847787376) }, { UINT32_C(3442223558), UINT32_C(3995423010) }, { UINT32_C(2676099014), UINT32_C(3702350930) } }, { { UINT32_C(2265191488), UINT32_C(3132789016) }, { UINT32_C(1468721100), UINT32_C(3473046735) }, { UINT32_C(3499029388), UINT32_C(1975053783) } }, { { UINT32_C( 438415530), UINT32_C(2890705638) }, { UINT32_C(4118378963), UINT32_C( 182689738) }, { UINT32_C(4015573369), UINT32_C(2796500268) } }, { { UINT32_C(3415337139), UINT32_C(1166363513) }, { UINT32_C( 27004978), UINT32_C( 919707276) }, { UINT32_C(3389913217), UINT32_C(1934939637) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_veor_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_veor_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } 
test_vec[] = { { { UINT64_C(12398498631144494080) }, { UINT64_C(12830720206572806384) }, { UINT64_C( 2170648006984611056) } }, { { UINT64_C(14503918927718196209) }, { UINT64_C(11943777821855590383) }, { UINT64_C( 7820667666536924190) } }, { { UINT64_C( 9809779626162407590) }, { UINT64_C( 2879441961520354615) }, { UINT64_C(12670459381603638673) } }, { { UINT64_C(16081757167494409208) }, { UINT64_C(10038957759141646855) }, { UINT64_C( 6087857433790822911) } }, { { UINT64_C( 2734570005143328978) }, { UINT64_C( 9788114807057594746) }, { UINT64_C(11683879364330070440) } }, { { UINT64_C( 6186002290649558537) }, { UINT64_C( 43438603698807594) }, { UINT64_C( 6143837196495015203) } }, { { UINT64_C( 4251220755635477483) }, { UINT64_C(14856461225913050903) }, { UINT64_C(17641617911741349116) } }, { { UINT64_C( 6544470438163519278) }, { UINT64_C( 1329409782962574404) }, { UINT64_C( 5233642461911434090) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_veor_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; } static int test_simde_veorq_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 7), INT8_C( 33), -INT8_C( 55), -INT8_C( 62), -INT8_C( 87), INT8_C( 96), -INT8_C( 63), -INT8_C( 107), -INT8_C( 112), -INT8_C( 109), INT8_C( 45), INT8_C( 65), INT8_C( 62), INT8_C( 1), -INT8_C( 77), INT8_C( 15) }, { INT8_C( 105), -INT8_C( 96), -INT8_C( 51), INT8_C( 40), -INT8_C( 44), INT8_C( 104), INT8_C( 89), INT8_C( 64), INT8_C( 119), -INT8_C( 33), INT8_C( 15), -INT8_C( 57), -INT8_C( 109), -INT8_C( 96), -INT8_C( 73), -INT8_C( 101) }, { INT8_C( 110), -INT8_C( 127), INT8_C( 4), -INT8_C( 22), INT8_C( 125), INT8_C( 8), -INT8_C( 104), -INT8_C( 43), -INT8_C( 25), INT8_C( 76), INT8_C( 34), -INT8_C( 122), -INT8_C( 83), -INT8_C( 95), INT8_C( 
4), -INT8_C( 108) } }, { { -INT8_C( 63), INT8_MIN, INT8_C( 93), INT8_C( 106), -INT8_C( 31), INT8_C( 30), -INT8_C( 1), INT8_C( 113), -INT8_C( 78), INT8_C( 45), -INT8_C( 78), -INT8_C( 16), INT8_C( 46), INT8_C( 101), -INT8_C( 1), -INT8_C( 105) }, { INT8_C( 5), -INT8_C( 51), -INT8_C( 65), -INT8_C( 38), INT8_C( 53), INT8_C( 24), INT8_C( 26), -INT8_C( 84), -INT8_C( 8), INT8_C( 42), INT8_C( 115), -INT8_C( 117), -INT8_C( 54), INT8_C( 43), INT8_C( 38), -INT8_C( 117) }, { -INT8_C( 60), INT8_C( 77), -INT8_C( 30), -INT8_C( 80), -INT8_C( 44), INT8_C( 6), -INT8_C( 27), -INT8_C( 35), INT8_C( 74), INT8_C( 7), -INT8_C( 63), INT8_C( 123), -INT8_C( 28), INT8_C( 78), -INT8_C( 39), INT8_C( 28) } }, { { -INT8_C( 85), -INT8_C( 124), -INT8_C( 10), -INT8_C( 116), -INT8_C( 94), -INT8_C( 11), -INT8_C( 2), INT8_C( 84), INT8_C( 34), -INT8_C( 80), INT8_C( 68), INT8_C( 80), INT8_C( 22), INT8_C( 68), -INT8_C( 25), INT8_C( 27) }, { INT8_C( 17), -INT8_C( 90), -INT8_C( 11), INT8_C( 70), -INT8_C( 65), INT8_C( 16), -INT8_C( 14), -INT8_C( 73), INT8_C( 58), INT8_C( 101), INT8_C( 66), INT8_C( 4), -INT8_C( 112), INT8_C( 105), -INT8_C( 113), INT8_C( 60) }, { -INT8_C( 70), INT8_C( 34), INT8_C( 3), -INT8_C( 54), INT8_C( 29), -INT8_C( 27), INT8_C( 12), -INT8_C( 29), INT8_C( 24), -INT8_C( 43), INT8_C( 6), INT8_C( 84), -INT8_C( 122), INT8_C( 45), INT8_C( 104), INT8_C( 39) } }, { { -INT8_C( 19), -INT8_C( 123), -INT8_C( 56), -INT8_C( 113), INT8_C( 123), -INT8_C( 58), -INT8_C( 28), -INT8_C( 99), INT8_C( 119), INT8_C( 40), -INT8_C( 18), -INT8_C( 115), INT8_C( 108), -INT8_C( 43), -INT8_C( 88), INT8_C( 125) }, { INT8_C( 124), -INT8_C( 98), -INT8_C( 61), INT8_C( 59), -INT8_C( 82), -INT8_C( 75), -INT8_C( 14), -INT8_C( 24), INT8_C( 27), INT8_C( 52), -INT8_C( 20), -INT8_C( 85), -INT8_C( 99), INT8_C( 123), -INT8_C( 25), -INT8_C( 118) }, { -INT8_C( 111), INT8_C( 27), INT8_C( 11), -INT8_C( 76), -INT8_C( 43), INT8_C( 115), INT8_C( 22), INT8_C( 117), INT8_C( 108), INT8_C( 28), INT8_C( 2), INT8_C( 38), -INT8_C( 15), -INT8_C( 
82), INT8_C( 79), -INT8_C( 9) } }, { { INT8_C( 1), -INT8_C( 80), INT8_C( 26), INT8_C( 124), INT8_C( 118), -INT8_C( 2), INT8_C( 25), -INT8_C( 19), INT8_C( 38), INT8_C( 7), INT8_C( 122), -INT8_C( 109), -INT8_C( 35), INT8_C( 35), INT8_C( 16), INT8_C( 89) }, { -INT8_C( 63), -INT8_C( 44), -INT8_C( 108), INT8_C( 111), -INT8_C( 119), -INT8_C( 122), INT8_C( 87), -INT8_C( 92), -INT8_C( 70), INT8_C( 67), INT8_C( 80), INT8_C( 88), -INT8_C( 66), INT8_C( 55), -INT8_C( 30), -INT8_C( 65) }, { -INT8_C( 64), INT8_C( 100), -INT8_C( 114), INT8_C( 19), -INT8_C( 1), INT8_C( 120), INT8_C( 78), INT8_C( 73), -INT8_C( 100), INT8_C( 68), INT8_C( 42), -INT8_C( 53), INT8_C( 99), INT8_C( 20), -INT8_C( 14), -INT8_C( 26) } }, { { -INT8_C( 25), -INT8_C( 4), INT8_C( 59), INT8_C( 94), -INT8_C( 6), INT8_C( 85), INT8_C( 75), INT8_C( 33), INT8_C( 92), -INT8_C( 58), -INT8_C( 76), INT8_C( 57), -INT8_C( 23), -INT8_C( 60), -INT8_C( 110), -INT8_C( 86) }, { -INT8_C( 104), INT8_C( 38), INT8_C( 25), INT8_C( 34), -INT8_C( 84), INT8_C( 112), -INT8_C( 58), INT8_C( 103), -INT8_C( 77), INT8_C( 22), -INT8_C( 65), INT8_C( 113), INT8_C( 78), -INT8_C( 95), INT8_C( 49), INT8_C( 53) }, { INT8_MAX, -INT8_C( 38), INT8_C( 34), INT8_C( 124), INT8_C( 86), INT8_C( 37), -INT8_C( 115), INT8_C( 70), -INT8_C( 17), -INT8_C( 48), INT8_C( 11), INT8_C( 72), -INT8_C( 89), INT8_C( 101), -INT8_C( 93), -INT8_C( 97) } }, { { -INT8_C( 98), INT8_C( 108), -INT8_C( 109), -INT8_C( 104), -INT8_C( 63), -INT8_C( 33), -INT8_C( 71), INT8_C( 30), -INT8_C( 91), INT8_C( 109), INT8_C( 87), -INT8_C( 114), INT8_C( 50), -INT8_C( 22), INT8_C( 56), -INT8_C( 54) }, { INT8_C( 16), INT8_C( 81), -INT8_C( 20), -INT8_C( 67), -INT8_C( 63), -INT8_C( 77), INT8_C( 36), INT8_C( 116), -INT8_C( 55), -INT8_C( 29), -INT8_C( 27), INT8_C( 23), -INT8_C( 124), INT8_C( 22), INT8_C( 77), INT8_C( 34) }, { -INT8_C( 114), INT8_C( 61), INT8_MAX, INT8_C( 37), INT8_C( 0), INT8_C( 108), -INT8_C( 99), INT8_C( 106), INT8_C( 108), -INT8_C( 114), -INT8_C( 78), -INT8_C( 103), -INT8_C( 74), 
-INT8_C( 4), INT8_C( 117), -INT8_C( 24) } }, { { -INT8_C( 125), -INT8_C( 32), -INT8_C( 69), INT8_C( 68), -INT8_C( 65), INT8_C( 116), INT8_C( 98), INT8_C( 100), -INT8_C( 30), -INT8_C( 70), -INT8_C( 14), INT8_C( 20), -INT8_C( 92), INT8_C( 42), -INT8_C( 34), -INT8_C( 76) }, { INT8_C( 123), -INT8_C( 53), INT8_C( 113), INT8_C( 60), INT8_C( 126), -INT8_C( 107), -INT8_C( 80), INT8_C( 71), INT8_C( 120), -INT8_C( 106), INT8_C( 95), -INT8_C( 3), -INT8_C( 84), -INT8_C( 84), INT8_C( 31), INT8_C( 47) }, { -INT8_C( 8), INT8_C( 43), -INT8_C( 54), INT8_C( 120), -INT8_C( 63), -INT8_C( 31), -INT8_C( 46), INT8_C( 35), -INT8_C( 102), INT8_C( 44), -INT8_C( 83), -INT8_C( 23), INT8_C( 8), -INT8_C( 122), -INT8_C( 63), -INT8_C( 101) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_veorq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_veorq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 22070), -INT16_C( 27902), INT16_C( 20197), -INT16_C( 6856), INT16_C( 29980), -INT16_C( 23942), -INT16_C( 27791), INT16_C( 8437) }, { -INT16_C( 27754), INT16_C( 5134), -INT16_C( 9605), INT16_C( 10935), -INT16_C( 4606), INT16_C( 3542), INT16_C( 29604), INT16_C( 28643) }, { INT16_C( 14940), -INT16_C( 30964), -INT16_C( 27490), -INT16_C( 12401), -INT16_C( 25826), -INT16_C( 20564), -INT16_C( 7979), INT16_C( 20246) } }, { { -INT16_C( 6884), INT16_C( 514), INT16_C( 14900), INT16_C( 20711), INT16_C( 25007), INT16_C( 8690), -INT16_C( 6156), -INT16_C( 30143) }, { INT16_C( 20346), -INT16_C( 2657), INT16_C( 22057), INT16_C( 11039), -INT16_C( 2748), -INT16_C( 5832), INT16_C( 7016), -INT16_C( 31400) }, { -INT16_C( 21914), -INT16_C( 2147), INT16_C( 27677), INT16_C( 31736), -INT16_C( 27413), -INT16_C( 14134), -INT16_C( 868), 
INT16_C( 3865) } }, { { INT16_C( 23041), INT16_C( 13703), INT16_C( 28309), INT16_C( 17541), INT16_C( 30671), -INT16_C( 15515), -INT16_C( 22946), -INT16_C( 10163) }, { -INT16_C( 4874), INT16_C( 8141), -INT16_C( 4797), -INT16_C( 30901), -INT16_C( 31774), INT16_C( 19312), -INT16_C( 14177), -INT16_C( 24368) }, { -INT16_C( 18697), INT16_C( 10826), -INT16_C( 31786), -INT16_C( 15410), -INT16_C( 3027), -INT16_C( 30699), INT16_C( 28353), INT16_C( 30877) } }, { { INT16_C( 22307), -INT16_C( 18219), INT16_C( 23237), -INT16_C( 27396), INT16_C( 25297), INT16_C( 12119), -INT16_C( 23544), -INT16_C( 505) }, { -INT16_C( 11119), -INT16_C( 11234), INT16_C( 27073), -INT16_C( 23461), -INT16_C( 13076), -INT16_C( 29713), -INT16_C( 16492), -INT16_C( 18645) }, { -INT16_C( 31822), INT16_C( 27851), INT16_C( 13060), INT16_C( 12455), -INT16_C( 20931), -INT16_C( 23368), INT16_C( 7068), INT16_C( 18732) } }, { { INT16_C( 22), -INT16_C( 9361), INT16_C( 27738), INT16_C( 11119), -INT16_C( 14642), -INT16_C( 10662), INT16_C( 24938), -INT16_C( 1067) }, { -INT16_C( 3274), -INT16_C( 2097), INT16_C( 11100), INT16_C( 18587), -INT16_C( 29961), -INT16_C( 29740), -INT16_C( 183), INT16_C( 24387) }, { -INT16_C( 3296), INT16_C( 11424), INT16_C( 18182), INT16_C( 25588), INT16_C( 19513), INT16_C( 23950), -INT16_C( 25053), -INT16_C( 23402) } }, { { -INT16_C( 19968), INT16_C( 23098), -INT16_C( 22242), -INT16_C( 4986), -INT16_C( 8081), -INT16_C( 9533), -INT16_C( 26558), INT16_C( 30933) }, { -INT16_C( 23157), -INT16_C( 6289), INT16_C( 3024), -INT16_C( 14545), INT16_C( 917), -INT16_C( 8366), -INT16_C( 27389), INT16_C( 830) }, { INT16_C( 6027), -INT16_C( 17067), -INT16_C( 23858), INT16_C( 11177), -INT16_C( 7174), INT16_C( 1425), INT16_C( 3393), INT16_C( 31723) } }, { { INT16_C( 31048), INT16_C( 26205), -INT16_C( 7390), -INT16_C( 28077), INT16_C( 5828), INT16_C( 1644), INT16_C( 16814), INT16_C( 14718) }, { -INT16_C( 4634), -INT16_C( 18912), INT16_C( 20472), -INT16_C( 29059), -INT16_C( 12205), INT16_C( 22125), -INT16_C( 
21659), -INT16_C( 21159) }, { -INT16_C( 27474), -INT16_C( 12163), -INT16_C( 21286), INT16_C( 7214), -INT16_C( 14697), INT16_C( 20481), -INT16_C( 5429), -INT16_C( 27609) } }, { { -INT16_C( 18908), INT16_C( 18196), INT16_C( 26522), INT16_C( 24281), INT16_C( 17789), INT16_C( 11108), -INT16_C( 7546), INT16_C( 28004) }, { -INT16_C( 31537), -INT16_C( 14301), -INT16_C( 24109), INT16_C( 9814), -INT16_C( 15503), -INT16_C( 10628), -INT16_C( 10898), -INT16_C( 27772) }, { INT16_C( 13035), -INT16_C( 28873), -INT16_C( 14775), INT16_C( 30863), -INT16_C( 31220), -INT16_C( 744), INT16_C( 14312), -INT16_C( 288) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_veorq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_veorq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 2023602252), -INT32_C( 1635477608), INT32_C( 1571456524), -INT32_C( 1631778536) }, { -INT32_C( 771340562), -INT32_C( 1375991329), -INT32_C( 1944584857), INT32_C( 248442050) }, { -INT32_C( 1432617310), INT32_C( 863542855), -INT32_C( 776111253), -INT32_C( 1871452710) } }, { { INT32_C( 1032219556), INT32_C( 266013443), -INT32_C( 1452505711), INT32_C( 2085038734) }, { INT32_C( 1330531952), -INT32_C( 1090762153), INT32_C( 172692296), -INT32_C( 1541924353) }, { INT32_C( 1925719508), -INT32_C( 1322827436), -INT32_C( 1557686567), -INT32_C( 664849551) } }, { { -INT32_C( 1998479739), INT32_C( 983022761), -INT32_C( 790428863), -INT32_C( 1655952595) }, { -INT32_C( 789800071), INT32_C( 764406245), -INT32_C( 63448324), INT32_C( 2023772147) }, { INT32_C( 1477248508), INT32_C( 387470668), INT32_C( 752146877), -INT32_C( 437492514) } }, { { -INT32_C( 1761574418), INT32_C( 2127730493), -INT32_C( 934365797), INT32_C( 1499896800) }, { INT32_C( 
455758390), INT32_C( 944355643), -INT32_C( 2043379565), -INT32_C( 1090530096) }, { -INT32_C( 1943350312), INT32_C( 1184574982), INT32_C( 1316631816), -INT32_C( 409383120) } }, { { -INT32_C( 1839857835), INT32_C( 839919511), -INT32_C( 1107664931), INT32_C( 806838522) }, { -INT32_C( 297057869), -INT32_C( 1926851334), -INT32_C( 451716587), -INT32_C( 2086464978) }, { INT32_C( 2082389734), -INT32_C( 1086934163), INT32_C( 1491666376), -INT32_C( 1280019756) } }, { { -INT32_C( 1458178030), -INT32_C( 35969504), INT32_C( 2142950789), -INT32_C( 374353610) }, { INT32_C( 215480850), -INT32_C( 1533411697), -INT32_C( 2021020328), -INT32_C( 787862337) }, { -INT32_C( 1514012160), INT32_C( 1497553071), -INT32_C( 130844451), INT32_C( 950402441) } }, { { INT32_C( 1148854308), -INT32_C( 884910778), INT32_C( 1632304171), -INT32_C( 531957299) }, { -INT32_C( 2098454029), INT32_C( 2015790623), -INT32_C( 218124493), INT32_C( 12847579) }, { -INT32_C( 963247657), -INT32_C( 1285041319), -INT32_C( 1816833256), -INT32_C( 527503338) } }, { { INT32_C( 1866743337), -INT32_C( 1086683500), INT32_C( 1327531138), INT32_C( 1898932861) }, { -INT32_C( 1410130805), -INT32_C( 719119710), -INT32_C( 1513676087), INT32_C( 1436912427) }, { -INT32_C( 994631006), INT32_C( 1780063286), -INT32_C( 353917365), INT32_C( 613081430) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_veorq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_veorq_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { INT64_C( 7838966427958002848), -INT64_C( 8261242307613928860) }, { -INT64_C( 7431965701611751289), -INT64_C( 7407344992410839652) }, { -INT64_C( 858567960746443737), INT64_C( 1470980658505123832) } }, { { -INT64_C( 2886416538353870570), -INT64_C( 
2150656445132361547) }, { -INT64_C( 6271031530521972019), INT64_C( 8555292687707573543) }, { INT64_C( 9154042880290745307), -INT64_C( 7737795467991990894) } }, { { INT64_C( 7829206375937561691), -INT64_C( 4369848476834029283) }, { INT64_C( 5445726219798794719), -INT64_C( 4720293152652729688) }, { INT64_C( 2825435453100393860), INT64_C( 9017626565343331253) } }, { { -INT64_C( 8962951169386382982), -INT64_C( 5796880591085591596) }, { INT64_C( 8365487760686624936), INT64_C( 2608422744189862928) }, { -INT64_C( 611049541326091822), -INT64_C( 8376786039276034108) } }, { { -INT64_C( 4152896156506669197), INT64_C( 6419350962335406486) }, { -INT64_C( 1293309491724046130), -INT64_C( 608104132508481087) }, { INT64_C( 2905049548670898109), -INT64_C( 5865506187303006121) } }, { { INT64_C( 7857425477002191955), -INT64_C( 8749955623567516792) }, { -INT64_C( 5303219508194789478), -INT64_C( 5750620235210045001) }, { -INT64_C( 2635728232020618295), INT64_C( 3936243814942003775) } }, { { -INT64_C( 5055008894465277291), INT64_C( 3520270740420144539) }, { -INT64_C( 2385764142126859718), INT64_C( 5541198542273054425) }, { INT64_C( 7439119847274641583), INT64_C( 8952251304586554178) } }, { { INT64_C( 8518438691616185165), -INT64_C( 1943686230725547969) }, { -INT64_C( 7447835856211759203), INT64_C( 6053598794347363582) }, { -INT64_C( 1255261415251544880), -INT64_C( 5691397999926209343) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_veorq_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_veorq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(163), UINT8_C(170), UINT8_C(155), UINT8_C(223), UINT8_C(123), UINT8_C( 66), UINT8_C( 74), UINT8_C(148), UINT8_C(241), UINT8_C(216), UINT8_C(221), UINT8_C( 
81), UINT8_C(250), UINT8_C(209), UINT8_C(216), UINT8_C(238) }, { UINT8_C(131), UINT8_C(191), UINT8_C(180), UINT8_C( 39), UINT8_C(251), UINT8_C(201), UINT8_C(227), UINT8_C(246), UINT8_C( 77), UINT8_C(182), UINT8_C(108), UINT8_C( 73), UINT8_C(249), UINT8_C(179), UINT8_C(147), UINT8_C(156) }, { UINT8_C( 32), UINT8_C( 21), UINT8_C( 47), UINT8_C(248), UINT8_C(128), UINT8_C(139), UINT8_C(169), UINT8_C( 98), UINT8_C(188), UINT8_C(110), UINT8_C(177), UINT8_C( 24), UINT8_C( 3), UINT8_C( 98), UINT8_C( 75), UINT8_C(114) } }, { { UINT8_C( 93), UINT8_C( 46), UINT8_C(124), UINT8_C(217), UINT8_C(112), UINT8_C(198), UINT8_C(109), UINT8_C( 98), UINT8_C(159), UINT8_C( 74), UINT8_C(179), UINT8_C(153), UINT8_C( 27), UINT8_C(139), UINT8_C(135), UINT8_C(159) }, { UINT8_C( 75), UINT8_C( 60), UINT8_C(198), UINT8_C( 70), UINT8_C( 5), UINT8_C(169), UINT8_C( 60), UINT8_C( 82), UINT8_C( 95), UINT8_C(168), UINT8_C(156), UINT8_C( 89), UINT8_C( 92), UINT8_C( 47), UINT8_C(245), UINT8_C(185) }, { UINT8_C( 22), UINT8_C( 18), UINT8_C(186), UINT8_C(159), UINT8_C(117), UINT8_C(111), UINT8_C( 81), UINT8_C( 48), UINT8_C(192), UINT8_C(226), UINT8_C( 47), UINT8_C(192), UINT8_C( 71), UINT8_C(164), UINT8_C(114), UINT8_C( 38) } }, { { UINT8_C( 93), UINT8_C(113), UINT8_C(146), UINT8_C(205), UINT8_C( 56), UINT8_MAX, UINT8_C( 47), UINT8_C(215), UINT8_C( 73), UINT8_C(226), UINT8_C(112), UINT8_C(101), UINT8_C(110), UINT8_C(248), UINT8_C( 4), UINT8_C(185) }, { UINT8_C( 52), UINT8_C(202), UINT8_MAX, UINT8_C( 57), UINT8_C(116), UINT8_C( 59), UINT8_C(140), UINT8_C(211), UINT8_C(228), UINT8_C( 40), UINT8_C( 44), UINT8_C( 64), UINT8_C( 87), UINT8_C( 34), UINT8_C(249), UINT8_C(180) }, { UINT8_C(105), UINT8_C(187), UINT8_C(109), UINT8_C(244), UINT8_C( 76), UINT8_C(196), UINT8_C(163), UINT8_C( 4), UINT8_C(173), UINT8_C(202), UINT8_C( 92), UINT8_C( 37), UINT8_C( 57), UINT8_C(218), UINT8_C(253), UINT8_C( 13) } }, { { UINT8_C(147), UINT8_C(140), UINT8_C(129), UINT8_C(203), UINT8_C(139), UINT8_C(177), UINT8_C(162), 
UINT8_C(213), UINT8_C(147), UINT8_C( 19), UINT8_C( 58), UINT8_C( 1), UINT8_C( 11), UINT8_C( 62), UINT8_C(186), UINT8_C( 63) }, { UINT8_C( 8), UINT8_C(185), UINT8_C(120), UINT8_C(124), UINT8_C(245), UINT8_C( 4), UINT8_C( 80), UINT8_C(217), UINT8_C( 44), UINT8_C(124), UINT8_C( 25), UINT8_C(131), UINT8_C(158), UINT8_C( 18), UINT8_C( 55), UINT8_C( 50) }, { UINT8_C(155), UINT8_C( 53), UINT8_C(249), UINT8_C(183), UINT8_C(126), UINT8_C(181), UINT8_C(242), UINT8_C( 12), UINT8_C(191), UINT8_C(111), UINT8_C( 35), UINT8_C(130), UINT8_C(149), UINT8_C( 44), UINT8_C(141), UINT8_C( 13) } }, { { UINT8_C(158), UINT8_C(185), UINT8_C(253), UINT8_C( 42), UINT8_C(106), UINT8_C(160), UINT8_MAX, UINT8_C(253), UINT8_C(179), UINT8_C( 57), UINT8_MAX, UINT8_C(190), UINT8_C(119), UINT8_C(185), UINT8_C(253), UINT8_C(127) }, { UINT8_C(115), UINT8_C(117), UINT8_C(252), UINT8_C(104), UINT8_C(122), UINT8_C( 76), UINT8_C( 65), UINT8_C(166), UINT8_C(200), UINT8_C( 90), UINT8_C( 42), UINT8_C(103), UINT8_C(108), UINT8_C( 97), UINT8_C(153), UINT8_C( 11) }, { UINT8_C(237), UINT8_C(204), UINT8_C( 1), UINT8_C( 66), UINT8_C( 16), UINT8_C(236), UINT8_C(190), UINT8_C( 91), UINT8_C(123), UINT8_C( 99), UINT8_C(213), UINT8_C(217), UINT8_C( 27), UINT8_C(216), UINT8_C(100), UINT8_C(116) } }, { { UINT8_C( 26), UINT8_C(150), UINT8_C( 53), UINT8_C(132), UINT8_C( 54), UINT8_C( 52), UINT8_C(130), UINT8_C(233), UINT8_C(109), UINT8_C(129), UINT8_C(167), UINT8_C(228), UINT8_C( 58), UINT8_C(164), UINT8_C( 99), UINT8_C(173) }, { UINT8_C( 26), UINT8_C( 95), UINT8_C( 21), UINT8_C(148), UINT8_C(171), UINT8_C( 86), UINT8_C( 58), UINT8_C(116), UINT8_C(176), UINT8_C(100), UINT8_C(219), UINT8_C( 29), UINT8_C(198), UINT8_C(116), UINT8_C( 40), UINT8_C(224) }, { UINT8_C( 0), UINT8_C(201), UINT8_C( 32), UINT8_C( 16), UINT8_C(157), UINT8_C( 98), UINT8_C(184), UINT8_C(157), UINT8_C(221), UINT8_C(229), UINT8_C(124), UINT8_C(249), UINT8_C(252), UINT8_C(208), UINT8_C( 75), UINT8_C( 77) } }, { { UINT8_C( 10), UINT8_C( 93), UINT8_C(101), 
UINT8_C( 65), UINT8_C(145), UINT8_C(231), UINT8_C( 42), UINT8_C(254), UINT8_C(104), UINT8_C(210), UINT8_C(226), UINT8_C(162), UINT8_C(118), UINT8_C( 69), UINT8_C( 80), UINT8_C(144) }, { UINT8_C(165), UINT8_C(101), UINT8_C( 36), UINT8_C( 80), UINT8_C(188), UINT8_C( 95), UINT8_C(196), UINT8_C(108), UINT8_C(195), UINT8_C(159), UINT8_C(137), UINT8_C(137), UINT8_C( 19), UINT8_C(177), UINT8_C(106), UINT8_C( 30) }, { UINT8_C(175), UINT8_C( 56), UINT8_C( 65), UINT8_C( 17), UINT8_C( 45), UINT8_C(184), UINT8_C(238), UINT8_C(146), UINT8_C(171), UINT8_C( 77), UINT8_C(107), UINT8_C( 43), UINT8_C(101), UINT8_C(244), UINT8_C( 58), UINT8_C(142) } }, { { UINT8_C( 14), UINT8_C(207), UINT8_C( 95), UINT8_C(159), UINT8_C(182), UINT8_C(137), UINT8_C(157), UINT8_C( 30), UINT8_C( 91), UINT8_C(127), UINT8_C(192), UINT8_C(210), UINT8_C(197), UINT8_C( 16), UINT8_C( 98), UINT8_C(106) }, { UINT8_C(118), UINT8_C(135), UINT8_C(186), UINT8_C( 50), UINT8_C(230), UINT8_C(127), UINT8_C(158), UINT8_C(169), UINT8_C( 30), UINT8_C( 40), UINT8_C( 51), UINT8_C( 50), UINT8_C(217), UINT8_C(157), UINT8_C( 80), UINT8_C(232) }, { UINT8_C(120), UINT8_C( 72), UINT8_C(229), UINT8_C(173), UINT8_C( 80), UINT8_C(246), UINT8_C( 3), UINT8_C(183), UINT8_C( 69), UINT8_C( 87), UINT8_C(243), UINT8_C(224), UINT8_C( 28), UINT8_C(141), UINT8_C( 50), UINT8_C(130) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_veorq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_veorq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(40817), UINT16_C(53417), UINT16_C(32015), UINT16_C(44806), UINT16_C( 9027), UINT16_C(13174), UINT16_C(64941), UINT16_C(39222) }, { UINT16_C(48960), UINT16_C(16190), UINT16_C(49883), UINT16_C(20177), UINT16_C(31404), 
UINT16_C(58163), UINT16_C(36172), UINT16_C(48412) }, { UINT16_C( 8241), UINT16_C(61335), UINT16_C(49108), UINT16_C(57815), UINT16_C(23023), UINT16_C(53317), UINT16_C(28897), UINT16_C( 9258) } }, { { UINT16_C(50477), UINT16_C(15501), UINT16_C(37954), UINT16_C(34283), UINT16_C(25015), UINT16_C(26041), UINT16_C(61278), UINT16_C(40958) }, { UINT16_C(15534), UINT16_C(35294), UINT16_C(45055), UINT16_C(43991), UINT16_C( 2857), UINT16_C(30094), UINT16_C(43672), UINT16_C(50483) }, { UINT16_C(63875), UINT16_C(46419), UINT16_C(15293), UINT16_C(11836), UINT16_C(27294), UINT16_C( 4151), UINT16_C(17862), UINT16_C(23245) } }, { { UINT16_C(49264), UINT16_C(45569), UINT16_C(60756), UINT16_C( 3128), UINT16_C(61774), UINT16_C(44401), UINT16_C(28640), UINT16_C(36684) }, { UINT16_C(10924), UINT16_C(43800), UINT16_C(61657), UINT16_C( 598), UINT16_C(58619), UINT16_C(37751), UINT16_C(43663), UINT16_C(65369) }, { UINT16_C(60124), UINT16_C( 6425), UINT16_C( 7565), UINT16_C( 3694), UINT16_C( 5557), UINT16_C(15878), UINT16_C(50543), UINT16_C(28693) } }, { { UINT16_C(23147), UINT16_C(49073), UINT16_C(59719), UINT16_C(38603), UINT16_C(15578), UINT16_C(47939), UINT16_C(36780), UINT16_C(22602) }, { UINT16_C(25273), UINT16_C(37379), UINT16_C(22866), UINT16_C(19860), UINT16_C( 2877), UINT16_C(52449), UINT16_C(15030), UINT16_C( 8651) }, { UINT16_C(14546), UINT16_C(11698), UINT16_C(45077), UINT16_C(56159), UINT16_C(14311), UINT16_C(30626), UINT16_C(46362), UINT16_C(31105) } }, { { UINT16_C(32148), UINT16_C(56544), UINT16_C(44134), UINT16_C(16754), UINT16_C(46568), UINT16_C(38140), UINT16_C(17988), UINT16_C(65004) }, { UINT16_C(61352), UINT16_C(64399), UINT16_C( 9032), UINT16_C(34376), UINT16_C(10542), UINT16_C(58450), UINT16_C( 7779), UINT16_C(63493) }, { UINT16_C(37436), UINT16_C(10095), UINT16_C(36654), UINT16_C(51002), UINT16_C(40134), UINT16_C(28846), UINT16_C(22567), UINT16_C( 1513) } }, { { UINT16_C(59035), UINT16_C( 468), UINT16_C(18066), UINT16_C(31298), UINT16_C(16123), UINT16_C(16143), 
UINT16_C(64388), UINT16_C(11580) }, { UINT16_C(52203), UINT16_C(13096), UINT16_C(28910), UINT16_C( 7353), UINT16_C( 3226), UINT16_C(64769), UINT16_C( 1578), UINT16_C(50677) }, { UINT16_C(11632), UINT16_C(13052), UINT16_C(13948), UINT16_C(26363), UINT16_C(12897), UINT16_C(49678), UINT16_C(64942), UINT16_C(59593) } }, { { UINT16_C(51692), UINT16_C(32454), UINT16_C( 2319), UINT16_C( 2809), UINT16_C( 2119), UINT16_C(52297), UINT16_C(34051), UINT16_C(61177) }, { UINT16_C( 8528), UINT16_C(15906), UINT16_C(56209), UINT16_C(11099), UINT16_C(23783), UINT16_C( 4393), UINT16_C( 7778), UINT16_C(20438) }, { UINT16_C(59580), UINT16_C(16612), UINT16_C(53918), UINT16_C( 8610), UINT16_C(21664), UINT16_C(56672), UINT16_C(39777), UINT16_C(41263) } }, { { UINT16_C(40424), UINT16_C(63437), UINT16_C(50854), UINT16_C(60674), UINT16_C(19406), UINT16_C(53945), UINT16_C(45777), UINT16_C( 8640) }, { UINT16_C(58067), UINT16_C(25952), UINT16_C(48062), UINT16_C(42384), UINT16_C(47383), UINT16_C(31159), UINT16_C(36312), UINT16_C(49352) }, { UINT16_C(32571), UINT16_C(37549), UINT16_C(32024), UINT16_C(18578), UINT16_C(62169), UINT16_C(43790), UINT16_C(16137), UINT16_C(57608) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_veorq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_veorq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(1830076854), UINT32_C(2624093226), UINT32_C(4093110100), UINT32_C( 698167114) }, { UINT32_C( 927289064), UINT32_C(1414399674), UINT32_C(4078960399), UINT32_C( 302114395) }, { UINT32_C(1515294558), UINT32_C(3357964944), UINT32_C( 15210587), UINT32_C(1000130833) } }, { { UINT32_C(3531544232), UINT32_C(3866027922), UINT32_C( 433677774), UINT32_C(2084730516) }, { 
UINT32_C(2142537924), UINT32_C(2647851662), UINT32_C(1553003009), UINT32_C(2171507417) }, { UINT32_C(2915802732), UINT32_C(2076042524), UINT32_C(1162450895), UINT32_C(4247577677) } }, { { UINT32_C( 978579112), UINT32_C(2753610453), UINT32_C(3166566695), UINT32_C( 859373423) }, { UINT32_C( 364047495), UINT32_C(4021454574), UINT32_C(1363952504), UINT32_C(2110962389) }, { UINT32_C( 803275311), UINT32_C(1267876923), UINT32_C(3992042079), UINT32_C(1323976122) } }, { { UINT32_C(2125932200), UINT32_C( 253941736), UINT32_C(1070325712), UINT32_C(1718748382) }, { UINT32_C(3749389808), UINT32_C( 600714923), UINT32_C(1182079601), UINT32_C(2109949909) }, { UINT32_C(2714501976), UINT32_C( 753727811), UINT32_C(2042545569), UINT32_C( 464601867) } }, { { UINT32_C(1442544237), UINT32_C( 560274769), UINT32_C(3680514300), UINT32_C( 625070644) }, { UINT32_C(2718219511), UINT32_C(1539756778), UINT32_C(3265346541), UINT32_C(4030686338) }, { UINT32_C(4160734874), UINT32_C(2057555899), UINT32_C( 432081681), UINT32_C(3581851318) } }, { { UINT32_C( 793066462), UINT32_C(1431349848), UINT32_C( 254849243), UINT32_C(2050257283) }, { UINT32_C( 404568109), UINT32_C(4168344331), UINT32_C(2696549662), UINT32_C(1485896057) }, { UINT32_C( 928515059), UINT32_C(2904770899), UINT32_C(2945099205), UINT32_C( 581208314) } }, { { UINT32_C(2374489652), UINT32_C(1541593216), UINT32_C( 191566472), UINT32_C(2961612675) }, { UINT32_C(3821577176), UINT32_C(2765831302), UINT32_C(3393492305), UINT32_C(3273840014) }, { UINT32_C(1850701292), UINT32_C(4281984006), UINT32_C(3241117657), UINT32_C(1940146701) } }, { { UINT32_C( 726706859), UINT32_C( 176632450), UINT32_C(3340169796), UINT32_C(1769446545) }, { UINT32_C(3310108735), UINT32_C(3446220668), UINT32_C(1268231612), UINT32_C( 755939970) }, { UINT32_C(3994872468), UINT32_C(3354269182), UINT32_C(2357288952), UINT32_C(1148790291) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); 
simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_veorq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_veorq_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 8527705653078384895), UINT64_C(10179607844703931613) }, { UINT64_C(10192410360933839086), UINT64_C( 2403911898190977636) }, { UINT64_C(18098498112181702673), UINT64_C(12401033501679916729) } }, { { UINT64_C(10149867462929755977), UINT64_C( 6922438834607896094) }, { UINT64_C(11828856431356447074), UINT64_C(15918126349759228581) }, { UINT64_C( 2950729595619485227), UINT64_C(13617190798025235643) } }, { { UINT64_C(15129031179623276987), UINT64_C(17664637898287554899) }, { UINT64_C( 7842376392276655600), UINT64_C( 5427430808190387426) }, { UINT64_C(13628079889122729035), UINT64_C(13724575844232734129) } }, { { UINT64_C( 343024198562911037), UINT64_C( 7949959993762584523) }, { UINT64_C( 8962008892823233323), UINT64_C(16956972536245162616) }, { UINT64_C( 8691341630343114774), UINT64_C( 9583847818821076403) } }, { { UINT64_C( 9046779137311311831), UINT64_C( 7971704781101815221) }, { UINT64_C( 3110227796088078098), UINT64_C( 3957404440158477138) }, { UINT64_C( 6243433851020589253), UINT64_C( 6362083362209287911) } }, { { UINT64_C( 1854438940996454029), UINT64_C(15587219748785264543) }, { UINT64_C(13982179792934777132), UINT64_C( 5863314818597010889) }, { UINT64_C(15832092318380029857), UINT64_C( 9875901426487437910) } }, { { UINT64_C( 572322636551429492), UINT64_C( 6233608789164637667) }, { UINT64_C( 9087772245743765483), UINT64_C( 185216643220562046) }, { UINT64_C( 8786255496314493599), UINT64_C( 6057411476368383389) } }, { { UINT64_C( 5949478783376040482), UINT64_C(11495577361277811208) }, { UINT64_C( 9059581084911189638), UINT64_C( 9766426763440578809) }, { UINT64_C( 3398761491516660900), UINT64_C( 1729716309622659825) } }, }; for (size_t i = 0 ; 
i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_veorq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(veor_s8) SIMDE_TEST_FUNC_LIST_ENTRY(veor_s16) SIMDE_TEST_FUNC_LIST_ENTRY(veor_s32) SIMDE_TEST_FUNC_LIST_ENTRY(veor_s64) SIMDE_TEST_FUNC_LIST_ENTRY(veor_u8) SIMDE_TEST_FUNC_LIST_ENTRY(veor_u16) SIMDE_TEST_FUNC_LIST_ENTRY(veor_u32) SIMDE_TEST_FUNC_LIST_ENTRY(veor_u64) SIMDE_TEST_FUNC_LIST_ENTRY(veorq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(veorq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(veorq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(veorq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(veorq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(veorq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(veorq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(veorq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/ext.c000066400000000000000000002305441400333146700162730ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN ext #include "test-neon.h" #include "../../../simde/arm/neon/ext.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ static int test_simde_vext_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b[2]; int n; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -687.45), SIMDE_FLOAT32_C( 362.88) }, { SIMDE_FLOAT32_C( 244.11), SIMDE_FLOAT32_C( 675.58) }, INT32_C( 0), { SIMDE_FLOAT32_C( -687.45), SIMDE_FLOAT32_C( 362.88) } }, { { SIMDE_FLOAT32_C( 707.43), SIMDE_FLOAT32_C( -78.71) }, { SIMDE_FLOAT32_C( 610.46), SIMDE_FLOAT32_C( -912.57) }, INT32_C( 1), { SIMDE_FLOAT32_C( -78.71), SIMDE_FLOAT32_C( 610.46) } }, { { SIMDE_FLOAT32_C( 754.89), SIMDE_FLOAT32_C( 993.62) }, { SIMDE_FLOAT32_C( -570.81), SIMDE_FLOAT32_C( -534.46) }, INT32_C( 0), { SIMDE_FLOAT32_C( 754.89), SIMDE_FLOAT32_C( 993.62) } }, { { SIMDE_FLOAT32_C( 
-147.63), SIMDE_FLOAT32_C( -385.01) }, { SIMDE_FLOAT32_C( 570.33), SIMDE_FLOAT32_C( -977.64) }, INT32_C( 1), { SIMDE_FLOAT32_C( -385.01), SIMDE_FLOAT32_C( 570.33) } }, { { SIMDE_FLOAT32_C( -414.19), SIMDE_FLOAT32_C( -824.34) }, { SIMDE_FLOAT32_C( -892.91), SIMDE_FLOAT32_C( -280.78) }, INT32_C( 0), { SIMDE_FLOAT32_C( -414.19), SIMDE_FLOAT32_C( -824.34) } }, { { SIMDE_FLOAT32_C( -659.90), SIMDE_FLOAT32_C( 892.24) }, { SIMDE_FLOAT32_C( -796.45), SIMDE_FLOAT32_C( -383.54) }, INT32_C( 1), { SIMDE_FLOAT32_C( 892.24), SIMDE_FLOAT32_C( -796.45) } }, { { SIMDE_FLOAT32_C( -29.63), SIMDE_FLOAT32_C( -636.99) }, { SIMDE_FLOAT32_C( -37.62), SIMDE_FLOAT32_C( -62.96) }, INT32_C( 0), { SIMDE_FLOAT32_C( -29.63), SIMDE_FLOAT32_C( -636.99) } }, { { SIMDE_FLOAT32_C( -107.40), SIMDE_FLOAT32_C( 234.54) }, { SIMDE_FLOAT32_C( 866.55), SIMDE_FLOAT32_C( 205.15) }, INT32_C( 1), { SIMDE_FLOAT32_C( 234.54), SIMDE_FLOAT32_C( 866.55) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); int n = test_vec[i].n; simde_float32x2_t r; SIMDE_CONSTIFY_2_(simde_vext_f32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vext_f32(a, b, lanes[i]); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vext_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { 
simde_float64 a[1]; simde_float64 b[1]; simde_float64 r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( -680.82) }, { SIMDE_FLOAT64_C( 28.23) }, { SIMDE_FLOAT64_C( -680.82) } }, { { SIMDE_FLOAT64_C( 422.94) }, { SIMDE_FLOAT64_C( -433.13) }, { SIMDE_FLOAT64_C( 422.94) } }, { { SIMDE_FLOAT64_C( -224.72) }, { SIMDE_FLOAT64_C( -962.86) }, { SIMDE_FLOAT64_C( -224.72) } }, { { SIMDE_FLOAT64_C( 366.48) }, { SIMDE_FLOAT64_C( -406.43) }, { SIMDE_FLOAT64_C( 366.48) } }, { { SIMDE_FLOAT64_C( 712.41) }, { SIMDE_FLOAT64_C( 528.86) }, { SIMDE_FLOAT64_C( 712.41) } }, { { SIMDE_FLOAT64_C( 609.88) }, { SIMDE_FLOAT64_C( -935.77) }, { SIMDE_FLOAT64_C( 609.88) } }, { { SIMDE_FLOAT64_C( 202.19) }, { SIMDE_FLOAT64_C( 313.38) }, { SIMDE_FLOAT64_C( 202.19) } }, { { SIMDE_FLOAT64_C( -424.80) }, { SIMDE_FLOAT64_C( 680.39) }, { SIMDE_FLOAT64_C( -424.80) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_float64x1_t r = simde_vext_f64(a, b, 0); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t b = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t r = simde_vext_f64(a, b, 0); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, n, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vext_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; int n; int8_t r[8]; } test_vec[] = { { { -INT8_C( 97), -INT8_C( 53), -INT8_C( 60), INT8_C( 90), INT8_MIN, -INT8_C( 105), INT8_C( 27), -INT8_C( 34) }, { -INT8_C( 25), -INT8_C( 82), -INT8_C( 56), INT8_C( 
95), -INT8_C( 75), INT8_C( 9), -INT8_C( 5), -INT8_C( 102) }, INT32_C( 0), { -INT8_C( 97), -INT8_C( 53), -INT8_C( 60), INT8_C( 90), INT8_MIN, -INT8_C( 105), INT8_C( 27), -INT8_C( 34) } }, { { INT8_C( 76), INT8_C( 122), -INT8_C( 21), -INT8_C( 77), -INT8_C( 52), INT8_C( 88), -INT8_C( 109), -INT8_C( 51) }, { -INT8_C( 91), INT8_C( 82), -INT8_C( 77), -INT8_C( 45), INT8_C( 24), -INT8_C( 52), -INT8_C( 41), -INT8_C( 73) }, INT32_C( 1), { INT8_C( 122), -INT8_C( 21), -INT8_C( 77), -INT8_C( 52), INT8_C( 88), -INT8_C( 109), -INT8_C( 51), -INT8_C( 91) } }, { { -INT8_C( 104), -INT8_C( 101), INT8_C( 17), INT8_C( 24), INT8_C( 50), INT8_C( 44), -INT8_C( 10), INT8_C( 25) }, { -INT8_C( 37), -INT8_C( 66), INT8_C( 121), -INT8_C( 112), -INT8_C( 57), INT8_C( 116), INT8_C( 42), INT8_C( 19) }, INT32_C( 2), { INT8_C( 17), INT8_C( 24), INT8_C( 50), INT8_C( 44), -INT8_C( 10), INT8_C( 25), -INT8_C( 37), -INT8_C( 66) } }, { { -INT8_C( 18), INT8_C( 22), -INT8_C( 58), -INT8_C( 69), INT8_C( 110), INT8_C( 90), -INT8_C( 120), INT8_C( 20) }, { -INT8_C( 84), INT8_C( 59), -INT8_C( 25), -INT8_C( 60), INT8_C( 8), -INT8_C( 66), INT8_C( 123), -INT8_C( 96) }, INT32_C( 3), { -INT8_C( 69), INT8_C( 110), INT8_C( 90), -INT8_C( 120), INT8_C( 20), -INT8_C( 84), INT8_C( 59), -INT8_C( 25) } }, { { INT8_C( 90), -INT8_C( 116), -INT8_C( 72), -INT8_C( 116), -INT8_C( 72), -INT8_C( 82), -INT8_C( 90), -INT8_C( 109) }, { INT8_C( 108), INT8_C( 31), INT8_C( 35), INT8_C( 52), -INT8_C( 109), INT8_C( 78), INT8_C( 71), -INT8_C( 126) }, INT32_C( 4), { -INT8_C( 72), -INT8_C( 82), -INT8_C( 90), -INT8_C( 109), INT8_C( 108), INT8_C( 31), INT8_C( 35), INT8_C( 52) } }, { { INT8_C( 100), INT8_C( 14), INT8_C( 61), -INT8_C( 46), INT8_C( 104), -INT8_C( 59), -INT8_C( 26), INT8_C( 20) }, { INT8_C( 0), -INT8_C( 50), -INT8_C( 40), INT8_C( 8), -INT8_C( 116), INT8_C( 83), -INT8_C( 88), -INT8_C( 26) }, INT32_C( 5), { -INT8_C( 59), -INT8_C( 26), INT8_C( 20), INT8_C( 0), -INT8_C( 50), -INT8_C( 40), INT8_C( 8), -INT8_C( 116) } }, { { -INT8_C( 33), 
INT8_C( 96), INT8_C( 115), -INT8_C( 105), INT8_C( 14), INT8_C( 25), INT8_C( 43), INT8_C( 123) }, { INT8_C( 56), INT8_C( 78), -INT8_C( 81), -INT8_C( 53), -INT8_C( 100), -INT8_C( 10), INT8_C( 77), INT8_C( 0) }, INT32_C( 6), { INT8_C( 43), INT8_C( 123), INT8_C( 56), INT8_C( 78), -INT8_C( 81), -INT8_C( 53), -INT8_C( 100), -INT8_C( 10) } }, { { INT8_C( 4), -INT8_C( 118), -INT8_C( 45), INT8_C( 108), INT8_C( 79), -INT8_C( 71), INT8_MIN, INT8_C( 80) }, { -INT8_C( 121), INT8_C( 88), INT8_C( 88), INT8_C( 20), -INT8_C( 85), INT8_C( 1), -INT8_C( 6), -INT8_C( 118) }, INT32_C( 7), { INT8_C( 80), -INT8_C( 121), INT8_C( 88), INT8_C( 88), INT8_C( 20), -INT8_C( 85), INT8_C( 1), -INT8_C( 6) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); int n = test_vec[i].n; simde_int8x8_t r; SIMDE_CONSTIFY_8_(simde_vext_s8, r, (HEDLEY_UNREACHABLE(), a), n, a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 4, 5, 6, 7 }; for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vext_s8(a, b, lanes[i]); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vext_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int n; int16_t r[4]; } test_vec[] = { { { INT16_C( 30207), -INT16_C( 2757), INT16_C( 19340), -INT16_C( 16644) }, { -INT16_C( 30278), INT16_C( 18083), INT16_C( 5855), -INT16_C( 26433) }, INT32_C( 0), { INT16_C( 30207), -INT16_C( 2757), INT16_C( 19340), -INT16_C( 16644) } }, { { 
-INT16_C( 11351), -INT16_C( 407), -INT16_C( 17343), -INT16_C( 8726) }, { INT16_C( 11508), -INT16_C( 2305), INT16_C( 3599), INT16_C( 3665) }, INT32_C( 1), { -INT16_C( 407), -INT16_C( 17343), -INT16_C( 8726), INT16_C( 11508) } }, { { -INT16_C( 29565), INT16_C( 3843), -INT16_C( 41), -INT16_C( 28210) }, { INT16_C( 29065), INT16_C( 26839), -INT16_C( 27000), INT16_C( 12544) }, INT32_C( 2), { -INT16_C( 41), -INT16_C( 28210), INT16_C( 29065), INT16_C( 26839) } }, { { INT16_C( 26985), -INT16_C( 21712), INT16_C( 6694), INT16_C( 6792) }, { -INT16_C( 30906), INT16_C( 22032), INT16_C( 24981), INT16_C( 6244) }, INT32_C( 3), { INT16_C( 6792), -INT16_C( 30906), INT16_C( 22032), INT16_C( 24981) } }, { { INT16_C( 26861), -INT16_C( 15064), -INT16_C( 2457), -INT16_C( 4010) }, { INT16_C( 11879), -INT16_C( 4263), INT16_C( 22980), INT16_C( 11809) }, INT32_C( 0), { INT16_C( 26861), -INT16_C( 15064), -INT16_C( 2457), -INT16_C( 4010) } }, { { INT16_C( 20931), -INT16_C( 5671), INT16_C( 24939), -INT16_C( 19965) }, { INT16_C( 5352), INT16_C( 32264), INT16_C( 27765), INT16_C( 25494) }, INT32_C( 1), { -INT16_C( 5671), INT16_C( 24939), -INT16_C( 19965), INT16_C( 5352) } }, { { -INT16_C( 16684), INT16_C( 15400), INT16_C( 32436), INT16_C( 7212) }, { -INT16_C( 31316), INT16_C( 28939), INT16_C( 11487), -INT16_C( 23905) }, INT32_C( 2), { INT16_C( 32436), INT16_C( 7212), -INT16_C( 31316), INT16_C( 28939) } }, { { INT16_C( 30845), -INT16_C( 5749), -INT16_C( 28967), -INT16_C( 15973) }, { -INT16_C( 23646), INT16_C( 6207), -INT16_C( 10737), -INT16_C( 7045) }, INT32_C( 3), { -INT16_C( 15973), -INT16_C( 23646), INT16_C( 6207), -INT16_C( 10737) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); int n = test_vec[i].n; simde_int16x4_t r; SIMDE_CONSTIFY_4_(simde_vext_s16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); simde_test_arm_neon_assert_equal_i16x4(r, 
simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vext_s16(a, b, lanes[i]); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vext_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int n; int32_t r[2]; } test_vec[] = { { { INT32_C( 957350431), -INT32_C( 603021787) }, { -INT32_C( 1074644418), INT32_C( 794553918) }, INT32_C( 0), { INT32_C( 957350431), -INT32_C( 603021787) } }, { { INT32_C( 443957338), -INT32_C( 1718071061) }, { INT32_C( 1279020983), -INT32_C( 1469355383) }, INT32_C( 1), { -INT32_C( 1718071061), INT32_C( 1279020983) } }, { { -INT32_C( 1847493524), INT32_C( 1483599898) }, { INT32_C( 1746362154), -INT32_C( 1483181491) }, INT32_C( 0), { -INT32_C( 1847493524), INT32_C( 1483599898) } }, { { -INT32_C( 1631449422), INT32_C( 372726367) }, { INT32_C( 862090154), INT32_C( 1239207389) }, INT32_C( 1), { INT32_C( 372726367), INT32_C( 862090154) } }, { { INT32_C( 1675279689), -INT32_C( 675526739) }, { -INT32_C( 197078105), -INT32_C( 123938746) }, INT32_C( 0), { INT32_C( 1675279689), -INT32_C( 675526739) } }, { { INT32_C( 1167482598), INT32_C( 1650183608) }, { INT32_C( 513195329), -INT32_C( 731352437) }, INT32_C( 1), { INT32_C( 1650183608), INT32_C( 513195329) } }, { { -INT32_C( 583515601), INT32_C( 833942666) }, { INT32_C( 220591303), -INT32_C( 1291402804) }, INT32_C( 0), { -INT32_C( 583515601), INT32_C( 833942666) } }, { { -INT32_C( 654795745), -INT32_C( 1422240662) }, { -INT32_C( 1664495599), INT32_C( 1920020802) }, INT32_C( 1), { -INT32_C( 
1422240662), -INT32_C( 1664495599) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); int n = test_vec[i].n; simde_int32x2_t r; SIMDE_CONSTIFY_2_(simde_vext_s32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vext_s32(a, b, lanes[i]); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vext_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int64_t b[1]; int64_t r[1]; } test_vec[] = { { { INT64_C( 6639490098744952062) }, { -INT64_C( 906714779595733515) }, { INT64_C( 6639490098744952062) } }, { { -INT64_C( 1175976699607320186) }, { INT64_C( 8022877348809296326) }, { -INT64_C( 1175976699607320186) } }, { { -INT64_C( 1373935476923645568) }, { -INT64_C( 4541150998765402287) }, { -INT64_C( 1373935476923645568) } }, { { -INT64_C( 7139179752814035311) }, { -INT64_C( 5281588533336655149) }, { -INT64_C( 7139179752814035311) } }, { { INT64_C( 8839218246575480008) }, { INT64_C( 123985814383417533) }, { INT64_C( 8839218246575480008) } }, { { -INT64_C( 1103968923059055813) }, { INT64_C( 3319971401639943717) }, { -INT64_C( 1103968923059055813) } }, { { INT64_C( 5613904681621084741) }, { INT64_C( 2513849614873160305) }, { INT64_C( 5613904681621084741) } }, { { -INT64_C( 1180540366207328112) }, { -INT64_C( 12563317733469889) }, { -INT64_C( 1180540366207328112) 
} }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r = simde_vext_s64(a, b, 0); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); simde_int64x1_t b = simde_test_arm_neon_random_i64x1(); simde_int64x1_t r = simde_vext_s64(a, b, 0); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, 0, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vext_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; int n; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 41), UINT8_C(252), UINT8_C( 40), UINT8_C( 64), UINT8_C(135), UINT8_C(135), UINT8_C(244), UINT8_C(115) }, { UINT8_C(156), UINT8_C(144), UINT8_C( 83), UINT8_C(200), UINT8_C(190), UINT8_C( 18), UINT8_C(166), UINT8_C(155) }, INT32_C( 0), { UINT8_C( 41), UINT8_C(252), UINT8_C( 40), UINT8_C( 64), UINT8_C(135), UINT8_C(135), UINT8_C(244), UINT8_C(115) } }, { { UINT8_C( 9), UINT8_C(208), UINT8_C(167), UINT8_C( 64), UINT8_C(179), UINT8_C( 73), UINT8_C(177), UINT8_C(189) }, { UINT8_C(152), UINT8_C( 56), UINT8_C(227), UINT8_C(221), UINT8_C(232), UINT8_C(138), UINT8_C(102), UINT8_C( 17) }, INT32_C( 1), { UINT8_C(208), UINT8_C(167), UINT8_C( 64), UINT8_C(179), UINT8_C( 73), UINT8_C(177), UINT8_C(189), UINT8_C(152) } }, { { UINT8_C(135), UINT8_C(142), UINT8_C( 81), UINT8_C( 14), UINT8_C( 21), UINT8_C( 70), UINT8_C(129), UINT8_C(177) }, { UINT8_C(214), UINT8_C(212), UINT8_C(122), UINT8_C(148), UINT8_C(230), UINT8_C( 32), UINT8_C( 47), UINT8_C(239) }, INT32_C( 2), { UINT8_C( 81), UINT8_C( 14), UINT8_C( 21), UINT8_C( 70), 
UINT8_C(129), UINT8_C(177), UINT8_C(214), UINT8_C(212) } }, { { UINT8_C(241), UINT8_C(215), UINT8_C( 48), UINT8_C(164), UINT8_C( 32), UINT8_C(225), UINT8_C( 98), UINT8_C(184) }, { UINT8_C( 25), UINT8_C( 69), UINT8_C(149), UINT8_C( 1), UINT8_C(208), UINT8_C(251), UINT8_C( 18), UINT8_C( 87) }, INT32_C( 3), { UINT8_C(164), UINT8_C( 32), UINT8_C(225), UINT8_C( 98), UINT8_C(184), UINT8_C( 25), UINT8_C( 69), UINT8_C(149) } }, { { UINT8_C(137), UINT8_C(100), UINT8_C(101), UINT8_C(159), UINT8_C(170), UINT8_C(231), UINT8_C( 80), UINT8_C(128) }, { UINT8_C(187), UINT8_C(202), UINT8_C( 21), UINT8_C(162), UINT8_C(235), UINT8_C( 68), UINT8_C(145), UINT8_C(220) }, INT32_C( 4), { UINT8_C(170), UINT8_C(231), UINT8_C( 80), UINT8_C(128), UINT8_C(187), UINT8_C(202), UINT8_C( 21), UINT8_C(162) } }, { { UINT8_C( 27), UINT8_C(193), UINT8_C(128), UINT8_C( 60), UINT8_C(163), UINT8_C(226), UINT8_C(244), UINT8_C(188) }, { UINT8_C( 40), UINT8_C(138), UINT8_C(190), UINT8_C(248), UINT8_C(133), UINT8_C(208), UINT8_C( 79), UINT8_C( 15) }, INT32_C( 5), { UINT8_C(226), UINT8_C(244), UINT8_C(188), UINT8_C( 40), UINT8_C(138), UINT8_C(190), UINT8_C(248), UINT8_C(133) } }, { { UINT8_C( 52), UINT8_C(180), UINT8_C(174), UINT8_C(222), UINT8_C(155), UINT8_C(254), UINT8_C( 95), UINT8_C( 87) }, { UINT8_C(201), UINT8_C(116), UINT8_C(249), UINT8_C(180), UINT8_C(184), UINT8_C(138), UINT8_C(144), UINT8_C(212) }, INT32_C( 6), { UINT8_C( 95), UINT8_C( 87), UINT8_C(201), UINT8_C(116), UINT8_C(249), UINT8_C(180), UINT8_C(184), UINT8_C(138) } }, { { UINT8_C( 76), UINT8_C( 16), UINT8_C( 16), UINT8_C(239), UINT8_C(243), UINT8_C( 4), UINT8_C(171), UINT8_C( 27) }, { UINT8_C(142), UINT8_C(105), UINT8_C( 19), UINT8_C( 20), UINT8_C( 58), UINT8_C( 98), UINT8_C( 35), UINT8_C(110) }, INT32_C( 7), { UINT8_C( 27), UINT8_C(142), UINT8_C(105), UINT8_C( 19), UINT8_C( 20), UINT8_C( 58), UINT8_C( 98), UINT8_C( 35) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = 
simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); int n = test_vec[i].n; simde_uint8x8_t r; SIMDE_CONSTIFY_8_(simde_vext_u8, r, (HEDLEY_UNREACHABLE(), a), n, a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 4, 5, 6, 7 }; for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vext_u8(a, b, lanes[i]); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vext_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; int n; uint16_t r[4]; } test_vec[] = { { { UINT16_C(16781), UINT16_C( 83), UINT16_C(49358), UINT16_C(43895) }, { UINT16_C(63669), UINT16_C(33927), UINT16_C(10175), UINT16_C(15472) }, INT32_C( 0), { UINT16_C(16781), UINT16_C( 83), UINT16_C(49358), UINT16_C(43895) } }, { { UINT16_C(10362), UINT16_C(59736), UINT16_C(10921), UINT16_C(40640) }, { UINT16_C(39613), UINT16_C(56230), UINT16_C(57753), UINT16_C( 9976) }, INT32_C( 1), { UINT16_C(59736), UINT16_C(10921), UINT16_C(40640), UINT16_C(39613) } }, { { UINT16_C(19490), UINT16_C(61734), UINT16_C(40204), UINT16_C(49564) }, { UINT16_C( 9109), UINT16_C(21829), UINT16_C(46411), UINT16_C(50577) }, INT32_C( 2), { UINT16_C(40204), UINT16_C(49564), UINT16_C( 9109), UINT16_C(21829) } }, { { UINT16_C(59869), UINT16_C(34478), UINT16_C(28180), UINT16_C(53540) }, { UINT16_C(51721), UINT16_C(41644), UINT16_C(42411), UINT16_C(52936) }, INT32_C( 3), { UINT16_C(53540), UINT16_C(51721), UINT16_C(41644), UINT16_C(42411) } }, { { UINT16_C(61425), UINT16_C(64959), UINT16_C(23436), UINT16_C( 8894) }, { UINT16_C( 
894), UINT16_C(51575), UINT16_C( 2233), UINT16_C(38542) }, INT32_C( 0), { UINT16_C(61425), UINT16_C(64959), UINT16_C(23436), UINT16_C( 8894) } }, { { UINT16_C(15601), UINT16_C( 1309), UINT16_C(16811), UINT16_C(46294) }, { UINT16_C(33548), UINT16_C(46934), UINT16_C( 7720), UINT16_C( 6533) }, INT32_C( 1), { UINT16_C( 1309), UINT16_C(16811), UINT16_C(46294), UINT16_C(33548) } }, { { UINT16_C(17421), UINT16_C(39446), UINT16_C(54431), UINT16_C( 7868) }, { UINT16_C(13271), UINT16_C(37095), UINT16_C(30267), UINT16_C(11303) }, INT32_C( 2), { UINT16_C(54431), UINT16_C( 7868), UINT16_C(13271), UINT16_C(37095) } }, { { UINT16_C(17586), UINT16_C(23858), UINT16_C( 2181), UINT16_C(37137) }, { UINT16_C(26507), UINT16_C(45897), UINT16_C(52870), UINT16_C(37836) }, INT32_C( 3), { UINT16_C(37137), UINT16_C(26507), UINT16_C(45897), UINT16_C(52870) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); int n = test_vec[i].n; simde_uint16x4_t r; SIMDE_CONSTIFY_4_(simde_vext_u16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t r = simde_vext_u16(a, b, lanes[i]); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vext_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; int n; uint32_t r[2]; } test_vec[] = { { { UINT32_C(2921936545), UINT32_C(1836714120) }, { 
UINT32_C(3960998402), UINT32_C(1184252007) }, INT32_C( 0), { UINT32_C(2921936545), UINT32_C(1836714120) } }, { { UINT32_C(1465699894), UINT32_C(1555702821) }, { UINT32_C( 496622738), UINT32_C(3470628141) }, INT32_C( 1), { UINT32_C(1555702821), UINT32_C( 496622738) } }, { { UINT32_C(1400702923), UINT32_C( 297858831) }, { UINT32_C(1778309378), UINT32_C(1336906777) }, INT32_C( 0), { UINT32_C(1400702923), UINT32_C( 297858831) } }, { { UINT32_C(2275806050), UINT32_C(3269681200) }, { UINT32_C(1793031485), UINT32_C(3711482898) }, INT32_C( 1), { UINT32_C(3269681200), UINT32_C(1793031485) } }, { { UINT32_C(3526407619), UINT32_C(2951016877) }, { UINT32_C(3810058954), UINT32_C(3627206518) }, INT32_C( 0), { UINT32_C(3526407619), UINT32_C(2951016877) } }, { { UINT32_C( 56678611), UINT32_C(1992639289) }, { UINT32_C(3537937600), UINT32_C( 615454816) }, INT32_C( 1), { UINT32_C(1992639289), UINT32_C(3537937600) } }, { { UINT32_C(2079776974), UINT32_C(2603277009) }, { UINT32_C( 847135420), UINT32_C(3691753481) }, INT32_C( 0), { UINT32_C(2079776974), UINT32_C(2603277009) } }, { { UINT32_C(3252644744), UINT32_C(1865917614) }, { UINT32_C(2839615304), UINT32_C(4274909488) }, INT32_C( 1), { UINT32_C(1865917614), UINT32_C(2839615304) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); int n = test_vec[i].n; simde_uint32x2_t r; SIMDE_CONSTIFY_2_(simde_vext_u32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t r = simde_vext_u32(a, b, lanes[i]); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vext_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C( 4775218924999393801) }, { UINT64_C(14341243545373652668) }, { UINT64_C( 4775218924999393801) } }, { { UINT64_C( 9139095911556337710) }, { UINT64_C( 1794718599694819428) }, { UINT64_C( 9139095911556337710) } }, { { UINT64_C(17254207385035945953) }, { UINT64_C( 6000040611349516152) }, { UINT64_C(17254207385035945953) } }, { { UINT64_C(14523168050340220606) }, { UINT64_C( 2697498136351876169) }, { UINT64_C(14523168050340220606) } }, { { UINT64_C( 2867607344887786920) }, { UINT64_C( 6706397092927255193) }, { UINT64_C( 2867607344887786920) } }, { { UINT64_C(15556831202562008502) }, { UINT64_C( 9620707902598573083) }, { UINT64_C(15556831202562008502) } }, { { UINT64_C( 4386835494242146571) }, { UINT64_C( 2547361106271831389) }, { UINT64_C( 4386835494242146571) } }, { { UINT64_C(15516627759135639025) }, { UINT64_C( 3310630758589564047) }, { UINT64_C(15516627759135639025) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_vext_u64(a, b, 0); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t b = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t r = simde_vext_u64(a, b, 0); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, 0, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, 
SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vextq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 b[4]; int n; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -11.82), SIMDE_FLOAT32_C( -542.24), SIMDE_FLOAT32_C( 904.95), SIMDE_FLOAT32_C( 599.34) }, { SIMDE_FLOAT32_C( -323.53), SIMDE_FLOAT32_C( 591.26), SIMDE_FLOAT32_C( -753.72), SIMDE_FLOAT32_C( 19.62) }, INT32_C( 0), { SIMDE_FLOAT32_C( -11.82), SIMDE_FLOAT32_C( -542.24), SIMDE_FLOAT32_C( 904.95), SIMDE_FLOAT32_C( 599.34) } }, { { SIMDE_FLOAT32_C( -246.51), SIMDE_FLOAT32_C( -600.23), SIMDE_FLOAT32_C( -164.99), SIMDE_FLOAT32_C( -616.01) }, { SIMDE_FLOAT32_C( -788.95), SIMDE_FLOAT32_C( -648.59), SIMDE_FLOAT32_C( 630.30), SIMDE_FLOAT32_C( 886.86) }, INT32_C( 1), { SIMDE_FLOAT32_C( -600.23), SIMDE_FLOAT32_C( -164.99), SIMDE_FLOAT32_C( -616.01), SIMDE_FLOAT32_C( -788.95) } }, { { SIMDE_FLOAT32_C( -83.54), SIMDE_FLOAT32_C( -412.91), SIMDE_FLOAT32_C( -86.28), SIMDE_FLOAT32_C( 790.95) }, { SIMDE_FLOAT32_C( 194.71), SIMDE_FLOAT32_C( -239.80), SIMDE_FLOAT32_C( -880.77), SIMDE_FLOAT32_C( -974.84) }, INT32_C( 2), { SIMDE_FLOAT32_C( -86.28), SIMDE_FLOAT32_C( 790.95), SIMDE_FLOAT32_C( 194.71), SIMDE_FLOAT32_C( -239.80) } }, { { SIMDE_FLOAT32_C( 572.07), SIMDE_FLOAT32_C( 171.25), SIMDE_FLOAT32_C( 700.50), SIMDE_FLOAT32_C( -266.41) }, { SIMDE_FLOAT32_C( 107.61), SIMDE_FLOAT32_C( -395.18), SIMDE_FLOAT32_C( -409.82), SIMDE_FLOAT32_C( -904.21) }, INT32_C( 3), { SIMDE_FLOAT32_C( -266.41), SIMDE_FLOAT32_C( 107.61), SIMDE_FLOAT32_C( -395.18), SIMDE_FLOAT32_C( -409.82) } }, { { SIMDE_FLOAT32_C( 62.58), SIMDE_FLOAT32_C( -504.88), SIMDE_FLOAT32_C( 695.13), SIMDE_FLOAT32_C( 739.05) }, { SIMDE_FLOAT32_C( -913.62), SIMDE_FLOAT32_C( 941.41), SIMDE_FLOAT32_C( -241.33), SIMDE_FLOAT32_C( -160.12) }, INT32_C( 0), { SIMDE_FLOAT32_C( 62.58), SIMDE_FLOAT32_C( -504.88), SIMDE_FLOAT32_C( 695.13), SIMDE_FLOAT32_C( 739.05) } }, { { SIMDE_FLOAT32_C( -658.83), SIMDE_FLOAT32_C( 
593.68), SIMDE_FLOAT32_C( 223.86), SIMDE_FLOAT32_C( -447.78) }, { SIMDE_FLOAT32_C( 945.09), SIMDE_FLOAT32_C( -145.84), SIMDE_FLOAT32_C( -560.91), SIMDE_FLOAT32_C( -138.45) }, INT32_C( 1), { SIMDE_FLOAT32_C( 593.68), SIMDE_FLOAT32_C( 223.86), SIMDE_FLOAT32_C( -447.78), SIMDE_FLOAT32_C( 945.09) } }, { { SIMDE_FLOAT32_C( 441.26), SIMDE_FLOAT32_C( 352.81), SIMDE_FLOAT32_C( -347.49), SIMDE_FLOAT32_C( -364.04) }, { SIMDE_FLOAT32_C( -887.00), SIMDE_FLOAT32_C( -228.26), SIMDE_FLOAT32_C( -338.88), SIMDE_FLOAT32_C( 685.07) }, INT32_C( 2), { SIMDE_FLOAT32_C( -347.49), SIMDE_FLOAT32_C( -364.04), SIMDE_FLOAT32_C( -887.00), SIMDE_FLOAT32_C( -228.26) } }, { { SIMDE_FLOAT32_C( 942.99), SIMDE_FLOAT32_C( -638.38), SIMDE_FLOAT32_C( -581.34), SIMDE_FLOAT32_C( 50.60) }, { SIMDE_FLOAT32_C( -33.56), SIMDE_FLOAT32_C( 8.84), SIMDE_FLOAT32_C( 146.38), SIMDE_FLOAT32_C( -970.98) }, INT32_C( 3), { SIMDE_FLOAT32_C( 50.60), SIMDE_FLOAT32_C( -33.56), SIMDE_FLOAT32_C( 8.84), SIMDE_FLOAT32_C( 146.38) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); int n = test_vec[i].n; simde_float32x4_t r; SIMDE_CONSTIFY_4_(simde_vextq_f32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3, }; for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vextq_f32(a, b, lanes[i]); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int 
test_simde_vextq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 b[2]; int n; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 223.42), SIMDE_FLOAT64_C( -836.30) }, { SIMDE_FLOAT64_C( -552.33), SIMDE_FLOAT64_C( -75.69) }, INT32_C( 0), { SIMDE_FLOAT64_C( 223.42), SIMDE_FLOAT64_C( -836.30) } }, { { SIMDE_FLOAT64_C( 756.82), SIMDE_FLOAT64_C( -550.05) }, { SIMDE_FLOAT64_C( 687.41), SIMDE_FLOAT64_C( 804.17) }, INT32_C( 0), { SIMDE_FLOAT64_C( 756.82), SIMDE_FLOAT64_C( -550.05) } }, { { SIMDE_FLOAT64_C( -230.29), SIMDE_FLOAT64_C( -327.40) }, { SIMDE_FLOAT64_C( 614.24), SIMDE_FLOAT64_C( -107.90) }, INT32_C( 0), { SIMDE_FLOAT64_C( -230.29), SIMDE_FLOAT64_C( -327.40) } }, { { SIMDE_FLOAT64_C( -177.24), SIMDE_FLOAT64_C( -842.50) }, { SIMDE_FLOAT64_C( 773.97), SIMDE_FLOAT64_C( 644.31) }, INT32_C( 0), { SIMDE_FLOAT64_C( -177.24), SIMDE_FLOAT64_C( -842.50) } }, { { SIMDE_FLOAT64_C( -485.54), SIMDE_FLOAT64_C( -253.40) }, { SIMDE_FLOAT64_C( -930.32), SIMDE_FLOAT64_C( 677.29) }, INT32_C( 0), { SIMDE_FLOAT64_C( -485.54), SIMDE_FLOAT64_C( -253.40) } }, { { SIMDE_FLOAT64_C( 819.98), SIMDE_FLOAT64_C( 883.30) }, { SIMDE_FLOAT64_C( -760.51), SIMDE_FLOAT64_C( 669.54) }, INT32_C( 0), { SIMDE_FLOAT64_C( 819.98), SIMDE_FLOAT64_C( 883.30) } }, { { SIMDE_FLOAT64_C( -349.64), SIMDE_FLOAT64_C( 465.41) }, { SIMDE_FLOAT64_C( -828.45), SIMDE_FLOAT64_C( -240.64) }, INT32_C( 0), { SIMDE_FLOAT64_C( -349.64), SIMDE_FLOAT64_C( 465.41) } }, { { SIMDE_FLOAT64_C( -629.75), SIMDE_FLOAT64_C( 891.13) }, { SIMDE_FLOAT64_C( -550.52), SIMDE_FLOAT64_C( 593.67) }, INT32_C( 0), { SIMDE_FLOAT64_C( -629.75), SIMDE_FLOAT64_C( 891.13) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); int n = test_vec[i].n; simde_float64x2_t r; SIMDE_CONSTIFY_2_(simde_vextq_f64, r, (HEDLEY_UNREACHABLE(), a), n, a, b); 
simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t b = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t r = simde_vextq_f64(a, b, 0); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, 0, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vextq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t b[16]; int n; int8_t r[16]; } test_vec[] = { { { -INT8_C( 97), -INT8_C( 4), -INT8_C( 92), INT8_C( 56), INT8_C( 19), INT8_C( 14), -INT8_C( 53), INT8_C( 19), INT8_C( 106), INT8_C( 24), -INT8_C( 46), -INT8_C( 86), -INT8_C( 103), INT8_C( 115), INT8_C( 96), -INT8_C( 67) }, { -INT8_C( 10), -INT8_C( 2), INT8_C( 82), INT8_C( 65), -INT8_C( 30), INT8_C( 34), INT8_C( 93), -INT8_C( 78), -INT8_C( 68), -INT8_C( 13), -INT8_C( 43), INT8_C( 71), -INT8_C( 27), -INT8_C( 5), INT8_C( 39), -INT8_C( 123) }, INT32_C( 2), { -INT8_C( 92), INT8_C( 56), INT8_C( 19), INT8_C( 14), -INT8_C( 53), INT8_C( 19), INT8_C( 106), INT8_C( 24), -INT8_C( 46), -INT8_C( 86), -INT8_C( 103), INT8_C( 115), INT8_C( 96), -INT8_C( 67), -INT8_C( 10), -INT8_C( 2) } }, { { -INT8_C( 9), -INT8_C( 53), -INT8_C( 67), INT8_C( 10), -INT8_C( 39), -INT8_C( 120), INT8_C( 30), INT8_C( 67), -INT8_C( 96), -INT8_C( 16), -INT8_C( 19), INT8_C( 57), INT8_C( 99), INT8_C( 77), -INT8_C( 9), INT8_C( 90) }, { INT8_C( 75), INT8_C( 73), -INT8_C( 101), INT8_C( 46), INT8_C( 107), -INT8_C( 8), -INT8_C( 32), INT8_C( 39), -INT8_C( 20), -INT8_C( 75), INT8_C( 110), -INT8_C( 47), -INT8_C( 79), -INT8_C( 107), INT8_C( 86), -INT8_C( 88) }, INT32_C( 4), { -INT8_C( 39), -INT8_C( 120), INT8_C( 30), INT8_C( 67), -INT8_C( 96), -INT8_C( 16), 
-INT8_C( 19), INT8_C( 57), INT8_C( 99), INT8_C( 77), -INT8_C( 9), INT8_C( 90), INT8_C( 75), INT8_C( 73), -INT8_C( 101), INT8_C( 46) } }, { { INT8_C( 96), INT8_C( 20), -INT8_C( 77), INT8_C( 57), -INT8_C( 100), -INT8_C( 47), INT8_C( 125), INT8_C( 61), -INT8_C( 63), INT8_C( 106), INT8_C( 118), INT8_C( 36), -INT8_C( 72), INT8_C( 109), INT8_C( 126), INT8_C( 3) }, { -INT8_C( 73), INT8_C( 25), INT8_C( 49), INT8_C( 34), INT8_C( 18), INT8_C( 17), INT8_C( 74), -INT8_C( 2), -INT8_C( 57), -INT8_C( 72), -INT8_C( 49), INT8_C( 120), INT8_C( 78), INT8_C( 38), INT8_C( 32), -INT8_C( 82) }, INT32_C( 6), { INT8_C( 125), INT8_C( 61), -INT8_C( 63), INT8_C( 106), INT8_C( 118), INT8_C( 36), -INT8_C( 72), INT8_C( 109), INT8_C( 126), INT8_C( 3), -INT8_C( 73), INT8_C( 25), INT8_C( 49), INT8_C( 34), INT8_C( 18), INT8_C( 17) } }, { { INT8_C( 58), -INT8_C( 45), -INT8_C( 24), -INT8_C( 42), -INT8_C( 92), INT8_C( 101), INT8_C( 19), INT8_C( 101), -INT8_C( 49), -INT8_C( 118), -INT8_C( 118), -INT8_C( 121), -INT8_C( 9), INT8_C( 8), -INT8_C( 117), -INT8_C( 82) }, { INT8_C( 34), -INT8_C( 68), -INT8_C( 47), INT8_C( 52), -INT8_C( 50), INT8_C( 27), INT8_C( 50), -INT8_C( 107), -INT8_C( 45), INT8_C( 1), INT8_C( 13), INT8_C( 33), INT8_C( 39), INT8_C( 45), -INT8_C( 48), INT8_C( 97) }, INT32_C( 8), { -INT8_C( 49), -INT8_C( 118), -INT8_C( 118), -INT8_C( 121), -INT8_C( 9), INT8_C( 8), -INT8_C( 117), -INT8_C( 82), INT8_C( 34), -INT8_C( 68), -INT8_C( 47), INT8_C( 52), -INT8_C( 50), INT8_C( 27), INT8_C( 50), -INT8_C( 107) } }, { { INT8_C( 1), -INT8_C( 72), INT8_C( 56), -INT8_C( 91), INT8_C( 29), INT8_C( 75), INT8_C( 11), -INT8_C( 20), -INT8_C( 43), -INT8_C( 107), INT8_C( 116), -INT8_C( 51), -INT8_C( 99), -INT8_C( 1), INT8_C( 123), -INT8_C( 65) }, { -INT8_C( 69), INT8_C( 76), -INT8_C( 13), -INT8_C( 119), INT8_C( 103), INT8_C( 37), INT8_C( 30), INT8_C( 59), INT8_C( 39), INT8_C( 43), INT8_C( 92), INT8_C( 78), INT8_C( 89), INT8_C( 44), -INT8_C( 80), INT8_C( 90) }, INT32_C( 10), { INT8_C( 116), -INT8_C( 51), -INT8_C( 
99), -INT8_C( 1), INT8_C( 123), -INT8_C( 65), -INT8_C( 69), INT8_C( 76), -INT8_C( 13), -INT8_C( 119), INT8_C( 103), INT8_C( 37), INT8_C( 30), INT8_C( 59), INT8_C( 39), INT8_C( 43) } }, { { -INT8_C( 28), -INT8_C( 24), -INT8_C( 1), INT8_C( 1), INT8_C( 51), INT8_C( 10), -INT8_C( 18), INT8_C( 9), -INT8_C( 97), INT8_C( 98), -INT8_C( 42), INT8_C( 61), INT8_C( 97), INT8_C( 81), -INT8_C( 4), INT8_C( 28) }, { -INT8_C( 98), -INT8_C( 16), -INT8_C( 90), INT8_C( 5), INT8_C( 21), -INT8_C( 60), INT8_C( 64), INT8_C( 60), -INT8_C( 16), -INT8_C( 99), -INT8_C( 117), INT8_C( 73), -INT8_C( 55), INT8_C( 59), -INT8_C( 93), -INT8_C( 82) }, INT32_C( 12), { INT8_C( 97), INT8_C( 81), -INT8_C( 4), INT8_C( 28), -INT8_C( 98), -INT8_C( 16), -INT8_C( 90), INT8_C( 5), INT8_C( 21), -INT8_C( 60), INT8_C( 64), INT8_C( 60), -INT8_C( 16), -INT8_C( 99), -INT8_C( 117), INT8_C( 73) } }, { { INT8_C( 35), -INT8_C( 94), -INT8_C( 81), INT8_C( 86), -INT8_C( 83), -INT8_C( 99), INT8_C( 95), INT8_C( 76), -INT8_C( 1), INT8_C( 53), -INT8_C( 119), INT8_C( 96), -INT8_C( 121), -INT8_C( 122), INT8_C( 125), INT8_C( 37) }, { INT8_C( 118), INT8_C( 35), INT8_C( 42), -INT8_C( 117), -INT8_C( 25), INT8_C( 107), -INT8_C( 56), -INT8_C( 41), INT8_C( 8), INT8_C( 83), INT8_C( 32), -INT8_C( 47), -INT8_C( 114), -INT8_C( 61), INT8_MAX, -INT8_C( 79) }, INT32_C( 14), { INT8_C( 125), INT8_C( 37), INT8_C( 118), INT8_C( 35), INT8_C( 42), -INT8_C( 117), -INT8_C( 25), INT8_C( 107), -INT8_C( 56), -INT8_C( 41), INT8_C( 8), INT8_C( 83), INT8_C( 32), -INT8_C( 47), -INT8_C( 114), -INT8_C( 61) } }, { { INT8_C( 102), INT8_C( 47), INT8_C( 7), INT8_C( 19), -INT8_C( 52), INT8_C( 103), INT8_C( 95), -INT8_C( 52), -INT8_C( 100), -INT8_C( 23), INT8_C( 44), INT8_C( 35), INT8_C( 111), -INT8_C( 87), INT8_C( 72), -INT8_C( 27) }, { -INT8_C( 52), INT8_C( 115), INT8_C( 112), -INT8_C( 76), -INT8_C( 34), INT8_C( 56), -INT8_C( 117), -INT8_C( 26), -INT8_C( 117), -INT8_C( 84), -INT8_C( 73), INT8_C( 25), INT8_C( 111), INT8_C( 55), -INT8_C( 54), -INT8_C( 43) }, 
INT32_C( 15), { -INT8_C( 27), -INT8_C( 52), INT8_C( 115), INT8_C( 112), -INT8_C( 76), -INT8_C( 34), INT8_C( 56), -INT8_C( 117), -INT8_C( 26), -INT8_C( 117), -INT8_C( 84), -INT8_C( 73), INT8_C( 25), INT8_C( 111), INT8_C( 55), -INT8_C( 54) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); int n = test_vec[i].n; simde_int8x16_t r; SIMDE_CONSTIFY_16_(simde_vextq_s8, r, (HEDLEY_UNREACHABLE(), a), n, a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); const int lanes[] = { 2, 4, 6, 8, 10, 12, 14, 15 }; for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vextq_s8(a, b, lanes[i]); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vextq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; int n; int16_t r[8]; } test_vec[] = { { { -INT16_C( 5517), INT16_C( 11597), -INT16_C( 20136), INT16_C( 32730), -INT16_C( 15613), -INT16_C( 13999), -INT16_C( 10256), -INT16_C( 12873) }, { INT16_C( 23318), -INT16_C( 23493), INT16_C( 6269), -INT16_C( 11894), -INT16_C( 14678), -INT16_C( 567), -INT16_C( 20668), -INT16_C( 18497) }, INT32_C( 0), { -INT16_C( 5517), INT16_C( 11597), -INT16_C( 20136), INT16_C( 32730), -INT16_C( 15613), -INT16_C( 13999), -INT16_C( 10256), -INT16_C( 12873) } }, { { INT16_C( 3481), -INT16_C( 3612), -INT16_C( 16706), -INT16_C( 16015), -INT16_C( 15742), INT16_C( 29322), INT16_C( 17049), -INT16_C( 20673) }, { INT16_C( 31389), INT16_C( 6739), -INT16_C( 8813), INT16_C( 
15852), -INT16_C( 19037), -INT16_C( 6341), -INT16_C( 1436), -INT16_C( 354) }, INT32_C( 1), { -INT16_C( 3612), -INT16_C( 16706), -INT16_C( 16015), -INT16_C( 15742), INT16_C( 29322), INT16_C( 17049), -INT16_C( 20673), INT16_C( 31389) } }, { { -INT16_C( 31993), -INT16_C( 14609), INT16_C( 24641), -INT16_C( 15481), INT16_C( 4642), -INT16_C( 17611), INT16_C( 29780), -INT16_C( 3734) }, { -INT16_C( 16913), -INT16_C( 32244), -INT16_C( 1894), INT16_C( 15807), -INT16_C( 1363), INT16_C( 4388), -INT16_C( 15371), -INT16_C( 1009) }, INT32_C( 2), { INT16_C( 24641), -INT16_C( 15481), INT16_C( 4642), -INT16_C( 17611), INT16_C( 29780), -INT16_C( 3734), -INT16_C( 16913), -INT16_C( 32244) } }, { { -INT16_C( 186), -INT16_C( 30782), INT16_C( 19039), -INT16_C( 32181), -INT16_C( 32676), -INT16_C( 20419), -INT16_C( 22283), -INT16_C( 7007) }, { -INT16_C( 21147), INT16_C( 102), INT16_C( 9637), INT16_C( 21053), INT16_C( 25120), INT16_C( 5476), INT16_C( 29477), INT16_C( 27409) }, INT32_C( 3), { -INT16_C( 32181), -INT16_C( 32676), -INT16_C( 20419), -INT16_C( 22283), -INT16_C( 7007), -INT16_C( 21147), INT16_C( 102), INT16_C( 9637) } }, { { -INT16_C( 11150), -INT16_C( 11534), INT16_C( 15646), INT16_C( 31316), -INT16_C( 28226), -INT16_C( 19670), -INT16_C( 13511), -INT16_C( 24681) }, { -INT16_C( 647), INT16_C( 7839), -INT16_C( 9182), INT16_C( 17009), -INT16_C( 10946), INT16_C( 25431), INT16_C( 26952), -INT16_C( 17458) }, INT32_C( 4), { -INT16_C( 28226), -INT16_C( 19670), -INT16_C( 13511), -INT16_C( 24681), -INT16_C( 647), INT16_C( 7839), -INT16_C( 9182), INT16_C( 17009) } }, { { -INT16_C( 16067), INT16_C( 23437), -INT16_C( 7682), -INT16_C( 17195), -INT16_C( 142), -INT16_C( 21393), INT16_C( 1738), INT16_C( 17227) }, { -INT16_C( 5629), INT16_C( 9826), -INT16_C( 11322), INT16_C( 1384), -INT16_C( 16216), -INT16_C( 3992), INT16_C( 14121), INT16_C( 26283) }, INT32_C( 5), { -INT16_C( 21393), INT16_C( 1738), INT16_C( 17227), -INT16_C( 5629), INT16_C( 9826), -INT16_C( 11322), INT16_C( 1384), -INT16_C( 16216) 
} }, { { INT16_C( 14584), -INT16_C( 2367), -INT16_C( 27111), -INT16_C( 29517), INT16_C( 8853), INT16_C( 24376), -INT16_C( 31959), INT16_C( 11427) }, { INT16_C( 1389), INT16_C( 13138), -INT16_C( 17448), -INT16_C( 32712), -INT16_C( 24197), -INT16_C( 23440), INT16_C( 7384), -INT16_C( 12278) }, INT32_C( 6), { -INT16_C( 31959), INT16_C( 11427), INT16_C( 1389), INT16_C( 13138), -INT16_C( 17448), -INT16_C( 32712), -INT16_C( 24197), -INT16_C( 23440) } }, { { -INT16_C( 13484), INT16_C( 28358), INT16_C( 31073), -INT16_C( 2310), INT16_C( 12956), -INT16_C( 15019), -INT16_C( 1867), INT16_C( 8945) }, { INT16_C( 17661), -INT16_C( 10923), -INT16_C( 28929), INT16_C( 31317), -INT16_C( 14801), INT16_C( 1822), INT16_C( 10466), INT16_C( 14039) }, INT32_C( 7), { INT16_C( 8945), INT16_C( 17661), -INT16_C( 10923), -INT16_C( 28929), INT16_C( 31317), -INT16_C( 14801), INT16_C( 1822), INT16_C( 10466) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); int n = test_vec[i].n; simde_int16x8_t r; SIMDE_CONSTIFY_8_(simde_vextq_s16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 4, 5, 6, 7 }; for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vextq_s16(a, b, lanes[i]); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vextq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; int n; int32_t r[4]; } test_vec[] = { { { 
-INT32_C( 145520176), -INT32_C( 1239517348), INT32_C( 1536889979), INT32_C( 771985900) }, { -INT32_C( 1757387936), INT32_C( 2023341027), -INT32_C( 369368052), -INT32_C( 2043944522) }, INT32_C( 0), { -INT32_C( 145520176), -INT32_C( 1239517348), INT32_C( 1536889979), INT32_C( 771985900) } }, { { -INT32_C( 948011413), INT32_C( 1887280373), -INT32_C( 1664345936), INT32_C( 164286377) }, { INT32_C( 362875442), -INT32_C( 762496314), -INT32_C( 709064674), -INT32_C( 715397270) }, INT32_C( 1), { INT32_C( 1887280373), -INT32_C( 1664345936), INT32_C( 164286377), INT32_C( 362875442) } }, { { INT32_C( 1537005925), INT32_C( 650844534), -INT32_C( 624715983), -INT32_C( 1729917594) }, { INT32_C( 1588429976), -INT32_C( 584041538), INT32_C( 766700739), INT32_C( 956435923) }, INT32_C( 2), { -INT32_C( 624715983), -INT32_C( 1729917594), INT32_C( 1588429976), -INT32_C( 584041538) } }, { { INT32_C( 1570021095), -INT32_C( 394043465), INT32_C( 1573013239), INT32_C( 1828103892) }, { -INT32_C( 372595926), -INT32_C( 1564017954), -INT32_C( 1160808217), INT32_C( 1827918469) }, INT32_C( 3), { INT32_C( 1828103892), -INT32_C( 372595926), -INT32_C( 1564017954), -INT32_C( 1160808217) } }, { { INT32_C( 684296048), -INT32_C( 569356825), INT32_C( 1731974035), -INT32_C( 1546440327) }, { -INT32_C( 1282630187), INT32_C( 2136298136), INT32_C( 1345922506), INT32_C( 1740385783) }, INT32_C( 0), { INT32_C( 684296048), -INT32_C( 569356825), INT32_C( 1731974035), -INT32_C( 1546440327) } }, { { -INT32_C( 1685092684), INT32_C( 1719247059), -INT32_C( 321997453), -INT32_C( 1148214810) }, { -INT32_C( 680584129), INT32_C( 961987694), -INT32_C( 527855639), INT32_C( 1900561852) }, INT32_C( 1), { INT32_C( 1719247059), -INT32_C( 321997453), -INT32_C( 1148214810), -INT32_C( 680584129) } }, { { -INT32_C( 1643325493), -INT32_C( 368736649), INT32_C( 567726907), -INT32_C( 1277335948) }, { -INT32_C( 259371902), -INT32_C( 97918960), INT32_C( 752530032), -INT32_C( 1013112072) }, INT32_C( 2), { INT32_C( 567726907), -INT32_C( 
1277335948), -INT32_C( 259371902), -INT32_C( 97918960) } }, { { INT32_C( 1902291706), INT32_C( 1801217840), -INT32_C( 1366543814), INT32_C( 442657176) }, { -INT32_C( 972297035), INT32_C( 1036006605), -INT32_C( 546727193), -INT32_C( 1214117955) }, INT32_C( 3), { INT32_C( 442657176), -INT32_C( 972297035), INT32_C( 1036006605), -INT32_C( 546727193) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); int n = test_vec[i].n; simde_int32x4_t r; SIMDE_CONSTIFY_4_(simde_vextq_s32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vextq_s32(a, b, lanes[i]); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vextq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t b[2]; int n; int64_t r[2]; } test_vec[] = { { { INT64_C( 8417013950001805874), INT64_C( 3633663084360530521) }, { INT64_C( 1943148483586602984), INT64_C( 3816659953121660075) }, INT32_C( 0), { INT64_C( 8417013950001805874), INT64_C( 3633663084360530521) } }, { { -INT64_C( 1439284280502823430), INT64_C( 1054151440654452764) }, { -INT64_C( 4311023356637567279), -INT64_C( 1374377497113431369) }, INT32_C( 1), { INT64_C( 1054151440654452764), -INT64_C( 4311023356637567279) } }, { { INT64_C( 9155400649511190671), -INT64_C( 6593111716141286836) }, { -INT64_C( 7488392302349077195), INT64_C( 
2320267026713630292) }, INT32_C( 0), { INT64_C( 9155400649511190671), -INT64_C( 6593111716141286836) } }, { { -INT64_C( 9016399738793971202), -INT64_C( 7889614551166694134) }, { INT64_C( 8044261376637614122), INT64_C( 3458176489256055627) }, INT32_C( 1), { -INT64_C( 7889614551166694134), INT64_C( 8044261376637614122) } }, { { -INT64_C( 7326670976013287525), -INT64_C( 1860835758546860227) }, { INT64_C( 5673852747975233075), -INT64_C( 583453248029232250) }, INT32_C( 0), { -INT64_C( 7326670976013287525), -INT64_C( 1860835758546860227) } }, { { INT64_C( 2635198315562039143), INT64_C( 4108384592786754145) }, { INT64_C( 957653221600916728), INT64_C( 291517491048707511) }, INT32_C( 1), { INT64_C( 4108384592786754145), INT64_C( 957653221600916728) } }, { { INT64_C( 5705936401661219620), -INT64_C( 3398123318154564200) }, { INT64_C( 6482956552596381284), -INT64_C( 8129991925187166581) }, INT32_C( 0), { INT64_C( 5705936401661219620), -INT64_C( 3398123318154564200) } }, { { INT64_C( 5730771934834796618), -INT64_C( 4080045330485349448) }, { -INT64_C( 3666586218353513438), INT64_C( 5432222966802123113) }, INT32_C( 1), { -INT64_C( 4080045330485349448), -INT64_C( 3666586218353513438) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); int n = test_vec[i].n; simde_int64x2_t r; SIMDE_CONSTIFY_2_(simde_vextq_s64, r, (HEDLEY_UNREACHABLE(), a), n, a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); simde_int64x2_t r = simde_vextq_s64(a, b, lanes[i]); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vextq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t b[16]; int n; uint8_t r[16]; } test_vec[] = { { { UINT8_C(236), UINT8_C( 12), UINT8_C(220), UINT8_C( 76), UINT8_C(149), UINT8_C(102), UINT8_C(190), UINT8_C(154), UINT8_C(190), UINT8_C( 97), UINT8_C( 26), UINT8_C( 0), UINT8_C(104), UINT8_C( 4), UINT8_C(242), UINT8_C( 16) }, { UINT8_C( 85), UINT8_C(127), UINT8_C(245), UINT8_C( 30), UINT8_C(177), UINT8_C(132), UINT8_C( 67), UINT8_C(107), UINT8_C(115), UINT8_C( 46), UINT8_C(225), UINT8_C( 35), UINT8_C( 67), UINT8_C( 16), UINT8_C( 83), UINT8_C( 47) }, INT32_C( 0), { UINT8_C(236), UINT8_C( 12), UINT8_C(220), UINT8_C( 76), UINT8_C(149), UINT8_C(102), UINT8_C(190), UINT8_C(154), UINT8_C(190), UINT8_C( 97), UINT8_C( 26), UINT8_C( 0), UINT8_C(104), UINT8_C( 4), UINT8_C(242), UINT8_C( 16) } }, { { UINT8_C( 28), UINT8_C( 47), UINT8_C(124), UINT8_C(177), UINT8_C(149), UINT8_C( 58), UINT8_C( 76), UINT8_C( 83), UINT8_C(156), UINT8_C(102), UINT8_C( 84), UINT8_C( 4), UINT8_C(106), UINT8_C( 70), UINT8_C( 20), UINT8_C(191) }, { UINT8_C(197), UINT8_C( 9), UINT8_C(221), UINT8_C(118), UINT8_C(141), UINT8_C( 33), UINT8_C(225), UINT8_C( 0), UINT8_C( 79), UINT8_C(194), UINT8_C( 35), UINT8_C(147), UINT8_C(210), UINT8_C(118), UINT8_C(194), UINT8_C(239) }, INT32_C( 2), { UINT8_C(124), UINT8_C(177), UINT8_C(149), UINT8_C( 58), UINT8_C( 76), UINT8_C( 83), UINT8_C(156), UINT8_C(102), UINT8_C( 84), UINT8_C( 4), UINT8_C(106), UINT8_C( 70), UINT8_C( 20), UINT8_C(191), UINT8_C(197), UINT8_C( 9) } }, { { UINT8_C(166), UINT8_C( 62), UINT8_C(160), UINT8_C( 59), UINT8_C(121), UINT8_C(236), UINT8_C(143), UINT8_C( 21), UINT8_C( 82), UINT8_C(227), UINT8_C( 25), UINT8_C(188), UINT8_C( 41), UINT8_C( 45), UINT8_C(123), UINT8_C(238) }, { UINT8_C( 54), UINT8_C( 89), UINT8_C(100), UINT8_C(196), 
UINT8_C(122), UINT8_C( 69), UINT8_C(196), UINT8_C(201), UINT8_C( 7), UINT8_C(232), UINT8_C( 92), UINT8_C(218), UINT8_C( 94), UINT8_C( 31), UINT8_C(201), UINT8_C( 4) }, INT32_C( 4), { UINT8_C(121), UINT8_C(236), UINT8_C(143), UINT8_C( 21), UINT8_C( 82), UINT8_C(227), UINT8_C( 25), UINT8_C(188), UINT8_C( 41), UINT8_C( 45), UINT8_C(123), UINT8_C(238), UINT8_C( 54), UINT8_C( 89), UINT8_C(100), UINT8_C(196) } }, { { UINT8_C( 93), UINT8_C(105), UINT8_C( 64), UINT8_C(214), UINT8_C( 86), UINT8_C(207), UINT8_C(235), UINT8_C(168), UINT8_C(178), UINT8_C( 4), UINT8_C(101), UINT8_C(219), UINT8_C( 49), UINT8_C(224), UINT8_C(201), UINT8_C(104) }, { UINT8_C( 57), UINT8_C( 45), UINT8_C( 44), UINT8_C(179), UINT8_C(114), UINT8_C(240), UINT8_C(125), UINT8_C(121), UINT8_C(216), UINT8_C(217), UINT8_C( 83), UINT8_C( 55), UINT8_C(248), UINT8_C( 28), UINT8_C( 59), UINT8_C( 86) }, INT32_C( 6), { UINT8_C(235), UINT8_C(168), UINT8_C(178), UINT8_C( 4), UINT8_C(101), UINT8_C(219), UINT8_C( 49), UINT8_C(224), UINT8_C(201), UINT8_C(104), UINT8_C( 57), UINT8_C( 45), UINT8_C( 44), UINT8_C(179), UINT8_C(114), UINT8_C(240) } }, { { UINT8_C(134), UINT8_C(123), UINT8_C( 44), UINT8_C(220), UINT8_C( 74), UINT8_C( 24), UINT8_C(132), UINT8_C(252), UINT8_C( 28), UINT8_C(233), UINT8_C(215), UINT8_C( 78), UINT8_C(202), UINT8_C(160), UINT8_C(182), UINT8_C( 3) }, { UINT8_C(205), UINT8_C(226), UINT8_C(183), UINT8_C( 63), UINT8_C(210), UINT8_C( 52), UINT8_C(185), UINT8_C(171), UINT8_C( 13), UINT8_C( 12), UINT8_C(226), UINT8_C( 6), UINT8_C( 41), UINT8_C( 29), UINT8_C( 92), UINT8_C(175) }, INT32_C( 8), { UINT8_C( 28), UINT8_C(233), UINT8_C(215), UINT8_C( 78), UINT8_C(202), UINT8_C(160), UINT8_C(182), UINT8_C( 3), UINT8_C(205), UINT8_C(226), UINT8_C(183), UINT8_C( 63), UINT8_C(210), UINT8_C( 52), UINT8_C(185), UINT8_C(171) } }, { { UINT8_C(153), UINT8_C(136), UINT8_C(139), UINT8_C(227), UINT8_C(160), UINT8_C( 15), UINT8_C(224), UINT8_C(189), UINT8_C(249), UINT8_C(183), UINT8_C( 11), UINT8_C(195), UINT8_C( 88), 
UINT8_C(193), UINT8_C(198), UINT8_C( 37) }, { UINT8_C(163), UINT8_C(125), UINT8_C(101), UINT8_C(117), UINT8_C(177), UINT8_C( 30), UINT8_C( 32), UINT8_C(191), UINT8_C( 42), UINT8_C( 2), UINT8_C(197), UINT8_C( 83), UINT8_C( 32), UINT8_C( 33), UINT8_C( 2), UINT8_C(185) }, INT32_C( 10), { UINT8_C( 11), UINT8_C(195), UINT8_C( 88), UINT8_C(193), UINT8_C(198), UINT8_C( 37), UINT8_C(163), UINT8_C(125), UINT8_C(101), UINT8_C(117), UINT8_C(177), UINT8_C( 30), UINT8_C( 32), UINT8_C(191), UINT8_C( 42), UINT8_C( 2) } }, { { UINT8_C(169), UINT8_C(141), UINT8_C(156), UINT8_C( 74), UINT8_C(157), UINT8_C(124), UINT8_C( 7), UINT8_C(150), UINT8_C( 52), UINT8_C( 18), UINT8_C( 89), UINT8_C(140), UINT8_C(211), UINT8_C( 31), UINT8_C(177), UINT8_C(118) }, { UINT8_C(157), UINT8_C( 22), UINT8_C(235), UINT8_C( 78), UINT8_C( 52), UINT8_C( 12), UINT8_C( 13), UINT8_C( 95), UINT8_C( 14), UINT8_C(210), UINT8_C(178), UINT8_C( 46), UINT8_C(243), UINT8_C(181), UINT8_C(231), UINT8_C(157) }, INT32_C( 12), { UINT8_C(211), UINT8_C( 31), UINT8_C(177), UINT8_C(118), UINT8_C(157), UINT8_C( 22), UINT8_C(235), UINT8_C( 78), UINT8_C( 52), UINT8_C( 12), UINT8_C( 13), UINT8_C( 95), UINT8_C( 14), UINT8_C(210), UINT8_C(178), UINT8_C( 46) } }, { { UINT8_C( 66), UINT8_C(132), UINT8_C(231), UINT8_C(223), UINT8_C( 0), UINT8_C(238), UINT8_C(117), UINT8_C( 52), UINT8_C( 0), UINT8_C(206), UINT8_C(192), UINT8_C(211), UINT8_C(238), UINT8_C(114), UINT8_C( 73), UINT8_C(139) }, { UINT8_C(136), UINT8_C( 52), UINT8_C(217), UINT8_C(189), UINT8_C( 64), UINT8_C(231), UINT8_C( 28), UINT8_C( 79), UINT8_C(185), UINT8_C(206), UINT8_C(125), UINT8_C(173), UINT8_C(131), UINT8_C(101), UINT8_C( 74), UINT8_C(198) }, INT32_C( 14), { UINT8_C( 73), UINT8_C(139), UINT8_C(136), UINT8_C( 52), UINT8_C(217), UINT8_C(189), UINT8_C( 64), UINT8_C(231), UINT8_C( 28), UINT8_C( 79), UINT8_C(185), UINT8_C(206), UINT8_C(125), UINT8_C(173), UINT8_C(131), UINT8_C(101) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); int n = test_vec[i].n; simde_uint8x16_t r; SIMDE_CONSTIFY_16_(simde_vextq_u8, r, (HEDLEY_UNREACHABLE(), a), n, a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 2, 4, 6, 8, 10, 12, 14 }; for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vextq_u8(a, b, lanes[i]); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vextq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; int n; uint16_t r[8]; } test_vec[] = { { { UINT16_C(24754), UINT16_C(42838), UINT16_C(61167), UINT16_C(61004), UINT16_C(44235), UINT16_C(29361), UINT16_C(52240), UINT16_C(47002) }, { UINT16_C(58649), UINT16_C(62180), UINT16_C(35996), UINT16_C(45949), UINT16_C(27671), UINT16_C(46444), UINT16_C(49652), UINT16_C(42653) }, INT32_C( 0), { UINT16_C(24754), UINT16_C(42838), UINT16_C(61167), UINT16_C(61004), UINT16_C(44235), UINT16_C(29361), UINT16_C(52240), UINT16_C(47002) } }, { { UINT16_C(62241), UINT16_C( 4173), UINT16_C(39393), UINT16_C(44543), UINT16_C(45125), UINT16_C(21791), UINT16_C(47740), UINT16_C(38413) }, { UINT16_C(61855), UINT16_C(15496), UINT16_C( 1405), UINT16_C(38383), UINT16_C(23410), UINT16_C(26186), UINT16_C(59165), UINT16_C(15884) }, INT32_C( 1), { UINT16_C( 4173), UINT16_C(39393), UINT16_C(44543), UINT16_C(45125), UINT16_C(21791), UINT16_C(47740), UINT16_C(38413), UINT16_C(61855) } }, { { UINT16_C(23002), UINT16_C(48207), UINT16_C(20210), UINT16_C(14185), UINT16_C(35070), 
UINT16_C(31628), UINT16_C(39234), UINT16_C(57873) }, { UINT16_C(39307), UINT16_C( 2078), UINT16_C( 3487), UINT16_C( 4509), UINT16_C(59241), UINT16_C(34423), UINT16_C(33742), UINT16_C(43460) }, INT32_C( 2), { UINT16_C(20210), UINT16_C(14185), UINT16_C(35070), UINT16_C(31628), UINT16_C(39234), UINT16_C(57873), UINT16_C(39307), UINT16_C( 2078) } }, { { UINT16_C( 5084), UINT16_C(52837), UINT16_C(52833), UINT16_C(24581), UINT16_C(37206), UINT16_C(39387), UINT16_C(60459), UINT16_C(46715) }, { UINT16_C(39301), UINT16_C( 9406), UINT16_C(23718), UINT16_C( 3893), UINT16_C(44099), UINT16_C( 4757), UINT16_C(23087), UINT16_C( 3003) }, INT32_C( 3), { UINT16_C(24581), UINT16_C(37206), UINT16_C(39387), UINT16_C(60459), UINT16_C(46715), UINT16_C(39301), UINT16_C( 9406), UINT16_C(23718) } }, { { UINT16_C( 8301), UINT16_C(53209), UINT16_C(57070), UINT16_C(17455), UINT16_C( 2672), UINT16_C(39901), UINT16_C(22774), UINT16_C(31569) }, { UINT16_C( 4081), UINT16_C(39072), UINT16_C(54635), UINT16_C(44967), UINT16_C(15746), UINT16_C(45505), UINT16_C(31895), UINT16_C( 1213) }, INT32_C( 4), { UINT16_C( 2672), UINT16_C(39901), UINT16_C(22774), UINT16_C(31569), UINT16_C( 4081), UINT16_C(39072), UINT16_C(54635), UINT16_C(44967) } }, { { UINT16_C(38556), UINT16_C(35539), UINT16_C( 629), UINT16_C(58830), UINT16_C(44044), UINT16_C( 640), UINT16_C(53508), UINT16_C(63102) }, { UINT16_C( 7904), UINT16_C(19598), UINT16_C(13811), UINT16_C(30203), UINT16_C(48242), UINT16_C( 2343), UINT16_C(58424), UINT16_C(54286) }, INT32_C( 5), { UINT16_C( 640), UINT16_C(53508), UINT16_C(63102), UINT16_C( 7904), UINT16_C(19598), UINT16_C(13811), UINT16_C(30203), UINT16_C(48242) } }, { { UINT16_C(57722), UINT16_C(61278), UINT16_C(11492), UINT16_C(61652), UINT16_C(21720), UINT16_C(56819), UINT16_C(28965), UINT16_C( 1747) }, { UINT16_C(24975), UINT16_C(33362), UINT16_C(19862), UINT16_C( 2552), UINT16_C( 7945), UINT16_C(16658), UINT16_C( 8195), UINT16_C(32021) }, INT32_C( 6), { UINT16_C(28965), UINT16_C( 1747), 
UINT16_C(24975), UINT16_C(33362), UINT16_C(19862), UINT16_C( 2552), UINT16_C( 7945), UINT16_C(16658) } }, { { UINT16_C(29442), UINT16_C(58989), UINT16_C(16799), UINT16_C(30934), UINT16_C(51606), UINT16_C(47957), UINT16_C(10298), UINT16_C(51649) }, { UINT16_C( 5001), UINT16_C( 8012), UINT16_C(17504), UINT16_C(26920), UINT16_C(15203), UINT16_C(26282), UINT16_C(48987), UINT16_C(24035) }, INT32_C( 7), { UINT16_C(51649), UINT16_C( 5001), UINT16_C( 8012), UINT16_C(17504), UINT16_C(26920), UINT16_C(15203), UINT16_C(26282), UINT16_C(48987) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); int n = test_vec[i].n; simde_uint16x8_t r; SIMDE_CONSTIFY_8_(simde_vextq_u16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 4, 5, 6, 7 }; for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t r = simde_vextq_u16(a, b, lanes[i]); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vextq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; int n; uint32_t r[4]; } test_vec[] = { { { UINT32_C(2900247930), UINT32_C( 249084522), UINT32_C(3367049666), UINT32_C(2305880858) }, { UINT32_C(1681122816), UINT32_C( 807407232), UINT32_C(3769075986), UINT32_C(2554260254) }, INT32_C( 0), { UINT32_C(2900247930), UINT32_C( 249084522), UINT32_C(3367049666), UINT32_C(2305880858) } }, { { UINT32_C(2588155184), UINT32_C(2577931479), 
UINT32_C(1348557109), UINT32_C(1306120525) }, { UINT32_C( 934350007), UINT32_C( 761778458), UINT32_C(2030898778), UINT32_C( 688999417) }, INT32_C( 1), { UINT32_C(2577931479), UINT32_C(1348557109), UINT32_C(1306120525), UINT32_C( 934350007) } }, { { UINT32_C(1086609000), UINT32_C(2832821362), UINT32_C( 335035334), UINT32_C(3294679308) }, { UINT32_C(4160426461), UINT32_C(1009017826), UINT32_C(1807036785), UINT32_C(3851732861) }, INT32_C( 2), { UINT32_C( 335035334), UINT32_C(3294679308), UINT32_C(4160426461), UINT32_C(1009017826) } }, { { UINT32_C(2401589277), UINT32_C(2335702981), UINT32_C(1184771898), UINT32_C(3708485120) }, { UINT32_C(4057269775), UINT32_C(3660446057), UINT32_C(2823152427), UINT32_C(3347962538) }, INT32_C( 3), { UINT32_C(3708485120), UINT32_C(4057269775), UINT32_C(3660446057), UINT32_C(2823152427) } }, { { UINT32_C(4149654322), UINT32_C(3967979186), UINT32_C(3190956221), UINT32_C( 765148446) }, { UINT32_C(2887675971), UINT32_C(2508606570), UINT32_C(3644705839), UINT32_C(3634416294) }, INT32_C( 0), { UINT32_C(4149654322), UINT32_C(3967979186), UINT32_C(3190956221), UINT32_C( 765148446) } }, { { UINT32_C( 802223741), UINT32_C(1109086852), UINT32_C(2432716403), UINT32_C(3468663691) }, { UINT32_C(1987763468), UINT32_C(1477115945), UINT32_C(1915832524), UINT32_C(2420887826) }, INT32_C( 1), { UINT32_C(1109086852), UINT32_C(2432716403), UINT32_C(3468663691), UINT32_C(1987763468) } }, { { UINT32_C(1287592904), UINT32_C(3767458669), UINT32_C(3027406377), UINT32_C( 914501930) }, { UINT32_C( 950860814), UINT32_C(3381704700), UINT32_C( 289129215), UINT32_C(1537312403) }, INT32_C( 2), { UINT32_C(3027406377), UINT32_C( 914501930), UINT32_C( 950860814), UINT32_C(3381704700) } }, { { UINT32_C( 262693281), UINT32_C(1710175804), UINT32_C(4011418053), UINT32_C(2703596434) }, { UINT32_C(2480525719), UINT32_C(2270980488), UINT32_C(3214448683), UINT32_C(3222944286) }, INT32_C( 3), { UINT32_C(2703596434), UINT32_C(2480525719), UINT32_C(2270980488), UINT32_C(3214448683) } 
}, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); int n = test_vec[i].n; simde_uint32x4_t r; SIMDE_CONSTIFY_4_(simde_vextq_u32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t r = simde_vextq_u32(a, b, lanes[i]); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vextq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint64_t b[2]; int n; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 7730985752149231055), UINT64_C(16691050437685090889) }, { UINT64_C(15296917467191559588), UINT64_C( 4953153054590983161) }, INT32_C( 0), { UINT64_C( 7730985752149231055), UINT64_C(16691050437685090889) } }, { { UINT64_C( 8585916073411621671), UINT64_C(12195681843175063656) }, { UINT64_C( 3873588522413173385), UINT64_C( 1501322571122461211) }, INT32_C( 1), { UINT64_C(12195681843175063656), UINT64_C( 3873588522413173385) } }, { { UINT64_C(10348416691789093623), UINT64_C( 731032852088995190) }, { UINT64_C( 8665691342820056273), UINT64_C( 397124965935696258) }, INT32_C( 0), { UINT64_C(10348416691789093623), UINT64_C( 731032852088995190) } }, { { UINT64_C(13794743674936644281), UINT64_C( 5696877048505194371) }, { UINT64_C( 4670688828940093180), UINT64_C( 8357046737594997736) }, INT32_C( 1), { UINT64_C( 5696877048505194371), UINT64_C( 4670688828940093180) } }, { { 
UINT64_C( 7965675779152618627), UINT64_C( 2443345895848396059) }, { UINT64_C(14593998027727979657), UINT64_C(17029159575628035878) }, INT32_C( 0), { UINT64_C( 7965675779152618627), UINT64_C( 2443345895848396059) } }, { { UINT64_C( 9599215256693417951), UINT64_C(13136443954186594824) }, { UINT64_C( 4249011095164799050), UINT64_C( 5841571628204144572) }, INT32_C( 1), { UINT64_C(13136443954186594824), UINT64_C( 4249011095164799050) } }, { { UINT64_C( 1442649469284436245), UINT64_C( 8493665932999271676) }, { UINT64_C( 2280960384311680345), UINT64_C( 5049754754406990417) }, INT32_C( 0), { UINT64_C( 1442649469284436245), UINT64_C( 8493665932999271676) } }, { { UINT64_C( 9024875554142563870), UINT64_C( 7678503297709670640) }, { UINT64_C( 2081928341414687075), UINT64_C(11220986191717279809) }, INT32_C( 1), { UINT64_C( 7678503297709670640), UINT64_C( 2081928341414687075) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); int n = test_vec[i].n; simde_uint64x2_t r; SIMDE_CONSTIFY_2_(simde_vextq_u64, r, (HEDLEY_UNREACHABLE(), a), n, a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t r = simde_vextq_u64(a, b, lanes[i]); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } HEDLEY_DIAGNOSTIC_POP SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vext_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vext_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vext_s8) 
SIMDE_TEST_FUNC_LIST_ENTRY(vext_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vext_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vext_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vext_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vext_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vext_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vext_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vextq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vextq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vextq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vextq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vextq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vextq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vextq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vextq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vextq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vextq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/get_high.c000066400000000000000000000515751400333146700172560ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN get_high #include "test-neon.h" #include "../../../simde/arm/neon/get_high.h" static int test_simde_vget_high_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -342.02), SIMDE_FLOAT32_C( -700.14), SIMDE_FLOAT32_C( 232.12), SIMDE_FLOAT32_C( -101.83) }, { SIMDE_FLOAT32_C( 232.12), SIMDE_FLOAT32_C( -101.83) } }, { { SIMDE_FLOAT32_C( -988.11), SIMDE_FLOAT32_C( 147.79), SIMDE_FLOAT32_C( 871.85), SIMDE_FLOAT32_C( -252.38) }, { SIMDE_FLOAT32_C( 871.85), SIMDE_FLOAT32_C( -252.38) } }, { { SIMDE_FLOAT32_C( 182.44), SIMDE_FLOAT32_C( 926.75), SIMDE_FLOAT32_C( -990.47), SIMDE_FLOAT32_C( -507.70) }, { SIMDE_FLOAT32_C( -990.47), SIMDE_FLOAT32_C( -507.70) } }, { { SIMDE_FLOAT32_C( -332.88), SIMDE_FLOAT32_C( 248.48), SIMDE_FLOAT32_C( 962.27), SIMDE_FLOAT32_C( 190.23) }, { SIMDE_FLOAT32_C( 962.27), SIMDE_FLOAT32_C( 190.23) } }, { { SIMDE_FLOAT32_C( 530.12), SIMDE_FLOAT32_C( -559.75), SIMDE_FLOAT32_C( -68.78), SIMDE_FLOAT32_C( -625.03) }, { SIMDE_FLOAT32_C( -68.78), SIMDE_FLOAT32_C( -625.03) } }, { { SIMDE_FLOAT32_C( -696.26), SIMDE_FLOAT32_C( 343.99), SIMDE_FLOAT32_C( 875.39), 
SIMDE_FLOAT32_C( 507.32) }, { SIMDE_FLOAT32_C( 875.39), SIMDE_FLOAT32_C( 507.32) } }, { { SIMDE_FLOAT32_C( -239.21), SIMDE_FLOAT32_C( 714.53), SIMDE_FLOAT32_C( -233.95), SIMDE_FLOAT32_C( 96.87) }, { SIMDE_FLOAT32_C( -233.95), SIMDE_FLOAT32_C( 96.87) } }, { { SIMDE_FLOAT32_C( 751.96), SIMDE_FLOAT32_C( -235.16), SIMDE_FLOAT32_C( -766.29), SIMDE_FLOAT32_C( -590.06) }, { SIMDE_FLOAT32_C( -766.29), SIMDE_FLOAT32_C( -590.06) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a; simde_float32x2_t r; a = simde_vld1q_f32(test_vec[i].a); r = simde_vget_high_f32(a); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vget_high_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; simde_float64 r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( -496.87), SIMDE_FLOAT64_C( -513.71) }, { SIMDE_FLOAT64_C( -513.71) } }, { { SIMDE_FLOAT64_C( 305.86), SIMDE_FLOAT64_C( 676.57) }, { SIMDE_FLOAT64_C( 676.57) } }, { { SIMDE_FLOAT64_C( -67.25), SIMDE_FLOAT64_C( -986.11) }, { SIMDE_FLOAT64_C( -986.11) } }, { { SIMDE_FLOAT64_C( 582.35), SIMDE_FLOAT64_C( 984.12) }, { SIMDE_FLOAT64_C( 984.12) } }, { { SIMDE_FLOAT64_C( -936.12), SIMDE_FLOAT64_C( -999.04) }, { SIMDE_FLOAT64_C( -999.04) } }, { { SIMDE_FLOAT64_C( -427.34), SIMDE_FLOAT64_C( 114.37) }, { SIMDE_FLOAT64_C( 114.37) } }, { { SIMDE_FLOAT64_C( -340.67), SIMDE_FLOAT64_C( 719.32) }, { SIMDE_FLOAT64_C( 719.32) } }, { { SIMDE_FLOAT64_C( -609.36), SIMDE_FLOAT64_C( -653.25) }, { SIMDE_FLOAT64_C( -653.25) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x1_t r = simde_vget_high_f64(a); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vget_high_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t r[8]; } test_vec[] = { { { INT8_C( 2), -INT8_C( 102), -INT8_C( 10), -INT8_C( 
48), -INT8_C( 126), -INT8_C( 61), INT8_C( 29), INT8_C( 91), -INT8_C( 59), -INT8_C( 8), INT8_C( 97), INT8_C( 27), -INT8_C( 101), INT8_C( 40), -INT8_C( 4), INT8_C( 34) }, { -INT8_C( 59), -INT8_C( 8), INT8_C( 97), INT8_C( 27), -INT8_C( 101), INT8_C( 40), -INT8_C( 4), INT8_C( 34) } }, { { -INT8_C( 127), -INT8_C( 21), -INT8_C( 97), -INT8_C( 6), INT8_C( 83), INT8_C( 58), -INT8_C( 124), -INT8_C( 3), INT8_C( 103), INT8_C( 78), -INT8_C( 13), -INT8_C( 42), -INT8_C( 66), INT8_C( 46), INT8_C( 62), -INT8_C( 64) }, { INT8_C( 103), INT8_C( 78), -INT8_C( 13), -INT8_C( 42), -INT8_C( 66), INT8_C( 46), INT8_C( 62), -INT8_C( 64) } }, { { -INT8_C( 56), INT8_C( 52), -INT8_C( 112), INT8_C( 74), -INT8_C( 9), -INT8_C( 83), -INT8_C( 90), -INT8_C( 68), -INT8_C( 91), INT8_C( 7), -INT8_C( 41), INT8_C( 64), INT8_C( 47), -INT8_C( 44), INT8_C( 98), -INT8_C( 79) }, { -INT8_C( 91), INT8_C( 7), -INT8_C( 41), INT8_C( 64), INT8_C( 47), -INT8_C( 44), INT8_C( 98), -INT8_C( 79) } }, { { -INT8_C( 65), INT8_C( 1), -INT8_C( 85), INT8_C( 18), INT8_C( 60), INT8_C( 47), INT8_C( 15), -INT8_C( 93), INT8_C( 126), INT8_C( 3), INT8_C( 121), INT8_C( 60), INT8_C( 49), -INT8_C( 73), -INT8_C( 3), -INT8_C( 6) }, { INT8_C( 126), INT8_C( 3), INT8_C( 121), INT8_C( 60), INT8_C( 49), -INT8_C( 73), -INT8_C( 3), -INT8_C( 6) } }, { { -INT8_C( 21), -INT8_C( 115), INT8_C( 68), -INT8_C( 29), INT8_C( 59), -INT8_C( 22), -INT8_C( 97), -INT8_C( 32), -INT8_C( 14), INT8_C( 119), INT8_C( 33), INT8_C( 33), INT8_C( 75), -INT8_C( 125), -INT8_C( 46), INT8_C( 10) }, { -INT8_C( 14), INT8_C( 119), INT8_C( 33), INT8_C( 33), INT8_C( 75), -INT8_C( 125), -INT8_C( 46), INT8_C( 10) } }, { { -INT8_C( 123), INT8_C( 126), INT8_C( 29), -INT8_C( 63), -INT8_C( 83), INT8_C( 44), INT8_C( 100), INT8_C( 43), INT8_C( 47), -INT8_C( 35), INT8_C( 104), INT8_C( 97), -INT8_C( 108), INT8_C( 101), INT8_C( 91), INT8_MAX }, { INT8_C( 47), -INT8_C( 35), INT8_C( 104), INT8_C( 97), -INT8_C( 108), INT8_C( 101), INT8_C( 91), INT8_MAX } }, { { -INT8_C( 14), -INT8_C( 97), 
INT8_C( 98), INT8_C( 45), -INT8_C( 118), INT8_C( 2), INT8_C( 14), INT8_C( 124), INT8_C( 121), INT8_C( 47), -INT8_C( 99), -INT8_C( 60), -INT8_C( 78), INT8_C( 112), -INT8_C( 50), INT8_C( 55) }, { INT8_C( 121), INT8_C( 47), -INT8_C( 99), -INT8_C( 60), -INT8_C( 78), INT8_C( 112), -INT8_C( 50), INT8_C( 55) } }, { { -INT8_C( 18), -INT8_C( 21), -INT8_C( 8), -INT8_C( 101), INT8_C( 24), INT8_C( 92), -INT8_C( 57), INT8_C( 71), INT8_C( 57), INT8_C( 47), -INT8_C( 88), -INT8_C( 51), -INT8_C( 108), INT8_C( 3), INT8_C( 77), -INT8_C( 122) }, { INT8_C( 57), INT8_C( 47), -INT8_C( 88), -INT8_C( 51), -INT8_C( 108), INT8_C( 3), INT8_C( 77), -INT8_C( 122) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x8_t r = simde_vget_high_s8(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vget_high_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 1812), -INT16_C( 24453), INT16_C( 7401), INT16_C( 2904), -INT16_C( 25876), INT16_C( 9993), INT16_C( 30955), -INT16_C( 5514) }, { -INT16_C( 25876), INT16_C( 9993), INT16_C( 30955), -INT16_C( 5514) } }, { { INT16_C( 16302), INT16_C( 17547), -INT16_C( 23755), INT16_C( 8407), INT16_C( 14448), INT16_C( 24177), -INT16_C( 17721), -INT16_C( 19647) }, { INT16_C( 14448), INT16_C( 24177), -INT16_C( 17721), -INT16_C( 19647) } }, { { -INT16_C( 17230), -INT16_C( 25773), -INT16_C( 21288), -INT16_C( 15193), -INT16_C( 20410), INT16_C( 12780), INT16_C( 25129), -INT16_C( 10468) }, { -INT16_C( 20410), INT16_C( 12780), INT16_C( 25129), -INT16_C( 10468) } }, { { -INT16_C( 22622), -INT16_C( 10469), -INT16_C( 3509), -INT16_C( 17416), INT16_C( 26923), -INT16_C( 3559), INT16_C( 23075), -INT16_C( 10587) }, { INT16_C( 26923), -INT16_C( 3559), INT16_C( 23075), -INT16_C( 10587) } }, { { -INT16_C( 2026), -INT16_C( 4239), INT16_C( 6308), -INT16_C( 5453), -INT16_C( 24631), -INT16_C( 
3556), INT16_C( 14338), -INT16_C( 23351) }, { -INT16_C( 24631), -INT16_C( 3556), INT16_C( 14338), -INT16_C( 23351) } }, { { -INT16_C( 6945), INT16_C( 10875), INT16_C( 29655), INT16_C( 742), -INT16_C( 35), INT16_C( 244), -INT16_C( 26278), INT16_C( 28886) }, { -INT16_C( 35), INT16_C( 244), -INT16_C( 26278), INT16_C( 28886) } }, { { INT16_C( 18577), INT16_C( 13919), INT16_C( 4960), INT16_C( 10528), INT16_C( 15538), -INT16_C( 19429), -INT16_C( 7052), INT16_C( 21592) }, { INT16_C( 15538), -INT16_C( 19429), -INT16_C( 7052), INT16_C( 21592) } }, { { -INT16_C( 11063), -INT16_C( 24450), INT16_C( 25671), INT16_C( 9378), -INT16_C( 27036), -INT16_C( 16859), -INT16_C( 1233), -INT16_C( 16338) }, { -INT16_C( 27036), -INT16_C( 16859), -INT16_C( 1233), -INT16_C( 16338) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x4_t r = simde_vget_high_s16(a); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vget_high_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 1476397000), INT32_C( 461227151), -INT32_C( 799969980), INT32_C( 1647228559) }, { -INT32_C( 799969980), INT32_C( 1647228559) } }, { { INT32_C( 1924220388), -INT32_C( 255827408), -INT32_C( 2021250903), -INT32_C( 1104113530) }, { -INT32_C( 2021250903), -INT32_C( 1104113530) } }, { { INT32_C( 392573064), INT32_C( 942924788), -INT32_C( 536247216), INT32_C( 524433211) }, { -INT32_C( 536247216), INT32_C( 524433211) } }, { { -INT32_C( 1382878083), -INT32_C( 6401450), INT32_C( 8856698), INT32_C( 1019197364) }, { INT32_C( 8856698), INT32_C( 1019197364) } }, { { -INT32_C( 615242265), INT32_C( 1494517512), INT32_C( 1178148107), -INT32_C( 781812908) }, { INT32_C( 1178148107), -INT32_C( 781812908) } }, { { -INT32_C( 981469073), -INT32_C( 976937654), -INT32_C( 171619263), -INT32_C( 349076477) }, { -INT32_C( 171619263), -INT32_C( 349076477) } 
}, { { -INT32_C( 1295612503), INT32_C( 403429900), INT32_C( 1281246455), INT32_C( 790480063) }, { INT32_C( 1281246455), INT32_C( 790480063) } }, { { INT32_C( 133471420), -INT32_C( 87246407), INT32_C( 166695173), -INT32_C( 1074519786) }, { INT32_C( 166695173), -INT32_C( 1074519786) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x2_t r = simde_vget_high_s32(a); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vget_high_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t r[1]; } test_vec[] = { { { -INT64_C( 8232078131339440910), INT64_C( 6335349853300599597) }, { INT64_C( 6335349853300599597) } }, { { INT64_C( 1363296497143073401), -INT64_C( 845722348049157190) }, { -INT64_C( 845722348049157190) } }, { { INT64_C( 8704551305794192305), -INT64_C( 4365535833137767693) }, { -INT64_C( 4365535833137767693) } }, { { INT64_C( 5390133175935445707), -INT64_C( 3584049339885700787) }, { -INT64_C( 3584049339885700787) } }, { { INT64_C( 228796346852526185), INT64_C( 973200018634097012) }, { INT64_C( 973200018634097012) } }, { { INT64_C( 1622867932244095146), INT64_C( 8663420517885487182) }, { INT64_C( 8663420517885487182) } }, { { -INT64_C( 7709220138856396560), INT64_C( 4929987596782868763) }, { INT64_C( 4929987596782868763) } }, { { INT64_C( 1649728995198906885), -INT64_C( 2183193650073023956) }, { -INT64_C( 2183193650073023956) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x1_t r = simde_vget_high_s64(a); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; } static int test_simde_vget_high_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(178), UINT8_C(229), UINT8_C(135), UINT8_C( 25), UINT8_C(212), UINT8_C( 46), UINT8_C(139), UINT8_C( 
16), UINT8_C(223), UINT8_C(251), UINT8_C( 3), UINT8_C( 69), UINT8_C(221), UINT8_C(253), UINT8_C(220), UINT8_C(144) }, { UINT8_C(223), UINT8_C(251), UINT8_C( 3), UINT8_C( 69), UINT8_C(221), UINT8_C(253), UINT8_C(220), UINT8_C(144) } }, { { UINT8_C(214), UINT8_C( 35), UINT8_C( 37), UINT8_C( 87), UINT8_C( 39), UINT8_C( 45), UINT8_C(138), UINT8_C(244), UINT8_C(191), UINT8_C(175), UINT8_C(236), UINT8_C(166), UINT8_C(250), UINT8_C( 50), UINT8_C(230), UINT8_C(172) }, { UINT8_C(191), UINT8_C(175), UINT8_C(236), UINT8_C(166), UINT8_C(250), UINT8_C( 50), UINT8_C(230), UINT8_C(172) } }, { { UINT8_C( 23), UINT8_C(110), UINT8_C(198), UINT8_C(235), UINT8_C(156), UINT8_C( 81), UINT8_C(251), UINT8_C(123), UINT8_C( 76), UINT8_C(254), UINT8_C(193), UINT8_C( 41), UINT8_C(251), UINT8_C(157), UINT8_C(185), UINT8_C(209) }, { UINT8_C( 76), UINT8_C(254), UINT8_C(193), UINT8_C( 41), UINT8_C(251), UINT8_C(157), UINT8_C(185), UINT8_C(209) } }, { { UINT8_C(192), UINT8_C(222), UINT8_C( 41), UINT8_C(232), UINT8_C( 12), UINT8_C(179), UINT8_C(220), UINT8_C(203), UINT8_C( 98), UINT8_C(201), UINT8_C(114), UINT8_C( 92), UINT8_C(251), UINT8_C( 88), UINT8_C( 9), UINT8_C( 18) }, { UINT8_C( 98), UINT8_C(201), UINT8_C(114), UINT8_C( 92), UINT8_C(251), UINT8_C( 88), UINT8_C( 9), UINT8_C( 18) } }, { { UINT8_C(198), UINT8_C(207), UINT8_C(253), UINT8_C( 99), UINT8_C( 32), UINT8_C(248), UINT8_C(222), UINT8_C(109), UINT8_C(246), UINT8_C(159), UINT8_C(150), UINT8_C(242), UINT8_C( 61), UINT8_C( 80), UINT8_C(195), UINT8_C(253) }, { UINT8_C(246), UINT8_C(159), UINT8_C(150), UINT8_C(242), UINT8_C( 61), UINT8_C( 80), UINT8_C(195), UINT8_C(253) } }, { { UINT8_C( 46), UINT8_C(236), UINT8_C(229), UINT8_C( 58), UINT8_C(159), UINT8_C(194), UINT8_C( 6), UINT8_C( 2), UINT8_C(139), UINT8_C(120), UINT8_C( 94), UINT8_C(134), UINT8_C(208), UINT8_C(103), UINT8_C(152), UINT8_C(151) }, { UINT8_C(139), UINT8_C(120), UINT8_C( 94), UINT8_C(134), UINT8_C(208), UINT8_C(103), UINT8_C(152), UINT8_C(151) } }, { { UINT8_C( 54), 
UINT8_C(149), UINT8_C(250), UINT8_C( 87), UINT8_C(141), UINT8_C(216), UINT8_C(196), UINT8_C(131), UINT8_C(120), UINT8_C( 90), UINT8_C(117), UINT8_C(181), UINT8_C(170), UINT8_C( 57), UINT8_C(178), UINT8_C(217) }, { UINT8_C(120), UINT8_C( 90), UINT8_C(117), UINT8_C(181), UINT8_C(170), UINT8_C( 57), UINT8_C(178), UINT8_C(217) } }, { { UINT8_C( 37), UINT8_C(152), UINT8_C( 19), UINT8_C(197), UINT8_C( 90), UINT8_C( 25), UINT8_C(199), UINT8_C(229), UINT8_C(145), UINT8_C( 37), UINT8_C(107), UINT8_C( 98), UINT8_C(141), UINT8_C( 3), UINT8_C(249), UINT8_C(195) }, { UINT8_C(145), UINT8_C( 37), UINT8_C(107), UINT8_C( 98), UINT8_C(141), UINT8_C( 3), UINT8_C(249), UINT8_C(195) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x8_t r = simde_vget_high_u8(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vget_high_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(22920), UINT16_C(63569), UINT16_C(23914), UINT16_C(54594), UINT16_C(62338), UINT16_C(64225), UINT16_C(22481), UINT16_C(21359) }, { UINT16_C(62338), UINT16_C(64225), UINT16_C(22481), UINT16_C(21359) } }, { { UINT16_C(33438), UINT16_C( 1944), UINT16_C(21841), UINT16_C(28692), UINT16_C(13996), UINT16_C( 156), UINT16_C(50690), UINT16_C(35674) }, { UINT16_C(13996), UINT16_C( 156), UINT16_C(50690), UINT16_C(35674) } }, { { UINT16_C(43808), UINT16_C(35459), UINT16_C(50697), UINT16_C(35679), UINT16_C(16569), UINT16_C(35462), UINT16_C(62871), UINT16_C(14046) }, { UINT16_C(16569), UINT16_C(35462), UINT16_C(62871), UINT16_C(14046) } }, { { UINT16_C(30327), UINT16_C(51261), UINT16_C(21195), UINT16_C(30521), UINT16_C(54664), UINT16_C(35447), UINT16_C(53659), UINT16_C(47893) }, { UINT16_C(54664), UINT16_C(35447), UINT16_C(53659), UINT16_C(47893) } }, { { UINT16_C(39293), UINT16_C(34373), UINT16_C(42079), UINT16_C( 6161), 
UINT16_C(38885), UINT16_C(31906), UINT16_C(32909), UINT16_C( 1202) }, { UINT16_C(38885), UINT16_C(31906), UINT16_C(32909), UINT16_C( 1202) } }, { { UINT16_C(61686), UINT16_C(49613), UINT16_C( 1602), UINT16_C(51768), UINT16_C(45019), UINT16_C(30292), UINT16_C(27265), UINT16_C(65074) }, { UINT16_C(45019), UINT16_C(30292), UINT16_C(27265), UINT16_C(65074) } }, { { UINT16_C(30467), UINT16_C(25220), UINT16_C(38172), UINT16_C( 378), UINT16_C( 7213), UINT16_C(47741), UINT16_C(12445), UINT16_C(37822) }, { UINT16_C( 7213), UINT16_C(47741), UINT16_C(12445), UINT16_C(37822) } }, { { UINT16_C(35616), UINT16_C(25173), UINT16_C(36241), UINT16_C(27692), UINT16_C(32829), UINT16_C(48867), UINT16_C( 5610), UINT16_C(60860) }, { UINT16_C(32829), UINT16_C(48867), UINT16_C( 5610), UINT16_C(60860) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x4_t r = simde_vget_high_u16(a); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vget_high_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(1465259434), UINT32_C(1475338349), UINT32_C(3977679886), UINT32_C( 955184875) }, { UINT32_C(3977679886), UINT32_C( 955184875) } }, { { UINT32_C(2946554281), UINT32_C(1420591641), UINT32_C(3404329444), UINT32_C(3586975531) }, { UINT32_C(3404329444), UINT32_C(3586975531) } }, { { UINT32_C(1781343229), UINT32_C( 297933827), UINT32_C(2617170096), UINT32_C(2027154895) }, { UINT32_C(2617170096), UINT32_C(2027154895) } }, { { UINT32_C(3760747718), UINT32_C(3660895478), UINT32_C(4037287365), UINT32_C( 46559493) }, { UINT32_C(4037287365), UINT32_C( 46559493) } }, { { UINT32_C(2540499860), UINT32_C(3232247311), UINT32_C(3596330759), UINT32_C(1515073428) }, { UINT32_C(3596330759), UINT32_C(1515073428) } }, { { UINT32_C(2570745507), UINT32_C( 276000330), UINT32_C(2432702604), UINT32_C( 496223881) }, { 
UINT32_C(2432702604), UINT32_C( 496223881) } }, { { UINT32_C(3384082361), UINT32_C( 898194478), UINT32_C(2534138883), UINT32_C(3069335827) }, { UINT32_C(2534138883), UINT32_C(3069335827) } }, { { UINT32_C( 441396432), UINT32_C( 657114011), UINT32_C(1689791451), UINT32_C(2877377521) }, { UINT32_C(1689791451), UINT32_C(2877377521) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x2_t r = simde_vget_high_u32(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vget_high_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t r[1]; } test_vec[] = { { { UINT64_C(18392422304141364762), UINT64_C( 9553571390305685699) }, { UINT64_C( 9553571390305685699) } }, { { UINT64_C(10478824778783513736), UINT64_C( 8371460922054141089) }, { UINT64_C( 8371460922054141089) } }, { { UINT64_C( 4924723027677183669), UINT64_C( 8972535995333597553) }, { UINT64_C( 8972535995333597553) } }, { { UINT64_C( 6621385167040051125), UINT64_C( 1915813679545843143) }, { UINT64_C( 1915813679545843143) } }, { { UINT64_C( 763865471240075477), UINT64_C( 7715948976478543268) }, { UINT64_C( 7715948976478543268) } }, { { UINT64_C( 3754490732107106821), UINT64_C( 8006533284254532510) }, { UINT64_C( 8006533284254532510) } }, { { UINT64_C(18376479074412695395), UINT64_C(11771472225186025531) }, { UINT64_C(11771472225186025531) } }, { { UINT64_C( 6401441061383838005), UINT64_C( 8919791479167855808) }, { UINT64_C( 8919791479167855808) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x1_t r = simde_vget_high_u64(a); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vget_high_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vget_high_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vget_high_s8) 
SIMDE_TEST_FUNC_LIST_ENTRY(vget_high_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vget_high_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vget_high_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vget_high_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vget_high_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vget_high_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vget_high_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/get_lane.c000066400000000000000000001033421400333146700172440ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN get_lane #include "test-neon.h" #include "../../../simde/arm/neon/get_lane.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ static int test_simde_vget_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; int b; simde_float32 r; } test_vec[] = { { { SIMDE_FLOAT32_C( 410.64), SIMDE_FLOAT32_C( 344.10) }, INT8_C( 1), SIMDE_FLOAT32_C( 344.10) }, { { SIMDE_FLOAT32_C( -838.05), SIMDE_FLOAT32_C( -800.70) }, INT8_C( 1), SIMDE_FLOAT32_C( -800.70) }, { { SIMDE_FLOAT32_C( 546.78), SIMDE_FLOAT32_C( 198.96) }, INT8_C( 0), SIMDE_FLOAT32_C( 546.78) }, { { SIMDE_FLOAT32_C( -101.20), SIMDE_FLOAT32_C( -135.04) }, INT8_C( 1), SIMDE_FLOAT32_C( -135.04) }, { { SIMDE_FLOAT32_C( -61.99), SIMDE_FLOAT32_C( 998.55) }, INT8_C( 1), SIMDE_FLOAT32_C( 998.55) }, { { SIMDE_FLOAT32_C( -663.21), SIMDE_FLOAT32_C( -581.66) }, INT8_C( 0), SIMDE_FLOAT32_C( -663.21) }, { { SIMDE_FLOAT32_C( -515.64), SIMDE_FLOAT32_C( -101.75) }, INT8_C( 1), SIMDE_FLOAT32_C( -101.75) }, { { SIMDE_FLOAT32_C( 909.95), SIMDE_FLOAT32_C( -480.35) }, INT8_C( 0), SIMDE_FLOAT32_C( 909.95) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); int b = test_vec[i].b; simde_float32 r; SIMDE_CONSTIFY_2_(simde_vget_lane_f32, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT32_C(0.0)), b, a); simde_assert_equal_f32(r, test_vec[i].r, 1); } return 0; } static int test_simde_vget_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[1]; simde_float64 r; } 
test_vec[] = { { { SIMDE_FLOAT64_C( -475.46) }, SIMDE_FLOAT64_C( -475.46) }, { { SIMDE_FLOAT64_C( 40.13) }, SIMDE_FLOAT64_C( 40.13) }, { { SIMDE_FLOAT64_C( 577.68) }, SIMDE_FLOAT64_C( 577.68) }, { { SIMDE_FLOAT64_C( -438.44) }, SIMDE_FLOAT64_C( -438.44) }, { { SIMDE_FLOAT64_C( 564.06) }, SIMDE_FLOAT64_C( 564.06) }, { { SIMDE_FLOAT64_C( -519.34) }, SIMDE_FLOAT64_C( -519.34) }, { { SIMDE_FLOAT64_C( -449.22) }, SIMDE_FLOAT64_C( -449.22) }, { { SIMDE_FLOAT64_C( -456.94) }, SIMDE_FLOAT64_C( -456.94) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64 r = simde_vget_lane_f64(a, 0); simde_assert_equal_f64(r, test_vec[i].r, 1); } return 0; } static int test_simde_vget_lane_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int b; int8_t r; } test_vec[] = { { { INT8_C( 79), INT8_C( 68), -INT8_C( 36), INT8_C( 47), INT8_C( 87), -INT8_C( 22), -INT8_C( 44), -INT8_C( 111) }, INT8_C( 2), -INT8_C( 36) }, { { -INT8_C( 78), INT8_C( 75), -INT8_C( 106), INT8_C( 111), -INT8_C( 55), INT8_C( 39), -INT8_C( 69), -INT8_C( 110) }, INT8_C( 4), -INT8_C( 55) }, { { INT8_C( 72), -INT8_C( 120), -INT8_C( 122), -INT8_C( 86), INT8_C( 90), -INT8_C( 24), -INT8_C( 60), -INT8_C( 104) }, INT8_C( 1), -INT8_C( 120) }, { { INT8_C( 116), INT8_C( 37), -INT8_C( 99), -INT8_C( 48), INT8_C( 117), -INT8_C( 31), -INT8_C( 84), -INT8_C( 92) }, INT8_C( 0), INT8_C( 116) }, { { -INT8_C( 106), INT8_C( 120), -INT8_C( 54), -INT8_C( 64), INT8_C( 42), INT8_C( 21), INT8_C( 87), -INT8_C( 103) }, INT8_C( 6), INT8_C( 87) }, { { INT8_C( 126), INT8_C( 84), INT8_C( 112), INT8_C( 98), -INT8_C( 100), -INT8_C( 7), -INT8_C( 23), INT8_C( 70) }, INT8_C( 3), INT8_C( 98) }, { { -INT8_C( 47), INT8_C( 11), -INT8_C( 21), INT8_C( 10), INT8_MAX, INT8_C( 17), -INT8_C( 89), INT8_C( 79) }, INT8_C( 6), -INT8_C( 89) }, { { -INT8_C( 120), -INT8_C( 5), INT8_C( 42), -INT8_C( 64), -INT8_C( 110), -INT8_C( 94), -INT8_C( 118), INT8_C( 82) }, INT8_C( 4), -INT8_C( 
110) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); int b = test_vec[i].b; int8_t r; SIMDE_CONSTIFY_8_(simde_vget_lane_s8, r, (HEDLEY_UNREACHABLE(), 0), b, a); simde_assert_equal_i8(r, test_vec[i].r); } return 0; } static int test_simde_vget_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int b; int16_t r; } test_vec[] = { { { INT16_C( 13913), -INT16_C( 7828), -INT16_C( 14489), -INT16_C( 26057) }, INT8_C( 0), INT16_C( 13913) }, { { INT16_C( 13818), INT16_C( 18664), -INT16_C( 10245), INT16_C( 6383) }, INT8_C( 0), INT16_C( 13818) }, { { -INT16_C( 9771), -INT16_C( 25769), INT16_C( 2933), -INT16_C( 2011) }, INT8_C( 2), INT16_C( 2933) }, { { -INT16_C( 14019), -INT16_C( 15785), -INT16_C( 29150), INT16_C( 814) }, INT8_C( 1), -INT16_C( 15785) }, { { INT16_C( 15094), INT16_C( 3728), -INT16_C( 15051), INT16_C( 32246) }, INT8_C( 1), INT16_C( 3728) }, { { INT16_C( 28109), INT16_C( 18137), -INT16_C( 19902), -INT16_C( 8803) }, INT8_C( 0), INT16_C( 28109) }, { { INT16_C( 680), INT16_C( 27168), -INT16_C( 5569), INT16_C( 705) }, INT8_C( 0), INT16_C( 680) }, { { INT16_C( 12367), INT16_C( 17680), INT16_C( 18982), INT16_C( 13525) }, INT8_C( 3), INT16_C( 13525) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); int b = test_vec[i].b; int16_t r; SIMDE_CONSTIFY_4_(simde_vget_lane_s16, r, (HEDLEY_UNREACHABLE(), 0), b, a); simde_assert_equal_i16(r, test_vec[i].r); } return 0; } static int test_simde_vget_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int b; int32_t r; } test_vec[] = { { { INT32_C( 451851400), -INT32_C( 530446795) }, INT8_C( 0), INT32_C( 451851400) }, { { -INT32_C( 497636127), -INT32_C( 338817732) }, INT8_C( 1), -INT32_C( 338817732) }, { { INT32_C( 212911362), -INT32_C( 643488292) }, INT8_C( 1), -INT32_C( 643488292) }, { { -INT32_C( 1592001450), -INT32_C( 1634742396) }, INT8_C( 0), -INT32_C( 
1592001450) }, { { -INT32_C( 941293163), -INT32_C( 1004629534) }, INT8_C( 1), -INT32_C( 1004629534) }, { { INT32_C( 1941475883), -INT32_C( 1608286828) }, INT8_C( 0), INT32_C( 1941475883) }, { { -INT32_C( 30194103), INT32_C( 1025060764) }, INT8_C( 1), INT32_C( 1025060764) }, { { -INT32_C( 330117656), INT32_C( 718361442) }, INT8_C( 1), INT32_C( 718361442) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); int b = test_vec[i].b; int32_t r; SIMDE_CONSTIFY_2_(simde_vget_lane_s32, r, (HEDLEY_UNREACHABLE(), 0), b, a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; } static int test_simde_vget_lane_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; int64_t r; } test_vec[] = { { { INT64_C( 7593315202386255056) }, INT64_C( 7593315202386255056) }, { { -INT64_C( 3283523923862515318) }, -INT64_C( 3283523923862515318) }, { { INT64_C( 4591998433815169493) }, INT64_C( 4591998433815169493) }, { { INT64_C( 3515554461116684124) }, INT64_C( 3515554461116684124) }, { { INT64_C( 8463300658565386409) }, INT64_C( 8463300658565386409) }, { { -INT64_C( 5169869895816060153) }, -INT64_C( 5169869895816060153) }, { { -INT64_C( 3838821912131374513) }, -INT64_C( 3838821912131374513) }, { { INT64_C( 166258511252788092) }, INT64_C( 166258511252788092) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); int64_t r = simde_vget_lane_s64(a, 0); simde_assert_equal_i64(r, test_vec[i].r); } return 0; } static int test_simde_vget_lane_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; int b; uint8_t r; } test_vec[] = { { { UINT8_C(145), UINT8_C( 15), UINT8_C(109), UINT8_C( 38), UINT8_C(127), UINT8_C( 69), UINT8_C(163), UINT8_C(231) }, INT8_C( 3), UINT8_C( 38) }, { { UINT8_C( 26), UINT8_C( 62), UINT8_C(190), UINT8_C(237), UINT8_C(178), UINT8_C(209), UINT8_C( 61), UINT8_C( 4) }, INT8_C( 4), UINT8_C(178) }, { { UINT8_C( 55), UINT8_C(139), 
UINT8_C( 83), UINT8_C( 28), UINT8_C( 77), UINT8_C( 73), UINT8_C(209), UINT8_C( 70) }, INT8_C( 0), UINT8_C( 55) }, { { UINT8_C( 62), UINT8_C(144), UINT8_C( 10), UINT8_C(169), UINT8_C( 34), UINT8_C( 25), UINT8_C( 23), UINT8_C( 72) }, INT8_C( 0), UINT8_C( 62) }, { { UINT8_C( 92), UINT8_C(235), UINT8_C(127), UINT8_C( 0), UINT8_C( 6), UINT8_C(189), UINT8_C(190), UINT8_C(243) }, INT8_C( 7), UINT8_C(243) }, { { UINT8_C(143), UINT8_C( 49), UINT8_C(115), UINT8_C(147), UINT8_C(104), UINT8_C(254), UINT8_C(231), UINT8_C(132) }, INT8_C( 4), UINT8_C(104) }, { { UINT8_C( 48), UINT8_C( 86), UINT8_C(146), UINT8_C(200), UINT8_C(148), UINT8_C( 34), UINT8_C(210), UINT8_C( 61) }, INT8_C( 4), UINT8_C(148) }, { { UINT8_C(235), UINT8_C( 84), UINT8_C(141), UINT8_C(131), UINT8_C(177), UINT8_C(120), UINT8_C( 2), UINT8_C(177) }, INT8_C( 6), UINT8_C( 2) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); int b = test_vec[i].b; uint8_t r; SIMDE_CONSTIFY_8_(simde_vget_lane_u8, r, (HEDLEY_UNREACHABLE(), 0), b, a); simde_assert_equal_u8(r, test_vec[i].r); } return 0; } static int test_simde_vget_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; int b; uint16_t r; } test_vec[] = { { { UINT16_C(12419), UINT16_C(50038), UINT16_C(38877), UINT16_C(17206) }, INT8_C( 2), UINT16_C(38877) }, { { UINT16_C(48224), UINT16_C(21652), UINT16_C(15709), UINT16_C(31006) }, INT8_C( 3), UINT16_C(31006) }, { { UINT16_C(39237), UINT16_C(11133), UINT16_C(17423), UINT16_C(50851) }, INT8_C( 2), UINT16_C(17423) }, { { UINT16_C(48821), UINT16_C(47484), UINT16_C(44097), UINT16_C( 1071) }, INT8_C( 2), UINT16_C(44097) }, { { UINT16_C(15302), UINT16_C(28877), UINT16_C(35227), UINT16_C(61189) }, INT8_C( 3), UINT16_C(61189) }, { { UINT16_C( 3394), UINT16_C(19808), UINT16_C(63826), UINT16_C(32458) }, INT8_C( 0), UINT16_C( 3394) }, { { UINT16_C( 8462), UINT16_C(23759), UINT16_C(36311), UINT16_C(37080) }, INT8_C( 2), UINT16_C(36311) }, { { 
UINT16_C(49029), UINT16_C( 4051), UINT16_C( 3717), UINT16_C(63196) }, INT8_C( 1), UINT16_C( 4051) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); int b = test_vec[i].b; uint16_t r; SIMDE_CONSTIFY_4_(simde_vget_lane_u16, r, (HEDLEY_UNREACHABLE(), 0), b, a); simde_assert_equal_u16(r, test_vec[i].r); } return 0; } static int test_simde_vget_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; int b; uint32_t r; } test_vec[] = { { { UINT32_C(3007867267), UINT32_C(2989705603) }, INT8_C( 0), UINT32_C(3007867267) }, { { UINT32_C(1366545288), UINT32_C(3281825408) }, INT8_C( 1), UINT32_C(3281825408) }, { { UINT32_C( 661651758), UINT32_C(3582788621) }, INT8_C( 1), UINT32_C(3582788621) }, { { UINT32_C( 203852593), UINT32_C(3260321551) }, INT8_C( 1), UINT32_C(3260321551) }, { { UINT32_C( 96859549), UINT32_C(3464009853) }, INT8_C( 0), UINT32_C( 96859549) }, { { UINT32_C(3990579750), UINT32_C(3210599832) }, INT8_C( 0), UINT32_C(3990579750) }, { { UINT32_C( 197152098), UINT32_C(2318486398) }, INT8_C( 0), UINT32_C( 197152098) }, { { UINT32_C(3541884608), UINT32_C(2174226811) }, INT8_C( 0), UINT32_C(3541884608) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); int b = test_vec[i].b; uint32_t r; SIMDE_CONSTIFY_2_(simde_vget_lane_u32, r, (HEDLEY_UNREACHABLE(), 0), b, a); simde_assert_equal_u32(r, test_vec[i].r); } return 0; } static int test_simde_vget_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; uint64_t r; } test_vec[] = { { { UINT64_C( 8474713643211531917) }, UINT64_C( 8474713643211531917) }, { { UINT64_C( 4718805824424333625) }, UINT64_C( 4718805824424333625) }, { { UINT64_C( 2901443443683120557) }, UINT64_C( 2901443443683120557) }, { { UINT64_C(16896600451596749021) }, UINT64_C(16896600451596749021) }, { { UINT64_C( 2355020594457970359) }, UINT64_C( 2355020594457970359) }, { { 
UINT64_C(14871504216242625854) }, UINT64_C(14871504216242625854) }, { { UINT64_C(11884950968574087521) }, UINT64_C(11884950968574087521) }, { { UINT64_C(12253690780921831607) }, UINT64_C(12253690780921831607) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); uint64_t r = simde_vget_lane_u64(a, 0); simde_assert_equal_u64(r, test_vec[i].r); } return 0; } static int test_simde_vgetq_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; int b; simde_float32 r; } test_vec[] = { { { SIMDE_FLOAT32_C( 273.12), SIMDE_FLOAT32_C( 753.68), SIMDE_FLOAT32_C( -47.15), SIMDE_FLOAT32_C( 832.55) }, INT8_C( 1), SIMDE_FLOAT32_C( 753.68) }, { { SIMDE_FLOAT32_C( -32.75), SIMDE_FLOAT32_C( 69.02), SIMDE_FLOAT32_C( 834.09), SIMDE_FLOAT32_C( 613.18) }, INT8_C( 3), SIMDE_FLOAT32_C( 613.18) }, { { SIMDE_FLOAT32_C( -409.75), SIMDE_FLOAT32_C( -293.91), SIMDE_FLOAT32_C( -958.18), SIMDE_FLOAT32_C( -184.72) }, INT8_C( 3), SIMDE_FLOAT32_C( -184.72) }, { { SIMDE_FLOAT32_C( -957.69), SIMDE_FLOAT32_C( 993.98), SIMDE_FLOAT32_C( 264.51), SIMDE_FLOAT32_C( -192.93) }, INT8_C( 3), SIMDE_FLOAT32_C( -192.93) }, { { SIMDE_FLOAT32_C( 970.45), SIMDE_FLOAT32_C( 778.50), SIMDE_FLOAT32_C( 196.11), SIMDE_FLOAT32_C( 650.20) }, INT8_C( 1), SIMDE_FLOAT32_C( 778.50) }, { { SIMDE_FLOAT32_C( -845.53), SIMDE_FLOAT32_C( -889.17), SIMDE_FLOAT32_C( -363.16), SIMDE_FLOAT32_C( 770.16) }, INT8_C( 3), SIMDE_FLOAT32_C( 770.16) }, { { SIMDE_FLOAT32_C( 532.17), SIMDE_FLOAT32_C( 43.28), SIMDE_FLOAT32_C( 39.43), SIMDE_FLOAT32_C( -514.98) }, INT8_C( 1), SIMDE_FLOAT32_C( 43.28) }, { { SIMDE_FLOAT32_C( 35.67), SIMDE_FLOAT32_C( 452.27), SIMDE_FLOAT32_C( 944.85), SIMDE_FLOAT32_C( -130.24) }, INT8_C( 3), SIMDE_FLOAT32_C( -130.24) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); int b = test_vec[i].b; simde_float32 r; SIMDE_CONSTIFY_4_(simde_vgetq_lane_f32, r, 
(HEDLEY_UNREACHABLE(), SIMDE_FLOAT32_C(0.0)), b, a); simde_assert_equal_f32(r, test_vec[i].r, 1); } return 0; } static int test_simde_vgetq_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; int b; simde_float64 r; } test_vec[] = { { { SIMDE_FLOAT64_C( 629.36), SIMDE_FLOAT64_C( -547.88) }, INT8_C( 1), SIMDE_FLOAT64_C( -547.88) }, { { SIMDE_FLOAT64_C( -917.95), SIMDE_FLOAT64_C( -153.86) }, INT8_C( 1), SIMDE_FLOAT64_C( -153.86) }, { { SIMDE_FLOAT64_C( 233.05), SIMDE_FLOAT64_C( -687.76) }, INT8_C( 1), SIMDE_FLOAT64_C( -687.76) }, { { SIMDE_FLOAT64_C( 358.76), SIMDE_FLOAT64_C( -20.52) }, INT8_C( 1), SIMDE_FLOAT64_C( -20.52) }, { { SIMDE_FLOAT64_C( 501.06), SIMDE_FLOAT64_C( -830.23) }, INT8_C( 1), SIMDE_FLOAT64_C( -830.23) }, { { SIMDE_FLOAT64_C( -204.70), SIMDE_FLOAT64_C( -481.76) }, INT8_C( 1), SIMDE_FLOAT64_C( -481.76) }, { { SIMDE_FLOAT64_C( -746.83), SIMDE_FLOAT64_C( -28.88) }, INT8_C( 1), SIMDE_FLOAT64_C( -28.88) }, { { SIMDE_FLOAT64_C( 935.21), SIMDE_FLOAT64_C( -696.96) }, INT8_C( 1), SIMDE_FLOAT64_C( -696.96) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); int b = test_vec[i].b; simde_float64 r; SIMDE_CONSTIFY_2_(simde_vgetq_lane_f64, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT64_C(0.0)), b, a); simde_assert_equal_f64(r, test_vec[i].r, 1); } return 0; } static int test_simde_vgetq_lane_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int b; int8_t r; } test_vec[] = { { { INT8_C( 62), INT8_C( 17), INT8_C( 52), INT8_C( 30), -INT8_C( 59), -INT8_C( 118), -INT8_C( 42), INT8_C( 25), -INT8_C( 15), -INT8_C( 48), -INT8_C( 40), -INT8_C( 27), INT8_C( 102), INT8_C( 24), -INT8_C( 127), -INT8_C( 50) }, INT8_C( 4), -INT8_C( 59) }, { { -INT8_C( 121), -INT8_C( 43), -INT8_C( 86), INT8_C( 63), INT8_C( 7), INT8_C( 83), INT8_C( 53), INT8_C( 61), -INT8_C( 4), INT8_C( 81), INT8_C( 2), INT8_C( 47), -INT8_C( 19), -INT8_C( 112), INT8_C( 109), -INT8_C( 2) }, INT8_C( 4), INT8_C( 7) }, { { 
-INT8_C( 117), -INT8_C( 61), INT8_C( 78), INT8_C( 97), -INT8_C( 36), INT8_C( 63), INT8_C( 50), -INT8_C( 76), INT8_C( 36), -INT8_C( 104), -INT8_C( 51), -INT8_C( 90), INT8_C( 102), INT8_C( 81), INT8_C( 45), INT8_C( 59) }, INT8_C( 11), -INT8_C( 90) }, { { INT8_C( 108), INT8_C( 66), INT8_C( 78), -INT8_C( 94), INT8_MIN, INT8_C( 74), -INT8_C( 13), -INT8_C( 126), INT8_C( 121), -INT8_C( 32), INT8_C( 18), -INT8_C( 25), -INT8_C( 34), -INT8_C( 41), INT8_C( 114), -INT8_C( 94) }, INT8_C( 5), INT8_C( 74) }, { { -INT8_C( 44), INT8_C( 126), INT8_C( 101), INT8_C( 6), INT8_C( 51), -INT8_C( 119), -INT8_C( 98), INT8_C( 0), INT8_C( 47), INT8_C( 4), INT8_C( 81), INT8_C( 93), INT8_C( 64), INT8_C( 77), -INT8_C( 55), -INT8_C( 126) }, INT8_C( 11), INT8_C( 93) }, { { INT8_C( 107), INT8_C( 2), -INT8_C( 26), INT8_C( 95), -INT8_C( 124), INT8_C( 95), INT8_C( 63), -INT8_C( 105), INT8_C( 70), INT8_C( 30), INT8_C( 110), -INT8_C( 71), -INT8_C( 64), -INT8_C( 109), -INT8_C( 115), INT8_C( 62) }, INT8_C( 8), INT8_C( 70) }, { { -INT8_C( 109), INT8_C( 113), -INT8_C( 126), INT8_C( 49), INT8_C( 113), -INT8_C( 79), INT8_C( 53), -INT8_C( 61), INT8_C( 14), INT8_C( 117), INT8_C( 16), -INT8_C( 40), -INT8_C( 8), -INT8_C( 85), INT8_C( 67), -INT8_C( 6) }, INT8_C( 1), INT8_C( 113) }, { { -INT8_C( 94), INT8_MAX, -INT8_C( 15), -INT8_C( 30), INT8_C( 22), INT8_C( 55), INT8_C( 0), -INT8_C( 124), -INT8_C( 16), -INT8_C( 64), INT8_C( 23), INT8_C( 125), -INT8_C( 2), INT8_C( 16), INT8_C( 16), INT8_C( 112) }, INT8_C( 2), -INT8_C( 15) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); int b = test_vec[i].b; int8_t r; SIMDE_CONSTIFY_16_(simde_vgetq_lane_s8, r, (HEDLEY_UNREACHABLE(), 0), b, a); simde_assert_equal_i8(r, test_vec[i].r); } return 0; } static int test_simde_vgetq_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int b; int16_t r; } test_vec[] = { { { INT16_C( 16940), -INT16_C( 29603), -INT16_C( 16153), -INT16_C( 3490), -INT16_C( 
19383), INT16_C( 28817), INT16_C( 5341), INT16_C( 6146) }, INT8_C( 4), -INT16_C( 19383) }, { { -INT16_C( 22867), INT16_C( 27147), INT16_C( 18990), INT16_C( 5131), INT16_C( 18178), -INT16_C( 29906), -INT16_C( 10956), INT16_C( 30392) }, INT8_C( 2), INT16_C( 18990) }, { { INT16_C( 23876), -INT16_C( 23822), INT16_C( 15439), -INT16_C( 8105), INT16_C( 13484), -INT16_C( 20748), -INT16_C( 10164), -INT16_C( 3493) }, INT8_C( 3), -INT16_C( 8105) }, { { INT16_C( 8645), -INT16_C( 12242), INT16_C( 12341), INT16_C( 25623), INT16_C( 19644), INT16_C( 29753), INT16_C( 27586), INT16_C( 8376) }, INT8_C( 6), INT16_C( 27586) }, { { INT16_C( 28507), -INT16_C( 19814), INT16_C( 18000), INT16_C( 17638), INT16_C( 13044), INT16_C( 20253), INT16_C( 37), INT16_C( 17940) }, INT8_C( 6), INT16_C( 37) }, { { INT16_C( 31717), -INT16_C( 929), INT16_C( 7135), INT16_C( 6216), INT16_C( 2959), INT16_C( 18308), -INT16_C( 7637), -INT16_C( 25950) }, INT8_C( 4), INT16_C( 2959) }, { { -INT16_C( 5548), INT16_C( 15042), -INT16_C( 18897), INT16_C( 19565), -INT16_C( 28155), INT16_C( 6476), INT16_C( 31704), INT16_C( 21502) }, INT8_C( 2), -INT16_C( 18897) }, { { INT16_C( 13307), INT16_C( 17397), -INT16_C( 31669), -INT16_C( 12466), INT16_C( 31179), INT16_C( 28337), INT16_C( 11540), -INT16_C( 318) }, INT8_C( 7), -INT16_C( 318) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); int b = test_vec[i].b; int16_t r; SIMDE_CONSTIFY_8_(simde_vgetq_lane_s16, r, (HEDLEY_UNREACHABLE(), 0), b, a); simde_assert_equal_i16(r, test_vec[i].r); } return 0; } static int test_simde_vgetq_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int b; int32_t r; } test_vec[] = { { { -INT32_C( 382002186), INT32_C( 1860295962), INT32_C( 860430686), -INT32_C( 1104420096) }, INT8_C( 1), INT32_C( 1860295962) }, { { -INT32_C( 1513067068), -INT32_C( 120501551), -INT32_C( 1300405244), INT32_C( 262707700) }, INT8_C( 0), -INT32_C( 1513067068) }, { { INT32_C( 
1941055890), -INT32_C( 510390121), INT32_C( 1875153987), -INT32_C( 1070336685) }, INT8_C( 2), INT32_C( 1875153987) }, { { -INT32_C( 1458531880), INT32_C( 101520521), INT32_C( 1872691654), INT32_C( 956396304) }, INT8_C( 1), INT32_C( 101520521) }, { { INT32_C( 137744756), INT32_C( 1990031794), INT32_C( 460058856), INT32_C( 1106522544) }, INT8_C( 1), INT32_C( 1990031794) }, { { -INT32_C( 1420375395), INT32_C( 1873550544), -INT32_C( 1931297397), -INT32_C( 956299018) }, INT8_C( 1), INT32_C( 1873550544) }, { { -INT32_C( 1497990903), INT32_C( 1536597743), -INT32_C( 1366538566), INT32_C( 1363922567) }, INT8_C( 0), -INT32_C( 1497990903) }, { { -INT32_C( 1556078090), INT32_C( 1918946448), INT32_C( 1501189720), -INT32_C( 1755138018) }, INT8_C( 2), INT32_C( 1501189720) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); int b = test_vec[i].b; int32_t r; SIMDE_CONSTIFY_4_(simde_vgetq_lane_s32, r, (HEDLEY_UNREACHABLE(), 0), b, a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; } static int test_simde_vgetq_lane_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int b; int64_t r; } test_vec[] = { { { INT64_C( 3728751874968948413), INT64_C( 5321062654944271890) }, INT8_C( 1), INT64_C( 5321062654944271890) }, { { -INT64_C( 6526973102782903603), INT64_C( 4086456640948673758) }, INT8_C( 0), -INT64_C( 6526973102782903603) }, { { INT64_C( 821728255052303305), -INT64_C( 8871372210728488644) }, INT8_C( 1), -INT64_C( 8871372210728488644) }, { { INT64_C( 9063507124988743488), -INT64_C( 755308779427368444) }, INT8_C( 1), -INT64_C( 755308779427368444) }, { { -INT64_C( 4601413837701090548), -INT64_C( 5568997109630427501) }, INT8_C( 1), -INT64_C( 5568997109630427501) }, { { INT64_C( 7249722157166126946), -INT64_C( 5802075325525506327) }, INT8_C( 1), -INT64_C( 5802075325525506327) }, { { INT64_C( 4192912744441392621), -INT64_C( 1385899749073431636) }, INT8_C( 0), INT64_C( 4192912744441392621) }, { { 
INT64_C( 3662145803337978095), -INT64_C( 1557336465030705668) }, INT8_C( 0), INT64_C( 3662145803337978095) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); int b = test_vec[i].b; int64_t r; SIMDE_CONSTIFY_2_(simde_vgetq_lane_s64, r, (HEDLEY_UNREACHABLE(), 0), b, a); simde_assert_equal_i64(r, test_vec[i].r); } return 0; } static int test_simde_vgetq_lane_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; int b; uint8_t r; } test_vec[] = { { { UINT8_C(217), UINT8_C( 95), UINT8_C(111), UINT8_C(104), UINT8_C( 19), UINT8_C(112), UINT8_C(195), UINT8_C( 29), UINT8_C(167), UINT8_C(167), UINT8_C( 53), UINT8_C(250), UINT8_C(196), UINT8_C(115), UINT8_C( 22), UINT8_C(127) }, INT8_C( 14), UINT8_C( 22) }, { { UINT8_C(166), UINT8_C(247), UINT8_C( 43), UINT8_C( 33), UINT8_C(246), UINT8_C(230), UINT8_C(120), UINT8_C(202), UINT8_C(208), UINT8_C(134), UINT8_C(111), UINT8_C(125), UINT8_C( 17), UINT8_C( 54), UINT8_C( 86), UINT8_C(112) }, INT8_C( 6), UINT8_C(120) }, { { UINT8_C(191), UINT8_C(132), UINT8_C( 22), UINT8_C(130), UINT8_C(161), UINT8_C(189), UINT8_C( 41), UINT8_C(214), UINT8_C(184), UINT8_C(237), UINT8_C( 73), UINT8_C(206), UINT8_C(108), UINT8_C(103), UINT8_C(117), UINT8_C( 99) }, INT8_C( 2), UINT8_C( 22) }, { { UINT8_C(150), UINT8_C( 89), UINT8_C(120), UINT8_C( 14), UINT8_C( 36), UINT8_C( 72), UINT8_C(148), UINT8_C(147), UINT8_C(197), UINT8_C(166), UINT8_C(201), UINT8_C( 28), UINT8_C( 22), UINT8_C(111), UINT8_C(219), UINT8_C(154) }, INT8_C( 6), UINT8_C(148) }, { { UINT8_C( 93), UINT8_C( 59), UINT8_C( 67), UINT8_C(134), UINT8_C( 17), UINT8_C(251), UINT8_C(115), UINT8_C( 91), UINT8_C(202), UINT8_C(223), UINT8_C(194), UINT8_C( 63), UINT8_C( 66), UINT8_C( 85), UINT8_C(213), UINT8_C(156) }, INT8_C( 13), UINT8_C( 85) }, { { UINT8_C(227), UINT8_C(192), UINT8_C( 22), UINT8_C(120), UINT8_C( 83), UINT8_C(219), UINT8_C( 30), UINT8_C( 28), UINT8_C(247), UINT8_C( 52), UINT8_C(140), UINT8_C(210), 
UINT8_C(207), UINT8_C( 18), UINT8_C( 47), UINT8_C( 10) }, INT8_C( 5), UINT8_C(219) }, { { UINT8_C(181), UINT8_C( 28), UINT8_C( 81), UINT8_C( 40), UINT8_C(119), UINT8_C( 27), UINT8_C( 7), UINT8_C( 57), UINT8_C( 90), UINT8_C( 74), UINT8_C(142), UINT8_C( 47), UINT8_C(230), UINT8_C( 92), UINT8_C( 18), UINT8_C(166) }, INT8_C( 2), UINT8_C( 81) }, { { UINT8_C(138), UINT8_C(249), UINT8_C( 77), UINT8_C(168), UINT8_C( 21), UINT8_C( 69), UINT8_C(221), UINT8_C(161), UINT8_C( 23), UINT8_C(172), UINT8_C(179), UINT8_C( 71), UINT8_C(182), UINT8_C( 9), UINT8_C(252), UINT8_C(210) }, INT8_C( 10), UINT8_C(179) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); int b = test_vec[i].b; uint8_t r; SIMDE_CONSTIFY_16_(simde_vgetq_lane_u8, r, (HEDLEY_UNREACHABLE(), 0), b, a); simde_assert_equal_u8(r, test_vec[i].r); } return 0; } static int test_simde_vgetq_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; int b; uint16_t r; } test_vec[] = { { { UINT16_C(61091), UINT16_C(53658), UINT16_C(16697), UINT16_C( 5906), UINT16_C( 8437), UINT16_C(24513), UINT16_C(37350), UINT16_C(20655) }, INT8_C( 0), UINT16_C(61091) }, { { UINT16_C(42027), UINT16_C(57525), UINT16_C(55191), UINT16_C(45942), UINT16_C( 7418), UINT16_C( 8850), UINT16_C(38827), UINT16_C(39366) }, INT8_C( 1), UINT16_C(57525) }, { { UINT16_C(53911), UINT16_C(43634), UINT16_C(26601), UINT16_C(43978), UINT16_C(45510), UINT16_C(30268), UINT16_C(29697), UINT16_C(42401) }, INT8_C( 1), UINT16_C(43634) }, { { UINT16_C(15490), UINT16_C(63489), UINT16_C(64495), UINT16_C(33044), UINT16_C(48926), UINT16_C(58393), UINT16_C(19032), UINT16_C(10875) }, INT8_C( 5), UINT16_C(58393) }, { { UINT16_C( 5157), UINT16_C(61476), UINT16_C(60351), UINT16_C(64417), UINT16_C(41569), UINT16_C( 623), UINT16_C(38983), UINT16_C(33668) }, INT8_C( 1), UINT16_C(61476) }, { { UINT16_C(29308), UINT16_C(37269), UINT16_C(46068), UINT16_C( 3408), UINT16_C(43415), UINT16_C( 4695), UINT16_C( 
5331), UINT16_C(59192) }, INT8_C( 1), UINT16_C(37269) }, { { UINT16_C(42536), UINT16_C(51492), UINT16_C(34209), UINT16_C( 4203), UINT16_C(45703), UINT16_C( 3241), UINT16_C(16949), UINT16_C(42888) }, INT8_C( 7), UINT16_C(42888) }, { { UINT16_C(39705), UINT16_C(27274), UINT16_C( 8616), UINT16_C( 19), UINT16_C(58932), UINT16_C(27668), UINT16_C(19918), UINT16_C(29844) }, INT8_C( 1), UINT16_C(27274) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); int b = test_vec[i].b; uint16_t r; SIMDE_CONSTIFY_8_(simde_vgetq_lane_u16, r, (HEDLEY_UNREACHABLE(), 0), b, a); simde_assert_equal_u16(r, test_vec[i].r); } return 0; } static int test_simde_vgetq_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; int b; uint32_t r; } test_vec[] = { { { UINT32_C( 212833295), UINT32_C(2425004302), UINT32_C( 474038220), UINT32_C(2853920187) }, INT8_C( 0), UINT32_C( 212833295) }, { { UINT32_C(2233196758), UINT32_C(2390980779), UINT32_C(2474222548), UINT32_C(1419939520) }, INT8_C( 1), UINT32_C(2390980779) }, { { UINT32_C( 970875823), UINT32_C( 880388851), UINT32_C(3784455366), UINT32_C(3199696606) }, INT8_C( 1), UINT32_C( 880388851) }, { { UINT32_C(3222366780), UINT32_C(1919149560), UINT32_C( 452993912), UINT32_C(3670687095) }, INT8_C( 3), UINT32_C(3670687095) }, { { UINT32_C(2093338115), UINT32_C(2494598146), UINT32_C( 621973357), UINT32_C(3076631373) }, INT8_C( 0), UINT32_C(2093338115) }, { { UINT32_C(2225188897), UINT32_C( 581376546), UINT32_C(4284423989), UINT32_C(3338828537) }, INT8_C( 3), UINT32_C(3338828537) }, { { UINT32_C( 801884799), UINT32_C(1908291934), UINT32_C(3206286430), UINT32_C(1944181187) }, INT8_C( 2), UINT32_C(3206286430) }, { { UINT32_C( 227841381), UINT32_C( 372033975), UINT32_C(3415221704), UINT32_C(3075133677) }, INT8_C( 3), UINT32_C(3075133677) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); int b = 
test_vec[i].b; uint32_t r; SIMDE_CONSTIFY_4_(simde_vgetq_lane_u32, r, (HEDLEY_UNREACHABLE(), 0), b, a); simde_assert_equal_u32(r, test_vec[i].r); } return 0; } static int test_simde_vgetq_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; int b; uint64_t r; } test_vec[] = { { { UINT64_C( 3092081787328485317), UINT64_C( 7985414882938276043) }, INT8_C( 1), UINT64_C( 7985414882938276043) }, { { UINT64_C( 7025058213030416194), UINT64_C( 6780364979241558092) }, INT8_C( 1), UINT64_C( 6780364979241558092) }, { { UINT64_C( 3391474989674094302), UINT64_C( 6056589457741813753) }, INT8_C( 0), UINT64_C( 3391474989674094302) }, { { UINT64_C( 2242727670204370304), UINT64_C(16289431355216962840) }, INT8_C( 0), UINT64_C( 2242727670204370304) }, { { UINT64_C(12501920663126502103), UINT64_C( 7356476994223244425) }, INT8_C( 0), UINT64_C(12501920663126502103) }, { { UINT64_C( 5069585831496340757), UINT64_C( 3089409691725716985) }, INT8_C( 0), UINT64_C( 5069585831496340757) }, { { UINT64_C( 8343717041656415431), UINT64_C( 6889051721661195117) }, INT8_C( 1), UINT64_C( 6889051721661195117) }, { { UINT64_C( 1010574439278891982), UINT64_C(12967946829539362128) }, INT8_C( 1), UINT64_C(12967946829539362128) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); int b = test_vec[i].b; uint64_t r; SIMDE_CONSTIFY_2_(simde_vgetq_lane_u64, r, (HEDLEY_UNREACHABLE(), 0), b, a); simde_assert_equal_u64(r, test_vec[i].r); } return 0; } HEDLEY_DIAGNOSTIC_POP SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vget_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vget_lane_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vget_lane_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vget_lane_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vget_lane_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vget_lane_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vget_lane_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vget_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vget_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vget_lane_u64) 
SIMDE_TEST_FUNC_LIST_ENTRY(vgetq_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vgetq_lane_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vgetq_lane_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vgetq_lane_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vgetq_lane_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vgetq_lane_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vgetq_lane_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vgetq_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vgetq_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vgetq_lane_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/get_low.c000066400000000000000000000515351400333146700171340ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN get_low #include "test-neon.h" #include "../../../simde/arm/neon/get_low.h" static int test_simde_vget_low_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -618.44), SIMDE_FLOAT32_C( -264.56), SIMDE_FLOAT32_C( 635.79), SIMDE_FLOAT32_C( 558.25) }, { SIMDE_FLOAT32_C( -618.44), SIMDE_FLOAT32_C( -264.56) } }, { { SIMDE_FLOAT32_C( -758.55), SIMDE_FLOAT32_C( 584.32), SIMDE_FLOAT32_C( 478.21), SIMDE_FLOAT32_C( -463.12) }, { SIMDE_FLOAT32_C( -758.55), SIMDE_FLOAT32_C( 584.32) } }, { { SIMDE_FLOAT32_C( -948.98), SIMDE_FLOAT32_C( -867.33), SIMDE_FLOAT32_C( 729.13), SIMDE_FLOAT32_C( 324.50) }, { SIMDE_FLOAT32_C( -948.98), SIMDE_FLOAT32_C( -867.33) } }, { { SIMDE_FLOAT32_C( -385.61), SIMDE_FLOAT32_C( -150.04), SIMDE_FLOAT32_C( -341.48), SIMDE_FLOAT32_C( -404.96) }, { SIMDE_FLOAT32_C( -385.61), SIMDE_FLOAT32_C( -150.04) } }, { { SIMDE_FLOAT32_C( 568.77), SIMDE_FLOAT32_C( 261.80), SIMDE_FLOAT32_C( 803.98), SIMDE_FLOAT32_C( -772.87) }, { SIMDE_FLOAT32_C( 568.77), SIMDE_FLOAT32_C( 261.80) } }, { { SIMDE_FLOAT32_C( 656.21), SIMDE_FLOAT32_C( 633.91), SIMDE_FLOAT32_C( -959.69), SIMDE_FLOAT32_C( 161.89) }, { SIMDE_FLOAT32_C( 656.21), SIMDE_FLOAT32_C( 633.91) } }, { { SIMDE_FLOAT32_C( 160.28), SIMDE_FLOAT32_C( -927.68), SIMDE_FLOAT32_C( 797.28), SIMDE_FLOAT32_C( 950.20) }, { SIMDE_FLOAT32_C( 160.28), 
SIMDE_FLOAT32_C( -927.68) } }, { { SIMDE_FLOAT32_C( 50.38), SIMDE_FLOAT32_C( 496.87), SIMDE_FLOAT32_C( 994.83), SIMDE_FLOAT32_C( 431.94) }, { SIMDE_FLOAT32_C( 50.38), SIMDE_FLOAT32_C( 496.87) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a; simde_float32x2_t r; a = simde_vld1q_f32(test_vec[i].a); r = simde_vget_low_f32(a); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vget_low_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; simde_float64 r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( 587.42), SIMDE_FLOAT64_C( -708.35) }, { SIMDE_FLOAT64_C( 587.42) } }, { { SIMDE_FLOAT64_C( 444.89), SIMDE_FLOAT64_C( -898.30) }, { SIMDE_FLOAT64_C( 444.89) } }, { { SIMDE_FLOAT64_C( 950.88), SIMDE_FLOAT64_C( 657.56) }, { SIMDE_FLOAT64_C( 950.88) } }, { { SIMDE_FLOAT64_C( -17.67), SIMDE_FLOAT64_C( -677.48) }, { SIMDE_FLOAT64_C( -17.67) } }, { { SIMDE_FLOAT64_C( -738.88), SIMDE_FLOAT64_C( 843.84) }, { SIMDE_FLOAT64_C( -738.88) } }, { { SIMDE_FLOAT64_C( 933.98), SIMDE_FLOAT64_C( 432.45) }, { SIMDE_FLOAT64_C( 933.98) } }, { { SIMDE_FLOAT64_C( 713.24), SIMDE_FLOAT64_C( -937.79) }, { SIMDE_FLOAT64_C( 713.24) } }, { { SIMDE_FLOAT64_C( -634.19), SIMDE_FLOAT64_C( -779.71) }, { SIMDE_FLOAT64_C( -634.19) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x1_t r = simde_vget_low_f64(a); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vget_low_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 50), INT8_C( 25), -INT8_C( 2), -INT8_C( 103), INT8_C( 121), -INT8_C( 72), INT8_C( 60), INT8_C( 40), -INT8_C( 124), -INT8_C( 21), -INT8_C( 26), INT8_C( 20), INT8_C( 1), -INT8_C( 116), -INT8_C( 14), INT8_C( 113) }, { -INT8_C( 50), INT8_C( 25), -INT8_C( 2), -INT8_C( 103), INT8_C( 
121), -INT8_C( 72), INT8_C( 60), INT8_C( 40) } }, { { -INT8_C( 31), INT8_C( 36), -INT8_C( 55), INT8_C( 120), -INT8_C( 82), INT8_C( 112), INT8_MIN, -INT8_C( 68), INT8_C( 110), INT8_C( 121), -INT8_C( 124), -INT8_C( 81), INT8_C( 39), -INT8_C( 40), INT8_C( 49), -INT8_C( 11) }, { -INT8_C( 31), INT8_C( 36), -INT8_C( 55), INT8_C( 120), -INT8_C( 82), INT8_C( 112), INT8_MIN, -INT8_C( 68) } }, { { -INT8_C( 14), INT8_C( 47), -INT8_C( 114), INT8_C( 107), -INT8_C( 25), -INT8_C( 54), -INT8_C( 109), INT8_C( 107), -INT8_C( 74), INT8_C( 122), INT8_MAX, -INT8_C( 73), INT8_C( 6), INT8_C( 113), INT8_C( 41), -INT8_C( 25) }, { -INT8_C( 14), INT8_C( 47), -INT8_C( 114), INT8_C( 107), -INT8_C( 25), -INT8_C( 54), -INT8_C( 109), INT8_C( 107) } }, { { -INT8_C( 107), -INT8_C( 14), INT8_C( 95), INT8_C( 67), INT8_C( 98), -INT8_C( 33), -INT8_C( 1), -INT8_C( 47), INT8_C( 89), -INT8_C( 125), INT8_MIN, INT8_MIN, INT8_C( 92), -INT8_C( 78), INT8_C( 117), INT8_C( 78) }, { -INT8_C( 107), -INT8_C( 14), INT8_C( 95), INT8_C( 67), INT8_C( 98), -INT8_C( 33), -INT8_C( 1), -INT8_C( 47) } }, { { -INT8_C( 31), INT8_C( 3), -INT8_C( 71), -INT8_C( 55), -INT8_C( 50), INT8_C( 76), INT8_C( 52), -INT8_C( 124), -INT8_C( 58), -INT8_C( 76), INT8_C( 59), -INT8_C( 52), INT8_C( 37), INT8_C( 100), -INT8_C( 76), -INT8_C( 69) }, { -INT8_C( 31), INT8_C( 3), -INT8_C( 71), -INT8_C( 55), -INT8_C( 50), INT8_C( 76), INT8_C( 52), -INT8_C( 124) } }, { { INT8_C( 87), INT8_C( 19), -INT8_C( 2), -INT8_C( 71), -INT8_C( 13), -INT8_C( 2), -INT8_C( 118), INT8_C( 76), -INT8_C( 127), INT8_C( 11), -INT8_C( 52), -INT8_C( 35), -INT8_C( 67), INT8_C( 65), INT8_C( 43), -INT8_C( 98) }, { INT8_C( 87), INT8_C( 19), -INT8_C( 2), -INT8_C( 71), -INT8_C( 13), -INT8_C( 2), -INT8_C( 118), INT8_C( 76) } }, { { INT8_C( 68), -INT8_C( 28), INT8_C( 103), INT8_C( 18), INT8_C( 49), -INT8_C( 100), -INT8_C( 106), -INT8_C( 9), INT8_C( 80), -INT8_C( 46), -INT8_C( 60), INT8_C( 117), INT8_C( 54), INT8_C( 120), INT8_C( 48), -INT8_C( 115) }, { INT8_C( 68), -INT8_C( 28), 
INT8_C( 103), INT8_C( 18), INT8_C( 49), -INT8_C( 100), -INT8_C( 106), -INT8_C( 9) } }, { { -INT8_C( 117), INT8_C( 47), INT8_C( 71), INT8_C( 126), INT8_C( 45), -INT8_C( 47), -INT8_C( 54), -INT8_C( 82), -INT8_C( 36), -INT8_C( 106), -INT8_C( 116), -INT8_C( 103), -INT8_C( 41), -INT8_C( 73), INT8_C( 56), INT8_C( 28) }, { -INT8_C( 117), INT8_C( 47), INT8_C( 71), INT8_C( 126), INT8_C( 45), -INT8_C( 47), -INT8_C( 54), -INT8_C( 82) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x8_t r = simde_vget_low_s8(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vget_low_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 8842), -INT16_C( 21774), INT16_C( 23919), INT16_C( 1631), -INT16_C( 24130), -INT16_C( 3644), -INT16_C( 1346), -INT16_C( 21416) }, { -INT16_C( 8842), -INT16_C( 21774), INT16_C( 23919), INT16_C( 1631) } }, { { INT16_C( 16724), INT16_C( 30279), -INT16_C( 6849), INT16_C( 31437), -INT16_C( 14479), -INT16_C( 16413), -INT16_C( 19162), -INT16_C( 25537) }, { INT16_C( 16724), INT16_C( 30279), -INT16_C( 6849), INT16_C( 31437) } }, { { INT16_C( 12690), INT16_C( 582), -INT16_C( 23153), INT16_C( 19720), -INT16_C( 12986), INT16_C( 1342), -INT16_C( 26937), INT16_C( 7089) }, { INT16_C( 12690), INT16_C( 582), -INT16_C( 23153), INT16_C( 19720) } }, { { -INT16_C( 1576), INT16_C( 6034), INT16_C( 24542), INT16_C( 20370), INT16_C( 29990), INT16_C( 19470), INT16_C( 19754), -INT16_C( 16920) }, { -INT16_C( 1576), INT16_C( 6034), INT16_C( 24542), INT16_C( 20370) } }, { { INT16_C( 11903), INT16_C( 3775), -INT16_C( 14381), INT16_C( 6747), -INT16_C( 26220), INT16_C( 23327), -INT16_C( 12240), INT16_C( 2167) }, { INT16_C( 11903), INT16_C( 3775), -INT16_C( 14381), INT16_C( 6747) } }, { { INT16_C( 2505), -INT16_C( 22753), -INT16_C( 20120), -INT16_C( 28682), INT16_C( 1062), INT16_C( 20955), 
-INT16_C( 15278), -INT16_C( 12018) }, { INT16_C( 2505), -INT16_C( 22753), -INT16_C( 20120), -INT16_C( 28682) } }, { { -INT16_C( 12814), -INT16_C( 14625), INT16_C( 14996), INT16_C( 10720), -INT16_C( 45), INT16_C( 900), -INT16_C( 1073), -INT16_C( 26357) }, { -INT16_C( 12814), -INT16_C( 14625), INT16_C( 14996), INT16_C( 10720) } }, { { INT16_C( 11012), INT16_C( 27968), INT16_C( 14300), INT16_C( 1020), -INT16_C( 10437), -INT16_C( 29356), INT16_C( 25243), -INT16_C( 29090) }, { INT16_C( 11012), INT16_C( 27968), INT16_C( 14300), INT16_C( 1020) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x4_t r = simde_vget_low_s16(a); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vget_low_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 2072489384), INT32_C( 1176990042), INT32_C( 492144110), -INT32_C( 1353512510) }, { -INT32_C( 2072489384), INT32_C( 1176990042) } }, { { INT32_C( 476637811), -INT32_C( 628344084), INT32_C( 1839877601), -INT32_C( 1813123782) }, { INT32_C( 476637811), -INT32_C( 628344084) } }, { { -INT32_C( 1709742785), -INT32_C( 975159337), -INT32_C( 2031995452), -INT32_C( 1321913026) }, { -INT32_C( 1709742785), -INT32_C( 975159337) } }, { { INT32_C( 198024479), -INT32_C( 1192863273), -INT32_C( 517631834), -INT32_C( 1116466307) }, { INT32_C( 198024479), -INT32_C( 1192863273) } }, { { INT32_C( 1331137400), -INT32_C( 1911277622), -INT32_C( 1424623764), INT32_C( 1281116716) }, { INT32_C( 1331137400), -INT32_C( 1911277622) } }, { { -INT32_C( 1084806424), INT32_C( 712523139), INT32_C( 1259052494), INT32_C( 688422832) }, { -INT32_C( 1084806424), INT32_C( 712523139) } }, { { -INT32_C( 713531638), INT32_C( 56855958), -INT32_C( 1330743164), -INT32_C( 1409545533) }, { -INT32_C( 713531638), INT32_C( 56855958) } }, { { -INT32_C( 1200991180), INT32_C( 1608704657), INT32_C( 
816573824), INT32_C( 1985590124) }, { -INT32_C( 1200991180), INT32_C( 1608704657) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x2_t r = simde_vget_low_s32(a); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vget_low_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t r[1]; } test_vec[] = { { { -INT64_C( 5552791125174468305), INT64_C( 7690059133849055532) }, { -INT64_C( 5552791125174468305) } }, { { -INT64_C( 3464544314608880100), -INT64_C( 8271590384803619907) }, { -INT64_C( 3464544314608880100) } }, { { -INT64_C( 1170727257668501975), INT64_C( 4855309375206429422) }, { -INT64_C( 1170727257668501975) } }, { { -INT64_C( 5109208448872577540), -INT64_C( 1605646795316963380) }, { -INT64_C( 5109208448872577540) } }, { { INT64_C( 3425469384464172246), -INT64_C( 2832050735323589047) }, { INT64_C( 3425469384464172246) } }, { { INT64_C( 8557466743929387021), -INT64_C( 7766078508003940975) }, { INT64_C( 8557466743929387021) } }, { { INT64_C( 3408864166942748893), INT64_C( 847329830903969037) }, { INT64_C( 3408864166942748893) } }, { { -INT64_C( 5788577846308576118), -INT64_C( 9129014539981992964) }, { -INT64_C( 5788577846308576118) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x1_t r = simde_vget_low_s64(a); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; } static int test_simde_vget_low_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 49), UINT8_C( 19), UINT8_C( 24), UINT8_C(135), UINT8_C(231), UINT8_C( 68), UINT8_C(165), UINT8_C( 18), UINT8_C( 76), UINT8_C(174), UINT8_C(105), UINT8_C( 5), UINT8_C( 66), UINT8_C( 84), UINT8_C(216), UINT8_C( 35) }, { UINT8_C( 49), UINT8_C( 19), UINT8_C( 24), UINT8_C(135), UINT8_C(231), 
UINT8_C( 68), UINT8_C(165), UINT8_C( 18) } }, { { UINT8_C(191), UINT8_C(201), UINT8_C(125), UINT8_C(100), UINT8_C(129), UINT8_C(173), UINT8_C( 89), UINT8_C( 59), UINT8_C(167), UINT8_C( 49), UINT8_C(196), UINT8_C(147), UINT8_C(153), UINT8_C( 43), UINT8_C(139), UINT8_C(202) }, { UINT8_C(191), UINT8_C(201), UINT8_C(125), UINT8_C(100), UINT8_C(129), UINT8_C(173), UINT8_C( 89), UINT8_C( 59) } }, { { UINT8_C( 62), UINT8_C(163), UINT8_C( 81), UINT8_C( 37), UINT8_C(231), UINT8_C(247), UINT8_C( 55), UINT8_C( 52), UINT8_C(165), UINT8_C(160), UINT8_C( 57), UINT8_C(232), UINT8_C(244), UINT8_C( 17), UINT8_C( 11), UINT8_C(179) }, { UINT8_C( 62), UINT8_C(163), UINT8_C( 81), UINT8_C( 37), UINT8_C(231), UINT8_C(247), UINT8_C( 55), UINT8_C( 52) } }, { { UINT8_C(218), UINT8_C(136), UINT8_C( 24), UINT8_C( 92), UINT8_C( 54), UINT8_C(113), UINT8_C(151), UINT8_C(221), UINT8_C(163), UINT8_C( 91), UINT8_C(113), UINT8_C( 60), UINT8_C(134), UINT8_C(252), UINT8_C( 6), UINT8_C(196) }, { UINT8_C(218), UINT8_C(136), UINT8_C( 24), UINT8_C( 92), UINT8_C( 54), UINT8_C(113), UINT8_C(151), UINT8_C(221) } }, { { UINT8_C(159), UINT8_C( 88), UINT8_C(233), UINT8_C(135), UINT8_C( 79), UINT8_C( 32), UINT8_C(187), UINT8_C(244), UINT8_C(193), UINT8_C(244), UINT8_C(220), UINT8_C(181), UINT8_C( 5), UINT8_C(232), UINT8_C(105), UINT8_C(224) }, { UINT8_C(159), UINT8_C( 88), UINT8_C(233), UINT8_C(135), UINT8_C( 79), UINT8_C( 32), UINT8_C(187), UINT8_C(244) } }, { { UINT8_C(112), UINT8_C(129), UINT8_C( 60), UINT8_C(166), UINT8_C(242), UINT8_C(211), UINT8_C(132), UINT8_C(149), UINT8_C( 46), UINT8_C(245), UINT8_C(209), UINT8_C(180), UINT8_C(241), UINT8_C(216), UINT8_C(120), UINT8_C(144) }, { UINT8_C(112), UINT8_C(129), UINT8_C( 60), UINT8_C(166), UINT8_C(242), UINT8_C(211), UINT8_C(132), UINT8_C(149) } }, { { UINT8_C( 48), UINT8_C( 97), UINT8_C( 23), UINT8_C(127), UINT8_C(129), UINT8_C(210), UINT8_C(115), UINT8_C( 66), UINT8_C(198), UINT8_C( 80), UINT8_C(248), UINT8_C(204), UINT8_C( 56), UINT8_C( 97), UINT8_C(172), 
UINT8_C(168) }, { UINT8_C( 48), UINT8_C( 97), UINT8_C( 23), UINT8_C(127), UINT8_C(129), UINT8_C(210), UINT8_C(115), UINT8_C( 66) } }, { { UINT8_C(226), UINT8_C(232), UINT8_C( 79), UINT8_C(212), UINT8_C(187), UINT8_C(211), UINT8_C(106), UINT8_C(233), UINT8_C(200), UINT8_C( 59), UINT8_C(157), UINT8_C(185), UINT8_C( 19), UINT8_C( 21), UINT8_C( 73), UINT8_C( 67) }, { UINT8_C(226), UINT8_C(232), UINT8_C( 79), UINT8_C(212), UINT8_C(187), UINT8_C(211), UINT8_C(106), UINT8_C(233) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x8_t r = simde_vget_low_u8(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vget_low_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(63133), UINT16_C( 3112), UINT16_C(63250), UINT16_C(53711), UINT16_C( 3909), UINT16_C(60069), UINT16_C(17310), UINT16_C(25373) }, { UINT16_C(63133), UINT16_C( 3112), UINT16_C(63250), UINT16_C(53711) } }, { { UINT16_C( 1988), UINT16_C(43042), UINT16_C(34217), UINT16_C( 3759), UINT16_C(49039), UINT16_C(16267), UINT16_C( 8488), UINT16_C(50799) }, { UINT16_C( 1988), UINT16_C(43042), UINT16_C(34217), UINT16_C( 3759) } }, { { UINT16_C(38679), UINT16_C(10706), UINT16_C(41614), UINT16_C(54266), UINT16_C(41137), UINT16_C(20670), UINT16_C(56291), UINT16_C(42931) }, { UINT16_C(38679), UINT16_C(10706), UINT16_C(41614), UINT16_C(54266) } }, { { UINT16_C(55010), UINT16_C(35663), UINT16_C(65115), UINT16_C(60058), UINT16_C( 9662), UINT16_C(58921), UINT16_C(38983), UINT16_C(24236) }, { UINT16_C(55010), UINT16_C(35663), UINT16_C(65115), UINT16_C(60058) } }, { { UINT16_C(32560), UINT16_C(48776), UINT16_C(33313), UINT16_C(53906), UINT16_C(20514), UINT16_C( 1314), UINT16_C(54827), UINT16_C( 3500) }, { UINT16_C(32560), UINT16_C(48776), UINT16_C(33313), UINT16_C(53906) } }, { { UINT16_C(64428), UINT16_C( 1944), UINT16_C(13050), 
UINT16_C(47345), UINT16_C( 6744), UINT16_C(40862), UINT16_C(19378), UINT16_C(58109) }, { UINT16_C(64428), UINT16_C( 1944), UINT16_C(13050), UINT16_C(47345) } }, { { UINT16_C(34250), UINT16_C(60321), UINT16_C(13064), UINT16_C(10941), UINT16_C(57475), UINT16_C(44592), UINT16_C(56502), UINT16_C(25275) }, { UINT16_C(34250), UINT16_C(60321), UINT16_C(13064), UINT16_C(10941) } }, { { UINT16_C(21464), UINT16_C(53865), UINT16_C(23174), UINT16_C(56970), UINT16_C(10356), UINT16_C( 9853), UINT16_C(31347), UINT16_C(15625) }, { UINT16_C(21464), UINT16_C(53865), UINT16_C(23174), UINT16_C(56970) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x4_t r = simde_vget_low_u16(a); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vget_low_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t r[2]; } test_vec[] = { { { UINT32_C( 664864438), UINT32_C(1031793488), UINT32_C(3797458318), UINT32_C(2293678734) }, { UINT32_C( 664864438), UINT32_C(1031793488) } }, { { UINT32_C(2911591410), UINT32_C(2619020298), UINT32_C(3774382299), UINT32_C(3514230811) }, { UINT32_C(2911591410), UINT32_C(2619020298) } }, { { UINT32_C(1056446702), UINT32_C(2440787715), UINT32_C(2725565204), UINT32_C(2284464790) }, { UINT32_C(1056446702), UINT32_C(2440787715) } }, { { UINT32_C(2402727301), UINT32_C(2771145162), UINT32_C(3984925905), UINT32_C(4206820364) }, { UINT32_C(2402727301), UINT32_C(2771145162) } }, { { UINT32_C( 389658388), UINT32_C(1135129646), UINT32_C( 518331528), UINT32_C(3433435206) }, { UINT32_C( 389658388), UINT32_C(1135129646) } }, { { UINT32_C(2405162181), UINT32_C(4281698093), UINT32_C(3102522028), UINT32_C(3400772278) }, { UINT32_C(2405162181), UINT32_C(4281698093) } }, { { UINT32_C(2430725217), UINT32_C( 684952224), UINT32_C(3980834982), UINT32_C(2394549704) }, { UINT32_C(2430725217), UINT32_C( 684952224) } }, { { 
UINT32_C(4145878217), UINT32_C(1224102556), UINT32_C(3271614989), UINT32_C(4002329484) }, { UINT32_C(4145878217), UINT32_C(1224102556) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x2_t r = simde_vget_low_u32(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vget_low_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t r[1]; } test_vec[] = { { { UINT64_C(10622998194429045261), UINT64_C( 7741122720249478585) }, { UINT64_C(10622998194429045261) } }, { { UINT64_C(17660482733443859451), UINT64_C( 7088876185833046643) }, { UINT64_C(17660482733443859451) } }, { { UINT64_C( 2310614234654306281), UINT64_C( 3924073685936192752) }, { UINT64_C( 2310614234654306281) } }, { { UINT64_C( 7192069315162950751), UINT64_C(14345906403414384222) }, { UINT64_C( 7192069315162950751) } }, { { UINT64_C( 9321418311508205012), UINT64_C( 9800114439348489361) }, { UINT64_C( 9321418311508205012) } }, { { UINT64_C( 260981884868249677), UINT64_C(15993633616004850767) }, { UINT64_C( 260981884868249677) } }, { { UINT64_C( 2124984947085752427), UINT64_C( 6211284163340339586) }, { UINT64_C( 2124984947085752427) } }, { { UINT64_C( 1574626849321096242), UINT64_C(14500806154155888982) }, { UINT64_C( 1574626849321096242) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x1_t r = simde_vget_low_u64(a); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vget_low_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vget_low_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vget_low_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vget_low_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vget_low_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vget_low_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vget_low_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vget_low_u16) 
SIMDE_TEST_FUNC_LIST_ENTRY(vget_low_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vget_low_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/hadd.c000066400000000000000000001377131400333146700163770ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN hadd #include "test-neon.h" #include "../../../simde/arm/neon/hadd.h" static int test_simde_vhadd_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 111), -INT8_C( 110), INT8_C( 14), -INT8_C( 92), -INT8_C( 21), -INT8_C( 15), -INT8_C( 67), INT8_C( 34) }, { INT8_C( 85), INT8_C( 113), INT8_C( 87), -INT8_C( 58), INT8_C( 112), INT8_C( 51), -INT8_C( 18), INT8_C( 103) }, { -INT8_C( 13), INT8_C( 1), INT8_C( 50), -INT8_C( 75), INT8_C( 45), INT8_C( 18), -INT8_C( 43), INT8_C( 68) } }, { { INT8_C( 119), -INT8_C( 32), -INT8_C( 32), -INT8_C( 30), -INT8_C( 5), -INT8_C( 80), -INT8_C( 127), INT8_C( 47) }, { -INT8_C( 27), -INT8_C( 10), -INT8_C( 111), -INT8_C( 68), INT8_C( 125), -INT8_C( 43), -INT8_C( 31), INT8_C( 14) }, { INT8_C( 46), -INT8_C( 21), -INT8_C( 72), -INT8_C( 49), INT8_C( 60), -INT8_C( 62), -INT8_C( 79), INT8_C( 30) } }, { { INT8_C( 103), -INT8_C( 17), -INT8_C( 78), INT8_C( 83), -INT8_C( 32), INT8_C( 112), INT8_C( 117), INT8_C( 53) }, { -INT8_C( 31), -INT8_C( 52), -INT8_C( 4), INT8_C( 81), -INT8_C( 1), -INT8_C( 22), -INT8_C( 72), INT8_C( 118) }, { INT8_C( 36), -INT8_C( 35), -INT8_C( 41), INT8_C( 82), -INT8_C( 17), INT8_C( 45), INT8_C( 22), INT8_C( 85) } }, { { -INT8_C( 54), -INT8_C( 104), INT8_C( 89), -INT8_C( 59), INT8_C( 73), -INT8_C( 38), -INT8_C( 12), INT8_C( 46) }, { -INT8_C( 48), -INT8_C( 123), -INT8_C( 22), INT8_C( 78), INT8_C( 90), -INT8_C( 53), INT8_C( 92), -INT8_C( 63) }, { -INT8_C( 51), -INT8_C( 114), INT8_C( 33), INT8_C( 9), INT8_C( 81), -INT8_C( 46), INT8_C( 40), -INT8_C( 9) } }, { { -INT8_C( 70), INT8_C( 15), INT8_C( 20), -INT8_C( 101), INT8_MAX, -INT8_C( 119), -INT8_C( 48), INT8_C( 96) }, { INT8_C( 85), 
-INT8_C( 52), -INT8_C( 79), INT8_C( 84), -INT8_C( 74), INT8_C( 106), -INT8_C( 53), INT8_MIN }, { INT8_C( 7), -INT8_C( 19), -INT8_C( 30), -INT8_C( 9), INT8_C( 26), -INT8_C( 7), -INT8_C( 51), -INT8_C( 16) } }, { { INT8_C( 2), INT8_C( 36), INT8_C( 69), INT8_C( 75), -INT8_C( 2), INT8_C( 57), INT8_C( 121), -INT8_C( 50) }, { -INT8_C( 66), INT8_C( 99), INT8_C( 28), INT8_C( 24), INT8_C( 46), INT8_C( 121), -INT8_C( 38), -INT8_C( 23) }, { -INT8_C( 32), INT8_C( 67), INT8_C( 48), INT8_C( 49), INT8_C( 22), INT8_C( 89), INT8_C( 41), -INT8_C( 37) } }, { { -INT8_C( 120), -INT8_C( 18), -INT8_C( 124), INT8_C( 7), INT8_C( 120), INT8_C( 84), INT8_C( 103), -INT8_C( 51) }, { INT8_C( 33), INT8_C( 24), INT8_C( 34), -INT8_C( 41), -INT8_C( 126), -INT8_C( 19), INT8_C( 88), -INT8_C( 123) }, { -INT8_C( 44), INT8_C( 3), -INT8_C( 45), -INT8_C( 17), -INT8_C( 3), INT8_C( 32), INT8_C( 95), -INT8_C( 87) } }, { { INT8_C( 17), -INT8_C( 99), -INT8_C( 48), INT8_C( 15), -INT8_C( 41), INT8_C( 74), -INT8_C( 35), -INT8_C( 107) }, { -INT8_C( 83), -INT8_C( 6), -INT8_C( 82), -INT8_C( 36), INT8_C( 115), -INT8_C( 120), -INT8_C( 59), -INT8_C( 5) }, { -INT8_C( 33), -INT8_C( 53), -INT8_C( 65), -INT8_C( 11), INT8_C( 37), -INT8_C( 23), -INT8_C( 47), -INT8_C( 56) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vhadd_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vhadd_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vhadd_s16 
(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 16201), -INT16_C( 11732), -INT16_C( 17864), INT16_C( 19476) }, { INT16_C( 3388), -INT16_C( 22085), INT16_C( 2842), INT16_C( 4373) }, { INT16_C( 9794), -INT16_C( 16909), -INT16_C( 7511), INT16_C( 11924) } }, { { INT16_C( 23570), -INT16_C( 1377), INT16_C( 13725), INT16_C( 9933) }, { -INT16_C( 6686), INT16_C( 12749), -INT16_C( 30519), INT16_C( 4848) }, { INT16_C( 8442), INT16_C( 5686), -INT16_C( 8397), INT16_C( 7390) } }, { { INT16_C( 7367), -INT16_C( 28), -INT16_C( 1834), INT16_C( 4683) }, { INT16_C( 1542), INT16_C( 8379), -INT16_C( 12270), INT16_C( 9265) }, { INT16_C( 4454), INT16_C( 4175), -INT16_C( 7052), INT16_C( 6974) } }, { { -INT16_C( 11987), -INT16_C( 13794), -INT16_C( 5114), -INT16_C( 5904) }, { -INT16_C( 16943), -INT16_C( 26086), INT16_C( 2629), INT16_C( 3244) }, { -INT16_C( 14465), -INT16_C( 19940), -INT16_C( 1243), -INT16_C( 1330) } }, { { -INT16_C( 28633), -INT16_C( 757), INT16_C( 22409), -INT16_C( 28912) }, { -INT16_C( 13475), INT16_C( 28591), -INT16_C( 8036), -INT16_C( 13933) }, { -INT16_C( 21054), INT16_C( 13917), INT16_C( 7186), -INT16_C( 21423) } }, { { -INT16_C( 19791), -INT16_C( 18541), -INT16_C( 31586), INT16_C( 28576) }, { -INT16_C( 17855), -INT16_C( 30966), -INT16_C( 18748), -INT16_C( 5229) }, { -INT16_C( 18823), -INT16_C( 24754), -INT16_C( 25167), INT16_C( 11673) } }, { { -INT16_C( 24761), -INT16_C( 12055), -INT16_C( 1546), INT16_C( 21343) }, { INT16_C( 3780), INT16_C( 24771), INT16_C( 22254), -INT16_C( 24535) }, { -INT16_C( 10491), INT16_C( 6358), INT16_C( 10354), -INT16_C( 1596) } }, { { -INT16_C( 17144), -INT16_C( 22953), -INT16_C( 2239), -INT16_C( 32234) }, { INT16_C( 8369), INT16_C( 30217), -INT16_C( 25130), INT16_C( 7521) }, { -INT16_C( 4388), INT16_C( 3632), -INT16_C( 13685), -INT16_C( 12357) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = 
simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vhadd_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vhadd_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #if !defined(TEST_SIMDE_VABD_NO_TEST_32) static int test_simde_vhadd_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 854411836), INT32_C( 142953539) }, { INT32_C( 1231571034), -INT32_C( 1477864801) }, { INT32_C( 1042991435), -INT32_C( 667455631) } }, { { -INT32_C( 1873919921), -INT32_C( 384670664) }, { INT32_C( 1516182660), -INT32_C( 176635463) }, { -INT32_C( 178868631), -INT32_C( 280653064) } }, { { INT32_C( 1327981835), INT32_C( 207072434) }, { -INT32_C( 1806319627), -INT32_C( 1606730159) }, { -INT32_C( 239168896), -INT32_C( 699828863) } }, { { -INT32_C( 1221555841), INT32_C( 1906328557) }, { INT32_C( 416022623), -INT32_C( 871545663) }, { -INT32_C( 402766609), INT32_C( 517391447) } }, { { INT32_C( 1528509609), -INT32_C( 714575136) }, { -INT32_C( 2090222286), INT32_C( 2065999356) }, { -INT32_C( 280856339), INT32_C( 675712110) } }, { { INT32_C( 473060398), -INT32_C( 158477673) }, { -INT32_C( 1827776046), INT32_C( 1197480861) }, { -INT32_C( 677357824), INT32_C( 519501594) } }, { { INT32_C( 815954767), INT32_C( 537201390) }, { -INT32_C( 995921976), INT32_C( 1111475988) }, { -INT32_C( 89983605), INT32_C( 824338689) } }, { { -INT32_C( 1285656292), INT32_C( 380300355) }, { -INT32_C( 475416507), INT32_C( 589957588) }, { -INT32_C( 880536400), INT32_C( 485128971) 
} } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vhadd_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vhadd_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #endif static int test_simde_vhadd_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 10), UINT8_C( 5), UINT8_C(217), UINT8_C(190), UINT8_C(214), UINT8_C( 67), UINT8_C(105), UINT8_C(247) }, { UINT8_MAX, UINT8_C(155), UINT8_C( 39), UINT8_C( 51), UINT8_C( 23), UINT8_C(170), UINT8_C(219), UINT8_C( 72) }, { UINT8_C(132), UINT8_C( 80), UINT8_C(128), UINT8_C(120), UINT8_C(118), UINT8_C(118), UINT8_C(162), UINT8_C(159) } }, { { UINT8_C(178), UINT8_C( 4), UINT8_C(120), UINT8_C(197), UINT8_C( 83), UINT8_C(228), UINT8_C(140), UINT8_C(197) }, { UINT8_C(105), UINT8_C(117), UINT8_C(168), UINT8_C(164), UINT8_C(176), UINT8_C( 55), UINT8_C(155), UINT8_C(186) }, { UINT8_C(141), UINT8_C( 60), UINT8_C(144), UINT8_C(180), UINT8_C(129), UINT8_C(141), UINT8_C(147), UINT8_C(191) } }, { { UINT8_C( 60), UINT8_C(116), UINT8_C(120), UINT8_C( 18), UINT8_C(183), UINT8_C(225), UINT8_C( 10), UINT8_C(182) }, { UINT8_C(124), UINT8_C( 49), UINT8_C(233), UINT8_C(147), UINT8_C(220), UINT8_C(197), UINT8_C(219), UINT8_C(142) }, { UINT8_C( 92), UINT8_C( 82), UINT8_C(176), UINT8_C( 82), UINT8_C(201), UINT8_C(211), UINT8_C(114), UINT8_C(162) } }, { { UINT8_C(201), UINT8_C( 83), UINT8_C( 83), UINT8_C( 
29), UINT8_C( 56), UINT8_C(224), UINT8_C(226), UINT8_C(161) }, { UINT8_C( 85), UINT8_C(139), UINT8_C( 70), UINT8_C( 5), UINT8_C(194), UINT8_C(225), UINT8_C(191), UINT8_C(254) }, { UINT8_C(143), UINT8_C(111), UINT8_C( 76), UINT8_C( 17), UINT8_C(125), UINT8_C(224), UINT8_C(208), UINT8_C(207) } }, { { UINT8_C( 85), UINT8_C( 55), UINT8_C( 16), UINT8_C( 13), UINT8_C( 24), UINT8_C( 26), UINT8_C(195), UINT8_C(149) }, { UINT8_C( 76), UINT8_C(173), UINT8_C( 40), UINT8_C( 40), UINT8_C(114), UINT8_C( 4), UINT8_C(182), UINT8_C( 59) }, { UINT8_C( 80), UINT8_C(114), UINT8_C( 28), UINT8_C( 26), UINT8_C( 69), UINT8_C( 15), UINT8_C(188), UINT8_C(104) } }, { { UINT8_C( 87), UINT8_C( 10), UINT8_C( 88), UINT8_C(143), UINT8_C(234), UINT8_C( 59), UINT8_C( 49), UINT8_C( 63) }, { UINT8_C(198), UINT8_C(119), UINT8_C( 68), UINT8_C(136), UINT8_C( 88), UINT8_C( 3), UINT8_C(134), UINT8_C(173) }, { UINT8_C(142), UINT8_C( 64), UINT8_C( 78), UINT8_C(139), UINT8_C(161), UINT8_C( 31), UINT8_C( 91), UINT8_C(118) } }, { { UINT8_C( 58), UINT8_C(150), UINT8_C(186), UINT8_C( 82), UINT8_C(177), UINT8_C(126), UINT8_C(231), UINT8_C(253) }, { UINT8_C( 43), UINT8_C( 16), UINT8_C( 37), UINT8_C(157), UINT8_C( 20), UINT8_C(219), UINT8_C(216), UINT8_C(107) }, { UINT8_C( 50), UINT8_C( 83), UINT8_C(111), UINT8_C(119), UINT8_C( 98), UINT8_C(172), UINT8_C(223), UINT8_C(180) } }, { { UINT8_C(229), UINT8_C( 49), UINT8_C(251), UINT8_C(207), UINT8_C(108), UINT8_C( 44), UINT8_C( 14), UINT8_C( 50) }, { UINT8_C(163), UINT8_C( 82), UINT8_C(186), UINT8_C(251), UINT8_C( 85), UINT8_C( 64), UINT8_C(168), UINT8_C(143) }, { UINT8_C(196), UINT8_C( 65), UINT8_C(218), UINT8_C(229), UINT8_C( 96), UINT8_C( 54), UINT8_C( 91), UINT8_C( 96) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vhadd_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } 
return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vhadd_u8(a, b); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vhadd_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(52357), UINT16_C(29523), UINT16_C(22999), UINT16_C(40851) }, { UINT16_C(14024), UINT16_C(56419), UINT16_C(41726), UINT16_C( 6686) }, { UINT16_C(33190), UINT16_C(42971), UINT16_C(32362), UINT16_C(23768) } }, { { UINT16_C(32019), UINT16_C(22221), UINT16_C(30569), UINT16_C(44652) }, { UINT16_C( 5680), UINT16_C( 1169), UINT16_C(47903), UINT16_C(42023) }, { UINT16_C(18849), UINT16_C(11695), UINT16_C(39236), UINT16_C(43337) } }, { { UINT16_C(31624), UINT16_C(24343), UINT16_C(43732), UINT16_C(40190) }, { UINT16_C(25057), UINT16_C(57208), UINT16_C(38403), UINT16_C( 5881) }, { UINT16_C(28340), UINT16_C(40775), UINT16_C(41067), UINT16_C(23035) } }, { { UINT16_C(50707), UINT16_C(31852), UINT16_C(55614), UINT16_C(28203) }, { UINT16_C(48367), UINT16_C( 3698), UINT16_C(39288), UINT16_C( 179) }, { UINT16_C(49537), UINT16_C(17775), UINT16_C(47451), UINT16_C(14191) } }, { { UINT16_C(51732), UINT16_C(59487), UINT16_C(23925), UINT16_C(22148) }, { UINT16_C(64702), UINT16_C(49461), UINT16_C(11923), UINT16_C(42711) }, { UINT16_C(58217), UINT16_C(54474), UINT16_C(17924), UINT16_C(32429) } }, { { UINT16_C(17396), UINT16_C(12835), UINT16_C(19996), UINT16_C( 2976) }, { UINT16_C( 4618), UINT16_C(33306), UINT16_C(52652), UINT16_C(49282) }, { UINT16_C(11007), UINT16_C(23070), UINT16_C(36324), UINT16_C(26129) } }, { { UINT16_C(57751), UINT16_C( 3241), UINT16_C(11582), UINT16_C(64610) }, { UINT16_C(38698), 
UINT16_C(48573), UINT16_C(38085), UINT16_C(47715) }, { UINT16_C(48224), UINT16_C(25907), UINT16_C(24833), UINT16_C(56162) } }, { { UINT16_C(34520), UINT16_C(62700), UINT16_C(36308), UINT16_C(57088) }, { UINT16_C( 6815), UINT16_C(19297), UINT16_C(58599), UINT16_C(32268) }, { UINT16_C(20667), UINT16_C(40998), UINT16_C(47453), UINT16_C(44678) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vhadd_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t r = simde_vhadd_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vhadd_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C( 76264901), UINT32_C( 201387490) }, { UINT32_C(1254735493), UINT32_C( 704916818) }, { UINT32_C( 665500197), UINT32_C( 453152154) } }, { { UINT32_C(2283794867), UINT32_C( 493297534) }, { UINT32_C( 543803449), UINT32_C(1922987436) }, { UINT32_C(1413799158), UINT32_C(1208142485) } }, { { UINT32_C( 209070378), UINT32_C(2618914327) }, { UINT32_C(2280055348), UINT32_C(3283217167) }, { UINT32_C(1244562863), UINT32_C(2951065747) } }, { { UINT32_C(1514918108), UINT32_C( 678933231) }, { UINT32_C( 659087482), UINT32_C(2140792661) }, { UINT32_C(1087002795), UINT32_C(1409862946) } }, { { UINT32_C( 663490320), UINT32_C(3133384069) }, { UINT32_C(2537663111), UINT32_C(1901785749) }, { UINT32_C(1600576715), UINT32_C(2517584909) } }, { { 
UINT32_C(2999690691), UINT32_C(3520807511) }, { UINT32_C(2029527843), UINT32_C( 469274890) }, { UINT32_C(2514609267), UINT32_C(1995041200) } }, { { UINT32_C( 641893536), UINT32_C(2967471657) }, { UINT32_C(1162289584), UINT32_C(3602293011) }, { UINT32_C( 902091560), UINT32_C(3284882334) } }, { { UINT32_C(2643034438), UINT32_C(3866059971) }, { UINT32_C(2455725959), UINT32_C(2578274297) }, { UINT32_C(2549380198), UINT32_C(3222167134) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vhadd_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t r = simde_vhadd_u32(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vhaddq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 14), -INT8_C( 8), INT8_C( 51), INT8_C( 84), -INT8_C( 33), INT8_C( 105), -INT8_C( 96), -INT8_C( 60), -INT8_C( 70), INT8_C( 54), -INT8_C( 105), -INT8_C( 122), -INT8_C( 3), -INT8_C( 81), INT8_C( 63), INT8_C( 57) }, { INT8_C( 47), INT8_C( 56), -INT8_C( 25), INT8_C( 91), INT8_C( 15), -INT8_C( 61), -INT8_C( 2), -INT8_C( 127), INT8_C( 51), INT8_C( 85), INT8_C( 118), INT8_C( 49), INT8_C( 49), INT8_C( 74), -INT8_C( 66), INT8_C( 63) }, { INT8_C( 30), INT8_C( 24), INT8_C( 13), INT8_C( 87), -INT8_C( 9), INT8_C( 22), -INT8_C( 49), -INT8_C( 94), -INT8_C( 10), INT8_C( 69), INT8_C( 6), -INT8_C( 37), INT8_C( 23), -INT8_C( 4), -INT8_C( 2), INT8_C( 60) } }, { { INT8_C( 
66), -INT8_C( 15), -INT8_C( 108), INT8_C( 34), INT8_C( 91), INT8_C( 52), -INT8_C( 26), INT8_C( 21), INT8_C( 107), INT8_C( 125), -INT8_C( 101), INT8_C( 104), INT8_C( 44), -INT8_C( 37), -INT8_C( 94), INT8_C( 91) }, { INT8_C( 19), -INT8_C( 119), -INT8_C( 74), INT8_C( 34), INT8_C( 76), -INT8_C( 75), -INT8_C( 93), INT8_MAX, INT8_C( 10), INT8_C( 25), -INT8_C( 79), INT8_C( 59), INT8_C( 99), INT8_C( 111), INT8_C( 123), -INT8_C( 90) }, { INT8_C( 42), -INT8_C( 67), -INT8_C( 91), INT8_C( 34), INT8_C( 83), -INT8_C( 12), -INT8_C( 60), INT8_C( 74), INT8_C( 58), INT8_C( 75), -INT8_C( 90), INT8_C( 81), INT8_C( 71), INT8_C( 37), INT8_C( 14), INT8_C( 0) } }, { { INT8_C( 96), INT8_C( 15), -INT8_C( 56), -INT8_C( 69), INT8_C( 67), -INT8_C( 82), -INT8_C( 48), -INT8_C( 82), INT8_C( 43), INT8_C( 108), INT8_C( 23), INT8_C( 88), INT8_C( 71), -INT8_C( 71), -INT8_C( 77), INT8_C( 90) }, { INT8_C( 66), INT8_C( 106), INT8_C( 125), -INT8_C( 114), INT8_C( 31), INT8_C( 32), INT8_C( 13), INT8_C( 41), INT8_C( 58), -INT8_C( 66), INT8_C( 101), -INT8_C( 99), INT8_C( 45), -INT8_C( 32), INT8_C( 67), -INT8_C( 114) }, { INT8_C( 81), INT8_C( 60), INT8_C( 34), -INT8_C( 92), INT8_C( 49), -INT8_C( 25), -INT8_C( 18), -INT8_C( 21), INT8_C( 50), INT8_C( 21), INT8_C( 62), -INT8_C( 6), INT8_C( 58), -INT8_C( 52), -INT8_C( 5), -INT8_C( 12) } }, { { -INT8_C( 17), INT8_C( 11), INT8_C( 73), INT8_C( 50), -INT8_C( 71), INT8_C( 26), -INT8_C( 31), -INT8_C( 27), -INT8_C( 122), -INT8_C( 8), INT8_C( 61), -INT8_C( 51), -INT8_C( 79), -INT8_C( 16), INT8_C( 39), -INT8_C( 13) }, { INT8_C( 90), -INT8_C( 92), -INT8_C( 127), INT8_C( 121), -INT8_C( 59), -INT8_C( 114), -INT8_C( 93), -INT8_C( 1), INT8_C( 77), INT8_C( 8), -INT8_C( 100), INT8_C( 122), -INT8_C( 24), -INT8_C( 32), INT8_C( 8), -INT8_C( 41) }, { INT8_C( 36), -INT8_C( 41), -INT8_C( 27), INT8_C( 85), -INT8_C( 65), -INT8_C( 44), -INT8_C( 62), -INT8_C( 14), -INT8_C( 23), INT8_C( 0), -INT8_C( 20), INT8_C( 35), -INT8_C( 52), -INT8_C( 24), INT8_C( 23), -INT8_C( 27) } }, { { -INT8_C( 
21), INT8_C( 82), INT8_C( 9), -INT8_C( 91), INT8_C( 108), -INT8_C( 22), -INT8_C( 118), -INT8_C( 14), -INT8_C( 30), -INT8_C( 57), -INT8_C( 65), -INT8_C( 109), -INT8_C( 73), -INT8_C( 26), -INT8_C( 122), INT8_C( 18) }, { -INT8_C( 117), INT8_C( 7), -INT8_C( 117), INT8_C( 80), -INT8_C( 106), INT8_C( 46), INT8_C( 79), -INT8_C( 29), INT8_C( 54), -INT8_C( 21), INT8_C( 93), INT8_C( 30), -INT8_C( 53), INT8_C( 102), -INT8_C( 11), -INT8_C( 73) }, { -INT8_C( 69), INT8_C( 44), -INT8_C( 54), -INT8_C( 6), INT8_C( 1), INT8_C( 12), -INT8_C( 20), -INT8_C( 22), INT8_C( 12), -INT8_C( 39), INT8_C( 14), -INT8_C( 40), -INT8_C( 63), INT8_C( 38), -INT8_C( 67), -INT8_C( 28) } }, { { -INT8_C( 72), -INT8_C( 1), INT8_C( 92), INT8_C( 36), -INT8_C( 23), -INT8_C( 26), INT8_C( 22), -INT8_C( 52), -INT8_C( 83), -INT8_C( 43), INT8_C( 95), INT8_C( 100), -INT8_C( 69), -INT8_C( 26), INT8_C( 118), INT8_C( 70) }, { -INT8_C( 19), INT8_C( 2), -INT8_C( 106), -INT8_C( 125), INT8_C( 48), -INT8_C( 27), INT8_C( 102), INT8_C( 103), -INT8_C( 47), -INT8_C( 60), -INT8_C( 123), -INT8_C( 100), INT8_C( 42), INT8_C( 123), INT8_C( 83), -INT8_C( 30) }, { -INT8_C( 46), INT8_C( 0), -INT8_C( 7), -INT8_C( 45), INT8_C( 12), -INT8_C( 27), INT8_C( 62), INT8_C( 25), -INT8_C( 65), -INT8_C( 52), -INT8_C( 14), INT8_C( 0), -INT8_C( 14), INT8_C( 48), INT8_C( 100), INT8_C( 20) } }, { { INT8_C( 122), -INT8_C( 81), INT8_C( 6), INT8_C( 99), -INT8_C( 107), INT8_C( 28), INT8_C( 47), INT8_C( 66), -INT8_C( 15), -INT8_C( 113), -INT8_C( 89), -INT8_C( 84), INT8_C( 117), INT8_C( 29), -INT8_C( 13), INT8_C( 98) }, { INT8_C( 31), -INT8_C( 119), -INT8_C( 26), INT8_C( 80), INT8_C( 111), INT8_C( 76), -INT8_C( 73), INT8_C( 64), INT8_C( 16), INT8_C( 60), -INT8_C( 36), INT8_C( 58), -INT8_C( 73), INT8_C( 48), INT8_C( 28), INT8_C( 49) }, { INT8_C( 76), -INT8_C( 100), -INT8_C( 10), INT8_C( 89), INT8_C( 2), INT8_C( 52), -INT8_C( 13), INT8_C( 65), INT8_C( 0), -INT8_C( 27), -INT8_C( 63), -INT8_C( 13), INT8_C( 22), INT8_C( 38), INT8_C( 7), INT8_C( 73) } }, { { 
-INT8_C( 33), INT8_C( 34), -INT8_C( 107), INT8_C( 117), INT8_C( 62), -INT8_C( 60), -INT8_C( 73), INT8_C( 47), INT8_C( 83), INT8_C( 94), -INT8_C( 36), -INT8_C( 56), INT8_C( 124), -INT8_C( 49), INT8_C( 43), -INT8_C( 101) }, { INT8_C( 88), INT8_C( 17), -INT8_C( 21), -INT8_C( 57), INT8_C( 93), -INT8_C( 94), INT8_C( 7), INT8_C( 110), -INT8_C( 33), -INT8_C( 28), -INT8_C( 88), -INT8_C( 106), INT8_C( 20), -INT8_C( 59), -INT8_C( 56), -INT8_C( 13) }, { INT8_C( 27), INT8_C( 25), -INT8_C( 64), INT8_C( 30), INT8_C( 77), -INT8_C( 77), -INT8_C( 33), INT8_C( 78), INT8_C( 25), INT8_C( 33), -INT8_C( 62), -INT8_C( 81), INT8_C( 72), -INT8_C( 54), -INT8_C( 7), -INT8_C( 57) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vhaddq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vhaddq_s8(a, b); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vhaddq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 4133), INT16_C( 1215), -INT16_C( 24587), -INT16_C( 23116), -INT16_C( 832), -INT16_C( 11030), -INT16_C( 24419), -INT16_C( 6998) }, { INT16_C( 13089), -INT16_C( 6783), -INT16_C( 3945), INT16_C( 8139), INT16_C( 10840), INT16_C( 20913), INT16_C( 24193), INT16_C( 23786) }, { INT16_C( 4478), -INT16_C( 2784), -INT16_C( 14266), -INT16_C( 7489), INT16_C( 5004), INT16_C( 4941), -INT16_C( 113), INT16_C( 8394) } }, { { -INT16_C( 21939), 
INT16_C( 17248), INT16_C( 5449), INT16_C( 2792), -INT16_C( 11503), -INT16_C( 20770), -INT16_C( 30605), -INT16_C( 27246) }, { INT16_C( 5308), INT16_C( 21370), INT16_C( 17668), INT16_C( 23666), INT16_C( 9072), -INT16_C( 3667), -INT16_C( 26495), -INT16_C( 12466) }, { -INT16_C( 8316), INT16_C( 19309), INT16_C( 11558), INT16_C( 13229), -INT16_C( 1216), -INT16_C( 12219), -INT16_C( 28550), -INT16_C( 19856) } }, { { -INT16_C( 20926), -INT16_C( 29934), -INT16_C( 1341), -INT16_C( 11115), INT16_C( 29645), INT16_C( 16771), INT16_C( 5628), -INT16_C( 18218) }, { INT16_C( 20521), INT16_C( 11787), INT16_C( 32405), INT16_C( 1418), INT16_C( 14497), INT16_C( 9207), INT16_C( 17872), INT16_C( 4850) }, { -INT16_C( 203), -INT16_C( 9074), INT16_C( 15532), -INT16_C( 4849), INT16_C( 22071), INT16_C( 12989), INT16_C( 11750), -INT16_C( 6684) } }, { { INT16_C( 1267), -INT16_C( 18531), INT16_C( 13310), -INT16_C( 13173), INT16_C( 3750), -INT16_C( 24051), -INT16_C( 7388), INT16_C( 19802) }, { INT16_C( 26163), -INT16_C( 14213), INT16_C( 1764), -INT16_C( 31282), -INT16_C( 15042), INT16_C( 3752), -INT16_C( 26102), -INT16_C( 736) }, { INT16_C( 13715), -INT16_C( 16372), INT16_C( 7537), -INT16_C( 22228), -INT16_C( 5646), -INT16_C( 10150), -INT16_C( 16745), INT16_C( 9533) } }, { { -INT16_C( 16994), -INT16_C( 25164), INT16_C( 16624), -INT16_C( 26775), INT16_C( 30286), INT16_C( 29241), -INT16_C( 27559), -INT16_C( 29504) }, { INT16_C( 15354), -INT16_C( 8620), INT16_C( 8769), INT16_C( 32611), INT16_C( 3303), -INT16_C( 3699), -INT16_C( 21082), INT16_C( 17903) }, { -INT16_C( 820), -INT16_C( 16892), INT16_C( 12696), INT16_C( 2918), INT16_C( 16794), INT16_C( 12771), -INT16_C( 24321), -INT16_C( 5801) } }, { { -INT16_C( 23701), INT16_C( 23522), INT16_C( 19427), INT16_C( 13042), INT16_C( 11457), INT16_C( 6820), INT16_C( 25792), -INT16_C( 17754) }, { -INT16_C( 1376), -INT16_C( 7784), -INT16_C( 1251), INT16_C( 1121), -INT16_C( 4601), -INT16_C( 20746), -INT16_C( 6756), INT16_C( 2035) }, { -INT16_C( 12539), INT16_C( 
7869), INT16_C( 9088), INT16_C( 7081), INT16_C( 3428), -INT16_C( 6963), INT16_C( 9518), -INT16_C( 7860) } }, { { -INT16_C( 10872), INT16_C( 27746), INT16_C( 21792), -INT16_C( 7778), INT16_C( 17025), INT16_C( 16891), -INT16_C( 24153), INT16_C( 18427) }, { -INT16_C( 27749), -INT16_C( 18392), -INT16_C( 30322), -INT16_C( 26947), -INT16_C( 19592), INT16_C( 5188), INT16_C( 14232), INT16_C( 8219) }, { -INT16_C( 19311), INT16_C( 4677), -INT16_C( 4265), -INT16_C( 17363), -INT16_C( 1284), INT16_C( 11039), -INT16_C( 4961), INT16_C( 13323) } }, { { INT16_C( 32012), INT16_C( 11404), INT16_C( 10962), INT16_C( 21261), INT16_C( 2157), INT16_C( 5268), -INT16_C( 28759), INT16_C( 17499) }, { -INT16_C( 31966), -INT16_C( 19971), -INT16_C( 17907), -INT16_C( 31417), -INT16_C( 29843), INT16_C( 1433), -INT16_C( 19262), -INT16_C( 12763) }, { INT16_C( 23), -INT16_C( 4284), -INT16_C( 3473), -INT16_C( 5078), -INT16_C( 13843), INT16_C( 3350), -INT16_C( 24011), INT16_C( 2368) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vhaddq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vhaddq_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vhaddq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 83538481), INT32_C( 1230440412), -INT32_C( 1201804273), -INT32_C( 1627604869) }, { INT32_C( 1229977916), INT32_C( 550409907), 
-INT32_C( 484088031), INT32_C( 1286687515) }, { INT32_C( 656758198), INT32_C( 890425159), -INT32_C( 842946152), -INT32_C( 170458677) } }, { { -INT32_C( 649024515), -INT32_C( 1054627662), INT32_C( 259621012), INT32_C( 1974302009) }, { INT32_C( 582941807), -INT32_C( 1287418734), INT32_C( 244738291), -INT32_C( 1336260685) }, { -INT32_C( 33041354), -INT32_C( 1171023198), INT32_C( 252179651), INT32_C( 319020662) } }, { { -INT32_C( 1534415886), -INT32_C( 412766893), INT32_C( 1727454765), -INT32_C( 1009015724) }, { INT32_C( 870685088), INT32_C( 417736741), INT32_C( 1143373201), -INT32_C( 1208647228) }, { -INT32_C( 331865399), INT32_C( 2484924), INT32_C( 1435413983), -INT32_C( 1108831476) } }, { { INT32_C( 2136702764), INT32_C( 1499906348), -INT32_C( 205497185), -INT32_C( 1581868288) }, { INT32_C( 1523883061), INT32_C( 1433647812), -INT32_C( 56977097), INT32_C( 1186172698) }, { INT32_C( 1830292912), INT32_C( 1466777080), -INT32_C( 131237141), -INT32_C( 197847795) } }, { { INT32_C( 985992718), INT32_C( 1871915983), -INT32_C( 2006822008), INT32_C( 606673391) }, { INT32_C( 2038365621), -INT32_C( 271584840), -INT32_C( 1511298677), INT32_C( 116170488) }, { INT32_C( 1512179169), INT32_C( 800165571), -INT32_C( 1759060343), INT32_C( 361421939) } }, { { INT32_C( 2084614573), INT32_C( 1709954013), INT32_C( 384650791), INT32_C( 473569127) }, { -INT32_C( 862603244), INT32_C( 901538986), -INT32_C( 975525939), -INT32_C( 204749242) }, { INT32_C( 611005664), INT32_C( 1305746499), -INT32_C( 295437574), INT32_C( 134409942) } }, { { INT32_C( 1433340792), INT32_C( 112876511), INT32_C( 270313385), -INT32_C( 752068930) }, { -INT32_C( 1197489906), -INT32_C( 202548442), INT32_C( 1236912131), INT32_C( 104629390) }, { INT32_C( 117925443), -INT32_C( 44835966), INT32_C( 753612758), -INT32_C( 323719770) } }, { { INT32_C( 1868278672), -INT32_C( 1351281402), INT32_C( 2076152253), -INT32_C( 179377177) }, { -INT32_C( 743510355), INT32_C( 1288084297), -INT32_C( 225083549), -INT32_C( 1795632892) }, { 
INT32_C( 562384158), -INT32_C( 31598553), INT32_C( 925534352), -INT32_C( 987505035) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vhaddq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vhaddq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vhaddq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(115), UINT8_C(226), UINT8_C( 55), UINT8_C( 33), UINT8_C(202), UINT8_C(200), UINT8_C(178), UINT8_C(204), UINT8_C( 73), UINT8_C(156), UINT8_C(208), UINT8_C(156), UINT8_C(200), UINT8_C( 50), UINT8_C( 25), UINT8_C( 63) }, { UINT8_C(197), UINT8_C( 4), UINT8_C(161), UINT8_C(225), UINT8_C( 73), UINT8_C(219), UINT8_C(126), UINT8_C( 35), UINT8_C( 44), UINT8_C(165), UINT8_C( 54), UINT8_C( 96), UINT8_C( 3), UINT8_C(194), UINT8_C( 69), UINT8_C(118) }, { UINT8_C(156), UINT8_C(115), UINT8_C(108), UINT8_C(129), UINT8_C(137), UINT8_C(209), UINT8_C(152), UINT8_C(119), UINT8_C( 58), UINT8_C(160), UINT8_C(131), UINT8_C(126), UINT8_C(101), UINT8_C(122), UINT8_C( 47), UINT8_C( 90) } }, { { UINT8_C(164), UINT8_C(124), UINT8_C(151), UINT8_C(111), UINT8_C( 68), UINT8_C( 73), UINT8_C( 59), UINT8_C(141), UINT8_C(230), UINT8_C( 11), UINT8_C( 41), UINT8_C(174), UINT8_C( 61), UINT8_C( 66), UINT8_C(238), UINT8_C( 3) }, { UINT8_C( 71), UINT8_C(143), UINT8_C(228), UINT8_C(144), UINT8_C(106), UINT8_C( 99), UINT8_C(180), 
UINT8_C(150), UINT8_C( 8), UINT8_C(234), UINT8_C(246), UINT8_C( 11), UINT8_C(172), UINT8_C( 60), UINT8_C(129), UINT8_C( 81) }, { UINT8_C(117), UINT8_C(133), UINT8_C(189), UINT8_C(127), UINT8_C( 87), UINT8_C( 86), UINT8_C(119), UINT8_C(145), UINT8_C(119), UINT8_C(122), UINT8_C(143), UINT8_C( 92), UINT8_C(116), UINT8_C( 63), UINT8_C(183), UINT8_C( 42) } }, { { UINT8_C(184), UINT8_C( 25), UINT8_C(192), UINT8_C(253), UINT8_C( 98), UINT8_C(251), UINT8_C(138), UINT8_C( 72), UINT8_C( 6), UINT8_C(180), UINT8_C(247), UINT8_C( 67), UINT8_C(246), UINT8_C(229), UINT8_C( 70), UINT8_C( 61) }, { UINT8_C(116), UINT8_C( 43), UINT8_C(206), UINT8_C(222), UINT8_C(142), UINT8_C(130), UINT8_C(116), UINT8_C(150), UINT8_C(108), UINT8_C(107), UINT8_C(162), UINT8_C( 25), UINT8_C(167), UINT8_C( 35), UINT8_C(106), UINT8_C( 95) }, { UINT8_C(150), UINT8_C( 34), UINT8_C(199), UINT8_C(237), UINT8_C(120), UINT8_C(190), UINT8_C(127), UINT8_C(111), UINT8_C( 57), UINT8_C(143), UINT8_C(204), UINT8_C( 46), UINT8_C(206), UINT8_C(132), UINT8_C( 88), UINT8_C( 78) } }, { { UINT8_C( 60), UINT8_C( 42), UINT8_C( 92), UINT8_C(159), UINT8_C( 37), UINT8_C(231), UINT8_C(231), UINT8_C( 43), UINT8_C(155), UINT8_C(222), UINT8_C(110), UINT8_C(145), UINT8_C(195), UINT8_C(181), UINT8_C(207), UINT8_C( 55) }, { UINT8_C(224), UINT8_C(157), UINT8_C( 21), UINT8_C(110), UINT8_C( 31), UINT8_C(138), UINT8_C( 4), UINT8_C(139), UINT8_C(245), UINT8_C(166), UINT8_C(164), UINT8_C(156), UINT8_C(202), UINT8_C( 14), UINT8_C(251), UINT8_C( 6) }, { UINT8_C(142), UINT8_C( 99), UINT8_C( 56), UINT8_C(134), UINT8_C( 34), UINT8_C(184), UINT8_C(117), UINT8_C( 91), UINT8_C(200), UINT8_C(194), UINT8_C(137), UINT8_C(150), UINT8_C(198), UINT8_C( 97), UINT8_C(229), UINT8_C( 30) } }, { { UINT8_C( 56), UINT8_C( 88), UINT8_C(165), UINT8_C( 93), UINT8_C( 63), UINT8_C(141), UINT8_C(136), UINT8_C(218), UINT8_C(107), UINT8_C(247), UINT8_C(107), UINT8_C( 47), UINT8_C(172), UINT8_C( 58), UINT8_C(102), UINT8_C(140) }, { UINT8_C(215), UINT8_C(124), 
UINT8_C(250), UINT8_C(246), UINT8_C( 6), UINT8_C(254), UINT8_C(130), UINT8_C(251), UINT8_C(165), UINT8_C( 38), UINT8_C(151), UINT8_C(111), UINT8_C( 53), UINT8_C(146), UINT8_C(117), UINT8_C(109) }, { UINT8_C(135), UINT8_C(106), UINT8_C(207), UINT8_C(169), UINT8_C( 34), UINT8_C(197), UINT8_C(133), UINT8_C(234), UINT8_C(136), UINT8_C(142), UINT8_C(129), UINT8_C( 79), UINT8_C(112), UINT8_C(102), UINT8_C(109), UINT8_C(124) } }, { { UINT8_C(234), UINT8_C( 27), UINT8_C(203), UINT8_C( 41), UINT8_C(168), UINT8_C( 83), UINT8_C( 3), UINT8_C( 19), UINT8_C( 74), UINT8_C(111), UINT8_C( 66), UINT8_C(246), UINT8_C(169), UINT8_C(169), UINT8_C(130), UINT8_C(129) }, { UINT8_C( 37), UINT8_C(124), UINT8_C(119), UINT8_C( 43), UINT8_C(123), UINT8_C(249), UINT8_C( 38), UINT8_C( 32), UINT8_C( 32), UINT8_C(189), UINT8_C(143), UINT8_C( 85), UINT8_C( 79), UINT8_C( 4), UINT8_C(194), UINT8_C( 58) }, { UINT8_C(135), UINT8_C( 75), UINT8_C(161), UINT8_C( 42), UINT8_C(145), UINT8_C(166), UINT8_C( 20), UINT8_C( 25), UINT8_C( 53), UINT8_C(150), UINT8_C(104), UINT8_C(165), UINT8_C(124), UINT8_C( 86), UINT8_C(162), UINT8_C( 93) } }, { { UINT8_C( 31), UINT8_C(141), UINT8_C( 99), UINT8_C(199), UINT8_C(225), UINT8_C(103), UINT8_C(219), UINT8_C( 43), UINT8_C(214), UINT8_C( 29), UINT8_C( 34), UINT8_C(127), UINT8_C(198), UINT8_C(164), UINT8_C( 0), UINT8_C(235) }, { UINT8_C( 33), UINT8_C(120), UINT8_C( 22), UINT8_C(156), UINT8_C(113), UINT8_C( 60), UINT8_C(188), UINT8_C(145), UINT8_C(249), UINT8_C( 75), UINT8_C(230), UINT8_C( 73), UINT8_C( 79), UINT8_C(169), UINT8_C(131), UINT8_C(111) }, { UINT8_C( 32), UINT8_C(130), UINT8_C( 60), UINT8_C(177), UINT8_C(169), UINT8_C( 81), UINT8_C(203), UINT8_C( 94), UINT8_C(231), UINT8_C( 52), UINT8_C(132), UINT8_C(100), UINT8_C(138), UINT8_C(166), UINT8_C( 65), UINT8_C(173) } }, { { UINT8_C( 54), UINT8_C(230), UINT8_C( 54), UINT8_C( 23), UINT8_C( 77), UINT8_C( 17), UINT8_C( 67), UINT8_C( 35), UINT8_C( 47), UINT8_C(101), UINT8_C(163), UINT8_C(245), UINT8_C( 9), UINT8_C(163), 
UINT8_C(225), UINT8_C( 42) }, { UINT8_C( 27), UINT8_C(247), UINT8_C(198), UINT8_C(141), UINT8_C( 52), UINT8_C(130), UINT8_C( 30), UINT8_C( 45), UINT8_C(205), UINT8_C( 5), UINT8_C(118), UINT8_C( 29), UINT8_C(174), UINT8_C(249), UINT8_C(140), UINT8_C(228) }, { UINT8_C( 40), UINT8_C(238), UINT8_C(126), UINT8_C( 82), UINT8_C( 64), UINT8_C( 73), UINT8_C( 48), UINT8_C( 40), UINT8_C(126), UINT8_C( 53), UINT8_C(140), UINT8_C(137), UINT8_C( 91), UINT8_C(206), UINT8_C(182), UINT8_C(135) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vhaddq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vhaddq_u8(a, b); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vhaddq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(21629), UINT16_C(33539), UINT16_C(30825), UINT16_C( 9779), UINT16_C(61961), UINT16_C(61602), UINT16_C(61662), UINT16_C(35813) }, { UINT16_C(37854), UINT16_C(10334), UINT16_C( 9263), UINT16_C(37492), UINT16_C( 2724), UINT16_C(43140), UINT16_C(32219), UINT16_C(22588) }, { UINT16_C(29741), UINT16_C(21936), UINT16_C(20044), UINT16_C(23635), UINT16_C(32342), UINT16_C(52371), UINT16_C(46940), UINT16_C(29200) } }, { { UINT16_C(16337), UINT16_C(15068), UINT16_C( 4023), UINT16_C(49249), UINT16_C( 769), UINT16_C(57264), UINT16_C(38387), UINT16_C(53866) }, { UINT16_C(51241), UINT16_C(22778), UINT16_C(28397), 
UINT16_C(37354), UINT16_C(28536), UINT16_C(21561), UINT16_C(30188), UINT16_C(48556) }, { UINT16_C(33789), UINT16_C(18923), UINT16_C(16210), UINT16_C(43301), UINT16_C(14652), UINT16_C(39412), UINT16_C(34287), UINT16_C(51211) } }, { { UINT16_C(34996), UINT16_C(27639), UINT16_C(22679), UINT16_C(39211), UINT16_C(56155), UINT16_C(20344), UINT16_C(58224), UINT16_C(39201) }, { UINT16_C( 7083), UINT16_C(39153), UINT16_C(56457), UINT16_C( 553), UINT16_C(25163), UINT16_C(14166), UINT16_C( 727), UINT16_C(35828) }, { UINT16_C(21039), UINT16_C(33396), UINT16_C(39568), UINT16_C(19882), UINT16_C(40659), UINT16_C(17255), UINT16_C(29475), UINT16_C(37514) } }, { { UINT16_C(60299), UINT16_C( 8950), UINT16_C( 8516), UINT16_C(40891), UINT16_C(13564), UINT16_C(28142), UINT16_C( 3863), UINT16_C(49670) }, { UINT16_C(63530), UINT16_C(46171), UINT16_C(34004), UINT16_C( 8118), UINT16_C( 3303), UINT16_C(48726), UINT16_C(18958), UINT16_C(39242) }, { UINT16_C(61914), UINT16_C(27560), UINT16_C(21260), UINT16_C(24504), UINT16_C( 8433), UINT16_C(38434), UINT16_C(11410), UINT16_C(44456) } }, { { UINT16_C(16437), UINT16_C(31164), UINT16_C(30562), UINT16_C(24089), UINT16_C( 1963), UINT16_C(49867), UINT16_C(53783), UINT16_C(16773) }, { UINT16_C(57546), UINT16_C(40693), UINT16_C(43876), UINT16_C(19389), UINT16_C( 5047), UINT16_C(50698), UINT16_C(21597), UINT16_C(37471) }, { UINT16_C(36991), UINT16_C(35928), UINT16_C(37219), UINT16_C(21739), UINT16_C( 3505), UINT16_C(50282), UINT16_C(37690), UINT16_C(27122) } }, { { UINT16_C( 7060), UINT16_C(62988), UINT16_C( 9619), UINT16_C(15957), UINT16_C( 8236), UINT16_C(17153), UINT16_C(34546), UINT16_C(48261) }, { UINT16_C(31334), UINT16_C(51802), UINT16_C( 5926), UINT16_C(56598), UINT16_C( 8234), UINT16_C(34723), UINT16_C( 884), UINT16_C( 2074) }, { UINT16_C(19197), UINT16_C(57395), UINT16_C( 7772), UINT16_C(36277), UINT16_C( 8235), UINT16_C(25938), UINT16_C(17715), UINT16_C(25167) } }, { { UINT16_C( 9758), UINT16_C(45567), UINT16_C(21579), UINT16_C(30704), 
UINT16_C(61812), UINT16_C(26555), UINT16_C(16503), UINT16_C(56611) }, { UINT16_C(32442), UINT16_C(57511), UINT16_C(48533), UINT16_C(49342), UINT16_C(25053), UINT16_C(20807), UINT16_C(24932), UINT16_C(33626) }, { UINT16_C(21100), UINT16_C(51539), UINT16_C(35056), UINT16_C(40023), UINT16_C(43432), UINT16_C(23681), UINT16_C(20717), UINT16_C(45118) } }, { { UINT16_C(22919), UINT16_C(53812), UINT16_C( 9389), UINT16_C( 8522), UINT16_C( 1301), UINT16_C(35976), UINT16_C(44101), UINT16_C(65385) }, { UINT16_C( 4394), UINT16_C(49120), UINT16_C(40654), UINT16_C(44159), UINT16_C(51199), UINT16_C(25853), UINT16_C(22312), UINT16_C(45287) }, { UINT16_C(13656), UINT16_C(51466), UINT16_C(25021), UINT16_C(26340), UINT16_C(26250), UINT16_C(30914), UINT16_C(33206), UINT16_C(55336) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vhaddq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t r = simde_vhaddq_u16(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vhaddq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(1568807856), UINT32_C(1434438720), UINT32_C( 383911889), UINT32_C(3709225907) }, { UINT32_C( 731772508), UINT32_C(2480348308), UINT32_C( 217568483), UINT32_C(3703365164) }, { UINT32_C(1150290182), UINT32_C(1957393514), UINT32_C( 300740186), UINT32_C(3706295535) } }, { { UINT32_C( 976895738), 
UINT32_C(3700406539), UINT32_C(1962111424), UINT32_C( 424741309) }, { UINT32_C(2470768383), UINT32_C(3995474699), UINT32_C( 486153968), UINT32_C(4143494908) }, { UINT32_C(1723832060), UINT32_C(3847940619), UINT32_C(1224132696), UINT32_C(2284118108) } }, { { UINT32_C( 3158773), UINT32_C(2900148459), UINT32_C(3995127601), UINT32_C(3607654872) }, { UINT32_C(1802128480), UINT32_C(1482264936), UINT32_C(2876527791), UINT32_C(4288834570) }, { UINT32_C( 902643626), UINT32_C(2191206697), UINT32_C(3435827696), UINT32_C(3948244721) } }, { { UINT32_C(2332021407), UINT32_C(3291929746), UINT32_C(2226280107), UINT32_C( 677100232) }, { UINT32_C(1871955463), UINT32_C( 113765463), UINT32_C(1269906240), UINT32_C(1179276199) }, { UINT32_C(2101988435), UINT32_C(1702847604), UINT32_C(1748093173), UINT32_C( 928188215) } }, { { UINT32_C(3100723750), UINT32_C(3514566438), UINT32_C( 643116894), UINT32_C(4031689193) }, { UINT32_C(3462390135), UINT32_C( 248784589), UINT32_C( 156861793), UINT32_C(4283409369) }, { UINT32_C(3281556942), UINT32_C(1881675513), UINT32_C( 399989343), UINT32_C(4157549281) } }, { { UINT32_C( 330768621), UINT32_C(2263168040), UINT32_C(1286355555), UINT32_C(1648229099) }, { UINT32_C(2821758171), UINT32_C( 615908547), UINT32_C(1663897482), UINT32_C(2690809267) }, { UINT32_C(1576263396), UINT32_C(1439538293), UINT32_C(1475126518), UINT32_C(2169519183) } }, { { UINT32_C(3316914589), UINT32_C(2957744461), UINT32_C(3221092307), UINT32_C(3424729841) }, { UINT32_C(2574602966), UINT32_C(3770559318), UINT32_C(3997428539), UINT32_C( 110011752) }, { UINT32_C(2945758777), UINT32_C(3364151889), UINT32_C(3609260423), UINT32_C(1767370796) } }, { { UINT32_C( 214647487), UINT32_C(2948405211), UINT32_C( 7256590), UINT32_C(3402403828) }, { UINT32_C( 946094561), UINT32_C(2820153965), UINT32_C(1989565453), UINT32_C(3229361153) }, { UINT32_C( 580371024), UINT32_C(2884279588), UINT32_C( 998411021), UINT32_C(3315882490) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; 
i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vhaddq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t r = simde_vhaddq_u32(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vhadd_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vhadd_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vhadd_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vhadd_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vhadd_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vhadd_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vhaddq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vhaddq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vhaddq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vhaddq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vhaddq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vhaddq_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/hsub.c000066400000000000000000001377121400333146700164370ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN hsub #include "test-neon.h" #include "../../../simde/arm/neon/hsub.h" static int test_simde_vhsub_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 82), INT8_C( 111), INT8_C( 70), -INT8_C( 31), -INT8_C( 51), -INT8_C( 22), INT8_C( 109), INT8_C( 85) }, { INT8_C( 126), INT8_C( 89), INT8_C( 38), INT8_C( 58), -INT8_C( 56), INT8_C( 28), INT8_C( 49), -INT8_C( 21) }, { -INT8_C( 22), INT8_C( 11), INT8_C( 16), -INT8_C( 45), INT8_C( 2), -INT8_C( 25), INT8_C( 30), INT8_C( 53) } }, { { -INT8_C( 63), -INT8_C( 107), -INT8_C( 109), -INT8_C( 28), INT8_C( 68), INT8_C( 
51), INT8_C( 14), -INT8_C( 100) }, { INT8_C( 29), INT8_C( 41), -INT8_C( 53), -INT8_C( 109), -INT8_C( 102), INT8_C( 28), INT8_C( 36), -INT8_C( 19) }, { -INT8_C( 46), -INT8_C( 74), -INT8_C( 28), INT8_C( 40), INT8_C( 85), INT8_C( 11), -INT8_C( 11), -INT8_C( 41) } }, { { -INT8_C( 117), INT8_C( 106), -INT8_C( 50), INT8_C( 89), INT8_C( 84), INT8_C( 59), -INT8_C( 82), -INT8_C( 46) }, { -INT8_C( 107), -INT8_C( 44), INT8_C( 13), INT8_C( 93), -INT8_C( 16), INT8_C( 62), INT8_C( 72), -INT8_C( 78) }, { -INT8_C( 5), INT8_C( 75), -INT8_C( 32), -INT8_C( 2), INT8_C( 50), -INT8_C( 2), -INT8_C( 77), INT8_C( 16) } }, { { -INT8_C( 45), -INT8_C( 37), -INT8_C( 106), INT8_C( 23), INT8_C( 15), -INT8_C( 91), -INT8_C( 77), INT8_C( 44) }, { -INT8_C( 50), INT8_C( 126), -INT8_C( 64), INT8_C( 105), -INT8_C( 101), -INT8_C( 28), INT8_C( 86), INT8_C( 38) }, { INT8_C( 2), -INT8_C( 82), -INT8_C( 21), -INT8_C( 41), INT8_C( 58), -INT8_C( 32), -INT8_C( 82), INT8_C( 3) } }, { { INT8_C( 79), INT8_C( 36), INT8_MAX, -INT8_C( 93), INT8_C( 95), INT8_C( 46), INT8_C( 118), -INT8_C( 12) }, { INT8_C( 2), -INT8_C( 125), INT8_C( 81), -INT8_C( 13), -INT8_C( 63), -INT8_C( 102), -INT8_C( 91), -INT8_C( 107) }, { INT8_C( 38), INT8_C( 80), INT8_C( 23), -INT8_C( 40), INT8_C( 79), INT8_C( 74), INT8_C( 104), INT8_C( 47) } }, { { INT8_C( 117), INT8_C( 59), -INT8_C( 84), -INT8_C( 124), -INT8_C( 32), INT8_C( 96), -INT8_C( 79), -INT8_C( 81) }, { -INT8_C( 34), INT8_C( 113), INT8_C( 24), INT8_C( 121), INT8_C( 85), INT8_C( 110), -INT8_C( 96), -INT8_C( 92) }, { INT8_C( 75), -INT8_C( 27), -INT8_C( 54), -INT8_C( 123), -INT8_C( 59), -INT8_C( 7), INT8_C( 8), INT8_C( 5) } }, { { -INT8_C( 110), INT8_C( 31), INT8_C( 72), -INT8_C( 15), INT8_C( 77), -INT8_C( 66), -INT8_C( 26), INT8_C( 80) }, { INT8_C( 65), INT8_C( 55), INT8_C( 67), INT8_C( 2), -INT8_C( 47), -INT8_C( 24), -INT8_C( 105), INT8_C( 71) }, { -INT8_C( 88), -INT8_C( 12), INT8_C( 2), -INT8_C( 9), INT8_C( 62), -INT8_C( 21), INT8_C( 39), INT8_C( 4) } }, { { INT8_C( 35), INT8_C( 68), 
-INT8_C( 53), INT8_C( 4), -INT8_C( 92), INT8_C( 124), -INT8_C( 77), -INT8_C( 126) }, { -INT8_C( 19), -INT8_C( 53), -INT8_C( 4), INT8_C( 67), INT8_C( 57), -INT8_C( 100), -INT8_C( 25), -INT8_C( 53) }, { INT8_C( 27), INT8_C( 60), -INT8_C( 25), -INT8_C( 32), -INT8_C( 75), INT8_C( 112), -INT8_C( 26), -INT8_C( 37) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vhsub_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vhsub_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vhsub_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 12219), INT16_C( 2492), -INT16_C( 23827), INT16_C( 11865) }, { -INT16_C( 25382), -INT16_C( 21711), -INT16_C( 14204), -INT16_C( 22542) }, { INT16_C( 18800), INT16_C( 12101), -INT16_C( 4812), INT16_C( 17203) } }, { { -INT16_C( 16884), -INT16_C( 20309), INT16_C( 24122), INT16_C( 10291) }, { INT16_C( 12073), INT16_C( 25195), INT16_C( 21195), -INT16_C( 31187) }, { -INT16_C( 14479), -INT16_C( 22752), INT16_C( 1463), INT16_C( 20739) } }, { { -INT16_C( 5502), INT16_C( 28559), -INT16_C( 6004), INT16_C( 26270) }, { -INT16_C( 12412), INT16_C( 2066), INT16_C( 1175), -INT16_C( 23376) }, { INT16_C( 3455), INT16_C( 13246), -INT16_C( 3590), INT16_C( 24823) } }, { { INT16_C( 23490), -INT16_C( 684), -INT16_C( 30790), -INT16_C( 7387) }, { -INT16_C( 28490), -INT16_C( 32442), INT16_C( 29666), INT16_C( 25608) }, { 
INT16_C( 25990), INT16_C( 15879), -INT16_C( 30228), -INT16_C( 16498) } }, { { -INT16_C( 26787), -INT16_C( 5420), INT16_C( 29312), INT16_C( 1104) }, { INT16_C( 25153), -INT16_C( 10227), -INT16_C( 17049), INT16_C( 10620) }, { -INT16_C( 25970), INT16_C( 2403), INT16_C( 23180), -INT16_C( 4758) } }, { { -INT16_C( 12008), -INT16_C( 11738), INT16_C( 19288), INT16_C( 4022) }, { -INT16_C( 805), -INT16_C( 16752), -INT16_C( 26513), -INT16_C( 13022) }, { -INT16_C( 5602), INT16_C( 2507), INT16_C( 22900), INT16_C( 8522) } }, { { -INT16_C( 2512), -INT16_C( 20297), INT16_C( 1896), -INT16_C( 22092) }, { -INT16_C( 16022), -INT16_C( 11902), -INT16_C( 386), -INT16_C( 26630) }, { INT16_C( 6755), -INT16_C( 4198), INT16_C( 1141), INT16_C( 2269) } }, { { INT16_C( 8655), INT16_C( 10345), INT16_C( 8044), INT16_C( 18487) }, { -INT16_C( 14565), -INT16_C( 29946), INT16_C( 10336), -INT16_C( 28584) }, { INT16_C( 11610), INT16_C( 20145), -INT16_C( 1146), INT16_C( 23535) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vhsub_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vhsub_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #if !defined(TEST_SIMDE_VABD_NO_TEST_32) static int test_simde_vhsub_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 2025844961), -INT32_C( 2144209898) }, { INT32_C( 877769654), -INT32_C( 2117382991) }, { -INT32_C( 
1451807308), -INT32_C( 13413454) } }, { { -INT32_C( 643222163), INT32_C( 1881268308) }, { INT32_C( 133900199), INT32_C( 1872188240) }, { -INT32_C( 388561181), INT32_C( 4540034) } }, { { INT32_C( 2029442914), -INT32_C( 2097600564) }, { -INT32_C( 1934210342), INT32_C( 51217046) }, { INT32_C( 1981826628), -INT32_C( 1074408805) } }, { { INT32_C( 199079607), INT32_C( 1031536278) }, { INT32_C( 1984263718), INT32_C( 736484553) }, { -INT32_C( 892592056), INT32_C( 147525862) } }, { { -INT32_C( 2136679500), -INT32_C( 587031293) }, { INT32_C( 2120857831), -INT32_C( 243173830) }, { -INT32_C( 2128768666), -INT32_C( 171928732) } }, { { -INT32_C( 1023582676), -INT32_C( 2097121187) }, { -INT32_C( 1191623185), -INT32_C( 706421215) }, { INT32_C( 84020254), -INT32_C( 695349986) } }, { { -INT32_C( 1135245127), INT32_C( 211441445) }, { INT32_C( 1250558736), -INT32_C( 1506014086) }, { -INT32_C( 1192901932), INT32_C( 858727765) } }, { { -INT32_C( 949405334), -INT32_C( 1605736015) }, { -INT32_C( 816233554), -INT32_C( 609927903) }, { -INT32_C( 66585890), -INT32_C( 497904056) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vhsub_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vhsub_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #endif static int test_simde_vhsub_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(197), UINT8_C(250), 
UINT8_C(151), UINT8_C(234), UINT8_C( 82), UINT8_C( 49), UINT8_C(246), UINT8_C( 98) }, { UINT8_C( 53), UINT8_C(129), UINT8_C(172), UINT8_C(175), UINT8_C(141), UINT8_C(232), UINT8_C( 85), UINT8_C(247) }, { UINT8_C( 72), UINT8_C( 60), UINT8_C(245), UINT8_C( 29), UINT8_C(226), UINT8_C(164), UINT8_C( 80), UINT8_C(181) } }, { { UINT8_C( 33), UINT8_C(190), UINT8_C(191), UINT8_C(211), UINT8_C( 39), UINT8_C( 9), UINT8_C(115), UINT8_C(213) }, { UINT8_C( 77), UINT8_C(204), UINT8_C(165), UINT8_C(110), UINT8_C( 9), UINT8_C( 74), UINT8_C( 73), UINT8_C(206) }, { UINT8_C(234), UINT8_C(249), UINT8_C( 13), UINT8_C( 50), UINT8_C( 15), UINT8_C(223), UINT8_C( 21), UINT8_C( 3) } }, { { UINT8_C( 68), UINT8_C(225), UINT8_C(184), UINT8_C(150), UINT8_C( 18), UINT8_C(175), UINT8_C(248), UINT8_C( 71) }, { UINT8_C( 48), UINT8_C(165), UINT8_C(246), UINT8_C(189), UINT8_C(141), UINT8_C( 76), UINT8_C(180), UINT8_C(175) }, { UINT8_C( 10), UINT8_C( 30), UINT8_C(225), UINT8_C(236), UINT8_C(194), UINT8_C( 49), UINT8_C( 34), UINT8_C(204) } }, { { UINT8_C( 10), UINT8_C(115), UINT8_C(130), UINT8_C( 50), UINT8_C(125), UINT8_C(245), UINT8_C( 7), UINT8_C(202) }, { UINT8_C(194), UINT8_C(172), UINT8_C( 56), UINT8_C(203), UINT8_C(246), UINT8_C(130), UINT8_C(154), UINT8_C( 59) }, { UINT8_C(164), UINT8_C(227), UINT8_C( 37), UINT8_C(179), UINT8_C(195), UINT8_C( 57), UINT8_C(182), UINT8_C( 71) } }, { { UINT8_C( 99), UINT8_C( 82), UINT8_C(209), UINT8_C(117), UINT8_C( 1), UINT8_C(202), UINT8_C(189), UINT8_C( 49) }, { UINT8_C(111), UINT8_C(179), UINT8_C(238), UINT8_C(252), UINT8_MAX, UINT8_C(163), UINT8_C(171), UINT8_C( 10) }, { UINT8_C(250), UINT8_C(207), UINT8_C(241), UINT8_C(188), UINT8_C(129), UINT8_C( 19), UINT8_C( 9), UINT8_C( 19) } }, { { UINT8_C( 22), UINT8_C( 45), UINT8_C( 60), UINT8_C(147), UINT8_C( 35), UINT8_C( 67), UINT8_C( 93), UINT8_C(229) }, { UINT8_C(240), UINT8_C(150), UINT8_C(176), UINT8_C(230), UINT8_C( 24), UINT8_C( 74), UINT8_C( 33), UINT8_C(123) }, { UINT8_C(147), UINT8_C(203), UINT8_C(198), 
UINT8_C(214), UINT8_C( 5), UINT8_C(252), UINT8_C( 30), UINT8_C( 53) } }, { { UINT8_C(157), UINT8_C(243), UINT8_C(240), UINT8_C(158), UINT8_C(189), UINT8_C(173), UINT8_C(208), UINT8_C( 44) }, { UINT8_C( 97), UINT8_C(190), UINT8_C( 40), UINT8_C( 96), UINT8_C( 97), UINT8_C(212), UINT8_C(106), UINT8_C(120) }, { UINT8_C( 30), UINT8_C( 26), UINT8_C(100), UINT8_C( 31), UINT8_C( 46), UINT8_C(236), UINT8_C( 51), UINT8_C(218) } }, { { UINT8_C( 1), UINT8_C(166), UINT8_C( 11), UINT8_C( 36), UINT8_C(234), UINT8_C(105), UINT8_C( 9), UINT8_C(218) }, { UINT8_MAX, UINT8_C(186), UINT8_C(192), UINT8_C( 23), UINT8_C( 4), UINT8_C(226), UINT8_C(146), UINT8_C(161) }, { UINT8_C(129), UINT8_C(246), UINT8_C(165), UINT8_C( 6), UINT8_C(115), UINT8_C(195), UINT8_C(187), UINT8_C( 28) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vhsub_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vhsub_u8(a, b); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vhsub_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(33493), UINT16_C(37440), UINT16_C( 4144), UINT16_C(37310) }, { UINT16_C(59086), UINT16_C(12529), UINT16_C(23738), UINT16_C(48296) }, { UINT16_C(52739), UINT16_C(12455), UINT16_C(55739), UINT16_C(60043) } }, { { UINT16_C(45826), UINT16_C(60640), UINT16_C(59932), UINT16_C( 7110) }, { UINT16_C(34724), UINT16_C(43058), UINT16_C(50281), 
UINT16_C(15946) }, { UINT16_C( 5551), UINT16_C( 8791), UINT16_C( 4825), UINT16_C(61118) } }, { { UINT16_C(35399), UINT16_C(30672), UINT16_C(36506), UINT16_C(26632) }, { UINT16_C(63860), UINT16_C(12184), UINT16_C(16469), UINT16_C(22763) }, { UINT16_C(51305), UINT16_C( 9244), UINT16_C(10018), UINT16_C( 1934) } }, { { UINT16_C(52212), UINT16_C( 4164), UINT16_C( 2997), UINT16_C(22828) }, { UINT16_C(24210), UINT16_C(64258), UINT16_C(19491), UINT16_C(27193) }, { UINT16_C(14001), UINT16_C(35489), UINT16_C(57289), UINT16_C(63353) } }, { { UINT16_C( 2518), UINT16_C(28897), UINT16_C(59799), UINT16_C( 3032) }, { UINT16_C(29154), UINT16_C(14394), UINT16_C( 9649), UINT16_C(42384) }, { UINT16_C(52218), UINT16_C( 7251), UINT16_C(25075), UINT16_C(45860) } }, { { UINT16_C(54513), UINT16_C(42678), UINT16_C(58079), UINT16_C(28928) }, { UINT16_C( 576), UINT16_C(25452), UINT16_C(42318), UINT16_C( 9421) }, { UINT16_C(26968), UINT16_C( 8613), UINT16_C( 7880), UINT16_C( 9753) } }, { { UINT16_C(44718), UINT16_C(17812), UINT16_C(27799), UINT16_C(31313) }, { UINT16_C(35805), UINT16_C(36786), UINT16_C(17073), UINT16_C(41524) }, { UINT16_C( 4456), UINT16_C(56049), UINT16_C( 5363), UINT16_C(60430) } }, { { UINT16_C(59926), UINT16_C(63048), UINT16_C(18636), UINT16_C( 3431) }, { UINT16_C(54346), UINT16_C(39024), UINT16_C(15993), UINT16_C(10428) }, { UINT16_C( 2790), UINT16_C(12012), UINT16_C( 1321), UINT16_C(62037) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vhsub_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t r = simde_vhsub_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, 
SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vhsub_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(2221756652), UINT32_C(2600386237) }, { UINT32_C(4213813322), UINT32_C( 144531186) }, { UINT32_C(3298938961), UINT32_C(1227927525) } }, { { UINT32_C( 369026376), UINT32_C(2015520302) }, { UINT32_C(3004273210), UINT32_C(3185298896) }, { UINT32_C(2977343879), UINT32_C(3710077999) } }, { { UINT32_C(3678488862), UINT32_C(1366638343) }, { UINT32_C(3779895279), UINT32_C(1172957693) }, { UINT32_C(4244264087), UINT32_C( 96840325) } }, { { UINT32_C(4250593487), UINT32_C(2289400910) }, { UINT32_C(3745220111), UINT32_C(1922832212) }, { UINT32_C( 252686688), UINT32_C( 183284349) } }, { { UINT32_C(1733156192), UINT32_C( 196723228) }, { UINT32_C(1592526177), UINT32_C(3198473967) }, { UINT32_C( 70315007), UINT32_C(2794091926) } }, { { UINT32_C( 213647038), UINT32_C(2324967547) }, { UINT32_C( 191483831), UINT32_C(1182598886) }, { UINT32_C( 11081603), UINT32_C( 571184330) } }, { { UINT32_C( 11455203), UINT32_C(3993724812) }, { UINT32_C(1531770988), UINT32_C(2350510286) }, { UINT32_C(3534809403), UINT32_C( 821607263) } }, { { UINT32_C(1788400879), UINT32_C(3170118661) }, { UINT32_C(3804716539), UINT32_C(1193821283) }, { UINT32_C(3286809466), UINT32_C( 988148689) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vhsub_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); 
simde_uint32x2_t r = simde_vhsub_u32(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vhsubq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 14), -INT8_C( 42), INT8_C( 71), -INT8_C( 102), INT8_C( 61), INT8_C( 82), -INT8_C( 120), -INT8_C( 86), INT8_C( 74), -INT8_C( 43), INT8_C( 5), INT8_C( 24), -INT8_C( 59), INT8_C( 31), -INT8_C( 92), -INT8_C( 76) }, { -INT8_C( 13), INT8_C( 60), INT8_C( 30), -INT8_C( 8), INT8_C( 104), INT8_C( 18), -INT8_C( 76), INT8_C( 100), INT8_C( 112), INT8_C( 123), INT8_C( 70), -INT8_C( 45), -INT8_C( 65), INT8_C( 110), INT8_C( 26), -INT8_C( 51) }, { INT8_C( 13), -INT8_C( 51), INT8_C( 20), -INT8_C( 47), -INT8_C( 22), INT8_C( 32), -INT8_C( 22), -INT8_C( 93), -INT8_C( 19), -INT8_C( 83), -INT8_C( 33), INT8_C( 34), INT8_C( 3), -INT8_C( 40), -INT8_C( 59), -INT8_C( 13) } }, { { INT8_C( 69), INT8_C( 97), INT8_C( 104), -INT8_C( 126), -INT8_C( 76), -INT8_C( 16), INT8_C( 44), -INT8_C( 2), -INT8_C( 59), INT8_C( 50), INT8_C( 23), -INT8_C( 117), INT8_C( 81), -INT8_C( 69), INT8_C( 63), INT8_C( 68) }, { -INT8_C( 8), INT8_C( 94), INT8_C( 61), INT8_C( 96), INT8_C( 112), -INT8_C( 15), -INT8_C( 60), -INT8_C( 32), INT8_C( 109), INT8_C( 10), -INT8_C( 76), INT8_C( 44), INT8_C( 121), -INT8_C( 50), -INT8_C( 6), -INT8_C( 66) }, { INT8_C( 38), INT8_C( 1), INT8_C( 21), -INT8_C( 111), -INT8_C( 94), -INT8_C( 1), INT8_C( 52), INT8_C( 15), -INT8_C( 84), INT8_C( 20), INT8_C( 49), -INT8_C( 81), -INT8_C( 20), -INT8_C( 10), INT8_C( 34), INT8_C( 67) } }, { { INT8_C( 48), INT8_C( 98), INT8_C( 64), -INT8_C( 28), INT8_C( 82), INT8_C( 109), -INT8_C( 30), INT8_C( 24), -INT8_C( 97), -INT8_C( 7), -INT8_C( 93), -INT8_C( 16), -INT8_C( 75), -INT8_C( 30), INT8_C( 52), -INT8_C( 83) }, { INT8_C( 64), INT8_C( 113), INT8_C( 
13), -INT8_C( 79), INT8_C( 99), -INT8_C( 46), -INT8_C( 111), -INT8_C( 48), -INT8_C( 36), INT8_C( 69), -INT8_C( 4), INT8_C( 85), INT8_C( 20), -INT8_C( 10), INT8_C( 19), INT8_C( 68) }, { -INT8_C( 8), -INT8_C( 8), INT8_C( 25), INT8_C( 25), -INT8_C( 9), INT8_C( 77), INT8_C( 40), INT8_C( 36), -INT8_C( 31), -INT8_C( 38), -INT8_C( 45), -INT8_C( 51), -INT8_C( 48), -INT8_C( 10), INT8_C( 16), -INT8_C( 76) } }, { { INT8_C( 88), INT8_C( 84), INT8_C( 40), -INT8_C( 85), -INT8_C( 63), INT8_C( 10), -INT8_C( 61), INT8_C( 96), INT8_C( 4), INT8_C( 102), INT8_C( 80), -INT8_C( 71), INT8_C( 72), -INT8_C( 124), INT8_C( 102), -INT8_C( 119) }, { -INT8_C( 10), INT8_C( 115), INT8_C( 58), INT8_C( 89), INT8_C( 69), -INT8_C( 53), INT8_C( 41), INT8_C( 34), INT8_C( 17), INT8_C( 37), INT8_C( 119), INT8_C( 37), INT8_C( 28), -INT8_C( 117), INT8_C( 105), INT8_C( 116) }, { INT8_C( 49), -INT8_C( 16), -INT8_C( 9), -INT8_C( 87), -INT8_C( 66), INT8_C( 31), -INT8_C( 51), INT8_C( 31), -INT8_C( 7), INT8_C( 32), -INT8_C( 20), -INT8_C( 54), INT8_C( 22), -INT8_C( 4), -INT8_C( 2), -INT8_C( 118) } }, { { -INT8_C( 33), -INT8_C( 111), INT8_C( 31), -INT8_C( 96), -INT8_C( 101), -INT8_C( 30), INT8_C( 0), -INT8_C( 97), INT8_C( 72), INT8_C( 80), INT8_C( 88), -INT8_C( 111), -INT8_C( 44), -INT8_C( 66), INT8_C( 26), -INT8_C( 54) }, { INT8_C( 50), INT8_C( 84), INT8_C( 35), INT8_C( 119), INT8_C( 31), INT8_C( 76), -INT8_C( 103), INT8_C( 48), INT8_C( 114), INT8_C( 17), INT8_C( 85), -INT8_C( 114), -INT8_C( 100), -INT8_C( 66), INT8_C( 2), INT8_C( 123) }, { -INT8_C( 42), -INT8_C( 98), -INT8_C( 2), -INT8_C( 108), -INT8_C( 66), -INT8_C( 53), INT8_C( 51), -INT8_C( 73), -INT8_C( 21), INT8_C( 31), INT8_C( 1), INT8_C( 1), INT8_C( 28), INT8_C( 0), INT8_C( 12), -INT8_C( 89) } }, { { INT8_C( 79), INT8_C( 34), INT8_C( 27), -INT8_C( 21), INT8_C( 4), INT8_C( 27), -INT8_C( 118), INT8_C( 77), INT8_C( 107), -INT8_C( 29), -INT8_C( 34), INT8_C( 63), -INT8_C( 95), -INT8_C( 8), INT8_C( 10), -INT8_C( 45) }, { INT8_C( 76), INT8_C( 45), INT8_C( 75), 
INT8_C( 107), INT8_C( 122), -INT8_C( 28), -INT8_C( 100), -INT8_C( 20), -INT8_C( 11), -INT8_C( 15), INT8_C( 122), -INT8_C( 111), -INT8_C( 80), INT8_C( 124), INT8_C( 12), -INT8_C( 1) }, { INT8_C( 1), -INT8_C( 6), -INT8_C( 24), -INT8_C( 64), -INT8_C( 59), INT8_C( 27), -INT8_C( 9), INT8_C( 48), INT8_C( 59), -INT8_C( 7), -INT8_C( 78), INT8_C( 87), -INT8_C( 8), -INT8_C( 66), -INT8_C( 1), -INT8_C( 22) } }, { { -INT8_C( 98), INT8_C( 39), -INT8_C( 22), -INT8_C( 93), INT8_C( 66), INT8_C( 117), -INT8_C( 16), -INT8_C( 83), INT8_C( 88), -INT8_C( 50), -INT8_C( 19), -INT8_C( 7), -INT8_C( 58), -INT8_C( 9), -INT8_C( 51), INT8_C( 18) }, { INT8_C( 36), INT8_C( 24), INT8_C( 125), -INT8_C( 98), -INT8_C( 4), INT8_C( 25), -INT8_C( 118), -INT8_C( 14), INT8_C( 11), INT8_C( 4), -INT8_C( 125), -INT8_C( 69), -INT8_C( 127), -INT8_C( 112), -INT8_C( 70), INT8_C( 31) }, { -INT8_C( 67), INT8_C( 7), -INT8_C( 74), INT8_C( 2), INT8_C( 35), INT8_C( 46), INT8_C( 51), -INT8_C( 35), INT8_C( 38), -INT8_C( 27), INT8_C( 53), INT8_C( 31), INT8_C( 34), INT8_C( 51), INT8_C( 9), -INT8_C( 7) } }, { { -INT8_C( 73), -INT8_C( 91), -INT8_C( 62), -INT8_C( 6), INT8_C( 26), -INT8_C( 78), -INT8_C( 89), INT8_C( 114), INT8_MIN, -INT8_C( 108), INT8_C( 107), INT8_C( 70), -INT8_C( 117), INT8_C( 56), INT8_C( 88), -INT8_C( 80) }, { INT8_C( 80), -INT8_C( 42), INT8_C( 78), INT8_C( 77), -INT8_C( 17), -INT8_C( 39), INT8_C( 63), -INT8_C( 6), -INT8_C( 35), -INT8_C( 62), -INT8_C( 75), INT8_C( 94), INT8_C( 82), INT8_C( 112), INT8_C( 126), INT8_C( 10) }, { -INT8_C( 77), -INT8_C( 25), -INT8_C( 70), -INT8_C( 42), INT8_C( 21), -INT8_C( 20), -INT8_C( 76), INT8_C( 60), -INT8_C( 47), -INT8_C( 23), INT8_C( 91), -INT8_C( 12), -INT8_C( 100), -INT8_C( 28), -INT8_C( 19), -INT8_C( 45) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vhsubq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, 
simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vhsubq_s8(a, b); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vhsubq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 16405), INT16_C( 12036), -INT16_C( 21517), INT16_C( 29601), INT16_C( 3136), -INT16_C( 13382), INT16_C( 4677), -INT16_C( 27269) }, { -INT16_C( 13592), -INT16_C( 10014), INT16_C( 8611), -INT16_C( 32558), -INT16_C( 30492), INT16_C( 14047), INT16_C( 24056), INT16_C( 3392) }, { INT16_C( 14998), INT16_C( 11025), -INT16_C( 15064), INT16_C( 31079), INT16_C( 16814), -INT16_C( 13715), -INT16_C( 9690), -INT16_C( 15331) } }, { { INT16_C( 17565), -INT16_C( 28612), -INT16_C( 8720), INT16_C( 12292), -INT16_C( 16663), INT16_C( 12027), INT16_C( 30672), -INT16_C( 17980) }, { -INT16_C( 22975), -INT16_C( 7023), INT16_C( 25544), -INT16_C( 21404), INT16_C( 17387), -INT16_C( 7198), INT16_C( 9120), INT16_C( 16112) }, { INT16_C( 20270), -INT16_C( 10795), -INT16_C( 17132), INT16_C( 16848), -INT16_C( 17025), INT16_C( 9612), INT16_C( 10776), -INT16_C( 17046) } }, { { INT16_C( 11367), INT16_C( 22478), -INT16_C( 11767), -INT16_C( 3193), -INT16_C( 31856), INT16_C( 24865), -INT16_C( 6662), INT16_C( 15130) }, { -INT16_C( 21620), INT16_C( 21535), -INT16_C( 31986), -INT16_C( 1536), -INT16_C( 7481), INT16_C( 26589), -INT16_C( 12795), INT16_C( 28069) }, { INT16_C( 16493), INT16_C( 471), INT16_C( 10109), -INT16_C( 829), -INT16_C( 12188), -INT16_C( 862), INT16_C( 3066), -INT16_C( 6470) } }, { { INT16_C( 29946), INT16_C( 1220), INT16_C( 19526), -INT16_C( 10249), INT16_C( 6351), 
-INT16_C( 14024), INT16_C( 21246), -INT16_C( 30204) }, { INT16_C( 9213), INT16_C( 3038), -INT16_C( 8538), INT16_C( 27909), -INT16_C( 7232), -INT16_C( 14635), INT16_C( 31409), -INT16_C( 21709) }, { INT16_C( 10366), -INT16_C( 909), INT16_C( 14032), -INT16_C( 19079), INT16_C( 6791), INT16_C( 305), -INT16_C( 5082), -INT16_C( 4248) } }, { { -INT16_C( 2066), INT16_C( 13743), -INT16_C( 22973), INT16_C( 4620), INT16_C( 17599), -INT16_C( 16933), -INT16_C( 8298), -INT16_C( 27833) }, { INT16_C( 9474), -INT16_C( 22114), -INT16_C( 23549), -INT16_C( 15594), -INT16_C( 5241), INT16_C( 14473), -INT16_C( 17306), INT16_C( 21731) }, { -INT16_C( 5770), INT16_C( 17928), INT16_C( 288), INT16_C( 10107), INT16_C( 11420), -INT16_C( 15703), INT16_C( 4504), -INT16_C( 24782) } }, { { -INT16_C( 27724), -INT16_C( 2167), -INT16_C( 27335), -INT16_C( 2038), -INT16_C( 6695), INT16_C( 28597), -INT16_C( 827), -INT16_C( 14590) }, { -INT16_C( 24287), INT16_C( 9328), -INT16_C( 30907), -INT16_C( 13080), INT16_C( 29042), -INT16_C( 10236), -INT16_C( 6354), -INT16_C( 7635) }, { -INT16_C( 1719), -INT16_C( 5748), INT16_C( 1786), INT16_C( 5521), -INT16_C( 17869), INT16_C( 19416), INT16_C( 2763), -INT16_C( 3478) } }, { { -INT16_C( 18822), -INT16_C( 19239), -INT16_C( 7348), INT16_C( 9644), INT16_C( 25289), -INT16_C( 29035), -INT16_C( 26786), -INT16_C( 32683) }, { -INT16_C( 14792), INT16_C( 32164), -INT16_C( 29619), -INT16_C( 16567), INT16_C( 19966), INT16_C( 11416), -INT16_C( 15051), -INT16_C( 20722) }, { -INT16_C( 2015), -INT16_C( 25702), INT16_C( 11135), INT16_C( 13105), INT16_C( 2661), -INT16_C( 20226), -INT16_C( 5868), -INT16_C( 5981) } }, { { -INT16_C( 6277), -INT16_C( 14493), INT16_C( 4299), -INT16_C( 27411), -INT16_C( 32142), -INT16_C( 12254), INT16_C( 30489), INT16_C( 21072) }, { -INT16_C( 2755), -INT16_C( 30001), INT16_C( 6529), INT16_C( 32586), -INT16_C( 7578), -INT16_C( 25685), -INT16_C( 18009), INT16_C( 8779) }, { -INT16_C( 1761), INT16_C( 7754), -INT16_C( 1115), -INT16_C( 29999), -INT16_C( 12282), 
INT16_C( 6715), INT16_C( 24249), INT16_C( 6146) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vhsubq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vhsubq_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vhsubq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 1827319457), INT32_C( 805361598), INT32_C( 1912676953), -INT32_C( 674999911) }, { -INT32_C( 933129146), INT32_C( 323464109), INT32_C( 883946381), INT32_C( 1297611436) }, { INT32_C( 1380224301), INT32_C( 240948744), INT32_C( 514365286), -INT32_C( 986305674) } }, { { INT32_C( 1740194216), INT32_C( 1905768728), INT32_C( 1977850075), INT32_C( 810330346) }, { -INT32_C( 369578692), -INT32_C( 419676071), -INT32_C( 551834829), INT32_C( 1311601317) }, { INT32_C( 1054886454), INT32_C( 1162722399), INT32_C( 1264842452), -INT32_C( 250635486) } }, { { -INT32_C( 877271373), INT32_C( 2067549344), -INT32_C( 806346779), INT32_C( 67058887) }, { INT32_C( 1139603690), INT32_C( 1797908792), INT32_C( 977945748), INT32_C( 1770551222) }, { -INT32_C( 1008437532), INT32_C( 134820276), -INT32_C( 892146264), -INT32_C( 851746168) } }, { { -INT32_C( 30130850), INT32_C( 1853452425), INT32_C( 1463642768), -INT32_C( 1873068634) }, { INT32_C( 1842562869), -INT32_C( 975635152), -INT32_C( 117497279), -INT32_C( 127826022) }, { -INT32_C( 936346860), INT32_C( 
1414543788), INT32_C( 790570023), -INT32_C( 872621306) } }, { { INT32_C( 1308006084), -INT32_C( 1766035706), -INT32_C( 2131822119), INT32_C( 1796229430) }, { -INT32_C( 1042750320), INT32_C( 579252449), INT32_C( 1830454739), -INT32_C( 798655476) }, { INT32_C( 1175378202), -INT32_C( 1172644078), -INT32_C( 1981138429), INT32_C( 1297442453) } }, { { INT32_C( 404577042), -INT32_C( 1531979318), INT32_C( 153394643), INT32_C( 1987392742) }, { -INT32_C( 113816296), -INT32_C( 786645506), INT32_C( 1312699970), -INT32_C( 1004624974) }, { INT32_C( 259196669), -INT32_C( 372666906), -INT32_C( 579652664), INT32_C( 1496008858) } }, { { -INT32_C( 925025026), -INT32_C( 395539435), INT32_C( 267554857), -INT32_C( 578459707) }, { -INT32_C( 1294484044), -INT32_C( 1115425926), -INT32_C( 603209431), INT32_C( 1654663780) }, { INT32_C( 184729509), INT32_C( 359943245), INT32_C( 435382144), -INT32_C( 1116561744) } }, { { INT32_C( 2066447718), INT32_C( 845453065), -INT32_C( 314485208), INT32_C( 1909114813) }, { -INT32_C( 31153788), -INT32_C( 1094998124), -INT32_C( 845494423), INT32_C( 1462778609) }, { INT32_C( 1048800753), INT32_C( 970225594), INT32_C( 265504607), INT32_C( 223168102) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vhsubq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vhsubq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vhsubq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const 
struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(184), UINT8_C( 91), UINT8_C(210), UINT8_C(193), UINT8_C(242), UINT8_C( 54), UINT8_C(244), UINT8_C( 26), UINT8_C(140), UINT8_C( 53), UINT8_C( 7), UINT8_C( 73), UINT8_C(252), UINT8_C(210), UINT8_C(187), UINT8_C(128) }, { UINT8_C(115), UINT8_C(223), UINT8_C(127), UINT8_C( 8), UINT8_C(134), UINT8_C( 58), UINT8_C(198), UINT8_C(239), UINT8_C( 1), UINT8_C( 96), UINT8_C(189), UINT8_C(242), UINT8_C(154), UINT8_C(237), UINT8_C( 73), UINT8_C( 82) }, { UINT8_C( 34), UINT8_C(190), UINT8_C( 41), UINT8_C( 92), UINT8_C( 54), UINT8_C(254), UINT8_C( 23), UINT8_C(149), UINT8_C( 69), UINT8_C(234), UINT8_C(165), UINT8_C(171), UINT8_C( 49), UINT8_C(242), UINT8_C( 57), UINT8_C( 23) } }, { { UINT8_C( 72), UINT8_C( 28), UINT8_C( 20), UINT8_C( 58), UINT8_C( 82), UINT8_C( 8), UINT8_C( 85), UINT8_C(223), UINT8_C( 61), UINT8_C( 92), UINT8_C( 40), UINT8_C( 58), UINT8_C( 46), UINT8_C(227), UINT8_C(186), UINT8_C(162) }, { UINT8_C(194), UINT8_C( 57), UINT8_C(170), UINT8_C( 73), UINT8_C(116), UINT8_C(112), UINT8_C( 56), UINT8_C(117), UINT8_C(208), UINT8_C(245), UINT8_C(104), UINT8_C(106), UINT8_C(226), UINT8_C(177), UINT8_C(189), UINT8_C( 42) }, { UINT8_C(195), UINT8_C(241), UINT8_C(181), UINT8_C(248), UINT8_C(239), UINT8_C(204), UINT8_C( 14), UINT8_C( 53), UINT8_C(182), UINT8_C(179), UINT8_C(224), UINT8_C(232), UINT8_C(166), UINT8_C( 25), UINT8_C(254), UINT8_C( 60) } }, { { UINT8_C(205), UINT8_C(209), UINT8_C(101), UINT8_C( 32), UINT8_C(217), UINT8_C(186), UINT8_MAX, UINT8_C( 22), UINT8_C( 22), UINT8_C( 39), UINT8_C( 80), UINT8_C( 69), UINT8_C( 11), UINT8_C( 11), UINT8_C(231), UINT8_C(205) }, { UINT8_C( 68), UINT8_C(145), UINT8_C( 22), UINT8_C(184), UINT8_C( 1), UINT8_C( 79), UINT8_C( 46), UINT8_C(209), UINT8_C( 68), UINT8_C(150), UINT8_C( 59), UINT8_C( 39), UINT8_C( 71), UINT8_C(248), UINT8_C( 81), UINT8_C( 21) }, { UINT8_C( 68), UINT8_C( 32), UINT8_C( 39), UINT8_C(180), UINT8_C(108), UINT8_C( 53), UINT8_C(104), 
UINT8_C(162), UINT8_C(233), UINT8_C(200), UINT8_C( 10), UINT8_C( 15), UINT8_C(226), UINT8_C(137), UINT8_C( 75), UINT8_C( 92) } }, { { UINT8_C(201), UINT8_C(182), UINT8_C( 53), UINT8_C(162), UINT8_C(112), UINT8_C( 52), UINT8_C(185), UINT8_C(135), UINT8_C( 91), UINT8_C( 9), UINT8_C(204), UINT8_C(102), UINT8_C( 20), UINT8_C(179), UINT8_C( 52), UINT8_C( 89) }, { UINT8_C( 68), UINT8_C( 74), UINT8_C( 17), UINT8_C( 69), UINT8_C(153), UINT8_C( 63), UINT8_C( 22), UINT8_C(222), UINT8_C(213), UINT8_C( 81), UINT8_C( 5), UINT8_C( 29), UINT8_C( 74), UINT8_C( 86), UINT8_C( 50), UINT8_C( 19) }, { UINT8_C( 66), UINT8_C( 54), UINT8_C( 18), UINT8_C( 46), UINT8_C(235), UINT8_C(250), UINT8_C( 81), UINT8_C(212), UINT8_C(195), UINT8_C(220), UINT8_C( 99), UINT8_C( 36), UINT8_C(229), UINT8_C( 46), UINT8_C( 1), UINT8_C( 35) } }, { { UINT8_C( 13), UINT8_C(103), UINT8_C(182), UINT8_C(125), UINT8_C(155), UINT8_C(111), UINT8_C( 4), UINT8_C(246), UINT8_C(120), UINT8_C(208), UINT8_C( 93), UINT8_C(141), UINT8_C(131), UINT8_C(145), UINT8_C(230), UINT8_C(199) }, { UINT8_C(219), UINT8_C(247), UINT8_C( 12), UINT8_C(117), UINT8_C( 55), UINT8_C( 34), UINT8_C( 83), UINT8_C( 12), UINT8_C(116), UINT8_C( 88), UINT8_C( 41), UINT8_C(190), UINT8_C(174), UINT8_C( 91), UINT8_C(209), UINT8_C(187) }, { UINT8_C(153), UINT8_C(184), UINT8_C( 85), UINT8_C( 4), UINT8_C( 50), UINT8_C( 38), UINT8_C(216), UINT8_C(117), UINT8_C( 2), UINT8_C( 60), UINT8_C( 26), UINT8_C(231), UINT8_C(234), UINT8_C( 27), UINT8_C( 10), UINT8_C( 6) } }, { { UINT8_C(194), UINT8_C(135), UINT8_C( 57), UINT8_C( 93), UINT8_C(246), UINT8_C( 61), UINT8_C( 84), UINT8_C(111), UINT8_C( 14), UINT8_C(177), UINT8_C(252), UINT8_C(145), UINT8_C( 66), UINT8_C(226), UINT8_C( 89), UINT8_C( 29) }, { UINT8_C(217), UINT8_C(101), UINT8_C(146), UINT8_C( 16), UINT8_C(136), UINT8_C(229), UINT8_C( 29), UINT8_C(252), UINT8_C( 61), UINT8_C( 70), UINT8_C(186), UINT8_C(236), UINT8_C(162), UINT8_C(139), UINT8_C(167), UINT8_C(100) }, { UINT8_C(244), UINT8_C( 17), 
UINT8_C(211), UINT8_C( 38), UINT8_C( 55), UINT8_C(172), UINT8_C( 27), UINT8_C(185), UINT8_C(232), UINT8_C( 53), UINT8_C( 33), UINT8_C(210), UINT8_C(208), UINT8_C( 43), UINT8_C(217), UINT8_C(220) } }, { { UINT8_C( 19), UINT8_C(224), UINT8_C(194), UINT8_C( 9), UINT8_C( 30), UINT8_C( 22), UINT8_C(120), UINT8_C( 44), UINT8_C(199), UINT8_C(116), UINT8_C(189), UINT8_C( 9), UINT8_C( 86), UINT8_C( 22), UINT8_C( 38), UINT8_C( 48) }, { UINT8_C(124), UINT8_C(185), UINT8_C( 64), UINT8_C( 4), UINT8_C(158), UINT8_C( 93), UINT8_C( 0), UINT8_C(220), UINT8_C(164), UINT8_C(186), UINT8_C(200), UINT8_C( 70), UINT8_C( 69), UINT8_C(111), UINT8_C(170), UINT8_C( 88) }, { UINT8_C(203), UINT8_C( 19), UINT8_C( 65), UINT8_C( 2), UINT8_C(192), UINT8_C(220), UINT8_C( 60), UINT8_C(168), UINT8_C( 17), UINT8_C(221), UINT8_C(250), UINT8_C(225), UINT8_C( 8), UINT8_C(211), UINT8_C(190), UINT8_C(236) } }, { { UINT8_C( 80), UINT8_C(108), UINT8_C( 98), UINT8_C(110), UINT8_C(130), UINT8_C(218), UINT8_C(154), UINT8_C( 73), UINT8_C( 79), UINT8_C( 87), UINT8_C( 82), UINT8_C(165), UINT8_C(110), UINT8_C(121), UINT8_C(213), UINT8_C(234) }, { UINT8_C( 50), UINT8_C( 22), UINT8_C(238), UINT8_C(208), UINT8_C(115), UINT8_C(238), UINT8_C(172), UINT8_C( 23), UINT8_C(168), UINT8_C(116), UINT8_C( 93), UINT8_C(237), UINT8_C(228), UINT8_C( 8), UINT8_C( 70), UINT8_C( 52) }, { UINT8_C( 15), UINT8_C( 43), UINT8_C(186), UINT8_C(207), UINT8_C( 7), UINT8_C(246), UINT8_C(247), UINT8_C( 25), UINT8_C(211), UINT8_C(241), UINT8_C(250), UINT8_C(220), UINT8_C(197), UINT8_C( 56), UINT8_C( 71), UINT8_C( 91) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vhsubq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); 
simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vhsubq_u8(a, b); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vhsubq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(43124), UINT16_C(63394), UINT16_C(15490), UINT16_C(53568), UINT16_C(37779), UINT16_C( 375), UINT16_C(19468), UINT16_C(16107) }, { UINT16_C(55650), UINT16_C(54798), UINT16_C(48071), UINT16_C(28653), UINT16_C(19247), UINT16_C( 4957), UINT16_C(41811), UINT16_C(51015) }, { UINT16_C(59273), UINT16_C( 4298), UINT16_C(49245), UINT16_C(12457), UINT16_C( 9266), UINT16_C(63245), UINT16_C(54364), UINT16_C(48082) } }, { { UINT16_C(59723), UINT16_C(52670), UINT16_C(65317), UINT16_C(47519), UINT16_C( 5778), UINT16_C(40634), UINT16_C(42594), UINT16_C(50652) }, { UINT16_C(60031), UINT16_C(18331), UINT16_C(34981), UINT16_C(54710), UINT16_C( 5075), UINT16_C( 9960), UINT16_C(12470), UINT16_C( 494) }, { UINT16_C(65382), UINT16_C(17169), UINT16_C(15168), UINT16_C(61940), UINT16_C( 351), UINT16_C(15337), UINT16_C(15062), UINT16_C(25079) } }, { { UINT16_C(44057), UINT16_C(16335), UINT16_C(28331), UINT16_C(15864), UINT16_C(45700), UINT16_C(59099), UINT16_C(46936), UINT16_C(55467) }, { UINT16_C(18082), UINT16_C(18207), UINT16_C(54735), UINT16_C(41500), UINT16_C( 1513), UINT16_C(40905), UINT16_C(46901), UINT16_C(20129) }, { UINT16_C(12987), UINT16_C(64600), UINT16_C(52334), UINT16_C(52718), UINT16_C(22093), UINT16_C( 9097), UINT16_C( 17), UINT16_C(17669) } }, { { UINT16_C(28771), UINT16_C( 3981), UINT16_C(34270), UINT16_C(25164), UINT16_C(10296), UINT16_C(36936), UINT16_C(62687), UINT16_C(33128) }, { UINT16_C(34618), UINT16_C( 2505), UINT16_C(58717), UINT16_C(18092), UINT16_C(30186), UINT16_C( 8165), UINT16_C(34348), 
UINT16_C(36718) }, { UINT16_C(62612), UINT16_C( 738), UINT16_C(53312), UINT16_C( 3536), UINT16_C(55591), UINT16_C(14385), UINT16_C(14169), UINT16_C(63741) } }, { { UINT16_C(64502), UINT16_C(54430), UINT16_C(60289), UINT16_C(47414), UINT16_C(32531), UINT16_C(62025), UINT16_C(45683), UINT16_C(44404) }, { UINT16_C(15673), UINT16_C(38583), UINT16_C(25378), UINT16_C( 3548), UINT16_C(49880), UINT16_C( 1068), UINT16_C(39496), UINT16_C(16275) }, { UINT16_C(24414), UINT16_C( 7923), UINT16_C(17455), UINT16_C(21933), UINT16_C(56861), UINT16_C(30478), UINT16_C( 3093), UINT16_C(14064) } }, { { UINT16_C(12950), UINT16_C( 5907), UINT16_C(18973), UINT16_C(12496), UINT16_C( 6601), UINT16_C(15394), UINT16_C(38603), UINT16_C( 1513) }, { UINT16_C(41171), UINT16_C(63131), UINT16_C(30723), UINT16_C(56067), UINT16_C(12090), UINT16_C(33503), UINT16_C(29642), UINT16_C(24769) }, { UINT16_C(51425), UINT16_C(36924), UINT16_C(59661), UINT16_C(43750), UINT16_C(62791), UINT16_C(56481), UINT16_C( 4480), UINT16_C(53908) } }, { { UINT16_C(54693), UINT16_C(49783), UINT16_C(18207), UINT16_C(59634), UINT16_C( 5216), UINT16_C(11300), UINT16_C( 3499), UINT16_C(32305) }, { UINT16_C(52398), UINT16_C(45428), UINT16_C(30532), UINT16_C(32397), UINT16_C(27815), UINT16_C(28929), UINT16_C(49887), UINT16_C(34001) }, { UINT16_C( 1147), UINT16_C( 2177), UINT16_C(59373), UINT16_C(13618), UINT16_C(54236), UINT16_C(56721), UINT16_C(42342), UINT16_C(64688) } }, { { UINT16_C(18583), UINT16_C(46662), UINT16_C(14479), UINT16_C(61342), UINT16_C(49741), UINT16_C(63515), UINT16_C(19664), UINT16_C(32374) }, { UINT16_C(60185), UINT16_C(23855), UINT16_C(48226), UINT16_C( 2524), UINT16_C(56617), UINT16_C( 2170), UINT16_C(19359), UINT16_C(14221) }, { UINT16_C(44735), UINT16_C(11403), UINT16_C(48662), UINT16_C(29409), UINT16_C(62098), UINT16_C(30672), UINT16_C( 152), UINT16_C( 9076) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); 
simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vhsubq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t r = simde_vhsubq_u16(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vhsubq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C( 586011539), UINT32_C(1494387724), UINT32_C( 508636494), UINT32_C(2476525434) }, { UINT32_C( 368102578), UINT32_C(2971585672), UINT32_C(1236965801), UINT32_C(2021672932) }, { UINT32_C( 108954480), UINT32_C(3556368322), UINT32_C(3930802642), UINT32_C( 227426251) } }, { { UINT32_C( 647654682), UINT32_C(1216326905), UINT32_C(1416024282), UINT32_C(1256653720) }, { UINT32_C(1465898959), UINT32_C(1292467876), UINT32_C(4220961559), UINT32_C( 611522058) }, { UINT32_C(3885845157), UINT32_C(4256896810), UINT32_C(2892498657), UINT32_C( 322565831) } }, { { UINT32_C(2102070916), UINT32_C(2495990458), UINT32_C( 870853787), UINT32_C(4269657903) }, { UINT32_C(1263918503), UINT32_C(1922588251), UINT32_C( 728575777), UINT32_C(3377520965) }, { UINT32_C( 419076206), UINT32_C( 286701103), UINT32_C( 71139005), UINT32_C( 446068469) } }, { { UINT32_C(2840042479), UINT32_C( 4066405), UINT32_C(1731405368), UINT32_C(2640687606) }, { UINT32_C(3924343694), UINT32_C( 995852313), UINT32_C(4117153967), UINT32_C(2562635433) }, { UINT32_C(3752816688), UINT32_C(3799074342), UINT32_C(3102092996), UINT32_C( 39026086) } }, { { UINT32_C(3074557265), UINT32_C(1253539858), UINT32_C(2628971430), UINT32_C( 708384668) }, { 
UINT32_C(3960676818), UINT32_C(1361538722), UINT32_C(3762720054), UINT32_C(2507670852) }, { UINT32_C(3851907519), UINT32_C(4240967864), UINT32_C(3728092984), UINT32_C(3395324204) } }, { { UINT32_C( 474790410), UINT32_C(3781624890), UINT32_C(2340231663), UINT32_C( 62240560) }, { UINT32_C(2062534872), UINT32_C(1825314358), UINT32_C(3880522403), UINT32_C( 578667799) }, { UINT32_C(3501095065), UINT32_C( 978155266), UINT32_C(3524821926), UINT32_C(4036753676) } }, { { UINT32_C(3124676991), UINT32_C(3164317133), UINT32_C(4014414014), UINT32_C(2834496719) }, { UINT32_C(4196590020), UINT32_C(2590502647), UINT32_C( 411218689), UINT32_C(4164616056) }, { UINT32_C(3759010781), UINT32_C( 286907243), UINT32_C(1801597662), UINT32_C(3629907627) } }, { { UINT32_C(2528278984), UINT32_C(3713158430), UINT32_C( 902601317), UINT32_C(1541258902) }, { UINT32_C(2522218399), UINT32_C(4012948718), UINT32_C(3892818544), UINT32_C(2061517489) }, { UINT32_C( 3030292), UINT32_C(4145072152), UINT32_C(2799858682), UINT32_C(4034838002) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vhsubq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t r = simde_vhsubq_u32(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vhsub_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vhsub_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vhsub_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vhsub_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vhsub_u16) 
SIMDE_TEST_FUNC_LIST_ENTRY(vhsub_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vhsubq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vhsubq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vhsubq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vhsubq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vhsubq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vhsubq_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/max.c000066400000000000000000001773441400333146700162700ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN max #include "test-neon.h" #include "../../../simde/arm/neon/max.h" static int test_simde_vmax_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -464.60) }, { SIMDE_FLOAT32_C( 866.05), SIMDE_MATH_NANF }, { SIMDE_MATH_NANF, SIMDE_MATH_NANF } }, { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 861.67) }, { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 861.67) }, { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 861.67) } }, #endif { { SIMDE_FLOAT32_C( 378.04), SIMDE_FLOAT32_C( -897.72) }, { SIMDE_FLOAT32_C( -584.86), SIMDE_FLOAT32_C( 922.34) }, { SIMDE_FLOAT32_C( 378.04), SIMDE_FLOAT32_C( 922.34) } }, { { SIMDE_FLOAT32_C( 169.18), SIMDE_FLOAT32_C( 164.66) }, { SIMDE_FLOAT32_C( 295.66), SIMDE_FLOAT32_C( -857.49) }, { SIMDE_FLOAT32_C( 295.66), SIMDE_FLOAT32_C( 164.66) } }, { { SIMDE_FLOAT32_C( -597.22), SIMDE_FLOAT32_C( -740.42) }, { SIMDE_FLOAT32_C( -439.12), SIMDE_FLOAT32_C( -673.24) }, { SIMDE_FLOAT32_C( -439.12), SIMDE_FLOAT32_C( -673.24) } }, { { SIMDE_FLOAT32_C( 693.53), SIMDE_FLOAT32_C( -114.27) }, { SIMDE_FLOAT32_C( 599.27), SIMDE_FLOAT32_C( 359.67) }, { SIMDE_FLOAT32_C( 693.53), SIMDE_FLOAT32_C( 359.67) } }, { { SIMDE_FLOAT32_C( -598.01), SIMDE_FLOAT32_C( -64.73) }, { SIMDE_FLOAT32_C( 384.43), SIMDE_FLOAT32_C( 446.35) }, { SIMDE_FLOAT32_C( 384.43), SIMDE_FLOAT32_C( 446.35) } }, { { SIMDE_FLOAT32_C( 449.76), SIMDE_FLOAT32_C( 326.28) }, { SIMDE_FLOAT32_C( 146.92), SIMDE_FLOAT32_C( 
-725.29) }, { SIMDE_FLOAT32_C( 449.76), SIMDE_FLOAT32_C( 326.28) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vmax_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float32_t values[8 * 2 * sizeof(simde_float32x2_t)]; simde_test_arm_neon_random_f32x2_full(8, 2, values, -1000.0f, 1000.0f, SIMDE_TEST_VEC_FLOAT_NAN | SIMDE_TEST_VEC_FLOAT_EQUAL); for (size_t i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_extract_f32x2(i, 2, 0, values); simde_float32x2_t b = simde_test_arm_neon_random_extract_f32x2(i, 2, 1, values); simde_float32x2_t r = simde_vmax_f32(a, b); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmax_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; simde_float64 b[1]; simde_float64 r[1]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN }, { SIMDE_FLOAT64_C( 170.12) }, { SIMDE_MATH_NAN } }, { { SIMDE_FLOAT64_C( -24.73) }, { SIMDE_MATH_NAN }, { SIMDE_MATH_NAN } }, { { SIMDE_MATH_NAN }, { SIMDE_MATH_NAN }, { SIMDE_MATH_NAN } }, #endif { { SIMDE_FLOAT64_C( -928.88) }, { SIMDE_FLOAT64_C( -928.88) }, { SIMDE_FLOAT64_C( -928.88) } }, { { SIMDE_FLOAT64_C( 148.48) }, { SIMDE_FLOAT64_C( -665.54) }, { SIMDE_FLOAT64_C( 148.48) } }, { { SIMDE_FLOAT64_C( 367.73) }, { SIMDE_FLOAT64_C( -686.85) }, { SIMDE_FLOAT64_C( 367.73) } }, { { SIMDE_FLOAT64_C( 630.12) }, { SIMDE_FLOAT64_C( 510.24) }, { SIMDE_FLOAT64_C( 630.12) } }, { { SIMDE_FLOAT64_C( -284.08) }, { SIMDE_FLOAT64_C( 889.70) }, { SIMDE_FLOAT64_C( 889.70) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_float64x1_t r = simde_vmax_f64(a, b); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float64_t values[8 * 2 * sizeof(simde_float64x1_t)]; simde_test_arm_neon_random_f64x1_full(8, 2, values, -1000.0, 1000.0, SIMDE_TEST_VEC_FLOAT_NAN | SIMDE_TEST_VEC_FLOAT_EQUAL); for (size_t i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_extract_f64x1(i, 2, 0, values); simde_float64x1_t b = simde_test_arm_neon_random_extract_f64x1(i, 2, 1, values); simde_float64x1_t r = simde_vmax_f64(a, b); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmax_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 87), INT8_C( 87), INT8_C( 74), -INT8_C( 102), -INT8_C( 19), -INT8_C( 31), INT8_C( 113), INT8_C( 38) }, { INT8_C( 75), -INT8_C( 62), INT8_C( 18), -INT8_C( 21), -INT8_C( 114), -INT8_C( 14), INT8_C( 47), -INT8_C( 51) }, { INT8_C( 75), INT8_C( 87), INT8_C( 74), -INT8_C( 21), -INT8_C( 19), -INT8_C( 14), INT8_C( 113), INT8_C( 38) } }, { { INT8_C( 14), INT8_C( 106), INT8_C( 19), -INT8_C( 11), -INT8_C( 35), -INT8_C( 42), INT8_C( 78), INT8_C( 120) }, { -INT8_C( 88), -INT8_C( 117), INT8_C( 125), INT8_C( 123), INT8_C( 11), INT8_C( 49), INT8_C( 16), -INT8_C( 75) }, { INT8_C( 14), INT8_C( 106), INT8_C( 125), INT8_C( 123), INT8_C( 11), INT8_C( 49), INT8_C( 78), INT8_C( 120) } }, { { -INT8_C( 120), INT8_C( 90), INT8_C( 79), INT8_C( 118), INT8_C( 59), -INT8_C( 63), -INT8_C( 100), -INT8_C( 122) }, { -INT8_C( 125), -INT8_C( 82), INT8_C( 113), INT8_C( 17), -INT8_C( 95), -INT8_C( 95), -INT8_C( 34), -INT8_C( 81) }, { -INT8_C( 120), 
INT8_C( 90), INT8_C( 113), INT8_C( 118), INT8_C( 59), -INT8_C( 63), -INT8_C( 34), -INT8_C( 81) } }, { { INT8_C( 11), -INT8_C( 15), -INT8_C( 91), -INT8_C( 24), -INT8_C( 56), -INT8_C( 13), INT8_C( 97), INT8_C( 112) }, { INT8_C( 126), -INT8_C( 34), -INT8_C( 20), -INT8_C( 119), INT8_C( 15), -INT8_C( 4), INT8_C( 62), -INT8_C( 104) }, { INT8_C( 126), -INT8_C( 15), -INT8_C( 20), -INT8_C( 24), INT8_C( 15), -INT8_C( 4), INT8_C( 97), INT8_C( 112) } }, { { INT8_C( 86), -INT8_C( 114), INT8_C( 14), -INT8_C( 110), INT8_C( 79), -INT8_C( 86), INT8_C( 24), -INT8_C( 46) }, { INT8_C( 88), -INT8_C( 118), -INT8_C( 29), -INT8_C( 7), INT8_C( 43), -INT8_C( 63), -INT8_C( 87), INT8_C( 54) }, { INT8_C( 88), -INT8_C( 114), INT8_C( 14), -INT8_C( 7), INT8_C( 79), -INT8_C( 63), INT8_C( 24), INT8_C( 54) } }, { { -INT8_C( 77), INT8_C( 78), INT8_C( 30), INT8_C( 123), INT8_C( 65), INT8_MAX, -INT8_C( 21), -INT8_C( 65) }, { INT8_C( 94), -INT8_C( 41), INT8_C( 72), INT8_C( 109), -INT8_C( 45), -INT8_C( 121), INT8_C( 5), INT8_C( 42) }, { INT8_C( 94), INT8_C( 78), INT8_C( 72), INT8_C( 123), INT8_C( 65), INT8_MAX, INT8_C( 5), INT8_C( 42) } }, { { INT8_C( 21), INT8_C( 19), -INT8_C( 68), INT8_C( 100), -INT8_C( 67), -INT8_C( 44), INT8_C( 54), INT8_C( 22) }, { INT8_C( 94), INT8_C( 25), INT8_C( 15), -INT8_C( 119), -INT8_C( 38), -INT8_C( 72), -INT8_C( 65), -INT8_C( 115) }, { INT8_C( 94), INT8_C( 25), INT8_C( 15), INT8_C( 100), -INT8_C( 38), -INT8_C( 44), INT8_C( 54), INT8_C( 22) } }, { { INT8_C( 6), -INT8_C( 34), INT8_C( 8), INT8_C( 71), INT8_C( 93), -INT8_C( 12), INT8_C( 6), -INT8_C( 69) }, { -INT8_C( 53), INT8_C( 79), INT8_C( 41), -INT8_C( 97), -INT8_C( 42), INT8_C( 46), -INT8_C( 55), -INT8_C( 21) }, { INT8_C( 6), INT8_C( 79), INT8_C( 41), INT8_C( 71), INT8_C( 93), INT8_C( 46), INT8_C( 6), -INT8_C( 21) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = 
simde_vmax_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vmax_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 32420), INT16_C( 25491), INT16_C( 4717), INT16_C( 28329) }, { -INT16_C( 3837), -INT16_C( 16305), -INT16_C( 29343), INT16_C( 25906) }, { INT16_C( 32420), INT16_C( 25491), INT16_C( 4717), INT16_C( 28329) } }, { { -INT16_C( 16284), -INT16_C( 26374), -INT16_C( 27696), -INT16_C( 10953) }, { INT16_C( 8018), INT16_C( 12726), INT16_C( 25845), -INT16_C( 25884) }, { INT16_C( 8018), INT16_C( 12726), INT16_C( 25845), -INT16_C( 10953) } }, { { INT16_C( 30946), INT16_C( 20477), -INT16_C( 22646), -INT16_C( 29251) }, { INT16_C( 3480), -INT16_C( 1715), INT16_C( 32666), -INT16_C( 418) }, { INT16_C( 30946), INT16_C( 20477), INT16_C( 32666), -INT16_C( 418) } }, { { INT16_C( 22591), INT16_C( 3990), -INT16_C( 12820), INT16_C( 16100) }, { -INT16_C( 25875), -INT16_C( 7568), INT16_C( 21758), -INT16_C( 8068) }, { INT16_C( 22591), INT16_C( 3990), INT16_C( 21758), INT16_C( 16100) } }, { { INT16_C( 31436), INT16_C( 22063), -INT16_C( 4831), -INT16_C( 17949) }, { INT16_C( 12538), -INT16_C( 27469), INT16_C( 4527), -INT16_C( 4462) }, { INT16_C( 31436), INT16_C( 22063), INT16_C( 4527), -INT16_C( 4462) } }, { { INT16_C( 10602), INT16_C( 22269), -INT16_C( 7434), -INT16_C( 7276) }, { INT16_C( 1148), INT16_C( 31686), INT16_C( 16985), INT16_C( 9563) }, { INT16_C( 10602), INT16_C( 31686), INT16_C( 16985), INT16_C( 9563) } }, { { -INT16_C( 29764), -INT16_C( 8836), INT16_C( 24440), INT16_C( 29335) }, { INT16_C( 19088), INT16_C( 16134), -INT16_C( 26533), -INT16_C( 15058) }, { INT16_C( 19088), INT16_C( 16134), INT16_C( 24440), INT16_C( 29335) } }, { { INT16_C( 11201), -INT16_C( 18405), -INT16_C( 20467), -INT16_C( 30053) }, { INT16_C( 25012), INT16_C( 3333), INT16_C( 24740), INT16_C( 24627) }, { INT16_C( 25012), INT16_C( 3333), INT16_C( 24740), INT16_C( 24627) } }, }; 
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vmax_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vmax_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 2127505976), -INT32_C( 1129472258) }, { INT32_C( 1899583289), -INT32_C( 1846142889) }, { INT32_C( 1899583289), -INT32_C( 1129472258) } }, { { -INT32_C( 157896609), INT32_C( 175452357) }, { -INT32_C( 1933110609), -INT32_C( 339752413) }, { -INT32_C( 157896609), INT32_C( 175452357) } }, { { -INT32_C( 1637027936), -INT32_C( 950330994) }, { -INT32_C( 935816080), INT32_C( 190394028) }, { -INT32_C( 935816080), INT32_C( 190394028) } }, { { -INT32_C( 1543376929), -INT32_C( 810649825) }, { -INT32_C( 1386515063), -INT32_C( 526902720) }, { -INT32_C( 1386515063), -INT32_C( 526902720) } }, { { -INT32_C( 1736571638), -INT32_C( 1889543906) }, { INT32_C( 425171053), -INT32_C( 1524322106) }, { INT32_C( 425171053), -INT32_C( 1524322106) } }, { { -INT32_C( 1102436705), INT32_C( 646838429) }, { -INT32_C( 1361844114), INT32_C( 227437571) }, { -INT32_C( 1102436705), INT32_C( 646838429) } }, { { -INT32_C( 1885008783), INT32_C( 1394476262) }, { INT32_C( 1668117916), -INT32_C( 1006071515) }, { INT32_C( 1668117916), INT32_C( 1394476262) } }, { { INT32_C( 1417892535), -INT32_C( 1183117237) }, { -INT32_C( 60338439), INT32_C( 722073018) }, { INT32_C( 1417892535), INT32_C( 722073018) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vmax_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_x_vmax_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 
static const struct { int64_t a[1]; int64_t b[1]; int64_t r[1]; } test_vec[] = { { { INT64_C( 5432915600686277344) }, { -INT64_C( 1302158069541595515) }, { INT64_C( 5432915600686277344) } }, { { -INT64_C( 8056204626967572500) }, { -INT64_C( 512464674649944311) }, { -INT64_C( 512464674649944311) } }, { { -INT64_C( 7638502919520552514) }, { INT64_C( 5205046660914056749) }, { INT64_C( 5205046660914056749) } }, { { -INT64_C( 6954184997585290663) }, { INT64_C( 2640365464899947157) }, { INT64_C( 2640365464899947157) } }, { { -INT64_C( 7380040324739574033) }, { INT64_C( 1515221292220461410) }, { INT64_C( 1515221292220461410) } }, { { -INT64_C( 768568108066366945) }, { INT64_C( 5481802540706293641) }, { INT64_C( 5481802540706293641) } }, { { INT64_C( 2315660267356940572) }, { INT64_C( 5648611448497048209) }, { INT64_C( 5648611448497048209) } }, { { INT64_C( 7914330759719314816) }, { -INT64_C( 2744258162732533952) }, { INT64_C( 7914330759719314816) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r = simde_x_vmax_s64(a, b); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); simde_int64x1_t b = simde_test_arm_neon_random_i64x1(); simde_int64x1_t r = simde_x_vmax_s64(a, b); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmax_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 34), UINT8_C(239), UINT8_C(244), UINT8_C(249), UINT8_C( 28), UINT8_C(126), UINT8_C( 40), UINT8_C(103) }, { UINT8_C( 7), UINT8_C(201), UINT8_C( 66), UINT8_C(199), 
UINT8_C(156), UINT8_C( 23), UINT8_C(126), UINT8_C(154) }, { UINT8_C( 34), UINT8_C(239), UINT8_C(244), UINT8_C(249), UINT8_C(156), UINT8_C(126), UINT8_C(126), UINT8_C(154) } }, { { UINT8_C(161), UINT8_C(235), UINT8_C(175), UINT8_C( 89), UINT8_C( 96), UINT8_C(166), UINT8_C( 15), UINT8_C( 57) }, { UINT8_C( 24), UINT8_C(173), UINT8_C(182), UINT8_C(141), UINT8_C(121), UINT8_C( 9), UINT8_C(153), UINT8_C(156) }, { UINT8_C(161), UINT8_C(235), UINT8_C(182), UINT8_C(141), UINT8_C(121), UINT8_C(166), UINT8_C(153), UINT8_C(156) } }, { { UINT8_C(248), UINT8_C(142), UINT8_C(149), UINT8_C( 21), UINT8_C( 12), UINT8_C(189), UINT8_C(124), UINT8_C( 19) }, { UINT8_C(134), UINT8_C(190), UINT8_C(218), UINT8_C( 35), UINT8_C(213), UINT8_C( 88), UINT8_C(189), UINT8_C(118) }, { UINT8_C(248), UINT8_C(190), UINT8_C(218), UINT8_C( 35), UINT8_C(213), UINT8_C(189), UINT8_C(189), UINT8_C(118) } }, { { UINT8_C( 67), UINT8_C(108), UINT8_C(207), UINT8_C(164), UINT8_C( 19), UINT8_C(222), UINT8_C(221), UINT8_C( 43) }, { UINT8_C(139), UINT8_C(147), UINT8_C(185), UINT8_C( 5), UINT8_C(156), UINT8_C( 82), UINT8_C(161), UINT8_C(148) }, { UINT8_C(139), UINT8_C(147), UINT8_C(207), UINT8_C(164), UINT8_C(156), UINT8_C(222), UINT8_C(221), UINT8_C(148) } }, { { UINT8_C(224), UINT8_C( 54), UINT8_C(169), UINT8_C(237), UINT8_C(243), UINT8_C( 37), UINT8_C( 0), UINT8_C(122) }, { UINT8_C(227), UINT8_C(219), UINT8_C(157), UINT8_C(184), UINT8_C( 51), UINT8_C( 90), UINT8_C( 47), UINT8_C(119) }, { UINT8_C(227), UINT8_C(219), UINT8_C(169), UINT8_C(237), UINT8_C(243), UINT8_C( 90), UINT8_C( 47), UINT8_C(122) } }, { { UINT8_C(199), UINT8_C(254), UINT8_C( 27), UINT8_C(218), UINT8_C(221), UINT8_C(248), UINT8_C( 5), UINT8_C(104) }, { UINT8_C(139), UINT8_C(190), UINT8_C(109), UINT8_C( 39), UINT8_C( 17), UINT8_C( 14), UINT8_C(187), UINT8_C(241) }, { UINT8_C(199), UINT8_C(254), UINT8_C(109), UINT8_C(218), UINT8_C(221), UINT8_C(248), UINT8_C(187), UINT8_C(241) } }, { { UINT8_C( 68), UINT8_C(101), UINT8_C(222), UINT8_C( 56), 
UINT8_C(138), UINT8_C(223), UINT8_C(178), UINT8_C(110) }, { UINT8_C(186), UINT8_C( 79), UINT8_C( 38), UINT8_C(237), UINT8_C(169), UINT8_C( 85), UINT8_C(100), UINT8_C(112) }, { UINT8_C(186), UINT8_C(101), UINT8_C(222), UINT8_C(237), UINT8_C(169), UINT8_C(223), UINT8_C(178), UINT8_C(112) } }, { { UINT8_C( 84), UINT8_C(127), UINT8_C( 74), UINT8_C( 49), UINT8_C(119), UINT8_C( 80), UINT8_C(153), UINT8_C( 2) }, { UINT8_C( 14), UINT8_C( 7), UINT8_C( 41), UINT8_C( 31), UINT8_C( 21), UINT8_C(229), UINT8_C( 17), UINT8_C( 90) }, { UINT8_C( 84), UINT8_C(127), UINT8_C( 74), UINT8_C( 49), UINT8_C(119), UINT8_C(229), UINT8_C(153), UINT8_C( 90) } }, }; /* test_simde_vmax_u8, check loop: each row's a and b are loaded with simde_vld1_u8, combined by simde_vmax_u8, and the result is compared with the stored r lanes. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vmax_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } /* Test for simde_vmax_u16 (four uint16 lanes). Each test_vec row holds inputs a and b plus the expected result r; in the rows below r is the larger of a and b in each lane. Returns 0 after the loop over all rows. */ static int test_simde_vmax_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(24011), UINT16_C(62465), UINT16_C(60258), UINT16_C(57428) }, { UINT16_C(26316), UINT16_C(33072), UINT16_C(26869), UINT16_C(59293) }, { UINT16_C(26316), UINT16_C(62465), UINT16_C(60258), UINT16_C(59293) } }, { { UINT16_C(20007), UINT16_C(36086), UINT16_C( 9334), UINT16_C(55614) }, { UINT16_C(12969), UINT16_C(14745), UINT16_C(22678), UINT16_C(24990) }, { UINT16_C(20007), UINT16_C(36086), UINT16_C(22678), UINT16_C(55614) } }, { { UINT16_C(40885), UINT16_C( 5974), UINT16_C(43658), UINT16_C(22263) }, { UINT16_C(10000), UINT16_C( 1495), UINT16_C(30096), UINT16_C(47084) }, { UINT16_C(40885), UINT16_C( 5974), UINT16_C(43658), UINT16_C(47084) } }, { { UINT16_C(58051), UINT16_C(14660), UINT16_C(33286), UINT16_C(45074) }, { UINT16_C(44213), UINT16_C(19433), UINT16_C(34564), UINT16_C(47533) }, { UINT16_C(58051), UINT16_C(19433), UINT16_C(34564), UINT16_C(47533) } }, { { UINT16_C( 806), UINT16_C(45265), 
UINT16_C(51373), UINT16_C(48390) }, { UINT16_C(57072), UINT16_C(32962), UINT16_C(44883), UINT16_C( 5687) }, { UINT16_C(57072), UINT16_C(45265), UINT16_C(51373), UINT16_C(48390) } }, { { UINT16_C(31633), UINT16_C(38991), UINT16_C(25086), UINT16_C(45896) }, { UINT16_C(12557), UINT16_C( 4606), UINT16_C(43961), UINT16_C(57291) }, { UINT16_C(31633), UINT16_C(38991), UINT16_C(43961), UINT16_C(57291) } }, { { UINT16_C(40110), UINT16_C(23440), UINT16_C(38500), UINT16_C(21528) }, { UINT16_C(56180), UINT16_C(51156), UINT16_C( 3210), UINT16_C( 7133) }, { UINT16_C(56180), UINT16_C(51156), UINT16_C(38500), UINT16_C(21528) } }, { { UINT16_C(11399), UINT16_C(34227), UINT16_C(64398), UINT16_C(39736) }, { UINT16_C(14125), UINT16_C(59053), UINT16_C(30946), UINT16_C(37317) }, { UINT16_C(14125), UINT16_C(59053), UINT16_C(64398), UINT16_C(39736) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vmax_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vmax_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(1870064833), UINT32_C( 854381305) }, { UINT32_C(3121368286), UINT32_C(1157781054) }, { UINT32_C(3121368286), UINT32_C(1157781054) } }, { { UINT32_C( 440216721), UINT32_C(2807037766) }, { UINT32_C(4141240393), UINT32_C(1858046637) }, { UINT32_C(4141240393), UINT32_C(2807037766) } }, { { UINT32_C(1893611126), UINT32_C(3886205448) }, { UINT32_C(1419882006), UINT32_C(2526716676) }, { UINT32_C(1893611126), UINT32_C(3886205448) } }, { { UINT32_C( 363911119), UINT32_C( 549257175) }, { UINT32_C(4229403471), UINT32_C(2473252381) }, { UINT32_C(4229403471), UINT32_C(2473252381) } }, { { UINT32_C( 352536588), UINT32_C( 704423187) }, { UINT32_C(1484692819), UINT32_C( 284039233) }, { UINT32_C(1484692819), 
UINT32_C( 704423187) } }, { { UINT32_C(3324419823), UINT32_C(3974619805) }, { UINT32_C(2464743029), UINT32_C(3777385172) }, { UINT32_C(3324419823), UINT32_C(3974619805) } }, { { UINT32_C(2935368091), UINT32_C( 584577743) }, { UINT32_C(3497678223), UINT32_C(1575053421) }, { UINT32_C(3497678223), UINT32_C(1575053421) } }, { { UINT32_C(2736981766), UINT32_C(1603209961) }, { UINT32_C(3723589384), UINT32_C(1706956746) }, { UINT32_C(3723589384), UINT32_C(1706956746) } }, }; /* test_simde_vmax_u32, check loop: each row's a and b are loaded with simde_vld1_u32, combined by simde_vmax_u32, and the result is compared with the stored r lanes. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vmax_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } /* Test for simde_x_vmax_u64 (one uint64 lane). With "#if 1" the fixed rows below are checked (r is the larger of a and b in each row) and 0 is returned; the "#else" branch instead builds random a and b vectors, passes a, b and the computed r to the simde_test_arm_neon_write_* helpers (presumably printing them as new test rows) and returns 1. */ static int test_simde_x_vmax_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C(10652172966900850920) }, { UINT64_C(17457258446329908764) }, { UINT64_C(17457258446329908764) } }, { { UINT64_C( 277750937440637858) }, { UINT64_C( 8434053037527416126) }, { UINT64_C( 8434053037527416126) } }, { { UINT64_C( 7645278514851557286) }, { UINT64_C( 1012231759559378954) }, { UINT64_C( 7645278514851557286) } }, { { UINT64_C( 4432513999474080383) }, { UINT64_C(10354795792364217264) }, { UINT64_C(10354795792364217264) } }, { { UINT64_C(16178671308015753934) }, { UINT64_C( 1689420061799509495) }, { UINT64_C(16178671308015753934) } }, { { UINT64_C( 3350145401873763137) }, { UINT64_C( 9203139584767758911) }, { UINT64_C( 9203139584767758911) } }, { { UINT64_C(17523543517649685881) }, { UINT64_C( 8232238381830449806) }, { UINT64_C(17523543517649685881) } }, { { UINT64_C( 3566904722205103828) }, { UINT64_C( 9471933180020386626) }, { UINT64_C( 9471933180020386626) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = 
simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_x_vmax_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t b = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t r = simde_x_vmax_u64(a, b); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -785.97), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -762.23) }, { SIMDE_FLOAT32_C( 56.78), SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -762.23) }, { SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -762.23) } }, #endif { { SIMDE_FLOAT32_C( -535.38), SIMDE_FLOAT32_C( 845.15), SIMDE_FLOAT32_C( 396.86), SIMDE_FLOAT32_C( 355.47) }, { SIMDE_FLOAT32_C( -18.38), SIMDE_FLOAT32_C( 855.51), SIMDE_FLOAT32_C( 739.43), SIMDE_FLOAT32_C( -523.81) }, { SIMDE_FLOAT32_C( -18.38), SIMDE_FLOAT32_C( 855.51), SIMDE_FLOAT32_C( 739.43), SIMDE_FLOAT32_C( 355.47) } }, { { SIMDE_FLOAT32_C( -446.27), SIMDE_FLOAT32_C( -422.02), SIMDE_FLOAT32_C( 105.64), SIMDE_FLOAT32_C( -885.46) }, { SIMDE_FLOAT32_C( -799.73), SIMDE_FLOAT32_C( -894.49), SIMDE_FLOAT32_C( 406.48), SIMDE_FLOAT32_C( 471.13) }, { SIMDE_FLOAT32_C( -446.27), SIMDE_FLOAT32_C( -422.02), SIMDE_FLOAT32_C( 406.48), SIMDE_FLOAT32_C( 471.13) } }, { { SIMDE_FLOAT32_C( -440.41), SIMDE_FLOAT32_C( -725.47), SIMDE_FLOAT32_C( -111.15), SIMDE_FLOAT32_C( 220.63) }, { SIMDE_FLOAT32_C( 187.92), SIMDE_FLOAT32_C( -3.17), SIMDE_FLOAT32_C( -134.93), SIMDE_FLOAT32_C( 247.43) }, { SIMDE_FLOAT32_C( 187.92), SIMDE_FLOAT32_C( 
-3.17), SIMDE_FLOAT32_C( -111.15), SIMDE_FLOAT32_C( 247.43) } }, { { SIMDE_FLOAT32_C( 210.87), SIMDE_FLOAT32_C( -846.16), SIMDE_FLOAT32_C( 468.46), SIMDE_FLOAT32_C( -732.35) }, { SIMDE_FLOAT32_C( -405.02), SIMDE_FLOAT32_C( -75.46), SIMDE_FLOAT32_C( 125.81), SIMDE_FLOAT32_C( 59.60) }, { SIMDE_FLOAT32_C( 210.87), SIMDE_FLOAT32_C( -75.46), SIMDE_FLOAT32_C( 468.46), SIMDE_FLOAT32_C( 59.60) } }, { { SIMDE_FLOAT32_C( -230.31), SIMDE_FLOAT32_C( -477.34), SIMDE_FLOAT32_C( -584.93), SIMDE_FLOAT32_C( 751.31) }, { SIMDE_FLOAT32_C( -621.83), SIMDE_FLOAT32_C( -845.50), SIMDE_FLOAT32_C( -772.50), SIMDE_FLOAT32_C( -68.10) }, { SIMDE_FLOAT32_C( -230.31), SIMDE_FLOAT32_C( -477.34), SIMDE_FLOAT32_C( -584.93), SIMDE_FLOAT32_C( 751.31) } }, { { SIMDE_FLOAT32_C( -267.52), SIMDE_FLOAT32_C( 333.13), SIMDE_FLOAT32_C( 46.43), SIMDE_FLOAT32_C( -67.25) }, { SIMDE_FLOAT32_C( 438.64), SIMDE_FLOAT32_C( -547.09), SIMDE_FLOAT32_C( -596.12), SIMDE_FLOAT32_C( 998.23) }, { SIMDE_FLOAT32_C( 438.64), SIMDE_FLOAT32_C( 333.13), SIMDE_FLOAT32_C( 46.43), SIMDE_FLOAT32_C( 998.23) } }, { { SIMDE_FLOAT32_C( -272.56), SIMDE_FLOAT32_C( 292.73), SIMDE_FLOAT32_C( 218.86), SIMDE_FLOAT32_C( 915.36) }, { SIMDE_FLOAT32_C( -710.44), SIMDE_FLOAT32_C( -916.07), SIMDE_FLOAT32_C( 162.80), SIMDE_FLOAT32_C( 500.43) }, { SIMDE_FLOAT32_C( -272.56), SIMDE_FLOAT32_C( 292.73), SIMDE_FLOAT32_C( 218.86), SIMDE_FLOAT32_C( 915.36) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vmaxq_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float32_t values[8 * 2 * sizeof(simde_float32x4_t)]; simde_test_arm_neon_random_f32x4_full(8, 2, values, -1000.0f, 1000.0f, SIMDE_TEST_VEC_FLOAT_NAN | SIMDE_TEST_VEC_FLOAT_EQUAL); for (size_t i = 0 ; i < 8 ; i++) { simde_float32x4_t a = 
simde_test_arm_neon_random_extract_f32x4(i, 2, 0, values); /* test_simde_vmaxq_f32, "#else" branch: a and b for row i are extracted from the pre-filled values buffer, then a, b and simde_vmaxq_f32(a, b) are handed to the write helpers (presumably to print new test rows); this branch returns 1. */ simde_float32x4_t b = simde_test_arm_neon_random_extract_f32x4(i, 2, 1, values); simde_float32x4_t r = simde_vmaxq_f32(a, b); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Test for simde_vmaxq_f64 (two float64 lanes). The rows guarded by !defined(SIMDE_FAST_NANS) contain NaN inputs and expect NaN wherever either input lane is NaN; the remaining rows expect the larger of a and b in each lane. The "#if 1" branch checks the fixed rows and returns 0. NOTE(review): the trailing argument 1 passed to the f64x2 assert helper is presumably a precision setting - confirm against the helper. */ static int test_simde_vmaxq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 768.08) }, { SIMDE_FLOAT64_C( -167.25), SIMDE_MATH_NAN }, { SIMDE_MATH_NAN, SIMDE_MATH_NAN } }, { { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 381.84) }, { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 381.84) }, { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 381.84) } }, #endif { { SIMDE_FLOAT64_C( -692.58), SIMDE_FLOAT64_C( -923.20) }, { SIMDE_FLOAT64_C( 794.72), SIMDE_FLOAT64_C( -538.08) }, { SIMDE_FLOAT64_C( 794.72), SIMDE_FLOAT64_C( -538.08) } }, { { SIMDE_FLOAT64_C( -695.71), SIMDE_FLOAT64_C( -273.38) }, { SIMDE_FLOAT64_C( 194.40), SIMDE_FLOAT64_C( 637.43) }, { SIMDE_FLOAT64_C( 194.40), SIMDE_FLOAT64_C( 637.43) } }, { { SIMDE_FLOAT64_C( 773.05), SIMDE_FLOAT64_C( -872.85) }, { SIMDE_FLOAT64_C( 76.07), SIMDE_FLOAT64_C( -774.04) }, { SIMDE_FLOAT64_C( 773.05), SIMDE_FLOAT64_C( -774.04) } }, { { SIMDE_FLOAT64_C( -468.97), SIMDE_FLOAT64_C( 74.30) }, { SIMDE_FLOAT64_C( -46.60), SIMDE_FLOAT64_C( 823.76) }, { SIMDE_FLOAT64_C( -46.60), SIMDE_FLOAT64_C( 823.76) } }, { { SIMDE_FLOAT64_C( -706.84), SIMDE_FLOAT64_C( -131.24) }, { SIMDE_FLOAT64_C( -886.68), SIMDE_FLOAT64_C( -622.90) }, { SIMDE_FLOAT64_C( -706.84), SIMDE_FLOAT64_C( -131.24) } }, { { SIMDE_FLOAT64_C( -968.44), SIMDE_FLOAT64_C( 613.76) }, { SIMDE_FLOAT64_C( -385.13), SIMDE_FLOAT64_C( -337.18) }, { SIMDE_FLOAT64_C( -385.13), SIMDE_FLOAT64_C( 613.76) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) 
{ simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vmaxq_f64(a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float64_t values[8 * 2 * sizeof(simde_float64x2_t)]; simde_test_arm_neon_random_f64x2_full(8, 2, values, -1000.0, 1000.0, SIMDE_TEST_VEC_FLOAT_NAN | SIMDE_TEST_VEC_FLOAT_EQUAL); for (size_t i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_extract_f64x2(i, 2, 0, values); simde_float64x2_t b = simde_test_arm_neon_random_extract_f64x2(i, 2, 1, values); simde_float64x2_t r = simde_vmaxq_f64(a, b); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxq_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 97), -INT8_C( 30), INT8_MAX, INT8_C( 21), INT8_C( 44), INT8_C( 108), INT8_C( 50), INT8_C( 2), INT8_C( 55), -INT8_C( 33), -INT8_C( 124), INT8_C( 5), -INT8_C( 27), -INT8_C( 127), -INT8_C( 60), INT8_C( 103) }, { -INT8_C( 19), INT8_C( 86), -INT8_C( 5), INT8_C( 21), INT8_C( 117), INT8_C( 89), -INT8_C( 90), -INT8_C( 125), INT8_C( 56), INT8_C( 10), INT8_C( 98), -INT8_C( 72), -INT8_C( 16), -INT8_C( 97), INT8_C( 26), INT8_C( 81) }, { INT8_C( 97), INT8_C( 86), INT8_MAX, INT8_C( 21), INT8_C( 117), INT8_C( 108), INT8_C( 50), INT8_C( 2), INT8_C( 56), INT8_C( 10), INT8_C( 98), INT8_C( 5), -INT8_C( 16), -INT8_C( 97), INT8_C( 26), INT8_C( 103) } }, { { -INT8_C( 126), -INT8_C( 103), INT8_C( 103), -INT8_C( 82), INT8_C( 5), -INT8_C( 103), -INT8_C( 80), INT8_C( 61), INT8_C( 121), INT8_C( 52), INT8_C( 66), INT8_C( 94), -INT8_C( 74), INT8_C( 7), -INT8_C( 58), -INT8_C( 93) }, { INT8_C( 93), -INT8_C( 63), -INT8_C( 72), -INT8_C( 46), INT8_C( 26), INT8_C( 94), 
INT8_C( 86), INT8_C( 83), INT8_C( 105), -INT8_C( 72), INT8_C( 11), INT8_C( 89), INT8_C( 88), INT8_C( 37), -INT8_C( 86), -INT8_C( 38) }, { INT8_C( 93), -INT8_C( 63), INT8_C( 103), -INT8_C( 46), INT8_C( 26), INT8_C( 94), INT8_C( 86), INT8_C( 83), INT8_C( 121), INT8_C( 52), INT8_C( 66), INT8_C( 94), INT8_C( 88), INT8_C( 37), -INT8_C( 58), -INT8_C( 38) } }, { { -INT8_C( 65), INT8_C( 17), -INT8_C( 120), -INT8_C( 60), -INT8_C( 85), INT8_C( 56), INT8_C( 1), INT8_C( 36), INT8_C( 108), INT8_C( 68), -INT8_C( 126), INT8_C( 34), INT8_C( 75), INT8_C( 72), -INT8_C( 59), -INT8_C( 88) }, { INT8_C( 10), INT8_C( 125), INT8_C( 123), INT8_C( 36), -INT8_C( 36), -INT8_C( 47), INT8_C( 119), INT8_C( 69), -INT8_C( 119), -INT8_C( 126), -INT8_C( 98), -INT8_C( 31), -INT8_C( 88), INT8_C( 72), -INT8_C( 69), INT8_C( 103) }, { INT8_C( 10), INT8_C( 125), INT8_C( 123), INT8_C( 36), -INT8_C( 36), INT8_C( 56), INT8_C( 119), INT8_C( 69), INT8_C( 108), INT8_C( 68), -INT8_C( 98), INT8_C( 34), INT8_C( 75), INT8_C( 72), -INT8_C( 59), INT8_C( 103) } }, { { INT8_C( 90), INT8_C( 67), INT8_C( 43), INT8_C( 5), INT8_C( 123), INT8_C( 45), INT8_C( 41), -INT8_C( 24), INT8_C( 113), -INT8_C( 85), INT8_C( 10), -INT8_C( 68), -INT8_C( 12), -INT8_C( 48), INT8_C( 100), -INT8_C( 2) }, { INT8_C( 77), -INT8_C( 33), INT8_C( 34), INT8_C( 41), -INT8_C( 80), -INT8_C( 102), INT8_C( 110), INT8_C( 58), INT8_C( 28), INT8_C( 12), INT8_C( 27), -INT8_C( 60), INT8_C( 85), -INT8_C( 41), INT8_C( 43), -INT8_C( 81) }, { INT8_C( 90), INT8_C( 67), INT8_C( 43), INT8_C( 41), INT8_C( 123), INT8_C( 45), INT8_C( 110), INT8_C( 58), INT8_C( 113), INT8_C( 12), INT8_C( 27), -INT8_C( 60), INT8_C( 85), -INT8_C( 41), INT8_C( 100), -INT8_C( 2) } }, { { INT8_C( 26), INT8_C( 87), -INT8_C( 76), -INT8_C( 106), -INT8_C( 124), -INT8_C( 35), INT8_C( 126), -INT8_C( 11), -INT8_C( 120), -INT8_C( 120), -INT8_C( 79), INT8_C( 124), INT8_C( 88), INT8_C( 21), INT8_C( 122), -INT8_C( 90) }, { -INT8_C( 11), -INT8_C( 99), -INT8_C( 49), -INT8_C( 91), INT8_C( 55), INT8_C( 
62), -INT8_C( 33), INT8_C( 83), INT8_C( 74), -INT8_C( 5), INT8_C( 24), -INT8_C( 97), -INT8_C( 46), INT8_C( 67), INT8_C( 78), -INT8_C( 20) }, { INT8_C( 26), INT8_C( 87), -INT8_C( 49), -INT8_C( 91), INT8_C( 55), INT8_C( 62), INT8_C( 126), INT8_C( 83), INT8_C( 74), -INT8_C( 5), INT8_C( 24), INT8_C( 124), INT8_C( 88), INT8_C( 67), INT8_C( 122), -INT8_C( 20) } }, { { -INT8_C( 102), INT8_C( 2), -INT8_C( 126), INT8_C( 30), -INT8_C( 33), INT8_C( 0), INT8_C( 19), INT8_C( 104), -INT8_C( 119), -INT8_C( 60), -INT8_C( 28), -INT8_C( 31), -INT8_C( 38), INT8_C( 95), -INT8_C( 121), -INT8_C( 49) }, { -INT8_C( 4), INT8_C( 87), INT8_C( 116), INT8_C( 51), -INT8_C( 107), INT8_C( 84), -INT8_C( 122), -INT8_C( 33), INT8_C( 79), -INT8_C( 98), INT8_MAX, INT8_C( 33), -INT8_C( 30), -INT8_C( 51), INT8_C( 13), INT8_C( 124) }, { -INT8_C( 4), INT8_C( 87), INT8_C( 116), INT8_C( 51), -INT8_C( 33), INT8_C( 84), INT8_C( 19), INT8_C( 104), INT8_C( 79), -INT8_C( 60), INT8_MAX, INT8_C( 33), -INT8_C( 30), INT8_C( 95), INT8_C( 13), INT8_C( 124) } }, { { -INT8_C( 48), -INT8_C( 112), -INT8_C( 101), -INT8_C( 81), -INT8_C( 112), -INT8_C( 82), INT8_C( 23), INT8_C( 25), INT8_C( 115), -INT8_C( 4), -INT8_C( 5), INT8_C( 77), INT8_C( 91), -INT8_C( 126), INT8_C( 28), INT8_C( 87) }, { -INT8_C( 39), -INT8_C( 112), -INT8_C( 118), INT8_C( 110), -INT8_C( 28), INT8_C( 16), INT8_C( 78), INT8_C( 51), -INT8_C( 81), -INT8_C( 51), INT8_C( 84), -INT8_C( 111), -INT8_C( 102), INT8_C( 98), INT8_C( 13), INT8_C( 106) }, { -INT8_C( 39), -INT8_C( 112), -INT8_C( 101), INT8_C( 110), -INT8_C( 28), INT8_C( 16), INT8_C( 78), INT8_C( 51), INT8_C( 115), -INT8_C( 4), INT8_C( 84), INT8_C( 77), INT8_C( 91), INT8_C( 98), INT8_C( 28), INT8_C( 106) } }, { { -INT8_C( 14), -INT8_C( 88), INT8_C( 26), -INT8_C( 126), INT8_C( 87), INT8_C( 49), -INT8_C( 100), -INT8_C( 54), INT8_C( 45), -INT8_C( 105), INT8_C( 23), -INT8_C( 120), INT8_C( 25), INT8_C( 51), -INT8_C( 33), -INT8_C( 13) }, { -INT8_C( 61), INT8_C( 105), INT8_C( 97), -INT8_C( 88), INT8_C( 122), 
-INT8_C( 81), -INT8_C( 37), INT8_C( 41), INT8_C( 124), INT8_C( 48), -INT8_C( 70), INT8_C( 23), -INT8_C( 110), -INT8_C( 57), -INT8_C( 127), -INT8_C( 124) }, { -INT8_C( 14), INT8_C( 105), INT8_C( 97), -INT8_C( 88), INT8_C( 122), INT8_C( 49), -INT8_C( 37), INT8_C( 41), INT8_C( 124), INT8_C( 48), INT8_C( 23), INT8_C( 23), INT8_C( 25), INT8_C( 51), -INT8_C( 33), -INT8_C( 13) } }, }; /* test_simde_vmaxq_s8, check loop: each row's a and b are loaded with simde_vld1q_s8, combined by simde_vmaxq_s8, and the result is compared with the stored r lanes. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vmaxq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } /* Test for simde_vmaxq_s16 (eight int16 lanes). Each test_vec row holds inputs a and b plus the expected result r; in the rows below r is the larger of a and b in each lane. Returns 0 after the loop over all rows. */ static int test_simde_vmaxq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 1220), -INT16_C( 22125), -INT16_C( 32646), -INT16_C( 13267), -INT16_C( 32228), -INT16_C( 13457), -INT16_C( 19765), INT16_C( 29048) }, { INT16_C( 6582), INT16_C( 20054), INT16_C( 14723), INT16_C( 901), -INT16_C( 14824), -INT16_C( 28104), -INT16_C( 12360), INT16_C( 31915) }, { INT16_C( 6582), INT16_C( 20054), INT16_C( 14723), INT16_C( 901), -INT16_C( 14824), -INT16_C( 13457), -INT16_C( 12360), INT16_C( 31915) } }, { { INT16_C( 16083), INT16_C( 20005), INT16_C( 21182), -INT16_C( 9446), -INT16_C( 30251), -INT16_C( 24410), INT16_C( 7740), -INT16_C( 3566) }, { INT16_C( 26679), -INT16_C( 17856), -INT16_C( 14942), -INT16_C( 17731), -INT16_C( 2677), INT16_C( 17229), -INT16_C( 1852), -INT16_C( 26689) }, { INT16_C( 26679), INT16_C( 20005), INT16_C( 21182), -INT16_C( 9446), -INT16_C( 2677), INT16_C( 17229), INT16_C( 7740), -INT16_C( 3566) } }, { { -INT16_C( 6858), -INT16_C( 2587), -INT16_C( 201), INT16_C( 3280), INT16_C( 30345), -INT16_C( 14931), -INT16_C( 16492), -INT16_C( 13385) }, { -INT16_C( 2265), -INT16_C( 13947), INT16_C( 17084), INT16_C( 18308), -INT16_C( 11977), -INT16_C( 1141), INT16_C( 19145), -INT16_C( 110) }, { -INT16_C( 2265), -INT16_C( 2587), INT16_C( 
17084), INT16_C( 18308), INT16_C( 30345), -INT16_C( 1141), INT16_C( 19145), -INT16_C( 110) } }, { { INT16_C( 30767), INT16_C( 26612), -INT16_C( 15241), INT16_C( 115), INT16_C( 8250), -INT16_C( 12603), INT16_C( 31967), INT16_C( 1945) }, { INT16_C( 7795), INT16_C( 12240), INT16_C( 21600), -INT16_C( 26761), INT16_C( 549), -INT16_C( 4462), INT16_C( 9548), INT16_C( 31982) }, { INT16_C( 30767), INT16_C( 26612), INT16_C( 21600), INT16_C( 115), INT16_C( 8250), -INT16_C( 4462), INT16_C( 31967), INT16_C( 31982) } }, { { -INT16_C( 7523), INT16_C( 5347), INT16_C( 22183), -INT16_C( 7915), -INT16_C( 9609), INT16_C( 22192), INT16_C( 18775), -INT16_C( 13731) }, { INT16_C( 11880), -INT16_C( 14086), INT16_C( 29058), -INT16_C( 22432), -INT16_C( 3469), -INT16_C( 16490), -INT16_C( 31721), -INT16_C( 19397) }, { INT16_C( 11880), INT16_C( 5347), INT16_C( 29058), -INT16_C( 7915), -INT16_C( 3469), INT16_C( 22192), INT16_C( 18775), -INT16_C( 13731) } }, { { INT16_C( 7783), INT16_C( 3785), -INT16_C( 8587), -INT16_C( 4881), -INT16_C( 24648), INT16_C( 3906), -INT16_C( 24343), INT16_C( 20954) }, { -INT16_C( 11058), INT16_C( 20505), INT16_C( 31045), -INT16_C( 18184), -INT16_C( 28820), -INT16_C( 31881), -INT16_C( 19693), INT16_C( 31288) }, { INT16_C( 7783), INT16_C( 20505), INT16_C( 31045), -INT16_C( 4881), -INT16_C( 24648), INT16_C( 3906), -INT16_C( 19693), INT16_C( 31288) } }, { { INT16_C( 465), INT16_C( 18056), INT16_C( 30943), -INT16_C( 26830), INT16_C( 29975), INT16_C( 167), -INT16_C( 32491), -INT16_C( 7343) }, { INT16_C( 27477), -INT16_C( 26061), INT16_C( 11492), INT16_C( 20562), -INT16_C( 13893), -INT16_C( 12588), INT16_C( 3196), INT16_C( 20041) }, { INT16_C( 27477), INT16_C( 18056), INT16_C( 30943), INT16_C( 20562), INT16_C( 29975), INT16_C( 167), INT16_C( 3196), INT16_C( 20041) } }, { { -INT16_C( 12019), -INT16_C( 4972), -INT16_C( 14519), INT16_C( 24963), INT16_C( 10812), INT16_C( 20833), -INT16_C( 19541), INT16_C( 52) }, { INT16_C( 26398), INT16_C( 666), -INT16_C( 4973), INT16_C( 20051), 
INT16_C( 10166), INT16_C( 12829), INT16_C( 26163), INT16_C( 16512) }, { INT16_C( 26398), INT16_C( 666), -INT16_C( 4973), INT16_C( 24963), INT16_C( 10812), INT16_C( 20833), INT16_C( 26163), INT16_C( 16512) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vmaxq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vmaxq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 956970151), INT32_C( 1495061858), -INT32_C( 1638694273), -INT32_C( 799373422) }, { -INT32_C( 2022589331), INT32_C( 1145242929), -INT32_C( 126744559), INT32_C( 490486389) }, { INT32_C( 956970151), INT32_C( 1495061858), -INT32_C( 126744559), INT32_C( 490486389) } }, { { -INT32_C( 799652242), -INT32_C( 1691782628), -INT32_C( 2026275339), INT32_C( 1901630212) }, { -INT32_C( 2131113649), -INT32_C( 523944753), -INT32_C( 1160235196), -INT32_C( 539552399) }, { -INT32_C( 799652242), -INT32_C( 523944753), -INT32_C( 1160235196), INT32_C( 1901630212) } }, { { INT32_C( 2007969115), -INT32_C( 1810703969), INT32_C( 1511803734), INT32_C( 785151198) }, { INT32_C( 212780349), INT32_C( 1189966593), INT32_C( 453035434), INT32_C( 922408922) }, { INT32_C( 2007969115), INT32_C( 1189966593), INT32_C( 1511803734), INT32_C( 922408922) } }, { { -INT32_C( 1548899836), -INT32_C( 650592125), -INT32_C( 365734901), INT32_C( 85524424) }, { -INT32_C( 971847996), -INT32_C( 468910278), -INT32_C( 1627386684), -INT32_C( 405407005) }, { -INT32_C( 971847996), -INT32_C( 468910278), -INT32_C( 365734901), INT32_C( 85524424) } }, { { INT32_C( 663388836), INT32_C( 1308672578), -INT32_C( 566741994), -INT32_C( 119254989) }, { INT32_C( 1354692118), -INT32_C( 1187656971), -INT32_C( 1185401386), -INT32_C( 727700177) }, { INT32_C( 1354692118), INT32_C( 1308672578), 
-INT32_C( 566741994), -INT32_C( 119254989) } }, { { -INT32_C( 218420560), INT32_C( 54590701), INT32_C( 1675786288), -INT32_C( 547633464) }, { -INT32_C( 1322313284), -INT32_C( 1184209693), -INT32_C( 915225703), -INT32_C( 1600318736) }, { -INT32_C( 218420560), INT32_C( 54590701), INT32_C( 1675786288), -INT32_C( 547633464) } }, { { INT32_C( 714315837), -INT32_C( 1003629676), INT32_C( 338169676), -INT32_C( 1846311979) }, { -INT32_C( 2143149155), INT32_C( 557493639), INT32_C( 1625992304), -INT32_C( 67008577) }, { INT32_C( 714315837), INT32_C( 557493639), INT32_C( 1625992304), -INT32_C( 67008577) } }, { { -INT32_C( 1272540129), -INT32_C( 1283959961), INT32_C( 952672355), -INT32_C( 1043678428) }, { INT32_C( 1715539166), INT32_C( 696744889), -INT32_C( 410357464), INT32_C( 400788472) }, { INT32_C( 1715539166), INT32_C( 696744889), INT32_C( 952672355), INT32_C( 400788472) } }, }; /* test_simde_vmaxq_s32, check loop: each row's a and b are loaded with simde_vld1q_s32, combined by simde_vmaxq_s32, and the result is compared with the stored r lanes. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vmaxq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } /* Test for simde_x_vmaxq_s64 (two int64 lanes). With "#if 1" the fixed rows below are checked (r is the larger of a and b in each lane) and 0 is returned; the "#else" branch instead builds random a and b vectors, passes a, b and the computed r to the simde_test_arm_neon_write_* helpers (presumably printing them as new test rows) and returns 1. */ static int test_simde_x_vmaxq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { -INT64_C( 33951692342820127), -INT64_C( 7720064575400108197) }, { INT64_C( 6220286073678636896), INT64_C( 8039000713994285934) }, { INT64_C( 6220286073678636896), INT64_C( 8039000713994285934) } }, { { -INT64_C( 5880692503022210489), -INT64_C( 4002460468684024139) }, { -INT64_C( 5503582065510686705), -INT64_C( 1726331517560005364) }, { -INT64_C( 5503582065510686705), -INT64_C( 1726331517560005364) } }, { { INT64_C( 7191034392361048265), -INT64_C( 3713860292413532603) }, { -INT64_C( 2229363362547467241), -INT64_C( 748574811973987054) }, { INT64_C( 7191034392361048265), -INT64_C( 748574811973987054) } }, { { -INT64_C( 2400176054702315105), -INT64_C( 
4486709295076084942) }, { -INT64_C( 7335469304539917699), -INT64_C( 2980279904009015166) }, { -INT64_C( 2400176054702315105), -INT64_C( 2980279904009015166) } }, { { -INT64_C( 4302392772949396012), INT64_C( 6067598668619634376) }, { INT64_C( 100725705291485507), INT64_C( 8329217981442667657) }, { INT64_C( 100725705291485507), INT64_C( 8329217981442667657) } }, { { INT64_C( 6295234650384087816), -INT64_C( 4239014786319642368) }, { INT64_C( 3805530324110201167), -INT64_C( 2757456615845888343) }, { INT64_C( 6295234650384087816), -INT64_C( 2757456615845888343) } }, { { -INT64_C( 4238502126160733370), INT64_C( 4983734319431088098) }, { INT64_C( 6374531560650291608), INT64_C( 4398711673538505568) }, { INT64_C( 6374531560650291608), INT64_C( 4983734319431088098) } }, { { INT64_C( 3567203091056131831), INT64_C( 8967439695274646840) }, { -INT64_C( 3999982176903985801), INT64_C( 8201905370448928416) }, { INT64_C( 3567203091056131831), INT64_C( 8967439695274646840) } } }; /* test_simde_x_vmaxq_s64, check loop: each row's a and b are loaded with simde_vld1q_s64, combined by simde_x_vmaxq_s64, and the result is compared with the stored r lanes. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_x_vmaxq_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); simde_int64x2_t r = simde_x_vmaxq_s64(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Test for simde_vmaxq_u8 (sixteen uint8 lanes). Each test_vec row holds inputs a and b plus the expected result r; in the rows below r is the larger of a and b in each lane. */ static int test_simde_vmaxq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(193), UINT8_C(134), UINT8_C(245), UINT8_C(103), UINT8_C( 63), UINT8_C(110), UINT8_C(101), UINT8_C(105), UINT8_C(153), UINT8_C(168), 
UINT8_C(252), UINT8_C( 55), UINT8_C(169), UINT8_C( 51), UINT8_C( 20), UINT8_C(236) }, { UINT8_C(108), UINT8_C(201), UINT8_C(165), UINT8_C( 52), UINT8_C( 23), UINT8_C(179), UINT8_C( 17), UINT8_C(122), UINT8_C( 19), UINT8_C( 8), UINT8_C(137), UINT8_C(214), UINT8_C(156), UINT8_C(140), UINT8_C( 45), UINT8_C( 93) }, { UINT8_C(193), UINT8_C(201), UINT8_C(245), UINT8_C(103), UINT8_C( 63), UINT8_C(179), UINT8_C(101), UINT8_C(122), UINT8_C(153), UINT8_C(168), UINT8_C(252), UINT8_C(214), UINT8_C(169), UINT8_C(140), UINT8_C( 45), UINT8_C(236) } }, { { UINT8_C( 19), UINT8_C( 34), UINT8_C(197), UINT8_C( 82), UINT8_C(144), UINT8_C( 42), UINT8_C(187), UINT8_C( 42), UINT8_C(210), UINT8_C(183), UINT8_C( 97), UINT8_C(124), UINT8_C(235), UINT8_C(117), UINT8_C(104), UINT8_C( 87) }, { UINT8_C( 63), UINT8_C( 13), UINT8_C(140), UINT8_C( 86), UINT8_C(192), UINT8_C(157), UINT8_C(208), UINT8_C(211), UINT8_C(165), UINT8_C( 89), UINT8_C(169), UINT8_C( 66), UINT8_C(230), UINT8_C(215), UINT8_C(159), UINT8_C(249) }, { UINT8_C( 63), UINT8_C( 34), UINT8_C(197), UINT8_C( 86), UINT8_C(192), UINT8_C(157), UINT8_C(208), UINT8_C(211), UINT8_C(210), UINT8_C(183), UINT8_C(169), UINT8_C(124), UINT8_C(235), UINT8_C(215), UINT8_C(159), UINT8_C(249) } }, { { UINT8_C(249), UINT8_C(100), UINT8_C( 75), UINT8_C(138), UINT8_C(143), UINT8_C( 6), UINT8_C(180), UINT8_C( 97), UINT8_C(189), UINT8_C( 21), UINT8_C(221), UINT8_C(168), UINT8_C(139), UINT8_C( 70), UINT8_C( 0), UINT8_C(202) }, { UINT8_C( 83), UINT8_C(140), UINT8_C( 32), UINT8_C( 20), UINT8_C( 41), UINT8_C(240), UINT8_C(231), UINT8_C(206), UINT8_C( 73), UINT8_C(145), UINT8_C( 16), UINT8_C( 47), UINT8_C(104), UINT8_C(176), UINT8_C( 40), UINT8_C( 97) }, { UINT8_C(249), UINT8_C(140), UINT8_C( 75), UINT8_C(138), UINT8_C(143), UINT8_C(240), UINT8_C(231), UINT8_C(206), UINT8_C(189), UINT8_C(145), UINT8_C(221), UINT8_C(168), UINT8_C(139), UINT8_C(176), UINT8_C( 40), UINT8_C(202) } }, { { UINT8_C( 20), UINT8_C(115), UINT8_C(235), UINT8_C(163), UINT8_C(121), 
UINT8_C(159), UINT8_C( 5), UINT8_C( 55), UINT8_C(181), UINT8_C(226), UINT8_C(223), UINT8_C( 64), UINT8_C( 40), UINT8_C(223), UINT8_C( 10), UINT8_C(124) }, { UINT8_C(107), UINT8_C( 42), UINT8_C(144), UINT8_C(148), UINT8_C( 26), UINT8_C(119), UINT8_C( 99), UINT8_C( 99), UINT8_C( 8), UINT8_C(115), UINT8_C(147), UINT8_C(112), UINT8_C( 35), UINT8_C(187), UINT8_C(210), UINT8_C( 56) }, { UINT8_C(107), UINT8_C(115), UINT8_C(235), UINT8_C(163), UINT8_C(121), UINT8_C(159), UINT8_C( 99), UINT8_C( 99), UINT8_C(181), UINT8_C(226), UINT8_C(223), UINT8_C(112), UINT8_C( 40), UINT8_C(223), UINT8_C(210), UINT8_C(124) } }, { { UINT8_C( 47), UINT8_C(189), UINT8_C(219), UINT8_C(168), UINT8_C( 93), UINT8_C(224), UINT8_C(223), UINT8_C( 18), UINT8_C(195), UINT8_C(191), UINT8_C( 82), UINT8_C(235), UINT8_C(158), UINT8_C( 92), UINT8_C(103), UINT8_C( 10) }, { UINT8_C(134), UINT8_C(247), UINT8_C(158), UINT8_C(160), UINT8_C(111), UINT8_C( 1), UINT8_C( 3), UINT8_C(119), UINT8_C(117), UINT8_C(150), UINT8_C(232), UINT8_C(152), UINT8_C( 82), UINT8_C(186), UINT8_C(208), UINT8_C(129) }, { UINT8_C(134), UINT8_C(247), UINT8_C(219), UINT8_C(168), UINT8_C(111), UINT8_C(224), UINT8_C(223), UINT8_C(119), UINT8_C(195), UINT8_C(191), UINT8_C(232), UINT8_C(235), UINT8_C(158), UINT8_C(186), UINT8_C(208), UINT8_C(129) } }, { { UINT8_C(119), UINT8_C(172), UINT8_C( 41), UINT8_C(212), UINT8_C(140), UINT8_C( 9), UINT8_C(230), UINT8_C( 79), UINT8_C(200), UINT8_C( 56), UINT8_C( 59), UINT8_C(102), UINT8_C(148), UINT8_C(162), UINT8_C(112), UINT8_C( 26) }, { UINT8_C(154), UINT8_C( 15), UINT8_C(186), UINT8_C( 9), UINT8_C( 16), UINT8_C(190), UINT8_C(128), UINT8_C(133), UINT8_C( 84), UINT8_C(104), UINT8_C( 30), UINT8_C(166), UINT8_C( 34), UINT8_C(238), UINT8_C( 39), UINT8_C(154) }, { UINT8_C(154), UINT8_C(172), UINT8_C(186), UINT8_C(212), UINT8_C(140), UINT8_C(190), UINT8_C(230), UINT8_C(133), UINT8_C(200), UINT8_C(104), UINT8_C( 59), UINT8_C(166), UINT8_C(148), UINT8_C(238), UINT8_C(112), UINT8_C(154) } }, { { 
UINT8_C(154), UINT8_C( 81), UINT8_C(110), UINT8_C( 39), UINT8_C( 90), UINT8_C( 85), UINT8_C(118), UINT8_C( 34), UINT8_C(141), UINT8_C(177), UINT8_C(136), UINT8_C( 34), UINT8_C( 84), UINT8_C(249), UINT8_C( 60), UINT8_C(238) }, { UINT8_C( 8), UINT8_C(247), UINT8_C(247), UINT8_C( 24), UINT8_C(181), UINT8_C(119), UINT8_C(158), UINT8_C( 9), UINT8_C(224), UINT8_C(188), UINT8_C(176), UINT8_C( 2), UINT8_C(170), UINT8_C(215), UINT8_C(156), UINT8_C( 69) }, { UINT8_C(154), UINT8_C(247), UINT8_C(247), UINT8_C( 39), UINT8_C(181), UINT8_C(119), UINT8_C(158), UINT8_C( 34), UINT8_C(224), UINT8_C(188), UINT8_C(176), UINT8_C( 34), UINT8_C(170), UINT8_C(249), UINT8_C(156), UINT8_C(238) } }, { { UINT8_C( 40), UINT8_C( 11), UINT8_C(108), UINT8_C(130), UINT8_C( 96), UINT8_C(226), UINT8_C(164), UINT8_C(237), UINT8_C(148), UINT8_C( 45), UINT8_C( 15), UINT8_C(232), UINT8_C( 38), UINT8_C( 76), UINT8_C(214), UINT8_C( 46) }, { UINT8_C( 67), UINT8_C(205), UINT8_C( 70), UINT8_C(248), UINT8_C( 68), UINT8_C(228), UINT8_C( 1), UINT8_C( 36), UINT8_C(160), UINT8_C(177), UINT8_C( 39), UINT8_C( 75), UINT8_C(137), UINT8_C(195), UINT8_C(144), UINT8_C(177) }, { UINT8_C( 67), UINT8_C(205), UINT8_C(108), UINT8_C(248), UINT8_C( 96), UINT8_C(228), UINT8_C(164), UINT8_C(237), UINT8_C(160), UINT8_C(177), UINT8_C( 39), UINT8_C(232), UINT8_C(137), UINT8_C(195), UINT8_C(214), UINT8_C(177) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vmaxq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vmaxq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(50452), UINT16_C( 9264), UINT16_C(54520), UINT16_C(64366), UINT16_C(34356), UINT16_C(54346), UINT16_C(24327), UINT16_C(47384) }, { UINT16_C( 3239), UINT16_C(45245), 
UINT16_C(36184), UINT16_C(56734), UINT16_C(55033), UINT16_C(39946), UINT16_C(26299), UINT16_C(53086) }, { UINT16_C(50452), UINT16_C(45245), UINT16_C(54520), UINT16_C(64366), UINT16_C(55033), UINT16_C(54346), UINT16_C(26299), UINT16_C(53086) } }, { { UINT16_C(36651), UINT16_C( 9203), UINT16_C(24931), UINT16_C(38687), UINT16_C(27111), UINT16_C(61035), UINT16_C(33736), UINT16_C(28584) }, { UINT16_C(26000), UINT16_C(59423), UINT16_C(48626), UINT16_C(60357), UINT16_C(53395), UINT16_C(20103), UINT16_C(58934), UINT16_C(24862) }, { UINT16_C(36651), UINT16_C(59423), UINT16_C(48626), UINT16_C(60357), UINT16_C(53395), UINT16_C(61035), UINT16_C(58934), UINT16_C(28584) } }, { { UINT16_C( 4469), UINT16_C(55429), UINT16_C(42099), UINT16_C(23151), UINT16_C(55821), UINT16_C(54601), UINT16_C(61790), UINT16_C(60996) }, { UINT16_C(25686), UINT16_C(18902), UINT16_C(39713), UINT16_C(46388), UINT16_C(48235), UINT16_C(41219), UINT16_C( 8610), UINT16_C( 5891) }, { UINT16_C(25686), UINT16_C(55429), UINT16_C(42099), UINT16_C(46388), UINT16_C(55821), UINT16_C(54601), UINT16_C(61790), UINT16_C(60996) } }, { { UINT16_C(34867), UINT16_C(42735), UINT16_C(24108), UINT16_C(14592), UINT16_C(18744), UINT16_C(38414), UINT16_C(21050), UINT16_C(37252) }, { UINT16_C(23222), UINT16_C(55514), UINT16_C( 3830), UINT16_C(24973), UINT16_C(37066), UINT16_C(27651), UINT16_C( 1714), UINT16_C(58755) }, { UINT16_C(34867), UINT16_C(55514), UINT16_C(24108), UINT16_C(24973), UINT16_C(37066), UINT16_C(38414), UINT16_C(21050), UINT16_C(58755) } }, { { UINT16_C(29326), UINT16_C(47755), UINT16_C(35792), UINT16_C( 2547), UINT16_C( 469), UINT16_C( 3999), UINT16_C( 9299), UINT16_C( 2720) }, { UINT16_C(31358), UINT16_C(29922), UINT16_C(28553), UINT16_C(21462), UINT16_C(55807), UINT16_C(45504), UINT16_C(17375), UINT16_C(28054) }, { UINT16_C(31358), UINT16_C(47755), UINT16_C(35792), UINT16_C(21462), UINT16_C(55807), UINT16_C(45504), UINT16_C(17375), UINT16_C(28054) } }, { { UINT16_C( 8630), UINT16_C(34343), UINT16_C( 6829), 
UINT16_C(33423), UINT16_C(12059), UINT16_C(28305), UINT16_C(12883), UINT16_C(53624) }, { UINT16_C(23212), UINT16_C(13638), UINT16_C( 7369), UINT16_C(51593), UINT16_C(18933), UINT16_C(54394), UINT16_C( 4492), UINT16_C(16961) }, { UINT16_C(23212), UINT16_C(34343), UINT16_C( 7369), UINT16_C(51593), UINT16_C(18933), UINT16_C(54394), UINT16_C(12883), UINT16_C(53624) } }, { { UINT16_C(26674), UINT16_C(57289), UINT16_C(22658), UINT16_C(40289), UINT16_C(62343), UINT16_C(55819), UINT16_C(33829), UINT16_C(53676) }, { UINT16_C(62174), UINT16_C(43015), UINT16_C(36878), UINT16_C( 881), UINT16_C(60377), UINT16_C(26071), UINT16_C( 6396), UINT16_C(12200) }, { UINT16_C(62174), UINT16_C(57289), UINT16_C(36878), UINT16_C(40289), UINT16_C(62343), UINT16_C(55819), UINT16_C(33829), UINT16_C(53676) } }, { { UINT16_C(29056), UINT16_C( 526), UINT16_C(28873), UINT16_C(20895), UINT16_C(43619), UINT16_C(34859), UINT16_C(55086), UINT16_C( 3417) }, { UINT16_C(24777), UINT16_C(55221), UINT16_C( 9968), UINT16_C(51674), UINT16_C(45329), UINT16_C( 3631), UINT16_C(55241), UINT16_C(18749) }, { UINT16_C(29056), UINT16_C(55221), UINT16_C(28873), UINT16_C(51674), UINT16_C(45329), UINT16_C(34859), UINT16_C(55241), UINT16_C(18749) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vmaxq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vmaxq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(3972880476), UINT32_C(3349222332), UINT32_C(1208741539), UINT32_C(2054709649) }, { UINT32_C(1463202863), UINT32_C( 122068155), UINT32_C(1032132178), UINT32_C(1387139574) }, { UINT32_C(3972880476), UINT32_C(3349222332), UINT32_C(1208741539), UINT32_C(2054709649) } }, { { UINT32_C( 641629290), UINT32_C( 854515599), 
UINT32_C(1585117645), UINT32_C(2329473627) }, { UINT32_C(1658916775), UINT32_C(4268304555), UINT32_C( 876342846), UINT32_C(1787226624) }, { UINT32_C(1658916775), UINT32_C(4268304555), UINT32_C(1585117645), UINT32_C(2329473627) } }, { { UINT32_C(4119905382), UINT32_C(1881636515), UINT32_C(3553534328), UINT32_C( 995993236) }, { UINT32_C(1637695157), UINT32_C(2757690982), UINT32_C(4124613365), UINT32_C(3932118660) }, { UINT32_C(4119905382), UINT32_C(2757690982), UINT32_C(4124613365), UINT32_C(3932118660) } }, { { UINT32_C(3319787298), UINT32_C(3862234734), UINT32_C(1018758056), UINT32_C(1601640106) }, { UINT32_C(3149927508), UINT32_C( 257892122), UINT32_C(1040464058), UINT32_C(3106497430) }, { UINT32_C(3319787298), UINT32_C(3862234734), UINT32_C(1040464058), UINT32_C(3106497430) } }, { { UINT32_C(3246262355), UINT32_C(3081221135), UINT32_C(1643339959), UINT32_C(3401673334) }, { UINT32_C(2558886270), UINT32_C(1537795489), UINT32_C(3013192733), UINT32_C(1668071952) }, { UINT32_C(3246262355), UINT32_C(3081221135), UINT32_C(3013192733), UINT32_C(3401673334) } }, { { UINT32_C(3659852747), UINT32_C(1452395423), UINT32_C(2713224235), UINT32_C(1818982894) }, { UINT32_C(2600792570), UINT32_C(4093029590), UINT32_C(1772523609), UINT32_C( 499913554) }, { UINT32_C(3659852747), UINT32_C(4093029590), UINT32_C(2713224235), UINT32_C(1818982894) } }, { { UINT32_C(2650271998), UINT32_C(3874719931), UINT32_C(4203195148), UINT32_C( 526840356) }, { UINT32_C(3116002275), UINT32_C(1890365719), UINT32_C(2480493377), UINT32_C(1689363814) }, { UINT32_C(3116002275), UINT32_C(3874719931), UINT32_C(4203195148), UINT32_C(1689363814) } }, { { UINT32_C(1342285973), UINT32_C(1027011633), UINT32_C(3292052896), UINT32_C(2481168048) }, { UINT32_C( 558734857), UINT32_C(2425485647), UINT32_C(2988665676), UINT32_C(2786513936) }, { UINT32_C(1342285973), UINT32_C(2425485647), UINT32_C(3292052896), UINT32_C(2786513936) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vmaxq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_x_vmaxq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(15158458638266685330), UINT64_C(11373162734640344784) }, { UINT64_C(11254475624222272126), UINT64_C(13246677659206409250) }, { UINT64_C(15158458638266685330), UINT64_C(13246677659206409250) } }, { { UINT64_C(11935278802667777023), UINT64_C(12261996856397501519) }, { UINT64_C( 5374603228077767431), UINT64_C( 6207871613249367881) }, { UINT64_C(11935278802667777023), UINT64_C(12261996856397501519) } }, { { UINT64_C( 6444871015450700278), UINT64_C( 6643674033896814685) }, { UINT64_C( 2936325922264346957), UINT64_C( 8451493433471055004) }, { UINT64_C( 6444871015450700278), UINT64_C( 8451493433471055004) } }, { { UINT64_C(10703279740501913382), UINT64_C(15013120455850422440) }, { UINT64_C( 4091807906721542082), UINT64_C( 4804624042024460217) }, { UINT64_C(10703279740501913382), UINT64_C(15013120455850422440) } }, { { UINT64_C( 5093566360046464483), UINT64_C(16194272586954493035) }, { UINT64_C(13916485725689283405), UINT64_C(11747542599416302761) }, { UINT64_C(13916485725689283405), UINT64_C(16194272586954493035) } }, { { UINT64_C(16103021188979980693), UINT64_C(12121027572017249593) }, { UINT64_C(12346532623333570445), UINT64_C(10187556841831027198) }, { UINT64_C(16103021188979980693), UINT64_C(12121027572017249593) } }, { { UINT64_C(14142774575225642737), UINT64_C( 3096877429899793143) }, { UINT64_C( 4302370569963211528), UINT64_C(10371326623864137064) }, { UINT64_C(14142774575225642737), UINT64_C(10371326623864137064) } }, { { UINT64_C(16197260828028022393), UINT64_C( 4329134842805674573) }, { UINT64_C( 5994849569874564280), UINT64_C(18324481154154244896) }, { 
UINT64_C(16197260828028022393), UINT64_C(18324481154154244896) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_x_vmaxq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t r = simde_x_vmaxq_u64(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmax_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmax_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vmax_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmax_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmax_s32) SIMDE_TEST_FUNC_LIST_ENTRY(x_vmax_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vmax_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmax_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmax_u32) SIMDE_TEST_FUNC_LIST_ENTRY(x_vmax_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(x_vmaxq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(x_vmaxq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/maxnm.c000066400000000000000000000302241400333146700166040ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN maxnm #include "test-neon.h" #include "../../../simde/arm/neon/maxnm.h" static int test_simde_vmaxnm_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b[2]; 
simde_float32 r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 656.90) }, { SIMDE_FLOAT32_C( 427.79), SIMDE_MATH_NANF }, { SIMDE_FLOAT32_C( 427.79), SIMDE_FLOAT32_C( 656.90) } }, { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 116.96) }, { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -999.94) }, { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 116.96) } }, #endif { { SIMDE_FLOAT32_C( -619.20), SIMDE_FLOAT32_C( -413.47) }, { SIMDE_FLOAT32_C( 871.28), SIMDE_FLOAT32_C( -660.33) }, { SIMDE_FLOAT32_C( 871.28), SIMDE_FLOAT32_C( -413.47) } }, { { SIMDE_FLOAT32_C( 422.55), SIMDE_FLOAT32_C( 160.51) }, { SIMDE_FLOAT32_C( 148.88), SIMDE_FLOAT32_C( 905.13) }, { SIMDE_FLOAT32_C( 422.55), SIMDE_FLOAT32_C( 905.13) } }, { { SIMDE_FLOAT32_C( -605.53), SIMDE_FLOAT32_C( -971.47) }, { SIMDE_FLOAT32_C( 182.75), SIMDE_FLOAT32_C( -737.07) }, { SIMDE_FLOAT32_C( 182.75), SIMDE_FLOAT32_C( -737.07) } }, { { SIMDE_FLOAT32_C( -182.06), SIMDE_FLOAT32_C( -678.54) }, { SIMDE_FLOAT32_C( 165.68), SIMDE_FLOAT32_C( 413.12) }, { SIMDE_FLOAT32_C( 165.68), SIMDE_FLOAT32_C( 413.12) } }, { { SIMDE_FLOAT32_C( 20.28), SIMDE_FLOAT32_C( -770.49) }, { SIMDE_FLOAT32_C( 647.00), SIMDE_FLOAT32_C( -632.40) }, { SIMDE_FLOAT32_C( 647.00), SIMDE_FLOAT32_C( -632.40) } }, { { SIMDE_FLOAT32_C( 949.17), SIMDE_FLOAT32_C( 616.00) }, { SIMDE_FLOAT32_C( -967.88), SIMDE_FLOAT32_C( -301.85) }, { SIMDE_FLOAT32_C( 949.17), SIMDE_FLOAT32_C( 616.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vmaxnm_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float32_t values[8 * 2 * sizeof(simde_float32x2_t)]; simde_test_arm_neon_random_f32x2_full(8, 2, values, -1000.0f, 1000.0f, SIMDE_TEST_VEC_FLOAT_NAN); for (size_t i = 0 ; i < 8 ; i++) { simde_float32x2_t a = 
simde_test_arm_neon_random_extract_f32x2(i, 2, 0, values); simde_float32x2_t b = simde_test_arm_neon_random_extract_f32x2(i, 2, 1, values); simde_float32x2_t r = simde_vmaxnm_f32(a, b); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxnm_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; simde_float64 b[1]; simde_float64 r[1]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN }, { SIMDE_FLOAT64_C( 169.64) }, { SIMDE_FLOAT64_C( 169.64) } }, { { SIMDE_FLOAT64_C( -986.47) }, { SIMDE_MATH_NAN }, { SIMDE_FLOAT64_C( -986.47) } }, { { SIMDE_MATH_NAN }, { SIMDE_MATH_NAN }, { SIMDE_MATH_NAN } }, #endif { { SIMDE_FLOAT64_C( 827.71) }, { SIMDE_FLOAT64_C( 191.90) }, { SIMDE_FLOAT64_C( 827.71) } }, { { SIMDE_FLOAT64_C( -275.70) }, { SIMDE_FLOAT64_C( 295.23) }, { SIMDE_FLOAT64_C( 295.23) } }, { { SIMDE_FLOAT64_C( 188.72) }, { SIMDE_FLOAT64_C( 429.15) }, { SIMDE_FLOAT64_C( 429.15) } }, { { SIMDE_FLOAT64_C( -147.66) }, { SIMDE_FLOAT64_C( 487.54) }, { SIMDE_FLOAT64_C( 487.54) } }, { { SIMDE_FLOAT64_C( -528.84) }, { SIMDE_FLOAT64_C( -797.49) }, { SIMDE_FLOAT64_C( -528.84) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_float64x1_t r = simde_vmaxnm_f64(a, b); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float64_t values[8 * 2 * sizeof(simde_float64x1_t)]; simde_test_arm_neon_random_f64x1_full(8, 2, values, -1000.0, 1000.0, SIMDE_TEST_VEC_FLOAT_NAN); for (size_t i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_extract_f64x1(i, 2, 0, values); simde_float64x1_t b = simde_test_arm_neon_random_extract_f64x1(i, 2, 1, 
values); simde_float64x1_t r = simde_vmaxnm_f64(a, b); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxnmq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -830.15), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 116.42) }, { SIMDE_FLOAT32_C( -786.61), SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 704.38) }, { SIMDE_FLOAT32_C( -786.61), SIMDE_FLOAT32_C( -830.15), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 704.38) } }, #endif { { SIMDE_FLOAT32_C( 42.56), SIMDE_FLOAT32_C( -762.07), SIMDE_FLOAT32_C( 243.80), SIMDE_FLOAT32_C( 127.20) }, { SIMDE_FLOAT32_C( -554.99), SIMDE_FLOAT32_C( 818.92), SIMDE_FLOAT32_C( -693.45), SIMDE_FLOAT32_C( 417.65) }, { SIMDE_FLOAT32_C( 42.56), SIMDE_FLOAT32_C( 818.92), SIMDE_FLOAT32_C( 243.80), SIMDE_FLOAT32_C( 417.65) } }, { { SIMDE_FLOAT32_C( -84.44), SIMDE_FLOAT32_C( 320.00), SIMDE_FLOAT32_C( 451.21), SIMDE_FLOAT32_C( 71.41) }, { SIMDE_FLOAT32_C( -104.53), SIMDE_FLOAT32_C( 615.74), SIMDE_FLOAT32_C( -465.38), SIMDE_FLOAT32_C( 92.29) }, { SIMDE_FLOAT32_C( -84.44), SIMDE_FLOAT32_C( 615.74), SIMDE_FLOAT32_C( 451.21), SIMDE_FLOAT32_C( 92.29) } }, { { SIMDE_FLOAT32_C( -599.78), SIMDE_FLOAT32_C( 592.36), SIMDE_FLOAT32_C( -378.07), SIMDE_FLOAT32_C( -109.06) }, { SIMDE_FLOAT32_C( 386.28), SIMDE_FLOAT32_C( -742.91), SIMDE_FLOAT32_C( 270.77), SIMDE_FLOAT32_C( 106.07) }, { SIMDE_FLOAT32_C( 386.28), SIMDE_FLOAT32_C( 592.36), SIMDE_FLOAT32_C( 270.77), SIMDE_FLOAT32_C( 106.07) } }, { { SIMDE_FLOAT32_C( -573.06), SIMDE_FLOAT32_C( 931.15), SIMDE_FLOAT32_C( -777.52), SIMDE_FLOAT32_C( -359.67) }, { SIMDE_FLOAT32_C( -618.04), SIMDE_FLOAT32_C( -590.04), SIMDE_FLOAT32_C( -655.29), SIMDE_FLOAT32_C( 424.52) }, { SIMDE_FLOAT32_C( 
-573.06), SIMDE_FLOAT32_C( 931.15), SIMDE_FLOAT32_C( -655.29), SIMDE_FLOAT32_C( 424.52) } }, { { SIMDE_FLOAT32_C( -352.11), SIMDE_FLOAT32_C( 588.52), SIMDE_FLOAT32_C( -448.29), SIMDE_FLOAT32_C( 92.89) }, { SIMDE_FLOAT32_C( 407.44), SIMDE_FLOAT32_C( -141.74), SIMDE_FLOAT32_C( -489.46), SIMDE_FLOAT32_C( -677.00) }, { SIMDE_FLOAT32_C( 407.44), SIMDE_FLOAT32_C( 588.52), SIMDE_FLOAT32_C( -448.29), SIMDE_FLOAT32_C( 92.89) } }, { { SIMDE_FLOAT32_C( -821.73), SIMDE_FLOAT32_C( 961.75), SIMDE_FLOAT32_C( 394.41), SIMDE_FLOAT32_C( 73.73) }, { SIMDE_FLOAT32_C( 577.49), SIMDE_FLOAT32_C( 929.03), SIMDE_FLOAT32_C( -833.98), SIMDE_FLOAT32_C( 977.71) }, { SIMDE_FLOAT32_C( 577.49), SIMDE_FLOAT32_C( 961.75), SIMDE_FLOAT32_C( 394.41), SIMDE_FLOAT32_C( 977.71) } }, { { SIMDE_FLOAT32_C( 521.39), SIMDE_FLOAT32_C( -212.06), SIMDE_FLOAT32_C( -131.35), SIMDE_FLOAT32_C( -92.34) }, { SIMDE_FLOAT32_C( 45.03), SIMDE_FLOAT32_C( -860.58), SIMDE_FLOAT32_C( -986.27), SIMDE_FLOAT32_C( 471.98) }, { SIMDE_FLOAT32_C( 521.39), SIMDE_FLOAT32_C( -212.06), SIMDE_FLOAT32_C( -131.35), SIMDE_FLOAT32_C( 471.98) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vmaxnmq_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float32_t values[8 * 2 * sizeof(simde_float32x4_t)]; simde_test_arm_neon_random_f32x4_full(8, 2, values, -1000.0f, 1000.0f, SIMDE_TEST_VEC_FLOAT_NAN); for (size_t i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_extract_f32x4(i, 2, 0, values); simde_float32x4_t b = simde_test_arm_neon_random_extract_f32x4(i, 2, 1, values); simde_float32x4_t r = simde_vmaxnmq_f32(a, b); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxnmq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -182.58) }, { SIMDE_FLOAT64_C( 743.82), SIMDE_MATH_NAN }, { SIMDE_FLOAT64_C( 743.82), SIMDE_FLOAT64_C( -182.58) } }, { { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 493.92) }, { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 934.94) }, { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 934.94) } }, #endif { { SIMDE_FLOAT64_C( -559.75), SIMDE_FLOAT64_C( -168.42) }, { SIMDE_FLOAT64_C( 193.83), SIMDE_FLOAT64_C( -311.43) }, { SIMDE_FLOAT64_C( 193.83), SIMDE_FLOAT64_C( -168.42) } }, { { SIMDE_FLOAT64_C( 685.60), SIMDE_FLOAT64_C( 642.39) }, { SIMDE_FLOAT64_C( -129.77), SIMDE_FLOAT64_C( -575.43) }, { SIMDE_FLOAT64_C( 685.60), SIMDE_FLOAT64_C( 642.39) } }, { { SIMDE_FLOAT64_C( 166.90), SIMDE_FLOAT64_C( -869.88) }, { SIMDE_FLOAT64_C( 87.77), SIMDE_FLOAT64_C( -554.15) }, { SIMDE_FLOAT64_C( 166.90), SIMDE_FLOAT64_C( -554.15) } }, { { SIMDE_FLOAT64_C( -667.35), SIMDE_FLOAT64_C( -294.71) }, { SIMDE_FLOAT64_C( 134.11), SIMDE_FLOAT64_C( 615.74) }, { SIMDE_FLOAT64_C( 134.11), SIMDE_FLOAT64_C( 615.74) } }, { { SIMDE_FLOAT64_C( -85.63), SIMDE_FLOAT64_C( -649.42) }, { SIMDE_FLOAT64_C( -536.78), SIMDE_FLOAT64_C( 843.96) }, { SIMDE_FLOAT64_C( -85.63), SIMDE_FLOAT64_C( 843.96) } }, { { SIMDE_FLOAT64_C( 349.79), SIMDE_FLOAT64_C( 234.11) }, { SIMDE_FLOAT64_C( -713.81), SIMDE_FLOAT64_C( 557.65) }, { SIMDE_FLOAT64_C( 349.79), SIMDE_FLOAT64_C( 557.65) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vmaxnmq_f64(a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float64_t 
values[8 * 2 * sizeof(simde_float64x2_t)]; simde_test_arm_neon_random_f64x2_full(8, 2, values, -1000.0, 1000.0, SIMDE_TEST_VEC_FLOAT_NAN); for (size_t i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_extract_f64x2(i, 2, 0, values); simde_float64x2_t b = simde_test_arm_neon_random_extract_f64x2(i, 2, 1, values); simde_float64x2_t r = simde_vmaxnmq_f64(a, b); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmaxnm_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxnm_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxnmq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxnmq_f64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/maxv.c000066400000000000000000000705571400333146700164540ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN maxv #include "test-neon.h" #include "../../../simde/arm/neon/maxv.h" static int test_simde_vmaxv_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t a[2]; simde_float32_t r; } test_vec[] = { { { SIMDE_FLOAT32_C( 498.24), SIMDE_FLOAT32_C( 700.18) }, SIMDE_FLOAT32_C( 700.18) }, { { SIMDE_FLOAT32_C( -550.14), SIMDE_FLOAT32_C( -372.82) }, SIMDE_FLOAT32_C( -372.82) }, { { SIMDE_FLOAT32_C( -184.85), SIMDE_FLOAT32_C( 347.23) }, SIMDE_FLOAT32_C( 347.23) }, { { SIMDE_FLOAT32_C( -183.13), SIMDE_FLOAT32_C( 910.25) }, SIMDE_FLOAT32_C( 910.25) }, { { SIMDE_FLOAT32_C( 995.08), SIMDE_FLOAT32_C( 458.35) }, SIMDE_FLOAT32_C( 995.08) }, { { SIMDE_FLOAT32_C( 954.33), SIMDE_FLOAT32_C( 629.96) }, SIMDE_FLOAT32_C( 954.33) }, { { SIMDE_FLOAT32_C( -93.64), SIMDE_FLOAT32_C( 684.43) }, SIMDE_FLOAT32_C( 684.43) }, { { SIMDE_FLOAT32_C( -76.95), SIMDE_FLOAT32_C( -360.35) }, SIMDE_FLOAT32_C( -76.95) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = 
simde_vld1_f32(test_vec[i].a); simde_float32_t r = simde_vmaxv_f32(a); simde_assert_equal_f32(r, test_vec[i].r, 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32_t r = simde_vmaxv_f32(a); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxv_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t r; } test_vec[] = { { { -INT8_C( 119), INT8_C( 14), INT8_C( 112), INT8_C( 4), INT8_C( 39), INT8_C( 117), INT8_C( 71), -INT8_C( 89) }, INT8_C( 117) }, { { INT8_C( 81), -INT8_C( 12), -INT8_C( 30), INT8_C( 0), -INT8_C( 49), INT8_C( 49), -INT8_C( 49), INT8_C( 50) }, INT8_C( 81) }, { { -INT8_C( 34), -INT8_C( 73), -INT8_C( 1), -INT8_C( 27), INT8_C( 106), -INT8_C( 54), INT8_C( 19), -INT8_C( 81) }, INT8_C( 106) }, { { INT8_C( 59), -INT8_C( 99), -INT8_C( 13), INT8_C( 90), -INT8_C( 105), -INT8_C( 108), -INT8_C( 64), INT8_C( 32) }, INT8_C( 90) }, { { -INT8_C( 94), INT8_C( 48), INT8_C( 36), -INT8_C( 55), -INT8_C( 91), INT8_C( 108), INT8_C( 112), -INT8_C( 10) }, INT8_C( 112) }, { { INT8_C( 96), INT8_C( 83), -INT8_C( 10), INT8_C( 47), -INT8_C( 124), -INT8_C( 58), INT8_C( 97), INT8_C( 98) }, INT8_C( 98) }, { { INT8_C( 125), INT8_C( 96), INT8_C( 71), -INT8_C( 24), INT8_C( 42), INT8_C( 90), -INT8_C( 105), INT8_C( 101) }, INT8_C( 125) }, { { -INT8_C( 8), -INT8_C( 117), -INT8_C( 65), -INT8_C( 113), INT8_C( 31), INT8_MAX, -INT8_C( 80), -INT8_C( 63) }, INT8_MAX } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); int8_t r = simde_vmaxv_s8(a); simde_assert_equal_i8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); int8_t r = simde_vmaxv_s8(a); simde_test_arm_neon_write_i8x8(2, 
a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxv_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t r; } test_vec[] = { { { -INT16_C( 11089), INT16_C( 21642), -INT16_C( 1216), -INT16_C( 24501) }, INT16_C( 21642) }, { { INT16_C( 16718), -INT16_C( 11569), INT16_C( 12295), -INT16_C( 31436) }, INT16_C( 16718) }, { { INT16_C( 31888), -INT16_C( 17811), INT16_C( 1238), -INT16_C( 12769) }, INT16_C( 31888) }, { { -INT16_C( 8561), -INT16_C( 20898), INT16_C( 3677), INT16_C( 3183) }, INT16_C( 3677) }, { { -INT16_C( 1310), INT16_C( 9057), -INT16_C( 21259), INT16_C( 17347) }, INT16_C( 17347) }, { { -INT16_C( 27667), -INT16_C( 2795), INT16_C( 18883), INT16_C( 21626) }, INT16_C( 21626) }, { { -INT16_C( 6203), -INT16_C( 25586), INT16_C( 12011), INT16_C( 31594) }, INT16_C( 31594) }, { { -INT16_C( 14324), INT16_C( 27177), -INT16_C( 26154), -INT16_C( 18058) }, INT16_C( 27177) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); int16_t r = simde_vmaxv_s16(a); simde_assert_equal_i16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); int16_t r = simde_vmaxv_s16(a); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxv_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t r; } test_vec[] = { { { -INT32_C( 1998792813), INT32_C( 1909170051) }, INT32_C( 1909170051) }, { { -INT32_C( 161030094), -INT32_C( 280305623) }, -INT32_C( 161030094) }, { { -INT32_C( 1299490617), -INT32_C( 1825704570) }, -INT32_C( 1299490617) }, { { -INT32_C( 1795336258), -INT32_C( 2092076048) }, -INT32_C( 1795336258) }, { { -INT32_C( 838129333), -INT32_C( 79702327) }, -INT32_C( 79702327) 
}, { { -INT32_C( 537811530), INT32_C( 1288584069) }, INT32_C( 1288584069) }, { { INT32_C( 452942228), INT32_C( 229452879) }, INT32_C( 452942228) }, { { INT32_C( 1939974787), INT32_C( 1777790750) }, INT32_C( 1939974787) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); int32_t r = simde_vmaxv_s32(a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); int32_t r = simde_vmaxv_s32(a); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxv_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t r; } test_vec[] = { { { UINT8_C( 24), UINT8_C( 1), UINT8_C( 55), UINT8_C(225), UINT8_C(215), UINT8_C(119), UINT8_C(221), UINT8_C(141) }, UINT8_C(225) }, { { UINT8_C( 28), UINT8_C(206), UINT8_C(109), UINT8_C(162), UINT8_C( 10), UINT8_C( 59), UINT8_C(238), UINT8_C(158) }, UINT8_C(238) }, { { UINT8_C(149), UINT8_C(237), UINT8_C(184), UINT8_C(228), UINT8_C( 26), UINT8_C(102), UINT8_C(241), UINT8_C(157) }, UINT8_C(241) }, { { UINT8_C( 16), UINT8_C(146), UINT8_C( 17), UINT8_C( 46), UINT8_C(129), UINT8_C( 7), UINT8_C(151), UINT8_C(154) }, UINT8_C(154) }, { { UINT8_C( 9), UINT8_C(207), UINT8_C(123), UINT8_C(224), UINT8_C( 70), UINT8_C( 88), UINT8_C(110), UINT8_C( 98) }, UINT8_C(224) }, { { UINT8_C( 39), UINT8_C(219), UINT8_C( 4), UINT8_C( 49), UINT8_C( 22), UINT8_C(243), UINT8_C(207), UINT8_C(171) }, UINT8_C(243) }, { { UINT8_C(224), UINT8_C(135), UINT8_C(143), UINT8_C(250), UINT8_C(237), UINT8_C(128), UINT8_C(152), UINT8_C(254) }, UINT8_C(254) }, { { UINT8_C( 19), UINT8_C(169), UINT8_C( 44), UINT8_C(148), UINT8_C(176), UINT8_C(196), UINT8_C( 46), UINT8_C(185) }, UINT8_C(196) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); uint8_t r = simde_vmaxv_u8(a); simde_assert_equal_u8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); uint8_t r = simde_vmaxv_u8(a); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxv_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t r; } test_vec[] = { { { UINT16_C(43667), UINT16_C(55706), UINT16_C( 2050), UINT16_C(10555) }, UINT16_C(55706) }, { { UINT16_C(16611), UINT16_C(63834), UINT16_C(10547), UINT16_C( 5029) }, UINT16_C(63834) }, { { UINT16_C(13489), UINT16_C(40462), UINT16_C(42677), UINT16_C(51356) }, UINT16_C(51356) }, { { UINT16_C(51535), UINT16_C(65372), UINT16_C(35725), UINT16_C( 8377) }, UINT16_C(65372) }, { { UINT16_C(21301), UINT16_C(14329), UINT16_C(13403), UINT16_C(15969) }, UINT16_C(21301) }, { { UINT16_C(47988), UINT16_C(42807), UINT16_C(56549), UINT16_C(38587) }, UINT16_C(56549) }, { { UINT16_C(51473), UINT16_C(50740), UINT16_C(53615), UINT16_C(48782) }, UINT16_C(53615) }, { { UINT16_C(60058), UINT16_C(10173), UINT16_C(30325), UINT16_C(43591) }, UINT16_C(60058) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); uint16_t r = simde_vmaxv_u16(a); simde_assert_equal_u16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); uint16_t r = simde_vmaxv_u16(a); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxv_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t r; } test_vec[] = { { { UINT32_C( 618807497), UINT32_C(3915531124) }, 
UINT32_C(3915531124) }, { { UINT32_C(3817904894), UINT32_C(2272873334) }, UINT32_C(3817904894) }, { { UINT32_C(2202906132), UINT32_C( 423746431) }, UINT32_C(2202906132) }, { { UINT32_C( 994115526), UINT32_C(1072072565) }, UINT32_C(1072072565) }, { { UINT32_C( 996395207), UINT32_C( 153404939) }, UINT32_C( 996395207) }, { { UINT32_C(3605902688), UINT32_C( 358508032) }, UINT32_C(3605902688) }, { { UINT32_C(2476256020), UINT32_C(1303173767) }, UINT32_C(2476256020) }, { { UINT32_C(1317596377), UINT32_C( 982347379) }, UINT32_C(1317596377) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); uint32_t r = simde_vmaxv_u32(a); simde_assert_equal_u32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); uint32_t r = simde_vmaxv_u32(a); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxvq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t a[4]; simde_float32_t r; } test_vec[] = { { { SIMDE_FLOAT32_C( -913.17), SIMDE_FLOAT32_C( -110.66), SIMDE_FLOAT32_C( 975.06), SIMDE_FLOAT32_C( 216.37) }, SIMDE_FLOAT32_C( 975.06) }, { { SIMDE_FLOAT32_C( 803.46), SIMDE_FLOAT32_C( 420.85), SIMDE_FLOAT32_C( -602.65), SIMDE_FLOAT32_C( -768.35) }, SIMDE_FLOAT32_C( 803.46) }, { { SIMDE_FLOAT32_C( 208.72), SIMDE_FLOAT32_C( 938.55), SIMDE_FLOAT32_C( -166.98), SIMDE_FLOAT32_C( -194.96) }, SIMDE_FLOAT32_C( 938.55) }, { { SIMDE_FLOAT32_C( 658.71), SIMDE_FLOAT32_C( -102.67), SIMDE_FLOAT32_C( -344.04), SIMDE_FLOAT32_C( -91.55) }, SIMDE_FLOAT32_C( 658.71) }, { { SIMDE_FLOAT32_C( -612.74), SIMDE_FLOAT32_C( -313.43), SIMDE_FLOAT32_C( -63.59), SIMDE_FLOAT32_C( 951.39) }, SIMDE_FLOAT32_C( 951.39) }, { { SIMDE_FLOAT32_C( -79.23), SIMDE_FLOAT32_C( 283.94), SIMDE_FLOAT32_C( 381.77), SIMDE_FLOAT32_C( 
496.29) }, SIMDE_FLOAT32_C( 496.29) }, { { SIMDE_FLOAT32_C( 588.86), SIMDE_FLOAT32_C( -488.36), SIMDE_FLOAT32_C( 652.93), SIMDE_FLOAT32_C( -868.04) }, SIMDE_FLOAT32_C( 652.93) }, { { SIMDE_FLOAT32_C( -913.85), SIMDE_FLOAT32_C( -606.06), SIMDE_FLOAT32_C( -892.31), SIMDE_FLOAT32_C( -827.02) }, SIMDE_FLOAT32_C( -606.06) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32_t r = simde_vmaxvq_f32(a); simde_assert_equal_f32(r, test_vec[i].r, 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32_t r = simde_vmaxvq_f32(a); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxvq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64_t a[4]; simde_float64_t r; } test_vec[] = { { { SIMDE_FLOAT64_C( 283.28), SIMDE_FLOAT64_C( -917.25) }, SIMDE_FLOAT64_C( 283.28) }, { { SIMDE_FLOAT64_C( 389.34), SIMDE_FLOAT64_C( 86.75) }, SIMDE_FLOAT64_C( 389.34) }, { { SIMDE_FLOAT64_C( 503.60), SIMDE_FLOAT64_C( 786.70) }, SIMDE_FLOAT64_C( 786.70) }, { { SIMDE_FLOAT64_C( 318.40), SIMDE_FLOAT64_C( -287.68) }, SIMDE_FLOAT64_C( 318.40) }, { { SIMDE_FLOAT64_C( 725.25), SIMDE_FLOAT64_C( -848.58) }, SIMDE_FLOAT64_C( 725.25) }, { { SIMDE_FLOAT64_C( 517.36), SIMDE_FLOAT64_C( 383.96) }, SIMDE_FLOAT64_C( 517.36) }, { { SIMDE_FLOAT64_C( 48.75), SIMDE_FLOAT64_C( -826.68) }, SIMDE_FLOAT64_C( 48.75) }, { { SIMDE_FLOAT64_C( -707.59), SIMDE_FLOAT64_C( 436.01) }, SIMDE_FLOAT64_C( 436.01) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64_t r = simde_vmaxvq_f64(a); simde_assert_equal_f64(r, test_vec[i].r, 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { 
simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64_t r = simde_vmaxvq_f64(a); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxvq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t r; } test_vec[] = { { { -INT8_C( 96), INT8_C( 85), -INT8_C( 7), INT8_C( 119), INT8_C( 72), -INT8_C( 60), INT8_C( 40), INT8_C( 39), INT8_C( 24), INT8_C( 39), INT8_C( 122), -INT8_C( 38), -INT8_C( 77), INT8_C( 8), -INT8_C( 46), INT8_C( 49) }, INT8_C( 122) }, { { INT8_C( 11), INT8_C( 12), INT8_C( 101), -INT8_C( 87), -INT8_C( 111), -INT8_C( 79), -INT8_C( 105), INT8_C( 78), -INT8_C( 22), -INT8_C( 44), -INT8_C( 86), INT8_C( 110), INT8_C( 119), -INT8_C( 72), -INT8_C( 22), INT8_C( 23) }, INT8_C( 119) }, { { INT8_C( 13), -INT8_C( 29), -INT8_C( 114), INT8_C( 85), -INT8_C( 88), -INT8_C( 74), INT8_C( 125), -INT8_C( 64), -INT8_C( 35), -INT8_C( 9), -INT8_C( 102), -INT8_C( 111), -INT8_C( 1), INT8_C( 108), -INT8_C( 62), INT8_C( 11) }, INT8_C( 125) }, { { INT8_C( 121), INT8_C( 39), -INT8_C( 76), INT8_C( 10), -INT8_C( 40), INT8_C( 75), INT8_C( 89), -INT8_C( 62), INT8_C( 32), INT8_C( 3), INT8_C( 49), -INT8_C( 105), -INT8_C( 69), INT8_C( 27), -INT8_C( 82), -INT8_C( 55) }, INT8_C( 121) }, { { -INT8_C( 2), INT8_C( 60), INT8_C( 30), -INT8_C( 90), -INT8_C( 13), -INT8_C( 101), INT8_C( 102), -INT8_C( 48), -INT8_C( 109), INT8_C( 0), INT8_C( 97), -INT8_C( 110), INT8_C( 109), INT8_C( 35), -INT8_C( 99), -INT8_C( 26) }, INT8_C( 109) }, { { INT8_C( 75), INT8_C( 82), -INT8_C( 16), INT8_C( 35), -INT8_C( 99), INT8_C( 73), -INT8_C( 26), -INT8_C( 67), INT8_C( 76), INT8_C( 23), INT8_C( 84), INT8_C( 8), INT8_C( 50), INT8_C( 2), -INT8_C( 47), INT8_C( 48) }, INT8_C( 84) }, { { INT8_C( 63), -INT8_C( 17), -INT8_C( 41), INT8_C( 50), -INT8_C( 117), INT8_C( 61), INT8_C( 2), INT8_C( 30), INT8_C( 62), INT8_C( 100), -INT8_C( 80), -INT8_C( 85), -INT8_C( 
121), INT8_C( 78), -INT8_C( 111), -INT8_C( 46) }, INT8_C( 100) }, { { -INT8_C( 96), -INT8_C( 127), -INT8_C( 10), INT8_C( 61), -INT8_C( 53), -INT8_C( 36), -INT8_C( 5), INT8_C( 23), -INT8_C( 13), INT8_C( 79), INT8_C( 31), INT8_C( 37), INT8_C( 82), -INT8_C( 16), INT8_C( 85), -INT8_C( 111) }, INT8_C( 85) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); int8_t r = simde_vmaxvq_s8(a); simde_assert_equal_i8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); int8_t r = simde_vmaxvq_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxvq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t r; } test_vec[] = { { { INT16_C( 11488), INT16_C( 27587), -INT16_C( 14998), -INT16_C( 22391), INT16_C( 14633), -INT16_C( 20141), -INT16_C( 7033), INT16_C( 10115) }, INT16_C( 27587) }, { { INT16_C( 31077), INT16_C( 12389), INT16_C( 24661), INT16_C( 18504), INT16_C( 26543), INT16_C( 365), -INT16_C( 15528), INT16_C( 14482) }, INT16_C( 31077) }, { { INT16_C( 21999), INT16_C( 22947), INT16_C( 11291), INT16_C( 17409), INT16_C( 21605), -INT16_C( 4619), INT16_C( 31032), -INT16_C( 25068) }, INT16_C( 31032) }, { { INT16_C( 31218), INT16_C( 18638), INT16_C( 5849), -INT16_C( 30320), -INT16_C( 386), -INT16_C( 10614), INT16_C( 7617), -INT16_C( 20466) }, INT16_C( 31218) }, { { -INT16_C( 20110), -INT16_C( 29430), INT16_C( 3037), INT16_C( 17106), -INT16_C( 14496), -INT16_C( 26577), INT16_C( 17472), INT16_C( 13110) }, INT16_C( 17472) }, { { INT16_C( 1469), -INT16_C( 26757), INT16_C( 2843), -INT16_C( 26336), -INT16_C( 22007), -INT16_C( 13713), INT16_C( 32199), INT16_C( 14971) }, INT16_C( 32199) }, { { -INT16_C( 31442), INT16_C( 3015), -INT16_C( 26224), -INT16_C( 4018), INT16_C( 
32097), -INT16_C( 24183), -INT16_C( 16447), INT16_C( 32724) }, INT16_C( 32724) }, { { INT16_C( 20420), -INT16_C( 8170), INT16_C( 13915), INT16_C( 25721), -INT16_C( 5664), -INT16_C( 22481), -INT16_C( 21914), -INT16_C( 27166) }, INT16_C( 25721) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); int16_t r = simde_vmaxvq_s16(a); simde_assert_equal_i16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); int16_t r = simde_vmaxvq_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxvq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t r; } test_vec[] = { { { -INT32_C( 1079989969), -INT32_C( 1531908541), INT32_C( 759511404), -INT32_C( 1112794376) }, INT32_C( 759511404) }, { { -INT32_C( 996294039), -INT32_C( 651618568), INT32_C( 1719752959), INT32_C( 838558466) }, INT32_C( 1719752959) }, { { INT32_C( 1341168396), -INT32_C( 151805814), -INT32_C( 769443367), -INT32_C( 1131425709) }, INT32_C( 1341168396) }, { { -INT32_C( 1954468718), INT32_C( 1113893442), INT32_C( 78177538), INT32_C( 1412801352) }, INT32_C( 1412801352) }, { { -INT32_C( 928766658), -INT32_C( 1614899258), INT32_C( 594666192), INT32_C( 1155530930) }, INT32_C( 1155530930) }, { { INT32_C( 1875861804), INT32_C( 229716747), INT32_C( 1611749656), INT32_C( 984958716) }, INT32_C( 1875861804) }, { { INT32_C( 822303083), -INT32_C( 1043217936), INT32_C( 1441022627), INT32_C( 1872413763) }, INT32_C( 1872413763) }, { { INT32_C( 819882277), -INT32_C( 1254256739), -INT32_C( 468300056), INT32_C( 2083732) }, INT32_C( 819882277) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); int32_t r = simde_vmaxvq_s32(a); 
simde_assert_equal_i32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); int32_t r = simde_vmaxvq_s32(a); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxvq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t r; } test_vec[] = { { { UINT8_C( 36), UINT8_C( 34), UINT8_C( 49), UINT8_C( 20), UINT8_C(227), UINT8_C( 2), UINT8_C(213), UINT8_C(135), UINT8_C( 69), UINT8_C(186), UINT8_C(220), UINT8_C(136), UINT8_C(126), UINT8_C(118), UINT8_C(247), UINT8_C(164) }, UINT8_C(247) }, { { UINT8_C(224), UINT8_C(214), UINT8_C(212), UINT8_C(125), UINT8_C(101), UINT8_C( 18), UINT8_C( 50), UINT8_C( 78), UINT8_C( 96), UINT8_C( 72), UINT8_C( 50), UINT8_C(245), UINT8_C( 19), UINT8_C( 81), UINT8_C(245), UINT8_C( 55) }, UINT8_C(245) }, { { UINT8_C(115), UINT8_C( 38), UINT8_C( 76), UINT8_C( 87), UINT8_C( 41), UINT8_C( 33), UINT8_C(222), UINT8_C(110), UINT8_C(219), UINT8_C(186), UINT8_C(246), UINT8_C( 90), UINT8_C( 49), UINT8_C(237), UINT8_C(254), UINT8_C( 17) }, UINT8_C(254) }, { { UINT8_C(195), UINT8_C(210), UINT8_C(142), UINT8_C( 41), UINT8_C(228), UINT8_C(192), UINT8_C(119), UINT8_C( 69), UINT8_C( 9), UINT8_C(169), UINT8_C( 58), UINT8_C( 28), UINT8_C(251), UINT8_C( 47), UINT8_C( 84), UINT8_C(110) }, UINT8_C(251) }, { { UINT8_C( 85), UINT8_C(160), UINT8_C(197), UINT8_C(126), UINT8_C(193), UINT8_C(163), UINT8_C(236), UINT8_C(157), UINT8_C( 94), UINT8_C(226), UINT8_C(247), UINT8_C(143), UINT8_C(208), UINT8_C(245), UINT8_C(160), UINT8_C(147) }, UINT8_C(247) }, { { UINT8_C(199), UINT8_C( 46), UINT8_C(188), UINT8_C(172), UINT8_C(238), UINT8_C( 51), UINT8_C(241), UINT8_C(247), UINT8_C(221), UINT8_C( 43), UINT8_C( 20), UINT8_C(216), UINT8_C( 90), UINT8_C(104), UINT8_C( 70), UINT8_C(175) }, UINT8_C(247) }, { { UINT8_C( 8), UINT8_C( 12), UINT8_C( 46), 
UINT8_C(201), UINT8_C(175), UINT8_C( 26), UINT8_C(102), UINT8_C( 13), UINT8_C(253), UINT8_C( 93), UINT8_C(156), UINT8_C(205), UINT8_C( 82), UINT8_C( 60), UINT8_C( 96), UINT8_C( 26) }, UINT8_C(253) }, { { UINT8_C(106), UINT8_C( 29), UINT8_C(198), UINT8_C( 89), UINT8_C( 80), UINT8_C(183), UINT8_C( 80), UINT8_C( 45), UINT8_C(226), UINT8_C(100), UINT8_C( 5), UINT8_C( 60), UINT8_C(204), UINT8_C( 76), UINT8_C(235), UINT8_C(212) }, UINT8_C(235) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); uint8_t r = simde_vmaxvq_u8(a); simde_assert_equal_u8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); uint8_t r = simde_vmaxvq_u8(a); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxvq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t r; } test_vec[] = { { { UINT16_C( 6488), UINT16_C( 1950), UINT16_C( 1076), UINT16_C(12565), UINT16_C(45410), UINT16_C(46334), UINT16_C(24302), UINT16_C(22734) }, UINT16_C(46334) }, { { UINT16_C(38011), UINT16_C(52401), UINT16_C( 587), UINT16_C(11769), UINT16_C(65382), UINT16_C(13161), UINT16_C(21835), UINT16_C(41735) }, UINT16_C(65382) }, { { UINT16_C(42350), UINT16_C(41642), UINT16_C(49066), UINT16_C( 3283), UINT16_C(53617), UINT16_C(24512), UINT16_C(36656), UINT16_C(43959) }, UINT16_C(53617) }, { { UINT16_C(26915), UINT16_C(28535), UINT16_C(29035), UINT16_C(53660), UINT16_C( 1648), UINT16_C(47876), UINT16_C( 3163), UINT16_C(51550) }, UINT16_C(53660) }, { { UINT16_C( 2225), UINT16_C(23404), UINT16_C(16328), UINT16_C(14695), UINT16_C(10257), UINT16_C(16792), UINT16_C(20407), UINT16_C(56044) }, UINT16_C(56044) }, { { UINT16_C(25784), UINT16_C( 9033), UINT16_C(59093), UINT16_C(17909), UINT16_C(63980), UINT16_C(18176), 
UINT16_C(24069), UINT16_C(46864) }, UINT16_C(63980) }, { { UINT16_C(31846), UINT16_C(11794), UINT16_C(31420), UINT16_C(52583), UINT16_C(65442), UINT16_C(22798), UINT16_C(64079), UINT16_C( 1843) }, UINT16_C(65442) }, { { UINT16_C(32094), UINT16_C(13099), UINT16_C( 8291), UINT16_C(20344), UINT16_C(30745), UINT16_C( 8086), UINT16_C(42710), UINT16_C(15830) }, UINT16_C(42710) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); uint16_t r = simde_vmaxvq_u16(a); simde_assert_equal_u16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); uint16_t r = simde_vmaxvq_u16(a); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmaxvq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t r; } test_vec[] = { { { UINT32_C(3748390947), UINT32_C( 78435170), UINT32_C( 559790802), UINT32_C( 321491380) }, UINT32_C(3748390947) }, { { UINT32_C(1900434446), UINT32_C(2378219380), UINT32_C( 246175287), UINT32_C( 525042428) }, UINT32_C(2378219380) }, { { UINT32_C(3456022123), UINT32_C(1557310089), UINT32_C( 427634532), UINT32_C(3459032768) }, UINT32_C(3459032768) }, { { UINT32_C(1849651962), UINT32_C(1778188081), UINT32_C(1383573589), UINT32_C(2524037675) }, UINT32_C(2524037675) }, { { UINT32_C( 40071288), UINT32_C(2136880410), UINT32_C( 630774629), UINT32_C(2096415874) }, UINT32_C(2136880410) }, { { UINT32_C(1760244534), UINT32_C(2295457587), UINT32_C(3134867599), UINT32_C(2186300426) }, UINT32_C(3134867599) }, { { UINT32_C(3599021244), UINT32_C(1314251497), UINT32_C(1081404862), UINT32_C(3904661681) }, UINT32_C(3904661681) }, { { UINT32_C(3461392283), UINT32_C( 509026702), UINT32_C(1943548265), UINT32_C( 972368253) }, UINT32_C(3461392283) } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); uint32_t r = simde_vmaxvq_u32(a); simde_assert_equal_u32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); uint32_t r = simde_vmaxvq_u32(a); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmaxv_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxv_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxv_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxv_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxv_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxv_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxv_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxvq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxvq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxvq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxvq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxvq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxvq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxvq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmaxvq_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/meson.build000066400000000000000000000021651400333146700174650ustar00rootroot00000000000000simde_test_arm_neon_sources = [] foreach name : simde_neon_families if (name != 'ld1') and (name != 'ld3') and (name != 'ld4') and (name != 'st1') foreach lang : ['c', 'cpp'] source_file = name + '.c' if lang == 'cpp' source_file = configure_file(input: name + '.c', output: name + '.cpp', copy: true) endif simde_test_arm_neon_sources += source_file foreach emul : ['emul', 'native'] extra_flags = ['-DSIMDE_TEST_BARE'] if emul == 'emul' extra_flags += '-DSIMDE_NO_NATIVE' endif x = executable(name + '-' + emul + '-' + lang, source_file, c_args: simde_c_args + simde_c_defs + simde_native_c_flags + extra_flags, cpp_args: simde_c_args + simde_c_defs + simde_native_c_flags + extra_flags, include_directories: simde_include_dir, 
dependencies: simde_deps) test('arm/neon/' + name + '/' + emul + '/' + lang, x, protocol: 'tap', # Emscripten tests must be run from builddir workdir: meson.current_build_dir()) endforeach endforeach endif endforeach simde-0.7.2/test/arm/neon/min.c000066400000000000000000001745351400333146700162650ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN min #include "test-neon.h" #include "../../../simde/arm/neon/min.h" static int test_simde_vmin_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -974.49) }, { SIMDE_FLOAT32_C( -353.04), SIMDE_MATH_NANF }, { SIMDE_MATH_NANF, SIMDE_MATH_NANF } }, { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -0.00) }, { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 0.00) }, { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -0.00) } }, #endif { { SIMDE_FLOAT32_C( 870.11), SIMDE_FLOAT32_C( -974.49) }, { SIMDE_FLOAT32_C( -353.04), SIMDE_FLOAT32_C( 662.92) }, { SIMDE_FLOAT32_C( -353.04), SIMDE_FLOAT32_C( -974.49) } }, { { SIMDE_FLOAT32_C( 946.40), SIMDE_FLOAT32_C( -326.23) }, { SIMDE_FLOAT32_C( 849.15), SIMDE_FLOAT32_C( 339.51) }, { SIMDE_FLOAT32_C( 849.15), SIMDE_FLOAT32_C( -326.23) } }, { { SIMDE_FLOAT32_C( 856.22), SIMDE_FLOAT32_C( -534.53) }, { SIMDE_FLOAT32_C( -798.27), SIMDE_FLOAT32_C( -412.33) }, { SIMDE_FLOAT32_C( -798.27), SIMDE_FLOAT32_C( -534.53) } }, { { SIMDE_FLOAT32_C( -329.92), SIMDE_FLOAT32_C( -939.01) }, { SIMDE_FLOAT32_C( -706.34), SIMDE_FLOAT32_C( -531.75) }, { SIMDE_FLOAT32_C( -706.34), SIMDE_FLOAT32_C( -939.01) } }, { { SIMDE_FLOAT32_C( 284.60), SIMDE_FLOAT32_C( -551.94) }, { SIMDE_FLOAT32_C( 834.19), SIMDE_FLOAT32_C( 83.26) }, { SIMDE_FLOAT32_C( 284.60), SIMDE_FLOAT32_C( -551.94) } }, { { SIMDE_FLOAT32_C( -426.40), SIMDE_FLOAT32_C( -458.52) }, { SIMDE_FLOAT32_C( -893.69), SIMDE_FLOAT32_C( 62.76) }, { SIMDE_FLOAT32_C( -893.69), SIMDE_FLOAT32_C( -458.52) } }, { { SIMDE_FLOAT32_C( -401.07), SIMDE_FLOAT32_C( 123.39) 
}, { SIMDE_FLOAT32_C( 83.17), SIMDE_FLOAT32_C( -66.65) }, { SIMDE_FLOAT32_C( -401.07), SIMDE_FLOAT32_C( -66.65) } }, { { SIMDE_FLOAT32_C( -936.03), SIMDE_FLOAT32_C( 382.28) }, { SIMDE_FLOAT32_C( 186.03), SIMDE_FLOAT32_C( 934.09) }, { SIMDE_FLOAT32_C( -936.03), SIMDE_FLOAT32_C( 382.28) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a, b, r; a = simde_vld1_f32(test_vec[i].a); b = simde_vld1_f32(test_vec[i].b); r = simde_vmin_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vmin_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[1]; simde_float64 b[1]; simde_float64 r[1]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN }, { SIMDE_FLOAT64_C( -211.23) }, { SIMDE_MATH_NAN } }, { { SIMDE_FLOAT64_C( 680.38) }, { SIMDE_MATH_NAN }, { SIMDE_MATH_NAN } }, { { SIMDE_MATH_NAN }, { SIMDE_MATH_NAN }, { SIMDE_MATH_NAN } }, #endif { { SIMDE_FLOAT64_C( 680.38) }, { SIMDE_FLOAT64_C( -211.23) }, { SIMDE_FLOAT64_C( -211.23) } }, { { SIMDE_FLOAT64_C( 566.20) }, { SIMDE_FLOAT64_C( 596.88) }, { SIMDE_FLOAT64_C( 566.20) } }, { { SIMDE_FLOAT64_C( 823.29) }, { SIMDE_FLOAT64_C( -604.90) }, { SIMDE_FLOAT64_C( -604.90) } }, { { SIMDE_FLOAT64_C( -329.55) }, { SIMDE_FLOAT64_C( 536.46) }, { SIMDE_FLOAT64_C( -329.55) } }, { { SIMDE_FLOAT64_C( -444.45) }, { SIMDE_FLOAT64_C( 107.94) }, { SIMDE_FLOAT64_C( -444.45) } }, { { SIMDE_FLOAT64_C( -45.21) }, { SIMDE_FLOAT64_C( 257.74) }, { SIMDE_FLOAT64_C( -45.21) } }, { { SIMDE_FLOAT64_C( -270.43) }, { SIMDE_FLOAT64_C( 26.80) }, { SIMDE_FLOAT64_C( -270.43) } }, { { SIMDE_FLOAT64_C( 904.46) }, { SIMDE_FLOAT64_C( 832.39) }, { SIMDE_FLOAT64_C( 832.39) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_float64x1_t r = simde_vmin_f64(a, b); 
simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vmin_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 103), -INT8_C( 58), INT8_C( 105), INT8_C( 115), INT8_C( 81), -INT8_C( 1), INT8_C( 74), -INT8_C( 20) }, { INT8_C( 41), -INT8_C( 51), -INT8_C( 70), -INT8_C( 85), -INT8_C( 14), -INT8_C( 5), -INT8_C( 29), INT8_C( 70) }, { INT8_C( 41), -INT8_C( 58), -INT8_C( 70), -INT8_C( 85), -INT8_C( 14), -INT8_C( 5), -INT8_C( 29), -INT8_C( 20) } }, { { INT8_C( 124), -INT8_C( 62), INT8_C( 84), -INT8_C( 8), INT8_C( 27), -INT8_C( 24), -INT8_C( 25), -INT8_C( 115) }, { INT8_C( 118), INT8_C( 90), INT8_C( 46), INT8_C( 99), INT8_C( 51), -INT8_C( 97), -INT8_C( 55), -INT8_C( 102) }, { INT8_C( 118), -INT8_C( 62), INT8_C( 46), -INT8_C( 8), INT8_C( 27), -INT8_C( 97), -INT8_C( 55), -INT8_C( 115) } }, { { INT8_C( 102), INT8_C( 50), INT8_C( 13), -INT8_C( 73), INT8_C( 49), INT8_C( 88), -INT8_C( 93), INT8_C( 90) }, { INT8_C( 37), INT8_C( 93), INT8_C( 5), INT8_C( 23), INT8_C( 88), -INT8_C( 23), INT8_C( 94), -INT8_C( 44) }, { INT8_C( 37), INT8_C( 50), INT8_C( 5), -INT8_C( 73), INT8_C( 49), -INT8_C( 23), -INT8_C( 93), -INT8_C( 44) } }, { { -INT8_C( 85), -INT8_C( 78), -INT8_C( 51), -INT8_C( 58), -INT8_C( 101), -INT8_C( 76), INT8_C( 84), INT8_C( 17) }, { INT8_C( 14), -INT8_C( 126), INT8_C( 116), INT8_C( 65), INT8_C( 33), INT8_C( 61), -INT8_C( 36), -INT8_C( 121) }, { -INT8_C( 85), -INT8_C( 126), -INT8_C( 51), -INT8_C( 58), -INT8_C( 101), -INT8_C( 76), -INT8_C( 36), -INT8_C( 121) } }, { { INT8_C( 112), -INT8_C( 23), INT8_C( 62), -INT8_C( 95), INT8_C( 65), -INT8_C( 31), -INT8_C( 4), INT8_C( 103) }, { INT8_C( 62), INT8_C( 1), INT8_C( 126), -INT8_C( 105), -INT8_C( 22), -INT8_C( 36), INT8_C( 107), -INT8_C( 106) }, { INT8_C( 62), -INT8_C( 23), INT8_C( 62), -INT8_C( 105), -INT8_C( 22), -INT8_C( 36), -INT8_C( 4), -INT8_C( 106) } }, { { -INT8_C( 113), INT8_C( 56), INT8_C( 92), INT8_C( 42), 
-INT8_C( 20), -INT8_C( 80), INT8_C( 59), -INT8_C( 5) }, { INT8_C( 50), -INT8_C( 81), INT8_C( 60), INT8_C( 84), -INT8_C( 20), INT8_C( 24), -INT8_C( 37), INT8_C( 92) }, { -INT8_C( 113), -INT8_C( 81), INT8_C( 60), INT8_C( 42), -INT8_C( 20), -INT8_C( 80), -INT8_C( 37), -INT8_C( 5) } }, { { INT8_C( 2), INT8_C( 26), -INT8_C( 2), INT8_C( 67), -INT8_C( 5), -INT8_C( 6), -INT8_C( 86), INT8_C( 58) }, { -INT8_C( 5), INT8_C( 41), -INT8_C( 47), -INT8_C( 26), INT8_C( 5), INT8_C( 60), INT8_C( 124), -INT8_C( 108) }, { -INT8_C( 5), INT8_C( 26), -INT8_C( 47), -INT8_C( 26), -INT8_C( 5), -INT8_C( 6), -INT8_C( 86), -INT8_C( 108) } }, { { INT8_C( 117), -INT8_C( 40), -INT8_C( 66), INT8_C( 97), -INT8_C( 119), -INT8_C( 7), INT8_C( 92), -INT8_C( 69) }, { -INT8_C( 88), -INT8_C( 103), INT8_C( 15), -INT8_C( 107), -INT8_C( 79), -INT8_C( 21), -INT8_C( 15), -INT8_C( 77) }, { -INT8_C( 88), -INT8_C( 103), -INT8_C( 66), -INT8_C( 107), -INT8_C( 119), -INT8_C( 21), -INT8_C( 15), -INT8_C( 77) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vmin_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vmin_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 14745), INT16_C( 29545), -INT16_C( 175), -INT16_C( 5046) }, { -INT16_C( 13015), -INT16_C( 21574), -INT16_C( 1038), INT16_C( 18147) }, { -INT16_C( 14745), -INT16_C( 21574), -INT16_C( 1038), -INT16_C( 5046) } }, { { -INT16_C( 15748), -INT16_C( 1964), -INT16_C( 6117), -INT16_C( 29209) }, { INT16_C( 23158), INT16_C( 25390), -INT16_C( 24781), -INT16_C( 25911) }, { -INT16_C( 15748), -INT16_C( 1964), -INT16_C( 24781), -INT16_C( 29209) } }, { { INT16_C( 12902), -INT16_C( 18675), INT16_C( 22577), INT16_C( 23203) }, { INT16_C( 23845), INT16_C( 5893), -INT16_C( 5800), -INT16_C( 11170) }, { 
INT16_C( 12902), -INT16_C( 18675), -INT16_C( 5800), -INT16_C( 11170) } }, { { -INT16_C( 19797), -INT16_C( 14643), -INT16_C( 19301), INT16_C( 4436) }, { -INT16_C( 32242), INT16_C( 16756), INT16_C( 15649), -INT16_C( 30756) }, { -INT16_C( 32242), -INT16_C( 14643), -INT16_C( 19301), -INT16_C( 30756) } }, { { -INT16_C( 5776), -INT16_C( 24258), -INT16_C( 7871), INT16_C( 26620) }, { INT16_C( 318), -INT16_C( 26754), -INT16_C( 8982), -INT16_C( 27029) }, { -INT16_C( 5776), -INT16_C( 26754), -INT16_C( 8982), -INT16_C( 27029) } }, { { INT16_C( 14479), INT16_C( 10844), -INT16_C( 20244), -INT16_C( 1221) }, { -INT16_C( 20686), INT16_C( 21564), INT16_C( 6380), INT16_C( 23771) }, { -INT16_C( 20686), INT16_C( 10844), -INT16_C( 20244), -INT16_C( 1221) } }, { { INT16_C( 6658), INT16_C( 17406), -INT16_C( 1285), INT16_C( 15018) }, { INT16_C( 10747), -INT16_C( 6447), INT16_C( 15365), -INT16_C( 27524) }, { INT16_C( 6658), -INT16_C( 6447), -INT16_C( 1285), -INT16_C( 27524) } }, { { -INT16_C( 10123), INT16_C( 25022), -INT16_C( 1655), -INT16_C( 17572) }, { -INT16_C( 26200), -INT16_C( 27377), -INT16_C( 5199), -INT16_C( 19471) }, { -INT16_C( 26200), -INT16_C( 27377), -INT16_C( 5199), -INT16_C( 19471) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vmin_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vmin_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 1936311911), -INT32_C( 330629295) }, { -INT32_C( 1413821143), INT32_C( 1189346290) }, { -INT32_C( 1413821143), -INT32_C( 330629295) } }, { { -INT32_C( 128662916), -INT32_C( 1914181605) }, { INT32_C( 1663982198), -INT32_C( 1698062541) }, { -INT32_C( 128662916), -INT32_C( 1914181605) } }, { { -INT32_C( 1223871898), INT32_C( 1520654385) }, { INT32_C( 
386227493), -INT32_C( 731977384) }, { -INT32_C( 1223871898), -INT32_C( 731977384) } }, { { -INT32_C( 959597909), INT32_C( 290763931) }, { INT32_C( 1098154510), -INT32_C( 2015609567) }, { -INT32_C( 959597909), -INT32_C( 2015609567) } }, { { -INT32_C( 1589712528), INT32_C( 1744625985) }, { -INT32_C( 1753349826), -INT32_C( 1771315990) }, { -INT32_C( 1753349826), -INT32_C( 1771315990) } }, { { INT32_C( 710686863), -INT32_C( 79974164) }, { INT32_C( 1413263154), INT32_C( 1557862636) }, { INT32_C( 710686863), -INT32_C( 79974164) } }, { { INT32_C( 1140726274), INT32_C( 984283899) }, { -INT32_C( 422499845), -INT32_C( 1803797499) }, { -INT32_C( 422499845), -INT32_C( 1803797499) } }, { { INT32_C( 1639897205), -INT32_C( 1151534711) }, { -INT32_C( 1794139736), -INT32_C( 1275991119) }, { -INT32_C( 1794139736), -INT32_C( 1275991119) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vmin_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_x_vmin_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int64_t b[1]; int64_t r[1]; } test_vec[] = { { { -INT64_C( 6605621020118094869) }, { -INT64_C( 4337702413104860846) }, { -INT64_C( 6605621020118094869) } }, { { -INT64_C( 7075196481814053712) }, { -INT64_C( 8208340683569924561) }, { -INT64_C( 8208340683569924561) } }, { { -INT64_C( 8693343429876967672) }, { -INT64_C( 782921250267405381) }, { -INT64_C( 8693343429876967672) } }, { { -INT64_C( 8239917855590612034) }, { INT64_C( 4077433520486421092) }, { -INT64_C( 8239917855590612034) } }, { { INT64_C( 1209298229987855115) }, { -INT64_C( 4870482474956784339) }, { -INT64_C( 4870482474956784339) } }, { { -INT64_C( 2738354095260399852) }, { -INT64_C( 2530047922748203653) }, { -INT64_C( 2738354095260399852) } }, { { INT64_C( 5343211016226198209) }, { 
INT64_C( 4259883144317867363) }, { INT64_C( 4259883144317867363) } }, { { -INT64_C( 4108218221852761427) }, { INT64_C( 1384668123137957002) }, { -INT64_C( 4108218221852761427) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r = simde_x_vmin_s64(a, b); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); simde_int64x1_t b = simde_test_arm_neon_random_i64x1(); simde_int64x1_t r = simde_x_vmin_s64(a, b); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmin_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(103), UINT8_C(198), UINT8_C(105), UINT8_C(115), UINT8_C( 81), UINT8_MAX, UINT8_C( 74), UINT8_C(236) }, { UINT8_C( 41), UINT8_C(205), UINT8_C(186), UINT8_C(171), UINT8_C(242), UINT8_C(251), UINT8_C(227), UINT8_C( 70) }, { UINT8_C( 41), UINT8_C(198), UINT8_C(105), UINT8_C(115), UINT8_C( 81), UINT8_C(251), UINT8_C( 74), UINT8_C( 70) } }, { { UINT8_C(124), UINT8_C(194), UINT8_C( 84), UINT8_C(248), UINT8_C( 27), UINT8_C(232), UINT8_C(231), UINT8_C(141) }, { UINT8_C(118), UINT8_C( 90), UINT8_C( 46), UINT8_C( 99), UINT8_C( 51), UINT8_C(159), UINT8_C(201), UINT8_C(154) }, { UINT8_C(118), UINT8_C( 90), UINT8_C( 46), UINT8_C( 99), UINT8_C( 27), UINT8_C(159), UINT8_C(201), UINT8_C(141) } }, { { UINT8_C(102), UINT8_C( 50), UINT8_C( 13), UINT8_C(183), UINT8_C( 49), UINT8_C( 88), UINT8_C(163), UINT8_C( 90) }, { UINT8_C( 37), UINT8_C( 93), UINT8_C( 5), UINT8_C( 23), UINT8_C( 88), UINT8_C(233), UINT8_C( 94), UINT8_C(212) }, { UINT8_C( 37), 
UINT8_C( 50), UINT8_C( 5), UINT8_C( 23), UINT8_C( 49), UINT8_C( 88), UINT8_C( 94), UINT8_C( 90) } }, { { UINT8_C(171), UINT8_C(178), UINT8_C(205), UINT8_C(198), UINT8_C(155), UINT8_C(180), UINT8_C( 84), UINT8_C( 17) }, { UINT8_C( 14), UINT8_C(130), UINT8_C(116), UINT8_C( 65), UINT8_C( 33), UINT8_C( 61), UINT8_C(220), UINT8_C(135) }, { UINT8_C( 14), UINT8_C(130), UINT8_C(116), UINT8_C( 65), UINT8_C( 33), UINT8_C( 61), UINT8_C( 84), UINT8_C( 17) } }, { { UINT8_C(112), UINT8_C(233), UINT8_C( 62), UINT8_C(161), UINT8_C( 65), UINT8_C(225), UINT8_C(252), UINT8_C(103) }, { UINT8_C( 62), UINT8_C( 1), UINT8_C(126), UINT8_C(151), UINT8_C(234), UINT8_C(220), UINT8_C(107), UINT8_C(150) }, { UINT8_C( 62), UINT8_C( 1), UINT8_C( 62), UINT8_C(151), UINT8_C( 65), UINT8_C(220), UINT8_C(107), UINT8_C(103) } }, { { UINT8_C(143), UINT8_C( 56), UINT8_C( 92), UINT8_C( 42), UINT8_C(236), UINT8_C(176), UINT8_C( 59), UINT8_C(251) }, { UINT8_C( 50), UINT8_C(175), UINT8_C( 60), UINT8_C( 84), UINT8_C(236), UINT8_C( 24), UINT8_C(219), UINT8_C( 92) }, { UINT8_C( 50), UINT8_C( 56), UINT8_C( 60), UINT8_C( 42), UINT8_C(236), UINT8_C( 24), UINT8_C( 59), UINT8_C( 92) } }, { { UINT8_C( 2), UINT8_C( 26), UINT8_C(254), UINT8_C( 67), UINT8_C(251), UINT8_C(250), UINT8_C(170), UINT8_C( 58) }, { UINT8_C(251), UINT8_C( 41), UINT8_C(209), UINT8_C(230), UINT8_C( 5), UINT8_C( 60), UINT8_C(124), UINT8_C(148) }, { UINT8_C( 2), UINT8_C( 26), UINT8_C(209), UINT8_C( 67), UINT8_C( 5), UINT8_C( 60), UINT8_C(124), UINT8_C( 58) } }, { { UINT8_C(117), UINT8_C(216), UINT8_C(190), UINT8_C( 97), UINT8_C(137), UINT8_C(249), UINT8_C( 92), UINT8_C(187) }, { UINT8_C(168), UINT8_C(153), UINT8_C( 15), UINT8_C(149), UINT8_C(177), UINT8_C(235), UINT8_C(241), UINT8_C(179) }, { UINT8_C(117), UINT8_C(153), UINT8_C( 15), UINT8_C( 97), UINT8_C(137), UINT8_C(235), UINT8_C( 92), UINT8_C(179) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b 
= simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vmin_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vmin_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(50791), UINT16_C(29545), UINT16_C(65361), UINT16_C(60490) }, { UINT16_C(52521), UINT16_C(43962), UINT16_C(64498), UINT16_C(18147) }, { UINT16_C(50791), UINT16_C(29545), UINT16_C(64498), UINT16_C(18147) } }, { { UINT16_C(49788), UINT16_C(63572), UINT16_C(59419), UINT16_C(36327) }, { UINT16_C(23158), UINT16_C(25390), UINT16_C(40755), UINT16_C(39625) }, { UINT16_C(23158), UINT16_C(25390), UINT16_C(40755), UINT16_C(36327) } }, { { UINT16_C(12902), UINT16_C(46861), UINT16_C(22577), UINT16_C(23203) }, { UINT16_C(23845), UINT16_C( 5893), UINT16_C(59736), UINT16_C(54366) }, { UINT16_C(12902), UINT16_C( 5893), UINT16_C(22577), UINT16_C(23203) } }, { { UINT16_C(45739), UINT16_C(50893), UINT16_C(46235), UINT16_C( 4436) }, { UINT16_C(33294), UINT16_C(16756), UINT16_C(15649), UINT16_C(34780) }, { UINT16_C(33294), UINT16_C(16756), UINT16_C(15649), UINT16_C( 4436) } }, { { UINT16_C(59760), UINT16_C(41278), UINT16_C(57665), UINT16_C(26620) }, { UINT16_C( 318), UINT16_C(38782), UINT16_C(56554), UINT16_C(38507) }, { UINT16_C( 318), UINT16_C(38782), UINT16_C(56554), UINT16_C(26620) } }, { { UINT16_C(14479), UINT16_C(10844), UINT16_C(45292), UINT16_C(64315) }, { UINT16_C(44850), UINT16_C(21564), UINT16_C( 6380), UINT16_C(23771) }, { UINT16_C(14479), UINT16_C(10844), UINT16_C( 6380), UINT16_C(23771) } }, { { UINT16_C( 6658), UINT16_C(17406), UINT16_C(64251), UINT16_C(15018) }, { UINT16_C(10747), UINT16_C(59089), UINT16_C(15365), UINT16_C(38012) }, { UINT16_C( 6658), UINT16_C(17406), UINT16_C(15365), UINT16_C(15018) } }, { { UINT16_C(55413), UINT16_C(25022), UINT16_C(63881), UINT16_C(47964) }, { UINT16_C(39336), UINT16_C(38159), UINT16_C(60337), UINT16_C(46065) }, { UINT16_C(39336), 
UINT16_C(25022), UINT16_C(60337), UINT16_C(46065) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vmin_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vmin_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(1936311911), UINT32_C(3964338001) }, { UINT32_C(2881146153), UINT32_C(1189346290) }, { UINT32_C(1936311911), UINT32_C(1189346290) } }, { { UINT32_C(4166304380), UINT32_C(2380785691) }, { UINT32_C(1663982198), UINT32_C(2596904755) }, { UINT32_C(1663982198), UINT32_C(2380785691) } }, { { UINT32_C(3071095398), UINT32_C(1520654385) }, { UINT32_C( 386227493), UINT32_C(3562989912) }, { UINT32_C( 386227493), UINT32_C(1520654385) } }, { { UINT32_C(3335369387), UINT32_C( 290763931) }, { UINT32_C(1098154510), UINT32_C(2279357729) }, { UINT32_C(1098154510), UINT32_C( 290763931) } }, { { UINT32_C(2705254768), UINT32_C(1744625985) }, { UINT32_C(2541617470), UINT32_C(2523651306) }, { UINT32_C(2541617470), UINT32_C(1744625985) } }, { { UINT32_C( 710686863), UINT32_C(4214993132) }, { UINT32_C(1413263154), UINT32_C(1557862636) }, { UINT32_C( 710686863), UINT32_C(1557862636) } }, { { UINT32_C(1140726274), UINT32_C( 984283899) }, { UINT32_C(3872467451), UINT32_C(2491169797) }, { UINT32_C(1140726274), UINT32_C( 984283899) } }, { { UINT32_C(1639897205), UINT32_C(3143432585) }, { UINT32_C(2500827560), UINT32_C(3018976177) }, { UINT32_C(1639897205), UINT32_C(3018976177) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vmin_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int 
test_simde_x_vmin_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C(14596859467346210377) }, { UINT64_C( 8490333782996743303) }, { UINT64_C( 8490333782996743303) } }, { { UINT64_C(14226021192279983154) }, { UINT64_C( 1273070398529584717) }, { UINT64_C( 1273070398529584717) } }, { { UINT64_C( 2300142690520277448) }, { UINT64_C(15762734658533978035) }, { UINT64_C( 2300142690520277448) } }, { { UINT64_C( 1020941357489899391) }, { UINT64_C( 4086179317755451099) }, { UINT64_C( 1020941357489899391) } }, { { UINT64_C( 6043344306685272359) }, { UINT64_C( 9275266790380700981) }, { UINT64_C( 6043344306685272359) } }, { { UINT64_C( 6914450965909563429) }, { UINT64_C( 9245811248480866982) }, { UINT64_C( 6914450965909563429) } }, { { UINT64_C( 2561736672060352921) }, { UINT64_C(12620134581618682786) }, { UINT64_C( 2561736672060352921) } }, { { UINT64_C(13764560778396305441) }, { UINT64_C( 6464395822620870877) }, { UINT64_C( 6464395822620870877) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_x_vmin_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t b = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t r = simde_x_vmin_u64(a, b); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vminq_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_FLOAT32_C( 680.38), SIMDE_MATH_NANF, 
SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 0.00) }, { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -604.90), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -0.00) }, { SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -0.00) } }, #endif { { SIMDE_FLOAT32_C( 680.38), SIMDE_FLOAT32_C( -211.23), SIMDE_FLOAT32_C( 566.20), SIMDE_FLOAT32_C( 596.88) }, { SIMDE_FLOAT32_C( 823.29), SIMDE_FLOAT32_C( -604.90), SIMDE_FLOAT32_C( -329.55), SIMDE_FLOAT32_C( 536.46) }, { SIMDE_FLOAT32_C( 680.38), SIMDE_FLOAT32_C( -604.90), SIMDE_FLOAT32_C( -329.55), SIMDE_FLOAT32_C( 536.46) } }, { { SIMDE_FLOAT32_C( -444.45), SIMDE_FLOAT32_C( 107.94), SIMDE_FLOAT32_C( -45.21), SIMDE_FLOAT32_C( 257.74) }, { SIMDE_FLOAT32_C( -270.43), SIMDE_FLOAT32_C( 26.80), SIMDE_FLOAT32_C( 904.46), SIMDE_FLOAT32_C( 832.39) }, { SIMDE_FLOAT32_C( -444.45), SIMDE_FLOAT32_C( 26.80), SIMDE_FLOAT32_C( -45.21), SIMDE_FLOAT32_C( 257.74) } }, { { SIMDE_FLOAT32_C( 271.42), SIMDE_FLOAT32_C( 434.59), SIMDE_FLOAT32_C( -716.79), SIMDE_FLOAT32_C( 213.94) }, { SIMDE_FLOAT32_C( -967.40), SIMDE_FLOAT32_C( -514.23), SIMDE_FLOAT32_C( -725.54), SIMDE_FLOAT32_C( 608.35) }, { SIMDE_FLOAT32_C( -967.40), SIMDE_FLOAT32_C( -514.23), SIMDE_FLOAT32_C( -725.54), SIMDE_FLOAT32_C( 213.94) } }, { { SIMDE_FLOAT32_C( -686.64), SIMDE_FLOAT32_C( -198.11), SIMDE_FLOAT32_C( -740.42), SIMDE_FLOAT32_C( -782.38) }, { SIMDE_FLOAT32_C( 997.85), SIMDE_FLOAT32_C( -563.49), SIMDE_FLOAT32_C( 25.86), SIMDE_FLOAT32_C( 678.22) }, { SIMDE_FLOAT32_C( -686.64), SIMDE_FLOAT32_C( -563.49), SIMDE_FLOAT32_C( -740.42), SIMDE_FLOAT32_C( -782.38) } }, { { SIMDE_FLOAT32_C( 225.28), SIMDE_FLOAT32_C( -407.94), SIMDE_FLOAT32_C( 275.10), SIMDE_FLOAT32_C( 48.57) }, { SIMDE_FLOAT32_C( -12.83), SIMDE_FLOAT32_C( 945.55), SIMDE_FLOAT32_C( -414.97), SIMDE_FLOAT32_C( 542.72) }, { SIMDE_FLOAT32_C( -12.83), SIMDE_FLOAT32_C( -407.94), SIMDE_FLOAT32_C( -414.97), SIMDE_FLOAT32_C( 48.57) } }, { { SIMDE_FLOAT32_C( 53.49), SIMDE_FLOAT32_C( 539.83), SIMDE_FLOAT32_C( -199.54), SIMDE_FLOAT32_C( 783.06) }, { 
SIMDE_FLOAT32_C( -433.37), SIMDE_FLOAT32_C( -295.08), SIMDE_FLOAT32_C( 615.45), SIMDE_FLOAT32_C( 838.05) }, { SIMDE_FLOAT32_C( -433.37), SIMDE_FLOAT32_C( -295.08), SIMDE_FLOAT32_C( -199.54), SIMDE_FLOAT32_C( 783.06) } }, { { SIMDE_FLOAT32_C( -860.49), SIMDE_FLOAT32_C( 898.65), SIMDE_FLOAT32_C( 51.99), SIMDE_FLOAT32_C( -827.89) }, { SIMDE_FLOAT32_C( -615.57), SIMDE_FLOAT32_C( 326.45), SIMDE_FLOAT32_C( 780.47), SIMDE_FLOAT32_C( -302.21) }, { SIMDE_FLOAT32_C( -860.49), SIMDE_FLOAT32_C( 326.45), SIMDE_FLOAT32_C( 51.99), SIMDE_FLOAT32_C( -827.89) } }, { { SIMDE_FLOAT32_C( -871.66), SIMDE_FLOAT32_C( -959.95), SIMDE_FLOAT32_C( -84.60), SIMDE_FLOAT32_C( -873.81) }, { SIMDE_FLOAT32_C( -523.44), SIMDE_FLOAT32_C( 941.27), SIMDE_FLOAT32_C( 804.42), SIMDE_FLOAT32_C( 701.84) }, { SIMDE_FLOAT32_C( -871.66), SIMDE_FLOAT32_C( -959.95), SIMDE_FLOAT32_C( -84.60), SIMDE_FLOAT32_C( -873.81) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vminq_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vminq_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -211.23) }, { SIMDE_FLOAT64_C( 566.20), SIMDE_MATH_NAN }, { SIMDE_MATH_NAN, SIMDE_MATH_NAN } }, { { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 0.0) }, { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -0.0) }, { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -0.0) } }, #endif { { SIMDE_FLOAT64_C( 680.38), SIMDE_FLOAT64_C( -211.23) }, { SIMDE_FLOAT64_C( 566.20), SIMDE_FLOAT64_C( 596.88) }, { SIMDE_FLOAT64_C( 566.20), SIMDE_FLOAT64_C( -211.23) } }, { { SIMDE_FLOAT64_C( 823.29), SIMDE_FLOAT64_C( -604.90) }, { SIMDE_FLOAT64_C( -329.55), SIMDE_FLOAT64_C( 536.46) }, { SIMDE_FLOAT64_C( -329.55), SIMDE_FLOAT64_C( 
-604.90) } }, { { SIMDE_FLOAT64_C( -444.45), SIMDE_FLOAT64_C( 107.94) }, { SIMDE_FLOAT64_C( -45.21), SIMDE_FLOAT64_C( 257.74) }, { SIMDE_FLOAT64_C( -444.45), SIMDE_FLOAT64_C( 107.94) } }, { { SIMDE_FLOAT64_C( -270.43), SIMDE_FLOAT64_C( 26.80) }, { SIMDE_FLOAT64_C( 904.46), SIMDE_FLOAT64_C( 832.39) }, { SIMDE_FLOAT64_C( -270.43), SIMDE_FLOAT64_C( 26.80) } }, { { SIMDE_FLOAT64_C( 271.42), SIMDE_FLOAT64_C( 434.59) }, { SIMDE_FLOAT64_C( -716.79), SIMDE_FLOAT64_C( 213.94) }, { SIMDE_FLOAT64_C( -716.79), SIMDE_FLOAT64_C( 213.94) } }, { { SIMDE_FLOAT64_C( -967.40), SIMDE_FLOAT64_C( -514.23) }, { SIMDE_FLOAT64_C( -725.54), SIMDE_FLOAT64_C( 608.35) }, { SIMDE_FLOAT64_C( -967.40), SIMDE_FLOAT64_C( -514.23) } }, { { SIMDE_FLOAT64_C( -686.64), SIMDE_FLOAT64_C( -198.11) }, { SIMDE_FLOAT64_C( -740.42), SIMDE_FLOAT64_C( -782.38) }, { SIMDE_FLOAT64_C( -740.42), SIMDE_FLOAT64_C( -782.38) } }, { { SIMDE_FLOAT64_C( 997.85), SIMDE_FLOAT64_C( -563.49) }, { SIMDE_FLOAT64_C( 25.86), SIMDE_FLOAT64_C( 678.22) }, { SIMDE_FLOAT64_C( 25.86), SIMDE_FLOAT64_C( -563.49) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vminq_f64(a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vminq_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 103), -INT8_C( 58), INT8_C( 105), INT8_C( 115), INT8_C( 81), -INT8_C( 1), INT8_C( 74), -INT8_C( 20), INT8_C( 41), -INT8_C( 51), -INT8_C( 70), -INT8_C( 85), -INT8_C( 14), -INT8_C( 5), -INT8_C( 29), INT8_C( 70) }, { INT8_C( 124), -INT8_C( 62), INT8_C( 84), -INT8_C( 8), INT8_C( 27), -INT8_C( 24), -INT8_C( 25), -INT8_C( 115), INT8_C( 118), INT8_C( 90), INT8_C( 46), INT8_C( 99), INT8_C( 51), -INT8_C( 97), -INT8_C( 55), -INT8_C( 102) }, { INT8_C( 103), -INT8_C( 62), INT8_C( 84), 
-INT8_C( 8), INT8_C( 27), -INT8_C( 24), -INT8_C( 25), -INT8_C( 115), INT8_C( 41), -INT8_C( 51), -INT8_C( 70), -INT8_C( 85), -INT8_C( 14), -INT8_C( 97), -INT8_C( 55), -INT8_C( 102) } }, { { INT8_C( 102), INT8_C( 50), INT8_C( 13), -INT8_C( 73), INT8_C( 49), INT8_C( 88), -INT8_C( 93), INT8_C( 90), INT8_C( 37), INT8_C( 93), INT8_C( 5), INT8_C( 23), INT8_C( 88), -INT8_C( 23), INT8_C( 94), -INT8_C( 44) }, { -INT8_C( 85), -INT8_C( 78), -INT8_C( 51), -INT8_C( 58), -INT8_C( 101), -INT8_C( 76), INT8_C( 84), INT8_C( 17), INT8_C( 14), -INT8_C( 126), INT8_C( 116), INT8_C( 65), INT8_C( 33), INT8_C( 61), -INT8_C( 36), -INT8_C( 121) }, { -INT8_C( 85), -INT8_C( 78), -INT8_C( 51), -INT8_C( 73), -INT8_C( 101), -INT8_C( 76), -INT8_C( 93), INT8_C( 17), INT8_C( 14), -INT8_C( 126), INT8_C( 5), INT8_C( 23), INT8_C( 33), -INT8_C( 23), -INT8_C( 36), -INT8_C( 121) } }, { { INT8_C( 112), -INT8_C( 23), INT8_C( 62), -INT8_C( 95), INT8_C( 65), -INT8_C( 31), -INT8_C( 4), INT8_C( 103), INT8_C( 62), INT8_C( 1), INT8_C( 126), -INT8_C( 105), -INT8_C( 22), -INT8_C( 36), INT8_C( 107), -INT8_C( 106) }, { -INT8_C( 113), INT8_C( 56), INT8_C( 92), INT8_C( 42), -INT8_C( 20), -INT8_C( 80), INT8_C( 59), -INT8_C( 5), INT8_C( 50), -INT8_C( 81), INT8_C( 60), INT8_C( 84), -INT8_C( 20), INT8_C( 24), -INT8_C( 37), INT8_C( 92) }, { -INT8_C( 113), -INT8_C( 23), INT8_C( 62), -INT8_C( 95), -INT8_C( 20), -INT8_C( 80), -INT8_C( 4), -INT8_C( 5), INT8_C( 50), -INT8_C( 81), INT8_C( 60), -INT8_C( 105), -INT8_C( 22), -INT8_C( 36), -INT8_C( 37), -INT8_C( 106) } }, { { INT8_C( 2), INT8_C( 26), -INT8_C( 2), INT8_C( 67), -INT8_C( 5), -INT8_C( 6), -INT8_C( 86), INT8_C( 58), -INT8_C( 5), INT8_C( 41), -INT8_C( 47), -INT8_C( 26), INT8_C( 5), INT8_C( 60), INT8_C( 124), -INT8_C( 108) }, { INT8_C( 117), -INT8_C( 40), -INT8_C( 66), INT8_C( 97), -INT8_C( 119), -INT8_C( 7), INT8_C( 92), -INT8_C( 69), -INT8_C( 88), -INT8_C( 103), INT8_C( 15), -INT8_C( 107), -INT8_C( 79), -INT8_C( 21), -INT8_C( 15), -INT8_C( 77) }, { INT8_C( 2), -INT8_C( 
40), -INT8_C( 66), INT8_C( 67), -INT8_C( 119), -INT8_C( 7), -INT8_C( 86), -INT8_C( 69), -INT8_C( 88), -INT8_C( 103), -INT8_C( 47), -INT8_C( 107), -INT8_C( 79), -INT8_C( 21), -INT8_C( 15), -INT8_C( 108) } }, { { INT8_C( 5), -INT8_C( 17), -INT8_C( 9), INT8_C( 0), -INT8_C( 23), -INT8_C( 95), INT8_C( 58), -INT8_C( 27), -INT8_C( 54), INT8_C( 11), -INT8_C( 53), -INT8_C( 48), INT8_C( 72), INT8_C( 71), INT8_C( 100), -INT8_C( 67) }, { INT8_C( 31), INT8_C( 35), INT8_C( 30), -INT8_C( 88), INT8_C( 28), INT8_C( 123), INT8_C( 100), -INT8_C( 59), INT8_C( 20), INT8_C( 115), INT8_C( 90), -INT8_C( 59), INT8_C( 94), INT8_C( 75), INT8_C( 121), INT8_C( 99) }, { INT8_C( 5), -INT8_C( 17), -INT8_C( 9), -INT8_C( 88), -INT8_C( 23), -INT8_C( 95), INT8_C( 58), -INT8_C( 59), -INT8_C( 54), INT8_C( 11), -INT8_C( 53), -INT8_C( 59), INT8_C( 72), INT8_C( 71), INT8_C( 100), -INT8_C( 67) } }, { { INT8_C( 59), INT8_C( 112), INT8_C( 100), INT8_C( 36), INT8_C( 17), -INT8_C( 98), INT8_C( 9), -INT8_C( 36), -INT8_C( 86), -INT8_C( 44), -INT8_C( 84), -INT8_C( 14), INT8_C( 27), INT8_C( 16), -INT8_C( 81), INT8_C( 59) }, { INT8_C( 51), -INT8_C( 51), -INT8_C( 29), INT8_C( 80), INT8_C( 72), INT8_C( 71), INT8_C( 21), INT8_C( 92), -INT8_C( 69), INT8_C( 111), INT8_C( 34), INT8_C( 25), -INT8_C( 70), -INT8_C( 101), INT8_C( 125), -INT8_C( 11) }, { INT8_C( 51), -INT8_C( 51), -INT8_C( 29), INT8_C( 36), INT8_C( 17), -INT8_C( 98), INT8_C( 9), -INT8_C( 36), -INT8_C( 86), -INT8_C( 44), -INT8_C( 84), -INT8_C( 14), -INT8_C( 70), -INT8_C( 101), -INT8_C( 81), -INT8_C( 11) } }, { { INT8_C( 11), -INT8_C( 31), INT8_C( 26), INT8_C( 28), INT8_MAX, INT8_C( 35), -INT8_C( 8), INT8_C( 41), -INT8_C( 8), -INT8_C( 92), INT8_C( 27), INT8_C( 19), -INT8_C( 75), -INT8_C( 54), INT8_C( 78), -INT8_C( 24) }, { -INT8_C( 104), INT8_C( 50), INT8_C( 56), -INT8_C( 32), INT8_C( 121), INT8_C( 77), INT8_C( 61), INT8_C( 52), -INT8_C( 68), INT8_C( 95), INT8_C( 78), INT8_C( 119), -INT8_C( 6), -INT8_C( 53), INT8_C( 108), INT8_C( 5) }, { -INT8_C( 104), -INT8_C( 
31), INT8_C( 26), -INT8_C( 32), INT8_C( 121), INT8_C( 35), -INT8_C( 8), INT8_C( 41), -INT8_C( 68), -INT8_C( 92), INT8_C( 27), INT8_C( 19), -INT8_C( 75), -INT8_C( 54), INT8_C( 78), -INT8_C( 24) } }, { { -INT8_C( 84), -INT8_C( 122), INT8_C( 33), INT8_C( 43), -INT8_C( 86), INT8_C( 26), INT8_C( 85), -INT8_C( 94), -INT8_C( 66), INT8_C( 112), -INT8_C( 75), INT8_C( 115), INT8_C( 59), INT8_C( 4), INT8_C( 92), -INT8_C( 45) }, { INT8_C( 54), -INT8_C( 108), -INT8_C( 77), -INT8_C( 81), -INT8_C( 30), -INT8_C( 16), -INT8_C( 28), -INT8_C( 98), INT8_C( 79), INT8_C( 50), INT8_C( 21), INT8_C( 73), -INT8_C( 3), -INT8_C( 126), INT8_C( 78), -INT8_C( 87) }, { -INT8_C( 84), -INT8_C( 122), -INT8_C( 77), -INT8_C( 81), -INT8_C( 86), -INT8_C( 16), -INT8_C( 28), -INT8_C( 98), -INT8_C( 66), INT8_C( 50), -INT8_C( 75), INT8_C( 73), -INT8_C( 3), -INT8_C( 126), INT8_C( 78), -INT8_C( 87) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vminq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vminq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 14745), INT16_C( 29545), -INT16_C( 175), -INT16_C( 5046), -INT16_C( 13015), -INT16_C( 21574), -INT16_C( 1038), INT16_C( 18147) }, { -INT16_C( 15748), -INT16_C( 1964), -INT16_C( 6117), -INT16_C( 29209), INT16_C( 23158), INT16_C( 25390), -INT16_C( 24781), -INT16_C( 25911) }, { -INT16_C( 15748), -INT16_C( 1964), -INT16_C( 6117), -INT16_C( 29209), -INT16_C( 13015), -INT16_C( 21574), -INT16_C( 24781), -INT16_C( 25911) } }, { { INT16_C( 12902), -INT16_C( 18675), INT16_C( 22577), INT16_C( 23203), INT16_C( 23845), INT16_C( 5893), -INT16_C( 5800), -INT16_C( 11170) }, { -INT16_C( 19797), -INT16_C( 14643), -INT16_C( 19301), INT16_C( 4436), -INT16_C( 32242), INT16_C( 16756), 
INT16_C( 15649), -INT16_C( 30756) }, { -INT16_C( 19797), -INT16_C( 18675), -INT16_C( 19301), INT16_C( 4436), -INT16_C( 32242), INT16_C( 5893), -INT16_C( 5800), -INT16_C( 30756) } }, { { -INT16_C( 5776), -INT16_C( 24258), -INT16_C( 7871), INT16_C( 26620), INT16_C( 318), -INT16_C( 26754), -INT16_C( 8982), -INT16_C( 27029) }, { INT16_C( 14479), INT16_C( 10844), -INT16_C( 20244), -INT16_C( 1221), -INT16_C( 20686), INT16_C( 21564), INT16_C( 6380), INT16_C( 23771) }, { -INT16_C( 5776), -INT16_C( 24258), -INT16_C( 20244), -INT16_C( 1221), -INT16_C( 20686), -INT16_C( 26754), -INT16_C( 8982), -INT16_C( 27029) } }, { { INT16_C( 6658), INT16_C( 17406), -INT16_C( 1285), INT16_C( 15018), INT16_C( 10747), -INT16_C( 6447), INT16_C( 15365), -INT16_C( 27524) }, { -INT16_C( 10123), INT16_C( 25022), -INT16_C( 1655), -INT16_C( 17572), -INT16_C( 26200), -INT16_C( 27377), -INT16_C( 5199), -INT16_C( 19471) }, { -INT16_C( 10123), INT16_C( 17406), -INT16_C( 1655), -INT16_C( 17572), -INT16_C( 26200), -INT16_C( 27377), -INT16_C( 5199), -INT16_C( 27524) } }, { { -INT16_C( 4347), INT16_C( 247), -INT16_C( 24087), -INT16_C( 6854), INT16_C( 3018), -INT16_C( 12085), INT16_C( 18248), -INT16_C( 17052) }, { INT16_C( 8991), -INT16_C( 22498), INT16_C( 31516), -INT16_C( 15004), INT16_C( 29460), -INT16_C( 15014), INT16_C( 19294), INT16_C( 25465) }, { -INT16_C( 4347), -INT16_C( 22498), -INT16_C( 24087), -INT16_C( 15004), INT16_C( 3018), -INT16_C( 15014), INT16_C( 18248), -INT16_C( 17052) } }, { { INT16_C( 28731), INT16_C( 9316), -INT16_C( 25071), -INT16_C( 9207), -INT16_C( 11094), -INT16_C( 3412), INT16_C( 4123), INT16_C( 15279) }, { -INT16_C( 13005), INT16_C( 20707), INT16_C( 18248), INT16_C( 23573), INT16_C( 28603), INT16_C( 6434), -INT16_C( 25670), -INT16_C( 2691) }, { -INT16_C( 13005), INT16_C( 9316), -INT16_C( 25071), -INT16_C( 9207), -INT16_C( 11094), -INT16_C( 3412), -INT16_C( 25670), -INT16_C( 2691) } }, { { -INT16_C( 7925), INT16_C( 7194), INT16_C( 9087), INT16_C( 10744), -INT16_C( 23304), 
INT16_C( 4891), -INT16_C( 13643), -INT16_C( 6066) }, { INT16_C( 12952), -INT16_C( 8136), INT16_C( 19833), INT16_C( 13373), INT16_C( 24508), INT16_C( 30542), -INT16_C( 13318), INT16_C( 1388) }, { -INT16_C( 7925), -INT16_C( 8136), INT16_C( 9087), INT16_C( 10744), -INT16_C( 23304), INT16_C( 4891), -INT16_C( 13643), -INT16_C( 6066) } }, { { -INT16_C( 31060), INT16_C( 11041), INT16_C( 6826), -INT16_C( 23979), INT16_C( 28862), INT16_C( 29621), INT16_C( 1083), -INT16_C( 11428) }, { -INT16_C( 27594), -INT16_C( 20557), -INT16_C( 3870), -INT16_C( 24860), INT16_C( 12879), INT16_C( 18709), -INT16_C( 32003), -INT16_C( 22194) }, { -INT16_C( 31060), -INT16_C( 20557), -INT16_C( 3870), -INT16_C( 24860), INT16_C( 12879), INT16_C( 18709), -INT16_C( 32003), -INT16_C( 22194) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vminq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vminq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 1936311911), -INT32_C( 330629295), -INT32_C( 1413821143), INT32_C( 1189346290) }, { -INT32_C( 128662916), -INT32_C( 1914181605), INT32_C( 1663982198), -INT32_C( 1698062541) }, { -INT32_C( 128662916), -INT32_C( 1914181605), -INT32_C( 1413821143), -INT32_C( 1698062541) } }, { { -INT32_C( 1223871898), INT32_C( 1520654385), INT32_C( 386227493), -INT32_C( 731977384) }, { -INT32_C( 959597909), INT32_C( 290763931), INT32_C( 1098154510), -INT32_C( 2015609567) }, { -INT32_C( 1223871898), INT32_C( 290763931), INT32_C( 386227493), -INT32_C( 2015609567) } }, { { -INT32_C( 1589712528), INT32_C( 1744625985), -INT32_C( 1753349826), -INT32_C( 1771315990) }, { INT32_C( 710686863), -INT32_C( 79974164), INT32_C( 1413263154), INT32_C( 1557862636) }, { -INT32_C( 1589712528), -INT32_C( 
79974164), -INT32_C( 1753349826), -INT32_C( 1771315990) } }, { { INT32_C( 1140726274), INT32_C( 984283899), -INT32_C( 422499845), -INT32_C( 1803797499) }, { INT32_C( 1639897205), -INT32_C( 1151534711), -INT32_C( 1794139736), -INT32_C( 1275991119) }, { INT32_C( 1140726274), -INT32_C( 1151534711), -INT32_C( 1794139736), -INT32_C( 1803797499) } }, { { INT32_C( 16248581), -INT32_C( 449142295), -INT32_C( 791999542), -INT32_C( 1117501624) }, { -INT32_C( 1474419937), -INT32_C( 983270628), -INT32_C( 983928044), INT32_C( 1668893534) }, { -INT32_C( 1474419937), -INT32_C( 983270628), -INT32_C( 983928044), -INT32_C( 1117501624) } }, { { INT32_C( 610562107), -INT32_C( 603349487), -INT32_C( 223554390), INT32_C( 1001328667) }, { INT32_C( 1357106483), INT32_C( 1544898376), INT32_C( 421687227), -INT32_C( 176317510) }, { INT32_C( 610562107), -INT32_C( 603349487), -INT32_C( 223554390), -INT32_C( 176317510) } }, { { INT32_C( 471523595), INT32_C( 704127871), INT32_C( 320578808), -INT32_C( 397489483) }, { -INT32_C( 533187944), INT32_C( 876432761), INT32_C( 2001625020), INT32_C( 91016186) }, { -INT32_C( 533187944), INT32_C( 704127871), INT32_C( 320578808), -INT32_C( 397489483) } }, { { INT32_C( 723617452), -INT32_C( 1571480918), INT32_C( 1941270718), -INT32_C( 748944325) }, { -INT32_C( 1347185610), -INT32_C( 1629163294), INT32_C( 1226125903), -INT32_C( 1454472451) }, { -INT32_C( 1347185610), -INT32_C( 1629163294), INT32_C( 1226125903), -INT32_C( 1454472451) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vminq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_x_vminq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { -INT64_C( 6310161457262489484), INT64_C( 9150748465495244559) }, { 
-INT64_C( 307299995468452001), INT64_C( 3381162882742398488) }, { -INT64_C( 6310161457262489484), INT64_C( 3381162882742398488) } }, { { INT64_C( 5448216716855701341), -INT64_C( 4381602882203523212) }, { INT64_C( 2981032279474032666), -INT64_C( 6820597993101396369) }, { INT64_C( 2981032279474032666), -INT64_C( 6820597993101396369) } }, { { INT64_C( 5140068232687050701), INT64_C( 5313733190651462551) }, { -INT64_C( 921538610980994051), -INT64_C( 3106230268439255040) }, { -INT64_C( 921538610980994051), -INT64_C( 3106230268439255040) } }, { { INT64_C( 4642706166988568915), -INT64_C( 7822759604437423625) }, { INT64_C( 2608665159880678076), INT64_C( 1683023292705230719) }, { INT64_C( 2608665159880678076), -INT64_C( 7822759604437423625) } }, { { INT64_C( 7326984329290987973), -INT64_C( 7043635440719763178) }, { INT64_C( 3917025254536306708), -INT64_C( 3081561587413302176) }, { INT64_C( 3917025254536306708), -INT64_C( 7043635440719763178) } }, { { INT64_C( 4477969082528004689), -INT64_C( 4802479144129876006) }, { -INT64_C( 6199395885311969270), INT64_C( 2232389912232899606) }, { -INT64_C( 6199395885311969270), -INT64_C( 4802479144129876006) } }, { { INT64_C( 714384795166916228), INT64_C( 582688379079349424) }, { INT64_C( 8451780267177741399), -INT64_C( 6457995692247082891) }, { INT64_C( 714384795166916228), -INT64_C( 6457995692247082891) } }, { { -INT64_C( 1292748330957128234), INT64_C( 5411178255095419921) }, { INT64_C( 1746893778266677610), INT64_C( 5194544220892486222) }, { -INT64_C( 1292748330957128234), INT64_C( 5194544220892486222) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_x_vminq_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); 
simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); simde_int64x2_t r = simde_x_vminq_s64(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vminq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(103), UINT8_C(198), UINT8_C(105), UINT8_C(115), UINT8_C( 81), UINT8_MAX, UINT8_C( 74), UINT8_C(236), UINT8_C( 41), UINT8_C(205), UINT8_C(186), UINT8_C(171), UINT8_C(242), UINT8_C(251), UINT8_C(227), UINT8_C( 70) }, { UINT8_C(124), UINT8_C(194), UINT8_C( 84), UINT8_C(248), UINT8_C( 27), UINT8_C(232), UINT8_C(231), UINT8_C(141), UINT8_C(118), UINT8_C( 90), UINT8_C( 46), UINT8_C( 99), UINT8_C( 51), UINT8_C(159), UINT8_C(201), UINT8_C(154) }, { UINT8_C(103), UINT8_C(194), UINT8_C( 84), UINT8_C(115), UINT8_C( 27), UINT8_C(232), UINT8_C( 74), UINT8_C(141), UINT8_C( 41), UINT8_C( 90), UINT8_C( 46), UINT8_C( 99), UINT8_C( 51), UINT8_C(159), UINT8_C(201), UINT8_C( 70) } }, { { UINT8_C(102), UINT8_C( 50), UINT8_C( 13), UINT8_C(183), UINT8_C( 49), UINT8_C( 88), UINT8_C(163), UINT8_C( 90), UINT8_C( 37), UINT8_C( 93), UINT8_C( 5), UINT8_C( 23), UINT8_C( 88), UINT8_C(233), UINT8_C( 94), UINT8_C(212) }, { UINT8_C(171), UINT8_C(178), UINT8_C(205), UINT8_C(198), UINT8_C(155), UINT8_C(180), UINT8_C( 84), UINT8_C( 17), UINT8_C( 14), UINT8_C(130), UINT8_C(116), UINT8_C( 65), UINT8_C( 33), UINT8_C( 61), UINT8_C(220), UINT8_C(135) }, { UINT8_C(102), UINT8_C( 50), UINT8_C( 13), UINT8_C(183), UINT8_C( 49), UINT8_C( 88), UINT8_C( 84), UINT8_C( 17), UINT8_C( 14), UINT8_C( 93), UINT8_C( 5), UINT8_C( 23), UINT8_C( 33), UINT8_C( 61), UINT8_C( 94), UINT8_C(135) } }, { { UINT8_C(112), UINT8_C(233), UINT8_C( 62), UINT8_C(161), UINT8_C( 65), UINT8_C(225), UINT8_C(252), UINT8_C(103), UINT8_C( 62), UINT8_C( 1), UINT8_C(126), UINT8_C(151), 
UINT8_C(234), UINT8_C(220), UINT8_C(107), UINT8_C(150) }, { UINT8_C(143), UINT8_C( 56), UINT8_C( 92), UINT8_C( 42), UINT8_C(236), UINT8_C(176), UINT8_C( 59), UINT8_C(251), UINT8_C( 50), UINT8_C(175), UINT8_C( 60), UINT8_C( 84), UINT8_C(236), UINT8_C( 24), UINT8_C(219), UINT8_C( 92) }, { UINT8_C(112), UINT8_C( 56), UINT8_C( 62), UINT8_C( 42), UINT8_C( 65), UINT8_C(176), UINT8_C( 59), UINT8_C(103), UINT8_C( 50), UINT8_C( 1), UINT8_C( 60), UINT8_C( 84), UINT8_C(234), UINT8_C( 24), UINT8_C(107), UINT8_C( 92) } }, { { UINT8_C( 2), UINT8_C( 26), UINT8_C(254), UINT8_C( 67), UINT8_C(251), UINT8_C(250), UINT8_C(170), UINT8_C( 58), UINT8_C(251), UINT8_C( 41), UINT8_C(209), UINT8_C(230), UINT8_C( 5), UINT8_C( 60), UINT8_C(124), UINT8_C(148) }, { UINT8_C(117), UINT8_C(216), UINT8_C(190), UINT8_C( 97), UINT8_C(137), UINT8_C(249), UINT8_C( 92), UINT8_C(187), UINT8_C(168), UINT8_C(153), UINT8_C( 15), UINT8_C(149), UINT8_C(177), UINT8_C(235), UINT8_C(241), UINT8_C(179) }, { UINT8_C( 2), UINT8_C( 26), UINT8_C(190), UINT8_C( 67), UINT8_C(137), UINT8_C(249), UINT8_C( 92), UINT8_C( 58), UINT8_C(168), UINT8_C( 41), UINT8_C( 15), UINT8_C(149), UINT8_C( 5), UINT8_C( 60), UINT8_C(124), UINT8_C(148) } }, { { UINT8_C( 5), UINT8_C(239), UINT8_C(247), UINT8_C( 0), UINT8_C(233), UINT8_C(161), UINT8_C( 58), UINT8_C(229), UINT8_C(202), UINT8_C( 11), UINT8_C(203), UINT8_C(208), UINT8_C( 72), UINT8_C( 71), UINT8_C(100), UINT8_C(189) }, { UINT8_C( 31), UINT8_C( 35), UINT8_C( 30), UINT8_C(168), UINT8_C( 28), UINT8_C(123), UINT8_C(100), UINT8_C(197), UINT8_C( 20), UINT8_C(115), UINT8_C( 90), UINT8_C(197), UINT8_C( 94), UINT8_C( 75), UINT8_C(121), UINT8_C( 99) }, { UINT8_C( 5), UINT8_C( 35), UINT8_C( 30), UINT8_C( 0), UINT8_C( 28), UINT8_C(123), UINT8_C( 58), UINT8_C(197), UINT8_C( 20), UINT8_C( 11), UINT8_C( 90), UINT8_C(197), UINT8_C( 72), UINT8_C( 71), UINT8_C(100), UINT8_C( 99) } }, { { UINT8_C( 59), UINT8_C(112), UINT8_C(100), UINT8_C( 36), UINT8_C( 17), UINT8_C(158), UINT8_C( 9), UINT8_C(220), 
UINT8_C(170), UINT8_C(212), UINT8_C(172), UINT8_C(242), UINT8_C( 27), UINT8_C( 16), UINT8_C(175), UINT8_C( 59) }, { UINT8_C( 51), UINT8_C(205), UINT8_C(227), UINT8_C( 80), UINT8_C( 72), UINT8_C( 71), UINT8_C( 21), UINT8_C( 92), UINT8_C(187), UINT8_C(111), UINT8_C( 34), UINT8_C( 25), UINT8_C(186), UINT8_C(155), UINT8_C(125), UINT8_C(245) }, { UINT8_C( 51), UINT8_C(112), UINT8_C(100), UINT8_C( 36), UINT8_C( 17), UINT8_C( 71), UINT8_C( 9), UINT8_C( 92), UINT8_C(170), UINT8_C(111), UINT8_C( 34), UINT8_C( 25), UINT8_C( 27), UINT8_C( 16), UINT8_C(125), UINT8_C( 59) } }, { { UINT8_C( 11), UINT8_C(225), UINT8_C( 26), UINT8_C( 28), UINT8_C(127), UINT8_C( 35), UINT8_C(248), UINT8_C( 41), UINT8_C(248), UINT8_C(164), UINT8_C( 27), UINT8_C( 19), UINT8_C(181), UINT8_C(202), UINT8_C( 78), UINT8_C(232) }, { UINT8_C(152), UINT8_C( 50), UINT8_C( 56), UINT8_C(224), UINT8_C(121), UINT8_C( 77), UINT8_C( 61), UINT8_C( 52), UINT8_C(188), UINT8_C( 95), UINT8_C( 78), UINT8_C(119), UINT8_C(250), UINT8_C(203), UINT8_C(108), UINT8_C( 5) }, { UINT8_C( 11), UINT8_C( 50), UINT8_C( 26), UINT8_C( 28), UINT8_C(121), UINT8_C( 35), UINT8_C( 61), UINT8_C( 41), UINT8_C(188), UINT8_C( 95), UINT8_C( 27), UINT8_C( 19), UINT8_C(181), UINT8_C(202), UINT8_C( 78), UINT8_C( 5) } }, { { UINT8_C(172), UINT8_C(134), UINT8_C( 33), UINT8_C( 43), UINT8_C(170), UINT8_C( 26), UINT8_C( 85), UINT8_C(162), UINT8_C(190), UINT8_C(112), UINT8_C(181), UINT8_C(115), UINT8_C( 59), UINT8_C( 4), UINT8_C( 92), UINT8_C(211) }, { UINT8_C( 54), UINT8_C(148), UINT8_C(179), UINT8_C(175), UINT8_C(226), UINT8_C(240), UINT8_C(228), UINT8_C(158), UINT8_C( 79), UINT8_C( 50), UINT8_C( 21), UINT8_C( 73), UINT8_C(253), UINT8_C(130), UINT8_C( 78), UINT8_C(169) }, { UINT8_C( 54), UINT8_C(134), UINT8_C( 33), UINT8_C( 43), UINT8_C(170), UINT8_C( 26), UINT8_C( 85), UINT8_C(158), UINT8_C( 79), UINT8_C( 50), UINT8_C( 21), UINT8_C( 73), UINT8_C( 59), UINT8_C( 4), UINT8_C( 78), UINT8_C(169) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vminq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vminq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(50791), UINT16_C(29545), UINT16_C(65361), UINT16_C(60490), UINT16_C(52521), UINT16_C(43962), UINT16_C(64498), UINT16_C(18147) }, { UINT16_C(49788), UINT16_C(63572), UINT16_C(59419), UINT16_C(36327), UINT16_C(23158), UINT16_C(25390), UINT16_C(40755), UINT16_C(39625) }, { UINT16_C(49788), UINT16_C(29545), UINT16_C(59419), UINT16_C(36327), UINT16_C(23158), UINT16_C(25390), UINT16_C(40755), UINT16_C(18147) } }, { { UINT16_C(12902), UINT16_C(46861), UINT16_C(22577), UINT16_C(23203), UINT16_C(23845), UINT16_C( 5893), UINT16_C(59736), UINT16_C(54366) }, { UINT16_C(45739), UINT16_C(50893), UINT16_C(46235), UINT16_C( 4436), UINT16_C(33294), UINT16_C(16756), UINT16_C(15649), UINT16_C(34780) }, { UINT16_C(12902), UINT16_C(46861), UINT16_C(22577), UINT16_C( 4436), UINT16_C(23845), UINT16_C( 5893), UINT16_C(15649), UINT16_C(34780) } }, { { UINT16_C(59760), UINT16_C(41278), UINT16_C(57665), UINT16_C(26620), UINT16_C( 318), UINT16_C(38782), UINT16_C(56554), UINT16_C(38507) }, { UINT16_C(14479), UINT16_C(10844), UINT16_C(45292), UINT16_C(64315), UINT16_C(44850), UINT16_C(21564), UINT16_C( 6380), UINT16_C(23771) }, { UINT16_C(14479), UINT16_C(10844), UINT16_C(45292), UINT16_C(26620), UINT16_C( 318), UINT16_C(21564), UINT16_C( 6380), UINT16_C(23771) } }, { { UINT16_C( 6658), UINT16_C(17406), UINT16_C(64251), UINT16_C(15018), UINT16_C(10747), UINT16_C(59089), UINT16_C(15365), UINT16_C(38012) }, { UINT16_C(55413), UINT16_C(25022), UINT16_C(63881), UINT16_C(47964), UINT16_C(39336), UINT16_C(38159), UINT16_C(60337), UINT16_C(46065) }, { UINT16_C( 6658), UINT16_C(17406), UINT16_C(63881), 
UINT16_C(15018), UINT16_C(10747), UINT16_C(38159), UINT16_C(15365), UINT16_C(38012) } }, { { UINT16_C(61189), UINT16_C( 247), UINT16_C(41449), UINT16_C(58682), UINT16_C( 3018), UINT16_C(53451), UINT16_C(18248), UINT16_C(48484) }, { UINT16_C( 8991), UINT16_C(43038), UINT16_C(31516), UINT16_C(50532), UINT16_C(29460), UINT16_C(50522), UINT16_C(19294), UINT16_C(25465) }, { UINT16_C( 8991), UINT16_C( 247), UINT16_C(31516), UINT16_C(50532), UINT16_C( 3018), UINT16_C(50522), UINT16_C(18248), UINT16_C(25465) } }, { { UINT16_C(28731), UINT16_C( 9316), UINT16_C(40465), UINT16_C(56329), UINT16_C(54442), UINT16_C(62124), UINT16_C( 4123), UINT16_C(15279) }, { UINT16_C(52531), UINT16_C(20707), UINT16_C(18248), UINT16_C(23573), UINT16_C(28603), UINT16_C( 6434), UINT16_C(39866), UINT16_C(62845) }, { UINT16_C(28731), UINT16_C( 9316), UINT16_C(18248), UINT16_C(23573), UINT16_C(28603), UINT16_C( 6434), UINT16_C( 4123), UINT16_C(15279) } }, { { UINT16_C(57611), UINT16_C( 7194), UINT16_C( 9087), UINT16_C(10744), UINT16_C(42232), UINT16_C( 4891), UINT16_C(51893), UINT16_C(59470) }, { UINT16_C(12952), UINT16_C(57400), UINT16_C(19833), UINT16_C(13373), UINT16_C(24508), UINT16_C(30542), UINT16_C(52218), UINT16_C( 1388) }, { UINT16_C(12952), UINT16_C( 7194), UINT16_C( 9087), UINT16_C(10744), UINT16_C(24508), UINT16_C( 4891), UINT16_C(51893), UINT16_C( 1388) } }, { { UINT16_C(34476), UINT16_C(11041), UINT16_C( 6826), UINT16_C(41557), UINT16_C(28862), UINT16_C(29621), UINT16_C( 1083), UINT16_C(54108) }, { UINT16_C(37942), UINT16_C(44979), UINT16_C(61666), UINT16_C(40676), UINT16_C(12879), UINT16_C(18709), UINT16_C(33533), UINT16_C(43342) }, { UINT16_C(34476), UINT16_C(11041), UINT16_C( 6826), UINT16_C(40676), UINT16_C(12879), UINT16_C(18709), UINT16_C( 1083), UINT16_C(43342) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = 
simde_vminq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vminq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(1936311911), UINT32_C(3964338001), UINT32_C(2881146153), UINT32_C(1189346290) }, { UINT32_C(4166304380), UINT32_C(2380785691), UINT32_C(1663982198), UINT32_C(2596904755) }, { UINT32_C(1936311911), UINT32_C(2380785691), UINT32_C(1663982198), UINT32_C(1189346290) } }, { { UINT32_C(3071095398), UINT32_C(1520654385), UINT32_C( 386227493), UINT32_C(3562989912) }, { UINT32_C(3335369387), UINT32_C( 290763931), UINT32_C(1098154510), UINT32_C(2279357729) }, { UINT32_C(3071095398), UINT32_C( 290763931), UINT32_C( 386227493), UINT32_C(2279357729) } }, { { UINT32_C(2705254768), UINT32_C(1744625985), UINT32_C(2541617470), UINT32_C(2523651306) }, { UINT32_C( 710686863), UINT32_C(4214993132), UINT32_C(1413263154), UINT32_C(1557862636) }, { UINT32_C( 710686863), UINT32_C(1744625985), UINT32_C(1413263154), UINT32_C(1557862636) } }, { { UINT32_C(1140726274), UINT32_C( 984283899), UINT32_C(3872467451), UINT32_C(2491169797) }, { UINT32_C(1639897205), UINT32_C(3143432585), UINT32_C(2500827560), UINT32_C(3018976177) }, { UINT32_C(1140726274), UINT32_C( 984283899), UINT32_C(2500827560), UINT32_C(2491169797) } }, { { UINT32_C( 16248581), UINT32_C(3845825001), UINT32_C(3502967754), UINT32_C(3177465672) }, { UINT32_C(2820547359), UINT32_C(3311696668), UINT32_C(3311039252), UINT32_C(1668893534) }, { UINT32_C( 16248581), UINT32_C(3311696668), UINT32_C(3311039252), UINT32_C(1668893534) } }, { { UINT32_C( 610562107), UINT32_C(3691617809), UINT32_C(4071412906), UINT32_C(1001328667) }, { UINT32_C(1357106483), UINT32_C(1544898376), UINT32_C( 421687227), UINT32_C(4118649786) }, { UINT32_C( 610562107), UINT32_C(1544898376), UINT32_C( 421687227), UINT32_C(1001328667) } }, { { UINT32_C( 471523595), UINT32_C( 704127871), UINT32_C( 320578808), 
UINT32_C(3897477813) }, { UINT32_C(3761779352), UINT32_C( 876432761), UINT32_C(2001625020), UINT32_C( 91016186) }, { UINT32_C( 471523595), UINT32_C( 704127871), UINT32_C( 320578808), UINT32_C( 91016186) } }, { { UINT32_C( 723617452), UINT32_C(2723486378), UINT32_C(1941270718), UINT32_C(3546022971) }, { UINT32_C(2947781686), UINT32_C(2665804002), UINT32_C(1226125903), UINT32_C(2840494845) }, { UINT32_C( 723617452), UINT32_C(2665804002), UINT32_C(1226125903), UINT32_C(2840494845) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vminq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_x_vminq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(16836110598710425851), UINT64_C(12252665336215584828) }, { UINT64_C(11326009230589852014), UINT64_C(11266702940499291730) }, { UINT64_C(11326009230589852014), UINT64_C(11266702940499291730) } }, { { UINT64_C(14673735355923934806), UINT64_C( 2860500141138215636) }, { UINT64_C( 3460038703485275139), UINT64_C( 644307978453668788) }, { UINT64_C( 3460038703485275139), UINT64_C( 644307978453668788) } }, { { UINT64_C( 5193605245871186104), UINT64_C( 2289802357024789223) }, { UINT64_C( 6475483179607465679), UINT64_C( 5662890149964319965) }, { UINT64_C( 5193605245871186104), UINT64_C( 2289802357024789223) } }, { { UINT64_C(11762743331289127040), UINT64_C( 6950754694243520059) }, { UINT64_C( 7072599224735431465), UINT64_C(17679873698987824503) }, { UINT64_C( 7072599224735431465), UINT64_C( 6950754694243520059) } }, { { UINT64_C(15293987499784466911), UINT64_C(13161666064170058422) }, { UINT64_C( 1766724562881590685), UINT64_C(15774282088974808191) }, { UINT64_C( 1766724562881590685), UINT64_C(13161666064170058422) } }, { { 
UINT64_C(12889502227679859826), UINT64_C( 5366778574159809591) }, { UINT64_C( 2814732900626136662), UINT64_C( 2970973564818896229) }, { UINT64_C( 2814732900626136662), UINT64_C( 2970973564818896229) } }, { { UINT64_C(11350784541736677848), UINT64_C( 8510356479346376482) }, { UINT64_C(15900376812139415258), UINT64_C(11642499693643875506) }, { UINT64_C(11350784541736677848), UINT64_C( 8510356479346376482) } }, { { UINT64_C( 12662931778357288), UINT64_C( 4977297182304472700) }, { UINT64_C(12520976715759452016), UINT64_C(11502379781133369141) }, { UINT64_C( 12662931778357288), UINT64_C( 4977297182304472700) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_x_vminq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t r = simde_x_vminq_u64(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmin_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmin_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vmin_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmin_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmin_s32) SIMDE_TEST_FUNC_LIST_ENTRY(x_vmin_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vmin_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmin_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmin_u32) SIMDE_TEST_FUNC_LIST_ENTRY(x_vmin_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vminq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vminq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vminq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vminq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vminq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(x_vminq_s64) 
SIMDE_TEST_FUNC_LIST_ENTRY(vminq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vminq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vminq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(x_vminq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/minnm.c000066400000000000000000000302201400333146700165760ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN minnm #include "test-neon.h" #include "../../../simde/arm/neon/minnm.h" static int test_simde_vminnm_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 944.82) }, { SIMDE_FLOAT32_C( 575.31), SIMDE_MATH_NANF }, { SIMDE_FLOAT32_C( 575.31), SIMDE_FLOAT32_C( 944.82) } }, { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -343.95) }, { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 317.39) }, { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -343.95) } }, #endif { { SIMDE_FLOAT32_C( -696.17), SIMDE_FLOAT32_C( 907.59) }, { SIMDE_FLOAT32_C( -623.94), SIMDE_FLOAT32_C( 625.50) }, { SIMDE_FLOAT32_C( -696.17), SIMDE_FLOAT32_C( 625.50) } }, { { SIMDE_FLOAT32_C( -705.76), SIMDE_FLOAT32_C( -732.20) }, { SIMDE_FLOAT32_C( -126.64), SIMDE_FLOAT32_C( -660.16) }, { SIMDE_FLOAT32_C( -705.76), SIMDE_FLOAT32_C( -732.20) } }, { { SIMDE_FLOAT32_C( -661.61), SIMDE_FLOAT32_C( -734.04) }, { SIMDE_FLOAT32_C( 847.38), SIMDE_FLOAT32_C( 816.85) }, { SIMDE_FLOAT32_C( -661.61), SIMDE_FLOAT32_C( -734.04) } }, { { SIMDE_FLOAT32_C( 945.94), SIMDE_FLOAT32_C( -136.95) }, { SIMDE_FLOAT32_C( 70.32), SIMDE_FLOAT32_C( 820.87) }, { SIMDE_FLOAT32_C( 70.32), SIMDE_FLOAT32_C( -136.95) } }, { { SIMDE_FLOAT32_C( 441.43), SIMDE_FLOAT32_C( -694.16) }, { SIMDE_FLOAT32_C( 343.41), SIMDE_FLOAT32_C( 88.05) }, { SIMDE_FLOAT32_C( 343.41), SIMDE_FLOAT32_C( -694.16) } }, { { SIMDE_FLOAT32_C( 175.22), SIMDE_FLOAT32_C( -756.19) }, { SIMDE_FLOAT32_C( -558.30), SIMDE_FLOAT32_C( 795.61) }, { SIMDE_FLOAT32_C( -558.30), SIMDE_FLOAT32_C( -756.19) } } }; for (size_t i = 0 ; 
i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vminnm_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float32_t values[8 * 2 * sizeof(simde_float32x2_t)]; simde_test_arm_neon_random_f32x2_full(8, 2, values, -1000.0f, 1000.0f, SIMDE_TEST_VEC_FLOAT_NAN); for (size_t i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_extract_f32x2(i, 2, 0, values); simde_float32x2_t b = simde_test_arm_neon_random_extract_f32x2(i, 2, 1, values); simde_float32x2_t r = simde_vminnm_f32(a, b); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vminnm_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; simde_float64 b[1]; simde_float64 r[1]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN }, { SIMDE_FLOAT64_C( 814.09) }, { SIMDE_FLOAT64_C( 814.09) } }, { { SIMDE_FLOAT64_C( 857.46) }, { SIMDE_MATH_NAN }, { SIMDE_FLOAT64_C( 857.46) } }, { { SIMDE_MATH_NAN }, { SIMDE_MATH_NAN }, { SIMDE_MATH_NAN } }, #endif { { SIMDE_FLOAT64_C( 611.47) }, { SIMDE_FLOAT64_C( 938.24) }, { SIMDE_FLOAT64_C( 611.47) } }, { { SIMDE_FLOAT64_C( -733.28) }, { SIMDE_FLOAT64_C( -430.87) }, { SIMDE_FLOAT64_C( -733.28) } }, { { SIMDE_FLOAT64_C( 558.71) }, { SIMDE_FLOAT64_C( 197.76) }, { SIMDE_FLOAT64_C( 197.76) } }, { { SIMDE_FLOAT64_C( -73.48) }, { SIMDE_FLOAT64_C( -904.42) }, { SIMDE_FLOAT64_C( -904.42) } }, { { SIMDE_FLOAT64_C( 443.92) }, { SIMDE_FLOAT64_C( 926.58) }, { SIMDE_FLOAT64_C( 443.92) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = 
simde_vld1_f64(test_vec[i].b); simde_float64x1_t r = simde_vminnm_f64(a, b); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float64_t values[8 * 2 * sizeof(simde_float64x1_t)]; simde_test_arm_neon_random_f64x1_full(8, 2, values, -1000.0, 1000.0, SIMDE_TEST_VEC_FLOAT_NAN); for (size_t i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_extract_f64x1(i, 2, 0, values); simde_float64x1_t b = simde_test_arm_neon_random_extract_f64x1(i, 2, 1, values); simde_float64x1_t r = simde_vminnm_f64(a, b); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vminnmq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 819.39), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 912.19) }, { SIMDE_FLOAT32_C( -631.16), SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 587.97) }, { SIMDE_FLOAT32_C( -631.16), SIMDE_FLOAT32_C( 819.39), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 587.97) } }, #endif { { SIMDE_FLOAT32_C( 979.32), SIMDE_FLOAT32_C( -967.75), SIMDE_FLOAT32_C( -462.78), SIMDE_FLOAT32_C( -270.14) }, { SIMDE_FLOAT32_C( -821.32), SIMDE_FLOAT32_C( -724.47), SIMDE_FLOAT32_C( -442.09), SIMDE_FLOAT32_C( -73.38) }, { SIMDE_FLOAT32_C( -821.32), SIMDE_FLOAT32_C( -967.75), SIMDE_FLOAT32_C( -462.78), SIMDE_FLOAT32_C( -270.14) } }, { { SIMDE_FLOAT32_C( -910.38), SIMDE_FLOAT32_C( -584.63), SIMDE_FLOAT32_C( 694.05), SIMDE_FLOAT32_C( -314.00) }, { SIMDE_FLOAT32_C( 781.88), SIMDE_FLOAT32_C( 305.53), SIMDE_FLOAT32_C( -375.75), SIMDE_FLOAT32_C( -951.40) }, { SIMDE_FLOAT32_C( -910.38), SIMDE_FLOAT32_C( -584.63), SIMDE_FLOAT32_C( -375.75), SIMDE_FLOAT32_C( -951.40) } }, { { SIMDE_FLOAT32_C( 874.66), 
SIMDE_FLOAT32_C( -817.04), SIMDE_FLOAT32_C( 246.35), SIMDE_FLOAT32_C( -198.82) }, { SIMDE_FLOAT32_C( -721.46), SIMDE_FLOAT32_C( -309.72), SIMDE_FLOAT32_C( -272.24), SIMDE_FLOAT32_C( -582.08) }, { SIMDE_FLOAT32_C( -721.46), SIMDE_FLOAT32_C( -817.04), SIMDE_FLOAT32_C( -272.24), SIMDE_FLOAT32_C( -582.08) } }, { { SIMDE_FLOAT32_C( -490.34), SIMDE_FLOAT32_C( -147.19), SIMDE_FLOAT32_C( -669.89), SIMDE_FLOAT32_C( -121.49) }, { SIMDE_FLOAT32_C( -220.92), SIMDE_FLOAT32_C( -59.54), SIMDE_FLOAT32_C( -533.53), SIMDE_FLOAT32_C( -241.60) }, { SIMDE_FLOAT32_C( -490.34), SIMDE_FLOAT32_C( -147.19), SIMDE_FLOAT32_C( -669.89), SIMDE_FLOAT32_C( -241.60) } }, { { SIMDE_FLOAT32_C( -27.29), SIMDE_FLOAT32_C( 3.69), SIMDE_FLOAT32_C( 488.26), SIMDE_FLOAT32_C( 151.39) }, { SIMDE_FLOAT32_C( 279.22), SIMDE_FLOAT32_C( -953.83), SIMDE_FLOAT32_C( -922.00), SIMDE_FLOAT32_C( 368.84) }, { SIMDE_FLOAT32_C( -27.29), SIMDE_FLOAT32_C( -953.83), SIMDE_FLOAT32_C( -922.00), SIMDE_FLOAT32_C( 151.39) } }, { { SIMDE_FLOAT32_C( -538.47), SIMDE_FLOAT32_C( 772.06), SIMDE_FLOAT32_C( -945.16), SIMDE_FLOAT32_C( -756.59) }, { SIMDE_FLOAT32_C( 77.58), SIMDE_FLOAT32_C( -320.91), SIMDE_FLOAT32_C( -708.00), SIMDE_FLOAT32_C( -47.76) }, { SIMDE_FLOAT32_C( -538.47), SIMDE_FLOAT32_C( -320.91), SIMDE_FLOAT32_C( -945.16), SIMDE_FLOAT32_C( -756.59) } }, { { SIMDE_FLOAT32_C( -137.95), SIMDE_FLOAT32_C( 538.36), SIMDE_FLOAT32_C( 753.42), SIMDE_FLOAT32_C( 140.59) }, { SIMDE_FLOAT32_C( -771.36), SIMDE_FLOAT32_C( -518.82), SIMDE_FLOAT32_C( 558.51), SIMDE_FLOAT32_C( -261.70) }, { SIMDE_FLOAT32_C( -771.36), SIMDE_FLOAT32_C( -518.82), SIMDE_FLOAT32_C( 558.51), SIMDE_FLOAT32_C( -261.70) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vminnmq_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); 
simde_float32_t values[8 * 2 * sizeof(simde_float32x4_t)]; simde_test_arm_neon_random_f32x4_full(8, 2, values, -1000.0f, 1000.0f, SIMDE_TEST_VEC_FLOAT_NAN); for (size_t i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_extract_f32x4(i, 2, 0, values); simde_float32x4_t b = simde_test_arm_neon_random_extract_f32x4(i, 2, 1, values); simde_float32x4_t r = simde_vminnmq_f32(a, b); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vminnmq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 888.63) }, { SIMDE_FLOAT64_C( 616.81), SIMDE_MATH_NAN }, { SIMDE_FLOAT64_C( 616.81), SIMDE_FLOAT64_C( 888.63) } }, { { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -916.72) }, { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 801.79) }, { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -916.72) } }, #endif { { SIMDE_FLOAT64_C( 86.97), SIMDE_FLOAT64_C( -640.28) }, { SIMDE_FLOAT64_C( -46.82), SIMDE_FLOAT64_C( -633.81) }, { SIMDE_FLOAT64_C( -46.82), SIMDE_FLOAT64_C( -640.28) } }, { { SIMDE_FLOAT64_C( -594.11), SIMDE_FLOAT64_C( 31.18) }, { SIMDE_FLOAT64_C( 735.03), SIMDE_FLOAT64_C( -132.58) }, { SIMDE_FLOAT64_C( -594.11), SIMDE_FLOAT64_C( -132.58) } }, { { SIMDE_FLOAT64_C( -196.76), SIMDE_FLOAT64_C( 789.88) }, { SIMDE_FLOAT64_C( 110.83), SIMDE_FLOAT64_C( 880.82) }, { SIMDE_FLOAT64_C( -196.76), SIMDE_FLOAT64_C( 789.88) } }, { { SIMDE_FLOAT64_C( -531.03), SIMDE_FLOAT64_C( 402.83) }, { SIMDE_FLOAT64_C( -166.93), SIMDE_FLOAT64_C( 331.02) }, { SIMDE_FLOAT64_C( -531.03), SIMDE_FLOAT64_C( 331.02) } }, { { SIMDE_FLOAT64_C( -58.81), SIMDE_FLOAT64_C( -413.51) }, { SIMDE_FLOAT64_C( -528.39), SIMDE_FLOAT64_C( 169.82) }, { SIMDE_FLOAT64_C( -528.39), SIMDE_FLOAT64_C( -413.51) } }, { { 
SIMDE_FLOAT64_C( 67.67), SIMDE_FLOAT64_C( -969.88) }, { SIMDE_FLOAT64_C( 908.12), SIMDE_FLOAT64_C( -598.34) }, { SIMDE_FLOAT64_C( 67.67), SIMDE_FLOAT64_C( -969.88) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vminnmq_f64(a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float64_t values[8 * 2 * sizeof(simde_float64x2_t)]; simde_test_arm_neon_random_f64x2_full(8, 2, values, -1000.0, 1000.0, SIMDE_TEST_VEC_FLOAT_NAN); for (size_t i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_extract_f64x2(i, 2, 0, values); simde_float64x2_t b = simde_test_arm_neon_random_extract_f64x2(i, 2, 1, values); simde_float64x2_t r = simde_vminnmq_f64(a, b); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vminnm_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vminnm_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vminnmq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vminnmq_f64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/minv.c000066400000000000000000000722421400333146700164430ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN minv #include "test-neon.h" #include "../../../simde/arm/neon/minv.h" static int test_simde_vminv_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t a[2]; simde_float32_t r; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 534.24) }, SIMDE_MATH_NANF }, { { SIMDE_FLOAT32_C( -385.00), SIMDE_MATH_NANF }, SIMDE_MATH_NANF }, { { SIMDE_MATH_NANF, SIMDE_MATH_NANF }, SIMDE_MATH_NANF }, #endif { { SIMDE_FLOAT32_C( 453.38), 
SIMDE_FLOAT32_C( 453.38) }, SIMDE_FLOAT32_C( 453.38) }, { { SIMDE_FLOAT32_C( -282.45), SIMDE_FLOAT32_C( -125.43) }, SIMDE_FLOAT32_C( -282.45) }, { { SIMDE_FLOAT32_C( -15.54), SIMDE_FLOAT32_C( -221.82) }, SIMDE_FLOAT32_C( -221.82) }, { { SIMDE_FLOAT32_C( -502.94), SIMDE_FLOAT32_C( -155.47) }, SIMDE_FLOAT32_C( -502.94) }, { { SIMDE_FLOAT32_C( -806.84), SIMDE_FLOAT32_C( 615.87) }, SIMDE_FLOAT32_C( -806.84) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32_t r = simde_vminv_f32(a); simde_assert_equal_f32(r, test_vec[i].r, 1); } return 0; #else fputc('\n', stdout); simde_float32_t values[8 * 1 * sizeof(simde_float32x2_t)]; simde_test_arm_neon_random_f32x2_full(8, 1, values, -1000.0f, 1000.0f, SIMDE_TEST_VEC_FLOAT_NAN | SIMDE_TEST_VEC_FLOAT_EQUAL | SIMDE_TEST_VEC_FLOAT_PAIR); for (size_t i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_extract_f32x2(i, 1, 0, values); simde_float32_t r = simde_vminv_f32(a); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vminv_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t r; } test_vec[] = { { { -INT8_C( 61), -INT8_C( 120), -INT8_C( 117), -INT8_C( 49), INT8_C( 83), INT8_C( 68), INT8_C( 67), -INT8_C( 74) }, -INT8_C( 120) }, { { -INT8_C( 45), INT8_C( 42), INT8_C( 86), INT8_C( 69), -INT8_C( 15), -INT8_C( 70), -INT8_C( 75), -INT8_C( 48) }, -INT8_C( 75) }, { { INT8_C( 101), -INT8_C( 113), -INT8_C( 30), -INT8_C( 58), INT8_C( 115), INT8_C( 6), -INT8_C( 6), -INT8_C( 54) }, -INT8_C( 113) }, { { INT8_C( 121), INT8_C( 121), -INT8_C( 119), -INT8_C( 59), -INT8_C( 44), -INT8_C( 105), -INT8_C( 56), -INT8_C( 105) }, -INT8_C( 119) }, { { INT8_C( 31), INT8_C( 83), INT8_C( 102), INT8_C( 114), -INT8_C( 105), -INT8_C( 87), INT8_C( 40), INT8_C( 107) }, -INT8_C( 105) }, { { -INT8_C( 45), 
INT8_MAX, -INT8_C( 80), -INT8_C( 59), INT8_C( 57), INT8_C( 101), -INT8_C( 107), -INT8_C( 98) }, -INT8_C( 107) }, { { -INT8_C( 12), INT8_C( 119), INT8_C( 101), INT8_C( 103), INT8_C( 125), INT8_C( 95), INT8_C( 49), -INT8_C( 10) }, -INT8_C( 12) }, { { -INT8_C( 40), -INT8_C( 70), -INT8_C( 69), -INT8_C( 84), INT8_C( 81), -INT8_C( 125), INT8_C( 67), INT8_C( 112) }, -INT8_C( 125) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); int8_t r = simde_vminv_s8(a); simde_assert_equal_i8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); int8_t r = simde_vminv_s8(a); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vminv_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t r; } test_vec[] = { { { -INT16_C( 22057), INT16_C( 28386), INT16_C( 2899), INT16_C( 9945) }, -INT16_C( 22057) }, { { -INT16_C( 30326), -INT16_C( 15381), -INT16_C( 32530), -INT16_C( 7327) }, -INT16_C( 32530) }, { { -INT16_C( 14601), INT16_C( 30026), INT16_C( 31781), -INT16_C( 661) }, -INT16_C( 14601) }, { { INT16_C( 10038), -INT16_C( 30551), -INT16_C( 4950), -INT16_C( 32264) }, -INT16_C( 32264) }, { { -INT16_C( 9322), -INT16_C( 5648), -INT16_C( 13850), INT16_C( 28687) }, -INT16_C( 13850) }, { { -INT16_C( 1197), INT16_C( 16691), -INT16_C( 27525), INT16_C( 29476) }, -INT16_C( 27525) }, { { INT16_C( 28507), -INT16_C( 32536), INT16_C( 21483), INT16_C( 8574) }, -INT16_C( 32536) }, { { INT16_C( 10106), INT16_C( 9641), -INT16_C( 24044), -INT16_C( 21850) }, -INT16_C( 24044) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); int16_t r = simde_vminv_s16(a); simde_assert_equal_i16(r, test_vec[i].r); } return 0; #else fputc('\n', 
stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); int16_t r = simde_vminv_s16(a); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vminv_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t r; } test_vec[] = { { { INT32_C( 1670616701), -INT32_C( 1277975968) }, -INT32_C( 1277975968) }, { { INT32_C( 435422877), -INT32_C( 175367782) }, -INT32_C( 175367782) }, { { INT32_C( 1937142920), INT32_C( 1117058247) }, INT32_C( 1117058247) }, { { INT32_C( 795295259), INT32_C( 1574505952) }, INT32_C( 795295259) }, { { INT32_C( 79719588), -INT32_C( 1397255409) }, -INT32_C( 1397255409) }, { { INT32_C( 868592537), INT32_C( 1277776324) }, INT32_C( 868592537) }, { { -INT32_C( 1916821563), -INT32_C( 1362144109) }, -INT32_C( 1916821563) }, { { INT32_C( 1927165586), -INT32_C( 405817533) }, -INT32_C( 405817533) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); int32_t r = simde_vminv_s32(a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); int32_t r = simde_vminv_s32(a); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vminv_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t r; } test_vec[] = { { { UINT8_C(134), UINT8_C( 85), UINT8_C( 72), UINT8_C(183), UINT8_C(145), UINT8_C(178), UINT8_C( 35), UINT8_C( 42) }, UINT8_C( 35) }, { { UINT8_C(142), UINT8_C(244), UINT8_C(166), UINT8_C(237), UINT8_C( 33), UINT8_C(229), UINT8_C(169), UINT8_C( 85) }, UINT8_C( 33) }, { { UINT8_C(239), UINT8_C( 28), UINT8_C(117), UINT8_C(120), UINT8_C(247), UINT8_C(132), UINT8_C( 35), 
UINT8_C(222) }, UINT8_C( 28) }, { { UINT8_C(222), UINT8_C( 36), UINT8_C( 57), UINT8_C(238), UINT8_C(241), UINT8_C( 64), UINT8_C( 3), UINT8_C(119) }, UINT8_C( 3) }, { { UINT8_C(149), UINT8_C( 76), UINT8_C( 46), UINT8_C( 38), UINT8_C(254), UINT8_C( 81), UINT8_C( 80), UINT8_C(141) }, UINT8_C( 38) }, { { UINT8_C( 70), UINT8_C(246), UINT8_C(122), UINT8_C(103), UINT8_C(219), UINT8_C( 36), UINT8_C(188), UINT8_C(202) }, UINT8_C( 36) }, { { UINT8_C( 64), UINT8_C( 49), UINT8_C( 66), UINT8_C( 55), UINT8_C(181), UINT8_C(101), UINT8_C( 22), UINT8_C(148) }, UINT8_C( 22) }, { { UINT8_C(138), UINT8_C( 79), UINT8_C(130), UINT8_C(123), UINT8_C(143), UINT8_C(133), UINT8_C(242), UINT8_C( 36) }, UINT8_C( 36) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); uint8_t r = simde_vminv_u8(a); simde_assert_equal_u8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); uint8_t r = simde_vminv_u8(a); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vminv_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t r; } test_vec[] = { { { UINT16_C( 8401), UINT16_C(53322), UINT16_C(39537), UINT16_C(46941) }, UINT16_C( 8401) }, { { UINT16_C(55184), UINT16_C(27422), UINT16_C(56059), UINT16_C(15158) }, UINT16_C(15158) }, { { UINT16_C(30731), UINT16_C(49523), UINT16_C(35294), UINT16_C(26709) }, UINT16_C(26709) }, { { UINT16_C(55256), UINT16_C(26851), UINT16_C(54620), UINT16_C(11916) }, UINT16_C(11916) }, { { UINT16_C(55285), UINT16_C(26366), UINT16_C(23409), UINT16_C( 542) }, UINT16_C( 542) }, { { UINT16_C(15410), UINT16_C(11885), UINT16_C(41751), UINT16_C( 8809) }, UINT16_C( 8809) }, { { UINT16_C(56348), UINT16_C(64227), UINT16_C(14437), UINT16_C(15970) }, UINT16_C(14437) }, { { UINT16_C(17679), 
UINT16_C(27814), UINT16_C(12826), UINT16_C( 3994) }, UINT16_C( 3994) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); uint16_t r = simde_vminv_u16(a); simde_assert_equal_u16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); uint16_t r = simde_vminv_u16(a); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vminv_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t r; } test_vec[] = { { { UINT32_C(2071304201), UINT32_C( 628986867) }, UINT32_C( 628986867) }, { { UINT32_C(3881036496), UINT32_C(2852765070) }, UINT32_C(2852765070) }, { { UINT32_C(4288998809), UINT32_C( 893191717) }, UINT32_C( 893191717) }, { { UINT32_C(1705108299), UINT32_C( 527710997) }, UINT32_C( 527710997) }, { { UINT32_C(3332041171), UINT32_C(1307252605) }, UINT32_C(1307252605) }, { { UINT32_C(2402565889), UINT32_C(2503556604) }, UINT32_C(2402565889) }, { { UINT32_C(1351933226), UINT32_C( 780521955) }, UINT32_C( 780521955) }, { { UINT32_C(3398641332), UINT32_C( 887687009) }, UINT32_C( 887687009) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); uint32_t r = simde_vminv_u32(a); simde_assert_equal_u32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); uint32_t r = simde_vminv_u32(a); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vminvq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32_t a[4]; simde_float32_t r; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { 
SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -701.57), SIMDE_FLOAT32_C( 406.22), SIMDE_MATH_NANF }, SIMDE_MATH_NANF }, { { SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -103.16), SIMDE_FLOAT32_C( -103.16) }, SIMDE_MATH_NANF }, #endif { { SIMDE_FLOAT32_C( 696.89), SIMDE_FLOAT32_C( -458.92), SIMDE_FLOAT32_C( 883.35), SIMDE_FLOAT32_C( 910.86) }, SIMDE_FLOAT32_C( -458.92) }, { { SIMDE_FLOAT32_C( 257.45), SIMDE_FLOAT32_C( 520.20), SIMDE_FLOAT32_C( -905.06), SIMDE_FLOAT32_C( -593.51) }, SIMDE_FLOAT32_C( -905.06) }, { { SIMDE_FLOAT32_C( -269.91), SIMDE_FLOAT32_C( -858.01), SIMDE_FLOAT32_C( -48.94), SIMDE_FLOAT32_C( -616.54) }, SIMDE_FLOAT32_C( -858.01) }, { { SIMDE_FLOAT32_C( 719.99), SIMDE_FLOAT32_C( -168.74), SIMDE_FLOAT32_C( 719.60), SIMDE_FLOAT32_C( -623.28) }, SIMDE_FLOAT32_C( -623.28) }, { { SIMDE_FLOAT32_C( 759.29), SIMDE_FLOAT32_C( 309.98), SIMDE_FLOAT32_C( 52.00), SIMDE_FLOAT32_C( 647.84) }, SIMDE_FLOAT32_C( 52.00) }, { { SIMDE_FLOAT32_C( 117.95), SIMDE_FLOAT32_C( -401.59), SIMDE_FLOAT32_C( 805.20), SIMDE_FLOAT32_C( 531.01) }, SIMDE_FLOAT32_C( -401.59) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32_t r = simde_vminvq_f32(a); simde_assert_equal_f32(r, test_vec[i].r, 1); } return 0; #else fputc('\n', stdout); simde_float32_t values[8 * 1 * sizeof(simde_float32x2_t)]; simde_test_arm_neon_random_f32x4_full(8, 1, values, -1000.0f, 1000.0f, SIMDE_TEST_VEC_FLOAT_NAN | SIMDE_TEST_VEC_FLOAT_EQUAL | SIMDE_TEST_VEC_FLOAT_PAIR); for (size_t i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_extract_f32x4(i, 1, 0, values); simde_float32_t r = simde_vminvq_f32(a); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vminvq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64_t a[4]; simde_float64_t r; } test_vec[] = { #if 
!defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 916.26) }, SIMDE_MATH_NAN }, { { SIMDE_FLOAT64_C( 280.88), SIMDE_MATH_NAN }, SIMDE_MATH_NAN }, { { SIMDE_MATH_NAN, SIMDE_MATH_NAN }, SIMDE_MATH_NAN }, #endif { { SIMDE_FLOAT64_C( -759.17), SIMDE_FLOAT64_C( -759.17) }, SIMDE_FLOAT64_C( -759.17) }, { { SIMDE_FLOAT64_C( -235.01), SIMDE_FLOAT64_C( 213.31) }, SIMDE_FLOAT64_C( -235.01) }, { { SIMDE_FLOAT64_C( -536.29), SIMDE_FLOAT64_C( -137.31) }, SIMDE_FLOAT64_C( -536.29) }, { { SIMDE_FLOAT64_C( -353.98), SIMDE_FLOAT64_C( 948.71) }, SIMDE_FLOAT64_C( -353.98) }, { { SIMDE_FLOAT64_C( 575.38), SIMDE_FLOAT64_C( -153.95) }, SIMDE_FLOAT64_C( -153.95) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64_t r = simde_vminvq_f64(a); simde_assert_equal_f64(r, test_vec[i].r, 1); } return 0; #else fputc('\n', stdout); simde_float64_t values[8 * 1 * sizeof(simde_float64x2_t)]; simde_test_arm_neon_random_f64x2_full(8, 1, values, -1000.0, 1000.0, SIMDE_TEST_VEC_FLOAT_NAN | SIMDE_TEST_VEC_FLOAT_EQUAL | SIMDE_TEST_VEC_FLOAT_PAIR); for (size_t i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_extract_f64x2(i, 1, 0, values); simde_float64_t r = simde_vminvq_f64(a); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vminvq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t r; } test_vec[] = { { { INT8_C( 12), INT8_C( 34), INT8_C( 51), INT8_C( 40), INT8_C( 78), -INT8_C( 121), -INT8_C( 8), -INT8_C( 96), INT8_C( 110), -INT8_C( 109), INT8_C( 83), INT8_C( 93), INT8_C( 22), INT8_C( 58), INT8_C( 61), INT8_C( 29) }, -INT8_C( 121) }, { { INT8_C( 27), -INT8_C( 117), -INT8_C( 67), -INT8_C( 30), -INT8_C( 108), -INT8_C( 7), -INT8_C( 51), -INT8_C( 116), INT8_C( 96), -INT8_C( 40), -INT8_C( 79), INT8_C( 44), -INT8_C( 102), INT8_C( 44), 
INT8_C( 57), -INT8_C( 90) }, -INT8_C( 117) }, { { INT8_C( 79), INT8_C( 108), -INT8_C( 50), -INT8_C( 99), -INT8_C( 13), -INT8_C( 58), INT8_C( 62), INT8_C( 98), INT8_C( 89), -INT8_C( 111), -INT8_C( 65), INT8_C( 112), -INT8_C( 52), -INT8_C( 3), -INT8_C( 115), -INT8_C( 25) }, -INT8_C( 115) }, { { -INT8_C( 120), INT8_C( 74), -INT8_C( 55), INT8_C( 29), INT8_C( 68), -INT8_C( 106), -INT8_C( 87), -INT8_C( 92), INT8_C( 111), INT8_C( 90), -INT8_C( 48), INT8_C( 9), -INT8_C( 121), INT8_C( 10), -INT8_C( 81), -INT8_C( 42) }, -INT8_C( 121) }, { { INT8_C( 118), INT8_C( 125), INT8_C( 115), INT8_C( 106), INT8_C( 67), -INT8_C( 79), -INT8_C( 52), -INT8_C( 99), INT8_C( 67), -INT8_C( 117), INT8_C( 13), INT8_C( 15), -INT8_C( 120), -INT8_C( 102), -INT8_C( 10), INT8_C( 17) }, -INT8_C( 120) }, { { -INT8_C( 28), -INT8_C( 64), INT8_C( 46), INT8_C( 40), INT8_C( 86), -INT8_C( 41), -INT8_C( 51), -INT8_C( 59), INT8_C( 50), -INT8_C( 99), -INT8_C( 50), -INT8_C( 71), -INT8_C( 89), INT8_C( 125), -INT8_C( 113), INT8_C( 30) }, -INT8_C( 113) }, { { -INT8_C( 6), INT8_C( 2), -INT8_C( 120), INT8_C( 62), -INT8_C( 76), INT8_C( 84), -INT8_C( 37), -INT8_C( 9), -INT8_C( 33), -INT8_C( 24), INT8_C( 6), INT8_C( 104), -INT8_C( 126), -INT8_C( 4), INT8_C( 121), INT8_C( 102) }, -INT8_C( 126) }, { { -INT8_C( 68), -INT8_C( 89), -INT8_C( 113), INT8_C( 19), INT8_C( 126), INT8_C( 92), -INT8_C( 40), -INT8_C( 80), -INT8_C( 7), -INT8_C( 89), INT8_C( 105), -INT8_C( 95), INT8_C( 36), -INT8_C( 8), -INT8_C( 65), INT8_C( 31) }, -INT8_C( 113) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); int8_t r = simde_vminvq_s8(a); simde_assert_equal_i8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); int8_t r = simde_vminvq_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; 
#endif } static int test_simde_vminvq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t r; } test_vec[] = { { { INT16_C( 18427), -INT16_C( 20643), INT16_C( 14491), INT16_C( 31398), -INT16_C( 21472), -INT16_C( 23838), INT16_C( 23464), INT16_C( 25864) }, -INT16_C( 23838) }, { { -INT16_C( 26878), -INT16_C( 32392), INT16_C( 20723), -INT16_C( 4815), -INT16_C( 25609), INT16_C( 7310), INT16_C( 19859), -INT16_C( 29125) }, -INT16_C( 32392) }, { { -INT16_C( 26476), INT16_C( 12093), -INT16_C( 7216), -INT16_C( 3927), -INT16_C( 29553), INT16_C( 14482), -INT16_C( 25881), -INT16_C( 5475) }, -INT16_C( 29553) }, { { INT16_C( 5426), INT16_C( 9579), -INT16_C( 25499), INT16_C( 23826), -INT16_C( 24521), -INT16_C( 13447), -INT16_C( 19219), -INT16_C( 32423) }, -INT16_C( 32423) }, { { -INT16_C( 26804), INT16_C( 7344), INT16_C( 23162), INT16_C( 2572), -INT16_C( 24858), -INT16_C( 12990), -INT16_C( 8392), INT16_C( 27319) }, -INT16_C( 26804) }, { { INT16_C( 8948), INT16_C( 22928), -INT16_C( 23873), -INT16_C( 2378), INT16_C( 12099), INT16_C( 12481), INT16_C( 7139), INT16_C( 12210) }, -INT16_C( 23873) }, { { INT16_C( 25266), INT16_C( 11339), INT16_C( 22460), -INT16_C( 24010), INT16_C( 30965), INT16_C( 11888), INT16_C( 10071), INT16_C( 19352) }, -INT16_C( 24010) }, { { INT16_C( 10314), INT16_C( 2469), INT16_C( 23499), INT16_C( 3839), -INT16_C( 15989), INT16_C( 28222), -INT16_C( 3876), -INT16_C( 29026) }, -INT16_C( 29026) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); int16_t r = simde_vminvq_s16(a); simde_assert_equal_i16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); int16_t r = simde_vminvq_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vminvq_s32 
(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t r; } test_vec[] = { { { INT32_C( 263907667), INT32_C( 917696833), -INT32_C( 1050402199), -INT32_C( 1827865271) }, -INT32_C( 1827865271) }, { { -INT32_C( 258166491), -INT32_C( 1728144371), INT32_C( 956710237), -INT32_C( 2134399955) }, -INT32_C( 2134399955) }, { { -INT32_C( 812613234), -INT32_C( 603635086), -INT32_C( 1382192540), -INT32_C( 1941853849) }, -INT32_C( 1941853849) }, { { INT32_C( 1753079131), -INT32_C( 704611463), -INT32_C( 435222856), INT32_C( 963040939) }, -INT32_C( 704611463) }, { { -INT32_C( 905382312), -INT32_C( 1666839240), -INT32_C( 565558409), INT32_C( 1215007725) }, -INT32_C( 1666839240) }, { { -INT32_C( 491722648), INT32_C( 481865827), INT32_C( 1627572406), -INT32_C( 157652834) }, -INT32_C( 491722648) }, { { -INT32_C( 1748917665), INT32_C( 657745840), -INT32_C( 1744404821), INT32_C( 1910534409) }, -INT32_C( 1748917665) }, { { -INT32_C( 1135374247), -INT32_C( 153613248), INT32_C( 1918425812), -INT32_C( 1570115005) }, -INT32_C( 1570115005) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); int32_t r = simde_vminvq_s32(a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); int32_t r = simde_vminvq_s32(a); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vminvq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t r; } test_vec[] = { { { UINT8_C(127), UINT8_C( 63), UINT8_C(172), UINT8_C( 73), UINT8_C(120), UINT8_C(238), UINT8_C( 41), UINT8_C(213), UINT8_C(108), UINT8_C( 80), UINT8_C( 33), UINT8_C( 3), UINT8_C(225), UINT8_C( 6), UINT8_C( 42), UINT8_C(209) }, UINT8_C( 3) }, { { UINT8_C(235), UINT8_C(134), UINT8_C(207), UINT8_C(198), 
UINT8_C(210), UINT8_C(234), UINT8_C(130), UINT8_C(179), UINT8_C(125), UINT8_C(148), UINT8_C( 50), UINT8_C(225), UINT8_C(137), UINT8_C(148), UINT8_C(106), UINT8_C( 8) }, UINT8_C( 8) }, { { UINT8_C(211), UINT8_C( 22), UINT8_C( 81), UINT8_C( 76), UINT8_C( 5), UINT8_C(123), UINT8_C( 33), UINT8_C(113), UINT8_C(203), UINT8_C( 67), UINT8_C(116), UINT8_C(172), UINT8_C( 73), UINT8_C(159), UINT8_C(126), UINT8_C( 52) }, UINT8_C( 5) }, { { UINT8_C( 37), UINT8_C( 77), UINT8_C(250), UINT8_C(248), UINT8_C( 55), UINT8_C(124), UINT8_C(171), UINT8_C(181), UINT8_C( 17), UINT8_C(221), UINT8_C(150), UINT8_C(154), UINT8_C(113), UINT8_C( 0), UINT8_C(162), UINT8_C( 69) }, UINT8_C( 0) }, { { UINT8_C( 23), UINT8_C(244), UINT8_C(145), UINT8_C( 28), UINT8_C(111), UINT8_C(178), UINT8_C(141), UINT8_C( 58), UINT8_C(245), UINT8_C( 2), UINT8_C(230), UINT8_C( 62), UINT8_C(161), UINT8_C(100), UINT8_C(114), UINT8_C(198) }, UINT8_C( 2) }, { { UINT8_C(177), UINT8_C(108), UINT8_C(190), UINT8_C(233), UINT8_C(233), UINT8_C(105), UINT8_C(158), UINT8_C(250), UINT8_C( 71), UINT8_C( 52), UINT8_C(148), UINT8_C(184), UINT8_C( 52), UINT8_C( 54), UINT8_C(253), UINT8_C( 75) }, UINT8_C( 52) }, { { UINT8_C( 42), UINT8_C(142), UINT8_C(103), UINT8_C(153), UINT8_C( 65), UINT8_C(245), UINT8_C(211), UINT8_C( 54), UINT8_C(247), UINT8_C(186), UINT8_C(117), UINT8_C(152), UINT8_C( 30), UINT8_C(231), UINT8_C( 94), UINT8_C(208) }, UINT8_C( 30) }, { { UINT8_C( 84), UINT8_C( 29), UINT8_C(185), UINT8_C( 61), UINT8_C(134), UINT8_C( 87), UINT8_C( 55), UINT8_C(205), UINT8_C(139), UINT8_C(203), UINT8_C(134), UINT8_C(191), UINT8_C( 1), UINT8_C(131), UINT8_C( 11), UINT8_C( 44) }, UINT8_C( 1) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); uint8_t r = simde_vminvq_u8(a); simde_assert_equal_u8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); uint8_t r = 
simde_vminvq_u8(a); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vminvq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t r; } test_vec[] = { { { UINT16_C(26010), UINT16_C(25137), UINT16_C(63517), UINT16_C(43199), UINT16_C(42873), UINT16_C(20868), UINT16_C(59734), UINT16_C(35096) }, UINT16_C(20868) }, { { UINT16_C(14291), UINT16_C(55676), UINT16_C(15043), UINT16_C(49309), UINT16_C(14487), UINT16_C(49347), UINT16_C(11103), UINT16_C(63872) }, UINT16_C(11103) }, { { UINT16_C(45457), UINT16_C(44635), UINT16_C( 6826), UINT16_C( 9047), UINT16_C(56258), UINT16_C( 6260), UINT16_C(36036), UINT16_C(38817) }, UINT16_C( 6260) }, { { UINT16_C( 7619), UINT16_C(34416), UINT16_C( 3672), UINT16_C(61254), UINT16_C( 2374), UINT16_C(42415), UINT16_C(12340), UINT16_C(50590) }, UINT16_C( 2374) }, { { UINT16_C(64225), UINT16_C(35700), UINT16_C(51988), UINT16_C(54958), UINT16_C( 8870), UINT16_C(27630), UINT16_C(37038), UINT16_C(28930) }, UINT16_C( 8870) }, { { UINT16_C(29613), UINT16_C( 1527), UINT16_C(15745), UINT16_C(51189), UINT16_C(42054), UINT16_C(31596), UINT16_C( 2772), UINT16_C(46656) }, UINT16_C( 1527) }, { { UINT16_C(46084), UINT16_C( 6465), UINT16_C(61567), UINT16_C( 9967), UINT16_C(56850), UINT16_C(49553), UINT16_C(37742), UINT16_C( 6962) }, UINT16_C( 6465) }, { { UINT16_C(10758), UINT16_C(34593), UINT16_C( 5735), UINT16_C(44622), UINT16_C(47802), UINT16_C(36649), UINT16_C(27077), UINT16_C(51525) }, UINT16_C( 5735) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); uint16_t r = simde_vminvq_u16(a); simde_assert_equal_u16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); uint16_t r = simde_vminvq_u16(a); simde_test_arm_neon_write_u16x8(2, a, 
SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vminvq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t r; } test_vec[] = { { { UINT32_C(2648868382), UINT32_C(2311311990), UINT32_C( 508187824), UINT32_C(3996744936) }, UINT32_C( 508187824) }, { { UINT32_C( 242637478), UINT32_C( 733791344), UINT32_C(1153099135), UINT32_C(1812856654) }, UINT32_C( 242637478) }, { { UINT32_C(4228575365), UINT32_C(1921371586), UINT32_C( 177262370), UINT32_C(4076390731) }, UINT32_C( 177262370) }, { { UINT32_C(2483056164), UINT32_C(2998910003), UINT32_C(4025907617), UINT32_C(4267443064) }, UINT32_C(2483056164) }, { { UINT32_C(3053086451), UINT32_C(1428651827), UINT32_C(2573186894), UINT32_C(2777372801) }, UINT32_C(1428651827) }, { { UINT32_C(4181298118), UINT32_C(3903584583), UINT32_C(3956842866), UINT32_C(2565420197) }, UINT32_C(2565420197) }, { { UINT32_C(3444499354), UINT32_C(2955113826), UINT32_C(2924053037), UINT32_C(2706626010) }, UINT32_C(2706626010) }, { { UINT32_C(2828700768), UINT32_C(4170204805), UINT32_C(2363713767), UINT32_C( 908446876) }, UINT32_C( 908446876) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); uint32_t r = simde_vminvq_u32(a); simde_assert_equal_u32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); uint32_t r = simde_vminvq_u32(a); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vminv_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vminv_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vminv_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vminv_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vminv_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vminv_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vminv_u32) 
SIMDE_TEST_FUNC_LIST_ENTRY(vminvq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vminvq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vminvq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vminvq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vminvq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vminvq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vminvq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vminvq_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/mla.c000066400000000000000000002063261400333146700162450ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN mla #include "test-neon.h" #include "../../../simde/arm/neon/mla.h" static int test_simde_vmla_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 c[2]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C(316.661133), SIMDE_FLOAT32_C(532.098022) }, { SIMDE_FLOAT32_C(620.658325), SIMDE_FLOAT32_C(-997.429260) }, { SIMDE_FLOAT32_C(-285.241272), SIMDE_FLOAT32_C(-34.532288) }, { SIMDE_FLOAT32_C(-176720.718750), SIMDE_FLOAT32_C(34975.613281) } }, { { SIMDE_FLOAT32_C(471.026245), SIMDE_FLOAT32_C(750.918213) }, { SIMDE_FLOAT32_C(-25.710571), SIMDE_FLOAT32_C(227.714966) }, { SIMDE_FLOAT32_C(-713.672607), SIMDE_FLOAT32_C(-277.196289) }, { SIMDE_FLOAT32_C(18819.955078), SIMDE_FLOAT32_C(-62370.824219) } }, { { SIMDE_FLOAT32_C(194.214355), SIMDE_FLOAT32_C(-492.961609) }, { SIMDE_FLOAT32_C(-13.566528), SIMDE_FLOAT32_C(253.638428) }, { SIMDE_FLOAT32_C(459.621704), SIMDE_FLOAT32_C(-547.513184) }, { SIMDE_FLOAT32_C(-6041.256348), SIMDE_FLOAT32_C(-139363.359375) } }, { { SIMDE_FLOAT32_C(572.033813), SIMDE_FLOAT32_C(-487.424255) }, { SIMDE_FLOAT32_C(876.644287), SIMDE_FLOAT32_C(-464.728149) }, { SIMDE_FLOAT32_C(-627.000244), SIMDE_FLOAT32_C(-604.188477) }, { SIMDE_FLOAT32_C(-549084.125000), SIMDE_FLOAT32_C(280295.968750) } }, { { SIMDE_FLOAT32_C(-380.228882), SIMDE_FLOAT32_C(-880.954773) }, { SIMDE_FLOAT32_C(-273.911987), SIMDE_FLOAT32_C(-60.760437) }, { SIMDE_FLOAT32_C(460.995361), SIMDE_FLOAT32_C(-993.405762) }, { 
SIMDE_FLOAT32_C(-126652.382812), SIMDE_FLOAT32_C(59478.816406) } }, { { SIMDE_FLOAT32_C(-200.876343), SIMDE_FLOAT32_C(-222.343384) }, { SIMDE_FLOAT32_C(538.692261), SIMDE_FLOAT32_C(-580.218018) }, { SIMDE_FLOAT32_C(-219.772644), SIMDE_FLOAT32_C(-746.548950) }, { SIMDE_FLOAT32_C(-118590.695312), SIMDE_FLOAT32_C(432938.812500) } }, { { SIMDE_FLOAT32_C(385.249634), SIMDE_FLOAT32_C(-748.746277) }, { SIMDE_FLOAT32_C(-995.630615), SIMDE_FLOAT32_C(-640.460815) }, { SIMDE_FLOAT32_C(478.968628), SIMDE_FLOAT32_C(-709.303223) }, { SIMDE_FLOAT32_C(-476490.593750), SIMDE_FLOAT32_C(453532.156250) } }, { { SIMDE_FLOAT32_C(82.342896), SIMDE_FLOAT32_C(-326.816833) }, { SIMDE_FLOAT32_C(-202.264832), SIMDE_FLOAT32_C(-931.223450) }, { SIMDE_FLOAT32_C(926.821655), SIMDE_FLOAT32_C(-742.643005) }, { SIMDE_FLOAT32_C(-187381.078125), SIMDE_FLOAT32_C(691239.750000) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a, b, c, r; a = simde_vld1_f32(test_vec[i].a); b = simde_vld1_f32(test_vec[i].b); c = simde_vld1_f32(test_vec[i].c); r = simde_vmla_f32(a, b, c); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vmla_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[1]; simde_float64 b[1]; simde_float64 c[1]; simde_float64 r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( -737.86) }, { SIMDE_FLOAT64_C( 54.24) }, { SIMDE_FLOAT64_C( 214.25) }, { SIMDE_FLOAT64_C( 10883.06) } }, { { SIMDE_FLOAT64_C( -556.90) }, { SIMDE_FLOAT64_C( -78.19) }, { SIMDE_FLOAT64_C( -607.99) }, { SIMDE_FLOAT64_C( 46981.84) } }, { { SIMDE_FLOAT64_C( 487.07) }, { SIMDE_FLOAT64_C( 470.32) }, { SIMDE_FLOAT64_C( -82.83) }, { SIMDE_FLOAT64_C(-38469.54) } }, { { SIMDE_FLOAT64_C( 174.25) }, { SIMDE_FLOAT64_C( -952.32) }, { SIMDE_FLOAT64_C( 119.45) }, { SIMDE_FLOAT64_C(-113580.37) } }, { { SIMDE_FLOAT64_C( -537.41) }, { SIMDE_FLOAT64_C( 278.36) }, { SIMDE_FLOAT64_C( 723.14) }, { SIMDE_FLOAT64_C(200755.84) } }, { { 
SIMDE_FLOAT64_C( 217.52) }, { SIMDE_FLOAT64_C( 318.77) }, { SIMDE_FLOAT64_C( -673.30) }, { SIMDE_FLOAT64_C(-214410.32) } }, { { SIMDE_FLOAT64_C( -927.68) }, { SIMDE_FLOAT64_C( -114.09) }, { SIMDE_FLOAT64_C( 770.14) }, { SIMDE_FLOAT64_C(-88792.95) } }, { { SIMDE_FLOAT64_C( -953.39) }, { SIMDE_FLOAT64_C( -237.19) }, { SIMDE_FLOAT64_C( 341.63) }, { SIMDE_FLOAT64_C(-81984.61) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_float64x1_t c = simde_vld1_f64(test_vec[i].c); simde_float64x1_t r = simde_vmla_f64(a, b, c); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vmla_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t c[8]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 97), -INT8_C( 50), -INT8_C( 94), -INT8_C( 28), INT8_C( 126), -INT8_C( 17), -INT8_C( 96), INT8_C( 91) }, { -INT8_C( 103), -INT8_C( 58), INT8_C( 111), -INT8_C( 40), INT8_C( 71), INT8_C( 77), INT8_C( 29), -INT8_C( 87) }, { -INT8_C( 124), INT8_C( 119), -INT8_C( 122), INT8_C( 94), -INT8_C( 88), INT8_C( 3), -INT8_C( 29), -INT8_C( 35) }, { -INT8_C( 125), -INT8_C( 40), -INT8_C( 68), INT8_C( 52), INT8_C( 22), -INT8_C( 42), INT8_C( 87), INT8_C( 64) } }, { { INT8_C( 18), INT8_C( 99), INT8_C( 76), INT8_C( 75), INT8_C( 95), -INT8_C( 107), INT8_C( 108), -INT8_C( 2) }, { INT8_C( 100), INT8_C( 14), -INT8_C( 30), -INT8_C( 30), -INT8_C( 3), -INT8_C( 125), INT8_C( 61), -INT8_C( 105) }, { INT8_C( 73), -INT8_C( 84), INT8_C( 111), -INT8_C( 112), -INT8_C( 6), -INT8_C( 116), INT8_C( 58), INT8_C( 126) }, { -INT8_C( 106), -INT8_C( 53), INT8_C( 74), INT8_C( 107), INT8_C( 113), INT8_C( 57), INT8_C( 62), INT8_C( 80) } }, { { INT8_C( 4), -INT8_C( 64), -INT8_C( 36), -INT8_C( 84), -INT8_C( 61), -INT8_C( 65), -INT8_C( 119), -INT8_C( 42) }, { INT8_C( 35), -INT8_C( 43), INT8_C( 33), -INT8_C( 126), INT8_C( 107), 
-INT8_C( 114), INT8_MIN, -INT8_C( 49) }, { -INT8_C( 100), INT8_C( 99), -INT8_C( 79), -INT8_C( 102), -INT8_C( 26), -INT8_C( 17), INT8_C( 49), INT8_C( 47) }, { INT8_C( 88), INT8_C( 31), -INT8_C( 83), -INT8_C( 32), -INT8_C( 27), INT8_C( 81), INT8_C( 9), -INT8_C( 41) } }, { { -INT8_C( 101), -INT8_C( 96), -INT8_C( 65), -INT8_C( 107), INT8_C( 45), -INT8_C( 7), INT8_C( 19), INT8_C( 49) }, { -INT8_C( 70), -INT8_C( 16), -INT8_C( 35), INT8_C( 125), -INT8_C( 81), INT8_C( 102), INT8_C( 83), -INT8_C( 46) }, { INT8_C( 59), INT8_C( 117), INT8_C( 84), -INT8_C( 90), INT8_C( 3), -INT8_C( 43), INT8_C( 117), -INT8_C( 97) }, { INT8_C( 121), INT8_C( 80), INT8_C( 67), -INT8_C( 93), INT8_C( 58), -INT8_C( 41), INT8_C( 2), -INT8_C( 97) } }, { { INT8_C( 56), INT8_C( 39), INT8_C( 57), INT8_C( 30), INT8_C( 22), INT8_C( 106), INT8_C( 77), -INT8_C( 79) }, { INT8_C( 11), INT8_C( 12), INT8_C( 71), INT8_C( 56), INT8_C( 6), INT8_C( 90), INT8_C( 105), -INT8_C( 64) }, { INT8_C( 74), INT8_C( 70), INT8_C( 61), -INT8_C( 6), -INT8_C( 84), -INT8_C( 111), -INT8_C( 52), -INT8_C( 25) }, { INT8_C( 102), INT8_C( 111), INT8_C( 36), -INT8_C( 50), INT8_C( 30), INT8_C( 100), -INT8_C( 7), -INT8_C( 15) } }, { { INT8_C( 6), INT8_C( 33), -INT8_C( 114), INT8_C( 9), -INT8_C( 10), INT8_C( 3), -INT8_C( 88), INT8_C( 46) }, { INT8_C( 42), -INT8_C( 30), INT8_C( 76), INT8_C( 64), INT8_C( 76), -INT8_C( 103), -INT8_C( 14), INT8_C( 87) }, { -INT8_C( 91), INT8_C( 57), -INT8_C( 113), -INT8_C( 85), -INT8_C( 109), -INT8_C( 8), INT8_C( 107), -INT8_C( 34) }, { INT8_C( 24), INT8_C( 115), INT8_C( 2), -INT8_C( 55), -INT8_C( 102), INT8_C( 59), -INT8_C( 50), -INT8_C( 96) } }, { { INT8_C( 62), -INT8_C( 87), -INT8_C( 40), -INT8_C( 22), INT8_C( 58), -INT8_C( 92), -INT8_C( 46), INT8_C( 64) }, { -INT8_C( 59), INT8_C( 96), INT8_C( 73), -INT8_C( 69), INT8_C( 99), -INT8_C( 15), -INT8_C( 23), -INT8_C( 114) }, { -INT8_C( 45), INT8_C( 53), -INT8_C( 50), INT8_C( 32), -INT8_C( 50), -INT8_C( 64), INT8_C( 119), INT8_C( 116) }, { -INT8_C( 99), -INT8_C( 
119), -INT8_C( 106), INT8_C( 74), -INT8_C( 28), INT8_C( 100), INT8_C( 33), -INT8_C( 104) } }, { { -INT8_C( 7), INT8_C( 7), INT8_C( 31), -INT8_C( 115), -INT8_C( 1), -INT8_C( 117), INT8_C( 107), INT8_C( 62) }, { INT8_C( 52), INT8_C( 67), INT8_C( 40), INT8_C( 110), -INT8_C( 25), -INT8_C( 6), -INT8_C( 82), -INT8_C( 83) }, { INT8_C( 90), -INT8_C( 9), INT8_C( 104), -INT8_C( 66), -INT8_C( 24), INT8_C( 82), INT8_C( 76), -INT8_C( 68) }, { INT8_C( 65), -INT8_C( 84), INT8_C( 95), INT8_C( 49), INT8_C( 87), -INT8_C( 97), INT8_C( 19), INT8_C( 74) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t c = simde_vld1_s8(test_vec[i].c); simde_int8x8_t r = simde_vmla_s8(a, b, c); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vmla_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t c[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 23455), -INT16_C( 11689), -INT16_C( 22552), INT16_C( 11017) }, { -INT16_C( 5730), INT16_C( 14488), -INT16_C( 6817), -INT16_C( 24086) }, { -INT16_C( 21930), INT16_C( 24130), INT16_C( 6050), -INT16_C( 26539) }, { -INT16_C( 15693), INT16_C( 14727), INT16_C( 22278), -INT16_C( 8773) } }, { { -INT16_C( 1714), INT16_C( 5586), -INT16_C( 5200), INT16_C( 20237) }, { INT16_C( 25670), INT16_C( 12065), INT16_C( 11020), -INT16_C( 21926) }, { -INT16_C( 3564), INT16_C( 29922), -INT16_C( 12841), INT16_C( 11541) }, { -INT16_C( 1338), -INT16_C( 23308), -INT16_C( 20796), INT16_C( 6767) } }, { { INT16_C( 22647), INT16_C( 6539), -INT16_C( 8081), -INT16_C( 16719) }, { -INT16_C( 31526), -INT16_C( 29997), -INT16_C( 8081), -INT16_C( 18982) }, { -INT16_C( 1212), INT16_C( 20708), INT16_C( 15910), INT16_C( 15355) }, { INT16_C( 24671), -INT16_C( 21129), INT16_C( 4841), INT16_C( 18799) } }, { { -INT16_C( 8911), INT16_C( 2223), -INT16_C( 15190), INT16_C( 8502) }, { 
-INT16_C( 16100), -INT16_C( 29638), -INT16_C( 4958), INT16_C( 31818) }, { INT16_C( 7536), -INT16_C( 8442), -INT16_C( 7939), INT16_C( 16788) }, { -INT16_C( 31375), -INT16_C( 10229), INT16_C( 24772), -INT16_C( 14850) } }, { { INT16_C( 31196), INT16_C( 658), -INT16_C( 29257), -INT16_C( 6083) }, { -INT16_C( 5014), INT16_C( 5617), INT16_C( 10161), -INT16_C( 13002) }, { INT16_C( 29160), -INT16_C( 30119), -INT16_C( 23715), -INT16_C( 13050) }, { -INT16_C( 31764), -INT16_C( 29349), -INT16_C( 21500), -INT16_C( 2687) } }, { { INT16_C( 3520), -INT16_C( 16980), INT16_C( 16621), -INT16_C( 13825) }, { -INT16_C( 28231), INT16_C( 29132), INT16_C( 2334), -INT16_C( 30631) }, { INT16_C( 19190), -INT16_C( 22627), -INT16_C( 11151), INT16_C( 23156) }, { -INT16_C( 28794), -INT16_C( 25656), INT16_C( 7979), -INT16_C( 9133) } }, { { -INT16_C( 12731), -INT16_C( 23836), -INT16_C( 5263), INT16_C( 12911) }, { INT16_C( 7160), -INT16_C( 6673), -INT16_C( 4517), INT16_C( 5551) }, { INT16_C( 31615), -INT16_C( 25210), -INT16_C( 8316), INT16_C( 31270) }, { -INT16_C( 10675), -INT16_C( 28418), INT16_C( 5981), -INT16_C( 12183) } }, { { -INT16_C( 15574), -INT16_C( 25823), -INT16_C( 26985), -INT16_C( 8971) }, { -INT16_C( 9628), -INT16_C( 10882), -INT16_C( 4667), -INT16_C( 17145) }, { -INT16_C( 2296), INT16_C( 25762), INT16_C( 20965), INT16_C( 25977) }, { INT16_C( 4682), -INT16_C( 4899), -INT16_C( 25392), -INT16_C( 1980) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t c = simde_vld1_s16(test_vec[i].c); simde_int16x4_t r = simde_vmla_s16(a, b, c); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vmla_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t c[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 1360280946), -INT32_C( 1150468689) }, { INT32_C( 44236099), -INT32_C( 
1372656593) }, { INT32_C( 30169474), -INT32_C( 449840714) }, { -INT32_C( 1004492396), -INT32_C( 1675370983) } }, { { -INT32_C( 37762479), -INT32_C( 902980548) }, { INT32_C( 242751582), -INT32_C( 1714821803) }, { INT32_C( 312175587), -INT32_C( 624899752) }, { INT32_C( 1633071019), -INT32_C( 1906037132) } }, { { -INT32_C( 656700382), -INT32_C( 675411066) }, { INT32_C( 315915733), INT32_C( 2027684377) }, { INT32_C( 1887851802), INT32_C( 487149370) }, { INT32_C( 1572211908), INT32_C( 102579248) } }, { { INT32_C( 305112250), -INT32_C( 1863520146) }, { INT32_C( 40487036), -INT32_C( 1462097965) }, { -INT32_C( 1111839068), -INT32_C( 885680208) }, { -INT32_C( 1169138646), -INT32_C( 468400514) } }, { { INT32_C( 641448940), -INT32_C( 985381878) }, { INT32_C( 1456960488), -INT32_C( 521681821) }, { INT32_C( 1608667276), INT32_C( 453491831) }, { INT32_C( 1662492364), INT32_C( 1424900623) } }, { { INT32_C( 467190379), INT32_C( 1172704857) }, { -INT32_C( 731176503), INT32_C( 1318694757) }, { -INT32_C( 2036043741), -INT32_C( 1067021516) }, { -INT32_C( 536194842), INT32_C( 1368884701) } }, { { INT32_C( 1377782235), INT32_C( 1886201605) }, { INT32_C( 1116489449), INT32_C( 495415892) }, { -INT32_C( 101584236), -INT32_C( 985167198) }, { -INT32_C( 929461617), INT32_C( 2098251821) } }, { { INT32_C( 793505019), INT32_C( 1408283255) }, { INT32_C( 10817531), INT32_C( 527504182) }, { -INT32_C( 1386087079), INT32_C( 63629423) }, { INT32_C( 915704382), INT32_C( 972879585) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t c = simde_vld1_s32(test_vec[i].c); simde_int32x2_t r = simde_vmla_s32(a, b, c); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vmla_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t c[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C( 99), 
UINT8_C( 82), UINT8_C(243), UINT8_C(181), UINT8_C(230), UINT8_C(106), UINT8_C(221), UINT8_C(140) }, { UINT8_C(211), UINT8_C(191), UINT8_C(194), UINT8_C(123), UINT8_C( 58), UINT8_C(224), UINT8_C(253), UINT8_C(107) }, { UINT8_C(179), UINT8_C(200), UINT8_C(167), UINT8_C(104), UINT8_C( 33), UINT8_C(216), UINT8_C(117), UINT8_C(211) }, { UINT8_C(236), UINT8_C(138), UINT8_C(129), UINT8_C(173), UINT8_C( 96), UINT8_C(106), UINT8_C(126), UINT8_C(189) } }, { { UINT8_C( 77), UINT8_C(143), UINT8_C(122), UINT8_C( 34), UINT8_C( 94), UINT8_C( 20), UINT8_C(148), UINT8_C(194) }, { UINT8_C(102), UINT8_C(135), UINT8_C(119), UINT8_C( 76), UINT8_C(241), UINT8_C( 84), UINT8_C(216), UINT8_C(197) }, { UINT8_C( 19), UINT8_C(154), UINT8_C( 64), UINT8_C( 77), UINT8_C(122), UINT8_C( 61), UINT8_C(184), UINT8_C( 45) }, { UINT8_C(223), UINT8_C(197), UINT8_C( 58), UINT8_C(254), UINT8_C( 56), UINT8_C( 24), UINT8_C(212), UINT8_C( 99) } }, { { UINT8_C( 5), UINT8_C( 96), UINT8_C(149), UINT8_C( 38), UINT8_C( 56), UINT8_C( 10), UINT8_C(249), UINT8_C(133) }, { UINT8_C(154), UINT8_C(115), UINT8_C(167), UINT8_C(248), UINT8_C(135), UINT8_C( 59), UINT8_C(186), UINT8_C(237) }, { UINT8_C(194), UINT8_C( 49), UINT8_C( 57), UINT8_C(180), UINT8_C(133), UINT8_C( 17), UINT8_C(121), UINT8_C(153) }, { UINT8_C(185), UINT8_C( 99), UINT8_C(196), UINT8_C(134), UINT8_C( 91), UINT8_C(245), UINT8_C(227), UINT8_C( 42) } }, { { UINT8_C(171), UINT8_C(185), UINT8_C(230), UINT8_C( 38), UINT8_C(246), UINT8_C(159), UINT8_C( 83), UINT8_C(251) }, { UINT8_MAX, UINT8_C(233), UINT8_C( 33), UINT8_C( 55), UINT8_C(243), UINT8_C( 27), UINT8_C(188), UINT8_C(141) }, { UINT8_C(142), UINT8_C( 99), UINT8_C(134), UINT8_C( 22), UINT8_C(159), UINT8_C( 64), UINT8_C( 3), UINT8_C( 97) }, { UINT8_C( 29), UINT8_C(212), UINT8_C( 44), UINT8_C(224), UINT8_C(227), UINT8_C( 95), UINT8_C(135), UINT8_C(104) } }, { { UINT8_C(114), UINT8_C( 61), UINT8_C( 21), UINT8_C(247), UINT8_C( 78), UINT8_C(142), UINT8_C(144), UINT8_C(250) }, { UINT8_C( 71), UINT8_C(119), 
UINT8_C( 32), UINT8_C( 61), UINT8_C( 22), UINT8_C(115), UINT8_C( 56), UINT8_C( 21) }, { UINT8_C( 92), UINT8_C( 90), UINT8_C( 76), UINT8_C( 80), UINT8_C(117), UINT8_C( 8), UINT8_C(221), UINT8_C( 3) }, { UINT8_C(246), UINT8_C( 19), UINT8_C(149), UINT8_C( 7), UINT8_C( 92), UINT8_C( 38), UINT8_C(232), UINT8_C( 57) } }, { { UINT8_C(107), UINT8_C( 99), UINT8_C( 25), UINT8_C( 10), UINT8_C(164), UINT8_C( 29), UINT8_C(108), UINT8_C( 22) }, { UINT8_C( 90), UINT8_C(129), UINT8_C( 13), UINT8_C(168), UINT8_C( 16), UINT8_C(158), UINT8_C(162), UINT8_C( 87) }, { UINT8_C( 21), UINT8_C(194), UINT8_C(149), UINT8_C( 43), UINT8_C( 54), UINT8_C(205), UINT8_C( 64), UINT8_C(146) }, { UINT8_C(205), UINT8_C( 37), UINT8_C(170), UINT8_C( 66), UINT8_C( 4), UINT8_C(163), UINT8_C(236), UINT8_C(180) } }, { { UINT8_C( 39), UINT8_C(140), UINT8_C(226), UINT8_C(156), UINT8_C(148), UINT8_C(192), UINT8_C(160), UINT8_MAX }, { UINT8_C( 35), UINT8_C(185), UINT8_C( 10), UINT8_C(199), UINT8_C(214), UINT8_C(118), UINT8_C(221), UINT8_C( 48) }, { UINT8_C(247), UINT8_C(235), UINT8_C(217), UINT8_C( 7), UINT8_C(137), UINT8_C(123), UINT8_C( 95), UINT8_C(158) }, { UINT8_C(236), UINT8_C( 95), UINT8_C( 92), UINT8_C( 13), UINT8_C( 26), UINT8_C(114), UINT8_C(163), UINT8_C(159) } }, { { UINT8_C( 62), UINT8_C(244), UINT8_C(201), UINT8_C(116), UINT8_C(193), UINT8_C( 9), UINT8_C( 6), UINT8_C(233) }, { UINT8_C(149), UINT8_C(233), UINT8_C(133), UINT8_C( 41), UINT8_C(169), UINT8_C( 37), UINT8_C( 40), UINT8_C(204) }, { UINT8_C(223), UINT8_C( 50), UINT8_C(148), UINT8_C(181), UINT8_C(168), UINT8_C(113), UINT8_C(230), UINT8_C(160) }, { UINT8_C( 9), UINT8_C(118), UINT8_C(173), UINT8_C(113), UINT8_C(169), UINT8_C( 94), UINT8_C(246), UINT8_C(105) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t c = simde_vld1_u8(test_vec[i].c); simde_uint8x8_t r = simde_vmla_u8(a, b, c); 
simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vmla_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t c[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(33884), UINT16_C(33998), UINT16_C(54257), UINT16_C( 4534) }, { UINT16_C( 87), UINT16_C(11697), UINT16_C(54623), UINT16_C(61277) }, { UINT16_C(41847), UINT16_C(64502), UINT16_C(55349), UINT16_C(33687) }, { UINT16_C( 4557), UINT16_C(63460), UINT16_C(10396), UINT16_C(55441) } }, { { UINT16_C(47710), UINT16_C( 1621), UINT16_C(61515), UINT16_C(43253) }, { UINT16_C(50037), UINT16_C(26156), UINT16_C(58262), UINT16_C(61048) }, { UINT16_C(10723), UINT16_C(16923), UINT16_C(30974), UINT16_C(30001) }, { UINT16_C(51229), UINT16_C( 9465), UINT16_C( 3871), UINT16_C( 9709) } }, { { UINT16_C(10011), UINT16_C(20849), UINT16_C( 2303), UINT16_C(24276) }, { UINT16_C(10946), UINT16_C( 3684), UINT16_C(22810), UINT16_C(36790) }, { UINT16_C(57885), UINT16_C(46070), UINT16_C(28357), UINT16_C(43169) }, { UINT16_C(17173), UINT16_C( 4489), UINT16_C(50689), UINT16_C(12362) } }, { { UINT16_C(48279), UINT16_C(38378), UINT16_C( 6965), UINT16_C(20491) }, { UINT16_C(31810), UINT16_C(17057), UINT16_C(30340), UINT16_C(18336) }, { UINT16_C( 1184), UINT16_C(47701), UINT16_C( 2910), UINT16_C(31562) }, { UINT16_C(28119), UINT16_C(44895), UINT16_C(19373), UINT16_C(58443) } }, { { UINT16_C(16621), UINT16_C(45870), UINT16_C(53422), UINT16_C(17755) }, { UINT16_C(18060), UINT16_C(49626), UINT16_C(58721), UINT16_C(42002) }, { UINT16_C(45921), UINT16_C(59110), UINT16_C(34345), UINT16_C(51501) }, { UINT16_C(57337), UINT16_C(47370), UINT16_C(21303), UINT16_C(16005) } }, { { UINT16_C(33418), UINT16_C(59524), UINT16_C(52877), UINT16_C(31331) }, { UINT16_C(37390), UINT16_C(48173), UINT16_C(35170), UINT16_C(60929) }, { UINT16_C(56271), UINT16_C(12464), UINT16_C(49857), UINT16_C( 8916) }, { UINT16_C(38364), UINT16_C(46964), UINT16_C(42351), UINT16_C(46391) } }, { { UINT16_C(47733), 
UINT16_C(40712), UINT16_C(13632), UINT16_C(52072) }, { UINT16_C(60599), UINT16_C(17587), UINT16_C( 6074), UINT16_C(51391) }, { UINT16_C(60585), UINT16_C( 2948), UINT16_C(34165), UINT16_C(17657) }, { UINT16_C(45892), UINT16_C(48212), UINT16_C(44866), UINT16_C(51503) } }, { { UINT16_C(43361), UINT16_C( 8821), UINT16_C(18795), UINT16_C(57668) }, { UINT16_C(19716), UINT16_C(17536), UINT16_C(59522), UINT16_C(14863) }, { UINT16_C(50133), UINT16_C(36734), UINT16_C(15834), UINT16_C(33624) }, { UINT16_C(51637), UINT16_C(22901), UINT16_C(16927), UINT16_C(33644) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t c = simde_vld1_u16(test_vec[i].c); simde_uint16x4_t r = simde_vmla_u16(a, b, c); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vmla_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t c[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(3833350932), UINT32_C(3771528631) }, { UINT32_C(1108988095), UINT32_C(2937766969) }, { UINT32_C( 780880486), UINT32_C(1841350699) }, { UINT32_C(1523690798), UINT32_C(2740482890) } }, { { UINT32_C(1330709669), UINT32_C(2095324776) }, { UINT32_C( 543252585), UINT32_C( 352333142) }, { UINT32_C( 928455165), UINT32_C(1155953373) }, { UINT32_C( 614894954), UINT32_C(1165987238) } }, { { UINT32_C(3883037116), UINT32_C(3730125369) }, { UINT32_C(2653791798), UINT32_C( 756748740) }, { UINT32_C(3360521330), UINT32_C(2799521193) }, { UINT32_C(2726731208), UINT32_C( 935321757) } }, { { UINT32_C(1155347559), UINT32_C(1669907366) }, { UINT32_C(1850407732), UINT32_C(1682743085) }, { UINT32_C( 167934534), UINT32_C(4248313227) }, { UINT32_C( 662482079), UINT32_C(3523430933) } }, { { UINT32_C(1120241049), UINT32_C( 988390099) }, { UINT32_C(2105460439), UINT32_C(3202353034) }, { UINT32_C( 791423746), UINT32_C( 278100426) 
}, { UINT32_C( 790442055), UINT32_C(2944257975) } }, { { UINT32_C(2115737331), UINT32_C(1299993523) }, { UINT32_C(2878292440), UINT32_C(3152378084) }, { UINT32_C(3375916095), UINT32_C(1837570411) }, { UINT32_C(2874485531), UINT32_C( 326115071) } }, { { UINT32_C( 245150788), UINT32_C( 538914861) }, { UINT32_C(2040412870), UINT32_C(1707481741) }, { UINT32_C(1074878044), UINT32_C( 234616526) }, { UINT32_C(1381914476), UINT32_C(2487946147) } }, { { UINT32_C(3319149402), UINT32_C(2419220044) }, { UINT32_C(1067437842), UINT32_C(3311386367) }, { UINT32_C(2235497976), UINT32_C(1961493784) }, { UINT32_C(3634861770), UINT32_C( 285032756) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t c = simde_vld1_u32(test_vec[i].c); simde_uint32x2_t r = simde_vmla_u32(a, b, c); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vmlaq_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 c[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C(145.140869), SIMDE_FLOAT32_C(86.372559), SIMDE_FLOAT32_C(588.548218), SIMDE_FLOAT32_C(215.169800) }, { SIMDE_FLOAT32_C(440.652344), SIMDE_FLOAT32_C(-714.658264), SIMDE_FLOAT32_C(-845.502808), SIMDE_FLOAT32_C(657.962158) }, { SIMDE_FLOAT32_C(752.153320), SIMDE_FLOAT32_C(738.940552), SIMDE_FLOAT32_C(-56.434509), SIMDE_FLOAT32_C(-510.827606) }, { SIMDE_FLOAT32_C(331583.281250), SIMDE_FLOAT32_C(-528003.625000), SIMDE_FLOAT32_C(48304.082031), SIMDE_FLOAT32_C(-335890.062500) } }, { { SIMDE_FLOAT32_C(204.974365), SIMDE_FLOAT32_C(-576.728638), SIMDE_FLOAT32_C(89.728027), SIMDE_FLOAT32_C(612.498413) }, { SIMDE_FLOAT32_C(-581.968323), SIMDE_FLOAT32_C(434.484985), SIMDE_FLOAT32_C(-203.843140), SIMDE_FLOAT32_C(812.033081) }, { SIMDE_FLOAT32_C(-54.438843), SIMDE_FLOAT32_C(-360.203674), 
SIMDE_FLOAT32_C(-623.818970), SIMDE_FLOAT32_C(-160.126221) }, { SIMDE_FLOAT32_C(31886.656250), SIMDE_FLOAT32_C(-157079.828125), SIMDE_FLOAT32_C(127250.945312), SIMDE_FLOAT32_C(-129415.289062) } }, { { SIMDE_FLOAT32_C(-430.798706), SIMDE_FLOAT32_C(612.035400), SIMDE_FLOAT32_C(-285.161377), SIMDE_FLOAT32_C(-857.128662) }, { SIMDE_FLOAT32_C(-202.940674), SIMDE_FLOAT32_C(-256.752319), SIMDE_FLOAT32_C(574.135498), SIMDE_FLOAT32_C(942.200195) }, { SIMDE_FLOAT32_C(829.620117), SIMDE_FLOAT32_C(162.683838), SIMDE_FLOAT32_C(157.370117), SIMDE_FLOAT32_C(270.272583) }, { SIMDE_FLOAT32_C(-168794.468750), SIMDE_FLOAT32_C(-41157.417969), SIMDE_FLOAT32_C(90066.609375), SIMDE_FLOAT32_C(253793.750000) } }, { { SIMDE_FLOAT32_C(448.025513), SIMDE_FLOAT32_C(311.867310), SIMDE_FLOAT32_C(-71.765259), SIMDE_FLOAT32_C(200.178955) }, { SIMDE_FLOAT32_C(50.807861), SIMDE_FLOAT32_C(871.800171), SIMDE_FLOAT32_C(689.351196), SIMDE_FLOAT32_C(-744.217712) }, { SIMDE_FLOAT32_C(-704.928345), SIMDE_FLOAT32_C(-220.920593), SIMDE_FLOAT32_C(868.280640), SIMDE_FLOAT32_C(-286.896667) }, { SIMDE_FLOAT32_C(-35367.875000), SIMDE_FLOAT32_C(-192286.734375), SIMDE_FLOAT32_C(598478.562500), SIMDE_FLOAT32_C(213713.750000) } }, { { SIMDE_FLOAT32_C(-786.435486), SIMDE_FLOAT32_C(-335.562378), SIMDE_FLOAT32_C(-474.863464), SIMDE_FLOAT32_C(159.125610) }, { SIMDE_FLOAT32_C(304.234009), SIMDE_FLOAT32_C(-98.682495), SIMDE_FLOAT32_C(998.999390), SIMDE_FLOAT32_C(873.435303) }, { SIMDE_FLOAT32_C(-486.647034), SIMDE_FLOAT32_C(-286.161804), SIMDE_FLOAT32_C(-983.693298), SIMDE_FLOAT32_C(310.412231) }, { SIMDE_FLOAT32_C(-148841.015625), SIMDE_FLOAT32_C(27903.597656), SIMDE_FLOAT32_C(-983183.875000), SIMDE_FLOAT32_C(271284.125000) } }, { { SIMDE_FLOAT32_C(457.085815), SIMDE_FLOAT32_C(590.442139), SIMDE_FLOAT32_C(252.612549), SIMDE_FLOAT32_C(286.706055) }, { SIMDE_FLOAT32_C(-246.873962), SIMDE_FLOAT32_C(-590.017212), SIMDE_FLOAT32_C(-443.021301), SIMDE_FLOAT32_C(-798.848389) }, { SIMDE_FLOAT32_C(721.850098), 
SIMDE_FLOAT32_C(485.213501), SIMDE_FLOAT32_C(401.330566), SIMDE_FLOAT32_C(-227.341858) }, { SIMDE_FLOAT32_C(-177748.921875), SIMDE_FLOAT32_C(-285693.875000), SIMDE_FLOAT32_C(-177545.375000), SIMDE_FLOAT32_C(181898.375000) } }, { { SIMDE_FLOAT32_C(357.013794), SIMDE_FLOAT32_C(90.682007), SIMDE_FLOAT32_C(28.440430), SIMDE_FLOAT32_C(652.085327) }, { SIMDE_FLOAT32_C(869.761353), SIMDE_FLOAT32_C(-103.278809), SIMDE_FLOAT32_C(-634.811157), SIMDE_FLOAT32_C(-916.674072) }, { SIMDE_FLOAT32_C(561.158813), SIMDE_FLOAT32_C(-109.674683), SIMDE_FLOAT32_C(242.451660), SIMDE_FLOAT32_C(-134.607117) }, { SIMDE_FLOAT32_C(488431.250000), SIMDE_FLOAT32_C(11417.751953), SIMDE_FLOAT32_C(-153882.578125), SIMDE_FLOAT32_C(124042.937500) } }, { { SIMDE_FLOAT32_C(791.642822), SIMDE_FLOAT32_C(241.451050), SIMDE_FLOAT32_C(-261.171753), SIMDE_FLOAT32_C(-695.004150) }, { SIMDE_FLOAT32_C(955.289307), SIMDE_FLOAT32_C(-244.864990), SIMDE_FLOAT32_C(615.408081), SIMDE_FLOAT32_C(412.375122) }, { SIMDE_FLOAT32_C(-654.422729), SIMDE_FLOAT32_C(-131.979187), SIMDE_FLOAT32_C(-300.918701), SIMDE_FLOAT32_C(98.703369) }, { SIMDE_FLOAT32_C(-624371.437500), SIMDE_FLOAT32_C(32558.533203), SIMDE_FLOAT32_C(-185448.968750), SIMDE_FLOAT32_C(40007.808594) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t c = simde_vld1q_f32(test_vec[i].c); simde_float32x4_t r = simde_vmlaq_f32(a, b, c); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vmlaq_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 c[2]; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C(137.836326), SIMDE_FLOAT64_C(180.498224) }, { SIMDE_FLOAT64_C(70.815405), SIMDE_FLOAT64_C(-388.999976) }, { SIMDE_FLOAT64_C(-611.056813), SIMDE_FLOAT64_C(-662.416708) }, { SIMDE_FLOAT64_C(-43134.399584), 
SIMDE_FLOAT64_C(257860.581526) } }, { { SIMDE_FLOAT64_C(-410.709663), SIMDE_FLOAT64_C(-321.801186) }, { SIMDE_FLOAT64_C(20.311225), SIMDE_FLOAT64_C(-337.649517) }, { SIMDE_FLOAT64_C(-755.163089), SIMDE_FLOAT64_C(32.799101) }, { SIMDE_FLOAT64_C(-15748.996891), SIMDE_FLOAT64_C(-11396.401804) } }, { { SIMDE_FLOAT64_C(-790.783079), SIMDE_FLOAT64_C(351.759081) }, { SIMDE_FLOAT64_C(914.436994), SIMDE_FLOAT64_C(117.928983) }, { SIMDE_FLOAT64_C(-166.522701), SIMDE_FLOAT64_C(878.769813) }, { SIMDE_FLOAT64_C(-153065.301121), SIMDE_FLOAT64_C(103984.189630) } }, { { SIMDE_FLOAT64_C(-131.289381), SIMDE_FLOAT64_C(-355.855969) }, { SIMDE_FLOAT64_C(31.078331), SIMDE_FLOAT64_C(58.849271) }, { SIMDE_FLOAT64_C(-20.792446), SIMDE_FLOAT64_C(-30.490121) }, { SIMDE_FLOAT64_C(-777.483883), SIMDE_FLOAT64_C(-2150.177382) } }, { { SIMDE_FLOAT64_C(869.063754), SIMDE_FLOAT64_C(516.089691) }, { SIMDE_FLOAT64_C(-998.833277), SIMDE_FLOAT64_C(424.541474) }, { SIMDE_FLOAT64_C(263.351169), SIMDE_FLOAT64_C(168.341670) }, { SIMDE_FLOAT64_C(-262174.847521), SIMDE_FLOAT64_C(71984.110331) } }, { { SIMDE_FLOAT64_C(546.778995), SIMDE_FLOAT64_C(-598.812505) }, { SIMDE_FLOAT64_C(-651.160107), SIMDE_FLOAT64_C(-382.405600) }, { SIMDE_FLOAT64_C(12.187520), SIMDE_FLOAT64_C(-262.216919) }, { SIMDE_FLOAT64_C(-7389.247558), SIMDE_FLOAT64_C(99674.405701) } }, { { SIMDE_FLOAT64_C(-44.822308), SIMDE_FLOAT64_C(601.477857) }, { SIMDE_FLOAT64_C(415.981895), SIMDE_FLOAT64_C(975.488918) }, { SIMDE_FLOAT64_C(-736.171662), SIMDE_FLOAT64_C(660.818807) }, { SIMDE_FLOAT64_C(-306278.905223), SIMDE_FLOAT64_C(645222.900518) } }, { { SIMDE_FLOAT64_C(8.288018), SIMDE_FLOAT64_C(-526.954741) }, { SIMDE_FLOAT64_C(12.577887), SIMDE_FLOAT64_C(-77.274989) }, { SIMDE_FLOAT64_C(590.974242), SIMDE_FLOAT64_C(846.055186) }, { SIMDE_FLOAT64_C(7441.495354), SIMDE_FLOAT64_C(-65905.859876) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = 
simde_vld1q_f64(test_vec[i].b); simde_float64x2_t c = simde_vld1q_f64(test_vec[i].c); simde_float64x2_t r = simde_vmlaq_f64(a, b, c); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vmlaq_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t c[16]; int8_t r[16]; } test_vec[] = { { { -INT8_C( 119), INT8_C( 54), -INT8_C( 42), INT8_C( 7), -INT8_C( 21), -INT8_C( 83), -INT8_C( 69), INT8_C( 123), -INT8_C( 21), -INT8_C( 32), INT8_C( 5), -INT8_C( 52), -INT8_C( 83), -INT8_C( 31), INT8_C( 59), INT8_C( 78) }, { INT8_C( 53), INT8_C( 26), -INT8_C( 101), INT8_C( 21), INT8_C( 9), INT8_C( 41), -INT8_C( 27), -INT8_C( 91), -INT8_C( 79), INT8_C( 55), INT8_C( 62), INT8_C( 89), INT8_C( 47), -INT8_C( 123), INT8_C( 15), -INT8_C( 71) }, { -INT8_C( 69), -INT8_C( 27), -INT8_C( 64), -INT8_C( 89), -INT8_C( 110), INT8_C( 123), INT8_C( 34), INT8_C( 126), INT8_C( 91), INT8_C( 40), INT8_C( 74), INT8_C( 9), INT8_C( 9), -INT8_C( 123), INT8_C( 87), INT8_C( 62) }, { INT8_C( 64), INT8_C( 120), INT8_C( 22), -INT8_C( 70), INT8_C( 13), INT8_C( 96), INT8_C( 37), -INT8_C( 79), -INT8_C( 42), INT8_C( 120), -INT8_C( 15), -INT8_C( 19), INT8_C( 84), -INT8_C( 6), INT8_C( 84), INT8_C( 28) } }, { { -INT8_C( 96), -INT8_C( 13), INT8_C( 83), -INT8_C( 87), INT8_C( 28), INT8_C( 56), INT8_C( 78), -INT8_C( 51), INT8_C( 112), -INT8_C( 116), INT8_C( 39), -INT8_C( 97), INT8_C( 17), INT8_C( 54), INT8_C( 88), -INT8_C( 51) }, { INT8_C( 28), INT8_C( 24), INT8_C( 116), -INT8_C( 82), -INT8_C( 109), -INT8_C( 106), INT8_C( 44), -INT8_C( 17), -INT8_C( 66), INT8_C( 118), -INT8_C( 8), -INT8_C( 57), -INT8_C( 4), INT8_C( 79), INT8_C( 5), -INT8_C( 100) }, { INT8_C( 66), INT8_C( 88), INT8_C( 69), INT8_C( 94), -INT8_C( 111), -INT8_C( 109), INT8_C( 44), INT8_C( 1), INT8_C( 31), INT8_C( 83), -INT8_C( 96), INT8_C( 48), -INT8_C( 119), -INT8_C( 7), -INT8_C( 3), -INT8_C( 91) }, { -INT8_C( 40), INT8_C( 51), -INT8_C( 105), -INT8_C( 115), INT8_C( 95), INT8_C( 90), 
-INT8_C( 34), -INT8_C( 68), INT8_C( 114), -INT8_C( 50), INT8_C( 39), -INT8_C( 17), -INT8_C( 19), INT8_C( 13), INT8_C( 73), INT8_C( 89) } }, { { INT8_C( 17), INT8_C( 113), INT8_C( 84), -INT8_C( 91), INT8_C( 8), INT8_MIN, -INT8_C( 108), -INT8_C( 58), -INT8_C( 9), -INT8_C( 116), -INT8_C( 114), -INT8_C( 13), -INT8_C( 37), -INT8_C( 109), -INT8_C( 113), INT8_C( 30) }, { -INT8_C( 20), -INT8_C( 44), INT8_C( 124), INT8_C( 125), INT8_C( 103), -INT8_C( 88), INT8_C( 126), -INT8_C( 122), -INT8_C( 5), INT8_C( 30), -INT8_C( 74), -INT8_C( 123), INT8_C( 23), -INT8_C( 76), INT8_C( 42), INT8_C( 41) }, { INT8_C( 37), INT8_C( 126), -INT8_C( 50), INT8_C( 45), -INT8_C( 1), INT8_C( 98), -INT8_C( 12), -INT8_C( 10), -INT8_C( 18), -INT8_C( 126), -INT8_C( 23), -INT8_C( 55), INT8_C( 21), INT8_C( 120), -INT8_C( 25), INT8_C( 1) }, { INT8_C( 45), -INT8_C( 55), INT8_C( 28), -INT8_C( 98), -INT8_C( 95), -INT8_C( 48), -INT8_C( 84), -INT8_C( 118), INT8_C( 81), -INT8_C( 56), INT8_C( 52), INT8_C( 96), -INT8_C( 66), -INT8_C( 13), INT8_C( 117), INT8_C( 71) } }, { { INT8_C( 76), INT8_C( 100), INT8_C( 126), -INT8_C( 77), INT8_C( 12), -INT8_C( 4), INT8_C( 57), INT8_C( 8), INT8_C( 27), -INT8_C( 17), -INT8_C( 115), INT8_C( 50), -INT8_C( 93), -INT8_C( 73), INT8_C( 91), -INT8_C( 55) }, { INT8_C( 54), INT8_C( 41), -INT8_C( 10), INT8_C( 53), -INT8_C( 117), -INT8_C( 22), INT8_C( 43), INT8_C( 121), INT8_C( 108), INT8_C( 20), INT8_C( 67), -INT8_C( 126), -INT8_C( 116), INT8_C( 42), -INT8_C( 125), -INT8_C( 40) }, { -INT8_C( 114), INT8_C( 2), -INT8_C( 117), -INT8_C( 101), -INT8_C( 2), -INT8_C( 60), -INT8_C( 93), INT8_C( 25), -INT8_C( 77), INT8_C( 48), INT8_C( 76), INT8_C( 87), -INT8_C( 25), -INT8_C( 89), INT8_C( 32), INT8_C( 29) }, { INT8_C( 64), -INT8_C( 74), INT8_C( 16), -INT8_C( 54), -INT8_C( 10), INT8_C( 36), -INT8_C( 102), -INT8_C( 39), -INT8_C( 97), -INT8_C( 81), INT8_C( 113), INT8_C( 96), -INT8_C( 9), INT8_C( 29), -INT8_C( 69), INT8_C( 65) } }, { { -INT8_C( 47), INT8_C( 22), INT8_C( 82), INT8_C( 92), INT8_C( 1), 
INT8_C( 125), -INT8_C( 42), INT8_C( 109), -INT8_C( 111), INT8_C( 25), -INT8_C( 17), INT8_C( 29), INT8_C( 67), INT8_C( 115), -INT8_C( 11), -INT8_C( 46) }, { INT8_C( 117), INT8_MIN, INT8_C( 109), INT8_C( 115), INT8_C( 68), INT8_C( 16), -INT8_C( 115), -INT8_C( 8), INT8_C( 64), -INT8_C( 39), INT8_C( 79), INT8_C( 39), INT8_MIN, INT8_C( 111), INT8_C( 69), INT8_C( 81) }, { -INT8_C( 123), -INT8_C( 105), -INT8_C( 82), -INT8_C( 122), INT8_C( 21), -INT8_C( 124), -INT8_C( 12), -INT8_C( 90), -INT8_C( 99), -INT8_C( 29), -INT8_C( 60), -INT8_C( 32), INT8_C( 86), -INT8_C( 71), -INT8_C( 78), -INT8_C( 53) }, { -INT8_C( 102), -INT8_C( 106), INT8_C( 104), -INT8_C( 114), -INT8_C( 107), -INT8_C( 67), INT8_C( 58), INT8_C( 61), -INT8_C( 47), -INT8_C( 124), INT8_C( 107), INT8_C( 61), INT8_C( 67), -INT8_C( 86), -INT8_C( 17), INT8_C( 13) } }, { { INT8_C( 58), INT8_C( 31), INT8_C( 63), INT8_C( 126), INT8_C( 47), -INT8_C( 52), INT8_C( 118), INT8_C( 111), -INT8_C( 91), -INT8_C( 59), -INT8_C( 105), INT8_C( 37), INT8_C( 52), -INT8_C( 36), INT8_C( 119), -INT8_C( 70) }, { INT8_C( 115), INT8_C( 37), INT8_C( 64), -INT8_C( 120), -INT8_C( 87), INT8_C( 52), INT8_C( 47), INT8_C( 70), INT8_C( 24), -INT8_C( 13), INT8_C( 38), INT8_C( 110), -INT8_C( 84), -INT8_C( 39), INT8_C( 58), -INT8_C( 26) }, { -INT8_C( 8), INT8_C( 121), INT8_C( 101), INT8_C( 40), INT8_C( 69), -INT8_C( 37), -INT8_C( 105), -INT8_C( 22), -INT8_C( 95), INT8_C( 46), INT8_C( 15), -INT8_C( 43), INT8_C( 10), -INT8_C( 122), -INT8_C( 113), INT8_C( 126) }, { -INT8_C( 94), -INT8_C( 100), INT8_MAX, -INT8_C( 66), -INT8_C( 68), INT8_C( 72), INT8_C( 47), INT8_C( 107), -INT8_C( 67), INT8_C( 111), -INT8_C( 47), -INT8_C( 85), -INT8_C( 20), INT8_C( 114), -INT8_C( 35), -INT8_C( 18) } }, { { -INT8_C( 85), -INT8_C( 48), INT8_C( 6), INT8_C( 84), INT8_C( 4), INT8_C( 53), -INT8_C( 102), INT8_C( 28), INT8_C( 40), -INT8_C( 63), -INT8_C( 117), -INT8_C( 43), -INT8_C( 102), -INT8_C( 59), -INT8_C( 69), -INT8_C( 110) }, { INT8_C( 62), INT8_C( 32), -INT8_C( 70), -INT8_C( 
125), -INT8_C( 4), INT8_C( 82), INT8_C( 109), -INT8_C( 99), INT8_MIN, INT8_C( 124), INT8_C( 114), -INT8_C( 117), INT8_C( 3), INT8_C( 2), INT8_C( 9), -INT8_C( 82) }, { -INT8_C( 46), INT8_C( 15), INT8_C( 3), -INT8_C( 42), INT8_C( 69), -INT8_C( 99), -INT8_C( 13), INT8_C( 109), INT8_C( 94), INT8_C( 126), INT8_C( 66), -INT8_C( 8), INT8_C( 67), -INT8_C( 2), -INT8_C( 117), -INT8_C( 127) }, { -INT8_C( 121), -INT8_C( 80), INT8_C( 52), -INT8_C( 42), -INT8_C( 16), INT8_MAX, INT8_C( 17), -INT8_C( 11), INT8_C( 40), -INT8_C( 55), -INT8_C( 17), INT8_C( 125), INT8_C( 99), -INT8_C( 63), -INT8_C( 98), INT8_C( 64) } }, { { INT8_C( 30), INT8_C( 69), INT8_C( 4), INT8_C( 26), -INT8_C( 105), INT8_C( 113), -INT8_C( 73), INT8_C( 24), -INT8_C( 19), INT8_C( 42), -INT8_C( 93), -INT8_C( 16), INT8_C( 44), -INT8_C( 84), -INT8_C( 97), -INT8_C( 2) }, { -INT8_C( 69), -INT8_C( 94), -INT8_C( 44), INT8_C( 0), INT8_C( 63), -INT8_C( 57), INT8_C( 110), -INT8_C( 98), INT8_C( 69), -INT8_C( 80), -INT8_C( 106), -INT8_C( 120), -INT8_C( 82), INT8_C( 33), INT8_C( 9), -INT8_C( 51) }, { INT8_C( 103), INT8_C( 13), -INT8_C( 25), -INT8_C( 2), INT8_C( 126), -INT8_C( 97), INT8_C( 22), INT8_C( 108), -INT8_C( 55), -INT8_C( 71), INT8_C( 92), -INT8_C( 11), INT8_C( 101), -INT8_C( 5), -INT8_C( 13), INT8_C( 33) }, { INT8_C( 91), INT8_MAX, INT8_C( 80), INT8_C( 26), -INT8_C( 103), INT8_C( 10), INT8_C( 43), -INT8_C( 64), INT8_C( 26), INT8_C( 90), -INT8_C( 117), INT8_C( 24), -INT8_C( 46), INT8_C( 7), INT8_C( 42), INT8_C( 107) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t c = simde_vld1q_s8(test_vec[i].c); simde_int8x16_t r = simde_vmlaq_s8(a, b, c); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vmlaq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t c[8]; int16_t r[8]; } test_vec[] = { { { 
INT16_C( 11663), -INT16_C( 12561), -INT16_C( 6544), INT16_C( 27850), INT16_C( 31437), -INT16_C( 27894), INT16_C( 9795), INT16_C( 8816) }, { -INT16_C( 10231), INT16_C( 3711), INT16_C( 27259), -INT16_C( 27438), INT16_C( 30269), INT16_C( 21709), INT16_C( 31469), INT16_C( 31823) }, { INT16_C( 16039), INT16_C( 5962), INT16_C( 5156), -INT16_C( 3709), -INT16_C( 29298), -INT16_C( 11899), -INT16_C( 2637), -INT16_C( 17164) }, { INT16_C( 18798), INT16_C( 26789), INT16_C( 31676), INT16_C( 17984), -INT16_C( 22109), -INT16_C( 373), -INT16_C( 5382), -INT16_C( 24132) } }, { { INT16_C( 29645), INT16_C( 18635), -INT16_C( 25123), INT16_C( 6876), -INT16_C( 22252), INT16_C( 366), -INT16_C( 17117), -INT16_C( 13699) }, { -INT16_C( 14341), INT16_C( 8161), INT16_C( 25819), INT16_C( 27152), -INT16_C( 27150), -INT16_C( 23237), INT16_C( 12170), INT16_C( 22626) }, { INT16_C( 11682), INT16_C( 32672), INT16_C( 32202), -INT16_C( 8551), INT16_C( 1830), INT16_C( 19167), INT16_C( 23748), -INT16_C( 16620) }, { INT16_C( 8099), -INT16_C( 11157), INT16_C( 8619), INT16_C( 24172), -INT16_C( 30464), -INT16_C( 557), -INT16_C( 17717), -INT16_C( 12251) } }, { { -INT16_C( 2525), -INT16_C( 34), -INT16_C( 4262), INT16_C( 19561), -INT16_C( 23420), INT16_C( 4082), INT16_C( 21716), INT16_C( 30311) }, { INT16_C( 1921), INT16_C( 19446), -INT16_C( 28796), -INT16_C( 21718), INT16_C( 2455), INT16_C( 23541), INT16_C( 2406), -INT16_C( 30437) }, { -INT16_C( 1537), INT16_C( 23176), -INT16_C( 3608), INT16_C( 28070), -INT16_C( 26474), INT16_C( 27260), -INT16_C( 7188), INT16_C( 28128) }, { -INT16_C( 5982), -INT16_C( 10610), INT16_C( 17146), INT16_C( 11173), -INT16_C( 5378), INT16_C( 3230), INT16_C( 28892), -INT16_C( 4857) } }, { { -INT16_C( 10518), INT16_C( 28601), -INT16_C( 7322), -INT16_C( 742), INT16_C( 4076), INT16_C( 21080), INT16_C( 29464), INT16_C( 6364) }, { INT16_C( 25709), INT16_C( 21874), INT16_C( 6230), -INT16_C( 4926), INT16_C( 16049), -INT16_C( 25258), INT16_C( 13857), INT16_C( 3083) }, { -INT16_C( 15347), 
INT16_C( 29563), -INT16_C( 27225), -INT16_C( 27792), -INT16_C( 14172), -INT16_C( 17178), -INT16_C( 15812), -INT16_C( 22060) }, { INT16_C( 25715), -INT16_C( 19585), -INT16_C( 11904), -INT16_C( 2054), -INT16_C( 32432), -INT16_C( 10852), INT16_C( 9428), INT16_C( 21752) } }, { { INT16_C( 17958), INT16_C( 31998), -INT16_C( 16033), INT16_C( 4200), -INT16_C( 16641), INT16_C( 8621), -INT16_C( 18187), INT16_C( 557) }, { -INT16_C( 22404), INT16_C( 9077), -INT16_C( 6851), -INT16_C( 7753), -INT16_C( 25171), -INT16_C( 5731), INT16_C( 29279), -INT16_C( 31342) }, { -INT16_C( 28232), INT16_C( 5890), INT16_C( 27218), INT16_C( 20775), -INT16_C( 10967), INT16_C( 7794), -INT16_C( 24691), INT16_C( 2592) }, { -INT16_C( 25786), INT16_C( 18152), INT16_C( 28905), INT16_C( 23113), -INT16_C( 3916), -INT16_C( 28777), -INT16_C( 18360), INT16_C( 26733) } }, { { -INT16_C( 27321), -INT16_C( 31699), -INT16_C( 7046), INT16_C( 10085), INT16_C( 897), -INT16_C( 8175), -INT16_C( 23691), INT16_C( 11622) }, { INT16_C( 26676), -INT16_C( 31163), INT16_C( 27858), -INT16_C( 1064), INT16_C( 19009), -INT16_C( 12519), INT16_C( 14826), INT16_C( 12761) }, { INT16_C( 1742), INT16_C( 18614), INT16_C( 7147), INT16_C( 27760), -INT16_C( 32482), -INT16_C( 27827), -INT16_C( 19676), INT16_C( 22977) }, { -INT16_C( 22753), INT16_C( 24891), -INT16_C( 4288), INT16_C( 30181), INT16_C( 30751), -INT16_C( 31338), INT16_C( 26205), INT16_C( 13055) } }, { { INT16_C( 1563), -INT16_C( 4641), -INT16_C( 18574), -INT16_C( 19223), INT16_C( 514), -INT16_C( 4989), INT16_C( 23612), INT16_C( 2589) }, { -INT16_C( 11422), INT16_C( 19795), -INT16_C( 15377), INT16_C( 3514), INT16_C( 1860), INT16_C( 26785), INT16_C( 25274), -INT16_C( 10815) }, { -INT16_C( 24216), -INT16_C( 9534), -INT16_C( 21672), INT16_C( 23182), INT16_C( 4526), -INT16_C( 5562), INT16_C( 25709), -INT16_C( 12044) }, { -INT16_C( 30741), INT16_C( 13509), -INT16_C( 18790), -INT16_C( 18923), INT16_C( 30266), -INT16_C( 19831), INT16_C( 3438), -INT16_C( 27119) } }, { { INT16_C( 18231), 
INT16_C( 9757), -INT16_C( 10486), INT16_C( 20020), -INT16_C( 10786), -INT16_C( 26441), INT16_C( 30775), -INT16_C( 24723) }, { INT16_C( 12313), INT16_C( 29305), INT16_C( 2267), -INT16_C( 30260), INT16_C( 4889), -INT16_C( 30861), INT16_C( 26743), -INT16_C( 20905) }, { INT16_C( 29871), -INT16_C( 17707), INT16_C( 2380), INT16_C( 10760), -INT16_C( 16418), INT16_C( 5571), INT16_C( 12344), INT16_C( 20916) }, { INT16_C( 31822), INT16_C( 20170), INT16_C( 11022), INT16_C( 5268), INT16_C( 3212), INT16_C( 13392), -INT16_C( 24001), -INT16_C( 17511) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t c = simde_vld1q_s16(test_vec[i].c); simde_int16x8_t r = simde_vmlaq_s16(a, b, c); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vmlaq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t c[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 710046150), -INT32_C( 241640202), -INT32_C( 1703216902), -INT32_C( 1791793692) }, { INT32_C( 934905717), INT32_C( 54536198), -INT32_C( 808807169), INT32_C( 1335743881) }, { INT32_C( 1031401287), -INT32_C( 969993524), -INT32_C( 278812406), -INT32_C( 2088463346) }, { -INT32_C( 1148568007), INT32_C( 144911806), -INT32_C( 94363152), INT32_C( 1693206370) } }, { { INT32_C( 582696476), INT32_C( 1713830758), INT32_C( 406188175), INT32_C( 208130757) }, { -INT32_C( 1907695166), -INT32_C( 44795405), INT32_C( 837596450), INT32_C( 1723101514) }, { INT32_C( 361263023), -INT32_C( 109334934), INT32_C( 1678881182), INT32_C( 1164999043) }, { -INT32_C( 1193365574), -INT32_C( 1463442428), -INT32_C( 523485813), INT32_C( 1722402211) } }, { { INT32_C( 1305721434), INT32_C( 1447700531), INT32_C( 663173085), INT32_C( 1468873640) }, { INT32_C( 359405227), INT32_C( 1661987012), INT32_C( 482812057), -INT32_C( 211667047) }, { INT32_C( 624965106), 
INT32_C( 997952349), INT32_C( 1784808130), -INT32_C( 373166018) }, { INT32_C( 1349684992), -INT32_C( 123722905), INT32_C( 1497439695), -INT32_C( 1999691082) } }, { { -INT32_C( 889311994), -INT32_C( 1372779243), -INT32_C( 942934994), INT32_C( 515583276) }, { -INT32_C( 1069286558), INT32_C( 1224458118), -INT32_C( 5087807), INT32_C( 1407742797) }, { -INT32_C( 1239488607), INT32_C( 577063924), INT32_C( 1827287104), -INT32_C( 1081432739) }, { INT32_C( 789623720), INT32_C( 1528421069), INT32_C( 1147245678), INT32_C( 117420325) } }, { { INT32_C( 662687136), INT32_C( 1315928716), INT32_C( 625812184), INT32_C( 913913493) }, { INT32_C( 300783389), INT32_C( 573854434), -INT32_C( 544334206), INT32_C( 1671305411) }, { INT32_C( 1921654502), INT32_C( 1891695256), -INT32_C( 1315566052), INT32_C( 1642598212) }, { INT32_C( 2051864494), -INT32_C( 1074699076), INT32_C( 548377872), INT32_C( 1134770017) } }, { { -INT32_C( 2005740122), -INT32_C( 1448368601), -INT32_C( 2021115452), INT32_C( 954934866) }, { -INT32_C( 576031420), -INT32_C( 1957860497), -INT32_C( 1120017543), -INT32_C( 1742854670) }, { INT32_C( 555847930), -INT32_C( 70595529), INT32_C( 1468224005), -INT32_C( 1114673544) }, { -INT32_C( 995911154), -INT32_C( 478138624), -INT32_C( 1836749535), INT32_C( 1468591042) } }, { { INT32_C( 1402616547), INT32_C( 517924773), -INT32_C( 1109713973), INT32_C( 978778432) }, { -INT32_C( 1067747447), INT32_C( 1220289859), -INT32_C( 257933449), -INT32_C( 1867698259) }, { INT32_C( 249775977), -INT32_C( 114507218), INT32_C( 515311581), -INT32_C( 1973940992) }, { INT32_C( 331366676), INT32_C( 1466851247), INT32_C( 704587398), -INT32_C( 1422212544) } }, { { -INT32_C( 951405436), INT32_C( 1359939289), -INT32_C( 230576315), INT32_C( 1216605919) }, { INT32_C( 1666672181), INT32_C( 106791720), -INT32_C( 1960569717), -INT32_C( 1525318623) }, { INT32_C( 174874672), -INT32_C( 1403290778), INT32_C( 178166827), -INT32_C( 1085136502) }, { INT32_C( 1583201908), INT32_C( 1200455881), INT32_C( 1772082846), 
INT32_C( 1823152553) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t c = simde_vld1q_s32(test_vec[i].c); simde_int32x4_t r = simde_vmlaq_s32(a, b, c); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vmlaq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t c[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C( 2), UINT8_C( 10), UINT8_C(124), UINT8_C(234), UINT8_C( 83), UINT8_C(137), UINT8_C( 86), UINT8_C( 16), UINT8_C( 13), UINT8_C( 45), UINT8_C(130), UINT8_C(164), UINT8_C(239), UINT8_C( 2), UINT8_C(135), UINT8_C(180) }, { UINT8_C(153), UINT8_C(251), UINT8_C(146), UINT8_C(239), UINT8_C( 13), UINT8_C(152), UINT8_C(238), UINT8_C( 33), UINT8_C(186), UINT8_C(155), UINT8_C(198), UINT8_C(106), UINT8_C(253), UINT8_C(193), UINT8_MAX, UINT8_C( 0) }, { UINT8_C(204), UINT8_C(123), UINT8_C(234), UINT8_C( 31), UINT8_C( 4), UINT8_C( 64), UINT8_C( 47), UINT8_C( 17), UINT8_C(109), UINT8_C(177), UINT8_C(181), UINT8_C( 92), UINT8_C(179), UINT8_C( 60), UINT8_C( 16), UINT8_C( 77) }, { UINT8_C(238), UINT8_C(163), UINT8_C(240), UINT8_C(219), UINT8_C(135), UINT8_C(137), UINT8_C( 8), UINT8_C( 65), UINT8_C( 63), UINT8_C( 88), UINT8_C(128), UINT8_C(188), UINT8_C(214), UINT8_C( 62), UINT8_C(119), UINT8_C(180) } }, { { UINT8_C( 55), UINT8_C(162), UINT8_C( 60), UINT8_C( 68), UINT8_C( 59), UINT8_C( 42), UINT8_C(102), UINT8_C(245), UINT8_C(197), UINT8_C( 44), UINT8_C( 96), UINT8_C(195), UINT8_C(238), UINT8_C( 95), UINT8_C(195), UINT8_C(186) }, { UINT8_C(218), UINT8_C(173), UINT8_C(217), UINT8_C(223), UINT8_C(237), UINT8_C( 8), UINT8_C(240), UINT8_C( 90), UINT8_C(185), UINT8_C(166), UINT8_C(183), UINT8_C(108), UINT8_C(226), UINT8_C(199), UINT8_C(185), UINT8_C( 26) }, { UINT8_C(106), UINT8_C(245), UINT8_C( 94), UINT8_C(165), UINT8_C( 31), UINT8_C(196), 
UINT8_C(154), UINT8_C(229), UINT8_C(241), UINT8_C(250), UINT8_C(168), UINT8_C(223), UINT8_C( 89), UINT8_C(107), UINT8_C(153), UINT8_C( 52) }, { UINT8_C(123), UINT8_C( 51), UINT8_C(234), UINT8_MAX, UINT8_C(238), UINT8_C( 74), UINT8_C(198), UINT8_C(119), UINT8_C(238), UINT8_C( 72), UINT8_C(120), UINT8_C(215), UINT8_C(128), UINT8_C(140), UINT8_C( 84), UINT8_C( 2) } }, { { UINT8_C( 24), UINT8_C(114), UINT8_C( 19), UINT8_C( 5), UINT8_C(122), UINT8_C( 3), UINT8_C( 95), UINT8_C( 51), UINT8_C(169), UINT8_C( 22), UINT8_C(159), UINT8_C(140), UINT8_C(222), UINT8_C( 89), UINT8_C(166), UINT8_C( 72) }, { UINT8_C( 78), UINT8_C( 4), UINT8_C(237), UINT8_C(110), UINT8_C(201), UINT8_C(135), UINT8_C( 83), UINT8_C(186), UINT8_C(130), UINT8_C(251), UINT8_C(153), UINT8_C(219), UINT8_C(102), UINT8_C( 50), UINT8_C( 15), UINT8_C(126) }, { UINT8_C(164), UINT8_C( 34), UINT8_C(131), UINT8_C( 30), UINT8_C( 38), UINT8_C(226), UINT8_C( 81), UINT8_C(207), UINT8_C(249), UINT8_C(240), UINT8_C( 91), UINT8_C(215), UINT8_C( 73), UINT8_C( 1), UINT8_C( 31), UINT8_C(152) }, { UINT8_C( 16), UINT8_C(250), UINT8_C( 90), UINT8_C(233), UINT8_C( 80), UINT8_C( 49), UINT8_C(162), UINT8_C(153), UINT8_C( 27), UINT8_C(102), UINT8_C( 2), UINT8_C(121), UINT8_C(244), UINT8_C(139), UINT8_C(119), UINT8_C( 24) } }, { { UINT8_C( 6), UINT8_C( 12), UINT8_C( 6), UINT8_C(207), UINT8_C(147), UINT8_C( 89), UINT8_C(137), UINT8_C( 21), UINT8_C( 84), UINT8_C( 34), UINT8_C(241), UINT8_C(186), UINT8_C( 84), UINT8_C( 0), UINT8_C( 56), UINT8_C(248) }, { UINT8_C( 35), UINT8_C(187), UINT8_C( 22), UINT8_C( 73), UINT8_C(157), UINT8_C(103), UINT8_C( 24), UINT8_C(150), UINT8_C( 87), UINT8_C(116), UINT8_C(109), UINT8_C(161), UINT8_C(117), UINT8_C(140), UINT8_C( 57), UINT8_C(123) }, { UINT8_C(152), UINT8_C( 63), UINT8_C( 74), UINT8_C( 44), UINT8_C(152), UINT8_C(211), UINT8_C( 65), UINT8_C(236), UINT8_C(245), UINT8_C( 50), UINT8_C(166), UINT8_C( 73), UINT8_C( 51), UINT8_C(222), UINT8_C( 65), UINT8_C( 86) }, { UINT8_C(206), UINT8_C( 17), 
UINT8_C( 98), UINT8_C( 91), UINT8_C(203), UINT8_C( 62), UINT8_C(161), UINT8_C( 93), UINT8_C(151), UINT8_C(202), UINT8_C(159), UINT8_C(163), UINT8_C(163), UINT8_C(104), UINT8_C(177), UINT8_C( 74) } }, { { UINT8_C(153), UINT8_C( 87), UINT8_C(159), UINT8_C( 54), UINT8_C(190), UINT8_C(183), UINT8_C(205), UINT8_C( 22), UINT8_C( 43), UINT8_C( 58), UINT8_C(183), UINT8_C(161), UINT8_C(199), UINT8_C(240), UINT8_C( 28), UINT8_C( 95) }, { UINT8_C( 47), UINT8_C(103), UINT8_C(139), UINT8_C(199), UINT8_C( 58), UINT8_C(205), UINT8_C(179), UINT8_C( 48), UINT8_MAX, UINT8_C( 89), UINT8_C(121), UINT8_C( 50), UINT8_C( 55), UINT8_C(187), UINT8_C(136), UINT8_C(208) }, { UINT8_C( 18), UINT8_C( 39), UINT8_C( 6), UINT8_C(209), UINT8_C(223), UINT8_C(211), UINT8_C(231), UINT8_C( 10), UINT8_C( 14), UINT8_C(158), UINT8_C(171), UINT8_C(213), UINT8_C(142), UINT8_C(200), UINT8_C( 52), UINT8_C(189) }, { UINT8_C(231), UINT8_C( 8), UINT8_C(225), UINT8_C(173), UINT8_C( 68), UINT8_C(174), UINT8_C( 82), UINT8_C(246), UINT8_C( 29), UINT8_C( 40), UINT8_C(138), UINT8_C( 59), UINT8_C( 73), UINT8_C( 8), UINT8_C(188), UINT8_C(239) } }, { { UINT8_C( 47), UINT8_C(192), UINT8_C(132), UINT8_C(105), UINT8_C(141), UINT8_C( 55), UINT8_C(153), UINT8_C(140), UINT8_C(144), UINT8_C( 19), UINT8_C(191), UINT8_C(199), UINT8_C(206), UINT8_C( 71), UINT8_C(151), UINT8_C(224) }, { UINT8_C(111), UINT8_C(157), UINT8_C(177), UINT8_C( 78), UINT8_C(113), UINT8_C(152), UINT8_C( 88), UINT8_C(127), UINT8_C( 54), UINT8_C( 4), UINT8_C( 84), UINT8_C(196), UINT8_C(204), UINT8_C(136), UINT8_C(129), UINT8_C(251) }, { UINT8_C( 72), UINT8_C( 5), UINT8_C(100), UINT8_C(213), UINT8_C( 60), UINT8_C(254), UINT8_C( 98), UINT8_C(204), UINT8_C( 17), UINT8_C( 33), UINT8_C(147), UINT8_C(223), UINT8_C(104), UINT8_C( 42), UINT8_C(191), UINT8_C(215) }, { UINT8_C(103), UINT8_C(209), UINT8_C(168), UINT8_C( 79), UINT8_C( 9), UINT8_C( 7), UINT8_C( 73), UINT8_C(192), UINT8_C( 38), UINT8_C(151), UINT8_C(251), UINT8_C(131), UINT8_C(174), UINT8_C(151), 
UINT8_C(214), UINT8_C(173) } }, { { UINT8_C(200), UINT8_C(113), UINT8_C( 37), UINT8_C( 57), UINT8_C( 9), UINT8_C(126), UINT8_C(184), UINT8_C( 64), UINT8_C(130), UINT8_C( 12), UINT8_C( 4), UINT8_C( 78), UINT8_C(148), UINT8_C(134), UINT8_C( 73), UINT8_C(221) }, { UINT8_C(139), UINT8_C(173), UINT8_C(178), UINT8_C(200), UINT8_C(171), UINT8_C( 20), UINT8_C(148), UINT8_C(188), UINT8_C( 53), UINT8_C( 40), UINT8_C(155), UINT8_C(158), UINT8_C( 82), UINT8_C( 91), UINT8_C(117), UINT8_C( 26) }, { UINT8_C(204), UINT8_C(155), UINT8_C( 83), UINT8_C(213), UINT8_C( 25), UINT8_C( 11), UINT8_C( 21), UINT8_C(155), UINT8_C( 23), UINT8_C( 26), UINT8_C(233), UINT8_C(172), UINT8_C(160), UINT8_C( 50), UINT8_C(137), UINT8_C( 43) }, { UINT8_C(140), UINT8_C( 48), UINT8_C(219), UINT8_C(161), UINT8_C(188), UINT8_C( 90), UINT8_C(220), UINT8_C( 20), UINT8_C( 69), UINT8_C( 28), UINT8_C( 23), UINT8_C(118), UINT8_C(212), UINT8_C( 76), UINT8_C(230), UINT8_C( 59) } }, { { UINT8_C(223), UINT8_C( 59), UINT8_C(243), UINT8_C(139), UINT8_C( 80), UINT8_C(136), UINT8_C( 71), UINT8_C(133), UINT8_C(176), UINT8_C(227), UINT8_C( 35), UINT8_C( 2), UINT8_C( 62), UINT8_C(153), UINT8_C( 29), UINT8_C( 10) }, { UINT8_C( 52), UINT8_C(112), UINT8_C(223), UINT8_C( 77), UINT8_C(124), UINT8_C(245), UINT8_C(232), UINT8_C(147), UINT8_C( 15), UINT8_C(209), UINT8_C( 63), UINT8_C(175), UINT8_C( 3), UINT8_C(200), UINT8_C(218), UINT8_C(226) }, { UINT8_C( 4), UINT8_C(206), UINT8_C(109), UINT8_C( 84), UINT8_C( 86), UINT8_C(181), UINT8_C(217), UINT8_C( 6), UINT8_C(152), UINT8_C(253), UINT8_C( 8), UINT8_C(214), UINT8_C(150), UINT8_C( 37), UINT8_C(224), UINT8_C(202) }, { UINT8_C(175), UINT8_C( 91), UINT8_C(230), UINT8_C(207), UINT8_C(248), UINT8_C(193), UINT8_C(239), UINT8_C(247), UINT8_C(152), UINT8_C(112), UINT8_C( 27), UINT8_C( 76), UINT8_C( 0), UINT8_C(129), UINT8_C(221), UINT8_C( 94) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); 
simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t c = simde_vld1q_u8(test_vec[i].c); simde_uint8x16_t r = simde_vmlaq_u8(a, b, c); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vmlaq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t c[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C( 9265), UINT16_C(42133), UINT16_C(42540), UINT16_C(31120), UINT16_C(27981), UINT16_C(34441), UINT16_C(19055), UINT16_C(23368) }, { UINT16_C(63347), UINT16_C(35030), UINT16_C(52357), UINT16_C(48858), UINT16_C(54042), UINT16_C(36395), UINT16_C(41964), UINT16_C( 7515) }, { UINT16_C(61640), UINT16_C(62658), UINT16_C(21143), UINT16_C(58477), UINT16_C(63168), UINT16_C(12138), UINT16_C(45632), UINT16_C(45962) }, { UINT16_C(17929), UINT16_C(20161), UINT16_C(58015), UINT16_C(58466), UINT16_C(48333), UINT16_C(18775), UINT16_C(23919), UINT16_C(53078) } }, { { UINT16_C(24745), UINT16_C(11835), UINT16_C( 5421), UINT16_C(18412), UINT16_C( 6120), UINT16_C(54741), UINT16_C(12475), UINT16_C(33778) }, { UINT16_C(46112), UINT16_C(46967), UINT16_C(58631), UINT16_C(51099), UINT16_C( 1499), UINT16_C( 7414), UINT16_C(32951), UINT16_C(25039) }, { UINT16_C( 3041), UINT16_C( 3727), UINT16_C(31776), UINT16_C( 2389), UINT16_C(10899), UINT16_C(20190), UINT16_C(53338), UINT16_C(31441) }, { UINT16_C( 4297), UINT16_C(11188), UINT16_C( 6669), UINT16_C( 355), UINT16_C(25257), UINT16_C(59177), UINT16_C( 8465), UINT16_C( 1009) } }, { { UINT16_C(18821), UINT16_C(35890), UINT16_C(52526), UINT16_C( 2387), UINT16_C(18899), UINT16_C(35365), UINT16_C(62921), UINT16_C(43755) }, { UINT16_C(31488), UINT16_C( 8376), UINT16_C( 3575), UINT16_C(35369), UINT16_C( 1847), UINT16_C(37337), UINT16_C(43736), UINT16_C(23820) }, { UINT16_C(16115), UINT16_C( 8681), UINT16_C(15371), UINT16_C(56875), UINT16_C(20613), UINT16_C(20073), UINT16_C(21573), UINT16_C(17913) }, { UINT16_C( 2693), UINT16_C( 2986), UINT16_C(19147), 
UINT16_C(52278), UINT16_C(14694), UINT16_C(31270), UINT16_C(57857), UINT16_C(26519) } }, { { UINT16_C(45519), UINT16_C(50790), UINT16_C(36799), UINT16_C(63057), UINT16_C(10903), UINT16_C(28552), UINT16_C(38100), UINT16_C(51404) }, { UINT16_C(46546), UINT16_C(56809), UINT16_C( 5361), UINT16_C(30396), UINT16_C( 9573), UINT16_C(43716), UINT16_C(48505), UINT16_C(18928) }, { UINT16_C(22127), UINT16_C(11791), UINT16_C(24805), UINT16_C(31780), UINT16_C(44170), UINT16_C(24555), UINT16_C(46912), UINT16_C( 4647) }, { UINT16_C( 5085), UINT16_C(42253), UINT16_C(43860), UINT16_C(47297), UINT16_C(12041), UINT16_C(60788), UINT16_C(29204), UINT16_C(60508) } }, { { UINT16_C( 4204), UINT16_C(24048), UINT16_C(44069), UINT16_C(35539), UINT16_C(39121), UINT16_C(18996), UINT16_C( 9301), UINT16_C(50323) }, { UINT16_C(41850), UINT16_C(24818), UINT16_C( 5891), UINT16_C(36572), UINT16_C(51395), UINT16_C( 1261), UINT16_C( 5247), UINT16_C(60438) }, { UINT16_C( 1572), UINT16_C(18761), UINT16_C( 7602), UINT16_C(33747), UINT16_C( 2229), UINT16_C( 2766), UINT16_C(24876), UINT16_C(42959) }, { UINT16_C(59796), UINT16_C( 1266), UINT16_C( 827), UINT16_C(56871), UINT16_C(41648), UINT16_C(33514), UINT16_C(51497), UINT16_C( 1117) } }, { { UINT16_C(49412), UINT16_C( 2055), UINT16_C(58328), UINT16_C(40086), UINT16_C(33707), UINT16_C(11168), UINT16_C(46743), UINT16_C(47895) }, { UINT16_C(24765), UINT16_C(28421), UINT16_C(55421), UINT16_C(13043), UINT16_C(49632), UINT16_C( 3389), UINT16_C( 3106), UINT16_C(10164) }, { UINT16_C(48077), UINT16_C(42543), UINT16_C(50590), UINT16_C(19010), UINT16_C(57928), UINT16_C(57205), UINT16_C(35992), UINT16_C(21914) }, { UINT16_C(18269), UINT16_C(42994), UINT16_C(45566), UINT16_C(64828), UINT16_C(51883), UINT16_C(23425), UINT16_C(33479), UINT16_C(24927) } }, { { UINT16_C(40940), UINT16_C(27333), UINT16_C(47224), UINT16_C(22684), UINT16_C(55673), UINT16_C(39781), UINT16_C( 6629), UINT16_C(46018) }, { UINT16_C(61908), UINT16_C(29529), UINT16_C(39862), UINT16_C(65213), 
UINT16_C(12925), UINT16_C( 5597), UINT16_C(30910), UINT16_C(43627) }, { UINT16_C(12311), UINT16_C(36628), UINT16_C(45544), UINT16_C(25064), UINT16_C(19850), UINT16_C(28924), UINT16_C(48999), UINT16_C(15139) }, { UINT16_C( 6648), UINT16_C( 9401), UINT16_C(43880), UINT16_C(53476), UINT16_C(43483), UINT16_C(53489), UINT16_C(28759), UINT16_C(43363) } }, { { UINT16_C(31920), UINT16_C(26542), UINT16_C(27415), UINT16_C(37989), UINT16_C(17309), UINT16_C(23465), UINT16_C( 5307), UINT16_C(53766) }, { UINT16_C( 6724), UINT16_C(11362), UINT16_C(19147), UINT16_C(22157), UINT16_C(35479), UINT16_C(65222), UINT16_C(59721), UINT16_C(63802) }, { UINT16_C(59493), UINT16_C(31840), UINT16_C(50772), UINT16_C(61712), UINT16_C(47369), UINT16_C(50253), UINT16_C(21454), UINT16_C( 4758) }, { UINT16_C(31108), UINT16_C(33902), UINT16_C(63411), UINT16_C(47669), UINT16_C(16876), UINT16_C(38199), UINT16_C(30841), UINT16_C(60930) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t c = simde_vld1q_u16(test_vec[i].c); simde_uint16x8_t r = simde_vmlaq_u16(a, b, c); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vmlaq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t c[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(1060715319), UINT32_C(2706085937), UINT32_C(4105014023), UINT32_C(3284141172) }, { UINT32_C(4288173704), UINT32_C(3432566778), UINT32_C(4076060134), UINT32_C(1353036057) }, { UINT32_C( 512810732), UINT32_C(2126502774), UINT32_C(3681709158), UINT32_C(4187894385) }, { UINT32_C(2536698007), UINT32_C(3777778541), UINT32_C(3897834667), UINT32_C(1474826877) } }, { { UINT32_C(2214147720), UINT32_C(3947860229), UINT32_C(1742553678), UINT32_C(3753411315) }, { UINT32_C(3623700577), UINT32_C(2320874787), UINT32_C(2590361385), UINT32_C(2190738425) }, { 
UINT32_C(1040551225), UINT32_C(1831425054), UINT32_C(2312373910), UINT32_C(3932720265) }, { UINT32_C( 69255457), UINT32_C(1963341599), UINT32_C( 555674196), UINT32_C(3930246964) } }, { { UINT32_C(4173424340), UINT32_C(1283594019), UINT32_C(3639076830), UINT32_C( 593132522) }, { UINT32_C( 643915528), UINT32_C(1234406067), UINT32_C( 433219728), UINT32_C(3372432116) }, { UINT32_C(3284256160), UINT32_C(3121628124), UINT32_C( 345175850), UINT32_C(2050485362) }, { UINT32_C(2697327572), UINT32_C(3721011703), UINT32_C(4201643902), UINT32_C(2475666578) } }, { { UINT32_C(4271937611), UINT32_C(2991010850), UINT32_C(2429295004), UINT32_C(4099526484) }, { UINT32_C(1891113620), UINT32_C(2267793501), UINT32_C( 832290239), UINT32_C(4121678506) }, { UINT32_C(2364820330), UINT32_C( 457128831), UINT32_C(2846624597), UINT32_C(1872561626) }, { UINT32_C(3753178515), UINT32_C(4128246085), UINT32_C(1308446471), UINT32_C(1812629016) } }, { { UINT32_C(2111788320), UINT32_C(3691317789), UINT32_C(1913495752), UINT32_C(3714562163) }, { UINT32_C(2204785412), UINT32_C(3969886615), UINT32_C(2392148916), UINT32_C(1895641680) }, { UINT32_C(2783894920), UINT32_C(2944594919), UINT32_C( 102862739), UINT32_C(1290045768) }, { UINT32_C(1353327424), UINT32_C(1170171486), UINT32_C(2974131492), UINT32_C(3016404723) } }, { { UINT32_C(2077183716), UINT32_C(2875682551), UINT32_C( 171572409), UINT32_C(3078240047) }, { UINT32_C(4217137172), UINT32_C(4021018203), UINT32_C(3069561966), UINT32_C( 973265237) }, { UINT32_C( 515232039), UINT32_C(4190707008), UINT32_C(1208156953), UINT32_C(1325366842) }, { UINT32_C( 637500912), UINT32_C( 654464183), UINT32_C( 518667639), UINT32_C(2764196465) } }, { { UINT32_C(1112104166), UINT32_C(2821846074), UINT32_C( 375269057), UINT32_C( 659578880) }, { UINT32_C(1917191730), UINT32_C(1013649187), UINT32_C(1283813138), UINT32_C(3550119149) }, { UINT32_C( 454419680), UINT32_C(2579711704), UINT32_C(1840259693), UINT32_C(3029598338) }, { UINT32_C(1697468582), UINT32_C(1532185538), 
UINT32_C(1783551339), UINT32_C( 416014426) } }, { { UINT32_C( 690412038), UINT32_C(4234580714), UINT32_C(3997821697), UINT32_C(1354883951) }, { UINT32_C(2691422151), UINT32_C(2319003165), UINT32_C(3556239696), UINT32_C(4035415274) }, { UINT32_C(1343925862), UINT32_C(1095598144), UINT32_C(3660551787), UINT32_C(1093333113) }, { UINT32_C(2063471952), UINT32_C(1568250410), UINT32_C(3209514865), UINT32_C(2639453705) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t c = simde_vld1q_u32(test_vec[i].c); simde_uint32x4_t r = simde_vmlaq_u32(a, b, c); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmla_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmla_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vmla_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmla_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmla_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmla_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmla_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmla_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/mla_n.c000066400000000000000000001115261400333146700165570ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN mla_n #include "test-neon.h" #include "../../../simde/arm/neon/mla_n.h" static int test_simde_vmla_n_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 c; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -680.78), SIMDE_FLOAT32_C( -597.80) }, { SIMDE_FLOAT32_C( 774.13), SIMDE_FLOAT32_C( -444.02) }, 
SIMDE_FLOAT32_C( -818.28), { SIMDE_FLOAT32_C(-634135.88), SIMDE_FLOAT32_C(362734.88) } }, { { SIMDE_FLOAT32_C( -946.16), SIMDE_FLOAT32_C( 309.30) }, { SIMDE_FLOAT32_C( -303.37), SIMDE_FLOAT32_C( -304.24) }, SIMDE_FLOAT32_C( -769.24), { SIMDE_FLOAT32_C(232418.17), SIMDE_FLOAT32_C(234342.86) } }, { { SIMDE_FLOAT32_C( -293.93), SIMDE_FLOAT32_C( 460.64) }, { SIMDE_FLOAT32_C( -114.31), SIMDE_FLOAT32_C( 164.95) }, SIMDE_FLOAT32_C( 732.21), { SIMDE_FLOAT32_C(-83992.86), SIMDE_FLOAT32_C(121238.68) } }, { { SIMDE_FLOAT32_C( -513.27), SIMDE_FLOAT32_C( -298.77) }, { SIMDE_FLOAT32_C( 31.87), SIMDE_FLOAT32_C( -63.24) }, SIMDE_FLOAT32_C( 777.71), { SIMDE_FLOAT32_C( 24272.35), SIMDE_FLOAT32_C(-49481.15) } }, { { SIMDE_FLOAT32_C( -30.10), SIMDE_FLOAT32_C( -408.99) }, { SIMDE_FLOAT32_C( -446.43), SIMDE_FLOAT32_C( 878.35) }, SIMDE_FLOAT32_C( 581.74), { SIMDE_FLOAT32_C(-259736.27), SIMDE_FLOAT32_C(510562.31) } }, { { SIMDE_FLOAT32_C( 861.55), SIMDE_FLOAT32_C( -880.45) }, { SIMDE_FLOAT32_C( -856.08), SIMDE_FLOAT32_C( -537.88) }, SIMDE_FLOAT32_C( 393.17), { SIMDE_FLOAT32_C(-335723.44), SIMDE_FLOAT32_C(-212358.73) } }, { { SIMDE_FLOAT32_C( 174.89), SIMDE_FLOAT32_C( -218.67) }, { SIMDE_FLOAT32_C( 795.36), SIMDE_FLOAT32_C( -50.98) }, SIMDE_FLOAT32_C( 337.31), { SIMDE_FLOAT32_C(268457.75), SIMDE_FLOAT32_C(-17414.73) } }, { { SIMDE_FLOAT32_C( 977.08), SIMDE_FLOAT32_C( 2.85) }, { SIMDE_FLOAT32_C( -353.39), SIMDE_FLOAT32_C( -326.28) }, SIMDE_FLOAT32_C( 698.61), { SIMDE_FLOAT32_C(-245904.72), SIMDE_FLOAT32_C(-227939.62) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32 c = test_vec[i].c; simde_float32x2_t r = simde_vmla_n_f32(a, b, c); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 
1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32 c = simde_test_codegen_random_f32(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vmla_n_f32(a, b, c); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_f32(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmla_n_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int16_t c; int16_t r[4]; } test_vec[] = { { { -INT16_C( 13008), -INT16_C( 2885), INT16_C( 19861), -INT16_C( 19732) }, { INT16_C( 11119), -INT16_C( 14555), INT16_C( 25759), -INT16_C( 6163) }, INT16_C( 28428), { -INT16_C( 2204), INT16_C( 21879), -INT16_C( 2551), INT16_C( 21768) } }, { { INT16_C( 7463), INT16_C( 31302), -INT16_C( 21059), -INT16_C( 30252) }, { INT16_C( 4259), -INT16_C( 12863), -INT16_C( 3762), INT16_C( 2458) }, INT16_C( 12262), { -INT16_C( 871), -INT16_C( 15188), -INT16_C( 13359), INT16_C( 28720) } }, { { -INT16_C( 11689), -INT16_C( 14623), INT16_C( 1789), -INT16_C( 25459) }, { INT16_C( 31594), INT16_C( 30595), -INT16_C( 21782), INT16_C( 12436) }, INT16_C( 20772), { -INT16_C( 18625), INT16_C( 2125), INT16_C( 6629), INT16_C( 17757) } }, { { -INT16_C( 1826), -INT16_C( 32294), -INT16_C( 25591), INT16_C( 22350) }, { -INT16_C( 6003), INT16_C( 29536), -INT16_C( 18665), -INT16_C( 1979) }, INT16_C( 17277), { INT16_C( 27831), -INT16_C( 2118), INT16_C( 1860), -INT16_C( 24577) } }, { { INT16_C( 3070), INT16_C( 27103), INT16_C( 25478), INT16_C( 28896) }, { INT16_C( 29709), INT16_C( 12960), INT16_C( 32454), -INT16_C( 24534) }, INT16_C( 13311), { INT16_C( 15345), -INT16_C( 18625), INT16_C( 7360), INT16_C( 22710) } }, { { INT16_C( 19772), -INT16_C( 13686), -INT16_C( 5323), INT16_C( 19517) }, { -INT16_C( 31838), INT16_C( 8260), INT16_C( 17350), -INT16_C( 23253) }, -INT16_C( 20052), { 
-INT16_C( 16364), INT16_C( 31802), INT16_C( 23101), INT16_C( 33) } }, { { -INT16_C( 29688), INT16_C( 5665), -INT16_C( 16128), -INT16_C( 14776) }, { INT16_C( 29248), INT16_C( 16231), -INT16_C( 23642), INT16_C( 12429) }, -INT16_C( 15763), { -INT16_C( 20152), INT16_C( 8956), INT16_C( 15022), INT16_C( 19537) } }, { { -INT16_C( 21733), -INT16_C( 16881), INT16_C( 21294), -INT16_C( 2850) }, { INT16_C( 2454), INT16_C( 17049), -INT16_C( 23878), -INT16_C( 9266) }, -INT16_C( 12360), { -INT16_C( 10005), INT16_C( 21255), -INT16_C( 20770), -INT16_C( 32018) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); int16_t c = test_vec[i].c; simde_int16x4_t r = simde_vmla_n_s16(a, b, c); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); int16_t c = simde_test_codegen_random_i16(); simde_int16x4_t r = simde_vmla_n_s16(a, b, c); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmla_n_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int32_t c; int32_t r[2]; } test_vec[] = { { { INT32_C( 888615576), -INT32_C( 1946370236) }, { INT32_C( 491560011), -INT32_C( 1001757691) }, INT32_C( 554369550), { -INT32_C( 895920974), -INT32_C( 837992054) } }, { { INT32_C( 1129433576), INT32_C( 377974658) }, { INT32_C( 352551805), -INT32_C( 95749450) }, INT32_C( 75843257), { -INT32_C( 1396638403), -INT32_C( 1882038008) } }, { { -INT32_C( 400436766), INT32_C( 1152150326) }, { INT32_C( 
1449572206), INT32_C( 43628416) }, -INT32_C( 1558699994), { -INT32_C( 828453834), -INT32_C( 1838307274) } }, { { INT32_C( 1522080676), -INT32_C( 833354987) }, { INT32_C( 752015689), -INT32_C( 518720597) }, -INT32_C( 853163937), { -INT32_C( 59015749), INT32_C( 848408458) } }, { { -INT32_C( 131888264), INT32_C( 1778105410) }, { -INT32_C( 2129914915), INT32_C( 1155253551) }, INT32_C( 303247304), { INT32_C( 77570848), INT32_C( 2136893946) } }, { { -INT32_C( 1287723768), INT32_C( 949244633) }, { -INT32_C( 1979336174), -INT32_C( 2004670395) }, -INT32_C( 1041137948), { INT32_C( 786397456), INT32_C( 1227564621) } }, { { -INT32_C( 1069351535), -INT32_C( 1962598973) }, { INT32_C( 1436358732), -INT32_C( 704062467) }, INT32_C( 1074699565), { INT32_C( 2028920557), -INT32_C( 514592196) } }, { { -INT32_C( 1664478377), INT32_C( 539249979) }, { INT32_C( 1575032267), -INT32_C( 702733293) }, -INT32_C( 1906236863), { INT32_C( 868258274), -INT32_C( 1853359090) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); int32_t c = test_vec[i].c; simde_int32x2_t r = simde_vmla_n_s32(a, b, c); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); int32_t c = simde_test_codegen_random_i32(); simde_int32x2_t r = simde_vmla_n_s32(a, b, c); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmla_n_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t c; uint16_t r[4]; } test_vec[] = 
{ { { UINT16_C( 2400), UINT16_C(15509), UINT16_C( 4372), UINT16_C(40051) }, { UINT16_C(61882), UINT16_C(59067), UINT16_C(47693), UINT16_C(62182) }, UINT16_C(35322), { UINT16_C(41732), UINT16_C(41523), UINT16_C(13638), UINT16_C(59151) } }, { { UINT16_C(16620), UINT16_C(40433), UINT16_C(62418), UINT16_C( 5903) }, { UINT16_C(46428), UINT16_C( 4731), UINT16_C(56078), UINT16_C(42011) }, UINT16_C(12055), { UINT16_C(28720), UINT16_C(56318), UINT16_C(13332), UINT16_C(51836) } }, { { UINT16_C(35509), UINT16_C(28875), UINT16_C(34684), UINT16_C(51542) }, { UINT16_C(15681), UINT16_C(15292), UINT16_C(43206), UINT16_C(46971) }, UINT16_C(19781), { UINT16_C(39482), UINT16_C( 5751), UINT16_C(37594), UINT16_C(15485) } }, { { UINT16_C(21675), UINT16_C( 1892), UINT16_C(57098), UINT16_C( 6169) }, { UINT16_C(13499), UINT16_C(53948), UINT16_C(29284), UINT16_C(12125) }, UINT16_C(55778), { UINT16_C(25793), UINT16_C(27996), UINT16_C(40786), UINT16_C(48435) } }, { { UINT16_C(14518), UINT16_C(63394), UINT16_C(24181), UINT16_C(15411) }, { UINT16_C(44550), UINT16_C(19699), UINT16_C(40700), UINT16_C(24736) }, UINT16_C(43685), { UINT16_C(24212), UINT16_C(60993), UINT16_C(12001), UINT16_C(50003) } }, { { UINT16_C(48704), UINT16_C(64451), UINT16_C(32755), UINT16_C(22477) }, { UINT16_C(10993), UINT16_C(54150), UINT16_C(15619), UINT16_C(42508) }, UINT16_C(33076), { UINT16_C(59444), UINT16_C(30971), UINT16_C(26511), UINT16_C( 7741) } }, { { UINT16_C(26372), UINT16_C( 3005), UINT16_C(45334), UINT16_C( 4695) }, { UINT16_C(63311), UINT16_C(62834), UINT16_C(45730), UINT16_C(26035) }, UINT16_C(42669), { UINT16_C(49511), UINT16_C(54727), UINT16_C(29840), UINT16_C(56910) } }, { { UINT16_C(31716), UINT16_C(55037), UINT16_C(33957), UINT16_C(43433) }, { UINT16_C(46529), UINT16_C(62799), UINT16_C(21303), UINT16_C(62557) }, UINT16_C(29534), { UINT16_C(60354), UINT16_C(26367), UINT16_C(51159), UINT16_C(10959) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = 
simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); uint16_t c = test_vec[i].c; simde_uint16x4_t r = simde_vmla_n_u16(a, b, c); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); uint16_t c = simde_test_codegen_random_u16(); simde_uint16x4_t r = simde_vmla_n_u16(a, b, c); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmla_n_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t c; uint32_t r[2]; } test_vec[] = { { { UINT32_C(2305842287), UINT32_C(1514417662) }, { UINT32_C(3213067177), UINT32_C(3633736064) }, UINT32_C( 679518645), { UINT32_C(3820600300), UINT32_C(3572909438) } }, { { UINT32_C(2905213946), UINT32_C(4090384713) }, { UINT32_C(1057534927), UINT32_C( 248018959) }, UINT32_C(1332219045), { UINT32_C(2739643493), UINT32_C(1203529460) } }, { { UINT32_C( 269413519), UINT32_C( 183018581) }, { UINT32_C(1060333637), UINT32_C(3169607027) }, UINT32_C(2125446062), { UINT32_C(3478467189), UINT32_C(2540710271) } }, { { UINT32_C(2109585262), UINT32_C(3582690608) }, { UINT32_C( 556070033), UINT32_C( 892416736) }, UINT32_C( 473962967), { UINT32_C(4025042485), UINT32_C(2278891344) } }, { { UINT32_C(4099699585), UINT32_C(2125482192) }, { UINT32_C(1912364803), UINT32_C(1206892823) }, UINT32_C(3491527230), { UINT32_C(2836849723), UINT32_C( 393744994) } }, { { UINT32_C(1324433774), UINT32_C(1250173555) }, { UINT32_C(3160917051), UINT32_C( 129024823) }, UINT32_C( 260399115), { UINT32_C(3097743351), UINT32_C(2363762128) } }, { { UINT32_C(3598746303), 
UINT32_C(2048749371) }, { UINT32_C(1481259754), UINT32_C(4003937147) }, UINT32_C(2553883485), { UINT32_C(1870084033), UINT32_C(2319365098) } }, { { UINT32_C( 643080431), UINT32_C(1848444003) }, { UINT32_C( 595440228), UINT32_C(1895497268) }, UINT32_C(1474959213), { UINT32_C(2571669891), UINT32_C( 493481095) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); uint32_t c = test_vec[i].c; simde_uint32x2_t r = simde_vmla_n_u32(a, b, c); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); uint32_t c = simde_test_codegen_random_u32(); simde_uint32x2_t r = simde_vmla_n_u32(a, b, c); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u32(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlaq_n_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 c; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -565.27), SIMDE_FLOAT32_C( -578.48), SIMDE_FLOAT32_C( 923.27), SIMDE_FLOAT32_C( -735.63) }, { SIMDE_FLOAT32_C( -721.39), SIMDE_FLOAT32_C( 999.71), SIMDE_FLOAT32_C( -670.81), SIMDE_FLOAT32_C( -721.24) }, SIMDE_FLOAT32_C( 987.68), { SIMDE_FLOAT32_C(-713067.75), SIMDE_FLOAT32_C(986815.06), SIMDE_FLOAT32_C(-661622.38), SIMDE_FLOAT32_C(-713089.94) } }, { { SIMDE_FLOAT32_C( -575.56), SIMDE_FLOAT32_C( 755.06), SIMDE_FLOAT32_C( -177.83), SIMDE_FLOAT32_C( -498.94) }, { SIMDE_FLOAT32_C( -117.24), SIMDE_FLOAT32_C( -43.95), SIMDE_FLOAT32_C( 115.50), SIMDE_FLOAT32_C( 776.41) }, SIMDE_FLOAT32_C( 
-808.82), { SIMDE_FLOAT32_C( 94250.49), SIMDE_FLOAT32_C( 36302.70), SIMDE_FLOAT32_C(-93596.54), SIMDE_FLOAT32_C(-628474.88) } }, { { SIMDE_FLOAT32_C( -35.98), SIMDE_FLOAT32_C( -41.82), SIMDE_FLOAT32_C( 249.53), SIMDE_FLOAT32_C( -482.80) }, { SIMDE_FLOAT32_C( 691.32), SIMDE_FLOAT32_C( 488.86), SIMDE_FLOAT32_C( 795.66), SIMDE_FLOAT32_C( 785.88) }, SIMDE_FLOAT32_C( 36.60), { SIMDE_FLOAT32_C( 25266.33), SIMDE_FLOAT32_C( 17850.46), SIMDE_FLOAT32_C( 29370.68), SIMDE_FLOAT32_C( 28280.41) } }, { { SIMDE_FLOAT32_C( 325.74), SIMDE_FLOAT32_C( -867.74), SIMDE_FLOAT32_C( -719.43), SIMDE_FLOAT32_C( 295.15) }, { SIMDE_FLOAT32_C( -433.01), SIMDE_FLOAT32_C( -297.91), SIMDE_FLOAT32_C( 218.42), SIMDE_FLOAT32_C( -168.64) }, SIMDE_FLOAT32_C( -19.30), { SIMDE_FLOAT32_C( 8682.83), SIMDE_FLOAT32_C( 4881.92), SIMDE_FLOAT32_C( -4934.94), SIMDE_FLOAT32_C( 3549.90) } }, { { SIMDE_FLOAT32_C( 218.13), SIMDE_FLOAT32_C( 160.55), SIMDE_FLOAT32_C( 259.47), SIMDE_FLOAT32_C( 205.82) }, { SIMDE_FLOAT32_C( 585.00), SIMDE_FLOAT32_C( 14.53), SIMDE_FLOAT32_C( -972.01), SIMDE_FLOAT32_C( -913.95) }, SIMDE_FLOAT32_C( 897.29), { SIMDE_FLOAT32_C(525132.75), SIMDE_FLOAT32_C( 13198.17), SIMDE_FLOAT32_C(-871915.31), SIMDE_FLOAT32_C(-819872.38) } }, { { SIMDE_FLOAT32_C( -15.96), SIMDE_FLOAT32_C( 201.56), SIMDE_FLOAT32_C( 673.70), SIMDE_FLOAT32_C( 175.22) }, { SIMDE_FLOAT32_C( -834.43), SIMDE_FLOAT32_C( -368.13), SIMDE_FLOAT32_C( -575.24), SIMDE_FLOAT32_C( -317.23) }, SIMDE_FLOAT32_C( -676.81), { SIMDE_FLOAT32_C(564734.62), SIMDE_FLOAT32_C(249355.62), SIMDE_FLOAT32_C(390001.88), SIMDE_FLOAT32_C(214879.66) } }, { { SIMDE_FLOAT32_C( 913.62), SIMDE_FLOAT32_C( -521.58), SIMDE_FLOAT32_C( -890.93), SIMDE_FLOAT32_C( -49.78) }, { SIMDE_FLOAT32_C( 804.17), SIMDE_FLOAT32_C( -758.67), SIMDE_FLOAT32_C( 230.79), SIMDE_FLOAT32_C( 99.32) }, SIMDE_FLOAT32_C( -191.67), { SIMDE_FLOAT32_C(-153221.64), SIMDE_FLOAT32_C(144892.70), SIMDE_FLOAT32_C(-45126.45), SIMDE_FLOAT32_C(-19086.44) } }, { { SIMDE_FLOAT32_C( 932.88), SIMDE_FLOAT32_C( 
-682.26), SIMDE_FLOAT32_C( 639.69), SIMDE_FLOAT32_C( -86.42) }, { SIMDE_FLOAT32_C( 535.88), SIMDE_FLOAT32_C( -199.76), SIMDE_FLOAT32_C( -826.95), SIMDE_FLOAT32_C( -258.30) }, SIMDE_FLOAT32_C( -614.76), { SIMDE_FLOAT32_C(-328504.72), SIMDE_FLOAT32_C(122122.20), SIMDE_FLOAT32_C(509015.50), SIMDE_FLOAT32_C(158706.08) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32 c = test_vec[i].c; simde_float32x4_t r = simde_vmlaq_n_f32(a, b, c); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32 c = simde_test_codegen_random_f32(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vmlaq_n_f32(a, b, c); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_f32(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlaq_n_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; int16_t c; int16_t r[8]; } test_vec[] = { { { -INT16_C( 29756), -INT16_C( 21306), INT16_C( 18193), -INT16_C( 21743), INT16_C( 15788), INT16_C( 22404), INT16_C( 28557), -INT16_C( 29072) }, { INT16_C( 13465), -INT16_C( 9565), INT16_C( 29280), INT16_C( 7895), -INT16_C( 31066), INT16_C( 32613), -INT16_C( 30322), INT16_C( 21186) }, -INT16_C( 30700), { -INT16_C( 4168), INT16_C( 22914), INT16_C( 13969), INT16_C( 19421), -INT16_C( 3420), -INT16_C( 3224), -INT16_C( 24923), INT16_C( 5528) } }, { { INT16_C( 9983), INT16_C( 4304), INT16_C( 31953), INT16_C( 21837), -INT16_C( 9517), INT16_C( 17349), 
INT16_C( 24168), INT16_C( 3191) }, { -INT16_C( 10440), INT16_C( 4222), INT16_C( 9718), INT16_C( 23446), INT16_C( 9380), INT16_C( 26340), -INT16_C( 1673), INT16_C( 30447) }, -INT16_C( 16609), { -INT16_C( 313), INT16_C( 4626), -INT16_C( 24677), INT16_C( 22135), -INT16_C( 22865), -INT16_C( 10911), INT16_C( 23761), -INT16_C( 15256) } }, { { -INT16_C( 3962), -INT16_C( 11205), INT16_C( 3654), INT16_C( 2990), INT16_C( 5970), -INT16_C( 13975), -INT16_C( 24029), -INT16_C( 24159) }, { -INT16_C( 26702), INT16_C( 18630), INT16_C( 27378), -INT16_C( 10387), -INT16_C( 6959), -INT16_C( 16176), -INT16_C( 4262), -INT16_C( 8065) }, -INT16_C( 17697), { INT16_C( 26772), INT16_C( 5301), INT16_C( 2836), -INT16_C( 6751), INT16_C( 17249), -INT16_C( 8551), -INT16_C( 31351), INT16_C( 30274) } }, { { INT16_C( 9652), INT16_C( 25544), INT16_C( 6704), -INT16_C( 25990), -INT16_C( 25116), -INT16_C( 31428), -INT16_C( 4546), INT16_C( 1308) }, { INT16_C( 3638), -INT16_C( 23697), INT16_C( 16613), -INT16_C( 19065), -INT16_C( 7936), INT16_C( 32676), -INT16_C( 31550), INT16_C( 30265) }, INT16_C( 681), { -INT16_C( 3238), INT16_C( 9743), -INT16_C( 17571), INT16_C( 32409), INT16_C( 9956), INT16_C( 4224), INT16_C( 5712), -INT16_C( 32067) } }, { { -INT16_C( 9511), INT16_C( 21276), INT16_C( 116), -INT16_C( 20240), INT16_C( 12165), -INT16_C( 24162), -INT16_C( 11212), -INT16_C( 23632) }, { -INT16_C( 27272), -INT16_C( 28), -INT16_C( 7093), -INT16_C( 4127), -INT16_C( 23708), -INT16_C( 25229), INT16_C( 7449), -INT16_C( 3169) }, -INT16_C( 17161), { INT16_C( 12705), -INT16_C( 22504), INT16_C( 22737), INT16_C( 24327), INT16_C( 17665), -INT16_C( 109), INT16_C( 17235), INT16_C( 30233) } }, { { INT16_C( 27462), INT16_C( 14268), INT16_C( 16923), -INT16_C( 18074), -INT16_C( 25885), -INT16_C( 27763), INT16_C( 1341), INT16_C( 8489) }, { INT16_C( 29701), -INT16_C( 6650), INT16_C( 27235), -INT16_C( 10359), -INT16_C( 24057), -INT16_C( 22540), -INT16_C( 5227), -INT16_C( 9117) }, INT16_C( 8022), { -INT16_C( 12), INT16_C( 14272), 
-INT16_C( 931), -INT16_C( 18324), -INT16_C( 7619), -INT16_C( 29819), INT16_C( 13387), INT16_C( 10091) } }, { { INT16_C( 28947), INT16_C( 31073), INT16_C( 17706), -INT16_C( 18669), INT16_C( 20696), INT16_C( 445), -INT16_C( 15758), INT16_C( 30837) }, { -INT16_C( 9816), INT16_C( 12770), -INT16_C( 5712), -INT16_C( 23341), INT16_C( 27024), -INT16_C( 3185), -INT16_C( 6843), INT16_C( 22547) }, INT16_C( 29782), { -INT16_C( 20605), -INT16_C( 23731), -INT16_C( 31158), -INT16_C( 19979), INT16_C( 1848), -INT16_C( 24633), INT16_C( 2976), -INT16_C( 21801) } }, { { -INT16_C( 32559), -INT16_C( 6983), -INT16_C( 28105), -INT16_C( 3020), -INT16_C( 22893), INT16_C( 2486), INT16_C( 24094), INT16_C( 226) }, { -INT16_C( 28017), INT16_C( 25578), INT16_C( 31286), -INT16_C( 14900), INT16_C( 4462), -INT16_C( 32342), INT16_C( 105), INT16_C( 15093) }, -INT16_C( 20608), { -INT16_C( 30383), -INT16_C( 12359), -INT16_C( 26825), INT16_C( 20020), -INT16_C( 28781), INT16_C( 5302), INT16_C( 22942), -INT16_C( 2462) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); int16_t c = test_vec[i].c; simde_int16x8_t r = simde_vmlaq_n_s16(a, b, c); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); int16_t c = simde_test_codegen_random_i16(); simde_int16x8_t r = simde_vmlaq_n_s16(a, b, c); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlaq_n_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; 
int32_t c; int32_t r[4]; } test_vec[] = { { { INT32_C( 1499344216), INT32_C( 1524728521), INT32_C( 1303864302), -INT32_C( 1288989303) }, { -INT32_C( 1898069034), -INT32_C( 637691134), INT32_C( 1260153992), -INT32_C( 1453578159) }, INT32_C( 503495253), { -INT32_C( 2064273562), INT32_C( 1891200627), -INT32_C( 1374536426), -INT32_C( 1296969106) } }, { { INT32_C( 712631356), -INT32_C( 797429689), -INT32_C( 1669094459), INT32_C( 1747607654) }, { -INT32_C( 2092881925), -INT32_C( 439460204), -INT32_C( 594597242), INT32_C( 570069477) }, -INT32_C( 1135840395), { -INT32_C( 1631917325), INT32_C( 1345709547), -INT32_C( 1043053309), -INT32_C( 80417521) } }, { { INT32_C( 1770833060), -INT32_C( 855306393), INT32_C( 1798713456), -INT32_C( 336627881) }, { INT32_C( 1557249493), -INT32_C( 851943192), INT32_C( 1743663857), INT32_C( 1243822758) }, INT32_C( 1706275070), { INT32_C( 2031652346), INT32_C( 2009904535), INT32_C( 1403790), INT32_C( 860342283) } }, { { INT32_C( 791919039), INT32_C( 1083927017), -INT32_C( 1238594848), INT32_C( 789773639) }, { INT32_C( 1325156957), INT32_C( 582347644), INT32_C( 611113253), INT32_C( 1216946313) }, -INT32_C( 1032274727), { -INT32_C( 1086124396), -INT32_C( 1612769787), INT32_C( 1497351997), INT32_C( 1902914920) } }, { { INT32_C( 100799269), -INT32_C( 457429347), -INT32_C( 2011902421), -INT32_C( 1797844968) }, { INT32_C( 565677307), -INT32_C( 297458843), INT32_C( 473419331), -INT32_C( 1327583349) }, INT32_C( 1605820866), { INT32_C( 1165329499), -INT32_C( 569719257), INT32_C( 2001338353), -INT32_C( 1212988818) } }, { { INT32_C( 994275855), INT32_C( 1489196864), INT32_C( 1676515944), -INT32_C( 1937464281) }, { INT32_C( 192661959), INT32_C( 589804184), INT32_C( 601032289), -INT32_C( 159216921) }, INT32_C( 1026672124), { -INT32_C( 713345805), -INT32_C( 413262624), INT32_C( 1629587172), INT32_C( 1260397195) } }, { { -INT32_C( 2053769955), -INT32_C( 1226276209), -INT32_C( 297571034), -INT32_C( 822493642) }, { -INT32_C( 772726672), INT32_C( 234145062), 
INT32_C( 1258583631), INT32_C( 1485321531) }, -INT32_C( 1159913942), { -INT32_C( 1717409923), INT32_C( 2113490635), INT32_C( 1259280412), INT32_C( 494822372) } }, { { -INT32_C( 948910432), INT32_C( 1773515571), -INT32_C( 516379023), -INT32_C( 172873266) }, { INT32_C( 1023583982), INT32_C( 1485375004), INT32_C( 1722814780), -INT32_C( 803172817) }, -INT32_C( 2020109996), { INT32_C( 1161714360), INT32_C( 1906675811), INT32_C( 455430689), INT32_C( 1470263354) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); int32_t c = test_vec[i].c; simde_int32x4_t r = simde_vmlaq_n_s32(a, b, c); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); int32_t c = simde_test_codegen_random_i32(); simde_int32x4_t r = simde_vmlaq_n_s32(a, b, c); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlaq_n_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; uint16_t c; uint16_t r[8]; } test_vec[] = { { { UINT16_C(32955), UINT16_C(48081), UINT16_C(31759), UINT16_C(16199), UINT16_C(32480), UINT16_C(10241), UINT16_C(21846), UINT16_C(11889) }, { UINT16_C(60208), UINT16_C( 2393), UINT16_C(43679), UINT16_C(54796), UINT16_C(44510), UINT16_C(46318), UINT16_C( 9561), UINT16_C( 5250) }, UINT16_C(21413), { UINT16_C(42667), UINT16_C(40238), UINT16_C( 394), UINT16_C( 6403), UINT16_C(35062), UINT16_C(61287), UINT16_C(17075), UINT16_C(35899) } }, { { UINT16_C(46543), UINT16_C( 5839), 
UINT16_C(45044), UINT16_C(62869), UINT16_C(60376), UINT16_C(18763), UINT16_C(31514), UINT16_C(29492) }, { UINT16_C(54149), UINT16_C(37150), UINT16_C(64682), UINT16_C(38974), UINT16_C(38832), UINT16_C(12989), UINT16_C(25516), UINT16_C(31621) }, UINT16_C(21528), { UINT16_C(11847), UINT16_C(35231), UINT16_C(10212), UINT16_C(37733), UINT16_C(58456), UINT16_C( 3843), UINT16_C(17210), UINT16_C(43948) } }, { { UINT16_C( 3218), UINT16_C( 9988), UINT16_C(56321), UINT16_C(19474), UINT16_C(11301), UINT16_C(23240), UINT16_C(19872), UINT16_C(48685) }, { UINT16_C(55262), UINT16_C( 7354), UINT16_C(27248), UINT16_C(11699), UINT16_C(24476), UINT16_C( 8592), UINT16_C(43227), UINT16_C(28021) }, UINT16_C(31156), { UINT16_C(49834), UINT16_C(17356), UINT16_C(41665), UINT16_C( 2286), UINT16_C( 8661), UINT16_C( 1032), UINT16_C(35484), UINT16_C( 369) } }, { { UINT16_C(46740), UINT16_C(42581), UINT16_C(31490), UINT16_C(51923), UINT16_C(29653), UINT16_C( 535), UINT16_C(62769), UINT16_C(60378) }, { UINT16_C(18961), UINT16_C(50517), UINT16_C(61815), UINT16_C( 2084), UINT16_C(65298), UINT16_C(34736), UINT16_C(25964), UINT16_C( 1) }, UINT16_C(22043), { UINT16_C(15455), UINT16_C( 1100), UINT16_C(60559), UINT16_C(48799), UINT16_C(26299), UINT16_C(29095), UINT16_C(61333), UINT16_C(16885) } }, { { UINT16_C( 7591), UINT16_C(31441), UINT16_C(42728), UINT16_C(65517), UINT16_C( 7849), UINT16_C(33781), UINT16_C( 1545), UINT16_C(24269) }, { UINT16_C(17611), UINT16_C(61519), UINT16_C(24908), UINT16_C(65007), UINT16_C(23784), UINT16_C(59746), UINT16_C(32092), UINT16_C( 832) }, UINT16_C( 4506), { UINT16_C(64197), UINT16_C(18775), UINT16_C(15008), UINT16_C(41139), UINT16_C(27193), UINT16_C(27369), UINT16_C(35681), UINT16_C(37709) } }, { { UINT16_C(33405), UINT16_C(27320), UINT16_C(24962), UINT16_C(30600), UINT16_C(37348), UINT16_C(45437), UINT16_C(18927), UINT16_C(16117) }, { UINT16_C(16953), UINT16_C(10399), UINT16_C(34879), UINT16_C(41348), UINT16_C(57713), UINT16_C(45342), UINT16_C(47332), UINT16_C(25283) 
}, UINT16_C(31547), { UINT16_C(10400), UINT16_C(11357), UINT16_C( 3335), UINT16_C( 7412), UINT16_C(53743), UINT16_C(60775), UINT16_C(29307), UINT16_C(45798) } }, { { UINT16_C(48588), UINT16_C(21980), UINT16_C(49204), UINT16_C(45542), UINT16_C(54897), UINT16_C(26362), UINT16_C(13076), UINT16_C(46248) }, { UINT16_C(59228), UINT16_C(57404), UINT16_C(44424), UINT16_C(42689), UINT16_C(42591), UINT16_C( 8799), UINT16_C(39432), UINT16_C(54429) }, UINT16_C(31063), { UINT16_C(55824), UINT16_C(58944), UINT16_C( 364), UINT16_C(38525), UINT16_C(18362), UINT16_C(64579), UINT16_C(21452), UINT16_C(11011) } }, { { UINT16_C(35625), UINT16_C( 4153), UINT16_C(43580), UINT16_C(14310), UINT16_C(64016), UINT16_C(47466), UINT16_C(50862), UINT16_C(60064) }, { UINT16_C(10663), UINT16_C(26776), UINT16_C(63439), UINT16_C(11790), UINT16_C( 5657), UINT16_C(46792), UINT16_C( 8171), UINT16_C( 5167) }, UINT16_C(26794), { UINT16_C( 3087), UINT16_C(17705), UINT16_C(20914), UINT16_C(32050), UINT16_C(52906), UINT16_C(23098), UINT16_C(28860), UINT16_C(27094) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); uint16_t c = test_vec[i].c; simde_uint16x8_t r = simde_vmlaq_n_u16(a, b, c); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); uint16_t c = simde_test_codegen_random_u16(); simde_uint16x8_t r = simde_vmlaq_n_u16(a, b, c); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlaq_n_u32 (SIMDE_MUNIT_TEST_ARGS) { 
#if 1 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t c; uint32_t r[4]; } test_vec[] = { { { UINT32_C(1359494925), UINT32_C(3654459894), UINT32_C(3061381284), UINT32_C(2780043972) }, { UINT32_C(1309599344), UINT32_C(1081949128), UINT32_C(2658471569), UINT32_C(2189969525) }, UINT32_C(2127794567), { UINT32_C(4189664285), UINT32_C(2157015150), UINT32_C(4076259355), UINT32_C(4211840375) } }, { { UINT32_C(3596068402), UINT32_C(1653395358), UINT32_C(1510490345), UINT32_C(4020770343) }, { UINT32_C(3794740561), UINT32_C(2827002931), UINT32_C(1948912108), UINT32_C(3438476954) }, UINT32_C(1117931940), { UINT32_C(2909906710), UINT32_C( 302943050), UINT32_C(3777161241), UINT32_C(1465981647) } }, { { UINT32_C( 27536920), UINT32_C(2505813102), UINT32_C( 344196291), UINT32_C(1576514345) }, { UINT32_C(1141208919), UINT32_C( 448278656), UINT32_C(3538332206), UINT32_C( 185895155) }, UINT32_C( 621590710), { UINT32_C(1986104818), UINT32_C(3173656430), UINT32_C(1861882231), UINT32_C(2338568171) } }, { { UINT32_C( 683305061), UINT32_C(2503753580), UINT32_C(1257386994), UINT32_C( 730790059) }, { UINT32_C(1447446056), UINT32_C(3811060976), UINT32_C(1810775221), UINT32_C(1502673652) }, UINT32_C(3464579938), { UINT32_C(1347877813), UINT32_C( 347587404), UINT32_C(2770565180), UINT32_C(3146286611) } }, { { UINT32_C(2086977162), UINT32_C(2630244081), UINT32_C(1992840270), UINT32_C(2328628890) }, { UINT32_C(4016960570), UINT32_C( 626744112), UINT32_C(3095325526), UINT32_C(3230072886) }, UINT32_C(2940070846), { UINT32_C(4144185238), UINT32_C(3183023249), UINT32_C(3296784418), UINT32_C(1393412270) } }, { { UINT32_C(2420900673), UINT32_C(4060484696), UINT32_C(1551749922), UINT32_C(4165790407) }, { UINT32_C(2619189062), UINT32_C(3377765266), UINT32_C(1502206875), UINT32_C( 134858438) }, UINT32_C( 580408778), { UINT32_C( 902577789), UINT32_C(2837467020), UINT32_C(3275199088), UINT32_C(3767763203) } }, { { UINT32_C(2333384297), UINT32_C( 971477361), UINT32_C(3258004348), 
UINT32_C(1834897114) }, { UINT32_C(2234954473), UINT32_C(1423884174), UINT32_C(1348265862), UINT32_C(2775774268) }, UINT32_C( 70289043), { UINT32_C(3671281716), UINT32_C(2964579323), UINT32_C( 165300590), UINT32_C(1433924942) } }, { { UINT32_C(2487031832), UINT32_C( 643198539), UINT32_C(2794697916), UINT32_C(4096510310) }, { UINT32_C( 239667592), UINT32_C( 761177585), UINT32_C( 768856218), UINT32_C(1865483095) }, UINT32_C(1728278299), { UINT32_C(2511016304), UINT32_C(3793251766), UINT32_C(3140188922), UINT32_C(4191083667) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); uint32_t c = test_vec[i].c; simde_uint32x4_t r = simde_vmlaq_n_u32(a, b, c); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); uint32_t c = simde_test_codegen_random_u32(); simde_uint32x4_t r = simde_vmlaq_n_u32(a, b, c); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u32(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmla_n_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmla_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmla_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmla_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmla_n_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_n_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlaq_n_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" 
simde-0.7.2/test/arm/neon/mlal.c000066400000000000000000000633641400333146700164240ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN mlal #include "test-neon.h" #include "../../../simde/arm/neon/mlal.h" static int test_simde_vmlal_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int8_t b[8]; int8_t c[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 22815), INT16_C( 31689), -INT16_C( 3101), -INT16_C( 14571), INT16_C( 4624), -INT16_C( 7544), -INT16_C( 30491), INT16_C( 21581) }, { -INT8_C( 66), INT8_C( 81), INT8_C( 115), INT8_C( 86), -INT8_C( 49), -INT8_C( 27), -INT8_C( 75), INT8_C( 12) }, { -INT8_C( 122), -INT8_C( 95), -INT8_C( 63), -INT8_C( 71), INT8_C( 63), -INT8_C( 125), -INT8_C( 33), INT8_C( 94) }, { INT16_C( 30867), INT16_C( 23994), -INT16_C( 10346), -INT16_C( 20677), INT16_C( 1537), -INT16_C( 4169), -INT16_C( 28016), INT16_C( 22709) } }, { { -INT16_C( 22307), -INT16_C( 16167), -INT16_C( 4452), -INT16_C( 21369), INT16_C( 3841), -INT16_C( 6514), -INT16_C( 9320), INT16_C( 22074) }, { INT8_C( 45), -INT8_C( 83), -INT8_C( 84), -INT8_C( 4), -INT8_C( 109), INT8_C( 97), INT8_C( 8), INT8_C( 25) }, { INT8_C( 3), -INT8_C( 54), -INT8_C( 45), INT8_C( 66), INT8_C( 77), -INT8_C( 78), -INT8_C( 96), INT8_C( 42) }, { -INT16_C( 22172), -INT16_C( 11685), -INT16_C( 672), -INT16_C( 21633), -INT16_C( 4552), -INT16_C( 14080), -INT16_C( 10088), INT16_C( 23124) } }, { { INT16_C( 31066), -INT16_C( 2326), INT16_C( 29287), INT16_C( 26786), INT16_C( 12417), INT16_C( 6479), -INT16_C( 30452), INT16_C( 14703) }, { INT8_C( 55), INT8_C( 28), INT8_C( 53), -INT8_C( 54), INT8_C( 125), INT8_C( 62), -INT8_C( 29), INT8_MIN }, { INT8_C( 8), -INT8_C( 74), -INT8_C( 62), INT8_C( 85), INT8_C( 104), INT8_C( 98), INT8_MIN, -INT8_C( 61) }, { INT16_C( 31506), -INT16_C( 4398), INT16_C( 26001), INT16_C( 22196), INT16_C( 25417), INT16_C( 12555), -INT16_C( 26740), INT16_C( 22511) } }, { { INT16_C( 27355), INT16_C( 17337), INT16_C( 23772), INT16_C( 24235), -INT16_C( 1396), -INT16_C( 
26505), -INT16_C( 6268), -INT16_C( 17455) }, { INT8_C( 3), INT8_C( 7), -INT8_C( 123), INT8_MIN, INT8_C( 69), INT8_C( 104), INT8_C( 1), INT8_C( 77) }, { INT8_C( 31), -INT8_C( 61), -INT8_C( 94), -INT8_C( 121), INT8_C( 38), INT8_C( 34), INT8_C( 74), INT8_C( 1) }, { INT16_C( 27448), INT16_C( 16910), -INT16_C( 30202), -INT16_C( 25813), INT16_C( 1226), -INT16_C( 22969), -INT16_C( 6194), -INT16_C( 17378) } }, { { INT16_C( 1165), INT16_C( 26948), -INT16_C( 4000), -INT16_C( 4921), INT16_C( 16362), INT16_C( 28293), INT16_C( 22054), INT16_C( 10537) }, { INT8_C( 93), -INT8_C( 82), -INT8_C( 87), -INT8_C( 94), INT8_C( 23), -INT8_C( 86), -INT8_C( 17), INT8_C( 54) }, { INT8_C( 110), -INT8_C( 110), -INT8_C( 67), -INT8_C( 108), -INT8_C( 76), INT8_C( 8), -INT8_C( 107), INT8_C( 65) }, { INT16_C( 11395), -INT16_C( 29568), INT16_C( 1829), INT16_C( 5231), INT16_C( 14614), INT16_C( 27605), INT16_C( 23873), INT16_C( 14047) } }, { { -INT16_C( 9716), INT16_C( 27819), INT16_C( 29386), -INT16_C( 19368), -INT16_C( 8783), -INT16_C( 10461), INT16_C( 19508), -INT16_C( 28416) }, { -INT8_C( 5), -INT8_C( 86), INT8_C( 52), INT8_C( 18), INT8_C( 84), INT8_C( 35), INT8_C( 72), -INT8_C( 62) }, { -INT8_C( 75), INT8_C( 5), INT8_C( 86), INT8_C( 106), INT8_C( 13), -INT8_C( 20), -INT8_C( 85), INT8_C( 25) }, { -INT16_C( 9341), INT16_C( 27389), -INT16_C( 31678), -INT16_C( 17460), -INT16_C( 7691), -INT16_C( 11161), INT16_C( 13388), -INT16_C( 29966) } }, { { INT16_C( 22214), -INT16_C( 28539), -INT16_C( 8503), INT16_C( 31300), INT16_C( 26555), -INT16_C( 4270), INT16_C( 21172), -INT16_C( 20607) }, { -INT8_C( 4), -INT8_C( 75), -INT8_C( 63), INT8_C( 81), -INT8_C( 40), INT8_C( 9), INT8_C( 19), -INT8_C( 114) }, { INT8_C( 14), INT8_C( 106), -INT8_C( 8), INT8_C( 28), INT8_C( 86), -INT8_C( 93), INT8_C( 53), INT8_C( 28) }, { INT16_C( 22158), INT16_C( 29047), -INT16_C( 7999), -INT16_C( 31968), INT16_C( 23115), -INT16_C( 5107), INT16_C( 22179), -INT16_C( 23799) } }, { { -INT16_C( 17414), -INT16_C( 15444), -INT16_C( 3943), 
INT16_C( 21565), -INT16_C( 28840), INT16_C( 3140), -INT16_C( 14878), -INT16_C( 8517) }, { INT8_C( 122), INT8_C( 124), INT8_C( 47), INT8_C( 82), -INT8_C( 123), INT8_C( 67), -INT8_C( 32), -INT8_C( 109) }, { -INT8_C( 83), -INT8_C( 40), -INT8_C( 81), INT8_C( 3), INT8_C( 124), -INT8_C( 27), INT8_C( 31), INT8_C( 118) }, { -INT16_C( 27540), -INT16_C( 20404), -INT16_C( 7750), INT16_C( 21811), INT16_C( 21444), INT16_C( 1331), -INT16_C( 15870), -INT16_C( 21379) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t c = simde_vld1_s8(test_vec[i].c); simde_int16x8_t r = simde_vmlal_s8(a, b, c); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int8x8_t c = simde_test_arm_neon_random_i8x8(); simde_int16x8_t r = simde_vmlal_s8(a, b, c); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlal_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int16_t b[4]; int16_t c[4]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 1576146538), -INT32_C( 379749091), -INT32_C( 1025404855), -INT32_C( 1031842427) }, { INT16_C( 1573), -INT16_C( 2909), -INT16_C( 20161), INT16_C( 24305) }, { -INT16_C( 7737), INT16_C( 12514), -INT16_C( 21357), INT16_C( 10595) }, { -INT32_C( 1588316839), -INT32_C( 416152317), -INT32_C( 594826378), -INT32_C( 774330952) } }, { { -INT32_C( 1295290219), INT32_C( 932981230), INT32_C( 1056603577), -INT32_C( 134121005) }, { -INT16_C( 23425), -INT16_C( 16404), 
-INT16_C( 8619), INT16_C( 7197) }, { -INT16_C( 65), INT16_C( 21069), -INT16_C( 20308), INT16_C( 16763) }, { -INT32_C( 1293767594), INT32_C( 587365354), INT32_C( 1231638229), -INT32_C( 13477694) } }, { { INT32_C( 250890016), INT32_C( 692490352), -INT32_C( 530038771), INT32_C( 970549689) }, { -INT16_C( 15091), INT16_C( 25336), INT16_C( 5539), INT16_C( 25214) }, { -INT16_C( 13547), -INT16_C( 15948), INT16_C( 12411), -INT16_C( 25598) }, { INT32_C( 455327793), INT32_C( 288431824), -INT32_C( 461294242), INT32_C( 325121717) } }, { { -INT32_C( 408226185), -INT32_C( 1810829178), -INT32_C( 361465808), -INT32_C( 299676191) }, { INT16_C( 6931), -INT16_C( 18864), -INT16_C( 12496), INT16_C( 17689) }, { -INT16_C( 12902), INT16_C( 5638), INT16_C( 2557), INT16_C( 29874) }, { -INT32_C( 497649947), -INT32_C( 1917184410), -INT32_C( 393418080), INT32_C( 228764995) } }, { { -INT32_C( 2040832769), INT32_C( 2098883661), -INT32_C( 966291740), -INT32_C( 273380644) }, { INT16_C( 1445), -INT16_C( 10587), -INT16_C( 16684), INT16_C( 28187) }, { INT16_C( 8844), -INT16_C( 30332), INT16_C( 13867), INT16_C( 11006) }, { -INT32_C( 2028053189), -INT32_C( 1874958751), -INT32_C( 1197648768), INT32_C( 36845478) } }, { { -INT32_C( 525313645), -INT32_C( 1436693819), INT32_C( 896583001), -INT32_C( 182180785) }, { -INT16_C( 14039), -INT16_C( 565), -INT16_C( 6520), INT16_C( 5228) }, { -INT16_C( 4088), INT16_C( 13213), -INT16_C( 25817), -INT16_C( 17826) }, { -INT32_C( 467922213), -INT32_C( 1444159164), INT32_C( 1064909841), -INT32_C( 275375113) } }, { { -INT32_C( 1164308747), INT32_C( 845477849), INT32_C( 208131260), INT32_C( 570526713) }, { -INT16_C( 13228), -INT16_C( 9184), -INT16_C( 29518), -INT16_C( 17424) }, { -INT16_C( 29060), -INT16_C( 23570), INT16_C( 19497), INT16_C( 7773) }, { -INT32_C( 779903067), INT32_C( 1061944729), -INT32_C( 367381186), INT32_C( 435089961) } }, { { INT32_C( 886699867), -INT32_C( 1419362833), INT32_C( 196594962), -INT32_C( 1406289832) }, { INT16_C( 19844), INT16_C( 14217), 
INT16_C( 31193), INT16_C( 22258) }, { -INT16_C( 8185), INT16_C( 12793), INT16_C( 22317), -INT16_C( 30641) }, { INT32_C( 724276727), -INT32_C( 1237484752), INT32_C( 892729143), -INT32_C( 2088297210) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t c = simde_vld1_s16(test_vec[i].c); simde_int32x4_t r = simde_vmlal_s16(a, b, c); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int16x4_t c = simde_test_arm_neon_random_i16x4(); simde_int32x4_t r = simde_vmlal_s16(a, b, c); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlal_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int32_t b[2]; int32_t c[2]; int64_t r[2]; } test_vec[] = { { { -INT64_C( 2979872316173994295), -INT64_C( 1240883300939221293) }, { INT32_C( 1264523082), -INT32_C( 1345297013) }, { -INT32_C( 300170767), INT32_C( 828293992) }, { -INT64_C( 3359445179587138189), -INT64_C( 2355184734262667189) } }, { { -INT64_C( 344116179695056007), -INT64_C( 3436780301193115862) }, { -INT32_C( 1138971087), INT32_C( 74181651) }, { INT32_C( 384993198), -INT32_C( 1018736566) }, { -INT64_C( 782612300908722233), -INT64_C( 3512351861593066328) } }, { { -INT64_C( 5386480482346723813), -INT64_C( 2396648732816469401) }, { INT32_C( 630905106), INT32_C( 1948845766) }, { -INT32_C( 678814835), -INT32_C( 2019896980) }, { -INT64_C( 5814748227776771323), -INT64_C( 6333116410045656081) } }, { { -INT64_C( 
8708260595498237721), -INT64_C( 5609177671864664931) }, { -INT32_C( 774389237), INT32_C( 1430585544) }, { -INT32_C( 2027106533), -INT32_C( 2029009248) }, { -INT64_C( 7138491114090652400), -INT64_C( 8511848970695775843) } }, { { -INT64_C( 8993934859609449849), -INT64_C( 2184819944803425445) }, { -INT32_C( 2135784265), -INT32_C( 1596589948) }, { INT32_C( 1747387079), INT32_C( 1341077192) }, { INT64_C( 5720767385907589832), -INT64_C( 4325970309042691461) } }, { { INT64_C( 7167802101881890833), -INT64_C( 900706475257674039) }, { INT32_C( 359871121), -INT32_C( 223000534) }, { INT32_C( 324721994), INT32_C( 610421011) }, { INT64_C( 7284660169876026107), -INT64_C( 1036830686675493913) } }, { { INT64_C( 5304559210268940594), -INT64_C( 7347226928829891365) }, { -INT32_C( 978355046), INT32_C( 263677381) }, { INT32_C( 1428296002), -INT32_C( 1938127526) }, { INT64_C( 3907178609530614502), -INT64_C( 7858267318929580771) } }, { { INT64_C( 335346826108900830), -INT64_C( 7997507868240933024) }, { INT32_C( 1716958113), INT32_C( 1517620504) }, { INT32_C( 2041550878), -INT32_C( 83547875) }, { INT64_C( 3840604169193274044), -INT64_C( 8124301836406562024) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t c = simde_vld1_s32(test_vec[i].c); simde_int64x2_t r = simde_vmlal_s32(a, b, c); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int32x2_t c = simde_test_arm_neon_random_i32x2(); simde_int64x2_t r = simde_vmlal_s32(a, b, c); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlal_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint8_t b[8]; uint8_t c[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(47449), UINT16_C(30639), UINT16_C(18307), UINT16_C(14469), UINT16_C(51034), UINT16_C(24048), UINT16_C(31662), UINT16_C(48756) }, { UINT8_C(161), UINT8_C(211), UINT8_C(242), UINT8_C(246), UINT8_C(142), UINT8_C( 12), UINT8_C( 6), UINT8_C( 91) }, { UINT8_C( 89), UINT8_C(125), UINT8_C(196), UINT8_C(231), UINT8_C(202), UINT8_C(241), UINT8_C(182), UINT8_C( 36) }, { UINT16_C(61778), UINT16_C(57014), UINT16_C( 203), UINT16_C( 5759), UINT16_C(14182), UINT16_C(26940), UINT16_C(32754), UINT16_C(52032) } }, { { UINT16_C(26282), UINT16_C(11931), UINT16_C( 8365), UINT16_C( 2150), UINT16_C(22248), UINT16_C(38501), UINT16_C(55761), UINT16_C(29269) }, { UINT8_C(173), UINT8_C( 71), UINT8_C(105), UINT8_C( 59), UINT8_C( 83), UINT8_C(111), UINT8_C(151), UINT8_C(172) }, { UINT8_C(236), UINT8_C( 91), UINT8_C(148), UINT8_C(183), UINT8_C( 76), UINT8_C( 74), UINT8_C(219), UINT8_C(246) }, { UINT16_C( 1574), UINT16_C(18392), UINT16_C(23905), UINT16_C(12947), UINT16_C(28556), UINT16_C(46715), UINT16_C(23294), UINT16_C( 6045) } }, { { UINT16_C(30384), UINT16_C(24100), UINT16_C(35478), UINT16_C(32358), UINT16_C(52192), UINT16_C(45589), UINT16_C(27300), UINT16_C(20772) }, { UINT8_C(177), UINT8_C(141), UINT8_C(141), UINT8_C( 4), UINT8_C(253), UINT8_C( 36), UINT8_C(177), UINT8_C(233) }, { UINT8_C(127), UINT8_C( 69), UINT8_C(160), UINT8_C(203), UINT8_C(143), UINT8_C(123), UINT8_C(193), UINT8_C( 64) }, { UINT16_C(52863), UINT16_C(33829), UINT16_C(58038), UINT16_C(33170), UINT16_C(22835), UINT16_C(50017), UINT16_C(61461), UINT16_C(35684) } }, { { UINT16_C(59121), UINT16_C(34974), UINT16_C( 1136), UINT16_C(20742), UINT16_C( 7119), UINT16_C(29443), UINT16_C(10117), UINT16_C(14021) }, { UINT8_C(181), UINT8_C( 82), UINT8_C( 59), UINT8_C(178), 
UINT8_C(118), UINT8_C(236), UINT8_C(155), UINT8_C(245) }, { UINT8_C( 49), UINT8_C( 60), UINT8_C(192), UINT8_C(192), UINT8_C(183), UINT8_C(129), UINT8_C( 0), UINT8_C(169) }, { UINT16_C( 2454), UINT16_C(39894), UINT16_C(12464), UINT16_C(54918), UINT16_C(28713), UINT16_C(59887), UINT16_C(10117), UINT16_C(55426) } }, { { UINT16_C(40551), UINT16_C(55345), UINT16_C(14242), UINT16_C(28969), UINT16_C(11347), UINT16_C(55525), UINT16_C(43603), UINT16_C( 2063) }, { UINT8_C(252), UINT8_C( 74), UINT8_C(186), UINT8_C(114), UINT8_C( 54), UINT8_C( 86), UINT8_C(103), UINT8_C(103) }, { UINT8_C(146), UINT8_C( 39), UINT8_C( 39), UINT8_C( 73), UINT8_C(168), UINT8_C( 40), UINT8_C(242), UINT8_C( 16) }, { UINT16_C(11807), UINT16_C(58231), UINT16_C(21496), UINT16_C(37291), UINT16_C(20419), UINT16_C(58965), UINT16_C( 2993), UINT16_C( 3711) } }, { { UINT16_C( 9158), UINT16_C(27112), UINT16_C( 4443), UINT16_C(44762), UINT16_C(48957), UINT16_C(36998), UINT16_C(38249), UINT16_C(26009) }, { UINT8_C(223), UINT8_C( 83), UINT8_C(215), UINT8_C( 21), UINT8_C(169), UINT8_C( 62), UINT8_C(124), UINT8_C( 59) }, { UINT8_C(101), UINT8_C(164), UINT8_C(133), UINT8_C( 14), UINT8_C(204), UINT8_C(119), UINT8_C( 30), UINT8_C(146) }, { UINT16_C(31681), UINT16_C(40724), UINT16_C(33038), UINT16_C(45056), UINT16_C(17897), UINT16_C(44376), UINT16_C(41969), UINT16_C(34623) } }, { { UINT16_C( 1691), UINT16_C(63227), UINT16_C(54807), UINT16_C(21668), UINT16_C(10901), UINT16_C(65508), UINT16_C(32192), UINT16_C(40804) }, { UINT8_C(209), UINT8_C( 60), UINT8_C(181), UINT8_C(122), UINT8_C(122), UINT8_C( 49), UINT8_C(182), UINT8_C(224) }, { UINT8_C(213), UINT8_C( 59), UINT8_C(238), UINT8_C(161), UINT8_C(178), UINT8_C( 12), UINT8_C( 52), UINT8_C( 77) }, { UINT16_C(46208), UINT16_C( 1231), UINT16_C(32349), UINT16_C(41310), UINT16_C(32617), UINT16_C( 560), UINT16_C(41656), UINT16_C(58052) } }, { { UINT16_C(12050), UINT16_C(10563), UINT16_C(59141), UINT16_C(39805), UINT16_C(24850), UINT16_C(53914), UINT16_C(65247), 
UINT16_C(45169) }, { UINT8_C( 58), UINT8_C( 38), UINT8_C( 42), UINT8_C(181), UINT8_C( 88), UINT8_C(224), UINT8_C(149), UINT8_C( 45) }, { UINT8_C( 27), UINT8_C(131), UINT8_C(207), UINT8_C(206), UINT8_C(143), UINT8_C( 3), UINT8_C( 27), UINT8_C(161) }, { UINT16_C(13616), UINT16_C(15541), UINT16_C( 2299), UINT16_C(11555), UINT16_C(37434), UINT16_C(54586), UINT16_C( 3734), UINT16_C(52414) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t c = simde_vld1_u8(test_vec[i].c); simde_uint16x8_t r = simde_vmlal_u8(a, b, c); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t c = simde_test_arm_neon_random_u8x8(); simde_uint16x8_t r = simde_vmlal_u8(a, b, c); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlal_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint16_t b[4]; uint16_t c[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(3154437210), UINT32_C(3449386933), UINT32_C( 981946950), UINT32_C(3734704900) }, { UINT16_C(23951), UINT16_C(53783), UINT16_C( 4408), UINT16_C(46605) }, { UINT16_C( 876), UINT16_C(48085), UINT16_C(47956), UINT16_C(44730) }, { UINT32_C(3175418286), UINT32_C(1740575192), UINT32_C(1193336998), UINT32_C(1524379254) } }, { { UINT32_C(1416347295), UINT32_C(2082538549), UINT32_C(1521920086), UINT32_C(1245270459) }, { UINT16_C(20654), UINT16_C(59164), UINT16_C(10850), UINT16_C(52893) }, { UINT16_C(29229), 
UINT16_C(33161), UINT16_C(17197), UINT16_C(52527) }, { UINT32_C(2020043061), UINT32_C(4044475953), UINT32_C(1708507536), UINT32_C(4023581070) } }, { { UINT32_C( 924948993), UINT32_C(4122166174), UINT32_C(2806999531), UINT32_C(1777436858) }, { UINT16_C( 3801), UINT16_C(15184), UINT16_C(60728), UINT16_C(25865) }, { UINT16_C(37727), UINT16_C(36326), UINT16_C( 5590), UINT16_C(55386) }, { UINT32_C(1068349320), UINT32_C( 378772862), UINT32_C(3146469051), UINT32_C(3209995748) } }, { { UINT32_C(1309637552), UINT32_C(2856567486), UINT32_C(3847328555), UINT32_C(4098769435) }, { UINT16_C(40528), UINT16_C(34863), UINT16_C(14731), UINT16_C(60397) }, { UINT16_C(54220), UINT16_C(41592), UINT16_C(53993), UINT16_C(39290) }, { UINT32_C(3507065712), UINT32_C( 11622086), UINT32_C( 347732142), UINT32_C(2176800269) } }, { { UINT32_C( 216500557), UINT32_C(1991650123), UINT32_C(3646687166), UINT32_C(2597235273) }, { UINT16_C(64841), UINT16_C(54306), UINT16_C( 4150), UINT16_C( 703) }, { UINT16_C(14307), UINT16_C(52389), UINT16_C( 7945), UINT16_C(22373) }, { UINT32_C(1144180744), UINT32_C( 541719861), UINT32_C(3679658916), UINT32_C(2612963492) } }, { { UINT32_C(4100148649), UINT32_C( 912988536), UINT32_C(1762641696), UINT32_C(3120815473) }, { UINT16_C( 9947), UINT16_C( 4495), UINT16_C(20022), UINT16_C( 6420) }, { UINT16_C(47494), UINT16_C(36838), UINT16_C(19416), UINT16_C(33254) }, { UINT32_C( 277604171), UINT32_C(1078575346), UINT32_C(2151388848), UINT32_C(3334306153) } }, { { UINT32_C( 276187544), UINT32_C(2185683298), UINT32_C( 434919080), UINT32_C( 248835891) }, { UINT16_C(25365), UINT16_C(19232), UINT16_C(13489), UINT16_C(14181) }, { UINT16_C(19437), UINT16_C(50631), UINT16_C(44438), UINT16_C(12103) }, { UINT32_C( 769207049), UINT32_C(3159418690), UINT32_C(1034343262), UINT32_C( 420468534) } }, { { UINT32_C(1497349623), UINT32_C(1188857502), UINT32_C( 257935580), UINT32_C(3441308599) }, { UINT16_C(16022), UINT16_C(18456), UINT16_C(32114), UINT16_C(24447) }, { UINT16_C(18120), 
UINT16_C(24356), UINT16_C(27636), UINT16_C(60302) }, { UINT32_C(1787668263), UINT32_C(1638371838), UINT32_C(1145438084), UINT32_C( 620544297) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t c = simde_vld1_u16(test_vec[i].c); simde_uint32x4_t r = simde_vmlal_u16(a, b, c); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t c = simde_test_arm_neon_random_u16x4(); simde_uint32x4_t r = simde_vmlal_u16(a, b, c); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlal_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint32_t b[2]; uint32_t c[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(10190224453633740445), UINT64_C( 8831462168739048581) }, { UINT32_C(3579245118), UINT32_C( 26948603) }, { UINT32_C( 186872846), UINT32_C(2037143771) }, { UINT64_C(10859088175366006273), UINT64_C( 8886360347477650494) } }, { { UINT64_C( 4294859221972665063), UINT64_C( 9068824954654529363) }, { UINT32_C(3713151458), UINT32_C(1927212388) }, { UINT32_C(1031668066), UINT32_C(1085729369) }, { UINT64_C( 8125599005412605291), UINT64_C(11161256044606752535) } }, { { UINT64_C( 7970781020315038636), UINT64_C(15005054901360262783) }, { UINT32_C(1068338906), UINT32_C(3736177532) }, { UINT32_C(3843764108), UINT32_C(3324432921) }, { UINT64_C(12077223762377824484), UINT64_C( 8978982413732042139) } }, { { UINT64_C(15712994622238651269), UINT64_C( 
7098188166977871555) }, { UINT32_C(3751882339), UINT32_C(1170035385) }, { UINT32_C(2603276418), UINT32_C( 811684010) }, { UINT64_C( 7033437364758481355), UINT64_C( 8047887180116565405) } }, { { UINT64_C( 1295311894479931844), UINT64_C(14530656829270299069) }, { UINT32_C( 715671665), UINT32_C( 477062554) }, { UINT32_C(3904412221), UINT32_C(2920815082) }, { UINT64_C( 4089589089529349809), UINT64_C(15924068332050938497) } }, { { UINT64_C( 9584722998327109795), UINT64_C( 8829149881152750936) }, { UINT32_C(1420111801), UINT32_C(3513783444) }, { UINT32_C(2579048622), UINT32_C(3846689090) }, { UINT64_C(13247260381782098017), UINT64_C( 3898838246100625280) } }, { { UINT64_C( 3677147157587432890), UINT64_C(18240273672796492447) }, { UINT32_C( 995215271), UINT32_C(2316157404) }, { UINT32_C( 740542186), UINT32_C(1376873368) }, { UINT64_C( 4414146049914355296), UINT64_C( 2982585044750557503) } }, { { UINT64_C( 6958021572826556288), UINT64_C(11596253343504695469) }, { UINT32_C(3470540786), UINT32_C(3931695360) }, { UINT32_C(1192656047), UINT32_C(1738090471) }, { UINT64_C(11097183027609589230), UINT64_C(18429895583595610029) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t c = simde_vld1_u32(test_vec[i].c); simde_uint64x2_t r = simde_vmlal_u32(a, b, c); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t c = simde_test_arm_neon_random_u32x2(); simde_uint64x2_t r = simde_vmlal_u32(a, b, c); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/mlal_high.c000066400000000000000000001026341400333146700174150ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN mlal_high #include "test-neon.h" #include "../../../simde/arm/neon/mlal_high.h" static int test_simde_vmlal_high_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int8_t b[16]; int8_t c[16]; int16_t r[8]; } test_vec[] = { { { INT16_C( 3820), INT16_C( 12612), INT16_C( 12948), INT16_C( 16793), INT16_C( 15561), -INT16_C( 25824), INT16_C( 27015), INT16_C( 5311) }, { INT8_C( 40), -INT8_C( 122), -INT8_C( 54), -INT8_C( 97), INT8_C( 0), INT8_C( 110), -INT8_C( 59), -INT8_C( 126), -INT8_C( 26), INT8_C( 43), -INT8_C( 37), -INT8_C( 65), -INT8_C( 9), INT8_C( 46), -INT8_C( 80), -INT8_C( 29) }, { INT8_C( 60), -INT8_C( 12), INT8_C( 20), -INT8_C( 47), INT8_C( 38), -INT8_C( 82), INT8_C( 18), -INT8_C( 17), -INT8_C( 22), INT8_C( 51), -INT8_C( 118), INT8_C( 113), -INT8_C( 100), INT8_C( 73), -INT8_C( 123), -INT8_C( 60) }, { INT16_C( 4392), INT16_C( 14805), INT16_C( 17314), INT16_C( 9448), INT16_C( 16461), -INT16_C( 22466), -INT16_C( 28681), INT16_C( 7051) } }, { { INT16_C( 20431), -INT16_C( 12444), INT16_C( 10685), -INT16_C( 23470), INT16_C( 11604), INT16_C( 19555), INT16_C( 4955), -INT16_C( 26577) }, { INT8_C( 7), INT8_C( 68), INT8_C( 105), INT8_C( 45), -INT8_C( 14), INT8_C( 123), INT8_C( 29), -INT8_C( 36), -INT8_C( 82), -INT8_C( 89), INT8_C( 77), INT8_C( 74), -INT8_C( 15), -INT8_C( 46), INT8_C( 15), -INT8_C( 64) }, { INT8_C( 34), INT8_C( 115), -INT8_C( 112), -INT8_C( 33), -INT8_C( 100), -INT8_C( 30), -INT8_C( 125), -INT8_C( 15), 
INT8_C( 15), -INT8_C( 26), INT8_C( 61), INT8_C( 107), -INT8_C( 7), INT8_C( 108), INT8_C( 3), INT8_C( 0) }, { INT16_C( 19201), -INT16_C( 10130), INT16_C( 15382), -INT16_C( 15552), INT16_C( 11709), INT16_C( 14587), INT16_C( 5000), -INT16_C( 26577) } }, { { INT16_C( 27824), -INT16_C( 24018), INT16_C( 19431), -INT16_C( 27010), -INT16_C( 13326), -INT16_C( 7200), -INT16_C( 4194), -INT16_C( 16220) }, { INT8_C( 98), INT8_C( 52), -INT8_C( 97), -INT8_C( 1), INT8_C( 22), INT8_C( 35), -INT8_C( 16), INT8_C( 37), INT8_C( 9), INT8_C( 45), -INT8_C( 112), INT8_C( 3), -INT8_C( 103), -INT8_C( 109), INT8_C( 3), INT8_C( 74) }, { -INT8_C( 1), INT8_C( 49), -INT8_C( 20), -INT8_C( 25), INT8_C( 124), INT8_C( 107), INT8_C( 125), INT8_C( 111), INT8_C( 54), INT8_C( 93), INT8_C( 82), -INT8_C( 44), INT8_C( 77), -INT8_C( 10), -INT8_C( 108), -INT8_C( 81) }, { INT16_C( 28310), -INT16_C( 19833), INT16_C( 10247), -INT16_C( 27142), -INT16_C( 21257), -INT16_C( 6110), -INT16_C( 4518), -INT16_C( 22214) } }, { { INT16_C( 13354), INT16_C( 16558), -INT16_C( 25001), INT16_C( 24678), -INT16_C( 2357), INT16_C( 25955), INT16_C( 26506), -INT16_C( 30289) }, { -INT8_C( 104), -INT8_C( 101), INT8_C( 112), INT8_C( 21), INT8_C( 6), -INT8_C( 19), -INT8_C( 124), INT8_C( 61), INT8_C( 75), -INT8_C( 42), INT8_C( 17), -INT8_C( 104), -INT8_C( 51), -INT8_C( 90), INT8_C( 71), -INT8_C( 9) }, { -INT8_C( 38), -INT8_C( 10), INT8_C( 56), INT8_C( 49), -INT8_C( 108), -INT8_C( 98), -INT8_C( 111), INT8_C( 96), -INT8_C( 108), -INT8_C( 11), -INT8_C( 59), INT8_C( 30), INT8_C( 92), INT8_C( 116), -INT8_C( 88), -INT8_C( 12) }, { INT16_C( 5254), INT16_C( 17020), -INT16_C( 26004), INT16_C( 21558), -INT16_C( 7049), INT16_C( 15515), INT16_C( 20258), -INT16_C( 30181) } }, { { INT16_C( 6159), INT16_C( 5641), -INT16_C( 29434), INT16_C( 20819), INT16_C( 25700), INT16_C( 12777), INT16_C( 12298), -INT16_C( 7128) }, { INT8_C( 38), INT8_C( 96), INT8_C( 21), -INT8_C( 69), -INT8_C( 2), -INT8_C( 89), INT8_C( 27), -INT8_C( 109), -INT8_C( 100), -INT8_C( 32), 
-INT8_C( 79), -INT8_C( 8), INT8_C( 84), INT8_C( 89), -INT8_C( 20), INT8_C( 99) }, { INT8_C( 114), -INT8_C( 10), INT8_C( 121), INT8_C( 120), -INT8_C( 125), -INT8_C( 52), -INT8_C( 55), -INT8_C( 25), INT8_C( 49), -INT8_C( 78), INT8_C( 24), INT8_C( 59), -INT8_C( 30), INT8_C( 65), INT8_C( 32), INT8_C( 9) }, { INT16_C( 1259), INT16_C( 8137), -INT16_C( 31330), INT16_C( 20347), INT16_C( 23180), INT16_C( 18562), INT16_C( 11658), -INT16_C( 6237) } }, { { INT16_C( 13729), -INT16_C( 24380), -INT16_C( 8228), INT16_C( 30771), -INT16_C( 6977), INT16_C( 4976), INT16_C( 23870), -INT16_C( 20362) }, { INT8_C( 83), -INT8_C( 16), INT8_C( 40), -INT8_C( 42), -INT8_C( 68), -INT8_C( 15), -INT8_C( 66), -INT8_C( 19), -INT8_C( 93), -INT8_C( 42), INT8_C( 41), -INT8_C( 123), INT8_C( 23), INT8_C( 73), -INT8_C( 114), -INT8_C( 71) }, { INT8_C( 126), INT8_C( 82), INT8_C( 89), INT8_C( 91), INT8_C( 49), -INT8_C( 116), -INT8_C( 45), -INT8_C( 16), INT8_C( 112), INT8_C( 68), INT8_C( 3), -INT8_C( 82), -INT8_C( 95), INT8_C( 122), INT8_C( 94), -INT8_C( 12) }, { INT16_C( 3313), -INT16_C( 27236), -INT16_C( 8105), -INT16_C( 24679), -INT16_C( 9162), INT16_C( 13882), INT16_C( 13154), -INT16_C( 19510) } }, { { -INT16_C( 31126), INT16_C( 9930), -INT16_C( 30601), INT16_C( 6676), INT16_C( 15711), INT16_C( 30368), INT16_C( 11910), INT16_C( 1071) }, { -INT8_C( 127), -INT8_C( 120), INT8_C( 95), -INT8_C( 78), INT8_C( 20), INT8_C( 51), -INT8_C( 93), -INT8_C( 123), INT8_C( 119), -INT8_C( 90), INT8_C( 51), INT8_C( 24), INT8_C( 32), -INT8_C( 110), INT8_C( 12), -INT8_C( 118) }, { INT8_C( 24), -INT8_C( 42), -INT8_C( 79), -INT8_C( 112), INT8_C( 95), -INT8_C( 59), -INT8_C( 86), -INT8_C( 66), INT8_C( 2), INT8_C( 74), INT8_C( 52), -INT8_C( 120), INT8_C( 121), INT8_C( 100), -INT8_C( 116), -INT8_C( 6) }, { -INT16_C( 30888), INT16_C( 3270), -INT16_C( 27949), INT16_C( 3796), INT16_C( 19583), INT16_C( 19368), INT16_C( 10518), INT16_C( 1779) } }, { { -INT16_C( 4884), INT16_C( 428), INT16_C( 20255), -INT16_C( 27002), -INT16_C( 17930), 
INT16_C( 5806), -INT16_C( 17845), INT16_C( 25761) }, { -INT8_C( 112), INT8_C( 82), -INT8_C( 12), -INT8_C( 17), INT8_C( 23), -INT8_C( 98), -INT8_C( 83), INT8_C( 25), -INT8_C( 23), -INT8_C( 30), -INT8_C( 95), INT8_C( 98), INT8_C( 70), INT8_C( 45), INT8_C( 92), INT8_C( 50) }, { INT8_C( 25), INT8_C( 8), INT8_C( 51), INT8_C( 56), INT8_C( 88), -INT8_C( 71), -INT8_C( 50), INT8_C( 78), INT8_C( 115), INT8_C( 124), INT8_C( 100), -INT8_C( 66), INT8_C( 54), INT8_C( 5), INT8_C( 34), -INT8_C( 57) }, { -INT16_C( 7529), -INT16_C( 3292), INT16_C( 10755), INT16_C( 32066), -INT16_C( 14150), INT16_C( 6031), -INT16_C( 14717), INT16_C( 22911) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t c = simde_vld1q_s8(test_vec[i].c); simde_int16x8_t r = simde_vmlal_high_s8(a, b, c); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int8x16_t c = simde_test_arm_neon_random_i8x16(); simde_int16x8_t r = simde_vmlal_high_s8(a, b, c); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlal_high_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int16_t b[8]; int16_t c[8]; int32_t r[4]; } test_vec[] = { { { INT32_C( 1115600880), -INT32_C( 1829145922), INT32_C( 182690733), -INT32_C( 1887574040) }, { INT16_C( 27742), -INT16_C( 20465), INT16_C( 27015), INT16_C( 8158), -INT16_C( 12479), INT16_C( 32288), -INT16_C( 7148), INT16_C( 1054) }, { -INT16_C( 25189), INT16_C( 23110), 
INT16_C( 16399), -INT16_C( 17172), -INT16_C( 12317), -INT16_C( 13370), INT16_C( 17335), INT16_C( 5466) }, { INT32_C( 1269304723), INT32_C( 2034130814), INT32_C( 58780153), -INT32_C( 1881812876) } }, { { INT32_C( 918907567), INT32_C( 357934291), -INT32_C( 2020379277), -INT32_C( 175394470) }, { -INT16_C( 11698), INT16_C( 23887), INT16_C( 15378), -INT16_C( 2791), -INT16_C( 8437), -INT16_C( 15680), INT16_C( 6946), -INT16_C( 11561) }, { -INT16_C( 25211), INT16_C( 22536), INT16_C( 24129), -INT16_C( 19347), INT16_C( 211), INT16_C( 11579), -INT16_C( 14670), INT16_C( 35) }, { INT32_C( 917127360), INT32_C( 176375571), -INT32_C( 2122277097), -INT32_C( 175799105) } }, { { -INT32_C( 1436650856), -INT32_C( 1163954258), INT32_C( 2038194263), INT32_C( 4936827) }, { INT16_C( 21745), INT16_C( 12888), -INT16_C( 14670), -INT16_C( 31258), INT16_C( 8646), INT16_C( 30899), -INT16_C( 10521), -INT16_C( 32647) }, { -INT16_C( 10424), -INT16_C( 2262), -INT16_C( 13746), -INT16_C( 23119), INT16_C( 11562), -INT16_C( 23265), INT16_C( 27265), INT16_C( 29349) }, { -INT32_C( 1336685804), -INT32_C( 1882819493), INT32_C( 1751339198), -INT32_C( 953219976) } }, { { INT32_C( 1889861054), -INT32_C( 1963554109), -INT32_C( 1828542037), -INT32_C( 955024513) }, { INT16_C( 15698), -INT16_C( 24130), INT16_C( 28423), INT16_C( 12614), INT16_C( 26013), INT16_C( 7894), INT16_C( 31696), -INT16_C( 29039) }, { INT16_C( 13689), INT16_C( 15615), -INT16_C( 2624), INT16_C( 27590), -INT16_C( 13922), INT16_C( 7678), INT16_C( 4420), -INT16_C( 26652) }, { INT32_C( 1527708068), -INT32_C( 1902943977), -INT32_C( 1688445717), -INT32_C( 181077085) } }, { { INT32_C( 1446552399), -INT32_C( 1350009326), -INT32_C( 1261543708), INT32_C( 1396858842) }, { INT16_C( 16788), INT16_C( 21647), INT16_C( 22070), -INT16_C( 11072), -INT16_C( 16865), INT16_C( 25585), -INT16_C( 10544), INT16_C( 8186) }, { INT16_C( 12921), -INT16_C( 29835), -INT16_C( 591), -INT16_C( 27333), INT16_C( 2396), INT16_C( 13897), -INT16_C( 29848), -INT16_C( 887) }, { 
INT32_C( 1406143859), -INT32_C( 994454581), -INT32_C( 946826396), INT32_C( 1389597860) } }, { { INT32_C( 55646413), -INT32_C( 1915219602), -INT32_C( 1611544113), INT32_C( 415165343) }, { INT16_C( 13342), -INT16_C( 12380), -INT16_C( 8399), -INT16_C( 29340), -INT16_C( 21016), INT16_C( 20675), INT16_C( 19512), INT16_C( 1356) }, { -INT16_C( 25243), -INT16_C( 11511), -INT16_C( 7762), INT16_C( 32353), INT16_C( 21162), INT16_C( 18973), -INT16_C( 9155), INT16_C( 23394) }, { -INT32_C( 389094179), -INT32_C( 1522952827), -INT32_C( 1790176473), INT32_C( 446887607) } }, { { INT32_C( 1093273104), -INT32_C( 842035483), INT32_C( 1948095035), INT32_C( 1148807903) }, { -INT16_C( 32249), -INT16_C( 18921), INT16_C( 30819), INT16_C( 3636), INT16_C( 20938), INT16_C( 2136), -INT16_C( 17875), INT16_C( 15715) }, { -INT16_C( 28991), -INT16_C( 22913), INT16_C( 19996), INT16_C( 22644), -INT16_C( 28192), -INT16_C( 16436), INT16_C( 17915), INT16_C( 771) }, { INT32_C( 502989008), -INT32_C( 877142779), INT32_C( 1627864410), INT32_C( 1160924168) } }, { { INT32_C( 733551560), INT32_C( 1580854675), INT32_C( 1818661182), INT32_C( 229230924) }, { INT16_C( 10327), INT16_C( 29875), INT16_C( 10102), INT16_C( 22476), -INT16_C( 26439), -INT16_C( 19434), INT16_C( 6877), -INT16_C( 23113) }, { INT16_C( 28725), -INT16_C( 14127), INT16_C( 2653), -INT16_C( 25562), -INT16_C( 29540), -INT16_C( 6136), -INT16_C( 20138), -INT16_C( 21003) }, { INT32_C( 1514559620), INT32_C( 1700101699), INT32_C( 1680172156), INT32_C( 714673263) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t c = simde_vld1q_s16(test_vec[i].c); simde_int32x4_t r = simde_vmlal_high_s16(a, b, c); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); 
simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int16x8_t c = simde_test_arm_neon_random_i16x8(); simde_int32x4_t r = simde_vmlal_high_s16(a, b, c); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlal_high_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int32_t b[4]; int32_t c[4]; int64_t r[2]; } test_vec[] = { { { INT64_C( 8645752205991848051), INT64_C( 1679351349465497540) }, { INT32_C( 1094402736), -INT32_C( 1664121706), -INT32_C( 1376308970), -INT32_C( 1591905747) }, { -INT32_C( 325104102), -INT32_C( 211510737), -INT32_C( 2121246119), INT32_C( 1268309659) }, { -INT64_C( 6881501806560316135), -INT64_C( 339678085672212733) } }, { { INT64_C( 7877741705012499233), -INT64_C( 2928685844663743859) }, { -INT32_C( 1262159227), -INT32_C( 291034988), INT32_C( 493827970), INT32_C( 929564438) }, { INT32_C( 854521306), -INT32_C( 559988143), INT32_C( 1234745996), INT32_C( 1998714097) }, { INT64_C( 8487493813682807353), -INT64_C( 1070752298363261373) } }, { { -INT64_C( 8110469398928824850), -INT64_C( 4515117118497431542) }, { INT32_C( 1525892617), INT32_C( 339252104), INT32_C( 1046401357), -INT32_C( 1279950907) }, { INT32_C( 1899422052), -INT32_C( 1107253324), INT32_C( 1860152455), -INT32_C( 902809920) }, { -INT64_C( 6164003345789943415), -INT64_C( 3359564742544834102) } }, { { INT64_C( 223312692866458492), INT64_C( 6532180949562717998) }, { -INT32_C( 1932796456), INT32_C( 206293892), INT32_C( 930753143), -INT32_C( 587093408) }, { -INT32_C( 2065619250), -INT32_C( 1316423037), INT32_C( 1738852977), -INT32_C( 1698608447) }, { INT64_C( 1841755566424115203), INT64_C( 7529422771569535374) } }, { { -INT64_C( 3550963684817335257), -INT64_C( 6063806773280230757) }, { -INT32_C( 1339049427), 
INT32_C( 643938229), INT32_C( 1133380993), INT32_C( 2010992464) }, { INT32_C( 857932764), INT32_C( 268622709), -INT32_C( 351533042), INT32_C( 1033299215) }, { -INT64_C( 3949384553031605963), -INT64_C( 3985849838858114997) } }, { { -INT64_C( 109406366209292896), INT64_C( 3826967153714727252) }, { -INT32_C( 1754775774), INT32_C( 698837531), -INT32_C( 2112572302), INT32_C( 968862361) }, { -INT32_C( 326194065), INT32_C( 1357581051), INT32_C( 1827941395), INT32_C( 1839272011) }, { -INT64_C( 3971064726965534186), INT64_C( 5608968576813405223) } }, { { -INT64_C( 1759593037895038384), -INT64_C( 4525510338717046689) }, { -INT32_C( 793853484), -INT32_C( 568288821), INT32_C( 273355973), INT32_C( 1971186725) }, { INT32_C( 1809875446), -INT32_C( 1940753363), INT32_C( 1854257948), -INT32_C( 1221609245) }, { -INT64_C( 1252720552326514980), -INT64_C( 6933530265598319314) } }, { { INT64_C( 4333209917881572729), INT64_C( 5789036862707101116) }, { INT32_C( 2025535051), -INT32_C( 972747350), -INT32_C( 1422619960), -INT32_C( 1151114430) }, { -INT32_C( 1207965119), INT32_C( 1324556946), -INT32_C( 382779249), INT32_C( 1429898506) }, { INT64_C( 4877759317782782769), INT64_C( 4143060059015059536) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t c = simde_vld1q_s32(test_vec[i].c); simde_int64x2_t r = simde_vmlal_high_s32(a, b, c); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int32x4_t c = simde_test_arm_neon_random_i32x4(); simde_int64x2_t r = simde_vmlal_high_s32(a, b, c); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_arm_neon_write_i32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlal_high_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint8_t b[16]; uint8_t c[16]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(20405), UINT16_C(34902), UINT16_C(42627), UINT16_C(24981), UINT16_C(45797), UINT16_C(26511), UINT16_C( 3016), UINT16_C( 7803) }, { UINT8_C( 1), UINT8_C(119), UINT8_C(178), UINT8_C( 83), UINT8_C(215), UINT8_C(173), UINT8_C(184), UINT8_C(152), UINT8_C(178), UINT8_C( 84), UINT8_C( 83), UINT8_C(138), UINT8_C(172), UINT8_C( 49), UINT8_C( 8), UINT8_C( 98) }, { UINT8_C(129), UINT8_C( 95), UINT8_C(234), UINT8_C( 4), UINT8_C( 5), UINT8_C(127), UINT8_C(101), UINT8_C(234), UINT8_C( 49), UINT8_C(245), UINT8_C( 81), UINT8_C(250), UINT8_C( 0), UINT8_C(204), UINT8_C( 24), UINT8_C( 2) }, { UINT16_C(29127), UINT16_C(55482), UINT16_C(49350), UINT16_C(59481), UINT16_C(45797), UINT16_C(36507), UINT16_C( 3208), UINT16_C( 7999) } }, { { UINT16_C(51779), UINT16_C( 6741), UINT16_C( 3448), UINT16_C(10930), UINT16_C( 1633), UINT16_C( 3764), UINT16_C(48183), UINT16_C(47216) }, { UINT8_C( 27), UINT8_C( 90), UINT8_C(188), UINT8_C( 33), UINT8_C(217), UINT8_C( 34), UINT8_C( 11), UINT8_C( 10), UINT8_C( 23), UINT8_C( 93), UINT8_C( 4), UINT8_C( 23), UINT8_C( 41), UINT8_C( 28), UINT8_C( 25), UINT8_C(109) }, { UINT8_C(231), UINT8_C(110), UINT8_C(135), UINT8_C( 95), UINT8_C(123), UINT8_C( 58), UINT8_C(137), UINT8_C(221), UINT8_C( 64), UINT8_C( 61), UINT8_C(235), UINT8_C(119), UINT8_C(249), UINT8_C( 91), UINT8_C( 48), UINT8_C( 21) }, { UINT16_C(53251), UINT16_C(12414), UINT16_C( 4388), UINT16_C(13667), UINT16_C(11842), UINT16_C( 6312), UINT16_C(49383), UINT16_C(49505) } }, { { UINT16_C(60597), UINT16_C(36406), UINT16_C(16654), UINT16_C( 9624), UINT16_C(40350), UINT16_C(51261), UINT16_C(22201), UINT16_C(41013) }, { UINT8_C(197), UINT8_C(188), UINT8_MAX, UINT8_C( 64), 
UINT8_C(246), UINT8_C(136), UINT8_C( 29), UINT8_C( 54), UINT8_C(197), UINT8_C( 8), UINT8_C(174), UINT8_C(191), UINT8_C( 99), UINT8_C(222), UINT8_C(212), UINT8_C( 24) }, { UINT8_C(202), UINT8_C( 10), UINT8_C(166), UINT8_C(217), UINT8_C( 75), UINT8_C( 63), UINT8_C(254), UINT8_C(234), UINT8_C(220), UINT8_C( 59), UINT8_C(178), UINT8_C(149), UINT8_C(146), UINT8_C(231), UINT8_C( 54), UINT8_C( 87) }, { UINT16_C(38401), UINT16_C(36878), UINT16_C(47626), UINT16_C(38083), UINT16_C(54804), UINT16_C(37007), UINT16_C(33649), UINT16_C(43101) } }, { { UINT16_C(13731), UINT16_C(39575), UINT16_C(46526), UINT16_C(33744), UINT16_C(32445), UINT16_C( 8514), UINT16_C( 5724), UINT16_C(10041) }, { UINT8_C( 32), UINT8_C(224), UINT8_C( 0), UINT8_C(108), UINT8_C( 31), UINT8_C(254), UINT8_C( 86), UINT8_C(251), UINT8_C( 58), UINT8_C( 8), UINT8_C(144), UINT8_C(204), UINT8_C(239), UINT8_C(198), UINT8_C( 35), UINT8_C(146) }, { UINT8_C(252), UINT8_C(186), UINT8_C( 44), UINT8_C(186), UINT8_C(111), UINT8_C(253), UINT8_C( 61), UINT8_C( 45), UINT8_C(123), UINT8_C(128), UINT8_C( 78), UINT8_C(216), UINT8_C(150), UINT8_C(135), UINT8_MAX, UINT8_C(183) }, { UINT16_C(20865), UINT16_C(40599), UINT16_C(57758), UINT16_C(12272), UINT16_C( 2759), UINT16_C(35244), UINT16_C(14649), UINT16_C(36759) } }, { { UINT16_C(65383), UINT16_C(34339), UINT16_C(31229), UINT16_C(14209), UINT16_C( 4737), UINT16_C(28675), UINT16_C( 9944), UINT16_C(54274) }, { UINT8_C(225), UINT8_C( 47), UINT8_C(142), UINT8_C( 80), UINT8_C( 44), UINT8_C(204), UINT8_C(125), UINT8_C(167), UINT8_C( 76), UINT8_C(203), UINT8_C(127), UINT8_C(226), UINT8_C( 83), UINT8_C(126), UINT8_C(153), UINT8_C(186) }, { UINT8_C(125), UINT8_C(188), UINT8_C( 65), UINT8_C(123), UINT8_C( 53), UINT8_C(194), UINT8_C(178), UINT8_C(182), UINT8_C(212), UINT8_C(182), UINT8_C( 38), UINT8_C(173), UINT8_C(220), UINT8_C( 41), UINT8_C(129), UINT8_C(189) }, { UINT16_C(15959), UINT16_C( 5749), UINT16_C(36055), UINT16_C(53307), UINT16_C(22997), UINT16_C(33841), UINT16_C(29681), 
UINT16_C(23892) } }, { { UINT16_C( 4184), UINT16_C(33806), UINT16_C(35804), UINT16_C(10283), UINT16_C(43863), UINT16_C(43530), UINT16_C(42025), UINT16_C(42852) }, { UINT8_C( 96), UINT8_C(165), UINT8_C( 34), UINT8_C(150), UINT8_C(104), UINT8_C(212), UINT8_C( 76), UINT8_C( 60), UINT8_C(138), UINT8_C(115), UINT8_C(233), UINT8_C(103), UINT8_C(156), UINT8_C(107), UINT8_C( 36), UINT8_C(244) }, { UINT8_C(123), UINT8_C( 50), UINT8_C(120), UINT8_C( 87), UINT8_C(190), UINT8_C(163), UINT8_C(127), UINT8_C( 21), UINT8_C( 78), UINT8_C(137), UINT8_C(191), UINT8_C(120), UINT8_C( 45), UINT8_C( 35), UINT8_C( 31), UINT8_C(142) }, { UINT16_C(14948), UINT16_C(49561), UINT16_C(14771), UINT16_C(22643), UINT16_C(50883), UINT16_C(47275), UINT16_C(43141), UINT16_C(11964) } }, { { UINT16_C(16841), UINT16_C(12580), UINT16_C(28693), UINT16_C(41069), UINT16_C(22499), UINT16_C(32519), UINT16_C(11202), UINT16_C(15731) }, { UINT8_C( 94), UINT8_C(235), UINT8_C(148), UINT8_C( 28), UINT8_C(143), UINT8_C( 19), UINT8_C( 49), UINT8_C(221), UINT8_C(156), UINT8_C(240), UINT8_C( 85), UINT8_C(202), UINT8_C( 19), UINT8_C(116), UINT8_C( 88), UINT8_C(220) }, { UINT8_C(181), UINT8_C(124), UINT8_C( 13), UINT8_C(203), UINT8_C(236), UINT8_C(123), UINT8_C(107), UINT8_C(208), UINT8_C(210), UINT8_C(114), UINT8_C( 79), UINT8_C(148), UINT8_C(157), UINT8_C(195), UINT8_C(209), UINT8_C(251) }, { UINT16_C(49601), UINT16_C(39940), UINT16_C(35408), UINT16_C( 5429), UINT16_C(25482), UINT16_C(55139), UINT16_C(29594), UINT16_C( 5415) } }, { { UINT16_C(26030), UINT16_C(15639), UINT16_C(18552), UINT16_C( 5147), UINT16_C(28728), UINT16_C(19678), UINT16_C(14053), UINT16_C(39464) }, { UINT8_C(178), UINT8_C( 54), UINT8_C(101), UINT8_C(159), UINT8_C(177), UINT8_C(208), UINT8_C(111), UINT8_C(131), UINT8_C( 66), UINT8_C(190), UINT8_C( 23), UINT8_C(224), UINT8_C(129), UINT8_C(232), UINT8_C(219), UINT8_C( 48) }, { UINT8_C( 77), UINT8_C(243), UINT8_C(109), UINT8_C(197), UINT8_C( 59), UINT8_C(136), UINT8_C(217), UINT8_C(116), UINT8_C(249), 
UINT8_C(184), UINT8_C(192), UINT8_C(222), UINT8_C(238), UINT8_C(232), UINT8_C(120), UINT8_C(161) }, { UINT16_C(42464), UINT16_C(50599), UINT16_C(22968), UINT16_C(54875), UINT16_C(59430), UINT16_C( 7966), UINT16_C(40333), UINT16_C(47192) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t c = simde_vld1q_u8(test_vec[i].c); simde_uint16x8_t r = simde_vmlal_high_u8(a, b, c); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t c = simde_test_arm_neon_random_u8x16(); simde_uint16x8_t r = simde_vmlal_high_u8(a, b, c); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlal_high_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint16_t b[8]; uint16_t c[8]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(1254400694), UINT32_C(2319989667), UINT32_C( 183778036), UINT32_C( 475007370) }, { UINT16_C(27950), UINT16_C(64978), UINT16_C(58654), UINT16_C(41408), UINT16_C( 2454), UINT16_C( 686), UINT16_C(47772), UINT16_C(21423) }, { UINT16_C(29532), UINT16_C(65437), UINT16_C(59055), UINT16_C(41866), UINT16_C(32288), UINT16_C(43693), UINT16_C(65159), UINT16_C(46535) }, { UINT32_C(1333635446), UINT32_C(2349963065), UINT32_C(3296553784), UINT32_C(1471926675) } }, { { UINT32_C(2326960491), UINT32_C( 338391934), UINT32_C( 420993404), UINT32_C(4033660564) }, { UINT16_C( 2362), UINT16_C(59888), UINT16_C(31471), UINT16_C( 4236), UINT16_C(15096), 
UINT16_C(32698), UINT16_C(33080), UINT16_C(41780) }, { UINT16_C(59163), UINT16_C(39213), UINT16_C(22618), UINT16_C(54958), UINT16_C(50482), UINT16_C(50927), UINT16_C(23435), UINT16_C(50614) }, { UINT32_C(3089036763), UINT32_C(2003602980), UINT32_C(1196223204), UINT32_C(1853346188) } }, { { UINT32_C(1420732005), UINT32_C( 409221920), UINT32_C(2912362357), UINT32_C(3142634656) }, { UINT16_C(32435), UINT16_C( 3413), UINT16_C( 982), UINT16_C( 2275), UINT16_C(54216), UINT16_C(21454), UINT16_C(34094), UINT16_C(37657) }, { UINT16_C(50987), UINT16_C(19688), UINT16_C(19458), UINT16_C(30564), UINT16_C(64619), UINT16_C( 3108), UINT16_C(30152), UINT16_C(31687) }, { UINT32_C( 629148413), UINT32_C( 475900952), UINT32_C(3940364645), UINT32_C( 40904719) } }, { { UINT32_C(3381140723), UINT32_C(3889326879), UINT32_C(1832624190), UINT32_C(1358976037) }, { UINT16_C(59419), UINT16_C( 7837), UINT16_C( 309), UINT16_C(41109), UINT16_C(47869), UINT16_C(50604), UINT16_C(29743), UINT16_C( 8768) }, { UINT16_C(51344), UINT16_C(45291), UINT16_C(48436), UINT16_C(29335), UINT16_C(53854), UINT16_C(33759), UINT16_C(57382), UINT16_C(17108) }, { UINT32_C(1664110553), UINT32_C(1302700019), UINT32_C(3539337016), UINT32_C(1508978981) } }, { { UINT32_C(4250956232), UINT32_C(1889465715), UINT32_C(3728100015), UINT32_C(1325430462) }, { UINT16_C(60479), UINT16_C(29695), UINT16_C(38569), UINT16_C( 2021), UINT16_C(50537), UINT16_C(36747), UINT16_C(24485), UINT16_C(28113) }, { UINT16_C(12753), UINT16_C(17515), UINT16_C( 2343), UINT16_C(54964), UINT16_C(59987), UINT16_C( 4789), UINT16_C(46433), UINT16_C(41057) }, { UINT32_C(2987551955), UINT32_C(2065447098), UINT32_C( 570044724), UINT32_C(2479665903) } }, { { UINT32_C(1259561121), UINT32_C(1599273206), UINT32_C(1659887037), UINT32_C( 248561725) }, { UINT16_C(15346), UINT16_C( 6482), UINT16_C( 1604), UINT16_C(38895), UINT16_C(42225), UINT16_C(21161), UINT16_C( 2650), UINT16_C(64498) }, { UINT16_C( 1386), UINT16_C(24902), UINT16_C(39421), UINT16_C(48064), 
UINT16_C(44918), UINT16_C(45853), UINT16_C(60784), UINT16_C(25281) }, { UINT32_C(3156223671), UINT32_C(2569568539), UINT32_C(1820964637), UINT32_C(1879135663) } }, { { UINT32_C(1820005160), UINT32_C( 184838682), UINT32_C(1767746831), UINT32_C( 576999352) }, { UINT16_C(43860), UINT16_C(20867), UINT16_C(17476), UINT16_C(47628), UINT16_C(10995), UINT16_C(25454), UINT16_C(12055), UINT16_C(16581) }, { UINT16_C(16451), UINT16_C(23980), UINT16_C(45227), UINT16_C(47720), UINT16_C(50526), UINT16_C( 5667), UINT16_C(34580), UINT16_C(26680) }, { UINT32_C(2375538530), UINT32_C( 329086500), UINT32_C(2184608731), UINT32_C(1019380432) } }, { { UINT32_C(1991883826), UINT32_C(4080125440), UINT32_C( 123183088), UINT32_C( 289873102) }, { UINT16_C(62557), UINT16_C( 2158), UINT16_C(54948), UINT16_C( 706), UINT16_C(58779), UINT16_C(44824), UINT16_C(20844), UINT16_C(40727) }, { UINT16_C(53517), UINT16_C( 3349), UINT16_C(18071), UINT16_C(34560), UINT16_C(22501), UINT16_C(46222), UINT16_C(54900), UINT16_C(53701) }, { UINT32_C(3314470105), UINT32_C(1857013072), UINT32_C(1267518688), UINT32_C(2476953729) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t c = simde_vld1q_u16(test_vec[i].c); simde_uint32x4_t r = simde_vmlal_high_u16(a, b, c); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t c = simde_test_arm_neon_random_u16x8(); simde_uint32x4_t r = simde_vmlal_high_u16(a, b, c); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, 
SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlal_high_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint32_t b[4]; uint32_t c[4]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 8235345744292223378), UINT64_C(12397745504786786594) }, { UINT32_C( 681719911), UINT32_C(3980442008), UINT32_C(4280365644), UINT32_C(1653442256) }, { UINT32_C(4072578464), UINT32_C(2472806257), UINT32_C( 173008688), UINT32_C(4055260298) }, { UINT64_C( 8975886188520938450), UINT64_C( 656140166869587266) } }, { { UINT64_C( 6708492149745866900), UINT64_C( 7043606458429946508) }, { UINT32_C(4182998408), UINT32_C(3045898372), UINT32_C( 717216671), UINT32_C(3424351543) }, { UINT32_C(3757585869), UINT32_C( 456921743), UINT32_C( 226007372), UINT32_C( 208549764) }, { UINT64_C( 6870588404713165512), UINT64_C( 7757754164575632360) } }, { { UINT64_C( 1868309374540563124), UINT64_C(17253113295557340268) }, { UINT32_C( 600729748), UINT32_C(3325954682), UINT32_C( 685029028), UINT32_C(2704556781) }, { UINT32_C(2128230660), UINT32_C( 915982282), UINT32_C(2514082676), UINT32_C(3464775994) }, { UINT64_C( 3590528986392482052), UINT64_C( 8177052631066503966) } }, { { UINT64_C( 139946240152523441), UINT64_C( 897725993230910950) }, { UINT32_C(1619742614), UINT32_C(2341872407), UINT32_C( 958427134), UINT32_C(1762108856) }, { UINT32_C(1419049463), UINT32_C( 257263145), UINT32_C(1407417931), UINT32_C(1902139867) }, { UINT64_C( 1488853774101063195), UINT64_C( 4249503498222273102) } }, { { UINT64_C( 885633589777132456), UINT64_C( 574575160984955863) }, { UINT32_C(1868336454), UINT32_C(1602138387), UINT32_C( 196239920), UINT32_C(1669141179) }, { UINT32_C( 186863357), UINT32_C(2367188406), UINT32_C(3894173144), UINT32_C(4042266026) }, { UINT64_C( 1649825816021840936), UINT64_C( 7321687841454240517) } }, { { UINT64_C( 3248747887485471907), UINT64_C(15591100011637229632) }, { UINT32_C(3135471876), UINT32_C(3343383535), UINT32_C( 61891673), UINT32_C( 485793913) }, { 
UINT32_C(3922940908), UINT32_C(1897392177), UINT32_C(2339131568), UINT32_C( 174312198) }, { UINT64_C( 3393520653596105171), UINT64_C(15675779816387280406) } }, { { UINT64_C(11169784979419121228), UINT64_C( 4541478163365933936) }, { UINT32_C( 388553190), UINT32_C(1921597378), UINT32_C(2516448655), UINT32_C( 228614336) }, { UINT32_C(3913835943), UINT32_C(3800320881), UINT32_C(1372332798), UINT32_C(2609959605) }, { UINT64_C(14623190003158607918), UINT64_C( 5138152345449831216) } }, { { UINT64_C( 9862949122136521132), UINT64_C(16572891756800892465) }, { UINT32_C(2496546594), UINT32_C(2440450706), UINT32_C( 719471221), UINT32_C(3234165524) }, { UINT32_C( 607024940), UINT32_C(3836481459), UINT32_C( 735431405), UINT32_C(2853295239) }, { UINT64_C(10392070853053616637), UINT64_C( 7354176774858481085) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t c = simde_vld1q_u32(test_vec[i].c); simde_uint64x2_t r = simde_vmlal_high_u32(a, b, c); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t c = simde_test_arm_neon_random_u32x4(); simde_uint64x2_t r = simde_vmlal_high_u32(a, b, c); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_high_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_high_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_high_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_high_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_high_u16) 
SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_high_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/mlal_n.c000066400000000000000000000324721400333146700167350ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN mlal_n #include "test-neon.h" #include "../../../simde/arm/neon/mlal_n.h" static int test_simde_vmlal_n_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int16_t b[4]; int16_t c; int32_t r[4]; } test_vec[] = { { { INT32_C( 94316344), -INT32_C( 1570598978), -INT32_C( 731248258), INT32_C( 989578608) }, { -INT16_C( 18299), -INT16_C( 1428), -INT16_C( 6808), -INT16_C( 17488) }, INT16_C( 11032), { -INT32_C( 107558224), -INT32_C( 1586352674), -INT32_C( 806354114), INT32_C( 796650992) } }, { { -INT32_C( 2048067768), -INT32_C( 223533997), -INT32_C( 1937675478), INT32_C( 2023096077) }, { INT16_C( 1491), -INT16_C( 12739), -INT16_C( 15553), -INT16_C( 21370) }, -INT16_C( 4419), { -INT32_C( 2054656497), -INT32_C( 167240356), -INT32_C( 1868946771), INT32_C( 2117530107) } }, { { -INT32_C( 1448448623), -INT32_C( 2053049704), INT32_C( 631895160), INT32_C( 1754321894) }, { -INT16_C( 25249), -INT16_C( 2969), INT16_C( 15125), INT16_C( 21497) }, INT16_C( 14601), { -INT32_C( 1817109272), -INT32_C( 2096400073), INT32_C( 852735285), INT32_C( 2068199591) } }, { { -INT32_C( 739930090), INT32_C( 675378814), -INT32_C( 1088693985), INT32_C( 145986398) }, { -INT16_C( 25928), INT16_C( 18651), INT16_C( 15106), INT16_C( 27109) }, -INT16_C( 1233), { -INT32_C( 707960866), INT32_C( 652382131), -INT32_C( 1107319683), INT32_C( 112561001) } }, { { -INT32_C( 1370609244), INT32_C( 1195271266), INT32_C( 2025700407), INT32_C( 5430501) }, { -INT16_C( 20325), INT16_C( 20371), INT16_C( 19384), -INT16_C( 27415) }, -INT16_C( 5229), { -INT32_C( 1264329819), INT32_C( 1088751307), INT32_C( 1924341471), INT32_C( 148783536) } }, { { -INT32_C( 28018481), -INT32_C( 1054344845), -INT32_C( 450524761), -INT32_C( 1918804528) }, { -INT16_C( 31019), INT16_C( 
10089), INT16_C( 1414), INT16_C( 6616) }, -INT16_C( 28588), { INT32_C( 858752691), -INT32_C( 1342769177), -INT32_C( 490948193), -INT32_C( 2107942736) } }, { { -INT32_C( 148619932), INT32_C( 2087777064), INT32_C( 427156466), -INT32_C( 895279963) }, { INT16_C( 29441), -INT16_C( 23769), -INT16_C( 767), INT16_C( 27177) }, -INT16_C( 20444), { -INT32_C( 750511736), -INT32_C( 1721256796), INT32_C( 442837014), -INT32_C( 1450886551) } }, { { -INT32_C( 1010172817), -INT32_C( 1325388147), -INT32_C( 1784338395), INT32_C( 444176293) }, { INT16_C( 7856), INT16_C( 21303), INT16_C( 14568), INT16_C( 4295) }, -INT16_C( 14117), { -INT32_C( 1121075969), -INT32_C( 1626122598), -INT32_C( 1989994851), INT32_C( 383543778) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); int16_t c = test_vec[i].c; simde_int32x4_t r = simde_vmlal_n_s16(a, b, c); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); int16_t c = simde_test_codegen_random_i16(); simde_int32x4_t r = simde_vmlal_n_s16(a, b, c); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlal_n_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int32_t b[2]; int32_t c; int64_t r[2]; } test_vec[] = { { { INT64_C( 2048296796130203316), INT64_C( 8606392377422747921) }, { INT32_C( 1887333753), -INT32_C( 1563251834) }, INT32_C( 1033152716), { INT64_C( 3998200789040626464), INT64_C( 6991314499333666777) } }, { { -INT64_C( 5954789392489103048), 
-INT64_C( 6399417830918731431) }, { INT32_C( 941531327), -INT32_C( 1800889075) }, INT32_C( 339114824), { -INT64_C( 5635502162243011600), -INT64_C( 7010126012630879231) } }, { { -INT64_C( 6608823492868453597), -INT64_C( 3326159638695336940) }, { -INT32_C( 1921513522), -INT32_C( 1245341784) }, INT32_C( 2085187124), { INT64_C( 7831205326174807291), -INT64_C( 5922930291671326156) } }, { { INT64_C( 6298532890838925545), -INT64_C( 4386138432066506336) }, { INT32_C( 949287273), -INT32_C( 1463481088) }, -INT32_C( 664892508), { INT64_C( 5667358895081474861), -INT64_C( 3413080821055617632) } }, { { INT64_C( 8281808724244604921), INT64_C( 6709736759272036296) }, { INT32_C( 2099330580), INT32_C( 951432503) }, INT32_C( 1725987778), { -INT64_C( 6541516426403295455), INT64_C( 8351897631041984630) } }, { { INT64_C( 1000243212586991110), INT64_C( 3362300124960182136) }, { INT32_C( 613205520), INT32_C( 1017228548) }, INT32_C( 628381538), { INT64_C( 1385570240354680870), INT64_C( 4001507764449928960) } }, { { INT64_C( 8779708734924084434), -INT64_C( 5902523081149728675) }, { INT32_C( 1625079632), -INT32_C( 1987811195) }, INT32_C( 2026186261), { -INT64_C( 6374321315396131230), INT64_C( 8516545259788831046) } }, { { INT64_C( 2388922719029508477), INT64_C( 4405963777843592946) }, { INT32_C( 2011904807), INT32_C( 2144913658) }, INT32_C( 1174953008), { INT64_C( 4752816323823817933), INT64_C( 6926136532210976210) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); int32_t c = test_vec[i].c; simde_int64x2_t r = simde_vmlal_n_s32(a, b, c); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); int32_t c = simde_test_codegen_random_i32(); simde_int64x2_t r = 
simde_vmlal_n_s32(a, b, c); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlal_n_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint16_t b[4]; uint16_t c; uint32_t r[4]; } test_vec[] = { { { UINT32_C(4290694786), UINT32_C(2504940295), UINT32_C(1991669123), UINT32_C( 700797043) }, { UINT16_C(60017), UINT16_C(39015), UINT16_C(21029), UINT16_C( 7951) }, UINT16_C(59162), { UINT32_C(3546453244), UINT32_C( 518178429), UINT32_C(3235786821), UINT32_C(1171194105) } }, { { UINT32_C(2806270878), UINT32_C(1333118609), UINT32_C( 346717382), UINT32_C(3364498706) }, { UINT16_C(64931), UINT16_C(26905), UINT16_C(35367), UINT16_C(36435) }, UINT16_C(31010), { UINT32_C( 524813892), UINT32_C(2167442659), UINT32_C(1443448052), UINT32_C( 199380760) } }, { { UINT32_C(4221055456), UINT32_C(1564882713), UINT32_C(1394857950), UINT32_C(3503286566) }, { UINT16_C(57854), UINT16_C(35069), UINT16_C(41386), UINT16_C(50053) }, UINT16_C(44042), { UINT32_C(2474094028), UINT32_C(3109391611), UINT32_C(3217580162), UINT32_C(1412753496) } }, { { UINT32_C(1866095949), UINT32_C(1872763862), UINT32_C(1554430230), UINT32_C( 976454678) }, { UINT16_C(22999), UINT16_C(42531), UINT16_C( 8489), UINT16_C( 9864) }, UINT16_C(12969), { UINT32_C(2164369980), UINT32_C(2424348401), UINT32_C(1664524071), UINT32_C(1104380894) } }, { { UINT32_C(3522506695), UINT32_C( 372196059), UINT32_C(1362167217), UINT32_C( 436946804) }, { UINT16_C( 8611), UINT16_C(54942), UINT16_C(30043), UINT16_C(32559) }, UINT16_C(22556), { UINT32_C(3716736411), UINT32_C(1611467811), UINT32_C(2039817125), UINT32_C(1171347608) } }, { { UINT32_C(1249813664), UINT32_C(3413722838), UINT32_C(1175278615), UINT32_C(2605497962) }, { UINT16_C(49167), UINT16_C( 6882), 
UINT16_C(34267), UINT16_C(31036) }, UINT16_C(38747), { UINT32_C(3154887413), UINT32_C(3680379692), UINT32_C(2503022064), UINT32_C(3808049854) } }, { { UINT32_C( 186026735), UINT32_C(1638905826), UINT32_C(2057798913), UINT32_C(1573830224) }, { UINT16_C(14597), UINT16_C(20763), UINT16_C(10964), UINT16_C(46865) }, UINT16_C(60485), { UINT32_C(1068926280), UINT32_C(2894755881), UINT32_C(2720956453), UINT32_C( 113492453) } }, { { UINT32_C(2556854588), UINT32_C( 790779160), UINT32_C( 266732896), UINT32_C( 227862374) }, { UINT16_C(58465), UINT16_C(12235), UINT16_C(53313), UINT16_C(23656) }, UINT16_C(15649), { UINT32_C(3471773373), UINT32_C( 982244675), UINT32_C(1101028033), UINT32_C( 598055118) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); uint16_t c = test_vec[i].c; simde_uint32x4_t r = simde_vmlal_n_u16(a, b, c); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); uint16_t c = simde_test_codegen_random_u16(); simde_uint32x4_t r = simde_vmlal_n_u16(a, b, c); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlal_n_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint32_t b[2]; uint32_t c; uint64_t r[2]; } test_vec[] = { { { UINT64_C(15112567586238305083), UINT64_C(10599289406992564070) }, { UINT32_C(1475048791), UINT32_C(1617544317) }, UINT32_C(1175038216), { UINT64_C(16845806286127901939), UINT64_C(12499965795541182542) } }, { { 
UINT64_C(15419563916853419108), UINT64_C( 9527648644139300843) }, { UINT32_C(2518176319), UINT32_C(1156383687) }, UINT32_C(3483719623), { UINT64_C( 5745440099818075229), UINT64_C(13556165186258290844) } }, { { UINT64_C(17943624365020392960), UINT64_C(12616964892059959770) }, { UINT32_C(3056816503), UINT32_C(1800162211) }, UINT32_C( 380582478), { UINT64_C( 660251090813875778), UINT64_C(13302075087124298628) } }, { { UINT64_C( 5617672102965761169), UINT64_C(12910438021484509578) }, { UINT32_C( 56771468), UINT32_C( 951752084) }, UINT32_C( 799213281), { UINT64_C( 5663044614173227677), UINT64_C(13671090927236737182) } }, { { UINT64_C(12277423533935907392), UINT64_C( 5482356641975719974) }, { UINT32_C(3774824532), UINT32_C( 417620355) }, UINT32_C(3612385014), { UINT64_C( 7466799030102719224), UINT64_C( 6990962153919079944) } }, { { UINT64_C(17056904501791290276), UINT64_C(14001284910167759223) }, { UINT32_C( 554591180), UINT32_C( 654380452) }, UINT32_C(1698686318), { UINT64_C(17998980951340765516), UINT64_C(15112872030746814959) } }, { { UINT64_C(14415252208582692739), UINT64_C( 5594961520471884688) }, { UINT32_C(2769351641), UINT32_C(4207287894) }, UINT32_C(2585970732), { UINT64_C( 3129970425115312335), UINT64_C(16474884875453803096) } }, { { UINT64_C( 8311741593912566189), UINT64_C( 439083547831789183) }, { UINT32_C( 341031995), UINT32_C( 96035759) }, UINT32_C(2919268482), { UINT64_C( 9307305548269647779), UINT64_C( 719437712225437021) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); uint32_t c = test_vec[i].c; simde_uint64x2_t r = simde_vmlal_n_u32(a, b, c); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); uint32_t c = 
simde_test_codegen_random_u32(); simde_uint64x2_t r = simde_vmlal_n_u32(a, b, c); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u32(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlal_n_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/mls.c000066400000000000000000002310221400333146700162560ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN mls #include "test-neon.h" #include "../../../simde/arm/neon/mls.h" static int test_simde_vmls_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 c[2]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -365.18), SIMDE_FLOAT32_C( 555.85) }, { SIMDE_FLOAT32_C( 342.95), SIMDE_FLOAT32_C( 560.35) }, { SIMDE_FLOAT32_C( 990.43), SIMDE_FLOAT32_C( 963.21) }, { SIMDE_FLOAT32_C(-340033.16), SIMDE_FLOAT32_C(-539178.81) } }, { { SIMDE_FLOAT32_C( -502.01), SIMDE_FLOAT32_C( 894.84) }, { SIMDE_FLOAT32_C( 131.14), SIMDE_FLOAT32_C( 977.90) }, { SIMDE_FLOAT32_C( 151.65), SIMDE_FLOAT32_C( 607.45) }, { SIMDE_FLOAT32_C(-20389.39), SIMDE_FLOAT32_C(-593130.56) } }, { { SIMDE_FLOAT32_C( -426.49), SIMDE_FLOAT32_C( 736.48) }, { SIMDE_FLOAT32_C( -938.18), SIMDE_FLOAT32_C( -397.50) }, { SIMDE_FLOAT32_C( 44.80), SIMDE_FLOAT32_C( 850.55) }, { SIMDE_FLOAT32_C( 41603.98), SIMDE_FLOAT32_C(338830.09) } }, { { SIMDE_FLOAT32_C( -738.35), SIMDE_FLOAT32_C( 630.15) }, { SIMDE_FLOAT32_C( 820.68), SIMDE_FLOAT32_C( -844.89) }, { SIMDE_FLOAT32_C( 328.00), SIMDE_FLOAT32_C( 133.56) }, { SIMDE_FLOAT32_C(-269921.38), SIMDE_FLOAT32_C(113473.66) } }, { { SIMDE_FLOAT32_C( 660.82), SIMDE_FLOAT32_C( -833.42) }, { 
SIMDE_FLOAT32_C( 240.76), SIMDE_FLOAT32_C( -548.85) }, { SIMDE_FLOAT32_C( -272.61), SIMDE_FLOAT32_C( 897.78) }, { SIMDE_FLOAT32_C( 66294.40), SIMDE_FLOAT32_C(491913.16) } }, { { SIMDE_FLOAT32_C( -760.18), SIMDE_FLOAT32_C( 362.21) }, { SIMDE_FLOAT32_C( 453.62), SIMDE_FLOAT32_C( 582.76) }, { SIMDE_FLOAT32_C( -77.44), SIMDE_FLOAT32_C( 444.05) }, { SIMDE_FLOAT32_C( 34368.15), SIMDE_FLOAT32_C(-258412.38) } }, { { SIMDE_FLOAT32_C( 545.98), SIMDE_FLOAT32_C( 420.55) }, { SIMDE_FLOAT32_C( 338.90), SIMDE_FLOAT32_C( -322.88) }, { SIMDE_FLOAT32_C( 398.44), SIMDE_FLOAT32_C( -509.46) }, { SIMDE_FLOAT32_C(-134485.33), SIMDE_FLOAT32_C(-164073.89) } }, { { SIMDE_FLOAT32_C( -715.43), SIMDE_FLOAT32_C( 971.95) }, { SIMDE_FLOAT32_C( -772.98), SIMDE_FLOAT32_C( -653.61) }, { SIMDE_FLOAT32_C( -425.55), SIMDE_FLOAT32_C( 271.82) }, { SIMDE_FLOAT32_C(-329657.06), SIMDE_FLOAT32_C(178636.22) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t c = simde_vld1_f32(test_vec[i].c); simde_float32x2_t r = simde_vmls_f32(a, b, c); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0, 1000.0); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0, 1000.0); simde_float32x2_t c = simde_test_arm_neon_random_f32x2(-1000.0, 1000.0); simde_float32x2_t r = simde_vmls_f32(a, b, c); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmls_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; simde_float64 b[1]; simde_float64 
c[1]; simde_float64 r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( 662.20) }, { SIMDE_FLOAT64_C( 614.22) }, { SIMDE_FLOAT64_C( -568.69) }, { SIMDE_FLOAT64_C(349962.97) } }, { { SIMDE_FLOAT64_C( 237.85) }, { SIMDE_FLOAT64_C( 33.93) }, { SIMDE_FLOAT64_C( 22.08) }, { SIMDE_FLOAT64_C( -511.32) } }, { { SIMDE_FLOAT64_C( 254.37) }, { SIMDE_FLOAT64_C( -543.96) }, { SIMDE_FLOAT64_C( 92.53) }, { SIMDE_FLOAT64_C( 50586.99) } }, { { SIMDE_FLOAT64_C( 461.60) }, { SIMDE_FLOAT64_C( -604.69) }, { SIMDE_FLOAT64_C( -522.66) }, { SIMDE_FLOAT64_C(-315585.68) } }, { { SIMDE_FLOAT64_C( -338.41) }, { SIMDE_FLOAT64_C( 863.96) }, { SIMDE_FLOAT64_C( 948.15) }, { SIMDE_FLOAT64_C(-819502.08) } }, { { SIMDE_FLOAT64_C( 524.72) }, { SIMDE_FLOAT64_C( -156.65) }, { SIMDE_FLOAT64_C( 673.47) }, { SIMDE_FLOAT64_C(106023.80) } }, { { SIMDE_FLOAT64_C( -712.19) }, { SIMDE_FLOAT64_C( -87.29) }, { SIMDE_FLOAT64_C( 443.43) }, { SIMDE_FLOAT64_C( 37994.81) } }, { { SIMDE_FLOAT64_C( 673.85) }, { SIMDE_FLOAT64_C( 112.89) }, { SIMDE_FLOAT64_C( -243.00) }, { SIMDE_FLOAT64_C( 28106.12) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_float64x1_t c = simde_vld1_f64(test_vec[i].c); simde_float64x1_t r = simde_vmls_f64(a, b, c); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t b = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t c = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t r = simde_vmls_f64(a, b, c); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x1(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x1(2, r, 
SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmls_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; int8_t c[8]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 109), INT8_C( 57), -INT8_C( 86), INT8_C( 50), -INT8_C( 11), INT8_C( 88), -INT8_C( 121), -INT8_C( 63) }, { INT8_C( 31), -INT8_C( 117), INT8_C( 39), INT8_C( 85), -INT8_C( 105), -INT8_C( 3), INT8_C( 78), -INT8_C( 42) }, { INT8_C( 53), INT8_C( 25), INT8_C( 95), -INT8_C( 29), INT8_C( 65), INT8_C( 107), -INT8_C( 31), INT8_C( 54) }, { INT8_C( 40), -INT8_C( 90), INT8_C( 49), -INT8_C( 45), -INT8_C( 98), -INT8_C( 103), -INT8_C( 7), -INT8_C( 99) } }, { { -INT8_C( 93), -INT8_C( 72), -INT8_C( 124), INT8_C( 46), INT8_C( 126), INT8_C( 99), INT8_C( 61), INT8_C( 18) }, { -INT8_C( 100), -INT8_C( 24), INT8_C( 68), -INT8_C( 111), INT8_C( 64), -INT8_C( 53), INT8_C( 83), INT8_C( 95) }, { INT8_C( 86), INT8_C( 122), -INT8_C( 76), -INT8_C( 19), INT8_C( 119), INT8_C( 2), -INT8_C( 61), -INT8_C( 84) }, { INT8_C( 59), INT8_C( 40), -INT8_C( 76), -INT8_C( 15), -INT8_C( 66), -INT8_C( 51), INT8_C( 4), INT8_C( 62) } }, { { INT8_C( 28), INT8_C( 34), -INT8_C( 113), INT8_C( 93), -INT8_C( 115), INT8_C( 112), -INT8_C( 108), INT8_C( 49) }, { INT8_C( 40), INT8_C( 24), INT8_C( 95), -INT8_C( 90), INT8_C( 123), -INT8_C( 99), -INT8_C( 72), INT8_C( 24) }, { -INT8_C( 123), -INT8_C( 4), -INT8_C( 87), -INT8_C( 59), -INT8_C( 57), -INT8_C( 4), INT8_C( 36), INT8_C( 29) }, { INT8_C( 84), -INT8_C( 126), -INT8_C( 40), -INT8_C( 97), -INT8_C( 16), -INT8_C( 28), -INT8_C( 76), INT8_C( 121) } }, { { INT8_C( 118), -INT8_C( 40), INT8_C( 11), -INT8_C( 19), -INT8_C( 37), -INT8_C( 50), -INT8_C( 103), -INT8_C( 9) }, { -INT8_C( 15), INT8_C( 40), INT8_C( 84), INT8_C( 126), -INT8_C( 104), -INT8_C( 24), -INT8_C( 81), -INT8_C( 64) }, { INT8_C( 1), INT8_C( 15), INT8_C( 103), INT8_C( 124), -INT8_C( 84), INT8_C( 31), -INT8_C( 108), INT8_C( 49) }, { -INT8_C( 123), INT8_MIN, INT8_C( 63), -INT8_C( 27), -INT8_C( 69), -INT8_C( 74), 
INT8_C( 109), INT8_C( 55) } }, { { INT8_C( 28), INT8_C( 62), -INT8_C( 10), -INT8_C( 29), INT8_C( 58), INT8_C( 26), INT8_C( 1), -INT8_C( 79) }, { -INT8_C( 14), INT8_C( 12), -INT8_C( 98), -INT8_C( 51), -INT8_C( 38), INT8_C( 56), -INT8_C( 60), -INT8_C( 53) }, { INT8_C( 96), INT8_C( 25), INT8_C( 74), -INT8_C( 7), INT8_C( 1), -INT8_C( 7), -INT8_C( 71), INT8_C( 2) }, { INT8_C( 92), INT8_C( 18), INT8_C( 74), INT8_C( 126), INT8_C( 96), -INT8_C( 94), INT8_C( 93), INT8_C( 27) } }, { { INT8_C( 8), INT8_C( 32), INT8_MAX, -INT8_C( 76), INT8_C( 64), INT8_C( 19), -INT8_C( 27), INT8_C( 92) }, { INT8_C( 81), -INT8_C( 37), INT8_C( 63), -INT8_C( 116), -INT8_C( 11), INT8_C( 64), INT8_C( 61), -INT8_C( 24) }, { INT8_C( 76), -INT8_C( 37), -INT8_C( 75), INT8_C( 39), INT8_C( 19), INT8_C( 122), -INT8_C( 14), INT8_C( 116) }, { -INT8_C( 4), -INT8_C( 57), -INT8_C( 12), INT8_C( 96), INT8_C( 17), -INT8_C( 109), INT8_C( 59), INT8_C( 60) } }, { { -INT8_C( 109), INT8_C( 60), INT8_C( 109), -INT8_C( 108), INT8_C( 54), INT8_C( 38), -INT8_C( 105), INT8_C( 62) }, { INT8_C( 71), INT8_C( 22), -INT8_C( 13), -INT8_C( 121), INT8_C( 41), -INT8_C( 40), -INT8_C( 29), INT8_C( 123) }, { -INT8_C( 76), INT8_C( 34), INT8_C( 7), -INT8_C( 87), INT8_C( 99), INT8_C( 68), -INT8_C( 111), -INT8_C( 81) }, { -INT8_C( 89), INT8_C( 80), -INT8_C( 56), INT8_C( 117), INT8_C( 91), -INT8_C( 58), INT8_C( 4), INT8_C( 41) } }, { { INT8_C( 31), INT8_C( 71), -INT8_C( 42), INT8_C( 51), -INT8_C( 63), -INT8_C( 55), -INT8_C( 89), INT8_C( 84) }, { INT8_C( 5), INT8_C( 20), -INT8_C( 24), INT8_C( 59), INT8_C( 58), INT8_MAX, INT8_C( 122), -INT8_C( 127) }, { -INT8_C( 107), INT8_C( 109), INT8_C( 8), -INT8_C( 65), INT8_C( 69), -INT8_C( 21), INT8_C( 58), -INT8_C( 7) }, { INT8_C( 54), -INT8_C( 61), -INT8_C( 106), INT8_C( 46), INT8_C( 31), INT8_C( 52), INT8_C( 3), -INT8_C( 37) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = 
simde_vld1_s8(test_vec[i].b); simde_int8x8_t c = simde_vld1_s8(test_vec[i].c); simde_int8x8_t r = simde_vmls_s8(a, b, c); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int8x8_t c = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vmls_s8(a, b, c); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmls_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int16_t c[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 16654), INT16_C( 29091), INT16_C( 13445), -INT16_C( 23520) }, { -INT16_C( 2181), INT16_C( 15575), INT16_C( 32448), -INT16_C( 14960) }, { INT16_C( 31122), -INT16_C( 13055), INT16_C( 31736), -INT16_C( 29106) }, { -INT16_C( 1560), INT16_C( 2508), INT16_C( 10885), -INT16_C( 28096) } }, { { INT16_C( 22504), INT16_C( 11597), -INT16_C( 30910), INT16_C( 20519) }, { -INT16_C( 13624), INT16_C( 19905), -INT16_C( 7426), INT16_C( 31473) }, { -INT16_C( 13863), -INT16_C( 26186), INT16_C( 18247), -INT16_C( 9634) }, { INT16_C( 27744), -INT16_C( 29417), INT16_C( 8400), -INT16_C( 3671) } }, { { INT16_C( 24512), -INT16_C( 18265), -INT16_C( 2598), -INT16_C( 15802) }, { -INT16_C( 27828), -INT16_C( 28688), INT16_C( 5914), -INT16_C( 7457) }, { -INT16_C( 24095), -INT16_C( 8401), INT16_C( 8579), INT16_C( 23641) }, { INT16_C( 7668), INT16_C( 15255), -INT16_C( 13940), -INT16_C( 16705) } }, { { INT16_C( 4330), INT16_C( 12789), INT16_C( 21335), INT16_C( 5899) }, { -INT16_C( 19789), -INT16_C( 29233), INT16_C( 5800), -INT16_C( 2992) }, { INT16_C( 16553), -INT16_C( 15229), INT16_C( 25431), 
INT16_C( 14502) }, { INT16_C( 22719), INT16_C( 9480), -INT16_C( 22465), INT16_C( 11051) } }, { { -INT16_C( 10748), -INT16_C( 30953), INT16_C( 29175), -INT16_C( 7709) }, { -INT16_C( 10111), -INT16_C( 10222), INT16_C( 7723), -INT16_C( 8465) }, { -INT16_C( 16688), INT16_C( 30828), -INT16_C( 17196), INT16_C( 32365) }, { INT16_C( 12084), -INT16_C( 4225), -INT16_C( 7589), INT16_C( 21536) } }, { { -INT16_C( 3844), INT16_C( 21314), -INT16_C( 6061), INT16_C( 22411) }, { -INT16_C( 23874), -INT16_C( 18978), -INT16_C( 16109), -INT16_C( 27498) }, { -INT16_C( 22119), -INT16_C( 14996), INT16_C( 23495), -INT16_C( 26717) }, { INT16_C( 16238), -INT16_C( 15462), INT16_C( 4494), INT16_C( 16905) } }, { { INT16_C( 3866), -INT16_C( 4592), INT16_C( 32203), -INT16_C( 14484) }, { -INT16_C( 20883), -INT16_C( 16102), -INT16_C( 23145), INT16_C( 21784) }, { -INT16_C( 2232), INT16_C( 23307), -INT16_C( 24136), INT16_C( 21232) }, { -INT16_C( 10894), INT16_C( 25586), -INT16_C( 32189), INT16_C( 20716) } }, { { INT16_C( 23626), INT16_C( 4375), -INT16_C( 17736), -INT16_C( 11607) }, { -INT16_C( 17974), -INT16_C( 27200), INT16_C( 11574), -INT16_C( 23715) }, { INT16_C( 30683), INT16_C( 29284), INT16_C( 32029), INT16_C( 26056) }, { -INT16_C( 31108), INT16_C( 4631), INT16_C( 15770), -INT16_C( 32511) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t c = simde_vld1_s16(test_vec[i].c); simde_int16x4_t r = simde_vmls_s16(a, b, c); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int16x4_t c = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vmls_s16(a, b, c); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); 
simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmls_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int32_t c[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 750834548), -INT32_C( 1082216332) }, { -INT32_C( 976186099), INT32_C( 446134608) }, { INT32_C( 1756321586), INT32_C( 1611402372) }, { INT32_C( 2071546858), INT32_C( 1072980788) } }, { { -INT32_C( 1580044156), INT32_C( 1627822829) }, { -INT32_C( 493959571), -INT32_C( 2069820297) }, { -INT32_C( 230067806), INT32_C( 487383275) }, { -INT32_C( 1630682486), -INT32_C( 1514927696) } }, { { -INT32_C( 1132020937), INT32_C( 1276941000) }, { -INT32_C( 252842494), -INT32_C( 162401399) }, { INT32_C( 819519417), -INT32_C( 1900774932) }, { INT32_C( 747765189), INT32_C( 1109413756) } }, { { -INT32_C( 696189461), INT32_C( 368282845) }, { INT32_C( 265386311), INT32_C( 240905483) }, { INT32_C( 1694386395), -INT32_C( 178565317) }, { INT32_C( 1405109806), -INT32_C( 1632336300) } }, { { INT32_C( 455422767), -INT32_C( 1733698899) }, { -INT32_C( 1251071529), -INT32_C( 53845579) }, { -INT32_C( 435381285), INT32_C( 1676961672) }, { INT32_C( 310262594), -INT32_C( 1068359803) } }, { { -INT32_C( 339152208), INT32_C( 1910514498) }, { INT32_C( 59508310), -INT32_C( 1214564896) }, { INT32_C( 325847390), INT32_C( 1175402091) }, { -INT32_C( 1545758436), -INT32_C( 1903993182) } }, { { INT32_C( 1496062929), INT32_C( 868032899) }, { INT32_C( 1428063507), -INT32_C( 20512856) }, { -INT32_C( 452832763), -INT32_C( 442720889) }, { -INT32_C( 540185486), -INT32_C( 1146992661) } }, { { INT32_C( 318245287), INT32_C( 290981951) }, { -INT32_C( 1502968797), -INT32_C( 1176950875) }, { INT32_C( 1410267308), -INT32_C( 61614601) }, { INT32_C( 1568365091), -INT32_C( 866681076) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t c = simde_vld1_s32(test_vec[i].c); simde_int32x2_t r = simde_vmls_s32(a, b, c); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int32x2_t c = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vmls_s32(a, b, c); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmls_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; uint8_t c[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 39), UINT8_C( 85), UINT8_C(225), UINT8_C(175), UINT8_C(242), UINT8_C(125), UINT8_C(148), UINT8_C(153) }, { UINT8_C(134), UINT8_C(141), UINT8_C(171), UINT8_C(198), UINT8_C(149), UINT8_C( 3), UINT8_C(215), UINT8_C(184) }, { UINT8_C(136), UINT8_C( 65), UINT8_C( 95), UINT8_C( 45), UINT8_C(105), UINT8_C( 56), UINT8_C(230), UINT8_C( 21) }, { UINT8_C(247), UINT8_C(136), UINT8_C(108), UINT8_C(225), UINT8_C(213), UINT8_C(213), UINT8_C(106), UINT8_C(129) } }, { { UINT8_C( 48), UINT8_C(245), UINT8_C(106), UINT8_C( 39), UINT8_C(202), UINT8_C(189), UINT8_C( 35), UINT8_C(241) }, { UINT8_C( 18), UINT8_C( 4), UINT8_C(160), UINT8_C( 4), UINT8_C(130), UINT8_C( 53), UINT8_C(158), UINT8_C( 8) }, { UINT8_C(194), UINT8_C( 73), UINT8_C(206), UINT8_C( 87), UINT8_C( 77), UINT8_C(165), UINT8_C( 15), UINT8_C(213) }, { UINT8_C(140), UINT8_C(209), UINT8_C(170), UINT8_C(203), UINT8_C(176), UINT8_C(148), UINT8_C(225), UINT8_C( 73) } }, { { UINT8_C(231), UINT8_C(110), UINT8_C( 
2), UINT8_C( 80), UINT8_C(167), UINT8_C(233), UINT8_C(101), UINT8_C(215) }, { UINT8_C(222), UINT8_C(207), UINT8_MAX, UINT8_C(168), UINT8_C(140), UINT8_C( 34), UINT8_C(153), UINT8_C(158) }, { UINT8_C( 39), UINT8_C( 58), UINT8_C(163), UINT8_C(169), UINT8_C(111), UINT8_C( 65), UINT8_C(177), UINT8_C( 49) }, { UINT8_C( 21), UINT8_C(136), UINT8_C(165), UINT8_C(104), UINT8_C(243), UINT8_C( 71), UINT8_C(156), UINT8_C(153) } }, { { UINT8_C(138), UINT8_C(128), UINT8_C(136), UINT8_C(215), UINT8_C( 37), UINT8_C(151), UINT8_C(172), UINT8_C( 12) }, { UINT8_C( 6), UINT8_C(175), UINT8_C( 92), UINT8_C(173), UINT8_C(152), UINT8_C(194), UINT8_C(132), UINT8_C(118) }, { UINT8_C(145), UINT8_C(131), UINT8_C( 30), UINT8_C( 30), UINT8_C(166), UINT8_C(183), UINT8_C(188), UINT8_C(205) }, { UINT8_C( 36), UINT8_C(243), UINT8_C(192), UINT8_C(145), UINT8_C(149), UINT8_C(233), UINT8_C(188), UINT8_C(142) } }, { { UINT8_C(241), UINT8_C( 95), UINT8_C(118), UINT8_C( 96), UINT8_C(160), UINT8_C( 39), UINT8_C(145), UINT8_C( 43) }, { UINT8_C(167), UINT8_C( 25), UINT8_C( 2), UINT8_C(205), UINT8_C(177), UINT8_C(175), UINT8_C(217), UINT8_C(183) }, { UINT8_C( 94), UINT8_C( 54), UINT8_C(100), UINT8_C(246), UINT8_C(248), UINT8_C(232), UINT8_C(108), UINT8_C(137) }, { UINT8_C(159), UINT8_C( 25), UINT8_C(174), UINT8_C( 98), UINT8_C( 40), UINT8_C(143), UINT8_C( 5), UINT8_C( 60) } }, { { UINT8_C(108), UINT8_C(138), UINT8_C(167), UINT8_C( 18), UINT8_C( 65), UINT8_C(100), UINT8_C(223), UINT8_C( 51) }, { UINT8_C(195), UINT8_C( 85), UINT8_C(147), UINT8_C(100), UINT8_C(124), UINT8_C( 37), UINT8_C(143), UINT8_C( 36) }, { UINT8_C( 62), UINT8_C(145), UINT8_C(241), UINT8_C(239), UINT8_C( 64), UINT8_C(202), UINT8_C(166), UINT8_C(158) }, { UINT8_C( 50), UINT8_C(101), UINT8_C( 68), UINT8_C(182), UINT8_C( 65), UINT8_C( 50), UINT8_C( 37), UINT8_C(251) } }, { { UINT8_C( 0), UINT8_C( 10), UINT8_C(148), UINT8_C(248), UINT8_C(243), UINT8_C( 0), UINT8_C(130), UINT8_C( 95) }, { UINT8_C(138), UINT8_C( 41), UINT8_C(113), UINT8_C(204), 
UINT8_C(141), UINT8_C( 80), UINT8_MAX, UINT8_C( 81) }, { UINT8_C(165), UINT8_C(146), UINT8_C(181), UINT8_C( 33), UINT8_C(183), UINT8_C( 68), UINT8_C( 69), UINT8_C(246) }, { UINT8_C( 14), UINT8_C(168), UINT8_C(175), UINT8_C(172), UINT8_C( 40), UINT8_C(192), UINT8_C(199), UINT8_C(137) } }, { { UINT8_C(213), UINT8_C( 54), UINT8_C(229), UINT8_C( 22), UINT8_C( 1), UINT8_C(140), UINT8_C(180), UINT8_C( 1) }, { UINT8_C(150), UINT8_C( 73), UINT8_C(250), UINT8_C(137), UINT8_C( 73), UINT8_C(124), UINT8_C(232), UINT8_C(212) }, { UINT8_C(165), UINT8_C( 89), UINT8_C(160), UINT8_C( 51), UINT8_C(169), UINT8_C(159), UINT8_C(132), UINT8_C( 78) }, { UINT8_C( 39), UINT8_C(213), UINT8_C(165), UINT8_C(203), UINT8_C(208), UINT8_C(136), UINT8_C( 20), UINT8_C(105) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t c = simde_vld1_u8(test_vec[i].c); simde_uint8x8_t r = simde_vmls_u8(a, b, c); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t c = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vmls_u8(a, b, c); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmls_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t c[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(14641), UINT16_C(59760), UINT16_C(46461), UINT16_C(21215) }, { UINT16_C(50412), UINT16_C(60776), UINT16_C( 7504), UINT16_C(59374) }, { UINT16_C(59494), UINT16_C(44912), 
UINT16_C(22884), UINT16_C( 2691) }, { UINT16_C(58153), UINT16_C(62448), UINT16_C(29245), UINT16_C(22549) } }, { { UINT16_C( 9138), UINT16_C(23613), UINT16_C(49602), UINT16_C(62634) }, { UINT16_C( 6906), UINT16_C(30685), UINT16_C(48336), UINT16_C(48329) }, { UINT16_C(12928), UINT16_C(53673), UINT16_C(38735), UINT16_C(46520) }, { UINT16_C(53938), UINT16_C(52824), UINT16_C(52626), UINT16_C(10034) } }, { { UINT16_C(10368), UINT16_C(58468), UINT16_C(59521), UINT16_C(13550) }, { UINT16_C(11019), UINT16_C(52880), UINT16_C(15084), UINT16_C(59074) }, { UINT16_C(40789), UINT16_C( 9565), UINT16_C(10075), UINT16_C(56289) }, { UINT16_C( 2265), UINT16_C( 2580), UINT16_C( 669), UINT16_C(28268) } }, { { UINT16_C(35417), UINT16_C(43180), UINT16_C(25633), UINT16_C(41309) }, { UINT16_C(49549), UINT16_C( 3718), UINT16_C(29865), UINT16_C(46402) }, { UINT16_C(53920), UINT16_C(35971), UINT16_C(17677), UINT16_C(25203) }, { UINT16_C(59449), UINT16_C(61978), UINT16_C(60044), UINT16_C(61623) } }, { { UINT16_C(53476), UINT16_C(16263), UINT16_C(26871), UINT16_C(20506) }, { UINT16_C(51186), UINT16_C( 5112), UINT16_C(21803), UINT16_C(47285) }, { UINT16_C(15127), UINT16_C(49351), UINT16_C( 2479), UINT16_C(20341) }, { UINT16_C( 5158), UINT16_C(47551), UINT16_C(44434), UINT16_C( 2657) } }, { { UINT16_C(63708), UINT16_C(59868), UINT16_C(20285), UINT16_C( 8523) }, { UINT16_C(53791), UINT16_C( 5984), UINT16_C(31546), UINT16_C(11367) }, { UINT16_C(24642), UINT16_C(27967), UINT16_C(62645), UINT16_C(52262) }, { UINT16_C(11486), UINT16_C(18748), UINT16_C(59195), UINT16_C(30209) } }, { { UINT16_C(60719), UINT16_C(57229), UINT16_C( 758), UINT16_C(53806) }, { UINT16_C( 2811), UINT16_C(14523), UINT16_C( 1625), UINT16_C(31066) }, { UINT16_C(47832), UINT16_C( 4752), UINT16_C(63285), UINT16_C(30526) }, { UINT16_C(19303), UINT16_C(53341), UINT16_C(54153), UINT16_C(39010) } }, { { UINT16_C(32343), UINT16_C( 3557), UINT16_C( 2930), UINT16_C(41689) }, { UINT16_C(26360), UINT16_C(61057), UINT16_C(44905), 
UINT16_C(25793) }, { UINT16_C(31930), UINT16_C( 5020), UINT16_C(63107), UINT16_C(23436) }, { UINT16_C(36391), UINT16_C( 9289), UINT16_C(25271), UINT16_C(61005) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t c = simde_vld1_u16(test_vec[i].c); simde_uint16x4_t r = simde_vmls_u16(a, b, c); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t c = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t r = simde_vmls_u16(a, b, c); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmls_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t c[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(3865975985), UINT32_C(1801366548) }, { UINT32_C(2641904426), UINT32_C(1178554958) }, { UINT32_C( 557105336), UINT32_C( 696644975) }, { UINT32_C(1997952641), UINT32_C( 463427138) } }, { { UINT32_C(4114424434), UINT32_C(3377514776) }, { UINT32_C(4205887206), UINT32_C(2506428011) }, { UINT32_C(2670911057), UINT32_C(3907350832) }, { UINT32_C( 987318188), UINT32_C(1296685576) } }, { { UINT32_C(2701793585), UINT32_C(2177535759) }, { UINT32_C(3396732849), UINT32_C(3079915217) }, { UINT32_C(4038148997), UINT32_C(2726630993) }, { UINT32_C(1280558908), UINT32_C( 744337646) } }, { { UINT32_C( 608286964), UINT32_C(1527588393) }, { UINT32_C(1341921088), UINT32_C(1490077350) }, { UINT32_C(2669823694), UINT32_C(2438378764) }, { UINT32_C( 
42697588), UINT32_C(3540809825) } }, { { UINT32_C(1249970169), UINT32_C( 317523741) }, { UINT32_C(3895865023), UINT32_C(2504213333) }, { UINT32_C( 31735642), UINT32_C(3562648582) }, { UINT32_C(4040660947), UINT32_C(2071244575) } }, { { UINT32_C( 108231674), UINT32_C( 697878832) }, { UINT32_C(3983808976), UINT32_C(3758055712) }, { UINT32_C(3838326415), UINT32_C(3564702585) }, { UINT32_C(2057777354), UINT32_C(3902243088) } }, { { UINT32_C(1372937547), UINT32_C( 186986001) }, { UINT32_C(3641874601), UINT32_C( 822323809) }, { UINT32_C(3827201987), UINT32_C(1740840664) }, { UINT32_C(3957753232), UINT32_C(1932725817) } }, { { UINT32_C(3444345684), UINT32_C(3802252695) }, { UINT32_C( 875787811), UINT32_C(1296062628) }, { UINT32_C(1361531632), UINT32_C(3212978940) }, { UINT32_C(1323406468), UINT32_C(3916930087) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t c = simde_vld1_u32(test_vec[i].c); simde_uint32x2_t r = simde_vmls_u32(a, b, c); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t c = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t r = simde_vmls_u32(a, b, c); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 c[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 596.92), SIMDE_FLOAT32_C( -413.27), SIMDE_FLOAT32_C( 
717.20), SIMDE_FLOAT32_C( -915.06) }, { SIMDE_FLOAT32_C( -486.72), SIMDE_FLOAT32_C( 408.94), SIMDE_FLOAT32_C( -871.24), SIMDE_FLOAT32_C( 518.77) }, { SIMDE_FLOAT32_C( 984.39), SIMDE_FLOAT32_C( -416.38), SIMDE_FLOAT32_C( 953.84), SIMDE_FLOAT32_C( -338.43) }, { SIMDE_FLOAT32_C(479719.22), SIMDE_FLOAT32_C(169861.17), SIMDE_FLOAT32_C(831740.75), SIMDE_FLOAT32_C(174652.27) } }, { { SIMDE_FLOAT32_C( -409.52), SIMDE_FLOAT32_C( -800.89), SIMDE_FLOAT32_C( 109.19), SIMDE_FLOAT32_C( 12.08) }, { SIMDE_FLOAT32_C( 528.53), SIMDE_FLOAT32_C( -884.21), SIMDE_FLOAT32_C( -913.46), SIMDE_FLOAT32_C( 551.48) }, { SIMDE_FLOAT32_C( -761.40), SIMDE_FLOAT32_C( -258.52), SIMDE_FLOAT32_C( 301.97), SIMDE_FLOAT32_C( -198.07) }, { SIMDE_FLOAT32_C(402013.25), SIMDE_FLOAT32_C(-229386.86), SIMDE_FLOAT32_C(275946.72), SIMDE_FLOAT32_C(109243.72) } }, { { SIMDE_FLOAT32_C( -503.83), SIMDE_FLOAT32_C( 16.75), SIMDE_FLOAT32_C( -286.53), SIMDE_FLOAT32_C( -6.86) }, { SIMDE_FLOAT32_C( 487.42), SIMDE_FLOAT32_C( -978.12), SIMDE_FLOAT32_C( 184.61), SIMDE_FLOAT32_C( 84.33) }, { SIMDE_FLOAT32_C( -391.38), SIMDE_FLOAT32_C( -98.19), SIMDE_FLOAT32_C( 169.27), SIMDE_FLOAT32_C( 121.90) }, { SIMDE_FLOAT32_C(190262.62), SIMDE_FLOAT32_C(-96024.85), SIMDE_FLOAT32_C(-31535.46), SIMDE_FLOAT32_C(-10286.69) } }, { { SIMDE_FLOAT32_C( -689.25), SIMDE_FLOAT32_C( 298.04), SIMDE_FLOAT32_C( -359.33), SIMDE_FLOAT32_C( -704.86) }, { SIMDE_FLOAT32_C( 881.65), SIMDE_FLOAT32_C( -405.49), SIMDE_FLOAT32_C( -43.29), SIMDE_FLOAT32_C( -527.87) }, { SIMDE_FLOAT32_C( -206.39), SIMDE_FLOAT32_C( -934.10), SIMDE_FLOAT32_C( 484.22), SIMDE_FLOAT32_C( -677.86) }, { SIMDE_FLOAT32_C(181274.50), SIMDE_FLOAT32_C(-378470.16), SIMDE_FLOAT32_C( 20602.55), SIMDE_FLOAT32_C(-358526.81) } }, { { SIMDE_FLOAT32_C( -818.31), SIMDE_FLOAT32_C( 570.75), SIMDE_FLOAT32_C( 873.62), SIMDE_FLOAT32_C( -579.71) }, { SIMDE_FLOAT32_C( -687.77), SIMDE_FLOAT32_C( 175.59), SIMDE_FLOAT32_C( 222.23), SIMDE_FLOAT32_C( -191.61) }, { SIMDE_FLOAT32_C( -807.66), SIMDE_FLOAT32_C( 
935.69), SIMDE_FLOAT32_C( 801.53), SIMDE_FLOAT32_C( 679.76) }, { SIMDE_FLOAT32_C(-556302.62), SIMDE_FLOAT32_C(-163727.05), SIMDE_FLOAT32_C(-177250.39), SIMDE_FLOAT32_C(129669.10) } }, { { SIMDE_FLOAT32_C( 957.58), SIMDE_FLOAT32_C( -13.85), SIMDE_FLOAT32_C( -235.91), SIMDE_FLOAT32_C( -433.81) }, { SIMDE_FLOAT32_C( 887.96), SIMDE_FLOAT32_C( 933.36), SIMDE_FLOAT32_C( 688.09), SIMDE_FLOAT32_C( -801.30) }, { SIMDE_FLOAT32_C( 231.40), SIMDE_FLOAT32_C( -671.24), SIMDE_FLOAT32_C( -506.16), SIMDE_FLOAT32_C( 113.05) }, { SIMDE_FLOAT32_C(-204516.36), SIMDE_FLOAT32_C(626494.69), SIMDE_FLOAT32_C(348047.75), SIMDE_FLOAT32_C( 90153.16) } }, { { SIMDE_FLOAT32_C( -76.73), SIMDE_FLOAT32_C( 450.55), SIMDE_FLOAT32_C( 585.19), SIMDE_FLOAT32_C( 716.88) }, { SIMDE_FLOAT32_C( 516.45), SIMDE_FLOAT32_C( 69.40), SIMDE_FLOAT32_C( -960.98), SIMDE_FLOAT32_C( 698.14) }, { SIMDE_FLOAT32_C( -359.85), SIMDE_FLOAT32_C( 912.64), SIMDE_FLOAT32_C( -881.57), SIMDE_FLOAT32_C( -47.62) }, { SIMDE_FLOAT32_C(185767.81), SIMDE_FLOAT32_C(-62886.67), SIMDE_FLOAT32_C(-846585.94), SIMDE_FLOAT32_C( 33962.30) } }, { { SIMDE_FLOAT32_C( 88.23), SIMDE_FLOAT32_C( 340.65), SIMDE_FLOAT32_C( 760.78), SIMDE_FLOAT32_C( 280.57) }, { SIMDE_FLOAT32_C( 276.34), SIMDE_FLOAT32_C( 562.31), SIMDE_FLOAT32_C( -39.67), SIMDE_FLOAT32_C( 233.92) }, { SIMDE_FLOAT32_C( -451.55), SIMDE_FLOAT32_C( 724.42), SIMDE_FLOAT32_C( 800.11), SIMDE_FLOAT32_C( -563.59) }, { SIMDE_FLOAT32_C(124869.55), SIMDE_FLOAT32_C(-407007.94), SIMDE_FLOAT32_C( 32501.14), SIMDE_FLOAT32_C(132115.55) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t c = simde_vld1q_f32(test_vec[i].c); simde_float32x4_t r = simde_vmlsq_f32(a, b, c); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = 
simde_test_arm_neon_random_f32x4(-1000.0, 1000.0); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0, 1000.0); simde_float32x4_t c = simde_test_arm_neon_random_f32x4(-1000.0, 1000.0); simde_float32x4_t r = simde_vmlsq_f32(a, b, c); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 c[2]; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 657.78), SIMDE_FLOAT64_C( 488.21) }, { SIMDE_FLOAT64_C( -364.89), SIMDE_FLOAT64_C( -110.82) }, { SIMDE_FLOAT64_C( 816.97), SIMDE_FLOAT64_C( 128.95) }, { SIMDE_FLOAT64_C(298761.96), SIMDE_FLOAT64_C( 14778.45) } }, { { SIMDE_FLOAT64_C( -997.76), SIMDE_FLOAT64_C( -259.76) }, { SIMDE_FLOAT64_C( -420.50), SIMDE_FLOAT64_C( 587.42) }, { SIMDE_FLOAT64_C( -542.88), SIMDE_FLOAT64_C( -904.05) }, { SIMDE_FLOAT64_C(-229278.80), SIMDE_FLOAT64_C(530797.29) } }, { { SIMDE_FLOAT64_C( -343.18), SIMDE_FLOAT64_C( -503.86) }, { SIMDE_FLOAT64_C( 794.08), SIMDE_FLOAT64_C( 296.98) }, { SIMDE_FLOAT64_C( -591.22), SIMDE_FLOAT64_C( 912.51) }, { SIMDE_FLOAT64_C(469132.80), SIMDE_FLOAT64_C(-271501.08) } }, { { SIMDE_FLOAT64_C( -750.64), SIMDE_FLOAT64_C( 497.01) }, { SIMDE_FLOAT64_C( 253.16), SIMDE_FLOAT64_C( -989.86) }, { SIMDE_FLOAT64_C( -222.42), SIMDE_FLOAT64_C( -470.49) }, { SIMDE_FLOAT64_C( 55557.21), SIMDE_FLOAT64_C(-465222.22) } }, { { SIMDE_FLOAT64_C( 572.44), SIMDE_FLOAT64_C( 737.91) }, { SIMDE_FLOAT64_C( 763.43), SIMDE_FLOAT64_C( -879.10) }, { SIMDE_FLOAT64_C( 462.33), SIMDE_FLOAT64_C( 563.54) }, { SIMDE_FLOAT64_C(-352384.15), SIMDE_FLOAT64_C(496145.92) } }, { { SIMDE_FLOAT64_C( -442.69), SIMDE_FLOAT64_C( 120.11) }, { SIMDE_FLOAT64_C( 51.75), SIMDE_FLOAT64_C( 192.42) 
}, { SIMDE_FLOAT64_C( -990.71), SIMDE_FLOAT64_C( -131.28) }, { SIMDE_FLOAT64_C( 50826.55), SIMDE_FLOAT64_C( 25381.01) } }, { { SIMDE_FLOAT64_C( -678.63), SIMDE_FLOAT64_C( -988.47) }, { SIMDE_FLOAT64_C( 608.96), SIMDE_FLOAT64_C( -99.13) }, { SIMDE_FLOAT64_C( 598.95), SIMDE_FLOAT64_C( -933.92) }, { SIMDE_FLOAT64_C(-365415.22), SIMDE_FLOAT64_C(-93567.96) } }, { { SIMDE_FLOAT64_C( -3.18), SIMDE_FLOAT64_C( -744.22) }, { SIMDE_FLOAT64_C( -437.78), SIMDE_FLOAT64_C( -209.09) }, { SIMDE_FLOAT64_C( 552.75), SIMDE_FLOAT64_C( -29.00) }, { SIMDE_FLOAT64_C(241979.71), SIMDE_FLOAT64_C( -6807.83) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t c = simde_vld1q_f64(test_vec[i].c); simde_float64x2_t r = simde_vmlsq_f64(a, b, c); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t b = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t c = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t r = simde_vmlsq_f64(a, b, c); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t b[16]; int8_t c[16]; int8_t r[16]; } test_vec[] = { { { -INT8_C( 21), -INT8_C( 27), -INT8_C( 75), -INT8_C( 105), INT8_C( 87), INT8_C( 49), INT8_C( 102), -INT8_C( 101), -INT8_C( 5), INT8_C( 77), INT8_C( 55), INT8_C( 70), -INT8_C( 100), -INT8_C( 22), -INT8_C( 102), -INT8_C( 70) }, { INT8_C( 24), -INT8_C( 76), INT8_C( 
58), -INT8_C( 75), -INT8_C( 75), -INT8_C( 69), -INT8_C( 42), INT8_C( 15), INT8_C( 103), INT8_C( 23), -INT8_C( 97), -INT8_C( 19), INT8_C( 60), -INT8_C( 12), INT8_C( 29), INT8_C( 39) }, { -INT8_C( 39), -INT8_C( 46), -INT8_C( 65), INT8_C( 48), INT8_C( 3), INT8_C( 37), -INT8_C( 52), -INT8_C( 2), INT8_C( 115), INT8_C( 3), INT8_C( 68), INT8_C( 15), -INT8_C( 19), -INT8_C( 34), -INT8_C( 55), INT8_C( 5) }, { -INT8_C( 109), INT8_C( 61), INT8_C( 111), -INT8_C( 89), INT8_C( 56), INT8_C( 42), -INT8_C( 34), -INT8_C( 71), -INT8_C( 74), INT8_C( 8), -INT8_C( 5), INT8_C( 99), INT8_C( 16), INT8_C( 82), -INT8_C( 43), -INT8_C( 9) } }, { { -INT8_C( 109), INT8_C( 3), -INT8_C( 69), INT8_C( 72), -INT8_C( 65), -INT8_C( 111), INT8_C( 87), INT8_C( 38), -INT8_C( 87), -INT8_C( 10), INT8_C( 20), -INT8_C( 27), -INT8_C( 22), INT8_C( 49), INT8_C( 13), -INT8_C( 61) }, { INT8_C( 4), -INT8_C( 52), -INT8_C( 13), INT8_C( 7), -INT8_C( 15), -INT8_C( 65), INT8_C( 6), INT8_C( 100), -INT8_C( 62), INT8_C( 74), INT8_C( 115), -INT8_C( 80), INT8_C( 41), INT8_C( 60), -INT8_C( 75), -INT8_C( 68) }, { INT8_C( 64), INT8_C( 112), INT8_C( 4), -INT8_C( 1), INT8_C( 2), INT8_C( 91), INT8_C( 37), -INT8_C( 85), INT8_C( 81), INT8_C( 57), -INT8_C( 112), INT8_C( 59), INT8_C( 107), -INT8_C( 99), -INT8_C( 2), INT8_C( 111) }, { -INT8_C( 109), -INT8_C( 61), -INT8_C( 17), INT8_C( 79), -INT8_C( 35), -INT8_C( 84), INT8_C( 121), INT8_C( 90), INT8_C( 71), INT8_C( 124), INT8_C( 100), INT8_C( 85), -INT8_C( 57), INT8_C( 101), INT8_C( 119), INT8_C( 63) } }, { { INT8_C( 105), -INT8_C( 15), INT8_C( 118), INT8_C( 91), -INT8_C( 79), INT8_C( 124), -INT8_C( 65), INT8_C( 115), -INT8_C( 57), INT8_C( 51), INT8_C( 35), -INT8_C( 16), INT8_C( 111), -INT8_C( 39), -INT8_C( 84), -INT8_C( 81) }, { INT8_C( 73), -INT8_C( 80), -INT8_C( 82), INT8_C( 75), INT8_C( 11), -INT8_C( 44), -INT8_C( 10), INT8_C( 92), INT8_C( 13), -INT8_C( 121), -INT8_C( 105), INT8_C( 120), INT8_C( 36), -INT8_C( 107), -INT8_C( 25), -INT8_C( 114) }, { -INT8_C( 122), INT8_C( 94), -INT8_C( 
23), INT8_C( 55), -INT8_C( 38), -INT8_C( 88), -INT8_C( 85), -INT8_C( 95), -INT8_C( 37), -INT8_C( 50), -INT8_C( 111), INT8_C( 75), -INT8_C( 89), INT8_C( 61), -INT8_C( 6), -INT8_C( 15) }, { INT8_C( 51), INT8_C( 81), INT8_C( 24), INT8_C( 62), INT8_C( 83), INT8_C( 92), INT8_C( 109), -INT8_C( 105), -INT8_C( 88), -INT8_C( 111), -INT8_C( 100), -INT8_C( 56), -INT8_C( 13), INT8_C( 88), INT8_C( 22), INT8_C( 1) } }, { { -INT8_C( 19), -INT8_C( 87), INT8_C( 60), -INT8_C( 8), INT8_C( 125), INT8_C( 51), INT8_C( 84), -INT8_C( 118), -INT8_C( 70), -INT8_C( 21), INT8_C( 3), -INT8_C( 34), INT8_MIN, -INT8_C( 22), INT8_C( 108), INT8_C( 7) }, { INT8_C( 72), INT8_C( 85), INT8_C( 62), INT8_C( 35), -INT8_C( 2), -INT8_C( 23), -INT8_C( 60), -INT8_C( 39), -INT8_C( 72), INT8_C( 86), INT8_C( 36), INT8_C( 95), -INT8_C( 109), INT8_C( 31), INT8_C( 80), -INT8_C( 127) }, { -INT8_C( 56), -INT8_C( 115), INT8_C( 121), INT8_C( 69), -INT8_C( 64), -INT8_C( 50), -INT8_C( 49), INT8_C( 122), -INT8_C( 71), -INT8_C( 46), INT8_C( 88), INT8_C( 58), -INT8_C( 67), -INT8_C( 59), INT8_C( 65), INT8_C( 5) }, { -INT8_C( 83), -INT8_C( 40), -INT8_C( 18), -INT8_C( 119), -INT8_C( 3), -INT8_C( 75), -INT8_C( 40), INT8_C( 32), -INT8_C( 62), INT8_C( 95), -INT8_C( 93), INT8_C( 88), -INT8_C( 7), INT8_C( 15), INT8_C( 28), -INT8_C( 126) } }, { { INT8_C( 26), INT8_MAX, INT8_C( 40), INT8_C( 24), INT8_C( 105), -INT8_C( 19), -INT8_C( 14), INT8_C( 33), INT8_C( 67), INT8_C( 22), INT8_MIN, -INT8_C( 42), INT8_C( 53), -INT8_C( 47), INT8_C( 87), -INT8_C( 3) }, { INT8_C( 94), -INT8_C( 47), INT8_C( 66), INT8_C( 30), -INT8_C( 97), INT8_C( 18), -INT8_C( 104), INT8_C( 88), -INT8_C( 28), -INT8_C( 16), -INT8_C( 110), -INT8_C( 95), -INT8_C( 75), -INT8_C( 45), -INT8_C( 89), -INT8_C( 48) }, { INT8_C( 83), -INT8_C( 49), -INT8_C( 24), -INT8_C( 68), -INT8_C( 68), -INT8_C( 38), -INT8_C( 35), -INT8_C( 1), -INT8_C( 15), INT8_C( 93), -INT8_C( 42), INT8_C( 38), INT8_C( 46), INT8_C( 45), INT8_C( 36), -INT8_C( 116) }, { -INT8_C( 96), INT8_MIN, INT8_C( 88), 
INT8_C( 16), -INT8_C( 91), -INT8_C( 103), -INT8_C( 70), INT8_C( 121), -INT8_C( 97), -INT8_C( 26), INT8_C( 116), -INT8_C( 16), -INT8_C( 81), -INT8_C( 70), -INT8_C( 37), INT8_C( 61) } }, { { -INT8_C( 2), INT8_C( 102), -INT8_C( 86), -INT8_C( 99), INT8_C( 120), INT8_C( 66), -INT8_C( 10), INT8_C( 93), INT8_C( 51), -INT8_C( 120), -INT8_C( 2), -INT8_C( 24), INT8_C( 92), -INT8_C( 91), -INT8_C( 72), -INT8_C( 81) }, { INT8_C( 117), -INT8_C( 95), INT8_C( 107), INT8_C( 49), INT8_C( 123), INT8_C( 72), INT8_C( 49), INT8_C( 108), -INT8_C( 91), INT8_C( 7), -INT8_C( 109), -INT8_C( 44), INT8_C( 52), -INT8_C( 73), INT8_C( 96), INT8_C( 51) }, { INT8_C( 29), INT8_C( 11), -INT8_C( 48), -INT8_C( 106), INT8_C( 77), -INT8_C( 58), -INT8_C( 13), INT8_MIN, INT8_C( 79), -INT8_C( 15), INT8_C( 105), -INT8_C( 85), -INT8_C( 105), INT8_C( 33), INT8_C( 90), INT8_C( 12) }, { -INT8_C( 67), INT8_C( 123), -INT8_C( 70), -INT8_C( 25), INT8_C( 121), -INT8_C( 110), INT8_C( 115), INT8_C( 93), INT8_C( 72), -INT8_C( 15), -INT8_C( 77), INT8_C( 76), -INT8_C( 80), INT8_C( 14), -INT8_C( 8), INT8_C( 75) } }, { { -INT8_C( 62), -INT8_C( 59), INT8_C( 61), INT8_C( 62), INT8_C( 13), INT8_C( 110), -INT8_C( 86), -INT8_C( 78), INT8_C( 117), INT8_C( 61), -INT8_C( 122), -INT8_C( 86), -INT8_C( 12), -INT8_C( 25), -INT8_C( 35), INT8_C( 18) }, { -INT8_C( 14), -INT8_C( 83), -INT8_C( 88), INT8_C( 63), INT8_C( 116), -INT8_C( 101), -INT8_C( 64), -INT8_C( 61), -INT8_C( 116), INT8_C( 41), INT8_C( 110), INT8_C( 35), INT8_C( 74), -INT8_C( 56), INT8_C( 47), INT8_C( 13) }, { -INT8_C( 115), INT8_C( 109), INT8_C( 75), -INT8_C( 102), -INT8_C( 37), -INT8_C( 11), INT8_C( 76), INT8_C( 81), INT8_C( 51), -INT8_C( 45), -INT8_C( 5), INT8_C( 39), -INT8_C( 70), -INT8_C( 40), INT8_C( 57), -INT8_C( 84) }, { INT8_C( 120), INT8_C( 28), INT8_C( 5), INT8_C( 88), -INT8_C( 47), INT8_C( 23), -INT8_C( 86), -INT8_C( 1), -INT8_C( 111), INT8_C( 114), -INT8_C( 84), INT8_C( 85), INT8_C( 48), INT8_C( 39), INT8_C( 102), INT8_C( 86) } }, { { -INT8_C( 123), -INT8_C( 
31), -INT8_C( 21), -INT8_C( 7), INT8_C( 124), -INT8_C( 85), -INT8_C( 68), INT8_C( 9), -INT8_C( 44), INT8_C( 42), INT8_C( 44), INT8_C( 31), -INT8_C( 14), INT8_C( 92), INT8_C( 44), INT8_MAX }, { -INT8_C( 55), INT8_C( 119), INT8_C( 25), -INT8_C( 92), INT8_C( 108), INT8_C( 102), -INT8_C( 11), -INT8_C( 97), INT8_C( 57), -INT8_C( 16), -INT8_C( 57), -INT8_C( 13), -INT8_C( 56), INT8_C( 0), -INT8_C( 97), INT8_C( 78) }, { -INT8_C( 30), -INT8_C( 118), INT8_C( 71), INT8_C( 94), INT8_C( 54), INT8_C( 4), INT8_C( 103), INT8_C( 10), INT8_C( 46), -INT8_C( 108), INT8_C( 41), INT8_C( 33), -INT8_C( 16), INT8_C( 85), -INT8_C( 96), -INT8_C( 71) }, { INT8_C( 19), -INT8_C( 69), -INT8_C( 4), -INT8_C( 63), -INT8_C( 76), INT8_C( 19), INT8_C( 41), -INT8_C( 45), -INT8_C( 106), INT8_C( 106), INT8_C( 77), -INT8_C( 52), INT8_C( 114), INT8_C( 92), -INT8_C( 52), INT8_C( 33) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t c = simde_vld1q_s8(test_vec[i].c); simde_int8x16_t r = simde_vmlsq_s8(a, b, c); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int8x16_t c = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vmlsq_s8(a, b, c); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; int16_t c[8]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 17716), INT16_C( 14685), INT16_C( 21280), INT16_C( 
23000), -INT16_C( 24765), INT16_C( 3148), -INT16_C( 5216), -INT16_C( 32166) }, { -INT16_C( 24203), -INT16_C( 21536), INT16_C( 18597), -INT16_C( 11082), -INT16_C( 8228), -INT16_C( 13067), -INT16_C( 27339), INT16_C( 389) }, { -INT16_C( 7601), INT16_C( 28474), INT16_C( 4917), INT16_C( 31176), INT16_C( 5298), INT16_C( 21125), -INT16_C( 8193), INT16_C( 30164) }, { -INT16_C( 25167), INT16_C( 10397), INT16_C( 2551), INT16_C( 9640), -INT16_C( 14261), INT16_C( 5891), INT16_C( 8405), INT16_C( 30518) } }, { { -INT16_C( 19072), INT16_C( 9760), -INT16_C( 10499), -INT16_C( 9734), -INT16_C( 4170), -INT16_C( 5211), INT16_C( 10884), -INT16_C( 11028) }, { INT16_C( 9996), INT16_C( 16963), INT16_C( 3130), -INT16_C( 4933), INT16_C( 16416), INT16_C( 8255), INT16_C( 4895), -INT16_C( 24683) }, { -INT16_C( 19000), -INT16_C( 14907), -INT16_C( 16500), INT16_C( 17054), INT16_C( 17326), INT16_C( 13101), INT16_C( 6509), INT16_C( 31239) }, { -INT16_C( 18400), -INT16_C( 26223), -INT16_C( 7867), -INT16_C( 30576), -INT16_C( 1546), -INT16_C( 19566), -INT16_C( 175), INT16_C( 30169) } }, { { INT16_C( 19008), INT16_C( 31420), INT16_C( 30550), INT16_C( 30567), -INT16_C( 22857), -INT16_C( 10601), INT16_C( 11449), -INT16_C( 32139) }, { INT16_C( 15329), INT16_C( 27975), -INT16_C( 6406), -INT16_C( 22097), -INT16_C( 9175), -INT16_C( 26660), -INT16_C( 7178), INT16_C( 13841) }, { -INT16_C( 13011), -INT16_C( 31567), INT16_C( 6212), -INT16_C( 1029), -INT16_C( 27970), INT16_C( 30673), INT16_C( 18110), -INT16_C( 24583) }, { -INT16_C( 26957), INT16_C( 20645), -INT16_C( 21266), -INT16_C( 31790), -INT16_C( 8631), -INT16_C( 26629), -INT16_C( 18395), INT16_C( 23788) } }, { { INT16_C( 16769), INT16_C( 31757), -INT16_C( 17369), INT16_C( 20517), INT16_C( 409), -INT16_C( 28697), -INT16_C( 1820), INT16_C( 4549) }, { INT16_C( 30405), INT16_C( 2453), -INT16_C( 28530), INT16_C( 19460), -INT16_C( 10974), -INT16_C( 7996), -INT16_C( 17124), -INT16_C( 25216) }, { -INT16_C( 29186), INT16_C( 9497), INT16_C( 15945), -INT16_C( 7562), 
INT16_C( 23871), INT16_C( 9073), INT16_C( 14166), INT16_C( 6965) }, { -INT16_C( 5877), INT16_C( 896), INT16_C( 8105), -INT16_C( 16819), INT16_C( 13371), -INT16_C( 29341), INT16_C( 28028), -INT16_C( 2491) } }, { { -INT16_C( 13651), INT16_C( 15397), INT16_C( 10587), INT16_C( 32136), INT16_C( 19711), INT16_C( 7006), -INT16_C( 8694), INT16_C( 2232) }, { -INT16_C( 11669), -INT16_C( 19410), -INT16_C( 23536), INT16_C( 20631), INT16_C( 2049), INT16_C( 22387), -INT16_C( 22465), -INT16_C( 4749) }, { -INT16_C( 26509), -INT16_C( 12759), -INT16_C( 20031), -INT16_C( 16309), -INT16_C( 22018), INT16_C( 2267), -INT16_C( 27513), -INT16_C( 3568) }, { -INT16_C( 17252), INT16_C( 23751), INT16_C( 26955), -INT16_C( 24245), -INT16_C( 19711), -INT16_C( 19459), -INT16_C( 18223), INT16_C( 31624) } }, { { INT16_C( 15974), INT16_C( 30375), INT16_C( 16098), -INT16_C( 6970), INT16_C( 14918), -INT16_C( 31173), -INT16_C( 20766), INT16_C( 21875) }, { -INT16_C( 25530), INT16_C( 2083), INT16_C( 28493), INT16_C( 19400), -INT16_C( 23528), -INT16_C( 24493), INT16_C( 25656), -INT16_C( 24942) }, { INT16_C( 14754), -INT16_C( 31468), -INT16_C( 9353), -INT16_C( 16791), -INT16_C( 23531), -INT16_C( 2236), -INT16_C( 18605), -INT16_C( 26291) }, { -INT16_C( 15334), -INT16_C( 23317), -INT16_C( 23785), INT16_C( 24510), INT16_C( 25678), -INT16_C( 9425), INT16_C( 10426), INT16_C( 24969) } }, { { INT16_C( 28755), -INT16_C( 24415), INT16_C( 27359), -INT16_C( 1812), INT16_C( 16142), INT16_C( 18072), INT16_C( 10915), INT16_C( 18148) }, { -INT16_C( 1948), -INT16_C( 9269), INT16_C( 13523), -INT16_C( 5991), -INT16_C( 8744), INT16_C( 11232), INT16_C( 11668), -INT16_C( 6203) }, { INT16_C( 26269), INT16_C( 32136), INT16_C( 29904), -INT16_C( 8587), INT16_C( 3507), INT16_C( 22308), INT16_C( 2103), -INT16_C( 25699) }, { INT16_C( 17151), -INT16_C( 16951), -INT16_C( 7313), -INT16_C( 769), INT16_C( 10502), -INT16_C( 1256), -INT16_C( 16425), -INT16_C( 9197) } }, { { INT16_C( 26625), -INT16_C( 11145), INT16_C( 4252), INT16_C( 29885), 
-INT16_C( 25106), -INT16_C( 32096), INT16_C( 26058), INT16_C( 26474) }, { -INT16_C( 3381), -INT16_C( 25372), INT16_C( 22886), INT16_C( 6522), -INT16_C( 24730), -INT16_C( 24976), INT16_C( 3495), -INT16_C( 22471) }, { -INT16_C( 20363), INT16_C( 4477), INT16_C( 15041), -INT16_C( 20602), INT16_C( 9943), -INT16_C( 24271), -INT16_C( 25717), INT16_C( 22024) }, { -INT16_C( 7878), INT16_C( 5411), -INT16_C( 29002), -INT16_C( 18207), -INT16_C( 25788), -INT16_C( 16592), -INT16_C( 8419), -INT16_C( 94) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t c = simde_vld1q_s16(test_vec[i].c); simde_int16x8_t r = simde_vmlsq_s16(a, b, c); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int16x8_t c = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vmlsq_s16(a, b, c); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; int32_t c[4]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 202183283), -INT32_C( 1391628986), -INT32_C( 1286898420), INT32_C( 6063243) }, { -INT32_C( 166536907), -INT32_C( 358246381), INT32_C( 1233901246), -INT32_C( 6319246) }, { -INT32_C( 940338560), INT32_C( 192151807), INT32_C( 146718589), INT32_C( 2013862467) }, { -INT32_C( 900261619), INT32_C( 947843673), INT32_C( 600301638), INT32_C( 1167940021) } }, { { INT32_C( 107879411), INT32_C( 1911559091), INT32_C( 
1555725290), -INT32_C( 1889838833) }, { -INT32_C( 363442453), -INT32_C( 856307122), -INT32_C( 841698167), -INT32_C( 1035608626) }, { -INT32_C( 1412909832), -INT32_C( 1323517497), INT32_C( 1124980276), INT32_C( 466839856) }, { -INT32_C( 1296274357), INT32_C( 1898279697), -INT32_C( 1197061610), -INT32_C( 276937617) } }, { { INT32_C( 101067191), INT32_C( 2110979060), INT32_C( 2118821551), INT32_C( 2084606084) }, { INT32_C( 187172932), -INT32_C( 155368255), INT32_C( 1262078491), -INT32_C( 345633741) }, { INT32_C( 720464950), INT32_C( 396870504), -INT32_C( 308940183), -INT32_C( 966077054) }, { -INT32_C( 669795489), -INT32_C( 458683764), INT32_C( 1641167772), -INT32_C( 1960185954) } }, { { -INT32_C( 1613655330), -INT32_C( 241856810), -INT32_C( 1942171815), INT32_C( 293053403) }, { INT32_C( 2000381967), -INT32_C( 1785732309), INT32_C( 1468146901), -INT32_C( 669127430) }, { INT32_C( 1433923198), -INT32_C( 700052099), -INT32_C( 1218280484), INT32_C( 902421030) }, { INT32_C( 1541237116), -INT32_C( 1945603113), INT32_C( 1661086029), -INT32_C( 1360031041) } }, { { INT32_C( 1856832578), -INT32_C( 1140638489), INT32_C( 1511228768), -INT32_C( 265146254) }, { -INT32_C( 1673156065), -INT32_C( 1821209417), INT32_C( 894096399), -INT32_C( 244706386) }, { -INT32_C( 10545384), -INT32_C( 1262722477), INT32_C( 1494142951), INT32_C( 524960000) }, { -INT32_C( 2091169702), INT32_C( 295538578), -INT32_C( 230421801), INT32_C( 1055326834) } }, { { -INT32_C( 1564766229), INT32_C( 708128027), -INT32_C( 1335918847), -INT32_C( 1415460205) }, { INT32_C( 900333793), INT32_C( 1240032866), INT32_C( 916715318), INT32_C( 592833848) }, { -INT32_C( 1731915652), INT32_C( 1052965437), INT32_C( 233710202), -INT32_C( 843542548) }, { -INT32_C( 1087368465), INT32_C( 1297617857), INT32_C( 1295156037), -INT32_C( 1062677773) } }, { { -INT32_C( 251501937), -INT32_C( 12850231), INT32_C( 456515299), INT32_C( 1195346635) }, { -INT32_C( 673250150), INT32_C( 2031461119), -INT32_C( 1333394236), INT32_C( 595410579) }, { 
INT32_C( 1762951328), INT32_C( 1315458923), -INT32_C( 127230675), -INT32_C( 1052727001) }, { -INT32_C( 45448625), -INT32_C( 867427788), INT32_C( 160617583), -INT32_C( 1189349530) } }, { { -INT32_C( 1399316563), -INT32_C( 2061127999), INT32_C( 1161145266), -INT32_C( 1972849687) }, { -INT32_C( 1628209869), -INT32_C( 85107508), INT32_C( 569530361), -INT32_C( 1377684992) }, { INT32_C( 324696914), -INT32_C( 610697431), INT32_C( 337694251), -INT32_C( 1264678527) }, { -INT32_C( 125533097), -INT32_C( 1708351979), INT32_C( 462849759), INT32_C( 44794345) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t c = simde_vld1q_s32(test_vec[i].c); simde_int32x4_t r = simde_vmlsq_s32(a, b, c); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int32x4_t c = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vmlsq_s32(a, b, c); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t b[16]; uint8_t c[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C( 6), UINT8_C(146), UINT8_C( 83), UINT8_C(210), UINT8_C(238), UINT8_C( 64), UINT8_C(204), UINT8_C(231), UINT8_C(151), UINT8_C(191), UINT8_C( 8), UINT8_C(151), UINT8_C(241), UINT8_C(235), UINT8_C( 68), UINT8_C( 67) }, { UINT8_C(102), UINT8_C(158), UINT8_C( 87), UINT8_C(143), UINT8_C( 30), UINT8_C(240), UINT8_C(106), UINT8_C( 73), UINT8_C(190), 
UINT8_C(138), UINT8_C( 93), UINT8_C( 64), UINT8_C( 19), UINT8_C(252), UINT8_C(244), UINT8_C( 25) }, { UINT8_C(142), UINT8_C( 71), UINT8_C(236), UINT8_C(124), UINT8_C(135), UINT8_C(184), UINT8_C( 99), UINT8_C( 30), UINT8_C(119), UINT8_C(108), UINT8_C(181), UINT8_C(105), UINT8_C( 87), UINT8_C(250), UINT8_C(172), UINT8_C(189) }, { UINT8_C(114), UINT8_C(192), UINT8_C( 31), UINT8_C(142), UINT8_C( 28), UINT8_C(192), UINT8_C(206), UINT8_C( 89), UINT8_C( 69), UINT8_C(135), UINT8_C( 71), UINT8_C( 87), UINT8_C(124), UINT8_C(211), UINT8_C( 84), UINT8_C(206) } }, { { UINT8_C(152), UINT8_C( 3), UINT8_C( 76), UINT8_C(182), UINT8_C(243), UINT8_C(182), UINT8_MAX, UINT8_C(178), UINT8_C( 64), UINT8_C( 93), UINT8_C(242), UINT8_C( 84), UINT8_C( 89), UINT8_C(230), UINT8_C(109), UINT8_C(231) }, { UINT8_C( 46), UINT8_C( 89), UINT8_C( 99), UINT8_C(181), UINT8_C( 18), UINT8_C(198), UINT8_C(212), UINT8_C(137), UINT8_C( 50), UINT8_C(137), UINT8_C(242), UINT8_C(137), UINT8_C(131), UINT8_C(159), UINT8_C( 70), UINT8_C( 28) }, { UINT8_C(162), UINT8_C(146), UINT8_C(210), UINT8_C(150), UINT8_C( 72), UINT8_C(210), UINT8_C( 72), UINT8_C(137), UINT8_C( 47), UINT8_C( 58), UINT8_C(221), UINT8_C(136), UINT8_C( 32), UINT8_C( 74), UINT8_C(111), UINT8_C( 78) }, { UINT8_C(124), UINT8_C( 65), UINT8_C( 22), UINT8_C(168), UINT8_C(227), UINT8_C( 74), UINT8_C( 95), UINT8_C( 97), UINT8_C( 18), UINT8_C( 83), UINT8_C( 8), UINT8_C(140), UINT8_C(249), UINT8_C(240), UINT8_C( 19), UINT8_C( 95) } }, { { UINT8_C(164), UINT8_C(210), UINT8_C( 4), UINT8_C(182), UINT8_C(152), UINT8_C(216), UINT8_C( 63), UINT8_C(203), UINT8_C( 97), UINT8_C( 50), UINT8_C( 84), UINT8_C(229), UINT8_C(209), UINT8_C(155), UINT8_C( 1), UINT8_C(115) }, { UINT8_C( 45), UINT8_C(211), UINT8_C( 9), UINT8_C(118), UINT8_C(165), UINT8_C( 81), UINT8_MAX, UINT8_C(212), UINT8_C(139), UINT8_C(220), UINT8_C( 92), UINT8_C(172), UINT8_C( 38), UINT8_C(203), UINT8_C(250), UINT8_C(202) }, { UINT8_C(157), UINT8_C(254), UINT8_C(128), UINT8_C( 54), UINT8_C(214), 
UINT8_C(192), UINT8_C( 1), UINT8_C( 56), UINT8_C(242), UINT8_C( 85), UINT8_C( 29), UINT8_C(195), UINT8_C(240), UINT8_C( 30), UINT8_C( 54), UINT8_C( 30) }, { UINT8_C( 11), UINT8_C(120), UINT8_C(132), UINT8_C(210), UINT8_C(170), UINT8_C( 24), UINT8_C( 64), UINT8_C(107), UINT8_C(251), UINT8_C( 38), UINT8_C(232), UINT8_C(225), UINT8_C( 49), UINT8_C(209), UINT8_C( 69), UINT8_C(199) } }, { { UINT8_C(241), UINT8_C( 64), UINT8_C(148), UINT8_C(151), UINT8_C(145), UINT8_C(147), UINT8_C(107), UINT8_C( 29), UINT8_C(111), UINT8_C(200), UINT8_C(201), UINT8_C(149), UINT8_C(147), UINT8_C(195), UINT8_C( 96), UINT8_C( 49) }, { UINT8_C(194), UINT8_C(224), UINT8_C(103), UINT8_C(152), UINT8_C(160), UINT8_C(104), UINT8_C(208), UINT8_C(146), UINT8_C(189), UINT8_C(237), UINT8_C( 85), UINT8_C(174), UINT8_C( 11), UINT8_C(140), UINT8_C(204), UINT8_C(253) }, { UINT8_C(204), UINT8_C( 96), UINT8_C(148), UINT8_C( 93), UINT8_C(243), UINT8_MAX, UINT8_C(122), UINT8_C( 98), UINT8_C(199), UINT8_C( 67), UINT8_C(247), UINT8_C( 91), UINT8_C( 7), UINT8_C( 87), UINT8_C(140), UINT8_C(201) }, { UINT8_C( 89), UINT8_C( 64), UINT8_C( 8), UINT8_C( 95), UINT8_C(177), UINT8_C(251), UINT8_C( 75), UINT8_C( 57), UINT8_C(132), UINT8_C(193), UINT8_C(198), UINT8_C(187), UINT8_C( 70), UINT8_C( 47), UINT8_C(208), UINT8_C(140) } }, { { UINT8_C( 56), UINT8_C(243), UINT8_C( 97), UINT8_C(216), UINT8_C( 91), UINT8_C( 50), UINT8_C(107), UINT8_C( 24), UINT8_C( 31), UINT8_C(192), UINT8_C(198), UINT8_C( 43), UINT8_C( 76), UINT8_C(146), UINT8_C( 40), UINT8_C( 24) }, { UINT8_C(242), UINT8_C(188), UINT8_C(118), UINT8_C(229), UINT8_C(187), UINT8_C(240), UINT8_C( 71), UINT8_C(131), UINT8_C( 52), UINT8_C( 63), UINT8_C(222), UINT8_C( 59), UINT8_C(150), UINT8_C(106), UINT8_C( 4), UINT8_C(206) }, { UINT8_C( 93), UINT8_C(101), UINT8_C(167), UINT8_C(184), UINT8_C(151), UINT8_C( 18), UINT8_C(208), UINT8_C(183), UINT8_C(210), UINT8_C(151), UINT8_C(226), UINT8_C( 31), UINT8_C( 41), UINT8_C( 10), UINT8_C( 55), UINT8_C( 28) }, { UINT8_C( 78), 
UINT8_C(199), UINT8_C(103), UINT8_C( 64), UINT8_C( 14), UINT8_C( 82), UINT8_C(187), UINT8_C(115), UINT8_C(119), UINT8_C(151), UINT8_C(202), UINT8_C( 6), UINT8_C( 70), UINT8_C(110), UINT8_C( 76), UINT8_C(144) } }, { { UINT8_C(198), UINT8_C(173), UINT8_C( 1), UINT8_C(129), UINT8_C(158), UINT8_C( 73), UINT8_C( 4), UINT8_C(210), UINT8_C(136), UINT8_C(226), UINT8_C( 13), UINT8_C( 30), UINT8_C( 76), UINT8_C( 17), UINT8_C(237), UINT8_C(169) }, { UINT8_C(118), UINT8_C(148), UINT8_C( 97), UINT8_C( 14), UINT8_C(166), UINT8_C( 50), UINT8_C(197), UINT8_C(120), UINT8_C(201), UINT8_C(167), UINT8_C(151), UINT8_C(242), UINT8_C(177), UINT8_C(207), UINT8_C( 14), UINT8_C(119) }, { UINT8_C(124), UINT8_C( 16), UINT8_C(248), UINT8_C( 26), UINT8_C( 89), UINT8_C(253), UINT8_C(236), UINT8_C(225), UINT8_C(223), UINT8_C(249), UINT8_MAX, UINT8_C( 44), UINT8_C( 10), UINT8_C(236), UINT8_C(213), UINT8_C(129) }, { UINT8_C(158), UINT8_C(109), UINT8_C( 9), UINT8_C( 21), UINT8_C(232), UINT8_C(223), UINT8_C(104), UINT8_C( 90), UINT8_C(113), UINT8_C(115), UINT8_C(164), UINT8_C(134), UINT8_C( 98), UINT8_C( 61), UINT8_C( 71), UINT8_C(178) } }, { { UINT8_C(128), UINT8_C( 55), UINT8_C(143), UINT8_C( 38), UINT8_C(105), UINT8_C( 84), UINT8_C(159), UINT8_C( 50), UINT8_C(251), UINT8_C( 54), UINT8_C( 36), UINT8_C(172), UINT8_C( 5), UINT8_C( 51), UINT8_C( 35), UINT8_C(130) }, { UINT8_C( 67), UINT8_C( 27), UINT8_C(156), UINT8_C(156), UINT8_C( 24), UINT8_C(137), UINT8_C(125), UINT8_C(248), UINT8_C(130), UINT8_C(124), UINT8_C( 36), UINT8_C(141), UINT8_C(105), UINT8_C(249), UINT8_C( 14), UINT8_C(233) }, { UINT8_C( 48), UINT8_C(157), UINT8_C( 16), UINT8_C(153), UINT8_C(241), UINT8_C(175), UINT8_C(203), UINT8_C(236), UINT8_C(229), UINT8_C(240), UINT8_C(152), UINT8_C(235), UINT8_C( 35), UINT8_C(187), UINT8_C(109), UINT8_C(102) }, { UINT8_C(240), UINT8_C(168), UINT8_C(207), UINT8_C(234), UINT8_C(209), UINT8_C(173), UINT8_C(128), UINT8_C(146), UINT8_C(177), UINT8_C(246), UINT8_C(196), UINT8_C( 61), UINT8_C(170), 
UINT8_C( 80), UINT8_C( 45), UINT8_C(172) } }, { { UINT8_C(214), UINT8_C( 9), UINT8_C( 2), UINT8_C(239), UINT8_C(146), UINT8_C(127), UINT8_C(231), UINT8_C( 21), UINT8_C(251), UINT8_C( 11), UINT8_C(162), UINT8_C(100), UINT8_C( 4), UINT8_C(176), UINT8_C( 78), UINT8_C( 53) }, { UINT8_C( 77), UINT8_C( 94), UINT8_C(206), UINT8_C( 62), UINT8_C( 13), UINT8_C(154), UINT8_C( 42), UINT8_C(242), UINT8_C(138), UINT8_C(194), UINT8_C(221), UINT8_C(173), UINT8_C(125), UINT8_C( 74), UINT8_C( 19), UINT8_C( 83) }, { UINT8_C( 84), UINT8_C( 21), UINT8_C( 66), UINT8_C(230), UINT8_C(148), UINT8_C( 41), UINT8_C(251), UINT8_C(143), UINT8_C( 52), UINT8_C(157), UINT8_C(244), UINT8_C( 57), UINT8_C( 77), UINT8_C( 66), UINT8_C(110), UINT8_C(154) }, { UINT8_C(146), UINT8_C( 83), UINT8_C(230), UINT8_C( 59), UINT8_C( 14), UINT8_C(213), UINT8_C(185), UINT8_C(231), UINT8_C(243), UINT8_C( 17), UINT8_C(254), UINT8_C(223), UINT8_C(107), UINT8_C(156), UINT8_C( 36), UINT8_C( 71) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t c = simde_vld1q_u8(test_vec[i].c); simde_uint8x16_t r = simde_vmlsq_u8(a, b, c); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t c = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vmlsq_u8(a, b, c); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; 
uint16_t c[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(15520), UINT16_C(44504), UINT16_C( 726), UINT16_C(24735), UINT16_C(32196), UINT16_C(16653), UINT16_C( 8391), UINT16_C( 7061) }, { UINT16_C(55093), UINT16_C(51458), UINT16_C(64769), UINT16_C(13657), UINT16_C(19867), UINT16_C(59502), UINT16_C(56463), UINT16_C(12163) }, { UINT16_C(23321), UINT16_C(61404), UINT16_C(31582), UINT16_C( 8784), UINT16_C(24056), UINT16_C(49252), UINT16_C(63870), UINT16_C(46043) }, { UINT16_C(24947), UINT16_C( 4640), UINT16_C(41336), UINT16_C(58063), UINT16_C( 156), UINT16_C(62997), UINT16_C(31589), UINT16_C(56708) } }, { { UINT16_C(56784), UINT16_C(53629), UINT16_C(55003), UINT16_C(30215), UINT16_C(29987), UINT16_C(45662), UINT16_C(57682), UINT16_C(27617) }, { UINT16_C(48445), UINT16_C(39770), UINT16_C(43576), UINT16_C(12733), UINT16_C( 8456), UINT16_C(34545), UINT16_C(52250), UINT16_C(60217) }, { UINT16_C(46762), UINT16_C(34236), UINT16_C(50060), UINT16_C(45051), UINT16_C(22841), UINT16_C(35681), UINT16_C(16955), UINT16_C(30966) }, { UINT16_C(54606), UINT16_C(63845), UINT16_C( 6203), UINT16_C(32440), UINT16_C(21083), UINT16_C(46605), UINT16_C( 9044), UINT16_C(43803) } }, { { UINT16_C(20735), UINT16_C(14355), UINT16_C(53499), UINT16_C( 873), UINT16_C(23282), UINT16_C( 3209), UINT16_C(49702), UINT16_C(53495) }, { UINT16_C(46201), UINT16_C( 1365), UINT16_C(20599), UINT16_C(45237), UINT16_C( 5802), UINT16_C(58683), UINT16_C(12633), UINT16_C(22621) }, { UINT16_C(28802), UINT16_C(32144), UINT16_C(63808), UINT16_C(12928), UINT16_C( 2387), UINT16_C(31295), UINT16_C(14027), UINT16_C(17482) }, { UINT16_C(48013), UINT16_C(46915), UINT16_C(62523), UINT16_C(20201), UINT16_C( 2004), UINT16_C(34052), UINT16_C(55955), UINT16_C(37397) } }, { { UINT16_C(41194), UINT16_C(25162), UINT16_C(65520), UINT16_C(39442), UINT16_C(19989), UINT16_C(28287), UINT16_C(56447), UINT16_C( 455) }, { UINT16_C(22348), UINT16_C(36222), UINT16_C(65105), UINT16_C(42175), UINT16_C(65031), UINT16_C(54046), UINT16_C(26933), 
UINT16_C( 7959) }, { UINT16_C(24841), UINT16_C(63873), UINT16_C(37984), UINT16_C(30356), UINT16_C( 5090), UINT16_C(25060), UINT16_C(44016), UINT16_C(15459) }, { UINT16_C(49982), UINT16_C(34764), UINT16_C(52624), UINT16_C(20902), UINT16_C(34535), UINT16_C( 2503), UINT16_C(54223), UINT16_C(38882) } }, { { UINT16_C(57603), UINT16_C(21705), UINT16_C(35296), UINT16_C(59384), UINT16_C( 6023), UINT16_C(48314), UINT16_C(53888), UINT16_C(35292) }, { UINT16_C(23859), UINT16_C(38018), UINT16_C( 5873), UINT16_C(54026), UINT16_C(60970), UINT16_C( 6709), UINT16_C(39066), UINT16_C(40278) }, { UINT16_C( 8313), UINT16_C(23025), UINT16_C(59817), UINT16_C(12353), UINT16_C(64256), UINT16_C(33005), UINT16_C(51661), UINT16_C( 265) }, { UINT16_C(29672), UINT16_C(21607), UINT16_C( 3015), UINT16_C(29294), UINT16_C(59783), UINT16_C(63913), UINT16_C(46382), UINT16_C(43990) } }, { { UINT16_C(35878), UINT16_C( 6293), UINT16_C(40866), UINT16_C(52459), UINT16_C( 8333), UINT16_C(10214), UINT16_C(15800), UINT16_C(12996) }, { UINT16_C(46429), UINT16_C( 1675), UINT16_C(52383), UINT16_C(40758), UINT16_C( 9160), UINT16_C(38176), UINT16_C(10732), UINT16_C( 5014) }, { UINT16_C(11189), UINT16_C(22571), UINT16_C( 5834), UINT16_C(22564), UINT16_C( 2871), UINT16_C(61311), UINT16_C(17480), UINT16_C(42273) }, { UINT16_C(45669), UINT16_C(14140), UINT16_C(32812), UINT16_C(55635), UINT16_C(55445), UINT16_C(19718), UINT16_C(50008), UINT16_C(65134) } }, { { UINT16_C(44537), UINT16_C(39083), UINT16_C(57721), UINT16_C(16696), UINT16_C(22533), UINT16_C(61911), UINT16_C(28033), UINT16_C(14084) }, { UINT16_C(12185), UINT16_C(25487), UINT16_C(45894), UINT16_C(32187), UINT16_C(15294), UINT16_C( 1644), UINT16_C(36479), UINT16_C(30891) }, { UINT16_C(22075), UINT16_C(46097), UINT16_C(18744), UINT16_C(15862), UINT16_C(52641), UINT16_C( 8750), UINT16_C(13114), UINT16_C(54105) }, { UINT16_C(20406), UINT16_C(28716), UINT16_C(46121), UINT16_C(57478), UINT16_C(40839), UINT16_C(29295), UINT16_C(55227), UINT16_C(21137) } }, { { 
UINT16_C(59490), UINT16_C(43063), UINT16_C(62108), UINT16_C(23077), UINT16_C(37421), UINT16_C(44129), UINT16_C( 3104), UINT16_C(23333) }, { UINT16_C(13923), UINT16_C(39695), UINT16_C( 1407), UINT16_C( 8408), UINT16_C( 1746), UINT16_C( 3394), UINT16_C(39993), UINT16_C(40160) }, { UINT16_C( 6020), UINT16_C( 8260), UINT16_C(27146), UINT16_C(14203), UINT16_C(56572), UINT16_C( 7396), UINT16_C( 2536), UINT16_C(19319) }, { UINT16_C(63574), UINT16_C(38971), UINT16_C( 9638), UINT16_C(10845), UINT16_C(25461), UINT16_C(42393), UINT16_C(30584), UINT16_C(52997) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t c = simde_vld1q_u16(test_vec[i].c); simde_uint16x8_t r = simde_vmlsq_u16(a, b, c); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t c = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t r = simde_vmlsq_u16(a, b, c); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t c[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(3202778687), UINT32_C(1591656076), UINT32_C(4268433605), UINT32_C(1100631228) }, { UINT32_C(1835130723), UINT32_C(1168497737), UINT32_C(2707524024), UINT32_C(3521960082) }, { UINT32_C(3935294302), UINT32_C(1447652753), UINT32_C(1247130765), UINT32_C(1686892288) }, { UINT32_C(2743789285), UINT32_C(4132496435), UINT32_C(2630149741), 
UINT32_C(2216951484) } }, { { UINT32_C( 399633614), UINT32_C(2170320585), UINT32_C(2434973183), UINT32_C(4100067221) }, { UINT32_C(1943990754), UINT32_C(3972671326), UINT32_C(3694534620), UINT32_C(3712008462) }, { UINT32_C(1995707053), UINT32_C(2297975176), UINT32_C(2753108494), UINT32_C( 194542633) }, { UINT32_C(1706988308), UINT32_C(2893205209), UINT32_C(4201949175), UINT32_C(3979369559) } }, { { UINT32_C(3430905453), UINT32_C(2058897822), UINT32_C(1985408616), UINT32_C(1548982191) }, { UINT32_C( 835930281), UINT32_C(2813971353), UINT32_C( 256627685), UINT32_C(3155878735) }, { UINT32_C(4169701722), UINT32_C(1249001698), UINT32_C(3720464430), UINT32_C( 138024031) }, { UINT32_C(2952980995), UINT32_C( 296373388), UINT32_C( 143061826), UINT32_C(2091979102) } }, { { UINT32_C(4114222428), UINT32_C(3181245400), UINT32_C( 365750470), UINT32_C( 651356108) }, { UINT32_C(1662933632), UINT32_C(3383595163), UINT32_C(3097980504), UINT32_C(3753959811) }, { UINT32_C(3335912174), UINT32_C(3028513518), UINT32_C( 650793050), UINT32_C(3075251255) }, { UINT32_C(3726699100), UINT32_C(4169891262), UINT32_C(3070533078), UINT32_C(4078084263) } }, { { UINT32_C(2434427638), UINT32_C(1398524154), UINT32_C(3104506422), UINT32_C(3516517347) }, { UINT32_C(3029823174), UINT32_C( 979901152), UINT32_C(2707501674), UINT32_C(3310988750) }, { UINT32_C( 307655448), UINT32_C(1919267131), UINT32_C(2519429299), UINT32_C( 40354876) }, { UINT32_C(2040600166), UINT32_C(2288564058), UINT32_C(3359138328), UINT32_C(1570854811) } }, { { UINT32_C( 313982514), UINT32_C(2202869273), UINT32_C( 522563153), UINT32_C(1944354395) }, { UINT32_C( 763771889), UINT32_C(2694835180), UINT32_C(2553727580), UINT32_C(3248135823) }, { UINT32_C(3050590364), UINT32_C(3208192366), UINT32_C( 719281871), UINT32_C(3449734108) }, { UINT32_C(1503340374), UINT32_C(1168134833), UINT32_C(2389067245), UINT32_C(3975892599) } }, { { UINT32_C(3959039230), UINT32_C(1804310799), UINT32_C(4077109604), UINT32_C(4239695199) }, { 
UINT32_C(1555138797), UINT32_C(2015095465), UINT32_C( 614726216), UINT32_C(3169993150) }, { UINT32_C(1957162085), UINT32_C(3940561542), UINT32_C(1407050740), UINT32_C(1850708353) }, { UINT32_C(4179207037), UINT32_C(1262076569), UINT32_C( 933452996), UINT32_C(2713902753) } }, { { UINT32_C(3284795674), UINT32_C( 876406507), UINT32_C(2656624608), UINT32_C(2237352480) }, { UINT32_C(3187212855), UINT32_C( 698865973), UINT32_C(1048347837), UINT32_C( 799853589) }, { UINT32_C(3102963661), UINT32_C(1038888797), UINT32_C( 786187534), UINT32_C(3333633935) }, { UINT32_C(1093052943), UINT32_C(1669743530), UINT32_C(2606148746), UINT32_C(3545089893) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t c = simde_vld1q_u32(test_vec[i].c); simde_uint32x4_t r = simde_vmlsq_u32(a, b, c); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t c = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t r = simde_vmlsq_u32(a, b, c); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmls_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmls_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vmls_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmls_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmls_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmls_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmls_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmls_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsq_f64) 
SIMDE_TEST_FUNC_LIST_ENTRY(vmlsq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsq_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/mlsl.c000066400000000000000000000633601400333146700164420ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN mlsl #include "test-neon.h" #include "../../../simde/arm/neon/mlsl.h" static int test_simde_vmlsl_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int8_t b[8]; int8_t c[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 31745), -INT16_C( 21638), -INT16_C( 25091), -INT16_C( 30065), -INT16_C( 22021), -INT16_C( 2377), -INT16_C( 25004), -INT16_C( 16531) }, { INT8_C( 71), -INT8_C( 102), -INT8_C( 119), -INT8_C( 102), -INT8_C( 113), -INT8_C( 72), INT8_C( 35), -INT8_C( 6) }, { INT8_C( 93), INT8_C( 75), -INT8_C( 4), -INT8_C( 46), INT8_C( 41), INT8_MAX, -INT8_C( 107), INT8_C( 43) }, { INT16_C( 25142), -INT16_C( 13988), -INT16_C( 25567), INT16_C( 30779), -INT16_C( 17388), INT16_C( 6767), -INT16_C( 21259), -INT16_C( 16273) } }, { { INT16_C( 4091), -INT16_C( 1578), INT16_C( 26028), -INT16_C( 22653), INT16_C( 14863), INT16_C( 25502), INT16_C( 3032), INT16_C( 8226) }, { -INT8_C( 91), -INT8_C( 85), -INT8_C( 70), INT8_C( 52), INT8_C( 100), -INT8_C( 35), INT8_C( 46), -INT8_C( 63) }, { INT8_C( 41), INT8_C( 42), -INT8_C( 109), INT8_C( 82), -INT8_C( 87), INT8_C( 40), INT8_C( 125), -INT8_C( 91) }, { INT16_C( 7822), INT16_C( 1992), INT16_C( 18398), -INT16_C( 26917), INT16_C( 23563), INT16_C( 26902), -INT16_C( 2718), INT16_C( 2493) } }, { { INT16_C( 21303), -INT16_C( 7010), INT16_C( 8633), -INT16_C( 14197), INT16_C( 10588), INT16_C( 13355), INT16_C( 19765), -INT16_C( 9644) }, { -INT8_C( 7), INT8_C( 15), INT8_C( 15), INT8_C( 93), -INT8_C( 20), INT8_C( 61), INT8_C( 30), INT8_C( 21) }, { INT8_C( 104), -INT8_C( 79), INT8_C( 104), INT8_C( 17), -INT8_C( 39), 
-INT8_C( 27), -INT8_C( 74), INT8_C( 16) }, { INT16_C( 22031), -INT16_C( 5825), INT16_C( 7073), -INT16_C( 15778), INT16_C( 9808), INT16_C( 15002), INT16_C( 21985), -INT16_C( 9980) } }, { { INT16_C( 21561), -INT16_C( 3340), -INT16_C( 32650), -INT16_C( 11590), -INT16_C( 6743), -INT16_C( 8698), INT16_C( 23346), INT16_C( 11193) }, { INT8_C( 106), -INT8_C( 56), -INT8_C( 120), INT8_C( 86), INT8_C( 5), -INT8_C( 90), INT8_C( 108), INT8_C( 109) }, { INT8_C( 87), -INT8_C( 44), INT8_MAX, INT8_C( 48), -INT8_C( 71), INT8_C( 53), INT8_C( 65), -INT8_C( 14) }, { INT16_C( 12339), -INT16_C( 5804), -INT16_C( 17410), -INT16_C( 15718), -INT16_C( 6388), -INT16_C( 3928), INT16_C( 16326), INT16_C( 12719) } }, { { INT16_C( 13706), INT16_C( 228), -INT16_C( 24907), INT16_C( 24530), -INT16_C( 10109), -INT16_C( 18883), -INT16_C( 2509), -INT16_C( 25119) }, { -INT8_C( 66), INT8_C( 106), -INT8_C( 12), -INT8_C( 60), INT8_C( 16), INT8_C( 96), INT8_C( 49), INT8_C( 104) }, { INT8_C( 52), -INT8_C( 80), -INT8_C( 104), -INT8_C( 19), -INT8_C( 26), -INT8_C( 39), -INT8_C( 32), INT8_C( 112) }, { INT16_C( 17138), INT16_C( 8708), -INT16_C( 26155), INT16_C( 23390), -INT16_C( 9693), -INT16_C( 15139), -INT16_C( 941), INT16_C( 28769) } }, { { -INT16_C( 15345), -INT16_C( 15248), INT16_C( 16995), -INT16_C( 6621), INT16_C( 24858), INT16_C( 20124), INT16_C( 32343), INT16_C( 5867) }, { -INT8_C( 24), -INT8_C( 33), -INT8_C( 38), -INT8_C( 8), INT8_C( 63), INT8_C( 11), INT8_C( 96), INT8_C( 115) }, { -INT8_C( 68), -INT8_C( 7), INT8_C( 97), -INT8_C( 94), -INT8_C( 46), INT8_C( 65), INT8_C( 18), -INT8_C( 31) }, { -INT16_C( 16977), -INT16_C( 15479), INT16_C( 20681), -INT16_C( 7373), INT16_C( 27756), INT16_C( 19409), INT16_C( 30615), INT16_C( 9432) } }, { { -INT16_C( 32251), INT16_C( 26790), -INT16_C( 13884), -INT16_C( 8625), -INT16_C( 5334), -INT16_C( 32212), INT16_C( 6249), INT16_C( 20888) }, { -INT8_C( 9), INT8_C( 114), INT8_C( 74), INT8_C( 55), INT8_C( 125), -INT8_C( 86), -INT8_C( 86), INT8_C( 57) }, { -INT8_C( 93), INT8_C( 
11), -INT8_C( 37), INT8_C( 118), INT8_C( 76), -INT8_C( 19), INT8_C( 87), INT8_C( 82) }, { INT16_C( 32448), INT16_C( 25536), -INT16_C( 11146), -INT16_C( 15115), -INT16_C( 14834), INT16_C( 31690), INT16_C( 13731), INT16_C( 16214) } }, { { -INT16_C( 657), INT16_C( 13242), INT16_C( 2503), -INT16_C( 3822), INT16_C( 16117), INT16_C( 24179), INT16_C( 2902), INT16_C( 20144) }, { INT8_C( 125), -INT8_C( 6), -INT8_C( 123), -INT8_C( 5), -INT8_C( 92), INT8_C( 47), INT8_C( 52), INT8_C( 72) }, { INT8_C( 59), INT8_C( 16), -INT8_C( 66), -INT8_C( 121), -INT8_C( 3), INT8_C( 21), -INT8_C( 39), INT8_C( 109) }, { -INT16_C( 8032), INT16_C( 13338), -INT16_C( 5615), -INT16_C( 4427), INT16_C( 15841), INT16_C( 23192), INT16_C( 4930), INT16_C( 12296) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t c = simde_vld1_s8(test_vec[i].c); simde_int16x8_t r = simde_vmlsl_s8(a, b, c); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int8x8_t c = simde_test_arm_neon_random_i8x8(); simde_int16x8_t r = simde_vmlsl_s8(a, b, c); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsl_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[8]; int16_t b[8]; int16_t c[8]; int32_t r[8]; } test_vec[] = { { { -INT32_C( 389456902), -INT32_C( 1594102070), INT32_C( 334870077), INT32_C( 493042589) }, { -INT16_C( 32086), -INT16_C( 2707), INT16_C( 7962), INT16_C( 29609) }, { -INT16_C( 18041), INT16_C( 
5864), -INT16_C( 29710), -INT16_C( 4982) }, { -INT32_C( 968320428), -INT32_C( 1578228222), INT32_C( 571421097), INT32_C( 640554627) } }, { { -INT32_C( 1311419162), INT32_C( 2136068162), INT32_C( 596854662), INT32_C( 759232130) }, { -INT16_C( 20872), -INT16_C( 27870), -INT16_C( 13363), INT16_C( 21766) }, { -INT16_C( 4476), INT16_C( 30315), -INT16_C( 2695), INT16_C( 24675) }, { -INT32_C( 1404842234), -INT32_C( 1314020084), INT32_C( 560841377), INT32_C( 222156080) } }, { { -INT32_C( 1945028535), -INT32_C( 1911856632), INT32_C( 749837993), INT32_C( 223998356) }, { INT16_C( 31647), INT16_C( 28064), -INT16_C( 22970), -INT16_C( 13374) }, { INT16_C( 11668), INT16_C( 3649), -INT16_C( 23518), INT16_C( 27758) }, { INT32_C( 1980681565), -INT32_C( 2014262168), INT32_C( 209629533), INT32_C( 595233848) } }, { { -INT32_C( 453476388), -INT32_C( 1955462175), INT32_C( 917971874), -INT32_C( 1270673387) }, { -INT16_C( 7285), -INT16_C( 11999), -INT16_C( 7287), INT16_C( 7836) }, { -INT16_C( 8688), INT16_C( 13100), -INT16_C( 25982), INT16_C( 24479) }, { -INT32_C( 516768468), -INT32_C( 1798275275), INT32_C( 728641040), -INT32_C( 1462490831) } }, { { -INT32_C( 96233703), INT32_C( 1015395994), -INT32_C( 294437671), -INT32_C( 677136820) }, { -INT16_C( 15206), INT16_C( 9129), INT16_C( 17832), -INT16_C( 18367) }, { INT16_C( 27939), -INT16_C( 22805), -INT16_C( 30201), INT16_C( 8197) }, { INT32_C( 328606731), INT32_C( 1223582839), INT32_C( 244106561), -INT32_C( 526582521) } }, { { -INT32_C( 1139062751), -INT32_C( 654794498), INT32_C( 700869597), -INT32_C( 1140758238) }, { -INT16_C( 21970), -INT16_C( 10529), INT16_C( 8687), INT16_C( 5006) }, { INT16_C( 31374), -INT16_C( 26951), -INT16_C( 16892), INT16_C( 9910) }, { -INT32_C( 449775971), -INT32_C( 938561577), INT32_C( 847610401), -INT32_C( 1190367698) } }, { { INT32_C( 98750726), INT32_C( 1339939442), INT32_C( 1752736582), INT32_C( 992246029) }, { INT16_C( 803), INT16_C( 4881), -INT16_C( 24796), -INT16_C( 19674) }, { -INT16_C( 8423), INT16_C( 
7753), -INT16_C( 99), -INT16_C( 23740) }, { INT32_C( 105514395), INT32_C( 1302097049), INT32_C( 1750281778), INT32_C( 525185269) } }, { { INT32_C( 1135093457), INT32_C( 1184007424), INT32_C( 917375529), -INT32_C( 1485712764) }, { -INT16_C( 32042), -INT16_C( 1350), -INT16_C( 8159), INT16_C( 15277) }, { -INT16_C( 2369), INT16_C( 23641), -INT16_C( 25098), -INT16_C( 14592) }, { INT32_C( 1059185959), INT32_C( 1215922774), INT32_C( 712600947), -INT32_C( 1262790780) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t c = simde_vld1_s16(test_vec[i].c); simde_int32x4_t r = simde_vmlsl_s16(a, b, c); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int16x4_t c = simde_test_arm_neon_random_i16x4(); simde_int32x4_t r = simde_vmlsl_s16(a, b, c); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsl_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[8]; int32_t b[8]; int32_t c[8]; int64_t r[8]; } test_vec[] = { { { INT64_C( 6271997155721521347), INT64_C( 7048976240456087718) }, { -INT32_C( 1587835776), INT32_C( 752617837) }, { -INT32_C( 175557121), -INT32_C( 1782806062) }, { INT64_C( 5993241278266160451), INT64_C( 8390747882629015612) } }, { { INT64_C( 699856066260354609), -INT64_C( 4537652320033094885) }, { -INT32_C( 10263662), INT32_C( 1797996396) }, { INT32_C( 1197585525), INT32_C( 1876761917) }, { INT64_C( 712147679305047159), -INT64_C( 
7912063482949146017) } }, { { -INT64_C( 5453993730357971228), INT64_C( 1930745201687233480) }, { INT32_C( 823733957), -INT32_C( 476232338) }, { INT32_C( 925564665), -INT32_C( 5896421) }, { -INT64_C( 6216412774317800633), INT64_C( 1927937135328571182) } }, { { -INT64_C( 4428280908314414019), INT64_C( 1061608475383883288) }, { INT32_C( 876598726), INT32_C( 320330778) }, { -INT32_C( 162905638), -INT32_C( 2030702519) }, { -INT64_C( 4285478033585396831), INT64_C( 1712104993181713070) } }, { { -INT64_C( 1648061477688624283), INT64_C( 5342472591096081614) }, { INT32_C( 897475611), INT32_C( 457741632) }, { INT32_C( 521245654), -INT32_C( 391772541) }, { -INT64_C( 2115866739493368877), INT64_C( 5521803193386208526) } }, { { -INT64_C( 5045043223223401662), -INT64_C( 1719542942759155884) }, { -INT32_C( 1742888872), INT32_C( 196306229) }, { INT32_C( 2083243256), INT32_C( 224711115) }, { -INT64_C( 1414181734671954430), -INT64_C( 1763655134359191219) } }, { { INT64_C( 7455740054248401876), -INT64_C( 7829401044269447261) }, { INT32_C( 908883201), -INT32_C( 750657573) }, { INT32_C( 1867476132), INT32_C( 293385277) }, { INT64_C( 5758422369605143344), -INT64_C( 7609169164282694540) } }, { { INT64_C( 6690458678473038943), INT64_C( 8817807800455141520) }, { INT32_C( 1974504346), INT32_C( 239727210) }, { -INT32_C( 1669490594), -INT32_C( 1397884596) }, { -INT64_C( 8459868961777391149), INT64_C( 9152918774556198680) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t c = simde_vld1_s32(test_vec[i].c); simde_int64x2_t r = simde_vmlsl_s32(a, b, c); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int32x2_t c = 
simde_test_arm_neon_random_i32x2(); simde_int64x2_t r = simde_vmlsl_s32(a, b, c); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsl_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint8_t b[8]; uint8_t c[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(32405), UINT16_C(19998), UINT16_C(63686), UINT16_C(22186), UINT16_C(30224), UINT16_C(35362), UINT16_C(33179), UINT16_C(13828) }, { UINT8_C( 12), UINT8_C(181), UINT8_C(171), UINT8_C(118), UINT8_C(167), UINT8_C(244), UINT8_C(133), UINT8_C( 5) }, { UINT8_C(141), UINT8_C( 2), UINT8_C(161), UINT8_C(217), UINT8_C(252), UINT8_C( 79), UINT8_C(133), UINT8_C(145) }, { UINT16_C(30713), UINT16_C(19636), UINT16_C(36155), UINT16_C(62116), UINT16_C(53676), UINT16_C(16086), UINT16_C(15490), UINT16_C(13103) } }, { { UINT16_C(42189), UINT16_C(37855), UINT16_C(35228), UINT16_C(44266), UINT16_C( 3072), UINT16_C(39734), UINT16_C(15245), UINT16_C(39377) }, { UINT8_C(240), UINT8_C(125), UINT8_C( 15), UINT8_C(151), UINT8_C(113), UINT8_C(148), UINT8_C(156), UINT8_C(254) }, { UINT8_C(151), UINT8_C( 62), UINT8_C(216), UINT8_C(147), UINT8_C(141), UINT8_C( 93), UINT8_C( 36), UINT8_C( 90) }, { UINT16_C( 5949), UINT16_C(30105), UINT16_C(31988), UINT16_C(22069), UINT16_C(52675), UINT16_C(25970), UINT16_C( 9629), UINT16_C(16517) } }, { { UINT16_C( 1025), UINT16_C(40429), UINT16_C(55181), UINT16_C(36170), UINT16_C(32995), UINT16_C(28713), UINT16_C(64187), UINT16_C(43785) }, { UINT8_C(119), UINT8_C( 25), UINT8_C( 66), UINT8_C(233), UINT8_C(173), UINT8_C(223), UINT8_C(231), UINT8_C( 68) }, { UINT8_C( 29), UINT8_C(191), UINT8_C(215), UINT8_C(170), UINT8_C( 29), UINT8_C(252), UINT8_C( 4), UINT8_C( 30) }, { UINT16_C(63110), UINT16_C(35654), UINT16_C(40991), UINT16_C(62096), 
UINT16_C(27978), UINT16_C(38053), UINT16_C(63263), UINT16_C(41745) } }, { { UINT16_C(61696), UINT16_C(36284), UINT16_C( 1737), UINT16_C(44059), UINT16_C(17542), UINT16_C(16925), UINT16_C( 9790), UINT16_C(46829) }, { UINT8_C( 63), UINT8_C( 48), UINT8_C(159), UINT8_C(237), UINT8_C( 15), UINT8_C(134), UINT8_C( 49), UINT8_C( 44) }, { UINT8_C( 70), UINT8_C( 9), UINT8_C(214), UINT8_C( 99), UINT8_C( 5), UINT8_C(218), UINT8_C(129), UINT8_C( 5) }, { UINT16_C(57286), UINT16_C(35852), UINT16_C(33247), UINT16_C(20596), UINT16_C(17467), UINT16_C(53249), UINT16_C( 3469), UINT16_C(46609) } }, { { UINT16_C(15819), UINT16_C(38034), UINT16_C(44355), UINT16_C(51777), UINT16_C(24305), UINT16_C(12300), UINT16_C(63876), UINT16_C(50406) }, { UINT8_C( 41), UINT8_C(133), UINT8_C(177), UINT8_C( 56), UINT8_C( 11), UINT8_C(226), UINT8_C(100), UINT8_C( 81) }, { UINT8_C(235), UINT8_C( 58), UINT8_C(180), UINT8_C(240), UINT8_C( 20), UINT8_C( 54), UINT8_C(245), UINT8_C(224) }, { UINT16_C( 6184), UINT16_C(30320), UINT16_C(12495), UINT16_C(38337), UINT16_C(24085), UINT16_C( 96), UINT16_C(39376), UINT16_C(32262) } }, { { UINT16_C(34931), UINT16_C(46964), UINT16_C(46389), UINT16_C(10113), UINT16_C(36115), UINT16_C(38999), UINT16_C(15750), UINT16_C(45148) }, { UINT8_C(194), UINT8_C( 13), UINT8_C(232), UINT8_C(205), UINT8_C(239), UINT8_C( 77), UINT8_C( 31), UINT8_C(219) }, { UINT8_C(135), UINT8_C(211), UINT8_C(203), UINT8_C(156), UINT8_C( 9), UINT8_C(193), UINT8_C(124), UINT8_C(125) }, { UINT16_C( 8741), UINT16_C(44221), UINT16_C(64829), UINT16_C(43669), UINT16_C(33964), UINT16_C(24138), UINT16_C(11906), UINT16_C(17773) } }, { { UINT16_C(61513), UINT16_C(32308), UINT16_C(46502), UINT16_C(47525), UINT16_C(64578), UINT16_C(51281), UINT16_C(44345), UINT16_C(64376) }, { UINT8_C(186), UINT8_C( 97), UINT8_C(201), UINT8_C(170), UINT8_C(174), UINT8_C(232), UINT8_C(133), UINT8_C( 53) }, { UINT8_C(187), UINT8_C( 80), UINT8_C(209), UINT8_C(197), UINT8_C( 17), UINT8_C( 77), UINT8_C( 66), UINT8_C( 90) }, { 
UINT16_C(26731), UINT16_C(24548), UINT16_C( 4493), UINT16_C(14035), UINT16_C(61620), UINT16_C(33417), UINT16_C(35567), UINT16_C(59606) } }, { { UINT16_C(30270), UINT16_C(58585), UINT16_C(32299), UINT16_C(28061), UINT16_C(61307), UINT16_C(46133), UINT16_C(44700), UINT16_C(22448) }, { UINT8_C( 15), UINT8_C(121), UINT8_C( 1), UINT8_C(189), UINT8_C( 97), UINT8_C(134), UINT8_C(242), UINT8_C( 28) }, { UINT8_C(214), UINT8_C(196), UINT8_C(225), UINT8_C(232), UINT8_C( 17), UINT8_C( 35), UINT8_C( 66), UINT8_C( 79) }, { UINT16_C(27060), UINT16_C(34869), UINT16_C(32074), UINT16_C(49749), UINT16_C(59658), UINT16_C(41443), UINT16_C(28728), UINT16_C(20236) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t c = simde_vld1_u8(test_vec[i].c); simde_uint16x8_t r = simde_vmlsl_u8(a, b, c); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t c = simde_test_arm_neon_random_u8x8(); simde_uint16x8_t r = simde_vmlsl_u8(a, b, c); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsl_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[8]; uint16_t b[8]; uint16_t c[8]; uint32_t r[8]; } test_vec[] = { { { UINT32_C( 46872662), UINT32_C(2110696610), UINT32_C(1606486472), UINT32_C(3331166886) }, { UINT16_C(23921), UINT16_C( 2489), UINT16_C( 1686), UINT16_C(49008) }, { UINT16_C(23913), UINT16_C(36921), UINT16_C(32763), UINT16_C(20942) }, { UINT32_C(3769817085), 
UINT32_C(2018800241), UINT32_C(1551248054), UINT32_C(2304841350) } }, { { UINT32_C(1498716855), UINT32_C( 248980038), UINT32_C(3513620523), UINT32_C(2627271467) }, { UINT16_C(20824), UINT16_C(61349), UINT16_C( 5720), UINT16_C(49582) }, { UINT16_C(59251), UINT16_C(28241), UINT16_C( 8294), UINT16_C( 7871) }, { UINT32_C( 264874031), UINT32_C(2811390225), UINT32_C(3466178843), UINT32_C(2237011545) } }, { { UINT32_C( 7803834), UINT32_C(1611550261), UINT32_C( 305298407), UINT32_C(3484338806) }, { UINT16_C(21531), UINT16_C(29630), UINT16_C(27754), UINT16_C(56629) }, { UINT16_C(34387), UINT16_C(47691), UINT16_C( 2726), UINT16_C(24792) }, { UINT32_C(3562384633), UINT32_C( 198465931), UINT32_C( 229641003), UINT32_C(2080392638) } }, { { UINT32_C(1398820638), UINT32_C(2243194526), UINT32_C(1620567786), UINT32_C(3408872880) }, { UINT16_C(60825), UINT16_C( 831), UINT16_C(29785), UINT16_C(44512) }, { UINT16_C(11258), UINT16_C(41319), UINT16_C(16182), UINT16_C(21505) }, { UINT32_C( 714052788), UINT32_C(2208858437), UINT32_C(1138586916), UINT32_C(2451642320) } }, { { UINT32_C( 749167246), UINT32_C(3132185552), UINT32_C(4045097025), UINT32_C( 666716814) }, { UINT16_C(64568), UINT16_C(37163), UINT16_C( 2928), UINT16_C(27198) }, { UINT16_C(42295), UINT16_C(27915), UINT16_C( 3556), UINT16_C(29633) }, { UINT32_C(2313230982), UINT32_C(2094780407), UINT32_C(4034685057), UINT32_C(4155725776) } }, { { UINT32_C(1067411567), UINT32_C( 100291012), UINT32_C( 670504345), UINT32_C(2538583135) }, { UINT16_C(31408), UINT16_C( 8233), UINT16_C(26501), UINT16_C(48266) }, { UINT16_C(38413), UINT16_C(61737), UINT16_C(60067), UINT16_C( 4708) }, { UINT32_C(4155903359), UINT32_C(3886977587), UINT32_C(3373636074), UINT32_C(2311346807) } }, { { UINT32_C( 391185491), UINT32_C(3994831701), UINT32_C(3222672224), UINT32_C(2002216391) }, { UINT16_C(32991), UINT16_C(25751), UINT16_C( 8936), UINT16_C(62753) }, { UINT16_C(19128), UINT16_C(23526), UINT16_C(19253), UINT16_C(34925) }, { UINT32_C(4055100939), 
UINT32_C(3389013675), UINT32_C(3050627416), UINT32_C(4105535162) } }, { { UINT32_C(2761932367), UINT32_C(1788001034), UINT32_C(2519378127), UINT32_C(3960373773) }, { UINT16_C(42242), UINT16_C(59985), UINT16_C(29383), UINT16_C(32735) }, { UINT16_C(50876), UINT16_C(61914), UINT16_C(18193), UINT16_C(24697) }, { UINT32_C( 612828375), UINT32_C(2369057040), UINT32_C(1984813208), UINT32_C(3151917478) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t c = simde_vld1_u16(test_vec[i].c); simde_uint32x4_t r = simde_vmlsl_u16(a, b, c); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t c = simde_test_arm_neon_random_u16x4(); simde_uint32x4_t r = simde_vmlsl_u16(a, b, c); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsl_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[8]; uint32_t b[8]; uint32_t c[8]; uint64_t r[8]; } test_vec[] = { { { UINT64_C(11779893612886890502), UINT64_C( 2970483663335695679) }, { UINT32_C(3021245165), UINT32_C(3090478076) }, { UINT32_C(3400142521), UINT32_C(1546265430) }, { UINT64_C( 1507229461004729537), UINT64_C(16638528325953534615) } }, { { UINT64_C( 338868131041390140), UINT64_C( 2439975957550918795) }, { UINT32_C( 265678867), UINT32_C(2647132643) }, { UINT32_C(1852273176), UINT32_C(3519713941) }, { UINT64_C(18293502365976770164), UINT64_C(11569570364017194348) } }, { { UINT64_C(12144400720407377600), 
UINT64_C( 8635149137401797249) }, { UINT32_C( 176597798), UINT32_C(3450293941) }, { UINT32_C(1446710976), UINT32_C(1629947296) }, { UINT64_C(11888914747703346752), UINT64_C( 3011351857863663713) } }, { { UINT64_C( 2233627761285925179), UINT64_C(13196859246959090250) }, { UINT32_C( 180464469), UINT32_C(3134679289) }, { UINT32_C( 386929270), UINT32_C(1383610135) }, { UINT64_C( 2163800776034817549), UINT64_C( 8859685212724096235) } }, { { UINT64_C( 1800500657005354560), UINT64_C(14864951327980965891) }, { UINT32_C(2413300629), UINT32_C(3930697587) }, { UINT32_C(3623967169), UINT32_C(3509221776) }, { UINT64_C(11501522482291856875), UINT64_C( 1071261760809911379) } }, { { UINT64_C( 9277040838585603287), UINT64_C( 343221831556367483) }, { UINT32_C(3717438569), UINT32_C( 231201867) }, { UINT32_C(3336947765), UINT32_C( 412553281) }, { UINT64_C(15318886587945806618), UINT64_C( 247838742752191856) } }, { { UINT64_C( 7917748095574492900), UINT64_C(13579801287408677889) }, { UINT32_C( 228133057), UINT32_C( 437936357) }, { UINT32_C(1776353064), UINT32_C(4085348111) }, { UINT64_C( 7512503240772856252), UINT64_C(11790678818600506262) } }, { { UINT64_C(13698883255360831677), UINT64_C( 7293760618596630058) }, { UINT32_C(2440221100), UINT32_C(1504480305) }, { UINT32_C(2613218443), UINT32_C(3247326212) }, { UINT64_C( 7322052471843084377), UINT64_C( 2408222288732375398) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t c = simde_vld1_u32(test_vec[i].c); simde_uint64x2_t r = simde_vmlsl_u32(a, b, c); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t c = simde_test_arm_neon_random_u32x2(); simde_uint64x2_t r = 
simde_vmlsl_u32(a, b, c); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/mlsl_high.c000066400000000000000000001026221400333146700174340ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN mlsl_high #include "test-neon.h" #include "../../../simde/arm/neon/mlsl_high.h" static int test_simde_vmlsl_high_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int8_t b[16]; int8_t c[16]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 31077), INT16_C( 5466), INT16_C( 17188), INT16_C( 11098), INT16_C( 28477), -INT16_C( 8315), INT16_C( 4954), -INT16_C( 28534) }, { INT8_MIN, -INT8_C( 20), INT8_C( 61), INT8_C( 84), -INT8_C( 54), -INT8_C( 49), INT8_C( 102), -INT8_C( 70), INT8_C( 61), -INT8_C( 15), INT8_C( 16), INT8_C( 22), -INT8_C( 8), -INT8_C( 34), INT8_C( 21), -INT8_C( 109) }, { INT8_C( 101), INT8_C( 111), -INT8_C( 88), -INT8_C( 119), -INT8_C( 78), INT8_C( 3), -INT8_C( 76), -INT8_C( 17), INT8_C( 114), INT8_C( 57), -INT8_C( 50), -INT8_C( 52), INT8_C( 77), INT8_C( 88), INT8_C( 92), -INT8_C( 51) }, { INT16_C( 27505), INT16_C( 6321), INT16_C( 17988), INT16_C( 12242), INT16_C( 29093), -INT16_C( 5323), INT16_C( 3022), INT16_C( 31443) } }, { { -INT16_C( 26300), INT16_C( 3618), -INT16_C( 30615), -INT16_C( 22840), -INT16_C( 10119), INT16_C( 29372), -INT16_C( 11849), INT16_C( 7173) }, { INT8_C( 64), -INT8_C( 82), -INT8_C( 91), -INT8_C( 13), -INT8_C( 79), INT8_C( 89), -INT8_C( 30), INT8_C( 35), -INT8_C( 
110), -INT8_C( 79), -INT8_C( 17), -INT8_C( 33), INT8_C( 9), INT8_C( 75), -INT8_C( 83), INT8_C( 78) }, { -INT8_C( 27), -INT8_C( 49), INT8_C( 92), INT8_C( 78), INT8_C( 87), INT8_C( 37), -INT8_C( 12), -INT8_C( 47), -INT8_C( 3), -INT8_C( 80), INT8_C( 67), -INT8_C( 76), -INT8_C( 126), INT8_C( 72), -INT8_C( 48), -INT8_C( 62) }, { -INT16_C( 26630), -INT16_C( 2702), -INT16_C( 29476), -INT16_C( 25348), -INT16_C( 8985), INT16_C( 23972), -INT16_C( 15833), INT16_C( 12009) } }, { { INT16_C( 30198), -INT16_C( 22603), -INT16_C( 26418), INT16_C( 25034), -INT16_C( 18103), INT16_C( 21056), -INT16_C( 4859), -INT16_C( 5472) }, { -INT8_C( 68), -INT8_C( 3), INT8_C( 56), INT8_C( 20), INT8_C( 34), INT8_C( 44), -INT8_C( 27), INT8_C( 31), -INT8_C( 36), INT8_C( 40), -INT8_C( 44), INT8_C( 94), INT8_C( 112), -INT8_C( 92), INT8_C( 33), INT8_C( 103) }, { INT8_C( 26), -INT8_C( 42), INT8_C( 14), -INT8_C( 24), INT8_C( 110), -INT8_C( 39), INT8_C( 73), -INT8_C( 73), -INT8_C( 110), -INT8_C( 118), INT8_C( 10), -INT8_C( 105), INT8_C( 119), -INT8_C( 86), -INT8_C( 127), INT8_C( 52) }, { INT16_C( 26238), -INT16_C( 17883), -INT16_C( 25978), -INT16_C( 30632), -INT16_C( 31431), INT16_C( 13144), -INT16_C( 668), -INT16_C( 10828) } }, { { -INT16_C( 18009), -INT16_C( 14008), INT16_C( 11749), -INT16_C( 15639), -INT16_C( 17067), -INT16_C( 15072), INT16_C( 16737), INT16_C( 31532) }, { INT8_C( 24), INT8_C( 59), INT8_C( 100), -INT8_C( 122), INT8_C( 20), -INT8_C( 83), INT8_C( 62), -INT8_C( 90), INT8_C( 55), INT8_C( 72), INT8_C( 62), -INT8_C( 81), -INT8_C( 14), -INT8_C( 65), -INT8_C( 29), -INT8_C( 102) }, { INT8_C( 121), INT8_C( 43), INT8_C( 99), INT8_C( 94), INT8_C( 88), INT8_C( 76), INT8_C( 32), -INT8_C( 83), INT8_C( 9), INT8_C( 65), INT8_C( 114), INT8_C( 107), -INT8_C( 126), -INT8_C( 97), -INT8_C( 26), -INT8_C( 102) }, { -INT16_C( 18504), -INT16_C( 18688), INT16_C( 4681), -INT16_C( 6972), -INT16_C( 18831), -INT16_C( 21377), INT16_C( 15983), INT16_C( 21128) } }, { { INT16_C( 19162), -INT16_C( 4575), INT16_C( 24568), 
INT16_C( 12180), -INT16_C( 11609), -INT16_C( 26146), -INT16_C( 15982), INT16_C( 2867) }, { -INT8_C( 20), -INT8_C( 105), INT8_C( 105), INT8_C( 68), -INT8_C( 29), -INT8_C( 118), -INT8_C( 15), -INT8_C( 19), -INT8_C( 53), INT8_C( 100), INT8_C( 88), INT8_C( 77), INT8_C( 3), INT8_C( 62), -INT8_C( 24), -INT8_C( 35) }, { -INT8_C( 119), INT8_C( 9), -INT8_C( 53), -INT8_C( 127), INT8_C( 104), INT8_C( 95), -INT8_C( 80), INT8_C( 15), INT8_C( 50), -INT8_C( 113), -INT8_C( 88), -INT8_C( 60), INT8_C( 80), -INT8_C( 36), -INT8_C( 49), INT8_C( 61) }, { INT16_C( 21812), INT16_C( 6725), INT16_C( 32312), INT16_C( 16800), -INT16_C( 11849), -INT16_C( 23914), -INT16_C( 17158), INT16_C( 5002) } }, { { INT16_C( 14451), INT16_C( 22145), INT16_C( 29634), -INT16_C( 29373), -INT16_C( 25641), -INT16_C( 9509), -INT16_C( 15398), INT16_C( 25527) }, { -INT8_C( 52), -INT8_C( 126), -INT8_C( 28), INT8_C( 52), -INT8_C( 31), -INT8_C( 108), INT8_C( 67), INT8_C( 19), INT8_C( 35), -INT8_C( 21), -INT8_C( 41), INT8_C( 116), -INT8_C( 57), -INT8_C( 90), -INT8_C( 79), INT8_C( 58) }, { -INT8_C( 33), INT8_C( 50), -INT8_C( 111), -INT8_C( 95), -INT8_C( 91), -INT8_C( 44), INT8_C( 47), INT8_C( 124), INT8_C( 112), INT8_C( 10), INT8_C( 86), INT8_C( 74), -INT8_C( 51), INT8_C( 13), -INT8_C( 83), -INT8_C( 103) }, { INT16_C( 10531), INT16_C( 22355), -INT16_C( 32376), INT16_C( 27579), -INT16_C( 28548), -INT16_C( 8339), -INT16_C( 21955), INT16_C( 31501) } }, { { -INT16_C( 28273), INT16_C( 29133), INT16_C( 4133), INT16_C( 18820), INT16_C( 23803), -INT16_C( 15427), INT16_C( 28162), -INT16_C( 7683) }, { -INT8_C( 96), -INT8_C( 114), -INT8_C( 125), INT8_C( 70), INT8_C( 99), -INT8_C( 78), -INT8_C( 62), -INT8_C( 45), -INT8_C( 68), INT8_C( 25), INT8_C( 29), -INT8_C( 119), INT8_C( 38), -INT8_C( 54), INT8_C( 34), -INT8_C( 74) }, { INT8_C( 91), -INT8_C( 17), INT8_C( 39), INT8_MIN, -INT8_C( 1), -INT8_C( 85), -INT8_C( 55), -INT8_C( 6), INT8_C( 7), -INT8_C( 122), -INT8_C( 67), INT8_C( 10), -INT8_C( 12), -INT8_C( 69), -INT8_C( 21), -INT8_C( 
107) }, { -INT16_C( 27797), INT16_C( 32183), INT16_C( 6076), INT16_C( 20010), INT16_C( 24259), -INT16_C( 19153), INT16_C( 28876), -INT16_C( 15601) } }, { { INT16_C( 28233), -INT16_C( 21285), -INT16_C( 25312), -INT16_C( 9089), -INT16_C( 25418), -INT16_C( 8859), -INT16_C( 30874), -INT16_C( 15981) }, { INT8_C( 118), -INT8_C( 70), INT8_C( 66), INT8_C( 117), INT8_C( 101), INT8_C( 11), INT8_C( 112), INT8_C( 109), -INT8_C( 110), INT8_C( 45), INT8_C( 119), -INT8_C( 122), -INT8_C( 24), INT8_C( 98), INT8_C( 27), INT8_C( 50) }, { -INT8_C( 47), -INT8_C( 10), -INT8_C( 34), -INT8_C( 15), -INT8_C( 108), INT8_C( 94), -INT8_C( 50), INT8_C( 74), -INT8_C( 6), INT8_C( 51), INT8_C( 39), INT8_C( 97), -INT8_C( 69), -INT8_C( 70), INT8_C( 34), INT8_C( 49) }, { INT16_C( 27573), -INT16_C( 23580), -INT16_C( 29953), INT16_C( 2745), -INT16_C( 27074), -INT16_C( 1999), -INT16_C( 31792), -INT16_C( 18431) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t c = simde_vld1q_s8(test_vec[i].c); simde_int16x8_t r = simde_vmlsl_high_s8(a, b, c); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int8x16_t c = simde_test_arm_neon_random_i8x16(); simde_int16x8_t r = simde_vmlsl_high_s8(a, b, c); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsl_high_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int16_t b[8]; int16_t c[8]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 
196837651), -INT32_C( 735099441), INT32_C( 85273546), -INT32_C( 1503660226) }, { -INT16_C( 21911), -INT16_C( 532), INT16_C( 11222), -INT16_C( 28049), -INT16_C( 29213), INT16_C( 18983), INT16_C( 6520), INT16_C( 26206) }, { -INT16_C( 23913), INT16_C( 26202), -INT16_C( 30233), -INT16_C( 20165), INT16_C( 20660), -INT16_C( 3146), INT16_C( 5447), -INT16_C( 20071) }, { INT32_C( 406702929), -INT32_C( 675378923), INT32_C( 49759106), -INT32_C( 977679600) } }, { { -INT32_C( 1766947392), -INT32_C( 1809244751), INT32_C( 601772203), INT32_C( 8993897) }, { -INT16_C( 6946), -INT16_C( 15002), -INT16_C( 24211), INT16_C( 8823), INT16_C( 11762), INT16_C( 14613), -INT16_C( 20925), INT16_C( 1002) }, { -INT16_C( 26573), -INT16_C( 7015), -INT16_C( 15690), INT16_C( 24952), INT16_C( 22034), INT16_C( 31620), INT16_C( 3730), INT16_C( 28795) }, { -INT32_C( 2026111300), INT32_C( 2023659485), INT32_C( 679822453), -INT32_C( 19858693) } }, { { INT32_C( 1597432562), INT32_C( 1971432835), INT32_C( 498046682), INT32_C( 2015402308) }, { -INT16_C( 17870), -INT16_C( 6052), -INT16_C( 10884), -INT16_C( 28855), -INT16_C( 13013), -INT16_C( 16886), -INT16_C( 31013), -INT16_C( 13010) }, { INT16_C( 25704), -INT16_C( 5331), -INT16_C( 20975), -INT16_C( 5023), INT16_C( 4165), -INT16_C( 30455), INT16_C( 10921), -INT16_C( 9471) }, { INT32_C( 1651631707), INT32_C( 1457169705), INT32_C( 836739655), INT32_C( 1892184598) } }, { { INT32_C( 1623416548), INT32_C( 1592724531), -INT32_C( 1256391974), -INT32_C( 394048640) }, { -INT16_C( 20305), -INT16_C( 15917), INT16_C( 13406), -INT16_C( 23635), -INT16_C( 18876), -INT16_C( 4563), INT16_C( 12000), -INT16_C( 15159) }, { -INT16_C( 29300), -INT16_C( 16603), INT16_C( 5273), INT16_C( 29470), INT16_C( 14862), -INT16_C( 29143), -INT16_C( 21371), INT16_C( 13686) }, { INT32_C( 1903951660), INT32_C( 1459745022), -INT32_C( 999939974), -INT32_C( 186582566) } }, { { -INT32_C( 1158264228), -INT32_C( 1017207938), INT32_C( 984714073), INT32_C( 1191082681) }, { INT16_C( 8967), -INT16_C( 
24315), INT16_C( 9016), INT16_C( 17940), INT16_C( 15710), -INT16_C( 7211), INT16_C( 19433), INT16_C( 17688) }, { INT16_C( 3733), INT16_C( 5120), INT16_C( 24241), INT16_C( 3031), -INT16_C( 30487), -INT16_C( 23995), INT16_C( 17154), INT16_C( 2792) }, { -INT32_C( 679313458), -INT32_C( 1190235883), INT32_C( 651360391), INT32_C( 1141697785) } }, { { -INT32_C( 1616122265), INT32_C( 1877327633), -INT32_C( 430720259), -INT32_C( 1691587834) }, { INT16_C( 11386), INT16_C( 11183), -INT16_C( 31094), INT16_C( 29494), INT16_C( 31502), INT16_C( 4373), -INT16_C( 321), INT16_C( 9755) }, { -INT16_C( 14612), -INT16_C( 571), -INT16_C( 21883), -INT16_C( 32147), -INT16_C( 16283), INT16_C( 27497), -INT16_C( 27349), -INT16_C( 23290) }, { -INT32_C( 1103175199), INT32_C( 1757083252), -INT32_C( 439499288), -INT32_C( 1464393884) } }, { { INT32_C( 1272035009), INT32_C( 1270744892), INT32_C( 1113379715), -INT32_C( 1117227055) }, { INT16_C( 11581), -INT16_C( 15685), INT16_C( 10455), INT16_C( 15429), -INT16_C( 20760), INT16_C( 5031), -INT16_C( 20925), INT16_C( 1209) }, { -INT16_C( 30108), -INT16_C( 24497), INT16_C( 3473), INT16_C( 5355), INT16_C( 18400), -INT16_C( 19882), -INT16_C( 16706), -INT16_C( 1169) }, { INT32_C( 1654019009), INT32_C( 1370771234), INT32_C( 763806665), -INT32_C( 1115813734) } }, { { -INT32_C( 1010947349), INT32_C( 989791058), -INT32_C( 196171855), -INT32_C( 1174927531) }, { INT16_C( 18321), INT16_C( 8793), INT16_C( 17748), INT16_C( 13367), -INT16_C( 29300), INT16_C( 19430), INT16_C( 22092), INT16_C( 14150) }, { INT16_C( 1152), -INT16_C( 11270), -INT16_C( 1529), -INT16_C( 18419), INT16_C( 23457), -INT16_C( 2388), -INT16_C( 23454), -INT16_C( 3153) }, { -INT32_C( 323657249), INT32_C( 1036189898), INT32_C( 321973913), -INT32_C( 1130312581) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t c = simde_vld1q_s16(test_vec[i].c); 
simde_int32x4_t r = simde_vmlsl_high_s16(a, b, c); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int16x8_t c = simde_test_arm_neon_random_i16x8(); simde_int32x4_t r = simde_vmlsl_high_s16(a, b, c); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsl_high_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int32_t b[4]; int32_t c[4]; int64_t r[2]; } test_vec[] = { { { -INT64_C( 2705452481976596245), INT64_C( 3557399159967668954) }, { INT32_C( 1996773488), -INT32_C( 214953646), -INT32_C( 806757267), INT32_C( 1824757889) }, { -INT32_C( 290661983), INT32_C( 13115430), INT32_C( 724036986), -INT32_C( 916683431) }, { -INT64_C( 2121330381944318983), INT64_C( 5230124482400506113) } }, { { -INT64_C( 2439982941661536035), INT64_C( 5060482088255229261) }, { INT32_C( 1882580554), -INT32_C( 2123301626), INT32_C( 1152161771), -INT32_C( 99809252) }, { -INT32_C( 651539096), INT32_C( 196627902), INT32_C( 14247514), INT32_C( 558240727) }, { -INT64_C( 2456398382624123329), INT64_C( 5116199677653035465) } }, { { INT64_C( 7242071295000673274), -INT64_C( 7084244446544908903) }, { -INT32_C( 1065887486), -INT32_C( 2117390553), INT32_C( 1820435605), -INT32_C( 1299330888) }, { -INT32_C( 1129177533), -INT32_C( 1205849057), -INT32_C( 1754347423), -INT32_C( 2144068227) }, { -INT64_C( 8010996266339682427), INT64_C( 8576645553844147137) } }, { { INT64_C( 8043161590456953847), INT64_C( 3177967838759166384) }, { -INT32_C( 1494626938), INT32_C( 1650329857), INT32_C( 1341770962), -INT32_C( 506515734) }, { 
-INT32_C( 1258352423), -INT32_C( 870080996), -INT32_C( 1472856129), -INT32_C( 288010393) }, { -INT64_C( 8427346898156671671), INT64_C( 3032086043149142922) } }, { { -INT64_C( 7385636451878912484), -INT64_C( 9112267865781077569) }, { -INT32_C( 466253368), -INT32_C( 407807961), -INT32_C( 1081088425), INT32_C( 1387095093) }, { -INT32_C( 361807582), -INT32_C( 226234573), INT32_C( 278621289), -INT32_C( 309254875) }, { -INT64_C( 7084422201382432659), -INT64_C( 8683301946182249194) } }, { { INT64_C( 8556139039845172910), -INT64_C( 3174788484804817815) }, { INT32_C( 1472028707), -INT32_C( 1203158705), -INT32_C( 758520403), -INT32_C( 1178576374) }, { INT32_C( 1083150881), INT32_C( 2125876245), INT32_C( 1226632344), -INT32_C( 232976946) }, { -INT64_C( 8960179373960664074), -INT64_C( 3449369609047091619) } }, { { -INT64_C( 3930074451990160787), -INT64_C( 5099381442482585993) }, { INT32_C( 49924845), -INT32_C( 1367298026), INT32_C( 1794678172), INT32_C( 391910826) }, { INT32_C( 198419951), -INT32_C( 1345042376), INT32_C( 523333510), -INT32_C( 1193775925) }, { -INT64_C( 4869289679063304507), -INT64_C( 4631527733656921943) } }, { { INT64_C( 2232443471111377462), -INT64_C( 605200850192108584) }, { -INT32_C( 1040028278), INT32_C( 1014159029), INT32_C( 291218246), INT32_C( 1187590928) }, { -INT32_C( 2020375547), -INT32_C( 1733980480), -INT32_C( 1994707327), -INT32_C( 1652509677) }, { INT64_C( 2813338640163665904), INT64_C( 1357304650645301672) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t c = simde_vld1q_s32(test_vec[i].c); simde_int64x2_t r = simde_vmlsl_high_s32(a, b, c); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); 
simde_int32x4_t c = simde_test_arm_neon_random_i32x4(); simde_int64x2_t r = simde_vmlsl_high_s32(a, b, c); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsl_high_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint8_t b[16]; uint8_t c[16]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(33569), UINT16_C(55136), UINT16_C(53849), UINT16_C(40723), UINT16_C(28277), UINT16_C(34224), UINT16_C(31393), UINT16_C(42956) }, { UINT8_C(254), UINT8_C( 95), UINT8_C( 46), UINT8_C(190), UINT8_C(237), UINT8_C(212), UINT8_C( 87), UINT8_C(110), UINT8_C( 2), UINT8_C(114), UINT8_C(247), UINT8_C( 21), UINT8_C( 38), UINT8_C(119), UINT8_C(178), UINT8_C( 72) }, { UINT8_C(250), UINT8_C( 18), UINT8_C( 31), UINT8_C( 84), UINT8_C(228), UINT8_C( 50), UINT8_C(243), UINT8_C( 90), UINT8_C(160), UINT8_C(164), UINT8_C(223), UINT8_C( 65), UINT8_C( 30), UINT8_C(171), UINT8_C(232), UINT8_C( 28) }, { UINT16_C(33249), UINT16_C(36440), UINT16_C(64304), UINT16_C(39358), UINT16_C(27137), UINT16_C(13875), UINT16_C(55633), UINT16_C(40940) } }, { { UINT16_C( 5898), UINT16_C(63451), UINT16_C(13035), UINT16_C(60773), UINT16_C(23716), UINT16_C(51714), UINT16_C(46292), UINT16_C(52754) }, { UINT8_C(199), UINT8_C( 49), UINT8_C( 34), UINT8_C(171), UINT8_C( 99), UINT8_C( 22), UINT8_C( 5), UINT8_C( 3), UINT8_C(186), UINT8_C(229), UINT8_C( 69), UINT8_C(216), UINT8_C(144), UINT8_C( 45), UINT8_C(244), UINT8_C(155) }, { UINT8_C( 68), UINT8_C(207), UINT8_C(146), UINT8_C( 47), UINT8_C( 1), UINT8_C(248), UINT8_C( 28), UINT8_C(165), UINT8_C( 84), UINT8_C( 30), UINT8_C(112), UINT8_C( 40), UINT8_C(211), UINT8_C(130), UINT8_C(247), UINT8_C(154) }, { UINT16_C(55810), UINT16_C(56581), UINT16_C( 5307), UINT16_C(52133), UINT16_C(58868), UINT16_C(45864), 
UINT16_C(51560), UINT16_C(28884) } }, { { UINT16_C( 6580), UINT16_C( 5957), UINT16_C(19247), UINT16_C(59675), UINT16_C(24624), UINT16_C(49345), UINT16_C(46733), UINT16_C(53851) }, { UINT8_C(133), UINT8_C(238), UINT8_C( 1), UINT8_C(135), UINT8_C(230), UINT8_C( 30), UINT8_C( 44), UINT8_C( 58), UINT8_C( 60), UINT8_C(156), UINT8_C( 99), UINT8_C( 15), UINT8_C( 31), UINT8_C( 90), UINT8_C(169), UINT8_C(211) }, { UINT8_C(115), UINT8_C(239), UINT8_C(234), UINT8_C(163), UINT8_C( 58), UINT8_C( 5), UINT8_C(140), UINT8_C(106), UINT8_C(101), UINT8_C( 78), UINT8_C( 42), UINT8_C(243), UINT8_C( 4), UINT8_C(134), UINT8_C(197), UINT8_C(137) }, { UINT16_C( 520), UINT16_C(59325), UINT16_C(15089), UINT16_C(56030), UINT16_C(24500), UINT16_C(37285), UINT16_C(13440), UINT16_C(24944) } }, { { UINT16_C(50804), UINT16_C(23056), UINT16_C(15844), UINT16_C( 8596), UINT16_C(63449), UINT16_C(63536), UINT16_C(55889), UINT16_C(50635) }, { UINT8_C(201), UINT8_C(182), UINT8_C(104), UINT8_C( 3), UINT8_C(187), UINT8_C(244), UINT8_C(109), UINT8_C( 33), UINT8_C( 66), UINT8_C(151), UINT8_C( 20), UINT8_C( 70), UINT8_C( 29), UINT8_C(217), UINT8_C(208), UINT8_C(145) }, { UINT8_C(159), UINT8_C(224), UINT8_C(235), UINT8_C(132), UINT8_C( 29), UINT8_C(128), UINT8_C(165), UINT8_C(247), UINT8_C(119), UINT8_C(213), UINT8_C(239), UINT8_C(201), UINT8_C(175), UINT8_C(187), UINT8_C(142), UINT8_C(120) }, { UINT16_C(42950), UINT16_C(56429), UINT16_C(11064), UINT16_C(60062), UINT16_C(58374), UINT16_C(22957), UINT16_C(26353), UINT16_C(33235) } }, { { UINT16_C(63089), UINT16_C(11387), UINT16_C(59626), UINT16_C(11597), UINT16_C(24960), UINT16_C(40307), UINT16_C(17210), UINT16_C(55855) }, { UINT8_C( 36), UINT8_C( 26), UINT8_C( 94), UINT8_C( 65), UINT8_C(154), UINT8_C( 3), UINT8_C( 56), UINT8_C( 18), UINT8_C(216), UINT8_C( 40), UINT8_C(219), UINT8_C(136), UINT8_C(227), UINT8_C(105), UINT8_C( 0), UINT8_C( 84) }, { UINT8_C( 95), UINT8_C(124), UINT8_C(128), UINT8_C( 73), UINT8_C(100), UINT8_C(206), UINT8_C(118), UINT8_C(228), 
UINT8_C( 47), UINT8_C(234), UINT8_C(130), UINT8_C(106), UINT8_C( 45), UINT8_C(177), UINT8_C( 68), UINT8_C( 81) }, { UINT16_C(52937), UINT16_C( 2027), UINT16_C(31156), UINT16_C(62717), UINT16_C(14745), UINT16_C(21722), UINT16_C(17210), UINT16_C(49051) } }, { { UINT16_C(41675), UINT16_C(26259), UINT16_C(52133), UINT16_C(32120), UINT16_C(21491), UINT16_C(54789), UINT16_C( 1724), UINT16_C( 6954) }, { UINT8_C(130), UINT8_C(171), UINT8_C(100), UINT8_C(230), UINT8_C(121), UINT8_C(219), UINT8_C(203), UINT8_C(168), UINT8_C(197), UINT8_C( 77), UINT8_C( 18), UINT8_C(242), UINT8_C(254), UINT8_C( 86), UINT8_C( 68), UINT8_C(201) }, { UINT8_C(248), UINT8_C(215), UINT8_C( 47), UINT8_C(157), UINT8_C(162), UINT8_C(167), UINT8_C( 27), UINT8_C(150), UINT8_C(250), UINT8_C( 32), UINT8_C(108), UINT8_C(182), UINT8_C( 38), UINT8_C(151), UINT8_C(209), UINT8_C(168) }, { UINT16_C(57961), UINT16_C(23795), UINT16_C(50189), UINT16_C(53612), UINT16_C(11839), UINT16_C(41803), UINT16_C(53048), UINT16_C(38722) } }, { { UINT16_C(13890), UINT16_C(48015), UINT16_C(23057), UINT16_C(54883), UINT16_C(30375), UINT16_C(42440), UINT16_C( 3276), UINT16_C(50542) }, { UINT8_C(227), UINT8_C(158), UINT8_C( 98), UINT8_C(134), UINT8_C( 69), UINT8_C(125), UINT8_C( 28), UINT8_C( 64), UINT8_C(158), UINT8_C(136), UINT8_C(246), UINT8_C(196), UINT8_C( 31), UINT8_C(200), UINT8_C(109), UINT8_C( 97) }, { UINT8_C(254), UINT8_C(252), UINT8_C( 28), UINT8_C( 15), UINT8_C( 86), UINT8_C(128), UINT8_C(229), UINT8_C(253), UINT8_C(246), UINT8_C(173), UINT8_C(162), UINT8_C(194), UINT8_C(186), UINT8_C( 16), UINT8_C(135), UINT8_C(157) }, { UINT16_C(40558), UINT16_C(24487), UINT16_C(48741), UINT16_C(16859), UINT16_C(24609), UINT16_C(39240), UINT16_C(54097), UINT16_C(35313) } }, { { UINT16_C(60078), UINT16_C(62499), UINT16_C(16231), UINT16_C( 1332), UINT16_C(10952), UINT16_C(59338), UINT16_C(14322), UINT16_C(61513) }, { UINT8_C( 51), UINT8_C(101), UINT8_MAX, UINT8_C(137), UINT8_C(229), UINT8_C(228), UINT8_C(134), UINT8_C(219), 
UINT8_C(146), UINT8_C( 40), UINT8_C(158), UINT8_C( 76), UINT8_C( 56), UINT8_C( 37), UINT8_C(233), UINT8_C(231) }, { UINT8_C( 15), UINT8_C( 13), UINT8_C(219), UINT8_C(119), UINT8_C( 76), UINT8_C( 15), UINT8_C(124), UINT8_C( 20), UINT8_C( 57), UINT8_C( 70), UINT8_C(252), UINT8_C( 44), UINT8_C(125), UINT8_C( 69), UINT8_C( 28), UINT8_C(176) }, { UINT16_C(51756), UINT16_C(59699), UINT16_C(41951), UINT16_C(63524), UINT16_C( 3952), UINT16_C(56785), UINT16_C( 7798), UINT16_C(20857) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t c = simde_vld1q_u8(test_vec[i].c); simde_uint16x8_t r = simde_vmlsl_high_u8(a, b, c); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t c = simde_test_arm_neon_random_u8x16(); simde_uint16x8_t r = simde_vmlsl_high_u8(a, b, c); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsl_high_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint16_t b[8]; uint16_t c[8]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(2419661994), UINT32_C(2456534784), UINT32_C( 551422439), UINT32_C(1040697391) }, { UINT16_C(58069), UINT16_C( 8629), UINT16_C(13041), UINT16_C(10806), UINT16_C(12920), UINT16_C(63062), UINT16_C(29559), UINT16_C( 8614) }, { UINT16_C(57487), UINT16_C(36785), UINT16_C( 7583), UINT16_C(34594), UINT16_C( 38), UINT16_C(21927), UINT16_C(44744), UINT16_C(40340) }, { UINT32_C(2419171034), 
UINT32_C(1073774310), UINT32_C(3523801839), UINT32_C( 693208631) } }, { { UINT32_C(2176797072), UINT32_C(4104910203), UINT32_C(2666136103), UINT32_C( 79663221) }, { UINT16_C(29040), UINT16_C( 4243), UINT16_C(46478), UINT16_C(46231), UINT16_C(16054), UINT16_C(32266), UINT16_C(40684), UINT16_C(31772) }, { UINT16_C(56295), UINT16_C(25597), UINT16_C(43216), UINT16_C(63319), UINT16_C(16810), UINT16_C( 8085), UINT16_C(21713), UINT16_C(16931) }, { UINT32_C(1906929332), UINT32_C(3844039593), UINT32_C(1782764411), UINT32_C(3836698785) } }, { { UINT32_C(1397929925), UINT32_C( 571009388), UINT32_C( 329323047), UINT32_C(2542779824) }, { UINT16_C(35992), UINT16_C(26874), UINT16_C(20788), UINT16_C(57183), UINT16_C(62610), UINT16_C(25854), UINT16_C( 8776), UINT16_C( 3750) }, { UINT16_C(63705), UINT16_C(17761), UINT16_C(27105), UINT16_C( 2152), UINT16_C( 2427), UINT16_C(11035), UINT16_C(43718), UINT16_C(24259) }, { UINT32_C(1245975455), UINT32_C( 285710498), UINT32_C(4240621175), UINT32_C(2451808574) } }, { { UINT32_C(1791409462), UINT32_C(2705925391), UINT32_C(1627736089), UINT32_C(1131391850) }, { UINT16_C(53667), UINT16_C(33928), UINT16_C(61498), UINT16_C(46732), UINT16_C(43001), UINT16_C(49121), UINT16_C(42065), UINT16_C(34589) }, { UINT16_C(58210), UINT16_C(29170), UINT16_C(15112), UINT16_C( 8466), UINT16_C( 6275), UINT16_C(60803), UINT16_C(62147), UINT16_C(26416) }, { UINT32_C(1521578187), UINT32_C(4014188524), UINT32_C(3308489830), UINT32_C( 217688826) } }, { { UINT32_C(4276861379), UINT32_C(2746513577), UINT32_C(1902286111), UINT32_C(2633531450) }, { UINT16_C(60003), UINT16_C(27661), UINT16_C( 7974), UINT16_C(43405), UINT16_C( 4151), UINT16_C(64407), UINT16_C(50947), UINT16_C(50786) }, { UINT16_C(19840), UINT16_C(10948), UINT16_C(30917), UINT16_C(58829), UINT16_C(12046), UINT16_C(18518), UINT16_C(20143), UINT16_C( 5092) }, { UINT32_C(4226858433), UINT32_C(1553824751), UINT32_C( 876060690), UINT32_C(2374929138) } }, { { UINT32_C(1602220345), UINT32_C(1208486928), UINT32_C( 
541302557), UINT32_C(3890652519) }, { UINT16_C(44018), UINT16_C(47121), UINT16_C(56867), UINT16_C(12701), UINT16_C(62222), UINT16_C(48505), UINT16_C(23873), UINT16_C(31440) }, { UINT16_C(20302), UINT16_C(24537), UINT16_C(57948), UINT16_C(31143), UINT16_C(60033), UINT16_C(59545), UINT16_C(32655), UINT16_C(33232) }, { UINT32_C(2161814315), UINT32_C(2615223999), UINT32_C(4056697038), UINT32_C(2845838439) } }, { { UINT32_C(1312416042), UINT32_C(3464484544), UINT32_C( 193722825), UINT32_C(2776980566) }, { UINT16_C(24491), UINT16_C( 1796), UINT16_C(43841), UINT16_C(49792), UINT16_C( 6549), UINT16_C( 9387), UINT16_C(31641), UINT16_C(50085) }, { UINT16_C(57180), UINT16_C( 7185), UINT16_C(37301), UINT16_C(32746), UINT16_C(30346), UINT16_C(57482), UINT16_C( 4050), UINT16_C(32133) }, { UINT32_C(1113680088), UINT32_C(2924901010), UINT32_C( 65576775), UINT32_C(1167599261) } }, { { UINT32_C(2944764270), UINT32_C(3379692852), UINT32_C(3102547231), UINT32_C(4101739416) }, { UINT16_C(36210), UINT16_C(10001), UINT16_C(64286), UINT16_C(43174), UINT16_C(12401), UINT16_C(17288), UINT16_C( 3648), UINT16_C(44737) }, { UINT16_C(18071), UINT16_C(52318), UINT16_C(53323), UINT16_C(27285), UINT16_C(33773), UINT16_C(34082), UINT16_C(40470), UINT16_C(34937) }, { UINT32_C(2525945297), UINT32_C(2790483236), UINT32_C(2954912671), UINT32_C(2538762847) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t c = simde_vld1q_u16(test_vec[i].c); simde_uint32x4_t r = simde_vmlsl_high_u16(a, b, c); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t c = simde_test_arm_neon_random_u16x8(); simde_uint32x4_t r = simde_vmlsl_high_u16(a, b, 
c); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsl_high_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint32_t b[4]; uint32_t c[4]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(17866156331517315627), UINT64_C( 2266995647527483782) }, { UINT32_C(2381042498), UINT32_C(2432204963), UINT32_C( 420813315), UINT32_C(3819015864) }, { UINT32_C(2670481689), UINT32_C( 764812711), UINT32_C( 519360919), UINT32_C( 255683021) }, { UINT64_C(17647602341511479142), UINT64_C( 1290538134173038638) } }, { { UINT64_C(12425314453772118076), UINT64_C( 3119700495649113263) }, { UINT32_C(1607104696), UINT32_C( 747463061), UINT32_C( 4948274), UINT32_C( 638552298) }, { UINT32_C(1510321329), UINT32_C(4043732033), UINT32_C( 173657336), UINT32_C(3963003956) }, { UINT64_C(12424455149691480012), UINT64_C( 589115212562222375) } }, { { UINT64_C(10727250376069480476), UINT64_C( 7163718143336261978) }, { UINT32_C(2445111120), UINT32_C(3682780131), UINT32_C(3303398288), UINT32_C(2628787072) }, { UINT32_C(2102328348), UINT32_C( 789654997), UINT32_C( 141796694), UINT32_C(2573983049) }, { UINT64_C(10258839419865820604), UINT64_C( 397264780577919450) } }, { { UINT64_C( 8938147640385349708), UINT64_C( 2856953468810227848) }, { UINT32_C(3265655789), UINT32_C(2012395040), UINT32_C(2776589660), UINT32_C(2403265347) }, { UINT32_C(4290734099), UINT32_C(2658912533), UINT32_C(3316038585), UINT32_C(2582399916) }, { UINT64_C(18177613266822870224), UINT64_C(15097505312301268612) } }, { { UINT64_C(11877766040469082431), UINT64_C( 6090422788463089331) }, { UINT32_C( 89408751), UINT32_C(3349401358), UINT32_C( 931940747), UINT32_C(3570432405) }, { UINT32_C(1379085066), UINT32_C( 754322041), UINT32_C(2703376224), UINT32_C(3069552582) }, 
{ UINT64_C( 9358379582852483103), UINT64_C(13577536854548421237) } }, { { UINT64_C(11871873558399109612), UINT64_C(15064729625762156199) }, { UINT32_C(1327776982), UINT32_C(2927303246), UINT32_C( 542088538), UINT32_C( 836125765) }, { UINT32_C(2804650382), UINT32_C(2538335471), UINT32_C( 30615098), UINT32_C(2815681745) }, { UINT64_C(11855277464683562888), UINT64_C(12710465572727496274) } }, { { UINT64_C( 7720421080555845416), UINT64_C( 5009800175050060560) }, { UINT32_C(3823940084), UINT32_C(3631888285), UINT32_C( 785993309), UINT32_C(1524018226) }, { UINT32_C(3050360227), UINT32_C(1327559743), UINT32_C( 514108519), UINT32_C( 56896015) }, { UINT64_C( 7316335224521946045), UINT64_C( 4923089611203291170) } }, { { UINT64_C(16524095746587644123), UINT64_C( 8807891709168200367) }, { UINT32_C(4130278327), UINT32_C(1715884287), UINT32_C( 193260284), UINT32_C(4027508757) }, { UINT32_C(3244946745), UINT32_C( 94812758), UINT32_C(3169237733), UINT32_C(1530274212) }, { UINT64_C(15911607962244547951), UINT64_C( 2644698919726925883) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t c = simde_vld1q_u32(test_vec[i].c); simde_uint64x2_t r = simde_vmlsl_high_u32(a, b, c); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t c = simde_test_arm_neon_random_u32x4(); simde_uint64x2_t r = simde_vmlsl_high_u32(a, b, c); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } 
SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_high_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_high_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_high_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_high_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_high_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_high_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/mlsl_n.c000066400000000000000000000323561400333146700167600ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN mlsl_n #include "test-neon.h" #include "../../../simde/arm/neon/mlsl_n.h" static int test_simde_vmlsl_n_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[8]; int16_t b[8]; int16_t c; int32_t r[8]; } test_vec[] = { { { INT32_C( 97116604), INT32_C( 1553793811), INT32_C( 2023811647), INT32_C( 156946984) }, { -INT16_C( 29600), INT16_C( 21469), -INT16_C( 25494), INT16_C( 24076) }, INT16_C( 30465), { INT32_C( 998880604), INT32_C( 899740726), -INT32_C( 1494480939), -INT32_C( 576528356) } }, { { -INT32_C( 1677353418), INT32_C( 779993701), INT32_C( 1680970183), -INT32_C( 1924238867) }, { INT16_C( 30441), INT16_C( 17503), -INT16_C( 16257), INT16_C( 23760) }, INT16_C( 14867), { -INT32_C( 2129919765), INT32_C( 519776600), INT32_C( 1922663002), INT32_C( 2017488509) } }, { { -INT32_C( 90693384), -INT32_C( 1667707241), -INT32_C( 413205142), INT32_C( 1551377707) }, { INT16_C( 25993), -INT16_C( 10291), -INT16_C( 18446), INT16_C( 21069) }, -INT16_C( 12805), { INT32_C( 242146981), -INT32_C( 1799483496), -INT32_C( 649406172), INT32_C( 1821166252) } }, { { INT32_C( 623495954), -INT32_C( 1656413691), -INT32_C( 1267999716), -INT32_C( 676211335) }, { -INT16_C( 9028), INT16_C( 13821), -INT16_C( 31176), INT16_C( 1690) }, -INT16_C( 29346), { INT32_C( 358560266), -INT32_C( 1250822625), INT32_C( 2112076684), -INT32_C( 626616595) } }, { { -INT32_C( 1193301059), -INT32_C( 1568411272), INT32_C( 1556383766), -INT32_C( 1875320795) }, { -INT16_C( 20076), INT16_C( 17765), INT16_C( 8585), -INT16_C( 31199) }, 
INT16_C( 22870), { -INT32_C( 734162939), -INT32_C( 1974696822), INT32_C( 1360044816), -INT32_C( 1161799665) } }, { { INT32_C( 1784672524), INT32_C( 1561730174), INT32_C( 1464766164), -INT32_C( 186686416) }, { INT16_C( 1216), -INT16_C( 1580), INT16_C( 26772), -INT16_C( 1622) }, INT16_C( 13229), { INT32_C( 1768586060), INT32_C( 1582631994), INT32_C( 1110599376), -INT32_C( 165228978) } }, { { INT32_C( 1908002331), -INT32_C( 2023569880), INT32_C( 1185210416), -INT32_C( 1948944323) }, { INT16_C( 1488), -INT16_C( 20496), -INT16_C( 20230), -INT16_C( 12620) }, INT16_C( 18601), { INT32_C( 1880324043), -INT32_C( 1642323784), INT32_C( 1561508646), -INT32_C( 1714199703) } }, { { -INT32_C( 465415113), INT32_C( 1102273927), INT32_C( 822598606), INT32_C( 101791586) }, { INT16_C( 20350), INT16_C( 21375), INT16_C( 20442), -INT16_C( 13736) }, INT16_C( 21246), { -INT32_C( 897771213), INT32_C( 648140677), INT32_C( 388287874), INT32_C( 393626642) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int32x4_t r = simde_vmlsl_n_s16(a, b, test_vec[i].c); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); int16_t c = simde_test_codegen_random_i16(); simde_int32x4_t r = simde_vmlsl_n_s16(a, b, c); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsl_n_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[8]; int32_t b[8]; int32_t c; int64_t r[8]; } test_vec[] = { { { -INT64_C( 
2346229598510318991), INT64_C( 3796772103095528739) }, { -INT32_C( 1681176134), -INT32_C( 2129879360) }, INT32_C( 1543559106), { INT64_C( 248765131915257213), INT64_C( 7084366783904980899) } }, { { INT64_C( 5445455234151003318), INT64_C( 5502634789337432744) }, { -INT32_C( 595587551), INT32_C( 410471511) }, -INT32_C( 1399225366), { INT64_C( 4612094025117984652), INT64_C( 6076976939548980770) } }, { { -INT64_C( 272498232800011942), -INT64_C( 324978474139268855) }, { -INT32_C( 599270725), INT32_C( 1085851880) }, -INT32_C( 27774956), { -INT64_C( 289142950818975042), -INT64_C( 294818985949751575) } }, { { -INT64_C( 2945720800232410701), -INT64_C( 1490380179026717033) }, { INT32_C( 1223151245), -INT32_C( 1859899735) }, INT32_C( 181526006), { -INT64_C( 3167754560471188171), -INT64_C( 1152760008571708623) } }, { { -INT64_C( 6571118978326320883), INT64_C( 7105642565012876645) }, { -INT32_C( 129045141), INT32_C( 1698706876) }, INT32_C( 1526097251), { -INT64_C( 6374183543391313492), INT64_C( 4513250671294478769) } }, { { INT64_C( 869034136292345922), INT64_C( 1044884366166122015) }, { -INT32_C( 395305860), -INT32_C( 958349814) }, INT32_C( 1462444531), { INT64_C( 1447147029321597582), INT64_C( 2446417810435289249) } }, { { -INT64_C( 2659632961100373626), -INT64_C( 3519960012574742910) }, { INT32_C( 1088268228), -INT32_C( 852996413) }, INT32_C( 9636108), { -INT64_C( 2670119631278350250), -INT64_C( 3511740447015462306) } }, { { -INT64_C( 3929380913756520918), -INT64_C( 5025300621991571683) }, { -INT32_C( 410425309), -INT32_C( 752392433) }, -INT32_C( 1046458187), { -INT64_C( 4358873838511575701), -INT64_C( 5812647843341270654) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int64x2_t r = simde_vmlsl_n_s32(a, b, test_vec[i].c); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); 
for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); int32_t c = simde_test_codegen_random_i32(); simde_int64x2_t r = simde_vmlsl_n_s32(a, b, c); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsl_n_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[8]; uint16_t b[8]; uint16_t c; uint32_t r[8]; } test_vec[] = { { { UINT32_C(2210477145), UINT32_C(3526564082), UINT32_C(1050454816), UINT32_C(4024320058) }, { UINT16_C( 8396), UINT16_C(61353), UINT16_C(13192), UINT16_C(39126) }, UINT16_C(64922), { UINT32_C(1665392033), UINT32_C(3838371912), UINT32_C( 194003792), UINT32_C(1484181886) } }, { { UINT32_C( 189615979), UINT32_C(3527386640), UINT32_C(1558852137), UINT32_C(2684816132) }, { UINT16_C(16713), UINT16_C(10209), UINT16_C(44337), UINT16_C(55879) }, UINT16_C(53148), { UINT32_C(3596320751), UINT32_C(2984798708), UINT32_C(3497396557), UINT32_C(4009926336) } }, { { UINT32_C(2808574477), UINT32_C(3187069551), UINT32_C( 493029342), UINT32_C(3276770777) }, { UINT16_C(21737), UINT16_C(61902), UINT16_C( 6132), UINT16_C(54578) }, UINT16_C(25406), { UINT32_C(2256324255), UINT32_C(1614387339), UINT32_C( 337239750), UINT32_C(1890162109) } }, { { UINT32_C( 507413890), UINT32_C(3163573077), UINT32_C(3918463219), UINT32_C( 552627645) }, { UINT16_C(51594), UINT16_C(55981), UINT16_C(38797), UINT16_C(23342) }, UINT16_C( 8840), { UINT32_C( 51322930), UINT32_C(2668701037), UINT32_C(3575497739), UINT32_C( 346284365) } }, { { UINT32_C(2985867891), UINT32_C(1547139614), UINT32_C( 698846361), UINT32_C(3609827912) }, { UINT16_C(59012), UINT16_C(29764), UINT16_C(52999), UINT16_C(46142) }, UINT16_C(52137), { UINT32_C(4204126543), 
UINT32_C(4290301242), UINT32_C(2230604794), UINT32_C(1204122458) } }, { { UINT32_C(3542538059), UINT32_C(4052654585), UINT32_C(2188160075), UINT32_C(2936931592) }, { UINT16_C(22062), UINT16_C(22602), UINT16_C(52782), UINT16_C(29246) }, UINT16_C(17730), { UINT32_C(3151378799), UINT32_C(3651921125), UINT32_C(1252335215), UINT32_C(2418400012) } }, { { UINT32_C(3942285377), UINT32_C(1925268811), UINT32_C(2802563865), UINT32_C( 408114860) }, { UINT16_C(23512), UINT16_C(58909), UINT16_C(19466), UINT16_C(21565) }, UINT16_C(27556), { UINT32_C(3294388705), UINT32_C( 301972407), UINT32_C(2266158769), UINT32_C(4108837016) } }, { { UINT32_C(1709040162), UINT32_C( 585441064), UINT32_C(3412537609), UINT32_C(2928050339) }, { UINT16_C(12839), UINT16_C(31237), UINT16_C(56651), UINT16_C(26837) }, UINT16_C(57540), { UINT32_C( 970284102), UINT32_C(3083031380), UINT32_C( 152839069), UINT32_C(1383849359) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint32x4_t r = simde_vmlsl_n_u16(a, b, test_vec[i].c); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); uint16_t c = simde_test_codegen_random_u16(); simde_uint32x4_t r = simde_vmlsl_n_u16(a, b, c); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmlsl_n_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[8]; uint32_t b[8]; uint32_t c; uint64_t r[8]; } test_vec[] = { { { UINT64_C( 5276907511242555828), 
UINT64_C(17064827856324420540) }, { UINT32_C(3278730557), UINT32_C( 703960100) }, UINT32_C(3825614863), { UINT64_C(11180491234320638753), UINT64_C(14371747634805454240) } }, { { UINT64_C( 4014670769694231209), UINT64_C(15685449624397672784) }, { UINT32_C(2563080283), UINT32_C( 425407221) }, UINT32_C(3594670535), { UINT64_C(13247985671264221420), UINT64_C(14156250821692739549) } }, { { UINT64_C(16111767387484342673), UINT64_C(10652305318833704568) }, { UINT32_C(3698098561), UINT32_C(4151652609) }, UINT32_C( 722522212), { UINT64_C(13439809034996605741), UINT64_C( 7652644092323453460) } }, { { UINT64_C(12676994807810315041), UINT64_C( 1331558968493770042) }, { UINT32_C(3181727292), UINT32_C(3516535248) }, UINT32_C(2814906179), { UINT64_C( 3720730993666577773), UINT64_C( 9879586243936824266) } }, { { UINT64_C(14462416489356122591), UINT64_C(18049026699570946446) }, { UINT32_C(3876452011), UINT32_C( 346403396) }, UINT32_C( 132530116), { UINT64_C(13948669854669859315), UINT64_C(18003117817316272510) } }, { { UINT64_C(12911398499793153614), UINT64_C(12369967078570713683) }, { UINT32_C(2762286841), UINT32_C(1619767836) }, UINT32_C( 695545957), { UINT64_C(10990101055461301777), UINT64_C(11243344108962274631) } }, { { UINT64_C(10514741888436689775), UINT64_C( 9264679574992853342) }, { UINT32_C(2888514995), UINT32_C(2136002659) }, UINT32_C(3906984835), { UINT64_C(17676101681011140566), UINT64_C( 919349578760177077) } }, { { UINT64_C(13346771192628990988), UINT64_C( 4120387103836742687) }, { UINT32_C( 28950606), UINT32_C(1622009085) }, UINT32_C( 954203573), { UINT64_C(13319146420943275750), UINT64_C( 2572660239491281982) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint64x2_t r = simde_vmlsl_n_u32(a, b, test_vec[i].c); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); 
for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); uint32_t c = simde_test_codegen_random_u32(); simde_uint64x2_t r = simde_vmlsl_n_u32(a, b, c); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u32(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmlsl_n_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/movl.c000066400000000000000000000300661400333146700164450ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN movl #include "test-neon.h" #include static int test_simde_vmovl_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int16_t r[8]; } test_vec[] = { { { INT8_C( 31), INT8_C( 71), INT8_C( 44), INT8_C( 91), -INT8_C( 52), INT8_C( 8), INT8_C( 55), -INT8_C( 52) }, { INT16_C( 31), INT16_C( 71), INT16_C( 44), INT16_C( 91), -INT16_C( 52), INT16_C( 8), INT16_C( 55), -INT16_C( 52) } }, { { INT8_C( 65), -INT8_C( 81), -INT8_C( 57), INT8_C( 44), INT8_C( 26), -INT8_C( 47), INT8_C( 67), -INT8_C( 127) }, { INT16_C( 65), -INT16_C( 81), -INT16_C( 57), INT16_C( 44), INT16_C( 26), -INT16_C( 47), INT16_C( 67), -INT16_C( 127) } }, { { INT8_C( 98), INT8_C( 108), INT8_C( 40), INT8_C( 95), -INT8_C( 117), -INT8_C( 15), INT8_C( 121), INT8_C( 41) }, { INT16_C( 98), INT16_C( 108), INT16_C( 40), INT16_C( 95), -INT16_C( 117), -INT16_C( 15), INT16_C( 121), INT16_C( 41) } }, { { -INT8_C( 21), -INT8_C( 77), -INT8_C( 55), -INT8_C( 69), INT8_C( 110), INT8_C( 126), INT8_C( 54), -INT8_C( 115) }, { -INT16_C( 21), -INT16_C( 77), -INT16_C( 55), -INT16_C( 69), INT16_C( 110), INT16_C( 126), INT16_C( 54), -INT16_C( 115) 
} }, { { -INT8_C( 59), INT8_C( 98), -INT8_C( 24), -INT8_C( 110), INT8_C( 106), INT8_C( 31), INT8_C( 94), -INT8_C( 85) }, { -INT16_C( 59), INT16_C( 98), -INT16_C( 24), -INT16_C( 110), INT16_C( 106), INT16_C( 31), INT16_C( 94), -INT16_C( 85) } }, { { -INT8_C( 49), INT8_C( 38), -INT8_C( 40), -INT8_C( 23), -INT8_C( 9), INT8_C( 27), INT8_C( 107), INT8_C( 90) }, { -INT16_C( 49), INT16_C( 38), -INT16_C( 40), -INT16_C( 23), -INT16_C( 9), INT16_C( 27), INT16_C( 107), INT16_C( 90) } }, { { -INT8_C( 121), -INT8_C( 109), -INT8_C( 71), INT8_C( 19), -INT8_C( 124), INT8_C( 50), INT8_C( 60), INT8_C( 112) }, { -INT16_C( 121), -INT16_C( 109), -INT16_C( 71), INT16_C( 19), -INT16_C( 124), INT16_C( 50), INT16_C( 60), INT16_C( 112) } }, { { -INT8_C( 27), INT8_C( 5), INT8_C( 43), INT8_C( 83), -INT8_C( 125), INT8_C( 98), -INT8_C( 32), INT8_C( 72) }, { -INT16_C( 27), INT16_C( 5), INT16_C( 43), INT16_C( 83), -INT16_C( 125), INT16_C( 98), -INT16_C( 32), INT16_C( 72) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int16x8_t r = simde_vmovl_s8(a); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vmovl_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int32_t r[4]; } test_vec[] = { { { INT16_C( 6049), INT16_C( 7552), INT16_C( 6479), INT16_C( 19559) }, { INT32_C( 6049), INT32_C( 7552), INT32_C( 6479), INT32_C( 19559) } }, { { INT16_C( 29363), -INT16_C( 9131), -INT16_C( 587), -INT16_C( 3980) }, { INT32_C( 29363), -INT32_C( 9131), -INT32_C( 587), -INT32_C( 3980) } }, { { INT16_C( 709), -INT16_C( 16398), INT16_C( 8777), -INT16_C( 5159) }, { INT32_C( 709), -INT32_C( 16398), INT32_C( 8777), -INT32_C( 5159) } }, { { -INT16_C( 2426), -INT16_C( 1197), INT16_C( 12053), -INT16_C( 18840) }, { -INT32_C( 2426), -INT32_C( 1197), INT32_C( 12053), -INT32_C( 18840) } }, { { -INT16_C( 6074), -INT16_C( 27181), INT16_C( 14849), -INT16_C( 19231) }, { -INT32_C( 
6074), -INT32_C( 27181), INT32_C( 14849), -INT32_C( 19231) } }, { { INT16_C( 13997), INT16_C( 25232), INT16_C( 1075), -INT16_C( 1966) }, { INT32_C( 13997), INT32_C( 25232), INT32_C( 1075), -INT32_C( 1966) } }, { { INT16_C( 17415), INT16_C( 20663), -INT16_C( 28570), -INT16_C( 5060) }, { INT32_C( 17415), INT32_C( 20663), -INT32_C( 28570), -INT32_C( 5060) } }, { { -INT16_C( 28794), -INT16_C( 25369), INT16_C( 20414), INT16_C( 1106) }, { -INT32_C( 28794), -INT32_C( 25369), INT32_C( 20414), INT32_C( 1106) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int32x4_t r = simde_vmovl_s16(a); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vmovl_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int64_t r[2]; } test_vec[] = { { { -INT32_C( 1337807957), INT32_C( 63141997) }, { -INT64_C( 1337807957), INT64_C( 63141997) } }, { { -INT32_C( 455737023), -INT32_C( 1346288334) }, { -INT64_C( 455737023), -INT64_C( 1346288334) } }, { { -INT32_C( 1909724594), -INT32_C( 253267815) }, { -INT64_C( 1909724594), -INT64_C( 253267815) } }, { { -INT32_C( 1154621214), -INT32_C( 662767571) }, { -INT64_C( 1154621214), -INT64_C( 662767571) } }, { { INT32_C( 294174883), INT32_C( 2048150585) }, { INT64_C( 294174883), INT64_C( 2048150585) } }, { { INT32_C( 2136992333), INT32_C( 2116952112) }, { INT64_C( 2136992333), INT64_C( 2116952112) } }, { { -INT32_C( 1559471862), -INT32_C( 1416301623) }, { -INT64_C( 1559471862), -INT64_C( 1416301623) } }, { { INT32_C( 23577043), INT32_C( 1574561209) }, { INT64_C( 23577043), INT64_C( 1574561209) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int64x2_t r = simde_vmovl_s32(a); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vmovl_u8 (SIMDE_MUNIT_TEST_ARGS) { 
struct { uint8_t a[8]; uint16_t r[8]; } test_vec[] = { { { UINT8_C(189), UINT8_C(244), UINT8_C(174), UINT8_C( 32), UINT8_C( 26), UINT8_C(250), UINT8_C( 0), UINT8_C(155) }, { UINT16_C( 189), UINT16_C( 244), UINT16_C( 174), UINT16_C( 32), UINT16_C( 26), UINT16_C( 250), UINT16_C( 0), UINT16_C( 155) } }, { { UINT8_C( 40), UINT8_C(141), UINT8_C( 72), UINT8_C(225), UINT8_C(243), UINT8_C( 40), UINT8_C( 73), UINT8_C(157) }, { UINT16_C( 40), UINT16_C( 141), UINT16_C( 72), UINT16_C( 225), UINT16_C( 243), UINT16_C( 40), UINT16_C( 73), UINT16_C( 157) } }, { { UINT8_C(119), UINT8_C(113), UINT8_C(175), UINT8_C(165), UINT8_C(177), UINT8_C(181), UINT8_C(183), UINT8_C(236) }, { UINT16_C( 119), UINT16_C( 113), UINT16_C( 175), UINT16_C( 165), UINT16_C( 177), UINT16_C( 181), UINT16_C( 183), UINT16_C( 236) } }, { { UINT8_C( 8), UINT8_C( 38), UINT8_C( 77), UINT8_C(212), UINT8_C( 67), UINT8_C(220), UINT8_C(111), UINT8_C( 0) }, { UINT16_C( 8), UINT16_C( 38), UINT16_C( 77), UINT16_C( 212), UINT16_C( 67), UINT16_C( 220), UINT16_C( 111), UINT16_C( 0) } }, { { UINT8_C(208), UINT8_C( 29), UINT8_C( 32), UINT8_C(234), UINT8_C( 23), UINT8_C( 33), UINT8_C(133), UINT8_C( 63) }, { UINT16_C( 208), UINT16_C( 29), UINT16_C( 32), UINT16_C( 234), UINT16_C( 23), UINT16_C( 33), UINT16_C( 133), UINT16_C( 63) } }, { { UINT8_C(174), UINT8_C(206), UINT8_C( 32), UINT8_C(161), UINT8_C(246), UINT8_C(106), UINT8_C( 62), UINT8_C(109) }, { UINT16_C( 174), UINT16_C( 206), UINT16_C( 32), UINT16_C( 161), UINT16_C( 246), UINT16_C( 106), UINT16_C( 62), UINT16_C( 109) } }, { { UINT8_C(219), UINT8_C(237), UINT8_C( 18), UINT8_C(141), UINT8_C(162), UINT8_C(201), UINT8_C(121), UINT8_C(170) }, { UINT16_C( 219), UINT16_C( 237), UINT16_C( 18), UINT16_C( 141), UINT16_C( 162), UINT16_C( 201), UINT16_C( 121), UINT16_C( 170) } }, { { UINT8_C(240), UINT8_C(198), UINT8_C(126), UINT8_C( 51), UINT8_C(162), UINT8_C(238), UINT8_C( 51), UINT8_C(114) }, { UINT16_C( 240), UINT16_C( 198), UINT16_C( 126), UINT16_C( 51), UINT16_C( 162), 
UINT16_C( 238), UINT16_C( 51), UINT16_C( 114) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint16x8_t r = simde_vmovl_u8(a); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vmovl_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint32_t r[4]; } test_vec[] = { { { UINT16_C(33781), UINT16_C(23125), UINT16_C(60863), UINT16_C(50162) }, { UINT32_C( 33781), UINT32_C( 23125), UINT32_C( 60863), UINT32_C( 50162) } }, { { UINT16_C(50430), UINT16_C(23639), UINT16_C(48165), UINT16_C(43126) }, { UINT32_C( 50430), UINT32_C( 23639), UINT32_C( 48165), UINT32_C( 43126) } }, { { UINT16_C(33936), UINT16_C(59873), UINT16_C(56243), UINT16_C(57822) }, { UINT32_C( 33936), UINT32_C( 59873), UINT32_C( 56243), UINT32_C( 57822) } }, { { UINT16_C( 1496), UINT16_C(57739), UINT16_C(11575), UINT16_C(11738) }, { UINT32_C( 1496), UINT32_C( 57739), UINT32_C( 11575), UINT32_C( 11738) } }, { { UINT16_C(12208), UINT16_C(28807), UINT16_C(31004), UINT16_C( 6707) }, { UINT32_C( 12208), UINT32_C( 28807), UINT32_C( 31004), UINT32_C( 6707) } }, { { UINT16_C(35389), UINT16_C(25463), UINT16_C(60742), UINT16_C(54795) }, { UINT32_C( 35389), UINT32_C( 25463), UINT32_C( 60742), UINT32_C( 54795) } }, { { UINT16_C(60529), UINT16_C( 9407), UINT16_C(40392), UINT16_C(40965) }, { UINT32_C( 60529), UINT32_C( 9407), UINT32_C( 40392), UINT32_C( 40965) } }, { { UINT16_C(37027), UINT16_C(55937), UINT16_C(23486), UINT16_C(28167) }, { UINT32_C( 37027), UINT32_C( 55937), UINT32_C( 23486), UINT32_C( 28167) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint32x4_t r = simde_vmovl_u16(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vmovl_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint64_t r[2]; } 
test_vec[] = { { { UINT32_C(1405182796), UINT32_C(3853144912) }, { UINT64_C( 1405182796), UINT64_C( 3853144912) } }, { { UINT32_C( 231259192), UINT32_C(1147577182) }, { UINT64_C( 231259192), UINT64_C( 1147577182) } }, { { UINT32_C(1620251097), UINT32_C(3067746191) }, { UINT64_C( 1620251097), UINT64_C( 3067746191) } }, { { UINT32_C(3303260614), UINT32_C( 925867243) }, { UINT64_C( 3303260614), UINT64_C( 925867243) } }, { { UINT32_C(1351282944), UINT32_C(2100639044) }, { UINT64_C( 1351282944), UINT64_C( 2100639044) } }, { { UINT32_C(1351286257), UINT32_C(2056581281) }, { UINT64_C( 1351286257), UINT64_C( 2056581281) } }, { { UINT32_C(2447058946), UINT32_C( 105428287) }, { UINT64_C( 2447058946), UINT64_C( 105428287) } }, { { UINT32_C(1657416566), UINT32_C(3348756935) }, { UINT64_C( 1657416566), UINT64_C( 3348756935) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint64x2_t r = simde_vmovl_u32(a); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmovl_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmovl_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmovl_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmovl_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmovl_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmovl_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/movl_high.c000066400000000000000000000376351400333146700174550ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN movl_high #include "test-neon.h" #include static int test_simde_vmovl_high_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int16_t r[8]; } test_vec[] = { { { INT8_C( 14), INT8_C( 54), -INT8_C( 9), -INT8_C( 31), -INT8_C( 54), -INT8_C( 22), INT8_C( 44), INT8_C( 62), INT8_C( 23), INT8_C( 107), -INT8_C( 40), INT8_C( 21), -INT8_C( 6), -INT8_C( 80), INT8_C( 20), -INT8_C( 87) }, { INT16_C( 23), INT16_C( 107), -INT16_C( 40), INT16_C( 21), -INT16_C( 6), 
-INT16_C( 80), INT16_C( 20), -INT16_C( 87) } }, { { -INT8_C( 84), INT8_C( 95), -INT8_C( 109), -INT8_C( 90), -INT8_C( 111), INT8_C( 97), INT8_C( 80), -INT8_C( 111), INT8_C( 69), INT8_C( 2), INT8_C( 28), -INT8_C( 73), -INT8_C( 74), INT8_C( 26), INT8_C( 48), -INT8_C( 59) }, { INT16_C( 69), INT16_C( 2), INT16_C( 28), -INT16_C( 73), -INT16_C( 74), INT16_C( 26), INT16_C( 48), -INT16_C( 59) } }, { { INT8_C( 80), INT8_C( 39), -INT8_C( 90), INT8_C( 27), INT8_C( 18), -INT8_C( 46), INT8_C( 89), INT8_C( 41), INT8_C( 62), INT8_C( 49), INT8_C( 63), INT8_C( 56), -INT8_C( 30), INT8_C( 83), -INT8_C( 30), -INT8_C( 114) }, { INT16_C( 62), INT16_C( 49), INT16_C( 63), INT16_C( 56), -INT16_C( 30), INT16_C( 83), -INT16_C( 30), -INT16_C( 114) } }, { { -INT8_C( 77), INT8_C( 117), INT8_C( 52), INT8_C( 68), -INT8_C( 42), -INT8_C( 124), -INT8_C( 43), INT8_C( 28), -INT8_C( 122), -INT8_C( 15), -INT8_C( 45), INT8_C( 61), INT8_C( 12), INT8_C( 3), INT8_C( 2), INT8_C( 92) }, { -INT16_C( 122), -INT16_C( 15), -INT16_C( 45), INT16_C( 61), INT16_C( 12), INT16_C( 3), INT16_C( 2), INT16_C( 92) } }, { { INT8_C( 43), -INT8_C( 88), INT8_C( 119), INT8_C( 61), INT8_C( 122), -INT8_C( 47), INT8_C( 102), -INT8_C( 72), INT8_C( 2), -INT8_C( 91), -INT8_C( 15), -INT8_C( 28), -INT8_C( 7), -INT8_C( 45), INT8_C( 115), -INT8_C( 84) }, { INT16_C( 2), -INT16_C( 91), -INT16_C( 15), -INT16_C( 28), -INT16_C( 7), -INT16_C( 45), INT16_C( 115), -INT16_C( 84) } }, { { INT8_C( 72), -INT8_C( 89), -INT8_C( 16), INT8_C( 31), INT8_C( 44), -INT8_C( 59), INT8_C( 59), -INT8_C( 78), -INT8_C( 73), INT8_C( 14), -INT8_C( 17), -INT8_C( 61), INT8_C( 18), -INT8_C( 15), INT8_C( 31), INT8_C( 61) }, { -INT16_C( 73), INT16_C( 14), -INT16_C( 17), -INT16_C( 61), INT16_C( 18), -INT16_C( 15), INT16_C( 31), INT16_C( 61) } }, { { -INT8_C( 103), -INT8_C( 105), INT8_C( 122), INT8_C( 20), INT8_C( 104), -INT8_C( 32), -INT8_C( 52), INT8_C( 106), -INT8_C( 122), -INT8_C( 67), INT8_C( 79), INT8_MAX, -INT8_C( 112), -INT8_C( 62), INT8_C( 43), -INT8_C( 39) }, { 
-INT16_C( 122), -INT16_C( 67), INT16_C( 79), INT16_C( 127), -INT16_C( 112), -INT16_C( 62), INT16_C( 43), -INT16_C( 39) } }, { { INT8_C( 105), INT8_C( 27), -INT8_C( 8), -INT8_C( 107), -INT8_C( 32), INT8_C( 51), INT8_C( 72), -INT8_C( 105), INT8_C( 65), INT8_C( 55), INT8_C( 90), INT8_C( 83), INT8_C( 41), INT8_C( 122), -INT8_C( 112), -INT8_C( 62) }, { INT16_C( 65), INT16_C( 55), INT16_C( 90), INT16_C( 83), INT16_C( 41), INT16_C( 122), -INT16_C( 112), -INT16_C( 62) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int16x8_t r = simde_vmovl_high_s8(a); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vmovl_high_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int32_t r[4]; } test_vec[] = { { { INT16_C( 28578), INT16_C( 30752), INT16_C( 25600), -INT16_C( 9764), INT16_C( 32653), INT16_C( 10732), INT16_C( 12215), -INT16_C( 28369) }, { INT32_C( 32653), INT32_C( 10732), INT32_C( 12215), -INT32_C( 28369) } }, { { INT16_C( 28571), INT16_C( 25725), -INT16_C( 29640), INT16_C( 24270), -INT16_C( 7947), -INT16_C( 838), -INT16_C( 32612), INT16_C( 16238) }, { -INT32_C( 7947), -INT32_C( 838), -INT32_C( 32612), INT32_C( 16238) } }, { { -INT16_C( 28945), -INT16_C( 3913), -INT16_C( 27662), -INT16_C( 32567), -INT16_C( 19181), -INT16_C( 13655), -INT16_C( 9756), INT16_C( 32604) }, { -INT32_C( 19181), -INT32_C( 13655), -INT32_C( 9756), INT32_C( 32604) } }, { { -INT16_C( 9912), -INT16_C( 32540), -INT16_C( 19867), INT16_C( 23262), -INT16_C( 26478), INT16_C( 12119), -INT16_C( 15080), INT16_C( 2158) }, { -INT32_C( 26478), INT32_C( 12119), -INT32_C( 15080), INT32_C( 2158) } }, { { INT16_C( 9555), INT16_C( 17912), -INT16_C( 15943), -INT16_C( 13115), INT16_C( 28534), INT16_C( 23446), -INT16_C( 3512), -INT16_C( 28454) }, { INT32_C( 28534), INT32_C( 23446), -INT32_C( 3512), -INT32_C( 28454) } }, { { -INT16_C( 16693), INT16_C( 12560), -INT16_C( 
4240), INT16_C( 907), -INT16_C( 7545), -INT16_C( 24526), -INT16_C( 24409), -INT16_C( 1368) }, { -INT32_C( 7545), -INT32_C( 24526), -INT32_C( 24409), -INT32_C( 1368) } }, { { -INT16_C( 24379), INT16_C( 32320), INT16_C( 1377), -INT16_C( 10422), -INT16_C( 7820), -INT16_C( 17358), INT16_C( 3539), -INT16_C( 24756) }, { -INT32_C( 7820), -INT32_C( 17358), INT32_C( 3539), -INT32_C( 24756) } }, { { INT16_C( 24011), INT16_C( 15568), INT16_C( 23372), -INT16_C( 11457), INT16_C( 28990), -INT16_C( 6797), INT16_C( 6929), -INT16_C( 10528) }, { INT32_C( 28990), -INT32_C( 6797), INT32_C( 6929), -INT32_C( 10528) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int32x4_t r = simde_vmovl_high_s16(a); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vmovl_high_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int64_t r[2]; } test_vec[] = { { { -INT32_C( 572680233), -INT32_C( 1924933718), INT32_C( 500354625), INT32_C( 962586996) }, { INT64_C( 500354625), INT64_C( 962586996) } }, { { -INT32_C( 1764297356), INT32_C( 1350824687), INT32_C( 205890779), -INT32_C( 1241391649) }, { INT64_C( 205890779), -INT64_C( 1241391649) } }, { { INT32_C( 513007220), -INT32_C( 156510283), INT32_C( 420708005), -INT32_C( 598576537) }, { INT64_C( 420708005), -INT64_C( 598576537) } }, { { INT32_C( 1467099496), -INT32_C( 156764901), INT32_C( 2013457561), INT32_C( 1043268554) }, { INT64_C( 2013457561), INT64_C( 1043268554) } }, { { -INT32_C( 1755462942), INT32_C( 1066272921), -INT32_C( 296181370), INT32_C( 2093656852) }, { -INT64_C( 296181370), INT64_C( 2093656852) } }, { { -INT32_C( 271369004), -INT32_C( 890930639), INT32_C( 826533735), -INT32_C( 865111318) }, { INT64_C( 826533735), -INT64_C( 865111318) } }, { { -INT32_C( 832254924), INT32_C( 1527640789), -INT32_C( 1488362093), -INT32_C( 467463408) }, { -INT64_C( 1488362093), -INT64_C( 467463408) } }, { { 
-INT32_C( 2133592497), -INT32_C( 666126223), -INT32_C( 1979085153), INT32_C( 878082048) }, { -INT64_C( 1979085153), INT64_C( 878082048) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int64x2_t r = simde_vmovl_high_s32(a); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vmovl_high_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint16_t r[8]; } test_vec[] = { { { UINT8_C( 28), UINT8_C(149), UINT8_C( 21), UINT8_C( 41), UINT8_C( 25), UINT8_C(124), UINT8_C( 35), UINT8_C( 26), UINT8_C(131), UINT8_C(127), UINT8_C( 78), UINT8_C(210), UINT8_C(128), UINT8_C( 54), UINT8_C(221), UINT8_C(127) }, { UINT16_C( 131), UINT16_C( 127), UINT16_C( 78), UINT16_C( 210), UINT16_C( 128), UINT16_C( 54), UINT16_C( 221), UINT16_C( 127) } }, { { UINT8_C(225), UINT8_C(206), UINT8_C(118), UINT8_C( 15), UINT8_C(119), UINT8_C(173), UINT8_C( 76), UINT8_C(125), UINT8_C( 27), UINT8_C( 72), UINT8_C( 81), UINT8_C(180), UINT8_C( 91), UINT8_C(242), UINT8_C( 74), UINT8_C(119) }, { UINT16_C( 27), UINT16_C( 72), UINT16_C( 81), UINT16_C( 180), UINT16_C( 91), UINT16_C( 242), UINT16_C( 74), UINT16_C( 119) } }, { { UINT8_C(135), UINT8_C( 95), UINT8_C(160), UINT8_C(160), UINT8_C(219), UINT8_C(195), UINT8_C(186), UINT8_C( 94), UINT8_C( 66), UINT8_C( 9), UINT8_C( 48), UINT8_C(194), UINT8_C( 63), UINT8_C( 13), UINT8_C( 66), UINT8_C( 33) }, { UINT16_C( 66), UINT16_C( 9), UINT16_C( 48), UINT16_C( 194), UINT16_C( 63), UINT16_C( 13), UINT16_C( 66), UINT16_C( 33) } }, { { UINT8_C(219), UINT8_C(184), UINT8_C( 48), UINT8_C( 82), UINT8_C(101), UINT8_C(124), UINT8_C(208), UINT8_C(128), UINT8_C(197), UINT8_C( 33), UINT8_C( 52), UINT8_C( 32), UINT8_C( 20), UINT8_C(126), UINT8_C(151), UINT8_C(155) }, { UINT16_C( 197), UINT16_C( 33), UINT16_C( 52), UINT16_C( 32), UINT16_C( 20), UINT16_C( 126), UINT16_C( 151), UINT16_C( 155) } }, { { UINT8_C(221), UINT8_C( 55), UINT8_C( 
60), UINT8_C(184), UINT8_C(250), UINT8_C(246), UINT8_C( 22), UINT8_C( 60), UINT8_MAX, UINT8_C( 70), UINT8_MAX, UINT8_C( 63), UINT8_C( 84), UINT8_C( 65), UINT8_C( 96), UINT8_C( 47) }, { UINT16_C( 255), UINT16_C( 70), UINT16_C( 255), UINT16_C( 63), UINT16_C( 84), UINT16_C( 65), UINT16_C( 96), UINT16_C( 47) } }, { { UINT8_C(249), UINT8_C(144), UINT8_C(130), UINT8_C( 95), UINT8_C( 12), UINT8_C( 82), UINT8_C(223), UINT8_C(209), UINT8_C(115), UINT8_C( 20), UINT8_C(241), UINT8_C(135), UINT8_C(146), UINT8_C(136), UINT8_C( 35), UINT8_C(112) }, { UINT16_C( 115), UINT16_C( 20), UINT16_C( 241), UINT16_C( 135), UINT16_C( 146), UINT16_C( 136), UINT16_C( 35), UINT16_C( 112) } }, { { UINT8_C(191), UINT8_C( 95), UINT8_C( 40), UINT8_C(185), UINT8_C( 85), UINT8_C( 63), UINT8_C(246), UINT8_C( 85), UINT8_C(133), UINT8_C(245), UINT8_C(148), UINT8_C(217), UINT8_C( 54), UINT8_C(244), UINT8_C( 9), UINT8_C( 47) }, { UINT16_C( 133), UINT16_C( 245), UINT16_C( 148), UINT16_C( 217), UINT16_C( 54), UINT16_C( 244), UINT16_C( 9), UINT16_C( 47) } }, { { UINT8_C(132), UINT8_C(139), UINT8_C(142), UINT8_C(144), UINT8_C(221), UINT8_C(110), UINT8_C( 98), UINT8_C( 80), UINT8_C(130), UINT8_C( 83), UINT8_C(216), UINT8_C( 20), UINT8_C(220), UINT8_C(251), UINT8_C(132), UINT8_C(155) }, { UINT16_C( 130), UINT16_C( 83), UINT16_C( 216), UINT16_C( 20), UINT16_C( 220), UINT16_C( 251), UINT16_C( 132), UINT16_C( 155) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint16x8_t r = simde_vmovl_high_u8(a); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vmovl_high_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint32_t r[4]; } test_vec[] = { { { UINT16_C(56414), UINT16_C(14753), UINT16_C(11227), UINT16_C( 1697), UINT16_C(52363), UINT16_C(39448), UINT16_C(21201), UINT16_C(23081) }, { UINT32_C( 52363), UINT32_C( 39448), UINT32_C( 21201), UINT32_C( 23081) } }, { { 
UINT16_C(26292), UINT16_C(28020), UINT16_C(49104), UINT16_C(22614), UINT16_C( 3192), UINT16_C(23245), UINT16_C(44189), UINT16_C(64321) }, { UINT32_C( 3192), UINT32_C( 23245), UINT32_C( 44189), UINT32_C( 64321) } }, { { UINT16_C(57992), UINT16_C(25396), UINT16_C(54541), UINT16_C(39273), UINT16_C(33441), UINT16_C(29235), UINT16_C(23764), UINT16_C(35021) }, { UINT32_C( 33441), UINT32_C( 29235), UINT32_C( 23764), UINT32_C( 35021) } }, { { UINT16_C(16834), UINT16_C(37622), UINT16_C(19456), UINT16_C(30955), UINT16_C(47192), UINT16_C(62930), UINT16_C( 4964), UINT16_C(60657) }, { UINT32_C( 47192), UINT32_C( 62930), UINT32_C( 4964), UINT32_C( 60657) } }, { { UINT16_C( 9718), UINT16_C( 847), UINT16_C(47355), UINT16_C(40092), UINT16_C(53050), UINT16_C( 3855), UINT16_C(56363), UINT16_C(61079) }, { UINT32_C( 53050), UINT32_C( 3855), UINT32_C( 56363), UINT32_C( 61079) } }, { { UINT16_C(36125), UINT16_C( 7552), UINT16_C(27610), UINT16_C(12949), UINT16_C(26403), UINT16_C(34600), UINT16_C( 6523), UINT16_C(29043) }, { UINT32_C( 26403), UINT32_C( 34600), UINT32_C( 6523), UINT32_C( 29043) } }, { { UINT16_C(49726), UINT16_C(14708), UINT16_C( 4475), UINT16_C(46550), UINT16_C(58848), UINT16_C( 3268), UINT16_C(23745), UINT16_C(57082) }, { UINT32_C( 58848), UINT32_C( 3268), UINT32_C( 23745), UINT32_C( 57082) } }, { { UINT16_C(31465), UINT16_C(50171), UINT16_C(37094), UINT16_C( 2550), UINT16_C( 7927), UINT16_C(29329), UINT16_C( 1079), UINT16_C(30179) }, { UINT32_C( 7927), UINT32_C( 29329), UINT32_C( 1079), UINT32_C( 30179) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint32x4_t r = simde_vmovl_high_u16(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vmovl_high_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint64_t r[2]; } test_vec[] = { { { UINT32_C(1326137435), UINT32_C( 923094607), UINT32_C(1667697370), 
UINT32_C(4033731226) }, { UINT64_C( 1667697370), UINT64_C( 4033731226) } }, { { UINT32_C(1959824374), UINT32_C( 951202124), UINT32_C(3418443039), UINT32_C(1852570643) }, { UINT64_C( 3418443039), UINT64_C( 1852570643) } }, { { UINT32_C(2344449596), UINT32_C(2680341188), UINT32_C(1711417804), UINT32_C(4183256835) }, { UINT64_C( 1711417804), UINT64_C( 4183256835) } }, { { UINT32_C(1198335995), UINT32_C(2021662553), UINT32_C(2151891309), UINT32_C(2045685565) }, { UINT64_C( 2151891309), UINT64_C( 2045685565) } }, { { UINT32_C(3926174501), UINT32_C( 982107757), UINT32_C(4087385072), UINT32_C(4142790651) }, { UINT64_C( 4087385072), UINT64_C( 4142790651) } }, { { UINT32_C(2017286687), UINT32_C(3891314042), UINT32_C(1013396735), UINT32_C( 146167267) }, { UINT64_C( 1013396735), UINT64_C( 146167267) } }, { { UINT32_C(1844623872), UINT32_C(1906801537), UINT32_C( 40126471), UINT32_C(1593332031) }, { UINT64_C( 40126471), UINT64_C( 1593332031) } }, { { UINT32_C( 651572652), UINT32_C(4060989427), UINT32_C(3727586555), UINT32_C(3387352265) }, { UINT64_C( 3727586555), UINT64_C( 3387352265) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint64x2_t r = simde_vmovl_high_u32(a); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmovl_high_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmovl_high_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmovl_high_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmovl_high_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmovl_high_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmovl_high_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/movn.c000066400000000000000000000341351400333146700164500ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN movn #include "test-neon.h" #include "../../../simde/arm/neon/movn.h" static int test_simde_vmovn_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { 
int16_t a[8]; int8_t r[8]; } test_vec[] = { { { -INT16_C( 9747), INT16_C( 2915), INT16_C( 12465), -INT16_C( 19440), -INT16_C( 27230), INT16_C( 26615), -INT16_C( 10739), -INT16_C( 1162) }, { -INT8_C( 19), INT8_C( 99), -INT8_C( 79), INT8_C( 16), -INT8_C( 94), -INT8_C( 9), INT8_C( 13), INT8_C( 118) } }, { { -INT16_C( 4564), INT16_C( 11796), INT16_C( 560), -INT16_C( 5089), -INT16_C( 4592), -INT16_C( 22646), INT16_C( 6419), INT16_C( 136) }, { INT8_C( 44), INT8_C( 20), INT8_C( 48), INT8_C( 31), INT8_C( 16), -INT8_C( 118), INT8_C( 19), -INT8_C( 120) } }, { { -INT16_C( 5134), -INT16_C( 23541), INT16_C( 7196), -INT16_C( 16808), INT16_C( 20657), -INT16_C( 16602), -INT16_C( 25562), INT16_C( 21178) }, { -INT8_C( 14), INT8_C( 11), INT8_C( 28), INT8_C( 88), -INT8_C( 79), INT8_C( 38), INT8_C( 38), -INT8_C( 70) } }, { { -INT16_C( 12661), -INT16_C( 17536), -INT16_C( 24623), -INT16_C( 7769), INT16_C( 12941), -INT16_C( 24440), INT16_C( 4171), INT16_C( 16032) }, { -INT8_C( 117), INT8_MIN, -INT8_C( 47), -INT8_C( 89), -INT8_C( 115), -INT8_C( 120), INT8_C( 75), -INT8_C( 96) } }, { { -INT16_C( 21253), INT16_C( 6114), INT16_C( 15048), INT16_C( 31190), -INT16_C( 886), -INT16_C( 20424), -INT16_C( 3432), INT16_C( 8962) }, { -INT8_C( 5), -INT8_C( 30), -INT8_C( 56), -INT8_C( 42), -INT8_C( 118), INT8_C( 56), -INT8_C( 104), INT8_C( 2) } }, { { -INT16_C( 31807), -INT16_C( 27937), -INT16_C( 31198), -INT16_C( 20365), -INT16_C( 1096), INT16_C( 1104), -INT16_C( 3829), INT16_C( 1602) }, { -INT8_C( 63), -INT8_C( 33), INT8_C( 34), INT8_C( 115), -INT8_C( 72), INT8_C( 80), INT8_C( 11), INT8_C( 66) } }, { { INT16_C( 9373), INT16_C( 25886), -INT16_C( 2978), -INT16_C( 5666), INT16_C( 6128), -INT16_C( 30567), -INT16_C( 25591), -INT16_C( 13652) }, { -INT8_C( 99), INT8_C( 30), INT8_C( 94), -INT8_C( 34), -INT8_C( 16), -INT8_C( 103), INT8_C( 9), -INT8_C( 84) } }, { { -INT16_C( 29921), INT16_C( 16732), -INT16_C( 12527), -INT16_C( 13583), INT16_C( 17098), -INT16_C( 10802), INT16_C( 4147), -INT16_C( 12068) }, { 
INT8_C( 31), INT8_C( 92), INT8_C( 17), -INT8_C( 15), -INT8_C( 54), -INT8_C( 50), INT8_C( 51), -INT8_C( 36) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int8x8_t r = simde_vmovn_s16(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int8x8_t r = simde_vmovn_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmovn_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int16_t r[4]; } test_vec[] = { { { -INT32_C( 881259170), INT32_C( 492400936), -INT32_C( 382033531), -INT32_C( 1489116661) }, { INT16_C( 3422), INT16_C( 28968), -INT16_C( 24187), -INT16_C( 7669) } }, { { INT32_C( 1727619065), INT32_C( 1734264326), INT32_C( 485245415), INT32_C( 444708540) }, { INT16_C( 24569), -INT16_C( 14842), INT16_C( 16871), -INT16_C( 18756) } }, { { -INT32_C( 320406844), -INT32_C( 251052181), -INT32_C( 338017312), INT32_C( 529733414) }, { -INT16_C( 1340), INT16_C( 16235), INT16_C( 17376), INT16_C( 5926) } }, { { INT32_C( 2089192566), INT32_C( 971236434), -INT32_C( 514404571), INT32_C( 1258084230) }, { -INT16_C( 29578), -INT16_C( 7086), -INT16_C( 12507), -INT16_C( 10362) } }, { { INT32_C( 1027007186), INT32_C( 36585249), -INT32_C( 1460860798), -INT32_C( 1782022113) }, { -INT16_C( 7470), INT16_C( 16161), INT16_C( 2178), -INT16_C( 32737) } }, { { INT32_C( 1594969612), INT32_C( 1486418994), INT32_C( 1245310660), -INT32_C( 1735117370) }, { INT16_C( 19980), -INT16_C( 3022), -INT16_C( 4412), INT16_C( 13766) } }, { { INT32_C( 970312215), -INT32_C( 1959066615), INT32_C( 741615628), -INT32_C( 1245578071) }, { -INT16_C( 13801), INT16_C( 1033), INT16_C( 10252), -INT16_C( 855) } }, { { INT32_C( 2081739594), 
-INT32_C( 1948996153), INT32_C( 1641352859), INT32_C( 1543072067) }, { -INT16_C( 11446), -INT16_C( 21049), INT16_C( 3739), INT16_C( 26947) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int16x4_t r = simde_vmovn_s32(a); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int16x4_t r = simde_vmovn_s32(a); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmovn_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int32_t r[2]; } test_vec[] = { { { INT64_C( 8720536517257111466), -INT64_C( 4813755463156168939) }, { INT32_C( 1374594986), INT32_C( 519767829) } }, { { -INT64_C( 6024405302477077041), INT64_C( 1994957680940228729) }, { -INT32_C( 891304497), -INT32_C( 362733447) } }, { { INT64_C( 4856693694531411243), INT64_C( 5413208539070095737) }, { -INT32_C( 311648981), INT32_C( 1650614649) } }, { { INT64_C( 8813679717742214745), INT64_C( 6928247294813123227) }, { -INT32_C( 1541996967), INT32_C( 191214235) } }, { { INT64_C( 9016967336968491697), INT64_C( 8277051071673631766) }, { -INT32_C( 548498767), -INT32_C( 1830845418) } }, { { INT64_C( 682127594960122876), -INT64_C( 6796147409390543847) }, { -INT32_C( 48827396), INT32_C( 1310055449) } }, { { -INT64_C( 4084866352865542706), INT64_C( 2932466970569289255) }, { -INT32_C( 763298354), INT32_C( 1079586343) } }, { { INT64_C( 5233917445204723754), -INT64_C( 4536454188383094919) }, { -INT32_C( 1725577174), INT32_C( 1771550585) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int32x2_t r = simde_vmovn_s64(a); simde_test_arm_neon_assert_equal_i32x2(r, 
simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int32x2_t r = simde_vmovn_s64(a); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmovn_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint8_t r[8]; } test_vec[] = { { { UINT16_C( 8289), UINT16_C(49720), UINT16_C(65062), UINT16_C(62879), UINT16_C(25787), UINT16_C( 7725), UINT16_C(13523), UINT16_C(54160) }, { UINT8_C( 97), UINT8_C( 56), UINT8_C( 38), UINT8_C(159), UINT8_C(187), UINT8_C( 45), UINT8_C(211), UINT8_C(144) } }, { { UINT16_C( 5140), UINT16_C(59810), UINT16_C(19299), UINT16_C( 5030), UINT16_C(17392), UINT16_C(20341), UINT16_C(61775), UINT16_C(45550) }, { UINT8_C( 20), UINT8_C(162), UINT8_C( 99), UINT8_C(166), UINT8_C(240), UINT8_C(117), UINT8_C( 79), UINT8_C(238) } }, { { UINT16_C( 9745), UINT16_C(14195), UINT16_C( 4644), UINT16_C(57388), UINT16_C(22902), UINT16_C(18942), UINT16_C(36494), UINT16_C(41501) }, { UINT8_C( 17), UINT8_C(115), UINT8_C( 36), UINT8_C( 44), UINT8_C(118), UINT8_C(254), UINT8_C(142), UINT8_C( 29) } }, { { UINT16_C(49058), UINT16_C( 1419), UINT16_C(12554), UINT16_C(64025), UINT16_C(36469), UINT16_C(50249), UINT16_C(14207), UINT16_C(36981) }, { UINT8_C(162), UINT8_C(139), UINT8_C( 10), UINT8_C( 25), UINT8_C(117), UINT8_C( 73), UINT8_C(127), UINT8_C(117) } }, { { UINT16_C(59741), UINT16_C(33480), UINT16_C(62715), UINT16_C(29282), UINT16_C(24654), UINT16_C(56507), UINT16_C(55534), UINT16_C(36990) }, { UINT8_C( 93), UINT8_C(200), UINT8_C(251), UINT8_C( 98), UINT8_C( 78), UINT8_C(187), UINT8_C(238), UINT8_C(126) } }, { { UINT16_C( 2711), UINT16_C(41365), UINT16_C(44603), UINT16_C(45211), UINT16_C(58684), UINT16_C(47989), UINT16_C(59932), UINT16_C(31308) }, { UINT8_C(151), UINT8_C(149), UINT8_C( 59), UINT8_C(155), UINT8_C( 60), 
UINT8_C(117), UINT8_C( 28), UINT8_C( 76) } }, { { UINT16_C( 5331), UINT16_C(53244), UINT16_C(24072), UINT16_C(22081), UINT16_C(64702), UINT16_C(44082), UINT16_C(45525), UINT16_C(27708) }, { UINT8_C(211), UINT8_C(252), UINT8_C( 8), UINT8_C( 65), UINT8_C(190), UINT8_C( 50), UINT8_C(213), UINT8_C( 60) } }, { { UINT16_C(53691), UINT16_C(62990), UINT16_C(43392), UINT16_C(48295), UINT16_C( 7310), UINT16_C(43896), UINT16_C(50182), UINT16_C(55845) }, { UINT8_C(187), UINT8_C( 14), UINT8_C(128), UINT8_C(167), UINT8_C(142), UINT8_C(120), UINT8_C( 6), UINT8_C( 37) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint8x8_t r = simde_vmovn_u16(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint8x8_t r = simde_vmovn_u16(a); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmovn_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint16_t r[4]; } test_vec[] = { { { UINT32_C( 659436180), UINT32_C(2964530838), UINT32_C(1315824826), UINT32_C(1173187531) }, { UINT16_C(12948), UINT16_C( 9878), UINT16_C(58554), UINT16_C(27595) } }, { { UINT32_C(2193938838), UINT32_C(1661786804), UINT32_C(2666353876), UINT32_C(3589341761) }, { UINT16_C(55702), UINT16_C(55988), UINT16_C(21716), UINT16_C( 577) } }, { { UINT32_C(3405528884), UINT32_C( 528199525), UINT32_C(1584261267), UINT32_C(3936574036) }, { UINT16_C(16180), UINT16_C(44901), UINT16_C(59539), UINT16_C(23124) } }, { { UINT32_C(3899484211), UINT32_C( 390822210), UINT32_C( 246757837), UINT32_C(1893967419) }, { UINT16_C(26675), UINT16_C(31042), UINT16_C(14797), UINT16_C(42555) } }, { { UINT32_C(1262149862), UINT32_C( 594261391), UINT32_C(4068595870), 
UINT32_C(1725703475) }, { UINT16_C(57574), UINT16_C(46479), UINT16_C(55454), UINT16_C( 9523) } }, { { UINT32_C(3478014349), UINT32_C(2431032003), UINT32_C( 245341395), UINT32_C( 679379522) }, { UINT16_C(18829), UINT16_C(39619), UINT16_C(40147), UINT16_C(33346) } }, { { UINT32_C(4067735906), UINT32_C( 202760046), UINT32_C(3942553271), UINT32_C(1213324219) }, { UINT16_C(47458), UINT16_C(57198), UINT16_C(38583), UINT16_C(56251) } }, { { UINT32_C(3877150500), UINT32_C( 209255993), UINT32_C(3709540250), UINT32_C(4228225433) }, { UINT16_C(40740), UINT16_C(65081), UINT16_C( 6042), UINT16_C(39321) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint16x4_t r = simde_vmovn_u32(a); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint16x4_t r = simde_vmovn_u32(a); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmovn_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint32_t r[2]; } test_vec[] = { { { UINT64_C( 9187982830363858881), UINT64_C( 5472703971259775409) }, { UINT32_C(1178195905), UINT32_C( 667875761) } }, { { UINT64_C(15094741277317888312), UINT64_C( 3672368221012049671) }, { UINT32_C(1660254520), UINT32_C(2715500295) } }, { { UINT64_C( 2804048724730851254), UINT64_C(16394299647680493812) }, { UINT32_C(1786261430), UINT32_C( 961460468) } }, { { UINT64_C(14754285810614631097), UINT64_C( 3648589129453575423) }, { UINT32_C(4031085241), UINT32_C(1886297343) } }, { { UINT64_C( 733952689181563539), UINT64_C( 8632775679215369534) }, { UINT32_C( 144448147), UINT32_C(3913514302) } }, { { UINT64_C(15191531954389848641), UINT64_C( 4139903880899215813) }, { UINT32_C( 124195393), 
UINT32_C(1094861253) } }, { { UINT64_C(15212720376896360447), UINT64_C( 7647827028808131053) }, { UINT32_C( 339808255), UINT32_C(2881315309) } }, { { UINT64_C( 8665565265690462620), UINT64_C( 7522467582825825414) }, { UINT32_C(1869711772), UINT32_C( 750355590) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint32x2_t r = simde_vmovn_u64(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint32x2_t r = simde_vmovn_u64(a); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmovn_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmovn_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmovn_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vmovn_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmovn_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vmovn_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/movn_high.c000066400000000000000000000565751400333146700174630ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN movn_high #include "test-neon.h" #include "../../../simde/arm/neon/movn_high.h" static int test_simde_vmovn_high_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t r[8]; int16_t a[8]; int8_t res[16]; } test_vec[] = { { { -INT8_C( 75), -INT8_C( 23), -INT8_C( 101), -INT8_C( 44), -INT8_C( 30), -INT8_C( 4), -INT8_C( 58), INT8_C( 42) }, { -INT16_C( 2815), INT16_C( 20026), -INT16_C( 27075), -INT16_C( 9535), -INT16_C( 19091), -INT16_C( 13510), -INT16_C( 18313), INT16_C( 21770) }, { -INT8_C( 75), -INT8_C( 23), -INT8_C( 101), -INT8_C( 44), -INT8_C( 30), -INT8_C( 4), -INT8_C( 58), INT8_C( 42), INT8_C( 1), INT8_C( 58), INT8_C( 61), -INT8_C( 63), INT8_C( 109), INT8_C( 58), INT8_C( 119), INT8_C( 10) 
} }, { { INT8_C( 77), -INT8_C( 89), -INT8_C( 42), INT8_C( 17), -INT8_C( 17), INT8_C( 74), -INT8_C( 118), -INT8_C( 92) }, { INT16_C( 9523), INT16_C( 5496), INT16_C( 16161), INT16_C( 8767), INT16_C( 31028), INT16_C( 29297), INT16_C( 12815), INT16_C( 31820) }, { INT8_C( 77), -INT8_C( 89), -INT8_C( 42), INT8_C( 17), -INT8_C( 17), INT8_C( 74), -INT8_C( 118), -INT8_C( 92), INT8_C( 51), INT8_C( 120), INT8_C( 33), INT8_C( 63), INT8_C( 52), INT8_C( 113), INT8_C( 15), INT8_C( 76) } }, { { -INT8_C( 25), -INT8_C( 121), INT8_C( 71), INT8_C( 95), INT8_C( 63), INT8_C( 81), -INT8_C( 76), -INT8_C( 115) }, { -INT16_C( 29959), -INT16_C( 5986), INT16_C( 10452), INT16_C( 1933), INT16_C( 1357), INT16_C( 28188), INT16_C( 23364), INT16_C( 31120) }, { -INT8_C( 25), -INT8_C( 121), INT8_C( 71), INT8_C( 95), INT8_C( 63), INT8_C( 81), -INT8_C( 76), -INT8_C( 115), -INT8_C( 7), -INT8_C( 98), -INT8_C( 44), -INT8_C( 115), INT8_C( 77), INT8_C( 28), INT8_C( 68), -INT8_C( 112) } }, { { -INT8_C( 44), INT8_C( 1), -INT8_C( 21), -INT8_C( 29), INT8_C( 51), INT8_C( 55), INT8_C( 96), INT8_C( 27) }, { -INT16_C( 22594), -INT16_C( 390), INT16_C( 12025), -INT16_C( 3445), INT16_C( 10681), -INT16_C( 29222), INT16_C( 26449), -INT16_C( 24939) }, { -INT8_C( 44), INT8_C( 1), -INT8_C( 21), -INT8_C( 29), INT8_C( 51), INT8_C( 55), INT8_C( 96), INT8_C( 27), -INT8_C( 66), INT8_C( 122), -INT8_C( 7), -INT8_C( 117), -INT8_C( 71), -INT8_C( 38), INT8_C( 81), -INT8_C( 107) } }, { { INT8_C( 109), -INT8_C( 79), INT8_C( 12), -INT8_C( 79), INT8_C( 13), -INT8_C( 100), INT8_C( 42), -INT8_C( 31) }, { INT16_C( 5534), -INT16_C( 11835), INT16_C( 9549), INT16_C( 3052), INT16_C( 26316), -INT16_C( 15095), -INT16_C( 27499), INT16_C( 20151) }, { INT8_C( 109), -INT8_C( 79), INT8_C( 12), -INT8_C( 79), INT8_C( 13), -INT8_C( 100), INT8_C( 42), -INT8_C( 31), -INT8_C( 98), -INT8_C( 59), INT8_C( 77), -INT8_C( 20), -INT8_C( 52), INT8_C( 9), -INT8_C( 107), -INT8_C( 73) } }, { { -INT8_C( 67), -INT8_C( 110), -INT8_C( 37), INT8_C( 14), -INT8_C( 7), 
INT8_C( 112), -INT8_C( 84), INT8_C( 102) }, { -INT16_C( 18398), INT16_C( 12056), INT16_C( 16981), -INT16_C( 3312), -INT16_C( 10920), -INT16_C( 23100), -INT16_C( 19974), -INT16_C( 14416) }, { -INT8_C( 67), -INT8_C( 110), -INT8_C( 37), INT8_C( 14), -INT8_C( 7), INT8_C( 112), -INT8_C( 84), INT8_C( 102), INT8_C( 34), INT8_C( 24), INT8_C( 85), INT8_C( 16), INT8_C( 88), -INT8_C( 60), -INT8_C( 6), -INT8_C( 80) } }, { { INT8_C( 23), -INT8_C( 70), -INT8_C( 116), -INT8_C( 84), INT8_C( 78), INT8_C( 68), -INT8_C( 6), INT8_C( 12) }, { -INT16_C( 10538), -INT16_C( 12518), -INT16_C( 14522), INT16_C( 26678), INT16_C( 20095), -INT16_C( 11113), -INT16_C( 22384), -INT16_C( 5945) }, { INT8_C( 23), -INT8_C( 70), -INT8_C( 116), -INT8_C( 84), INT8_C( 78), INT8_C( 68), -INT8_C( 6), INT8_C( 12), -INT8_C( 42), INT8_C( 26), INT8_C( 70), INT8_C( 54), INT8_MAX, -INT8_C( 105), -INT8_C( 112), -INT8_C( 57) } }, { { INT8_C( 125), -INT8_C( 116), -INT8_C( 115), INT8_C( 120), INT8_C( 61), INT8_C( 62), INT8_C( 63), INT8_C( 84) }, { -INT16_C( 13320), INT16_C( 17921), -INT16_C( 1265), -INT16_C( 6830), INT16_C( 28113), INT16_C( 6325), -INT16_C( 5324), -INT16_C( 19584) }, { INT8_C( 125), -INT8_C( 116), -INT8_C( 115), INT8_C( 120), INT8_C( 61), INT8_C( 62), INT8_C( 63), INT8_C( 84), -INT8_C( 8), INT8_C( 1), INT8_C( 15), INT8_C( 82), -INT8_C( 47), -INT8_C( 75), INT8_C( 52), INT8_MIN } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t r = simde_vld1_s8(test_vec[i].r); simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int8x16_t res = simde_vmovn_high_s16(r, a); simde_test_arm_neon_assert_equal_i8x16(res, simde_vld1q_s8(test_vec[i].res)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t r = simde_test_arm_neon_random_i8x8(); simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int8x16_t res = simde_vmovn_high_s16(r, a); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_FIRST); 
simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, res, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmovn_high_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t r[4]; int32_t a[4]; int16_t res[8]; } test_vec[] = { { { INT16_C( 16471), -INT16_C( 23696), -INT16_C( 17690), -INT16_C( 5790) }, { INT32_C( 191683823), -INT32_C( 713549285), -INT32_C( 2095428269), -INT32_C( 654902461) }, { INT16_C( 16471), -INT16_C( 23696), -INT16_C( 17690), -INT16_C( 5790), -INT16_C( 8977), INT16_C( 6683), INT16_C( 19795), -INT16_C( 1213) } }, { { -INT16_C( 3155), INT16_C( 24211), -INT16_C( 1303), INT16_C( 16724) }, { INT32_C( 568640570), INT32_C( 1846167166), INT32_C( 1031370530), -INT32_C( 468520303) }, { -INT16_C( 3155), INT16_C( 24211), -INT16_C( 1303), INT16_C( 16724), -INT16_C( 15302), INT16_C( 18046), INT16_C( 30498), -INT16_C( 3439) } }, { { INT16_C( 11583), -INT16_C( 32153), INT16_C( 24104), -INT16_C( 10918) }, { INT32_C( 993259089), INT32_C( 578586600), -INT32_C( 901554101), -INT32_C( 935833946) }, { INT16_C( 11583), -INT16_C( 32153), INT16_C( 24104), -INT16_C( 10918), -INT16_C( 4527), -INT16_C( 30744), INT16_C( 24651), INT16_C( 20134) } }, { { -INT16_C( 20027), INT16_C( 22021), INT16_C( 6051), -INT16_C( 7621) }, { INT32_C( 1818600004), INT32_C( 1380105984), -INT32_C( 1785891411), INT32_C( 1220020733) }, { -INT16_C( 20027), INT16_C( 22021), INT16_C( 6051), -INT16_C( 7621), -INT16_C( 23996), -INT16_C( 16640), INT16_C( 30125), INT16_C( 2557) } }, { { -INT16_C( 1175), INT16_C( 3858), INT16_C( 19017), INT16_C( 3799) }, { -INT32_C( 1620714244), INT32_C( 948084979), INT32_C( 1134880578), INT32_C( 1419110054) }, { -INT16_C( 1175), INT16_C( 3858), INT16_C( 19017), INT16_C( 3799), -INT16_C( 8964), -INT16_C( 24333), -INT16_C( 6334), -INT16_C( 6490) } }, { { INT16_C( 8796), INT16_C( 23017), -INT16_C( 24277), -INT16_C( 27487) }, { -INT32_C( 425479011), -INT32_C( 84575490), INT32_C( 1234852438), 
INT32_C( 1015094522) }, { INT16_C( 8796), INT16_C( 23017), -INT16_C( 24277), -INT16_C( 27487), -INT16_C( 19299), INT16_C( 31486), INT16_C( 23126), INT16_C( 7418) } }, { { INT16_C( 9731), -INT16_C( 22145), INT16_C( 5132), INT16_C( 26877) }, { INT32_C( 1640097590), INT32_C( 636838792), INT32_C( 353146903), INT32_C( 1745879314) }, { INT16_C( 9731), -INT16_C( 22145), INT16_C( 5132), INT16_C( 26877), -INT16_C( 6346), INT16_C( 25480), -INT16_C( 26601), INT16_C( 274) } }, { { -INT16_C( 21925), INT16_C( 21938), INT16_C( 13254), -INT16_C( 13935) }, { INT32_C( 1718751577), INT32_C( 1557032997), -INT32_C( 541224873), INT32_C( 168145907) }, { -INT16_C( 21925), INT16_C( 21938), INT16_C( 13254), -INT16_C( 13935), INT16_C( 4441), INT16_C( 28709), -INT16_C( 28585), -INT16_C( 19469) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t r = simde_vld1_s16(test_vec[i].r); simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int16x8_t res = simde_vmovn_high_s32(r, a); simde_test_arm_neon_assert_equal_i16x8(res, simde_vld1q_s16(test_vec[i].res)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t r = simde_test_arm_neon_random_i16x4(); simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int16x8_t res = simde_vmovn_high_s32(r, a); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, res, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmovn_high_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t r[2]; int64_t a[2]; int32_t res[4]; } test_vec[] = { { { -INT32_C( 1624253440), -INT32_C( 1322384523) }, { INT64_C( 5771356035658123911), -INT64_C( 4696279848783989999) }, { -INT32_C( 1624253440), -INT32_C( 1322384523), -INT32_C( 1984591225), INT32_C( 124236561) } }, { { -INT32_C( 1855335696), -INT32_C( 2096352893) }, { -INT64_C( 4236953495512401139), 
-INT64_C( 871614883780630309) }, { -INT32_C( 1855335696), -INT32_C( 2096352893), -INT32_C( 2111685875), -INT32_C( 1756370725) } }, { { INT32_C( 1996115482), -INT32_C( 1271542332) }, { -INT64_C( 2547257489644675425), -INT64_C( 823717716859893619) }, { INT32_C( 1996115482), -INT32_C( 1271542332), INT32_C( 575053471), -INT32_C( 882980723) } }, { { INT32_C( 1535893626), INT32_C( 1615753798) }, { -INT64_C( 5387980717616052031), INT64_C( 6869535149564330154) }, { INT32_C( 1535893626), INT32_C( 1615753798), -INT32_C( 2049488703), INT32_C( 2044166314) } }, { { INT32_C( 1596633926), -INT32_C( 1101743292) }, { INT64_C( 1387786340373553051), -INT64_C( 3495969626035578447) }, { INT32_C( 1596633926), -INT32_C( 1101743292), -INT32_C( 501555301), -INT32_C( 963110479) } }, { { INT32_C( 608719698), INT32_C( 377724368) }, { INT64_C( 383872948084584016), -INT64_C( 8714982225589342807) }, { INT32_C( 608719698), INT32_C( 377724368), -INT32_C( 1787384240), -INT32_C( 68719191) } }, { { INT32_C( 1749919555), -INT32_C( 868693639) }, { INT64_C( 7927307445275230236), -INT64_C( 1400523536505538270) }, { INT32_C( 1749919555), -INT32_C( 868693639), -INT32_C( 319782884), -INT32_C( 1962706654) } }, { { -INT32_C( 1696041020), -INT32_C( 467536223) }, { INT64_C( 6116878095343185821), INT64_C( 7606935218418668293) }, { -INT32_C( 1696041020), -INT32_C( 467536223), INT32_C( 390885277), INT32_C( 591450885) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t r = simde_vld1_s32(test_vec[i].r); simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int32x4_t res = simde_vmovn_high_s64(r, a); simde_test_arm_neon_assert_equal_i32x4(res, simde_vld1q_s32(test_vec[i].res)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t r = simde_test_arm_neon_random_i32x2(); simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int32x4_t res = simde_vmovn_high_s64(r, a); simde_test_arm_neon_write_i32x2(2, r, 
SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, res, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmovn_high_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t r[8]; uint16_t a[8]; uint8_t res[16]; } test_vec[] = { { { UINT8_C(213), UINT8_C(212), UINT8_C(127), UINT8_C(175), UINT8_C(133), UINT8_C( 47), UINT8_C( 76), UINT8_C( 45) }, { UINT16_C(46071), UINT16_C(11665), UINT16_C(45861), UINT16_C(64159), UINT16_C(43655), UINT16_C(56017), UINT16_C( 9101), UINT16_C(43587) }, { UINT8_C(213), UINT8_C(212), UINT8_C(127), UINT8_C(175), UINT8_C(133), UINT8_C( 47), UINT8_C( 76), UINT8_C( 45), UINT8_C(247), UINT8_C(145), UINT8_C( 37), UINT8_C(159), UINT8_C(135), UINT8_C(209), UINT8_C(141), UINT8_C( 67) } }, { { UINT8_C(112), UINT8_C(211), UINT8_C(183), UINT8_C(247), UINT8_C( 1), UINT8_C(237), UINT8_C( 68), UINT8_C(215) }, { UINT16_C(50113), UINT16_C(18310), UINT16_C(54002), UINT16_C(60020), UINT16_C( 1413), UINT16_C(43543), UINT16_C(46776), UINT16_C(16548) }, { UINT8_C(112), UINT8_C(211), UINT8_C(183), UINT8_C(247), UINT8_C( 1), UINT8_C(237), UINT8_C( 68), UINT8_C(215), UINT8_C(193), UINT8_C(134), UINT8_C(242), UINT8_C(116), UINT8_C(133), UINT8_C( 23), UINT8_C(184), UINT8_C(164) } }, { { UINT8_C( 96), UINT8_C(118), UINT8_C( 26), UINT8_C(238), UINT8_C(153), UINT8_C( 93), UINT8_C(152), UINT8_C( 9) }, { UINT16_C(20272), UINT16_C(12800), UINT16_C(17468), UINT16_C(64777), UINT16_C(36615), UINT16_C(63812), UINT16_C(47457), UINT16_C(59363) }, { UINT8_C( 96), UINT8_C(118), UINT8_C( 26), UINT8_C(238), UINT8_C(153), UINT8_C( 93), UINT8_C(152), UINT8_C( 9), UINT8_C( 48), UINT8_C( 0), UINT8_C( 60), UINT8_C( 9), UINT8_C( 7), UINT8_C( 68), UINT8_C( 97), UINT8_C(227) } }, { { UINT8_C(190), UINT8_C(250), UINT8_C(145), UINT8_C(119), UINT8_C(176), UINT8_C( 54), UINT8_C(183), UINT8_C( 17) }, { UINT16_C(53676), UINT16_C(17919), UINT16_C(38703), UINT16_C(24398), UINT16_C(20198), 
UINT16_C( 8849), UINT16_C(39570), UINT16_C(39199) }, { UINT8_C(190), UINT8_C(250), UINT8_C(145), UINT8_C(119), UINT8_C(176), UINT8_C( 54), UINT8_C(183), UINT8_C( 17), UINT8_C(172), UINT8_MAX, UINT8_C( 47), UINT8_C( 78), UINT8_C(230), UINT8_C(145), UINT8_C(146), UINT8_C( 31) } }, { { UINT8_C( 41), UINT8_C(100), UINT8_C(146), UINT8_C(139), UINT8_C( 29), UINT8_C(118), UINT8_C(114), UINT8_C(219) }, { UINT16_C( 880), UINT16_C( 8530), UINT16_C( 2361), UINT16_C(58674), UINT16_C(12763), UINT16_C( 2602), UINT16_C(30920), UINT16_C(44649) }, { UINT8_C( 41), UINT8_C(100), UINT8_C(146), UINT8_C(139), UINT8_C( 29), UINT8_C(118), UINT8_C(114), UINT8_C(219), UINT8_C(112), UINT8_C( 82), UINT8_C( 57), UINT8_C( 50), UINT8_C(219), UINT8_C( 42), UINT8_C(200), UINT8_C(105) } }, { { UINT8_C(198), UINT8_C(251), UINT8_C(208), UINT8_C( 88), UINT8_C(149), UINT8_C(239), UINT8_C(241), UINT8_C(191) }, { UINT16_C(33875), UINT16_C(28746), UINT16_C(48378), UINT16_C(27212), UINT16_C(40639), UINT16_C(63883), UINT16_C(48552), UINT16_C(33758) }, { UINT8_C(198), UINT8_C(251), UINT8_C(208), UINT8_C( 88), UINT8_C(149), UINT8_C(239), UINT8_C(241), UINT8_C(191), UINT8_C( 83), UINT8_C( 74), UINT8_C(250), UINT8_C( 76), UINT8_C(191), UINT8_C(139), UINT8_C(168), UINT8_C(222) } }, { { UINT8_C(238), UINT8_C( 9), UINT8_C(141), UINT8_C(182), UINT8_C(129), UINT8_C(246), UINT8_C(100), UINT8_C( 72) }, { UINT16_C(13553), UINT16_C(34720), UINT16_C(37412), UINT16_C(30534), UINT16_C(36886), UINT16_C( 4328), UINT16_C(13388), UINT16_C( 2938) }, { UINT8_C(238), UINT8_C( 9), UINT8_C(141), UINT8_C(182), UINT8_C(129), UINT8_C(246), UINT8_C(100), UINT8_C( 72), UINT8_C(241), UINT8_C(160), UINT8_C( 36), UINT8_C( 70), UINT8_C( 22), UINT8_C(232), UINT8_C( 76), UINT8_C(122) } }, { { UINT8_C(210), UINT8_C( 6), UINT8_C( 4), UINT8_C(122), UINT8_C(195), UINT8_C(227), UINT8_C(253), UINT8_C(178) }, { UINT16_C(35564), UINT16_C(28008), UINT16_C(52609), UINT16_C(29365), UINT16_C(22017), UINT16_C( 9721), UINT16_C(16360), UINT16_C(65181) }, { 
UINT8_C(210), UINT8_C( 6), UINT8_C( 4), UINT8_C(122), UINT8_C(195), UINT8_C(227), UINT8_C(253), UINT8_C(178), UINT8_C(236), UINT8_C(104), UINT8_C(129), UINT8_C(181), UINT8_C( 1), UINT8_C(249), UINT8_C(232), UINT8_C(157) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t r = simde_vld1_u8(test_vec[i].r); simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint8x16_t res = simde_vmovn_high_u16(r, a); simde_test_arm_neon_assert_equal_u8x16(res, simde_vld1q_u8(test_vec[i].res)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t r = simde_test_arm_neon_random_u8x8(); simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint8x16_t res = simde_vmovn_high_u16(r, a); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, res, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmovn_high_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t r[4]; uint32_t a[4]; uint16_t res[8]; } test_vec[] = { { { UINT16_C(34255), UINT16_C( 6926), UINT16_C(35001), UINT16_C(35623) }, { UINT32_C(1376136078), UINT32_C(4194566926), UINT32_C( 258501774), UINT32_C( 998317369) }, { UINT16_C(34255), UINT16_C( 6926), UINT16_C(35001), UINT16_C(35623), UINT16_C(11150), UINT16_C( 782), UINT16_C(27790), UINT16_C( 7481) } }, { { UINT16_C(31603), UINT16_C(23392), UINT16_C(64954), UINT16_C(35417) }, { UINT32_C(1000695682), UINT32_C(2127023344), UINT32_C( 114347512), UINT32_C(1577178320) }, { UINT16_C(31603), UINT16_C(23392), UINT16_C(64954), UINT16_C(35417), UINT16_C(26498), UINT16_C(52464), UINT16_C(52728), UINT16_C(54480) } }, { { UINT16_C(26945), UINT16_C(31341), UINT16_C(61318), UINT16_C(64181) }, { UINT32_C( 609556074), UINT32_C(2528030483), UINT32_C( 114381846), UINT32_C( 411408416) }, { UINT16_C(26945), UINT16_C(31341), UINT16_C(61318), UINT16_C(64181), UINT16_C( 
5738), UINT16_C(44819), UINT16_C(21526), UINT16_C(38944) } }, { { UINT16_C(21861), UINT16_C(13855), UINT16_C( 8234), UINT16_C(27540) }, { UINT32_C( 266666633), UINT32_C(1527356401), UINT32_C(3296681905), UINT32_C( 609889806) }, { UINT16_C(21861), UINT16_C(13855), UINT16_C( 8234), UINT16_C(27540), UINT16_C( 649), UINT16_C(39921), UINT16_C(24497), UINT16_C(11790) } }, { { UINT16_C(11394), UINT16_C(41515), UINT16_C(45252), UINT16_C(10939) }, { UINT32_C( 794876421), UINT32_C(2207970554), UINT32_C(3885138166), UINT32_C(3426917403) }, { UINT16_C(11394), UINT16_C(41515), UINT16_C(45252), UINT16_C(10939), UINT16_C(55813), UINT16_C(62714), UINT16_C(33014), UINT16_C(39963) } }, { { UINT16_C(49915), UINT16_C( 2448), UINT16_C(60400), UINT16_C(29229) }, { UINT32_C(3675543575), UINT32_C( 235261704), UINT32_C(2738709929), UINT32_C(1344723034) }, { UINT16_C(49915), UINT16_C( 2448), UINT16_C(60400), UINT16_C(29229), UINT16_C(22551), UINT16_C(53000), UINT16_C(26025), UINT16_C(55386) } }, { { UINT16_C(47448), UINT16_C(29496), UINT16_C(31317), UINT16_C(20543) }, { UINT32_C( 744083260), UINT32_C(3516827322), UINT32_C(3886920671), UINT32_C( 754299522) }, { UINT16_C(47448), UINT16_C(29496), UINT16_C(31317), UINT16_C(20543), UINT16_C(53052), UINT16_C(34490), UINT16_C(46047), UINT16_C(45698) } }, { { UINT16_C(13080), UINT16_C(29391), UINT16_C(62987), UINT16_C(25538) }, { UINT32_C( 81197743), UINT32_C(2975077749), UINT32_C(2682170852), UINT32_C( 309361715) }, { UINT16_C(13080), UINT16_C(29391), UINT16_C(62987), UINT16_C(25538), UINT16_C(64175), UINT16_C( 5493), UINT16_C(44516), UINT16_C(31795) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t r = simde_vld1_u16(test_vec[i].r); simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint16x8_t res = simde_vmovn_high_u32(r, a); simde_test_arm_neon_assert_equal_u16x8(res, simde_vld1q_u16(test_vec[i].res)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t 
r = simde_test_arm_neon_random_u16x4(); simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint16x8_t res = simde_vmovn_high_u32(r, a); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, res, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmovn_high_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t r[2]; uint64_t a[2]; uint32_t res[4]; } test_vec[] = { { { UINT32_C(3002735919), UINT32_C(3906924496) }, { UINT64_C( 5949286594922589474), UINT64_C( 6934886448832603671) }, { UINT32_C(3002735919), UINT32_C(3906924496), UINT32_C( 760917282), UINT32_C(2354472471) } }, { { UINT32_C(2348751703), UINT32_C(3348983704) }, { UINT64_C(12197251404703307661), UINT64_C(15202859022808489733) }, { UINT32_C(2348751703), UINT32_C(3348983704), UINT32_C(1568249741), UINT32_C(2832703237) } }, { { UINT32_C(1230918094), UINT32_C(1403624700) }, { UINT64_C(11896113384583506103), UINT64_C(17096869149747089427) }, { UINT32_C(1230918094), UINT32_C(1403624700), UINT32_C(1339992247), UINT32_C(2583859219) } }, { { UINT32_C(2710903782), UINT32_C(1349816450) }, { UINT64_C( 2752054990760235746), UINT64_C(11504881256092733676) }, { UINT32_C(2710903782), UINT32_C(1349816450), UINT32_C(3734688482), UINT32_C( 74780908) } }, { { UINT32_C( 87730973), UINT32_C(3639770866) }, { UINT64_C(18045058494762682265), UINT64_C( 3842706591977047744) }, { UINT32_C( 87730973), UINT32_C(3639770866), UINT32_C( 460949401), UINT32_C( 785909440) } }, { { UINT32_C(2772027929), UINT32_C(1933959766) }, { UINT64_C(10833245065660891021), UINT64_C( 9080415450746180082) }, { UINT32_C(2772027929), UINT32_C(1933959766), UINT32_C(2138603405), UINT32_C( 179425778) } }, { { UINT32_C(1840110627), UINT32_C(4288807397) }, { UINT64_C( 5518573921246698699), UINT64_C( 7708794187185589864) }, { UINT32_C(1840110627), UINT32_C(4288807397), UINT32_C( 581229771), UINT32_C(1707806312) } 
}, { { UINT32_C(2993991156), UINT32_C(3996219594) }, { UINT64_C(12338171789356818004), UINT64_C( 3450991823874285017) }, { UINT32_C(2993991156), UINT32_C(3996219594), UINT32_C( 979099220), UINT32_C(2563628505) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t r = simde_vld1_u32(test_vec[i].r); simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint32x4_t res = simde_vmovn_high_u64(r, a); simde_test_arm_neon_assert_equal_u32x4(res, simde_vld1q_u32(test_vec[i].res)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t r = simde_test_arm_neon_random_u32x2(); simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint32x4_t res = simde_vmovn_high_u64(r, a); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, res, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmovn_high_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmovn_high_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmovn_high_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vmovn_high_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmovn_high_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vmovn_high_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/mul.c000066400000000000000000001721361400333146700162720ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN mul #include "test-neon.h" #include "../../../simde/arm/neon/mul.h" static int test_simde_vmul_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -984.85), SIMDE_FLOAT32_C( 239.90) }, { SIMDE_FLOAT32_C( -108.56), SIMDE_FLOAT32_C( 891.83) }, { SIMDE_FLOAT32_C(106915.31), SIMDE_FLOAT32_C(213950.02) } }, { { SIMDE_FLOAT32_C( 288.24), SIMDE_FLOAT32_C( -869.25) }, { SIMDE_FLOAT32_C( -156.82), SIMDE_FLOAT32_C( 921.87) }, { SIMDE_FLOAT32_C(-45201.80), 
SIMDE_FLOAT32_C(-801335.50) } }, { { SIMDE_FLOAT32_C( -879.78), SIMDE_FLOAT32_C( -268.41) }, { SIMDE_FLOAT32_C( -165.21), SIMDE_FLOAT32_C( -936.42) }, { SIMDE_FLOAT32_C(145348.47), SIMDE_FLOAT32_C(251344.48) } }, { { SIMDE_FLOAT32_C( 385.59), SIMDE_FLOAT32_C( -229.84) }, { SIMDE_FLOAT32_C( 996.35), SIMDE_FLOAT32_C( -804.89) }, { SIMDE_FLOAT32_C(384182.59), SIMDE_FLOAT32_C(184995.92) } }, { { SIMDE_FLOAT32_C( 339.75), SIMDE_FLOAT32_C( -605.97) }, { SIMDE_FLOAT32_C( 882.89), SIMDE_FLOAT32_C( 300.48) }, { SIMDE_FLOAT32_C(299961.88), SIMDE_FLOAT32_C(-182081.86) } }, { { SIMDE_FLOAT32_C( -227.17), SIMDE_FLOAT32_C( 998.92) }, { SIMDE_FLOAT32_C( -867.68), SIMDE_FLOAT32_C( 668.07) }, { SIMDE_FLOAT32_C(197110.86), SIMDE_FLOAT32_C(667348.50) } }, { { SIMDE_FLOAT32_C( 105.88), SIMDE_FLOAT32_C( -369.44) }, { SIMDE_FLOAT32_C( 748.88), SIMDE_FLOAT32_C( 812.67) }, { SIMDE_FLOAT32_C( 79291.41), SIMDE_FLOAT32_C(-300232.81) } }, { { SIMDE_FLOAT32_C( -814.69), SIMDE_FLOAT32_C( -31.04) }, { SIMDE_FLOAT32_C( -710.62), SIMDE_FLOAT32_C( -799.54) }, { SIMDE_FLOAT32_C(578935.00), SIMDE_FLOAT32_C( 24817.72) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a, b, r; a = simde_vld1_f32(test_vec[i].a); b = simde_vld1_f32(test_vec[i].b); r = simde_vmul_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vmul_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[1]; simde_float64 b[1]; simde_float64 r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C(-859.012888) }, { SIMDE_FLOAT64_C(808.899130) }, { SIMDE_FLOAT64_C(-694854.777259) } }, { { SIMDE_FLOAT64_C(-512.686665) }, { SIMDE_FLOAT64_C(364.360016) }, { SIMDE_FLOAT64_C(-186802.521685) } }, { { SIMDE_FLOAT64_C(429.047777) }, { SIMDE_FLOAT64_C(643.143308) }, { SIMDE_FLOAT64_C(275939.206701) } }, { { SIMDE_FLOAT64_C(515.485107) }, { SIMDE_FLOAT64_C(158.472572) }, { SIMDE_FLOAT64_C(81690.250725) } }, { { SIMDE_FLOAT64_C(-433.956881) }, 
{ SIMDE_FLOAT64_C(78.045774) }, { SIMDE_FLOAT64_C(-33868.500567) } }, { { SIMDE_FLOAT64_C(-998.339714) }, { SIMDE_FLOAT64_C(751.053657) }, { SIMDE_FLOAT64_C(-749806.693756) } }, { { SIMDE_FLOAT64_C(735.460464) }, { SIMDE_FLOAT64_C(670.199813) }, { SIMDE_FLOAT64_C(492905.465423) } }, { { SIMDE_FLOAT64_C(190.192630) }, { SIMDE_FLOAT64_C(-439.362447) }, { SIMDE_FLOAT64_C(-83563.499287) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_float64x1_t r = simde_vmul_f64(a, b); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vmul_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 106), INT8_C( 59), INT8_C( 38), INT8_C( 92), INT8_C( 101), -INT8_C( 69), INT8_C( 33), -INT8_C( 125) }, { INT8_C( 116), INT8_C( 9), INT8_C( 111), -INT8_C( 27), -INT8_C( 82), -INT8_C( 53), -INT8_C( 72), -INT8_C( 89) }, { INT8_C( 8), INT8_C( 19), INT8_C( 122), INT8_C( 76), -INT8_C( 90), INT8_C( 73), -INT8_C( 72), INT8_C( 117) } }, { { -INT8_C( 49), INT8_C( 114), -INT8_C( 110), -INT8_C( 77), INT8_C( 44), -INT8_C( 114), -INT8_C( 19), -INT8_C( 70) }, { -INT8_C( 124), INT8_C( 116), -INT8_C( 15), -INT8_C( 94), INT8_C( 118), INT8_C( 102), INT8_C( 35), -INT8_C( 31) }, { -INT8_C( 68), -INT8_C( 88), INT8_C( 114), INT8_C( 70), INT8_C( 72), -INT8_C( 108), INT8_C( 103), INT8_C( 122) } }, { { -INT8_C( 95), INT8_C( 73), INT8_C( 61), INT8_C( 7), INT8_C( 4), INT8_C( 94), -INT8_C( 118), INT8_C( 120) }, { INT8_C( 104), -INT8_C( 6), INT8_C( 93), INT8_C( 22), -INT8_C( 59), INT8_C( 21), -INT8_C( 67), -INT8_C( 108) }, { INT8_C( 104), INT8_C( 74), INT8_C( 41), -INT8_C( 102), INT8_C( 20), -INT8_C( 74), -INT8_C( 30), INT8_C( 96) } }, { { -INT8_C( 121), INT8_C( 80), INT8_C( 71), -INT8_C( 76), -INT8_C( 34), INT8_C( 52), INT8_C( 110), INT8_C( 99) }, { -INT8_C( 88), 
INT8_C( 96), INT8_C( 5), INT8_C( 31), -INT8_C( 58), INT8_C( 40), INT8_C( 0), INT8_C( 104) }, { -INT8_C( 104), INT8_C( 0), INT8_C( 99), -INT8_C( 52), -INT8_C( 76), INT8_C( 32), INT8_C( 0), INT8_C( 56) } }, { { INT8_C( 113), INT8_C( 61), INT8_C( 111), INT8_C( 117), -INT8_C( 101), -INT8_C( 7), -INT8_C( 19), INT8_C( 3) }, { -INT8_C( 13), INT8_C( 75), INT8_C( 25), -INT8_C( 72), INT8_C( 96), -INT8_C( 41), INT8_C( 76), -INT8_C( 24) }, { INT8_C( 67), -INT8_C( 33), -INT8_C( 41), INT8_C( 24), INT8_C( 32), INT8_C( 31), INT8_C( 92), -INT8_C( 72) } }, { { INT8_C( 39), -INT8_C( 109), -INT8_C( 100), INT8_C( 5), -INT8_C( 56), INT8_C( 10), INT8_C( 104), INT8_C( 112) }, { INT8_C( 106), INT8_C( 109), -INT8_C( 113), INT8_C( 49), -INT8_C( 107), -INT8_C( 113), -INT8_C( 103), INT8_C( 6) }, { INT8_C( 38), -INT8_C( 105), INT8_C( 36), -INT8_C( 11), INT8_C( 104), -INT8_C( 106), INT8_C( 40), -INT8_C( 96) } }, { { -INT8_C( 52), INT8_C( 8), INT8_C( 123), INT8_C( 104), INT8_C( 1), INT8_C( 105), INT8_C( 107), -INT8_C( 11) }, { -INT8_C( 76), -INT8_C( 123), -INT8_C( 83), INT8_C( 20), INT8_C( 92), -INT8_C( 6), -INT8_C( 4), -INT8_C( 125) }, { INT8_C( 112), INT8_C( 40), INT8_C( 31), INT8_C( 32), INT8_C( 92), -INT8_C( 118), INT8_C( 84), INT8_C( 95) } }, { { -INT8_C( 115), -INT8_C( 104), -INT8_C( 120), INT8_C( 85), -INT8_C( 93), -INT8_C( 15), -INT8_C( 58), INT8_C( 13) }, { INT8_C( 94), INT8_C( 85), INT8_C( 62), -INT8_C( 12), -INT8_C( 27), -INT8_C( 41), -INT8_C( 6), -INT8_C( 79) }, { -INT8_C( 58), INT8_C( 120), -INT8_C( 16), INT8_C( 4), -INT8_C( 49), INT8_C( 103), INT8_C( 92), -INT8_C( 3) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vmul_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vmul_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } 
test_vec[] = { { { -INT16_C( 1153), INT16_C( 9267), INT16_C( 13091), -INT16_C( 11549) }, { INT16_C( 16786), INT16_C( 22839), -INT16_C( 14400), INT16_C( 22119) }, { -INT16_C( 21138), -INT16_C( 32267), -INT16_C( 28864), INT16_C( 6997) } }, { { INT16_C( 1251), INT16_C( 20541), -INT16_C( 32133), INT16_C( 24648) }, { -INT16_C( 27063), INT16_C( 19566), -INT16_C( 16105), -INT16_C( 27036) }, { INT16_C( 26299), -INT16_C( 27082), INT16_C( 29709), -INT16_C( 13280) } }, { { -INT16_C( 26692), -INT16_C( 8262), -INT16_C( 25142), INT16_C( 23730) }, { -INT16_C( 5666), -INT16_C( 24907), INT16_C( 7345), -INT16_C( 27404) }, { -INT16_C( 20216), -INT16_C( 1406), INT16_C( 12458), INT16_C( 16808) } }, { { INT16_C( 12577), -INT16_C( 25372), INT16_C( 11444), -INT16_C( 516) }, { INT16_C( 27331), -INT16_C( 9655), -INT16_C( 21204), -INT16_C( 6032) }, { INT16_C( 5667), -INT16_C( 6908), INT16_C( 21232), INT16_C( 32320) } }, { { INT16_C( 10820), INT16_C( 3784), INT16_C( 31431), -INT16_C( 23190) }, { INT16_C( 8035), INT16_C( 5187), INT16_C( 14139), INT16_C( 23721) }, { -INT16_C( 27572), INT16_C( 32344), INT16_C( 3293), INT16_C( 19194) } }, { { -INT16_C( 29336), INT16_C( 7417), -INT16_C( 2630), INT16_C( 32025) }, { INT16_C( 25184), -INT16_C( 29609), -INT16_C( 14577), INT16_C( 21364) }, { -INT16_C( 10496), INT16_C( 1183), -INT16_C( 1050), -INT16_C( 13740) } }, { { INT16_C( 15601), -INT16_C( 18335), -INT16_C( 13386), INT16_C( 6749) }, { -INT16_C( 24342), INT16_C( 9774), -INT16_C( 10281), INT16_C( 16258) }, { INT16_C( 21578), -INT16_C( 30866), -INT16_C( 4134), INT16_C( 17978) } }, { { INT16_C( 31589), INT16_C( 8028), INT16_C( 30065), -INT16_C( 11876) }, { -INT16_C( 3112), -INT16_C( 6307), -INT16_C( 11846), -INT16_C( 21701) }, { -INT16_C( 968), INT16_C( 26732), -INT16_C( 27366), -INT16_C( 32012) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = 
simde_vmul_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vmul_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 1902437050), -INT32_C( 1986881182) }, { -INT32_C( 584919357), INT32_C( 952226700) }, { INT32_C( 1679044526), INT32_C( 595175320) } }, { { -INT32_C( 1394667052), -INT32_C( 1073928011) }, { -INT32_C( 106226047), -INT32_C( 865115374) }, { INT32_C( 1562039252), INT32_C( 1225120698) } }, { { -INT32_C( 1489120443), INT32_C( 791727980) }, { INT32_C( 839668390), -INT32_C( 110441179) }, { -INT32_C( 1593894722), -INT32_C( 689998180) } }, { { -INT32_C( 1817883938), -INT32_C( 212622478) }, { -INT32_C( 739377471), -INT32_C( 1499505567) }, { -INT32_C( 1720994466), -INT32_C( 1321474510) } }, { { -INT32_C( 1689396177), INT32_C( 1372224939) }, { -INT32_C( 192686129), -INT32_C( 2081493340) }, { INT32_C( 1877836545), -INT32_C( 58229876) } }, { { -INT32_C( 1441361096), -INT32_C( 140678858) }, { -INT32_C( 926250393), INT32_C( 359557606) }, { -INT32_C( 2069795960), -INT32_C( 1429754748) } }, { { -INT32_C( 240075963), INT32_C( 121797432) }, { -INT32_C( 151206318), -INT32_C( 327554380) }, { -INT32_C( 1050715622), -INT32_C( 1126706336) } }, { { -INT32_C( 1265201027), INT32_C( 1638609913) }, { -INT32_C( 1540786498), INT32_C( 632985823) }, { -INT32_C( 1503798586), INT32_C( 2062032359) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vmul_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_x_vmul_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int64_t b[1]; int64_t r[1]; } test_vec[] = { { { -INT64_C( 6852233445081966377) }, { INT64_C( 949766532573427103) }, { INT64_C( 3051662338158473353) } }, { { 
INT64_C( 4361299594231041259) }, { -INT64_C( 4420890052168132134) }, { INT64_C( 4739764582358168350) } }, { { INT64_C( 1444579558535101295) }, { -INT64_C( 3616007996289735901) }, { -INT64_C( 1463454656528282323) } }, { { INT64_C( 5747443551797445544) }, { -INT64_C( 9184083115752015394) }, { INT64_C( 6787112144328823728) } }, { { INT64_C( 2493297645073518865) }, { -INT64_C( 3869798096008984677) }, { -INT64_C( 2727157459429045173) } }, { { -INT64_C( 5945009003509396806) }, { INT64_C( 1648038210740580174) }, { INT64_C( 970083354321644204) } }, { { INT64_C( 6842152849050946515) }, { INT64_C( 5469947826171337468) }, { -INT64_C( 8410656588808695372) } }, { { -INT64_C( 6958067674496943118) }, { -INT64_C( 4934575301326870935) }, { INT64_C( 6969990240459227714) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r = simde_x_vmul_s64(a, b); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); simde_int64x1_t b = simde_test_arm_neon_random_i64x1(); simde_int64x1_t r = simde_x_vmul_s64(a, b); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmul_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C( 24), UINT8_C(211), UINT8_C(229), UINT8_C( 35), UINT8_C(121), UINT8_C(154), UINT8_C(180), UINT8_C( 84) }, { UINT8_C( 5), UINT8_C(153), UINT8_C(140), UINT8_C( 32), UINT8_C( 16), UINT8_C(217), UINT8_C(232), UINT8_C(155) }, { UINT8_C(120), UINT8_C( 27), UINT8_C( 60), UINT8_C( 96), UINT8_C(144), UINT8_C(138), UINT8_C( 32), UINT8_C(220) } }, { { UINT8_C( 
45), UINT8_C(107), UINT8_C( 56), UINT8_C(203), UINT8_C(110), UINT8_C(126), UINT8_C(239), UINT8_C(144) }, { UINT8_C( 35), UINT8_C(237), UINT8_C(204), UINT8_C(112), UINT8_C( 65), UINT8_C( 80), UINT8_C(228), UINT8_C( 89) }, { UINT8_C( 39), UINT8_C( 15), UINT8_C(160), UINT8_C(208), UINT8_C(238), UINT8_C( 96), UINT8_C(220), UINT8_C( 16) } }, { { UINT8_C( 35), UINT8_C(201), UINT8_C(124), UINT8_C(156), UINT8_C( 99), UINT8_C( 48), UINT8_C(240), UINT8_C(105) }, { UINT8_C(202), UINT8_C(124), UINT8_C(137), UINT8_C(218), UINT8_C( 86), UINT8_C(113), UINT8_C(117), UINT8_C(131) }, { UINT8_C(158), UINT8_C( 92), UINT8_C( 92), UINT8_C(216), UINT8_C( 66), UINT8_C( 48), UINT8_C(176), UINT8_C(187) } }, { { UINT8_C(220), UINT8_C(173), UINT8_C( 78), UINT8_C( 75), UINT8_C( 44), UINT8_C( 62), UINT8_C(219), UINT8_C( 79) }, { UINT8_C( 43), UINT8_C(167), UINT8_C(191), UINT8_C(108), UINT8_C(248), UINT8_C(164), UINT8_C(197), UINT8_C( 27) }, { UINT8_C(244), UINT8_C(219), UINT8_C( 50), UINT8_C(164), UINT8_C(160), UINT8_C(184), UINT8_C(135), UINT8_C( 85) } }, { { UINT8_C(109), UINT8_C( 65), UINT8_C(184), UINT8_C(209), UINT8_C(113), UINT8_C(168), UINT8_C( 58), UINT8_C( 59) }, { UINT8_C( 37), UINT8_C(195), UINT8_C( 21), UINT8_C(123), UINT8_C( 53), UINT8_C(138), UINT8_C(254), UINT8_C( 17) }, { UINT8_C(193), UINT8_C(131), UINT8_C( 24), UINT8_C(107), UINT8_C(101), UINT8_C(144), UINT8_C(140), UINT8_C(235) } }, { { UINT8_C( 56), UINT8_C( 76), UINT8_C( 92), UINT8_C(100), UINT8_C(138), UINT8_C( 55), UINT8_C(179), UINT8_C(181) }, { UINT8_C(223), UINT8_C(115), UINT8_C( 33), UINT8_C(215), UINT8_C( 23), UINT8_C(230), UINT8_C(242), UINT8_C(132) }, { UINT8_C(200), UINT8_C( 36), UINT8_C(220), UINT8_C(252), UINT8_C(102), UINT8_C(106), UINT8_C( 54), UINT8_C( 84) } }, { { UINT8_C( 39), UINT8_C(170), UINT8_C( 85), UINT8_C(153), UINT8_C( 83), UINT8_C(143), UINT8_C(212), UINT8_C(120) }, { UINT8_C( 83), UINT8_C(234), UINT8_C(243), UINT8_C(136), UINT8_C(116), UINT8_C(241), UINT8_C(153), UINT8_C(172) }, { UINT8_C(165), 
UINT8_C(100), UINT8_C(175), UINT8_C( 72), UINT8_C(156), UINT8_C(159), UINT8_C(180), UINT8_C(160) } }, { { UINT8_C( 61), UINT8_C(246), UINT8_C( 16), UINT8_C(200), UINT8_C( 45), UINT8_C(196), UINT8_C(125), UINT8_C( 12) }, { UINT8_C( 55), UINT8_C(159), UINT8_C(227), UINT8_C( 78), UINT8_C(133), UINT8_C(214), UINT8_C(210), UINT8_C(173) }, { UINT8_C( 27), UINT8_C(202), UINT8_C( 48), UINT8_C(240), UINT8_C( 97), UINT8_C(216), UINT8_C(138), UINT8_C( 28) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vmul_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vmul_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(26526), UINT16_C( 3086), UINT16_C(48363), UINT16_C( 5470) }, { UINT16_C(17470), UINT16_C(33907), UINT16_C(19149), UINT16_C(42742) }, { UINT16_C( 4164), UINT16_C(41546), UINT16_C(13871), UINT16_C(31828) } }, { { UINT16_C(19501), UINT16_C(28414), UINT16_C( 6061), UINT16_C(12929) }, { UINT16_C(38218), UINT16_C( 4045), UINT16_C(45293), UINT16_C(35808) }, { UINT16_C(13826), UINT16_C(50022), UINT16_C(56105), UINT16_C(15328) } }, { { UINT16_C(61207), UINT16_C( 664), UINT16_C(63147), UINT16_C(59672) }, { UINT16_C(35642), UINT16_C( 1902), UINT16_C(25813), UINT16_C( 942) }, { UINT16_C(43062), UINT16_C(17744), UINT16_C( 2119), UINT16_C(46672) } }, { { UINT16_C(44209), UINT16_C(24177), UINT16_C(62147), UINT16_C( 3472) }, { UINT16_C(23944), UINT16_C(29980), UINT16_C(64781), UINT16_C( 9472) }, { UINT16_C( 2824), UINT16_C(63836), UINT16_C( 2791), UINT16_C(53248) } }, { { UINT16_C(39148), UINT16_C(38695), UINT16_C(16270), UINT16_C(51585) }, { UINT16_C(61387), UINT16_C(41168), UINT16_C(32339), UINT16_C( 1187) }, { UINT16_C(38692), UINT16_C(12208), UINT16_C(32522), UINT16_C(20771) } }, { { UINT16_C( 
5163), UINT16_C(61026), UINT16_C(61959), UINT16_C(36860) }, { UINT16_C( 6223), UINT16_C(23812), UINT16_C( 1045), UINT16_C( 386) }, { UINT16_C(16709), UINT16_C(21384), UINT16_C(63123), UINT16_C( 6648) } }, { { UINT16_C(43421), UINT16_C(11161), UINT16_C( 6889), UINT16_C(46324) }, { UINT16_C(50441), UINT16_C(23636), UINT16_C(63555), UINT16_C(28257) }, { UINT16_C(51077), UINT16_C(18996), UINT16_C(49915), UINT16_C(26740) } }, { { UINT16_C(49932), UINT16_C( 4957), UINT16_C(22966), UINT16_C( 1442) }, { UINT16_C(42609), UINT16_C(34658), UINT16_C(58539), UINT16_C(18568) }, { UINT16_C(57420), UINT16_C(29850), UINT16_C( 1170), UINT16_C(36368) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vmul_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vmul_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(3631289062), UINT32_C(1255180013) }, { UINT32_C(3711528889), UINT32_C(4134291800) }, { UINT32_C( 874406454), UINT32_C(3626235512) } }, { { UINT32_C(2370034702), UINT32_C(1186131006) }, { UINT32_C(2414703895), UINT32_C(4202144275) }, { UINT32_C(1067526978), UINT32_C(3583558810) } }, { { UINT32_C(2731731124), UINT32_C( 686596719) }, { UINT32_C(1778787601), UINT32_C(2103472751) }, { UINT32_C(1205565428), UINT32_C( 999283745) } }, { { UINT32_C(2349507406), UINT32_C(2815671695) }, { UINT32_C(1177993266), UINT32_C( 390114658) }, { UINT32_C( 453141820), UINT32_C(2464691134) } }, { { UINT32_C( 79237781), UINT32_C(3308103092) }, { UINT32_C( 976171979), UINT32_C(4088893605) }, { UINT32_C(3369102119), UINT32_C( 323816708) } }, { { UINT32_C(3279929651), UINT32_C(2976535166) }, { UINT32_C(1979162643), UINT32_C(3800840013) }, { UINT32_C( 309343945), UINT32_C(2375803366) } }, { { UINT32_C(4259792201), 
UINT32_C(3066172651) }, { UINT32_C(3975213639), UINT32_C(3051333506) }, { UINT32_C(1259551039), UINT32_C(3387485270) } }, { { UINT32_C(3883425384), UINT32_C(3298353841) }, { UINT32_C(3476656002), UINT32_C( 263374534) }, { UINT32_C(3842246864), UINT32_C(1269185254) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vmul_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_x_vmul_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C( 9501235154748714381) }, { UINT64_C( 6872502071409485626) }, { UINT64_C( 5840718951435488498) } }, { { UINT64_C(16836630811398367501) }, { UINT64_C( 1040243009521352130) }, { UINT64_C( 3736209579242157274) } }, { { UINT64_C( 1542624550018396125) }, { UINT64_C(18212347914471204647) }, { UINT64_C( 150701147483813291) } }, { { UINT64_C( 475528078903446059) }, { UINT64_C(13868812939453805543) }, { UINT64_C(13365877717431238093) } }, { { UINT64_C(14090089345526864794) }, { UINT64_C(15627738843931896820) }, { UINT64_C( 8057496985447148744) } }, { { UINT64_C(17705374232516993899) }, { UINT64_C(13400413542018855764) }, { UINT64_C( 6886449617527500828) } }, { { UINT64_C(11335481825455676656) }, { UINT64_C(13738336576390328611) }, { UINT64_C(15315597398385190096) } }, { { UINT64_C(11819456772893978628) }, { UINT64_C( 3111937016080328024) }, { UINT64_C( 1531824312858580320) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_x_vmul_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { 
simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t b = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t r = simde_x_vmul_u64(a, b); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C(-875.908447), SIMDE_FLOAT32_C(-952.067810), SIMDE_FLOAT32_C(-835.896851), SIMDE_FLOAT32_C(-332.003418) }, { SIMDE_FLOAT32_C(-353.433044), SIMDE_FLOAT32_C(411.022461), SIMDE_FLOAT32_C(-878.854858), SIMDE_FLOAT32_C(965.658569) }, { SIMDE_FLOAT32_C(309575.000000), SIMDE_FLOAT32_C(-391321.250000), SIMDE_FLOAT32_C(734632.000000), SIMDE_FLOAT32_C(-320601.937500) } }, { { SIMDE_FLOAT32_C(-818.029663), SIMDE_FLOAT32_C(-58.492615), SIMDE_FLOAT32_C(-314.083435), SIMDE_FLOAT32_C(-471.082703) }, { SIMDE_FLOAT32_C(100.064087), SIMDE_FLOAT32_C(-957.928711), SIMDE_FLOAT32_C(680.392578), SIMDE_FLOAT32_C(301.457642) }, { SIMDE_FLOAT32_C(-81855.390625), SIMDE_FLOAT32_C(56031.753906), SIMDE_FLOAT32_C(-213700.031250), SIMDE_FLOAT32_C(-142011.484375) } }, { { SIMDE_FLOAT32_C(143.138428), SIMDE_FLOAT32_C(-445.185791), SIMDE_FLOAT32_C(108.660034), SIMDE_FLOAT32_C(-105.336975) }, { SIMDE_FLOAT32_C(-543.193237), SIMDE_FLOAT32_C(437.742188), SIMDE_FLOAT32_C(569.496582), SIMDE_FLOAT32_C(-743.131836) }, { SIMDE_FLOAT32_C(-77751.828125), SIMDE_FLOAT32_C(-194876.609375), SIMDE_FLOAT32_C(61881.519531), SIMDE_FLOAT32_C(78279.257812) } }, { { SIMDE_FLOAT32_C(-259.595825), SIMDE_FLOAT32_C(409.617432), SIMDE_FLOAT32_C(718.381958), SIMDE_FLOAT32_C(310.151978) }, { SIMDE_FLOAT32_C(521.223389), SIMDE_FLOAT32_C(-42.746582), SIMDE_FLOAT32_C(-267.409790), SIMDE_FLOAT32_C(645.314941) }, { SIMDE_FLOAT32_C(-135307.421875), SIMDE_FLOAT32_C(-17509.746094), 
SIMDE_FLOAT32_C(-192102.375000), SIMDE_FLOAT32_C(200145.703125) } }, { { SIMDE_FLOAT32_C(5.185669), SIMDE_FLOAT32_C(-103.306641), SIMDE_FLOAT32_C(-686.688354), SIMDE_FLOAT32_C(651.752686) }, { SIMDE_FLOAT32_C(-692.284058), SIMDE_FLOAT32_C(-565.543213), SIMDE_FLOAT32_C(617.411255), SIMDE_FLOAT32_C(-510.313782) }, { SIMDE_FLOAT32_C(-3589.956055), SIMDE_FLOAT32_C(58424.371094), SIMDE_FLOAT32_C(-423969.125000), SIMDE_FLOAT32_C(-332598.375000) } }, { { SIMDE_FLOAT32_C(375.964233), SIMDE_FLOAT32_C(-696.671997), SIMDE_FLOAT32_C(18.603516), SIMDE_FLOAT32_C(-523.971558) }, { SIMDE_FLOAT32_C(-654.600769), SIMDE_FLOAT32_C(-301.003845), SIMDE_FLOAT32_C(777.486084), SIMDE_FLOAT32_C(488.537720) }, { SIMDE_FLOAT32_C(-246106.468750), SIMDE_FLOAT32_C(209700.953125), SIMDE_FLOAT32_C(14463.974609), SIMDE_FLOAT32_C(-255979.875000) } }, { { SIMDE_FLOAT32_C(253.810303), SIMDE_FLOAT32_C(-113.853699), SIMDE_FLOAT32_C(-616.799194), SIMDE_FLOAT32_C(710.617065) }, { SIMDE_FLOAT32_C(-676.111450), SIMDE_FLOAT32_C(952.697266), SIMDE_FLOAT32_C(967.485229), SIMDE_FLOAT32_C(64.292725) }, { SIMDE_FLOAT32_C(-171604.046875), SIMDE_FLOAT32_C(-108468.109375), SIMDE_FLOAT32_C(-596744.125000), SIMDE_FLOAT32_C(45687.507812) } }, { { SIMDE_FLOAT32_C(362.314941), SIMDE_FLOAT32_C(685.867310), SIMDE_FLOAT32_C(-625.555176), SIMDE_FLOAT32_C(-116.461548) }, { SIMDE_FLOAT32_C(-356.879150), SIMDE_FLOAT32_C(107.035034), SIMDE_FLOAT32_C(-471.146545), SIMDE_FLOAT32_C(648.306519) }, { SIMDE_FLOAT32_C(-129302.648438), SIMDE_FLOAT32_C(73411.828125), SIMDE_FLOAT32_C(294728.156250), SIMDE_FLOAT32_C(-75502.781250) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vmulq_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vmulq_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; 
simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C(-110.548430), SIMDE_FLOAT64_C(608.512841) }, { SIMDE_FLOAT64_C(736.040173), SIMDE_FLOAT64_C(-280.442189) }, { SIMDE_FLOAT64_C(-81368.085325), SIMDE_FLOAT64_C(-170652.673215) } }, { { SIMDE_FLOAT64_C(-149.158503), SIMDE_FLOAT64_C(-505.167299) }, { SIMDE_FLOAT64_C(-802.637817), SIMDE_FLOAT64_C(-512.437647) }, { SIMDE_FLOAT64_C(119720.254897), SIMDE_FLOAT64_C(258866.741888) } }, { { SIMDE_FLOAT64_C(-15.761167), SIMDE_FLOAT64_C(-532.799363) }, { SIMDE_FLOAT64_C(-516.739889), SIMDE_FLOAT64_C(-825.864527) }, { SIMDE_FLOAT64_C(8144.423830), SIMDE_FLOAT64_C(440020.093999) } }, { { SIMDE_FLOAT64_C(-142.146788), SIMDE_FLOAT64_C(-460.594133) }, { SIMDE_FLOAT64_C(887.546627), SIMDE_FLOAT64_C(-409.867550) }, { SIMDE_FLOAT64_C(-126161.902322), SIMDE_FLOAT64_C(188782.588915) } }, { { SIMDE_FLOAT64_C(-230.600942), SIMDE_FLOAT64_C(-52.274368) }, { SIMDE_FLOAT64_C(150.986619), SIMDE_FLOAT64_C(67.500747) }, { SIMDE_FLOAT64_C(-34817.656482), SIMDE_FLOAT64_C(-3528.558923) } }, { { SIMDE_FLOAT64_C(389.471275), SIMDE_FLOAT64_C(-747.723748) }, { SIMDE_FLOAT64_C(-950.827661), SIMDE_FLOAT64_C(917.085530) }, { SIMDE_FLOAT64_C(-370320.061840), SIMDE_FLOAT64_C(-685726.629746) } }, { { SIMDE_FLOAT64_C(352.081057), SIMDE_FLOAT64_C(409.278147) }, { SIMDE_FLOAT64_C(684.852159), SIMDE_FLOAT64_C(-854.134726) }, { SIMDE_FLOAT64_C(241123.472398), SIMDE_FLOAT64_C(-349578.677755) } }, { { SIMDE_FLOAT64_C(-169.233569), SIMDE_FLOAT64_C(-633.363110) }, { SIMDE_FLOAT64_C(-43.526636), SIMDE_FLOAT64_C(720.218002) }, { SIMDE_FLOAT64_C(7366.167936), SIMDE_FLOAT64_C(-456159.513343) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vmulq_f64(a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vmulq_s8 
(SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { -INT8_C( 74), INT8_C( 14), INT8_C( 98), -INT8_C( 34), INT8_C( 126), INT8_C( 71), -INT8_C( 106), -INT8_C( 123), -INT8_C( 76), INT8_C( 59), -INT8_C( 35), INT8_C( 50), INT8_C( 118), -INT8_C( 17), INT8_C( 52), INT8_C( 99) }, { INT8_C( 90), INT8_C( 106), -INT8_C( 48), -INT8_C( 89), INT8_C( 6), -INT8_C( 49), INT8_C( 117), -INT8_C( 108), -INT8_C( 71), INT8_C( 79), INT8_C( 109), -INT8_C( 80), -INT8_C( 40), INT8_C( 27), INT8_C( 105), -INT8_C( 114) }, { -INT8_C( 4), -INT8_C( 52), -INT8_C( 96), -INT8_C( 46), -INT8_C( 12), INT8_C( 105), -INT8_C( 114), -INT8_C( 28), INT8_C( 20), INT8_C( 53), INT8_C( 25), INT8_C( 96), -INT8_C( 112), INT8_C( 53), INT8_C( 84), -INT8_C( 22) } }, { { INT8_C( 41), -INT8_C( 53), INT8_C( 108), -INT8_C( 89), INT8_C( 18), INT8_C( 3), INT8_C( 44), -INT8_C( 58), INT8_C( 62), INT8_C( 10), -INT8_C( 8), -INT8_C( 76), -INT8_C( 7), INT8_C( 44), INT8_C( 23), INT8_C( 84) }, { -INT8_C( 106), -INT8_C( 25), -INT8_C( 5), -INT8_C( 100), -INT8_C( 74), INT8_C( 112), INT8_C( 48), INT8_C( 111), -INT8_C( 64), -INT8_C( 99), INT8_C( 32), -INT8_C( 104), -INT8_C( 72), -INT8_C( 119), INT8_C( 38), -INT8_C( 31) }, { INT8_C( 6), INT8_C( 45), -INT8_C( 28), -INT8_C( 60), -INT8_C( 52), INT8_C( 80), INT8_C( 64), -INT8_C( 38), INT8_MIN, INT8_C( 34), INT8_C( 0), -INT8_C( 32), -INT8_C( 8), -INT8_C( 116), INT8_C( 106), -INT8_C( 44) } }, { { INT8_C( 84), -INT8_C( 109), -INT8_C( 120), INT8_C( 102), -INT8_C( 106), -INT8_C( 76), INT8_C( 44), -INT8_C( 44), -INT8_C( 66), INT8_C( 36), -INT8_C( 120), -INT8_C( 72), INT8_C( 80), -INT8_C( 97), INT8_C( 12), -INT8_C( 26) }, { -INT8_C( 122), INT8_C( 7), -INT8_C( 126), INT8_C( 61), INT8_C( 119), -INT8_C( 78), -INT8_C( 84), INT8_C( 55), INT8_C( 79), -INT8_C( 52), -INT8_C( 49), INT8_C( 7), INT8_C( 85), -INT8_C( 10), -INT8_C( 24), -INT8_C( 87) }, { -INT8_C( 8), INT8_C( 5), INT8_C( 16), INT8_C( 78), -INT8_C( 70), INT8_C( 40), -INT8_C( 112), -INT8_C( 116), 
-INT8_C( 94), -INT8_C( 80), -INT8_C( 8), INT8_C( 8), -INT8_C( 112), -INT8_C( 54), -INT8_C( 32), -INT8_C( 42) } }, { { -INT8_C( 119), INT8_C( 112), INT8_C( 15), INT8_C( 31), INT8_C( 36), INT8_C( 59), -INT8_C( 13), -INT8_C( 29), INT8_C( 95), INT8_C( 123), -INT8_C( 101), -INT8_C( 81), INT8_C( 26), -INT8_C( 89), -INT8_C( 107), -INT8_C( 96) }, { -INT8_C( 82), INT8_C( 23), -INT8_C( 35), INT8_C( 37), -INT8_C( 55), -INT8_C( 118), INT8_C( 93), INT8_C( 24), INT8_C( 86), INT8_C( 44), INT8_C( 31), -INT8_C( 84), INT8_C( 34), INT8_C( 7), INT8_C( 85), -INT8_C( 85) }, { INT8_C( 30), INT8_C( 16), -INT8_C( 13), INT8_C( 123), INT8_C( 68), -INT8_C( 50), INT8_C( 71), INT8_C( 72), -INT8_C( 22), INT8_C( 36), -INT8_C( 59), -INT8_C( 108), INT8_C( 116), -INT8_C( 111), INT8_C( 121), -INT8_C( 32) } }, { { INT8_C( 119), INT8_C( 101), -INT8_C( 54), -INT8_C( 100), -INT8_C( 96), -INT8_C( 67), INT8_MAX, INT8_C( 0), INT8_C( 56), INT8_C( 26), -INT8_C( 81), INT8_C( 82), -INT8_C( 63), INT8_C( 69), -INT8_C( 13), INT8_C( 111) }, { INT8_C( 92), -INT8_C( 48), -INT8_C( 108), INT8_C( 38), INT8_C( 90), -INT8_C( 15), INT8_C( 62), -INT8_C( 79), INT8_C( 30), INT8_C( 94), INT8_C( 93), INT8_C( 64), INT8_C( 101), -INT8_C( 78), -INT8_C( 20), -INT8_C( 35) }, { -INT8_C( 60), INT8_C( 16), -INT8_C( 56), INT8_C( 40), INT8_C( 64), -INT8_C( 19), -INT8_C( 62), INT8_C( 0), -INT8_C( 112), -INT8_C( 116), -INT8_C( 109), INT8_MIN, INT8_C( 37), -INT8_C( 6), INT8_C( 4), -INT8_C( 45) } }, { { INT8_C( 23), -INT8_C( 74), INT8_C( 121), -INT8_C( 72), INT8_C( 116), -INT8_C( 8), -INT8_C( 72), -INT8_C( 84), INT8_C( 18), INT8_C( 103), -INT8_C( 1), -INT8_C( 45), -INT8_C( 84), -INT8_C( 14), INT8_C( 66), INT8_C( 9) }, { -INT8_C( 62), -INT8_C( 42), INT8_C( 47), INT8_C( 29), -INT8_C( 56), INT8_C( 109), -INT8_C( 50), -INT8_C( 26), -INT8_C( 53), INT8_C( 43), INT8_C( 38), INT8_C( 49), -INT8_C( 35), INT8_C( 18), INT8_C( 14), -INT8_C( 11) }, { INT8_C( 110), INT8_C( 36), INT8_C( 55), -INT8_C( 40), -INT8_C( 96), -INT8_C( 104), INT8_C( 16), -INT8_C( 
120), INT8_C( 70), INT8_C( 77), -INT8_C( 38), INT8_C( 99), INT8_C( 124), INT8_C( 4), -INT8_C( 100), -INT8_C( 99) } }, { { -INT8_C( 55), -INT8_C( 121), -INT8_C( 83), INT8_C( 61), INT8_MAX, INT8_C( 101), -INT8_C( 23), -INT8_C( 111), -INT8_C( 52), -INT8_C( 24), INT8_C( 100), INT8_C( 121), -INT8_C( 38), -INT8_C( 90), -INT8_C( 126), -INT8_C( 99) }, { INT8_C( 124), -INT8_C( 79), -INT8_C( 70), INT8_C( 68), INT8_C( 30), -INT8_C( 120), INT8_C( 42), -INT8_C( 22), -INT8_C( 77), INT8_C( 81), INT8_C( 27), -INT8_C( 112), INT8_C( 99), INT8_C( 41), -INT8_C( 123), INT8_C( 44) }, { INT8_C( 92), INT8_C( 87), -INT8_C( 78), INT8_C( 52), -INT8_C( 30), -INT8_C( 88), INT8_C( 58), -INT8_C( 118), -INT8_C( 92), INT8_C( 104), -INT8_C( 116), INT8_C( 16), INT8_C( 78), -INT8_C( 106), -INT8_C( 118), -INT8_C( 4) } }, { { -INT8_C( 80), INT8_C( 50), INT8_C( 105), INT8_C( 47), -INT8_C( 105), INT8_C( 83), -INT8_C( 64), INT8_C( 100), INT8_C( 59), INT8_C( 36), -INT8_C( 35), INT8_C( 22), -INT8_C( 54), INT8_C( 95), -INT8_C( 77), INT8_C( 70) }, { INT8_C( 16), INT8_C( 109), -INT8_C( 117), INT8_C( 46), -INT8_C( 11), -INT8_C( 75), INT8_C( 24), -INT8_C( 88), INT8_C( 6), INT8_C( 51), INT8_C( 56), INT8_C( 106), INT8_C( 92), -INT8_C( 66), -INT8_C( 106), INT8_C( 12) }, { INT8_C( 0), INT8_C( 74), INT8_C( 3), INT8_C( 114), -INT8_C( 125), -INT8_C( 81), INT8_C( 0), -INT8_C( 96), INT8_C( 98), INT8_C( 44), INT8_C( 88), INT8_C( 28), -INT8_C( 104), -INT8_C( 126), -INT8_C( 30), INT8_C( 72) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vmulq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vmulq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 1714), INT16_C( 1643), INT16_C( 5800), -INT16_C( 15471), INT16_C( 4030), INT16_C( 12348), 
-INT16_C( 9553), -INT16_C( 3871) }, { INT16_C( 21671), -INT16_C( 12413), -INT16_C( 3351), -INT16_C( 30479), -INT16_C( 2270), -INT16_C( 20651), INT16_C( 13282), -INT16_C( 27277) }, { -INT16_C( 14818), -INT16_C( 12863), INT16_C( 28392), INT16_C( 9089), INT16_C( 26940), INT16_C( 2028), -INT16_C( 5250), INT16_C( 10771) } }, { { -INT16_C( 8647), -INT16_C( 7525), INT16_C( 11508), -INT16_C( 19803), -INT16_C( 7877), -INT16_C( 5150), -INT16_C( 15172), INT16_C( 25563) }, { INT16_C( 24088), INT16_C( 306), INT16_C( 9041), INT16_C( 29577), -INT16_C( 8422), -INT16_C( 734), -INT16_C( 27374), INT16_C( 19602) }, { -INT16_C( 15528), -INT16_C( 8890), -INT16_C( 27340), -INT16_C( 18099), INT16_C( 17662), -INT16_C( 20988), INT16_C( 16696), -INT16_C( 2330) } }, { { INT16_C( 11635), INT16_C( 26670), -INT16_C( 11431), -INT16_C( 27622), -INT16_C( 588), INT16_C( 28799), INT16_C( 23233), -INT16_C( 9773) }, { INT16_C( 1465), INT16_C( 2778), INT16_C( 25385), INT16_C( 17277), -INT16_C( 24766), INT16_C( 21824), -INT16_C( 11724), -INT16_C( 22623) }, { INT16_C( 5915), -INT16_C( 31956), INT16_C( 17473), INT16_C( 7858), INT16_C( 13416), INT16_C( 19136), -INT16_C( 16076), -INT16_C( 23885) } }, { { -INT16_C( 12289), INT16_C( 22543), INT16_C( 10914), INT16_C( 22253), INT16_C( 27687), -INT16_C( 5945), -INT16_C( 25913), -INT16_C( 32575) }, { -INT16_C( 25696), -INT16_C( 13942), INT16_C( 2046), INT16_C( 16652), INT16_C( 19878), -INT16_C( 9578), INT16_C( 14111), INT16_C( 8065) }, { INT16_C( 25696), INT16_C( 16150), -INT16_C( 17732), INT16_C( 16412), -INT16_C( 9142), -INT16_C( 9574), INT16_C( 32537), INT16_C( 16449) } }, { { -INT16_C( 28410), -INT16_C( 22409), INT16_C( 25787), -INT16_C( 7426), -INT16_C( 14895), -INT16_C( 26422), -INT16_C( 29856), INT16_C( 24) }, { -INT16_C( 24026), INT16_C( 9417), -INT16_C( 10839), INT16_C( 20325), -INT16_C( 1246), INT16_C( 16937), -INT16_C( 21966), INT16_C( 14433) }, { INT16_C( 21220), INT16_C( 367), INT16_C( 5747), -INT16_C( 4042), INT16_C( 12482), -INT16_C( 29606), 
-INT16_C( 1856), INT16_C( 18712) } }, { { -INT16_C( 10181), -INT16_C( 2336), -INT16_C( 8387), INT16_C( 3800), -INT16_C( 23900), INT16_C( 1190), -INT16_C( 16851), INT16_C( 21252) }, { -INT16_C( 12960), INT16_C( 2424), -INT16_C( 8797), -INT16_C( 15016), -INT16_C( 32295), INT16_C( 2823), INT16_C( 26667), INT16_C( 26436) }, { INT16_C( 21792), -INT16_C( 26368), -INT16_C( 13097), INT16_C( 21056), -INT16_C( 32508), INT16_C( 17034), INT16_C( 14735), -INT16_C( 22256) } }, { { INT16_C( 9281), INT16_C( 32349), INT16_C( 13827), -INT16_C( 22388), INT16_C( 13016), INT16_C( 1708), -INT16_C( 19984), INT16_C( 20569) }, { -INT16_C( 11906), INT16_C( 8537), -INT16_C( 20049), -INT16_C( 30489), -INT16_C( 4558), INT16_C( 23955), -INT16_C( 10409), -INT16_C( 26428) }, { -INT16_C( 5890), -INT16_C( 5291), -INT16_C( 243), INT16_C( 30292), -INT16_C( 16848), INT16_C( 20676), INT16_C( 2192), INT16_C( 23588) } }, { { INT16_C( 8956), -INT16_C( 234), -INT16_C( 23976), INT16_C( 12455), INT16_C( 21716), -INT16_C( 15306), -INT16_C( 28667), -INT16_C( 31980) }, { INT16_C( 28001), INT16_C( 4261), -INT16_C( 29666), INT16_C( 20632), INT16_C( 11386), -INT16_C( 11859), INT16_C( 29187), -INT16_C( 151) }, { -INT16_C( 29316), -INT16_C( 14034), INT16_C( 9808), INT16_C( 4904), -INT16_C( 8952), -INT16_C( 20866), -INT16_C( 5617), -INT16_C( 20684) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vmulq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vmulq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 855182333), INT32_C( 1421648751), -INT32_C( 1233362429), -INT32_C( 1733602626) }, { INT32_C( 128839327), INT32_C( 1231820222), INT32_C( 118527701), INT32_C( 1271697230) }, { INT32_C( 919382051), -INT32_C( 146970782), 
-INT32_C( 333754753), -INT32_C( 791455772) } }, { { INT32_C( 176014747), INT32_C( 1818114408), INT32_C( 1495456411), -INT32_C( 873345748) }, { INT32_C( 2060623547), -INT32_C( 1983692877), INT32_C( 596693973), -INT32_C( 43099038) }, { INT32_C( 1090191929), INT32_C( 1480244152), INT32_C( 207463415), -INT32_C( 1657513256) } }, { { -INT32_C( 1995969759), -INT32_C( 1074436828), INT32_C( 1813518144), -INT32_C( 1606940188) }, { INT32_C( 1528433320), INT32_C( 535092553), INT32_C( 306345136), -INT32_C( 250564144) }, { -INT32_C( 658794584), INT32_C( 89779012), -INT32_C( 1181156352), INT32_C( 782150976) } }, { { -INT32_C( 1048897636), -INT32_C( 1115656067), INT32_C( 1814665608), INT32_C( 1242325410) }, { -INT32_C( 1247402388), -INT32_C( 1277916669), -INT32_C( 809101313), INT32_C( 1690425032) }, { -INT32_C( 2137580080), INT32_C( 61387639), INT32_C( 829105784), INT32_C( 376156816) } }, { { INT32_C( 1780825325), INT32_C( 875013804), -INT32_C( 509521601), INT32_C( 506244530) }, { -INT32_C( 673918252), INT32_C( 1535879260), -INT32_C( 2027204161), INT32_C( 351071271) }, { -INT32_C( 233513404), INT32_C( 1728366032), INT32_C( 1084591361), -INT32_C( 788099554) } }, { { -INT32_C( 729870040), -INT32_C( 167139657), -INT32_C( 1428641033), INT32_C( 734528599) }, { INT32_C( 839097558), INT32_C( 76451397), INT32_C( 109885919), -INT32_C( 853903195) }, { INT32_C( 337950576), INT32_C( 9793875), -INT32_C( 1352831959), INT32_C( 365532179) } }, { { INT32_C( 1101109641), INT32_C( 926394944), -INT32_C( 1394536620), -INT32_C( 371742189) }, { -INT32_C( 1961043386), INT32_C( 1200663144), INT32_C( 139271267), INT32_C( 500525204) }, { -INT32_C( 1125472906), -INT32_C( 374625792), -INT32_C( 1258676868), INT32_C( 1989479164) } }, { { INT32_C( 1096709633), INT32_C( 1970902561), -INT32_C( 1188996443), INT32_C( 1268971780) }, { INT32_C( 1020706515), -INT32_C( 880581016), INT32_C( 382980482), INT32_C( 976529465) }, { -INT32_C( 582614829), INT32_C( 689513320), INT32_C( 569359562), INT32_C( 1199837668) } }, }; 
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vmulq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_x_vmulq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { INT64_C( 4157955043631783278), -INT64_C( 7383173757687745161) }, { INT64_C( 8419394962083018722), -INT64_C( 901253715486592488) }, { -INT64_C( 1204569672834376420), -INT64_C( 3540053461434973912) } }, { { -INT64_C( 5564550027079371400), INT64_C( 7215618185716682996) }, { -INT64_C( 7309784739616468995), INT64_C( 907593611098290464) }, { -INT64_C( 5023435711355822184), INT64_C( 7243084518534509184) } }, { { INT64_C( 6309086109994951584), INT64_C( 1015004339592301360) }, { INT64_C( 4247095772352916692), -INT64_C( 1526589114959274448) }, { -INT64_C( 8242478513497898880), INT64_C( 5279603848098724096) } }, { { -INT64_C( 372299265012831717), -INT64_C( 8034770570599155874) }, { -INT64_C( 8159723877624797586), INT64_C( 3812335679117107636) }, { INT64_C( 7937087198466349466), INT64_C( 8278271605044349976) } }, { { INT64_C( 6868419346552761741), INT64_C( 3281520765434854718) }, { -INT64_C( 297386802674241293), -INT64_C( 2204545740809096934) }, { INT64_C( 1000600319430126807), INT64_C( 8314945196091938380) } }, { { -INT64_C( 2310476761110406769), -INT64_C( 39577972136886650) }, { -INT64_C( 4965447695158203948), -INT64_C( 9108998695387525024) }, { -INT64_C( 9106841149599051412), INT64_C( 8719168335463567936) } }, { { -INT64_C( 2310909180584538258), INT64_C( 56742684649637823) }, { -INT64_C( 6260792620510144064), -INT64_C( 3407267369714840509) }, { -INT64_C( 4282053819573514112), INT64_C( 1360702996582710013) } }, { { -INT64_C( 8720149998406219837), INT64_C( 8628926262549749652) }, { INT64_C( 5847351332806712116), INT64_C( 
4266101234806981379) }, { -INT64_C( 4315554267887606628), INT64_C( 2862090052451965628) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_x_vmulq_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); simde_int64x2_t r = simde_x_vmulq_s64(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C( 12), UINT8_C( 29), UINT8_C(140), UINT8_C(196), UINT8_C( 70), UINT8_C(147), UINT8_C( 49), UINT8_C(155), UINT8_C(180), UINT8_C( 1), UINT8_C(121), UINT8_C(185), UINT8_C(238), UINT8_C( 11), UINT8_C( 89), UINT8_C(169) }, { UINT8_C(107), UINT8_C(145), UINT8_C(239), UINT8_C(122), UINT8_C( 68), UINT8_C(239), UINT8_C( 49), UINT8_C( 64), UINT8_C(185), UINT8_C(223), UINT8_C(191), UINT8_C(183), UINT8_C(253), UINT8_C(154), UINT8_C(179), UINT8_C( 10) }, { UINT8_C( 4), UINT8_C(109), UINT8_C(180), UINT8_C(104), UINT8_C(152), UINT8_C( 61), UINT8_C( 97), UINT8_C(192), UINT8_C( 20), UINT8_C(223), UINT8_C( 71), UINT8_C( 63), UINT8_C( 54), UINT8_C(158), UINT8_C( 59), UINT8_C(154) } }, { { UINT8_C(184), UINT8_C( 64), UINT8_C(206), UINT8_C(254), UINT8_C(211), UINT8_C( 0), UINT8_C(153), UINT8_C(136), UINT8_C( 1), UINT8_C( 19), UINT8_C( 65), UINT8_C(239), UINT8_C( 30), UINT8_C(154), UINT8_C(153), UINT8_C(137) }, { UINT8_C( 44), UINT8_C(136), UINT8_C( 3), UINT8_C(112), UINT8_C(119), UINT8_C( 52), UINT8_C(176), UINT8_C( 49), 
UINT8_C( 19), UINT8_C(111), UINT8_C(232), UINT8_C( 17), UINT8_C( 10), UINT8_C(156), UINT8_C( 27), UINT8_C(194) }, { UINT8_C(160), UINT8_C( 0), UINT8_C(106), UINT8_C( 32), UINT8_C( 21), UINT8_C( 0), UINT8_C( 48), UINT8_C( 8), UINT8_C( 19), UINT8_C( 61), UINT8_C(232), UINT8_C(223), UINT8_C( 44), UINT8_C(216), UINT8_C( 35), UINT8_C(210) } }, { { UINT8_C(220), UINT8_C(233), UINT8_C(192), UINT8_C(175), UINT8_C(233), UINT8_C( 89), UINT8_C( 55), UINT8_C(235), UINT8_C(108), UINT8_C(120), UINT8_C(218), UINT8_C(139), UINT8_C( 19), UINT8_C(115), UINT8_C( 20), UINT8_C( 63) }, { UINT8_C(251), UINT8_C( 24), UINT8_C(175), UINT8_C(115), UINT8_C( 76), UINT8_C( 95), UINT8_C(164), UINT8_C( 96), UINT8_C(206), UINT8_C(140), UINT8_C(113), UINT8_C(216), UINT8_C( 40), UINT8_C(140), UINT8_C(154), UINT8_C( 4) }, { UINT8_C(180), UINT8_C(216), UINT8_C( 64), UINT8_C(157), UINT8_C( 44), UINT8_C( 7), UINT8_C( 60), UINT8_C( 32), UINT8_C(232), UINT8_C(160), UINT8_C( 58), UINT8_C( 72), UINT8_C(248), UINT8_C(228), UINT8_C( 8), UINT8_C(252) } }, { { UINT8_C(117), UINT8_C( 90), UINT8_C(180), UINT8_C( 95), UINT8_C(180), UINT8_C(235), UINT8_C( 74), UINT8_C( 32), UINT8_C(100), UINT8_C( 36), UINT8_C(171), UINT8_C(119), UINT8_C(152), UINT8_C(192), UINT8_C(182), UINT8_C(147) }, { UINT8_C(216), UINT8_C(101), UINT8_C( 6), UINT8_C( 36), UINT8_C(196), UINT8_C(170), UINT8_C(132), UINT8_C(146), UINT8_C( 55), UINT8_C(245), UINT8_C(107), UINT8_C( 95), UINT8_C(129), UINT8_C( 5), UINT8_C(100), UINT8_C(247) }, { UINT8_C(184), UINT8_C(130), UINT8_C( 56), UINT8_C( 92), UINT8_C(208), UINT8_C( 14), UINT8_C( 40), UINT8_C( 64), UINT8_C(124), UINT8_C(116), UINT8_C(121), UINT8_C( 41), UINT8_C(152), UINT8_C(192), UINT8_C( 24), UINT8_C(213) } }, { { UINT8_C( 96), UINT8_C( 24), UINT8_C( 86), UINT8_C( 20), UINT8_C( 3), UINT8_C(160), UINT8_C( 52), UINT8_C(103), UINT8_C(196), UINT8_C(224), UINT8_C(222), UINT8_C( 92), UINT8_C(160), UINT8_C(148), UINT8_C(240), UINT8_C(120) }, { UINT8_C(249), UINT8_C(246), UINT8_C(156), UINT8_C(189), 
UINT8_C(161), UINT8_C( 33), UINT8_C( 80), UINT8_C(216), UINT8_C( 22), UINT8_C(187), UINT8_C( 55), UINT8_C(152), UINT8_C(192), UINT8_C(155), UINT8_C(143), UINT8_C( 32) }, { UINT8_C( 96), UINT8_C( 16), UINT8_C(104), UINT8_C(196), UINT8_C(227), UINT8_C(160), UINT8_C( 64), UINT8_C(232), UINT8_C(216), UINT8_C(160), UINT8_C(178), UINT8_C(160), UINT8_C( 0), UINT8_C(156), UINT8_C( 16), UINT8_C( 0) } }, { { UINT8_C(179), UINT8_C(229), UINT8_C( 52), UINT8_C(183), UINT8_C(133), UINT8_C(105), UINT8_C( 30), UINT8_C( 73), UINT8_C( 73), UINT8_C(253), UINT8_C(166), UINT8_C(233), UINT8_C(145), UINT8_C(150), UINT8_C( 97), UINT8_C(139) }, { UINT8_C(140), UINT8_C(253), UINT8_C( 72), UINT8_C( 45), UINT8_C( 30), UINT8_C(152), UINT8_C( 5), UINT8_C( 53), UINT8_C( 83), UINT8_C( 61), UINT8_C(205), UINT8_C( 20), UINT8_C(216), UINT8_C( 92), UINT8_C( 52), UINT8_C(140) }, { UINT8_C(228), UINT8_C( 81), UINT8_C(160), UINT8_C( 43), UINT8_C(150), UINT8_C( 88), UINT8_C(150), UINT8_C( 29), UINT8_C(171), UINT8_C( 73), UINT8_C(238), UINT8_C( 52), UINT8_C( 88), UINT8_C(232), UINT8_C(180), UINT8_C( 4) } }, { { UINT8_C( 65), UINT8_C(105), UINT8_C( 67), UINT8_C(198), UINT8_C(210), UINT8_C( 97), UINT8_C( 15), UINT8_C( 27), UINT8_C( 94), UINT8_C(181), UINT8_C( 4), UINT8_C(240), UINT8_C( 75), UINT8_C(101), UINT8_C(123), UINT8_C(216) }, { UINT8_C( 98), UINT8_C(195), UINT8_C( 5), UINT8_C(129), UINT8_C( 92), UINT8_C( 11), UINT8_C(182), UINT8_C(175), UINT8_C( 72), UINT8_C(131), UINT8_C(195), UINT8_C( 32), UINT8_C(223), UINT8_C(248), UINT8_C(172), UINT8_C( 32) }, { UINT8_C(226), UINT8_C(251), UINT8_C( 79), UINT8_C(198), UINT8_C(120), UINT8_C( 43), UINT8_C(170), UINT8_C(117), UINT8_C(112), UINT8_C(159), UINT8_C( 12), UINT8_C( 0), UINT8_C( 85), UINT8_C(216), UINT8_C(164), UINT8_C( 0) } }, { { UINT8_C( 97), UINT8_C(239), UINT8_C(230), UINT8_C( 51), UINT8_C( 81), UINT8_C(245), UINT8_C( 78), UINT8_C(175), UINT8_C(171), UINT8_C( 82), UINT8_C(159), UINT8_C(246), UINT8_C(183), UINT8_C( 26), UINT8_C(206), UINT8_C( 25) }, { 
UINT8_C(222), UINT8_C(212), UINT8_C(154), UINT8_C( 58), UINT8_C(223), UINT8_C( 80), UINT8_C(233), UINT8_C( 39), UINT8_C(211), UINT8_C(173), UINT8_C( 71), UINT8_C(178), UINT8_C(165), UINT8_C(244), UINT8_C(210), UINT8_C( 6) }, { UINT8_C( 30), UINT8_C(236), UINT8_C( 92), UINT8_C(142), UINT8_C(143), UINT8_C(144), UINT8_C(254), UINT8_C(169), UINT8_C(241), UINT8_C(106), UINT8_C( 25), UINT8_C( 12), UINT8_C(243), UINT8_C(200), UINT8_C(252), UINT8_C(150) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vmulq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vmulq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(64522), UINT16_C(40150), UINT16_C(18908), UINT16_C(26142), UINT16_C(30372), UINT16_C(42082), UINT16_C(17414), UINT16_C(42550) }, { UINT16_C(35252), UINT16_C( 5631), UINT16_C(23381), UINT16_C(46234), UINT16_C(30698), UINT16_C( 807), UINT16_C(48695), UINT16_C(16921) }, { UINT16_C(37128), UINT16_C(50986), UINT16_C(47628), UINT16_C(34316), UINT16_C(44520), UINT16_C(12526), UINT16_C( 4426), UINT16_C(10054) } }, { { UINT16_C(61370), UINT16_C(38622), UINT16_C(64568), UINT16_C(56572), UINT16_C(24179), UINT16_C(31104), UINT16_C(47010), UINT16_C(22304) }, { UINT16_C( 8000), UINT16_C(38252), UINT16_C( 1658), UINT16_C(25674), UINT16_C(29053), UINT16_C(46184), UINT16_C(33072), UINT16_C(60150) }, { UINT16_C(29824), UINT16_C(56232), UINT16_C(33456), UINT16_C(20696), UINT16_C(57639), UINT16_C(23552), UINT16_C( 4192), UINT16_C(63680) } }, { { UINT16_C(54384), UINT16_C(43137), UINT16_C(32209), UINT16_C(17540), UINT16_C( 1244), UINT16_C(32445), UINT16_C(56763), UINT16_C(64725) }, { UINT16_C(16892), UINT16_C(30353), UINT16_C(56135), UINT16_C(50395), UINT16_C(17229), UINT16_C(32121), 
UINT16_C(28612), UINT16_C(13415) }, { UINT16_C(36416), UINT16_C(59153), UINT16_C(45047), UINT16_C(44268), UINT16_C( 2604), UINT16_C(12373), UINT16_C(55340), UINT16_C(64947) } }, { { UINT16_C(59460), UINT16_C( 5596), UINT16_C(24678), UINT16_C(16985), UINT16_C( 5732), UINT16_C( 8384), UINT16_C(38644), UINT16_C(61468) }, { UINT16_C(44503), UINT16_C( 8039), UINT16_C(17033), UINT16_C(55011), UINT16_C(23685), UINT16_C(18771), UINT16_C(47820), UINT16_C( 4221) }, { UINT16_C( 1308), UINT16_C(28548), UINT16_C(58006), UINT16_C(15083), UINT16_C(37364), UINT16_C(24128), UINT16_C(37488), UINT16_C(64940) } }, { { UINT16_C(22947), UINT16_C( 2341), UINT16_C(32441), UINT16_C( 7499), UINT16_C( 2964), UINT16_C(34877), UINT16_C(22945), UINT16_C(31097) }, { UINT16_C(57351), UINT16_C(37016), UINT16_C(31522), UINT16_C(42854), UINT16_C(47576), UINT16_C(42224), UINT16_C(28019), UINT16_C( 5812) }, { UINT16_C( 4981), UINT16_C(15864), UINT16_C(46994), UINT16_C(39138), UINT16_C(47328), UINT16_C(52528), UINT16_C(53331), UINT16_C(53012) } }, { { UINT16_C(55750), UINT16_C(32543), UINT16_C(27223), UINT16_C(60316), UINT16_C(55926), UINT16_C( 6004), UINT16_C(60723), UINT16_C(14992) }, { UINT16_C(10445), UINT16_C(61386), UINT16_C(12452), UINT16_C(31894), UINT16_C(34537), UINT16_C(23840), UINT16_C(54515), UINT16_C(47475) }, { UINT16_C(21390), UINT16_C(16246), UINT16_C(28604), UINT16_C(40296), UINT16_C(39270), UINT16_C( 4736), UINT16_C(25449), UINT16_C(24240) } }, { { UINT16_C(37805), UINT16_C( 1080), UINT16_C(54525), UINT16_C(29679), UINT16_C(25518), UINT16_C(57995), UINT16_C( 6992), UINT16_C( 7452) }, { UINT16_C(59204), UINT16_C(59404), UINT16_C(41495), UINT16_C( 356), UINT16_C(33832), UINT16_C( 7006), UINT16_C(53592), UINT16_C( 1492) }, { UINT16_C(21748), UINT16_C(62112), UINT16_C(15547), UINT16_C(14428), UINT16_C(19248), UINT16_C(55306), UINT16_C(45952), UINT16_C(42800) } }, { { UINT16_C( 3172), UINT16_C(25097), UINT16_C(63713), UINT16_C(36821), UINT16_C(24668), UINT16_C(44145), UINT16_C(36476), 
UINT16_C(49354) }, { UINT16_C(54901), UINT16_C(36008), UINT16_C( 3193), UINT16_C(41357), UINT16_C(60304), UINT16_C(59581), UINT16_C(37309), UINT16_C( 8685) }, { UINT16_C(16820), UINT16_C(16872), UINT16_C(11865), UINT16_C(11601), UINT16_C(42944), UINT16_C(46957), UINT16_C(28044), UINT16_C(34050) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vmulq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vmulq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C( 838207622), UINT32_C(3405383108), UINT32_C(2377386269), UINT32_C(4159190503) }, { UINT32_C(2224025479), UINT32_C(3989376773), UINT32_C(3953058906), UINT32_C(1658203612) }, { UINT32_C(3851262122), UINT32_C(2552010964), UINT32_C(1750009906), UINT32_C(1512172932) } }, { { UINT32_C(4187212853), UINT32_C(4005793489), UINT32_C(2138863511), UINT32_C( 880174253) }, { UINT32_C(1505297747), UINT32_C(1850114580), UINT32_C(1113187430), UINT32_C(1185165073) }, { UINT32_C(3442818607), UINT32_C(3505605204), UINT32_C( 664347178), UINT32_C( 86798973) } }, { { UINT32_C(3426695163), UINT32_C(1572537029), UINT32_C( 668743546), UINT32_C(4015739547) }, { UINT32_C(1799885911), UINT32_C(4242116246), UINT32_C(2201891698), UINT32_C(1590288994) }, { UINT32_C( 24355405), UINT32_C(3961120110), UINT32_C(2091516500), UINT32_C( 93680982) } }, { { UINT32_C(3744073754), UINT32_C(2218583306), UINT32_C(3098220572), UINT32_C(3248949098) }, { UINT32_C(2989289243), UINT32_C(4021159549), UINT32_C(2608000313), UINT32_C(3925425103) }, { UINT32_C(1064771774), UINT32_C( 31855074), UINT32_C( 870468156), UINT32_C(3302911158) } }, { { UINT32_C(1305027651), UINT32_C( 651298057), UINT32_C(2296282398), UINT32_C(2689238404) }, { UINT32_C(4048713332), 
UINT32_C(3051356284), UINT32_C(3176223469), UINT32_C(3500558989) }, { UINT32_C(2790346844), UINT32_C(2459594844), UINT32_C(3062591686), UINT32_C(2805707188) } }, { { UINT32_C(2015194990), UINT32_C(2476666741), UINT32_C(4045110381), UINT32_C(1972462849) }, { UINT32_C(1483138012), UINT32_C(3507373796), UINT32_C( 630087576), UINT32_C( 418723241) }, { UINT32_C(2297318536), UINT32_C(2355970612), UINT32_C( 552883128), UINT32_C(2027545257) } }, { { UINT32_C( 428872356), UINT32_C(1873554946), UINT32_C(2875246762), UINT32_C( 153154093) }, { UINT32_C(3110242005), UINT32_C(1686859980), UINT32_C(2022250959), UINT32_C(4086333006) }, { UINT32_C(1432967796), UINT32_C( 342591896), UINT32_C( 433380214), UINT32_C(4018925494) } }, { { UINT32_C(2450268304), UINT32_C(4160862542), UINT32_C(2929943169), UINT32_C( 699974484) }, { UINT32_C( 367204937), UINT32_C(1501130378), UINT32_C(3604021895), UINT32_C( 298410624) }, { UINT32_C(2423515408), UINT32_C( 577464332), UINT32_C(2688480263), UINT32_C(2283524608) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vmulq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_x_vmulq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 8453629100971987715), UINT64_C(17339128753870447218) }, { UINT64_C( 8372071250065717968), UINT64_C(17638644746708104470) }, { UINT64_C( 2275508533926944880), UINT64_C(16821336020454117324) } }, { { UINT64_C(16534421168073941280), UINT64_C( 8097383758611524530) }, { UINT64_C( 8362760502854536233), UINT64_C( 1448695574823102004) }, { UINT64_C( 2146727699645845024), UINT64_C(15795703185322069032) } }, { { UINT64_C(11188784158704261613), UINT64_C( 3190351751723701302) }, { UINT64_C( 5190350280411485492), 
UINT64_C(14105986338996947643) }, { UINT64_C( 727445559653512484), UINT64_C(14320173044861542258) } }, { { UINT64_C(11017046459309872992), UINT64_C(10083195999946206910) }, { UINT64_C( 4946682512632886346), UINT64_C(18215008370324280367) }, { UINT64_C(13190602949145641408), UINT64_C(10245401748418112738) } }, { { UINT64_C( 1407139957151493260), UINT64_C(17724412949559637528) }, { UINT64_C( 7801635594418001783), UINT64_C( 8693700918831625998) }, { UINT64_C( 5669749077729970452), UINT64_C(10463065914885709136) } }, { { UINT64_C( 3283105528936917798), UINT64_C(11494757621095490789) }, { UINT64_C(16309474703689374125), UINT64_C( 2990163666190256155) }, { UINT64_C( 882033643506572974), UINT64_C(14869171963549289511) } }, { { UINT64_C(13502132351939241758), UINT64_C( 4566879350880468377) }, { UINT64_C(18329792197960531882), UINT64_C( 4428592178037910856) }, { UINT64_C( 9138642147534199788), UINT64_C(14173604868775933960) } }, { { UINT64_C(16860863817803962988), UINT64_C(14024501814442477775) }, { UINT64_C(10658790286282508809), UINT64_C(16351254641287033230) }, { UINT64_C( 1271651353639462348), UINT64_C(11031284440050516434) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_x_vmulq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t r = simde_x_vmulq_u64(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmul_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_f64) 
SIMDE_TEST_FUNC_LIST_ENTRY(vmul_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_s32) SIMDE_TEST_FUNC_LIST_ENTRY(x_vmul_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_u32) SIMDE_TEST_FUNC_LIST_ENTRY(x_vmul_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(x_vmulq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(x_vmulq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/mul_lane.c000066400000000000000000002036301400333146700172630ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN mul_lane #include "test-neon.h" #include "../../../simde/arm/neon/mul_lane.h" #include "../../../simde/arm/neon/dup_n.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ static int test_simde_vmul_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b[2]; int lane; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -773.99), SIMDE_FLOAT32_C( 962.49) }, { SIMDE_FLOAT32_C( -531.26), SIMDE_FLOAT32_C( 540.16) }, INT8_C( 1), { SIMDE_FLOAT32_C(-418078.41), SIMDE_FLOAT32_C(519898.56) } }, { { SIMDE_FLOAT32_C( -409.93), SIMDE_FLOAT32_C( 510.04) }, { SIMDE_FLOAT32_C( -659.02), SIMDE_FLOAT32_C( 102.89) }, INT8_C( 1), { SIMDE_FLOAT32_C(-42177.70), SIMDE_FLOAT32_C( 52478.02) } }, { { SIMDE_FLOAT32_C( -999.99), SIMDE_FLOAT32_C( -702.78) }, { SIMDE_FLOAT32_C( -114.76), SIMDE_FLOAT32_C( 327.24) }, INT8_C( 1), { SIMDE_FLOAT32_C(-327236.72), SIMDE_FLOAT32_C(-229977.73) } }, { { SIMDE_FLOAT32_C( 302.70), SIMDE_FLOAT32_C( -977.40) }, { SIMDE_FLOAT32_C( 45.80), SIMDE_FLOAT32_C( 892.47) }, INT8_C( 1), { SIMDE_FLOAT32_C(270150.66), 
SIMDE_FLOAT32_C(-872300.19) } }, { { SIMDE_FLOAT32_C( 306.93), SIMDE_FLOAT32_C( 371.14) }, { SIMDE_FLOAT32_C( 546.46), SIMDE_FLOAT32_C( 323.18) }, INT8_C( 1), { SIMDE_FLOAT32_C( 99193.63), SIMDE_FLOAT32_C(119945.02) } }, { { SIMDE_FLOAT32_C( 148.94), SIMDE_FLOAT32_C( -38.81) }, { SIMDE_FLOAT32_C( -107.93), SIMDE_FLOAT32_C( -920.00) }, INT8_C( 0), { SIMDE_FLOAT32_C(-16075.09), SIMDE_FLOAT32_C( 4188.76) } }, { { SIMDE_FLOAT32_C( -268.22), SIMDE_FLOAT32_C( -693.98) }, { SIMDE_FLOAT32_C( -987.79), SIMDE_FLOAT32_C( 200.52) }, INT8_C( 1), { SIMDE_FLOAT32_C(-53783.48), SIMDE_FLOAT32_C(-139156.88) } }, { { SIMDE_FLOAT32_C( 646.59), SIMDE_FLOAT32_C( 790.59) }, { SIMDE_FLOAT32_C( 356.21), SIMDE_FLOAT32_C( 987.57) }, INT8_C( 0), { SIMDE_FLOAT32_C(230321.83), SIMDE_FLOAT32_C(281616.06) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r; SIMDE_CONSTIFY_2_(simde_vmul_lane_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0, 1000.0); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0, 1000.0); int lane = simde_test_codegen_random_i8() & 1; simde_float32x2_t r; SIMDE_CONSTIFY_2_(simde_vmul_lane_f32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))), lane, a, b); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmul_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const 
struct { simde_float64 a[1]; simde_float64 b[1]; simde_float64 r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( 361.11) }, { SIMDE_FLOAT64_C( -650.09) }, { SIMDE_FLOAT64_C(-234754.00) } }, { { SIMDE_FLOAT64_C( 96.20) }, { SIMDE_FLOAT64_C( 967.05) }, { SIMDE_FLOAT64_C( 93030.21) } }, { { SIMDE_FLOAT64_C( -44.02) }, { SIMDE_FLOAT64_C( 966.32) }, { SIMDE_FLOAT64_C(-42537.41) } }, { { SIMDE_FLOAT64_C( 207.91) }, { SIMDE_FLOAT64_C( 922.18) }, { SIMDE_FLOAT64_C(191730.44) } }, { { SIMDE_FLOAT64_C( 263.40) }, { SIMDE_FLOAT64_C( -535.20) }, { SIMDE_FLOAT64_C(-140971.68) } }, { { SIMDE_FLOAT64_C( -474.15) }, { SIMDE_FLOAT64_C( 105.73) }, { SIMDE_FLOAT64_C(-50131.88) } }, { { SIMDE_FLOAT64_C( -684.09) }, { SIMDE_FLOAT64_C( -137.80) }, { SIMDE_FLOAT64_C( 94267.60) } }, { { SIMDE_FLOAT64_C( 842.76) }, { SIMDE_FLOAT64_C( -239.29) }, { SIMDE_FLOAT64_C(-201664.04) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_float64x1_t r = simde_vmul_lane_f64(a, b, 0); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t b = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t r = simde_vmul_lane_f64(a, b, 0); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmul_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int lane; int16_t r[4]; } test_vec[] = { { { -INT16_C( 3489), -INT16_C( 7576), -INT16_C( 15092), -INT16_C( 26073) }, { -INT16_C( 7744), INT16_C( 19101), INT16_C( 14511), INT16_C( 9522) }, INT8_C( 1), { INT16_C( 6723), 
-INT16_C( 5688), INT16_C( 20572), -INT16_C( 12309) } }, { { INT16_C( 23915), -INT16_C( 31301), -INT16_C( 15588), INT16_C( 8786) }, { -INT16_C( 30436), INT16_C( 797), -INT16_C( 13115), -INT16_C( 18589) }, INT8_C( 0), { INT16_C( 31412), -INT16_C( 19596), INT16_C( 21264), -INT16_C( 23816) } }, { { -INT16_C( 15291), INT16_C( 28153), -INT16_C( 18082), -INT16_C( 1202) }, { -INT16_C( 765), INT16_C( 13875), -INT16_C( 11998), INT16_C( 32673) }, INT8_C( 0), { INT16_C( 32207), INT16_C( 24299), INT16_C( 4634), INT16_C( 2026) } }, { { -INT16_C( 25562), INT16_C( 30800), INT16_C( 27838), -INT16_C( 9215) }, { -INT16_C( 14481), -INT16_C( 11608), -INT16_C( 8834), INT16_C( 16920) }, INT8_C( 2), { -INT16_C( 22348), INT16_C( 18272), -INT16_C( 29820), INT16_C( 9598) } }, { { -INT16_C( 24187), -INT16_C( 11376), -INT16_C( 27748), -INT16_C( 12079) }, { -INT16_C( 3127), INT16_C( 27297), INT16_C( 11635), INT16_C( 3984) }, INT8_C( 1), { -INT16_C( 22875), -INT16_C( 21104), INT16_C( 27932), -INT16_C( 8847) } }, { { -INT16_C( 13048), INT16_C( 2793), INT16_C( 22953), INT16_C( 21201) }, { INT16_C( 20267), INT16_C( 17199), INT16_C( 1426), INT16_C( 13256) }, INT8_C( 1), { -INT16_C( 17288), -INT16_C( 1081), -INT16_C( 20217), -INT16_C( 6305) } }, { { -INT16_C( 12388), INT16_C( 27945), -INT16_C( 3425), INT16_C( 16480) }, { -INT16_C( 11427), -INT16_C( 4754), -INT16_C( 5150), -INT16_C( 20234) }, INT8_C( 1), { -INT16_C( 24312), -INT16_C( 9058), INT16_C( 29522), -INT16_C( 30400) } }, { { INT16_C( 22784), -INT16_C( 11986), INT16_C( 22955), -INT16_C( 9696) }, { -INT16_C( 19811), INT16_C( 26080), INT16_C( 30181), -INT16_C( 19199) }, INT8_C( 2), { -INT16_C( 25344), INT16_C( 9254), INT16_C( 23799), -INT16_C( 16736) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r; SIMDE_CONSTIFY_4_(simde_vmul_lane_s16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_s16(INT16_C(0))), 
test_vec[i].lane, a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); int lane = simde_test_codegen_random_i8() & 3; simde_int16x4_t r; SIMDE_CONSTIFY_4_(simde_vmul_lane_s16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_s16(INT16_C(0))), lane, a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmul_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int lane; int32_t r[2]; } test_vec[] = { { { -INT32_C( 812559250), INT32_C( 61009557) }, { -INT32_C( 772897317), -INT32_C( 1898855627) }, INT8_C( 1), { -INT32_C( 1640741690), INT32_C( 1243341785) } }, { { -INT32_C( 1018479966), -INT32_C( 193599468) }, { INT32_C( 1332370253), INT32_C( 1690109968) }, INT8_C( 1), { INT32_C( 539146784), INT32_C( 493035840) } }, { { INT32_C( 797440396), -INT32_C( 340499460) }, { INT32_C( 112126260), -INT32_C( 1314348937) }, INT8_C( 1), { -INT32_C( 1542684652), INT32_C( 683145764) } }, { { -INT32_C( 506214805), INT32_C( 607920314) }, { INT32_C( 757943407), -INT32_C( 1430665552) }, INT8_C( 0), { INT32_C( 2039073893), INT32_C( 41642150) } }, { { -INT32_C( 1666079000), INT32_C( 1082516881) }, { -INT32_C( 1797194261), INT32_C( 1962920878) }, INT8_C( 0), { -INT32_C( 1811448072), INT32_C( 1101662491) } }, { { INT32_C( 482226144), -INT32_C( 2140656045) }, { INT32_C( 340203867), INT32_C( 1778226627) }, INT8_C( 0), { -INT32_C( 1855462240), INT32_C( 647205505) } }, { { INT32_C( 522976153), INT32_C( 454825787) }, { -INT32_C( 1427715413), INT32_C( 1854637119) }, INT8_C( 0), { 
INT32_C( 744343859), INT32_C( 1394217577) } }, { { INT32_C( 275825063), -INT32_C( 1993749694) }, { -INT32_C( 585175840), INT32_C( 1349936213) }, INT8_C( 1), { -INT32_C( 200308621), INT32_C( 783434986) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r; SIMDE_CONSTIFY_2_(simde_vmul_lane_s32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_s32(INT32_C(0))), test_vec[i].lane, a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); int lane = simde_test_codegen_random_i8() & 1; simde_int32x2_t r; SIMDE_CONSTIFY_2_(simde_vmul_lane_s32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_s32(INT32_C(0))), lane, a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmul_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; int lane; uint16_t r[4]; } test_vec[] = { { { UINT16_C(56255), UINT16_C(10796), UINT16_C(17520), UINT16_C( 1280) }, { UINT16_C(42772), UINT16_C(28649), UINT16_C(61065), UINT16_C(17278) }, INT8_C( 2), { UINT16_C(11063), UINT16_C(31116), UINT16_C(49136), UINT16_C(44288) } }, { { UINT16_C(14034), UINT16_C(28449), UINT16_C( 1540), UINT16_C(20273) }, { UINT16_C( 7493), UINT16_C(63190), UINT16_C(54710), UINT16_C(37301) }, INT8_C( 2), { UINT16_C(45900), UINT16_C(30326), UINT16_C(39640), UINT16_C( 4566) } }, { { UINT16_C( 479), UINT16_C(57158), UINT16_C(23046), UINT16_C(61574) }, { UINT16_C( 4297), UINT16_C(18398), 
UINT16_C(58707), UINT16_C(35353) }, INT8_C( 2), { UINT16_C( 5709), UINT16_C( 434), UINT16_C(36338), UINT16_C(55666) } }, { { UINT16_C(36488), UINT16_C(47628), UINT16_C(20958), UINT16_C(46295) }, { UINT16_C(36167), UINT16_C(64650), UINT16_C(35871), UINT16_C( 8411) }, INT8_C( 2), { UINT16_C(41592), UINT16_C( 6004), UINT16_C(20962), UINT16_C(31241) } }, { { UINT16_C(10170), UINT16_C(16684), UINT16_C(62743), UINT16_C(62801) }, { UINT16_C(42045), UINT16_C(22234), UINT16_C(57390), UINT16_C(48607) }, INT8_C( 1), { UINT16_C(20580), UINT16_C(18296), UINT16_C(28566), UINT16_C( 7418) } }, { { UINT16_C(39833), UINT16_C(28734), UINT16_C(34383), UINT16_C(55806) }, { UINT16_C( 7554), UINT16_C(24165), UINT16_C(14141), UINT16_C(25624) }, INT8_C( 3), { UINT16_C(23128), UINT16_C(48592), UINT16_C(29544), UINT16_C(42960) } }, { { UINT16_C(31577), UINT16_C(43609), UINT16_C(38513), UINT16_C(19279) }, { UINT16_C(32236), UINT16_C(52012), UINT16_C( 6458), UINT16_C(54628) }, INT8_C( 3), { UINT16_C(15300), UINT16_C(38852), UINT16_C(51492), UINT16_C( 9692) } }, { { UINT16_C( 9685), UINT16_C(54237), UINT16_C(24830), UINT16_C(25840) }, { UINT16_C(11710), UINT16_C(54939), UINT16_C(65426), UINT16_C( 3376) }, INT8_C( 0), { UINT16_C(34070), UINT16_C( 5894), UINT16_C(41604), UINT16_C( 6688) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r; SIMDE_CONSTIFY_4_(simde_vmul_lane_u16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_u16(INT16_C(0))), test_vec[i].lane, a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); int lane = simde_test_codegen_random_i8() & 3; simde_uint16x4_t r; SIMDE_CONSTIFY_4_(simde_vmul_lane_u16, r, (HEDLEY_UNREACHABLE(), 
simde_vdup_n_u16(UINT16_C(0))), lane, a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmul_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; int lane; uint32_t r[2]; } test_vec[] = { { { UINT32_C(3751696772), UINT32_C( 92944092) }, { UINT32_C( 95534389), UINT32_C( 347736448) }, INT8_C( 1), { UINT32_C(3124389376), UINT32_C(2900437504) } }, { { UINT32_C(2311291488), UINT32_C(2163324479) }, { UINT32_C(2637368739), UINT32_C(1965198108) }, INT8_C( 0), { UINT32_C(2117788448), UINT32_C(1370832157) } }, { { UINT32_C(2338345473), UINT32_C( 155751767) }, { UINT32_C(1427294362), UINT32_C(1605700573) }, INT8_C( 1), { UINT32_C(3479295453), UINT32_C( 26650907) } }, { { UINT32_C( 797286206), UINT32_C(1379739679) }, { UINT32_C(3890108869), UINT32_C( 552077518) }, INT8_C( 0), { UINT32_C( 267655350), UINT32_C(1340384987) } }, { { UINT32_C(3170728052), UINT32_C(2475005825) }, { UINT32_C(2728354540), UINT32_C(1625321153) }, INT8_C( 1), { UINT32_C(1247389556), UINT32_C(2410718785) } }, { { UINT32_C(1293647632), UINT32_C(2963726546) }, { UINT32_C(2964878536), UINT32_C( 220471445) }, INT8_C( 1), { UINT32_C(4280480848), UINT32_C(4106426938) } }, { { UINT32_C(1723109089), UINT32_C(3234373921) }, { UINT32_C( 590252355), UINT32_C(1764958698) }, INT8_C( 0), { UINT32_C(4045786595), UINT32_C(1235932323) } }, { { UINT32_C( 639449984), UINT32_C(2744903147) }, { UINT32_C(3133354133), UINT32_C(3415990333) }, INT8_C( 1), { UINT32_C( 797388160), UINT32_C(4087113983) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r; 
SIMDE_CONSTIFY_2_(simde_vmul_lane_u32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_u32(UINT32_C(0))), test_vec[i].lane, a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); int lane = simde_test_codegen_random_i8() & 1; simde_uint32x2_t r; SIMDE_CONSTIFY_2_(simde_vmul_lane_u32, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_u32(UINT32_C(0))), lane, a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 b[2]; int lane; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 213.13), SIMDE_FLOAT32_C( 981.87), SIMDE_FLOAT32_C( 831.27), SIMDE_FLOAT32_C( 118.60) }, { SIMDE_FLOAT32_C( -324.54), SIMDE_FLOAT32_C( 912.98) }, INT8_C( 0), { SIMDE_FLOAT32_C(-69169.21), SIMDE_FLOAT32_C(-318656.09), SIMDE_FLOAT32_C(-269780.38), SIMDE_FLOAT32_C(-38490.45) } }, { { SIMDE_FLOAT32_C( 398.02), SIMDE_FLOAT32_C( -7.67), SIMDE_FLOAT32_C( 260.87), SIMDE_FLOAT32_C( 702.83) }, { SIMDE_FLOAT32_C( 333.84), SIMDE_FLOAT32_C( 873.07) }, INT8_C( 1), { SIMDE_FLOAT32_C(347499.31), SIMDE_FLOAT32_C( -6696.45), SIMDE_FLOAT32_C(227757.77), SIMDE_FLOAT32_C(613619.81) } }, { { SIMDE_FLOAT32_C( -354.62), SIMDE_FLOAT32_C( 438.98), SIMDE_FLOAT32_C( 27.69), SIMDE_FLOAT32_C( -892.52) }, { SIMDE_FLOAT32_C( 356.02), SIMDE_FLOAT32_C( -450.46) }, INT8_C( 0), { SIMDE_FLOAT32_C(-126251.80), SIMDE_FLOAT32_C(156285.66), SIMDE_FLOAT32_C( 9858.19), SIMDE_FLOAT32_C(-317754.97) } }, { { SIMDE_FLOAT32_C( -796.85), SIMDE_FLOAT32_C( 254.52), 
SIMDE_FLOAT32_C( 640.39), SIMDE_FLOAT32_C( 497.52) }, { SIMDE_FLOAT32_C( 257.72), SIMDE_FLOAT32_C( -9.95) }, INT8_C( 1), { SIMDE_FLOAT32_C( 7928.66), SIMDE_FLOAT32_C( -2532.47), SIMDE_FLOAT32_C( -6371.88), SIMDE_FLOAT32_C( -4950.32) } }, { { SIMDE_FLOAT32_C( -951.01), SIMDE_FLOAT32_C( -981.56), SIMDE_FLOAT32_C( 34.30), SIMDE_FLOAT32_C( 262.12) }, { SIMDE_FLOAT32_C( -999.70), SIMDE_FLOAT32_C( -134.44) }, INT8_C( 0), { SIMDE_FLOAT32_C(950724.69), SIMDE_FLOAT32_C(981265.56), SIMDE_FLOAT32_C(-34289.71), SIMDE_FLOAT32_C(-262041.36) } }, { { SIMDE_FLOAT32_C( -324.23), SIMDE_FLOAT32_C( -221.45), SIMDE_FLOAT32_C( 748.19), SIMDE_FLOAT32_C( -926.22) }, { SIMDE_FLOAT32_C( 770.88), SIMDE_FLOAT32_C( 9.06) }, INT8_C( 1), { SIMDE_FLOAT32_C( -2937.52), SIMDE_FLOAT32_C( -2006.34), SIMDE_FLOAT32_C( 6778.60), SIMDE_FLOAT32_C( -8391.55) } }, { { SIMDE_FLOAT32_C( 104.72), SIMDE_FLOAT32_C( -117.86), SIMDE_FLOAT32_C( -296.55), SIMDE_FLOAT32_C( 750.10) }, { SIMDE_FLOAT32_C( -678.89), SIMDE_FLOAT32_C( 731.14) }, INT8_C( 1), { SIMDE_FLOAT32_C( 76564.98), SIMDE_FLOAT32_C(-86172.16), SIMDE_FLOAT32_C(-216819.56), SIMDE_FLOAT32_C(548428.12) } }, { { SIMDE_FLOAT32_C( 677.13), SIMDE_FLOAT32_C( -719.32), SIMDE_FLOAT32_C( -444.57), SIMDE_FLOAT32_C( 880.28) }, { SIMDE_FLOAT32_C( 535.19), SIMDE_FLOAT32_C( -804.18) }, INT8_C( 0), { SIMDE_FLOAT32_C(362393.22), SIMDE_FLOAT32_C(-384972.88), SIMDE_FLOAT32_C(-237929.42), SIMDE_FLOAT32_C(471117.06) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x4_t r; SIMDE_CONSTIFY_2_(simde_vmulq_lane_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0, 1000.0); 
simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0, 1000.0); int lane = simde_test_codegen_random_i8() & 1; simde_float32x4_t r; SIMDE_CONSTIFY_2_(simde_vmulq_lane_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), lane, a, b); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 b[1]; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -207.09), SIMDE_FLOAT64_C( 185.86) }, { SIMDE_FLOAT64_C( 705.49) }, { SIMDE_FLOAT64_C(-146099.92), SIMDE_FLOAT64_C(131122.37) } }, { { SIMDE_FLOAT64_C( -158.10), SIMDE_FLOAT64_C( 204.30) }, { SIMDE_FLOAT64_C( -260.21) }, { SIMDE_FLOAT64_C( 41139.20), SIMDE_FLOAT64_C(-53160.90) } }, { { SIMDE_FLOAT64_C( -895.97), SIMDE_FLOAT64_C( 204.60) }, { SIMDE_FLOAT64_C( 605.35) }, { SIMDE_FLOAT64_C(-542375.44), SIMDE_FLOAT64_C(123854.61) } }, { { SIMDE_FLOAT64_C( -515.25), SIMDE_FLOAT64_C( 880.37) }, { SIMDE_FLOAT64_C( -616.10) }, { SIMDE_FLOAT64_C(317445.53), SIMDE_FLOAT64_C(-542395.96) } }, { { SIMDE_FLOAT64_C( -767.06), SIMDE_FLOAT64_C( 954.16) }, { SIMDE_FLOAT64_C( -845.22) }, { SIMDE_FLOAT64_C(648334.45), SIMDE_FLOAT64_C(-806475.12) } }, { { SIMDE_FLOAT64_C( 242.00), SIMDE_FLOAT64_C( 730.77) }, { SIMDE_FLOAT64_C( 259.50) }, { SIMDE_FLOAT64_C( 62799.00), SIMDE_FLOAT64_C(189634.82) } }, { { SIMDE_FLOAT64_C( -875.86), SIMDE_FLOAT64_C( -565.78) }, { SIMDE_FLOAT64_C( 9.60) }, { SIMDE_FLOAT64_C( -8408.26), SIMDE_FLOAT64_C( -5431.49) } }, { { SIMDE_FLOAT64_C( -554.75), SIMDE_FLOAT64_C( -834.64) }, { SIMDE_FLOAT64_C( -132.82) }, { SIMDE_FLOAT64_C( 73681.89), SIMDE_FLOAT64_C(110856.88) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_float64x2_t r = simde_vmulq_lane_f64(a, b, 0); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x1_t b = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x2_t r = simde_vmulq_lane_f64(a, b, 0); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[4]; int lane; int16_t r[8]; } test_vec[] = { { { INT16_C( 30459), -INT16_C( 14985), INT16_C( 2555), INT16_C( 2281), -INT16_C( 19504), INT16_C( 11036), INT16_C( 25139), -INT16_C( 29002) }, { -INT16_C( 3894), INT16_C( 24172), INT16_C( 15210), INT16_C( 12261) }, INT8_C( 2), { INT16_C( 7406), INT16_C( 12358), -INT16_C( 1298), INT16_C( 25466), INT16_C( 25632), INT16_C( 19864), INT16_C( 27166), INT16_C( 2396) } }, { { -INT16_C( 17676), INT16_C( 26584), -INT16_C( 1959), -INT16_C( 12446), INT16_C( 10095), INT16_C( 30922), -INT16_C( 11760), -INT16_C( 15288) }, { INT16_C( 29678), INT16_C( 20727), -INT16_C( 31446), INT16_C( 6682) }, INT8_C( 2), { INT16_C( 28680), INT16_C( 16752), -INT16_C( 1126), -INT16_C( 4076), INT16_C( 9014), -INT16_C( 15580), -INT16_C( 14688), -INT16_C( 25648) } }, { { -INT16_C( 31624), INT16_C( 23853), INT16_C( 2995), INT16_C( 27985), -INT16_C( 18205), -INT16_C( 9273), -INT16_C( 27110), INT16_C( 16714) }, { -INT16_C( 15775), INT16_C( 13138), INT16_C( 5642), INT16_C( 32290) }, INT8_C( 1), { INT16_C( 22128), -INT16_C( 12438), INT16_C( 26710), INT16_C( 9970), INT16_C( 29110), INT16_C( 2750), INT16_C( 
16980), -INT16_C( 22604) } }, { { -INT16_C( 22414), -INT16_C( 29294), -INT16_C( 31550), INT16_C( 17925), INT16_C( 25521), -INT16_C( 17159), INT16_C( 26548), INT16_C( 28064) }, { INT16_C( 31534), -INT16_C( 15225), -INT16_C( 13882), -INT16_C( 30683) }, INT8_C( 3), { -INT16_C( 6022), INT16_C( 1562), INT16_C( 16394), -INT16_C( 14663), INT16_C( 28821), -INT16_C( 26627), -INT16_C( 25340), -INT16_C( 10208) } }, { { -INT16_C( 27815), INT16_C( 31537), INT16_C( 15889), -INT16_C( 17939), INT16_C( 31440), INT16_C( 21883), -INT16_C( 16000), -INT16_C( 7418) }, { -INT16_C( 15430), INT16_C( 8599), INT16_C( 1123), -INT16_C( 8625) }, INT8_C( 0), { -INT16_C( 9814), -INT16_C( 11110), INT16_C( 2906), -INT16_C( 25294), -INT16_C( 21728), -INT16_C( 13218), INT16_C( 5888), -INT16_C( 31652) } }, { { -INT16_C( 23532), INT16_C( 14677), INT16_C( 28717), -INT16_C( 16238), INT16_C( 3489), -INT16_C( 8239), -INT16_C( 29957), INT16_C( 30127) }, { INT16_C( 1029), -INT16_C( 14603), -INT16_C( 10229), -INT16_C( 12672) }, INT8_C( 0), { -INT16_C( 31644), INT16_C( 29353), -INT16_C( 6943), INT16_C( 2778), -INT16_C( 14299), -INT16_C( 23787), -INT16_C( 23833), INT16_C( 2155) } }, { { INT16_C( 12706), -INT16_C( 3724), INT16_C( 15), -INT16_C( 19451), INT16_C( 16213), -INT16_C( 14879), -INT16_C( 24111), -INT16_C( 8346) }, { INT16_C( 17778), -INT16_C( 806), INT16_C( 20469), -INT16_C( 1791) }, INT8_C( 1), { -INT16_C( 17420), -INT16_C( 13112), -INT16_C( 12090), INT16_C( 14402), -INT16_C( 26014), -INT16_C( 614), -INT16_C( 30726), -INT16_C( 23332) } }, { { INT16_C( 1223), INT16_C( 18205), -INT16_C( 29230), INT16_C( 1001), -INT16_C( 9470), INT16_C( 531), -INT16_C( 14368), INT16_C( 8024) }, { INT16_C( 7592), INT16_C( 18929), -INT16_C( 12156), -INT16_C( 13893) }, INT8_C( 2), { INT16_C( 9884), INT16_C( 15092), -INT16_C( 16312), INT16_C( 21540), -INT16_C( 29432), -INT16_C( 32308), INT16_C( 3968), -INT16_C( 22176) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = 
simde_vld1q_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x8_t r; SIMDE_CONSTIFY_4_(simde_vmulq_lane_s16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_s16(INT16_C(0))), test_vec[i].lane, a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); int lane = simde_test_codegen_random_i8() & 3; simde_int16x8_t r; SIMDE_CONSTIFY_4_(simde_vmulq_lane_s16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_s16(INT16_C(0))), lane, a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[2]; int lane; int32_t r[4]; } test_vec[] = { { { INT32_C( 568356878), -INT32_C( 2094014030), -INT32_C( 1281537859), INT32_C( 987407471) }, { INT32_C( 136707343), INT32_C( 800378846) }, INT8_C( 0), { INT32_C( 1827301074), INT32_C( 815080302), INT32_C( 1651182611), -INT32_C( 1017358975) } }, { { INT32_C( 682706454), -INT32_C( 1992917223), INT32_C( 1094408031), -INT32_C( 805388666) }, { INT32_C( 4108892), -INT32_C( 1995409012) }, INT8_C( 0), { -INT32_C( 607880216), -INT32_C( 66908420), -INT32_C( 585796572), INT32_C( 75080744) } }, { { -INT32_C( 865245416), -INT32_C( 572270441), INT32_C( 150342537), -INT32_C( 1721204670) }, { INT32_C( 2048960955), INT32_C( 1507258279) }, INT8_C( 1), { INT32_C( 78489768), INT32_C( 68240257), -INT32_C( 758354593), -INT32_C( 871020274) } }, { { -INT32_C( 425269802), -INT32_C( 687913617), -INT32_C( 1972515918), -INT32_C( 1173075026) }, { -INT32_C( 1040442303), INT32_C( 
763204996) }, INT8_C( 1), { -INT32_C( 380497832), -INT32_C( 650352580), INT32_C( 135635400), INT32_C( 1283017656) } }, { { -INT32_C( 844835337), INT32_C( 346280494), -INT32_C( 1115452808), INT32_C( 1416649097) }, { INT32_C( 302250695), INT32_C( 726908174) }, INT8_C( 1), { -INT32_C( 854585726), -INT32_C( 234130300), -INT32_C( 2024527216), -INT32_C( 1700132226) } }, { { -INT32_C( 1840337131), -INT32_C( 1299643171), INT32_C( 2061522600), -INT32_C( 2081164039) }, { -INT32_C( 787697545), INT32_C( 462971168) }, INT8_C( 0), { INT32_C( 1961743043), -INT32_C( 664456517), -INT32_C( 1849964520), -INT32_C( 746713153) } }, { { -INT32_C( 48158294), INT32_C( 68344404), INT32_C( 752985128), -INT32_C( 2083155155) }, { INT32_C( 1753042803), -INT32_C( 203489214) }, INT8_C( 1), { -INT32_C( 341656108), -INT32_C( 133674584), -INT32_C( 1996205488), -INT32_C( 398265958) } }, { { INT32_C( 1215173552), INT32_C( 1492308526), -INT32_C( 844304141), -INT32_C( 1477070846) }, { INT32_C( 617882257), INT32_C( 1184389367) }, INT8_C( 1), { -INT32_C( 1988526384), -INT32_C( 1445422494), INT32_C( 1727529589), -INT32_C( 1883405842) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x4_t r; SIMDE_CONSTIFY_2_(simde_vmulq_lane_s32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_s32(INT32_C(0))), test_vec[i].lane, a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); int lane = simde_test_codegen_random_i8() & 1; simde_int32x4_t r; SIMDE_CONSTIFY_2_(simde_vmulq_lane_s32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_s32(INT32_C(0))), lane, a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[4]; int lane; uint16_t r[8]; } test_vec[] = { { { UINT16_C(36602), UINT16_C(26614), UINT16_C(29299), UINT16_C(24022), UINT16_C(34078), UINT16_C(58723), UINT16_C(45747), UINT16_C(62564) }, { UINT16_C( 7589), UINT16_C( 1166), UINT16_C(56009), UINT16_C(61188) }, INT8_C( 3), { UINT16_C(41448), UINT16_C(18904), UINT16_C( 9932), UINT16_C(16728), UINT16_C( 5752), UINT16_C( 652), UINT16_C(59340), UINT16_C(11664) } }, { { UINT16_C(64917), UINT16_C(25813), UINT16_C(51482), UINT16_C(43102), UINT16_C(50623), UINT16_C(12827), UINT16_C(31132), UINT16_C( 8528) }, { UINT16_C(14044), UINT16_C(36565), UINT16_C(51610), UINT16_C(46900) }, INT8_C( 3), { UINT16_C( 1348), UINT16_C(48708), UINT16_C(28488), UINT16_C(25880), UINT16_C(46028), UINT16_C(31356), UINT16_C(14256), UINT16_C(62528) } }, { { UINT16_C(32824), UINT16_C(15409), UINT16_C(50287), UINT16_C(27858), UINT16_C(13977), UINT16_C(25223), UINT16_C(12180), UINT16_C(22818) }, { UINT16_C(21579), UINT16_C(50421), UINT16_C( 6052), UINT16_C(55968) }, INT8_C( 0), { UINT16_C(61544), UINT16_C(46683), UINT16_C(63621), UINT16_C(51590), UINT16_C(13011), UINT16_C(10637), UINT16_C(32860), UINT16_C(17654) } }, { { UINT16_C(29743), UINT16_C(25525), UINT16_C( 3116), UINT16_C(44187), UINT16_C(55101), UINT16_C( 284), UINT16_C(34985), UINT16_C(57243) }, { UINT16_C(64783), UINT16_C(16243), UINT16_C(52511), UINT16_C(29578) }, INT8_C( 2), { UINT16_C(46257), UINT16_C( 1003), UINT16_C(46420), UINT16_C( 1477), UINT16_C(59747), UINT16_C(36452), UINT16_C(57719), UINT16_C(12997) } }, { { UINT16_C( 6222), UINT16_C(61145), UINT16_C(50674), UINT16_C(26397), UINT16_C(32890), UINT16_C(34451), UINT16_C(16155), UINT16_C(62403) }, { UINT16_C(50523), 
UINT16_C(58524), UINT16_C(31840), UINT16_C(24051) }, INT8_C( 3), { UINT16_C(26634), UINT16_C(36091), UINT16_C(52918), UINT16_C(27015), UINT16_C(17870), UINT16_C( 9353), UINT16_C(46497), UINT16_C(14617) } }, { { UINT16_C(32050), UINT16_C(48316), UINT16_C(32752), UINT16_C( 2058), UINT16_C(63832), UINT16_C( 7931), UINT16_C(25110), UINT16_C(38808) }, { UINT16_C( 8181), UINT16_C(13490), UINT16_C(42466), UINT16_C(42896) }, INT8_C( 2), { UINT16_C(49188), UINT16_C(51704), UINT16_C(41440), UINT16_C(35540), UINT16_C(55216), UINT16_C( 8342), UINT16_C(50540), UINT16_C(52272) } }, { { UINT16_C( 1908), UINT16_C(26558), UINT16_C(44389), UINT16_C(58010), UINT16_C(22122), UINT16_C(59858), UINT16_C(56161), UINT16_C(23105) }, { UINT16_C(24534), UINT16_C(14448), UINT16_C( 2040), UINT16_C( 5933) }, INT8_C( 2), { UINT16_C(25696), UINT16_C(45584), UINT16_C(48344), UINT16_C(47920), UINT16_C(40112), UINT16_C(16752), UINT16_C(11512), UINT16_C(13816) } }, { { UINT16_C(63841), UINT16_C(61791), UINT16_C(41377), UINT16_C(43109), UINT16_C(52575), UINT16_C( 3341), UINT16_C(61287), UINT16_C(48503) }, { UINT16_C(24770), UINT16_C(40222), UINT16_C(30881), UINT16_C( 371) }, INT8_C( 1), { UINT16_C(46686), UINT16_C(35874), UINT16_C(44510), UINT16_C(44246), UINT16_C(21538), UINT16_C(32902), UINT16_C(14610), UINT16_C(12018) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x8_t r; SIMDE_CONSTIFY_4_(simde_vmulq_lane_u16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_u16(INT16_C(0))), test_vec[i].lane, a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); int lane = simde_test_codegen_random_i8() & 3; simde_uint16x8_t r; 
SIMDE_CONSTIFY_4_(simde_vmulq_lane_u16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_u16(UINT16_C(0))), lane, a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[2]; int lane; uint32_t r[4]; } test_vec[] = { { { UINT32_C(1755766840), UINT32_C(1721742792), UINT32_C( 366368309), UINT32_C(3851638648) }, { UINT32_C(3317088559), UINT32_C(3000942253) }, INT8_C( 0), { UINT32_C(2239586888), UINT32_C(1670260664), UINT32_C(2886060219), UINT32_C(3259926280) } }, { { UINT32_C(4256160835), UINT32_C(4181021976), UINT32_C(2982256380), UINT32_C(2498111549) }, { UINT32_C(1426914302), UINT32_C(1334112910) }, INT8_C( 0), { UINT32_C(3012967290), UINT32_C(2188965328), UINT32_C(1422709256), UINT32_C(2309733254) } }, { { UINT32_C( 677837897), UINT32_C(2322283438), UINT32_C(1067674090), UINT32_C(1010539678) }, { UINT32_C(2239425629), UINT32_C(3447918290) }, INT8_C( 0), { UINT32_C(3318149765), UINT32_C(4209317430), UINT32_C(3481612802), UINT32_C(2404748646) } }, { { UINT32_C(1544313816), UINT32_C(2812720555), UINT32_C(1448529435), UINT32_C(2722160697) }, { UINT32_C( 524386435), UINT32_C(2826796475) }, INT8_C( 1), { UINT32_C(3730280136), UINT32_C(3388617449), UINT32_C(3476733113), UINT32_C( 380935843) } }, { { UINT32_C(2248505858), UINT32_C(2740854856), UINT32_C(3780033324), UINT32_C( 805172833) }, { UINT32_C( 745034572), UINT32_C( 347081620) }, INT8_C( 0), { UINT32_C(2029969048), UINT32_C(2175331680), UINT32_C( 65983760), UINT32_C(1504614348) } }, { { UINT32_C(1288294351), UINT32_C(4065313143), UINT32_C(3862615494), UINT32_C(3994279219) }, { UINT32_C( 575665196), UINT32_C(1164869361) }, INT8_C( 1), { 
UINT32_C(3023471583), UINT32_C(2456793863), UINT32_C(3954094950), UINT32_C(2994493187) } }, { { UINT32_C(1257429402), UINT32_C(3155836653), UINT32_C(3358877191), UINT32_C(2324637365) }, { UINT32_C(1035892064), UINT32_C(2104077448) }, INT8_C( 0), { UINT32_C(1255209920), UINT32_C(3045059040), UINT32_C(1786344864), UINT32_C(1035287776) } }, { { UINT32_C(4185550475), UINT32_C(2358534579), UINT32_C(2440683091), UINT32_C(1603876600) }, { UINT32_C(2417254762), UINT32_C(1727045616) }, INT8_C( 1), { UINT32_C(4055889744), UINT32_C(3717842128), UINT32_C(2468739792), UINT32_C( 558338176) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x4_t r; SIMDE_CONSTIFY_2_(simde_vmulq_lane_u32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_u32(UINT32_C(0))), test_vec[i].lane, a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); int lane = simde_test_codegen_random_i8() & 1; simde_uint32x4_t r; SIMDE_CONSTIFY_2_(simde_vmulq_lane_u32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_u32(UINT32_C(0))), lane, a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 b[4]; int lane; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -483.10), SIMDE_FLOAT32_C( 841.64), SIMDE_FLOAT32_C( 832.87), SIMDE_FLOAT32_C( 478.63) }, { SIMDE_FLOAT32_C( 684.27), SIMDE_FLOAT32_C( 
-437.87), SIMDE_FLOAT32_C( 138.75), SIMDE_FLOAT32_C( -36.57) }, INT8_C( 0), { SIMDE_FLOAT32_C(-330570.84), SIMDE_FLOAT32_C(575909.00), SIMDE_FLOAT32_C(569907.94), SIMDE_FLOAT32_C(327512.16) } }, { { SIMDE_FLOAT32_C( 141.10), SIMDE_FLOAT32_C( 936.51), SIMDE_FLOAT32_C( -996.31), SIMDE_FLOAT32_C( 809.04) }, { SIMDE_FLOAT32_C( -520.77), SIMDE_FLOAT32_C( -383.63), SIMDE_FLOAT32_C( 396.57), SIMDE_FLOAT32_C( -505.39) }, INT8_C( 0), { SIMDE_FLOAT32_C(-73480.66), SIMDE_FLOAT32_C(-487706.34), SIMDE_FLOAT32_C(518848.38), SIMDE_FLOAT32_C(-421323.75) } }, { { SIMDE_FLOAT32_C( 757.82), SIMDE_FLOAT32_C( -769.31), SIMDE_FLOAT32_C( 479.87), SIMDE_FLOAT32_C( 27.24) }, { SIMDE_FLOAT32_C( 87.58), SIMDE_FLOAT32_C( -325.93), SIMDE_FLOAT32_C( -147.19), SIMDE_FLOAT32_C( 630.99) }, INT8_C( 1), { SIMDE_FLOAT32_C(-246996.27), SIMDE_FLOAT32_C(250741.20), SIMDE_FLOAT32_C(-156404.03), SIMDE_FLOAT32_C( -8878.33) } }, { { SIMDE_FLOAT32_C( 921.82), SIMDE_FLOAT32_C( 48.37), SIMDE_FLOAT32_C( -130.34), SIMDE_FLOAT32_C( 806.30) }, { SIMDE_FLOAT32_C( 565.26), SIMDE_FLOAT32_C( -288.70), SIMDE_FLOAT32_C( 639.18), SIMDE_FLOAT32_C( 43.89) }, INT8_C( 3), { SIMDE_FLOAT32_C( 40458.68), SIMDE_FLOAT32_C( 2122.96), SIMDE_FLOAT32_C( -5720.62), SIMDE_FLOAT32_C( 35388.51) } }, { { SIMDE_FLOAT32_C( -798.70), SIMDE_FLOAT32_C( -817.36), SIMDE_FLOAT32_C( 359.00), SIMDE_FLOAT32_C( 117.01) }, { SIMDE_FLOAT32_C( 323.75), SIMDE_FLOAT32_C( 295.51), SIMDE_FLOAT32_C( 120.71), SIMDE_FLOAT32_C( 132.79) }, INT8_C( 3), { SIMDE_FLOAT32_C(-106059.37), SIMDE_FLOAT32_C(-108537.23), SIMDE_FLOAT32_C( 47671.61), SIMDE_FLOAT32_C( 15537.76) } }, { { SIMDE_FLOAT32_C( 737.08), SIMDE_FLOAT32_C( -470.63), SIMDE_FLOAT32_C( -730.64), SIMDE_FLOAT32_C( 79.42) }, { SIMDE_FLOAT32_C( -712.82), SIMDE_FLOAT32_C( -499.95), SIMDE_FLOAT32_C( -440.71), SIMDE_FLOAT32_C( 314.43) }, INT8_C( 1), { SIMDE_FLOAT32_C(-368503.16), SIMDE_FLOAT32_C(235291.48), SIMDE_FLOAT32_C(365283.47), SIMDE_FLOAT32_C(-39706.03) } }, { { SIMDE_FLOAT32_C( 233.36), SIMDE_FLOAT32_C( 
-832.76), SIMDE_FLOAT32_C( 218.62), SIMDE_FLOAT32_C( 442.88) }, { SIMDE_FLOAT32_C( -910.95), SIMDE_FLOAT32_C( -733.02), SIMDE_FLOAT32_C( -687.47), SIMDE_FLOAT32_C( 895.36) }, INT8_C( 1), { SIMDE_FLOAT32_C(-171057.55), SIMDE_FLOAT32_C(610429.75), SIMDE_FLOAT32_C(-160252.83), SIMDE_FLOAT32_C(-324639.91) } }, { { SIMDE_FLOAT32_C( 23.84), SIMDE_FLOAT32_C( 534.54), SIMDE_FLOAT32_C( -123.86), SIMDE_FLOAT32_C( 419.41) }, { SIMDE_FLOAT32_C( 735.84), SIMDE_FLOAT32_C( 58.79), SIMDE_FLOAT32_C( -221.60), SIMDE_FLOAT32_C( -147.15) }, INT8_C( 1), { SIMDE_FLOAT32_C( 1401.55), SIMDE_FLOAT32_C( 31425.61), SIMDE_FLOAT32_C( -7281.73), SIMDE_FLOAT32_C( 24657.12) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r; SIMDE_CONSTIFY_4_(simde_vmulq_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), test_vec[i].lane, a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0, 1000.0); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0, 1000.0); int lane = simde_test_codegen_random_i8() & 3; simde_float32x4_t r; SIMDE_CONSTIFY_4_(simde_vmulq_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))), lane, a, b); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_laneq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 b[2]; int lane; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -8.30), 
SIMDE_FLOAT64_C( 39.42) }, { SIMDE_FLOAT64_C( -145.93), SIMDE_FLOAT64_C( -489.21) }, INT8_C( 0), { SIMDE_FLOAT64_C( 1211.22), SIMDE_FLOAT64_C( -5752.56) } }, { { SIMDE_FLOAT64_C( -947.61), SIMDE_FLOAT64_C( 662.96) }, { SIMDE_FLOAT64_C( 341.51), SIMDE_FLOAT64_C( 581.12) }, INT8_C( 1), { SIMDE_FLOAT64_C(-550675.12), SIMDE_FLOAT64_C(385259.32) } }, { { SIMDE_FLOAT64_C( 777.29), SIMDE_FLOAT64_C( 252.16) }, { SIMDE_FLOAT64_C( 955.20), SIMDE_FLOAT64_C( -332.93) }, INT8_C( 1), { SIMDE_FLOAT64_C(-258783.16), SIMDE_FLOAT64_C(-83951.63) } }, { { SIMDE_FLOAT64_C( 446.81), SIMDE_FLOAT64_C( -501.58) }, { SIMDE_FLOAT64_C( -807.53), SIMDE_FLOAT64_C( -797.54) }, INT8_C( 0), { SIMDE_FLOAT64_C(-360812.48), SIMDE_FLOAT64_C(405040.90) } }, { { SIMDE_FLOAT64_C( 357.00), SIMDE_FLOAT64_C( -144.04) }, { SIMDE_FLOAT64_C( 968.23), SIMDE_FLOAT64_C( -970.92) }, INT8_C( 0), { SIMDE_FLOAT64_C(345658.11), SIMDE_FLOAT64_C(-139463.85) } }, { { SIMDE_FLOAT64_C( 974.60), SIMDE_FLOAT64_C( 51.96) }, { SIMDE_FLOAT64_C( 988.71), SIMDE_FLOAT64_C( 292.59) }, INT8_C( 0), { SIMDE_FLOAT64_C(963596.77), SIMDE_FLOAT64_C( 51373.37) } }, { { SIMDE_FLOAT64_C( -177.10), SIMDE_FLOAT64_C( -715.71) }, { SIMDE_FLOAT64_C( -172.78), SIMDE_FLOAT64_C( 676.97) }, INT8_C( 1), { SIMDE_FLOAT64_C(-119891.39), SIMDE_FLOAT64_C(-484514.20) } }, { { SIMDE_FLOAT64_C( -585.24), SIMDE_FLOAT64_C( 729.36) }, { SIMDE_FLOAT64_C( -541.96), SIMDE_FLOAT64_C( 756.27) }, INT8_C( 1), { SIMDE_FLOAT64_C(-442599.45), SIMDE_FLOAT64_C(551593.09) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r; SIMDE_CONSTIFY_2_(simde_vmulq_laneq_f64, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0))), test_vec[i].lane, a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a 
= simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t b = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); int lane = simde_test_codegen_random_i8() & 1; simde_float64x2_t r; SIMDE_CONSTIFY_2_(simde_vmulq_laneq_f64, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0))), lane, a, b); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; int lane; int16_t r[8]; } test_vec[] = { { { -INT16_C( 15106), -INT16_C( 11576), INT16_C( 21525), -INT16_C( 22481), INT16_C( 8396), INT16_C( 4855), INT16_C( 6473), -INT16_C( 18119) }, { -INT16_C( 22892), INT16_C( 18029), -INT16_C( 30073), INT16_C( 4981), INT16_C( 31143), -INT16_C( 17613), INT16_C( 25903), INT16_C( 11615) }, INT8_C( 1), { INT16_C( 21542), INT16_C( 28456), -INT16_C( 29967), INT16_C( 30211), -INT16_C( 16676), -INT16_C( 25301), -INT16_C( 17899), INT16_C( 29509) } }, { { -INT16_C( 217), INT16_C( 31806), -INT16_C( 6354), INT16_C( 20040), INT16_C( 23262), -INT16_C( 2153), INT16_C( 20627), INT16_C( 14732) }, { -INT16_C( 11586), INT16_C( 18624), -INT16_C( 11449), -INT16_C( 15888), -INT16_C( 21754), INT16_C( 27632), INT16_C( 7434), INT16_C( 12948) }, INT8_C( 5), { -INT16_C( 32368), INT16_C( 25632), -INT16_C( 2784), INT16_C( 31616), -INT16_C( 1504), INT16_C( 14992), -INT16_C( 1328), INT16_C( 30528) } }, { { -INT16_C( 20781), -INT16_C( 17845), -INT16_C( 25866), INT16_C( 20632), -INT16_C( 28879), -INT16_C( 32029), INT16_C( 7195), -INT16_C( 4800) }, { -INT16_C( 30500), -INT16_C( 20683), -INT16_C( 2440), INT16_C( 9398), INT16_C( 8678), INT16_C( 814), INT16_C( 24758), -INT16_C( 30432) }, INT8_C( 6), { INT16_C( 27138), -INT16_C( 
28334), INT16_C( 27364), INT16_C( 19472), INT16_C( 11478), INT16_C( 11618), INT16_C( 6962), -INT16_C( 21632) } }, { { INT16_C( 17260), INT16_C( 1540), INT16_C( 21723), INT16_C( 27191), -INT16_C( 18121), INT16_C( 21382), INT16_C( 29689), -INT16_C( 32209) }, { -INT16_C( 8280), -INT16_C( 24838), INT16_C( 7829), -INT16_C( 18812), -INT16_C( 30643), -INT16_C( 21140), -INT16_C( 2648), INT16_C( 5308) }, INT8_C( 0), { INT16_C( 21216), INT16_C( 28320), INT16_C( 29880), -INT16_C( 25320), INT16_C( 29976), -INT16_C( 30224), INT16_C( 616), INT16_C( 24536) } }, { { INT16_C( 6848), INT16_C( 5395), INT16_C( 32338), INT16_C( 2892), -INT16_C( 24572), INT16_C( 30469), -INT16_C( 30769), -INT16_C( 20960) }, { -INT16_C( 16767), -INT16_C( 24509), -INT16_C( 1469), -INT16_C( 13331), -INT16_C( 26010), INT16_C( 23667), -INT16_C( 30634), INT16_C( 6036) }, INT8_C( 2), { -INT16_C( 32704), INT16_C( 4601), INT16_C( 9078), INT16_C( 11492), -INT16_C( 14068), INT16_C( 2127), -INT16_C( 20179), -INT16_C( 11680) } }, { { INT16_C( 11432), INT16_C( 9972), INT16_C( 120), INT16_C( 6186), -INT16_C( 24315), -INT16_C( 29464), -INT16_C( 26943), -INT16_C( 32755) }, { -INT16_C( 21030), -INT16_C( 11069), -INT16_C( 29030), INT16_C( 13626), -INT16_C( 27135), -INT16_C( 30325), -INT16_C( 24021), -INT16_C( 11476) }, INT8_C( 6), { -INT16_C( 12232), -INT16_C( 3332), INT16_C( 1064), -INT16_C( 23794), INT16_C( 13783), INT16_C( 31480), INT16_C( 29803), -INT16_C( 17361) } }, { { -INT16_C( 1760), INT16_C( 8263), INT16_C( 24355), -INT16_C( 15323), -INT16_C( 20153), -INT16_C( 8570), INT16_C( 1727), INT16_C( 27832) }, { -INT16_C( 29495), INT16_C( 22279), INT16_C( 15558), INT16_C( 23896), -INT16_C( 7481), INT16_C( 27272), INT16_C( 23310), INT16_C( 11832) }, INT8_C( 4), { -INT16_C( 6176), -INT16_C( 15055), -INT16_C( 9675), INT16_C( 8899), INT16_C( 31793), INT16_C( 17962), -INT16_C( 9095), -INT16_C( 3320) } }, { { INT16_C( 20351), -INT16_C( 8329), INT16_C( 15220), INT16_C( 9766), INT16_C( 1217), -INT16_C( 14363), INT16_C( 20924), 
INT16_C( 18576) }, { -INT16_C( 6312), -INT16_C( 27633), INT16_C( 27712), INT16_C( 8796), -INT16_C( 14604), INT16_C( 20272), INT16_C( 24318), INT16_C( 32419) }, INT8_C( 5), { INT16_C( 6352), -INT16_C( 24752), -INT16_C( 3648), -INT16_C( 7904), INT16_C( 29488), INT16_C( 9712), INT16_C( 22336), INT16_C( 2816) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r; SIMDE_CONSTIFY_8_(simde_vmulq_laneq_s16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_s16(INT16_C(0))), test_vec[i].lane, a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); int lane = simde_test_codegen_random_i8() & 7; simde_int16x8_t r; SIMDE_CONSTIFY_8_(simde_vmulq_laneq_s16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_s16(INT16_C(0))), lane, a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; int lane; int32_t r[4]; } test_vec[] = { { { INT32_C( 1428315418), -INT32_C( 2011740029), INT32_C( 2118442541), INT32_C( 1456967023) }, { INT32_C( 144075676), -INT32_C( 1912817465), INT32_C( 1200376808), INT32_C( 150211310) }, INT8_C( 3), { INT32_C( 173904428), -INT32_C( 1554610230), INT32_C( 1066946518), INT32_C( 1840763698) } }, { { INT32_C( 1592417558), INT32_C( 1401647732), INT32_C( 1153567415), INT32_C( 1289754849) }, { INT32_C( 1729423535), INT32_C( 793813476), INT32_C( 924686125), 
-INT32_C( 1566694005) }, INT8_C( 2), { INT32_C( 832067294), -INT32_C( 752252316), INT32_C( 304665387), -INT32_C( 628222323) } }, { { -INT32_C( 67698551), -INT32_C( 1766700404), INT32_C( 611841548), -INT32_C( 1093418026) }, { INT32_C( 2040675287), INT32_C( 581357963), INT32_C( 330161902), INT32_C( 93736828) }, INT8_C( 0), { -INT32_C( 1589443057), -INT32_C( 8200300), -INT32_C( 1509136364), INT32_C( 627559098) } }, { { -INT32_C( 673447539), -INT32_C( 1427934541), -INT32_C( 1400895511), INT32_C( 377765851) }, { -INT32_C( 1281229343), -INT32_C( 2120105053), -INT32_C( 1057049232), -INT32_C( 670039221) }, INT8_C( 0), { -INT32_C( 1231899155), INT32_C( 1171633747), INT32_C( 148977865), -INT32_C( 955085189) } }, { { INT32_C( 1589161964), -INT32_C( 1723374958), INT32_C( 561312738), INT32_C( 1946389111) }, { -INT32_C( 300370389), INT32_C( 207591767), -INT32_C( 1688789097), INT32_C( 463417135) }, INT8_C( 2), { -INT32_C( 378447820), -INT32_C( 207039970), -INT32_C( 1655556018), -INT32_C( 146008783) } }, { { -INT32_C( 1183811241), -INT32_C( 1281619264), INT32_C( 153861502), INT32_C( 1983160256) }, { INT32_C( 1372464055), -INT32_C( 1578575230), INT32_C( 1641055026), INT32_C( 2051074851) }, INT8_C( 0), { INT32_C( 452960817), INT32_C( 545517376), -INT32_C( 493834478), -INT32_C( 34246080) } }, { { -INT32_C( 1172032336), -INT32_C( 1925589040), -INT32_C( 1571929598), -INT32_C( 1722170250) }, { INT32_C( 1830529682), INT32_C( 379567251), INT32_C( 2000224396), -INT32_C( 237259712) }, INT8_C( 0), { -INT32_C( 1305285536), INT32_C( 1195193504), INT32_C( 1487730980), INT32_C( 135948108) } }, { { -INT32_C( 675763201), INT32_C( 651773412), INT32_C( 1486715795), INT32_C( 2146055893) }, { INT32_C( 236083026), -INT32_C( 140826122), INT32_C( 372707938), -INT32_C( 302110482) }, INT8_C( 0), { -INT32_C( 444022610), -INT32_C( 1436492024), -INT32_C( 1580824042), INT32_C( 312603450) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = 
simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r; SIMDE_CONSTIFY_4_(simde_vmulq_laneq_s32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_s32(INT32_C(0))), test_vec[i].lane, a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); int lane = simde_test_codegen_random_i8() & 3; simde_int32x4_t r; SIMDE_CONSTIFY_4_(simde_vmulq_laneq_s32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_s32(INT32_C(0))), lane, a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_laneq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; int lane; uint16_t r[8]; } test_vec[] = { { { UINT16_C(50358), UINT16_C(64696), UINT16_C(57246), UINT16_C( 6543), UINT16_C(59260), UINT16_C(45806), UINT16_C(28370), UINT16_C(10500) }, { UINT16_C( 4992), UINT16_C(43296), UINT16_C( 6062), UINT16_C(49164), UINT16_C( 8782), UINT16_C(30638), UINT16_C(39969), UINT16_C(55115) }, INT8_C( 0), { UINT16_C(56576), UINT16_C( 1024), UINT16_C(35072), UINT16_C(25728), UINT16_C(61952), UINT16_C( 8448), UINT16_C(65280), UINT16_C(52736) } }, { { UINT16_C(54019), UINT16_C(58110), UINT16_C( 6243), UINT16_C(19038), UINT16_C( 4358), UINT16_C(29724), UINT16_C(17941), UINT16_C(10485) }, { UINT16_C(40550), UINT16_C(32214), UINT16_C(38826), UINT16_C(52683), UINT16_C(16965), UINT16_C(57838), UINT16_C(50573), UINT16_C(37186) }, INT8_C( 1), { UINT16_C(56194), UINT16_C(50772), UINT16_C(47554), UINT16_C( 4244), UINT16_C(10500), UINT16_C(47976), UINT16_C(54926), 
UINT16_C(56782) } }, { { UINT16_C(29504), UINT16_C(22780), UINT16_C(18130), UINT16_C(58207), UINT16_C(54115), UINT16_C(43512), UINT16_C( 8648), UINT16_C(26383) }, { UINT16_C(36087), UINT16_C(36369), UINT16_C(56919), UINT16_C(39636), UINT16_C(46540), UINT16_C(37415), UINT16_C(47351), UINT16_C(14379) }, INT8_C( 4), { UINT16_C( 5888), UINT16_C( 5328), UINT16_C(59736), UINT16_C(23220), UINT16_C(29156), UINT16_C(51616), UINT16_C(21344), UINT16_C(47860) } }, { { UINT16_C(36903), UINT16_C(28158), UINT16_C(57839), UINT16_C(50128), UINT16_C(31193), UINT16_C(64139), UINT16_C(62088), UINT16_C( 5362) }, { UINT16_C(32772), UINT16_C(57964), UINT16_C( 1620), UINT16_C( 2735), UINT16_C(16685), UINT16_C(58881), UINT16_C(14700), UINT16_C(37650) }, INT8_C( 2), { UINT16_C(14028), UINT16_C( 2904), UINT16_C(48236), UINT16_C( 8256), UINT16_C( 4404), UINT16_C(30620), UINT16_C(50336), UINT16_C(35688) } }, { { UINT16_C( 16), UINT16_C(61881), UINT16_C(31953), UINT16_C(19146), UINT16_C(50440), UINT16_C(64211), UINT16_C(59319), UINT16_C(14334) }, { UINT16_C(57683), UINT16_C(22924), UINT16_C(38544), UINT16_C(53639), UINT16_C(28055), UINT16_C(53565), UINT16_C(53375), UINT16_C(36763) }, INT8_C( 0), { UINT16_C( 5424), UINT16_C(63483), UINT16_C(10435), UINT16_C(51582), UINT16_C(59800), UINT16_C(50537), UINT16_C(63317), UINT16_C(25946) } }, { { UINT16_C(32852), UINT16_C(53665), UINT16_C(60490), UINT16_C( 4057), UINT16_C(54207), UINT16_C(42694), UINT16_C(65234), UINT16_C(46074) }, { UINT16_C(21386), UINT16_C( 8259), UINT16_C( 5338), UINT16_C(18359), UINT16_C(34897), UINT16_C( 8646), UINT16_C(21795), UINT16_C(30961) }, INT8_C( 5), { UINT16_C( 5368), UINT16_C(58246), UINT16_C(19260), UINT16_C(15062), UINT16_C(25786), UINT16_C(33572), UINT16_C(10348), UINT16_C(27996) } }, { { UINT16_C(18835), UINT16_C(32544), UINT16_C(12066), UINT16_C(62782), UINT16_C(58614), UINT16_C(62663), UINT16_C(31454), UINT16_C(12926) }, { UINT16_C(40637), UINT16_C(53516), UINT16_C(21589), UINT16_C(56866), UINT16_C(17178), 
UINT16_C(28673), UINT16_C(31029), UINT16_C(51269) }, INT8_C( 2), { UINT16_C(43471), UINT16_C(46496), UINT16_C(52810), UINT16_C(50582), UINT16_C(48558), UINT16_C(37395), UINT16_C(41910), UINT16_C( 7126) } }, { { UINT16_C(18277), UINT16_C(38372), UINT16_C(55941), UINT16_C(27019), UINT16_C(32673), UINT16_C( 7240), UINT16_C(31485), UINT16_C(39897) }, { UINT16_C(43910), UINT16_C(56048), UINT16_C(52941), UINT16_C( 4597), UINT16_C(26064), UINT16_C(18758), UINT16_C( 3754), UINT16_C( 4108) }, INT8_C( 5), { UINT16_C(21150), UINT16_C( 88), UINT16_C(44382), UINT16_C(32514), UINT16_C(52998), UINT16_C(17328), UINT16_C(50734), UINT16_C(32342) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r; SIMDE_CONSTIFY_8_(simde_vmulq_laneq_u16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_u16(INT16_C(0))), test_vec[i].lane, a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); int lane = simde_test_codegen_random_i8() & 7; simde_uint16x8_t r; SIMDE_CONSTIFY_8_(simde_vmulq_laneq_u16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_u16(UINT16_C(0))), lane, a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_laneq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; int lane; uint32_t r[4]; } test_vec[] = { { { UINT32_C( 133424210), UINT32_C(3567681185), UINT32_C( 168554144), UINT32_C(2467885481) }, { UINT32_C(3585083847), 
UINT32_C(2741717858), UINT32_C(2755662601), UINT32_C(1393839105) }, INT8_C( 0), { UINT32_C(4093468094), UINT32_C( 730863655), UINT32_C(1755979360), UINT32_C(4195535711) } }, { { UINT32_C(2261932808), UINT32_C(4012287489), UINT32_C(2744660401), UINT32_C(1382689609) }, { UINT32_C( 666189788), UINT32_C(3190839467), UINT32_C(3854554265), UINT32_C(4027978728) }, INT8_C( 2), { UINT32_C(3658680264), UINT32_C(2941323929), UINT32_C( 925583049), UINT32_C(3317912225) } }, { { UINT32_C(2389669864), UINT32_C(3460325021), UINT32_C( 572056566), UINT32_C(2382260813) }, { UINT32_C(2016945439), UINT32_C( 689043029), UINT32_C(3373397686), UINT32_C(4097245708) }, INT8_C( 1), { UINT32_C(2258878472), UINT32_C(3283568673), UINT32_C(3332904110), UINT32_C( 62671249) } }, { { UINT32_C( 68584358), UINT32_C(2801460675), UINT32_C(1743985917), UINT32_C(1065844762) }, { UINT32_C(2945777592), UINT32_C( 107330832), UINT32_C(3507695311), UINT32_C( 189400933) }, INT8_C( 2), { UINT32_C(2771273530), UINT32_C(1520554669), UINT32_C(1888544403), UINT32_C(1937981702) } }, { { UINT32_C(1162678112), UINT32_C( 641921801), UINT32_C(1732291303), UINT32_C( 807436337) }, { UINT32_C(3527462676), UINT32_C(1654736436), UINT32_C(1640460889), UINT32_C( 485216956) }, INT8_C( 1), { UINT32_C(2436587392), UINT32_C(1589040084), UINT32_C(2435113196), UINT32_C( 175665140) } }, { { UINT32_C( 753558073), UINT32_C(1309872292), UINT32_C(3514792785), UINT32_C(1793437595) }, { UINT32_C( 916371439), UINT32_C(3398369368), UINT32_C(2575757511), UINT32_C( 360358876) }, INT8_C( 1), { UINT32_C(1214825368), UINT32_C(2317989984), UINT32_C(3490800600), UINT32_C( 612982088) } }, { { UINT32_C(1974026596), UINT32_C(3502700629), UINT32_C( 661362551), UINT32_C( 873911676) }, { UINT32_C(1938574451), UINT32_C(3426375644), UINT32_C(2158547933), UINT32_C(2978331981) }, INT8_C( 3), { UINT32_C(1982037780), UINT32_C(2052616849), UINT32_C(1107454411), UINT32_C( 736543308) } }, { { UINT32_C( 659826223), UINT32_C(2208179436), UINT32_C(1677772175), 
UINT32_C( 685192412) }, { UINT32_C( 386157248), UINT32_C(1475727748), UINT32_C( 933524857), UINT32_C( 708204027) }, INT8_C( 0), { UINT32_C(2749512000), UINT32_C( 888662272), UINT32_C( 686522688), UINT32_C(1847540992) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r; SIMDE_CONSTIFY_4_(simde_vmulq_laneq_u32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_u32(UINT32_C(0))), test_vec[i].lane, a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); int lane = simde_test_codegen_random_i8() & 3; simde_uint32x4_t r; SIMDE_CONSTIFY_4_(simde_vmulq_laneq_u32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_u32(UINT32_C(0))), lane, a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, lane), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } HEDLEY_DIAGNOSTIC_POP SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmul_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_lane_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_lane_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_lane_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_lane_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_lane_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_lane_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_laneq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_laneq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_laneq_s16) 
SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_laneq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_laneq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_laneq_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/mul_n.c000066400000000000000000000766521400333146700166150ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN mul_n #include "test-neon.h" #include "../../../simde/arm/neon/mul_n.h" static int test_simde_vmul_n_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 b; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -322.57), SIMDE_FLOAT32_C( 102.45) }, SIMDE_FLOAT32_C( 338.06), { SIMDE_FLOAT32_C(-109048.02), SIMDE_FLOAT32_C( 34634.25) } }, { { SIMDE_FLOAT32_C( 826.06), SIMDE_FLOAT32_C( 35.18) }, SIMDE_FLOAT32_C( -55.56), { SIMDE_FLOAT32_C(-45895.89), SIMDE_FLOAT32_C( -1954.60) } }, { { SIMDE_FLOAT32_C( 331.09), SIMDE_FLOAT32_C( 34.30) }, SIMDE_FLOAT32_C( -93.48), { SIMDE_FLOAT32_C(-30950.29), SIMDE_FLOAT32_C( -3206.36) } }, { { SIMDE_FLOAT32_C( -539.02), SIMDE_FLOAT32_C( -424.49) }, SIMDE_FLOAT32_C( 433.22), { SIMDE_FLOAT32_C(-233514.25), SIMDE_FLOAT32_C(-183897.55) } }, { { SIMDE_FLOAT32_C( -259.56), SIMDE_FLOAT32_C( -381.86) }, SIMDE_FLOAT32_C( -747.65), { SIMDE_FLOAT32_C(194060.03), SIMDE_FLOAT32_C(285497.62) } }, { { SIMDE_FLOAT32_C( -602.55), SIMDE_FLOAT32_C( 464.29) }, SIMDE_FLOAT32_C( 164.24), { SIMDE_FLOAT32_C(-98962.81), SIMDE_FLOAT32_C( 76254.99) } }, { { SIMDE_FLOAT32_C( -454.97), SIMDE_FLOAT32_C( -825.36) }, SIMDE_FLOAT32_C( -63.46), { SIMDE_FLOAT32_C( 28872.40), SIMDE_FLOAT32_C( 52377.34) } }, { { SIMDE_FLOAT32_C( -885.16), SIMDE_FLOAT32_C( -707.21) }, SIMDE_FLOAT32_C( 474.46), { SIMDE_FLOAT32_C(-419973.00), SIMDE_FLOAT32_C(-335542.88) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32 b = test_vec[i].b; simde_float32x2_t r = simde_vmul_n_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, 
simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32 b = simde_test_codegen_random_f32(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vmul_n_f32(a, b); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmul_n_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; simde_float64 b; simde_float64 r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( 8.29) }, SIMDE_FLOAT64_C( 136.66), { SIMDE_FLOAT64_C( 1132.91) } }, { { SIMDE_FLOAT64_C( 594.35) }, SIMDE_FLOAT64_C( 690.12), { SIMDE_FLOAT64_C(410172.82) } }, { { SIMDE_FLOAT64_C( 410.65) }, SIMDE_FLOAT64_C( -245.50), { SIMDE_FLOAT64_C(-100814.57) } }, { { SIMDE_FLOAT64_C( -390.11) }, SIMDE_FLOAT64_C( -996.89), { SIMDE_FLOAT64_C(388896.76) } }, { { SIMDE_FLOAT64_C( -725.18) }, SIMDE_FLOAT64_C( 771.24), { SIMDE_FLOAT64_C(-559287.82) } }, { { SIMDE_FLOAT64_C( -382.17) }, SIMDE_FLOAT64_C( 80.74), { SIMDE_FLOAT64_C(-30856.41) } }, { { SIMDE_FLOAT64_C( 57.06) }, SIMDE_FLOAT64_C( 152.89), { SIMDE_FLOAT64_C( 8723.90) } }, { { SIMDE_FLOAT64_C( -971.23) }, SIMDE_FLOAT64_C( 806.94), { SIMDE_FLOAT64_C(-783724.34) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64 b = test_vec[i].b; simde_float64x1_t r = simde_vmul_n_f64(a, b); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64 b = simde_test_codegen_random_f64(-1000.0f, 1000.0f); simde_float64x1_t r = simde_vmul_n_f64(a, b); simde_test_arm_neon_write_f64x1(2, a, 
SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmul_n_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b; int16_t r[4]; } test_vec[] = { { { -INT16_C( 11569), -INT16_C( 13956), INT16_C( 1331), INT16_C( 9314) }, -INT16_C( 30067), { -INT16_C( 19965), -INT16_C( 11956), INT16_C( 23319), -INT16_C( 8710) } }, { { -INT16_C( 13683), INT16_C( 14392), -INT16_C( 22788), -INT16_C( 4789) }, -INT16_C( 30281), { INT16_C( 16331), INT16_C( 10248), INT16_C( 14884), -INT16_C( 15459) } }, { { -INT16_C( 21076), -INT16_C( 4703), -INT16_C( 32359), INT16_C( 15317) }, INT16_C( 29744), { -INT16_C( 32704), -INT16_C( 32208), -INT16_C( 24400), -INT16_C( 17424) } }, { { -INT16_C( 213), -INT16_C( 22714), INT16_C( 31432), INT16_C( 10924) }, INT16_C( 14750), { INT16_C( 3978), -INT16_C( 11468), INT16_C( 20336), -INT16_C( 24024) } }, { { INT16_C( 11444), -INT16_C( 5116), INT16_C( 100), -INT16_C( 20333) }, INT16_C( 19181), { INT16_C( 27300), -INT16_C( 22604), INT16_C( 17556), -INT16_C( 2537) } }, { { -INT16_C( 26311), -INT16_C( 9481), -INT16_C( 28282), INT16_C( 23643) }, -INT16_C( 29748), { INT16_C( 3180), -INT16_C( 26156), -INT16_C( 18232), INT16_C( 388) } }, { { -INT16_C( 2096), INT16_C( 5770), INT16_C( 21151), INT16_C( 19344) }, INT16_C( 12156), { INT16_C( 14528), INT16_C( 16600), INT16_C( 13828), INT16_C( 2496) } }, { { INT16_C( 12421), -INT16_C( 30373), -INT16_C( 16611), -INT16_C( 20343) }, INT16_C( 30575), { -INT16_C( 9045), -INT16_C( 9355), INT16_C( 22675), INT16_C( 14951) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); int16_t b = test_vec[i].b; simde_int16x4_t r = simde_vmul_n_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { 
simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); int16_t b = simde_test_codegen_random_i16(); simde_int16x4_t r = simde_vmul_n_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmul_n_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b; int32_t r[2]; } test_vec[] = { { { -INT32_C( 2046384268), -INT32_C( 937882563) }, -INT32_C( 653222027), { INT32_C( 776705028), INT32_C( 2061269729) } }, { { INT32_C( 1723824725), -INT32_C( 838833157) }, INT32_C( 67083436), { -INT32_C( 1864565476), INT32_C( 706379940) } }, { { INT32_C( 474729031), INT32_C( 124028051) }, INT32_C( 1770875436), { -INT32_C( 958191052), INT32_C( 874686276) } }, { { -INT32_C( 349067658), -INT32_C( 1681571515) }, -INT32_C( 1241414469), { -INT32_C( 242257870), -INT32_C( 838647449) } }, { { -INT32_C( 1669004560), -INT32_C( 442530658) }, -INT32_C( 519968178), { INT32_C( 942032160), INT32_C( 678482980) } }, { { -INT32_C( 1679268497), INT32_C( 1174697680) }, INT32_C( 1647457564), { INT32_C( 2014909220), INT32_C( 1773539008) } }, { { INT32_C( 838727542), INT32_C( 1810366075) }, -INT32_C( 1626903552), { INT32_C( 849332224), INT32_C( 968418304) } }, { { INT32_C( 1065658097), INT32_C( 2197137) }, -INT32_C( 1147467285), { INT32_C( 395229243), -INT32_C( 1320378341) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); int32_t b = test_vec[i].b; simde_int32x2_t r = simde_vmul_n_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); int32_t b = simde_test_codegen_random_i32(); simde_int32x2_t r = simde_vmul_n_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, 
SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmul_n_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b; uint16_t r[4]; } test_vec[] = { { { UINT16_C(10900), UINT16_C(51102), UINT16_C(21030), UINT16_C(24060) }, UINT16_C(48791), { UINT16_C(62796), UINT16_C( 562), UINT16_C(43114), UINT16_C(30628) } }, { { UINT16_C( 8504), UINT16_C(37652), UINT16_C( 4524), UINT16_C(29706) }, UINT16_C(34360), { UINT16_C(37952), UINT16_C(42080), UINT16_C(58784), UINT16_C(40496) } }, { { UINT16_C(24879), UINT16_C(36045), UINT16_C(40790), UINT16_C(53145) }, UINT16_C(33486), { UINT16_C( 4562), UINT16_C(26358), UINT16_C(58164), UINT16_C(48926) } }, { { UINT16_C(25144), UINT16_C(54957), UINT16_C(54058), UINT16_C( 9768) }, UINT16_C(48944), { UINT16_C(12928), UINT16_C(21360), UINT16_C(60896), UINT16_C(65408) } }, { { UINT16_C(26852), UINT16_C(63712), UINT16_C(36091), UINT16_C( 1290) }, UINT16_C(16897), { UINT16_C(12516), UINT16_C(47328), UINT16_C(17147), UINT16_C(39178) } }, { { UINT16_C(12427), UINT16_C(22691), UINT16_C(63932), UINT16_C(22008) }, UINT16_C(50889), { UINT16_C(40739), UINT16_C(43515), UINT16_C(31900), UINT16_C(20408) } }, { { UINT16_C( 472), UINT16_C(34089), UINT16_C(21463), UINT16_C(65368) }, UINT16_C(35193), { UINT16_C(30488), UINT16_C(57697), UINT16_C(44959), UINT16_C(51352) } }, { { UINT16_C(23999), UINT16_C(40945), UINT16_C(60758), UINT16_C(24620) }, UINT16_C(11762), { UINT16_C(12686), UINT16_C(36562), UINT16_C(31052), UINT16_C(42392) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); uint16_t b = test_vec[i].b; simde_uint16x4_t r = simde_vmul_n_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { 
simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); uint16_t b = simde_test_codegen_random_u16(); simde_uint16x4_t r = simde_vmul_n_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmul_n_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b; uint32_t r[2]; } test_vec[] = { { { UINT32_C(3164635766), UINT32_C(1397605284) }, UINT32_C(2149061254), { UINT32_C(1077974468), UINT32_C(1775628248) } }, { { UINT32_C( 229242173), UINT32_C(2073123090) }, UINT32_C(2781432267), { UINT32_C( 714660959), UINT32_C(4157195078) } }, { { UINT32_C(2999833826), UINT32_C( 621473455) }, UINT32_C( 367110768), { UINT32_C(2383230688), UINT32_C(2585240208) } }, { { UINT32_C(4100468334), UINT32_C(2121564225) }, UINT32_C(2290818677), { UINT32_C(2395479110), UINT32_C(2643540917) } }, { { UINT32_C(1124277623), UINT32_C(1239993446) }, UINT32_C(1341896096), { UINT32_C(2993982816), UINT32_C(2886065600) } }, { { UINT32_C( 343213731), UINT32_C( 506025648) }, UINT32_C(3323171205), { UINT32_C(2041038255), UINT32_C(2762193264) } }, { { UINT32_C(2252637969), UINT32_C( 487510181) }, UINT32_C(1398804973), { UINT32_C( 63515581), UINT32_C(2675056065) } }, { { UINT32_C(2124171485), UINT32_C(2714605822) }, UINT32_C(1320501918), { UINT32_C(1233056358), UINT32_C(1844177092) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); uint32_t b = test_vec[i].b; simde_uint32x2_t r = simde_vmul_n_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); uint32_t b = simde_test_codegen_random_u32(); simde_uint32x2_t r = simde_vmul_n_u32(a, b); 
simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_n_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 b; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 715.05), SIMDE_FLOAT32_C( -716.07), SIMDE_FLOAT32_C( -502.73), SIMDE_FLOAT32_C( 871.00) }, SIMDE_FLOAT32_C( 429.18), { SIMDE_FLOAT32_C(306885.16), SIMDE_FLOAT32_C(-307322.91), SIMDE_FLOAT32_C(-215761.66), SIMDE_FLOAT32_C(373815.78) } }, { { SIMDE_FLOAT32_C( -532.27), SIMDE_FLOAT32_C( -766.37), SIMDE_FLOAT32_C( -989.32), SIMDE_FLOAT32_C( 215.47) }, SIMDE_FLOAT32_C( 230.94), { SIMDE_FLOAT32_C(-122922.44), SIMDE_FLOAT32_C(-176985.48), SIMDE_FLOAT32_C(-228473.56), SIMDE_FLOAT32_C( 49760.64) } }, { { SIMDE_FLOAT32_C( -948.09), SIMDE_FLOAT32_C( -787.27), SIMDE_FLOAT32_C( -800.69), SIMDE_FLOAT32_C( -447.45) }, SIMDE_FLOAT32_C( 14.29), { SIMDE_FLOAT32_C(-13548.21), SIMDE_FLOAT32_C(-11250.09), SIMDE_FLOAT32_C(-11441.86), SIMDE_FLOAT32_C( -6394.06) } }, { { SIMDE_FLOAT32_C( -572.66), SIMDE_FLOAT32_C( 567.13), SIMDE_FLOAT32_C( -909.19), SIMDE_FLOAT32_C( 166.79) }, SIMDE_FLOAT32_C( -610.23), { SIMDE_FLOAT32_C(349454.28), SIMDE_FLOAT32_C(-346079.72), SIMDE_FLOAT32_C(554815.00), SIMDE_FLOAT32_C(-101780.26) } }, { { SIMDE_FLOAT32_C( 231.91), SIMDE_FLOAT32_C( 484.47), SIMDE_FLOAT32_C( 543.89), SIMDE_FLOAT32_C( 817.92) }, SIMDE_FLOAT32_C( -700.44), { SIMDE_FLOAT32_C(-162439.05), SIMDE_FLOAT32_C(-339342.16), SIMDE_FLOAT32_C(-380962.31), SIMDE_FLOAT32_C(-572903.88) } }, { { SIMDE_FLOAT32_C( 709.18), SIMDE_FLOAT32_C( 81.85), SIMDE_FLOAT32_C( -425.91), SIMDE_FLOAT32_C( 317.14) }, SIMDE_FLOAT32_C( 890.53), { SIMDE_FLOAT32_C(631546.06), SIMDE_FLOAT32_C( 72889.88), SIMDE_FLOAT32_C(-379285.66), SIMDE_FLOAT32_C(282422.72) } }, { { SIMDE_FLOAT32_C( 108.81), SIMDE_FLOAT32_C( 32.18), SIMDE_FLOAT32_C( 
-825.54), SIMDE_FLOAT32_C( 606.08) }, SIMDE_FLOAT32_C( -96.81), { SIMDE_FLOAT32_C(-10533.90), SIMDE_FLOAT32_C( -3115.35), SIMDE_FLOAT32_C( 79920.52), SIMDE_FLOAT32_C(-58674.61) } }, { { SIMDE_FLOAT32_C( 603.63), SIMDE_FLOAT32_C( -926.19), SIMDE_FLOAT32_C( 136.81), SIMDE_FLOAT32_C( 614.31) }, SIMDE_FLOAT32_C( 289.28), { SIMDE_FLOAT32_C(174618.09), SIMDE_FLOAT32_C(-267928.25), SIMDE_FLOAT32_C( 39576.39), SIMDE_FLOAT32_C(177707.59) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32 b = test_vec[i].b; simde_float32x4_t r = simde_vmulq_n_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32 b = simde_test_codegen_random_f32(-1000.0f, 1000.0); simde_float32x4_t r = simde_vmulq_n_f32(a, b); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_n_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 b; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -876.34), SIMDE_FLOAT64_C( -946.04) }, SIMDE_FLOAT64_C( -921.46), { SIMDE_FLOAT64_C(807512.26), SIMDE_FLOAT64_C(871738.02) } }, { { SIMDE_FLOAT64_C( -297.61), SIMDE_FLOAT64_C( 953.41) }, SIMDE_FLOAT64_C( -273.22), { SIMDE_FLOAT64_C( 81313.00), SIMDE_FLOAT64_C(-260490.68) } }, { { SIMDE_FLOAT64_C( -449.19), SIMDE_FLOAT64_C( 633.81) }, SIMDE_FLOAT64_C( -282.52), { SIMDE_FLOAT64_C(126905.16), SIMDE_FLOAT64_C(-179064.00) } }, { { SIMDE_FLOAT64_C( 17.47), SIMDE_FLOAT64_C( 267.43) }, SIMDE_FLOAT64_C( -863.41), { SIMDE_FLOAT64_C(-15083.77), SIMDE_FLOAT64_C(-230901.74) } }, { { SIMDE_FLOAT64_C( -236.05), 
SIMDE_FLOAT64_C( 920.75) }, SIMDE_FLOAT64_C( -470.13), { SIMDE_FLOAT64_C(110974.19), SIMDE_FLOAT64_C(-432872.20) } }, { { SIMDE_FLOAT64_C( -667.31), SIMDE_FLOAT64_C( 277.78) }, SIMDE_FLOAT64_C( 38.32), { SIMDE_FLOAT64_C(-25571.32), SIMDE_FLOAT64_C( 10644.53) } }, { { SIMDE_FLOAT64_C( 502.96), SIMDE_FLOAT64_C( -260.05) }, SIMDE_FLOAT64_C( 348.52), { SIMDE_FLOAT64_C(175291.62), SIMDE_FLOAT64_C(-90632.63) } }, { { SIMDE_FLOAT64_C( -87.91), SIMDE_FLOAT64_C( -360.14) }, SIMDE_FLOAT64_C( 845.79), { SIMDE_FLOAT64_C(-74353.40), SIMDE_FLOAT64_C(-304602.81) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64 b = test_vec[i].b; simde_float64x2_t r = simde_vmulq_n_f64(a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64 b = simde_test_codegen_random_f64(-1000.0, 1000.0); simde_float64x2_t r = simde_vmulq_n_f64(a, b); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_f64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_n_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b; int16_t r[8]; } test_vec[] = { { { INT16_C( 29355), INT16_C( 8535), INT16_C( 30880), -INT16_C( 2113), -INT16_C( 18419), INT16_C( 15569), INT16_C( 3061), INT16_C( 20353) }, INT16_C( 29090), { INT16_C( 2870), -INT16_C( 32754), -INT16_C( 2752), INT16_C( 5598), INT16_C( 13626), -INT16_C( 17086), -INT16_C( 18934), INT16_C( 16546) } }, { { INT16_C( 21959), -INT16_C( 25743), INT16_C( 25094), -INT16_C( 30043), INT16_C( 2684), INT16_C( 14912), -INT16_C( 5362), INT16_C( 26029) }, INT16_C( 19724), { -INT16_C( 8108), INT16_C( 17996), INT16_C( 26184), INT16_C( 8380), 
-INT16_C( 13872), -INT16_C( 1280), INT16_C( 15016), -INT16_C( 13028) } }, { { -INT16_C( 13091), -INT16_C( 5308), INT16_C( 5508), INT16_C( 31015), -INT16_C( 22240), -INT16_C( 15672), -INT16_C( 28646), -INT16_C( 29928) }, INT16_C( 7723), { INT16_C( 20255), INT16_C( 31852), INT16_C( 5420), -INT16_C( 5235), INT16_C( 10336), INT16_C( 10136), INT16_C( 16478), INT16_C( 11528) } }, { { -INT16_C( 12050), INT16_C( 27304), -INT16_C( 5925), -INT16_C( 5724), INT16_C( 20947), -INT16_C( 8370), INT16_C( 11422), -INT16_C( 7509) }, INT16_C( 12055), { INT16_C( 30562), INT16_C( 27928), INT16_C( 8365), INT16_C( 6588), INT16_C( 5877), INT16_C( 25090), INT16_C( 1074), -INT16_C( 15779) } }, { { INT16_C( 16119), INT16_C( 6056), INT16_C( 29159), INT16_C( 730), -INT16_C( 3583), INT16_C( 11405), INT16_C( 31504), -INT16_C( 18180) }, -INT16_C( 10267), { -INT16_C( 15373), INT16_C( 16712), -INT16_C( 7005), -INT16_C( 23806), INT16_C( 20965), INT16_C( 17697), -INT16_C( 31408), INT16_C( 7532) } }, { { -INT16_C( 30048), INT16_C( 29632), INT16_C( 4059), INT16_C( 31314), -INT16_C( 453), INT16_C( 21084), INT16_C( 21549), -INT16_C( 10608) }, INT16_C( 30827), { -INT16_C( 3872), INT16_C( 24896), INT16_C( 18569), -INT16_C( 28602), -INT16_C( 5463), -INT16_C( 29580), INT16_C( 18127), INT16_C( 11824) } }, { { INT16_C( 17735), INT16_C( 18554), INT16_C( 1847), INT16_C( 18292), INT16_C( 28803), INT16_C( 26879), -INT16_C( 24760), INT16_C( 2290) }, -INT16_C( 12782), { INT16_C( 254), INT16_C( 17556), -INT16_C( 15394), INT16_C( 24104), INT16_C( 21302), -INT16_C( 27666), INT16_C( 8976), INT16_C( 23812) } }, { { INT16_C( 25879), INT16_C( 21064), -INT16_C( 23453), -INT16_C( 28508), INT16_C( 13816), INT16_C( 25702), -INT16_C( 21075), INT16_C( 10153) }, -INT16_C( 7691), { -INT16_C( 2557), INT16_C( 1768), INT16_C( 21951), -INT16_C( 28428), -INT16_C( 25000), -INT16_C( 17506), INT16_C( 17297), INT16_C( 32189) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = 
simde_vld1q_s16(test_vec[i].a); int16_t b = test_vec[i].b; simde_int16x8_t r = simde_vmulq_n_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); int16_t b = simde_test_codegen_random_i16(); simde_int16x8_t r = simde_vmulq_n_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_n_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b; int32_t r[4]; } test_vec[] = { { { INT32_C( 1259590867), INT32_C( 1715745376), -INT32_C( 2072571461), INT32_C( 1807423984) }, -INT32_C( 1921903951), { INT32_C( 1252195299), -INT32_C( 814640032), -INT32_C( 640128949), INT32_C( 1628425968) } }, { { INT32_C( 953218746), INT32_C( 1297004900), -INT32_C( 1560843569), -INT32_C( 1628566722) }, -INT32_C( 133942723), { INT32_C( 1692606034), -INT32_C( 1853377324), INT32_C( 1527570771), -INT32_C( 1713190458) } }, { { INT32_C( 880573251), INT32_C( 1184839828), INT32_C( 80941386), INT32_C( 1815978760) }, INT32_C( 750357341), { INT32_C( 1066477655), INT32_C( 1854138820), -INT32_C( 1853196830), -INT32_C( 89026072) } }, { { INT32_C( 869183989), -INT32_C( 103695172), INT32_C( 854775278), -INT32_C( 446271919) }, -INT32_C( 265615962), { INT32_C( 900214238), INT32_C( 172232168), -INT32_C( 694264236), INT32_C( 942546310) } }, { { INT32_C( 536215319), -INT32_C( 7654750), -INT32_C( 1305786947), -INT32_C( 1293550858) }, -INT32_C( 1515473226), { -INT32_C( 744836006), -INT32_C( 550819028), -INT32_C( 728305058), -INT32_C( 532853020) } }, { { -INT32_C( 589849205), -INT32_C( 1295893236), INT32_C( 1503915330), -INT32_C( 1887921940) }, -INT32_C( 2020735798), { INT32_C( 1353874350), INT32_C( 1189108600), -INT32_C( 1280688620), INT32_C( 
1584683576) } }, { { INT32_C( 1060747849), INT32_C( 1794186932), INT32_C( 1594858708), INT32_C( 1178396218) }, INT32_C( 1727594019), { INT32_C( 2020664827), INT32_C( 1372441244), -INT32_C( 501430020), INT32_C( 720438766) } }, { { -INT32_C( 658531349), -INT32_C( 43567053), -INT32_C( 2054884036), INT32_C( 1673837999) }, -INT32_C( 1345407525), { INT32_C( 21072137), -INT32_C( 669834591), -INT32_C( 1805044140), -INT32_C( 1220739147) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); int32_t b = test_vec[i].b; simde_int32x4_t r = simde_vmulq_n_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); int32_t b = simde_test_codegen_random_i32(); simde_int32x4_t r = simde_vmulq_n_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_n_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b; uint16_t r[8]; } test_vec[] = { { { UINT16_C( 5555), UINT16_C(58708), UINT16_C(41599), UINT16_C(46026), UINT16_C(46713), UINT16_C(17683), UINT16_C(45334), UINT16_C(64079) }, UINT16_C(28197), { UINT16_C( 3295), UINT16_C(15652), UINT16_C( 3675), UINT16_C(51250), UINT16_C(23933), UINT16_C( 9663), UINT16_C( 3118), UINT16_C( 8043) } }, { { UINT16_C(18396), UINT16_C(16510), UINT16_C(15761), UINT16_C(12772), UINT16_C( 2859), UINT16_C(49835), UINT16_C(24432), UINT16_C(50392) }, UINT16_C(22340), { UINT16_C(55920), UINT16_C(62328), UINT16_C(41348), UINT16_C(48272), UINT16_C(37996), UINT16_C(53868), UINT16_C(27072), UINT16_C(45408) } }, { { UINT16_C( 3686), UINT16_C(57354), UINT16_C( 7621), UINT16_C(56101), UINT16_C(29902), UINT16_C(62677), 
UINT16_C(45538), UINT16_C(24635) }, UINT16_C(52466), { UINT16_C(58476), UINT16_C(49524), UINT16_C( 8250), UINT16_C(42234), UINT16_C(37564), UINT16_C(11610), UINT16_C(16292), UINT16_C(64454) } }, { { UINT16_C(54941), UINT16_C(51709), UINT16_C(43489), UINT16_C(20875), UINT16_C(25352), UINT16_C(19477), UINT16_C(31675), UINT16_C(50523) }, UINT16_C( 8283), { UINT16_C(59855), UINT16_C(27887), UINT16_C(33531), UINT16_C(23657), UINT16_C(13272), UINT16_C(43895), UINT16_C(23417), UINT16_C(34649) } }, { { UINT16_C(32995), UINT16_C(45563), UINT16_C(53493), UINT16_C(55205), UINT16_C(57730), UINT16_C(29752), UINT16_C(54701), UINT16_C(43850) }, UINT16_C(11166), { UINT16_C(44314), UINT16_C( 490), UINT16_C( 7734), UINT16_C(52950), UINT16_C( 1084), UINT16_C( 8848), UINT16_C(61382), UINT16_C( 9644) } }, { { UINT16_C(10836), UINT16_C(23676), UINT16_C(37261), UINT16_C(18600), UINT16_C( 780), UINT16_C(26638), UINT16_C(61731), UINT16_C( 7912) }, UINT16_C(56738), { UINT16_C(19752), UINT16_C(37496), UINT16_C(54330), UINT16_C( 592), UINT16_C(18840), UINT16_C(61148), UINT16_C(53030), UINT16_C(54992) } }, { { UINT16_C(18671), UINT16_C(29109), UINT16_C(60713), UINT16_C(55013), UINT16_C(12226), UINT16_C(24961), UINT16_C(54618), UINT16_C(54923) }, UINT16_C( 6193), { UINT16_C(23999), UINT16_C(48037), UINT16_C(15577), UINT16_C(39381), UINT16_C(21538), UINT16_C(49585), UINT16_C(17978), UINT16_C( 6299) } }, { { UINT16_C(55911), UINT16_C(29537), UINT16_C(28637), UINT16_C( 475), UINT16_C(50272), UINT16_C( 543), UINT16_C( 3745), UINT16_C(22090) }, UINT16_C(29567), { UINT16_C(40473), UINT16_C(53279), UINT16_C(50595), UINT16_C(19621), UINT16_C(35744), UINT16_C(64097), UINT16_C(38111), UINT16_C( 3254) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); uint16_t b = test_vec[i].b; simde_uint16x8_t r = simde_vmulq_n_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else 
fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); uint16_t b = simde_test_codegen_random_u16(); simde_uint16x8_t r = simde_vmulq_n_u16(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmulq_n_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b; uint32_t r[4]; } test_vec[] = { { { UINT32_C( 593693436), UINT32_C(2539985187), UINT32_C(3612342030), UINT32_C(2996367597) }, UINT32_C(1522838875), { UINT32_C(2207331220), UINT32_C(2948109937), UINT32_C(2786674682), UINT32_C(2661403967) } }, { { UINT32_C(1145577779), UINT32_C(2632968636), UINT32_C(1447235673), UINT32_C(1048160027) }, UINT32_C(3470122688), { UINT32_C(1817321536), UINT32_C(2181846272), UINT32_C( 310800576), UINT32_C( 496877120) } }, { { UINT32_C(3282445526), UINT32_C(1869954580), UINT32_C( 516504299), UINT32_C( 392302939) }, UINT32_C(1421038074), { UINT32_C(2609491708), UINT32_C(3911119752), UINT32_C( 175694974), UINT32_C(3689135582) } }, { { UINT32_C(2108356450), UINT32_C(1589322654), UINT32_C(3626799106), UINT32_C(3382432436) }, UINT32_C(4231532560), { UINT32_C( 529896992), UINT32_C(4034664928), UINT32_C(3292078112), UINT32_C(2655546176) } }, { { UINT32_C(2786722378), UINT32_C( 247299347), UINT32_C( 811758030), UINT32_C( 112004200) }, UINT32_C( 828663855), { UINT32_C( 513571734), UINT32_C( 772124285), UINT32_C(3267794130), UINT32_C(1198491416) } }, { { UINT32_C(2903085304), UINT32_C(1937155171), UINT32_C(4285509301), UINT32_C(3299183280) }, UINT32_C(3587334919), { UINT32_C(2982338248), UINT32_C(1107740597), UINT32_C( 370656755), UINT32_C(2575882960) } }, { { UINT32_C(1007039700), UINT32_C(1866642240), UINT32_C( 346138140), UINT32_C(2579606070) }, UINT32_C( 67974991), { UINT32_C(3657947500), UINT32_C(3294105792), 
UINT32_C(4160636580), UINT32_C(2333286570) } }, { { UINT32_C(2516810982), UINT32_C( 224045318), UINT32_C(3772984332), UINT32_C(2686249312) }, UINT32_C(3088080540), { UINT32_C(3495727144), UINT32_C(1303327656), UINT32_C(3579526992), UINT32_C(3310319232) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); uint32_t b = test_vec[i].b; simde_uint32x4_t r = simde_vmulq_n_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); uint32_t b = simde_test_codegen_random_u32(); simde_uint32x4_t r = simde_vmulq_n_u32(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmul_n_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_n_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmul_n_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_n_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_n_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmulq_n_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/mull.c000066400000000000000000000411451400333146700164410ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN mull #include "test-neon.h" #include "../../../simde/arm/neon/mull.h" static int test_simde_vmull_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int16_t r[8]; } test_vec[] = { { { INT8_C( 80), -INT8_C( 57), INT8_MIN, -INT8_C( 68), INT8_C( 13), -INT8_C( 44), INT8_C( 8), INT8_C( 65) }, { -INT8_C( 55), -INT8_C( 
20), INT8_C( 56), -INT8_C( 54), INT8_C( 110), INT8_C( 55), -INT8_C( 97), -INT8_C( 8) }, { -INT16_C( 4400), INT16_C( 1140), -INT16_C( 7168), INT16_C( 3672), INT16_C( 1430), -INT16_C( 2420), -INT16_C( 776), -INT16_C( 520) } }, { { INT8_C( 90), INT8_C( 52), INT8_C( 32), INT8_C( 61), -INT8_C( 126), INT8_C( 97), INT8_C( 42), -INT8_C( 90) }, { INT8_C( 100), INT8_C( 38), -INT8_C( 122), INT8_C( 112), -INT8_C( 57), INT8_C( 19), -INT8_C( 61), INT8_C( 23) }, { INT16_C( 9000), INT16_C( 1976), -INT16_C( 3904), INT16_C( 6832), INT16_C( 7182), INT16_C( 1843), -INT16_C( 2562), -INT16_C( 2070) } }, { { -INT8_C( 38), INT8_C( 68), -INT8_C( 44), -INT8_C( 24), INT8_C( 24), -INT8_C( 36), INT8_C( 41), -INT8_C( 31) }, { -INT8_C( 56), INT8_C( 97), -INT8_C( 85), INT8_C( 55), -INT8_C( 104), INT8_C( 74), INT8_C( 47), -INT8_C( 14) }, { INT16_C( 2128), INT16_C( 6596), INT16_C( 3740), -INT16_C( 1320), -INT16_C( 2496), -INT16_C( 2664), INT16_C( 1927), INT16_C( 434) } }, { { INT8_C( 126), INT8_C( 80), INT8_C( 48), INT8_C( 1), -INT8_C( 79), INT8_C( 90), -INT8_C( 89), INT8_C( 21) }, { INT8_MIN, INT8_C( 46), -INT8_C( 123), INT8_C( 72), INT8_C( 65), INT8_C( 73), INT8_C( 95), INT8_C( 28) }, { -INT16_C( 16128), INT16_C( 3680), -INT16_C( 5904), INT16_C( 72), -INT16_C( 5135), INT16_C( 6570), -INT16_C( 8455), INT16_C( 588) } }, { { -INT8_C( 115), INT8_C( 51), INT8_C( 4), -INT8_C( 91), INT8_C( 16), INT8_C( 45), -INT8_C( 122), -INT8_C( 40) }, { -INT8_C( 114), INT8_C( 49), INT8_C( 15), INT8_C( 38), INT8_C( 123), INT8_C( 63), INT8_C( 25), -INT8_C( 7) }, { INT16_C( 13110), INT16_C( 2499), INT16_C( 60), -INT16_C( 3458), INT16_C( 1968), INT16_C( 2835), -INT16_C( 3050), INT16_C( 280) } }, { { -INT8_C( 113), INT8_C( 73), -INT8_C( 6), INT8_C( 64), -INT8_C( 93), -INT8_C( 94), INT8_C( 86), INT8_C( 36) }, { -INT8_C( 48), -INT8_C( 37), INT8_C( 108), INT8_C( 17), INT8_C( 36), -INT8_C( 53), INT8_C( 45), -INT8_C( 79) }, { INT16_C( 5424), -INT16_C( 2701), -INT16_C( 648), INT16_C( 1088), -INT16_C( 3348), INT16_C( 4982), 
INT16_C( 3870), -INT16_C( 2844) } }, { { -INT8_C( 1), INT8_C( 49), INT8_C( 86), INT8_C( 15), INT8_C( 94), -INT8_C( 36), -INT8_C( 25), -INT8_C( 20) }, { INT8_C( 13), -INT8_C( 9), INT8_C( 19), -INT8_C( 120), INT8_C( 54), INT8_C( 44), -INT8_C( 126), -INT8_C( 59) }, { -INT16_C( 13), -INT16_C( 441), INT16_C( 1634), -INT16_C( 1800), INT16_C( 5076), -INT16_C( 1584), INT16_C( 3150), INT16_C( 1180) } }, { { INT8_C( 117), INT8_C( 124), INT8_C( 5), INT8_C( 24), INT8_C( 30), INT8_C( 91), INT8_C( 60), -INT8_C( 18) }, { INT8_C( 55), -INT8_C( 88), INT8_C( 0), INT8_C( 91), INT8_C( 116), INT8_C( 45), INT8_C( 13), INT8_C( 115) }, { INT16_C( 6435), -INT16_C( 10912), INT16_C( 0), INT16_C( 2184), INT16_C( 3480), INT16_C( 4095), INT16_C( 780), -INT16_C( 2070) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int16x8_t r = simde_vmull_s8(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vmull_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int32_t r[4]; } test_vec[] = { { { INT16_C( 11230), INT16_C( 11512), -INT16_C( 32461), -INT16_C( 31562) }, { INT16_C( 29011), -INT16_C( 4051), INT16_C( 24636), -INT16_C( 23193) }, { INT32_C( 325793530), -INT32_C( 46635112), -INT32_C( 799709196), INT32_C( 732017466) } }, { { INT16_C( 17728), INT16_C( 31395), INT16_C( 5945), INT16_C( 5959) }, { INT16_C( 2425), -INT16_C( 15905), -INT16_C( 10338), INT16_C( 31939) }, { INT32_C( 42990400), -INT32_C( 499337475), -INT32_C( 61459410), INT32_C( 190324501) } }, { { -INT16_C( 17662), INT16_C( 13993), INT16_C( 24380), -INT16_C( 28486) }, { -INT16_C( 6192), INT16_C( 3200), -INT16_C( 6329), -INT16_C( 30542) }, { INT32_C( 109363104), INT32_C( 44777600), -INT32_C( 154301020), INT32_C( 870019412) } }, { { INT16_C( 21805), INT16_C( 26114), INT16_C( 18796), -INT16_C( 6787) }, { INT16_C( 23635), 
-INT16_C( 3674), INT16_C( 27188), INT16_C( 13933) }, { INT32_C( 515361175), -INT32_C( 95942836), INT32_C( 511025648), -INT32_C( 94563271) } }, { { INT16_C( 5669), INT16_C( 25196), INT16_C( 9846), INT16_C( 18162) }, { INT16_C( 29198), INT16_C( 21843), INT16_C( 1369), -INT16_C( 31011) }, { INT32_C( 165523462), INT32_C( 550356228), INT32_C( 13479174), -INT32_C( 563221782) } }, { { -INT16_C( 8358), -INT16_C( 14612), INT16_C( 26921), INT16_C( 31916) }, { INT16_C( 21190), -INT16_C( 1427), -INT16_C( 9540), -INT16_C( 7632) }, { -INT32_C( 177106020), INT32_C( 20851324), -INT32_C( 256826340), -INT32_C( 243582912) } }, { { -INT16_C( 25103), INT16_C( 26436), INT16_C( 14019), -INT16_C( 11859) }, { INT16_C( 168), INT16_C( 295), INT16_C( 1029), INT16_C( 24456) }, { -INT32_C( 4217304), INT32_C( 7798620), INT32_C( 14425551), -INT32_C( 290023704) } }, { { INT16_C( 29924), INT16_C( 3366), -INT16_C( 11554), -INT16_C( 23415) }, { -INT16_C( 2524), -INT16_C( 7778), -INT16_C( 12592), -INT16_C( 15933) }, { -INT32_C( 75528176), -INT32_C( 26180748), INT32_C( 145487968), INT32_C( 373071195) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int32x4_t r = simde_vmull_s16(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vmull_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int64_t r[2]; } test_vec[] = { { { INT32_C( 930126813), -INT32_C( 560729004) }, { INT32_C( 166776726), -INT32_C( 422116933) }, { INT64_C( 155123504636954238), INT64_C( 236693207412624732) } }, { { INT32_C( 1728012372), INT32_C( 633898368) }, { -INT32_C( 1137785715), -INT32_C( 1374263343) }, { -INT64_C( 1966107792204865980), -INT64_C( 871143290329924224) } }, { { INT32_C( 1457882626), INT32_C( 271874170) }, { INT32_C( 35267655), INT32_C( 2045309221) }, { INT64_C( 51416101484262030), INT64_C( 
556066746852721570) } }, { { -INT32_C( 757078191), -INT32_C( 84433043) }, { INT32_C( 1018635627), -INT32_C( 1897214580) }, { -INT64_C( 771186817777310757), INT64_C( 160187600213366940) } }, { { -INT32_C( 823865517), -INT32_C( 1898047417) }, { INT32_C( 1636890684), INT32_C( 1004173801) }, { -INT64_C( 1348577789646143628), -INT64_C( 1905969489207122017) } }, { { -INT32_C( 854738592), -INT32_C( 876084128) }, { -INT32_C( 1241022678), -INT32_C( 1622806196) }, { INT64_C( 1060749976433789376), INT64_C( 1421714751135657088) } }, { { INT32_C( 124594624), INT32_C( 2123713602) }, { INT32_C( 786441796), -INT32_C( 9848161) }, { INT64_C( 97986419870504704), -INT64_C( 20914673470385922) } }, { { -INT32_C( 724732300), -INT32_C( 1532979846) }, { INT32_C( 1616619284), INT32_C( 1509925017) }, { -INT64_C( 1171616211917673200), -INT64_C( 2314684620032207382) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int64x2_t r = simde_vmull_s32(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vmull_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT8_C( 94), UINT8_C(152), UINT8_C( 27), UINT8_C(118), UINT8_C(190), UINT8_C(231), UINT8_C( 17), UINT8_C(211) }, { UINT8_C( 99), UINT8_C(112), UINT8_C( 80), UINT8_C(144), UINT8_C(181), UINT8_C(106), UINT8_C( 70), UINT8_C( 39) }, { UINT16_C( 9306), UINT16_C(17024), UINT16_C( 2160), UINT16_C(16992), UINT16_C(34390), UINT16_C(24486), UINT16_C( 1190), UINT16_C( 8229) } }, { { UINT8_C(237), UINT8_C(190), UINT8_C( 61), UINT8_C( 90), UINT8_C( 53), UINT8_C( 74), UINT8_C(239), UINT8_C( 23) }, { UINT8_C( 70), UINT8_C(227), UINT8_MAX, UINT8_C(159), UINT8_C(184), UINT8_C(227), UINT8_C(105), UINT8_C( 22) }, { UINT16_C(16590), UINT16_C(43130), UINT16_C(15555), UINT16_C(14310), UINT16_C( 9752), 
UINT16_C(16798), UINT16_C(25095), UINT16_C( 506) } }, { { UINT8_C(123), UINT8_C(132), UINT8_C(141), UINT8_C( 57), UINT8_C(108), UINT8_C(158), UINT8_C( 12), UINT8_C(207) }, { UINT8_C( 14), UINT8_C( 93), UINT8_C( 96), UINT8_C(196), UINT8_C(199), UINT8_C(166), UINT8_C(235), UINT8_C(180) }, { UINT16_C( 1722), UINT16_C(12276), UINT16_C(13536), UINT16_C(11172), UINT16_C(21492), UINT16_C(26228), UINT16_C( 2820), UINT16_C(37260) } }, { { UINT8_C(100), UINT8_C( 40), UINT8_C( 15), UINT8_C(154), UINT8_C(114), UINT8_C(254), UINT8_C(177), UINT8_C(185) }, { UINT8_C(226), UINT8_C(177), UINT8_C( 88), UINT8_C(154), UINT8_C(148), UINT8_C(193), UINT8_C(176), UINT8_C( 16) }, { UINT16_C(22600), UINT16_C( 7080), UINT16_C( 1320), UINT16_C(23716), UINT16_C(16872), UINT16_C(49022), UINT16_C(31152), UINT16_C( 2960) } }, { { UINT8_C( 69), UINT8_C( 61), UINT8_C( 73), UINT8_C(177), UINT8_C(220), UINT8_C( 86), UINT8_C(129), UINT8_C(234) }, { UINT8_C(179), UINT8_C(225), UINT8_C(174), UINT8_C(122), UINT8_C(135), UINT8_C(153), UINT8_C( 46), UINT8_C(236) }, { UINT16_C(12351), UINT16_C(13725), UINT16_C(12702), UINT16_C(21594), UINT16_C(29700), UINT16_C(13158), UINT16_C( 5934), UINT16_C(55224) } }, { { UINT8_C(194), UINT8_C( 61), UINT8_C(134), UINT8_C( 52), UINT8_C( 60), UINT8_C( 55), UINT8_C(237), UINT8_C( 30) }, { UINT8_C(232), UINT8_C( 69), UINT8_C(184), UINT8_C(125), UINT8_C( 6), UINT8_C(104), UINT8_C(141), UINT8_C( 76) }, { UINT16_C(45008), UINT16_C( 4209), UINT16_C(24656), UINT16_C( 6500), UINT16_C( 360), UINT16_C( 5720), UINT16_C(33417), UINT16_C( 2280) } }, { { UINT8_C(166), UINT8_C(214), UINT8_C(253), UINT8_C(130), UINT8_C( 44), UINT8_C(126), UINT8_C(108), UINT8_C(223) }, { UINT8_C( 95), UINT8_C( 27), UINT8_C( 89), UINT8_C(231), UINT8_C(180), UINT8_C(136), UINT8_C(211), UINT8_C(118) }, { UINT16_C(15770), UINT16_C( 5778), UINT16_C(22517), UINT16_C(30030), UINT16_C( 7920), UINT16_C(17136), UINT16_C(22788), UINT16_C(26314) } }, { { UINT8_C(197), UINT8_C( 89), UINT8_C(171), UINT8_C( 1), 
UINT8_C(144), UINT8_C(152), UINT8_C( 31), UINT8_C(121) }, { UINT8_C(222), UINT8_C(215), UINT8_C(246), UINT8_C(228), UINT8_C( 64), UINT8_C(131), UINT8_C( 48), UINT8_C(230) }, { UINT16_C(43734), UINT16_C(19135), UINT16_C(42066), UINT16_C( 228), UINT16_C( 9216), UINT16_C(19912), UINT16_C( 1488), UINT16_C(27830) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint16x8_t r = simde_vmull_u8(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vmull_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT16_C(65254), UINT16_C(49526), UINT16_C(58343), UINT16_C(28199) }, { UINT16_C( 3568), UINT16_C(53134), UINT16_C(38079), UINT16_C(44979) }, { UINT32_C( 232826272), UINT32_C(2631514484), UINT32_C(2221643097), UINT32_C(1268362821) } }, { { UINT16_C(41279), UINT16_C(54255), UINT16_C(49218), UINT16_C(49274) }, { UINT16_C(44771), UINT16_C(52368), UINT16_C(62625), UINT16_C(34586) }, { UINT32_C(1848102109), UINT32_C(2841225840), UINT32_C(3082277250), UINT32_C(1704190564) } }, { { UINT16_C(37107), UINT16_C(55881), UINT16_C(28787), UINT16_C(25416) }, { UINT16_C(54910), UINT16_C(15666), UINT16_C(58986), UINT16_C(43500) }, { UINT32_C(2037545370), UINT32_C( 875431746), UINT32_C(1698029982), UINT32_C(1105596000) } }, { { UINT16_C(56455), UINT16_C(51581), UINT16_C(63388), UINT16_C(32649) }, { UINT16_C( 6821), UINT16_C(17995), UINT16_C(25870), UINT16_C( 462) }, { UINT32_C( 385079555), UINT32_C( 928200095), UINT32_C(1639847560), UINT32_C( 15083838) } }, { { UINT16_C( 6133), UINT16_C(27099), UINT16_C( 9351), UINT16_C( 1484) }, { UINT16_C(65530), UINT16_C(25923), UINT16_C(12261), UINT16_C(27662) }, { UINT32_C( 401895490), UINT32_C( 702487377), UINT32_C( 114652611), UINT32_C( 41050408) } }, { { UINT16_C(35595), UINT16_C(43062), 
UINT16_C(49027), UINT16_C(10279) }, { UINT16_C(29657), UINT16_C(59503), UINT16_C(15832), UINT16_C(52969) }, { UINT32_C(1055640915), UINT32_C(2562318186), UINT32_C( 776195464), UINT32_C( 544468351) } }, { { UINT16_C(50516), UINT16_C(56119), UINT16_C( 1001), UINT16_C(58337) }, { UINT16_C( 9218), UINT16_C(59208), UINT16_C(22355), UINT16_C(24404) }, { UINT32_C( 465656488), UINT32_C(3322693752), UINT32_C( 22377355), UINT32_C(1423656148) } }, { { UINT16_C(35554), UINT16_C(25863), UINT16_C(11849), UINT16_C( 9102) }, { UINT16_C(64929), UINT16_C(31243), UINT16_C(62522), UINT16_C(36424) }, { UINT32_C(2308485666), UINT32_C( 808037709), UINT32_C( 740823178), UINT32_C( 331531248) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint32x4_t r = simde_vmull_u16(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vmull_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT32_C(1764671971), UINT32_C( 417693998) }, { UINT32_C(3142976160), UINT32_C(3551123166) }, { UINT64_C( 5546321935073211360), UINT64_C( 1483282832596957668) } }, { { UINT32_C(1771619725), UINT32_C(3853953090) }, { UINT32_C(4088780350), UINT32_C(1688137088) }, { UINT64_C( 7243763919252403750), UINT64_C( 6506001146641201920) } }, { { UINT32_C(3721252015), UINT32_C(4009079374) }, { UINT32_C(2477411253), UINT32_C(2926007073) }, { UINT64_C( 9219071617209924795), UINT64_C(11730594604542412302) } }, { { UINT32_C(1528299288), UINT32_C(3863006887) }, { UINT32_C( 618263972), UINT32_C(2441639906) }, { UINT64_C( 944892388203651936), UINT64_C( 9432071772452032622) } }, { { UINT32_C(2456704580), UINT32_C(3179307784) }, { UINT32_C(3494980270), UINT32_C(2524887166) }, { UINT64_C( 8586134036318636600), UINT64_C( 8027393420585500144) } }, { { UINT32_C(1592891063), UINT32_C( 
138686820) }, { UINT32_C( 153886246), UINT32_C(3650794901) }, { UINT64_C( 245124025972019498), UINT64_C( 506317135291904820) } }, { { UINT32_C( 325781771), UINT32_C( 466742380) }, { UINT32_C(2498437654), UINT32_C(2435541466) }, { UINT64_C( 813945443653205234), UINT64_C( 1136770420429529080) } }, { { UINT32_C(1676614911), UINT32_C(1953182798) }, { UINT32_C(3883767890), UINT32_C(1489049677) }, { UINT64_C( 6511583155237007790), UINT64_C( 2908386214483856246) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint64x2_t r = simde_vmull_u32(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmull_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmull_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmull_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmull_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmull_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmull_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/mull_high.c000066400000000000000000000660761400333146700174520ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN mull_high #include "test-neon.h" #include "../../../simde/arm/neon/mull_high.h" static int test_simde_vmull_high_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t b[16]; int16_t r[8]; } test_vec[] = { { { INT8_C( 1), INT8_C( 41), INT8_MIN, INT8_C( 91), INT8_C( 79), -INT8_C( 119), INT8_C( 78), -INT8_C( 75), -INT8_C( 74), -INT8_C( 42), -INT8_C( 125), -INT8_C( 81), -INT8_C( 49), INT8_C( 57), -INT8_C( 30), -INT8_C( 14) }, { INT8_C( 81), INT8_C( 59), INT8_C( 120), INT8_C( 49), -INT8_C( 28), -INT8_C( 59), -INT8_C( 81), -INT8_C( 20), INT8_C( 66), -INT8_C( 124), -INT8_C( 49), INT8_MIN, INT8_C( 30), -INT8_C( 80), -INT8_C( 63), INT8_C( 32) }, { -INT16_C( 4884), INT16_C( 5208), INT16_C( 6125), INT16_C( 10368), -INT16_C( 1470), 
-INT16_C( 4560), INT16_C( 1890), -INT16_C( 448) } }, { { -INT8_C( 39), INT8_C( 65), INT8_C( 123), INT8_C( 41), -INT8_C( 53), -INT8_C( 55), -INT8_C( 34), -INT8_C( 127), -INT8_C( 96), INT8_C( 97), INT8_C( 48), INT8_C( 111), -INT8_C( 102), INT8_C( 18), INT8_C( 97), -INT8_C( 20) }, { INT8_C( 77), -INT8_C( 39), INT8_C( 29), INT8_C( 50), -INT8_C( 98), -INT8_C( 51), INT8_C( 30), -INT8_C( 32), INT8_C( 81), -INT8_C( 18), INT8_C( 96), INT8_C( 112), -INT8_C( 98), INT8_C( 33), -INT8_C( 112), INT8_C( 120) }, { -INT16_C( 7776), -INT16_C( 1746), INT16_C( 4608), INT16_C( 12432), INT16_C( 9996), INT16_C( 594), -INT16_C( 10864), -INT16_C( 2400) } }, { { INT8_C( 99), INT8_C( 11), -INT8_C( 95), INT8_C( 46), -INT8_C( 44), INT8_MAX, -INT8_C( 81), INT8_C( 116), -INT8_C( 31), -INT8_C( 33), -INT8_C( 29), INT8_C( 123), -INT8_C( 14), INT8_C( 68), INT8_C( 103), INT8_C( 63) }, { INT8_C( 29), -INT8_C( 123), INT8_C( 113), -INT8_C( 69), INT8_C( 82), -INT8_C( 112), -INT8_C( 100), -INT8_C( 93), INT8_C( 126), -INT8_C( 4), INT8_C( 19), INT8_C( 28), INT8_C( 30), -INT8_C( 93), -INT8_C( 108), -INT8_C( 127) }, { -INT16_C( 3906), INT16_C( 132), -INT16_C( 551), INT16_C( 3444), -INT16_C( 420), -INT16_C( 6324), -INT16_C( 11124), -INT16_C( 8001) } }, { { -INT8_C( 82), INT8_C( 53), -INT8_C( 81), -INT8_C( 125), -INT8_C( 75), INT8_C( 94), -INT8_C( 9), -INT8_C( 106), INT8_C( 61), -INT8_C( 37), INT8_C( 17), INT8_C( 47), INT8_C( 31), INT8_C( 121), INT8_C( 111), INT8_C( 61) }, { -INT8_C( 2), -INT8_C( 32), -INT8_C( 8), INT8_C( 80), INT8_C( 112), -INT8_C( 108), -INT8_C( 13), -INT8_C( 18), -INT8_C( 111), INT8_C( 7), INT8_C( 11), -INT8_C( 81), -INT8_C( 86), -INT8_C( 97), INT8_C( 48), INT8_C( 89) }, { -INT16_C( 6771), -INT16_C( 259), INT16_C( 187), -INT16_C( 3807), -INT16_C( 2666), -INT16_C( 11737), INT16_C( 5328), INT16_C( 5429) } }, { { -INT8_C( 43), -INT8_C( 33), -INT8_C( 36), -INT8_C( 118), INT8_C( 61), -INT8_C( 45), INT8_C( 32), INT8_C( 122), -INT8_C( 82), INT8_C( 49), -INT8_C( 86), -INT8_C( 50), -INT8_C( 86), 
INT8_C( 25), INT8_C( 11), -INT8_C( 88) }, { -INT8_C( 7), INT8_C( 3), -INT8_C( 8), INT8_C( 106), -INT8_C( 104), -INT8_C( 20), INT8_C( 88), INT8_C( 41), -INT8_C( 13), INT8_C( 99), -INT8_C( 40), -INT8_C( 99), INT8_C( 3), INT8_C( 8), -INT8_C( 10), -INT8_C( 40) }, { INT16_C( 1066), INT16_C( 4851), INT16_C( 3440), INT16_C( 4950), -INT16_C( 258), INT16_C( 200), -INT16_C( 110), INT16_C( 3520) } }, { { -INT8_C( 25), -INT8_C( 46), INT8_C( 98), INT8_C( 36), -INT8_C( 90), -INT8_C( 126), -INT8_C( 98), INT8_C( 84), -INT8_C( 77), INT8_C( 72), INT8_C( 34), INT8_C( 94), INT8_C( 97), INT8_C( 45), INT8_C( 6), INT8_C( 91) }, { INT8_C( 49), -INT8_C( 1), -INT8_C( 59), -INT8_C( 55), -INT8_C( 21), INT8_C( 29), -INT8_C( 14), -INT8_C( 34), -INT8_C( 127), -INT8_C( 54), INT8_C( 123), -INT8_C( 124), -INT8_C( 46), INT8_C( 114), INT8_C( 92), -INT8_C( 71) }, { INT16_C( 9779), -INT16_C( 3888), INT16_C( 4182), -INT16_C( 11656), -INT16_C( 4462), INT16_C( 5130), INT16_C( 552), -INT16_C( 6461) } }, { { INT8_C( 68), -INT8_C( 66), -INT8_C( 35), -INT8_C( 22), INT8_C( 64), INT8_C( 123), INT8_C( 63), -INT8_C( 13), -INT8_C( 60), INT8_C( 97), INT8_C( 81), INT8_C( 37), -INT8_C( 113), INT8_C( 88), INT8_MIN, -INT8_C( 64) }, { INT8_C( 87), INT8_C( 69), -INT8_C( 119), INT8_C( 66), INT8_C( 99), INT8_C( 123), INT8_C( 32), -INT8_C( 28), INT8_C( 69), -INT8_C( 101), INT8_C( 104), INT8_C( 23), INT8_C( 13), -INT8_C( 60), -INT8_C( 48), INT8_C( 82) }, { -INT16_C( 4140), -INT16_C( 9797), INT16_C( 8424), INT16_C( 851), -INT16_C( 1469), -INT16_C( 5280), INT16_C( 6144), -INT16_C( 5248) } }, { { -INT8_C( 126), -INT8_C( 83), INT8_C( 60), -INT8_C( 62), INT8_C( 40), INT8_C( 123), -INT8_C( 75), -INT8_C( 20), -INT8_C( 35), INT8_C( 7), INT8_C( 18), INT8_C( 108), INT8_C( 95), -INT8_C( 110), INT8_C( 44), -INT8_C( 74) }, { -INT8_C( 40), -INT8_C( 75), -INT8_C( 8), INT8_C( 59), INT8_C( 48), INT8_C( 24), INT8_C( 31), INT8_C( 117), -INT8_C( 77), -INT8_C( 121), -INT8_C( 116), -INT8_C( 63), INT8_C( 75), INT8_C( 92), INT8_C( 19), -INT8_C( 51) 
}, { INT16_C( 2695), -INT16_C( 847), -INT16_C( 2088), -INT16_C( 6804), INT16_C( 7125), -INT16_C( 10120), INT16_C( 836), INT16_C( 3774) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int16x8_t r = simde_vmull_high_s8(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int16x8_t r = simde_vmull_high_s8(a, b); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmull_high_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; int32_t r[4]; } test_vec[] = { { { INT16_C( 26020), INT16_C( 28555), INT16_C( 1681), -INT16_C( 31395), INT16_C( 1137), INT16_C( 4438), INT16_C( 10954), -INT16_C( 8728) }, { INT16_C( 853), -INT16_C( 8018), INT16_C( 7096), -INT16_C( 14876), -INT16_C( 12102), -INT16_C( 1906), -INT16_C( 10046), INT16_C( 26192) }, { -INT32_C( 13759974), -INT32_C( 8458828), -INT32_C( 110043884), -INT32_C( 228603776) } }, { { -INT16_C( 9411), -INT16_C( 12587), INT16_C( 13281), INT16_C( 21076), -INT16_C( 21961), INT16_C( 355), INT16_C( 19668), INT16_C( 10718) }, { -INT16_C( 29361), INT16_C( 1801), -INT16_C( 4440), INT16_C( 25548), INT16_C( 23230), -INT16_C( 32677), -INT16_C( 21709), INT16_C( 28903) }, { -INT32_C( 510154030), -INT32_C( 11600335), -INT32_C( 426972612), INT32_C( 309782354) } }, { { -INT16_C( 17274), INT16_C( 26687), -INT16_C( 27665), INT16_C( 10170), INT16_C( 7741), INT16_C( 4392), INT16_C( 1898), -INT16_C( 18117) }, { INT16_C( 17556), INT16_C( 15552), -INT16_C( 29390), -INT16_C( 3937), 
-INT16_C( 1049), INT16_C( 6769), INT16_C( 22694), INT16_C( 11659) }, { -INT32_C( 8120309), INT32_C( 29729448), INT32_C( 43073212), -INT32_C( 211226103) } }, { { -INT16_C( 13804), INT16_C( 1173), INT16_C( 20317), -INT16_C( 26069), INT16_C( 21357), -INT16_C( 10325), -INT16_C( 6566), -INT16_C( 4464) }, { INT16_C( 20779), INT16_C( 23851), -INT16_C( 13602), -INT16_C( 15026), -INT16_C( 16443), INT16_C( 27872), INT16_C( 27415), INT16_C( 11161) }, { -INT32_C( 351173151), -INT32_C( 287778400), -INT32_C( 180006890), -INT32_C( 49822704) } }, { { INT16_C( 11829), -INT16_C( 28113), INT16_C( 23165), -INT16_C( 5332), -INT16_C( 10322), INT16_C( 2242), INT16_C( 21438), -INT16_C( 5641) }, { INT16_C( 8868), -INT16_C( 32186), -INT16_C( 27412), -INT16_C( 19897), INT16_C( 10067), INT16_C( 27166), -INT16_C( 18542), -INT16_C( 14442) }, { -INT32_C( 103911574), INT32_C( 60906172), -INT32_C( 397503396), INT32_C( 81467322) } }, { { -INT16_C( 14875), INT16_C( 25177), -INT16_C( 31456), -INT16_C( 12723), INT16_C( 4189), INT16_C( 7126), -INT16_C( 12957), INT16_C( 1796) }, { INT16_C( 19183), -INT16_C( 9079), -INT16_C( 12065), INT16_C( 12942), -INT16_C( 21256), -INT16_C( 30051), INT16_C( 13155), INT16_C( 18514) }, { -INT32_C( 89041384), -INT32_C( 214143426), -INT32_C( 170449335), INT32_C( 33251144) } }, { { -INT16_C( 21512), INT16_C( 6314), -INT16_C( 1999), -INT16_C( 28954), -INT16_C( 17144), INT16_C( 27561), -INT16_C( 21110), INT16_C( 31346) }, { -INT16_C( 1033), -INT16_C( 10666), -INT16_C( 6965), -INT16_C( 15607), -INT16_C( 22896), -INT16_C( 3250), -INT16_C( 24359), -INT16_C( 11973) }, { INT32_C( 392529024), -INT32_C( 89573250), INT32_C( 514218490), -INT32_C( 375305658) } }, { { -INT16_C( 6837), INT16_C( 31978), -INT16_C( 12067), -INT16_C( 6902), -INT16_C( 19571), INT16_C( 6224), -INT16_C( 15776), INT16_C( 22674) }, { -INT16_C( 5955), -INT16_C( 30418), INT16_C( 14284), INT16_C( 23628), -INT16_C( 25891), -INT16_C( 18865), -INT16_C( 30150), -INT16_C( 31096) }, { INT32_C( 506712761), -INT32_C( 
117415760), INT32_C( 475646400), -INT32_C( 705070704) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int32x4_t r = simde_vmull_high_s16(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int32x4_t r = simde_vmull_high_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmull_high_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; int64_t r[2]; } test_vec[] = { { { -INT32_C( 1126387473), -INT32_C( 122861674), -INT32_C( 249089497), -INT32_C( 774744012) }, { INT32_C( 824384446), INT32_C( 2060991692), -INT32_C( 829349104), INT32_C( 297697313) }, { INT64_C( 206582151152760688), -INT64_C( 230639210635239756) } }, { { -INT32_C( 942826960), INT32_C( 163543778), -INT32_C( 503584851), -INT32_C( 38613697) }, { -INT32_C( 1255221528), INT32_C( 573507090), INT32_C( 1257357353), INT32_C( 1885056832) }, { -INT64_C( 633186115264259403), -INT64_C( 72789013338627904) } }, { { INT32_C( 725035081), INT32_C( 1345714083), INT32_C( 489763038), -INT32_C( 451222531) }, { -INT32_C( 862304071), INT32_C( 2012137806), -INT32_C( 910041207), -INT32_C( 667279986) }, { -INT64_C( 445704546245506866), INT64_C( 301091764168564566) } }, { { -INT32_C( 402427579), INT32_C( 1178089576), INT32_C( 1701013864), INT32_C( 105610573) }, { INT32_C( 332588485), INT32_C( 948617647), INT32_C( 788679840), -INT32_C( 1375257495) }, { INT64_C( 1341555342097301760), -INT64_C( 145241732069494635) } }, { { INT32_C( 378997421), -INT32_C( 
1419980989), -INT32_C( 2045657031), INT32_C( 59530301) }, { -INT32_C( 266969279), -INT32_C( 1071013600), INT32_C( 1458514925), INT32_C( 335869543) }, { -INT64_C( 2983621311144687675), INT64_C( 19994414991522443) } }, { { INT32_C( 1143642881), -INT32_C( 1527806101), -INT32_C( 2077622201), -INT32_C( 1635273124) }, { INT32_C( 898539029), INT32_C( 754366271), INT32_C( 1233315298), -INT32_C( 581007652) }, { -INT64_C( 2562363243957730898), INT64_C( 950106198153944848) } }, { { -INT32_C( 1927182302), INT32_C( 1446055951), INT32_C( 1843092241), INT32_C( 655057425) }, { INT32_C( 1063033344), INT32_C( 879448657), INT32_C( 343797048), -INT32_C( 1762534540) }, { INT64_C( 633649671647504568), -INT64_C( 1154561337245959500) } }, { { INT32_C( 1931678308), INT32_C( 868897826), -INT32_C( 1063148113), INT32_C( 149400583) }, { -INT32_C( 1740159930), -INT32_C( 825445482), INT32_C( 350374304), -INT32_C( 1985293531) }, { -INT64_C( 372499780141288352), -INT64_C( 296604010957528573) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int64x2_t r = simde_vmull_high_s32(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int64x2_t r = simde_vmull_high_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmull_high_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t b[16]; uint16_t r[8]; } test_vec[] = { { { UINT8_C(179), UINT8_C( 51), UINT8_C( 59), UINT8_C( 26), UINT8_C(136), UINT8_C(207), UINT8_C( 24), UINT8_C(152), 
UINT8_C( 17), UINT8_C(185), UINT8_C(236), UINT8_C( 76), UINT8_C( 36), UINT8_C(205), UINT8_C( 64), UINT8_C(207) }, { UINT8_C( 48), UINT8_C( 72), UINT8_C(171), UINT8_C( 31), UINT8_C(148), UINT8_C(163), UINT8_C(148), UINT8_C( 63), UINT8_C( 78), UINT8_C(254), UINT8_C(102), UINT8_C( 44), UINT8_C(198), UINT8_C(233), UINT8_C(197), UINT8_C(121) }, { UINT16_C( 1326), UINT16_C(46990), UINT16_C(24072), UINT16_C( 3344), UINT16_C( 7128), UINT16_C(47765), UINT16_C(12608), UINT16_C(25047) } }, { { UINT8_C( 29), UINT8_C( 1), UINT8_C(147), UINT8_C(165), UINT8_C(208), UINT8_C(171), UINT8_C( 61), UINT8_C(226), UINT8_C(100), UINT8_C( 42), UINT8_C( 46), UINT8_C(136), UINT8_C(247), UINT8_C(110), UINT8_C( 88), UINT8_C( 39) }, { UINT8_C(182), UINT8_C( 3), UINT8_C( 71), UINT8_C( 75), UINT8_C(166), UINT8_C(219), UINT8_C(138), UINT8_C(244), UINT8_C(217), UINT8_C(240), UINT8_C( 33), UINT8_C(159), UINT8_C(218), UINT8_C(230), UINT8_C( 25), UINT8_C(247) }, { UINT16_C(21700), UINT16_C(10080), UINT16_C( 1518), UINT16_C(21624), UINT16_C(53846), UINT16_C(25300), UINT16_C( 2200), UINT16_C( 9633) } }, { { UINT8_C(231), UINT8_C(172), UINT8_C(156), UINT8_C(184), UINT8_C( 88), UINT8_C(217), UINT8_C(154), UINT8_C(188), UINT8_C( 3), UINT8_C(200), UINT8_C( 69), UINT8_C(250), UINT8_C( 55), UINT8_C(157), UINT8_C( 34), UINT8_C(237) }, { UINT8_C(160), UINT8_C(105), UINT8_C( 56), UINT8_C( 71), UINT8_C( 68), UINT8_C(195), UINT8_C( 59), UINT8_C( 30), UINT8_C(179), UINT8_C( 92), UINT8_C(189), UINT8_C(141), UINT8_C( 67), UINT8_C(214), UINT8_C(132), UINT8_C( 42) }, { UINT16_C( 537), UINT16_C(18400), UINT16_C(13041), UINT16_C(35250), UINT16_C( 3685), UINT16_C(33598), UINT16_C( 4488), UINT16_C( 9954) } }, { { UINT8_C(131), UINT8_C( 32), UINT8_C(226), UINT8_C(219), UINT8_C(250), UINT8_C(124), UINT8_C(151), UINT8_C(253), UINT8_C( 69), UINT8_C(220), UINT8_C(248), UINT8_C(124), UINT8_C(121), UINT8_C( 26), UINT8_C(105), UINT8_C( 26) }, { UINT8_C(131), UINT8_C(162), UINT8_C( 97), UINT8_C(199), UINT8_C(101), UINT8_C(156), 
UINT8_C(229), UINT8_C( 24), UINT8_C(249), UINT8_C(163), UINT8_C(166), UINT8_C( 60), UINT8_C(121), UINT8_C( 42), UINT8_C(102), UINT8_C(252) }, { UINT16_C(17181), UINT16_C(35860), UINT16_C(41168), UINT16_C( 7440), UINT16_C(14641), UINT16_C( 1092), UINT16_C(10710), UINT16_C( 6552) } }, { { UINT8_C( 75), UINT8_C( 73), UINT8_C(215), UINT8_C( 69), UINT8_C(197), UINT8_C(111), UINT8_C( 66), UINT8_C( 10), UINT8_C( 75), UINT8_C( 58), UINT8_C(134), UINT8_C(197), UINT8_C( 84), UINT8_C(240), UINT8_C(223), UINT8_C(215) }, { UINT8_C(146), UINT8_C( 64), UINT8_C(159), UINT8_C(247), UINT8_C(220), UINT8_C(132), UINT8_C( 15), UINT8_C(213), UINT8_C( 39), UINT8_C(181), UINT8_C( 17), UINT8_C(161), UINT8_C(224), UINT8_C(120), UINT8_C(157), UINT8_C( 43) }, { UINT16_C( 2925), UINT16_C(10498), UINT16_C( 2278), UINT16_C(31717), UINT16_C(18816), UINT16_C(28800), UINT16_C(35011), UINT16_C( 9245) } }, { { UINT8_C(193), UINT8_C(117), UINT8_C(112), UINT8_C(134), UINT8_C(228), UINT8_C(178), UINT8_C(145), UINT8_C( 47), UINT8_C(237), UINT8_C( 23), UINT8_C(244), UINT8_C( 65), UINT8_C( 7), UINT8_C(211), UINT8_C( 25), UINT8_C(153) }, { UINT8_C( 19), UINT8_C(184), UINT8_C(144), UINT8_C(240), UINT8_C( 60), UINT8_C(160), UINT8_C(197), UINT8_C(100), UINT8_C( 85), UINT8_C(215), UINT8_C( 5), UINT8_C( 53), UINT8_C( 79), UINT8_C(162), UINT8_C( 96), UINT8_C( 16) }, { UINT16_C(20145), UINT16_C( 4945), UINT16_C( 1220), UINT16_C( 3445), UINT16_C( 553), UINT16_C(34182), UINT16_C( 2400), UINT16_C( 2448) } }, { { UINT8_C( 23), UINT8_C(208), UINT8_C(150), UINT8_C(251), UINT8_C(131), UINT8_C( 39), UINT8_C( 43), UINT8_C(112), UINT8_C( 63), UINT8_C( 31), UINT8_C(177), UINT8_C( 70), UINT8_C(243), UINT8_C(202), UINT8_C(224), UINT8_C( 6) }, { UINT8_C(130), UINT8_C(112), UINT8_C(246), UINT8_C(191), UINT8_C( 16), UINT8_C(188), UINT8_C( 35), UINT8_C(102), UINT8_C(147), UINT8_C( 40), UINT8_C(155), UINT8_C(226), UINT8_C(202), UINT8_C(252), UINT8_C(242), UINT8_C(226) }, { UINT16_C( 9261), UINT16_C( 1240), UINT16_C(27435), 
UINT16_C(15820), UINT16_C(49086), UINT16_C(50904), UINT16_C(54208), UINT16_C( 1356) } }, { { UINT8_C(204), UINT8_C(136), UINT8_C(221), UINT8_C( 79), UINT8_C(176), UINT8_C( 8), UINT8_C(191), UINT8_C(239), UINT8_C( 40), UINT8_C(113), UINT8_C( 53), UINT8_C( 27), UINT8_C( 59), UINT8_C( 21), UINT8_C( 33), UINT8_C(190) }, { UINT8_C(134), UINT8_C( 24), UINT8_C(125), UINT8_C(150), UINT8_C(212), UINT8_C(160), UINT8_C(252), UINT8_C(103), UINT8_C(200), UINT8_C(152), UINT8_C( 73), UINT8_C(146), UINT8_C(148), UINT8_C( 59), UINT8_C(116), UINT8_C( 96) }, { UINT16_C( 8000), UINT16_C(17176), UINT16_C( 3869), UINT16_C( 3942), UINT16_C( 8732), UINT16_C( 1239), UINT16_C( 3828), UINT16_C(18240) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint16x8_t r = simde_vmull_high_u8(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint16x8_t r = simde_vmull_high_u8(a, b); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmull_high_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; uint32_t r[4]; } test_vec[] = { { { UINT16_C(21187), UINT16_C(29616), UINT16_C(28506), UINT16_C(33378), UINT16_C(39136), UINT16_C( 7325), UINT16_C(49069), UINT16_C(13274) }, { UINT16_C(22487), UINT16_C(43978), UINT16_C(50935), UINT16_C(48914), UINT16_C(23390), UINT16_C(62033), UINT16_C(50838), UINT16_C(22867) }, { UINT32_C( 915391040), UINT32_C( 454391725), UINT32_C(2494569822), UINT32_C( 303536558) } }, { { UINT16_C( 792), 
UINT16_C(29389), UINT16_C(12146), UINT16_C(21493), UINT16_C(37575), UINT16_C(30063), UINT16_C(18769), UINT16_C(10408) }, { UINT16_C(29344), UINT16_C(38867), UINT16_C(58681), UINT16_C(38742), UINT16_C(42816), UINT16_C(54922), UINT16_C(56685), UINT16_C(34096) }, { UINT32_C(1608811200), UINT32_C(1651120086), UINT32_C(1063920765), UINT32_C( 354871168) } }, { { UINT16_C(64992), UINT16_C(21240), UINT16_C(60716), UINT16_C(62629), UINT16_C( 5247), UINT16_C(53609), UINT16_C( 4445), UINT16_C(65017) }, { UINT16_C(52612), UINT16_C(48532), UINT16_C(60082), UINT16_C(62292), UINT16_C(56978), UINT16_C(65481), UINT16_C(63931), UINT16_C(39813) }, { UINT32_C( 298963566), UINT32_C(3510370929), UINT32_C( 284173295), UINT32_C(2588521821) } }, { { UINT16_C(32246), UINT16_C( 9198), UINT16_C(37738), UINT16_C(59671), UINT16_C(32936), UINT16_C( 1466), UINT16_C(46225), UINT16_C( 5379) }, { UINT16_C(38785), UINT16_C(13266), UINT16_C(10114), UINT16_C( 5158), UINT16_C(61445), UINT16_C(49427), UINT16_C(39145), UINT16_C(57436) }, { UINT32_C(2023752520), UINT32_C( 72459982), UINT32_C(1809477625), UINT32_C( 308948244) } }, { { UINT16_C(18965), UINT16_C(32515), UINT16_C( 6878), UINT16_C(34409), UINT16_C( 9114), UINT16_C(11147), UINT16_C(36567), UINT16_C(22593) }, { UINT16_C( 4902), UINT16_C(43148), UINT16_C(45626), UINT16_C(16572), UINT16_C(53154), UINT16_C(35841), UINT16_C(23912), UINT16_C(32108) }, { UINT32_C( 484445556), UINT32_C( 399519627), UINT32_C( 874390104), UINT32_C( 725416044) } }, { { UINT16_C(28584), UINT16_C(34557), UINT16_C(26249), UINT16_C( 8972), UINT16_C(38793), UINT16_C(24910), UINT16_C(36646), UINT16_C(19641) }, { UINT16_C(17827), UINT16_C(56820), UINT16_C(45304), UINT16_C(39453), UINT16_C( 7807), UINT16_C(59174), UINT16_C(37500), UINT16_C( 9317) }, { UINT32_C( 302856951), UINT32_C(1474024340), UINT32_C(1374225000), UINT32_C( 182995197) } }, { { UINT16_C(25089), UINT16_C(35498), UINT16_C(46792), UINT16_C(20909), UINT16_C(64589), UINT16_C(29618), UINT16_C(27787), UINT16_C(11967) }, 
{ UINT16_C(46001), UINT16_C(43276), UINT16_C(10595), UINT16_C(58180), UINT16_C(27208), UINT16_C(50378), UINT16_C(12285), UINT16_C(65256) }, { UINT32_C(1757337512), UINT32_C(1492095604), UINT32_C( 341363295), UINT32_C( 780918552) } }, { { UINT16_C(37521), UINT16_C(22921), UINT16_C(13896), UINT16_C(38315), UINT16_C(23858), UINT16_C(48649), UINT16_C(51401), UINT16_C(31724) }, { UINT16_C(63612), UINT16_C(57124), UINT16_C(26658), UINT16_C(27330), UINT16_C(36307), UINT16_C(53294), UINT16_C( 5820), UINT16_C(20174) }, { UINT32_C( 866212406), UINT32_C(2592699806), UINT32_C( 299153820), UINT32_C( 639999976) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint32x4_t r = simde_vmull_high_u16(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint32x4_t r = simde_vmull_high_u16(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmull_high_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; uint64_t r[2]; } test_vec[] = { { { UINT32_C(4037498792), UINT32_C(3229962894), UINT32_C(2038337200), UINT32_C(3556010839) }, { UINT32_C(2243041635), UINT32_C(1424979329), UINT32_C(3190037762), UINT32_C(3675058995) }, { UINT64_C( 6502372639689346400), UINT64_C(13068549620184446805) } }, { { UINT32_C(3637228618), UINT32_C(3063501062), UINT32_C( 909121503), UINT32_C(3859358850) }, { UINT32_C(3211508797), UINT32_C( 856906545), UINT32_C(2901489784), UINT32_C(1971846699) }, { UINT64_C( 2637806753369225352), 
UINT64_C( 7610064008628936150) } }, { { UINT32_C(3092140978), UINT32_C(2205149092), UINT32_C(2176491518), UINT32_C( 23577539) }, { UINT32_C(2965426815), UINT32_C(2799948589), UINT32_C( 911398155), UINT32_C(2242697683) }, { UINT64_C( 1983650353878349290), UINT64_C( 52877292086142137) } }, { { UINT32_C(3493722668), UINT32_C(3746868449), UINT32_C( 257953355), UINT32_C(1360054225) }, { UINT32_C(3338784922), UINT32_C(2943215011), UINT32_C(2380644282), UINT32_C(3306328473) }, { UINT64_C( 614095179603466110), UINT64_C( 4496786008941448425) } }, { { UINT32_C(1821790091), UINT32_C(1196223228), UINT32_C(3377900792), UINT32_C( 236611188) }, { UINT32_C(3671399478), UINT32_C(3146334977), UINT32_C(2605215234), UINT32_C(2338413056) }, { UINT64_C( 8800158602259065328), UINT64_C( 553294691214870528) } }, { { UINT32_C(2784556969), UINT32_C(3656205537), UINT32_C(1688355824), UINT32_C(3765616042) }, { UINT32_C(3669641433), UINT32_C(2375369611), UINT32_C(2989088178), UINT32_C(3762129463) }, { UINT64_C( 5046644433775848672), UINT64_C(14166735057953645446) } }, { { UINT32_C(1652962689), UINT32_C(1782281081), UINT32_C(1624169910), UINT32_C(1933656474) }, { UINT32_C( 340654985), UINT32_C(4053918271), UINT32_C(4137929407), UINT32_C(3587694676) }, { UINT64_C( 6720700432553543370), UINT64_C( 6937369036982732424) } }, { { UINT32_C(2402770198), UINT32_C(2264494800), UINT32_C(3941058640), UINT32_C(2455644169) }, { UINT32_C(1655089955), UINT32_C(1297303438), UINT32_C(1715795474), UINT32_C(3980139479) }, { UINT64_C( 6762050577280595360), UINT64_C( 9773806303413047951) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint64x2_t r = simde_vmull_high_u32(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = 
simde_test_arm_neon_random_u32x4(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint64x2_t r = simde_vmull_high_u32(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmull_high_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmull_high_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmull_high_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmull_high_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmull_high_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmull_high_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/mull_n.c000066400000000000000000000233641400333146700167610ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN mull_n #include "test-neon.h" #include "../../../simde/arm/neon/mull_n.h" static int test_simde_vmull_n_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b; int32_t r[4]; } test_vec[] = { { { INT16_C( 13839), INT16_C( 23928), -INT16_C( 18368), -INT16_C( 26754) }, INT16_C( 10433), { INT32_C( 144382287), INT32_C( 249640824), -INT32_C( 191633344), -INT32_C( 279124482) } }, { { INT16_C( 15140), -INT16_C( 7541), -INT16_C( 10338), INT16_C( 14722) }, -INT16_C( 2838), { -INT32_C( 42967320), INT32_C( 21401358), INT32_C( 29339244), -INT32_C( 41781036) } }, { { INT16_C( 30889), INT16_C( 11965), INT16_C( 10828), -INT16_C( 28246) }, -INT16_C( 4452), { -INT32_C( 137517828), -INT32_C( 53268180), -INT32_C( 48206256), INT32_C( 125751192) } }, { { -INT16_C( 21453), -INT16_C( 21724), INT16_C( 25865), -INT16_C( 30877) }, INT16_C( 9468), { -INT32_C( 203117004), -INT32_C( 205682832), INT32_C( 244889820), -INT32_C( 292343436) } }, { { INT16_C( 8368), INT16_C( 15200), -INT16_C( 509), -INT16_C( 31470) }, -INT16_C( 969), { -INT32_C( 8108592), -INT32_C( 14728800), INT32_C( 493221), INT32_C( 30494430) } }, { { -INT16_C( 7815), INT16_C( 
14196), -INT16_C( 16369), -INT16_C( 18079) }, -INT16_C( 430), { INT32_C( 3360450), -INT32_C( 6104280), INT32_C( 7038670), INT32_C( 7773970) } }, { { -INT16_C( 31321), -INT16_C( 13142), -INT16_C( 19664), -INT16_C( 27855) }, INT16_C( 11578), { -INT32_C( 362634538), -INT32_C( 152158076), -INT32_C( 227669792), -INT32_C( 322505190) } }, { { -INT16_C( 5449), INT16_C( 5965), INT16_C( 20518), INT16_C( 14358) }, INT16_C( 19926), { -INT32_C( 108576774), INT32_C( 118858590), INT32_C( 408841668), INT32_C( 286097508) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); int16_t b = test_vec[i].b; simde_int32x4_t r = simde_vmull_n_s16(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); int16_t b = simde_test_codegen_random_i16(); simde_int32x4_t r = simde_vmull_n_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmull_n_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b; int64_t r[2]; } test_vec[] = { { { -INT32_C( 899636787), INT32_C( 823949875) }, -INT32_C( 936755039), { INT64_C( 842739293492019693), -INT64_C( 771839197289670125) } }, { { INT32_C( 676491476), INT32_C( 189564474) }, -INT32_C( 38164631), { -INT64_C( 25818047556185356), -INT64_C( 7234658200919094) } }, { { -INT32_C( 883789225), -INT32_C( 768140539) }, -INT32_C( 274884676), { INT64_C( 242940114766416100), INT64_C( 211150063185480364) } }, { { -INT32_C( 1289635567), -INT32_C( 847557639) }, -INT32_C( 168440389), { INT64_C( 217226716573715563), INT64_C( 142762938413081571) } }, { { -INT32_C( 1123990956), INT32_C( 1085979369) }, INT32_C( 839650349), { -INT64_C( 
943759398478243644), INT64_C( 911842956187649781) } }, { { -INT32_C( 486194393), -INT32_C( 338451750) }, INT32_C( 1419703387), { -INT64_C( 690251826482509091), -INT64_C( 480501095811077250) } }, { { -INT32_C( 98428353), INT32_C( 1005590503) }, INT32_C( 1140388185), { -INT64_C( 112246530830209305), INT64_C( 1146763528569407055) } }, { { -INT32_C( 662457685), -INT32_C( 435449921) }, -INT32_C( 1396043566), { INT64_C( 924819788891504710), INT64_C( 607907060527258286) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); int32_t b = test_vec[i].b; simde_int64x2_t r = simde_vmull_n_s32(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); int32_t b = simde_test_codegen_random_i32(); simde_int64x2_t r = simde_vmull_n_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmull_n_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b; uint32_t r[4]; } test_vec[] = { { { UINT16_C(51213), UINT16_C(42159), UINT16_C(16985), UINT16_C(41215) }, UINT16_C(47519), { UINT32_C(2433590547), UINT32_C(2003353521), UINT32_C( 807110215), UINT32_C(1958495585) } }, { { UINT16_C( 7629), UINT16_C(55608), UINT16_C(36497), UINT16_C(36334) }, UINT16_C(19590), { UINT32_C( 149452110), UINT32_C(1089360720), UINT32_C( 714976230), UINT32_C( 711783060) } }, { { UINT16_C(25666), UINT16_C(13699), UINT16_C(51873), UINT16_C( 1864) }, UINT16_C(33004), { UINT32_C( 847080664), UINT32_C( 452121796), UINT32_C(1712016492), UINT32_C( 61519456) } }, { { UINT16_C(64206), UINT16_C(32072), UINT16_C(41374), UINT16_C(40383) }, UINT16_C(24129), { UINT32_C(1549226574), 
UINT32_C( 773865288), UINT32_C( 998313246), UINT32_C( 974401407) } }, { { UINT16_C( 3926), UINT16_C(36476), UINT16_C( 3560), UINT16_C(54813) }, UINT16_C(41882), { UINT32_C( 164428732), UINT32_C(1527687832), UINT32_C( 149099920), UINT32_C(2295678066) } }, { { UINT16_C(56354), UINT16_C(42248), UINT16_C(43281), UINT16_C(22896) }, UINT16_C(23728), { UINT32_C(1337167712), UINT32_C(1002460544), UINT32_C(1026971568), UINT32_C( 543276288) } }, { { UINT16_C(32473), UINT16_C( 8790), UINT16_C(62716), UINT16_C(48067) }, UINT16_C( 1425), { UINT32_C( 46274025), UINT32_C( 12525750), UINT32_C( 89370300), UINT32_C( 68495475) } }, { { UINT16_C(59162), UINT16_C(38420), UINT16_C(64630), UINT16_C(37795) }, UINT16_C(15827), { UINT32_C( 936356974), UINT32_C( 608073340), UINT32_C(1022899010), UINT32_C( 598181465) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); uint16_t b = test_vec[i].b; simde_uint32x4_t r = simde_vmull_n_u16(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); uint16_t b = simde_test_codegen_random_u16(); simde_uint32x4_t r = simde_vmull_n_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmull_n_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b; uint64_t r[2]; } test_vec[] = { { { UINT32_C(1324541274), UINT32_C( 832398216) }, UINT32_C(4043693192), { UINT64_C( 5356038532196806608), UINT64_C( 3365962999072145472) } }, { { UINT32_C( 612019424), UINT32_C( 590534729) }, UINT32_C( 946752278), { UINT64_C( 579430783852247872), UINT64_C( 559090099918862662) } }, { { UINT32_C(2626716765), UINT32_C(2899880274) }, 
UINT32_C( 385534606), { UINT64_C( 1012690213067869590), UINT64_C( 1118004198883762044) } }, { { UINT32_C(3058145069), UINT32_C(1420250483) }, UINT32_C(1131946489), { UINT64_C( 3461656573707212741), UINT64_C( 1607647547732404187) } }, { { UINT32_C( 275163898), UINT32_C(1397347574) }, UINT32_C(2867845464), { UINT64_C( 789127536735858672), UINT64_C( 4007376901727304336) } }, { { UINT32_C( 290899842), UINT32_C(3207025042) }, UINT32_C(1551200232), { UINT64_C( 451243902399163344), UINT64_C( 4974737989180209744) } }, { { UINT32_C(3048217788), UINT32_C( 955787326) }, UINT32_C(3360186066), { UINT64_C(10242578937370942008), UINT64_C( 3211623254884599516) } }, { { UINT32_C(2317062450), UINT32_C(3979610731) }, UINT32_C(1694403538), { UINT64_C( 3926038813046948100), UINT64_C( 6743066502469166278) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); uint32_t b = test_vec[i].b; simde_uint64x2_t r = simde_vmull_n_u32(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); uint32_t b = simde_test_codegen_random_u32(); simde_uint64x2_t r = simde_vmull_n_u32(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmull_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmull_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmull_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmull_n_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/mvn.c000066400000000000000000001031231400333146700162630ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN mvn #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include 
"../../../simde/arm/neon/mvn.h" #else #include "../../../simde/arm/neon.h" #endif static int test_simde_vmvn_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 27), INT8_C( 6), INT8_C( 75), INT8_C( 101), INT8_C( 8), -INT8_C( 12), -INT8_C( 2), INT8_C( 29) }, { -INT8_C( 28), -INT8_C( 7), -INT8_C( 76), -INT8_C( 102), -INT8_C( 9), INT8_C( 11), INT8_C( 1), -INT8_C( 30) } }, { { INT8_C( 81), -INT8_C( 96), -INT8_C( 60), INT8_C( 86), -INT8_C( 57), INT8_C( 42), -INT8_C( 34), -INT8_C( 29) }, { -INT8_C( 82), INT8_C( 95), INT8_C( 59), -INT8_C( 87), INT8_C( 56), -INT8_C( 43), INT8_C( 33), INT8_C( 28) } }, { { INT8_C( 120), INT8_C( 82), -INT8_C( 78), INT8_C( 74), -INT8_C( 126), INT8_C( 123), INT8_C( 26), INT8_C( 61) }, { -INT8_C( 121), -INT8_C( 83), INT8_C( 77), -INT8_C( 75), INT8_C( 125), -INT8_C( 124), -INT8_C( 27), -INT8_C( 62) } }, { { -INT8_C( 113), INT8_C( 122), -INT8_C( 79), -INT8_C( 94), -INT8_C( 16), INT8_C( 93), -INT8_C( 97), INT8_C( 11) }, { INT8_C( 112), -INT8_C( 123), INT8_C( 78), INT8_C( 93), INT8_C( 15), -INT8_C( 94), INT8_C( 96), -INT8_C( 12) } }, { { INT8_C( 99), -INT8_C( 21), INT8_C( 112), INT8_C( 107), -INT8_C( 33), INT8_C( 111), -INT8_C( 120), INT8_C( 48) }, { -INT8_C( 100), INT8_C( 20), -INT8_C( 113), -INT8_C( 108), INT8_C( 32), -INT8_C( 112), INT8_C( 119), -INT8_C( 49) } }, { { INT8_C( 15), INT8_C( 76), -INT8_C( 122), -INT8_C( 42), INT8_C( 118), INT8_C( 100), -INT8_C( 71), -INT8_C( 17) }, { -INT8_C( 16), -INT8_C( 77), INT8_C( 121), INT8_C( 41), -INT8_C( 119), -INT8_C( 101), INT8_C( 70), INT8_C( 16) } }, { { -INT8_C( 74), INT8_C( 107), INT8_C( 57), INT8_C( 57), -INT8_C( 26), INT8_C( 83), INT8_C( 118), INT8_C( 117) }, { INT8_C( 73), -INT8_C( 108), -INT8_C( 58), -INT8_C( 58), INT8_C( 25), -INT8_C( 84), -INT8_C( 119), -INT8_C( 118) } }, { { -INT8_C( 51), INT8_C( 39), INT8_C( 23), -INT8_C( 67), -INT8_C( 124), -INT8_C( 73), -INT8_C( 55), -INT8_C( 25) }, { INT8_C( 50), -INT8_C( 40), -INT8_C( 24), INT8_C( 
66), INT8_C( 123), INT8_C( 72), INT8_C( 54), INT8_C( 24) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t r = simde_vmvn_s8(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vmvn_s8(a); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmvn_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 3761), INT16_C( 8161), -INT16_C( 11229), INT16_C( 25212) }, { -INT16_C( 3762), -INT16_C( 8162), INT16_C( 11228), -INT16_C( 25213) } }, { { INT16_C( 17586), INT16_C( 20930), -INT16_C( 21449), INT16_C( 5279) }, { -INT16_C( 17587), -INT16_C( 20931), INT16_C( 21448), -INT16_C( 5280) } }, { { -INT16_C( 1840), -INT16_C( 4596), -INT16_C( 25181), INT16_C( 2074) }, { INT16_C( 1839), INT16_C( 4595), INT16_C( 25180), -INT16_C( 2075) } }, { { -INT16_C( 28742), -INT16_C( 7187), -INT16_C( 21198), -INT16_C( 7217) }, { INT16_C( 28741), INT16_C( 7186), INT16_C( 21197), INT16_C( 7216) } }, { { -INT16_C( 20293), -INT16_C( 8702), INT16_C( 32388), INT16_C( 13889) }, { INT16_C( 20292), INT16_C( 8701), -INT16_C( 32389), -INT16_C( 13890) } }, { { INT16_C( 962), -INT16_C( 1656), INT16_C( 10159), INT16_C( 32526) }, { -INT16_C( 963), INT16_C( 1655), -INT16_C( 10160), -INT16_C( 32527) } }, { { INT16_C( 6687), -INT16_C( 15763), -INT16_C( 30792), INT16_C( 29386) }, { -INT16_C( 6688), INT16_C( 15762), INT16_C( 30791), -INT16_C( 29387) } }, { { -INT16_C( 18665), INT16_C( 18773), INT16_C( 9316), INT16_C( 7980) }, { INT16_C( 18664), -INT16_C( 18774), -INT16_C( 9317), -INT16_C( 7981) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t r = simde_vmvn_s16(a); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vmvn_s16(a); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmvn_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 793763496), INT32_C( 1077145611) }, { -INT32_C( 793763497), -INT32_C( 1077145612) } }, { { -INT32_C( 223545418), -INT32_C( 1691633505) }, { INT32_C( 223545417), INT32_C( 1691633504) } }, { { -INT32_C( 705535259), INT32_C( 702757249) }, { INT32_C( 705535258), -INT32_C( 702757250) } }, { { INT32_C( 332262688), INT32_C( 461678451) }, { -INT32_C( 332262689), -INT32_C( 461678452) } }, { { -INT32_C( 1857367162), INT32_C( 2060549571) }, { INT32_C( 1857367161), -INT32_C( 2060549572) } }, { { INT32_C( 325877108), INT32_C( 464426805) }, { -INT32_C( 325877109), -INT32_C( 464426806) } }, { { INT32_C( 2079367417), -INT32_C( 106638119) }, { -INT32_C( 2079367418), INT32_C( 106638118) } }, { { INT32_C( 873296577), -INT32_C( 1622175463) }, { -INT32_C( 873296578), INT32_C( 1622175462) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t r = simde_vmvn_s32(a); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vmvn_s32(a); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmvn_u8 
(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(245), UINT8_C(228), UINT8_C( 41), UINT8_C( 86), UINT8_C(199), UINT8_C(240), UINT8_C( 66), UINT8_C( 39) }, { UINT8_C( 10), UINT8_C( 27), UINT8_C(214), UINT8_C(169), UINT8_C( 56), UINT8_C( 15), UINT8_C(189), UINT8_C(216) } }, { { UINT8_C(222), UINT8_C(231), UINT8_C(111), UINT8_C( 66), UINT8_C(118), UINT8_C(193), UINT8_C( 32), UINT8_C( 66) }, { UINT8_C( 33), UINT8_C( 24), UINT8_C(144), UINT8_C(189), UINT8_C(137), UINT8_C( 62), UINT8_C(223), UINT8_C(189) } }, { { UINT8_C(126), UINT8_C( 35), UINT8_C(233), UINT8_C(186), UINT8_C( 84), UINT8_C( 74), UINT8_C( 40), UINT8_C(224) }, { UINT8_C(129), UINT8_C(220), UINT8_C( 22), UINT8_C( 69), UINT8_C(171), UINT8_C(181), UINT8_C(215), UINT8_C( 31) } }, { { UINT8_C(236), UINT8_C( 39), UINT8_C( 71), UINT8_C(236), UINT8_C( 45), UINT8_C(106), UINT8_C( 15), UINT8_C( 35) }, { UINT8_C( 19), UINT8_C(216), UINT8_C(184), UINT8_C( 19), UINT8_C(210), UINT8_C(149), UINT8_C(240), UINT8_C(220) } }, { { UINT8_C( 79), UINT8_C( 57), UINT8_C(121), UINT8_C( 22), UINT8_C( 41), UINT8_C(187), UINT8_C( 61), UINT8_C( 7) }, { UINT8_C(176), UINT8_C(198), UINT8_C(134), UINT8_C(233), UINT8_C(214), UINT8_C( 68), UINT8_C(194), UINT8_C(248) } }, { { UINT8_C(162), UINT8_C(173), UINT8_C( 73), UINT8_C( 24), UINT8_C(110), UINT8_C(106), UINT8_C( 91), UINT8_C(236) }, { UINT8_C( 93), UINT8_C( 82), UINT8_C(182), UINT8_C(231), UINT8_C(145), UINT8_C(149), UINT8_C(164), UINT8_C( 19) } }, { { UINT8_C(141), UINT8_C( 68), UINT8_C(167), UINT8_C(225), UINT8_C(143), UINT8_C(207), UINT8_C(193), UINT8_C(123) }, { UINT8_C(114), UINT8_C(187), UINT8_C( 88), UINT8_C( 30), UINT8_C(112), UINT8_C( 48), UINT8_C( 62), UINT8_C(132) } }, { { UINT8_C(246), UINT8_C( 9), UINT8_C(103), UINT8_C( 36), UINT8_C(115), UINT8_C(118), UINT8_C( 71), UINT8_C(194) }, { UINT8_C( 9), UINT8_C(246), UINT8_C(152), UINT8_C(219), UINT8_C(140), UINT8_C(137), UINT8_C(184), UINT8_C( 61) } }, }; for (size_t 
i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t r = simde_vmvn_u8(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vmvn_u8(a); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmvn_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(48717), UINT16_C(17400), UINT16_C(59057), UINT16_C(50869) }, { UINT16_C(16818), UINT16_C(48135), UINT16_C( 6478), UINT16_C(14666) } }, { { UINT16_C(33816), UINT16_C(61995), UINT16_C(11273), UINT16_C(45334) }, { UINT16_C(31719), UINT16_C( 3540), UINT16_C(54262), UINT16_C(20201) } }, { { UINT16_C(57276), UINT16_C(34241), UINT16_C(52335), UINT16_C(56050) }, { UINT16_C( 8259), UINT16_C(31294), UINT16_C(13200), UINT16_C( 9485) } }, { { UINT16_C(52494), UINT16_C( 1151), UINT16_C(50587), UINT16_C(59614) }, { UINT16_C(13041), UINT16_C(64384), UINT16_C(14948), UINT16_C( 5921) } }, { { UINT16_C(55171), UINT16_C(13611), UINT16_C(57533), UINT16_C(54779) }, { UINT16_C(10364), UINT16_C(51924), UINT16_C( 8002), UINT16_C(10756) } }, { { UINT16_C( 9828), UINT16_C(28103), UINT16_C(56914), UINT16_C( 3870) }, { UINT16_C(55707), UINT16_C(37432), UINT16_C( 8621), UINT16_C(61665) } }, { { UINT16_C(57277), UINT16_C(11412), UINT16_C(34475), UINT16_C(47366) }, { UINT16_C( 8258), UINT16_C(54123), UINT16_C(31060), UINT16_C(18169) } }, { { UINT16_C(34388), UINT16_C(61373), UINT16_C(40011), UINT16_C(53207) }, { UINT16_C(31147), UINT16_C( 4162), UINT16_C(25524), UINT16_C(12328) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t r = 
simde_vmvn_u16(a); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t r = simde_vmvn_u16(a); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmvn_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C( 87345636), UINT32_C(2841930894) }, { UINT32_C(4207621659), UINT32_C(1453036401) } }, { { UINT32_C(2317135787), UINT32_C( 870366211) }, { UINT32_C(1977831508), UINT32_C(3424601084) } }, { { UINT32_C(3381487743), UINT32_C(3683874477) }, { UINT32_C( 913479552), UINT32_C( 611092818) } }, { { UINT32_C( 142579175), UINT32_C(3930029830) }, { UINT32_C(4152388120), UINT32_C( 364937465) } }, { { UINT32_C(3622859592), UINT32_C(2323666143) }, { UINT32_C( 672107703), UINT32_C(1971301152) } }, { { UINT32_C( 102014211), UINT32_C(3627677017) }, { UINT32_C(4192953084), UINT32_C( 667290278) } }, { { UINT32_C( 178374237), UINT32_C( 619000893) }, { UINT32_C(4116593058), UINT32_C(3675966402) } }, { { UINT32_C(3475793097), UINT32_C( 750414819) }, { UINT32_C( 819174198), UINT32_C(3544552476) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t r = simde_vmvn_u32(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t r = simde_vmvn_u32(a); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmvnq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t 
a[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 69), INT8_C( 44), -INT8_C( 110), -INT8_C( 82), -INT8_C( 7), INT8_C( 68), -INT8_C( 120), -INT8_C( 25), INT8_C( 15), INT8_C( 54), INT8_C( 36), INT8_C( 121), INT8_C( 81), INT8_C( 4), INT8_C( 96), -INT8_C( 117) }, { -INT8_C( 70), -INT8_C( 45), INT8_C( 109), INT8_C( 81), INT8_C( 6), -INT8_C( 69), INT8_C( 119), INT8_C( 24), -INT8_C( 16), -INT8_C( 55), -INT8_C( 37), -INT8_C( 122), -INT8_C( 82), -INT8_C( 5), -INT8_C( 97), INT8_C( 116) } }, { { -INT8_C( 16), INT8_C( 35), -INT8_C( 107), INT8_C( 20), INT8_C( 50), -INT8_C( 76), -INT8_C( 12), -INT8_C( 40), -INT8_C( 51), INT8_C( 18), -INT8_C( 26), INT8_C( 18), INT8_C( 55), INT8_C( 80), -INT8_C( 35), INT8_C( 124) }, { INT8_C( 15), -INT8_C( 36), INT8_C( 106), -INT8_C( 21), -INT8_C( 51), INT8_C( 75), INT8_C( 11), INT8_C( 39), INT8_C( 50), -INT8_C( 19), INT8_C( 25), -INT8_C( 19), -INT8_C( 56), -INT8_C( 81), INT8_C( 34), -INT8_C( 125) } }, { { INT8_C( 124), INT8_C( 111), INT8_C( 43), INT8_C( 117), -INT8_C( 76), -INT8_C( 77), INT8_C( 93), -INT8_C( 61), -INT8_C( 23), -INT8_C( 127), INT8_C( 60), INT8_C( 58), -INT8_C( 123), -INT8_C( 100), -INT8_C( 59), INT8_C( 117) }, { -INT8_C( 125), -INT8_C( 112), -INT8_C( 44), -INT8_C( 118), INT8_C( 75), INT8_C( 76), -INT8_C( 94), INT8_C( 60), INT8_C( 22), INT8_C( 126), -INT8_C( 61), -INT8_C( 59), INT8_C( 122), INT8_C( 99), INT8_C( 58), -INT8_C( 118) } }, { { -INT8_C( 64), INT8_C( 91), -INT8_C( 118), -INT8_C( 14), INT8_C( 15), INT8_C( 126), -INT8_C( 54), -INT8_C( 36), -INT8_C( 111), -INT8_C( 79), -INT8_C( 18), -INT8_C( 56), INT8_C( 1), -INT8_C( 53), INT8_C( 68), INT8_C( 125) }, { INT8_C( 63), -INT8_C( 92), INT8_C( 117), INT8_C( 13), -INT8_C( 16), -INT8_C( 127), INT8_C( 53), INT8_C( 35), INT8_C( 110), INT8_C( 78), INT8_C( 17), INT8_C( 55), -INT8_C( 2), INT8_C( 52), -INT8_C( 69), -INT8_C( 126) } }, { { INT8_C( 59), INT8_C( 111), -INT8_C( 14), -INT8_C( 17), INT8_C( 34), INT8_C( 79), -INT8_C( 78), INT8_C( 11), -INT8_C( 48), -INT8_C( 17), INT8_C( 70), 
INT8_C( 86), -INT8_C( 117), INT8_C( 11), -INT8_C( 53), INT8_C( 75) }, { -INT8_C( 60), -INT8_C( 112), INT8_C( 13), INT8_C( 16), -INT8_C( 35), -INT8_C( 80), INT8_C( 77), -INT8_C( 12), INT8_C( 47), INT8_C( 16), -INT8_C( 71), -INT8_C( 87), INT8_C( 116), -INT8_C( 12), INT8_C( 52), -INT8_C( 76) } }, { { INT8_C( 102), INT8_C( 85), INT8_C( 61), INT8_C( 117), -INT8_C( 44), INT8_C( 8), INT8_C( 82), INT8_C( 101), -INT8_C( 71), INT8_C( 64), INT8_C( 45), -INT8_C( 70), INT8_C( 12), INT8_C( 113), INT8_C( 55), INT8_C( 71) }, { -INT8_C( 103), -INT8_C( 86), -INT8_C( 62), -INT8_C( 118), INT8_C( 43), -INT8_C( 9), -INT8_C( 83), -INT8_C( 102), INT8_C( 70), -INT8_C( 65), -INT8_C( 46), INT8_C( 69), -INT8_C( 13), -INT8_C( 114), -INT8_C( 56), -INT8_C( 72) } }, { { -INT8_C( 31), INT8_C( 41), INT8_C( 54), INT8_C( 3), INT8_C( 121), -INT8_C( 24), INT8_C( 15), INT8_C( 73), -INT8_C( 41), INT8_C( 85), -INT8_C( 97), INT8_C( 99), INT8_C( 96), INT8_C( 107), -INT8_C( 82), -INT8_C( 57) }, { INT8_C( 30), -INT8_C( 42), -INT8_C( 55), -INT8_C( 4), -INT8_C( 122), INT8_C( 23), -INT8_C( 16), -INT8_C( 74), INT8_C( 40), -INT8_C( 86), INT8_C( 96), -INT8_C( 100), -INT8_C( 97), -INT8_C( 108), INT8_C( 81), INT8_C( 56) } }, { { -INT8_C( 64), -INT8_C( 20), INT8_C( 60), -INT8_C( 108), -INT8_C( 12), -INT8_C( 114), -INT8_C( 7), -INT8_C( 83), -INT8_C( 49), INT8_C( 38), INT8_C( 103), -INT8_C( 37), -INT8_C( 104), -INT8_C( 98), INT8_C( 34), INT8_C( 121) }, { INT8_C( 63), INT8_C( 19), -INT8_C( 61), INT8_C( 107), INT8_C( 11), INT8_C( 113), INT8_C( 6), INT8_C( 82), INT8_C( 48), -INT8_C( 39), -INT8_C( 104), INT8_C( 36), INT8_C( 103), INT8_C( 97), -INT8_C( 35), -INT8_C( 122) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t r = simde_vmvnq_s8(a); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = 
simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vmvnq_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmvnq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 16310), INT16_C( 26887), INT16_C( 16229), -INT16_C( 12679), -INT16_C( 20729), -INT16_C( 12653), -INT16_C( 3818), INT16_C( 21089) }, { INT16_C( 16309), -INT16_C( 26888), -INT16_C( 16230), INT16_C( 12678), INT16_C( 20728), INT16_C( 12652), INT16_C( 3817), -INT16_C( 21090) } }, { { INT16_C( 28555), -INT16_C( 26112), INT16_C( 22553), -INT16_C( 32323), -INT16_C( 6022), -INT16_C( 18686), INT16_C( 12944), -INT16_C( 9680) }, { -INT16_C( 28556), INT16_C( 26111), -INT16_C( 22554), INT16_C( 32322), INT16_C( 6021), INT16_C( 18685), -INT16_C( 12945), INT16_C( 9679) } }, { { INT16_C( 14322), INT16_C( 22596), -INT16_C( 17034), INT16_C( 32038), -INT16_C( 18067), -INT16_C( 31924), -INT16_C( 21078), INT16_C( 13781) }, { -INT16_C( 14323), -INT16_C( 22597), INT16_C( 17033), -INT16_C( 32039), INT16_C( 18066), INT16_C( 31923), INT16_C( 21077), -INT16_C( 13782) } }, { { -INT16_C( 10980), INT16_C( 13776), -INT16_C( 29395), -INT16_C( 22346), -INT16_C( 18315), INT16_C( 1631), -INT16_C( 28438), -INT16_C( 8736) }, { INT16_C( 10979), -INT16_C( 13777), INT16_C( 29394), INT16_C( 22345), INT16_C( 18314), -INT16_C( 1632), INT16_C( 28437), INT16_C( 8735) } }, { { INT16_C( 9415), INT16_C( 15925), INT16_C( 23522), INT16_C( 20411), INT16_C( 1812), -INT16_C( 16430), -INT16_C( 22604), -INT16_C( 11788) }, { -INT16_C( 9416), -INT16_C( 15926), -INT16_C( 23523), -INT16_C( 20412), -INT16_C( 1813), INT16_C( 16429), INT16_C( 22603), INT16_C( 11787) } }, { { -INT16_C( 15236), -INT16_C( 22266), -INT16_C( 17070), -INT16_C( 14511), -INT16_C( 20107), INT16_C( 24781), -INT16_C( 20927), INT16_C( 2109) }, { INT16_C( 15235), INT16_C( 22265), INT16_C( 
17069), INT16_C( 14510), INT16_C( 20106), -INT16_C( 24782), INT16_C( 20926), -INT16_C( 2110) } }, { { INT16_C( 29394), -INT16_C( 19386), INT16_C( 717), -INT16_C( 7933), -INT16_C( 10999), -INT16_C( 16736), -INT16_C( 27268), -INT16_C( 1905) }, { -INT16_C( 29395), INT16_C( 19385), -INT16_C( 718), INT16_C( 7932), INT16_C( 10998), INT16_C( 16735), INT16_C( 27267), INT16_C( 1904) } }, { { -INT16_C( 27303), -INT16_C( 21598), -INT16_C( 3246), -INT16_C( 14221), INT16_C( 16548), -INT16_C( 6872), INT16_C( 26094), -INT16_C( 15890) }, { INT16_C( 27302), INT16_C( 21597), INT16_C( 3245), INT16_C( 14220), -INT16_C( 16549), INT16_C( 6871), -INT16_C( 26095), INT16_C( 15889) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t r = simde_vmvnq_s16(a); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vmvnq_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmvnq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 1864677264), -INT32_C( 683572489), -INT32_C( 1030215016), INT32_C( 1509418569) }, { -INT32_C( 1864677265), INT32_C( 683572488), INT32_C( 1030215015), -INT32_C( 1509418570) } }, { { INT32_C( 202844593), INT32_C( 1440732261), INT32_C( 1701141465), INT32_C( 2111764460) }, { -INT32_C( 202844594), -INT32_C( 1440732262), -INT32_C( 1701141466), -INT32_C( 2111764461) } }, { { -INT32_C( 1511259474), INT32_C( 477900164), -INT32_C( 1579281321), -INT32_C( 1392847109) }, { INT32_C( 1511259473), -INT32_C( 477900165), INT32_C( 1579281320), INT32_C( 1392847108) } }, { { INT32_C( 1689784831), -INT32_C( 1078355995), -INT32_C( 
551280909), -INT32_C( 1017380331) }, { -INT32_C( 1689784832), INT32_C( 1078355994), INT32_C( 551280908), INT32_C( 1017380330) } }, { { -INT32_C( 2006431740), -INT32_C( 844766090), -INT32_C( 210861064), INT32_C( 1486907481) }, { INT32_C( 2006431739), INT32_C( 844766089), INT32_C( 210861063), -INT32_C( 1486907482) } }, { { INT32_C( 1606178937), -INT32_C( 484542992), -INT32_C( 1463663981), INT32_C( 1231757125) }, { -INT32_C( 1606178938), INT32_C( 484542991), INT32_C( 1463663980), -INT32_C( 1231757126) } }, { { -INT32_C( 573385881), -INT32_C( 1330939977), INT32_C( 1419975162), -INT32_C( 72596606) }, { INT32_C( 573385880), INT32_C( 1330939976), -INT32_C( 1419975163), INT32_C( 72596605) } }, { { -INT32_C( 1940231780), INT32_C( 1919908062), INT32_C( 1716667), -INT32_C( 1203141040) }, { INT32_C( 1940231779), -INT32_C( 1919908063), -INT32_C( 1716668), INT32_C( 1203141039) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t r = simde_vmvnq_s32(a); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vmvnq_s32(a); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmvnq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C( 18), UINT8_C(204), UINT8_C( 53), UINT8_C( 76), UINT8_C(185), UINT8_C( 18), UINT8_C( 45), UINT8_C(197), UINT8_C(231), UINT8_C(109), UINT8_C(114), UINT8_C(202), UINT8_C(183), UINT8_C( 33), UINT8_C(169), UINT8_C( 35) }, { UINT8_C(237), UINT8_C( 51), UINT8_C(202), UINT8_C(179), UINT8_C( 70), UINT8_C(237), UINT8_C(210), UINT8_C( 58), UINT8_C( 24), UINT8_C(146), UINT8_C(141), UINT8_C( 53), UINT8_C( 72), 
UINT8_C(222), UINT8_C( 86), UINT8_C(220) } }, { { UINT8_C( 8), UINT8_C(155), UINT8_C( 61), UINT8_C(185), UINT8_C( 53), UINT8_C( 38), UINT8_C(194), UINT8_C( 57), UINT8_C(191), UINT8_C( 15), UINT8_C( 48), UINT8_C(119), UINT8_C( 78), UINT8_C(235), UINT8_C( 20), UINT8_C( 96) }, { UINT8_C(247), UINT8_C(100), UINT8_C(194), UINT8_C( 70), UINT8_C(202), UINT8_C(217), UINT8_C( 61), UINT8_C(198), UINT8_C( 64), UINT8_C(240), UINT8_C(207), UINT8_C(136), UINT8_C(177), UINT8_C( 20), UINT8_C(235), UINT8_C(159) } }, { { UINT8_C(183), UINT8_C( 74), UINT8_C(172), UINT8_C(112), UINT8_C( 92), UINT8_C(218), UINT8_C( 53), UINT8_C( 68), UINT8_C( 71), UINT8_C(167), UINT8_C( 14), UINT8_C(254), UINT8_C(201), UINT8_C(183), UINT8_C( 33), UINT8_C(209) }, { UINT8_C( 72), UINT8_C(181), UINT8_C( 83), UINT8_C(143), UINT8_C(163), UINT8_C( 37), UINT8_C(202), UINT8_C(187), UINT8_C(184), UINT8_C( 88), UINT8_C(241), UINT8_C( 1), UINT8_C( 54), UINT8_C( 72), UINT8_C(222), UINT8_C( 46) } }, { { UINT8_C( 82), UINT8_C( 94), UINT8_C(138), UINT8_C(135), UINT8_C(132), UINT8_C( 77), UINT8_C(192), UINT8_C( 67), UINT8_C( 92), UINT8_C(240), UINT8_C(187), UINT8_C(170), UINT8_C(219), UINT8_C(207), UINT8_C( 10), UINT8_C(146) }, { UINT8_C(173), UINT8_C(161), UINT8_C(117), UINT8_C(120), UINT8_C(123), UINT8_C(178), UINT8_C( 63), UINT8_C(188), UINT8_C(163), UINT8_C( 15), UINT8_C( 68), UINT8_C( 85), UINT8_C( 36), UINT8_C( 48), UINT8_C(245), UINT8_C(109) } }, { { UINT8_C( 25), UINT8_C(183), UINT8_C( 2), UINT8_C(118), UINT8_C(145), UINT8_C( 55), UINT8_C(186), UINT8_C(216), UINT8_C(223), UINT8_C(200), UINT8_C(214), UINT8_C(168), UINT8_C(127), UINT8_C(247), UINT8_C(121), UINT8_C(209) }, { UINT8_C(230), UINT8_C( 72), UINT8_C(253), UINT8_C(137), UINT8_C(110), UINT8_C(200), UINT8_C( 69), UINT8_C( 39), UINT8_C( 32), UINT8_C( 55), UINT8_C( 41), UINT8_C( 87), UINT8_C(128), UINT8_C( 8), UINT8_C(134), UINT8_C( 46) } }, { { UINT8_C( 85), UINT8_C( 4), UINT8_C( 88), UINT8_C(217), UINT8_C( 81), UINT8_C( 24), UINT8_C( 28), UINT8_C(173), 
UINT8_C( 8), UINT8_C(215), UINT8_C( 88), UINT8_C(228), UINT8_C(167), UINT8_C( 98), UINT8_C(118), UINT8_C(192) }, { UINT8_C(170), UINT8_C(251), UINT8_C(167), UINT8_C( 38), UINT8_C(174), UINT8_C(231), UINT8_C(227), UINT8_C( 82), UINT8_C(247), UINT8_C( 40), UINT8_C(167), UINT8_C( 27), UINT8_C( 88), UINT8_C(157), UINT8_C(137), UINT8_C( 63) } }, { { UINT8_C( 25), UINT8_C(121), UINT8_C( 54), UINT8_C(170), UINT8_C(176), UINT8_C(240), UINT8_C(130), UINT8_C(143), UINT8_C(184), UINT8_C( 88), UINT8_C( 55), UINT8_C( 55), UINT8_C( 79), UINT8_C(177), UINT8_C( 8), UINT8_C(164) }, { UINT8_C(230), UINT8_C(134), UINT8_C(201), UINT8_C( 85), UINT8_C( 79), UINT8_C( 15), UINT8_C(125), UINT8_C(112), UINT8_C( 71), UINT8_C(167), UINT8_C(200), UINT8_C(200), UINT8_C(176), UINT8_C( 78), UINT8_C(247), UINT8_C( 91) } }, { { UINT8_C(181), UINT8_C( 96), UINT8_C(125), UINT8_C( 6), UINT8_C(120), UINT8_C(154), UINT8_C(179), UINT8_C(129), UINT8_C(113), UINT8_C( 11), UINT8_C(101), UINT8_C( 24), UINT8_C(110), UINT8_C(219), UINT8_C(217), UINT8_C(135) }, { UINT8_C( 74), UINT8_C(159), UINT8_C(130), UINT8_C(249), UINT8_C(135), UINT8_C(101), UINT8_C( 76), UINT8_C(126), UINT8_C(142), UINT8_C(244), UINT8_C(154), UINT8_C(231), UINT8_C(145), UINT8_C( 36), UINT8_C( 38), UINT8_C(120) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t r = simde_vmvnq_u8(a); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vmvnq_u8(a); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmvnq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(47575), UINT16_C(62789), 
UINT16_C(44550), UINT16_C(31586), UINT16_C( 2213), UINT16_C(35014), UINT16_C(61130), UINT16_C( 2225) }, { UINT16_C(17960), UINT16_C( 2746), UINT16_C(20985), UINT16_C(33949), UINT16_C(63322), UINT16_C(30521), UINT16_C( 4405), UINT16_C(63310) } }, { { UINT16_C(54992), UINT16_C(34469), UINT16_C(53736), UINT16_C(23984), UINT16_C(14961), UINT16_C(52228), UINT16_C(42793), UINT16_C( 164) }, { UINT16_C(10543), UINT16_C(31066), UINT16_C(11799), UINT16_C(41551), UINT16_C(50574), UINT16_C(13307), UINT16_C(22742), UINT16_C(65371) } }, { { UINT16_C(60000), UINT16_C(26614), UINT16_C(22680), UINT16_C(16098), UINT16_C(43104), UINT16_C(10950), UINT16_C(30615), UINT16_C(26418) }, { UINT16_C( 5535), UINT16_C(38921), UINT16_C(42855), UINT16_C(49437), UINT16_C(22431), UINT16_C(54585), UINT16_C(34920), UINT16_C(39117) } }, { { UINT16_C(55374), UINT16_C(14061), UINT16_C(40361), UINT16_C( 7060), UINT16_C(39127), UINT16_C( 487), UINT16_C(35903), UINT16_C(40705) }, { UINT16_C(10161), UINT16_C(51474), UINT16_C(25174), UINT16_C(58475), UINT16_C(26408), UINT16_C(65048), UINT16_C(29632), UINT16_C(24830) } }, { { UINT16_C(63350), UINT16_C( 3590), UINT16_C(59471), UINT16_C(44876), UINT16_C( 5009), UINT16_C(10458), UINT16_C( 3210), UINT16_C(55439) }, { UINT16_C( 2185), UINT16_C(61945), UINT16_C( 6064), UINT16_C(20659), UINT16_C(60526), UINT16_C(55077), UINT16_C(62325), UINT16_C(10096) } }, { { UINT16_C(31972), UINT16_C(36367), UINT16_C(41753), UINT16_C(61865), UINT16_C(36923), UINT16_C(31474), UINT16_C(62236), UINT16_C(37401) }, { UINT16_C(33563), UINT16_C(29168), UINT16_C(23782), UINT16_C( 3670), UINT16_C(28612), UINT16_C(34061), UINT16_C( 3299), UINT16_C(28134) } }, { { UINT16_C( 8427), UINT16_C(15009), UINT16_C(60680), UINT16_C(39402), UINT16_C(50176), UINT16_C(35777), UINT16_C(20688), UINT16_C(46435) }, { UINT16_C(57108), UINT16_C(50526), UINT16_C( 4855), UINT16_C(26133), UINT16_C(15359), UINT16_C(29758), UINT16_C(44847), UINT16_C(19100) } }, { { UINT16_C(29388), UINT16_C(58947), 
UINT16_C(60437), UINT16_C(20695), UINT16_C(51580), UINT16_C(39370), UINT16_C(58556), UINT16_C(42795) }, { UINT16_C(36147), UINT16_C( 6588), UINT16_C( 5098), UINT16_C(44840), UINT16_C(13955), UINT16_C(26165), UINT16_C( 6979), UINT16_C(22740) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t r = simde_vmvnq_u16(a); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t r = simde_vmvnq_u16(a); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vmvnq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(1700826709), UINT32_C(4200405802), UINT32_C(4150461492), UINT32_C(3304330719) }, { UINT32_C(2594140586), UINT32_C( 94561493), UINT32_C( 144505803), UINT32_C( 990636576) } }, { { UINT32_C( 799921039), UINT32_C(2967516902), UINT32_C(2873095487), UINT32_C( 346815679) }, { UINT32_C(3495046256), UINT32_C(1327450393), UINT32_C(1421871808), UINT32_C(3948151616) } }, { { UINT32_C(3044608907), UINT32_C(1538315815), UINT32_C(3126006747), UINT32_C(3095283497) }, { UINT32_C(1250358388), UINT32_C(2756651480), UINT32_C(1168960548), UINT32_C(1199683798) } }, { { UINT32_C( 15150106), UINT32_C( 565233634), UINT32_C(2093805500), UINT32_C(2005956588) }, { UINT32_C(4279817189), UINT32_C(3729733661), UINT32_C(2201161795), UINT32_C(2289010707) } }, { { UINT32_C(2855012995), UINT32_C(3137723616), UINT32_C( 427120880), UINT32_C(3134321824) }, { UINT32_C(1439954300), UINT32_C(1157243679), UINT32_C(3867846415), UINT32_C(1160645471) } }, { { UINT32_C( 45857056), UINT32_C(1025731456), UINT32_C(1203367771), UINT32_C(3921561958) }, { UINT32_C(4249110239), 
UINT32_C(3269235839), UINT32_C(3091599524), UINT32_C( 373405337) } }, { { UINT32_C( 882109011), UINT32_C(3085932999), UINT32_C(2446353905), UINT32_C(2035065177) }, { UINT32_C(3412858284), UINT32_C(1209034296), UINT32_C(1848613390), UINT32_C(2259902118) } }, { { UINT32_C(3682273114), UINT32_C(3440942706), UINT32_C(4078227853), UINT32_C(1860031002) }, { UINT32_C( 612694181), UINT32_C( 854024589), UINT32_C( 216739442), UINT32_C(2434936293) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t r = simde_vmvnq_u32(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t r = simde_vmvnq_u32(a); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vmvn_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmvn_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmvn_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmvn_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmvn_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmvn_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vmvnq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vmvnq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vmvnq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vmvnq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vmvnq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vmvnq_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/neg.c000066400000000000000000000706421400333146700162450ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN neg #include "test-neon.h" #include "../../../simde/arm/neon/neg.h" static int test_simde_vneg_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 716.64), SIMDE_FLOAT32_C( -987.31) }, { SIMDE_FLOAT32_C( -716.64), SIMDE_FLOAT32_C( 
987.31) } }, { { SIMDE_FLOAT32_C( -83.05), SIMDE_FLOAT32_C( -486.69) }, { SIMDE_FLOAT32_C( 83.05), SIMDE_FLOAT32_C( 486.69) } }, { { SIMDE_FLOAT32_C( 36.35), SIMDE_FLOAT32_C( 515.69) }, { SIMDE_FLOAT32_C( -36.35), SIMDE_FLOAT32_C( -515.69) } }, { { SIMDE_FLOAT32_C( -864.35), SIMDE_FLOAT32_C( 418.22) }, { SIMDE_FLOAT32_C( 864.35), SIMDE_FLOAT32_C( -418.22) } }, { { SIMDE_FLOAT32_C( -825.75), SIMDE_FLOAT32_C( -563.01) }, { SIMDE_FLOAT32_C( 825.75), SIMDE_FLOAT32_C( 563.01) } }, { { SIMDE_FLOAT32_C( -635.51), SIMDE_FLOAT32_C( -778.46) }, { SIMDE_FLOAT32_C( 635.51), SIMDE_FLOAT32_C( 778.46) } }, { { SIMDE_FLOAT32_C( 755.42), SIMDE_FLOAT32_C( 443.43) }, { SIMDE_FLOAT32_C( -755.42), SIMDE_FLOAT32_C( -443.43) } }, { { SIMDE_FLOAT32_C( -286.48), SIMDE_FLOAT32_C( -693.77) }, { SIMDE_FLOAT32_C( 286.48), SIMDE_FLOAT32_C( 693.77) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t r = simde_vneg_f32(a); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vneg_f32(a); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vneg_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; simde_float64 r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( -67.79) }, { SIMDE_FLOAT64_C( 67.79) } }, { { SIMDE_FLOAT64_C( -759.84) }, { SIMDE_FLOAT64_C( 759.84) } }, { { SIMDE_FLOAT64_C( 497.94) }, { SIMDE_FLOAT64_C( -497.94) } }, { { SIMDE_FLOAT64_C( 813.37) }, { SIMDE_FLOAT64_C( -813.37) } }, { { SIMDE_FLOAT64_C( -277.49) }, { SIMDE_FLOAT64_C( 277.49) } }, { { SIMDE_FLOAT64_C( 513.19) }, { SIMDE_FLOAT64_C( -513.19) } }, { { SIMDE_FLOAT64_C( -893.67) }, { 
SIMDE_FLOAT64_C( 893.67) } }, { { SIMDE_FLOAT64_C( 120.57) }, { SIMDE_FLOAT64_C( -120.57) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t r = simde_vneg_f64(a); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t r = simde_vneg_f64(a); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vneg_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 126), INT8_C( 53), -INT8_C( 7), INT8_C( 9), -INT8_C( 2), INT8_C( 28), INT8_C( 18), INT8_C( 47) }, { INT8_C( 126), -INT8_C( 53), INT8_C( 7), -INT8_C( 9), INT8_C( 2), -INT8_C( 28), -INT8_C( 18), -INT8_C( 47) } }, { { INT8_C( 2), INT8_C( 90), -INT8_C( 75), -INT8_C( 54), INT8_C( 108), INT8_C( 22), -INT8_C( 115), INT8_C( 23) }, { -INT8_C( 2), -INT8_C( 90), INT8_C( 75), INT8_C( 54), -INT8_C( 108), -INT8_C( 22), INT8_C( 115), -INT8_C( 23) } }, { { INT8_C( 76), -INT8_C( 50), -INT8_C( 98), -INT8_C( 115), INT8_C( 78), INT8_C( 40), -INT8_C( 89), -INT8_C( 13) }, { -INT8_C( 76), INT8_C( 50), INT8_C( 98), INT8_C( 115), -INT8_C( 78), -INT8_C( 40), INT8_C( 89), INT8_C( 13) } }, { { -INT8_C( 61), INT8_MAX, INT8_C( 55), INT8_C( 119), INT8_C( 116), -INT8_C( 71), INT8_C( 60), -INT8_C( 9) }, { INT8_C( 61), -INT8_C( 127), -INT8_C( 55), -INT8_C( 119), -INT8_C( 116), INT8_C( 71), -INT8_C( 60), INT8_C( 9) } }, { { -INT8_C( 18), INT8_C( 53), INT8_C( 0), -INT8_C( 19), INT8_C( 81), INT8_C( 18), INT8_C( 28), INT8_C( 84) }, { INT8_C( 18), -INT8_C( 53), INT8_C( 0), INT8_C( 19), -INT8_C( 81), -INT8_C( 18), -INT8_C( 28), -INT8_C( 84) } }, { { INT8_C( 108), -INT8_C( 46), INT8_C( 30), -INT8_C( 40), 
-INT8_C( 24), -INT8_C( 85), -INT8_C( 17), INT8_C( 53) }, { -INT8_C( 108), INT8_C( 46), -INT8_C( 30), INT8_C( 40), INT8_C( 24), INT8_C( 85), INT8_C( 17), -INT8_C( 53) } }, { { INT8_C( 121), -INT8_C( 115), -INT8_C( 62), -INT8_C( 57), -INT8_C( 75), INT8_C( 105), -INT8_C( 70), INT8_C( 121) }, { -INT8_C( 121), INT8_C( 115), INT8_C( 62), INT8_C( 57), INT8_C( 75), -INT8_C( 105), INT8_C( 70), -INT8_C( 121) } }, { { -INT8_C( 24), -INT8_C( 14), -INT8_C( 16), INT8_C( 92), -INT8_C( 85), INT8_C( 44), INT8_C( 83), -INT8_C( 103) }, { INT8_C( 24), INT8_C( 14), INT8_C( 16), -INT8_C( 92), INT8_C( 85), -INT8_C( 44), -INT8_C( 83), INT8_C( 103) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t r = simde_vneg_s8(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vneg_s8(a); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vneg_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 16602), -INT16_C( 23072), INT16_C( 1303), -INT16_C( 18923) }, { INT16_C( 16602), INT16_C( 23072), -INT16_C( 1303), INT16_C( 18923) } }, { { INT16_C( 27322), INT16_C( 20477), -INT16_C( 2392), -INT16_C( 16976) }, { -INT16_C( 27322), -INT16_C( 20477), INT16_C( 2392), INT16_C( 16976) } }, { { INT16_C( 6729), -INT16_C( 21704), -INT16_C( 14531), -INT16_C( 3275) }, { -INT16_C( 6729), INT16_C( 21704), INT16_C( 14531), INT16_C( 3275) } }, { { -INT16_C( 20184), INT16_C( 9579), -INT16_C( 26844), INT16_C( 19022) }, { INT16_C( 20184), -INT16_C( 9579), INT16_C( 26844), -INT16_C( 19022) } }, { { INT16_C( 11862), INT16_C( 28143), INT16_C( 1075), -INT16_C( 4572) }, { -INT16_C( 
11862), -INT16_C( 28143), -INT16_C( 1075), INT16_C( 4572) } }, { { INT16_C( 8559), INT16_C( 5949), -INT16_C( 4584), INT16_C( 25044) }, { -INT16_C( 8559), -INT16_C( 5949), INT16_C( 4584), -INT16_C( 25044) } }, { { INT16_C( 3336), INT16_C( 17676), INT16_C( 16852), -INT16_C( 712) }, { -INT16_C( 3336), -INT16_C( 17676), -INT16_C( 16852), INT16_C( 712) } }, { { -INT16_C( 23565), INT16_C( 5922), INT16_C( 28730), -INT16_C( 28319) }, { INT16_C( 23565), -INT16_C( 5922), -INT16_C( 28730), INT16_C( 28319) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t r = simde_vneg_s16(a); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vneg_s16(a); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vneg_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 1514105162), -INT32_C( 1586606905) }, { INT32_C( 1514105162), INT32_C( 1586606905) } }, { { INT32_C( 302620279), -INT32_C( 491707108) }, { -INT32_C( 302620279), INT32_C( 491707108) } }, { { -INT32_C( 876535401), -INT32_C( 2036143544) }, { INT32_C( 876535401), INT32_C( 2036143544) } }, { { -INT32_C( 731879282), -INT32_C( 456778194) }, { INT32_C( 731879282), INT32_C( 456778194) } }, { { INT32_C( 2089387701), INT32_C( 1243477971) }, { -INT32_C( 2089387701), -INT32_C( 1243477971) } }, { { -INT32_C( 1319360619), -INT32_C( 476901812) }, { INT32_C( 1319360619), INT32_C( 476901812) } }, { { INT32_C( 2007913775), -INT32_C( 838971329) }, { -INT32_C( 2007913775), INT32_C( 838971329) } }, { { -INT32_C( 408789319), INT32_C( 852191357) }, { INT32_C( 408789319), -INT32_C( 852191357) } } }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t r = simde_vneg_s32(a); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vneg_s32(a); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vneg_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int64_t r[1]; } test_vec[] = { { { -INT64_C( 876024735620259410) }, { INT64_C( 876024735620259410) } }, { { -INT64_C( 5592594344589837578) }, { INT64_C( 5592594344589837578) } }, { { -INT64_C( 8278041931419543793) }, { INT64_C( 8278041931419543793) } }, { { INT64_C( 604013801299550026) }, { -INT64_C( 604013801299550026) } }, { { -INT64_C( 424514824547121404) }, { INT64_C( 424514824547121404) } }, { { INT64_C( 2653581244415604749) }, { -INT64_C( 2653581244415604749) } }, { { -INT64_C( 8302618725080709503) }, { INT64_C( 8302618725080709503) } }, { { -INT64_C( 7913371491563099189) }, { INT64_C( 7913371491563099189) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t r = simde_vneg_s64(a); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); simde_int64x1_t r = simde_vneg_s64(a); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vnegq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 
804.42), SIMDE_FLOAT32_C( -601.06), SIMDE_FLOAT32_C( -462.25), SIMDE_FLOAT32_C( -337.95) }, { SIMDE_FLOAT32_C( -804.42), SIMDE_FLOAT32_C( 601.06), SIMDE_FLOAT32_C( 462.25), SIMDE_FLOAT32_C( 337.95) } }, { { SIMDE_FLOAT32_C( -866.80), SIMDE_FLOAT32_C( -244.09), SIMDE_FLOAT32_C( 711.44), SIMDE_FLOAT32_C( 906.78) }, { SIMDE_FLOAT32_C( 866.80), SIMDE_FLOAT32_C( 244.09), SIMDE_FLOAT32_C( -711.44), SIMDE_FLOAT32_C( -906.78) } }, { { SIMDE_FLOAT32_C( -339.26), SIMDE_FLOAT32_C( -922.22), SIMDE_FLOAT32_C( -188.81), SIMDE_FLOAT32_C( 579.99) }, { SIMDE_FLOAT32_C( 339.26), SIMDE_FLOAT32_C( 922.22), SIMDE_FLOAT32_C( 188.81), SIMDE_FLOAT32_C( -579.99) } }, { { SIMDE_FLOAT32_C( -134.89), SIMDE_FLOAT32_C( -740.85), SIMDE_FLOAT32_C( -696.51), SIMDE_FLOAT32_C( -24.34) }, { SIMDE_FLOAT32_C( 134.89), SIMDE_FLOAT32_C( 740.85), SIMDE_FLOAT32_C( 696.51), SIMDE_FLOAT32_C( 24.34) } }, { { SIMDE_FLOAT32_C( 818.56), SIMDE_FLOAT32_C( 209.57), SIMDE_FLOAT32_C( 842.16), SIMDE_FLOAT32_C( 742.03) }, { SIMDE_FLOAT32_C( -818.56), SIMDE_FLOAT32_C( -209.57), SIMDE_FLOAT32_C( -842.16), SIMDE_FLOAT32_C( -742.03) } }, { { SIMDE_FLOAT32_C( 92.41), SIMDE_FLOAT32_C( -593.00), SIMDE_FLOAT32_C( 705.50), SIMDE_FLOAT32_C( -603.00) }, { SIMDE_FLOAT32_C( -92.41), SIMDE_FLOAT32_C( 593.00), SIMDE_FLOAT32_C( -705.50), SIMDE_FLOAT32_C( 603.00) } }, { { SIMDE_FLOAT32_C( -438.51), SIMDE_FLOAT32_C( 671.01), SIMDE_FLOAT32_C( -393.07), SIMDE_FLOAT32_C( 195.29) }, { SIMDE_FLOAT32_C( 438.51), SIMDE_FLOAT32_C( -671.01), SIMDE_FLOAT32_C( 393.07), SIMDE_FLOAT32_C( -195.29) } }, { { SIMDE_FLOAT32_C( 829.18), SIMDE_FLOAT32_C( 342.47), SIMDE_FLOAT32_C( 267.28), SIMDE_FLOAT32_C( 633.59) }, { SIMDE_FLOAT32_C( -829.18), SIMDE_FLOAT32_C( -342.47), SIMDE_FLOAT32_C( -267.28), SIMDE_FLOAT32_C( -633.59) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t r = simde_vnegq_f32(a); simde_test_arm_neon_assert_equal_f32x4(r, 
simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vnegq_f32(a); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vnegq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 29.13), SIMDE_FLOAT64_C( -879.83) }, { SIMDE_FLOAT64_C( -29.13), SIMDE_FLOAT64_C( 879.83) } }, { { SIMDE_FLOAT64_C( 870.39), SIMDE_FLOAT64_C( -154.06) }, { SIMDE_FLOAT64_C( -870.39), SIMDE_FLOAT64_C( 154.06) } }, { { SIMDE_FLOAT64_C( -918.42), SIMDE_FLOAT64_C( 809.31) }, { SIMDE_FLOAT64_C( 918.42), SIMDE_FLOAT64_C( -809.31) } }, { { SIMDE_FLOAT64_C( 998.46), SIMDE_FLOAT64_C( -21.80) }, { SIMDE_FLOAT64_C( -998.46), SIMDE_FLOAT64_C( 21.80) } }, { { SIMDE_FLOAT64_C( -937.13), SIMDE_FLOAT64_C( -287.08) }, { SIMDE_FLOAT64_C( 937.13), SIMDE_FLOAT64_C( 287.08) } }, { { SIMDE_FLOAT64_C( 803.74), SIMDE_FLOAT64_C( -923.43) }, { SIMDE_FLOAT64_C( -803.74), SIMDE_FLOAT64_C( 923.43) } }, { { SIMDE_FLOAT64_C( -658.03), SIMDE_FLOAT64_C( 943.52) }, { SIMDE_FLOAT64_C( 658.03), SIMDE_FLOAT64_C( -943.52) } }, { { SIMDE_FLOAT64_C( 715.86), SIMDE_FLOAT64_C( -973.82) }, { SIMDE_FLOAT64_C( -715.86), SIMDE_FLOAT64_C( 973.82) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t r = simde_vnegq_f64(a); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t r = simde_vnegq_f64(a); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, r, 
SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vnegq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 33), -INT8_C( 124), INT8_C( 14), -INT8_C( 53), -INT8_C( 28), -INT8_C( 98), -INT8_C( 12), INT8_C( 95), -INT8_C( 17), INT8_C( 54), INT8_C( 88), INT8_C( 35), INT8_C( 84), INT8_C( 68), INT8_C( 14), -INT8_C( 64) }, { -INT8_C( 33), INT8_C( 124), -INT8_C( 14), INT8_C( 53), INT8_C( 28), INT8_C( 98), INT8_C( 12), -INT8_C( 95), INT8_C( 17), -INT8_C( 54), -INT8_C( 88), -INT8_C( 35), -INT8_C( 84), -INT8_C( 68), -INT8_C( 14), INT8_C( 64) } }, { { INT8_C( 93), INT8_C( 100), INT8_MIN, -INT8_C( 18), -INT8_C( 20), INT8_C( 22), -INT8_C( 77), -INT8_C( 95), INT8_C( 14), INT8_C( 36), INT8_C( 106), INT8_C( 50), -INT8_C( 110), INT8_C( 71), -INT8_C( 18), -INT8_C( 77) }, { -INT8_C( 93), -INT8_C( 100), INT8_MIN, INT8_C( 18), INT8_C( 20), -INT8_C( 22), INT8_C( 77), INT8_C( 95), -INT8_C( 14), -INT8_C( 36), -INT8_C( 106), -INT8_C( 50), INT8_C( 110), -INT8_C( 71), INT8_C( 18), INT8_C( 77) } }, { { -INT8_C( 53), -INT8_C( 4), INT8_MAX, -INT8_C( 81), -INT8_C( 101), INT8_C( 115), INT8_C( 14), -INT8_C( 118), -INT8_C( 86), INT8_C( 103), -INT8_C( 82), -INT8_C( 2), -INT8_C( 85), -INT8_C( 68), -INT8_C( 66), INT8_C( 9) }, { INT8_C( 53), INT8_C( 4), -INT8_C( 127), INT8_C( 81), INT8_C( 101), -INT8_C( 115), -INT8_C( 14), INT8_C( 118), INT8_C( 86), -INT8_C( 103), INT8_C( 82), INT8_C( 2), INT8_C( 85), INT8_C( 68), INT8_C( 66), -INT8_C( 9) } }, { { INT8_C( 32), INT8_C( 63), -INT8_C( 9), INT8_C( 13), INT8_C( 85), -INT8_C( 86), -INT8_C( 82), INT8_C( 99), -INT8_C( 49), INT8_C( 24), -INT8_C( 106), INT8_C( 97), INT8_C( 95), -INT8_C( 124), INT8_C( 20), INT8_C( 43) }, { -INT8_C( 32), -INT8_C( 63), INT8_C( 9), -INT8_C( 13), -INT8_C( 85), INT8_C( 86), INT8_C( 82), -INT8_C( 99), INT8_C( 49), -INT8_C( 24), INT8_C( 106), -INT8_C( 97), -INT8_C( 95), INT8_C( 124), -INT8_C( 20), -INT8_C( 43) } }, { { -INT8_C( 127), -INT8_C( 109), -INT8_C( 
38), INT8_C( 28), INT8_C( 7), -INT8_C( 23), -INT8_C( 90), -INT8_C( 79), INT8_C( 80), INT8_C( 84), -INT8_C( 81), -INT8_C( 5), INT8_C( 16), INT8_C( 110), INT8_C( 4), INT8_C( 49) }, { INT8_MAX, INT8_C( 109), INT8_C( 38), -INT8_C( 28), -INT8_C( 7), INT8_C( 23), INT8_C( 90), INT8_C( 79), -INT8_C( 80), -INT8_C( 84), INT8_C( 81), INT8_C( 5), -INT8_C( 16), -INT8_C( 110), -INT8_C( 4), -INT8_C( 49) } }, { { -INT8_C( 83), -INT8_C( 5), INT8_C( 62), INT8_C( 2), -INT8_C( 90), -INT8_C( 20), INT8_C( 102), INT8_C( 117), INT8_C( 4), -INT8_C( 4), -INT8_C( 42), INT8_C( 100), INT8_MIN, -INT8_C( 22), -INT8_C( 113), INT8_C( 1) }, { INT8_C( 83), INT8_C( 5), -INT8_C( 62), -INT8_C( 2), INT8_C( 90), INT8_C( 20), -INT8_C( 102), -INT8_C( 117), -INT8_C( 4), INT8_C( 4), INT8_C( 42), -INT8_C( 100), INT8_MIN, INT8_C( 22), INT8_C( 113), -INT8_C( 1) } }, { { INT8_C( 126), INT8_C( 105), INT8_C( 29), -INT8_C( 123), INT8_C( 82), -INT8_C( 60), INT8_C( 54), -INT8_C( 94), INT8_C( 24), -INT8_C( 27), -INT8_C( 98), INT8_C( 41), INT8_C( 83), -INT8_C( 94), INT8_C( 90), INT8_C( 0) }, { -INT8_C( 126), -INT8_C( 105), -INT8_C( 29), INT8_C( 123), -INT8_C( 82), INT8_C( 60), -INT8_C( 54), INT8_C( 94), -INT8_C( 24), INT8_C( 27), INT8_C( 98), -INT8_C( 41), -INT8_C( 83), INT8_C( 94), -INT8_C( 90), INT8_C( 0) } }, { { -INT8_C( 98), -INT8_C( 104), INT8_C( 3), INT8_C( 68), -INT8_C( 124), INT8_C( 105), -INT8_C( 71), -INT8_C( 120), INT8_C( 101), -INT8_C( 113), -INT8_C( 20), -INT8_C( 27), INT8_C( 121), INT8_C( 123), -INT8_C( 25), -INT8_C( 9) }, { INT8_C( 98), INT8_C( 104), -INT8_C( 3), -INT8_C( 68), INT8_C( 124), -INT8_C( 105), INT8_C( 71), INT8_C( 120), -INT8_C( 101), INT8_C( 113), INT8_C( 20), INT8_C( 27), -INT8_C( 121), -INT8_C( 123), INT8_C( 25), INT8_C( 9) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t r = simde_vnegq_s8(a); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else 
fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vnegq_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vnegq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 15787), -INT16_C( 29057), -INT16_C( 19375), -INT16_C( 11784), INT16_C( 4515), -INT16_C( 7027), -INT16_C( 20562), INT16_C( 28379) }, { INT16_C( 15787), INT16_C( 29057), INT16_C( 19375), INT16_C( 11784), -INT16_C( 4515), INT16_C( 7027), INT16_C( 20562), -INT16_C( 28379) } }, { { INT16_C( 7291), -INT16_C( 3670), -INT16_C( 13367), -INT16_C( 28161), -INT16_C( 8143), INT16_C( 25681), -INT16_C( 8439), INT16_C( 24356) }, { -INT16_C( 7291), INT16_C( 3670), INT16_C( 13367), INT16_C( 28161), INT16_C( 8143), -INT16_C( 25681), INT16_C( 8439), -INT16_C( 24356) } }, { { -INT16_C( 23646), -INT16_C( 3091), -INT16_C( 6825), -INT16_C( 1340), INT16_C( 20982), -INT16_C( 23073), -INT16_C( 17919), INT16_C( 31763) }, { INT16_C( 23646), INT16_C( 3091), INT16_C( 6825), INT16_C( 1340), -INT16_C( 20982), INT16_C( 23073), INT16_C( 17919), -INT16_C( 31763) } }, { { -INT16_C( 16682), -INT16_C( 24723), INT16_C( 27785), -INT16_C( 17615), -INT16_C( 32180), INT16_C( 22047), INT16_C( 17250), INT16_C( 1205) }, { INT16_C( 16682), INT16_C( 24723), -INT16_C( 27785), INT16_C( 17615), INT16_C( 32180), -INT16_C( 22047), -INT16_C( 17250), -INT16_C( 1205) } }, { { -INT16_C( 23834), INT16_C( 15863), -INT16_C( 17528), INT16_C( 32311), INT16_C( 5644), INT16_C( 3363), INT16_C( 14288), -INT16_C( 22646) }, { INT16_C( 23834), -INT16_C( 15863), INT16_C( 17528), -INT16_C( 32311), -INT16_C( 5644), -INT16_C( 3363), -INT16_C( 14288), INT16_C( 22646) } }, { { -INT16_C( 2059), INT16_C( 32326), INT16_C( 30564), -INT16_C( 20423), INT16_C( 22778), INT16_C( 23558), -INT16_C( 17509), -INT16_C( 
32416) }, { INT16_C( 2059), -INT16_C( 32326), -INT16_C( 30564), INT16_C( 20423), -INT16_C( 22778), -INT16_C( 23558), INT16_C( 17509), INT16_C( 32416) } }, { { INT16_C( 22366), -INT16_C( 6466), -INT16_C( 2542), INT16_C( 7780), -INT16_C( 30708), -INT16_C( 8916), -INT16_C( 18753), -INT16_C( 19324) }, { -INT16_C( 22366), INT16_C( 6466), INT16_C( 2542), -INT16_C( 7780), INT16_C( 30708), INT16_C( 8916), INT16_C( 18753), INT16_C( 19324) } }, { { -INT16_C( 13651), INT16_C( 4402), INT16_C( 27714), INT16_C( 15554), -INT16_C( 14140), INT16_C( 24728), -INT16_C( 1916), -INT16_C( 7455) }, { INT16_C( 13651), -INT16_C( 4402), -INT16_C( 27714), -INT16_C( 15554), INT16_C( 14140), -INT16_C( 24728), INT16_C( 1916), INT16_C( 7455) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t r = simde_vnegq_s16(a); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vnegq_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vnegq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 1274343554), -INT32_C( 1647935950), INT32_C( 941518839), -INT32_C( 809119283) }, { -INT32_C( 1274343554), INT32_C( 1647935950), -INT32_C( 941518839), INT32_C( 809119283) } }, { { INT32_C( 2018801436), -INT32_C( 1506414198), INT32_C( 1466099323), -INT32_C( 356289944) }, { -INT32_C( 2018801436), INT32_C( 1506414198), -INT32_C( 1466099323), INT32_C( 356289944) } }, { { -INT32_C( 1791576221), INT32_C( 758316085), INT32_C( 946163819), INT32_C( 1023879713) }, { INT32_C( 1791576221), -INT32_C( 758316085), -INT32_C( 946163819), -INT32_C( 1023879713) } }, { { INT32_C( 
733371297), -INT32_C( 925766835), INT32_C( 958411985), INT32_C( 153412518) }, { -INT32_C( 733371297), INT32_C( 925766835), -INT32_C( 958411985), -INT32_C( 153412518) } }, { { -INT32_C( 794928485), -INT32_C( 1040330666), INT32_C( 1106862624), INT32_C( 763297932) }, { INT32_C( 794928485), INT32_C( 1040330666), -INT32_C( 1106862624), -INT32_C( 763297932) } }, { { -INT32_C( 1470614181), -INT32_C( 244241888), INT32_C( 69964126), INT32_C( 252596084) }, { INT32_C( 1470614181), INT32_C( 244241888), -INT32_C( 69964126), -INT32_C( 252596084) } }, { { -INT32_C( 2052951), -INT32_C( 1648304771), -INT32_C( 857753280), INT32_C( 351952569) }, { INT32_C( 2052951), INT32_C( 1648304771), INT32_C( 857753280), -INT32_C( 351952569) } }, { { -INT32_C( 1279438189), -INT32_C( 626774404), INT32_C( 870305727), -INT32_C( 951849698) }, { INT32_C( 1279438189), INT32_C( 626774404), -INT32_C( 870305727), INT32_C( 951849698) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t r = simde_vnegq_s32(a); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vnegq_s32(a); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vnegq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t r[2]; } test_vec[] = { { { INT64_C( 1379776353572449913), INT64_C( 8647865240165635373) }, { -INT64_C( 1379776353572449913), -INT64_C( 8647865240165635373) } }, { { -INT64_C( 3642568412225199825), INT64_C( 3250626639892679639) }, { INT64_C( 3642568412225199825), -INT64_C( 3250626639892679639) } }, { { INT64_C( 7446088706595571705), INT64_C( 7799815905998854696) }, { -INT64_C( 7446088706595571705), -INT64_C( 
7799815905998854696) } }, { { INT64_C( 6324714693310316964), -INT64_C( 8983753774972534927) }, { -INT64_C( 6324714693310316964), INT64_C( 8983753774972534927) } }, { { -INT64_C( 345903249401511790), -INT64_C( 2549318626993654539) }, { INT64_C( 345903249401511790), INT64_C( 2549318626993654539) } }, { { INT64_C( 7735102754277036929), INT64_C( 7252734741606074265) }, { -INT64_C( 7735102754277036929), -INT64_C( 7252734741606074265) } }, { { INT64_C( 8913296042769804516), -INT64_C( 852502546803166441) }, { -INT64_C( 8913296042769804516), INT64_C( 852502546803166441) } }, { { INT64_C( 3658134118366314336), INT64_C( 2020704703068067284) }, { -INT64_C( 3658134118366314336), -INT64_C( 2020704703068067284) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t r = simde_vnegq_s64(a); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int64x2_t r = simde_vnegq_s64(a); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vneg_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vneg_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vneg_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vneg_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vneg_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vneg_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vnegq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vnegq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vnegq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vnegq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vnegq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vnegq_s64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/orn.c000066400000000000000000001624211400333146700162670ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN orn #include "test-neon.h" #include 
"../../../simde/arm/neon/orn.h" static int test_simde_vorn_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 71), -INT8_C( 69), INT8_C( 126), -INT8_C( 50), INT8_C( 51), -INT8_C( 21), -INT8_C( 47), -INT8_C( 103) }, { INT8_C( 30), INT8_C( 10), INT8_C( 57), INT8_C( 26), INT8_C( 126), -INT8_C( 9), -INT8_C( 8), INT8_C( 99) }, { -INT8_C( 25), -INT8_C( 1), -INT8_C( 2), -INT8_C( 17), -INT8_C( 77), -INT8_C( 21), -INT8_C( 41), -INT8_C( 99) } }, { { INT8_C( 83), -INT8_C( 92), INT8_C( 91), INT8_C( 51), INT8_C( 23), INT8_C( 56), INT8_C( 16), -INT8_C( 37) }, { INT8_C( 38), INT8_C( 14), -INT8_C( 105), INT8_C( 93), INT8_C( 53), -INT8_C( 77), -INT8_C( 24), INT8_C( 124) }, { -INT8_C( 37), -INT8_C( 11), INT8_C( 123), -INT8_C( 77), -INT8_C( 33), INT8_C( 124), INT8_C( 23), -INT8_C( 37) } }, { { INT8_C( 111), INT8_C( 102), INT8_C( 74), -INT8_C( 94), INT8_C( 82), INT8_C( 27), INT8_C( 59), INT8_C( 112) }, { INT8_C( 38), INT8_C( 116), -INT8_C( 118), -INT8_C( 92), INT8_C( 107), -INT8_C( 126), INT8_C( 7), -INT8_C( 65) }, { -INT8_C( 1), -INT8_C( 17), INT8_MAX, -INT8_C( 5), -INT8_C( 42), INT8_MAX, -INT8_C( 5), INT8_C( 112) } }, { { INT8_C( 39), INT8_C( 99), -INT8_C( 14), INT8_C( 62), -INT8_C( 101), INT8_C( 2), INT8_C( 25), -INT8_C( 63) }, { INT8_C( 16), -INT8_C( 80), INT8_C( 30), INT8_C( 69), INT8_C( 100), INT8_C( 6), -INT8_C( 63), -INT8_C( 45) }, { -INT8_C( 17), INT8_C( 111), -INT8_C( 13), -INT8_C( 66), -INT8_C( 101), -INT8_C( 5), INT8_C( 63), -INT8_C( 19) } }, { { INT8_C( 108), INT8_C( 11), INT8_C( 117), -INT8_C( 66), INT8_C( 39), -INT8_C( 80), INT8_C( 46), INT8_C( 77) }, { INT8_C( 36), -INT8_C( 72), -INT8_C( 15), -INT8_C( 112), INT8_C( 59), -INT8_C( 7), INT8_C( 79), INT8_C( 98) }, { -INT8_C( 1), INT8_C( 79), INT8_MAX, -INT8_C( 1), -INT8_C( 25), -INT8_C( 74), -INT8_C( 66), -INT8_C( 35) } }, { { INT8_C( 92), INT8_C( 65), -INT8_C( 96), -INT8_C( 9), INT8_C( 67), -INT8_C( 71), -INT8_C( 72), INT8_C( 83) }, { INT8_C( 105), 
-INT8_C( 42), -INT8_C( 103), -INT8_C( 51), -INT8_C( 36), INT8_C( 90), -INT8_C( 96), INT8_C( 72) }, { -INT8_C( 34), INT8_C( 105), -INT8_C( 26), -INT8_C( 9), INT8_C( 99), -INT8_C( 67), -INT8_C( 1), -INT8_C( 9) } }, { { INT8_C( 102), INT8_C( 21), INT8_C( 7), -INT8_C( 115), -INT8_C( 59), INT8_C( 53), -INT8_C( 38), -INT8_C( 22) }, { -INT8_C( 18), -INT8_C( 53), INT8_C( 122), INT8_C( 41), -INT8_C( 60), -INT8_C( 55), -INT8_C( 117), INT8_C( 32) }, { INT8_C( 119), INT8_C( 53), -INT8_C( 121), -INT8_C( 33), -INT8_C( 1), INT8_C( 55), -INT8_C( 2), -INT8_C( 1) } }, { { INT8_C( 10), INT8_C( 43), INT8_C( 23), INT8_C( 77), -INT8_C( 28), -INT8_C( 49), -INT8_C( 96), INT8_C( 77) }, { -INT8_C( 91), INT8_C( 57), INT8_C( 27), -INT8_C( 127), -INT8_C( 108), -INT8_C( 69), -INT8_C( 54), -INT8_C( 6) }, { INT8_C( 90), -INT8_C( 17), -INT8_C( 9), INT8_MAX, -INT8_C( 17), -INT8_C( 49), -INT8_C( 75), INT8_C( 77) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vorn_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vorn_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vorn_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 11823), -INT16_C( 27001), INT16_C( 24838), -INT16_C( 2944) }, { -INT16_C( 1492), -INT16_C( 3811), -INT16_C( 22333), -INT16_C( 13039) }, { -INT16_C( 10797), -INT16_C( 24857), INT16_C( 30526), -INT16_C( 2322) } }, { { INT16_C( 
10707), -INT16_C( 18662), -INT16_C( 17416), -INT16_C( 25083) }, { INT16_C( 8436), -INT16_C( 30689), -INT16_C( 5669), -INT16_C( 21374) }, { -INT16_C( 37), -INT16_C( 2054), -INT16_C( 16388), -INT16_C( 8323) } }, { { INT16_C( 2490), -INT16_C( 16061), -INT16_C( 15510), -INT16_C( 26699) }, { -INT16_C( 11330), -INT16_C( 32376), -INT16_C( 26245), INT16_C( 20303) }, { INT16_C( 11771), -INT16_C( 137), -INT16_C( 6162), -INT16_C( 18507) } }, { { INT16_C( 27074), -INT16_C( 17658), INT16_C( 2852), INT16_C( 6489) }, { INT16_C( 30763), INT16_C( 1953), INT16_C( 9314), INT16_C( 7347) }, { -INT16_C( 4138), -INT16_C( 1186), -INT16_C( 9283), -INT16_C( 1187) } }, { { -INT16_C( 2515), -INT16_C( 26403), -INT16_C( 27718), INT16_C( 30767) }, { -INT16_C( 18586), -INT16_C( 7687), INT16_C( 18512), INT16_C( 4912) }, { -INT16_C( 323), -INT16_C( 24865), -INT16_C( 18497), -INT16_C( 785) } }, { { INT16_C( 14258), -INT16_C( 10546), INT16_C( 10050), INT16_C( 28399) }, { -INT16_C( 28257), INT16_C( 373), INT16_C( 10421), -INT16_C( 7650) }, { INT16_C( 32754), -INT16_C( 306), -INT16_C( 2230), INT16_C( 32751) } }, { { -INT16_C( 1249), -INT16_C( 9862), -INT16_C( 22130), -INT16_C( 2991) }, { INT16_C( 19040), -INT16_C( 20010), INT16_C( 1683), INT16_C( 17860) }, { -INT16_C( 97), -INT16_C( 8325), -INT16_C( 1554), -INT16_C( 389) } }, { { -INT16_C( 28099), -INT16_C( 32741), INT16_C( 3001), INT16_C( 22766) }, { INT16_C( 25500), INT16_C( 20826), INT16_C( 30859), -INT16_C( 21965) }, { -INT16_C( 24961), -INT16_C( 20801), -INT16_C( 28675), INT16_C( 24046) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vorn_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = 
simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vorn_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vorn_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 941672358), INT32_C( 1460359429) }, { INT32_C( 1534253508), -INT32_C( 1871844903) }, { -INT32_C( 404799877), INT32_C( 2140886823) } }, { { INT32_C( 1247411385), INT32_C( 1043917429) }, { -INT32_C( 790489490), -INT32_C( 1189984161) }, { INT32_C( 1868430777), INT32_C( 2130575349) } }, { { -INT32_C( 2055147136), INT32_C( 115182402) }, { INT32_C( 1046564709), -INT32_C( 221327560) }, { -INT32_C( 979439206), INT32_C( 268286919) } }, { { INT32_C( 1077684171), -INT32_C( 2139196398) }, { -INT32_C( 363831157), INT32_C( 681796264) }, { INT32_C( 1438629887), -INT32_C( 679543465) } }, { { -INT32_C( 1783815341), INT32_C( 345803439) }, { INT32_C( 307428825), -INT32_C( 1761336884) }, { -INT32_C( 38984841), INT32_C( 2097143487) } }, { { INT32_C( 1524056136), INT32_C( 1088050613) }, { INT32_C( 1563044789), -INT32_C( 511324787) }, { -INT32_C( 86518710), INT32_C( 1593473015) } }, { { -INT32_C( 1619643408), -INT32_C( 1749871939) }, { -INT32_C( 592902640), INT32_C( 1869917479) }, { -INT32_C( 1082721281), -INT32_C( 1749331203) } }, { { -INT32_C( 1563800595), INT32_C( 1457693857) }, { INT32_C( 1572080847), -INT32_C( 901891623) }, { -INT32_C( 1563689987), INT32_C( 2011424423) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vorn_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { 
simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vorn_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Test for simde_vorn_s64 (1 x int64_t lane). For each entry of test_vec the a and b arrays are loaded with simde_vld1_s64, combined with simde_vorn_s64, and the result is passed to simde_test_arm_neon_assert_equal_i64x1 together with the stored r (the stored r values are consistent with a | ~b). Returns 0 after the loop completes. The "else" branch is compiled out by the "if 1" guard: it would run 8 iterations on random inputs, hand a, b and r to the simde_test_arm_neon_write_i64x1 helper, and return 1. */ static int test_simde_vorn_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int64_t b[1]; int64_t r[1]; } test_vec[] = { { { -INT64_C( 2900003943802620820) }, { INT64_C( 340346227834186275) }, { -INT64_C( 15800051893946884) } }, { { -INT64_C( 618531193571802253) }, { -INT64_C( 4307250660080607594) }, { -INT64_C( 4805211211177093) } }, { { -INT64_C( 2025019218014067897) }, { INT64_C( 3690109182816393241) }, { -INT64_C( 1157495473783773209) } }, { { -INT64_C( 321530848576283542) }, { -INT64_C( 5193595468643574064) }, { -INT64_C( 316382341262475921) } }, { { -INT64_C( 6563245997126460601) }, { INT64_C( 7509200415277455171) }, { -INT64_C( 5193777867993908225) } }, { { -INT64_C( 124267704839668251) }, { -INT64_C( 7095118474331639455) }, { -INT64_C( 110637604046929921) } }, { { INT64_C( 4331495243257303022) }, { -INT64_C( 2302054115141750361) }, { INT64_C( 4611281387325313022) } }, { { -INT64_C( 1432301318404634638) }, { INT64_C( 5122360075800692259) }, { -INT64_C( 216179728562012162) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r = simde_vorn_s64(a, b); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); simde_int64x1_t b = simde_test_arm_neon_random_i64x1(); simde_int64x1_t r = simde_vorn_s64(a, b); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); 
simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Test for simde_vorn_u8 (8 x uint8_t lanes). For each entry of test_vec the a and b arrays are loaded with simde_vld1_u8, combined with simde_vorn_u8, and the result is passed to simde_test_arm_neon_assert_equal_u8x8 together with the stored r (the stored r values are consistent with a | ~b, e.g. 150 | ~205 == 182 in 8 bits). Returns 0 after the loop completes. The "else" branch is compiled out by the "if 1" guard: it would run 8 iterations on random inputs, hand a, b and r to the simde_test_arm_neon_write_u8x8 helper, and return 1. */ static int test_simde_vorn_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(150), UINT8_C(187), UINT8_C(191), UINT8_C(144), UINT8_C( 75), UINT8_C(220), UINT8_C(204), UINT8_C(242) }, { UINT8_C(205), UINT8_C( 12), UINT8_C(151), UINT8_C(199), UINT8_C(129), UINT8_C(164), UINT8_C(167), UINT8_C(116) }, { UINT8_C(182), UINT8_C(251), UINT8_MAX, UINT8_C(184), UINT8_C(127), UINT8_C(223), UINT8_C(220), UINT8_C(251) } }, { { UINT8_C(171), UINT8_C(210), UINT8_C(149), UINT8_C( 53), UINT8_C( 68), UINT8_C(180), UINT8_C( 33), UINT8_C(103) }, { UINT8_C(250), UINT8_C(223), UINT8_C(224), UINT8_C( 83), UINT8_C( 39), UINT8_C(246), UINT8_C(154), UINT8_C(189) }, { UINT8_C(175), UINT8_C(242), UINT8_C(159), UINT8_C(189), UINT8_C(220), UINT8_C(189), UINT8_C(101), UINT8_C(103) } }, { { UINT8_C(178), UINT8_C( 90), UINT8_C( 77), UINT8_C(253), UINT8_C( 54), UINT8_C( 26), UINT8_C(239), UINT8_C( 3) }, { UINT8_C( 38), UINT8_C(134), UINT8_C(202), UINT8_C(167), UINT8_C( 42), UINT8_C(114), UINT8_C( 27), UINT8_C(213) }, { UINT8_C(251), UINT8_C(123), UINT8_C(125), UINT8_C(253), UINT8_C(247), UINT8_C(159), UINT8_C(239), UINT8_C( 43) } }, { { UINT8_C( 68), UINT8_C(176), UINT8_C( 10), UINT8_C(136), UINT8_C(101), UINT8_C( 44), UINT8_C(239), UINT8_C( 95) }, { UINT8_C( 11), UINT8_C(207), UINT8_C(179), UINT8_C( 50), UINT8_C(198), UINT8_C( 77), UINT8_C(240), UINT8_C(120) }, { UINT8_C(244), UINT8_C(176), UINT8_C( 78), UINT8_C(205), UINT8_C(125), UINT8_C(190), UINT8_C(239), UINT8_C(223) } }, { { UINT8_C(167), UINT8_C( 61), UINT8_C(117), UINT8_C(221), UINT8_C( 87), UINT8_C(100), UINT8_C(224), UINT8_C(125) }, { UINT8_C(234), UINT8_C(171), UINT8_C( 37), UINT8_C( 20), UINT8_C( 29), UINT8_C( 64), UINT8_C(233), UINT8_C( 97) }, { UINT8_C(183), UINT8_C(125), UINT8_MAX, UINT8_MAX, UINT8_C(247), 
UINT8_MAX, UINT8_C(246), UINT8_MAX } }, { { UINT8_C(241), UINT8_C(243), UINT8_C(234), UINT8_C( 86), UINT8_C( 31), UINT8_C(217), UINT8_C(181), UINT8_C( 43) }, { UINT8_C(169), UINT8_C(104), UINT8_C( 93), UINT8_C(111), UINT8_C(182), UINT8_C( 77), UINT8_C(231), UINT8_C( 93) }, { UINT8_C(247), UINT8_C(247), UINT8_C(234), UINT8_C(214), UINT8_C( 95), UINT8_C(251), UINT8_C(189), UINT8_C(171) } }, { { UINT8_C(139), UINT8_C( 92), UINT8_C( 59), UINT8_C(226), UINT8_C(192), UINT8_C( 27), UINT8_C( 96), UINT8_C(170) }, { UINT8_C(198), UINT8_C(133), UINT8_C(190), UINT8_C(227), UINT8_C(197), UINT8_C(167), UINT8_C( 69), UINT8_C(182) }, { UINT8_C(187), UINT8_C(126), UINT8_C(123), UINT8_C(254), UINT8_C(250), UINT8_C( 91), UINT8_C(250), UINT8_C(235) } }, { { UINT8_C(154), UINT8_C( 47), UINT8_C( 12), UINT8_C(186), UINT8_C( 8), UINT8_C(194), UINT8_C(229), UINT8_C(177) }, { UINT8_C( 42), UINT8_C( 66), UINT8_C( 32), UINT8_C(224), UINT8_C(144), UINT8_C( 7), UINT8_C( 62), UINT8_C( 27) }, { UINT8_C(223), UINT8_C(191), UINT8_C(223), UINT8_C(191), UINT8_C(111), UINT8_C(250), UINT8_C(229), UINT8_C(245) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vorn_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vorn_u8(a, b); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Test for simde_vorn_u16 (4 x uint16_t lanes). For each entry of test_vec the a and b arrays are loaded with simde_vld1_u16, combined with simde_vorn_u16, and the result is passed to simde_test_arm_neon_assert_equal_u16x4 together with the stored r (the stored r values are consistent with a | ~b, e.g. 31075 | ~35810 == 32127 in 16 bits). Returns 0 after the loop completes. The "else" branch is compiled out by the "if 1" guard: it would run 8 iterations on random inputs, hand a, b and r to the simde_test_arm_neon_write_u16x4 helper, and return 1. */ static int test_simde_vorn_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(31075), UINT16_C( 
9213), UINT16_C(23956), UINT16_C(23501) }, { UINT16_C(35810), UINT16_C(43070), UINT16_C(33586), UINT16_C(52574) }, { UINT16_C(32127), UINT16_C(30717), UINT16_C(32221), UINT16_C(31725) } }, { { UINT16_C(27570), UINT16_C(48007), UINT16_C(27693), UINT16_C(22380) }, { UINT16_C(36270), UINT16_C(15928), UINT16_C(30356), UINT16_C(63577) }, { UINT16_C(31731), UINT16_C(64455), UINT16_C(60783), UINT16_C(22510) } }, { { UINT16_C(22511), UINT16_C(33563), UINT16_C(59828), UINT16_C(38878) }, { UINT16_C( 7540), UINT16_C(42815), UINT16_C(40352), UINT16_C(21364) }, { UINT16_C(63471), UINT16_C(56283), UINT16_C(60415), UINT16_C(49119) } }, { { UINT16_C(64264), UINT16_C(13582), UINT16_C(31335), UINT16_C( 5517) }, { UINT16_C(50439), UINT16_C(40020), UINT16_C(44347), UINT16_C(10900) }, { UINT16_C(64504), UINT16_C(30639), UINT16_C(31463), UINT16_C(54767) } }, { { UINT16_C(44804), UINT16_C(47533), UINT16_C(35992), UINT16_C( 3408) }, { UINT16_C(36777), UINT16_C(18868), UINT16_C(10284), UINT16_C(13724) }, { UINT16_C(65366), UINT16_C(49135), UINT16_C(57307), UINT16_C(53107) } }, { { UINT16_C(43555), UINT16_C(35434), UINT16_C(63269), UINT16_C(11423) }, { UINT16_C(62396), UINT16_C(63432), UINT16_C(23713), UINT16_C(42273) }, { UINT16_C(44643), UINT16_C(35455), UINT16_C(63359), UINT16_C(32479) } }, { { UINT16_C(53004), UINT16_C(42078), UINT16_C(44635), UINT16_C( 1201) }, { UINT16_C(25917), UINT16_C(27213), UINT16_C(60045), UINT16_C(45215) }, { UINT16_C(57294), UINT16_C(46590), UINT16_C(49019), UINT16_C(20465) } }, { { UINT16_C( 2452), UINT16_C(47418), UINT16_C(55809), UINT16_C(48614) }, { UINT16_C(44749), UINT16_C(28341), UINT16_C(54795), UINT16_C( 5908) }, { UINT16_C(22966), UINT16_C(47482), UINT16_C(64501), UINT16_C(65007) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vorn_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, 
simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t r = simde_vorn_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Test for simde_vorn_u32 (2 x uint32_t lanes). For each entry of test_vec the a and b arrays are loaded with simde_vld1_u32, combined with simde_vorn_u32, and the result is passed to simde_test_arm_neon_assert_equal_u32x2 together with the stored r (the stored r values are consistent with a | ~b, e.g. 12284581 | ~1623741138 == 2680160173 in 32 bits). Returns 0 after the loop completes. The "else" branch is compiled out by the "if 1" guard: it would run 8 iterations on random inputs, hand a, b and r to the simde_test_arm_neon_write_u32x2 helper, and return 1. */ static int test_simde_vorn_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C( 12284581), UINT32_C(1577348385) }, { UINT32_C(1623741138), UINT32_C(3490735932) }, { UINT32_C(2680160173), UINT32_C(2146434531) } }, { { UINT32_C(1921665905), UINT32_C(4063195173) }, { UINT32_C( 694281246), UINT32_C(1614837179) }, { UINT32_C(4137573361), UINT32_C(4290771557) } }, { { UINT32_C( 140639463), UINT32_C( 996631913) }, { UINT32_C(4087033783), UINT32_C( 147106967) }, { UINT32_C( 208010479), UINT32_C(4286543721) } }, { { UINT32_C( 477777655), UINT32_C(3691948478) }, { UINT32_C(1225158542), UINT32_C(3433645796) }, { UINT32_C(3204177655), UINT32_C(4284398015) } }, { { UINT32_C(2882800194), UINT32_C( 669465456) }, { UINT32_C( 35357291), UINT32_C( 621469486) }, { UINT32_C(4294213590), UINT32_C(4294392817) } }, { { UINT32_C(3946939437), UINT32_C(3150401581) }, { UINT32_C(2751778239), UINT32_C(1450225172) }, { UINT32_C(4227577453), UINT32_C(3150926319) } }, { { UINT32_C( 671237304), UINT32_C(3947948416) }, { UINT32_C(2599250795), UINT32_C(2009069386) }, { UINT32_C(1829950652), UINT32_C(3947948469) } }, { { UINT32_C(2824995195), UINT32_C( 274999633) }, { UINT32_C( 196372727), UINT32_C(3479249942) }, { UINT32_C(4234909563), UINT32_C( 822017017) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); 
simde_uint32x2_t r = simde_vorn_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t r = simde_vorn_u32(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Test for simde_vorn_u64 (1 x uint64_t lane). For each entry of test_vec the a and b arrays are loaded with simde_vld1_u64, combined with simde_vorn_u64, and the result is passed to simde_test_arm_neon_assert_equal_u64x1 together with the stored r (the stored r values are consistent with a | ~b). Returns 0 after the loop completes. The "else" branch is compiled out by the "if 1" guard: it would run 8 iterations on random inputs, hand a, b and r to the simde_test_arm_neon_write_u64x1 helper, and return 1. */ static int test_simde_vorn_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C(13318348393709790057) }, { UINT64_C( 3707326404460986802) }, { UINT64_C(18220701318975029101) } }, { { UINT64_C(17758889596753270034) }, { UINT64_C(13154278172658837672) }, { UINT64_C(18408111630538757975) } }, { { UINT64_C( 9208299708435367366) }, { UINT64_C( 4620674727520443444) }, { UINT64_C(18440679017576130511) } }, { { UINT64_C(16337115763932723908) }, { UINT64_C(16375983374514305600) }, { UINT64_C(18356067328090701823) } }, { { UINT64_C( 9943596500732207795) }, { UINT64_C( 3263865533828004572) }, { UINT64_C(15852367051336181683) } }, { { UINT64_C( 5164408348139974543) }, { UINT64_C( 3198990020481731429) }, { UINT64_C(15545281906160172959) } }, { { UINT64_C(17859774302290467579) }, { UINT64_C( 8339343679508533028) }, { UINT64_C(18437379651069213435) } }, { { UINT64_C(13513867989543070779) }, { UINT64_C( 8275542950167797554) }, { UINT64_C(13812539912525622527) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_vorn_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_t a = 
simde_test_arm_neon_random_u64x1(); simde_uint64x1_t b = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t r = simde_vorn_u64(a, b); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Test for simde_vornq_s8 (16 x int8_t lanes). For each entry of test_vec the a and b arrays are loaded with simde_vld1q_s8, combined with simde_vornq_s8, and the result is passed to simde_test_arm_neon_assert_equal_i8x16 together with the stored r (the stored r values are consistent with a | ~b, e.g. -3 | ~72 == -1 in 8 bits). Returns 0 after the loop completes. The "else" branch is compiled out by the "if 1" guard: it would run 8 iterations on random inputs, hand a, b and r to the simde_test_arm_neon_write_i8x16 helper, and return 1. */ static int test_simde_vornq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { -INT8_C( 3), -INT8_C( 124), -INT8_C( 61), INT8_C( 23), INT8_C( 47), -INT8_C( 99), INT8_C( 14), INT8_C( 83), -INT8_C( 20), INT8_C( 16), -INT8_C( 31), -INT8_C( 48), INT8_C( 92), -INT8_C( 100), INT8_C( 67), -INT8_C( 104) }, { INT8_C( 72), INT8_MAX, -INT8_C( 38), -INT8_C( 98), INT8_C( 102), INT8_C( 100), INT8_C( 90), -INT8_C( 103), -INT8_C( 61), -INT8_C( 105), INT8_C( 69), INT8_C( 59), INT8_C( 58), INT8_C( 29), -INT8_C( 83), INT8_C( 55) }, { -INT8_C( 1), -INT8_C( 124), -INT8_C( 25), INT8_C( 119), -INT8_C( 65), -INT8_C( 97), -INT8_C( 81), INT8_C( 119), -INT8_C( 4), INT8_C( 120), -INT8_C( 5), -INT8_C( 44), -INT8_C( 35), -INT8_C( 2), INT8_C( 83), -INT8_C( 40) } }, { { -INT8_C( 94), INT8_C( 112), INT8_C( 78), -INT8_C( 47), INT8_C( 13), INT8_C( 93), INT8_C( 36), -INT8_C( 6), INT8_C( 109), INT8_C( 5), -INT8_C( 54), -INT8_C( 55), -INT8_C( 95), INT8_C( 14), INT8_C( 97), -INT8_C( 23) }, { -INT8_C( 115), INT8_C( 60), -INT8_C( 120), -INT8_C( 12), -INT8_C( 96), -INT8_C( 30), -INT8_C( 115), INT8_C( 100), INT8_C( 121), -INT8_C( 46), -INT8_C( 97), -INT8_C( 77), -INT8_C( 17), INT8_C( 76), -INT8_C( 22), -INT8_C( 111) }, { -INT8_C( 14), -INT8_C( 13), INT8_MAX, -INT8_C( 37), INT8_C( 95), INT8_C( 93), INT8_C( 118), -INT8_C( 5), -INT8_C( 17), INT8_C( 45), -INT8_C( 22), -INT8_C( 51), -INT8_C( 79), -INT8_C( 65), INT8_C( 117), -INT8_C( 17) } }, { { -INT8_C( 67), INT8_C( 57), INT8_C( 98), -INT8_C( 54), -INT8_C( 106), -INT8_C( 121), -INT8_C( 60), INT8_C( 3), -INT8_C( 116), -INT8_C( 113), -INT8_C( 52), 
INT8_C( 46), -INT8_C( 99), INT8_C( 46), INT8_C( 23), INT8_C( 42) }, { INT8_C( 106), -INT8_C( 97), INT8_C( 30), INT8_C( 10), -INT8_C( 127), -INT8_C( 85), INT8_C( 110), -INT8_C( 5), INT8_C( 125), INT8_C( 13), -INT8_C( 82), INT8_C( 109), INT8_C( 90), -INT8_C( 103), -INT8_C( 2), INT8_C( 23) }, { -INT8_C( 67), INT8_C( 121), -INT8_C( 29), -INT8_C( 1), -INT8_C( 2), -INT8_C( 41), -INT8_C( 43), INT8_C( 7), -INT8_C( 114), -INT8_C( 1), -INT8_C( 35), -INT8_C( 66), -INT8_C( 67), INT8_C( 110), INT8_C( 23), -INT8_C( 22) } }, { { -INT8_C( 46), INT8_C( 97), -INT8_C( 31), INT8_C( 104), -INT8_C( 24), -INT8_C( 90), INT8_C( 107), INT8_C( 116), INT8_C( 53), INT8_C( 55), -INT8_C( 94), -INT8_C( 46), INT8_C( 101), -INT8_C( 70), -INT8_C( 4), -INT8_C( 49) }, { INT8_C( 89), INT8_C( 27), -INT8_C( 38), -INT8_C( 37), -INT8_C( 58), INT8_C( 72), -INT8_C( 42), INT8_C( 68), INT8_C( 86), -INT8_C( 124), -INT8_C( 79), -INT8_C( 80), INT8_C( 29), -INT8_C( 81), -INT8_C( 57), -INT8_C( 17) }, { -INT8_C( 10), -INT8_C( 27), -INT8_C( 27), INT8_C( 108), -INT8_C( 7), -INT8_C( 73), INT8_C( 107), -INT8_C( 1), -INT8_C( 67), INT8_MAX, -INT8_C( 18), -INT8_C( 33), -INT8_C( 25), -INT8_C( 6), -INT8_C( 4), -INT8_C( 33) } }, { { INT8_C( 16), -INT8_C( 88), INT8_C( 87), -INT8_C( 8), INT8_C( 78), -INT8_C( 62), INT8_C( 109), -INT8_C( 125), -INT8_C( 6), INT8_C( 15), INT8_C( 85), INT8_C( 95), -INT8_C( 55), INT8_C( 82), INT8_C( 47), INT8_C( 35) }, { INT8_C( 109), INT8_C( 9), -INT8_C( 2), INT8_C( 51), INT8_C( 81), -INT8_C( 44), INT8_C( 119), -INT8_C( 89), INT8_C( 88), INT8_C( 40), INT8_C( 87), INT8_C( 118), -INT8_C( 40), INT8_C( 30), INT8_C( 101), -INT8_C( 24) }, { -INT8_C( 110), -INT8_C( 2), INT8_C( 87), -INT8_C( 4), -INT8_C( 18), -INT8_C( 21), -INT8_C( 19), -INT8_C( 37), -INT8_C( 1), -INT8_C( 33), -INT8_C( 3), -INT8_C( 33), -INT8_C( 17), -INT8_C( 13), -INT8_C( 65), INT8_C( 55) } }, { { -INT8_C( 57), -INT8_C( 67), -INT8_C( 31), INT8_C( 21), INT8_MAX, INT8_C( 78), -INT8_C( 103), INT8_C( 121), INT8_C( 93), -INT8_C( 18), -INT8_C( 
39), INT8_C( 39), INT8_C( 64), INT8_C( 8), INT8_C( 74), -INT8_C( 83) }, { INT8_C( 17), INT8_C( 72), -INT8_C( 31), INT8_C( 98), INT8_C( 28), INT8_C( 88), INT8_C( 10), INT8_C( 116), -INT8_C( 127), INT8_C( 97), -INT8_C( 22), INT8_C( 89), INT8_MIN, INT8_C( 80), INT8_C( 65), INT8_C( 71) }, { -INT8_C( 17), -INT8_C( 65), -INT8_C( 1), -INT8_C( 99), -INT8_C( 1), -INT8_C( 17), -INT8_C( 3), -INT8_C( 5), INT8_MAX, -INT8_C( 2), -INT8_C( 35), -INT8_C( 89), INT8_MAX, -INT8_C( 81), -INT8_C( 2), -INT8_C( 67) } }, { { INT8_C( 13), INT8_C( 34), INT8_C( 92), -INT8_C( 116), INT8_C( 112), -INT8_C( 11), INT8_C( 6), -INT8_C( 50), -INT8_C( 28), -INT8_C( 33), -INT8_C( 11), INT8_C( 36), -INT8_C( 25), INT8_C( 63), -INT8_C( 46), -INT8_C( 8) }, { -INT8_C( 121), -INT8_C( 77), INT8_C( 90), -INT8_C( 93), INT8_C( 11), INT8_C( 100), INT8_C( 23), -INT8_C( 116), -INT8_C( 58), INT8_C( 2), -INT8_C( 27), INT8_C( 70), INT8_C( 82), INT8_C( 39), -INT8_C( 115), INT8_C( 95) }, { INT8_C( 125), INT8_C( 110), -INT8_C( 3), -INT8_C( 36), -INT8_C( 12), -INT8_C( 1), -INT8_C( 18), -INT8_C( 1), -INT8_C( 3), -INT8_C( 1), -INT8_C( 1), -INT8_C( 67), -INT8_C( 17), -INT8_C( 1), -INT8_C( 14), -INT8_C( 8) } }, { { INT8_C( 73), -INT8_C( 23), -INT8_C( 21), -INT8_C( 70), -INT8_C( 33), -INT8_C( 15), -INT8_C( 120), -INT8_C( 61), -INT8_C( 48), INT8_C( 125), -INT8_C( 25), -INT8_C( 73), -INT8_C( 68), -INT8_C( 71), -INT8_C( 81), INT8_C( 67) }, { INT8_C( 108), INT8_C( 10), -INT8_C( 26), INT8_C( 120), INT8_C( 110), -INT8_C( 3), INT8_C( 4), INT8_C( 52), -INT8_C( 1), -INT8_C( 22), INT8_C( 122), INT8_C( 81), INT8_C( 17), INT8_C( 7), -INT8_C( 80), INT8_C( 90) }, { -INT8_C( 37), -INT8_C( 3), -INT8_C( 5), -INT8_C( 65), -INT8_C( 33), -INT8_C( 13), -INT8_C( 5), -INT8_C( 53), -INT8_C( 48), INT8_C( 125), -INT8_C( 25), -INT8_C( 65), -INT8_C( 2), -INT8_C( 7), -INT8_C( 17), -INT8_C( 25) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = 
simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vornq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vornq_s8(a, b); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Test for simde_vornq_s16 (8 x int16_t lanes). For each entry of test_vec the a and b arrays are loaded with simde_vld1q_s16, combined with simde_vornq_s16, and the result is passed to simde_test_arm_neon_assert_equal_i16x8 together with the stored r (the stored r values are consistent with a | ~b). Returns 0 after the loop completes. The "else" branch is compiled out by the "if 1" guard: it would run 8 iterations on random inputs, hand a, b and r to the simde_test_arm_neon_write_i16x8 helper, and return 1. */ static int test_simde_vornq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 25359), -INT16_C( 12268), -INT16_C( 25459), INT16_C( 24211), INT16_C( 31257), -INT16_C( 10987), -INT16_C( 15052), -INT16_C( 24552) }, { -INT16_C( 305), INT16_C( 15640), INT16_C( 7676), -INT16_C( 1166), -INT16_C( 5113), INT16_C( 6221), -INT16_C( 524), -INT16_C( 6798) }, { -INT16_C( 25103), -INT16_C( 11529), -INT16_C( 369), INT16_C( 24223), INT16_C( 31737), -INT16_C( 2121), -INT16_C( 14529), -INT16_C( 17763) } }, { { -INT16_C( 30823), INT16_C( 10165), INT16_C( 18467), INT16_C( 15749), -INT16_C( 25918), -INT16_C( 2542), INT16_C( 11103), INT16_C( 11927) }, { -INT16_C( 20695), INT16_C( 9580), -INT16_C( 8500), -INT16_C( 11487), INT16_C( 28362), -INT16_C( 16661), INT16_C( 24171), INT16_C( 1443) }, { -INT16_C( 10273), -INT16_C( 73), INT16_C( 26931), INT16_C( 15839), -INT16_C( 25609), -INT16_C( 2282), -INT16_C( 21537), -INT16_C( 289) } }, { { INT16_C( 22757), INT16_C( 2092), -INT16_C( 20064), INT16_C( 25413), INT16_C( 22603), -INT16_C( 21671), -INT16_C( 3965), -INT16_C( 21287) }, { INT16_C( 17824), INT16_C( 27858), -INT16_C( 3293), -INT16_C( 4544), INT16_C( 11105), -INT16_C( 13140), INT16_C( 20617), INT16_C( 28369) }, { -INT16_C( 1281), -INT16_C( 25811), -INT16_C( 16900), INT16_C( 29695), -INT16_C( 8993), -INT16_C( 17573), -INT16_C( 9), 
-INT16_C( 16897) } }, { { -INT16_C( 600), INT16_C( 18807), -INT16_C( 17234), -INT16_C( 1364), INT16_C( 1300), -INT16_C( 26715), INT16_C( 32502), -INT16_C( 27068) }, { INT16_C( 5828), -INT16_C( 6398), INT16_C( 16905), INT16_C( 27349), -INT16_C( 32146), -INT16_C( 2250), INT16_C( 2258), INT16_C( 31334) }, { -INT16_C( 581), INT16_C( 23039), -INT16_C( 16898), -INT16_C( 82), INT16_C( 32149), -INT16_C( 24595), -INT16_C( 1), -INT16_C( 26659) } }, { { -INT16_C( 8955), -INT16_C( 19261), INT16_C( 28569), -INT16_C( 20818), INT16_C( 21365), INT16_C( 27461), -INT16_C( 30255), -INT16_C( 27391) }, { INT16_C( 927), -INT16_C( 22403), INT16_C( 21062), -INT16_C( 19438), INT16_C( 18900), -INT16_C( 22869), INT16_C( 4433), INT16_C( 22049) }, { -INT16_C( 667), -INT16_C( 2109), -INT16_C( 4167), -INT16_C( 4113), -INT16_C( 2177), INT16_C( 31573), -INT16_C( 4097), -INT16_C( 16929) } }, { { -INT16_C( 6930), -INT16_C( 30710), -INT16_C( 18348), -INT16_C( 14026), INT16_C( 31499), -INT16_C( 8908), INT16_C( 13573), -INT16_C( 23438) }, { -INT16_C( 4296), INT16_C( 32333), INT16_C( 24386), INT16_C( 5682), -INT16_C( 8536), -INT16_C( 1603), -INT16_C( 8465), -INT16_C( 8624) }, { -INT16_C( 2833), -INT16_C( 30278), -INT16_C( 18179), -INT16_C( 5633), INT16_C( 31583), -INT16_C( 8330), INT16_C( 13589), -INT16_C( 23041) } }, { { INT16_C( 23234), INT16_C( 5734), -INT16_C( 25581), INT16_C( 7903), INT16_C( 4887), INT16_C( 7419), INT16_C( 28232), -INT16_C( 32319) }, { INT16_C( 3677), -INT16_C( 24577), INT16_C( 12909), INT16_C( 5814), INT16_C( 29456), -INT16_C( 241), INT16_C( 24401), INT16_C( 5085) }, { -INT16_C( 1054), INT16_C( 30310), -INT16_C( 8813), -INT16_C( 33), -INT16_C( 24577), INT16_C( 7419), -INT16_C( 4370), -INT16_C( 4637) } }, { { INT16_C( 17338), -INT16_C( 13014), INT16_C( 2527), -INT16_C( 2069), -INT16_C( 6371), INT16_C( 25875), -INT16_C( 11179), -INT16_C( 19738) }, { -INT16_C( 6430), INT16_C( 20562), INT16_C( 2072), INT16_C( 10342), INT16_C( 30075), -INT16_C( 13273), INT16_C( 1493), -INT16_C( 28705) 
}, { INT16_C( 23487), -INT16_C( 4177), -INT16_C( 1), -INT16_C( 2053), -INT16_C( 4195), INT16_C( 30683), -INT16_C( 385), -INT16_C( 3354) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vornq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vornq_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Test for simde_vornq_s32 (4 x int32_t lanes). For each entry of test_vec the a and b arrays are loaded with simde_vld1q_s32, combined with simde_vornq_s32, and the result is passed to simde_test_arm_neon_assert_equal_i32x4 together with the stored r (the stored r values are consistent with a | ~b). Returns 0 after the loop completes. The "else" branch is compiled out by the "if 1" guard: it would run 8 iterations on random inputs, hand a, b and r to the simde_test_arm_neon_write_i32x4 helper, and return 1. */ static int test_simde_vornq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 677120328), INT32_C( 807356179), -INT32_C( 2087374290), -INT32_C( 382305273) }, { INT32_C( 2050590818), INT32_C( 195207056), -INT32_C( 354957035), INT32_C( 393852622) }, { -INT32_C( 1377927203), -INT32_C( 195074177), -INT32_C( 1749207314), -INT32_C( 373916361) } }, { { -INT32_C( 750791232), INT32_C( 1258511900), -INT32_C( 1748068208), INT32_C( 1988166676) }, { INT32_C( 485538444), INT32_C( 1864864346), INT32_C( 710540892), INT32_C( 1967248053) }, { -INT32_C( 213920269), -INT32_C( 606371907), -INT32_C( 672228941), -INT32_C( 21025442) } }, { { -INT32_C( 1018658649), INT32_C( 1863207902), -INT32_C( 133767709), INT32_C( 1852737505) }, { -INT32_C( 1668653246), INT32_C( 1292612337), INT32_C( 1702388912), -INT32_C( 572868298) }, { -INT32_C( 478306113), -INT32_C( 110626), -INT32_C( 91758609), INT32_C( 1852819433) } }, { { INT32_C( 413147706), INT32_C( 1367846765), INT32_C( 1833537164), INT32_C( 1474017045) }, { INT32_C( 150169110), -INT32_C( 933888488), 
-INT32_C( 1741763230), -INT32_C( 1049294713) }, { -INT32_C( 5456901), INT32_C( 2008002543), INT32_C( 1876541085), INT32_C( 2145122173) } }, { { -INT32_C( 1730603477), INT32_C( 1374249413), INT32_C( 79573743), INT32_C( 6068970) }, { INT32_C( 403197696), -INT32_C( 1327473074), -INT32_C( 1303835093), INT32_C( 1114881559) }, { -INT32_C( 18689), INT32_C( 1610605045), INT32_C( 1304359935), -INT32_C( 1109599254) } }, { { -INT32_C( 1713746476), -INT32_C( 1645558610), -INT32_C( 526276106), INT32_C( 1138883907) }, { -INT32_C( 1688475315), INT32_C( 1934310215), INT32_C( 1629852746), INT32_C( 665033043) }, { -INT32_C( 33660938), -INT32_C( 1644247874), -INT32_C( 17044489), -INT32_C( 604110865) } }, { { -INT32_C( 1799258650), INT32_C( 942779201), -INT32_C( 1759980972), INT32_C( 500890064) }, { INT32_C( 716715491), -INT32_C( 1147337871), -INT32_C( 350436456), INT32_C( 1108524892) }, { -INT32_C( 708313090), INT32_C( 2087976911), -INT32_C( 1745093001), -INT32_C( 1107297805) } }, { { INT32_C( 2128007996), -INT32_C( 743045249), -INT32_C( 1452552487), -INT32_C( 1413069368) }, { -INT32_C( 321552773), INT32_C( 447181698), -INT32_C( 1845116106), -INT32_C( 1093396606) }, { INT32_C( 2147406780), -INT32_C( 134312065), -INT32_C( 302252327), -INT32_C( 336597507) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vornq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vornq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } 
/* Test for simde_vornq_s64 (2 x int64_t lanes). For each entry of test_vec the a and b arrays are loaded with simde_vld1q_s64, combined with simde_vornq_s64, and the result is passed to simde_test_arm_neon_assert_equal_i64x2 together with the stored r (the stored r values are consistent with a | ~b). Returns 0 after the loop completes. The "else" branch is compiled out by the "if 1" guard: it would run 8 iterations on random inputs, hand a, b and r to the simde_test_arm_neon_write_i64x2 helper, and return 1. */ static int test_simde_vornq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { -INT64_C( 8485359279090652437), INT64_C( 7580959975182477505) }, { -INT64_C( 5614303746724656775), INT64_C( 2089415351950645951) }, { -INT64_C( 3458778036814545169), -INT64_C( 1498291375123206719) } }, { { -INT64_C( 1153268750774551358), -INT64_C( 6502164130496499092) }, { -INT64_C( 3177934863686690634), INT64_C( 296415424194816463) }, { -INT64_C( 1153256924374892597), -INT64_C( 7899175018826116) } }, { { -INT64_C( 3139441699060145072), INT64_C( 5294811028661462696) }, { -INT64_C( 6921081755340695029), INT64_C( 7768918383820539786) }, { -INT64_C( 833448024532523532), -INT64_C( 2486000222950392067) } }, { { INT64_C( 7394445214660315234), -INT64_C( 3811534294866997954) }, { -INT64_C( 9205395062007359793), -INT64_C( 41625736401068059) }, { INT64_C( 9213936852346562930), -INT64_C( 3775151308294853314) } }, { { -INT64_C( 1402193236821715355), INT64_C( 5386543176955643189) }, { INT64_C( 7918780086091361134), -INT64_C( 5429693740112530936) }, { -INT64_C( 100486567778363659), INT64_C( 5465960352241929719) } }, { { INT64_C( 921658848263564848), -INT64_C( 3463898813775640536) }, { INT64_C( 532298867592713837), -INT64_C( 2643804658750140084) }, { -INT64_C( 225488021274005582), -INT64_C( 1153503739377373509) } }, { { -INT64_C( 5502691159140915008), -INT64_C( 3225629125903997940) }, { -INT64_C( 2945306279867289318), -INT64_C( 2413154112914666244) }, { -INT64_C( 4899960375199420699), -INT64_C( 901423718178244849) } }, { { -INT64_C( 3807767373881787415), -INT64_C( 2525265113918177605) }, { -INT64_C( 1997603821643776795), INT64_C( 7267981299377027553) }, { -INT64_C( 2614058383598257157), -INT64_C( 2308387527666729281) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = 
simde_vornq_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); simde_int64x2_t r = simde_vornq_s64(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Test for simde_vornq_u8 (16 x uint8_t lanes). For each entry of test_vec the a and b arrays are loaded with simde_vld1q_u8, combined with simde_vornq_u8, and the result is passed to simde_test_arm_neon_assert_equal_u8x16 together with the stored r (the stored r values are consistent with a | ~b, e.g. 167 | ~200 == 183 in 8 bits). Returns 0 after the loop completes. The "else" branch is compiled out by the "if 1" guard: it would run 8 iterations on random inputs, hand a, b and r to the simde_test_arm_neon_write_u8x16 helper, and return 1. */ static int test_simde_vornq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(167), UINT8_C( 77), UINT8_C( 14), UINT8_MAX, UINT8_C(136), UINT8_C( 28), UINT8_C(151), UINT8_C( 36), UINT8_C( 23), UINT8_C( 41), UINT8_C( 21), UINT8_C(245), UINT8_C(241), UINT8_C( 71), UINT8_C(105), UINT8_C( 50) }, { UINT8_C(200), UINT8_C(232), UINT8_C( 25), UINT8_C( 33), UINT8_C(236), UINT8_C( 98), UINT8_C(210), UINT8_C(206), UINT8_C(228), UINT8_C(213), UINT8_C(147), UINT8_C(252), UINT8_C(137), UINT8_C(209), UINT8_C( 39), UINT8_C( 48) }, { UINT8_C(183), UINT8_C( 95), UINT8_C(238), UINT8_MAX, UINT8_C(155), UINT8_C(157), UINT8_C(191), UINT8_C( 53), UINT8_C( 31), UINT8_C( 43), UINT8_C(125), UINT8_C(247), UINT8_C(247), UINT8_C(111), UINT8_C(249), UINT8_MAX } }, { { UINT8_C( 30), UINT8_C( 54), UINT8_C( 47), UINT8_C(167), UINT8_C( 82), UINT8_C(199), UINT8_C(203), UINT8_C(106), UINT8_C(240), UINT8_C(224), UINT8_C( 95), UINT8_C(225), UINT8_C( 39), UINT8_C(200), UINT8_C( 19), UINT8_C(240) }, { UINT8_C(176), UINT8_C( 45), UINT8_C( 17), UINT8_C(157), UINT8_C(143), UINT8_C(227), UINT8_C(107), UINT8_C(115), UINT8_C(184), UINT8_C(254), UINT8_C(112), UINT8_C( 65), UINT8_C(207), UINT8_C(151), UINT8_C(113), UINT8_C(238) }, { UINT8_C( 95), UINT8_C(246), UINT8_C(239), UINT8_C(231), UINT8_C(114), UINT8_C(223), UINT8_C(223), UINT8_C(238), UINT8_C(247), UINT8_C(225), UINT8_C(223), UINT8_MAX, UINT8_C( 
55), UINT8_C(232), UINT8_C(159), UINT8_C(241) } }, { { UINT8_C(205), UINT8_C(161), UINT8_C(149), UINT8_C( 32), UINT8_C(104), UINT8_C( 96), UINT8_C(138), UINT8_C( 88), UINT8_C( 64), UINT8_C(233), UINT8_C( 57), UINT8_C(104), UINT8_C(177), UINT8_C( 76), UINT8_C( 88), UINT8_C( 98) }, { UINT8_C(121), UINT8_C(105), UINT8_MAX, UINT8_C( 9), UINT8_C( 76), UINT8_C(106), UINT8_C(124), UINT8_C( 5), UINT8_C(105), UINT8_C(236), UINT8_C( 70), UINT8_C( 56), UINT8_C(132), UINT8_C(184), UINT8_C( 38), UINT8_C( 81) }, { UINT8_C(207), UINT8_C(183), UINT8_C(149), UINT8_C(246), UINT8_C(251), UINT8_C(245), UINT8_C(139), UINT8_C(250), UINT8_C(214), UINT8_C(251), UINT8_C(185), UINT8_C(239), UINT8_C(251), UINT8_C( 79), UINT8_C(217), UINT8_C(238) } }, { { UINT8_C( 89), UINT8_C(187), UINT8_C(113), UINT8_C(193), UINT8_C( 27), UINT8_C(251), UINT8_C( 25), UINT8_C( 92), UINT8_C(228), UINT8_C( 82), UINT8_C(196), UINT8_C(150), UINT8_C(158), UINT8_C( 28), UINT8_C(248), UINT8_C( 24) }, { UINT8_C(133), UINT8_C(247), UINT8_C( 33), UINT8_C(209), UINT8_C( 97), UINT8_C(157), UINT8_C(214), UINT8_C(202), UINT8_C(138), UINT8_C( 29), UINT8_C( 3), UINT8_C( 14), UINT8_C(213), UINT8_C( 41), UINT8_C( 95), UINT8_C( 46) }, { UINT8_C(123), UINT8_C(187), UINT8_MAX, UINT8_C(239), UINT8_C(159), UINT8_C(251), UINT8_C( 57), UINT8_C(125), UINT8_C(245), UINT8_C(242), UINT8_C(252), UINT8_C(247), UINT8_C(190), UINT8_C(222), UINT8_C(248), UINT8_C(217) } }, { { UINT8_C(229), UINT8_C(209), UINT8_C(239), UINT8_C( 0), UINT8_C(204), UINT8_C( 8), UINT8_C( 92), UINT8_C(177), UINT8_C( 90), UINT8_C( 32), UINT8_C( 71), UINT8_C(248), UINT8_C( 60), UINT8_C( 63), UINT8_C( 16), UINT8_C(193) }, { UINT8_C( 54), UINT8_C( 49), UINT8_C(147), UINT8_C(151), UINT8_C(207), UINT8_C(105), UINT8_C( 98), UINT8_C( 89), UINT8_C(134), UINT8_C(101), UINT8_C(103), UINT8_C( 91), UINT8_C(142), UINT8_C(198), UINT8_C(137), UINT8_C(115) }, { UINT8_C(237), UINT8_C(223), UINT8_C(239), UINT8_C(104), UINT8_C(252), UINT8_C(158), UINT8_C(221), UINT8_C(183), 
UINT8_C(123), UINT8_C(186), UINT8_C(223), UINT8_C(252), UINT8_C(125), UINT8_C( 63), UINT8_C(118), UINT8_C(205) } }, { { UINT8_C(151), UINT8_C(120), UINT8_C(116), UINT8_C(100), UINT8_C(128), UINT8_C(208), UINT8_C( 21), UINT8_C(218), UINT8_C(241), UINT8_C( 92), UINT8_C(211), UINT8_C( 45), UINT8_C(155), UINT8_C(227), UINT8_C(239), UINT8_C(209) }, { UINT8_C( 21), UINT8_C(130), UINT8_C(104), UINT8_C(228), UINT8_C(235), UINT8_C(202), UINT8_C( 61), UINT8_C(114), UINT8_C( 47), UINT8_C(164), UINT8_C(205), UINT8_C(190), UINT8_C(106), UINT8_C( 87), UINT8_C( 49), UINT8_C( 2) }, { UINT8_MAX, UINT8_C(125), UINT8_C(247), UINT8_C(127), UINT8_C(148), UINT8_C(245), UINT8_C(215), UINT8_C(223), UINT8_C(241), UINT8_C( 95), UINT8_C(243), UINT8_C(109), UINT8_C(159), UINT8_C(235), UINT8_C(239), UINT8_C(253) } }, { { UINT8_C(207), UINT8_C(165), UINT8_C(102), UINT8_C( 80), UINT8_C(118), UINT8_C(123), UINT8_C( 42), UINT8_C(103), UINT8_C(215), UINT8_C(253), UINT8_C(148), UINT8_C(114), UINT8_C(225), UINT8_C(131), UINT8_C( 67), UINT8_C(246) }, { UINT8_C( 5), UINT8_C(171), UINT8_C(218), UINT8_C(241), UINT8_C(118), UINT8_C( 23), UINT8_C( 99), UINT8_C(165), UINT8_C(187), UINT8_C( 48), UINT8_C( 99), UINT8_C( 37), UINT8_C(135), UINT8_C(149), UINT8_C( 39), UINT8_C( 87) }, { UINT8_MAX, UINT8_C(245), UINT8_C(103), UINT8_C( 94), UINT8_MAX, UINT8_C(251), UINT8_C(190), UINT8_C(127), UINT8_C(215), UINT8_MAX, UINT8_C(156), UINT8_C(250), UINT8_C(249), UINT8_C(235), UINT8_C(219), UINT8_C(254) } }, { { UINT8_C( 58), UINT8_C(141), UINT8_C(167), UINT8_C(176), UINT8_C( 8), UINT8_C(209), UINT8_C( 23), UINT8_C(223), UINT8_C(207), UINT8_C(172), UINT8_C( 81), UINT8_C(176), UINT8_C( 47), UINT8_C(148), UINT8_C(166), UINT8_C( 53) }, { UINT8_C( 64), UINT8_C(128), UINT8_C( 38), UINT8_C(182), UINT8_C(151), UINT8_C(137), UINT8_C( 91), UINT8_C( 82), UINT8_C(185), UINT8_C(191), UINT8_C(119), UINT8_C( 65), UINT8_C( 84), UINT8_C(159), UINT8_C(152), UINT8_C(142) }, { UINT8_C(191), UINT8_MAX, UINT8_MAX, UINT8_C(249), 
UINT8_C(104), UINT8_C(247), UINT8_C(183), UINT8_MAX, UINT8_C(207), UINT8_C(236), UINT8_C(217), UINT8_C(190), UINT8_C(175), UINT8_C(244), UINT8_C(231), UINT8_C(117) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vornq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vornq_u8(a, b); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vornq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(16172), UINT16_C(13631), UINT16_C(22032), UINT16_C(57108), UINT16_C(26114), UINT16_C(12943), UINT16_C(13818), UINT16_C(14951) }, { UINT16_C(36277), UINT16_C(19696), UINT16_C(19478), UINT16_C(53150), UINT16_C( 5643), UINT16_C(24336), UINT16_C(43189), UINT16_C(57837) }, { UINT16_C(32622), UINT16_C(46911), UINT16_C(63481), UINT16_C(65397), UINT16_C(61430), UINT16_C(45807), UINT16_C(30714), UINT16_C(15991) } }, { { UINT16_C(11495), UINT16_C(63510), UINT16_C(11139), UINT16_C(34263), UINT16_C(26513), UINT16_C(35767), UINT16_C( 7836), UINT16_C(21190) }, { UINT16_C(46763), UINT16_C(49566), UINT16_C(15618), UINT16_C( 3473), UINT16_C(41299), UINT16_C( 2156), UINT16_C(23114), UINT16_C(12777) }, { UINT16_C(28151), UINT16_C(65143), UINT16_C(60415), UINT16_C(63487), UINT16_C(32701), UINT16_C(65463), UINT16_C(49085), UINT16_C(57046) } }, { { UINT16_C( 134), UINT16_C( 2345), UINT16_C( 299), UINT16_C(48271), UINT16_C(18024), 
UINT16_C( 1095), UINT16_C( 3429), UINT16_C( 4182) }, { UINT16_C(62916), UINT16_C(50898), UINT16_C(25394), UINT16_C(34260), UINT16_C(16388), UINT16_C(20109), UINT16_C(30362), UINT16_C( 8576) }, { UINT16_C( 2751), UINT16_C(14637), UINT16_C(40431), UINT16_C(65199), UINT16_C(65531), UINT16_C(46455), UINT16_C(36197), UINT16_C(56959) } }, { { UINT16_C(43382), UINT16_C(41258), UINT16_C(47530), UINT16_C( 4701), UINT16_C(42240), UINT16_C(25879), UINT16_C(28082), UINT16_C(30325) }, { UINT16_C(18274), UINT16_C(37949), UINT16_C( 4522), UINT16_C(44825), UINT16_C(42577), UINT16_C(60669), UINT16_C(32029), UINT16_C(37645) }, { UINT16_C(47615), UINT16_C(60394), UINT16_MAX, UINT16_C(21247), UINT16_C(64942), UINT16_C(30487), UINT16_C(61426), UINT16_C(32503) } }, { { UINT16_C(14119), UINT16_C(53557), UINT16_C(37617), UINT16_C(61924), UINT16_C(64311), UINT16_C(59990), UINT16_C(52072), UINT16_C(52064) }, { UINT16_C(40211), UINT16_C(48479), UINT16_C(31150), UINT16_C( 108), UINT16_C(27167), UINT16_C(15596), UINT16_C(63975), UINT16_C( 3792) }, { UINT16_C(30703), UINT16_C(54197), UINT16_C(38641), UINT16_C(65527), UINT16_C(65527), UINT16_C(60247), UINT16_C(53112), UINT16_C(64367) } }, { { UINT16_C( 1328), UINT16_C( 8672), UINT16_C(50327), UINT16_C(53010), UINT16_C(26815), UINT16_C(10169), UINT16_C( 6452), UINT16_C(18418) }, { UINT16_C(21175), UINT16_C(25860), UINT16_C(29131), UINT16_C(60005), UINT16_C(20955), UINT16_C(49703), UINT16_C(63306), UINT16_C(31697) }, { UINT16_C(44408), UINT16_C(48123), UINT16_C(52919), UINT16_C(57242), UINT16_C(61119), UINT16_C(16377), UINT16_C( 6581), UINT16_C(51198) } }, { { UINT16_C(45564), UINT16_C(37788), UINT16_C(44917), UINT16_C(13410), UINT16_C( 6935), UINT16_C(19291), UINT16_C(20021), UINT16_C(60562) }, { UINT16_C(38816), UINT16_C(27473), UINT16_C(46856), UINT16_C(58197), UINT16_C(31752), UINT16_C(21413), UINT16_C(30323), UINT16_C(28622) }, { UINT16_C(63999), UINT16_C(38846), UINT16_C(61431), UINT16_C(15594), UINT16_C(39927), UINT16_C(61275), 
UINT16_C(53181), UINT16_C(64691) } }, { { UINT16_C(27175), UINT16_C(39939), UINT16_C(25881), UINT16_C(12752), UINT16_C(11393), UINT16_C(46716), UINT16_C( 3962), UINT16_C( 6818) }, { UINT16_C(62374), UINT16_C(44677), UINT16_C(55978), UINT16_C(45969), UINT16_C(13911), UINT16_C(51718), UINT16_C(54445), UINT16_C(54330) }, { UINT16_C(28287), UINT16_C(56699), UINT16_C(25949), UINT16_C(32254), UINT16_C(60841), UINT16_C(47101), UINT16_C(12154), UINT16_C(15335) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vornq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t r = simde_vornq_u16(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vornq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(1483816254), UINT32_C( 596197794), UINT32_C(3889759597), UINT32_C(3120659220) }, { UINT32_C( 426280559), UINT32_C(3100440929), UINT32_C(3716338224), UINT32_C(3853630630) }, { UINT32_C(4277632446), UINT32_C(1740326846), UINT32_C(3892129263), UINT32_C(3125771101) } }, { { UINT32_C(2621252345), UINT32_C(3519006308), UINT32_C(3770259915), UINT32_C(2207955476) }, { UINT32_C(2728198977), UINT32_C( 744122876), UINT32_C(3792297020), UINT32_C(2462563225) }, { UINT32_C(3716153087), UINT32_C(3552564839), UINT32_C(4261395403), UINT32_C(4022009462) } }, { { UINT32_C(1093534941), UINT32_C(2517888714), UINT32_C(2608254087), 
UINT32_C(3340701830) }, { UINT32_C( 275364883), UINT32_C(1631372069), UINT32_C( 960775840), UINT32_C(3737848577) }, { UINT32_C(4022224893), UINT32_C(2664693466), UINT32_C(3758095839), UINT32_C(3879728382) } }, { { UINT32_C(3659594256), UINT32_C(1869624296), UINT32_C(2232084223), UINT32_C( 172829175) }, { UINT32_C( 186300133), UINT32_C( 443307898), UINT32_C(2656284829), UINT32_C(3430686396) }, { UINT32_C(4276484890), UINT32_C(4025727981), UINT32_C(3853447167), UINT32_C(1003481591) } }, { { UINT32_C( 10918936), UINT32_C(3463387087), UINT32_C(4099177213), UINT32_C(2315231651) }, { UINT32_C(3516143959), UINT32_C( 233504880), UINT32_C(1839939249), UINT32_C(1966680157) }, { UINT32_C( 787480248), UINT32_C(4269801423), UINT32_C(4132764671), UINT32_C(2348808099) } }, { { UINT32_C(2490818500), UINT32_C(4100122102), UINT32_C( 65582687), UINT32_C(2945247063) }, { UINT32_C(1904222209), UINT32_C(3514788896), UINT32_C( 121514666), UINT32_C( 394098514) }, { UINT32_C(2659180542), UINT32_C(4276287487), UINT32_C(4226414431), UINT32_C(4019122175) } }, { { UINT32_C(1303114583), UINT32_C( 943787480), UINT32_C( 456862404), UINT32_C( 315279121) }, { UINT32_C( 126110695), UINT32_C(1641612215), UINT32_C(2154370861), UINT32_C(3868714639) }, { UINT32_C(4261148511), UINT32_C(3194486232), UINT32_C(2143283926), UINT32_C( 468377457) } }, { { UINT32_C(2972926681), UINT32_C( 334067023), UINT32_C(2955879583), UINT32_C(3536058859) }, { UINT32_C(4225386308), UINT32_C(2002629450), UINT32_C(1509410506), UINT32_C(2235535020) }, { UINT32_C(3040344827), UINT32_C(2615901695), UINT32_C(3056549311), UINT32_C(4207147515) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vornq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a 
= simde_test_arm_neon_random_u32x4(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t r = simde_vornq_u32(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vornq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 9742166600277455824), UINT64_C(11530053269874762308) }, { UINT64_C( 6486302283261598785), UINT64_C( 6480947635329170111) }, { UINT64_C(12105437171942913022), UINT64_C(11966062555094280004) } }, { { UINT64_C(10076150258787231334), UINT64_C( 5191732310021180431) }, { UINT64_C( 6954639336781686938), UINT64_C(10427164827855828945) }, { UINT64_C(11528561244109502311), UINT64_C( 8020908565252262959) } }, { { UINT64_C(16912058775276236257), UINT64_C(14199674990761438911) }, { UINT64_C(10018946467096421129), UINT64_C( 8242601689839228329) }, { UINT64_C(18372364171491556855), UINT64_C(14816672830487097087) } }, { { UINT64_C(13009859136661897671), UINT64_C( 5524941767599382971) }, { UINT64_C(13236643410795273068), UINT64_C(18003682885643180247) }, { UINT64_C(18216497504031761879), UINT64_C( 5669638099513108411) } }, { { UINT64_C( 8923926367836009263), UINT64_C( 5230947306527937033) }, { UINT64_C( 7524707462120705961), UINT64_C( 1577572578750363833) }, { UINT64_C(18436329424100750207), UINT64_C(16905209931097069391) } }, { { UINT64_C(12706925761294598172), UINT64_C( 8180953410434808420) }, { UINT64_C(15068363861602662180), UINT64_C(12864991478119460237) }, { UINT64_C(13761447847209699551), UINT64_C( 9078940094085184118) } }, { { UINT64_C( 4069539738901529121), UINT64_C( 3448631459411686818) }, { UINT64_C( 2873862372947395832), UINT64_C( 6933050006603276015) }, { UINT64_C(17905746926224014119), UINT64_C(13825166618822210994) } }, { { UINT64_C( 5904960937062418406), 
UINT64_C( 7156669968291752471) }, { UINT64_C( 2765022391870633373), UINT64_C( 330138391560321780) }, { UINT64_C(15704802798273941478), UINT64_C(18121251162307718943) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vornq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t r = simde_vornq_u64(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vorn_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vorn_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vorn_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vorn_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vorn_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vorn_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vorn_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vorn_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vornq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vornq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vornq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vornq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vornq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vornq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vornq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vornq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/orr.c000066400000000000000000001434411400333146700162740ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN orr #include "test-neon.h" #include "../../../simde/arm/neon/orr.h" static int test_simde_vorr_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 43), -INT8_C( 72), INT8_C( 13), -INT8_C( 58), -INT8_C( 99), 
INT8_C( 110), -INT8_C( 120), -INT8_C( 100) }, { INT8_C( 21), -INT8_C( 25), -INT8_C( 50), INT8_C( 76), INT8_C( 92), INT8_C( 65), INT8_C( 115), -INT8_C( 62) }, { INT8_C( 63), -INT8_C( 1), -INT8_C( 49), -INT8_C( 50), -INT8_C( 35), INT8_C( 111), -INT8_C( 5), -INT8_C( 34) } }, { { INT8_C( 30), -INT8_C( 126), INT8_C( 57), INT8_C( 41), INT8_C( 64), INT8_C( 98), -INT8_C( 52), -INT8_C( 70) }, { INT8_C( 41), INT8_C( 28), INT8_MAX, INT8_C( 96), -INT8_C( 11), INT8_C( 40), INT8_C( 71), INT8_C( 32) }, { INT8_C( 63), -INT8_C( 98), INT8_MAX, INT8_C( 105), -INT8_C( 11), INT8_C( 106), -INT8_C( 49), -INT8_C( 70) } }, { { -INT8_C( 32), INT8_C( 84), -INT8_C( 25), INT8_C( 125), -INT8_C( 61), INT8_C( 111), INT8_C( 25), -INT8_C( 40) }, { INT8_C( 87), -INT8_C( 25), INT8_C( 36), -INT8_C( 77), INT8_C( 40), -INT8_C( 105), INT8_C( 117), INT8_C( 71) }, { -INT8_C( 9), -INT8_C( 9), -INT8_C( 25), -INT8_C( 1), -INT8_C( 21), -INT8_C( 1), INT8_C( 125), -INT8_C( 33) } }, { { INT8_C( 26), -INT8_C( 82), INT8_C( 112), INT8_C( 90), INT8_C( 17), INT8_C( 60), INT8_C( 20), INT8_C( 58) }, { INT8_C( 88), -INT8_C( 108), -INT8_C( 102), INT8_C( 77), -INT8_C( 68), -INT8_C( 31), INT8_C( 109), -INT8_C( 100) }, { INT8_C( 90), -INT8_C( 66), -INT8_C( 6), INT8_C( 95), -INT8_C( 67), -INT8_C( 3), INT8_C( 125), -INT8_C( 66) } }, { { INT8_C( 54), INT8_C( 84), INT8_C( 25), -INT8_C( 7), -INT8_C( 60), INT8_C( 50), -INT8_C( 47), INT8_C( 27) }, { INT8_C( 25), -INT8_C( 10), -INT8_C( 50), INT8_C( 65), -INT8_C( 115), INT8_C( 67), -INT8_C( 120), -INT8_C( 89) }, { INT8_C( 63), -INT8_C( 10), -INT8_C( 33), -INT8_C( 7), -INT8_C( 51), INT8_C( 115), -INT8_C( 39), -INT8_C( 65) } }, { { -INT8_C( 15), -INT8_C( 8), INT8_C( 1), INT8_C( 2), INT8_C( 52), INT8_C( 22), INT8_C( 60), -INT8_C( 116) }, { -INT8_C( 86), -INT8_C( 41), -INT8_C( 39), INT8_C( 102), -INT8_C( 72), INT8_C( 71), INT8_C( 2), -INT8_C( 18) }, { -INT8_C( 5), -INT8_C( 1), -INT8_C( 39), INT8_C( 102), -INT8_C( 68), INT8_C( 87), INT8_C( 62), -INT8_C( 18) } }, { { -INT8_C( 101), INT8_C( 
27), -INT8_C( 25), INT8_C( 95), INT8_C( 77), -INT8_C( 71), INT8_C( 122), INT8_C( 102) }, { -INT8_C( 81), INT8_C( 72), -INT8_C( 89), INT8_C( 60), -INT8_C( 117), INT8_C( 48), -INT8_C( 28), INT8_C( 125) }, { -INT8_C( 65), INT8_C( 91), -INT8_C( 25), INT8_MAX, -INT8_C( 49), -INT8_C( 71), -INT8_C( 2), INT8_MAX } }, { { INT8_C( 40), -INT8_C( 27), INT8_MAX, INT8_C( 93), -INT8_C( 5), -INT8_C( 68), -INT8_C( 23), -INT8_C( 91) }, { -INT8_C( 109), -INT8_C( 61), INT8_C( 11), INT8_C( 75), INT8_C( 10), INT8_C( 13), INT8_C( 58), -INT8_C( 91) }, { -INT8_C( 69), -INT8_C( 25), INT8_MAX, INT8_C( 95), -INT8_C( 5), -INT8_C( 67), -INT8_C( 5), -INT8_C( 91) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vorr_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vorr_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 16819), INT16_C( 16268), INT16_C( 17482), INT16_C( 11881) }, { INT16_C( 12073), INT16_C( 22113), INT16_C( 13230), -INT16_C( 21136) }, { -INT16_C( 16531), INT16_C( 32749), INT16_C( 30702), -INT16_C( 20615) } }, { { -INT16_C( 3611), -INT16_C( 18421), INT16_C( 29876), -INT16_C( 552) }, { -INT16_C( 26226), INT16_C( 29711), -INT16_C( 15476), -INT16_C( 9483) }, { -INT16_C( 1553), -INT16_C( 1009), -INT16_C( 2116), -INT16_C( 3) } }, { { -INT16_C( 32383), -INT16_C( 13287), -INT16_C( 32059), -INT16_C( 4102) }, { INT16_C( 23729), INT16_C( 24389), -INT16_C( 19057), INT16_C( 29708) }, { -INT16_C( 8783), -INT16_C( 8355), -INT16_C( 18481), -INT16_C( 2) } }, { { INT16_C( 6054), INT16_C( 23340), INT16_C( 1419), INT16_C( 6744) }, { INT16_C( 26526), INT16_C( 10894), -INT16_C( 31957), -INT16_C( 21500) }, { INT16_C( 30654), INT16_C( 31662), -INT16_C( 30805), -INT16_C( 16804) } }, { { INT16_C( 7428), -INT16_C( 13960), 
INT16_C( 29599), INT16_C( 20664) }, { -INT16_C( 305), INT16_C( 24239), -INT16_C( 17229), INT16_C( 23251) }, { -INT16_C( 49), -INT16_C( 8193), -INT16_C( 65), INT16_C( 23291) } }, { { -INT16_C( 45), INT16_C( 24501), INT16_C( 3332), -INT16_C( 23943) }, { INT16_C( 1908), -INT16_C( 24627), -INT16_C( 11894), -INT16_C( 29108) }, { -INT16_C( 9), -INT16_C( 8195), -INT16_C( 8818), -INT16_C( 20867) } }, { { -INT16_C( 15121), -INT16_C( 29097), INT16_C( 4151), INT16_C( 1759) }, { -INT16_C( 29170), -INT16_C( 16027), INT16_C( 14410), INT16_C( 7707) }, { -INT16_C( 12561), -INT16_C( 12425), INT16_C( 14463), INT16_C( 7903) } }, { { -INT16_C( 12233), INT16_C( 15485), -INT16_C( 2339), INT16_C( 21214) }, { -INT16_C( 21507), -INT16_C( 30735), INT16_C( 15741), INT16_C( 27669) }, { -INT16_C( 1025), -INT16_C( 16387), -INT16_C( 3), INT16_C( 32479) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vorr_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vorr_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 1845357222), INT32_C( 1792000206) }, { INT32_C( 1952762721), -INT32_C( 591518222) }, { -INT32_C( 161030277), -INT32_C( 16783874) } }, { { INT32_C( 1074647543), -INT32_C( 1556974049) }, { INT32_C( 1200672026), INT32_C( 225866674) }, { INT32_C( 1201528319), -INT32_C( 1351190593) } }, { { INT32_C( 1453291656), -INT32_C( 1698599368) }, { INT32_C( 722347321), INT32_C( 1091095626) }, { INT32_C( 2141158841), -INT32_C( 607523206) } }, { { -INT32_C( 1115613795), -INT32_C( 1503612020) }, { INT32_C( 871231616), -INT32_C( 348101789) }, { -INT32_C( 1074924131), -INT32_C( 278858769) } }, { { INT32_C( 339927004), -INT32_C( 2018639027) }, { INT32_C( 1924316200), INT32_C( 649312904) }, { INT32_C( 1995636732), -INT32_C( 
1480606771) } }, { { INT32_C( 1558394320), INT32_C( 1761756136) }, { -INT32_C( 1751322572), INT32_C( 797170771) }, { -INT32_C( 536873484), INT32_C( 1870913531) } }, { { INT32_C( 138724795), -INT32_C( 259001656) }, { INT32_C( 929186479), -INT32_C( 866314756) }, { INT32_C( 1063700415), -INT32_C( 52430852) } }, { { INT32_C( 858275914), -INT32_C( 1197724796) }, { INT32_C( 1850685467), -INT32_C( 811675116) }, { INT32_C( 2138011739), -INT32_C( 6358124) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vorr_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vorr_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; int64_t b[1]; int64_t r[1]; } test_vec[] = { { { INT64_C( 3151194404085669577) }, { INT64_C( 7150093679456372669) }, { INT64_C( 7762937874449424381) } }, { { INT64_C( 8772691476915187418) }, { -INT64_C( 7160547676975981979) }, { -INT64_C( 162446265433882881) } }, { { INT64_C( 6919140515929545021) }, { -INT64_C( 2327186706082379520) }, { -INT64_C( 20902094605199043) } }, { { -INT64_C( 7602424505649834598) }, { -INT64_C( 914887447672121379) }, { -INT64_C( 612511850049939489) } }, { { INT64_C( 290432717920553731) }, { -INT64_C( 4575365684894762596) }, { -INT64_C( 4285215561364800609) } }, { { INT64_C( 2748959705308497502) }, { -INT64_C( 2376172329570520740) }, { -INT64_C( 61247352163879074) } }, { { INT64_C( 4933285092639757422) }, { INT64_C( 3705450760845577094) }, { INT64_C( 8610583385991536622) } }, { { -INT64_C( 3216438236108606739) }, { -INT64_C( 4191185119456447717) }, { -INT64_C( 2891896485226007553) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r = simde_vorr_s64(a, b); 
simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; } static int test_simde_vorr_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C( 35), UINT8_C(214), UINT8_C(198), UINT8_C(133), UINT8_C(234), UINT8_C(114), UINT8_C(199), UINT8_C(150) }, { UINT8_C( 85), UINT8_C( 51), UINT8_C( 53), UINT8_C(117), UINT8_C( 5), UINT8_C(105), UINT8_C(134), UINT8_C( 51) }, { UINT8_C(119), UINT8_C(247), UINT8_C(247), UINT8_C(245), UINT8_C(239), UINT8_C(123), UINT8_C(199), UINT8_C(183) } }, { { UINT8_C( 44), UINT8_C( 39), UINT8_C( 84), UINT8_C(230), UINT8_C( 67), UINT8_C(145), UINT8_C(231), UINT8_C( 39) }, { UINT8_C( 56), UINT8_C(162), UINT8_C(186), UINT8_C(218), UINT8_C(216), UINT8_C( 29), UINT8_C(225), UINT8_C(251) }, { UINT8_C( 60), UINT8_C(167), UINT8_C(254), UINT8_C(254), UINT8_C(219), UINT8_C(157), UINT8_C(231), UINT8_MAX } }, { { UINT8_C(243), UINT8_C(167), UINT8_C(128), UINT8_C(222), UINT8_C( 25), UINT8_C( 71), UINT8_C(116), UINT8_C(110) }, { UINT8_C(122), UINT8_C(170), UINT8_C(228), UINT8_C(128), UINT8_C( 19), UINT8_C(106), UINT8_C(179), UINT8_C( 63) }, { UINT8_C(251), UINT8_C(175), UINT8_C(228), UINT8_C(222), UINT8_C( 27), UINT8_C(111), UINT8_C(247), UINT8_C(127) } }, { { UINT8_C(146), UINT8_C( 7), UINT8_C( 37), UINT8_C(213), UINT8_C(152), UINT8_C( 12), UINT8_C(252), UINT8_C(208) }, { UINT8_C(174), UINT8_C(183), UINT8_C(170), UINT8_C(135), UINT8_C(212), UINT8_C(139), UINT8_C(130), UINT8_C(199) }, { UINT8_C(190), UINT8_C(183), UINT8_C(175), UINT8_C(215), UINT8_C(220), UINT8_C(143), UINT8_C(254), UINT8_C(215) } }, { { UINT8_C( 50), UINT8_C( 3), UINT8_C(165), UINT8_C( 75), UINT8_C( 74), UINT8_C( 26), UINT8_C(186), UINT8_C(197) }, { UINT8_C(196), UINT8_C(158), UINT8_C( 69), UINT8_C(215), UINT8_C( 8), UINT8_C(248), UINT8_C( 23), UINT8_C(154) }, { UINT8_C(246), UINT8_C(159), UINT8_C(229), UINT8_C(223), UINT8_C( 74), UINT8_C(250), UINT8_C(191), UINT8_C(223) } }, { { UINT8_MAX, UINT8_C( 60), 
UINT8_C(112), UINT8_C(151), UINT8_C( 73), UINT8_C(108), UINT8_C(103), UINT8_C(247) }, { UINT8_C( 35), UINT8_C( 17), UINT8_C(126), UINT8_C(247), UINT8_C(157), UINT8_C( 1), UINT8_C(191), UINT8_C(207) }, { UINT8_MAX, UINT8_C( 61), UINT8_C(126), UINT8_C(247), UINT8_C(221), UINT8_C(109), UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 4), UINT8_C(100), UINT8_C( 27), UINT8_C( 78), UINT8_C(126), UINT8_C(213), UINT8_C( 19), UINT8_C( 66) }, { UINT8_C(115), UINT8_C( 88), UINT8_C( 26), UINT8_C(123), UINT8_C( 80), UINT8_C( 49), UINT8_C( 22), UINT8_C( 79) }, { UINT8_C(119), UINT8_C(124), UINT8_C( 27), UINT8_C(127), UINT8_C(126), UINT8_C(245), UINT8_C( 23), UINT8_C( 79) } }, { { UINT8_C(109), UINT8_C(134), UINT8_C(230), UINT8_C(182), UINT8_C(242), UINT8_C( 77), UINT8_C(174), UINT8_C( 22) }, { UINT8_C( 95), UINT8_C( 44), UINT8_C( 13), UINT8_C(252), UINT8_C( 45), UINT8_C(204), UINT8_C(203), UINT8_C( 49) }, { UINT8_C(127), UINT8_C(174), UINT8_C(239), UINT8_C(254), UINT8_MAX, UINT8_C(205), UINT8_C(239), UINT8_C( 55) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vorr_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vorr_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(38025), UINT16_C( 1112), UINT16_C(15377), UINT16_C( 4043) }, { UINT16_C( 8697), UINT16_C(57130), UINT16_C( 4216), UINT16_C(52332) }, { UINT16_C(46585), UINT16_C(57210), UINT16_C(15481), UINT16_C(53231) } }, { { UINT16_C(31219), UINT16_C(35443), UINT16_C(51575), UINT16_C(56842) }, { UINT16_C(57021), UINT16_C(64071), UINT16_C( 9441), UINT16_C(27152) }, { UINT16_MAX, UINT16_C(64119), UINT16_C(60919), UINT16_C(65050) } }, { { UINT16_C(26808), UINT16_C(51823), UINT16_C(15012), UINT16_C(40665) }, { UINT16_C( 859), UINT16_C(54141), 
UINT16_C(59668), UINT16_C( 1952) }, { UINT16_C(27643), UINT16_C(56191), UINT16_C(64436), UINT16_C(40953) } }, { { UINT16_C( 4962), UINT16_C(55953), UINT16_C(39900), UINT16_C(39608) }, { UINT16_C(65401), UINT16_C(23444), UINT16_C(42019), UINT16_C(56261) }, { UINT16_C(65403), UINT16_C(56213), UINT16_C(49151), UINT16_C(56317) } }, { { UINT16_C(13325), UINT16_C(45477), UINT16_C(32622), UINT16_C(51791) }, { UINT16_C(52610), UINT16_C(38557), UINT16_C(15798), UINT16_C( 6557) }, { UINT16_C(64911), UINT16_C(47037), UINT16_C(32766), UINT16_C(56287) } }, { { UINT16_C(11857), UINT16_C(11763), UINT16_C(43977), UINT16_C(17351) }, { UINT16_C(23466), UINT16_C(52638), UINT16_C(25344), UINT16_C( 3496) }, { UINT16_C(32763), UINT16_C(60927), UINT16_C(60361), UINT16_C(20463) } }, { { UINT16_C(20120), UINT16_C( 1726), UINT16_C( 3789), UINT16_C(20432) }, { UINT16_C(28379), UINT16_C(37350), UINT16_C(33707), UINT16_C(64682) }, { UINT16_C(28379), UINT16_C(38910), UINT16_C(36847), UINT16_C(65530) } }, { { UINT16_C(40370), UINT16_C(31530), UINT16_C(61768), UINT16_C(62142) }, { UINT16_C(23629), UINT16_C(19903), UINT16_C(26816), UINT16_C(22618) }, { UINT16_C(56831), UINT16_C(32703), UINT16_C(63944), UINT16_C(64254) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vorr_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vorr_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(2542243251), UINT32_C(1923390208) }, { UINT32_C(1002757752), UINT32_C(4145368516) }, { UINT32_C(3217547259), UINT32_C(4155891652) } }, { { UINT32_C(1695516834), UINT32_C(1276650104) }, { UINT32_C(2163439025), UINT32_C(4227580744) }, { UINT32_C(3858759091), UINT32_C(4294697848) } }, { { UINT32_C(1351844688), UINT32_C(2529310494) }, { 
UINT32_C(3654453013), UINT32_C(1993402324) }, { UINT32_C(3654518613), UINT32_C(4141021150) } }, { { UINT32_C(3906789232), UINT32_C(3006592002) }, { UINT32_C(2972919657), UINT32_C( 984428522) }, { UINT32_C(4194303865), UINT32_C(3149791210) } }, { { UINT32_C(3498721458), UINT32_C(2355514743) }, { UINT32_C(2825205972), UINT32_C(2417964320) }, { UINT32_C(4176443638), UINT32_C(2625600887) } }, { { UINT32_C( 377027348), UINT32_C(1506454767) }, { UINT32_C(3188391379), UINT32_C(3740841772) }, { UINT32_C(3195731927), UINT32_C(3757752303) } }, { { UINT32_C(1856930807), UINT32_C(2767853008) }, { UINT32_C(1833721677), UINT32_C(2851957652) }, { UINT32_C(1877925887), UINT32_C(2919202772) } }, { { UINT32_C(1455388006), UINT32_C(4121921825) }, { UINT32_C(3014900103), UINT32_C(1754442609) }, { UINT32_C(4156554727), UINT32_C(4257196913) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vorr_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vorr_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C( 8723019818965631880) }, { UINT64_C(10354760186494598420) }, { UINT64_C(18428588727770005404) } }, { { UINT64_C(13192140800111500143) }, { UINT64_C(11472348407170591550) }, { UINT64_C(13778761135761128319) } }, { { UINT64_C(15007327377362344059) }, { UINT64_C(10380688433334625017) }, { UINT64_C(15010424705917123323) } }, { { UINT64_C(14472684133206828281) }, { UINT64_C( 4102455717689345080) }, { UINT64_C(17942303254750553337) } }, { { UINT64_C( 5889942334929607729) }, { UINT64_C( 4511446038880561129) }, { UINT64_C( 9205330554010189817) } }, { { UINT64_C( 6473705803523085444) }, { UINT64_C( 9796340008937356542) }, { UINT64_C(16138578302157257982) } }, { { UINT64_C( 2944283500602521395) }, { UINT64_C( 
1251100802482875865) }, { UINT64_C( 4169488553517449211) } }, { { UINT64_C(17590489458298371234) }, { UINT64_C(10326932464964978036) }, { UINT64_C(18401139624751660534) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_vorr_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; } static int test_simde_vorrq_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { -INT8_C( 120), -INT8_C( 40), INT8_C( 38), INT8_C( 49), -INT8_C( 100), INT8_C( 57), -INT8_C( 73), -INT8_C( 108), INT8_C( 39), INT8_C( 6), -INT8_C( 9), INT8_C( 2), INT8_C( 86), INT8_C( 18), INT8_C( 34), INT8_C( 37) }, { INT8_C( 83), INT8_C( 10), INT8_C( 53), INT8_C( 69), INT8_C( 64), INT8_C( 88), INT8_C( 30), -INT8_C( 92), INT8_C( 121), -INT8_C( 41), INT8_C( 117), INT8_C( 38), -INT8_C( 6), INT8_C( 74), -INT8_C( 127), -INT8_C( 126) }, { -INT8_C( 37), -INT8_C( 38), INT8_C( 55), INT8_C( 117), -INT8_C( 36), INT8_C( 121), -INT8_C( 65), -INT8_C( 76), INT8_MAX, -INT8_C( 41), -INT8_C( 9), INT8_C( 38), -INT8_C( 2), INT8_C( 90), -INT8_C( 93), -INT8_C( 89) } }, { { INT8_C( 35), -INT8_C( 88), -INT8_C( 77), -INT8_C( 65), -INT8_C( 31), INT8_C( 106), INT8_C( 83), INT8_C( 8), INT8_C( 113), INT8_C( 74), INT8_C( 10), -INT8_C( 57), INT8_C( 92), INT8_C( 45), -INT8_C( 20), -INT8_C( 81) }, { INT8_C( 55), INT8_C( 34), -INT8_C( 11), INT8_C( 119), INT8_C( 122), INT8_C( 19), INT8_C( 27), -INT8_C( 13), -INT8_C( 22), -INT8_C( 112), INT8_C( 25), -INT8_C( 28), -INT8_C( 37), -INT8_C( 101), INT8_C( 103), -INT8_C( 2) }, { INT8_C( 55), -INT8_C( 86), -INT8_C( 9), -INT8_C( 1), -INT8_C( 5), INT8_C( 123), INT8_C( 91), -INT8_C( 5), -INT8_C( 5), -INT8_C( 38), INT8_C( 27), -INT8_C( 25), -INT8_C( 33), -INT8_C( 65), -INT8_C( 17), -INT8_C( 1) } }, { { INT8_C( 67), INT8_C( 26), -INT8_C( 67), INT8_C( 36), 
-INT8_C( 123), INT8_C( 16), INT8_C( 45), -INT8_C( 10), INT8_C( 90), INT8_C( 55), -INT8_C( 67), -INT8_C( 74), INT8_C( 100), -INT8_C( 87), INT8_C( 101), -INT8_C( 100) }, { -INT8_C( 53), INT8_C( 90), INT8_C( 19), INT8_C( 70), INT8_C( 109), INT8_C( 47), INT8_C( 57), INT8_C( 87), -INT8_C( 65), INT8_C( 83), INT8_C( 60), -INT8_C( 102), -INT8_C( 18), -INT8_C( 93), -INT8_C( 104), INT8_C( 49) }, { -INT8_C( 53), INT8_C( 90), -INT8_C( 65), INT8_C( 102), -INT8_C( 19), INT8_C( 63), INT8_C( 61), -INT8_C( 9), -INT8_C( 1), INT8_C( 119), -INT8_C( 67), -INT8_C( 66), -INT8_C( 18), -INT8_C( 85), -INT8_C( 3), -INT8_C( 67) } }, { { -INT8_C( 67), INT8_C( 85), INT8_C( 85), INT8_C( 66), INT8_C( 101), -INT8_C( 126), INT8_C( 56), -INT8_C( 65), -INT8_C( 70), -INT8_C( 11), INT8_C( 117), INT8_C( 30), -INT8_C( 97), -INT8_C( 37), -INT8_C( 70), INT8_C( 106) }, { INT8_C( 53), -INT8_C( 50), -INT8_C( 80), -INT8_C( 93), -INT8_C( 3), -INT8_C( 22), -INT8_C( 6), -INT8_C( 68), INT8_C( 61), INT8_C( 54), INT8_C( 87), INT8_C( 43), -INT8_C( 39), -INT8_C( 17), INT8_C( 92), -INT8_C( 105) }, { -INT8_C( 67), -INT8_C( 33), -INT8_C( 11), -INT8_C( 29), -INT8_C( 3), -INT8_C( 22), -INT8_C( 6), -INT8_C( 65), -INT8_C( 65), -INT8_C( 9), INT8_C( 119), INT8_C( 63), -INT8_C( 33), -INT8_C( 1), -INT8_C( 2), -INT8_C( 1) } }, { { INT8_C( 69), -INT8_C( 79), -INT8_C( 39), -INT8_C( 86), INT8_C( 52), INT8_C( 18), INT8_C( 106), -INT8_C( 18), INT8_C( 7), -INT8_C( 33), INT8_C( 12), -INT8_C( 90), -INT8_C( 70), -INT8_C( 57), INT8_C( 17), -INT8_C( 16) }, { -INT8_C( 107), -INT8_C( 63), -INT8_C( 109), -INT8_C( 110), -INT8_C( 85), -INT8_C( 115), INT8_C( 78), -INT8_C( 24), -INT8_C( 60), -INT8_C( 91), INT8_C( 19), -INT8_C( 99), -INT8_C( 107), INT8_C( 111), INT8_C( 52), -INT8_C( 38) }, { -INT8_C( 43), -INT8_C( 15), -INT8_C( 37), -INT8_C( 70), -INT8_C( 65), -INT8_C( 97), INT8_C( 110), -INT8_C( 18), -INT8_C( 57), -INT8_C( 1), INT8_C( 31), -INT8_C( 65), -INT8_C( 65), -INT8_C( 17), INT8_C( 53), -INT8_C( 6) } }, { { INT8_C( 33), INT8_C( 14), 
-INT8_C( 124), INT8_C( 85), INT8_C( 32), -INT8_C( 18), INT8_C( 67), INT8_C( 39), -INT8_C( 50), INT8_C( 79), -INT8_C( 50), -INT8_C( 120), INT8_C( 22), -INT8_C( 33), INT8_C( 120), -INT8_C( 85) }, { -INT8_C( 96), INT8_C( 11), INT8_C( 61), INT8_C( 76), -INT8_C( 103), -INT8_C( 116), INT8_C( 52), INT8_C( 93), INT8_C( 49), INT8_C( 72), -INT8_C( 6), -INT8_C( 58), -INT8_C( 73), INT8_C( 47), -INT8_C( 96), -INT8_C( 40) }, { -INT8_C( 95), INT8_C( 15), -INT8_C( 67), INT8_C( 93), -INT8_C( 71), -INT8_C( 18), INT8_C( 119), INT8_MAX, -INT8_C( 1), INT8_C( 79), -INT8_C( 2), -INT8_C( 50), -INT8_C( 73), -INT8_C( 1), -INT8_C( 8), -INT8_C( 5) } }, { { INT8_C( 61), INT8_C( 37), INT8_C( 45), INT8_C( 93), INT8_C( 19), INT8_C( 112), -INT8_C( 124), -INT8_C( 31), -INT8_C( 64), INT8_C( 82), INT8_C( 106), -INT8_C( 42), INT8_C( 49), -INT8_C( 30), -INT8_C( 126), -INT8_C( 46) }, { -INT8_C( 18), -INT8_C( 65), INT8_C( 30), -INT8_C( 121), INT8_C( 75), INT8_C( 82), -INT8_C( 28), INT8_C( 125), -INT8_C( 102), -INT8_C( 34), INT8_C( 67), INT8_C( 82), INT8_C( 13), -INT8_C( 28), INT8_C( 42), INT8_C( 74) }, { -INT8_C( 1), -INT8_C( 65), INT8_C( 63), -INT8_C( 33), INT8_C( 91), INT8_C( 114), -INT8_C( 28), -INT8_C( 3), -INT8_C( 38), -INT8_C( 34), INT8_C( 107), -INT8_C( 42), INT8_C( 61), -INT8_C( 26), -INT8_C( 86), -INT8_C( 38) } }, { { INT8_C( 9), INT8_C( 88), -INT8_C( 89), INT8_C( 28), -INT8_C( 56), INT8_C( 44), -INT8_C( 2), -INT8_C( 120), INT8_C( 126), INT8_C( 104), INT8_C( 95), -INT8_C( 80), INT8_C( 74), -INT8_C( 31), -INT8_C( 126), INT8_C( 56) }, { -INT8_C( 96), -INT8_C( 96), -INT8_C( 65), -INT8_C( 20), -INT8_C( 14), -INT8_C( 93), INT8_C( 105), -INT8_C( 115), -INT8_C( 126), -INT8_C( 84), -INT8_C( 33), -INT8_C( 113), -INT8_C( 112), INT8_C( 9), -INT8_C( 38), -INT8_C( 103) }, { -INT8_C( 87), -INT8_C( 8), -INT8_C( 65), -INT8_C( 4), -INT8_C( 6), -INT8_C( 81), -INT8_C( 1), -INT8_C( 115), -INT8_C( 2), -INT8_C( 20), -INT8_C( 33), -INT8_C( 65), -INT8_C( 38), -INT8_C( 23), -INT8_C( 38), -INT8_C( 71) } }, }; for (size_t 
i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vorrq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vorrq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 13388), INT16_C( 21211), INT16_C( 15643), INT16_C( 19225), -INT16_C( 8663), -INT16_C( 7904), -INT16_C( 29244), INT16_C( 28768) }, { -INT16_C( 7306), -INT16_C( 3637), INT16_C( 28019), -INT16_C( 12496), INT16_C( 7322), INT16_C( 853), -INT16_C( 284), INT16_C( 12518) }, { -INT16_C( 2178), -INT16_C( 3109), INT16_C( 32123), -INT16_C( 12487), -INT16_C( 8517), -INT16_C( 7307), -INT16_C( 28), INT16_C( 28902) } }, { { -INT16_C( 15821), INT16_C( 20098), -INT16_C( 25601), INT16_C( 10649), -INT16_C( 18055), INT16_C( 15882), INT16_C( 27206), -INT16_C( 17234) }, { INT16_C( 31053), -INT16_C( 16211), -INT16_C( 8730), -INT16_C( 32625), -INT16_C( 6662), -INT16_C( 8572), INT16_C( 27363), INT16_C( 5646) }, { -INT16_C( 1153), -INT16_C( 12625), -INT16_C( 8193), -INT16_C( 22113), -INT16_C( 517), -INT16_C( 370), INT16_C( 27367), -INT16_C( 16722) } }, { { -INT16_C( 28628), INT16_C( 11364), -INT16_C( 468), -INT16_C( 23211), INT16_C( 24503), -INT16_C( 285), -INT16_C( 28215), INT16_C( 5818) }, { INT16_C( 26635), -INT16_C( 3626), INT16_C( 25925), INT16_C( 16242), -INT16_C( 2486), INT16_C( 11805), INT16_C( 11104), -INT16_C( 29372) }, { -INT16_C( 2001), -INT16_C( 522), -INT16_C( 147), -INT16_C( 16521), -INT16_C( 1), -INT16_C( 257), -INT16_C( 17431), -INT16_C( 24578) } }, { { -INT16_C( 22084), -INT16_C( 5959), INT16_C( 3751), INT16_C( 24205), INT16_C( 29037), INT16_C( 13916), INT16_C( 5890), INT16_C( 3404) }, { INT16_C( 8831), -INT16_C( 15105), INT16_C( 29063), -INT16_C( 11772), INT16_C( 8551), -INT16_C( 14592), INT16_C( 17485), INT16_C( 2388) }, { -INT16_C( 21505), 
-INT16_C( 4865), INT16_C( 32679), -INT16_C( 8563), INT16_C( 29039), -INT16_C( 2212), INT16_C( 22351), INT16_C( 3420) } }, { { INT16_C( 3565), -INT16_C( 27407), INT16_C( 32283), -INT16_C( 30477), INT16_C( 20463), -INT16_C( 3394), INT16_C( 2662), -INT16_C( 6657) }, { -INT16_C( 468), -INT16_C( 19286), -INT16_C( 20881), -INT16_C( 10618), -INT16_C( 31025), INT16_C( 7326), -INT16_C( 3382), -INT16_C( 18395) }, { -INT16_C( 19), -INT16_C( 19205), -INT16_C( 385), -INT16_C( 8457), -INT16_C( 12305), -INT16_C( 322), -INT16_C( 1298), -INT16_C( 513) } }, { { INT16_C( 5632), INT16_C( 6988), INT16_C( 16277), -INT16_C( 31580), INT16_C( 25231), -INT16_C( 2698), INT16_C( 30317), -INT16_C( 26149) }, { -INT16_C( 31372), -INT16_C( 7091), -INT16_C( 11469), INT16_C( 698), INT16_C( 22617), INT16_C( 9247), INT16_C( 17483), INT16_C( 19420) }, { -INT16_C( 26764), -INT16_C( 179), -INT16_C( 73), -INT16_C( 31042), INT16_C( 31455), -INT16_C( 2689), INT16_C( 30319), -INT16_C( 9249) } }, { { INT16_C( 10331), -INT16_C( 3994), INT16_C( 2664), -INT16_C( 2188), -INT16_C( 5267), -INT16_C( 9492), -INT16_C( 14495), -INT16_C( 10893) }, { -INT16_C( 16052), INT16_C( 32697), INT16_C( 29844), -INT16_C( 4478), -INT16_C( 24116), INT16_C( 5906), -INT16_C( 4379), INT16_C( 16482) }, { -INT16_C( 5793), -INT16_C( 1), INT16_C( 32508), -INT16_C( 10), -INT16_C( 5139), -INT16_C( 8194), -INT16_C( 4123), -INT16_C( 10893) } }, { { -INT16_C( 14058), INT16_C( 32304), -INT16_C( 23085), INT16_C( 16501), INT16_C( 25232), -INT16_C( 3814), -INT16_C( 29143), INT16_C( 30406) }, { -INT16_C( 32689), -INT16_C( 7179), INT16_C( 30708), -INT16_C( 16175), -INT16_C( 7400), -INT16_C( 296), INT16_C( 15057), -INT16_C( 6082) }, { -INT16_C( 13985), -INT16_C( 11), -INT16_C( 2057), -INT16_C( 16139), -INT16_C( 7272), -INT16_C( 38), -INT16_C( 16647), -INT16_C( 258) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); 
simde_int16x8_t r = simde_vorrq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vorrq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 1853195043), -INT32_C( 662805650), -INT32_C( 1554457435), INT32_C( 316683366) }, { INT32_C( 811600762), INT32_C( 1834839949), INT32_C( 555751229), INT32_C( 715490055) }, { INT32_C( 2121632635), -INT32_C( 41979921), -INT32_C( 1552359491), INT32_C( 988133223) } }, { { INT32_C( 1956125190), INT32_C( 575477373), INT32_C( 1506125299), INT32_C( 1399629273) }, { INT32_C( 1048890545), INT32_C( 2041372988), -INT32_C( 6566664), INT32_C( 1428766799) }, { INT32_C( 2124209847), INT32_C( 2079192957), -INT32_C( 2101765), INT32_C( 1466820063) } }, { { -INT32_C( 657800870), -INT32_C( 872802344), -INT32_C( 1775976260), INT32_C( 384405605) }, { -INT32_C( 1722454691), INT32_C( 1192362319), INT32_C( 491236813), INT32_C( 1215459565) }, { -INT32_C( 639636129), -INT32_C( 805693473), -INT32_C( 1620578819), INT32_C( 1593569517) } }, { { INT32_C( 169884722), INT32_C( 282401363), INT32_C( 1084684762), -INT32_C( 413757558) }, { INT32_C( 1283501053), INT32_C( 2056491692), INT32_C( 764926528), INT32_C( 2104822091) }, { INT32_C( 1319157759), INT32_C( 2060950271), INT32_C( 1840774106), -INT32_C( 8941621) } }, { { -INT32_C( 1719167674), -INT32_C( 1968612176), -INT32_C( 540389547), -INT32_C( 590994977) }, { INT32_C( 2032682700), INT32_C( 435403737), -INT32_C( 515470698), -INT32_C( 648102765) }, { -INT32_C( 105916466), -INT32_C( 1677983751), -INT32_C( 3223593), -INT32_C( 572604961) } }, { { INT32_C( 24372561), -INT32_C( 1769202623), INT32_C( 1266046572), INT32_C( 1143422071) }, { INT32_C( 1555910531), -INT32_C( 1602899958), -INT32_C( 847135942), -INT32_C( 911745161) }, { INT32_C( 1577054163), -INT32_C( 1224885173), -INT32_C( 805830786), -INT32_C( 844628105) } }, { { INT32_C( 97131204), -INT32_C( 1566812618), INT32_C( 619516589), 
-INT32_C( 781708210) }, { INT32_C( 1848452451), INT32_C( 252617429), -INT32_C( 706899874), INT32_C( 882803823) }, { INT32_C( 1877950439), -INT32_C( 1348536585), -INT32_C( 167931137), -INT32_C( 167865233) } }, { { -INT32_C( 734434914), INT32_C( 1819792831), INT32_C( 915498215), -INT32_C( 603457159) }, { -INT32_C( 196463329), INT32_C( 889477591), INT32_C( 1477173737), INT32_C( 59550053) }, { -INT32_C( 193233505), INT32_C( 2105007615), INT32_C( 2124146159), -INT32_C( 544212611) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vorrq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vorrq_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { -INT64_C( 7289792402058885751), INT64_C( 7962744538976951060) }, { INT64_C( 4959271613877493853), -INT64_C( 8811573062044955811) }, { -INT64_C( 2389160221097794083), -INT64_C( 1173370237447316643) } }, { { INT64_C( 3408739052754675525), -INT64_C( 1456366311260998302) }, { -INT64_C( 2515076220118064750), -INT64_C( 4182735194501316494) }, { -INT64_C( 45344289161107497), -INT64_C( 1154062185669216910) } }, { { INT64_C( 1000299032988902807), -INT64_C( 2993713681861684485) }, { INT64_C( 7075087877057169874), -INT64_C( 1186764091284166590) }, { INT64_C( 8065884243059437015), -INT64_C( 2263495882474757) } }, { { -INT64_C( 4403813695115689943), INT64_C( 5615637420298685140) }, { -INT64_C( 7035803098410936055), INT64_C( 1908937941248388150) }, { -INT64_C( 2379080516704954071), INT64_C( 6917503720056647414) } }, { { INT64_C( 8390437693362740968), -INT64_C( 8798490521932874724) }, { -INT64_C( 8498897969402182380), INT64_C( 4472521205212540422) }, { -INT64_C( 108688975236980740), -INT64_C( 4614501322251764194) } }, { { -INT64_C( 1168299466064920140), -INT64_C( 
1938115942705310183) }, { INT64_C( 8633227249326528754), INT64_C( 3631168478866499792) }, { -INT64_C( 13651590762423818), -INT64_C( 612790816717406503) } }, { { -INT64_C( 4160391825870860159), INT64_C( 3052725007824964519) }, { -INT64_C( 2020358029854134003), INT64_C( 7410512544945235239) }, { -INT64_C( 1731775071238753907), INT64_C( 7989242793809407911) } }, { { INT64_C( 6283363364298710974), INT64_C( 1164791845038820808) }, { INT64_C( 59820965681404637), INT64_C( 663854660148649707) }, { INT64_C( 6338534974706147327), INT64_C( 1819027964574031851) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_vorrq_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vorrq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(218), UINT8_C(190), UINT8_C( 55), UINT8_C(196), UINT8_C(201), UINT8_C(122), UINT8_C(129), UINT8_C(177), UINT8_C( 1), UINT8_C(192), UINT8_C(189), UINT8_C( 65), UINT8_C(239), UINT8_C(179), UINT8_C(198), UINT8_C(110) }, { UINT8_C( 30), UINT8_C(135), UINT8_C(179), UINT8_C( 66), UINT8_C( 65), UINT8_C(253), UINT8_C(112), UINT8_C( 11), UINT8_C(224), UINT8_C( 85), UINT8_C(152), UINT8_C( 57), UINT8_C(139), UINT8_C(214), UINT8_C(122), UINT8_C(101) }, { UINT8_C(222), UINT8_C(191), UINT8_C(183), UINT8_C(198), UINT8_C(201), UINT8_MAX, UINT8_C(241), UINT8_C(187), UINT8_C(225), UINT8_C(213), UINT8_C(189), UINT8_C(121), UINT8_C(239), UINT8_C(247), UINT8_C(254), UINT8_C(111) } }, { { UINT8_C(148), UINT8_C(177), UINT8_C( 41), UINT8_C( 93), UINT8_C( 43), UINT8_C(170), UINT8_C( 15), UINT8_C( 44), UINT8_C(107), UINT8_C(204), UINT8_C(110), UINT8_C( 90), UINT8_C(127), UINT8_C( 52), UINT8_C(200), UINT8_C(157) }, { UINT8_C(188), UINT8_C(123), UINT8_C(224), UINT8_C(253), UINT8_C(120), UINT8_C( 
80), UINT8_C( 8), UINT8_C( 89), UINT8_C(166), UINT8_C(160), UINT8_C(146), UINT8_C( 49), UINT8_C(119), UINT8_C( 12), UINT8_C(150), UINT8_C( 11) }, { UINT8_C(188), UINT8_C(251), UINT8_C(233), UINT8_C(253), UINT8_C(123), UINT8_C(250), UINT8_C( 15), UINT8_C(125), UINT8_C(239), UINT8_C(236), UINT8_C(254), UINT8_C(123), UINT8_C(127), UINT8_C( 60), UINT8_C(222), UINT8_C(159) } }, { { UINT8_C(189), UINT8_C(192), UINT8_C(105), UINT8_C(232), UINT8_C(106), UINT8_C(120), UINT8_C( 21), UINT8_C(213), UINT8_C( 68), UINT8_C(131), UINT8_C( 47), UINT8_C(196), UINT8_C(183), UINT8_C(247), UINT8_C( 97), UINT8_C(115) }, { UINT8_C(115), UINT8_C( 65), UINT8_C(112), UINT8_C(235), UINT8_C(146), UINT8_C(120), UINT8_C( 68), UINT8_C( 56), UINT8_C( 25), UINT8_C(214), UINT8_C(105), UINT8_C(144), UINT8_C(226), UINT8_MAX, UINT8_C(155), UINT8_C(159) }, { UINT8_MAX, UINT8_C(193), UINT8_C(121), UINT8_C(235), UINT8_C(250), UINT8_C(120), UINT8_C( 85), UINT8_C(253), UINT8_C( 93), UINT8_C(215), UINT8_C(111), UINT8_C(212), UINT8_C(247), UINT8_MAX, UINT8_C(251), UINT8_MAX } }, { { UINT8_C(191), UINT8_C( 4), UINT8_C(136), UINT8_C( 42), UINT8_C(124), UINT8_C(157), UINT8_MAX, UINT8_C(193), UINT8_C( 32), UINT8_C( 47), UINT8_C(133), UINT8_C(215), UINT8_C( 38), UINT8_C(230), UINT8_C( 75), UINT8_C(153) }, { UINT8_C( 40), UINT8_C(187), UINT8_C(133), UINT8_C(186), UINT8_C( 52), UINT8_C(201), UINT8_C(242), UINT8_C( 77), UINT8_C(160), UINT8_C( 91), UINT8_C(221), UINT8_C(130), UINT8_C( 90), UINT8_C(120), UINT8_C( 34), UINT8_C( 26) }, { UINT8_C(191), UINT8_C(191), UINT8_C(141), UINT8_C(186), UINT8_C(124), UINT8_C(221), UINT8_MAX, UINT8_C(205), UINT8_C(160), UINT8_C(127), UINT8_C(221), UINT8_C(215), UINT8_C(126), UINT8_C(254), UINT8_C(107), UINT8_C(155) } }, { { UINT8_C(125), UINT8_C(170), UINT8_C( 68), UINT8_C(249), UINT8_C( 71), UINT8_C( 67), UINT8_C(186), UINT8_C(103), UINT8_C(114), UINT8_C( 63), UINT8_C( 62), UINT8_C(153), UINT8_C( 38), UINT8_C(137), UINT8_C( 50), UINT8_C( 78) }, { UINT8_C( 69), UINT8_C(183), 
UINT8_C( 8), UINT8_C(121), UINT8_C(129), UINT8_C(250), UINT8_C(198), UINT8_C( 33), UINT8_C( 85), UINT8_C(163), UINT8_C(163), UINT8_C(175), UINT8_C( 27), UINT8_C(197), UINT8_C(201), UINT8_C(152) }, { UINT8_C(125), UINT8_C(191), UINT8_C( 76), UINT8_C(249), UINT8_C(199), UINT8_C(251), UINT8_C(254), UINT8_C(103), UINT8_C(119), UINT8_C(191), UINT8_C(191), UINT8_C(191), UINT8_C( 63), UINT8_C(205), UINT8_C(251), UINT8_C(222) } }, { { UINT8_C(111), UINT8_C( 13), UINT8_C(146), UINT8_C(182), UINT8_C( 81), UINT8_C( 76), UINT8_C( 29), UINT8_C(195), UINT8_C(140), UINT8_C( 92), UINT8_C( 92), UINT8_C(178), UINT8_C(229), UINT8_C(143), UINT8_C( 0), UINT8_C( 42) }, { UINT8_C( 70), UINT8_C( 8), UINT8_C(163), UINT8_C(199), UINT8_C( 2), UINT8_C(105), UINT8_C(232), UINT8_C( 87), UINT8_C( 12), UINT8_C(140), UINT8_C( 6), UINT8_C( 40), UINT8_C( 81), UINT8_C(208), UINT8_C(192), UINT8_C(193) }, { UINT8_C(111), UINT8_C( 13), UINT8_C(179), UINT8_C(247), UINT8_C( 83), UINT8_C(109), UINT8_C(253), UINT8_C(215), UINT8_C(140), UINT8_C(220), UINT8_C( 94), UINT8_C(186), UINT8_C(245), UINT8_C(223), UINT8_C(192), UINT8_C(235) } }, { { UINT8_C(221), UINT8_C( 82), UINT8_C(119), UINT8_C( 46), UINT8_C(159), UINT8_C(149), UINT8_C(242), UINT8_C( 43), UINT8_C(241), UINT8_C( 78), UINT8_C(221), UINT8_C(214), UINT8_C(221), UINT8_C(221), UINT8_C( 1), UINT8_C( 36) }, { UINT8_C(229), UINT8_C(164), UINT8_C(235), UINT8_C(231), UINT8_C( 14), UINT8_C(212), UINT8_C( 62), UINT8_C( 26), UINT8_C( 96), UINT8_C( 68), UINT8_C( 66), UINT8_C(177), UINT8_C( 20), UINT8_C( 3), UINT8_C(114), UINT8_C(242) }, { UINT8_C(253), UINT8_C(246), UINT8_MAX, UINT8_C(239), UINT8_C(159), UINT8_C(213), UINT8_C(254), UINT8_C( 59), UINT8_C(241), UINT8_C( 78), UINT8_C(223), UINT8_C(247), UINT8_C(221), UINT8_C(223), UINT8_C(115), UINT8_C(246) } }, { { UINT8_C( 85), UINT8_C(234), UINT8_C( 32), UINT8_C(244), UINT8_C(127), UINT8_C( 18), UINT8_C( 31), UINT8_C(112), UINT8_C( 97), UINT8_C(252), UINT8_C( 70), UINT8_C( 62), UINT8_C(217), UINT8_C( 71), 
UINT8_C( 98), UINT8_C(190) }, { UINT8_C(236), UINT8_C( 78), UINT8_C(165), UINT8_C(250), UINT8_C( 34), UINT8_C(227), UINT8_C( 20), UINT8_C(130), UINT8_C( 40), UINT8_C( 87), UINT8_C( 51), UINT8_C( 60), UINT8_C( 90), UINT8_C(166), UINT8_C( 46), UINT8_C(175) }, { UINT8_C(253), UINT8_C(238), UINT8_C(165), UINT8_C(254), UINT8_C(127), UINT8_C(243), UINT8_C( 31), UINT8_C(242), UINT8_C(105), UINT8_MAX, UINT8_C(119), UINT8_C( 62), UINT8_C(219), UINT8_C(231), UINT8_C(110), UINT8_C(191) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vorrq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vorrq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(10801), UINT16_C(20218), UINT16_C(39462), UINT16_C(17462), UINT16_C(43429), UINT16_C(54451), UINT16_C(35031), UINT16_C(64204) }, { UINT16_C(47232), UINT16_C(47860), UINT16_C(40274), UINT16_C(46114), UINT16_C(30801), UINT16_C(19838), UINT16_C(16556), UINT16_C(56677) }, { UINT16_C(47793), UINT16_C(65278), UINT16_C(40822), UINT16_C(62518), UINT16_C(63989), UINT16_C(56831), UINT16_C(51455), UINT16_C(65517) } }, { { UINT16_C(24682), UINT16_C(36907), UINT16_C(25082), UINT16_C(41172), UINT16_C(34571), UINT16_C(57972), UINT16_C(16400), UINT16_C(37084) }, { UINT16_C(53752), UINT16_C(19019), UINT16_C(28014), UINT16_C(49150), UINT16_C(32229), UINT16_C(37133), UINT16_C(29373), UINT16_C(10094) }, { UINT16_C(61946), UINT16_C(55915), UINT16_C(28158), UINT16_C(49150), UINT16_C(65519), UINT16_C(62333), UINT16_C(29373), UINT16_C(47102) } }, { { UINT16_C(39634), UINT16_C(52663), UINT16_C(36091), UINT16_C( 1645), UINT16_C(57619), UINT16_C( 9192), UINT16_C(50465), UINT16_C( 6580) }, { UINT16_C(65430), UINT16_C( 1123), UINT16_C(25196), UINT16_C(20931), 
UINT16_C(53471), UINT16_C(40162), UINT16_C(20547), UINT16_C( 5571) }, { UINT16_C(65494), UINT16_C(52727), UINT16_C(61183), UINT16_C(22511), UINT16_C(61919), UINT16_C(49130), UINT16_C(54627), UINT16_C( 7671) } }, { { UINT16_C(31466), UINT16_C(59106), UINT16_C(20230), UINT16_C( 6892), UINT16_C(54576), UINT16_C(20797), UINT16_C(61850), UINT16_C(12394) }, { UINT16_C(52976), UINT16_C(23604), UINT16_C(63280), UINT16_C( 4013), UINT16_C(36808), UINT16_C( 2987), UINT16_C(28384), UINT16_C(51744) }, { UINT16_C(65274), UINT16_C(65270), UINT16_C(65334), UINT16_C( 8173), UINT16_C(57336), UINT16_C(23487), UINT16_C(65530), UINT16_C(64106) } }, { { UINT16_C( 1000), UINT16_C(61360), UINT16_C(40274), UINT16_C(33545), UINT16_C(18034), UINT16_C( 3284), UINT16_C(16184), UINT16_C(10300) }, { UINT16_C(28685), UINT16_C(15749), UINT16_C(12903), UINT16_C(12108), UINT16_C(63426), UINT16_C(41530), UINT16_C(23397), UINT16_C(19820) }, { UINT16_C(29677), UINT16_C(65461), UINT16_C(49015), UINT16_C(44877), UINT16_C(63474), UINT16_C(44798), UINT16_C(32637), UINT16_C(28028) } }, { { UINT16_C( 7518), UINT16_C(45116), UINT16_C(17850), UINT16_C(11315), UINT16_C( 2188), UINT16_C(50232), UINT16_C(29767), UINT16_C(21740) }, { UINT16_C(29156), UINT16_C(19345), UINT16_C(56740), UINT16_C(26235), UINT16_C(46548), UINT16_C(14600), UINT16_C(29712), UINT16_C(28294) }, { UINT16_C(32254), UINT16_C(64445), UINT16_C(56766), UINT16_C(28283), UINT16_C(48604), UINT16_C(64824), UINT16_C(29783), UINT16_C(32494) } }, { { UINT16_C(50065), UINT16_C(19231), UINT16_C(21000), UINT16_C(38007), UINT16_C(44890), UINT16_C(41304), UINT16_C(17699), UINT16_C( 2037) }, { UINT16_C(34486), UINT16_C(23123), UINT16_C(52835), UINT16_C(14272), UINT16_C(51331), UINT16_C(38000), UINT16_C(63293), UINT16_C(52738) }, { UINT16_C(51127), UINT16_C(23391), UINT16_C(56939), UINT16_C(47095), UINT16_C(61403), UINT16_C(46456), UINT16_C(63295), UINT16_C(53239) } }, { { UINT16_C( 8634), UINT16_C(49690), UINT16_C(37236), UINT16_C(52823), UINT16_C(44865), 
UINT16_C(25712), UINT16_C(26100), UINT16_C(43884) }, { UINT16_C(49132), UINT16_C(20229), UINT16_C(50829), UINT16_C( 4231), UINT16_C(63374), UINT16_C(52132), UINT16_C(42990), UINT16_C(43162) }, { UINT16_C(49150), UINT16_C(53023), UINT16_C(55293), UINT16_C(57047), UINT16_C(65487), UINT16_C(61428), UINT16_C(59390), UINT16_C(44030) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vorrq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vorrq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C( 99494175), UINT32_C(4046041379), UINT32_C(2532327692), UINT32_C(3424227086) }, { UINT32_C(1287656047), UINT32_C(1421712740), UINT32_C(3503588178), UINT32_C(1167418662) }, { UINT32_C(1307458431), UINT32_C(4122852711), UINT32_C(3606365022), UINT32_C(3449679662) } }, { { UINT32_C(2991293326), UINT32_C(1101231157), UINT32_C(3285685173), UINT32_C(2442195234) }, { UINT32_C(1742622723), UINT32_C(1086036973), UINT32_C(1158713374), UINT32_C(2223678965) }, { UINT32_C(4158641039), UINT32_C(1102839805), UINT32_C(3352794047), UINT32_C(2509960695) } }, { { UINT32_C(1563874600), UINT32_C(4271888713), UINT32_C(2395108972), UINT32_C(1780437607) }, { UINT32_C(2412969378), UINT32_C(3083832729), UINT32_C( 335339293), UINT32_C(2912388996) }, { UINT32_C(3757505962), UINT32_C(4292861401), UINT32_C(2684288893), UINT32_C(4020230119) } }, { { UINT32_C(2785725788), UINT32_C( 312781222), UINT32_C(2275436064), UINT32_C(1525858232) }, { UINT32_C(1458226365), UINT32_C(1863170385), UINT32_C( 495061657), UINT32_C(3989445009) }, { UINT32_C(4142583293), UINT32_C(2142091767), UINT32_C(2678222521), UINT32_C(4294623161) } }, { { UINT32_C(2358498790), UINT32_C(2661169278), UINT32_C(1462124190), UINT32_C(3132168445) }, { 
UINT32_C( 756063196), UINT32_C(4003208789), UINT32_C(3104513576), UINT32_C( 497473079) }, { UINT32_C(2912149502), UINT32_C(4271783551), UINT32_C(4281286334), UINT32_C(3216498431) } }, { { UINT32_C( 698956459), UINT32_C( 281560946), UINT32_C(2204626565), UINT32_C(3795654918) }, { UINT32_C( 151998132), UINT32_C(2499259500), UINT32_C( 38601674), UINT32_C(2216686553) }, { UINT32_C( 699367103), UINT32_C(2499800958), UINT32_C(2205151183), UINT32_C(3862952927) } }, { { UINT32_C(2678966573), UINT32_C(2528146704), UINT32_C(1763252067), UINT32_C(3846919728) }, { UINT32_C( 284056484), UINT32_C(3517244935), UINT32_C(3268669929), UINT32_C( 306639845) }, { UINT32_C(2683296685), UINT32_C(3618961175), UINT32_C(3957061611), UINT32_C(4149213173) } }, { { UINT32_C(3434279868), UINT32_C(3428999785), UINT32_C(2855697273), UINT32_C(1989116370) }, { UINT32_C(3817307612), UINT32_C(1286941539), UINT32_C( 34506781), UINT32_C( 924079227) }, { UINT32_C(4021813244), UINT32_C(3438766955), UINT32_C(2856254333), UINT32_C(2006963707) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vorrq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vorrq_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(14452183464467521954), UINT64_C( 9768552466044535994) }, { UINT64_C( 8323295634090358226), UINT64_C( 6983312523239866925) }, { UINT64_C(18127684834605135346), UINT64_C(16715672690978965183) } }, { { UINT64_C( 8658500954789528855), UINT64_C( 3687679955597296624) }, { UINT64_C( 2807013575393128506), UINT64_C( 28059438019851078) }, { UINT64_C( 9150663531423462719), UINT64_C( 3706444805173523446) } }, { { UINT64_C( 1957528908326895604), UINT64_C( 6818994250571767565) }, { UINT64_C( 7836495514368451232), UINT64_C( 
3022147574291203335) }, { UINT64_C( 9217420585431203828), UINT64_C( 9219430515284631311) } }, { { UINT64_C(16796463827198366162), UINT64_C( 7739429135089916901) }, { UINT64_C( 2318724998057182875), UINT64_C( 4802808010765278538) }, { UINT64_C(16806808032609523675), UINT64_C( 7775458140247023599) } }, { { UINT64_C( 7932627338875929425), UINT64_C( 174477540259753269) }, { UINT64_C( 9105980703266533412), UINT64_C(13853253254717992590) }, { UINT64_C( 9105992286793364341), UINT64_C(14009570822839037887) } }, { { UINT64_C( 1161369996009242919), UINT64_C(10941913606644667761) }, { UINT64_C( 2062423839904996844), UINT64_C( 7552917285566586017) }, { UINT64_C( 2062424190215888367), UINT64_C(18435905585613086193) } }, { { UINT64_C( 3731012125718068955), UINT64_C( 4744793257937656013) }, { UINT64_C( 9307535116733689217), UINT64_C( 8883178328313447838) }, { UINT64_C(12965643299237642203), UINT64_C( 8926107935186804191) } }, { { UINT64_C( 7284282782848737313), UINT64_C( 9684565509766717904) }, { UINT64_C( 7461550652344617002), UINT64_C(11682212645880339973) }, { UINT64_C( 7466678847073019947), UINT64_C(11997588881817419733) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vorrq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vorr_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vorr_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vorr_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vorr_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vorr_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vorr_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vorr_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vorr_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vorrq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vorrq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vorrq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vorrq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vorrq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vorrq_u16) 
SIMDE_TEST_FUNC_LIST_ENTRY(vorrq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vorrq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/padal.c000066400000000000000000001175651400333146700165630ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN padal #include "test-neon.h" #include "../../../simde/arm/neon/padal.h" static int test_simde_vpadal_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int8_t b[8]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 30161), INT16_C( 28803), INT16_C( 7944), -INT16_C( 11953) }, { INT8_C( 24), -INT8_C( 114), INT8_C( 75), INT8_C( 41), -INT8_C( 47), -INT8_C( 102), INT8_C( 58), INT8_C( 60) }, { -INT16_C( 30251), INT16_C( 28919), INT16_C( 7795), -INT16_C( 11835) } }, { { INT16_C( 20613), -INT16_C( 1419), -INT16_C( 15592), -INT16_C( 26460) }, { INT8_C( 99), -INT8_C( 24), INT8_C( 126), INT8_C( 100), -INT8_C( 26), INT8_C( 88), INT8_C( 13), INT8_C( 22) }, { INT16_C( 20688), -INT16_C( 1193), -INT16_C( 15530), -INT16_C( 26425) } }, { { -INT16_C( 28446), -INT16_C( 5242), -INT16_C( 10833), -INT16_C( 14404) }, { INT8_C( 100), INT8_C( 7), -INT8_C( 16), INT8_C( 53), -INT8_C( 95), INT8_C( 43), INT8_C( 114), INT8_C( 38) }, { -INT16_C( 28339), -INT16_C( 5205), -INT16_C( 10885), -INT16_C( 14252) } }, { { -INT16_C( 6277), -INT16_C( 27872), -INT16_C( 14934), INT16_C( 3371) }, { -INT8_C( 83), -INT8_C( 87), INT8_C( 113), -INT8_C( 109), INT8_C( 2), INT8_C( 126), -INT8_C( 87), -INT8_C( 28) }, { -INT16_C( 6447), -INT16_C( 27868), -INT16_C( 14806), INT16_C( 3256) } }, { { INT16_C( 12302), -INT16_C( 16945), -INT16_C( 29947), INT16_C( 27012) }, { -INT8_C( 109), INT8_C( 116), -INT8_C( 97), INT8_C( 52), -INT8_C( 97), INT8_C( 17), INT8_C( 91), INT8_C( 26) }, { INT16_C( 12309), -INT16_C( 16990), -INT16_C( 30027), INT16_C( 27129) } }, { { INT16_C( 31736), -INT16_C( 23890), -INT16_C( 9920), -INT16_C( 4689) }, { -INT8_C( 125), INT8_C( 32), -INT8_C( 127), -INT8_C( 123), -INT8_C( 98), INT8_C( 42), INT8_C( 105), -INT8_C( 
84) }, { INT16_C( 31643), -INT16_C( 24140), -INT16_C( 9976), -INT16_C( 4668) } }, { { INT16_C( 14682), INT16_C( 24681), -INT16_C( 4668), INT16_C( 22473) }, { INT8_C( 97), INT8_C( 104), -INT8_C( 116), INT8_C( 1), INT8_C( 121), -INT8_C( 25), INT8_C( 27), INT8_C( 113) }, { INT16_C( 14883), INT16_C( 24566), -INT16_C( 4572), INT16_C( 22613) } }, { { -INT16_C( 13982), -INT16_C( 23789), -INT16_C( 15709), INT16_C( 9872) }, { -INT8_C( 30), INT8_C( 17), -INT8_C( 85), INT8_MIN, INT8_C( 60), INT8_C( 20), INT8_C( 44), -INT8_C( 106) }, { -INT16_C( 13995), -INT16_C( 24002), -INT16_C( 15629), INT16_C( 9810) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int16x4_t r = simde_vpadal_s8(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int16x4_t r = simde_vpadal_s8(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vpadal_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int16_t b[4]; int32_t r[2]; } test_vec[] = { { { INT32_C( 504749832), INT32_C( 1960423893) }, { INT16_C( 27165), -INT16_C( 2076), -INT16_C( 28050), -INT16_C( 17483) }, { INT32_C( 504774921), INT32_C( 1960378360) } }, { { -INT32_C( 866204476), INT32_C( 326389811) }, { -INT16_C( 654), INT16_C( 31863), -INT16_C( 19755), -INT16_C( 8779) }, { -INT32_C( 866173267), INT32_C( 326361277) } }, { { INT32_C( 1727843217), -INT32_C( 1713711748) }, { -INT16_C( 16577), -INT16_C( 21104), INT16_C( 17745), INT16_C( 5480) }, { INT32_C( 1727805536), -INT32_C( 1713688523) } }, 
{ { INT32_C( 1038206730), -INT32_C( 1991224041) }, { -INT16_C( 14254), INT16_C( 9990), -INT16_C( 17542), INT16_C( 3077) }, { INT32_C( 1038202466), -INT32_C( 1991238506) } }, { { INT32_C( 57803142), INT32_C( 362565078) }, { INT16_C( 11532), INT16_C( 24002), INT16_C( 10866), INT16_C( 31859) }, { INT32_C( 57838676), INT32_C( 362607803) } }, { { INT32_C( 146363633), -INT32_C( 57537878) }, { -INT16_C( 26414), INT16_C( 19492), INT16_C( 10579), -INT16_C( 9640) }, { INT32_C( 146356711), -INT32_C( 57536939) } }, { { INT32_C( 14535466), INT32_C( 605387032) }, { -INT16_C( 10330), INT16_C( 6529), -INT16_C( 3071), -INT16_C( 3179) }, { INT32_C( 14531665), INT32_C( 605380782) } }, { { -INT32_C( 201633975), INT32_C( 737119577) }, { INT16_C( 4901), INT16_C( 31095), -INT16_C( 12228), INT16_C( 26195) }, { -INT32_C( 201597979), INT32_C( 737133544) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int32x2_t r = simde_vpadal_s16(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int32x2_t r = simde_vpadal_s16(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vpadal_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int32_t b[2]; int64_t r[1]; } test_vec[] = { { { INT64_C( 4306996531119136319) }, { -INT32_C( 712300919), -INT32_C( 1254531556) }, { INT64_C( 4306996529152303844) } }, { { -INT64_C( 1142215973302328584) }, { -INT32_C( 666473390), -INT32_C( 1794099370) }, { -INT64_C( 1142215975762901344) } }, { { INT64_C( 
5843113515927421525) }, { INT32_C( 724017423), -INT32_C( 136290049) }, { INT64_C( 5843113516515148899) } }, { { -INT64_C( 4282400485266855678) }, { -INT32_C( 1633888184), INT32_C( 1681107983) }, { -INT64_C( 4282400485219635879) } }, { { INT64_C( 5551061743561561839) }, { -INT32_C( 176672522), -INT32_C( 1830004336) }, { INT64_C( 5551061741554884981) } }, { { -INT64_C( 4181093637499411774) }, { -INT32_C( 1402759779), INT32_C( 823170626) }, { -INT64_C( 4181093638079000927) } }, { { -INT64_C( 7315831907249795092) }, { -INT32_C( 1315966687), INT32_C( 205749066) }, { -INT64_C( 7315831908360012713) } }, { { -INT64_C( 2783160537737758947) }, { INT32_C( 293979343), INT32_C( 1195545946) }, { -INT64_C( 2783160536248233658) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int64x1_t r = simde_vpadal_s32(a, b); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int64x1_t r = simde_vpadal_s32(a, b); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vpadal_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint8_t b[8]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(33837), UINT16_C(14344), UINT16_C( 212), UINT16_C(49109) }, { UINT8_C( 82), UINT8_C(248), UINT8_C(222), UINT8_C( 54), UINT8_C(170), UINT8_C(254), UINT8_C(232), UINT8_C(178) }, { UINT16_C(34167), UINT16_C(14620), UINT16_C( 636), UINT16_C(49519) } }, { { UINT16_C(29181), UINT16_C(26461), UINT16_C(17709), UINT16_C( 6904) }, { UINT8_C(124), UINT8_C(109), UINT8_C( 96), 
UINT8_C( 12), UINT8_C( 85), UINT8_C(144), UINT8_C( 43), UINT8_C(131) }, { UINT16_C(29414), UINT16_C(26569), UINT16_C(17938), UINT16_C( 7078) } }, { { UINT16_C(13076), UINT16_C(59835), UINT16_C(37171), UINT16_C(34216) }, { UINT8_C(137), UINT8_C(134), UINT8_C(188), UINT8_C( 51), UINT8_C(133), UINT8_C(164), UINT8_C(229), UINT8_C(130) }, { UINT16_C(13347), UINT16_C(60074), UINT16_C(37468), UINT16_C(34575) } }, { { UINT16_C(17173), UINT16_C(17129), UINT16_C(57992), UINT16_C( 1116) }, { UINT8_C( 79), UINT8_C(189), UINT8_C( 16), UINT8_C(165), UINT8_C( 77), UINT8_C( 60), UINT8_C( 40), UINT8_C( 98) }, { UINT16_C(17441), UINT16_C(17310), UINT16_C(58129), UINT16_C( 1254) } }, { { UINT16_C(58223), UINT16_C(41803), UINT16_C(62324), UINT16_C(65064) }, { UINT8_C(122), UINT8_C(228), UINT8_C( 49), UINT8_MAX, UINT8_C(137), UINT8_C( 23), UINT8_C(129), UINT8_C(158) }, { UINT16_C(58573), UINT16_C(42107), UINT16_C(62484), UINT16_C(65351) } }, { { UINT16_C(27226), UINT16_C(58081), UINT16_C(15692), UINT16_C(40167) }, { UINT8_C(250), UINT8_C(247), UINT8_C( 65), UINT8_C( 72), UINT8_C( 51), UINT8_C(105), UINT8_C(170), UINT8_C(163) }, { UINT16_C(27723), UINT16_C(58218), UINT16_C(15848), UINT16_C(40500) } }, { { UINT16_C(62796), UINT16_C(49478), UINT16_C(28392), UINT16_C(25279) }, { UINT8_C( 83), UINT8_C(240), UINT8_C( 97), UINT8_C(220), UINT8_C( 7), UINT8_C(226), UINT8_C(122), UINT8_C( 97) }, { UINT16_C(63119), UINT16_C(49795), UINT16_C(28625), UINT16_C(25498) } }, { { UINT16_C(23373), UINT16_C(39236), UINT16_C(11161), UINT16_C(37685) }, { UINT8_C( 34), UINT8_C(118), UINT8_C(219), UINT8_C( 86), UINT8_C(223), UINT8_C(133), UINT8_C(249), UINT8_C( 44) }, { UINT16_C(23525), UINT16_C(39541), UINT16_C(11517), UINT16_C(37978) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint16x4_t r = simde_vpadal_u8(a, b); simde_test_arm_neon_assert_equal_u16x4(r, 
simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint16x4_t r = simde_vpadal_u8(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vpadal_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint16_t b[4]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(3031144186), UINT32_C(1298847721) }, { UINT16_C(62939), UINT16_C(26118), UINT16_C(36198), UINT16_C( 1343) }, { UINT32_C(3031233243), UINT32_C(1298885262) } }, { { UINT32_C(1473052482), UINT32_C( 282556598) }, { UINT16_C(63441), UINT16_C(33634), UINT16_C(36908), UINT16_C( 9772) }, { UINT32_C(1473149557), UINT32_C( 282603278) } }, { { UINT32_C( 266000166), UINT32_C(2304591022) }, { UINT16_C(25401), UINT16_C(40944), UINT16_C(12273), UINT16_C(13220) }, { UINT32_C( 266066511), UINT32_C(2304616515) } }, { { UINT32_C(3851055406), UINT32_C(3136643817) }, { UINT16_C(22617), UINT16_C(34110), UINT16_C(27368), UINT16_C( 3755) }, { UINT32_C(3851112133), UINT32_C(3136674940) } }, { { UINT32_C(4011689281), UINT32_C( 58227402) }, { UINT16_C(26846), UINT16_C(53155), UINT16_C(18328), UINT16_C(50690) }, { UINT32_C(4011769282), UINT32_C( 58296420) } }, { { UINT32_C(2729151672), UINT32_C(1214030318) }, { UINT16_C(39673), UINT16_C(57805), UINT16_C(30980), UINT16_C(17903) }, { UINT32_C(2729249150), UINT32_C(1214079201) } }, { { UINT32_C(3358854398), UINT32_C(1707912583) }, { UINT16_C(28437), UINT16_C(44340), UINT16_C(14006), UINT16_C(28532) }, { UINT32_C(3358927175), UINT32_C(1707955121) } }, { { UINT32_C(2970689474), UINT32_C(3120131520) }, { UINT16_C(50696), UINT16_C( 3226), UINT16_C(35135), UINT16_C(15954) }, { UINT32_C(2970743396), UINT32_C(3120182609) } }, }; 
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint32x2_t r = simde_vpadal_u16(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint32x2_t r = simde_vpadal_u16(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vpadal_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[1]; uint32_t b[2]; uint64_t r[1]; } test_vec[] = { { { UINT64_C(16268155908237277759) }, { UINT32_C( 875055710), UINT32_C(2229076956) }, { UINT64_C(16268155911341410425) } }, { { UINT64_C(14643826491213206399) }, { UINT32_C(3716771902), UINT32_C( 554289377) }, { UINT64_C(14643826495484267678) } }, { { UINT64_C(12249248146022871426) }, { UINT32_C( 954017628), UINT32_C(2847717930) }, { UINT64_C(12249248149824606984) } }, { { UINT64_C( 9580111084831634317) }, { UINT32_C(3059842260), UINT32_C(3336006468) }, { UINT64_C( 9580111091227483045) } }, { { UINT64_C(10632965974969755036) }, { UINT32_C( 868969481), UINT32_C(3017574182) }, { UINT64_C(10632965978856298699) } }, { { UINT64_C( 4631334754029602683) }, { UINT32_C(2415306571), UINT32_C(2924924178) }, { UINT64_C( 4631334759369833432) } }, { { UINT64_C( 2647030380067305970) }, { UINT32_C(3075966864), UINT32_C(2305438734) }, { UINT64_C( 2647030385448711568) } }, { { UINT64_C( 6910650571603723835) }, { UINT32_C(1240456503), UINT32_C(2633450922) }, { UINT64_C( 6910650575477631260) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = 
simde_vld1_u64(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint64x1_t r = simde_vpadal_u32(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint64x1_t r = simde_vpadal_u32(a, b); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vpadalq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int8_t b[16]; int16_t r[8]; } test_vec[] = { { { INT16_C( 21409), -INT16_C( 5093), -INT16_C( 30886), -INT16_C( 17820), INT16_C( 488), INT16_C( 5692), -INT16_C( 31806), INT16_C( 26370) }, { -INT8_C( 93), -INT8_C( 54), INT8_C( 113), -INT8_C( 74), -INT8_C( 61), INT8_C( 35), -INT8_C( 32), INT8_C( 16), -INT8_C( 37), -INT8_C( 100), INT8_C( 22), -INT8_C( 102), INT8_C( 8), INT8_C( 8), INT8_C( 36), -INT8_C( 86) }, { INT16_C( 21262), -INT16_C( 5054), -INT16_C( 30912), -INT16_C( 17836), INT16_C( 351), INT16_C( 5612), -INT16_C( 31790), INT16_C( 26320) } }, { { INT16_C( 16220), -INT16_C( 18794), -INT16_C( 1082), -INT16_C( 20624), -INT16_C( 21252), -INT16_C( 16699), -INT16_C( 14544), -INT16_C( 11483) }, { -INT8_C( 110), -INT8_C( 106), -INT8_C( 119), INT8_C( 85), -INT8_C( 71), INT8_C( 106), INT8_C( 101), -INT8_C( 108), INT8_C( 6), INT8_C( 123), INT8_C( 47), INT8_C( 14), -INT8_C( 124), INT8_C( 83), -INT8_C( 72), -INT8_C( 32) }, { INT16_C( 16004), -INT16_C( 18828), -INT16_C( 1047), -INT16_C( 20631), -INT16_C( 21123), -INT16_C( 16638), -INT16_C( 14585), -INT16_C( 11587) } }, { { INT16_C( 20371), INT16_C( 22934), INT16_C( 1610), INT16_C( 17928), -INT16_C( 12878), -INT16_C( 7676), INT16_C( 10645), INT16_C( 10165) }, { -INT8_C( 65), INT8_C( 63), 
INT8_C( 124), INT8_C( 120), -INT8_C( 87), -INT8_C( 31), INT8_C( 13), -INT8_C( 81), INT8_C( 93), INT8_C( 60), -INT8_C( 67), -INT8_C( 31), -INT8_C( 113), INT8_C( 118), -INT8_C( 63), INT8_C( 34) }, { INT16_C( 20369), INT16_C( 23178), INT16_C( 1492), INT16_C( 17860), -INT16_C( 12725), -INT16_C( 7774), INT16_C( 10650), INT16_C( 10136) } }, { { INT16_C( 22469), INT16_C( 3964), -INT16_C( 31651), INT16_C( 3925), INT16_C( 22866), -INT16_C( 6158), -INT16_C( 22654), INT16_C( 16654) }, { -INT8_C( 26), -INT8_C( 118), -INT8_C( 71), -INT8_C( 113), INT8_C( 107), -INT8_C( 58), INT8_C( 62), -INT8_C( 56), INT8_C( 2), -INT8_C( 4), -INT8_C( 87), -INT8_C( 110), INT8_C( 114), INT8_C( 106), -INT8_C( 76), INT8_C( 55) }, { INT16_C( 22325), INT16_C( 3780), -INT16_C( 31602), INT16_C( 3931), INT16_C( 22864), -INT16_C( 6355), -INT16_C( 22434), INT16_C( 16633) } }, { { INT16_C( 12481), INT16_C( 7750), -INT16_C( 25675), INT16_C( 1838), INT16_C( 8436), INT16_C( 30446), -INT16_C( 825), -INT16_C( 20809) }, { -INT8_C( 122), INT8_C( 112), INT8_C( 61), -INT8_C( 15), INT8_C( 55), INT8_C( 124), -INT8_C( 70), INT8_C( 57), INT8_C( 120), INT8_C( 99), -INT8_C( 53), -INT8_C( 22), -INT8_C( 50), INT8_MIN, INT8_C( 33), -INT8_C( 113) }, { INT16_C( 12471), INT16_C( 7796), -INT16_C( 25496), INT16_C( 1825), INT16_C( 8655), INT16_C( 30371), -INT16_C( 1003), -INT16_C( 20889) } }, { { INT16_C( 26544), INT16_C( 26030), -INT16_C( 9214), -INT16_C( 2452), INT16_C( 23292), -INT16_C( 15508), INT16_C( 9046), -INT16_C( 9103) }, { -INT8_C( 109), -INT8_C( 81), -INT8_C( 50), -INT8_C( 54), INT8_C( 43), -INT8_C( 120), INT8_C( 4), -INT8_C( 93), -INT8_C( 21), -INT8_C( 49), -INT8_C( 115), -INT8_C( 71), INT8_C( 79), -INT8_C( 82), INT8_C( 73), INT8_C( 0) }, { INT16_C( 26354), INT16_C( 25926), -INT16_C( 9291), -INT16_C( 2541), INT16_C( 23222), -INT16_C( 15694), INT16_C( 9043), -INT16_C( 9030) } }, { { -INT16_C( 2283), INT16_C( 5989), -INT16_C( 11565), -INT16_C( 12531), INT16_C( 31020), -INT16_C( 31854), INT16_C( 1180), INT16_C( 12127) }, 
{ -INT8_C( 77), INT8_C( 45), -INT8_C( 6), -INT8_C( 34), -INT8_C( 75), -INT8_C( 2), -INT8_C( 127), -INT8_C( 95), -INT8_C( 51), INT8_C( 14), INT8_C( 90), INT8_C( 29), -INT8_C( 68), -INT8_C( 93), INT8_C( 29), -INT8_C( 47) }, { -INT16_C( 2315), INT16_C( 5949), -INT16_C( 11642), -INT16_C( 12753), INT16_C( 30983), -INT16_C( 31735), INT16_C( 1019), INT16_C( 12109) } }, { { -INT16_C( 32102), INT16_C( 28136), -INT16_C( 2732), -INT16_C( 32452), -INT16_C( 12434), INT16_C( 2564), INT16_C( 25555), -INT16_C( 31175) }, { -INT8_C( 111), INT8_C( 51), INT8_C( 100), INT8_C( 70), INT8_C( 49), -INT8_C( 27), -INT8_C( 25), -INT8_C( 1), -INT8_C( 13), INT8_C( 66), INT8_C( 28), -INT8_C( 81), -INT8_C( 27), INT8_C( 57), INT8_MIN, INT8_MIN }, { -INT16_C( 32162), INT16_C( 28306), -INT16_C( 2710), -INT16_C( 32478), -INT16_C( 12381), INT16_C( 2511), INT16_C( 25585), -INT16_C( 31431) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int16x8_t r = simde_vpadalq_s8(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int16x8_t r = simde_vpadalq_s8(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vpadalq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int16_t b[8]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 408029165), INT32_C( 562585106), INT32_C( 1007709111), -INT32_C( 179631329) }, { INT16_C( 22791), -INT16_C( 17866), -INT16_C( 27083), -INT16_C( 8697), -INT16_C( 22514), -INT16_C( 2564), INT16_C( 18719), INT16_C( 13052) }, { 
-INT32_C( 408024240), INT32_C( 562549326), INT32_C( 1007684033), -INT32_C( 179599558) } }, { { INT32_C( 1394190657), -INT32_C( 1082875641), INT32_C( 754680844), -INT32_C( 1759426928) }, { INT16_C( 22431), -INT16_C( 10927), INT16_C( 23021), -INT16_C( 1101), -INT16_C( 20479), INT16_C( 8433), -INT16_C( 4615), INT16_C( 14930) }, { INT32_C( 1394202161), -INT32_C( 1082853721), INT32_C( 754668798), -INT32_C( 1759416613) } }, { { -INT32_C( 1651676266), INT32_C( 425460237), INT32_C( 373643398), INT32_C( 1051551390) }, { -INT16_C( 66), -INT16_C( 21741), -INT16_C( 14760), INT16_C( 22951), -INT16_C( 26506), INT16_C( 28537), -INT16_C( 13435), INT16_C( 7081) }, { -INT32_C( 1651698073), INT32_C( 425468428), INT32_C( 373645429), INT32_C( 1051545036) } }, { { INT32_C( 1136146230), -INT32_C( 1084418759), INT32_C( 198615661), -INT32_C( 951483639) }, { INT16_C( 23682), -INT16_C( 9614), INT16_C( 6435), -INT16_C( 26317), -INT16_C( 21327), INT16_C( 13833), -INT16_C( 19849), -INT16_C( 20911) }, { INT32_C( 1136160298), -INT32_C( 1084438641), INT32_C( 198608167), -INT32_C( 951524399) } }, { { INT32_C( 586222313), -INT32_C( 1931325921), -INT32_C( 90720015), -INT32_C( 1094590149) }, { INT16_C( 13117), INT16_C( 24728), -INT16_C( 13235), -INT16_C( 262), INT16_C( 888), -INT16_C( 4043), -INT16_C( 31051), -INT16_C( 24674) }, { INT32_C( 586260158), -INT32_C( 1931339418), -INT32_C( 90723170), -INT32_C( 1094645874) } }, { { -INT32_C( 1346269296), -INT32_C( 818175010), -INT32_C( 1748380837), -INT32_C( 246052172) }, { -INT16_C( 4675), INT16_C( 2642), INT16_C( 19641), INT16_C( 12809), INT16_C( 15951), INT16_C( 1058), -INT16_C( 16188), INT16_C( 21923) }, { -INT32_C( 1346271329), -INT32_C( 818142560), -INT32_C( 1748363828), -INT32_C( 246046437) } }, { { INT32_C( 755262799), INT32_C( 1694253064), -INT32_C( 939801325), INT32_C( 230182991) }, { INT16_C( 2621), -INT16_C( 2281), INT16_C( 8278), -INT16_C( 23255), INT16_C( 19294), INT16_C( 9130), INT16_C( 19723), INT16_C( 23160) }, { INT32_C( 755263139), 
INT32_C( 1694238087), -INT32_C( 939772901), INT32_C( 230225874) } }, { { -INT32_C( 1148683086), -INT32_C( 820017988), -INT32_C( 1718216118), -INT32_C( 1482272918) }, { -INT16_C( 16807), -INT16_C( 20322), -INT16_C( 14370), INT16_C( 15701), -INT16_C( 238), INT16_C( 7520), -INT16_C( 10163), -INT16_C( 136) }, { -INT32_C( 1148720215), -INT32_C( 820016657), -INT32_C( 1718208836), -INT32_C( 1482283217) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int32x4_t r = simde_vpadalq_s16(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int32x4_t r = simde_vpadalq_s16(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vpadalq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int32_t b[4]; int64_t r[2]; } test_vec[] = { { { INT64_C( 8358831643597162478), -INT64_C( 4127542501836088642) }, { -INT32_C( 1695082653), INT32_C( 482778832), -INT32_C( 270956823), -INT32_C( 1466047046) }, { INT64_C( 8358831642384858657), -INT64_C( 4127542503573092511) } }, { { -INT64_C( 3695083590524238804), -INT64_C( 2538875182963854584) }, { INT32_C( 24558385), INT32_C( 1125989722), INT32_C( 2100492227), INT32_C( 153473244) }, { -INT64_C( 3695083589373690697), -INT64_C( 2538875180709889113) } }, { { INT64_C( 517356502106542676), -INT64_C( 2536429654596284297) }, { -INT32_C( 539147643), INT32_C( 1109588863), -INT32_C( 826321422), INT32_C( 2044191781) }, { INT64_C( 517356502676983896), -INT64_C( 2536429653378413938) } }, { { -INT64_C( 
6882334586154834826), -INT64_C( 3305540639557385319) }, { INT32_C( 515046814), -INT32_C( 362752776), INT32_C( 1320755241), INT32_C( 2076741636) }, { -INT64_C( 6882334586002540788), -INT64_C( 3305540636159888442) } }, { { INT64_C( 5079337647775785908), INT64_C( 509375601867099633) }, { -INT32_C( 1608137816), -INT32_C( 1047820649), -INT32_C( 1441839962), -INT32_C( 1994008620) }, { INT64_C( 5079337645119827443), INT64_C( 509375598431251051) } }, { { INT64_C( 8534008504961603195), INT64_C( 5431559555495885296) }, { INT32_C( 552371849), -INT32_C( 1293846772), -INT32_C( 1889734213), INT32_C( 1125679816) }, { INT64_C( 8534008504220128272), INT64_C( 5431559554731830899) } }, { { INT64_C( 5878563524265606808), -INT64_C( 7439371473419351597) }, { -INT32_C( 1648841071), -INT32_C( 531654107), INT32_C( 1399827339), -INT32_C( 963147731) }, { INT64_C( 5878563522085111630), -INT64_C( 7439371472982671989) } }, { { -INT64_C( 5784460457844473338), INT64_C( 29247489515611328) }, { -INT32_C( 1147330410), INT32_C( 1167846586), -INT32_C( 979891561), -INT32_C( 1718931566) }, { -INT64_C( 5784460457823957162), INT64_C( 29247486816788201) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int64x2_t r = simde_vpadalq_s32(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int64x2_t r = simde_vpadalq_s32(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vpadalq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint8_t b[16]; 
uint16_t r[8]; } test_vec[] = { { { UINT16_C(62196), UINT16_C(63140), UINT16_C( 6425), UINT16_C(55467), UINT16_C(34429), UINT16_C(59966), UINT16_C(34679), UINT16_C(58442) }, { UINT8_C(140), UINT8_C(113), UINT8_C(171), UINT8_C(189), UINT8_C( 42), UINT8_C( 80), UINT8_C( 89), UINT8_C(103), UINT8_C(177), UINT8_C(142), UINT8_C( 67), UINT8_C(228), UINT8_C(125), UINT8_C( 11), UINT8_C( 27), UINT8_C(114) }, { UINT16_C(62449), UINT16_C(63500), UINT16_C( 6547), UINT16_C(55659), UINT16_C(34748), UINT16_C(60261), UINT16_C(34815), UINT16_C(58583) } }, { { UINT16_C(49406), UINT16_C( 5992), UINT16_C( 5337), UINT16_C(22255), UINT16_C(11674), UINT16_C( 4673), UINT16_C(35764), UINT16_C(16886) }, { UINT8_C(252), UINT8_C(161), UINT8_C(254), UINT8_C( 39), UINT8_C(242), UINT8_C( 87), UINT8_C(142), UINT8_C(163), UINT8_C(230), UINT8_C(210), UINT8_C(135), UINT8_C( 99), UINT8_C(221), UINT8_C(163), UINT8_C(213), UINT8_C(219) }, { UINT16_C(49819), UINT16_C( 6285), UINT16_C( 5666), UINT16_C(22560), UINT16_C(12114), UINT16_C( 4907), UINT16_C(36148), UINT16_C(17318) } }, { { UINT16_C(15971), UINT16_C(15603), UINT16_C(57938), UINT16_C(60563), UINT16_C(54288), UINT16_C(50430), UINT16_C(62559), UINT16_C(23301) }, { UINT8_C(150), UINT8_C( 3), UINT8_C(130), UINT8_C(136), UINT8_C( 91), UINT8_C( 17), UINT8_C( 43), UINT8_C( 65), UINT8_C(227), UINT8_C(178), UINT8_C(164), UINT8_C(192), UINT8_C( 85), UINT8_C(122), UINT8_C(156), UINT8_C(184) }, { UINT16_C(16124), UINT16_C(15869), UINT16_C(58046), UINT16_C(60671), UINT16_C(54693), UINT16_C(50786), UINT16_C(62766), UINT16_C(23641) } }, { { UINT16_C(36792), UINT16_C( 2805), UINT16_C(34929), UINT16_C(33270), UINT16_C(62812), UINT16_C(47942), UINT16_C(19433), UINT16_C(32534) }, { UINT8_C( 79), UINT8_C(153), UINT8_C( 7), UINT8_C(170), UINT8_C(170), UINT8_C( 50), UINT8_C(235), UINT8_C(141), UINT8_C(229), UINT8_C(143), UINT8_C( 77), UINT8_C( 58), UINT8_C( 9), UINT8_C(233), UINT8_C(243), UINT8_C(193) }, { UINT16_C(37024), UINT16_C( 2982), UINT16_C(35149), 
UINT16_C(33646), UINT16_C(63184), UINT16_C(48077), UINT16_C(19675), UINT16_C(32970) } }, { { UINT16_C(59512), UINT16_C(60107), UINT16_C(49776), UINT16_C(52331), UINT16_C(45495), UINT16_C(41095), UINT16_C(40445), UINT16_C(19488) }, { UINT8_C( 54), UINT8_C( 39), UINT8_C(246), UINT8_C(224), UINT8_C( 90), UINT8_C(225), UINT8_C(109), UINT8_C( 63), UINT8_C(112), UINT8_C(187), UINT8_C(121), UINT8_C(122), UINT8_C(164), UINT8_C(108), UINT8_C( 59), UINT8_C( 29) }, { UINT16_C(59605), UINT16_C(60577), UINT16_C(50091), UINT16_C(52503), UINT16_C(45794), UINT16_C(41338), UINT16_C(40717), UINT16_C(19576) } }, { { UINT16_C( 1876), UINT16_C(50183), UINT16_C(29385), UINT16_C(32912), UINT16_C( 5924), UINT16_C( 8480), UINT16_C(16565), UINT16_C(60269) }, { UINT8_C(104), UINT8_C( 99), UINT8_C(204), UINT8_C(194), UINT8_C( 68), UINT8_C( 57), UINT8_C( 1), UINT8_C(180), UINT8_C(244), UINT8_C(122), UINT8_C( 46), UINT8_C(153), UINT8_C(231), UINT8_C(106), UINT8_C(182), UINT8_C( 59) }, { UINT16_C( 2079), UINT16_C(50581), UINT16_C(29510), UINT16_C(33093), UINT16_C( 6290), UINT16_C( 8679), UINT16_C(16902), UINT16_C(60510) } }, { { UINT16_C(48497), UINT16_C(14848), UINT16_C(36911), UINT16_C(21434), UINT16_C(55976), UINT16_C(23924), UINT16_C(57627), UINT16_C(33608) }, { UINT8_C( 68), UINT8_C( 20), UINT8_C( 69), UINT8_C(136), UINT8_C( 78), UINT8_C( 70), UINT8_C( 61), UINT8_C( 66), UINT8_C(192), UINT8_C(107), UINT8_C(219), UINT8_C(167), UINT8_C(213), UINT8_C(145), UINT8_C(227), UINT8_C( 70) }, { UINT16_C(48585), UINT16_C(15053), UINT16_C(37059), UINT16_C(21561), UINT16_C(56275), UINT16_C(24310), UINT16_C(57985), UINT16_C(33905) } }, { { UINT16_C(58190), UINT16_C(32384), UINT16_C(14963), UINT16_C( 7121), UINT16_C(17941), UINT16_C(12408), UINT16_C(49447), UINT16_C(27827) }, { UINT8_C(213), UINT8_C(248), UINT8_C(244), UINT8_C( 35), UINT8_C( 62), UINT8_C( 49), UINT8_C(102), UINT8_C(254), UINT8_C(157), UINT8_C( 65), UINT8_C(166), UINT8_C(114), UINT8_C(211), UINT8_C(137), UINT8_C(185), UINT8_C( 33) }, { 
UINT16_C(58651), UINT16_C(32663), UINT16_C(15074), UINT16_C( 7477), UINT16_C(18163), UINT16_C(12688), UINT16_C(49795), UINT16_C(28045) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint16x8_t r = simde_vpadalq_u8(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint16x8_t r = simde_vpadalq_u8(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vpadalq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint16_t b[8]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(3624145319), UINT32_C(1377518211), UINT32_C(3012138328), UINT32_C(2898950110) }, { UINT16_C(46785), UINT16_C(45446), UINT16_C(16731), UINT16_C(63885), UINT16_C(37154), UINT16_C(54076), UINT16_C(44856), UINT16_C(57089) }, { UINT32_C(3624237550), UINT32_C(1377598827), UINT32_C(3012229558), UINT32_C(2899052055) } }, { { UINT32_C(1152845248), UINT32_C(2694238792), UINT32_C(1179852648), UINT32_C(1475485078) }, { UINT16_C(31188), UINT16_C(12040), UINT16_C(38330), UINT16_C(56360), UINT16_C(25894), UINT16_C(24240), UINT16_C(45332), UINT16_C(54333) }, { UINT32_C(1152888476), UINT32_C(2694333482), UINT32_C(1179902782), UINT32_C(1475584743) } }, { { UINT32_C(4263048630), UINT32_C( 798994119), UINT32_C(1668739789), UINT32_C(3837487120) }, { UINT16_C(50145), UINT16_C(39955), UINT16_C(15449), UINT16_C(32632), UINT16_C(10401), UINT16_C(46558), UINT16_C( 7129), UINT16_C(37001) }, { UINT32_C(4263138730), UINT32_C( 799042200), UINT32_C(1668796748), 
UINT32_C(3837531250) } }, { { UINT32_C(3633226256), UINT32_C( 503786832), UINT32_C( 813792544), UINT32_C(3339992294) }, { UINT16_C( 9984), UINT16_C(22883), UINT16_C(56419), UINT16_C( 1240), UINT16_C(46596), UINT16_C(57017), UINT16_C(17362), UINT16_C(57966) }, { UINT32_C(3633259123), UINT32_C( 503844491), UINT32_C( 813896157), UINT32_C(3340067622) } }, { { UINT32_C( 901446885), UINT32_C(1247003178), UINT32_C( 628806975), UINT32_C( 300781073) }, { UINT16_C(20661), UINT16_C( 6506), UINT16_C(17196), UINT16_C(12573), UINT16_C(55289), UINT16_C(51983), UINT16_C(32026), UINT16_C(65454) }, { UINT32_C( 901474052), UINT32_C(1247032947), UINT32_C( 628914247), UINT32_C( 300878553) } }, { { UINT32_C(2738120825), UINT32_C(1793951786), UINT32_C(1854891869), UINT32_C(2877324533) }, { UINT16_C(60109), UINT16_C(63940), UINT16_C(57645), UINT16_C(10026), UINT16_C(14776), UINT16_C(54002), UINT16_C(41142), UINT16_C(12497) }, { UINT32_C(2738244874), UINT32_C(1794019457), UINT32_C(1854960647), UINT32_C(2877378172) } }, { { UINT32_C( 869467657), UINT32_C(3952984462), UINT32_C( 509160744), UINT32_C(1992939945) }, { UINT16_C(36292), UINT16_C(61808), UINT16_C(39534), UINT16_C(10008), UINT16_C( 3028), UINT16_C(35577), UINT16_C(52139), UINT16_C(46266) }, { UINT32_C( 869565757), UINT32_C(3953034004), UINT32_C( 509199349), UINT32_C(1993038350) } }, { { UINT32_C(1609076433), UINT32_C(2001372495), UINT32_C(1553310642), UINT32_C(1104305789) }, { UINT16_C(17131), UINT16_C(23090), UINT16_C(19421), UINT16_C(45441), UINT16_C(31318), UINT16_C( 315), UINT16_C(63045), UINT16_C( 5814) }, { UINT32_C(1609116654), UINT32_C(2001437357), UINT32_C(1553342275), UINT32_C(1104374648) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint32x4_t r = simde_vpadalq_u16(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', 
stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint32x4_t r = simde_vpadalq_u16(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vpadalq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint32_t b[4]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 6651368149195880995), UINT64_C(16555414622728810358) }, { UINT32_C(3603103992), UINT32_C(4097160130), UINT32_C( 53538364), UINT32_C(4025022924) }, { UINT64_C( 6651368156896145117), UINT64_C(16555414626807371646) } }, { { UINT64_C(17395039739962856179), UINT64_C( 1319346996561212807) }, { UINT32_C( 115872323), UINT32_C( 33168837), UINT32_C(3607440139), UINT32_C( 398912804) }, { UINT64_C(17395039740111897339), UINT64_C( 1319347000567565750) } }, { { UINT64_C( 3416976098987217151), UINT64_C( 8370758154985271038) }, { UINT32_C(3212448762), UINT32_C(1019311408), UINT32_C(3289630368), UINT32_C(3000752819) }, { UINT64_C( 3416976103218977321), UINT64_C( 8370758161275654225) } }, { { UINT64_C( 9566376262250659306), UINT64_C( 4408117228946086790) }, { UINT32_C( 184395482), UINT32_C(3141975579), UINT32_C( 931093124), UINT32_C( 518609716) }, { UINT64_C( 9566376265577030367), UINT64_C( 4408117230395789630) } }, { { UINT64_C( 3847046805425559128), UINT64_C(11623818981066735823) }, { UINT32_C(3685436863), UINT32_C(2409034251), UINT32_C(2160465484), UINT32_C(3382620273) }, { UINT64_C( 3847046811520030242), UINT64_C(11623818986609821580) } }, { { UINT64_C(10640612582520017094), UINT64_C( 8103373972678547468) }, { UINT32_C(1347100484), UINT32_C(1574953489), UINT32_C(1776133880), UINT32_C( 473070678) }, { UINT64_C(10640612585442071067), UINT64_C( 8103373974927752026) } }, { { UINT64_C(17519915040173644748), 
UINT64_C( 429595417324735189) }, { UINT32_C(1800749657), UINT32_C( 482882852), UINT32_C( 830842587), UINT32_C(3998070562) }, { UINT64_C(17519915042457277257), UINT64_C( 429595422153648338) } }, { { UINT64_C(17396456604294897554), UINT64_C(14281598779226796651) }, { UINT32_C(3560015792), UINT32_C(2549152188), UINT32_C(3251139999), UINT32_C(3215922732) }, { UINT64_C(17396456610404065534), UINT64_C(14281598785693859382) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint64x2_t r = simde_vpadalq_u32(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint64x2_t r = simde_vpadalq_u32(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vpadal_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vpadal_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vpadal_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vpadal_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vpadal_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vpadal_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vpadalq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vpadalq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vpadalq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vpadalq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vpadalq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vpadalq_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/padd.c000066400000000000000000001546601400333146700164070ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN padd #include "test-neon.h" #include "../../../simde/arm/neon/padd.h" static int test_simde_vpadd_f32 (SIMDE_MUNIT_TEST_ARGS) 
{ struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 364.07), SIMDE_FLOAT32_C( -805.98) }, { SIMDE_FLOAT32_C( 541.55), SIMDE_FLOAT32_C( -507.67) }, { SIMDE_FLOAT32_C( -441.91), SIMDE_FLOAT32_C( 33.88) } }, { { SIMDE_FLOAT32_C( -210.05), SIMDE_FLOAT32_C( 950.23) }, { SIMDE_FLOAT32_C( -73.21), SIMDE_FLOAT32_C( -858.69) }, { SIMDE_FLOAT32_C( 740.18), SIMDE_FLOAT32_C( -931.90) } }, { { SIMDE_FLOAT32_C( 289.47), SIMDE_FLOAT32_C( 222.50) }, { SIMDE_FLOAT32_C( -617.39), SIMDE_FLOAT32_C( 38.06) }, { SIMDE_FLOAT32_C( 511.98), SIMDE_FLOAT32_C( -579.33) } }, { { SIMDE_FLOAT32_C( -385.12), SIMDE_FLOAT32_C( -947.12) }, { SIMDE_FLOAT32_C( -383.15), SIMDE_FLOAT32_C( -390.83) }, { SIMDE_FLOAT32_C( -1332.24), SIMDE_FLOAT32_C( -773.98) } }, { { SIMDE_FLOAT32_C( 787.35), SIMDE_FLOAT32_C( 150.74) }, { SIMDE_FLOAT32_C( -44.15), SIMDE_FLOAT32_C( -31.92) }, { SIMDE_FLOAT32_C( 938.08), SIMDE_FLOAT32_C( -76.07) } }, { { SIMDE_FLOAT32_C( 608.96), SIMDE_FLOAT32_C( -994.50) }, { SIMDE_FLOAT32_C( -198.21), SIMDE_FLOAT32_C( -284.71) }, { SIMDE_FLOAT32_C( -385.53), SIMDE_FLOAT32_C( -482.92) } }, { { SIMDE_FLOAT32_C( 116.75), SIMDE_FLOAT32_C( 296.49) }, { SIMDE_FLOAT32_C( -579.72), SIMDE_FLOAT32_C( -346.53) }, { SIMDE_FLOAT32_C( 413.24), SIMDE_FLOAT32_C( -926.24) } }, { { SIMDE_FLOAT32_C( -405.05), SIMDE_FLOAT32_C( 579.35) }, { SIMDE_FLOAT32_C( 172.94), SIMDE_FLOAT32_C( 959.02) }, { SIMDE_FLOAT32_C( 174.30), SIMDE_FLOAT32_C( 1131.96) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vpadd_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vpadd_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 42), -INT8_C( 32), -INT8_C( 64), INT8_C( 117), INT8_C( 
61), INT8_C( 45), -INT8_C( 26), INT8_C( 96) }, { -INT8_C( 49), INT8_C( 76), -INT8_C( 77), -INT8_C( 78), INT8_C( 47), -INT8_C( 116), INT8_C( 78), -INT8_C( 108) }, { -INT8_C( 74), INT8_C( 53), INT8_C( 106), INT8_C( 70), INT8_C( 27), INT8_C( 101), -INT8_C( 69), -INT8_C( 30) } }, { { INT8_C( 3), -INT8_C( 93), -INT8_C( 22), INT8_C( 89), INT8_C( 69), INT8_C( 121), -INT8_C( 64), INT8_C( 110) }, { -INT8_C( 10), -INT8_C( 63), -INT8_C( 118), INT8_C( 71), INT8_C( 28), -INT8_C( 42), -INT8_C( 14), -INT8_C( 14) }, { -INT8_C( 90), INT8_C( 67), -INT8_C( 66), INT8_C( 46), -INT8_C( 73), -INT8_C( 47), -INT8_C( 14), -INT8_C( 28) } }, { { -INT8_C( 74), -INT8_C( 78), INT8_C( 103), -INT8_C( 13), -INT8_C( 33), INT8_C( 77), INT8_C( 83), -INT8_C( 82) }, { -INT8_C( 103), INT8_C( 6), INT8_C( 96), -INT8_C( 56), -INT8_C( 110), -INT8_C( 82), INT8_C( 92), -INT8_C( 106) }, { INT8_C( 104), INT8_C( 90), INT8_C( 44), INT8_C( 1), -INT8_C( 97), INT8_C( 40), INT8_C( 64), -INT8_C( 14) } }, { { INT8_C( 82), INT8_C( 71), -INT8_C( 17), -INT8_C( 105), -INT8_C( 64), -INT8_C( 80), INT8_C( 5), -INT8_C( 74) }, { INT8_C( 113), -INT8_C( 112), -INT8_C( 2), -INT8_C( 115), INT8_C( 102), -INT8_C( 16), INT8_MAX, INT8_C( 28) }, { -INT8_C( 103), -INT8_C( 122), INT8_C( 112), -INT8_C( 69), INT8_C( 1), -INT8_C( 117), INT8_C( 86), -INT8_C( 101) } }, { { -INT8_C( 94), -INT8_C( 26), INT8_C( 15), -INT8_C( 127), INT8_C( 51), INT8_C( 98), INT8_C( 47), -INT8_C( 52) }, { INT8_C( 104), -INT8_C( 112), -INT8_C( 108), -INT8_C( 6), INT8_C( 62), -INT8_C( 15), -INT8_C( 112), -INT8_C( 112) }, { -INT8_C( 120), -INT8_C( 112), -INT8_C( 107), -INT8_C( 5), -INT8_C( 8), -INT8_C( 114), INT8_C( 47), INT8_C( 32) } }, { { INT8_C( 56), INT8_MIN, INT8_C( 39), -INT8_C( 8), INT8_C( 48), INT8_C( 45), -INT8_C( 81), -INT8_C( 95) }, { -INT8_C( 67), -INT8_C( 83), INT8_C( 47), INT8_C( 35), -INT8_C( 99), -INT8_C( 82), INT8_C( 63), INT8_C( 63) }, { -INT8_C( 72), INT8_C( 31), INT8_C( 93), INT8_C( 80), INT8_C( 106), INT8_C( 82), INT8_C( 75), INT8_C( 126) } }, { { 
-INT8_C( 107), INT8_C( 78), -INT8_C( 64), -INT8_C( 56), -INT8_C( 80), -INT8_C( 17), -INT8_C( 107), INT8_C( 24) }, { INT8_MAX, INT8_C( 41), INT8_C( 18), -INT8_C( 66), INT8_C( 26), -INT8_C( 93), INT8_C( 78), INT8_C( 82) }, { -INT8_C( 29), -INT8_C( 120), -INT8_C( 97), -INT8_C( 83), -INT8_C( 88), -INT8_C( 48), -INT8_C( 67), -INT8_C( 96) } }, { { INT8_C( 35), INT8_C( 118), INT8_C( 75), INT8_C( 83), -INT8_C( 93), -INT8_C( 6), -INT8_C( 12), INT8_C( 96) }, { -INT8_C( 89), INT8_C( 35), -INT8_C( 125), INT8_C( 68), -INT8_C( 46), -INT8_C( 62), -INT8_C( 125), INT8_C( 103) }, { -INT8_C( 103), -INT8_C( 98), -INT8_C( 99), INT8_C( 84), -INT8_C( 54), -INT8_C( 57), -INT8_C( 108), -INT8_C( 22) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vpadd_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vpadd_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 12301), -INT16_C( 30280), INT16_C( 1981), -INT16_C( 8183) }, { INT16_C( 2606), INT16_C( 142), -INT16_C( 1330), -INT16_C( 1866) }, { -INT16_C( 17979), -INT16_C( 6202), INT16_C( 2748), -INT16_C( 3196) } }, { { INT16_C( 18011), -INT16_C( 2555), -INT16_C( 26024), INT16_C( 8774) }, { INT16_C( 7111), INT16_C( 25679), INT16_C( 19345), -INT16_C( 25081) }, { INT16_C( 15456), -INT16_C( 17250), -INT16_C( 32746), -INT16_C( 5736) } }, { { -INT16_C( 16516), INT16_C( 14631), INT16_C( 12486), -INT16_C( 3047) }, { -INT16_C( 22470), INT16_C( 2548), -INT16_C( 21598), -INT16_C( 511) }, { -INT16_C( 1885), INT16_C( 9439), -INT16_C( 19922), -INT16_C( 22109) } }, { { INT16_C( 2033), INT16_C( 18932), INT16_C( 15009), INT16_C( 26987) }, { -INT16_C( 17834), -INT16_C( 6195), -INT16_C( 11259), -INT16_C( 32379) }, { INT16_C( 20965), -INT16_C( 23540), -INT16_C( 24029), INT16_C( 21898) } 
}, { { -INT16_C( 21101), INT16_C( 23226), -INT16_C( 11043), INT16_C( 6222) }, { INT16_C( 17276), INT16_C( 7713), INT16_C( 8942), -INT16_C( 8420) }, { INT16_C( 2125), -INT16_C( 4821), INT16_C( 24989), INT16_C( 522) } }, { { INT16_C( 4137), -INT16_C( 13528), -INT16_C( 27829), -INT16_C( 24268) }, { INT16_C( 333), INT16_C( 21128), INT16_C( 3541), INT16_C( 26836) }, { -INT16_C( 9391), INT16_C( 13439), INT16_C( 21461), INT16_C( 30377) } }, { { -INT16_C( 28998), -INT16_C( 26430), INT16_C( 4450), -INT16_C( 8528) }, { -INT16_C( 11948), INT16_C( 17149), INT16_C( 6643), INT16_C( 7457) }, { INT16_C( 10108), -INT16_C( 4078), INT16_C( 5201), INT16_C( 14100) } }, { { INT16_C( 18730), INT16_C( 30184), INT16_C( 7388), INT16_C( 10518) }, { -INT16_C( 25059), -INT16_C( 3461), INT16_C( 20395), INT16_C( 26202) }, { -INT16_C( 16622), INT16_C( 17906), -INT16_C( 28520), -INT16_C( 18939) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vpadd_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vpadd_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 1699479832), -INT32_C( 412031930) }, { -INT32_C( 2482844), -INT32_C( 51361615) }, { INT32_C( 1287447902), -INT32_C( 53844459) } }, { { -INT32_C( 538447692), INT32_C( 1324757574) }, { -INT32_C( 268960219), INT32_C( 1571947844) }, { INT32_C( 786309882), INT32_C( 1302987625) } }, { { INT32_C( 1287847686), INT32_C( 1211380451) }, { INT32_C( 21433936), INT32_C( 201144407) }, { -INT32_C( 1795739159), INT32_C( 222578343) } }, { { INT32_C( 1860887592), INT32_C( 1002234134) }, { INT32_C( 590001119), -INT32_C( 1082073671) }, { -INT32_C( 1431845570), -INT32_C( 492072552) } }, { { -INT32_C( 1089715236), -INT32_C( 989380491) }, { -INT32_C( 1513730226), -INT32_C( 1347370105) 
}, { -INT32_C( 2079095727), INT32_C( 1433866965) } }, { { -INT32_C( 1105355864), INT32_C( 1543166332) }, { INT32_C( 1182672013), -INT32_C( 586809599) }, { INT32_C( 437810468), INT32_C( 595862414) } }, { { -INT32_C( 1214443198), -INT32_C( 1602378671) }, { INT32_C( 2051359731), -INT32_C( 1356204537) }, { INT32_C( 1478145427), INT32_C( 695155194) } }, { { INT32_C( 225265297), -INT32_C( 1385666784) }, { -INT32_C( 1913395572), INT32_C( 661387493) }, { -INT32_C( 1160401487), -INT32_C( 1252008079) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vpadd_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vpadd_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(243), UINT8_C( 66), UINT8_C( 33), UINT8_C( 59), UINT8_C(231), UINT8_C(109), UINT8_C(213), UINT8_C(120) }, { UINT8_C(224), UINT8_C(152), UINT8_C( 89), UINT8_C(119), UINT8_C(114), UINT8_C( 12), UINT8_C(242), UINT8_C(158) }, { UINT8_C( 53), UINT8_C( 92), UINT8_C( 84), UINT8_C( 77), UINT8_C(120), UINT8_C(208), UINT8_C(126), UINT8_C(144) } }, { { UINT8_C( 36), UINT8_C( 52), UINT8_C(243), UINT8_C(110), UINT8_C(177), UINT8_C(236), UINT8_C(243), UINT8_C(140) }, { UINT8_C(169), UINT8_C(251), UINT8_C(254), UINT8_C(243), UINT8_C(133), UINT8_C( 91), UINT8_C( 89), UINT8_C(120) }, { UINT8_C( 88), UINT8_C( 97), UINT8_C(157), UINT8_C(127), UINT8_C(164), UINT8_C(241), UINT8_C(224), UINT8_C(209) } }, { { UINT8_C(157), UINT8_C(122), UINT8_C(179), UINT8_C(133), UINT8_C(232), UINT8_C(136), UINT8_C(253), UINT8_C(200) }, { UINT8_C( 32), UINT8_C( 87), UINT8_C( 63), UINT8_C(146), UINT8_C( 99), UINT8_C( 50), UINT8_C( 48), UINT8_C(135) }, { UINT8_C( 23), UINT8_C( 56), UINT8_C(112), UINT8_C(197), UINT8_C(119), UINT8_C(209), UINT8_C(149), UINT8_C(183) } }, { { 
UINT8_C(102), UINT8_C( 35), UINT8_C(245), UINT8_C( 23), UINT8_C( 16), UINT8_C(232), UINT8_C(163), UINT8_C(185) }, { UINT8_C(227), UINT8_C(162), UINT8_C(173), UINT8_C(104), UINT8_C(253), UINT8_C( 6), UINT8_C(224), UINT8_C(154) }, { UINT8_C(137), UINT8_C( 12), UINT8_C(248), UINT8_C( 92), UINT8_C(133), UINT8_C( 21), UINT8_C( 3), UINT8_C(122) } }, { { UINT8_C(128), UINT8_C(147), UINT8_C( 31), UINT8_C(104), UINT8_C( 27), UINT8_C( 29), UINT8_C( 49), UINT8_C( 60) }, { UINT8_C(116), UINT8_C(112), UINT8_C(206), UINT8_C(215), UINT8_C(162), UINT8_MAX, UINT8_C( 94), UINT8_C( 8) }, { UINT8_C( 19), UINT8_C(135), UINT8_C( 56), UINT8_C(109), UINT8_C(228), UINT8_C(165), UINT8_C(161), UINT8_C(102) } }, { { UINT8_C( 34), UINT8_C( 83), UINT8_C( 31), UINT8_C( 50), UINT8_C( 60), UINT8_C(195), UINT8_C(236), UINT8_C( 31) }, { UINT8_C(101), UINT8_C(153), UINT8_C(136), UINT8_C( 98), UINT8_C(159), UINT8_C(104), UINT8_C(252), UINT8_C( 31) }, { UINT8_C(117), UINT8_C( 81), UINT8_MAX, UINT8_C( 11), UINT8_C(254), UINT8_C(234), UINT8_C( 7), UINT8_C( 27) } }, { { UINT8_C(252), UINT8_C( 28), UINT8_C(136), UINT8_C( 23), UINT8_C( 57), UINT8_C(185), UINT8_C( 83), UINT8_C(173) }, { UINT8_C( 41), UINT8_C( 34), UINT8_C(132), UINT8_C(204), UINT8_C( 33), UINT8_C(226), UINT8_C(212), UINT8_C( 67) }, { UINT8_C( 24), UINT8_C(159), UINT8_C(242), UINT8_C( 0), UINT8_C( 75), UINT8_C( 80), UINT8_C( 3), UINT8_C( 23) } }, { { UINT8_C( 53), UINT8_C(244), UINT8_C(118), UINT8_C(113), UINT8_C(183), UINT8_C( 98), UINT8_C(145), UINT8_C( 28) }, { UINT8_C(251), UINT8_C( 25), UINT8_C(126), UINT8_C(154), UINT8_C(129), UINT8_C(122), UINT8_C(185), UINT8_C(125) }, { UINT8_C( 41), UINT8_C(231), UINT8_C( 25), UINT8_C(173), UINT8_C( 20), UINT8_C( 24), UINT8_C(251), UINT8_C( 54) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vpadd_u8(a, b); 
simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vpadd_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(21226), UINT16_C(34183), UINT16_C(22510), UINT16_C(15812) }, { UINT16_C( 9063), UINT16_C(27641), UINT16_C(59469), UINT16_C(64332) }, { UINT16_C(55409), UINT16_C(38322), UINT16_C(36704), UINT16_C(58265) } }, { { UINT16_C(31569), UINT16_C(54281), UINT16_C(16377), UINT16_C( 5882) }, { UINT16_C(15455), UINT16_C(11970), UINT16_C(43165), UINT16_C(34802) }, { UINT16_C(20314), UINT16_C(22259), UINT16_C(27425), UINT16_C(12431) } }, { { UINT16_C(31227), UINT16_C(59660), UINT16_C(53457), UINT16_C(14374) }, { UINT16_C( 8435), UINT16_C(16548), UINT16_C(61448), UINT16_C(22843) }, { UINT16_C(25351), UINT16_C( 2295), UINT16_C(24983), UINT16_C(18755) } }, { { UINT16_C(17515), UINT16_C(25901), UINT16_C(10116), UINT16_C(58235) }, { UINT16_C(15715), UINT16_C( 17), UINT16_C( 998), UINT16_C(57735) }, { UINT16_C(43416), UINT16_C( 2815), UINT16_C(15732), UINT16_C(58733) } }, { { UINT16_C(37757), UINT16_C(20170), UINT16_C(61796), UINT16_C(22406) }, { UINT16_C(10769), UINT16_C( 6552), UINT16_C(54042), UINT16_C(34418) }, { UINT16_C(57927), UINT16_C(18666), UINT16_C(17321), UINT16_C(22924) } }, { { UINT16_C(40728), UINT16_C(40171), UINT16_C(26310), UINT16_C(10623) }, { UINT16_C(37027), UINT16_C(35114), UINT16_C(45460), UINT16_C( 4458) }, { UINT16_C(15363), UINT16_C(36933), UINT16_C( 6605), UINT16_C(49918) } }, { { UINT16_C(13637), UINT16_C(43359), UINT16_C(58662), UINT16_C(14080) }, { UINT16_C(38928), UINT16_C(10832), UINT16_C(49772), UINT16_C(33968) }, { UINT16_C(56996), UINT16_C( 7206), UINT16_C(49760), UINT16_C(18204) } }, { { UINT16_C(39777), UINT16_C(10016), UINT16_C(40705), UINT16_C(42320) }, { UINT16_C(31279), UINT16_C(49966), UINT16_C(39212), UINT16_C(29140) }, { UINT16_C(49793), UINT16_C(17489), UINT16_C(15709), UINT16_C( 2816) } }, }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vpadd_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vpadd_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(3709109624), UINT32_C(4040697517) }, { UINT32_C( 609191128), UINT32_C(2173632669) }, { UINT32_C(3454839845), UINT32_C(2782823797) } }, { { UINT32_C( 887164280), UINT32_C(2941533126) }, { UINT32_C( 856155564), UINT32_C(2881345843) }, { UINT32_C(3828697406), UINT32_C(3737501407) } }, { { UINT32_C( 193516126), UINT32_C(3506135544) }, { UINT32_C(2197113829), UINT32_C(3288564811) }, { UINT32_C(3699651670), UINT32_C(1190711344) } }, { { UINT32_C(1492706449), UINT32_C(3406253087) }, { UINT32_C(1694371377), UINT32_C(1158724839) }, { UINT32_C( 603992240), UINT32_C(2853096216) } }, { { UINT32_C(2253494414), UINT32_C(3730263289) }, { UINT32_C(3814739095), UINT32_C(1638360016) }, { UINT32_C(1688790407), UINT32_C(1158131815) } }, { { UINT32_C(1723440967), UINT32_C( 473088491) }, { UINT32_C(3078631631), UINT32_C(2063372524) }, { UINT32_C(2196529458), UINT32_C( 847036859) } }, { { UINT32_C( 570510633), UINT32_C( 822171802) }, { UINT32_C(1947492772), UINT32_C( 215333829) }, { UINT32_C(1392682435), UINT32_C(2162826601) } }, { { UINT32_C(1165201242), UINT32_C( 526493008) }, { UINT32_C(3268862677), UINT32_C(2604454770) }, { UINT32_C(1691694250), UINT32_C(1578350151) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vpadd_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vpaddq_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; 
simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 288.42), SIMDE_FLOAT32_C( -21.02), SIMDE_FLOAT32_C( -974.95), SIMDE_FLOAT32_C( -192.40) }, { SIMDE_FLOAT32_C( -577.52), SIMDE_FLOAT32_C( 442.97), SIMDE_FLOAT32_C( -440.24), SIMDE_FLOAT32_C( 115.18) }, { SIMDE_FLOAT32_C( 267.40), SIMDE_FLOAT32_C( -1167.35), SIMDE_FLOAT32_C( -134.54), SIMDE_FLOAT32_C( -325.05) } }, { { SIMDE_FLOAT32_C( 7.02), SIMDE_FLOAT32_C( -617.77), SIMDE_FLOAT32_C( 477.82), SIMDE_FLOAT32_C( 908.71) }, { SIMDE_FLOAT32_C( -987.94), SIMDE_FLOAT32_C( -606.22), SIMDE_FLOAT32_C( -272.93), SIMDE_FLOAT32_C( -275.00) }, { SIMDE_FLOAT32_C( -610.74), SIMDE_FLOAT32_C( 1386.53), SIMDE_FLOAT32_C( -1594.16), SIMDE_FLOAT32_C( -547.94) } }, { { SIMDE_FLOAT32_C( -93.25), SIMDE_FLOAT32_C( 892.97), SIMDE_FLOAT32_C( -408.68), SIMDE_FLOAT32_C( -20.37) }, { SIMDE_FLOAT32_C( -622.02), SIMDE_FLOAT32_C( -239.70), SIMDE_FLOAT32_C( 793.71), SIMDE_FLOAT32_C( -983.86) }, { SIMDE_FLOAT32_C( 799.72), SIMDE_FLOAT32_C( -429.06), SIMDE_FLOAT32_C( -861.72), SIMDE_FLOAT32_C( -190.15) } }, { { SIMDE_FLOAT32_C( -339.96), SIMDE_FLOAT32_C( 415.18), SIMDE_FLOAT32_C( 196.04), SIMDE_FLOAT32_C( -159.70) }, { SIMDE_FLOAT32_C( 128.82), SIMDE_FLOAT32_C( 843.81), SIMDE_FLOAT32_C( 217.86), SIMDE_FLOAT32_C( -582.76) }, { SIMDE_FLOAT32_C( 75.21), SIMDE_FLOAT32_C( 36.34), SIMDE_FLOAT32_C( 972.63), SIMDE_FLOAT32_C( -364.90) } }, { { SIMDE_FLOAT32_C( -177.21), SIMDE_FLOAT32_C( 242.90), SIMDE_FLOAT32_C( 224.84), SIMDE_FLOAT32_C( 245.28) }, { SIMDE_FLOAT32_C( -314.13), SIMDE_FLOAT32_C( 784.61), SIMDE_FLOAT32_C( -639.54), SIMDE_FLOAT32_C( 692.90) }, { SIMDE_FLOAT32_C( 65.70), SIMDE_FLOAT32_C( 470.12), SIMDE_FLOAT32_C( 470.48), SIMDE_FLOAT32_C( 53.36) } }, { { SIMDE_FLOAT32_C( -833.16), SIMDE_FLOAT32_C( 838.28), SIMDE_FLOAT32_C( 601.61), SIMDE_FLOAT32_C( -821.10) }, { SIMDE_FLOAT32_C( -767.94), SIMDE_FLOAT32_C( -671.32), SIMDE_FLOAT32_C( -96.10), SIMDE_FLOAT32_C( 138.81) }, { SIMDE_FLOAT32_C( 5.12), SIMDE_FLOAT32_C( -219.49), 
SIMDE_FLOAT32_C( -1439.26), SIMDE_FLOAT32_C( 42.71) } }, { { SIMDE_FLOAT32_C( -778.36), SIMDE_FLOAT32_C( 495.21), SIMDE_FLOAT32_C( -881.56), SIMDE_FLOAT32_C( -400.38) }, { SIMDE_FLOAT32_C( -744.48), SIMDE_FLOAT32_C( 912.15), SIMDE_FLOAT32_C( -384.24), SIMDE_FLOAT32_C( -84.44) }, { SIMDE_FLOAT32_C( -283.14), SIMDE_FLOAT32_C( -1281.94), SIMDE_FLOAT32_C( 167.67), SIMDE_FLOAT32_C( -468.68) } }, { { SIMDE_FLOAT32_C( 327.32), SIMDE_FLOAT32_C( 811.80), SIMDE_FLOAT32_C( 755.86), SIMDE_FLOAT32_C( -543.86) }, { SIMDE_FLOAT32_C( 655.62), SIMDE_FLOAT32_C( -26.28), SIMDE_FLOAT32_C( -126.62), SIMDE_FLOAT32_C( -521.59) }, { SIMDE_FLOAT32_C( 1139.13), SIMDE_FLOAT32_C( 212.00), SIMDE_FLOAT32_C( 629.33), SIMDE_FLOAT32_C( -648.21) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vpaddq_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vpaddq_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -304.59), SIMDE_FLOAT64_C( 738.24) }, { SIMDE_FLOAT64_C( -491.10), SIMDE_FLOAT64_C( 20.25) }, { SIMDE_FLOAT64_C( 433.66), SIMDE_FLOAT64_C( -470.85) } }, { { SIMDE_FLOAT64_C( 46.86), SIMDE_FLOAT64_C( -650.53) }, { SIMDE_FLOAT64_C( 209.27), SIMDE_FLOAT64_C( 722.54) }, { SIMDE_FLOAT64_C( -603.67), SIMDE_FLOAT64_C( 931.81) } }, { { SIMDE_FLOAT64_C( -507.52), SIMDE_FLOAT64_C( 40.12) }, { SIMDE_FLOAT64_C( 676.59), SIMDE_FLOAT64_C( 843.37) }, { SIMDE_FLOAT64_C( -467.40), SIMDE_FLOAT64_C( 1519.96) } }, { { SIMDE_FLOAT64_C( -613.61), SIMDE_FLOAT64_C( 272.81) }, { SIMDE_FLOAT64_C( 650.30), SIMDE_FLOAT64_C( 222.96) }, { SIMDE_FLOAT64_C( -340.80), SIMDE_FLOAT64_C( 873.26) } }, { { SIMDE_FLOAT64_C( 615.20), SIMDE_FLOAT64_C( -394.78) }, { SIMDE_FLOAT64_C( 469.07), SIMDE_FLOAT64_C( 
54.35) }, { SIMDE_FLOAT64_C( 220.41), SIMDE_FLOAT64_C( 523.42) } }, { { SIMDE_FLOAT64_C( -172.64), SIMDE_FLOAT64_C( 682.30) }, { SIMDE_FLOAT64_C( -786.74), SIMDE_FLOAT64_C( 236.37) }, { SIMDE_FLOAT64_C( 509.66), SIMDE_FLOAT64_C( -550.37) } }, { { SIMDE_FLOAT64_C( 915.30), SIMDE_FLOAT64_C( 782.55) }, { SIMDE_FLOAT64_C( 199.30), SIMDE_FLOAT64_C( -594.15) }, { SIMDE_FLOAT64_C( 1697.85), SIMDE_FLOAT64_C( -394.85) } }, { { SIMDE_FLOAT64_C( 264.00), SIMDE_FLOAT64_C( -410.11) }, { SIMDE_FLOAT64_C( -427.51), SIMDE_FLOAT64_C( 959.41) }, { SIMDE_FLOAT64_C( -146.12), SIMDE_FLOAT64_C( 531.90) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vpaddq_f64(a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vpaddq_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 120), INT8_C( 124), -INT8_C( 100), -INT8_C( 34), -INT8_C( 111), INT8_C( 99), INT8_C( 2), INT8_C( 11), INT8_C( 41), INT8_C( 86), INT8_C( 126), -INT8_C( 10), -INT8_C( 4), INT8_C( 80), -INT8_C( 113), -INT8_C( 35) }, { -INT8_C( 85), -INT8_C( 62), -INT8_C( 41), INT8_C( 122), INT8_C( 100), INT8_C( 10), INT8_C( 105), -INT8_C( 112), -INT8_C( 97), INT8_C( 104), -INT8_C( 126), -INT8_C( 104), -INT8_C( 97), -INT8_C( 30), -INT8_C( 82), INT8_C( 23) }, { -INT8_C( 12), INT8_C( 122), -INT8_C( 12), INT8_C( 13), INT8_MAX, INT8_C( 116), INT8_C( 76), INT8_C( 108), INT8_C( 109), INT8_C( 81), INT8_C( 110), -INT8_C( 7), INT8_C( 7), INT8_C( 26), -INT8_C( 127), -INT8_C( 59) } }, { { INT8_C( 94), INT8_C( 74), -INT8_C( 11), -INT8_C( 17), -INT8_C( 83), -INT8_C( 8), -INT8_C( 6), -INT8_C( 42), INT8_C( 78), INT8_C( 120), -INT8_C( 52), INT8_C( 75), -INT8_C( 56), INT8_C( 92), INT8_C( 40), INT8_C( 115) }, { INT8_C( 30), -INT8_C( 1), -INT8_C( 19), -INT8_C( 125), 
INT8_C( 10), INT8_C( 86), INT8_C( 19), -INT8_C( 87), -INT8_C( 66), -INT8_C( 107), INT8_C( 65), INT8_C( 93), INT8_C( 119), -INT8_C( 17), INT8_C( 117), -INT8_C( 43) }, { -INT8_C( 88), -INT8_C( 28), -INT8_C( 91), -INT8_C( 48), -INT8_C( 58), INT8_C( 23), INT8_C( 36), -INT8_C( 101), INT8_C( 29), INT8_C( 112), INT8_C( 96), -INT8_C( 68), INT8_C( 83), -INT8_C( 98), INT8_C( 102), INT8_C( 74) } }, { { INT8_C( 57), INT8_C( 106), -INT8_C( 60), -INT8_C( 25), INT8_C( 98), -INT8_C( 66), -INT8_C( 67), -INT8_C( 79), INT8_C( 54), -INT8_C( 118), -INT8_C( 4), -INT8_C( 2), -INT8_C( 26), INT8_C( 36), INT8_C( 113), INT8_C( 4) }, { INT8_C( 35), INT8_C( 95), -INT8_C( 121), INT8_C( 45), -INT8_C( 75), -INT8_C( 102), -INT8_C( 41), INT8_C( 116), INT8_C( 47), INT8_C( 24), -INT8_C( 47), -INT8_C( 90), INT8_C( 8), INT8_C( 70), INT8_C( 123), INT8_C( 65) }, { -INT8_C( 93), -INT8_C( 85), INT8_C( 32), INT8_C( 110), -INT8_C( 64), -INT8_C( 6), INT8_C( 10), INT8_C( 117), -INT8_C( 126), -INT8_C( 76), INT8_C( 79), INT8_C( 75), INT8_C( 71), INT8_C( 119), INT8_C( 78), -INT8_C( 68) } }, { { -INT8_C( 79), INT8_C( 63), INT8_C( 40), INT8_C( 19), -INT8_C( 3), -INT8_C( 26), -INT8_C( 60), INT8_C( 51), INT8_C( 112), -INT8_C( 64), INT8_C( 49), INT8_C( 86), -INT8_C( 28), -INT8_C( 93), INT8_C( 90), INT8_C( 8) }, { INT8_C( 2), -INT8_C( 30), INT8_C( 53), -INT8_C( 73), INT8_C( 124), INT8_C( 12), INT8_C( 43), -INT8_C( 84), INT8_C( 37), -INT8_C( 3), INT8_C( 82), INT8_C( 45), INT8_C( 67), -INT8_C( 50), INT8_C( 110), -INT8_C( 12) }, { -INT8_C( 16), INT8_C( 59), -INT8_C( 29), -INT8_C( 9), INT8_C( 48), -INT8_C( 121), -INT8_C( 121), INT8_C( 98), -INT8_C( 28), -INT8_C( 20), -INT8_C( 120), -INT8_C( 41), INT8_C( 34), INT8_MAX, INT8_C( 17), INT8_C( 98) } }, { { INT8_C( 13), -INT8_C( 105), INT8_C( 8), INT8_C( 11), INT8_C( 125), -INT8_C( 52), INT8_C( 62), -INT8_C( 19), -INT8_C( 115), INT8_C( 112), INT8_C( 67), INT8_C( 113), INT8_C( 19), -INT8_C( 99), INT8_C( 121), INT8_C( 21) }, { INT8_MAX, -INT8_C( 81), -INT8_C( 52), -INT8_C( 4), 
-INT8_C( 69), -INT8_C( 8), -INT8_C( 88), -INT8_C( 32), -INT8_C( 11), -INT8_C( 6), INT8_C( 13), INT8_C( 56), -INT8_C( 56), INT8_C( 124), INT8_C( 45), -INT8_C( 42) }, { -INT8_C( 92), INT8_C( 19), INT8_C( 73), INT8_C( 43), -INT8_C( 3), -INT8_C( 76), -INT8_C( 80), -INT8_C( 114), INT8_C( 46), -INT8_C( 56), -INT8_C( 77), -INT8_C( 120), -INT8_C( 17), INT8_C( 69), INT8_C( 68), INT8_C( 3) } }, { { INT8_C( 19), INT8_C( 53), -INT8_C( 31), -INT8_C( 112), INT8_C( 1), INT8_C( 31), INT8_C( 125), -INT8_C( 114), -INT8_C( 113), -INT8_C( 64), INT8_C( 0), -INT8_C( 94), INT8_C( 93), INT8_C( 121), -INT8_C( 73), -INT8_C( 35) }, { INT8_C( 40), -INT8_C( 124), -INT8_C( 39), -INT8_C( 28), INT8_C( 124), -INT8_C( 127), -INT8_C( 60), INT8_C( 113), INT8_C( 123), -INT8_C( 46), -INT8_C( 87), INT8_C( 68), INT8_C( 78), -INT8_C( 42), INT8_C( 26), INT8_C( 97) }, { INT8_C( 72), INT8_C( 113), INT8_C( 32), INT8_C( 11), INT8_C( 79), -INT8_C( 94), -INT8_C( 42), -INT8_C( 108), -INT8_C( 84), -INT8_C( 67), -INT8_C( 3), INT8_C( 53), INT8_C( 77), -INT8_C( 19), INT8_C( 36), INT8_C( 123) } }, { { INT8_C( 11), -INT8_C( 5), -INT8_C( 15), INT8_C( 13), INT8_C( 26), INT8_C( 110), -INT8_C( 101), -INT8_C( 86), INT8_C( 46), -INT8_C( 101), INT8_C( 76), -INT8_C( 117), INT8_C( 21), INT8_C( 4), INT8_C( 104), INT8_C( 61) }, { -INT8_C( 120), INT8_C( 65), INT8_C( 33), INT8_C( 4), -INT8_C( 62), -INT8_C( 26), INT8_C( 117), INT8_C( 62), -INT8_C( 72), INT8_C( 30), -INT8_C( 126), INT8_C( 6), -INT8_C( 11), -INT8_C( 100), INT8_C( 103), INT8_C( 0) }, { INT8_C( 6), -INT8_C( 2), -INT8_C( 120), INT8_C( 69), -INT8_C( 55), -INT8_C( 41), INT8_C( 25), -INT8_C( 91), -INT8_C( 55), INT8_C( 37), -INT8_C( 88), -INT8_C( 77), -INT8_C( 42), -INT8_C( 120), -INT8_C( 111), INT8_C( 103) } }, { { -INT8_C( 105), INT8_C( 88), INT8_C( 13), -INT8_C( 79), -INT8_C( 58), -INT8_C( 87), INT8_C( 91), -INT8_C( 12), INT8_C( 68), -INT8_C( 88), INT8_MAX, INT8_C( 89), -INT8_C( 84), -INT8_C( 24), -INT8_C( 105), INT8_C( 52) }, { INT8_C( 41), -INT8_C( 72), INT8_C( 56), 
-INT8_C( 20), -INT8_C( 98), -INT8_C( 83), INT8_C( 42), INT8_C( 86), -INT8_C( 53), -INT8_C( 84), INT8_C( 92), -INT8_C( 64), INT8_C( 72), -INT8_C( 61), -INT8_C( 63), -INT8_C( 33) }, { -INT8_C( 17), -INT8_C( 66), INT8_C( 111), INT8_C( 79), -INT8_C( 20), -INT8_C( 40), -INT8_C( 108), -INT8_C( 53), -INT8_C( 31), INT8_C( 36), INT8_C( 75), INT8_MIN, INT8_C( 119), INT8_C( 28), INT8_C( 11), -INT8_C( 96) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vpaddq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vpaddq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 29696), INT16_C( 13750), INT16_C( 24319), INT16_C( 26400), INT16_C( 9764), INT16_C( 24428), -INT16_C( 10546), INT16_C( 14168) }, { -INT16_C( 17422), INT16_C( 6892), INT16_C( 26568), INT16_C( 6610), INT16_C( 2432), -INT16_C( 27244), INT16_C( 19912), -INT16_C( 13860) }, { -INT16_C( 15946), -INT16_C( 14817), -INT16_C( 31344), INT16_C( 3622), -INT16_C( 10530), -INT16_C( 32358), -INT16_C( 24812), INT16_C( 6052) } }, { { -INT16_C( 27943), -INT16_C( 9986), INT16_C( 7920), INT16_C( 5439), -INT16_C( 21692), INT16_C( 4724), -INT16_C( 12927), INT16_C( 29769) }, { INT16_C( 13704), INT16_C( 20622), INT16_C( 24733), INT16_C( 7529), -INT16_C( 407), INT16_C( 12979), -INT16_C( 28853), INT16_C( 9467) }, { INT16_C( 27607), INT16_C( 13359), -INT16_C( 16968), INT16_C( 16842), -INT16_C( 31210), INT16_C( 32262), INT16_C( 12572), -INT16_C( 19386) } }, { { -INT16_C( 1759), INT16_C( 4604), INT16_C( 15128), INT16_C( 23590), -INT16_C( 25625), INT16_C( 26735), -INT16_C( 18328), -INT16_C( 3876) }, { INT16_C( 27374), -INT16_C( 29888), -INT16_C( 21814), INT16_C( 13480), INT16_C( 23464), -INT16_C( 3226), INT16_C( 25066), INT16_C( 2839) }, { INT16_C( 2845), 
-INT16_C( 26818), INT16_C( 1110), -INT16_C( 22204), -INT16_C( 2514), -INT16_C( 8334), INT16_C( 20238), INT16_C( 27905) } }, { { INT16_C( 4954), INT16_C( 29213), INT16_C( 17230), INT16_C( 13775), INT16_C( 16094), INT16_C( 18078), INT16_C( 31478), -INT16_C( 7114) }, { INT16_C( 30693), -INT16_C( 20625), INT16_C( 6177), -INT16_C( 13853), INT16_C( 18803), INT16_C( 24252), -INT16_C( 11350), INT16_C( 1385) }, { -INT16_C( 31369), INT16_C( 31005), -INT16_C( 31364), INT16_C( 24364), INT16_C( 10068), -INT16_C( 7676), -INT16_C( 22481), -INT16_C( 9965) } }, { { -INT16_C( 31002), INT16_C( 13431), INT16_C( 18122), -INT16_C( 22422), INT16_C( 2180), INT16_C( 31727), INT16_C( 9602), INT16_C( 26463) }, { -INT16_C( 12388), -INT16_C( 17129), -INT16_C( 1305), INT16_C( 23174), INT16_C( 16964), -INT16_C( 4424), INT16_C( 8725), -INT16_C( 1037) }, { -INT16_C( 17571), -INT16_C( 4300), -INT16_C( 31629), -INT16_C( 29471), -INT16_C( 29517), INT16_C( 21869), INT16_C( 12540), INT16_C( 7688) } }, { { INT16_C( 27560), INT16_C( 29232), -INT16_C( 25935), INT16_C( 13851), INT16_C( 2722), INT16_C( 9393), INT16_C( 4143), -INT16_C( 13172) }, { -INT16_C( 23585), -INT16_C( 14711), INT16_C( 4253), -INT16_C( 7903), -INT16_C( 9902), INT16_C( 26832), -INT16_C( 15365), -INT16_C( 23453) }, { -INT16_C( 8744), -INT16_C( 12084), INT16_C( 12115), -INT16_C( 9029), INT16_C( 27240), -INT16_C( 3650), INT16_C( 16930), INT16_C( 26718) } }, { { -INT16_C( 27858), -INT16_C( 8170), INT16_C( 12589), -INT16_C( 12522), -INT16_C( 14533), INT16_C( 27636), -INT16_C( 32553), -INT16_C( 18633) }, { -INT16_C( 16349), -INT16_C( 16259), -INT16_C( 24880), INT16_C( 9122), INT16_C( 29304), INT16_C( 29579), -INT16_C( 4555), INT16_C( 25623) }, { INT16_C( 29508), INT16_C( 67), INT16_C( 13103), INT16_C( 14350), -INT16_C( 32608), -INT16_C( 15758), -INT16_C( 6653), INT16_C( 21068) } }, { { INT16_C( 11906), -INT16_C( 20668), INT16_C( 23135), -INT16_C( 25729), INT16_C( 29473), -INT16_C( 2042), INT16_C( 15859), INT16_C( 5807) }, { INT16_C( 11773), 
-INT16_C( 12586), INT16_C( 30923), INT16_C( 17393), INT16_C( 31978), INT16_C( 8375), -INT16_C( 12694), -INT16_C( 4988) }, { -INT16_C( 8762), -INT16_C( 2594), INT16_C( 27431), INT16_C( 21666), -INT16_C( 813), -INT16_C( 17220), -INT16_C( 25183), -INT16_C( 17682) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vpaddq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vpaddq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 630741622), INT32_C( 504305028), -INT32_C( 187880710), INT32_C( 867999092) }, { INT32_C( 824916310), -INT32_C( 1005749701), INT32_C( 1610912158), INT32_C( 723032501) }, { INT32_C( 1135046650), INT32_C( 680118382), -INT32_C( 180833391), -INT32_C( 1961022637) } }, { { INT32_C( 2018619635), -INT32_C( 1080663867), -INT32_C( 5020790), INT32_C( 1462923264) }, { -INT32_C( 460825175), INT32_C( 1973982679), -INT32_C( 589976538), INT32_C( 956820806) }, { INT32_C( 937955768), INT32_C( 1457902474), INT32_C( 1513157504), INT32_C( 366844268) } }, { { INT32_C( 1655789725), INT32_C( 1126254520), -INT32_C( 1421683285), -INT32_C( 301829051) }, { -INT32_C( 1462531375), INT32_C( 1176337183), INT32_C( 1847783976), INT32_C( 2091330015) }, { -INT32_C( 1512923051), -INT32_C( 1723512336), -INT32_C( 286194192), -INT32_C( 355853305) } }, { { INT32_C( 987650434), INT32_C( 1266548640), INT32_C( 435666900), INT32_C( 67696947) }, { -INT32_C( 1548952700), INT32_C( 2129250646), -INT32_C( 1695806533), -INT32_C( 1223257035) }, { -INT32_C( 2040768222), INT32_C( 503363847), INT32_C( 580297946), INT32_C( 1375903728) } }, { { -INT32_C( 1913522963), -INT32_C( 925274124), INT32_C( 1659031598), INT32_C( 1298590409) }, { INT32_C( 468784069), -INT32_C( 1734681892), INT32_C( 456296166), INT32_C( 
131221786) }, { INT32_C( 1456170209), -INT32_C( 1337345289), -INT32_C( 1265897823), INT32_C( 587517952) } }, { { INT32_C( 831898429), INT32_C( 1643802162), INT32_C( 130276414), -INT32_C( 1957353018) }, { INT32_C( 430327356), INT32_C( 112279584), -INT32_C( 517872697), INT32_C( 1793717036) }, { -INT32_C( 1819266705), -INT32_C( 1827076604), INT32_C( 542606940), INT32_C( 1275844339) } }, { { -INT32_C( 375685449), INT32_C( 709531116), INT32_C( 925961585), INT32_C( 1942128183) }, { -INT32_C( 309564980), INT32_C( 1894989225), INT32_C( 1297224993), -INT32_C( 1078510840) }, { INT32_C( 333845667), -INT32_C( 1426877528), INT32_C( 1585424245), INT32_C( 218714153) } }, { { -INT32_C( 1515629639), INT32_C( 1523577832), INT32_C( 949026817), INT32_C( 1403737223) }, { INT32_C( 1715484861), -INT32_C( 1764281227), INT32_C( 1373907273), INT32_C( 487693156) }, { INT32_C( 7948193), -INT32_C( 1942203256), -INT32_C( 48796366), INT32_C( 1861600429) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vpaddq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vpaddq_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { INT64_C( 4896844547604500339), INT64_C( 8765501941374251448) }, { -INT64_C( 7785191802955445040), INT64_C( 1730198806973093673) }, { -INT64_C( 4784397584730799829), -INT64_C( 6054992995982351367) } }, { { INT64_C( 1703751951021001014), INT64_C( 7056773999644983266) }, { INT64_C( 1684314300911287422), -INT64_C( 1497452695245042055) }, { INT64_C( 8760525950665984280), INT64_C( 186861605666245367) } }, { { INT64_C( 629079142398964803), -INT64_C( 639111501445216464) }, { INT64_C( 7776910553607507943), INT64_C( 2418646161902200048) }, { -INT64_C( 10032359046251661), -INT64_C( 8251187358199843625) } }, { { 
-INT64_C( 206181738529628939), -INT64_C( 957614104208629691) }, { -INT64_C( 8409709677376931602), -INT64_C( 8387459978995116903) }, { -INT64_C( 1163795842738258630), INT64_C( 1649574417337503111) } }, { { INT64_C( 2335056876039200157), -INT64_C( 979651647068297879) }, { INT64_C( 9021304771970150156), -INT64_C( 4534404492057812240) }, { INT64_C( 1355405228970902278), INT64_C( 4486900279912337916) } }, { { -INT64_C( 1447745090286274324), INT64_C( 7471128028670642104) }, { -INT64_C( 8334121547303321610), -INT64_C( 3997185613633532509) }, { INT64_C( 6023382938384367780), INT64_C( 6115436912772697497) } }, { { -INT64_C( 123152696641276963), INT64_C( 7422609973013293632) }, { -INT64_C( 860300428430994969), INT64_C( 2560932209795736038) }, { INT64_C( 7299457276372016669), INT64_C( 1700631781364741069) } }, { { -INT64_C( 3154265126970713099), INT64_C( 4850501438233637751) }, { INT64_C( 1209450879884104066), -INT64_C( 4945130759824178186) }, { INT64_C( 1696236311262924652), -INT64_C( 3735679879940074120) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_vpaddq_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vpaddq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(204), UINT8_C( 23), UINT8_C(182), UINT8_C(158), UINT8_C(247), UINT8_C(226), UINT8_C(226), UINT8_C(178), UINT8_C(171), UINT8_C(144), UINT8_C( 17), UINT8_C( 54), UINT8_C(172), UINT8_C(220), UINT8_C(208), UINT8_C( 28) }, { UINT8_C(134), UINT8_C(190), UINT8_C(132), UINT8_C(101), UINT8_C(198), UINT8_C(195), UINT8_C(108), UINT8_C( 76), UINT8_C(138), UINT8_C( 36), UINT8_C( 2), UINT8_C(216), UINT8_C( 42), UINT8_C( 49), UINT8_C( 3), UINT8_C(246) }, { UINT8_C(227), UINT8_C( 84), UINT8_C(217), UINT8_C(148), UINT8_C( 59), UINT8_C( 
71), UINT8_C(136), UINT8_C(236), UINT8_C( 68), UINT8_C(233), UINT8_C(137), UINT8_C(184), UINT8_C(174), UINT8_C(218), UINT8_C( 91), UINT8_C(249) } }, { { UINT8_C( 72), UINT8_C(185), UINT8_C(148), UINT8_C( 64), UINT8_C(155), UINT8_C(118), UINT8_C(242), UINT8_C( 70), UINT8_C( 6), UINT8_C( 3), UINT8_C(124), UINT8_C(179), UINT8_C(223), UINT8_C( 77), UINT8_C(207), UINT8_C(101) }, { UINT8_C( 11), UINT8_C( 84), UINT8_C(202), UINT8_C(209), UINT8_C( 23), UINT8_C( 54), UINT8_C( 30), UINT8_C(162), UINT8_C( 91), UINT8_C( 32), UINT8_C(122), UINT8_C(133), UINT8_C( 82), UINT8_C(125), UINT8_C(123), UINT8_C(154) }, { UINT8_C( 1), UINT8_C(212), UINT8_C( 17), UINT8_C( 56), UINT8_C( 9), UINT8_C( 47), UINT8_C( 44), UINT8_C( 52), UINT8_C( 95), UINT8_C(155), UINT8_C( 77), UINT8_C(192), UINT8_C(123), UINT8_MAX, UINT8_C(207), UINT8_C( 21) } }, { { UINT8_C( 54), UINT8_C( 15), UINT8_C(218), UINT8_C(209), UINT8_C(133), UINT8_C(204), UINT8_C( 24), UINT8_C(140), UINT8_C(207), UINT8_C(148), UINT8_C( 63), UINT8_C(174), UINT8_C(225), UINT8_C( 14), UINT8_C( 19), UINT8_C(237) }, { UINT8_C( 98), UINT8_C(222), UINT8_C(190), UINT8_C(122), UINT8_C( 20), UINT8_C(220), UINT8_C( 28), UINT8_C(111), UINT8_C(253), UINT8_C(150), UINT8_C(244), UINT8_C( 79), UINT8_C( 19), UINT8_C(111), UINT8_C(233), UINT8_C( 73) }, { UINT8_C( 69), UINT8_C(171), UINT8_C( 81), UINT8_C(164), UINT8_C( 99), UINT8_C(237), UINT8_C(239), UINT8_C( 0), UINT8_C( 64), UINT8_C( 56), UINT8_C(240), UINT8_C(139), UINT8_C(147), UINT8_C( 67), UINT8_C(130), UINT8_C( 50) } }, { { UINT8_C(126), UINT8_C(196), UINT8_C( 26), UINT8_C( 4), UINT8_C(144), UINT8_C( 50), UINT8_C(144), UINT8_C( 96), UINT8_C(199), UINT8_C(207), UINT8_C( 14), UINT8_C(168), UINT8_C(221), UINT8_C( 34), UINT8_C(149), UINT8_C( 64) }, { UINT8_C( 0), UINT8_C( 84), UINT8_C(186), UINT8_C( 20), UINT8_C( 48), UINT8_C(214), UINT8_C(132), UINT8_C( 45), UINT8_C(108), UINT8_C(120), UINT8_C(124), UINT8_C(127), UINT8_C(232), UINT8_C(102), UINT8_C(200), UINT8_C(102) }, { UINT8_C( 66), UINT8_C( 
30), UINT8_C(194), UINT8_C(240), UINT8_C(150), UINT8_C(182), UINT8_MAX, UINT8_C(213), UINT8_C( 84), UINT8_C(206), UINT8_C( 6), UINT8_C(177), UINT8_C(228), UINT8_C(251), UINT8_C( 78), UINT8_C( 46) } }, { { UINT8_C( 42), UINT8_C(226), UINT8_C(106), UINT8_C(186), UINT8_C( 21), UINT8_C(250), UINT8_C( 26), UINT8_C(220), UINT8_C(201), UINT8_C( 41), UINT8_C(132), UINT8_C(167), UINT8_C( 75), UINT8_C( 26), UINT8_C(231), UINT8_C( 75) }, { UINT8_C(110), UINT8_C(161), UINT8_C( 95), UINT8_C(158), UINT8_C(119), UINT8_C(227), UINT8_C(204), UINT8_C(227), UINT8_C( 92), UINT8_C( 72), UINT8_C( 98), UINT8_C( 68), UINT8_C(174), UINT8_C( 42), UINT8_C(170), UINT8_C(216) }, { UINT8_C( 12), UINT8_C( 36), UINT8_C( 15), UINT8_C(246), UINT8_C(242), UINT8_C( 43), UINT8_C(101), UINT8_C( 50), UINT8_C( 15), UINT8_C(253), UINT8_C( 90), UINT8_C(175), UINT8_C(164), UINT8_C(166), UINT8_C(216), UINT8_C(130) } }, { { UINT8_C( 12), UINT8_C( 21), UINT8_C(147), UINT8_C( 33), UINT8_C( 15), UINT8_C(173), UINT8_C(253), UINT8_C(217), UINT8_C(214), UINT8_C(130), UINT8_C(128), UINT8_C( 33), UINT8_C(156), UINT8_C(103), UINT8_C(108), UINT8_C( 10) }, { UINT8_C( 8), UINT8_C(204), UINT8_C(168), UINT8_C(127), UINT8_C(175), UINT8_C(116), UINT8_C( 98), UINT8_C( 11), UINT8_C(189), UINT8_C(196), UINT8_C( 79), UINT8_C(107), UINT8_C(238), UINT8_C(250), UINT8_C( 68), UINT8_C(250) }, { UINT8_C( 33), UINT8_C(180), UINT8_C(188), UINT8_C(214), UINT8_C( 88), UINT8_C(161), UINT8_C( 3), UINT8_C(118), UINT8_C(212), UINT8_C( 39), UINT8_C( 35), UINT8_C(109), UINT8_C(129), UINT8_C(186), UINT8_C(232), UINT8_C( 62) } }, { { UINT8_C( 15), UINT8_C(215), UINT8_C( 28), UINT8_C( 30), UINT8_C(132), UINT8_C( 25), UINT8_C(247), UINT8_C( 91), UINT8_C(155), UINT8_C(119), UINT8_C(124), UINT8_C( 55), UINT8_C(222), UINT8_C(233), UINT8_C( 65), UINT8_C(230) }, { UINT8_C(181), UINT8_C(234), UINT8_C(101), UINT8_C(100), UINT8_C( 94), UINT8_C(199), UINT8_C(112), UINT8_C( 27), UINT8_C(139), UINT8_C(191), UINT8_C(135), UINT8_C(121), UINT8_C(185), 
UINT8_C(203), UINT8_C(116), UINT8_C(200) }, { UINT8_C(230), UINT8_C( 58), UINT8_C(157), UINT8_C( 82), UINT8_C( 18), UINT8_C(179), UINT8_C(199), UINT8_C( 39), UINT8_C(159), UINT8_C(201), UINT8_C( 37), UINT8_C(139), UINT8_C( 74), UINT8_C( 0), UINT8_C(132), UINT8_C( 60) } }, { { UINT8_C(162), UINT8_C(144), UINT8_C(231), UINT8_C( 38), UINT8_C(169), UINT8_C(222), UINT8_C(129), UINT8_C( 69), UINT8_C( 86), UINT8_C(254), UINT8_C(124), UINT8_C( 52), UINT8_C(231), UINT8_C(190), UINT8_C( 27), UINT8_C(156) }, { UINT8_C(168), UINT8_C(128), UINT8_C( 0), UINT8_C( 6), UINT8_C( 72), UINT8_C(112), UINT8_C( 34), UINT8_C(211), UINT8_C( 48), UINT8_C(169), UINT8_C( 77), UINT8_C(233), UINT8_C(116), UINT8_C(193), UINT8_C(178), UINT8_C( 22) }, { UINT8_C( 50), UINT8_C( 13), UINT8_C(135), UINT8_C(198), UINT8_C( 84), UINT8_C(176), UINT8_C(165), UINT8_C(183), UINT8_C( 40), UINT8_C( 6), UINT8_C(184), UINT8_C(245), UINT8_C(217), UINT8_C( 54), UINT8_C( 53), UINT8_C(200) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vpaddq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vpaddq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C( 9349), UINT16_C(24087), UINT16_C(49567), UINT16_C( 4999), UINT16_C( 2307), UINT16_C(62434), UINT16_C(18810), UINT16_C(11356) }, { UINT16_C(14700), UINT16_C(56484), UINT16_C(45907), UINT16_C(11347), UINT16_C(51476), UINT16_C(16753), UINT16_C( 2394), UINT16_C(57376) }, { UINT16_C(33436), UINT16_C(54566), UINT16_C(64741), UINT16_C(30166), UINT16_C( 5648), UINT16_C(57254), UINT16_C( 2693), UINT16_C(59770) } }, { { UINT16_C(14125), UINT16_C(52542), UINT16_C(50680), UINT16_C(64736), UINT16_C(49871), UINT16_C(18927), UINT16_C(19211), UINT16_C(30837) }, { UINT16_C( 6532), 
UINT16_C(55124), UINT16_C(42957), UINT16_C(57604), UINT16_C(30064), UINT16_C(52002), UINT16_C(17022), UINT16_C(43947) }, { UINT16_C( 1131), UINT16_C(49880), UINT16_C( 3262), UINT16_C(50048), UINT16_C(61656), UINT16_C(35025), UINT16_C(16530), UINT16_C(60969) } }, { { UINT16_C(59770), UINT16_C(29304), UINT16_C(22702), UINT16_C(32110), UINT16_C(23835), UINT16_C( 9926), UINT16_C(15272), UINT16_C(11678) }, { UINT16_C(62293), UINT16_C( 8708), UINT16_C( 2202), UINT16_C( 2819), UINT16_C( 9597), UINT16_C(64470), UINT16_C(33128), UINT16_C(58023) }, { UINT16_C(23538), UINT16_C(54812), UINT16_C(33761), UINT16_C(26950), UINT16_C( 5465), UINT16_C( 5021), UINT16_C( 8531), UINT16_C(25615) } }, { { UINT16_C( 8042), UINT16_C( 6228), UINT16_C(50040), UINT16_C(37782), UINT16_C(23584), UINT16_C(51641), UINT16_C(22680), UINT16_C(60918) }, { UINT16_C(64075), UINT16_C(58639), UINT16_C( 4611), UINT16_C(33008), UINT16_C(50743), UINT16_C(40828), UINT16_C( 9031), UINT16_C(45441) }, { UINT16_C(14270), UINT16_C(22286), UINT16_C( 9689), UINT16_C(18062), UINT16_C(57178), UINT16_C(37619), UINT16_C(26035), UINT16_C(54472) } }, { { UINT16_C(54850), UINT16_C(47818), UINT16_C(24729), UINT16_C(47437), UINT16_C( 1980), UINT16_C(21634), UINT16_C(30815), UINT16_C(43585) }, { UINT16_C(20595), UINT16_C(30351), UINT16_C(32866), UINT16_C(39670), UINT16_C(29254), UINT16_C(36409), UINT16_C(48021), UINT16_C(55359) }, { UINT16_C(37132), UINT16_C( 6630), UINT16_C(23614), UINT16_C( 8864), UINT16_C(50946), UINT16_C( 7000), UINT16_C( 127), UINT16_C(37844) } }, { { UINT16_C( 2449), UINT16_C(10898), UINT16_C(57449), UINT16_C( 9955), UINT16_C(26343), UINT16_C(18042), UINT16_C(48350), UINT16_C(20976) }, { UINT16_C(32524), UINT16_C(28615), UINT16_C(48895), UINT16_C(17929), UINT16_C(16944), UINT16_C(50900), UINT16_C( 5117), UINT16_C(36510) }, { UINT16_C(13347), UINT16_C( 1868), UINT16_C(44385), UINT16_C( 3790), UINT16_C(61139), UINT16_C( 1288), UINT16_C( 2308), UINT16_C(41627) } }, { { UINT16_C(12317), UINT16_C(34488), 
UINT16_C(39952), UINT16_C(63404), UINT16_C( 9986), UINT16_C(57405), UINT16_C(11747), UINT16_C(61234) }, { UINT16_C(63917), UINT16_C(44126), UINT16_C(26551), UINT16_C(59634), UINT16_C(50858), UINT16_C(42926), UINT16_C(19674), UINT16_C(63286) }, { UINT16_C(46805), UINT16_C(37820), UINT16_C( 1855), UINT16_C( 7445), UINT16_C(42507), UINT16_C(20649), UINT16_C(28248), UINT16_C(17424) } }, { { UINT16_C(61052), UINT16_C(36221), UINT16_C(10890), UINT16_C(35972), UINT16_C(49745), UINT16_C(13421), UINT16_C(40943), UINT16_C(39971) }, { UINT16_C(33432), UINT16_C(20553), UINT16_C(15337), UINT16_C(37688), UINT16_C(58882), UINT16_C(56379), UINT16_C(28978), UINT16_C(44755) }, { UINT16_C(31737), UINT16_C(46862), UINT16_C(63166), UINT16_C(15378), UINT16_C(53985), UINT16_C(53025), UINT16_C(49725), UINT16_C( 8197) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vpaddq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vpaddq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(3816938315), UINT32_C(2809583990), UINT32_C(3278927444), UINT32_C(3055051205) }, { UINT32_C(1596983728), UINT32_C( 356509708), UINT32_C(2588010953), UINT32_C( 637551579) }, { UINT32_C(2331555009), UINT32_C(2039011353), UINT32_C(1953493436), UINT32_C(3225562532) } }, { { UINT32_C(2500428063), UINT32_C(2939977819), UINT32_C(3144854774), UINT32_C(3195112206) }, { UINT32_C(2686362004), UINT32_C(1404394889), UINT32_C( 569243206), UINT32_C(1497951802) }, { UINT32_C(1145438586), UINT32_C(2044999684), UINT32_C(4090756893), UINT32_C(2067195008) } }, { { UINT32_C(3404616047), UINT32_C(3346672337), UINT32_C(3833785558), UINT32_C( 195294071) }, { UINT32_C( 514572693), UINT32_C(1685152030), UINT32_C(2441502551), UINT32_C(3169504845) 
}, { UINT32_C(2456321088), UINT32_C(4029079629), UINT32_C(2199724723), UINT32_C(1316040100) } }, { { UINT32_C(4052211743), UINT32_C(3652714754), UINT32_C(1706900461), UINT32_C(3278921774) }, { UINT32_C(1071782945), UINT32_C(3567539069), UINT32_C(4284885682), UINT32_C( 398217464) }, { UINT32_C(3409959201), UINT32_C( 690854939), UINT32_C( 344354718), UINT32_C( 388135850) } }, { { UINT32_C( 721961769), UINT32_C( 822395204), UINT32_C( 714523388), UINT32_C(1156450082) }, { UINT32_C(2692993059), UINT32_C(3597936419), UINT32_C(1238751825), UINT32_C(1415680299) }, { UINT32_C(1544356973), UINT32_C(1870973470), UINT32_C(1995962182), UINT32_C(2654432124) } }, { { UINT32_C( 411003348), UINT32_C( 642417706), UINT32_C(1750196294), UINT32_C( 179060711) }, { UINT32_C( 850014223), UINT32_C(2835881815), UINT32_C( 619896569), UINT32_C(1148736367) }, { UINT32_C(1053421054), UINT32_C(1929257005), UINT32_C(3685896038), UINT32_C(1768632936) } }, { { UINT32_C(3881629885), UINT32_C(3255740028), UINT32_C(1848270727), UINT32_C(2910443422) }, { UINT32_C(1591681799), UINT32_C(1007151170), UINT32_C( 895548102), UINT32_C( 175757645) }, { UINT32_C(2842402617), UINT32_C( 463746853), UINT32_C(2598832969), UINT32_C(1071305747) } }, { { UINT32_C(1307760337), UINT32_C( 51314812), UINT32_C(4252121439), UINT32_C( 397077264) }, { UINT32_C(1366722830), UINT32_C( 932019569), UINT32_C(3312315767), UINT32_C(2546984646) }, { UINT32_C(1359075149), UINT32_C( 354231407), UINT32_C(2298742399), UINT32_C(1564333117) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vpaddq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vpaddq_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(15513978132590441720), 
UINT64_C(18412383231435127934) }, { UINT64_C(15173798401319091866), UINT64_C( 7554486488907008214) }, { UINT64_C(15479617290316018038), UINT64_C( 4281540816516548464) } }, { { UINT64_C( 5179969466727272737), UINT64_C( 8407314346400738990) }, { UINT64_C(12652720048107532165), UINT64_C( 8114699618393940678) }, { UINT64_C(13587283813128011727), UINT64_C( 2320675592791921227) } }, { { UINT64_C(16381383177064366917), UINT64_C(16472263491434092619) }, { UINT64_C(14299563238393217771), UINT64_C( 5432072351392715171) }, { UINT64_C(14406902594788907920), UINT64_C( 1284891516076381326) } }, { { UINT64_C( 9652814040289369564), UINT64_C(12432824220313617735) }, { UINT64_C( 1769037749118055622), UINT64_C(18231816079301838771) }, { UINT64_C( 3638894186893435683), UINT64_C( 1554109754710342777) } }, { { UINT64_C( 6055094155079540041), UINT64_C(13544723167905034890) }, { UINT64_C(18322346746262415545), UINT64_C( 410596935221072021) }, { UINT64_C( 1153073249275023315), UINT64_C( 286199607773935950) } }, { { UINT64_C( 5799106650900578584), UINT64_C(13038928049068537154) }, { UINT64_C( 1676510600800400891), UINT64_C(14860611816019327865) }, { UINT64_C( 391290626259564122), UINT64_C(16537122416819728756) } }, { { UINT64_C(10905306329890735169), UINT64_C( 9336457387018547939) }, { UINT64_C( 1644108111038346295), UINT64_C( 6884746120955469318) }, { UINT64_C( 1795019643199731492), UINT64_C( 8528854231993815613) } }, { { UINT64_C(15003669511904067404), UINT64_C( 51580054159558319) }, { UINT64_C(10405158514773884595), UINT64_C( 9775019265358672682) }, { UINT64_C(15055249566063625723), UINT64_C( 1733433706423005661) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vpaddq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN 
SIMDE_TEST_FUNC_LIST_ENTRY(vpadd_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vpadd_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vpadd_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vpadd_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vpadd_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vpadd_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vpadd_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/paddl.c000066400000000000000000000626601400333146700165610ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN paddl #include "test-neon.h" #include static int test_simde_vpaddl_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int16_t r[4]; } test_vec[] = { { { INT8_C( 37), INT8_C( 75), -INT8_C( 87), -INT8_C( 55), -INT8_C( 42), INT8_C( 118), INT8_C( 42), INT8_C( 29) }, { INT16_C( 112), -INT16_C( 142), INT16_C( 76), INT16_C( 71) } }, { { -INT8_C( 107), INT8_C( 110), INT8_C( 20), INT8_C( 7), INT8_C( 86), -INT8_C( 94), -INT8_C( 118), -INT8_C( 25) }, { INT16_C( 3), INT16_C( 27), -INT16_C( 8), -INT16_C( 143) } }, { { INT8_C( 39), -INT8_C( 93), -INT8_C( 18), INT8_C( 40), INT8_C( 38), INT8_C( 46), INT8_C( 10), INT8_C( 77) }, { -INT16_C( 54), INT16_C( 22), INT16_C( 84), INT16_C( 87) } }, { { INT8_C( 36), -INT8_C( 29), -INT8_C( 5), INT8_C( 78), -INT8_C( 115), -INT8_C( 27), -INT8_C( 88), -INT8_C( 78) }, { INT16_C( 7), INT16_C( 73), -INT16_C( 142), -INT16_C( 166) } }, { { INT8_C( 48), INT8_C( 82), INT8_C( 123), INT8_C( 7), -INT8_C( 56), -INT8_C( 90), INT8_C( 36), INT8_C( 93) }, { INT16_C( 130), INT16_C( 130), -INT16_C( 146), INT16_C( 129) } }, { { INT8_C( 20), INT8_C( 57), INT8_C( 100), INT8_C( 106), -INT8_C( 37), -INT8_C( 17), INT8_C( 81), 
INT8_C( 2) }, { INT16_C( 77), INT16_C( 206), -INT16_C( 54), INT16_C( 83) } }, { { -INT8_C( 110), INT8_C( 63), INT8_C( 43), -INT8_C( 71), INT8_C( 110), INT8_C( 53), INT8_C( 6), -INT8_C( 110) }, { -INT16_C( 47), -INT16_C( 28), INT16_C( 163), -INT16_C( 104) } }, { { INT8_C( 25), INT8_C( 1), -INT8_C( 32), -INT8_C( 90), -INT8_C( 26), -INT8_C( 119), INT8_C( 89), INT8_C( 23) }, { INT16_C( 26), -INT16_C( 122), -INT16_C( 145), INT16_C( 112) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int16x4_t r = simde_vpaddl_s8(a); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vpaddl_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int32_t r[2]; } test_vec[] = { { { -INT16_C( 28169), INT16_C( 12604), INT16_C( 18912), INT16_C( 16948) }, { -INT32_C( 15565), INT32_C( 35860) } }, { { -INT16_C( 23339), INT16_C( 4117), -INT16_C( 5015), INT16_C( 18679) }, { -INT32_C( 19222), INT32_C( 13664) } }, { { -INT16_C( 21626), INT16_C( 28835), -INT16_C( 8629), -INT16_C( 31563) }, { INT32_C( 7209), -INT32_C( 40192) } }, { { -INT16_C( 13057), -INT16_C( 10221), -INT16_C( 5231), -INT16_C( 30631) }, { -INT32_C( 23278), -INT32_C( 35862) } }, { { -INT16_C( 27011), INT16_C( 23994), -INT16_C( 4385), -INT16_C( 19297) }, { -INT32_C( 3017), -INT32_C( 23682) } }, { { -INT16_C( 19054), -INT16_C( 828), -INT16_C( 17247), INT16_C( 10052) }, { -INT32_C( 19882), -INT32_C( 7195) } }, { { -INT16_C( 6297), -INT16_C( 19560), INT16_C( 19909), -INT16_C( 15305) }, { -INT32_C( 25857), INT32_C( 4604) } }, { { INT16_C( 18970), -INT16_C( 21604), -INT16_C( 2506), -INT16_C( 19660) }, { -INT32_C( 2634), -INT32_C( 22166) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int32x2_t r = simde_vpaddl_s16(a); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } 
static int test_simde_vpaddl_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int64_t r[1]; } test_vec[] = { { { -INT32_C( 1059767371), -INT32_C( 8047219) }, { -INT64_C( 1067814590) } }, { { INT32_C( 656763255), -INT32_C( 226202987) }, { INT64_C( 430560268) } }, { { -INT32_C( 2065914833), INT32_C( 230074665) }, { -INT64_C( 1835840168) } }, { { -INT32_C( 2040001686), -INT32_C( 611631322) }, { -INT64_C( 2651633008) } }, { { INT32_C( 127623290), INT32_C( 218505621) }, { INT64_C( 346128911) } }, { { INT32_C( 523512714), -INT32_C( 988694379) }, { -INT64_C( 465181665) } }, { { -INT32_C( 2058752676), INT32_C( 9699222) }, { -INT64_C( 2049053454) } }, { { INT32_C( 814152458), -INT32_C( 1274343110) }, { -INT64_C( 460190652) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int64x1_t r = simde_vpaddl_s32(a); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; } static int test_simde_vpaddl_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint16_t r[4]; } test_vec[] = { { { UINT8_C( 70), UINT8_C(221), UINT8_C(124), UINT8_C(147), UINT8_C(112), UINT8_C(157), UINT8_C(115), UINT8_C( 88) }, { UINT16_C( 291), UINT16_C( 271), UINT16_C( 269), UINT16_C( 203) } }, { { UINT8_C(174), UINT8_C( 77), UINT8_C(253), UINT8_C(160), UINT8_C(176), UINT8_C(227), UINT8_C(112), UINT8_C(129) }, { UINT16_C( 251), UINT16_C( 413), UINT16_C( 403), UINT16_C( 241) } }, { { UINT8_C(169), UINT8_C(158), UINT8_C(189), UINT8_C(216), UINT8_C(189), UINT8_C( 90), UINT8_C(193), UINT8_C(141) }, { UINT16_C( 327), UINT16_C( 405), UINT16_C( 279), UINT16_C( 334) } }, { { UINT8_C( 21), UINT8_C(153), UINT8_C( 3), UINT8_C( 46), UINT8_C( 35), UINT8_C( 5), UINT8_C( 6), UINT8_C(106) }, { UINT16_C( 174), UINT16_C( 49), UINT16_C( 40), UINT16_C( 112) } }, { { UINT8_C(227), UINT8_C(130), UINT8_C(253), UINT8_C( 83), UINT8_C( 32), UINT8_C(112), UINT8_C(172), UINT8_C(206) }, { UINT16_C( 357), UINT16_C( 
336), UINT16_C( 144), UINT16_C( 378) } }, { { UINT8_C(189), UINT8_C(169), UINT8_C(111), UINT8_C(109), UINT8_C(141), UINT8_C(223), UINT8_C(239), UINT8_C( 54) }, { UINT16_C( 358), UINT16_C( 220), UINT16_C( 364), UINT16_C( 293) } }, { { UINT8_C(125), UINT8_C(172), UINT8_C( 14), UINT8_C( 59), UINT8_C( 6), UINT8_C(207), UINT8_C(200), UINT8_C( 27) }, { UINT16_C( 297), UINT16_C( 73), UINT16_C( 213), UINT16_C( 227) } }, { { UINT8_C(104), UINT8_C(203), UINT8_C( 73), UINT8_C(140), UINT8_C(209), UINT8_C( 80), UINT8_C(246), UINT8_C(180) }, { UINT16_C( 307), UINT16_C( 213), UINT16_C( 289), UINT16_C( 426) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint16x4_t r = simde_vpaddl_u8(a); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vpaddl_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint32_t r[2]; } test_vec[] = { { { UINT16_C(44535), UINT16_C(20080), UINT16_C( 7327), UINT16_C( 723) }, { UINT32_C( 64615), UINT32_C( 8050) } }, { { UINT16_C(26434), UINT16_C(28458), UINT16_C(59172), UINT16_C( 7820) }, { UINT32_C( 54892), UINT32_C( 66992) } }, { { UINT16_C(24764), UINT16_C(43747), UINT16_C(19848), UINT16_C(37555) }, { UINT32_C( 68511), UINT32_C( 57403) } }, { { UINT16_C(24368), UINT16_C(46049), UINT16_C( 4980), UINT16_C(27626) }, { UINT32_C( 70417), UINT32_C( 32606) } }, { { UINT16_C(23233), UINT16_C(24762), UINT16_C(36215), UINT16_C(47458) }, { UINT32_C( 47995), UINT32_C( 83673) } }, { { UINT16_C(36340), UINT16_C( 6184), UINT16_C(46196), UINT16_C(12342) }, { UINT32_C( 42524), UINT32_C( 58538) } }, { { UINT16_C( 6421), UINT16_C(40411), UINT16_C(36455), UINT16_C(38703) }, { UINT32_C( 46832), UINT32_C( 75158) } }, { { UINT16_C( 4333), UINT16_C(24906), UINT16_C(13348), UINT16_C(58828) }, { UINT32_C( 29239), UINT32_C( 72176) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = 
simde_vld1_u16(test_vec[i].a); simde_uint32x2_t r = simde_vpaddl_u16(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vpaddl_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint64_t r[1]; } test_vec[] = { { { UINT32_C(3644053684), UINT32_C(4206572357) }, { UINT64_C( 7850626041) } }, { { UINT32_C( 281834052), UINT32_C(1790485039) }, { UINT64_C( 2072319091) } }, { { UINT32_C( 645429519), UINT32_C(1131238590) }, { UINT64_C( 1776668109) } }, { { UINT32_C(2329321534), UINT32_C( 551106155) }, { UINT64_C( 2880427689) } }, { { UINT32_C(1425607950), UINT32_C(2236527936) }, { UINT64_C( 3662135886) } }, { { UINT32_C(1452612135), UINT32_C(3435155132) }, { UINT64_C( 4887767267) } }, { { UINT32_C(2247243975), UINT32_C(3418906509) }, { UINT64_C( 5666150484) } }, { { UINT32_C(1800838656), UINT32_C(3817549780) }, { UINT64_C( 5618388436) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint64x1_t r = simde_vpaddl_u32(a); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; } static int test_simde_vpaddlq_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int16_t r[8]; } test_vec[] = { { { -INT8_C( 28), INT8_C( 34), -INT8_C( 114), INT8_C( 121), -INT8_C( 18), -INT8_C( 116), INT8_C( 4), INT8_C( 105), INT8_C( 15), INT8_C( 2), -INT8_C( 99), INT8_C( 37), INT8_C( 35), INT8_C( 11), INT8_C( 104), -INT8_C( 83) }, { INT16_C( 6), INT16_C( 7), -INT16_C( 134), INT16_C( 109), INT16_C( 17), -INT16_C( 62), INT16_C( 46), INT16_C( 21) } }, { { -INT8_C( 4), -INT8_C( 12), INT8_C( 98), -INT8_C( 51), -INT8_C( 63), INT8_C( 25), -INT8_C( 83), INT8_C( 12), -INT8_C( 79), INT8_C( 69), -INT8_C( 116), -INT8_C( 7), -INT8_C( 32), INT8_C( 114), INT8_C( 30), -INT8_C( 60) }, { -INT16_C( 16), INT16_C( 47), -INT16_C( 38), -INT16_C( 71), -INT16_C( 10), -INT16_C( 123), INT16_C( 82), -INT16_C( 30) } }, { { -INT8_C( 108), -INT8_C( 
83), INT8_C( 62), -INT8_C( 125), INT8_C( 57), INT8_C( 66), -INT8_C( 20), INT8_C( 72), INT8_C( 68), -INT8_C( 119), INT8_C( 110), INT8_C( 103), -INT8_C( 108), -INT8_C( 42), INT8_C( 21), -INT8_C( 112) }, { -INT16_C( 191), -INT16_C( 63), INT16_C( 123), INT16_C( 52), -INT16_C( 51), INT16_C( 213), -INT16_C( 150), -INT16_C( 91) } }, { { -INT8_C( 53), INT8_C( 119), INT8_C( 93), -INT8_C( 116), -INT8_C( 112), INT8_C( 10), -INT8_C( 103), INT8_C( 65), INT8_C( 80), INT8_C( 37), INT8_C( 59), INT8_C( 48), -INT8_C( 105), INT8_C( 89), -INT8_C( 12), INT8_C( 43) }, { INT16_C( 66), -INT16_C( 23), -INT16_C( 102), -INT16_C( 38), INT16_C( 117), INT16_C( 107), -INT16_C( 16), INT16_C( 31) } }, { { INT8_C( 6), INT8_C( 50), -INT8_C( 82), INT8_C( 64), INT8_C( 116), -INT8_C( 102), -INT8_C( 120), -INT8_C( 72), INT8_C( 35), -INT8_C( 10), INT8_C( 32), -INT8_C( 73), -INT8_C( 51), INT8_C( 53), INT8_C( 71), -INT8_C( 104) }, { INT16_C( 56), -INT16_C( 18), INT16_C( 14), -INT16_C( 192), INT16_C( 25), -INT16_C( 41), INT16_C( 2), -INT16_C( 33) } }, { { -INT8_C( 84), -INT8_C( 92), INT8_C( 36), INT8_C( 60), -INT8_C( 81), -INT8_C( 67), INT8_C( 126), -INT8_C( 1), -INT8_C( 30), -INT8_C( 71), INT8_C( 47), INT8_C( 121), INT8_C( 18), INT8_C( 35), -INT8_C( 91), INT8_C( 25) }, { -INT16_C( 176), INT16_C( 96), -INT16_C( 148), INT16_C( 125), -INT16_C( 101), INT16_C( 168), INT16_C( 53), -INT16_C( 66) } }, { { INT8_C( 86), INT8_C( 83), INT8_C( 89), -INT8_C( 54), -INT8_C( 18), -INT8_C( 31), -INT8_C( 125), INT8_C( 17), -INT8_C( 40), -INT8_C( 93), -INT8_C( 55), -INT8_C( 91), -INT8_C( 40), INT8_C( 16), INT8_C( 61), -INT8_C( 124) }, { INT16_C( 169), INT16_C( 35), -INT16_C( 49), -INT16_C( 108), -INT16_C( 133), -INT16_C( 146), -INT16_C( 24), -INT16_C( 63) } }, { { -INT8_C( 75), INT8_C( 97), -INT8_C( 64), INT8_C( 100), INT8_C( 31), INT8_C( 62), INT8_C( 99), INT8_C( 1), -INT8_C( 9), -INT8_C( 110), INT8_C( 123), INT8_C( 10), -INT8_C( 75), INT8_C( 32), INT8_C( 35), INT8_C( 11) }, { INT16_C( 22), INT16_C( 36), INT16_C( 93), 
INT16_C( 100), -INT16_C( 119), INT16_C( 133), -INT16_C( 43), INT16_C( 46) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int16x8_t r = simde_vpaddlq_s8(a); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vpaddlq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int32_t r[4]; } test_vec[] = { { { -INT16_C( 31604), -INT16_C( 11562), INT16_C( 27564), -INT16_C( 22459), -INT16_C( 21679), -INT16_C( 5440), INT16_C( 8433), INT16_C( 11191) }, { -INT32_C( 43166), INT32_C( 5105), -INT32_C( 27119), INT32_C( 19624) } }, { { -INT16_C( 22772), -INT16_C( 17814), INT16_C( 14348), INT16_C( 5693), -INT16_C( 20501), INT16_C( 16667), INT16_C( 4751), INT16_C( 6915) }, { -INT32_C( 40586), INT32_C( 20041), -INT32_C( 3834), INT32_C( 11666) } }, { { -INT16_C( 9578), INT16_C( 17133), INT16_C( 12869), -INT16_C( 26901), -INT16_C( 21539), -INT16_C( 12672), INT16_C( 14283), -INT16_C( 9991) }, { INT32_C( 7555), -INT32_C( 14032), -INT32_C( 34211), INT32_C( 4292) } }, { { INT16_C( 25822), -INT16_C( 5230), -INT16_C( 12388), -INT16_C( 30975), INT16_C( 7294), INT16_C( 3529), -INT16_C( 13266), -INT16_C( 15320) }, { INT32_C( 20592), -INT32_C( 43363), INT32_C( 10823), -INT32_C( 28586) } }, { { INT16_C( 5798), -INT16_C( 5369), -INT16_C( 3512), INT16_C( 9857), INT16_C( 669), INT16_C( 27124), -INT16_C( 4551), INT16_C( 6209) }, { INT32_C( 429), INT32_C( 6345), INT32_C( 27793), INT32_C( 1658) } }, { { -INT16_C( 11438), -INT16_C( 4605), INT16_C( 1186), INT16_C( 8309), INT16_C( 15905), INT16_C( 20270), INT16_C( 22027), -INT16_C( 20204) }, { -INT32_C( 16043), INT32_C( 9495), INT32_C( 36175), INT32_C( 1823) } }, { { INT16_C( 7020), -INT16_C( 19043), INT16_C( 7693), -INT16_C( 21797), -INT16_C( 12512), INT16_C( 23059), INT16_C( 21693), INT16_C( 3954) }, { -INT32_C( 12023), -INT32_C( 14104), INT32_C( 10547), INT32_C( 25647) } }, { { INT16_C( 29991), -INT16_C( 
13827), INT16_C( 29561), -INT16_C( 25878), INT16_C( 6321), -INT16_C( 17174), -INT16_C( 402), -INT16_C( 9362) }, { INT32_C( 16164), INT32_C( 3683), -INT32_C( 10853), -INT32_C( 9764) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int32x4_t r = simde_vpaddlq_s16(a); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vpaddlq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int64_t r[2]; } test_vec[] = { { { -INT32_C( 179405244), -INT32_C( 1105081452), -INT32_C( 1366598320), -INT32_C( 141294459) }, { -INT64_C( 1284486696), -INT64_C( 1507892779) } }, { { INT32_C( 1123194590), -INT32_C( 163436825), -INT32_C( 140019302), -INT32_C( 70761545) }, { INT64_C( 959757765), -INT64_C( 210780847) } }, { { INT32_C( 1441863361), INT32_C( 840176353), -INT32_C( 186605713), -INT32_C( 2115210077) }, { INT64_C( 2282039714), -INT64_C( 2301815790) } }, { { -INT32_C( 222044661), -INT32_C( 1628961276), INT32_C( 915771263), -INT32_C( 1808638254) }, { -INT64_C( 1851005937), -INT64_C( 892866991) } }, { { INT32_C( 1458119540), -INT32_C( 1517748939), INT32_C( 1083795613), -INT32_C( 389904931) }, { -INT64_C( 59629399), INT64_C( 693890682) } }, { { INT32_C( 1759151460), INT32_C( 185057931), INT32_C( 591502417), INT32_C( 1874293754) }, { INT64_C( 1944209391), INT64_C( 2465796171) } }, { { -INT32_C( 859463274), INT32_C( 997281182), -INT32_C( 1837364555), -INT32_C( 193315184) }, { INT64_C( 137817908), -INT64_C( 2030679739) } }, { { INT32_C( 1331451075), INT32_C( 1733976854), -INT32_C( 91514112), -INT32_C( 1519828465) }, { INT64_C( 3065427929), -INT64_C( 1611342577) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int64x2_t r = simde_vpaddlq_s32(a); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int 
test_simde_vpaddlq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint16_t r[8]; } test_vec[] = { { { UINT8_C( 15), UINT8_C(158), UINT8_C(165), UINT8_C( 64), UINT8_C(119), UINT8_C(178), UINT8_C(200), UINT8_C(231), UINT8_C( 24), UINT8_C(237), UINT8_C( 46), UINT8_C(127), UINT8_C(140), UINT8_C(113), UINT8_C(193), UINT8_C(161) }, { UINT16_C( 173), UINT16_C( 229), UINT16_C( 297), UINT16_C( 431), UINT16_C( 261), UINT16_C( 173), UINT16_C( 253), UINT16_C( 354) } }, { { UINT8_C(221), UINT8_C( 70), UINT8_C(171), UINT8_C(120), UINT8_C(167), UINT8_C(165), UINT8_C(178), UINT8_C( 1), UINT8_C(226), UINT8_C( 25), UINT8_C( 93), UINT8_C(232), UINT8_C( 92), UINT8_C(136), UINT8_C( 14), UINT8_C(108) }, { UINT16_C( 291), UINT16_C( 291), UINT16_C( 332), UINT16_C( 179), UINT16_C( 251), UINT16_C( 325), UINT16_C( 228), UINT16_C( 122) } }, { { UINT8_C( 38), UINT8_C(180), UINT8_C(172), UINT8_C(157), UINT8_C(102), UINT8_C(117), UINT8_C(133), UINT8_C(127), UINT8_C( 98), UINT8_C(179), UINT8_C(254), UINT8_C(238), UINT8_C( 36), UINT8_C(191), UINT8_C(144), UINT8_C( 1) }, { UINT16_C( 218), UINT16_C( 329), UINT16_C( 219), UINT16_C( 260), UINT16_C( 277), UINT16_C( 492), UINT16_C( 227), UINT16_C( 145) } }, { { UINT8_C( 6), UINT8_C( 59), UINT8_C(121), UINT8_C(173), UINT8_C(225), UINT8_C( 44), UINT8_C(174), UINT8_C(195), UINT8_C( 69), UINT8_C( 11), UINT8_C(171), UINT8_C(162), UINT8_C(147), UINT8_C(186), UINT8_C( 14), UINT8_C(186) }, { UINT16_C( 65), UINT16_C( 294), UINT16_C( 269), UINT16_C( 369), UINT16_C( 80), UINT16_C( 333), UINT16_C( 333), UINT16_C( 200) } }, { { UINT8_C(110), UINT8_C(186), UINT8_C( 87), UINT8_C(212), UINT8_C( 47), UINT8_C(220), UINT8_C( 83), UINT8_C(145), UINT8_C(143), UINT8_C( 82), UINT8_C(128), UINT8_C(180), UINT8_C( 17), UINT8_C( 16), UINT8_C(181), UINT8_C( 23) }, { UINT16_C( 296), UINT16_C( 299), UINT16_C( 267), UINT16_C( 228), UINT16_C( 225), UINT16_C( 308), UINT16_C( 33), UINT16_C( 204) } }, { { UINT8_C( 75), UINT8_C( 47), UINT8_C(196), UINT8_C( 44), UINT8_C( 91), 
UINT8_C(114), UINT8_C(239), UINT8_C(160), UINT8_C(126), UINT8_C(155), UINT8_C( 66), UINT8_C( 17), UINT8_C( 85), UINT8_C( 80), UINT8_C(203), UINT8_C(195) }, { UINT16_C( 122), UINT16_C( 240), UINT16_C( 205), UINT16_C( 399), UINT16_C( 281), UINT16_C( 83), UINT16_C( 165), UINT16_C( 398) } }, { { UINT8_C( 11), UINT8_C( 35), UINT8_C(151), UINT8_C( 58), UINT8_MAX, UINT8_C(235), UINT8_C(204), UINT8_C(143), UINT8_C( 61), UINT8_C( 76), UINT8_C( 67), UINT8_C( 78), UINT8_C( 92), UINT8_C(248), UINT8_C(102), UINT8_C(167) }, { UINT16_C( 46), UINT16_C( 209), UINT16_C( 490), UINT16_C( 347), UINT16_C( 137), UINT16_C( 145), UINT16_C( 340), UINT16_C( 269) } }, { { UINT8_C( 39), UINT8_C( 42), UINT8_C(212), UINT8_C(130), UINT8_C(157), UINT8_C(195), UINT8_C( 35), UINT8_C( 27), UINT8_C( 94), UINT8_C(101), UINT8_C( 44), UINT8_C(179), UINT8_C(182), UINT8_C(248), UINT8_C(118), UINT8_C(193) }, { UINT16_C( 81), UINT16_C( 342), UINT16_C( 352), UINT16_C( 62), UINT16_C( 195), UINT16_C( 223), UINT16_C( 430), UINT16_C( 311) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint16x8_t r = simde_vpaddlq_u8(a); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vpaddlq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint32_t r[4]; } test_vec[] = { { { UINT16_C(45646), UINT16_C(37940), UINT16_C(24613), UINT16_C( 4010), UINT16_C(40572), UINT16_C(35232), UINT16_C(47266), UINT16_C(32358) }, { UINT32_C( 83586), UINT32_C( 28623), UINT32_C( 75804), UINT32_C( 79624) } }, { { UINT16_C(56173), UINT16_C(35384), UINT16_C(13142), UINT16_C(63375), UINT16_C(22379), UINT16_C(33653), UINT16_C(11990), UINT16_C( 9253) }, { UINT32_C( 91557), UINT32_C( 76517), UINT32_C( 56032), UINT32_C( 21243) } }, { { UINT16_C(23008), UINT16_C( 1720), UINT16_C(25273), UINT16_C(13845), UINT16_C(46592), UINT16_C(41663), UINT16_C( 9582), UINT16_C(56097) }, { UINT32_C( 24728), UINT32_C( 
39118), UINT32_C( 88255), UINT32_C( 65679) } }, { { UINT16_C(22784), UINT16_C(22117), UINT16_C(62860), UINT16_C(63309), UINT16_C(49996), UINT16_C( 8826), UINT16_C(40945), UINT16_C(53831) }, { UINT32_C( 44901), UINT32_C( 126169), UINT32_C( 58822), UINT32_C( 94776) } }, { { UINT16_C(65528), UINT16_C(45784), UINT16_C(60770), UINT16_C(25320), UINT16_C(42915), UINT16_C( 4613), UINT16_C( 9932), UINT16_C(52461) }, { UINT32_C( 111312), UINT32_C( 86090), UINT32_C( 47528), UINT32_C( 62393) } }, { { UINT16_C(21375), UINT16_C( 2850), UINT16_C(28744), UINT16_C(37890), UINT16_C(31795), UINT16_C( 9399), UINT16_C(65052), UINT16_C( 5366) }, { UINT32_C( 24225), UINT32_C( 66634), UINT32_C( 41194), UINT32_C( 70418) } }, { { UINT16_C(52989), UINT16_C(24518), UINT16_C(44732), UINT16_C(24514), UINT16_C(51029), UINT16_C( 8561), UINT16_C(24557), UINT16_C(27885) }, { UINT32_C( 77507), UINT32_C( 69246), UINT32_C( 59590), UINT32_C( 52442) } }, { { UINT16_C( 4274), UINT16_C(64119), UINT16_C(31104), UINT16_C(45966), UINT16_C(17909), UINT16_C( 4567), UINT16_C(52803), UINT16_C(16678) }, { UINT32_C( 68393), UINT32_C( 77070), UINT32_C( 22476), UINT32_C( 69481) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint32x4_t r = simde_vpaddlq_u16(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vpaddlq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint64_t r[2]; } test_vec[] = { { { UINT32_C( 555135081), UINT32_C(1421955818), UINT32_C(2255016604), UINT32_C(2023953240) }, { UINT64_C( 1977090899), UINT64_C( 4278969844) } }, { { UINT32_C(1200506322), UINT32_C(1076302833), UINT32_C(1855454778), UINT32_C(2018949391) }, { UINT64_C( 2276809155), UINT64_C( 3874404169) } }, { { UINT32_C(1419340905), UINT32_C(1520982718), UINT32_C(2246119725), UINT32_C(4143809572) }, { UINT64_C( 2940323623), UINT64_C( 6389929297) } }, { { UINT32_C(3124595657), 
UINT32_C(3657065886), UINT32_C(1917293155), UINT32_C(3035274571) }, { UINT64_C( 6781661543), UINT64_C( 4952567726) } }, { { UINT32_C(3339223817), UINT32_C( 170045917), UINT32_C(3868132290), UINT32_C(1356631175) }, { UINT64_C( 3509269734), UINT64_C( 5224763465) } }, { { UINT32_C(3054115096), UINT32_C(3784246398), UINT32_C(3780368278), UINT32_C(2123775348) }, { UINT64_C( 6838361494), UINT64_C( 5904143626) } }, { { UINT32_C(2655362752), UINT32_C( 296249167), UINT32_C(4059510890), UINT32_C(3695301572) }, { UINT64_C( 2951611919), UINT64_C( 7754812462) } }, { { UINT32_C(1804815341), UINT32_C(3863814736), UINT32_C(1858642169), UINT32_C(2666290910) }, { UINT64_C( 5668630077), UINT64_C( 4524933079) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint64x2_t r = simde_vpaddlq_u32(a); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vpaddl_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddl_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddl_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddl_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddl_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddl_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddlq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddlq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddlq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddlq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddlq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vpaddlq_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/pmax.c000066400000000000000000001436211400333146700164370ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN pmax #include "test-neon.h" #include "../../../simde/arm/neon/pmax.h" static int test_simde_vpmax_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 316.14), SIMDE_FLOAT32_C( -317.76) }, { SIMDE_FLOAT32_C( -999.28), SIMDE_FLOAT32_C( -769.83) }, { 
SIMDE_FLOAT32_C( 316.14), SIMDE_FLOAT32_C( -769.83) } }, { { SIMDE_FLOAT32_C( 563.53), SIMDE_FLOAT32_C( 317.30) }, { SIMDE_FLOAT32_C( 849.20), SIMDE_FLOAT32_C( -796.72) }, { SIMDE_FLOAT32_C( 563.53), SIMDE_FLOAT32_C( 849.20) } }, { { SIMDE_FLOAT32_C( 615.10), SIMDE_FLOAT32_C( 119.52) }, { SIMDE_FLOAT32_C( 458.60), SIMDE_FLOAT32_C( 696.42) }, { SIMDE_FLOAT32_C( 615.10), SIMDE_FLOAT32_C( 696.42) } }, { { SIMDE_FLOAT32_C( -342.74), SIMDE_FLOAT32_C( 78.42) }, { SIMDE_FLOAT32_C( 362.08), SIMDE_FLOAT32_C( -518.00) }, { SIMDE_FLOAT32_C( 78.42), SIMDE_FLOAT32_C( 362.08) } }, { { SIMDE_FLOAT32_C( -470.51), SIMDE_FLOAT32_C( -628.98) }, { SIMDE_FLOAT32_C( 627.75), SIMDE_FLOAT32_C( -933.75) }, { SIMDE_FLOAT32_C( -470.51), SIMDE_FLOAT32_C( 627.75) } }, { { SIMDE_FLOAT32_C( -180.94), SIMDE_FLOAT32_C( 801.21) }, { SIMDE_FLOAT32_C( 206.11), SIMDE_FLOAT32_C( -537.70) }, { SIMDE_FLOAT32_C( 801.21), SIMDE_FLOAT32_C( 206.11) } }, { { SIMDE_FLOAT32_C( -275.82), SIMDE_FLOAT32_C( -533.06) }, { SIMDE_FLOAT32_C( 77.91), SIMDE_FLOAT32_C( 887.40) }, { SIMDE_FLOAT32_C( -275.82), SIMDE_FLOAT32_C( 887.40) } }, { { SIMDE_FLOAT32_C( 896.99), SIMDE_FLOAT32_C( -718.48) }, { SIMDE_FLOAT32_C( -622.01), SIMDE_FLOAT32_C( 213.13) }, { SIMDE_FLOAT32_C( 896.99), SIMDE_FLOAT32_C( 213.13) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vpmax_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vpmax_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 54), INT8_C( 72), INT8_C( 21), INT8_C( 4), -INT8_C( 44), -INT8_C( 85), -INT8_C( 47), INT8_C( 48) }, { -INT8_C( 97), INT8_C( 124), -INT8_C( 89), -INT8_C( 115), INT8_C( 72), -INT8_C( 78), INT8_C( 21), INT8_C( 9) }, { INT8_C( 72), INT8_C( 21), -INT8_C( 44), INT8_C( 48), 
INT8_C( 124), -INT8_C( 89), INT8_C( 72), INT8_C( 21) } }, { { INT8_C( 47), INT8_C( 79), INT8_C( 36), -INT8_C( 113), INT8_C( 55), -INT8_C( 81), INT8_C( 42), INT8_C( 63) }, { INT8_C( 81), -INT8_C( 48), INT8_C( 68), -INT8_C( 24), -INT8_C( 31), -INT8_C( 126), -INT8_C( 127), INT8_C( 23) }, { INT8_C( 79), INT8_C( 36), INT8_C( 55), INT8_C( 63), INT8_C( 81), INT8_C( 68), -INT8_C( 31), INT8_C( 23) } }, { { -INT8_C( 54), -INT8_C( 106), INT8_C( 28), -INT8_C( 98), INT8_C( 66), -INT8_C( 19), -INT8_C( 49), -INT8_C( 31) }, { INT8_C( 105), INT8_C( 118), INT8_C( 111), -INT8_C( 78), INT8_C( 41), -INT8_C( 124), -INT8_C( 69), INT8_C( 88) }, { -INT8_C( 54), INT8_C( 28), INT8_C( 66), -INT8_C( 31), INT8_C( 118), INT8_C( 111), INT8_C( 41), INT8_C( 88) } }, { { -INT8_C( 45), -INT8_C( 33), -INT8_C( 25), INT8_C( 10), -INT8_C( 113), INT8_C( 17), INT8_C( 73), -INT8_C( 32) }, { -INT8_C( 31), -INT8_C( 115), -INT8_C( 56), -INT8_C( 62), INT8_C( 15), INT8_C( 73), -INT8_C( 38), -INT8_C( 38) }, { -INT8_C( 33), INT8_C( 10), INT8_C( 17), INT8_C( 73), -INT8_C( 31), -INT8_C( 56), INT8_C( 73), -INT8_C( 38) } }, { { -INT8_C( 33), -INT8_C( 10), INT8_C( 120), INT8_C( 33), -INT8_C( 29), INT8_C( 71), INT8_C( 3), INT8_C( 76) }, { -INT8_C( 66), INT8_C( 114), -INT8_C( 2), -INT8_C( 25), -INT8_C( 10), -INT8_C( 70), INT8_C( 63), -INT8_C( 55) }, { -INT8_C( 10), INT8_C( 120), INT8_C( 71), INT8_C( 76), INT8_C( 114), -INT8_C( 2), -INT8_C( 10), INT8_C( 63) } }, { { -INT8_C( 103), INT8_C( 38), -INT8_C( 45), INT8_C( 40), INT8_C( 55), INT8_C( 28), INT8_C( 8), INT8_C( 24) }, { -INT8_C( 87), -INT8_C( 48), -INT8_C( 37), -INT8_C( 71), INT8_C( 25), -INT8_C( 75), -INT8_C( 109), -INT8_C( 7) }, { INT8_C( 38), INT8_C( 40), INT8_C( 55), INT8_C( 24), -INT8_C( 48), -INT8_C( 37), INT8_C( 25), -INT8_C( 7) } }, { { -INT8_C( 85), INT8_C( 11), INT8_C( 26), -INT8_C( 114), INT8_C( 83), INT8_C( 29), -INT8_C( 38), INT8_C( 17) }, { -INT8_C( 113), -INT8_C( 39), -INT8_C( 8), -INT8_C( 123), -INT8_C( 109), INT8_C( 55), INT8_C( 78), INT8_C( 44) }, { 
INT8_C( 11), INT8_C( 26), INT8_C( 83), INT8_C( 17), -INT8_C( 39), -INT8_C( 8), INT8_C( 55), INT8_C( 78) } }, { { INT8_C( 93), INT8_C( 33), INT8_C( 85), -INT8_C( 108), INT8_C( 61), INT8_C( 93), -INT8_C( 84), -INT8_C( 25) }, { INT8_C( 46), -INT8_C( 121), -INT8_C( 96), INT8_C( 71), INT8_C( 60), INT8_C( 51), INT8_C( 64), -INT8_C( 25) }, { INT8_C( 93), INT8_C( 85), INT8_C( 93), -INT8_C( 25), INT8_C( 46), INT8_C( 71), INT8_C( 60), INT8_C( 64) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vpmax_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vpmax_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 21767), INT16_C( 30835), -INT16_C( 27635), INT16_C( 2734) }, { -INT16_C( 6484), -INT16_C( 29161), INT16_C( 17540), INT16_C( 14326) }, { INT16_C( 30835), INT16_C( 2734), -INT16_C( 6484), INT16_C( 17540) } }, { { INT16_C( 21571), INT16_C( 768), INT16_C( 17210), -INT16_C( 13076) }, { -INT16_C( 12895), INT16_C( 10560), INT16_C( 16348), -INT16_C( 10827) }, { INT16_C( 21571), INT16_C( 17210), INT16_C( 10560), INT16_C( 16348) } }, { { INT16_C( 10474), -INT16_C( 2226), -INT16_C( 836), INT16_C( 26625) }, { INT16_C( 6626), INT16_C( 26359), -INT16_C( 4771), -INT16_C( 24419) }, { INT16_C( 10474), INT16_C( 26625), INT16_C( 26359), -INT16_C( 4771) } }, { { -INT16_C( 25023), INT16_C( 31651), -INT16_C( 28447), -INT16_C( 32184) }, { -INT16_C( 30627), INT16_C( 15020), INT16_C( 25032), -INT16_C( 19953) }, { INT16_C( 31651), -INT16_C( 28447), INT16_C( 15020), INT16_C( 25032) } }, { { INT16_C( 23945), INT16_C( 17833), -INT16_C( 21926), INT16_C( 15534) }, { -INT16_C( 23101), INT16_C( 8355), INT16_C( 16530), -INT16_C( 11072) }, { INT16_C( 23945), INT16_C( 15534), INT16_C( 8355), INT16_C( 16530) } }, { { INT16_C( 
25822), -INT16_C( 16305), -INT16_C( 26636), INT16_C( 20802) }, { -INT16_C( 4576), -INT16_C( 6005), -INT16_C( 25777), -INT16_C( 10086) }, { INT16_C( 25822), INT16_C( 20802), -INT16_C( 4576), -INT16_C( 10086) } }, { { INT16_C( 17400), INT16_C( 21022), -INT16_C( 13075), -INT16_C( 20081) }, { INT16_C( 12913), INT16_C( 977), -INT16_C( 28046), INT16_C( 20951) }, { INT16_C( 21022), -INT16_C( 13075), INT16_C( 12913), INT16_C( 20951) } }, { { INT16_C( 10230), -INT16_C( 5615), INT16_C( 21438), -INT16_C( 8645) }, { -INT16_C( 14526), -INT16_C( 28218), INT16_C( 24674), INT16_C( 23146) }, { INT16_C( 10230), INT16_C( 21438), -INT16_C( 14526), INT16_C( 24674) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vpmax_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vpmax_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 1806003294), INT32_C( 1534340244) }, { -INT32_C( 191415557), INT32_C( 1758153183) }, { INT32_C( 1534340244), INT32_C( 1758153183) } }, { { INT32_C( 2009572098), INT32_C( 834509304) }, { INT32_C( 1970830255), INT32_C( 642974083) }, { INT32_C( 2009572098), INT32_C( 1970830255) } }, { { INT32_C( 683322772), -INT32_C( 712823079) }, { INT32_C( 1204361832), INT32_C( 1722782819) }, { INT32_C( 683322772), INT32_C( 1722782819) } }, { { INT32_C( 819820088), -INT32_C( 1100899825) }, { -INT32_C( 1741367019), INT32_C( 1874757594) }, { INT32_C( 819820088), INT32_C( 1874757594) } }, { { INT32_C( 244807733), INT32_C( 249764774) }, { -INT32_C( 1722438603), INT32_C( 2046755905) }, { INT32_C( 249764774), INT32_C( 2046755905) } }, { { -INT32_C( 1985356678), -INT32_C( 1958212746) }, { -INT32_C( 1088193308), INT32_C( 942596611) }, { -INT32_C( 1958212746), INT32_C( 942596611) } }, { { INT32_C( 
21480794), INT32_C( 370092768) }, { INT32_C( 414148055), -INT32_C( 460214679) }, { INT32_C( 370092768), INT32_C( 414148055) } }, { { INT32_C( 7158410), INT32_C( 713798981) }, { INT32_C( 904506929), -INT32_C( 345172080) }, { INT32_C( 713798981), INT32_C( 904506929) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vpmax_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vpmax_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(190), UINT8_C(197), UINT8_C( 74), UINT8_C(131), UINT8_C(236), UINT8_C(219), UINT8_C(181), UINT8_C( 31) }, { UINT8_C( 23), UINT8_C( 93), UINT8_C( 64), UINT8_C(225), UINT8_C(244), UINT8_C(226), UINT8_C(137), UINT8_C(241) }, { UINT8_C(197), UINT8_C(131), UINT8_C(236), UINT8_C(181), UINT8_C( 93), UINT8_C(225), UINT8_C(244), UINT8_C(241) } }, { { UINT8_C( 33), UINT8_C( 75), UINT8_C(214), UINT8_C( 8), UINT8_C( 10), UINT8_C(126), UINT8_C(228), UINT8_C(193) }, { UINT8_C( 62), UINT8_C(252), UINT8_C(231), UINT8_C(120), UINT8_C( 49), UINT8_C(141), UINT8_C(136), UINT8_C(240) }, { UINT8_C( 75), UINT8_C(214), UINT8_C(126), UINT8_C(228), UINT8_C(252), UINT8_C(231), UINT8_C(141), UINT8_C(240) } }, { { UINT8_C( 82), UINT8_C(210), UINT8_C(115), UINT8_C( 62), UINT8_C(173), UINT8_C( 40), UINT8_C( 93), UINT8_C(196) }, { UINT8_C(133), UINT8_C(157), UINT8_C(165), UINT8_C(121), UINT8_C(128), UINT8_C( 46), UINT8_C(106), UINT8_C(161) }, { UINT8_C(210), UINT8_C(115), UINT8_C(173), UINT8_C(196), UINT8_C(157), UINT8_C(165), UINT8_C(128), UINT8_C(161) } }, { { UINT8_C(122), UINT8_C( 64), UINT8_C(169), UINT8_C(132), UINT8_C(190), UINT8_C(141), UINT8_C( 69), UINT8_C(253) }, { UINT8_C(137), UINT8_C( 45), UINT8_C(117), UINT8_C(187), UINT8_C(186), UINT8_C(253), UINT8_C(171), UINT8_C( 12) }, { 
UINT8_C(122), UINT8_C(169), UINT8_C(190), UINT8_C(253), UINT8_C(137), UINT8_C(187), UINT8_C(253), UINT8_C(171) } }, { { UINT8_C(207), UINT8_C( 30), UINT8_C( 75), UINT8_C(124), UINT8_C( 70), UINT8_C(168), UINT8_C( 64), UINT8_C(203) }, { UINT8_C( 70), UINT8_C(229), UINT8_C( 68), UINT8_C(198), UINT8_C( 20), UINT8_C(174), UINT8_C(103), UINT8_C(142) }, { UINT8_C(207), UINT8_C(124), UINT8_C(168), UINT8_C(203), UINT8_C(229), UINT8_C(198), UINT8_C(174), UINT8_C(142) } }, { { UINT8_C(238), UINT8_C( 16), UINT8_C( 18), UINT8_C(173), UINT8_C(158), UINT8_C( 88), UINT8_C(170), UINT8_C( 39) }, { UINT8_C(133), UINT8_C( 31), UINT8_C(226), UINT8_C( 63), UINT8_C( 29), UINT8_C(141), UINT8_C( 75), UINT8_C(236) }, { UINT8_C(238), UINT8_C(173), UINT8_C(158), UINT8_C(170), UINT8_C(133), UINT8_C(226), UINT8_C(141), UINT8_C(236) } }, { { UINT8_C(171), UINT8_C(150), UINT8_C(105), UINT8_C(241), UINT8_C( 63), UINT8_C(169), UINT8_C(188), UINT8_C(133) }, { UINT8_C(143), UINT8_C( 0), UINT8_C( 75), UINT8_C(163), UINT8_C(174), UINT8_C(178), UINT8_C( 49), UINT8_C(157) }, { UINT8_C(171), UINT8_C(241), UINT8_C(169), UINT8_C(188), UINT8_C(143), UINT8_C(163), UINT8_C(178), UINT8_C(157) } }, { { UINT8_C(194), UINT8_C( 67), UINT8_C( 74), UINT8_C( 96), UINT8_C(155), UINT8_C(244), UINT8_C(136), UINT8_C( 32) }, { UINT8_C( 19), UINT8_C(106), UINT8_C( 95), UINT8_C( 48), UINT8_C(248), UINT8_C(171), UINT8_C( 29), UINT8_C(163) }, { UINT8_C(194), UINT8_C( 96), UINT8_C(244), UINT8_C(136), UINT8_C(106), UINT8_C( 95), UINT8_C(248), UINT8_C(163) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vpmax_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vpmax_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(17905), UINT16_C(30463), 
UINT16_C(10779), UINT16_C(40437) }, { UINT16_C(54218), UINT16_C(63146), UINT16_C(19060), UINT16_C(61787) }, { UINT16_C(30463), UINT16_C(40437), UINT16_C(63146), UINT16_C(61787) } }, { { UINT16_C(47398), UINT16_C(43390), UINT16_C(13086), UINT16_C(17157) }, { UINT16_C(33597), UINT16_C( 4258), UINT16_C(62124), UINT16_C(40446) }, { UINT16_C(47398), UINT16_C(17157), UINT16_C(33597), UINT16_C(62124) } }, { { UINT16_C(65080), UINT16_C(21267), UINT16_C( 2088), UINT16_C(62192) }, { UINT16_C(39644), UINT16_C(20713), UINT16_C(17636), UINT16_C( 2881) }, { UINT16_C(65080), UINT16_C(62192), UINT16_C(39644), UINT16_C(17636) } }, { { UINT16_C(49405), UINT16_C( 7092), UINT16_C(47603), UINT16_C(12383) }, { UINT16_C( 316), UINT16_C(59456), UINT16_C(16115), UINT16_C(11142) }, { UINT16_C(49405), UINT16_C(47603), UINT16_C(59456), UINT16_C(16115) } }, { { UINT16_C(39228), UINT16_C(25983), UINT16_C(28578), UINT16_C(32343) }, { UINT16_C(16394), UINT16_C(61134), UINT16_C( 3973), UINT16_C(33529) }, { UINT16_C(39228), UINT16_C(32343), UINT16_C(61134), UINT16_C(33529) } }, { { UINT16_C(44751), UINT16_C(49822), UINT16_C(64871), UINT16_C(42226) }, { UINT16_C(13054), UINT16_C(61836), UINT16_C( 4721), UINT16_C(44317) }, { UINT16_C(49822), UINT16_C(64871), UINT16_C(61836), UINT16_C(44317) } }, { { UINT16_C(40108), UINT16_C(19986), UINT16_C(27147), UINT16_C( 5580) }, { UINT16_C(39594), UINT16_C(12036), UINT16_C(64937), UINT16_C(31154) }, { UINT16_C(40108), UINT16_C(27147), UINT16_C(39594), UINT16_C(64937) } }, { { UINT16_C(20651), UINT16_C( 4923), UINT16_C(11853), UINT16_C(19383) }, { UINT16_C(17248), UINT16_C(53564), UINT16_C(22870), UINT16_C( 639) }, { UINT16_C(20651), UINT16_C(19383), UINT16_C(53564), UINT16_C(22870) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vpmax_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, 
simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vpmax_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(2640987649), UINT32_C( 568799812) }, { UINT32_C(2065880842), UINT32_C(1160985759) }, { UINT32_C(2640987649), UINT32_C(2065880842) } }, { { UINT32_C(3408914299), UINT32_C( 786561845) }, { UINT32_C(2780205411), UINT32_C(1474653014) }, { UINT32_C(3408914299), UINT32_C(2780205411) } }, { { UINT32_C(4143206321), UINT32_C(2350373761) }, { UINT32_C(1359428018), UINT32_C(4036377461) }, { UINT32_C(4143206321), UINT32_C(4036377461) } }, { { UINT32_C(1471989282), UINT32_C( 629513665) }, { UINT32_C(2227846190), UINT32_C(1507569575) }, { UINT32_C(1471989282), UINT32_C(2227846190) } }, { { UINT32_C(2152714238), UINT32_C(1544316586) }, { UINT32_C( 346887071), UINT32_C(1879327822) }, { UINT32_C(2152714238), UINT32_C(1879327822) } }, { { UINT32_C(3418931210), UINT32_C(2364558685) }, { UINT32_C( 823180169), UINT32_C(1770712170) }, { UINT32_C(3418931210), UINT32_C(1770712170) } }, { { UINT32_C(1726601659), UINT32_C(3737318719) }, { UINT32_C(1475506184), UINT32_C(3200775860) }, { UINT32_C(3737318719), UINT32_C(3200775860) } }, { { UINT32_C( 344559543), UINT32_C(1721793245) }, { UINT32_C(2677518645), UINT32_C(1476927901) }, { UINT32_C(1721793245), UINT32_C(2677518645) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vpmax_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vpmaxq_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -402.07), SIMDE_FLOAT32_C( -666.37), SIMDE_FLOAT32_C( 362.68), SIMDE_FLOAT32_C( -326.71) }, { SIMDE_FLOAT32_C( 454.85), SIMDE_FLOAT32_C( -121.62), 
SIMDE_FLOAT32_C( 430.43), SIMDE_FLOAT32_C( 746.87) }, { SIMDE_FLOAT32_C( -402.07), SIMDE_FLOAT32_C( 362.68), SIMDE_FLOAT32_C( 454.85), SIMDE_FLOAT32_C( 746.87) } }, { { SIMDE_FLOAT32_C( 33.52), SIMDE_FLOAT32_C( -284.09), SIMDE_FLOAT32_C( -166.25), SIMDE_FLOAT32_C( 480.19) }, { SIMDE_FLOAT32_C( -49.24), SIMDE_FLOAT32_C( 564.50), SIMDE_FLOAT32_C( 852.14), SIMDE_FLOAT32_C( -390.08) }, { SIMDE_FLOAT32_C( 33.52), SIMDE_FLOAT32_C( 480.19), SIMDE_FLOAT32_C( 564.50), SIMDE_FLOAT32_C( 852.14) } }, { { SIMDE_FLOAT32_C( -339.81), SIMDE_FLOAT32_C( 223.10), SIMDE_FLOAT32_C( -267.43), SIMDE_FLOAT32_C( -98.08) }, { SIMDE_FLOAT32_C( 315.18), SIMDE_FLOAT32_C( -621.58), SIMDE_FLOAT32_C( -833.21), SIMDE_FLOAT32_C( 0.90) }, { SIMDE_FLOAT32_C( 223.10), SIMDE_FLOAT32_C( -98.08), SIMDE_FLOAT32_C( 315.18), SIMDE_FLOAT32_C( 0.90) } }, { { SIMDE_FLOAT32_C( 737.95), SIMDE_FLOAT32_C( 329.19), SIMDE_FLOAT32_C( 372.64), SIMDE_FLOAT32_C( 555.32) }, { SIMDE_FLOAT32_C( 563.00), SIMDE_FLOAT32_C( -123.50), SIMDE_FLOAT32_C( -962.40), SIMDE_FLOAT32_C( -839.07) }, { SIMDE_FLOAT32_C( 737.95), SIMDE_FLOAT32_C( 555.32), SIMDE_FLOAT32_C( 563.00), SIMDE_FLOAT32_C( -839.07) } }, { { SIMDE_FLOAT32_C( 210.13), SIMDE_FLOAT32_C( 400.28), SIMDE_FLOAT32_C( -165.78), SIMDE_FLOAT32_C( -335.02) }, { SIMDE_FLOAT32_C( -721.33), SIMDE_FLOAT32_C( -735.34), SIMDE_FLOAT32_C( -588.15), SIMDE_FLOAT32_C( 312.18) }, { SIMDE_FLOAT32_C( 400.28), SIMDE_FLOAT32_C( -165.78), SIMDE_FLOAT32_C( -721.33), SIMDE_FLOAT32_C( 312.18) } }, { { SIMDE_FLOAT32_C( -19.43), SIMDE_FLOAT32_C( 245.59), SIMDE_FLOAT32_C( -207.63), SIMDE_FLOAT32_C( 931.34) }, { SIMDE_FLOAT32_C( -189.91), SIMDE_FLOAT32_C( -355.49), SIMDE_FLOAT32_C( -458.75), SIMDE_FLOAT32_C( 470.28) }, { SIMDE_FLOAT32_C( 245.59), SIMDE_FLOAT32_C( 931.34), SIMDE_FLOAT32_C( -189.91), SIMDE_FLOAT32_C( 470.28) } }, { { SIMDE_FLOAT32_C( 867.60), SIMDE_FLOAT32_C( 273.82), SIMDE_FLOAT32_C( -627.81), SIMDE_FLOAT32_C( 182.78) }, { SIMDE_FLOAT32_C( 652.24), SIMDE_FLOAT32_C( -461.02), 
SIMDE_FLOAT32_C( -816.31), SIMDE_FLOAT32_C( 390.19) }, { SIMDE_FLOAT32_C( 867.60), SIMDE_FLOAT32_C( 182.78), SIMDE_FLOAT32_C( 652.24), SIMDE_FLOAT32_C( 390.19) } }, { { SIMDE_FLOAT32_C( 868.17), SIMDE_FLOAT32_C( 556.33), SIMDE_FLOAT32_C( -54.49), SIMDE_FLOAT32_C( 431.18) }, { SIMDE_FLOAT32_C( -567.17), SIMDE_FLOAT32_C( -16.88), SIMDE_FLOAT32_C( 592.11), SIMDE_FLOAT32_C( 642.95) }, { SIMDE_FLOAT32_C( 868.17), SIMDE_FLOAT32_C( 431.18), SIMDE_FLOAT32_C( -16.88), SIMDE_FLOAT32_C( 642.95) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vpmaxq_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vpmaxq_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 521.70), SIMDE_FLOAT64_C( 828.15) }, { SIMDE_FLOAT64_C( 535.14), SIMDE_FLOAT64_C( 480.15) }, { SIMDE_FLOAT64_C( 828.15), SIMDE_FLOAT64_C( 535.14) } }, { { SIMDE_FLOAT64_C( -518.98), SIMDE_FLOAT64_C( -210.96) }, { SIMDE_FLOAT64_C( -930.38), SIMDE_FLOAT64_C( 215.97) }, { SIMDE_FLOAT64_C( -210.96), SIMDE_FLOAT64_C( 215.97) } }, { { SIMDE_FLOAT64_C( 91.20), SIMDE_FLOAT64_C( 348.64) }, { SIMDE_FLOAT64_C( -290.21), SIMDE_FLOAT64_C( 54.45) }, { SIMDE_FLOAT64_C( 348.64), SIMDE_FLOAT64_C( 54.45) } }, { { SIMDE_FLOAT64_C( -405.44), SIMDE_FLOAT64_C( -790.85) }, { SIMDE_FLOAT64_C( 151.68), SIMDE_FLOAT64_C( 31.01) }, { SIMDE_FLOAT64_C( -405.44), SIMDE_FLOAT64_C( 151.68) } }, { { SIMDE_FLOAT64_C( -850.04), SIMDE_FLOAT64_C( -183.10) }, { SIMDE_FLOAT64_C( 893.21), SIMDE_FLOAT64_C( 481.08) }, { SIMDE_FLOAT64_C( -183.10), SIMDE_FLOAT64_C( 893.21) } }, { { SIMDE_FLOAT64_C( -361.59), SIMDE_FLOAT64_C( 9.07) }, { SIMDE_FLOAT64_C( 399.01), SIMDE_FLOAT64_C( -896.67) }, { SIMDE_FLOAT64_C( 9.07), SIMDE_FLOAT64_C( 399.01) } }, 
{ { SIMDE_FLOAT64_C( -108.96), SIMDE_FLOAT64_C( 271.20) }, { SIMDE_FLOAT64_C( 839.25), SIMDE_FLOAT64_C( 679.65) }, { SIMDE_FLOAT64_C( 271.20), SIMDE_FLOAT64_C( 839.25) } }, { { SIMDE_FLOAT64_C( 406.65), SIMDE_FLOAT64_C( -664.11) }, { SIMDE_FLOAT64_C( 733.62), SIMDE_FLOAT64_C( -71.65) }, { SIMDE_FLOAT64_C( 406.65), SIMDE_FLOAT64_C( 733.62) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vpmaxq_f64(a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vpmaxq_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 11), INT8_C( 113), -INT8_C( 40), -INT8_C( 42), -INT8_C( 25), -INT8_C( 127), INT8_C( 23), -INT8_C( 82), -INT8_C( 54), INT8_C( 39), INT8_C( 20), -INT8_C( 7), INT8_C( 58), INT8_C( 80), -INT8_C( 61), -INT8_C( 6) }, { INT8_C( 52), -INT8_C( 3), -INT8_C( 24), -INT8_C( 102), INT8_C( 42), -INT8_C( 37), -INT8_C( 105), -INT8_C( 107), -INT8_C( 31), -INT8_C( 46), INT8_C( 13), INT8_C( 68), -INT8_C( 114), -INT8_C( 94), -INT8_C( 10), -INT8_C( 102) }, { INT8_C( 113), -INT8_C( 40), -INT8_C( 25), INT8_C( 23), INT8_C( 39), INT8_C( 20), INT8_C( 80), -INT8_C( 6), INT8_C( 52), -INT8_C( 24), INT8_C( 42), -INT8_C( 105), -INT8_C( 31), INT8_C( 68), -INT8_C( 94), -INT8_C( 10) } }, { { INT8_C( 19), -INT8_C( 50), INT8_C( 112), -INT8_C( 6), INT8_C( 80), -INT8_C( 121), -INT8_C( 88), INT8_C( 26), -INT8_C( 81), -INT8_C( 67), INT8_C( 19), -INT8_C( 23), INT8_C( 13), -INT8_C( 42), -INT8_C( 28), INT8_C( 66) }, { -INT8_C( 45), -INT8_C( 52), -INT8_C( 36), -INT8_C( 3), -INT8_C( 88), INT8_C( 115), -INT8_C( 109), -INT8_C( 119), INT8_C( 69), -INT8_C( 96), -INT8_C( 50), -INT8_C( 45), INT8_C( 66), -INT8_C( 60), INT8_C( 109), INT8_C( 85) }, { INT8_C( 19), INT8_C( 112), INT8_C( 80), INT8_C( 26), -INT8_C( 67), INT8_C( 
19), INT8_C( 13), INT8_C( 66), -INT8_C( 45), -INT8_C( 3), INT8_C( 115), -INT8_C( 109), INT8_C( 69), -INT8_C( 45), INT8_C( 66), INT8_C( 109) } }, { { -INT8_C( 109), -INT8_C( 35), INT8_C( 79), -INT8_C( 29), INT8_C( 101), -INT8_C( 9), -INT8_C( 3), INT8_C( 20), -INT8_C( 76), INT8_C( 17), -INT8_C( 3), -INT8_C( 62), -INT8_C( 25), -INT8_C( 31), INT8_C( 4), -INT8_C( 69) }, { -INT8_C( 82), -INT8_C( 32), -INT8_C( 72), INT8_C( 86), INT8_C( 83), INT8_C( 75), -INT8_C( 33), -INT8_C( 104), -INT8_C( 21), -INT8_C( 83), INT8_C( 107), INT8_C( 45), INT8_C( 114), -INT8_C( 39), -INT8_C( 126), INT8_C( 5) }, { -INT8_C( 35), INT8_C( 79), INT8_C( 101), INT8_C( 20), INT8_C( 17), -INT8_C( 3), -INT8_C( 25), INT8_C( 4), -INT8_C( 32), INT8_C( 86), INT8_C( 83), -INT8_C( 33), -INT8_C( 21), INT8_C( 107), INT8_C( 114), INT8_C( 5) } }, { { -INT8_C( 74), -INT8_C( 47), -INT8_C( 24), INT8_C( 27), -INT8_C( 55), -INT8_C( 27), INT8_C( 47), INT8_C( 125), -INT8_C( 10), INT8_C( 45), INT8_C( 63), -INT8_C( 34), INT8_C( 14), INT8_C( 67), -INT8_C( 103), -INT8_C( 68) }, { INT8_C( 35), INT8_C( 81), INT8_C( 18), INT8_C( 118), -INT8_C( 99), -INT8_C( 14), INT8_C( 14), -INT8_C( 120), -INT8_C( 97), INT8_C( 122), -INT8_C( 74), INT8_C( 17), INT8_C( 83), INT8_C( 56), INT8_C( 22), INT8_C( 9) }, { -INT8_C( 47), INT8_C( 27), -INT8_C( 27), INT8_C( 125), INT8_C( 45), INT8_C( 63), INT8_C( 67), -INT8_C( 68), INT8_C( 81), INT8_C( 118), -INT8_C( 14), INT8_C( 14), INT8_C( 122), INT8_C( 17), INT8_C( 83), INT8_C( 22) } }, { { INT8_C( 10), -INT8_C( 2), INT8_C( 37), -INT8_C( 45), -INT8_C( 28), INT8_C( 84), INT8_C( 80), -INT8_C( 38), -INT8_C( 127), -INT8_C( 112), -INT8_C( 72), -INT8_C( 112), -INT8_C( 45), INT8_C( 81), INT8_C( 76), -INT8_C( 9) }, { -INT8_C( 93), INT8_C( 95), INT8_C( 109), INT8_C( 64), INT8_C( 81), INT8_C( 124), -INT8_C( 56), -INT8_C( 16), -INT8_C( 10), INT8_C( 126), INT8_C( 2), INT8_C( 73), -INT8_C( 73), INT8_C( 24), INT8_C( 82), -INT8_C( 63) }, { INT8_C( 10), INT8_C( 37), INT8_C( 84), INT8_C( 80), -INT8_C( 112), -INT8_C( 
72), INT8_C( 81), INT8_C( 76), INT8_C( 95), INT8_C( 109), INT8_C( 124), -INT8_C( 16), INT8_C( 126), INT8_C( 73), INT8_C( 24), INT8_C( 82) } }, { { INT8_C( 23), INT8_C( 119), -INT8_C( 108), -INT8_C( 5), -INT8_C( 52), -INT8_C( 28), -INT8_C( 43), INT8_C( 77), INT8_C( 116), -INT8_C( 114), -INT8_C( 35), INT8_C( 72), -INT8_C( 33), INT8_C( 42), INT8_C( 63), -INT8_C( 126) }, { -INT8_C( 119), -INT8_C( 84), -INT8_C( 62), -INT8_C( 38), INT8_C( 40), -INT8_C( 117), -INT8_C( 54), INT8_C( 30), INT8_C( 9), -INT8_C( 52), INT8_C( 103), -INT8_C( 64), -INT8_C( 27), -INT8_C( 70), -INT8_C( 127), -INT8_C( 4) }, { INT8_C( 119), -INT8_C( 5), -INT8_C( 28), INT8_C( 77), INT8_C( 116), INT8_C( 72), INT8_C( 42), INT8_C( 63), -INT8_C( 84), -INT8_C( 38), INT8_C( 40), INT8_C( 30), INT8_C( 9), INT8_C( 103), -INT8_C( 27), -INT8_C( 4) } }, { { INT8_C( 49), INT8_C( 21), -INT8_C( 9), -INT8_C( 3), -INT8_C( 6), -INT8_C( 52), INT8_C( 75), INT8_C( 110), INT8_C( 90), INT8_C( 40), -INT8_C( 74), INT8_C( 58), INT8_C( 82), -INT8_C( 11), -INT8_C( 68), -INT8_C( 37) }, { -INT8_C( 94), INT8_MAX, -INT8_C( 75), -INT8_C( 54), INT8_C( 10), INT8_MIN, -INT8_C( 23), INT8_C( 19), INT8_C( 76), INT8_C( 80), -INT8_C( 44), INT8_C( 49), INT8_C( 10), INT8_C( 85), INT8_C( 45), INT8_C( 60) }, { INT8_C( 49), -INT8_C( 3), -INT8_C( 6), INT8_C( 110), INT8_C( 90), INT8_C( 58), INT8_C( 82), -INT8_C( 37), INT8_MAX, -INT8_C( 54), INT8_C( 10), INT8_C( 19), INT8_C( 80), INT8_C( 49), INT8_C( 85), INT8_C( 60) } }, { { INT8_C( 107), INT8_C( 36), INT8_C( 57), INT8_C( 101), -INT8_C( 15), -INT8_C( 124), -INT8_C( 45), INT8_C( 75), -INT8_C( 83), -INT8_C( 118), -INT8_C( 123), -INT8_C( 1), INT8_MAX, INT8_C( 66), -INT8_C( 37), INT8_C( 33) }, { -INT8_C( 63), -INT8_C( 112), -INT8_C( 20), -INT8_C( 53), INT8_C( 16), -INT8_C( 43), -INT8_C( 34), INT8_C( 93), INT8_C( 37), -INT8_C( 78), -INT8_C( 114), INT8_C( 48), INT8_C( 8), -INT8_C( 68), INT8_C( 108), INT8_C( 115) }, { INT8_C( 107), INT8_C( 101), -INT8_C( 15), INT8_C( 75), -INT8_C( 83), -INT8_C( 1), 
INT8_MAX, INT8_C( 33), -INT8_C( 63), -INT8_C( 20), INT8_C( 16), INT8_C( 93), INT8_C( 37), INT8_C( 48), INT8_C( 8), INT8_C( 115) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vpmaxq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vpmaxq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 13831), -INT16_C( 19466), INT16_C( 25467), INT16_C( 4969), -INT16_C( 11147), INT16_C( 11223), -INT16_C( 7596), INT16_C( 5877) }, { -INT16_C( 12676), INT16_C( 18011), -INT16_C( 29303), -INT16_C( 19656), -INT16_C( 6414), -INT16_C( 13803), INT16_C( 15467), INT16_C( 29189) }, { INT16_C( 13831), INT16_C( 25467), INT16_C( 11223), INT16_C( 5877), INT16_C( 18011), -INT16_C( 19656), -INT16_C( 6414), INT16_C( 29189) } }, { { -INT16_C( 1166), -INT16_C( 4570), -INT16_C( 28834), -INT16_C( 11519), -INT16_C( 10141), -INT16_C( 18433), -INT16_C( 2886), INT16_C( 14285) }, { INT16_C( 10434), INT16_C( 19581), -INT16_C( 19019), -INT16_C( 22273), INT16_C( 5275), INT16_C( 1650), INT16_C( 30544), -INT16_C( 15751) }, { -INT16_C( 1166), -INT16_C( 11519), -INT16_C( 10141), INT16_C( 14285), INT16_C( 19581), -INT16_C( 19019), INT16_C( 5275), INT16_C( 30544) } }, { { -INT16_C( 24717), -INT16_C( 11856), -INT16_C( 20178), -INT16_C( 28251), -INT16_C( 23414), INT16_C( 17480), INT16_C( 5528), INT16_C( 23163) }, { -INT16_C( 1730), -INT16_C( 3162), -INT16_C( 23122), INT16_C( 19099), INT16_C( 3769), INT16_C( 2384), -INT16_C( 13947), -INT16_C( 1844) }, { -INT16_C( 11856), -INT16_C( 20178), INT16_C( 17480), INT16_C( 23163), -INT16_C( 1730), INT16_C( 19099), INT16_C( 3769), -INT16_C( 1844) } }, { { INT16_C( 31848), -INT16_C( 26934), INT16_C( 28462), -INT16_C( 18393), INT16_C( 28435), -INT16_C( 21508), INT16_C( 30853), -INT16_C( 
15611) }, { -INT16_C( 21391), INT16_C( 8118), INT16_C( 21073), INT16_C( 2921), -INT16_C( 17824), -INT16_C( 6892), -INT16_C( 8061), -INT16_C( 4898) }, { INT16_C( 31848), INT16_C( 28462), INT16_C( 28435), INT16_C( 30853), INT16_C( 8118), INT16_C( 21073), -INT16_C( 6892), -INT16_C( 4898) } }, { { -INT16_C( 22435), -INT16_C( 29822), -INT16_C( 21993), INT16_C( 10819), INT16_C( 16153), -INT16_C( 24875), -INT16_C( 9545), INT16_C( 10337) }, { INT16_C( 6278), -INT16_C( 10168), -INT16_C( 20118), -INT16_C( 13597), -INT16_C( 2197), -INT16_C( 4177), -INT16_C( 29224), INT16_C( 13787) }, { -INT16_C( 22435), INT16_C( 10819), INT16_C( 16153), INT16_C( 10337), INT16_C( 6278), -INT16_C( 13597), -INT16_C( 2197), INT16_C( 13787) } }, { { INT16_C( 23861), INT16_C( 19648), INT16_C( 775), INT16_C( 8566), INT16_C( 19266), -INT16_C( 1345), INT16_C( 8486), -INT16_C( 21470) }, { INT16_C( 27193), -INT16_C( 23676), INT16_C( 26396), -INT16_C( 30867), INT16_C( 7263), INT16_C( 14198), INT16_C( 20906), -INT16_C( 8340) }, { INT16_C( 23861), INT16_C( 8566), INT16_C( 19266), INT16_C( 8486), INT16_C( 27193), INT16_C( 26396), INT16_C( 14198), INT16_C( 20906) } }, { { INT16_C( 11439), -INT16_C( 18900), -INT16_C( 24017), INT16_C( 29143), -INT16_C( 26642), INT16_C( 5227), -INT16_C( 29000), -INT16_C( 3648) }, { INT16_C( 17912), INT16_C( 5268), INT16_C( 428), INT16_C( 2972), INT16_C( 4637), -INT16_C( 14526), -INT16_C( 20892), INT16_C( 5031) }, { INT16_C( 11439), INT16_C( 29143), INT16_C( 5227), -INT16_C( 3648), INT16_C( 17912), INT16_C( 2972), INT16_C( 4637), INT16_C( 5031) } }, { { -INT16_C( 11302), INT16_C( 2505), -INT16_C( 24203), INT16_C( 25467), -INT16_C( 6600), -INT16_C( 3977), INT16_C( 14452), INT16_C( 28129) }, { INT16_C( 30077), INT16_C( 10625), INT16_C( 7542), -INT16_C( 27851), INT16_C( 30512), -INT16_C( 27557), INT16_C( 550), INT16_C( 167) }, { INT16_C( 2505), INT16_C( 25467), -INT16_C( 3977), INT16_C( 28129), INT16_C( 30077), INT16_C( 7542), INT16_C( 30512), INT16_C( 550) } }, }; for (size_t i = 
0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vpmaxq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vpmaxq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 1235350443), INT32_C( 1495786913), INT32_C( 1460006431), -INT32_C( 1555314335) }, { INT32_C( 1196301238), INT32_C( 1914049203), INT32_C( 734461602), INT32_C( 1447843073) }, { INT32_C( 1495786913), INT32_C( 1460006431), INT32_C( 1914049203), INT32_C( 1447843073) } }, { { INT32_C( 51161698), -INT32_C( 1352911728), -INT32_C( 2096733918), -INT32_C( 517582550) }, { INT32_C( 539522413), INT32_C( 697450119), INT32_C( 1045780541), INT32_C( 278176174) }, { INT32_C( 51161698), -INT32_C( 517582550), INT32_C( 697450119), INT32_C( 1045780541) } }, { { -INT32_C( 602693300), -INT32_C( 141856555), -INT32_C( 59075887), INT32_C( 1356702179) }, { -INT32_C( 1653537514), -INT32_C( 2134506685), INT32_C( 180231004), INT32_C( 152720317) }, { -INT32_C( 141856555), INT32_C( 1356702179), -INT32_C( 1653537514), INT32_C( 180231004) } }, { { -INT32_C( 907727372), INT32_C( 1874882717), -INT32_C( 429180414), -INT32_C( 248100645) }, { -INT32_C( 1869699251), INT32_C( 118576555), INT32_C( 756141936), INT32_C( 372648738) }, { INT32_C( 1874882717), -INT32_C( 248100645), INT32_C( 118576555), INT32_C( 756141936) } }, { { -INT32_C( 153150632), -INT32_C( 1905942644), -INT32_C( 1250635558), INT32_C( 1705487128) }, { -INT32_C( 34261678), -INT32_C( 83622262), -INT32_C( 131590698), -INT32_C( 1727045824) }, { -INT32_C( 153150632), INT32_C( 1705487128), -INT32_C( 34261678), -INT32_C( 131590698) } }, { { INT32_C( 110095994), INT32_C( 1754657934), -INT32_C( 602076732), INT32_C( 121750708) }, { -INT32_C( 2080098566), INT32_C( 327092541), INT32_C( 1594599454), -INT32_C( 2114446841) }, { 
INT32_C( 1754657934), INT32_C( 121750708), INT32_C( 327092541), INT32_C( 1594599454) } }, { { -INT32_C( 1752660215), INT32_C( 1073683835), -INT32_C( 618980314), -INT32_C( 605922079) }, { -INT32_C( 815798638), INT32_C( 249749487), -INT32_C( 1905398137), INT32_C( 286221576) }, { INT32_C( 1073683835), -INT32_C( 605922079), INT32_C( 249749487), INT32_C( 286221576) } }, { { INT32_C( 1739102188), -INT32_C( 609835084), -INT32_C( 1514749500), -INT32_C( 1350526947) }, { INT32_C( 1853874046), INT32_C( 1165779390), INT32_C( 1490282831), INT32_C( 980017998) }, { INT32_C( 1739102188), -INT32_C( 1350526947), INT32_C( 1853874046), INT32_C( 1490282831) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vpmaxq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vpmaxq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(194), UINT8_C(204), UINT8_C(147), UINT8_C( 27), UINT8_C(126), UINT8_C( 49), UINT8_C( 55), UINT8_C(152), UINT8_C(245), UINT8_C( 93), UINT8_C( 69), UINT8_C(214), UINT8_C(114), UINT8_C( 39), UINT8_C(222), UINT8_C( 45) }, { UINT8_C(147), UINT8_C(174), UINT8_C( 82), UINT8_C(252), UINT8_C( 42), UINT8_C( 38), UINT8_C(161), UINT8_C( 37), UINT8_C(211), UINT8_C(101), UINT8_C( 54), UINT8_C(212), UINT8_C( 23), UINT8_C(109), UINT8_C(232), UINT8_C(218) }, { UINT8_C(204), UINT8_C(147), UINT8_C(126), UINT8_C(152), UINT8_C(245), UINT8_C(214), UINT8_C(114), UINT8_C(222), UINT8_C(174), UINT8_C(252), UINT8_C( 42), UINT8_C(161), UINT8_C(211), UINT8_C(212), UINT8_C(109), UINT8_C(232) } }, { { UINT8_C( 57), UINT8_C(123), UINT8_C(245), UINT8_C(183), UINT8_C(172), UINT8_C( 44), UINT8_C( 80), UINT8_C(161), UINT8_C(138), UINT8_C(149), UINT8_C(119), UINT8_C(252), UINT8_C(188), UINT8_C( 85), UINT8_C( 41), 
UINT8_C( 80) }, { UINT8_C( 4), UINT8_C(123), UINT8_C( 76), UINT8_C( 46), UINT8_C(161), UINT8_C(237), UINT8_C( 83), UINT8_C(117), UINT8_C( 82), UINT8_C(137), UINT8_C( 73), UINT8_C(105), UINT8_C(246), UINT8_C( 49), UINT8_C( 67), UINT8_C( 47) }, { UINT8_C(123), UINT8_C(245), UINT8_C(172), UINT8_C(161), UINT8_C(149), UINT8_C(252), UINT8_C(188), UINT8_C( 80), UINT8_C(123), UINT8_C( 76), UINT8_C(237), UINT8_C(117), UINT8_C(137), UINT8_C(105), UINT8_C(246), UINT8_C( 67) } }, { { UINT8_C(172), UINT8_C( 56), UINT8_C(231), UINT8_C( 88), UINT8_C(101), UINT8_C( 55), UINT8_C(249), UINT8_C(239), UINT8_C(204), UINT8_C(113), UINT8_C(235), UINT8_C(137), UINT8_C(198), UINT8_C( 21), UINT8_C(217), UINT8_C(202) }, { UINT8_C(144), UINT8_C( 37), UINT8_C(248), UINT8_C( 50), UINT8_C( 18), UINT8_C( 76), UINT8_C(167), UINT8_C(100), UINT8_C(213), UINT8_C(240), UINT8_C(205), UINT8_C(204), UINT8_C( 33), UINT8_C( 17), UINT8_C(251), UINT8_C(205) }, { UINT8_C(172), UINT8_C(231), UINT8_C(101), UINT8_C(249), UINT8_C(204), UINT8_C(235), UINT8_C(198), UINT8_C(217), UINT8_C(144), UINT8_C(248), UINT8_C( 76), UINT8_C(167), UINT8_C(240), UINT8_C(205), UINT8_C( 33), UINT8_C(251) } }, { { UINT8_C( 73), UINT8_C(226), UINT8_C( 37), UINT8_C(174), UINT8_C( 25), UINT8_C( 30), UINT8_C(157), UINT8_C(230), UINT8_C(143), UINT8_C(137), UINT8_C(111), UINT8_C( 86), UINT8_C(158), UINT8_C( 72), UINT8_C( 32), UINT8_C( 46) }, { UINT8_C(109), UINT8_C( 25), UINT8_C( 96), UINT8_C(127), UINT8_C(101), UINT8_C( 7), UINT8_C(227), UINT8_C( 58), UINT8_C(247), UINT8_C(176), UINT8_C( 6), UINT8_C( 24), UINT8_C(193), UINT8_C( 2), UINT8_C(229), UINT8_C( 11) }, { UINT8_C(226), UINT8_C(174), UINT8_C( 30), UINT8_C(230), UINT8_C(143), UINT8_C(111), UINT8_C(158), UINT8_C( 46), UINT8_C(109), UINT8_C(127), UINT8_C(101), UINT8_C(227), UINT8_C(247), UINT8_C( 24), UINT8_C(193), UINT8_C(229) } }, { { UINT8_C(228), UINT8_C( 10), UINT8_C(185), UINT8_C(254), UINT8_C( 41), UINT8_C( 87), UINT8_C(228), UINT8_C(184), UINT8_C(224), UINT8_C( 83), UINT8_C( 
14), UINT8_C(126), UINT8_C(155), UINT8_C( 47), UINT8_C(172), UINT8_C( 8) }, { UINT8_C( 72), UINT8_C( 13), UINT8_C(135), UINT8_C(173), UINT8_C( 20), UINT8_C(106), UINT8_C(231), UINT8_C( 12), UINT8_C( 26), UINT8_C(238), UINT8_C( 36), UINT8_C(220), UINT8_C(240), UINT8_C( 10), UINT8_C(231), UINT8_C(212) }, { UINT8_C(228), UINT8_C(254), UINT8_C( 87), UINT8_C(228), UINT8_C(224), UINT8_C(126), UINT8_C(155), UINT8_C(172), UINT8_C( 72), UINT8_C(173), UINT8_C(106), UINT8_C(231), UINT8_C(238), UINT8_C(220), UINT8_C(240), UINT8_C(231) } }, { { UINT8_C( 20), UINT8_C(160), UINT8_C(210), UINT8_C( 61), UINT8_C(247), UINT8_C(182), UINT8_C(246), UINT8_C(215), UINT8_C( 9), UINT8_C( 4), UINT8_C( 85), UINT8_C(164), UINT8_C( 51), UINT8_C( 2), UINT8_C(172), UINT8_C(123) }, { UINT8_C( 15), UINT8_C( 51), UINT8_C( 40), UINT8_C( 35), UINT8_C(157), UINT8_C( 16), UINT8_C( 47), UINT8_C(184), UINT8_C(254), UINT8_C( 84), UINT8_C(148), UINT8_C(238), UINT8_C( 94), UINT8_C(123), UINT8_C(194), UINT8_C(114) }, { UINT8_C(160), UINT8_C(210), UINT8_C(247), UINT8_C(246), UINT8_C( 9), UINT8_C(164), UINT8_C( 51), UINT8_C(172), UINT8_C( 51), UINT8_C( 40), UINT8_C(157), UINT8_C(184), UINT8_C(254), UINT8_C(238), UINT8_C(123), UINT8_C(194) } }, { { UINT8_C( 27), UINT8_C(149), UINT8_C(176), UINT8_C( 19), UINT8_C( 75), UINT8_C(166), UINT8_C(234), UINT8_C( 85), UINT8_C(170), UINT8_C( 64), UINT8_C(249), UINT8_C(222), UINT8_C( 66), UINT8_C(166), UINT8_C( 89), UINT8_C( 81) }, { UINT8_C(217), UINT8_C(130), UINT8_C(116), UINT8_C(119), UINT8_C(146), UINT8_C(164), UINT8_C( 47), UINT8_C(144), UINT8_C(248), UINT8_C(195), UINT8_C(126), UINT8_C( 86), UINT8_C( 62), UINT8_C( 64), UINT8_C(200), UINT8_C( 89) }, { UINT8_C(149), UINT8_C(176), UINT8_C(166), UINT8_C(234), UINT8_C(170), UINT8_C(249), UINT8_C(166), UINT8_C( 89), UINT8_C(217), UINT8_C(119), UINT8_C(164), UINT8_C(144), UINT8_C(248), UINT8_C(126), UINT8_C( 64), UINT8_C(200) } }, { { UINT8_C(213), UINT8_C(120), UINT8_C(108), UINT8_C( 33), UINT8_C( 30), UINT8_C( 87), 
UINT8_C(118), UINT8_C(201), UINT8_C(151), UINT8_C(111), UINT8_C(167), UINT8_C(217), UINT8_C( 21), UINT8_C( 0), UINT8_C( 42), UINT8_C(239) }, { UINT8_C(130), UINT8_C(158), UINT8_C(102), UINT8_C( 20), UINT8_C( 66), UINT8_C(149), UINT8_C(164), UINT8_C( 58), UINT8_C( 88), UINT8_C( 34), UINT8_C(144), UINT8_C(150), UINT8_C( 99), UINT8_C( 89), UINT8_C(239), UINT8_C( 56) }, { UINT8_C(213), UINT8_C(108), UINT8_C( 87), UINT8_C(201), UINT8_C(151), UINT8_C(217), UINT8_C( 21), UINT8_C(239), UINT8_C(158), UINT8_C(102), UINT8_C(149), UINT8_C(164), UINT8_C( 88), UINT8_C(150), UINT8_C( 99), UINT8_C(239) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vpmaxq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vpmaxq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(59667), UINT16_C(15725), UINT16_C(57980), UINT16_C(10476), UINT16_C(19235), UINT16_C(46700), UINT16_C(45532), UINT16_C( 5614) }, { UINT16_C(15278), UINT16_C(65483), UINT16_C(21580), UINT16_C(37597), UINT16_C(56993), UINT16_C(15917), UINT16_C(40954), UINT16_C( 3652) }, { UINT16_C(59667), UINT16_C(57980), UINT16_C(46700), UINT16_C(45532), UINT16_C(65483), UINT16_C(37597), UINT16_C(56993), UINT16_C(40954) } }, { { UINT16_C(45448), UINT16_C( 1099), UINT16_C(14228), UINT16_C(46893), UINT16_C(39299), UINT16_C(24429), UINT16_C(23370), UINT16_C(63604) }, { UINT16_C(16278), UINT16_C(58103), UINT16_C(54419), UINT16_C(13428), UINT16_C(41650), UINT16_C(44403), UINT16_C(46913), UINT16_C(51643) }, { UINT16_C(45448), UINT16_C(46893), UINT16_C(39299), UINT16_C(63604), UINT16_C(58103), UINT16_C(54419), UINT16_C(44403), UINT16_C(51643) } }, { { UINT16_C( 1640), UINT16_C(64718), UINT16_C(64318), UINT16_C(49588), UINT16_C( 8596), UINT16_C(56864), 
UINT16_C(38013), UINT16_C( 5078) }, { UINT16_C(52947), UINT16_C(26358), UINT16_C(27298), UINT16_C(21915), UINT16_C( 3596), UINT16_C(19714), UINT16_C(48581), UINT16_C(11543) }, { UINT16_C(64718), UINT16_C(64318), UINT16_C(56864), UINT16_C(38013), UINT16_C(52947), UINT16_C(27298), UINT16_C(19714), UINT16_C(48581) } }, { { UINT16_C(58819), UINT16_C( 298), UINT16_C(57056), UINT16_C(29890), UINT16_C(58111), UINT16_C(31826), UINT16_C(10358), UINT16_C(18832) }, { UINT16_C(34550), UINT16_C(39344), UINT16_C(19440), UINT16_C(65006), UINT16_C(61529), UINT16_C( 7754), UINT16_C(25005), UINT16_C(28747) }, { UINT16_C(58819), UINT16_C(57056), UINT16_C(58111), UINT16_C(18832), UINT16_C(39344), UINT16_C(65006), UINT16_C(61529), UINT16_C(28747) } }, { { UINT16_C(30022), UINT16_C( 9842), UINT16_C(13395), UINT16_C(21402), UINT16_C(60439), UINT16_C(36303), UINT16_C(24341), UINT16_C( 3031) }, { UINT16_C(34789), UINT16_C(54948), UINT16_C(37586), UINT16_C(11219), UINT16_C( 7554), UINT16_C(12105), UINT16_C(38015), UINT16_C(50592) }, { UINT16_C(30022), UINT16_C(21402), UINT16_C(60439), UINT16_C(24341), UINT16_C(54948), UINT16_C(37586), UINT16_C(12105), UINT16_C(50592) } }, { { UINT16_C( 4618), UINT16_C(24044), UINT16_C(34374), UINT16_C(23984), UINT16_C(32883), UINT16_C(35051), UINT16_C(49887), UINT16_C(50579) }, { UINT16_C(14409), UINT16_C( 7067), UINT16_C(28362), UINT16_C(19782), UINT16_C(36747), UINT16_C( 2684), UINT16_C( 7203), UINT16_C(11728) }, { UINT16_C(24044), UINT16_C(34374), UINT16_C(35051), UINT16_C(50579), UINT16_C(14409), UINT16_C(28362), UINT16_C(36747), UINT16_C(11728) } }, { { UINT16_C(48174), UINT16_C(30091), UINT16_C(15170), UINT16_C(46546), UINT16_C(48571), UINT16_C(39741), UINT16_C(53631), UINT16_C(51296) }, { UINT16_C(64265), UINT16_C(54243), UINT16_C(10601), UINT16_C(62496), UINT16_C(40376), UINT16_C(56575), UINT16_C(53177), UINT16_C(59401) }, { UINT16_C(48174), UINT16_C(46546), UINT16_C(48571), UINT16_C(53631), UINT16_C(64265), UINT16_C(62496), UINT16_C(56575), 
UINT16_C(59401) } }, { { UINT16_C(38027), UINT16_C(52573), UINT16_C(12240), UINT16_C(35715), UINT16_C(49389), UINT16_C(27686), UINT16_C(34449), UINT16_C(39477) }, { UINT16_C( 6273), UINT16_C(60014), UINT16_C(36418), UINT16_C(64223), UINT16_C(56875), UINT16_C(58838), UINT16_C(57517), UINT16_C(14541) }, { UINT16_C(52573), UINT16_C(35715), UINT16_C(49389), UINT16_C(39477), UINT16_C(60014), UINT16_C(64223), UINT16_C(58838), UINT16_C(57517) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vpmaxq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vpmaxq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C( 492066321), UINT32_C(2750383310), UINT32_C(2709160739), UINT32_C(4140404091) }, { UINT32_C( 156087931), UINT32_C(2899083535), UINT32_C(1023047402), UINT32_C( 834825504) }, { UINT32_C(2750383310), UINT32_C(4140404091), UINT32_C(2899083535), UINT32_C(1023047402) } }, { { UINT32_C(2454656707), UINT32_C(3174383258), UINT32_C(1029681090), UINT32_C(3073583164) }, { UINT32_C(4005593311), UINT32_C(3969616897), UINT32_C( 589862146), UINT32_C(3327453699) }, { UINT32_C(3174383258), UINT32_C(3073583164), UINT32_C(4005593311), UINT32_C(3327453699) } }, { { UINT32_C(2606277376), UINT32_C(2757266914), UINT32_C(2044835645), UINT32_C(3207664864) }, { UINT32_C(2527981973), UINT32_C(2156021886), UINT32_C(3785599966), UINT32_C(2527590549) }, { UINT32_C(2757266914), UINT32_C(3207664864), UINT32_C(2527981973), UINT32_C(3785599966) } }, { { UINT32_C(2100363419), UINT32_C(3391195533), UINT32_C( 558105153), UINT32_C(2900391191) }, { UINT32_C(3829566822), UINT32_C(3026568662), UINT32_C( 93653104), UINT32_C(2627419136) }, { UINT32_C(3391195533), UINT32_C(2900391191), UINT32_C(3829566822), 
UINT32_C(2627419136) } }, { { UINT32_C(3390688316), UINT32_C(2543074134), UINT32_C(1421400125), UINT32_C(3019937869) }, { UINT32_C(4221059877), UINT32_C(2024799496), UINT32_C( 108872710), UINT32_C(3181517185) }, { UINT32_C(3390688316), UINT32_C(3019937869), UINT32_C(4221059877), UINT32_C(3181517185) } }, { { UINT32_C( 998751461), UINT32_C( 886185207), UINT32_C(1116310260), UINT32_C(1224116514) }, { UINT32_C(3561197260), UINT32_C(2454516620), UINT32_C(3097020983), UINT32_C(3363191779) }, { UINT32_C( 998751461), UINT32_C(1224116514), UINT32_C(3561197260), UINT32_C(3363191779) } }, { { UINT32_C(3993304567), UINT32_C( 237164057), UINT32_C(2203102049), UINT32_C( 30098997) }, { UINT32_C(1624641492), UINT32_C( 972169730), UINT32_C(3488779244), UINT32_C(3180882118) }, { UINT32_C(3993304567), UINT32_C(2203102049), UINT32_C(1624641492), UINT32_C(3488779244) } }, { { UINT32_C(2141953125), UINT32_C(3549285746), UINT32_C(2924993913), UINT32_C(4155449891) }, { UINT32_C( 861439281), UINT32_C(2490190504), UINT32_C(2607046613), UINT32_C( 744029383) }, { UINT32_C(3549285746), UINT32_C(4155449891), UINT32_C(2490190504), UINT32_C(2607046613) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vpmaxq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vpmax_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vpmax_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vpmax_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vpmax_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vpmax_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vpmax_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vpmax_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vpmaxq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vpmaxq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vpmaxq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vpmaxq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vpmaxq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vpmaxq_u8) 
SIMDE_TEST_FUNC_LIST_ENTRY(vpmaxq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vpmaxq_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/pmin.c000066400000000000000000001435631400333146700164420ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN pmin #include "test-neon.h" #include "../../../simde/arm/neon/pmin.h" static int test_simde_vpmin_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -823.14), SIMDE_FLOAT32_C( 128.93) }, { SIMDE_FLOAT32_C( -79.23), SIMDE_FLOAT32_C( 181.88) }, { SIMDE_FLOAT32_C( -823.14), SIMDE_FLOAT32_C( -79.23) } }, { { SIMDE_FLOAT32_C( 342.60), SIMDE_FLOAT32_C( 371.59) }, { SIMDE_FLOAT32_C( 649.79), SIMDE_FLOAT32_C( 595.53) }, { SIMDE_FLOAT32_C( 342.60), SIMDE_FLOAT32_C( 595.53) } }, { { SIMDE_FLOAT32_C( 270.06), SIMDE_FLOAT32_C( -842.97) }, { SIMDE_FLOAT32_C( -961.22), SIMDE_FLOAT32_C( -328.85) }, { SIMDE_FLOAT32_C( -842.97), SIMDE_FLOAT32_C( -961.22) } }, { { SIMDE_FLOAT32_C( -201.57), SIMDE_FLOAT32_C( 793.30) }, { SIMDE_FLOAT32_C( -461.86), SIMDE_FLOAT32_C( -809.27) }, { SIMDE_FLOAT32_C( -201.57), SIMDE_FLOAT32_C( -809.27) } }, { { SIMDE_FLOAT32_C( -247.56), SIMDE_FLOAT32_C( -943.60) }, { SIMDE_FLOAT32_C( 440.83), SIMDE_FLOAT32_C( 190.31) }, { SIMDE_FLOAT32_C( -943.60), SIMDE_FLOAT32_C( 190.31) } }, { { SIMDE_FLOAT32_C( -830.78), SIMDE_FLOAT32_C( -101.36) }, { SIMDE_FLOAT32_C( -658.42), SIMDE_FLOAT32_C( 551.40) }, { SIMDE_FLOAT32_C( -830.78), SIMDE_FLOAT32_C( -658.42) } }, { { SIMDE_FLOAT32_C( -949.01), SIMDE_FLOAT32_C( -839.90) }, { SIMDE_FLOAT32_C( 498.39), SIMDE_FLOAT32_C( 411.31) }, { SIMDE_FLOAT32_C( -949.01), SIMDE_FLOAT32_C( 411.31) } }, { { SIMDE_FLOAT32_C( 115.69), SIMDE_FLOAT32_C( -563.88) }, { SIMDE_FLOAT32_C( 32.11), SIMDE_FLOAT32_C( 292.55) }, { SIMDE_FLOAT32_C( -563.88), SIMDE_FLOAT32_C( 32.11) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a, b, r; a = 
simde_vld1_f32(test_vec[i].a); b = simde_vld1_f32(test_vec[i].b); r = simde_vpmin_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vpmin_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 103), -INT8_C( 58), INT8_C( 105), INT8_C( 115), INT8_C( 81), -INT8_C( 1), INT8_C( 74), -INT8_C( 20) }, { INT8_C( 41), -INT8_C( 51), -INT8_C( 70), -INT8_C( 85), -INT8_C( 14), -INT8_C( 5), -INT8_C( 29), INT8_C( 70) }, { -INT8_C( 58), INT8_C( 105), -INT8_C( 1), -INT8_C( 20), -INT8_C( 51), -INT8_C( 85), -INT8_C( 14), -INT8_C( 29) } }, { { INT8_C( 124), -INT8_C( 62), INT8_C( 84), -INT8_C( 8), INT8_C( 27), -INT8_C( 24), -INT8_C( 25), -INT8_C( 115) }, { INT8_C( 118), INT8_C( 90), INT8_C( 46), INT8_C( 99), INT8_C( 51), -INT8_C( 97), -INT8_C( 55), -INT8_C( 102) }, { -INT8_C( 62), -INT8_C( 8), -INT8_C( 24), -INT8_C( 115), INT8_C( 90), INT8_C( 46), -INT8_C( 97), -INT8_C( 102) } }, { { INT8_C( 102), INT8_C( 50), INT8_C( 13), -INT8_C( 73), INT8_C( 49), INT8_C( 88), -INT8_C( 93), INT8_C( 90) }, { INT8_C( 37), INT8_C( 93), INT8_C( 5), INT8_C( 23), INT8_C( 88), -INT8_C( 23), INT8_C( 94), -INT8_C( 44) }, { INT8_C( 50), -INT8_C( 73), INT8_C( 49), -INT8_C( 93), INT8_C( 37), INT8_C( 5), -INT8_C( 23), -INT8_C( 44) } }, { { -INT8_C( 85), -INT8_C( 78), -INT8_C( 51), -INT8_C( 58), -INT8_C( 101), -INT8_C( 76), INT8_C( 84), INT8_C( 17) }, { INT8_C( 14), -INT8_C( 126), INT8_C( 116), INT8_C( 65), INT8_C( 33), INT8_C( 61), -INT8_C( 36), -INT8_C( 121) }, { -INT8_C( 85), -INT8_C( 58), -INT8_C( 101), INT8_C( 17), -INT8_C( 126), INT8_C( 65), INT8_C( 33), -INT8_C( 121) } }, { { INT8_C( 112), -INT8_C( 23), INT8_C( 62), -INT8_C( 95), INT8_C( 65), -INT8_C( 31), -INT8_C( 4), INT8_C( 103) }, { INT8_C( 62), INT8_C( 1), INT8_C( 126), -INT8_C( 105), -INT8_C( 22), -INT8_C( 36), INT8_C( 107), -INT8_C( 106) }, { -INT8_C( 23), -INT8_C( 95), -INT8_C( 31), -INT8_C( 4), INT8_C( 1), -INT8_C( 105), 
-INT8_C( 36), -INT8_C( 106) } }, { { -INT8_C( 113), INT8_C( 56), INT8_C( 92), INT8_C( 42), -INT8_C( 20), -INT8_C( 80), INT8_C( 59), -INT8_C( 5) }, { INT8_C( 50), -INT8_C( 81), INT8_C( 60), INT8_C( 84), -INT8_C( 20), INT8_C( 24), -INT8_C( 37), INT8_C( 92) }, { -INT8_C( 113), INT8_C( 42), -INT8_C( 80), -INT8_C( 5), -INT8_C( 81), INT8_C( 60), -INT8_C( 20), -INT8_C( 37) } }, { { INT8_C( 2), INT8_C( 26), -INT8_C( 2), INT8_C( 67), -INT8_C( 5), -INT8_C( 6), -INT8_C( 86), INT8_C( 58) }, { -INT8_C( 5), INT8_C( 41), -INT8_C( 47), -INT8_C( 26), INT8_C( 5), INT8_C( 60), INT8_C( 124), -INT8_C( 108) }, { INT8_C( 2), -INT8_C( 2), -INT8_C( 6), -INT8_C( 86), -INT8_C( 5), -INT8_C( 47), INT8_C( 5), -INT8_C( 108) } }, { { INT8_C( 117), -INT8_C( 40), -INT8_C( 66), INT8_C( 97), -INT8_C( 119), -INT8_C( 7), INT8_C( 92), -INT8_C( 69) }, { -INT8_C( 88), -INT8_C( 103), INT8_C( 15), -INT8_C( 107), -INT8_C( 79), -INT8_C( 21), -INT8_C( 15), -INT8_C( 77) }, { -INT8_C( 40), -INT8_C( 66), -INT8_C( 119), -INT8_C( 69), -INT8_C( 103), -INT8_C( 107), -INT8_C( 79), -INT8_C( 77) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vpmin_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vpmin_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 14745), INT16_C( 29545), -INT16_C( 175), -INT16_C( 5046) }, { -INT16_C( 13015), -INT16_C( 21574), -INT16_C( 1038), INT16_C( 18147) }, { -INT16_C( 14745), -INT16_C( 5046), -INT16_C( 21574), -INT16_C( 1038) } }, { { -INT16_C( 15748), -INT16_C( 1964), -INT16_C( 6117), -INT16_C( 29209) }, { INT16_C( 23158), INT16_C( 25390), -INT16_C( 24781), -INT16_C( 25911) }, { -INT16_C( 15748), -INT16_C( 29209), INT16_C( 23158), -INT16_C( 25911) } }, { { INT16_C( 12902), -INT16_C( 18675), INT16_C( 22577), 
INT16_C( 23203) }, { INT16_C( 23845), INT16_C( 5893), -INT16_C( 5800), -INT16_C( 11170) }, { -INT16_C( 18675), INT16_C( 22577), INT16_C( 5893), -INT16_C( 11170) } }, { { -INT16_C( 19797), -INT16_C( 14643), -INT16_C( 19301), INT16_C( 4436) }, { -INT16_C( 32242), INT16_C( 16756), INT16_C( 15649), -INT16_C( 30756) }, { -INT16_C( 19797), -INT16_C( 19301), -INT16_C( 32242), -INT16_C( 30756) } }, { { -INT16_C( 5776), -INT16_C( 24258), -INT16_C( 7871), INT16_C( 26620) }, { INT16_C( 318), -INT16_C( 26754), -INT16_C( 8982), -INT16_C( 27029) }, { -INT16_C( 24258), -INT16_C( 7871), -INT16_C( 26754), -INT16_C( 27029) } }, { { INT16_C( 14479), INT16_C( 10844), -INT16_C( 20244), -INT16_C( 1221) }, { -INT16_C( 20686), INT16_C( 21564), INT16_C( 6380), INT16_C( 23771) }, { INT16_C( 10844), -INT16_C( 20244), -INT16_C( 20686), INT16_C( 6380) } }, { { INT16_C( 6658), INT16_C( 17406), -INT16_C( 1285), INT16_C( 15018) }, { INT16_C( 10747), -INT16_C( 6447), INT16_C( 15365), -INT16_C( 27524) }, { INT16_C( 6658), -INT16_C( 1285), -INT16_C( 6447), -INT16_C( 27524) } }, { { -INT16_C( 10123), INT16_C( 25022), -INT16_C( 1655), -INT16_C( 17572) }, { -INT16_C( 26200), -INT16_C( 27377), -INT16_C( 5199), -INT16_C( 19471) }, { -INT16_C( 10123), -INT16_C( 17572), -INT16_C( 27377), -INT16_C( 19471) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vpmin_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vpmin_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 1936311911), -INT32_C( 330629295) }, { -INT32_C( 1413821143), INT32_C( 1189346290) }, { -INT32_C( 330629295), -INT32_C( 1413821143) } }, { { -INT32_C( 128662916), -INT32_C( 1914181605) }, { INT32_C( 1663982198), -INT32_C( 1698062541) }, { -INT32_C( 1914181605), 
-INT32_C( 1698062541) } }, { { -INT32_C( 1223871898), INT32_C( 1520654385) }, { INT32_C( 386227493), -INT32_C( 731977384) }, { -INT32_C( 1223871898), -INT32_C( 731977384) } }, { { -INT32_C( 959597909), INT32_C( 290763931) }, { INT32_C( 1098154510), -INT32_C( 2015609567) }, { -INT32_C( 959597909), -INT32_C( 2015609567) } }, { { -INT32_C( 1589712528), INT32_C( 1744625985) }, { -INT32_C( 1753349826), -INT32_C( 1771315990) }, { -INT32_C( 1589712528), -INT32_C( 1771315990) } }, { { INT32_C( 710686863), -INT32_C( 79974164) }, { INT32_C( 1413263154), INT32_C( 1557862636) }, { -INT32_C( 79974164), INT32_C( 1413263154) } }, { { INT32_C( 1140726274), INT32_C( 984283899) }, { -INT32_C( 422499845), -INT32_C( 1803797499) }, { INT32_C( 984283899), -INT32_C( 1803797499) } }, { { INT32_C( 1639897205), -INT32_C( 1151534711) }, { -INT32_C( 1794139736), -INT32_C( 1275991119) }, { -INT32_C( 1151534711), -INT32_C( 1794139736) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vpmin_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; }

/* Test for simde_vpmin_u8.
 *
 * Each entry of test_vec holds two 8-element inputs (a, b) and the expected
 * 8-element result r.  In every vector the first four elements of r are the
 * minima of the adjacent pairs of a, and the last four are the minima of the
 * adjacent pairs of b.
 *
 * The arrays are sized 8 (not 16): every initializer supplies exactly eight
 * values and the loop loads them with simde_vld1_u8 into simde_uint8x8_t, so
 * a larger array would only add unused, zero-initialized padding.
 *
 * Returns 0 once every vector has been checked; mismatches are reported by
 * simde_test_arm_neon_assert_equal_u8x8.
 */
static int
test_simde_vpmin_u8 (SIMDE_MUNIT_TEST_ARGS) {
  struct {
    uint8_t a[8];
    uint8_t b[8];
    uint8_t r[8];
  } test_vec[] = {
    { { UINT8_C(103), UINT8_C(198), UINT8_C(105), UINT8_C(115), UINT8_C( 81), UINT8_MAX, UINT8_C( 74), UINT8_C(236) },
      { UINT8_C( 41), UINT8_C(205), UINT8_C(186), UINT8_C(171), UINT8_C(242), UINT8_C(251), UINT8_C(227), UINT8_C( 70) },
      { UINT8_C(103), UINT8_C(105), UINT8_C( 81), UINT8_C( 74), UINT8_C( 41), UINT8_C(171), UINT8_C(242), UINT8_C( 70) } },
    { { UINT8_C(124), UINT8_C(194), UINT8_C( 84), UINT8_C(248), UINT8_C( 27), UINT8_C(232), UINT8_C(231), UINT8_C(141) },
      { UINT8_C(118), UINT8_C( 90), UINT8_C( 46), UINT8_C( 99), UINT8_C( 51), UINT8_C(159), UINT8_C(201), UINT8_C(154) },
      { UINT8_C(124), UINT8_C( 84), UINT8_C( 27), UINT8_C(141), UINT8_C( 90), UINT8_C( 46), UINT8_C( 51), UINT8_C(154) } },
    { { UINT8_C(102), UINT8_C( 50), UINT8_C( 13), UINT8_C(183), UINT8_C( 49), UINT8_C( 88), UINT8_C(163), UINT8_C( 90) },
      { UINT8_C( 37), UINT8_C( 93), UINT8_C( 5), UINT8_C( 23), UINT8_C( 88), UINT8_C(233), UINT8_C( 94), UINT8_C(212) },
      { UINT8_C( 50), UINT8_C( 13), UINT8_C( 49), UINT8_C( 90), UINT8_C( 37), UINT8_C( 5), UINT8_C( 88), UINT8_C( 94) } },
    { { UINT8_C(171), UINT8_C(178), UINT8_C(205), UINT8_C(198), UINT8_C(155), UINT8_C(180), UINT8_C( 84), UINT8_C( 17) },
      { UINT8_C( 14), UINT8_C(130), UINT8_C(116), UINT8_C( 65), UINT8_C( 33), UINT8_C( 61), UINT8_C(220), UINT8_C(135) },
      { UINT8_C(171), UINT8_C(198), UINT8_C(155), UINT8_C( 17), UINT8_C( 14), UINT8_C( 65), UINT8_C( 33), UINT8_C(135) } },
    { { UINT8_C(112), UINT8_C(233), UINT8_C( 62), UINT8_C(161), UINT8_C( 65), UINT8_C(225), UINT8_C(252), UINT8_C(103) },
      { UINT8_C( 62), UINT8_C( 1), UINT8_C(126), UINT8_C(151), UINT8_C(234), UINT8_C(220), UINT8_C(107), UINT8_C(150) },
      { UINT8_C(112), UINT8_C( 62), UINT8_C( 65), UINT8_C(103), UINT8_C( 1), UINT8_C(126), UINT8_C(220), UINT8_C(107) } },
    { { UINT8_C(143), UINT8_C( 56), UINT8_C( 92), UINT8_C( 42), UINT8_C(236), UINT8_C(176), UINT8_C( 59), UINT8_C(251) },
      { UINT8_C( 50), UINT8_C(175), UINT8_C( 60), UINT8_C( 84), UINT8_C(236), UINT8_C( 24), UINT8_C(219), UINT8_C( 92) },
      { UINT8_C( 56), UINT8_C( 42), UINT8_C(176), UINT8_C( 59), UINT8_C( 50), UINT8_C( 60), UINT8_C( 24), UINT8_C( 92) } },
    { { UINT8_C( 2), UINT8_C( 26), UINT8_C(254), UINT8_C( 67), UINT8_C(251), UINT8_C(250), UINT8_C(170), UINT8_C( 58) },
      { UINT8_C(251), UINT8_C( 41), UINT8_C(209), UINT8_C(230), UINT8_C( 5), UINT8_C( 60), UINT8_C(124), UINT8_C(148) },
      { UINT8_C( 2), UINT8_C( 67), UINT8_C(250), UINT8_C( 58), UINT8_C( 41), UINT8_C(209), UINT8_C( 5), UINT8_C(124) } },
    { { UINT8_C(117), UINT8_C(216), UINT8_C(190), UINT8_C( 97), UINT8_C(137), UINT8_C(249), UINT8_C( 92), UINT8_C(187) },
      { UINT8_C(168), UINT8_C(153), UINT8_C( 15), UINT8_C(149), UINT8_C(177), UINT8_C(235), UINT8_C(241), UINT8_C(179) },
      { UINT8_C(117), UINT8_C( 97), UINT8_C(137), UINT8_C( 92), UINT8_C(153), UINT8_C( 15), UINT8_C(177), UINT8_C(179) } }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    /* Load both inputs, apply the operation under test, compare with r. */
    simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a);
    simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b);
    simde_uint8x8_t r = simde_vpmin_u8(a, b);
    simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r));
  }

  return 0;
}

static int test_simde_vpmin_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(50791), UINT16_C(29545), UINT16_C(65361), UINT16_C(60490) }, { UINT16_C(52521), UINT16_C(43962), UINT16_C(64498), UINT16_C(18147) }, { UINT16_C(29545), UINT16_C(60490), UINT16_C(43962), UINT16_C(18147) } }, { { UINT16_C(49788), UINT16_C(63572), UINT16_C(59419), UINT16_C(36327) }, { UINT16_C(23158), UINT16_C(25390), UINT16_C(40755), UINT16_C(39625) }, { UINT16_C(49788), UINT16_C(36327), UINT16_C(23158), UINT16_C(39625) } }, { { UINT16_C(12902), UINT16_C(46861), UINT16_C(22577), UINT16_C(23203) }, { UINT16_C(23845), UINT16_C( 5893), UINT16_C(59736), UINT16_C(54366) }, { UINT16_C(12902), UINT16_C(22577), UINT16_C( 5893), UINT16_C(54366) } }, { { UINT16_C(45739), UINT16_C(50893), UINT16_C(46235), UINT16_C( 4436) }, { UINT16_C(33294), UINT16_C(16756), UINT16_C(15649), UINT16_C(34780) }, { UINT16_C(45739), UINT16_C( 4436), UINT16_C(16756), UINT16_C(15649) } }, { { UINT16_C(59760), UINT16_C(41278), UINT16_C(57665), UINT16_C(26620) }, { UINT16_C( 318), UINT16_C(38782), UINT16_C(56554), UINT16_C(38507) }, { UINT16_C(41278), UINT16_C(26620), UINT16_C( 318), UINT16_C(38507) } }, { { UINT16_C(14479), UINT16_C(10844), UINT16_C(45292), UINT16_C(64315) }, { UINT16_C(44850), UINT16_C(21564), UINT16_C( 6380), UINT16_C(23771) }, { UINT16_C(10844), UINT16_C(45292), UINT16_C(21564), UINT16_C( 6380) } }, { { UINT16_C( 6658), UINT16_C(17406), UINT16_C(64251),
UINT16_C(15018) }, { UINT16_C(10747), UINT16_C(59089), UINT16_C(15365), UINT16_C(38012) }, { UINT16_C( 6658), UINT16_C(15018), UINT16_C(10747), UINT16_C(15365) } }, { { UINT16_C(55413), UINT16_C(25022), UINT16_C(63881), UINT16_C(47964) }, { UINT16_C(39336), UINT16_C(38159), UINT16_C(60337), UINT16_C(46065) }, { UINT16_C(25022), UINT16_C(47964), UINT16_C(38159), UINT16_C(46065) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vpmin_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vpmin_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(1936311911), UINT32_C(3964338001) }, { UINT32_C(2881146153), UINT32_C(1189346290) }, { UINT32_C(1936311911), UINT32_C(1189346290) } }, { { UINT32_C(4166304380), UINT32_C(2380785691) }, { UINT32_C(1663982198), UINT32_C(2596904755) }, { UINT32_C(2380785691), UINT32_C(1663982198) } }, { { UINT32_C(3071095398), UINT32_C(1520654385) }, { UINT32_C( 386227493), UINT32_C(3562989912) }, { UINT32_C(1520654385), UINT32_C( 386227493) } }, { { UINT32_C(3335369387), UINT32_C( 290763931) }, { UINT32_C(1098154510), UINT32_C(2279357729) }, { UINT32_C( 290763931), UINT32_C(1098154510) } }, { { UINT32_C(2705254768), UINT32_C(1744625985) }, { UINT32_C(2541617470), UINT32_C(2523651306) }, { UINT32_C(1744625985), UINT32_C(2523651306) } }, { { UINT32_C( 710686863), UINT32_C(4214993132) }, { UINT32_C(1413263154), UINT32_C(1557862636) }, { UINT32_C( 710686863), UINT32_C(1413263154) } }, { { UINT32_C(1140726274), UINT32_C( 984283899) }, { UINT32_C(3872467451), UINT32_C(2491169797) }, { UINT32_C( 984283899), UINT32_C(2491169797) } }, { { UINT32_C(1639897205), UINT32_C(3143432585) }, { UINT32_C(2500827560), UINT32_C(3018976177) }, { UINT32_C(1639897205), 
UINT32_C(2500827560) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vpmin_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vpminq_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 680.38), SIMDE_FLOAT32_C( -211.23), SIMDE_FLOAT32_C( 566.20), SIMDE_FLOAT32_C( 596.88) }, { SIMDE_FLOAT32_C( 823.29), SIMDE_FLOAT32_C( -604.90), SIMDE_FLOAT32_C( -329.55), SIMDE_FLOAT32_C( 536.46) }, { SIMDE_FLOAT32_C( -211.23), SIMDE_FLOAT32_C( 566.20), SIMDE_FLOAT32_C( -604.90), SIMDE_FLOAT32_C( -329.55) } }, { { SIMDE_FLOAT32_C( -444.45), SIMDE_FLOAT32_C( 107.94), SIMDE_FLOAT32_C( -45.21), SIMDE_FLOAT32_C( 257.74) }, { SIMDE_FLOAT32_C( -270.43), SIMDE_FLOAT32_C( 26.80), SIMDE_FLOAT32_C( 904.46), SIMDE_FLOAT32_C( 832.39) }, { SIMDE_FLOAT32_C( -444.45), SIMDE_FLOAT32_C( -45.21), SIMDE_FLOAT32_C( -270.43), SIMDE_FLOAT32_C( 832.39) } }, { { SIMDE_FLOAT32_C( 271.42), SIMDE_FLOAT32_C( 434.59), SIMDE_FLOAT32_C( -716.79), SIMDE_FLOAT32_C( 213.94) }, { SIMDE_FLOAT32_C( -967.40), SIMDE_FLOAT32_C( -514.23), SIMDE_FLOAT32_C( -725.54), SIMDE_FLOAT32_C( 608.35) }, { SIMDE_FLOAT32_C( 271.42), SIMDE_FLOAT32_C( -716.79), SIMDE_FLOAT32_C( -967.40), SIMDE_FLOAT32_C( -725.54) } }, { { SIMDE_FLOAT32_C( -686.64), SIMDE_FLOAT32_C( -198.11), SIMDE_FLOAT32_C( -740.42), SIMDE_FLOAT32_C( -782.38) }, { SIMDE_FLOAT32_C( 997.85), SIMDE_FLOAT32_C( -563.49), SIMDE_FLOAT32_C( 25.86), SIMDE_FLOAT32_C( 678.22) }, { SIMDE_FLOAT32_C( -686.64), SIMDE_FLOAT32_C( -782.38), SIMDE_FLOAT32_C( -563.49), SIMDE_FLOAT32_C( 25.86) } }, { { SIMDE_FLOAT32_C( 225.28), SIMDE_FLOAT32_C( -407.94), SIMDE_FLOAT32_C( 275.10), SIMDE_FLOAT32_C( 48.57) }, { SIMDE_FLOAT32_C( -12.83), SIMDE_FLOAT32_C( 945.55), SIMDE_FLOAT32_C( 
-414.97), SIMDE_FLOAT32_C( 542.72) }, { SIMDE_FLOAT32_C( -407.94), SIMDE_FLOAT32_C( 48.57), SIMDE_FLOAT32_C( -12.83), SIMDE_FLOAT32_C( -414.97) } }, { { SIMDE_FLOAT32_C( 53.49), SIMDE_FLOAT32_C( 539.83), SIMDE_FLOAT32_C( -199.54), SIMDE_FLOAT32_C( 783.06) }, { SIMDE_FLOAT32_C( -433.37), SIMDE_FLOAT32_C( -295.08), SIMDE_FLOAT32_C( 615.45), SIMDE_FLOAT32_C( 838.05) }, { SIMDE_FLOAT32_C( 53.49), SIMDE_FLOAT32_C( -199.54), SIMDE_FLOAT32_C( -433.37), SIMDE_FLOAT32_C( 615.45) } }, { { SIMDE_FLOAT32_C( -860.49), SIMDE_FLOAT32_C( 898.65), SIMDE_FLOAT32_C( 51.99), SIMDE_FLOAT32_C( -827.89) }, { SIMDE_FLOAT32_C( -615.57), SIMDE_FLOAT32_C( 326.45), SIMDE_FLOAT32_C( 780.47), SIMDE_FLOAT32_C( -302.21) }, { SIMDE_FLOAT32_C( -860.49), SIMDE_FLOAT32_C( -827.89), SIMDE_FLOAT32_C( -615.57), SIMDE_FLOAT32_C( -302.21) } }, { { SIMDE_FLOAT32_C( -871.66), SIMDE_FLOAT32_C( -959.95), SIMDE_FLOAT32_C( -84.60), SIMDE_FLOAT32_C( -873.81) }, { SIMDE_FLOAT32_C( -523.44), SIMDE_FLOAT32_C( 941.27), SIMDE_FLOAT32_C( 804.42), SIMDE_FLOAT32_C( 701.84) }, { SIMDE_FLOAT32_C( -959.95), SIMDE_FLOAT32_C( -873.81), SIMDE_FLOAT32_C( -523.44), SIMDE_FLOAT32_C( 701.84) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vpminq_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vpminq_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 680.38), SIMDE_FLOAT64_C( -211.23) }, { SIMDE_FLOAT64_C( 566.20), SIMDE_FLOAT64_C( 596.88) }, { SIMDE_FLOAT64_C( -211.23), SIMDE_FLOAT64_C( 566.20) } }, { { SIMDE_FLOAT64_C( 823.29), SIMDE_FLOAT64_C( -604.90) }, { SIMDE_FLOAT64_C( -329.55), SIMDE_FLOAT64_C( 536.46) }, { SIMDE_FLOAT64_C( -604.90), SIMDE_FLOAT64_C( -329.55) } }, { { SIMDE_FLOAT64_C( -444.45), 
SIMDE_FLOAT64_C( 107.94) }, { SIMDE_FLOAT64_C( -45.21), SIMDE_FLOAT64_C( 257.74) }, { SIMDE_FLOAT64_C( -444.45), SIMDE_FLOAT64_C( -45.21) } }, { { SIMDE_FLOAT64_C( -270.43), SIMDE_FLOAT64_C( 26.80) }, { SIMDE_FLOAT64_C( 904.46), SIMDE_FLOAT64_C( 832.39) }, { SIMDE_FLOAT64_C( -270.43), SIMDE_FLOAT64_C( 832.39) } }, { { SIMDE_FLOAT64_C( 271.42), SIMDE_FLOAT64_C( 434.59) }, { SIMDE_FLOAT64_C( -716.79), SIMDE_FLOAT64_C( 213.94) }, { SIMDE_FLOAT64_C( 271.42), SIMDE_FLOAT64_C( -716.79) } }, { { SIMDE_FLOAT64_C( -967.40), SIMDE_FLOAT64_C( -514.23) }, { SIMDE_FLOAT64_C( -725.54), SIMDE_FLOAT64_C( 608.35) }, { SIMDE_FLOAT64_C( -967.40), SIMDE_FLOAT64_C( -725.54) } }, { { SIMDE_FLOAT64_C( -686.64), SIMDE_FLOAT64_C( -198.11) }, { SIMDE_FLOAT64_C( -740.42), SIMDE_FLOAT64_C( -782.38) }, { SIMDE_FLOAT64_C( -686.64), SIMDE_FLOAT64_C( -782.38) } }, { { SIMDE_FLOAT64_C( 997.85), SIMDE_FLOAT64_C( -563.49) }, { SIMDE_FLOAT64_C( 25.86), SIMDE_FLOAT64_C( 678.22) }, { SIMDE_FLOAT64_C( -563.49), SIMDE_FLOAT64_C( 25.86) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vpminq_f64(a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vpminq_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 103), -INT8_C( 58), INT8_C( 105), INT8_C( 115), INT8_C( 81), -INT8_C( 1), INT8_C( 74), -INT8_C( 20), INT8_C( 41), -INT8_C( 51), -INT8_C( 70), -INT8_C( 85), -INT8_C( 14), -INT8_C( 5), -INT8_C( 29), INT8_C( 70) }, { INT8_C( 124), -INT8_C( 62), INT8_C( 84), -INT8_C( 8), INT8_C( 27), -INT8_C( 24), -INT8_C( 25), -INT8_C( 115), INT8_C( 118), INT8_C( 90), INT8_C( 46), INT8_C( 99), INT8_C( 51), -INT8_C( 97), -INT8_C( 55), -INT8_C( 102) }, { -INT8_C( 58), INT8_C( 105), -INT8_C( 1), -INT8_C( 20), -INT8_C( 51), -INT8_C( 85), 
-INT8_C( 14), -INT8_C( 29), -INT8_C( 62), -INT8_C( 8), -INT8_C( 24), -INT8_C( 115), INT8_C( 90), INT8_C( 46), -INT8_C( 97), -INT8_C( 102) } }, { { INT8_C( 102), INT8_C( 50), INT8_C( 13), -INT8_C( 73), INT8_C( 49), INT8_C( 88), -INT8_C( 93), INT8_C( 90), INT8_C( 37), INT8_C( 93), INT8_C( 5), INT8_C( 23), INT8_C( 88), -INT8_C( 23), INT8_C( 94), -INT8_C( 44) }, { -INT8_C( 85), -INT8_C( 78), -INT8_C( 51), -INT8_C( 58), -INT8_C( 101), -INT8_C( 76), INT8_C( 84), INT8_C( 17), INT8_C( 14), -INT8_C( 126), INT8_C( 116), INT8_C( 65), INT8_C( 33), INT8_C( 61), -INT8_C( 36), -INT8_C( 121) }, { INT8_C( 50), -INT8_C( 73), INT8_C( 49), -INT8_C( 93), INT8_C( 37), INT8_C( 5), -INT8_C( 23), -INT8_C( 44), -INT8_C( 85), -INT8_C( 58), -INT8_C( 101), INT8_C( 17), -INT8_C( 126), INT8_C( 65), INT8_C( 33), -INT8_C( 121) } }, { { INT8_C( 112), -INT8_C( 23), INT8_C( 62), -INT8_C( 95), INT8_C( 65), -INT8_C( 31), -INT8_C( 4), INT8_C( 103), INT8_C( 62), INT8_C( 1), INT8_C( 126), -INT8_C( 105), -INT8_C( 22), -INT8_C( 36), INT8_C( 107), -INT8_C( 106) }, { -INT8_C( 113), INT8_C( 56), INT8_C( 92), INT8_C( 42), -INT8_C( 20), -INT8_C( 80), INT8_C( 59), -INT8_C( 5), INT8_C( 50), -INT8_C( 81), INT8_C( 60), INT8_C( 84), -INT8_C( 20), INT8_C( 24), -INT8_C( 37), INT8_C( 92) }, { -INT8_C( 23), -INT8_C( 95), -INT8_C( 31), -INT8_C( 4), INT8_C( 1), -INT8_C( 105), -INT8_C( 36), -INT8_C( 106), -INT8_C( 113), INT8_C( 42), -INT8_C( 80), -INT8_C( 5), -INT8_C( 81), INT8_C( 60), -INT8_C( 20), -INT8_C( 37) } }, { { INT8_C( 2), INT8_C( 26), -INT8_C( 2), INT8_C( 67), -INT8_C( 5), -INT8_C( 6), -INT8_C( 86), INT8_C( 58), -INT8_C( 5), INT8_C( 41), -INT8_C( 47), -INT8_C( 26), INT8_C( 5), INT8_C( 60), INT8_C( 124), -INT8_C( 108) }, { INT8_C( 117), -INT8_C( 40), -INT8_C( 66), INT8_C( 97), -INT8_C( 119), -INT8_C( 7), INT8_C( 92), -INT8_C( 69), -INT8_C( 88), -INT8_C( 103), INT8_C( 15), -INT8_C( 107), -INT8_C( 79), -INT8_C( 21), -INT8_C( 15), -INT8_C( 77) }, { INT8_C( 2), -INT8_C( 2), -INT8_C( 6), -INT8_C( 86), -INT8_C( 5), 
-INT8_C( 47), INT8_C( 5), -INT8_C( 108), -INT8_C( 40), -INT8_C( 66), -INT8_C( 119), -INT8_C( 69), -INT8_C( 103), -INT8_C( 107), -INT8_C( 79), -INT8_C( 77) } }, { { INT8_C( 5), -INT8_C( 17), -INT8_C( 9), INT8_C( 0), -INT8_C( 23), -INT8_C( 95), INT8_C( 58), -INT8_C( 27), -INT8_C( 54), INT8_C( 11), -INT8_C( 53), -INT8_C( 48), INT8_C( 72), INT8_C( 71), INT8_C( 100), -INT8_C( 67) }, { INT8_C( 31), INT8_C( 35), INT8_C( 30), -INT8_C( 88), INT8_C( 28), INT8_C( 123), INT8_C( 100), -INT8_C( 59), INT8_C( 20), INT8_C( 115), INT8_C( 90), -INT8_C( 59), INT8_C( 94), INT8_C( 75), INT8_C( 121), INT8_C( 99) }, { -INT8_C( 17), -INT8_C( 9), -INT8_C( 95), -INT8_C( 27), -INT8_C( 54), -INT8_C( 53), INT8_C( 71), -INT8_C( 67), INT8_C( 31), -INT8_C( 88), INT8_C( 28), -INT8_C( 59), INT8_C( 20), -INT8_C( 59), INT8_C( 75), INT8_C( 99) } }, { { INT8_C( 59), INT8_C( 112), INT8_C( 100), INT8_C( 36), INT8_C( 17), -INT8_C( 98), INT8_C( 9), -INT8_C( 36), -INT8_C( 86), -INT8_C( 44), -INT8_C( 84), -INT8_C( 14), INT8_C( 27), INT8_C( 16), -INT8_C( 81), INT8_C( 59) }, { INT8_C( 51), -INT8_C( 51), -INT8_C( 29), INT8_C( 80), INT8_C( 72), INT8_C( 71), INT8_C( 21), INT8_C( 92), -INT8_C( 69), INT8_C( 111), INT8_C( 34), INT8_C( 25), -INT8_C( 70), -INT8_C( 101), INT8_C( 125), -INT8_C( 11) }, { INT8_C( 59), INT8_C( 36), -INT8_C( 98), -INT8_C( 36), -INT8_C( 86), -INT8_C( 84), INT8_C( 16), -INT8_C( 81), -INT8_C( 51), -INT8_C( 29), INT8_C( 71), INT8_C( 21), -INT8_C( 69), INT8_C( 25), -INT8_C( 101), -INT8_C( 11) } }, { { INT8_C( 11), -INT8_C( 31), INT8_C( 26), INT8_C( 28), INT8_MAX, INT8_C( 35), -INT8_C( 8), INT8_C( 41), -INT8_C( 8), -INT8_C( 92), INT8_C( 27), INT8_C( 19), -INT8_C( 75), -INT8_C( 54), INT8_C( 78), -INT8_C( 24) }, { -INT8_C( 104), INT8_C( 50), INT8_C( 56), -INT8_C( 32), INT8_C( 121), INT8_C( 77), INT8_C( 61), INT8_C( 52), -INT8_C( 68), INT8_C( 95), INT8_C( 78), INT8_C( 119), -INT8_C( 6), -INT8_C( 53), INT8_C( 108), INT8_C( 5) }, { -INT8_C( 31), INT8_C( 26), INT8_C( 35), -INT8_C( 8), -INT8_C( 92), 
INT8_C( 19), -INT8_C( 75), -INT8_C( 24), -INT8_C( 104), -INT8_C( 32), INT8_C( 77), INT8_C( 52), -INT8_C( 68), INT8_C( 78), -INT8_C( 53), INT8_C( 5) } }, { { -INT8_C( 84), -INT8_C( 122), INT8_C( 33), INT8_C( 43), -INT8_C( 86), INT8_C( 26), INT8_C( 85), -INT8_C( 94), -INT8_C( 66), INT8_C( 112), -INT8_C( 75), INT8_C( 115), INT8_C( 59), INT8_C( 4), INT8_C( 92), -INT8_C( 45) }, { INT8_C( 54), -INT8_C( 108), -INT8_C( 77), -INT8_C( 81), -INT8_C( 30), -INT8_C( 16), -INT8_C( 28), -INT8_C( 98), INT8_C( 79), INT8_C( 50), INT8_C( 21), INT8_C( 73), -INT8_C( 3), -INT8_C( 126), INT8_C( 78), -INT8_C( 87) }, { -INT8_C( 122), INT8_C( 33), -INT8_C( 86), -INT8_C( 94), -INT8_C( 66), -INT8_C( 75), INT8_C( 4), -INT8_C( 45), -INT8_C( 108), -INT8_C( 81), -INT8_C( 30), -INT8_C( 98), INT8_C( 50), INT8_C( 21), -INT8_C( 126), -INT8_C( 87) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vpminq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vpminq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 14745), INT16_C( 29545), -INT16_C( 175), -INT16_C( 5046), -INT16_C( 13015), -INT16_C( 21574), -INT16_C( 1038), INT16_C( 18147) }, { -INT16_C( 15748), -INT16_C( 1964), -INT16_C( 6117), -INT16_C( 29209), INT16_C( 23158), INT16_C( 25390), -INT16_C( 24781), -INT16_C( 25911) }, { -INT16_C( 14745), -INT16_C( 5046), -INT16_C( 21574), -INT16_C( 1038), -INT16_C( 15748), -INT16_C( 29209), INT16_C( 23158), -INT16_C( 25911) } }, { { INT16_C( 12902), -INT16_C( 18675), INT16_C( 22577), INT16_C( 23203), INT16_C( 23845), INT16_C( 5893), -INT16_C( 5800), -INT16_C( 11170) }, { -INT16_C( 19797), -INT16_C( 14643), -INT16_C( 19301), INT16_C( 4436), -INT16_C( 32242), INT16_C( 16756), INT16_C( 15649), -INT16_C( 30756) }, { -INT16_C( 
18675), INT16_C( 22577), INT16_C( 5893), -INT16_C( 11170), -INT16_C( 19797), -INT16_C( 19301), -INT16_C( 32242), -INT16_C( 30756) } }, { { -INT16_C( 5776), -INT16_C( 24258), -INT16_C( 7871), INT16_C( 26620), INT16_C( 318), -INT16_C( 26754), -INT16_C( 8982), -INT16_C( 27029) }, { INT16_C( 14479), INT16_C( 10844), -INT16_C( 20244), -INT16_C( 1221), -INT16_C( 20686), INT16_C( 21564), INT16_C( 6380), INT16_C( 23771) }, { -INT16_C( 24258), -INT16_C( 7871), -INT16_C( 26754), -INT16_C( 27029), INT16_C( 10844), -INT16_C( 20244), -INT16_C( 20686), INT16_C( 6380) } }, { { INT16_C( 6658), INT16_C( 17406), -INT16_C( 1285), INT16_C( 15018), INT16_C( 10747), -INT16_C( 6447), INT16_C( 15365), -INT16_C( 27524) }, { -INT16_C( 10123), INT16_C( 25022), -INT16_C( 1655), -INT16_C( 17572), -INT16_C( 26200), -INT16_C( 27377), -INT16_C( 5199), -INT16_C( 19471) }, { INT16_C( 6658), -INT16_C( 1285), -INT16_C( 6447), -INT16_C( 27524), -INT16_C( 10123), -INT16_C( 17572), -INT16_C( 27377), -INT16_C( 19471) } }, { { -INT16_C( 4347), INT16_C( 247), -INT16_C( 24087), -INT16_C( 6854), INT16_C( 3018), -INT16_C( 12085), INT16_C( 18248), -INT16_C( 17052) }, { INT16_C( 8991), -INT16_C( 22498), INT16_C( 31516), -INT16_C( 15004), INT16_C( 29460), -INT16_C( 15014), INT16_C( 19294), INT16_C( 25465) }, { -INT16_C( 4347), -INT16_C( 24087), -INT16_C( 12085), -INT16_C( 17052), -INT16_C( 22498), -INT16_C( 15004), -INT16_C( 15014), INT16_C( 19294) } }, { { INT16_C( 28731), INT16_C( 9316), -INT16_C( 25071), -INT16_C( 9207), -INT16_C( 11094), -INT16_C( 3412), INT16_C( 4123), INT16_C( 15279) }, { -INT16_C( 13005), INT16_C( 20707), INT16_C( 18248), INT16_C( 23573), INT16_C( 28603), INT16_C( 6434), -INT16_C( 25670), -INT16_C( 2691) }, { INT16_C( 9316), -INT16_C( 25071), -INT16_C( 11094), INT16_C( 4123), -INT16_C( 13005), INT16_C( 18248), INT16_C( 6434), -INT16_C( 25670) } }, { { -INT16_C( 7925), INT16_C( 7194), INT16_C( 9087), INT16_C( 10744), -INT16_C( 23304), INT16_C( 4891), -INT16_C( 13643), -INT16_C( 6066) }, { 
INT16_C( 12952), -INT16_C( 8136), INT16_C( 19833), INT16_C( 13373), INT16_C( 24508), INT16_C( 30542), -INT16_C( 13318), INT16_C( 1388) }, { -INT16_C( 7925), INT16_C( 9087), -INT16_C( 23304), -INT16_C( 13643), -INT16_C( 8136), INT16_C( 13373), INT16_C( 24508), -INT16_C( 13318) } }, { { -INT16_C( 31060), INT16_C( 11041), INT16_C( 6826), -INT16_C( 23979), INT16_C( 28862), INT16_C( 29621), INT16_C( 1083), -INT16_C( 11428) }, { -INT16_C( 27594), -INT16_C( 20557), -INT16_C( 3870), -INT16_C( 24860), INT16_C( 12879), INT16_C( 18709), -INT16_C( 32003), -INT16_C( 22194) }, { -INT16_C( 31060), -INT16_C( 23979), INT16_C( 28862), -INT16_C( 11428), -INT16_C( 27594), -INT16_C( 24860), INT16_C( 12879), -INT16_C( 32003) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vpminq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vpminq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 1936311911), -INT32_C( 330629295), -INT32_C( 1413821143), INT32_C( 1189346290) }, { -INT32_C( 128662916), -INT32_C( 1914181605), INT32_C( 1663982198), -INT32_C( 1698062541) }, { -INT32_C( 330629295), -INT32_C( 1413821143), -INT32_C( 1914181605), -INT32_C( 1698062541) } }, { { -INT32_C( 1223871898), INT32_C( 1520654385), INT32_C( 386227493), -INT32_C( 731977384) }, { -INT32_C( 959597909), INT32_C( 290763931), INT32_C( 1098154510), -INT32_C( 2015609567) }, { -INT32_C( 1223871898), -INT32_C( 731977384), -INT32_C( 959597909), -INT32_C( 2015609567) } }, { { -INT32_C( 1589712528), INT32_C( 1744625985), -INT32_C( 1753349826), -INT32_C( 1771315990) }, { INT32_C( 710686863), -INT32_C( 79974164), INT32_C( 1413263154), INT32_C( 1557862636) }, { -INT32_C( 1589712528), -INT32_C( 1771315990), -INT32_C( 79974164), INT32_C( 
1413263154) } }, { { INT32_C( 1140726274), INT32_C( 984283899), -INT32_C( 422499845), -INT32_C( 1803797499) }, { INT32_C( 1639897205), -INT32_C( 1151534711), -INT32_C( 1794139736), -INT32_C( 1275991119) }, { INT32_C( 984283899), -INT32_C( 1803797499), -INT32_C( 1151534711), -INT32_C( 1794139736) } }, { { INT32_C( 16248581), -INT32_C( 449142295), -INT32_C( 791999542), -INT32_C( 1117501624) }, { -INT32_C( 1474419937), -INT32_C( 983270628), -INT32_C( 983928044), INT32_C( 1668893534) }, { -INT32_C( 449142295), -INT32_C( 1117501624), -INT32_C( 1474419937), -INT32_C( 983928044) } }, { { INT32_C( 610562107), -INT32_C( 603349487), -INT32_C( 223554390), INT32_C( 1001328667) }, { INT32_C( 1357106483), INT32_C( 1544898376), INT32_C( 421687227), -INT32_C( 176317510) }, { -INT32_C( 603349487), -INT32_C( 223554390), INT32_C( 1357106483), -INT32_C( 176317510) } }, { { INT32_C( 471523595), INT32_C( 704127871), INT32_C( 320578808), -INT32_C( 397489483) }, { -INT32_C( 533187944), INT32_C( 876432761), INT32_C( 2001625020), INT32_C( 91016186) }, { INT32_C( 471523595), -INT32_C( 397489483), -INT32_C( 533187944), INT32_C( 91016186) } }, { { INT32_C( 723617452), -INT32_C( 1571480918), INT32_C( 1941270718), -INT32_C( 748944325) }, { -INT32_C( 1347185610), -INT32_C( 1629163294), INT32_C( 1226125903), -INT32_C( 1454472451) }, { -INT32_C( 1571480918), -INT32_C( 748944325), -INT32_C( 1629163294), -INT32_C( 1454472451) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vpminq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vpminq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(103), UINT8_C(198), UINT8_C(105), UINT8_C(115), UINT8_C( 81), UINT8_MAX, UINT8_C( 74), UINT8_C(236), UINT8_C( 41), 
UINT8_C(205), UINT8_C(186), UINT8_C(171), UINT8_C(242), UINT8_C(251), UINT8_C(227), UINT8_C( 70) }, { UINT8_C(124), UINT8_C(194), UINT8_C( 84), UINT8_C(248), UINT8_C( 27), UINT8_C(232), UINT8_C(231), UINT8_C(141), UINT8_C(118), UINT8_C( 90), UINT8_C( 46), UINT8_C( 99), UINT8_C( 51), UINT8_C(159), UINT8_C(201), UINT8_C(154) }, { UINT8_C(103), UINT8_C(105), UINT8_C( 81), UINT8_C( 74), UINT8_C( 41), UINT8_C(171), UINT8_C(242), UINT8_C( 70), UINT8_C(124), UINT8_C( 84), UINT8_C( 27), UINT8_C(141), UINT8_C( 90), UINT8_C( 46), UINT8_C( 51), UINT8_C(154) } }, { { UINT8_C(102), UINT8_C( 50), UINT8_C( 13), UINT8_C(183), UINT8_C( 49), UINT8_C( 88), UINT8_C(163), UINT8_C( 90), UINT8_C( 37), UINT8_C( 93), UINT8_C( 5), UINT8_C( 23), UINT8_C( 88), UINT8_C(233), UINT8_C( 94), UINT8_C(212) }, { UINT8_C(171), UINT8_C(178), UINT8_C(205), UINT8_C(198), UINT8_C(155), UINT8_C(180), UINT8_C( 84), UINT8_C( 17), UINT8_C( 14), UINT8_C(130), UINT8_C(116), UINT8_C( 65), UINT8_C( 33), UINT8_C( 61), UINT8_C(220), UINT8_C(135) }, { UINT8_C( 50), UINT8_C( 13), UINT8_C( 49), UINT8_C( 90), UINT8_C( 37), UINT8_C( 5), UINT8_C( 88), UINT8_C( 94), UINT8_C(171), UINT8_C(198), UINT8_C(155), UINT8_C( 17), UINT8_C( 14), UINT8_C( 65), UINT8_C( 33), UINT8_C(135) } }, { { UINT8_C(112), UINT8_C(233), UINT8_C( 62), UINT8_C(161), UINT8_C( 65), UINT8_C(225), UINT8_C(252), UINT8_C(103), UINT8_C( 62), UINT8_C( 1), UINT8_C(126), UINT8_C(151), UINT8_C(234), UINT8_C(220), UINT8_C(107), UINT8_C(150) }, { UINT8_C(143), UINT8_C( 56), UINT8_C( 92), UINT8_C( 42), UINT8_C(236), UINT8_C(176), UINT8_C( 59), UINT8_C(251), UINT8_C( 50), UINT8_C(175), UINT8_C( 60), UINT8_C( 84), UINT8_C(236), UINT8_C( 24), UINT8_C(219), UINT8_C( 92) }, { UINT8_C(112), UINT8_C( 62), UINT8_C( 65), UINT8_C(103), UINT8_C( 1), UINT8_C(126), UINT8_C(220), UINT8_C(107), UINT8_C( 56), UINT8_C( 42), UINT8_C(176), UINT8_C( 59), UINT8_C( 50), UINT8_C( 60), UINT8_C( 24), UINT8_C( 92) } }, { { UINT8_C( 2), UINT8_C( 26), UINT8_C(254), UINT8_C( 67), 
UINT8_C(251), UINT8_C(250), UINT8_C(170), UINT8_C( 58), UINT8_C(251), UINT8_C( 41), UINT8_C(209), UINT8_C(230), UINT8_C( 5), UINT8_C( 60), UINT8_C(124), UINT8_C(148) }, { UINT8_C(117), UINT8_C(216), UINT8_C(190), UINT8_C( 97), UINT8_C(137), UINT8_C(249), UINT8_C( 92), UINT8_C(187), UINT8_C(168), UINT8_C(153), UINT8_C( 15), UINT8_C(149), UINT8_C(177), UINT8_C(235), UINT8_C(241), UINT8_C(179) }, { UINT8_C( 2), UINT8_C( 67), UINT8_C(250), UINT8_C( 58), UINT8_C( 41), UINT8_C(209), UINT8_C( 5), UINT8_C(124), UINT8_C(117), UINT8_C( 97), UINT8_C(137), UINT8_C( 92), UINT8_C(153), UINT8_C( 15), UINT8_C(177), UINT8_C(179) } }, { { UINT8_C( 5), UINT8_C(239), UINT8_C(247), UINT8_C( 0), UINT8_C(233), UINT8_C(161), UINT8_C( 58), UINT8_C(229), UINT8_C(202), UINT8_C( 11), UINT8_C(203), UINT8_C(208), UINT8_C( 72), UINT8_C( 71), UINT8_C(100), UINT8_C(189) }, { UINT8_C( 31), UINT8_C( 35), UINT8_C( 30), UINT8_C(168), UINT8_C( 28), UINT8_C(123), UINT8_C(100), UINT8_C(197), UINT8_C( 20), UINT8_C(115), UINT8_C( 90), UINT8_C(197), UINT8_C( 94), UINT8_C( 75), UINT8_C(121), UINT8_C( 99) }, { UINT8_C( 5), UINT8_C( 0), UINT8_C(161), UINT8_C( 58), UINT8_C( 11), UINT8_C(203), UINT8_C( 71), UINT8_C(100), UINT8_C( 31), UINT8_C( 30), UINT8_C( 28), UINT8_C(100), UINT8_C( 20), UINT8_C( 90), UINT8_C( 75), UINT8_C( 99) } }, { { UINT8_C( 59), UINT8_C(112), UINT8_C(100), UINT8_C( 36), UINT8_C( 17), UINT8_C(158), UINT8_C( 9), UINT8_C(220), UINT8_C(170), UINT8_C(212), UINT8_C(172), UINT8_C(242), UINT8_C( 27), UINT8_C( 16), UINT8_C(175), UINT8_C( 59) }, { UINT8_C( 51), UINT8_C(205), UINT8_C(227), UINT8_C( 80), UINT8_C( 72), UINT8_C( 71), UINT8_C( 21), UINT8_C( 92), UINT8_C(187), UINT8_C(111), UINT8_C( 34), UINT8_C( 25), UINT8_C(186), UINT8_C(155), UINT8_C(125), UINT8_C(245) }, { UINT8_C( 59), UINT8_C( 36), UINT8_C( 17), UINT8_C( 9), UINT8_C(170), UINT8_C(172), UINT8_C( 16), UINT8_C( 59), UINT8_C( 51), UINT8_C( 80), UINT8_C( 71), UINT8_C( 21), UINT8_C(111), UINT8_C( 25), UINT8_C(155), UINT8_C(125) } }, { { 
UINT8_C( 11), UINT8_C(225), UINT8_C( 26), UINT8_C( 28), UINT8_C(127), UINT8_C( 35), UINT8_C(248), UINT8_C( 41), UINT8_C(248), UINT8_C(164), UINT8_C( 27), UINT8_C( 19), UINT8_C(181), UINT8_C(202), UINT8_C( 78), UINT8_C(232) }, { UINT8_C(152), UINT8_C( 50), UINT8_C( 56), UINT8_C(224), UINT8_C(121), UINT8_C( 77), UINT8_C( 61), UINT8_C( 52), UINT8_C(188), UINT8_C( 95), UINT8_C( 78), UINT8_C(119), UINT8_C(250), UINT8_C(203), UINT8_C(108), UINT8_C( 5) }, { UINT8_C( 11), UINT8_C( 26), UINT8_C( 35), UINT8_C( 41), UINT8_C(164), UINT8_C( 19), UINT8_C(181), UINT8_C( 78), UINT8_C( 50), UINT8_C( 56), UINT8_C( 77), UINT8_C( 52), UINT8_C( 95), UINT8_C( 78), UINT8_C(203), UINT8_C( 5) } }, { { UINT8_C(172), UINT8_C(134), UINT8_C( 33), UINT8_C( 43), UINT8_C(170), UINT8_C( 26), UINT8_C( 85), UINT8_C(162), UINT8_C(190), UINT8_C(112), UINT8_C(181), UINT8_C(115), UINT8_C( 59), UINT8_C( 4), UINT8_C( 92), UINT8_C(211) }, { UINT8_C( 54), UINT8_C(148), UINT8_C(179), UINT8_C(175), UINT8_C(226), UINT8_C(240), UINT8_C(228), UINT8_C(158), UINT8_C( 79), UINT8_C( 50), UINT8_C( 21), UINT8_C( 73), UINT8_C(253), UINT8_C(130), UINT8_C( 78), UINT8_C(169) }, { UINT8_C(134), UINT8_C( 33), UINT8_C( 26), UINT8_C( 85), UINT8_C(112), UINT8_C(115), UINT8_C( 4), UINT8_C( 92), UINT8_C( 54), UINT8_C(175), UINT8_C(226), UINT8_C(158), UINT8_C( 50), UINT8_C( 21), UINT8_C(130), UINT8_C( 78) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vpminq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vpminq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(50791), UINT16_C(29545), UINT16_C(65361), UINT16_C(60490), UINT16_C(52521), UINT16_C(43962), UINT16_C(64498), UINT16_C(18147) }, { UINT16_C(49788), UINT16_C(63572), 
UINT16_C(59419), UINT16_C(36327), UINT16_C(23158), UINT16_C(25390), UINT16_C(40755), UINT16_C(39625) }, { UINT16_C(29545), UINT16_C(60490), UINT16_C(43962), UINT16_C(18147), UINT16_C(49788), UINT16_C(36327), UINT16_C(23158), UINT16_C(39625) } }, { { UINT16_C(12902), UINT16_C(46861), UINT16_C(22577), UINT16_C(23203), UINT16_C(23845), UINT16_C( 5893), UINT16_C(59736), UINT16_C(54366) }, { UINT16_C(45739), UINT16_C(50893), UINT16_C(46235), UINT16_C( 4436), UINT16_C(33294), UINT16_C(16756), UINT16_C(15649), UINT16_C(34780) }, { UINT16_C(12902), UINT16_C(22577), UINT16_C( 5893), UINT16_C(54366), UINT16_C(45739), UINT16_C( 4436), UINT16_C(16756), UINT16_C(15649) } }, { { UINT16_C(59760), UINT16_C(41278), UINT16_C(57665), UINT16_C(26620), UINT16_C( 318), UINT16_C(38782), UINT16_C(56554), UINT16_C(38507) }, { UINT16_C(14479), UINT16_C(10844), UINT16_C(45292), UINT16_C(64315), UINT16_C(44850), UINT16_C(21564), UINT16_C( 6380), UINT16_C(23771) }, { UINT16_C(41278), UINT16_C(26620), UINT16_C( 318), UINT16_C(38507), UINT16_C(10844), UINT16_C(45292), UINT16_C(21564), UINT16_C( 6380) } }, { { UINT16_C( 6658), UINT16_C(17406), UINT16_C(64251), UINT16_C(15018), UINT16_C(10747), UINT16_C(59089), UINT16_C(15365), UINT16_C(38012) }, { UINT16_C(55413), UINT16_C(25022), UINT16_C(63881), UINT16_C(47964), UINT16_C(39336), UINT16_C(38159), UINT16_C(60337), UINT16_C(46065) }, { UINT16_C( 6658), UINT16_C(15018), UINT16_C(10747), UINT16_C(15365), UINT16_C(25022), UINT16_C(47964), UINT16_C(38159), UINT16_C(46065) } }, { { UINT16_C(61189), UINT16_C( 247), UINT16_C(41449), UINT16_C(58682), UINT16_C( 3018), UINT16_C(53451), UINT16_C(18248), UINT16_C(48484) }, { UINT16_C( 8991), UINT16_C(43038), UINT16_C(31516), UINT16_C(50532), UINT16_C(29460), UINT16_C(50522), UINT16_C(19294), UINT16_C(25465) }, { UINT16_C( 247), UINT16_C(41449), UINT16_C( 3018), UINT16_C(18248), UINT16_C( 8991), UINT16_C(31516), UINT16_C(29460), UINT16_C(19294) } }, { { UINT16_C(28731), UINT16_C( 9316), UINT16_C(40465), 
UINT16_C(56329), UINT16_C(54442), UINT16_C(62124), UINT16_C( 4123), UINT16_C(15279) }, { UINT16_C(52531), UINT16_C(20707), UINT16_C(18248), UINT16_C(23573), UINT16_C(28603), UINT16_C( 6434), UINT16_C(39866), UINT16_C(62845) }, { UINT16_C( 9316), UINT16_C(40465), UINT16_C(54442), UINT16_C( 4123), UINT16_C(20707), UINT16_C(18248), UINT16_C( 6434), UINT16_C(39866) } }, { { UINT16_C(57611), UINT16_C( 7194), UINT16_C( 9087), UINT16_C(10744), UINT16_C(42232), UINT16_C( 4891), UINT16_C(51893), UINT16_C(59470) }, { UINT16_C(12952), UINT16_C(57400), UINT16_C(19833), UINT16_C(13373), UINT16_C(24508), UINT16_C(30542), UINT16_C(52218), UINT16_C( 1388) }, { UINT16_C( 7194), UINT16_C( 9087), UINT16_C( 4891), UINT16_C(51893), UINT16_C(12952), UINT16_C(13373), UINT16_C(24508), UINT16_C( 1388) } }, { { UINT16_C(34476), UINT16_C(11041), UINT16_C( 6826), UINT16_C(41557), UINT16_C(28862), UINT16_C(29621), UINT16_C( 1083), UINT16_C(54108) }, { UINT16_C(37942), UINT16_C(44979), UINT16_C(61666), UINT16_C(40676), UINT16_C(12879), UINT16_C(18709), UINT16_C(33533), UINT16_C(43342) }, { UINT16_C(11041), UINT16_C( 6826), UINT16_C(28862), UINT16_C( 1083), UINT16_C(37942), UINT16_C(40676), UINT16_C(12879), UINT16_C(33533) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vpminq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vpminq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(1936311911), UINT32_C(3964338001), UINT32_C(2881146153), UINT32_C(1189346290) }, { UINT32_C(4166304380), UINT32_C(2380785691), UINT32_C(1663982198), UINT32_C(2596904755) }, { UINT32_C(1936311911), UINT32_C(1189346290), UINT32_C(2380785691), UINT32_C(1663982198) } }, { { UINT32_C(3071095398), UINT32_C(1520654385), 
UINT32_C( 386227493), UINT32_C(3562989912) }, { UINT32_C(3335369387), UINT32_C( 290763931), UINT32_C(1098154510), UINT32_C(2279357729) }, { UINT32_C(1520654385), UINT32_C( 386227493), UINT32_C( 290763931), UINT32_C(1098154510) } }, { { UINT32_C(2705254768), UINT32_C(1744625985), UINT32_C(2541617470), UINT32_C(2523651306) }, { UINT32_C( 710686863), UINT32_C(4214993132), UINT32_C(1413263154), UINT32_C(1557862636) }, { UINT32_C(1744625985), UINT32_C(2523651306), UINT32_C( 710686863), UINT32_C(1413263154) } }, { { UINT32_C(1140726274), UINT32_C( 984283899), UINT32_C(3872467451), UINT32_C(2491169797) }, { UINT32_C(1639897205), UINT32_C(3143432585), UINT32_C(2500827560), UINT32_C(3018976177) }, { UINT32_C( 984283899), UINT32_C(2491169797), UINT32_C(1639897205), UINT32_C(2500827560) } }, { { UINT32_C( 16248581), UINT32_C(3845825001), UINT32_C(3502967754), UINT32_C(3177465672) }, { UINT32_C(2820547359), UINT32_C(3311696668), UINT32_C(3311039252), UINT32_C(1668893534) }, { UINT32_C( 16248581), UINT32_C(3177465672), UINT32_C(2820547359), UINT32_C(1668893534) } }, { { UINT32_C( 610562107), UINT32_C(3691617809), UINT32_C(4071412906), UINT32_C(1001328667) }, { UINT32_C(1357106483), UINT32_C(1544898376), UINT32_C( 421687227), UINT32_C(4118649786) }, { UINT32_C( 610562107), UINT32_C(1001328667), UINT32_C(1357106483), UINT32_C( 421687227) } }, { { UINT32_C( 471523595), UINT32_C( 704127871), UINT32_C( 320578808), UINT32_C(3897477813) }, { UINT32_C(3761779352), UINT32_C( 876432761), UINT32_C(2001625020), UINT32_C( 91016186) }, { UINT32_C( 471523595), UINT32_C( 320578808), UINT32_C( 876432761), UINT32_C( 91016186) } }, { { UINT32_C( 723617452), UINT32_C(2723486378), UINT32_C(1941270718), UINT32_C(3546022971) }, { UINT32_C(2947781686), UINT32_C(2665804002), UINT32_C(1226125903), UINT32_C(2840494845) }, { UINT32_C( 723617452), UINT32_C(1941270718), UINT32_C(2665804002), UINT32_C(1226125903) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vpminq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vpmin_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vpmin_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vpmin_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vpmin_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vpmin_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vpmin_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vpmin_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vpminq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vpminq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vpminq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vpminq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vpminq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vpminq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vpminq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vpminq_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/qabs.c000066400000000000000000000632531400333146700164220ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN qabs #include "test-neon.h" #include "../../../simde/arm/neon/qabs.h" #include "../../../simde/arm/neon/set_lane.h" static int test_simde_vqabsb_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a; int8_t r; } test_vec[] = { { INT8_MIN, INT8_MAX }, { INT8_C( 23), INT8_C( 23) }, { INT8_C( 79), INT8_C( 79) }, { -INT8_C( 44), INT8_C( 44) }, { INT8_C( 56), INT8_C( 56) }, { INT8_C( 3), INT8_C( 3) }, { -INT8_C( 28), INT8_C( 28) }, { INT8_C( 10), INT8_C( 10) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int8_t r = simde_vqabsb_s8(test_vec[i].a); simde_assert_equal_i8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int8_t a = (i == 0) ? 
INT8_MIN : simde_test_codegen_random_i8(); int8_t r = simde_vqabsb_s8(a); simde_test_codegen_write_i8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqabsh_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a; int16_t r; } test_vec[] = { { INT16_MIN, INT16_MAX }, { -INT16_C( 19836), INT16_C( 19836) }, { -INT16_C( 11176), INT16_C( 11176) }, { -INT16_C( 6823), INT16_C( 6823) }, { INT16_C( 3362), INT16_C( 3362) }, { INT16_C( 23735), INT16_C( 23735) }, { INT16_C( 12384), INT16_C( 12384) }, { INT16_C( 28164), INT16_C( 28164) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int16_t r = simde_vqabsh_s16(test_vec[i].a); simde_assert_equal_i16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int16_t a = (i == 0) ? INT16_MIN : simde_test_codegen_random_i16(); int16_t r = simde_vqabsh_s16(a); simde_test_codegen_write_i16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqabss_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a; int32_t r; } test_vec[] = { { INT32_MIN, INT32_MAX }, { -INT32_C( 1790689467), INT32_C( 1790689467) }, { -INT32_C( 1403327490), INT32_C( 1403327490) }, { -INT32_C( 2050787531), INT32_C( 2050787531) }, { INT32_C( 814283596), INT32_C( 814283596) }, { INT32_C( 1591872774), INT32_C( 1591872774) }, { INT32_C( 54738145), INT32_C( 54738145) }, { -INT32_C( 1453327799), INT32_C( 1453327799) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int32_t r = simde_vqabss_s32(test_vec[i].a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int32_t a = (i == 0) ? 
INT32_MIN : simde_test_codegen_random_i32(); int32_t r = simde_vqabss_s32(a); simde_test_codegen_write_i32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqabsd_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a; int64_t r; } test_vec[] = { { INT64_MIN, INT64_MAX }, { -INT64_C( 6843680731007786197), INT64_C( 6843680731007786197) }, { INT64_C( 2522315783993647175), INT64_C( 2522315783993647175) }, { INT64_C( 8678496504418437644), INT64_C( 8678496504418437644) }, { -INT64_C( 2853634349142920334), INT64_C( 2853634349142920334) }, { INT64_C( 2342241078398778685), INT64_C( 2342241078398778685) }, { -INT64_C( 1537241644215185745), INT64_C( 1537241644215185745) }, { -INT64_C( 6397525771841372632), INT64_C( 6397525771841372632) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int64_t r = simde_vqabsd_s64(test_vec[i].a); simde_assert_equal_i64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int64_t a = (i == 0) ? 
INT64_MIN : simde_test_codegen_random_i64(); int64_t r = simde_vqabsd_s64(a); simde_test_codegen_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqabs_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t r[8]; } test_vec[] = { { { INT8_MIN, -INT8_C( 92), -INT8_C( 69), -INT8_C( 75), INT8_C( 60), -INT8_C( 103), INT8_C( 78), INT8_C( 10) }, { INT8_MAX, INT8_C( 92), INT8_C( 69), INT8_C( 75), INT8_C( 60), INT8_C( 103), INT8_C( 78), INT8_C( 10) } }, { { -INT8_C( 51), INT8_C( 25), INT8_C( 31), -INT8_C( 120), INT8_C( 122), INT8_C( 11), -INT8_C( 102), -INT8_C( 14) }, { INT8_C( 51), INT8_C( 25), INT8_C( 31), INT8_C( 120), INT8_C( 122), INT8_C( 11), INT8_C( 102), INT8_C( 14) } }, { { -INT8_C( 110), -INT8_C( 109), -INT8_C( 104), INT8_C( 52), -INT8_C( 24), INT8_C( 103), -INT8_C( 7), INT8_C( 81) }, { INT8_C( 110), INT8_C( 109), INT8_C( 104), INT8_C( 52), INT8_C( 24), INT8_C( 103), INT8_C( 7), INT8_C( 81) } }, { { -INT8_C( 61), -INT8_C( 63), INT8_C( 61), INT8_C( 25), INT8_C( 125), -INT8_C( 8), -INT8_C( 21), -INT8_C( 58) }, { INT8_C( 61), INT8_C( 63), INT8_C( 61), INT8_C( 25), INT8_C( 125), INT8_C( 8), INT8_C( 21), INT8_C( 58) } }, { { -INT8_C( 100), -INT8_C( 90), INT8_C( 123), -INT8_C( 39), INT8_C( 64), -INT8_C( 55), -INT8_C( 29), INT8_C( 13) }, { INT8_C( 100), INT8_C( 90), INT8_C( 123), INT8_C( 39), INT8_C( 64), INT8_C( 55), INT8_C( 29), INT8_C( 13) } }, { { -INT8_C( 29), INT8_C( 2), -INT8_C( 107), INT8_C( 93), INT8_C( 14), INT8_C( 48), INT8_C( 80), -INT8_C( 96) }, { INT8_C( 29), INT8_C( 2), INT8_C( 107), INT8_C( 93), INT8_C( 14), INT8_C( 48), INT8_C( 80), INT8_C( 96) } }, { { -INT8_C( 61), -INT8_C( 24), -INT8_C( 44), -INT8_C( 85), INT8_C( 79), -INT8_C( 51), -INT8_C( 4), INT8_C( 19) }, { INT8_C( 61), INT8_C( 24), INT8_C( 44), INT8_C( 85), INT8_C( 79), INT8_C( 51), INT8_C( 4), INT8_C( 19) } }, { { -INT8_C( 114), INT8_C( 57), INT8_C( 44), INT8_C( 11), INT8_C( 
49), INT8_C( 24), -INT8_C( 46), -INT8_C( 50) }, { INT8_C( 114), INT8_C( 57), INT8_C( 44), INT8_C( 11), INT8_C( 49), INT8_C( 24), INT8_C( 46), INT8_C( 50) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t r = simde_vqabs_s8(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); if (i == 0) a = simde_vset_lane_s8(INT8_MIN, a, 0); simde_int8x8_t r = simde_vqabs_s8(a); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqabs_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t r[4]; } test_vec[] = { { { INT16_MIN, -INT16_C( 24953), -INT16_C( 302), INT16_C( 7435) }, { INT16_MAX, INT16_C( 24953), INT16_C( 302), INT16_C( 7435) } }, { { INT16_C( 9719), INT16_C( 8963), -INT16_C( 16749), INT16_C( 8624) }, { INT16_C( 9719), INT16_C( 8963), INT16_C( 16749), INT16_C( 8624) } }, { { INT16_C( 7660), -INT16_C( 17022), -INT16_C( 14157), INT16_C( 16231) }, { INT16_C( 7660), INT16_C( 17022), INT16_C( 14157), INT16_C( 16231) } }, { { INT16_C( 2029), -INT16_C( 15106), -INT16_C( 5141), -INT16_C( 16369) }, { INT16_C( 2029), INT16_C( 15106), INT16_C( 5141), INT16_C( 16369) } }, { { -INT16_C( 26992), INT16_C( 25182), INT16_C( 27028), -INT16_C( 29825) }, { INT16_C( 26992), INT16_C( 25182), INT16_C( 27028), INT16_C( 29825) } }, { { -INT16_C( 31857), INT16_C( 8879), INT16_C( 24385), INT16_C( 11587) }, { INT16_C( 31857), INT16_C( 8879), INT16_C( 24385), INT16_C( 11587) } }, { { -INT16_C( 14979), INT16_C( 12522), INT16_C( 21134), INT16_C( 31599) }, { INT16_C( 14979), INT16_C( 12522), INT16_C( 21134), INT16_C( 31599) } }, { { INT16_C( 27993), INT16_C( 17471), INT16_C( 20312), -INT16_C( 5884) }, { INT16_C( 27993), 
INT16_C( 17471), INT16_C( 20312), INT16_C( 5884) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t r = simde_vqabs_s16(a); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); if (i == 0) a = simde_vset_lane_s16(INT16_MIN, a, 0); simde_int16x4_t r = simde_vqabs_s16(a); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqabs_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t r[2]; } test_vec[] = { { { INT32_MIN, INT32_C( 1527106508) }, { INT32_MAX, INT32_C( 1527106508) } }, { { -INT32_C( 1887587250), -INT32_C( 1849900780) }, { INT32_C( 1887587250), INT32_C( 1849900780) } }, { { INT32_C( 348235654), INT32_C( 1385116153) }, { INT32_C( 348235654), INT32_C( 1385116153) } }, { { -INT32_C( 141111394), INT32_C( 65051166) }, { INT32_C( 141111394), INT32_C( 65051166) } }, { { -INT32_C( 897766403), INT32_C( 1143309302) }, { INT32_C( 897766403), INT32_C( 1143309302) } }, { { INT32_C( 1272161079), -INT32_C( 354643868) }, { INT32_C( 1272161079), INT32_C( 354643868) } }, { { INT32_C( 822058551), INT32_C( 1837272783) }, { INT32_C( 822058551), INT32_C( 1837272783) } }, { { INT32_C( 2070157405), -INT32_C( 1333836622) }, { INT32_C( 2070157405), INT32_C( 1333836622) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t r = simde_vqabs_s32(a); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); if (i == 0) a = simde_vset_lane_s32(INT32_MIN, a, 0); 
simde_int32x2_t r = simde_vqabs_s32(a); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqabs_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int64_t r[1]; } test_vec[] = { { { INT64_MIN }, { INT64_MAX } }, { { INT64_C( 5013033114247790146) }, { INT64_C( 5013033114247790146) } }, { { INT64_C( 8987486240898388093) }, { INT64_C( 8987486240898388093) } }, { { -INT64_C( 3282431011849822705) }, { INT64_C( 3282431011849822705) } }, { { -INT64_C( 3555059141116826509) }, { INT64_C( 3555059141116826509) } }, { { INT64_C( 627695294526827618) }, { INT64_C( 627695294526827618) } }, { { INT64_C( 3688462541816867989) }, { INT64_C( 3688462541816867989) } }, { { INT64_C( 1252114627109922603) }, { INT64_C( 1252114627109922603) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t r = simde_vqabs_s64(a); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); if (i == 0) a = simde_vset_lane_s64(INT64_MIN, a, 0); simde_int64x1_t r = simde_vqabs_s64(a); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqabsq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t r[16]; } test_vec[] = { { { INT8_MIN, -INT8_C( 103), INT8_C( 3), -INT8_C( 34), INT8_C( 125), -INT8_C( 83), -INT8_C( 84), -INT8_C( 32), INT8_C( 89), INT8_C( 33), INT8_C( 81), -INT8_C( 28), INT8_C( 38), INT8_C( 7), -INT8_C( 20), -INT8_C( 68) }, { INT8_MAX, INT8_C( 103), INT8_C( 3), INT8_C( 34), INT8_C( 125), INT8_C( 83), INT8_C( 84), INT8_C( 32), INT8_C( 89), INT8_C( 33), INT8_C( 81), INT8_C( 28), INT8_C( 
38), INT8_C( 7), INT8_C( 20), INT8_C( 68) } }, { { INT8_C( 51), INT8_C( 64), INT8_C( 112), INT8_C( 87), INT8_C( 77), -INT8_C( 96), -INT8_C( 118), INT8_C( 121), -INT8_C( 57), INT8_MAX, INT8_C( 7), INT8_C( 101), -INT8_C( 26), INT8_C( 103), INT8_C( 118), INT8_C( 57) }, { INT8_C( 51), INT8_C( 64), INT8_C( 112), INT8_C( 87), INT8_C( 77), INT8_C( 96), INT8_C( 118), INT8_C( 121), INT8_C( 57), INT8_MAX, INT8_C( 7), INT8_C( 101), INT8_C( 26), INT8_C( 103), INT8_C( 118), INT8_C( 57) } }, { { INT8_C( 1), INT8_C( 122), INT8_C( 23), INT8_C( 126), INT8_C( 39), -INT8_C( 60), INT8_C( 94), INT8_MIN, -INT8_C( 27), -INT8_C( 81), INT8_C( 100), INT8_C( 11), -INT8_C( 73), INT8_C( 80), -INT8_C( 57), -INT8_C( 22) }, { INT8_C( 1), INT8_C( 122), INT8_C( 23), INT8_C( 126), INT8_C( 39), INT8_C( 60), INT8_C( 94), INT8_MAX, INT8_C( 27), INT8_C( 81), INT8_C( 100), INT8_C( 11), INT8_C( 73), INT8_C( 80), INT8_C( 57), INT8_C( 22) } }, { { -INT8_C( 112), INT8_C( 55), INT8_C( 65), -INT8_C( 35), -INT8_C( 41), -INT8_C( 53), INT8_C( 86), -INT8_C( 97), INT8_C( 74), INT8_C( 93), INT8_C( 4), INT8_C( 48), -INT8_C( 59), INT8_C( 123), INT8_C( 105), -INT8_C( 58) }, { INT8_C( 112), INT8_C( 55), INT8_C( 65), INT8_C( 35), INT8_C( 41), INT8_C( 53), INT8_C( 86), INT8_C( 97), INT8_C( 74), INT8_C( 93), INT8_C( 4), INT8_C( 48), INT8_C( 59), INT8_C( 123), INT8_C( 105), INT8_C( 58) } }, { { -INT8_C( 11), -INT8_C( 127), INT8_C( 68), INT8_C( 28), INT8_C( 69), -INT8_C( 93), -INT8_C( 100), INT8_C( 42), INT8_C( 82), INT8_C( 0), INT8_C( 53), INT8_C( 9), INT8_C( 80), -INT8_C( 3), -INT8_C( 12), -INT8_C( 32) }, { INT8_C( 11), INT8_MAX, INT8_C( 68), INT8_C( 28), INT8_C( 69), INT8_C( 93), INT8_C( 100), INT8_C( 42), INT8_C( 82), INT8_C( 0), INT8_C( 53), INT8_C( 9), INT8_C( 80), INT8_C( 3), INT8_C( 12), INT8_C( 32) } }, { { INT8_C( 52), INT8_C( 53), -INT8_C( 67), INT8_C( 12), INT8_C( 1), INT8_C( 20), -INT8_C( 85), INT8_C( 75), INT8_C( 113), -INT8_C( 81), INT8_C( 124), INT8_C( 54), INT8_C( 42), -INT8_C( 27), -INT8_C( 4), INT8_C( 31) 
}, { INT8_C( 52), INT8_C( 53), INT8_C( 67), INT8_C( 12), INT8_C( 1), INT8_C( 20), INT8_C( 85), INT8_C( 75), INT8_C( 113), INT8_C( 81), INT8_C( 124), INT8_C( 54), INT8_C( 42), INT8_C( 27), INT8_C( 4), INT8_C( 31) } }, { { INT8_C( 102), INT8_C( 65), INT8_C( 59), -INT8_C( 85), -INT8_C( 28), -INT8_C( 41), -INT8_C( 43), INT8_C( 54), -INT8_C( 41), INT8_C( 11), INT8_C( 64), INT8_C( 39), INT8_C( 8), INT8_C( 52), INT8_C( 7), INT8_C( 60) }, { INT8_C( 102), INT8_C( 65), INT8_C( 59), INT8_C( 85), INT8_C( 28), INT8_C( 41), INT8_C( 43), INT8_C( 54), INT8_C( 41), INT8_C( 11), INT8_C( 64), INT8_C( 39), INT8_C( 8), INT8_C( 52), INT8_C( 7), INT8_C( 60) } }, { { INT8_C( 105), -INT8_C( 59), INT8_C( 72), INT8_C( 106), -INT8_C( 39), -INT8_C( 13), -INT8_C( 74), INT8_C( 74), -INT8_C( 93), INT8_C( 50), -INT8_C( 127), -INT8_C( 51), INT8_C( 23), INT8_C( 125), -INT8_C( 19), INT8_C( 126) }, { INT8_C( 105), INT8_C( 59), INT8_C( 72), INT8_C( 106), INT8_C( 39), INT8_C( 13), INT8_C( 74), INT8_C( 74), INT8_C( 93), INT8_C( 50), INT8_MAX, INT8_C( 51), INT8_C( 23), INT8_C( 125), INT8_C( 19), INT8_C( 126) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t r = simde_vqabsq_s8(a); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); if (i == 0) a = simde_vsetq_lane_s8(INT8_MIN, a, 0); simde_int8x16_t r = simde_vqabsq_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqabsq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t r[8]; } test_vec[] = { { { INT16_MIN, -INT16_C( 24023), -INT16_C( 256), -INT16_C( 10279), INT16_C( 6410), INT16_C( 4863), INT16_C( 1613), -INT16_C( 18866) }, { INT16_MAX, INT16_C( 24023), 
INT16_C( 256), INT16_C( 10279), INT16_C( 6410), INT16_C( 4863), INT16_C( 1613), INT16_C( 18866) } }, { { -INT16_C( 26677), -INT16_C( 23519), -INT16_C( 10358), INT16_C( 11759), INT16_C( 28681), INT16_C( 8443), -INT16_C( 5907), -INT16_C( 21346) }, { INT16_C( 26677), INT16_C( 23519), INT16_C( 10358), INT16_C( 11759), INT16_C( 28681), INT16_C( 8443), INT16_C( 5907), INT16_C( 21346) } }, { { -INT16_C( 14320), INT16_C( 4174), INT16_C( 10183), -INT16_C( 11800), -INT16_C( 6336), -INT16_C( 29213), INT16_C( 12781), -INT16_C( 18108) }, { INT16_C( 14320), INT16_C( 4174), INT16_C( 10183), INT16_C( 11800), INT16_C( 6336), INT16_C( 29213), INT16_C( 12781), INT16_C( 18108) } }, { { INT16_C( 26056), INT16_C( 21341), INT16_C( 19516), INT16_C( 17792), INT16_C( 31676), -INT16_C( 21915), INT16_C( 1123), INT16_C( 29782) }, { INT16_C( 26056), INT16_C( 21341), INT16_C( 19516), INT16_C( 17792), INT16_C( 31676), INT16_C( 21915), INT16_C( 1123), INT16_C( 29782) } }, { { -INT16_C( 23348), -INT16_C( 27772), INT16_C( 27852), INT16_C( 3172), INT16_C( 18259), INT16_C( 16794), -INT16_C( 8584), INT16_C( 16890) }, { INT16_C( 23348), INT16_C( 27772), INT16_C( 27852), INT16_C( 3172), INT16_C( 18259), INT16_C( 16794), INT16_C( 8584), INT16_C( 16890) } }, { { INT16_C( 22339), INT16_C( 32660), INT16_C( 5284), INT16_C( 24772), INT16_C( 10640), -INT16_C( 3318), INT16_C( 24621), -INT16_C( 1689) }, { INT16_C( 22339), INT16_C( 32660), INT16_C( 5284), INT16_C( 24772), INT16_C( 10640), INT16_C( 3318), INT16_C( 24621), INT16_C( 1689) } }, { { -INT16_C( 5115), -INT16_C( 11892), -INT16_C( 4008), -INT16_C( 21283), INT16_C( 30519), -INT16_C( 20243), -INT16_C( 6315), -INT16_C( 26383) }, { INT16_C( 5115), INT16_C( 11892), INT16_C( 4008), INT16_C( 21283), INT16_C( 30519), INT16_C( 20243), INT16_C( 6315), INT16_C( 26383) } }, { { -INT16_C( 31426), -INT16_C( 7657), -INT16_C( 9319), INT16_C( 10563), INT16_C( 19717), INT16_C( 12829), -INT16_C( 31570), -INT16_C( 19668) }, { INT16_C( 31426), INT16_C( 7657), INT16_C( 9319), 
INT16_C( 10563), INT16_C( 19717), INT16_C( 12829), INT16_C( 31570), INT16_C( 19668) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t r = simde_vqabsq_s16(a); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); if (i == 0) a = simde_vsetq_lane_s16(INT16_MIN, a, 0); simde_int16x8_t r = simde_vqabsq_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqabsq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t r[4]; } test_vec[] = { { { INT32_MIN, -INT32_C( 529178199), INT32_C( 781214425), -INT32_C( 2016968375) }, { INT32_MAX, INT32_C( 529178199), INT32_C( 781214425), INT32_C( 2016968375) } }, { { -INT32_C( 1603609082), -INT32_C( 1077301830), -INT32_C( 1460541702), -INT32_C( 614785685) }, { INT32_C( 1603609082), INT32_C( 1077301830), INT32_C( 1460541702), INT32_C( 614785685) } }, { { INT32_C( 2141511638), INT32_C( 442440001), -INT32_C( 1001852805), INT32_C( 2018250609) }, { INT32_C( 2141511638), INT32_C( 442440001), INT32_C( 1001852805), INT32_C( 2018250609) } }, { { -INT32_C( 1474775314), INT32_C( 1567088995), INT32_C( 856053960), INT32_C( 1276010870) }, { INT32_C( 1474775314), INT32_C( 1567088995), INT32_C( 856053960), INT32_C( 1276010870) } }, { { -INT32_C( 2100579519), INT32_C( 1218194124), -INT32_C( 1945312230), -INT32_C( 503031564) }, { INT32_C( 2100579519), INT32_C( 1218194124), INT32_C( 1945312230), INT32_C( 503031564) } }, { { INT32_C( 1904876558), -INT32_C( 976227843), -INT32_C( 1074211511), INT32_C( 1997211446) }, { INT32_C( 1904876558), INT32_C( 976227843), INT32_C( 1074211511), INT32_C( 1997211446) } }, { { -INT32_C( 2030446918), INT32_C( 466523393), INT32_C( 
1856494458), INT32_C( 1112582963) }, { INT32_C( 2030446918), INT32_C( 466523393), INT32_C( 1856494458), INT32_C( 1112582963) } }, { { -INT32_C( 978068793), INT32_C( 344621771), -INT32_C( 1898675369), INT32_C( 1141235594) }, { INT32_C( 978068793), INT32_C( 344621771), INT32_C( 1898675369), INT32_C( 1141235594) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t r = simde_vqabsq_s32(a); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); if (i == 0) a = simde_vsetq_lane_s32(INT32_MIN, a, 0); simde_int32x4_t r = simde_vqabsq_s32(a); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqabsq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t r[2]; } test_vec[] = { { { INT64_MIN, -INT64_C( 1303286155051238796) }, { INT64_MAX, INT64_C( 1303286155051238796) } }, { { INT64_C( 8612637443141836198), -INT64_C( 1185279885774071105) }, { INT64_C( 8612637443141836198), INT64_C( 1185279885774071105) } }, { { INT64_C( 7326364772480735241), -INT64_C( 7204080496378369549) }, { INT64_C( 7326364772480735241), INT64_C( 7204080496378369549) } }, { { -INT64_C( 5536731587706767469), INT64_C( 4762145634079551213) }, { INT64_C( 5536731587706767469), INT64_C( 4762145634079551213) } }, { { INT64_C( 2970279220873313762), -INT64_C( 3582816656948116042) }, { INT64_C( 2970279220873313762), INT64_C( 3582816656948116042) } }, { { -INT64_C( 2979506185981372930), INT64_C( 1176586486946833368) }, { INT64_C( 2979506185981372930), INT64_C( 1176586486946833368) } }, { { INT64_C( 8588114629788644560), -INT64_C( 5302308270149242527) }, { INT64_C( 8588114629788644560), INT64_C( 5302308270149242527) } }, { { INT64_C( 
7889831097103149759), INT64_C( 6858932265364126706) }, { INT64_C( 7889831097103149759), INT64_C( 6858932265364126706) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t r = simde_vqabsq_s64(a); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); if (i == 0) a = simde_vsetq_lane_s64(INT64_MIN, a, 0); simde_int64x2_t r = simde_vqabsq_s64(a); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vqabsb_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqabsh_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqabss_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqabsd_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vqabs_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqabs_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqabs_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqabs_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vqabsq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqabsq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqabsq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqabsq_s64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/qadd.c000066400000000000000000002120601400333146700163750ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN qadd #include "test-neon.h" #include "../../../simde/arm/neon/qadd.h" static int test_simde_vqaddb_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a; int8_t b; int8_t r; } test_vec[] = { { -INT8_C( 47), INT8_C( 10), -INT8_C( 37) }, { INT8_C( 58), INT8_C( 109), INT8_MAX }, { -INT8_C( 66), INT8_C( 31), -INT8_C( 35) }, { INT8_MAX, -INT8_C( 3), INT8_C( 124) }, { INT8_C( 88), INT8_C( 75), INT8_MAX }, { INT8_C( 32), INT8_C( 124), INT8_MAX }, { -INT8_C( 95), -INT8_C( 49), INT8_MIN }, { -INT8_C( 102), INT8_C( 38), -INT8_C( 64) } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int8_t r = simde_vqaddb_s8(test_vec[i].a, test_vec[i].b); simde_assert_equal_i8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int8_t a = simde_test_codegen_random_i8(); int8_t b = simde_test_codegen_random_i8(); int8_t r = simde_vqaddb_s8(a, b); simde_test_codegen_write_i8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqaddh_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a; int16_t b; int16_t r; } test_vec[] = { { -INT16_C( 11727), -INT16_C( 7161), -INT16_C( 18888) }, { INT16_C( 29017), -INT16_C( 13336), INT16_C( 15681) }, { INT16_C( 21856), -INT16_C( 24160), -INT16_C( 2304) }, { INT16_C( 32040), -INT16_C( 18828), INT16_C( 13212) }, { INT16_C( 18671), INT16_C( 31739), INT16_MAX }, { -INT16_C( 18049), -INT16_C( 17707), INT16_MIN }, { INT16_C( 2161), -INT16_C( 28800), -INT16_C( 26639) }, { INT16_C( 28052), -INT16_C( 14641), INT16_C( 13411) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int16_t r = simde_vqaddh_s16(test_vec[i].a, test_vec[i].b); simde_assert_equal_i16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int16_t a = simde_test_codegen_random_i16(); int16_t b = simde_test_codegen_random_i16(); int16_t r = simde_vqaddh_s16(a, b); simde_test_codegen_write_i16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqadds_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a; int32_t b; int32_t r; } test_vec[] = { { -INT32_C( 1924326973), INT32_C( 676628), -INT32_C( 1923650345) }, { INT32_C( 1307766957), -INT32_C( 113624017), INT32_C( 1194142940) }, { -INT32_C( 921041363), 
-INT32_C( 1022157195), -INT32_C( 1943198558) }, { INT32_C( 1938109522), INT32_C( 1742412708), INT32_MAX }, { INT32_C( 1223960628), INT32_C( 675872635), INT32_C( 1899833263) }, { INT32_C( 510999535), -INT32_C( 1541951369), -INT32_C( 1030951834) }, { INT32_C( 745419190), -INT32_C( 1561362096), -INT32_C( 815942906) }, { INT32_C( 1511355573), -INT32_C( 993922929), INT32_C( 517432644) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int32_t r = simde_vqadds_s32(test_vec[i].a, test_vec[i].b); simde_assert_equal_i32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int32_t a = simde_test_codegen_random_i32(); int32_t b = simde_test_codegen_random_i32(); int32_t r = simde_vqadds_s32(a, b); simde_test_codegen_write_i32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqaddd_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a; int64_t b; int64_t r; } test_vec[] = { { -INT64_C( 340247776837815537), INT64_C( 3360683632828815009), INT64_C( 3020435855990999472) }, { INT64_C( 6414212653139999150), -INT64_C( 1181778323891955332), INT64_C( 5232434329248043818) }, { INT64_C( 7213452425197097133), -INT64_C( 7958316966115190934), -INT64_C( 744864540918093801) }, { INT64_C( 2296594619730872089), INT64_C( 3516443261144205821), INT64_C( 5813037880875077910) }, { -INT64_C( 7448554466647680646), -INT64_C( 2866955959388315966), INT64_MIN }, { INT64_C( 8651201011724998477), -INT64_C( 4181274173947682746), INT64_C( 4469926837777315731) }, { INT64_C( 8629077984009144827), -INT64_C( 8387582401519503358), INT64_C( 241495582489641469) }, { INT64_C( 8907147219098662825), INT64_C( 1079524787921154539), INT64_MAX } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int64_t r = simde_vqaddd_s64(test_vec[i].a, test_vec[i].b); 
simde_assert_equal_i64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int64_t a = simde_test_codegen_random_i64(); int64_t b = simde_test_codegen_random_i64(); int64_t r = simde_vqaddd_s64(a, b); simde_test_codegen_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqaddb_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a; uint8_t b; uint8_t r; } test_vec[] = { { UINT8_C(138), UINT8_C(251), UINT8_MAX }, { UINT8_C( 66), UINT8_C(220), UINT8_MAX }, { UINT8_C( 74), UINT8_C(241), UINT8_MAX }, { UINT8_C( 3), UINT8_C( 37), UINT8_C( 40) }, { UINT8_C( 75), UINT8_C(226), UINT8_MAX }, { UINT8_C(118), UINT8_C(137), UINT8_MAX }, { UINT8_C(238), UINT8_C( 97), UINT8_MAX }, { UINT8_C(103), UINT8_C(148), UINT8_C(251) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint8_t r = simde_vqaddb_u8(test_vec[i].a, test_vec[i].b); simde_assert_equal_u8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint8_t a = simde_test_codegen_random_u8(); uint8_t b = simde_test_codegen_random_u8(); uint8_t r = simde_vqaddb_u8(a, b); simde_test_codegen_write_u8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqaddh_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a; uint16_t b; uint16_t r; } test_vec[] = { { UINT16_C(16243), UINT16_C(65009), UINT16_MAX }, { UINT16_C(20266), UINT16_C(14387), UINT16_C(34653) }, { UINT16_C( 2966), UINT16_C(24188), UINT16_C(27154) }, { UINT16_C(53306), UINT16_C(50252), UINT16_MAX }, { UINT16_C(36555), UINT16_C( 5537), UINT16_C(42092) }, { UINT16_C(42111), UINT16_C(51771), UINT16_MAX }, { UINT16_C(45447), UINT16_C(30035), 
UINT16_MAX }, { UINT16_C(47890), UINT16_C(34057), UINT16_MAX } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint16_t r = simde_vqaddh_u16(test_vec[i].a, test_vec[i].b); simde_assert_equal_u16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint16_t a = simde_test_codegen_random_u16(); uint16_t b = simde_test_codegen_random_u16(); uint16_t r = simde_vqaddh_u16(a, b); simde_test_codegen_write_u16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqadds_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a; uint32_t b; uint32_t r; } test_vec[] = { { UINT32_C( 612629242), UINT32_C(3747460681), UINT32_MAX }, { UINT32_C(4215134657), UINT32_C(1958775465), UINT32_MAX }, { UINT32_C(2559140120), UINT32_C(2355282949), UINT32_MAX }, { UINT32_C(2281813622), UINT32_C(1796082545), UINT32_C(4077896167) }, { UINT32_C(1334808837), UINT32_C( 137292871), UINT32_C(1472101708) }, { UINT32_C(1845783749), UINT32_C( 249742582), UINT32_C(2095526331) }, { UINT32_C( 715549733), UINT32_C(2797013296), UINT32_C(3512563029) }, { UINT32_C( 808433855), UINT32_C(3382394307), UINT32_C(4190828162) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint32_t r = simde_vqadds_u32(test_vec[i].a, test_vec[i].b); simde_assert_equal_u32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint32_t a = simde_test_codegen_random_u32(); uint32_t b = simde_test_codegen_random_u32(); uint32_t r = simde_vqadds_u32(a, b); simde_test_codegen_write_u32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqaddd_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { 
uint64_t a; uint64_t b; uint64_t r; } test_vec[] = { { UINT64_C(11868383189900131719), UINT64_C(14734087086196248751), UINT64_MAX }, { UINT64_C( 7756480039837890037), UINT64_C(12583229052650908031), UINT64_MAX }, { UINT64_C( 9070143878369891994), UINT64_C( 3990158769449026671), UINT64_C(13060302647818918665) }, { UINT64_C( 6684755670146830553), UINT64_C(11368367503293966062), UINT64_C(18053123173440796615) }, { UINT64_C(13668346040092766053), UINT64_C(15597847870258027260), UINT64_MAX }, { UINT64_C(14644008195626031103), UINT64_C( 2016950410859849565), UINT64_C(16660958606485880668) }, { UINT64_C( 9970966889071760980), UINT64_C( 6768699537814564631), UINT64_C(16739666426886325611) }, { UINT64_C( 8484908248979874071), UINT64_C(11127219603306357146), UINT64_MAX } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint64_t r = simde_vqaddd_u64(test_vec[i].a, test_vec[i].b); simde_assert_equal_u64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a = simde_test_codegen_random_u64(); uint64_t b = simde_test_codegen_random_u64(); uint64_t r = simde_vqaddd_u64(a, b); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqadd_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 88), -INT8_C( 39), -INT8_C( 126), -INT8_C( 86), -INT8_C( 106), -INT8_C( 49), INT8_C( 70), INT8_C( 124) }, { INT8_C( 64), -INT8_C( 91), INT8_C( 64), INT8_C( 47), INT8_C( 38), INT8_C( 91), -INT8_C( 80), -INT8_C( 15) }, { -INT8_C( 24), INT8_MIN, -INT8_C( 62), -INT8_C( 39), -INT8_C( 68), INT8_C( 42), -INT8_C( 10), INT8_C( 109) } }, { { -INT8_C( 18), -INT8_C( 104), INT8_C( 112), INT8_C( 14), INT8_C( 66), INT8_C( 72), -INT8_C( 83), -INT8_C( 71) }, { -INT8_C( 106), -INT8_C( 119), INT8_C( 
15), -INT8_C( 10), INT8_C( 78), INT8_C( 70), INT8_MAX, -INT8_C( 9) }, { -INT8_C( 124), INT8_MIN, INT8_MAX, INT8_C( 4), INT8_MAX, INT8_MAX, INT8_C( 44), -INT8_C( 80) } }, { { INT8_C( 31), INT8_C( 1), -INT8_C( 95), -INT8_C( 74), -INT8_C( 48), -INT8_C( 25), INT8_C( 50), INT8_C( 16) }, { -INT8_C( 116), INT8_C( 114), INT8_C( 64), -INT8_C( 78), -INT8_C( 51), -INT8_C( 16), -INT8_C( 93), -INT8_C( 69) }, { -INT8_C( 85), INT8_C( 115), -INT8_C( 31), INT8_MIN, -INT8_C( 99), -INT8_C( 41), -INT8_C( 43), -INT8_C( 53) } }, { { -INT8_C( 119), INT8_C( 19), -INT8_C( 54), -INT8_C( 53), INT8_C( 92), INT8_C( 119), -INT8_C( 124), -INT8_C( 14) }, { INT8_C( 0), -INT8_C( 109), -INT8_C( 23), INT8_C( 79), -INT8_C( 39), INT8_C( 104), INT8_C( 70), -INT8_C( 7) }, { -INT8_C( 119), -INT8_C( 90), -INT8_C( 77), INT8_C( 26), INT8_C( 53), INT8_MAX, -INT8_C( 54), -INT8_C( 21) } }, { { INT8_C( 105), -INT8_C( 25), -INT8_C( 81), INT8_C( 58), -INT8_C( 50), -INT8_C( 31), INT8_C( 74), INT8_C( 90) }, { INT8_C( 83), -INT8_C( 118), INT8_C( 12), INT8_C( 32), INT8_C( 123), -INT8_C( 80), -INT8_C( 37), INT8_C( 4) }, { INT8_MAX, INT8_MIN, -INT8_C( 69), INT8_C( 90), INT8_C( 73), -INT8_C( 111), INT8_C( 37), INT8_C( 94) } }, { { -INT8_C( 61), -INT8_C( 91), -INT8_C( 49), INT8_C( 31), INT8_C( 29), INT8_C( 83), INT8_C( 18), INT8_C( 29) }, { -INT8_C( 25), -INT8_C( 5), INT8_C( 108), -INT8_C( 64), INT8_C( 99), -INT8_C( 78), -INT8_C( 71), -INT8_C( 52) }, { -INT8_C( 86), -INT8_C( 96), INT8_C( 59), -INT8_C( 33), INT8_MAX, INT8_C( 5), -INT8_C( 53), -INT8_C( 23) } }, { { -INT8_C( 103), INT8_C( 104), INT8_C( 6), INT8_C( 103), INT8_C( 73), INT8_C( 81), -INT8_C( 63), -INT8_C( 100) }, { -INT8_C( 37), -INT8_C( 50), -INT8_C( 68), INT8_C( 86), INT8_C( 126), -INT8_C( 104), INT8_C( 90), INT8_C( 65) }, { INT8_MIN, INT8_C( 54), -INT8_C( 62), INT8_MAX, INT8_MAX, -INT8_C( 23), INT8_C( 27), -INT8_C( 35) } }, { { INT8_C( 61), INT8_C( 41), INT8_C( 97), INT8_C( 90), INT8_C( 125), INT8_C( 115), INT8_C( 120), INT8_C( 100) }, { INT8_C( 110), 
-INT8_C( 28), INT8_C( 36), -INT8_C( 47), -INT8_C( 105), -INT8_C( 34), -INT8_C( 99), INT8_C( 48) }, { INT8_MAX, INT8_C( 13), INT8_MAX, INT8_C( 43), INT8_C( 20), INT8_C( 81), INT8_C( 21), INT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vqadd_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vqadd_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqadd_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 18685), INT16_C( 3599), INT16_C( 636), -INT16_C( 12418) }, { INT16_C( 7207), INT16_C( 12614), INT16_C( 27874), INT16_C( 29127) }, { -INT16_C( 11478), INT16_C( 16213), INT16_C( 28510), INT16_C( 16709) } }, { { INT16_C( 3398), INT16_C( 3218), INT16_C( 18051), INT16_C( 32) }, { -INT16_C( 20943), INT16_C( 15609), INT16_C( 11436), -INT16_C( 20499) }, { -INT16_C( 17545), INT16_C( 18827), INT16_C( 29487), -INT16_C( 20467) } }, { { -INT16_C( 796), INT16_C( 24765), INT16_C( 15358), INT16_C( 9519) }, { INT16_C( 30295), INT16_C( 14679), INT16_C( 7906), INT16_C( 10410) }, { INT16_C( 29499), INT16_MAX, INT16_C( 23264), INT16_C( 19929) } }, { { INT16_C( 15659), -INT16_C( 20940), INT16_C( 21635), -INT16_C( 19026) }, { -INT16_C( 22782), -INT16_C( 20751), -INT16_C( 8492), -INT16_C( 18339) }, { -INT16_C( 7123), INT16_MIN, INT16_C( 13143), INT16_MIN } }, { { INT16_C( 6874), -INT16_C( 9960), INT16_C( 18262), -INT16_C( 20994) 
}, { INT16_C( 21949), -INT16_C( 24601), -INT16_C( 28301), -INT16_C( 24632) }, { INT16_C( 28823), INT16_MIN, -INT16_C( 10039), INT16_MIN } }, { { -INT16_C( 818), INT16_C( 21069), -INT16_C( 943), INT16_C( 21255) }, { -INT16_C( 1885), INT16_C( 30466), INT16_C( 24535), -INT16_C( 20177) }, { -INT16_C( 2703), INT16_MAX, INT16_C( 23592), INT16_C( 1078) } }, { { INT16_C( 18298), -INT16_C( 12150), -INT16_C( 30321), INT16_C( 19581) }, { INT16_C( 25822), INT16_C( 21228), -INT16_C( 19210), -INT16_C( 15119) }, { INT16_MAX, INT16_C( 9078), INT16_MIN, INT16_C( 4462) } }, { { INT16_C( 16048), INT16_C( 278), INT16_C( 7482), -INT16_C( 8619) }, { INT16_C( 22294), -INT16_C( 4779), -INT16_C( 31306), INT16_C( 12446) }, { INT16_MAX, -INT16_C( 4501), -INT16_C( 23824), INT16_C( 3827) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vqadd_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vqadd_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqadd_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 1708613354), -INT32_C( 360181531) }, { -INT32_C( 1233544421), -INT32_C( 501103337) }, { INT32_C( 475068933), -INT32_C( 861284868) } }, { { -INT32_C( 455086879), INT32_C( 1497246780) }, { INT32_C( 1790669567), -INT32_C( 235132410) }, { INT32_C( 1335582688), INT32_C( 1262114370) } }, { { INT32_C( 1851184009), -INT32_C( 10887452) 
}, { -INT32_C( 1850289543), INT32_C( 2020857751) }, { INT32_C( 894466), INT32_C( 2009970299) } }, { { INT32_C( 6050755), INT32_C( 2136578687) }, { INT32_C( 334042380), -INT32_C( 939203265) }, { INT32_C( 340093135), INT32_C( 1197375422) } }, { { -INT32_C( 1674093896), -INT32_C( 1298362311) }, { -INT32_C( 113028510), -INT32_C( 328091863) }, { -INT32_C( 1787122406), -INT32_C( 1626454174) } }, { { -INT32_C( 1980969718), INT32_C( 1929922151) }, { -INT32_C( 1702432421), -INT32_C( 1889301802) }, { INT32_MIN, INT32_C( 40620349) } }, { { INT32_C( 506174181), -INT32_C( 1932474582) }, { INT32_C( 1116017689), -INT32_C( 718276917) }, { INT32_C( 1622191870), INT32_MIN } }, { { INT32_C( 710810563), -INT32_C( 1130535071) }, { INT32_C( 794240088), -INT32_C( 1816216914) }, { INT32_C( 1505050651), INT32_MIN } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vqadd_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vqadd_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqadd_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int64_t b[1]; int64_t r[1]; } test_vec[] = { { { -INT64_C( 6407494046951039651) }, { -INT64_C( 969804030585533899) }, { -INT64_C( 7377298077536573550) } }, { { -INT64_C( 756558871846176206) }, { INT64_C( 3565986500388755763) }, { INT64_C( 2809427628542579557) } }, { { -INT64_C( 4589979959597430390) }, { INT64_C( 5005928091048444711) }, { INT64_C( 
415948131451014321) } }, { { -INT64_C( 4213209180864222756) }, { -INT64_C( 9202148206401943226) }, { INT64_MIN } }, { { -INT64_C( 7549593798898658321) }, { -INT64_C( 3508138139130637577) }, { INT64_MIN } }, { { INT64_C( 8023137625942621439) }, { -INT64_C( 7464201085452205326) }, { INT64_C( 558936540490416113) } }, { { -INT64_C( 274916365824782297) }, { -INT64_C( 2061989040168153697) }, { -INT64_C( 2336905405992935994) } }, { { -INT64_C( 6959242656317324333) }, { -INT64_C( 1130419137071746033) }, { -INT64_C( 8089661793389070366) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r = simde_vqadd_s64(a, b); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); simde_int64x1_t b = simde_test_arm_neon_random_i64x1(); simde_int64x1_t r = simde_vqadd_s64(a, b); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqadd_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(117), UINT8_C(158), UINT8_C(103), UINT8_C( 86), UINT8_C(230), UINT8_C( 91), UINT8_C( 90), UINT8_C( 30) }, { UINT8_C(105), UINT8_C( 10), UINT8_C( 94), UINT8_C( 46), UINT8_C(204), UINT8_C(120), UINT8_C( 27), UINT8_C( 58) }, { UINT8_C(222), UINT8_C(168), UINT8_C(197), UINT8_C(132), UINT8_MAX, UINT8_C(211), UINT8_C(117), UINT8_C( 88) } }, { { UINT8_C( 43), UINT8_C(155), UINT8_C(114), UINT8_C(189), UINT8_C( 69), UINT8_C(227), UINT8_C(189), UINT8_C(131) }, { UINT8_C(141), UINT8_C(146), UINT8_C(231), UINT8_C(209), UINT8_C(205), UINT8_C( 58), UINT8_C(250), UINT8_C( 
66) }, { UINT8_C(184), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C(197) } }, { { UINT8_C(217), UINT8_C( 98), UINT8_C(152), UINT8_C(191), UINT8_C(189), UINT8_C(243), UINT8_C(222), UINT8_C( 38) }, { UINT8_C(253), UINT8_C( 60), UINT8_C( 84), UINT8_C(201), UINT8_C(180), UINT8_C(111), UINT8_C( 4), UINT8_C(223) }, { UINT8_MAX, UINT8_C(158), UINT8_C(236), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C(226), UINT8_MAX } }, { { UINT8_C( 10), UINT8_C(118), UINT8_C(156), UINT8_C( 79), UINT8_C( 89), UINT8_C( 89), UINT8_C(210), UINT8_C(231) }, { UINT8_C(235), UINT8_C(186), UINT8_C(184), UINT8_C(184), UINT8_C(244), UINT8_C(178), UINT8_C(251), UINT8_C(205) }, { UINT8_C(245), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 20), UINT8_C(147), UINT8_C(141), UINT8_C(209), UINT8_C(134), UINT8_C(107), UINT8_C(247), UINT8_C(132) }, { UINT8_C(167), UINT8_C( 76), UINT8_C( 77), UINT8_C( 91), UINT8_C(187), UINT8_C( 81), UINT8_C( 58), UINT8_C(198) }, { UINT8_C(187), UINT8_C(223), UINT8_C(218), UINT8_MAX, UINT8_MAX, UINT8_C(188), UINT8_MAX, UINT8_MAX } }, { { UINT8_C(200), UINT8_C(214), UINT8_C( 21), UINT8_C( 33), UINT8_C( 48), UINT8_C(232), UINT8_C( 8), UINT8_C( 27) }, { UINT8_C(162), UINT8_C(192), UINT8_C(212), UINT8_C(150), UINT8_C(115), UINT8_C(207), UINT8_C(100), UINT8_C(135) }, { UINT8_MAX, UINT8_MAX, UINT8_C(233), UINT8_C(183), UINT8_C(163), UINT8_MAX, UINT8_C(108), UINT8_C(162) } }, { { UINT8_C( 98), UINT8_C(241), UINT8_C( 89), UINT8_C(233), UINT8_C( 92), UINT8_C( 80), UINT8_C(109), UINT8_C( 3) }, { UINT8_C(156), UINT8_C(186), UINT8_C( 94), UINT8_C( 88), UINT8_C( 12), UINT8_C(152), UINT8_C( 30), UINT8_C(212) }, { UINT8_C(254), UINT8_MAX, UINT8_C(183), UINT8_MAX, UINT8_C(104), UINT8_C(232), UINT8_C(139), UINT8_C(215) } }, { { UINT8_C(110), UINT8_C( 51), UINT8_C(245), UINT8_C(158), UINT8_C( 27), UINT8_C(254), UINT8_C(186), UINT8_C(189) }, { UINT8_C(190), UINT8_C(142), UINT8_C( 84), UINT8_C( 49), UINT8_C( 93), 
UINT8_C(184), UINT8_C(185), UINT8_C(191) }, { UINT8_MAX, UINT8_C(193), UINT8_MAX, UINT8_C(207), UINT8_C(120), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vqadd_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vqadd_u8(a, b); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqadd_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(11966), UINT16_C(44150), UINT16_C(30001), UINT16_C( 2927) }, { UINT16_C(10232), UINT16_C(33651), UINT16_C( 4051), UINT16_C(25723) }, { UINT16_C(22198), UINT16_MAX, UINT16_C(34052), UINT16_C(28650) } }, { { UINT16_C(26150), UINT16_C(62035), UINT16_C(51754), UINT16_C(33204) }, { UINT16_C(17207), UINT16_C(27830), UINT16_C(15972), UINT16_C( 9059) }, { UINT16_C(43357), UINT16_MAX, UINT16_MAX, UINT16_C(42263) } }, { { UINT16_C(55660), UINT16_C(40399), UINT16_C(15951), UINT16_C(18344) }, { UINT16_C( 7014), UINT16_C(14794), UINT16_C(17707), UINT16_C(20893) }, { UINT16_C(62674), UINT16_C(55193), UINT16_C(33658), UINT16_C(39237) } }, { { UINT16_C(61611), UINT16_C(54851), UINT16_C(63419), UINT16_C(62039) }, { UINT16_C( 3642), UINT16_C(40798), UINT16_C(49484), UINT16_C(47298) }, { UINT16_C(65253), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(37274), UINT16_C(59733), UINT16_C(65232), UINT16_C(13872) }, { UINT16_C(64281), UINT16_C(17519), UINT16_C( 3136), 
UINT16_C(60566) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(55804), UINT16_C(47042), UINT16_C( 6609), UINT16_C( 2985) }, { UINT16_C( 1831), UINT16_C(29610), UINT16_C(27848), UINT16_C(25387) }, { UINT16_C(57635), UINT16_MAX, UINT16_C(34457), UINT16_C(28372) } }, { { UINT16_C(33278), UINT16_C(52812), UINT16_C(32127), UINT16_C(38916) }, { UINT16_C(29560), UINT16_C(47325), UINT16_C(29567), UINT16_C(31652) }, { UINT16_C(62838), UINT16_MAX, UINT16_C(61694), UINT16_MAX } }, { { UINT16_C(26188), UINT16_C( 7475), UINT16_C(56448), UINT16_C(42793) }, { UINT16_C(54244), UINT16_C(44059), UINT16_C(17984), UINT16_C(15887) }, { UINT16_MAX, UINT16_C(51534), UINT16_MAX, UINT16_C(58680) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vqadd_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t r = simde_vqadd_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqadd_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C( 94467099), UINT32_C(1607547519) }, { UINT32_C(1088442987), UINT32_C( 531747472) }, { UINT32_C(1182910086), UINT32_C(2139294991) } }, { { UINT32_C(3018317671), UINT32_C(4236516707) }, { UINT32_C( 963062607), UINT32_C(1805733967) }, { UINT32_C(3981380278), UINT32_MAX } }, { { UINT32_C(1148207813), UINT32_C(3886301564) }, { UINT32_C( 606569619), UINT32_C(3192117334) }, { 
UINT32_C(1754777432), UINT32_MAX } }, { { UINT32_C( 460401336), UINT32_C(2534929992) }, { UINT32_C(1909489185), UINT32_C(2497475023) }, { UINT32_C(2369890521), UINT32_MAX } }, { { UINT32_C( 802704563), UINT32_C( 555121806) }, { UINT32_C(1464155393), UINT32_C(3440740373) }, { UINT32_C(2266859956), UINT32_C(3995862179) } }, { { UINT32_C(4226385843), UINT32_C(2660368509) }, { UINT32_C(1309631103), UINT32_C(2263018451) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C(3333798456), UINT32_C( 954714935) }, { UINT32_C( 495922440), UINT32_C(1760273845) }, { UINT32_C(3829720896), UINT32_C(2714988780) } }, { { UINT32_C(2841891884), UINT32_C(1397224916) }, { UINT32_C( 715216727), UINT32_C(2058388290) }, { UINT32_C(3557108611), UINT32_C(3455613206) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vqadd_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t r = simde_vqadd_u32(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqadd_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C( 7980904046589353852) }, { UINT64_C( 1728305938133397857) }, { UINT64_C( 9709209984722751709) } }, { { UINT64_C( 2205678454715127659) }, { UINT64_C( 8848910256038185308) }, { UINT64_C(11054588710753312967) } }, { { UINT64_C( 8909060253653810243) }, { UINT64_C(16084739608838686217) }, { UINT64_MAX } }, { { UINT64_C(13802628386871132048) }, { 
UINT64_C(16918644631544726354) }, { UINT64_MAX } }, { { UINT64_C(13973195990776111445) }, { UINT64_C( 8024319282771093001) }, { UINT64_MAX } }, { { UINT64_C(11853027073396148571) }, { UINT64_C( 4107213462431975789) }, { UINT64_C(15960240535828124360) } }, { { UINT64_C(16977323619508493857) }, { UINT64_C(17601973458158780151) }, { UINT64_MAX } }, { { UINT64_C(18033874767248041038) }, { UINT64_C( 1951482484148353786) }, { UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_vqadd_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t b = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t r = simde_vqadd_u64(a, b); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqaddq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 51), -INT8_C( 81), INT8_C( 4), -INT8_C( 100), -INT8_C( 121), -INT8_C( 54), INT8_C( 38), INT8_C( 101), INT8_C( 84), INT8_C( 97), -INT8_C( 115), INT8_C( 67), INT8_C( 116), -INT8_C( 108), -INT8_C( 16), -INT8_C( 88) }, { INT8_C( 79), INT8_C( 77), -INT8_C( 7), -INT8_C( 47), -INT8_C( 36), INT8_C( 48), -INT8_C( 22), INT8_C( 40), -INT8_C( 24), -INT8_C( 37), INT8_C( 32), -INT8_C( 72), -INT8_C( 71), -INT8_C( 8), -INT8_C( 12), -INT8_C( 19) }, { INT8_MAX, -INT8_C( 4), -INT8_C( 3), INT8_MIN, INT8_MIN, -INT8_C( 6), INT8_C( 16), INT8_MAX, INT8_C( 60), INT8_C( 60), -INT8_C( 83), -INT8_C( 5), INT8_C( 45), -INT8_C( 116), -INT8_C( 28), -INT8_C( 107) } }, { { -INT8_C( 
89), -INT8_C( 8), -INT8_C( 119), INT8_C( 47), -INT8_C( 61), -INT8_C( 81), -INT8_C( 108), INT8_C( 23), INT8_C( 17), INT8_C( 33), INT8_C( 90), -INT8_C( 123), -INT8_C( 75), INT8_C( 74), INT8_C( 46), INT8_C( 4) }, { -INT8_C( 104), INT8_C( 39), -INT8_C( 42), INT8_C( 116), INT8_C( 87), -INT8_C( 64), -INT8_C( 99), INT8_C( 63), -INT8_C( 101), -INT8_C( 67), -INT8_C( 9), INT8_C( 84), -INT8_C( 75), -INT8_C( 20), INT8_C( 65), INT8_C( 93) }, { INT8_MIN, INT8_C( 31), INT8_MIN, INT8_MAX, INT8_C( 26), INT8_MIN, INT8_MIN, INT8_C( 86), -INT8_C( 84), -INT8_C( 34), INT8_C( 81), -INT8_C( 39), INT8_MIN, INT8_C( 54), INT8_C( 111), INT8_C( 97) } }, { { -INT8_C( 28), -INT8_C( 53), -INT8_C( 116), -INT8_C( 89), INT8_C( 122), INT8_C( 32), -INT8_C( 65), -INT8_C( 117), INT8_C( 66), INT8_C( 25), INT8_C( 17), -INT8_C( 9), INT8_C( 100), INT8_C( 63), -INT8_C( 4), -INT8_C( 4) }, { INT8_C( 102), -INT8_C( 46), INT8_C( 112), -INT8_C( 67), -INT8_C( 110), INT8_C( 13), -INT8_C( 3), INT8_C( 45), -INT8_C( 53), -INT8_C( 12), -INT8_C( 127), INT8_MIN, -INT8_C( 32), -INT8_C( 61), -INT8_C( 35), -INT8_C( 59) }, { INT8_C( 74), -INT8_C( 99), -INT8_C( 4), INT8_MIN, INT8_C( 12), INT8_C( 45), -INT8_C( 68), -INT8_C( 72), INT8_C( 13), INT8_C( 13), -INT8_C( 110), INT8_MIN, INT8_C( 68), INT8_C( 2), -INT8_C( 39), -INT8_C( 63) } }, { { -INT8_C( 114), INT8_C( 105), INT8_C( 108), INT8_C( 8), -INT8_C( 118), INT8_C( 43), -INT8_C( 108), -INT8_C( 52), INT8_C( 69), -INT8_C( 91), -INT8_C( 61), -INT8_C( 87), -INT8_C( 28), -INT8_C( 65), -INT8_C( 91), INT8_C( 74) }, { -INT8_C( 111), INT8_C( 21), INT8_C( 7), INT8_C( 35), INT8_C( 35), INT8_C( 4), INT8_C( 80), -INT8_C( 18), -INT8_C( 7), -INT8_C( 46), INT8_C( 110), -INT8_C( 39), -INT8_C( 107), INT8_C( 76), -INT8_C( 98), INT8_C( 35) }, { INT8_MIN, INT8_C( 126), INT8_C( 115), INT8_C( 43), -INT8_C( 83), INT8_C( 47), -INT8_C( 28), -INT8_C( 70), INT8_C( 62), INT8_MIN, INT8_C( 49), -INT8_C( 126), INT8_MIN, INT8_C( 11), INT8_MIN, INT8_C( 109) } }, { { -INT8_C( 75), INT8_C( 11), INT8_C( 43), 
INT8_C( 63), INT8_C( 54), -INT8_C( 65), INT8_C( 11), INT8_C( 123), INT8_C( 100), -INT8_C( 49), INT8_C( 36), INT8_C( 72), -INT8_C( 114), -INT8_C( 55), -INT8_C( 110), INT8_C( 32) }, { -INT8_C( 33), -INT8_C( 102), INT8_C( 67), INT8_C( 2), -INT8_C( 98), -INT8_C( 108), -INT8_C( 16), -INT8_C( 105), INT8_C( 102), INT8_C( 94), INT8_C( 113), -INT8_C( 5), -INT8_C( 86), INT8_C( 15), INT8_C( 30), INT8_C( 96) }, { -INT8_C( 108), -INT8_C( 91), INT8_C( 110), INT8_C( 65), -INT8_C( 44), INT8_MIN, -INT8_C( 5), INT8_C( 18), INT8_MAX, INT8_C( 45), INT8_MAX, INT8_C( 67), INT8_MIN, -INT8_C( 40), -INT8_C( 80), INT8_MAX } }, { { INT8_C( 26), INT8_C( 73), -INT8_C( 97), INT8_C( 81), INT8_C( 9), -INT8_C( 85), -INT8_C( 52), INT8_C( 109), INT8_C( 122), -INT8_C( 15), -INT8_C( 74), INT8_C( 8), -INT8_C( 70), INT8_C( 72), INT8_C( 40), -INT8_C( 103) }, { -INT8_C( 30), INT8_C( 108), -INT8_C( 101), -INT8_C( 127), INT8_C( 0), -INT8_C( 117), INT8_C( 24), INT8_C( 102), -INT8_C( 22), -INT8_C( 119), INT8_C( 97), -INT8_C( 108), -INT8_C( 103), INT8_MAX, -INT8_C( 12), -INT8_C( 77) }, { -INT8_C( 4), INT8_MAX, INT8_MIN, -INT8_C( 46), INT8_C( 9), INT8_MIN, -INT8_C( 28), INT8_MAX, INT8_C( 100), INT8_MIN, INT8_C( 23), -INT8_C( 100), INT8_MIN, INT8_MAX, INT8_C( 28), INT8_MIN } }, { { -INT8_C( 56), -INT8_C( 108), INT8_C( 4), -INT8_C( 47), INT8_C( 63), -INT8_C( 47), INT8_C( 63), -INT8_C( 71), -INT8_C( 62), -INT8_C( 11), -INT8_C( 63), INT8_C( 124), INT8_C( 61), -INT8_C( 22), INT8_C( 22), INT8_C( 32) }, { INT8_C( 86), -INT8_C( 79), -INT8_C( 95), INT8_C( 86), INT8_C( 61), -INT8_C( 71), -INT8_C( 68), INT8_C( 39), INT8_C( 67), INT8_C( 29), -INT8_C( 69), -INT8_C( 36), -INT8_C( 100), -INT8_C( 80), -INT8_C( 113), INT8_C( 100) }, { INT8_C( 30), INT8_MIN, -INT8_C( 91), INT8_C( 39), INT8_C( 124), -INT8_C( 118), -INT8_C( 5), -INT8_C( 32), INT8_C( 5), INT8_C( 18), INT8_MIN, INT8_C( 88), -INT8_C( 39), -INT8_C( 102), -INT8_C( 91), INT8_MAX } }, { { INT8_C( 68), -INT8_C( 108), INT8_C( 54), -INT8_C( 125), INT8_C( 101), INT8_C( 117), 
INT8_C( 60), INT8_C( 39), INT8_C( 106), -INT8_C( 3), -INT8_C( 93), -INT8_C( 89), -INT8_C( 25), -INT8_C( 71), -INT8_C( 57), INT8_C( 61) }, { INT8_C( 107), INT8_C( 104), -INT8_C( 109), -INT8_C( 88), INT8_C( 34), INT8_C( 79), -INT8_C( 49), INT8_C( 101), INT8_C( 108), -INT8_C( 118), INT8_C( 65), INT8_C( 8), INT8_C( 58), -INT8_C( 48), INT8_C( 109), INT8_C( 126) }, { INT8_MAX, -INT8_C( 4), -INT8_C( 55), INT8_MIN, INT8_MAX, INT8_MAX, INT8_C( 11), INT8_MAX, INT8_MAX, -INT8_C( 121), -INT8_C( 28), -INT8_C( 81), INT8_C( 33), -INT8_C( 119), INT8_C( 52), INT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vqaddq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vqaddq_s8(a, b); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqaddq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 9717), INT16_C( 12151), -INT16_C( 21744), -INT16_C( 18219), -INT16_C( 22282), INT16_C( 12709), INT16_C( 21139), INT16_C( 14023) }, { INT16_C( 348), -INT16_C( 31129), INT16_C( 15964), INT16_C( 17917), INT16_C( 21690), INT16_C( 3256), -INT16_C( 19649), INT16_C( 19310) }, { -INT16_C( 9369), -INT16_C( 18978), -INT16_C( 5780), -INT16_C( 302), -INT16_C( 592), INT16_C( 15965), INT16_C( 1490), INT16_MAX } }, { { -INT16_C( 6771), -INT16_C( 25222), INT16_C( 20369), -INT16_C( 30890), -INT16_C( 1033), -INT16_C( 29768), -INT16_C( 32691), 
-INT16_C( 22079) }, { INT16_C( 10369), -INT16_C( 8657), INT16_C( 11623), INT16_C( 8483), -INT16_C( 9343), -INT16_C( 16339), -INT16_C( 25713), INT16_C( 7179) }, { INT16_C( 3598), INT16_MIN, INT16_C( 31992), -INT16_C( 22407), -INT16_C( 10376), INT16_MIN, INT16_MIN, -INT16_C( 14900) } }, { { -INT16_C( 31359), INT16_C( 4794), INT16_C( 4309), -INT16_C( 13159), INT16_C( 21003), INT16_C( 22615), INT16_C( 6354), INT16_C( 21250) }, { INT16_C( 12609), -INT16_C( 22479), INT16_C( 21854), -INT16_C( 8247), -INT16_C( 2512), -INT16_C( 16480), -INT16_C( 21614), INT16_C( 5084) }, { -INT16_C( 18750), -INT16_C( 17685), INT16_C( 26163), -INT16_C( 21406), INT16_C( 18491), INT16_C( 6135), -INT16_C( 15260), INT16_C( 26334) } }, { { -INT16_C( 27087), INT16_C( 1573), -INT16_C( 16730), -INT16_C( 20014), INT16_C( 10768), -INT16_C( 7671), INT16_C( 2882), -INT16_C( 31946) }, { INT16_C( 26429), -INT16_C( 25813), -INT16_C( 2884), -INT16_C( 4741), INT16_C( 7147), INT16_C( 32172), -INT16_C( 30522), -INT16_C( 2160) }, { -INT16_C( 658), -INT16_C( 24240), -INT16_C( 19614), -INT16_C( 24755), INT16_C( 17915), INT16_C( 24501), -INT16_C( 27640), INT16_MIN } }, { { -INT16_C( 19170), -INT16_C( 15107), -INT16_C( 12173), -INT16_C( 31627), INT16_C( 32762), INT16_C( 15462), -INT16_C( 25462), -INT16_C( 14400) }, { -INT16_C( 5372), -INT16_C( 16285), -INT16_C( 8480), -INT16_C( 13395), INT16_C( 23289), -INT16_C( 16568), -INT16_C( 10014), INT16_C( 439) }, { -INT16_C( 24542), -INT16_C( 31392), -INT16_C( 20653), INT16_MIN, INT16_MAX, -INT16_C( 1106), INT16_MIN, -INT16_C( 13961) } }, { { -INT16_C( 19315), INT16_C( 197), INT16_C( 15236), INT16_C( 32388), -INT16_C( 5190), INT16_C( 17595), INT16_C( 31623), -INT16_C( 29940) }, { INT16_C( 28518), INT16_C( 17996), -INT16_C( 1715), INT16_C( 17937), INT16_C( 22867), INT16_C( 13829), -INT16_C( 17359), -INT16_C( 16841) }, { INT16_C( 9203), INT16_C( 18193), INT16_C( 13521), INT16_MAX, INT16_C( 17677), INT16_C( 31424), INT16_C( 14264), INT16_MIN } }, { { -INT16_C( 911), -INT16_C( 
2625), INT16_C( 17207), -INT16_C( 3724), INT16_C( 12078), -INT16_C( 18890), INT16_C( 17066), INT16_C( 4161) }, { -INT16_C( 29263), -INT16_C( 425), INT16_C( 26759), -INT16_C( 9660), INT16_C( 18882), -INT16_C( 3312), INT16_C( 18182), INT16_C( 30642) }, { -INT16_C( 30174), -INT16_C( 3050), INT16_MAX, -INT16_C( 13384), INT16_C( 30960), -INT16_C( 22202), INT16_MAX, INT16_MAX } }, { { INT16_C( 28996), INT16_C( 31596), -INT16_C( 8012), -INT16_C( 7315), -INT16_C( 23793), -INT16_C( 18023), -INT16_C( 9499), -INT16_C( 26934) }, { INT16_C( 8552), -INT16_C( 4204), -INT16_C( 10103), INT16_C( 19401), -INT16_C( 9695), INT16_C( 10047), -INT16_C( 3807), INT16_C( 26014) }, { INT16_MAX, INT16_C( 27392), -INT16_C( 18115), INT16_C( 12086), INT16_MIN, -INT16_C( 7976), -INT16_C( 13306), -INT16_C( 920) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vqaddq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vqaddq_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqaddq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 405785285), -INT32_C( 2001782660), INT32_C( 676703092), INT32_C( 215642211) }, { INT32_C( 833772085), INT32_C( 197532335), INT32_C( 1777678421), INT32_C( 2076218293) }, { INT32_C( 1239557370), -INT32_C( 1804250325), INT32_MAX, INT32_MAX } }, { { -INT32_C( 627839139), -INT32_C( 1688059097), INT32_C( 
1371781102), INT32_C( 1566416423) }, { -INT32_C( 1483861768), -INT32_C( 2135796694), INT32_C( 1122609292), -INT32_C( 1732400581) }, { -INT32_C( 2111700907), INT32_MIN, INT32_MAX, -INT32_C( 165984158) } }, { { -INT32_C( 1049472871), -INT32_C( 2107845229), -INT32_C( 1261231988), -INT32_C( 1240387138) }, { INT32_C( 1801297729), INT32_C( 2146111731), -INT32_C( 205400648), INT32_C( 411795071) }, { INT32_C( 751824858), INT32_C( 38266502), -INT32_C( 1466632636), -INT32_C( 828592067) } }, { { INT32_C( 1658453711), INT32_C( 1608791763), INT32_C( 336836694), INT32_C( 717890793) }, { -INT32_C( 1231738941), -INT32_C( 281640649), -INT32_C( 706545834), INT32_C( 1173188214) }, { INT32_C( 426714770), INT32_C( 1327151114), -INT32_C( 369709140), INT32_C( 1891079007) } }, { { INT32_C( 1067960172), INT32_C( 1402899709), INT32_C( 761770564), -INT32_C( 1705561642) }, { -INT32_C( 1873744808), -INT32_C( 1015052691), -INT32_C( 191339906), INT32_C( 1010403024) }, { -INT32_C( 805784636), INT32_C( 387847018), INT32_C( 570430658), -INT32_C( 695158618) } }, { { INT32_C( 1249632333), -INT32_C( 1331881620), -INT32_C( 1562573621), -INT32_C( 1925434315) }, { -INT32_C( 1910666207), -INT32_C( 1856856813), -INT32_C( 813307137), -INT32_C( 1123303824) }, { -INT32_C( 661033874), INT32_MIN, INT32_MIN, INT32_MIN } }, { { INT32_C( 185042590), INT32_C( 1807459487), -INT32_C( 586311256), -INT32_C( 294958643) }, { -INT32_C( 394426155), INT32_C( 611962661), INT32_C( 720633529), INT32_C( 1541930684) }, { -INT32_C( 209383565), INT32_MAX, INT32_C( 134322273), INT32_C( 1246972041) } }, { { INT32_C( 610725764), INT32_C( 1016013203), -INT32_C( 2011587398), -INT32_C( 1166637851) }, { INT32_C( 849605389), INT32_C( 2086083778), -INT32_C( 676967909), -INT32_C( 852325047) }, { INT32_C( 1460331153), INT32_MAX, INT32_MIN, -INT32_C( 2018962898) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = 
simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vqaddq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vqaddq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqaddq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { -INT64_C( 247047943962692106), -INT64_C( 1789219120264728378) }, { -INT64_C( 7416386688530799948), INT64_C( 8227642745494112686) }, { -INT64_C( 7663434632493492054), INT64_C( 6438423625229384308) } }, { { -INT64_C( 2262437474353296534), INT64_C( 35363921426178789) }, { INT64_C( 1042527654696889669), INT64_C( 6180465821605525207) }, { -INT64_C( 1219909819656406865), INT64_C( 6215829743031703996) } }, { { -INT64_C( 8921192576213292745), -INT64_C( 7944746336789026447) }, { INT64_C( 2068249840679090244), -INT64_C( 7809746033345302435) }, { -INT64_C( 6852942735534202501), INT64_MIN } }, { { -INT64_C( 7944325414965177698), -INT64_C( 5288055603979101505) }, { INT64_C( 5829644682889203077), INT64_C( 3021413471688939262) }, { -INT64_C( 2114680732075974621), -INT64_C( 2266642132290162243) } }, { { -INT64_C( 6306404067117182007), INT64_C( 6640930210847181313) }, { -INT64_C( 7952497594911521697), INT64_C( 4330732708174773348) }, { INT64_MIN, INT64_MAX } }, { { -INT64_C( 6467273906831654995), -INT64_C( 6585033115421772950) }, { INT64_C( 3885369327502214343), INT64_C( 6882976536663021526) }, { -INT64_C( 2581904579329440652), INT64_C( 297943421241248576) } }, { { -INT64_C( 5401728779666740547), INT64_C( 3247821444645069352) }, { INT64_C( 
8745835046495521112), INT64_C( 9176444618500375625) }, { INT64_C( 3344106266828780565), INT64_MAX } }, { { -INT64_C( 6056519599965188111), -INT64_C( 2981335135517284083) }, { -INT64_C( 2139104708659239116), INT64_C( 5343017766601102052) }, { -INT64_C( 8195624308624427227), INT64_C( 2361682631083817969) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_vqaddq_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); simde_int64x2_t r = simde_vqaddq_s64(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqaddq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C( 19), UINT8_C(249), UINT8_C( 68), UINT8_C( 15), UINT8_C(214), UINT8_C(186), UINT8_C(154), UINT8_C(108), UINT8_C(248), UINT8_C( 33), UINT8_C( 46), UINT8_C(216), UINT8_C( 99), UINT8_C(130), UINT8_C(158), UINT8_C( 3) }, { UINT8_C( 66), UINT8_C(193), UINT8_C(146), UINT8_C(243), UINT8_C(113), UINT8_C(248), UINT8_C(189), UINT8_C(191), UINT8_C(249), UINT8_C(237), UINT8_C(119), UINT8_C(109), UINT8_C( 90), UINT8_C( 39), UINT8_C(171), UINT8_C(109) }, { UINT8_C( 85), UINT8_MAX, UINT8_C(214), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C(165), UINT8_MAX, UINT8_C(189), UINT8_C(169), UINT8_MAX, UINT8_C(112) } }, { { UINT8_C( 33), UINT8_C(239), UINT8_C(124), UINT8_C(247), UINT8_C(170), UINT8_C( 22), UINT8_C(100), UINT8_C(162), UINT8_C( 55), 
UINT8_C(146), UINT8_C(122), UINT8_C(154), UINT8_C( 20), UINT8_C( 24), UINT8_C(157), UINT8_C( 87) }, { UINT8_C(217), UINT8_C( 48), UINT8_C( 74), UINT8_C( 75), UINT8_C( 40), UINT8_C( 7), UINT8_C( 10), UINT8_C( 34), UINT8_C(245), UINT8_C(129), UINT8_C(143), UINT8_C( 79), UINT8_C(169), UINT8_C( 58), UINT8_C(188), UINT8_C(202) }, { UINT8_C(250), UINT8_MAX, UINT8_C(198), UINT8_MAX, UINT8_C(210), UINT8_C( 29), UINT8_C(110), UINT8_C(196), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C(233), UINT8_C(189), UINT8_C( 82), UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 42), UINT8_C( 56), UINT8_C(193), UINT8_C(212), UINT8_C( 78), UINT8_C( 37), UINT8_C(118), UINT8_C(133), UINT8_C(183), UINT8_C(240), UINT8_C( 31), UINT8_C(204), UINT8_C( 9), UINT8_C(188), UINT8_C( 35), UINT8_C(226) }, { UINT8_C(236), UINT8_C(109), UINT8_C( 45), UINT8_C( 21), UINT8_C(116), UINT8_C( 55), UINT8_C( 55), UINT8_C(105), UINT8_C(185), UINT8_C(198), UINT8_C(184), UINT8_C( 98), UINT8_C( 0), UINT8_C(116), UINT8_C( 44), UINT8_C( 42) }, { UINT8_MAX, UINT8_C(165), UINT8_C(238), UINT8_C(233), UINT8_C(194), UINT8_C( 92), UINT8_C(173), UINT8_C(238), UINT8_MAX, UINT8_MAX, UINT8_C(215), UINT8_MAX, UINT8_C( 9), UINT8_MAX, UINT8_C( 79), UINT8_MAX } }, { { UINT8_C(172), UINT8_C(237), UINT8_C(254), UINT8_C(250), UINT8_C( 19), UINT8_C(116), UINT8_C(127), UINT8_C(202), UINT8_C(101), UINT8_C(158), UINT8_C(150), UINT8_C(110), UINT8_C( 91), UINT8_C(185), UINT8_C( 80), UINT8_C( 71) }, { UINT8_C( 38), UINT8_C(126), UINT8_C( 92), UINT8_C(155), UINT8_C(181), UINT8_C(147), UINT8_C( 4), UINT8_C(110), UINT8_C( 89), UINT8_C(189), UINT8_C(208), UINT8_C( 90), UINT8_C( 49), UINT8_C(252), UINT8_C(132), UINT8_C(222) }, { UINT8_C(210), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C(200), UINT8_MAX, UINT8_C(131), UINT8_MAX, UINT8_C(190), UINT8_MAX, UINT8_MAX, UINT8_C(200), UINT8_C(140), UINT8_MAX, UINT8_C(212), UINT8_MAX } }, { { UINT8_C(234), UINT8_C(131), UINT8_C(216), UINT8_C(253), UINT8_C(247), UINT8_C( 88), UINT8_C(199), UINT8_C( 92), UINT8_C(246), 
UINT8_C( 94), UINT8_C(202), UINT8_C( 81), UINT8_C( 23), UINT8_C( 27), UINT8_C(153), UINT8_C( 62) }, { UINT8_C(153), UINT8_C(245), UINT8_C(217), UINT8_C( 78), UINT8_C(137), UINT8_C(221), UINT8_C(189), UINT8_C(226), UINT8_C(154), UINT8_C(141), UINT8_C( 60), UINT8_C(204), UINT8_C(138), UINT8_C(193), UINT8_C(170), UINT8_C(116) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C(235), UINT8_MAX, UINT8_MAX, UINT8_C(161), UINT8_C(220), UINT8_MAX, UINT8_C(178) } }, { { UINT8_C( 68), UINT8_C(130), UINT8_C(113), UINT8_C( 59), UINT8_C(218), UINT8_C( 56), UINT8_C(152), UINT8_C(209), UINT8_C(150), UINT8_C( 98), UINT8_C( 34), UINT8_C(174), UINT8_C(125), UINT8_C(187), UINT8_C(236), UINT8_C( 22) }, { UINT8_C(177), UINT8_C(197), UINT8_C(101), UINT8_C( 58), UINT8_C(162), UINT8_C( 34), UINT8_C( 28), UINT8_C( 61), UINT8_C(175), UINT8_C( 89), UINT8_C( 9), UINT8_C( 57), UINT8_C( 26), UINT8_C(179), UINT8_C(173), UINT8_C( 94) }, { UINT8_C(245), UINT8_MAX, UINT8_C(214), UINT8_C(117), UINT8_MAX, UINT8_C( 90), UINT8_C(180), UINT8_MAX, UINT8_MAX, UINT8_C(187), UINT8_C( 43), UINT8_C(231), UINT8_C(151), UINT8_MAX, UINT8_MAX, UINT8_C(116) } }, { { UINT8_C( 53), UINT8_C( 30), UINT8_C(153), UINT8_C( 16), UINT8_C( 87), UINT8_C( 49), UINT8_C(225), UINT8_C(237), UINT8_C(148), UINT8_C( 3), UINT8_C(155), UINT8_C( 17), UINT8_C(191), UINT8_C(135), UINT8_C( 40), UINT8_C(112) }, { UINT8_C( 76), UINT8_C(141), UINT8_C(170), UINT8_C(239), UINT8_C(175), UINT8_C(198), UINT8_C( 44), UINT8_C( 94), UINT8_C( 31), UINT8_C( 53), UINT8_C(152), UINT8_C( 57), UINT8_C(232), UINT8_C( 69), UINT8_C(151), UINT8_C( 29) }, { UINT8_C(129), UINT8_C(171), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C(247), UINT8_MAX, UINT8_MAX, UINT8_C(179), UINT8_C( 56), UINT8_MAX, UINT8_C( 74), UINT8_MAX, UINT8_C(204), UINT8_C(191), UINT8_C(141) } }, { { UINT8_C(100), UINT8_C( 49), UINT8_C( 45), UINT8_C(187), UINT8_C( 98), UINT8_C( 14), UINT8_C(168), UINT8_C(246), UINT8_C( 18), UINT8_C( 
68), UINT8_C( 8), UINT8_C(209), UINT8_C(203), UINT8_C( 48), UINT8_C( 65), UINT8_C( 24) }, { UINT8_C(189), UINT8_C(235), UINT8_C( 7), UINT8_C(108), UINT8_C(177), UINT8_C( 51), UINT8_C(202), UINT8_C(209), UINT8_C(104), UINT8_C( 98), UINT8_C( 10), UINT8_C( 80), UINT8_C(168), UINT8_C(162), UINT8_C(109), UINT8_C( 12) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 52), UINT8_MAX, UINT8_MAX, UINT8_C( 65), UINT8_MAX, UINT8_MAX, UINT8_C(122), UINT8_C(166), UINT8_C( 18), UINT8_MAX, UINT8_MAX, UINT8_C(210), UINT8_C(174), UINT8_C( 36) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vqaddq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vqaddq_u8(a, b); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqaddq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(44137), UINT16_C(29135), UINT16_C( 5385), UINT16_C(65255), UINT16_C( 4257), UINT16_C(32373), UINT16_C(42489), UINT16_C( 1882) }, { UINT16_C(11291), UINT16_C( 2242), UINT16_C(33329), UINT16_C(22790), UINT16_C(17167), UINT16_C(45739), UINT16_C(24707), UINT16_C(60522) }, { UINT16_C(55428), UINT16_C(31377), UINT16_C(38714), UINT16_MAX, UINT16_C(21424), UINT16_MAX, UINT16_MAX, UINT16_C(62404) } }, { { UINT16_C(14605), UINT16_C( 5725), UINT16_C(17742), UINT16_C(61204), UINT16_C(35413), UINT16_C(20078), UINT16_C(51247), UINT16_C(19030) }, { UINT16_C( 6388), UINT16_C( 
9554), UINT16_C(22682), UINT16_C(43390), UINT16_C(10652), UINT16_C( 8027), UINT16_C(50570), UINT16_C(38667) }, { UINT16_C(20993), UINT16_C(15279), UINT16_C(40424), UINT16_MAX, UINT16_C(46065), UINT16_C(28105), UINT16_MAX, UINT16_C(57697) } }, { { UINT16_C(26878), UINT16_C(19885), UINT16_C(49837), UINT16_C( 828), UINT16_C(43596), UINT16_C(31569), UINT16_C(42866), UINT16_C(26309) }, { UINT16_C( 6335), UINT16_C(22923), UINT16_C( 2672), UINT16_C( 3074), UINT16_C(24115), UINT16_C(48427), UINT16_C(13859), UINT16_C( 8788) }, { UINT16_C(33213), UINT16_C(42808), UINT16_C(52509), UINT16_C( 3902), UINT16_MAX, UINT16_MAX, UINT16_C(56725), UINT16_C(35097) } }, { { UINT16_C( 671), UINT16_C(19567), UINT16_C(43972), UINT16_C( 4175), UINT16_C(41302), UINT16_C(51339), UINT16_C(20552), UINT16_C( 2095) }, { UINT16_C(47720), UINT16_C(55649), UINT16_C(25796), UINT16_C(63717), UINT16_C( 4546), UINT16_C(58805), UINT16_C( 2631), UINT16_C(58887) }, { UINT16_C(48391), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C(45848), UINT16_MAX, UINT16_C(23183), UINT16_C(60982) } }, { { UINT16_C(30220), UINT16_C(53299), UINT16_C(33314), UINT16_C(30944), UINT16_C(27427), UINT16_C(27712), UINT16_C(28603), UINT16_C( 9332) }, { UINT16_C(54570), UINT16_C(61181), UINT16_C(57913), UINT16_C(64486), UINT16_C(40179), UINT16_C(15329), UINT16_C(59558), UINT16_C(45601) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C(43041), UINT16_MAX, UINT16_C(54933) } }, { { UINT16_C(21599), UINT16_C(33154), UINT16_C(25303), UINT16_C(64249), UINT16_C(14797), UINT16_C(34918), UINT16_C(55977), UINT16_C(54188) }, { UINT16_C(43440), UINT16_C(59841), UINT16_C(43148), UINT16_C(32741), UINT16_C(50756), UINT16_C(60090), UINT16_C(56494), UINT16_C( 3484) }, { UINT16_C(65039), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C(57672) } }, { { UINT16_C( 7728), UINT16_C( 1934), UINT16_C(34688), UINT16_C(19714), UINT16_C(26817), UINT16_C(27349), UINT16_C(33347), UINT16_C(62269) }, { 
UINT16_C(65067), UINT16_C(47068), UINT16_C(49574), UINT16_C(59959), UINT16_C(61831), UINT16_C(14036), UINT16_C(28877), UINT16_C(65091) }, { UINT16_MAX, UINT16_C(49002), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C(41385), UINT16_C(62224), UINT16_MAX } }, { { UINT16_C(53902), UINT16_C( 3589), UINT16_C( 1881), UINT16_C( 6747), UINT16_C(12656), UINT16_C(45956), UINT16_C(49587), UINT16_C(56998) }, { UINT16_C(33472), UINT16_C(26262), UINT16_C(52548), UINT16_C(52049), UINT16_C( 9662), UINT16_C(35841), UINT16_C(17814), UINT16_C( 9354) }, { UINT16_MAX, UINT16_C(29851), UINT16_C(54429), UINT16_C(58796), UINT16_C(22318), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vqaddq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t r = simde_vqaddq_u16(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqaddq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(3666842669), UINT32_C(2390080161), UINT32_C( 817569982), UINT32_C(4145787418) }, { UINT32_C( 135928911), UINT32_C(3146748057), UINT32_C(2147957260), UINT32_C( 895976456) }, { UINT32_C(3802771580), UINT32_MAX, UINT32_C(2965527242), UINT32_MAX } }, { { UINT32_C(3104831000), UINT32_C(1866958256), UINT32_C(3231646629), UINT32_C( 79149749) }, { UINT32_C(1879953879), UINT32_C(1831574625), UINT32_C(3740086998), UINT32_C(3457373366) 
}, { UINT32_MAX, UINT32_C(3698532881), UINT32_MAX, UINT32_C(3536523115) } }, { { UINT32_C(4220002891), UINT32_C(1298845607), UINT32_C(2282556114), UINT32_C(2609693892) }, { UINT32_C(4127955349), UINT32_C( 191051317), UINT32_C( 518606952), UINT32_C(4025351332) }, { UINT32_MAX, UINT32_C(1489896924), UINT32_C(2801163066), UINT32_MAX } }, { { UINT32_C(3337319711), UINT32_C( 387142980), UINT32_C( 614408287), UINT32_C(2042571748) }, { UINT32_C(4201630661), UINT32_C(1778831873), UINT32_C(3347640098), UINT32_C( 196507116) }, { UINT32_MAX, UINT32_C(2165974853), UINT32_C(3962048385), UINT32_C(2239078864) } }, { { UINT32_C( 802267626), UINT32_C(1447486967), UINT32_C(3933922565), UINT32_C(3580049936) }, { UINT32_C( 114348805), UINT32_C(3362838181), UINT32_C(2979002821), UINT32_C(1505510766) }, { UINT32_C( 916616431), UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(3733491431), UINT32_C(2016726643), UINT32_C(3278024627), UINT32_C(4003055337) }, { UINT32_C(1056205209), UINT32_C( 67527999), UINT32_C(3434517854), UINT32_C(3257234139) }, { UINT32_MAX, UINT32_C(2084254642), UINT32_MAX, UINT32_MAX } }, { { UINT32_C(1939909888), UINT32_C( 787272827), UINT32_C(1827819139), UINT32_C(2908392212) }, { UINT32_C( 871124980), UINT32_C( 305656500), UINT32_C(1675554184), UINT32_C(1613038688) }, { UINT32_C(2811034868), UINT32_C(1092929327), UINT32_C(3503373323), UINT32_MAX } }, { { UINT32_C( 768918961), UINT32_C( 492552345), UINT32_C( 596200718), UINT32_C(3436242136) }, { UINT32_C(3892296755), UINT32_C( 939079599), UINT32_C(2224740132), UINT32_C(2380644315) }, { UINT32_MAX, UINT32_C(1431631944), UINT32_C(2820940850), UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vqaddq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 
8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t r = simde_vqaddq_u32(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqaddq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(17243729724880665306), UINT64_C( 479918823202608748) }, { UINT64_C( 9159647833849261395), UINT64_C(12959692697279553431) }, { UINT64_MAX, UINT64_C(13439611520482162179) } }, { { UINT64_C(12254249772932216397), UINT64_C( 7517869064982434477) }, { UINT64_C(14442852904420611779), UINT64_C( 7078785694487478705) }, { UINT64_MAX, UINT64_C(14596654759469913182) } }, { { UINT64_C( 5405605557765850652), UINT64_C(14637240881365201315) }, { UINT64_C( 6448356046027773571), UINT64_C( 9989581481856698411) }, { UINT64_C(11853961603793624223), UINT64_MAX } }, { { UINT64_C( 6543424278721996261), UINT64_C(15559149823588525850) }, { UINT64_C(18198211363880019851), UINT64_C( 8469826759267479697) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 8220413299959295491), UINT64_C(16598158656689940269) }, { UINT64_C( 9076345260268512040), UINT64_C( 1376700779674218775) }, { UINT64_C(17296758560227807531), UINT64_C(17974859436364159044) } }, { { UINT64_C( 450045338343773045), UINT64_C( 8956216358241554952) }, { UINT64_C( 2462460794197337283), UINT64_C( 2648368024063158428) }, { UINT64_C( 2912506132541110328), UINT64_C(11604584382304713380) } }, { { UINT64_C( 8635704442793004536), UINT64_C( 202985511673618177) }, { UINT64_C( 5672527485480289930), UINT64_C( 6798388585654026629) }, { UINT64_C(14308231928273294466), UINT64_C( 7001374097327644806) } }, { { UINT64_C( 8731924942568769864), UINT64_C(13450175318447753831) }, { UINT64_C(13539237984830773734), 
UINT64_C(16307051131059626122) }, { UINT64_MAX, UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vqaddq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t r = simde_vqaddq_u64(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vqaddb_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqaddh_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqadds_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqaddd_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vqaddb_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vqaddh_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vqadds_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vqaddd_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vqadd_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqadd_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqadd_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqadd_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vqadd_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vqadd_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vqadd_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vqadd_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vqaddq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqaddq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqaddq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqaddq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vqaddq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vqaddq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vqaddq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vqaddq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/qdmulh.c000066400000000000000000000331121400333146700167550ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN qdmulh #include "test-neon.h" 
#include "../../../simde/arm/neon/qdmulh.h" static int test_simde_vqdmulh_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 10007), INT16_C( 28883), -INT16_C( 16203), -INT16_C( 25505) }, { -INT16_C( 28965), -INT16_C( 965), -INT16_C( 21451), -INT16_C( 19467) }, { -INT16_C( 8846), -INT16_C( 851), INT16_C( 10607), INT16_C( 15152) } }, { { INT16_C( 297), -INT16_C( 28727), INT16_C( 26792), INT16_C( 21146) }, { -INT16_C( 2097), INT16_C( 13945), INT16_C( 6034), -INT16_C( 22205) }, { -INT16_C( 20), -INT16_C( 12226), INT16_C( 4933), -INT16_C( 14330) } }, { { INT16_C( 5694), -INT16_C( 3302), INT16_C( 31190), -INT16_C( 20080) }, { -INT16_C( 13560), INT16_C( 15789), -INT16_C( 23944), -INT16_C( 24080) }, { -INT16_C( 2357), -INT16_C( 1592), -INT16_C( 22791), INT16_C( 14756) } }, { { -INT16_C( 17757), INT16_C( 19504), -INT16_C( 13790), -INT16_C( 3682) }, { INT16_C( 6337), INT16_C( 21287), INT16_C( 27183), INT16_C( 28413) }, { -INT16_C( 3435), INT16_C( 12670), -INT16_C( 11440), -INT16_C( 3193) } }, { { INT16_C( 6017), INT16_C( 22369), -INT16_C( 3696), -INT16_C( 26615) }, { -INT16_C( 18755), INT16_C( 13781), -INT16_C( 14759), -INT16_C( 810) }, { -INT16_C( 3444), INT16_C( 9407), INT16_C( 1664), INT16_C( 657) } }, { { INT16_C( 1664), -INT16_C( 23992), -INT16_C( 6191), -INT16_C( 28013) }, { -INT16_C( 17665), INT16_C( 12006), -INT16_C( 7388), -INT16_C( 23140) }, { -INT16_C( 898), -INT16_C( 8791), INT16_C( 1395), INT16_C( 19782) } }, { { -INT16_C( 262), -INT16_C( 29955), INT16_C( 1775), -INT16_C( 21469) }, { -INT16_C( 1860), INT16_C( 5601), -INT16_C( 18498), INT16_C( 15890) }, { INT16_C( 14), -INT16_C( 5121), -INT16_C( 1003), -INT16_C( 10411) } }, { { INT16_C( 23230), -INT16_C( 28704), INT16_C( 29505), INT16_C( 16417) }, { INT16_C( 1837), INT16_C( 21103), INT16_C( 3050), -INT16_C( 6921) }, { INT16_C( 1302), -INT16_C( 18486), INT16_C( 2746), -INT16_C( 3468) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vqdmulh_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vqdmulh_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqdmulh_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 1569407431), INT32_C( 1412891023) }, { INT32_C( 132938559), -INT32_C( 247554225) }, { INT32_C( 97153132), -INT32_C( 162873018) } }, { { INT32_C( 2121161384), INT32_C( 1882235674) }, { INT32_C( 1466757416), INT32_C( 1199108991) }, { INT32_C( 1448778989), INT32_C( 1051000188) } }, { { -INT32_C( 995884235), INT32_C( 1125767940) }, { -INT32_C( 1505098410), INT32_C( 1284999588) }, { INT32_C( 697981463), INT32_C( 673630898) } }, { { -INT32_C( 37026077), -INT32_C( 781321303) }, { INT32_C( 1797839340), INT32_C( 45261005) }, { -INT32_C( 30997646), -INT32_C( 16467361) } }, { { -INT32_C( 1463396444), -INT32_C( 1980965070) }, { -INT32_C( 1993394460), -INT32_C( 1563048257) }, { INT32_C( 1358392818), INT32_C( 1441847532) } }, { { INT32_C( 1973395660), -INT32_C( 2025386853) }, { -INT32_C( 1275891738), -INT32_C( 1263164144) }, { -INT32_C( 1172460252), INT32_C( 1191345998) } }, { { INT32_C( 794590204), INT32_C( 1069041755) }, { INT32_C( 1053353855), INT32_C( 2061606573) }, { INT32_C( 389751351), INT32_C( 1026291171) } }, { { -INT32_C( 621838274), INT32_C( 1935750797) }, { -INT32_C( 1239001946), -INT32_C( 160769030) }, { INT32_C( 358772851), 
-INT32_C( 144917881) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vqdmulh_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vqdmulh_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqdmulhq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 18403), INT16_C( 14972), -INT16_C( 24285), INT16_C( 10702), -INT16_C( 30041), -INT16_C( 5908), -INT16_C( 31444), INT16_C( 30183) }, { -INT16_C( 6237), INT16_C( 3774), INT16_C( 8878), -INT16_C( 15805), -INT16_C( 14743), INT16_C( 22060), INT16_C( 15220), INT16_C( 22311) }, { -INT16_C( 3503), INT16_C( 1724), -INT16_C( 6580), -INT16_C( 5162), INT16_C( 13516), -INT16_C( 3978), -INT16_C( 14606), INT16_C( 20550) } }, { { -INT16_C( 23678), -INT16_C( 23150), INT16_C( 24645), -INT16_C( 4914), -INT16_C( 17686), INT16_C( 6101), -INT16_C( 17344), -INT16_C( 7284) }, { INT16_C( 19107), INT16_C( 21233), INT16_C( 13677), -INT16_C( 10732), INT16_C( 16635), INT16_C( 28460), INT16_C( 21627), -INT16_C( 313) }, { -INT16_C( 13807), -INT16_C( 15001), INT16_C( 10286), INT16_C( 1609), -INT16_C( 8979), INT16_C( 5298), -INT16_C( 11448), INT16_C( 69) } }, { { INT16_C( 23031), INT16_C( 15523), INT16_C( 29369), -INT16_C( 23767), -INT16_C( 468), INT16_C( 27834), INT16_C( 18106), INT16_C( 24144) }, { INT16_C( 16785), -INT16_C( 336), -INT16_C( 15242), INT16_C( 29396), INT16_C( 5), 
-INT16_C( 32543), -INT16_C( 22444), INT16_C( 19582) }, { INT16_C( 11797), -INT16_C( 160), -INT16_C( 13661), -INT16_C( 21322), -INT16_C( 1), -INT16_C( 27643), -INT16_C( 12402), INT16_C( 14428) } }, { { INT16_C( 8705), -INT16_C( 17784), -INT16_C( 20076), -INT16_C( 16290), INT16_C( 6319), INT16_C( 27181), INT16_C( 32095), -INT16_C( 3896) }, { INT16_C( 30910), INT16_C( 13806), -INT16_C( 15812), INT16_C( 16807), -INT16_C( 30526), INT16_C( 6082), INT16_C( 16433), INT16_C( 12899) }, { INT16_C( 8211), -INT16_C( 7493), INT16_C( 9687), -INT16_C( 8356), -INT16_C( 5887), INT16_C( 5045), INT16_C( 16095), -INT16_C( 1534) } }, { { -INT16_C( 5278), -INT16_C( 2323), INT16_C( 19357), INT16_C( 19639), -INT16_C( 7069), -INT16_C( 15690), INT16_C( 32353), INT16_C( 8114) }, { -INT16_C( 24330), INT16_C( 13140), -INT16_C( 1182), INT16_C( 9588), INT16_C( 13956), -INT16_C( 19140), -INT16_C( 24713), -INT16_C( 9753) }, { INT16_C( 3918), -INT16_C( 932), -INT16_C( 699), INT16_C( 5746), -INT16_C( 3011), INT16_C( 9164), -INT16_C( 24401), -INT16_C( 2416) } }, { { -INT16_C( 11126), INT16_C( 10192), -INT16_C( 30945), -INT16_C( 31884), INT16_C( 10859), -INT16_C( 13243), -INT16_C( 1879), -INT16_C( 24597) }, { INT16_C( 16536), -INT16_C( 1070), INT16_C( 18235), -INT16_C( 16608), INT16_C( 23677), -INT16_C( 2956), INT16_C( 23803), -INT16_C( 31282) }, { -INT16_C( 5615), -INT16_C( 333), -INT16_C( 17221), INT16_C( 16159), INT16_C( 7846), INT16_C( 1194), -INT16_C( 1365), INT16_C( 23481) } }, { { -INT16_C( 25040), INT16_C( 20653), INT16_C( 8485), -INT16_C( 28461), INT16_C( 6219), -INT16_C( 2980), INT16_C( 18192), -INT16_C( 22124) }, { INT16_C( 26247), -INT16_C( 15452), -INT16_C( 15187), INT16_C( 11138), -INT16_C( 2272), INT16_C( 6943), -INT16_C( 4781), -INT16_C( 31840) }, { -INT16_C( 20057), -INT16_C( 9740), -INT16_C( 3933), -INT16_C( 9675), -INT16_C( 432), -INT16_C( 632), -INT16_C( 2655), INT16_C( 21497) } }, { { INT16_C( 19851), -INT16_C( 20269), -INT16_C( 22930), -INT16_C( 17856), -INT16_C( 25409), -INT16_C( 
12370), INT16_C( 17124), INT16_C( 27512) }, { INT16_C( 7337), INT16_C( 22062), -INT16_C( 20000), INT16_C( 129), -INT16_C( 24152), -INT16_C( 1253), -INT16_C( 17266), INT16_C( 6782) }, { INT16_C( 4444), -INT16_C( 13647), INT16_C( 13995), -INT16_C( 71), INT16_C( 18727), INT16_C( 473), -INT16_C( 9023), INT16_C( 5694) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vqdmulhq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vqdmulhq_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqdmulhq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 500379413), -INT32_C( 31083559), INT32_C( 1368591925), INT32_C( 2052497007) }, { -INT32_C( 841622730), INT32_C( 1100873920), INT32_C( 1914938471), INT32_C( 1872022618) }, { -INT32_C( 196104259), -INT32_C( 15934501), INT32_C( 1220390819), INT32_C( 1789220059) } }, { { -INT32_C( 728995845), INT32_C( 1339273754), INT32_C( 799041216), INT32_C( 984217348) }, { -INT32_C( 1845002542), -INT32_C( 422271618), -INT32_C( 1537673399), -INT32_C( 1089213245) }, { INT32_C( 626314052), -INT32_C( 263348825), -INT32_C( 572141457), -INT32_C( 499199411) } }, { { INT32_C( 1855168596), INT32_C( 314467922), -INT32_C( 801022260), INT32_C( 671803989) }, { -INT32_C( 424013464), INT32_C( 13405879), INT32_C( 1235494278), INT32_C( 1695070225) }, { -INT32_C( 366296835), INT32_C( 1963097), 
-INT32_C( 460845614), INT32_C( 530274090) } }, { { -INT32_C( 1428906920), -INT32_C( 809725438), INT32_C( 1184890352), INT32_C( 1332652775) }, { INT32_C( 1916086459), INT32_C( 1030947511), INT32_C( 948311847), INT32_C( 664702927) }, { -INT32_C( 1274938323), -INT32_C( 388726790), INT32_C( 523238237), INT32_C( 412491243) } }, { { INT32_C( 768700971), -INT32_C( 184775420), INT32_C( 1899666570), INT32_C( 29403206) }, { -INT32_C( 2005600559), INT32_C( 533063672), -INT32_C( 833139714), INT32_C( 100005338) }, { -INT32_C( 717913312), -INT32_C( 45866270), -INT32_C( 736996375), INT32_C( 1369266) } }, { { INT32_C( 1798555495), -INT32_C( 547344556), INT32_C( 290495179), INT32_C( 336793923) }, { -INT32_C( 6519033), INT32_C( 1813930350), -INT32_C( 2026211924), -INT32_C( 762564501) }, { -INT32_C( 5459806), -INT32_C( 462329436), -INT32_C( 274090467), -INT32_C( 119594434) } }, { { INT32_C( 1262403575), -INT32_C( 1171611921), INT32_C( 2076998456), -INT32_C( 1819287668) }, { -INT32_C( 728618138), INT32_C( 960540812), -INT32_C( 1866433755), -INT32_C( 1570550613) }, { -INT32_C( 428319975), -INT32_C( 524046396), -INT32_C( 1805173246), INT32_C( 1330526248) } }, { { -INT32_C( 68312820), INT32_C( 2008356927), INT32_C( 536052115), -INT32_C( 961379744) }, { INT32_C( 983188654), INT32_C( 427023348), INT32_C( 27931478), -INT32_C( 1935471232) }, { -INT32_C( 31275857), INT32_C( 399358244), INT32_C( 6972219), INT32_C( 866466591) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vqdmulhq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vqdmulhq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, 
SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulh_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulh_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulhq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqdmulhq_s32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/qdmull.c000066400000000000000000000134411400333146700167640ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN qdmull #include "test-neon.h" #include "../../../simde/arm/neon/qdmull.h" static int test_simde_vqdmull_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int32_t r[4]; } test_vec[] = { { { INT16_C( 31681), INT16_C( 13027), -INT16_C( 13937), -INT16_C( 20674) }, { INT16_C( 10302), -INT16_C( 18422), INT16_C( 4806), -INT16_C( 12487) }, { INT32_C( 652755324), -INT32_C( 479966788), -INT32_C( 133962444), INT32_C( 516312476) } }, { { -INT16_C( 13071), INT16_C( 28436), INT16_C( 8073), -INT16_C( 13812) }, { -INT16_C( 4168), INT16_C( 8843), INT16_C( 11236), -INT16_C( 23047) }, { INT32_C( 108959856), INT32_C( 502919096), INT32_C( 181416456), INT32_C( 636650328) } }, { { -INT16_C( 8794), INT16_C( 14039), INT16_C( 5542), -INT16_C( 6939) }, { -INT16_C( 4291), INT16_C( 925), -INT16_C( 10750), -INT16_C( 3117) }, { INT32_C( 75470108), INT32_C( 25972150), -INT32_C( 119153000), INT32_C( 43257726) } }, { { -INT16_C( 6238), INT16_C( 11106), INT16_C( 28167), -INT16_C( 16394) }, { -INT16_C( 32418), INT16_C( 17122), -INT16_C( 9299), INT16_C( 21479) }, { INT32_C( 404446968), INT32_C( 380313864), -INT32_C( 523849866), -INT32_C( 704253452) } }, { { -INT16_C( 16712), INT16_C( 24457), INT16_C( 28627), INT16_C( 4419) }, { -INT16_C( 8098), INT16_C( 24596), -INT16_C( 6217), INT16_C( 22868) }, { INT32_C( 270667552), INT32_C( 1203088744), -INT32_C( 355948118), INT32_C( 202107384) } }, { { -INT16_C( 18737), 
-INT16_C( 10619), INT16_C( 31525), -INT16_C( 31851) }, { INT16_C( 30716), -INT16_C( 22075), -INT16_C( 21421), INT16_C( 3069) }, { -INT32_C( 1151051384), INT32_C( 468828850), -INT32_C( 1350594050), -INT32_C( 195501438) } }, { { -INT16_C( 31126), INT16_C( 15722), -INT16_C( 20747), INT16_C( 21582) }, { INT16_C( 25486), INT16_C( 17844), INT16_C( 2122), INT16_C( 6559) }, { -INT32_C( 1586554472), INT32_C( 561086736), -INT32_C( 88050268), INT32_C( 283112676) } }, { { INT16_C( 9407), -INT16_C( 6929), -INT16_C( 31329), -INT16_C( 25753) }, { INT16_C( 11516), INT16_C( 20293), INT16_C( 17112), INT16_C( 16987) }, { INT32_C( 216662024), -INT32_C( 281220394), -INT32_C( 1072203696), -INT32_C( 874932422) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int32x4_t r = simde_vqdmull_s16(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int32x4_t r = simde_vqdmull_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqdmull_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int64_t r[2]; } test_vec[] = { { { INT32_C( 1793774140), INT32_C( 2047372376) }, { -INT32_C( 988507864), -INT32_C( 890762382) }, { -INT64_C( 3546319687259673920), -INT64_C( 3647444588973519264) } }, { { INT32_C( 437091211), -INT32_C( 1030744986) }, { INT32_C( 2088454865), -INT32_C( 1587752347) }, { INT64_C( 1825690532123383030), INT64_C( 3273135541359964284) } }, { { -INT32_C( 66369884), -INT32_C( 613018702) }, { INT32_C( 396397476), 
INT32_C( 568428694) }, { -INT64_C( 52617709000025568), -INT64_C( 696914840350870376) } }, { { INT32_C( 1782377988), -INT32_C( 735196157) }, { INT32_C( 2085660694), INT32_C( 505261434) }, { INT64_C( 7434871422844807344), -INT64_C( 742932529114218276) } }, { { -INT32_C( 1508234765), -INT32_C( 511602372) }, { -INT32_C( 1292361444), -INT32_C( 1361782358) }, { INT64_C( 3898368917572801320), INT64_C( 1393382169001106352) } }, { { -INT32_C( 887615288), -INT32_C( 224442916) }, { INT32_C( 1735323885), -INT32_C( 1870295907) }, { -INT64_C( 3080600019915107760), INT64_C( 839549334299889624) } }, { { -INT32_C( 248078155), INT32_C( 1305655089) }, { -INT32_C( 2080388391), INT32_C( 1815204772) }, { INT64_C( 1032197827445397210), INT64_C( 4740062696277769416) } }, { { -INT32_C( 1086895645), INT32_C( 2092095119) }, { INT32_C( 1675894982), INT32_C( 1643407788) }, { -INT64_C( 3643045914826306780), INT64_C( 6876330823602773544) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int64x2_t r = simde_vqdmull_s32(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int64x2_t r = simde_vqdmull_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vqdmull_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqdmull_s32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/qmovn.c000066400000000000000000000360761400333146700166370ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN qmovn #include 
"test-neon.h" #include "../../../simde/arm/neon/qmovn.h" #include "../../../simde/arm/neon/and.h" #include "../../../simde/arm/neon/shl.h" #include "../../../simde/arm/neon/neg.h" #include "../../../simde/arm/neon/dup_n.h" static int test_simde_vqmovn_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int8_t r[8]; } test_vec[] = { { { INT16_C( 12), INT16_C( 2618), INT16_C( 1578), -INT16_C( 3171), INT16_C( 0), INT16_C( 4882), -INT16_C( 13300), INT16_C( 1) }, { INT8_C( 12), INT8_MAX, INT8_MAX, INT8_MIN, INT8_C( 0), INT8_MAX, INT8_MIN, INT8_C( 1) } }, { { INT16_C( 599), -INT16_C( 43), -INT16_C( 27285), -INT16_C( 97), -INT16_C( 3), INT16_C( 3), INT16_C( 86), -INT16_C( 2810) }, { INT8_MAX, -INT8_C( 43), INT8_MIN, -INT8_C( 97), -INT8_C( 3), INT8_C( 3), INT8_C( 86), INT8_MIN } }, { { -INT16_C( 21), -INT16_C( 1), INT16_C( 201), INT16_C( 58), INT16_C( 0), INT16_C( 2864), -INT16_C( 10), -INT16_C( 32766) }, { -INT8_C( 21), -INT8_C( 1), INT8_MAX, INT8_C( 58), INT8_C( 0), INT8_MAX, -INT8_C( 10), INT8_MIN } }, { { INT16_C( 918), INT16_C( 44), -INT16_C( 93), -INT16_C( 1357), INT16_C( 623), INT16_C( 1), INT16_C( 4), INT16_C( 1) }, { INT8_MAX, INT8_C( 44), -INT8_C( 93), INT8_MIN, INT8_MAX, INT8_C( 1), INT8_C( 4), INT8_C( 1) } }, { { INT16_C( 106), INT16_C( 7840), INT16_C( 19948), -INT16_C( 618), -INT16_C( 23), -INT16_C( 408), -INT16_C( 1), INT16_C( 2676) }, { INT8_C( 106), INT8_MAX, INT8_MAX, INT8_MIN, -INT8_C( 23), INT8_MIN, -INT8_C( 1), INT8_MAX } }, { { INT16_C( 10178), INT16_C( 29083), -INT16_C( 1), INT16_C( 108), INT16_C( 179), -INT16_C( 217), INT16_C( 1), INT16_C( 0) }, { INT8_MAX, INT8_MAX, -INT8_C( 1), INT8_C( 108), INT8_MAX, INT8_MIN, INT8_C( 1), INT8_C( 0) } }, { { INT16_C( 4038), -INT16_C( 1), INT16_C( 0), INT16_C( 4), -INT16_C( 602), -INT16_C( 63), -INT16_C( 4), -INT16_C( 3598) }, { INT8_MAX, -INT8_C( 1), INT8_C( 0), INT8_C( 4), INT8_MIN, -INT8_C( 63), -INT8_C( 4), INT8_MIN } }, { { -INT16_C( 1871), -INT16_C( 51), INT16_C( 209), INT16_C( 23), 
INT16_C( 118), -INT16_C( 4), INT16_C( 168), -INT16_C( 40) }, { INT8_MIN, -INT8_C( 51), INT8_MAX, INT8_C( 23), INT8_C( 118), -INT8_C( 4), INT8_MAX, -INT8_C( 40) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int8x8_t r = simde_vqmovn_s16(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t s = simde_vandq_s16(simde_test_arm_neon_random_i16x8(), simde_vdupq_n_s16(15)); simde_int16x8_t a = simde_vshlq_s16(simde_test_arm_neon_random_i16x8(), simde_vnegq_s16(s)); simde_int8x8_t r = simde_vqmovn_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqmovn_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int16_t r[4]; } test_vec[] = { { { -INT32_C( 313), INT32_C( 228909576), -INT32_C( 4), -INT32_C( 19412) }, { -INT16_C( 313), INT16_MAX, -INT16_C( 4), -INT16_C( 19412) } }, { { INT32_C( 67996), INT32_C( 67), -INT32_C( 6652), INT32_C( 395687) }, { INT16_MAX, INT16_C( 67), -INT16_C( 6652), INT16_MAX } }, { { -INT32_C( 895), INT32_C( 752146193), INT32_C( 37), -INT32_C( 3293944) }, { -INT16_C( 895), INT16_MAX, INT16_C( 37), INT16_MIN } }, { { INT32_C( 1375968), -INT32_C( 16384195), -INT32_C( 572655), INT32_C( 131998101) }, { INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX } }, { { -INT32_C( 409962415), INT32_C( 211718), INT32_C( 90), -INT32_C( 436309) }, { INT16_MIN, INT16_MAX, INT16_C( 90), INT16_MIN } }, { { INT32_C( 0), INT32_C( 3784027), INT32_C( 1395), INT32_C( 5674) }, { INT16_C( 0), INT16_MAX, INT16_C( 1395), INT16_C( 5674) } }, { { INT32_C( 43298), -INT32_C( 37946), -INT32_C( 128), INT32_C( 1445) }, { INT16_MAX, INT16_MIN, -INT16_C( 128), INT16_C( 1445) } }, { { INT32_C( 612), -INT32_C( 2), INT32_C( 110), -INT32_C( 49395955) }, { 
INT16_C( 612), -INT16_C( 2), INT16_C( 110), INT16_MIN } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int16x4_t r = simde_vqmovn_s32(a); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t s = simde_vandq_s32(simde_test_arm_neon_random_i32x4(), simde_vdupq_n_s32(31)); simde_int32x4_t a = simde_vshlq_s32(simde_test_arm_neon_random_i32x4(), simde_vnegq_s32(s)); simde_int16x4_t r = simde_vqmovn_s32(a); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqmovn_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int32_t r[2]; } test_vec[] = { { { INT64_C( 139526682078321), INT64_C( 2088817746455) }, { INT32_MAX, INT32_MAX } }, { { -INT64_C( 17035200), -INT64_C( 955712) }, { -INT32_C( 17035200), -INT32_C( 955712) } }, { { INT64_C( 1425567), -INT64_C( 3) }, { INT32_C( 1425567), -INT32_C( 3) } }, { { -INT64_C( 8297862809411069), INT64_C( 82481668) }, { INT32_MIN, INT32_C( 82481668) } }, { { -INT64_C( 86), -INT64_C( 6848) }, { -INT32_C( 86), -INT32_C( 6848) } }, { { -INT64_C( 24045364990477906), -INT64_C( 59096770) }, { INT32_MIN, -INT32_C( 59096770) } }, { { INT64_C( 4080946299684606), -INT64_C( 2138562467329891) }, { INT32_MAX, INT32_MIN } }, { { -INT64_C( 273036987440), INT64_C( 31037958) }, { INT32_MIN, INT32_C( 31037958) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int32x2_t r = simde_vqmovn_s64(a); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t s = simde_vandq_s64(simde_test_arm_neon_random_i64x2(), simde_vdupq_n_s64(63)); 
simde_int64x2_t a = simde_vshlq_s64(simde_test_arm_neon_random_i64x2(), simde_vnegq_s64(s)); simde_int32x2_t r = simde_vqmovn_s64(a); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqmovn_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint8_t r[8]; } test_vec[] = { { { UINT16_C( 1408), UINT16_C( 423), UINT16_C( 15), UINT16_C( 5546), UINT16_C( 459), UINT16_C( 3), UINT16_C( 5), UINT16_C( 9903) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 15), UINT8_MAX, UINT8_MAX, UINT8_C( 3), UINT8_C( 5), UINT8_MAX } }, { { UINT16_C( 2), UINT16_C( 298), UINT16_C(65062), UINT16_C( 3), UINT16_C( 84), UINT16_C( 315), UINT16_C( 613), UINT16_C( 0) }, { UINT8_C( 2), UINT8_MAX, UINT8_MAX, UINT8_C( 3), UINT8_C( 84), UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT16_C( 3), UINT16_C( 88), UINT16_C( 85), UINT16_C( 603), UINT16_C( 0), UINT16_C( 266), UINT16_C( 3), UINT16_C( 482) }, { UINT8_C( 3), UINT8_C( 88), UINT8_C( 85), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 3), UINT8_MAX } }, { { UINT16_C( 5), UINT16_C( 43), UINT16_C(31330), UINT16_C( 4222), UINT16_C( 75), UINT16_C( 230), UINT16_C( 15), UINT16_C(10269) }, { UINT8_C( 5), UINT8_C( 43), UINT8_MAX, UINT8_MAX, UINT8_C( 75), UINT8_C(230), UINT8_C( 15), UINT8_MAX } }, { { UINT16_C( 9768), UINT16_C( 374), UINT16_C(23872), UINT16_C( 4467), UINT16_C( 0), UINT16_C( 1), UINT16_C( 465), UINT16_C( 2289) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 1), UINT8_MAX, UINT8_MAX } }, { { UINT16_C( 9), UINT16_C( 9), UINT16_C( 113), UINT16_C( 1321), UINT16_C( 196), UINT16_C( 5575), UINT16_C( 1620), UINT16_C( 227) }, { UINT8_C( 9), UINT8_C( 9), UINT8_C(113), UINT8_MAX, UINT8_C(196), UINT8_MAX, UINT8_MAX, UINT8_C(227) } }, { { UINT16_C( 6), UINT16_C( 873), UINT16_C( 28), UINT16_C( 115), UINT16_C( 10), UINT16_C( 1), UINT16_C( 4430), UINT16_C( 33) }, { UINT8_C( 6), UINT8_MAX, UINT8_C( 28), 
UINT8_C(115), UINT8_C( 10), UINT8_C( 1), UINT8_MAX, UINT8_C( 33) } }, { { UINT16_C( 138), UINT16_C( 1701), UINT16_C( 75), UINT16_C( 744), UINT16_C( 88), UINT16_C( 367), UINT16_C( 23), UINT16_C( 246) }, { UINT8_C(138), UINT8_MAX, UINT8_C( 75), UINT8_MAX, UINT8_C( 88), UINT8_MAX, UINT8_C( 23), UINT8_C(246) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint8x8_t r = simde_vqmovn_u16(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t s = simde_vandq_s16(simde_test_arm_neon_random_i16x8(), simde_vdupq_n_s16(15)); simde_uint16x8_t a = simde_vshlq_u16(simde_test_arm_neon_random_u16x8(), simde_vnegq_s16(s)); simde_uint8x8_t r = simde_vqmovn_u16(a); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqmovn_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint16_t r[4]; } test_vec[] = { { { UINT32_C( 157), UINT32_C( 14), UINT32_C( 23), UINT32_C( 46) }, { UINT16_C( 157), UINT16_C( 14), UINT16_C( 23), UINT16_C( 46) } }, { { UINT32_C( 1647047), UINT32_C( 0), UINT32_C( 106273110), UINT32_C( 68243) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { UINT32_C( 99), UINT32_C( 504538727), UINT32_C( 1480), UINT32_C( 394467) }, { UINT16_C( 99), UINT16_MAX, UINT16_C( 1480), UINT16_MAX } }, { { UINT32_C( 1), UINT32_C( 1555382), UINT32_C( 3035558), UINT32_C( 9561996) }, { UINT16_C( 1), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT32_C( 1571819), UINT32_C( 1870), UINT32_C( 15), UINT32_C( 67123) }, { UINT16_MAX, UINT16_C( 1870), UINT16_C( 15), UINT16_MAX } }, { { UINT32_C( 442), UINT32_C( 4721), UINT32_C( 212), UINT32_C( 963971) }, { UINT16_C( 442), UINT16_C( 4721), UINT16_C( 212), UINT16_MAX } }, { { UINT32_C( 2436), UINT32_C( 
23), UINT32_C( 25), UINT32_C( 28600) }, { UINT16_C( 2436), UINT16_C( 23), UINT16_C( 25), UINT16_C(28600) } }, { { UINT32_C( 58), UINT32_C( 157917), UINT32_C( 56493959), UINT32_C( 7) }, { UINT16_C( 58), UINT16_MAX, UINT16_MAX, UINT16_C( 7) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint16x4_t r = simde_vqmovn_u32(a); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t s = simde_vandq_s32(simde_test_arm_neon_random_i32x4(), simde_vdupq_n_s32(31)); simde_uint32x4_t a = simde_vshlq_u32(simde_test_arm_neon_random_u32x4(), simde_vnegq_s32(s)); simde_uint16x4_t r = simde_vqmovn_u32(a); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqmovn_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint32_t r[2]; } test_vec[] = { { { UINT64_C( 868814), UINT64_C( 1506946) }, { UINT32_C( 868814), UINT32_C( 1506946) } }, { { UINT64_C( 3231520), UINT64_C( 278576143467030) }, { UINT32_C( 3231520), UINT32_MAX } }, { { UINT64_C( 29), UINT64_C( 288187650150740947) }, { UINT32_C( 29), UINT32_MAX } }, { { UINT64_C( 141800033160582), UINT64_C( 192885) }, { UINT32_MAX, UINT32_C( 192885) } }, { { UINT64_C( 31175358884501), UINT64_C( 10525926877) }, { UINT32_MAX, UINT32_MAX } }, { { UINT64_C( 7224308), UINT64_C( 142567009801148782) }, { UINT32_C( 7224308), UINT32_MAX } }, { { UINT64_C( 200), UINT64_C( 979) }, { UINT32_C( 200), UINT32_C( 979) } }, { { UINT64_C( 60957747479), UINT64_C( 573775) }, { UINT32_MAX, UINT32_C( 573775) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint32x2_t r = simde_vqmovn_u64(a); simde_test_arm_neon_assert_equal_u32x2(r, 
simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t s = simde_vandq_s64(simde_test_arm_neon_random_i64x2(), simde_vdupq_n_s64(63)); simde_uint64x2_t a = simde_vshlq_u64(simde_test_arm_neon_random_u64x2(), simde_vnegq_s64(s)); simde_uint32x2_t r = simde_vqmovn_u64(a); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vqmovn_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqmovn_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqmovn_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vqmovn_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vqmovn_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vqmovn_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/qmovn_high.c000066400000000000000000000605041400333146700176270ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN qmovn_high #include "test-neon.h" #include "../../../simde/arm/neon/and.h" #include "../../../simde/arm/neon/dup_n.h" #include "../../../simde/arm/neon/neg.h" #include "../../../simde/arm/neon/qmovn_high.h" #include "../../../simde/arm/neon/shl.h" static int test_simde_vqmovn_high_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t r[8]; int16_t a[8]; int8_t r_[16]; } test_vec[] = { { { -INT8_C( 41), INT8_C( 124), INT8_C( 42), -INT8_C( 105), -INT8_C( 24), INT8_C( 41), -INT8_C( 16), -INT8_C( 76) }, { -INT16_C( 171), -INT16_C( 1), -INT16_C( 586), INT16_C( 1), -INT16_C( 55), INT16_C( 2), -INT16_C( 16), -INT16_C( 12) }, { -INT8_C( 41), INT8_C( 124), INT8_C( 42), -INT8_C( 105), -INT8_C( 24), INT8_C( 41), -INT8_C( 16), -INT8_C( 76), INT8_MIN, -INT8_C( 1), INT8_MIN, INT8_C( 1), -INT8_C( 55), INT8_C( 2), -INT8_C( 16), -INT8_C( 12) } }, { { -INT8_C( 50), -INT8_C( 109), INT8_C( 47), -INT8_C( 30), INT8_C( 64), INT8_C( 29), -INT8_C( 77), INT8_C( 10) }, { -INT16_C( 2), -INT16_C( 10467), INT16_C( 1), INT16_C( 13137), -INT16_C( 
8144), INT16_C( 24), -INT16_C( 3545), -INT16_C( 1011) }, { -INT8_C( 50), -INT8_C( 109), INT8_C( 47), -INT8_C( 30), INT8_C( 64), INT8_C( 29), -INT8_C( 77), INT8_C( 10), -INT8_C( 2), INT8_MIN, INT8_C( 1), INT8_MAX, INT8_MIN, INT8_C( 24), INT8_MIN, INT8_MIN } }, { { INT8_C( 57), -INT8_C( 114), INT8_C( 53), -INT8_C( 58), -INT8_C( 21), -INT8_C( 75), -INT8_C( 48), INT8_C( 29) }, { -INT16_C( 6), INT16_C( 3281), INT16_C( 1353), -INT16_C( 88), INT16_C( 807), -INT16_C( 807), INT16_C( 13580), -INT16_C( 4) }, { INT8_C( 57), -INT8_C( 114), INT8_C( 53), -INT8_C( 58), -INT8_C( 21), -INT8_C( 75), -INT8_C( 48), INT8_C( 29), -INT8_C( 6), INT8_MAX, INT8_MAX, -INT8_C( 88), INT8_MAX, INT8_MIN, INT8_MAX, -INT8_C( 4) } }, { { INT8_C( 48), -INT8_C( 99), -INT8_C( 78), INT8_C( 84), INT8_C( 32), -INT8_C( 54), INT8_C( 106), INT8_C( 69) }, { INT16_C( 6), -INT16_C( 1), INT16_C( 381), INT16_C( 296), INT16_C( 0), -INT16_C( 2), -INT16_C( 4), -INT16_C( 1) }, { INT8_C( 48), -INT8_C( 99), -INT8_C( 78), INT8_C( 84), INT8_C( 32), -INT8_C( 54), INT8_C( 106), INT8_C( 69), INT8_C( 6), -INT8_C( 1), INT8_MAX, INT8_MAX, INT8_C( 0), -INT8_C( 2), -INT8_C( 4), -INT8_C( 1) } }, { { -INT8_C( 17), -INT8_C( 23), -INT8_C( 122), INT8_C( 51), INT8_C( 117), INT8_C( 103), INT8_C( 13), -INT8_C( 44) }, { INT16_C( 12294), -INT16_C( 1), INT16_C( 161), -INT16_C( 5), INT16_C( 1043), -INT16_C( 1295), INT16_C( 0), INT16_C( 28) }, { -INT8_C( 17), -INT8_C( 23), -INT8_C( 122), INT8_C( 51), INT8_C( 117), INT8_C( 103), INT8_C( 13), -INT8_C( 44), INT8_MAX, -INT8_C( 1), INT8_MAX, -INT8_C( 5), INT8_MAX, INT8_MIN, INT8_C( 0), INT8_C( 28) } }, { { INT8_C( 58), INT8_C( 82), INT8_C( 69), -INT8_C( 111), -INT8_C( 18), -INT8_C( 79), -INT8_C( 118), -INT8_C( 13) }, { -INT16_C( 37), -INT16_C( 16), INT16_C( 5045), INT16_C( 1), INT16_C( 2068), -INT16_C( 8), INT16_C( 2812), INT16_C( 1872) }, { INT8_C( 58), INT8_C( 82), INT8_C( 69), -INT8_C( 111), -INT8_C( 18), -INT8_C( 79), -INT8_C( 118), -INT8_C( 13), -INT8_C( 37), -INT8_C( 16), INT8_MAX, INT8_C( 
1), INT8_MAX, -INT8_C( 8), INT8_MAX, INT8_MAX } }, { { INT8_C( 106), INT8_C( 76), INT8_C( 50), -INT8_C( 84), -INT8_C( 17), -INT8_C( 115), -INT8_C( 11), -INT8_C( 61) }, { INT16_C( 0), -INT16_C( 30), -INT16_C( 12), -INT16_C( 1), INT16_C( 24117), INT16_C( 586), -INT16_C( 195), INT16_C( 95) }, { INT8_C( 106), INT8_C( 76), INT8_C( 50), -INT8_C( 84), -INT8_C( 17), -INT8_C( 115), -INT8_C( 11), -INT8_C( 61), INT8_C( 0), -INT8_C( 30), -INT8_C( 12), -INT8_C( 1), INT8_MAX, INT8_MAX, INT8_MIN, INT8_C( 95) } }, { { -INT8_C( 81), INT8_C( 110), INT8_C( 13), -INT8_C( 104), -INT8_C( 103), INT8_C( 43), -INT8_C( 102), INT8_C( 121) }, { -INT16_C( 1751), -INT16_C( 1), -INT16_C( 3), -INT16_C( 1039), -INT16_C( 13), -INT16_C( 2), INT16_C( 600), -INT16_C( 19) }, { -INT8_C( 81), INT8_C( 110), INT8_C( 13), -INT8_C( 104), -INT8_C( 103), INT8_C( 43), -INT8_C( 102), INT8_C( 121), INT8_MIN, -INT8_C( 1), -INT8_C( 3), INT8_MIN, -INT8_C( 13), -INT8_C( 2), INT8_MAX, -INT8_C( 19) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t r = simde_vld1_s8(test_vec[i].r); simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int8x16_t r_ = simde_vqmovn_high_s16(r, a); simde_test_arm_neon_assert_equal_i8x16(r_, simde_vld1q_s8(test_vec[i].r_)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t r = simde_test_arm_neon_random_i8x8(); simde_int16x8_t s = simde_vandq_s16(simde_test_arm_neon_random_i16x8(), simde_vdupq_n_s16(15)); simde_int16x8_t a = simde_vshlq_s16(simde_test_arm_neon_random_i16x8(), simde_vnegq_s16(s)); simde_int8x16_t r_ = simde_vqmovn_high_s16(r, a); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r_, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqmovn_high_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t r[4]; int32_t a[4]; int16_t r_[8]; } test_vec[] = { 
{ { -INT16_C( 15725), -INT16_C( 13295), INT16_C( 5444), INT16_C( 13069) }, { INT32_C( 0), -INT32_C( 2066), -INT32_C( 11), -INT32_C( 16) }, { -INT16_C( 15725), -INT16_C( 13295), INT16_C( 5444), INT16_C( 13069), INT16_C( 0), -INT16_C( 2066), -INT16_C( 11), -INT16_C( 16) } }, { { INT16_C( 9425), INT16_C( 17262), -INT16_C( 25798), INT16_C( 29641) }, { -INT32_C( 1524), -INT32_C( 1), INT32_C( 4385743), INT32_C( 63) }, { INT16_C( 9425), INT16_C( 17262), -INT16_C( 25798), INT16_C( 29641), -INT16_C( 1524), -INT16_C( 1), INT16_MAX, INT16_C( 63) } }, { { -INT16_C( 28066), -INT16_C( 17139), -INT16_C( 23754), INT16_C( 32468) }, { -INT32_C( 15), INT32_C( 0), INT32_C( 532089106), -INT32_C( 513) }, { -INT16_C( 28066), -INT16_C( 17139), -INT16_C( 23754), INT16_C( 32468), -INT16_C( 15), INT16_C( 0), INT16_MAX, -INT16_C( 513) } }, { { INT16_C( 4028), -INT16_C( 17938), -INT16_C( 1645), INT16_C( 13940) }, { INT32_C( 372262), INT32_C( 8026), INT32_C( 2), -INT32_C( 15418) }, { INT16_C( 4028), -INT16_C( 17938), -INT16_C( 1645), INT16_C( 13940), INT16_MAX, INT16_C( 8026), INT16_C( 2), -INT16_C( 15418) } }, { { INT16_C( 15621), -INT16_C( 27897), INT16_C( 17166), -INT16_C( 5202) }, { -INT32_C( 1), INT32_C( 370136), INT32_C( 98573054), -INT32_C( 15) }, { INT16_C( 15621), -INT16_C( 27897), INT16_C( 17166), -INT16_C( 5202), -INT16_C( 1), INT16_MAX, INT16_MAX, -INT16_C( 15) } }, { { -INT16_C( 32465), INT16_C( 6761), INT16_C( 5320), -INT16_C( 21933) }, { INT32_C( 221), -INT32_C( 2648018), -INT32_C( 3), INT32_C( 1) }, { -INT16_C( 32465), INT16_C( 6761), INT16_C( 5320), -INT16_C( 21933), INT16_C( 221), INT16_MIN, -INT16_C( 3), INT16_C( 1) } }, { { INT16_C( 10176), -INT16_C( 30030), INT16_C( 6961), INT16_C( 11837) }, { -INT32_C( 114), -INT32_C( 933690393), INT32_C( 47), INT32_C( 3555) }, { INT16_C( 10176), -INT16_C( 30030), INT16_C( 6961), INT16_C( 11837), -INT16_C( 114), INT16_MIN, INT16_C( 47), INT16_C( 3555) } }, { { -INT16_C( 7441), INT16_C( 4172), -INT16_C( 17272), INT16_C( 16880) }, { INT32_C( 
10512), -INT32_C( 41434), INT32_C( 103), INT32_C( 1131) }, { -INT16_C( 7441), INT16_C( 4172), -INT16_C( 17272), INT16_C( 16880), INT16_C( 10512), INT16_MIN, INT16_C( 103), INT16_C( 1131) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t r = simde_vld1_s16(test_vec[i].r); simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int16x8_t r_ = simde_vqmovn_high_s32(r, a); simde_test_arm_neon_assert_equal_i16x8(r_, simde_vld1q_s16(test_vec[i].r_)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t r = simde_test_arm_neon_random_i16x4(); simde_int32x4_t s = simde_vandq_s32(simde_test_arm_neon_random_i32x4(), simde_vdupq_n_s32(31)); simde_int32x4_t a = simde_vshlq_s32(simde_test_arm_neon_random_i32x4(), simde_vnegq_s32(s)); simde_int16x8_t r_ = simde_vqmovn_high_s32(r, a); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r_, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqmovn_high_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t r[2]; int64_t a[2]; int32_t r_[4]; } test_vec[] = { { { -INT32_C( 1906760496), INT32_C( 1718814463) }, { INT64_C( 24815211807137), -INT64_C( 1160) }, { -INT32_C( 1906760496), INT32_C( 1718814463), INT32_MAX, -INT32_C( 1160) } }, { { -INT32_C( 316764895), -INT32_C( 38613431) }, { -INT64_C( 145384723163912), -INT64_C( 1094) }, { -INT32_C( 316764895), -INT32_C( 38613431), INT32_MIN, -INT32_C( 1094) } }, { { -INT32_C( 779066945), INT32_C( 1600804844) }, { INT64_C( 10057642), -INT64_C( 1094) }, { -INT32_C( 779066945), INT32_C( 1600804844), INT32_C( 10057642), -INT32_C( 1094) } }, { { INT32_C( 1252458329), -INT32_C( 1463815501) }, { INT64_C( 28), -INT64_C( 2) }, { INT32_C( 1252458329), -INT32_C( 1463815501), INT32_C( 28), -INT32_C( 2) } }, { { -INT32_C( 1136281637), -INT32_C( 1155525315) }, { -INT64_C( 
5572144560772544), -INT64_C( 7) }, { -INT32_C( 1136281637), -INT32_C( 1155525315), INT32_MIN, -INT32_C( 7) } }, { { INT32_C( 1052741768), INT32_C( 1016459777) }, { INT64_C( 8192254), INT64_C( 779949925665) }, { INT32_C( 1052741768), INT32_C( 1016459777), INT32_C( 8192254), INT32_MAX } }, { { INT32_C( 349986035), -INT32_C( 1983095886) }, { -INT64_C( 34635), INT64_C( 9221390498424859) }, { INT32_C( 349986035), -INT32_C( 1983095886), -INT32_C( 34635), INT32_MAX } }, { { -INT32_C( 1365712188), INT32_C( 889823723) }, { INT64_C( 24599642), -INT64_C( 9686486) }, { -INT32_C( 1365712188), INT32_C( 889823723), INT32_C( 24599642), -INT32_C( 9686486) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t r = simde_vld1_s32(test_vec[i].r); simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int32x4_t r_ = simde_vqmovn_high_s64(r, a); simde_test_arm_neon_assert_equal_i32x4(r_, simde_vld1q_s32(test_vec[i].r_)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t r = simde_test_arm_neon_random_i32x2(); simde_int64x2_t s = simde_vandq_s64(simde_test_arm_neon_random_i64x2(), simde_vdupq_n_s64(63)); simde_int64x2_t a = simde_vshlq_s64(simde_test_arm_neon_random_i64x2(), simde_vnegq_s64(s)); simde_int32x4_t r_ = simde_vqmovn_high_s64(r, a); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r_, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqmovn_high_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t r[8]; uint16_t a[8]; uint8_t r_[16]; } test_vec[] = { { { UINT8_C(228), UINT8_C(126), UINT8_C(244), UINT8_C(201), UINT8_C(244), UINT8_C(214), UINT8_C( 11), UINT8_C(240) }, { UINT16_C( 1088), UINT16_C( 60), UINT16_C( 29), UINT16_C( 99), UINT16_C( 603), UINT16_C( 6179), UINT16_C( 1233), UINT16_C( 201) }, { UINT8_C(228), UINT8_C(126), UINT8_C(244), 
UINT8_C(201), UINT8_C(244), UINT8_C(214), UINT8_C( 11), UINT8_C(240), UINT8_MAX, UINT8_C( 60), UINT8_C( 29), UINT8_C( 99), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C(201) } }, { { UINT8_C( 43), UINT8_C( 72), UINT8_C(155), UINT8_C(214), UINT8_C(248), UINT8_C( 84), UINT8_C(251), UINT8_C(189) }, { UINT16_C( 4), UINT16_C(13913), UINT16_C( 361), UINT16_C( 8), UINT16_C( 10), UINT16_C( 3933), UINT16_C( 437), UINT16_C( 1931) }, { UINT8_C( 43), UINT8_C( 72), UINT8_C(155), UINT8_C(214), UINT8_C(248), UINT8_C( 84), UINT8_C(251), UINT8_C(189), UINT8_C( 4), UINT8_MAX, UINT8_MAX, UINT8_C( 8), UINT8_C( 10), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(184), UINT8_C( 20), UINT8_C(233), UINT8_C( 79), UINT8_C( 28), UINT8_C(193), UINT8_C( 39), UINT8_C( 40) }, { UINT16_C( 26), UINT16_C( 928), UINT16_C( 194), UINT16_C( 401), UINT16_C( 2), UINT16_C( 121), UINT16_C( 3), UINT16_C( 3779) }, { UINT8_C(184), UINT8_C( 20), UINT8_C(233), UINT8_C( 79), UINT8_C( 28), UINT8_C(193), UINT8_C( 39), UINT8_C( 40), UINT8_C( 26), UINT8_MAX, UINT8_C(194), UINT8_MAX, UINT8_C( 2), UINT8_C(121), UINT8_C( 3), UINT8_MAX } }, { { UINT8_C( 23), UINT8_C( 49), UINT8_C(121), UINT8_C(191), UINT8_C(252), UINT8_C(127), UINT8_C( 30), UINT8_C( 9) }, { UINT16_C( 62), UINT16_C( 635), UINT16_C( 13), UINT16_C( 3571), UINT16_C( 5), UINT16_C( 919), UINT16_C( 310), UINT16_C( 31) }, { UINT8_C( 23), UINT8_C( 49), UINT8_C(121), UINT8_C(191), UINT8_C(252), UINT8_C(127), UINT8_C( 30), UINT8_C( 9), UINT8_C( 62), UINT8_MAX, UINT8_C( 13), UINT8_MAX, UINT8_C( 5), UINT8_MAX, UINT8_MAX, UINT8_C( 31) } }, { { UINT8_C(164), UINT8_C( 5), UINT8_C(134), UINT8_C( 65), UINT8_C(237), UINT8_C(169), UINT8_C(202), UINT8_C(167) }, { UINT16_C( 2), UINT16_C( 4), UINT16_C( 2), UINT16_C( 0), UINT16_C( 1291), UINT16_C(10175), UINT16_C( 72), UINT16_C( 1) }, { UINT8_C(164), UINT8_C( 5), UINT8_C(134), UINT8_C( 65), UINT8_C(237), UINT8_C(169), UINT8_C(202), UINT8_C(167), UINT8_C( 2), UINT8_C( 4), UINT8_C( 2), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 
72), UINT8_C( 1) } }, { { UINT8_C(131), UINT8_C( 4), UINT8_C(251), UINT8_C(145), UINT8_C(220), UINT8_C(154), UINT8_C(186), UINT8_C( 49) }, { UINT16_C( 77), UINT16_C( 2), UINT16_C( 49), UINT16_C( 822), UINT16_C( 399), UINT16_C( 10), UINT16_C( 5), UINT16_C( 5) }, { UINT8_C(131), UINT8_C( 4), UINT8_C(251), UINT8_C(145), UINT8_C(220), UINT8_C(154), UINT8_C(186), UINT8_C( 49), UINT8_C( 77), UINT8_C( 2), UINT8_C( 49), UINT8_MAX, UINT8_MAX, UINT8_C( 10), UINT8_C( 5), UINT8_C( 5) } }, { { UINT8_C(189), UINT8_C(182), UINT8_C(140), UINT8_C(183), UINT8_C(177), UINT8_C( 1), UINT8_C(184), UINT8_C(248) }, { UINT16_C(13223), UINT16_C( 253), UINT16_C( 3), UINT16_C( 0), UINT16_C( 1), UINT16_C( 82), UINT16_C( 2766), UINT16_C( 337) }, { UINT8_C(189), UINT8_C(182), UINT8_C(140), UINT8_C(183), UINT8_C(177), UINT8_C( 1), UINT8_C(184), UINT8_C(248), UINT8_MAX, UINT8_C(253), UINT8_C( 3), UINT8_C( 0), UINT8_C( 1), UINT8_C( 82), UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 1), UINT8_C(117), UINT8_C(136), UINT8_C( 13), UINT8_C(105), UINT8_C(231), UINT8_C(114), UINT8_C(248) }, { UINT16_C( 772), UINT16_C( 2), UINT16_C(41070), UINT16_C( 55), UINT16_C(15125), UINT16_C( 63), UINT16_C( 14), UINT16_C( 1) }, { UINT8_C( 1), UINT8_C(117), UINT8_C(136), UINT8_C( 13), UINT8_C(105), UINT8_C(231), UINT8_C(114), UINT8_C(248), UINT8_MAX, UINT8_C( 2), UINT8_MAX, UINT8_C( 55), UINT8_MAX, UINT8_C( 63), UINT8_C( 14), UINT8_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t r = simde_vld1_u8(test_vec[i].r); simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint8x16_t r_ = simde_vqmovn_high_u16(r, a); simde_test_arm_neon_assert_equal_u8x16(r_, simde_vld1q_u8(test_vec[i].r_)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t r = simde_test_arm_neon_random_u8x8(); simde_int16x8_t s = simde_vandq_s16(simde_test_arm_neon_random_i16x8(), simde_vdupq_n_s16(15)); simde_uint16x8_t a = 
simde_vshlq_u16(simde_test_arm_neon_random_u16x8(), simde_vnegq_s16(s)); simde_uint8x16_t r_ = simde_vqmovn_high_u16(r, a); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r_, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqmovn_high_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t r[4]; uint32_t a[4]; uint16_t r_[8]; } test_vec[] = { { { UINT16_C(16687), UINT16_C(10500), UINT16_C( 9731), UINT16_C(45994) }, { UINT32_C( 13651), UINT32_C( 74952074), UINT32_C(2069626105), UINT32_C( 107346) }, { UINT16_C(16687), UINT16_C(10500), UINT16_C( 9731), UINT16_C(45994), UINT16_C(13651), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(13209), UINT16_C(48667), UINT16_C(12327), UINT16_C(51627) }, { UINT32_C( 7335), UINT32_C( 14346), UINT32_C( 10), UINT32_C( 140622373) }, { UINT16_C(13209), UINT16_C(48667), UINT16_C(12327), UINT16_C(51627), UINT16_C( 7335), UINT16_C(14346), UINT16_C( 10), UINT16_MAX } }, { { UINT16_C( 3311), UINT16_C(48966), UINT16_C(44359), UINT16_C(57878) }, { UINT32_C( 987656421), UINT32_C( 3677986), UINT32_C( 321929665), UINT32_C( 169339866) }, { UINT16_C( 3311), UINT16_C(48966), UINT16_C(44359), UINT16_C(57878), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C( 2842), UINT16_C(32216), UINT16_C(62574), UINT16_C( 4347) }, { UINT32_C( 0), UINT32_C( 200), UINT32_C( 1), UINT32_C( 161853309) }, { UINT16_C( 2842), UINT16_C(32216), UINT16_C(62574), UINT16_C( 4347), UINT16_C( 0), UINT16_C( 200), UINT16_C( 1), UINT16_MAX } }, { { UINT16_C(32557), UINT16_C(40717), UINT16_C(22859), UINT16_C(35062) }, { UINT32_C( 160838827), UINT32_C( 9), UINT32_C( 85), UINT32_C( 34130831) }, { UINT16_C(32557), UINT16_C(40717), UINT16_C(22859), UINT16_C(35062), UINT16_MAX, UINT16_C( 9), UINT16_C( 85), UINT16_MAX } }, { { UINT16_C( 2412), UINT16_C(18216), UINT16_C(63024), UINT16_C(27003) }, { UINT32_C( 8), 
UINT32_C( 12926794), UINT32_C( 733), UINT32_C( 472135829) }, { UINT16_C( 2412), UINT16_C(18216), UINT16_C(63024), UINT16_C(27003), UINT16_C( 8), UINT16_MAX, UINT16_C( 733), UINT16_MAX } }, { { UINT16_C(44161), UINT16_C(41727), UINT16_C(45661), UINT16_C(45857) }, { UINT32_C( 140), UINT32_C( 101454), UINT32_C( 397677545), UINT32_C( 63146535) }, { UINT16_C(44161), UINT16_C(41727), UINT16_C(45661), UINT16_C(45857), UINT16_C( 140), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C( 3189), UINT16_C(50251), UINT16_C(18427), UINT16_C(39964) }, { UINT32_C( 30161), UINT32_C( 8303449), UINT32_C( 1538102), UINT32_C( 69809) }, { UINT16_C( 3189), UINT16_C(50251), UINT16_C(18427), UINT16_C(39964), UINT16_C(30161), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t r = simde_vld1_u16(test_vec[i].r); simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint16x8_t r_ = simde_vqmovn_high_u32(r, a); simde_test_arm_neon_assert_equal_u16x8(r_, simde_vld1q_u16(test_vec[i].r_)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t r = simde_test_arm_neon_random_u16x4(); simde_int32x4_t s = simde_vandq_s32(simde_test_arm_neon_random_i32x4(), simde_vdupq_n_s32(31)); simde_uint32x4_t a = simde_vshlq_u32(simde_test_arm_neon_random_u32x4(), simde_vnegq_s32(s)); simde_uint16x8_t r_ = simde_vqmovn_high_u32(r, a); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r_, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqmovn_high_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t r[2]; uint64_t a[2]; uint32_t r_[4]; } test_vec[] = { { { UINT32_C(2600053184), UINT32_C(3225579596) }, { UINT64_C( 39493657923431738), UINT64_C( 16872038636) }, { UINT32_C(2600053184), UINT32_C(3225579596), UINT32_MAX, UINT32_MAX } }, { { 
UINT32_C(3665814127), UINT32_C(3092249826) }, { UINT64_C( 1275278610937403067), UINT64_C( 296) }, { UINT32_C(3665814127), UINT32_C(3092249826), UINT32_MAX, UINT32_C( 296) } }, { { UINT32_C(1321041292), UINT32_C(2955657081) }, { UINT64_C( 38279), UINT64_C( 190) }, { UINT32_C(1321041292), UINT32_C(2955657081), UINT32_C( 38279), UINT32_C( 190) } }, { { UINT32_C( 932333107), UINT32_C(2115459463) }, { UINT64_C( 6), UINT64_C( 3) }, { UINT32_C( 932333107), UINT32_C(2115459463), UINT32_C( 6), UINT32_C( 3) } }, { { UINT32_C(1173992368), UINT32_C( 603532775) }, { UINT64_C( 307), UINT64_C( 4129860144000901197) }, { UINT32_C(1173992368), UINT32_C( 603532775), UINT32_C( 307), UINT32_MAX } }, { { UINT32_C( 873684659), UINT32_C( 81502658) }, { UINT64_C( 20538), UINT64_C( 50037503) }, { UINT32_C( 873684659), UINT32_C( 81502658), UINT32_C( 20538), UINT32_C( 50037503) } }, { { UINT32_C(3514062919), UINT32_C(1579501817) }, { UINT64_C( 0), UINT64_C( 261667593493) }, { UINT32_C(3514062919), UINT32_C(1579501817), UINT32_C( 0), UINT32_MAX } }, { { UINT32_C( 355163949), UINT32_C(1319326452) }, { UINT64_C( 1960864181916608585), UINT64_C( 48518210310) }, { UINT32_C( 355163949), UINT32_C(1319326452), UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t r = simde_vld1_u32(test_vec[i].r); simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint32x4_t r_ = simde_vqmovn_high_u64(r, a); simde_test_arm_neon_assert_equal_u32x4(r_, simde_vld1q_u32(test_vec[i].r_)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t r = simde_test_arm_neon_random_u32x2(); simde_int64x2_t s = simde_vandq_s64(simde_test_arm_neon_random_i64x2(), simde_vdupq_n_s64(63)); simde_uint64x2_t a = simde_vshlq_u64(simde_test_arm_neon_random_u64x2(), simde_vnegq_s64(s)); simde_uint32x4_t r_ = simde_vqmovn_high_u64(r, a); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, 
a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r_, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vqmovn_high_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqmovn_high_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqmovn_high_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vqmovn_high_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vqmovn_high_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vqmovn_high_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/qmovun.c000066400000000000000000000162611400333146700170160ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN qmovun #include "test-neon.h" #include "../../../simde/arm/neon/qmovun.h" static int test_simde_vqmovun_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; uint8_t r[8]; } test_vec[] = { { { -INT16_C( 18345), INT16_C( 7399), -INT16_C( 5353), -INT16_C( 25148), -INT16_C( 27188), INT16_C( 13769), INT16_C( 990), INT16_C( 9688) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { INT16_C( 27133), INT16_C( 15294), -INT16_C( 22736), INT16_C( 17779), INT16_C( 32692), INT16_C( 10966), INT16_C( 17328), INT16_C( 1930) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { INT16_C( 29179), INT16_C( 4643), -INT16_C( 6308), INT16_C( 10671), INT16_C( 30844), INT16_C( 23134), INT16_C( 13948), INT16_C( 31103) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { INT16_C( 16031), -INT16_C( 12364), INT16_C( 10213), -INT16_C( 26091), -INT16_C( 5210), INT16_C( 22468), INT16_C( 20014), INT16_C( 10590) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { -INT16_C( 32064), INT16_C( 7227), -INT16_C( 5527), -INT16_C( 6587), -INT16_C( 23709), -INT16_C( 8384), -INT16_C( 16167), INT16_C( 30808) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 
0), UINT8_C( 0), UINT8_MAX } }, { { INT16_C( 3326), -INT16_C( 7352), INT16_C( 23859), -INT16_C( 9859), INT16_C( 16712), INT16_C( 30256), -INT16_C( 28784), INT16_C( 20639) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { -INT16_C( 9711), INT16_C( 31340), -INT16_C( 19772), INT16_C( 10080), -INT16_C( 24235), INT16_C( 12038), INT16_C( 24161), INT16_C( 24487) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { -INT16_C( 4246), -INT16_C( 25278), -INT16_C( 16308), -INT16_C( 27529), -INT16_C( 22783), -INT16_C( 28406), -INT16_C( 22218), INT16_C( 18401) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_uint8x8_t r = simde_vqmovun_s16(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_uint8x8_t r = simde_vqmovun_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqmovun_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; uint16_t r[4]; } test_vec[] = { { { INT32_C( 1811669884), -INT32_C( 1616095694), INT32_C( 1505503431), INT32_C( 410694829) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { -INT32_C( 428853893), INT32_C( 1008142342), INT32_C( 497592662), INT32_C( 932503994) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { -INT32_C( 140341307), -INT32_C( 1399435547), INT32_C( 604394358), -INT32_C( 2109964281) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { -INT32_C( 1133990474), INT32_C( 200835253), -INT32_C( 483876823), 
INT32_C( 1176157313) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { INT32_C( 826195275), -INT32_C( 2099391477), INT32_C( 782689063), INT32_C( 447799908) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { INT32_C( 1171659151), -INT32_C( 1034891623), -INT32_C( 274368146), -INT32_C( 2127118283) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { -INT32_C( 2001570691), INT32_C( 1879740232), -INT32_C( 677465997), INT32_C( 586239891) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { INT32_C( 23578472), INT32_C( 63158421), INT32_C( 1727162417), -INT32_C( 1511577560) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_uint16x4_t r = simde_vqmovun_s32(a); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_uint16x4_t r = simde_vqmovun_s32(a); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqmovun_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; uint32_t r[2]; } test_vec[] = { { { -INT64_C( 5425750683829434048), -INT64_C( 7162323672895373370) }, { UINT32_C( 0), UINT32_C( 0) } }, { { -INT64_C( 5585277752272748202), INT64_C( 1967427466993443775) }, { UINT32_C( 0), UINT32_MAX } }, { { INT64_C( 8318933209745641790), -INT64_C( 3732010236125240056) }, { UINT32_MAX, UINT32_C( 0) } }, { { -INT64_C( 907126553308030658), -INT64_C( 2749521670418995231) }, { UINT32_C( 0), UINT32_C( 0) } }, { { -INT64_C( 4634756288245928563), -INT64_C( 3859146670975987666) }, { UINT32_C( 0), UINT32_C( 0) } }, { { INT64_C( 5751074110553797031), -INT64_C( 8626675892199145774) }, { UINT32_MAX, UINT32_C( 0) } }, 
{ { INT64_C( 1533037962395377360), -INT64_C( 3163731268945142079) }, { UINT32_MAX, UINT32_C( 0) } }, { { INT64_C( 6204411644969261404), -INT64_C( 2791938161888713282) }, { UINT32_MAX, UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_uint32x2_t r = simde_vqmovun_s64(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_uint32x2_t r = simde_vqmovun_s64(a); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vqmovun_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqmovun_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqmovun_s64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/qneg.c000066400000000000000000000633651400333146700164320ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN qneg #include "test-neon.h" #include "../../../simde/arm/neon/qneg.h" #include "../../../simde/arm/neon/set_lane.h" static int test_simde_vqnegb_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a; int8_t r; } test_vec[] = { { INT8_MIN, INT8_MAX }, { -INT8_C( 59), INT8_C( 59) }, { -INT8_C( 53), INT8_C( 53) }, { INT8_C( 96), -INT8_C( 96) }, { INT8_C( 75), -INT8_C( 75) }, { -INT8_C( 55), INT8_C( 55) }, { -INT8_C( 47), INT8_C( 47) }, { -INT8_C( 61), INT8_C( 61) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int8_t r = simde_vqnegb_s8(test_vec[i].a); simde_assert_equal_i8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int8_t a = (i == 0) ? 
INT8_MIN : simde_test_codegen_random_i8(); int8_t r = simde_vqnegb_s8(a); simde_test_codegen_write_i8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqnegh_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a; int16_t r; } test_vec[] = { { INT16_MIN, INT16_MAX }, { -INT16_C( 19482), INT16_C( 19482) }, { -INT16_C( 1948), INT16_C( 1948) }, { INT16_C( 21429), -INT16_C( 21429) }, { INT16_C( 2876), -INT16_C( 2876) }, { INT16_C( 12853), -INT16_C( 12853) }, { -INT16_C( 15738), INT16_C( 15738) }, { -INT16_C( 22893), INT16_C( 22893) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int16_t r = simde_vqnegh_s16(test_vec[i].a); simde_assert_equal_i16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int16_t a = (i == 0) ? INT16_MIN : simde_test_codegen_random_i16(); int16_t r = simde_vqnegh_s16(a); simde_test_codegen_write_i16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqnegs_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a; int32_t r; } test_vec[] = { { INT32_MIN, INT32_MAX }, { INT32_C( 833874552), -INT32_C( 833874552) }, { INT32_C( 789306444), -INT32_C( 789306444) }, { INT32_C( 2046037166), -INT32_C( 2046037166) }, { INT32_C( 1044529005), -INT32_C( 1044529005) }, { INT32_C( 1743857923), -INT32_C( 1743857923) }, { INT32_C( 1572513313), -INT32_C( 1572513313) }, { INT32_C( 948957106), -INT32_C( 948957106) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int32_t r = simde_vqnegs_s32(test_vec[i].a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int32_t a = (i == 0) ? 
INT32_MIN : simde_test_codegen_random_i32(); int32_t r = simde_vqnegs_s32(a); simde_test_codegen_write_i32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqnegd_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a; int64_t r; } test_vec[] = { { INT64_MIN, INT64_MAX }, { INT64_C( 6438900673212785330), -INT64_C( 6438900673212785330) }, { -INT64_C( 2263765304330393746), INT64_C( 2263765304330393746) }, { INT64_C( 2388614247759206332), -INT64_C( 2388614247759206332) }, { -INT64_C( 9033923089242529353), INT64_C( 9033923089242529353) }, { -INT64_C( 9181141915911028687), INT64_C( 9181141915911028687) }, { INT64_C( 6396010445682777967), -INT64_C( 6396010445682777967) }, { -INT64_C( 6256838048045538807), INT64_C( 6256838048045538807) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int64_t r = simde_vqnegd_s64(test_vec[i].a); simde_assert_equal_i64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int64_t a = (i == 0) ? 
INT64_MIN : simde_test_codegen_random_i64(); int64_t r = simde_vqnegd_s64(a); simde_test_codegen_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqneg_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t r[8]; } test_vec[] = { { { INT8_MIN, INT8_C( 92), -INT8_C( 48), INT8_C( 20), INT8_C( 55), INT8_C( 1), INT8_C( 44), INT8_C( 90) }, { INT8_MAX, -INT8_C( 92), INT8_C( 48), -INT8_C( 20), -INT8_C( 55), -INT8_C( 1), -INT8_C( 44), -INT8_C( 90) } }, { { -INT8_C( 56), INT8_C( 96), -INT8_C( 3), INT8_C( 125), INT8_MIN, INT8_C( 80), -INT8_C( 75), -INT8_C( 2) }, { INT8_C( 56), -INT8_C( 96), INT8_C( 3), -INT8_C( 125), INT8_MAX, -INT8_C( 80), INT8_C( 75), INT8_C( 2) } }, { { INT8_C( 2), -INT8_C( 40), -INT8_C( 121), INT8_MIN, -INT8_C( 72), INT8_C( 115), -INT8_C( 65), INT8_C( 108) }, { -INT8_C( 2), INT8_C( 40), INT8_C( 121), INT8_MAX, INT8_C( 72), -INT8_C( 115), INT8_C( 65), -INT8_C( 108) } }, { { INT8_C( 112), -INT8_C( 88), -INT8_C( 96), INT8_C( 16), INT8_MIN, -INT8_C( 3), -INT8_C( 32), -INT8_C( 95) }, { -INT8_C( 112), INT8_C( 88), INT8_C( 96), -INT8_C( 16), INT8_MAX, INT8_C( 3), INT8_C( 32), INT8_C( 95) } }, { { -INT8_C( 31), -INT8_C( 51), -INT8_C( 114), INT8_C( 25), INT8_MIN, -INT8_C( 18), INT8_C( 23), INT8_C( 19) }, { INT8_C( 31), INT8_C( 51), INT8_C( 114), -INT8_C( 25), INT8_MAX, INT8_C( 18), -INT8_C( 23), -INT8_C( 19) } }, { { INT8_C( 103), -INT8_C( 56), INT8_MIN, -INT8_C( 109), -INT8_C( 54), INT8_C( 98), INT8_C( 27), -INT8_C( 47) }, { -INT8_C( 103), INT8_C( 56), INT8_MAX, INT8_C( 109), INT8_C( 54), -INT8_C( 98), -INT8_C( 27), INT8_C( 47) } }, { { -INT8_C( 114), -INT8_C( 112), -INT8_C( 121), -INT8_C( 30), INT8_MIN, INT8_C( 47), -INT8_C( 126), INT8_C( 16) }, { INT8_C( 114), INT8_C( 112), INT8_C( 121), INT8_C( 30), INT8_MAX, -INT8_C( 47), INT8_C( 126), -INT8_C( 16) } }, { { INT8_MIN, -INT8_C( 16), INT8_C( 93), -INT8_C( 77), -INT8_C( 47), INT8_C( 42), 
INT8_C( 65), -INT8_C( 21) }, { INT8_MAX, INT8_C( 16), -INT8_C( 93), INT8_C( 77), INT8_C( 47), -INT8_C( 42), -INT8_C( 65), INT8_C( 21) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t r = simde_vqneg_s8(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); a = simde_vset_lane_s8(INT8_MIN, a, simde_test_codegen_random_i8() & 7); simde_int8x8_t r = simde_vqneg_s8(a); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqneg_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t r[4]; } test_vec[] = { { { INT16_MIN, -INT16_C( 3014), -INT16_C( 596), -INT16_C( 13820) }, { INT16_MAX, INT16_C( 3014), INT16_C( 596), INT16_C( 13820) } }, { { INT16_MIN, -INT16_C( 27535), -INT16_C( 24057), INT16_C( 14980) }, { INT16_MAX, INT16_C( 27535), INT16_C( 24057), -INT16_C( 14980) } }, { { -INT16_C( 5471), INT16_MIN, INT16_C( 7663), -INT16_C( 23038) }, { INT16_C( 5471), INT16_MAX, -INT16_C( 7663), INT16_C( 23038) } }, { { -INT16_C( 182), -INT16_C( 30384), -INT16_C( 15455), INT16_MIN }, { INT16_C( 182), INT16_C( 30384), INT16_C( 15455), INT16_MAX } }, { { INT16_MIN, INT16_C( 12345), INT16_C( 22890), -INT16_C( 95) }, { INT16_MAX, -INT16_C( 12345), -INT16_C( 22890), INT16_C( 95) } }, { { INT16_MIN, -INT16_C( 3173), -INT16_C( 31451), -INT16_C( 31794) }, { INT16_MAX, INT16_C( 3173), INT16_C( 31451), INT16_C( 31794) } }, { { -INT16_C( 31253), -INT16_C( 10982), INT16_C( 6607), INT16_MIN }, { INT16_C( 31253), INT16_C( 10982), -INT16_C( 6607), INT16_MAX } }, { { INT16_C( 7144), INT16_MIN, -INT16_C( 5157), INT16_C( 2960) }, { -INT16_C( 7144), INT16_MAX, INT16_C( 5157), -INT16_C( 2960) } } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t r = simde_vqneg_s16(a); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); a = simde_vset_lane_s16(INT16_MIN, a, simde_test_codegen_random_i8() & 3); simde_int16x4_t r = simde_vqneg_s16(a); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqneg_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 1247063273), INT32_MIN }, { -INT32_C( 1247063273), INT32_MAX } }, { { INT32_MIN, INT32_C( 1928856989) }, { INT32_MAX, -INT32_C( 1928856989) } }, { { INT32_MIN, -INT32_C( 669104000) }, { INT32_MAX, INT32_C( 669104000) } }, { { INT32_MIN, -INT32_C( 1665934254) }, { INT32_MAX, INT32_C( 1665934254) } }, { { -INT32_C( 2001763957), INT32_MIN }, { INT32_C( 2001763957), INT32_MAX } }, { { INT32_MIN, -INT32_C( 1609692717) }, { INT32_MAX, INT32_C( 1609692717) } }, { { INT32_MIN, INT32_C( 648727239) }, { INT32_MAX, -INT32_C( 648727239) } }, { { INT32_MIN, -INT32_C( 335398556) }, { INT32_MAX, INT32_C( 335398556) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t r = simde_vqneg_s32(a); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); a = simde_vset_lane_s32(INT32_MIN, a, simde_test_codegen_random_i8() & 1); simde_int32x2_t r = simde_vqneg_s32(a); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } 
static int test_simde_vqneg_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int64_t r[1]; } test_vec[] = { { { -INT64_C( 8191283204820754105) }, { INT64_C( 8191283204820754105) } }, { { INT64_C( 7088802444648733089) }, { -INT64_C( 7088802444648733089) } }, { { INT64_MIN }, { INT64_MAX } }, { { -INT64_C( 1307339272021044458) }, { INT64_C( 1307339272021044458) } }, { { INT64_MIN }, { INT64_MAX } }, { { -INT64_C( 2461253948824801768) }, { INT64_C( 2461253948824801768) } }, { { INT64_C( 8701714957695160342) }, { -INT64_C( 8701714957695160342) } }, { { INT64_C( 5631676463628112935) }, { -INT64_C( 5631676463628112935) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t r = simde_vqneg_s64(a); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); if ((simde_test_codegen_random_i8() & 3) == 3) a = simde_vset_lane_s64(INT64_MIN, a, 0); simde_int64x1_t r = simde_vqneg_s64(a); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqnegq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t r[16]; } test_vec[] = { { { -INT8_C( 57), -INT8_C( 22), INT8_C( 105), -INT8_C( 86), INT8_C( 3), INT8_MIN, -INT8_C( 17), -INT8_C( 23), INT8_C( 42), -INT8_C( 51), -INT8_C( 63), INT8_C( 7), INT8_MAX, -INT8_C( 41), -INT8_C( 105), -INT8_C( 98) }, { INT8_C( 57), INT8_C( 22), -INT8_C( 105), INT8_C( 86), -INT8_C( 3), INT8_MAX, INT8_C( 17), INT8_C( 23), -INT8_C( 42), INT8_C( 51), INT8_C( 63), -INT8_C( 7), -INT8_C( 127), INT8_C( 41), INT8_C( 105), INT8_C( 98) } }, { { INT8_C( 61), INT8_C( 81), INT8_C( 23), -INT8_C( 74), -INT8_C( 33), INT8_C( 63), INT8_C( 10), INT8_C( 91), INT8_C( 31), -INT8_C( 15), 
INT8_C( 22), INT8_MIN, INT8_C( 63), -INT8_C( 78), INT8_C( 14), INT8_C( 41) }, { -INT8_C( 61), -INT8_C( 81), -INT8_C( 23), INT8_C( 74), INT8_C( 33), -INT8_C( 63), -INT8_C( 10), -INT8_C( 91), -INT8_C( 31), INT8_C( 15), -INT8_C( 22), INT8_MAX, -INT8_C( 63), INT8_C( 78), -INT8_C( 14), -INT8_C( 41) } }, { { -INT8_C( 71), INT8_C( 44), -INT8_C( 77), -INT8_C( 88), INT8_C( 22), -INT8_C( 35), INT8_C( 118), -INT8_C( 41), -INT8_C( 28), -INT8_C( 11), -INT8_C( 82), INT8_MIN, -INT8_C( 109), INT8_C( 4), -INT8_C( 71), -INT8_C( 27) }, { INT8_C( 71), -INT8_C( 44), INT8_C( 77), INT8_C( 88), -INT8_C( 22), INT8_C( 35), -INT8_C( 118), INT8_C( 41), INT8_C( 28), INT8_C( 11), INT8_C( 82), INT8_MAX, INT8_C( 109), -INT8_C( 4), INT8_C( 71), INT8_C( 27) } }, { { INT8_C( 111), -INT8_C( 60), INT8_C( 90), INT8_C( 121), INT8_C( 31), INT8_C( 122), INT8_MIN, INT8_C( 53), -INT8_C( 63), -INT8_C( 87), -INT8_C( 25), -INT8_C( 49), -INT8_C( 45), INT8_C( 3), -INT8_C( 120), -INT8_C( 1) }, { -INT8_C( 111), INT8_C( 60), -INT8_C( 90), -INT8_C( 121), -INT8_C( 31), -INT8_C( 122), INT8_MAX, -INT8_C( 53), INT8_C( 63), INT8_C( 87), INT8_C( 25), INT8_C( 49), INT8_C( 45), -INT8_C( 3), INT8_C( 120), INT8_C( 1) } }, { { INT8_C( 49), INT8_C( 21), -INT8_C( 109), -INT8_C( 89), -INT8_C( 20), INT8_MIN, -INT8_C( 100), -INT8_C( 101), -INT8_C( 13), INT8_C( 47), -INT8_C( 97), -INT8_C( 83), INT8_C( 20), -INT8_C( 70), INT8_C( 28), -INT8_C( 39) }, { -INT8_C( 49), -INT8_C( 21), INT8_C( 109), INT8_C( 89), INT8_C( 20), INT8_MAX, INT8_C( 100), INT8_C( 101), INT8_C( 13), -INT8_C( 47), INT8_C( 97), INT8_C( 83), -INT8_C( 20), INT8_C( 70), -INT8_C( 28), INT8_C( 39) } }, { { -INT8_C( 106), INT8_MIN, -INT8_C( 113), INT8_C( 0), INT8_C( 46), INT8_C( 80), -INT8_C( 86), INT8_C( 21), INT8_C( 31), INT8_C( 125), INT8_C( 24), -INT8_C( 88), INT8_C( 124), -INT8_C( 50), -INT8_C( 39), -INT8_C( 110) }, { INT8_C( 106), INT8_MAX, INT8_C( 113), INT8_C( 0), -INT8_C( 46), -INT8_C( 80), INT8_C( 86), -INT8_C( 21), -INT8_C( 31), -INT8_C( 125), -INT8_C( 24), 
INT8_C( 88), -INT8_C( 124), INT8_C( 50), INT8_C( 39), INT8_C( 110) } }, { { INT8_MIN, INT8_C( 126), -INT8_C( 39), INT8_C( 28), INT8_C( 25), -INT8_C( 52), INT8_C( 75), INT8_MIN, INT8_C( 121), INT8_C( 96), INT8_C( 115), -INT8_C( 106), INT8_C( 57), -INT8_C( 120), INT8_C( 44), INT8_C( 49) }, { INT8_MAX, -INT8_C( 126), INT8_C( 39), -INT8_C( 28), -INT8_C( 25), INT8_C( 52), -INT8_C( 75), INT8_MAX, -INT8_C( 121), -INT8_C( 96), -INT8_C( 115), INT8_C( 106), -INT8_C( 57), INT8_C( 120), -INT8_C( 44), -INT8_C( 49) } }, { { INT8_C( 44), INT8_C( 95), INT8_C( 103), -INT8_C( 42), INT8_C( 117), -INT8_C( 122), INT8_MIN, -INT8_C( 115), INT8_C( 46), -INT8_C( 48), INT8_C( 92), INT8_C( 7), INT8_C( 98), -INT8_C( 67), -INT8_C( 121), -INT8_C( 32) }, { -INT8_C( 44), -INT8_C( 95), -INT8_C( 103), INT8_C( 42), -INT8_C( 117), INT8_C( 122), INT8_MAX, INT8_C( 115), -INT8_C( 46), INT8_C( 48), -INT8_C( 92), -INT8_C( 7), -INT8_C( 98), INT8_C( 67), INT8_C( 121), INT8_C( 32) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t r = simde_vqnegq_s8(a); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); a = simde_vsetq_lane_s8(INT8_MIN, a, simde_test_codegen_random_i8() & 15); simde_int8x16_t r = simde_vqnegq_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqnegq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 1373), -INT16_C( 4253), -INT16_C( 9038), INT16_MIN, -INT16_C( 30606), -INT16_C( 24915), -INT16_C( 15175), INT16_C( 6603) }, { INT16_C( 1373), INT16_C( 4253), INT16_C( 9038), INT16_MAX, INT16_C( 30606), INT16_C( 24915), INT16_C( 15175), -INT16_C( 6603) } }, { { 
-INT16_C( 29023), -INT16_C( 2638), -INT16_C( 8165), INT16_C( 30661), INT16_C( 10216), INT16_C( 28469), INT16_MIN, INT16_C( 275) }, { INT16_C( 29023), INT16_C( 2638), INT16_C( 8165), -INT16_C( 30661), -INT16_C( 10216), -INT16_C( 28469), INT16_MAX, -INT16_C( 275) } }, { { -INT16_C( 19454), INT16_MIN, INT16_C( 32217), -INT16_C( 30759), -INT16_C( 28132), -INT16_C( 6325), INT16_C( 30635), INT16_C( 14728) }, { INT16_C( 19454), INT16_MAX, -INT16_C( 32217), INT16_C( 30759), INT16_C( 28132), INT16_C( 6325), -INT16_C( 30635), -INT16_C( 14728) } }, { { INT16_C( 21885), INT16_C( 16905), -INT16_C( 3636), INT16_C( 361), INT16_C( 29025), INT16_C( 29901), INT16_MIN, INT16_C( 9846) }, { -INT16_C( 21885), -INT16_C( 16905), INT16_C( 3636), -INT16_C( 361), -INT16_C( 29025), -INT16_C( 29901), INT16_MAX, -INT16_C( 9846) } }, { { INT16_C( 199), -INT16_C( 24444), -INT16_C( 24441), -INT16_C( 11726), INT16_MIN, INT16_C( 3913), INT16_C( 29207), INT16_C( 27789) }, { -INT16_C( 199), INT16_C( 24444), INT16_C( 24441), INT16_C( 11726), INT16_MAX, -INT16_C( 3913), -INT16_C( 29207), -INT16_C( 27789) } }, { { INT16_C( 14799), INT16_C( 14701), -INT16_C( 12742), INT16_C( 1962), INT16_C( 7234), INT16_MIN, INT16_C( 2371), INT16_C( 17279) }, { -INT16_C( 14799), -INT16_C( 14701), INT16_C( 12742), -INT16_C( 1962), -INT16_C( 7234), INT16_MAX, -INT16_C( 2371), -INT16_C( 17279) } }, { { -INT16_C( 13793), INT16_C( 21037), INT16_MIN, -INT16_C( 6608), INT16_C( 18372), INT16_C( 20824), -INT16_C( 11084), -INT16_C( 4832) }, { INT16_C( 13793), -INT16_C( 21037), INT16_MAX, INT16_C( 6608), -INT16_C( 18372), -INT16_C( 20824), INT16_C( 11084), INT16_C( 4832) } }, { { INT16_C( 10073), INT16_C( 784), INT16_C( 21295), INT16_C( 12832), INT16_C( 25355), -INT16_C( 29893), INT16_MIN, INT16_C( 28842) }, { -INT16_C( 10073), -INT16_C( 784), -INT16_C( 21295), -INT16_C( 12832), -INT16_C( 25355), INT16_C( 29893), INT16_MAX, -INT16_C( 28842) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t r = simde_vqnegq_s16(a); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); a = simde_vsetq_lane_s16(INT16_MIN, a, simde_test_codegen_random_i8() & 7); simde_int16x8_t r = simde_vqnegq_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqnegq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 749407484), INT32_C( 1265922034), INT32_MIN, INT32_C( 1010458901) }, { -INT32_C( 749407484), -INT32_C( 1265922034), INT32_MAX, -INT32_C( 1010458901) } }, { { INT32_C( 1573219133), -INT32_C( 641675107), INT32_C( 111306331), INT32_MIN }, { -INT32_C( 1573219133), INT32_C( 641675107), -INT32_C( 111306331), INT32_MAX } }, { { INT32_MIN, INT32_C( 1087074848), -INT32_C( 1918771117), -INT32_C( 2000022756) }, { INT32_MAX, -INT32_C( 1087074848), INT32_C( 1918771117), INT32_C( 2000022756) } }, { { -INT32_C( 391568088), -INT32_C( 1605434114), INT32_C( 221848842), INT32_MIN }, { INT32_C( 391568088), INT32_C( 1605434114), -INT32_C( 221848842), INT32_MAX } }, { { -INT32_C( 1432224033), INT32_MIN, INT32_C( 1291036289), INT32_C( 1416940590) }, { INT32_C( 1432224033), INT32_MAX, -INT32_C( 1291036289), -INT32_C( 1416940590) } }, { { -INT32_C( 1418112420), INT32_MIN, -INT32_C( 844572271), -INT32_C( 1230186569) }, { INT32_C( 1418112420), INT32_MAX, INT32_C( 844572271), INT32_C( 1230186569) } }, { { -INT32_C( 529664938), -INT32_C( 930680876), INT32_MIN, INT32_C( 1527449865) }, { INT32_C( 529664938), INT32_C( 930680876), INT32_MAX, -INT32_C( 1527449865) } }, { { -INT32_C( 2012852553), INT32_MIN, INT32_C( 284825956), -INT32_C( 1016695857) }, { INT32_C( 2012852553), INT32_MAX, -INT32_C( 
284825956), INT32_C( 1016695857) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t r = simde_vqnegq_s32(a); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); a = simde_vsetq_lane_s32(INT32_MIN, a, simde_test_codegen_random_i8() & 3); simde_int32x4_t r = simde_vqnegq_s32(a); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqnegq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t r[2]; } test_vec[] = { { { -INT64_C( 828086871084984250), INT64_C( 3854096010263235514) }, { INT64_C( 828086871084984250), -INT64_C( 3854096010263235514) } }, { { -INT64_C( 3639046133724107004), -INT64_C( 6251241396687057262) }, { INT64_C( 3639046133724107004), INT64_C( 6251241396687057262) } }, { { -INT64_C( 425107489667938036), -INT64_C( 1946399232152174468) }, { INT64_C( 425107489667938036), INT64_C( 1946399232152174468) } }, { { -INT64_C( 4309889297074374813), -INT64_C( 830958200115805909) }, { INT64_C( 4309889297074374813), INT64_C( 830958200115805909) } }, { { INT64_MIN, -INT64_C( 1198093595569662721) }, { INT64_MAX, INT64_C( 1198093595569662721) } }, { { INT64_C( 2211971346900284225), -INT64_C( 8905928107539194389) }, { -INT64_C( 2211971346900284225), INT64_C( 8905928107539194389) } }, { { INT64_C( 4465699288241565830), -INT64_C( 7640949259834936747) }, { -INT64_C( 4465699288241565830), INT64_C( 7640949259834936747) } }, { { -INT64_C( 2254431034773090970), INT64_C( 4465036234584639496) }, { INT64_C( 2254431034773090970), -INT64_C( 4465036234584639496) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); 
simde_int64x2_t r = simde_vqnegq_s64(a); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); if ((simde_test_codegen_random_i8() & 3) == 3) a = simde_vsetq_lane_s64(INT64_MIN, a, 0); simde_int64x2_t r = simde_vqnegq_s64(a); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vqnegb_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqnegh_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqnegs_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqnegd_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vqneg_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqneg_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqneg_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqneg_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vqnegq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqnegq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqnegq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqnegq_s64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/qrdmulh.c000066400000000000000000000331341400333146700171430ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN qrdmulh #include "test-neon.h" #include "../../../simde/arm/neon/qrdmulh.h" static int test_simde_vqrdmulh_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 24408), INT16_C( 8011), -INT16_C( 30441), -INT16_C( 30215) }, { INT16_C( 4356), INT16_C( 11308), INT16_C( 3238), -INT16_C( 19917) }, { INT16_C( 3245), INT16_C( 2765), -INT16_C( 3008), INT16_C( 18365) } }, { { INT16_C( 11964), INT16_C( 20417), -INT16_C( 7014), INT16_C( 9797) }, { INT16_C( 3004), INT16_C( 6963), -INT16_C( 19145), -INT16_C( 28864) }, { INT16_C( 1097), INT16_C( 4338), INT16_C( 4098), -INT16_C( 8630) } }, { { -INT16_C( 29676), INT16_C( 11183), -INT16_C( 22507), INT16_C( 6580) }, { -INT16_C( 7751), INT16_C( 24645), INT16_C( 
30957), -INT16_C( 21998) }, { INT16_C( 7020), INT16_C( 8411), -INT16_C( 21263), -INT16_C( 4417) } }, { { -INT16_C( 11354), INT16_C( 16889), INT16_C( 16055), INT16_C( 29799) }, { -INT16_C( 26039), -INT16_C( 32625), -INT16_C( 12465), INT16_C( 25360) }, { INT16_C( 9022), -INT16_C( 16815), -INT16_C( 6107), INT16_C( 23062) } }, { { -INT16_C( 16549), INT16_C( 28814), INT16_C( 17255), INT16_C( 8329) }, { -INT16_C( 12508), INT16_C( 4480), -INT16_C( 28089), -INT16_C( 4421) }, { INT16_C( 6317), INT16_C( 3939), -INT16_C( 14791), -INT16_C( 1124) } }, { { -INT16_C( 19354), INT16_C( 7471), -INT16_C( 26894), INT16_C( 15249) }, { INT16_C( 8240), -INT16_C( 32580), -INT16_C( 13072), INT16_C( 19427) }, { -INT16_C( 4867), -INT16_C( 7428), INT16_C( 10729), INT16_C( 9041) } }, { { INT16_C( 29323), -INT16_C( 3396), INT16_C( 17845), -INT16_C( 9966) }, { -INT16_C( 27884), INT16_C( 23786), -INT16_C( 23003), -INT16_C( 29878) }, { -INT16_C( 24952), -INT16_C( 2465), -INT16_C( 12527), INT16_C( 9087) } }, { { INT16_C( 31066), INT16_C( 19881), INT16_C( 14863), INT16_C( 16264) }, { INT16_C( 17499), INT16_C( 19391), -INT16_C( 23792), -INT16_C( 25706) }, { INT16_C( 16590), INT16_C( 11765), -INT16_C( 10792), -INT16_C( 12759) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vqrdmulh_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vqrdmulh_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqrdmulh_s32 
(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 1899441268), INT32_C( 839689240) }, { -INT32_C( 1702480800), -INT32_C( 1555117258) }, { -INT32_C( 1505837911), -INT32_C( 608067600) } }, { { -INT32_C( 252180371), -INT32_C( 33985746) }, { INT32_C( 1179500889), -INT32_C( 1494958031) }, { -INT32_C( 138509540), INT32_C( 23658976) } }, { { INT32_C( 68623340), INT32_C( 540419007) }, { -INT32_C( 1900364456), -INT32_C( 181270136) }, { -INT32_C( 60726589), -INT32_C( 45617030) } }, { { INT32_C( 1088760337), -INT32_C( 281157995) }, { -INT32_C( 785020257), INT32_C( 930552139) }, { -INT32_C( 398000199), -INT32_C( 121831975) } }, { { -INT32_C( 214135244), INT32_C( 152269489) }, { -INT32_C( 1214722513), -INT32_C( 391329065) }, { INT32_C( 121125440), -INT32_C( 27747581) } }, { { -INT32_C( 1993829644), INT32_C( 276325744) }, { INT32_C( 1004645872), -INT32_C( 93169466) }, { -INT32_C( 932762735), -INT32_C( 11988507) } }, { { -INT32_C( 1745965338), INT32_C( 1352728865) }, { -INT32_C( 1509410353), -INT32_C( 141577213) }, { INT32_C( 1227193585), -INT32_C( 89181393) } }, { { -INT32_C( 1216301242), INT32_C( 231209245) }, { INT32_C( 1833478310), -INT32_C( 429409792) }, { -INT32_C( 1038453516), -INT32_C( 46232489) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vqrdmulh_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vqrdmulh_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, 
SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqrdmulhq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 362), INT16_C( 17724), INT16_C( 9860), -INT16_C( 26948), INT16_C( 19934), INT16_C( 20294), INT16_C( 24470), INT16_C( 32030) }, { -INT16_C( 27709), INT16_C( 4190), INT16_C( 5967), -INT16_C( 18116), -INT16_C( 5194), -INT16_C( 32603), INT16_C( 21823), -INT16_C( 22254) }, { -INT16_C( 306), INT16_C( 2266), INT16_C( 1795), INT16_C( 14898), -INT16_C( 3160), -INT16_C( 20192), INT16_C( 16297), -INT16_C( 21753) } }, { { INT16_C( 20311), -INT16_C( 9233), -INT16_C( 21643), INT16_C( 21361), -INT16_C( 18440), -INT16_C( 29022), -INT16_C( 16105), -INT16_C( 9716) }, { INT16_C( 27220), -INT16_C( 23318), INT16_C( 9857), INT16_C( 14173), INT16_C( 785), INT16_C( 20664), -INT16_C( 13736), -INT16_C( 20486) }, { INT16_C( 16872), INT16_C( 6570), -INT16_C( 6510), INT16_C( 9239), -INT16_C( 442), -INT16_C( 18302), INT16_C( 6751), INT16_C( 6074) } }, { { -INT16_C( 5863), -INT16_C( 28790), -INT16_C( 876), -INT16_C( 29470), -INT16_C( 31309), -INT16_C( 13797), INT16_C( 10054), -INT16_C( 25948) }, { -INT16_C( 29039), INT16_C( 4670), -INT16_C( 25420), -INT16_C( 14774), INT16_C( 671), -INT16_C( 2282), INT16_C( 4300), -INT16_C( 6489) }, { INT16_C( 5196), -INT16_C( 4103), INT16_C( 680), INT16_C( 13287), -INT16_C( 641), INT16_C( 961), INT16_C( 1319), INT16_C( 5138) } }, { { INT16_C( 12793), -INT16_C( 29323), INT16_C( 22317), -INT16_C( 7910), INT16_C( 13788), INT16_C( 8875), INT16_C( 20572), -INT16_C( 4675) }, { -INT16_C( 1058), -INT16_C( 27649), INT16_C( 18839), INT16_C( 13913), INT16_C( 28491), INT16_C( 6190), -INT16_C( 10880), INT16_C( 31230) }, { -INT16_C( 413), INT16_C( 24742), INT16_C( 12831), -INT16_C( 3359), INT16_C( 11988), INT16_C( 1677), -INT16_C( 6831), -INT16_C( 4456) } }, { { INT16_C( 29446), INT16_C( 13319), INT16_C( 8650), -INT16_C( 22763), -INT16_C( 16298), -INT16_C( 19767), 
-INT16_C( 31216), -INT16_C( 4193) }, { -INT16_C( 24958), INT16_C( 6530), -INT16_C( 9240), INT16_C( 13136), INT16_C( 32330), -INT16_C( 13749), INT16_C( 18771), INT16_C( 22852) }, { -INT16_C( 22428), INT16_C( 2654), -INT16_C( 2439), -INT16_C( 9125), -INT16_C( 16080), INT16_C( 8294), -INT16_C( 17882), -INT16_C( 2924) } }, { { INT16_C( 19388), -INT16_C( 30835), -INT16_C( 23956), -INT16_C( 15826), -INT16_C( 2205), INT16_C( 29556), INT16_C( 4990), INT16_C( 98) }, { -INT16_C( 6991), -INT16_C( 26343), INT16_C( 27071), INT16_C( 2765), INT16_C( 6375), INT16_C( 15060), INT16_C( 6242), INT16_C( 7828) }, { -INT16_C( 4136), INT16_C( 24789), -INT16_C( 19791), -INT16_C( 1335), -INT16_C( 429), INT16_C( 13584), INT16_C( 951), INT16_C( 23) } }, { { INT16_C( 8547), -INT16_C( 12379), -INT16_C( 11324), INT16_C( 10129), INT16_C( 1483), INT16_C( 18842), -INT16_C( 744), -INT16_C( 13751) }, { INT16_C( 25313), -INT16_C( 24221), INT16_C( 12492), -INT16_C( 19541), INT16_C( 32585), -INT16_C( 21522), -INT16_C( 32104), -INT16_C( 1079) }, { INT16_C( 6602), INT16_C( 9150), -INT16_C( 4317), -INT16_C( 6040), INT16_C( 1475), -INT16_C( 12375), INT16_C( 729), INT16_C( 453) } }, { { INT16_C( 28579), INT16_C( 26571), INT16_C( 23618), INT16_C( 3470), INT16_C( 10594), INT16_C( 31318), -INT16_C( 24794), INT16_C( 1860) }, { -INT16_C( 22526), -INT16_C( 12632), INT16_C( 21464), INT16_C( 8577), INT16_C( 28627), INT16_C( 27596), -INT16_C( 26895), -INT16_C( 27290) }, { -INT16_C( 19646), -INT16_C( 10243), INT16_C( 15470), INT16_C( 908), INT16_C( 9255), INT16_C( 26375), INT16_C( 20350), -INT16_C( 1549) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vqrdmulhq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = 
simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vqrdmulhq_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqrdmulhq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 1138864092), INT32_C( 288563958), -INT32_C( 1259870191), -INT32_C( 155258730) }, { INT32_C( 2111102674), INT32_C( 1634858731), -INT32_C( 551689891), -INT32_C( 930088541) }, { -INT32_C( 1119570355), INT32_C( 219680977), INT32_C( 323661439), INT32_C( 67243523) } }, { { INT32_C( 1082436937), -INT32_C( 531515185), INT32_C( 915749280), -INT32_C( 80981207) }, { INT32_C( 494403890), INT32_C( 1417603831), INT32_C( 1848876235), -INT32_C( 566836587) }, { INT32_C( 249203775), -INT32_C( 350865518), INT32_C( 788414423), INT32_C( 21375302) } }, { { INT32_C( 1042201455), INT32_C( 320827507), -INT32_C( 766921559), INT32_C( 969766151) }, { INT32_C( 1834370678), -INT32_C( 71117520), INT32_C( 107607409), INT32_C( 669294776) }, { INT32_C( 890243701), -INT32_C( 10624741), -INT32_C( 38429369), INT32_C( 302241845) } }, { { -INT32_C( 832175269), INT32_C( 484541811), INT32_C( 1089350201), INT32_C( 377142175) }, { INT32_C( 830722049), INT32_C( 372000165), -INT32_C( 216230341), -INT32_C( 1860566730) }, { -INT32_C( 321914602), INT32_C( 83935276), -INT32_C( 109686780), -INT32_C( 326753679) } }, { { INT32_C( 2002747396), INT32_C( 1049837573), INT32_C( 159351146), INT32_C( 1042282812) }, { INT32_C( 1869587401), INT32_C( 595959016), INT32_C( 1746313778), -INT32_C( 1476841053) }, { INT32_C( 1743580820), INT32_C( 291345719), INT32_C( 129582873), -INT32_C( 716785922) } }, { { -INT32_C( 1222747983), INT32_C( 49656216), INT32_C( 1846244402), INT32_C( 934030189) }, { -INT32_C( 1230627634), 
-INT32_C( 354800712), INT32_C( 1884483789), -INT32_C( 753448159) }, { INT32_C( 700702638), -INT32_C( 8204049), INT32_C( 1620136968), -INT32_C( 327706023) } }, { { INT32_C( 998913443), INT32_C( 406683622), INT32_C( 1636255988), INT32_C( 1100493683) }, { INT32_C( 133709391), INT32_C( 938594922), -INT32_C( 458734654), INT32_C( 834125710) }, { INT32_C( 62195635), INT32_C( 177748121), -INT32_C( 349528773), INT32_C( 427453813) } }, { { -INT32_C( 613662219), -INT32_C( 1259034176), INT32_C( 1695972338), -INT32_C( 22565202) }, { INT32_C( 1459986413), INT32_C( 865007473), -INT32_C( 921225670), -INT32_C( 335884554) }, { -INT32_C( 417203876), -INT32_C( 507139587), -INT32_C( 727536740), INT32_C( 3529388) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vqrdmulhq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vqrdmulhq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vqrdmulh_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqrdmulh_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqrdmulhq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqrdmulhq_s32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/qrdmulh_n.c000066400000000000000000000262721400333146700174650ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN qrdmulh_n #include "test-neon.h" #include "../../../simde/arm/neon/qrdmulh_n.h" static int test_simde_vqrdmulh_n_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 
1 static const struct { int16_t a[4]; int16_t b; int16_t r[4]; } test_vec[] = { { { -INT16_C( 18176), -INT16_C( 30703), INT16_C( 10850), INT16_C( 27817) }, INT16_C( 18000), { -INT16_C( 9984), -INT16_C( 16866), INT16_C( 5960), INT16_C( 15280) } }, { { -INT16_C( 2714), -INT16_C( 3736), INT16_C( 28840), -INT16_C( 15962) }, -INT16_C( 25764), { INT16_C( 2134), INT16_C( 2937), -INT16_C( 22676), INT16_C( 12550) } }, { { INT16_C( 22294), INT16_C( 23953), -INT16_C( 23491), INT16_C( 1951) }, INT16_C( 12290), { INT16_C( 8362), INT16_C( 8984), -INT16_C( 8811), INT16_C( 732) } }, { { INT16_C( 808), INT16_C( 15082), INT16_C( 19595), INT16_C( 13412) }, -INT16_C( 19272), { -INT16_C( 475), -INT16_C( 8870), -INT16_C( 11525), -INT16_C( 7888) } }, { { INT16_C( 8059), -INT16_C( 7254), INT16_C( 21008), -INT16_C( 18861) }, -INT16_C( 20716), { -INT16_C( 5095), INT16_C( 4586), -INT16_C( 13281), INT16_C( 11924) } }, { { INT16_C( 10833), -INT16_C( 7674), INT16_C( 17544), INT16_C( 10119) }, -INT16_C( 30389), { -INT16_C( 10047), INT16_C( 7117), -INT16_C( 16270), -INT16_C( 9384) } }, { { INT16_C( 29527), INT16_C( 16780), INT16_C( 6061), INT16_C( 4493) }, INT16_C( 17996), { INT16_C( 16216), INT16_C( 9215), INT16_C( 3329), INT16_C( 2468) } }, { { -INT16_C( 14394), INT16_C( 28773), INT16_C( 30122), -INT16_C( 574) }, -INT16_C( 10708), { INT16_C( 4704), -INT16_C( 9403), -INT16_C( 9843), INT16_C( 188) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); int16_t b = test_vec[i].b; simde_int16x4_t r = simde_vqrdmulh_n_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); int16_t b = simde_test_codegen_random_i16(); simde_int16x4_t r = simde_vqrdmulh_n_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, b, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqrdmulh_n_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b; int32_t r[2]; } test_vec[] = { { { -INT32_C( 544357201), -INT32_C( 1702801489) }, -INT32_C( 246487516), { INT32_C( 62481153), INT32_C( 195447034) } }, { { INT32_C( 906206034), INT32_C( 993962082) }, INT32_C( 1984742686), { INT32_C( 837531778), INT32_C( 918637483) } }, { { -INT32_C( 521306617), -INT32_C( 54329012) }, INT32_C( 1960530373), { -INT32_C( 475923278), -INT32_C( 49599296) } }, { { -INT32_C( 988848992), -INT32_C( 1783210685) }, INT32_C( 1523301112), { -INT32_C( 701432474), -INT32_C( 1264906870) } }, { { -INT32_C( 2120939166), -INT32_C( 705175090) }, -INT32_C( 1397300128), { INT32_C( 1380028468), INT32_C( 458835272) } }, { { -INT32_C( 1415022106), INT32_C( 1797293258) }, INT32_C( 590360544), { -INT32_C( 389000969), INT32_C( 494090386) } }, { { -INT32_C( 2068257140), INT32_C( 64979872) }, INT32_C( 1535407245), { -INT32_C( 1478761899), INT32_C( 46459290) } }, { { -INT32_C( 1238271146), INT32_C( 1164109663) }, INT32_C( 737217376), { -INT32_C( 425090550), INT32_C( 399631388) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); int32_t b = test_vec[i].b; simde_int32x2_t r = simde_vqrdmulh_n_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); int32_t b = simde_test_codegen_random_i32(); simde_int32x2_t r = simde_vqrdmulh_n_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqrdmulhq_n_s16 (SIMDE_MUNIT_TEST_ARGS) 
{ #if 1 static const struct { int16_t a[8]; int16_t b; int16_t r[8]; } test_vec[] = { { { -INT16_C( 1031), INT16_C( 19322), -INT16_C( 26994), -INT16_C( 9878), INT16_C( 10242), INT16_C( 17924), -INT16_C( 30389), -INT16_C( 31196) }, -INT16_C( 26280), { INT16_C( 827), -INT16_C( 15496), INT16_C( 21649), INT16_C( 7922), -INT16_C( 8214), -INT16_C( 14375), INT16_C( 24372), INT16_C( 25019) } }, { { -INT16_C( 19624), INT16_C( 22844), INT16_C( 16624), -INT16_C( 26808), INT16_C( 20694), -INT16_C( 32011), -INT16_C( 4479), -INT16_C( 899) }, INT16_C( 3130), { -INT16_C( 1874), INT16_C( 2182), INT16_C( 1588), -INT16_C( 2561), INT16_C( 1977), -INT16_C( 3058), -INT16_C( 428), -INT16_C( 86) } }, { { -INT16_C( 23406), -INT16_C( 27419), -INT16_C( 5684), INT16_C( 6106), -INT16_C( 142), -INT16_C( 13411), -INT16_C( 2664), -INT16_C( 11138) }, INT16_C( 28494), { -INT16_C( 20353), -INT16_C( 23843), -INT16_C( 4943), INT16_C( 5310), -INT16_C( 123), -INT16_C( 11662), -INT16_C( 2317), -INT16_C( 9685) } }, { { -INT16_C( 27115), -INT16_C( 5370), -INT16_C( 794), INT16_C( 26733), -INT16_C( 5142), INT16_C( 9316), -INT16_C( 2313), -INT16_C( 9016) }, -INT16_C( 27510), { INT16_C( 22764), INT16_C( 4508), INT16_C( 667), -INT16_C( 22443), INT16_C( 4317), -INT16_C( 7821), INT16_C( 1942), INT16_C( 7569) } }, { { INT16_C( 25798), INT16_C( 14507), INT16_C( 18531), -INT16_C( 1021), -INT16_C( 32194), -INT16_C( 29488), -INT16_C( 6671), -INT16_C( 2269) }, INT16_C( 2513), { INT16_C( 1978), INT16_C( 1113), INT16_C( 1421), -INT16_C( 78), -INT16_C( 2469), -INT16_C( 2261), -INT16_C( 512), -INT16_C( 174) } }, { { INT16_C( 16115), -INT16_C( 8591), -INT16_C( 10967), INT16_C( 8194), -INT16_C( 13365), INT16_C( 22013), -INT16_C( 15521), INT16_C( 3002) }, INT16_C( 7675), { INT16_C( 3774), -INT16_C( 2012), -INT16_C( 2569), INT16_C( 1919), -INT16_C( 3130), INT16_C( 5156), -INT16_C( 3635), INT16_C( 703) } }, { { -INT16_C( 173), -INT16_C( 28391), -INT16_C( 5503), INT16_C( 29214), INT16_C( 16847), -INT16_C( 24471), INT16_C( 
23882), -INT16_C( 17185) }, INT16_C( 2107), { -INT16_C( 11), -INT16_C( 1826), -INT16_C( 354), INT16_C( 1878), INT16_C( 1083), -INT16_C( 1573), INT16_C( 1536), -INT16_C( 1105) } }, { { INT16_C( 15761), INT16_C( 23849), INT16_C( 9736), INT16_C( 26802), INT16_C( 27881), -INT16_C( 7053), -INT16_C( 14710), -INT16_C( 23581) }, INT16_C( 25688), { INT16_C( 12356), INT16_C( 18696), INT16_C( 7632), INT16_C( 21011), INT16_C( 21857), -INT16_C( 5529), -INT16_C( 11532), -INT16_C( 18486) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); int16_t b = test_vec[i].b; simde_int16x8_t r = simde_vqrdmulhq_n_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); int16_t b = simde_test_codegen_random_i16(); simde_int16x8_t r = simde_vqrdmulhq_n_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqrdmulhq_n_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b; int32_t r[4]; } test_vec[] = { { { -INT32_C( 1155602282), -INT32_C( 998396914), -INT32_C( 467948871), -INT32_C( 835337376) }, INT32_C( 781379465), { -INT32_C( 420475329), -INT32_C( 363274872), -INT32_C( 170267019), -INT32_C( 303944327) } }, { { -INT32_C( 1502344877), INT32_C( 774603804), INT32_C( 1942974173), INT32_C( 1697574230) }, INT32_C( 1395239833), { -INT32_C( 976087253), INT32_C( 503267200), INT32_C( 1262368150), INT32_C( 1102929555) } }, { { -INT32_C( 1237892010), -INT32_C( 1870369785), -INT32_C( 1497491885), INT32_C( 994849311) }, -INT32_C( 1804961865), { INT32_C( 1040449306), INT32_C( 1572047423), INT32_C( 1258643226), -INT32_C( 836171707) } }, { { INT32_C( 
1007106533), -INT32_C( 1063176921), INT32_C( 907266784), INT32_C( 367806990) }, INT32_C( 161837238), { INT32_C( 75896894), -INT32_C( 80122434), INT32_C( 68372837), INT32_C( 27718426) } }, { { -INT32_C( 1498455161), INT32_C( 1289944213), INT32_C( 1507871859), -INT32_C( 1399462011) }, -INT32_C( 43174372), { INT32_C( 30125892), -INT32_C( 25933856), -INT32_C( 30315211), INT32_C( 28135671) } }, { { INT32_C( 238256128), -INT32_C( 2128404277), INT32_C( 395036816), -INT32_C( 1061275093) }, -INT32_C( 1441947594), { -INT32_C( 159979263), INT32_C( 1429136575), -INT32_C( 265251094), INT32_C( 712602896) } }, { { INT32_C( 1896082924), -INT32_C( 249653035), -INT32_C( 823227442), -INT32_C( 690216437) }, -INT32_C( 765919422), { -INT32_C( 676255085), INT32_C( 89041008), INT32_C( 293611496), INT32_C( 246171921) } }, { { -INT32_C( 219487289), INT32_C( 1420994589), INT32_C( 889110344), -INT32_C( 2103115347) }, INT32_C( 1735639961), { -INT32_C( 177394091), INT32_C( 1148476728), INT32_C( 718597063), -INT32_C( 1699780598) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); int32_t b = test_vec[i].b; simde_int32x4_t r = simde_vqrdmulhq_n_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); int32_t b = simde_test_codegen_random_i32(); simde_int32x4_t r = simde_vqrdmulhq_n_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vqrdmulh_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqrdmulh_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqrdmulhq_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqrdmulhq_n_s32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" 
simde-0.7.2/test/arm/neon/qshl.c000066400000000000000000002216401400333146700164370ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN qshl #include "test-neon.h" #include "../../../simde/arm/neon/qshl.h" #include "../../../simde/arm/neon/and.h" #include "../../../simde/arm/neon/dup_n.h" #include "../../../simde/arm/neon/neg.h" #include "../../../simde/arm/neon/shl.h" /* Until v12, clang used unsigned parameters on the scalar versions, so * when testing we need to disable the -Wsign-conversion to avoid a * diagnostic. If this is a problem in your code you might want to * consider using simde_vqshl* instead of vqshl* so you can avoid an * ifdef. */ #if HEDLEY_HAS_WARNING("-Wsign-conversion") && defined(SIMDE_NATIVE_ALIASES_TESTING) #pragma clang diagnostic ignored "-Wsign-conversion" #endif static int test_simde_vqshlb_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a; int8_t b; int8_t r; } test_vec[] = { { INT8_C( 76), INT8_C( 1), INT8_MAX }, { -INT8_C( 27), INT8_C( 11), INT8_MIN }, { INT8_C( 93), INT8_C( 7), INT8_MAX }, { -INT8_C( 1), INT8_C( 4), -INT8_C( 16) }, { INT8_C( 1), INT8_C( 8), INT8_MAX }, { -INT8_C( 48), INT8_C( 1), -INT8_C( 96) }, { -INT8_C( 1), INT8_C( 12), INT8_MIN }, { INT8_C( 3), INT8_C( 4), INT8_C( 48) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int8_t r = simde_vqshlb_s8(test_vec[i].a, test_vec[i].b); simde_assert_equal_i8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int8_t a = simde_test_codegen_random_i8() >> (simde_test_codegen_random_i8() & 7); int8_t b = simde_test_codegen_random_i8() & 15; int8_t r = simde_vqshlb_s8(a, b); simde_test_codegen_write_i8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshlh_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a; int16_t b; 
int16_t r; } test_vec[] = { { INT16_C( 1815), INT16_C( 27), INT16_MAX }, { -INT16_C( 3418), INT16_C( 8), INT16_MIN }, { INT16_C( 7), INT16_C( 25), INT16_MAX }, { INT16_C( 27), INT16_C( 7), INT16_C( 3456) }, { INT16_C( 126), INT16_C( 6), INT16_C( 8064) }, { -INT16_C( 5), INT16_C( 2), -INT16_C( 20) }, { INT16_C( 2), INT16_C( 28), INT16_MAX }, { INT16_C( 6300), INT16_C( 4), INT16_MAX } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int16_t r = simde_vqshlh_s16(test_vec[i].a, test_vec[i].b); simde_assert_equal_i16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int16_t a = simde_test_codegen_random_i16() >> (simde_test_codegen_random_i16() & 15); int16_t b = simde_test_codegen_random_i16() & 31; int16_t r = simde_vqshlh_s16(a, b); simde_test_codegen_write_i16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshls_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a; int32_t b; int32_t r; } test_vec[] = { { -INT32_C( 4189281), INT32_C( 48), INT32_MIN }, { INT32_C( 125729542), INT32_C( 40), INT32_MAX }, { -INT32_C( 95), INT32_C( 58), INT32_MIN }, { INT32_C( 1661), INT32_C( 19), INT32_C( 870842368) }, { INT32_C( 0), INT32_C( 39), INT32_C( 0) }, { -INT32_C( 553689631), INT32_C( 25), INT32_MIN }, { INT32_C( 139156), INT32_C( 19), INT32_MAX }, { INT32_C( 1819628), INT32_C( 9), INT32_C( 931649536) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int32_t r = simde_vqshls_s32(test_vec[i].a, test_vec[i].b); simde_assert_equal_i32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int32_t a = simde_test_codegen_random_i32() >> (simde_test_codegen_random_i32() & 31); int32_t b = simde_test_codegen_random_i32() & 63; int32_t r = simde_vqshls_s32(a, b); 
simde_test_codegen_write_i32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshld_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a; int64_t b; int64_t r; } test_vec[] = { { INT64_C( 5670071369984075849), INT64_C( 39), INT64_MAX }, { -INT64_C( 15590799414), INT64_C( 59), INT64_MIN }, { -INT64_C( 3355610189070484), INT64_C( 10), -INT64_C( 3436144833608175616) }, { -INT64_C( 353835611503824087), INT64_C( 42), INT64_MIN }, { INT64_C( 791223430054), INT64_C( 29), INT64_MAX }, { INT64_C( 948157670769426), INT64_C( 47), INT64_MAX }, { INT64_C( 7265832135546397357), INT64_C( 34), INT64_MAX }, { -INT64_C( 1096390776312631), INT64_C( 25), INT64_MIN } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int64_t r = simde_vqshld_s64(test_vec[i].a, test_vec[i].b); simde_assert_equal_i64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int64_t a = simde_test_codegen_random_i64() >> (simde_test_codegen_random_i64() & 31); int64_t b = simde_test_codegen_random_i64() & 63; int64_t r = simde_vqshld_s64(a, b); simde_test_codegen_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshlb_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a; int8_t b; uint8_t r; } test_vec[] = { { UINT8_C( 1), INT8_C( 9), UINT8_MAX }, { UINT8_C(107), -INT8_C( 4), UINT8_C( 6) }, { UINT8_C(182), -INT8_C( 1), UINT8_C( 91) }, { UINT8_C( 3), INT8_C( 10), UINT8_MAX }, { UINT8_C( 20), -INT8_C( 1), UINT8_C( 10) }, { UINT8_C( 6), -INT8_C( 1), UINT8_C( 3) }, { UINT8_C( 28), -INT8_C( 3), UINT8_C( 3) }, { UINT8_C( 3), -INT8_C( 13), UINT8_C( 0) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) 
; i++) { uint8_t r = simde_vqshlb_u8(test_vec[i].a, test_vec[i].b); simde_assert_equal_u8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint8_t a = simde_test_codegen_random_u8() >> (simde_test_codegen_random_u8() & 7); int8_t b = simde_test_codegen_random_i8() % 16; uint8_t r = simde_vqshlb_u8(a, b); simde_test_codegen_write_u8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshlh_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a; int16_t b; uint16_t r; } test_vec[] = { { UINT16_C( 368), -INT16_C( 6), UINT16_C( 5) }, { UINT16_C( 1), INT16_C( 22), UINT16_MAX }, { UINT16_C( 22), INT16_C( 16), UINT16_MAX }, { UINT16_C( 5634), -INT16_C( 8), UINT16_C( 22) }, { UINT16_C(17754), -INT16_C( 20), UINT16_C( 0) }, { UINT16_C( 3312), INT16_C( 13), UINT16_MAX }, { UINT16_C( 1), INT16_C( 19), UINT16_MAX }, { UINT16_C(61653), -INT16_C( 4), UINT16_C( 3853) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint16_t r = simde_vqshlh_u16(test_vec[i].a, test_vec[i].b); simde_assert_equal_u16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint16_t a = simde_test_codegen_random_u16() >> (simde_test_codegen_random_u16() & 15); int16_t b = simde_test_codegen_random_i16() % 32; uint16_t r = simde_vqshlh_u16(a, b); simde_test_codegen_write_u16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshls_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a; int32_t b; uint32_t r; } test_vec[] = { { UINT32_C( 1061138), INT32_C( 20), UINT32_MAX }, { UINT32_C(3565737185), INT32_C( 57), UINT32_MAX }, { UINT32_C( 268518), INT32_C( 43), UINT32_MAX }, { 
UINT32_C( 5), INT32_C( 19), UINT32_C( 2621440) }, { UINT32_C( 428356234), INT32_C( 10), UINT32_MAX }, { UINT32_C( 5680), INT32_C( 16), UINT32_C( 372244480) }, { UINT32_C( 206), INT32_C( 16), UINT32_C( 13500416) }, { UINT32_C( 43944177), INT32_C( 29), UINT32_MAX } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint32_t r = simde_vqshls_u32(test_vec[i].a, test_vec[i].b); simde_assert_equal_u32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint32_t a = simde_test_codegen_random_u32() >> (simde_test_codegen_random_u32() & 31); int32_t b = simde_test_codegen_random_u32() % 64; uint32_t r = simde_vqshls_u32(a, b); simde_test_codegen_write_u32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshld_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a; int64_t b; uint64_t r; } test_vec[] = { { UINT64_C( 8538370902170065728), INT64_C( 63), UINT64_MAX }, { UINT64_C( 7371871036122383575), INT64_C( 63), UINT64_MAX }, { UINT64_C( 822863165501813659), -INT64_C( 4), UINT64_C( 51428947843863353) }, { UINT64_C( 4293105166), INT64_C( 51), UINT64_MAX }, { UINT64_C( 5056019599), INT64_C( 30), UINT64_C( 5428859706410008576) }, { UINT64_C( 168829403185), -INT64_C( 5), UINT64_C( 5275918849) }, { UINT64_C( 12247585093), INT64_C( 48), UINT64_MAX }, { UINT64_C(15004996209508217752), INT64_C( 38), UINT64_MAX } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint64_t r = simde_vqshld_u64(test_vec[i].a, test_vec[i].b); simde_assert_equal_u64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a = simde_test_codegen_random_u64() >> (simde_test_codegen_random_u64() & 31); int64_t b = simde_test_codegen_random_i64() % 64; uint64_t r = simde_vqshld_u64(a, b); 
simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshl_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 65), -INT8_C( 6), -INT8_C( 64), -INT8_C( 21), INT8_C( 1), -INT8_C( 115), -INT8_C( 1), INT8_C( 31) }, { INT8_C( 1), INT8_C( 14), INT8_C( 0), INT8_C( 6), INT8_C( 8), INT8_C( 9), INT8_C( 14), INT8_C( 15) }, { INT8_MAX, INT8_MIN, -INT8_C( 64), INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX } }, { { INT8_C( 1), INT8_C( 2), -INT8_C( 4), -INT8_C( 1), INT8_C( 1), INT8_C( 13), -INT8_C( 1), -INT8_C( 1) }, { INT8_C( 15), INT8_C( 8), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 0), INT8_C( 5), INT8_C( 7) }, { INT8_MAX, INT8_MAX, INT8_MIN, -INT8_C( 32), INT8_C( 32), INT8_C( 13), -INT8_C( 32), INT8_MIN } }, { { -INT8_C( 122), INT8_C( 29), -INT8_C( 1), INT8_C( 0), -INT8_C( 5), -INT8_C( 25), -INT8_C( 4), INT8_C( 57) }, { INT8_C( 6), INT8_C( 15), INT8_C( 7), INT8_C( 13), INT8_C( 8), INT8_C( 0), INT8_C( 5), INT8_C( 8) }, { INT8_MIN, INT8_MAX, INT8_MIN, INT8_C( 0), INT8_MIN, -INT8_C( 25), INT8_MIN, INT8_MAX } }, { { INT8_C( 2), INT8_C( 8), INT8_C( 95), -INT8_C( 4), -INT8_C( 5), -INT8_C( 2), -INT8_C( 3), INT8_C( 6) }, { INT8_C( 11), INT8_C( 6), INT8_C( 8), INT8_C( 10), INT8_C( 4), INT8_C( 13), INT8_C( 4), INT8_C( 11) }, { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, -INT8_C( 80), INT8_MIN, -INT8_C( 48), INT8_MAX } }, { { INT8_C( 0), -INT8_C( 19), INT8_C( 0), INT8_C( 20), INT8_C( 0), -INT8_C( 53), -INT8_C( 3), INT8_C( 10) }, { INT8_C( 9), INT8_C( 10), INT8_C( 10), INT8_C( 6), INT8_C( 15), INT8_C( 7), INT8_C( 9), INT8_C( 10) }, { INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_MAX, INT8_C( 0), INT8_MIN, INT8_MIN, INT8_MAX } }, { { -INT8_C( 1), -INT8_C( 2), INT8_C( 4), -INT8_C( 6), INT8_C( 2), -INT8_C( 3), INT8_C( 0), INT8_C( 30) }, { INT8_C( 8), 
INT8_C( 6), INT8_C( 15), INT8_C( 5), INT8_C( 13), INT8_C( 11), INT8_C( 9), INT8_C( 6) }, { INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_C( 0), INT8_MAX } }, { { -INT8_C( 6), -INT8_C( 1), -INT8_C( 3), INT8_C( 86), INT8_C( 19), INT8_C( 1), -INT8_C( 1), INT8_C( 49) }, { INT8_C( 15), INT8_C( 2), INT8_C( 1), INT8_C( 3), INT8_C( 15), INT8_C( 5), INT8_C( 0), INT8_C( 7) }, { INT8_MIN, -INT8_C( 4), -INT8_C( 6), INT8_MAX, INT8_MAX, INT8_C( 32), -INT8_C( 1), INT8_MAX } }, { { INT8_C( 10), INT8_C( 0), -INT8_C( 1), INT8_C( 117), -INT8_C( 3), -INT8_C( 1), -INT8_C( 1), -INT8_C( 121) }, { INT8_C( 7), INT8_C( 11), INT8_C( 13), INT8_C( 4), INT8_C( 4), INT8_C( 14), INT8_C( 6), INT8_C( 4) }, { INT8_MAX, INT8_C( 0), INT8_MIN, INT8_MAX, -INT8_C( 48), INT8_MIN, -INT8_C( 64), INT8_MIN } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vqshl_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); a = simde_vshl_s8(a, simde_vneg_s8(simde_vand_s8(simde_test_arm_neon_random_i8x8(), simde_vdup_n_s8(7)))); b = simde_vand_s8(b, simde_vdup_n_s8(15)); simde_int8x8_t r = simde_vqshl_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshl_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 629), -INT16_C( 1930), -INT16_C( 203), -INT16_C( 57) }, { INT16_C( 24), INT16_C( 25), INT16_C( 9), INT16_C( 19) }, { INT16_MAX, INT16_MIN, INT16_MIN, INT16_MIN } }, { { 
INT16_C( 341), -INT16_C( 2), -INT16_C( 51), INT16_C( 320) }, { INT16_C( 15), INT16_C( 9), INT16_C( 26), INT16_C( 24) }, { INT16_MAX, -INT16_C( 1024), INT16_MIN, INT16_MAX } }, { { -INT16_C( 9), -INT16_C( 1567), -INT16_C( 31654), -INT16_C( 66) }, { INT16_C( 25), INT16_C( 13), INT16_C( 19), INT16_C( 0) }, { INT16_MIN, INT16_MIN, INT16_MIN, -INT16_C( 66) } }, { { -INT16_C( 3881), INT16_C( 1242), INT16_C( 43), INT16_C( 24) }, { INT16_C( 3), INT16_C( 31), INT16_C( 2), INT16_C( 27) }, { -INT16_C( 31048), INT16_MAX, INT16_C( 172), INT16_MAX } }, { { INT16_C( 15), INT16_C( 991), -INT16_C( 4), -INT16_C( 31) }, { INT16_C( 23), INT16_C( 10), INT16_C( 20), INT16_C( 16) }, { INT16_MAX, INT16_MAX, INT16_MIN, INT16_MIN } }, { { INT16_C( 38), -INT16_C( 758), INT16_C( 1), INT16_C( 1012) }, { INT16_C( 9), INT16_C( 27), INT16_C( 30), INT16_C( 22) }, { INT16_C( 19456), INT16_MIN, INT16_MAX, INT16_MAX } }, { { INT16_C( 974), INT16_C( 63), -INT16_C( 79), INT16_C( 8) }, { INT16_C( 30), INT16_C( 3), INT16_C( 14), INT16_C( 29) }, { INT16_MAX, INT16_C( 504), INT16_MIN, INT16_MAX } }, { { -INT16_C( 97), INT16_C( 7), INT16_C( 935), -INT16_C( 2) }, { INT16_C( 13), INT16_C( 9), INT16_C( 30), INT16_C( 25) }, { INT16_MIN, INT16_C( 3584), INT16_MAX, INT16_MIN } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vqshl_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); a = simde_vshl_s16(a, simde_vneg_s16(simde_vand_s16(simde_test_arm_neon_random_i16x4(), simde_vdup_n_s16(15)))); b = simde_vand_s16(b, simde_vdup_n_s16(31)); simde_int16x4_t r = simde_vqshl_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); 
simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshl_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 1173271), INT32_C( 7299919) }, { INT32_C( 20), INT32_C( 51) }, { INT32_MIN, INT32_MAX } }, { { -INT32_C( 90947158), -INT32_C( 2365082) }, { INT32_C( 54), INT32_C( 18) }, { INT32_MIN, INT32_MIN } }, { { INT32_C( 4265), INT32_C( 2) }, { INT32_C( 32), INT32_C( 15) }, { INT32_MAX, INT32_C( 65536) } }, { { INT32_C( 2841512), -INT32_C( 1958) }, { INT32_C( 44), INT32_C( 14) }, { INT32_MAX, -INT32_C( 32079872) } }, { { -INT32_C( 97), INT32_C( 128986388) }, { INT32_C( 4), INT32_C( 47) }, { -INT32_C( 1552), INT32_MAX } }, { { -INT32_C( 401488194), -INT32_C( 2750) }, { INT32_C( 51), INT32_C( 19) }, { INT32_MIN, -INT32_C( 1441792000) } }, { { -INT32_C( 36123), INT32_C( 9) }, { INT32_C( 20), INT32_C( 39) }, { INT32_MIN, INT32_MAX } }, { { INT32_C( 41), -INT32_C( 14) }, { INT32_C( 55), INT32_C( 15) }, { INT32_MAX, -INT32_C( 458752) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vqshl_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); a = simde_vshl_s32(a, simde_vneg_s32(simde_vand_s32(simde_test_arm_neon_random_i32x2(), simde_vdup_n_s32(31)))); b = simde_vand_s32(b, simde_vdup_n_s32(63)); simde_int32x2_t r = simde_vqshl_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, 
SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshl_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int64_t b[1]; int64_t r[1]; } test_vec[] = { { { -INT64_C( 1420889861288) }, { INT64_C( 36) }, { INT64_MIN } }, { { INT64_C( 23751618503999) }, { INT64_C( 10) }, { INT64_C( 24321657348094976) } }, { { INT64_C( 58288625249) }, { INT64_C( 19) }, { INT64_C( 30560026754547712) } }, { { INT64_C( 104685087965022) }, { INT64_C( 23) }, { INT64_MAX } }, { { INT64_C( 6277) }, { INT64_C( 11) }, { INT64_C( 12855296) } }, { { INT64_C( 412853) }, { INT64_C( 32) }, { INT64_C( 1773190133055488) } }, { { -INT64_C( 3675864865379130) }, { INT64_C( 30) }, { INT64_MIN } }, { { INT64_C( 1096678395763767) }, { INT64_C( 60) }, { INT64_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r = simde_vqshl_s64(a, b); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); simde_int64x1_t b = simde_test_arm_neon_random_i64x1(); a = simde_vshl_s64(a, simde_vneg_s64(simde_vand_s64(simde_test_arm_neon_random_i64x1(), simde_vdup_n_s64(63)))); b = simde_vand_s64(b, simde_vdup_n_s64(63)); simde_int64x1_t r = simde_vqshl_s64(a, b); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshl_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; int8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 2), UINT8_C( 79), UINT8_C( 19), UINT8_C(202), UINT8_C( 12), UINT8_C( 46), UINT8_C( 3), UINT8_C( 74) }, { INT8_C( 5), INT8_C( 0), INT8_C( 12), INT8_C( 0), INT8_C( 4), INT8_C( 
8), INT8_C( 2), INT8_C( 8) }, { UINT8_C( 64), UINT8_C( 79), UINT8_MAX, UINT8_C(202), UINT8_C(192), UINT8_MAX, UINT8_C( 12), UINT8_MAX } }, { { UINT8_C(147), UINT8_C( 12), UINT8_C( 6), UINT8_C( 13), UINT8_C( 3), UINT8_C( 0), UINT8_C( 9), UINT8_C( 59) }, { INT8_C( 4), INT8_C( 3), INT8_C( 0), INT8_C( 11), INT8_C( 15), INT8_C( 8), INT8_C( 0), INT8_C( 4) }, { UINT8_MAX, UINT8_C( 96), UINT8_C( 6), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 9), UINT8_MAX } }, { { UINT8_C(104), UINT8_C(102), UINT8_C( 22), UINT8_C( 36), UINT8_C( 57), UINT8_C( 23), UINT8_C( 12), UINT8_C( 30) }, { INT8_C( 13), INT8_C( 14), INT8_C( 14), INT8_C( 2), INT8_C( 4), INT8_C( 14), INT8_C( 9), INT8_C( 8) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C(144), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 0), UINT8_C( 2), UINT8_C( 20), UINT8_C( 14), UINT8_C( 14), UINT8_C( 6), UINT8_C( 26), UINT8_C( 5) }, { INT8_C( 8), INT8_C( 5), INT8_C( 3), INT8_C( 14), INT8_C( 3), INT8_C( 8), INT8_C( 7), INT8_C( 0) }, { UINT8_C( 0), UINT8_C( 64), UINT8_C(160), UINT8_MAX, UINT8_C(112), UINT8_MAX, UINT8_MAX, UINT8_C( 5) } }, { { UINT8_C( 0), UINT8_C( 57), UINT8_C( 4), UINT8_C( 12), UINT8_C( 21), UINT8_C( 27), UINT8_C( 2), UINT8_C( 58) }, { INT8_C( 1), INT8_C( 9), INT8_C( 13), INT8_C( 0), INT8_C( 4), INT8_C( 2), INT8_C( 15), INT8_C( 13) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 12), UINT8_MAX, UINT8_C(108), UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 62), UINT8_C( 47), UINT8_C( 43), UINT8_C( 1), UINT8_C( 5), UINT8_C(120), UINT8_C( 4), UINT8_C( 14) }, { INT8_C( 7), INT8_C( 7), INT8_C( 7), INT8_C( 2), INT8_C( 0), INT8_C( 9), INT8_C( 12), INT8_C( 1) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 4), UINT8_C( 5), UINT8_MAX, UINT8_MAX, UINT8_C( 28) } }, { { UINT8_C( 26), UINT8_C( 39), UINT8_C(174), UINT8_C(111), UINT8_C( 22), UINT8_C( 7), UINT8_C( 12), UINT8_C( 2) }, { INT8_C( 8), INT8_C( 14), INT8_C( 8), INT8_C( 2), INT8_C( 14), INT8_C( 10), INT8_C( 2), INT8_C( 6) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, 
UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 48), UINT8_C(128) } }, { { UINT8_C(141), UINT8_C( 0), UINT8_C( 2), UINT8_C( 1), UINT8_C( 43), UINT8_C(193), UINT8_C( 1), UINT8_C( 17) }, { INT8_C( 0), INT8_C( 10), INT8_C( 9), INT8_C( 3), INT8_C( 3), INT8_C( 10), INT8_C( 14), INT8_C( 11) }, { UINT8_C(141), UINT8_C( 0), UINT8_MAX, UINT8_C( 8), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_uint8x8_t r = simde_vqshl_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); a = simde_vshl_u8(a, simde_vneg_s8(simde_vand_s8(simde_test_arm_neon_random_i8x8(), simde_vdup_n_s8(7)))); b = simde_vand_s8(b, simde_vdup_n_s8(15)); simde_uint8x8_t r = simde_vqshl_u8(a, b); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshl_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; int16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C( 1105), UINT16_C( 11), UINT16_C( 3430), UINT16_C( 11) }, { INT16_C( 1), INT16_C( 26), INT16_C( 18), INT16_C( 5) }, { UINT16_C( 2210), UINT16_MAX, UINT16_MAX, UINT16_C( 352) } }, { { UINT16_C(36784), UINT16_C( 3175), UINT16_C( 172), UINT16_C( 1338) }, { INT16_C( 17), INT16_C( 4), INT16_C( 12), INT16_C( 18) }, { UINT16_MAX, UINT16_C(50800), UINT16_MAX, UINT16_MAX } }, { { UINT16_C( 1559), UINT16_C( 2688), UINT16_C(53249), UINT16_C( 2) }, { INT16_C( 31), INT16_C( 23), INT16_C( 25), INT16_C( 11) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 4096) } }, { { UINT16_C( 2352), 
UINT16_C(14028), UINT16_C( 3054), UINT16_C( 155) }, { INT16_C( 24), INT16_C( 2), INT16_C( 11), INT16_C( 11) }, { UINT16_MAX, UINT16_C(56112), UINT16_MAX, UINT16_MAX } }, { { UINT16_C( 3439), UINT16_C( 62), UINT16_C( 20), UINT16_C( 34) }, { INT16_C( 23), INT16_C( 31), INT16_C( 7), INT16_C( 1) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 2560), UINT16_C( 68) } }, { { UINT16_C( 2), UINT16_C(10879), UINT16_C( 20), UINT16_C( 3359) }, { INT16_C( 15), INT16_C( 16), INT16_C( 23), INT16_C( 20) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C( 3984), UINT16_C( 4191), UINT16_C( 67), UINT16_C( 84) }, { INT16_C( 17), INT16_C( 18), INT16_C( 3), INT16_C( 29) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 536), UINT16_MAX } }, { { UINT16_C( 1), UINT16_C( 1767), UINT16_C( 4978), UINT16_C( 3035) }, { INT16_C( 23), INT16_C( 26), INT16_C( 25), INT16_C( 1) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 6070) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_uint16x4_t r = simde_vqshl_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); a = simde_vshl_u16(a, simde_vneg_s16(simde_vand_s16(simde_test_arm_neon_random_i16x4(), simde_vdup_n_s16(15)))); b = simde_vand_s16(b, simde_vdup_n_s16(31)); simde_uint16x4_t r = simde_vqshl_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshl_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; int32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C( 11), UINT32_C( 2456) 
}, { INT32_C( 55), INT32_C( 31) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C( 146134516), UINT32_C( 28278) }, { INT32_C( 20), INT32_C( 24) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C( 97), UINT32_C( 436) }, { INT32_C( 9), INT32_C( 63) }, { UINT32_C( 49664), UINT32_MAX } }, { { UINT32_C( 25335), UINT32_C( 9) }, { INT32_C( 31), INT32_C( 46) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C( 603), UINT32_C( 113827) }, { INT32_C( 63), INT32_C( 39) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C( 14), UINT32_C( 1635361) }, { INT32_C( 60), INT32_C( 62) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C( 3044), UINT32_C( 1889936) }, { INT32_C( 31), INT32_C( 42) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C( 11315), UINT32_C( 2830620) }, { INT32_C( 20), INT32_C( 31) }, { UINT32_MAX, UINT32_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_uint32x2_t r = simde_vqshl_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); a = simde_vshl_u32(a, simde_vneg_s32(simde_vand_s32(simde_test_arm_neon_random_i32x2(), simde_vdup_n_s32(31)))); b = simde_vand_s32(b, simde_vdup_n_s32(63)); simde_uint32x2_t r = simde_vqshl_u32(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshl_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[1]; int64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C( 11758907) }, { INT64_C( 53) }, { UINT64_MAX } }, { { UINT64_C( 201) }, { INT64_C( 39) }, { UINT64_C( 110500918591488) } }, { { UINT64_C( 10353) 
}, { INT64_C( 60) }, { UINT64_MAX } }, { { UINT64_C( 16865279727) }, { INT64_C( 54) }, { UINT64_MAX } }, { { UINT64_C( 298491154210) }, { INT64_C( 26) }, { UINT64_MAX } }, { { UINT64_C( 45) }, { INT64_C( 59) }, { UINT64_MAX } }, { { UINT64_C( 158) }, { INT64_C( 54) }, { UINT64_C( 2846274964498153472) } }, { { UINT64_C( 415649) }, { INT64_C( 33) }, { UINT64_C( 3570397723230208) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_uint64x1_t r = simde_vqshl_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); simde_int64x1_t b = simde_test_arm_neon_random_i64x1(); a = simde_vshl_u64(a, simde_vneg_s64(simde_vand_s64(simde_test_arm_neon_random_i64x1(), simde_vdup_n_s64(63)))); b = simde_vand_s64(b, simde_vdup_n_s64(63)); simde_uint64x1_t r = simde_vqshl_u64(a, b); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshlq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { -INT8_C( 1), INT8_C( 0), -INT8_C( 3), INT8_C( 0), INT8_C( 5), -INT8_C( 2), INT8_C( 41), INT8_C( 0), INT8_C( 45), INT8_C( 6), -INT8_C( 39), -INT8_C( 7), -INT8_C( 23), -INT8_C( 59), -INT8_C( 8), -INT8_C( 1) }, { INT8_C( 5), INT8_C( 7), INT8_C( 6), INT8_C( 1), INT8_C( 7), INT8_C( 14), INT8_C( 8), INT8_C( 11), INT8_C( 14), INT8_C( 0), INT8_C( 2), INT8_C( 1), INT8_C( 7), INT8_C( 12), INT8_C( 10), INT8_C( 1) }, { -INT8_C( 32), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_MAX, INT8_MIN, INT8_MAX, INT8_C( 0), INT8_MAX, INT8_C( 6), INT8_MIN, -INT8_C( 14), INT8_MIN, INT8_MIN, 
INT8_MIN, -INT8_C( 2) } }, { { -INT8_C( 17), -INT8_C( 16), INT8_C( 9), INT8_C( 27), INT8_C( 6), INT8_C( 1), INT8_C( 49), -INT8_C( 11), INT8_C( 2), INT8_C( 58), -INT8_C( 16), INT8_C( 15), -INT8_C( 4), -INT8_C( 1), -INT8_C( 8), INT8_C( 5) }, { INT8_C( 7), INT8_C( 5), INT8_C( 15), INT8_C( 3), INT8_C( 4), INT8_C( 15), INT8_C( 11), INT8_C( 5), INT8_C( 3), INT8_C( 4), INT8_C( 9), INT8_C( 13), INT8_C( 13), INT8_C( 6), INT8_C( 11), INT8_C( 12) }, { INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_C( 96), INT8_MAX, INT8_MAX, INT8_MIN, INT8_C( 16), INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, -INT8_C( 64), INT8_MIN, INT8_MAX } }, { { -INT8_C( 2), -INT8_C( 1), INT8_C( 11), -INT8_C( 3), INT8_C( 0), INT8_C( 1), -INT8_C( 30), INT8_C( 2), INT8_C( 19), INT8_C( 3), INT8_C( 17), INT8_C( 6), -INT8_C( 26), -INT8_C( 26), INT8_C( 1), -INT8_C( 32) }, { INT8_C( 8), INT8_C( 11), INT8_C( 12), INT8_C( 11), INT8_C( 1), INT8_C( 5), INT8_C( 7), INT8_C( 12), INT8_C( 14), INT8_C( 2), INT8_C( 7), INT8_C( 2), INT8_C( 13), INT8_C( 10), INT8_C( 14), INT8_C( 14) }, { INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_C( 0), INT8_C( 32), INT8_MIN, INT8_MAX, INT8_MAX, INT8_C( 12), INT8_MAX, INT8_C( 24), INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN } }, { { INT8_C( 4), INT8_C( 0), -INT8_C( 7), -INT8_C( 16), -INT8_C( 13), INT8_C( 0), INT8_C( 1), INT8_C( 19), -INT8_C( 1), INT8_C( 44), INT8_C( 1), -INT8_C( 9), -INT8_C( 77), -INT8_C( 49), -INT8_C( 11), -INT8_C( 1) }, { INT8_C( 11), INT8_C( 14), INT8_C( 12), INT8_C( 2), INT8_C( 12), INT8_C( 15), INT8_C( 8), INT8_C( 14), INT8_C( 3), INT8_C( 9), INT8_C( 10), INT8_C( 5), INT8_C( 2), INT8_C( 9), INT8_C( 0), INT8_C( 6) }, { INT8_MAX, INT8_C( 0), INT8_MIN, -INT8_C( 64), INT8_MIN, INT8_C( 0), INT8_MAX, INT8_MAX, -INT8_C( 8), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, -INT8_C( 11), -INT8_C( 64) } }, { { -INT8_C( 2), -INT8_C( 53), INT8_C( 1), -INT8_C( 14), -INT8_C( 1), INT8_C( 70), INT8_C( 49), -INT8_C( 2), INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 4), INT8_C( 1), INT8_C( 0), 
INT8_C( 1), -INT8_C( 43) }, { INT8_C( 4), INT8_C( 4), INT8_C( 13), INT8_C( 7), INT8_C( 2), INT8_C( 14), INT8_C( 9), INT8_C( 10), INT8_C( 15), INT8_C( 7), INT8_C( 4), INT8_C( 15), INT8_C( 1), INT8_C( 14), INT8_C( 11), INT8_C( 9) }, { -INT8_C( 32), INT8_MIN, INT8_MAX, INT8_MIN, -INT8_C( 4), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_C( 0), INT8_MIN, INT8_C( 2), INT8_C( 0), INT8_MAX, INT8_MIN } }, { { -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 121), INT8_C( 2), -INT8_C( 121), INT8_C( 28), -INT8_C( 52), -INT8_C( 69), INT8_C( 7), INT8_C( 10), INT8_C( 1), -INT8_C( 1), -INT8_C( 93), INT8_C( 13) }, { INT8_C( 12), INT8_C( 0), INT8_C( 6), INT8_C( 11), INT8_C( 1), INT8_C( 8), INT8_C( 9), INT8_C( 15), INT8_C( 7), INT8_C( 8), INT8_C( 3), INT8_C( 5), INT8_C( 15), INT8_C( 2), INT8_C( 6), INT8_C( 9) }, { INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_C( 56), INT8_MAX, INT8_MAX, -INT8_C( 4), INT8_MIN, INT8_MAX } }, { { -INT8_C( 2), INT8_C( 9), INT8_C( 14), -INT8_C( 11), INT8_C( 0), INT8_C( 31), INT8_C( 19), -INT8_C( 2), -INT8_C( 3), -INT8_C( 7), INT8_C( 10), INT8_C( 34), -INT8_C( 3), INT8_C( 24), -INT8_C( 7), INT8_C( 0) }, { INT8_C( 0), INT8_C( 5), INT8_C( 1), INT8_C( 1), INT8_C( 2), INT8_C( 1), INT8_C( 10), INT8_C( 11), INT8_C( 9), INT8_C( 14), INT8_C( 14), INT8_C( 5), INT8_C( 4), INT8_C( 14), INT8_C( 14), INT8_C( 11) }, { -INT8_C( 2), INT8_MAX, INT8_C( 28), -INT8_C( 22), INT8_C( 0), INT8_C( 62), INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, -INT8_C( 48), INT8_MAX, INT8_MIN, INT8_C( 0) } }, { { -INT8_C( 2), INT8_C( 0), INT8_C( 10), INT8_C( 40), INT8_C( 109), INT8_C( 0), -INT8_C( 4), -INT8_C( 25), -INT8_C( 41), INT8_C( 0), -INT8_C( 8), -INT8_C( 14), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 2) }, { INT8_C( 12), INT8_C( 3), INT8_C( 15), INT8_C( 12), INT8_C( 13), INT8_C( 8), INT8_C( 1), INT8_C( 2), INT8_C( 12), INT8_C( 12), INT8_C( 4), INT8_C( 2), INT8_C( 5), INT8_C( 15), 
INT8_C( 8), INT8_C( 3) }, { INT8_MIN, INT8_C( 0), INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 0), -INT8_C( 8), -INT8_C( 100), INT8_MIN, INT8_C( 0), INT8_MIN, -INT8_C( 56), -INT8_C( 32), INT8_MIN, INT8_MIN, INT8_C( 16) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vqshlq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); a = simde_vshlq_s8(a, simde_vnegq_s8(simde_vandq_s8(simde_test_arm_neon_random_i8x16(), simde_vdupq_n_s8(7)))); b = simde_vandq_s8(b, simde_vdupq_n_s8(15)); simde_int8x16_t r = simde_vqshlq_s8(a, b); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshlq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 0), INT16_C( 19), INT16_C( 25), INT16_C( 7), -INT16_C( 34), INT16_C( 659), -INT16_C( 161), -INT16_C( 1333) }, { INT16_C( 19), INT16_C( 10), INT16_C( 23), INT16_C( 29), INT16_C( 25), INT16_C( 18), INT16_C( 5), INT16_C( 9) }, { INT16_C( 0), INT16_C( 19456), INT16_MAX, INT16_MAX, INT16_MIN, INT16_MAX, -INT16_C( 5152), INT16_MIN } }, { { -INT16_C( 794), INT16_C( 7198), -INT16_C( 83), INT16_C( 687), -INT16_C( 1495), INT16_C( 477), -INT16_C( 111), -INT16_C( 6) }, { INT16_C( 1), INT16_C( 25), INT16_C( 15), INT16_C( 30), INT16_C( 12), INT16_C( 17), INT16_C( 8), INT16_C( 13) }, { -INT16_C( 1588), INT16_MAX, INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX, -INT16_C( 28416), INT16_MIN } }, { { -INT16_C( 1), INT16_C( 394), -INT16_C( 
70), -INT16_C( 2), INT16_C( 14), -INT16_C( 4474), -INT16_C( 2), -INT16_C( 1) }, { INT16_C( 17), INT16_C( 20), INT16_C( 22), INT16_C( 15), INT16_C( 17), INT16_C( 12), INT16_C( 2), INT16_C( 25) }, { INT16_MIN, INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN, -INT16_C( 8), INT16_MIN } }, { { INT16_C( 4), INT16_C( 4), INT16_C( 1287), INT16_C( 368), -INT16_C( 73), INT16_C( 109), -INT16_C( 138), -INT16_C( 228) }, { INT16_C( 4), INT16_C( 23), INT16_C( 19), INT16_C( 3), INT16_C( 13), INT16_C( 23), INT16_C( 11), INT16_C( 26) }, { INT16_C( 64), INT16_MAX, INT16_MAX, INT16_C( 2944), INT16_MIN, INT16_MAX, INT16_MIN, INT16_MIN } }, { { -INT16_C( 197), INT16_C( 1656), -INT16_C( 1202), INT16_C( 12), INT16_C( 10), -INT16_C( 1877), -INT16_C( 6), INT16_C( 563) }, { INT16_C( 20), INT16_C( 24), INT16_C( 16), INT16_C( 17), INT16_C( 4), INT16_C( 17), INT16_C( 28), INT16_C( 14) }, { INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX, INT16_C( 160), INT16_MIN, INT16_MIN, INT16_MAX } }, { { -INT16_C( 5), -INT16_C( 2325), INT16_C( 1), -INT16_C( 8), INT16_C( 0), -INT16_C( 1092), -INT16_C( 446), -INT16_C( 49) }, { INT16_C( 18), INT16_C( 1), INT16_C( 13), INT16_C( 21), INT16_C( 17), INT16_C( 10), INT16_C( 22), INT16_C( 28) }, { INT16_MIN, -INT16_C( 4650), INT16_C( 8192), INT16_MIN, INT16_C( 0), INT16_MIN, INT16_MIN, INT16_MIN } }, { { INT16_C( 7), -INT16_C( 16), -INT16_C( 24061), -INT16_C( 685), INT16_C( 0), INT16_C( 6), -INT16_C( 779), INT16_C( 0) }, { INT16_C( 17), INT16_C( 12), INT16_C( 13), INT16_C( 4), INT16_C( 9), INT16_C( 29), INT16_C( 30), INT16_C( 1) }, { INT16_MAX, INT16_MIN, INT16_MIN, -INT16_C( 10960), INT16_C( 0), INT16_MAX, INT16_MIN, INT16_C( 0) } }, { { INT16_C( 23495), INT16_C( 0), INT16_C( 25), INT16_C( 27714), INT16_C( 1), INT16_C( 877), INT16_C( 3402), -INT16_C( 16431) }, { INT16_C( 24), INT16_C( 14), INT16_C( 4), INT16_C( 3), INT16_C( 25), INT16_C( 22), INT16_C( 17), INT16_C( 20) }, { INT16_MAX, INT16_C( 0), INT16_C( 400), INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, 
INT16_MIN } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vqshlq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); a = simde_vshlq_s16(a, simde_vnegq_s16(simde_vandq_s16(simde_test_arm_neon_random_i16x8(), simde_vdupq_n_s16(15)))); b = simde_vandq_s16(b, simde_vdupq_n_s16(31)); simde_int16x8_t r = simde_vqshlq_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshlq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 339863), INT32_C( 968313), -INT32_C( 1235562459), -INT32_C( 9465) }, { INT32_C( 4), INT32_C( 44), INT32_C( 3), INT32_C( 15) }, { -INT32_C( 5437808), INT32_MAX, INT32_MIN, -INT32_C( 310149120) } }, { { -INT32_C( 4310), -INT32_C( 6), -INT32_C( 3997), INT32_C( 2046270) }, { INT32_C( 32), INT32_C( 42), INT32_C( 54), INT32_C( 54) }, { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MAX } }, { { INT32_C( 60), INT32_C( 0), -INT32_C( 2), -INT32_C( 2) }, { INT32_C( 41), INT32_C( 52), INT32_C( 39), INT32_C( 32) }, { INT32_MAX, INT32_C( 0), INT32_MIN, INT32_MIN } }, { { INT32_C( 118), INT32_C( 45659413), -INT32_C( 2), -INT32_C( 1935607) }, { INT32_C( 20), INT32_C( 0), INT32_C( 48), INT32_C( 60) }, { INT32_C( 123731968), INT32_C( 45659413), INT32_MIN, INT32_MIN } }, { { -INT32_C( 3), -INT32_C( 56), -INT32_C( 22048), INT32_C( 12517613) }, { INT32_C( 26), INT32_C( 32), INT32_C( 8), INT32_C( 1) }, { -INT32_C( 201326592), 
INT32_MIN, -INT32_C( 5644288), INT32_C( 25035226) } }, { { INT32_C( 482311), INT32_C( 1), INT32_C( 28143), -INT32_C( 1) }, { INT32_C( 17), INT32_C( 40), INT32_C( 5), INT32_C( 15) }, { INT32_MAX, INT32_MAX, INT32_C( 900576), -INT32_C( 32768) } }, { { -INT32_C( 12146667), -INT32_C( 412308), INT32_C( 58268), INT32_C( 51128) }, { INT32_C( 3), INT32_C( 1), INT32_C( 57), INT32_C( 61) }, { -INT32_C( 97173336), -INT32_C( 824616), INT32_MAX, INT32_MAX } }, { { -INT32_C( 49114), -INT32_C( 310905264), INT32_C( 0), -INT32_C( 19) }, { INT32_C( 30), INT32_C( 26), INT32_C( 63), INT32_C( 25) }, { INT32_MIN, INT32_MIN, INT32_C( 0), -INT32_C( 637534208) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vqshlq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); a = simde_vshlq_s32(a, simde_vnegq_s32(simde_vandq_s32(simde_test_arm_neon_random_i32x4(), simde_vdupq_n_s32(31)))); b = simde_vandq_s32(b, simde_vdupq_n_s32(63)); simde_int32x4_t r = simde_vqshlq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshlq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { -INT64_C( 138420769706638201), -INT64_C( 30931) }, { INT64_C( 11), INT64_C( 55) }, { INT64_MIN, INT64_MIN } }, { { -INT64_C( 4812641786), INT64_C( 5565153) }, { INT64_C( 25), INT64_C( 14) }, { -INT64_C( 161485461548695552), INT64_C( 91179466752) } }, { { -INT64_C( 123), INT64_C( 318) }, { 
INT64_C( 34), INT64_C( 6) }, { -INT64_C( 2113123909632), INT64_C( 20352) } }, { { INT64_C( 516225737598936679), -INT64_C( 151207) }, { INT64_C( 61), INT64_C( 42) }, { INT64_MAX, -INT64_C( 665015418804502528) } }, { { -INT64_C( 1210783136), -INT64_C( 2) }, { INT64_C( 50), INT64_C( 38) }, { INT64_MIN, -INT64_C( 549755813888) } }, { { -INT64_C( 288097725918163), INT64_C( 38) }, { INT64_C( 56), INT64_C( 45) }, { INT64_MIN, INT64_C( 1337006139375616) } }, { { INT64_C( 7429992216965), INT64_C( 73287991613121) }, { INT64_C( 42), INT64_C( 15) }, { INT64_MAX, INT64_C( 2401500909178748928) } }, { { -INT64_C( 909897764488), INT64_C( 6277) }, { INT64_C( 58), INT64_C( 53) }, { INT64_MIN, INT64_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_vqshlq_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); a = simde_vshlq_s64(a, simde_vnegq_s64(simde_vandq_s64(simde_test_arm_neon_random_i64x2(), simde_vdupq_n_s64(63)))); b = simde_vandq_s64(b, simde_vdupq_n_s64(63)); simde_int64x2_t r = simde_vqshlq_s64(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshlq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; int8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C( 1), UINT8_C( 3), UINT8_C( 1), UINT8_C( 19), UINT8_C( 19), UINT8_C( 0), UINT8_C( 15), UINT8_C( 8), UINT8_C( 32), UINT8_C( 9), UINT8_C( 3), UINT8_C( 10), UINT8_C( 3), UINT8_C( 26), UINT8_C( 30), UINT8_C( 75) }, { INT8_C( 11), INT8_C( 
15), INT8_C( 9), INT8_C( 4), INT8_C( 10), INT8_C( 6), INT8_C( 8), INT8_C( 12), INT8_C( 2), INT8_C( 13), INT8_C( 13), INT8_C( 9), INT8_C( 8), INT8_C( 15), INT8_C( 10), INT8_C( 5) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C(128), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 5), UINT8_C( 26), UINT8_C( 4), UINT8_C( 6), UINT8_C(108), UINT8_C( 2), UINT8_C(232), UINT8_C( 4), UINT8_C( 2), UINT8_C( 24), UINT8_C( 1), UINT8_C( 1), UINT8_C( 98), UINT8_C( 0), UINT8_C( 2), UINT8_C( 4) }, { INT8_C( 4), INT8_C( 7), INT8_C( 14), INT8_C( 14), INT8_C( 15), INT8_C( 3), INT8_C( 9), INT8_C( 0), INT8_C( 15), INT8_C( 14), INT8_C( 2), INT8_C( 5), INT8_C( 0), INT8_C( 11), INT8_C( 6), INT8_C( 2) }, { UINT8_C( 80), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 16), UINT8_MAX, UINT8_C( 4), UINT8_MAX, UINT8_MAX, UINT8_C( 4), UINT8_C( 32), UINT8_C( 98), UINT8_C( 0), UINT8_C(128), UINT8_C( 16) } }, { { UINT8_C( 3), UINT8_C( 25), UINT8_C( 12), UINT8_C( 50), UINT8_C( 1), UINT8_C( 0), UINT8_C( 15), UINT8_C( 54), UINT8_C( 1), UINT8_C( 6), UINT8_C( 0), UINT8_C( 37), UINT8_C( 5), UINT8_C( 49), UINT8_C( 62), UINT8_C( 63) }, { INT8_C( 3), INT8_C( 10), INT8_C( 5), INT8_C( 12), INT8_C( 0), INT8_C( 5), INT8_C( 0), INT8_C( 10), INT8_C( 8), INT8_C( 7), INT8_C( 9), INT8_C( 10), INT8_C( 6), INT8_C( 4), INT8_C( 15), INT8_C( 12) }, { UINT8_C( 24), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 1), UINT8_C( 0), UINT8_C( 15), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 3), UINT8_C( 2), UINT8_C( 86), UINT8_C( 46), UINT8_C(101), UINT8_C( 28), UINT8_C( 3), UINT8_C( 14), UINT8_C(113), UINT8_C( 56), UINT8_C( 5), UINT8_C( 0), UINT8_C( 0), UINT8_C( 13), UINT8_C( 1), UINT8_C( 1) }, { INT8_C( 0), INT8_C( 7), INT8_C( 10), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 9), INT8_C( 14), INT8_C( 4), INT8_C( 14), INT8_C( 15), INT8_C( 1), INT8_C( 
0), INT8_C( 0), INT8_C( 1), INT8_C( 13) }, { UINT8_C( 3), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 13), UINT8_C( 2), UINT8_MAX } }, { { UINT8_C( 5), UINT8_C( 1), UINT8_MAX, UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 9), UINT8_C( 4), UINT8_C( 0), UINT8_C(111), UINT8_C( 3), UINT8_C( 2), UINT8_C( 7), UINT8_C( 24), UINT8_C( 3), UINT8_C( 1) }, { INT8_C( 5), INT8_C( 14), INT8_C( 7), INT8_C( 15), INT8_C( 14), INT8_C( 12), INT8_C( 11), INT8_C( 15), INT8_C( 15), INT8_C( 1), INT8_C( 5), INT8_C( 14), INT8_C( 12), INT8_C( 12), INT8_C( 14), INT8_C( 14) }, { UINT8_C(160), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C(222), UINT8_C( 96), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 54), UINT8_C( 2), UINT8_C( 25), UINT8_C(165), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 87), UINT8_C(178), UINT8_C( 3), UINT8_C( 3), UINT8_C( 1), UINT8_C(245), UINT8_C( 28), UINT8_C( 78), UINT8_C( 81) }, { INT8_C( 0), INT8_C( 13), INT8_C( 1), INT8_C( 6), INT8_C( 12), INT8_C( 4), INT8_C( 2), INT8_C( 11), INT8_C( 13), INT8_C( 6), INT8_C( 9), INT8_C( 15), INT8_C( 15), INT8_C( 13), INT8_C( 6), INT8_C( 5) }, { UINT8_C( 54), UINT8_MAX, UINT8_C( 50), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C(192), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 1), UINT8_C( 1), UINT8_C(123), UINT8_C( 5), UINT8_C( 5), UINT8_C( 44), UINT8_C( 8), UINT8_C( 5), UINT8_C( 40), UINT8_C( 5), UINT8_C( 2), UINT8_MAX, UINT8_C( 8), UINT8_C( 84), UINT8_C( 1), UINT8_C( 14) }, { INT8_C( 4), INT8_C( 15), INT8_C( 15), INT8_C( 2), INT8_C( 14), INT8_C( 6), INT8_C( 2), INT8_C( 6), INT8_C( 3), INT8_C( 0), INT8_C( 4), INT8_C( 4), INT8_C( 12), INT8_C( 5), INT8_C( 5), INT8_C( 4) }, { UINT8_C( 16), UINT8_MAX, UINT8_MAX, UINT8_C( 20), UINT8_MAX, UINT8_MAX, UINT8_C( 32), UINT8_MAX, 
UINT8_MAX, UINT8_C( 5), UINT8_C( 32), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 32), UINT8_C(224) } }, { { UINT8_C(232), UINT8_C( 1), UINT8_C( 12), UINT8_C( 1), UINT8_C( 25), UINT8_C( 4), UINT8_C( 3), UINT8_C(192), UINT8_C(106), UINT8_C( 88), UINT8_C( 66), UINT8_C( 0), UINT8_C( 29), UINT8_C( 0), UINT8_C( 7), UINT8_C( 0) }, { INT8_C( 5), INT8_C( 15), INT8_C( 11), INT8_C( 10), INT8_C( 8), INT8_C( 12), INT8_C( 0), INT8_C( 11), INT8_C( 0), INT8_C( 4), INT8_C( 3), INT8_C( 12), INT8_C( 13), INT8_C( 10), INT8_C( 12), INT8_C( 5) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 3), UINT8_MAX, UINT8_C(106), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_uint8x16_t r = simde_vqshlq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); a = simde_vshlq_u8(a, simde_vnegq_s8(simde_vandq_s8(simde_test_arm_neon_random_i8x16(), simde_vdupq_n_s8(7)))); b = simde_vandq_s8(b, simde_vdupq_n_s8(15)); simde_uint8x16_t r = simde_vqshlq_u8(a, b); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshlq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; int16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C( 1010), UINT16_C( 0), UINT16_C( 90), UINT16_C( 211), UINT16_C( 1), UINT16_C( 139), UINT16_C( 44), UINT16_C( 1) }, { INT16_C( 17), INT16_C( 13), INT16_C( 7), INT16_C( 27), INT16_C( 4), INT16_C( 22), INT16_C( 23), 
INT16_C( 16) }, { UINT16_MAX, UINT16_C( 0), UINT16_C(11520), UINT16_MAX, UINT16_C( 16), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C( 6946), UINT16_C( 0), UINT16_C( 894), UINT16_C( 880), UINT16_C(14593), UINT16_C( 3540), UINT16_C( 482), UINT16_C( 2380) }, { INT16_C( 29), INT16_C( 9), INT16_C( 25), INT16_C( 24), INT16_C( 12), INT16_C( 6), INT16_C( 23), INT16_C( 18) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C( 93), UINT16_C( 221), UINT16_C( 20), UINT16_C( 2962), UINT16_C( 30), UINT16_C( 258), UINT16_C( 0), UINT16_C(13384) }, { INT16_C( 10), INT16_C( 31), INT16_C( 20), INT16_C( 18), INT16_C( 20), INT16_C( 22), INT16_C( 22), INT16_C( 30) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { UINT16_C( 4017), UINT16_C( 733), UINT16_C(33924), UINT16_C( 0), UINT16_C( 164), UINT16_C( 3541), UINT16_C(11363), UINT16_C( 6) }, { INT16_C( 9), INT16_C( 14), INT16_C( 29), INT16_C( 14), INT16_C( 8), INT16_C( 18), INT16_C( 29), INT16_C( 10) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C(41984), UINT16_MAX, UINT16_MAX, UINT16_C( 6144) } }, { { UINT16_C( 11), UINT16_C( 40), UINT16_C( 1), UINT16_C( 0), UINT16_C( 384), UINT16_C( 21), UINT16_C( 4), UINT16_C( 0) }, { INT16_C( 30), INT16_C( 25), INT16_C( 4), INT16_C( 5), INT16_C( 8), INT16_C( 3), INT16_C( 5), INT16_C( 21) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 16), UINT16_C( 0), UINT16_MAX, UINT16_C( 168), UINT16_C( 128), UINT16_C( 0) } }, { { UINT16_C( 29), UINT16_C(14721), UINT16_C(14975), UINT16_C( 2), UINT16_C( 29), UINT16_C( 1), UINT16_C( 1), UINT16_C(12488) }, { INT16_C( 23), INT16_C( 24), INT16_C( 6), INT16_C( 2), INT16_C( 8), INT16_C( 7), INT16_C( 26), INT16_C( 21) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 8), UINT16_C( 7424), UINT16_C( 128), UINT16_MAX, UINT16_MAX } }, { { UINT16_C( 993), UINT16_C( 3), UINT16_C( 15), UINT16_C( 265), UINT16_C( 80), UINT16_C( 331), 
UINT16_C( 2709), UINT16_C( 25) }, { INT16_C( 2), INT16_C( 5), INT16_C( 5), INT16_C( 26), INT16_C( 28), INT16_C( 30), INT16_C( 29), INT16_C( 17) }, { UINT16_C( 3972), UINT16_C( 96), UINT16_C( 480), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C( 2321), UINT16_C( 466), UINT16_C( 29), UINT16_C( 1146), UINT16_C( 930), UINT16_C( 1), UINT16_C( 2), UINT16_C( 39) }, { INT16_C( 25), INT16_C( 15), INT16_C( 18), INT16_C( 9), INT16_C( 17), INT16_C( 23), INT16_C( 5), INT16_C( 27) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 64), UINT16_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_uint16x8_t r = simde_vqshlq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); a = simde_vshlq_u16(a, simde_vnegq_s16(simde_vandq_s16(simde_test_arm_neon_random_i16x8(), simde_vdupq_n_s16(15)))); b = simde_vandq_s16(b, simde_vdupq_n_s16(31)); simde_uint16x8_t r = simde_vqshlq_u16(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshlq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; int32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C( 24300), UINT32_C( 4), UINT32_C( 10), UINT32_C( 5267993) }, { INT32_C( 40), INT32_C( 55), INT32_C( 12), INT32_C( 4) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 40960), UINT32_C( 84287888) } }, { { UINT32_C( 508275), UINT32_C( 38365), UINT32_C( 77), UINT32_C(3970210525) }, { INT32_C( 19), INT32_C( 54), INT32_C( 49), 
INT32_C( 11) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C( 414773067), UINT32_C( 3), UINT32_C( 896), UINT32_C( 210) }, { INT32_C( 35), INT32_C( 19), INT32_C( 51), INT32_C( 7) }, { UINT32_MAX, UINT32_C( 1572864), UINT32_MAX, UINT32_C( 26880) } }, { { UINT32_C( 117690), UINT32_C( 89), UINT32_C( 6), UINT32_C( 38) }, { INT32_C( 23), INT32_C( 34), INT32_C( 45), INT32_C( 11) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_C( 77824) } }, { { UINT32_C( 72), UINT32_C( 10692582), UINT32_C( 0), UINT32_C( 68375885) }, { INT32_C( 24), INT32_C( 0), INT32_C( 59), INT32_C( 59) }, { UINT32_C(1207959552), UINT32_C( 10692582), UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(3490692944), UINT32_C( 648397257), UINT32_C( 17661220), UINT32_C( 2) }, { INT32_C( 33), INT32_C( 11), INT32_C( 11), INT32_C( 17) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_C( 262144) } }, { { UINT32_C( 92097311), UINT32_C( 12173), UINT32_C( 0), UINT32_C( 176732640) }, { INT32_C( 29), INT32_C( 58), INT32_C( 58), INT32_C( 49) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { UINT32_C( 198186158), UINT32_C( 23), UINT32_C( 2), UINT32_C( 5784654) }, { INT32_C( 15), INT32_C( 2), INT32_C( 13), INT32_C( 0) }, { UINT32_MAX, UINT32_C( 92), UINT32_C( 16384), UINT32_C( 5784654) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_uint32x4_t r = simde_vqshlq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); a = simde_vshlq_u32(a, simde_vnegq_s32(simde_vandq_s32(simde_test_arm_neon_random_i32x4(), simde_vdupq_n_s32(31)))); b = simde_vandq_s32(b, simde_vdupq_n_s32(63)); simde_uint32x4_t r = simde_vqshlq_u32(a, b); 
simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqshlq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; int64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 14), UINT64_C( 6558353326851752741) }, { INT64_C( 6), INT64_C( 29) }, { UINT64_C( 896), UINT64_MAX } }, { { UINT64_C( 12353627805118795), UINT64_C( 473443966) }, { INT64_C( 46), INT64_C( 58) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 901), UINT64_C( 1704486418) }, { INT64_C( 36), INT64_C( 3) }, { UINT64_C( 61916248539136), UINT64_C( 13635891344) } }, { { UINT64_C( 21), UINT64_C( 17) }, { INT64_C( 15), INT64_C( 62) }, { UINT64_C( 688128), UINT64_MAX } }, { { UINT64_C( 66472473494), UINT64_C( 47300183899) }, { INT64_C( 58), INT64_C( 42) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 227152248), UINT64_C( 162342530) }, { INT64_C( 60), INT64_C( 43) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 33185), UINT64_C( 31991553854) }, { INT64_C( 31), INT64_C( 48) }, { UINT64_C( 71264244858880), UINT64_MAX } }, { { UINT64_C( 4061564034), UINT64_C( 70125565431909990) }, { INT64_C( 56), INT64_C( 24) }, { UINT64_MAX, UINT64_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_uint64x2_t r = simde_vqshlq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); a = simde_vshlq_u64(a, simde_vnegq_s64(simde_vandq_s64(simde_test_arm_neon_random_i64x2(), simde_vdupq_n_s64(63)))); b = simde_vandq_s64(b, simde_vdupq_n_s64(63)); simde_uint64x2_t r = 
simde_vqshlq_u64(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vqshlb_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqshlh_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqshls_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqshld_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vqshlb_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vqshlh_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vqshls_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vqshld_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vqshl_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqshl_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqshl_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqshl_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vqshl_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vqshl_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vqshl_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vqshl_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vqshlq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqshlq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqshlq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqshlq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vqshlq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vqshlq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vqshlq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vqshlq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/qsub.c000066400000000000000000002120631400333146700164410ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN qsub #include "test-neon.h" #include "../../../simde/arm/neon/qsub.h" static int test_simde_vqsubb_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a; int8_t b; int8_t r; } test_vec[] = { { -INT8_C( 66), INT8_C( 22), -INT8_C( 88) }, { -INT8_C( 36), -INT8_C( 62), INT8_C( 26) }, { -INT8_C( 75), -INT8_C( 8), -INT8_C( 67) }, { -INT8_C( 35), -INT8_C( 127), INT8_C( 92) }, { INT8_C( 98), -INT8_C( 121), INT8_MAX }, { -INT8_C( 118), -INT8_C( 45), -INT8_C( 73) }, { INT8_C( 39), -INT8_C( 30), INT8_C( 69) }, { INT8_C( 106), INT8_C( 114), -INT8_C( 8) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { int8_t r = simde_vqsubb_s8(test_vec[i].a, test_vec[i].b); simde_assert_equal_i8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int8_t a = simde_test_codegen_random_i8(); int8_t b = simde_test_codegen_random_i8(); int8_t r = simde_vqsubb_s8(a, b); simde_test_codegen_write_i8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsubh_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a; int16_t b; int16_t r; } test_vec[] = { { INT16_C( 8855), INT16_C( 23221), -INT16_C( 14366) }, { -INT16_C( 31415), INT16_C( 22921), INT16_MIN }, { INT16_C( 21653), -INT16_C( 22868), INT16_MAX }, { INT16_C( 29215), -INT16_C( 5971), INT16_MAX }, { -INT16_C( 29041), INT16_C( 12520), INT16_MIN }, { INT16_C( 15481), -INT16_C( 24981), INT16_MAX }, { INT16_C( 16324), INT16_C( 12426), INT16_C( 3898) }, { -INT16_C( 19369), -INT16_C( 4418), -INT16_C( 14951) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int16_t r = simde_vqsubh_s16(test_vec[i].a, test_vec[i].b); simde_assert_equal_i16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int16_t a = simde_test_codegen_random_i16(); int16_t b = simde_test_codegen_random_i16(); int16_t r = simde_vqsubh_s16(a, b); simde_test_codegen_write_i16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsubs_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a; int32_t b; int32_t r; } test_vec[] = { { INT32_C( 541619415), -INT32_C( 1904553479), INT32_MAX }, { INT32_C( 1144333861), INT32_C( 657318552), INT32_C( 487015309) }, { -INT32_C( 380168848), INT32_C( 361218641), -INT32_C( 741387489) }, { 
INT32_C( 1497698561), -INT32_C( 1656290106), INT32_MAX }, { INT32_C( 1908248696), -INT32_C( 2013317279), INT32_MAX }, { -INT32_C( 171232163), -INT32_C( 2028079082), INT32_C( 1856846919) }, { INT32_C( 1584428045), INT32_C( 947124279), INT32_C( 637303766) }, { -INT32_C( 812533495), INT32_C( 896326077), -INT32_C( 1708859572) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int32_t r = simde_vqsubs_s32(test_vec[i].a, test_vec[i].b); simde_assert_equal_i32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int32_t a = simde_test_codegen_random_i32(); int32_t b = simde_test_codegen_random_i32(); int32_t r = simde_vqsubs_s32(a, b); simde_test_codegen_write_i32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsubd_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a; int64_t b; int64_t r; } test_vec[] = { { -INT64_C( 4660762299921454487), INT64_C( 2555741205718900186), -INT64_C( 7216503505640354673) }, { -INT64_C( 1606107347560568762), -INT64_C( 8529776873063233618), INT64_C( 6923669525502664856) }, { -INT64_C( 4219690419135232433), INT64_C( 4393874924657583298), -INT64_C( 8613565343792815731) }, { INT64_C( 2296675770715175957), INT64_C( 963254365123680437), INT64_C( 1333421405591495520) }, { INT64_C( 1810779477915906672), INT64_C( 1910910545548072786), -INT64_C( 100131067632166114) }, { INT64_C( 7059373367053204839), INT64_C( 1968519074954051974), INT64_C( 5090854292099152865) }, { -INT64_C( 8090831914640470201), -INT64_C( 392555804137838482), -INT64_C( 7698276110502631719) }, { INT64_C( 1272207019781299354), -INT64_C( 4967287367727194550), INT64_C( 6239494387508493904) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int64_t r = simde_vqsubd_s64(test_vec[i].a, test_vec[i].b); simde_assert_equal_i64(r, 
test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int64_t a = simde_test_codegen_random_i64(); int64_t b = simde_test_codegen_random_i64(); int64_t r = simde_vqsubd_s64(a, b); simde_test_codegen_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_i64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsubb_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a; uint8_t b; uint8_t r; } test_vec[] = { { UINT8_C(181), UINT8_C(134), UINT8_C( 47) }, { UINT8_C( 90), UINT8_C(242), UINT8_C( 0) }, { UINT8_C( 30), UINT8_C( 17), UINT8_C( 13) }, { UINT8_C(130), UINT8_C(140), UINT8_C( 0) }, { UINT8_C(185), UINT8_C( 90), UINT8_C( 95) }, { UINT8_MAX, UINT8_C( 76), UINT8_C(179) }, { UINT8_C(182), UINT8_C(140), UINT8_C( 42) }, { UINT8_C( 70), UINT8_C( 80), UINT8_C( 0) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint8_t r = simde_vqsubb_u8(test_vec[i].a, test_vec[i].b); simde_assert_equal_u8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint8_t a = simde_test_codegen_random_u8(); uint8_t b = simde_test_codegen_random_u8(); uint8_t r = simde_vqsubb_u8(a, b); simde_test_codegen_write_u8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsubh_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a; uint16_t b; uint16_t r; } test_vec[] = { { UINT16_C( 6185), UINT16_C(46230), UINT16_C( 0) }, { UINT16_C(16097), UINT16_C(11205), UINT16_C( 4892) }, { UINT16_C( 6940), UINT16_C(36896), UINT16_C( 0) }, { UINT16_C(12481), UINT16_C(30540), UINT16_C( 0) }, { UINT16_C(42679), UINT16_C(54633), UINT16_C( 0) }, { UINT16_C(60343), UINT16_C(29025), UINT16_C(31318) }, { UINT16_C(24901), UINT16_C(64701), UINT16_C( 0) 
}, { UINT16_C( 1261), UINT16_C( 5708), UINT16_C( 0) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint16_t r = simde_vqsubh_u16(test_vec[i].a, test_vec[i].b); simde_assert_equal_u16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint16_t a = simde_test_codegen_random_u16(); uint16_t b = simde_test_codegen_random_u16(); uint16_t r = simde_vqsubh_u16(a, b); simde_test_codegen_write_u16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsubs_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a; uint32_t b; uint32_t r; } test_vec[] = { { UINT32_C(4274709276), UINT32_C(1026134049), UINT32_C(3248575227) }, { UINT32_C(1825458859), UINT32_C( 836967034), UINT32_C( 988491825) }, { UINT32_C(1996901824), UINT32_C(2129160248), UINT32_C( 0) }, { UINT32_C(3061491401), UINT32_C(3335374506), UINT32_C( 0) }, { UINT32_C(3401881513), UINT32_C(3523800615), UINT32_C( 0) }, { UINT32_C(2990528056), UINT32_C(2967741168), UINT32_C( 22786888) }, { UINT32_C(2821188207), UINT32_C( 455479378), UINT32_C(2365708829) }, { UINT32_C(1624416438), UINT32_C( 270966630), UINT32_C(1353449808) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint32_t r = simde_vqsubs_u32(test_vec[i].a, test_vec[i].b); simde_assert_equal_u32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint32_t a = simde_test_codegen_random_u32(); uint32_t b = simde_test_codegen_random_u32(); uint32_t r = simde_vqsubs_u32(a, b); simde_test_codegen_write_u32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsubd_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t 
a; uint64_t b; uint64_t r; } test_vec[] = { { UINT64_C( 1238739320754072374), UINT64_C( 97011640326909880), UINT64_C( 1141727680427162494) }, { UINT64_C( 5044259803873509522), UINT64_C(12098582238282044015), UINT64_C( 0) }, { UINT64_C( 6602618604825723063), UINT64_C(11401245139197519269), UINT64_C( 0) }, { UINT64_C( 2428147951454118620), UINT64_C(15883594615127234645), UINT64_C( 0) }, { UINT64_C( 5664418551435195294), UINT64_C(15645153251035619443), UINT64_C( 0) }, { UINT64_C( 8752174285764075937), UINT64_C(16740633300212608081), UINT64_C( 0) }, { UINT64_C( 4690435028858552096), UINT64_C(18319764071651581286), UINT64_C( 0) }, { UINT64_C(14708693142388966017), UINT64_C( 3199125891678625587), UINT64_C(11509567250710340430) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint64_t r = simde_vqsubd_u64(test_vec[i].a, test_vec[i].b); simde_assert_equal_u64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a = simde_test_codegen_random_u64(); uint64_t b = simde_test_codegen_random_u64(); uint64_t r = simde_vqsubd_u64(a, b); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsub_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 84), -INT8_C( 114), -INT8_C( 11), INT8_C( 34), INT8_C( 83), INT8_C( 12), INT8_C( 99), -INT8_C( 71) }, { -INT8_C( 86), INT8_C( 37), INT8_C( 28), INT8_C( 6), INT8_C( 5), INT8_C( 89), INT8_C( 4), -INT8_C( 122) }, { INT8_MAX, INT8_MIN, -INT8_C( 39), INT8_C( 28), INT8_C( 78), -INT8_C( 77), INT8_C( 95), INT8_C( 51) } }, { { -INT8_C( 37), INT8_C( 86), INT8_C( 44), INT8_C( 86), INT8_C( 28), INT8_C( 76), INT8_C( 34), INT8_C( 79) }, { -INT8_C( 85), INT8_C( 100), -INT8_C( 52), -INT8_C( 73), -INT8_C( 7), INT8_C( 49), -INT8_C( 29), 
INT8_C( 77) }, { INT8_C( 48), -INT8_C( 14), INT8_C( 96), INT8_MAX, INT8_C( 35), INT8_C( 27), INT8_C( 63), INT8_C( 2) } }, { { -INT8_C( 65), -INT8_C( 40), INT8_C( 111), INT8_C( 19), -INT8_C( 28), -INT8_C( 46), -INT8_C( 52), -INT8_C( 114) }, { -INT8_C( 9), -INT8_C( 23), -INT8_C( 107), -INT8_C( 4), INT8_C( 66), -INT8_C( 103), -INT8_C( 125), INT8_C( 29) }, { -INT8_C( 56), -INT8_C( 17), INT8_MAX, INT8_C( 23), -INT8_C( 94), INT8_C( 57), INT8_C( 73), INT8_MIN } }, { { -INT8_C( 17), -INT8_C( 81), INT8_C( 116), INT8_C( 12), -INT8_C( 5), -INT8_C( 106), INT8_C( 91), -INT8_C( 90) }, { -INT8_C( 5), INT8_C( 40), INT8_C( 93), -INT8_C( 12), INT8_C( 89), INT8_C( 64), INT8_C( 65), INT8_C( 25) }, { -INT8_C( 12), -INT8_C( 121), INT8_C( 23), INT8_C( 24), -INT8_C( 94), INT8_MIN, INT8_C( 26), -INT8_C( 115) } }, { { INT8_C( 24), -INT8_C( 80), INT8_C( 44), -INT8_C( 3), -INT8_C( 126), -INT8_C( 8), -INT8_C( 117), INT8_C( 122) }, { -INT8_C( 31), INT8_C( 32), INT8_C( 118), INT8_C( 35), -INT8_C( 70), -INT8_C( 7), INT8_C( 65), -INT8_C( 87) }, { INT8_C( 55), -INT8_C( 112), -INT8_C( 74), -INT8_C( 38), -INT8_C( 56), -INT8_C( 1), INT8_MIN, INT8_MAX } }, { { -INT8_C( 87), -INT8_C( 75), -INT8_C( 75), -INT8_C( 92), INT8_C( 75), INT8_C( 17), INT8_C( 75), INT8_C( 70) }, { INT8_C( 57), -INT8_C( 88), INT8_C( 58), -INT8_C( 110), -INT8_C( 23), INT8_C( 123), -INT8_C( 85), INT8_C( 1) }, { INT8_MIN, INT8_C( 13), INT8_MIN, INT8_C( 18), INT8_C( 98), -INT8_C( 106), INT8_MAX, INT8_C( 69) } }, { { INT8_C( 43), -INT8_C( 41), -INT8_C( 2), -INT8_C( 82), -INT8_C( 48), -INT8_C( 118), INT8_C( 40), -INT8_C( 79) }, { -INT8_C( 86), -INT8_C( 98), -INT8_C( 43), INT8_C( 100), -INT8_C( 104), INT8_C( 22), INT8_C( 14), INT8_C( 65) }, { INT8_MAX, INT8_C( 57), INT8_C( 41), INT8_MIN, INT8_C( 56), INT8_MIN, INT8_C( 26), INT8_MIN } }, { { -INT8_C( 53), -INT8_C( 61), -INT8_C( 27), INT8_C( 22), -INT8_C( 44), INT8_C( 48), INT8_C( 93), INT8_C( 13) }, { -INT8_C( 39), -INT8_C( 105), -INT8_C( 96), -INT8_C( 62), INT8_C( 19), INT8_C( 75), 
-INT8_C( 61), INT8_C( 62) }, { -INT8_C( 14), INT8_C( 44), INT8_C( 69), INT8_C( 84), -INT8_C( 63), -INT8_C( 27), INT8_MAX, -INT8_C( 49) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vqsub_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vqsub_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsub_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 15837), -INT16_C( 3092), INT16_C( 5196), -INT16_C( 2396) }, { INT16_C( 31155), INT16_C( 19291), INT16_C( 27023), INT16_C( 23180) }, { INT16_MIN, -INT16_C( 22383), -INT16_C( 21827), -INT16_C( 25576) } }, { { INT16_C( 28972), INT16_C( 369), -INT16_C( 12638), INT16_C( 31502) }, { -INT16_C( 20891), INT16_C( 30781), INT16_C( 250), INT16_C( 7607) }, { INT16_MAX, -INT16_C( 30412), -INT16_C( 12888), INT16_C( 23895) } }, { { -INT16_C( 23614), INT16_C( 3600), -INT16_C( 19272), INT16_C( 27397) }, { INT16_C( 24622), -INT16_C( 16970), INT16_C( 17097), -INT16_C( 2792) }, { INT16_MIN, INT16_C( 20570), INT16_MIN, INT16_C( 30189) } }, { { -INT16_C( 30285), INT16_C( 22006), INT16_C( 1367), -INT16_C( 17200) }, { INT16_C( 3507), -INT16_C( 21195), -INT16_C( 5106), -INT16_C( 12086) }, { INT16_MIN, INT16_MAX, INT16_C( 6473), -INT16_C( 5114) } }, { { -INT16_C( 9585), INT16_C( 18399), -INT16_C( 7025), -INT16_C( 16974) }, { INT16_C( 26692), INT16_C( 3450), -INT16_C( 27990), INT16_C( 24066) 
}, { INT16_MIN, INT16_C( 14949), INT16_C( 20965), INT16_MIN } }, { { -INT16_C( 1765), INT16_C( 29363), -INT16_C( 31490), -INT16_C( 20177) }, { INT16_C( 25745), -INT16_C( 24737), INT16_C( 10576), -INT16_C( 8336) }, { -INT16_C( 27510), INT16_MAX, INT16_MIN, -INT16_C( 11841) } }, { { INT16_C( 20228), -INT16_C( 27865), -INT16_C( 9933), INT16_C( 30544) }, { -INT16_C( 13758), -INT16_C( 4988), -INT16_C( 31139), INT16_C( 30794) }, { INT16_MAX, -INT16_C( 22877), INT16_C( 21206), -INT16_C( 250) } }, { { -INT16_C( 385), INT16_C( 32235), INT16_C( 6786), INT16_C( 4911) }, { -INT16_C( 29058), -INT16_C( 12621), INT16_C( 9143), -INT16_C( 17491) }, { INT16_C( 28673), INT16_MAX, -INT16_C( 2357), INT16_C( 22402) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vqsub_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vqsub_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsub_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 1521560462), -INT32_C( 266559826) }, { -INT32_C( 958619543), -INT32_C( 1505876186) }, { -INT32_C( 562940919), INT32_C( 1239316360) } }, { { -INT32_C( 1490867931), -INT32_C( 1044753853) }, { -INT32_C( 1735430688), INT32_C( 39009680) }, { INT32_C( 244562757), -INT32_C( 1083763533) } }, { { -INT32_C( 1079533039), -INT32_C( 1448099008) }, { -INT32_C( 1972401053), -INT32_C( 667898189) }, { 
INT32_C( 892868014), -INT32_C( 780200819) } }, { { INT32_C( 461329367), -INT32_C( 2032387674) }, { INT32_C( 924740775), -INT32_C( 1170574679) }, { -INT32_C( 463411408), -INT32_C( 861812995) } }, { { INT32_C( 1417339156), INT32_C( 150874533) }, { INT32_C( 1754426805), -INT32_C( 213859813) }, { -INT32_C( 337087649), INT32_C( 364734346) } }, { { -INT32_C( 1139884266), -INT32_C( 1606227207) }, { -INT32_C( 2662058), -INT32_C( 407236141) }, { -INT32_C( 1137222208), -INT32_C( 1198991066) } }, { { -INT32_C( 1740950285), INT32_C( 329267549) }, { -INT32_C( 1032113241), INT32_C( 196459765) }, { -INT32_C( 708837044), INT32_C( 132807784) } }, { { INT32_C( 1959248763), INT32_C( 68422318) }, { INT32_C( 1040510059), -INT32_C( 265961731) }, { INT32_C( 918738704), INT32_C( 334384049) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vqsub_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vqsub_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsub_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int64_t b[1]; int64_t r[1]; } test_vec[] = { { { INT64_C( 4711373902919000306) }, { INT64_C( 1539590720065887836) }, { INT64_C( 3171783182853112470) } }, { { -INT64_C( 7408528940881795972) }, { -INT64_C( 2055615115581508726) }, { -INT64_C( 5352913825300287246) } }, { { -INT64_C( 8775099862162996899) }, { INT64_C( 8152136498302892915) }, { INT64_MIN } }, { { -INT64_C( 
3088965443652965287) }, { INT64_C( 5967787332997085437) }, { -INT64_C( 9056752776650050724) } }, { { INT64_C( 902832744462615767) }, { -INT64_C( 1431516226032935236) }, { INT64_C( 2334348970495551003) } }, { { -INT64_C( 8860668954151764504) }, { INT64_C( 1428843107972310669) }, { INT64_MIN } }, { { INT64_C( 6552345614570757449) }, { INT64_C( 3132303308971771897) }, { INT64_C( 3420042305598985552) } }, { { INT64_C( 1006667218594753771) }, { -INT64_C( 352578529519506228) }, { INT64_C( 1359245748114259999) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r = simde_vqsub_s64(a, b); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); simde_int64x1_t b = simde_test_arm_neon_random_i64x1(); simde_int64x1_t r = simde_vqsub_s64(a, b); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsub_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(128), UINT8_C( 47), UINT8_C(222), UINT8_C( 30), UINT8_C(202), UINT8_C(204), UINT8_C(121), UINT8_C(196) }, { UINT8_C(212), UINT8_C(132), UINT8_C( 80), UINT8_C( 23), UINT8_C(178), UINT8_C(201), UINT8_C( 66), UINT8_C(157) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C(142), UINT8_C( 7), UINT8_C( 24), UINT8_C( 3), UINT8_C( 55), UINT8_C( 39) } }, { { UINT8_C(181), UINT8_C(160), UINT8_C( 16), UINT8_C( 53), UINT8_C( 6), UINT8_C( 8), UINT8_C( 67), UINT8_C(211) }, { UINT8_C(117), UINT8_C(210), UINT8_C(219), UINT8_C( 39), UINT8_C( 54), UINT8_C(247), UINT8_C( 34), UINT8_C(182) }, { UINT8_C( 64), UINT8_C( 
0), UINT8_C( 0), UINT8_C( 14), UINT8_C( 0), UINT8_C( 0), UINT8_C( 33), UINT8_C( 29) } }, { { UINT8_C( 38), UINT8_C( 0), UINT8_C(213), UINT8_C(241), UINT8_C(205), UINT8_C( 78), UINT8_C(181), UINT8_C(161) }, { UINT8_C(210), UINT8_C( 5), UINT8_C(184), UINT8_C(132), UINT8_C(206), UINT8_C(250), UINT8_C( 33), UINT8_C(131) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 29), UINT8_C(109), UINT8_C( 0), UINT8_C( 0), UINT8_C(148), UINT8_C( 30) } }, { { UINT8_C(154), UINT8_C( 49), UINT8_C(185), UINT8_C(160), UINT8_C( 58), UINT8_C(252), UINT8_C(115), UINT8_C(175) }, { UINT8_C(206), UINT8_C( 79), UINT8_C(214), UINT8_C( 4), UINT8_C( 70), UINT8_C(248), UINT8_C(187), UINT8_C(108) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(156), UINT8_C( 0), UINT8_C( 4), UINT8_C( 0), UINT8_C( 67) } }, { { UINT8_C(249), UINT8_C(144), UINT8_C( 93), UINT8_C(198), UINT8_C(222), UINT8_C( 18), UINT8_C(103), UINT8_C(176) }, { UINT8_C( 24), UINT8_C( 31), UINT8_C( 53), UINT8_C(230), UINT8_C( 25), UINT8_C( 86), UINT8_C(106), UINT8_C(179) }, { UINT8_C(225), UINT8_C(113), UINT8_C( 40), UINT8_C( 0), UINT8_C(197), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(136), UINT8_C( 35), UINT8_C( 83), UINT8_C(194), UINT8_C( 31), UINT8_C(199), UINT8_C(113), UINT8_C(237) }, { UINT8_C( 22), UINT8_C( 71), UINT8_C(242), UINT8_C( 92), UINT8_C( 63), UINT8_C(173), UINT8_C(200), UINT8_C( 56) }, { UINT8_C(114), UINT8_C( 0), UINT8_C( 0), UINT8_C(102), UINT8_C( 0), UINT8_C( 26), UINT8_C( 0), UINT8_C(181) } }, { { UINT8_C( 61), UINT8_C( 38), UINT8_C(254), UINT8_C( 27), UINT8_C( 56), UINT8_C(101), UINT8_C(203), UINT8_C( 80) }, { UINT8_C(132), UINT8_C( 0), UINT8_C( 55), UINT8_C(157), UINT8_C( 87), UINT8_C(161), UINT8_C( 80), UINT8_C(223) }, { UINT8_C( 0), UINT8_C( 38), UINT8_C(199), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(123), UINT8_C( 0) } }, { { UINT8_C(196), UINT8_C(164), UINT8_C(161), UINT8_C(227), UINT8_C(107), UINT8_C( 18), UINT8_C(208), UINT8_C(129) }, { UINT8_C( 89), UINT8_C(194), UINT8_C(221), UINT8_C(152), 
UINT8_C(111), UINT8_C(165), UINT8_C(209), UINT8_C(172) }, { UINT8_C(107), UINT8_C( 0), UINT8_C( 0), UINT8_C( 75), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vqsub_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vqsub_u8(a, b); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsub_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(53195), UINT16_C( 1223), UINT16_C(37685), UINT16_C(47444) }, { UINT16_C(35731), UINT16_C(59991), UINT16_C(42796), UINT16_C(61641) }, { UINT16_C(17464), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(27211), UINT16_C(46803), UINT16_C(42108), UINT16_C(54583) }, { UINT16_C( 5222), UINT16_C(54894), UINT16_C(16314), UINT16_C(34178) }, { UINT16_C(21989), UINT16_C( 0), UINT16_C(25794), UINT16_C(20405) } }, { { UINT16_C(18958), UINT16_C(17289), UINT16_C(57053), UINT16_C(28925) }, { UINT16_C(21609), UINT16_C(38491), UINT16_C( 9467), UINT16_C(18310) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C(47586), UINT16_C(10615) } }, { { UINT16_C(23183), UINT16_C( 3069), UINT16_C(13822), UINT16_C(25825) }, { UINT16_C(20297), UINT16_C( 826), UINT16_C(48526), UINT16_C(40073) }, { UINT16_C( 2886), UINT16_C( 2243), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C( 4615), UINT16_C(58592), UINT16_C(56816), UINT16_C(23124) }, { UINT16_C(44849), 
UINT16_C(11504), UINT16_C(30420), UINT16_C(25459) }, { UINT16_C( 0), UINT16_C(47088), UINT16_C(26396), UINT16_C( 0) } }, { { UINT16_C(29136), UINT16_C(52846), UINT16_C(20390), UINT16_C(61235) }, { UINT16_C(28062), UINT16_C(11507), UINT16_C(31786), UINT16_C(12745) }, { UINT16_C( 1074), UINT16_C(41339), UINT16_C( 0), UINT16_C(48490) } }, { { UINT16_C(43406), UINT16_C(32533), UINT16_C(27270), UINT16_C(47065) }, { UINT16_C(51481), UINT16_C(60899), UINT16_C(22335), UINT16_C( 4176) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 4935), UINT16_C(42889) } }, { { UINT16_C(49096), UINT16_C(28382), UINT16_C( 4366), UINT16_C(44381) }, { UINT16_C(20607), UINT16_C(43481), UINT16_C(41676), UINT16_C(23515) }, { UINT16_C(28489), UINT16_C( 0), UINT16_C( 0), UINT16_C(20866) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vqsub_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t r = simde_vqsub_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsub_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(3520786507), UINT32_C(1955115866) }, { UINT32_C(3143724156), UINT32_C(2345382595) }, { UINT32_C( 377062351), UINT32_C( 0) } }, { { UINT32_C(2147068529), UINT32_C( 975984315) }, { UINT32_C(1944323751), UINT32_C(4107190184) }, { UINT32_C( 202744778), UINT32_C( 0) } }, { { UINT32_C( 180725935), UINT32_C(3615379035) }, { 
UINT32_C(2106843066), UINT32_C( 34102929) }, { UINT32_C( 0), UINT32_C(3581276106) } }, { { UINT32_C(3296854280), UINT32_C(4278103639) }, { UINT32_C(1567810228), UINT32_C(1364279457) }, { UINT32_C(1729044052), UINT32_C(2913824182) } }, { { UINT32_C(1146820329), UINT32_C( 505207140) }, { UINT32_C(1251717048), UINT32_C( 374121229) }, { UINT32_C( 0), UINT32_C( 131085911) } }, { { UINT32_C(4242198180), UINT32_C( 838523005) }, { UINT32_C(1552837819), UINT32_C(2527977389) }, { UINT32_C(2689360361), UINT32_C( 0) } }, { { UINT32_C(1524238582), UINT32_C(2591684321) }, { UINT32_C(3018069157), UINT32_C(1556689080) }, { UINT32_C( 0), UINT32_C(1034995241) } }, { { UINT32_C(2086183935), UINT32_C( 917328763) }, { UINT32_C(1821588671), UINT32_C( 285360155) }, { UINT32_C( 264595264), UINT32_C( 631968608) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vqsub_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t r = simde_vqsub_u32(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsub_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C( 8774390669137206601) }, { UINT64_C(15568651277195585785) }, { UINT64_C( 0) } }, { { UINT64_C( 8739799921270744728) }, { UINT64_C( 7379966152159911229) }, { UINT64_C( 1359833769110833499) } }, { { UINT64_C(13047303859675977413) }, { UINT64_C(14605019878296796670) }, { UINT64_C( 0) } }, { { 
UINT64_C( 4759504381343040475) }, { UINT64_C(11567783556614451973) }, { UINT64_C( 0) } }, { { UINT64_C( 7875751691503016668) }, { UINT64_C( 181537236956001159) }, { UINT64_C( 7694214454547015509) } }, { { UINT64_C(10321586837836489462) }, { UINT64_C(13335787268895594647) }, { UINT64_C( 0) } }, { { UINT64_C( 2031859648466864981) }, { UINT64_C( 6951969487651763792) }, { UINT64_C( 0) } }, { { UINT64_C( 5609119769578639805) }, { UINT64_C( 1803641916361836912) }, { UINT64_C( 3805477853216802893) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_vqsub_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t b = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t r = simde_vqsub_u64(a, b); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsubq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 29), INT8_C( 88), -INT8_C( 34), -INT8_C( 78), -INT8_C( 10), INT8_C( 16), -INT8_C( 49), INT8_C( 70), -INT8_C( 122), -INT8_C( 91), -INT8_C( 66), -INT8_C( 16), INT8_C( 0), INT8_C( 57), INT8_C( 81), -INT8_C( 66) }, { INT8_C( 74), -INT8_C( 84), INT8_C( 6), INT8_C( 0), INT8_C( 69), -INT8_C( 35), INT8_C( 78), -INT8_C( 75), INT8_C( 103), INT8_C( 14), INT8_C( 3), INT8_C( 43), -INT8_C( 32), INT8_C( 11), INT8_C( 69), -INT8_C( 2) }, { -INT8_C( 45), INT8_MAX, -INT8_C( 40), -INT8_C( 78), -INT8_C( 79), INT8_C( 51), -INT8_C( 127), INT8_MAX, INT8_MIN, -INT8_C( 105), -INT8_C( 69), -INT8_C( 59), INT8_C( 32), 
INT8_C( 46), INT8_C( 12), -INT8_C( 64) } }, { { INT8_C( 99), INT8_C( 35), -INT8_C( 80), INT8_C( 89), INT8_C( 51), INT8_MAX, -INT8_C( 96), -INT8_C( 71), INT8_C( 37), INT8_C( 94), -INT8_C( 86), INT8_C( 37), -INT8_C( 105), -INT8_C( 5), -INT8_C( 29), -INT8_C( 30) }, { -INT8_C( 89), -INT8_C( 23), -INT8_C( 30), -INT8_C( 20), -INT8_C( 57), INT8_C( 48), -INT8_C( 94), INT8_C( 46), INT8_C( 62), -INT8_C( 91), INT8_C( 89), INT8_C( 31), -INT8_C( 80), -INT8_C( 98), INT8_C( 29), INT8_C( 20) }, { INT8_MAX, INT8_C( 58), -INT8_C( 50), INT8_C( 109), INT8_C( 108), INT8_C( 79), -INT8_C( 2), -INT8_C( 117), -INT8_C( 25), INT8_MAX, INT8_MIN, INT8_C( 6), -INT8_C( 25), INT8_C( 93), -INT8_C( 58), -INT8_C( 50) } }, { { -INT8_C( 63), -INT8_C( 51), INT8_C( 109), -INT8_C( 12), INT8_C( 77), INT8_C( 13), -INT8_C( 82), INT8_C( 114), INT8_C( 108), INT8_C( 88), -INT8_C( 105), INT8_C( 3), INT8_C( 83), INT8_C( 123), -INT8_C( 27), -INT8_C( 6) }, { INT8_C( 100), -INT8_C( 56), -INT8_C( 25), INT8_C( 43), -INT8_C( 8), -INT8_C( 119), INT8_C( 89), INT8_C( 55), INT8_C( 46), -INT8_C( 77), INT8_C( 86), -INT8_C( 33), INT8_C( 81), INT8_C( 115), -INT8_C( 13), INT8_C( 19) }, { INT8_MIN, INT8_C( 5), INT8_MAX, -INT8_C( 55), INT8_C( 85), INT8_MAX, INT8_MIN, INT8_C( 59), INT8_C( 62), INT8_MAX, INT8_MIN, INT8_C( 36), INT8_C( 2), INT8_C( 8), -INT8_C( 14), -INT8_C( 25) } }, { { INT8_C( 64), INT8_C( 96), INT8_C( 7), -INT8_C( 115), INT8_C( 110), -INT8_C( 75), -INT8_C( 1), -INT8_C( 38), INT8_C( 13), -INT8_C( 105), -INT8_C( 35), INT8_C( 96), INT8_C( 18), -INT8_C( 61), INT8_C( 91), INT8_C( 118) }, { -INT8_C( 117), INT8_C( 66), -INT8_C( 94), -INT8_C( 125), -INT8_C( 53), -INT8_C( 5), -INT8_C( 70), -INT8_C( 7), -INT8_C( 82), INT8_C( 16), -INT8_C( 40), INT8_C( 0), -INT8_C( 125), -INT8_C( 53), INT8_C( 19), -INT8_C( 60) }, { INT8_MAX, INT8_C( 30), INT8_C( 101), INT8_C( 10), INT8_MAX, -INT8_C( 70), INT8_C( 69), -INT8_C( 31), INT8_C( 95), -INT8_C( 121), INT8_C( 5), INT8_C( 96), INT8_MAX, -INT8_C( 8), INT8_C( 72), INT8_MAX } }, { { 
INT8_C( 44), INT8_C( 26), INT8_C( 81), -INT8_C( 102), -INT8_C( 48), INT8_C( 81), INT8_C( 116), -INT8_C( 35), -INT8_C( 24), INT8_C( 81), INT8_C( 62), -INT8_C( 6), INT8_C( 20), -INT8_C( 103), INT8_C( 112), -INT8_C( 97) }, { -INT8_C( 37), INT8_C( 18), INT8_C( 35), -INT8_C( 90), INT8_C( 14), -INT8_C( 35), -INT8_C( 97), -INT8_C( 68), -INT8_C( 18), INT8_C( 120), -INT8_C( 68), INT8_C( 113), INT8_C( 67), -INT8_C( 49), INT8_C( 53), INT8_C( 111) }, { INT8_C( 81), INT8_C( 8), INT8_C( 46), -INT8_C( 12), -INT8_C( 62), INT8_C( 116), INT8_MAX, INT8_C( 33), -INT8_C( 6), -INT8_C( 39), INT8_MAX, -INT8_C( 119), -INT8_C( 47), -INT8_C( 54), INT8_C( 59), INT8_MIN } }, { { -INT8_C( 22), -INT8_C( 121), INT8_C( 9), -INT8_C( 70), -INT8_C( 40), INT8_C( 125), -INT8_C( 105), -INT8_C( 64), -INT8_C( 49), -INT8_C( 43), -INT8_C( 70), -INT8_C( 29), INT8_C( 110), INT8_C( 42), -INT8_C( 125), INT8_C( 73) }, { INT8_C( 61), -INT8_C( 90), -INT8_C( 17), INT8_C( 75), -INT8_C( 125), -INT8_C( 113), INT8_C( 7), INT8_C( 113), INT8_C( 7), -INT8_C( 60), -INT8_C( 29), INT8_C( 74), -INT8_C( 109), INT8_C( 24), -INT8_C( 70), INT8_C( 125) }, { -INT8_C( 83), -INT8_C( 31), INT8_C( 26), INT8_MIN, INT8_C( 85), INT8_MAX, -INT8_C( 112), INT8_MIN, -INT8_C( 56), INT8_C( 17), -INT8_C( 41), -INT8_C( 103), INT8_MAX, INT8_C( 18), -INT8_C( 55), -INT8_C( 52) } }, { { -INT8_C( 97), -INT8_C( 61), INT8_C( 55), INT8_C( 119), INT8_C( 65), -INT8_C( 49), INT8_C( 55), INT8_C( 16), -INT8_C( 92), -INT8_C( 15), -INT8_C( 13), INT8_C( 19), INT8_C( 28), INT8_C( 118), INT8_C( 92), INT8_C( 89) }, { INT8_C( 28), INT8_C( 76), -INT8_C( 92), -INT8_C( 96), -INT8_C( 37), -INT8_C( 85), INT8_C( 17), -INT8_C( 30), INT8_C( 111), -INT8_C( 12), INT8_C( 44), INT8_C( 3), INT8_C( 13), -INT8_C( 26), INT8_MIN, -INT8_C( 84) }, { -INT8_C( 125), INT8_MIN, INT8_MAX, INT8_MAX, INT8_C( 102), INT8_C( 36), INT8_C( 38), INT8_C( 46), INT8_MIN, -INT8_C( 3), -INT8_C( 57), INT8_C( 16), INT8_C( 15), INT8_MAX, INT8_MAX, INT8_MAX } }, { { -INT8_C( 86), -INT8_C( 72), INT8_C( 36), 
-INT8_C( 21), -INT8_C( 121), INT8_C( 91), -INT8_C( 5), INT8_C( 43), INT8_C( 77), -INT8_C( 18), INT8_C( 62), INT8_C( 105), INT8_C( 101), -INT8_C( 101), -INT8_C( 62), -INT8_C( 127) }, { -INT8_C( 25), INT8_C( 102), INT8_C( 33), -INT8_C( 62), INT8_C( 17), INT8_C( 51), -INT8_C( 92), -INT8_C( 127), INT8_C( 39), -INT8_C( 48), -INT8_C( 124), INT8_C( 52), -INT8_C( 73), INT8_C( 4), -INT8_C( 31), INT8_C( 97) }, { -INT8_C( 61), INT8_MIN, INT8_C( 3), INT8_C( 41), INT8_MIN, INT8_C( 40), INT8_C( 87), INT8_MAX, INT8_C( 38), INT8_C( 30), INT8_MAX, INT8_C( 53), INT8_MAX, -INT8_C( 105), -INT8_C( 31), INT8_MIN } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vqsubq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vqsubq_s8(a, b); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsubq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 1468), INT16_C( 17228), INT16_C( 18272), -INT16_C( 21137), -INT16_C( 21195), -INT16_C( 26090), -INT16_C( 10168), INT16_C( 12060) }, { INT16_C( 15678), INT16_C( 20721), -INT16_C( 27280), -INT16_C( 26415), INT16_C( 21862), INT16_C( 7628), -INT16_C( 21159), INT16_C( 5758) }, { -INT16_C( 14210), -INT16_C( 3493), INT16_MAX, INT16_C( 5278), INT16_MIN, INT16_MIN, INT16_C( 10991), INT16_C( 6302) } }, { { -INT16_C( 13646), INT16_C( 4953), -INT16_C( 14319), INT16_C( 18112), -INT16_C( 10378), -INT16_C( 
16671), -INT16_C( 593), -INT16_C( 4370) }, { -INT16_C( 8390), -INT16_C( 21698), INT16_C( 3957), -INT16_C( 9405), INT16_C( 3940), -INT16_C( 16904), INT16_C( 30397), INT16_C( 28627) }, { -INT16_C( 5256), INT16_C( 26651), -INT16_C( 18276), INT16_C( 27517), -INT16_C( 14318), INT16_C( 233), -INT16_C( 30990), INT16_MIN } }, { { INT16_C( 11584), INT16_C( 20866), INT16_C( 17397), INT16_C( 27543), INT16_C( 30746), -INT16_C( 14038), INT16_C( 6261), -INT16_C( 20297) }, { -INT16_C( 2569), INT16_C( 27739), -INT16_C( 25084), INT16_C( 26695), INT16_C( 16301), INT16_C( 27174), -INT16_C( 1611), -INT16_C( 2598) }, { INT16_C( 14153), -INT16_C( 6873), INT16_MAX, INT16_C( 848), INT16_C( 14445), INT16_MIN, INT16_C( 7872), -INT16_C( 17699) } }, { { INT16_C( 23590), INT16_C( 7238), -INT16_C( 8545), -INT16_C( 18041), -INT16_C( 20138), -INT16_C( 13181), INT16_C( 15049), -INT16_C( 16004) }, { -INT16_C( 10448), INT16_C( 13357), INT16_C( 30069), INT16_C( 8861), -INT16_C( 15436), INT16_C( 27277), INT16_C( 26556), -INT16_C( 7329) }, { INT16_MAX, -INT16_C( 6119), INT16_MIN, -INT16_C( 26902), -INT16_C( 4702), INT16_MIN, -INT16_C( 11507), -INT16_C( 8675) } }, { { -INT16_C( 22845), INT16_C( 25599), -INT16_C( 31100), -INT16_C( 9700), -INT16_C( 24776), INT16_C( 422), INT16_C( 8922), INT16_C( 2754) }, { -INT16_C( 3847), INT16_C( 28222), -INT16_C( 9371), INT16_C( 6545), INT16_C( 7838), INT16_C( 23427), -INT16_C( 7291), INT16_C( 18494) }, { -INT16_C( 18998), -INT16_C( 2623), -INT16_C( 21729), -INT16_C( 16245), -INT16_C( 32614), -INT16_C( 23005), INT16_C( 16213), -INT16_C( 15740) } }, { { INT16_C( 15753), INT16_C( 3499), -INT16_C( 14141), -INT16_C( 1049), -INT16_C( 29081), INT16_C( 16893), -INT16_C( 16464), -INT16_C( 21941) }, { -INT16_C( 30033), INT16_C( 5144), -INT16_C( 22171), INT16_C( 1070), -INT16_C( 20025), INT16_C( 19551), -INT16_C( 25196), INT16_C( 7573) }, { INT16_MAX, -INT16_C( 1645), INT16_C( 8030), -INT16_C( 2119), -INT16_C( 9056), -INT16_C( 2658), INT16_C( 8732), -INT16_C( 29514) } }, { { 
INT16_C( 16602), -INT16_C( 25302), INT16_C( 4616), INT16_C( 28825), -INT16_C( 26976), INT16_C( 20657), -INT16_C( 683), INT16_C( 1530) }, { INT16_C( 4999), -INT16_C( 5095), INT16_C( 18364), -INT16_C( 31504), INT16_C( 20473), -INT16_C( 29232), INT16_C( 26092), -INT16_C( 14677) }, { INT16_C( 11603), -INT16_C( 20207), -INT16_C( 13748), INT16_MAX, INT16_MIN, INT16_MAX, -INT16_C( 26775), INT16_C( 16207) } }, { { -INT16_C( 10842), -INT16_C( 20892), -INT16_C( 537), -INT16_C( 30946), -INT16_C( 12141), -INT16_C( 5928), -INT16_C( 11571), INT16_C( 21741) }, { INT16_C( 2021), -INT16_C( 24000), INT16_C( 12622), INT16_C( 18214), -INT16_C( 2432), INT16_C( 28117), -INT16_C( 32676), INT16_C( 563) }, { -INT16_C( 12863), INT16_C( 3108), -INT16_C( 13159), INT16_MIN, -INT16_C( 9709), INT16_MIN, INT16_C( 21105), INT16_C( 21178) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vqsubq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vqsubq_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsubq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 1034983253), INT32_C( 667209620), INT32_C( 1813027999), INT32_C( 1421933935) }, { INT32_C( 1408630788), -INT32_C( 1298523087), INT32_C( 1864331027), INT32_C( 1165054703) }, { -INT32_C( 373647535), INT32_C( 1965732707), -INT32_C( 51303028), INT32_C( 256879232) } }, { { 
INT32_C( 2122457578), -INT32_C( 1884928272), INT32_C( 1392228067), -INT32_C( 1197032525) }, { -INT32_C( 318005828), -INT32_C( 861952583), INT32_C( 71024149), -INT32_C( 79057903) }, { INT32_MAX, -INT32_C( 1022975689), INT32_C( 1321203918), -INT32_C( 1117974622) } }, { { -INT32_C( 1099314226), -INT32_C( 179429614), -INT32_C( 1991816747), -INT32_C( 1052644091) }, { INT32_C( 1152273546), INT32_C( 101732081), INT32_C( 487279628), -INT32_C( 971483912) }, { INT32_MIN, -INT32_C( 281161695), INT32_MIN, -INT32_C( 81160179) } }, { { INT32_C( 847614496), -INT32_C( 2027433039), INT32_C( 554724892), -INT32_C( 421375653) }, { -INT32_C( 1909812835), -INT32_C( 342541601), INT32_C( 2131337350), INT32_C( 340074996) }, { INT32_MAX, -INT32_C( 1684891438), -INT32_C( 1576612458), -INT32_C( 761450649) } }, { { INT32_C( 1699138227), -INT32_C( 1158910563), INT32_C( 937163995), -INT32_C( 367149491) }, { INT32_C( 779634511), INT32_C( 135859585), -INT32_C( 1568202067), -INT32_C( 139015100) }, { INT32_C( 919503716), -INT32_C( 1294770148), INT32_MAX, -INT32_C( 228134391) } }, { { INT32_C( 878509463), INT32_C( 1190021226), -INT32_C( 1854027196), -INT32_C( 679765368) }, { INT32_C( 1644557537), -INT32_C( 1351999999), -INT32_C( 2058227391), INT32_C( 1434191806) }, { -INT32_C( 766048074), INT32_MAX, INT32_C( 204200195), -INT32_C( 2113957174) } }, { { INT32_C( 1871304964), INT32_C( 1723168801), -INT32_C( 889769406), -INT32_C( 1381927988) }, { INT32_C( 1745856103), INT32_C( 85424836), INT32_C( 696936555), INT32_C( 1954416496) }, { INT32_C( 125448861), INT32_C( 1637743965), -INT32_C( 1586705961), INT32_MIN } }, { { INT32_C( 31656160), -INT32_C( 1033398144), -INT32_C( 1769185334), INT32_C( 960703954) }, { -INT32_C( 1751034925), INT32_C( 949860813), -INT32_C( 1855838431), INT32_C( 235331630) }, { INT32_C( 1782691085), -INT32_C( 1983258957), INT32_C( 86653097), INT32_C( 725372324) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = 
simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vqsubq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vqsubq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsubq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { INT64_C( 5488330497879763432), -INT64_C( 5268887329813842218) }, { INT64_C( 6737362211679667065), -INT64_C( 6174446353717927662) }, { -INT64_C( 1249031713799903633), INT64_C( 905559023904085444) } }, { { -INT64_C( 5932017138937995298), INT64_C( 3498513743973093619) }, { -INT64_C( 2786332034548180038), INT64_C( 1603960109088261078) }, { -INT64_C( 3145685104389815260), INT64_C( 1894553634884832541) } }, { { -INT64_C( 8841370285577382711), -INT64_C( 7666558287053282380) }, { -INT64_C( 2493621974940773947), -INT64_C( 5660541048033280477) }, { -INT64_C( 6347748310636608764), -INT64_C( 2006017239020001903) } }, { { -INT64_C( 4601942092203038709), INT64_C( 3220853730185018946) }, { INT64_C( 4736128877027933910), -INT64_C( 1066210828566646885) }, { INT64_MIN, INT64_C( 4287064558751665831) } }, { { INT64_C( 7332898282719315446), -INT64_C( 8448130901258080722) }, { INT64_C( 3662307856862378611), INT64_C( 8370196635761078352) }, { INT64_C( 3670590425856936835), INT64_MIN } }, { { -INT64_C( 7640572926802545042), INT64_C( 2479494029669792041) }, { INT64_C( 4697685359771045736), INT64_C( 4461555193753429751) }, { INT64_MIN, -INT64_C( 1982061164083637710) } }, { { -INT64_C( 4155759654631153322), 
-INT64_C( 3093960381515465153) }, { INT64_C( 4831524897867220187), INT64_C( 9005407212867716588) }, { -INT64_C( 8987284552498373509), INT64_MIN } }, { { -INT64_C( 3574014710070523900), -INT64_C( 1131305013801645557) }, { INT64_C( 1377760808561025424), INT64_C( 9075256247633041231) }, { -INT64_C( 4951775518631549324), INT64_MIN } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_vqsubq_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); simde_int64x2_t r = simde_vqsubq_s64(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsubq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(145), UINT8_C( 42), UINT8_C( 30), UINT8_C( 32), UINT8_C(183), UINT8_C(132), UINT8_C(238), UINT8_C(194), UINT8_C(142), UINT8_C(170), UINT8_C( 57), UINT8_C(164), UINT8_C(118), UINT8_C(134), UINT8_C(148), UINT8_C( 6) }, { UINT8_C(167), UINT8_C( 81), UINT8_C(225), UINT8_C(206), UINT8_C( 27), UINT8_C( 0), UINT8_C(225), UINT8_C(106), UINT8_C(215), UINT8_C(177), UINT8_C(223), UINT8_C( 80), UINT8_C(122), UINT8_C(208), UINT8_C(206), UINT8_C( 11) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(156), UINT8_C(132), UINT8_C( 13), UINT8_C( 88), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 84), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(251), UINT8_C(236), UINT8_C( 43), UINT8_C(178), UINT8_C(112), UINT8_C( 25), 
UINT8_C(116), UINT8_MAX, UINT8_C(195), UINT8_C(173), UINT8_C(163), UINT8_C( 57), UINT8_C( 51), UINT8_C( 55), UINT8_C( 63), UINT8_C(218) }, { UINT8_C(136), UINT8_C( 33), UINT8_C(168), UINT8_C(163), UINT8_C( 33), UINT8_C(138), UINT8_C( 14), UINT8_C(248), UINT8_C( 59), UINT8_C(237), UINT8_C( 72), UINT8_C(181), UINT8_C(190), UINT8_C( 22), UINT8_C(192), UINT8_C(185) }, { UINT8_C(115), UINT8_C(203), UINT8_C( 0), UINT8_C( 15), UINT8_C( 79), UINT8_C( 0), UINT8_C(102), UINT8_C( 7), UINT8_C(136), UINT8_C( 0), UINT8_C( 91), UINT8_C( 0), UINT8_C( 0), UINT8_C( 33), UINT8_C( 0), UINT8_C( 33) } }, { { UINT8_C( 2), UINT8_C(236), UINT8_C(107), UINT8_C(115), UINT8_C( 5), UINT8_C(223), UINT8_C(114), UINT8_C(201), UINT8_C(140), UINT8_C( 21), UINT8_C( 2), UINT8_C(192), UINT8_C( 76), UINT8_C( 66), UINT8_C(154), UINT8_C(213) }, { UINT8_C( 99), UINT8_C( 67), UINT8_C(120), UINT8_C(132), UINT8_C(205), UINT8_C(134), UINT8_C(124), UINT8_C( 8), UINT8_C(116), UINT8_C(196), UINT8_C(189), UINT8_C( 50), UINT8_C(219), UINT8_C(125), UINT8_C(235), UINT8_C(221) }, { UINT8_C( 0), UINT8_C(169), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 89), UINT8_C( 0), UINT8_C(193), UINT8_C( 24), UINT8_C( 0), UINT8_C( 0), UINT8_C(142), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(105), UINT8_C( 86), UINT8_C( 80), UINT8_C(111), UINT8_C( 53), UINT8_C(194), UINT8_C( 56), UINT8_C(193), UINT8_C(215), UINT8_C( 58), UINT8_C(129), UINT8_C( 36), UINT8_C(124), UINT8_C( 28), UINT8_C(249), UINT8_C(223) }, { UINT8_C( 95), UINT8_C(113), UINT8_C( 99), UINT8_C( 44), UINT8_C(248), UINT8_C(223), UINT8_C( 52), UINT8_C(108), UINT8_C(164), UINT8_C(241), UINT8_C(158), UINT8_C(127), UINT8_C(110), UINT8_C(137), UINT8_C( 92), UINT8_C(216) }, { UINT8_C( 10), UINT8_C( 0), UINT8_C( 0), UINT8_C( 67), UINT8_C( 0), UINT8_C( 0), UINT8_C( 4), UINT8_C( 85), UINT8_C( 51), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 14), UINT8_C( 0), UINT8_C(157), UINT8_C( 7) } }, { { UINT8_C(223), UINT8_C(173), UINT8_C( 71), UINT8_C( 
20), UINT8_C(111), UINT8_C(127), UINT8_C(213), UINT8_C( 71), UINT8_C(185), UINT8_C( 87), UINT8_C(107), UINT8_C( 54), UINT8_C(115), UINT8_C(100), UINT8_C( 21), UINT8_C(210) }, { UINT8_C(213), UINT8_C(121), UINT8_C(254), UINT8_C(205), UINT8_C( 88), UINT8_C( 50), UINT8_C( 57), UINT8_C(252), UINT8_C( 35), UINT8_C(215), UINT8_C(123), UINT8_C(145), UINT8_C( 96), UINT8_C(216), UINT8_C(105), UINT8_C( 63) }, { UINT8_C( 10), UINT8_C( 52), UINT8_C( 0), UINT8_C( 0), UINT8_C( 23), UINT8_C( 77), UINT8_C(156), UINT8_C( 0), UINT8_C(150), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 19), UINT8_C( 0), UINT8_C( 0), UINT8_C(147) } }, { { UINT8_C(133), UINT8_C(176), UINT8_C( 83), UINT8_C(244), UINT8_C( 47), UINT8_C( 41), UINT8_C( 59), UINT8_C(233), UINT8_C(128), UINT8_C(166), UINT8_C( 31), UINT8_C(243), UINT8_C( 10), UINT8_C( 52), UINT8_C(197), UINT8_C(224) }, { UINT8_C(173), UINT8_C(195), UINT8_C(173), UINT8_C( 6), UINT8_C(245), UINT8_C(231), UINT8_C( 2), UINT8_C( 24), UINT8_C(190), UINT8_C(126), UINT8_C(169), UINT8_C( 31), UINT8_C( 86), UINT8_C( 19), UINT8_C( 94), UINT8_C(219) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(238), UINT8_C( 0), UINT8_C( 0), UINT8_C( 57), UINT8_C(209), UINT8_C( 0), UINT8_C( 40), UINT8_C( 0), UINT8_C(212), UINT8_C( 0), UINT8_C( 33), UINT8_C(103), UINT8_C( 5) } }, { { UINT8_C(195), UINT8_C(178), UINT8_C(207), UINT8_C(243), UINT8_C(219), UINT8_C( 11), UINT8_C(220), UINT8_C( 91), UINT8_C(177), UINT8_C(251), UINT8_C( 78), UINT8_C(188), UINT8_C( 47), UINT8_C( 19), UINT8_C(156), UINT8_C(221) }, { UINT8_C(214), UINT8_C( 73), UINT8_C(227), UINT8_C(203), UINT8_C( 48), UINT8_C(229), UINT8_C(227), UINT8_C(239), UINT8_C( 99), UINT8_C(140), UINT8_C( 14), UINT8_C(185), UINT8_C(159), UINT8_C(108), UINT8_C(148), UINT8_C( 99) }, { UINT8_C( 0), UINT8_C(105), UINT8_C( 0), UINT8_C( 40), UINT8_C(171), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 78), UINT8_C(111), UINT8_C( 64), UINT8_C( 3), UINT8_C( 0), UINT8_C( 0), UINT8_C( 8), UINT8_C(122) } }, { { UINT8_C( 30), 
UINT8_C(100), UINT8_C( 86), UINT8_C(249), UINT8_C(111), UINT8_C( 50), UINT8_C( 84), UINT8_C( 32), UINT8_C( 45), UINT8_C(162), UINT8_C(220), UINT8_C( 92), UINT8_C(181), UINT8_C(120), UINT8_C( 57), UINT8_C(139) }, { UINT8_C(194), UINT8_C( 28), UINT8_C( 86), UINT8_C(242), UINT8_C( 2), UINT8_C( 57), UINT8_C(225), UINT8_C(101), UINT8_C(198), UINT8_C(239), UINT8_C( 31), UINT8_C(101), UINT8_C( 92), UINT8_C(179), UINT8_C(200), UINT8_C(122) }, { UINT8_C( 0), UINT8_C( 72), UINT8_C( 0), UINT8_C( 7), UINT8_C(109), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(189), UINT8_C( 0), UINT8_C( 89), UINT8_C( 0), UINT8_C( 0), UINT8_C( 17) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vqsubq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vqsubq_u8(a, b); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsubq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C( 7703), UINT16_C(34420), UINT16_C(51280), UINT16_C(32167), UINT16_C(33643), UINT16_C( 8410), UINT16_C( 5116), UINT16_C(48812) }, { UINT16_C( 560), UINT16_C(12976), UINT16_C(37436), UINT16_C( 663), UINT16_C(46721), UINT16_C(56679), UINT16_C(12394), UINT16_C(33112) }, { UINT16_C( 7143), UINT16_C(21444), UINT16_C(13844), UINT16_C(31504), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(15700) } }, { { UINT16_C(52302), 
UINT16_C(40712), UINT16_C(44948), UINT16_C(65308), UINT16_C(63026), UINT16_C(11808), UINT16_C(52234), UINT16_C(15084) }, { UINT16_C(40398), UINT16_C( 2668), UINT16_C( 815), UINT16_C(45068), UINT16_C(29882), UINT16_C( 9358), UINT16_C(59044), UINT16_C(62117) }, { UINT16_C(11904), UINT16_C(38044), UINT16_C(44133), UINT16_C(20240), UINT16_C(33144), UINT16_C( 2450), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(44466), UINT16_C(18065), UINT16_C(44636), UINT16_C(36678), UINT16_C(26276), UINT16_C(44733), UINT16_C(43570), UINT16_C( 232) }, { UINT16_C(21575), UINT16_C(30219), UINT16_C( 5976), UINT16_C( 4646), UINT16_C(46219), UINT16_C(12086), UINT16_C(56218), UINT16_C(19490) }, { UINT16_C(22891), UINT16_C( 0), UINT16_C(38660), UINT16_C(32032), UINT16_C( 0), UINT16_C(32647), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(45961), UINT16_C(58771), UINT16_C(55649), UINT16_C( 1652), UINT16_C(12863), UINT16_C(29108), UINT16_C(40412), UINT16_C( 9073) }, { UINT16_C(31985), UINT16_C(18841), UINT16_C(49044), UINT16_C( 8027), UINT16_C(37236), UINT16_C( 3663), UINT16_C(29037), UINT16_C(63067) }, { UINT16_C(13976), UINT16_C(39930), UINT16_C( 6605), UINT16_C( 0), UINT16_C( 0), UINT16_C(25445), UINT16_C(11375), UINT16_C( 0) } }, { { UINT16_C(60964), UINT16_C(34523), UINT16_C(20679), UINT16_C( 1676), UINT16_C(16514), UINT16_C(24183), UINT16_C(59613), UINT16_C(53121) }, { UINT16_C( 6757), UINT16_C(63768), UINT16_C(29913), UINT16_C(19736), UINT16_C(26373), UINT16_C(29276), UINT16_C(47064), UINT16_C(64872) }, { UINT16_C(54207), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(12549), UINT16_C( 0) } }, { { UINT16_C(17573), UINT16_C(27779), UINT16_C( 3988), UINT16_C( 5746), UINT16_C(59727), UINT16_C(11636), UINT16_C(62929), UINT16_C(14076) }, { UINT16_C( 5135), UINT16_C(59439), UINT16_C(18568), UINT16_C(36406), UINT16_C(37551), UINT16_C(34816), UINT16_C(26953), UINT16_C(61061) }, { UINT16_C(12438), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(22176), UINT16_C( 
0), UINT16_C(35976), UINT16_C( 0) } }, { { UINT16_C( 2221), UINT16_C(16730), UINT16_C(52247), UINT16_C(26199), UINT16_C(52149), UINT16_C(34451), UINT16_C(36800), UINT16_C(53181) }, { UINT16_C(60580), UINT16_C(11447), UINT16_C(60724), UINT16_C(58554), UINT16_C(47999), UINT16_C(51308), UINT16_C(61732), UINT16_C(53686) }, { UINT16_C( 0), UINT16_C( 5283), UINT16_C( 0), UINT16_C( 0), UINT16_C( 4150), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C( 4345), UINT16_C( 4114), UINT16_C(27100), UINT16_C(37238), UINT16_C( 2612), UINT16_C(62488), UINT16_C(54681), UINT16_C(15811) }, { UINT16_C(31425), UINT16_C(63082), UINT16_C( 9320), UINT16_C(59354), UINT16_C(18143), UINT16_C( 944), UINT16_C(26167), UINT16_C(12500) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C(17780), UINT16_C( 0), UINT16_C( 0), UINT16_C(61544), UINT16_C(28514), UINT16_C( 3311) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vqsubq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t r = simde_vqsubq_u16(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsubq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(1396762231), UINT32_C(2212869711), UINT32_C(1517813184), UINT32_C(2476161746) }, { UINT32_C( 495518133), UINT32_C( 84173606), UINT32_C(3758732457), UINT32_C(2450578715) }, { UINT32_C( 901244098), UINT32_C(2128696105), UINT32_C( 0), 
UINT32_C( 25583031) } }, { { UINT32_C( 333795524), UINT32_C(3348613639), UINT32_C(2569080519), UINT32_C(4264409417) }, { UINT32_C(3759912634), UINT32_C(3286638362), UINT32_C(4020563924), UINT32_C(2424419532) }, { UINT32_C( 0), UINT32_C( 61975277), UINT32_C( 0), UINT32_C(1839989885) } }, { { UINT32_C( 212100613), UINT32_C(4174592817), UINT32_C(2459104585), UINT32_C(1754316718) }, { UINT32_C(2403969909), UINT32_C(2673029067), UINT32_C(3935237918), UINT32_C(2960854955) }, { UINT32_C( 0), UINT32_C(1501563750), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(2797346677), UINT32_C(2745143386), UINT32_C( 859189637), UINT32_C(1704707824) }, { UINT32_C(1039524978), UINT32_C( 836519955), UINT32_C(3927730751), UINT32_C(4003174265) }, { UINT32_C(1757821699), UINT32_C(1908623431), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 278222774), UINT32_C(1823683815), UINT32_C(1436543333), UINT32_C( 582630320) }, { UINT32_C( 861908767), UINT32_C( 912538615), UINT32_C( 505512101), UINT32_C(3440163863) }, { UINT32_C( 0), UINT32_C( 911145200), UINT32_C( 931031232), UINT32_C( 0) } }, { { UINT32_C(4225605907), UINT32_C( 979866069), UINT32_C( 714016634), UINT32_C(1632389698) }, { UINT32_C(4053052409), UINT32_C(2334652902), UINT32_C(2443790457), UINT32_C( 408860164) }, { UINT32_C( 172553498), UINT32_C( 0), UINT32_C( 0), UINT32_C(1223529534) } }, { { UINT32_C( 756235351), UINT32_C(1197963981), UINT32_C(3279091585), UINT32_C( 975552065) }, { UINT32_C(1345042794), UINT32_C( 752636850), UINT32_C(2696775067), UINT32_C(2478316347) }, { UINT32_C( 0), UINT32_C( 445327131), UINT32_C( 582316518), UINT32_C( 0) } }, { { UINT32_C( 616614743), UINT32_C(3345753925), UINT32_C(1602936350), UINT32_C( 110800796) }, { UINT32_C( 458737001), UINT32_C(3024565016), UINT32_C(4099146936), UINT32_C(2005339168) }, { UINT32_C( 157877742), UINT32_C( 321188909), UINT32_C( 0), UINT32_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); 
simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vqsubq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t r = simde_vqsubq_u32(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqsubq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(10224024848080586711), UINT64_C( 9694427134200409830) }, { UINT64_C(14634703258642473036), UINT64_C( 5730063582356270318) }, { UINT64_C( 0), UINT64_C( 3964363551844139512) } }, { { UINT64_C( 1119513024125477261), UINT64_C( 5215480616301262269) }, { UINT64_C(15841910786196505595), UINT64_C( 7924486926259296561) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 8271409052956321168), UINT64_C( 8190833270310394727) }, { UINT64_C( 5654929166370298034), UINT64_C( 7079362651552769009) }, { UINT64_C( 2616479886586023134), UINT64_C( 1111470618757625718) } }, { { UINT64_C( 35998668026521431), UINT64_C( 3325362398858366015) }, { UINT64_C(17281236453137687911), UINT64_C( 1576258484415024043) }, { UINT64_C( 0), UINT64_C( 1749103914443341972) } }, { { UINT64_C( 2251695219389103013), UINT64_C( 1879966640302760813) }, { UINT64_C(15532476601301132448), UINT64_C(17442217391346241085) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 4480642722485580079), UINT64_C(11447936937942778387) }, { UINT64_C( 3181171840948928033), UINT64_C(16021615797036612121) }, { UINT64_C( 1299470881536652046), UINT64_C( 0) } }, { { UINT64_C( 898217407916378471), UINT64_C(18282839721204948509) }, { UINT64_C(15657732499944764719), 
UINT64_C( 1186302306518266602) }, { UINT64_C( 0), UINT64_C(17096537414686681907) } }, { { UINT64_C(11252174941359530785), UINT64_C(15724723450389461343) }, { UINT64_C(16068356335904326757), UINT64_C(17757954831424044471) }, { UINT64_C( 0), UINT64_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vqsubq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t r = simde_vqsubq_u64(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vqsubb_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqsubh_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqsubs_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqsubd_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vqsubb_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vqsubh_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vqsubs_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vqsubd_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vqsub_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqsub_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqsub_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqsub_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vqsub_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vqsub_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vqsub_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vqsub_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vqsubq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqsubq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vqsubq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqsubq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vqsubq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vqsubq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vqsubq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vqsubq_u64) SIMDE_TEST_FUNC_LIST_END #include 
"test-neon-footer.h" simde-0.7.2/test/arm/neon/qtbl.c000066400000000000000000004547621400333146700164470ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN qtbl #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/qtbl.h" #else #include "../../../simde/arm/neon.h" #endif #if 0 #define PROBABILITY 80 #define probability(p) (rand() < ((HEDLEY_STATIC_CAST(int64_t, RAND_MAX) * (p)) / 100)) #endif static int test_simde_vqtbl1_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t t[16]; SIMDE_ALIGN_TO_16 uint8_t idx[8]; SIMDE_ALIGN_TO_16 int8_t r[8]; } test_vec[] = { { { -INT8_C( 29), INT8_C( 100), INT8_C( 31), INT8_C( 119), INT8_C( 4), INT8_C( 25), INT8_C( 119), -INT8_C( 28), -INT8_C( 31), -INT8_C( 76), -INT8_C( 95), -INT8_C( 91), INT8_C( 81), -INT8_C( 115), INT8_C( 77), -INT8_C( 75) }, { UINT8_C( 2), UINT8_C( 10), UINT8_C( 7), UINT8_C( 2), UINT8_C( 14), UINT8_C( 89), UINT8_C( 10), UINT8_C( 1) }, { INT8_C( 31), -INT8_C( 95), -INT8_C( 28), INT8_C( 31), INT8_C( 77), INT8_C( 0), -INT8_C( 95), INT8_C( 100) } }, { { INT8_C( 62), INT8_C( 30), -INT8_C( 17), INT8_C( 66), INT8_C( 56), INT8_C( 102), INT8_C( 38), INT8_C( 25), INT8_C( 26), -INT8_C( 57), -INT8_C( 66), INT8_C( 107), INT8_C( 85), INT8_C( 11), INT8_C( 33), INT8_C( 39) }, { UINT8_C( 5), UINT8_C( 8), UINT8_C( 9), UINT8_C( 4), UINT8_C(130), UINT8_C( 3), UINT8_C(181), UINT8_C( 7) }, { INT8_C( 102), INT8_C( 26), -INT8_C( 57), INT8_C( 56), INT8_C( 0), INT8_C( 66), INT8_C( 0), INT8_C( 25) } }, { { -INT8_C( 20), -INT8_C( 44), -INT8_C( 102), INT8_C( 36), INT8_C( 59), -INT8_C( 64), INT8_C( 61), INT8_C( 85), -INT8_C( 120), -INT8_C( 4), -INT8_C( 63), -INT8_C( 35), INT8_C( 7), -INT8_C( 30), INT8_C( 4), -INT8_C( 83) }, { UINT8_C( 10), UINT8_C( 13), UINT8_C( 1), UINT8_C( 12), UINT8_C( 0), UINT8_C( 6), UINT8_C( 4), UINT8_C( 8) }, { -INT8_C( 63), -INT8_C( 30), -INT8_C( 44), INT8_C( 7), -INT8_C( 20), INT8_C( 61), INT8_C( 59), -INT8_C( 120) } }, { { 
INT8_C( 78), INT8_C( 79), -INT8_C( 48), -INT8_C( 119), INT8_C( 15), INT8_C( 13), -INT8_C( 34), -INT8_C( 105), INT8_C( 9), -INT8_C( 97), INT8_C( 116), INT8_C( 17), -INT8_C( 127), INT8_C( 120), -INT8_C( 66), -INT8_C( 116) }, { UINT8_C( 5), UINT8_C( 15), UINT8_C( 8), UINT8_C( 6), UINT8_C( 21), UINT8_C( 12), UINT8_C( 14), UINT8_C( 9) }, { INT8_C( 13), -INT8_C( 116), INT8_C( 9), -INT8_C( 34), INT8_C( 0), -INT8_C( 127), -INT8_C( 66), -INT8_C( 97) } }, { { -INT8_C( 69), INT8_C( 107), -INT8_C( 125), -INT8_C( 54), INT8_C( 120), INT8_C( 97), INT8_C( 98), -INT8_C( 126), INT8_C( 1), -INT8_C( 42), -INT8_C( 109), -INT8_C( 126), INT8_C( 79), INT8_C( 81), INT8_C( 14), INT8_C( 116) }, { UINT8_C( 0), UINT8_C( 7), UINT8_C( 10), UINT8_C( 5), UINT8_C( 99), UINT8_C( 8), UINT8_C( 14), UINT8_C( 2) }, { -INT8_C( 69), -INT8_C( 126), -INT8_C( 109), INT8_C( 97), INT8_C( 0), INT8_C( 1), INT8_C( 14), -INT8_C( 125) } }, { { -INT8_C( 121), -INT8_C( 28), INT8_C( 119), INT8_C( 0), INT8_C( 69), -INT8_C( 39), -INT8_C( 126), INT8_C( 70), -INT8_C( 81), INT8_C( 21), -INT8_C( 55), -INT8_C( 2), INT8_C( 102), -INT8_C( 41), INT8_C( 115), -INT8_C( 58) }, { UINT8_C(254), UINT8_C( 13), UINT8_C( 11), UINT8_C( 98), UINT8_C( 6), UINT8_C( 9), UINT8_C( 4), UINT8_C(161) }, { INT8_C( 0), -INT8_C( 41), -INT8_C( 2), INT8_C( 0), -INT8_C( 126), INT8_C( 21), INT8_C( 69), INT8_C( 0) } }, { { INT8_C( 77), -INT8_C( 92), INT8_C( 26), -INT8_C( 110), INT8_C( 125), -INT8_C( 100), -INT8_C( 39), INT8_C( 44), -INT8_C( 79), -INT8_C( 94), INT8_C( 43), INT8_C( 23), INT8_C( 121), -INT8_C( 98), -INT8_C( 35), INT8_C( 120) }, { UINT8_C( 11), UINT8_C( 8), UINT8_C( 10), UINT8_C( 1), UINT8_C( 1), UINT8_C( 14), UINT8_C(130), UINT8_C(176) }, { INT8_C( 23), -INT8_C( 79), INT8_C( 43), -INT8_C( 92), -INT8_C( 92), -INT8_C( 35), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 2), INT8_C( 11), -INT8_C( 45), INT8_MAX, -INT8_C( 89), -INT8_C( 84), -INT8_C( 85), INT8_C( 88), INT8_C( 78), -INT8_C( 42), INT8_C( 111), -INT8_C( 57), INT8_C( 116), INT8_C( 76), 
INT8_C( 63), INT8_C( 112) }, { UINT8_C(100), UINT8_C( 9), UINT8_C( 1), UINT8_C( 5), UINT8_C( 8), UINT8_C( 4), UINT8_C( 6), UINT8_C( 12) }, { INT8_C( 0), -INT8_C( 42), INT8_C( 11), -INT8_C( 84), INT8_C( 78), -INT8_C( 89), -INT8_C( 85), INT8_C( 116) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t t = simde_vld1q_s8(test_vec[i].t); simde_uint8x8_t idx = simde_vld1_u8(test_vec[i].idx); simde_int8x8_t r = simde_vqtbl1_s8(t, idx); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t t = simde_test_arm_neon_random_i8x16(); simde_uint8x8_private idx_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 15; } } simde_uint8x8_t idx = simde_uint8x8_from_private(idx_); simde_int8x8_t r = simde_vqtbl1_s8(t, idx); simde_test_arm_neon_write_i8x16(2, t, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbl1_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t t[16]; SIMDE_ALIGN_TO_16 uint8_t idx[8]; SIMDE_ALIGN_TO_16 uint8_t r[8]; } test_vec[] = { { { UINT8_C( 60), UINT8_C( 20), UINT8_C( 0), UINT8_C(112), UINT8_C(102), UINT8_C( 65), UINT8_C(211), UINT8_C(114), UINT8_C(253), UINT8_C( 6), UINT8_C(129), UINT8_C(166), UINT8_C(153), UINT8_C(197), UINT8_C( 72), UINT8_C(200) }, { UINT8_C(103), UINT8_C( 8), UINT8_C( 65), UINT8_C( 6), UINT8_C( 0), UINT8_C(184), UINT8_C( 12), UINT8_C( 1) }, { UINT8_C( 0), UINT8_C(253), UINT8_C( 0), UINT8_C(211), UINT8_C( 60), UINT8_C( 0), UINT8_C(153), UINT8_C( 20) } }, { { UINT8_C(166), UINT8_C(109), UINT8_C(131), UINT8_C( 12), UINT8_C(174), UINT8_C( 86), UINT8_C(126), UINT8_C(172), UINT8_C( 92), 
UINT8_MAX, UINT8_C( 82), UINT8_C(245), UINT8_C(196), UINT8_C(154), UINT8_C(189), UINT8_C( 43) }, { UINT8_C( 3), UINT8_C( 15), UINT8_C( 2), UINT8_C( 3), UINT8_C( 7), UINT8_C( 14), UINT8_C( 4), UINT8_C( 2) }, { UINT8_C( 12), UINT8_C( 43), UINT8_C(131), UINT8_C( 12), UINT8_C(172), UINT8_C(189), UINT8_C(174), UINT8_C(131) } }, { { UINT8_C(242), UINT8_C( 87), UINT8_C(169), UINT8_C(160), UINT8_C(173), UINT8_C( 40), UINT8_C( 76), UINT8_C( 9), UINT8_C( 39), UINT8_C(158), UINT8_C(254), UINT8_C(236), UINT8_C( 57), UINT8_C(187), UINT8_C( 23), UINT8_C(204) }, { UINT8_C( 10), UINT8_C( 9), UINT8_C( 15), UINT8_C( 1), UINT8_C( 8), UINT8_C( 4), UINT8_C( 3), UINT8_C( 3) }, { UINT8_C(254), UINT8_C(158), UINT8_C(204), UINT8_C( 87), UINT8_C( 39), UINT8_C(173), UINT8_C(160), UINT8_C(160) } }, { { UINT8_C(191), UINT8_C(215), UINT8_C(131), UINT8_C(108), UINT8_MAX, UINT8_C(207), UINT8_C(117), UINT8_C( 38), UINT8_C(110), UINT8_C(115), UINT8_C( 18), UINT8_C(167), UINT8_C( 47), UINT8_C( 42), UINT8_C(115), UINT8_C(233) }, { UINT8_C( 51), UINT8_C( 2), UINT8_C( 11), UINT8_C( 11), UINT8_C( 6), UINT8_C( 14), UINT8_C( 14), UINT8_C( 0) }, { UINT8_C( 0), UINT8_C(131), UINT8_C(167), UINT8_C(167), UINT8_C(117), UINT8_C(115), UINT8_C(115), UINT8_C(191) } }, { { UINT8_C(213), UINT8_C(112), UINT8_C( 23), UINT8_C(212), UINT8_C( 63), UINT8_C(141), UINT8_C(250), UINT8_C(173), UINT8_C( 0), UINT8_C( 13), UINT8_C( 84), UINT8_C( 47), UINT8_C( 55), UINT8_C(199), UINT8_C( 25), UINT8_C(106) }, { UINT8_C( 10), UINT8_C( 4), UINT8_C( 6), UINT8_C( 0), UINT8_C( 34), UINT8_C(116), UINT8_C( 0), UINT8_C( 12) }, { UINT8_C( 84), UINT8_C( 63), UINT8_C(250), UINT8_C(213), UINT8_C( 0), UINT8_C( 0), UINT8_C(213), UINT8_C( 55) } }, { { UINT8_C(164), UINT8_C(165), UINT8_C(152), UINT8_C(227), UINT8_C( 50), UINT8_C(146), UINT8_C(145), UINT8_C( 51), UINT8_C(159), UINT8_C(229), UINT8_C( 98), UINT8_C(214), UINT8_C(173), UINT8_C(123), UINT8_C( 65), UINT8_C(231) }, { UINT8_C(239), UINT8_C( 7), UINT8_C(199), UINT8_C( 2), UINT8_C( 11), 
UINT8_C( 8), UINT8_C( 14), UINT8_C( 3) }, { UINT8_C( 0), UINT8_C( 51), UINT8_C( 0), UINT8_C(152), UINT8_C(214), UINT8_C(159), UINT8_C( 65), UINT8_C(227) } }, { { UINT8_C( 73), UINT8_C( 36), UINT8_C( 81), UINT8_C(124), UINT8_C(182), UINT8_C(226), UINT8_C(175), UINT8_C( 86), UINT8_C(199), UINT8_C( 17), UINT8_C( 44), UINT8_C(116), UINT8_C(141), UINT8_C(109), UINT8_C( 91), UINT8_C(124) }, { UINT8_C( 4), UINT8_C( 3), UINT8_C( 14), UINT8_C( 0), UINT8_C( 11), UINT8_C( 13), UINT8_C( 3), UINT8_C( 4) }, { UINT8_C(182), UINT8_C(124), UINT8_C( 91), UINT8_C( 73), UINT8_C(116), UINT8_C(109), UINT8_C(124), UINT8_C(182) } }, { { UINT8_C( 92), UINT8_C(250), UINT8_C( 34), UINT8_C( 18), UINT8_C(220), UINT8_C(209), UINT8_C(104), UINT8_C(164), UINT8_C(227), UINT8_C(149), UINT8_C( 24), UINT8_C(112), UINT8_C( 2), UINT8_C(116), UINT8_C(236), UINT8_C( 55) }, { UINT8_C( 7), UINT8_C( 11), UINT8_C( 7), UINT8_C(162), UINT8_C( 88), UINT8_C( 10), UINT8_C( 6), UINT8_C( 10) }, { UINT8_C(164), UINT8_C(112), UINT8_C(164), UINT8_C( 0), UINT8_C( 0), UINT8_C( 24), UINT8_C(104), UINT8_C( 24) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t t = simde_vld1q_u8(test_vec[i].t); simde_uint8x8_t idx = simde_vld1_u8(test_vec[i].idx); simde_uint8x8_t r = simde_vqtbl1_u8(t, idx); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t t = simde_test_arm_neon_random_u8x16(); simde_uint8x8_private idx_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 15; } } simde_uint8x8_t idx = simde_uint8x8_from_private(idx_); simde_uint8x8_t r = simde_vqtbl1_u8(t, idx); simde_test_arm_neon_write_u8x16(2, t, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, 
r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #if !defined(SIMDE_BUG_INTEL_857088) static int test_simde_vqtbl2_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t t[2][16]; SIMDE_ALIGN_TO_16 uint8_t idx[8]; SIMDE_ALIGN_TO_16 int8_t r[8]; } test_vec[] = { { { { INT8_C( 111), -INT8_C( 69), INT8_C( 111), -INT8_C( 97), -INT8_C( 67), INT8_C( 21), -INT8_C( 1), -INT8_C( 16), -INT8_C( 39), INT8_C( 28), -INT8_C( 84), INT8_C( 73), INT8_C( 88), -INT8_C( 42), -INT8_C( 2), INT8_C( 72) }, { INT8_C( 107), INT8_C( 104), -INT8_C( 117), -INT8_C( 55), -INT8_C( 106), -INT8_C( 6), INT8_C( 98), INT8_C( 77), INT8_C( 122), INT8_C( 7), -INT8_C( 123), INT8_C( 86), -INT8_C( 108), INT8_C( 97), -INT8_C( 82), INT8_C( 3) } }, { UINT8_C( 28), UINT8_C( 30), UINT8_C( 2), UINT8_C(218), UINT8_C( 19), UINT8_C( 1), UINT8_C( 10), UINT8_C( 12) }, { -INT8_C( 108), -INT8_C( 82), INT8_C( 111), INT8_C( 0), -INT8_C( 55), -INT8_C( 69), -INT8_C( 84), INT8_C( 88) } }, { { { -INT8_C( 69), -INT8_C( 23), INT8_MIN, INT8_C( 82), -INT8_C( 29), -INT8_C( 30), -INT8_C( 97), INT8_C( 93), -INT8_C( 22), INT8_C( 36), -INT8_C( 77), INT8_C( 126), -INT8_C( 122), INT8_C( 97), -INT8_C( 127), -INT8_C( 94) }, { INT8_MAX, INT8_C( 35), INT8_C( 124), -INT8_C( 78), -INT8_C( 59), INT8_C( 71), -INT8_C( 66), -INT8_C( 126), -INT8_C( 67), INT8_C( 20), -INT8_C( 104), INT8_C( 10), INT8_C( 103), -INT8_C( 10), -INT8_C( 63), INT8_C( 35) } }, { UINT8_C( 31), UINT8_C( 2), UINT8_C( 21), UINT8_C(194), UINT8_C( 4), UINT8_C( 20), UINT8_C( 31), UINT8_C( 14) }, { INT8_C( 35), INT8_MIN, INT8_C( 71), INT8_C( 0), -INT8_C( 29), -INT8_C( 59), INT8_C( 35), -INT8_C( 127) } }, { { { INT8_C( 49), -INT8_C( 34), INT8_C( 101), -INT8_C( 10), INT8_C( 37), INT8_C( 36), INT8_C( 120), -INT8_C( 30), INT8_C( 56), INT8_C( 16), -INT8_C( 20), -INT8_C( 97), INT8_C( 6), -INT8_C( 82), -INT8_C( 62), -INT8_C( 27) }, { -INT8_C( 16), INT8_C( 55), -INT8_C( 89), INT8_C( 20), INT8_C( 76), -INT8_C( 58), INT8_C( 35), -INT8_C( 123), -INT8_C( 104), -INT8_C( 81), 
INT8_C( 68), -INT8_C( 52), -INT8_C( 67), -INT8_C( 91), INT8_MAX, -INT8_C( 18) } }, { UINT8_C( 3), UINT8_C(228), UINT8_C( 4), UINT8_C( 8), UINT8_C( 8), UINT8_C( 92), UINT8_C( 11), UINT8_C( 0) }, { -INT8_C( 10), INT8_C( 0), INT8_C( 37), INT8_C( 56), INT8_C( 56), INT8_C( 0), -INT8_C( 97), INT8_C( 49) } }, { { { -INT8_C( 38), INT8_C( 0), INT8_C( 42), INT8_C( 38), -INT8_C( 57), INT8_C( 77), -INT8_C( 85), INT8_C( 95), -INT8_C( 4), -INT8_C( 17), INT8_C( 43), -INT8_C( 71), -INT8_C( 108), -INT8_C( 86), -INT8_C( 89), INT8_C( 24) }, { -INT8_C( 113), -INT8_C( 117), -INT8_C( 64), -INT8_C( 105), -INT8_C( 24), INT8_C( 75), -INT8_C( 40), INT8_C( 85), -INT8_C( 61), -INT8_C( 72), -INT8_C( 56), -INT8_C( 24), INT8_C( 90), INT8_C( 33), -INT8_C( 2), INT8_C( 52) } }, { UINT8_C( 2), UINT8_C( 40), UINT8_C( 26), UINT8_C( 9), UINT8_C(117), UINT8_C( 5), UINT8_C( 8), UINT8_C( 17) }, { INT8_C( 42), INT8_C( 0), -INT8_C( 56), -INT8_C( 17), INT8_C( 0), INT8_C( 77), -INT8_C( 4), -INT8_C( 117) } }, { { { INT8_C( 94), INT8_C( 97), INT8_C( 69), INT8_C( 70), -INT8_C( 83), INT8_C( 29), -INT8_C( 101), INT8_C( 112), -INT8_C( 43), INT8_C( 99), INT8_C( 88), INT8_C( 47), -INT8_C( 123), INT8_C( 86), INT8_C( 100), -INT8_C( 89) }, { INT8_C( 126), -INT8_C( 66), -INT8_C( 112), -INT8_C( 13), -INT8_C( 60), -INT8_C( 40), INT8_C( 101), -INT8_C( 72), INT8_C( 76), -INT8_C( 112), INT8_C( 65), INT8_C( 107), INT8_C( 98), -INT8_C( 30), INT8_C( 24), -INT8_C( 64) } }, { UINT8_C( 68), UINT8_C( 29), UINT8_C( 6), UINT8_C( 17), UINT8_C( 26), UINT8_C( 1), UINT8_C( 1), UINT8_C( 15) }, { INT8_C( 0), -INT8_C( 30), -INT8_C( 101), -INT8_C( 66), INT8_C( 65), INT8_C( 97), INT8_C( 97), -INT8_C( 89) } }, { { { -INT8_C( 95), -INT8_C( 63), -INT8_C( 126), INT8_C( 101), -INT8_C( 103), -INT8_C( 25), INT8_C( 30), -INT8_C( 26), INT8_C( 119), INT8_C( 95), INT8_C( 81), -INT8_C( 39), INT8_C( 66), INT8_C( 105), -INT8_C( 102), -INT8_C( 122) }, { -INT8_C( 57), -INT8_C( 96), INT8_C( 119), INT8_C( 65), INT8_C( 66), -INT8_C( 40), -INT8_C( 111), INT8_C( 
71), -INT8_C( 111), INT8_C( 16), -INT8_C( 47), -INT8_C( 95), -INT8_C( 13), INT8_C( 2), INT8_C( 48), -INT8_C( 108) } }, { UINT8_C( 3), UINT8_C( 18), UINT8_C(250), UINT8_C( 28), UINT8_C( 25), UINT8_C( 24), UINT8_C( 2), UINT8_C( 16) }, { INT8_C( 101), INT8_C( 119), INT8_C( 0), -INT8_C( 13), INT8_C( 16), -INT8_C( 111), -INT8_C( 126), -INT8_C( 57) } }, { { { INT8_C( 36), -INT8_C( 74), INT8_C( 5), INT8_C( 102), -INT8_C( 114), -INT8_C( 106), -INT8_C( 83), INT8_C( 32), -INT8_C( 90), INT8_C( 126), -INT8_C( 63), -INT8_C( 103), INT8_MIN, -INT8_C( 15), INT8_C( 46), INT8_C( 67) }, { -INT8_C( 93), INT8_C( 40), -INT8_C( 97), INT8_C( 60), INT8_C( 64), -INT8_C( 30), INT8_C( 76), -INT8_C( 73), INT8_C( 117), INT8_C( 54), INT8_C( 113), INT8_C( 114), -INT8_C( 71), -INT8_C( 80), INT8_C( 54), -INT8_C( 35) } }, { UINT8_C( 7), UINT8_C( 60), UINT8_C( 3), UINT8_C( 21), UINT8_C( 18), UINT8_C(240), UINT8_C( 21), UINT8_C( 25) }, { INT8_C( 32), INT8_C( 0), INT8_C( 102), -INT8_C( 30), -INT8_C( 97), INT8_C( 0), -INT8_C( 30), INT8_C( 54) } }, { { { INT8_C( 104), -INT8_C( 47), -INT8_C( 88), -INT8_C( 88), -INT8_C( 77), -INT8_C( 11), INT8_C( 96), INT8_C( 40), INT8_C( 43), -INT8_C( 47), -INT8_C( 101), -INT8_C( 28), -INT8_C( 127), -INT8_C( 47), -INT8_C( 62), -INT8_C( 24) }, { INT8_C( 13), INT8_C( 5), -INT8_C( 34), -INT8_C( 32), -INT8_C( 10), -INT8_C( 13), INT8_C( 89), INT8_C( 100), -INT8_C( 54), INT8_C( 107), INT8_C( 83), -INT8_C( 109), -INT8_C( 84), -INT8_C( 124), -INT8_C( 1), INT8_C( 20) } }, { UINT8_C( 21), UINT8_C( 7), UINT8_C( 29), UINT8_C( 8), UINT8_C( 28), UINT8_C( 29), UINT8_C( 17), UINT8_C(199) }, { -INT8_C( 13), INT8_C( 40), -INT8_C( 124), INT8_C( 43), -INT8_C( 84), -INT8_C( 124), INT8_C( 5), INT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16x2_t t; t.val[0] = simde_vld1q_s8(test_vec[i].t[0]); t.val[1] = simde_vld1q_s8(test_vec[i].t[1]); simde_uint8x8_t idx = simde_vld1_u8(test_vec[i].idx); simde_int8x8_t r = simde_vqtbl2_s8(t, idx); 
simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16x2_t t = simde_test_arm_neon_random_i8x16x2(); simde_uint8x8_private idx_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 31; } } simde_uint8x8_t idx = simde_uint8x8_from_private(idx_); simde_int8x8_t r = simde_vqtbl2_s8(t, idx); simde_test_arm_neon_write_i8x16x2(2, t, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbl2_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t t[2][16]; SIMDE_ALIGN_TO_16 uint8_t idx[8]; SIMDE_ALIGN_TO_16 uint8_t r[8]; } test_vec[] = { { { { UINT8_C(100), UINT8_C( 13), UINT8_C(102), UINT8_C(107), UINT8_C( 43), UINT8_C(137), UINT8_C( 34), UINT8_C( 34), UINT8_C(186), UINT8_C(204), UINT8_C( 12), UINT8_C(111), UINT8_C(110), UINT8_C( 43), UINT8_C(121), UINT8_C(110) }, { UINT8_C(239), UINT8_C( 67), UINT8_C(177), UINT8_C(201), UINT8_C( 68), UINT8_C( 65), UINT8_C(171), UINT8_C( 23), UINT8_C(115), UINT8_C( 0), UINT8_C( 85), UINT8_C(146), UINT8_C( 33), UINT8_C( 24), UINT8_C(200), UINT8_C(134) } }, { UINT8_C( 5), UINT8_C( 14), UINT8_C( 17), UINT8_C( 16), UINT8_C( 23), UINT8_C( 19), UINT8_C( 19), UINT8_C(113) }, { UINT8_C(137), UINT8_C(121), UINT8_C( 67), UINT8_C(239), UINT8_C( 23), UINT8_C(201), UINT8_C(201), UINT8_C( 0) } }, { { { UINT8_C(157), UINT8_C(108), UINT8_C( 98), UINT8_C(225), UINT8_C(173), UINT8_C( 14), UINT8_C(248), UINT8_C( 32), UINT8_C( 14), UINT8_C( 77), UINT8_C(178), UINT8_C( 47), UINT8_C(101), UINT8_C(122), UINT8_C(181), UINT8_C(139) }, { UINT8_C(169), UINT8_C(166), UINT8_C(219), UINT8_C( 96), UINT8_C(186), UINT8_C( 78), UINT8_C(210), UINT8_C(153), UINT8_C(205), 
UINT8_C(178), UINT8_C(231), UINT8_C(119), UINT8_C( 12), UINT8_C(162), UINT8_C( 16), UINT8_C(169) } }, { UINT8_C( 15), UINT8_C( 19), UINT8_C( 10), UINT8_C( 28), UINT8_C( 1), UINT8_C(131), UINT8_C( 29), UINT8_C(143) }, { UINT8_C(139), UINT8_C( 96), UINT8_C(178), UINT8_C( 12), UINT8_C(108), UINT8_C( 0), UINT8_C(162), UINT8_C( 0) } }, { { { UINT8_C( 26), UINT8_C(156), UINT8_C( 19), UINT8_C(212), UINT8_C(235), UINT8_C(229), UINT8_C(110), UINT8_C(184), UINT8_C(152), UINT8_C( 85), UINT8_C( 48), UINT8_C(164), UINT8_C(247), UINT8_C( 64), UINT8_C( 77), UINT8_C( 6) }, { UINT8_C(179), UINT8_C(215), UINT8_C(195), UINT8_C( 52), UINT8_C( 90), UINT8_C(160), UINT8_C(195), UINT8_C( 43), UINT8_C( 47), UINT8_C(130), UINT8_C( 97), UINT8_C( 57), UINT8_C(246), UINT8_C( 34), UINT8_C(236), UINT8_C( 16) } }, { UINT8_C(190), UINT8_C( 0), UINT8_C( 5), UINT8_C( 9), UINT8_C( 5), UINT8_C( 83), UINT8_C( 2), UINT8_C( 29) }, { UINT8_C( 0), UINT8_C( 26), UINT8_C(229), UINT8_C( 85), UINT8_C(229), UINT8_C( 0), UINT8_C( 19), UINT8_C( 34) } }, { { { UINT8_C( 70), UINT8_C(105), UINT8_C(186), UINT8_C(160), UINT8_C( 9), UINT8_C(126), UINT8_C(203), UINT8_C( 56), UINT8_C( 0), UINT8_C( 44), UINT8_C(114), UINT8_C(246), UINT8_C( 78), UINT8_C( 94), UINT8_C( 6), UINT8_C( 13) }, { UINT8_C( 94), UINT8_C(235), UINT8_C(182), UINT8_C( 68), UINT8_C( 62), UINT8_C( 24), UINT8_C(193), UINT8_C(230), UINT8_C(170), UINT8_C(227), UINT8_C(134), UINT8_C(125), UINT8_C( 81), UINT8_C( 44), UINT8_C( 3), UINT8_C(151) } }, { UINT8_C(149), UINT8_C( 29), UINT8_C( 24), UINT8_C(158), UINT8_C( 27), UINT8_C( 3), UINT8_C( 22), UINT8_C( 27) }, { UINT8_C( 0), UINT8_C( 44), UINT8_C(170), UINT8_C( 0), UINT8_C(125), UINT8_C(160), UINT8_C(193), UINT8_C(125) } }, { { { UINT8_C( 35), UINT8_C( 66), UINT8_C( 73), UINT8_C( 98), UINT8_C( 90), UINT8_C( 11), UINT8_C( 72), UINT8_C( 5), UINT8_C(238), UINT8_C(206), UINT8_C(130), UINT8_C( 63), UINT8_C(250), UINT8_C(133), UINT8_C(215), UINT8_C(143) }, { UINT8_C( 66), UINT8_C( 15), UINT8_C( 45), UINT8_C(126), 
UINT8_C( 18), UINT8_C( 4), UINT8_C(185), UINT8_C( 66), UINT8_C( 76), UINT8_C(235), UINT8_C(193), UINT8_C(243), UINT8_C( 35), UINT8_C( 76), UINT8_C(249), UINT8_C( 70) } }, { UINT8_C( 14), UINT8_C( 2), UINT8_C( 8), UINT8_C( 9), UINT8_C( 13), UINT8_C(241), UINT8_C( 14), UINT8_C( 27) }, { UINT8_C(215), UINT8_C( 73), UINT8_C(238), UINT8_C(206), UINT8_C(133), UINT8_C( 0), UINT8_C(215), UINT8_C(243) } }, { { { UINT8_C( 97), UINT8_C(119), UINT8_C(181), UINT8_C(115), UINT8_C(123), UINT8_C(111), UINT8_C(182), UINT8_C(199), UINT8_C( 90), UINT8_C(119), UINT8_C(187), UINT8_C(125), UINT8_C(195), UINT8_C(180), UINT8_C(195), UINT8_C( 82) }, { UINT8_C(246), UINT8_C(108), UINT8_C( 59), UINT8_C( 68), UINT8_C( 93), UINT8_C( 41), UINT8_C(127), UINT8_C( 28), UINT8_C(153), UINT8_C(250), UINT8_C(214), UINT8_C(142), UINT8_C( 76), UINT8_C( 32), UINT8_C(197), UINT8_C(173) } }, { UINT8_C(151), UINT8_C( 27), UINT8_C( 1), UINT8_C( 18), UINT8_C( 10), UINT8_C( 23), UINT8_C( 25), UINT8_C( 4) }, { UINT8_C( 0), UINT8_C(142), UINT8_C(119), UINT8_C( 59), UINT8_C(187), UINT8_C( 28), UINT8_C(250), UINT8_C(123) } }, { { { UINT8_C(240), UINT8_C(158), UINT8_C(131), UINT8_C( 77), UINT8_C(199), UINT8_C( 2), UINT8_C(106), UINT8_C( 96), UINT8_C(253), UINT8_C( 64), UINT8_C(238), UINT8_C( 73), UINT8_C( 96), UINT8_C(180), UINT8_C(247), UINT8_C(247) }, { UINT8_C( 47), UINT8_C( 24), UINT8_C( 9), UINT8_C( 25), UINT8_C(239), UINT8_C(227), UINT8_C( 93), UINT8_C( 61), UINT8_C(119), UINT8_C( 30), UINT8_C( 78), UINT8_C(192), UINT8_C(162), UINT8_C(178), UINT8_MAX, UINT8_C(147) } }, { UINT8_C( 16), UINT8_C( 2), UINT8_C(224), UINT8_C( 24), UINT8_C( 4), UINT8_C( 10), UINT8_C( 24), UINT8_C( 1) }, { UINT8_C( 47), UINT8_C(131), UINT8_C( 0), UINT8_C(119), UINT8_C(199), UINT8_C(238), UINT8_C(119), UINT8_C(158) } }, { { { UINT8_C(218), UINT8_C(236), UINT8_C( 99), UINT8_C(201), UINT8_C(207), UINT8_C(192), UINT8_C( 6), UINT8_C( 71), UINT8_C(222), UINT8_C( 84), UINT8_C( 7), UINT8_C(128), UINT8_C( 6), UINT8_C( 6), UINT8_C( 19), 
UINT8_C( 87) }, { UINT8_C(136), UINT8_C(244), UINT8_C(111), UINT8_C( 12), UINT8_C( 62), UINT8_C(231), UINT8_C(142), UINT8_C(201), UINT8_C( 78), UINT8_C( 89), UINT8_C(181), UINT8_C(105), UINT8_C( 27), UINT8_C(152), UINT8_C(179), UINT8_C(245) } }, { UINT8_C(132), UINT8_C( 22), UINT8_C(190), UINT8_C( 20), UINT8_C( 22), UINT8_C( 4), UINT8_C(155), UINT8_C( 20) }, { UINT8_C( 0), UINT8_C(142), UINT8_C( 0), UINT8_C( 62), UINT8_C(142), UINT8_C(207), UINT8_C( 0), UINT8_C( 62) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16x2_t t; t.val[0] = simde_vld1q_u8(test_vec[i].t[0]); t.val[1] = simde_vld1q_u8(test_vec[i].t[1]); simde_uint8x8_t idx = simde_vld1_u8(test_vec[i].idx); simde_uint8x8_t r = simde_vqtbl2_u8(t, idx); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16x2_t t = simde_test_arm_neon_random_u8x16x2(); simde_uint8x8_private idx_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 31; } } simde_uint8x8_t idx = simde_uint8x8_from_private(idx_); simde_uint8x8_t r = simde_vqtbl2_u8(t, idx); simde_test_arm_neon_write_u8x16x2(2, t, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbl3_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t t[3][16]; SIMDE_ALIGN_TO_16 uint8_t idx[8]; SIMDE_ALIGN_TO_16 int8_t r[8]; } test_vec[] = { { { { -INT8_C( 89), INT8_C( 74), -INT8_C( 122), INT8_C( 5), -INT8_C( 54), -INT8_C( 125), INT8_C( 83), -INT8_C( 23), -INT8_C( 83), -INT8_C( 23), -INT8_C( 63), INT8_C( 36), -INT8_C( 31), INT8_C( 61), -INT8_C( 38), -INT8_C( 49) }, { INT8_C( 53), INT8_C( 108), -INT8_C( 51), -INT8_C( 96), INT8_C( 
38), -INT8_C( 87), -INT8_C( 102), -INT8_C( 70), -INT8_C( 15), INT8_C( 52), -INT8_C( 59), INT8_C( 64), INT8_C( 109), INT8_C( 58), -INT8_C( 48), INT8_C( 21) }, { -INT8_C( 124), INT8_C( 86), INT8_C( 26), INT8_C( 78), -INT8_C( 39), INT8_C( 109), INT8_C( 56), -INT8_C( 121), INT8_C( 86), -INT8_C( 7), -INT8_C( 85), INT8_C( 55), INT8_C( 54), -INT8_C( 123), INT8_C( 7), INT8_C( 107) } }, { UINT8_C( 2), UINT8_C( 20), UINT8_C( 11), UINT8_C( 24), UINT8_C(125), UINT8_C( 22), UINT8_C(210), UINT8_C( 14) }, { -INT8_C( 122), INT8_C( 38), INT8_C( 36), -INT8_C( 15), INT8_C( 0), -INT8_C( 102), INT8_C( 0), -INT8_C( 38) } }, { { { -INT8_C( 43), INT8_C( 119), -INT8_C( 93), -INT8_C( 82), -INT8_C( 28), -INT8_C( 37), INT8_C( 53), INT8_C( 59), -INT8_C( 43), -INT8_C( 32), INT8_C( 114), INT8_C( 11), INT8_C( 102), INT8_C( 121), INT8_C( 119), INT8_C( 88) }, { INT8_C( 77), -INT8_C( 126), INT8_C( 112), -INT8_C( 54), INT8_C( 40), INT8_C( 66), INT8_C( 57), INT8_C( 3), -INT8_C( 39), -INT8_C( 25), INT8_C( 75), -INT8_C( 86), INT8_C( 102), -INT8_C( 88), -INT8_C( 1), INT8_C( 59) }, { INT8_C( 31), -INT8_C( 94), -INT8_C( 23), INT8_C( 3), INT8_C( 126), INT8_C( 31), INT8_C( 62), INT8_C( 83), -INT8_C( 1), -INT8_C( 79), INT8_C( 94), INT8_C( 101), INT8_C( 42), -INT8_C( 43), -INT8_C( 67), INT8_C( 120) } }, { UINT8_C( 40), UINT8_C( 45), UINT8_C( 18), UINT8_C( 32), UINT8_C(111), UINT8_C( 27), UINT8_C( 35), UINT8_C( 24) }, { -INT8_C( 1), -INT8_C( 43), INT8_C( 112), INT8_C( 31), INT8_C( 0), -INT8_C( 86), INT8_C( 3), -INT8_C( 39) } }, { { { -INT8_C( 108), -INT8_C( 19), -INT8_C( 103), INT8_C( 18), INT8_C( 12), -INT8_C( 41), INT8_C( 101), INT8_C( 12), -INT8_C( 120), -INT8_C( 61), INT8_C( 113), -INT8_C( 77), -INT8_C( 103), INT8_C( 47), INT8_C( 43), -INT8_C( 15) }, { INT8_C( 92), INT8_C( 109), INT8_C( 113), -INT8_C( 52), -INT8_C( 23), -INT8_C( 11), INT8_C( 20), INT8_C( 76), -INT8_C( 61), INT8_C( 7), INT8_C( 21), INT8_C( 58), -INT8_C( 8), INT8_C( 25), -INT8_C( 49), -INT8_C( 116) }, { INT8_C( 6), INT8_C( 104), -INT8_C( 98), 
INT8_C( 19), INT8_C( 64), INT8_C( 3), INT8_C( 31), -INT8_C( 56), -INT8_C( 57), -INT8_C( 112), INT8_C( 123), INT8_C( 96), -INT8_C( 65), -INT8_C( 90), INT8_C( 81), INT8_C( 28) } }, { UINT8_C( 20), UINT8_C( 2), UINT8_C( 40), UINT8_C(253), UINT8_C( 39), UINT8_C( 12), UINT8_C( 25), UINT8_C(123) }, { -INT8_C( 23), -INT8_C( 103), -INT8_C( 57), INT8_C( 0), -INT8_C( 56), -INT8_C( 103), INT8_C( 7), INT8_C( 0) } }, { { { -INT8_C( 19), INT8_C( 39), -INT8_C( 112), INT8_C( 45), INT8_C( 42), -INT8_C( 81), -INT8_C( 11), -INT8_C( 15), INT8_C( 64), INT8_C( 113), INT8_C( 81), -INT8_C( 1), INT8_C( 23), -INT8_C( 94), INT8_C( 27), INT8_C( 43) }, { INT8_C( 101), INT8_C( 3), INT8_C( 40), INT8_C( 28), INT8_C( 0), INT8_C( 113), -INT8_C( 105), INT8_C( 3), -INT8_C( 49), INT8_C( 76), -INT8_C( 1), INT8_C( 70), -INT8_C( 47), -INT8_C( 120), -INT8_C( 60), -INT8_C( 66) }, { -INT8_C( 81), INT8_C( 84), -INT8_C( 21), -INT8_C( 39), INT8_C( 4), -INT8_C( 32), -INT8_C( 53), INT8_C( 68), INT8_C( 81), INT8_C( 28), INT8_C( 67), INT8_C( 105), -INT8_C( 65), INT8_C( 95), -INT8_C( 108), INT8_C( 36) } }, { UINT8_C( 2), UINT8_C(189), UINT8_C( 16), UINT8_C( 2), UINT8_C( 46), UINT8_C(216), UINT8_C(102), UINT8_C(254) }, { -INT8_C( 112), INT8_C( 0), INT8_C( 101), -INT8_C( 112), -INT8_C( 108), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { { INT8_C( 93), -INT8_C( 98), INT8_C( 118), INT8_C( 97), INT8_MAX, INT8_C( 65), -INT8_C( 91), -INT8_C( 48), INT8_C( 93), -INT8_C( 24), INT8_C( 57), INT8_C( 28), INT8_C( 71), -INT8_C( 50), INT8_C( 64), -INT8_C( 86) }, { -INT8_C( 117), -INT8_C( 127), INT8_C( 12), -INT8_C( 71), INT8_C( 89), INT8_C( 114), -INT8_C( 73), INT8_C( 125), -INT8_C( 40), -INT8_C( 4), INT8_C( 115), -INT8_C( 59), INT8_C( 4), INT8_C( 38), INT8_C( 98), INT8_C( 97) }, { -INT8_C( 59), -INT8_C( 40), -INT8_C( 62), INT8_C( 68), INT8_C( 25), INT8_C( 103), INT8_C( 20), INT8_C( 118), INT8_C( 80), INT8_C( 78), -INT8_C( 109), -INT8_C( 105), INT8_C( 28), -INT8_C( 45), INT8_C( 65), -INT8_C( 89) } }, { UINT8_C( 36), UINT8_C( 30), 
UINT8_C( 0), UINT8_C(173), UINT8_C( 0), UINT8_C( 24), UINT8_C( 43), UINT8_C(152) }, { INT8_C( 25), INT8_C( 98), INT8_C( 93), INT8_C( 0), INT8_C( 93), -INT8_C( 40), -INT8_C( 105), INT8_C( 0) } }, { { { -INT8_C( 104), INT8_C( 60), -INT8_C( 51), -INT8_C( 79), -INT8_C( 92), -INT8_C( 30), INT8_C( 39), -INT8_C( 12), INT8_C( 48), -INT8_C( 70), -INT8_C( 117), INT8_C( 76), -INT8_C( 114), -INT8_C( 51), -INT8_C( 13), -INT8_C( 30) }, { INT8_C( 27), INT8_C( 83), -INT8_C( 112), -INT8_C( 37), INT8_C( 107), -INT8_C( 69), INT8_C( 116), INT8_MAX, INT8_C( 89), -INT8_C( 46), -INT8_C( 104), INT8_C( 29), -INT8_C( 110), INT8_C( 18), -INT8_C( 89), INT8_C( 42) }, { INT8_C( 78), INT8_C( 116), -INT8_C( 37), -INT8_C( 14), INT8_C( 86), INT8_C( 2), -INT8_C( 26), -INT8_C( 122), -INT8_C( 67), INT8_C( 114), -INT8_C( 46), INT8_C( 75), INT8_C( 63), -INT8_C( 59), INT8_C( 45), INT8_C( 90) } }, { UINT8_C( 25), UINT8_C( 45), UINT8_C( 5), UINT8_C( 36), UINT8_C( 24), UINT8_C(169), UINT8_C( 4), UINT8_C( 17) }, { -INT8_C( 46), -INT8_C( 59), -INT8_C( 30), INT8_C( 86), INT8_C( 89), INT8_C( 0), -INT8_C( 92), INT8_C( 83) } }, { { { INT8_C( 10), INT8_C( 18), -INT8_C( 17), INT8_C( 97), INT8_C( 21), -INT8_C( 43), -INT8_C( 25), -INT8_C( 46), INT8_C( 71), -INT8_C( 70), INT8_C( 29), -INT8_C( 122), INT8_MAX, INT8_C( 74), -INT8_C( 32), -INT8_C( 104) }, { INT8_C( 8), INT8_C( 22), INT8_C( 29), INT8_MIN, -INT8_C( 65), INT8_C( 33), INT8_C( 82), INT8_C( 59), -INT8_C( 67), INT8_C( 65), INT8_C( 72), INT8_C( 107), -INT8_C( 41), INT8_MIN, INT8_C( 103), -INT8_C( 31) }, { -INT8_C( 110), INT8_C( 86), INT8_C( 66), -INT8_C( 89), INT8_C( 44), INT8_C( 42), INT8_C( 121), INT8_C( 115), -INT8_C( 28), -INT8_C( 106), -INT8_C( 6), INT8_C( 99), -INT8_C( 31), -INT8_C( 38), -INT8_C( 4), -INT8_C( 23) } }, { UINT8_C(240), UINT8_C( 25), UINT8_C( 9), UINT8_C(176), UINT8_C( 10), UINT8_C( 43), UINT8_C(235), UINT8_C( 7) }, { INT8_C( 0), INT8_C( 65), -INT8_C( 70), INT8_C( 0), INT8_C( 29), INT8_C( 99), INT8_C( 0), -INT8_C( 46) } }, { { { INT8_C( 32), 
-INT8_C( 9), -INT8_C( 19), INT8_C( 76), INT8_C( 33), INT8_C( 103), -INT8_C( 65), INT8_C( 5), -INT8_C( 3), -INT8_C( 71), INT8_C( 105), -INT8_C( 34), -INT8_C( 108), INT8_C( 101), -INT8_C( 57), -INT8_C( 124) }, { INT8_C( 126), INT8_C( 49), INT8_C( 52), -INT8_C( 72), -INT8_C( 20), INT8_C( 31), -INT8_C( 81), -INT8_C( 23), INT8_C( 83), INT8_C( 17), -INT8_C( 68), INT8_C( 6), -INT8_C( 38), INT8_C( 113), INT8_C( 76), -INT8_C( 6) }, { INT8_C( 105), INT8_C( 58), INT8_C( 70), -INT8_C( 118), -INT8_C( 95), INT8_C( 6), -INT8_C( 112), -INT8_C( 98), -INT8_C( 65), -INT8_C( 7), INT8_C( 125), INT8_C( 83), INT8_C( 94), INT8_C( 68), -INT8_C( 40), -INT8_C( 36) } }, { UINT8_C( 21), UINT8_C( 12), UINT8_C( 4), UINT8_C( 2), UINT8_C( 44), UINT8_C( 19), UINT8_C( 27), UINT8_C( 31) }, { INT8_C( 31), -INT8_C( 108), INT8_C( 33), -INT8_C( 19), INT8_C( 94), -INT8_C( 72), INT8_C( 6), -INT8_C( 6) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16x3_t t; t.val[0] = simde_vld1q_s8(test_vec[i].t[0]); t.val[1] = simde_vld1q_s8(test_vec[i].t[1]); t.val[2] = simde_vld1q_s8(test_vec[i].t[2]); simde_uint8x8_t idx = simde_vld1_u8(test_vec[i].idx); simde_int8x8_t r = simde_vqtbl3_s8(t, idx); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16x3_t t = simde_test_arm_neon_random_i8x16x3(); simde_uint8x8_private idx_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] %= 48; } } simde_uint8x8_t idx = simde_uint8x8_from_private(idx_); simde_int8x8_t r = simde_vqtbl3_s8(t, idx); simde_test_arm_neon_write_i8x16x3(2, t, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbl3_u8 
(SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t t[3][16]; SIMDE_ALIGN_TO_16 uint8_t idx[8]; SIMDE_ALIGN_TO_16 uint8_t r[8]; } test_vec[] = { { { { UINT8_C(208), UINT8_C( 97), UINT8_C(114), UINT8_C(178), UINT8_C(120), UINT8_C( 19), UINT8_C( 1), UINT8_C(138), UINT8_C(136), UINT8_C( 71), UINT8_C( 4), UINT8_C(251), UINT8_C( 0), UINT8_C(149), UINT8_C(109), UINT8_C( 74) }, { UINT8_C( 90), UINT8_C(147), UINT8_C(103), UINT8_C( 44), UINT8_C(188), UINT8_C(116), UINT8_C(156), UINT8_C(105), UINT8_C(117), UINT8_C(159), UINT8_C( 36), UINT8_C(192), UINT8_C(208), UINT8_C(106), UINT8_C( 53), UINT8_C(160) }, { UINT8_C(203), UINT8_C(167), UINT8_C( 83), UINT8_C( 67), UINT8_C(187), UINT8_C( 84), UINT8_C(205), UINT8_C( 67), UINT8_C(156), UINT8_C(209), UINT8_C( 62), UINT8_C(156), UINT8_C(103), UINT8_C(171), UINT8_C(230), UINT8_C(193) } }, { UINT8_C( 62), UINT8_C( 30), UINT8_C( 45), UINT8_C( 10), UINT8_C(194), UINT8_C( 41), UINT8_C( 3), UINT8_C( 7) }, { UINT8_C( 0), UINT8_C( 53), UINT8_C(171), UINT8_C( 4), UINT8_C( 0), UINT8_C(209), UINT8_C(178), UINT8_C(138) } }, { { { UINT8_C(212), UINT8_C(236), UINT8_C( 1), UINT8_C(143), UINT8_C( 64), UINT8_C(206), UINT8_C(210), UINT8_C(220), UINT8_C(160), UINT8_C( 16), UINT8_C(121), UINT8_C( 7), UINT8_C(187), UINT8_C( 95), UINT8_C(200), UINT8_C(250) }, { UINT8_C(173), UINT8_C(182), UINT8_C(244), UINT8_C(111), UINT8_C( 63), UINT8_C( 88), UINT8_C(166), UINT8_C(104), UINT8_C(224), UINT8_C(158), UINT8_C( 96), UINT8_C(210), UINT8_C(203), UINT8_C(249), UINT8_C(143), UINT8_C(159) }, { UINT8_C(229), UINT8_C(144), UINT8_C( 47), UINT8_C( 38), UINT8_C( 95), UINT8_C( 1), UINT8_C( 2), UINT8_MAX, UINT8_C( 18), UINT8_C(123), UINT8_C( 6), UINT8_C(205), UINT8_C(219), UINT8_C(206), UINT8_C(199), UINT8_C(136) } }, { UINT8_C( 36), UINT8_C( 44), UINT8_C( 8), UINT8_C( 4), UINT8_C( 20), UINT8_C( 14), UINT8_C( 44), UINT8_C( 4) }, { UINT8_C( 95), UINT8_C(219), UINT8_C(160), UINT8_C( 64), UINT8_C( 63), UINT8_C(200), UINT8_C(219), UINT8_C( 64) } }, { { { 
UINT8_C(230), UINT8_C(214), UINT8_C(145), UINT8_C( 69), UINT8_C(215), UINT8_C(148), UINT8_C( 68), UINT8_C(233), UINT8_C( 15), UINT8_C( 74), UINT8_C(183), UINT8_C(234), UINT8_C( 24), UINT8_C(126), UINT8_C(115), UINT8_C(157) }, { UINT8_C( 58), UINT8_C(107), UINT8_C( 97), UINT8_C( 78), UINT8_C( 9), UINT8_C(141), UINT8_C( 66), UINT8_C( 70), UINT8_C( 25), UINT8_C( 8), UINT8_C( 77), UINT8_C(159), UINT8_C( 94), UINT8_C(244), UINT8_C( 11), UINT8_C( 68) }, { UINT8_C(202), UINT8_C(156), UINT8_C(137), UINT8_C(162), UINT8_C( 48), UINT8_C(205), UINT8_C(139), UINT8_C( 64), UINT8_C( 23), UINT8_C( 66), UINT8_C( 42), UINT8_C( 47), UINT8_C(193), UINT8_C(157), UINT8_C(204), UINT8_C(251) } }, { UINT8_C( 8), UINT8_C( 45), UINT8_C( 74), UINT8_C( 18), UINT8_C(186), UINT8_C( 44), UINT8_C( 40), UINT8_C( 20) }, { UINT8_C( 15), UINT8_C(157), UINT8_C( 0), UINT8_C( 97), UINT8_C( 0), UINT8_C(193), UINT8_C( 23), UINT8_C( 9) } }, { { { UINT8_C( 27), UINT8_C(192), UINT8_C( 6), UINT8_C( 75), UINT8_C(141), UINT8_C(146), UINT8_C(139), UINT8_C(164), UINT8_C(212), UINT8_C(182), UINT8_C(211), UINT8_C(149), UINT8_C( 83), UINT8_C(160), UINT8_C(145), UINT8_C( 92) }, { UINT8_C(205), UINT8_C(219), UINT8_C(110), UINT8_C(136), UINT8_C(103), UINT8_C(198), UINT8_C( 92), UINT8_C(252), UINT8_C(107), UINT8_C(207), UINT8_C(239), UINT8_C( 5), UINT8_C( 78), UINT8_C( 38), UINT8_C(106), UINT8_C(105) }, { UINT8_C(230), UINT8_C(112), UINT8_C(180), UINT8_C(115), UINT8_C( 2), UINT8_C( 64), UINT8_C( 23), UINT8_C(215), UINT8_C(246), UINT8_C(235), UINT8_C(108), UINT8_C( 73), UINT8_C(139), UINT8_C(253), UINT8_C(165), UINT8_C( 88) } }, { UINT8_C( 24), UINT8_C( 19), UINT8_C( 32), UINT8_C( 16), UINT8_C( 25), UINT8_C( 12), UINT8_C( 12), UINT8_C( 21) }, { UINT8_C(107), UINT8_C(136), UINT8_C(230), UINT8_C(205), UINT8_C(207), UINT8_C( 83), UINT8_C( 83), UINT8_C(198) } }, { { { UINT8_C( 37), UINT8_C(119), UINT8_C(172), UINT8_C( 39), UINT8_C(183), UINT8_C(196), UINT8_C(254), UINT8_C(173), UINT8_C(175), UINT8_C(107), UINT8_C(247), 
UINT8_C( 58), UINT8_C(104), UINT8_C(156), UINT8_C(146), UINT8_C( 65) }, { UINT8_C(176), UINT8_C(115), UINT8_C(129), UINT8_C(137), UINT8_C(175), UINT8_C(189), UINT8_C(206), UINT8_C(187), UINT8_C(233), UINT8_C( 25), UINT8_C( 21), UINT8_C( 60), UINT8_C(205), UINT8_C(216), UINT8_C(117), UINT8_C(242) }, { UINT8_C( 80), UINT8_C( 33), UINT8_C( 26), UINT8_C( 7), UINT8_C(229), UINT8_C( 24), UINT8_C(181), UINT8_C(148), UINT8_C(131), UINT8_C(172), UINT8_C(206), UINT8_C(236), UINT8_C( 72), UINT8_C( 97), UINT8_C( 45), UINT8_C(248) } }, { UINT8_C( 20), UINT8_C( 30), UINT8_C( 34), UINT8_C( 35), UINT8_C( 11), UINT8_C( 32), UINT8_C( 15), UINT8_C( 37) }, { UINT8_C(175), UINT8_C(117), UINT8_C( 26), UINT8_C( 7), UINT8_C( 58), UINT8_C( 80), UINT8_C( 65), UINT8_C( 24) } }, { { { UINT8_C( 39), UINT8_C( 67), UINT8_C(132), UINT8_C( 13), UINT8_C( 92), UINT8_C( 57), UINT8_C(161), UINT8_C(223), UINT8_C(229), UINT8_C(112), UINT8_C(203), UINT8_C( 46), UINT8_C(209), UINT8_C(248), UINT8_C( 38), UINT8_C(165) }, { UINT8_C(166), UINT8_C(168), UINT8_C( 40), UINT8_C( 18), UINT8_C(249), UINT8_C(103), UINT8_C(103), UINT8_C( 98), UINT8_C(188), UINT8_C(248), UINT8_C(153), UINT8_C(233), UINT8_C(254), UINT8_C(195), UINT8_C(102), UINT8_C( 37) }, { UINT8_C( 6), UINT8_C(234), UINT8_C( 50), UINT8_C( 98), UINT8_C( 36), UINT8_C(212), UINT8_C( 66), UINT8_C( 9), UINT8_C( 68), UINT8_C( 13), UINT8_C( 55), UINT8_C( 21), UINT8_C( 6), UINT8_C( 94), UINT8_C(186), UINT8_C(172) } }, { UINT8_C( 6), UINT8_C( 34), UINT8_C( 46), UINT8_MAX, UINT8_C( 74), UINT8_C( 37), UINT8_C( 2), UINT8_C( 6) }, { UINT8_C(161), UINT8_C( 50), UINT8_C(186), UINT8_C( 0), UINT8_C( 0), UINT8_C(212), UINT8_C(132), UINT8_C(161) } }, { { { UINT8_C( 63), UINT8_C(115), UINT8_C( 39), UINT8_C( 99), UINT8_C( 71), UINT8_C(105), UINT8_C(109), UINT8_C(139), UINT8_C(119), UINT8_C(164), UINT8_C(160), UINT8_C(125), UINT8_C( 2), UINT8_C( 90), UINT8_C( 41), UINT8_C( 9) }, { UINT8_C( 61), UINT8_C(232), UINT8_C( 8), UINT8_C(135), UINT8_C( 13), UINT8_C(106), 
UINT8_C(141), UINT8_C( 43), UINT8_C(102), UINT8_C(124), UINT8_C( 70), UINT8_C( 36), UINT8_C(209), UINT8_C(135), UINT8_C(233), UINT8_C( 16) }, { UINT8_C(251), UINT8_C( 17), UINT8_C(116), UINT8_C( 66), UINT8_C(122), UINT8_C(225), UINT8_C(206), UINT8_C(241), UINT8_C(133), UINT8_C(110), UINT8_C(110), UINT8_C(136), UINT8_C(201), UINT8_C(152), UINT8_C(145), UINT8_C( 6) } }, { UINT8_C( 32), UINT8_C( 9), UINT8_C( 45), UINT8_C( 45), UINT8_C( 4), UINT8_C( 26), UINT8_C( 40), UINT8_C(106) }, { UINT8_C(251), UINT8_C(164), UINT8_C(152), UINT8_C(152), UINT8_C( 71), UINT8_C( 70), UINT8_C(133), UINT8_C( 0) } }, { { { UINT8_C(137), UINT8_C(235), UINT8_C(196), UINT8_C( 3), UINT8_C(204), UINT8_C(146), UINT8_C(245), UINT8_C( 82), UINT8_C( 0), UINT8_C( 99), UINT8_C(218), UINT8_C(201), UINT8_C(251), UINT8_C(107), UINT8_C(207), UINT8_C(123) }, { UINT8_C( 4), UINT8_C( 92), UINT8_C( 9), UINT8_C( 8), UINT8_C(118), UINT8_C(193), UINT8_C(114), UINT8_C( 12), UINT8_C(192), UINT8_C( 1), UINT8_C(115), UINT8_C( 71), UINT8_C(121), UINT8_C(235), UINT8_C(200), UINT8_C( 2) }, { UINT8_C(214), UINT8_C(140), UINT8_C( 5), UINT8_C(163), UINT8_C( 30), UINT8_C(250), UINT8_C(245), UINT8_C( 31), UINT8_C( 94), UINT8_C(207), UINT8_C(232), UINT8_C( 89), UINT8_C( 58), UINT8_C(184), UINT8_C(213), UINT8_C( 62) } }, { UINT8_C( 20), UINT8_C( 30), UINT8_C( 23), UINT8_C( 43), UINT8_C( 15), UINT8_C( 41), UINT8_C( 7), UINT8_C( 0) }, { UINT8_C(118), UINT8_C(200), UINT8_C( 12), UINT8_C( 89), UINT8_C(123), UINT8_C(207), UINT8_C( 82), UINT8_C(137) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16x3_t t; t.val[0] = simde_vld1q_u8(test_vec[i].t[0]); t.val[1] = simde_vld1q_u8(test_vec[i].t[1]); t.val[2] = simde_vld1q_u8(test_vec[i].t[2]); simde_uint8x8_t idx = simde_vld1_u8(test_vec[i].idx); simde_uint8x8_t r = simde_vqtbl3_u8(t, idx); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { 
simde_uint8x16x3_t t = simde_test_arm_neon_random_u8x16x3(); simde_uint8x8_private idx_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] %= 48; } } simde_uint8x8_t idx = simde_uint8x8_from_private(idx_); simde_uint8x8_t r = simde_vqtbl3_u8(t, idx); simde_test_arm_neon_write_u8x16x3(2, t, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbl4_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t t[4][16]; SIMDE_ALIGN_TO_16 uint8_t idx[8]; SIMDE_ALIGN_TO_16 int8_t r[8]; } test_vec[] = { { { { INT8_C( 97), INT8_C( 11), INT8_C( 79), -INT8_C( 105), -INT8_C( 26), -INT8_C( 117), -INT8_C( 75), INT8_C( 124), -INT8_C( 52), -INT8_C( 21), INT8_C( 43), -INT8_C( 1), INT8_C( 105), INT8_C( 94), -INT8_C( 42), INT8_C( 42) }, { -INT8_C( 12), INT8_C( 5), -INT8_C( 14), INT8_C( 1), -INT8_C( 5), INT8_C( 49), -INT8_C( 102), INT8_C( 78), -INT8_C( 11), -INT8_C( 49), INT8_C( 119), INT8_C( 100), -INT8_C( 85), INT8_C( 113), INT8_C( 44), INT8_C( 12) }, { INT8_C( 124), INT8_C( 123), -INT8_C( 93), INT8_C( 99), INT8_C( 7), INT8_C( 88), -INT8_C( 33), -INT8_C( 45), INT8_C( 67), INT8_C( 10), -INT8_C( 46), -INT8_C( 84), INT8_C( 104), -INT8_C( 87), -INT8_C( 41), INT8_C( 92) }, { -INT8_C( 82), -INT8_C( 55), INT8_C( 94), -INT8_C( 87), -INT8_C( 5), -INT8_C( 8), -INT8_C( 9), -INT8_C( 16), -INT8_C( 57), INT8_C( 110), INT8_C( 84), INT8_C( 114), -INT8_C( 33), -INT8_C( 127), INT8_MAX, INT8_C( 92) } }, { UINT8_C( 60), UINT8_C( 34), UINT8_C( 63), UINT8_C( 3), UINT8_C( 59), UINT8_C( 30), UINT8_C( 23), UINT8_C( 62) }, { -INT8_C( 33), -INT8_C( 93), INT8_C( 92), -INT8_C( 105), INT8_C( 114), INT8_C( 44), INT8_C( 78), INT8_MAX } }, { { { INT8_C( 11), -INT8_C( 53), -INT8_C( 87), INT8_C( 6), -INT8_C( 61), -INT8_C( 
95), -INT8_C( 10), -INT8_C( 117), INT8_C( 15), INT8_C( 75), -INT8_C( 3), -INT8_C( 17), -INT8_C( 52), INT8_C( 124), INT8_C( 75), -INT8_C( 56) }, { -INT8_C( 97), INT8_C( 10), -INT8_C( 52), INT8_C( 26), -INT8_C( 88), -INT8_C( 93), -INT8_C( 40), INT8_C( 80), INT8_C( 76), INT8_C( 67), INT8_C( 96), -INT8_C( 97), -INT8_C( 123), -INT8_C( 51), -INT8_C( 97), -INT8_C( 111) }, { -INT8_C( 104), INT8_C( 73), -INT8_C( 105), INT8_C( 92), -INT8_C( 22), -INT8_C( 114), -INT8_C( 25), -INT8_C( 7), -INT8_C( 39), -INT8_C( 28), -INT8_C( 24), -INT8_C( 91), INT8_C( 97), INT8_C( 51), INT8_C( 109), INT8_C( 0) }, { INT8_C( 61), INT8_C( 57), INT8_C( 26), -INT8_C( 27), -INT8_C( 36), -INT8_C( 14), INT8_C( 53), INT8_C( 41), INT8_C( 54), -INT8_C( 106), -INT8_C( 56), -INT8_C( 69), INT8_C( 99), INT8_C( 103), INT8_C( 76), -INT8_C( 4) } }, { UINT8_C( 48), UINT8_C( 36), UINT8_C( 24), UINT8_C( 26), UINT8_C(114), UINT8_C( 63), UINT8_C( 20), UINT8_C( 11) }, { INT8_C( 61), -INT8_C( 22), INT8_C( 76), INT8_C( 96), INT8_C( 0), -INT8_C( 4), -INT8_C( 88), -INT8_C( 17) } }, { { { -INT8_C( 105), -INT8_C( 98), -INT8_C( 45), INT8_C( 115), -INT8_C( 111), INT8_C( 8), -INT8_C( 100), -INT8_C( 57), -INT8_C( 98), INT8_C( 100), -INT8_C( 126), INT8_C( 2), -INT8_C( 52), -INT8_C( 49), -INT8_C( 2), INT8_C( 124) }, { -INT8_C( 77), INT8_C( 86), INT8_C( 23), INT8_C( 37), -INT8_C( 107), -INT8_C( 85), INT8_C( 112), -INT8_C( 72), INT8_C( 39), INT8_C( 96), INT8_C( 61), -INT8_C( 41), -INT8_C( 67), -INT8_C( 63), -INT8_C( 59), INT8_C( 84) }, { INT8_C( 96), -INT8_C( 104), -INT8_C( 56), -INT8_C( 15), -INT8_C( 96), INT8_C( 100), -INT8_C( 72), INT8_C( 63), -INT8_C( 55), INT8_C( 58), INT8_C( 65), -INT8_C( 107), INT8_C( 9), INT8_C( 63), INT8_C( 17), -INT8_C( 68) }, { -INT8_C( 107), INT8_C( 40), -INT8_C( 31), INT8_C( 42), -INT8_C( 45), INT8_C( 81), -INT8_C( 30), -INT8_C( 5), -INT8_C( 79), INT8_C( 31), -INT8_C( 46), INT8_C( 111), -INT8_C( 31), -INT8_C( 105), -INT8_C( 61), INT8_C( 65) } }, { UINT8_C( 47), UINT8_C(139), UINT8_C( 50), UINT8_C( 
16), UINT8_C( 48), UINT8_C( 42), UINT8_C( 15), UINT8_C( 57) }, { -INT8_C( 68), INT8_C( 0), -INT8_C( 31), -INT8_C( 77), -INT8_C( 107), INT8_C( 65), INT8_C( 124), INT8_C( 31) } }, { { { -INT8_C( 120), -INT8_C( 52), INT8_C( 78), INT8_C( 91), INT8_C( 29), INT8_C( 48), INT8_C( 86), -INT8_C( 49), INT8_C( 80), INT8_C( 41), INT8_C( 62), INT8_C( 49), -INT8_C( 64), INT8_C( 1), INT8_C( 114), -INT8_C( 16) }, { -INT8_C( 115), -INT8_C( 92), -INT8_C( 64), INT8_C( 125), -INT8_C( 114), -INT8_C( 49), INT8_C( 54), -INT8_C( 78), INT8_C( 31), -INT8_C( 124), -INT8_C( 32), -INT8_C( 82), -INT8_C( 29), -INT8_C( 53), -INT8_C( 46), INT8_C( 107) }, { -INT8_C( 105), INT8_C( 32), -INT8_C( 57), -INT8_C( 76), INT8_C( 80), INT8_C( 29), -INT8_C( 125), -INT8_C( 96), INT8_C( 70), -INT8_C( 63), -INT8_C( 47), INT8_C( 7), -INT8_C( 61), INT8_C( 67), -INT8_C( 9), INT8_C( 80) }, { -INT8_C( 25), -INT8_C( 73), -INT8_C( 51), INT8_C( 117), -INT8_C( 122), INT8_C( 3), INT8_C( 40), -INT8_C( 91), -INT8_C( 121), INT8_C( 8), INT8_C( 83), INT8_C( 106), -INT8_C( 45), INT8_C( 37), -INT8_C( 42), INT8_C( 106) } }, { UINT8_C( 5), UINT8_C( 29), UINT8_C( 31), UINT8_C( 21), UINT8_C( 58), UINT8_C( 34), UINT8_C( 54), UINT8_C( 1) }, { INT8_C( 48), -INT8_C( 53), INT8_C( 107), -INT8_C( 49), INT8_C( 83), -INT8_C( 57), INT8_C( 40), -INT8_C( 52) } }, { { { -INT8_C( 74), INT8_C( 68), -INT8_C( 88), INT8_C( 60), INT8_C( 71), -INT8_C( 48), -INT8_C( 31), -INT8_C( 50), -INT8_C( 40), INT8_C( 52), INT8_C( 56), -INT8_C( 84), INT8_C( 89), INT8_C( 14), INT8_C( 22), -INT8_C( 98) }, { -INT8_C( 85), INT8_C( 53), INT8_C( 51), INT8_C( 102), -INT8_C( 40), INT8_C( 105), INT8_C( 103), INT8_C( 60), INT8_C( 113), INT8_C( 111), INT8_C( 99), -INT8_C( 68), INT8_C( 110), -INT8_C( 38), -INT8_C( 18), INT8_C( 36) }, { INT8_C( 30), -INT8_C( 106), INT8_C( 96), INT8_C( 101), INT8_C( 102), INT8_C( 65), INT8_C( 51), INT8_C( 63), INT8_C( 117), INT8_C( 107), -INT8_C( 21), -INT8_C( 50), INT8_C( 122), INT8_C( 1), INT8_C( 108), INT8_C( 37) }, { INT8_C( 55), -INT8_C( 
97), -INT8_C( 117), INT8_C( 15), INT8_C( 9), -INT8_C( 14), INT8_C( 75), INT8_C( 122), INT8_C( 97), -INT8_C( 82), INT8_C( 54), -INT8_C( 49), -INT8_C( 120), INT8_C( 36), -INT8_C( 13), -INT8_C( 90) } }, { UINT8_C( 59), UINT8_C( 19), UINT8_C( 11), UINT8_C( 33), UINT8_C( 20), UINT8_C( 62), UINT8_C( 32), UINT8_C( 9) }, { -INT8_C( 49), INT8_C( 102), -INT8_C( 84), -INT8_C( 106), -INT8_C( 40), -INT8_C( 13), INT8_C( 30), INT8_C( 52) } }, { { { -INT8_C( 29), -INT8_C( 44), -INT8_C( 109), -INT8_C( 20), -INT8_C( 57), -INT8_C( 34), INT8_C( 102), INT8_C( 40), -INT8_C( 116), -INT8_C( 100), -INT8_C( 8), INT8_C( 20), -INT8_C( 64), -INT8_C( 21), -INT8_C( 70), INT8_C( 123) }, { INT8_C( 63), -INT8_C( 59), -INT8_C( 99), -INT8_C( 45), INT8_C( 3), -INT8_C( 3), -INT8_C( 35), -INT8_C( 84), INT8_C( 73), -INT8_C( 76), -INT8_C( 48), -INT8_C( 106), -INT8_C( 8), INT8_C( 25), INT8_C( 26), -INT8_C( 37) }, { -INT8_C( 19), -INT8_C( 83), -INT8_C( 57), -INT8_C( 76), -INT8_C( 117), INT8_C( 45), -INT8_C( 35), INT8_C( 23), -INT8_C( 55), -INT8_C( 43), INT8_C( 43), -INT8_C( 119), -INT8_C( 64), -INT8_C( 27), INT8_C( 5), -INT8_C( 1) }, { -INT8_C( 86), -INT8_C( 94), -INT8_C( 45), -INT8_C( 83), -INT8_C( 97), -INT8_C( 80), INT8_C( 89), -INT8_C( 24), INT8_C( 100), INT8_C( 41), INT8_C( 126), INT8_C( 92), INT8_C( 66), -INT8_C( 104), INT8_C( 55), INT8_C( 48) } }, { UINT8_C( 5), UINT8_C(254), UINT8_C( 36), UINT8_C( 16), UINT8_C( 43), UINT8_C( 1), UINT8_C( 39), UINT8_C( 52) }, { -INT8_C( 34), INT8_C( 0), -INT8_C( 117), INT8_C( 63), -INT8_C( 119), -INT8_C( 44), INT8_C( 23), -INT8_C( 97) } }, { { { INT8_C( 37), INT8_C( 41), INT8_C( 78), -INT8_C( 60), -INT8_C( 39), -INT8_C( 88), -INT8_C( 83), INT8_C( 62), -INT8_C( 47), INT8_C( 43), -INT8_C( 102), INT8_C( 20), -INT8_C( 60), -INT8_C( 46), INT8_C( 68), INT8_C( 9) }, { -INT8_C( 48), INT8_C( 40), -INT8_C( 38), -INT8_C( 4), -INT8_C( 22), -INT8_C( 63), -INT8_C( 16), INT8_MIN, -INT8_C( 44), INT8_C( 110), -INT8_C( 41), -INT8_C( 53), -INT8_C( 15), INT8_C( 46), INT8_C( 109), 
INT8_C( 22) }, { INT8_C( 87), -INT8_C( 69), -INT8_C( 37), INT8_C( 49), INT8_C( 99), -INT8_C( 120), INT8_C( 111), INT8_C( 53), -INT8_C( 77), INT8_C( 9), INT8_C( 73), INT8_C( 119), -INT8_C( 37), -INT8_C( 115), -INT8_C( 127), -INT8_C( 84) }, { -INT8_C( 75), INT8_C( 91), -INT8_C( 88), -INT8_C( 97), INT8_C( 28), -INT8_C( 104), INT8_C( 32), -INT8_C( 16), INT8_C( 7), -INT8_C( 9), -INT8_C( 68), -INT8_C( 8), INT8_C( 37), INT8_C( 41), INT8_C( 15), INT8_C( 125) } }, { UINT8_C( 36), UINT8_C( 42), UINT8_C( 46), UINT8_C( 72), UINT8_C( 50), UINT8_C( 29), UINT8_C( 61), UINT8_C( 37) }, { INT8_C( 99), INT8_C( 73), -INT8_C( 127), INT8_C( 0), -INT8_C( 88), INT8_C( 46), INT8_C( 41), -INT8_C( 120) } }, { { { INT8_C( 121), INT8_C( 86), -INT8_C( 88), -INT8_C( 107), -INT8_C( 18), -INT8_C( 56), -INT8_C( 122), -INT8_C( 11), -INT8_C( 65), INT8_C( 66), -INT8_C( 18), -INT8_C( 27), INT8_C( 107), -INT8_C( 3), INT8_C( 98), INT8_C( 79) }, { -INT8_C( 25), INT8_C( 16), -INT8_C( 105), INT8_C( 89), INT8_C( 45), INT8_C( 20), INT8_C( 126), INT8_C( 83), -INT8_C( 38), INT8_C( 27), INT8_C( 85), INT8_C( 45), INT8_C( 57), INT8_C( 3), INT8_C( 54), -INT8_C( 78) }, { INT8_C( 89), -INT8_C( 34), INT8_C( 72), INT8_C( 72), -INT8_C( 90), -INT8_C( 50), INT8_C( 61), INT8_C( 101), INT8_C( 16), INT8_C( 43), INT8_C( 74), INT8_C( 123), INT8_C( 40), -INT8_C( 84), -INT8_C( 54), INT8_C( 15) }, { -INT8_C( 68), INT8_C( 98), INT8_C( 104), -INT8_C( 23), INT8_C( 118), -INT8_C( 25), INT8_C( 61), INT8_C( 81), INT8_C( 2), -INT8_C( 110), INT8_C( 126), INT8_C( 60), -INT8_C( 106), -INT8_C( 76), -INT8_C( 18), -INT8_C( 17) } }, { UINT8_C( 18), UINT8_C( 54), UINT8_C( 55), UINT8_C( 56), UINT8_C( 4), UINT8_C( 53), UINT8_C( 30), UINT8_C( 20) }, { -INT8_C( 105), INT8_C( 61), INT8_C( 81), INT8_C( 2), -INT8_C( 18), -INT8_C( 25), INT8_C( 54), INT8_C( 45) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16x4_t t; t.val[0] = simde_vld1q_s8(test_vec[i].t[0]); t.val[1] = simde_vld1q_s8(test_vec[i].t[1]); 
t.val[2] = simde_vld1q_s8(test_vec[i].t[2]); t.val[3] = simde_vld1q_s8(test_vec[i].t[3]); simde_uint8x8_t idx = simde_vld1_u8(test_vec[i].idx); simde_int8x8_t r = simde_vqtbl4_s8(t, idx); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16x4_t t = simde_test_arm_neon_random_i8x16x4(); simde_uint8x8_private idx_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 63; } } simde_uint8x8_t idx = simde_uint8x8_from_private(idx_); simde_int8x8_t r = simde_vqtbl4_s8(t, idx); simde_test_arm_neon_write_i8x16x4(2, t, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbl4_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t t[4][16]; SIMDE_ALIGN_TO_16 uint8_t idx[8]; SIMDE_ALIGN_TO_16 uint8_t r[8]; } test_vec[] = { { { { UINT8_C( 73), UINT8_C(147), UINT8_C( 18), UINT8_C(240), UINT8_C(193), UINT8_C( 95), UINT8_C( 22), UINT8_C(176), UINT8_C( 28), UINT8_C( 92), UINT8_C(226), UINT8_C( 93), UINT8_C(217), UINT8_C(166), UINT8_C(186), UINT8_C(146) }, { UINT8_C( 26), UINT8_C(238), UINT8_C( 0), UINT8_C(185), UINT8_C(120), UINT8_C(178), UINT8_C(101), UINT8_C( 19), UINT8_C( 8), UINT8_C( 27), UINT8_C(113), UINT8_C(182), UINT8_C(189), UINT8_C(192), UINT8_C( 36), UINT8_C( 6) }, { UINT8_C( 84), UINT8_C( 54), UINT8_C(246), UINT8_C( 21), UINT8_C(149), UINT8_C( 12), UINT8_C(197), UINT8_C(177), UINT8_C(104), UINT8_C(167), UINT8_C( 14), UINT8_C( 65), UINT8_C( 77), UINT8_C(200), UINT8_C(212), UINT8_C(103) }, { UINT8_C(182), UINT8_C(212), UINT8_C( 32), UINT8_C( 47), UINT8_C(135), UINT8_C(134), UINT8_C( 66), UINT8_C(143), UINT8_C(161), UINT8_C(179), UINT8_C( 70), UINT8_C( 95), UINT8_C(115), 
UINT8_C(106), UINT8_C(101), UINT8_C(199) } }, { UINT8_C( 32), UINT8_C( 92), UINT8_C( 28), UINT8_C( 53), UINT8_C( 40), UINT8_C(161), UINT8_C( 38), UINT8_C( 17) }, { UINT8_C( 84), UINT8_C( 0), UINT8_C(189), UINT8_C(134), UINT8_C(104), UINT8_C( 0), UINT8_C(197), UINT8_C(238) } }, { { { UINT8_C(187), UINT8_C( 30), UINT8_C(162), UINT8_C( 66), UINT8_C(164), UINT8_C(228), UINT8_C(209), UINT8_C( 70), UINT8_C(151), UINT8_C( 23), UINT8_C(165), UINT8_C( 10), UINT8_C(129), UINT8_C( 10), UINT8_C(210), UINT8_C( 33) }, { UINT8_C(102), UINT8_C(174), UINT8_C( 86), UINT8_C(207), UINT8_C( 80), UINT8_C( 60), UINT8_C(160), UINT8_C(153), UINT8_C( 48), UINT8_C(178), UINT8_C( 47), UINT8_C(237), UINT8_C(153), UINT8_C( 45), UINT8_C( 96), UINT8_C( 84) }, { UINT8_C( 76), UINT8_C( 2), UINT8_C(150), UINT8_C(240), UINT8_C(230), UINT8_C(103), UINT8_C( 54), UINT8_C(125), UINT8_C(127), UINT8_C(219), UINT8_C(135), UINT8_C( 0), UINT8_C(230), UINT8_C( 89), UINT8_C( 34), UINT8_C( 76) }, { UINT8_C( 8), UINT8_C(120), UINT8_C( 27), UINT8_C( 88), UINT8_C(181), UINT8_C(187), UINT8_C(241), UINT8_C(229), UINT8_C(110), UINT8_C( 32), UINT8_C(210), UINT8_C( 7), UINT8_C( 78), UINT8_C( 50), UINT8_C( 91), UINT8_C(154) } }, { UINT8_C( 52), UINT8_C( 49), UINT8_C(138), UINT8_C( 26), UINT8_C( 24), UINT8_C( 1), UINT8_C( 23), UINT8_C( 23) }, { UINT8_C(181), UINT8_C(120), UINT8_C( 0), UINT8_C( 47), UINT8_C( 48), UINT8_C( 30), UINT8_C(153), UINT8_C(153) } }, { { { UINT8_C(114), UINT8_C(234), UINT8_C(216), UINT8_C( 39), UINT8_C(166), UINT8_C(201), UINT8_C( 13), UINT8_C( 20), UINT8_C(234), UINT8_C(223), UINT8_C( 27), UINT8_C( 56), UINT8_C( 18), UINT8_C(118), UINT8_C(210), UINT8_C( 70) }, { UINT8_C(103), UINT8_C( 92), UINT8_C( 97), UINT8_C(191), UINT8_C( 29), UINT8_C(248), UINT8_C(151), UINT8_C(186), UINT8_C( 23), UINT8_C(111), UINT8_C( 60), UINT8_C(144), UINT8_C(105), UINT8_C( 11), UINT8_C( 16), UINT8_C(219) }, { UINT8_C(246), UINT8_C(233), UINT8_C( 3), UINT8_C(156), UINT8_C(178), UINT8_C( 16), UINT8_C(176), UINT8_C(156), 
UINT8_C(239), UINT8_C(203), UINT8_C(212), UINT8_C( 1), UINT8_C( 65), UINT8_C(166), UINT8_C( 72), UINT8_C(168) }, { UINT8_C( 3), UINT8_C(169), UINT8_C(103), UINT8_C( 32), UINT8_C(161), UINT8_C(254), UINT8_C(218), UINT8_C(185), UINT8_C(109), UINT8_C( 23), UINT8_C( 73), UINT8_C(214), UINT8_C( 34), UINT8_C( 89), UINT8_C(178), UINT8_C( 24) } }, { UINT8_C( 2), UINT8_C( 53), UINT8_C( 52), UINT8_C( 53), UINT8_C( 5), UINT8_C( 36), UINT8_C( 17), UINT8_C( 52) }, { UINT8_C(216), UINT8_C(254), UINT8_C(161), UINT8_C(254), UINT8_C(201), UINT8_C(178), UINT8_C( 92), UINT8_C(161) } }, { { { UINT8_C(167), UINT8_C(128), UINT8_C( 48), UINT8_C( 72), UINT8_C(126), UINT8_C( 10), UINT8_C( 1), UINT8_C(236), UINT8_C( 33), UINT8_C( 74), UINT8_C(194), UINT8_C( 68), UINT8_C(164), UINT8_C(116), UINT8_C( 92), UINT8_C(230) }, { UINT8_C( 41), UINT8_C( 17), UINT8_C(219), UINT8_C(238), UINT8_C(117), UINT8_C(109), UINT8_C(163), UINT8_C(165), UINT8_C(211), UINT8_C( 89), UINT8_C( 21), UINT8_C(223), UINT8_C( 87), UINT8_C( 46), UINT8_C(239), UINT8_C(254) }, { UINT8_C(174), UINT8_C( 31), UINT8_C( 70), UINT8_C( 44), UINT8_C( 41), UINT8_C( 72), UINT8_C( 24), UINT8_C( 75), UINT8_C(146), UINT8_C(219), UINT8_C(143), UINT8_C( 54), UINT8_C( 79), UINT8_C(235), UINT8_C( 29), UINT8_C(121) }, { UINT8_C(252), UINT8_C(248), UINT8_C(103), UINT8_C(114), UINT8_C(101), UINT8_C( 10), UINT8_C( 23), UINT8_C( 56), UINT8_C( 99), UINT8_C( 44), UINT8_C( 24), UINT8_C(186), UINT8_C( 90), UINT8_C( 7), UINT8_C(184), UINT8_C( 8) } }, { UINT8_C( 38), UINT8_C( 63), UINT8_C( 53), UINT8_C( 79), UINT8_C( 7), UINT8_C( 13), UINT8_C( 26), UINT8_C( 25) }, { UINT8_C( 24), UINT8_C( 8), UINT8_C( 10), UINT8_C( 0), UINT8_C(236), UINT8_C(116), UINT8_C( 21), UINT8_C( 89) } }, { { { UINT8_C( 37), UINT8_C( 88), UINT8_C(131), UINT8_C(139), UINT8_C( 99), UINT8_C(154), UINT8_C(195), UINT8_C(198), UINT8_C(199), UINT8_C(219), UINT8_C(129), UINT8_C( 33), UINT8_C(226), UINT8_C( 57), UINT8_C( 42), UINT8_C( 8) }, { UINT8_C( 56), UINT8_C( 95), UINT8_C( 88), 
UINT8_C(127), UINT8_C(172), UINT8_C(242), UINT8_C( 89), UINT8_C(213), UINT8_C( 28), UINT8_C(105), UINT8_C( 77), UINT8_C( 49), UINT8_C(150), UINT8_C( 62), UINT8_C( 66), UINT8_C(187) }, { UINT8_C(150), UINT8_C(198), UINT8_C( 70), UINT8_C(249), UINT8_C( 96), UINT8_C( 10), UINT8_C(192), UINT8_C( 39), UINT8_C(229), UINT8_C( 65), UINT8_C( 73), UINT8_C(200), UINT8_C(122), UINT8_C(115), UINT8_C(208), UINT8_C(179) }, { UINT8_C(210), UINT8_C( 40), UINT8_C( 50), UINT8_C(126), UINT8_C( 27), UINT8_C(139), UINT8_C( 83), UINT8_C( 55), UINT8_C(244), UINT8_C(160), UINT8_C(104), UINT8_C(138), UINT8_C(222), UINT8_C(170), UINT8_C( 70), UINT8_C(117) } }, { UINT8_C( 48), UINT8_C(140), UINT8_C( 46), UINT8_C( 17), UINT8_C( 22), UINT8_C( 46), UINT8_C( 56), UINT8_C( 60) }, { UINT8_C(210), UINT8_C( 0), UINT8_C(208), UINT8_C( 95), UINT8_C( 89), UINT8_C(208), UINT8_C(244), UINT8_C(222) } }, { { { UINT8_C( 61), UINT8_C(207), UINT8_C( 5), UINT8_C( 88), UINT8_C( 91), UINT8_C( 88), UINT8_C(143), UINT8_C( 79), UINT8_C(249), UINT8_C(247), UINT8_C(218), UINT8_C(215), UINT8_C(161), UINT8_C( 32), UINT8_C( 76), UINT8_C( 18) }, { UINT8_C(172), UINT8_C(187), UINT8_C(227), UINT8_C( 67), UINT8_C(233), UINT8_C(219), UINT8_C(191), UINT8_C( 89), UINT8_C( 29), UINT8_C( 3), UINT8_C( 67), UINT8_C(209), UINT8_C( 23), UINT8_C(224), UINT8_C( 88), UINT8_C( 84) }, { UINT8_C(175), UINT8_C( 93), UINT8_C(172), UINT8_C( 10), UINT8_C(181), UINT8_C( 59), UINT8_C( 90), UINT8_C(174), UINT8_C( 50), UINT8_C( 52), UINT8_C(134), UINT8_C(212), UINT8_C( 84), UINT8_C(210), UINT8_C(230), UINT8_C( 0) }, { UINT8_C(141), UINT8_C(201), UINT8_C( 67), UINT8_C(119), UINT8_C(164), UINT8_C( 2), UINT8_C(208), UINT8_C(193), UINT8_C( 5), UINT8_C( 19), UINT8_C(147), UINT8_C( 29), UINT8_C(243), UINT8_C(235), UINT8_C(113), UINT8_C(162) } }, { UINT8_C( 8), UINT8_C( 30), UINT8_C(173), UINT8_C( 61), UINT8_C( 25), UINT8_C( 7), UINT8_C(172), UINT8_C( 12) }, { UINT8_C(249), UINT8_C( 88), UINT8_C( 0), UINT8_C(235), UINT8_C( 3), UINT8_C( 79), UINT8_C( 0), 
UINT8_C(161) } }, { { { UINT8_C( 15), UINT8_C(211), UINT8_C( 9), UINT8_C(179), UINT8_C(213), UINT8_C(217), UINT8_C(117), UINT8_C(219), UINT8_C(236), UINT8_C( 8), UINT8_C(248), UINT8_C(223), UINT8_C(243), UINT8_C(105), UINT8_C(129), UINT8_C( 59) }, { UINT8_C(135), UINT8_C( 46), UINT8_C( 56), UINT8_C(225), UINT8_C( 53), UINT8_C(228), UINT8_C(109), UINT8_C(112), UINT8_C( 22), UINT8_C(205), UINT8_MAX, UINT8_C( 27), UINT8_C( 19), UINT8_C(143), UINT8_C(173), UINT8_C( 34) }, { UINT8_C( 98), UINT8_C(182), UINT8_C(213), UINT8_C( 55), UINT8_C(143), UINT8_C( 74), UINT8_C( 18), UINT8_C(123), UINT8_C( 82), UINT8_C( 10), UINT8_C( 90), UINT8_C( 69), UINT8_C(116), UINT8_C(219), UINT8_C(128), UINT8_C(251) }, { UINT8_C( 10), UINT8_C(185), UINT8_C(220), UINT8_C( 63), UINT8_C(157), UINT8_C( 73), UINT8_C(176), UINT8_C(180), UINT8_C( 22), UINT8_C(175), UINT8_C(207), UINT8_C( 41), UINT8_C( 62), UINT8_C(124), UINT8_C( 75), UINT8_C(160) } }, { UINT8_C( 50), UINT8_C( 33), UINT8_C( 24), UINT8_C( 1), UINT8_C(107), UINT8_C( 42), UINT8_C( 60), UINT8_C( 62) }, { UINT8_C(220), UINT8_C(182), UINT8_C( 22), UINT8_C(211), UINT8_C( 0), UINT8_C( 90), UINT8_C( 62), UINT8_C( 75) } }, { { { UINT8_C( 61), UINT8_C( 65), UINT8_C(187), UINT8_C(218), UINT8_C(138), UINT8_C(107), UINT8_C(142), UINT8_C(161), UINT8_C( 26), UINT8_C( 93), UINT8_C(202), UINT8_C( 89), UINT8_C(217), UINT8_C( 22), UINT8_C(249), UINT8_C( 11) }, { UINT8_C( 55), UINT8_C(209), UINT8_C(204), UINT8_C(162), UINT8_C(188), UINT8_C( 8), UINT8_C( 96), UINT8_C(177), UINT8_C(158), UINT8_C(100), UINT8_C( 26), UINT8_C( 16), UINT8_C(232), UINT8_C(126), UINT8_C(139), UINT8_C( 37) }, { UINT8_C(191), UINT8_C( 70), UINT8_MAX, UINT8_C( 74), UINT8_C(177), UINT8_C(142), UINT8_C(235), UINT8_C(204), UINT8_C(235), UINT8_C(181), UINT8_C( 37), UINT8_C(197), UINT8_C(203), UINT8_C( 30), UINT8_C(208), UINT8_C( 2) }, { UINT8_C(240), UINT8_C(157), UINT8_C(165), UINT8_C(172), UINT8_C(165), UINT8_C( 5), UINT8_C( 93), UINT8_C( 68), UINT8_C(105), UINT8_C(119), UINT8_C( 
84), UINT8_C( 81), UINT8_C(245), UINT8_C(223), UINT8_C(118), UINT8_C(181) } }, { UINT8_C( 38), UINT8_C( 54), UINT8_C( 63), UINT8_C(215), UINT8_C( 4), UINT8_C( 42), UINT8_C( 35), UINT8_C(239) }, { UINT8_C(235), UINT8_C( 93), UINT8_C(181), UINT8_C( 0), UINT8_C(138), UINT8_C( 37), UINT8_C( 74), UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16x4_t t; t.val[0] = simde_vld1q_u8(test_vec[i].t[0]); t.val[1] = simde_vld1q_u8(test_vec[i].t[1]); t.val[2] = simde_vld1q_u8(test_vec[i].t[2]); t.val[3] = simde_vld1q_u8(test_vec[i].t[3]); simde_uint8x8_t idx = simde_vld1_u8(test_vec[i].idx); simde_uint8x8_t r = simde_vqtbl4_u8(t, idx); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16x4_t t = simde_test_arm_neon_random_u8x16x4(); simde_uint8x8_private idx_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 63; } } simde_uint8x8_t idx = simde_uint8x8_from_private(idx_); simde_uint8x8_t r = simde_vqtbl4_u8(t, idx); simde_test_arm_neon_write_u8x16x4(2, t, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #endif /* !defined(SIMDE_BUG_INTEL_857088) */ static int test_simde_vqtbl1q_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t t[16]; SIMDE_ALIGN_TO_16 uint8_t idx[16]; SIMDE_ALIGN_TO_16 int8_t r[16]; } test_vec[] = { { { INT8_C( 37), INT8_C( 124), INT8_C( 8), -INT8_C( 5), INT8_C( 48), -INT8_C( 55), -INT8_C( 105), -INT8_C( 1), -INT8_C( 108), -INT8_C( 100), -INT8_C( 70), INT8_C( 68), INT8_C( 95), INT8_C( 72), -INT8_C( 122), INT8_C( 99) }, { UINT8_C( 6), UINT8_C( 0), UINT8_C( 9), UINT8_C( 6), UINT8_C( 4), UINT8_C( 2), UINT8_C( 14), 
UINT8_C( 11), UINT8_C( 5), UINT8_C( 2), UINT8_C( 15), UINT8_C( 15), UINT8_C( 0), UINT8_C(171), UINT8_C( 3), UINT8_C( 5) }, { -INT8_C( 105), INT8_C( 37), -INT8_C( 100), -INT8_C( 105), INT8_C( 48), INT8_C( 8), -INT8_C( 122), INT8_C( 68), -INT8_C( 55), INT8_C( 8), INT8_C( 99), INT8_C( 99), INT8_C( 37), INT8_C( 0), -INT8_C( 5), -INT8_C( 55) } }, { { INT8_C( 82), -INT8_C( 112), -INT8_C( 57), -INT8_C( 122), INT8_C( 2), INT8_C( 54), INT8_C( 114), -INT8_C( 104), INT8_C( 8), INT8_C( 97), INT8_C( 39), INT8_C( 56), INT8_C( 13), -INT8_C( 86), -INT8_C( 114), INT8_C( 53) }, { UINT8_C( 5), UINT8_C( 15), UINT8_C( 13), UINT8_C(137), UINT8_C( 7), UINT8_C( 4), UINT8_C( 1), UINT8_C( 75), UINT8_C( 6), UINT8_C( 13), UINT8_C( 14), UINT8_C( 1), UINT8_C( 79), UINT8_C(116), UINT8_C( 50), UINT8_C( 2) }, { INT8_C( 54), INT8_C( 53), -INT8_C( 86), INT8_C( 0), -INT8_C( 104), INT8_C( 2), -INT8_C( 112), INT8_C( 0), INT8_C( 114), -INT8_C( 86), -INT8_C( 114), -INT8_C( 112), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 57) } }, { { -INT8_C( 35), -INT8_C( 53), INT8_C( 46), -INT8_C( 92), -INT8_C( 81), -INT8_C( 97), -INT8_C( 17), -INT8_C( 90), INT8_C( 60), INT8_C( 29), -INT8_C( 9), -INT8_C( 117), -INT8_C( 111), INT8_C( 41), INT8_C( 45), -INT8_C( 106) }, { UINT8_C( 2), UINT8_C( 6), UINT8_C( 13), UINT8_C( 2), UINT8_C( 0), UINT8_C( 12), UINT8_C( 9), UINT8_C(236), UINT8_C( 2), UINT8_C( 9), UINT8_C( 5), UINT8_C( 2), UINT8_C( 7), UINT8_C( 3), UINT8_C( 7), UINT8_C( 4) }, { INT8_C( 46), -INT8_C( 17), INT8_C( 41), INT8_C( 46), -INT8_C( 35), -INT8_C( 111), INT8_C( 29), INT8_C( 0), INT8_C( 46), INT8_C( 29), -INT8_C( 97), INT8_C( 46), -INT8_C( 90), -INT8_C( 92), -INT8_C( 90), -INT8_C( 81) } }, { { INT8_C( 47), INT8_C( 73), -INT8_C( 24), INT8_C( 31), -INT8_C( 123), INT8_C( 114), INT8_C( 12), -INT8_C( 121), INT8_C( 107), INT8_C( 1), -INT8_C( 7), INT8_C( 99), INT8_C( 53), INT8_C( 16), INT8_C( 55), INT8_C( 51) }, { UINT8_C( 5), UINT8_C( 0), UINT8_C(225), UINT8_C( 57), UINT8_C( 23), UINT8_C( 5), UINT8_C( 89), UINT8_C( 
12), UINT8_C( 0), UINT8_C( 4), UINT8_C( 2), UINT8_C( 4), UINT8_C( 13), UINT8_C( 14), UINT8_C( 11), UINT8_C( 12) }, { INT8_C( 114), INT8_C( 47), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 114), INT8_C( 0), INT8_C( 53), INT8_C( 47), -INT8_C( 123), -INT8_C( 24), -INT8_C( 123), INT8_C( 16), INT8_C( 55), INT8_C( 99), INT8_C( 53) } }, { { -INT8_C( 101), -INT8_C( 125), INT8_C( 74), -INT8_C( 77), -INT8_C( 71), -INT8_C( 93), INT8_C( 79), INT8_C( 57), -INT8_C( 89), INT8_C( 1), INT8_C( 46), -INT8_C( 123), INT8_C( 95), -INT8_C( 71), -INT8_C( 111), INT8_C( 6) }, { UINT8_C( 12), UINT8_C( 13), UINT8_C( 50), UINT8_C( 2), UINT8_C( 5), UINT8_C( 5), UINT8_C( 3), UINT8_C( 15), UINT8_C( 1), UINT8_C( 7), UINT8_C( 13), UINT8_C( 13), UINT8_C( 2), UINT8_C( 15), UINT8_C( 14), UINT8_C( 14) }, { INT8_C( 95), -INT8_C( 71), INT8_C( 0), INT8_C( 74), -INT8_C( 93), -INT8_C( 93), -INT8_C( 77), INT8_C( 6), -INT8_C( 125), INT8_C( 57), -INT8_C( 71), -INT8_C( 71), INT8_C( 74), INT8_C( 6), -INT8_C( 111), -INT8_C( 111) } }, { { -INT8_C( 58), INT8_C( 56), -INT8_C( 37), -INT8_C( 68), INT8_C( 29), INT8_C( 62), -INT8_C( 21), -INT8_C( 82), INT8_C( 85), -INT8_C( 120), -INT8_C( 5), INT8_C( 87), -INT8_C( 56), INT8_C( 89), -INT8_C( 11), -INT8_C( 117) }, { UINT8_C( 1), UINT8_C( 6), UINT8_C( 7), UINT8_C( 12), UINT8_C( 6), UINT8_C( 12), UINT8_C( 14), UINT8_C( 7), UINT8_C( 0), UINT8_C( 6), UINT8_C( 7), UINT8_C( 12), UINT8_C( 15), UINT8_C( 13), UINT8_C( 5), UINT8_C( 5) }, { INT8_C( 56), -INT8_C( 21), -INT8_C( 82), -INT8_C( 56), -INT8_C( 21), -INT8_C( 56), -INT8_C( 11), -INT8_C( 82), -INT8_C( 58), -INT8_C( 21), -INT8_C( 82), -INT8_C( 56), -INT8_C( 117), INT8_C( 89), INT8_C( 62), INT8_C( 62) } }, { { INT8_C( 7), INT8_C( 15), INT8_C( 49), -INT8_C( 19), -INT8_C( 53), INT8_C( 112), INT8_C( 117), INT8_C( 107), INT8_C( 38), -INT8_C( 4), -INT8_C( 88), -INT8_C( 27), -INT8_C( 118), -INT8_C( 83), INT8_C( 106), INT8_C( 79) }, { UINT8_C( 14), UINT8_C( 12), UINT8_C( 2), UINT8_C( 12), UINT8_C( 8), UINT8_C( 2), UINT8_C( 0), UINT8_C( 
13), UINT8_C( 14), UINT8_C( 11), UINT8_C( 10), UINT8_C( 50), UINT8_C( 11), UINT8_C( 2), UINT8_C( 24), UINT8_C( 2) }, { INT8_C( 106), -INT8_C( 118), INT8_C( 49), -INT8_C( 118), INT8_C( 38), INT8_C( 49), INT8_C( 7), -INT8_C( 83), INT8_C( 106), -INT8_C( 27), -INT8_C( 88), INT8_C( 0), -INT8_C( 27), INT8_C( 49), INT8_C( 0), INT8_C( 49) } }, { { -INT8_C( 37), INT8_C( 29), -INT8_C( 24), -INT8_C( 77), -INT8_C( 33), INT8_C( 8), INT8_C( 65), INT8_C( 45), -INT8_C( 13), INT8_C( 75), INT8_C( 96), -INT8_C( 97), INT8_C( 94), INT8_C( 120), INT8_C( 81), INT8_MAX }, { UINT8_C( 1), UINT8_C( 1), UINT8_C( 12), UINT8_C( 11), UINT8_C( 6), UINT8_C( 5), UINT8_C( 10), UINT8_C( 8), UINT8_C(197), UINT8_C( 15), UINT8_C( 3), UINT8_C( 3), UINT8_C( 14), UINT8_C(158), UINT8_C(175), UINT8_C( 41) }, { INT8_C( 29), INT8_C( 29), INT8_C( 94), -INT8_C( 97), INT8_C( 65), INT8_C( 8), INT8_C( 96), -INT8_C( 13), INT8_C( 0), INT8_MAX, -INT8_C( 77), -INT8_C( 77), INT8_C( 81), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t t = simde_vld1q_s8(test_vec[i].t); simde_uint8x16_t idx = simde_vld1q_u8(test_vec[i].idx); simde_int8x16_t r = simde_vqtbl1q_s8(t, idx); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t t = simde_test_arm_neon_random_i8x16(); simde_uint8x16_private idx_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 15; } } simde_uint8x16_t idx = simde_uint8x16_from_private(idx_); simde_int8x16_t r = simde_vqtbl1q_s8(t, idx); simde_test_arm_neon_write_i8x16(2, t, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int 
test_simde_vqtbl1q_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t t[16]; SIMDE_ALIGN_TO_16 uint8_t idx[16]; SIMDE_ALIGN_TO_16 uint8_t r[16]; } test_vec[] = { { { UINT8_C(142), UINT8_C(172), UINT8_C( 12), UINT8_C(116), UINT8_C(168), UINT8_C(225), UINT8_C( 78), UINT8_C( 20), UINT8_C( 22), UINT8_C( 25), UINT8_C(166), UINT8_C(198), UINT8_C( 9), UINT8_C(107), UINT8_C(223), UINT8_C(250) }, { UINT8_C( 6), UINT8_C( 10), UINT8_C( 9), UINT8_C( 11), UINT8_C(204), UINT8_C( 6), UINT8_C( 6), UINT8_C( 0), UINT8_C( 2), UINT8_C( 2), UINT8_C( 4), UINT8_C( 12), UINT8_C( 15), UINT8_C( 98), UINT8_C( 8), UINT8_C( 13) }, { UINT8_C( 78), UINT8_C(166), UINT8_C( 25), UINT8_C(198), UINT8_C( 0), UINT8_C( 78), UINT8_C( 78), UINT8_C(142), UINT8_C( 12), UINT8_C( 12), UINT8_C(168), UINT8_C( 9), UINT8_C(250), UINT8_C( 0), UINT8_C( 22), UINT8_C(107) } }, { { UINT8_C( 76), UINT8_C( 53), UINT8_C( 78), UINT8_C( 24), UINT8_C(155), UINT8_C( 36), UINT8_C(153), UINT8_C( 77), UINT8_C(231), UINT8_C(189), UINT8_C(170), UINT8_C(150), UINT8_C( 31), UINT8_C( 34), UINT8_C(211), UINT8_C( 45) }, { UINT8_C( 6), UINT8_C( 4), UINT8_C( 3), UINT8_C( 12), UINT8_C( 3), UINT8_C( 14), UINT8_C( 8), UINT8_C(155), UINT8_C( 15), UINT8_C( 10), UINT8_C( 13), UINT8_C( 12), UINT8_C( 12), UINT8_C( 8), UINT8_C(174), UINT8_C( 56) }, { UINT8_C(153), UINT8_C(155), UINT8_C( 24), UINT8_C( 31), UINT8_C( 24), UINT8_C(211), UINT8_C(231), UINT8_C( 0), UINT8_C( 45), UINT8_C(170), UINT8_C( 34), UINT8_C( 31), UINT8_C( 31), UINT8_C(231), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(245), UINT8_C(214), UINT8_C(117), UINT8_C(120), UINT8_C(132), UINT8_C(254), UINT8_C( 19), UINT8_C(164), UINT8_C(200), UINT8_C(208), UINT8_C(160), UINT8_C(180), UINT8_C(169), UINT8_C( 78), UINT8_C(236), UINT8_C(182) }, { UINT8_C( 11), UINT8_C( 13), UINT8_C( 15), UINT8_C( 12), UINT8_C( 6), UINT8_C( 86), UINT8_C( 4), UINT8_C( 13), UINT8_C( 6), UINT8_C( 2), UINT8_C( 2), UINT8_C( 9), UINT8_C( 3), UINT8_C(133), UINT8_C( 2), UINT8_C( 8) }, { UINT8_C(180), 
UINT8_C( 78), UINT8_C(182), UINT8_C(169), UINT8_C( 19), UINT8_C( 0), UINT8_C(132), UINT8_C( 78), UINT8_C( 19), UINT8_C(117), UINT8_C(117), UINT8_C(208), UINT8_C(120), UINT8_C( 0), UINT8_C(117), UINT8_C(200) } }, { { UINT8_C( 60), UINT8_C(147), UINT8_C( 41), UINT8_C( 98), UINT8_C(233), UINT8_C(157), UINT8_C( 47), UINT8_C(223), UINT8_C(175), UINT8_C(194), UINT8_C(152), UINT8_C( 50), UINT8_C( 71), UINT8_C(186), UINT8_C(170), UINT8_C(162) }, { UINT8_C( 82), UINT8_C( 10), UINT8_C( 1), UINT8_C( 7), UINT8_C(157), UINT8_C( 5), UINT8_C( 5), UINT8_C( 1), UINT8_C( 8), UINT8_C( 8), UINT8_C( 14), UINT8_C( 10), UINT8_C( 7), UINT8_C( 2), UINT8_C( 87), UINT8_C( 3) }, { UINT8_C( 0), UINT8_C(152), UINT8_C(147), UINT8_C(223), UINT8_C( 0), UINT8_C(157), UINT8_C(157), UINT8_C(147), UINT8_C(175), UINT8_C(175), UINT8_C(170), UINT8_C(152), UINT8_C(223), UINT8_C( 41), UINT8_C( 0), UINT8_C( 98) } }, { { UINT8_C( 66), UINT8_C( 81), UINT8_C( 10), UINT8_C(224), UINT8_C( 86), UINT8_C( 79), UINT8_C( 81), UINT8_C(127), UINT8_C(167), UINT8_C( 64), UINT8_C( 25), UINT8_C(254), UINT8_C( 98), UINT8_C(113), UINT8_C(145), UINT8_C( 23) }, { UINT8_C(241), UINT8_C(135), UINT8_C( 5), UINT8_C( 15), UINT8_C( 12), UINT8_C( 2), UINT8_C( 11), UINT8_C( 3), UINT8_C( 8), UINT8_C( 10), UINT8_C( 1), UINT8_C( 25), UINT8_C(130), UINT8_C( 1), UINT8_C( 11), UINT8_C( 5) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 79), UINT8_C( 23), UINT8_C( 98), UINT8_C( 10), UINT8_C(254), UINT8_C(224), UINT8_C(167), UINT8_C( 25), UINT8_C( 81), UINT8_C( 0), UINT8_C( 0), UINT8_C( 81), UINT8_C(254), UINT8_C( 79) } }, { { UINT8_C( 83), UINT8_C(100), UINT8_C( 67), UINT8_MAX, UINT8_C(151), UINT8_C( 30), UINT8_C(146), UINT8_C(223), UINT8_C(248), UINT8_C( 83), UINT8_C(248), UINT8_C(123), UINT8_C(228), UINT8_C( 52), UINT8_C( 64), UINT8_C(199) }, { UINT8_C( 9), UINT8_C( 5), UINT8_C( 0), UINT8_C( 14), UINT8_C( 11), UINT8_C(184), UINT8_C( 11), UINT8_C( 18), UINT8_C( 9), UINT8_C( 6), UINT8_C( 10), UINT8_C(204), UINT8_C( 2), UINT8_C( 10), UINT8_C( 0), 
UINT8_C( 6) }, { UINT8_C( 83), UINT8_C( 30), UINT8_C( 83), UINT8_C( 64), UINT8_C(123), UINT8_C( 0), UINT8_C(123), UINT8_C( 0), UINT8_C( 83), UINT8_C(146), UINT8_C(248), UINT8_C( 0), UINT8_C( 67), UINT8_C(248), UINT8_C( 83), UINT8_C(146) } }, { { UINT8_C(250), UINT8_C( 55), UINT8_C(233), UINT8_C(213), UINT8_C(239), UINT8_C( 52), UINT8_C(231), UINT8_C(120), UINT8_C(186), UINT8_C(146), UINT8_C( 68), UINT8_C( 13), UINT8_C(236), UINT8_C( 68), UINT8_C(179), UINT8_C(170) }, { UINT8_C( 7), UINT8_C( 88), UINT8_C( 0), UINT8_C(233), UINT8_C( 0), UINT8_C( 53), UINT8_C( 67), UINT8_C( 12), UINT8_C( 2), UINT8_C( 8), UINT8_C( 12), UINT8_C( 4), UINT8_C( 45), UINT8_C( 3), UINT8_C( 15), UINT8_C( 7) }, { UINT8_C(120), UINT8_C( 0), UINT8_C(250), UINT8_C( 0), UINT8_C(250), UINT8_C( 0), UINT8_C( 0), UINT8_C(236), UINT8_C(233), UINT8_C(186), UINT8_C(236), UINT8_C(239), UINT8_C( 0), UINT8_C(213), UINT8_C(170), UINT8_C(120) } }, { { UINT8_C(144), UINT8_C( 12), UINT8_C( 91), UINT8_C( 32), UINT8_C( 65), UINT8_C(158), UINT8_C( 60), UINT8_C(164), UINT8_C(182), UINT8_C(200), UINT8_C(104), UINT8_C(227), UINT8_C(139), UINT8_C( 7), UINT8_C( 10), UINT8_C(133) }, { UINT8_C( 15), UINT8_C( 6), UINT8_C( 14), UINT8_C( 12), UINT8_C( 10), UINT8_C( 0), UINT8_C( 4), UINT8_C( 0), UINT8_C( 6), UINT8_C( 8), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 15), UINT8_C( 2), UINT8_C( 16) }, { UINT8_C(133), UINT8_C( 60), UINT8_C( 10), UINT8_C(139), UINT8_C(104), UINT8_C(144), UINT8_C( 65), UINT8_C(144), UINT8_C( 60), UINT8_C(182), UINT8_C( 91), UINT8_C(144), UINT8_C(144), UINT8_C(133), UINT8_C( 91), UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t t = simde_vld1q_u8(test_vec[i].t); simde_uint8x16_t idx = simde_vld1q_u8(test_vec[i].idx); simde_uint8x16_t r = simde_vqtbl1q_u8(t, idx); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t t = 
simde_test_arm_neon_random_u8x16(); simde_uint8x16_private idx_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 15; } } simde_uint8x16_t idx = simde_uint8x16_from_private(idx_); simde_uint8x16_t r = simde_vqtbl1q_u8(t, idx); simde_test_arm_neon_write_u8x16(2, t, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #if !defined(SIMDE_BUG_INTEL_857088) static int test_simde_vqtbl2q_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t t[2][16]; SIMDE_ALIGN_TO_16 uint8_t idx[16]; SIMDE_ALIGN_TO_16 int8_t r[16]; } test_vec[] = { { { { INT8_C( 81), INT8_C( 103), -INT8_C( 107), -INT8_C( 105), INT8_C( 49), INT8_C( 42), -INT8_C( 93), -INT8_C( 31), INT8_C( 20), -INT8_C( 36), INT8_C( 67), -INT8_C( 64), INT8_C( 40), -INT8_C( 120), -INT8_C( 115), INT8_C( 32) }, { -INT8_C( 9), INT8_C( 92), INT8_C( 74), -INT8_C( 27), INT8_C( 94), -INT8_C( 5), -INT8_C( 74), INT8_C( 49), INT8_C( 102), -INT8_C( 67), INT8_C( 1), INT8_C( 121), INT8_C( 125), INT8_C( 3), -INT8_C( 116), -INT8_C( 50) } }, { UINT8_C( 11), UINT8_C( 33), UINT8_C( 6), UINT8_C(156), UINT8_C( 12), UINT8_C( 9), UINT8_C(125), UINT8_C( 96), UINT8_C(229), UINT8_C( 0), UINT8_C( 0), UINT8_C( 13), UINT8_C( 8), UINT8_C( 13), UINT8_C( 13), UINT8_C( 0) }, { -INT8_C( 64), INT8_C( 0), -INT8_C( 93), INT8_C( 0), INT8_C( 40), -INT8_C( 36), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 81), INT8_C( 81), -INT8_C( 120), INT8_C( 20), -INT8_C( 120), -INT8_C( 120), INT8_C( 81) } }, { { { -INT8_C( 1), INT8_C( 74), -INT8_C( 92), INT8_C( 75), INT8_C( 83), INT8_C( 34), -INT8_C( 85), INT8_C( 56), -INT8_C( 30), -INT8_C( 53), INT8_C( 69), INT8_C( 43), INT8_C( 120), INT8_C( 114), INT8_C( 107), -INT8_C( 126) }, { -INT8_C( 23), -INT8_C( 112), -INT8_C( 23), INT8_C( 92), 
INT8_C( 107), -INT8_C( 126), INT8_C( 53), INT8_C( 3), INT8_C( 28), -INT8_C( 121), INT8_C( 24), -INT8_C( 71), INT8_C( 101), -INT8_C( 4), -INT8_C( 62), INT8_C( 101) } }, { UINT8_C( 6), UINT8_C( 6), UINT8_C( 16), UINT8_C( 25), UINT8_C( 8), UINT8_C( 28), UINT8_C(209), UINT8_C(107), UINT8_C( 39), UINT8_C( 22), UINT8_C( 22), UINT8_C( 0), UINT8_C(136), UINT8_C( 1), UINT8_C( 34), UINT8_C( 18) }, { -INT8_C( 85), -INT8_C( 85), -INT8_C( 23), -INT8_C( 121), -INT8_C( 30), INT8_C( 101), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 53), INT8_C( 53), -INT8_C( 1), INT8_C( 0), INT8_C( 74), INT8_C( 0), -INT8_C( 23) } }, { { { -INT8_C( 117), INT8_C( 6), -INT8_C( 12), INT8_C( 20), INT8_C( 98), -INT8_C( 59), INT8_MAX, -INT8_C( 118), -INT8_C( 36), INT8_C( 21), INT8_C( 42), INT8_C( 100), INT8_C( 22), INT8_C( 76), -INT8_C( 42), -INT8_C( 89) }, { INT8_C( 87), -INT8_C( 91), -INT8_C( 93), -INT8_C( 27), -INT8_C( 87), -INT8_C( 94), -INT8_C( 114), INT8_C( 52), -INT8_C( 71), -INT8_C( 15), INT8_C( 37), -INT8_C( 51), INT8_C( 22), INT8_C( 123), INT8_C( 40), -INT8_C( 94) } }, { UINT8_C( 2), UINT8_C( 28), UINT8_C( 22), UINT8_C( 4), UINT8_C( 1), UINT8_C( 21), UINT8_C(110), UINT8_C( 29), UINT8_C( 74), UINT8_C( 24), UINT8_C( 2), UINT8_C( 96), UINT8_C( 4), UINT8_C( 24), UINT8_C( 7), UINT8_C( 28) }, { -INT8_C( 12), INT8_C( 22), -INT8_C( 114), INT8_C( 98), INT8_C( 6), -INT8_C( 94), INT8_C( 0), INT8_C( 123), INT8_C( 0), -INT8_C( 71), -INT8_C( 12), INT8_C( 0), INT8_C( 98), -INT8_C( 71), -INT8_C( 118), INT8_C( 22) } }, { { { INT8_C( 23), INT8_C( 15), -INT8_C( 126), -INT8_C( 8), INT8_C( 68), -INT8_C( 15), -INT8_C( 74), -INT8_C( 114), -INT8_C( 119), -INT8_C( 40), -INT8_C( 18), INT8_C( 110), -INT8_C( 48), -INT8_C( 11), -INT8_C( 86), INT8_C( 110) }, { -INT8_C( 97), -INT8_C( 53), -INT8_C( 76), -INT8_C( 21), INT8_C( 122), INT8_C( 47), -INT8_C( 15), INT8_C( 27), -INT8_C( 48), -INT8_C( 60), -INT8_C( 45), -INT8_C( 20), -INT8_C( 65), INT8_C( 44), -INT8_C( 118), -INT8_C( 42) } }, { UINT8_C( 28), UINT8_C( 12), UINT8_C( 
14), UINT8_C( 0), UINT8_C( 29), UINT8_C(132), UINT8_C( 15), UINT8_C( 7), UINT8_C( 28), UINT8_C(253), UINT8_C( 21), UINT8_C( 13), UINT8_C( 19), UINT8_C(159), UINT8_C( 27), UINT8_C( 18) }, { -INT8_C( 65), -INT8_C( 48), -INT8_C( 86), INT8_C( 23), INT8_C( 44), INT8_C( 0), INT8_C( 110), -INT8_C( 114), -INT8_C( 65), INT8_C( 0), INT8_C( 47), -INT8_C( 11), -INT8_C( 21), INT8_C( 0), -INT8_C( 20), -INT8_C( 76) } }, { { { -INT8_C( 47), -INT8_C( 106), -INT8_C( 68), -INT8_C( 49), INT8_C( 27), -INT8_C( 53), INT8_C( 86), INT8_C( 119), -INT8_C( 56), INT8_C( 75), -INT8_C( 92), -INT8_C( 69), -INT8_C( 22), INT8_C( 63), INT8_C( 78), INT8_C( 84) }, { -INT8_C( 113), -INT8_C( 52), INT8_C( 56), INT8_C( 14), INT8_C( 59), INT8_C( 56), INT8_C( 93), INT8_C( 110), INT8_C( 11), -INT8_C( 104), INT8_C( 96), INT8_C( 11), INT8_C( 93), INT8_C( 40), INT8_C( 70), INT8_C( 46) } }, { UINT8_C( 30), UINT8_C( 2), UINT8_C(253), UINT8_C( 25), UINT8_C( 13), UINT8_C( 83), UINT8_C( 17), UINT8_C(150), UINT8_C( 30), UINT8_C( 21), UINT8_C( 17), UINT8_C( 8), UINT8_C( 21), UINT8_C(159), UINT8_C( 28), UINT8_C( 4) }, { INT8_C( 70), -INT8_C( 68), INT8_C( 0), -INT8_C( 104), INT8_C( 63), INT8_C( 0), -INT8_C( 52), INT8_C( 0), INT8_C( 70), INT8_C( 56), -INT8_C( 52), -INT8_C( 56), INT8_C( 56), INT8_C( 0), INT8_C( 93), INT8_C( 27) } }, { { { -INT8_C( 83), INT8_C( 80), INT8_C( 52), INT8_C( 122), -INT8_C( 93), -INT8_C( 123), INT8_C( 16), INT8_C( 66), INT8_C( 123), INT8_C( 98), -INT8_C( 54), -INT8_C( 80), INT8_C( 1), -INT8_C( 89), INT8_C( 116), INT8_C( 109) }, { -INT8_C( 68), INT8_C( 70), INT8_C( 19), INT8_C( 9), INT8_C( 117), INT8_C( 40), INT8_C( 98), INT8_C( 60), -INT8_C( 100), -INT8_C( 58), INT8_C( 96), INT8_C( 57), INT8_C( 113), -INT8_C( 78), -INT8_C( 108), INT8_C( 30) } }, { UINT8_C( 2), UINT8_C( 8), UINT8_C(152), UINT8_C( 6), UINT8_C( 14), UINT8_C( 9), UINT8_C( 8), UINT8_C( 9), UINT8_C( 11), UINT8_C( 18), UINT8_C( 25), UINT8_C( 12), UINT8_C( 25), UINT8_C( 13), UINT8_C( 25), UINT8_C( 21) }, { INT8_C( 52), INT8_C( 123), 
INT8_C( 0), INT8_C( 16), INT8_C( 116), INT8_C( 98), INT8_C( 123), INT8_C( 98), -INT8_C( 80), INT8_C( 19), -INT8_C( 58), INT8_C( 1), -INT8_C( 58), -INT8_C( 89), -INT8_C( 58), INT8_C( 40) } }, { { { -INT8_C( 25), INT8_C( 111), -INT8_C( 97), INT8_C( 53), INT8_C( 24), -INT8_C( 121), -INT8_C( 2), INT8_C( 35), INT8_C( 57), INT8_C( 119), INT8_C( 48), -INT8_C( 109), INT8_C( 100), -INT8_C( 87), -INT8_C( 88), -INT8_C( 105) }, { INT8_C( 54), -INT8_C( 57), INT8_C( 63), -INT8_C( 21), INT8_C( 73), INT8_C( 35), INT8_C( 61), -INT8_C( 111), INT8_C( 103), -INT8_C( 57), INT8_C( 74), INT8_C( 93), -INT8_C( 26), INT8_C( 33), INT8_C( 86), -INT8_C( 51) } }, { UINT8_C( 16), UINT8_C( 21), UINT8_C( 2), UINT8_C(169), UINT8_C( 28), UINT8_C( 0), UINT8_C( 12), UINT8_C( 22), UINT8_C(119), UINT8_C( 28), UINT8_C( 9), UINT8_C( 27), UINT8_C( 6), UINT8_C(241), UINT8_C( 18), UINT8_C( 28) }, { INT8_C( 54), INT8_C( 35), -INT8_C( 97), INT8_C( 0), -INT8_C( 26), -INT8_C( 25), INT8_C( 100), INT8_C( 61), INT8_C( 0), -INT8_C( 26), INT8_C( 119), INT8_C( 93), -INT8_C( 2), INT8_C( 0), INT8_C( 63), -INT8_C( 26) } }, { { { -INT8_C( 28), -INT8_C( 127), INT8_C( 55), INT8_C( 97), -INT8_C( 127), INT8_C( 4), INT8_C( 23), -INT8_C( 8), INT8_C( 0), INT8_C( 96), -INT8_C( 45), -INT8_C( 90), INT8_C( 81), INT8_C( 69), -INT8_C( 125), INT8_C( 10) }, { -INT8_C( 10), INT8_C( 75), INT8_C( 12), -INT8_C( 54), INT8_C( 80), -INT8_C( 97), INT8_C( 5), INT8_C( 28), INT8_C( 124), -INT8_C( 98), -INT8_C( 49), INT8_C( 122), -INT8_C( 115), INT8_C( 78), INT8_C( 9), INT8_C( 113) } }, { UINT8_C( 16), UINT8_C( 0), UINT8_C( 18), UINT8_C( 17), UINT8_C( 4), UINT8_C( 9), UINT8_C( 74), UINT8_C( 5), UINT8_C( 9), UINT8_C( 29), UINT8_C( 11), UINT8_C( 27), UINT8_C( 3), UINT8_C( 14), UINT8_C( 5), UINT8_C( 89) }, { -INT8_C( 10), -INT8_C( 28), INT8_C( 12), INT8_C( 75), -INT8_C( 127), INT8_C( 96), INT8_C( 0), INT8_C( 4), INT8_C( 96), INT8_C( 78), -INT8_C( 90), INT8_C( 122), INT8_C( 97), -INT8_C( 125), INT8_C( 4), INT8_C( 0) } }, }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16x2_t t; t.val[0] = simde_vld1q_s8(test_vec[i].t[0]); t.val[1] = simde_vld1q_s8(test_vec[i].t[1]); simde_uint8x16_t idx = simde_vld1q_u8(test_vec[i].idx); simde_int8x16_t r = simde_vqtbl2q_s8(t, idx); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16x2_t t = simde_test_arm_neon_random_i8x16x2(); simde_uint8x16_private idx_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 31; } } simde_uint8x16_t idx = simde_uint8x16_from_private(idx_); simde_int8x16_t r = simde_vqtbl2q_s8(t, idx); simde_test_arm_neon_write_i8x16x2(2, t, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbl2q_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t t[2][16]; SIMDE_ALIGN_TO_16 uint8_t idx[16]; SIMDE_ALIGN_TO_16 uint8_t r[16]; } test_vec[] = { { { { UINT8_C(113), UINT8_C(145), UINT8_C(199), UINT8_C( 23), UINT8_C(246), UINT8_C(139), UINT8_C(165), UINT8_C(240), UINT8_C( 96), UINT8_C( 24), UINT8_C(242), UINT8_C(168), UINT8_C(193), UINT8_C( 50), UINT8_C( 37), UINT8_C(142) }, { UINT8_C(247), UINT8_C(182), UINT8_C(243), UINT8_C( 26), UINT8_C(212), UINT8_C(212), UINT8_C(185), UINT8_C( 89), UINT8_C( 97), UINT8_C(225), UINT8_C(162), UINT8_C(157), UINT8_C( 32), UINT8_C( 50), UINT8_C(223), UINT8_C(145) } }, { UINT8_C( 3), UINT8_C( 6), UINT8_C(169), UINT8_C( 25), UINT8_C( 18), UINT8_C( 14), UINT8_C(169), UINT8_C( 18), UINT8_C( 6), UINT8_C( 27), UINT8_C( 26), UINT8_C( 8), UINT8_C( 13), UINT8_C( 0), UINT8_C( 22), UINT8_C(196) }, { UINT8_C( 23), UINT8_C(165), UINT8_C( 0), UINT8_C(225), UINT8_C(243), UINT8_C( 37), UINT8_C( 
0), UINT8_C(243), UINT8_C(165), UINT8_C(157), UINT8_C(162), UINT8_C( 96), UINT8_C( 50), UINT8_C(113), UINT8_C(185), UINT8_C( 0) } }, { { { UINT8_C( 2), UINT8_C(212), UINT8_C(147), UINT8_C( 52), UINT8_C( 34), UINT8_C( 60), UINT8_C(199), UINT8_C(137), UINT8_C(216), UINT8_C( 1), UINT8_C(177), UINT8_C(165), UINT8_C( 97), UINT8_C(103), UINT8_C(106), UINT8_C(120) }, { UINT8_C( 17), UINT8_C( 72), UINT8_C( 98), UINT8_C(144), UINT8_C(224), UINT8_C(166), UINT8_C(111), UINT8_C( 89), UINT8_C(139), UINT8_C(236), UINT8_C(242), UINT8_C(163), UINT8_C( 72), UINT8_C( 29), UINT8_C(125), UINT8_C( 75) } }, { UINT8_C( 17), UINT8_C( 17), UINT8_C( 31), UINT8_C( 20), UINT8_C( 13), UINT8_C( 70), UINT8_C( 29), UINT8_C( 5), UINT8_C( 8), UINT8_C( 14), UINT8_C( 11), UINT8_C( 9), UINT8_C( 21), UINT8_C( 53), UINT8_C( 1), UINT8_C( 7) }, { UINT8_C( 72), UINT8_C( 72), UINT8_C( 75), UINT8_C(224), UINT8_C(103), UINT8_C( 0), UINT8_C( 29), UINT8_C( 60), UINT8_C(216), UINT8_C(106), UINT8_C(165), UINT8_C( 1), UINT8_C(166), UINT8_C( 0), UINT8_C(212), UINT8_C(137) } }, { { { UINT8_C(231), UINT8_C(198), UINT8_C(204), UINT8_C( 52), UINT8_C( 12), UINT8_C(105), UINT8_C( 90), UINT8_C( 84), UINT8_C(183), UINT8_C( 37), UINT8_C(254), UINT8_C(108), UINT8_C( 90), UINT8_C( 31), UINT8_C( 51), UINT8_C(215) }, { UINT8_C(163), UINT8_C(138), UINT8_C( 53), UINT8_C(205), UINT8_C( 81), UINT8_C(235), UINT8_C(131), UINT8_C( 4), UINT8_C(148), UINT8_C(219), UINT8_MAX, UINT8_C( 91), UINT8_C(177), UINT8_C( 70), UINT8_C( 19), UINT8_C(152) } }, { UINT8_C( 12), UINT8_C( 31), UINT8_C( 13), UINT8_C( 24), UINT8_C( 8), UINT8_C( 39), UINT8_C(109), UINT8_C( 31), UINT8_C( 12), UINT8_C(107), UINT8_C( 11), UINT8_C( 6), UINT8_C(138), UINT8_C( 31), UINT8_C( 29), UINT8_C( 14) }, { UINT8_C( 90), UINT8_C(152), UINT8_C( 31), UINT8_C(148), UINT8_C(183), UINT8_C( 0), UINT8_C( 0), UINT8_C(152), UINT8_C( 90), UINT8_C( 0), UINT8_C(108), UINT8_C( 90), UINT8_C( 0), UINT8_C(152), UINT8_C( 70), UINT8_C( 51) } }, { { { UINT8_C(127), UINT8_C(113), 
UINT8_C(232), UINT8_C(199), UINT8_C(152), UINT8_C( 85), UINT8_C(198), UINT8_C(228), UINT8_C(192), UINT8_C( 50), UINT8_C(138), UINT8_C( 75), UINT8_C(209), UINT8_C( 7), UINT8_C(121), UINT8_C(250) }, { UINT8_C(186), UINT8_C(116), UINT8_C(117), UINT8_C( 88), UINT8_C(243), UINT8_C(243), UINT8_C(138), UINT8_C( 77), UINT8_C(113), UINT8_C( 24), UINT8_C( 88), UINT8_C( 53), UINT8_C(184), UINT8_C(252), UINT8_C( 5), UINT8_C( 56) } }, { UINT8_C(109), UINT8_C( 14), UINT8_C( 31), UINT8_C( 5), UINT8_C( 67), UINT8_C( 6), UINT8_C( 9), UINT8_C( 4), UINT8_C( 24), UINT8_C(115), UINT8_C( 15), UINT8_C( 9), UINT8_C( 27), UINT8_C( 8), UINT8_C( 3), UINT8_C( 21) }, { UINT8_C( 0), UINT8_C(121), UINT8_C( 56), UINT8_C( 85), UINT8_C( 0), UINT8_C(198), UINT8_C( 50), UINT8_C(152), UINT8_C(113), UINT8_C( 0), UINT8_C(250), UINT8_C( 50), UINT8_C( 53), UINT8_C(192), UINT8_C(199), UINT8_C(243) } }, { { { UINT8_C(198), UINT8_C( 31), UINT8_C( 68), UINT8_C( 10), UINT8_C(229), UINT8_C( 46), UINT8_C( 14), UINT8_C(221), UINT8_C(161), UINT8_C( 93), UINT8_C(166), UINT8_C( 28), UINT8_C( 37), UINT8_C(106), UINT8_C( 81), UINT8_C( 97) }, { UINT8_C(162), UINT8_C(222), UINT8_C(145), UINT8_C(206), UINT8_C(246), UINT8_C( 13), UINT8_C(108), UINT8_C( 37), UINT8_C(226), UINT8_C( 63), UINT8_C( 13), UINT8_C(180), UINT8_C( 23), UINT8_C( 45), UINT8_C(243), UINT8_C(222) } }, { UINT8_C( 13), UINT8_C( 23), UINT8_C( 8), UINT8_C( 18), UINT8_C( 5), UINT8_C( 22), UINT8_C( 16), UINT8_C( 7), UINT8_C( 19), UINT8_C(182), UINT8_C( 3), UINT8_C( 24), UINT8_C( 0), UINT8_C( 21), UINT8_C( 25), UINT8_C( 3) }, { UINT8_C(106), UINT8_C( 37), UINT8_C(161), UINT8_C(145), UINT8_C( 46), UINT8_C(108), UINT8_C(162), UINT8_C(221), UINT8_C(206), UINT8_C( 0), UINT8_C( 10), UINT8_C(226), UINT8_C(198), UINT8_C( 13), UINT8_C( 63), UINT8_C( 10) } }, { { { UINT8_C( 57), UINT8_C( 26), UINT8_C( 41), UINT8_C(158), UINT8_C( 16), UINT8_C( 57), UINT8_C(165), UINT8_C( 99), UINT8_C(240), UINT8_C(201), UINT8_C(219), UINT8_C( 16), UINT8_C( 62), UINT8_C(180), 
UINT8_C(211), UINT8_C(145) }, { UINT8_C( 31), UINT8_C(101), UINT8_C(219), UINT8_C(151), UINT8_C( 98), UINT8_C( 74), UINT8_C(241), UINT8_C(159), UINT8_C(198), UINT8_C( 0), UINT8_C(243), UINT8_C(112), UINT8_C( 1), UINT8_C( 37), UINT8_C(103), UINT8_C( 58) } }, { UINT8_C( 31), UINT8_C( 17), UINT8_C(217), UINT8_C( 79), UINT8_C(202), UINT8_C( 30), UINT8_C( 18), UINT8_C( 26), UINT8_C( 7), UINT8_C( 13), UINT8_C( 11), UINT8_C( 5), UINT8_C( 1), UINT8_C( 30), UINT8_C( 23), UINT8_C( 0) }, { UINT8_C( 58), UINT8_C(101), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(103), UINT8_C(219), UINT8_C(243), UINT8_C( 99), UINT8_C(180), UINT8_C( 16), UINT8_C( 57), UINT8_C( 26), UINT8_C(103), UINT8_C(159), UINT8_C( 57) } }, { { { UINT8_C(107), UINT8_C(254), UINT8_C(171), UINT8_C( 54), UINT8_C(124), UINT8_C( 93), UINT8_C(240), UINT8_C(196), UINT8_C(234), UINT8_C(187), UINT8_C( 73), UINT8_C( 43), UINT8_C( 90), UINT8_C( 96), UINT8_C(140), UINT8_C( 93) }, { UINT8_C( 82), UINT8_C(131), UINT8_C(195), UINT8_C(142), UINT8_C(108), UINT8_C(200), UINT8_C(145), UINT8_C( 85), UINT8_C(192), UINT8_C( 4), UINT8_C( 64), UINT8_C(221), UINT8_C(222), UINT8_C(101), UINT8_C( 57), UINT8_C( 74) } }, { UINT8_C( 99), UINT8_C( 4), UINT8_C( 0), UINT8_C( 31), UINT8_C( 1), UINT8_C( 16), UINT8_C(163), UINT8_C( 43), UINT8_C( 12), UINT8_C( 13), UINT8_C( 23), UINT8_C( 6), UINT8_C( 13), UINT8_C( 3), UINT8_C( 3), UINT8_C( 0) }, { UINT8_C( 0), UINT8_C(124), UINT8_C(107), UINT8_C( 74), UINT8_C(254), UINT8_C( 82), UINT8_C( 0), UINT8_C( 0), UINT8_C( 90), UINT8_C( 96), UINT8_C( 85), UINT8_C(240), UINT8_C( 96), UINT8_C( 54), UINT8_C( 54), UINT8_C(107) } }, { { { UINT8_C( 43), UINT8_C(108), UINT8_C( 16), UINT8_C(109), UINT8_C(220), UINT8_C(179), UINT8_C(152), UINT8_C( 8), UINT8_C(160), UINT8_C(239), UINT8_C(142), UINT8_C(238), UINT8_C(210), UINT8_C(114), UINT8_C(142), UINT8_C( 57) }, { UINT8_C( 25), UINT8_C(188), UINT8_C( 12), UINT8_C(136), UINT8_C(124), UINT8_C( 52), UINT8_C(184), UINT8_C( 63), UINT8_C(157), UINT8_C(198), 
UINT8_C(225), UINT8_C(106), UINT8_C( 13), UINT8_C(205), UINT8_C(155), UINT8_C( 56) } }, { UINT8_C( 25), UINT8_C( 11), UINT8_C( 5), UINT8_C( 22), UINT8_C( 30), UINT8_C( 62), UINT8_C( 30), UINT8_MAX, UINT8_C( 13), UINT8_C( 13), UINT8_C( 13), UINT8_C( 0), UINT8_C( 31), UINT8_C( 27), UINT8_C( 25), UINT8_C( 24) }, { UINT8_C(198), UINT8_C(238), UINT8_C(179), UINT8_C(184), UINT8_C(155), UINT8_C( 0), UINT8_C(155), UINT8_C( 0), UINT8_C(114), UINT8_C(114), UINT8_C(114), UINT8_C( 43), UINT8_C( 56), UINT8_C(106), UINT8_C(198), UINT8_C(157) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16x2_t t; t.val[0] = simde_vld1q_u8(test_vec[i].t[0]); t.val[1] = simde_vld1q_u8(test_vec[i].t[1]); simde_uint8x16_t idx = simde_vld1q_u8(test_vec[i].idx); simde_uint8x16_t r = simde_vqtbl2q_u8(t, idx); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16x2_t t = simde_test_arm_neon_random_u8x16x2(); simde_uint8x16_private idx_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 31; } } simde_uint8x16_t idx = simde_uint8x16_from_private(idx_); simde_uint8x16_t r = simde_vqtbl2q_u8(t, idx); simde_test_arm_neon_write_u8x16x2(2, t, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbl3q_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t t[3][16]; SIMDE_ALIGN_TO_16 uint8_t idx[16]; SIMDE_ALIGN_TO_16 int8_t r[16]; } test_vec[] = { { { { -INT8_C( 104), -INT8_C( 29), INT8_C( 64), -INT8_C( 110), -INT8_C( 17), -INT8_C( 127), -INT8_C( 55), -INT8_C( 27), INT8_C( 52), -INT8_C( 16), -INT8_C( 7), -INT8_C( 50), INT8_C( 73), -INT8_C( 35), INT8_C( 81), 
INT8_C( 116) }, { -INT8_C( 100), -INT8_C( 87), -INT8_C( 87), -INT8_C( 107), -INT8_C( 88), -INT8_C( 14), -INT8_C( 111), -INT8_C( 109), INT8_C( 90), -INT8_C( 62), INT8_C( 33), INT8_C( 126), INT8_C( 114), -INT8_C( 13), INT8_C( 100), INT8_C( 10) }, { -INT8_C( 42), -INT8_C( 92), -INT8_C( 99), -INT8_C( 59), INT8_C( 37), INT8_C( 102), -INT8_C( 86), INT8_C( 90), INT8_C( 87), -INT8_C( 92), INT8_C( 40), -INT8_C( 96), -INT8_C( 127), INT8_C( 122), INT8_C( 20), INT8_C( 29) } }, { UINT8_C( 35), UINT8_C( 46), UINT8_C( 34), UINT8_C( 11), UINT8_C( 32), UINT8_C( 19), UINT8_C( 94), UINT8_C( 11), UINT8_C( 5), UINT8_C( 31), UINT8_C( 41), UINT8_C( 23), UINT8_C( 18), UINT8_C( 45), UINT8_C( 34), UINT8_C( 24) }, { -INT8_C( 59), INT8_C( 20), -INT8_C( 99), -INT8_C( 50), -INT8_C( 42), -INT8_C( 107), INT8_C( 0), -INT8_C( 50), -INT8_C( 127), INT8_C( 10), -INT8_C( 92), -INT8_C( 109), -INT8_C( 87), INT8_C( 122), -INT8_C( 99), INT8_C( 90) } }, { { { INT8_C( 79), -INT8_C( 84), -INT8_C( 93), INT8_C( 0), -INT8_C( 17), INT8_C( 1), INT8_C( 11), -INT8_C( 11), -INT8_C( 127), -INT8_C( 108), INT8_C( 108), -INT8_C( 13), -INT8_C( 127), -INT8_C( 18), INT8_C( 60), INT8_C( 19) }, { INT8_C( 13), INT8_C( 74), -INT8_C( 53), -INT8_C( 109), INT8_C( 2), -INT8_C( 36), INT8_C( 111), INT8_C( 95), INT8_C( 22), -INT8_C( 20), INT8_C( 60), -INT8_C( 54), INT8_C( 126), INT8_C( 55), -INT8_C( 94), -INT8_C( 51) }, { -INT8_C( 29), INT8_C( 69), -INT8_C( 51), -INT8_C( 45), INT8_C( 70), -INT8_C( 40), -INT8_C( 56), -INT8_C( 57), INT8_C( 108), INT8_C( 52), -INT8_C( 69), -INT8_C( 18), INT8_C( 35), -INT8_C( 9), INT8_C( 1), INT8_C( 48) } }, { UINT8_C( 17), UINT8_C( 12), UINT8_C( 3), UINT8_C( 19), UINT8_C( 25), UINT8_C( 3), UINT8_C(162), UINT8_C( 47), UINT8_C( 31), UINT8_C(223), UINT8_C( 42), UINT8_C( 13), UINT8_C( 22), UINT8_C( 44), UINT8_C( 11), UINT8_C( 9) }, { INT8_C( 74), -INT8_C( 127), INT8_C( 0), -INT8_C( 109), -INT8_C( 20), INT8_C( 0), INT8_C( 0), INT8_C( 48), -INT8_C( 51), INT8_C( 0), -INT8_C( 69), -INT8_C( 18), INT8_C( 111), 
INT8_C( 35), -INT8_C( 13), -INT8_C( 108) } }, { { { INT8_C( 57), -INT8_C( 32), -INT8_C( 75), -INT8_C( 30), INT8_C( 19), INT8_C( 88), -INT8_C( 94), INT8_C( 50), INT8_C( 55), INT8_C( 44), -INT8_C( 48), INT8_C( 77), INT8_C( 88), INT8_C( 59), INT8_C( 70), -INT8_C( 55) }, { INT8_C( 115), INT8_C( 19), INT8_MIN, -INT8_C( 124), -INT8_C( 89), -INT8_C( 1), INT8_C( 2), INT8_C( 112), INT8_C( 57), INT8_C( 109), INT8_C( 92), INT8_C( 106), -INT8_C( 38), INT8_C( 121), -INT8_C( 36), INT8_C( 20) }, { INT8_C( 89), -INT8_C( 110), -INT8_C( 10), INT8_C( 108), -INT8_C( 22), -INT8_C( 104), -INT8_C( 98), INT8_C( 33), -INT8_C( 60), INT8_C( 110), INT8_C( 110), INT8_C( 28), -INT8_C( 87), -INT8_C( 76), -INT8_C( 27), INT8_C( 29) } }, { UINT8_C( 7), UINT8_C( 6), UINT8_C(161), UINT8_C( 15), UINT8_C(101), UINT8_C(163), UINT8_C( 31), UINT8_C( 15), UINT8_C( 17), UINT8_C( 12), UINT8_C( 9), UINT8_C( 43), UINT8_C( 37), UINT8_C( 38), UINT8_C( 15), UINT8_C( 14) }, { INT8_C( 50), -INT8_C( 94), INT8_C( 0), -INT8_C( 55), INT8_C( 0), INT8_C( 0), INT8_C( 20), -INT8_C( 55), INT8_C( 19), INT8_C( 88), INT8_C( 44), INT8_C( 28), -INT8_C( 104), -INT8_C( 98), -INT8_C( 55), INT8_C( 70) } }, { { { -INT8_C( 69), -INT8_C( 17), -INT8_C( 36), INT8_C( 32), -INT8_C( 110), -INT8_C( 69), -INT8_C( 65), -INT8_C( 93), -INT8_C( 9), -INT8_C( 55), -INT8_C( 113), -INT8_C( 84), -INT8_C( 81), -INT8_C( 114), -INT8_C( 70), INT8_C( 39) }, { -INT8_C( 124), INT8_C( 52), -INT8_C( 119), INT8_C( 19), INT8_C( 77), INT8_C( 12), INT8_C( 102), -INT8_C( 44), -INT8_C( 3), -INT8_C( 43), INT8_C( 4), -INT8_C( 94), INT8_C( 42), INT8_C( 82), INT8_C( 15), -INT8_C( 27) }, { INT8_C( 65), -INT8_C( 21), INT8_C( 6), -INT8_C( 45), -INT8_C( 89), -INT8_C( 59), INT8_C( 119), -INT8_C( 98), -INT8_C( 114), INT8_C( 6), INT8_C( 75), INT8_C( 61), -INT8_C( 108), INT8_C( 5), INT8_C( 100), INT8_C( 25) } }, { UINT8_C( 10), UINT8_C(237), UINT8_C( 44), UINT8_C( 39), UINT8_C( 9), UINT8_C( 2), UINT8_C( 43), UINT8_C(246), UINT8_C( 7), UINT8_C( 47), UINT8_C( 9), UINT8_C( 2), 
UINT8_C( 33), UINT8_C( 24), UINT8_C(119), UINT8_C( 2) }, { -INT8_C( 113), INT8_C( 0), -INT8_C( 108), -INT8_C( 98), -INT8_C( 55), -INT8_C( 36), INT8_C( 61), INT8_C( 0), -INT8_C( 93), INT8_C( 25), -INT8_C( 55), -INT8_C( 36), -INT8_C( 21), -INT8_C( 3), INT8_C( 0), -INT8_C( 36) } }, { { { INT8_C( 97), INT8_C( 28), -INT8_C( 21), INT8_C( 90), -INT8_C( 82), INT8_C( 70), INT8_C( 81), INT8_C( 22), -INT8_C( 91), -INT8_C( 22), -INT8_C( 88), INT8_C( 87), -INT8_C( 110), INT8_C( 31), INT8_C( 73), INT8_C( 38) }, { -INT8_C( 99), INT8_C( 15), INT8_C( 97), -INT8_C( 32), INT8_C( 76), INT8_C( 59), -INT8_C( 79), -INT8_C( 113), INT8_C( 95), -INT8_C( 64), INT8_C( 103), -INT8_C( 119), INT8_C( 52), INT8_C( 87), -INT8_C( 19), -INT8_C( 107) }, { INT8_C( 116), -INT8_C( 40), -INT8_C( 17), INT8_C( 34), INT8_C( 30), INT8_C( 64), INT8_C( 56), -INT8_C( 60), INT8_C( 42), -INT8_C( 32), INT8_C( 27), -INT8_C( 67), INT8_C( 0), INT8_C( 100), -INT8_C( 29), -INT8_C( 99) } }, { UINT8_C( 20), UINT8_C( 69), UINT8_C( 29), UINT8_C( 0), UINT8_C( 32), UINT8_C( 46), UINT8_C( 32), UINT8_C( 31), UINT8_C( 47), UINT8_C( 39), UINT8_C(105), UINT8_C( 35), UINT8_C( 14), UINT8_C( 38), UINT8_C( 40), UINT8_C( 34) }, { INT8_C( 76), INT8_C( 0), INT8_C( 87), INT8_C( 97), INT8_C( 116), -INT8_C( 29), INT8_C( 116), -INT8_C( 107), -INT8_C( 99), -INT8_C( 60), INT8_C( 0), INT8_C( 34), INT8_C( 73), INT8_C( 56), INT8_C( 42), -INT8_C( 17) } }, { { { -INT8_C( 8), -INT8_C( 40), -INT8_C( 59), INT8_C( 120), INT8_C( 6), INT8_C( 21), INT8_C( 87), -INT8_C( 11), -INT8_C( 52), -INT8_C( 64), INT8_C( 24), -INT8_C( 37), INT8_C( 23), -INT8_C( 48), INT8_C( 93), INT8_C( 70) }, { INT8_C( 120), INT8_C( 2), -INT8_C( 109), INT8_C( 96), -INT8_C( 32), -INT8_C( 91), INT8_C( 114), -INT8_C( 98), -INT8_C( 47), INT8_C( 66), INT8_C( 92), INT8_C( 98), -INT8_C( 11), -INT8_C( 73), INT8_C( 103), -INT8_C( 19) }, { -INT8_C( 113), INT8_C( 45), INT8_C( 101), -INT8_C( 107), INT8_C( 66), -INT8_C( 68), -INT8_C( 117), INT8_C( 15), INT8_C( 125), -INT8_C( 93), -INT8_C( 22), 
-INT8_C( 108), INT8_C( 116), INT8_C( 71), -INT8_C( 38), -INT8_C( 20) } }, { UINT8_C( 26), UINT8_C( 13), UINT8_C( 28), UINT8_C( 42), UINT8_C( 18), UINT8_C( 46), UINT8_C( 8), UINT8_C(228), UINT8_C( 0), UINT8_C( 36), UINT8_C( 22), UINT8_C( 5), UINT8_C(219), UINT8_C( 30), UINT8_C( 34), UINT8_C( 10) }, { INT8_C( 92), -INT8_C( 48), -INT8_C( 11), -INT8_C( 22), -INT8_C( 109), -INT8_C( 38), -INT8_C( 52), INT8_C( 0), -INT8_C( 8), INT8_C( 66), INT8_C( 114), INT8_C( 21), INT8_C( 0), INT8_C( 103), INT8_C( 101), INT8_C( 24) } }, { { { INT8_C( 92), -INT8_C( 38), -INT8_C( 46), INT8_C( 111), -INT8_C( 104), -INT8_C( 102), INT8_C( 83), -INT8_C( 103), -INT8_C( 66), -INT8_C( 103), -INT8_C( 114), -INT8_C( 103), INT8_C( 71), INT8_C( 113), INT8_C( 3), INT8_C( 34) }, { -INT8_C( 72), INT8_C( 2), INT8_C( 64), -INT8_C( 68), -INT8_C( 115), INT8_C( 108), INT8_C( 61), -INT8_C( 69), -INT8_C( 125), INT8_C( 82), INT8_C( 93), -INT8_C( 31), INT8_C( 65), -INT8_C( 21), -INT8_C( 119), -INT8_C( 98) }, { -INT8_C( 59), INT8_C( 91), INT8_C( 13), INT8_C( 93), -INT8_C( 11), INT8_C( 96), -INT8_C( 10), -INT8_C( 77), -INT8_C( 7), -INT8_C( 123), INT8_C( 76), INT8_C( 65), -INT8_C( 10), INT8_C( 79), INT8_C( 99), -INT8_C( 82) } }, { UINT8_C( 33), UINT8_C( 19), UINT8_C( 11), UINT8_C( 30), UINT8_C( 16), UINT8_C( 24), UINT8_C(153), UINT8_C( 3), UINT8_C( 11), UINT8_C(246), UINT8_C( 20), UINT8_C( 12), UINT8_C( 33), UINT8_C( 13), UINT8_C( 26), UINT8_C( 22) }, { INT8_C( 91), -INT8_C( 68), -INT8_C( 103), -INT8_C( 119), -INT8_C( 72), -INT8_C( 125), INT8_C( 0), INT8_C( 111), -INT8_C( 103), INT8_C( 0), -INT8_C( 115), INT8_C( 71), INT8_C( 91), INT8_C( 113), INT8_C( 93), INT8_C( 61) } }, { { { -INT8_C( 119), -INT8_C( 113), -INT8_C( 53), -INT8_C( 103), INT8_C( 55), INT8_C( 100), INT8_C( 44), INT8_C( 50), INT8_C( 91), -INT8_C( 96), INT8_C( 111), INT8_C( 60), -INT8_C( 99), INT8_C( 73), -INT8_C( 29), -INT8_C( 11) }, { INT8_C( 49), -INT8_C( 25), INT8_C( 66), INT8_C( 120), -INT8_C( 31), INT8_C( 66), -INT8_C( 71), INT8_C( 97), -INT8_C( 
114), INT8_C( 59), -INT8_C( 42), INT8_C( 41), INT8_C( 33), -INT8_C( 6), INT8_C( 21), -INT8_C( 86) }, { -INT8_C( 119), -INT8_C( 32), INT8_C( 67), -INT8_C( 63), INT8_C( 69), INT8_C( 111), -INT8_C( 13), -INT8_C( 96), INT8_C( 15), INT8_C( 98), -INT8_C( 36), -INT8_C( 84), -INT8_C( 84), -INT8_C( 65), -INT8_C( 95), -INT8_C( 35) } }, { UINT8_C( 22), UINT8_C(227), UINT8_C( 85), UINT8_C( 40), UINT8_C( 37), UINT8_C( 15), UINT8_C(233), UINT8_C( 35), UINT8_C( 26), UINT8_C( 47), UINT8_C( 28), UINT8_C( 11), UINT8_C( 42), UINT8_C( 1), UINT8_C( 21), UINT8_C( 19) }, { -INT8_C( 71), INT8_C( 0), INT8_C( 0), INT8_C( 15), INT8_C( 111), -INT8_C( 11), INT8_C( 0), -INT8_C( 63), -INT8_C( 42), -INT8_C( 35), INT8_C( 33), INT8_C( 60), -INT8_C( 36), -INT8_C( 113), INT8_C( 66), INT8_C( 120) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16x3_t t; t.val[0] = simde_vld1q_s8(test_vec[i].t[0]); t.val[1] = simde_vld1q_s8(test_vec[i].t[1]); t.val[2] = simde_vld1q_s8(test_vec[i].t[2]); simde_uint8x16_t idx = simde_vld1q_u8(test_vec[i].idx); simde_int8x16_t r = simde_vqtbl3q_s8(t, idx); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16x3_t t = simde_test_arm_neon_random_i8x16x3(); simde_uint8x16_private idx_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] %= 48; } } simde_uint8x16_t idx = simde_uint8x16_from_private(idx_); simde_int8x16_t r = simde_vqtbl3q_s8(t, idx); simde_test_arm_neon_write_i8x16x3(2, t, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbl3q_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t t[3][16]; SIMDE_ALIGN_TO_16 
uint8_t idx[16]; SIMDE_ALIGN_TO_16 uint8_t r[16]; } test_vec[] = { { { { UINT8_C(183), UINT8_C( 77), UINT8_C(187), UINT8_C(230), UINT8_C( 94), UINT8_C(110), UINT8_C(156), UINT8_C(135), UINT8_C(216), UINT8_C(209), UINT8_C(114), UINT8_C(100), UINT8_C( 51), UINT8_C( 27), UINT8_C(104), UINT8_C(194) }, { UINT8_C( 28), UINT8_C( 9), UINT8_C(138), UINT8_C(222), UINT8_C( 96), UINT8_C(233), UINT8_C(123), UINT8_C( 57), UINT8_C(232), UINT8_C(241), UINT8_C( 29), UINT8_C(249), UINT8_C(162), UINT8_C( 81), UINT8_C(115), UINT8_C( 89) }, { UINT8_C(159), UINT8_C( 46), UINT8_C( 63), UINT8_C(253), UINT8_C(157), UINT8_C(219), UINT8_C(132), UINT8_C(117), UINT8_C(172), UINT8_C(247), UINT8_C(218), UINT8_C(223), UINT8_C( 18), UINT8_C( 66), UINT8_C(162), UINT8_C( 46) } }, { UINT8_C( 27), UINT8_C( 44), UINT8_C( 13), UINT8_C( 27), UINT8_C( 21), UINT8_C( 40), UINT8_C( 37), UINT8_C( 13), UINT8_C(121), UINT8_C( 2), UINT8_C( 6), UINT8_C( 27), UINT8_C( 36), UINT8_C( 10), UINT8_C( 20), UINT8_C(243) }, { UINT8_C(249), UINT8_C( 18), UINT8_C( 27), UINT8_C(249), UINT8_C(233), UINT8_C(172), UINT8_C(219), UINT8_C( 27), UINT8_C( 0), UINT8_C(187), UINT8_C(156), UINT8_C(249), UINT8_C(157), UINT8_C(114), UINT8_C( 96), UINT8_C( 0) } }, { { { UINT8_C(232), UINT8_C(186), UINT8_C(189), UINT8_C(254), UINT8_C( 66), UINT8_C(162), UINT8_C(251), UINT8_C(187), UINT8_C(165), UINT8_C(242), UINT8_C(215), UINT8_C(249), UINT8_C( 92), UINT8_C( 75), UINT8_C(236), UINT8_C(244) }, { UINT8_MAX, UINT8_C(220), UINT8_C( 42), UINT8_C(141), UINT8_C( 81), UINT8_C(213), UINT8_C(200), UINT8_C(189), UINT8_C( 90), UINT8_C(226), UINT8_C( 60), UINT8_C( 33), UINT8_C(158), UINT8_C(233), UINT8_C( 51), UINT8_C(134) }, { UINT8_C(163), UINT8_C(240), UINT8_C(132), UINT8_C(229), UINT8_C(147), UINT8_C(128), UINT8_C(160), UINT8_C( 56), UINT8_C(114), UINT8_C(119), UINT8_C( 49), UINT8_C(206), UINT8_C(195), UINT8_C( 29), UINT8_C(194), UINT8_C(194) } }, { UINT8_C( 9), UINT8_C( 44), UINT8_C( 31), UINT8_C( 27), UINT8_C( 1), UINT8_C( 23), UINT8_C( 8), 
UINT8_C( 27), UINT8_C( 9), UINT8_C( 20), UINT8_C( 12), UINT8_C( 7), UINT8_C( 45), UINT8_C( 15), UINT8_C( 30), UINT8_C(208) }, { UINT8_C(242), UINT8_C(195), UINT8_C(134), UINT8_C( 33), UINT8_C(186), UINT8_C(189), UINT8_C(165), UINT8_C( 33), UINT8_C(242), UINT8_C( 81), UINT8_C( 92), UINT8_C(187), UINT8_C( 29), UINT8_C(244), UINT8_C( 51), UINT8_C( 0) } }, { { { UINT8_C( 17), UINT8_C(162), UINT8_C(189), UINT8_C(211), UINT8_C(185), UINT8_C(198), UINT8_C(238), UINT8_C(179), UINT8_C( 10), UINT8_C( 43), UINT8_C( 74), UINT8_C( 56), UINT8_C(154), UINT8_C(104), UINT8_C( 8), UINT8_C(250) }, { UINT8_C( 11), UINT8_C(190), UINT8_C(237), UINT8_C( 45), UINT8_C( 20), UINT8_C( 24), UINT8_C(194), UINT8_C(225), UINT8_C(116), UINT8_C( 36), UINT8_C(114), UINT8_C(237), UINT8_C( 73), UINT8_C(196), UINT8_C( 96), UINT8_C( 91) }, { UINT8_C(102), UINT8_C( 29), UINT8_C( 46), UINT8_C( 32), UINT8_C(227), UINT8_C( 28), UINT8_C(211), UINT8_C(238), UINT8_C( 71), UINT8_C( 29), UINT8_C( 38), UINT8_C(226), UINT8_C(134), UINT8_C( 46), UINT8_C(220), UINT8_C(145) } }, { UINT8_C( 44), UINT8_C( 10), UINT8_C( 46), UINT8_C( 0), UINT8_C(226), UINT8_C( 32), UINT8_C( 34), UINT8_C( 39), UINT8_C(165), UINT8_C( 36), UINT8_C( 20), UINT8_C( 46), UINT8_C( 24), UINT8_C(164), UINT8_C( 25), UINT8_C( 31) }, { UINT8_C(134), UINT8_C( 74), UINT8_C(220), UINT8_C( 17), UINT8_C( 0), UINT8_C(102), UINT8_C( 46), UINT8_C(238), UINT8_C( 0), UINT8_C(227), UINT8_C( 20), UINT8_C(220), UINT8_C(116), UINT8_C( 0), UINT8_C( 36), UINT8_C( 91) } }, { { { UINT8_C(100), UINT8_C(101), UINT8_C(213), UINT8_C( 70), UINT8_C(229), UINT8_C(183), UINT8_C(157), UINT8_C(138), UINT8_C( 11), UINT8_C(226), UINT8_C(121), UINT8_C( 35), UINT8_C(134), UINT8_C(194), UINT8_C(162), UINT8_C( 72) }, { UINT8_C( 58), UINT8_C( 65), UINT8_C(238), UINT8_C(206), UINT8_C(179), UINT8_C(129), UINT8_C(169), UINT8_C( 67), UINT8_C( 59), UINT8_C(103), UINT8_C( 88), UINT8_C( 35), UINT8_C( 1), UINT8_MAX, UINT8_C(247), UINT8_C(101) }, { UINT8_C(100), UINT8_C(204), UINT8_C(171), 
UINT8_C( 73), UINT8_C(131), UINT8_C( 73), UINT8_C(212), UINT8_C(142), UINT8_C( 43), UINT8_C( 77), UINT8_C(178), UINT8_C(177), UINT8_C( 15), UINT8_C( 84), UINT8_C(250), UINT8_C( 73) } }, { UINT8_C( 6), UINT8_C( 40), UINT8_C( 23), UINT8_C( 25), UINT8_C( 9), UINT8_C(193), UINT8_C(140), UINT8_C( 20), UINT8_C( 40), UINT8_C( 37), UINT8_C( 7), UINT8_C( 41), UINT8_C( 36), UINT8_C( 47), UINT8_C( 46), UINT8_C( 24) }, { UINT8_C(157), UINT8_C( 43), UINT8_C( 67), UINT8_C(103), UINT8_C(226), UINT8_C( 0), UINT8_C( 0), UINT8_C(179), UINT8_C( 43), UINT8_C( 73), UINT8_C(138), UINT8_C( 77), UINT8_C(131), UINT8_C( 73), UINT8_C(250), UINT8_C( 59) } }, { { { UINT8_C( 65), UINT8_C( 35), UINT8_C(131), UINT8_C(170), UINT8_C(228), UINT8_C( 16), UINT8_C( 79), UINT8_C( 12), UINT8_C(245), UINT8_C( 22), UINT8_C( 53), UINT8_C(217), UINT8_C(213), UINT8_C(195), UINT8_C( 33), UINT8_C( 97) }, { UINT8_C(252), UINT8_C(178), UINT8_C(112), UINT8_C(127), UINT8_C( 24), UINT8_C( 13), UINT8_C( 44), UINT8_C(202), UINT8_C( 93), UINT8_C(139), UINT8_C(140), UINT8_C( 1), UINT8_C(228), UINT8_C(152), UINT8_C( 59), UINT8_C( 37) }, { UINT8_C(187), UINT8_C(190), UINT8_C(208), UINT8_C(159), UINT8_C(206), UINT8_C( 31), UINT8_C(171), UINT8_C(195), UINT8_C( 53), UINT8_C(224), UINT8_C(156), UINT8_C( 11), UINT8_C(163), UINT8_C(189), UINT8_C(108), UINT8_C(159) } }, { UINT8_C( 16), UINT8_C( 28), UINT8_C( 30), UINT8_C( 40), UINT8_C( 41), UINT8_C( 27), UINT8_C( 34), UINT8_C( 22), UINT8_C( 22), UINT8_C(223), UINT8_C( 23), UINT8_C( 43), UINT8_C(119), UINT8_C( 34), UINT8_C( 32), UINT8_C( 2) }, { UINT8_C(252), UINT8_C(228), UINT8_C( 59), UINT8_C( 53), UINT8_C(224), UINT8_C( 1), UINT8_C(208), UINT8_C( 44), UINT8_C( 44), UINT8_C( 0), UINT8_C(202), UINT8_C( 11), UINT8_C( 0), UINT8_C(208), UINT8_C(187), UINT8_C(131) } }, { { { UINT8_C( 88), UINT8_C(189), UINT8_C( 37), UINT8_C( 65), UINT8_C( 8), UINT8_C(119), UINT8_C(136), UINT8_C(222), UINT8_C( 86), UINT8_C(207), UINT8_C(153), UINT8_C(205), UINT8_C( 82), UINT8_C(122), UINT8_MAX, 
UINT8_C(147) }, { UINT8_C( 42), UINT8_C(208), UINT8_C(162), UINT8_C(250), UINT8_C( 76), UINT8_C(117), UINT8_MAX, UINT8_C(168), UINT8_C(229), UINT8_C( 15), UINT8_C(167), UINT8_C( 18), UINT8_C(139), UINT8_C( 70), UINT8_C(175), UINT8_C(227) }, { UINT8_C( 3), UINT8_C(212), UINT8_C( 36), UINT8_C( 11), UINT8_C( 75), UINT8_C(172), UINT8_C(233), UINT8_C(162), UINT8_C(124), UINT8_C(131), UINT8_C(111), UINT8_C(206), UINT8_C(253), UINT8_C(111), UINT8_C( 97), UINT8_C( 39) } }, { UINT8_C( 63), UINT8_C( 3), UINT8_C( 33), UINT8_C( 44), UINT8_C( 25), UINT8_C( 32), UINT8_C( 4), UINT8_C( 46), UINT8_C( 47), UINT8_C( 28), UINT8_C(112), UINT8_C( 42), UINT8_C( 34), UINT8_C( 31), UINT8_C( 13), UINT8_C( 37) }, { UINT8_C( 0), UINT8_C( 65), UINT8_C(212), UINT8_C(253), UINT8_C( 15), UINT8_C( 3), UINT8_C( 8), UINT8_C( 97), UINT8_C( 39), UINT8_C(139), UINT8_C( 0), UINT8_C(111), UINT8_C( 36), UINT8_C(227), UINT8_C(122), UINT8_C(172) } }, { { { UINT8_C( 29), UINT8_C(226), UINT8_C(138), UINT8_C(150), UINT8_C( 3), UINT8_C(191), UINT8_C(244), UINT8_C( 50), UINT8_C(155), UINT8_C(100), UINT8_C(237), UINT8_C(189), UINT8_C(131), UINT8_C(138), UINT8_C(226), UINT8_C(118) }, { UINT8_C( 76), UINT8_C( 18), UINT8_C(180), UINT8_C(187), UINT8_C( 43), UINT8_C(149), UINT8_C(165), UINT8_C(200), UINT8_C(229), UINT8_C( 94), UINT8_C( 97), UINT8_C(164), UINT8_C(119), UINT8_C( 34), UINT8_C(162), UINT8_C(148) }, { UINT8_C( 5), UINT8_C( 45), UINT8_C( 42), UINT8_C( 8), UINT8_C(236), UINT8_C( 30), UINT8_C( 58), UINT8_C(135), UINT8_C(130), UINT8_C( 39), UINT8_C( 68), UINT8_C( 5), UINT8_C(178), UINT8_C( 38), UINT8_C(123), UINT8_C(254) } }, { UINT8_C( 8), UINT8_C( 0), UINT8_C( 41), UINT8_C( 3), UINT8_C( 5), UINT8_C( 47), UINT8_C( 43), UINT8_C( 26), UINT8_C( 45), UINT8_C( 45), UINT8_C( 30), UINT8_C( 4), UINT8_C(175), UINT8_C( 0), UINT8_C( 9), UINT8_C(180) }, { UINT8_C(155), UINT8_C( 29), UINT8_C( 39), UINT8_C(150), UINT8_C(191), UINT8_C(254), UINT8_C( 5), UINT8_C( 97), UINT8_C( 38), UINT8_C( 38), UINT8_C(162), UINT8_C( 3), 
UINT8_C( 0), UINT8_C( 29), UINT8_C(100), UINT8_C( 0) } }, { { { UINT8_C( 69), UINT8_C(136), UINT8_C(150), UINT8_C( 10), UINT8_C(231), UINT8_C(193), UINT8_C(180), UINT8_C(164), UINT8_C( 78), UINT8_C( 2), UINT8_C(217), UINT8_C(254), UINT8_C(243), UINT8_C(162), UINT8_C(178), UINT8_C( 16) }, { UINT8_C(149), UINT8_C(111), UINT8_C( 26), UINT8_C(167), UINT8_C(102), UINT8_C(170), UINT8_C( 60), UINT8_C(132), UINT8_C(127), UINT8_C(214), UINT8_C( 85), UINT8_C(121), UINT8_C(235), UINT8_C( 36), UINT8_C(172), UINT8_C( 49) }, { UINT8_C(172), UINT8_C( 66), UINT8_C( 59), UINT8_C(148), UINT8_C( 3), UINT8_C(240), UINT8_C( 56), UINT8_C( 82), UINT8_C(242), UINT8_C( 17), UINT8_C( 80), UINT8_C(229), UINT8_C(179), UINT8_C( 2), UINT8_C(246), UINT8_C( 73) } }, { UINT8_C( 17), UINT8_C( 16), UINT8_C( 0), UINT8_C( 23), UINT8_C( 42), UINT8_C( 44), UINT8_C( 44), UINT8_C( 9), UINT8_C( 2), UINT8_C(177), UINT8_C( 35), UINT8_C(238), UINT8_C( 21), UINT8_C( 47), UINT8_C( 31), UINT8_C( 33) }, { UINT8_C(111), UINT8_C(149), UINT8_C( 69), UINT8_C(132), UINT8_C( 80), UINT8_C(179), UINT8_C(179), UINT8_C( 2), UINT8_C(150), UINT8_C( 0), UINT8_C(148), UINT8_C( 0), UINT8_C(170), UINT8_C( 73), UINT8_C( 49), UINT8_C( 66) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16x3_t t; t.val[0] = simde_vld1q_u8(test_vec[i].t[0]); t.val[1] = simde_vld1q_u8(test_vec[i].t[1]); t.val[2] = simde_vld1q_u8(test_vec[i].t[2]); simde_uint8x16_t idx = simde_vld1q_u8(test_vec[i].idx); simde_uint8x16_t r = simde_vqtbl3q_u8(t, idx); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16x3_t t = simde_test_arm_neon_random_u8x16x3(); simde_uint8x16_private idx_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] %= 48; } } simde_uint8x16_t idx = 
simde_uint8x16_from_private(idx_); simde_uint8x16_t r = simde_vqtbl3q_u8(t, idx); simde_test_arm_neon_write_u8x16x3(2, t, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbl4q_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t t[4][16]; SIMDE_ALIGN_TO_16 uint8_t idx[16]; SIMDE_ALIGN_TO_16 int8_t r[16]; } test_vec[] = { { { { -INT8_C( 76), -INT8_C( 122), INT8_C( 105), -INT8_C( 93), INT8_C( 78), INT8_MAX, -INT8_C( 98), -INT8_C( 72), INT8_C( 8), INT8_C( 25), -INT8_C( 8), -INT8_C( 74), INT8_C( 68), INT8_C( 77), INT8_C( 64), -INT8_C( 67) }, { INT8_C( 25), -INT8_C( 52), INT8_C( 61), -INT8_C( 107), INT8_C( 42), INT8_C( 20), -INT8_C( 6), INT8_C( 41), INT8_C( 112), INT8_C( 51), -INT8_C( 103), -INT8_C( 34), INT8_C( 51), INT8_C( 77), INT8_C( 28), -INT8_C( 25) }, { -INT8_C( 44), -INT8_C( 122), -INT8_C( 118), INT8_C( 34), INT8_C( 5), INT8_C( 41), -INT8_C( 37), INT8_C( 13), INT8_C( 66), -INT8_C( 45), -INT8_C( 61), -INT8_C( 122), INT8_C( 32), INT8_C( 4), INT8_C( 68), INT8_C( 57) }, { -INT8_C( 48), -INT8_C( 127), -INT8_C( 49), -INT8_C( 6), -INT8_C( 107), -INT8_C( 55), INT8_C( 36), INT8_C( 6), -INT8_C( 4), -INT8_C( 67), -INT8_C( 28), INT8_C( 47), INT8_C( 10), INT8_C( 1), INT8_C( 22), -INT8_C( 34) } }, { UINT8_C( 7), UINT8_C( 33), UINT8_C( 1), UINT8_C( 12), UINT8_C(202), UINT8_C( 28), UINT8_C(153), UINT8_C( 12), UINT8_C(175), UINT8_C( 29), UINT8_C( 18), UINT8_C(207), UINT8_C( 33), UINT8_C(214), UINT8_C( 8), UINT8_C( 49) }, { -INT8_C( 72), -INT8_C( 122), -INT8_C( 122), INT8_C( 68), INT8_C( 0), INT8_C( 51), INT8_C( 0), INT8_C( 68), INT8_C( 0), INT8_C( 77), INT8_C( 61), INT8_C( 0), -INT8_C( 122), INT8_C( 0), INT8_C( 8), -INT8_C( 127) } }, { { { -INT8_C( 124), -INT8_C( 10), -INT8_C( 20), INT8_C( 78), -INT8_C( 46), -INT8_C( 123), INT8_C( 90), -INT8_C( 127), -INT8_C( 30), -INT8_C( 20), INT8_C( 80), INT8_C( 67), 
-INT8_C( 61), INT8_C( 89), INT8_C( 116), INT8_C( 27) }, { INT8_C( 48), -INT8_C( 96), INT8_C( 8), -INT8_C( 47), -INT8_C( 17), -INT8_C( 4), INT8_C( 110), -INT8_C( 4), -INT8_C( 44), INT8_C( 58), INT8_C( 19), -INT8_C( 83), INT8_C( 29), INT8_C( 8), INT8_C( 13), -INT8_C( 95) }, { -INT8_C( 1), -INT8_C( 7), -INT8_C( 17), -INT8_C( 47), INT8_C( 126), INT8_C( 73), INT8_C( 83), INT8_C( 97), INT8_C( 54), -INT8_C( 93), -INT8_C( 92), -INT8_C( 7), -INT8_C( 4), INT8_C( 25), INT8_C( 20), INT8_C( 45) }, { -INT8_C( 71), INT8_C( 28), -INT8_C( 2), -INT8_C( 88), INT8_C( 24), INT8_C( 108), -INT8_C( 92), -INT8_C( 20), -INT8_C( 90), -INT8_C( 73), -INT8_C( 103), -INT8_C( 60), -INT8_C( 64), -INT8_C( 90), INT8_C( 101), -INT8_C( 65) } }, { UINT8_C( 31), UINT8_C( 21), UINT8_C( 16), UINT8_C( 30), UINT8_C( 30), UINT8_C(227), UINT8_C( 63), UINT8_C( 20), UINT8_C( 7), UINT8_C( 35), UINT8_C(205), UINT8_C( 3), UINT8_C( 60), UINT8_C( 33), UINT8_C( 48), UINT8_C( 53) }, { -INT8_C( 95), -INT8_C( 4), INT8_C( 48), INT8_C( 13), INT8_C( 13), INT8_C( 0), -INT8_C( 65), -INT8_C( 17), -INT8_C( 127), -INT8_C( 47), INT8_C( 0), INT8_C( 78), -INT8_C( 64), -INT8_C( 7), -INT8_C( 71), INT8_C( 108) } }, { { { INT8_C( 63), INT8_C( 9), INT8_C( 0), -INT8_C( 34), -INT8_C( 19), INT8_MAX, -INT8_C( 78), INT8_C( 116), -INT8_C( 93), INT8_MIN, -INT8_C( 9), -INT8_C( 33), INT8_C( 97), -INT8_C( 88), -INT8_C( 43), INT8_C( 95) }, { INT8_C( 86), INT8_C( 115), INT8_C( 118), INT8_C( 113), -INT8_C( 75), INT8_C( 121), INT8_C( 50), -INT8_C( 81), INT8_C( 21), -INT8_C( 73), INT8_C( 105), INT8_C( 88), -INT8_C( 95), -INT8_C( 30), INT8_C( 59), -INT8_C( 31) }, { -INT8_C( 20), INT8_C( 59), -INT8_C( 65), -INT8_C( 39), -INT8_C( 69), INT8_C( 113), INT8_C( 77), INT8_C( 94), -INT8_C( 15), INT8_C( 68), INT8_C( 61), INT8_C( 83), -INT8_C( 20), INT8_C( 18), -INT8_C( 78), INT8_C( 67) }, { -INT8_C( 123), INT8_C( 40), -INT8_C( 76), INT8_C( 59), -INT8_C( 95), -INT8_C( 26), -INT8_C( 22), -INT8_C( 73), -INT8_C( 99), INT8_C( 84), INT8_C( 15), INT8_C( 62), INT8_C( 
54), INT8_C( 74), INT8_C( 31), INT8_C( 34) } }, { UINT8_C( 6), UINT8_C( 30), UINT8_C( 59), UINT8_C( 65), UINT8_C( 80), UINT8_C( 72), UINT8_C( 31), UINT8_C( 1), UINT8_C( 13), UINT8_C( 28), UINT8_C( 20), UINT8_C( 57), UINT8_C( 47), UINT8_C( 7), UINT8_C(188), UINT8_C( 52) }, { -INT8_C( 78), INT8_C( 59), INT8_C( 62), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 31), INT8_C( 9), -INT8_C( 88), -INT8_C( 95), -INT8_C( 75), INT8_C( 84), INT8_C( 67), INT8_C( 116), INT8_C( 0), -INT8_C( 95) } }, { { { INT8_C( 48), INT8_C( 66), -INT8_C( 23), INT8_MIN, -INT8_C( 117), -INT8_C( 120), -INT8_C( 63), INT8_C( 24), INT8_C( 100), INT8_C( 86), -INT8_C( 111), INT8_C( 83), -INT8_C( 99), INT8_C( 78), -INT8_C( 56), INT8_C( 12) }, { -INT8_C( 66), INT8_C( 119), INT8_C( 29), INT8_C( 21), INT8_C( 17), -INT8_C( 27), INT8_C( 8), -INT8_C( 1), -INT8_C( 67), INT8_C( 58), INT8_C( 36), -INT8_C( 33), -INT8_C( 116), INT8_C( 107), -INT8_C( 121), -INT8_C( 68) }, { -INT8_C( 83), INT8_C( 112), INT8_C( 60), INT8_C( 56), -INT8_C( 8), -INT8_C( 3), INT8_C( 80), INT8_C( 92), INT8_C( 83), -INT8_C( 30), -INT8_C( 80), -INT8_C( 16), INT8_C( 48), INT8_C( 120), -INT8_C( 3), -INT8_C( 18) }, { -INT8_C( 17), INT8_C( 26), INT8_C( 3), INT8_C( 1), INT8_C( 0), INT8_C( 12), INT8_C( 0), -INT8_C( 67), INT8_C( 70), INT8_C( 36), -INT8_C( 100), -INT8_C( 46), -INT8_C( 113), INT8_C( 35), -INT8_C( 114), INT8_C( 61) } }, { UINT8_C(147), UINT8_C(202), UINT8_C( 53), UINT8_C( 11), UINT8_C( 8), UINT8_C(198), UINT8_C( 39), UINT8_C( 27), UINT8_C( 40), UINT8_C( 23), UINT8_C( 12), UINT8_C(216), UINT8_C( 15), UINT8_C( 9), UINT8_C( 6), UINT8_C( 63) }, { INT8_C( 0), INT8_C( 0), INT8_C( 12), INT8_C( 83), INT8_C( 100), INT8_C( 0), INT8_C( 92), -INT8_C( 33), INT8_C( 83), -INT8_C( 1), -INT8_C( 99), INT8_C( 0), INT8_C( 12), INT8_C( 86), -INT8_C( 63), INT8_C( 61) } }, { { { INT8_C( 72), INT8_C( 103), -INT8_C( 67), INT8_C( 16), INT8_C( 45), -INT8_C( 91), INT8_C( 43), -INT8_C( 43), INT8_C( 60), INT8_C( 55), -INT8_C( 83), INT8_C( 76), INT8_C( 64), INT8_C( 
115), INT8_C( 75), INT8_C( 100) }, { INT8_C( 61), INT8_C( 75), -INT8_C( 121), INT8_C( 19), INT8_C( 75), INT8_C( 104), INT8_C( 48), INT8_C( 112), -INT8_C( 28), INT8_C( 31), INT8_C( 37), -INT8_C( 124), -INT8_C( 100), INT8_C( 22), -INT8_C( 74), -INT8_C( 28) }, { INT8_C( 125), INT8_C( 116), -INT8_C( 12), -INT8_C( 86), INT8_C( 25), INT8_C( 32), INT8_MAX, INT8_C( 85), INT8_C( 87), INT8_C( 44), -INT8_C( 95), -INT8_C( 104), -INT8_C( 96), -INT8_C( 20), -INT8_C( 4), -INT8_C( 35) }, { INT8_C( 55), -INT8_C( 125), -INT8_C( 15), -INT8_C( 125), -INT8_C( 21), INT8_C( 33), -INT8_C( 13), -INT8_C( 48), INT8_C( 64), INT8_C( 24), INT8_C( 84), -INT8_C( 36), INT8_C( 47), INT8_C( 10), -INT8_C( 63), -INT8_C( 84) } }, { UINT8_C( 62), UINT8_C( 53), UINT8_C( 87), UINT8_C( 23), UINT8_C( 21), UINT8_C( 22), UINT8_C(237), UINT8_C( 45), UINT8_C( 3), UINT8_C( 14), UINT8_C( 5), UINT8_C( 35), UINT8_C( 59), UINT8_C( 1), UINT8_C( 0), UINT8_C( 50) }, { -INT8_C( 63), INT8_C( 33), INT8_C( 0), INT8_C( 112), INT8_C( 104), INT8_C( 48), INT8_C( 0), -INT8_C( 20), INT8_C( 16), INT8_C( 75), -INT8_C( 91), -INT8_C( 86), -INT8_C( 36), INT8_C( 103), INT8_C( 72), -INT8_C( 15) } }, { { { INT8_C( 37), INT8_C( 116), INT8_C( 116), -INT8_C( 5), INT8_C( 74), INT8_C( 97), INT8_C( 40), INT8_C( 77), -INT8_C( 16), -INT8_C( 19), -INT8_C( 16), INT8_C( 107), -INT8_C( 82), INT8_C( 113), INT8_C( 29), -INT8_C( 14) }, { -INT8_C( 30), INT8_C( 83), INT8_C( 34), INT8_C( 117), INT8_C( 124), INT8_C( 34), INT8_C( 71), -INT8_C( 67), INT8_C( 118), -INT8_C( 10), INT8_C( 46), -INT8_C( 43), INT8_C( 102), INT8_C( 75), -INT8_C( 78), -INT8_C( 116) }, { -INT8_C( 65), INT8_C( 38), -INT8_C( 121), INT8_C( 9), -INT8_C( 120), -INT8_C( 81), INT8_C( 87), INT8_C( 120), -INT8_C( 100), INT8_C( 71), -INT8_C( 29), INT8_C( 74), -INT8_C( 72), INT8_C( 0), INT8_C( 60), -INT8_C( 101) }, { INT8_C( 83), INT8_C( 95), INT8_C( 16), -INT8_C( 49), -INT8_C( 127), INT8_C( 87), -INT8_C( 115), -INT8_C( 8), INT8_C( 78), -INT8_C( 69), -INT8_C( 51), -INT8_C( 76), INT8_C( 6), 
INT8_MAX, INT8_C( 64), -INT8_C( 59) } }, { UINT8_C( 37), UINT8_C( 7), UINT8_C( 14), UINT8_C( 45), UINT8_C( 54), UINT8_C( 37), UINT8_C(165), UINT8_C( 18), UINT8_C( 45), UINT8_C( 8), UINT8_C( 92), UINT8_C( 37), UINT8_C(137), UINT8_C(153), UINT8_C( 0), UINT8_C( 28) }, { -INT8_C( 81), INT8_C( 77), INT8_C( 29), INT8_C( 0), -INT8_C( 115), -INT8_C( 81), INT8_C( 0), INT8_C( 34), INT8_C( 0), -INT8_C( 16), INT8_C( 0), -INT8_C( 81), INT8_C( 0), INT8_C( 0), INT8_C( 37), INT8_C( 102) } }, { { { INT8_C( 50), -INT8_C( 115), -INT8_C( 112), -INT8_C( 87), -INT8_C( 77), INT8_C( 54), -INT8_C( 69), INT8_C( 32), -INT8_C( 66), INT8_C( 24), INT8_C( 69), INT8_C( 71), -INT8_C( 79), INT8_C( 6), INT8_C( 36), -INT8_C( 87) }, { -INT8_C( 42), -INT8_C( 48), INT8_C( 34), -INT8_C( 2), INT8_C( 9), -INT8_C( 108), INT8_C( 116), -INT8_C( 3), -INT8_C( 46), -INT8_C( 97), -INT8_C( 9), -INT8_C( 112), INT8_C( 10), -INT8_C( 74), -INT8_C( 13), INT8_C( 60) }, { INT8_C( 67), -INT8_C( 125), -INT8_C( 27), -INT8_C( 10), -INT8_C( 71), -INT8_C( 95), INT8_C( 22), INT8_C( 120), -INT8_C( 71), INT8_C( 92), -INT8_C( 65), INT8_C( 106), INT8_C( 98), -INT8_C( 29), INT8_C( 19), INT8_C( 56) }, { -INT8_C( 77), INT8_C( 53), INT8_C( 55), -INT8_C( 68), -INT8_C( 55), -INT8_C( 85), -INT8_C( 71), -INT8_C( 100), INT8_C( 74), -INT8_C( 80), INT8_C( 44), INT8_C( 84), INT8_C( 102), INT8_C( 31), -INT8_C( 111), -INT8_C( 86) } }, { UINT8_C( 34), UINT8_C( 54), UINT8_C(160), UINT8_C( 28), UINT8_C( 23), UINT8_C( 55), UINT8_C(212), UINT8_C( 16), UINT8_C( 19), UINT8_C( 19), UINT8_C( 58), UINT8_C( 53), UINT8_C( 55), UINT8_C( 13), UINT8_C(173), UINT8_C( 42) }, { -INT8_C( 27), -INT8_C( 71), INT8_C( 0), INT8_C( 10), -INT8_C( 3), -INT8_C( 100), INT8_C( 0), -INT8_C( 42), -INT8_C( 2), -INT8_C( 2), INT8_C( 44), -INT8_C( 85), -INT8_C( 100), INT8_C( 6), INT8_C( 0), -INT8_C( 65) } }, { { { INT8_C( 54), INT8_C( 2), INT8_C( 50), INT8_C( 78), -INT8_C( 71), INT8_C( 6), INT8_C( 30), -INT8_C( 52), -INT8_C( 103), INT8_C( 89), INT8_C( 65), INT8_C( 16), -INT8_C( 
90), -INT8_C( 18), INT8_C( 59), INT8_C( 41) }, { -INT8_C( 45), INT8_C( 34), INT8_C( 118), INT8_C( 99), -INT8_C( 62), INT8_C( 94), INT8_C( 61), INT8_C( 19), INT8_C( 115), INT8_C( 108), -INT8_C( 53), -INT8_C( 90), INT8_C( 44), INT8_C( 44), INT8_C( 124), INT8_C( 99) }, { INT8_C( 46), -INT8_C( 82), -INT8_C( 79), -INT8_C( 25), -INT8_C( 76), -INT8_C( 49), -INT8_C( 77), INT8_C( 78), INT8_C( 40), -INT8_C( 12), INT8_C( 94), -INT8_C( 49), -INT8_C( 29), -INT8_C( 103), -INT8_C( 8), -INT8_C( 74) }, { -INT8_C( 69), INT8_C( 110), INT8_C( 25), INT8_C( 126), -INT8_C( 51), INT8_C( 86), -INT8_C( 111), INT8_C( 64), -INT8_C( 61), INT8_C( 92), -INT8_C( 26), -INT8_C( 17), -INT8_C( 119), INT8_C( 99), INT8_C( 82), -INT8_C( 73) } }, { UINT8_C( 17), UINT8_C( 3), UINT8_C( 31), UINT8_C( 6), UINT8_C( 19), UINT8_C( 18), UINT8_C( 20), UINT8_C( 59), UINT8_C( 7), UINT8_C( 50), UINT8_C( 10), UINT8_C( 42), UINT8_C( 12), UINT8_C(195), UINT8_C( 32), UINT8_C(199) }, { INT8_C( 34), INT8_C( 78), INT8_C( 99), INT8_C( 30), INT8_C( 99), INT8_C( 118), -INT8_C( 62), -INT8_C( 17), -INT8_C( 52), INT8_C( 25), INT8_C( 65), INT8_C( 94), -INT8_C( 90), INT8_C( 0), INT8_C( 46), INT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16x4_t t; t.val[0] = simde_vld1q_s8(test_vec[i].t[0]); t.val[1] = simde_vld1q_s8(test_vec[i].t[1]); t.val[2] = simde_vld1q_s8(test_vec[i].t[2]); t.val[3] = simde_vld1q_s8(test_vec[i].t[3]); simde_uint8x16_t idx = simde_vld1q_u8(test_vec[i].idx); simde_int8x16_t r = simde_vqtbl4q_s8(t, idx); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16x4_t t = simde_test_arm_neon_random_i8x16x4(); simde_uint8x16_private idx_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 63; } } simde_uint8x16_t idx = 
simde_uint8x16_from_private(idx_); simde_int8x16_t r = simde_vqtbl4q_s8(t, idx); simde_test_arm_neon_write_i8x16x4(2, t, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbl4q_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t t[4][16]; SIMDE_ALIGN_TO_16 uint8_t idx[16]; SIMDE_ALIGN_TO_16 uint8_t r[16]; } test_vec[] = { { { { UINT8_C( 61), UINT8_C(170), UINT8_C(106), UINT8_C( 58), UINT8_C(205), UINT8_C( 68), UINT8_C( 29), UINT8_C( 29), UINT8_C( 37), UINT8_C( 43), UINT8_C(241), UINT8_C(123), UINT8_C(240), UINT8_C(188), UINT8_C( 79), UINT8_C( 68) }, { UINT8_C(247), UINT8_C(121), UINT8_C(252), UINT8_C(147), UINT8_C(228), UINT8_C(140), UINT8_C(235), UINT8_C( 42), UINT8_C( 74), UINT8_C(124), UINT8_C( 84), UINT8_C( 73), UINT8_C( 20), UINT8_C( 61), UINT8_C(118), UINT8_C( 81) }, { UINT8_C(232), UINT8_C(224), UINT8_C(140), UINT8_C(181), UINT8_C( 36), UINT8_C(169), UINT8_C(210), UINT8_C( 73), UINT8_C(213), UINT8_C(195), UINT8_C(196), UINT8_C(197), UINT8_C(127), UINT8_C( 20), UINT8_C( 9), UINT8_C(119) }, { UINT8_C(141), UINT8_C( 5), UINT8_C( 10), UINT8_C(113), UINT8_C(146), UINT8_C(245), UINT8_C(156), UINT8_C(220), UINT8_C(113), UINT8_C(240), UINT8_C( 37), UINT8_C(133), UINT8_C( 46), UINT8_C(155), UINT8_C(215), UINT8_C( 22) } }, { UINT8_C( 59), UINT8_C( 99), UINT8_C( 11), UINT8_C( 32), UINT8_C( 12), UINT8_C( 29), UINT8_C( 41), UINT8_C( 33), UINT8_C( 32), UINT8_C( 46), UINT8_C(167), UINT8_C( 31), UINT8_C( 2), UINT8_C(176), UINT8_C( 22), UINT8_C( 15) }, { UINT8_C(133), UINT8_C( 0), UINT8_C(123), UINT8_C(232), UINT8_C(240), UINT8_C( 61), UINT8_C(195), UINT8_C(224), UINT8_C(232), UINT8_C( 9), UINT8_C( 0), UINT8_C( 81), UINT8_C(106), UINT8_C( 0), UINT8_C(235), UINT8_C( 68) } }, { { { UINT8_C(134), UINT8_C( 92), UINT8_C( 0), UINT8_C(146), UINT8_C(249), UINT8_C(234), UINT8_C(116), UINT8_C( 89), UINT8_C(152), 
UINT8_C( 27), UINT8_C( 56), UINT8_C( 90), UINT8_C(203), UINT8_C(143), UINT8_C(169), UINT8_C(129) }, { UINT8_C(239), UINT8_C(105), UINT8_C(201), UINT8_C( 69), UINT8_C(198), UINT8_C(237), UINT8_C( 11), UINT8_C( 19), UINT8_C( 55), UINT8_C( 87), UINT8_C(142), UINT8_C( 28), UINT8_C(122), UINT8_C( 31), UINT8_C(124), UINT8_C( 0) }, { UINT8_C(123), UINT8_C(125), UINT8_C(147), UINT8_C(116), UINT8_C(103), UINT8_C( 7), UINT8_C(205), UINT8_MAX, UINT8_C( 34), UINT8_C( 5), UINT8_C( 89), UINT8_C(237), UINT8_C(148), UINT8_C( 2), UINT8_C(111), UINT8_C(132) }, { UINT8_C(107), UINT8_C( 56), UINT8_C(201), UINT8_C( 49), UINT8_C( 38), UINT8_C(212), UINT8_C( 68), UINT8_C( 93), UINT8_C( 44), UINT8_C(210), UINT8_C(121), UINT8_C(166), UINT8_C(241), UINT8_C(245), UINT8_C(167), UINT8_C(108) } }, { UINT8_C( 50), UINT8_C( 58), UINT8_C(224), UINT8_C( 25), UINT8_C( 1), UINT8_C( 45), UINT8_C(216), UINT8_C( 35), UINT8_C( 51), UINT8_C( 49), UINT8_C( 16), UINT8_C( 7), UINT8_C( 51), UINT8_C( 63), UINT8_C(203), UINT8_C( 31) }, { UINT8_C(201), UINT8_C(121), UINT8_C( 0), UINT8_C( 87), UINT8_C( 92), UINT8_C( 2), UINT8_C( 0), UINT8_C(116), UINT8_C( 49), UINT8_C( 56), UINT8_C(239), UINT8_C( 89), UINT8_C( 49), UINT8_C(108), UINT8_C( 0), UINT8_C( 0) } }, { { { UINT8_C( 28), UINT8_C( 38), UINT8_C( 53), UINT8_C( 93), UINT8_C(211), UINT8_C( 14), UINT8_C(192), UINT8_C(134), UINT8_C( 63), UINT8_C( 17), UINT8_C(206), UINT8_C(115), UINT8_C(208), UINT8_C(153), UINT8_C( 18), UINT8_C(200) }, { UINT8_C( 46), UINT8_C(226), UINT8_C(230), UINT8_C(151), UINT8_C(247), UINT8_C( 97), UINT8_C( 44), UINT8_C(223), UINT8_C( 85), UINT8_C(103), UINT8_C(184), UINT8_C( 63), UINT8_C( 74), UINT8_C(253), UINT8_C(155), UINT8_C(102) }, { UINT8_C( 35), UINT8_C(208), UINT8_C(196), UINT8_C(247), UINT8_C(222), UINT8_C(132), UINT8_C(125), UINT8_C( 30), UINT8_C(149), UINT8_C( 75), UINT8_C(145), UINT8_C(102), UINT8_C(229), UINT8_C(163), UINT8_C( 46), UINT8_C( 19) }, { UINT8_C(133), UINT8_C( 21), UINT8_C(170), UINT8_C(125), UINT8_C(118), 
UINT8_C(214), UINT8_C( 92), UINT8_C(204), UINT8_C( 61), UINT8_C( 20), UINT8_C( 11), UINT8_C(135), UINT8_C( 17), UINT8_C(166), UINT8_C(238), UINT8_C( 53) } }, { UINT8_C( 54), UINT8_C( 50), UINT8_C( 44), UINT8_C( 85), UINT8_C( 54), UINT8_C( 41), UINT8_C(115), UINT8_C( 12), UINT8_C( 53), UINT8_C( 4), UINT8_C( 50), UINT8_C( 26), UINT8_C( 39), UINT8_C( 32), UINT8_C( 45), UINT8_C( 44) }, { UINT8_C( 92), UINT8_C(170), UINT8_C(229), UINT8_C( 0), UINT8_C( 92), UINT8_C( 75), UINT8_C( 0), UINT8_C(208), UINT8_C(214), UINT8_C(211), UINT8_C(170), UINT8_C(184), UINT8_C( 30), UINT8_C( 35), UINT8_C(163), UINT8_C(229) } }, { { { UINT8_C(210), UINT8_C(140), UINT8_C( 52), UINT8_C( 8), UINT8_C( 53), UINT8_C(167), UINT8_C(212), UINT8_C( 42), UINT8_C(171), UINT8_C( 6), UINT8_C( 4), UINT8_C( 82), UINT8_C(103), UINT8_C(241), UINT8_C(127), UINT8_C(220) }, { UINT8_C(136), UINT8_C( 40), UINT8_C(200), UINT8_C(245), UINT8_C( 46), UINT8_C(128), UINT8_C(160), UINT8_C( 71), UINT8_C( 67), UINT8_C(210), UINT8_C(114), UINT8_C(172), UINT8_C(242), UINT8_C(210), UINT8_C(140), UINT8_C(196) }, { UINT8_C( 94), UINT8_C(192), UINT8_C(204), UINT8_C(148), UINT8_C(104), UINT8_C(161), UINT8_C(190), UINT8_C( 19), UINT8_C(167), UINT8_C(195), UINT8_C(102), UINT8_C( 14), UINT8_C(180), UINT8_C(229), UINT8_C(235), UINT8_C( 61) }, { UINT8_C( 13), UINT8_C(179), UINT8_C( 50), UINT8_C( 59), UINT8_C( 52), UINT8_C(210), UINT8_C(131), UINT8_C(119), UINT8_C(164), UINT8_C(245), UINT8_C( 36), UINT8_C(150), UINT8_C(200), UINT8_C(176), UINT8_C( 90), UINT8_C( 38) } }, { UINT8_C(112), UINT8_C( 39), UINT8_C( 58), UINT8_C( 24), UINT8_C( 8), UINT8_C( 57), UINT8_C( 44), UINT8_C( 47), UINT8_C( 60), UINT8_C( 18), UINT8_C( 62), UINT8_C( 48), UINT8_C( 55), UINT8_C( 41), UINT8_C( 45), UINT8_C( 4) }, { UINT8_C( 0), UINT8_C( 19), UINT8_C( 36), UINT8_C( 67), UINT8_C(171), UINT8_C(245), UINT8_C(180), UINT8_C( 61), UINT8_C(200), UINT8_C(200), UINT8_C( 90), UINT8_C( 13), UINT8_C(119), UINT8_C(195), UINT8_C(229), UINT8_C( 53) } }, { { { 
UINT8_C(239), UINT8_C(161), UINT8_C(229), UINT8_C(183), UINT8_C( 26), UINT8_C(209), UINT8_C( 38), UINT8_C( 86), UINT8_C( 35), UINT8_C(164), UINT8_C( 71), UINT8_C( 90), UINT8_C( 13), UINT8_C(116), UINT8_C(158), UINT8_C( 42) }, { UINT8_C(212), UINT8_C( 30), UINT8_C(122), UINT8_C( 7), UINT8_C( 33), UINT8_C( 66), UINT8_C(222), UINT8_C( 26), UINT8_C( 46), UINT8_C( 75), UINT8_C(218), UINT8_C(202), UINT8_C( 19), UINT8_C(193), UINT8_C(215), UINT8_C( 2) }, { UINT8_C( 99), UINT8_C(188), UINT8_C(185), UINT8_C(125), UINT8_C(141), UINT8_C(224), UINT8_C(212), UINT8_C(176), UINT8_C(132), UINT8_C( 27), UINT8_C( 10), UINT8_C(146), UINT8_C(143), UINT8_C(168), UINT8_C(188), UINT8_C(100) }, { UINT8_C(199), UINT8_C( 54), UINT8_C(107), UINT8_C(232), UINT8_C(121), UINT8_C( 73), UINT8_C( 2), UINT8_C(167), UINT8_C(148), UINT8_C(221), UINT8_C(114), UINT8_C(168), UINT8_C(158), UINT8_C( 73), UINT8_C(170), UINT8_C( 1) } }, { UINT8_C( 5), UINT8_C( 36), UINT8_C( 63), UINT8_C( 18), UINT8_C( 4), UINT8_C( 19), UINT8_C( 2), UINT8_C( 8), UINT8_C( 46), UINT8_C( 12), UINT8_C( 26), UINT8_C( 61), UINT8_C( 52), UINT8_C( 22), UINT8_C( 33), UINT8_C( 59) }, { UINT8_C(209), UINT8_C(141), UINT8_C( 1), UINT8_C(122), UINT8_C( 26), UINT8_C( 7), UINT8_C(229), UINT8_C( 35), UINT8_C(188), UINT8_C( 13), UINT8_C(218), UINT8_C( 73), UINT8_C(121), UINT8_C(222), UINT8_C(188), UINT8_C(168) } }, { { { UINT8_C( 96), UINT8_C(162), UINT8_C(191), UINT8_C(164), UINT8_C(245), UINT8_C( 1), UINT8_C(109), UINT8_C( 99), UINT8_C( 77), UINT8_C(199), UINT8_C( 97), UINT8_C( 66), UINT8_C(222), UINT8_C(194), UINT8_C(253), UINT8_C( 43) }, { UINT8_C(143), UINT8_C(161), UINT8_C(241), UINT8_C(164), UINT8_C( 72), UINT8_C( 94), UINT8_C( 78), UINT8_C(203), UINT8_C( 62), UINT8_C(160), UINT8_C(237), UINT8_C(102), UINT8_C(157), UINT8_C( 17), UINT8_C(148), UINT8_C(253) }, { UINT8_C(179), UINT8_C( 83), UINT8_C(162), UINT8_C(169), UINT8_C( 85), UINT8_C( 15), UINT8_C( 12), UINT8_C(162), UINT8_C(214), UINT8_C(109), UINT8_C(228), UINT8_C(180), UINT8_C( 
48), UINT8_C(226), UINT8_C(223), UINT8_C(191) }, { UINT8_C(131), UINT8_C(208), UINT8_C( 99), UINT8_C(203), UINT8_C( 47), UINT8_C(178), UINT8_C(151), UINT8_C(109), UINT8_C( 82), UINT8_C(132), UINT8_C(211), UINT8_C(239), UINT8_C(149), UINT8_C(103), UINT8_C(237), UINT8_C( 73) } }, { UINT8_C( 59), UINT8_C( 15), UINT8_C(242), UINT8_C( 16), UINT8_C( 30), UINT8_C( 62), UINT8_C( 50), UINT8_C( 52), UINT8_C( 44), UINT8_C( 23), UINT8_C( 41), UINT8_C(156), UINT8_C( 57), UINT8_C( 8), UINT8_C( 91), UINT8_C( 60) }, { UINT8_C(239), UINT8_C( 43), UINT8_C( 0), UINT8_C(143), UINT8_C(148), UINT8_C(237), UINT8_C( 99), UINT8_C( 47), UINT8_C( 48), UINT8_C(203), UINT8_C(109), UINT8_C( 0), UINT8_C(132), UINT8_C( 77), UINT8_C( 0), UINT8_C(149) } }, { { { UINT8_C( 46), UINT8_C(180), UINT8_C(123), UINT8_C(204), UINT8_C(178), UINT8_C( 45), UINT8_C( 65), UINT8_C( 30), UINT8_C(196), UINT8_C(106), UINT8_C(186), UINT8_C( 61), UINT8_C(114), UINT8_C( 21), UINT8_C( 58), UINT8_C( 75) }, { UINT8_C(212), UINT8_C( 2), UINT8_C( 83), UINT8_C( 68), UINT8_C( 97), UINT8_C(200), UINT8_C( 7), UINT8_C( 68), UINT8_C( 17), UINT8_C(186), UINT8_C(189), UINT8_C(193), UINT8_C( 89), UINT8_C(127), UINT8_C( 44), UINT8_C(136) }, { UINT8_C( 51), UINT8_C(167), UINT8_C( 84), UINT8_C(230), UINT8_C(212), UINT8_C(149), UINT8_C( 4), UINT8_C(153), UINT8_MAX, UINT8_C(191), UINT8_C(214), UINT8_C(114), UINT8_C(212), UINT8_C( 16), UINT8_C(189), UINT8_C(168) }, { UINT8_C( 18), UINT8_C( 17), UINT8_C(237), UINT8_C(115), UINT8_C(217), UINT8_C(244), UINT8_C(184), UINT8_C(234), UINT8_C(174), UINT8_C(117), UINT8_C(171), UINT8_C( 8), UINT8_C(245), UINT8_C(215), UINT8_C(144), UINT8_C( 40) } }, { UINT8_C( 62), UINT8_C( 36), UINT8_C( 14), UINT8_C( 83), UINT8_C( 58), UINT8_C( 19), UINT8_C(236), UINT8_C( 57), UINT8_C(210), UINT8_C(194), UINT8_C( 43), UINT8_C( 38), UINT8_C( 19), UINT8_C( 41), UINT8_C( 79), UINT8_C( 37) }, { UINT8_C(144), UINT8_C(212), UINT8_C( 58), UINT8_C( 0), UINT8_C(171), UINT8_C( 68), UINT8_C( 0), UINT8_C(117), UINT8_C( 0), 
UINT8_C( 0), UINT8_C(114), UINT8_C( 4), UINT8_C( 68), UINT8_C(191), UINT8_C( 0), UINT8_C(149) } }, { { { UINT8_C( 91), UINT8_C(178), UINT8_C(210), UINT8_C(213), UINT8_C(197), UINT8_C(190), UINT8_C( 79), UINT8_C(151), UINT8_C(129), UINT8_C( 58), UINT8_C( 62), UINT8_C( 84), UINT8_C(227), UINT8_C(141), UINT8_C( 57), UINT8_C(157) }, { UINT8_C(201), UINT8_C(146), UINT8_C( 49), UINT8_C(249), UINT8_C(163), UINT8_C(175), UINT8_C(216), UINT8_C( 42), UINT8_C(216), UINT8_C(191), UINT8_C(165), UINT8_C(217), UINT8_C( 54), UINT8_C( 73), UINT8_C( 89), UINT8_C(146) }, { UINT8_C(252), UINT8_C( 43), UINT8_C(103), UINT8_C(193), UINT8_C(234), UINT8_C(182), UINT8_C( 89), UINT8_C(107), UINT8_C(241), UINT8_C(151), UINT8_C(191), UINT8_C(212), UINT8_C( 36), UINT8_C(248), UINT8_C(114), UINT8_C(237) }, { UINT8_C(139), UINT8_C(163), UINT8_C(230), UINT8_C( 46), UINT8_C( 82), UINT8_C(191), UINT8_C( 88), UINT8_C( 42), UINT8_C(126), UINT8_C(254), UINT8_C( 4), UINT8_C(181), UINT8_C( 71), UINT8_C( 93), UINT8_C( 71), UINT8_C( 67) } }, { UINT8_C( 8), UINT8_C(174), UINT8_C( 5), UINT8_C( 50), UINT8_C( 37), UINT8_C( 30), UINT8_C(221), UINT8_C( 22), UINT8_C( 53), UINT8_C( 28), UINT8_C( 42), UINT8_C( 25), UINT8_C( 21), UINT8_C(156), UINT8_C( 6), UINT8_C( 32) }, { UINT8_C(129), UINT8_C( 0), UINT8_C(190), UINT8_C(230), UINT8_C(182), UINT8_C( 89), UINT8_C( 0), UINT8_C(216), UINT8_C(191), UINT8_C( 54), UINT8_C(191), UINT8_C(191), UINT8_C(175), UINT8_C( 0), UINT8_C( 79), UINT8_C(252) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16x4_t t; t.val[0] = simde_vld1q_u8(test_vec[i].t[0]); t.val[1] = simde_vld1q_u8(test_vec[i].t[1]); t.val[2] = simde_vld1q_u8(test_vec[i].t[2]); t.val[3] = simde_vld1q_u8(test_vec[i].t[3]); simde_uint8x16_t idx = simde_vld1q_u8(test_vec[i].idx); simde_uint8x16_t r = simde_vqtbl4q_u8(t, idx); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { 
simde_uint8x16x4_t t = simde_test_arm_neon_random_u8x16x4(); simde_uint8x16_private idx_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 63; } } simde_uint8x16_t idx = simde_uint8x16_from_private(idx_); simde_uint8x16_t r = simde_vqtbl4q_u8(t, idx); simde_test_arm_neon_write_u8x16x4(2, t, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vqtbl1_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbl1_u8) #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbl2_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbl2_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbl3_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbl3_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbl4_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbl4_u8) #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_ENTRY(vqtbl1q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbl1q_u8) #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbl2q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbl2q_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbl3q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbl3q_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbl4q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbl4q_u8) #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/qtbx.c000066400000000000000000005417561400333146700164630ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN qtbx #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/qtbx.h" #else #include "../../../simde/arm/neon.h" #endif #if 0 #define PROBABILITY 80 #define probability(p) (rand() < ((HEDLEY_STATIC_CAST(int64_t, RAND_MAX) * (p)) / 100)) #endif static int 
test_simde_vqtbx1_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t a[8]; SIMDE_ALIGN_TO_16 int8_t t[16]; SIMDE_ALIGN_TO_16 uint8_t idx[8]; SIMDE_ALIGN_TO_16 int8_t r[8]; } test_vec[] = { { { INT8_C( 21), INT8_C( 38), INT8_C( 68), INT8_C( 1), -INT8_C( 45), -INT8_C( 53), -INT8_C( 126), -INT8_C( 66) }, { -INT8_C( 63), INT8_C( 125), -INT8_C( 54), INT8_C( 55), -INT8_C( 8), -INT8_C( 76), INT8_C( 80), -INT8_C( 34), INT8_C( 91), -INT8_C( 107), -INT8_C( 19), INT8_C( 119), -INT8_C( 40), INT8_C( 1), INT8_C( 84), INT8_C( 120) }, { UINT8_C( 5), UINT8_C( 4), UINT8_C( 7), UINT8_C( 9), UINT8_C( 4), UINT8_C( 12), UINT8_C( 0), UINT8_C( 9) }, { -INT8_C( 76), -INT8_C( 8), -INT8_C( 34), -INT8_C( 107), -INT8_C( 8), -INT8_C( 40), -INT8_C( 63), -INT8_C( 107) } }, { { INT8_C( 58), INT8_C( 30), INT8_C( 24), INT8_C( 50), -INT8_C( 45), INT8_C( 105), INT8_C( 16), INT8_C( 46) }, { -INT8_C( 2), -INT8_C( 3), -INT8_C( 91), -INT8_C( 42), -INT8_C( 2), -INT8_C( 7), INT8_C( 78), INT8_C( 83), -INT8_C( 3), -INT8_C( 91), INT8_C( 108), INT8_C( 34), INT8_C( 65), INT8_C( 124), INT8_C( 91), INT8_C( 4) }, { UINT8_C( 1), UINT8_C( 6), UINT8_C(154), UINT8_C( 1), UINT8_C( 2), UINT8_C( 14), UINT8_C(210), UINT8_C( 12) }, { -INT8_C( 3), INT8_C( 78), INT8_C( 24), -INT8_C( 3), -INT8_C( 91), INT8_C( 91), INT8_C( 16), INT8_C( 65) } }, { { -INT8_C( 52), -INT8_C( 78), INT8_C( 39), -INT8_C( 54), -INT8_C( 84), INT8_C( 117), INT8_C( 30), -INT8_C( 87) }, { INT8_C( 27), -INT8_C( 118), -INT8_C( 53), INT8_C( 92), INT8_C( 7), INT8_C( 39), INT8_C( 96), -INT8_C( 40), -INT8_C( 67), -INT8_C( 6), -INT8_C( 55), INT8_C( 15), -INT8_C( 24), -INT8_C( 101), -INT8_C( 100), -INT8_C( 11) }, { UINT8_C( 5), UINT8_C( 10), UINT8_C(212), UINT8_C(217), UINT8_C( 41), UINT8_C( 2), UINT8_C( 42), UINT8_C( 5) }, { INT8_C( 39), -INT8_C( 55), INT8_C( 39), -INT8_C( 54), -INT8_C( 84), -INT8_C( 53), INT8_C( 30), INT8_C( 39) } }, { { INT8_C( 104), -INT8_C( 75), INT8_C( 63), INT8_C( 111), -INT8_C( 36), -INT8_C( 97), INT8_C( 71), -INT8_C( 
103) }, { -INT8_C( 102), INT8_C( 16), -INT8_C( 87), -INT8_C( 126), -INT8_C( 85), INT8_C( 69), INT8_C( 119), INT8_C( 48), -INT8_C( 97), INT8_C( 76), INT8_C( 9), -INT8_C( 56), INT8_C( 46), INT8_C( 52), -INT8_C( 67), -INT8_C( 62) }, { UINT8_C( 6), UINT8_C( 13), UINT8_C( 3), UINT8_C( 77), UINT8_C( 90), UINT8_C( 13), UINT8_C( 0), UINT8_C(194) }, { INT8_C( 119), INT8_C( 52), -INT8_C( 126), INT8_C( 111), -INT8_C( 36), INT8_C( 52), -INT8_C( 102), -INT8_C( 103) } }, { { -INT8_C( 120), -INT8_C( 63), INT8_C( 43), INT8_C( 51), INT8_C( 6), -INT8_C( 94), INT8_C( 100), -INT8_C( 90) }, { -INT8_C( 18), INT8_C( 109), INT8_C( 110), INT8_C( 28), -INT8_C( 95), INT8_C( 44), -INT8_C( 33), INT8_C( 39), -INT8_C( 87), -INT8_C( 30), INT8_C( 117), INT8_C( 3), -INT8_C( 49), -INT8_C( 91), -INT8_C( 58), INT8_C( 113) }, { UINT8_C( 4), UINT8_C( 7), UINT8_C(240), UINT8_C( 2), UINT8_C( 0), UINT8_C( 9), UINT8_C( 11), UINT8_C( 8) }, { -INT8_C( 95), INT8_C( 39), INT8_C( 43), INT8_C( 110), -INT8_C( 18), -INT8_C( 30), INT8_C( 3), -INT8_C( 87) } }, { { -INT8_C( 3), -INT8_C( 27), -INT8_C( 93), -INT8_C( 97), INT8_C( 17), -INT8_C( 126), -INT8_C( 58), -INT8_C( 70) }, { INT8_C( 100), INT8_C( 59), -INT8_C( 66), INT8_C( 51), -INT8_C( 32), -INT8_C( 124), -INT8_C( 91), -INT8_C( 12), INT8_C( 123), -INT8_C( 107), INT8_C( 23), -INT8_C( 21), -INT8_C( 98), -INT8_C( 30), -INT8_C( 28), INT8_C( 105) }, { UINT8_C( 8), UINT8_C( 0), UINT8_C( 10), UINT8_C( 0), UINT8_C(160), UINT8_C( 1), UINT8_C( 7), UINT8_C( 13) }, { INT8_C( 123), INT8_C( 100), INT8_C( 23), INT8_C( 100), INT8_C( 17), INT8_C( 59), -INT8_C( 12), -INT8_C( 30) } }, { { INT8_C( 62), INT8_C( 32), -INT8_C( 75), INT8_C( 31), -INT8_C( 92), INT8_C( 90), INT8_C( 19), INT8_C( 32) }, { -INT8_C( 16), INT8_C( 42), INT8_C( 11), -INT8_C( 114), INT8_C( 12), -INT8_C( 17), -INT8_C( 9), -INT8_C( 28), -INT8_C( 1), INT8_C( 49), INT8_C( 85), -INT8_C( 97), -INT8_C( 30), INT8_C( 76), INT8_C( 61), INT8_C( 121) }, { UINT8_C( 7), UINT8_C( 9), UINT8_C( 1), UINT8_C( 5), UINT8_C( 12), 
UINT8_C( 3), UINT8_C( 7), UINT8_C( 11) }, { -INT8_C( 28), INT8_C( 49), INT8_C( 42), -INT8_C( 17), -INT8_C( 30), -INT8_C( 114), -INT8_C( 28), -INT8_C( 97) } }, { { INT8_C( 24), INT8_C( 116), INT8_C( 21), INT8_C( 36), INT8_C( 99), INT8_C( 13), INT8_C( 9), INT8_C( 99) }, { INT8_C( 62), INT8_C( 94), INT8_C( 2), INT8_C( 33), -INT8_C( 86), INT8_C( 63), -INT8_C( 102), -INT8_C( 110), -INT8_C( 71), -INT8_C( 69), -INT8_C( 105), INT8_C( 53), INT8_C( 62), INT8_C( 30), -INT8_C( 16), -INT8_C( 30) }, { UINT8_C( 10), UINT8_C( 10), UINT8_C( 11), UINT8_C( 1), UINT8_C(184), UINT8_C(147), UINT8_C( 8), UINT8_C( 0) }, { -INT8_C( 105), -INT8_C( 105), INT8_C( 53), INT8_C( 94), INT8_C( 99), INT8_C( 13), -INT8_C( 71), INT8_C( 62) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x16_t t = simde_vld1q_s8(test_vec[i].t); simde_uint8x8_t idx = simde_vld1_u8(test_vec[i].idx); simde_int8x8_t r = simde_vqtbx1_s8(a, t, idx); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x16_t t = simde_test_arm_neon_random_i8x16(); simde_uint8x8_private idx_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 15; } } simde_uint8x8_t idx = simde_uint8x8_from_private(idx_); simde_int8x8_t r = simde_vqtbx1_s8(a, t, idx); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, t, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbx1_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t a[8]; SIMDE_ALIGN_TO_16 uint8_t t[16]; 
SIMDE_ALIGN_TO_16 uint8_t idx[8]; SIMDE_ALIGN_TO_16 uint8_t r[8]; } test_vec[] = { { { UINT8_C( 67), UINT8_C(113), UINT8_C(152), UINT8_C(135), UINT8_C(228), UINT8_C(244), UINT8_C(106), UINT8_C(132) }, { UINT8_C(119), UINT8_C(237), UINT8_C( 36), UINT8_C(159), UINT8_C(151), UINT8_C(105), UINT8_C( 81), UINT8_C( 31), UINT8_C( 14), UINT8_C(145), UINT8_C( 71), UINT8_C(115), UINT8_C( 69), UINT8_C(213), UINT8_C(110), UINT8_C(103) }, { UINT8_C( 12), UINT8_C( 11), UINT8_C(105), UINT8_C( 14), UINT8_C( 9), UINT8_C( 9), UINT8_C( 8), UINT8_C( 12) }, { UINT8_C( 69), UINT8_C(115), UINT8_C(152), UINT8_C(110), UINT8_C(145), UINT8_C(145), UINT8_C( 14), UINT8_C( 69) } }, { { UINT8_C(219), UINT8_C(182), UINT8_C( 28), UINT8_C(114), UINT8_C( 31), UINT8_C(109), UINT8_C(145), UINT8_C( 45) }, { UINT8_C(254), UINT8_C(217), UINT8_C(160), UINT8_C( 67), UINT8_C(174), UINT8_C( 15), UINT8_C(170), UINT8_C(219), UINT8_C(154), UINT8_C( 19), UINT8_C( 57), UINT8_C( 84), UINT8_C(204), UINT8_C(177), UINT8_C( 80), UINT8_C(246) }, { UINT8_C( 2), UINT8_C( 4), UINT8_C( 5), UINT8_C( 7), UINT8_C( 2), UINT8_C(151), UINT8_C( 4), UINT8_C(157) }, { UINT8_C(160), UINT8_C(174), UINT8_C( 15), UINT8_C(219), UINT8_C(160), UINT8_C(109), UINT8_C(174), UINT8_C( 45) } }, { { UINT8_C(122), UINT8_C( 59), UINT8_C( 15), UINT8_C( 41), UINT8_C( 74), UINT8_C(186), UINT8_C( 4), UINT8_C(229) }, { UINT8_C(205), UINT8_C( 61), UINT8_C( 57), UINT8_C(154), UINT8_C(238), UINT8_C(137), UINT8_C(144), UINT8_C(177), UINT8_C( 94), UINT8_C(149), UINT8_C(120), UINT8_C( 32), UINT8_C( 45), UINT8_C(188), UINT8_C(190), UINT8_C(123) }, { UINT8_C( 12), UINT8_C( 14), UINT8_C( 8), UINT8_C( 10), UINT8_C(111), UINT8_C( 3), UINT8_C( 6), UINT8_C( 10) }, { UINT8_C( 45), UINT8_C(190), UINT8_C( 94), UINT8_C(120), UINT8_C( 74), UINT8_C(154), UINT8_C(144), UINT8_C(120) } }, { { UINT8_C( 84), UINT8_C( 39), UINT8_C(231), UINT8_C( 66), UINT8_C(177), UINT8_C(119), UINT8_C(243), UINT8_C( 15) }, { UINT8_C( 13), UINT8_C(108), UINT8_C( 47), UINT8_C( 58), UINT8_C( 40), 
UINT8_C(237), UINT8_C(181), UINT8_C( 69), UINT8_C(187), UINT8_C(157), UINT8_C( 47), UINT8_C( 43), UINT8_C( 33), UINT8_C(229), UINT8_C( 21), UINT8_C(224) }, { UINT8_C( 10), UINT8_C( 8), UINT8_C( 9), UINT8_C( 10), UINT8_C( 15), UINT8_C( 8), UINT8_C( 7), UINT8_C(147) }, { UINT8_C( 47), UINT8_C(187), UINT8_C(157), UINT8_C( 47), UINT8_C(224), UINT8_C(187), UINT8_C( 69), UINT8_C( 15) } }, { { UINT8_C( 53), UINT8_C(239), UINT8_C( 28), UINT8_C( 93), UINT8_C(220), UINT8_C(209), UINT8_C(162), UINT8_C(152) }, { UINT8_C(111), UINT8_C(209), UINT8_C(195), UINT8_C(144), UINT8_C(182), UINT8_C(216), UINT8_C(112), UINT8_C( 97), UINT8_C( 0), UINT8_C( 89), UINT8_C(139), UINT8_C( 63), UINT8_C( 49), UINT8_C( 2), UINT8_C(210), UINT8_C( 49) }, { UINT8_C( 96), UINT8_C( 7), UINT8_C(225), UINT8_C( 5), UINT8_C( 0), UINT8_C( 1), UINT8_C( 8), UINT8_C( 5) }, { UINT8_C( 53), UINT8_C( 97), UINT8_C( 28), UINT8_C(216), UINT8_C(111), UINT8_C(209), UINT8_C( 0), UINT8_C(216) } }, { { UINT8_C(119), UINT8_C(199), UINT8_C( 5), UINT8_C( 45), UINT8_C(159), UINT8_C(117), UINT8_C(142), UINT8_C(159) }, { UINT8_C(206), UINT8_C( 25), UINT8_C(222), UINT8_C( 0), UINT8_C( 27), UINT8_C(176), UINT8_C( 49), UINT8_C(123), UINT8_C( 88), UINT8_C( 18), UINT8_C(177), UINT8_C(200), UINT8_C(179), UINT8_C(201), UINT8_C(110), UINT8_C( 67) }, { UINT8_C( 13), UINT8_C(113), UINT8_C(176), UINT8_C( 3), UINT8_C( 22), UINT8_C( 4), UINT8_C( 8), UINT8_C(141) }, { UINT8_C(201), UINT8_C(199), UINT8_C( 5), UINT8_C( 0), UINT8_C(159), UINT8_C( 27), UINT8_C( 88), UINT8_C(159) } }, { { UINT8_C( 99), UINT8_C(153), UINT8_C(193), UINT8_C(126), UINT8_C( 74), UINT8_C(242), UINT8_C(250), UINT8_C(162) }, { UINT8_C( 4), UINT8_C(171), UINT8_C(106), UINT8_C(184), UINT8_C(116), UINT8_C(216), UINT8_C(251), UINT8_C(113), UINT8_C( 73), UINT8_C(171), UINT8_C(117), UINT8_C( 96), UINT8_C( 96), UINT8_C(237), UINT8_C(237), UINT8_C(220) }, { UINT8_C(107), UINT8_C(168), UINT8_C(247), UINT8_C( 13), UINT8_C( 2), UINT8_C( 2), UINT8_C( 14), UINT8_C( 5) }, { UINT8_C( 
99), UINT8_C(153), UINT8_C(193), UINT8_C(237), UINT8_C(106), UINT8_C(106), UINT8_C(237), UINT8_C(216) } }, { { UINT8_C(120), UINT8_C(162), UINT8_C(142), UINT8_C(236), UINT8_C(123), UINT8_C(137), UINT8_C( 94), UINT8_C(196) }, { UINT8_C( 53), UINT8_C(211), UINT8_C( 36), UINT8_C(149), UINT8_C(192), UINT8_C( 18), UINT8_C(113), UINT8_C( 43), UINT8_C(186), UINT8_C(104), UINT8_C(137), UINT8_C(172), UINT8_C( 27), UINT8_C(167), UINT8_C( 1), UINT8_C(103) }, { UINT8_C( 7), UINT8_C( 5), UINT8_C(253), UINT8_C( 88), UINT8_C( 2), UINT8_C( 5), UINT8_C( 14), UINT8_C( 11) }, { UINT8_C( 43), UINT8_C( 18), UINT8_C(142), UINT8_C(236), UINT8_C( 36), UINT8_C( 18), UINT8_C( 1), UINT8_C(172) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x16_t t = simde_vld1q_u8(test_vec[i].t); simde_uint8x8_t idx = simde_vld1_u8(test_vec[i].idx); simde_uint8x8_t r = simde_vqtbx1_u8(a, t, idx); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x16_t t = simde_test_arm_neon_random_u8x16(); simde_uint8x8_private idx_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 15; } } simde_uint8x8_t idx = simde_uint8x8_from_private(idx_); simde_uint8x8_t r = simde_vqtbx1_u8(a, t, idx); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, t, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #if !defined(SIMDE_BUG_INTEL_857088) static int test_simde_vqtbx2_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t a[8]; SIMDE_ALIGN_TO_16 int8_t 
t[2][16]; SIMDE_ALIGN_TO_16 uint8_t idx[8]; SIMDE_ALIGN_TO_16 int8_t r[8]; } test_vec[] = { { { -INT8_C( 60), INT8_C( 123), INT8_C( 121), INT8_C( 34), -INT8_C( 64), -INT8_C( 121), -INT8_C( 60), -INT8_C( 3) }, { { INT8_C( 77), -INT8_C( 45), -INT8_C( 96), -INT8_C( 118), -INT8_C( 126), -INT8_C( 124), INT8_C( 78), -INT8_C( 32), INT8_C( 18), -INT8_C( 54), INT8_C( 120), INT8_C( 87), -INT8_C( 104), -INT8_C( 50), INT8_C( 126), INT8_C( 110) }, { INT8_C( 76), -INT8_C( 38), INT8_C( 14), -INT8_C( 62), -INT8_C( 38), INT8_C( 3), INT8_C( 6), -INT8_C( 97), INT8_C( 126), INT8_MAX, -INT8_C( 63), INT8_C( 62), INT8_C( 6), -INT8_C( 123), INT8_C( 60), INT8_C( 83) } }, { UINT8_C( 25), UINT8_C( 28), UINT8_C( 30), UINT8_C( 27), UINT8_C( 0), UINT8_C( 12), UINT8_C( 27), UINT8_C( 18) }, { INT8_MAX, INT8_C( 6), INT8_C( 60), INT8_C( 62), INT8_C( 77), -INT8_C( 104), INT8_C( 62), INT8_C( 14) } }, { { INT8_C( 34), INT8_C( 12), INT8_C( 16), -INT8_C( 3), INT8_C( 16), INT8_C( 23), -INT8_C( 100), -INT8_C( 114) }, { { -INT8_C( 106), INT8_C( 93), -INT8_C( 51), -INT8_C( 99), -INT8_C( 29), INT8_C( 9), -INT8_C( 16), INT8_C( 60), -INT8_C( 27), -INT8_C( 50), INT8_C( 23), INT8_C( 69), -INT8_C( 5), -INT8_C( 46), -INT8_C( 72), -INT8_C( 14) }, { INT8_C( 5), -INT8_C( 127), -INT8_C( 127), INT8_C( 6), -INT8_C( 55), INT8_MAX, INT8_C( 84), -INT8_C( 20), -INT8_C( 116), INT8_C( 101), -INT8_C( 23), -INT8_C( 100), INT8_C( 124), -INT8_C( 123), INT8_C( 42), INT8_C( 18) } }, { UINT8_C(226), UINT8_C( 23), UINT8_C( 15), UINT8_C( 5), UINT8_C( 0), UINT8_C(160), UINT8_C( 1), UINT8_C( 5) }, { INT8_C( 34), -INT8_C( 20), -INT8_C( 14), INT8_C( 9), -INT8_C( 106), INT8_C( 23), INT8_C( 93), INT8_C( 9) } }, { { INT8_C( 100), -INT8_C( 35), -INT8_C( 10), INT8_C( 46), INT8_C( 92), INT8_C( 74), INT8_C( 26), -INT8_C( 24) }, { { -INT8_C( 81), INT8_C( 3), -INT8_C( 124), INT8_C( 43), -INT8_C( 120), -INT8_C( 81), INT8_C( 62), INT8_C( 106), -INT8_C( 90), -INT8_C( 19), INT8_C( 48), -INT8_C( 89), -INT8_C( 115), INT8_C( 49), -INT8_C( 116), -INT8_C( 
4) }, { INT8_C( 74), -INT8_C( 73), INT8_C( 101), INT8_C( 52), -INT8_C( 102), -INT8_C( 63), INT8_C( 36), -INT8_C( 1), -INT8_C( 98), INT8_C( 26), INT8_C( 45), -INT8_C( 6), INT8_C( 100), INT8_C( 71), -INT8_C( 29), INT8_C( 20) } }, { UINT8_C( 74), UINT8_C( 7), UINT8_C( 31), UINT8_C( 18), UINT8_C( 22), UINT8_C( 29), UINT8_C( 28), UINT8_C( 29) }, { INT8_C( 100), INT8_C( 106), INT8_C( 20), INT8_C( 101), INT8_C( 36), INT8_C( 71), INT8_C( 100), INT8_C( 71) } }, { { -INT8_C( 88), INT8_C( 90), INT8_C( 28), INT8_C( 66), INT8_C( 27), INT8_C( 64), INT8_C( 65), -INT8_C( 71) }, { { INT8_C( 90), INT8_C( 110), -INT8_C( 77), -INT8_C( 65), -INT8_C( 75), -INT8_C( 106), -INT8_C( 45), -INT8_C( 1), -INT8_C( 2), INT8_C( 18), -INT8_C( 47), INT8_C( 20), -INT8_C( 112), INT8_C( 14), -INT8_C( 47), -INT8_C( 5) }, { INT8_C( 122), INT8_C( 53), -INT8_C( 13), INT8_C( 24), INT8_C( 38), -INT8_C( 24), INT8_C( 0), -INT8_C( 50), INT8_C( 66), INT8_C( 29), INT8_C( 16), INT8_C( 93), INT8_C( 93), INT8_C( 82), INT8_C( 22), -INT8_C( 72) } }, { UINT8_C( 0), UINT8_C( 9), UINT8_C( 23), UINT8_C(118), UINT8_C( 0), UINT8_C( 10), UINT8_C( 21), UINT8_C( 30) }, { INT8_C( 90), INT8_C( 18), -INT8_C( 50), INT8_C( 66), INT8_C( 90), -INT8_C( 47), -INT8_C( 24), INT8_C( 22) } }, { { INT8_C( 121), -INT8_C( 37), -INT8_C( 24), -INT8_C( 97), -INT8_C( 61), -INT8_C( 24), INT8_C( 109), INT8_C( 5) }, { { INT8_C( 5), INT8_C( 126), INT8_C( 98), INT8_C( 99), -INT8_C( 48), INT8_C( 120), INT8_C( 27), -INT8_C( 112), INT8_C( 65), -INT8_C( 110), INT8_C( 6), -INT8_C( 95), -INT8_C( 36), INT8_C( 124), -INT8_C( 1), INT8_C( 56) }, { -INT8_C( 61), INT8_C( 114), INT8_C( 37), INT8_C( 24), -INT8_C( 74), INT8_C( 12), -INT8_C( 25), INT8_C( 47), -INT8_C( 25), -INT8_C( 49), -INT8_C( 49), -INT8_C( 86), -INT8_C( 72), INT8_C( 60), -INT8_C( 81), -INT8_C( 67) } }, { UINT8_C(186), UINT8_C( 17), UINT8_C( 0), UINT8_C( 10), UINT8_C( 9), UINT8_C( 27), UINT8_C( 27), UINT8_C( 11) }, { INT8_C( 121), INT8_C( 114), INT8_C( 5), INT8_C( 6), -INT8_C( 110), -INT8_C( 86), 
-INT8_C( 86), -INT8_C( 95) } }, { { -INT8_C( 34), INT8_C( 7), INT8_C( 120), -INT8_C( 108), INT8_C( 19), INT8_C( 96), -INT8_C( 61), -INT8_C( 5) }, { { INT8_C( 47), -INT8_C( 110), -INT8_C( 91), -INT8_C( 25), -INT8_C( 49), INT8_C( 85), -INT8_C( 91), -INT8_C( 119), INT8_C( 102), -INT8_C( 59), INT8_C( 20), -INT8_C( 16), INT8_C( 1), INT8_C( 47), -INT8_C( 69), -INT8_C( 50) }, { INT8_C( 80), INT8_C( 39), INT8_C( 120), -INT8_C( 18), -INT8_C( 109), INT8_C( 90), INT8_C( 78), INT8_C( 113), INT8_C( 97), -INT8_C( 57), INT8_C( 5), INT8_C( 116), INT8_C( 39), -INT8_C( 55), INT8_C( 111), INT8_C( 86) } }, { UINT8_C( 91), UINT8_C( 21), UINT8_C( 30), UINT8_C( 10), UINT8_C( 10), UINT8_C(227), UINT8_C( 20), UINT8_C( 16) }, { -INT8_C( 34), INT8_C( 90), INT8_C( 111), INT8_C( 20), INT8_C( 20), INT8_C( 96), -INT8_C( 109), INT8_C( 80) } }, { { -INT8_C( 93), -INT8_C( 16), INT8_C( 53), INT8_C( 54), INT8_C( 74), -INT8_C( 124), -INT8_C( 88), -INT8_C( 85) }, { { INT8_C( 75), -INT8_C( 83), INT8_C( 31), INT8_C( 114), INT8_C( 118), -INT8_C( 113), -INT8_C( 56), -INT8_C( 46), -INT8_C( 92), INT8_C( 6), -INT8_C( 4), INT8_C( 14), -INT8_C( 23), -INT8_C( 80), -INT8_C( 34), -INT8_C( 110) }, { INT8_C( 120), -INT8_C( 97), INT8_C( 59), INT8_C( 111), INT8_C( 26), -INT8_C( 77), -INT8_C( 73), -INT8_C( 67), -INT8_C( 93), -INT8_C( 20), -INT8_C( 12), -INT8_C( 19), INT8_C( 112), -INT8_C( 100), -INT8_C( 104), -INT8_C( 69) } }, { UINT8_C( 9), UINT8_C( 24), UINT8_C( 45), UINT8_C( 0), UINT8_C( 71), UINT8_C(246), UINT8_C( 18), UINT8_C( 11) }, { INT8_C( 6), -INT8_C( 93), INT8_C( 53), INT8_C( 75), INT8_C( 74), -INT8_C( 124), INT8_C( 59), INT8_C( 14) } }, { { INT8_C( 118), -INT8_C( 77), INT8_C( 39), -INT8_C( 111), INT8_C( 103), -INT8_C( 34), INT8_C( 78), INT8_C( 10) }, { { -INT8_C( 54), INT8_C( 66), -INT8_C( 8), INT8_C( 59), -INT8_C( 34), -INT8_C( 112), -INT8_C( 10), INT8_C( 40), INT8_C( 72), INT8_C( 36), -INT8_C( 24), -INT8_C( 113), INT8_C( 26), INT8_C( 122), INT8_C( 122), INT8_C( 22) }, { INT8_C( 8), INT8_C( 115), -INT8_C( 
4), INT8_C( 71), INT8_C( 75), INT8_C( 116), -INT8_C( 1), -INT8_C( 63), INT8_C( 40), INT8_C( 38), INT8_C( 82), -INT8_C( 113), INT8_C( 4), -INT8_C( 95), -INT8_C( 103), -INT8_C( 50) } }, { UINT8_C( 3), UINT8_C( 17), UINT8_C( 9), UINT8_C( 2), UINT8_C( 2), UINT8_C( 0), UINT8_C( 10), UINT8_C(106) }, { INT8_C( 59), INT8_C( 115), INT8_C( 36), -INT8_C( 8), -INT8_C( 8), -INT8_C( 54), -INT8_C( 24), INT8_C( 10) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x16x2_t t; t.val[0] = simde_vld1q_s8(test_vec[i].t[0]); t.val[1] = simde_vld1q_s8(test_vec[i].t[1]); simde_uint8x8_t idx = simde_vld1_u8(test_vec[i].idx); simde_int8x8_t r = simde_vqtbx2_s8(a, t, idx); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x16x2_t t = simde_test_arm_neon_random_i8x16x2(); simde_uint8x8_private idx_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 31; } } simde_uint8x8_t idx = simde_uint8x8_from_private(idx_); simde_int8x8_t r = simde_vqtbx2_s8(a, t, idx); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16x2(2, t, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbx2_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t a[8]; SIMDE_ALIGN_TO_16 uint8_t t[2][16]; SIMDE_ALIGN_TO_16 uint8_t idx[8]; SIMDE_ALIGN_TO_16 uint8_t r[8]; } test_vec[] = { { { UINT8_C( 23), UINT8_C( 34), UINT8_C( 21), UINT8_C(203), UINT8_C( 53), UINT8_C(117), UINT8_C(140), UINT8_C(178) }, { { UINT8_C(160), UINT8_C(125), 
UINT8_C(156), UINT8_C( 73), UINT8_C(180), UINT8_C(216), UINT8_C( 67), UINT8_C( 66), UINT8_C(189), UINT8_C(226), UINT8_C(220), UINT8_C(210), UINT8_C( 12), UINT8_C(225), UINT8_C(218), UINT8_C(164) }, { UINT8_C( 91), UINT8_C( 89), UINT8_C(144), UINT8_C(176), UINT8_C(112), UINT8_C( 30), UINT8_C(226), UINT8_C(135), UINT8_C( 64), UINT8_C(248), UINT8_C( 83), UINT8_C(117), UINT8_C(109), UINT8_C(223), UINT8_C( 40), UINT8_C( 13) } }, { UINT8_C( 28), UINT8_C( 4), UINT8_C( 22), UINT8_C( 16), UINT8_C( 28), UINT8_C( 25), UINT8_C( 19), UINT8_C( 25) }, { UINT8_C(109), UINT8_C(180), UINT8_C(226), UINT8_C( 91), UINT8_C(109), UINT8_C(248), UINT8_C(176), UINT8_C(248) } }, { { UINT8_C( 95), UINT8_C(188), UINT8_C( 28), UINT8_C(207), UINT8_C(219), UINT8_MAX, UINT8_C( 86), UINT8_C( 27) }, { { UINT8_C(247), UINT8_C(169), UINT8_C(145), UINT8_C(100), UINT8_C(136), UINT8_C(185), UINT8_C(114), UINT8_C(229), UINT8_C(125), UINT8_C(200), UINT8_C(245), UINT8_C( 26), UINT8_C( 98), UINT8_C( 72), UINT8_C(115), UINT8_C(221) }, { UINT8_C(120), UINT8_C(159), UINT8_C(101), UINT8_C(137), UINT8_C(164), UINT8_C(146), UINT8_C(245), UINT8_C( 3), UINT8_C( 78), UINT8_C( 18), UINT8_C(210), UINT8_C( 41), UINT8_C( 17), UINT8_C( 41), UINT8_C( 69), UINT8_C( 8) } }, { UINT8_C( 18), UINT8_C( 22), UINT8_C( 12), UINT8_C( 27), UINT8_C(143), UINT8_C(222), UINT8_C( 0), UINT8_C( 12) }, { UINT8_C(101), UINT8_C(245), UINT8_C( 98), UINT8_C( 41), UINT8_C(219), UINT8_MAX, UINT8_C(247), UINT8_C( 98) } }, { { UINT8_C( 57), UINT8_C( 76), UINT8_C(127), UINT8_C(221), UINT8_C(222), UINT8_C(116), UINT8_C(225), UINT8_C( 44) }, { { UINT8_C(134), UINT8_C(179), UINT8_C( 86), UINT8_C(151), UINT8_C(220), UINT8_C(155), UINT8_C(159), UINT8_C(175), UINT8_C(113), UINT8_C( 12), UINT8_C( 10), UINT8_C( 0), UINT8_C(234), UINT8_C( 74), UINT8_C( 12), UINT8_C(145) }, { UINT8_C(127), UINT8_C( 51), UINT8_C(154), UINT8_C(253), UINT8_C(205), UINT8_C(129), UINT8_C(243), UINT8_C( 6), UINT8_C(205), UINT8_C(114), UINT8_C(227), UINT8_C(171), UINT8_C(231), 
UINT8_C(196), UINT8_C(215), UINT8_C(109) } }, { UINT8_C(120), UINT8_C( 13), UINT8_C( 5), UINT8_C( 20), UINT8_C(200), UINT8_C( 4), UINT8_C( 3), UINT8_C( 57) }, { UINT8_C( 57), UINT8_C( 74), UINT8_C(155), UINT8_C(205), UINT8_C(222), UINT8_C(220), UINT8_C(151), UINT8_C( 44) } }, { { UINT8_C(121), UINT8_C(199), UINT8_C(212), UINT8_C( 70), UINT8_C( 72), UINT8_C(200), UINT8_C( 76), UINT8_C( 21) }, { { UINT8_C( 58), UINT8_C( 47), UINT8_C(192), UINT8_C( 33), UINT8_C(244), UINT8_C(151), UINT8_C(143), UINT8_C(108), UINT8_C(197), UINT8_C(148), UINT8_C(192), UINT8_C(141), UINT8_C( 56), UINT8_C(196), UINT8_C(199), UINT8_C(233) }, { UINT8_C(209), UINT8_C( 0), UINT8_C(132), UINT8_C( 41), UINT8_C( 70), UINT8_C(176), UINT8_C( 0), UINT8_C(191), UINT8_C(119), UINT8_C(212), UINT8_C( 5), UINT8_C(191), UINT8_C(156), UINT8_C( 81), UINT8_C(212), UINT8_C(215) } }, { UINT8_C( 1), UINT8_C( 20), UINT8_C( 24), UINT8_C( 21), UINT8_C( 12), UINT8_C(135), UINT8_C( 1), UINT8_C( 17) }, { UINT8_C( 47), UINT8_C( 70), UINT8_C(119), UINT8_C(176), UINT8_C( 56), UINT8_C(200), UINT8_C( 47), UINT8_C( 0) } }, { { UINT8_C( 70), UINT8_C(193), UINT8_C( 96), UINT8_C(140), UINT8_C(113), UINT8_C( 96), UINT8_C( 76), UINT8_C(233) }, { { UINT8_C( 52), UINT8_C( 81), UINT8_C(168), UINT8_C(209), UINT8_C(163), UINT8_C(125), UINT8_C(168), UINT8_C( 36), UINT8_C( 17), UINT8_C(160), UINT8_C(153), UINT8_C( 61), UINT8_C( 40), UINT8_C(122), UINT8_C( 46), UINT8_C( 67) }, { UINT8_C( 27), UINT8_C(173), UINT8_C(151), UINT8_C(129), UINT8_C(242), UINT8_C(212), UINT8_C(184), UINT8_C( 56), UINT8_C(149), UINT8_C( 24), UINT8_C(197), UINT8_C( 7), UINT8_C(120), UINT8_C( 17), UINT8_C(240), UINT8_C(172) } }, { UINT8_C( 2), UINT8_C( 24), UINT8_C( 29), UINT8_C( 5), UINT8_C( 21), UINT8_C( 5), UINT8_C( 9), UINT8_C( 7) }, { UINT8_C(168), UINT8_C(149), UINT8_C( 17), UINT8_C(125), UINT8_C(212), UINT8_C(125), UINT8_C(160), UINT8_C( 36) } }, { { UINT8_C( 64), UINT8_C(201), UINT8_C(217), UINT8_C( 50), UINT8_C(157), UINT8_C(145), UINT8_C(107), UINT8_C( 
51) }, { { UINT8_C(169), UINT8_C( 48), UINT8_C( 58), UINT8_C( 33), UINT8_C( 65), UINT8_C( 42), UINT8_C(205), UINT8_C(163), UINT8_C(194), UINT8_C( 75), UINT8_C(169), UINT8_C(216), UINT8_C(112), UINT8_C(210), UINT8_MAX, UINT8_C( 54) }, { UINT8_C(149), UINT8_C( 99), UINT8_C( 36), UINT8_C(209), UINT8_C(246), UINT8_C( 86), UINT8_C( 41), UINT8_C( 54), UINT8_C( 31), UINT8_C( 2), UINT8_C(105), UINT8_C(188), UINT8_C(147), UINT8_C(212), UINT8_C(239), UINT8_C( 60) } }, { UINT8_C( 4), UINT8_C( 41), UINT8_C( 29), UINT8_C( 5), UINT8_C( 19), UINT8_C( 43), UINT8_C( 8), UINT8_C( 22) }, { UINT8_C( 65), UINT8_C(201), UINT8_C(212), UINT8_C( 42), UINT8_C(209), UINT8_C(145), UINT8_C(194), UINT8_C( 41) } }, { { UINT8_C( 80), UINT8_C( 65), UINT8_C(202), UINT8_C( 71), UINT8_C(151), UINT8_C(244), UINT8_C(125), UINT8_C(182) }, { { UINT8_C(246), UINT8_C(230), UINT8_C(115), UINT8_C(138), UINT8_C(186), UINT8_C( 98), UINT8_C(198), UINT8_C(190), UINT8_C(140), UINT8_C( 36), UINT8_C( 3), UINT8_C(223), UINT8_C( 79), UINT8_C(236), UINT8_C(245), UINT8_C(197) }, { UINT8_C(125), UINT8_C(227), UINT8_C(171), UINT8_C(225), UINT8_C(208), UINT8_C(200), UINT8_C(218), UINT8_C( 33), UINT8_C( 10), UINT8_C(165), UINT8_C(104), UINT8_C(161), UINT8_C(153), UINT8_C(229), UINT8_C( 88), UINT8_C(143) } }, { UINT8_C( 12), UINT8_C( 11), UINT8_C( 25), UINT8_C( 6), UINT8_C( 13), UINT8_C(224), UINT8_C( 5), UINT8_C( 25) }, { UINT8_C( 79), UINT8_C(223), UINT8_C(165), UINT8_C(198), UINT8_C(236), UINT8_C(244), UINT8_C( 98), UINT8_C(165) } }, { { UINT8_C(114), UINT8_C(195), UINT8_C(147), UINT8_C( 66), UINT8_C(140), UINT8_C(110), UINT8_C( 99), UINT8_C(150) }, { { UINT8_C( 19), UINT8_C(203), UINT8_C( 55), UINT8_C(172), UINT8_C(177), UINT8_C(143), UINT8_C( 59), UINT8_C(125), UINT8_C( 90), UINT8_C( 85), UINT8_C( 3), UINT8_C(136), UINT8_C( 53), UINT8_C( 72), UINT8_C( 65), UINT8_C( 57) }, { UINT8_C(145), UINT8_C(218), UINT8_C(140), UINT8_C(197), UINT8_C(105), UINT8_C(164), UINT8_C(119), UINT8_C(219), UINT8_C(103), UINT8_C( 11), 
UINT8_C( 29), UINT8_C(243), UINT8_C(121), UINT8_C(129), UINT8_C(137), UINT8_C(140) } }, { UINT8_C( 12), UINT8_C(193), UINT8_C( 56), UINT8_C( 29), UINT8_C( 16), UINT8_C(115), UINT8_C( 26), UINT8_C( 11) }, { UINT8_C( 53), UINT8_C(195), UINT8_C(147), UINT8_C(129), UINT8_C(145), UINT8_C(110), UINT8_C( 29), UINT8_C(136) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x16x2_t t; t.val[0] = simde_vld1q_u8(test_vec[i].t[0]); t.val[1] = simde_vld1q_u8(test_vec[i].t[1]); simde_uint8x8_t idx = simde_vld1_u8(test_vec[i].idx); simde_uint8x8_t r = simde_vqtbx2_u8(a, t, idx); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x16x2_t t = simde_test_arm_neon_random_u8x16x2(); simde_uint8x8_private idx_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 31; } } simde_uint8x8_t idx = simde_uint8x8_from_private(idx_); simde_uint8x8_t r = simde_vqtbx2_u8(a, t, idx); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16x2(2, t, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbx3_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t a[8]; SIMDE_ALIGN_TO_16 int8_t t[3][16]; SIMDE_ALIGN_TO_16 uint8_t idx[8]; SIMDE_ALIGN_TO_16 int8_t r[8]; } test_vec[] = { { { INT8_C( 104), INT8_C( 25), INT8_C( 39), INT8_C( 61), INT8_C( 88), -INT8_C( 17), -INT8_C( 117), INT8_C( 68) }, { { -INT8_C( 22), INT8_C( 63), INT8_C( 59), -INT8_C( 30), INT8_C( 108), -INT8_C( 27), INT8_C( 12), -INT8_C( 11), -INT8_C( 7), 
INT8_C( 97), -INT8_C( 79), -INT8_C( 52), INT8_C( 3), -INT8_C( 76), INT8_C( 106), -INT8_C( 18) }, { INT8_C( 100), INT8_C( 55), INT8_C( 30), -INT8_C( 29), INT8_C( 49), -INT8_C( 47), INT8_C( 103), -INT8_C( 103), -INT8_C( 22), -INT8_C( 114), -INT8_C( 41), INT8_C( 66), INT8_C( 125), INT8_C( 98), -INT8_C( 122), INT8_C( 104) }, { -INT8_C( 94), -INT8_C( 63), INT8_C( 74), INT8_C( 14), -INT8_C( 90), INT8_C( 86), INT8_C( 4), -INT8_C( 97), -INT8_C( 72), -INT8_C( 75), INT8_C( 108), -INT8_C( 69), INT8_C( 106), -INT8_C( 42), -INT8_C( 87), -INT8_C( 50) } }, { UINT8_C( 13), UINT8_C( 7), UINT8_C( 33), UINT8_C( 14), UINT8_C( 8), UINT8_C( 24), UINT8_C( 23), UINT8_C(130) }, { -INT8_C( 76), -INT8_C( 11), -INT8_C( 63), INT8_C( 106), -INT8_C( 7), -INT8_C( 22), -INT8_C( 103), INT8_C( 68) } }, { { INT8_C( 12), -INT8_C( 42), -INT8_C( 63), -INT8_C( 77), INT8_C( 44), -INT8_C( 59), INT8_C( 82), -INT8_C( 28) }, { { INT8_C( 123), -INT8_C( 66), -INT8_C( 97), -INT8_C( 27), -INT8_C( 108), INT8_C( 72), -INT8_C( 77), -INT8_C( 95), INT8_C( 15), INT8_C( 100), -INT8_C( 33), -INT8_C( 89), INT8_C( 124), -INT8_C( 73), INT8_C( 41), INT8_C( 34) }, { INT8_C( 101), -INT8_C( 18), INT8_C( 70), INT8_C( 118), INT8_C( 57), -INT8_C( 46), INT8_C( 41), INT8_C( 69), -INT8_C( 88), -INT8_C( 21), -INT8_C( 8), -INT8_C( 43), -INT8_C( 80), INT8_C( 75), -INT8_C( 71), INT8_C( 43) }, { INT8_C( 9), INT8_C( 89), INT8_C( 16), -INT8_C( 98), -INT8_C( 95), -INT8_C( 61), INT8_C( 63), -INT8_C( 79), INT8_C( 39), INT8_C( 31), INT8_C( 88), -INT8_C( 93), -INT8_C( 42), -INT8_C( 126), -INT8_C( 58), INT8_C( 59) } }, { UINT8_C( 16), UINT8_C( 12), UINT8_C(178), UINT8_C(169), UINT8_C( 31), UINT8_C( 27), UINT8_C( 46), UINT8_C( 39) }, { INT8_C( 101), INT8_C( 124), -INT8_C( 63), -INT8_C( 77), INT8_C( 43), -INT8_C( 43), -INT8_C( 58), -INT8_C( 79) } }, { { INT8_C( 111), -INT8_C( 77), -INT8_C( 39), INT8_C( 16), INT8_C( 118), INT8_C( 25), -INT8_C( 63), -INT8_C( 98) }, { { INT8_C( 56), INT8_C( 26), INT8_C( 65), INT8_C( 14), -INT8_C( 100), INT8_C( 7), 
INT8_C( 73), INT8_C( 12), INT8_C( 20), -INT8_C( 5), -INT8_C( 75), -INT8_C( 13), -INT8_C( 41), -INT8_C( 93), INT8_C( 122), -INT8_C( 99) }, { -INT8_C( 118), -INT8_C( 41), INT8_C( 20), -INT8_C( 68), -INT8_C( 19), -INT8_C( 73), -INT8_C( 8), INT8_C( 92), INT8_C( 106), -INT8_C( 47), INT8_C( 108), -INT8_C( 32), -INT8_C( 22), INT8_C( 46), INT8_C( 126), INT8_C( 34) }, { INT8_C( 72), -INT8_C( 64), INT8_C( 48), -INT8_C( 28), -INT8_C( 57), INT8_C( 122), -INT8_C( 16), -INT8_C( 37), INT8_C( 117), -INT8_C( 91), -INT8_C( 50), INT8_C( 76), INT8_C( 72), INT8_C( 73), -INT8_C( 22), -INT8_C( 45) } }, { UINT8_C( 32), UINT8_C( 14), UINT8_C(143), UINT8_C( 13), UINT8_C( 37), UINT8_C( 39), UINT8_C( 9), UINT8_C( 31) }, { INT8_C( 72), INT8_C( 122), -INT8_C( 39), -INT8_C( 93), INT8_C( 122), -INT8_C( 37), -INT8_C( 5), INT8_C( 34) } }, { { INT8_C( 62), -INT8_C( 106), INT8_C( 47), INT8_C( 6), INT8_C( 16), INT8_C( 31), -INT8_C( 31), -INT8_C( 122) }, { { -INT8_C( 60), -INT8_C( 80), -INT8_C( 46), INT8_C( 13), -INT8_C( 7), -INT8_C( 68), -INT8_C( 32), INT8_C( 25), -INT8_C( 69), INT8_C( 111), INT8_C( 38), INT8_C( 112), -INT8_C( 9), -INT8_C( 113), -INT8_C( 112), INT8_C( 80) }, { INT8_C( 100), -INT8_C( 112), -INT8_C( 109), INT8_C( 104), INT8_C( 14), -INT8_C( 7), -INT8_C( 77), INT8_C( 77), -INT8_C( 112), -INT8_C( 29), INT8_C( 83), -INT8_C( 96), INT8_C( 2), INT8_C( 52), INT8_C( 38), -INT8_C( 57) }, { -INT8_C( 28), -INT8_C( 7), -INT8_C( 44), -INT8_C( 35), -INT8_C( 75), -INT8_C( 76), -INT8_C( 10), INT8_C( 112), INT8_C( 35), INT8_C( 28), -INT8_C( 31), INT8_C( 26), -INT8_C( 85), INT8_C( 113), INT8_C( 106), INT8_C( 16) } }, { UINT8_C( 1), UINT8_C( 14), UINT8_C( 24), UINT8_C( 15), UINT8_C( 7), UINT8_C( 43), UINT8_C( 44), UINT8_C( 39) }, { -INT8_C( 80), -INT8_C( 112), -INT8_C( 112), INT8_C( 80), INT8_C( 25), INT8_C( 26), -INT8_C( 85), INT8_C( 112) } }, { { INT8_C( 71), -INT8_C( 84), -INT8_C( 90), -INT8_C( 3), INT8_C( 96), -INT8_C( 100), INT8_C( 109), -INT8_C( 125) }, { { -INT8_C( 71), INT8_C( 78), -INT8_C( 98), 
INT8_C( 100), -INT8_C( 65), INT8_C( 8), INT8_C( 116), -INT8_C( 64), INT8_C( 6), -INT8_C( 20), -INT8_C( 48), -INT8_C( 2), INT8_C( 24), INT8_C( 44), -INT8_C( 123), INT8_C( 38) }, { -INT8_C( 36), -INT8_C( 83), INT8_C( 55), -INT8_C( 64), -INT8_C( 4), INT8_C( 15), -INT8_C( 120), INT8_C( 67), -INT8_C( 69), INT8_C( 46), INT8_C( 64), INT8_C( 27), -INT8_C( 53), -INT8_C( 82), -INT8_C( 97), -INT8_C( 124) }, { -INT8_C( 4), INT8_C( 61), -INT8_C( 24), -INT8_C( 68), INT8_C( 69), INT8_C( 93), INT8_C( 124), INT8_C( 76), INT8_C( 73), INT8_C( 76), INT8_C( 74), INT8_C( 97), INT8_C( 121), -INT8_C( 49), -INT8_C( 120), INT8_C( 85) } }, { UINT8_C( 29), UINT8_C( 47), UINT8_C( 21), UINT8_C(121), UINT8_C(207), UINT8_C( 13), UINT8_C( 44), UINT8_C( 42) }, { -INT8_C( 82), INT8_C( 85), INT8_C( 15), -INT8_C( 3), INT8_C( 96), INT8_C( 44), INT8_C( 121), INT8_C( 74) } }, { { -INT8_C( 126), INT8_C( 3), INT8_C( 99), -INT8_C( 57), INT8_C( 96), -INT8_C( 32), INT8_C( 19), -INT8_C( 86) }, { { INT8_C( 44), INT8_C( 93), INT8_C( 11), -INT8_C( 91), INT8_C( 45), -INT8_C( 109), -INT8_C( 6), -INT8_C( 86), INT8_C( 83), INT8_C( 15), INT8_C( 35), INT8_C( 34), -INT8_C( 83), -INT8_C( 33), -INT8_C( 84), INT8_C( 121) }, { -INT8_C( 36), INT8_C( 82), INT8_C( 16), -INT8_C( 121), -INT8_C( 105), INT8_C( 43), INT8_C( 47), INT8_C( 25), INT8_C( 46), -INT8_C( 110), -INT8_C( 31), -INT8_C( 113), INT8_C( 114), -INT8_C( 12), INT8_C( 57), -INT8_C( 97) }, { INT8_C( 82), INT8_C( 68), INT8_C( 68), INT8_MAX, -INT8_C( 40), INT8_C( 63), INT8_C( 41), INT8_C( 43), INT8_C( 78), INT8_C( 76), INT8_C( 77), -INT8_C( 5), INT8_C( 43), -INT8_C( 7), INT8_C( 116), INT8_C( 8) } }, { UINT8_C( 28), UINT8_C( 36), UINT8_C(143), UINT8_C( 35), UINT8_C( 31), UINT8_C( 46), UINT8_C( 13), UINT8_C( 30) }, { INT8_C( 114), -INT8_C( 40), INT8_C( 99), INT8_MAX, -INT8_C( 97), INT8_C( 116), -INT8_C( 33), INT8_C( 57) } }, { { -INT8_C( 22), -INT8_C( 89), -INT8_C( 93), -INT8_C( 62), -INT8_C( 26), -INT8_C( 52), -INT8_C( 19), INT8_C( 52) }, { { INT8_C( 24), INT8_C( 58), 
INT8_C( 48), INT8_C( 68), INT8_C( 52), -INT8_C( 92), INT8_C( 76), INT8_MIN, INT8_C( 41), -INT8_C( 37), INT8_C( 99), -INT8_C( 40), -INT8_C( 102), INT8_C( 96), -INT8_C( 74), -INT8_C( 21) }, { INT8_C( 62), INT8_C( 35), -INT8_C( 82), INT8_C( 17), -INT8_C( 55), INT8_C( 17), INT8_C( 53), -INT8_C( 76), -INT8_C( 72), -INT8_C( 39), INT8_C( 118), -INT8_C( 98), -INT8_C( 91), INT8_C( 100), -INT8_C( 46), -INT8_C( 66) }, { -INT8_C( 98), INT8_C( 2), INT8_C( 2), -INT8_C( 46), -INT8_C( 89), INT8_C( 78), INT8_C( 82), -INT8_C( 48), INT8_C( 41), -INT8_C( 74), -INT8_C( 88), -INT8_C( 61), INT8_C( 22), INT8_C( 95), -INT8_C( 82), INT8_C( 85) } }, { UINT8_C( 34), UINT8_C( 45), UINT8_C( 6), UINT8_C( 28), UINT8_C( 14), UINT8_C( 11), UINT8_C( 0), UINT8_C( 38) }, { INT8_C( 2), INT8_C( 95), INT8_C( 76), -INT8_C( 91), -INT8_C( 74), -INT8_C( 40), INT8_C( 24), INT8_C( 82) } }, { { -INT8_C( 103), -INT8_C( 38), INT8_C( 75), INT8_C( 64), INT8_C( 40), -INT8_C( 98), INT8_C( 16), INT8_C( 81) }, { { INT8_C( 84), -INT8_C( 72), INT8_C( 21), INT8_C( 106), INT8_C( 23), -INT8_C( 61), -INT8_C( 65), -INT8_C( 102), INT8_C( 32), INT8_C( 37), -INT8_C( 26), -INT8_C( 114), -INT8_C( 63), -INT8_C( 26), -INT8_C( 76), INT8_C( 53) }, { INT8_C( 92), INT8_C( 120), INT8_C( 79), INT8_C( 55), INT8_C( 15), INT8_C( 39), -INT8_C( 80), -INT8_C( 88), INT8_C( 1), -INT8_C( 5), -INT8_C( 24), INT8_C( 41), -INT8_C( 103), -INT8_C( 8), INT8_C( 123), -INT8_C( 19) }, { -INT8_C( 80), -INT8_C( 112), INT8_C( 88), -INT8_C( 56), INT8_C( 83), INT8_C( 23), INT8_C( 98), INT8_C( 116), INT8_C( 61), INT8_C( 72), INT8_C( 2), -INT8_C( 2), INT8_C( 46), -INT8_C( 73), INT8_C( 51), -INT8_C( 118) } }, { UINT8_C( 47), UINT8_C( 35), UINT8_C( 1), UINT8_C( 14), UINT8_C( 26), UINT8_C( 17), UINT8_C(230), UINT8_C( 28) }, { -INT8_C( 118), -INT8_C( 56), -INT8_C( 72), -INT8_C( 76), -INT8_C( 24), INT8_C( 120), INT8_C( 16), -INT8_C( 103) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); 
simde_int8x16x3_t t; t.val[0] = simde_vld1q_s8(test_vec[i].t[0]); t.val[1] = simde_vld1q_s8(test_vec[i].t[1]); t.val[2] = simde_vld1q_s8(test_vec[i].t[2]); simde_uint8x8_t idx = simde_vld1_u8(test_vec[i].idx); simde_int8x8_t r = simde_vqtbx3_s8(a, t, idx); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x16x3_t t = simde_test_arm_neon_random_i8x16x3(); simde_uint8x8_private idx_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] %= 48; } } simde_uint8x8_t idx = simde_uint8x8_from_private(idx_); simde_int8x8_t r = simde_vqtbx3_s8(a, t, idx); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16x3(2, t, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* test_simde_vqtbx3_u8: checks simde_vqtbx3_u8 against precomputed vectors. Each test_vec entry holds the fallback lanes a[8], three 16-byte tables t[3][16], the lane indices idx[8] and the expected result r[8]. The stored results are consistent with a 48-byte lookup: a lane whose index is below 48 takes the indexed byte of the tables taken in order, and a lane whose index is 48 or more keeps the value from a. The checking branch returns 0; the #else branch is a vector generator and returns 1. */ static int test_simde_vqtbx3_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t a[8]; SIMDE_ALIGN_TO_16 uint8_t t[3][16]; SIMDE_ALIGN_TO_16 uint8_t idx[8]; SIMDE_ALIGN_TO_16 uint8_t r[8]; } test_vec[] = { { { UINT8_C( 64), UINT8_C( 73), UINT8_C(176), UINT8_C(195), UINT8_C( 10), UINT8_C(253), UINT8_C(172), UINT8_C(197) }, { { UINT8_C(164), UINT8_C(244), UINT8_C(189), UINT8_C(143), UINT8_C( 67), UINT8_C(155), UINT8_C(194), UINT8_C(236), UINT8_C( 94), UINT8_C( 21), UINT8_C( 96), UINT8_C(206), UINT8_C( 60), UINT8_C( 34), UINT8_C( 99), UINT8_C( 68) }, { UINT8_C( 93), UINT8_C(248), UINT8_C( 4), UINT8_C(154), UINT8_C( 54), UINT8_C(117), UINT8_C(188), UINT8_C(118), UINT8_C(191), UINT8_C(109), UINT8_C( 58), UINT8_C(201), UINT8_C(106), UINT8_C(230), UINT8_C(142), UINT8_C( 14) }, { UINT8_C(218), UINT8_C( 75), UINT8_C(157), UINT8_C( 30), UINT8_C(230),
UINT8_C( 95), UINT8_C( 10), UINT8_C( 68), UINT8_C(117), UINT8_C(106), UINT8_C( 19), UINT8_C(177), UINT8_C(141), UINT8_C(118), UINT8_C(246), UINT8_C(234) } }, { UINT8_C( 15), UINT8_C(250), UINT8_C( 36), UINT8_C(165), UINT8_C( 16), UINT8_C( 65), UINT8_C( 28), UINT8_C( 47) }, { UINT8_C( 68), UINT8_C( 73), UINT8_C(230), UINT8_C(195), UINT8_C( 93), UINT8_C(253), UINT8_C(106), UINT8_C(234) } }, { { UINT8_C(210), UINT8_C(196), UINT8_C( 52), UINT8_C(185), UINT8_C( 35), UINT8_C( 63), UINT8_C(253), UINT8_C(152) }, { { UINT8_C(169), UINT8_C( 16), UINT8_C( 74), UINT8_C( 54), UINT8_C(135), UINT8_C( 64), UINT8_C( 33), UINT8_C(246), UINT8_C( 58), UINT8_C(165), UINT8_C(155), UINT8_C(170), UINT8_C(230), UINT8_C(183), UINT8_C(217), UINT8_C(148) }, { UINT8_C( 13), UINT8_C(210), UINT8_C(172), UINT8_C( 73), UINT8_C( 89), UINT8_C(211), UINT8_C( 96), UINT8_C( 43), UINT8_C(151), UINT8_C(148), UINT8_C(228), UINT8_C(186), UINT8_C(211), UINT8_C(226), UINT8_C( 83), UINT8_C(125) }, { UINT8_C(242), UINT8_C(157), UINT8_C(179), UINT8_C(121), UINT8_C(221), UINT8_C(212), UINT8_C(111), UINT8_C( 23), UINT8_C(122), UINT8_C( 11), UINT8_C(194), UINT8_C( 96), UINT8_C(194), UINT8_C(155), UINT8_C(245), UINT8_C(208) } }, { UINT8_C(109), UINT8_C(161), UINT8_C( 25), UINT8_C( 6), UINT8_C(116), UINT8_C( 25), UINT8_C( 2), UINT8_C( 11) }, { UINT8_C(210), UINT8_C(196), UINT8_C(148), UINT8_C( 33), UINT8_C( 35), UINT8_C(148), UINT8_C( 74), UINT8_C(170) } }, { { UINT8_C(182), UINT8_C( 18), UINT8_C( 36), UINT8_C(147), UINT8_C(230), UINT8_C(148), UINT8_C(170), UINT8_C( 96) }, { { UINT8_C(159), UINT8_C(108), UINT8_C(193), UINT8_C( 97), UINT8_C( 8), UINT8_C(182), UINT8_C( 49), UINT8_C(117), UINT8_C( 87), UINT8_C( 75), UINT8_C( 60), UINT8_C(204), UINT8_C(196), UINT8_C( 46), UINT8_C(215), UINT8_C(210) }, { UINT8_C( 4), UINT8_C(157), UINT8_C(180), UINT8_C(189), UINT8_C(182), UINT8_C( 18), UINT8_C(104), UINT8_C(108), UINT8_C( 36), UINT8_C(140), UINT8_MAX, UINT8_C( 11), UINT8_C( 32), UINT8_C(170), UINT8_C(107), UINT8_C(191)
}, { UINT8_C( 22), UINT8_C( 44), UINT8_C( 33), UINT8_C( 30), UINT8_C(226), UINT8_C( 82), UINT8_C(148), UINT8_C( 58), UINT8_C(157), UINT8_C(208), UINT8_C( 6), UINT8_C( 98), UINT8_C(254), UINT8_C(221), UINT8_C( 52), UINT8_C( 2) } }, { UINT8_C( 27), UINT8_C( 40), UINT8_C( 47), UINT8_C( 49), UINT8_C( 11), UINT8_C( 39), UINT8_C( 14), UINT8_C( 31) }, { UINT8_C( 11), UINT8_C(157), UINT8_C( 2), UINT8_C(147), UINT8_C(204), UINT8_C( 58), UINT8_C(215), UINT8_C(191) } }, { { UINT8_C(194), UINT8_C(181), UINT8_C(124), UINT8_C(165), UINT8_C( 7), UINT8_C( 16), UINT8_C(223), UINT8_C(165) }, { { UINT8_C(224), UINT8_C(229), UINT8_C( 7), UINT8_C(222), UINT8_C(194), UINT8_C( 59), UINT8_C(225), UINT8_C( 61), UINT8_C( 36), UINT8_C(160), UINT8_C(111), UINT8_C( 31), UINT8_C(200), UINT8_C( 13), UINT8_C( 62), UINT8_C(124) }, { UINT8_C(170), UINT8_C(105), UINT8_C( 80), UINT8_C(242), UINT8_MAX, UINT8_C(228), UINT8_C( 80), UINT8_C(193), UINT8_C(153), UINT8_C(204), UINT8_C(102), UINT8_C(161), UINT8_C(221), UINT8_C( 69), UINT8_C( 70), UINT8_C(189) }, { UINT8_C( 42), UINT8_C( 77), UINT8_C(156), UINT8_C(237), UINT8_C(136), UINT8_C(125), UINT8_C( 42), UINT8_C(172), UINT8_C( 29), UINT8_C(153), UINT8_C(203), UINT8_C(229), UINT8_C(166), UINT8_C( 10), UINT8_C( 97), UINT8_C( 81) } }, { UINT8_C( 19), UINT8_C( 34), UINT8_C( 19), UINT8_C( 18), UINT8_C( 6), UINT8_C( 3), UINT8_C( 3), UINT8_C( 0) }, { UINT8_C(242), UINT8_C(156), UINT8_C(242), UINT8_C( 80), UINT8_C(225), UINT8_C(222), UINT8_C(222), UINT8_C(224) } }, { { UINT8_C(100), UINT8_C(150), UINT8_C(247), UINT8_C(236), UINT8_C( 19), UINT8_C( 33), UINT8_C(153), UINT8_C( 48) }, { { UINT8_C(187), UINT8_C(100), UINT8_C( 22), UINT8_C( 97), UINT8_C(110), UINT8_C(119), UINT8_C(178), UINT8_C(225), UINT8_C( 41), UINT8_C(245), UINT8_C( 83), UINT8_C(192), UINT8_C(136), UINT8_C(135), UINT8_C(240), UINT8_C(232) }, { UINT8_C( 33), UINT8_C(193), UINT8_C( 36), UINT8_C( 0), UINT8_C(216), UINT8_C( 30), UINT8_C( 10), UINT8_C( 60), UINT8_C(180), UINT8_C( 1), UINT8_C( 40),
UINT8_C(199), UINT8_C( 35), UINT8_C(193), UINT8_C(248), UINT8_C(222) }, { UINT8_C( 38), UINT8_C( 14), UINT8_C( 63), UINT8_C(148), UINT8_C(133), UINT8_C(242), UINT8_C(118), UINT8_C(175), UINT8_C(231), UINT8_C(201), UINT8_C(111), UINT8_C(112), UINT8_C( 80), UINT8_C( 95), UINT8_C( 88), UINT8_C(113) } }, { UINT8_C( 32), UINT8_C( 28), UINT8_C( 18), UINT8_C( 8), UINT8_C( 11), UINT8_C( 28), UINT8_C( 4), UINT8_C( 79) }, { UINT8_C( 38), UINT8_C( 35), UINT8_C( 36), UINT8_C( 41), UINT8_C(192), UINT8_C( 35), UINT8_C(110), UINT8_C( 48) } }, { { UINT8_C( 29), UINT8_C(190), UINT8_C(216), UINT8_C(162), UINT8_C(176), UINT8_C( 78), UINT8_C( 81), UINT8_C(152) }, { { UINT8_C( 24), UINT8_C(192), UINT8_C( 8), UINT8_C(104), UINT8_C( 31), UINT8_C( 96), UINT8_C(218), UINT8_C( 63), UINT8_C(220), UINT8_C( 76), UINT8_C( 55), UINT8_C(119), UINT8_C(200), UINT8_C(107), UINT8_C(199), UINT8_C( 70) }, { UINT8_C(200), UINT8_C(222), UINT8_C(231), UINT8_C(230), UINT8_C(237), UINT8_C(102), UINT8_C( 42), UINT8_C( 10), UINT8_C( 37), UINT8_C( 2), UINT8_C(172), UINT8_C(213), UINT8_C( 81), UINT8_C(254), UINT8_C(109), UINT8_C(105) }, { UINT8_C(190), UINT8_C(117), UINT8_C(209), UINT8_C(222), UINT8_C(213), UINT8_C(171), UINT8_C( 29), UINT8_C(178), UINT8_C(247), UINT8_C( 85), UINT8_C( 41), UINT8_C(192), UINT8_C(192), UINT8_C(240), UINT8_C( 6), UINT8_C(136) } }, { UINT8_C( 14), UINT8_C( 46), UINT8_C( 14), UINT8_C( 43), UINT8_C( 36), UINT8_C( 8), UINT8_C(197), UINT8_C(121) }, { UINT8_C(199), UINT8_C( 6), UINT8_C(199), UINT8_C(192), UINT8_C(213), UINT8_C(220), UINT8_C( 81), UINT8_C(152) } }, { { UINT8_C( 50), UINT8_C( 38), UINT8_C( 12), UINT8_C( 7), UINT8_C(210), UINT8_C( 42), UINT8_C(185), UINT8_C(201) }, { { UINT8_C(127), UINT8_C(227), UINT8_C(137), UINT8_C( 63), UINT8_C(211), UINT8_C(144), UINT8_C(200), UINT8_C(162), UINT8_C(126), UINT8_C( 54), UINT8_C( 93), UINT8_C(210), UINT8_C(207), UINT8_C( 35), UINT8_C( 76), UINT8_C(106) }, { UINT8_C(149), UINT8_C(155), UINT8_C( 86), UINT8_C( 5), UINT8_C( 87),
UINT8_C(171), UINT8_C( 51), UINT8_C(137), UINT8_C(209), UINT8_C( 64), UINT8_C(145), UINT8_C(163), UINT8_C(106), UINT8_C( 74), UINT8_C(109), UINT8_C(233) }, { UINT8_C( 45), UINT8_C(246), UINT8_C( 40), UINT8_C( 1), UINT8_C(134), UINT8_C(240), UINT8_C(163), UINT8_C( 4), UINT8_C( 39), UINT8_C( 0), UINT8_C(215), UINT8_C(246), UINT8_C( 35), UINT8_C( 35), UINT8_C( 96), UINT8_C(184) } }, { UINT8_C( 46), UINT8_C( 38), UINT8_C( 45), UINT8_C( 21), UINT8_C( 1), UINT8_C( 1), UINT8_C( 15), UINT8_C( 50) }, { UINT8_C( 96), UINT8_C(163), UINT8_C( 35), UINT8_C(171), UINT8_C(227), UINT8_C(227), UINT8_C(106), UINT8_C(201) } }, { { UINT8_C( 57), UINT8_C(172), UINT8_C(169), UINT8_C(192), UINT8_C(157), UINT8_C( 76), UINT8_C(196), UINT8_C(196) }, { { UINT8_C( 76), UINT8_C(155), UINT8_C(186), UINT8_C(112), UINT8_C(190), UINT8_C( 26), UINT8_C( 40), UINT8_C(124), UINT8_C(208), UINT8_C(230), UINT8_C(146), UINT8_C( 49), UINT8_C(215), UINT8_C( 49), UINT8_C( 99), UINT8_C( 8) }, { UINT8_C( 97), UINT8_C( 57), UINT8_C(163), UINT8_C(219), UINT8_C(124), UINT8_C( 39), UINT8_C(131), UINT8_C(182), UINT8_C(211), UINT8_C( 44), UINT8_C(118), UINT8_C(112), UINT8_C(120), UINT8_C( 58), UINT8_C( 52), UINT8_C(197) }, { UINT8_C(214), UINT8_C(238), UINT8_C( 53), UINT8_C(148), UINT8_C( 8), UINT8_C( 93), UINT8_C( 17), UINT8_C(216), UINT8_C( 67), UINT8_C(163), UINT8_C( 9), UINT8_C( 26), UINT8_C(212), UINT8_C(109), UINT8_C( 34), UINT8_C( 53) } }, { UINT8_C(166), UINT8_C( 5), UINT8_C( 16), UINT8_C( 35), UINT8_C( 44), UINT8_C( 4), UINT8_C( 25), UINT8_C( 0) }, { UINT8_C( 57), UINT8_C( 26), UINT8_C( 97), UINT8_C(148), UINT8_C(212), UINT8_C(190), UINT8_C( 44), UINT8_C( 76) } }, }; /* Run every stored vector: load a, the three tables and idx, call simde_vqtbx3_u8 and compare the result with the stored r. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x16x3_t t; t.val[0] = simde_vld1q_u8(test_vec[i].t[0]); t.val[1] = simde_vld1q_u8(test_vec[i].t[1]); t.val[2] = simde_vld1q_u8(test_vec[i].t[2]); simde_uint8x8_t idx = simde_vld1_u8(test_vec[i].idx);
simde_uint8x8_t r = simde_vqtbx3_u8(a, t, idx); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else /* Generator branch, compiled only when the #if above is changed to 0: builds 8 random cases, reduces each index lane modulo 48 with probability PROBABILITY, and prints a, t, idx and r through the simde_test_arm_neon_write_* helpers. NOTE(review): the reduction presumably keeps a share of the indices inside the 48-byte table range, and the non-zero return presumably flags that nothing was asserted -- confirm against the test harness. */ fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x16x3_t t = simde_test_arm_neon_random_u8x16x3(); simde_uint8x8_private idx_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] %= 48; } } simde_uint8x8_t idx = simde_uint8x8_from_private(idx_); simde_uint8x8_t r = simde_vqtbx3_u8(a, t, idx); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16x3(2, t, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* test_simde_vqtbx4_s8: checks simde_vqtbx4_s8 against precomputed vectors. Each test_vec entry holds the fallback lanes a[8], four 16-byte tables t[4][16], the unsigned lane indices idx[8] and the expected result r[8]. The stored results are consistent with a 64-byte lookup: a lane whose index is below 64 takes the indexed byte of the tables taken in order, and a lane whose index is 64 or more keeps the value from a. The checking branch returns 0; the #else branch is a vector generator and returns 1. */ static int test_simde_vqtbx4_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t a[8]; SIMDE_ALIGN_TO_16 int8_t t[4][16]; SIMDE_ALIGN_TO_16 uint8_t idx[8]; SIMDE_ALIGN_TO_16 int8_t r[8]; } test_vec[] = { { { -INT8_C( 16), -INT8_C( 14), -INT8_C( 52), -INT8_C( 12), -INT8_C( 2), -INT8_C( 47), -INT8_C( 123), -INT8_C( 8) }, { { INT8_C( 94), INT8_C( 7), -INT8_C( 43), INT8_C( 123), INT8_C( 118), INT8_C( 102), -INT8_C( 127), -INT8_C( 42), -INT8_C( 116), INT8_C( 105), -INT8_C( 32), INT8_C( 101), INT8_C( 123), -INT8_C( 37), -INT8_C( 47), INT8_C( 97) }, { -INT8_C( 83), INT8_C( 18), -INT8_C( 87), -INT8_C( 67), -INT8_C( 69), -INT8_C( 44), INT8_C( 31), -INT8_C( 85), -INT8_C( 57), -INT8_C( 20), -INT8_C( 97), -INT8_C( 59), -INT8_C( 67), INT8_C( 37), -INT8_C( 67), INT8_C( 27) }, { INT8_C( 44), -INT8_C( 109), -INT8_C( 106), -INT8_C( 94), -INT8_C( 7), INT8_C( 23), INT8_C( 120), -INT8_C( 122), -INT8_C( 127), INT8_C( 88), -INT8_C( 21), -INT8_C( 4), INT8_C( 52), -INT8_C( 68), INT8_C( 93), -INT8_C( 31) }, { -INT8_C( 49), INT8_C( 6), -INT8_C( 98), -INT8_C( 118),
-INT8_C( 37), -INT8_C( 67), INT8_C( 54), -INT8_C( 94), -INT8_C( 87), -INT8_C( 43), INT8_C( 103), INT8_C( 102), -INT8_C( 6), INT8_C( 36), -INT8_C( 127), INT8_C( 38) } }, { UINT8_C( 55), UINT8_C( 23), UINT8_C( 8), UINT8_C( 49), UINT8_C( 47), UINT8_C( 0), UINT8_C( 55), UINT8_C( 48) }, { -INT8_C( 94), -INT8_C( 85), -INT8_C( 116), INT8_C( 6), -INT8_C( 31), INT8_C( 94), -INT8_C( 94), -INT8_C( 49) } }, { { INT8_C( 15), INT8_C( 76), INT8_C( 56), -INT8_C( 22), INT8_C( 9), INT8_C( 110), -INT8_C( 116), -INT8_C( 77) }, { { INT8_C( 67), -INT8_C( 13), INT8_C( 25), INT8_C( 62), INT8_C( 24), -INT8_C( 101), INT8_C( 100), -INT8_C( 49), -INT8_C( 78), INT8_C( 45), INT8_MIN, -INT8_C( 31), INT8_C( 109), -INT8_C( 73), -INT8_C( 111), INT8_C( 6) }, { -INT8_C( 39), INT8_C( 61), -INT8_C( 45), -INT8_C( 72), INT8_C( 70), -INT8_C( 127), INT8_C( 101), INT8_C( 86), -INT8_C( 51), -INT8_C( 99), INT8_C( 64), -INT8_C( 41), INT8_C( 11), -INT8_C( 51), -INT8_C( 118), INT8_C( 79) }, { -INT8_C( 64), -INT8_C( 93), -INT8_C( 115), -INT8_C( 40), INT8_C( 62), -INT8_C( 15), -INT8_C( 88), -INT8_C( 15), INT8_C( 30), INT8_C( 40), -INT8_C( 46), -INT8_C( 116), -INT8_C( 32), INT8_C( 100), -INT8_C( 110), -INT8_C( 71) }, { -INT8_C( 95), INT8_C( 102), INT8_C( 113), -INT8_C( 24), -INT8_C( 25), -INT8_C( 41), INT8_C( 62), -INT8_C( 75), INT8_C( 116), INT8_C( 126), -INT8_C( 116), INT8_MIN, INT8_C( 75), INT8_C( 22), -INT8_C( 49), INT8_C( 12) } }, { UINT8_C( 57), UINT8_C( 28), UINT8_C( 36), UINT8_C(248), UINT8_C( 13), UINT8_C( 12), UINT8_C( 41), UINT8_C( 44) }, { INT8_C( 126), INT8_C( 11), INT8_C( 62), -INT8_C( 22), -INT8_C( 73), INT8_C( 109), INT8_C( 40), -INT8_C( 32) } }, { { -INT8_C( 16), -INT8_C( 64), -INT8_C( 87), -INT8_C( 40), -INT8_C( 105), -INT8_C( 25), -INT8_C( 115), INT8_C( 11) }, { { INT8_C( 101), INT8_C( 25), -INT8_C( 117), -INT8_C( 79), INT8_C( 47), INT8_C( 90), -INT8_C( 67), -INT8_C( 24), -INT8_C( 74), -INT8_C( 95), -INT8_C( 32), INT8_C( 4), INT8_C( 46), -INT8_C( 55), INT8_C( 112), -INT8_C( 29) }, { -INT8_C(
123), INT8_C( 104), INT8_C( 120), -INT8_C( 92), -INT8_C( 14), -INT8_C( 58), INT8_C( 101), -INT8_C( 29), -INT8_C( 122), INT8_C( 14), -INT8_C( 69), INT8_C( 29), -INT8_C( 11), INT8_C( 72), INT8_C( 41), INT8_C( 91) }, { INT8_C( 97), -INT8_C( 76), INT8_C( 12), -INT8_C( 112), INT8_C( 15), -INT8_C( 55), INT8_C( 120), -INT8_C( 59), INT8_C( 106), INT8_C( 89), -INT8_C( 55), -INT8_C( 104), INT8_C( 34), INT8_C( 57), INT8_C( 123), -INT8_C( 89) }, { -INT8_C( 95), -INT8_C( 13), INT8_C( 76), -INT8_C( 108), -INT8_C( 70), -INT8_C( 79), INT8_C( 119), INT8_C( 64), -INT8_C( 64), INT8_C( 50), INT8_C( 94), -INT8_C( 75), INT8_C( 122), -INT8_C( 121), INT8_C( 16), -INT8_C( 37) } }, { UINT8_C( 59), UINT8_C( 28), UINT8_C(107), UINT8_C( 10), UINT8_C( 37), UINT8_C( 35), UINT8_C( 16), UINT8_C( 16) }, { -INT8_C( 75), -INT8_C( 11), -INT8_C( 87), -INT8_C( 32), -INT8_C( 55), -INT8_C( 112), -INT8_C( 123), -INT8_C( 123) } }, { { INT8_C( 87), INT8_C( 82), INT8_C( 72), INT8_C( 17), INT8_C( 4), -INT8_C( 65), INT8_C( 82), -INT8_C( 60) }, { { -INT8_C( 15), -INT8_C( 80), INT8_C( 121), INT8_C( 107), INT8_C( 55), -INT8_C( 118), INT8_C( 70), INT8_C( 114), -INT8_C( 90), -INT8_C( 79), -INT8_C( 67), -INT8_C( 116), -INT8_C( 107), -INT8_C( 51), -INT8_C( 36), -INT8_C( 47) }, { -INT8_C( 90), -INT8_C( 60), INT8_C( 48), -INT8_C( 71), INT8_C( 40), INT8_C( 55), INT8_C( 110), INT8_MIN, -INT8_C( 119), -INT8_C( 74), -INT8_C( 111), -INT8_C( 115), INT8_C( 118), -INT8_C( 29), INT8_C( 81), INT8_C( 103) }, { -INT8_C( 109), -INT8_C( 53), -INT8_C( 45), -INT8_C( 54), INT8_C( 85), INT8_C( 25), INT8_C( 61), -INT8_C( 5), -INT8_C( 53), -INT8_C( 6), -INT8_C( 121), INT8_C( 96), -INT8_C( 57), INT8_C( 99), INT8_C( 49), INT8_C( 109) }, { INT8_C( 40), INT8_C( 98), INT8_C( 39), INT8_C( 80), -INT8_C( 103), -INT8_C( 107), -INT8_C( 48), INT8_C( 34), INT8_C( 75), INT8_C( 98), -INT8_C( 80), -INT8_C( 63), INT8_C( 69), INT8_C( 1), INT8_C( 41), -INT8_C( 39) } }, { UINT8_C(204), UINT8_C(252), UINT8_C( 35), UINT8_C( 33), UINT8_C( 21), UINT8_C(224),
UINT8_C( 29), UINT8_C(224) }, { INT8_C( 87), INT8_C( 82), -INT8_C( 54), -INT8_C( 53), INT8_C( 55), -INT8_C( 65), -INT8_C( 29), -INT8_C( 60) } }, { { -INT8_C( 44), INT8_C( 54), INT8_MIN, INT8_C( 109), -INT8_C( 53), INT8_C( 81), -INT8_C( 113), INT8_C( 22) }, { { -INT8_C( 77), INT8_C( 63), -INT8_C( 40), -INT8_C( 8), INT8_C( 65), INT8_C( 1), -INT8_C( 47), INT8_C( 13), -INT8_C( 3), INT8_C( 117), INT8_C( 47), INT8_C( 18), INT8_C( 85), INT8_C( 76), -INT8_C( 13), INT8_C( 48) }, { -INT8_C( 16), INT8_C( 51), -INT8_C( 47), -INT8_C( 8), -INT8_C( 91), -INT8_C( 32), INT8_C( 40), INT8_C( 121), INT8_C( 22), -INT8_C( 87), -INT8_C( 26), -INT8_C( 31), -INT8_C( 6), INT8_C( 118), -INT8_C( 8), -INT8_C( 83) }, { -INT8_C( 75), -INT8_C( 48), -INT8_C( 91), -INT8_C( 10), -INT8_C( 47), INT8_C( 119), INT8_C( 4), -INT8_C( 50), -INT8_C( 20), INT8_C( 51), -INT8_C( 32), INT8_C( 65), INT8_MAX, -INT8_C( 45), INT8_C( 113), INT8_C( 111) }, { INT8_C( 7), INT8_C( 67), INT8_C( 104), -INT8_C( 84), INT8_C( 35), -INT8_C( 112), INT8_C( 38), INT8_C( 58), INT8_C( 57), INT8_C( 12), INT8_C( 27), INT8_C( 51), -INT8_C( 126), INT8_C( 19), -INT8_C( 32), INT8_C( 56) } }, { UINT8_C( 35), UINT8_C( 6), UINT8_C( 46), UINT8_C( 52), UINT8_C( 61), UINT8_C( 50), UINT8_C( 2), UINT8_C( 41) }, { -INT8_C( 10), -INT8_C( 47), INT8_C( 113), INT8_C( 35), INT8_C( 19), INT8_C( 104), -INT8_C( 40), INT8_C( 51) } }, { { -INT8_C( 33), -INT8_C( 68), -INT8_C( 22), INT8_C( 2), INT8_C( 76), INT8_C( 16), INT8_C( 60), -INT8_C( 122) }, { { INT8_C( 28), INT8_C( 88), -INT8_C( 71), -INT8_C( 97), INT8_C( 107), -INT8_C( 102), -INT8_C( 41), INT8_C( 79), INT8_C( 32), INT8_C( 5), INT8_C( 3), INT8_C( 29), INT8_C( 56), -INT8_C( 122), INT8_C( 6), -INT8_C( 99) }, { -INT8_C( 23), INT8_C( 48), -INT8_C( 126), INT8_C( 31), -INT8_C( 52), -INT8_C( 42), INT8_C( 93), -INT8_C( 85), -INT8_C( 110), INT8_C( 71), -INT8_C( 82), -INT8_C( 34), INT8_C( 87), -INT8_C( 22), INT8_C( 100), INT8_C( 115) }, { INT8_C( 66), INT8_C( 30), INT8_C( 18), -INT8_C( 82), -INT8_C( 72),
-INT8_C( 23), -INT8_C( 3), -INT8_C( 40), -INT8_C( 17), INT8_C( 0), -INT8_C( 11), INT8_C( 39), -INT8_C( 122), -INT8_C( 5), -INT8_C( 60), INT8_C( 111) }, { INT8_C( 43), INT8_C( 70), -INT8_C( 113), -INT8_C( 8), INT8_C( 28), -INT8_C( 20), -INT8_C( 93), -INT8_C( 82), INT8_C( 51), INT8_C( 81), -INT8_C( 115), -INT8_C( 118), INT8_C( 60), -INT8_C( 15), -INT8_C( 3), INT8_C( 126) } }, { UINT8_C( 15), UINT8_C( 16), UINT8_C( 44), UINT8_C( 7), UINT8_C( 57), UINT8_C( 41), UINT8_C( 31), UINT8_C( 40) }, { -INT8_C( 99), -INT8_C( 23), -INT8_C( 122), INT8_C( 79), INT8_C( 81), INT8_C( 0), INT8_C( 115), -INT8_C( 17) } }, { { INT8_C( 26), -INT8_C( 81), -INT8_C( 77), INT8_C( 55), -INT8_C( 101), INT8_C( 86), -INT8_C( 27), -INT8_C( 50) }, { { -INT8_C( 88), INT8_C( 114), INT8_C( 88), -INT8_C( 28), INT8_C( 100), INT8_C( 85), INT8_C( 98), INT8_C( 115), INT8_C( 101), -INT8_C( 113), INT8_C( 59), INT8_C( 95), -INT8_C( 72), -INT8_C( 38), INT8_C( 71), -INT8_C( 30) }, { INT8_C( 111), INT8_C( 87), -INT8_C( 109), -INT8_C( 2), INT8_C( 43), -INT8_C( 77), -INT8_C( 71), INT8_C( 69), INT8_C( 98), INT8_C( 108), INT8_C( 124), -INT8_C( 3), -INT8_C( 61), INT8_C( 98), -INT8_C( 53), INT8_C( 107) }, { -INT8_C( 44), INT8_C( 35), INT8_C( 79), INT8_C( 56), INT8_C( 120), -INT8_C( 79), -INT8_C( 84), -INT8_C( 34), INT8_C( 64), -INT8_C( 25), INT8_C( 61), -INT8_C( 7), -INT8_C( 63), -INT8_C( 124), -INT8_C( 37), INT8_C( 48) }, { -INT8_C( 37), INT8_C( 110), INT8_C( 47), INT8_C( 6), INT8_C( 33), -INT8_C( 24), INT8_C( 76), -INT8_C( 125), INT8_C( 85), -INT8_C( 56), INT8_MIN, INT8_C( 24), INT8_C( 42), INT8_C( 75), -INT8_C( 125), -INT8_C( 1) } }, { UINT8_C( 46), UINT8_C(210), UINT8_C( 55), UINT8_C( 39), UINT8_C( 3), UINT8_C( 35), UINT8_C( 5), UINT8_C( 4) }, { -INT8_C( 37), -INT8_C( 81), -INT8_C( 125), -INT8_C( 34), -INT8_C( 28), INT8_C( 56), INT8_C( 85), INT8_C( 100) } }, { { INT8_C( 7), -INT8_C( 21), INT8_C( 104), INT8_C( 40), -INT8_C( 44), -INT8_C( 76), -INT8_C( 84), INT8_C( 41) }, { { INT8_C( 125), INT8_C( 44), INT8_C( 65),
-INT8_C( 89), INT8_C( 120), -INT8_C( 60), -INT8_C( 90), -INT8_C( 26), -INT8_C( 106), -INT8_C( 34), -INT8_C( 51), INT8_C( 25), -INT8_C( 63), -INT8_C( 110), -INT8_C( 35), -INT8_C( 116) }, { -INT8_C( 108), -INT8_C( 102), INT8_C( 24), INT8_C( 27), INT8_C( 51), -INT8_C( 44), INT8_C( 125), INT8_C( 58), -INT8_C( 64), -INT8_C( 27), INT8_C( 98), -INT8_C( 108), -INT8_C( 102), INT8_C( 14), -INT8_C( 67), INT8_C( 23) }, { INT8_C( 59), -INT8_C( 2), -INT8_C( 66), -INT8_C( 77), -INT8_C( 62), INT8_C( 101), -INT8_C( 103), INT8_C( 88), INT8_C( 67), INT8_C( 103), INT8_C( 113), INT8_C( 4), -INT8_C( 7), INT8_C( 79), -INT8_C( 112), -INT8_C( 114) }, { -INT8_C( 23), -INT8_C( 88), -INT8_C( 87), INT8_C( 28), INT8_C( 125), INT8_C( 38), INT8_C( 86), INT8_C( 61), INT8_C( 11), -INT8_C( 71), -INT8_C( 47), -INT8_C( 91), -INT8_C( 57), -INT8_C( 114), -INT8_C( 68), INT8_C( 2) } }, { UINT8_C( 12), UINT8_C( 59), UINT8_C( 53), UINT8_C( 78), UINT8_C( 32), UINT8_C( 79), UINT8_C( 38), UINT8_C( 35) }, { -INT8_C( 63), -INT8_C( 91), INT8_C( 38), INT8_C( 40), INT8_C( 59), -INT8_C( 76), -INT8_C( 103), -INT8_C( 77) } }, }; /* Run every stored vector: load a, the four tables and idx, call simde_vqtbx4_s8 and compare the result with the stored r. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x16x4_t t; t.val[0] = simde_vld1q_s8(test_vec[i].t[0]); t.val[1] = simde_vld1q_s8(test_vec[i].t[1]); t.val[2] = simde_vld1q_s8(test_vec[i].t[2]); t.val[3] = simde_vld1q_s8(test_vec[i].t[3]); simde_uint8x8_t idx = simde_vld1_u8(test_vec[i].idx); simde_int8x8_t r = simde_vqtbx4_s8(a, t, idx); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else /* Generator branch, compiled only when the #if above is changed to 0: builds 8 random cases, masks each index lane with 63 with probability PROBABILITY, and prints a, t, idx and r through the simde_test_arm_neon_write_* helpers. NOTE(review): the mask presumably keeps a share of the indices inside the 64-byte table range, and the non-zero return presumably flags that nothing was asserted -- confirm against the test harness. */ fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x16x4_t t = simde_test_arm_neon_random_i8x16x4(); simde_uint8x8_private idx_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 63;
} } simde_uint8x8_t idx = simde_uint8x8_from_private(idx_); simde_int8x8_t r = simde_vqtbx4_s8(a, t, idx); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16x4(2, t, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbx4_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t a[8]; SIMDE_ALIGN_TO_16 uint8_t t[4][16]; SIMDE_ALIGN_TO_16 uint8_t idx[8]; SIMDE_ALIGN_TO_16 uint8_t r[8]; } test_vec[] = { { { UINT8_C( 77), UINT8_C(152), UINT8_C(194), UINT8_C(106), UINT8_C( 44), UINT8_C( 84), UINT8_C( 13), UINT8_C( 88) }, { { UINT8_C( 85), UINT8_C(254), UINT8_C( 68), UINT8_C(155), UINT8_C(185), UINT8_C( 75), UINT8_C( 80), UINT8_C( 52), UINT8_C( 14), UINT8_C(177), UINT8_C(141), UINT8_C( 55), UINT8_C(102), UINT8_C(223), UINT8_C( 33), UINT8_C( 22) }, { UINT8_C(110), UINT8_C(181), UINT8_C(240), UINT8_C(185), UINT8_C( 6), UINT8_MAX, UINT8_C(188), UINT8_C( 84), UINT8_C(151), UINT8_C(126), UINT8_C(190), UINT8_C(196), UINT8_C(211), UINT8_C(203), UINT8_C( 28), UINT8_C( 40) }, { UINT8_C(202), UINT8_C( 97), UINT8_C(195), UINT8_C(131), UINT8_C(172), UINT8_C( 19), UINT8_C(183), UINT8_C(186), UINT8_C(196), UINT8_C( 68), UINT8_C(241), UINT8_C( 43), UINT8_C( 35), UINT8_C( 19), UINT8_C( 65), UINT8_C(145) }, { UINT8_C(200), UINT8_C( 49), UINT8_C( 74), UINT8_C(206), UINT8_C( 48), UINT8_C( 6), UINT8_C( 34), UINT8_C(199), UINT8_C(132), UINT8_C(224), UINT8_C(139), UINT8_C( 87), UINT8_C(172), UINT8_C(168), UINT8_C(128), UINT8_C(118) } }, { UINT8_C( 9), UINT8_C( 3), UINT8_C( 57), UINT8_C( 53), UINT8_C( 23), UINT8_C( 48), UINT8_C( 47), UINT8_C( 27) }, { UINT8_C(177), UINT8_C(155), UINT8_C(224), UINT8_C( 6), UINT8_C( 84), UINT8_C(200), UINT8_C(145), UINT8_C(196) } }, { { UINT8_C(184), UINT8_C(242), UINT8_C( 10), UINT8_C(232), UINT8_C(248), UINT8_C( 44), UINT8_C(176), UINT8_C(124) }, { {
UINT8_C( 13), UINT8_C( 59), UINT8_C(212), UINT8_C(185), UINT8_C(227), UINT8_C( 84), UINT8_C( 47), UINT8_C(236), UINT8_C(151), UINT8_C( 40), UINT8_C(161), UINT8_C(238), UINT8_C(216), UINT8_C( 16), UINT8_C( 10), UINT8_C(204) }, { UINT8_C(113), UINT8_C( 80), UINT8_C(227), UINT8_C(228), UINT8_C(216), UINT8_C(139), UINT8_C( 32), UINT8_C(144), UINT8_C(125), UINT8_C( 42), UINT8_C(121), UINT8_C(117), UINT8_C( 86), UINT8_C( 41), UINT8_C(241), UINT8_C( 99) }, { UINT8_C(100), UINT8_C(197), UINT8_C( 28), UINT8_C( 72), UINT8_C( 25), UINT8_C( 75), UINT8_C( 52), UINT8_C(177), UINT8_C(115), UINT8_C(214), UINT8_C(159), UINT8_C( 75), UINT8_C(230), UINT8_C(169), UINT8_C( 23), UINT8_C( 87) }, { UINT8_C(250), UINT8_C(250), UINT8_C( 60), UINT8_C(210), UINT8_C(133), UINT8_C( 92), UINT8_C( 98), UINT8_C( 2), UINT8_C(134), UINT8_C(219), UINT8_C(119), UINT8_C(220), UINT8_C( 4), UINT8_C(105), UINT8_C( 64), UINT8_C(105) } }, { UINT8_C( 46), UINT8_C( 28), UINT8_C(177), UINT8_C( 72), UINT8_C( 40), UINT8_C(229), UINT8_C( 57), UINT8_C( 27) }, { UINT8_C( 23), UINT8_C( 86), UINT8_C( 10), UINT8_C(232), UINT8_C(115), UINT8_C( 44), UINT8_C(219), UINT8_C(117) } }, { { UINT8_C(121), UINT8_C( 53), UINT8_C( 14), UINT8_C(254), UINT8_C(145), UINT8_C(112), UINT8_C( 1), UINT8_C( 23) }, { { UINT8_C( 76), UINT8_C(120), UINT8_C(244), UINT8_C( 80), UINT8_C(225), UINT8_C( 52), UINT8_C(185), UINT8_C( 16), UINT8_C(144), UINT8_C(106), UINT8_C( 88), UINT8_C( 56), UINT8_C( 80), UINT8_C( 81), UINT8_C( 84), UINT8_C( 11) }, { UINT8_C(233), UINT8_C(187), UINT8_C(173), UINT8_C( 43), UINT8_C( 57), UINT8_C(167), UINT8_C(103), UINT8_C(178), UINT8_C(220), UINT8_C(117), UINT8_C(177), UINT8_C(110), UINT8_C(230), UINT8_C(178), UINT8_C(133), UINT8_C( 50) }, { UINT8_C( 42), UINT8_C(121), UINT8_C(130), UINT8_C( 12), UINT8_C(173), UINT8_C( 60), UINT8_C( 28), UINT8_C( 62), UINT8_C(166), UINT8_C(116), UINT8_C(118), UINT8_C(246), UINT8_C(197), UINT8_C(202), UINT8_C( 2), UINT8_C(174) }, { UINT8_C(133), UINT8_C(175), UINT8_C(218), 
UINT8_C(191), UINT8_C( 86), UINT8_C( 65), UINT8_C(113), UINT8_C( 51), UINT8_C(183), UINT8_C( 34), UINT8_C(161), UINT8_C(157), UINT8_C(212), UINT8_C( 38), UINT8_C(207), UINT8_MAX } }, { UINT8_C( 32), UINT8_C( 17), UINT8_C( 11), UINT8_C( 13), UINT8_C(141), UINT8_C( 39), UINT8_C( 11), UINT8_C( 52) }, { UINT8_C( 42), UINT8_C(187), UINT8_C( 56), UINT8_C( 81), UINT8_C(145), UINT8_C( 62), UINT8_C( 56), UINT8_C( 86) } }, { { UINT8_C(220), UINT8_C(232), UINT8_C( 17), UINT8_C( 50), UINT8_C( 42), UINT8_C(130), UINT8_C(101), UINT8_C(225) }, { { UINT8_C(165), UINT8_C( 6), UINT8_C(126), UINT8_C(121), UINT8_C( 45), UINT8_C( 77), UINT8_C(120), UINT8_C(205), UINT8_C(158), UINT8_C(131), UINT8_C( 26), UINT8_C( 44), UINT8_C(170), UINT8_C(166), UINT8_C( 96), UINT8_C( 69) }, { UINT8_C(168), UINT8_C(138), UINT8_C(165), UINT8_C(116), UINT8_C(183), UINT8_C(180), UINT8_C(198), UINT8_C(147), UINT8_C(156), UINT8_C(215), UINT8_C(197), UINT8_C(198), UINT8_C( 90), UINT8_C( 43), UINT8_C(167), UINT8_MAX }, { UINT8_C( 49), UINT8_C( 37), UINT8_C(120), UINT8_C( 94), UINT8_C(114), UINT8_C(241), UINT8_C( 43), UINT8_C( 17), UINT8_C(116), UINT8_C( 70), UINT8_C( 61), UINT8_C( 31), UINT8_C(236), UINT8_C(157), UINT8_C(100), UINT8_C(148) }, { UINT8_C( 39), UINT8_C( 10), UINT8_C( 8), UINT8_C(222), UINT8_C(190), UINT8_C(207), UINT8_C(113), UINT8_C( 90), UINT8_C(166), UINT8_C( 55), UINT8_C( 33), UINT8_C( 0), UINT8_C( 98), UINT8_C(200), UINT8_MAX, UINT8_C(147) } }, { UINT8_C( 46), UINT8_C( 56), UINT8_C( 50), UINT8_C( 32), UINT8_C( 41), UINT8_C( 29), UINT8_C( 49), UINT8_C(221) }, { UINT8_C(100), UINT8_C(166), UINT8_C( 8), UINT8_C( 49), UINT8_C( 70), UINT8_C( 43), UINT8_C( 10), UINT8_C(225) } }, { { UINT8_C(107), UINT8_C(236), UINT8_C( 81), UINT8_C( 41), UINT8_C(187), UINT8_C(195), UINT8_C(131), UINT8_C( 97) }, { { UINT8_C(250), UINT8_C(164), UINT8_C( 98), UINT8_C( 92), UINT8_C(109), UINT8_C( 97), UINT8_C(239), UINT8_C( 91), UINT8_C(217), UINT8_C(225), UINT8_C(187), UINT8_C( 66), UINT8_MAX, UINT8_C( 45), UINT8_C( 
32), UINT8_C( 98) }, { UINT8_C(219), UINT8_C( 28), UINT8_C(178), UINT8_C( 39), UINT8_C(125), UINT8_C(149), UINT8_C(154), UINT8_C(232), UINT8_C(129), UINT8_C(235), UINT8_C( 17), UINT8_C( 60), UINT8_C(174), UINT8_C(149), UINT8_C(158), UINT8_C(168) }, { UINT8_C( 57), UINT8_C( 0), UINT8_C( 4), UINT8_C(166), UINT8_C( 97), UINT8_C(244), UINT8_C( 1), UINT8_C( 59), UINT8_C(213), UINT8_C(189), UINT8_C(125), UINT8_C(212), UINT8_C(234), UINT8_C(157), UINT8_C( 55), UINT8_C(197) }, { UINT8_C(186), UINT8_C(233), UINT8_C(236), UINT8_C( 55), UINT8_C(126), UINT8_C(134), UINT8_C( 32), UINT8_C( 0), UINT8_C(114), UINT8_C( 49), UINT8_C( 60), UINT8_C( 32), UINT8_C(198), UINT8_C(218), UINT8_C(201), UINT8_C( 0) } }, { UINT8_C( 26), UINT8_C( 13), UINT8_C(166), UINT8_C( 60), UINT8_C( 1), UINT8_C( 40), UINT8_C( 55), UINT8_C( 23) }, { UINT8_C( 17), UINT8_C( 45), UINT8_C( 81), UINT8_C(198), UINT8_C(164), UINT8_C(213), UINT8_C( 0), UINT8_C(232) } }, { { UINT8_C(139), UINT8_C( 1), UINT8_C(131), UINT8_C( 10), UINT8_C(135), UINT8_C(163), UINT8_C( 10), UINT8_C(249) }, { { UINT8_C(213), UINT8_C( 70), UINT8_C( 26), UINT8_C(155), UINT8_C( 33), UINT8_C(227), UINT8_C(155), UINT8_C(251), UINT8_C(176), UINT8_C( 66), UINT8_C( 55), UINT8_C(114), UINT8_C(234), UINT8_C(174), UINT8_C( 9), UINT8_C( 79) }, { UINT8_C(163), UINT8_C(116), UINT8_C(158), UINT8_C( 53), UINT8_C( 23), UINT8_C(178), UINT8_C(129), UINT8_C(162), UINT8_C(179), UINT8_C( 4), UINT8_C(172), UINT8_C( 59), UINT8_C(168), UINT8_C(182), UINT8_C( 52), UINT8_C(125) }, { UINT8_C(253), UINT8_C( 78), UINT8_C( 24), UINT8_C( 30), UINT8_C( 49), UINT8_C(180), UINT8_C( 25), UINT8_C(226), UINT8_C(246), UINT8_C( 81), UINT8_C( 84), UINT8_C(224), UINT8_MAX, UINT8_C( 93), UINT8_C( 47), UINT8_C(162) }, { UINT8_C(209), UINT8_C(205), UINT8_C(215), UINT8_C(232), UINT8_C(127), UINT8_C( 88), UINT8_C(139), UINT8_C( 51), UINT8_C( 93), UINT8_C( 55), UINT8_C(110), UINT8_C( 5), UINT8_C(238), UINT8_C(162), UINT8_C(130), UINT8_C(235) } }, { UINT8_C( 49), UINT8_C(154), UINT8_C( 
9), UINT8_C( 34), UINT8_C( 14), UINT8_C( 34), UINT8_C( 4), UINT8_C( 4) }, { UINT8_C(205), UINT8_C( 1), UINT8_C( 66), UINT8_C( 24), UINT8_C( 9), UINT8_C( 24), UINT8_C( 33), UINT8_C( 33) } }, { { UINT8_C( 32), UINT8_C(237), UINT8_C(111), UINT8_C(160), UINT8_C( 69), UINT8_C(250), UINT8_C(211), UINT8_C(162) }, { { UINT8_C( 50), UINT8_C( 65), UINT8_C(167), UINT8_C( 32), UINT8_C(227), UINT8_C( 41), UINT8_C( 11), UINT8_C(212), UINT8_C(196), UINT8_C( 20), UINT8_C(247), UINT8_C( 18), UINT8_C( 54), UINT8_C(251), UINT8_C( 87), UINT8_C(170) }, { UINT8_C( 84), UINT8_C(123), UINT8_C( 29), UINT8_C( 9), UINT8_C(207), UINT8_C( 50), UINT8_C(144), UINT8_C(239), UINT8_C( 31), UINT8_C( 0), UINT8_C(143), UINT8_C(101), UINT8_C(250), UINT8_C( 98), UINT8_C( 7), UINT8_C( 44) }, { UINT8_C(163), UINT8_C(175), UINT8_C( 76), UINT8_C(135), UINT8_C(216), UINT8_C( 87), UINT8_C( 91), UINT8_C(156), UINT8_C(107), UINT8_C( 82), UINT8_C(175), UINT8_C(162), UINT8_C( 78), UINT8_C( 6), UINT8_C( 76), UINT8_C(162) }, { UINT8_C(129), UINT8_C(105), UINT8_C(171), UINT8_C( 80), UINT8_C(155), UINT8_C( 60), UINT8_C( 64), UINT8_C(187), UINT8_C( 60), UINT8_C(207), UINT8_C( 32), UINT8_C( 54), UINT8_C( 50), UINT8_C( 39), UINT8_C( 99), UINT8_C(213) } }, { UINT8_C( 22), UINT8_C( 47), UINT8_C( 28), UINT8_C(175), UINT8_C( 7), UINT8_C( 56), UINT8_C( 11), UINT8_C(114) }, { UINT8_C(144), UINT8_C(162), UINT8_C(250), UINT8_C(160), UINT8_C(212), UINT8_C( 60), UINT8_C( 18), UINT8_C(162) } }, { { UINT8_C(201), UINT8_C(166), UINT8_C(210), UINT8_C(101), UINT8_C(226), UINT8_C( 18), UINT8_C( 32), UINT8_C( 30) }, { { UINT8_C(226), UINT8_C( 64), UINT8_C( 84), UINT8_C( 20), UINT8_C(103), UINT8_C(183), UINT8_C(233), UINT8_C( 62), UINT8_C(103), UINT8_C( 70), UINT8_C(237), UINT8_C(110), UINT8_C(254), UINT8_C( 56), UINT8_C(224), UINT8_C( 8) }, { UINT8_C( 51), UINT8_C(245), UINT8_C( 97), UINT8_C( 51), UINT8_C( 85), UINT8_C( 91), UINT8_C(181), UINT8_C( 31), UINT8_C( 1), UINT8_C(136), UINT8_C(132), UINT8_C(227), UINT8_C(154), UINT8_C(164), 
UINT8_C( 1), UINT8_C(124) }, { UINT8_C(228), UINT8_C( 86), UINT8_C(144), UINT8_C( 75), UINT8_C( 13), UINT8_C(122), UINT8_C(137), UINT8_C(116), UINT8_C(192), UINT8_C(118), UINT8_C(226), UINT8_C(190), UINT8_C(175), UINT8_C(195), UINT8_C(198), UINT8_C(226) }, { UINT8_C(184), UINT8_C( 39), UINT8_C( 21), UINT8_C( 13), UINT8_C(131), UINT8_C(203), UINT8_C( 44), UINT8_C(132), UINT8_C( 83), UINT8_C(176), UINT8_C(104), UINT8_C(237), UINT8_C( 84), UINT8_C(105), UINT8_C(106), UINT8_C( 56) } }, { UINT8_C( 63), UINT8_C(250), UINT8_C( 4), UINT8_C(205), UINT8_C( 52), UINT8_C( 13), UINT8_C( 65), UINT8_C( 52) }, { UINT8_C( 56), UINT8_C(166), UINT8_C(103), UINT8_C(101), UINT8_C(131), UINT8_C( 56), UINT8_C( 32), UINT8_C(131) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x16x4_t t; t.val[0] = simde_vld1q_u8(test_vec[i].t[0]); t.val[1] = simde_vld1q_u8(test_vec[i].t[1]); t.val[2] = simde_vld1q_u8(test_vec[i].t[2]); t.val[3] = simde_vld1q_u8(test_vec[i].t[3]); simde_uint8x8_t idx = simde_vld1_u8(test_vec[i].idx); simde_uint8x8_t r = simde_vqtbx4_u8(a, t, idx); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x16x4_t t = simde_test_arm_neon_random_u8x16x4(); simde_uint8x8_private idx_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 63; } } simde_uint8x8_t idx = simde_uint8x8_from_private(idx_); simde_uint8x8_t r = simde_vqtbx4_u8(a, t, idx); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16x4(2, t, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, 
SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #endif /* !defined(SIMDE_BUG_INTEL_857088) */ static int test_simde_vqtbx1q_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t a[16]; SIMDE_ALIGN_TO_16 int8_t t[16]; SIMDE_ALIGN_TO_16 uint8_t idx[16]; SIMDE_ALIGN_TO_16 int8_t r[16]; } test_vec[] = { { { INT8_C( 67), INT8_C( 27), INT8_C( 119), INT8_C( 109), -INT8_C( 91), -INT8_C( 5), -INT8_C( 81), INT8_C( 24), -INT8_C( 44), INT8_C( 81), -INT8_C( 38), -INT8_C( 75), INT8_C( 39), -INT8_C( 7), INT8_C( 39), -INT8_C( 114) }, { INT8_C( 80), -INT8_C( 33), -INT8_C( 15), -INT8_C( 85), -INT8_C( 101), INT8_C( 6), -INT8_C( 58), -INT8_C( 22), INT8_C( 28), -INT8_C( 89), -INT8_C( 93), INT8_C( 77), INT8_C( 4), -INT8_C( 94), INT8_C( 1), INT8_C( 71) }, { UINT8_C(189), UINT8_C( 9), UINT8_C( 4), UINT8_C( 2), UINT8_C( 4), UINT8_C( 3), UINT8_C( 10), UINT8_C( 9), UINT8_C( 4), UINT8_C( 4), UINT8_C( 14), UINT8_C( 12), UINT8_C( 77), UINT8_C( 5), UINT8_C( 10), UINT8_C( 13) }, { INT8_C( 67), -INT8_C( 89), -INT8_C( 101), -INT8_C( 15), -INT8_C( 101), -INT8_C( 85), -INT8_C( 93), -INT8_C( 89), -INT8_C( 101), -INT8_C( 101), INT8_C( 1), INT8_C( 4), INT8_C( 39), INT8_C( 6), -INT8_C( 93), -INT8_C( 94) } }, { { INT8_C( 70), -INT8_C( 74), -INT8_C( 17), -INT8_C( 69), INT8_C( 25), INT8_C( 106), INT8_C( 4), -INT8_C( 50), -INT8_C( 66), INT8_C( 2), -INT8_C( 86), INT8_C( 12), INT8_C( 40), INT8_C( 20), -INT8_C( 87), INT8_C( 44) }, { INT8_C( 112), -INT8_C( 14), -INT8_C( 52), -INT8_C( 46), INT8_C( 0), INT8_C( 86), INT8_C( 81), -INT8_C( 74), -INT8_C( 125), INT8_C( 29), INT8_C( 112), INT8_C( 83), -INT8_C( 22), INT8_C( 114), -INT8_C( 32), INT8_C( 49) }, { UINT8_C( 8), UINT8_C( 15), UINT8_C( 12), UINT8_C( 1), UINT8_C( 9), UINT8_C(240), UINT8_C( 15), UINT8_C( 8), UINT8_C( 2), UINT8_C( 9), UINT8_C( 4), UINT8_C( 10), UINT8_C( 14), UINT8_C( 13), UINT8_C( 7), UINT8_C( 14) }, { -INT8_C( 125), INT8_C( 49), -INT8_C( 22), -INT8_C( 14), INT8_C( 29), INT8_C( 106), INT8_C( 49), -INT8_C( 125), -INT8_C( 52), INT8_C( 
29), INT8_C( 0), INT8_C( 112), -INT8_C( 32), INT8_C( 114), -INT8_C( 74), -INT8_C( 32) } }, { { -INT8_C( 16), -INT8_C( 122), -INT8_C( 94), INT8_C( 41), INT8_C( 118), -INT8_C( 78), INT8_C( 33), INT8_C( 105), INT8_C( 107), INT8_C( 37), -INT8_C( 125), INT8_C( 57), -INT8_C( 45), -INT8_C( 54), INT8_C( 120), INT8_C( 114) }, { -INT8_C( 34), -INT8_C( 119), INT8_C( 18), INT8_C( 72), -INT8_C( 21), INT8_C( 105), INT8_C( 53), INT8_C( 106), INT8_C( 48), INT8_C( 118), -INT8_C( 45), INT8_C( 105), -INT8_C( 106), INT8_C( 110), -INT8_C( 54), -INT8_C( 122) }, { UINT8_C( 4), UINT8_C( 12), UINT8_C( 0), UINT8_C(107), UINT8_C( 14), UINT8_C( 1), UINT8_C( 4), UINT8_C(138), UINT8_C( 7), UINT8_C( 7), UINT8_C( 3), UINT8_C(202), UINT8_C( 2), UINT8_C( 11), UINT8_C( 12), UINT8_C( 0) }, { -INT8_C( 21), -INT8_C( 106), -INT8_C( 34), INT8_C( 41), -INT8_C( 54), -INT8_C( 119), -INT8_C( 21), INT8_C( 105), INT8_C( 106), INT8_C( 106), INT8_C( 72), INT8_C( 57), INT8_C( 18), INT8_C( 105), -INT8_C( 106), -INT8_C( 34) } }, { { -INT8_C( 121), -INT8_C( 64), -INT8_C( 70), -INT8_C( 90), -INT8_C( 110), -INT8_C( 114), INT8_C( 48), -INT8_C( 119), -INT8_C( 26), -INT8_C( 13), INT8_C( 83), INT8_C( 8), INT8_C( 47), -INT8_C( 113), INT8_C( 8), -INT8_C( 13) }, { -INT8_C( 34), INT8_C( 80), -INT8_C( 93), -INT8_C( 106), -INT8_C( 51), -INT8_C( 68), INT8_C( 126), -INT8_C( 63), -INT8_C( 87), -INT8_C( 49), INT8_C( 75), INT8_C( 4), -INT8_C( 22), INT8_C( 91), INT8_C( 84), INT8_C( 114) }, { UINT8_C( 12), UINT8_C( 14), UINT8_C( 8), UINT8_C(174), UINT8_C( 13), UINT8_C( 8), UINT8_C( 7), UINT8_C( 3), UINT8_C( 11), UINT8_C( 10), UINT8_C( 11), UINT8_C( 10), UINT8_C( 9), UINT8_C( 3), UINT8_C( 14), UINT8_C( 8) }, { -INT8_C( 22), INT8_C( 84), -INT8_C( 87), -INT8_C( 90), INT8_C( 91), -INT8_C( 87), -INT8_C( 63), -INT8_C( 106), INT8_C( 4), INT8_C( 75), INT8_C( 4), INT8_C( 75), -INT8_C( 49), -INT8_C( 106), INT8_C( 84), -INT8_C( 87) } }, { { -INT8_C( 50), INT8_C( 81), -INT8_C( 30), INT8_C( 107), -INT8_C( 103), INT8_C( 25), -INT8_C( 18), -INT8_C( 
44), -INT8_C( 93), INT8_C( 121), INT8_C( 63), -INT8_C( 68), INT8_C( 12), -INT8_C( 99), -INT8_C( 76), -INT8_C( 17) }, { -INT8_C( 98), INT8_C( 67), -INT8_C( 97), INT8_C( 91), INT8_C( 80), INT8_C( 17), -INT8_C( 62), INT8_C( 44), -INT8_C( 51), INT8_C( 46), -INT8_C( 13), -INT8_C( 27), -INT8_C( 19), INT8_C( 44), INT8_C( 25), -INT8_C( 69) }, { UINT8_C( 13), UINT8_C( 11), UINT8_C( 6), UINT8_C( 6), UINT8_C( 4), UINT8_C( 4), UINT8_C(235), UINT8_C(183), UINT8_C( 13), UINT8_C( 10), UINT8_C( 4), UINT8_C( 9), UINT8_C( 7), UINT8_C( 8), UINT8_C( 8), UINT8_C( 5) }, { INT8_C( 44), -INT8_C( 27), -INT8_C( 62), -INT8_C( 62), INT8_C( 80), INT8_C( 80), -INT8_C( 18), -INT8_C( 44), INT8_C( 44), -INT8_C( 13), INT8_C( 80), INT8_C( 46), INT8_C( 44), -INT8_C( 51), -INT8_C( 51), INT8_C( 17) } }, { { INT8_C( 1), INT8_MIN, -INT8_C( 100), INT8_C( 21), -INT8_C( 107), -INT8_C( 121), -INT8_C( 51), INT8_C( 34), -INT8_C( 79), INT8_C( 65), -INT8_C( 68), INT8_C( 120), INT8_C( 105), INT8_C( 68), -INT8_C( 35), -INT8_C( 43) }, { INT8_C( 108), -INT8_C( 99), -INT8_C( 112), -INT8_C( 91), INT8_C( 32), INT8_C( 120), -INT8_C( 84), -INT8_C( 47), INT8_C( 84), -INT8_C( 104), INT8_C( 112), INT8_C( 92), -INT8_C( 99), -INT8_C( 54), -INT8_C( 31), -INT8_C( 98) }, { UINT8_C( 10), UINT8_C( 13), UINT8_C( 4), UINT8_C(223), UINT8_C( 4), UINT8_C( 1), UINT8_C( 2), UINT8_C( 5), UINT8_C( 2), UINT8_C( 14), UINT8_C( 45), UINT8_C( 11), UINT8_C( 2), UINT8_C( 10), UINT8_C( 0), UINT8_C( 15) }, { INT8_C( 112), -INT8_C( 54), INT8_C( 32), INT8_C( 21), INT8_C( 32), -INT8_C( 99), -INT8_C( 112), INT8_C( 120), -INT8_C( 112), -INT8_C( 31), -INT8_C( 68), INT8_C( 92), -INT8_C( 112), INT8_C( 112), INT8_C( 108), -INT8_C( 98) } }, { { INT8_C( 24), INT8_C( 72), -INT8_C( 2), INT8_C( 29), -INT8_C( 55), INT8_C( 0), -INT8_C( 46), -INT8_C( 117), -INT8_C( 66), INT8_C( 0), -INT8_C( 73), -INT8_C( 64), INT8_C( 10), -INT8_C( 73), INT8_C( 47), -INT8_C( 78) }, { INT8_C( 72), INT8_C( 68), INT8_C( 123), INT8_C( 82), INT8_C( 4), INT8_C( 21), -INT8_C( 81), INT8_C( 
93), INT8_C( 31), INT8_C( 105), INT8_C( 83), -INT8_C( 13), INT8_C( 4), -INT8_C( 25), INT8_C( 17), INT8_C( 28) }, { UINT8_C( 0), UINT8_C( 15), UINT8_C( 9), UINT8_C( 9), UINT8_C( 15), UINT8_C( 12), UINT8_C( 5), UINT8_C( 13), UINT8_C( 12), UINT8_C( 12), UINT8_C( 14), UINT8_C( 22), UINT8_C( 3), UINT8_C( 13), UINT8_C( 9), UINT8_C( 12) }, { INT8_C( 72), INT8_C( 28), INT8_C( 105), INT8_C( 105), INT8_C( 28), INT8_C( 4), INT8_C( 21), -INT8_C( 25), INT8_C( 4), INT8_C( 4), INT8_C( 17), -INT8_C( 64), INT8_C( 82), -INT8_C( 25), INT8_C( 105), INT8_C( 4) } }, { { -INT8_C( 116), INT8_C( 0), -INT8_C( 57), -INT8_C( 101), INT8_C( 12), INT8_C( 76), INT8_C( 105), INT8_C( 24), -INT8_C( 120), -INT8_C( 9), INT8_C( 47), INT8_C( 123), -INT8_C( 76), -INT8_C( 8), -INT8_C( 73), -INT8_C( 74) }, { INT8_C( 60), INT8_C( 69), -INT8_C( 68), -INT8_C( 107), -INT8_C( 125), INT8_C( 31), INT8_C( 13), INT8_C( 41), -INT8_C( 43), INT8_C( 120), -INT8_C( 44), INT8_C( 114), -INT8_C( 12), -INT8_C( 101), INT8_C( 64), INT8_MIN }, { UINT8_C( 11), UINT8_C( 7), UINT8_C( 12), UINT8_C( 8), UINT8_C( 3), UINT8_C(133), UINT8_C( 0), UINT8_C( 11), UINT8_C( 12), UINT8_C(239), UINT8_C( 6), UINT8_C( 0), UINT8_C( 7), UINT8_C( 14), UINT8_C( 6), UINT8_C( 3) }, { INT8_C( 114), INT8_C( 41), -INT8_C( 12), -INT8_C( 43), -INT8_C( 107), INT8_C( 76), INT8_C( 60), INT8_C( 114), -INT8_C( 12), -INT8_C( 9), INT8_C( 13), INT8_C( 60), INT8_C( 41), INT8_C( 64), INT8_C( 13), -INT8_C( 107) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t t = simde_vld1q_s8(test_vec[i].t); simde_uint8x16_t idx = simde_vld1q_u8(test_vec[i].idx); simde_int8x16_t r = simde_vqtbx1q_s8(a, t, idx); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t t = simde_test_arm_neon_random_i8x16(); 
simde_uint8x16_private idx_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 15; } } simde_uint8x16_t idx = simde_uint8x16_from_private(idx_); simde_int8x16_t r = simde_vqtbx1q_s8(a, t, idx); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, t, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbx1q_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t a[16]; SIMDE_ALIGN_TO_16 uint8_t t[16]; SIMDE_ALIGN_TO_16 uint8_t idx[16]; SIMDE_ALIGN_TO_16 uint8_t r[16]; } test_vec[] = { { { UINT8_C( 4), UINT8_C(126), UINT8_C(237), UINT8_C(145), UINT8_C(224), UINT8_C(143), UINT8_C(218), UINT8_C(212), UINT8_C(103), UINT8_C(184), UINT8_C( 98), UINT8_C(222), UINT8_C(203), UINT8_C(136), UINT8_C( 94), UINT8_C(117) }, { UINT8_C( 22), UINT8_C(132), UINT8_C( 9), UINT8_C( 24), UINT8_C( 14), UINT8_C( 58), UINT8_C( 48), UINT8_C( 2), UINT8_C( 52), UINT8_C(148), UINT8_C( 97), UINT8_C(203), UINT8_C(211), UINT8_C(189), UINT8_C(115), UINT8_C(215) }, { UINT8_C( 11), UINT8_C( 0), UINT8_C( 8), UINT8_C( 12), UINT8_C( 0), UINT8_C( 2), UINT8_C( 0), UINT8_C( 87), UINT8_C( 10), UINT8_C( 2), UINT8_C( 5), UINT8_C( 5), UINT8_C(218), UINT8_C( 3), UINT8_C( 11), UINT8_C( 1) }, { UINT8_C(203), UINT8_C( 22), UINT8_C( 52), UINT8_C(211), UINT8_C( 22), UINT8_C( 9), UINT8_C( 22), UINT8_C(212), UINT8_C( 97), UINT8_C( 9), UINT8_C( 58), UINT8_C( 58), UINT8_C(203), UINT8_C( 24), UINT8_C(203), UINT8_C(132) } }, { { UINT8_C( 82), UINT8_C(225), UINT8_C(157), UINT8_C( 66), UINT8_C( 35), UINT8_C(141), UINT8_C(153), UINT8_C( 30), UINT8_C(223), UINT8_C(206), UINT8_C(227), UINT8_C(186), UINT8_C( 97), UINT8_C( 30), UINT8_C(171), UINT8_C(121) }, { UINT8_C( 99), 
UINT8_C(180), UINT8_C(158), UINT8_C(225), UINT8_C(237), UINT8_C(198), UINT8_C(148), UINT8_C(187), UINT8_C( 79), UINT8_C( 19), UINT8_C( 92), UINT8_C(149), UINT8_C( 5), UINT8_C(213), UINT8_C( 22), UINT8_C( 87) }, { UINT8_C( 6), UINT8_C( 4), UINT8_C( 10), UINT8_C( 9), UINT8_C( 1), UINT8_C( 3), UINT8_C( 7), UINT8_C( 1), UINT8_C( 2), UINT8_C( 11), UINT8_C( 11), UINT8_C( 99), UINT8_C( 9), UINT8_C( 6), UINT8_C( 12), UINT8_C( 12) }, { UINT8_C(148), UINT8_C(237), UINT8_C( 92), UINT8_C( 19), UINT8_C(180), UINT8_C(225), UINT8_C(187), UINT8_C(180), UINT8_C(158), UINT8_C(149), UINT8_C(149), UINT8_C(186), UINT8_C( 19), UINT8_C(148), UINT8_C( 5), UINT8_C( 5) } }, { { UINT8_C(239), UINT8_C(220), UINT8_C(164), UINT8_C( 49), UINT8_C( 15), UINT8_C(155), UINT8_C( 82), UINT8_C( 17), UINT8_C(118), UINT8_C( 45), UINT8_C(117), UINT8_C(112), UINT8_C(179), UINT8_C( 81), UINT8_C(204), UINT8_C(237) }, { UINT8_C(204), UINT8_C( 10), UINT8_C( 20), UINT8_C( 13), UINT8_C(221), UINT8_C(247), UINT8_C(157), UINT8_C(194), UINT8_C( 55), UINT8_C(194), UINT8_C(173), UINT8_C( 75), UINT8_C(254), UINT8_C(239), UINT8_C( 22), UINT8_C(237) }, { UINT8_C( 11), UINT8_C( 10), UINT8_C( 14), UINT8_C( 10), UINT8_C( 5), UINT8_C( 0), UINT8_C( 12), UINT8_C( 12), UINT8_C( 13), UINT8_C( 1), UINT8_C( 12), UINT8_C( 0), UINT8_C( 2), UINT8_C( 8), UINT8_C( 13), UINT8_C( 15) }, { UINT8_C( 75), UINT8_C(173), UINT8_C( 22), UINT8_C(173), UINT8_C(247), UINT8_C(204), UINT8_C(254), UINT8_C(254), UINT8_C(239), UINT8_C( 10), UINT8_C(254), UINT8_C(204), UINT8_C( 20), UINT8_C( 55), UINT8_C(239), UINT8_C(237) } }, { { UINT8_C(156), UINT8_C(246), UINT8_C(244), UINT8_C(241), UINT8_C(103), UINT8_C(224), UINT8_C(189), UINT8_C( 4), UINT8_C( 65), UINT8_C(249), UINT8_C( 85), UINT8_C(243), UINT8_C( 2), UINT8_C(146), UINT8_C(114), UINT8_C( 21) }, { UINT8_C(228), UINT8_MAX, UINT8_C( 5), UINT8_C( 46), UINT8_C( 41), UINT8_C(183), UINT8_C(174), UINT8_C( 21), UINT8_C( 23), UINT8_C(122), UINT8_C( 0), UINT8_C(101), UINT8_C( 92), UINT8_C(216), 
UINT8_C(127), UINT8_C(248) }, { UINT8_C( 14), UINT8_C( 3), UINT8_C(234), UINT8_C( 53), UINT8_C( 3), UINT8_C(167), UINT8_C( 10), UINT8_C( 4), UINT8_C( 1), UINT8_C( 15), UINT8_C(135), UINT8_C( 3), UINT8_C( 1), UINT8_C( 10), UINT8_C( 8), UINT8_C( 6) }, { UINT8_C(127), UINT8_C( 46), UINT8_C(244), UINT8_C(241), UINT8_C( 46), UINT8_C(224), UINT8_C( 0), UINT8_C( 41), UINT8_MAX, UINT8_C(248), UINT8_C( 85), UINT8_C( 46), UINT8_MAX, UINT8_C( 0), UINT8_C( 23), UINT8_C(174) } }, { { UINT8_C(227), UINT8_C(156), UINT8_C( 19), UINT8_C( 54), UINT8_C( 67), UINT8_C( 77), UINT8_C(202), UINT8_C(228), UINT8_C(220), UINT8_C( 81), UINT8_C(135), UINT8_C(254), UINT8_C( 75), UINT8_C( 63), UINT8_C( 4), UINT8_C( 68) }, { UINT8_C(252), UINT8_C( 56), UINT8_C(102), UINT8_C(113), UINT8_C( 26), UINT8_C(158), UINT8_C(252), UINT8_C(119), UINT8_C(213), UINT8_C(237), UINT8_C( 49), UINT8_C(229), UINT8_C( 93), UINT8_C(227), UINT8_C(195), UINT8_C( 64) }, { UINT8_C( 15), UINT8_C( 6), UINT8_C( 6), UINT8_C( 2), UINT8_C( 4), UINT8_C( 64), UINT8_C( 7), UINT8_C( 0), UINT8_C( 2), UINT8_C( 46), UINT8_C(254), UINT8_C( 13), UINT8_C( 14), UINT8_C( 2), UINT8_C( 2), UINT8_C(106) }, { UINT8_C( 64), UINT8_C(252), UINT8_C(252), UINT8_C(102), UINT8_C( 26), UINT8_C( 77), UINT8_C(119), UINT8_C(252), UINT8_C(102), UINT8_C( 81), UINT8_C(135), UINT8_C(227), UINT8_C(195), UINT8_C(102), UINT8_C(102), UINT8_C( 68) } }, { { UINT8_C(122), UINT8_C(218), UINT8_C( 34), UINT8_C(158), UINT8_C( 26), UINT8_C(201), UINT8_C(159), UINT8_C(172), UINT8_C(247), UINT8_C(157), UINT8_C(138), UINT8_C(101), UINT8_C(160), UINT8_C(172), UINT8_C(208), UINT8_C(218) }, { UINT8_C( 52), UINT8_C(171), UINT8_C( 47), UINT8_C( 91), UINT8_C(131), UINT8_C(252), UINT8_C( 87), UINT8_C( 73), UINT8_C(249), UINT8_C( 56), UINT8_C(108), UINT8_C(218), UINT8_C(220), UINT8_C(207), UINT8_C( 57), UINT8_C( 86) }, { UINT8_C( 9), UINT8_C( 11), UINT8_C( 5), UINT8_C( 4), UINT8_C( 4), UINT8_C(148), UINT8_C( 0), UINT8_C( 12), UINT8_C( 1), UINT8_C( 10), UINT8_C( 1), UINT8_C( 1), 
UINT8_C(166), UINT8_C( 81), UINT8_C( 12), UINT8_C( 11) }, { UINT8_C( 56), UINT8_C(218), UINT8_C(252), UINT8_C(131), UINT8_C(131), UINT8_C(201), UINT8_C( 52), UINT8_C(220), UINT8_C(171), UINT8_C(108), UINT8_C(171), UINT8_C(171), UINT8_C(160), UINT8_C(172), UINT8_C(220), UINT8_C(218) } }, { { UINT8_C( 64), UINT8_C(236), UINT8_C(114), UINT8_C(100), UINT8_C(128), UINT8_C(227), UINT8_C(128), UINT8_C(178), UINT8_C(221), UINT8_C( 2), UINT8_C(131), UINT8_C(132), UINT8_C( 83), UINT8_C( 47), UINT8_C( 95), UINT8_C( 80) }, { UINT8_C( 11), UINT8_C(149), UINT8_C(209), UINT8_C(226), UINT8_C( 34), UINT8_C(154), UINT8_C(179), UINT8_C(231), UINT8_C(208), UINT8_C( 94), UINT8_C(136), UINT8_C(213), UINT8_C( 67), UINT8_C(127), UINT8_C(131), UINT8_C(131) }, { UINT8_C(108), UINT8_C(246), UINT8_C(231), UINT8_C( 12), UINT8_C( 9), UINT8_C( 8), UINT8_C( 14), UINT8_C( 6), UINT8_C( 10), UINT8_C( 34), UINT8_C( 10), UINT8_C( 13), UINT8_C( 1), UINT8_C( 9), UINT8_C( 14), UINT8_C( 12) }, { UINT8_C( 64), UINT8_C(236), UINT8_C(114), UINT8_C( 67), UINT8_C( 94), UINT8_C(208), UINT8_C(131), UINT8_C(179), UINT8_C(136), UINT8_C( 2), UINT8_C(136), UINT8_C(127), UINT8_C(149), UINT8_C( 94), UINT8_C(131), UINT8_C( 67) } }, { { UINT8_C(152), UINT8_C(254), UINT8_C(151), UINT8_C(113), UINT8_C(102), UINT8_C( 54), UINT8_C( 39), UINT8_C(208), UINT8_C( 88), UINT8_C( 98), UINT8_C(142), UINT8_C(169), UINT8_C(251), UINT8_C(156), UINT8_C( 6), UINT8_C( 42) }, { UINT8_C(123), UINT8_C( 69), UINT8_C(122), UINT8_C(244), UINT8_C( 55), UINT8_C(178), UINT8_C( 62), UINT8_C(136), UINT8_C(113), UINT8_C( 92), UINT8_C( 28), UINT8_C(176), UINT8_C(254), UINT8_C( 51), UINT8_C( 91), UINT8_C(150) }, { UINT8_C( 50), UINT8_C( 3), UINT8_C( 7), UINT8_C( 8), UINT8_C( 9), UINT8_C( 15), UINT8_C(105), UINT8_C(129), UINT8_C(145), UINT8_C( 7), UINT8_C( 10), UINT8_C( 12), UINT8_C( 3), UINT8_C( 0), UINT8_C( 6), UINT8_C( 14) }, { UINT8_C(152), UINT8_C(244), UINT8_C(136), UINT8_C(113), UINT8_C( 92), UINT8_C(150), UINT8_C( 39), UINT8_C(208), UINT8_C( 
88), UINT8_C(136), UINT8_C( 28), UINT8_C(254), UINT8_C(244), UINT8_C(123), UINT8_C( 62), UINT8_C( 91) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t t = simde_vld1q_u8(test_vec[i].t); simde_uint8x16_t idx = simde_vld1q_u8(test_vec[i].idx); simde_uint8x16_t r = simde_vqtbx1q_u8(a, t, idx); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t t = simde_test_arm_neon_random_u8x16(); simde_uint8x16_private idx_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 15; } } simde_uint8x16_t idx = simde_uint8x16_from_private(idx_); simde_uint8x16_t r = simde_vqtbx1q_u8(a, t, idx); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, t, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #if !defined(SIMDE_BUG_INTEL_857088) static int test_simde_vqtbx2q_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t a[16]; SIMDE_ALIGN_TO_16 int8_t t[2][16]; SIMDE_ALIGN_TO_16 uint8_t idx[16]; SIMDE_ALIGN_TO_16 int8_t r[16]; } test_vec[] = { { { INT8_C( 7), -INT8_C( 120), -INT8_C( 106), -INT8_C( 62), -INT8_C( 89), INT8_C( 10), -INT8_C( 91), INT8_C( 88), INT8_C( 49), INT8_C( 118), -INT8_C( 109), -INT8_C( 78), -INT8_C( 110), -INT8_C( 57), INT8_C( 33), -INT8_C( 51) }, { { INT8_C( 75), INT8_C( 121), INT8_C( 74), -INT8_C( 96), -INT8_C( 31), INT8_C( 105), INT8_C( 88), INT8_C( 13), INT8_C( 12), -INT8_C( 45), -INT8_C( 97), -INT8_C( 117), -INT8_C( 91), -INT8_C( 92), -INT8_C( 104), -INT8_C( 84) }, { 
INT8_C( 44), INT8_C( 46), INT8_C( 110), -INT8_C( 45), INT8_C( 56), INT8_C( 20), INT8_C( 43), INT8_C( 106), -INT8_C( 118), -INT8_C( 65), INT8_C( 28), INT8_C( 28), -INT8_C( 122), INT8_C( 61), -INT8_C( 23), -INT8_C( 47) } }, { UINT8_C( 23), UINT8_C( 19), UINT8_C( 17), UINT8_C( 24), UINT8_C( 29), UINT8_C( 10), UINT8_C( 6), UINT8_C( 9), UINT8_C( 29), UINT8_C( 5), UINT8_C( 20), UINT8_C( 2), UINT8_C(233), UINT8_C( 12), UINT8_C(238), UINT8_C( 21) }, { INT8_C( 106), -INT8_C( 45), INT8_C( 46), -INT8_C( 118), INT8_C( 61), -INT8_C( 97), INT8_C( 88), -INT8_C( 45), INT8_C( 61), INT8_C( 105), INT8_C( 56), INT8_C( 74), -INT8_C( 110), -INT8_C( 91), INT8_C( 33), INT8_C( 20) } }, { { INT8_C( 52), -INT8_C( 100), INT8_C( 70), -INT8_C( 47), INT8_C( 102), -INT8_C( 20), INT8_C( 122), INT8_C( 3), INT8_C( 49), -INT8_C( 81), INT8_C( 69), INT8_C( 27), INT8_C( 123), INT8_C( 51), INT8_C( 48), INT8_C( 118) }, { { -INT8_C( 113), INT8_C( 25), -INT8_C( 87), INT8_C( 0), INT8_C( 46), INT8_C( 70), -INT8_C( 5), INT8_C( 1), -INT8_C( 1), INT8_C( 18), INT8_C( 91), -INT8_C( 11), INT8_C( 19), -INT8_C( 123), -INT8_C( 93), INT8_C( 72) }, { INT8_C( 33), -INT8_C( 23), INT8_C( 25), -INT8_C( 121), -INT8_C( 43), -INT8_C( 108), -INT8_C( 118), INT8_C( 6), INT8_C( 67), -INT8_C( 49), INT8_C( 33), -INT8_C( 66), INT8_C( 2), INT8_C( 82), INT8_C( 52), -INT8_C( 110) } }, { UINT8_C( 11), UINT8_C( 29), UINT8_C( 18), UINT8_C( 25), UINT8_C( 3), UINT8_C( 13), UINT8_C(155), UINT8_C( 2), UINT8_C( 31), UINT8_C( 22), UINT8_C( 24), UINT8_C( 19), UINT8_C( 27), UINT8_C( 27), UINT8_C(251), UINT8_C( 29) }, { -INT8_C( 11), INT8_C( 82), INT8_C( 25), -INT8_C( 49), INT8_C( 0), -INT8_C( 123), INT8_C( 122), -INT8_C( 87), -INT8_C( 110), -INT8_C( 118), INT8_C( 67), -INT8_C( 121), -INT8_C( 66), -INT8_C( 66), INT8_C( 48), INT8_C( 82) } }, { { -INT8_C( 68), -INT8_C( 91), -INT8_C( 8), -INT8_C( 33), INT8_C( 50), -INT8_C( 109), INT8_C( 2), -INT8_C( 47), -INT8_C( 119), INT8_C( 26), -INT8_C( 124), INT8_C( 4), -INT8_C( 43), INT8_MAX, -INT8_C( 95), 
INT8_C( 121) }, { { -INT8_C( 108), -INT8_C( 58), -INT8_C( 14), INT8_C( 60), INT8_C( 117), INT8_C( 113), INT8_C( 40), -INT8_C( 13), INT8_C( 18), -INT8_C( 46), INT8_C( 116), INT8_C( 5), -INT8_C( 80), -INT8_C( 121), INT8_C( 100), INT8_C( 108) }, { INT8_C( 44), INT8_C( 92), INT8_C( 76), INT8_C( 94), -INT8_C( 17), INT8_C( 78), INT8_C( 48), INT8_C( 120), INT8_C( 104), -INT8_C( 76), INT8_C( 124), INT8_C( 61), INT8_C( 52), INT8_C( 30), -INT8_C( 74), -INT8_C( 56) } }, { UINT8_C( 4), UINT8_C(168), UINT8_C( 4), UINT8_C( 89), UINT8_C( 25), UINT8_C( 12), UINT8_C( 76), UINT8_C( 12), UINT8_C( 30), UINT8_C( 1), UINT8_C( 17), UINT8_C(175), UINT8_C( 8), UINT8_C( 21), UINT8_C( 27), UINT8_C( 21) }, { INT8_C( 117), -INT8_C( 91), INT8_C( 117), -INT8_C( 33), -INT8_C( 76), -INT8_C( 80), INT8_C( 2), -INT8_C( 80), -INT8_C( 74), -INT8_C( 58), INT8_C( 92), INT8_C( 4), INT8_C( 18), INT8_C( 78), INT8_C( 61), INT8_C( 78) } }, { { -INT8_C( 72), -INT8_C( 72), INT8_C( 48), -INT8_C( 46), -INT8_C( 27), INT8_C( 124), -INT8_C( 2), -INT8_C( 29), INT8_C( 61), INT8_C( 47), -INT8_C( 110), -INT8_C( 122), -INT8_C( 59), -INT8_C( 82), -INT8_C( 5), -INT8_C( 74) }, { { INT8_C( 21), -INT8_C( 50), -INT8_C( 105), -INT8_C( 53), -INT8_C( 46), -INT8_C( 17), -INT8_C( 24), -INT8_C( 118), -INT8_C( 60), INT8_C( 67), INT8_C( 118), -INT8_C( 73), INT8_C( 83), INT8_C( 42), -INT8_C( 114), INT8_C( 12) }, { -INT8_C( 30), -INT8_C( 66), -INT8_C( 34), -INT8_C( 57), INT8_C( 59), -INT8_C( 36), -INT8_C( 85), INT8_C( 120), INT8_C( 11), INT8_C( 61), -INT8_C( 2), -INT8_C( 48), -INT8_C( 21), -INT8_C( 7), -INT8_C( 121), INT8_C( 1) } }, { UINT8_C( 8), UINT8_C( 30), UINT8_C( 12), UINT8_C( 26), UINT8_C( 13), UINT8_C( 20), UINT8_C( 4), UINT8_C( 18), UINT8_C( 23), UINT8_C( 26), UINT8_C( 9), UINT8_C( 11), UINT8_C( 4), UINT8_C( 24), UINT8_C( 23), UINT8_C( 6) }, { -INT8_C( 60), -INT8_C( 121), INT8_C( 83), -INT8_C( 2), INT8_C( 42), INT8_C( 59), -INT8_C( 46), -INT8_C( 34), INT8_C( 120), -INT8_C( 2), INT8_C( 67), -INT8_C( 73), -INT8_C( 46), INT8_C( 
11), INT8_C( 120), -INT8_C( 24) } }, { { -INT8_C( 110), INT8_C( 15), -INT8_C( 28), -INT8_C( 97), -INT8_C( 61), INT8_C( 8), INT8_C( 113), -INT8_C( 69), -INT8_C( 94), -INT8_C( 5), INT8_C( 6), INT8_C( 102), INT8_C( 19), INT8_C( 93), INT8_C( 12), -INT8_C( 23) }, { { -INT8_C( 110), INT8_C( 122), -INT8_C( 5), -INT8_C( 93), -INT8_C( 109), -INT8_C( 123), -INT8_C( 65), -INT8_C( 22), INT8_C( 13), -INT8_C( 84), INT8_C( 44), -INT8_C( 113), INT8_C( 32), INT8_C( 111), -INT8_C( 39), -INT8_C( 78) }, { INT8_C( 126), -INT8_C( 67), INT8_C( 82), INT8_C( 65), -INT8_C( 59), -INT8_C( 61), -INT8_C( 4), INT8_C( 103), -INT8_C( 66), INT8_C( 2), -INT8_C( 51), -INT8_C( 47), INT8_C( 95), -INT8_C( 38), -INT8_C( 69), -INT8_C( 15) } }, { UINT8_C( 20), UINT8_C( 22), UINT8_C(148), UINT8_C( 8), UINT8_C( 27), UINT8_C( 20), UINT8_C( 18), UINT8_C( 72), UINT8_C( 0), UINT8_C( 30), UINT8_C(216), UINT8_C( 33), UINT8_C(109), UINT8_C( 17), UINT8_C( 19), UINT8_C( 11) }, { -INT8_C( 59), -INT8_C( 4), -INT8_C( 28), INT8_C( 13), -INT8_C( 47), -INT8_C( 59), INT8_C( 82), -INT8_C( 69), -INT8_C( 110), -INT8_C( 69), INT8_C( 6), INT8_C( 102), INT8_C( 19), -INT8_C( 67), INT8_C( 65), -INT8_C( 113) } }, { { -INT8_C( 22), INT8_C( 17), INT8_MIN, INT8_C( 37), INT8_C( 101), INT8_C( 82), INT8_C( 109), INT8_C( 101), INT8_C( 80), INT8_C( 69), -INT8_C( 122), -INT8_C( 67), -INT8_C( 9), INT8_C( 90), -INT8_C( 88), INT8_C( 102) }, { { INT8_MAX, -INT8_C( 44), -INT8_C( 102), INT8_C( 104), -INT8_C( 3), INT8_C( 54), INT8_C( 16), INT8_C( 41), -INT8_C( 96), -INT8_C( 119), -INT8_C( 76), -INT8_C( 29), -INT8_C( 67), INT8_C( 48), INT8_C( 123), -INT8_C( 89) }, { INT8_C( 65), -INT8_C( 5), -INT8_C( 52), -INT8_C( 90), INT8_C( 77), INT8_C( 57), INT8_C( 12), -INT8_C( 99), INT8_MAX, -INT8_C( 110), INT8_C( 90), INT8_C( 118), -INT8_C( 20), INT8_C( 2), -INT8_C( 36), INT8_C( 108) } }, { UINT8_C(215), UINT8_C( 22), UINT8_C( 20), UINT8_C( 20), UINT8_C( 13), UINT8_C( 4), UINT8_C( 29), UINT8_C( 13), UINT8_C( 13), UINT8_C( 17), UINT8_C( 16), UINT8_C( 10), 
UINT8_C( 2), UINT8_C( 12), UINT8_C( 17), UINT8_C( 3) }, { -INT8_C( 22), INT8_C( 12), INT8_C( 77), INT8_C( 77), INT8_C( 48), -INT8_C( 3), INT8_C( 2), INT8_C( 48), INT8_C( 48), -INT8_C( 5), INT8_C( 65), -INT8_C( 76), -INT8_C( 102), -INT8_C( 67), -INT8_C( 5), INT8_C( 104) } }, { { INT8_C( 30), -INT8_C( 107), -INT8_C( 101), -INT8_C( 53), INT8_C( 122), -INT8_C( 104), INT8_C( 24), -INT8_C( 25), INT8_C( 74), INT8_C( 73), INT8_C( 18), INT8_C( 44), -INT8_C( 11), -INT8_C( 29), INT8_C( 79), -INT8_C( 100) }, { { -INT8_C( 127), INT8_C( 25), -INT8_C( 111), INT8_C( 88), -INT8_C( 17), INT8_C( 36), -INT8_C( 82), INT8_C( 88), INT8_C( 17), INT8_C( 122), -INT8_C( 83), INT8_C( 0), INT8_C( 34), INT8_C( 110), -INT8_C( 57), INT8_C( 64) }, { INT8_C( 3), INT8_C( 98), INT8_C( 12), INT8_C( 125), -INT8_C( 6), INT8_C( 36), INT8_C( 101), INT8_C( 68), INT8_C( 109), INT8_C( 119), INT8_C( 112), INT8_C( 98), INT8_C( 90), -INT8_C( 64), -INT8_C( 1), -INT8_C( 37) } }, { UINT8_C( 25), UINT8_C(144), UINT8_C( 51), UINT8_C( 9), UINT8_C( 20), UINT8_C( 1), UINT8_C( 1), UINT8_C( 5), UINT8_C( 27), UINT8_C( 14), UINT8_C( 6), UINT8_C( 29), UINT8_C( 28), UINT8_C( 13), UINT8_C( 30), UINT8_C( 31) }, { INT8_C( 119), -INT8_C( 107), -INT8_C( 101), INT8_C( 122), -INT8_C( 6), INT8_C( 25), INT8_C( 25), INT8_C( 36), INT8_C( 98), -INT8_C( 57), -INT8_C( 82), -INT8_C( 64), INT8_C( 90), INT8_C( 110), -INT8_C( 1), -INT8_C( 37) } }, { { INT8_C( 78), INT8_C( 2), INT8_C( 1), INT8_C( 2), -INT8_C( 28), INT8_C( 34), -INT8_C( 56), INT8_C( 63), -INT8_C( 16), -INT8_C( 114), -INT8_C( 67), INT8_C( 44), INT8_C( 27), INT8_C( 123), INT8_C( 107), INT8_C( 10) }, { { INT8_C( 69), INT8_C( 40), -INT8_C( 13), INT8_C( 51), INT8_C( 74), INT8_C( 33), -INT8_C( 113), -INT8_C( 29), -INT8_C( 64), INT8_C( 78), -INT8_C( 41), INT8_C( 30), INT8_C( 11), -INT8_C( 90), INT8_C( 86), INT8_C( 89) }, { -INT8_C( 88), INT8_C( 87), INT8_C( 92), -INT8_C( 116), INT8_C( 121), INT8_C( 36), -INT8_C( 52), INT8_C( 105), -INT8_C( 78), -INT8_C( 119), -INT8_C( 107), -INT8_C( 
51), INT8_C( 4), INT8_C( 1), -INT8_C( 41), INT8_C( 73) } }, { UINT8_C( 9), UINT8_C( 10), UINT8_C( 28), UINT8_C( 20), UINT8_C( 12), UINT8_C( 12), UINT8_C( 23), UINT8_C( 12), UINT8_C( 26), UINT8_C( 14), UINT8_C(202), UINT8_C( 5), UINT8_C( 20), UINT8_C( 1), UINT8_C( 31), UINT8_C( 29) }, { INT8_C( 78), -INT8_C( 41), INT8_C( 4), INT8_C( 121), INT8_C( 11), INT8_C( 11), INT8_C( 105), INT8_C( 11), -INT8_C( 107), INT8_C( 86), -INT8_C( 67), INT8_C( 33), INT8_C( 121), INT8_C( 40), INT8_C( 73), INT8_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16x2_t t; t.val[0] = simde_vld1q_s8(test_vec[i].t[0]); t.val[1] = simde_vld1q_s8(test_vec[i].t[1]); simde_uint8x16_t idx = simde_vld1q_u8(test_vec[i].idx); simde_int8x16_t r = simde_vqtbx2q_s8(a, t, idx); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16x2_t t = simde_test_arm_neon_random_i8x16x2(); simde_uint8x16_private idx_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 31; } } simde_uint8x16_t idx = simde_uint8x16_from_private(idx_); simde_int8x16_t r = simde_vqtbx2q_s8(a, t, idx); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16x2(2, t, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbx2q_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t a[16]; SIMDE_ALIGN_TO_16 uint8_t t[2][16]; SIMDE_ALIGN_TO_16 uint8_t idx[16]; SIMDE_ALIGN_TO_16 uint8_t r[16]; } test_vec[] = { { { UINT8_C(120), UINT8_C(135), 
UINT8_C( 23), UINT8_C(165), UINT8_C(152), UINT8_C( 73), UINT8_C(154), UINT8_C(157), UINT8_C(218), UINT8_C(197), UINT8_C(238), UINT8_C(115), UINT8_C( 26), UINT8_C( 87), UINT8_C(126), UINT8_C(247) }, { { UINT8_C( 28), UINT8_C( 53), UINT8_C( 0), UINT8_C( 33), UINT8_C( 11), UINT8_C(223), UINT8_C(105), UINT8_C(234), UINT8_C(115), UINT8_C( 74), UINT8_C(131), UINT8_C(128), UINT8_C( 50), UINT8_C(101), UINT8_C(210), UINT8_C(170) }, { UINT8_C(236), UINT8_C(234), UINT8_C( 80), UINT8_C(132), UINT8_C( 51), UINT8_C(234), UINT8_C( 33), UINT8_C( 13), UINT8_C(176), UINT8_C( 16), UINT8_C(129), UINT8_C(202), UINT8_C(103), UINT8_MAX, UINT8_C(194), UINT8_C(131) } }, { UINT8_C( 20), UINT8_C( 2), UINT8_C(164), UINT8_C( 64), UINT8_C( 1), UINT8_C( 14), UINT8_C( 10), UINT8_C( 20), UINT8_C( 24), UINT8_C( 13), UINT8_C( 21), UINT8_C(138), UINT8_C( 18), UINT8_C(103), UINT8_C( 20), UINT8_C(254) }, { UINT8_C( 51), UINT8_C( 0), UINT8_C( 23), UINT8_C(165), UINT8_C( 53), UINT8_C(210), UINT8_C(131), UINT8_C( 51), UINT8_C(176), UINT8_C(101), UINT8_C(234), UINT8_C(115), UINT8_C( 80), UINT8_C( 87), UINT8_C( 51), UINT8_C(247) } }, { { UINT8_C(110), UINT8_C( 68), UINT8_C(135), UINT8_C( 15), UINT8_C( 82), UINT8_C(177), UINT8_C( 36), UINT8_C(170), UINT8_C( 95), UINT8_C(185), UINT8_C( 52), UINT8_C(113), UINT8_C( 32), UINT8_C(104), UINT8_C(112), UINT8_C(114) }, { { UINT8_C(237), UINT8_C(243), UINT8_C(247), UINT8_C( 92), UINT8_C(151), UINT8_C(137), UINT8_C(123), UINT8_C( 76), UINT8_C(157), UINT8_C(100), UINT8_C(104), UINT8_C(175), UINT8_C( 16), UINT8_C( 7), UINT8_C(246), UINT8_C(126) }, { UINT8_C( 75), UINT8_C(125), UINT8_C(141), UINT8_C(157), UINT8_C( 47), UINT8_C(177), UINT8_C( 71), UINT8_C(142), UINT8_C(106), UINT8_C(123), UINT8_MAX, UINT8_C(139), UINT8_C(228), UINT8_C(111), UINT8_C(253), UINT8_C(209) } }, { UINT8_C( 2), UINT8_C(244), UINT8_C( 13), UINT8_C( 26), UINT8_C( 29), UINT8_C( 8), UINT8_C( 70), UINT8_C( 26), UINT8_C( 12), UINT8_C( 14), UINT8_C( 10), UINT8_C( 28), UINT8_C( 21), UINT8_C( 0), UINT8_C( 
26), UINT8_C( 1) }, { UINT8_C(247), UINT8_C( 68), UINT8_C( 7), UINT8_MAX, UINT8_C(111), UINT8_C(157), UINT8_C( 36), UINT8_MAX, UINT8_C( 16), UINT8_C(246), UINT8_C(104), UINT8_C(228), UINT8_C(177), UINT8_C(237), UINT8_MAX, UINT8_C(243) } }, { { UINT8_C(192), UINT8_C( 67), UINT8_C(198), UINT8_C( 61), UINT8_C(235), UINT8_C( 12), UINT8_C( 88), UINT8_C(248), UINT8_C(186), UINT8_C( 34), UINT8_C( 20), UINT8_C(112), UINT8_C(226), UINT8_C(175), UINT8_C(113), UINT8_C( 32) }, { { UINT8_C(215), UINT8_C( 15), UINT8_C(141), UINT8_C(176), UINT8_C(245), UINT8_C(136), UINT8_C(244), UINT8_C( 87), UINT8_C(131), UINT8_C(195), UINT8_C(156), UINT8_C(237), UINT8_C(143), UINT8_C(179), UINT8_C(185), UINT8_C( 79) }, { UINT8_C(246), UINT8_C(128), UINT8_C(141), UINT8_C(226), UINT8_C(140), UINT8_C(229), UINT8_C(218), UINT8_C( 71), UINT8_C( 7), UINT8_C(238), UINT8_C(183), UINT8_C(233), UINT8_C(157), UINT8_C( 40), UINT8_C( 10), UINT8_C(116) } }, { UINT8_C( 23), UINT8_C( 23), UINT8_C( 5), UINT8_C( 13), UINT8_C( 0), UINT8_C( 25), UINT8_C( 4), UINT8_C( 3), UINT8_C( 29), UINT8_C( 0), UINT8_C( 16), UINT8_C( 12), UINT8_C( 19), UINT8_C( 9), UINT8_C( 28), UINT8_C( 10) }, { UINT8_C( 71), UINT8_C( 71), UINT8_C(136), UINT8_C(179), UINT8_C(215), UINT8_C(238), UINT8_C(245), UINT8_C(176), UINT8_C( 40), UINT8_C(215), UINT8_C(246), UINT8_C(143), UINT8_C(226), UINT8_C(195), UINT8_C(157), UINT8_C(156) } }, { { UINT8_C(192), UINT8_C(171), UINT8_C(224), UINT8_C(224), UINT8_C(197), UINT8_C(100), UINT8_C(131), UINT8_C(162), UINT8_C(133), UINT8_C( 19), UINT8_C( 14), UINT8_C( 88), UINT8_C( 92), UINT8_C(202), UINT8_C( 34), UINT8_C( 38) }, { { UINT8_C( 19), UINT8_C(206), UINT8_C(124), UINT8_C( 65), UINT8_C( 84), UINT8_C( 25), UINT8_C(118), UINT8_C(201), UINT8_C(109), UINT8_C(149), UINT8_C(219), UINT8_C(233), UINT8_C(189), UINT8_C( 97), UINT8_C(156), UINT8_C(125) }, { UINT8_C( 13), UINT8_C(125), UINT8_C( 93), UINT8_C(210), UINT8_C(225), UINT8_C(224), UINT8_C(116), UINT8_C(102), UINT8_C(243), UINT8_C(130), UINT8_C(191), 
UINT8_C( 80), UINT8_C( 77), UINT8_C(225), UINT8_C(118), UINT8_C( 96) } }, { UINT8_C(176), UINT8_C( 18), UINT8_C( 2), UINT8_C( 4), UINT8_C( 11), UINT8_C( 24), UINT8_C(205), UINT8_C(120), UINT8_C( 13), UINT8_C( 8), UINT8_C( 97), UINT8_C( 11), UINT8_C( 10), UINT8_C( 29), UINT8_C( 8), UINT8_C( 23) }, { UINT8_C(192), UINT8_C( 93), UINT8_C(124), UINT8_C( 84), UINT8_C(233), UINT8_C(243), UINT8_C(131), UINT8_C(162), UINT8_C( 97), UINT8_C(109), UINT8_C( 14), UINT8_C(233), UINT8_C(219), UINT8_C(225), UINT8_C(109), UINT8_C(102) } }, { { UINT8_C(210), UINT8_C( 47), UINT8_C( 23), UINT8_C(221), UINT8_C( 71), UINT8_C(229), UINT8_C( 85), UINT8_C(245), UINT8_C(141), UINT8_C(182), UINT8_C( 96), UINT8_C(151), UINT8_C(179), UINT8_C( 72), UINT8_C(174), UINT8_C( 46) }, { { UINT8_C(142), UINT8_C(151), UINT8_C(138), UINT8_C(181), UINT8_C(244), UINT8_C( 76), UINT8_C(207), UINT8_C(212), UINT8_C(206), UINT8_C( 57), UINT8_C( 0), UINT8_C( 49), UINT8_C( 25), UINT8_C(141), UINT8_C( 68), UINT8_C(235) }, { UINT8_C(188), UINT8_C( 91), UINT8_C(200), UINT8_C( 4), UINT8_C( 64), UINT8_C( 29), UINT8_C(249), UINT8_C(206), UINT8_C(211), UINT8_C( 89), UINT8_C(101), UINT8_C(134), UINT8_C(161), UINT8_C( 20), UINT8_C(180), UINT8_C( 48) } }, { UINT8_C( 11), UINT8_C( 30), UINT8_C( 5), UINT8_C( 0), UINT8_C(139), UINT8_C( 20), UINT8_C( 20), UINT8_C( 25), UINT8_C( 13), UINT8_C( 20), UINT8_C( 10), UINT8_C( 6), UINT8_C( 2), UINT8_C( 14), UINT8_C( 17), UINT8_C(190) }, { UINT8_C( 49), UINT8_C(180), UINT8_C( 76), UINT8_C(142), UINT8_C( 71), UINT8_C( 64), UINT8_C( 64), UINT8_C( 89), UINT8_C(141), UINT8_C( 64), UINT8_C( 0), UINT8_C(207), UINT8_C(138), UINT8_C( 68), UINT8_C( 91), UINT8_C( 46) } }, { { UINT8_C( 34), UINT8_C(203), UINT8_C(253), UINT8_C(173), UINT8_C(127), UINT8_C(113), UINT8_C( 6), UINT8_C(108), UINT8_C(229), UINT8_C(144), UINT8_C(114), UINT8_C(231), UINT8_C( 94), UINT8_C( 99), UINT8_C(166), UINT8_C(136) }, { { UINT8_C( 28), UINT8_C(104), UINT8_C(242), UINT8_C(242), UINT8_C( 36), UINT8_C( 42), UINT8_C(155), 
UINT8_C( 56), UINT8_C(199), UINT8_C(202), UINT8_C(238), UINT8_C(121), UINT8_C(174), UINT8_C(212), UINT8_C(214), UINT8_C(209) }, { UINT8_C(159), UINT8_C(211), UINT8_C(126), UINT8_C( 30), UINT8_C( 68), UINT8_C(133), UINT8_C(138), UINT8_C( 41), UINT8_C( 21), UINT8_C(252), UINT8_C( 17), UINT8_C(116), UINT8_C( 95), UINT8_C(183), UINT8_C(252), UINT8_C(123) } }, { UINT8_C( 31), UINT8_C( 14), UINT8_C( 13), UINT8_C( 3), UINT8_C( 24), UINT8_C( 8), UINT8_C(124), UINT8_C( 31), UINT8_C( 19), UINT8_C(106), UINT8_C( 88), UINT8_C(129), UINT8_C( 31), UINT8_C( 14), UINT8_C( 18), UINT8_C( 30) }, { UINT8_C(123), UINT8_C(214), UINT8_C(212), UINT8_C(242), UINT8_C( 21), UINT8_C(199), UINT8_C( 6), UINT8_C(123), UINT8_C( 30), UINT8_C(144), UINT8_C(114), UINT8_C(231), UINT8_C(123), UINT8_C(214), UINT8_C(126), UINT8_C(252) } }, { { UINT8_C(201), UINT8_C(204), UINT8_C(154), UINT8_C(225), UINT8_C(213), UINT8_C( 22), UINT8_C(193), UINT8_C(168), UINT8_C(128), UINT8_C( 25), UINT8_C( 41), UINT8_C(191), UINT8_C( 72), UINT8_C(124), UINT8_C(158), UINT8_C( 73) }, { { UINT8_C( 77), UINT8_C(155), UINT8_C(143), UINT8_C(163), UINT8_C( 34), UINT8_C(254), UINT8_C( 14), UINT8_C(166), UINT8_C(126), UINT8_C(238), UINT8_C(138), UINT8_C(181), UINT8_C(201), UINT8_C(233), UINT8_C( 11), UINT8_C(147) }, { UINT8_C(181), UINT8_C(165), UINT8_C(116), UINT8_C(138), UINT8_C(187), UINT8_C( 53), UINT8_C( 50), UINT8_C( 60), UINT8_C( 79), UINT8_C( 92), UINT8_C(251), UINT8_C(151), UINT8_C(216), UINT8_C(153), UINT8_C(224), UINT8_C( 37) } }, { UINT8_C( 20), UINT8_C(111), UINT8_C( 8), UINT8_C( 23), UINT8_C( 13), UINT8_C( 22), UINT8_C( 29), UINT8_C(235), UINT8_C( 4), UINT8_C( 7), UINT8_C( 0), UINT8_C( 14), UINT8_C( 16), UINT8_C( 12), UINT8_C( 1), UINT8_C( 38) }, { UINT8_C(187), UINT8_C(204), UINT8_C(126), UINT8_C( 60), UINT8_C(233), UINT8_C( 50), UINT8_C(153), UINT8_C(168), UINT8_C( 34), UINT8_C(166), UINT8_C( 77), UINT8_C( 11), UINT8_C(181), UINT8_C(201), UINT8_C(155), UINT8_C( 73) } }, { { UINT8_C( 1), UINT8_C( 4), UINT8_C(105), 
UINT8_C(110), UINT8_C(218), UINT8_C(103), UINT8_C( 90), UINT8_C(159), UINT8_C(238), UINT8_C(250), UINT8_C( 45), UINT8_C( 95), UINT8_C(166), UINT8_C( 78), UINT8_C(133), UINT8_C(248) }, { { UINT8_C(227), UINT8_C( 53), UINT8_C( 5), UINT8_C(174), UINT8_C( 24), UINT8_C( 78), UINT8_C(200), UINT8_C( 87), UINT8_C(146), UINT8_C(121), UINT8_C(110), UINT8_C(112), UINT8_C( 11), UINT8_C(170), UINT8_C(131), UINT8_C( 12) }, { UINT8_C(174), UINT8_C(236), UINT8_C(122), UINT8_C(137), UINT8_C( 83), UINT8_C(212), UINT8_C( 40), UINT8_C( 66), UINT8_C(207), UINT8_C( 85), UINT8_C(161), UINT8_C(117), UINT8_C(163), UINT8_C( 38), UINT8_C(109), UINT8_C(134) } }, { UINT8_C( 27), UINT8_C( 18), UINT8_C( 21), UINT8_C( 20), UINT8_C( 0), UINT8_C( 29), UINT8_C( 11), UINT8_C( 19), UINT8_C(119), UINT8_C( 26), UINT8_C( 3), UINT8_C( 2), UINT8_C(228), UINT8_C( 6), UINT8_C( 14), UINT8_C( 19) }, { UINT8_C(117), UINT8_C(122), UINT8_C(212), UINT8_C( 83), UINT8_C(227), UINT8_C( 38), UINT8_C(112), UINT8_C(137), UINT8_C(238), UINT8_C(161), UINT8_C(174), UINT8_C( 5), UINT8_C(166), UINT8_C(200), UINT8_C(131), UINT8_C(137) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16x2_t t; t.val[0] = simde_vld1q_u8(test_vec[i].t[0]); t.val[1] = simde_vld1q_u8(test_vec[i].t[1]); simde_uint8x16_t idx = simde_vld1q_u8(test_vec[i].idx); simde_uint8x16_t r = simde_vqtbx2q_u8(a, t, idx); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16x2_t t = simde_test_arm_neon_random_u8x16x2(); simde_uint8x16_private idx_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 31; } } simde_uint8x16_t idx = simde_uint8x16_from_private(idx_); 
simde_uint8x16_t r = simde_vqtbx2q_u8(a, t, idx); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16x2(2, t, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbx3q_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t a[16]; SIMDE_ALIGN_TO_16 int8_t t[3][16]; SIMDE_ALIGN_TO_16 uint8_t idx[16]; SIMDE_ALIGN_TO_16 int8_t r[16]; } test_vec[] = { { { -INT8_C( 7), -INT8_C( 74), INT8_MAX, INT8_C( 27), -INT8_C( 36), -INT8_C( 108), -INT8_C( 81), -INT8_C( 40), INT8_C( 69), -INT8_C( 88), -INT8_C( 119), INT8_C( 50), -INT8_C( 81), -INT8_C( 43), -INT8_C( 6), -INT8_C( 86) }, { { -INT8_C( 103), -INT8_C( 116), INT8_C( 51), -INT8_C( 15), INT8_C( 87), INT8_C( 111), INT8_C( 102), -INT8_C( 85), -INT8_C( 100), INT8_C( 38), -INT8_C( 122), -INT8_C( 109), INT8_C( 19), INT8_C( 35), INT8_C( 37), INT8_C( 12) }, { -INT8_C( 39), -INT8_C( 92), INT8_C( 39), -INT8_C( 75), INT8_C( 56), -INT8_C( 42), -INT8_C( 115), INT8_C( 125), INT8_C( 126), INT8_C( 22), -INT8_C( 80), INT8_C( 46), -INT8_C( 21), -INT8_C( 86), -INT8_C( 40), -INT8_C( 124) }, { INT8_C( 54), INT8_C( 11), INT8_C( 118), -INT8_C( 114), INT8_C( 123), -INT8_C( 36), INT8_C( 57), INT8_C( 23), INT8_C( 2), -INT8_C( 64), -INT8_C( 85), INT8_C( 21), -INT8_C( 29), -INT8_C( 48), INT8_C( 33), -INT8_C( 67) } }, { UINT8_C( 20), UINT8_C( 24), UINT8_C( 18), UINT8_C( 28), UINT8_C( 31), UINT8_C( 0), UINT8_C( 41), UINT8_C( 13), UINT8_C( 22), UINT8_C( 25), UINT8_C( 11), UINT8_C( 2), UINT8_C( 36), UINT8_C( 19), UINT8_C( 38), UINT8_C( 42) }, { INT8_C( 56), INT8_C( 126), INT8_C( 39), -INT8_C( 21), -INT8_C( 124), -INT8_C( 103), -INT8_C( 64), INT8_C( 35), -INT8_C( 115), INT8_C( 22), -INT8_C( 109), INT8_C( 51), INT8_C( 123), -INT8_C( 75), INT8_C( 57), -INT8_C( 85) } }, { { INT8_C( 91), INT8_C( 85), -INT8_C( 36), INT8_C( 122), INT8_C( 85), INT8_C( 6), 
INT8_C( 23), INT8_C( 107), -INT8_C( 33), -INT8_C( 29), INT8_C( 109), INT8_C( 99), -INT8_C( 122), -INT8_C( 12), INT8_C( 30), INT8_C( 53) }, { { -INT8_C( 16), INT8_C( 102), INT8_C( 95), -INT8_C( 55), -INT8_C( 24), -INT8_C( 95), -INT8_C( 91), INT8_C( 42), -INT8_C( 115), -INT8_C( 106), INT8_C( 80), INT8_C( 74), -INT8_C( 88), INT8_C( 50), INT8_C( 122), INT8_C( 3) }, { -INT8_C( 121), INT8_C( 87), INT8_C( 125), -INT8_C( 36), INT8_C( 93), -INT8_C( 107), INT8_C( 72), INT8_C( 60), INT8_C( 120), -INT8_C( 75), -INT8_C( 96), -INT8_C( 2), -INT8_C( 87), -INT8_C( 66), INT8_C( 52), -INT8_C( 102) }, { INT8_C( 36), -INT8_C( 109), INT8_C( 99), INT8_C( 13), INT8_C( 52), INT8_C( 8), INT8_C( 55), -INT8_C( 62), -INT8_C( 98), -INT8_C( 121), INT8_C( 12), INT8_C( 71), -INT8_C( 70), -INT8_C( 122), INT8_C( 74), INT8_C( 65) } }, { UINT8_C( 29), UINT8_C( 8), UINT8_C( 30), UINT8_C( 10), UINT8_C( 93), UINT8_C( 6), UINT8_C( 23), UINT8_C(213), UINT8_C( 27), UINT8_C( 23), UINT8_C(211), UINT8_C( 5), UINT8_C( 21), UINT8_C( 7), UINT8_C( 47), UINT8_C( 9) }, { -INT8_C( 66), -INT8_C( 115), INT8_C( 52), INT8_C( 80), INT8_C( 85), -INT8_C( 91), INT8_C( 60), INT8_C( 107), -INT8_C( 2), INT8_C( 60), INT8_C( 109), -INT8_C( 95), -INT8_C( 107), INT8_C( 42), INT8_C( 65), -INT8_C( 106) } }, { { -INT8_C( 61), -INT8_C( 33), INT8_C( 60), INT8_C( 32), INT8_C( 69), -INT8_C( 77), -INT8_C( 11), INT8_C( 96), -INT8_C( 54), -INT8_C( 56), INT8_C( 37), -INT8_C( 97), -INT8_C( 48), -INT8_C( 124), -INT8_C( 104), INT8_C( 107) }, { { INT8_C( 71), -INT8_C( 97), INT8_C( 58), INT8_C( 18), -INT8_C( 35), -INT8_C( 52), INT8_C( 123), -INT8_C( 94), INT8_C( 105), INT8_C( 44), INT8_C( 34), -INT8_C( 115), INT8_C( 39), -INT8_C( 29), -INT8_C( 113), -INT8_C( 22) }, { -INT8_C( 62), -INT8_C( 53), INT8_C( 10), INT8_C( 7), INT8_C( 126), -INT8_C( 1), INT8_C( 103), INT8_C( 72), -INT8_C( 57), -INT8_C( 115), -INT8_C( 25), -INT8_C( 105), INT8_C( 17), INT8_MAX, INT8_C( 2), INT8_C( 88) }, { INT8_C( 30), INT8_C( 61), INT8_C( 106), -INT8_C( 5), INT8_C( 9), 
-INT8_C( 26), -INT8_C( 98), INT8_C( 114), INT8_C( 18), -INT8_C( 64), INT8_C( 0), INT8_C( 57), -INT8_C( 93), -INT8_C( 113), INT8_C( 35), INT8_C( 101) } }, { UINT8_C( 42), UINT8_C( 45), UINT8_C(108), UINT8_C( 24), UINT8_C( 44), UINT8_C( 19), UINT8_C( 32), UINT8_C( 3), UINT8_C( 96), UINT8_C( 7), UINT8_C( 43), UINT8_C( 18), UINT8_C( 38), UINT8_C(141), UINT8_C( 10), UINT8_C(165) }, { INT8_C( 0), -INT8_C( 113), INT8_C( 60), -INT8_C( 57), -INT8_C( 93), INT8_C( 7), INT8_C( 30), INT8_C( 18), -INT8_C( 54), -INT8_C( 94), INT8_C( 57), INT8_C( 10), -INT8_C( 98), -INT8_C( 124), INT8_C( 34), INT8_C( 107) } }, { { -INT8_C( 74), INT8_C( 114), INT8_C( 7), -INT8_C( 30), INT8_C( 70), INT8_C( 39), -INT8_C( 43), -INT8_C( 90), INT8_C( 46), INT8_C( 96), INT8_C( 24), -INT8_C( 76), -INT8_C( 18), -INT8_C( 29), INT8_C( 89), -INT8_C( 72) }, { { INT8_C( 24), -INT8_C( 6), -INT8_C( 116), INT8_C( 51), INT8_C( 56), -INT8_C( 46), INT8_C( 96), INT8_C( 55), INT8_C( 24), -INT8_C( 58), -INT8_C( 40), -INT8_C( 19), INT8_C( 79), -INT8_C( 33), INT8_C( 28), INT8_C( 5) }, { INT8_C( 81), INT8_C( 35), -INT8_C( 25), -INT8_C( 105), INT8_C( 74), -INT8_C( 68), INT8_C( 62), INT8_C( 120), INT8_C( 29), INT8_C( 86), INT8_C( 44), INT8_C( 11), INT8_C( 57), -INT8_C( 122), -INT8_C( 61), INT8_C( 81) }, { INT8_MIN, INT8_C( 79), -INT8_C( 124), -INT8_C( 72), INT8_C( 33), -INT8_C( 28), -INT8_C( 17), INT8_C( 57), -INT8_C( 86), -INT8_C( 56), INT8_C( 38), -INT8_C( 7), -INT8_C( 89), INT8_C( 66), -INT8_C( 2), -INT8_C( 8) } }, { UINT8_C( 5), UINT8_C( 37), UINT8_C( 0), UINT8_C( 31), UINT8_C(162), UINT8_C( 14), UINT8_C( 39), UINT8_C( 47), UINT8_C( 36), UINT8_C( 36), UINT8_C( 10), UINT8_C( 46), UINT8_C( 26), UINT8_C( 45), UINT8_C( 31), UINT8_C( 42) }, { -INT8_C( 46), -INT8_C( 28), INT8_C( 24), INT8_C( 81), INT8_C( 70), INT8_C( 28), INT8_C( 57), -INT8_C( 8), INT8_C( 33), INT8_C( 33), -INT8_C( 40), -INT8_C( 2), INT8_C( 44), INT8_C( 66), INT8_C( 81), INT8_C( 38) } }, { { -INT8_C( 96), -INT8_C( 7), -INT8_C( 74), INT8_C( 66), -INT8_C( 57), 
-INT8_C( 35), INT8_C( 1), -INT8_C( 20), INT8_C( 49), -INT8_C( 53), INT8_C( 74), INT8_C( 11), INT8_C( 89), -INT8_C( 7), INT8_C( 101), INT8_C( 54) }, { { INT8_C( 45), INT8_C( 120), INT8_C( 52), INT8_C( 70), INT8_C( 122), INT8_C( 108), INT8_C( 9), INT8_C( 68), -INT8_C( 53), -INT8_C( 59), -INT8_C( 75), INT8_C( 108), INT8_MIN, INT8_C( 30), INT8_C( 114), INT8_C( 33) }, { INT8_C( 24), INT8_C( 40), INT8_C( 99), -INT8_C( 33), INT8_C( 6), INT8_C( 101), -INT8_C( 53), INT8_C( 55), INT8_C( 48), INT8_C( 21), INT8_C( 67), -INT8_C( 119), INT8_C( 15), -INT8_C( 88), -INT8_C( 65), INT8_C( 60) }, { INT8_C( 32), -INT8_C( 12), -INT8_C( 126), -INT8_C( 102), INT8_C( 96), -INT8_C( 117), -INT8_C( 34), INT8_C( 43), INT8_C( 81), -INT8_C( 109), -INT8_C( 105), -INT8_C( 47), -INT8_C( 78), INT8_C( 10), -INT8_C( 14), -INT8_C( 54) } }, { UINT8_C( 2), UINT8_C( 38), UINT8_C( 25), UINT8_C( 8), UINT8_C( 43), UINT8_C( 21), UINT8_C(112), UINT8_C( 43), UINT8_C(138), UINT8_C( 35), UINT8_C( 21), UINT8_C( 9), UINT8_C( 43), UINT8_C( 4), UINT8_C(214), UINT8_C( 28) }, { INT8_C( 52), -INT8_C( 34), INT8_C( 21), -INT8_C( 53), -INT8_C( 47), INT8_C( 101), INT8_C( 1), -INT8_C( 47), INT8_C( 49), -INT8_C( 102), INT8_C( 101), -INT8_C( 59), -INT8_C( 47), INT8_C( 122), INT8_C( 101), INT8_C( 15) } }, { { INT8_C( 79), -INT8_C( 82), -INT8_C( 63), INT8_C( 10), INT8_C( 35), INT8_C( 49), -INT8_C( 11), -INT8_C( 83), -INT8_C( 28), INT8_C( 106), INT8_C( 71), INT8_C( 63), -INT8_C( 97), INT8_C( 29), -INT8_C( 69), -INT8_C( 57) }, { { INT8_C( 117), -INT8_C( 46), INT8_C( 80), INT8_C( 89), -INT8_C( 57), INT8_C( 5), -INT8_C( 114), INT8_C( 79), INT8_C( 81), -INT8_C( 107), -INT8_C( 118), -INT8_C( 89), -INT8_C( 114), -INT8_C( 114), INT8_C( 47), -INT8_C( 35) }, { INT8_C( 60), -INT8_C( 16), -INT8_C( 25), INT8_C( 95), INT8_C( 33), -INT8_C( 36), INT8_C( 13), INT8_C( 5), INT8_C( 71), INT8_C( 84), INT8_C( 69), -INT8_C( 26), INT8_C( 113), INT8_C( 0), -INT8_C( 83), -INT8_C( 26) }, { -INT8_C( 46), -INT8_C( 2), INT8_C( 64), -INT8_C( 103), INT8_C( 3), 
-INT8_C( 50), -INT8_C( 23), INT8_C( 84), INT8_C( 99), INT8_C( 115), -INT8_C( 5), -INT8_C( 15), INT8_C( 1), INT8_C( 42), -INT8_C( 50), INT8_C( 62) } }, { UINT8_C( 27), UINT8_C( 37), UINT8_C(157), UINT8_C( 12), UINT8_C(146), UINT8_C(170), UINT8_C( 18), UINT8_C( 25), UINT8_C( 14), UINT8_C( 39), UINT8_C(191), UINT8_C(111), UINT8_C( 39), UINT8_C( 12), UINT8_C( 38), UINT8_C( 42) }, { -INT8_C( 26), -INT8_C( 50), -INT8_C( 63), -INT8_C( 114), INT8_C( 35), INT8_C( 49), -INT8_C( 25), INT8_C( 84), INT8_C( 47), INT8_C( 84), INT8_C( 71), INT8_C( 63), INT8_C( 84), -INT8_C( 114), -INT8_C( 23), -INT8_C( 5) } }, { { INT8_C( 61), INT8_C( 44), INT8_C( 62), -INT8_C( 49), -INT8_C( 41), INT8_MIN, -INT8_C( 88), -INT8_C( 43), INT8_C( 7), INT8_C( 103), INT8_C( 69), -INT8_C( 113), -INT8_C( 44), -INT8_C( 101), -INT8_C( 23), INT8_C( 62) }, { { INT8_C( 49), -INT8_C( 36), -INT8_C( 84), -INT8_C( 107), -INT8_C( 71), INT8_C( 109), INT8_C( 93), INT8_C( 8), INT8_C( 42), INT8_C( 23), INT8_C( 89), INT8_C( 17), -INT8_C( 97), -INT8_C( 24), INT8_C( 19), -INT8_C( 36) }, { INT8_C( 21), INT8_C( 81), -INT8_C( 84), -INT8_C( 20), -INT8_C( 46), INT8_C( 84), -INT8_C( 63), -INT8_C( 39), -INT8_C( 68), INT8_C( 6), INT8_C( 104), -INT8_C( 112), -INT8_C( 95), INT8_C( 81), -INT8_C( 50), -INT8_C( 46) }, { INT8_C( 46), INT8_C( 122), INT8_C( 104), -INT8_C( 25), -INT8_C( 24), -INT8_C( 59), -INT8_C( 17), INT8_C( 18), -INT8_C( 36), INT8_C( 73), INT8_C( 35), INT8_C( 123), INT8_C( 49), INT8_C( 54), INT8_C( 88), INT8_C( 70) } }, { UINT8_C( 39), UINT8_C( 4), UINT8_C( 2), UINT8_C( 41), UINT8_C( 88), UINT8_C(244), UINT8_C( 3), UINT8_C( 20), UINT8_C( 10), UINT8_C( 11), UINT8_C( 20), UINT8_C( 12), UINT8_C(237), UINT8_C(115), UINT8_C(110), UINT8_C( 27) }, { INT8_C( 18), -INT8_C( 71), -INT8_C( 84), INT8_C( 73), -INT8_C( 41), INT8_MIN, -INT8_C( 107), -INT8_C( 46), INT8_C( 89), INT8_C( 17), -INT8_C( 46), -INT8_C( 97), -INT8_C( 44), -INT8_C( 101), -INT8_C( 23), -INT8_C( 112) } }, { { INT8_C( 80), -INT8_C( 27), INT8_C( 33), -INT8_C( 88), 
-INT8_C( 39), INT8_C( 84), -INT8_C( 67), -INT8_C( 45), -INT8_C( 16), INT8_C( 97), INT8_C( 111), -INT8_C( 35), -INT8_C( 44), -INT8_C( 34), -INT8_C( 8), -INT8_C( 62) }, { { -INT8_C( 76), -INT8_C( 6), -INT8_C( 105), INT8_C( 80), -INT8_C( 21), INT8_MAX, -INT8_C( 55), INT8_C( 38), -INT8_C( 119), -INT8_C( 67), -INT8_C( 110), -INT8_C( 54), INT8_C( 9), INT8_C( 68), -INT8_C( 110), INT8_C( 89) }, { INT8_C( 41), -INT8_C( 77), INT8_C( 1), INT8_C( 2), INT8_C( 8), -INT8_C( 66), -INT8_C( 42), -INT8_C( 8), INT8_C( 32), INT8_C( 69), -INT8_C( 43), -INT8_C( 12), INT8_C( 35), -INT8_C( 51), -INT8_C( 74), -INT8_C( 40) }, { -INT8_C( 57), INT8_C( 78), INT8_C( 40), -INT8_C( 78), -INT8_C( 51), -INT8_C( 15), -INT8_C( 40), INT8_C( 86), -INT8_C( 82), INT8_C( 106), INT8_C( 32), -INT8_C( 73), -INT8_C( 81), -INT8_C( 78), INT8_C( 16), -INT8_C( 40) } }, { UINT8_C( 6), UINT8_C( 18), UINT8_C( 27), UINT8_C(110), UINT8_C(208), UINT8_C( 33), UINT8_C( 6), UINT8_C(240), UINT8_C( 6), UINT8_C( 11), UINT8_C(229), UINT8_C( 26), UINT8_C( 8), UINT8_C(155), UINT8_C( 2), UINT8_C( 15) }, { -INT8_C( 55), INT8_C( 1), -INT8_C( 12), -INT8_C( 88), -INT8_C( 39), INT8_C( 78), -INT8_C( 55), -INT8_C( 45), -INT8_C( 55), -INT8_C( 54), INT8_C( 111), -INT8_C( 43), -INT8_C( 119), -INT8_C( 34), -INT8_C( 105), INT8_C( 89) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16x3_t t; t.val[0] = simde_vld1q_s8(test_vec[i].t[0]); t.val[1] = simde_vld1q_s8(test_vec[i].t[1]); t.val[2] = simde_vld1q_s8(test_vec[i].t[2]); simde_uint8x16_t idx = simde_vld1q_u8(test_vec[i].idx); simde_int8x16_t r = simde_vqtbx3q_s8(a, t, idx); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16x3_t t = simde_test_arm_neon_random_i8x16x3(); simde_uint8x16_private idx_ = 
simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] %= 48; } } simde_uint8x16_t idx = simde_uint8x16_from_private(idx_); simde_int8x16_t r = simde_vqtbx3q_s8(a, t, idx); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16x3(2, t, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbx3q_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t a[16]; SIMDE_ALIGN_TO_16 uint8_t t[3][16]; SIMDE_ALIGN_TO_16 uint8_t idx[16]; SIMDE_ALIGN_TO_16 uint8_t r[16]; } test_vec[] = { { { UINT8_C(132), UINT8_C( 35), UINT8_C(180), UINT8_C( 82), UINT8_C(230), UINT8_C(115), UINT8_C( 84), UINT8_C(186), UINT8_C(238), UINT8_C( 5), UINT8_C(241), UINT8_C(182), UINT8_C(118), UINT8_C( 79), UINT8_C(195), UINT8_C(246) }, { { UINT8_C(244), UINT8_C(191), UINT8_C( 4), UINT8_C( 81), UINT8_C(216), UINT8_C(144), UINT8_C(137), UINT8_C(173), UINT8_C(245), UINT8_C(182), UINT8_C(128), UINT8_C(141), UINT8_C( 44), UINT8_C( 19), UINT8_C(214), UINT8_C(176) }, { UINT8_C( 54), UINT8_C(138), UINT8_C( 2), UINT8_C( 28), UINT8_C(254), UINT8_C( 87), UINT8_C(214), UINT8_C(236), UINT8_C( 92), UINT8_C(199), UINT8_C(163), UINT8_C(210), UINT8_C( 23), UINT8_C(102), UINT8_C(200), UINT8_C( 11) }, { UINT8_C( 37), UINT8_C(205), UINT8_C( 92), UINT8_C(253), UINT8_C( 93), UINT8_C(230), UINT8_C(170), UINT8_C( 83), UINT8_C(156), UINT8_C( 43), UINT8_C(224), UINT8_C(200), UINT8_C( 62), UINT8_C(182), UINT8_C(120), UINT8_C(116) } }, { UINT8_C( 16), UINT8_C( 27), UINT8_C( 0), UINT8_C( 14), UINT8_C( 18), UINT8_C( 7), UINT8_C( 43), UINT8_C( 46), UINT8_C( 46), UINT8_C( 14), UINT8_C( 0), UINT8_C( 21), UINT8_C( 52), UINT8_C( 8), UINT8_C( 32), UINT8_C( 41) }, { UINT8_C( 54), UINT8_C(210), 
UINT8_C(244), UINT8_C(214), UINT8_C( 2), UINT8_C(173), UINT8_C(200), UINT8_C(120), UINT8_C(120), UINT8_C(214), UINT8_C(244), UINT8_C( 87), UINT8_C(118), UINT8_C(245), UINT8_C( 37), UINT8_C( 43) } }, { { UINT8_C(234), UINT8_C(110), UINT8_C( 91), UINT8_C(188), UINT8_C(213), UINT8_C(134), UINT8_C(234), UINT8_C( 4), UINT8_C( 84), UINT8_C(234), UINT8_C( 73), UINT8_C(136), UINT8_C(179), UINT8_C(154), UINT8_C(225), UINT8_C( 72) }, { { UINT8_C( 71), UINT8_C( 55), UINT8_C( 59), UINT8_C(218), UINT8_C( 56), UINT8_C(129), UINT8_C( 9), UINT8_C(100), UINT8_C(167), UINT8_C( 0), UINT8_C(206), UINT8_C(131), UINT8_C(111), UINT8_C(172), UINT8_C(160), UINT8_C( 90) }, { UINT8_C( 27), UINT8_C(251), UINT8_C( 22), UINT8_C(240), UINT8_C(129), UINT8_C( 1), UINT8_C(244), UINT8_C(213), UINT8_C(235), UINT8_C( 62), UINT8_C( 93), UINT8_C(158), UINT8_C(216), UINT8_C( 62), UINT8_C(231), UINT8_C( 31) }, { UINT8_C(117), UINT8_C( 34), UINT8_C(249), UINT8_C(174), UINT8_C(164), UINT8_C( 2), UINT8_C( 18), UINT8_C( 75), UINT8_C( 2), UINT8_C(225), UINT8_C(207), UINT8_C(113), UINT8_C(141), UINT8_C(111), UINT8_C(203), UINT8_C(168) } }, { UINT8_C( 10), UINT8_C( 34), UINT8_C(153), UINT8_C(235), UINT8_C(227), UINT8_C(141), UINT8_C(192), UINT8_C( 14), UINT8_C( 11), UINT8_C( 29), UINT8_C( 13), UINT8_C( 19), UINT8_C( 43), UINT8_C( 36), UINT8_C(194), UINT8_C( 16) }, { UINT8_C(206), UINT8_C(249), UINT8_C( 91), UINT8_C(188), UINT8_C(213), UINT8_C(134), UINT8_C(234), UINT8_C(160), UINT8_C(131), UINT8_C( 62), UINT8_C(172), UINT8_C(240), UINT8_C(113), UINT8_C(164), UINT8_C(225), UINT8_C( 27) } }, { { UINT8_C(222), UINT8_C( 65), UINT8_C(249), UINT8_C(193), UINT8_C(206), UINT8_C(185), UINT8_C(144), UINT8_C(154), UINT8_C(214), UINT8_C(253), UINT8_C( 61), UINT8_C( 49), UINT8_C( 81), UINT8_C( 0), UINT8_C( 1), UINT8_C(199) }, { { UINT8_C(187), UINT8_C(128), UINT8_C(226), UINT8_C(121), UINT8_C( 17), UINT8_C( 72), UINT8_C( 56), UINT8_C(131), UINT8_C(125), UINT8_C(105), UINT8_C(130), UINT8_C( 33), UINT8_C(102), UINT8_C( 42), 
UINT8_C( 47), UINT8_C( 68) }, { UINT8_C(107), UINT8_C( 40), UINT8_C( 6), UINT8_C( 58), UINT8_C(225), UINT8_C(150), UINT8_C(212), UINT8_C(183), UINT8_C(147), UINT8_C( 17), UINT8_C(232), UINT8_C(228), UINT8_C( 17), UINT8_C(233), UINT8_C(171), UINT8_C(205) }, { UINT8_C(105), UINT8_C(141), UINT8_C( 70), UINT8_C(122), UINT8_C(213), UINT8_C(126), UINT8_C(253), UINT8_C( 82), UINT8_C(232), UINT8_C(128), UINT8_C(115), UINT8_C( 78), UINT8_C(170), UINT8_C(162), UINT8_C(146), UINT8_C( 22) } }, { UINT8_C( 10), UINT8_C( 8), UINT8_C( 32), UINT8_C(171), UINT8_C( 46), UINT8_C( 36), UINT8_C( 98), UINT8_C(193), UINT8_C( 5), UINT8_C( 26), UINT8_C( 21), UINT8_C( 23), UINT8_C( 52), UINT8_C( 81), UINT8_C( 20), UINT8_C( 13) }, { UINT8_C(130), UINT8_C(125), UINT8_C(105), UINT8_C(193), UINT8_C(146), UINT8_C(213), UINT8_C(144), UINT8_C(154), UINT8_C( 72), UINT8_C(232), UINT8_C(150), UINT8_C(183), UINT8_C( 81), UINT8_C( 0), UINT8_C(225), UINT8_C( 42) } }, { { UINT8_C( 57), UINT8_C(166), UINT8_C(146), UINT8_C(104), UINT8_C(202), UINT8_C(245), UINT8_C( 41), UINT8_MAX, UINT8_C( 63), UINT8_C(207), UINT8_C( 70), UINT8_C(115), UINT8_C( 32), UINT8_C( 90), UINT8_C( 17), UINT8_C(254) }, { { UINT8_C(180), UINT8_C( 41), UINT8_C(178), UINT8_C(141), UINT8_C( 62), UINT8_C(185), UINT8_C( 77), UINT8_C(212), UINT8_C( 51), UINT8_C( 92), UINT8_C( 20), UINT8_C( 79), UINT8_C(253), UINT8_C(106), UINT8_C( 54), UINT8_C( 54) }, { UINT8_C( 16), UINT8_C(201), UINT8_C(158), UINT8_C(218), UINT8_C(190), UINT8_C(200), UINT8_C(217), UINT8_C(253), UINT8_C(151), UINT8_C( 32), UINT8_C(113), UINT8_C(183), UINT8_C(122), UINT8_C(130), UINT8_C(181), UINT8_C( 47) }, { UINT8_C(171), UINT8_C(104), UINT8_C(188), UINT8_C(233), UINT8_C( 33), UINT8_C( 9), UINT8_C(189), UINT8_C( 84), UINT8_C(101), UINT8_C(209), UINT8_C(163), UINT8_C( 98), UINT8_C( 59), UINT8_C(218), UINT8_C(153), UINT8_C( 75) } }, { UINT8_C( 19), UINT8_C( 7), UINT8_C( 37), UINT8_C( 1), UINT8_C( 15), UINT8_C( 15), UINT8_C( 46), UINT8_C( 6), UINT8_C( 31), UINT8_C( 15), 
UINT8_C( 29), UINT8_C(153), UINT8_C( 33), UINT8_C( 3), UINT8_C( 8), UINT8_C(252) }, { UINT8_C(218), UINT8_C(212), UINT8_C( 9), UINT8_C( 41), UINT8_C( 54), UINT8_C( 54), UINT8_C(153), UINT8_C( 77), UINT8_C( 47), UINT8_C( 54), UINT8_C(130), UINT8_C(115), UINT8_C(104), UINT8_C(141), UINT8_C( 51), UINT8_C(254) } }, { { UINT8_C( 38), UINT8_C( 33), UINT8_C( 97), UINT8_C( 38), UINT8_C( 32), UINT8_C(192), UINT8_C(188), UINT8_C( 63), UINT8_C(143), UINT8_C( 10), UINT8_C(217), UINT8_C(225), UINT8_C( 13), UINT8_C(161), UINT8_C(221), UINT8_C(120) }, { { UINT8_C( 38), UINT8_C(195), UINT8_C( 4), UINT8_C(180), UINT8_C(103), UINT8_C(228), UINT8_C(167), UINT8_C(220), UINT8_C(103), UINT8_C(253), UINT8_C(140), UINT8_C(197), UINT8_C(236), UINT8_C(136), UINT8_C(197), UINT8_C( 19) }, { UINT8_C(170), UINT8_C( 39), UINT8_C( 57), UINT8_C(202), UINT8_C(231), UINT8_C(245), UINT8_C( 10), UINT8_C(118), UINT8_MAX, UINT8_C(227), UINT8_C( 87), UINT8_C( 12), UINT8_C(132), UINT8_C( 53), UINT8_C(132), UINT8_C(170) }, { UINT8_C(248), UINT8_C(136), UINT8_C( 94), UINT8_C( 95), UINT8_C(108), UINT8_C( 6), UINT8_C( 59), UINT8_C(212), UINT8_C( 3), UINT8_C(200), UINT8_C(153), UINT8_C(240), UINT8_C( 80), UINT8_C( 94), UINT8_C( 3), UINT8_C(250) } }, { UINT8_C( 37), UINT8_C( 12), UINT8_C(197), UINT8_C(108), UINT8_C( 1), UINT8_C( 15), UINT8_C(227), UINT8_C( 1), UINT8_C( 34), UINT8_C( 10), UINT8_C( 61), UINT8_C( 6), UINT8_C(111), UINT8_C(194), UINT8_C( 33), UINT8_C( 8) }, { UINT8_C( 6), UINT8_C(236), UINT8_C( 97), UINT8_C( 38), UINT8_C(195), UINT8_C( 19), UINT8_C(188), UINT8_C(195), UINT8_C( 94), UINT8_C(140), UINT8_C(217), UINT8_C(167), UINT8_C( 13), UINT8_C(161), UINT8_C(136), UINT8_C(103) } }, { { UINT8_C(120), UINT8_C(219), UINT8_C(116), UINT8_C(169), UINT8_C(170), UINT8_C( 87), UINT8_C(218), UINT8_C( 92), UINT8_C(146), UINT8_C( 24), UINT8_C(146), UINT8_C( 1), UINT8_C(218), UINT8_C(115), UINT8_C(105), UINT8_C( 36) }, { { UINT8_C(179), UINT8_C( 49), UINT8_C(219), UINT8_C(248), UINT8_C( 52), UINT8_C(102), 
UINT8_C( 65), UINT8_MAX, UINT8_C(138), UINT8_C(122), UINT8_C( 26), UINT8_C( 13), UINT8_C(182), UINT8_C( 48), UINT8_C( 21), UINT8_C( 46) }, { UINT8_C( 11), UINT8_C(137), UINT8_C(216), UINT8_C(181), UINT8_C(225), UINT8_C(178), UINT8_C( 17), UINT8_C(115), UINT8_C(202), UINT8_C(164), UINT8_C(116), UINT8_C(164), UINT8_C( 23), UINT8_C(222), UINT8_C(201), UINT8_C(202) }, { UINT8_C( 15), UINT8_C(164), UINT8_C(195), UINT8_C( 67), UINT8_C( 11), UINT8_C( 4), UINT8_C( 66), UINT8_C(149), UINT8_C(127), UINT8_C( 92), UINT8_C(162), UINT8_C( 53), UINT8_C(141), UINT8_C(183), UINT8_C(100), UINT8_C(152) } }, { UINT8_C( 65), UINT8_C( 60), UINT8_C( 30), UINT8_C( 34), UINT8_C( 46), UINT8_C( 47), UINT8_C( 5), UINT8_C(185), UINT8_C( 3), UINT8_C( 9), UINT8_C( 93), UINT8_C( 27), UINT8_C(231), UINT8_C( 38), UINT8_C( 37), UINT8_C( 6) }, { UINT8_C(120), UINT8_C(219), UINT8_C(201), UINT8_C(195), UINT8_C(100), UINT8_C(152), UINT8_C(102), UINT8_C( 92), UINT8_C(248), UINT8_C(122), UINT8_C(146), UINT8_C(164), UINT8_C(218), UINT8_C( 66), UINT8_C( 4), UINT8_C( 65) } }, { { UINT8_C( 1), UINT8_C( 75), UINT8_C( 40), UINT8_C(240), UINT8_C(171), UINT8_C(189), UINT8_C(169), UINT8_C(174), UINT8_C(199), UINT8_C( 6), UINT8_C(201), UINT8_C(174), UINT8_C( 45), UINT8_C(175), UINT8_C(165), UINT8_C(248) }, { { UINT8_C( 87), UINT8_C(222), UINT8_C(206), UINT8_C( 4), UINT8_C( 90), UINT8_C( 57), UINT8_C( 48), UINT8_C( 50), UINT8_C( 71), UINT8_C(146), UINT8_C(151), UINT8_C( 13), UINT8_C( 87), UINT8_C(148), UINT8_C( 19), UINT8_C( 89) }, { UINT8_C(224), UINT8_C( 60), UINT8_C( 73), UINT8_C(139), UINT8_C(249), UINT8_C(242), UINT8_C( 57), UINT8_C(192), UINT8_C(248), UINT8_C( 3), UINT8_C(111), UINT8_C( 37), UINT8_C(178), UINT8_C( 20), UINT8_C( 29), UINT8_C( 9) }, { UINT8_C(242), UINT8_C(235), UINT8_C( 14), UINT8_C( 76), UINT8_C( 37), UINT8_C( 62), UINT8_C(126), UINT8_C(108), UINT8_C(208), UINT8_C( 21), UINT8_C(121), UINT8_C( 40), UINT8_C(170), UINT8_C(141), UINT8_C(129), UINT8_C(138) } }, { UINT8_C( 9), UINT8_C( 10), UINT8_C( 
21), UINT8_C(194), UINT8_C( 44), UINT8_C( 78), UINT8_C(131), UINT8_C( 36), UINT8_C( 33), UINT8_C( 2), UINT8_C( 26), UINT8_C( 3), UINT8_C( 6), UINT8_C( 7), UINT8_C( 13), UINT8_C( 8) }, { UINT8_C(146), UINT8_C(151), UINT8_C(242), UINT8_C(240), UINT8_C(170), UINT8_C(189), UINT8_C(169), UINT8_C( 37), UINT8_C(235), UINT8_C(206), UINT8_C(111), UINT8_C( 4), UINT8_C( 48), UINT8_C( 50), UINT8_C(148), UINT8_C( 71) } }, { { UINT8_C(157), UINT8_C( 34), UINT8_C( 6), UINT8_C( 89), UINT8_C(112), UINT8_C(137), UINT8_C( 13), UINT8_C(194), UINT8_C(123), UINT8_C(231), UINT8_C(197), UINT8_C(129), UINT8_C(223), UINT8_C(210), UINT8_C(122), UINT8_C(194) }, { { UINT8_C(237), UINT8_C(191), UINT8_C(202), UINT8_C( 71), UINT8_C(130), UINT8_C( 62), UINT8_C(113), UINT8_C( 91), UINT8_C( 44), UINT8_C(195), UINT8_C(222), UINT8_C(167), UINT8_C(150), UINT8_C(235), UINT8_C(235), UINT8_C( 51) }, { UINT8_C( 13), UINT8_C(242), UINT8_C(140), UINT8_C(126), UINT8_C(123), UINT8_C(153), UINT8_C( 64), UINT8_C(247), UINT8_C(129), UINT8_C( 5), UINT8_C(120), UINT8_C( 96), UINT8_C(216), UINT8_C(242), UINT8_C( 34), UINT8_C(197) }, { UINT8_C(177), UINT8_C(236), UINT8_C( 12), UINT8_C( 52), UINT8_C( 42), UINT8_C(125), UINT8_C(143), UINT8_C( 87), UINT8_C( 64), UINT8_C(110), UINT8_C(254), UINT8_C(214), UINT8_C( 89), UINT8_C(234), UINT8_C( 9), UINT8_C(103) } }, { UINT8_C( 28), UINT8_C(149), UINT8_C( 37), UINT8_C( 39), UINT8_C( 47), UINT8_C( 37), UINT8_C( 78), UINT8_C( 32), UINT8_C( 42), UINT8_C( 7), UINT8_C( 16), UINT8_C( 2), UINT8_C(185), UINT8_C( 2), UINT8_C(200), UINT8_C( 11) }, { UINT8_C(216), UINT8_C( 34), UINT8_C(125), UINT8_C( 87), UINT8_C(103), UINT8_C(125), UINT8_C( 13), UINT8_C(177), UINT8_C(254), UINT8_C( 91), UINT8_C( 13), UINT8_C(202), UINT8_C(223), UINT8_C(202), UINT8_C(122), UINT8_C(167) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16x3_t t; t.val[0] = simde_vld1q_u8(test_vec[i].t[0]); t.val[1] = 
simde_vld1q_u8(test_vec[i].t[1]); t.val[2] = simde_vld1q_u8(test_vec[i].t[2]); simde_uint8x16_t idx = simde_vld1q_u8(test_vec[i].idx); simde_uint8x16_t r = simde_vqtbx3q_u8(a, t, idx); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16x3_t t = simde_test_arm_neon_random_u8x16x3(); simde_uint8x16_private idx_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] %= 48; } } simde_uint8x16_t idx = simde_uint8x16_from_private(idx_); simde_uint8x16_t r = simde_vqtbx3q_u8(a, t, idx); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16x3(2, t, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbx4q_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t a[16]; SIMDE_ALIGN_TO_16 int8_t t[4][16]; SIMDE_ALIGN_TO_16 uint8_t idx[16]; SIMDE_ALIGN_TO_16 int8_t r[16]; } test_vec[] = { { { -INT8_C( 16), INT8_C( 76), INT8_C( 33), -INT8_C( 127), INT8_C( 45), -INT8_C( 19), -INT8_C( 92), -INT8_C( 98), -INT8_C( 78), -INT8_C( 19), INT8_C( 79), INT8_C( 2), -INT8_C( 105), -INT8_C( 38), INT8_C( 18), -INT8_C( 106) }, { { INT8_C( 123), -INT8_C( 62), -INT8_C( 119), -INT8_C( 97), -INT8_C( 113), INT8_C( 91), -INT8_C( 62), INT8_C( 87), INT8_C( 88), INT8_C( 57), -INT8_C( 28), -INT8_C( 92), -INT8_C( 78), -INT8_C( 125), INT8_C( 24), -INT8_C( 94) }, { -INT8_C( 49), INT8_C( 57), INT8_C( 35), -INT8_C( 3), INT8_C( 39), -INT8_C( 57), -INT8_C( 101), -INT8_C( 39), -INT8_C( 75), -INT8_C( 21), -INT8_C( 37), INT8_C( 76), -INT8_C( 59), -INT8_C( 19), -INT8_C( 30), INT8_C( 64) }, { -INT8_C( 80), INT8_C( 
108), -INT8_C( 33), INT8_C( 63), -INT8_C( 57), -INT8_C( 95), -INT8_C( 106), INT8_C( 32), -INT8_C( 38), INT8_C( 122), -INT8_C( 60), -INT8_C( 116), -INT8_C( 2), -INT8_C( 36), INT8_C( 46), -INT8_C( 51) }, { INT8_C( 21), INT8_C( 81), -INT8_C( 54), INT8_C( 60), INT8_C( 25), INT8_C( 102), INT8_C( 22), -INT8_C( 50), INT8_C( 81), -INT8_C( 15), INT8_C( 26), INT8_C( 22), -INT8_C( 33), -INT8_C( 3), INT8_C( 86), -INT8_C( 113) } }, { UINT8_C( 41), UINT8_C( 53), UINT8_C(206), UINT8_C( 48), UINT8_C( 23), UINT8_C( 36), UINT8_C( 16), UINT8_C( 49), UINT8_C( 31), UINT8_C( 20), UINT8_C( 62), UINT8_C( 29), UINT8_C( 48), UINT8_C( 44), UINT8_C( 42), UINT8_C( 6) }, { INT8_C( 122), INT8_C( 102), INT8_C( 33), INT8_C( 21), -INT8_C( 39), -INT8_C( 57), -INT8_C( 49), INT8_C( 81), INT8_C( 64), INT8_C( 39), INT8_C( 86), -INT8_C( 19), INT8_C( 21), -INT8_C( 2), -INT8_C( 60), -INT8_C( 62) } }, { { -INT8_C( 51), -INT8_C( 122), INT8_C( 86), -INT8_C( 92), -INT8_C( 22), -INT8_C( 90), INT8_C( 86), -INT8_C( 55), -INT8_C( 69), -INT8_C( 108), -INT8_C( 90), -INT8_C( 85), INT8_C( 0), INT8_C( 81), -INT8_C( 79), -INT8_C( 66) }, { { -INT8_C( 58), -INT8_C( 12), -INT8_C( 107), -INT8_C( 95), INT8_C( 76), INT8_C( 58), -INT8_C( 51), -INT8_C( 106), -INT8_C( 6), INT8_C( 15), -INT8_C( 65), -INT8_C( 74), -INT8_C( 89), INT8_C( 119), -INT8_C( 36), INT8_C( 116) }, { -INT8_C( 3), INT8_C( 50), INT8_C( 25), -INT8_C( 24), -INT8_C( 40), INT8_C( 111), -INT8_C( 79), -INT8_C( 109), INT8_C( 3), INT8_C( 88), INT8_C( 63), INT8_C( 3), -INT8_C( 87), -INT8_C( 16), -INT8_C( 62), INT8_C( 111) }, { -INT8_C( 28), INT8_C( 87), INT8_C( 16), INT8_C( 49), -INT8_C( 110), -INT8_C( 35), -INT8_C( 57), -INT8_C( 116), -INT8_C( 20), -INT8_C( 121), INT8_C( 66), -INT8_C( 109), -INT8_C( 2), INT8_C( 30), INT8_C( 7), -INT8_C( 4) }, { INT8_C( 80), INT8_C( 32), -INT8_C( 28), INT8_C( 41), -INT8_C( 113), -INT8_C( 107), -INT8_C( 68), -INT8_C( 110), -INT8_C( 19), -INT8_C( 5), -INT8_C( 106), -INT8_C( 106), -INT8_C( 20), INT8_C( 88), INT8_C( 5), -INT8_C( 48) } }, { 
UINT8_C( 47), UINT8_C( 21), UINT8_C( 1), UINT8_C( 65), UINT8_C( 50), UINT8_C( 9), UINT8_C( 13), UINT8_C(222), UINT8_C( 16), UINT8_C( 16), UINT8_C( 49), UINT8_C( 14), UINT8_C( 46), UINT8_C( 57), UINT8_C( 74), UINT8_C( 63) }, { -INT8_C( 4), INT8_C( 111), -INT8_C( 12), -INT8_C( 92), -INT8_C( 28), INT8_C( 15), INT8_C( 119), -INT8_C( 55), -INT8_C( 3), -INT8_C( 3), INT8_C( 32), -INT8_C( 36), INT8_C( 7), -INT8_C( 5), -INT8_C( 79), -INT8_C( 48) } }, { { INT8_C( 99), INT8_C( 30), -INT8_C( 102), INT8_C( 85), -INT8_C( 25), INT8_C( 104), INT8_C( 52), INT8_C( 55), INT8_C( 120), -INT8_C( 91), -INT8_C( 123), -INT8_C( 90), INT8_C( 30), -INT8_C( 48), INT8_C( 37), -INT8_C( 72) }, { { -INT8_C( 2), -INT8_C( 51), -INT8_C( 31), -INT8_C( 62), INT8_C( 50), -INT8_C( 100), INT8_C( 116), -INT8_C( 110), -INT8_C( 18), -INT8_C( 68), -INT8_C( 34), -INT8_C( 105), INT8_C( 9), -INT8_C( 6), -INT8_C( 16), INT8_C( 108) }, { INT8_C( 24), -INT8_C( 117), -INT8_C( 62), -INT8_C( 1), -INT8_C( 13), -INT8_C( 10), INT8_C( 54), INT8_C( 107), -INT8_C( 101), -INT8_C( 68), INT8_C( 17), -INT8_C( 70), -INT8_C( 116), INT8_C( 55), INT8_C( 114), -INT8_C( 118) }, { INT8_C( 4), INT8_C( 83), INT8_C( 77), INT8_C( 54), -INT8_C( 17), -INT8_C( 63), -INT8_C( 56), -INT8_C( 35), INT8_C( 125), -INT8_C( 90), INT8_C( 117), -INT8_C( 122), -INT8_C( 95), INT8_C( 101), -INT8_C( 13), -INT8_C( 71) }, { -INT8_C( 16), -INT8_C( 75), -INT8_C( 71), -INT8_C( 29), -INT8_C( 85), -INT8_C( 17), INT8_C( 78), INT8_C( 70), -INT8_C( 85), INT8_C( 96), INT8_C( 0), INT8_C( 55), -INT8_C( 105), INT8_C( 114), -INT8_C( 62), -INT8_C( 101) } }, { UINT8_C( 5), UINT8_C( 15), UINT8_C( 18), UINT8_C( 53), UINT8_C( 16), UINT8_C(154), UINT8_C( 18), UINT8_C( 13), UINT8_C( 1), UINT8_C( 7), UINT8_C( 19), UINT8_C( 34), UINT8_C( 45), UINT8_C( 6), UINT8_C( 27), UINT8_C( 29) }, { -INT8_C( 100), INT8_C( 108), -INT8_C( 62), -INT8_C( 17), INT8_C( 24), INT8_C( 104), -INT8_C( 62), -INT8_C( 6), -INT8_C( 51), -INT8_C( 110), -INT8_C( 1), INT8_C( 77), INT8_C( 101), INT8_C( 116), 
-INT8_C( 70), INT8_C( 55) } }, { { -INT8_C( 8), -INT8_C( 12), INT8_C( 90), -INT8_C( 56), -INT8_C( 114), -INT8_C( 19), INT8_C( 21), -INT8_C( 49), -INT8_C( 12), -INT8_C( 24), -INT8_C( 79), INT8_C( 97), -INT8_C( 81), INT8_C( 77), -INT8_C( 65), INT8_C( 42) }, { { -INT8_C( 95), INT8_C( 0), INT8_C( 81), -INT8_C( 27), -INT8_C( 113), -INT8_C( 66), -INT8_C( 43), INT8_MAX, INT8_C( 43), -INT8_C( 4), INT8_C( 5), INT8_C( 11), -INT8_C( 27), INT8_C( 39), -INT8_C( 79), -INT8_C( 35) }, { INT8_C( 27), INT8_C( 11), -INT8_C( 91), -INT8_C( 86), -INT8_C( 8), -INT8_C( 70), INT8_C( 121), -INT8_C( 19), -INT8_C( 94), INT8_C( 43), INT8_C( 78), INT8_C( 81), INT8_C( 120), INT8_C( 13), INT8_C( 124), INT8_C( 25) }, { INT8_C( 13), -INT8_C( 51), -INT8_C( 1), -INT8_C( 99), -INT8_C( 117), -INT8_C( 44), INT8_C( 28), -INT8_C( 74), -INT8_C( 48), INT8_C( 33), -INT8_C( 62), -INT8_C( 75), INT8_C( 73), INT8_C( 115), -INT8_C( 110), INT8_C( 100) }, { INT8_C( 126), INT8_C( 55), INT8_C( 14), INT8_C( 119), -INT8_C( 15), -INT8_C( 120), INT8_C( 100), -INT8_C( 109), -INT8_C( 77), -INT8_C( 78), -INT8_C( 27), INT8_C( 43), -INT8_C( 64), INT8_C( 97), INT8_C( 68), -INT8_C( 51) } }, { UINT8_C( 46), UINT8_C( 3), UINT8_C( 42), UINT8_C( 57), UINT8_C( 23), UINT8_C( 6), UINT8_C( 47), UINT8_C( 39), UINT8_C(168), UINT8_C( 49), UINT8_C( 28), UINT8_C( 49), UINT8_C( 36), UINT8_C( 46), UINT8_C( 21), UINT8_C( 35) }, { -INT8_C( 110), -INT8_C( 27), -INT8_C( 62), -INT8_C( 78), -INT8_C( 19), -INT8_C( 43), INT8_C( 100), -INT8_C( 74), -INT8_C( 12), INT8_C( 55), INT8_C( 120), INT8_C( 55), -INT8_C( 117), -INT8_C( 110), -INT8_C( 70), -INT8_C( 99) } }, { { INT8_C( 82), -INT8_C( 88), INT8_C( 23), INT8_C( 105), INT8_C( 47), -INT8_C( 122), INT8_C( 81), -INT8_C( 41), -INT8_C( 72), -INT8_C( 19), -INT8_C( 56), INT8_C( 92), INT8_C( 28), INT8_C( 29), INT8_MAX, -INT8_C( 127) }, { { -INT8_C( 127), INT8_C( 25), -INT8_C( 40), INT8_C( 109), INT8_C( 23), -INT8_C( 62), INT8_C( 12), -INT8_C( 56), -INT8_C( 111), -INT8_C( 42), INT8_C( 56), -INT8_C( 63), 
-INT8_C( 27), INT8_C( 118), INT8_C( 31), INT8_C( 55) }, { INT8_C( 31), INT8_C( 54), -INT8_C( 96), INT8_C( 78), -INT8_C( 68), -INT8_C( 15), INT8_C( 37), INT8_C( 116), -INT8_C( 33), -INT8_C( 19), -INT8_C( 47), -INT8_C( 5), INT8_C( 10), INT8_C( 80), INT8_C( 124), -INT8_C( 116) }, { INT8_C( 106), INT8_C( 84), -INT8_C( 7), -INT8_C( 127), INT8_C( 22), INT8_C( 6), INT8_C( 73), -INT8_C( 89), -INT8_C( 36), -INT8_C( 126), INT8_C( 104), -INT8_C( 63), -INT8_C( 8), -INT8_C( 121), -INT8_C( 8), INT8_C( 23) }, { -INT8_C( 67), -INT8_C( 103), INT8_C( 101), INT8_C( 122), -INT8_C( 118), -INT8_C( 118), -INT8_C( 18), INT8_C( 105), INT8_C( 119), -INT8_C( 65), INT8_C( 100), -INT8_C( 126), INT8_C( 16), -INT8_C( 31), INT8_C( 14), INT8_C( 122) } }, { UINT8_C( 53), UINT8_C( 7), UINT8_C( 59), UINT8_C( 12), UINT8_C( 13), UINT8_C( 5), UINT8_C( 51), UINT8_C( 42), UINT8_C( 7), UINT8_C( 28), UINT8_C( 43), UINT8_C(191), UINT8_C(227), UINT8_C( 36), UINT8_C( 23), UINT8_C( 33) }, { -INT8_C( 118), -INT8_C( 56), -INT8_C( 126), -INT8_C( 27), INT8_C( 118), -INT8_C( 62), INT8_C( 122), INT8_C( 104), -INT8_C( 56), INT8_C( 10), -INT8_C( 63), INT8_C( 92), INT8_C( 28), INT8_C( 22), INT8_C( 116), INT8_C( 84) } }, { { -INT8_C( 42), INT8_C( 78), -INT8_C( 8), -INT8_C( 29), -INT8_C( 109), -INT8_C( 21), -INT8_C( 51), INT8_C( 90), INT8_C( 71), INT8_C( 121), INT8_C( 26), INT8_C( 43), INT8_C( 29), -INT8_C( 15), -INT8_C( 52), INT8_C( 90) }, { { INT8_C( 45), -INT8_C( 25), INT8_C( 33), -INT8_C( 12), -INT8_C( 16), INT8_C( 82), INT8_C( 51), -INT8_C( 71), -INT8_C( 24), -INT8_C( 13), -INT8_C( 110), INT8_C( 94), -INT8_C( 62), -INT8_C( 27), INT8_C( 10), -INT8_C( 104) }, { INT8_C( 52), INT8_C( 2), INT8_C( 123), -INT8_C( 57), -INT8_C( 18), INT8_C( 73), INT8_C( 34), INT8_C( 53), -INT8_C( 62), INT8_C( 60), INT8_C( 96), -INT8_C( 33), INT8_C( 45), INT8_C( 44), INT8_C( 57), INT8_C( 90) }, { INT8_C( 19), INT8_C( 90), INT8_C( 79), INT8_C( 4), -INT8_C( 83), -INT8_C( 126), -INT8_C( 67), -INT8_C( 107), INT8_C( 117), INT8_C( 80), -INT8_C( 
13), INT8_C( 55), INT8_C( 53), -INT8_C( 2), -INT8_C( 49), INT8_C( 105) }, { INT8_C( 0), INT8_C( 75), INT8_C( 49), -INT8_C( 18), -INT8_C( 108), INT8_C( 83), INT8_C( 36), INT8_C( 86), -INT8_C( 113), -INT8_C( 124), INT8_C( 53), -INT8_C( 68), -INT8_C( 79), INT8_C( 110), INT8_C( 22), -INT8_C( 60) } }, { UINT8_C( 8), UINT8_C(101), UINT8_C( 8), UINT8_C( 53), UINT8_C( 39), UINT8_C( 6), UINT8_C( 10), UINT8_C( 93), UINT8_C( 22), UINT8_C( 62), UINT8_C( 20), UINT8_C( 11), UINT8_C( 60), UINT8_C( 36), UINT8_C( 53), UINT8_C( 60) }, { -INT8_C( 24), INT8_C( 78), -INT8_C( 24), INT8_C( 83), -INT8_C( 107), INT8_C( 51), -INT8_C( 110), INT8_C( 90), INT8_C( 34), INT8_C( 22), -INT8_C( 18), INT8_C( 94), -INT8_C( 79), -INT8_C( 83), INT8_C( 83), -INT8_C( 79) } }, { { -INT8_C( 64), -INT8_C( 47), INT8_C( 122), -INT8_C( 89), INT8_C( 87), -INT8_C( 124), INT8_C( 4), INT8_C( 45), -INT8_C( 126), -INT8_C( 103), INT8_C( 57), INT8_C( 126), -INT8_C( 3), -INT8_C( 82), INT8_C( 123), -INT8_C( 84) }, { { INT8_C( 84), INT8_C( 102), -INT8_C( 17), INT8_C( 77), INT8_C( 117), -INT8_C( 120), -INT8_C( 43), INT8_C( 8), INT8_C( 86), INT8_C( 25), INT8_C( 77), -INT8_C( 110), INT8_C( 115), INT8_C( 86), -INT8_C( 106), INT8_C( 51) }, { INT8_C( 39), INT8_C( 16), -INT8_C( 37), INT8_MAX, -INT8_C( 107), -INT8_C( 33), -INT8_C( 84), INT8_C( 23), INT8_C( 120), -INT8_C( 27), -INT8_C( 106), INT8_C( 117), -INT8_C( 109), INT8_C( 17), INT8_C( 33), -INT8_C( 25) }, { INT8_C( 119), INT8_C( 16), INT8_C( 52), -INT8_C( 20), -INT8_C( 104), INT8_C( 9), -INT8_C( 12), -INT8_C( 18), INT8_C( 34), INT8_C( 65), INT8_MIN, -INT8_C( 106), -INT8_C( 105), INT8_C( 23), -INT8_C( 55), -INT8_C( 65) }, { INT8_C( 39), -INT8_C( 92), INT8_C( 62), -INT8_C( 68), -INT8_C( 124), -INT8_C( 22), -INT8_C( 44), -INT8_C( 4), -INT8_C( 48), INT8_C( 106), INT8_C( 114), INT8_C( 99), INT8_C( 123), -INT8_C( 109), INT8_C( 75), -INT8_C( 14) } }, { UINT8_C(164), UINT8_C( 63), UINT8_C( 30), UINT8_C( 60), UINT8_C( 9), UINT8_C(210), UINT8_C( 43), UINT8_C( 43), UINT8_C( 20), 
UINT8_C( 43), UINT8_C( 1), UINT8_C( 43), UINT8_C( 2), UINT8_C( 11), UINT8_C( 42), UINT8_C( 42) }, { -INT8_C( 64), -INT8_C( 14), INT8_C( 33), INT8_C( 123), INT8_C( 25), -INT8_C( 124), -INT8_C( 106), -INT8_C( 106), -INT8_C( 107), -INT8_C( 106), INT8_C( 102), -INT8_C( 106), -INT8_C( 17), -INT8_C( 110), INT8_MIN, INT8_MIN } }, { { -INT8_C( 111), INT8_C( 47), INT8_C( 22), INT8_C( 26), INT8_C( 2), INT8_C( 65), -INT8_C( 59), INT8_C( 22), -INT8_C( 20), INT8_C( 7), -INT8_C( 63), -INT8_C( 81), INT8_C( 18), INT8_C( 44), -INT8_C( 103), -INT8_C( 63) }, { { -INT8_C( 44), INT8_C( 63), -INT8_C( 11), INT8_C( 103), -INT8_C( 70), INT8_C( 37), -INT8_C( 54), -INT8_C( 98), -INT8_C( 57), -INT8_C( 111), -INT8_C( 2), -INT8_C( 4), -INT8_C( 94), INT8_C( 79), -INT8_C( 42), INT8_C( 51) }, { INT8_MAX, -INT8_C( 20), INT8_C( 77), -INT8_C( 127), INT8_C( 45), INT8_C( 19), -INT8_C( 105), INT8_C( 25), INT8_C( 26), INT8_C( 88), -INT8_C( 56), INT8_C( 44), -INT8_C( 124), INT8_C( 97), -INT8_C( 19), INT8_C( 89) }, { -INT8_C( 95), -INT8_C( 30), -INT8_C( 64), INT8_C( 91), INT8_C( 7), -INT8_C( 117), -INT8_C( 7), -INT8_C( 50), INT8_C( 28), -INT8_C( 9), -INT8_C( 53), -INT8_C( 66), INT8_C( 71), -INT8_C( 95), -INT8_C( 14), -INT8_C( 58) }, { -INT8_C( 115), INT8_C( 63), INT8_C( 71), -INT8_C( 70), INT8_C( 82), -INT8_C( 34), -INT8_C( 45), INT8_C( 108), INT8_C( 54), -INT8_C( 100), -INT8_C( 104), -INT8_C( 69), -INT8_C( 3), -INT8_C( 122), INT8_C( 20), -INT8_C( 98) } }, { UINT8_C( 40), UINT8_C( 20), UINT8_C( 57), UINT8_C( 48), UINT8_C( 31), UINT8_C( 51), UINT8_C( 62), UINT8_C( 59), UINT8_C( 42), UINT8_C( 9), UINT8_C( 58), UINT8_C( 49), UINT8_C( 42), UINT8_C( 44), UINT8_C( 55), UINT8_C( 55) }, { INT8_C( 28), INT8_C( 45), -INT8_C( 100), -INT8_C( 115), INT8_C( 89), -INT8_C( 70), INT8_C( 20), -INT8_C( 69), -INT8_C( 53), -INT8_C( 111), -INT8_C( 104), INT8_C( 63), -INT8_C( 53), INT8_C( 71), INT8_C( 108), INT8_C( 108) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = 
simde_vld1q_s8(test_vec[i].a); simde_int8x16x4_t t; t.val[0] = simde_vld1q_s8(test_vec[i].t[0]); t.val[1] = simde_vld1q_s8(test_vec[i].t[1]); t.val[2] = simde_vld1q_s8(test_vec[i].t[2]); t.val[3] = simde_vld1q_s8(test_vec[i].t[3]); simde_uint8x16_t idx = simde_vld1q_u8(test_vec[i].idx); simde_int8x16_t r = simde_vqtbx4q_s8(a, t, idx); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16x4_t t = simde_test_arm_neon_random_i8x16x4(); simde_uint8x16_private idx_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 63; } } simde_uint8x16_t idx = simde_uint8x16_from_private(idx_); simde_int8x16_t r = simde_vqtbx4q_s8(a, t, idx); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16x4(2, t, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vqtbx4q_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t a[16]; SIMDE_ALIGN_TO_16 uint8_t t[4][16]; SIMDE_ALIGN_TO_16 uint8_t idx[16]; SIMDE_ALIGN_TO_16 uint8_t r[16]; } test_vec[] = { { { UINT8_MAX, UINT8_C(228), UINT8_C(105), UINT8_C( 74), UINT8_C(188), UINT8_C(233), UINT8_C( 79), UINT8_C( 18), UINT8_C( 38), UINT8_C(224), UINT8_C(181), UINT8_C(158), UINT8_C(234), UINT8_C( 38), UINT8_C(221), UINT8_C(249) }, { { UINT8_C(252), UINT8_C(221), UINT8_C( 44), UINT8_C(189), UINT8_C(132), UINT8_C( 3), UINT8_C( 13), UINT8_C(107), UINT8_C(169), UINT8_C( 96), UINT8_C( 19), UINT8_C(114), UINT8_C( 96), UINT8_C(213), UINT8_C(203), UINT8_C( 95) }, { UINT8_C(185), UINT8_C( 52), UINT8_C(169), UINT8_C(118), UINT8_C( 29), UINT8_C(248), 
UINT8_C(136), UINT8_C( 68), UINT8_C(217), UINT8_C( 62), UINT8_C(226), UINT8_C(195), UINT8_C(100), UINT8_C(191), UINT8_C(189), UINT8_C( 96) }, { UINT8_C(157), UINT8_C(233), UINT8_C( 29), UINT8_C( 33), UINT8_C(237), UINT8_C( 42), UINT8_C(140), UINT8_C(150), UINT8_C(138), UINT8_C(159), UINT8_C( 8), UINT8_C(235), UINT8_C(117), UINT8_C(211), UINT8_C( 74), UINT8_C( 46) }, { UINT8_C( 8), UINT8_C(244), UINT8_C(164), UINT8_C( 37), UINT8_C(236), UINT8_C( 45), UINT8_C(105), UINT8_C(197), UINT8_C(107), UINT8_C( 76), UINT8_C(137), UINT8_C(207), UINT8_C( 11), UINT8_C( 70), UINT8_C( 48), UINT8_C(168) } }, { UINT8_C( 47), UINT8_C( 77), UINT8_C( 9), UINT8_C( 28), UINT8_C( 56), UINT8_C( 22), UINT8_C( 50), UINT8_C( 2), UINT8_C( 53), UINT8_C( 59), UINT8_C( 45), UINT8_C( 42), UINT8_C( 14), UINT8_C( 56), UINT8_C( 25), UINT8_C( 22) }, { UINT8_C( 46), UINT8_C(228), UINT8_C( 96), UINT8_C(100), UINT8_C(107), UINT8_C(136), UINT8_C(164), UINT8_C( 44), UINT8_C( 45), UINT8_C(207), UINT8_C(211), UINT8_C( 8), UINT8_C(203), UINT8_C(107), UINT8_C( 62), UINT8_C(136) } }, { { UINT8_C( 34), UINT8_C(239), UINT8_C(249), UINT8_C(154), UINT8_C( 69), UINT8_C(171), UINT8_C(157), UINT8_C( 58), UINT8_C(102), UINT8_C(138), UINT8_C(165), UINT8_C(245), UINT8_C(194), UINT8_C( 62), UINT8_C(139), UINT8_C(238) }, { { UINT8_C(123), UINT8_C( 71), UINT8_C( 7), UINT8_C(230), UINT8_C(109), UINT8_C(229), UINT8_C(187), UINT8_C(222), UINT8_C( 76), UINT8_C( 96), UINT8_C( 91), UINT8_C(249), UINT8_C( 53), UINT8_C(129), UINT8_C(213), UINT8_C( 88) }, { UINT8_C(112), UINT8_C(206), UINT8_C(242), UINT8_C(181), UINT8_C(122), UINT8_C(143), UINT8_C(239), UINT8_C(224), UINT8_C( 26), UINT8_C(148), UINT8_C(213), UINT8_C(220), UINT8_C(210), UINT8_C( 97), UINT8_C(203), UINT8_C( 78) }, { UINT8_C(168), UINT8_C(210), UINT8_C( 52), UINT8_C( 21), UINT8_C(183), UINT8_C(239), UINT8_C(244), UINT8_C( 3), UINT8_C( 80), UINT8_C( 79), UINT8_C(252), UINT8_C(133), UINT8_C(208), UINT8_C(209), UINT8_C(221), UINT8_C( 64) }, { UINT8_C(160), UINT8_C(208), 
UINT8_C(245), UINT8_C( 26), UINT8_C( 95), UINT8_C(229), UINT8_C(250), UINT8_C(121), UINT8_C(121), UINT8_C(208), UINT8_C( 86), UINT8_C( 76), UINT8_C( 49), UINT8_C( 33), UINT8_C(154), UINT8_C(217) } }, { UINT8_C( 51), UINT8_C( 14), UINT8_C( 47), UINT8_C( 42), UINT8_C( 61), UINT8_C( 35), UINT8_C( 45), UINT8_C( 13), UINT8_C( 50), UINT8_C(169), UINT8_C(147), UINT8_C( 3), UINT8_C( 58), UINT8_C( 48), UINT8_C( 3), UINT8_C( 26) }, { UINT8_C( 26), UINT8_C(213), UINT8_C( 64), UINT8_C(252), UINT8_C( 33), UINT8_C( 21), UINT8_C(209), UINT8_C(129), UINT8_C(245), UINT8_C(138), UINT8_C(165), UINT8_C(230), UINT8_C( 86), UINT8_C(160), UINT8_C(230), UINT8_C(213) } }, { { UINT8_C( 75), UINT8_C(248), UINT8_C( 45), UINT8_C( 9), UINT8_C(219), UINT8_C(218), UINT8_C( 22), UINT8_C( 14), UINT8_C(131), UINT8_C(169), UINT8_C( 17), UINT8_C(254), UINT8_C( 26), UINT8_C( 84), UINT8_C( 24), UINT8_C( 90) }, { { UINT8_C(141), UINT8_C( 77), UINT8_C(250), UINT8_C(171), UINT8_C(124), UINT8_C( 20), UINT8_C( 67), UINT8_C(123), UINT8_C(131), UINT8_C( 38), UINT8_C(171), UINT8_C( 20), UINT8_C(164), UINT8_C(180), UINT8_C(151), UINT8_C(239) }, { UINT8_C(173), UINT8_C(197), UINT8_C(248), UINT8_C(136), UINT8_C(159), UINT8_C( 15), UINT8_C(150), UINT8_C( 35), UINT8_C(184), UINT8_C(167), UINT8_C( 33), UINT8_C(210), UINT8_C(252), UINT8_C( 57), UINT8_C( 45), UINT8_C(137) }, { UINT8_C(134), UINT8_C( 39), UINT8_C( 53), UINT8_C( 2), UINT8_C( 59), UINT8_C(120), UINT8_C(125), UINT8_C(191), UINT8_C(158), UINT8_C( 40), UINT8_C(211), UINT8_C( 66), UINT8_C(221), UINT8_C(106), UINT8_C( 50), UINT8_C(138) }, { UINT8_C( 47), UINT8_C( 42), UINT8_C( 18), UINT8_C(207), UINT8_C( 57), UINT8_C(169), UINT8_C(242), UINT8_C(242), UINT8_C( 80), UINT8_C( 19), UINT8_C(196), UINT8_C( 76), UINT8_C( 76), UINT8_C(241), UINT8_C(214), UINT8_C(211) } }, { UINT8_C( 25), UINT8_C( 11), UINT8_C( 21), UINT8_C( 84), UINT8_C(131), UINT8_C( 19), UINT8_C( 19), UINT8_C( 33), UINT8_C( 59), UINT8_C( 38), UINT8_C( 36), UINT8_C( 24), UINT8_C( 17), UINT8_C( 22), 
UINT8_C( 34), UINT8_C( 0) }, { UINT8_C(167), UINT8_C( 20), UINT8_C( 15), UINT8_C( 9), UINT8_C(219), UINT8_C(136), UINT8_C(136), UINT8_C( 39), UINT8_C( 76), UINT8_C(125), UINT8_C( 59), UINT8_C(184), UINT8_C(197), UINT8_C(150), UINT8_C( 53), UINT8_C(141) } }, { { UINT8_C( 28), UINT8_C( 73), UINT8_C( 15), UINT8_C(159), UINT8_C(156), UINT8_C( 35), UINT8_C(192), UINT8_C( 24), UINT8_C( 9), UINT8_C( 36), UINT8_C(112), UINT8_C( 90), UINT8_C(186), UINT8_C( 83), UINT8_C(219), UINT8_C(123) }, { { UINT8_C( 72), UINT8_C( 42), UINT8_C(117), UINT8_C(230), UINT8_C(108), UINT8_C( 97), UINT8_C(212), UINT8_C(192), UINT8_C( 17), UINT8_C( 15), UINT8_C( 97), UINT8_C(179), UINT8_C( 32), UINT8_C(213), UINT8_C(110), UINT8_C( 60) }, { UINT8_C( 31), UINT8_C(126), UINT8_C(219), UINT8_C(187), UINT8_C(161), UINT8_C(156), UINT8_C(211), UINT8_C(170), UINT8_C(192), UINT8_C( 68), UINT8_C( 5), UINT8_C(123), UINT8_C(151), UINT8_C(224), UINT8_C(246), UINT8_C(223) }, { UINT8_C( 10), UINT8_C(107), UINT8_C(197), UINT8_C(118), UINT8_C(204), UINT8_C(153), UINT8_C( 55), UINT8_C(221), UINT8_C(169), UINT8_C(152), UINT8_C(145), UINT8_C(201), UINT8_C(110), UINT8_MAX, UINT8_C( 6), UINT8_C(141) }, { UINT8_C(125), UINT8_C(225), UINT8_C( 72), UINT8_C( 30), UINT8_C(125), UINT8_C( 28), UINT8_C(201), UINT8_C( 62), UINT8_C( 96), UINT8_C(206), UINT8_C(185), UINT8_C(247), UINT8_C(174), UINT8_C(175), UINT8_C(214), UINT8_C(184) } }, { UINT8_C( 26), UINT8_C( 27), UINT8_C( 47), UINT8_C( 38), UINT8_C( 52), UINT8_C( 38), UINT8_C( 3), UINT8_C( 29), UINT8_C(254), UINT8_C( 20), UINT8_C( 39), UINT8_C( 44), UINT8_C( 20), UINT8_C( 45), UINT8_C( 57), UINT8_C(209) }, { UINT8_C( 5), UINT8_C(123), UINT8_C(141), UINT8_C( 55), UINT8_C(125), UINT8_C( 55), UINT8_C(230), UINT8_C(224), UINT8_C( 9), UINT8_C(161), UINT8_C(221), UINT8_C(110), UINT8_C(161), UINT8_MAX, UINT8_C(206), UINT8_C(123) } }, { { UINT8_C( 38), UINT8_C( 28), UINT8_C(178), UINT8_C( 90), UINT8_C(130), UINT8_C(117), UINT8_C( 56), UINT8_C(129), UINT8_C(202), UINT8_C(223), 
UINT8_C(237), UINT8_C( 30), UINT8_C(140), UINT8_C(231), UINT8_C(239), UINT8_C( 26) }, { { UINT8_C( 41), UINT8_C(223), UINT8_C( 38), UINT8_C(135), UINT8_C(152), UINT8_C(112), UINT8_C( 69), UINT8_C( 31), UINT8_C(115), UINT8_C(250), UINT8_C( 84), UINT8_C( 37), UINT8_C(133), UINT8_C( 66), UINT8_C(241), UINT8_C(171) }, { UINT8_C( 94), UINT8_C(163), UINT8_C( 5), UINT8_C(225), UINT8_C( 25), UINT8_C( 61), UINT8_C( 98), UINT8_C(227), UINT8_C( 28), UINT8_C( 79), UINT8_C( 1), UINT8_C(168), UINT8_C( 54), UINT8_C(240), UINT8_C(195), UINT8_C( 95) }, { UINT8_C(208), UINT8_C(233), UINT8_C(230), UINT8_C(104), UINT8_C( 90), UINT8_C( 43), UINT8_C(136), UINT8_C(205), UINT8_C( 37), UINT8_C(220), UINT8_C(243), UINT8_C(170), UINT8_C( 30), UINT8_C(228), UINT8_C( 85), UINT8_C(125) }, { UINT8_C(136), UINT8_C( 91), UINT8_C( 94), UINT8_C(161), UINT8_C(152), UINT8_C(192), UINT8_C(132), UINT8_C(181), UINT8_C( 15), UINT8_C(133), UINT8_C( 93), UINT8_C( 70), UINT8_C(117), UINT8_C( 32), UINT8_C(165), UINT8_C( 69) } }, { UINT8_C( 10), UINT8_C( 12), UINT8_C( 46), UINT8_C( 36), UINT8_C(183), UINT8_C( 54), UINT8_C( 49), UINT8_C( 29), UINT8_C( 18), UINT8_C( 36), UINT8_C( 7), UINT8_C( 49), UINT8_C( 9), UINT8_C( 29), UINT8_C( 46), UINT8_C( 17) }, { UINT8_C( 84), UINT8_C(133), UINT8_C( 85), UINT8_C( 90), UINT8_C(130), UINT8_C(132), UINT8_C( 91), UINT8_C(240), UINT8_C( 5), UINT8_C( 90), UINT8_C( 31), UINT8_C( 91), UINT8_C(250), UINT8_C(240), UINT8_C( 85), UINT8_C(163) } }, { { UINT8_C( 83), UINT8_C(164), UINT8_C(113), UINT8_C( 10), UINT8_C(218), UINT8_C(163), UINT8_C(231), UINT8_C(236), UINT8_C(199), UINT8_C(111), UINT8_C( 29), UINT8_C(208), UINT8_C( 76), UINT8_C(203), UINT8_C( 97), UINT8_C(132) }, { { UINT8_C(215), UINT8_C(147), UINT8_C( 84), UINT8_C(163), UINT8_C( 73), UINT8_C(218), UINT8_C(127), UINT8_C(132), UINT8_C(189), UINT8_C(160), UINT8_C( 53), UINT8_C(192), UINT8_C(103), UINT8_C( 43), UINT8_C(206), UINT8_C(186) }, { UINT8_C(207), UINT8_C( 63), UINT8_C(197), UINT8_C(169), UINT8_C(226), 
UINT8_C(172), UINT8_C(149), UINT8_C(170), UINT8_C( 27), UINT8_C(179), UINT8_C(122), UINT8_C(103), UINT8_C(126), UINT8_C(220), UINT8_C(235), UINT8_C( 86) }, { UINT8_C(111), UINT8_C( 64), UINT8_C(249), UINT8_C(185), UINT8_C( 26), UINT8_C(120), UINT8_C( 61), UINT8_C(215), UINT8_C( 25), UINT8_C(114), UINT8_C(151), UINT8_C(128), UINT8_C(157), UINT8_C(101), UINT8_C( 59), UINT8_C(108) }, { UINT8_C(165), UINT8_C( 0), UINT8_C( 21), UINT8_C(135), UINT8_C(172), UINT8_C(171), UINT8_C( 49), UINT8_C(200), UINT8_C( 94), UINT8_C(172), UINT8_C( 47), UINT8_C(220), UINT8_C(136), UINT8_C( 27), UINT8_C( 50), UINT8_C(247) } }, { UINT8_C( 27), UINT8_C( 44), UINT8_C( 48), UINT8_C( 53), UINT8_C(164), UINT8_C(238), UINT8_C( 12), UINT8_C( 61), UINT8_C( 32), UINT8_C(227), UINT8_C( 62), UINT8_C(254), UINT8_C( 9), UINT8_C( 57), UINT8_C( 42), UINT8_C( 46) }, { UINT8_C(103), UINT8_C(157), UINT8_C(165), UINT8_C(171), UINT8_C(218), UINT8_C(163), UINT8_C(103), UINT8_C( 27), UINT8_C(111), UINT8_C(111), UINT8_C( 50), UINT8_C(208), UINT8_C(160), UINT8_C(172), UINT8_C(151), UINT8_C( 59) } }, { { UINT8_C(196), UINT8_C(131), UINT8_C( 8), UINT8_C(104), UINT8_C(113), UINT8_C( 84), UINT8_C( 38), UINT8_C(209), UINT8_C( 55), UINT8_C(100), UINT8_C(207), UINT8_C(128), UINT8_C(221), UINT8_C( 58), UINT8_C(110), UINT8_C( 86) }, { { UINT8_C(186), UINT8_C(228), UINT8_C(123), UINT8_C(229), UINT8_C(139), UINT8_C(105), UINT8_C(110), UINT8_C(222), UINT8_C(134), UINT8_C(211), UINT8_C(185), UINT8_C(190), UINT8_C(107), UINT8_C(139), UINT8_C( 81), UINT8_C( 47) }, { UINT8_C( 14), UINT8_C( 89), UINT8_C(152), UINT8_C(127), UINT8_C(173), UINT8_C(190), UINT8_C( 81), UINT8_C(228), UINT8_C( 34), UINT8_C( 32), UINT8_C(101), UINT8_MAX, UINT8_C( 90), UINT8_C(211), UINT8_C( 85), UINT8_C( 20) }, { UINT8_C(183), UINT8_C(208), UINT8_C(249), UINT8_C( 66), UINT8_C( 57), UINT8_C(103), UINT8_C( 32), UINT8_C(191), UINT8_C( 59), UINT8_C(217), UINT8_C(125), UINT8_C(166), UINT8_C(101), UINT8_C(206), UINT8_C(214), UINT8_C(115) }, { UINT8_C( 39), 
UINT8_C(110), UINT8_C(243), UINT8_C(212), UINT8_C( 44), UINT8_C( 68), UINT8_C(185), UINT8_C( 78), UINT8_C(100), UINT8_C( 30), UINT8_C( 77), UINT8_C(191), UINT8_C(241), UINT8_C(162), UINT8_C(211), UINT8_C(169) } }, { UINT8_C(114), UINT8_C( 13), UINT8_C( 43), UINT8_C( 44), UINT8_C( 52), UINT8_C( 12), UINT8_C( 43), UINT8_C( 47), UINT8_C( 37), UINT8_C(233), UINT8_C( 22), UINT8_C( 10), UINT8_C( 55), UINT8_C( 44), UINT8_C( 62), UINT8_C( 31) }, { UINT8_C(196), UINT8_C(139), UINT8_C(166), UINT8_C(101), UINT8_C( 44), UINT8_C(107), UINT8_C(166), UINT8_C(115), UINT8_C(103), UINT8_C(100), UINT8_C( 81), UINT8_C(185), UINT8_C( 78), UINT8_C(101), UINT8_C(211), UINT8_C( 20) } }, { { UINT8_C(185), UINT8_C( 16), UINT8_C(225), UINT8_C(237), UINT8_C( 28), UINT8_C( 77), UINT8_C( 93), UINT8_C( 2), UINT8_C( 54), UINT8_C(115), UINT8_C( 76), UINT8_C(237), UINT8_C( 95), UINT8_C( 10), UINT8_C(204), UINT8_C(185) }, { { UINT8_C(187), UINT8_C(128), UINT8_C( 63), UINT8_C(176), UINT8_C(236), UINT8_C( 19), UINT8_C( 10), UINT8_C(119), UINT8_C( 52), UINT8_C( 34), UINT8_C(243), UINT8_C(247), UINT8_C( 14), UINT8_C( 24), UINT8_C( 44), UINT8_C(199) }, { UINT8_C( 40), UINT8_C( 14), UINT8_C(181), UINT8_C( 69), UINT8_C( 91), UINT8_C( 18), UINT8_C( 71), UINT8_C(145), UINT8_C(133), UINT8_C(147), UINT8_C(126), UINT8_C(228), UINT8_C(158), UINT8_C( 75), UINT8_C(157), UINT8_C( 89) }, { UINT8_C(203), UINT8_C(220), UINT8_C( 10), UINT8_C(183), UINT8_C(239), UINT8_C( 20), UINT8_C( 46), UINT8_C( 35), UINT8_C( 54), UINT8_C( 33), UINT8_C( 26), UINT8_C( 69), UINT8_C( 57), UINT8_C( 70), UINT8_C( 12), UINT8_C( 98) }, { UINT8_C( 84), UINT8_C(193), UINT8_C(167), UINT8_C(175), UINT8_C(211), UINT8_C(238), UINT8_C( 64), UINT8_C( 88), UINT8_C(129), UINT8_C(191), UINT8_C( 60), UINT8_C( 31), UINT8_C( 10), UINT8_C(217), UINT8_C(121), UINT8_C(213) } }, { UINT8_C( 53), UINT8_C( 3), UINT8_C( 12), UINT8_C( 36), UINT8_C( 23), UINT8_C( 59), UINT8_C(199), UINT8_C( 13), UINT8_C( 28), UINT8_C( 33), UINT8_C( 18), UINT8_C( 22), UINT8_C( 40), 
UINT8_C( 31), UINT8_C( 56), UINT8_C( 60) }, { UINT8_C(238), UINT8_C(176), UINT8_C( 14), UINT8_C(239), UINT8_C(145), UINT8_C( 31), UINT8_C( 93), UINT8_C( 24), UINT8_C(158), UINT8_C(220), UINT8_C(181), UINT8_C( 71), UINT8_C( 54), UINT8_C( 89), UINT8_C(129), UINT8_C( 10) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16x4_t t; t.val[0] = simde_vld1q_u8(test_vec[i].t[0]); t.val[1] = simde_vld1q_u8(test_vec[i].t[1]); t.val[2] = simde_vld1q_u8(test_vec[i].t[2]); t.val[3] = simde_vld1q_u8(test_vec[i].t[3]); simde_uint8x16_t idx = simde_vld1q_u8(test_vec[i].idx); simde_uint8x16_t r = simde_vqtbx4q_u8(a, t, idx); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16x4_t t = simde_test_arm_neon_random_u8x16x4(); simde_uint8x16_private idx_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); for (size_t j = 0 ; j < (sizeof(idx_.values) / sizeof(idx_.values[0])) ; j++) { if (probability(PROBABILITY)) { idx_.values[j] &= 63; } } simde_uint8x16_t idx = simde_uint8x16_from_private(idx_); simde_uint8x16_t r = simde_vqtbx4q_u8(a, t, idx); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16x4(2, t, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vqtbx1_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbx1_u8) #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbx2_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbx2_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbx3_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbx3_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbx4_s8) 
SIMDE_TEST_FUNC_LIST_ENTRY(vqtbx4_u8) #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_ENTRY(vqtbx1q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbx1q_u8) #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbx2q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbx2q_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbx3q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbx3q_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbx4q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vqtbx4q_u8) #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/rbit.c000066400000000000000000000362611400333146700164330ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN rbit #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/rbit.h" #else #include "../../../simde/arm/neon.h" #endif static int test_simde_vrbit_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 66), INT8_C( 67), INT8_C( 108), INT8_C( 50), -INT8_C( 14), INT8_C( 85), -INT8_C( 13), INT8_C( 64) }, { INT8_C( 125), -INT8_C( 62), INT8_C( 54), INT8_C( 76), INT8_C( 79), -INT8_C( 86), -INT8_C( 49), INT8_C( 2) } }, { { INT8_C( 7), INT8_C( 91), -INT8_C( 13), INT8_C( 90), -INT8_C( 66), INT8_C( 87), INT8_C( 56), INT8_C( 33) }, { -INT8_C( 32), -INT8_C( 38), -INT8_C( 49), INT8_C( 90), INT8_C( 125), -INT8_C( 22), INT8_C( 28), -INT8_C( 124) } }, { { -INT8_C( 121), INT8_C( 54), -INT8_C( 123), INT8_C( 5), INT8_C( 65), INT8_C( 58), -INT8_C( 81), -INT8_C( 45) }, { -INT8_C( 31), INT8_C( 108), -INT8_C( 95), -INT8_C( 96), -INT8_C( 126), INT8_C( 92), -INT8_C( 11), -INT8_C( 53) } }, { { -INT8_C( 31), -INT8_C( 67), -INT8_C( 125), -INT8_C( 9), INT8_C( 93), -INT8_C( 67), -INT8_C( 11), INT8_C( 27) }, { -INT8_C( 121), -INT8_C( 67), -INT8_C( 63), -INT8_C( 17), -INT8_C( 70), -INT8_C( 67), -INT8_C( 81), -INT8_C( 40) } }, { { INT8_C( 0), INT8_C( 97), INT8_C( 78), -INT8_C( 14), -INT8_C( 74), INT8_C( 65), 
INT8_C( 50), -INT8_C( 67) }, { INT8_C( 0), -INT8_C( 122), INT8_C( 114), INT8_C( 79), INT8_C( 109), -INT8_C( 126), INT8_C( 76), -INT8_C( 67) } }, { { -INT8_C( 99), INT8_C( 37), INT8_C( 23), INT8_C( 91), INT8_C( 125), INT8_C( 79), INT8_C( 124), INT8_C( 4) }, { -INT8_C( 71), -INT8_C( 92), -INT8_C( 24), -INT8_C( 38), -INT8_C( 66), -INT8_C( 14), INT8_C( 62), INT8_C( 32) } }, { { -INT8_C( 123), INT8_C( 2), INT8_C( 10), -INT8_C( 57), INT8_C( 60), -INT8_C( 71), -INT8_C( 102), INT8_C( 29) }, { -INT8_C( 95), INT8_C( 64), INT8_C( 80), -INT8_C( 29), INT8_C( 60), -INT8_C( 99), INT8_C( 89), -INT8_C( 72) } }, { { INT8_C( 118), INT8_C( 30), INT8_C( 21), -INT8_C( 45), -INT8_C( 37), INT8_C( 10), -INT8_C( 17), -INT8_C( 37) }, { INT8_C( 110), INT8_C( 120), -INT8_C( 88), -INT8_C( 53), -INT8_C( 37), INT8_C( 80), -INT8_C( 9), -INT8_C( 37) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t r = simde_vrbit_s8(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vrbit_s8(a); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrbit_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(122), UINT8_C(196), UINT8_C( 76), UINT8_C( 92), UINT8_C(137), UINT8_C( 18), UINT8_C(206), UINT8_C(149) }, { UINT8_C( 94), UINT8_C( 35), UINT8_C( 50), UINT8_C( 58), UINT8_C(145), UINT8_C( 72), UINT8_C(115), UINT8_C(169) } }, { { UINT8_C( 38), UINT8_C(182), UINT8_C( 4), UINT8_C( 24), UINT8_C( 1), UINT8_C( 39), UINT8_C( 67), UINT8_C( 7) }, { UINT8_C(100), UINT8_C(109), UINT8_C( 32), UINT8_C( 24), UINT8_C(128), UINT8_C(228), UINT8_C(194), UINT8_C(224) } }, { { UINT8_C(187), 
UINT8_C( 47), UINT8_C( 18), UINT8_C( 74), UINT8_C(236), UINT8_C( 22), UINT8_C(185), UINT8_C( 52) }, { UINT8_C(221), UINT8_C(244), UINT8_C( 72), UINT8_C( 82), UINT8_C( 55), UINT8_C(104), UINT8_C(157), UINT8_C( 44) } }, { { UINT8_C(105), UINT8_C(226), UINT8_C( 61), UINT8_C( 19), UINT8_C( 3), UINT8_C( 86), UINT8_C( 43), UINT8_C(125) }, { UINT8_C(150), UINT8_C( 71), UINT8_C(188), UINT8_C(200), UINT8_C(192), UINT8_C(106), UINT8_C(212), UINT8_C(190) } }, { { UINT8_C( 26), UINT8_C(119), UINT8_C(218), UINT8_C(163), UINT8_C(137), UINT8_C(168), UINT8_C( 57), UINT8_C(175) }, { UINT8_C( 88), UINT8_C(238), UINT8_C( 91), UINT8_C(197), UINT8_C(145), UINT8_C( 21), UINT8_C(156), UINT8_C(245) } }, { { UINT8_C( 95), UINT8_C( 61), UINT8_C(199), UINT8_C( 96), UINT8_C(101), UINT8_C( 10), UINT8_C(103), UINT8_C( 32) }, { UINT8_C(250), UINT8_C(188), UINT8_C(227), UINT8_C( 6), UINT8_C(166), UINT8_C( 80), UINT8_C(230), UINT8_C( 4) } }, { { UINT8_C( 58), UINT8_C(122), UINT8_C(106), UINT8_C( 38), UINT8_C(144), UINT8_C( 35), UINT8_C( 90), UINT8_C(249) }, { UINT8_C( 92), UINT8_C( 94), UINT8_C( 86), UINT8_C(100), UINT8_C( 9), UINT8_C(196), UINT8_C( 90), UINT8_C(159) } }, { { UINT8_C( 5), UINT8_C(151), UINT8_C( 13), UINT8_C( 9), UINT8_C(237), UINT8_C( 56), UINT8_C(134), UINT8_C( 8) }, { UINT8_C(160), UINT8_C(233), UINT8_C(176), UINT8_C(144), UINT8_C(183), UINT8_C( 28), UINT8_C( 97), UINT8_C( 16) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t r = simde_vrbit_u8(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vrbit_u8(a); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrbitq_s8 (SIMDE_MUNIT_TEST_ARGS) { 
#if 1 static const struct { int8_t a[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 40), INT8_C( 124), INT8_C( 86), -INT8_C( 8), -INT8_C( 57), -INT8_C( 108), INT8_C( 33), INT8_C( 49), -INT8_C( 114), INT8_C( 82), -INT8_C( 94), INT8_C( 41), INT8_C( 57), INT8_C( 34), INT8_C( 43), INT8_C( 98) }, { INT8_C( 20), INT8_C( 62), INT8_C( 106), INT8_C( 31), -INT8_C( 29), INT8_C( 41), -INT8_C( 124), -INT8_C( 116), INT8_C( 113), INT8_C( 74), INT8_C( 69), -INT8_C( 108), -INT8_C( 100), INT8_C( 68), -INT8_C( 44), INT8_C( 70) } }, { { INT8_C( 7), -INT8_C( 113), -INT8_C( 105), -INT8_C( 98), -INT8_C( 22), -INT8_C( 122), -INT8_C( 64), -INT8_C( 91), INT8_C( 2), -INT8_C( 35), INT8_C( 77), -INT8_C( 102), INT8_C( 126), INT8_C( 9), INT8_C( 32), -INT8_C( 89) }, { -INT8_C( 32), -INT8_C( 15), -INT8_C( 23), INT8_C( 121), INT8_C( 87), INT8_C( 97), INT8_C( 3), -INT8_C( 91), INT8_C( 64), -INT8_C( 69), -INT8_C( 78), INT8_C( 89), INT8_C( 126), -INT8_C( 112), INT8_C( 4), -INT8_C( 27) } }, { { -INT8_C( 123), INT8_C( 118), -INT8_C( 97), INT8_C( 76), INT8_C( 10), -INT8_C( 64), INT8_C( 125), -INT8_C( 104), INT8_C( 19), INT8_C( 32), -INT8_C( 63), INT8_C( 76), INT8_C( 66), -INT8_C( 19), -INT8_C( 82), INT8_C( 73) }, { -INT8_C( 95), INT8_C( 110), -INT8_C( 7), INT8_C( 50), INT8_C( 80), INT8_C( 3), -INT8_C( 66), INT8_C( 25), -INT8_C( 56), INT8_C( 4), -INT8_C( 125), INT8_C( 50), INT8_C( 66), -INT8_C( 73), INT8_C( 117), -INT8_C( 110) } }, { { INT8_C( 124), INT8_C( 70), -INT8_C( 25), INT8_C( 102), -INT8_C( 52), -INT8_C( 89), INT8_C( 11), -INT8_C( 50), -INT8_C( 124), INT8_C( 88), INT8_C( 105), INT8_C( 3), INT8_C( 97), -INT8_C( 119), -INT8_C( 86), -INT8_C( 25) }, { INT8_C( 62), INT8_C( 98), -INT8_C( 25), INT8_C( 102), INT8_C( 51), -INT8_C( 27), -INT8_C( 48), INT8_C( 115), INT8_C( 33), INT8_C( 26), -INT8_C( 106), -INT8_C( 64), -INT8_C( 122), -INT8_C( 111), INT8_C( 85), -INT8_C( 25) } }, { { -INT8_C( 1), INT8_C( 73), INT8_C( 51), INT8_C( 10), INT8_C( 10), -INT8_C( 79), -INT8_C( 94), INT8_C( 29), -INT8_C( 47), 
INT8_C( 100), INT8_C( 105), INT8_C( 19), INT8_C( 81), INT8_C( 24), INT8_C( 92), -INT8_C( 51) }, { -INT8_C( 1), -INT8_C( 110), -INT8_C( 52), INT8_C( 80), INT8_C( 80), -INT8_C( 115), INT8_C( 69), -INT8_C( 72), -INT8_C( 117), INT8_C( 38), -INT8_C( 106), -INT8_C( 56), -INT8_C( 118), INT8_C( 24), INT8_C( 58), -INT8_C( 77) } }, { { INT8_C( 94), INT8_C( 68), INT8_C( 51), INT8_C( 42), -INT8_C( 21), INT8_C( 62), -INT8_C( 8), INT8_C( 112), -INT8_C( 105), INT8_C( 97), INT8_C( 115), -INT8_C( 8), -INT8_C( 22), INT8_C( 29), -INT8_C( 33), -INT8_C( 22) }, { INT8_C( 122), INT8_C( 34), -INT8_C( 52), INT8_C( 84), -INT8_C( 41), INT8_C( 124), INT8_C( 31), INT8_C( 14), -INT8_C( 23), -INT8_C( 122), -INT8_C( 50), INT8_C( 31), INT8_C( 87), -INT8_C( 72), -INT8_C( 5), INT8_C( 87) } }, { { INT8_C( 102), INT8_C( 19), -INT8_C( 12), INT8_C( 112), -INT8_C( 60), -INT8_C( 106), -INT8_C( 115), -INT8_C( 107), -INT8_C( 6), -INT8_C( 9), -INT8_C( 88), INT8_C( 75), INT8_C( 15), INT8_C( 4), INT8_C( 24), INT8_C( 109) }, { INT8_C( 102), -INT8_C( 56), INT8_C( 47), INT8_C( 14), INT8_C( 35), INT8_C( 105), -INT8_C( 79), -INT8_C( 87), INT8_C( 95), -INT8_C( 17), INT8_C( 21), -INT8_C( 46), -INT8_C( 16), INT8_C( 32), INT8_C( 24), -INT8_C( 74) } }, { { INT8_C( 72), INT8_C( 75), -INT8_C( 105), INT8_C( 52), -INT8_C( 118), -INT8_C( 113), -INT8_C( 92), INT8_C( 33), -INT8_C( 15), INT8_C( 23), INT8_C( 25), -INT8_C( 37), INT8_C( 52), -INT8_C( 7), -INT8_C( 59), -INT8_C( 102) }, { INT8_C( 18), -INT8_C( 46), -INT8_C( 23), INT8_C( 44), INT8_C( 81), -INT8_C( 15), INT8_C( 37), -INT8_C( 124), -INT8_C( 113), -INT8_C( 24), -INT8_C( 104), -INT8_C( 37), INT8_C( 44), -INT8_C( 97), -INT8_C( 93), INT8_C( 89) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t r = simde_vrbitq_s8(a); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t 
a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vrbitq_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrbitq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(133), UINT8_C(193), UINT8_C(210), UINT8_C(254), UINT8_C( 73), UINT8_C( 26), UINT8_C(187), UINT8_C( 26), UINT8_C( 78), UINT8_C(135), UINT8_C(101), UINT8_C(192), UINT8_C( 21), UINT8_C(142), UINT8_C( 24), UINT8_C(173) }, { UINT8_C(161), UINT8_C(131), UINT8_C( 75), UINT8_C(127), UINT8_C(146), UINT8_C( 88), UINT8_C(221), UINT8_C( 88), UINT8_C(114), UINT8_C(225), UINT8_C(166), UINT8_C( 3), UINT8_C(168), UINT8_C(113), UINT8_C( 24), UINT8_C(181) } }, { { UINT8_C( 67), UINT8_C( 44), UINT8_C( 70), UINT8_C(114), UINT8_C(231), UINT8_C( 76), UINT8_C(199), UINT8_C(211), UINT8_C(198), UINT8_C(167), UINT8_C( 31), UINT8_C( 82), UINT8_C(215), UINT8_C( 15), UINT8_C( 52), UINT8_C( 92) }, { UINT8_C(194), UINT8_C( 52), UINT8_C( 98), UINT8_C( 78), UINT8_C(231), UINT8_C( 50), UINT8_C(227), UINT8_C(203), UINT8_C( 99), UINT8_C(229), UINT8_C(248), UINT8_C( 74), UINT8_C(235), UINT8_C(240), UINT8_C( 44), UINT8_C( 58) } }, { { UINT8_C(208), UINT8_C( 7), UINT8_C( 90), UINT8_C( 26), UINT8_C( 33), UINT8_C( 22), UINT8_C( 52), UINT8_C(112), UINT8_C(157), UINT8_C(153), UINT8_C( 48), UINT8_C(178), UINT8_C( 40), UINT8_C( 73), UINT8_C( 95), UINT8_C(107) }, { UINT8_C( 11), UINT8_C(224), UINT8_C( 90), UINT8_C( 88), UINT8_C(132), UINT8_C(104), UINT8_C( 44), UINT8_C( 14), UINT8_C(185), UINT8_C(153), UINT8_C( 12), UINT8_C( 77), UINT8_C( 20), UINT8_C(146), UINT8_C(250), UINT8_C(214) } }, { { UINT8_C(117), UINT8_C(165), UINT8_C(221), UINT8_C( 92), UINT8_C(242), UINT8_C(164), UINT8_C( 47), UINT8_C(184), UINT8_C( 75), UINT8_C( 79), UINT8_C( 10), UINT8_C( 34), UINT8_C( 94), UINT8_C( 62), UINT8_C(127), UINT8_C( 47) }, { UINT8_C(174), 
UINT8_C(165), UINT8_C(187), UINT8_C( 58), UINT8_C( 79), UINT8_C( 37), UINT8_C(244), UINT8_C( 29), UINT8_C(210), UINT8_C(242), UINT8_C( 80), UINT8_C( 68), UINT8_C(122), UINT8_C(124), UINT8_C(254), UINT8_C(244) } }, { { UINT8_C( 69), UINT8_C(217), UINT8_C( 73), UINT8_C(103), UINT8_C(239), UINT8_C(125), UINT8_C(215), UINT8_C(140), UINT8_C( 23), UINT8_C( 7), UINT8_C( 62), UINT8_C( 63), UINT8_C( 80), UINT8_C(157), UINT8_C(170), UINT8_C(197) }, { UINT8_C(162), UINT8_C(155), UINT8_C(146), UINT8_C(230), UINT8_C(247), UINT8_C(190), UINT8_C(235), UINT8_C( 49), UINT8_C(232), UINT8_C(224), UINT8_C(124), UINT8_C(252), UINT8_C( 10), UINT8_C(185), UINT8_C( 85), UINT8_C(163) } }, { { UINT8_C( 67), UINT8_C(135), UINT8_C( 33), UINT8_C( 53), UINT8_C( 43), UINT8_C( 81), UINT8_C(237), UINT8_C(118), UINT8_C(160), UINT8_C(247), UINT8_C(153), UINT8_C(254), UINT8_C( 53), UINT8_C( 24), UINT8_C( 45), UINT8_C(123) }, { UINT8_C(194), UINT8_C(225), UINT8_C(132), UINT8_C(172), UINT8_C(212), UINT8_C(138), UINT8_C(183), UINT8_C(110), UINT8_C( 5), UINT8_C(239), UINT8_C(153), UINT8_C(127), UINT8_C(172), UINT8_C( 24), UINT8_C(180), UINT8_C(222) } }, { { UINT8_C(241), UINT8_C(118), UINT8_C(226), UINT8_C(225), UINT8_C(244), UINT8_C(185), UINT8_C(109), UINT8_C( 11), UINT8_C(192), UINT8_C(172), UINT8_C( 74), UINT8_C( 17), UINT8_C( 73), UINT8_C(244), UINT8_C(214), UINT8_C(140) }, { UINT8_C(143), UINT8_C(110), UINT8_C( 71), UINT8_C(135), UINT8_C( 47), UINT8_C(157), UINT8_C(182), UINT8_C(208), UINT8_C( 3), UINT8_C( 53), UINT8_C( 82), UINT8_C(136), UINT8_C(146), UINT8_C( 47), UINT8_C(107), UINT8_C( 49) } }, { { UINT8_C(123), UINT8_C(248), UINT8_C(193), UINT8_C(166), UINT8_C( 73), UINT8_C(174), UINT8_C( 28), UINT8_C(233), UINT8_C(165), UINT8_C(181), UINT8_C(231), UINT8_C(219), UINT8_C(205), UINT8_C( 21), UINT8_C( 86), UINT8_C(191) }, { UINT8_C(222), UINT8_C( 31), UINT8_C(131), UINT8_C(101), UINT8_C(146), UINT8_C(117), UINT8_C( 56), UINT8_C(151), UINT8_C(165), UINT8_C(173), UINT8_C(231), UINT8_C(219), 
UINT8_C(179), UINT8_C(168), UINT8_C(106), UINT8_C(253) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t r = simde_vrbitq_u8(a); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vrbitq_u8(a); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vrbit_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vrbit_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vrbitq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vrbitq_u8) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/reinterpret.c000066400000000000000000007647771400333146700200620ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN reinterpret #include "test-neon.h" #include "../../../simde/arm/neon/reinterpret.h" static int test_simde_vreinterpret_f32_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; } test_vec[] = { { { -INT8_C( 42), INT8_C( 92), INT8_C( 93), INT8_C( 70), INT8_C( 89), -INT8_C( 37), -INT8_C( 103), -INT8_C( 92) }, }, { { INT8_C( 40), -INT8_C( 95), -INT8_C( 44), INT8_C( 67), -INT8_C( 63), -INT8_C( 92), -INT8_C( 108), INT8_C( 10) }, }, { { INT8_C( 37), INT8_C( 114), -INT8_C( 8), -INT8_C( 112), -INT8_C( 60), -INT8_C( 12), -INT8_C( 68), -INT8_C( 73) }, }, { { -INT8_C( 40), -INT8_C( 11), -INT8_C( 75), INT8_C( 51), -INT8_C( 124), -INT8_C( 99), INT8_C( 109), INT8_C( 90) }, }, { { -INT8_C( 7), -INT8_C( 54), -INT8_C( 95), INT8_C( 82), -INT8_C( 91), INT8_C( 58), -INT8_C( 10), -INT8_C( 50) }, }, { { -INT8_C( 37), -INT8_C( 54), INT8_C( 17), -INT8_C( 100), INT8_C( 110), -INT8_C( 91), -INT8_C( 90), -INT8_C( 108) }, }, { { INT8_C( 23), -INT8_C( 97), INT8_C( 36), -INT8_C( 36), -INT8_C( 109), -INT8_C( 
31), -INT8_C( 109), INT8_C( 107) }, }, { { -INT8_C( 42), INT8_C( 73), -INT8_C( 97), INT8_C( 91), -INT8_C( 26), INT8_C( 12), -INT8_C( 75), -INT8_C( 33) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_private a_ = simde_int8x8_to_private(a); simde_float32x2_t r = simde_vreinterpret_f32_s8(a); simde_float32x2_private r_ = simde_float32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_f64_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; } test_vec[] = { { { -INT8_C( 5), INT8_C( 109), -INT8_C( 72), -INT8_C( 54), INT8_C( 91), -INT8_C( 8), INT8_C( 66), INT8_C( 119) }, }, { { INT8_C( 115), INT8_C( 82), INT8_C( 13), -INT8_C( 69), INT8_C( 12), -INT8_C( 98), INT8_C( 10), INT8_C( 113) }, }, { { -INT8_C( 12), INT8_C( 18), INT8_C( 58), INT8_C( 77), -INT8_C( 60), INT8_C( 73), INT8_C( 48), INT8_C( 23) }, }, { { -INT8_C( 71), INT8_MAX, -INT8_C( 101), INT8_C( 99), INT8_C( 32), -INT8_C( 47), INT8_C( 84), INT8_C( 28) }, }, { { INT8_C( 62), INT8_C( 13), -INT8_C( 26), -INT8_C( 102), INT8_C( 5), INT8_C( 41), INT8_C( 17), INT8_C( 120) }, }, { { INT8_C( 123), INT8_C( 30), INT8_C( 51), -INT8_C( 121), -INT8_C( 68), INT8_C( 61), -INT8_C( 8), -INT8_C( 80) }, }, { { INT8_C( 79), INT8_C( 50), -INT8_C( 3), INT8_C( 19), INT8_C( 124), INT8_C( 46), INT8_C( 42), INT8_C( 53) }, }, { { -INT8_C( 83), -INT8_C( 59), -INT8_C( 104), -INT8_C( 51), -INT8_C( 106), -INT8_C( 19), -INT8_C( 23), -INT8_C( 43) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_private a_ = simde_int8x8_to_private(a); simde_float64x1_t r = simde_vreinterpret_f64_s8(a); simde_float64x1_private r_ = simde_float64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s16_s8 (SIMDE_MUNIT_TEST_ARGS) { 
struct { int8_t a[8]; } test_vec[] = { { { -INT8_C( 119), INT8_C( 49), INT8_C( 78), INT8_C( 105), -INT8_C( 127), -INT8_C( 66), -INT8_C( 26), -INT8_C( 61) }, }, { { INT8_C( 30), -INT8_C( 27), -INT8_C( 30), INT8_C( 39), INT8_C( 99), INT8_C( 61), -INT8_C( 126), -INT8_C( 122) }, }, { { -INT8_C( 69), -INT8_C( 3), INT8_C( 53), INT8_C( 18), INT8_C( 42), -INT8_C( 110), -INT8_C( 95), INT8_C( 32) }, }, { { INT8_C( 65), INT8_C( 49), INT8_C( 29), INT8_C( 105), INT8_C( 80), -INT8_C( 105), -INT8_C( 59), -INT8_C( 38) }, }, { { -INT8_C( 55), INT8_C( 19), INT8_C( 67), INT8_C( 74), -INT8_C( 47), INT8_C( 41), INT8_C( 13), -INT8_C( 17) }, }, { { INT8_C( 14), -INT8_C( 17), INT8_C( 22), INT8_C( 113), INT8_C( 44), -INT8_C( 103), -INT8_C( 9), -INT8_C( 25) }, }, { { -INT8_C( 106), INT8_C( 44), -INT8_C( 7), -INT8_C( 64), -INT8_C( 66), -INT8_C( 101), -INT8_C( 32), INT8_C( 0) }, }, { { -INT8_C( 52), -INT8_C( 3), INT8_C( 105), INT8_C( 28), -INT8_C( 107), INT8_C( 46), -INT8_C( 10), INT8_C( 94) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_private a_ = simde_int8x8_to_private(a); simde_int16x4_t r = simde_vreinterpret_s16_s8(a); simde_int16x4_private r_ = simde_int16x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s32_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; } test_vec[] = { { { INT8_C( 34), INT8_C( 75), INT8_C( 84), -INT8_C( 29), -INT8_C( 85), INT8_C( 72), INT8_C( 5), -INT8_C( 60) }, }, { { INT8_C( 105), INT8_C( 5), -INT8_C( 90), -INT8_C( 42), INT8_C( 51), INT8_C( 40), -INT8_C( 85), INT8_C( 88) }, }, { { INT8_C( 100), INT8_C( 114), -INT8_C( 58), -INT8_C( 66), INT8_C( 110), INT8_C( 62), INT8_C( 12), -INT8_C( 14) }, }, { { -INT8_C( 56), -INT8_C( 14), -INT8_C( 105), INT8_C( 107), INT8_C( 56), INT8_C( 104), -INT8_C( 65), INT8_C( 90) }, }, { { -INT8_C( 77), INT8_C( 19), INT8_C( 61), INT8_C( 94), INT8_C( 91), 
INT8_C( 66), INT8_C( 34), -INT8_C( 60) }, }, { { INT8_C( 72), -INT8_C( 56), -INT8_C( 102), INT8_C( 123), -INT8_C( 15), INT8_C( 69), -INT8_C( 44), INT8_C( 85) }, }, { { -INT8_C( 72), -INT8_C( 102), INT8_C( 20), INT8_C( 38), -INT8_C( 40), INT8_C( 32), INT8_C( 25), -INT8_C( 96) }, }, { { INT8_C( 18), -INT8_C( 80), INT8_C( 11), INT8_C( 74), INT8_C( 24), -INT8_C( 54), -INT8_C( 92), -INT8_C( 53) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_private a_ = simde_int8x8_to_private(a); simde_int32x2_t r = simde_vreinterpret_s32_s8(a); simde_int32x2_private r_ = simde_int32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s64_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; } test_vec[] = { { { -INT8_C( 102), -INT8_C( 98), INT8_C( 81), -INT8_C( 77), -INT8_C( 94), -INT8_C( 9), -INT8_C( 74), INT8_C( 34) }, }, { { INT8_C( 104), -INT8_C( 72), INT8_C( 117), -INT8_C( 60), INT8_C( 71), INT8_C( 68), INT8_C( 51), INT8_C( 73) }, }, { { INT8_C( 17), INT8_C( 79), -INT8_C( 67), INT8_C( 89), -INT8_C( 92), -INT8_C( 5), -INT8_C( 54), INT8_C( 76) }, }, { { -INT8_C( 117), -INT8_C( 98), -INT8_C( 82), INT8_C( 106), -INT8_C( 80), INT8_C( 15), -INT8_C( 30), INT8_C( 74) }, }, { { -INT8_C( 83), INT8_C( 51), -INT8_C( 2), INT8_C( 79), INT8_C( 43), -INT8_C( 76), INT8_C( 114), -INT8_C( 109) }, }, { { INT8_C( 109), -INT8_C( 25), INT8_C( 87), -INT8_C( 76), INT8_C( 44), -INT8_C( 117), -INT8_C( 3), INT8_C( 61) }, }, { { -INT8_C( 38), -INT8_C( 69), -INT8_C( 105), INT8_C( 126), -INT8_C( 74), INT8_C( 97), -INT8_C( 53), INT8_C( 65) }, }, { { -INT8_C( 1), INT8_C( 121), -INT8_C( 85), -INT8_C( 81), -INT8_C( 120), -INT8_C( 115), -INT8_C( 6), INT8_C( 53) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_private a_ = simde_int8x8_to_private(a); 
simde_int64x1_t r = simde_vreinterpret_s64_s8(a); simde_int64x1_private r_ = simde_int64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u8_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; } test_vec[] = { { { INT8_C( 73), -INT8_C( 58), -INT8_C( 54), INT8_C( 0), INT8_MAX, -INT8_C( 14), -INT8_C( 96), INT8_C( 124) }, }, { { INT8_C( 61), -INT8_C( 81), -INT8_C( 61), INT8_C( 43), INT8_C( 92), INT8_C( 16), -INT8_C( 94), INT8_C( 125) }, }, { { -INT8_C( 46), INT8_C( 72), -INT8_C( 42), INT8_C( 56), INT8_C( 3), INT8_C( 67), -INT8_C( 58), -INT8_C( 39) }, }, { { INT8_C( 44), -INT8_C( 82), INT8_C( 29), -INT8_C( 51), INT8_C( 70), INT8_C( 74), -INT8_C( 65), -INT8_C( 113) }, }, { { INT8_C( 17), -INT8_C( 119), -INT8_C( 112), -INT8_C( 112), INT8_C( 124), INT8_C( 48), INT8_C( 12), -INT8_C( 71) }, }, { { -INT8_C( 33), -INT8_C( 49), -INT8_C( 28), INT8_C( 59), -INT8_C( 33), -INT8_C( 122), -INT8_C( 71), -INT8_C( 78) }, }, { { -INT8_C( 50), -INT8_C( 113), -INT8_C( 22), -INT8_C( 47), -INT8_C( 46), -INT8_C( 80), -INT8_C( 86), -INT8_C( 1) }, }, { { INT8_C( 94), -INT8_C( 57), -INT8_C( 52), -INT8_C( 92), INT8_C( 18), -INT8_C( 116), INT8_C( 52), INT8_C( 35) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_private a_ = simde_int8x8_to_private(a); simde_uint8x8_t r = simde_vreinterpret_u8_s8(a); simde_uint8x8_private r_ = simde_uint8x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u16_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; } test_vec[] = { { { -INT8_C( 44), INT8_C( 17), -INT8_C( 26), -INT8_C( 99), INT8_C( 57), -INT8_C( 92), INT8_C( 3), INT8_C( 58) }, }, { { INT8_C( 16), -INT8_C( 90), -INT8_C( 123), -INT8_C( 127), -INT8_C( 28), INT8_C( 59), -INT8_C( 66), INT8_C( 8) }, }, { { -INT8_C( 117), INT8_C( 95), INT8_C( 102), -INT8_C( 39), 
INT8_C( 9), -INT8_C( 100), INT8_C( 76), INT8_C( 94) }, }, { { -INT8_C( 50), INT8_C( 29), -INT8_C( 46), -INT8_C( 119), INT8_C( 8), -INT8_C( 74), INT8_C( 98), -INT8_C( 36) }, }, { { -INT8_C( 57), INT8_C( 72), INT8_C( 122), INT8_C( 0), -INT8_C( 19), INT8_C( 125), INT8_C( 58), -INT8_C( 3) }, }, { { INT8_C( 35), -INT8_C( 65), INT8_C( 126), INT8_C( 8), -INT8_C( 6), INT8_C( 60), INT8_C( 16), -INT8_C( 122) }, }, { { -INT8_C( 101), INT8_C( 118), INT8_C( 95), -INT8_C( 92), INT8_C( 18), -INT8_C( 85), INT8_C( 2), -INT8_C( 31) }, }, { { -INT8_C( 55), -INT8_C( 43), INT8_C( 106), -INT8_C( 47), -INT8_C( 117), -INT8_C( 52), -INT8_C( 83), INT8_C( 82) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_private a_ = simde_int8x8_to_private(a); simde_uint16x4_t r = simde_vreinterpret_u16_s8(a); simde_uint16x4_private r_ = simde_uint16x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u32_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; } test_vec[] = { { { INT8_C( 74), INT8_C( 41), -INT8_C( 7), INT8_C( 78), -INT8_C( 65), INT8_C( 66), -INT8_C( 17), -INT8_C( 70) }, }, { { INT8_C( 81), -INT8_C( 71), INT8_C( 49), -INT8_C( 102), INT8_C( 15), -INT8_C( 59), -INT8_C( 56), INT8_C( 99) }, }, { { -INT8_C( 40), -INT8_C( 40), -INT8_C( 17), -INT8_C( 13), -INT8_C( 46), -INT8_C( 41), INT8_C( 71), INT8_C( 66) }, }, { { INT8_C( 40), INT8_C( 64), -INT8_C( 25), -INT8_C( 112), INT8_C( 50), INT8_C( 85), -INT8_C( 27), INT8_C( 124) }, }, { { INT8_C( 126), -INT8_C( 34), -INT8_C( 53), INT8_C( 61), INT8_C( 32), -INT8_C( 70), -INT8_C( 9), INT8_C( 113) }, }, { { INT8_C( 116), INT8_C( 41), INT8_C( 11), -INT8_C( 125), -INT8_C( 18), -INT8_C( 44), -INT8_C( 25), -INT8_C( 58) }, }, { { -INT8_C( 84), -INT8_C( 42), -INT8_C( 70), INT8_MAX, -INT8_C( 83), INT8_C( 1), -INT8_C( 63), -INT8_C( 43) }, }, { { INT8_C( 65), -INT8_C( 88), INT8_C( 102), INT8_C( 
115), -INT8_C( 3), INT8_C( 75), -INT8_C( 16), INT8_C( 123) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_private a_ = simde_int8x8_to_private(a); simde_uint32x2_t r = simde_vreinterpret_u32_s8(a); simde_uint32x2_private r_ = simde_uint32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u64_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; } test_vec[] = { { { INT8_C( 40), INT8_C( 42), INT8_C( 95), INT8_C( 87), INT8_C( 71), -INT8_C( 53), -INT8_C( 100), INT8_C( 99) }, }, { { INT8_C( 42), -INT8_C( 25), INT8_C( 55), INT8_C( 109), -INT8_C( 110), -INT8_C( 120), -INT8_C( 78), -INT8_C( 7) }, }, { { -INT8_C( 97), -INT8_C( 1), -INT8_C( 38), INT8_C( 60), INT8_C( 58), INT8_C( 58), INT8_C( 80), INT8_C( 75) }, }, { { INT8_C( 18), INT8_C( 38), -INT8_C( 40), INT8_C( 0), -INT8_C( 110), INT8_C( 19), INT8_C( 60), -INT8_C( 69) }, }, { { INT8_C( 61), -INT8_C( 101), INT8_C( 18), -INT8_C( 124), INT8_C( 102), -INT8_C( 82), -INT8_C( 25), -INT8_C( 111) }, }, { { -INT8_C( 107), INT8_C( 31), -INT8_C( 2), INT8_C( 40), -INT8_C( 89), -INT8_C( 80), INT8_C( 33), INT8_C( 70) }, }, { { -INT8_C( 80), -INT8_C( 4), -INT8_C( 126), -INT8_C( 22), INT8_C( 54), -INT8_C( 46), INT8_C( 54), INT8_C( 73) }, }, { { -INT8_C( 8), INT8_C( 14), INT8_C( 73), -INT8_C( 118), INT8_C( 33), -INT8_C( 123), INT8_C( 69), INT8_C( 94) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_private a_ = simde_int8x8_to_private(a); simde_uint64x1_t r = simde_vreinterpret_u64_s8(a); simde_uint64x1_private r_ = simde_uint64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_f32_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; } test_vec[] = { { { INT8_C( 126), -INT8_C( 42), INT8_C( 86), 
-INT8_C( 114), -INT8_C( 16), INT8_C( 92), -INT8_C( 85), INT8_C( 13), -INT8_C( 40), INT8_C( 5), INT8_C( 82), INT8_C( 85), INT8_C( 58), INT8_MIN, -INT8_C( 124), -INT8_C( 105) }, }, { { INT8_C( 11), INT8_C( 67), INT8_C( 88), INT8_C( 58), -INT8_C( 6), -INT8_C( 66), -INT8_C( 54), INT8_C( 93), -INT8_C( 80), -INT8_C( 34), -INT8_C( 77), -INT8_C( 9), INT8_C( 108), INT8_C( 83), -INT8_C( 44), -INT8_C( 22) }, }, { { INT8_C( 41), INT8_C( 42), INT8_C( 120), INT8_C( 26), -INT8_C( 122), INT8_C( 35), INT8_C( 39), INT8_C( 95), INT8_C( 41), INT8_C( 121), -INT8_C( 76), INT8_C( 99), -INT8_C( 7), INT8_C( 56), -INT8_C( 6), INT8_C( 4) }, }, { { INT8_C( 124), INT8_C( 82), INT8_C( 62), INT8_C( 118), INT8_C( 16), INT8_C( 8), -INT8_C( 45), -INT8_C( 63), -INT8_C( 25), -INT8_C( 122), -INT8_C( 72), INT8_C( 83), -INT8_C( 39), -INT8_C( 116), INT8_C( 61), INT8_C( 3) }, }, { { -INT8_C( 74), -INT8_C( 75), INT8_C( 29), INT8_C( 61), -INT8_C( 40), INT8_C( 68), -INT8_C( 100), INT8_C( 1), -INT8_C( 67), INT8_C( 80), INT8_C( 100), -INT8_C( 74), -INT8_C( 120), INT8_C( 95), -INT8_C( 69), INT8_C( 4) }, }, { { -INT8_C( 79), -INT8_C( 7), INT8_C( 122), -INT8_C( 62), INT8_C( 2), INT8_C( 77), -INT8_C( 125), -INT8_C( 23), -INT8_C( 45), INT8_C( 59), INT8_C( 60), -INT8_C( 83), -INT8_C( 57), INT8_C( 121), -INT8_C( 80), INT8_C( 125) }, }, { { INT8_C( 46), -INT8_C( 51), -INT8_C( 70), INT8_C( 6), INT8_C( 17), INT8_C( 86), INT8_C( 8), -INT8_C( 50), -INT8_C( 90), INT8_C( 108), -INT8_C( 124), INT8_C( 47), -INT8_C( 53), INT8_C( 63), INT8_C( 51), INT8_C( 125) }, }, { { INT8_C( 57), -INT8_C( 82), INT8_C( 63), INT8_C( 59), -INT8_C( 5), -INT8_C( 62), INT8_C( 36), -INT8_C( 49), -INT8_C( 3), INT8_C( 96), INT8_C( 124), -INT8_C( 60), -INT8_C( 39), INT8_C( 44), INT8_C( 65), INT8_C( 7) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_private a_ = simde_int8x16_to_private(a); simde_float32x4_t r = simde_vreinterpretq_f32_s8(a); 
simde_float32x4_private r_ = simde_float32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_f64_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; } test_vec[] = { { { -INT8_C( 26), INT8_C( 77), -INT8_C( 93), INT8_C( 31), -INT8_C( 92), INT8_C( 89), -INT8_C( 113), INT8_C( 91), -INT8_C( 124), -INT8_C( 5), INT8_C( 6), INT8_C( 118), INT8_C( 47), -INT8_C( 29), INT8_C( 24), INT8_C( 83) }, }, { { INT8_C( 14), INT8_C( 2), -INT8_C( 123), -INT8_C( 119), -INT8_C( 9), INT8_C( 101), -INT8_C( 52), INT8_C( 85), -INT8_C( 102), -INT8_C( 80), -INT8_C( 112), -INT8_C( 37), INT8_C( 68), INT8_C( 37), -INT8_C( 82), INT8_C( 42) }, }, { { INT8_C( 114), INT8_C( 81), INT8_C( 73), INT8_C( 22), -INT8_C( 85), -INT8_C( 40), INT8_C( 114), INT8_C( 47), -INT8_C( 45), INT8_C( 120), -INT8_C( 91), INT8_C( 3), INT8_C( 91), -INT8_C( 66), INT8_C( 86), INT8_C( 106) }, }, { { -INT8_C( 64), -INT8_C( 37), -INT8_C( 13), -INT8_C( 72), INT8_C( 64), -INT8_C( 65), INT8_C( 13), -INT8_C( 37), INT8_C( 111), -INT8_C( 99), -INT8_C( 74), -INT8_C( 77), -INT8_C( 62), INT8_C( 100), -INT8_C( 35), INT8_C( 53) }, }, { { -INT8_C( 74), INT8_C( 38), INT8_C( 75), INT8_C( 97), -INT8_C( 2), -INT8_C( 67), -INT8_C( 112), -INT8_C( 46), INT8_C( 53), INT8_C( 53), -INT8_C( 43), -INT8_C( 111), -INT8_C( 13), INT8_C( 43), -INT8_C( 5), -INT8_C( 76) }, }, { { INT8_C( 6), -INT8_C( 18), INT8_C( 108), INT8_C( 70), -INT8_C( 83), INT8_C( 121), INT8_C( 33), INT8_C( 29), INT8_C( 22), -INT8_C( 41), -INT8_C( 48), -INT8_C( 40), INT8_C( 60), -INT8_C( 82), INT8_C( 13), -INT8_C( 14) }, }, { { -INT8_C( 44), INT8_C( 89), INT8_C( 83), -INT8_C( 45), INT8_C( 22), -INT8_C( 29), -INT8_C( 91), INT8_C( 76), INT8_C( 24), INT8_C( 122), -INT8_C( 35), INT8_C( 12), -INT8_C( 91), -INT8_C( 40), -INT8_C( 64), -INT8_C( 85) }, }, { { -INT8_C( 58), INT8_C( 44), -INT8_C( 15), INT8_C( 115), -INT8_C( 91), INT8_C( 19), -INT8_C( 112), -INT8_C( 69), -INT8_C( 22), INT8_C( 97), -INT8_C( 109), INT8_C( 
38), INT8_C( 15), -INT8_C( 95), INT8_C( 24), -INT8_C( 29) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_private a_ = simde_int8x16_to_private(a); simde_float64x2_t r = simde_vreinterpretq_f64_s8(a); simde_float64x2_private r_ = simde_float64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s16_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; } test_vec[] = { { { -INT8_C( 120), -INT8_C( 31), -INT8_C( 33), -INT8_C( 41), INT8_C( 89), INT8_C( 93), -INT8_C( 91), -INT8_C( 75), -INT8_C( 105), INT8_C( 73), -INT8_C( 6), -INT8_C( 100), -INT8_C( 115), -INT8_C( 75), -INT8_C( 112), INT8_C( 36) }, }, { { -INT8_C( 81), -INT8_C( 38), -INT8_C( 58), INT8_C( 33), -INT8_C( 61), INT8_C( 54), INT8_C( 89), INT8_C( 86), -INT8_C( 9), -INT8_C( 91), -INT8_C( 108), -INT8_C( 53), INT8_C( 44), -INT8_C( 109), INT8_C( 118), -INT8_C( 75) }, }, { { INT8_C( 116), INT8_C( 85), -INT8_C( 116), -INT8_C( 50), -INT8_C( 77), INT8_C( 50), -INT8_C( 125), INT8_C( 74), INT8_C( 123), INT8_C( 126), -INT8_C( 26), INT8_C( 9), INT8_C( 51), INT8_C( 118), INT8_C( 45), -INT8_C( 30) }, }, { { INT8_C( 81), -INT8_C( 13), INT8_C( 3), INT8_C( 20), INT8_C( 41), INT8_C( 93), INT8_C( 106), INT8_C( 32), INT8_C( 2), -INT8_C( 1), -INT8_C( 21), INT8_C( 46), -INT8_C( 110), INT8_C( 97), -INT8_C( 29), INT8_C( 6) }, }, { { -INT8_C( 73), INT8_C( 112), -INT8_C( 44), INT8_C( 106), -INT8_C( 94), INT8_C( 88), -INT8_C( 76), INT8_C( 29), -INT8_C( 42), -INT8_C( 101), INT8_C( 38), INT8_C( 9), INT8_C( 17), INT8_C( 83), -INT8_C( 20), INT8_C( 98) }, }, { { INT8_C( 71), -INT8_C( 17), INT8_C( 118), INT8_C( 112), INT8_C( 76), -INT8_C( 31), -INT8_C( 111), INT8_C( 78), -INT8_C( 32), INT8_C( 124), INT8_C( 125), INT8_C( 114), -INT8_C( 34), INT8_C( 96), INT8_C( 120), -INT8_C( 107) }, }, { { -INT8_C( 48), INT8_C( 77), -INT8_C( 1), INT8_C( 114), -INT8_C( 91), -INT8_C( 77), 
-INT8_C( 112), INT8_C( 123), INT8_C( 78), -INT8_C( 74), -INT8_C( 124), INT8_C( 96), INT8_C( 10), INT8_C( 112), -INT8_C( 62), INT8_C( 81) }, }, { { INT8_C( 96), INT8_C( 57), -INT8_C( 63), -INT8_C( 84), INT8_C( 26), INT8_C( 82), -INT8_C( 5), -INT8_C( 6), -INT8_C( 49), INT8_C( 120), INT8_C( 108), -INT8_C( 83), -INT8_C( 40), -INT8_C( 28), INT8_C( 66), -INT8_C( 87) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_private a_ = simde_int8x16_to_private(a); simde_int16x8_t r = simde_vreinterpretq_s16_s8(a); simde_int16x8_private r_ = simde_int16x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s32_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; } test_vec[] = { { { -INT8_C( 26), -INT8_C( 70), INT8_C( 65), -INT8_C( 95), -INT8_C( 121), -INT8_C( 30), INT8_C( 64), -INT8_C( 26), INT8_C( 11), INT8_C( 80), INT8_C( 104), INT8_C( 24), -INT8_C( 39), INT8_C( 118), INT8_C( 71), INT8_C( 40) }, }, { { INT8_C( 86), INT8_C( 6), -INT8_C( 116), INT8_C( 44), INT8_C( 39), -INT8_C( 8), INT8_C( 58), INT8_C( 70), -INT8_C( 67), -INT8_C( 15), -INT8_C( 45), INT8_C( 124), INT8_C( 104), INT8_C( 2), INT8_C( 7), INT8_C( 78) }, }, { { -INT8_C( 67), INT8_C( 73), -INT8_C( 17), INT8_C( 68), INT8_C( 43), INT8_C( 48), INT8_C( 42), INT8_C( 54), INT8_MIN, -INT8_C( 110), INT8_C( 78), INT8_C( 90), INT8_C( 9), -INT8_C( 107), -INT8_C( 126), INT8_C( 95) }, }, { { -INT8_C( 100), INT8_C( 15), -INT8_C( 117), -INT8_C( 61), INT8_C( 7), -INT8_C( 59), INT8_C( 9), -INT8_C( 60), -INT8_C( 73), -INT8_C( 35), INT8_C( 64), INT8_C( 31), -INT8_C( 33), INT8_C( 72), INT8_C( 109), -INT8_C( 100) }, }, { { -INT8_C( 111), INT8_C( 92), -INT8_C( 32), -INT8_C( 68), -INT8_C( 116), INT8_C( 10), -INT8_C( 13), INT8_C( 13), -INT8_C( 99), INT8_C( 65), INT8_C( 103), -INT8_C( 90), -INT8_C( 41), -INT8_C( 23), INT8_C( 5), INT8_C( 115) }, }, { { -INT8_C( 8), -INT8_C( 
112), INT8_C( 54), -INT8_C( 1), INT8_C( 85), INT8_C( 63), -INT8_C( 61), INT8_C( 12), INT8_C( 28), INT8_C( 4), INT8_C( 43), -INT8_C( 4), INT8_C( 76), -INT8_C( 104), -INT8_C( 104), -INT8_C( 35) }, }, { { -INT8_C( 11), INT8_C( 121), -INT8_C( 103), -INT8_C( 127), -INT8_C( 125), -INT8_C( 116), -INT8_C( 114), INT8_C( 32), -INT8_C( 50), -INT8_C( 11), -INT8_C( 58), -INT8_C( 91), -INT8_C( 33), -INT8_C( 53), INT8_C( 24), -INT8_C( 41) }, }, { { INT8_C( 91), INT8_C( 78), -INT8_C( 41), -INT8_C( 79), -INT8_C( 115), -INT8_C( 102), -INT8_C( 67), -INT8_C( 86), -INT8_C( 98), -INT8_C( 23), -INT8_C( 90), -INT8_C( 22), -INT8_C( 127), INT8_C( 62), -INT8_C( 57), INT8_C( 118) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_private a_ = simde_int8x16_to_private(a); simde_int32x4_t r = simde_vreinterpretq_s32_s8(a); simde_int32x4_private r_ = simde_int32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s64_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; } test_vec[] = { { { INT8_C( 40), -INT8_C( 64), INT8_C( 94), INT8_C( 110), -INT8_C( 13), -INT8_C( 103), -INT8_C( 107), INT8_C( 8), -INT8_C( 65), -INT8_C( 53), -INT8_C( 27), -INT8_C( 10), INT8_C( 108), -INT8_C( 74), INT8_C( 4), INT8_C( 22) }, }, { { -INT8_C( 26), -INT8_C( 37), INT8_C( 97), -INT8_C( 37), -INT8_C( 111), INT8_C( 80), INT8_C( 10), -INT8_C( 66), -INT8_C( 125), -INT8_C( 101), INT8_C( 97), -INT8_C( 59), -INT8_C( 114), -INT8_C( 86), -INT8_C( 58), -INT8_C( 73) }, }, { { INT8_C( 106), INT8_C( 36), INT8_C( 37), INT8_C( 93), -INT8_C( 67), -INT8_C( 70), INT8_C( 102), INT8_C( 124), -INT8_C( 123), INT8_C( 75), INT8_C( 114), -INT8_C( 15), INT8_C( 1), INT8_C( 119), INT8_C( 8), -INT8_C( 25) }, }, { { INT8_C( 82), INT8_C( 105), -INT8_C( 62), -INT8_C( 29), -INT8_C( 70), -INT8_C( 51), -INT8_C( 95), INT8_C( 61), INT8_C( 104), INT8_C( 3), INT8_C( 2), -INT8_C( 10), 
-INT8_C( 83), -INT8_C( 56), -INT8_C( 83), INT8_C( 23) }, }, { { -INT8_C( 20), -INT8_C( 46), INT8_C( 117), -INT8_C( 86), -INT8_C( 116), -INT8_C( 37), INT8_C( 38), INT8_C( 17), INT8_C( 38), -INT8_C( 103), INT8_C( 3), INT8_C( 39), INT8_C( 16), INT8_C( 11), INT8_C( 15), INT8_C( 98) }, }, { { INT8_C( 116), -INT8_C( 47), INT8_C( 69), INT8_C( 46), -INT8_C( 98), -INT8_C( 25), INT8_C( 107), INT8_C( 6), -INT8_C( 22), INT8_C( 109), -INT8_C( 3), -INT8_C( 105), INT8_C( 53), -INT8_C( 86), -INT8_C( 82), INT8_C( 34) }, }, { { INT8_C( 125), INT8_C( 35), -INT8_C( 52), INT8_C( 9), -INT8_C( 2), -INT8_C( 14), INT8_C( 27), INT8_C( 36), -INT8_C( 117), INT8_C( 30), INT8_C( 76), -INT8_C( 101), INT8_C( 41), INT8_C( 91), -INT8_C( 3), -INT8_C( 99) }, }, { { INT8_C( 44), INT8_C( 67), -INT8_C( 52), -INT8_C( 53), INT8_C( 42), INT8_C( 55), -INT8_C( 47), INT8_C( 20), -INT8_C( 91), -INT8_C( 50), -INT8_C( 85), -INT8_C( 38), INT8_C( 121), INT8_C( 89), -INT8_C( 4), -INT8_C( 10) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_private a_ = simde_int8x16_to_private(a); simde_int64x2_t r = simde_vreinterpretq_s64_s8(a); simde_int64x2_private r_ = simde_int64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u8_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; } test_vec[] = { { { -INT8_C( 47), -INT8_C( 102), -INT8_C( 31), INT8_C( 11), INT8_C( 32), -INT8_C( 41), -INT8_C( 85), -INT8_C( 54), INT8_C( 70), -INT8_C( 47), INT8_C( 16), -INT8_C( 62), INT8_C( 57), -INT8_C( 93), -INT8_C( 83), INT8_C( 62) }, }, { { -INT8_C( 12), INT8_C( 20), -INT8_C( 76), INT8_C( 35), -INT8_C( 2), -INT8_C( 107), -INT8_C( 1), -INT8_C( 124), INT8_C( 58), INT8_C( 57), -INT8_C( 54), -INT8_C( 62), INT8_C( 112), INT8_C( 79), INT8_C( 71), INT8_C( 65) }, }, { { -INT8_C( 23), INT8_C( 41), INT8_C( 76), INT8_C( 9), INT8_C( 0), -INT8_C( 9), -INT8_C( 45), INT8_C( 70), 
-INT8_C( 55), -INT8_C( 29), INT8_C( 8), INT8_C( 2), -INT8_C( 121), -INT8_C( 75), INT8_C( 64), INT8_C( 123) }, }, { { -INT8_C( 55), -INT8_C( 12), -INT8_C( 98), -INT8_C( 57), -INT8_C( 119), -INT8_C( 99), INT8_C( 76), -INT8_C( 60), -INT8_C( 41), INT8_C( 22), -INT8_C( 122), INT8_C( 71), INT8_C( 101), -INT8_C( 50), -INT8_C( 119), INT8_C( 78) }, }, { { -INT8_C( 9), -INT8_C( 43), INT8_C( 88), -INT8_C( 9), -INT8_C( 51), INT8_C( 43), INT8_C( 61), -INT8_C( 106), INT8_C( 15), INT8_C( 69), -INT8_C( 104), -INT8_C( 106), -INT8_C( 6), -INT8_C( 39), INT8_C( 17), -INT8_C( 61) }, }, { { -INT8_C( 51), -INT8_C( 81), -INT8_C( 118), INT8_C( 87), INT8_C( 77), -INT8_C( 42), INT8_C( 27), INT8_C( 36), -INT8_C( 19), -INT8_C( 95), INT8_C( 107), INT8_C( 82), INT8_C( 111), -INT8_C( 12), -INT8_C( 95), INT8_C( 102) }, }, { { -INT8_C( 54), -INT8_C( 7), INT8_C( 93), -INT8_C( 105), INT8_C( 36), -INT8_C( 102), INT8_C( 45), INT8_C( 51), -INT8_C( 33), -INT8_C( 59), -INT8_C( 55), -INT8_C( 39), -INT8_C( 98), -INT8_C( 38), -INT8_C( 100), INT8_C( 108) }, }, { { -INT8_C( 118), INT8_C( 39), -INT8_C( 61), -INT8_C( 41), -INT8_C( 3), -INT8_C( 34), -INT8_C( 5), -INT8_C( 22), INT8_MAX, INT8_C( 102), INT8_C( 61), -INT8_C( 17), INT8_C( 91), -INT8_C( 34), INT8_C( 85), INT8_C( 37) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_private a_ = simde_int8x16_to_private(a); simde_uint8x16_t r = simde_vreinterpretq_u8_s8(a); simde_uint8x16_private r_ = simde_uint8x16_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u16_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; } test_vec[] = { { { -INT8_C( 32), INT8_C( 116), INT8_C( 72), INT8_C( 76), INT8_C( 122), INT8_MAX, -INT8_C( 53), INT8_C( 119), -INT8_C( 39), INT8_C( 60), -INT8_C( 101), -INT8_C( 31), INT8_C( 40), INT8_C( 95), -INT8_C( 92), -INT8_C( 22) }, }, { { INT8_C( 41), INT8_C( 0), INT8_C( 1), 
INT8_C( 27), -INT8_C( 65), -INT8_C( 19), -INT8_C( 52), INT8_C( 119), INT8_C( 114), -INT8_C( 122), -INT8_C( 126), INT8_C( 22), -INT8_C( 64), -INT8_C( 26), INT8_C( 34), -INT8_C( 96) }, }, { { INT8_C( 90), INT8_C( 106), -INT8_C( 19), -INT8_C( 43), -INT8_C( 22), -INT8_C( 72), INT8_C( 76), -INT8_C( 61), -INT8_C( 11), -INT8_C( 25), -INT8_C( 92), INT8_C( 29), INT8_C( 70), INT8_C( 73), INT8_C( 7), INT8_C( 111) }, }, { { INT8_C( 73), INT8_C( 8), -INT8_C( 118), INT8_C( 8), -INT8_C( 11), INT8_C( 86), INT8_MAX, INT8_C( 104), -INT8_C( 36), INT8_C( 1), INT8_C( 126), -INT8_C( 99), -INT8_C( 25), -INT8_C( 95), INT8_C( 61), INT8_C( 66) }, }, { { INT8_C( 11), INT8_C( 42), INT8_C( 23), -INT8_C( 11), -INT8_C( 29), INT8_C( 99), -INT8_C( 72), -INT8_C( 40), INT8_C( 74), INT8_C( 93), -INT8_C( 11), -INT8_C( 111), -INT8_C( 90), -INT8_C( 4), INT8_C( 0), -INT8_C( 17) }, }, { { INT8_C( 4), -INT8_C( 117), -INT8_C( 9), -INT8_C( 7), -INT8_C( 31), INT8_C( 118), INT8_C( 97), -INT8_C( 66), INT8_C( 119), -INT8_C( 32), INT8_C( 91), INT8_C( 95), -INT8_C( 127), -INT8_C( 104), -INT8_C( 95), -INT8_C( 116) }, }, { { -INT8_C( 61), -INT8_C( 72), -INT8_C( 126), -INT8_C( 90), INT8_C( 27), INT8_C( 58), INT8_C( 126), INT8_C( 101), -INT8_C( 105), INT8_C( 115), -INT8_C( 10), INT8_C( 61), INT8_C( 111), -INT8_C( 9), INT8_C( 44), INT8_C( 115) }, }, { { -INT8_C( 126), INT8_C( 35), INT8_C( 108), INT8_C( 99), -INT8_C( 103), -INT8_C( 50), INT8_C( 33), INT8_C( 17), -INT8_C( 82), INT8_C( 124), INT8_C( 112), INT8_C( 47), INT8_C( 21), INT8_C( 17), -INT8_C( 69), -INT8_C( 40) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_private a_ = simde_int8x16_to_private(a); simde_uint16x8_t r = simde_vreinterpretq_u16_s8(a); simde_uint16x8_private r_ = simde_uint16x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u32_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t 
a[16]; } test_vec[] = { { { -INT8_C( 76), -INT8_C( 67), -INT8_C( 11), INT8_C( 4), INT8_C( 27), -INT8_C( 63), -INT8_C( 79), -INT8_C( 10), -INT8_C( 12), -INT8_C( 102), INT8_C( 114), -INT8_C( 58), INT8_C( 1), -INT8_C( 93), -INT8_C( 72), INT8_C( 46) }, }, { { INT8_C( 100), INT8_C( 106), INT8_C( 0), INT8_C( 63), INT8_C( 60), -INT8_C( 12), INT8_C( 6), INT8_C( 24), INT8_C( 41), INT8_C( 105), -INT8_C( 29), INT8_C( 97), -INT8_C( 73), INT8_C( 105), INT8_C( 22), INT8_C( 108) }, }, { { INT8_C( 38), INT8_C( 11), INT8_C( 112), INT8_C( 65), -INT8_C( 52), INT8_C( 34), INT8_C( 55), -INT8_C( 64), -INT8_C( 68), -INT8_C( 86), -INT8_C( 122), -INT8_C( 66), INT8_C( 77), INT8_C( 62), -INT8_C( 20), -INT8_C( 79) }, }, { { -INT8_C( 88), -INT8_C( 20), -INT8_C( 16), -INT8_C( 28), -INT8_C( 32), -INT8_C( 10), -INT8_C( 4), INT8_C( 9), INT8_C( 96), -INT8_C( 33), INT8_C( 106), INT8_C( 23), INT8_C( 72), INT8_MIN, -INT8_C( 125), INT8_C( 110) }, }, { { -INT8_C( 117), -INT8_C( 12), -INT8_C( 80), INT8_C( 87), INT8_C( 22), -INT8_C( 25), INT8_C( 23), -INT8_C( 46), -INT8_C( 111), -INT8_C( 99), -INT8_C( 112), -INT8_C( 34), -INT8_C( 37), INT8_C( 125), -INT8_C( 112), -INT8_C( 125) }, }, { { INT8_C( 105), INT8_MIN, INT8_C( 103), INT8_C( 74), INT8_C( 119), INT8_C( 99), INT8_C( 83), -INT8_C( 41), INT8_C( 66), -INT8_C( 66), -INT8_C( 18), -INT8_C( 118), INT8_C( 62), INT8_C( 114), -INT8_C( 7), -INT8_C( 54) }, }, { { INT8_C( 102), -INT8_C( 87), INT8_C( 33), INT8_C( 124), -INT8_C( 112), INT8_C( 57), INT8_C( 78), INT8_C( 34), -INT8_C( 42), -INT8_C( 33), INT8_C( 0), -INT8_C( 78), INT8_C( 92), -INT8_C( 112), INT8_C( 53), -INT8_C( 59) }, }, { { INT8_C( 17), -INT8_C( 99), INT8_C( 15), -INT8_C( 120), INT8_C( 0), INT8_C( 99), INT8_C( 95), INT8_C( 67), INT8_C( 33), INT8_C( 77), -INT8_C( 51), INT8_C( 95), -INT8_C( 65), -INT8_C( 58), INT8_C( 41), INT8_C( 37) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_private a_ = 
simde_int8x16_to_private(a); simde_uint32x4_t r = simde_vreinterpretq_u32_s8(a); simde_uint32x4_private r_ = simde_uint32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u64_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; } test_vec[] = { { { INT8_C( 3), INT8_C( 10), -INT8_C( 37), INT8_C( 49), INT8_C( 25), INT8_C( 54), -INT8_C( 41), -INT8_C( 110), -INT8_C( 34), -INT8_C( 112), INT8_C( 93), INT8_C( 12), -INT8_C( 122), INT8_C( 85), -INT8_C( 15), -INT8_C( 94) }, }, { { INT8_C( 81), -INT8_C( 62), INT8_C( 63), INT8_C( 114), -INT8_C( 14), INT8_C( 116), INT8_C( 118), -INT8_C( 98), -INT8_C( 118), INT8_C( 40), INT8_C( 49), INT8_C( 103), -INT8_C( 67), -INT8_C( 35), INT8_C( 31), -INT8_C( 64) }, }, { { -INT8_C( 25), -INT8_C( 6), -INT8_C( 15), INT8_C( 0), INT8_C( 49), -INT8_C( 56), -INT8_C( 109), INT8_C( 15), INT8_C( 88), -INT8_C( 16), INT8_C( 28), -INT8_C( 34), INT8_C( 69), INT8_C( 13), -INT8_C( 127), -INT8_C( 106) }, }, { { -INT8_C( 49), -INT8_C( 64), INT8_C( 8), -INT8_C( 63), INT8_C( 53), INT8_MAX, INT8_C( 96), -INT8_C( 65), -INT8_C( 89), -INT8_C( 111), INT8_C( 38), INT8_C( 100), INT8_C( 110), INT8_C( 69), INT8_C( 36), INT8_C( 85) }, }, { { INT8_C( 63), INT8_C( 21), INT8_C( 85), INT8_C( 112), -INT8_C( 35), -INT8_C( 24), INT8_MIN, INT8_C( 53), -INT8_C( 40), -INT8_C( 100), INT8_C( 19), INT8_C( 29), -INT8_C( 87), -INT8_C( 108), -INT8_C( 76), INT8_C( 120) }, }, { { INT8_C( 85), -INT8_C( 68), INT8_C( 57), -INT8_C( 118), INT8_C( 59), -INT8_C( 103), INT8_C( 73), -INT8_C( 30), INT8_C( 42), INT8_C( 111), INT8_C( 70), -INT8_C( 104), -INT8_C( 76), INT8_C( 106), -INT8_C( 19), -INT8_C( 13) }, }, { { INT8_MAX, INT8_C( 67), INT8_C( 100), INT8_C( 92), INT8_C( 43), -INT8_C( 28), -INT8_C( 111), INT8_C( 4), INT8_MIN, -INT8_C( 91), INT8_C( 33), INT8_C( 41), INT8_C( 57), -INT8_C( 43), -INT8_C( 95), -INT8_C( 114) }, }, { { -INT8_C( 110), -INT8_C( 38), INT8_C( 24), -INT8_C( 51), INT8_C( 116), INT8_C( 97), -INT8_C( 
80), -INT8_C( 98), -INT8_C( 48), -INT8_C( 10), INT8_C( 55), -INT8_C( 124), INT8_C( 97), INT8_C( 36), INT8_C( 120), -INT8_C( 32) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_private a_ = simde_int8x16_to_private(a); simde_uint64x2_t r = simde_vreinterpretq_u64_s8(a); simde_uint64x2_private r_ = simde_uint64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_f32_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; } test_vec[] = { { { -INT16_C( 27026), -INT16_C( 5702), -INT16_C( 32251), -INT16_C( 22224) }, }, { { INT16_C( 18627), -INT16_C( 18788), INT16_C( 28877), INT16_C( 229) }, }, { { -INT16_C( 22464), INT16_C( 10858), -INT16_C( 1223), -INT16_C( 1199) }, }, { { INT16_C( 17727), INT16_C( 2234), INT16_C( 24935), -INT16_C( 10779) }, }, { { -INT16_C( 24584), -INT16_C( 578), -INT16_C( 4575), -INT16_C( 6746) }, }, { { INT16_C( 16951), INT16_C( 1179), -INT16_C( 32590), -INT16_C( 3324) }, }, { { INT16_C( 28456), INT16_C( 24861), INT16_C( 28522), -INT16_C( 22180) }, }, { { INT16_C( 5812), INT16_C( 7090), -INT16_C( 26760), INT16_C( 28912) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_private a_ = simde_int16x4_to_private(a); simde_float32x2_t r = simde_vreinterpret_f32_s16(a); simde_float32x2_private r_ = simde_float32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_f64_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; } test_vec[] = { { { -INT16_C( 19849), -INT16_C( 26238), INT16_C( 31751), INT16_C( 20490) }, }, { { INT16_C( 6833), INT16_C( 28856), -INT16_C( 28586), INT16_C( 25538) }, }, { { INT16_C( 10997), -INT16_C( 12172), INT16_C( 29916), INT16_C( 9088) }, }, { { -INT16_C( 332), -INT16_C( 390), -INT16_C( 25049), 
-INT16_C( 24659) }, }, { { INT16_C( 12368), INT16_C( 22584), INT16_C( 17068), INT16_C( 23976) }, }, { { INT16_C( 24669), -INT16_C( 19506), -INT16_C( 28432), -INT16_C( 6634) }, }, { { -INT16_C( 29765), -INT16_C( 26698), INT16_C( 14079), -INT16_C( 19526) }, }, { { INT16_C( 13364), INT16_C( 23474), INT16_C( 24530), INT16_C( 9210) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_private a_ = simde_int16x4_to_private(a); simde_float64x1_t r = simde_vreinterpret_f64_s16(a); simde_float64x1_private r_ = simde_float64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s8_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; } test_vec[] = { { { INT16_C( 12325), -INT16_C( 26211), INT16_C( 30570), -INT16_C( 5405) }, }, { { -INT16_C( 15356), -INT16_C( 17829), INT16_C( 1543), INT16_C( 26527) }, }, { { -INT16_C( 21602), -INT16_C( 10929), -INT16_C( 5199), INT16_C( 544) }, }, { { -INT16_C( 22429), -INT16_C( 24560), INT16_C( 11069), INT16_C( 25227) }, }, { { INT16_C( 10331), -INT16_C( 14852), -INT16_C( 8289), -INT16_C( 23376) }, }, { { INT16_C( 2979), -INT16_C( 21666), -INT16_C( 750), -INT16_C( 20462) }, }, { { INT16_C( 25000), INT16_C( 22917), -INT16_C( 22964), -INT16_C( 20645) }, }, { { INT16_C( 27470), -INT16_C( 29872), -INT16_C( 9322), -INT16_C( 3602) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_private a_ = simde_int16x4_to_private(a); simde_int8x8_t r = simde_vreinterpret_s8_s16(a); simde_int8x8_private r_ = simde_int8x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s32_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; } test_vec[] = { { { -INT16_C( 12560), INT16_C( 27946), -INT16_C( 10148), INT16_C( 28958) }, }, { { 
-INT16_C( 22095), INT16_C( 22767), -INT16_C( 18596), INT16_C( 14570) }, }, { { -INT16_C( 21018), -INT16_C( 20461), -INT16_C( 11786), -INT16_C( 29585) }, }, { { -INT16_C( 11749), INT16_C( 8247), -INT16_C( 16711), -INT16_C( 21988) }, }, { { INT16_C( 18316), -INT16_C( 6121), INT16_C( 13599), -INT16_C( 12199) }, }, { { INT16_C( 18654), INT16_C( 14888), INT16_C( 4608), -INT16_C( 6541) }, }, { { -INT16_C( 31041), -INT16_C( 18793), INT16_C( 1623), INT16_C( 29250) }, }, { { INT16_C( 31193), -INT16_C( 28014), -INT16_C( 20681), -INT16_C( 15556) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_private a_ = simde_int16x4_to_private(a); simde_int32x2_t r = simde_vreinterpret_s32_s16(a); simde_int32x2_private r_ = simde_int32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s64_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; } test_vec[] = { { { INT16_C( 25283), INT16_C( 6672), -INT16_C( 6113), -INT16_C( 12546) }, }, { { -INT16_C( 14626), INT16_C( 3187), INT16_C( 15843), INT16_C( 15972) }, }, { { -INT16_C( 2508), INT16_C( 29996), -INT16_C( 16259), -INT16_C( 15064) }, }, { { -INT16_C( 967), INT16_C( 5267), -INT16_C( 1862), INT16_C( 32247) }, }, { { INT16_C( 2138), INT16_C( 31127), -INT16_C( 27152), -INT16_C( 12473) }, }, { { -INT16_C( 17572), INT16_C( 16347), INT16_C( 16376), INT16_C( 11390) }, }, { { -INT16_C( 21962), -INT16_C( 19550), -INT16_C( 13718), -INT16_C( 23687) }, }, { { INT16_C( 3271), -INT16_C( 32329), -INT16_C( 20732), INT16_C( 24318) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_private a_ = simde_int16x4_to_private(a); simde_int64x1_t r = simde_vreinterpret_s64_s16(a); simde_int64x1_private r_ = simde_int64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } 
return 0; } static int test_simde_vreinterpret_u8_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; } test_vec[] = { { { -INT16_C( 32486), -INT16_C( 2660), INT16_C( 7883), -INT16_C( 15352) }, }, { { -INT16_C( 12203), -INT16_C( 9918), INT16_C( 13929), -INT16_C( 4761) }, }, { { -INT16_C( 15816), INT16_C( 30875), INT16_C( 6606), -INT16_C( 29294) }, }, { { INT16_C( 23764), -INT16_C( 4001), INT16_C( 15786), -INT16_C( 15111) }, }, { { -INT16_C( 26946), -INT16_C( 30023), -INT16_C( 15948), INT16_C( 2382) }, }, { { -INT16_C( 28527), -INT16_C( 1053), INT16_C( 19142), -INT16_C( 24) }, }, { { -INT16_C( 31988), -INT16_C( 9353), INT16_C( 2461), INT16_C( 29032) }, }, { { -INT16_C( 14491), INT16_C( 3937), INT16_C( 23301), -INT16_C( 15404) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_private a_ = simde_int16x4_to_private(a); simde_uint8x8_t r = simde_vreinterpret_u8_s16(a); simde_uint8x8_private r_ = simde_uint8x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u16_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; } test_vec[] = { { { -INT16_C( 13698), -INT16_C( 2016), INT16_C( 7173), INT16_C( 8812) }, }, { { INT16_C( 14693), -INT16_C( 20452), -INT16_C( 11625), INT16_C( 2112) }, }, { { INT16_C( 12150), -INT16_C( 13614), INT16_C( 29200), -INT16_C( 12138) }, }, { { INT16_C( 5371), INT16_C( 14148), -INT16_C( 22854), INT16_C( 14568) }, }, { { INT16_C( 2160), INT16_C( 30256), -INT16_C( 25564), -INT16_C( 30312) }, }, { { -INT16_C( 18987), INT16_C( 27961), INT16_C( 31111), -INT16_C( 395) }, }, { { INT16_C( 18345), -INT16_C( 17976), INT16_C( 24249), -INT16_C( 19062) }, }, { { -INT16_C( 12685), INT16_C( 11756), -INT16_C( 11148), -INT16_C( 6811) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_private a_ = 
simde_int16x4_to_private(a); simde_uint16x4_t r = simde_vreinterpret_u16_s16(a); simde_uint16x4_private r_ = simde_uint16x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u32_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; } test_vec[] = { { { -INT16_C( 27149), -INT16_C( 16278), INT16_C( 23388), INT16_C( 23522) }, }, { { INT16_C( 27458), -INT16_C( 32273), INT16_C( 2040), -INT16_C( 22817) }, }, { { INT16_C( 19568), -INT16_C( 31411), -INT16_C( 28489), INT16_C( 18863) }, }, { { INT16_C( 32598), -INT16_C( 30228), INT16_C( 9375), -INT16_C( 28005) }, }, { { INT16_C( 1721), INT16_C( 5459), INT16_C( 13665), -INT16_C( 23695) }, }, { { INT16_C( 24737), -INT16_C( 26332), INT16_C( 1127), -INT16_C( 10433) }, }, { { -INT16_C( 29360), INT16_C( 1885), INT16_C( 3101), INT16_C( 29520) }, }, { { INT16_C( 15500), INT16_C( 11260), -INT16_C( 26528), INT16_C( 6845) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_private a_ = simde_int16x4_to_private(a); simde_uint32x2_t r = simde_vreinterpret_u32_s16(a); simde_uint32x2_private r_ = simde_uint32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u64_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; } test_vec[] = { { { -INT16_C( 19422), INT16_C( 4112), -INT16_C( 6132), -INT16_C( 22247) }, }, { { INT16_C( 17482), INT16_C( 22232), INT16_C( 7090), INT16_C( 19055) }, }, { { INT16_C( 7162), -INT16_C( 12732), INT16_C( 25546), -INT16_C( 12846) }, }, { { INT16_C( 32293), INT16_C( 15899), -INT16_C( 27923), INT16_C( 4212) }, }, { { -INT16_C( 31674), INT16_C( 21024), INT16_C( 14700), -INT16_C( 18436) }, }, { { -INT16_C( 11139), INT16_C( 12045), INT16_C( 31983), -INT16_C( 5767) }, }, { { -INT16_C( 16745), INT16_C( 25016), -INT16_C( 30175), INT16_C( 17966) }, }, { { INT16_C( 18953), -INT16_C( 
2428), -INT16_C( 1828), INT16_C( 8710) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_private a_ = simde_int16x4_to_private(a); simde_uint64x1_t r = simde_vreinterpret_u64_s16(a); simde_uint64x1_private r_ = simde_uint64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_f32_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; } test_vec[] = { { { INT16_C( 26800), INT16_C( 6579), INT16_C( 6869), INT16_C( 1599), -INT16_C( 6376), -INT16_C( 21236), INT16_C( 31152), INT16_C( 26129) }, }, { { INT16_C( 4862), -INT16_C( 26934), -INT16_C( 17327), -INT16_C( 8418), INT16_C( 12000), -INT16_C( 10920), -INT16_C( 22979), -INT16_C( 4503) }, }, { { INT16_C( 7438), -INT16_C( 7417), INT16_C( 17975), INT16_C( 20713), -INT16_C( 2770), -INT16_C( 8451), INT16_C( 3694), INT16_C( 27972) }, }, { { INT16_C( 3616), INT16_C( 28931), INT16_C( 8650), -INT16_C( 21936), -INT16_C( 22449), -INT16_C( 29313), -INT16_C( 6066), INT16_C( 23675) }, }, { { -INT16_C( 32251), INT16_C( 15679), INT16_C( 10441), -INT16_C( 2163), -INT16_C( 30179), -INT16_C( 29483), INT16_C( 6552), -INT16_C( 18183) }, }, { { -INT16_C( 985), -INT16_C( 3798), INT16_C( 31261), INT16_C( 28059), INT16_C( 6691), INT16_C( 29178), INT16_C( 29954), INT16_C( 2254) }, }, { { INT16_C( 3575), -INT16_C( 16315), -INT16_C( 11722), INT16_C( 21431), -INT16_C( 29604), -INT16_C( 2849), -INT16_C( 10075), -INT16_C( 13140) }, }, { { -INT16_C( 10540), -INT16_C( 3395), INT16_C( 22609), INT16_C( 29791), INT16_C( 22898), INT16_C( 30181), -INT16_C( 19506), -INT16_C( 14979) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_float32x4_t r = simde_vreinterpretq_f32_s16(a); simde_float32x4_private r_ = simde_float32x4_to_private(r); 
simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_f64_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; } test_vec[] = { { { -INT16_C( 29595), -INT16_C( 8184), INT16_C( 4671), INT16_C( 8346), -INT16_C( 13564), INT16_C( 18435), -INT16_C( 4990), INT16_C( 23480) }, }, { { -INT16_C( 21848), -INT16_C( 19785), -INT16_C( 22100), INT16_C( 15114), INT16_C( 15366), -INT16_C( 9798), INT16_C( 30430), INT16_C( 17186) }, }, { { INT16_C( 10754), INT16_C( 16675), -INT16_C( 16836), INT16_C( 16482), INT16_C( 25993), INT16_C( 3209), INT16_C( 16721), -INT16_C( 1433) }, }, { { INT16_C( 8171), -INT16_C( 26708), -INT16_C( 18744), -INT16_C( 12589), -INT16_C( 29198), -INT16_C( 12121), -INT16_C( 14077), INT16_C( 1300) }, }, { { INT16_C( 14323), INT16_C( 12103), -INT16_C( 22027), INT16_C( 32624), -INT16_C( 1778), INT16_C( 24715), -INT16_C( 3526), INT16_C( 9562) }, }, { { INT16_C( 1553), -INT16_C( 9796), -INT16_C( 28740), -INT16_C( 20569), INT16_C( 19997), INT16_C( 8319), -INT16_C( 27881), INT16_C( 2598) }, }, { { INT16_C( 28107), -INT16_C( 16326), -INT16_C( 21994), INT16_C( 9279), -INT16_C( 13661), -INT16_C( 8828), -INT16_C( 8515), -INT16_C( 12798) }, }, { { -INT16_C( 16668), -INT16_C( 24152), INT16_C( 20302), INT16_C( 27472), -INT16_C( 12386), -INT16_C( 19061), -INT16_C( 20125), INT16_C( 11968) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_float64x2_t r = simde_vreinterpretq_f64_s16(a); simde_float64x2_private r_ = simde_float64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s8_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; } test_vec[] = { { { INT16_C( 25297), INT16_C( 2610), INT16_C( 31420), INT16_C( 20877), -INT16_C( 22947), -INT16_C( 6853), -INT16_C( 14640), INT16_C( 23706) }, }, { { 
INT16_C( 19927), INT16_C( 10263), INT16_C( 461), INT16_C( 18282), INT16_C( 15045), INT16_C( 19626), -INT16_C( 25667), -INT16_C( 28937) }, }, { { INT16_C( 11006), -INT16_C( 17768), INT16_C( 9636), INT16_C( 268), INT16_C( 18380), -INT16_C( 25370), -INT16_C( 32755), -INT16_C( 6920) }, }, { { INT16_C( 4045), -INT16_C( 25843), INT16_C( 30480), -INT16_C( 10526), -INT16_C( 29519), INT16_C( 28450), INT16_C( 6439), INT16_C( 9725) }, }, { { -INT16_C( 27069), -INT16_C( 6176), -INT16_C( 4933), -INT16_C( 30744), -INT16_C( 12493), INT16_C( 16676), INT16_C( 7247), INT16_C( 7461) }, }, { { INT16_C( 12844), INT16_C( 15544), -INT16_C( 25942), INT16_C( 23314), INT16_C( 13350), INT16_C( 19914), -INT16_C( 14258), -INT16_C( 28301) }, }, { { INT16_C( 21342), INT16_C( 6521), INT16_C( 24895), INT16_C( 29345), -INT16_C( 15056), -INT16_C( 32589), -INT16_C( 9759), INT16_C( 3485) }, }, { { INT16_C( 21771), -INT16_C( 19126), INT16_C( 23791), INT16_C( 5393), -INT16_C( 9327), -INT16_C( 8350), -INT16_C( 10845), INT16_C( 368) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_int8x16_t r = simde_vreinterpretq_s8_s16(a); simde_int8x16_private r_ = simde_int8x16_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s32_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; } test_vec[] = { { { INT16_C( 3755), -INT16_C( 28039), INT16_C( 32547), -INT16_C( 16414), INT16_C( 16253), INT16_C( 9054), -INT16_C( 6945), -INT16_C( 4111) }, }, { { -INT16_C( 24673), INT16_C( 14711), INT16_C( 11169), INT16_C( 31468), -INT16_C( 11576), -INT16_C( 26149), -INT16_C( 11247), -INT16_C( 17209) }, }, { { INT16_C( 16610), INT16_C( 1614), INT16_C( 12736), INT16_C( 15813), INT16_C( 9328), INT16_C( 20321), INT16_C( 21000), -INT16_C( 22465) }, }, { { -INT16_C( 18703), -INT16_C( 27935), -INT16_C( 12574), -INT16_C( 
22004), -INT16_C( 6240), -INT16_C( 20156), INT16_C( 3003), -INT16_C( 25234) }, }, { { -INT16_C( 17332), INT16_C( 3235), INT16_C( 27117), INT16_C( 23881), -INT16_C( 21875), -INT16_C( 27219), -INT16_C( 4868), -INT16_C( 4803) }, }, { { INT16_C( 8098), -INT16_C( 31617), -INT16_C( 29715), -INT16_C( 29393), INT16_C( 29554), INT16_C( 11582), -INT16_C( 21378), -INT16_C( 13621) }, }, { { INT16_C( 28265), INT16_C( 22230), INT16_C( 8407), INT16_C( 25780), INT16_C( 25034), -INT16_C( 14342), INT16_C( 14157), -INT16_C( 4172) }, }, { { INT16_C( 13398), INT16_C( 17268), -INT16_C( 23617), INT16_C( 13008), INT16_C( 3862), -INT16_C( 27553), INT16_C( 10939), INT16_C( 9311) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_int32x4_t r = simde_vreinterpretq_s32_s16(a); simde_int32x4_private r_ = simde_int32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s64_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; } test_vec[] = { { { INT16_C( 20294), -INT16_C( 29983), INT16_C( 20382), INT16_C( 509), INT16_C( 3811), INT16_C( 8932), INT16_C( 11235), -INT16_C( 11050) }, }, { { INT16_C( 12292), INT16_C( 26311), INT16_C( 3982), -INT16_C( 17091), -INT16_C( 25962), INT16_C( 3530), INT16_C( 13008), INT16_C( 5801) }, }, { { -INT16_C( 30079), INT16_C( 8352), -INT16_C( 25126), -INT16_C( 17119), INT16_C( 1451), -INT16_C( 28705), -INT16_C( 18896), INT16_C( 13411) }, }, { { INT16_C( 10982), INT16_C( 29850), -INT16_C( 10439), -INT16_C( 12494), -INT16_C( 910), INT16_C( 17117), -INT16_C( 31186), -INT16_C( 20648) }, }, { { -INT16_C( 1776), -INT16_C( 5425), -INT16_C( 3690), INT16_C( 17063), -INT16_C( 30730), INT16_C( 10193), INT16_C( 13373), INT16_C( 9051) }, }, { { -INT16_C( 2466), -INT16_C( 26728), -INT16_C( 13619), INT16_C( 16230), INT16_C( 17350), -INT16_C( 2943), -INT16_C( 9527), 
-INT16_C( 9565) }, }, { { INT16_C( 29651), INT16_C( 27076), INT16_C( 27748), INT16_C( 23211), INT16_C( 31987), INT16_C( 12417), -INT16_C( 8784), INT16_C( 3667) }, }, { { -INT16_C( 5165), -INT16_C( 24411), INT16_C( 3253), INT16_C( 31712), INT16_C( 24911), INT16_C( 6511), INT16_C( 4923), INT16_C( 3827) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_int64x2_t r = simde_vreinterpretq_s64_s16(a); simde_int64x2_private r_ = simde_int64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u8_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; } test_vec[] = { { { -INT16_C( 10869), -INT16_C( 3074), -INT16_C( 14055), -INT16_C( 27820), -INT16_C( 19316), INT16_C( 10278), INT16_C( 16875), -INT16_C( 26815) }, }, { { INT16_C( 10309), -INT16_C( 521), -INT16_C( 22342), INT16_C( 24239), -INT16_C( 29301), INT16_C( 29629), INT16_C( 2522), INT16_C( 25958) }, }, { { INT16_C( 25822), -INT16_C( 1960), -INT16_C( 21203), -INT16_C( 18037), -INT16_C( 19871), INT16_C( 19682), INT16_C( 9203), INT16_C( 14564) }, }, { { -INT16_C( 9397), INT16_C( 1333), -INT16_C( 7037), INT16_C( 3939), INT16_C( 8305), INT16_C( 19330), -INT16_C( 6102), INT16_C( 2224) }, }, { { INT16_C( 2380), INT16_C( 30976), -INT16_C( 29514), INT16_C( 5939), INT16_C( 5438), INT16_C( 12644), INT16_C( 18488), -INT16_C( 31895) }, }, { { -INT16_C( 25053), -INT16_C( 22904), -INT16_C( 5246), -INT16_C( 3147), INT16_C( 14348), INT16_C( 13887), -INT16_C( 4320), INT16_C( 27966) }, }, { { INT16_C( 16376), -INT16_C( 20762), INT16_C( 6603), INT16_C( 2502), INT16_C( 10798), INT16_C( 26170), -INT16_C( 23694), -INT16_C( 27159) }, }, { { INT16_C( 28993), -INT16_C( 15557), -INT16_C( 3747), INT16_C( 27062), -INT16_C( 2775), INT16_C( 18847), -INT16_C( 8731), -INT16_C( 8778) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_uint8x16_t r = simde_vreinterpretq_u8_s16(a); simde_uint8x16_private r_ = simde_uint8x16_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u16_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; } test_vec[] = { { { -INT16_C( 2701), INT16_C( 26145), -INT16_C( 32458), -INT16_C( 9044), INT16_C( 23783), -INT16_C( 21411), INT16_C( 2922), INT16_C( 2234) }, }, { { -INT16_C( 386), INT16_C( 27701), -INT16_C( 15086), INT16_C( 23162), -INT16_C( 2118), -INT16_C( 6704), -INT16_C( 27867), -INT16_C( 26378) }, }, { { INT16_C( 6024), -INT16_C( 16642), -INT16_C( 21863), -INT16_C( 32614), -INT16_C( 2298), INT16_C( 28717), -INT16_C( 6398), -INT16_C( 32391) }, }, { { -INT16_C( 20763), -INT16_C( 2067), INT16_C( 26740), INT16_C( 11858), INT16_C( 8799), -INT16_C( 31724), INT16_C( 2742), INT16_C( 15900) }, }, { { INT16_C( 6690), -INT16_C( 17411), -INT16_C( 26684), -INT16_C( 13765), INT16_C( 26767), -INT16_C( 28358), -INT16_C( 19632), INT16_C( 13586) }, }, { { INT16_C( 98), -INT16_C( 10707), INT16_C( 32616), -INT16_C( 14588), INT16_C( 6305), INT16_C( 22347), INT16_C( 26403), INT16_C( 17814) }, }, { { -INT16_C( 27775), INT16_C( 17664), INT16_C( 15146), -INT16_C( 18161), INT16_C( 18852), -INT16_C( 2997), INT16_C( 24061), INT16_C( 24361) }, }, { { INT16_C( 22109), -INT16_C( 15051), INT16_C( 14805), INT16_C( 30604), -INT16_C( 10414), INT16_C( 30158), INT16_C( 25662), -INT16_C( 16454) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_uint16x8_t r = simde_vreinterpretq_u16_s16(a); simde_uint16x8_private r_ = simde_uint16x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int 
test_simde_vreinterpretq_u32_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; } test_vec[] = { { { -INT16_C( 21556), -INT16_C( 15853), INT16_C( 13986), -INT16_C( 10777), -INT16_C( 13968), -INT16_C( 19349), INT16_C( 19197), INT16_C( 747) }, }, { { INT16_C( 22270), INT16_C( 22312), -INT16_C( 26423), INT16_C( 13709), -INT16_C( 3407), INT16_C( 28470), -INT16_C( 11245), -INT16_C( 8339) }, }, { { -INT16_C( 32385), INT16_C( 8865), -INT16_C( 30281), INT16_C( 10231), INT16_C( 25170), INT16_C( 20444), -INT16_C( 14420), -INT16_C( 21935) }, }, { { INT16_C( 31262), -INT16_C( 6399), -INT16_C( 29166), -INT16_C( 15587), INT16_C( 21376), -INT16_C( 27598), -INT16_C( 24793), -INT16_C( 22669) }, }, { { INT16_C( 5408), -INT16_C( 10295), -INT16_C( 16226), -INT16_C( 3841), -INT16_C( 9438), -INT16_C( 12736), -INT16_C( 28254), -INT16_C( 16264) }, }, { { INT16_C( 30987), INT16_C( 7592), -INT16_C( 15096), -INT16_C( 30496), INT16_C( 4632), INT16_C( 16156), -INT16_C( 28494), -INT16_C( 11546) }, }, { { -INT16_C( 20571), INT16_C( 17322), -INT16_C( 22161), -INT16_C( 28365), INT16_C( 29572), INT16_C( 9823), -INT16_C( 10491), INT16_C( 4327) }, }, { { -INT16_C( 28847), INT16_C( 22830), INT16_C( 3668), INT16_C( 27873), -INT16_C( 479), -INT16_C( 11349), -INT16_C( 28018), INT16_C( 13221) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_private a_ = simde_int16x8_to_private(a); simde_uint32x4_t r = simde_vreinterpretq_u32_s16(a); simde_uint32x4_private r_ = simde_uint32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }

/* simde_vreinterpretq_u64_s16: for each vector, asserts that simde_memcmp()
 * of the private views of the result and of the input returns 0. */
static int
test_simde_vreinterpretq_u64_s16 (SIMDE_MUNIT_TEST_ARGS) {
  struct {
    int16_t a[8];
  } test_vec[] = {
    { { -INT16_C(20852), -INT16_C(25483), INT16_C(22713), -INT16_C(29124), -INT16_C(29678), -INT16_C(23696), INT16_C(9074), -INT16_C(27927) }, },
    { { -INT16_C(21498), -INT16_C(8110), INT16_C(928), -INT16_C(30088), INT16_C(7327), INT16_C(28745), INT16_C(15894), -INT16_C(23830) }, },
    { { INT16_C(24556), -INT16_C(23234), INT16_C(31671), -INT16_C(13773), -INT16_C(23801), INT16_C(31085), INT16_C(22214), -INT16_C(13300) }, },
    { { INT16_C(24067), -INT16_C(23636), INT16_C(9313), INT16_C(46), INT16_C(30529), INT16_C(22385), INT16_C(23477), -INT16_C(24071) }, },
    { { INT16_C(14523), INT16_C(29254), INT16_C(31155), -INT16_C(17860), -INT16_C(22244), -INT16_C(7629), INT16_C(16128), INT16_C(942) }, },
    { { INT16_C(23453), -INT16_C(90), -INT16_C(11137), -INT16_C(16129), INT16_C(28748), INT16_C(279), INT16_C(4556), -INT16_C(30813) }, },
    { { -INT16_C(5815), -INT16_C(775), INT16_C(13923), INT16_C(32694), -INT16_C(5665), -INT16_C(8350), INT16_C(4137), -INT16_C(14622) }, },
    { { -INT16_C(30357), -INT16_C(5179), -INT16_C(15011), -INT16_C(22101), -INT16_C(15563), INT16_C(427), INT16_C(20180), INT16_C(7560) }, },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < vec_count ; idx++) {
    simde_int16x8_t src = simde_vld1q_s16(test_vec[idx].a);
    simde_int16x8_private a_ = simde_int16x8_to_private(src);
    simde_uint64x2_t out = simde_vreinterpretq_u64_s16(src);
    simde_uint64x2_private r_ = simde_uint64x2_to_private(out);

    simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_)));
  }

  return 0;
}

/* simde_vreinterpret_f32_s32: for each vector, asserts that simde_memcmp()
 * of the private views of the result and of the input returns 0. */
static int
test_simde_vreinterpret_f32_s32 (SIMDE_MUNIT_TEST_ARGS) {
  struct {
    int32_t a[2];
  } test_vec[] = {
    { { INT32_C(1942756607), INT32_C(1141729398) }, },
    { { INT32_C(1475419083), INT32_C(1695451994) }, },
    { { -INT32_C(1030043290), -INT32_C(1608484645) }, },
    { { INT32_C(563301726), INT32_C(833684529) }, },
    { { -INT32_C(1767604960), -INT32_C(1311067675) }, },
    { { INT32_C(638110924), -INT32_C(1114892713) }, },
    { { -INT32_C(1149294881), -INT32_C(61104226) }, },
    { { INT32_C(505278188), INT32_C(323997683) }, },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < vec_count ; idx++) {
    simde_int32x2_t src = simde_vld1_s32(test_vec[idx].a);
    simde_int32x2_private a_ = simde_int32x2_to_private(src);
    simde_float32x2_t out = simde_vreinterpret_f32_s32(src);
    simde_float32x2_private r_ = simde_float32x2_to_private(out);

    simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_)));
  }

  return 0;
}

/* simde_vreinterpret_f64_s32: for each vector, asserts that simde_memcmp()
 * of the private views of the result and of the input returns 0. */
static int
test_simde_vreinterpret_f64_s32 (SIMDE_MUNIT_TEST_ARGS) {
  struct {
    int32_t a[2];
  } test_vec[] = {
    { { -INT32_C(115647159), INT32_C(1543473707) }, },
    { { -INT32_C(414762519), -INT32_C(1092812093) }, },
    { { -INT32_C(524899849), -INT32_C(1091006359) }, },
    { { INT32_C(1246431952), -INT32_C(91088719) }, },
    { { -INT32_C(1577865867), INT32_C(570225207) }, },
    { { -INT32_C(301448405), INT32_C(1017963845) }, },
    { { -INT32_C(149134450), -INT32_C(994765581) }, },
    { { -INT32_C(821166050), -INT32_C(1899388904) }, },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < vec_count ; idx++) {
    simde_int32x2_t src = simde_vld1_s32(test_vec[idx].a);
    simde_int32x2_private a_ = simde_int32x2_to_private(src);
    simde_float64x1_t out = simde_vreinterpret_f64_s32(src);
    simde_float64x1_private r_ = simde_float64x1_to_private(out);

    simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_)));
  }

  return 0;
}

/* simde_vreinterpret_s8_s32: for each vector, asserts that simde_memcmp()
 * of the private views of the result and of the input returns 0. */
static int
test_simde_vreinterpret_s8_s32 (SIMDE_MUNIT_TEST_ARGS) {
  struct {
    int32_t a[2];
  } test_vec[] = {
    { { -INT32_C(1326109008), INT32_C(1640452524) }, },
    { { INT32_C(708537460), -INT32_C(1325793864) }, },
    { { -INT32_C(2091701145), INT32_C(605343901) }, },
    { { -INT32_C(117296960), INT32_C(536353390) }, },
    { { -INT32_C(187699640), -INT32_C(1269459137) }, },
    { { -INT32_C(1143041789), -INT32_C(177416050) }, },
    { { -INT32_C(1585922300), INT32_C(1338346895) }, },
    { { INT32_C(726255549), INT32_C(709509601) }, },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < vec_count ; idx++) {
    simde_int32x2_t src = simde_vld1_s32(test_vec[idx].a);
    simde_int32x2_private a_ = simde_int32x2_to_private(src);
    simde_int8x8_t out = simde_vreinterpret_s8_s32(src);
    simde_int8x8_private r_ = simde_int8x8_to_private(out);

    simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_)));
  }

  return 0;
}

/* simde_vreinterpret_s16_s32: for each vector, asserts that simde_memcmp()
 * of the private views of the result and of the input returns 0. */
static int
test_simde_vreinterpret_s16_s32 (SIMDE_MUNIT_TEST_ARGS) {
  struct {
    int32_t a[2];
  } test_vec[] = {
    { { INT32_C(762512963), INT32_C(1517888677) }, },
    { { INT32_C(2109169374), INT32_C(948011566) }, },
    { { INT32_C(354883301), -INT32_C(206442941) }, },
    { { INT32_C(151649563), -INT32_C(1321787282) }, },
    { { -INT32_C(874599898), -INT32_C(1406838578) }, },
    { { -INT32_C(467018314), INT32_C(1142729311) }, },
    { { INT32_C(72958912), INT32_C(1308035890) }, },
    { { INT32_C(1985413128), INT32_C(1176997408) }, },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < vec_count ; idx++) {
    simde_int32x2_t src = simde_vld1_s32(test_vec[idx].a);
    simde_int32x2_private a_ = simde_int32x2_to_private(src);
    simde_int16x4_t out = simde_vreinterpret_s16_s32(src);
    simde_int16x4_private r_ = simde_int16x4_to_private(out);

    simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_)));
  }

  return 0;
}

/* simde_vreinterpret_s64_s32: for each vector, asserts that simde_memcmp()
 * of the private views of the result and of the input returns 0. */
static int
test_simde_vreinterpret_s64_s32 (SIMDE_MUNIT_TEST_ARGS) {
  struct {
    int32_t a[2];
  } test_vec[] = {
    { { -INT32_C(1772159451), -INT32_C(273705620) }, },
    { { -INT32_C(1746436879), -INT32_C(1982011284) }, },
    { { -INT32_C(777181111), -INT32_C(1931103660) }, },
    { { -INT32_C(1359005937), -INT32_C(550167878) }, },
    { { -INT32_C(2072603624), INT32_C(443819305) }, },
    { { INT32_C(246504353), -INT32_C(1936224957) }, },
    { { INT32_C(157173173), INT32_C(110511095) }, },
    { { INT32_C(884315514), -INT32_C(938153297) }, },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < vec_count ; idx++) {
    simde_int32x2_t src = simde_vld1_s32(test_vec[idx].a);
    simde_int32x2_private a_ = simde_int32x2_to_private(src);
    simde_int64x1_t out = simde_vreinterpret_s64_s32(src);
    simde_int64x1_private r_ = simde_int64x1_to_private(out);

    simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_)));
  }

  return 0;
}

static int test_simde_vreinterpret_u8_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; } test_vec[] = { { { -INT32_C( 995763866), -INT32_C( 546679899) }, }, { { INT32_C(
53825220), -INT32_C( 1775030743) }, }, { { INT32_C( 1938520045), -INT32_C( 1292661555) }, }, { { INT32_C( 417415971), -INT32_C( 1253875889) }, }, { { -INT32_C( 579147720), INT32_C( 12444732) }, }, { { INT32_C( 1526985266), INT32_C( 233911840) }, }, { { INT32_C( 2072018350), INT32_C( 741241609) }, }, { { INT32_C( 88412086), -INT32_C( 1480882065) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_private a_ = simde_int32x2_to_private(a); simde_uint8x8_t r = simde_vreinterpret_u8_s32(a); simde_uint8x8_private r_ = simde_uint8x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }

/* simde_vreinterpret_u16_s32: for each vector, asserts that simde_memcmp()
 * of the private views of the result and of the input returns 0. */
static int
test_simde_vreinterpret_u16_s32 (SIMDE_MUNIT_TEST_ARGS) {
  struct {
    int32_t a[2];
  } test_vec[] = {
    { { -INT32_C(149455343), -INT32_C(818642956) }, },
    { { INT32_C(2049044700), -INT32_C(1172643403) }, },
    { { -INT32_C(1629876328), -INT32_C(1237868355) }, },
    { { -INT32_C(1013774000), INT32_C(406067718) }, },
    { { -INT32_C(1945154664), -INT32_C(1487125301) }, },
    { { -INT32_C(383681228), -INT32_C(157074594) }, },
    { { INT32_C(143949130), INT32_C(1857997854) }, },
    { { -INT32_C(617524779), INT32_C(66283115) }, },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < vec_count ; idx++) {
    simde_int32x2_t src = simde_vld1_s32(test_vec[idx].a);
    simde_int32x2_private a_ = simde_int32x2_to_private(src);
    simde_uint16x4_t out = simde_vreinterpret_u16_s32(src);
    simde_uint16x4_private r_ = simde_uint16x4_to_private(out);

    simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_)));
  }

  return 0;
}

/* simde_vreinterpret_u32_s32: for each vector, asserts that simde_memcmp()
 * of the private views of the result and of the input returns 0. */
static int
test_simde_vreinterpret_u32_s32 (SIMDE_MUNIT_TEST_ARGS) {
  struct {
    int32_t a[2];
  } test_vec[] = {
    { { -INT32_C(630456070), -INT32_C(527501120) }, },
    { { INT32_C(283556507), INT32_C(426914223) }, },
    { { INT32_C(981302759), -INT32_C(427316278) }, },
    { { INT32_C(1365974946), INT32_C(2124495236) }, },
    { { -INT32_C(111669959), -INT32_C(1596332283) }, },
    { { INT32_C(1353760929), -INT32_C(664198415) }, },
    { { INT32_C(1779623840), INT32_C(894474642) }, },
    { { INT32_C(1015462840), INT32_C(767240180) }, },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < vec_count ; idx++) {
    simde_int32x2_t src = simde_vld1_s32(test_vec[idx].a);
    simde_int32x2_private a_ = simde_int32x2_to_private(src);
    simde_uint32x2_t out = simde_vreinterpret_u32_s32(src);
    simde_uint32x2_private r_ = simde_uint32x2_to_private(out);

    simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_)));
  }

  return 0;
}

/* simde_vreinterpret_u64_s32: for each vector, asserts that simde_memcmp()
 * of the private views of the result and of the input returns 0. */
static int
test_simde_vreinterpret_u64_s32 (SIMDE_MUNIT_TEST_ARGS) {
  struct {
    int32_t a[2];
  } test_vec[] = {
    { { -INT32_C(895446699), INT32_C(1960405026) }, },
    { { INT32_C(439627100), -INT32_C(1280121677) }, },
    { { -INT32_C(41083772), INT32_C(1619246791) }, },
    { { -INT32_C(1666539493), -INT32_C(179584609) }, },
    { { INT32_C(1975511890), -INT32_C(1326868140) }, },
    { { INT32_C(2043289030), -INT32_C(2043839230) }, },
    { { INT32_C(1636022937), -INT32_C(1816066440) }, },
    { { INT32_C(1160735654), INT32_C(2134538796) }, },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < vec_count ; idx++) {
    simde_int32x2_t src = simde_vld1_s32(test_vec[idx].a);
    simde_int32x2_private a_ = simde_int32x2_to_private(src);
    simde_uint64x1_t out = simde_vreinterpret_u64_s32(src);
    simde_uint64x1_private r_ = simde_uint64x1_to_private(out);

    simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_)));
  }

  return 0;
}

/* simde_vreinterpretq_f32_s32: for each vector, asserts that simde_memcmp()
 * of the private views of the result and of the input returns 0. */
static int
test_simde_vreinterpretq_f32_s32 (SIMDE_MUNIT_TEST_ARGS) {
  struct {
    int32_t a[4];
  } test_vec[] = {
    { { -INT32_C(337377293), INT32_C(1436377999), INT32_C(759437979), INT32_C(598356123) }, },
    { { -INT32_C(1245575572), -INT32_C(608506485), INT32_C(1163288433), -INT32_C(702091293) }, },
    { { -INT32_C(2034169097), INT32_C(165371501), INT32_C(339091576), -INT32_C(1120411568) }, },
    { { INT32_C(1920137702), INT32_C(1464675814), INT32_C(1872536460), -INT32_C(1975139437) }, },
    { { INT32_C(990906061), -INT32_C(582685851), INT32_C(1559329291), INT32_C(1092167770) }, },
    { { INT32_C(129207073), INT32_C(1163788472), INT32_C(917830563), -INT32_C(1950287170) }, },
    { { INT32_C(1707528192), -INT32_C(951907652), -INT32_C(551340924), INT32_C(2116041821) }, },
    { { -INT32_C(2138647608), INT32_C(1992680659), -INT32_C(1649575201), INT32_C(1948872052) }, },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < vec_count ; idx++) {
    simde_int32x4_t src = simde_vld1q_s32(test_vec[idx].a);
    simde_int32x4_private a_ = simde_int32x4_to_private(src);
    simde_float32x4_t out = simde_vreinterpretq_f32_s32(src);
    simde_float32x4_private r_ = simde_float32x4_to_private(out);

    simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_)));
  }

  return 0;
}

/* simde_vreinterpretq_f64_s32: for each vector, asserts that simde_memcmp()
 * of the private views of the result and of the input returns 0. */
static int
test_simde_vreinterpretq_f64_s32 (SIMDE_MUNIT_TEST_ARGS) {
  struct {
    int32_t a[4];
  } test_vec[] = {
    { { INT32_C(623130355), INT32_C(1449239400), -INT32_C(1128746035), -INT32_C(1367149809) }, },
    { { -INT32_C(2072522775), -INT32_C(1280826145), INT32_C(2128924204), -INT32_C(1000200240) }, },
    { { -INT32_C(957774242), -INT32_C(149075415), INT32_C(229889534), -INT32_C(1296353591) }, },
    { { -INT32_C(449367547), -INT32_C(2087133353), INT32_C(2114026925), INT32_C(37905316) }, },
    { { INT32_C(331951082), INT32_C(1946871414), -INT32_C(2071871813), -INT32_C(113820428) }, },
    { { -INT32_C(958501265), -INT32_C(95848627), -INT32_C(1736946700), -INT32_C(1734624594) }, },
    { { INT32_C(1554801894), INT32_C(97564234), INT32_C(1753895284), -INT32_C(60636787) }, },
    { { INT32_C(2093105199), -INT32_C(1418326857), INT32_C(88403799), -INT32_C(1885413463) }, },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < vec_count ; idx++) {
    simde_int32x4_t src = simde_vld1q_s32(test_vec[idx].a);
    simde_int32x4_private a_ = simde_int32x4_to_private(src);
    simde_float64x2_t out = simde_vreinterpretq_f64_s32(src);
    simde_float64x2_private r_ = simde_float64x2_to_private(out);

    simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_)));
  }

  return 0;
}

/* simde_vreinterpretq_s8_s32: for each vector, asserts that simde_memcmp()
 * of the private views of the result and of the input returns 0. */
static int
test_simde_vreinterpretq_s8_s32 (SIMDE_MUNIT_TEST_ARGS) {
  struct {
    int32_t a[4];
  } test_vec[] = {
    { { INT32_C(1739808067), -INT32_C(119151266), INT32_C(1625226765), INT32_C(376018564) }, },
    { { INT32_C(977616236), -INT32_C(549451966), INT32_C(391248997), -INT32_C(1320634002) }, },
    { { INT32_C(1964636950), -INT32_C(294781215), -INT32_C(2125509636), INT32_C(1301788897) }, },
    { { INT32_C(948493557), INT32_C(1209518307), INT32_C(845179076), INT32_C(954443810) }, },
    { { -INT32_C(2052195164), -INT32_C(143451397), INT32_C(1182319205), INT32_C(1888751738) }, },
    { { -INT32_C(794288916), -INT32_C(1474773020), INT32_C(1239054375), -INT32_C(981287647) }, },
    { { -INT32_C(1253429318), -INT32_C(1347633846), -INT32_C(84531840), INT32_C(560630325) }, },
    { { -INT32_C(1963912538), -INT32_C(114160943), -INT32_C(1555952510), -INT32_C(2090285879) }, },
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t idx = 0 ; idx < vec_count ; idx++) {
    simde_int32x4_t src = simde_vld1q_s32(test_vec[idx].a);
    simde_int32x4_private a_ = simde_int32x4_to_private(src);
    simde_int8x16_t out = simde_vreinterpretq_s8_s32(src);
    simde_int8x16_private r_ = simde_int8x16_to_private(out);

    simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_)));
  }

  return 0;
}

static int test_simde_vreinterpretq_s16_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; } test_vec[] = { { { -INT32_C( 305018345), -INT32_C( 408795924), INT32_C( 1280110211), INT32_C( 1591486194) }, }, { { -INT32_C( 614138690), INT32_C( 905999471), INT32_C( 1411049404), -INT32_C( 422389809) }, }, { { -INT32_C( 1915509855), INT32_C( 1802860008), INT32_C( 1454948708), -INT32_C( 1514892057) }, }, { { -INT32_C( 8382320), INT32_C( 1228243340), INT32_C( 933056616), -INT32_C( 937595097) }, }, { { -INT32_C( 78254061), -INT32_C( 916010395), INT32_C( 1931484811), INT32_C( 1125700786) }, }, { { INT32_C( 2034407917), -INT32_C( 2101184486), -INT32_C( 273063736), -INT32_C( 491268401) }, }, { { INT32_C( 735907014), INT32_C( 1660241111), INT32_C( 366286178), -INT32_C( 698814743) }, }, { {
-INT32_C( 1588553081), -INT32_C( 635235822), INT32_C( 1120525426), INT32_C( 2015658162) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_int16x8_t r = simde_vreinterpretq_s16_s32(a); simde_int16x8_private r_ = simde_int16x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s64_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; } test_vec[] = { { { INT32_C( 912293310), INT32_C( 204558552), -INT32_C( 1953009825), -INT32_C( 2006297133) }, }, { { INT32_C( 1220891744), INT32_C( 2091956675), -INT32_C( 783740282), INT32_C( 1164829574) }, }, { { INT32_C( 947637600), INT32_C( 2084875293), -INT32_C( 301474789), -INT32_C( 1854443215) }, }, { { -INT32_C( 1982251834), INT32_C( 2080737782), INT32_C( 575491740), -INT32_C( 1838695886) }, }, { { -INT32_C( 1513364601), -INT32_C( 1423896689), INT32_C( 496577003), INT32_C( 1622020250) }, }, { { INT32_C( 1139378253), -INT32_C( 1379930351), INT32_C( 1875905853), INT32_C( 1325545415) }, }, { { -INT32_C( 1426797286), -INT32_C( 933947940), -INT32_C( 639242690), INT32_C( 1278841855) }, }, { { INT32_C( 764355355), INT32_C( 1339706898), INT32_C( 599763547), -INT32_C( 59588126) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_int64x2_t r = simde_vreinterpretq_s64_s32(a); simde_int64x2_private r_ = simde_int64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u8_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; } test_vec[] = { { { -INT32_C( 689895481), -INT32_C( 1859332607), -INT32_C( 1547664472), -INT32_C( 1320526320) }, }, { { INT32_C( 689737540), INT32_C( 2062555401), -INT32_C( 448572611), 
INT32_C( 340183628) }, }, { { -INT32_C( 756406063), -INT32_C( 1503389954), -INT32_C( 1438047079), -INT32_C( 950299518) }, }, { { INT32_C( 753956643), -INT32_C( 844701552), -INT32_C( 2135693005), -INT32_C( 2070611533) }, }, { { INT32_C( 525827617), INT32_C( 784776085), INT32_C( 1641549791), -INT32_C( 970443613) }, }, { { INT32_C( 1022498987), INT32_C( 738826489), INT32_C( 900512898), -INT32_C( 675659594) }, }, { { INT32_C( 1425478079), -INT32_C( 1417495092), INT32_C( 1879858124), INT32_C( 976631183) }, }, { { INT32_C( 1182148941), INT32_C( 1131643073), -INT32_C( 226943172), INT32_C( 533345120) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_uint8x16_t r = simde_vreinterpretq_u8_s32(a); simde_uint8x16_private r_ = simde_uint8x16_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u16_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; } test_vec[] = { { { -INT32_C( 1719971275), INT32_C( 1718980383), INT32_C( 225699668), INT32_C( 2059234434) }, }, { { INT32_C( 356859972), -INT32_C( 729828136), -INT32_C( 1784676808), INT32_C( 1301308951) }, }, { { -INT32_C( 656012359), -INT32_C( 297902950), -INT32_C( 956518077), INT32_C( 1631631645) }, }, { { -INT32_C( 780695815), INT32_C( 1923479098), INT32_C( 185091316), INT32_C( 1683527851) }, }, { { INT32_C( 1044135844), -INT32_C( 567444837), INT32_C( 1218717995), -INT32_C( 609622558) }, }, { { -INT32_C( 1515446165), INT32_C( 169300502), INT32_C( 1091968918), INT32_C( 1554345656) }, }, { { INT32_C( 1218109869), -INT32_C( 2044213413), -INT32_C( 758199312), INT32_C( 464353456) }, }, { { -INT32_C( 1346348392), INT32_C( 1119475884), -INT32_C( 1350250505), -INT32_C( 351590082) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); 
simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_uint16x8_t r = simde_vreinterpretq_u16_s32(a); simde_uint16x8_private r_ = simde_uint16x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u32_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; } test_vec[] = { { { -INT32_C( 1711092359), INT32_C( 1337709846), -INT32_C( 858119927), -INT32_C( 1460755540) }, }, { { INT32_C( 490966487), INT32_C( 167270093), -INT32_C( 89510034), INT32_C( 150734735) }, }, { { -INT32_C( 358416684), -INT32_C( 583442989), INT32_C( 732500863), -INT32_C( 1915512906) }, }, { { -INT32_C( 223668444), -INT32_C( 587488403), INT32_C( 1658234323), -INT32_C( 2140417364) }, }, { { -INT32_C( 1536553520), -INT32_C( 360602517), INT32_C( 1846880951), -INT32_C( 419698238) }, }, { { INT32_C( 1842914816), INT32_C( 491377482), INT32_C( 629088377), -INT32_C( 1012471054) }, }, { { INT32_C( 1667698936), INT32_C( 1817045172), -INT32_C( 723885550), INT32_C( 1270601035) }, }, { { -INT32_C( 960916612), -INT32_C( 521993625), INT32_C( 352674338), INT32_C( 1171827533) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_uint32x4_t r = simde_vreinterpretq_u32_s32(a); simde_uint32x4_private r_ = simde_uint32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u64_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; } test_vec[] = { { { -INT32_C( 836308430), -INT32_C( 20823114), -INT32_C( 1341228176), -INT32_C( 1296687314) }, }, { { -INT32_C( 1443363026), -INT32_C( 1323579037), -INT32_C( 865858227), -INT32_C( 196110654) }, }, { { INT32_C( 1069774217), INT32_C( 691897784), INT32_C( 785992448), -INT32_C( 1730048150) }, }, { { -INT32_C( 113124970), -INT32_C( 257270622), INT32_C( 750521962), INT32_C( 757140388) }, }, { { 
INT32_C( 963437697), INT32_C( 1768073577), INT32_C( 1586969588), INT32_C( 1626765514) }, }, { { -INT32_C( 195479471), -INT32_C( 18611308), -INT32_C( 1238654959), INT32_C( 769871020) }, }, { { -INT32_C( 1721348048), -INT32_C( 301807111), -INT32_C( 817063676), INT32_C( 1664041746) }, }, { { INT32_C( 257395067), -INT32_C( 1643234420), -INT32_C( 2007746084), -INT32_C( 1246414971) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_private a_ = simde_int32x4_to_private(a); simde_uint64x2_t r = simde_vreinterpretq_u64_s32(a); simde_uint64x2_private r_ = simde_uint64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_f32_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; } test_vec[] = { { { INT64_C( 6507368899876856448) }, }, { { -INT64_C( 5606110019210035735) }, }, { { INT64_C( 5538191326012830769) }, }, { { -INT64_C( 3913861957691473674) }, }, { { -INT64_C( 93860817958845234) }, }, { { INT64_C( 3899560943408114888) }, }, { { -INT64_C( 3174423257829653766) }, }, { { -INT64_C( 3548350518083791698) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_private a_ = simde_int64x1_to_private(a); simde_float32x2_t r = simde_vreinterpret_f32_s64(a); simde_float32x2_private r_ = simde_float32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_f64_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; } test_vec[] = { { { INT64_C( 3937832030626636539) }, }, { { -INT64_C( 1574507081372615631) }, }, { { INT64_C( 9162081502117003319) }, }, { { -INT64_C( 7690620279898524097) }, }, { { INT64_C( 2009877472910240848) }, }, { { -INT64_C( 4012408675839158720) }, }, { { -INT64_C( 6006878943899013745) }, }, { { -INT64_C( 7193353285132165379) }, }, }; 
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_private a_ = simde_int64x1_to_private(a); simde_float64x1_t r = simde_vreinterpret_f64_s64(a); simde_float64x1_private r_ = simde_float64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s8_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; } test_vec[] = { { { INT64_C( 6328377202146038951) }, }, { { INT64_C( 3619715714540190062) }, }, { { INT64_C( 9155081246081012001) }, }, { { -INT64_C( 2351362546501486091) }, }, { { INT64_C( 5583888352263927452) }, }, { { -INT64_C( 9177717705129095379) }, }, { { INT64_C( 8675559626524729859) }, }, { { -INT64_C( 1740451813690375232) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_private a_ = simde_int64x1_to_private(a); simde_int8x8_t r = simde_vreinterpret_s8_s64(a); simde_int8x8_private r_ = simde_int8x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s16_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; } test_vec[] = { { { -INT64_C( 3168462482490462822) }, }, { { -INT64_C( 1219849104769322211) }, }, { { INT64_C( 4676971831165581582) }, }, { { -INT64_C( 740107272915334304) }, }, { { INT64_C( 3784936939007031946) }, }, { { INT64_C( 8422026410167249983) }, }, { { INT64_C( 6151963219461210886) }, }, { { -INT64_C( 5096855216589139212) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_private a_ = simde_int64x1_to_private(a); simde_int16x4_t r = simde_vreinterpret_s16_s64(a); simde_int16x4_private r_ = simde_int16x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int 
test_simde_vreinterpret_s32_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; } test_vec[] = { { { -INT64_C( 7114466497633344375) }, }, { { -INT64_C( 982042139150972563) }, }, { { INT64_C( 6098053910720327655) }, }, { { INT64_C( 6897172487366335938) }, }, { { -INT64_C( 6892269724810748847) }, }, { { -INT64_C( 4064317378932651322) }, }, { { -INT64_C( 6313913748120124845) }, }, { { INT64_C( 8073265418032673237) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_private a_ = simde_int64x1_to_private(a); simde_int32x2_t r = simde_vreinterpret_s32_s64(a); simde_int32x2_private r_ = simde_int32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u8_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; } test_vec[] = { { { -INT64_C( 7859406109924154012) }, }, { { INT64_C( 8049504884380889405) }, }, { { INT64_C( 9100164763748731086) }, }, { { INT64_C( 4674145817308398404) }, }, { { INT64_C( 5901648540621360268) }, }, { { -INT64_C( 7509089612126145969) }, }, { { INT64_C( 1982760133211688802) }, }, { { INT64_C( 201577274414764208) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_private a_ = simde_int64x1_to_private(a); simde_uint8x8_t r = simde_vreinterpret_u8_s64(a); simde_uint8x8_private r_ = simde_uint8x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u16_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; } test_vec[] = { { { -INT64_C( 9087886216328385184) }, }, { { -INT64_C( 5531611665732542435) }, }, { { INT64_C( 1909430669626039079) }, }, { { INT64_C( 3587155385237390099) }, }, { { -INT64_C( 3444164426624836849) }, }, { { -INT64_C( 5120178736133780034) }, }, { { -INT64_C( 540421074411963309) }, }, { { -INT64_C( 
1778925326933183155) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_private a_ = simde_int64x1_to_private(a); simde_uint16x4_t r = simde_vreinterpret_u16_s64(a); simde_uint16x4_private r_ = simde_uint16x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u32_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; } test_vec[] = { { { -INT64_C( 1016733158405359620) }, }, { { -INT64_C( 2089216970221742623) }, }, { { -INT64_C( 5254723309699434506) }, }, { { INT64_C( 1763286918682342951) }, }, { { INT64_C( 1382685565763607467) }, }, { { -INT64_C( 397112793860774050) }, }, { { -INT64_C( 8982532413052755037) }, }, { { -INT64_C( 7111970021676646888) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_private a_ = simde_int64x1_to_private(a); simde_uint32x2_t r = simde_vreinterpret_u32_s64(a); simde_uint32x2_private r_ = simde_uint32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u64_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; } test_vec[] = { { { -INT64_C( 5582127008471794557) }, }, { { INT64_C( 7674704104200227240) }, }, { { -INT64_C( 3567127553411135658) }, }, { { INT64_C( 1415740588118772255) }, }, { { -INT64_C( 8703974978045111948) }, }, { { -INT64_C( 6291395285151757427) }, }, { { INT64_C( 4499629426767960896) }, }, { { -INT64_C( 8461345510972825873) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_private a_ = simde_int64x1_to_private(a); simde_uint64x1_t r = simde_vreinterpret_u64_s64(a); simde_uint64x1_private r_ = simde_uint64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; 
} static int test_simde_vreinterpretq_f32_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; } test_vec[] = { { { -INT64_C( 7111391188455227457), -INT64_C( 3254191077044569329) }, }, { { -INT64_C( 1650031472652397181), -INT64_C( 309481479475721661) }, }, { { INT64_C( 5693002100524252415), -INT64_C( 3378233746908740393) }, }, { { -INT64_C( 6847697544965884688), -INT64_C( 4550811824021290590) }, }, { { -INT64_C( 7507740545089526719), INT64_C( 2218023018207253417) }, }, { { -INT64_C( 9191243350082271628), -INT64_C( 8581830885716983195) }, }, { { -INT64_C( 2760187395494638758), INT64_C( 664167231916119275) }, }, { { -INT64_C( 1243752674535709601), -INT64_C( 6115893445395211696) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_float32x4_t r = simde_vreinterpretq_f32_s64(a); simde_float32x4_private r_ = simde_float32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_f64_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; } test_vec[] = { { { INT64_C( 8185646939337236695), INT64_C( 2688673206683960774) }, }, { { -INT64_C( 2164464113465532920), -INT64_C( 4581510018376765077) }, }, { { INT64_C( 7623174301693993172), INT64_C( 7721143809931890169) }, }, { { -INT64_C( 6296687322834995615), INT64_C( 7549887383541079837) }, }, { { INT64_C( 1153919958250496759), INT64_C( 1848510735015130585) }, }, { { INT64_C( 2744226032286850352), -INT64_C( 3616685348569136941) }, }, { { -INT64_C( 775628366577954795), -INT64_C( 5119791744112570848) }, }, { { INT64_C( 1037017840322201653), -INT64_C( 7341356102690958982) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_float64x2_t r = simde_vreinterpretq_f64_s64(a); 
simde_float64x2_private r_ = simde_float64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s8_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; } test_vec[] = { { { -INT64_C( 5478452947858036060), INT64_C( 3027308422447858654) }, }, { { INT64_C( 5308556797850632463), -INT64_C( 8026805502122097337) }, }, { { INT64_C( 6106164720098399992), -INT64_C( 9071691710817547528) }, }, { { INT64_C( 699266036727281954), -INT64_C( 7616685799744106289) }, }, { { INT64_C( 3870946397987588590), -INT64_C( 7450009907100281433) }, }, { { INT64_C( 6125940608990329656), -INT64_C( 8683961970705989561) }, }, { { INT64_C( 2349370996918262823), INT64_C( 3983389888589053753) }, }, { { INT64_C( 5183137398324808211), -INT64_C( 8803225149115662156) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_int8x16_t r = simde_vreinterpretq_s8_s64(a); simde_int8x16_private r_ = simde_int8x16_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s16_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; } test_vec[] = { { { INT64_C( 7635004779189560581), -INT64_C( 7876646184904941736) }, }, { { INT64_C( 1169612658643635266), INT64_C( 6731539315348642040) }, }, { { -INT64_C( 4616652067830273248), INT64_C( 509575005423484026) }, }, { { -INT64_C( 7493878042357771413), INT64_C( 5310470915378310909) }, }, { { -INT64_C( 1285850860918728448), -INT64_C( 6861936389400897798) }, }, { { INT64_C( 5912259103237926868), -INT64_C( 3679060685047253378) }, }, { { INT64_C( 8706685679711862898), -INT64_C( 7491460063933611470) }, }, { { -INT64_C( 6372003961623504661), INT64_C( 353322092022675790) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); 
simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_int16x8_t r = simde_vreinterpretq_s16_s64(a); simde_int16x8_private r_ = simde_int16x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s32_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; } test_vec[] = { { { INT64_C( 1900666753600127530), -INT64_C( 3669406538694196815) }, }, { { INT64_C( 274910012553665461), INT64_C( 4245955537745086196) }, }, { { -INT64_C( 2441454381005625427), INT64_C( 2105070435390701879) }, }, { { INT64_C( 5514684512187493202), INT64_C( 5073867488594489119) }, }, { { -INT64_C( 5930561804499730522), INT64_C( 6823035952845641003) }, }, { { INT64_C( 3502229382316161526), INT64_C( 5997326498957077850) }, }, { { INT64_C( 1885559016628126983), -INT64_C( 9108145918797070545) }, }, { { INT64_C( 4798702945091746902), -INT64_C( 4028627368431650792) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_int32x4_t r = simde_vreinterpretq_s32_s64(a); simde_int32x4_private r_ = simde_int32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u8_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; } test_vec[] = { { { -INT64_C( 3917874066034605442), -INT64_C( 1949064800907001111) }, }, { { INT64_C( 546159014590848953), INT64_C( 396535052162892911) }, }, { { -INT64_C( 7089481973961799031), -INT64_C( 7610395224178797157) }, }, { { -INT64_C( 4984901720799776239), INT64_C( 5450384428367346454) }, }, { { -INT64_C( 3403951431896589199), -INT64_C( 4357881682333843182) }, }, { { INT64_C( 3218210159869166500), -INT64_C( 1627648328125984676) }, }, { { INT64_C( 6309491700457769098), INT64_C( 3785499309450386962) }, }, { { -INT64_C( 3165524243452323889), -INT64_C( 3098938020335461220) }, }, }; for (size_t i = 0 ; i 
< (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_uint8x16_t r = simde_vreinterpretq_u8_s64(a); simde_uint8x16_private r_ = simde_uint8x16_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u16_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; } test_vec[] = { { { INT64_C( 2688746253838422578), INT64_C( 6716962639529351604) }, }, { { -INT64_C( 7712284921796853919), INT64_C( 2773637878869444097) }, }, { { -INT64_C( 4818748208638480254), -INT64_C( 6250232908632565769) }, }, { { INT64_C( 7747105512504161589), -INT64_C( 8565703520504441083) }, }, { { -INT64_C( 9009270136648749774), INT64_C( 7164569695744980859) }, }, { { -INT64_C( 543294039713008481), INT64_C( 3421630694511770347) }, }, { { INT64_C( 2255567448309366847), -INT64_C( 42415447632333351) }, }, { { INT64_C( 8930496842383866627), INT64_C( 3613551510055412217) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_uint16x8_t r = simde_vreinterpretq_u16_s64(a); simde_uint16x8_private r_ = simde_uint16x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u32_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; } test_vec[] = { { { -INT64_C( 1402027193207326413), -INT64_C( 2166409925850784429) }, }, { { INT64_C( 5097930609605478085), -INT64_C( 2295136319892064806) }, }, { { -INT64_C( 1648147493708327294), INT64_C( 846785885976717368) }, }, { { INT64_C( 3932184930325291114), -INT64_C( 2814545987810144925) }, }, { { -INT64_C( 3184276263924858800), INT64_C( 1254095984874570518) }, }, { { -INT64_C( 1881399073800678063), -INT64_C( 9193845543012043206) }, }, { { -INT64_C( 5092386302282271258), -INT64_C( 
3949520022139500936) }, }, { { -INT64_C( 5514276790526899183), INT64_C( 4287616322038361808) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_uint32x4_t r = simde_vreinterpretq_u32_s64(a); simde_uint32x4_private r_ = simde_uint32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u64_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; } test_vec[] = { { { -INT64_C( 7785086221317720197), -INT64_C( 3333025110275361121) }, }, { { INT64_C( 557425576608416606), INT64_C( 2865286261606638736) }, }, { { -INT64_C( 4239772750377888703), -INT64_C( 3177635380408631869) }, }, { { INT64_C( 5524890609138883593), INT64_C( 6201906476984207934) }, }, { { INT64_C( 8779790245809580525), -INT64_C( 5973333706829537326) }, }, { { INT64_C( 5825966948614322385), INT64_C( 3887533678598467464) }, }, { { -INT64_C( 4186904048721928523), -INT64_C( 5093246922952772609) }, }, { { INT64_C( 3044468128483812318), INT64_C( 8884400049866898126) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_private a_ = simde_int64x2_to_private(a); simde_uint64x2_t r = simde_vreinterpretq_u64_s64(a); simde_uint64x2_private r_ = simde_uint64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_f32_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; } test_vec[] = { { { UINT8_C(112), UINT8_C(213), UINT8_C(254), UINT8_C(209), UINT8_C( 81), UINT8_C(219), UINT8_C(117), UINT8_C(223) }, }, { { UINT8_C(124), UINT8_C(231), UINT8_C(194), UINT8_C(124), UINT8_C(243), UINT8_C( 26), UINT8_C(211), UINT8_C(153) }, }, { { UINT8_C(234), UINT8_C( 24), UINT8_C(169), UINT8_C(251), UINT8_C( 31), UINT8_C(217), UINT8_C(225), UINT8_C(202) }, }, { { 
UINT8_C( 55), UINT8_C(239), UINT8_C( 94), UINT8_C(148), UINT8_C(180), UINT8_C(176), UINT8_C( 71), UINT8_C( 36) }, }, { { UINT8_C(133), UINT8_C( 70), UINT8_C(245), UINT8_C(214), UINT8_C( 33), UINT8_C(106), UINT8_C(181), UINT8_C(157) }, }, { { UINT8_C( 81), UINT8_C(119), UINT8_C( 26), UINT8_C( 68), UINT8_C(145), UINT8_C(237), UINT8_C(222), UINT8_C(123) }, }, { { UINT8_C( 6), UINT8_C(135), UINT8_C(118), UINT8_C( 37), UINT8_C( 96), UINT8_C( 87), UINT8_C(240), UINT8_C(151) }, }, { { UINT8_C( 70), UINT8_C( 78), UINT8_C( 43), UINT8_C(250), UINT8_C(254), UINT8_C(115), UINT8_C( 30), UINT8_C(131) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_private a_ = simde_uint8x8_to_private(a); simde_float32x2_t r = simde_vreinterpret_f32_u8(a); simde_float32x2_private r_ = simde_float32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_f64_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; } test_vec[] = { { { UINT8_C( 51), UINT8_C( 43), UINT8_C(203), UINT8_C(240), UINT8_C(137), UINT8_C( 63), UINT8_C(125), UINT8_C( 11) }, }, { { UINT8_C( 45), UINT8_C( 94), UINT8_C( 82), UINT8_C(205), UINT8_C(174), UINT8_C( 96), UINT8_C(128), UINT8_C( 2) }, }, { { UINT8_C( 53), UINT8_C(179), UINT8_C(197), UINT8_C( 89), UINT8_C(162), UINT8_C( 51), UINT8_C( 97), UINT8_C(235) }, }, { { UINT8_C(108), UINT8_C( 15), UINT8_C(237), UINT8_C(107), UINT8_C( 76), UINT8_C(137), UINT8_C( 47), UINT8_C(128) }, }, { { UINT8_C(180), UINT8_C(250), UINT8_C(112), UINT8_C( 61), UINT8_C( 58), UINT8_C(237), UINT8_C( 73), UINT8_C(103) }, }, { { UINT8_C( 76), UINT8_C(155), UINT8_C( 52), UINT8_C(250), UINT8_C(251), UINT8_C(180), UINT8_C(252), UINT8_C( 49) }, }, { { UINT8_C(104), UINT8_C(194), UINT8_C(138), UINT8_C( 10), UINT8_C(245), UINT8_C(235), UINT8_C(246), UINT8_C( 97) }, }, { { UINT8_C(250), UINT8_C(227), UINT8_C(205), UINT8_C( 71), UINT8_C(108), 
UINT8_C(252), UINT8_C(199), UINT8_C( 32) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_private a_ = simde_uint8x8_to_private(a); simde_float64x1_t r = simde_vreinterpret_f64_u8(a); simde_float64x1_private r_ = simde_float64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s8_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; } test_vec[] = { { { UINT8_C(147), UINT8_C(243), UINT8_C( 65), UINT8_C(112), UINT8_C(131), UINT8_C( 66), UINT8_C( 5), UINT8_C(171) }, }, { { UINT8_C( 45), UINT8_C(204), UINT8_C(175), UINT8_C(204), UINT8_C(142), UINT8_C(231), UINT8_C(239), UINT8_C(114) }, }, { { UINT8_C(166), UINT8_C(159), UINT8_C(214), UINT8_C( 13), UINT8_C(153), UINT8_C( 94), UINT8_C( 63), UINT8_C(236) }, }, { { UINT8_C(218), UINT8_C( 50), UINT8_C( 76), UINT8_C(184), UINT8_C( 58), UINT8_C(231), UINT8_C( 21), UINT8_C(205) }, }, { { UINT8_C(218), UINT8_C( 87), UINT8_C( 62), UINT8_C( 93), UINT8_C(153), UINT8_C( 67), UINT8_C( 8), UINT8_C(198) }, }, { { UINT8_C( 15), UINT8_C(183), UINT8_C(147), UINT8_C(157), UINT8_C(159), UINT8_C(130), UINT8_C( 15), UINT8_C( 69) }, }, { { UINT8_C( 34), UINT8_C(229), UINT8_C( 82), UINT8_C(187), UINT8_C( 67), UINT8_C(146), UINT8_C(168), UINT8_C( 29) }, }, { { UINT8_C(196), UINT8_C(244), UINT8_C(214), UINT8_C(254), UINT8_C(219), UINT8_C(235), UINT8_C(204), UINT8_C(182) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_private a_ = simde_uint8x8_to_private(a); simde_int8x8_t r = simde_vreinterpret_s8_u8(a); simde_int8x8_private r_ = simde_int8x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s16_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; } test_vec[] = { { { UINT8_C(167), UINT8_C( 27), UINT8_C( 
14), UINT8_C(245), UINT8_C(156), UINT8_C(104), UINT8_C(159), UINT8_C(125) }, }, { { UINT8_C(167), UINT8_C( 59), UINT8_C(147), UINT8_C(116), UINT8_C(103), UINT8_C( 23), UINT8_C(182), UINT8_C( 28) }, }, { { UINT8_C( 55), UINT8_C(112), UINT8_C(154), UINT8_C(182), UINT8_C(106), UINT8_C(171), UINT8_C(127), UINT8_C(159) }, }, { { UINT8_C( 56), UINT8_C(206), UINT8_C(241), UINT8_C( 68), UINT8_C(100), UINT8_C( 28), UINT8_C( 38), UINT8_C( 11) }, }, { { UINT8_C( 55), UINT8_C( 52), UINT8_C( 0), UINT8_C(211), UINT8_C(157), UINT8_C(159), UINT8_C( 80), UINT8_C( 68) }, }, { { UINT8_C(219), UINT8_C(227), UINT8_C(184), UINT8_C( 66), UINT8_C(251), UINT8_C(111), UINT8_C( 94), UINT8_C( 50) }, }, { { UINT8_C(223), UINT8_C(248), UINT8_C(232), UINT8_C( 73), UINT8_C(163), UINT8_C(104), UINT8_C(232), UINT8_C(219) }, }, { { UINT8_C( 54), UINT8_C(218), UINT8_C( 32), UINT8_C(155), UINT8_C(246), UINT8_C( 70), UINT8_C(166), UINT8_C( 45) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_private a_ = simde_uint8x8_to_private(a); simde_int16x4_t r = simde_vreinterpret_s16_u8(a); simde_int16x4_private r_ = simde_int16x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s32_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; } test_vec[] = { { { UINT8_C(226), UINT8_C(243), UINT8_C( 11), UINT8_C( 58), UINT8_C(188), UINT8_C(180), UINT8_C(226), UINT8_C( 20) }, }, { { UINT8_C(127), UINT8_C(241), UINT8_C(231), UINT8_C( 32), UINT8_C(223), UINT8_C(190), UINT8_C( 44), UINT8_C(190) }, }, { { UINT8_C( 90), UINT8_C(254), UINT8_C(207), UINT8_C( 71), UINT8_C(211), UINT8_C(206), UINT8_C( 43), UINT8_C(172) }, }, { { UINT8_C(184), UINT8_C(201), UINT8_C( 96), UINT8_C(102), UINT8_C( 4), UINT8_C(196), UINT8_C( 2), UINT8_C(231) }, }, { { UINT8_C(183), UINT8_C( 14), UINT8_C( 33), UINT8_C(116), UINT8_C(194), UINT8_C( 4), UINT8_C(136), UINT8_C( 65) }, }, 
{ { UINT8_C(245), UINT8_C(111), UINT8_C( 97), UINT8_C(212), UINT8_C( 46), UINT8_C(141), UINT8_C(146), UINT8_C(136) }, }, { { UINT8_C(139), UINT8_C( 97), UINT8_C(208), UINT8_C( 95), UINT8_C( 47), UINT8_C(251), UINT8_C( 11), UINT8_C(231) }, }, { { UINT8_C(197), UINT8_C(108), UINT8_C( 77), UINT8_C(201), UINT8_C( 48), UINT8_C( 79), UINT8_C(176), UINT8_C(231) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_private a_ = simde_uint8x8_to_private(a); simde_int32x2_t r = simde_vreinterpret_s32_u8(a); simde_int32x2_private r_ = simde_int32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s64_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; } test_vec[] = { { { UINT8_C(128), UINT8_C( 94), UINT8_C(134), UINT8_C(227), UINT8_C(176), UINT8_C(188), UINT8_C(228), UINT8_C(209) }, }, { { UINT8_C(168), UINT8_C(199), UINT8_C(161), UINT8_C( 80), UINT8_C(249), UINT8_C( 54), UINT8_C( 47), UINT8_C( 39) }, }, { { UINT8_C(186), UINT8_C(119), UINT8_C(105), UINT8_C(148), UINT8_C( 32), UINT8_C( 60), UINT8_C( 67), UINT8_C(249) }, }, { { UINT8_C(230), UINT8_C( 73), UINT8_C( 95), UINT8_C(201), UINT8_C(179), UINT8_C(210), UINT8_C(243), UINT8_C( 51) }, }, { { UINT8_C( 48), UINT8_C(121), UINT8_C( 22), UINT8_C(225), UINT8_C( 53), UINT8_C(250), UINT8_C(178), UINT8_C(222) }, }, { { UINT8_C(193), UINT8_C( 83), UINT8_C( 46), UINT8_C(187), UINT8_C(138), UINT8_C( 93), UINT8_C(226), UINT8_C( 68) }, }, { { UINT8_C(212), UINT8_C( 75), UINT8_C(216), UINT8_C(244), UINT8_C(135), UINT8_C( 27), UINT8_C(238), UINT8_C(110) }, }, { { UINT8_C(101), UINT8_C( 77), UINT8_C( 55), UINT8_C( 24), UINT8_C( 31), UINT8_C( 42), UINT8_C( 75), UINT8_C( 79) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_private a_ = simde_uint8x8_to_private(a); 
simde_int64x1_t r = simde_vreinterpret_s64_u8(a); simde_int64x1_private r_ = simde_int64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u16_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; } test_vec[] = { { { UINT8_C( 36), UINT8_C(221), UINT8_C( 60), UINT8_C(229), UINT8_C( 49), UINT8_C(244), UINT8_C(111), UINT8_C(188) }, }, { { UINT8_C(160), UINT8_C(192), UINT8_C( 80), UINT8_C( 43), UINT8_C(104), UINT8_C(116), UINT8_C(107), UINT8_C( 44) }, }, { { UINT8_C(222), UINT8_C(125), UINT8_C( 68), UINT8_C( 25), UINT8_C(191), UINT8_C(146), UINT8_C(106), UINT8_C(110) }, }, { { UINT8_C(218), UINT8_C( 61), UINT8_C(146), UINT8_C(203), UINT8_C( 45), UINT8_C( 95), UINT8_C( 11), UINT8_C( 81) }, }, { { UINT8_C( 61), UINT8_C( 72), UINT8_C( 55), UINT8_C(110), UINT8_C( 60), UINT8_C(166), UINT8_C( 42), UINT8_C(220) }, }, { { UINT8_C(102), UINT8_C(122), UINT8_C( 7), UINT8_C(206), UINT8_C(239), UINT8_C(115), UINT8_C(251), UINT8_C(205) }, }, { { UINT8_C(240), UINT8_C( 63), UINT8_C(230), UINT8_C(176), UINT8_C(209), UINT8_C( 80), UINT8_C( 30), UINT8_C(171) }, }, { { UINT8_C(141), UINT8_C(177), UINT8_C(118), UINT8_C(186), UINT8_C( 16), UINT8_C(130), UINT8_C( 12), UINT8_C( 77) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_private a_ = simde_uint8x8_to_private(a); simde_uint16x4_t r = simde_vreinterpret_u16_u8(a); simde_uint16x4_private r_ = simde_uint16x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u32_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; } test_vec[] = { { { UINT8_C(186), UINT8_C( 4), UINT8_C( 77), UINT8_C( 72), UINT8_C(170), UINT8_C( 40), UINT8_C(217), UINT8_C(200) }, }, { { UINT8_C( 11), UINT8_C(193), UINT8_C( 28), UINT8_C( 12), UINT8_C(113), UINT8_C(159), UINT8_C( 50), UINT8_C(171) }, }, { { UINT8_C( 1), 
UINT8_C(131), UINT8_C(236), UINT8_C(170), UINT8_C(169), UINT8_C(148), UINT8_C(243), UINT8_C(225) }, }, { { UINT8_C(201), UINT8_C(223), UINT8_C(238), UINT8_C(192), UINT8_C( 25), UINT8_C( 88), UINT8_C(105), UINT8_C(211) }, }, { { UINT8_C( 92), UINT8_C(182), UINT8_C( 27), UINT8_C( 7), UINT8_C(222), UINT8_C(244), UINT8_C(207), UINT8_C(234) }, }, { { UINT8_C(181), UINT8_C(236), UINT8_C(246), UINT8_C( 39), UINT8_C(139), UINT8_C( 40), UINT8_C(210), UINT8_C(140) }, }, { { UINT8_C(172), UINT8_C(190), UINT8_C( 54), UINT8_C( 85), UINT8_C( 82), UINT8_C( 41), UINT8_C( 54), UINT8_C( 27) }, }, { { UINT8_C( 8), UINT8_C( 36), UINT8_C(219), UINT8_C( 33), UINT8_C(125), UINT8_C( 68), UINT8_C(244), UINT8_C(217) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_private a_ = simde_uint8x8_to_private(a); simde_uint32x2_t r = simde_vreinterpret_u32_u8(a); simde_uint32x2_private r_ = simde_uint32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u64_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; } test_vec[] = { { { UINT8_C(147), UINT8_C( 20), UINT8_C( 14), UINT8_C(234), UINT8_C(152), UINT8_C(134), UINT8_C( 74), UINT8_C(158) }, }, { { UINT8_C(207), UINT8_C( 18), UINT8_C( 79), UINT8_C( 23), UINT8_C(206), UINT8_C( 14), UINT8_C(245), UINT8_C(245) }, }, { { UINT8_C(157), UINT8_C( 76), UINT8_C(191), UINT8_C(160), UINT8_C(210), UINT8_C(250), UINT8_C(214), UINT8_C( 44) }, }, { { UINT8_C(171), UINT8_C( 35), UINT8_C( 36), UINT8_C(253), UINT8_C( 89), UINT8_C( 67), UINT8_C(191), UINT8_C(237) }, }, { { UINT8_C( 88), UINT8_C(206), UINT8_C(215), UINT8_C(240), UINT8_C( 84), UINT8_C( 34), UINT8_C(142), UINT8_C( 35) }, }, { { UINT8_C( 52), UINT8_C(222), UINT8_C( 59), UINT8_C( 2), UINT8_C(236), UINT8_C( 48), UINT8_C(247), UINT8_C(137) }, }, { { UINT8_C(124), UINT8_C(182), UINT8_C( 41), UINT8_C( 78), UINT8_C(176), UINT8_C( 0), 
UINT8_C(122), UINT8_C( 91) }, }, { { UINT8_C( 35), UINT8_C(158), UINT8_C( 88), UINT8_C(124), UINT8_C(226), UINT8_C( 24), UINT8_C(105), UINT8_C( 58) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_private a_ = simde_uint8x8_to_private(a); simde_uint64x1_t r = simde_vreinterpret_u64_u8(a); simde_uint64x1_private r_ = simde_uint64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_f32_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; } test_vec[] = { { { UINT8_C( 74), UINT8_C( 19), UINT8_C( 69), UINT8_C( 21), UINT8_C( 71), UINT8_C( 59), UINT8_C(199), UINT8_C(127), UINT8_C( 82), UINT8_C(179), UINT8_C(169), UINT8_C(199), UINT8_C( 92), UINT8_C( 94), UINT8_C( 56), UINT8_C(138) }, }, { { UINT8_C(242), UINT8_C(208), UINT8_C( 27), UINT8_C( 42), UINT8_MAX, UINT8_C(136), UINT8_C( 16), UINT8_C(131), UINT8_C( 2), UINT8_C( 11), UINT8_C( 23), UINT8_C(252), UINT8_C(182), UINT8_C(246), UINT8_C(201), UINT8_C( 0) }, }, { { UINT8_C( 10), UINT8_C( 14), UINT8_C( 22), UINT8_C( 81), UINT8_C( 73), UINT8_C(221), UINT8_C(208), UINT8_C(156), UINT8_C(144), UINT8_C(121), UINT8_C( 99), UINT8_C(236), UINT8_C(215), UINT8_C(155), UINT8_C(118), UINT8_C(202) }, }, { { UINT8_C(108), UINT8_C(145), UINT8_C(244), UINT8_C(107), UINT8_C( 25), UINT8_C( 4), UINT8_C(239), UINT8_C( 28), UINT8_C( 15), UINT8_C( 6), UINT8_C( 24), UINT8_C(197), UINT8_C(253), UINT8_C(225), UINT8_C(198), UINT8_C( 7) }, }, { { UINT8_C(240), UINT8_C(220), UINT8_C( 88), UINT8_C( 57), UINT8_C(185), UINT8_C( 40), UINT8_C(213), UINT8_C( 73), UINT8_C(161), UINT8_C( 56), UINT8_C( 53), UINT8_C(120), UINT8_C(212), UINT8_C(172), UINT8_C( 66), UINT8_C( 64) }, }, { { UINT8_C( 61), UINT8_C( 54), UINT8_C(171), UINT8_C( 87), UINT8_C( 58), UINT8_C(154), UINT8_C(115), UINT8_C( 74), UINT8_C(161), UINT8_C(139), UINT8_C( 15), UINT8_C(158), UINT8_C(108), UINT8_C(213), 
UINT8_C(165), UINT8_C( 92) }, }, { { UINT8_C(177), UINT8_C(253), UINT8_C(150), UINT8_C(106), UINT8_C( 37), UINT8_C(107), UINT8_C(179), UINT8_C(198), UINT8_C(164), UINT8_C(233), UINT8_C( 62), UINT8_C(120), UINT8_C(149), UINT8_C(129), UINT8_C(184), UINT8_C(210) }, }, { { UINT8_C(183), UINT8_C( 99), UINT8_C( 41), UINT8_C(242), UINT8_C(254), UINT8_C(156), UINT8_C( 60), UINT8_C(159), UINT8_C( 39), UINT8_C( 75), UINT8_C( 61), UINT8_C(148), UINT8_C( 33), UINT8_C(226), UINT8_C(240), UINT8_C(210) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_private a_ = simde_uint8x16_to_private(a); simde_float32x4_t r = simde_vreinterpretq_f32_u8(a); simde_float32x4_private r_ = simde_float32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_f64_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; } test_vec[] = { { { UINT8_C( 87), UINT8_C(122), UINT8_C(239), UINT8_C(208), UINT8_C(235), UINT8_C( 38), UINT8_C(133), UINT8_C(157), UINT8_C( 51), UINT8_C(250), UINT8_C( 84), UINT8_C( 64), UINT8_C( 33), UINT8_C(196), UINT8_C(113), UINT8_C(243) }, }, { { UINT8_C(192), UINT8_C(103), UINT8_C( 78), UINT8_C(158), UINT8_C( 75), UINT8_C(154), UINT8_C(226), UINT8_C( 23), UINT8_C(169), UINT8_C( 97), UINT8_C( 50), UINT8_C( 85), UINT8_C(198), UINT8_C(111), UINT8_C( 46), UINT8_C( 29) }, }, { { UINT8_C(233), UINT8_C( 29), UINT8_C(238), UINT8_C(212), UINT8_C( 67), UINT8_C(115), UINT8_C(113), UINT8_C(118), UINT8_C(109), UINT8_C(197), UINT8_C(182), UINT8_C(142), UINT8_C(137), UINT8_C( 40), UINT8_C(129), UINT8_C( 73) }, }, { { UINT8_C(143), UINT8_C(208), UINT8_C(231), UINT8_C(219), UINT8_C(106), UINT8_C(201), UINT8_C(242), UINT8_C( 20), UINT8_C( 43), UINT8_C( 36), UINT8_C(105), UINT8_C(241), UINT8_C(147), UINT8_C(151), UINT8_C( 14), UINT8_C(124) }, }, { { UINT8_C(180), UINT8_C(252), UINT8_C( 80), UINT8_C(248), UINT8_C(112), 
UINT8_C(193), UINT8_C(110), UINT8_C(221), UINT8_C(135), UINT8_C( 37), UINT8_C(108), UINT8_C( 16), UINT8_C( 77), UINT8_C(237), UINT8_C( 90), UINT8_C(220) }, }, { { UINT8_C(189), UINT8_C( 65), UINT8_C(183), UINT8_C( 40), UINT8_C( 11), UINT8_C(169), UINT8_C( 60), UINT8_C( 54), UINT8_C(205), UINT8_C(165), UINT8_C( 39), UINT8_C( 96), UINT8_C( 60), UINT8_C( 53), UINT8_C(220), UINT8_C(240) }, }, { { UINT8_C( 50), UINT8_C( 44), UINT8_C(232), UINT8_C(162), UINT8_C(238), UINT8_C( 87), UINT8_C(127), UINT8_C(117), UINT8_C(124), UINT8_C(235), UINT8_C(133), UINT8_C(201), UINT8_C(217), UINT8_C(223), UINT8_C(165), UINT8_C(150) }, }, { { UINT8_C( 33), UINT8_C( 93), UINT8_C(190), UINT8_C( 44), UINT8_C( 6), UINT8_C(250), UINT8_C( 98), UINT8_C(212), UINT8_C(159), UINT8_C(137), UINT8_C( 52), UINT8_C(219), UINT8_C(190), UINT8_C( 17), UINT8_C(204), UINT8_C(240) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_private a_ = simde_uint8x16_to_private(a); simde_float64x2_t r = simde_vreinterpretq_f64_u8(a); simde_float64x2_private r_ = simde_float64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s8_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; } test_vec[] = { { { UINT8_C(186), UINT8_C( 15), UINT8_C(251), UINT8_C( 19), UINT8_C(208), UINT8_C( 70), UINT8_C(122), UINT8_C(225), UINT8_C( 58), UINT8_C(175), UINT8_C( 82), UINT8_C(136), UINT8_C(204), UINT8_C(209), UINT8_C( 23), UINT8_C(150) }, }, { { UINT8_C(222), UINT8_C( 56), UINT8_C(218), UINT8_C( 62), UINT8_C( 18), UINT8_C(198), UINT8_C(207), UINT8_C(180), UINT8_C( 84), UINT8_C(208), UINT8_C( 98), UINT8_C(169), UINT8_C(186), UINT8_C(173), UINT8_C( 76), UINT8_C(116) }, }, { { UINT8_C(188), UINT8_C( 71), UINT8_C(136), UINT8_C(140), UINT8_C(142), UINT8_C( 2), UINT8_C(110), UINT8_C(200), UINT8_C(177), UINT8_C(192), UINT8_C( 80), UINT8_C(125), UINT8_C(145), 
UINT8_C(103), UINT8_C( 19), UINT8_C(111) }, }, { { UINT8_C(160), UINT8_C(238), UINT8_C(173), UINT8_C(178), UINT8_C(180), UINT8_C(124), UINT8_C(102), UINT8_C( 9), UINT8_C( 76), UINT8_C(200), UINT8_C(178), UINT8_C( 6), UINT8_C(117), UINT8_C(254), UINT8_C(123), UINT8_C( 49) }, }, { { UINT8_C( 70), UINT8_C( 3), UINT8_C(190), UINT8_C(212), UINT8_C( 5), UINT8_C( 44), UINT8_C(156), UINT8_C(182), UINT8_C(236), UINT8_C(236), UINT8_C( 51), UINT8_C(125), UINT8_C( 83), UINT8_C( 70), UINT8_C(236), UINT8_C(243) }, }, { { UINT8_C( 52), UINT8_C(153), UINT8_C(165), UINT8_C(233), UINT8_C( 22), UINT8_C( 12), UINT8_C(242), UINT8_C( 98), UINT8_C(212), UINT8_C(164), UINT8_C(105), UINT8_C( 74), UINT8_C(163), UINT8_C(228), UINT8_C(123), UINT8_C(233) }, }, { { UINT8_C(231), UINT8_C( 57), UINT8_C(189), UINT8_C(236), UINT8_C(101), UINT8_C( 89), UINT8_C(162), UINT8_C( 81), UINT8_C( 69), UINT8_C(213), UINT8_C(206), UINT8_C(152), UINT8_C( 27), UINT8_C(186), UINT8_C(140), UINT8_C( 80) }, }, { { UINT8_C( 84), UINT8_C( 49), UINT8_C( 57), UINT8_C(106), UINT8_C( 61), UINT8_C( 43), UINT8_C(204), UINT8_C( 18), UINT8_C(207), UINT8_C( 53), UINT8_C( 92), UINT8_C(114), UINT8_C( 25), UINT8_C(215), UINT8_C( 91), UINT8_C( 0) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_private a_ = simde_uint8x16_to_private(a); simde_int8x16_t r = simde_vreinterpretq_s8_u8(a); simde_int8x16_private r_ = simde_int8x16_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s16_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; } test_vec[] = { { { UINT8_C(221), UINT8_C( 34), UINT8_C( 9), UINT8_C( 16), UINT8_C( 32), UINT8_C(148), UINT8_C( 20), UINT8_C(161), UINT8_C( 71), UINT8_C(215), UINT8_C(169), UINT8_C(199), UINT8_C(183), UINT8_C( 29), UINT8_C(178), UINT8_C( 97) }, }, { { UINT8_C(126), UINT8_C(133), UINT8_C(121), UINT8_C( 62), UINT8_C(164), UINT8_C( 
32), UINT8_C(194), UINT8_C(193), UINT8_C( 79), UINT8_C( 90), UINT8_C(204), UINT8_C( 21), UINT8_C(238), UINT8_C(210), UINT8_C( 9), UINT8_C(203) }, }, { { UINT8_C(244), UINT8_C( 19), UINT8_C(220), UINT8_C( 20), UINT8_C(167), UINT8_C(240), UINT8_C(181), UINT8_C(239), UINT8_C(199), UINT8_C( 94), UINT8_C(182), UINT8_C(126), UINT8_C(124), UINT8_C(104), UINT8_C(223), UINT8_C(250) }, }, { { UINT8_C(237), UINT8_C( 89), UINT8_C( 56), UINT8_C(145), UINT8_C(121), UINT8_C(250), UINT8_C( 83), UINT8_C(200), UINT8_C( 84), UINT8_C( 31), UINT8_C(222), UINT8_C( 66), UINT8_C(241), UINT8_C(231), UINT8_C( 14), UINT8_C(229) }, }, { { UINT8_C(250), UINT8_C(234), UINT8_C(249), UINT8_C(162), UINT8_C(218), UINT8_C(175), UINT8_C(145), UINT8_C(162), UINT8_C( 13), UINT8_C( 71), UINT8_C( 32), UINT8_C(137), UINT8_C(176), UINT8_C( 0), UINT8_C(132), UINT8_C(157) }, }, { { UINT8_C( 89), UINT8_C(188), UINT8_C( 47), UINT8_C(210), UINT8_C(183), UINT8_C(130), UINT8_C(155), UINT8_C( 11), UINT8_C(161), UINT8_C(121), UINT8_C( 78), UINT8_C(147), UINT8_C( 96), UINT8_C( 92), UINT8_C(120), UINT8_C( 91) }, }, { { UINT8_C( 70), UINT8_C(114), UINT8_C(253), UINT8_C( 32), UINT8_C( 33), UINT8_C(142), UINT8_C(194), UINT8_C( 46), UINT8_C(213), UINT8_C(227), UINT8_C(184), UINT8_C(133), UINT8_C(227), UINT8_C( 60), UINT8_C( 35), UINT8_C( 60) }, }, { { UINT8_C(248), UINT8_C( 82), UINT8_C( 14), UINT8_C(175), UINT8_C(212), UINT8_C(169), UINT8_C(187), UINT8_C(117), UINT8_C( 34), UINT8_C( 9), UINT8_C( 8), UINT8_C(131), UINT8_C(101), UINT8_C(129), UINT8_C(222), UINT8_C(171) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_private a_ = simde_uint8x16_to_private(a); simde_int16x8_t r = simde_vreinterpretq_s16_u8(a); simde_int16x8_private r_ = simde_int16x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s32_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t 
a[16]; } test_vec[] = { { { UINT8_C( 91), UINT8_C( 51), UINT8_C( 32), UINT8_C(225), UINT8_C(239), UINT8_C( 3), UINT8_C( 10), UINT8_C(134), UINT8_C( 49), UINT8_C(209), UINT8_C(250), UINT8_C(165), UINT8_C( 94), UINT8_C(196), UINT8_C( 3), UINT8_C(217) }, }, { { UINT8_C( 35), UINT8_C(151), UINT8_C(185), UINT8_C( 81), UINT8_C(166), UINT8_C( 25), UINT8_C(152), UINT8_C( 24), UINT8_C( 26), UINT8_C(151), UINT8_C( 31), UINT8_C(115), UINT8_C(134), UINT8_C(199), UINT8_C( 5), UINT8_C(225) }, }, { { UINT8_C(250), UINT8_C( 37), UINT8_C(194), UINT8_C(234), UINT8_C( 40), UINT8_C(205), UINT8_C(112), UINT8_C( 90), UINT8_C(158), UINT8_C(106), UINT8_MAX, UINT8_C(252), UINT8_C( 46), UINT8_C( 2), UINT8_C(213), UINT8_C( 82) }, }, { { UINT8_C(154), UINT8_C(142), UINT8_C(163), UINT8_C( 64), UINT8_C(168), UINT8_C( 59), UINT8_C( 88), UINT8_C(194), UINT8_C(211), UINT8_C(120), UINT8_C( 53), UINT8_C( 89), UINT8_C( 63), UINT8_C( 58), UINT8_C( 58), UINT8_C( 57) }, }, { { UINT8_C( 95), UINT8_C(253), UINT8_C( 35), UINT8_C(135), UINT8_C(202), UINT8_C(147), UINT8_C(225), UINT8_C(104), UINT8_C(253), UINT8_C(224), UINT8_C(100), UINT8_C( 44), UINT8_C(227), UINT8_C( 57), UINT8_C(126), UINT8_C(125) }, }, { { UINT8_C(200), UINT8_C( 33), UINT8_C(189), UINT8_C(112), UINT8_C( 93), UINT8_C( 22), UINT8_C( 50), UINT8_C( 48), UINT8_C(142), UINT8_C(103), UINT8_C(137), UINT8_C(205), UINT8_C(161), UINT8_C(195), UINT8_C( 6), UINT8_C( 0) }, }, { { UINT8_C(192), UINT8_C( 42), UINT8_C(135), UINT8_C(138), UINT8_C(189), UINT8_C(105), UINT8_C(242), UINT8_C(187), UINT8_C( 73), UINT8_C( 86), UINT8_C(231), UINT8_C( 44), UINT8_C(144), UINT8_C(101), UINT8_C(169), UINT8_C( 88) }, }, { { UINT8_C(134), UINT8_C(103), UINT8_C(200), UINT8_C(227), UINT8_C(125), UINT8_C(250), UINT8_C( 19), UINT8_C( 11), UINT8_C( 97), UINT8_C(156), UINT8_C(216), UINT8_C( 2), UINT8_C( 96), UINT8_C(222), UINT8_C( 2), UINT8_C( 32) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = 
simde_vld1q_u8(test_vec[i].a); simde_uint8x16_private a_ = simde_uint8x16_to_private(a); simde_int32x4_t r = simde_vreinterpretq_s32_u8(a); simde_int32x4_private r_ = simde_int32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s64_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; } test_vec[] = { { { UINT8_C( 46), UINT8_C(245), UINT8_C(233), UINT8_C(253), UINT8_C( 19), UINT8_C(131), UINT8_C( 29), UINT8_C( 10), UINT8_C(144), UINT8_C(167), UINT8_C(104), UINT8_C(229), UINT8_C(128), UINT8_C(142), UINT8_C( 14), UINT8_C( 54) }, }, { { UINT8_C(127), UINT8_C(158), UINT8_C(248), UINT8_C(141), UINT8_C( 40), UINT8_C(252), UINT8_C(110), UINT8_C( 25), UINT8_C( 81), UINT8_C( 49), UINT8_C( 20), UINT8_C(242), UINT8_C(154), UINT8_C(241), UINT8_C(214), UINT8_C(201) }, }, { { UINT8_C(231), UINT8_C(191), UINT8_C(198), UINT8_C(250), UINT8_C( 66), UINT8_C(227), UINT8_C( 4), UINT8_C(210), UINT8_C(139), UINT8_C(108), UINT8_C(184), UINT8_C( 11), UINT8_C(250), UINT8_C(198), UINT8_C( 65), UINT8_C(121) }, }, { { UINT8_C(100), UINT8_C( 57), UINT8_C( 6), UINT8_C(140), UINT8_C( 54), UINT8_C(117), UINT8_C(165), UINT8_C(135), UINT8_C(166), UINT8_C(185), UINT8_C(121), UINT8_C( 64), UINT8_C(170), UINT8_C( 79), UINT8_C( 9), UINT8_C(145) }, }, { { UINT8_C( 14), UINT8_C(208), UINT8_C(139), UINT8_C( 80), UINT8_C(179), UINT8_C(144), UINT8_C( 34), UINT8_C( 62), UINT8_C(252), UINT8_C(218), UINT8_C( 73), UINT8_C(247), UINT8_C(160), UINT8_C(138), UINT8_C(112), UINT8_C( 4) }, }, { { UINT8_C(196), UINT8_C(119), UINT8_C(144), UINT8_C(250), UINT8_C(236), UINT8_C( 53), UINT8_C(129), UINT8_C(146), UINT8_C(238), UINT8_C(250), UINT8_C(210), UINT8_C(153), UINT8_C( 73), UINT8_C(220), UINT8_C( 42), UINT8_C( 87) }, }, { { UINT8_C(172), UINT8_C(182), UINT8_C(167), UINT8_C( 95), UINT8_C( 70), UINT8_C(201), UINT8_C(158), UINT8_C( 66), UINT8_C(164), UINT8_C(231), UINT8_C( 57), UINT8_C( 68), UINT8_C(114), UINT8_C(170), UINT8_C( 73), UINT8_C( 
54) }, }, { { UINT8_C( 33), UINT8_C(217), UINT8_C( 48), UINT8_C( 13), UINT8_C( 15), UINT8_C(177), UINT8_C(159), UINT8_C(253), UINT8_C(171), UINT8_C(113), UINT8_C(150), UINT8_C(244), UINT8_C( 77), UINT8_C(193), UINT8_C( 75), UINT8_C(249) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_private a_ = simde_uint8x16_to_private(a); simde_int64x2_t r = simde_vreinterpretq_s64_u8(a); simde_int64x2_private r_ = simde_int64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u16_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; } test_vec[] = { { { UINT8_C( 82), UINT8_C(199), UINT8_C(185), UINT8_C(196), UINT8_C(158), UINT8_C( 47), UINT8_C( 61), UINT8_C(140), UINT8_C(219), UINT8_C(212), UINT8_C(210), UINT8_C(116), UINT8_C(240), UINT8_C(118), UINT8_C(167), UINT8_C( 68) }, }, { { UINT8_C( 44), UINT8_C(199), UINT8_C( 11), UINT8_C( 1), UINT8_C( 43), UINT8_C( 34), UINT8_C( 2), UINT8_C(208), UINT8_C( 89), UINT8_C(217), UINT8_C( 35), UINT8_C( 35), UINT8_C(216), UINT8_C(154), UINT8_C(143), UINT8_C( 42) }, }, { { UINT8_C( 97), UINT8_C( 72), UINT8_C(239), UINT8_MAX, UINT8_C(119), UINT8_C( 44), UINT8_C(140), UINT8_C( 82), UINT8_C( 1), UINT8_C( 94), UINT8_C(199), UINT8_C(241), UINT8_C(213), UINT8_C(110), UINT8_C( 53), UINT8_C( 1) }, }, { { UINT8_C( 53), UINT8_C( 64), UINT8_C( 2), UINT8_C( 96), UINT8_C( 99), UINT8_C( 5), UINT8_C( 48), UINT8_C(188), UINT8_C(222), UINT8_C( 83), UINT8_C(223), UINT8_C(183), UINT8_C(237), UINT8_C(110), UINT8_C(225), UINT8_C( 78) }, }, { { UINT8_C(182), UINT8_C(208), UINT8_C( 78), UINT8_C( 45), UINT8_C(253), UINT8_C(218), UINT8_C(128), UINT8_C(254), UINT8_C( 56), UINT8_C( 71), UINT8_C(239), UINT8_C( 13), UINT8_C(181), UINT8_C( 36), UINT8_C( 15), UINT8_C(234) }, }, { { UINT8_C(101), UINT8_C( 17), UINT8_C( 74), UINT8_C(200), UINT8_C( 22), UINT8_C(122), UINT8_C(132), UINT8_C(245), 
UINT8_C(205), UINT8_C( 99), UINT8_C(172), UINT8_C(186), UINT8_C(209), UINT8_C(141), UINT8_C( 8), UINT8_C(135) }, }, { { UINT8_C( 94), UINT8_C( 86), UINT8_C(180), UINT8_C( 91), UINT8_C( 48), UINT8_C( 52), UINT8_C( 89), UINT8_C(105), UINT8_C(123), UINT8_C( 72), UINT8_C(118), UINT8_C( 48), UINT8_C(108), UINT8_C(133), UINT8_C( 26), UINT8_C(209) }, }, { { UINT8_C(151), UINT8_C(100), UINT8_C(153), UINT8_C(173), UINT8_C(222), UINT8_C( 29), UINT8_C(162), UINT8_C(171), UINT8_C(128), UINT8_C( 78), UINT8_C(101), UINT8_C( 81), UINT8_C(220), UINT8_C(110), UINT8_C(216), UINT8_C( 58) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_private a_ = simde_uint8x16_to_private(a); simde_uint16x8_t r = simde_vreinterpretq_u16_u8(a); simde_uint16x8_private r_ = simde_uint16x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u32_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; } test_vec[] = { { { UINT8_C( 82), UINT8_C( 69), UINT8_C(228), UINT8_C(116), UINT8_C(114), UINT8_C(137), UINT8_C(229), UINT8_C( 73), UINT8_C( 23), UINT8_C(171), UINT8_C(101), UINT8_C(203), UINT8_C(170), UINT8_C(220), UINT8_C(186), UINT8_C( 4) }, }, { { UINT8_C( 57), UINT8_C(134), UINT8_C(119), UINT8_C(221), UINT8_C(153), UINT8_C(228), UINT8_C( 75), UINT8_C( 17), UINT8_C(120), UINT8_C(139), UINT8_C(153), UINT8_C( 31), UINT8_C( 13), UINT8_C( 56), UINT8_C(176), UINT8_C( 96) }, }, { { UINT8_C(125), UINT8_C(149), UINT8_C(212), UINT8_C(240), UINT8_C( 30), UINT8_C(186), UINT8_C( 57), UINT8_C( 54), UINT8_C(101), UINT8_C(158), UINT8_C( 1), UINT8_C( 15), UINT8_C(123), UINT8_C(187), UINT8_C( 19), UINT8_C(180) }, }, { { UINT8_C( 66), UINT8_C(138), UINT8_C(146), UINT8_C(219), UINT8_C(110), UINT8_C(221), UINT8_C(236), UINT8_C(231), UINT8_C(105), UINT8_C(133), UINT8_C( 6), UINT8_C(118), UINT8_C(189), UINT8_C(183), UINT8_C(214), UINT8_C( 59) }, }, { { 
UINT8_C( 76), UINT8_C(171), UINT8_C( 43), UINT8_C(106), UINT8_C(101), UINT8_C(100), UINT8_C(160), UINT8_C(202), UINT8_C( 3), UINT8_C(162), UINT8_C(218), UINT8_C(126), UINT8_C( 93), UINT8_C(237), UINT8_C( 50), UINT8_C(159) }, }, { { UINT8_C(120), UINT8_C(196), UINT8_C(122), UINT8_C(230), UINT8_C(162), UINT8_C(102), UINT8_C(205), UINT8_C( 11), UINT8_C(235), UINT8_C(212), UINT8_C(129), UINT8_C(169), UINT8_C(139), UINT8_C( 88), UINT8_C(228), UINT8_C(215) }, }, { { UINT8_C( 3), UINT8_C( 15), UINT8_C( 65), UINT8_C(104), UINT8_C(115), UINT8_C(226), UINT8_C( 50), UINT8_C(118), UINT8_C(132), UINT8_C( 12), UINT8_C(244), UINT8_C(225), UINT8_C(250), UINT8_C( 39), UINT8_C(129), UINT8_C(114) }, }, { { UINT8_C(235), UINT8_C(251), UINT8_C( 88), UINT8_C(141), UINT8_C( 98), UINT8_C( 38), UINT8_C(152), UINT8_C( 77), UINT8_C(250), UINT8_C( 26), UINT8_C(246), UINT8_C(133), UINT8_C(114), UINT8_C(218), UINT8_C( 92), UINT8_C(117) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_private a_ = simde_uint8x16_to_private(a); simde_uint32x4_t r = simde_vreinterpretq_u32_u8(a); simde_uint32x4_private r_ = simde_uint32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u64_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; } test_vec[] = { { { UINT8_C(176), UINT8_C( 47), UINT8_C( 11), UINT8_C( 58), UINT8_C( 97), UINT8_C( 66), UINT8_C(184), UINT8_C( 12), UINT8_C(238), UINT8_C(198), UINT8_C( 66), UINT8_C(160), UINT8_C( 22), UINT8_C( 35), UINT8_C(163), UINT8_C(194) }, }, { { UINT8_C( 84), UINT8_C(146), UINT8_C( 13), UINT8_C( 43), UINT8_C( 21), UINT8_C( 80), UINT8_C(164), UINT8_C( 8), UINT8_C(252), UINT8_C( 83), UINT8_C( 68), UINT8_C(251), UINT8_C( 2), UINT8_C(231), UINT8_C( 63), UINT8_C(178) }, }, { { UINT8_C( 22), UINT8_C( 74), UINT8_C(237), UINT8_C(119), UINT8_C(141), UINT8_C(165), UINT8_C(131), UINT8_C(123), 
UINT8_C(107), UINT8_C(197), UINT8_C( 27), UINT8_C(130), UINT8_C(232), UINT8_C(190), UINT8_C( 68), UINT8_C( 60) }, }, { { UINT8_C( 80), UINT8_C( 82), UINT8_C(104), UINT8_C(102), UINT8_C(162), UINT8_C( 12), UINT8_C(110), UINT8_C(158), UINT8_C( 95), UINT8_C(178), UINT8_C(154), UINT8_C( 97), UINT8_C(153), UINT8_C(217), UINT8_C( 19), UINT8_C(175) }, }, { { UINT8_C( 35), UINT8_C( 0), UINT8_C( 38), UINT8_C(176), UINT8_C(165), UINT8_C(170), UINT8_C( 44), UINT8_C( 17), UINT8_C(111), UINT8_C( 71), UINT8_C(147), UINT8_C( 88), UINT8_C( 6), UINT8_C(215), UINT8_C(148), UINT8_C( 86) }, }, { { UINT8_C( 41), UINT8_C(252), UINT8_C(188), UINT8_C(203), UINT8_C( 8), UINT8_C( 43), UINT8_C(106), UINT8_C(103), UINT8_C(221), UINT8_C( 4), UINT8_C(200), UINT8_C(119), UINT8_C(221), UINT8_C(220), UINT8_C( 38), UINT8_C( 0) }, }, { { UINT8_C(220), UINT8_C( 77), UINT8_C(177), UINT8_C(130), UINT8_C(247), UINT8_C(221), UINT8_C(147), UINT8_C(102), UINT8_C( 36), UINT8_C( 38), UINT8_C(190), UINT8_C( 42), UINT8_C(253), UINT8_C( 83), UINT8_C(129), UINT8_C( 39) }, }, { { UINT8_C( 79), UINT8_C( 61), UINT8_C(242), UINT8_C( 88), UINT8_C(104), UINT8_C( 92), UINT8_C(191), UINT8_C( 70), UINT8_C( 96), UINT8_C(136), UINT8_C(189), UINT8_C( 61), UINT8_C(100), UINT8_C(227), UINT8_C( 62), UINT8_C( 64) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_private a_ = simde_uint8x16_to_private(a); simde_uint64x2_t r = simde_vreinterpretq_u64_u8(a); simde_uint64x2_private r_ = simde_uint64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpret_f32_u16: each 4 x uint16_t test vector is loaded
 * with simde_vld1_u16 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpret_f32_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; } test_vec[] = { { { UINT16_C(33507), UINT16_C(59494), UINT16_C(30007), UINT16_C(35832) }, }, { { UINT16_C(46775), UINT16_C(12032), UINT16_C( 8902), UINT16_C( 8337) }, }, { { UINT16_C(43537), UINT16_C(37876), UINT16_C(10452), UINT16_C(56119) }, }, { {
UINT16_C( 4464), UINT16_C(52152), UINT16_C( 3403), UINT16_C(11777) }, }, { { UINT16_C(26511), UINT16_C(50966), UINT16_C( 3804), UINT16_C(37714) }, }, { { UINT16_C(21189), UINT16_C(35779), UINT16_C(21621), UINT16_C(34475) }, }, { { UINT16_C(41215), UINT16_C(54041), UINT16_C(20936), UINT16_C(14511) }, }, { { UINT16_C(26466), UINT16_C(44291), UINT16_C( 1140), UINT16_C( 987) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_private a_ = simde_uint16x4_to_private(a); simde_float32x2_t r = simde_vreinterpret_f32_u16(a); simde_float32x2_private r_ = simde_float32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpret_f64_u16: each 4 x uint16_t test vector is loaded
 * with simde_vld1_u16 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpret_f64_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; } test_vec[] = { { { UINT16_C( 9483), UINT16_C(16427), UINT16_C(61280), UINT16_C(61320) }, }, { { UINT16_C(61708), UINT16_C(37798), UINT16_C(33678), UINT16_C(42947) }, }, { { UINT16_C( 8615), UINT16_C(53267), UINT16_C(40577), UINT16_C(13982) }, }, { { UINT16_C( 7345), UINT16_C(32884), UINT16_C(39065), UINT16_C(42079) }, }, { { UINT16_C(35518), UINT16_C( 7908), UINT16_C(27769), UINT16_C(34062) }, }, { { UINT16_C(46173), UINT16_C(60185), UINT16_C(56375), UINT16_C(56979) }, }, { { UINT16_C(42749), UINT16_C(32430), UINT16_C(19524), UINT16_C(62900) }, }, { { UINT16_C(10601), UINT16_C( 629), UINT16_C(54465), UINT16_C(32678) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_private a_ = simde_uint16x4_to_private(a); simde_float64x1_t r = simde_vreinterpret_f64_u16(a); simde_float64x1_private r_ = simde_float64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpret_s8_u16: each 4 x uint16_t test vector is loaded
 * with simde_vld1_u16 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpret_s8_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; } test_vec[] = { { { UINT16_C(37850),
UINT16_C(12094), UINT16_C(28613), UINT16_C(54167) }, }, { { UINT16_C(14611), UINT16_C(56465), UINT16_C(40885), UINT16_C(23652) }, }, { { UINT16_C(18906), UINT16_C( 5146), UINT16_C(32725), UINT16_C(39195) }, }, { { UINT16_C( 7126), UINT16_C(50291), UINT16_C(33501), UINT16_C(47348) }, }, { { UINT16_C(12821), UINT16_C(56039), UINT16_C(32417), UINT16_C(46510) }, }, { { UINT16_C(16312), UINT16_C(28049), UINT16_C(63198), UINT16_C(47305) }, }, { { UINT16_C(58175), UINT16_C( 5324), UINT16_C(59491), UINT16_C(14765) }, }, { { UINT16_C( 8195), UINT16_C(57853), UINT16_C(62114), UINT16_C(47257) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_private a_ = simde_uint16x4_to_private(a); simde_int8x8_t r = simde_vreinterpret_s8_u16(a); simde_int8x8_private r_ = simde_int8x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpret_s16_u16: each 4 x uint16_t test vector is loaded
 * with simde_vld1_u16 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpret_s16_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; } test_vec[] = { { { UINT16_C(56181), UINT16_C(42985), UINT16_C(47798), UINT16_C( 3233) }, }, { { UINT16_C(58242), UINT16_C(62873), UINT16_C(49449), UINT16_C(33758) }, }, { { UINT16_C(50296), UINT16_C(42295), UINT16_C(65104), UINT16_C(15723) }, }, { { UINT16_C(19339), UINT16_C(59497), UINT16_C(22037), UINT16_C(35422) }, }, { { UINT16_C(18225), UINT16_C(59441), UINT16_C(53761), UINT16_C(33780) }, }, { { UINT16_C(36277), UINT16_C(57208), UINT16_C(22094), UINT16_C(50786) }, }, { { UINT16_C(39195), UINT16_C(27499), UINT16_C(54936), UINT16_C( 9129) }, }, { { UINT16_C( 4642), UINT16_C(14091), UINT16_C(26984), UINT16_C(39617) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_private a_ = simde_uint16x4_to_private(a); simde_int16x4_t r = simde_vreinterpret_s16_u16(a); simde_int16x4_private r_ = simde_int16x4_to_private(r);
simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpret_s32_u16: each 4 x uint16_t test vector is loaded
 * with simde_vld1_u16 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpret_s32_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; } test_vec[] = { { { UINT16_C(24639), UINT16_C(32629), UINT16_C(14135), UINT16_C(61730) }, }, { { UINT16_C(40550), UINT16_C(48618), UINT16_C(12348), UINT16_C(20638) }, }, { { UINT16_C(51024), UINT16_C(16956), UINT16_C(15191), UINT16_C(39519) }, }, { { UINT16_C(13932), UINT16_C(62692), UINT16_C(19741), UINT16_C(23705) }, }, { { UINT16_C( 3757), UINT16_C(58843), UINT16_C(65093), UINT16_C(44246) }, }, { { UINT16_C(49308), UINT16_C(55401), UINT16_C( 2032), UINT16_C(16424) }, }, { { UINT16_C(25807), UINT16_C( 9858), UINT16_C(58015), UINT16_C( 3008) }, }, { { UINT16_C(42008), UINT16_C(13568), UINT16_C(39410), UINT16_C(40850) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_private a_ = simde_uint16x4_to_private(a); simde_int32x2_t r = simde_vreinterpret_s32_u16(a); simde_int32x2_private r_ = simde_int32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpret_s64_u16: each 4 x uint16_t test vector is loaded
 * with simde_vld1_u16 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpret_s64_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; } test_vec[] = { { { UINT16_C( 5357), UINT16_C(26804), UINT16_C( 1844), UINT16_C(34757) }, }, { { UINT16_C(13646), UINT16_C(39978), UINT16_C(48856), UINT16_C(33659) }, }, { { UINT16_C(30572), UINT16_C( 5381), UINT16_C(23799), UINT16_C(32271) }, }, { { UINT16_C(12043), UINT16_C(62789), UINT16_C(38605), UINT16_C(47698) }, }, { { UINT16_C( 1706), UINT16_C(56866), UINT16_C(59405), UINT16_C(23398) }, }, { { UINT16_C(36893), UINT16_C(62967), UINT16_C(29262), UINT16_C(47737) }, }, { { UINT16_C(32489), UINT16_C(57551), UINT16_C(57307), UINT16_C(58975) }, }, { { UINT16_C(41998), UINT16_C(56283), UINT16_C(11579), UINT16_C(58774) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a =
simde_vld1_u16(test_vec[i].a); simde_uint16x4_private a_ = simde_uint16x4_to_private(a); simde_int64x1_t r = simde_vreinterpret_s64_u16(a); simde_int64x1_private r_ = simde_int64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpret_u8_u16: each 4 x uint16_t test vector is loaded
 * with simde_vld1_u16 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpret_u8_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; } test_vec[] = { { { UINT16_C(22055), UINT16_C(19585), UINT16_C(41250), UINT16_C( 5379) }, }, { { UINT16_C(46099), UINT16_C(50569), UINT16_C(55295), UINT16_C(25373) }, }, { { UINT16_C(12707), UINT16_C(64309), UINT16_C(48160), UINT16_C(47304) }, }, { { UINT16_C( 8583), UINT16_C(32183), UINT16_C(59924), UINT16_C(15474) }, }, { { UINT16_C(62272), UINT16_C(25224), UINT16_C(35988), UINT16_C(43127) }, }, { { UINT16_C( 64), UINT16_C(16493), UINT16_C(35799), UINT16_C(31395) }, }, { { UINT16_C(55484), UINT16_C(56437), UINT16_C(16020), UINT16_C( 7316) }, }, { { UINT16_C(19295), UINT16_C(29593), UINT16_C( 3125), UINT16_C(30383) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_private a_ = simde_uint16x4_to_private(a); simde_uint8x8_t r = simde_vreinterpret_u8_u16(a); simde_uint8x8_private r_ = simde_uint8x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpret_u32_u16: each 4 x uint16_t test vector is loaded
 * with simde_vld1_u16 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpret_u32_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; } test_vec[] = { { { UINT16_C(26673), UINT16_C(47249), UINT16_C(35372), UINT16_C(15460) }, }, { { UINT16_C(57841), UINT16_C(41900), UINT16_C(63910), UINT16_C(58747) }, }, { { UINT16_C( 6975), UINT16_C(23152), UINT16_C(12482), UINT16_C(40118) }, }, { { UINT16_C(35860), UINT16_C(44337), UINT16_C(45683), UINT16_C(42280) }, }, { { UINT16_C(47387), UINT16_C(18269), UINT16_C(49731), UINT16_C(13443) }, }, { { UINT16_C(12195), UINT16_C(18904), UINT16_C(21289), UINT16_C(26670) }, }, { { UINT16_C(40558), UINT16_C(12738), UINT16_C(30926),
UINT16_C(58061) }, }, { { UINT16_C(65028), UINT16_C(30863), UINT16_C(47025), UINT16_C(52253) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_private a_ = simde_uint16x4_to_private(a); simde_uint32x2_t r = simde_vreinterpret_u32_u16(a); simde_uint32x2_private r_ = simde_uint32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpret_u64_u16: each 4 x uint16_t test vector is loaded
 * with simde_vld1_u16 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpret_u64_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; } test_vec[] = { { { UINT16_C(26293), UINT16_C( 7173), UINT16_C(54818), UINT16_C(60544) }, }, { { UINT16_C(24048), UINT16_C(55271), UINT16_C(28786), UINT16_C(63191) }, }, { { UINT16_C(50441), UINT16_C(40165), UINT16_C(56905), UINT16_C( 1621) }, }, { { UINT16_C(60501), UINT16_C(36105), UINT16_C(15052), UINT16_C(33289) }, }, { { UINT16_C( 3744), UINT16_C(50078), UINT16_C( 7908), UINT16_C(54703) }, }, { { UINT16_C(38523), UINT16_C(61100), UINT16_C(33542), UINT16_C( 4068) }, }, { { UINT16_C(51528), UINT16_C(37292), UINT16_C( 424), UINT16_C(64920) }, }, { { UINT16_C(41454), UINT16_C(47754), UINT16_C(38108), UINT16_C(31804) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_private a_ = simde_uint16x4_to_private(a); simde_uint64x1_t r = simde_vreinterpret_u64_u16(a); simde_uint64x1_private r_ = simde_uint64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpretq_f32_u16: each 8 x uint16_t test vector is loaded
 * with simde_vld1q_u16 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpretq_f32_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; } test_vec[] = { { { UINT16_C(14787), UINT16_C(26040), UINT16_C(47560), UINT16_C(54334), UINT16_C(46659), UINT16_C(43287), UINT16_C(28461), UINT16_C(11270) }, }, { { UINT16_C(40851), UINT16_C( 3614), UINT16_C(37884), UINT16_C(13423), UINT16_C(40879), UINT16_C(26781), UINT16_C(54165), UINT16_C(22680) }, }, { { UINT16_C(20748),
UINT16_C(54461), UINT16_C(64522), UINT16_C(19880), UINT16_C(49074), UINT16_C(57334), UINT16_C(64814), UINT16_C(49419) }, }, { { UINT16_C(10652), UINT16_C(39375), UINT16_C(16060), UINT16_C(27853), UINT16_C(27614), UINT16_C(29652), UINT16_C(27710), UINT16_C(19147) }, }, { { UINT16_C(35261), UINT16_C(50974), UINT16_C(50821), UINT16_C(14100), UINT16_C( 2949), UINT16_C(45846), UINT16_C( 8456), UINT16_C(42100) }, }, { { UINT16_C(17226), UINT16_C( 1597), UINT16_C( 2945), UINT16_C(24434), UINT16_C(18038), UINT16_C(46290), UINT16_C(40627), UINT16_C(28926) }, }, { { UINT16_C( 7207), UINT16_C(44088), UINT16_C(19682), UINT16_C(26595), UINT16_C(63831), UINT16_C(24346), UINT16_C(36378), UINT16_C(25604) }, }, { { UINT16_C(16849), UINT16_C(21098), UINT16_C(56652), UINT16_C(49842), UINT16_C(33827), UINT16_C(54902), UINT16_C(29730), UINT16_C(18759) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_float32x4_t r = simde_vreinterpretq_f32_u16(a); simde_float32x4_private r_ = simde_float32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpretq_f64_u16: each 8 x uint16_t test vector is loaded
 * with simde_vld1q_u16 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpretq_f64_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; } test_vec[] = { { { UINT16_C(27485), UINT16_C(34613), UINT16_C( 7236), UINT16_C(17212), UINT16_C(26790), UINT16_C(14469), UINT16_C(59015), UINT16_C(59527) }, }, { { UINT16_C( 6749), UINT16_C(54543), UINT16_C(64291), UINT16_C(27971), UINT16_C(35201), UINT16_C(33391), UINT16_C(41499), UINT16_C(30814) }, }, { { UINT16_C(37645), UINT16_C(20736), UINT16_C(15535), UINT16_C(22164), UINT16_C( 6820), UINT16_C(11150), UINT16_C( 5632), UINT16_C(23828) }, }, { { UINT16_C( 9008), UINT16_C(21298), UINT16_C(29982), UINT16_C(41153), UINT16_C(12542), UINT16_C( 6434), UINT16_C(33234), UINT16_C(57234) }, }, { { UINT16_C(37396), UINT16_C(50224), UINT16_C(50638), UINT16_C(29466),
UINT16_C(43231), UINT16_C(57246), UINT16_C(45758), UINT16_C(61244) }, }, { { UINT16_C(28373), UINT16_C(62530), UINT16_C( 996), UINT16_C(58004), UINT16_C(46643), UINT16_C( 1532), UINT16_C(36407), UINT16_C(19684) }, }, { { UINT16_C( 5408), UINT16_C(60944), UINT16_C(10970), UINT16_C(47457), UINT16_C( 210), UINT16_C(37272), UINT16_C(54450), UINT16_C(34944) }, }, { { UINT16_C(49730), UINT16_C( 9852), UINT16_C( 4294), UINT16_C(63753), UINT16_C( 1478), UINT16_C(65279), UINT16_C(58259), UINT16_C(45898) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_float64x2_t r = simde_vreinterpretq_f64_u16(a); simde_float64x2_private r_ = simde_float64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpretq_s8_u16: each 8 x uint16_t test vector is loaded
 * with simde_vld1q_u16 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpretq_s8_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; } test_vec[] = { { { UINT16_C(30244), UINT16_C(34430), UINT16_C(25246), UINT16_C(16862), UINT16_C(65193), UINT16_C( 4868), UINT16_C(60789), UINT16_C(43837) }, }, { { UINT16_C(50205), UINT16_C(31533), UINT16_C(19434), UINT16_C(59923), UINT16_C( 3352), UINT16_C(55771), UINT16_C( 534), UINT16_C(15315) }, }, { { UINT16_C(21112), UINT16_C( 6081), UINT16_C(40884), UINT16_C(23896), UINT16_C(23709), UINT16_C( 4976), UINT16_C(44362), UINT16_C(26558) }, }, { { UINT16_C(60273), UINT16_C(23522), UINT16_C(63030), UINT16_C(20037), UINT16_C( 8195), UINT16_C( 6695), UINT16_C(64290), UINT16_C(39765) }, }, { { UINT16_C( 5709), UINT16_C( 434), UINT16_C( 2741), UINT16_C(21342), UINT16_C(52838), UINT16_C(45158), UINT16_C( 9339), UINT16_C(60440) }, }, { { UINT16_C(64015), UINT16_C(17991), UINT16_C(36336), UINT16_C(62612), UINT16_C(48301), UINT16_C(53262), UINT16_C(25527), UINT16_C( 1131) }, }, { { UINT16_C( 7545), UINT16_C(11781), UINT16_C(25383), UINT16_C(36225), UINT16_C(59185), UINT16_C(44094), UINT16_C(22027), UINT16_C( 7064)
}, }, { { UINT16_C(57168), UINT16_C(16737), UINT16_C(62828), UINT16_C( 6709), UINT16_C(17329), UINT16_C(26858), UINT16_C(21926), UINT16_C( 8044) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_int8x16_t r = simde_vreinterpretq_s8_u16(a); simde_int8x16_private r_ = simde_int8x16_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpretq_s16_u16: each 8 x uint16_t test vector is loaded
 * with simde_vld1q_u16 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpretq_s16_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; } test_vec[] = { { { UINT16_C(37371), UINT16_C( 426), UINT16_C( 2791), UINT16_C(64987), UINT16_C(18284), UINT16_C(18051), UINT16_C( 6331), UINT16_C(40537) }, }, { { UINT16_C( 8541), UINT16_C(40133), UINT16_C(10784), UINT16_C(43297), UINT16_C(16042), UINT16_C( 6646), UINT16_C(19594), UINT16_C(34381) }, }, { { UINT16_C(63454), UINT16_C(50567), UINT16_C(25090), UINT16_C(28355), UINT16_C(18089), UINT16_C(25780), UINT16_C( 3422), UINT16_C(48130) }, }, { { UINT16_C(50991), UINT16_C(20312), UINT16_C(31474), UINT16_C(40184), UINT16_C(61368), UINT16_C(17078), UINT16_C( 827), UINT16_C( 6600) }, }, { { UINT16_C(20474), UINT16_C(64735), UINT16_C(41649), UINT16_C(23146), UINT16_C( 7912), UINT16_C(18110), UINT16_C(49452), UINT16_C(23298) }, }, { { UINT16_C(23432), UINT16_C(31402), UINT16_C(41685), UINT16_C(36119), UINT16_C(52625), UINT16_C(52687), UINT16_C(39120), UINT16_C(51942) }, }, { { UINT16_C(50663), UINT16_C(39367), UINT16_C(12647), UINT16_C(20467), UINT16_C(45648), UINT16_C(31894), UINT16_C(39027), UINT16_C(64471) }, }, { { UINT16_C(33267), UINT16_C(51318), UINT16_C(36131), UINT16_C(46421), UINT16_C( 9562), UINT16_C(10882), UINT16_C(26813), UINT16_C(42228) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_int16x8_t r =
simde_vreinterpretq_s16_u16(a); simde_int16x8_private r_ = simde_int16x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpretq_s32_u16: each 8 x uint16_t test vector is loaded
 * with simde_vld1q_u16 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpretq_s32_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; } test_vec[] = { { { UINT16_C(61872), UINT16_C(58906), UINT16_C(40218), UINT16_C(57729), UINT16_C( 4514), UINT16_C(43884), UINT16_C(60853), UINT16_C(48362) }, }, { { UINT16_C(27639), UINT16_C(10283), UINT16_C(29782), UINT16_C( 9365), UINT16_C(13242), UINT16_C(29738), UINT16_C( 2313), UINT16_C(47770) }, }, { { UINT16_C(46330), UINT16_C( 5280), UINT16_C( 8785), UINT16_C(62454), UINT16_C(25139), UINT16_C(59550), UINT16_C(35151), UINT16_C(18084) }, }, { { UINT16_C(53236), UINT16_C(19054), UINT16_C( 835), UINT16_C(64878), UINT16_C(39222), UINT16_C(16498), UINT16_C( 3234), UINT16_C(40186) }, }, { { UINT16_C(39616), UINT16_C( 4528), UINT16_C(42684), UINT16_C(61188), UINT16_C(41480), UINT16_C(22487), UINT16_C(31531), UINT16_C( 8350) }, }, { { UINT16_C( 3146), UINT16_C(36202), UINT16_C(55568), UINT16_C(18059), UINT16_C(64882), UINT16_C( 5254), UINT16_C(32777), UINT16_C(51632) }, }, { { UINT16_C(24603), UINT16_C(55258), UINT16_C(56839), UINT16_C( 4039), UINT16_C(40576), UINT16_C(44135), UINT16_C( 1306), UINT16_C(25804) }, }, { { UINT16_C(13841), UINT16_C( 8690), UINT16_C(32015), UINT16_C(33128), UINT16_C(61050), UINT16_C(33685), UINT16_C(17775), UINT16_C(35404) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_int32x4_t r = simde_vreinterpretq_s32_u16(a); simde_int32x4_private r_ = simde_int32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpretq_s64_u16: each 8 x uint16_t test vector is loaded
 * with simde_vld1q_u16 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpretq_s64_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; } test_vec[] = { { { UINT16_C( 2822), UINT16_C(48884), UINT16_C(29973), UINT16_C( 1714),
UINT16_C(13637), UINT16_C(55111), UINT16_C(52574), UINT16_C( 269) }, }, { { UINT16_C(20351), UINT16_C(21891), UINT16_C(12725), UINT16_C(62731), UINT16_C( 548), UINT16_C(51419), UINT16_C(42962), UINT16_C(55479) }, }, { { UINT16_C(43954), UINT16_C(51350), UINT16_C(18465), UINT16_C(26318), UINT16_C( 5502), UINT16_C(56381), UINT16_C(19171), UINT16_C(25310) }, }, { { UINT16_C(24986), UINT16_C(20408), UINT16_C(50066), UINT16_C(46916), UINT16_C( 8389), UINT16_C(38783), UINT16_C(14023), UINT16_C(31087) }, }, { { UINT16_C( 1505), UINT16_C( 577), UINT16_C( 3918), UINT16_C(52328), UINT16_C(42277), UINT16_C( 2216), UINT16_C(34544), UINT16_C(35434) }, }, { { UINT16_C( 8936), UINT16_C(31449), UINT16_C( 7909), UINT16_C(43569), UINT16_C(45118), UINT16_C( 1345), UINT16_C(45286), UINT16_C(51326) }, }, { { UINT16_C(49334), UINT16_C( 1226), UINT16_C(13263), UINT16_C(62672), UINT16_C(30936), UINT16_C(51452), UINT16_C(26623), UINT16_C(59218) }, }, { { UINT16_C(11401), UINT16_C(28513), UINT16_C(37706), UINT16_C(34841), UINT16_C(23363), UINT16_C(10893), UINT16_C( 2827), UINT16_C(49650) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_int64x2_t r = simde_vreinterpretq_s64_u16(a); simde_int64x2_private r_ = simde_int64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpretq_u8_u16: each 8 x uint16_t test vector is loaded
 * with simde_vld1q_u16 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpretq_u8_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; } test_vec[] = { { { UINT16_C(31179), UINT16_C(62043), UINT16_C(34234), UINT16_C(60575), UINT16_C(14522), UINT16_C(65218), UINT16_C(28205), UINT16_C( 2962) }, }, { { UINT16_C(26700), UINT16_C( 7147), UINT16_C(59748), UINT16_C( 4323), UINT16_C(15518), UINT16_C(23427), UINT16_C(47621), UINT16_C(53345) }, }, { { UINT16_C(48179), UINT16_C(60866), UINT16_C(24898), UINT16_C(64729), UINT16_C(40089), UINT16_C(51194), UINT16_C(35850), UINT16_C(22482) },
}, { { UINT16_C(48884), UINT16_C(22898), UINT16_C(21927), UINT16_C(17769), UINT16_C(60562), UINT16_C(38816), UINT16_C( 423), UINT16_C(55911) }, }, { { UINT16_C(10686), UINT16_C( 200), UINT16_C(41354), UINT16_C( 9468), UINT16_C(63037), UINT16_C(18667), UINT16_C(48515), UINT16_C(30623) }, }, { { UINT16_C( 4475), UINT16_C( 8912), UINT16_C(14694), UINT16_C(63591), UINT16_C( 2086), UINT16_C(52623), UINT16_C(62985), UINT16_C(51111) }, }, { { UINT16_C(28447), UINT16_C(43719), UINT16_C(49937), UINT16_C(20174), UINT16_C(47546), UINT16_C(15766), UINT16_C(13686), UINT16_C(62132) }, }, { { UINT16_C(34118), UINT16_C(44308), UINT16_C(31934), UINT16_C(58533), UINT16_C(13700), UINT16_C(36273), UINT16_C(22827), UINT16_C(19285) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_uint8x16_t r = simde_vreinterpretq_u8_u16(a); simde_uint8x16_private r_ = simde_uint8x16_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpretq_u32_u16: each 8 x uint16_t test vector is loaded
 * with simde_vld1q_u16 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpretq_u32_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; } test_vec[] = { { { UINT16_C(40287), UINT16_C(10453), UINT16_C( 270), UINT16_C(62770), UINT16_C(21454), UINT16_C(20905), UINT16_C(54272), UINT16_C(28818) }, }, { { UINT16_C( 1885), UINT16_C(52960), UINT16_C(24818), UINT16_C(27591), UINT16_C(57828), UINT16_C(35098), UINT16_C(11676), UINT16_C(64676) }, }, { { UINT16_C(31178), UINT16_C(55588), UINT16_C(22138), UINT16_C(18894), UINT16_C(30634), UINT16_C(43674), UINT16_C(11339), UINT16_C(43290) }, }, { { UINT16_C(64052), UINT16_C( 9847), UINT16_C(15963), UINT16_C(16273), UINT16_C(44063), UINT16_C(48329), UINT16_C(28121), UINT16_C(41912) }, }, { { UINT16_C(56550), UINT16_C(24956), UINT16_C(18995), UINT16_C(56746), UINT16_C(17602), UINT16_C( 3463), UINT16_C(41329), UINT16_C(42422) }, }, { { UINT16_C(11931), UINT16_C(63179), UINT16_C(23660),
UINT16_C(35894), UINT16_C(65288), UINT16_C(57672), UINT16_C( 108), UINT16_C(21125) }, }, { { UINT16_C( 476), UINT16_C( 4019), UINT16_C(23884), UINT16_C( 3820), UINT16_C(29602), UINT16_C( 4891), UINT16_C(53780), UINT16_C(45240) }, }, { { UINT16_C(33536), UINT16_C(27814), UINT16_C(56543), UINT16_C(59640), UINT16_C(16603), UINT16_C(18377), UINT16_C(20032), UINT16_C( 7578) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_uint32x4_t r = simde_vreinterpretq_u32_u16(a); simde_uint32x4_private r_ = simde_uint32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpretq_u64_u16: each 8 x uint16_t test vector is loaded
 * with simde_vld1q_u16 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpretq_u64_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; } test_vec[] = { { { UINT16_C(37256), UINT16_C( 7287), UINT16_C(13665), UINT16_C(19885), UINT16_C(39602), UINT16_C(36139), UINT16_C(23663), UINT16_C(43651) }, }, { { UINT16_C(43283), UINT16_C(33404), UINT16_C(53825), UINT16_C(39703), UINT16_C(31803), UINT16_C( 7036), UINT16_C(31574), UINT16_C(56904) }, }, { { UINT16_C(49164), UINT16_C(28154), UINT16_C(43253), UINT16_C(43194), UINT16_C(58690), UINT16_C(45365), UINT16_C(47426), UINT16_C(21851) }, }, { { UINT16_C(55394), UINT16_C(41943), UINT16_C(61098), UINT16_C(58687), UINT16_C(47979), UINT16_C(49408), UINT16_C(18742), UINT16_C(17056) }, }, { { UINT16_C(39433), UINT16_C(65199), UINT16_C(26946), UINT16_C(33958), UINT16_C(56399), UINT16_C(37174), UINT16_C(37269), UINT16_C(63462) }, }, { { UINT16_C(48489), UINT16_C( 5019), UINT16_C(55980), UINT16_C( 6137), UINT16_C(63893), UINT16_C(52440), UINT16_C(30786), UINT16_C(19214) }, }, { { UINT16_C(48659), UINT16_C(21834), UINT16_C(61479), UINT16_C(30426), UINT16_C( 4300), UINT16_C(24839), UINT16_C(60833), UINT16_C( 2905) }, }, { { UINT16_C(62635), UINT16_C(22302), UINT16_C( 6094), UINT16_C(25454), UINT16_C(17937), UINT16_C(21295),
UINT16_C(16063), UINT16_C(53919) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_private a_ = simde_uint16x8_to_private(a); simde_uint64x2_t r = simde_vreinterpretq_u64_u16(a); simde_uint64x2_private r_ = simde_uint64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpret_f32_u32: each 2 x uint32_t test vector is loaded
 * with simde_vld1_u32 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpret_f32_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; } test_vec[] = { { { UINT32_C( 455684687), UINT32_C(1949574478) }, }, { { UINT32_C( 629409756), UINT32_C(2117591771) }, }, { { UINT32_C( 273504167), UINT32_C( 926363004) }, }, { { UINT32_C(3937451016), UINT32_C(3913249434) }, }, { { UINT32_C(4060440740), UINT32_C(1785084301) }, }, { { UINT32_C( 479193921), UINT32_C(1956300749) }, }, { { UINT32_C(2592401182), UINT32_C( 500284181) }, }, { { UINT32_C( 285704823), UINT32_C(2583381748) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_private a_ = simde_uint32x2_to_private(a); simde_float32x2_t r = simde_vreinterpret_f32_u32(a); simde_float32x2_private r_ = simde_float32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpret_f64_u32: each 2 x uint32_t test vector is loaded
 * with simde_vld1_u32 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpret_f64_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; } test_vec[] = { { { UINT32_C(3803574018), UINT32_C( 183881149) }, }, { { UINT32_C( 881972491), UINT32_C(2577490452) }, }, { { UINT32_C( 231932657), UINT32_C(1707981836) }, }, { { UINT32_C(2565105070), UINT32_C(1101186879) }, }, { { UINT32_C(1931696054), UINT32_C( 813504548) }, }, { { UINT32_C( 40111854), UINT32_C(1654326640) }, }, { { UINT32_C( 342846983), UINT32_C(3631824170) }, }, { { UINT32_C(3782303138), UINT32_C(3676508964) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a);
simde_uint32x2_private a_ = simde_uint32x2_to_private(a); simde_float64x1_t r = simde_vreinterpret_f64_u32(a); simde_float64x1_private r_ = simde_float64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpret_s8_u32: each 2 x uint32_t test vector is loaded
 * with simde_vld1_u32 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpret_s8_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; } test_vec[] = { { { UINT32_C(1082916978), UINT32_C(3918184476) }, }, { { UINT32_C(1406603480), UINT32_C( 187667497) }, }, { { UINT32_C(2444684576), UINT32_C(2102212792) }, }, { { UINT32_C(1162164356), UINT32_C( 839518400) }, }, { { UINT32_C( 611489288), UINT32_C( 789445718) }, }, { { UINT32_C( 897770764), UINT32_C(2571153785) }, }, { { UINT32_C(1445656222), UINT32_C(3017045806) }, }, { { UINT32_C(1878530479), UINT32_C( 715194913) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_private a_ = simde_uint32x2_to_private(a); simde_int8x8_t r = simde_vreinterpret_s8_u32(a); simde_int8x8_private r_ = simde_int8x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpret_s16_u32: each 2 x uint32_t test vector is loaded
 * with simde_vld1_u32 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpret_s16_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; } test_vec[] = { { { UINT32_C(1188544432), UINT32_C(1237954591) }, }, { { UINT32_C(2327464733), UINT32_C(2915838870) }, }, { { UINT32_C( 986850432), UINT32_C(3609662263) }, }, { { UINT32_C(2346378695), UINT32_C(1891232960) }, }, { { UINT32_C(3400962219), UINT32_C(1578336065) }, }, { { UINT32_C(1642646986), UINT32_C(2031006969) }, }, { { UINT32_C( 263447000), UINT32_C(3152468724) }, }, { { UINT32_C(2152120767), UINT32_C(1508966317) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_private a_ = simde_uint32x2_to_private(a); simde_int16x4_t r = simde_vreinterpret_s16_u32(a); simde_int16x4_private r_ = simde_int16x4_to_private(r); simde_assert_equal_i(0,
simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpret_s32_u32: each 2 x uint32_t test vector is loaded
 * with simde_vld1_u32 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpret_s32_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; } test_vec[] = { { { UINT32_C(4190303205), UINT32_C(3421135521) }, }, { { UINT32_C(3069286763), UINT32_C(2460414754) }, }, { { UINT32_C(1055798285), UINT32_C( 328962366) }, }, { { UINT32_C(3891361501), UINT32_C(2664096697) }, }, { { UINT32_C(2006420950), UINT32_C(1531150831) }, }, { { UINT32_C(1024537626), UINT32_C( 886028071) }, }, { { UINT32_C( 712228332), UINT32_C( 725421646) }, }, { { UINT32_C(1024667268), UINT32_C(3906788625) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_private a_ = simde_uint32x2_to_private(a); simde_int32x2_t r = simde_vreinterpret_s32_u32(a); simde_int32x2_private r_ = simde_int32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpret_s64_u32: each 2 x uint32_t test vector is loaded
 * with simde_vld1_u32 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpret_s64_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; } test_vec[] = { { { UINT32_C(1737799811), UINT32_C(4184346154) }, }, { { UINT32_C(3557854092), UINT32_C(4004696328) }, }, { { UINT32_C( 692391293), UINT32_C(3828726549) }, }, { { UINT32_C(1512595862), UINT32_C( 685172645) }, }, { { UINT32_C(3381684895), UINT32_C( 96663672) }, }, { { UINT32_C(2346308227), UINT32_C( 544836515) }, }, { { UINT32_C(2924134040), UINT32_C( 278036345) }, }, { { UINT32_C(2238364384), UINT32_C(1085161633) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_private a_ = simde_uint32x2_to_private(a); simde_int64x1_t r = simde_vreinterpret_s64_u32(a); simde_int64x1_private r_ = simde_int64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpret_u8_u32: each 2 x uint32_t test vector is loaded
 * with simde_vld1_u32 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpret_u8_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; } test_vec[] = { { { UINT32_C(4009940214),
UINT32_C(2454320358) }, }, { { UINT32_C(2871745685), UINT32_C(2450177441) }, }, { { UINT32_C(3471242743), UINT32_C(3298534337) }, }, { { UINT32_C(1355394572), UINT32_C( 983668291) }, }, { { UINT32_C(1361683306), UINT32_C( 753103511) }, }, { { UINT32_C(1893207758), UINT32_C(3254969035) }, }, { { UINT32_C(2576410840), UINT32_C(2556243084) }, }, { { UINT32_C( 501819098), UINT32_C( 660048572) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_private a_ = simde_uint32x2_to_private(a); simde_uint8x8_t r = simde_vreinterpret_u8_u32(a); simde_uint8x8_private r_ = simde_uint8x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpret_u16_u32: each 2 x uint32_t test vector is loaded
 * with simde_vld1_u32 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpret_u16_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; } test_vec[] = { { { UINT32_C(2334051580), UINT32_C(4009237040) }, }, { { UINT32_C(2584288261), UINT32_C(1769017581) }, }, { { UINT32_C(2631474304), UINT32_C(1500364826) }, }, { { UINT32_C(2537112437), UINT32_C( 134299660) }, }, { { UINT32_C(1033117709), UINT32_C(1059818554) }, }, { { UINT32_C(2514105768), UINT32_C(3321776965) }, }, { { UINT32_C(2170673255), UINT32_C( 232509336) }, }, { { UINT32_C( 514069522), UINT32_C(1629922900) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_private a_ = simde_uint32x2_to_private(a); simde_uint16x4_t r = simde_vreinterpret_u16_u32(a); simde_uint16x4_private r_ = simde_uint16x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; }
/* Test for simde_vreinterpret_u64_u32: each 2 x uint32_t test vector is loaded
 * with simde_vld1_u32 and reinterpreted; the private form of the result must
 * compare equal (simde_memcmp over sizeof(r_)) to the private form of the
 * input. Returns 0 after all vectors have been checked. */
static int test_simde_vreinterpret_u64_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; } test_vec[] = { { { UINT32_C( 870101165), UINT32_C(2209815297) }, }, { { UINT32_C(2428866423), UINT32_C( 306277594) }, }, { { UINT32_C( 381273794), UINT32_C( 528924364) }, }, { { UINT32_C(1140512921), UINT32_C(3548919590) }, }, { {
UINT32_C(3892798695), UINT32_C(4151033472) }, }, { { UINT32_C( 679948622), UINT32_C(1597687965) }, }, { { UINT32_C(1517679758), UINT32_C(1266351282) }, }, { { UINT32_C(4220417236), UINT32_C(2479756972) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_private a_ = simde_uint32x2_to_private(a); simde_uint64x1_t r = simde_vreinterpret_u64_u32(a); simde_uint64x1_private r_ = simde_uint64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_f32_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; } test_vec[] = { { { UINT32_C(3279084377), UINT32_C(2161745592), UINT32_C(3654801364), UINT32_C(3056199879) }, }, { { UINT32_C(1218747532), UINT32_C( 135127342), UINT32_C(1086738445), UINT32_C(2027280927) }, }, { { UINT32_C(1832601525), UINT32_C(3119322341), UINT32_C(2995963115), UINT32_C( 946453676) }, }, { { UINT32_C(2273316184), UINT32_C(4220489454), UINT32_C(4248524254), UINT32_C(3832942639) }, }, { { UINT32_C(1011986775), UINT32_C(2985705158), UINT32_C(2942601219), UINT32_C(2649279812) }, }, { { UINT32_C(3374606554), UINT32_C(3586438134), UINT32_C( 936509448), UINT32_C(1746618384) }, }, { { UINT32_C(3232001274), UINT32_C(2909903530), UINT32_C(1734202659), UINT32_C(2097431970) }, }, { { UINT32_C(2756061357), UINT32_C(3816360667), UINT32_C( 454707978), UINT32_C(2390963604) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_float32x4_t r = simde_vreinterpretq_f32_u32(a); simde_float32x4_private r_ = simde_float32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_f64_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; } test_vec[] = { { { UINT32_C( 798628804), 
UINT32_C(2308740029), UINT32_C(1010183400), UINT32_C( 499135747) }, }, { { UINT32_C(3302186879), UINT32_C(1617207352), UINT32_C(1291082784), UINT32_C(3849247008) }, }, { { UINT32_C(2970880244), UINT32_C(2201727131), UINT32_C(3753865692), UINT32_C( 654147750) }, }, { { UINT32_C( 401264863), UINT32_C(2423803504), UINT32_C(3420220587), UINT32_C( 967854661) }, }, { { UINT32_C(4008428627), UINT32_C(1366435444), UINT32_C(1026568599), UINT32_C(2439196081) }, }, { { UINT32_C(1839746813), UINT32_C(1207771292), UINT32_C(3524516237), UINT32_C(1997325092) }, }, { { UINT32_C(4234540936), UINT32_C(3025000221), UINT32_C(3136388617), UINT32_C(2840286635) }, }, { { UINT32_C(1058469027), UINT32_C(2709984276), UINT32_C( 292854509), UINT32_C(3850928221) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_float64x2_t r = simde_vreinterpretq_f64_u32(a); simde_float64x2_private r_ = simde_float64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s8_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; } test_vec[] = { { { UINT32_C( 298228994), UINT32_C( 760894604), UINT32_C(3945189982), UINT32_C(1190286776) }, }, { { UINT32_C(1621610492), UINT32_C(1517645912), UINT32_C( 933037971), UINT32_C(2965300397) }, }, { { UINT32_C( 247563394), UINT32_C( 909843416), UINT32_C(2787271149), UINT32_C(3018593463) }, }, { { UINT32_C(1007916259), UINT32_C(2543224836), UINT32_C(1036989328), UINT32_C(2599259416) }, }, { { UINT32_C(3936923154), UINT32_C(3072451786), UINT32_C(4233970501), UINT32_C(1001343575) }, }, { { UINT32_C(3799499742), UINT32_C(3682143563), UINT32_C(1478051904), UINT32_C(3908175574) }, }, { { UINT32_C(2144508853), UINT32_C(3291935615), UINT32_C(2395051062), UINT32_C(3167318238) }, }, { { UINT32_C(2141077555), UINT32_C(2371491661), UINT32_C( 921072480), UINT32_C( 
790550650) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_int8x16_t r = simde_vreinterpretq_s8_u32(a); simde_int8x16_private r_ = simde_int8x16_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s16_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; } test_vec[] = { { { UINT32_C(2708620229), UINT32_C( 717266002), UINT32_C(1782875301), UINT32_C(1655354358) }, }, { { UINT32_C(1944699603), UINT32_C(2523708845), UINT32_C(3280833691), UINT32_C(3291942911) }, }, { { UINT32_C(2774903122), UINT32_C(3939444037), UINT32_C(2689864618), UINT32_C(2583887814) }, }, { { UINT32_C(1846406081), UINT32_C(1141144232), UINT32_C( 84381958), UINT32_C(4090052257) }, }, { { UINT32_C( 764948456), UINT32_C(4263012180), UINT32_C(1100901498), UINT32_C( 752590955) }, }, { { UINT32_C( 882567307), UINT32_C(1752735586), UINT32_C(3513614128), UINT32_C(2781165501) }, }, { { UINT32_C(3134414182), UINT32_C(1069149125), UINT32_C(3279968087), UINT32_C(2213501944) }, }, { { UINT32_C(2797046340), UINT32_C(1494167337), UINT32_C(1814789294), UINT32_C( 420606131) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_int16x8_t r = simde_vreinterpretq_s16_u32(a); simde_int16x8_private r_ = simde_int16x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s32_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; } test_vec[] = { { { UINT32_C(1237267332), UINT32_C(4212671649), UINT32_C( 854252956), UINT32_C(3669148588) }, }, { { UINT32_C( 180018859), UINT32_C(1066799467), UINT32_C( 265430687), UINT32_C(3192201018) }, }, { { UINT32_C(4077323346), UINT32_C(3840811080), 
UINT32_C(2853624317), UINT32_C(1199884444) }, }, { { UINT32_C( 307314343), UINT32_C(3998345295), UINT32_C(1224614670), UINT32_C(2483503938) }, }, { { UINT32_C(2408058439), UINT32_C( 745764398), UINT32_C(3956705871), UINT32_C(4180826706) }, }, { { UINT32_C(3893068952), UINT32_C(2060868716), UINT32_C(3267548288), UINT32_C(1582811415) }, }, { { UINT32_C( 116252632), UINT32_C(2771542357), UINT32_C(1032849643), UINT32_C(4214735714) }, }, { { UINT32_C(3018015303), UINT32_C( 523090335), UINT32_C(2783047566), UINT32_C(2432972985) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_int32x4_t r = simde_vreinterpretq_s32_u32(a); simde_int32x4_private r_ = simde_int32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s64_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; } test_vec[] = { { { UINT32_C( 904851114), UINT32_C( 357340730), UINT32_C(4279416342), UINT32_C(2212412929) }, }, { { UINT32_C( 954152116), UINT32_C(3376279475), UINT32_C( 588881638), UINT32_C(2462761447) }, }, { { UINT32_C(3922180527), UINT32_C(1711149903), UINT32_C(3412332746), UINT32_C(2202944206) }, }, { { UINT32_C( 716909943), UINT32_C( 49543452), UINT32_C(2116357527), UINT32_C(2098262222) }, }, { { UINT32_C(4167554985), UINT32_C(3042797035), UINT32_C(1149288822), UINT32_C(2076691972) }, }, { { UINT32_C( 413500411), UINT32_C( 320510332), UINT32_C(1955676326), UINT32_C(3656491568) }, }, { { UINT32_C(1691441273), UINT32_C( 874065598), UINT32_C(4084767215), UINT32_C(1668169831) }, }, { { UINT32_C(1065030851), UINT32_C(1397921197), UINT32_C( 96986069), UINT32_C(4292786309) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_int64x2_t r = 
simde_vreinterpretq_s64_u32(a); simde_int64x2_private r_ = simde_int64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u8_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; } test_vec[] = { { { UINT32_C(1105538222), UINT32_C(3755743071), UINT32_C(1126146626), UINT32_C(2100132933) }, }, { { UINT32_C(2827012237), UINT32_C(1563107933), UINT32_C(1378456288), UINT32_C( 200370268) }, }, { { UINT32_C(4081899156), UINT32_C( 802367725), UINT32_C( 275968715), UINT32_C(4237139823) }, }, { { UINT32_C(3315863144), UINT32_C( 270716720), UINT32_C(3194178401), UINT32_C(1204376755) }, }, { { UINT32_C( 406525227), UINT32_C( 155717182), UINT32_C(1863956992), UINT32_C(3261834842) }, }, { { UINT32_C(3850833844), UINT32_C(1089841886), UINT32_C(2852018421), UINT32_C(3639658413) }, }, { { UINT32_C( 451947484), UINT32_C( 975386681), UINT32_C(1302936819), UINT32_C(2534348259) }, }, { { UINT32_C( 58496548), UINT32_C( 910389824), UINT32_C(2011120074), UINT32_C(3830435592) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_uint8x16_t r = simde_vreinterpretq_u8_u32(a); simde_uint8x16_private r_ = simde_uint8x16_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u16_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; } test_vec[] = { { { UINT32_C(2012347191), UINT32_C( 228853529), UINT32_C(3184606882), UINT32_C(2192724089) }, }, { { UINT32_C(3249853973), UINT32_C( 441778337), UINT32_C(2784320210), UINT32_C(1495580194) }, }, { { UINT32_C(3536852409), UINT32_C(3202315292), UINT32_C( 729526706), UINT32_C( 397225473) }, }, { { UINT32_C(2782421252), UINT32_C( 884944482), UINT32_C(2866394504), UINT32_C( 738524531) }, }, { { UINT32_C( 805229586), UINT32_C(4226670153), UINT32_C(2418502031), 
UINT32_C(2611467415) }, }, { { UINT32_C(2554429494), UINT32_C( 919339182), UINT32_C( 685811381), UINT32_C(3059016867) }, }, { { UINT32_C( 48583609), UINT32_C(3237859889), UINT32_C(3528467515), UINT32_C( 795801849) }, }, { { UINT32_C( 650620792), UINT32_C(1700565935), UINT32_C(3717020729), UINT32_C(3667124769) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_uint16x8_t r = simde_vreinterpretq_u16_u32(a); simde_uint16x8_private r_ = simde_uint16x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u64_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; } test_vec[] = { { { UINT32_C(2570512756), UINT32_C(3830718427), UINT32_C(3229057798), UINT32_C(1611638719) }, }, { { UINT32_C(3729645958), UINT32_C(1980229974), UINT32_C(3183866417), UINT32_C(4227508103) }, }, { { UINT32_C(1804873872), UINT32_C(1448142928), UINT32_C( 722913131), UINT32_C(4102760046) }, }, { { UINT32_C(1574164487), UINT32_C(4074035905), UINT32_C(1420794317), UINT32_C(3494881600) }, }, { { UINT32_C( 691790809), UINT32_C( 931105739), UINT32_C(3227686482), UINT32_C(3283479996) }, }, { { UINT32_C(2250279109), UINT32_C( 796456034), UINT32_C(3447924621), UINT32_C(2845692624) }, }, { { UINT32_C(2178144693), UINT32_C(3082310244), UINT32_C(2759269096), UINT32_C(3429313543) }, }, { { UINT32_C( 391284916), UINT32_C( 172411516), UINT32_C(3252144881), UINT32_C(1382708636) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_private a_ = simde_uint32x4_to_private(a); simde_uint64x2_t r = simde_vreinterpretq_u64_u32(a); simde_uint64x2_private r_ = simde_uint64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_f32_u64 
(SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; } test_vec[] = { { { UINT64_C( 8779430749390950521) }, }, { { UINT64_C( 5787192216487944357) }, }, { { UINT64_C( 1212378576244495394) }, }, { { UINT64_C(11587186962892113876) }, }, { { UINT64_C(15419974588312544197) }, }, { { UINT64_C( 584303456178448674) }, }, { { UINT64_C( 9413963231737312176) }, }, { { UINT64_C(10928834220922494436) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_private a_ = simde_uint64x1_to_private(a); simde_float32x2_t r = simde_vreinterpret_f32_u64(a); simde_float32x2_private r_ = simde_float32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_f64_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; } test_vec[] = { { { UINT64_C(16607055731308338601) }, }, { { UINT64_C( 50860304274552481) }, }, { { UINT64_C( 9862794963658077786) }, }, { { UINT64_C(12392464720683648170) }, }, { { UINT64_C( 8689486400691157374) }, }, { { UINT64_C( 1541513907969526327) }, }, { { UINT64_C( 1716032456096988804) }, }, { { UINT64_C( 1658292891718957143) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_private a_ = simde_uint64x1_to_private(a); simde_float64x1_t r = simde_vreinterpret_f64_u64(a); simde_float64x1_private r_ = simde_float64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s8_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; } test_vec[] = { { { UINT64_C( 6944854049650135482) }, }, { { UINT64_C( 2643384172688610151) }, }, { { UINT64_C( 9694401264440423225) }, }, { { UINT64_C(15030064565503554144) }, }, { { UINT64_C( 7145989937937443285) }, }, { { UINT64_C(10467610559678064954) }, }, { { UINT64_C( 2610778010878432799) }, }, { { 
UINT64_C(11146599043610132938) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_private a_ = simde_uint64x1_to_private(a); simde_int8x8_t r = simde_vreinterpret_s8_u64(a); simde_int8x8_private r_ = simde_int8x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s16_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; } test_vec[] = { { { UINT64_C( 3986353266778962006) }, }, { { UINT64_C( 7176681434443920232) }, }, { { UINT64_C( 9525955270665666247) }, }, { { UINT64_C(13394712871255028064) }, }, { { UINT64_C( 4535400624906860172) }, }, { { UINT64_C(10650597281808260014) }, }, { { UINT64_C( 8458283561633060611) }, }, { { UINT64_C( 300254048795550477) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_private a_ = simde_uint64x1_to_private(a); simde_int16x4_t r = simde_vreinterpret_s16_u64(a); simde_int16x4_private r_ = simde_int16x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s32_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; } test_vec[] = { { { UINT64_C(18361485350794555758) }, }, { { UINT64_C( 4996426384215785705) }, }, { { UINT64_C(11006711228546924526) }, }, { { UINT64_C(10812642598701388252) }, }, { { UINT64_C(13100146462438765355) }, }, { { UINT64_C( 3308471443271409489) }, }, { { UINT64_C( 5760322122107306700) }, }, { { UINT64_C( 3446722680712776215) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_private a_ = simde_uint64x1_to_private(a); simde_int32x2_t r = simde_vreinterpret_s32_u64(a); simde_int32x2_private r_ = simde_int32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, 
sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s64_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; } test_vec[] = { { { UINT64_C( 5657919274295558869) }, }, { { UINT64_C( 3965159768771266939) }, }, { { UINT64_C( 5087912792556967306) }, }, { { UINT64_C( 256521758694905530) }, }, { { UINT64_C( 4270793162607544667) }, }, { { UINT64_C( 3095497553213558000) }, }, { { UINT64_C( 1981335759696853887) }, }, { { UINT64_C( 6874422688824928820) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_private a_ = simde_uint64x1_to_private(a); simde_int64x1_t r = simde_vreinterpret_s64_u64(a); simde_int64x1_private r_ = simde_int64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u8_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; } test_vec[] = { { { UINT64_C( 8751185780542309541) }, }, { { UINT64_C( 1964445804772003291) }, }, { { UINT64_C(10563097091345027527) }, }, { { UINT64_C(14780275390623131008) }, }, { { UINT64_C( 1886520334338935450) }, }, { { UINT64_C( 322713523396222801) }, }, { { UINT64_C( 4033920933575331731) }, }, { { UINT64_C( 3996921152915932802) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_private a_ = simde_uint64x1_to_private(a); simde_uint8x8_t r = simde_vreinterpret_u8_u64(a); simde_uint8x8_private r_ = simde_uint8x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u16_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; } test_vec[] = { { { UINT64_C(15807686447648144263) }, }, { { UINT64_C(13586087338080146970) }, }, { { UINT64_C( 1229395662396261383) }, }, { { UINT64_C( 3399258566591543321) }, }, { { UINT64_C( 5224131783317520018) }, }, { { UINT64_C(12978948260242732014) }, }, { { 
UINT64_C( 8100327936849454977) }, }, { { UINT64_C(17831050982460608157) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_private a_ = simde_uint64x1_to_private(a); simde_uint16x4_t r = simde_vreinterpret_u16_u64(a); simde_uint16x4_private r_ = simde_uint16x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u32_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; } test_vec[] = { { { UINT64_C( 9719491169896031772) }, }, { { UINT64_C(12281698703164900773) }, }, { { UINT64_C( 6074207952736949374) }, }, { { UINT64_C(12717252293388281153) }, }, { { UINT64_C(16494333851909467816) }, }, { { UINT64_C( 5060794498848419243) }, }, { { UINT64_C( 2731966319390488994) }, }, { { UINT64_C(13928960856351069961) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_private a_ = simde_uint64x1_to_private(a); simde_uint32x2_t r = simde_vreinterpret_u32_u64(a); simde_uint32x2_private r_ = simde_uint32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_f32_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; } test_vec[] = { { { UINT64_C( 1234218851471792026), UINT64_C( 2677674681134575894) }, }, { { UINT64_C(14515460789981832380), UINT64_C( 8215729928409844471) }, }, { { UINT64_C(18432155657786037817), UINT64_C(11578942536244388252) }, }, { { UINT64_C(16827624080118754063), UINT64_C( 5552162173842289092) }, }, { { UINT64_C(16230363121171568980), UINT64_C( 8591211574267264694) }, }, { { UINT64_C( 117863513551373256), UINT64_C( 8002802099049782445) }, }, { { UINT64_C(11810948322241091523), UINT64_C( 4217752382814901672) }, }, { { UINT64_C( 2754116619510988282), UINT64_C( 4755228904491897970) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) 
/ sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_float32x4_t r = simde_vreinterpretq_f32_u64(a); simde_float32x4_private r_ = simde_float32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_f64_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; } test_vec[] = { { { UINT64_C(15792920702104947431), UINT64_C( 4521637955443876984) }, }, { { UINT64_C(11707834749182451748), UINT64_C(15274192250269424470) }, }, { { UINT64_C( 661533889626495134), UINT64_C(10242121696137139489) }, }, { { UINT64_C( 883403401688261107), UINT64_C( 762781161339000790) }, }, { { UINT64_C( 388589280762374089), UINT64_C( 4528384042351654880) }, }, { { UINT64_C( 3022232369636497454), UINT64_C(11830534945542760887) }, }, { { UINT64_C(17731062154276080423), UINT64_C( 3955312798166219611) }, }, { { UINT64_C(11127563320735482609), UINT64_C( 32628938752481525) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_float64x2_t r = simde_vreinterpretq_f64_u64(a); simde_float64x2_private r_ = simde_float64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s8_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; } test_vec[] = { { { UINT64_C( 2322636136670922143), UINT64_C( 5598901547067610440) }, }, { { UINT64_C(13672952454968309605), UINT64_C(14002346704856045130) }, }, { { UINT64_C( 305025124668412889), UINT64_C( 5032682288508933183) }, }, { { UINT64_C( 7364161768165345188), UINT64_C( 3313994378587537432) }, }, { { UINT64_C( 6161068422823701695), UINT64_C(14644779267273947415) }, }, { { UINT64_C( 1819015547946221244), UINT64_C( 428495374170495345) }, }, { { UINT64_C(15359808819437779646), UINT64_C( 
8749097411148515194) }, }, { { UINT64_C(10259639877273561927), UINT64_C(14228074120516711721) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_int8x16_t r = simde_vreinterpretq_s8_u64(a); simde_int8x16_private r_ = simde_int8x16_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s16_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; } test_vec[] = { { { UINT64_C( 4207544517074619564), UINT64_C(17983195729350452668) }, }, { { UINT64_C(14769595933460037143), UINT64_C( 2242542422084417963) }, }, { { UINT64_C(17209837690262716200), UINT64_C( 5997420568642393605) }, }, { { UINT64_C(18206080017375701916), UINT64_C( 110807516602613238) }, }, { { UINT64_C( 7695087595622811049), UINT64_C( 1303202160156891015) }, }, { { UINT64_C( 963653872485538323), UINT64_C( 4409023572102015572) }, }, { { UINT64_C(14093140080987851716), UINT64_C( 4306456267775623227) }, }, { { UINT64_C(11230286730154127797), UINT64_C( 1421440177691774952) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_int16x8_t r = simde_vreinterpretq_s16_u64(a); simde_int16x8_private r_ = simde_int16x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s32_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; } test_vec[] = { { { UINT64_C(15092064537549437282), UINT64_C(10864827923211013704) }, }, { { UINT64_C( 5535174154542260813), UINT64_C(10136250544302758728) }, }, { { UINT64_C(11761525752944890796), UINT64_C(16296547469103199523) }, }, { { UINT64_C(13760420417112478233), UINT64_C( 6568748200859368817) }, }, { { UINT64_C( 9543969152595921780), 
UINT64_C(12855874340911424243) }, }, { { UINT64_C(13043640873963515600), UINT64_C(10293671953509136847) }, }, { { UINT64_C(10333738717266419332), UINT64_C(10069345882703560306) }, }, { { UINT64_C( 4316653424248691182), UINT64_C( 3798949770022238223) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_int32x4_t r = simde_vreinterpretq_s32_u64(a); simde_int32x4_private r_ = simde_int32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s64_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; } test_vec[] = { { { UINT64_C( 2573975289093980101), UINT64_C(12987093788008753529) }, }, { { UINT64_C(10893662621074775982), UINT64_C( 9071823841222376635) }, }, { { UINT64_C( 728885576711600406), UINT64_C( 2945388765883141678) }, }, { { UINT64_C(14412968017312414975), UINT64_C(12400870028522094562) }, }, { { UINT64_C(10975768628114679222), UINT64_C( 6085387108246304209) }, }, { { UINT64_C( 8391551434180684704), UINT64_C(13909410374581439942) }, }, { { UINT64_C( 4759841299535277580), UINT64_C( 1528972579837956751) }, }, { { UINT64_C( 9759642074302577259), UINT64_C(12992863654388138115) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_int64x2_t r = simde_vreinterpretq_s64_u64(a); simde_int64x2_private r_ = simde_int64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u8_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; } test_vec[] = { { { UINT64_C( 9602034079943150237), UINT64_C(15071447892160942436) }, }, { { UINT64_C( 7977050267529101445), UINT64_C(15152987898855457686) }, }, { { UINT64_C( 3074072634548977156), 
UINT64_C(14497327183391996253) }, }, { { UINT64_C( 8357763767761154458), UINT64_C( 5568845286471110208) }, }, { { UINT64_C(18226659911175066113), UINT64_C( 1720527327522890708) }, }, { { UINT64_C(16612550484748069179), UINT64_C( 5208029548622103729) }, }, { { UINT64_C(12002096110392883190), UINT64_C(12770693919310474149) }, }, { { UINT64_C( 3752436936427036807), UINT64_C( 623876170662637081) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_uint8x16_t r = simde_vreinterpretq_u8_u64(a); simde_uint8x16_private r_ = simde_uint8x16_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u16_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; } test_vec[] = { { { UINT64_C( 4581232213571832684), UINT64_C( 1476113207327915279) }, }, { { UINT64_C( 1473717667153826830), UINT64_C( 3779245527683887938) }, }, { { UINT64_C(15405631003950577694), UINT64_C( 1801577341105853896) }, }, { { UINT64_C(16199323197070044241), UINT64_C( 1548692872333386625) }, }, { { UINT64_C( 5306202986939200775), UINT64_C( 8315111807944860737) }, }, { { UINT64_C(10277774060136857378), UINT64_C( 6083332082561989985) }, }, { { UINT64_C(13390826188593827083), UINT64_C( 7133537160237746794) }, }, { { UINT64_C(10125996505298054890), UINT64_C(13808688142819286529) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_uint16x8_t r = simde_vreinterpretq_u16_u64(a); simde_uint16x8_private r_ = simde_uint16x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u32_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; } test_vec[] = { { { UINT64_C( 9421253059950992200), 
UINT64_C(18190508131208484342) }, }, { { UINT64_C( 3715591700242667934), UINT64_C( 6523973211315662879) }, }, { { UINT64_C(17575881375336364714), UINT64_C(13558360581878608743) }, }, { { UINT64_C( 1206238956071125651), UINT64_C(15728700958152614108) }, }, { { UINT64_C( 1548441871217433484), UINT64_C(10482243575994123678) }, }, { { UINT64_C(11163255515110986731), UINT64_C( 1316123692325467193) }, }, { { UINT64_C( 5633185853516514410), UINT64_C(13955954676396092701) }, }, { { UINT64_C(15865768773427186133), UINT64_C(17299602177782036092) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_private a_ = simde_uint64x2_to_private(a); simde_uint32x4_t r = simde_vreinterpretq_u32_u64(a); simde_uint32x4_private r_ = simde_uint32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_f64_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 640.27), SIMDE_FLOAT32_C( -585.88) }, }, { { SIMDE_FLOAT32_C( 495.02), SIMDE_FLOAT32_C( -454.59) }, }, { { SIMDE_FLOAT32_C( -794.50), SIMDE_FLOAT32_C( -757.76) }, }, { { SIMDE_FLOAT32_C( -987.01), SIMDE_FLOAT32_C( -967.63) }, }, { { SIMDE_FLOAT32_C( 776.12), SIMDE_FLOAT32_C( -392.25) }, }, { { SIMDE_FLOAT32_C( 488.06), SIMDE_FLOAT32_C( -965.22) }, }, { { SIMDE_FLOAT32_C( -118.01), SIMDE_FLOAT32_C( 15.16) }, }, { { SIMDE_FLOAT32_C( -392.02), SIMDE_FLOAT32_C( 26.20) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_float64x1_t r = simde_vreinterpret_f64_f32(a); simde_float64x1_private r_ = simde_float64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s8_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { 
simde_float32 a[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -587.63), SIMDE_FLOAT32_C( 585.48) }, }, { { SIMDE_FLOAT32_C( -661.12), SIMDE_FLOAT32_C( -256.06) }, }, { { SIMDE_FLOAT32_C( 118.35), SIMDE_FLOAT32_C( 805.41) }, }, { { SIMDE_FLOAT32_C( 505.78), SIMDE_FLOAT32_C( -965.54) }, }, { { SIMDE_FLOAT32_C( -758.58), SIMDE_FLOAT32_C( 899.30) }, }, { { SIMDE_FLOAT32_C( -523.55), SIMDE_FLOAT32_C( -97.39) }, }, { { SIMDE_FLOAT32_C( -615.32), SIMDE_FLOAT32_C( -397.40) }, }, { { SIMDE_FLOAT32_C( -120.59), SIMDE_FLOAT32_C( 913.00) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_int8x8_t r = simde_vreinterpret_s8_f32(a); simde_int8x8_private r_ = simde_int8x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s16_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 965.56), SIMDE_FLOAT32_C( 406.80) }, }, { { SIMDE_FLOAT32_C( -967.05), SIMDE_FLOAT32_C( -359.28) }, }, { { SIMDE_FLOAT32_C( -12.20), SIMDE_FLOAT32_C( 830.53) }, }, { { SIMDE_FLOAT32_C( -617.07), SIMDE_FLOAT32_C( 419.80) }, }, { { SIMDE_FLOAT32_C( -178.92), SIMDE_FLOAT32_C( 378.69) }, }, { { SIMDE_FLOAT32_C( 707.86), SIMDE_FLOAT32_C( 990.09) }, }, { { SIMDE_FLOAT32_C( 19.78), SIMDE_FLOAT32_C( -485.54) }, }, { { SIMDE_FLOAT32_C( 4.31), SIMDE_FLOAT32_C( 85.98) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_int16x4_t r = simde_vreinterpret_s16_f32(a); simde_int16x4_private r_ = simde_int16x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; } test_vec[] = { { { 
SIMDE_FLOAT32_C( 795.77), SIMDE_FLOAT32_C( 550.43) }, }, { { SIMDE_FLOAT32_C( -83.89), SIMDE_FLOAT32_C( 973.87) }, }, { { SIMDE_FLOAT32_C( 261.17), SIMDE_FLOAT32_C( 160.88) }, }, { { SIMDE_FLOAT32_C( -625.84), SIMDE_FLOAT32_C( 282.41) }, }, { { SIMDE_FLOAT32_C( 796.67), SIMDE_FLOAT32_C( -170.27) }, }, { { SIMDE_FLOAT32_C( 520.80), SIMDE_FLOAT32_C( 944.18) }, }, { { SIMDE_FLOAT32_C( -80.46), SIMDE_FLOAT32_C( 719.27) }, }, { { SIMDE_FLOAT32_C( 319.33), SIMDE_FLOAT32_C( -849.20) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_int32x2_t r = simde_vreinterpret_s32_f32(a); simde_int32x2_private r_ = simde_int32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s64_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 516.10), SIMDE_FLOAT32_C( -198.61) }, }, { { SIMDE_FLOAT32_C( -669.75), SIMDE_FLOAT32_C( -733.34) }, }, { { SIMDE_FLOAT32_C( -1.26), SIMDE_FLOAT32_C( -21.15) }, }, { { SIMDE_FLOAT32_C( 991.35), SIMDE_FLOAT32_C( -65.88) }, }, { { SIMDE_FLOAT32_C( 207.04), SIMDE_FLOAT32_C( -249.79) }, }, { { SIMDE_FLOAT32_C( -595.20), SIMDE_FLOAT32_C( 987.70) }, }, { { SIMDE_FLOAT32_C( -316.55), SIMDE_FLOAT32_C( 458.49) }, }, { { SIMDE_FLOAT32_C( 515.32), SIMDE_FLOAT32_C( -268.29) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_int64x1_t r = simde_vreinterpret_s64_f32(a); simde_int64x1_private r_ = simde_int64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u8_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -703.61), 
SIMDE_FLOAT32_C( -199.03) }, }, { { SIMDE_FLOAT32_C( -275.51), SIMDE_FLOAT32_C( -704.61) }, }, { { SIMDE_FLOAT32_C( 166.82), SIMDE_FLOAT32_C( 78.78) }, }, { { SIMDE_FLOAT32_C( 393.16), SIMDE_FLOAT32_C( 135.85) }, }, { { SIMDE_FLOAT32_C( -375.89), SIMDE_FLOAT32_C( -388.41) }, }, { { SIMDE_FLOAT32_C( 975.54), SIMDE_FLOAT32_C( 66.77) }, }, { { SIMDE_FLOAT32_C( -130.34), SIMDE_FLOAT32_C( -425.75) }, }, { { SIMDE_FLOAT32_C( 31.57), SIMDE_FLOAT32_C( 530.63) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_uint8x8_t r = simde_vreinterpret_u8_f32(a); simde_uint8x8_private r_ = simde_uint8x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u16_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 679.90), SIMDE_FLOAT32_C( 888.01) }, }, { { SIMDE_FLOAT32_C( 262.65), SIMDE_FLOAT32_C( 704.41) }, }, { { SIMDE_FLOAT32_C( -705.92), SIMDE_FLOAT32_C( -576.78) }, }, { { SIMDE_FLOAT32_C( 514.37), SIMDE_FLOAT32_C( -767.53) }, }, { { SIMDE_FLOAT32_C( 476.11), SIMDE_FLOAT32_C( -765.14) }, }, { { SIMDE_FLOAT32_C( 623.02), SIMDE_FLOAT32_C( -915.78) }, }, { { SIMDE_FLOAT32_C( -765.19), SIMDE_FLOAT32_C( -713.01) }, }, { { SIMDE_FLOAT32_C( 118.88), SIMDE_FLOAT32_C( 271.57) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_uint16x4_t r = simde_vreinterpret_u16_f32(a); simde_uint16x4_private r_ = simde_uint16x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 661.50), SIMDE_FLOAT32_C( -338.84) }, }, { { 
SIMDE_FLOAT32_C( 472.10), SIMDE_FLOAT32_C( -850.16) }, }, { { SIMDE_FLOAT32_C( 915.36), SIMDE_FLOAT32_C( -912.72) }, }, { { SIMDE_FLOAT32_C( 252.30), SIMDE_FLOAT32_C( 744.88) }, }, { { SIMDE_FLOAT32_C( 220.69), SIMDE_FLOAT32_C( -108.88) }, }, { { SIMDE_FLOAT32_C( -892.39), SIMDE_FLOAT32_C( 442.34) }, }, { { SIMDE_FLOAT32_C( -537.40), SIMDE_FLOAT32_C( -490.27) }, }, { { SIMDE_FLOAT32_C( -224.54), SIMDE_FLOAT32_C( 427.88) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_uint32x2_t r = simde_vreinterpret_u32_f32(a); simde_uint32x2_private r_ = simde_uint32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u64_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -519.07), SIMDE_FLOAT32_C( -844.22) }, }, { { SIMDE_FLOAT32_C( -231.95), SIMDE_FLOAT32_C( -916.84) }, }, { { SIMDE_FLOAT32_C( 514.42), SIMDE_FLOAT32_C( -429.97) }, }, { { SIMDE_FLOAT32_C( 411.33), SIMDE_FLOAT32_C( -473.13) }, }, { { SIMDE_FLOAT32_C( 825.45), SIMDE_FLOAT32_C( -559.04) }, }, { { SIMDE_FLOAT32_C( 33.66), SIMDE_FLOAT32_C( -851.23) }, }, { { SIMDE_FLOAT32_C( -750.80), SIMDE_FLOAT32_C( 708.79) }, }, { { SIMDE_FLOAT32_C( 71.84), SIMDE_FLOAT32_C( 585.91) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_uint64x1_t r = simde_vreinterpret_u64_f32(a); simde_uint64x1_private r_ = simde_uint64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_f64_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -811.50), SIMDE_FLOAT32_C( 514.96), SIMDE_FLOAT32_C( -136.00), 
SIMDE_FLOAT32_C( -141.79) }, }, { { SIMDE_FLOAT32_C( 61.02), SIMDE_FLOAT32_C( 477.62), SIMDE_FLOAT32_C( 148.51), SIMDE_FLOAT32_C( 991.88) }, }, { { SIMDE_FLOAT32_C( -930.50), SIMDE_FLOAT32_C( 415.23), SIMDE_FLOAT32_C( -261.81), SIMDE_FLOAT32_C( 239.22) }, }, { { SIMDE_FLOAT32_C( -235.74), SIMDE_FLOAT32_C( -683.57), SIMDE_FLOAT32_C( -77.80), SIMDE_FLOAT32_C( 884.34) }, }, { { SIMDE_FLOAT32_C( -663.55), SIMDE_FLOAT32_C( 620.01), SIMDE_FLOAT32_C( -701.35), SIMDE_FLOAT32_C( 534.67) }, }, { { SIMDE_FLOAT32_C( -55.34), SIMDE_FLOAT32_C( 286.47), SIMDE_FLOAT32_C( -242.86), SIMDE_FLOAT32_C( -763.34) }, }, { { SIMDE_FLOAT32_C( 995.87), SIMDE_FLOAT32_C( 465.38), SIMDE_FLOAT32_C( 128.46), SIMDE_FLOAT32_C( -623.09) }, }, { { SIMDE_FLOAT32_C( 144.58), SIMDE_FLOAT32_C( 923.36), SIMDE_FLOAT32_C( -420.92), SIMDE_FLOAT32_C( 333.08) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_float64x2_t r = simde_vreinterpretq_f64_f32(a); simde_float64x2_private r_ = simde_float64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s8_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -715.28), SIMDE_FLOAT32_C( 33.48), SIMDE_FLOAT32_C( 229.63), SIMDE_FLOAT32_C( -742.42) }, }, { { SIMDE_FLOAT32_C( 472.64), SIMDE_FLOAT32_C( -446.37), SIMDE_FLOAT32_C( 664.03), SIMDE_FLOAT32_C( 26.76) }, }, { { SIMDE_FLOAT32_C( -329.58), SIMDE_FLOAT32_C( 330.64), SIMDE_FLOAT32_C( -243.69), SIMDE_FLOAT32_C( -277.14) }, }, { { SIMDE_FLOAT32_C( 610.24), SIMDE_FLOAT32_C( 639.80), SIMDE_FLOAT32_C( -784.18), SIMDE_FLOAT32_C( -365.62) }, }, { { SIMDE_FLOAT32_C( 114.54), SIMDE_FLOAT32_C( -652.97), SIMDE_FLOAT32_C( -379.89), SIMDE_FLOAT32_C( -610.92) }, }, { { SIMDE_FLOAT32_C( 952.14), SIMDE_FLOAT32_C( -538.54), SIMDE_FLOAT32_C( -682.53), 
SIMDE_FLOAT32_C( 242.56) }, }, { { SIMDE_FLOAT32_C( -333.51), SIMDE_FLOAT32_C( 450.97), SIMDE_FLOAT32_C( -236.98), SIMDE_FLOAT32_C( 47.06) }, }, { { SIMDE_FLOAT32_C( 18.38), SIMDE_FLOAT32_C( 953.45), SIMDE_FLOAT32_C( 221.44), SIMDE_FLOAT32_C( 303.10) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_int8x16_t r = simde_vreinterpretq_s8_f32(a); simde_int8x16_private r_ = simde_int8x16_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s16_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -765.62), SIMDE_FLOAT32_C( 361.08), SIMDE_FLOAT32_C( 970.49), SIMDE_FLOAT32_C( 464.20) }, }, { { SIMDE_FLOAT32_C( -945.27), SIMDE_FLOAT32_C( -68.63), SIMDE_FLOAT32_C( 229.51), SIMDE_FLOAT32_C( -351.34) }, }, { { SIMDE_FLOAT32_C( 542.83), SIMDE_FLOAT32_C( -123.36), SIMDE_FLOAT32_C( 927.83), SIMDE_FLOAT32_C( -243.91) }, }, { { SIMDE_FLOAT32_C( -330.95), SIMDE_FLOAT32_C( 461.31), SIMDE_FLOAT32_C( 836.57), SIMDE_FLOAT32_C( 92.21) }, }, { { SIMDE_FLOAT32_C( -186.44), SIMDE_FLOAT32_C( -562.27), SIMDE_FLOAT32_C( -320.25), SIMDE_FLOAT32_C( -379.25) }, }, { { SIMDE_FLOAT32_C( 306.79), SIMDE_FLOAT32_C( -832.12), SIMDE_FLOAT32_C( 438.15), SIMDE_FLOAT32_C( -467.18) }, }, { { SIMDE_FLOAT32_C( -657.92), SIMDE_FLOAT32_C( -254.81), SIMDE_FLOAT32_C( 831.78), SIMDE_FLOAT32_C( 236.89) }, }, { { SIMDE_FLOAT32_C( 865.09), SIMDE_FLOAT32_C( 532.74), SIMDE_FLOAT32_C( -968.34), SIMDE_FLOAT32_C( -900.54) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_int16x8_t r = simde_vreinterpretq_s16_f32(a); simde_int16x8_private r_ = simde_int16x8_to_private(r); simde_assert_equal_i(0, 
simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -932.02), SIMDE_FLOAT32_C( -132.75), SIMDE_FLOAT32_C( -699.06), SIMDE_FLOAT32_C( -679.87) }, }, { { SIMDE_FLOAT32_C( -936.24), SIMDE_FLOAT32_C( 610.41), SIMDE_FLOAT32_C( -391.45), SIMDE_FLOAT32_C( 201.79) }, }, { { SIMDE_FLOAT32_C( -432.76), SIMDE_FLOAT32_C( 405.49), SIMDE_FLOAT32_C( 203.70), SIMDE_FLOAT32_C( -940.94) }, }, { { SIMDE_FLOAT32_C( -913.18), SIMDE_FLOAT32_C( 765.98), SIMDE_FLOAT32_C( -413.09), SIMDE_FLOAT32_C( -839.58) }, }, { { SIMDE_FLOAT32_C( 525.54), SIMDE_FLOAT32_C( -423.97), SIMDE_FLOAT32_C( -466.96), SIMDE_FLOAT32_C( 911.94) }, }, { { SIMDE_FLOAT32_C( 614.98), SIMDE_FLOAT32_C( 805.06), SIMDE_FLOAT32_C( -958.00), SIMDE_FLOAT32_C( -950.16) }, }, { { SIMDE_FLOAT32_C( -656.23), SIMDE_FLOAT32_C( 55.72), SIMDE_FLOAT32_C( 110.93), SIMDE_FLOAT32_C( 458.68) }, }, { { SIMDE_FLOAT32_C( -160.98), SIMDE_FLOAT32_C( 817.98), SIMDE_FLOAT32_C( 396.81), SIMDE_FLOAT32_C( -92.99) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_int32x4_t r = simde_vreinterpretq_s32_f32(a); simde_int32x4_private r_ = simde_int32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s64_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 518.36), SIMDE_FLOAT32_C( -882.32), SIMDE_FLOAT32_C( -73.64), SIMDE_FLOAT32_C( 749.12) }, }, { { SIMDE_FLOAT32_C( -496.02), SIMDE_FLOAT32_C( -306.74), SIMDE_FLOAT32_C( -922.59), SIMDE_FLOAT32_C( 847.19) }, }, { { SIMDE_FLOAT32_C( 777.17), SIMDE_FLOAT32_C( 196.12), SIMDE_FLOAT32_C( -547.27), SIMDE_FLOAT32_C( -497.25) }, }, { { SIMDE_FLOAT32_C( -324.90), SIMDE_FLOAT32_C( -358.94), SIMDE_FLOAT32_C( 
-741.14), SIMDE_FLOAT32_C( -164.39) }, }, { { SIMDE_FLOAT32_C( -823.67), SIMDE_FLOAT32_C( -347.30), SIMDE_FLOAT32_C( 683.23), SIMDE_FLOAT32_C( -581.31) }, }, { { SIMDE_FLOAT32_C( -908.06), SIMDE_FLOAT32_C( 681.07), SIMDE_FLOAT32_C( -895.66), SIMDE_FLOAT32_C( -13.27) }, }, { { SIMDE_FLOAT32_C( 38.57), SIMDE_FLOAT32_C( -801.68), SIMDE_FLOAT32_C( -141.46), SIMDE_FLOAT32_C( 275.72) }, }, { { SIMDE_FLOAT32_C( -841.66), SIMDE_FLOAT32_C( -993.81), SIMDE_FLOAT32_C( -416.02), SIMDE_FLOAT32_C( 676.69) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_int64x2_t r = simde_vreinterpretq_s64_f32(a); simde_int64x2_private r_ = simde_int64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u8_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 666.23), SIMDE_FLOAT32_C( -544.03), SIMDE_FLOAT32_C( 130.05), SIMDE_FLOAT32_C( -777.18) }, }, { { SIMDE_FLOAT32_C( 521.70), SIMDE_FLOAT32_C( 33.98), SIMDE_FLOAT32_C( -958.60), SIMDE_FLOAT32_C( -408.56) }, }, { { SIMDE_FLOAT32_C( 396.34), SIMDE_FLOAT32_C( -364.03), SIMDE_FLOAT32_C( 719.77), SIMDE_FLOAT32_C( 483.05) }, }, { { SIMDE_FLOAT32_C( 307.42), SIMDE_FLOAT32_C( 916.12), SIMDE_FLOAT32_C( -506.50), SIMDE_FLOAT32_C( 167.94) }, }, { { SIMDE_FLOAT32_C( 796.69), SIMDE_FLOAT32_C( -227.61), SIMDE_FLOAT32_C( 168.78), SIMDE_FLOAT32_C( 769.87) }, }, { { SIMDE_FLOAT32_C( 237.37), SIMDE_FLOAT32_C( -942.90), SIMDE_FLOAT32_C( 126.56), SIMDE_FLOAT32_C( -99.19) }, }, { { SIMDE_FLOAT32_C( 684.50), SIMDE_FLOAT32_C( 809.18), SIMDE_FLOAT32_C( 523.87), SIMDE_FLOAT32_C( 945.93) }, }, { { SIMDE_FLOAT32_C( 492.61), SIMDE_FLOAT32_C( 718.52), SIMDE_FLOAT32_C( -237.77), SIMDE_FLOAT32_C( 158.83) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t 
a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_uint8x16_t r = simde_vreinterpretq_u8_f32(a); simde_uint8x16_private r_ = simde_uint8x16_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u16_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -555.05), SIMDE_FLOAT32_C( 929.17), SIMDE_FLOAT32_C( -878.30), SIMDE_FLOAT32_C( 403.08) }, }, { { SIMDE_FLOAT32_C( -814.23), SIMDE_FLOAT32_C( 72.21), SIMDE_FLOAT32_C( -782.92), SIMDE_FLOAT32_C( -545.57) }, }, { { SIMDE_FLOAT32_C( -150.39), SIMDE_FLOAT32_C( -981.73), SIMDE_FLOAT32_C( 393.48), SIMDE_FLOAT32_C( -162.45) }, }, { { SIMDE_FLOAT32_C( -204.82), SIMDE_FLOAT32_C( -148.81), SIMDE_FLOAT32_C( -48.01), SIMDE_FLOAT32_C( -385.48) }, }, { { SIMDE_FLOAT32_C( -745.95), SIMDE_FLOAT32_C( 651.42), SIMDE_FLOAT32_C( 885.60), SIMDE_FLOAT32_C( 882.69) }, }, { { SIMDE_FLOAT32_C( 987.57), SIMDE_FLOAT32_C( -664.87), SIMDE_FLOAT32_C( -294.19), SIMDE_FLOAT32_C( 570.81) }, }, { { SIMDE_FLOAT32_C( -549.84), SIMDE_FLOAT32_C( 623.57), SIMDE_FLOAT32_C( -114.75), SIMDE_FLOAT32_C( -564.92) }, }, { { SIMDE_FLOAT32_C( 165.89), SIMDE_FLOAT32_C( -376.09), SIMDE_FLOAT32_C( 997.79), SIMDE_FLOAT32_C( 610.84) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_uint16x8_t r = simde_vreinterpretq_u16_f32(a); simde_uint16x8_private r_ = simde_uint16x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -639.27), SIMDE_FLOAT32_C( 2.23), SIMDE_FLOAT32_C( -265.16), SIMDE_FLOAT32_C( -362.94) }, }, { { SIMDE_FLOAT32_C( -445.88), SIMDE_FLOAT32_C( 968.53), SIMDE_FLOAT32_C( 
312.26), SIMDE_FLOAT32_C( 936.91) }, }, { { SIMDE_FLOAT32_C( -465.40), SIMDE_FLOAT32_C( -23.17), SIMDE_FLOAT32_C( 673.23), SIMDE_FLOAT32_C( -603.22) }, }, { { SIMDE_FLOAT32_C( -429.30), SIMDE_FLOAT32_C( 406.19), SIMDE_FLOAT32_C( 726.96), SIMDE_FLOAT32_C( -436.99) }, }, { { SIMDE_FLOAT32_C( 52.93), SIMDE_FLOAT32_C( -904.72), SIMDE_FLOAT32_C( 225.27), SIMDE_FLOAT32_C( -62.92) }, }, { { SIMDE_FLOAT32_C( -719.26), SIMDE_FLOAT32_C( 97.89), SIMDE_FLOAT32_C( -808.00), SIMDE_FLOAT32_C( 701.29) }, }, { { SIMDE_FLOAT32_C( 151.65), SIMDE_FLOAT32_C( 842.19), SIMDE_FLOAT32_C( 331.08), SIMDE_FLOAT32_C( 329.68) }, }, { { SIMDE_FLOAT32_C( 304.95), SIMDE_FLOAT32_C( 771.15), SIMDE_FLOAT32_C( 630.46), SIMDE_FLOAT32_C( 665.69) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_uint32x4_t r = simde_vreinterpretq_u32_f32(a); simde_uint32x4_private r_ = simde_uint32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u64_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 231.72), SIMDE_FLOAT32_C( 123.26), SIMDE_FLOAT32_C( -786.91), SIMDE_FLOAT32_C( -138.95) }, }, { { SIMDE_FLOAT32_C( 166.21), SIMDE_FLOAT32_C( 199.26), SIMDE_FLOAT32_C( -881.03), SIMDE_FLOAT32_C( 363.41) }, }, { { SIMDE_FLOAT32_C( -208.25), SIMDE_FLOAT32_C( -126.94), SIMDE_FLOAT32_C( -663.57), SIMDE_FLOAT32_C( -651.99) }, }, { { SIMDE_FLOAT32_C( -722.64), SIMDE_FLOAT32_C( -951.85), SIMDE_FLOAT32_C( -94.46), SIMDE_FLOAT32_C( 924.65) }, }, { { SIMDE_FLOAT32_C( 896.69), SIMDE_FLOAT32_C( -6.54), SIMDE_FLOAT32_C( -785.89), SIMDE_FLOAT32_C( -806.08) }, }, { { SIMDE_FLOAT32_C( -595.88), SIMDE_FLOAT32_C( 347.71), SIMDE_FLOAT32_C( 118.81), SIMDE_FLOAT32_C( -850.69) }, }, { { SIMDE_FLOAT32_C( -873.62), SIMDE_FLOAT32_C( 679.36), SIMDE_FLOAT32_C( 461.45), 
SIMDE_FLOAT32_C( -429.35) }, }, { { SIMDE_FLOAT32_C( -437.13), SIMDE_FLOAT32_C( -811.02), SIMDE_FLOAT32_C( 684.01), SIMDE_FLOAT32_C( 794.59) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_private a_ = simde_float32x4_to_private(a); simde_uint64x2_t r = simde_vreinterpretq_u64_f32(a); simde_uint64x2_private r_ = simde_uint64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_f32_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( 28.67) }, }, { { SIMDE_FLOAT64_C( 473.75) }, }, { { SIMDE_FLOAT64_C( 415.56) }, }, { { SIMDE_FLOAT64_C( 753.78) }, }, { { SIMDE_FLOAT64_C( 619.03) }, }, { { SIMDE_FLOAT64_C( 870.52) }, }, { { SIMDE_FLOAT64_C( -321.27) }, }, { { SIMDE_FLOAT64_C( -568.79) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_float32x2_t r = simde_vreinterpret_f32_f64(a); simde_float32x2_private r_ = simde_float32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s8_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( -39.43) }, }, { { SIMDE_FLOAT64_C( 241.14) }, }, { { SIMDE_FLOAT64_C( 480.99) }, }, { { SIMDE_FLOAT64_C( 763.77) }, }, { { SIMDE_FLOAT64_C( 486.92) }, }, { { SIMDE_FLOAT64_C( -31.94) }, }, { { SIMDE_FLOAT64_C( 873.55) }, }, { { SIMDE_FLOAT64_C( 804.51) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_int8x8_t r = simde_vreinterpret_s8_f64(a); simde_int8x8_private r_ = simde_int8x8_to_private(r); 
simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s16_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( 937.89) }, }, { { SIMDE_FLOAT64_C( -770.55) }, }, { { SIMDE_FLOAT64_C( 954.11) }, }, { { SIMDE_FLOAT64_C( -761.79) }, }, { { SIMDE_FLOAT64_C( -483.85) }, }, { { SIMDE_FLOAT64_C( -722.69) }, }, { { SIMDE_FLOAT64_C( 63.26) }, }, { { SIMDE_FLOAT64_C( 347.83) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_int16x4_t r = simde_vreinterpret_s16_f64(a); simde_int16x4_private r_ = simde_int16x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s32_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( -321.48) }, }, { { SIMDE_FLOAT64_C( 239.70) }, }, { { SIMDE_FLOAT64_C( -593.78) }, }, { { SIMDE_FLOAT64_C( -961.46) }, }, { { SIMDE_FLOAT64_C( -714.06) }, }, { { SIMDE_FLOAT64_C( 876.41) }, }, { { SIMDE_FLOAT64_C( 192.37) }, }, { { SIMDE_FLOAT64_C( 113.12) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_int32x2_t r = simde_vreinterpret_s32_f64(a); simde_int32x2_private r_ = simde_int32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( -679.32) }, }, { { SIMDE_FLOAT64_C( 926.71) }, }, { { SIMDE_FLOAT64_C( 780.61) }, }, { { SIMDE_FLOAT64_C( 116.09) }, }, { { SIMDE_FLOAT64_C( 481.72) }, }, { { SIMDE_FLOAT64_C( -377.58) }, }, { { SIMDE_FLOAT64_C( -976.13) }, }, { { SIMDE_FLOAT64_C( 
-213.49) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_int64x1_t r = simde_vreinterpret_s64_f64(a); simde_int64x1_private r_ = simde_int64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u8_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( 642.50) }, }, { { SIMDE_FLOAT64_C( -99.78) }, }, { { SIMDE_FLOAT64_C( 942.91) }, }, { { SIMDE_FLOAT64_C( -784.51) }, }, { { SIMDE_FLOAT64_C( 20.18) }, }, { { SIMDE_FLOAT64_C( 600.51) }, }, { { SIMDE_FLOAT64_C( -214.07) }, }, { { SIMDE_FLOAT64_C( -775.71) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_uint8x8_t r = simde_vreinterpret_u8_f64(a); simde_uint8x8_private r_ = simde_uint8x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u16_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( -646.34) }, }, { { SIMDE_FLOAT64_C( 355.80) }, }, { { SIMDE_FLOAT64_C( -63.18) }, }, { { SIMDE_FLOAT64_C( 300.54) }, }, { { SIMDE_FLOAT64_C( -572.37) }, }, { { SIMDE_FLOAT64_C( -193.89) }, }, { { SIMDE_FLOAT64_C( -662.99) }, }, { { SIMDE_FLOAT64_C( 16.90) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_uint16x4_t r = simde_vreinterpret_u16_f64(a); simde_uint16x4_private r_ = simde_uint16x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u32_f64 (SIMDE_MUNIT_TEST_ARGS) { 
struct { simde_float64 a[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( -775.04) }, }, { { SIMDE_FLOAT64_C( -75.64) }, }, { { SIMDE_FLOAT64_C( -833.77) }, }, { { SIMDE_FLOAT64_C( 676.43) }, }, { { SIMDE_FLOAT64_C( -944.49) }, }, { { SIMDE_FLOAT64_C( -660.68) }, }, { { SIMDE_FLOAT64_C( 634.14) }, }, { { SIMDE_FLOAT64_C( 224.28) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_uint32x2_t r = simde_vreinterpret_u32_f64(a); simde_uint32x2_private r_ = simde_uint32x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpret_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( 417.49) }, }, { { SIMDE_FLOAT64_C( -757.56) }, }, { { SIMDE_FLOAT64_C( -880.56) }, }, { { SIMDE_FLOAT64_C( 795.04) }, }, { { SIMDE_FLOAT64_C( -500.89) }, }, { { SIMDE_FLOAT64_C( -877.89) }, }, { { SIMDE_FLOAT64_C( 559.74) }, }, { { SIMDE_FLOAT64_C( 235.55) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_uint64x1_t r = simde_vreinterpret_u64_f64(a); simde_uint64x1_private r_ = simde_uint64x1_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_f32_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -143.25), SIMDE_FLOAT64_C( 834.85) }, }, { { SIMDE_FLOAT64_C( -261.67), SIMDE_FLOAT64_C( -140.07) }, }, { { SIMDE_FLOAT64_C( 513.01), SIMDE_FLOAT64_C( -658.64) }, }, { { SIMDE_FLOAT64_C( -687.83), SIMDE_FLOAT64_C( 764.02) }, }, { { SIMDE_FLOAT64_C( -465.12), SIMDE_FLOAT64_C( -765.44) }, }, { { SIMDE_FLOAT64_C( 433.84), SIMDE_FLOAT64_C( -259.91) }, }, { { SIMDE_FLOAT64_C( -578.38), 
SIMDE_FLOAT64_C( 507.83) }, }, { { SIMDE_FLOAT64_C( 973.67), SIMDE_FLOAT64_C( -82.78) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_float32x4_t r = simde_vreinterpretq_f32_f64(a); simde_float32x4_private r_ = simde_float32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s8_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 458.52), SIMDE_FLOAT64_C( 987.27) }, }, { { SIMDE_FLOAT64_C( -744.09), SIMDE_FLOAT64_C( -287.82) }, }, { { SIMDE_FLOAT64_C( -721.99), SIMDE_FLOAT64_C( 994.43) }, }, { { SIMDE_FLOAT64_C( 754.99), SIMDE_FLOAT64_C( -258.36) }, }, { { SIMDE_FLOAT64_C( 866.04), SIMDE_FLOAT64_C( -587.02) }, }, { { SIMDE_FLOAT64_C( -745.20), SIMDE_FLOAT64_C( 415.92) }, }, { { SIMDE_FLOAT64_C( -203.46), SIMDE_FLOAT64_C( 889.41) }, }, { { SIMDE_FLOAT64_C( 665.61), SIMDE_FLOAT64_C( 69.19) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_int8x16_t r = simde_vreinterpretq_s8_f64(a); simde_int8x16_private r_ = simde_int8x16_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s16_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 94.42), SIMDE_FLOAT64_C( 818.93) }, }, { { SIMDE_FLOAT64_C( -692.67), SIMDE_FLOAT64_C( 786.34) }, }, { { SIMDE_FLOAT64_C( 928.43), SIMDE_FLOAT64_C( -189.58) }, }, { { SIMDE_FLOAT64_C( -918.60), SIMDE_FLOAT64_C( 199.44) }, }, { { SIMDE_FLOAT64_C( 503.38), SIMDE_FLOAT64_C( 887.65) }, }, { { SIMDE_FLOAT64_C( -468.72), SIMDE_FLOAT64_C( -389.59) }, }, { { SIMDE_FLOAT64_C( 846.67), SIMDE_FLOAT64_C( -751.14) }, 
}, { { SIMDE_FLOAT64_C( -515.92), SIMDE_FLOAT64_C( 180.13) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_int16x8_t r = simde_vreinterpretq_s16_f64(a); simde_int16x8_private r_ = simde_int16x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s32_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 528.55), SIMDE_FLOAT64_C( 688.78) }, }, { { SIMDE_FLOAT64_C( 610.11), SIMDE_FLOAT64_C( 611.13) }, }, { { SIMDE_FLOAT64_C( -578.07), SIMDE_FLOAT64_C( 982.36) }, }, { { SIMDE_FLOAT64_C( 364.66), SIMDE_FLOAT64_C( -506.04) }, }, { { SIMDE_FLOAT64_C( -873.02), SIMDE_FLOAT64_C( 793.52) }, }, { { SIMDE_FLOAT64_C( 431.73), SIMDE_FLOAT64_C( -420.86) }, }, { { SIMDE_FLOAT64_C( -966.64), SIMDE_FLOAT64_C( 749.49) }, }, { { SIMDE_FLOAT64_C( 468.95), SIMDE_FLOAT64_C( 678.54) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_int32x4_t r = simde_vreinterpretq_s32_f64(a); simde_int32x4_private r_ = simde_int32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 499.96), SIMDE_FLOAT64_C( 483.36) }, }, { { SIMDE_FLOAT64_C( -570.97), SIMDE_FLOAT64_C( -986.77) }, }, { { SIMDE_FLOAT64_C( 388.29), SIMDE_FLOAT64_C( -157.42) }, }, { { SIMDE_FLOAT64_C( -500.62), SIMDE_FLOAT64_C( -196.54) }, }, { { SIMDE_FLOAT64_C( -186.43), SIMDE_FLOAT64_C( 777.44) }, }, { { SIMDE_FLOAT64_C( -393.24), SIMDE_FLOAT64_C( -993.29) }, }, { { SIMDE_FLOAT64_C( 813.36), SIMDE_FLOAT64_C( 797.87) }, }, { { SIMDE_FLOAT64_C( 
890.64), SIMDE_FLOAT64_C( 177.04) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_int64x2_t r = simde_vreinterpretq_s64_f64(a); simde_int64x2_private r_ = simde_int64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u8_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 774.17), SIMDE_FLOAT64_C( -982.31) }, }, { { SIMDE_FLOAT64_C( 150.95), SIMDE_FLOAT64_C( 568.12) }, }, { { SIMDE_FLOAT64_C( -697.86), SIMDE_FLOAT64_C( -585.35) }, }, { { SIMDE_FLOAT64_C( -726.09), SIMDE_FLOAT64_C( -258.24) }, }, { { SIMDE_FLOAT64_C( 225.96), SIMDE_FLOAT64_C( 612.94) }, }, { { SIMDE_FLOAT64_C( 513.18), SIMDE_FLOAT64_C( 503.15) }, }, { { SIMDE_FLOAT64_C( 963.82), SIMDE_FLOAT64_C( -77.66) }, }, { { SIMDE_FLOAT64_C( -796.08), SIMDE_FLOAT64_C( 2.44) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_uint8x16_t r = simde_vreinterpretq_u8_f64(a); simde_uint8x16_private r_ = simde_uint8x16_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u16_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 366.50), SIMDE_FLOAT64_C( -291.14) }, }, { { SIMDE_FLOAT64_C( -469.92), SIMDE_FLOAT64_C( -958.99) }, }, { { SIMDE_FLOAT64_C( -979.93), SIMDE_FLOAT64_C( -904.00) }, }, { { SIMDE_FLOAT64_C( -442.71), SIMDE_FLOAT64_C( -363.03) }, }, { { SIMDE_FLOAT64_C( -172.73), SIMDE_FLOAT64_C( 76.80) }, }, { { SIMDE_FLOAT64_C( 876.29), SIMDE_FLOAT64_C( -629.97) }, }, { { SIMDE_FLOAT64_C( 360.30), SIMDE_FLOAT64_C( 944.37) }, }, { { SIMDE_FLOAT64_C( -412.04), SIMDE_FLOAT64_C( 
375.13) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_uint16x8_t r = simde_vreinterpretq_u16_f64(a); simde_uint16x8_private r_ = simde_uint16x8_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u32_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -625.30), SIMDE_FLOAT64_C( -492.95) }, }, { { SIMDE_FLOAT64_C( 940.16), SIMDE_FLOAT64_C( 246.25) }, }, { { SIMDE_FLOAT64_C( -129.68), SIMDE_FLOAT64_C( -65.50) }, }, { { SIMDE_FLOAT64_C( -587.78), SIMDE_FLOAT64_C( -371.92) }, }, { { SIMDE_FLOAT64_C( 254.10), SIMDE_FLOAT64_C( 929.33) }, }, { { SIMDE_FLOAT64_C( 369.23), SIMDE_FLOAT64_C( -348.00) }, }, { { SIMDE_FLOAT64_C( 551.76), SIMDE_FLOAT64_C( 388.09) }, }, { { SIMDE_FLOAT64_C( -849.56), SIMDE_FLOAT64_C( 541.39) }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_uint32x4_t r = simde_vreinterpretq_u32_f64(a); simde_uint32x4_private r_ = simde_uint32x4_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } static int test_simde_vreinterpretq_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 261.11), SIMDE_FLOAT64_C( 328.35) }, }, { { SIMDE_FLOAT64_C( 953.99), SIMDE_FLOAT64_C( 268.54) }, }, { { SIMDE_FLOAT64_C( 860.91), SIMDE_FLOAT64_C( 685.66) }, }, { { SIMDE_FLOAT64_C( -379.72), SIMDE_FLOAT64_C( 849.19) }, }, { { SIMDE_FLOAT64_C( -182.32), SIMDE_FLOAT64_C( 565.54) }, }, { { SIMDE_FLOAT64_C( 593.89), SIMDE_FLOAT64_C( 988.09) }, }, { { SIMDE_FLOAT64_C( 564.35), SIMDE_FLOAT64_C( 771.05) }, }, { { SIMDE_FLOAT64_C( -993.11), SIMDE_FLOAT64_C( -315.73) }, }, }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_uint64x2_t r = simde_vreinterpretq_u64_f64(a); simde_uint64x2_private r_ = simde_uint64x2_to_private(r); simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f32_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f64_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s16_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s32_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s64_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u8_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u16_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u32_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u64_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f32_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f64_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s16_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s32_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s64_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u8_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u16_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u32_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u64_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f32_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f64_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s8_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s32_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s64_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u8_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u16_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u32_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u64_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f32_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f64_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s8_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s32_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s64_s16) 
SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u8_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u16_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u32_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u64_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f32_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f64_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s8_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s16_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s64_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u8_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u16_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u32_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u64_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f32_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f64_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s8_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s16_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s64_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u8_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u16_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u32_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u64_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f32_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f64_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s8_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s16_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s32_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u8_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u16_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u32_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u64_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f32_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f64_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s8_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s16_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s32_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u8_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u16_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u32_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u64_s64) 
SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f32_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f64_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s8_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s16_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s32_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s64_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u16_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u32_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u64_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f32_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f64_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s8_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s16_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s32_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s64_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u16_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u32_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u64_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f32_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f64_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s8_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s16_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s32_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s64_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u8_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u32_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u64_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f32_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f64_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s8_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s16_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s32_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s64_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u8_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u32_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u64_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f32_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f64_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s8_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s16_u32) 
SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s32_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s64_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u8_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u16_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u64_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f32_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f64_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s8_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s16_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s32_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s64_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u8_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u16_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u64_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f32_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f64_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s8_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s16_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s32_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s64_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u8_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u16_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u32_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f32_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f64_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s8_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s16_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s32_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s64_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u8_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u16_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u32_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f64_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s8_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s16_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s32_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s64_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u8_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u16_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u32_f32) 
SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u64_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f64_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s8_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s16_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s32_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s64_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u8_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u16_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u32_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u64_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f32_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s8_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s16_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s32_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s64_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u8_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u16_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u32_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u64_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f32_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s8_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s16_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s32_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s64_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u8_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u16_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u32_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u64_f64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/rev16.c000066400000000000000000000363031400333146700164330ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN rev16 #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/rev16.h" #else #include "../../../simde/arm/neon.h" #endif static int test_simde_vrev16_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 101), INT8_C( 94), -INT8_C( 1), INT8_C( 100), -INT8_C( 108), INT8_C( 34), INT8_C( 116), 
-INT8_C( 90) }, { INT8_C( 94), -INT8_C( 101), INT8_C( 100), -INT8_C( 1), INT8_C( 34), -INT8_C( 108), -INT8_C( 90), INT8_C( 116) } }, { { -INT8_C( 10), INT8_C( 59), -INT8_C( 1), INT8_C( 119), -INT8_C( 58), INT8_C( 118), INT8_C( 36), -INT8_C( 89) }, { INT8_C( 59), -INT8_C( 10), INT8_C( 119), -INT8_C( 1), INT8_C( 118), -INT8_C( 58), -INT8_C( 89), INT8_C( 36) } }, { { INT8_C( 92), -INT8_C( 49), INT8_C( 6), -INT8_C( 8), -INT8_C( 93), -INT8_C( 42), -INT8_C( 88), -INT8_C( 7) }, { -INT8_C( 49), INT8_C( 92), -INT8_C( 8), INT8_C( 6), -INT8_C( 42), -INT8_C( 93), -INT8_C( 7), -INT8_C( 88) } }, { { INT8_C( 19), INT8_C( 0), INT8_C( 98), -INT8_C( 77), -INT8_C( 121), INT8_C( 123), INT8_C( 71), INT8_C( 34) }, { INT8_C( 0), INT8_C( 19), -INT8_C( 77), INT8_C( 98), INT8_C( 123), -INT8_C( 121), INT8_C( 34), INT8_C( 71) } }, { { -INT8_C( 39), INT8_C( 70), -INT8_C( 122), INT8_C( 109), INT8_C( 104), -INT8_C( 6), INT8_C( 20), INT8_C( 94) }, { INT8_C( 70), -INT8_C( 39), INT8_C( 109), -INT8_C( 122), -INT8_C( 6), INT8_C( 104), INT8_C( 94), INT8_C( 20) } }, { { INT8_C( 54), INT8_C( 19), -INT8_C( 43), -INT8_C( 4), -INT8_C( 119), -INT8_C( 7), -INT8_C( 92), -INT8_C( 26) }, { INT8_C( 19), INT8_C( 54), -INT8_C( 4), -INT8_C( 43), -INT8_C( 7), -INT8_C( 119), -INT8_C( 26), -INT8_C( 92) } }, { { -INT8_C( 56), -INT8_C( 86), -INT8_C( 34), INT8_C( 107), -INT8_C( 127), -INT8_C( 121), INT8_C( 100), -INT8_C( 108) }, { -INT8_C( 86), -INT8_C( 56), INT8_C( 107), -INT8_C( 34), -INT8_C( 121), -INT8_C( 127), -INT8_C( 108), INT8_C( 100) } }, { { -INT8_C( 121), -INT8_C( 58), INT8_C( 72), INT8_C( 14), INT8_C( 65), -INT8_C( 113), INT8_C( 48), INT8_C( 26) }, { -INT8_C( 58), -INT8_C( 121), INT8_C( 14), INT8_C( 72), -INT8_C( 113), INT8_C( 65), INT8_C( 26), INT8_C( 48) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t r = simde_vrev16_s8(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; 
#else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vrev16_s8(a); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev16_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 67), UINT8_C(189), UINT8_C( 42), UINT8_C(175), UINT8_C(157), UINT8_C( 11), UINT8_C( 47), UINT8_C( 3) }, { UINT8_C(189), UINT8_C( 67), UINT8_C(175), UINT8_C( 42), UINT8_C( 11), UINT8_C(157), UINT8_C( 3), UINT8_C( 47) } }, { { UINT8_C( 70), UINT8_C( 82), UINT8_MAX, UINT8_C(181), UINT8_C(147), UINT8_C(218), UINT8_C(137), UINT8_C(243) }, { UINT8_C( 82), UINT8_C( 70), UINT8_C(181), UINT8_MAX, UINT8_C(218), UINT8_C(147), UINT8_C(243), UINT8_C(137) } }, { { UINT8_C(226), UINT8_C(250), UINT8_C( 97), UINT8_C(179), UINT8_C(177), UINT8_C( 24), UINT8_C(159), UINT8_C(204) }, { UINT8_C(250), UINT8_C(226), UINT8_C(179), UINT8_C( 97), UINT8_C( 24), UINT8_C(177), UINT8_C(204), UINT8_C(159) } }, { { UINT8_C( 69), UINT8_C( 24), UINT8_C(125), UINT8_C(199), UINT8_C( 65), UINT8_C( 54), UINT8_C(222), UINT8_C(133) }, { UINT8_C( 24), UINT8_C( 69), UINT8_C(199), UINT8_C(125), UINT8_C( 54), UINT8_C( 65), UINT8_C(133), UINT8_C(222) } }, { { UINT8_C(244), UINT8_C( 8), UINT8_C( 52), UINT8_C(145), UINT8_C( 19), UINT8_C( 99), UINT8_C(148), UINT8_C( 89) }, { UINT8_C( 8), UINT8_C(244), UINT8_C(145), UINT8_C( 52), UINT8_C( 99), UINT8_C( 19), UINT8_C( 89), UINT8_C(148) } }, { { UINT8_C(181), UINT8_C(147), UINT8_C( 14), UINT8_C( 72), UINT8_C(110), UINT8_C(151), UINT8_C( 60), UINT8_C( 80) }, { UINT8_C(147), UINT8_C(181), UINT8_C( 72), UINT8_C( 14), UINT8_C(151), UINT8_C(110), UINT8_C( 80), UINT8_C( 60) } }, { { UINT8_C(145), UINT8_C(157), UINT8_C( 4), UINT8_C( 66), UINT8_C(181), UINT8_C(163), UINT8_C( 14), UINT8_C(250) }, { UINT8_C(157), UINT8_C(145), UINT8_C( 66), 
UINT8_C( 4), UINT8_C(163), UINT8_C(181), UINT8_C(250), UINT8_C( 14) } }, { { UINT8_C(187), UINT8_C(139), UINT8_C(193), UINT8_C(253), UINT8_C(194), UINT8_C(159), UINT8_C(130), UINT8_C(182) }, { UINT8_C(139), UINT8_C(187), UINT8_C(253), UINT8_C(193), UINT8_C(159), UINT8_C(194), UINT8_C(182), UINT8_C(130) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t r = simde_vrev16_u8(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vrev16_u8(a); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev16q_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t r[16]; } test_vec[] = { { { -INT8_C( 125), INT8_C( 86), INT8_C( 69), -INT8_C( 56), -INT8_C( 71), -INT8_C( 24), INT8_C( 68), INT8_C( 8), -INT8_C( 85), INT8_C( 103), -INT8_C( 68), INT8_C( 41), INT8_C( 61), -INT8_C( 94), INT8_C( 49), -INT8_C( 3) }, { INT8_C( 86), -INT8_C( 125), -INT8_C( 56), INT8_C( 69), -INT8_C( 24), -INT8_C( 71), INT8_C( 8), INT8_C( 68), INT8_C( 103), -INT8_C( 85), INT8_C( 41), -INT8_C( 68), -INT8_C( 94), INT8_C( 61), -INT8_C( 3), INT8_C( 49) } }, { { INT8_C( 28), -INT8_C( 16), INT8_C( 36), INT8_C( 0), -INT8_C( 35), INT8_C( 121), INT8_C( 30), INT8_C( 14), -INT8_C( 76), -INT8_C( 111), -INT8_C( 58), -INT8_C( 94), INT8_C( 87), -INT8_C( 122), INT8_C( 80), -INT8_C( 38) }, { -INT8_C( 16), INT8_C( 28), INT8_C( 0), INT8_C( 36), INT8_C( 121), -INT8_C( 35), INT8_C( 14), INT8_C( 30), -INT8_C( 111), -INT8_C( 76), -INT8_C( 94), -INT8_C( 58), -INT8_C( 122), INT8_C( 87), -INT8_C( 38), INT8_C( 80) } }, { { -INT8_C( 35), -INT8_C( 107), -INT8_C( 94), -INT8_C( 106), INT8_C( 125), -INT8_C( 26), -INT8_C( 98), INT8_C( 
40), INT8_C( 78), INT8_C( 90), INT8_C( 82), -INT8_C( 117), -INT8_C( 4), -INT8_C( 125), -INT8_C( 119), INT8_C( 24) }, { -INT8_C( 107), -INT8_C( 35), -INT8_C( 106), -INT8_C( 94), -INT8_C( 26), INT8_C( 125), INT8_C( 40), -INT8_C( 98), INT8_C( 90), INT8_C( 78), -INT8_C( 117), INT8_C( 82), -INT8_C( 125), -INT8_C( 4), INT8_C( 24), -INT8_C( 119) } }, { { INT8_C( 115), -INT8_C( 83), INT8_C( 24), INT8_C( 80), INT8_C( 38), INT8_C( 54), INT8_C( 94), -INT8_C( 38), -INT8_C( 57), INT8_C( 36), INT8_C( 124), INT8_C( 31), -INT8_C( 85), -INT8_C( 52), -INT8_C( 7), -INT8_C( 120) }, { -INT8_C( 83), INT8_C( 115), INT8_C( 80), INT8_C( 24), INT8_C( 54), INT8_C( 38), -INT8_C( 38), INT8_C( 94), INT8_C( 36), -INT8_C( 57), INT8_C( 31), INT8_C( 124), -INT8_C( 52), -INT8_C( 85), -INT8_C( 120), -INT8_C( 7) } }, { { INT8_C( 98), -INT8_C( 100), INT8_C( 30), -INT8_C( 33), -INT8_C( 126), -INT8_C( 67), INT8_C( 8), -INT8_C( 48), INT8_C( 23), INT8_C( 90), INT8_C( 92), INT8_C( 20), -INT8_C( 35), -INT8_C( 27), INT8_C( 44), INT8_C( 80) }, { -INT8_C( 100), INT8_C( 98), -INT8_C( 33), INT8_C( 30), -INT8_C( 67), -INT8_C( 126), -INT8_C( 48), INT8_C( 8), INT8_C( 90), INT8_C( 23), INT8_C( 20), INT8_C( 92), -INT8_C( 27), -INT8_C( 35), INT8_C( 80), INT8_C( 44) } }, { { -INT8_C( 110), INT8_C( 69), -INT8_C( 96), -INT8_C( 72), INT8_C( 123), -INT8_C( 2), -INT8_C( 110), INT8_C( 67), INT8_C( 34), INT8_C( 14), INT8_C( 98), -INT8_C( 51), -INT8_C( 38), INT8_C( 91), INT8_C( 85), INT8_C( 60) }, { INT8_C( 69), -INT8_C( 110), -INT8_C( 72), -INT8_C( 96), -INT8_C( 2), INT8_C( 123), INT8_C( 67), -INT8_C( 110), INT8_C( 14), INT8_C( 34), -INT8_C( 51), INT8_C( 98), INT8_C( 91), -INT8_C( 38), INT8_C( 60), INT8_C( 85) } }, { { -INT8_C( 9), INT8_C( 116), INT8_C( 28), INT8_C( 122), INT8_C( 49), INT8_C( 36), INT8_C( 74), INT8_C( 72), INT8_C( 126), -INT8_C( 90), INT8_C( 92), INT8_C( 91), -INT8_C( 117), -INT8_C( 119), -INT8_C( 85), INT8_C( 29) }, { INT8_C( 116), -INT8_C( 9), INT8_C( 122), INT8_C( 28), INT8_C( 36), INT8_C( 49), INT8_C( 72), 
INT8_C( 74), -INT8_C( 90), INT8_C( 126), INT8_C( 91), INT8_C( 92), -INT8_C( 119), -INT8_C( 117), INT8_C( 29), -INT8_C( 85) } }, { { -INT8_C( 50), INT8_C( 75), -INT8_C( 43), INT8_C( 73), INT8_C( 73), INT8_C( 103), -INT8_C( 116), INT8_C( 107), INT8_C( 117), -INT8_C( 18), INT8_C( 57), INT8_C( 80), INT8_C( 74), -INT8_C( 114), -INT8_C( 116), INT8_C( 65) }, { INT8_C( 75), -INT8_C( 50), INT8_C( 73), -INT8_C( 43), INT8_C( 103), INT8_C( 73), INT8_C( 107), -INT8_C( 116), -INT8_C( 18), INT8_C( 117), INT8_C( 80), INT8_C( 57), -INT8_C( 114), INT8_C( 74), INT8_C( 65), -INT8_C( 116) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t r = simde_vrev16q_s8(a); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vrev16q_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev16q_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(141), UINT8_C( 21), UINT8_C(244), UINT8_C( 64), UINT8_C( 36), UINT8_C(190), UINT8_C( 14), UINT8_C(191), UINT8_C( 34), UINT8_C( 13), UINT8_C(227), UINT8_C(160), UINT8_C(212), UINT8_C(135), UINT8_C(239), UINT8_C(236) }, { UINT8_C( 21), UINT8_C(141), UINT8_C( 64), UINT8_C(244), UINT8_C(190), UINT8_C( 36), UINT8_C(191), UINT8_C( 14), UINT8_C( 13), UINT8_C( 34), UINT8_C(160), UINT8_C(227), UINT8_C(135), UINT8_C(212), UINT8_C(236), UINT8_C(239) } }, { { UINT8_C( 9), UINT8_C( 39), UINT8_C( 99), UINT8_C(250), UINT8_C(220), UINT8_C(252), UINT8_C(157), UINT8_C( 87), UINT8_C( 43), UINT8_C(133), UINT8_C(232), UINT8_C(139), UINT8_C(175), UINT8_C( 29), UINT8_C(173), UINT8_C( 61) }, { UINT8_C( 39), UINT8_C( 9), 
UINT8_C(250), UINT8_C( 99), UINT8_C(252), UINT8_C(220), UINT8_C( 87), UINT8_C(157), UINT8_C(133), UINT8_C( 43), UINT8_C(139), UINT8_C(232), UINT8_C( 29), UINT8_C(175), UINT8_C( 61), UINT8_C(173) } }, { { UINT8_C( 50), UINT8_C(161), UINT8_C(125), UINT8_C( 86), UINT8_C( 95), UINT8_C(140), UINT8_C( 21), UINT8_C(130), UINT8_C(153), UINT8_C(248), UINT8_C( 34), UINT8_C(110), UINT8_C(128), UINT8_C( 17), UINT8_C( 90), UINT8_C(137) }, { UINT8_C(161), UINT8_C( 50), UINT8_C( 86), UINT8_C(125), UINT8_C(140), UINT8_C( 95), UINT8_C(130), UINT8_C( 21), UINT8_C(248), UINT8_C(153), UINT8_C(110), UINT8_C( 34), UINT8_C( 17), UINT8_C(128), UINT8_C(137), UINT8_C( 90) } }, { { UINT8_C( 56), UINT8_C(189), UINT8_C(131), UINT8_C( 20), UINT8_C(185), UINT8_C( 33), UINT8_C(107), UINT8_C(229), UINT8_C(166), UINT8_C( 84), UINT8_C(112), UINT8_C( 86), UINT8_C(113), UINT8_C( 29), UINT8_C(147), UINT8_C(163) }, { UINT8_C(189), UINT8_C( 56), UINT8_C( 20), UINT8_C(131), UINT8_C( 33), UINT8_C(185), UINT8_C(229), UINT8_C(107), UINT8_C( 84), UINT8_C(166), UINT8_C( 86), UINT8_C(112), UINT8_C( 29), UINT8_C(113), UINT8_C(163), UINT8_C(147) } }, { { UINT8_C(191), UINT8_C( 16), UINT8_C(250), UINT8_C( 30), UINT8_C(156), UINT8_C( 15), UINT8_C(160), UINT8_C( 54), UINT8_C( 8), UINT8_C(195), UINT8_C(164), UINT8_C(136), UINT8_C(212), UINT8_C(254), UINT8_C( 17), UINT8_C( 13) }, { UINT8_C( 16), UINT8_C(191), UINT8_C( 30), UINT8_C(250), UINT8_C( 15), UINT8_C(156), UINT8_C( 54), UINT8_C(160), UINT8_C(195), UINT8_C( 8), UINT8_C(136), UINT8_C(164), UINT8_C(254), UINT8_C(212), UINT8_C( 13), UINT8_C( 17) } }, { { UINT8_C(187), UINT8_C(149), UINT8_C( 33), UINT8_C(116), UINT8_C(182), UINT8_C(141), UINT8_C( 89), UINT8_C( 92), UINT8_C(225), UINT8_C(201), UINT8_C(178), UINT8_C( 82), UINT8_C(231), UINT8_C( 69), UINT8_C(245), UINT8_C(166) }, { UINT8_C(149), UINT8_C(187), UINT8_C(116), UINT8_C( 33), UINT8_C(141), UINT8_C(182), UINT8_C( 92), UINT8_C( 89), UINT8_C(201), UINT8_C(225), UINT8_C( 82), UINT8_C(178), UINT8_C( 69), 
UINT8_C(231), UINT8_C(166), UINT8_C(245) } }, { { UINT8_C( 86), UINT8_C(239), UINT8_C(196), UINT8_C(242), UINT8_MAX, UINT8_C(101), UINT8_C( 40), UINT8_C( 7), UINT8_C( 40), UINT8_C(204), UINT8_C(143), UINT8_C(252), UINT8_C(202), UINT8_C(160), UINT8_C( 9), UINT8_C(133) }, { UINT8_C(239), UINT8_C( 86), UINT8_C(242), UINT8_C(196), UINT8_C(101), UINT8_MAX, UINT8_C( 7), UINT8_C( 40), UINT8_C(204), UINT8_C( 40), UINT8_C(252), UINT8_C(143), UINT8_C(160), UINT8_C(202), UINT8_C(133), UINT8_C( 9) } }, { { UINT8_C( 53), UINT8_C( 43), UINT8_C(250), UINT8_C(235), UINT8_C(184), UINT8_C( 83), UINT8_C( 72), UINT8_C(153), UINT8_C( 29), UINT8_C(250), UINT8_C(235), UINT8_C( 4), UINT8_C( 64), UINT8_C(224), UINT8_C(170), UINT8_C(150) }, { UINT8_C( 43), UINT8_C( 53), UINT8_C(235), UINT8_C(250), UINT8_C( 83), UINT8_C(184), UINT8_C(153), UINT8_C( 72), UINT8_C(250), UINT8_C( 29), UINT8_C( 4), UINT8_C(235), UINT8_C(224), UINT8_C( 64), UINT8_C(150), UINT8_C(170) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t r = simde_vrev16q_u8(a); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vrev16q_u8(a); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vrev16_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vrev16_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vrev16q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vrev16q_u8) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/rev32.c000066400000000000000000000630751400333146700164370ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN rev32 #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include 
"../../../simde/arm/neon/rev32.h" #else #include "../../../simde/arm/neon.h" #endif static int test_simde_vrev32_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 10), -INT8_C( 9), INT8_C( 64), INT8_C( 118), -INT8_C( 1), -INT8_C( 64), -INT8_C( 72), -INT8_C( 13) }, { INT8_C( 118), INT8_C( 64), -INT8_C( 9), INT8_C( 10), -INT8_C( 13), -INT8_C( 72), -INT8_C( 64), -INT8_C( 1) } }, { { INT8_C( 68), INT8_C( 117), INT8_C( 70), -INT8_C( 116), INT8_C( 76), INT8_C( 58), -INT8_C( 19), -INT8_C( 60) }, { -INT8_C( 116), INT8_C( 70), INT8_C( 117), INT8_C( 68), -INT8_C( 60), -INT8_C( 19), INT8_C( 58), INT8_C( 76) } }, { { INT8_C( 79), -INT8_C( 21), INT8_C( 50), -INT8_C( 7), -INT8_C( 8), -INT8_C( 25), -INT8_C( 17), -INT8_C( 39) }, { -INT8_C( 7), INT8_C( 50), -INT8_C( 21), INT8_C( 79), -INT8_C( 39), -INT8_C( 17), -INT8_C( 25), -INT8_C( 8) } }, { { INT8_C( 72), INT8_C( 94), -INT8_C( 41), -INT8_C( 85), INT8_C( 22), -INT8_C( 54), INT8_C( 31), INT8_C( 32) }, { -INT8_C( 85), -INT8_C( 41), INT8_C( 94), INT8_C( 72), INT8_C( 32), INT8_C( 31), -INT8_C( 54), INT8_C( 22) } }, { { -INT8_C( 63), INT8_C( 96), -INT8_C( 106), -INT8_C( 64), INT8_C( 32), INT8_C( 78), -INT8_C( 77), INT8_C( 101) }, { -INT8_C( 64), -INT8_C( 106), INT8_C( 96), -INT8_C( 63), INT8_C( 101), -INT8_C( 77), INT8_C( 78), INT8_C( 32) } }, { { -INT8_C( 61), -INT8_C( 7), -INT8_C( 15), INT8_C( 15), INT8_C( 52), -INT8_C( 33), -INT8_C( 45), -INT8_C( 125) }, { INT8_C( 15), -INT8_C( 15), -INT8_C( 7), -INT8_C( 61), -INT8_C( 125), -INT8_C( 45), -INT8_C( 33), INT8_C( 52) } }, { { -INT8_C( 54), INT8_C( 5), INT8_C( 124), -INT8_C( 61), -INT8_C( 19), INT8_C( 108), -INT8_C( 100), INT8_C( 53) }, { -INT8_C( 61), INT8_C( 124), INT8_C( 5), -INT8_C( 54), INT8_C( 53), -INT8_C( 100), INT8_C( 108), -INT8_C( 19) } }, { { -INT8_C( 54), INT8_C( 116), -INT8_C( 32), -INT8_C( 32), INT8_C( 62), -INT8_C( 1), INT8_C( 0), -INT8_C( 1) }, { -INT8_C( 32), -INT8_C( 32), INT8_C( 116), -INT8_C( 54), 
-INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 62) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t r = simde_vrev32_s8(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vrev32_s8(a); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev32_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 15754), -INT16_C( 21890), -INT16_C( 21019), -INT16_C( 3597) }, { -INT16_C( 21890), INT16_C( 15754), -INT16_C( 3597), -INT16_C( 21019) } }, { { INT16_C( 10757), INT16_C( 12620), -INT16_C( 4738), INT16_C( 30694) }, { INT16_C( 12620), INT16_C( 10757), INT16_C( 30694), -INT16_C( 4738) } }, { { -INT16_C( 31277), INT16_C( 31359), -INT16_C( 21795), INT16_C( 26775) }, { INT16_C( 31359), -INT16_C( 31277), INT16_C( 26775), -INT16_C( 21795) } }, { { INT16_C( 1077), -INT16_C( 19248), INT16_C( 29596), INT16_C( 10059) }, { -INT16_C( 19248), INT16_C( 1077), INT16_C( 10059), INT16_C( 29596) } }, { { -INT16_C( 13904), -INT16_C( 26927), -INT16_C( 15242), INT16_C( 31879) }, { -INT16_C( 26927), -INT16_C( 13904), INT16_C( 31879), -INT16_C( 15242) } }, { { -INT16_C( 11282), INT16_C( 27821), -INT16_C( 27712), -INT16_C( 27677) }, { INT16_C( 27821), -INT16_C( 11282), -INT16_C( 27677), -INT16_C( 27712) } }, { { INT16_C( 25112), -INT16_C( 2802), -INT16_C( 23283), INT16_C( 16989) }, { -INT16_C( 2802), INT16_C( 25112), INT16_C( 16989), -INT16_C( 23283) } }, { { INT16_C( 11689), INT16_C( 18166), INT16_C( 16801), INT16_C( 20845) }, { INT16_C( 18166), INT16_C( 11689), INT16_C( 20845), INT16_C( 16801) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t r = simde_vrev32_s16(a); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vrev32_s16(a); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev32_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 11), UINT8_C(159), UINT8_C(143), UINT8_C(167), UINT8_C(185), UINT8_C(247), UINT8_C( 63), UINT8_C(210) }, { UINT8_C(167), UINT8_C(143), UINT8_C(159), UINT8_C( 11), UINT8_C(210), UINT8_C( 63), UINT8_C(247), UINT8_C(185) } }, { { UINT8_C(126), UINT8_C(187), UINT8_C( 97), UINT8_C( 34), UINT8_C( 33), UINT8_C( 88), UINT8_C( 12), UINT8_C(158) }, { UINT8_C( 34), UINT8_C( 97), UINT8_C(187), UINT8_C(126), UINT8_C(158), UINT8_C( 12), UINT8_C( 88), UINT8_C( 33) } }, { { UINT8_C(120), UINT8_C(219), UINT8_C( 2), UINT8_C( 23), UINT8_C(244), UINT8_C( 23), UINT8_C( 40), UINT8_C(164) }, { UINT8_C( 23), UINT8_C( 2), UINT8_C(219), UINT8_C(120), UINT8_C(164), UINT8_C( 40), UINT8_C( 23), UINT8_C(244) } }, { { UINT8_C(213), UINT8_C( 66), UINT8_C(210), UINT8_C(230), UINT8_C( 51), UINT8_C(148), UINT8_C(224), UINT8_C( 63) }, { UINT8_C(230), UINT8_C(210), UINT8_C( 66), UINT8_C(213), UINT8_C( 63), UINT8_C(224), UINT8_C(148), UINT8_C( 51) } }, { { UINT8_C( 51), UINT8_C(111), UINT8_C(230), UINT8_C(236), UINT8_C(102), UINT8_C( 37), UINT8_C(191), UINT8_C(228) }, { UINT8_C(236), UINT8_C(230), UINT8_C(111), UINT8_C( 51), UINT8_C(228), UINT8_C(191), UINT8_C( 37), UINT8_C(102) } }, { { UINT8_C(225), UINT8_C( 32), UINT8_C( 7), UINT8_C( 2), UINT8_C(120), UINT8_C( 19), UINT8_C(160), UINT8_C(240) }, { UINT8_C( 2), UINT8_C( 7), UINT8_C( 32), UINT8_C(225), 
UINT8_C(240), UINT8_C(160), UINT8_C( 19), UINT8_C(120) } }, { { UINT8_C(239), UINT8_C(162), UINT8_C( 8), UINT8_C(227), UINT8_C(186), UINT8_C( 48), UINT8_C(135), UINT8_C(143) }, { UINT8_C(227), UINT8_C( 8), UINT8_C(162), UINT8_C(239), UINT8_C(143), UINT8_C(135), UINT8_C( 48), UINT8_C(186) } }, { { UINT8_C(114), UINT8_C( 90), UINT8_C(118), UINT8_C(165), UINT8_C(238), UINT8_C( 86), UINT8_C(228), UINT8_C( 33) }, { UINT8_C(165), UINT8_C(118), UINT8_C( 90), UINT8_C(114), UINT8_C( 33), UINT8_C(228), UINT8_C( 86), UINT8_C(238) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t r = simde_vrev32_u8(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vrev32_u8(a); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev32_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(28484), UINT16_C(36729), UINT16_C(20702), UINT16_C(17090) }, { UINT16_C(36729), UINT16_C(28484), UINT16_C(17090), UINT16_C(20702) } }, { { UINT16_C( 5558), UINT16_C(16791), UINT16_C( 863), UINT16_C(34542) }, { UINT16_C(16791), UINT16_C( 5558), UINT16_C(34542), UINT16_C( 863) } }, { { UINT16_C(65329), UINT16_C(53465), UINT16_C(26394), UINT16_C(35387) }, { UINT16_C(53465), UINT16_C(65329), UINT16_C(35387), UINT16_C(26394) } }, { { UINT16_C(32028), UINT16_C( 8342), UINT16_C(52300), UINT16_C(36878) }, { UINT16_C( 8342), UINT16_C(32028), UINT16_C(36878), UINT16_C(52300) } }, { { UINT16_C(34619), UINT16_C( 6688), UINT16_C(58071), UINT16_C(36188) }, { UINT16_C( 6688), UINT16_C(34619), UINT16_C(36188), UINT16_C(58071) } }, { { UINT16_C(62455), UINT16_C(22222), 
UINT16_C(48631), UINT16_C(10460) }, { UINT16_C(22222), UINT16_C(62455), UINT16_C(10460), UINT16_C(48631) } }, { { UINT16_C(46524), UINT16_C(55288), UINT16_C(13085), UINT16_C(14689) }, { UINT16_C(55288), UINT16_C(46524), UINT16_C(14689), UINT16_C(13085) } }, { { UINT16_C(63408), UINT16_C(64601), UINT16_C(26564), UINT16_C(65420) }, { UINT16_C(64601), UINT16_C(63408), UINT16_C(65420), UINT16_C(26564) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t r = simde_vrev32_u16(a); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t r = simde_vrev32_u16(a); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev32q_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 45), -INT8_C( 48), INT8_C( 72), -INT8_C( 116), -INT8_C( 46), INT8_C( 32), -INT8_C( 105), INT8_C( 41), INT8_C( 120), -INT8_C( 96), -INT8_C( 35), INT8_C( 8), -INT8_C( 28), -INT8_C( 14), INT8_C( 47), -INT8_C( 125) }, { -INT8_C( 116), INT8_C( 72), -INT8_C( 48), INT8_C( 45), INT8_C( 41), -INT8_C( 105), INT8_C( 32), -INT8_C( 46), INT8_C( 8), -INT8_C( 35), -INT8_C( 96), INT8_C( 120), -INT8_C( 125), INT8_C( 47), -INT8_C( 14), -INT8_C( 28) } }, { { -INT8_C( 48), INT8_C( 25), INT8_C( 111), -INT8_C( 3), INT8_C( 85), -INT8_C( 34), INT8_C( 77), -INT8_C( 4), -INT8_C( 3), INT8_C( 17), -INT8_C( 104), INT8_C( 73), INT8_C( 99), INT8_C( 29), -INT8_C( 126), -INT8_C( 112) }, { -INT8_C( 3), INT8_C( 111), INT8_C( 25), -INT8_C( 48), -INT8_C( 4), INT8_C( 77), -INT8_C( 34), INT8_C( 85), INT8_C( 73), -INT8_C( 104), INT8_C( 17), -INT8_C( 3), -INT8_C( 112), -INT8_C( 126), INT8_C( 29), INT8_C( 99) } 
}, { { -INT8_C( 19), -INT8_C( 54), INT8_C( 28), -INT8_C( 65), -INT8_C( 21), -INT8_C( 77), -INT8_C( 24), INT8_C( 99), INT8_C( 83), -INT8_C( 58), INT8_C( 107), INT8_C( 55), -INT8_C( 72), -INT8_C( 102), -INT8_C( 70), -INT8_C( 120) }, { -INT8_C( 65), INT8_C( 28), -INT8_C( 54), -INT8_C( 19), INT8_C( 99), -INT8_C( 24), -INT8_C( 77), -INT8_C( 21), INT8_C( 55), INT8_C( 107), -INT8_C( 58), INT8_C( 83), -INT8_C( 120), -INT8_C( 70), -INT8_C( 102), -INT8_C( 72) } }, { { -INT8_C( 76), INT8_C( 41), -INT8_C( 123), INT8_C( 9), INT8_C( 8), -INT8_C( 45), INT8_C( 5), INT8_C( 5), -INT8_C( 28), -INT8_C( 99), INT8_C( 78), INT8_C( 71), -INT8_C( 70), -INT8_C( 48), -INT8_C( 41), -INT8_C( 88) }, { INT8_C( 9), -INT8_C( 123), INT8_C( 41), -INT8_C( 76), INT8_C( 5), INT8_C( 5), -INT8_C( 45), INT8_C( 8), INT8_C( 71), INT8_C( 78), -INT8_C( 99), -INT8_C( 28), -INT8_C( 88), -INT8_C( 41), -INT8_C( 48), -INT8_C( 70) } }, { { -INT8_C( 102), -INT8_C( 13), INT8_C( 103), -INT8_C( 123), -INT8_C( 90), INT8_C( 80), -INT8_C( 24), -INT8_C( 7), INT8_C( 22), INT8_C( 84), INT8_C( 48), -INT8_C( 50), -INT8_C( 18), -INT8_C( 21), INT8_C( 86), -INT8_C( 94) }, { -INT8_C( 123), INT8_C( 103), -INT8_C( 13), -INT8_C( 102), -INT8_C( 7), -INT8_C( 24), INT8_C( 80), -INT8_C( 90), -INT8_C( 50), INT8_C( 48), INT8_C( 84), INT8_C( 22), -INT8_C( 94), INT8_C( 86), -INT8_C( 21), -INT8_C( 18) } }, { { INT8_C( 20), -INT8_C( 37), -INT8_C( 85), INT8_C( 28), -INT8_C( 82), -INT8_C( 79), INT8_C( 33), -INT8_C( 110), INT8_C( 78), INT8_C( 111), -INT8_C( 39), INT8_C( 9), INT8_C( 63), -INT8_C( 80), -INT8_C( 79), -INT8_C( 38) }, { INT8_C( 28), -INT8_C( 85), -INT8_C( 37), INT8_C( 20), -INT8_C( 110), INT8_C( 33), -INT8_C( 79), -INT8_C( 82), INT8_C( 9), -INT8_C( 39), INT8_C( 111), INT8_C( 78), -INT8_C( 38), -INT8_C( 79), -INT8_C( 80), INT8_C( 63) } }, { { -INT8_C( 93), INT8_C( 24), INT8_C( 95), INT8_C( 73), INT8_C( 104), INT8_C( 72), INT8_C( 66), INT8_C( 126), -INT8_C( 100), INT8_C( 115), INT8_C( 76), -INT8_C( 118), INT8_C( 94), -INT8_C( 94), 
INT8_C( 45), INT8_C( 114) }, { INT8_C( 73), INT8_C( 95), INT8_C( 24), -INT8_C( 93), INT8_C( 126), INT8_C( 66), INT8_C( 72), INT8_C( 104), -INT8_C( 118), INT8_C( 76), INT8_C( 115), -INT8_C( 100), INT8_C( 114), INT8_C( 45), -INT8_C( 94), INT8_C( 94) } }, { { INT8_C( 126), -INT8_C( 40), -INT8_C( 113), INT8_C( 44), -INT8_C( 119), -INT8_C( 80), -INT8_C( 65), -INT8_C( 40), INT8_C( 32), -INT8_C( 104), -INT8_C( 31), INT8_C( 95), INT8_C( 73), -INT8_C( 110), INT8_C( 57), -INT8_C( 20) }, { INT8_C( 44), -INT8_C( 113), -INT8_C( 40), INT8_C( 126), -INT8_C( 40), -INT8_C( 65), -INT8_C( 80), -INT8_C( 119), INT8_C( 95), -INT8_C( 31), -INT8_C( 104), INT8_C( 32), -INT8_C( 20), INT8_C( 57), -INT8_C( 110), INT8_C( 73) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t r = simde_vrev32q_s8(a); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vrev32q_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev32q_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 31985), -INT16_C( 12190), -INT16_C( 3912), -INT16_C( 17108), -INT16_C( 1350), -INT16_C( 2067), -INT16_C( 12798), -INT16_C( 26494) }, { -INT16_C( 12190), INT16_C( 31985), -INT16_C( 17108), -INT16_C( 3912), -INT16_C( 2067), -INT16_C( 1350), -INT16_C( 26494), -INT16_C( 12798) } }, { { INT16_C( 18268), INT16_C( 25032), -INT16_C( 22072), -INT16_C( 3994), -INT16_C( 10072), INT16_C( 17409), -INT16_C( 18539), -INT16_C( 31089) }, { INT16_C( 25032), INT16_C( 18268), -INT16_C( 3994), -INT16_C( 22072), INT16_C( 17409), -INT16_C( 10072), -INT16_C( 31089), -INT16_C( 18539) } }, { { 
-INT16_C( 3789), -INT16_C( 5289), -INT16_C( 31774), -INT16_C( 25431), -INT16_C( 27011), -INT16_C( 32621), INT16_C( 5477), -INT16_C( 16104) }, { -INT16_C( 5289), -INT16_C( 3789), -INT16_C( 25431), -INT16_C( 31774), -INT16_C( 32621), -INT16_C( 27011), -INT16_C( 16104), INT16_C( 5477) } }, { { -INT16_C( 8100), INT16_C( 9506), -INT16_C( 30582), INT16_C( 12821), INT16_C( 5728), -INT16_C( 2442), INT16_C( 1486), INT16_C( 380) }, { INT16_C( 9506), -INT16_C( 8100), INT16_C( 12821), -INT16_C( 30582), -INT16_C( 2442), INT16_C( 5728), INT16_C( 380), INT16_C( 1486) } }, { { -INT16_C( 11274), -INT16_C( 10003), -INT16_C( 27049), -INT16_C( 11148), INT16_C( 2092), -INT16_C( 28332), INT16_C( 27933), INT16_C( 31314) }, { -INT16_C( 10003), -INT16_C( 11274), -INT16_C( 11148), -INT16_C( 27049), -INT16_C( 28332), INT16_C( 2092), INT16_C( 31314), INT16_C( 27933) } }, { { INT16_C( 29773), -INT16_C( 10337), -INT16_C( 19203), INT16_C( 23817), INT16_C( 32714), -INT16_C( 26541), -INT16_C( 12156), INT16_C( 31642) }, { -INT16_C( 10337), INT16_C( 29773), INT16_C( 23817), -INT16_C( 19203), -INT16_C( 26541), INT16_C( 32714), INT16_C( 31642), -INT16_C( 12156) } }, { { -INT16_C( 30813), -INT16_C( 1453), -INT16_C( 14307), INT16_C( 18895), INT16_C( 9168), -INT16_C( 4645), INT16_C( 11664), -INT16_C( 8601) }, { -INT16_C( 1453), -INT16_C( 30813), INT16_C( 18895), -INT16_C( 14307), -INT16_C( 4645), INT16_C( 9168), -INT16_C( 8601), INT16_C( 11664) } }, { { INT16_C( 1698), -INT16_C( 24651), -INT16_C( 16454), -INT16_C( 31236), INT16_C( 20542), -INT16_C( 15587), -INT16_C( 18656), -INT16_C( 15554) }, { -INT16_C( 24651), INT16_C( 1698), -INT16_C( 31236), -INT16_C( 16454), -INT16_C( 15587), INT16_C( 20542), -INT16_C( 15554), -INT16_C( 18656) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t r = simde_vrev32q_s16(a); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else 
fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vrev32q_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev32q_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(161), UINT8_C( 83), UINT8_C(223), UINT8_C(188), UINT8_C( 0), UINT8_C( 2), UINT8_C(160), UINT8_C( 92), UINT8_C(226), UINT8_C(198), UINT8_C( 48), UINT8_C(190), UINT8_C( 11), UINT8_C( 36), UINT8_C(224), UINT8_C(212) }, { UINT8_C(188), UINT8_C(223), UINT8_C( 83), UINT8_C(161), UINT8_C( 92), UINT8_C(160), UINT8_C( 2), UINT8_C( 0), UINT8_C(190), UINT8_C( 48), UINT8_C(198), UINT8_C(226), UINT8_C(212), UINT8_C(224), UINT8_C( 36), UINT8_C( 11) } }, { { UINT8_C(151), UINT8_C(142), UINT8_C(141), UINT8_C(201), UINT8_C(166), UINT8_C(146), UINT8_C( 50), UINT8_C(151), UINT8_C(121), UINT8_C(173), UINT8_C( 52), UINT8_C(209), UINT8_C(112), UINT8_C( 40), UINT8_C(201), UINT8_C( 17) }, { UINT8_C(201), UINT8_C(141), UINT8_C(142), UINT8_C(151), UINT8_C(151), UINT8_C( 50), UINT8_C(146), UINT8_C(166), UINT8_C(209), UINT8_C( 52), UINT8_C(173), UINT8_C(121), UINT8_C( 17), UINT8_C(201), UINT8_C( 40), UINT8_C(112) } }, { { UINT8_C(123), UINT8_C(169), UINT8_C(205), UINT8_C(124), UINT8_C(171), UINT8_C(109), UINT8_C(216), UINT8_C(141), UINT8_C( 51), UINT8_C( 9), UINT8_C( 76), UINT8_C( 62), UINT8_C( 45), UINT8_C( 44), UINT8_C( 19), UINT8_C(196) }, { UINT8_C(124), UINT8_C(205), UINT8_C(169), UINT8_C(123), UINT8_C(141), UINT8_C(216), UINT8_C(109), UINT8_C(171), UINT8_C( 62), UINT8_C( 76), UINT8_C( 9), UINT8_C( 51), UINT8_C(196), UINT8_C( 19), UINT8_C( 44), UINT8_C( 45) } }, { { UINT8_C(186), UINT8_C(160), UINT8_C(141), UINT8_C( 97), UINT8_C( 50), UINT8_C(191), UINT8_C(248), UINT8_C(171), UINT8_C(108), UINT8_C( 44), UINT8_C(124), UINT8_C(220), UINT8_C( 85), 
UINT8_C( 69), UINT8_C(238), UINT8_C(208) }, { UINT8_C( 97), UINT8_C(141), UINT8_C(160), UINT8_C(186), UINT8_C(171), UINT8_C(248), UINT8_C(191), UINT8_C( 50), UINT8_C(220), UINT8_C(124), UINT8_C( 44), UINT8_C(108), UINT8_C(208), UINT8_C(238), UINT8_C( 69), UINT8_C( 85) } }, { { UINT8_C(238), UINT8_C(187), UINT8_C( 76), UINT8_C(153), UINT8_C( 41), UINT8_C( 37), UINT8_C( 39), UINT8_C( 92), UINT8_C( 46), UINT8_C(115), UINT8_C(155), UINT8_C( 91), UINT8_C(159), UINT8_C(174), UINT8_C( 31), UINT8_C( 89) }, { UINT8_C(153), UINT8_C( 76), UINT8_C(187), UINT8_C(238), UINT8_C( 92), UINT8_C( 39), UINT8_C( 37), UINT8_C( 41), UINT8_C( 91), UINT8_C(155), UINT8_C(115), UINT8_C( 46), UINT8_C( 89), UINT8_C( 31), UINT8_C(174), UINT8_C(159) } }, { { UINT8_C( 78), UINT8_C(172), UINT8_C(186), UINT8_C(128), UINT8_C(108), UINT8_C(179), UINT8_C( 43), UINT8_C(216), UINT8_C(223), UINT8_C(167), UINT8_C(181), UINT8_C( 52), UINT8_C(236), UINT8_C(163), UINT8_C( 5), UINT8_C(219) }, { UINT8_C(128), UINT8_C(186), UINT8_C(172), UINT8_C( 78), UINT8_C(216), UINT8_C( 43), UINT8_C(179), UINT8_C(108), UINT8_C( 52), UINT8_C(181), UINT8_C(167), UINT8_C(223), UINT8_C(219), UINT8_C( 5), UINT8_C(163), UINT8_C(236) } }, { { UINT8_C( 94), UINT8_C( 81), UINT8_C(116), UINT8_C(135), UINT8_C(118), UINT8_C(155), UINT8_C(228), UINT8_C(164), UINT8_C( 14), UINT8_C(127), UINT8_MAX, UINT8_C(173), UINT8_C( 45), UINT8_C( 30), UINT8_C( 7), UINT8_C(123) }, { UINT8_C(135), UINT8_C(116), UINT8_C( 81), UINT8_C( 94), UINT8_C(164), UINT8_C(228), UINT8_C(155), UINT8_C(118), UINT8_C(173), UINT8_MAX, UINT8_C(127), UINT8_C( 14), UINT8_C(123), UINT8_C( 7), UINT8_C( 30), UINT8_C( 45) } }, { { UINT8_C(203), UINT8_C(193), UINT8_C(251), UINT8_C( 55), UINT8_C(116), UINT8_C( 38), UINT8_C( 15), UINT8_C( 84), UINT8_C(205), UINT8_C(196), UINT8_C(136), UINT8_C(185), UINT8_C(103), UINT8_C(141), UINT8_C(148), UINT8_C(198) }, { UINT8_C( 55), UINT8_C(251), UINT8_C(193), UINT8_C(203), UINT8_C( 84), UINT8_C( 15), UINT8_C( 38), UINT8_C(116), 
UINT8_C(185), UINT8_C(136), UINT8_C(196), UINT8_C(205), UINT8_C(198), UINT8_C(148), UINT8_C(141), UINT8_C(103) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t r = simde_vrev32q_u8(a); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vrev32q_u8(a); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev32q_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(42975), UINT16_C(62484), UINT16_C(10015), UINT16_C(42627), UINT16_C(64639), UINT16_C(55475), UINT16_C(16695), UINT16_C(29054) }, { UINT16_C(62484), UINT16_C(42975), UINT16_C(42627), UINT16_C(10015), UINT16_C(55475), UINT16_C(64639), UINT16_C(29054), UINT16_C(16695) } }, { { UINT16_C(44679), UINT16_C(21448), UINT16_C(49234), UINT16_C(30812), UINT16_C( 245), UINT16_C(47171), UINT16_C(12985), UINT16_C(39011) }, { UINT16_C(21448), UINT16_C(44679), UINT16_C(30812), UINT16_C(49234), UINT16_C(47171), UINT16_C( 245), UINT16_C(39011), UINT16_C(12985) } }, { { UINT16_C(30938), UINT16_C(63884), UINT16_C( 4255), UINT16_C( 7839), UINT16_C(21004), UINT16_C(17398), UINT16_C(30099), UINT16_C( 7092) }, { UINT16_C(63884), UINT16_C(30938), UINT16_C( 7839), UINT16_C( 4255), UINT16_C(17398), UINT16_C(21004), UINT16_C( 7092), UINT16_C(30099) } }, { { UINT16_C(32035), UINT16_C(30062), UINT16_C(52029), UINT16_C(13293), UINT16_C(12747), UINT16_C(34027), UINT16_C(20067), UINT16_C(15644) }, { UINT16_C(30062), UINT16_C(32035), UINT16_C(13293), UINT16_C(52029), UINT16_C(34027), UINT16_C(12747), UINT16_C(15644), UINT16_C(20067) } }, { { UINT16_C(43206), UINT16_C(26166), 
UINT16_C(54712), UINT16_C(50308), UINT16_C(31527), UINT16_C(47880), UINT16_C(48368), UINT16_C( 5078) }, { UINT16_C(26166), UINT16_C(43206), UINT16_C(50308), UINT16_C(54712), UINT16_C(47880), UINT16_C(31527), UINT16_C( 5078), UINT16_C(48368) } }, { { UINT16_C(17465), UINT16_C(30600), UINT16_C(30223), UINT16_C(55978), UINT16_C(38311), UINT16_C( 2654), UINT16_C(31459), UINT16_C(43592) }, { UINT16_C(30600), UINT16_C(17465), UINT16_C(55978), UINT16_C(30223), UINT16_C( 2654), UINT16_C(38311), UINT16_C(43592), UINT16_C(31459) } }, { { UINT16_C(32291), UINT16_C(56080), UINT16_C(37972), UINT16_C(31648), UINT16_C(43023), UINT16_C(65334), UINT16_C( 3172), UINT16_C(40466) }, { UINT16_C(56080), UINT16_C(32291), UINT16_C(31648), UINT16_C(37972), UINT16_C(65334), UINT16_C(43023), UINT16_C(40466), UINT16_C( 3172) } }, { { UINT16_C(39761), UINT16_C(24597), UINT16_C(48913), UINT16_C(47163), UINT16_C(39252), UINT16_C(14274), UINT16_C( 2580), UINT16_C(14305) }, { UINT16_C(24597), UINT16_C(39761), UINT16_C(47163), UINT16_C(48913), UINT16_C(14274), UINT16_C(39252), UINT16_C(14305), UINT16_C( 2580) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t r = simde_vrev32q_u16(a); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t r = simde_vrev32q_u16(a); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vrev32_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vrev32_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vrev32_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vrev32_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vrev32q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vrev32q_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vrev32q_u8) 
SIMDE_TEST_FUNC_LIST_ENTRY(vrev32q_u16) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/rev64.c000066400000000000000000001147321400333146700164410ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN rev64 #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/rev64.h" #else #include "../../../simde/arm/neon.h" #endif /* N.B. CM: vrev64_f16 and vrev64q_f16 are omitted as * SIMDe has no 16-bit floating point support. */ static int test_simde_vrev64_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 46), -INT8_C( 72), INT8_C( 77), -INT8_C( 5), -INT8_C( 73), -INT8_C( 68), INT8_C( 74), -INT8_C( 6) }, { -INT8_C( 6), INT8_C( 74), -INT8_C( 68), -INT8_C( 73), -INT8_C( 5), INT8_C( 77), -INT8_C( 72), INT8_C( 46) } }, { { INT8_C( 2), INT8_C( 96), -INT8_C( 44), -INT8_C( 111), -INT8_C( 115), -INT8_C( 69), INT8_C( 42), INT8_C( 51) }, { INT8_C( 51), INT8_C( 42), -INT8_C( 69), -INT8_C( 115), -INT8_C( 111), -INT8_C( 44), INT8_C( 96), INT8_C( 2) } }, { { -INT8_C( 53), -INT8_C( 119), INT8_C( 78), INT8_C( 93), -INT8_C( 2), INT8_C( 73), -INT8_C( 56), -INT8_C( 100) }, { -INT8_C( 100), -INT8_C( 56), INT8_C( 73), -INT8_C( 2), INT8_C( 93), INT8_C( 78), -INT8_C( 119), -INT8_C( 53) } }, { { -INT8_C( 71), -INT8_C( 26), INT8_C( 64), -INT8_C( 41), INT8_C( 59), -INT8_C( 30), INT8_C( 14), INT8_C( 106) }, { INT8_C( 106), INT8_C( 14), -INT8_C( 30), INT8_C( 59), -INT8_C( 41), INT8_C( 64), -INT8_C( 26), -INT8_C( 71) } }, { { -INT8_C( 102), INT8_C( 91), INT8_C( 101), INT8_C( 81), INT8_C( 23), -INT8_C( 81), INT8_C( 76), INT8_C( 25) }, { INT8_C( 25), INT8_C( 76), -INT8_C( 81), INT8_C( 23), INT8_C( 81), INT8_C( 101), INT8_C( 91), -INT8_C( 102) } }, { { INT8_C( 15), INT8_C( 32), -INT8_C( 86), -INT8_C( 100), -INT8_C( 37), -INT8_C( 43), -INT8_C( 49), -INT8_C( 90) }, { -INT8_C( 90), -INT8_C( 49), -INT8_C( 43), -INT8_C( 37), -INT8_C( 100), 
-INT8_C( 86), INT8_C( 32), INT8_C( 15) } }, { { INT8_C( 94), INT8_C( 30), INT8_C( 3), INT8_C( 92), INT8_C( 103), -INT8_C( 53), -INT8_C( 8), INT8_C( 32) }, { INT8_C( 32), -INT8_C( 8), -INT8_C( 53), INT8_C( 103), INT8_C( 92), INT8_C( 3), INT8_C( 30), INT8_C( 94) } }, { { -INT8_C( 79), INT8_C( 56), -INT8_C( 8), -INT8_C( 19), INT8_C( 26), INT8_C( 6), INT8_C( 87), -INT8_C( 76) }, { -INT8_C( 76), INT8_C( 87), INT8_C( 6), INT8_C( 26), -INT8_C( 19), -INT8_C( 8), INT8_C( 56), -INT8_C( 79) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t r = simde_vrev64_s8(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vrev64_s8(a); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev64_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 11185), -INT16_C( 7304), -INT16_C( 8734), -INT16_C( 11770) }, { -INT16_C( 11770), -INT16_C( 8734), -INT16_C( 7304), INT16_C( 11185) } }, { { -INT16_C( 21481), -INT16_C( 25761), -INT16_C( 1527), -INT16_C( 18865) }, { -INT16_C( 18865), -INT16_C( 1527), -INT16_C( 25761), -INT16_C( 21481) } }, { { INT16_C( 888), -INT16_C( 6773), -INT16_C( 5383), -INT16_C( 7968) }, { -INT16_C( 7968), -INT16_C( 5383), -INT16_C( 6773), INT16_C( 888) } }, { { INT16_C( 2067), -INT16_C( 23157), INT16_C( 20660), INT16_C( 26010) }, { INT16_C( 26010), INT16_C( 20660), -INT16_C( 23157), INT16_C( 2067) } }, { { INT16_C( 4731), INT16_C( 24136), INT16_C( 20207), INT16_C( 1584) }, { INT16_C( 1584), INT16_C( 20207), INT16_C( 24136), INT16_C( 4731) } }, { { -INT16_C( 28678), INT16_C( 1185), -INT16_C( 3959), INT16_C( 442) }, { INT16_C( 442), 
-INT16_C( 3959), INT16_C( 1185), -INT16_C( 28678) } }, { { INT16_C( 18163), -INT16_C( 4890), -INT16_C( 14800), INT16_C( 17356) }, { INT16_C( 17356), -INT16_C( 14800), -INT16_C( 4890), INT16_C( 18163) } }, { { INT16_C( 22478), -INT16_C( 32023), -INT16_C( 31832), INT16_C( 9191) }, { INT16_C( 9191), -INT16_C( 31832), -INT16_C( 32023), INT16_C( 22478) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t r = simde_vrev64_s16(a); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vrev64_s16(a); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev64_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 1284480829), INT32_C( 1659778769) }, { INT32_C( 1659778769), -INT32_C( 1284480829) } }, { { -INT32_C( 1499471149), -INT32_C( 866681537) }, { -INT32_C( 866681537), -INT32_C( 1499471149) } }, { { -INT32_C( 1697484102), INT32_C( 526077331) }, { INT32_C( 526077331), -INT32_C( 1697484102) } }, { { INT32_C( 184553202), INT32_C( 1331616140) }, { INT32_C( 1331616140), INT32_C( 184553202) } }, { { INT32_C( 184798777), -INT32_C( 663883516) }, { -INT32_C( 663883516), INT32_C( 184798777) } }, { { INT32_C( 310250707), INT32_C( 1138742665) }, { INT32_C( 1138742665), INT32_C( 310250707) } }, { { -INT32_C( 623005369), -INT32_C( 235324929) }, { -INT32_C( 235324929), -INT32_C( 623005369) } }, { { -INT32_C( 738395833), INT32_C( 203643602) }, { INT32_C( 203643602), -INT32_C( 738395833) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t r = 
simde_vrev64_s32(a); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vrev64_s32(a); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev64_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(179), UINT8_C(177), UINT8_C(132), UINT8_C(204), UINT8_C( 13), UINT8_C( 33), UINT8_C(173), UINT8_C(243) }, { UINT8_C(243), UINT8_C(173), UINT8_C( 33), UINT8_C( 13), UINT8_C(204), UINT8_C(132), UINT8_C(177), UINT8_C(179) } }, { { UINT8_C(192), UINT8_C(222), UINT8_C( 23), UINT8_C( 78), UINT8_C( 87), UINT8_C( 89), UINT8_C(198), UINT8_C(132) }, { UINT8_C(132), UINT8_C(198), UINT8_C( 89), UINT8_C( 87), UINT8_C( 78), UINT8_C( 23), UINT8_C(222), UINT8_C(192) } }, { { UINT8_C(236), UINT8_C(188), UINT8_C( 92), UINT8_C(104), UINT8_C(146), UINT8_C(132), UINT8_C(149), UINT8_C( 72) }, { UINT8_C( 72), UINT8_C(149), UINT8_C(132), UINT8_C(146), UINT8_C(104), UINT8_C( 92), UINT8_C(188), UINT8_C(236) } }, { { UINT8_C( 23), UINT8_C( 76), UINT8_C(240), UINT8_C( 20), UINT8_C(135), UINT8_C(245), UINT8_C(178), UINT8_C( 58) }, { UINT8_C( 58), UINT8_C(178), UINT8_C(245), UINT8_C(135), UINT8_C( 20), UINT8_C(240), UINT8_C( 76), UINT8_C( 23) } }, { { UINT8_C(166), UINT8_C( 55), UINT8_C( 6), UINT8_C(179), UINT8_C( 88), UINT8_C(180), UINT8_C(166), UINT8_C( 24) }, { UINT8_C( 24), UINT8_C(166), UINT8_C(180), UINT8_C( 88), UINT8_C(179), UINT8_C( 6), UINT8_C( 55), UINT8_C(166) } }, { { UINT8_C(146), UINT8_C(189), UINT8_C(102), UINT8_C(233), UINT8_C( 23), UINT8_C( 45), UINT8_C(109), UINT8_C( 3) }, { UINT8_C( 3), UINT8_C(109), UINT8_C( 45), UINT8_C( 23), UINT8_C(233), UINT8_C(102), UINT8_C(189), UINT8_C(146) } }, { { UINT8_C(233), UINT8_C(202), 
UINT8_C(107), UINT8_C(124), UINT8_C( 78), UINT8_C( 1), UINT8_C(196), UINT8_C(101) }, { UINT8_C(101), UINT8_C(196), UINT8_C( 1), UINT8_C( 78), UINT8_C(124), UINT8_C(107), UINT8_C(202), UINT8_C(233) } }, { { UINT8_C( 77), UINT8_C(180), UINT8_C(121), UINT8_C(212), UINT8_C(170), UINT8_C( 44), UINT8_C( 15), UINT8_C( 80) }, { UINT8_C( 80), UINT8_C( 15), UINT8_C( 44), UINT8_C(170), UINT8_C(212), UINT8_C(121), UINT8_C(180), UINT8_C( 77) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t r = simde_vrev64_u8(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vrev64_u8(a); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev64_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(59270), UINT16_C(33972), UINT16_C(12613), UINT16_C( 118) }, { UINT16_C( 118), UINT16_C(12613), UINT16_C(33972), UINT16_C(59270) } }, { { UINT16_C(52123), UINT16_C( 9798), UINT16_C(57227), UINT16_C(52269) }, { UINT16_C(52269), UINT16_C(57227), UINT16_C( 9798), UINT16_C(52123) } }, { { UINT16_C(32029), UINT16_C( 8590), UINT16_C(40992), UINT16_C(60985) }, { UINT16_C(60985), UINT16_C(40992), UINT16_C( 8590), UINT16_C(32029) } }, { { UINT16_C(60666), UINT16_C(13202), UINT16_C(21293), UINT16_C(46056) }, { UINT16_C(46056), UINT16_C(21293), UINT16_C(13202), UINT16_C(60666) } }, { { UINT16_C(39995), UINT16_C(32824), UINT16_C(44750), UINT16_C(27008) }, { UINT16_C(27008), UINT16_C(44750), UINT16_C(32824), UINT16_C(39995) } }, { { UINT16_C(51065), UINT16_C( 1167), UINT16_C(48294), UINT16_C(50129) }, { UINT16_C(50129), UINT16_C(48294), UINT16_C( 1167), 
UINT16_C(51065) } }, { { UINT16_C(24377), UINT16_C(23012), UINT16_C( 7679), UINT16_C(63815) }, { UINT16_C(63815), UINT16_C( 7679), UINT16_C(23012), UINT16_C(24377) } }, { { UINT16_C(55562), UINT16_C(14124), UINT16_C( 5165), UINT16_C(26858) }, { UINT16_C(26858), UINT16_C( 5165), UINT16_C(14124), UINT16_C(55562) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t r = simde_vrev64_u16(a); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t r = simde_vrev64_u16(a); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev64_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(2730879067), UINT32_C(2920167489) }, { UINT32_C(2920167489), UINT32_C(2730879067) } }, { { UINT32_C(1333580779), UINT32_C(3300500620) }, { UINT32_C(3300500620), UINT32_C(1333580779) } }, { { UINT32_C(1377871977), UINT32_C( 894461476) }, { UINT32_C( 894461476), UINT32_C(1377871977) } }, { { UINT32_C( 948007493), UINT32_C( 539389893) }, { UINT32_C( 539389893), UINT32_C( 948007493) } }, { { UINT32_C(2579688280), UINT32_C( 239587619) }, { UINT32_C( 239587619), UINT32_C(2579688280) } }, { { UINT32_C( 811516836), UINT32_C(3522434920) }, { UINT32_C(3522434920), UINT32_C( 811516836) } }, { { UINT32_C(3827504319), UINT32_C(3222893434) }, { UINT32_C(3222893434), UINT32_C(3827504319) } }, { { UINT32_C(2868419301), UINT32_C(1657413130) }, { UINT32_C(1657413130), UINT32_C(2868419301) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t r = simde_vrev64_u32(a); 
simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t r = simde_vrev64_u32(a); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev64_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 197.38), SIMDE_FLOAT32_C( -984.96) }, { SIMDE_FLOAT32_C( -984.96), SIMDE_FLOAT32_C( 197.38) } }, { { SIMDE_FLOAT32_C( 987.38), SIMDE_FLOAT32_C( 664.23) }, { SIMDE_FLOAT32_C( 664.23), SIMDE_FLOAT32_C( 987.38) } }, { { SIMDE_FLOAT32_C( 830.72), SIMDE_FLOAT32_C( -164.98) }, { SIMDE_FLOAT32_C( -164.98), SIMDE_FLOAT32_C( 830.72) } }, { { SIMDE_FLOAT32_C( 120.71), SIMDE_FLOAT32_C( -223.00) }, { SIMDE_FLOAT32_C( -223.00), SIMDE_FLOAT32_C( 120.71) } }, { { SIMDE_FLOAT32_C( 907.59), SIMDE_FLOAT32_C( 9.60) }, { SIMDE_FLOAT32_C( 9.60), SIMDE_FLOAT32_C( 907.59) } }, { { SIMDE_FLOAT32_C( 135.79), SIMDE_FLOAT32_C( 135.93) }, { SIMDE_FLOAT32_C( 135.93), SIMDE_FLOAT32_C( 135.79) } }, { { SIMDE_FLOAT32_C( -918.57), SIMDE_FLOAT32_C( -483.52) }, { SIMDE_FLOAT32_C( -483.52), SIMDE_FLOAT32_C( -918.57) } }, { { SIMDE_FLOAT32_C( -308.42), SIMDE_FLOAT32_C( 475.73) }, { SIMDE_FLOAT32_C( 475.73), SIMDE_FLOAT32_C( -308.42) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t r = simde_vrev64_f32(a); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vrev64_f32(a); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); 
simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev64q_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t r[16]; } test_vec[] = { { { -INT8_C( 49), -INT8_C( 126), -INT8_C( 30), INT8_C( 122), -INT8_C( 110), -INT8_C( 111), -INT8_C( 19), -INT8_C( 6), INT8_C( 95), -INT8_C( 73), INT8_C( 6), INT8_C( 16), -INT8_C( 77), -INT8_C( 18), INT8_C( 99), INT8_C( 24) }, { -INT8_C( 6), -INT8_C( 19), -INT8_C( 111), -INT8_C( 110), INT8_C( 122), -INT8_C( 30), -INT8_C( 126), -INT8_C( 49), INT8_C( 24), INT8_C( 99), -INT8_C( 18), -INT8_C( 77), INT8_C( 16), INT8_C( 6), -INT8_C( 73), INT8_C( 95) } }, { { -INT8_C( 97), INT8_C( 39), -INT8_C( 14), -INT8_C( 4), -INT8_C( 33), INT8_C( 56), -INT8_C( 96), INT8_C( 103), -INT8_C( 120), -INT8_C( 111), INT8_C( 13), -INT8_C( 29), -INT8_C( 62), -INT8_C( 117), INT8_C( 80), -INT8_C( 110) }, { INT8_C( 103), -INT8_C( 96), INT8_C( 56), -INT8_C( 33), -INT8_C( 4), -INT8_C( 14), INT8_C( 39), -INT8_C( 97), -INT8_C( 110), INT8_C( 80), -INT8_C( 117), -INT8_C( 62), -INT8_C( 29), INT8_C( 13), -INT8_C( 111), -INT8_C( 120) } }, { { INT8_C( 14), INT8_C( 51), INT8_C( 12), -INT8_C( 96), -INT8_C( 60), -INT8_C( 6), -INT8_C( 101), INT8_C( 35), -INT8_C( 79), -INT8_C( 95), INT8_C( 52), INT8_C( 101), -INT8_C( 112), -INT8_C( 105), INT8_C( 125), INT8_C( 47) }, { INT8_C( 35), -INT8_C( 101), -INT8_C( 6), -INT8_C( 60), -INT8_C( 96), INT8_C( 12), INT8_C( 51), INT8_C( 14), INT8_C( 47), INT8_C( 125), -INT8_C( 105), -INT8_C( 112), INT8_C( 101), INT8_C( 52), -INT8_C( 95), -INT8_C( 79) } }, { { -INT8_C( 65), INT8_C( 111), INT8_C( 43), -INT8_C( 98), -INT8_C( 89), -INT8_C( 53), INT8_C( 5), INT8_C( 47), INT8_C( 92), INT8_C( 19), INT8_C( 19), INT8_C( 31), -INT8_C( 98), INT8_C( 99), -INT8_C( 79), -INT8_C( 84) }, { INT8_C( 47), INT8_C( 5), -INT8_C( 53), -INT8_C( 89), -INT8_C( 98), INT8_C( 43), INT8_C( 111), -INT8_C( 65), -INT8_C( 84), -INT8_C( 79), INT8_C( 99), -INT8_C( 98), INT8_C( 31), INT8_C( 19), INT8_C( 
19), INT8_C( 92) } }, { { -INT8_C( 106), -INT8_C( 67), INT8_C( 77), INT8_C( 90), -INT8_C( 73), -INT8_C( 24), INT8_C( 126), INT8_C( 105), -INT8_C( 119), -INT8_C( 78), -INT8_C( 50), INT8_C( 25), INT8_C( 73), INT8_C( 75), INT8_C( 73), INT8_C( 8) }, { INT8_C( 105), INT8_C( 126), -INT8_C( 24), -INT8_C( 73), INT8_C( 90), INT8_C( 77), -INT8_C( 67), -INT8_C( 106), INT8_C( 8), INT8_C( 73), INT8_C( 75), INT8_C( 73), INT8_C( 25), -INT8_C( 50), -INT8_C( 78), -INT8_C( 119) } }, { { -INT8_C( 70), INT8_C( 116), -INT8_C( 89), INT8_C( 98), INT8_C( 64), -INT8_C( 84), -INT8_C( 111), -INT8_C( 100), -INT8_C( 65), -INT8_C( 92), -INT8_C( 69), INT8_C( 94), INT8_C( 8), INT8_C( 108), INT8_C( 10), -INT8_C( 98) }, { -INT8_C( 100), -INT8_C( 111), -INT8_C( 84), INT8_C( 64), INT8_C( 98), -INT8_C( 89), INT8_C( 116), -INT8_C( 70), -INT8_C( 98), INT8_C( 10), INT8_C( 108), INT8_C( 8), INT8_C( 94), -INT8_C( 69), -INT8_C( 92), -INT8_C( 65) } }, { { INT8_C( 42), INT8_C( 87), -INT8_C( 7), -INT8_C( 31), INT8_C( 63), INT8_C( 119), INT8_C( 74), -INT8_C( 55), INT8_C( 41), INT8_C( 24), -INT8_C( 30), INT8_C( 114), INT8_C( 99), INT8_C( 43), INT8_C( 123), INT8_C( 30) }, { -INT8_C( 55), INT8_C( 74), INT8_C( 119), INT8_C( 63), -INT8_C( 31), -INT8_C( 7), INT8_C( 87), INT8_C( 42), INT8_C( 30), INT8_C( 123), INT8_C( 43), INT8_C( 99), INT8_C( 114), -INT8_C( 30), INT8_C( 24), INT8_C( 41) } }, { { -INT8_C( 96), INT8_C( 34), INT8_MIN, -INT8_C( 32), -INT8_C( 50), INT8_C( 17), INT8_C( 124), -INT8_C( 114), -INT8_C( 74), INT8_C( 56), -INT8_C( 20), -INT8_C( 66), -INT8_C( 92), -INT8_C( 10), INT8_C( 92), -INT8_C( 50) }, { -INT8_C( 114), INT8_C( 124), INT8_C( 17), -INT8_C( 50), -INT8_C( 32), INT8_MIN, INT8_C( 34), -INT8_C( 96), -INT8_C( 50), INT8_C( 92), -INT8_C( 10), -INT8_C( 92), -INT8_C( 66), -INT8_C( 20), INT8_C( 56), -INT8_C( 74) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t r = simde_vrev64q_s8(a); 
simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vrev64q_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev64q_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 16499), INT16_C( 32260), -INT16_C( 29870), -INT16_C( 22736), -INT16_C( 710), INT16_C( 16837), -INT16_C( 2834), INT16_C( 26044) }, { -INT16_C( 22736), -INT16_C( 29870), INT16_C( 32260), INT16_C( 16499), INT16_C( 26044), -INT16_C( 2834), INT16_C( 16837), -INT16_C( 710) } }, { { -INT16_C( 174), -INT16_C( 25727), -INT16_C( 30182), -INT16_C( 26510), -INT16_C( 1989), -INT16_C( 2810), INT16_C( 5206), -INT16_C( 13657) }, { -INT16_C( 26510), -INT16_C( 30182), -INT16_C( 25727), -INT16_C( 174), -INT16_C( 13657), INT16_C( 5206), -INT16_C( 2810), -INT16_C( 1989) } }, { { -INT16_C( 21676), -INT16_C( 22712), INT16_C( 30775), INT16_C( 29006), INT16_C( 5237), INT16_C( 25779), INT16_C( 28424), INT16_C( 23241) }, { INT16_C( 29006), INT16_C( 30775), -INT16_C( 22712), -INT16_C( 21676), INT16_C( 23241), INT16_C( 28424), INT16_C( 25779), INT16_C( 5237) } }, { { INT16_C( 19311), -INT16_C( 30219), INT16_C( 26581), INT16_C( 4385), INT16_C( 10336), -INT16_C( 18938), -INT16_C( 21188), -INT16_C( 28288) }, { INT16_C( 4385), INT16_C( 26581), -INT16_C( 30219), INT16_C( 19311), -INT16_C( 28288), -INT16_C( 21188), -INT16_C( 18938), INT16_C( 10336) } }, { { -INT16_C( 14247), -INT16_C( 28616), -INT16_C( 31168), -INT16_C( 18943), -INT16_C( 19302), -INT16_C( 23782), -INT16_C( 7388), -INT16_C( 27651) }, { -INT16_C( 18943), -INT16_C( 31168), -INT16_C( 28616), -INT16_C( 14247), -INT16_C( 27651), -INT16_C( 7388), -INT16_C( 23782), -INT16_C( 19302) } }, { { -INT16_C( 3282), 
INT16_C( 1052), INT16_C( 15706), -INT16_C( 17899), INT16_C( 7013), -INT16_C( 23951), -INT16_C( 3639), INT16_C( 8755) }, { -INT16_C( 17899), INT16_C( 15706), INT16_C( 1052), -INT16_C( 3282), INT16_C( 8755), -INT16_C( 3639), -INT16_C( 23951), INT16_C( 7013) } }, { { INT16_C( 27578), -INT16_C( 1358), -INT16_C( 19471), -INT16_C( 29520), -INT16_C( 13720), -INT16_C( 29649), INT16_C( 11438), -INT16_C( 9185) }, { -INT16_C( 29520), -INT16_C( 19471), -INT16_C( 1358), INT16_C( 27578), -INT16_C( 9185), INT16_C( 11438), -INT16_C( 29649), -INT16_C( 13720) } }, { { INT16_C( 15135), INT16_C( 31456), -INT16_C( 2696), -INT16_C( 8652), -INT16_C( 23279), -INT16_C( 9600), -INT16_C( 19561), INT16_C( 20988) }, { -INT16_C( 8652), -INT16_C( 2696), INT16_C( 31456), INT16_C( 15135), INT16_C( 20988), -INT16_C( 19561), -INT16_C( 9600), -INT16_C( 23279) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t r = simde_vrev64q_s16(a); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vrev64q_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev64q_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 853077418), -INT32_C( 1621561868), -INT32_C( 976200669), INT32_C( 1316530513) }, { -INT32_C( 1621561868), INT32_C( 853077418), INT32_C( 1316530513), -INT32_C( 976200669) } }, { { INT32_C( 1223214716), INT32_C( 1719530838), -INT32_C( 1485267583), INT32_C( 1261568161) }, { INT32_C( 1719530838), INT32_C( 1223214716), INT32_C( 1261568161), -INT32_C( 1485267583) } }, { { -INT32_C( 495055891), INT32_C( 461494008), -INT32_C( 2082451150), INT32_C( 
1993431546) }, { INT32_C( 461494008), -INT32_C( 495055891), INT32_C( 1993431546), -INT32_C( 2082451150) } }, { { INT32_C( 1992276255), INT32_C( 819739823), INT32_C( 2077709529), INT32_C( 1103497556) }, { INT32_C( 819739823), INT32_C( 1992276255), INT32_C( 1103497556), INT32_C( 2077709529) } }, { { INT32_C( 203637780), INT32_C( 1277666330), -INT32_C( 254867210), -INT32_C( 2140692383) }, { INT32_C( 1277666330), INT32_C( 203637780), -INT32_C( 2140692383), -INT32_C( 254867210) } }, { { INT32_C( 167126618), INT32_C( 1010422370), INT32_C( 2058817574), INT32_C( 784104729) }, { INT32_C( 1010422370), INT32_C( 167126618), INT32_C( 784104729), INT32_C( 2058817574) } }, { { -INT32_C( 600121406), INT32_C( 2049532548), -INT32_C( 882182038), -INT32_C( 213134951) }, { INT32_C( 2049532548), -INT32_C( 600121406), -INT32_C( 213134951), -INT32_C( 882182038) } }, { { INT32_C( 1526481655), INT32_C( 999699732), INT32_C( 1588940101), -INT32_C( 1936952886) }, { INT32_C( 999699732), INT32_C( 1526481655), -INT32_C( 1936952886), INT32_C( 1588940101) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t r = simde_vrev64q_s32(a); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vrev64q_s32(a); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev64q_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(105), UINT8_C( 82), UINT8_C( 6), UINT8_C(116), UINT8_C( 54), UINT8_C(104), UINT8_C( 1), UINT8_C( 54), UINT8_C(215), UINT8_C(206), UINT8_C( 89), UINT8_C(218), UINT8_C( 41), UINT8_C(170), UINT8_C( 21), UINT8_C(195) }, { UINT8_C( 54), UINT8_C( 1), 
UINT8_C(104), UINT8_C( 54), UINT8_C(116), UINT8_C( 6), UINT8_C( 82), UINT8_C(105), UINT8_C(195), UINT8_C( 21), UINT8_C(170), UINT8_C( 41), UINT8_C(218), UINT8_C( 89), UINT8_C(206), UINT8_C(215) } }, { { UINT8_C( 19), UINT8_C(118), UINT8_C(243), UINT8_C( 49), UINT8_C(101), UINT8_C( 80), UINT8_C( 91), UINT8_C(134), UINT8_C( 54), UINT8_C(199), UINT8_C(158), UINT8_C( 68), UINT8_C(237), UINT8_C(174), UINT8_C(137), UINT8_C( 87) }, { UINT8_C(134), UINT8_C( 91), UINT8_C( 80), UINT8_C(101), UINT8_C( 49), UINT8_C(243), UINT8_C(118), UINT8_C( 19), UINT8_C( 87), UINT8_C(137), UINT8_C(174), UINT8_C(237), UINT8_C( 68), UINT8_C(158), UINT8_C(199), UINT8_C( 54) } }, { { UINT8_C( 1), UINT8_C(143), UINT8_C(203), UINT8_C( 55), UINT8_C(248), UINT8_C(204), UINT8_C(109), UINT8_C(207), UINT8_C(155), UINT8_C(198), UINT8_C(169), UINT8_C(196), UINT8_C(112), UINT8_C(191), UINT8_C(136), UINT8_C(131) }, { UINT8_C(207), UINT8_C(109), UINT8_C(204), UINT8_C(248), UINT8_C( 55), UINT8_C(203), UINT8_C(143), UINT8_C( 1), UINT8_C(131), UINT8_C(136), UINT8_C(191), UINT8_C(112), UINT8_C(196), UINT8_C(169), UINT8_C(198), UINT8_C(155) } }, { { UINT8_C( 53), UINT8_C(123), UINT8_C(181), UINT8_C(154), UINT8_C(203), UINT8_C( 16), UINT8_C( 32), UINT8_C( 1), UINT8_C(215), UINT8_C(190), UINT8_C( 69), UINT8_C(197), UINT8_C(109), UINT8_C(206), UINT8_C( 28), UINT8_C(110) }, { UINT8_C( 1), UINT8_C( 32), UINT8_C( 16), UINT8_C(203), UINT8_C(154), UINT8_C(181), UINT8_C(123), UINT8_C( 53), UINT8_C(110), UINT8_C( 28), UINT8_C(206), UINT8_C(109), UINT8_C(197), UINT8_C( 69), UINT8_C(190), UINT8_C(215) } }, { { UINT8_C( 94), UINT8_C(231), UINT8_C(165), UINT8_C( 86), UINT8_C(180), UINT8_C( 19), UINT8_C( 37), UINT8_C( 79), UINT8_C(217), UINT8_C(207), UINT8_C( 19), UINT8_C( 74), UINT8_C(142), UINT8_C(155), UINT8_C(205), UINT8_C(195) }, { UINT8_C( 79), UINT8_C( 37), UINT8_C( 19), UINT8_C(180), UINT8_C( 86), UINT8_C(165), UINT8_C(231), UINT8_C( 94), UINT8_C(195), UINT8_C(205), UINT8_C(155), UINT8_C(142), UINT8_C( 74), UINT8_C( 
19), UINT8_C(207), UINT8_C(217) } }, { { UINT8_C( 22), UINT8_C(130), UINT8_C( 93), UINT8_C(225), UINT8_C(146), UINT8_C(125), UINT8_C(226), UINT8_C(106), UINT8_C( 60), UINT8_C( 40), UINT8_C( 47), UINT8_C(169), UINT8_C(246), UINT8_C( 75), UINT8_C( 23), UINT8_C( 84) }, { UINT8_C(106), UINT8_C(226), UINT8_C(125), UINT8_C(146), UINT8_C(225), UINT8_C( 93), UINT8_C(130), UINT8_C( 22), UINT8_C( 84), UINT8_C( 23), UINT8_C( 75), UINT8_C(246), UINT8_C(169), UINT8_C( 47), UINT8_C( 40), UINT8_C( 60) } }, { { UINT8_C( 50), UINT8_C(188), UINT8_C(170), UINT8_C(230), UINT8_C(207), UINT8_C(208), UINT8_C( 53), UINT8_C(169), UINT8_C(159), UINT8_C( 73), UINT8_C(243), UINT8_C( 45), UINT8_C(228), UINT8_C(192), UINT8_C(240), UINT8_C(251) }, { UINT8_C(169), UINT8_C( 53), UINT8_C(208), UINT8_C(207), UINT8_C(230), UINT8_C(170), UINT8_C(188), UINT8_C( 50), UINT8_C(251), UINT8_C(240), UINT8_C(192), UINT8_C(228), UINT8_C( 45), UINT8_C(243), UINT8_C( 73), UINT8_C(159) } }, { { UINT8_C( 67), UINT8_C( 77), UINT8_C(220), UINT8_C(213), UINT8_C(202), UINT8_C(191), UINT8_C( 63), UINT8_C( 6), UINT8_C(231), UINT8_C(110), UINT8_C(175), UINT8_C(221), UINT8_C(185), UINT8_C(198), UINT8_C( 50), UINT8_C(236) }, { UINT8_C( 6), UINT8_C( 63), UINT8_C(191), UINT8_C(202), UINT8_C(213), UINT8_C(220), UINT8_C( 77), UINT8_C( 67), UINT8_C(236), UINT8_C( 50), UINT8_C(198), UINT8_C(185), UINT8_C(221), UINT8_C(175), UINT8_C(110), UINT8_C(231) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t r = simde_vrev64q_u8(a); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vrev64q_u8(a); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int 
test_simde_vrev64q_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(23729), UINT16_C(35203), UINT16_C(54806), UINT16_C(21539), UINT16_C(50954), UINT16_C(24064), UINT16_C(21898), UINT16_C(26296) }, { UINT16_C(21539), UINT16_C(54806), UINT16_C(35203), UINT16_C(23729), UINT16_C(26296), UINT16_C(21898), UINT16_C(24064), UINT16_C(50954) } }, { { UINT16_C(49935), UINT16_C(59261), UINT16_C(59617), UINT16_C(46782), UINT16_C(57390), UINT16_C(44550), UINT16_C(19947), UINT16_C(40107) }, { UINT16_C(46782), UINT16_C(59617), UINT16_C(59261), UINT16_C(49935), UINT16_C(40107), UINT16_C(19947), UINT16_C(44550), UINT16_C(57390) } }, { { UINT16_C(11946), UINT16_C(49189), UINT16_C(18436), UINT16_C( 3860), UINT16_C( 5136), UINT16_C(39533), UINT16_C( 9577), UINT16_C(30977) }, { UINT16_C( 3860), UINT16_C(18436), UINT16_C(49189), UINT16_C(11946), UINT16_C(30977), UINT16_C( 9577), UINT16_C(39533), UINT16_C( 5136) } }, { { UINT16_C(32488), UINT16_C(51552), UINT16_C( 7783), UINT16_C(38271), UINT16_C(34558), UINT16_C(59716), UINT16_C(61395), UINT16_C(32133) }, { UINT16_C(38271), UINT16_C( 7783), UINT16_C(51552), UINT16_C(32488), UINT16_C(32133), UINT16_C(61395), UINT16_C(59716), UINT16_C(34558) } }, { { UINT16_C(43805), UINT16_C( 8509), UINT16_C(21235), UINT16_C( 816), UINT16_C(40294), UINT16_C(53406), UINT16_C(40898), UINT16_C(43593) }, { UINT16_C( 816), UINT16_C(21235), UINT16_C( 8509), UINT16_C(43805), UINT16_C(43593), UINT16_C(40898), UINT16_C(53406), UINT16_C(40294) } }, { { UINT16_C(43293), UINT16_C(33907), UINT16_C(62407), UINT16_C(50714), UINT16_C(24185), UINT16_C(19631), UINT16_C(13645), UINT16_C(27338) }, { UINT16_C(50714), UINT16_C(62407), UINT16_C(33907), UINT16_C(43293), UINT16_C(27338), UINT16_C(13645), UINT16_C(19631), UINT16_C(24185) } }, { { UINT16_C( 2016), UINT16_C(54155), UINT16_C(48217), UINT16_C(49367), UINT16_C(30041), UINT16_C( 7312), UINT16_C(55572), UINT16_C(12742) }, { UINT16_C(49367), 
UINT16_C(48217), UINT16_C(54155), UINT16_C( 2016), UINT16_C(12742), UINT16_C(55572), UINT16_C( 7312), UINT16_C(30041) } }, { { UINT16_C(14978), UINT16_C(18870), UINT16_C(53293), UINT16_C(42511), UINT16_C(48942), UINT16_C(31730), UINT16_C(48372), UINT16_C(54501) }, { UINT16_C(42511), UINT16_C(53293), UINT16_C(18870), UINT16_C(14978), UINT16_C(54501), UINT16_C(48372), UINT16_C(31730), UINT16_C(48942) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t r = simde_vrev64q_u16(a); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t r = simde_vrev64q_u16(a); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev64q_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(1434993302), UINT32_C( 552597513), UINT32_C(1341026527), UINT32_C(1033003339) }, { UINT32_C( 552597513), UINT32_C(1434993302), UINT32_C(1033003339), UINT32_C(1341026527) } }, { { UINT32_C( 233765670), UINT32_C( 773899934), UINT32_C(1736511025), UINT32_C(3921435987) }, { UINT32_C( 773899934), UINT32_C( 233765670), UINT32_C(3921435987), UINT32_C(1736511025) } }, { { UINT32_C(2839495839), UINT32_C( 482946621), UINT32_C(3932993695), UINT32_C(1059651097) }, { UINT32_C( 482946621), UINT32_C(2839495839), UINT32_C(1059651097), UINT32_C(3932993695) } }, { { UINT32_C(2555123449), UINT32_C( 247885277), UINT32_C(3463792507), UINT32_C(1136144804) }, { UINT32_C( 247885277), UINT32_C(2555123449), UINT32_C(1136144804), UINT32_C(3463792507) } }, { { UINT32_C(3018651510), UINT32_C(3301946917), UINT32_C(2276408174), UINT32_C( 868669241) }, { UINT32_C(3301946917), 
UINT32_C(3018651510), UINT32_C( 868669241), UINT32_C(2276408174) } }, { { UINT32_C(3402306285), UINT32_C(4225274239), UINT32_C(2093567448), UINT32_C(4122968447) }, { UINT32_C(4225274239), UINT32_C(3402306285), UINT32_C(4122968447), UINT32_C(2093567448) } }, { { UINT32_C(2661854328), UINT32_C(3496114018), UINT32_C(3965129139), UINT32_C(3592363496) }, { UINT32_C(3496114018), UINT32_C(2661854328), UINT32_C(3592363496), UINT32_C(3965129139) } }, { { UINT32_C(2946558511), UINT32_C(1403681147), UINT32_C(1171223494), UINT32_C(1832554485) }, { UINT32_C(1403681147), UINT32_C(2946558511), UINT32_C(1832554485), UINT32_C(1171223494) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t r = simde_vrev64q_u32(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t r = simde_vrev64q_u32(a); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrev64q_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -324.81), SIMDE_FLOAT32_C( -870.47), SIMDE_FLOAT32_C( 342.98), SIMDE_FLOAT32_C( -492.44) }, { SIMDE_FLOAT32_C( -870.47), SIMDE_FLOAT32_C( -324.81), SIMDE_FLOAT32_C( -492.44), SIMDE_FLOAT32_C( 342.98) } }, { { SIMDE_FLOAT32_C( -289.18), SIMDE_FLOAT32_C( -318.73), SIMDE_FLOAT32_C( -927.82), SIMDE_FLOAT32_C( 328.84) }, { SIMDE_FLOAT32_C( -318.73), SIMDE_FLOAT32_C( -289.18), SIMDE_FLOAT32_C( 328.84), SIMDE_FLOAT32_C( -927.82) } }, { { SIMDE_FLOAT32_C( -870.55), SIMDE_FLOAT32_C( -305.56), SIMDE_FLOAT32_C( -62.60), SIMDE_FLOAT32_C( -509.92) }, { SIMDE_FLOAT32_C( -305.56), SIMDE_FLOAT32_C( -870.55), SIMDE_FLOAT32_C( -509.92), 
SIMDE_FLOAT32_C( -62.60) } }, { { SIMDE_FLOAT32_C( 55.68), SIMDE_FLOAT32_C( 856.66), SIMDE_FLOAT32_C( -934.25), SIMDE_FLOAT32_C( 336.42) }, { SIMDE_FLOAT32_C( 856.66), SIMDE_FLOAT32_C( 55.68), SIMDE_FLOAT32_C( 336.42), SIMDE_FLOAT32_C( -934.25) } }, { { SIMDE_FLOAT32_C( 501.43), SIMDE_FLOAT32_C( -571.21), SIMDE_FLOAT32_C( -251.52), SIMDE_FLOAT32_C( 672.13) }, { SIMDE_FLOAT32_C( -571.21), SIMDE_FLOAT32_C( 501.43), SIMDE_FLOAT32_C( 672.13), SIMDE_FLOAT32_C( -251.52) } }, { { SIMDE_FLOAT32_C( 844.20), SIMDE_FLOAT32_C( 87.76), SIMDE_FLOAT32_C( 609.29), SIMDE_FLOAT32_C( 856.58) }, { SIMDE_FLOAT32_C( 87.76), SIMDE_FLOAT32_C( 844.20), SIMDE_FLOAT32_C( 856.58), SIMDE_FLOAT32_C( 609.29) } }, { { SIMDE_FLOAT32_C( -229.44), SIMDE_FLOAT32_C( -640.32), SIMDE_FLOAT32_C( -844.07), SIMDE_FLOAT32_C( 466.08) }, { SIMDE_FLOAT32_C( -640.32), SIMDE_FLOAT32_C( -229.44), SIMDE_FLOAT32_C( 466.08), SIMDE_FLOAT32_C( -844.07) } }, { { SIMDE_FLOAT32_C( 517.84), SIMDE_FLOAT32_C( 101.16), SIMDE_FLOAT32_C( 322.38), SIMDE_FLOAT32_C( -806.97) }, { SIMDE_FLOAT32_C( 101.16), SIMDE_FLOAT32_C( 517.84), SIMDE_FLOAT32_C( -806.97), SIMDE_FLOAT32_C( 322.38) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t r = simde_vrev64q_f32(a); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vrev64q_f32(a); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vrev64_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64_u32) 
//SIMDE_TEST_FUNC_LIST_ENTRY(vrev64_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64q_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64q_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64q_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64q_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64q_u32) //SIMDE_TEST_FUNC_LIST_ENTRY(vrev64q_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64q_f32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/rhadd.c000066400000000000000000001377501400333146700165620ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN rhadd #include "test-neon.h" #include "../../../simde/arm/neon/rhadd.h" static int test_simde_vrhadd_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 36), -INT8_C( 29), -INT8_C( 56), INT8_C( 59), -INT8_C( 57), -INT8_C( 47), INT8_C( 80), -INT8_C( 118) }, { INT8_C( 67), INT8_C( 70), INT8_C( 3), -INT8_C( 93), INT8_C( 56), -INT8_C( 95), -INT8_C( 31), -INT8_C( 57) }, { INT8_C( 52), INT8_C( 21), -INT8_C( 26), -INT8_C( 17), INT8_C( 0), -INT8_C( 71), INT8_C( 25), -INT8_C( 87) } }, { { INT8_C( 122), -INT8_C( 28), INT8_C( 39), -INT8_C( 51), INT8_C( 36), INT8_C( 123), -INT8_C( 105), -INT8_C( 67) }, { -INT8_C( 33), INT8_C( 123), -INT8_C( 90), -INT8_C( 110), -INT8_C( 50), -INT8_C( 124), -INT8_C( 64), -INT8_C( 14) }, { INT8_C( 45), INT8_C( 48), -INT8_C( 25), -INT8_C( 80), -INT8_C( 7), INT8_C( 0), -INT8_C( 84), -INT8_C( 40) } }, { { INT8_C( 103), -INT8_C( 119), INT8_C( 45), INT8_C( 46), INT8_C( 90), INT8_C( 125), -INT8_C( 72), -INT8_C( 99) }, { -INT8_C( 61), -INT8_C( 69), INT8_C( 64), -INT8_C( 5), INT8_C( 92), INT8_C( 34), -INT8_C( 62), -INT8_C( 42) }, { INT8_C( 21), -INT8_C( 94), INT8_C( 55), INT8_C( 21), INT8_C( 91), INT8_C( 80), -INT8_C( 67), -INT8_C( 70) } }, { { INT8_C( 6), -INT8_C( 22), -INT8_C( 92), INT8_C( 42), INT8_C( 101), INT8_C( 59), -INT8_C( 25), INT8_C( 68) }, { -INT8_C( 74), -INT8_C( 114), -INT8_C( 
42), -INT8_C( 124), INT8_C( 18), -INT8_C( 105), INT8_C( 119), INT8_C( 121) }, { -INT8_C( 34), -INT8_C( 68), -INT8_C( 67), -INT8_C( 41), INT8_C( 60), -INT8_C( 23), INT8_C( 47), INT8_C( 95) } }, { { INT8_C( 32), -INT8_C( 92), -INT8_C( 89), INT8_C( 122), INT8_C( 34), INT8_C( 96), INT8_C( 24), -INT8_C( 27) }, { INT8_C( 27), INT8_C( 88), -INT8_C( 31), INT8_C( 120), INT8_C( 122), -INT8_C( 93), INT8_C( 78), -INT8_C( 127) }, { INT8_C( 30), -INT8_C( 2), -INT8_C( 60), INT8_C( 121), INT8_C( 78), INT8_C( 2), INT8_C( 51), -INT8_C( 77) } }, { { -INT8_C( 115), -INT8_C( 14), -INT8_C( 85), -INT8_C( 13), INT8_C( 45), -INT8_C( 109), INT8_C( 55), -INT8_C( 29) }, { INT8_C( 33), INT8_C( 14), INT8_C( 104), INT8_C( 51), -INT8_C( 91), -INT8_C( 33), -INT8_C( 84), -INT8_C( 59) }, { -INT8_C( 41), INT8_C( 0), INT8_C( 10), INT8_C( 19), -INT8_C( 23), -INT8_C( 71), -INT8_C( 14), -INT8_C( 44) } }, { { -INT8_C( 125), INT8_C( 83), INT8_C( 63), -INT8_C( 91), -INT8_C( 77), INT8_C( 87), -INT8_C( 117), -INT8_C( 49) }, { -INT8_C( 80), INT8_C( 108), INT8_C( 71), INT8_C( 42), INT8_C( 15), -INT8_C( 107), -INT8_C( 85), -INT8_C( 99) }, { -INT8_C( 102), INT8_C( 96), INT8_C( 67), -INT8_C( 24), -INT8_C( 31), -INT8_C( 10), -INT8_C( 101), -INT8_C( 74) } }, { { -INT8_C( 120), INT8_C( 87), -INT8_C( 112), -INT8_C( 75), -INT8_C( 22), -INT8_C( 57), -INT8_C( 103), INT8_C( 11) }, { -INT8_C( 43), INT8_C( 1), INT8_C( 62), INT8_C( 122), -INT8_C( 32), -INT8_C( 22), INT8_C( 63), INT8_C( 99) }, { -INT8_C( 81), INT8_C( 44), -INT8_C( 25), INT8_C( 24), -INT8_C( 27), -INT8_C( 39), -INT8_C( 20), INT8_C( 55) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vrhadd_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); 
simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vrhadd_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrhadd_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 14695), INT16_C( 2201), -INT16_C( 12911), -INT16_C( 29684) }, { INT16_C( 27918), INT16_C( 16028), -INT16_C( 16180), -INT16_C( 15218) }, { INT16_C( 6612), INT16_C( 9115), -INT16_C( 14545), -INT16_C( 22451) } }, { { INT16_C( 19026), INT16_C( 15451), -INT16_C( 3167), -INT16_C( 15174) }, { -INT16_C( 13769), INT16_C( 10656), INT16_C( 509), -INT16_C( 26635) }, { INT16_C( 2629), INT16_C( 13054), -INT16_C( 1329), -INT16_C( 20904) } }, { { -INT16_C( 28984), INT16_C( 22943), -INT16_C( 21668), INT16_C( 27365) }, { -INT16_C( 32232), -INT16_C( 6744), INT16_C( 13890), -INT16_C( 27223) }, { -INT16_C( 30608), INT16_C( 8100), -INT16_C( 3889), INT16_C( 71) } }, { { INT16_C( 1152), INT16_C( 8657), -INT16_C( 29449), INT16_C( 12005) }, { -INT16_C( 31402), INT16_C( 21335), INT16_C( 19591), INT16_C( 20458) }, { -INT16_C( 15125), INT16_C( 14996), -INT16_C( 4929), INT16_C( 16232) } }, { { -INT16_C( 30245), INT16_C( 14248), -INT16_C( 29131), INT16_C( 19873) }, { INT16_C( 18704), INT16_C( 21042), -INT16_C( 9345), -INT16_C( 25) }, { -INT16_C( 5770), INT16_C( 17645), -INT16_C( 19238), INT16_C( 9924) } }, { { -INT16_C( 17953), -INT16_C( 10720), INT16_C( 1349), -INT16_C( 25852) }, { INT16_C( 23691), INT16_C( 4846), -INT16_C( 9816), -INT16_C( 31903) }, { INT16_C( 2869), -INT16_C( 2937), -INT16_C( 4233), -INT16_C( 28877) } }, { { INT16_C( 2402), -INT16_C( 26694), INT16_C( 23447), -INT16_C( 22555) }, { INT16_C( 6052), INT16_C( 9210), -INT16_C( 7693), -INT16_C( 11742) }, { INT16_C( 4227), -INT16_C( 8742), INT16_C( 7877), -INT16_C( 17148) } 
}, { { INT16_C( 17050), -INT16_C( 8279), -INT16_C( 21176), -INT16_C( 11398) }, { INT16_C( 26889), -INT16_C( 19739), INT16_C( 17986), -INT16_C( 23499) }, { INT16_C( 21970), -INT16_C( 14009), -INT16_C( 1595), -INT16_C( 17448) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vrhadd_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vrhadd_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrhadd_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 2031040152), INT32_C( 1764816448) }, { -INT32_C( 381377707), -INT32_C( 1742334847) }, { -INT32_C( 1206208929), INT32_C( 11240801) } }, { { INT32_C( 1058518367), INT32_C( 1886799718) }, { -INT32_C( 157827149), -INT32_C( 303142780) }, { INT32_C( 450345609), INT32_C( 791828469) } }, { { INT32_C( 1920196145), INT32_C( 702260180) }, { -INT32_C( 955048122), -INT32_C( 1822475980) }, { INT32_C( 482574012), -INT32_C( 560107900) } }, { { INT32_C( 1439856366), INT32_C( 1908754621) }, { -INT32_C( 1939383288), -INT32_C( 159820092) }, { -INT32_C( 249763461), INT32_C( 874467265) } }, { { INT32_C( 157871156), -INT32_C( 718126193) }, { -INT32_C( 1751300765), INT32_C( 1831533695) }, { -INT32_C( 796714804), INT32_C( 556703751) } }, { { INT32_C( 818084979), INT32_C( 1285654340) }, { -INT32_C( 1462171165), -INT32_C( 1818340769) }, { -INT32_C( 322043093), -INT32_C( 266343214) } 
}, { { -INT32_C( 828635585), -INT32_C( 1398485175) }, { -INT32_C( 1824308972), -INT32_C( 1342083779) }, { -INT32_C( 1326472278), -INT32_C( 1370284477) } }, { { -INT32_C( 1360936087), INT32_C( 788169291) }, { -INT32_C( 355019893), INT32_C( 1702786086) }, { -INT32_C( 857977990), INT32_C( 1245477689) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vrhadd_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vrhadd_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrhadd_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 80), UINT8_C(186), UINT8_C(146), UINT8_C(154), UINT8_C(188), UINT8_C(150), UINT8_C(103), UINT8_C( 95) }, { UINT8_C( 52), UINT8_C(115), UINT8_C(142), UINT8_C( 80), UINT8_C( 70), UINT8_C( 37), UINT8_C(168), UINT8_C(229) }, { UINT8_C( 66), UINT8_C(151), UINT8_C(144), UINT8_C(117), UINT8_C(129), UINT8_C( 94), UINT8_C(136), UINT8_C(162) } }, { { UINT8_C(180), UINT8_C( 27), UINT8_C(135), UINT8_C(242), UINT8_C( 51), UINT8_C(146), UINT8_C( 99), UINT8_C(105) }, { UINT8_C(117), UINT8_C(218), UINT8_C( 3), UINT8_C(199), UINT8_C(230), UINT8_C(129), UINT8_C(195), UINT8_C( 55) }, { UINT8_C(149), UINT8_C(123), UINT8_C( 69), UINT8_C(221), UINT8_C(141), UINT8_C(138), UINT8_C(147), UINT8_C( 80) } }, { { UINT8_C( 59), UINT8_C( 85), UINT8_C(209), UINT8_C(247), UINT8_C(235), UINT8_C( 56), UINT8_C( 86), 
UINT8_C( 31) }, { UINT8_C(171), UINT8_C(229), UINT8_C(111), UINT8_C(241), UINT8_C( 10), UINT8_C( 23), UINT8_C(214), UINT8_C(191) }, { UINT8_C(115), UINT8_C(157), UINT8_C(160), UINT8_C(244), UINT8_C(123), UINT8_C( 40), UINT8_C(150), UINT8_C(111) } }, { { UINT8_C( 50), UINT8_C( 94), UINT8_C(177), UINT8_C(101), UINT8_C(240), UINT8_C( 20), UINT8_C(206), UINT8_C(101) }, { UINT8_C(238), UINT8_C(209), UINT8_C( 44), UINT8_C(213), UINT8_C( 82), UINT8_C(239), UINT8_C( 12), UINT8_C(141) }, { UINT8_C(144), UINT8_C(152), UINT8_C(111), UINT8_C(157), UINT8_C(161), UINT8_C(130), UINT8_C(109), UINT8_C(121) } }, { { UINT8_C( 68), UINT8_C(221), UINT8_C(133), UINT8_C( 47), UINT8_C( 21), UINT8_C(219), UINT8_C( 79), UINT8_C(192) }, { UINT8_C(192), UINT8_C(190), UINT8_C(177), UINT8_C(203), UINT8_C(214), UINT8_C(135), UINT8_C(138), UINT8_C( 8) }, { UINT8_C(130), UINT8_C(206), UINT8_C(155), UINT8_C(125), UINT8_C(118), UINT8_C(177), UINT8_C(109), UINT8_C(100) } }, { { UINT8_C(229), UINT8_C( 59), UINT8_C(110), UINT8_C(213), UINT8_C( 79), UINT8_C( 60), UINT8_C( 58), UINT8_C( 61) }, { UINT8_C( 14), UINT8_C(102), UINT8_C( 18), UINT8_C( 96), UINT8_C( 85), UINT8_C( 30), UINT8_C(238), UINT8_C(153) }, { UINT8_C(122), UINT8_C( 81), UINT8_C( 64), UINT8_C(155), UINT8_C( 82), UINT8_C( 45), UINT8_C(148), UINT8_C(107) } }, { { UINT8_C(251), UINT8_C(115), UINT8_C(201), UINT8_C( 16), UINT8_C( 78), UINT8_C( 24), UINT8_C(208), UINT8_C( 15) }, { UINT8_C(214), UINT8_C(129), UINT8_C(218), UINT8_C(172), UINT8_C( 9), UINT8_C(100), UINT8_C(181), UINT8_C(238) }, { UINT8_C(233), UINT8_C(122), UINT8_C(210), UINT8_C( 94), UINT8_C( 44), UINT8_C( 62), UINT8_C(195), UINT8_C(127) } }, { { UINT8_C(159), UINT8_C( 35), UINT8_C(196), UINT8_C(238), UINT8_C( 95), UINT8_C(254), UINT8_C( 43), UINT8_C(109) }, { UINT8_C(101), UINT8_C( 62), UINT8_C(206), UINT8_C(186), UINT8_C( 92), UINT8_C(188), UINT8_C( 84), UINT8_C( 88) }, { UINT8_C(130), UINT8_C( 49), UINT8_C(201), UINT8_C(212), UINT8_C( 94), UINT8_C(221), UINT8_C( 64), UINT8_C( 
99) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vrhadd_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vrhadd_u8(a, b); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrhadd_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(20571), UINT16_C(32595), UINT16_C(53673), UINT16_C(25528) }, { UINT16_C(32933), UINT16_C(36832), UINT16_C(64829), UINT16_C(10516) }, { UINT16_C(26752), UINT16_C(34714), UINT16_C(59251), UINT16_C(18022) } }, { { UINT16_C(59927), UINT16_C(49725), UINT16_C(56129), UINT16_C(50223) }, { UINT16_C(19705), UINT16_C(59602), UINT16_C(22360), UINT16_C(45941) }, { UINT16_C(39816), UINT16_C(54664), UINT16_C(39245), UINT16_C(48082) } }, { { UINT16_C(51367), UINT16_C(20786), UINT16_C(60313), UINT16_C(16052) }, { UINT16_C(37995), UINT16_C(43213), UINT16_C(58001), UINT16_C(43218) }, { UINT16_C(44681), UINT16_C(32000), UINT16_C(59157), UINT16_C(29635) } }, { { UINT16_C( 4044), UINT16_C( 3691), UINT16_C(39658), UINT16_C(58322) }, { UINT16_C(42214), UINT16_C(16075), UINT16_C(16636), UINT16_C(41970) }, { UINT16_C(23129), UINT16_C( 9883), UINT16_C(28147), UINT16_C(50146) } }, { { UINT16_C( 9225), UINT16_C(41716), UINT16_C(43279), UINT16_C(31457) }, { UINT16_C(44605), UINT16_C(53027), UINT16_C(62864), UINT16_C(23927) }, { UINT16_C(26915), UINT16_C(47372), UINT16_C(53072), UINT16_C(27692) } }, { { 
UINT16_C(57860), UINT16_C(61035), UINT16_C(15740), UINT16_C(25297) }, { UINT16_C(40417), UINT16_C(56737), UINT16_C(37853), UINT16_C(59009) }, { UINT16_C(49139), UINT16_C(58886), UINT16_C(26797), UINT16_C(42153) } }, { { UINT16_C(30135), UINT16_C(51081), UINT16_C(27166), UINT16_C(23617) }, { UINT16_C(25624), UINT16_C(43307), UINT16_C(41561), UINT16_C(23814) }, { UINT16_C(27880), UINT16_C(47194), UINT16_C(34364), UINT16_C(23716) } }, { { UINT16_C(29061), UINT16_C( 331), UINT16_C( 7598), UINT16_C(36708) }, { UINT16_C( 1466), UINT16_C(38765), UINT16_C(61080), UINT16_C(20350) }, { UINT16_C(15264), UINT16_C(19548), UINT16_C(34339), UINT16_C(28529) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vrhadd_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t r = simde_vrhadd_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrhadd_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C( 499522018), UINT32_C(1059676568) }, { UINT32_C( 159590395), UINT32_C(3867488461) }, { UINT32_C( 329556207), UINT32_C(2463582515) } }, { { UINT32_C(2140259654), UINT32_C(3410156506) }, { UINT32_C(3081595877), UINT32_C( 210904874) }, { UINT32_C(2610927766), UINT32_C(1810530690) } }, { { UINT32_C(3626588224), UINT32_C(3021427385) }, { UINT32_C(1203608186), UINT32_C( 288178891) }, { UINT32_C(2415098205), UINT32_C(1654803138) } }, { { 
UINT32_C(3801202440), UINT32_C(2142098330) }, { UINT32_C(1748392510), UINT32_C(3245721985) }, { UINT32_C(2774797475), UINT32_C(2693910158) } }, { { UINT32_C(3684277793), UINT32_C(1804579313) }, { UINT32_C( 380783947), UINT32_C(2536038543) }, { UINT32_C(2032530870), UINT32_C(2170308928) } }, { { UINT32_C( 964278687), UINT32_C(3401066124) }, { UINT32_C( 20180608), UINT32_C(3653413047) }, { UINT32_C( 492229648), UINT32_C(3527239586) } }, { { UINT32_C( 934566982), UINT32_C(1487029005) }, { UINT32_C( 544167312), UINT32_C(3568801589) }, { UINT32_C( 739367147), UINT32_C(2527915297) } }, { { UINT32_C(3691852112), UINT32_C(3634808151) }, { UINT32_C(1809439155), UINT32_C(3359939713) }, { UINT32_C(2750645634), UINT32_C(3497373932) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vrhadd_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t r = simde_vrhadd_u32(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrhaddq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 15), -INT8_C( 106), INT8_C( 68), -INT8_C( 52), -INT8_C( 14), -INT8_C( 27), INT8_C( 43), -INT8_C( 62), -INT8_C( 19), INT8_C( 7), INT8_C( 93), -INT8_C( 28), INT8_C( 22), INT8_C( 11), -INT8_C( 122), -INT8_C( 60) }, { INT8_C( 112), -INT8_C( 6), INT8_C( 83), -INT8_C( 35), INT8_C( 100), -INT8_C( 104), -INT8_C( 75), -INT8_C( 91), INT8_C( 32), -INT8_C( 61), -INT8_C( 
52), -INT8_C( 87), -INT8_C( 102), INT8_C( 44), -INT8_C( 11), -INT8_C( 87) }, { INT8_C( 64), -INT8_C( 56), INT8_C( 76), -INT8_C( 43), INT8_C( 43), -INT8_C( 65), -INT8_C( 16), -INT8_C( 76), INT8_C( 7), -INT8_C( 27), INT8_C( 21), -INT8_C( 57), -INT8_C( 40), INT8_C( 28), -INT8_C( 66), -INT8_C( 73) } }, { { -INT8_C( 61), INT8_C( 57), INT8_C( 118), -INT8_C( 75), INT8_C( 31), -INT8_C( 95), INT8_C( 120), INT8_C( 12), -INT8_C( 88), -INT8_C( 43), -INT8_C( 16), -INT8_C( 65), -INT8_C( 32), INT8_C( 119), -INT8_C( 125), INT8_C( 80) }, { INT8_C( 113), -INT8_C( 41), INT8_C( 46), -INT8_C( 43), INT8_C( 111), -INT8_C( 29), INT8_C( 122), -INT8_C( 113), -INT8_C( 89), INT8_C( 70), INT8_C( 57), INT8_C( 65), INT8_C( 115), INT8_C( 46), -INT8_C( 21), INT8_C( 54) }, { INT8_C( 26), INT8_C( 8), INT8_C( 82), -INT8_C( 59), INT8_C( 71), -INT8_C( 62), INT8_C( 121), -INT8_C( 50), -INT8_C( 88), INT8_C( 14), INT8_C( 21), INT8_C( 0), INT8_C( 42), INT8_C( 83), -INT8_C( 73), INT8_C( 67) } }, { { INT8_C( 104), INT8_C( 97), -INT8_C( 21), -INT8_C( 121), INT8_C( 2), INT8_C( 99), -INT8_C( 109), -INT8_C( 85), INT8_C( 56), -INT8_C( 125), INT8_C( 106), INT8_C( 25), -INT8_C( 6), -INT8_C( 19), INT8_C( 105), INT8_C( 107) }, { -INT8_C( 60), -INT8_C( 105), INT8_C( 65), INT8_C( 51), INT8_C( 123), -INT8_C( 69), -INT8_C( 61), INT8_C( 34), INT8_C( 2), -INT8_C( 4), INT8_C( 99), INT8_C( 117), INT8_C( 42), INT8_C( 78), -INT8_C( 85), -INT8_C( 110) }, { INT8_C( 22), -INT8_C( 4), INT8_C( 22), -INT8_C( 35), INT8_C( 63), INT8_C( 15), -INT8_C( 85), -INT8_C( 25), INT8_C( 29), -INT8_C( 64), INT8_C( 103), INT8_C( 71), INT8_C( 18), INT8_C( 30), INT8_C( 10), -INT8_C( 1) } }, { { -INT8_C( 81), -INT8_C( 106), INT8_C( 25), -INT8_C( 78), -INT8_C( 6), -INT8_C( 84), INT8_C( 93), INT8_C( 50), INT8_C( 48), -INT8_C( 57), INT8_C( 75), INT8_C( 42), -INT8_C( 76), -INT8_C( 75), -INT8_C( 106), INT8_C( 121) }, { INT8_C( 76), -INT8_C( 41), -INT8_C( 84), -INT8_C( 57), -INT8_C( 110), INT8_C( 111), -INT8_C( 23), -INT8_C( 108), INT8_C( 107), INT8_C( 
77), INT8_C( 9), -INT8_C( 106), -INT8_C( 101), -INT8_C( 76), INT8_C( 40), INT8_C( 75) }, { -INT8_C( 2), -INT8_C( 73), -INT8_C( 29), -INT8_C( 67), -INT8_C( 58), INT8_C( 14), INT8_C( 35), -INT8_C( 29), INT8_C( 78), INT8_C( 10), INT8_C( 42), -INT8_C( 32), -INT8_C( 88), -INT8_C( 75), -INT8_C( 33), INT8_C( 98) } }, { { INT8_C( 75), INT8_C( 66), -INT8_C( 3), INT8_C( 69), -INT8_C( 18), INT8_C( 90), INT8_C( 119), INT8_C( 30), INT8_C( 33), -INT8_C( 61), INT8_C( 73), -INT8_C( 43), INT8_C( 120), -INT8_C( 33), INT8_C( 78), -INT8_C( 60) }, { -INT8_C( 74), -INT8_C( 5), -INT8_C( 116), INT8_C( 72), INT8_C( 106), INT8_C( 117), -INT8_C( 35), -INT8_C( 42), -INT8_C( 62), -INT8_C( 26), INT8_C( 108), INT8_C( 94), -INT8_C( 101), -INT8_C( 108), -INT8_C( 87), -INT8_C( 26) }, { INT8_C( 1), INT8_C( 31), -INT8_C( 59), INT8_C( 71), INT8_C( 44), INT8_C( 104), INT8_C( 42), -INT8_C( 6), -INT8_C( 14), -INT8_C( 43), INT8_C( 91), INT8_C( 26), INT8_C( 10), -INT8_C( 70), -INT8_C( 4), -INT8_C( 43) } }, { { -INT8_C( 42), -INT8_C( 90), INT8_C( 43), -INT8_C( 59), INT8_C( 0), -INT8_C( 94), -INT8_C( 29), INT8_C( 33), INT8_C( 101), INT8_C( 44), -INT8_C( 10), -INT8_C( 35), INT8_C( 11), INT8_C( 69), -INT8_C( 94), -INT8_C( 63) }, { INT8_C( 64), INT8_C( 46), INT8_C( 10), -INT8_C( 86), -INT8_C( 93), -INT8_C( 25), INT8_MIN, INT8_C( 102), -INT8_C( 51), -INT8_C( 20), -INT8_C( 60), INT8_C( 104), -INT8_C( 127), INT8_C( 109), INT8_C( 78), INT8_C( 87) }, { INT8_C( 11), -INT8_C( 22), INT8_C( 27), -INT8_C( 72), -INT8_C( 46), -INT8_C( 59), -INT8_C( 78), INT8_C( 68), INT8_C( 25), INT8_C( 12), -INT8_C( 35), INT8_C( 35), -INT8_C( 58), INT8_C( 89), -INT8_C( 8), INT8_C( 12) } }, { { INT8_C( 19), INT8_C( 121), INT8_C( 28), INT8_C( 19), INT8_C( 28), INT8_C( 0), INT8_C( 52), -INT8_C( 127), INT8_C( 44), INT8_C( 42), INT8_C( 95), INT8_C( 56), INT8_C( 111), INT8_C( 1), -INT8_C( 7), -INT8_C( 81) }, { INT8_C( 47), INT8_C( 3), INT8_C( 90), -INT8_C( 46), -INT8_C( 22), -INT8_C( 38), INT8_C( 56), -INT8_C( 72), -INT8_C( 57), -INT8_C( 4), 
INT8_C( 32), INT8_C( 72), INT8_C( 105), INT8_C( 111), -INT8_C( 97), INT8_C( 124) }, { INT8_C( 33), INT8_C( 62), INT8_C( 59), -INT8_C( 13), INT8_C( 3), -INT8_C( 19), INT8_C( 54), -INT8_C( 99), -INT8_C( 6), INT8_C( 19), INT8_C( 64), INT8_C( 64), INT8_C( 108), INT8_C( 56), -INT8_C( 52), INT8_C( 22) } }, { { -INT8_C( 24), -INT8_C( 68), -INT8_C( 113), INT8_C( 4), -INT8_C( 68), -INT8_C( 61), -INT8_C( 122), -INT8_C( 24), -INT8_C( 18), -INT8_C( 27), INT8_C( 32), INT8_C( 93), -INT8_C( 26), INT8_C( 26), INT8_C( 13), INT8_C( 21) }, { INT8_C( 29), INT8_C( 103), -INT8_C( 25), INT8_C( 8), INT8_C( 65), INT8_C( 32), -INT8_C( 64), INT8_C( 8), INT8_C( 28), -INT8_C( 32), INT8_C( 80), -INT8_C( 122), INT8_C( 79), -INT8_C( 16), INT8_C( 2), INT8_C( 56) }, { INT8_C( 3), INT8_C( 18), -INT8_C( 69), INT8_C( 6), -INT8_C( 1), -INT8_C( 14), -INT8_C( 93), -INT8_C( 8), INT8_C( 5), -INT8_C( 29), INT8_C( 56), -INT8_C( 14), INT8_C( 27), INT8_C( 5), INT8_C( 8), INT8_C( 39) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vrhaddq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vrhaddq_s8(a, b); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrhaddq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 12427), -INT16_C( 6809), INT16_C( 2649), -INT16_C( 22355), -INT16_C( 19692), INT16_C( 3337), INT16_C( 9695), -INT16_C( 8924) }, { INT16_C( 
26907), INT16_C( 10689), INT16_C( 2586), INT16_C( 9923), INT16_C( 13987), INT16_C( 13485), INT16_C( 13940), -INT16_C( 5796) }, { INT16_C( 7240), INT16_C( 1940), INT16_C( 2618), -INT16_C( 6216), -INT16_C( 2852), INT16_C( 8411), INT16_C( 11818), -INT16_C( 7360) } }, { { -INT16_C( 15611), INT16_C( 24270), INT16_C( 31694), -INT16_C( 7674), INT16_C( 3886), INT16_C( 3567), INT16_C( 4917), INT16_C( 20714) }, { -INT16_C( 21636), -INT16_C( 27015), INT16_C( 15541), INT16_C( 22973), INT16_C( 27251), -INT16_C( 6259), -INT16_C( 5728), -INT16_C( 23088) }, { -INT16_C( 18623), -INT16_C( 1372), INT16_C( 23618), INT16_C( 7650), INT16_C( 15569), -INT16_C( 1346), -INT16_C( 405), -INT16_C( 1187) } }, { { -INT16_C( 24915), INT16_C( 31491), INT16_C( 2585), INT16_C( 18269), INT16_C( 19481), INT16_C( 20053), INT16_C( 16224), -INT16_C( 9057) }, { INT16_C( 6379), -INT16_C( 24461), INT16_C( 12373), -INT16_C( 14087), -INT16_C( 30822), INT16_C( 15023), INT16_C( 32624), INT16_C( 7647) }, { -INT16_C( 9268), INT16_C( 3515), INT16_C( 7479), INT16_C( 2091), -INT16_C( 5670), INT16_C( 17538), INT16_C( 24424), -INT16_C( 705) } }, { { -INT16_C( 7651), INT16_C( 13976), -INT16_C( 2580), INT16_C( 1661), -INT16_C( 11710), -INT16_C( 23980), -INT16_C( 3310), -INT16_C( 642) }, { -INT16_C( 3828), INT16_C( 24989), -INT16_C( 26847), -INT16_C( 17623), -INT16_C( 10210), -INT16_C( 28939), -INT16_C( 11177), INT16_C( 29868) }, { -INT16_C( 5739), INT16_C( 19483), -INT16_C( 14713), -INT16_C( 7981), -INT16_C( 10960), -INT16_C( 26459), -INT16_C( 7243), INT16_C( 14613) } }, { { INT16_C( 17591), -INT16_C( 23638), INT16_C( 10042), INT16_C( 31913), -INT16_C( 262), INT16_C( 3102), -INT16_C( 25359), -INT16_C( 759) }, { -INT16_C( 22898), -INT16_C( 20642), -INT16_C( 30915), INT16_C( 23403), INT16_C( 24671), -INT16_C( 18710), -INT16_C( 27083), -INT16_C( 5078) }, { -INT16_C( 2653), -INT16_C( 22140), -INT16_C( 10436), INT16_C( 27658), INT16_C( 12205), -INT16_C( 7804), -INT16_C( 26221), -INT16_C( 2918) } }, { { -INT16_C( 11046), 
INT16_C( 5263), INT16_C( 14844), -INT16_C( 2416), -INT16_C( 20937), INT16_C( 10242), INT16_C( 2891), -INT16_C( 9946) }, { -INT16_C( 31567), -INT16_C( 4216), -INT16_C( 3316), INT16_C( 27466), INT16_C( 13396), -INT16_C( 30430), INT16_C( 19658), -INT16_C( 23179) }, { -INT16_C( 21306), INT16_C( 524), INT16_C( 5764), INT16_C( 12525), -INT16_C( 3770), -INT16_C( 10094), INT16_C( 11275), -INT16_C( 16562) } }, { { INT16_C( 1057), INT16_C( 7609), INT16_C( 19005), INT16_C( 29715), INT16_C( 5624), INT16_C( 17309), -INT16_C( 15584), -INT16_C( 12004) }, { -INT16_C( 23225), INT16_C( 21440), INT16_C( 2968), -INT16_C( 4929), -INT16_C( 7873), INT16_C( 2677), -INT16_C( 5587), INT16_C( 20143) }, { -INT16_C( 11084), INT16_C( 14525), INT16_C( 10987), INT16_C( 12393), -INT16_C( 1124), INT16_C( 9993), -INT16_C( 10585), INT16_C( 4070) } }, { { INT16_C( 26863), INT16_C( 11371), INT16_C( 32434), -INT16_C( 21599), INT16_C( 16019), -INT16_C( 19474), INT16_C( 2817), INT16_C( 18565) }, { INT16_C( 17840), INT16_C( 18588), INT16_C( 23376), -INT16_C( 28619), -INT16_C( 21956), INT16_C( 27034), INT16_C( 18837), -INT16_C( 31560) }, { INT16_C( 22352), INT16_C( 14980), INT16_C( 27905), -INT16_C( 25109), -INT16_C( 2968), INT16_C( 3780), INT16_C( 10827), -INT16_C( 6497) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vrhaddq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vrhaddq_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } 
static int test_simde_vrhaddq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 760845893), -INT32_C( 891314450), -INT32_C( 836422950), INT32_C( 376615154) }, { -INT32_C( 916305564), INT32_C( 325179058), INT32_C( 1510259932), INT32_C( 894450672) }, { -INT32_C( 77729835), -INT32_C( 283067696), INT32_C( 336918491), INT32_C( 635532913) } }, { { -INT32_C( 1067275822), INT32_C( 579486280), INT32_C( 1727049588), -INT32_C( 998481057) }, { INT32_C( 1586355884), -INT32_C( 1871581516), -INT32_C( 1695910486), -INT32_C( 2083571023) }, { INT32_C( 259540031), -INT32_C( 646047618), INT32_C( 15569551), -INT32_C( 1541026040) } }, { { INT32_C( 742601444), -INT32_C( 397488780), -INT32_C( 598851716), INT32_C( 1319160482) }, { INT32_C( 1554787752), -INT32_C( 974381797), INT32_C( 1130354322), -INT32_C( 171561455) }, { INT32_C( 1148694598), -INT32_C( 685935288), INT32_C( 265751303), INT32_C( 573799514) } }, { { -INT32_C( 736032416), INT32_C( 1404858326), INT32_C( 1345260206), INT32_C( 2090782676) }, { INT32_C( 400050940), -INT32_C( 86195097), -INT32_C( 1405273189), -INT32_C( 895417239) }, { -INT32_C( 167990738), INT32_C( 659331615), -INT32_C( 30006491), INT32_C( 597682719) } }, { { -INT32_C( 459357683), -INT32_C( 533243087), INT32_C( 976250469), INT32_C( 834064181) }, { -INT32_C( 2125951207), -INT32_C( 293919661), -INT32_C( 929384353), -INT32_C( 896386116) }, { -INT32_C( 1292654445), -INT32_C( 413581374), INT32_C( 23433058), -INT32_C( 31160967) } }, { { INT32_C( 799945213), -INT32_C( 250616436), -INT32_C( 2144649397), INT32_C( 682746382) }, { -INT32_C( 995493519), INT32_C( 2092114973), -INT32_C( 1723576868), -INT32_C( 2040277111) }, { -INT32_C( 97774153), INT32_C( 920749269), -INT32_C( 1934113132), -INT32_C( 678765364) } }, { { -INT32_C( 1800072952), INT32_C( 1099285750), INT32_C( 314683652), INT32_C( 70939283) }, { -INT32_C( 2000100501), -INT32_C( 469468409), INT32_C( 1383942345), INT32_C( 668524575) }, { 
-INT32_C( 1900086726), INT32_C( 314908671), INT32_C( 849312999), INT32_C( 369731929) } }, { { -INT32_C( 407138575), INT32_C( 1445478738), -INT32_C( 2056656398), -INT32_C( 964058277) }, { -INT32_C( 1907469945), -INT32_C( 1770892595), -INT32_C( 1159139430), -INT32_C( 1058946865) }, { -INT32_C( 1157304260), -INT32_C( 162706928), -INT32_C( 1607897914), -INT32_C( 1011502571) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vrhaddq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vrhaddq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrhaddq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(150), UINT8_C(167), UINT8_C(203), UINT8_C(236), UINT8_C(160), UINT8_C( 41), UINT8_C( 47), UINT8_C(101), UINT8_C( 47), UINT8_C( 78), UINT8_C(231), UINT8_C(185), UINT8_C( 8), UINT8_C( 74), UINT8_C( 24), UINT8_C(128) }, { UINT8_C(164), UINT8_C(162), UINT8_C(165), UINT8_C(162), UINT8_C(209), UINT8_C(237), UINT8_C(222), UINT8_C( 42), UINT8_C( 79), UINT8_C( 11), UINT8_C( 3), UINT8_C(234), UINT8_C(238), UINT8_C(233), UINT8_C( 40), UINT8_C(132) }, { UINT8_C(157), UINT8_C(165), UINT8_C(184), UINT8_C(199), UINT8_C(185), UINT8_C(139), UINT8_C(135), UINT8_C( 72), UINT8_C( 63), UINT8_C( 45), UINT8_C(117), UINT8_C(210), UINT8_C(123), UINT8_C(154), UINT8_C( 32), UINT8_C(130) } }, { { UINT8_C(144), UINT8_C(243), 
UINT8_C(112), UINT8_C( 48), UINT8_C( 29), UINT8_C(159), UINT8_C(150), UINT8_C( 76), UINT8_C(237), UINT8_C(125), UINT8_C( 5), UINT8_C(246), UINT8_C(199), UINT8_C( 29), UINT8_C(118), UINT8_C(107) }, { UINT8_C(191), UINT8_C( 27), UINT8_C( 14), UINT8_C(144), UINT8_C( 8), UINT8_C(236), UINT8_C(187), UINT8_C( 87), UINT8_C(247), UINT8_C(190), UINT8_C( 66), UINT8_C(229), UINT8_C(167), UINT8_C(106), UINT8_C(105), UINT8_C( 56) }, { UINT8_C(168), UINT8_C(135), UINT8_C( 63), UINT8_C( 96), UINT8_C( 19), UINT8_C(198), UINT8_C(169), UINT8_C( 82), UINT8_C(242), UINT8_C(158), UINT8_C( 36), UINT8_C(238), UINT8_C(183), UINT8_C( 68), UINT8_C(112), UINT8_C( 82) } }, { { UINT8_C( 93), UINT8_C(217), UINT8_C(104), UINT8_C(122), UINT8_C(120), UINT8_C(254), UINT8_C(198), UINT8_C(102), UINT8_C(123), UINT8_C(203), UINT8_C( 92), UINT8_C( 66), UINT8_C(232), UINT8_C(210), UINT8_C(174), UINT8_C(167) }, { UINT8_C(237), UINT8_C(188), UINT8_C( 56), UINT8_C(245), UINT8_C(168), UINT8_C(243), UINT8_C( 77), UINT8_C(159), UINT8_C(177), UINT8_C(143), UINT8_C(133), UINT8_C( 89), UINT8_C(249), UINT8_C(238), UINT8_C(145), UINT8_C( 86) }, { UINT8_C(165), UINT8_C(203), UINT8_C( 80), UINT8_C(184), UINT8_C(144), UINT8_C(249), UINT8_C(138), UINT8_C(131), UINT8_C(150), UINT8_C(173), UINT8_C(113), UINT8_C( 78), UINT8_C(241), UINT8_C(224), UINT8_C(160), UINT8_C(127) } }, { { UINT8_C(200), UINT8_C(249), UINT8_C(209), UINT8_C( 64), UINT8_C(248), UINT8_C(151), UINT8_C(166), UINT8_C(115), UINT8_C( 99), UINT8_C( 2), UINT8_C(182), UINT8_C( 75), UINT8_C(212), UINT8_C(100), UINT8_C(243), UINT8_C(193) }, { UINT8_C( 32), UINT8_C( 43), UINT8_C(183), UINT8_C(200), UINT8_C( 30), UINT8_C( 4), UINT8_C(103), UINT8_C(207), UINT8_C(147), UINT8_C(236), UINT8_C( 40), UINT8_C(140), UINT8_C(219), UINT8_C(185), UINT8_C(226), UINT8_C(163) }, { UINT8_C(116), UINT8_C(146), UINT8_C(196), UINT8_C(132), UINT8_C(139), UINT8_C( 78), UINT8_C(135), UINT8_C(161), UINT8_C(123), UINT8_C(119), UINT8_C(111), UINT8_C(108), UINT8_C(216), UINT8_C(143), 
UINT8_C(235), UINT8_C(178) } }, { { UINT8_C(179), UINT8_C(179), UINT8_C(227), UINT8_C(171), UINT8_C( 75), UINT8_C(138), UINT8_C( 30), UINT8_C(174), UINT8_C(140), UINT8_C(212), UINT8_C(249), UINT8_C( 97), UINT8_C( 56), UINT8_C(236), UINT8_C( 34), UINT8_C( 88) }, { UINT8_C( 23), UINT8_C(217), UINT8_C( 32), UINT8_C( 53), UINT8_C(221), UINT8_C(136), UINT8_C( 5), UINT8_C(112), UINT8_C(116), UINT8_C( 45), UINT8_C(252), UINT8_C( 79), UINT8_C(231), UINT8_C(223), UINT8_C(242), UINT8_C(154) }, { UINT8_C(101), UINT8_C(198), UINT8_C(130), UINT8_C(112), UINT8_C(148), UINT8_C(137), UINT8_C( 18), UINT8_C(143), UINT8_C(128), UINT8_C(129), UINT8_C(251), UINT8_C( 88), UINT8_C(144), UINT8_C(230), UINT8_C(138), UINT8_C(121) } }, { { UINT8_C(146), UINT8_C(214), UINT8_C( 69), UINT8_C(221), UINT8_C( 96), UINT8_C( 99), UINT8_C(139), UINT8_C(236), UINT8_C( 56), UINT8_C(133), UINT8_C( 77), UINT8_C(112), UINT8_C(113), UINT8_C(112), UINT8_C(201), UINT8_C(137) }, { UINT8_C( 73), UINT8_C(233), UINT8_C(190), UINT8_C( 39), UINT8_C(113), UINT8_C(195), UINT8_C(151), UINT8_C(230), UINT8_C(241), UINT8_C(148), UINT8_C( 53), UINT8_C(216), UINT8_C(115), UINT8_C( 40), UINT8_C(114), UINT8_C( 5) }, { UINT8_C(110), UINT8_C(224), UINT8_C(130), UINT8_C(130), UINT8_C(105), UINT8_C(147), UINT8_C(145), UINT8_C(233), UINT8_C(149), UINT8_C(141), UINT8_C( 65), UINT8_C(164), UINT8_C(114), UINT8_C( 76), UINT8_C(158), UINT8_C( 71) } }, { { UINT8_C(254), UINT8_C(183), UINT8_C(227), UINT8_C( 94), UINT8_C( 26), UINT8_C(110), UINT8_C( 74), UINT8_C( 82), UINT8_C(243), UINT8_C(152), UINT8_C(195), UINT8_C(101), UINT8_C( 8), UINT8_C(140), UINT8_C(238), UINT8_C( 81) }, { UINT8_C(117), UINT8_C(172), UINT8_C(120), UINT8_C(231), UINT8_C(112), UINT8_C( 16), UINT8_C(205), UINT8_C( 97), UINT8_C(164), UINT8_C( 2), UINT8_C( 57), UINT8_C( 23), UINT8_C( 42), UINT8_C(171), UINT8_C( 28), UINT8_C( 40) }, { UINT8_C(186), UINT8_C(178), UINT8_C(174), UINT8_C(163), UINT8_C( 69), UINT8_C( 63), UINT8_C(140), UINT8_C( 90), UINT8_C(204), UINT8_C( 
77), UINT8_C(126), UINT8_C( 62), UINT8_C( 25), UINT8_C(156), UINT8_C(133), UINT8_C( 61) } }, { { UINT8_C( 98), UINT8_MAX, UINT8_C(134), UINT8_C(124), UINT8_C(110), UINT8_C(209), UINT8_C(207), UINT8_C( 97), UINT8_C(105), UINT8_C(146), UINT8_C(198), UINT8_C(113), UINT8_C( 30), UINT8_C(180), UINT8_C(194), UINT8_C(147) }, { UINT8_C( 97), UINT8_C( 59), UINT8_C(122), UINT8_C(209), UINT8_C( 75), UINT8_C( 71), UINT8_C( 50), UINT8_C(239), UINT8_C( 74), UINT8_C(107), UINT8_C( 6), UINT8_C(116), UINT8_C( 22), UINT8_C( 34), UINT8_C(157), UINT8_C(120) }, { UINT8_C( 98), UINT8_C(157), UINT8_C(128), UINT8_C(167), UINT8_C( 93), UINT8_C(140), UINT8_C(129), UINT8_C(168), UINT8_C( 90), UINT8_C(127), UINT8_C(102), UINT8_C(115), UINT8_C( 26), UINT8_C(107), UINT8_C(176), UINT8_C(134) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vrhaddq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vrhaddq_u8(a, b); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrhaddq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(51215), UINT16_C(32160), UINT16_C(55135), UINT16_C(43104), UINT16_C(32699), UINT16_C(11689), UINT16_C(18157), UINT16_C(40650) }, { UINT16_C(43290), UINT16_C(35791), UINT16_C(10404), UINT16_C(41944), UINT16_C( 2814), UINT16_C(22591), UINT16_C(25777), UINT16_C(49195) }, { UINT16_C(47253), UINT16_C(33976), 
UINT16_C(32770), UINT16_C(42524), UINT16_C(17757), UINT16_C(17140), UINT16_C(21967), UINT16_C(44923) } }, { { UINT16_C(52012), UINT16_C(35645), UINT16_C(40355), UINT16_C(24115), UINT16_C(56348), UINT16_C( 2443), UINT16_C(21794), UINT16_C(15783) }, { UINT16_C(30718), UINT16_C(41672), UINT16_C(41375), UINT16_C(40262), UINT16_C(34219), UINT16_C(23797), UINT16_C( 8682), UINT16_C( 5660) }, { UINT16_C(41365), UINT16_C(38659), UINT16_C(40865), UINT16_C(32189), UINT16_C(45284), UINT16_C(13120), UINT16_C(15238), UINT16_C(10722) } }, { { UINT16_C(23020), UINT16_C(36770), UINT16_C(54774), UINT16_C( 4845), UINT16_C(30898), UINT16_C(54300), UINT16_C(50125), UINT16_C(52241) }, { UINT16_C(55866), UINT16_C(55662), UINT16_C(46203), UINT16_C( 9847), UINT16_C(27706), UINT16_C( 9347), UINT16_C(40845), UINT16_C(31290) }, { UINT16_C(39443), UINT16_C(46216), UINT16_C(50489), UINT16_C( 7346), UINT16_C(29302), UINT16_C(31824), UINT16_C(45485), UINT16_C(41766) } }, { { UINT16_C(56569), UINT16_C(61193), UINT16_C(63410), UINT16_C(25602), UINT16_C( 7791), UINT16_C(15672), UINT16_C(19169), UINT16_C( 7177) }, { UINT16_C(30500), UINT16_C(40949), UINT16_C(27692), UINT16_C(26309), UINT16_C(18649), UINT16_C(26250), UINT16_C(50408), UINT16_C(57824) }, { UINT16_C(43535), UINT16_C(51071), UINT16_C(45551), UINT16_C(25956), UINT16_C(13220), UINT16_C(20961), UINT16_C(34789), UINT16_C(32501) } }, { { UINT16_C(60065), UINT16_C(21456), UINT16_C(53985), UINT16_C(20663), UINT16_C(61424), UINT16_C(53901), UINT16_C(38457), UINT16_C(24046) }, { UINT16_C(58126), UINT16_C(15100), UINT16_C(49744), UINT16_C(10656), UINT16_C(10762), UINT16_C(62095), UINT16_C(28910), UINT16_C(36819) }, { UINT16_C(59096), UINT16_C(18278), UINT16_C(51865), UINT16_C(15660), UINT16_C(36093), UINT16_C(57998), UINT16_C(33684), UINT16_C(30433) } }, { { UINT16_C(42074), UINT16_C(15330), UINT16_C(39286), UINT16_C(26507), UINT16_C( 6537), UINT16_C(49721), UINT16_C(10159), UINT16_C(48416) }, { UINT16_C( 7178), UINT16_C(23287), UINT16_C(38878), 
UINT16_C(59779), UINT16_C( 5057), UINT16_C(45275), UINT16_C(44931), UINT16_C(56639) }, { UINT16_C(24626), UINT16_C(19309), UINT16_C(39082), UINT16_C(43143), UINT16_C( 5797), UINT16_C(47498), UINT16_C(27545), UINT16_C(52528) } }, { { UINT16_C( 8787), UINT16_C(51480), UINT16_C(41915), UINT16_C(17456), UINT16_C(27068), UINT16_C(27655), UINT16_C(10128), UINT16_C(39721) }, { UINT16_C( 8515), UINT16_C( 8949), UINT16_C(31160), UINT16_C(31243), UINT16_C(59020), UINT16_C( 3882), UINT16_C(27029), UINT16_C(59628) }, { UINT16_C( 8651), UINT16_C(30215), UINT16_C(36538), UINT16_C(24350), UINT16_C(43044), UINT16_C(15769), UINT16_C(18579), UINT16_C(49675) } }, { { UINT16_C( 1163), UINT16_C(18354), UINT16_C(58023), UINT16_C(25739), UINT16_C(37452), UINT16_C(56528), UINT16_C(63929), UINT16_C(64887) }, { UINT16_C(27930), UINT16_C(54047), UINT16_C(10982), UINT16_C(29261), UINT16_C(30480), UINT16_C(42625), UINT16_C(28128), UINT16_C(27790) }, { UINT16_C(14547), UINT16_C(36201), UINT16_C(34503), UINT16_C(27500), UINT16_C(33966), UINT16_C(49577), UINT16_C(46029), UINT16_C(46339) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vrhaddq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t r = simde_vrhaddq_u16(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrhaddq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { 
UINT32_C(2123283924), UINT32_C(2831405666), UINT32_C(1820489986), UINT32_C(2642437646) }, { UINT32_C(1224219618), UINT32_C(3851204320), UINT32_C(2309803833), UINT32_C(4223364647) }, { UINT32_C(1673751771), UINT32_C(3341304993), UINT32_C(2065146910), UINT32_C(3432901147) } }, { { UINT32_C(2390378796), UINT32_C( 506871068), UINT32_C(3297425846), UINT32_C( 23137055) }, { UINT32_C( 122312998), UINT32_C(1240258063), UINT32_C(3453130918), UINT32_C( 868781319) }, { UINT32_C(1256345897), UINT32_C( 873564566), UINT32_C(3375278382), UINT32_C( 445959187) } }, { { UINT32_C(4089529047), UINT32_C( 890369919), UINT32_C(3489307824), UINT32_C(3453049767) }, { UINT32_C(3302235061), UINT32_C(2534260977), UINT32_C(1617223513), UINT32_C(1150496109) }, { UINT32_C(3695882054), UINT32_C(1712315448), UINT32_C(2553265669), UINT32_C(2301772938) } }, { { UINT32_C(4013380719), UINT32_C(4230236235), UINT32_C(2345344740), UINT32_C( 794401914) }, { UINT32_C(2851286455), UINT32_C(1195377134), UINT32_C(1302832608), UINT32_C(1100036818) }, { UINT32_C(3432333587), UINT32_C(2712806685), UINT32_C(1824088674), UINT32_C( 947219366) } }, { { UINT32_C(3643852942), UINT32_C(4124398865), UINT32_C(3984695667), UINT32_C(4112308797) }, { UINT32_C(4120776711), UINT32_C(4047298065), UINT32_C(1430250371), UINT32_C(2878853149) }, { UINT32_C(3882314827), UINT32_C(4085848465), UINT32_C(2707473019), UINT32_C(3495580973) } }, { { UINT32_C(2860894105), UINT32_C(2426362396), UINT32_C( 964501755), UINT32_C( 36608762) }, { UINT32_C(3153579178), UINT32_C( 783037610), UINT32_C( 897837847), UINT32_C(1440750268) }, { UINT32_C(3007236642), UINT32_C(1604700003), UINT32_C( 931169801), UINT32_C( 738679515) } }, { { UINT32_C(4278150626), UINT32_C(3146686144), UINT32_C(3119779007), UINT32_C(1354441382) }, { UINT32_C(2567681006), UINT32_C(4274501607), UINT32_C(1597196963), UINT32_C(1202984037) }, { UINT32_C(3422915816), UINT32_C(3710593876), UINT32_C(2358487985), UINT32_C(1278712710) } }, { { UINT32_C( 960869241), UINT32_C( 
284546129), UINT32_C(2261445088), UINT32_C(4208362764) }, { UINT32_C( 529785144), UINT32_C( 991845016), UINT32_C( 177885605), UINT32_C(3746647653) }, { UINT32_C( 745327193), UINT32_C( 638195573), UINT32_C(1219665347), UINT32_C(3977505209) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vrhaddq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t r = simde_vrhaddq_u32(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vrhadd_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vrhadd_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vrhadd_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vrhadd_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vrhadd_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vrhadd_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vrhaddq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vrhaddq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vrhaddq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vrhaddq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vrhaddq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vrhaddq_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/rnd.c000066400000000000000000000236711400333146700162570ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN rnd #include "test-neon.h" #include "../../../simde/arm/neon/rnd.h" static int test_simde_vrnd_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 r[2]; } test_vec[] = { #if defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, -SIMDE_MATH_NANF }, { SIMDE_MATH_NANF, 
-SIMDE_MATH_NANF } }, #endif { { SIMDE_FLOAT32_C( -1.50), SIMDE_FLOAT32_C( 1.50) }, { SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( -2.50), SIMDE_FLOAT32_C( 2.50) }, { SIMDE_FLOAT32_C( -2.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( 782.33), SIMDE_FLOAT32_C( 23.83) }, { SIMDE_FLOAT32_C( 782.00), SIMDE_FLOAT32_C( 23.00) } }, { { SIMDE_FLOAT32_C( -231.98), SIMDE_FLOAT32_C( -121.26) }, { SIMDE_FLOAT32_C( -231.00), SIMDE_FLOAT32_C( -121.00) } }, { { SIMDE_FLOAT32_C( 524.61), SIMDE_FLOAT32_C( 500.02) }, { SIMDE_FLOAT32_C( 524.00), SIMDE_FLOAT32_C( 500.00) } }, { { SIMDE_FLOAT32_C( 80.15), SIMDE_FLOAT32_C( 517.44) }, { SIMDE_FLOAT32_C( 80.00), SIMDE_FLOAT32_C( 517.00) } }, { { SIMDE_FLOAT32_C( -754.87), SIMDE_FLOAT32_C( 128.37) }, { SIMDE_FLOAT32_C( -754.00), SIMDE_FLOAT32_C( 128.00) } }, { { SIMDE_FLOAT32_C( 182.53), SIMDE_FLOAT32_C( 136.96) }, { SIMDE_FLOAT32_C( 182.00), SIMDE_FLOAT32_C( 136.00) } }, { { SIMDE_FLOAT32_C( 605.41), SIMDE_FLOAT32_C( -833.56) }, { SIMDE_FLOAT32_C( 605.00), SIMDE_FLOAT32_C( -833.00) } }, { { SIMDE_FLOAT32_C( 774.26), SIMDE_FLOAT32_C( -578.69) }, { SIMDE_FLOAT32_C( 774.00), SIMDE_FLOAT32_C( -578.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t r = simde_vrnd_f32(a); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vrnd_f32(a); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrnd_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; simde_float64 r[1]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN }, { SIMDE_MATH_NAN } }, { { 
-SIMDE_MATH_NAN }, { -SIMDE_MATH_NAN } }, #endif { { SIMDE_FLOAT64_C( -1.50) }, { SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( 1.50) }, { SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( -2.50) }, { SIMDE_FLOAT64_C( -2.00) } }, { { SIMDE_FLOAT64_C( 2.50) }, { SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( 667.17) }, { SIMDE_FLOAT64_C( 667.00) } }, { { SIMDE_FLOAT64_C( 472.88) }, { SIMDE_FLOAT64_C( 472.00) } }, { { SIMDE_FLOAT64_C( 161.95) }, { SIMDE_FLOAT64_C( 161.00) } }, { { SIMDE_FLOAT64_C( -277.95) }, { SIMDE_FLOAT64_C( -277.00) } }, { { SIMDE_FLOAT64_C( 876.07) }, { SIMDE_FLOAT64_C( 876.00) } }, { { SIMDE_FLOAT64_C( 151.96) }, { SIMDE_FLOAT64_C( 151.00) } }, { { SIMDE_FLOAT64_C( -135.07) }, { SIMDE_FLOAT64_C( -135.00) } }, { { SIMDE_FLOAT64_C( -815.34) }, { SIMDE_FLOAT64_C( -815.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t r = simde_vrnd_f64(a); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t r = simde_vrnd_f64(a); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrndq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 r[4]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, -SIMDE_MATH_NANF, SIMDE_MATH_NANF, -SIMDE_MATH_NANF }, { SIMDE_MATH_NANF, -SIMDE_MATH_NANF, SIMDE_MATH_NANF, -SIMDE_MATH_NANF } }, #endif { { SIMDE_FLOAT32_C( -1.50), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( -2.50), SIMDE_FLOAT32_C( 2.50) }, { SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -2.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( -722.64), SIMDE_FLOAT32_C( 549.67), 
SIMDE_FLOAT32_C( 360.83), SIMDE_FLOAT32_C( 702.11) }, { SIMDE_FLOAT32_C( -722.00), SIMDE_FLOAT32_C( 549.00), SIMDE_FLOAT32_C( 360.00), SIMDE_FLOAT32_C( 702.00) } }, { { SIMDE_FLOAT32_C( 923.48), SIMDE_FLOAT32_C( 285.32), SIMDE_FLOAT32_C( 55.43), SIMDE_FLOAT32_C( 705.81) }, { SIMDE_FLOAT32_C( 923.00), SIMDE_FLOAT32_C( 285.00), SIMDE_FLOAT32_C( 55.00), SIMDE_FLOAT32_C( 705.00) } }, { { SIMDE_FLOAT32_C( -690.85), SIMDE_FLOAT32_C( 823.44), SIMDE_FLOAT32_C( -415.44), SIMDE_FLOAT32_C( 833.76) }, { SIMDE_FLOAT32_C( -690.00), SIMDE_FLOAT32_C( 823.00), SIMDE_FLOAT32_C( -415.00), SIMDE_FLOAT32_C( 833.00) } }, { { SIMDE_FLOAT32_C( 323.46), SIMDE_FLOAT32_C( 664.70), SIMDE_FLOAT32_C( 351.21), SIMDE_FLOAT32_C( 568.59) }, { SIMDE_FLOAT32_C( 323.00), SIMDE_FLOAT32_C( 664.00), SIMDE_FLOAT32_C( 351.00), SIMDE_FLOAT32_C( 568.00) } }, { { SIMDE_FLOAT32_C( -206.93), SIMDE_FLOAT32_C( -466.27), SIMDE_FLOAT32_C( -294.45), SIMDE_FLOAT32_C( -601.52) }, { SIMDE_FLOAT32_C( -206.00), SIMDE_FLOAT32_C( -466.00), SIMDE_FLOAT32_C( -294.00), SIMDE_FLOAT32_C( -601.00) } }, { { SIMDE_FLOAT32_C( -299.83), SIMDE_FLOAT32_C( -520.19), SIMDE_FLOAT32_C( -180.21), SIMDE_FLOAT32_C( -632.66) }, { SIMDE_FLOAT32_C( -299.00), SIMDE_FLOAT32_C( -520.00), SIMDE_FLOAT32_C( -180.00), SIMDE_FLOAT32_C( -632.00) } }, { { SIMDE_FLOAT32_C( 952.69), SIMDE_FLOAT32_C( 981.74), SIMDE_FLOAT32_C( 89.39), SIMDE_FLOAT32_C( 828.76) }, { SIMDE_FLOAT32_C( 952.00), SIMDE_FLOAT32_C( 981.00), SIMDE_FLOAT32_C( 89.00), SIMDE_FLOAT32_C( 828.00) } }, { { SIMDE_FLOAT32_C( 133.70), SIMDE_FLOAT32_C( 954.32), SIMDE_FLOAT32_C( -986.58), SIMDE_FLOAT32_C( 411.06) }, { SIMDE_FLOAT32_C( 133.00), SIMDE_FLOAT32_C( 954.00), SIMDE_FLOAT32_C( -986.00), SIMDE_FLOAT32_C( 411.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t r = simde_vrndq_f32(a); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else 
fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vrndq_f32(a); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrndq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN, -SIMDE_MATH_NAN }, { SIMDE_MATH_NAN, -SIMDE_MATH_NAN } }, #endif { { SIMDE_FLOAT64_C( -1.50), SIMDE_FLOAT64_C( 1.50) }, { SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( -2.50), SIMDE_FLOAT64_C( 2.50) }, { SIMDE_FLOAT64_C( -2.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( 503.99), SIMDE_FLOAT64_C( 374.26) }, { SIMDE_FLOAT64_C( 503.00), SIMDE_FLOAT64_C( 374.00) } }, { { SIMDE_FLOAT64_C( 113.17), SIMDE_FLOAT64_C( 427.47) }, { SIMDE_FLOAT64_C( 113.00), SIMDE_FLOAT64_C( 427.00) } }, { { SIMDE_FLOAT64_C( -340.42), SIMDE_FLOAT64_C( -831.40) }, { SIMDE_FLOAT64_C( -340.00), SIMDE_FLOAT64_C( -831.00) } }, { { SIMDE_FLOAT64_C( 133.28), SIMDE_FLOAT64_C( -31.27) }, { SIMDE_FLOAT64_C( 133.00), SIMDE_FLOAT64_C( -31.00) } }, { { SIMDE_FLOAT64_C( 992.04), SIMDE_FLOAT64_C( 717.84) }, { SIMDE_FLOAT64_C( 992.00), SIMDE_FLOAT64_C( 717.00) } }, { { SIMDE_FLOAT64_C( -197.51), SIMDE_FLOAT64_C( 315.50) }, { SIMDE_FLOAT64_C( -197.00), SIMDE_FLOAT64_C( 315.00) } }, { { SIMDE_FLOAT64_C( 382.54), SIMDE_FLOAT64_C( -846.31) }, { SIMDE_FLOAT64_C( 382.00), SIMDE_FLOAT64_C( -846.00) } }, { { SIMDE_FLOAT64_C( -115.91), SIMDE_FLOAT64_C( -824.39) }, { SIMDE_FLOAT64_C( -115.00), SIMDE_FLOAT64_C( -824.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t r = simde_vrndq_f64(a); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; 
#else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t r = simde_vrndq_f64(a); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vrnd_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vrnd_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vrndq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vrndq_f64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/rndi.c000066400000000000000000000237171400333146700164310ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN rndi #include "test-neon.h" #include "../../../simde/arm/neon/rndi.h" static int test_simde_vrndi_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 r[2]; } test_vec[] = { #if defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, -SIMDE_MATH_NANF }, { SIMDE_MATH_NANF, -SIMDE_MATH_NANF } }, #endif { { SIMDE_FLOAT32_C( -1.50), SIMDE_FLOAT32_C( 1.50) }, { SIMDE_FLOAT32_C( -2.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( -2.50), SIMDE_FLOAT32_C( 2.50) }, { SIMDE_FLOAT32_C( -2.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( -787.65), SIMDE_FLOAT32_C( -795.95) }, { SIMDE_FLOAT32_C( -788.00), SIMDE_FLOAT32_C( -796.00) } }, { { SIMDE_FLOAT32_C( 899.31), SIMDE_FLOAT32_C( -93.42) }, { SIMDE_FLOAT32_C( 899.00), SIMDE_FLOAT32_C( -93.00) } }, { { SIMDE_FLOAT32_C( -436.50), SIMDE_FLOAT32_C( -165.94) }, { SIMDE_FLOAT32_C( -436.00), SIMDE_FLOAT32_C( -166.00) } }, { { SIMDE_FLOAT32_C( -516.13), SIMDE_FLOAT32_C( -288.52) }, { SIMDE_FLOAT32_C( -516.00), SIMDE_FLOAT32_C( -289.00) } }, { { SIMDE_FLOAT32_C( -568.31), SIMDE_FLOAT32_C( -937.97) }, { SIMDE_FLOAT32_C( -568.00), SIMDE_FLOAT32_C( -938.00) } }, { { SIMDE_FLOAT32_C( 827.64), SIMDE_FLOAT32_C( 984.63) }, { SIMDE_FLOAT32_C( 828.00), SIMDE_FLOAT32_C( 985.00) } }, { { SIMDE_FLOAT32_C( 261.25), 
SIMDE_FLOAT32_C( -11.30) }, { SIMDE_FLOAT32_C( 261.00), SIMDE_FLOAT32_C( -11.00) } }, { { SIMDE_FLOAT32_C( 97.38), SIMDE_FLOAT32_C( -824.40) }, { SIMDE_FLOAT32_C( 97.00), SIMDE_FLOAT32_C( -824.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t r = simde_vrndi_f32(a); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vrndi_f32(a); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrndi_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; simde_float64 r[1]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN }, { SIMDE_MATH_NAN } }, { { -SIMDE_MATH_NAN }, { -SIMDE_MATH_NAN } }, #endif { { SIMDE_FLOAT64_C( -1.50) }, { SIMDE_FLOAT64_C( -2.00) } }, { { SIMDE_FLOAT64_C( 1.50) }, { SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( -2.50) }, { SIMDE_FLOAT64_C( -2.00) } }, { { SIMDE_FLOAT64_C( 2.50) }, { SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( -405.63) }, { SIMDE_FLOAT64_C( -406.00) } }, { { SIMDE_FLOAT64_C( 554.36) }, { SIMDE_FLOAT64_C( 554.00) } }, { { SIMDE_FLOAT64_C( -286.09) }, { SIMDE_FLOAT64_C( -286.00) } }, { { SIMDE_FLOAT64_C( -583.84) }, { SIMDE_FLOAT64_C( -584.00) } }, { { SIMDE_FLOAT64_C( -389.59) }, { SIMDE_FLOAT64_C( -390.00) } }, { { SIMDE_FLOAT64_C( -683.98) }, { SIMDE_FLOAT64_C( -684.00) } }, { { SIMDE_FLOAT64_C( 628.93) }, { SIMDE_FLOAT64_C( 629.00) } }, { { SIMDE_FLOAT64_C( -112.86) }, { SIMDE_FLOAT64_C( -113.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t r = simde_vrndi_f64(a); 
simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t r = simde_vrndi_f64(a); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrndiq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 r[4]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, -SIMDE_MATH_NANF, SIMDE_MATH_NANF, -SIMDE_MATH_NANF }, { SIMDE_MATH_NANF, -SIMDE_MATH_NANF, SIMDE_MATH_NANF, -SIMDE_MATH_NANF } }, #endif { { SIMDE_FLOAT32_C( -1.50), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( -2.50), SIMDE_FLOAT32_C( 2.50) }, { SIMDE_FLOAT32_C( -2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -2.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( -938.67), SIMDE_FLOAT32_C( -583.30), SIMDE_FLOAT32_C( -219.07), SIMDE_FLOAT32_C( 510.59) }, { SIMDE_FLOAT32_C( -939.00), SIMDE_FLOAT32_C( -583.00), SIMDE_FLOAT32_C( -219.00), SIMDE_FLOAT32_C( 511.00) } }, { { SIMDE_FLOAT32_C( -715.91), SIMDE_FLOAT32_C( -372.75), SIMDE_FLOAT32_C( -712.38), SIMDE_FLOAT32_C( -503.56) }, { SIMDE_FLOAT32_C( -716.00), SIMDE_FLOAT32_C( -373.00), SIMDE_FLOAT32_C( -712.00), SIMDE_FLOAT32_C( -504.00) } }, { { SIMDE_FLOAT32_C( -168.71), SIMDE_FLOAT32_C( -813.07), SIMDE_FLOAT32_C( 403.02), SIMDE_FLOAT32_C( 394.80) }, { SIMDE_FLOAT32_C( -169.00), SIMDE_FLOAT32_C( -813.00), SIMDE_FLOAT32_C( 403.00), SIMDE_FLOAT32_C( 395.00) } }, { { SIMDE_FLOAT32_C( 21.00), SIMDE_FLOAT32_C( 886.89), SIMDE_FLOAT32_C( -893.72), SIMDE_FLOAT32_C( 452.69) }, { SIMDE_FLOAT32_C( 21.00), SIMDE_FLOAT32_C( 887.00), SIMDE_FLOAT32_C( -894.00), SIMDE_FLOAT32_C( 453.00) } }, { { SIMDE_FLOAT32_C( 948.91), SIMDE_FLOAT32_C( 933.92), SIMDE_FLOAT32_C( 437.32), SIMDE_FLOAT32_C( 210.16) }, { SIMDE_FLOAT32_C( 949.00), 
SIMDE_FLOAT32_C( 934.00), SIMDE_FLOAT32_C( 437.00), SIMDE_FLOAT32_C( 210.00) } }, { { SIMDE_FLOAT32_C( -77.38), SIMDE_FLOAT32_C( -465.30), SIMDE_FLOAT32_C( 385.77), SIMDE_FLOAT32_C( 516.99) }, { SIMDE_FLOAT32_C( -77.00), SIMDE_FLOAT32_C( -465.00), SIMDE_FLOAT32_C( 386.00), SIMDE_FLOAT32_C( 517.00) } }, { { SIMDE_FLOAT32_C( -910.94), SIMDE_FLOAT32_C( -900.33), SIMDE_FLOAT32_C( 933.15), SIMDE_FLOAT32_C( -300.52) }, { SIMDE_FLOAT32_C( -911.00), SIMDE_FLOAT32_C( -900.00), SIMDE_FLOAT32_C( 933.00), SIMDE_FLOAT32_C( -301.00) } }, { { SIMDE_FLOAT32_C( -584.31), SIMDE_FLOAT32_C( 562.08), SIMDE_FLOAT32_C( 586.62), SIMDE_FLOAT32_C( -522.98) }, { SIMDE_FLOAT32_C( -584.00), SIMDE_FLOAT32_C( 562.00), SIMDE_FLOAT32_C( 587.00), SIMDE_FLOAT32_C( -523.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t r = simde_vrndiq_f32(a); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vrndiq_f32(a); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrndiq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN, -SIMDE_MATH_NAN }, { SIMDE_MATH_NAN, -SIMDE_MATH_NAN } }, #endif { { SIMDE_FLOAT64_C( -1.50), SIMDE_FLOAT64_C( 1.50) }, { SIMDE_FLOAT64_C( -2.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( -2.50), SIMDE_FLOAT64_C( 2.50) }, { SIMDE_FLOAT64_C( -2.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( 978.78), SIMDE_FLOAT64_C( -632.45) }, { SIMDE_FLOAT64_C( 979.00), SIMDE_FLOAT64_C( -632.00) } }, { { SIMDE_FLOAT64_C( 987.61), SIMDE_FLOAT64_C( -737.13) }, 
{ SIMDE_FLOAT64_C( 988.00), SIMDE_FLOAT64_C( -737.00) } }, { { SIMDE_FLOAT64_C( -5.20), SIMDE_FLOAT64_C( -724.77) }, { SIMDE_FLOAT64_C( -5.00), SIMDE_FLOAT64_C( -725.00) } }, { { SIMDE_FLOAT64_C( -240.69), SIMDE_FLOAT64_C( 826.09) }, { SIMDE_FLOAT64_C( -241.00), SIMDE_FLOAT64_C( 826.00) } }, { { SIMDE_FLOAT64_C( -537.84), SIMDE_FLOAT64_C( -837.67) }, { SIMDE_FLOAT64_C( -538.00), SIMDE_FLOAT64_C( -838.00) } }, { { SIMDE_FLOAT64_C( 220.89), SIMDE_FLOAT64_C( 483.16) }, { SIMDE_FLOAT64_C( 221.00), SIMDE_FLOAT64_C( 483.00) } }, { { SIMDE_FLOAT64_C( -950.78), SIMDE_FLOAT64_C( 327.17) }, { SIMDE_FLOAT64_C( -951.00), SIMDE_FLOAT64_C( 327.00) } }, { { SIMDE_FLOAT64_C( -64.15), SIMDE_FLOAT64_C( 998.14) }, { SIMDE_FLOAT64_C( -64.00), SIMDE_FLOAT64_C( 998.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t r = simde_vrndiq_f64(a); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t r = simde_vrndiq_f64(a); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vrndi_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vrndi_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vrndiq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vrndiq_f64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/rndm.c000066400000000000000000000237171400333146700164350ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN rndm #include "test-neon.h" #include "../../../simde/arm/neon/rndm.h" static int test_simde_vrndm_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 r[2]; } test_vec[] = { #if defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, 
-SIMDE_MATH_NANF }, { SIMDE_MATH_NANF, -SIMDE_MATH_NANF } }, #endif { { SIMDE_FLOAT32_C( -1.50), SIMDE_FLOAT32_C( 1.50) }, { SIMDE_FLOAT32_C( -2.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( -2.50), SIMDE_FLOAT32_C( 2.50) }, { SIMDE_FLOAT32_C( -3.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( -897.30), SIMDE_FLOAT32_C( 351.51) }, { SIMDE_FLOAT32_C( -898.00), SIMDE_FLOAT32_C( 351.00) } }, { { SIMDE_FLOAT32_C( -396.24), SIMDE_FLOAT32_C( -136.90) }, { SIMDE_FLOAT32_C( -397.00), SIMDE_FLOAT32_C( -137.00) } }, { { SIMDE_FLOAT32_C( -966.64), SIMDE_FLOAT32_C( 805.58) }, { SIMDE_FLOAT32_C( -967.00), SIMDE_FLOAT32_C( 805.00) } }, { { SIMDE_FLOAT32_C( 848.81), SIMDE_FLOAT32_C( -910.27) }, { SIMDE_FLOAT32_C( 848.00), SIMDE_FLOAT32_C( -911.00) } }, { { SIMDE_FLOAT32_C( -262.75), SIMDE_FLOAT32_C( 779.23) }, { SIMDE_FLOAT32_C( -263.00), SIMDE_FLOAT32_C( 779.00) } }, { { SIMDE_FLOAT32_C( 824.19), SIMDE_FLOAT32_C( -986.07) }, { SIMDE_FLOAT32_C( 824.00), SIMDE_FLOAT32_C( -987.00) } }, { { SIMDE_FLOAT32_C( 272.13), SIMDE_FLOAT32_C( 812.56) }, { SIMDE_FLOAT32_C( 272.00), SIMDE_FLOAT32_C( 812.00) } }, { { SIMDE_FLOAT32_C( -763.50), SIMDE_FLOAT32_C( 477.59) }, { SIMDE_FLOAT32_C( -764.00), SIMDE_FLOAT32_C( 477.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t r = simde_vrndm_f32(a); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vrndm_f32(a); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrndm_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; simde_float64 r[1]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { 
SIMDE_MATH_NAN }, { SIMDE_MATH_NAN } }, { { -SIMDE_MATH_NAN }, { -SIMDE_MATH_NAN } }, #endif { { SIMDE_FLOAT64_C( -1.50) }, { SIMDE_FLOAT64_C( -2.00) } }, { { SIMDE_FLOAT64_C( 1.50) }, { SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( -2.50) }, { SIMDE_FLOAT64_C( -3.00) } }, { { SIMDE_FLOAT64_C( 2.50) }, { SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( 333.88) }, { SIMDE_FLOAT64_C( 333.00) } }, { { SIMDE_FLOAT64_C( 629.40) }, { SIMDE_FLOAT64_C( 629.00) } }, { { SIMDE_FLOAT64_C( -124.31) }, { SIMDE_FLOAT64_C( -125.00) } }, { { SIMDE_FLOAT64_C( 133.65) }, { SIMDE_FLOAT64_C( 133.00) } }, { { SIMDE_FLOAT64_C( -307.19) }, { SIMDE_FLOAT64_C( -308.00) } }, { { SIMDE_FLOAT64_C( 596.65) }, { SIMDE_FLOAT64_C( 596.00) } }, { { SIMDE_FLOAT64_C( 827.64) }, { SIMDE_FLOAT64_C( 827.00) } }, { { SIMDE_FLOAT64_C( 250.89) }, { SIMDE_FLOAT64_C( 250.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t r = simde_vrndm_f64(a); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t r = simde_vrndm_f64(a); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrndmq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 r[4]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, -SIMDE_MATH_NANF, SIMDE_MATH_NANF, -SIMDE_MATH_NANF }, { SIMDE_MATH_NANF, -SIMDE_MATH_NANF, SIMDE_MATH_NANF, -SIMDE_MATH_NANF } }, #endif { { SIMDE_FLOAT32_C( -1.50), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( -2.50), SIMDE_FLOAT32_C( 2.50) }, { SIMDE_FLOAT32_C( -2.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -3.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( 
744.58), SIMDE_FLOAT32_C( -175.23), SIMDE_FLOAT32_C( -591.29), SIMDE_FLOAT32_C( 759.75) }, { SIMDE_FLOAT32_C( 744.00), SIMDE_FLOAT32_C( -176.00), SIMDE_FLOAT32_C( -592.00), SIMDE_FLOAT32_C( 759.00) } }, { { SIMDE_FLOAT32_C( 273.17), SIMDE_FLOAT32_C( 118.54), SIMDE_FLOAT32_C( -744.67), SIMDE_FLOAT32_C( 375.86) }, { SIMDE_FLOAT32_C( 273.00), SIMDE_FLOAT32_C( 118.00), SIMDE_FLOAT32_C( -745.00), SIMDE_FLOAT32_C( 375.00) } }, { { SIMDE_FLOAT32_C( -529.96), SIMDE_FLOAT32_C( -140.92), SIMDE_FLOAT32_C( -761.03), SIMDE_FLOAT32_C( -496.59) }, { SIMDE_FLOAT32_C( -530.00), SIMDE_FLOAT32_C( -141.00), SIMDE_FLOAT32_C( -762.00), SIMDE_FLOAT32_C( -497.00) } }, { { SIMDE_FLOAT32_C( -335.34), SIMDE_FLOAT32_C( -912.22), SIMDE_FLOAT32_C( -406.86), SIMDE_FLOAT32_C( 401.91) }, { SIMDE_FLOAT32_C( -336.00), SIMDE_FLOAT32_C( -913.00), SIMDE_FLOAT32_C( -407.00), SIMDE_FLOAT32_C( 401.00) } }, { { SIMDE_FLOAT32_C( 867.01), SIMDE_FLOAT32_C( -582.67), SIMDE_FLOAT32_C( 415.83), SIMDE_FLOAT32_C( 139.14) }, { SIMDE_FLOAT32_C( 867.00), SIMDE_FLOAT32_C( -583.00), SIMDE_FLOAT32_C( 415.00), SIMDE_FLOAT32_C( 139.00) } }, { { SIMDE_FLOAT32_C( -770.11), SIMDE_FLOAT32_C( 652.33), SIMDE_FLOAT32_C( -383.28), SIMDE_FLOAT32_C( 563.77) }, { SIMDE_FLOAT32_C( -771.00), SIMDE_FLOAT32_C( 652.00), SIMDE_FLOAT32_C( -384.00), SIMDE_FLOAT32_C( 563.00) } }, { { SIMDE_FLOAT32_C( 281.73), SIMDE_FLOAT32_C( 492.41), SIMDE_FLOAT32_C( -302.57), SIMDE_FLOAT32_C( 974.54) }, { SIMDE_FLOAT32_C( 281.00), SIMDE_FLOAT32_C( 492.00), SIMDE_FLOAT32_C( -303.00), SIMDE_FLOAT32_C( 974.00) } }, { { SIMDE_FLOAT32_C( 89.06), SIMDE_FLOAT32_C( -474.93), SIMDE_FLOAT32_C( 225.42), SIMDE_FLOAT32_C( -166.36) }, { SIMDE_FLOAT32_C( 89.00), SIMDE_FLOAT32_C( -475.00), SIMDE_FLOAT32_C( 225.00), SIMDE_FLOAT32_C( -167.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t r = simde_vrndmq_f32(a); simde_test_arm_neon_assert_equal_f32x4(r, 
simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vrndmq_f32(a); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrndmq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN, -SIMDE_MATH_NAN }, { SIMDE_MATH_NAN, -SIMDE_MATH_NAN } }, #endif { { SIMDE_FLOAT64_C( -1.50), SIMDE_FLOAT64_C( 1.50) }, { SIMDE_FLOAT64_C( -2.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( -2.50), SIMDE_FLOAT64_C( 2.50) }, { SIMDE_FLOAT64_C( -3.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( 349.83), SIMDE_FLOAT64_C( 634.13) }, { SIMDE_FLOAT64_C( 349.00), SIMDE_FLOAT64_C( 634.00) } }, { { SIMDE_FLOAT64_C( -406.61), SIMDE_FLOAT64_C( -377.00) }, { SIMDE_FLOAT64_C( -407.00), SIMDE_FLOAT64_C( -377.00) } }, { { SIMDE_FLOAT64_C( -247.33), SIMDE_FLOAT64_C( -151.28) }, { SIMDE_FLOAT64_C( -248.00), SIMDE_FLOAT64_C( -152.00) } }, { { SIMDE_FLOAT64_C( 998.86), SIMDE_FLOAT64_C( 222.71) }, { SIMDE_FLOAT64_C( 998.00), SIMDE_FLOAT64_C( 222.00) } }, { { SIMDE_FLOAT64_C( 707.80), SIMDE_FLOAT64_C( -762.17) }, { SIMDE_FLOAT64_C( 707.00), SIMDE_FLOAT64_C( -763.00) } }, { { SIMDE_FLOAT64_C( 726.12), SIMDE_FLOAT64_C( -627.54) }, { SIMDE_FLOAT64_C( 726.00), SIMDE_FLOAT64_C( -628.00) } }, { { SIMDE_FLOAT64_C( -674.40), SIMDE_FLOAT64_C( -680.74) }, { SIMDE_FLOAT64_C( -675.00), SIMDE_FLOAT64_C( -681.00) } }, { { SIMDE_FLOAT64_C( 774.37), SIMDE_FLOAT64_C( -807.39) }, { SIMDE_FLOAT64_C( 774.00), SIMDE_FLOAT64_C( -808.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t r = simde_vrndmq_f64(a); 
simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t r = simde_vrndmq_f64(a); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vrndm_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vrndm_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vrndmq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vrndmq_f64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/rndn.c000066400000000000000000000237131400333146700164320ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN rndn #include "test-neon.h" #include "../../../simde/arm/neon/rndn.h" static int test_simde_vrndn_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 r[2]; } test_vec[] = { #if defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, -SIMDE_MATH_NANF }, { SIMDE_MATH_NANF, -SIMDE_MATH_NANF } }, #endif { { SIMDE_FLOAT32_C( -1.50), SIMDE_FLOAT32_C( 1.50) }, { SIMDE_FLOAT32_C( -2.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( -2.50), SIMDE_FLOAT32_C( 2.50) }, { SIMDE_FLOAT32_C( -2.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( -593.90), SIMDE_FLOAT32_C( 196.84) }, { SIMDE_FLOAT32_C( -594.00), SIMDE_FLOAT32_C( 197.00) } }, { { SIMDE_FLOAT32_C( 569.79), SIMDE_FLOAT32_C( 336.27) }, { SIMDE_FLOAT32_C( 570.00), SIMDE_FLOAT32_C( 336.00) } }, { { SIMDE_FLOAT32_C( -670.11), SIMDE_FLOAT32_C( 299.96) }, { SIMDE_FLOAT32_C( -670.00), SIMDE_FLOAT32_C( 300.00) } }, { { SIMDE_FLOAT32_C( -4.27), SIMDE_FLOAT32_C( -333.31) }, { SIMDE_FLOAT32_C( -4.00), SIMDE_FLOAT32_C( -333.00) } }, { { SIMDE_FLOAT32_C( -389.20), SIMDE_FLOAT32_C( 338.21) }, { SIMDE_FLOAT32_C( -389.00), SIMDE_FLOAT32_C( 338.00) } }, { { SIMDE_FLOAT32_C( 172.22), SIMDE_FLOAT32_C( 764.71) }, { 
SIMDE_FLOAT32_C( 172.00), SIMDE_FLOAT32_C( 765.00) } }, { { SIMDE_FLOAT32_C( 789.38), SIMDE_FLOAT32_C( -740.62) }, { SIMDE_FLOAT32_C( 789.00), SIMDE_FLOAT32_C( -741.00) } }, { { SIMDE_FLOAT32_C( 713.87), SIMDE_FLOAT32_C( -75.96) }, { SIMDE_FLOAT32_C( 714.00), SIMDE_FLOAT32_C( -76.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t r = simde_vrndn_f32(a); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vrndn_f32(a); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrndn_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; simde_float64 r[1]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN }, { SIMDE_MATH_NAN } }, { { -SIMDE_MATH_NAN }, { -SIMDE_MATH_NAN } }, #endif { { SIMDE_FLOAT64_C( -1.50) }, { SIMDE_FLOAT64_C( -2.00) } }, { { SIMDE_FLOAT64_C( 1.50) }, { SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( -2.50) }, { SIMDE_FLOAT64_C( -2.00) } }, { { SIMDE_FLOAT64_C( 2.50) }, { SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( 956.89) }, { SIMDE_FLOAT64_C( 957.00) } }, { { SIMDE_FLOAT64_C( 240.71) }, { SIMDE_FLOAT64_C( 241.00) } }, { { SIMDE_FLOAT64_C( -255.78) }, { SIMDE_FLOAT64_C( -256.00) } }, { { SIMDE_FLOAT64_C( 583.46) }, { SIMDE_FLOAT64_C( 583.00) } }, { { SIMDE_FLOAT64_C( 184.46) }, { SIMDE_FLOAT64_C( 184.00) } }, { { SIMDE_FLOAT64_C( -123.90) }, { SIMDE_FLOAT64_C( -124.00) } }, { { SIMDE_FLOAT64_C( 757.51) }, { SIMDE_FLOAT64_C( 758.00) } }, { { SIMDE_FLOAT64_C( 200.47) }, { SIMDE_FLOAT64_C( 200.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t 
a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t r = simde_vrndn_f64(a); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t r = simde_vrndn_f64(a); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrndnq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 r[4]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, -SIMDE_MATH_NANF, SIMDE_MATH_NANF, -SIMDE_MATH_NANF }, { SIMDE_MATH_NANF, -SIMDE_MATH_NANF, SIMDE_MATH_NANF, -SIMDE_MATH_NANF } }, #endif { { SIMDE_FLOAT32_C( -1.50), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( -2.50), SIMDE_FLOAT32_C( 2.50) }, { SIMDE_FLOAT32_C( -2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -2.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( 826.17), SIMDE_FLOAT32_C( -229.41), SIMDE_FLOAT32_C( -487.35), SIMDE_FLOAT32_C( 89.00) }, { SIMDE_FLOAT32_C( 826.00), SIMDE_FLOAT32_C( -229.00), SIMDE_FLOAT32_C( -487.00), SIMDE_FLOAT32_C( 89.00) } }, { { SIMDE_FLOAT32_C( -306.79), SIMDE_FLOAT32_C( -855.59), SIMDE_FLOAT32_C( 532.14), SIMDE_FLOAT32_C( 99.31) }, { SIMDE_FLOAT32_C( -307.00), SIMDE_FLOAT32_C( -856.00), SIMDE_FLOAT32_C( 532.00), SIMDE_FLOAT32_C( 99.00) } }, { { SIMDE_FLOAT32_C( 341.26), SIMDE_FLOAT32_C( 101.93), SIMDE_FLOAT32_C( -564.42), SIMDE_FLOAT32_C( 671.15) }, { SIMDE_FLOAT32_C( 341.00), SIMDE_FLOAT32_C( 102.00), SIMDE_FLOAT32_C( -564.00), SIMDE_FLOAT32_C( 671.00) } }, { { SIMDE_FLOAT32_C( -598.11), SIMDE_FLOAT32_C( 431.31), SIMDE_FLOAT32_C( -662.17), SIMDE_FLOAT32_C( 12.69) }, { SIMDE_FLOAT32_C( -598.00), SIMDE_FLOAT32_C( 431.00), SIMDE_FLOAT32_C( -662.00), SIMDE_FLOAT32_C( 13.00) } }, { { SIMDE_FLOAT32_C( -230.48), SIMDE_FLOAT32_C( 510.05), SIMDE_FLOAT32_C( 
-222.60), SIMDE_FLOAT32_C( -441.10) }, { SIMDE_FLOAT32_C( -230.00), SIMDE_FLOAT32_C( 510.00), SIMDE_FLOAT32_C( -223.00), SIMDE_FLOAT32_C( -441.00) } }, { { SIMDE_FLOAT32_C( 769.43), SIMDE_FLOAT32_C( -508.73), SIMDE_FLOAT32_C( 482.94), SIMDE_FLOAT32_C( 726.32) }, { SIMDE_FLOAT32_C( 769.00), SIMDE_FLOAT32_C( -509.00), SIMDE_FLOAT32_C( 483.00), SIMDE_FLOAT32_C( 726.00) } }, { { SIMDE_FLOAT32_C( 731.99), SIMDE_FLOAT32_C( -772.85), SIMDE_FLOAT32_C( 309.78), SIMDE_FLOAT32_C( -83.55) }, { SIMDE_FLOAT32_C( 732.00), SIMDE_FLOAT32_C( -773.00), SIMDE_FLOAT32_C( 310.00), SIMDE_FLOAT32_C( -84.00) } }, { { SIMDE_FLOAT32_C( 103.25), SIMDE_FLOAT32_C( 67.29), SIMDE_FLOAT32_C( -883.08), SIMDE_FLOAT32_C( -70.58) }, { SIMDE_FLOAT32_C( 103.00), SIMDE_FLOAT32_C( 67.00), SIMDE_FLOAT32_C( -883.00), SIMDE_FLOAT32_C( -71.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t r = simde_vrndnq_f32(a); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vrndnq_f32(a); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrndnq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN, -SIMDE_MATH_NAN }, { SIMDE_MATH_NAN, -SIMDE_MATH_NAN } }, #endif { { SIMDE_FLOAT64_C( -1.50), SIMDE_FLOAT64_C( 1.50) }, { SIMDE_FLOAT64_C( -2.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( -2.50), SIMDE_FLOAT64_C( 2.50) }, { SIMDE_FLOAT64_C( -2.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( 837.88), SIMDE_FLOAT64_C( -370.43) }, { SIMDE_FLOAT64_C( 838.00), SIMDE_FLOAT64_C( -370.00) } 
}, { { SIMDE_FLOAT64_C( -981.58), SIMDE_FLOAT64_C( -468.91) }, { SIMDE_FLOAT64_C( -982.00), SIMDE_FLOAT64_C( -469.00) } }, { { SIMDE_FLOAT64_C( -226.02), SIMDE_FLOAT64_C( 550.56) }, { SIMDE_FLOAT64_C( -226.00), SIMDE_FLOAT64_C( 551.00) } }, { { SIMDE_FLOAT64_C( 630.40), SIMDE_FLOAT64_C( -884.76) }, { SIMDE_FLOAT64_C( 630.00), SIMDE_FLOAT64_C( -885.00) } }, { { SIMDE_FLOAT64_C( -347.50), SIMDE_FLOAT64_C( -934.02) }, { SIMDE_FLOAT64_C( -348.00), SIMDE_FLOAT64_C( -934.00) } }, { { SIMDE_FLOAT64_C( 786.38), SIMDE_FLOAT64_C( 54.39) }, { SIMDE_FLOAT64_C( 786.00), SIMDE_FLOAT64_C( 54.00) } }, { { SIMDE_FLOAT64_C( 497.29), SIMDE_FLOAT64_C( -875.79) }, { SIMDE_FLOAT64_C( 497.00), SIMDE_FLOAT64_C( -876.00) } }, { { SIMDE_FLOAT64_C( -932.92), SIMDE_FLOAT64_C( -733.19) }, { SIMDE_FLOAT64_C( -933.00), SIMDE_FLOAT64_C( -733.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t r = simde_vrndnq_f64(a); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t r = simde_vrndnq_f64(a); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vrndn_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vrndn_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vrndnq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vrndnq_f64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/rndp.c000066400000000000000000000237131400333146700164340ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN rndp #include "test-neon.h" #include "../../../simde/arm/neon/rndp.h" static int test_simde_vrndp_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[2]; simde_float32 r[2]; } 
test_vec[] = { #if defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, -SIMDE_MATH_NANF }, { SIMDE_MATH_NANF, -SIMDE_MATH_NANF } }, #endif { { SIMDE_FLOAT32_C( -1.50), SIMDE_FLOAT32_C( 1.50) }, { SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( -2.50), SIMDE_FLOAT32_C( 2.50) }, { SIMDE_FLOAT32_C( -2.00), SIMDE_FLOAT32_C( 3.00) } }, { { SIMDE_FLOAT32_C( -980.04), SIMDE_FLOAT32_C( 939.96) }, { SIMDE_FLOAT32_C( -980.00), SIMDE_FLOAT32_C( 940.00) } }, { { SIMDE_FLOAT32_C( 208.31), SIMDE_FLOAT32_C( -110.24) }, { SIMDE_FLOAT32_C( 209.00), SIMDE_FLOAT32_C( -110.00) } }, { { SIMDE_FLOAT32_C( -288.01), SIMDE_FLOAT32_C( 612.61) }, { SIMDE_FLOAT32_C( -288.00), SIMDE_FLOAT32_C( 613.00) } }, { { SIMDE_FLOAT32_C( 975.34), SIMDE_FLOAT32_C( 999.38) }, { SIMDE_FLOAT32_C( 976.00), SIMDE_FLOAT32_C( 1000.00) } }, { { SIMDE_FLOAT32_C( -633.20), SIMDE_FLOAT32_C( -603.45) }, { SIMDE_FLOAT32_C( -633.00), SIMDE_FLOAT32_C( -603.00) } }, { { SIMDE_FLOAT32_C( 29.78), SIMDE_FLOAT32_C( 554.21) }, { SIMDE_FLOAT32_C( 30.00), SIMDE_FLOAT32_C( 555.00) } }, { { SIMDE_FLOAT32_C( -734.21), SIMDE_FLOAT32_C( 840.44) }, { SIMDE_FLOAT32_C( -734.00), SIMDE_FLOAT32_C( 841.00) } }, { { SIMDE_FLOAT32_C( 418.90), SIMDE_FLOAT32_C( 259.02) }, { SIMDE_FLOAT32_C( 419.00), SIMDE_FLOAT32_C( 260.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t r = simde_vrndp_f32(a); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vrndp_f32(a); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrndp_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[1]; simde_float64 
r[1]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN }, { SIMDE_MATH_NAN } }, { { -SIMDE_MATH_NAN }, { -SIMDE_MATH_NAN } }, #endif { { SIMDE_FLOAT64_C( -1.50) }, { SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( 1.50) }, { SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( -2.50) }, { SIMDE_FLOAT64_C( -2.00) } }, { { SIMDE_FLOAT64_C( 2.50) }, { SIMDE_FLOAT64_C( 3.00) } }, { { SIMDE_FLOAT64_C( -235.01) }, { SIMDE_FLOAT64_C( -235.00) } }, { { SIMDE_FLOAT64_C( -729.80) }, { SIMDE_FLOAT64_C( -729.00) } }, { { SIMDE_FLOAT64_C( -569.69) }, { SIMDE_FLOAT64_C( -569.00) } }, { { SIMDE_FLOAT64_C( -128.47) }, { SIMDE_FLOAT64_C( -128.00) } }, { { SIMDE_FLOAT64_C( -404.59) }, { SIMDE_FLOAT64_C( -404.00) } }, { { SIMDE_FLOAT64_C( 535.14) }, { SIMDE_FLOAT64_C( 536.00) } }, { { SIMDE_FLOAT64_C( -863.09) }, { SIMDE_FLOAT64_C( -863.00) } }, { { SIMDE_FLOAT64_C( 977.14) }, { SIMDE_FLOAT64_C( 978.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t r = simde_vrndp_f64(a); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t r = simde_vrndp_f64(a); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrndpq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a[4]; simde_float32 r[4]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, -SIMDE_MATH_NANF, SIMDE_MATH_NANF, -SIMDE_MATH_NANF }, { SIMDE_MATH_NANF, -SIMDE_MATH_NANF, SIMDE_MATH_NANF, -SIMDE_MATH_NANF } }, #endif { { SIMDE_FLOAT32_C( -1.50), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( -2.50), SIMDE_FLOAT32_C( 2.50) }, { SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 2.00), 
SIMDE_FLOAT32_C( -2.00), SIMDE_FLOAT32_C( 3.00) } }, { { SIMDE_FLOAT32_C( 86.24), SIMDE_FLOAT32_C( 581.00), SIMDE_FLOAT32_C( 890.92), SIMDE_FLOAT32_C( 188.30) }, { SIMDE_FLOAT32_C( 87.00), SIMDE_FLOAT32_C( 581.00), SIMDE_FLOAT32_C( 891.00), SIMDE_FLOAT32_C( 189.00) } }, { { SIMDE_FLOAT32_C( 162.02), SIMDE_FLOAT32_C( 633.16), SIMDE_FLOAT32_C( -103.71), SIMDE_FLOAT32_C( 181.98) }, { SIMDE_FLOAT32_C( 163.00), SIMDE_FLOAT32_C( 634.00), SIMDE_FLOAT32_C( -103.00), SIMDE_FLOAT32_C( 182.00) } }, { { SIMDE_FLOAT32_C( 573.12), SIMDE_FLOAT32_C( -895.40), SIMDE_FLOAT32_C( -928.26), SIMDE_FLOAT32_C( -714.90) }, { SIMDE_FLOAT32_C( 574.00), SIMDE_FLOAT32_C( -895.00), SIMDE_FLOAT32_C( -928.00), SIMDE_FLOAT32_C( -714.00) } }, { { SIMDE_FLOAT32_C( 717.20), SIMDE_FLOAT32_C( -952.92), SIMDE_FLOAT32_C( -715.52), SIMDE_FLOAT32_C( -915.99) }, { SIMDE_FLOAT32_C( 718.00), SIMDE_FLOAT32_C( -952.00), SIMDE_FLOAT32_C( -715.00), SIMDE_FLOAT32_C( -915.00) } }, { { SIMDE_FLOAT32_C( -556.37), SIMDE_FLOAT32_C( 314.25), SIMDE_FLOAT32_C( 638.22), SIMDE_FLOAT32_C( -290.58) }, { SIMDE_FLOAT32_C( -556.00), SIMDE_FLOAT32_C( 315.00), SIMDE_FLOAT32_C( 639.00), SIMDE_FLOAT32_C( -290.00) } }, { { SIMDE_FLOAT32_C( 154.70), SIMDE_FLOAT32_C( 57.12), SIMDE_FLOAT32_C( 968.43), SIMDE_FLOAT32_C( 919.68) }, { SIMDE_FLOAT32_C( 155.00), SIMDE_FLOAT32_C( 58.00), SIMDE_FLOAT32_C( 969.00), SIMDE_FLOAT32_C( 920.00) } }, { { SIMDE_FLOAT32_C( 327.32), SIMDE_FLOAT32_C( -601.25), SIMDE_FLOAT32_C( -208.79), SIMDE_FLOAT32_C( 922.73) }, { SIMDE_FLOAT32_C( 328.00), SIMDE_FLOAT32_C( -601.00), SIMDE_FLOAT32_C( -208.00), SIMDE_FLOAT32_C( 923.00) } }, { { SIMDE_FLOAT32_C( 933.89), SIMDE_FLOAT32_C( -71.87), SIMDE_FLOAT32_C( 899.87), SIMDE_FLOAT32_C( 20.13) }, { SIMDE_FLOAT32_C( 934.00), SIMDE_FLOAT32_C( -71.00), SIMDE_FLOAT32_C( 900.00), SIMDE_FLOAT32_C( 21.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t r = 
simde_vrndpq_f32(a); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vrndpq_f32(a); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrndpq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a[2]; simde_float64 r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN, -SIMDE_MATH_NAN }, { SIMDE_MATH_NAN, -SIMDE_MATH_NAN } }, #endif { { SIMDE_FLOAT64_C( -1.50), SIMDE_FLOAT64_C( 1.50) }, { SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( -2.50), SIMDE_FLOAT64_C( 2.50) }, { SIMDE_FLOAT64_C( -2.00), SIMDE_FLOAT64_C( 3.00) } }, { { SIMDE_FLOAT64_C( -490.87), SIMDE_FLOAT64_C( 790.79) }, { SIMDE_FLOAT64_C( -490.00), SIMDE_FLOAT64_C( 791.00) } }, { { SIMDE_FLOAT64_C( -791.57), SIMDE_FLOAT64_C( 671.15) }, { SIMDE_FLOAT64_C( -791.00), SIMDE_FLOAT64_C( 672.00) } }, { { SIMDE_FLOAT64_C( 423.95), SIMDE_FLOAT64_C( 104.72) }, { SIMDE_FLOAT64_C( 424.00), SIMDE_FLOAT64_C( 105.00) } }, { { SIMDE_FLOAT64_C( -146.87), SIMDE_FLOAT64_C( -2.94) }, { SIMDE_FLOAT64_C( -146.00), SIMDE_FLOAT64_C( -2.00) } }, { { SIMDE_FLOAT64_C( 209.32), SIMDE_FLOAT64_C( -75.14) }, { SIMDE_FLOAT64_C( 210.00), SIMDE_FLOAT64_C( -75.00) } }, { { SIMDE_FLOAT64_C( 282.16), SIMDE_FLOAT64_C( -73.47) }, { SIMDE_FLOAT64_C( 283.00), SIMDE_FLOAT64_C( -73.00) } }, { { SIMDE_FLOAT64_C( -28.06), SIMDE_FLOAT64_C( 566.64) }, { SIMDE_FLOAT64_C( -28.00), SIMDE_FLOAT64_C( 567.00) } }, { { SIMDE_FLOAT64_C( 10.53), SIMDE_FLOAT64_C( 415.57) }, { SIMDE_FLOAT64_C( 11.00), SIMDE_FLOAT64_C( 416.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t r = 
simde_vrndpq_f64(a); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t r = simde_vrndpq_f64(a); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vrndp_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vrndp_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vrndpq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vrndpq_f64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/rshl.c000066400000000000000000002077741400333146700164540ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN rshl #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/rshl.h" #else #include "../../../simde/arm/neon.h" #endif #if 0 #define PROBABILITY 80 #define probability(p) (rand() < (((int64_t) RAND_MAX * (p)) / 100)) #endif static int test_simde_vrshl_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 1), -INT8_C( 80), INT8_C( 102), INT8_C( 17), INT8_C( 10), INT8_C( 22), INT8_C( 87), -INT8_C( 121) }, { INT8_C( 8), INT8_C( 6), INT8_C( 0), INT8_C( 7), -INT8_C( 8), INT8_C( 7), INT8_C( 49), -INT8_C( 7) }, { INT8_C( 0), INT8_C( 0), INT8_C( 102), INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1) } }, { { -INT8_C( 1), INT8_C( 26), INT8_C( 68), INT8_C( 57), INT8_C( 29), INT8_C( 19), -INT8_C( 63), -INT8_C( 107) }, { -INT8_C( 8), INT8_C( 5), -INT8_C( 6), -INT8_C( 7), INT8_C( 62), -INT8_C( 2), INT8_C( 5), INT8_C( 5) }, { INT8_C( 0), INT8_C( 64), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 5), INT8_C( 32), -INT8_C( 96) } }, { { -INT8_C( 31), -INT8_C( 21), -INT8_C( 103), INT8_C( 75), INT8_C( 114), INT8_C( 33), -INT8_C( 81), -INT8_C( 75) }, { 
INT8_C( 7), -INT8_C( 7), -INT8_C( 17), INT8_C( 5), INT8_C( 6), -INT8_C( 5), -INT8_C( 5), -INT8_C( 7) }, { INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 96), INT8_MIN, INT8_C( 1), -INT8_C( 3), -INT8_C( 1) } }, { { INT8_C( 65), -INT8_C( 49), -INT8_C( 12), -INT8_C( 43), INT8_C( 99), INT8_C( 26), INT8_C( 31), INT8_C( 68) }, { INT8_C( 6), -INT8_C( 5), -INT8_C( 7), INT8_C( 7), -INT8_C( 5), INT8_C( 7), INT8_C( 5), INT8_C( 21) }, { INT8_C( 64), -INT8_C( 2), INT8_C( 0), INT8_MIN, INT8_C( 3), INT8_C( 0), -INT8_C( 32), INT8_C( 0) } }, { { -INT8_C( 16), -INT8_C( 102), -INT8_C( 68), -INT8_C( 125), -INT8_C( 121), INT8_C( 39), INT8_C( 74), -INT8_C( 56) }, { -INT8_C( 10), INT8_C( 62), -INT8_C( 98), INT8_C( 90), INT8_C( 5), -INT8_C( 5), -INT8_C( 7), INT8_C( 94) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 32), INT8_C( 1), INT8_C( 1), INT8_C( 0) } }, { { INT8_C( 32), -INT8_C( 46), -INT8_C( 39), -INT8_C( 19), INT8_C( 46), -INT8_C( 36), -INT8_C( 109), INT8_C( 30) }, { INT8_C( 118), INT8_C( 79), -INT8_C( 6), -INT8_C( 3), INT8_C( 7), -INT8_C( 6), -INT8_C( 8), INT8_C( 6) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 2), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_MIN } }, { { -INT8_C( 108), -INT8_C( 73), -INT8_C( 27), INT8_C( 1), -INT8_C( 69), INT8_C( 73), -INT8_C( 95), -INT8_C( 37) }, { INT8_C( 27), INT8_C( 122), -INT8_C( 7), INT8_C( 4), INT8_C( 5), INT8_C( 5), INT8_C( 6), -INT8_C( 7) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 16), INT8_C( 96), INT8_C( 32), INT8_C( 64), INT8_C( 0) } }, { { -INT8_C( 12), INT8_C( 85), -INT8_C( 96), INT8_C( 21), -INT8_C( 69), -INT8_C( 127), -INT8_C( 84), INT8_C( 79) }, { INT8_C( 7), -INT8_C( 7), INT8_C( 5), -INT8_C( 7), -INT8_C( 37), -INT8_C( 7), -INT8_C( 7), -INT8_C( 10) }, { INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = 
simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vrshl_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int8_t a_patch[] = { -INT8_C(1), -INT8_C(1) }; int8_t b_patch[] = { 8, -8 }; for (int i = 0 ; i < 8 ; i++) { simde_int8x8_private a_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); simde_int8x8_private b_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { while (abs(b_.values[j]) > 8) { b_.values[j] >>= 1; } } } if ((size_t) i < (sizeof(a_patch) / sizeof(a_patch[0]))) { a_.values[0] = a_patch[i]; b_.values[0] = b_patch[i]; } simde_int8x8_t a = simde_int8x8_from_private(a_); simde_int8x8_t b = simde_int8x8_from_private(b_); simde_int8x8_t r = simde_vrshl_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshl_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 1), INT16_C( 16127), INT16_C( 7767), INT16_C( 21548) }, { INT16_C( 16), INT16_C( 11), INT16_C( 15), -INT16_C( 16) }, { INT16_C( 0), -INT16_C( 2048), INT16_MIN, INT16_C( 0) } }, { { -INT16_C( 1), INT16_C( 155), INT16_C( 26190), -INT16_C( 23334) }, { -INT16_C( 16), INT16_C( 14), INT16_C( 14), -INT16_C( 12) }, { INT16_C( 0), -INT16_C( 16384), INT16_MIN, -INT16_C( 6) } }, { { -INT16_C( 12578), INT16_C( 28010), -INT16_C( 16565), INT16_C( 31821) }, { INT16_C( 11), INT16_C( 12), INT16_C( 11), INT16_C( 7269) }, { -INT16_C( 4096), -INT16_C( 24576), INT16_C( 22528), INT16_C( 0) } }, { { -INT16_C( 1065), INT16_C( 21373), INT16_C( 12341), -INT16_C( 29402) }, { INT16_C( 9), -INT16_C( 9), INT16_C( 13), INT16_C( 12) }, { -INT16_C( 20992), INT16_C( 42), 
-INT16_C( 24576), INT16_C( 24576) } }, { { -INT16_C( 22525), -INT16_C( 11632), -INT16_C( 2809), -INT16_C( 12818) }, { -INT16_C( 20684), INT16_C( 11), INT16_C( 10), -INT16_C( 16) }, { INT16_C( 0), INT16_MIN, INT16_C( 7168), INT16_C( 0) } }, { { INT16_C( 14547), -INT16_C( 4223), -INT16_C( 9107), INT16_C( 25175) }, { INT16_C( 14), -INT16_C( 15), INT16_C( 14), -INT16_C( 11) }, { -INT16_C( 16384), INT16_C( 0), INT16_C( 16384), INT16_C( 12) } }, { { -INT16_C( 29192), -INT16_C( 23726), -INT16_C( 20257), INT16_C( 24964) }, { -INT16_C( 14), INT16_C( 8), INT16_C( 14), -INT16_C( 26376) }, { -INT16_C( 2), INT16_C( 20992), -INT16_C( 16384), INT16_C( 98) } }, { { -INT16_C( 30838), INT16_C( 27999), -INT16_C( 18012), -INT16_C( 18857) }, { INT16_C( 15), -INT16_C( 9), INT16_C( 9), INT16_C( 31646) }, { INT16_C( 0), INT16_C( 55), INT16_C( 18432), INT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vrshl_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int16_t a_patch[] = { -INT16_C(1), -INT16_C(1) }; int16_t b_patch[] = { 16, -16 }; for (int i = 0 ; i < 8 ; i++) { simde_int16x4_private a_ = simde_int16x4_to_private(simde_test_arm_neon_random_i16x4()); simde_int16x4_private b_ = simde_int16x4_to_private(simde_test_arm_neon_random_i16x4()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { while (abs(b_.values[j]) > 16) { b_.values[j] >>= 1; } } } if ((size_t) i < (sizeof(a_patch) / sizeof(a_patch[0]))) { a_.values[0] = a_patch[i]; b_.values[0] = b_patch[i]; } simde_int16x4_t a = simde_int16x4_from_private(a_); simde_int16x4_t b = simde_int16x4_from_private(b_); simde_int16x4_t r = simde_vrshl_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); 
simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshl_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 1), -INT32_C( 1425395738) }, { INT32_C( 32), -INT32_C( 32) }, { INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 1), INT32_C( 1217089360) }, { -INT32_C( 32), INT32_C( 20) }, { INT32_C( 0), -INT32_C( 184549376) } }, { { -INT32_C( 1741455487), -INT32_C( 478087585) }, { -INT32_C( 1033648548), INT32_C( 25) }, { INT32_C( 0), -INT32_C( 1107296256) } }, { { -INT32_C( 1102370122), INT32_C( 1878396110) }, { -INT32_C( 32), INT32_C( 20) }, { INT32_C( 0), -INT32_C( 857735168) } }, { { -INT32_C( 1522312632), -INT32_C( 915953887) }, { INT32_C( 26), INT32_C( 1737811484) }, { INT32_C( 536870912), INT32_C( 268435456) } }, { { INT32_C( 2144508531), INT32_C( 1342214961) }, { -INT32_C( 21), -INT32_C( 23) }, { INT32_C( 1023), INT32_C( 160) } }, { { INT32_C( 385152843), INT32_C( 746660880) }, { -INT32_C( 21), INT32_C( 1246234488) }, { INT32_C( 184), INT32_C( 0) } }, { { -INT32_C( 1295350051), -INT32_C( 413045626) }, { -INT32_C( 25), -INT32_C( 30) }, { -INT32_C( 39), INT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vrshl_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int32_t a_patch[] = { -INT32_C(1), -INT32_C(1) }; int32_t b_patch[] = { 32, -32 }; for (int i = 0 ; i < 8 ; i++) { simde_int32x2_private a_ = simde_int32x2_to_private(simde_test_arm_neon_random_i32x2()); simde_int32x2_private b_ = simde_int32x2_to_private(simde_test_arm_neon_random_i32x2()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if 
(probability(PROBABILITY)) { while (abs(b_.values[j]) > 32) { b_.values[j] >>= 1; } } } if ((size_t) i < (sizeof(a_patch) / sizeof(a_patch[0]))) { a_.values[0] = a_patch[i]; b_.values[0] = b_patch[i]; } simde_int32x2_t a = simde_int32x2_from_private(a_); simde_int32x2_t b = simde_int32x2_from_private(b_); simde_int32x2_t r = simde_vrshl_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshl_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int64_t a[1]; int64_t b[1]; int64_t r[1]; } test_vec[] = { { { -INT64_C( 1) }, { INT64_C( 64) }, { INT64_C( 0) } }, { { -INT64_C( 1) }, { -INT64_C( 64) }, { INT64_C( 0) } }, { { -INT64_C( 5783365166398861937) }, { -INT64_C( 1682183429783586356) }, { -INT64_C( 1284) } }, { { INT64_C( 1136283642200280026) }, { -INT64_C( 57) }, { INT64_C( 8) } }, { { INT64_C( 3453319167247895989) }, { INT64_C( 53) }, { -INT64_C( 5287225962532962304) } }, { { -INT64_C( 3754790191149012930) }, { -INT64_C( 39) }, { -INT64_C( 6829924) } }, { { INT64_C( 4367597038114582190) }, { -INT64_C( 56) }, { INT64_C( 61) } }, { { -INT64_C( 4313522761509692639) }, { -INT64_C( 43) }, { -INT64_C( 490391) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r = simde_vrshl_s64(a, b); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int64_t a_patch[] = { -INT64_C(1), -INT64_C(1) }; int64_t b_patch[] = { 64, -64 }; for (int i = 0 ; i < 8 ; i++) { simde_int64x1_private a_ = simde_int64x1_to_private(simde_test_arm_neon_random_i64x1()); simde_int64x1_private b_ = simde_int64x1_to_private(simde_test_arm_neon_random_i64x1()); for (size_t j = 0 ; j < (sizeof(b_.values) / 
sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { while (llabs(b_.values[j]) > 64) { b_.values[j] >>= 1; } } } if ((size_t) i < (sizeof(a_patch) / sizeof(a_patch[0]))) { a_.values[0] = a_patch[i]; b_.values[0] = b_patch[i]; } simde_int64x1_t a = simde_int64x1_from_private(a_); simde_int64x1_t b = simde_int64x1_from_private(b_); simde_int64x1_t r = simde_vrshl_s64(a, b); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshl_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint8_t a[8]; int8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_MAX, UINT8_C( 91), UINT8_C(231), UINT8_C(221), UINT8_C( 49), UINT8_C(217), UINT8_C(178), UINT8_C(160) }, { INT8_C( 8), -INT8_C( 123), -INT8_C( 39), INT8_C( 6), -INT8_C( 5), -INT8_C( 8), -INT8_C( 7), INT8_C( 6) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 64), UINT8_C( 2), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0) } }, { { UINT8_C(128), UINT8_C( 86), UINT8_C( 67), UINT8_C( 16), UINT8_C(186), UINT8_C(233), UINT8_C(204), UINT8_C( 62) }, { -INT8_C( 8), -INT8_C( 5), INT8_C( 28), INT8_C( 7), -INT8_C( 8), -INT8_C( 7), INT8_C( 22), -INT8_C( 82) }, { UINT8_C( 1), UINT8_C( 3), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_MAX, UINT8_C(141), UINT8_C( 43), UINT8_C(127), UINT8_C( 75), UINT8_C(120), UINT8_C( 48), UINT8_C(178) }, { -INT8_C( 9), INT8_C( 7), -INT8_C( 62), -INT8_C( 8), INT8_C( 5), -INT8_C( 8), -INT8_C( 8), -INT8_C( 6) }, { UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C( 96), UINT8_C( 0), UINT8_C( 0), UINT8_C( 3) } }, { { UINT8_C( 28), UINT8_C(141), UINT8_C( 24), UINT8_C(204), UINT8_C( 54), UINT8_C( 93), UINT8_C(123), UINT8_C(187) }, { -INT8_C( 6), -INT8_C( 89), INT8_C( 7), INT8_C( 6), INT8_C( 7), INT8_C( 6), -INT8_C( 6), -INT8_C( 5) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), 
UINT8_C( 0), UINT8_C( 0), UINT8_C( 64), UINT8_C( 2), UINT8_C( 6) } }, { { UINT8_C( 30), UINT8_C(244), UINT8_C( 74), UINT8_C(207), UINT8_C( 33), UINT8_C(200), UINT8_C(212), UINT8_C( 62) }, { INT8_C( 5), -INT8_C( 5), INT8_C( 5), -INT8_C( 8), INT8_C( 73), -INT8_C( 8), INT8_C( 8), INT8_C( 51) }, { UINT8_C(192), UINT8_C( 8), UINT8_C( 64), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(251), UINT8_C(173), UINT8_C( 4), UINT8_C( 52), UINT8_C(233), UINT8_C(224), UINT8_C(175), UINT8_C( 7) }, { -INT8_C( 6), -INT8_C( 7), -INT8_C( 6), -INT8_C( 5), -INT8_C( 8), -INT8_C( 6), INT8_C( 52), INT8_C( 22) }, { UINT8_C( 4), UINT8_C( 1), UINT8_C( 0), UINT8_C( 2), UINT8_C( 1), UINT8_C( 4), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(104), UINT8_C(124), UINT8_C( 59), UINT8_C( 83), UINT8_C(205), UINT8_C(116), UINT8_C( 28), UINT8_C(201) }, { INT8_C( 8), INT8_C( 8), -INT8_C( 3), INT8_C( 5), INT8_C( 0), -INT8_C( 6), INT8_C( 8), -INT8_C( 6) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 7), UINT8_C( 96), UINT8_C(205), UINT8_C( 2), UINT8_C( 0), UINT8_C( 3) } }, { { UINT8_C( 61), UINT8_C( 31), UINT8_C( 6), UINT8_C( 0), UINT8_C( 6), UINT8_C( 25), UINT8_C(240), UINT8_C(110) }, { -INT8_C( 7), INT8_C( 5), -INT8_C( 8), INT8_C( 6), -INT8_C( 96), -INT8_C( 35), INT8_C( 43), -INT8_C( 8) }, { UINT8_C( 0), UINT8_C(224), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_uint8x8_t r = simde_vrshl_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); uint8_t a_patch[] = { UINT8_MAX, UINT8_C(1) << (8 - 1), UINT8_MAX }; int8_t b_patch[] = { 8, -8, -8 - 1 }; for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_private a_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); simde_int8x8_private b_ = 
simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { while (abs(b_.values[j]) > 8) { b_.values[j] >>= 1; } } } if ((size_t) i < (sizeof(a_patch) / sizeof(a_patch[0]))) { a_.values[0] = a_patch[i]; b_.values[0] = b_patch[i]; } simde_uint8x8_t a = simde_uint8x8_from_private(a_); simde_int8x8_t b = simde_int8x8_from_private(b_); simde_uint8x8_t r = simde_vrshl_u8(a, b); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshl_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint16_t a[4]; int16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_MAX, UINT16_C(44116), UINT16_C(39019), UINT16_C(40834) }, { INT16_C( 16), -INT16_C( 9), -INT16_C( 9), -INT16_C( 16) }, { UINT16_C( 0), UINT16_C( 86), UINT16_C( 76), UINT16_C( 1) } }, { { UINT16_C(32768), UINT16_C(25689), UINT16_C(49174), UINT16_C(56283) }, { -INT16_C( 16), -INT16_C( 13), -INT16_C( 9), -INT16_C( 16) }, { UINT16_C( 1), UINT16_C( 3), UINT16_C( 96), UINT16_C( 1) } }, { { UINT16_MAX, UINT16_C(61016), UINT16_C(54701), UINT16_C(28590) }, { -INT16_C( 17), -INT16_C( 14), -INT16_C( 12), -INT16_C( 5560) }, { UINT16_C( 0), UINT16_C( 4), UINT16_C( 13), UINT16_C( 0) } }, { { UINT16_C(12257), UINT16_C(64032), UINT16_C(39150), UINT16_C(17534) }, { -INT16_C( 12), INT16_C( 9), INT16_C( 13), INT16_C( 16) }, { UINT16_C( 3), UINT16_C(16384), UINT16_C(49152), UINT16_C( 0) } }, { { UINT16_C(10277), UINT16_C(63613), UINT16_C(50682), UINT16_C(36066) }, { -INT16_C( 12), -INT16_C( 14), INT16_C( 13), -INT16_C( 15) }, { UINT16_C( 3), UINT16_C( 4), UINT16_C(16384), UINT16_C( 1) } }, { { UINT16_C(59108), UINT16_C(30269), UINT16_C(15643), UINT16_C( 9910) }, { INT16_C( 12), INT16_C( 8), INT16_C( 14), -INT16_C( 15) }, { UINT16_C(16384), UINT16_C(15616), 
UINT16_C(49152), UINT16_C( 0) } }, { { UINT16_C(62868), UINT16_C(27572), UINT16_C(31221), UINT16_C(36400) }, { INT16_C( 14), -INT16_C( 24647), -INT16_C( 10), INT16_C( 14) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 30), UINT16_C( 0) } }, { { UINT16_C( 8242), UINT16_C(49201), UINT16_C(15195), UINT16_C(23624) }, { INT16_C( 11), -INT16_C( 17338), -INT16_C( 10), INT16_C( 18215) }, { UINT16_C(36864), UINT16_C( 0), UINT16_C( 15), UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_uint16x4_t r = simde_vrshl_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); uint16_t a_patch[] = { UINT16_MAX, UINT16_C(1) << (16 - 1), UINT16_MAX }; int16_t b_patch[] = { 16, -16, -16 - 1 }; for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_private a_ = simde_uint16x4_to_private(simde_test_arm_neon_random_u16x4()); simde_int16x4_private b_ = simde_int16x4_to_private(simde_test_arm_neon_random_i16x4()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { while (abs(b_.values[j]) > 16) { b_.values[j] >>= 1; } } } if ((size_t) i < (sizeof(a_patch) / sizeof(a_patch[0]))) { a_.values[0] = a_patch[i]; b_.values[0] = b_patch[i]; } simde_uint16x4_t a = simde_uint16x4_from_private(a_); simde_int16x4_t b = simde_int16x4_from_private(b_); simde_uint16x4_t r = simde_vrshl_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshl_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint32_t a[2]; int32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_MAX, UINT32_C(4251222520) }, { INT32_C( 32), INT32_C( 19) }, { UINT32_C( 0), UINT32_C( 
264241152) } }, { { UINT32_C(2147483648), UINT32_C(1560478732) }, { -INT32_C( 32), -INT32_C( 947062820) }, { UINT32_C( 1), UINT32_C( 0) } }, { { UINT32_MAX, UINT32_C(2957451979) }, { -INT32_C( 33), INT32_C( 1240703795) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(2136813645), UINT32_C(3441609201) }, { INT32_C( 25), -INT32_C( 20) }, { UINT32_C(2583691264), UINT32_C( 3282) } }, { { UINT32_C( 49107157), UINT32_C( 877904641) }, { -INT32_C( 28), -INT32_C( 24) }, { UINT32_C( 0), UINT32_C( 52) } }, { { UINT32_C( 763522156), UINT32_C(1284773402) }, { -INT32_C( 33618445), INT32_C( 23) }, { UINT32_C( 93203), UINT32_C( 218103808) } }, { { UINT32_C(1501702642), UINT32_C(3052142703) }, { INT32_C( 358068427), -INT32_C( 26) }, { UINT32_C( 0), UINT32_C( 45) } }, { { UINT32_C(4041704701), UINT32_C(3421365897) }, { -INT32_C( 28), INT32_C( 25) }, { UINT32_C( 15), UINT32_C( 301989888) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_uint32x2_t r = simde_vrshl_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); uint32_t a_patch[] = { UINT32_MAX, UINT32_C(1) << (32 - 1), UINT32_MAX }; int32_t b_patch[] = { 32, -32, -32 - 1 }; for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_private a_ = simde_uint32x2_to_private(simde_test_arm_neon_random_u32x2()); simde_int32x2_private b_ = simde_int32x2_to_private(simde_test_arm_neon_random_i32x2()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { while (abs(b_.values[j]) > 32) { b_.values[j] >>= 1; } } } if ((size_t) i < (sizeof(a_patch) / sizeof(a_patch[0]))) { a_.values[0] = a_patch[i]; b_.values[0] = b_patch[i]; } simde_uint32x2_t a = simde_uint32x2_from_private(a_); simde_int32x2_t b = simde_int32x2_from_private(b_); simde_uint32x2_t r = simde_vrshl_u32(a, b); 
simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshl_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint64_t a[1]; int64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_MAX }, { INT64_C( 64) }, { UINT64_C( 0) } }, { { UINT64_C( 9223372036854775808) }, { -INT64_C( 64) }, { UINT64_C( 1) } }, { { UINT64_MAX }, { -INT64_C( 65) }, { UINT64_C( 0) } }, { { UINT64_C(10532064071281456190) }, { -INT64_C( 62) }, { UINT64_C( 2) } }, { { UINT64_C( 199871928094595380) }, { -INT64_C( 34) }, { UINT64_C( 11634077) } }, { { UINT64_C( 4091501470598780608) }, { INT64_C( 61) }, { UINT64_C( 0) } }, { { UINT64_C(17991294013086143349) }, { INT64_C( 53) }, { UINT64_C( 7971371340445777920) } }, { { UINT64_C(11884907251678901604) }, { INT64_C( 38) }, { UINT64_C( 3620147532010618880) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_uint64x1_t r = simde_vrshl_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); uint64_t a_patch[] = { UINT64_MAX, UINT64_C(1) << (64 - 1), UINT64_MAX }; int64_t b_patch[] = { 64, -64, -64 - 1 }; for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_private a_ = simde_uint64x1_to_private(simde_test_arm_neon_random_u64x1()); simde_int64x1_private b_ = simde_int64x1_to_private(simde_test_arm_neon_random_i64x1()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { while (llabs(b_.values[j]) > 64) { b_.values[j] >>= 1; } } } if ((size_t) i < (sizeof(a_patch) / sizeof(a_patch[0]))) { a_.values[0] = a_patch[i]; b_.values[0] = b_patch[i]; } simde_uint64x1_t a = simde_uint64x1_from_private(a_); simde_int64x1_t b = 
simde_int64x1_from_private(b_); simde_uint64x1_t r = simde_vrshl_u64(a, b); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshlq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { -INT8_C( 1), INT8_C( 67), -INT8_C( 65), -INT8_C( 26), -INT8_C( 36), -INT8_C( 38), -INT8_C( 38), INT8_C( 6), -INT8_C( 85), INT8_C( 72), INT8_C( 19), -INT8_C( 102), INT8_C( 13), -INT8_C( 105), -INT8_C( 35), -INT8_C( 22) }, { INT8_C( 8), -INT8_C( 127), -INT8_C( 7), INT8_C( 51), INT8_C( 5), -INT8_C( 6), -INT8_C( 6), INT8_C( 6), INT8_C( 79), -INT8_C( 8), INT8_C( 7), -INT8_C( 6), -INT8_C( 7), -INT8_C( 5), INT8_C( 5), INT8_C( 8) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_MIN, -INT8_C( 1), -INT8_C( 1), INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_MIN, -INT8_C( 2), INT8_C( 0), -INT8_C( 3), -INT8_C( 96), INT8_C( 0) } }, { { -INT8_C( 1), -INT8_C( 99), INT8_C( 23), -INT8_C( 95), -INT8_C( 121), -INT8_C( 57), -INT8_C( 71), -INT8_C( 42), INT8_C( 73), INT8_C( 42), -INT8_C( 63), -INT8_C( 30), INT8_C( 6), -INT8_C( 58), -INT8_C( 22), INT8_C( 37) }, { -INT8_C( 8), -INT8_C( 5), INT8_C( 8), INT8_C( 5), -INT8_C( 6), INT8_C( 8), INT8_C( 115), -INT8_C( 5), INT8_C( 6), INT8_C( 5), -INT8_C( 6), -INT8_C( 28), INT8_C( 6), -INT8_C( 5), -INT8_C( 7), INT8_C( 5) }, { INT8_C( 0), -INT8_C( 3), INT8_C( 0), INT8_C( 32), -INT8_C( 2), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 64), INT8_C( 64), -INT8_C( 1), INT8_C( 0), INT8_MIN, -INT8_C( 2), INT8_C( 0), -INT8_C( 96) } }, { { -INT8_C( 108), INT8_C( 45), INT8_C( 16), INT8_C( 53), INT8_C( 79), -INT8_C( 125), -INT8_C( 25), -INT8_C( 121), -INT8_C( 37), -INT8_C( 73), INT8_C( 107), -INT8_C( 11), -INT8_C( 111), INT8_C( 51), INT8_C( 83), INT8_C( 7) }, { INT8_C( 4), INT8_C( 5), INT8_C( 5), -INT8_C( 5), INT8_C( 4), -INT8_C( 5), 
-INT8_C( 6), -INT8_C( 22), INT8_C( 6), INT8_C( 7), -INT8_C( 46), -INT8_C( 7), INT8_C( 6), -INT8_C( 34), -INT8_C( 5), -INT8_C( 7) }, { INT8_C( 64), -INT8_C( 96), INT8_C( 0), INT8_C( 2), -INT8_C( 16), -INT8_C( 4), INT8_C( 0), INT8_C( 0), -INT8_C( 64), INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 64), INT8_C( 0), INT8_C( 3), INT8_C( 0) } }, { { -INT8_C( 71), INT8_C( 58), INT8_C( 75), -INT8_C( 62), INT8_C( 19), -INT8_C( 12), -INT8_C( 84), -INT8_C( 127), INT8_C( 110), INT8_C( 126), INT8_C( 75), -INT8_C( 93), INT8_C( 93), -INT8_C( 5), INT8_C( 109), INT8_C( 104) }, { -INT8_C( 5), INT8_C( 6), -INT8_C( 8), -INT8_C( 2), INT8_C( 5), -INT8_C( 6), INT8_C( 7), -INT8_C( 5), -INT8_C( 14), INT8_C( 50), INT8_C( 7), INT8_C( 114), -INT8_C( 7), INT8_C( 5), INT8_C( 5), INT8_C( 5) }, { -INT8_C( 2), INT8_MIN, INT8_C( 0), -INT8_C( 15), INT8_C( 96), INT8_C( 0), INT8_C( 0), -INT8_C( 4), INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 1), INT8_C( 96), -INT8_C( 96), INT8_C( 0) } }, { { INT8_C( 46), -INT8_C( 56), INT8_C( 32), INT8_MAX, INT8_C( 110), INT8_C( 62), INT8_C( 109), INT8_C( 96), INT8_C( 112), -INT8_C( 119), -INT8_C( 45), INT8_C( 9), -INT8_C( 37), -INT8_C( 40), INT8_C( 92), INT8_C( 103) }, { INT8_C( 40), INT8_C( 7), INT8_C( 6), INT8_C( 109), INT8_C( 6), INT8_C( 39), INT8_C( 7), INT8_C( 7), -INT8_C( 109), INT8_C( 7), INT8_C( 6), -INT8_C( 6), INT8_C( 7), INT8_C( 18), INT8_C( 28), INT8_C( 6) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_MIN, -INT8_C( 64), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 0), -INT8_C( 64) } }, { { -INT8_C( 63), INT8_C( 38), -INT8_C( 22), -INT8_C( 14), INT8_C( 77), INT8_C( 9), INT8_C( 99), -INT8_C( 31), INT8_MAX, INT8_C( 113), -INT8_C( 37), -INT8_C( 73), -INT8_C( 125), -INT8_C( 8), INT8_C( 30), INT8_C( 94) }, { INT8_C( 53), INT8_C( 5), -INT8_C( 6), -INT8_C( 5), INT8_C( 89), INT8_C( 5), -INT8_C( 7), INT8_C( 6), -INT8_C( 7), -INT8_C( 8), -INT8_C( 17), INT8_C( 8), -INT8_C( 32), INT8_C( 7), -INT8_C( 
7), -INT8_C( 6) }, { INT8_C( 0), -INT8_C( 64), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 32), INT8_C( 1), INT8_C( 64), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 1) } }, { { -INT8_C( 20), -INT8_C( 15), INT8_C( 27), INT8_C( 69), INT8_C( 66), -INT8_C( 74), INT8_C( 124), INT8_C( 16), INT8_C( 70), INT8_C( 107), INT8_C( 50), INT8_C( 38), INT8_C( 121), -INT8_C( 47), -INT8_C( 57), -INT8_C( 82) }, { INT8_C( 5), INT8_C( 5), INT8_C( 6), -INT8_C( 5), INT8_C( 5), -INT8_C( 7), -INT8_C( 2), -INT8_C( 5), -INT8_C( 47), -INT8_C( 8), -INT8_C( 6), INT8_C( 8), -INT8_C( 6), -INT8_C( 17), INT8_C( 115), -INT8_C( 101) }, { INT8_MIN, INT8_C( 32), -INT8_C( 64), INT8_C( 2), INT8_C( 64), -INT8_C( 1), INT8_C( 31), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 45), INT8_C( 31), INT8_C( 124), INT8_C( 36), -INT8_C( 78), INT8_C( 122), -INT8_C( 34), -INT8_C( 125), INT8_C( 65), -INT8_C( 125), -INT8_C( 117), -INT8_C( 16), INT8_C( 114), -INT8_C( 1), -INT8_C( 117), INT8_C( 82) }, { -INT8_C( 8), INT8_C( 6), INT8_C( 117), -INT8_C( 45), -INT8_C( 7), -INT8_C( 6), INT8_C( 5), -INT8_C( 8), INT8_C( 6), INT8_C( 8), -INT8_C( 6), INT8_C( 8), -INT8_C( 8), -INT8_C( 5), -INT8_C( 6), INT8_C( 5) }, { INT8_C( 0), -INT8_C( 64), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 2), -INT8_C( 64), INT8_C( 0), INT8_C( 64), INT8_C( 0), -INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 2), INT8_C( 64) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vrshlq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int8_t a_patch[] = { -INT8_C(1), -INT8_C(1) }; int8_t b_patch[] = { 8, -8 }; for (int i = 0 ; i < 8 ; i++) { simde_int8x16_private a_ = 
simde_int8x16_to_private(simde_test_arm_neon_random_i8x16()); simde_int8x16_private b_ = simde_int8x16_to_private(simde_test_arm_neon_random_i8x16()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { while (abs(b_.values[j]) > 8) { b_.values[j] >>= 1; } } } if ((size_t) i < (sizeof(a_patch) / sizeof(a_patch[0]))) { a_.values[0] = a_patch[i]; b_.values[0] = b_patch[i]; } simde_int8x16_t a = simde_int8x16_from_private(a_); simde_int8x16_t b = simde_int8x16_from_private(b_); simde_int8x16_t r = simde_vrshlq_s8(a, b); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshlq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 1), -INT16_C( 24733), -INT16_C( 31663), -INT16_C( 14535), -INT16_C( 12245), -INT16_C( 20602), -INT16_C( 20992), -INT16_C( 32646) }, { INT16_C( 16), -INT16_C( 5416), INT16_C( 15), -INT16_C( 16), -INT16_C( 13), INT16_C( 13), INT16_C( 15), -INT16_C( 15) }, { INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_C( 0), -INT16_C( 1), -INT16_C( 16384), INT16_C( 0), -INT16_C( 1) } }, { { -INT16_C( 1), INT16_C( 28375), INT16_C( 21093), INT16_C( 11758), -INT16_C( 14819), INT16_C( 23319), -INT16_C( 3519), INT16_C( 5852) }, { -INT16_C( 16), -INT16_C( 14), -INT16_C( 13), -INT16_C( 14), -INT16_C( 3822), INT16_C( 4095), INT16_C( 12), -INT16_C( 12) }, { INT16_C( 0), INT16_C( 2), INT16_C( 3), INT16_C( 1), INT16_C( 0), INT16_C( 11660), INT16_C( 4096), INT16_C( 1) } }, { { -INT16_C( 28383), INT16_C( 25305), -INT16_C( 18813), INT16_C( 5240), -INT16_C( 1004), -INT16_C( 28452), INT16_C( 27030), -INT16_C( 22491) }, { INT16_C( 9), -INT16_C( 11), -INT16_C( 9), INT16_C( 15), INT16_C( 10), INT16_C( 11), INT16_C( 11), -INT16_C( 13) }, { INT16_C( 16896), INT16_C( 12), -INT16_C( 
37), INT16_C( 0), INT16_C( 20480), -INT16_C( 8192), -INT16_C( 20480), -INT16_C( 3) } }, { { INT16_C( 2726), INT16_C( 15377), INT16_C( 14195), -INT16_C( 12827), -INT16_C( 25253), -INT16_C( 20297), -INT16_C( 24692), -INT16_C( 29715) }, { INT16_C( 15), -INT16_C( 12), -INT16_C( 14211), INT16_C( 9), -INT16_C( 12), -INT16_C( 10), INT16_C( 11), -INT16_C( 15) }, { INT16_C( 0), INT16_C( 4), INT16_C( 0), -INT16_C( 13824), -INT16_C( 6), -INT16_C( 20), INT16_C( 24576), -INT16_C( 1) } }, { { INT16_C( 23984), INT16_C( 15553), -INT16_C( 20484), -INT16_C( 4153), -INT16_C( 19929), -INT16_C( 23409), -INT16_C( 134), -INT16_C( 1609) }, { -INT16_C( 14), -INT16_C( 5147), INT16_C( 10), -INT16_C( 12), INT16_C( 10), -INT16_C( 31106), INT16_C( 9), -INT16_C( 13) }, { INT16_C( 1), INT16_C( 0), -INT16_C( 4096), -INT16_C( 1), -INT16_C( 25600), INT16_C( 0), -INT16_C( 3072), INT16_C( 0) } }, { { -INT16_C( 637), -INT16_C( 558), -INT16_C( 29956), -INT16_C( 25354), -INT16_C( 9361), -INT16_C( 20601), INT16_C( 25389), -INT16_C( 427) }, { -INT16_C( 11154), -INT16_C( 15), INT16_C( 13), INT16_C( 31321), INT16_C( 25459), INT16_C( 31737), INT16_C( 12), -INT16_C( 18263) }, { INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_C( 0), INT16_C( 0), -INT16_C( 161), -INT16_C( 12288), INT16_C( 0) } }, { { -INT16_C( 32121), -INT16_C( 19164), INT16_C( 31462), INT16_C( 21683), INT16_C( 14158), INT16_C( 18143), INT16_C( 14418), -INT16_C( 14911) }, { -INT16_C( 9), -INT16_C( 12), -INT16_C( 5600), -INT16_C( 16), INT16_C( 15718), INT16_C( 13), -INT16_C( 8215), INT16_C( 14) }, { -INT16_C( 63), -INT16_C( 5), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 8192), INT16_C( 0), INT16_C( 16384) } }, { { INT16_C( 31504), INT16_C( 25108), -INT16_C( 10829), INT16_C( 20263), INT16_C( 26767), -INT16_C( 20705), -INT16_C( 22702), -INT16_C( 18381) }, { INT16_C( 11), -INT16_C( 13), INT16_C( 11), INT16_C( 10), INT16_C( 12), -INT16_C( 15), INT16_C( 14), INT16_C( 20055) }, { INT16_MIN, INT16_C( 3), -INT16_C( 26624), -INT16_C( 25600), -INT16_C( 
4096), -INT16_C( 1), INT16_MIN, INT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vrshlq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int16_t a_patch[] = { -INT16_C(1), -INT16_C(1) }; int16_t b_patch[] = { 16, -16 }; for (int i = 0 ; i < 8 ; i++) { simde_int16x8_private a_ = simde_int16x8_to_private(simde_test_arm_neon_random_i16x8()); simde_int16x8_private b_ = simde_int16x8_to_private(simde_test_arm_neon_random_i16x8()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { while (abs(b_.values[j]) > 16) { b_.values[j] >>= 1; } } } if ((size_t) i < (sizeof(a_patch) / sizeof(a_patch[0]))) { a_.values[0] = a_patch[i]; b_.values[0] = b_patch[i]; } simde_int16x8_t a = simde_int16x8_from_private(a_); simde_int16x8_t b = simde_int16x8_from_private(b_); simde_int16x8_t r = simde_vrshlq_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshlq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 1), INT32_C( 261209098), INT32_C( 688353192), -INT32_C( 1311148258) }, { INT32_C( 32), INT32_C( 794913842), INT32_C( 18), -INT32_C( 1422236320) }, { INT32_C( 0), INT32_C( 0), -INT32_C( 1096810496), INT32_C( 0) } }, { { -INT32_C( 1), INT32_C( 796745233), -INT32_C( 924821802), INT32_C( 1238788374) }, { -INT32_C( 32), INT32_C( 1787446282), -INT32_C( 24), INT32_C( 24) }, { INT32_C( 0), -INT32_C( 176667648), -INT32_C( 55), INT32_C( 369098752) } }, { { -INT32_C( 1140379162), -INT32_C( 1652234361), INT32_C( 652630864), 
-INT32_C( 1654235245) }, { INT32_C( 1292431523), INT32_C( 26), INT32_C( 21), -INT32_C( 23) }, { INT32_C( 0), INT32_C( 469762048), INT32_C( 1778384896), -INT32_C( 197) } }, { { INT32_C( 567415760), INT32_C( 21477742), -INT32_C( 1147163368), INT32_C( 1393141662) }, { INT32_C( 30), -INT32_C( 28), -INT32_C( 20), -INT32_C( 1377438756) }, { INT32_C( 0), INT32_C( 0), -INT32_C( 1094), INT32_C( 0) } }, { { -INT32_C( 2005527184), INT32_C( 1615074754), -INT32_C( 2118955588), INT32_C( 797470603) }, { -INT32_C( 19), INT32_C( 1789273229), INT32_C( 30), -INT32_C( 1282153149) }, { -INT32_C( 3825), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 618823649), INT32_C( 1482477773), INT32_C( 881386816), INT32_C( 204163199) }, { -INT32_C( 2072587923), -INT32_C( 24), INT32_C( 27), INT32_C( 23) }, { INT32_C( 0), INT32_C( 88), INT32_C( 0), INT32_C( 1065353216) } }, { { INT32_C( 1446352406), -INT32_C( 91505285), INT32_C( 1913042437), -INT32_C( 453542521) }, { INT32_C( 31), INT32_C( 29), INT32_C( 29), INT32_C( 27) }, { INT32_C( 0), INT32_C( 1610612736), -INT32_C( 1610612736), INT32_C( 939524096) } }, { { INT32_C( 1269846085), -INT32_C( 1900169466), INT32_C( 1047704628), INT32_C( 498857806) }, { INT32_C( 27), INT32_C( 16), INT32_C( 30), -INT32_C( 25) }, { INT32_C( 671088640), -INT32_C( 1224343552), INT32_C( 0), INT32_C( 15) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vrshlq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int32_t a_patch[] = { -INT32_C(1), -INT32_C(1) }; int32_t b_patch[] = { 32, -32 }; for (int i = 0 ; i < 8 ; i++) { simde_int32x4_private a_ = simde_int32x4_to_private(simde_test_arm_neon_random_i32x4()); simde_int32x4_private b_ = simde_int32x4_to_private(simde_test_arm_neon_random_i32x4()); for (size_t j = 0 ; j < 
(sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { while (abs(b_.values[j]) > 32) { b_.values[j] >>= 1; } } } if ((size_t) i < (sizeof(a_patch) / sizeof(a_patch[0]))) { a_.values[0] = a_patch[i]; b_.values[0] = b_patch[i]; } simde_int32x4_t a = simde_int32x4_from_private(a_); simde_int32x4_t b = simde_int32x4_from_private(b_); simde_int32x4_t r = simde_vrshlq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshlq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { -INT64_C( 1), INT64_C( 8313170098058778935) }, { INT64_C( 64), -INT64_C( 58) }, { INT64_C( 0), INT64_C( 29) } }, { { -INT64_C( 1), INT64_C( 7179860541365923026) }, { -INT64_C( 64), INT64_C( 5674951681768987017) }, { INT64_C( 0), INT64_C( 0) } }, { { -INT64_C( 5284824899776078983), INT64_C( 2324443633642883256) }, { INT64_C( 59), -INT64_C( 3278735591420720609) }, { -INT64_C( 4035225266123964416), -INT64_C( 5919449151413682176) } }, { { -INT64_C( 2576212740381314098), INT64_C( 6562733711055481552) }, { -INT64_C( 52), INT64_C( 41) }, { -INT64_C( 572), -INT64_C( 2101597729238024192) } }, { { INT64_C( 5416257340365700737), -INT64_C( 4269616889089575813) }, { INT64_C( 3370466030787258188), -INT64_C( 58) }, { INT64_C( 0), -INT64_C( 15) } }, { { INT64_C( 6209211028984577356), -INT64_C( 7547495314914264550) }, { INT64_C( 63), -INT64_C( 60) }, { INT64_C( 0), -INT64_C( 7) } }, { { -INT64_C( 9093408780739309977), INT64_C( 5121628945022307474) }, { -INT64_C( 53), -INT64_C( 37) }, { -INT64_C( 1010), INT64_C( 37264755) } }, { { INT64_C( 1082637037262742893), INT64_C( 6751871869302762015) }, { INT64_C( 5088688905005675581), INT64_C( 45) }, { -INT64_C( 6917529027641081856), -INT64_C( 3655832181890088960) } }, }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_vrshlq_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int64_t a_patch[] = { -INT64_C(1), -INT64_C(1) }; int64_t b_patch[] = { 64, -64 }; for (int i = 0 ; i < 8 ; i++) { simde_int64x2_private a_ = simde_int64x2_to_private(simde_test_arm_neon_random_i64x2()); simde_int64x2_private b_ = simde_int64x2_to_private(simde_test_arm_neon_random_i64x2()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { while (llabs(b_.values[j]) > 64) { b_.values[j] >>= 1; } } } if ((size_t) i < (sizeof(a_patch) / sizeof(a_patch[0]))) { a_.values[0] = a_patch[i]; b_.values[0] = b_patch[i]; } simde_int64x2_t a = simde_int64x2_from_private(a_); simde_int64x2_t b = simde_int64x2_from_private(b_); simde_int64x2_t r = simde_vrshlq_s64(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshlq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint8_t a[16]; int8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_MAX, UINT8_C( 26), UINT8_C( 19), UINT8_C(248), UINT8_C(135), UINT8_C( 38), UINT8_C( 72), UINT8_C(139), UINT8_C(117), UINT8_C( 87), UINT8_C(157), UINT8_C(107), UINT8_C(117), UINT8_C( 60), UINT8_C(214), UINT8_C( 38) }, { INT8_C( 8), -INT8_C( 24), -INT8_C( 5), -INT8_C( 5), INT8_C( 32), INT8_C( 48), INT8_C( 95), -INT8_C( 5), INT8_C( 6), INT8_C( 7), INT8_C( 4), -INT8_C( 95), -INT8_C( 53), INT8_C( 7), INT8_C( 6), INT8_C( 5) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 8), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 4), UINT8_C( 64), UINT8_C(128), UINT8_C(208), UINT8_C( 0), UINT8_C( 0), 
UINT8_C( 0), UINT8_C(128), UINT8_C(192) } }, { { UINT8_C(128), UINT8_C(251), UINT8_C(187), UINT8_C(102), UINT8_C( 43), UINT8_C( 26), UINT8_C( 66), UINT8_C( 92), UINT8_C(138), UINT8_C( 75), UINT8_C(253), UINT8_C( 86), UINT8_C(135), UINT8_C(105), UINT8_C( 97), UINT8_C(222) }, { -INT8_C( 8), INT8_C( 6), -INT8_C( 5), -INT8_C( 8), -INT8_C( 5), INT8_C( 4), -INT8_C( 6), INT8_C( 84), INT8_C( 41), INT8_C( 6), INT8_C( 6), INT8_C( 6), -INT8_C( 8), -INT8_C( 6), INT8_C( 8), -INT8_C( 6) }, { UINT8_C( 1), UINT8_C(192), UINT8_C( 6), UINT8_C( 0), UINT8_C( 1), UINT8_C(160), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C(192), UINT8_C( 64), UINT8_C(128), UINT8_C( 1), UINT8_C( 2), UINT8_C( 0), UINT8_C( 3) } }, { { UINT8_MAX, UINT8_C(233), UINT8_C( 14), UINT8_C(114), UINT8_C( 13), UINT8_C(185), UINT8_C(198), UINT8_C( 54), UINT8_C(234), UINT8_C( 51), UINT8_C(161), UINT8_C(121), UINT8_C(223), UINT8_C(232), UINT8_C( 78), UINT8_C(135) }, { -INT8_C( 9), -INT8_C( 8), INT8_C( 5), INT8_C( 5), INT8_C( 6), -INT8_C( 8), -INT8_C( 6), -INT8_C( 7), -INT8_C( 5), -INT8_C( 6), INT8_C( 30), INT8_C( 4), INT8_C( 4), INT8_C( 4), -INT8_C( 7), -INT8_C( 59) }, { UINT8_C( 0), UINT8_C( 1), UINT8_C(192), UINT8_C( 64), UINT8_C( 64), UINT8_C( 1), UINT8_C( 3), UINT8_C( 0), UINT8_C( 7), UINT8_C( 1), UINT8_C( 0), UINT8_C(144), UINT8_C(240), UINT8_C(128), UINT8_C( 1), UINT8_C( 0) } }, { { UINT8_C(209), UINT8_C(242), UINT8_C(240), UINT8_C(215), UINT8_C(125), UINT8_C(155), UINT8_C(166), UINT8_C( 52), UINT8_C( 66), UINT8_C(196), UINT8_C(130), UINT8_C( 70), UINT8_C( 15), UINT8_C( 80), UINT8_C( 11), UINT8_C( 68) }, { INT8_C( 44), INT8_C( 8), -INT8_C( 8), -INT8_C( 63), INT8_C( 8), -INT8_C( 3), INT8_C( 8), INT8_C( 7), INT8_C( 23), INT8_C( 57), -INT8_C( 8), INT8_C( 6), -INT8_C( 8), INT8_C( 6), INT8_C( 3), INT8_C( 5) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 19), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C( 88), 
UINT8_C(128) } }, { { UINT8_C(101), UINT8_C(108), UINT8_C(124), UINT8_C(165), UINT8_C(106), UINT8_C(188), UINT8_C( 22), UINT8_C(129), UINT8_C(246), UINT8_C(151), UINT8_C(153), UINT8_C(119), UINT8_C(176), UINT8_C(157), UINT8_C(201), UINT8_C(187) }, { -INT8_C( 7), -INT8_C( 7), INT8_C( 8), INT8_C( 7), -INT8_C( 8), -INT8_C( 1), -INT8_C( 5), INT8_C( 5), INT8_C( 62), INT8_C( 6), -INT8_C( 8), -INT8_C( 7), INT8_C( 5), -INT8_C( 5), -INT8_C( 8), -INT8_C( 8) }, { UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 94), UINT8_C( 1), UINT8_C( 32), UINT8_C( 0), UINT8_C(192), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 5), UINT8_C( 1), UINT8_C( 1) } }, { { UINT8_C(233), UINT8_C(142), UINT8_C( 29), UINT8_C(171), UINT8_C(142), UINT8_C( 12), UINT8_C( 1), UINT8_C(204), UINT8_C( 18), UINT8_C(249), UINT8_C(154), UINT8_C( 58), UINT8_C(217), UINT8_C( 34), UINT8_C(199), UINT8_C( 37) }, { INT8_C( 4), -INT8_C( 6), -INT8_C( 5), -INT8_C( 6), INT8_C( 8), INT8_C( 4), -INT8_C( 7), INT8_C( 35), -INT8_C( 30), -INT8_C( 7), -INT8_C( 5), INT8_C( 80), -INT8_C( 8), -INT8_C( 1), INT8_C( 4), -INT8_C( 6) }, { UINT8_C(144), UINT8_C( 2), UINT8_C( 1), UINT8_C( 3), UINT8_C( 0), UINT8_C(192), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 2), UINT8_C( 5), UINT8_C( 0), UINT8_C( 1), UINT8_C( 17), UINT8_C(112), UINT8_C( 1) } }, { { UINT8_C(134), UINT8_C( 44), UINT8_C(180), UINT8_C(201), UINT8_C( 62), UINT8_C( 83), UINT8_C(236), UINT8_C( 33), UINT8_C( 32), UINT8_C(160), UINT8_C(113), UINT8_C(228), UINT8_C(160), UINT8_C(191), UINT8_C(145), UINT8_C( 46) }, { INT8_C( 5), -INT8_C( 6), INT8_C( 74), -INT8_C( 6), INT8_C( 8), INT8_C( 6), INT8_C( 5), -INT8_C( 7), -INT8_C( 5), -INT8_C( 16), -INT8_C( 8), INT8_C( 90), INT8_C( 7), INT8_C( 5), INT8_C( 38), INT8_C( 2) }, { UINT8_C(192), UINT8_C( 1), UINT8_C( 0), UINT8_C( 3), UINT8_C( 0), UINT8_C(192), UINT8_C(128), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(224), UINT8_C( 0), UINT8_C(184) } }, { { 
UINT8_C(173), UINT8_C(112), UINT8_C(157), UINT8_C(241), UINT8_C(162), UINT8_C(201), UINT8_C(137), UINT8_C( 87), UINT8_C(185), UINT8_C( 78), UINT8_C(177), UINT8_C( 53), UINT8_C(100), UINT8_C(215), UINT8_C( 55), UINT8_C(166) }, { -INT8_C( 5), INT8_C( 2), INT8_C( 4), -INT8_C( 5), -INT8_C( 71), -INT8_C( 7), INT8_C( 5), INT8_C( 17), -INT8_C( 5), INT8_C( 5), INT8_C( 4), -INT8_C( 6), INT8_C( 7), INT8_C( 5), -INT8_C( 6), -INT8_C( 7) }, { UINT8_C( 5), UINT8_C(192), UINT8_C(208), UINT8_C( 8), UINT8_C( 0), UINT8_C( 2), UINT8_C( 32), UINT8_C( 0), UINT8_C( 6), UINT8_C(192), UINT8_C( 16), UINT8_C( 1), UINT8_C( 0), UINT8_C(224), UINT8_C( 1), UINT8_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_uint8x16_t r = simde_vrshlq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); uint8_t a_patch[] = { UINT8_MAX, UINT8_C(1) << (8 - 1), UINT8_MAX }; int8_t b_patch[] = { 8, -8, -8 - 1 }; for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_private a_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); simde_int8x16_private b_ = simde_int8x16_to_private(simde_test_arm_neon_random_i8x16()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { while (abs(b_.values[j]) > 8) { b_.values[j] >>= 1; } } } if ((size_t) i < (sizeof(a_patch) / sizeof(a_patch[0]))) { a_.values[0] = a_patch[i]; b_.values[0] = b_patch[i]; } simde_uint8x16_t a = simde_uint8x16_from_private(a_); simde_int8x16_t b = simde_int8x16_from_private(b_); simde_uint8x16_t r = simde_vrshlq_u8(a, b); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshlq_u16 
(SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint16_t a[8]; int16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_MAX, UINT16_C(58570), UINT16_C(15575), UINT16_C( 8935), UINT16_C( 7529), UINT16_C(51329), UINT16_C(30589), UINT16_C(20900) }, { INT16_C( 16), INT16_C( 13), INT16_C( 14), INT16_C( 14), INT16_C( 11), INT16_C( 12), -INT16_C( 6158), INT16_C( 14) }, { UINT16_C( 0), UINT16_C(16384), UINT16_C(49152), UINT16_C(49152), UINT16_C(18432), UINT16_C( 4096), UINT16_C( 2), UINT16_C( 0) } }, { { UINT16_C(32768), UINT16_C(40539), UINT16_C( 126), UINT16_C(43759), UINT16_C(13621), UINT16_C(32734), UINT16_C(33340), UINT16_C(61827) }, { -INT16_C( 16), INT16_C( 14), INT16_C( 31151), INT16_C( 15277), -INT16_C( 14), -INT16_C( 28257), INT16_C( 9), -INT16_C( 4060) }, { UINT16_C( 1), UINT16_C(49152), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C(30720), UINT16_C( 0) } }, { { UINT16_MAX, UINT16_C(61493), UINT16_C(47284), UINT16_C(14305), UINT16_C(14208), UINT16_C(12203), UINT16_C(22704), UINT16_C( 5995) }, { -INT16_C( 17), -INT16_C( 15), -INT16_C( 13), INT16_C( 11), INT16_C( 13), -INT16_C( 13308), INT16_C( 22765), -INT16_C( 24190) }, { UINT16_C( 0), UINT16_C( 2), UINT16_C( 6), UINT16_C( 2048), UINT16_C( 0), UINT16_C(64176), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C( 8617), UINT16_C(22814), UINT16_C(35193), UINT16_C(40048), UINT16_C( 6291), UINT16_C(49805), UINT16_C(28389), UINT16_C(12572) }, { INT16_C( 16), -INT16_C( 14), -INT16_C( 16), INT16_C( 9), -INT16_C( 12), -INT16_C( 12), -INT16_C( 10), INT16_C( 12) }, { UINT16_C( 0), UINT16_C( 1), UINT16_C( 1), UINT16_C(57344), UINT16_C( 2), UINT16_C( 12), UINT16_C( 28), UINT16_C(49152) } }, { { UINT16_C(31227), UINT16_C(57490), UINT16_C(44775), UINT16_C(50450), UINT16_C( 4302), UINT16_C(18064), UINT16_C(64656), UINT16_C(51016) }, { -INT16_C( 15), INT16_C( 13), INT16_C( 8), INT16_C( 11), -INT16_C( 12), -INT16_C( 14), INT16_C( 10), -INT16_C( 12) }, { UINT16_C( 1), UINT16_C(16384), UINT16_C(59136), UINT16_C(36864), 
UINT16_C( 1), UINT16_C( 1), UINT16_C(16384), UINT16_C( 12) } }, { { UINT16_C( 2517), UINT16_C(25881), UINT16_C(24837), UINT16_C(65068), UINT16_C(39402), UINT16_C(10598), UINT16_C(59547), UINT16_C(20543) }, { -INT16_C( 22820), -INT16_C( 10), INT16_C( 13), -INT16_C( 15), INT16_C( 14), INT16_C( 11), -INT16_C( 3331), -INT16_C( 11726) }, { UINT16_C( 0), UINT16_C( 25), UINT16_C(40960), UINT16_C( 2), UINT16_C(32768), UINT16_C(12288), UINT16_C( 7443), UINT16_C( 0) } }, { { UINT16_C(26108), UINT16_C(38848), UINT16_C( 77), UINT16_C(10728), UINT16_C(11174), UINT16_C(40924), UINT16_C(24881), UINT16_C(35940) }, { -INT16_C( 9), -INT16_C( 26644), INT16_C( 15), -INT16_C( 13), -INT16_C( 24215), INT16_C( 10), -INT16_C( 14), INT16_C( 428) }, { UINT16_C( 51), UINT16_C( 0), UINT16_C(32768), UINT16_C( 1), UINT16_C( 0), UINT16_C(28672), UINT16_C( 2), UINT16_C( 0) } }, { { UINT16_C(33452), UINT16_C(56754), UINT16_C( 5859), UINT16_C(32105), UINT16_C(22004), UINT16_C(50453), UINT16_C(32627), UINT16_C(56465) }, { INT16_C( 11), INT16_C( 9), -INT16_C( 13), INT16_C( 11), -INT16_C( 16), INT16_C( 15), INT16_C( 15), -INT16_C( 10) }, { UINT16_C(24576), UINT16_C(25600), UINT16_C( 1), UINT16_C(18432), UINT16_C( 0), UINT16_C(32768), UINT16_C(32768), UINT16_C( 55) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_uint16x8_t r = simde_vrshlq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); uint16_t a_patch[] = { UINT16_MAX, UINT16_C(1) << (16 - 1), UINT16_MAX }; int16_t b_patch[] = { 16, -16, -16 - 1 }; for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_private a_ = simde_uint16x8_to_private(simde_test_arm_neon_random_u16x8()); simde_int16x8_private b_ = simde_int16x8_to_private(simde_test_arm_neon_random_i16x8()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if 
(probability(PROBABILITY)) { while (abs(b_.values[j]) > 16) { b_.values[j] >>= 1; } } } if ((size_t) i < (sizeof(a_patch) / sizeof(a_patch[0]))) { a_.values[0] = a_patch[i]; b_.values[0] = b_patch[i]; } simde_uint16x8_t a = simde_uint16x8_from_private(a_); simde_int16x8_t b = simde_int16x8_from_private(b_); simde_uint16x8_t r = simde_vrshlq_u16(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshlq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint32_t a[4]; int32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_MAX, UINT32_C(3316255323), UINT32_C( 187098578), UINT32_C(3195457013) }, { INT32_C( 32), -INT32_C( 29), -INT32_C( 22), INT32_C( 27) }, { UINT32_C( 0), UINT32_C( 6), UINT32_C( 45), UINT32_C(2818572288) } }, { { UINT32_C(2147483648), UINT32_C(2776245168), UINT32_C( 425980144), UINT32_C(1793817973) }, { -INT32_C( 32), -INT32_C( 894871393), INT32_C( 19), INT32_C( 20) }, { UINT32_C( 1), UINT32_C( 0), UINT32_C(2273312768), UINT32_C(3612344320) } }, { { UINT32_MAX, UINT32_C(2662521640), UINT32_C(2131271000), UINT32_C( 612199301) }, { -INT32_C( 33), -INT32_C( 18), -INT32_C( 24), INT32_C( 28) }, { UINT32_C( 0), UINT32_C( 10157), UINT32_C( 127), UINT32_C(1342177280) } }, { { UINT32_C( 503939782), UINT32_C(2778534176), UINT32_C(1086921341), UINT32_C(2306259009) }, { INT32_C( 23), -INT32_C( 1778331642), INT32_C( 28), INT32_C( 19) }, { UINT32_C(1660944384), UINT32_C(1732528128), UINT32_C(3489660928), UINT32_C(3255304192) } }, { { UINT32_C( 535224994), UINT32_C(1029681148), UINT32_C(1204213096), UINT32_C(3601222608) }, { -INT32_C( 22), INT32_C( 30), -INT32_C( 26), -INT32_C( 27) }, { UINT32_C( 128), UINT32_C( 0), UINT32_C( 18), UINT32_C( 27) } }, { { UINT32_C( 939543248), UINT32_C(4018128415), UINT32_C( 751117585), UINT32_C(3844157899) }, { INT32_C( 17), -INT32_C( 22), 
-INT32_C( 22), INT32_C( 1751886744) }, { UINT32_C(2510290944), UINT32_C( 958), UINT32_C( 179), UINT32_C( 0) } }, { { UINT32_C(1124343857), UINT32_C( 292538949), UINT32_C(1744212475), UINT32_C(2427147716) }, { INT32_C( 27), -INT32_C( 25), -INT32_C( 25), -INT32_C( 419691851) }, { UINT32_C(2281701376), UINT32_C( 9), UINT32_C( 52), UINT32_C( 0) } }, { { UINT32_C(3329792459), UINT32_C(3995954986), UINT32_C( 410966212), UINT32_C( 558826433) }, { INT32_C( 30), INT32_C( 22), -INT32_C( 24), -INT32_C( 335069919) }, { UINT32_C(3221225472), UINT32_C(3397386240), UINT32_C( 24), UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_uint32x4_t r = simde_vrshlq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); uint32_t a_patch[] = { UINT32_MAX, UINT32_C(1) << (32 - 1), UINT32_MAX }; int32_t b_patch[] = { 32, -32, -32 - 1 }; for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_private a_ = simde_uint32x4_to_private(simde_test_arm_neon_random_u32x4()); simde_int32x4_private b_ = simde_int32x4_to_private(simde_test_arm_neon_random_i32x4()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { while (abs(b_.values[j]) > 32) { b_.values[j] >>= 1; } } } if ((size_t) i < (sizeof(a_patch) / sizeof(a_patch[0]))) { a_.values[0] = a_patch[i]; b_.values[0] = b_patch[i]; } simde_uint32x4_t a = simde_uint32x4_from_private(a_); simde_int32x4_t b = simde_int32x4_from_private(b_); simde_uint32x4_t r = simde_vrshlq_u32(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshlq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { uint64_t a[2]; 
int64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_MAX, UINT64_C(16294151362500260038) }, { INT64_C( 64), INT64_C( 58) }, { UINT64_C( 0), UINT64_C( 1729382256910270464) } }, { { UINT64_C( 9223372036854775808), UINT64_C(16863135901910622191) }, { -INT64_C( 64), -INT64_C( 47) }, { UINT64_C( 1), UINT64_C( 119820) } }, { { UINT64_MAX, UINT64_C( 2005102394293229065) }, { -INT64_C( 65), -INT64_C( 47) }, { UINT64_C( 0), UINT64_C( 14247) } }, { { UINT64_C( 1572984371782268265), UINT64_C(10542706757663446378) }, { -INT64_C( 47), -INT64_C( 59) }, { UINT64_C( 11177), UINT64_C( 18) } }, { { UINT64_C( 9387779671739714207), UINT64_C(14053309784719439957) }, { INT64_C( 44), -INT64_C( 47) }, { UINT64_C(15900503842198913024), UINT64_C( 99855) } }, { { UINT64_C( 3933560557134412309), UINT64_C(10367074293171904885) }, { -INT64_C( 54), -INT64_C( 2073254729651704801) }, { UINT64_C( 218), UINT64_C( 4489016506151075840) } }, { { UINT64_C(10824960247039706945), UINT64_C( 4497186091603156343) }, { -INT64_C( 33), -INT64_C( 60) }, { UINT64_C( 1260191231), UINT64_C( 4) } }, { { UINT64_C(13544247016243875936), UINT64_C( 1686370601464357418) }, { INT64_C( 5733672828602753178), -INT64_C( 64) }, { UINT64_C( 0), UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_uint64x2_t r = simde_vrshlq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); uint64_t a_patch[] = { UINT64_MAX, UINT64_C(1) << (64 - 1), UINT64_MAX }; int64_t b_patch[] = { 64, -64, -64 - 1 }; for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_private a_ = simde_uint64x2_to_private(simde_test_arm_neon_random_u64x2()); simde_int64x2_private b_ = simde_int64x2_to_private(simde_test_arm_neon_random_i64x2()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { 
while (llabs(b_.values[j]) > 64) { b_.values[j] >>= 1; } } } if ((size_t) i < (sizeof(a_patch) / sizeof(a_patch[0]))) { a_.values[0] = a_patch[i]; b_.values[0] = b_patch[i]; } simde_uint64x2_t a = simde_uint64x2_from_private(a_); simde_int64x2_t b = simde_int64x2_from_private(b_); simde_uint64x2_t r = simde_vrshlq_u64(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vrshl_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vrshl_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vrshl_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vrshl_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vrshl_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vrshl_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vrshl_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vrshl_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vrshlq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vrshlq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vrshlq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vrshlq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vrshlq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vrshlq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vrshlq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vrshlq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/rshr_n.c000066400000000000000000003422011400333146700167600ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN rshr_n #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/rshr_n.h" #else #include "../../../simde/arm/neon.h" #endif static int test_simde_vrshr_n_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t r1[8]; int8_t r3[8]; int8_t r5[8]; int8_t r6[8]; int8_t r8[8]; } test_vec[] = { { { -INT8_C( 87), INT8_C( 13), INT8_C( 107), -INT8_C( 109), -INT8_C( 49), -INT8_C( 33), -INT8_C( 55), -INT8_C( 61) }, { -INT8_C( 43), INT8_C( 7), INT8_C( 54), -INT8_C( 54), -INT8_C( 24), -INT8_C( 16), -INT8_C( 27), -INT8_C( 30) }, { -INT8_C( 
11), INT8_C( 2), INT8_C( 13), -INT8_C( 14), -INT8_C( 6), -INT8_C( 4), -INT8_C( 7), -INT8_C( 8) }, { -INT8_C( 3), INT8_C( 0), INT8_C( 3), -INT8_C( 3), -INT8_C( 2), -INT8_C( 1), -INT8_C( 2), -INT8_C( 2) }, { -INT8_C( 1), INT8_C( 0), INT8_C( 2), -INT8_C( 2), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 98), -INT8_C( 18), -INT8_C( 28), INT8_C( 54), -INT8_C( 125), INT8_C( 113), INT8_C( 76), -INT8_C( 98) }, { INT8_C( 49), -INT8_C( 9), -INT8_C( 14), INT8_C( 27), -INT8_C( 62), INT8_C( 57), INT8_C( 38), -INT8_C( 49) }, { INT8_C( 12), -INT8_C( 2), -INT8_C( 3), INT8_C( 7), -INT8_C( 16), INT8_C( 14), INT8_C( 10), -INT8_C( 12) }, { INT8_C( 3), -INT8_C( 1), -INT8_C( 1), INT8_C( 2), -INT8_C( 4), INT8_C( 4), INT8_C( 2), -INT8_C( 3) }, { INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 1), -INT8_C( 2), INT8_C( 2), INT8_C( 1), -INT8_C( 2) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 38), INT8_C( 9), -INT8_C( 38), INT8_C( 29), INT8_C( 25), -INT8_C( 16), -INT8_C( 92), -INT8_C( 67) }, { INT8_C( 19), INT8_C( 5), -INT8_C( 19), INT8_C( 15), INT8_C( 13), -INT8_C( 8), -INT8_C( 46), -INT8_C( 33) }, { INT8_C( 5), INT8_C( 1), -INT8_C( 5), INT8_C( 4), INT8_C( 3), -INT8_C( 2), -INT8_C( 11), -INT8_C( 8) }, { INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 1), INT8_C( 1), INT8_C( 0), -INT8_C( 3), -INT8_C( 2) }, { INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 90), -INT8_C( 49), INT8_C( 85), -INT8_C( 6), -INT8_C( 126), -INT8_C( 96), INT8_C( 107), INT8_C( 44) }, { -INT8_C( 45), -INT8_C( 24), INT8_C( 43), -INT8_C( 3), -INT8_C( 63), -INT8_C( 48), INT8_C( 54), INT8_C( 22) }, { -INT8_C( 11), -INT8_C( 6), INT8_C( 11), -INT8_C( 1), -INT8_C( 16), -INT8_C( 
12), INT8_C( 13), INT8_C( 6) }, { -INT8_C( 3), -INT8_C( 2), INT8_C( 3), INT8_C( 0), -INT8_C( 4), -INT8_C( 3), INT8_C( 3), INT8_C( 1) }, { -INT8_C( 1), -INT8_C( 1), INT8_C( 1), INT8_C( 0), -INT8_C( 2), -INT8_C( 1), INT8_C( 2), INT8_C( 1) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 83), -INT8_C( 41), -INT8_C( 65), INT8_C( 125), -INT8_C( 74), -INT8_C( 120), INT8_C( 64), INT8_C( 24) }, { -INT8_C( 41), -INT8_C( 20), -INT8_C( 32), INT8_C( 63), -INT8_C( 37), -INT8_C( 60), INT8_C( 32), INT8_C( 12) }, { -INT8_C( 10), -INT8_C( 5), -INT8_C( 8), INT8_C( 16), -INT8_C( 9), -INT8_C( 15), INT8_C( 8), INT8_C( 3) }, { -INT8_C( 3), -INT8_C( 1), -INT8_C( 2), INT8_C( 4), -INT8_C( 2), -INT8_C( 4), INT8_C( 2), INT8_C( 1) }, { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 2), -INT8_C( 1), -INT8_C( 2), INT8_C( 1), INT8_C( 0) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 118), INT8_C( 37), INT8_C( 78), -INT8_C( 7), -INT8_C( 106), -INT8_C( 101), -INT8_C( 105), -INT8_C( 68) }, { INT8_C( 59), INT8_C( 19), INT8_C( 39), -INT8_C( 3), -INT8_C( 53), -INT8_C( 50), -INT8_C( 52), -INT8_C( 34) }, { INT8_C( 15), INT8_C( 5), INT8_C( 10), -INT8_C( 1), -INT8_C( 13), -INT8_C( 13), -INT8_C( 13), -INT8_C( 8) }, { INT8_C( 4), INT8_C( 1), INT8_C( 2), INT8_C( 0), -INT8_C( 3), -INT8_C( 3), -INT8_C( 3), -INT8_C( 2) }, { INT8_C( 2), INT8_C( 1), INT8_C( 1), INT8_C( 0), -INT8_C( 2), -INT8_C( 2), -INT8_C( 2), -INT8_C( 1) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 92), INT8_C( 113), -INT8_C( 39), -INT8_C( 66), INT8_C( 97), INT8_C( 125), INT8_C( 123), INT8_C( 7) }, { -INT8_C( 46), INT8_C( 57), -INT8_C( 19), -INT8_C( 33), INT8_C( 49), INT8_C( 63), INT8_C( 62), INT8_C( 4) }, { -INT8_C( 11), INT8_C( 14), -INT8_C( 5), -INT8_C( 8), INT8_C( 12), INT8_C( 16), INT8_C( 15), INT8_C( 1) }, { -INT8_C( 3), INT8_C( 
4), -INT8_C( 1), -INT8_C( 2), INT8_C( 3), INT8_C( 4), INT8_C( 4), INT8_C( 0) }, { -INT8_C( 1), INT8_C( 2), -INT8_C( 1), -INT8_C( 1), INT8_C( 2), INT8_C( 2), INT8_C( 2), INT8_C( 0) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 76), -INT8_C( 48), INT8_C( 1), -INT8_C( 49), INT8_C( 112), INT8_C( 109), -INT8_C( 5), INT8_C( 30) }, { INT8_C( 38), -INT8_C( 24), INT8_C( 1), -INT8_C( 24), INT8_C( 56), INT8_C( 55), -INT8_C( 2), INT8_C( 15) }, { INT8_C( 10), -INT8_C( 6), INT8_C( 0), -INT8_C( 6), INT8_C( 14), INT8_C( 14), -INT8_C( 1), INT8_C( 4) }, { INT8_C( 2), -INT8_C( 1), INT8_C( 0), -INT8_C( 2), INT8_C( 4), INT8_C( 3), INT8_C( 0), INT8_C( 1) }, { INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 2), INT8_C( 2), INT8_C( 0), INT8_C( 0) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t r1 = simde_vrshr_n_s8(a, 1); simde_int8x8_t r3 = simde_vrshr_n_s8(a, 3); simde_int8x8_t r5 = simde_vrshr_n_s8(a, 5); simde_int8x8_t r6 = simde_vrshr_n_s8(a, 6); simde_int8x8_t r8 = simde_vrshr_n_s8(a, 8); simde_test_arm_neon_assert_equal_i8x8(r1, simde_vld1_s8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i8x8(r3, simde_vld1_s8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i8x8(r5, simde_vld1_s8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_i8x8(r6, simde_vld1_s8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i8x8(r8, simde_vld1_s8(test_vec[i].r8)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r1 = simde_vrshr_n_s8(a, 1); simde_int8x8_t r3 = simde_vrshr_n_s8(a, 3); simde_int8x8_t r5 = simde_vrshr_n_s8(a, 5); simde_int8x8_t r6 = simde_vrshr_n_s8(a, 6); simde_int8x8_t r8 = simde_vrshr_n_s8(a, 8); 
simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, r1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r5, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r8, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshr_n_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t r3[4]; int16_t r6[4]; int16_t r10[4]; int16_t r13[4]; int16_t r16[4]; } test_vec[] = { { { INT16_C( 31120), -INT16_C( 21578), INT16_C( 20732), INT16_C( 19303) }, { INT16_C( 3890), -INT16_C( 2697), INT16_C( 2592), INT16_C( 2413) }, { INT16_C( 486), -INT16_C( 337), INT16_C( 324), INT16_C( 302) }, { INT16_C( 30), -INT16_C( 21), INT16_C( 20), INT16_C( 19) }, { INT16_C( 4), -INT16_C( 3), INT16_C( 3), INT16_C( 2) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 31545), -INT16_C( 8212), -INT16_C( 519), INT16_C( 10785) }, { INT16_C( 3943), -INT16_C( 1026), -INT16_C( 65), INT16_C( 1348) }, { INT16_C( 493), -INT16_C( 128), -INT16_C( 8), INT16_C( 169) }, { INT16_C( 31), -INT16_C( 8), -INT16_C( 1), INT16_C( 11) }, { INT16_C( 4), -INT16_C( 1), INT16_C( 0), INT16_C( 1) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 26531), INT16_C( 29321), INT16_C( 12749), INT16_C( 5762) }, { INT16_C( 3316), INT16_C( 3665), INT16_C( 1594), INT16_C( 720) }, { INT16_C( 415), INT16_C( 458), INT16_C( 199), INT16_C( 90) }, { INT16_C( 26), INT16_C( 29), INT16_C( 12), INT16_C( 6) }, { INT16_C( 3), INT16_C( 4), INT16_C( 2), INT16_C( 1) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 25739), INT16_C( 24284), -INT16_C( 31873), INT16_C( 3935) }, { -INT16_C( 3217), INT16_C( 3036), -INT16_C( 3984), INT16_C( 492) }, { -INT16_C( 402), INT16_C( 379), -INT16_C( 498), INT16_C( 61) }, { -INT16_C( 25), INT16_C( 24), 
-INT16_C( 31), INT16_C( 4) }, { -INT16_C( 3), INT16_C( 3), -INT16_C( 4), INT16_C( 0) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 5629), -INT16_C( 1605), INT16_C( 8806), -INT16_C( 24764) }, { INT16_C( 704), -INT16_C( 201), INT16_C( 1101), -INT16_C( 3095) }, { INT16_C( 88), -INT16_C( 25), INT16_C( 138), -INT16_C( 387) }, { INT16_C( 5), -INT16_C( 2), INT16_C( 9), -INT16_C( 24) }, { INT16_C( 1), INT16_C( 0), INT16_C( 1), -INT16_C( 3) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 12701), -INT16_C( 26753), -INT16_C( 24530), -INT16_C( 11839) }, { INT16_C( 1588), -INT16_C( 3344), -INT16_C( 3066), -INT16_C( 1480) }, { INT16_C( 198), -INT16_C( 418), -INT16_C( 383), -INT16_C( 185) }, { INT16_C( 12), -INT16_C( 26), -INT16_C( 24), -INT16_C( 12) }, { INT16_C( 2), -INT16_C( 3), -INT16_C( 3), -INT16_C( 1) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 18952), -INT16_C( 10940), -INT16_C( 14725), -INT16_C( 3861) }, { INT16_C( 2369), -INT16_C( 1367), -INT16_C( 1841), -INT16_C( 483) }, { INT16_C( 296), -INT16_C( 171), -INT16_C( 230), -INT16_C( 60) }, { INT16_C( 19), -INT16_C( 11), -INT16_C( 14), -INT16_C( 4) }, { INT16_C( 2), -INT16_C( 1), -INT16_C( 2), INT16_C( 0) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 14495), -INT16_C( 7858), -INT16_C( 20917), INT16_C( 18672) }, { -INT16_C( 1812), -INT16_C( 982), -INT16_C( 2615), INT16_C( 2334) }, { -INT16_C( 226), -INT16_C( 123), -INT16_C( 327), INT16_C( 292) }, { -INT16_C( 14), -INT16_C( 8), -INT16_C( 20), INT16_C( 18) }, { -INT16_C( 2), -INT16_C( 1), -INT16_C( 3), INT16_C( 2) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t r3 = simde_vrshr_n_s16(a, 3); simde_int16x4_t r6 = simde_vrshr_n_s16(a, 6); simde_int16x4_t r10 = simde_vrshr_n_s16(a, 10); simde_int16x4_t r13 = 
simde_vrshr_n_s16(a, 13); simde_int16x4_t r16 = simde_vrshr_n_s16(a, 16); simde_test_arm_neon_assert_equal_i16x4(r3, simde_vld1_s16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i16x4(r6, simde_vld1_s16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i16x4(r10, simde_vld1_s16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_i16x4(r13, simde_vld1_s16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i16x4(r16, simde_vld1_s16(test_vec[i].r16)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r3 = simde_vrshr_n_s16(a, 3); simde_int16x4_t r6 = simde_vrshr_n_s16(a, 6); simde_int16x4_t r10 = simde_vrshr_n_s16(a, 10); simde_int16x4_t r13 = simde_vrshr_n_s16(a, 13); simde_int16x4_t r16 = simde_vrshr_n_s16(a, 16); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r16, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshr_n_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t r6[2]; int32_t r13[2]; int32_t r19[2]; int32_t r26[2]; int32_t r32[2]; } test_vec[] = { { { -INT32_C( 1255497128), INT32_C( 1491892639) }, { -INT32_C( 19617143), INT32_C( 23310822) }, { -INT32_C( 153259), INT32_C( 182116) }, { -INT32_C( 2395), INT32_C( 2846) }, { -INT32_C( 19), INT32_C( 22) }, { INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 604469756), INT32_C( 1849346692) }, { -INT32_C( 9444840), INT32_C( 28896042) }, { -INT32_C( 73788), INT32_C( 225750) }, { -INT32_C( 1153), INT32_C( 3527) }, { -INT32_C( 9), INT32_C( 28) }, { INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 431697936), INT32_C( 378319482) }, { INT32_C( 6745280), INT32_C( 
5911242) }, { INT32_C( 52698), INT32_C( 46182) }, { INT32_C( 823), INT32_C( 722) }, { INT32_C( 6), INT32_C( 6) }, { INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 2003865358), INT32_C( 1929771291) }, { -INT32_C( 31310396), INT32_C( 30152676) }, { -INT32_C( 244612), INT32_C( 235568) }, { -INT32_C( 3822), INT32_C( 3681) }, { -INT32_C( 30), INT32_C( 29) }, { INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 992489627), -INT32_C( 1382869847) }, { INT32_C( 15507650), -INT32_C( 21607341) }, { INT32_C( 121154), -INT32_C( 168807) }, { INT32_C( 1893), -INT32_C( 2638) }, { INT32_C( 15), -INT32_C( 21) }, { INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 529042587), INT32_C( 1720566614) }, { INT32_C( 8266290), INT32_C( 26883853) }, { INT32_C( 64580), INT32_C( 210030) }, { INT32_C( 1009), INT32_C( 3282) }, { INT32_C( 8), INT32_C( 26) }, { INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 1837058291), -INT32_C( 326956038) }, { INT32_C( 28704036), -INT32_C( 5108688) }, { INT32_C( 224250), -INT32_C( 39912) }, { INT32_C( 3504), -INT32_C( 624) }, { INT32_C( 27), -INT32_C( 5) }, { INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 1753935236), -INT32_C( 1509262837) }, { -INT32_C( 27405238), -INT32_C( 23582232) }, { -INT32_C( 214103), -INT32_C( 184236) }, { -INT32_C( 3345), -INT32_C( 2879) }, { -INT32_C( 26), -INT32_C( 22) }, { INT32_C( 0), INT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t r6 = simde_vrshr_n_s32(a, 6); simde_int32x2_t r13 = simde_vrshr_n_s32(a, 13); simde_int32x2_t r19 = simde_vrshr_n_s32(a, 19); simde_int32x2_t r26 = simde_vrshr_n_s32(a, 26); simde_int32x2_t r32 = simde_vrshr_n_s32(a, 32); simde_test_arm_neon_assert_equal_i32x2(r6, simde_vld1_s32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i32x2(r13, simde_vld1_s32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i32x2(r19, simde_vld1_s32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_i32x2(r26, 
simde_vld1_s32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i32x2(r32, simde_vld1_s32(test_vec[i].r32)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r6 = simde_vrshr_n_s32(a, 6); simde_int32x2_t r13 = simde_vrshr_n_s32(a, 13); simde_int32x2_t r19 = simde_vrshr_n_s32(a, 19); simde_int32x2_t r26 = simde_vrshr_n_s32(a, 26); simde_int32x2_t r32 = simde_vrshr_n_s32(a, 32); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r19, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r26, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r32, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshr_n_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int64_t r13[1]; int64_t r26[1]; int64_t r39[1]; int64_t r52[1]; int64_t r64[1]; } test_vec[] = { { { INT64_C( 5893811813544875365) }, { INT64_C( 719459449895615) }, { INT64_C( 87824639880) }, { INT64_C( 10720781) }, { INT64_C( 1309) }, { INT64_C( 0) } }, { { INT64_C( 5612455711902941949) }, { INT64_C( 685114222644402) }, { INT64_C( 83632107256) }, { INT64_C( 10208997) }, { INT64_C( 1246) }, { INT64_C( 0) } }, { { INT64_C( 301388394613128552) }, { INT64_C( 36790575514298) }, { INT64_C( 4491037050) }, { INT64_C( 548222) }, { INT64_C( 67) }, { INT64_C( 0) } }, { { INT64_C( 8719384580950964818) }, { INT64_C( 1064378000604366) }, { INT64_C( 129928955152) }, { INT64_C( 15860468) }, { INT64_C( 1936) }, { INT64_C( 0) } }, { { INT64_C( 6647140838681368655) }, { INT64_C( 811418559409347) }, { INT64_C( 99050117115) }, { INT64_C( 12091079) }, { INT64_C( 1476) }, { INT64_C( 0) } }, { { -INT64_C( 5787933355660094043) }, { -INT64_C( 706534833454601) }, { -INT64_C( 86246927912) }, { -INT64_C( 
10528189) }, { -INT64_C( 1285) }, { INT64_C( 0) } }, { { INT64_C( 1623766213007508575) }, { INT64_C( 198213649048768) }, { INT64_C( 24196002081) }, { INT64_C( 2953614) }, { INT64_C( 361) }, { INT64_C( 0) } }, { { -INT64_C( 1509172499385605759) }, { -INT64_C( 184225158616407) }, { -INT64_C( 22488422683) }, { -INT64_C( 2745169) }, { -INT64_C( 335) }, { INT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t r13 = simde_vrshr_n_s64(a, 13); simde_int64x1_t r26 = simde_vrshr_n_s64(a, 26); simde_int64x1_t r39 = simde_vrshr_n_s64(a, 39); simde_int64x1_t r52 = simde_vrshr_n_s64(a, 52); simde_int64x1_t r64 = simde_vrshr_n_s64(a, 64); simde_test_arm_neon_assert_equal_i64x1(r13, simde_vld1_s64(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i64x1(r26, simde_vld1_s64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i64x1(r39, simde_vld1_s64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_i64x1(r52, simde_vld1_s64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_i64x1(r64, simde_vld1_s64(test_vec[i].r64)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); simde_int64x1_t r13 = simde_vrshr_n_s64(a, 13); simde_int64x1_t r26 = simde_vrshr_n_s64(a, 26); simde_int64x1_t r39 = simde_vrshr_n_s64(a, 39); simde_int64x1_t r52 = simde_vrshr_n_s64(a, 52); simde_int64x1_t r64 = simde_vrshr_n_s64(a, 64); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r26, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r39, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r52, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r64, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshr_n_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const 
struct { uint8_t a[8]; uint8_t r1[8]; uint8_t r3[8]; uint8_t r5[8]; uint8_t r6[8]; uint8_t r8[8]; } test_vec[] = { { { UINT8_C(188), UINT8_C(100), UINT8_C(233), UINT8_C(106), UINT8_C( 58), UINT8_C( 89), UINT8_C( 6), UINT8_C(109) }, { UINT8_C( 94), UINT8_C( 50), UINT8_C(117), UINT8_C( 53), UINT8_C( 29), UINT8_C( 45), UINT8_C( 3), UINT8_C( 55) }, { UINT8_C( 24), UINT8_C( 13), UINT8_C( 29), UINT8_C( 13), UINT8_C( 7), UINT8_C( 11), UINT8_C( 1), UINT8_C( 14) }, { UINT8_C( 6), UINT8_C( 3), UINT8_C( 7), UINT8_C( 3), UINT8_C( 2), UINT8_C( 3), UINT8_C( 0), UINT8_C( 3) }, { UINT8_C( 3), UINT8_C( 2), UINT8_C( 4), UINT8_C( 2), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 2) }, { UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(186), UINT8_C( 87), UINT8_C(173), UINT8_C(246), UINT8_C( 35), UINT8_C(150), UINT8_C(245), UINT8_C( 42) }, { UINT8_C( 93), UINT8_C( 44), UINT8_C( 87), UINT8_C(123), UINT8_C( 18), UINT8_C( 75), UINT8_C(123), UINT8_C( 21) }, { UINT8_C( 23), UINT8_C( 11), UINT8_C( 22), UINT8_C( 31), UINT8_C( 4), UINT8_C( 19), UINT8_C( 31), UINT8_C( 5) }, { UINT8_C( 6), UINT8_C( 3), UINT8_C( 5), UINT8_C( 8), UINT8_C( 1), UINT8_C( 5), UINT8_C( 8), UINT8_C( 1) }, { UINT8_C( 3), UINT8_C( 1), UINT8_C( 3), UINT8_C( 4), UINT8_C( 1), UINT8_C( 2), UINT8_C( 4), UINT8_C( 1) }, { UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0) } }, { { UINT8_C( 43), UINT8_C( 39), UINT8_C( 42), UINT8_C(113), UINT8_C( 48), UINT8_C(202), UINT8_C( 35), UINT8_C( 65) }, { UINT8_C( 22), UINT8_C( 20), UINT8_C( 21), UINT8_C( 57), UINT8_C( 24), UINT8_C(101), UINT8_C( 18), UINT8_C( 33) }, { UINT8_C( 5), UINT8_C( 5), UINT8_C( 5), UINT8_C( 14), UINT8_C( 6), UINT8_C( 25), UINT8_C( 4), UINT8_C( 8) }, { UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 4), UINT8_C( 2), UINT8_C( 6), UINT8_C( 1), UINT8_C( 2) }, { UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 2), UINT8_C( 1), UINT8_C( 3), 
UINT8_C( 1), UINT8_C( 1) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 67), UINT8_C(157), UINT8_C(163), UINT8_C(203), UINT8_C( 79), UINT8_C( 53), UINT8_C(214), UINT8_C( 11) }, { UINT8_C( 34), UINT8_C( 79), UINT8_C( 82), UINT8_C(102), UINT8_C( 40), UINT8_C( 27), UINT8_C(107), UINT8_C( 6) }, { UINT8_C( 8), UINT8_C( 20), UINT8_C( 20), UINT8_C( 25), UINT8_C( 10), UINT8_C( 7), UINT8_C( 27), UINT8_C( 1) }, { UINT8_C( 2), UINT8_C( 5), UINT8_C( 5), UINT8_C( 6), UINT8_C( 2), UINT8_C( 2), UINT8_C( 7), UINT8_C( 0) }, { UINT8_C( 1), UINT8_C( 2), UINT8_C( 3), UINT8_C( 3), UINT8_C( 1), UINT8_C( 1), UINT8_C( 3), UINT8_C( 0) }, { UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0) } }, { { UINT8_C(153), UINT8_C(191), UINT8_C(118), UINT8_C(211), UINT8_C( 24), UINT8_C(124), UINT8_C( 64), UINT8_C(211) }, { UINT8_C( 77), UINT8_C( 96), UINT8_C( 59), UINT8_C(106), UINT8_C( 12), UINT8_C( 62), UINT8_C( 32), UINT8_C(106) }, { UINT8_C( 19), UINT8_C( 24), UINT8_C( 15), UINT8_C( 26), UINT8_C( 3), UINT8_C( 16), UINT8_C( 8), UINT8_C( 26) }, { UINT8_C( 5), UINT8_C( 6), UINT8_C( 4), UINT8_C( 7), UINT8_C( 1), UINT8_C( 4), UINT8_C( 2), UINT8_C( 7) }, { UINT8_C( 2), UINT8_C( 3), UINT8_C( 2), UINT8_C( 3), UINT8_C( 0), UINT8_C( 2), UINT8_C( 1), UINT8_C( 3) }, { UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1) } }, { { UINT8_C(211), UINT8_C(237), UINT8_C(201), UINT8_C(247), UINT8_C(132), UINT8_C(190), UINT8_C( 33), UINT8_C(175) }, { UINT8_C(106), UINT8_C(119), UINT8_C(101), UINT8_C(124), UINT8_C( 66), UINT8_C( 95), UINT8_C( 17), UINT8_C( 88) }, { UINT8_C( 26), UINT8_C( 30), UINT8_C( 25), UINT8_C( 31), UINT8_C( 17), UINT8_C( 24), UINT8_C( 4), UINT8_C( 22) }, { UINT8_C( 7), UINT8_C( 7), UINT8_C( 6), UINT8_C( 8), UINT8_C( 4), UINT8_C( 6), UINT8_C( 1), UINT8_C( 5) }, { UINT8_C( 3), UINT8_C( 4), UINT8_C( 3), UINT8_C( 4), 
UINT8_C( 2), UINT8_C( 3), UINT8_C( 1), UINT8_C( 3) }, { UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1) } }, { { UINT8_C(229), UINT8_C( 75), UINT8_C( 32), UINT8_C( 21), UINT8_C( 21), UINT8_C( 67), UINT8_C( 87), UINT8_C( 88) }, { UINT8_C(115), UINT8_C( 38), UINT8_C( 16), UINT8_C( 11), UINT8_C( 11), UINT8_C( 34), UINT8_C( 44), UINT8_C( 44) }, { UINT8_C( 29), UINT8_C( 9), UINT8_C( 4), UINT8_C( 3), UINT8_C( 3), UINT8_C( 8), UINT8_C( 11), UINT8_C( 11) }, { UINT8_C( 7), UINT8_C( 2), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 2), UINT8_C( 3), UINT8_C( 3) }, { UINT8_C( 4), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1) }, { UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(224), UINT8_C(250), UINT8_C( 36), UINT8_C( 47), UINT8_C( 47), UINT8_C(250), UINT8_C( 58), UINT8_C(201) }, { UINT8_C(112), UINT8_C(125), UINT8_C( 18), UINT8_C( 24), UINT8_C( 24), UINT8_C(125), UINT8_C( 29), UINT8_C(101) }, { UINT8_C( 28), UINT8_C( 31), UINT8_C( 5), UINT8_C( 6), UINT8_C( 6), UINT8_C( 31), UINT8_C( 7), UINT8_C( 25) }, { UINT8_C( 7), UINT8_C( 8), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 8), UINT8_C( 2), UINT8_C( 6) }, { UINT8_C( 4), UINT8_C( 4), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 4), UINT8_C( 1), UINT8_C( 3) }, { UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t r1 = simde_vrshr_n_u8(a, 1); simde_uint8x8_t r3 = simde_vrshr_n_u8(a, 3); simde_uint8x8_t r5 = simde_vrshr_n_u8(a, 5); simde_uint8x8_t r6 = simde_vrshr_n_u8(a, 6); simde_uint8x8_t r8 = simde_vrshr_n_u8(a, 8); simde_test_arm_neon_assert_equal_u8x8(r1, simde_vld1_u8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u8x8(r3, 
simde_vld1_u8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u8x8(r5, simde_vld1_u8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_u8x8(r6, simde_vld1_u8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u8x8(r8, simde_vld1_u8(test_vec[i].r8)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r1 = simde_vrshr_n_u8(a, 1); simde_uint8x8_t r3 = simde_vrshr_n_u8(a, 3); simde_uint8x8_t r5 = simde_vrshr_n_u8(a, 5); simde_uint8x8_t r6 = simde_vrshr_n_u8(a, 6); simde_uint8x8_t r8 = simde_vrshr_n_u8(a, 8); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, r1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r5, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r8, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshr_n_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t r3[4]; uint16_t r6[4]; uint16_t r10[4]; uint16_t r13[4]; uint16_t r16[4]; } test_vec[] = { { { UINT16_C(26758), UINT16_C( 5620), UINT16_C(28305), UINT16_C( 9341) }, { UINT16_C( 3345), UINT16_C( 703), UINT16_C( 3538), UINT16_C( 1168) }, { UINT16_C( 418), UINT16_C( 88), UINT16_C( 442), UINT16_C( 146) }, { UINT16_C( 26), UINT16_C( 5), UINT16_C( 28), UINT16_C( 9) }, { UINT16_C( 3), UINT16_C( 1), UINT16_C( 3), UINT16_C( 1) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(42950), UINT16_C( 2237), UINT16_C( 9818), UINT16_C(57208) }, { UINT16_C( 5369), UINT16_C( 280), UINT16_C( 1227), UINT16_C( 7151) }, { UINT16_C( 671), UINT16_C( 35), UINT16_C( 153), UINT16_C( 894) }, { UINT16_C( 42), UINT16_C( 2), UINT16_C( 10), UINT16_C( 56) }, { UINT16_C( 5), UINT16_C( 0), UINT16_C( 1), UINT16_C( 7) }, { UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), 
UINT16_C( 1) } }, { { UINT16_C(58345), UINT16_C(60578), UINT16_C(30620), UINT16_C(38728) }, { UINT16_C( 7293), UINT16_C( 7572), UINT16_C( 3828), UINT16_C( 4841) }, { UINT16_C( 912), UINT16_C( 947), UINT16_C( 478), UINT16_C( 605) }, { UINT16_C( 57), UINT16_C( 59), UINT16_C( 30), UINT16_C( 38) }, { UINT16_C( 7), UINT16_C( 7), UINT16_C( 4), UINT16_C( 5) }, { UINT16_C( 1), UINT16_C( 1), UINT16_C( 0), UINT16_C( 1) } }, { { UINT16_C( 7914), UINT16_C(30904), UINT16_C(54371), UINT16_C(59789) }, { UINT16_C( 989), UINT16_C( 3863), UINT16_C( 6796), UINT16_C( 7474) }, { UINT16_C( 124), UINT16_C( 483), UINT16_C( 850), UINT16_C( 934) }, { UINT16_C( 8), UINT16_C( 30), UINT16_C( 53), UINT16_C( 58) }, { UINT16_C( 1), UINT16_C( 4), UINT16_C( 7), UINT16_C( 7) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 1) } }, { { UINT16_C(33341), UINT16_C(52991), UINT16_C(31984), UINT16_C(46834) }, { UINT16_C( 4168), UINT16_C( 6624), UINT16_C( 3998), UINT16_C( 5854) }, { UINT16_C( 521), UINT16_C( 828), UINT16_C( 500), UINT16_C( 732) }, { UINT16_C( 33), UINT16_C( 52), UINT16_C( 31), UINT16_C( 46) }, { UINT16_C( 4), UINT16_C( 6), UINT16_C( 4), UINT16_C( 6) }, { UINT16_C( 1), UINT16_C( 1), UINT16_C( 0), UINT16_C( 1) } }, { { UINT16_C(44835), UINT16_C(32190), UINT16_C(14038), UINT16_C(48988) }, { UINT16_C( 5604), UINT16_C( 4024), UINT16_C( 1755), UINT16_C( 6124) }, { UINT16_C( 701), UINT16_C( 503), UINT16_C( 219), UINT16_C( 765) }, { UINT16_C( 44), UINT16_C( 31), UINT16_C( 14), UINT16_C( 48) }, { UINT16_C( 5), UINT16_C( 4), UINT16_C( 2), UINT16_C( 6) }, { UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1) } }, { { UINT16_C(65305), UINT16_C(46508), UINT16_C(62582), UINT16_C(24652) }, { UINT16_C( 8163), UINT16_C( 5814), UINT16_C( 7823), UINT16_C( 3082) }, { UINT16_C( 1020), UINT16_C( 727), UINT16_C( 978), UINT16_C( 385) }, { UINT16_C( 64), UINT16_C( 45), UINT16_C( 61), UINT16_C( 24) }, { UINT16_C( 8), UINT16_C( 6), UINT16_C( 8), UINT16_C( 3) }, { UINT16_C( 1), UINT16_C( 1), UINT16_C( 1), 
UINT16_C( 0) } }, { { UINT16_C( 1042), UINT16_C(30424), UINT16_C(26329), UINT16_C( 5727) }, { UINT16_C( 130), UINT16_C( 3803), UINT16_C( 3291), UINT16_C( 716) }, { UINT16_C( 16), UINT16_C( 475), UINT16_C( 411), UINT16_C( 89) }, { UINT16_C( 1), UINT16_C( 30), UINT16_C( 26), UINT16_C( 6) }, { UINT16_C( 0), UINT16_C( 4), UINT16_C( 3), UINT16_C( 1) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t r3 = simde_vrshr_n_u16(a, 3); simde_uint16x4_t r6 = simde_vrshr_n_u16(a, 6); simde_uint16x4_t r10 = simde_vrshr_n_u16(a, 10); simde_uint16x4_t r13 = simde_vrshr_n_u16(a, 13); simde_uint16x4_t r16 = simde_vrshr_n_u16(a, 16); simde_test_arm_neon_assert_equal_u16x4(r3, simde_vld1_u16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u16x4(r6, simde_vld1_u16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u16x4(r10, simde_vld1_u16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_u16x4(r13, simde_vld1_u16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u16x4(r16, simde_vld1_u16(test_vec[i].r16)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t r3 = simde_vrshr_n_u16(a, 3); simde_uint16x4_t r6 = simde_vrshr_n_u16(a, 6); simde_uint16x4_t r10 = simde_vrshr_n_u16(a, 10); simde_uint16x4_t r13 = simde_vrshr_n_u16(a, 13); simde_uint16x4_t r16 = simde_vrshr_n_u16(a, 16); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r16, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshr_n_u32 
(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t r6[2]; uint32_t r13[2]; uint32_t r19[2]; uint32_t r26[2]; uint32_t r32[2]; } test_vec[] = { { { UINT32_C(1138526429), UINT32_C(1714386701) }, { UINT32_C( 17789475), UINT32_C( 26787292) }, { UINT32_C( 138980), UINT32_C( 209276) }, { UINT32_C( 2172), UINT32_C( 3270) }, { UINT32_C( 17), UINT32_C( 26) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(4121732862), UINT32_C(4029665682) }, { UINT32_C( 64402076), UINT32_C( 62963526) }, { UINT32_C( 503141), UINT32_C( 491903) }, { UINT32_C( 7862), UINT32_C( 7686) }, { UINT32_C( 61), UINT32_C( 60) }, { UINT32_C( 1), UINT32_C( 1) } }, { { UINT32_C(1123760803), UINT32_C(2793257249) }, { UINT32_C( 17558763), UINT32_C( 43644645) }, { UINT32_C( 137178), UINT32_C( 340974) }, { UINT32_C( 2143), UINT32_C( 5328) }, { UINT32_C( 17), UINT32_C( 42) }, { UINT32_C( 0), UINT32_C( 1) } }, { { UINT32_C(1068092775), UINT32_C(3130298076) }, { UINT32_C( 16688950), UINT32_C( 48910907) }, { UINT32_C( 130382), UINT32_C( 382116) }, { UINT32_C( 2037), UINT32_C( 5971) }, { UINT32_C( 16), UINT32_C( 47) }, { UINT32_C( 0), UINT32_C( 1) } }, { { UINT32_C( 620589078), UINT32_C(3851038183) }, { UINT32_C( 9696704), UINT32_C( 60172472) }, { UINT32_C( 75756), UINT32_C( 470097) }, { UINT32_C( 1184), UINT32_C( 7345) }, { UINT32_C( 9), UINT32_C( 57) }, { UINT32_C( 0), UINT32_C( 1) } }, { { UINT32_C(1708799955), UINT32_C(2874476808) }, { UINT32_C( 26699999), UINT32_C( 44913700) }, { UINT32_C( 208594), UINT32_C( 350888) }, { UINT32_C( 3259), UINT32_C( 5483) }, { UINT32_C( 25), UINT32_C( 43) }, { UINT32_C( 0), UINT32_C( 1) } }, { { UINT32_C(1693274179), UINT32_C(1879796489) }, { UINT32_C( 26457409), UINT32_C( 29371820) }, { UINT32_C( 206699), UINT32_C( 229467) }, { UINT32_C( 3230), UINT32_C( 3585) }, { UINT32_C( 25), UINT32_C( 28) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 363836472), UINT32_C(1489978178) }, { UINT32_C( 5684945), UINT32_C( 23280909) }, { UINT32_C( 44414), UINT32_C( 
181882) }, { UINT32_C( 694), UINT32_C( 2842) }, { UINT32_C( 5), UINT32_C( 22) }, { UINT32_C( 0), UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t r6 = simde_vrshr_n_u32(a, 6); simde_uint32x2_t r13 = simde_vrshr_n_u32(a, 13); simde_uint32x2_t r19 = simde_vrshr_n_u32(a, 19); simde_uint32x2_t r26 = simde_vrshr_n_u32(a, 26); simde_uint32x2_t r32 = simde_vrshr_n_u32(a, 32); simde_test_arm_neon_assert_equal_u32x2(r6, simde_vld1_u32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u32x2(r13, simde_vld1_u32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u32x2(r19, simde_vld1_u32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_u32x2(r26, simde_vld1_u32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u32x2(r32, simde_vld1_u32(test_vec[i].r32)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t r6 = simde_vrshr_n_u32(a, 6); simde_uint32x2_t r13 = simde_vrshr_n_u32(a, 13); simde_uint32x2_t r19 = simde_vrshr_n_u32(a, 19); simde_uint32x2_t r26 = simde_vrshr_n_u32(a, 26); simde_uint32x2_t r32 = simde_vrshr_n_u32(a, 32); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r19, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r26, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r32, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshr_n_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[1]; uint64_t r13[1]; uint64_t r26[1]; uint64_t r39[1]; uint64_t r52[1]; uint64_t r64[1]; } test_vec[] = { { { UINT64_C(16088398316289318313) }, { UINT64_C( 1963915810093911) }, { UINT64_C( 239735816662) }, { UINT64_C( 29264626) }, 
{ UINT64_C( 3572) }, { UINT64_C( 1) } }, { { UINT64_C(15970363434992149467) }, { UINT64_C( 1949507255248065) }, { UINT64_C( 237976959869) }, { UINT64_C( 29049922) }, { UINT64_C( 3546) }, { UINT64_C( 1) } }, { { UINT64_C(17355878290108775677) }, { UINT64_C( 2118637486585544) }, { UINT64_C( 258622740062) }, { UINT64_C( 31570159) }, { UINT64_C( 3854) }, { UINT64_C( 1) } }, { { UINT64_C( 5593719929260279867) }, { UINT64_C( 682827139802280) }, { UINT64_C( 83352922339) }, { UINT64_C( 10174917) }, { UINT64_C( 1242) }, { UINT64_C( 0) } }, { { UINT64_C( 6892171561238006463) }, { UINT64_C( 841329536283936) }, { UINT64_C( 102701359410) }, { UINT64_C( 12536787) }, { UINT64_C( 1530) }, { UINT64_C( 0) } }, { { UINT64_C( 6618027965719723532) }, { UINT64_C( 807864741909146) }, { UINT64_C( 98616301503) }, { UINT64_C( 12038123) }, { UINT64_C( 1469) }, { UINT64_C( 0) } }, { { UINT64_C( 7006102550303846385) }, { UINT64_C( 855237127722637) }, { UINT64_C( 104399063443) }, { UINT64_C( 12744026) }, { UINT64_C( 1556) }, { UINT64_C( 0) } }, { { UINT64_C( 8117332270751076278) }, { UINT64_C( 990885286956918) }, { UINT64_C( 120957676630) }, { UINT64_C( 14765341) }, { UINT64_C( 1802) }, { UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t r13 = simde_vrshr_n_u64(a, 13); simde_uint64x1_t r26 = simde_vrshr_n_u64(a, 26); simde_uint64x1_t r39 = simde_vrshr_n_u64(a, 39); simde_uint64x1_t r52 = simde_vrshr_n_u64(a, 52); simde_uint64x1_t r64 = simde_vrshr_n_u64(a, 64); simde_test_arm_neon_assert_equal_u64x1(r13, simde_vld1_u64(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u64x1(r26, simde_vld1_u64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u64x1(r39, simde_vld1_u64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_u64x1(r52, simde_vld1_u64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_u64x1(r64, simde_vld1_u64(test_vec[i].r64)); } return 0; #else fputc('\n', stdout); 
for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t r13 = simde_vrshr_n_u64(a, 13); simde_uint64x1_t r26 = simde_vrshr_n_u64(a, 26); simde_uint64x1_t r39 = simde_vrshr_n_u64(a, 39); simde_uint64x1_t r52 = simde_vrshr_n_u64(a, 52); simde_uint64x1_t r64 = simde_vrshr_n_u64(a, 64); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r26, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r39, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r52, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r64, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshrq_n_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t r1[16]; int8_t r3[16]; int8_t r5[16]; int8_t r6[16]; int8_t r8[16]; } test_vec[] = { { { INT8_C( 53), INT8_C( 82), INT8_C( 70), -INT8_C( 76), INT8_C( 93), -INT8_C( 85), -INT8_C( 127), -INT8_C( 105), -INT8_C( 120), INT8_C( 101), -INT8_C( 41), -INT8_C( 69), -INT8_C( 1), -INT8_C( 123), INT8_C( 97), -INT8_C( 90) }, { INT8_C( 27), INT8_C( 41), INT8_C( 35), -INT8_C( 38), INT8_C( 47), -INT8_C( 42), -INT8_C( 63), -INT8_C( 52), -INT8_C( 60), INT8_C( 51), -INT8_C( 20), -INT8_C( 34), INT8_C( 0), -INT8_C( 61), INT8_C( 49), -INT8_C( 45) }, { INT8_C( 7), INT8_C( 10), INT8_C( 9), -INT8_C( 9), INT8_C( 12), -INT8_C( 11), -INT8_C( 16), -INT8_C( 13), -INT8_C( 15), INT8_C( 13), -INT8_C( 5), -INT8_C( 9), INT8_C( 0), -INT8_C( 15), INT8_C( 12), -INT8_C( 11) }, { INT8_C( 2), INT8_C( 3), INT8_C( 2), -INT8_C( 2), INT8_C( 3), -INT8_C( 3), -INT8_C( 4), -INT8_C( 3), -INT8_C( 4), INT8_C( 3), -INT8_C( 1), -INT8_C( 2), INT8_C( 0), -INT8_C( 4), INT8_C( 3), -INT8_C( 3) }, { INT8_C( 1), INT8_C( 1), INT8_C( 1), -INT8_C( 1), INT8_C( 1), -INT8_C( 1), -INT8_C( 2), -INT8_C( 2), -INT8_C( 2), INT8_C( 2), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 2), 
INT8_C( 2), -INT8_C( 1) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 78), INT8_C( 69), -INT8_C( 68), INT8_C( 16), INT8_C( 85), INT8_C( 100), -INT8_C( 51), INT8_C( 44), -INT8_C( 127), INT8_C( 104), INT8_C( 35), INT8_C( 126), INT8_C( 62), -INT8_C( 114), -INT8_C( 74), INT8_C( 116) }, { INT8_C( 39), INT8_C( 35), -INT8_C( 34), INT8_C( 8), INT8_C( 43), INT8_C( 50), -INT8_C( 25), INT8_C( 22), -INT8_C( 63), INT8_C( 52), INT8_C( 18), INT8_C( 63), INT8_C( 31), -INT8_C( 57), -INT8_C( 37), INT8_C( 58) }, { INT8_C( 10), INT8_C( 9), -INT8_C( 8), INT8_C( 2), INT8_C( 11), INT8_C( 13), -INT8_C( 6), INT8_C( 6), -INT8_C( 16), INT8_C( 13), INT8_C( 4), INT8_C( 16), INT8_C( 8), -INT8_C( 14), -INT8_C( 9), INT8_C( 15) }, { INT8_C( 2), INT8_C( 2), -INT8_C( 2), INT8_C( 1), INT8_C( 3), INT8_C( 3), -INT8_C( 2), INT8_C( 1), -INT8_C( 4), INT8_C( 3), INT8_C( 1), INT8_C( 4), INT8_C( 2), -INT8_C( 4), -INT8_C( 2), INT8_C( 4) }, { INT8_C( 1), INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 1), INT8_C( 2), -INT8_C( 1), INT8_C( 1), -INT8_C( 2), INT8_C( 2), INT8_C( 1), INT8_C( 2), INT8_C( 1), -INT8_C( 2), -INT8_C( 1), INT8_C( 2) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 32), -INT8_C( 4), INT8_C( 40), INT8_C( 62), -INT8_C( 89), -INT8_C( 87), -INT8_C( 43), INT8_C( 48), INT8_C( 14), -INT8_C( 84), -INT8_C( 21), INT8_C( 13), INT8_C( 49), INT8_C( 76), -INT8_C( 77), INT8_MAX }, { -INT8_C( 16), -INT8_C( 2), INT8_C( 20), INT8_C( 31), -INT8_C( 44), -INT8_C( 43), -INT8_C( 21), INT8_C( 24), INT8_C( 7), -INT8_C( 42), -INT8_C( 10), INT8_C( 7), INT8_C( 25), INT8_C( 38), -INT8_C( 38), INT8_C( 64) }, { -INT8_C( 4), INT8_C( 0), INT8_C( 5), INT8_C( 8), -INT8_C( 11), -INT8_C( 11), -INT8_C( 
5), INT8_C( 6), INT8_C( 2), -INT8_C( 10), -INT8_C( 3), INT8_C( 2), INT8_C( 6), INT8_C( 10), -INT8_C( 10), INT8_C( 16) }, { -INT8_C( 1), INT8_C( 0), INT8_C( 1), INT8_C( 2), -INT8_C( 3), -INT8_C( 3), -INT8_C( 1), INT8_C( 2), INT8_C( 0), -INT8_C( 3), -INT8_C( 1), INT8_C( 0), INT8_C( 2), INT8_C( 2), -INT8_C( 2), INT8_C( 4) }, { INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 1), -INT8_C( 1), INT8_C( 2) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 111), INT8_C( 112), -INT8_C( 112), -INT8_C( 26), -INT8_C( 44), INT8_C( 93), INT8_C( 18), INT8_C( 85), -INT8_C( 59), INT8_C( 53), -INT8_C( 44), INT8_C( 4), -INT8_C( 61), -INT8_C( 118), INT8_C( 120), -INT8_C( 93) }, { -INT8_C( 55), INT8_C( 56), -INT8_C( 56), -INT8_C( 13), -INT8_C( 22), INT8_C( 47), INT8_C( 9), INT8_C( 43), -INT8_C( 29), INT8_C( 27), -INT8_C( 22), INT8_C( 2), -INT8_C( 30), -INT8_C( 59), INT8_C( 60), -INT8_C( 46) }, { -INT8_C( 14), INT8_C( 14), -INT8_C( 14), -INT8_C( 3), -INT8_C( 5), INT8_C( 12), INT8_C( 2), INT8_C( 11), -INT8_C( 7), INT8_C( 7), -INT8_C( 5), INT8_C( 1), -INT8_C( 8), -INT8_C( 15), INT8_C( 15), -INT8_C( 12) }, { -INT8_C( 3), INT8_C( 4), -INT8_C( 3), -INT8_C( 1), -INT8_C( 1), INT8_C( 3), INT8_C( 1), INT8_C( 3), -INT8_C( 2), INT8_C( 2), -INT8_C( 1), INT8_C( 0), -INT8_C( 2), -INT8_C( 4), INT8_C( 4), -INT8_C( 3) }, { -INT8_C( 2), INT8_C( 2), -INT8_C( 2), INT8_C( 0), -INT8_C( 1), INT8_C( 1), INT8_C( 0), INT8_C( 1), -INT8_C( 1), INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 2), INT8_C( 2), -INT8_C( 1) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { 
-INT8_C( 121), -INT8_C( 96), -INT8_C( 31), INT8_C( 46), INT8_C( 74), -INT8_C( 74), INT8_C( 94), INT8_C( 88), INT8_C( 98), INT8_C( 73), INT8_C( 102), -INT8_C( 109), -INT8_C( 107), INT8_C( 25), INT8_C( 19), INT8_C( 38) }, { -INT8_C( 60), -INT8_C( 48), -INT8_C( 15), INT8_C( 23), INT8_C( 37), -INT8_C( 37), INT8_C( 47), INT8_C( 44), INT8_C( 49), INT8_C( 37), INT8_C( 51), -INT8_C( 54), -INT8_C( 53), INT8_C( 13), INT8_C( 10), INT8_C( 19) }, { -INT8_C( 15), -INT8_C( 12), -INT8_C( 4), INT8_C( 6), INT8_C( 9), -INT8_C( 9), INT8_C( 12), INT8_C( 11), INT8_C( 12), INT8_C( 9), INT8_C( 13), -INT8_C( 14), -INT8_C( 13), INT8_C( 3), INT8_C( 2), INT8_C( 5) }, { -INT8_C( 4), -INT8_C( 3), -INT8_C( 1), INT8_C( 1), INT8_C( 2), -INT8_C( 2), INT8_C( 3), INT8_C( 3), INT8_C( 3), INT8_C( 2), INT8_C( 3), -INT8_C( 3), -INT8_C( 3), INT8_C( 1), INT8_C( 1), INT8_C( 1) }, { -INT8_C( 2), -INT8_C( 1), INT8_C( 0), INT8_C( 1), INT8_C( 1), -INT8_C( 1), INT8_C( 1), INT8_C( 1), INT8_C( 2), INT8_C( 1), INT8_C( 2), -INT8_C( 2), -INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 1) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 119), -INT8_C( 93), INT8_C( 12), INT8_C( 93), INT8_C( 0), INT8_C( 30), -INT8_C( 77), -INT8_C( 59), INT8_C( 83), -INT8_C( 121), -INT8_C( 55), INT8_C( 22), INT8_C( 17), INT8_C( 65), -INT8_C( 70), -INT8_C( 104) }, { -INT8_C( 59), -INT8_C( 46), INT8_C( 6), INT8_C( 47), INT8_C( 0), INT8_C( 15), -INT8_C( 38), -INT8_C( 29), INT8_C( 42), -INT8_C( 60), -INT8_C( 27), INT8_C( 11), INT8_C( 9), INT8_C( 33), -INT8_C( 35), -INT8_C( 52) }, { -INT8_C( 15), -INT8_C( 12), INT8_C( 2), INT8_C( 12), INT8_C( 0), INT8_C( 4), -INT8_C( 10), -INT8_C( 7), INT8_C( 10), -INT8_C( 15), -INT8_C( 7), INT8_C( 3), INT8_C( 2), INT8_C( 8), -INT8_C( 9), -INT8_C( 13) }, { -INT8_C( 4), -INT8_C( 3), INT8_C( 0), INT8_C( 3), INT8_C( 0), INT8_C( 1), -INT8_C( 2), -INT8_C( 
2), INT8_C( 3), -INT8_C( 4), -INT8_C( 2), INT8_C( 1), INT8_C( 1), INT8_C( 2), -INT8_C( 2), -INT8_C( 3) }, { -INT8_C( 2), -INT8_C( 1), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 1), -INT8_C( 2), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 1), -INT8_C( 1), -INT8_C( 2) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 30), -INT8_C( 101), -INT8_C( 57), INT8_C( 44), INT8_C( 82), INT8_C( 37), -INT8_C( 124), -INT8_C( 76), INT8_C( 111), -INT8_C( 22), INT8_C( 72), INT8_C( 4), INT8_C( 4), INT8_C( 91), INT8_C( 43), -INT8_C( 115) }, { -INT8_C( 15), -INT8_C( 50), -INT8_C( 28), INT8_C( 22), INT8_C( 41), INT8_C( 19), -INT8_C( 62), -INT8_C( 38), INT8_C( 56), -INT8_C( 11), INT8_C( 36), INT8_C( 2), INT8_C( 2), INT8_C( 46), INT8_C( 22), -INT8_C( 57) }, { -INT8_C( 4), -INT8_C( 13), -INT8_C( 7), INT8_C( 6), INT8_C( 10), INT8_C( 5), -INT8_C( 15), -INT8_C( 9), INT8_C( 14), -INT8_C( 3), INT8_C( 9), INT8_C( 1), INT8_C( 1), INT8_C( 11), INT8_C( 5), -INT8_C( 14) }, { -INT8_C( 1), -INT8_C( 3), -INT8_C( 2), INT8_C( 1), INT8_C( 3), INT8_C( 1), -INT8_C( 4), -INT8_C( 2), INT8_C( 3), -INT8_C( 1), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 3), INT8_C( 1), -INT8_C( 4) }, { INT8_C( 0), -INT8_C( 2), -INT8_C( 1), INT8_C( 1), INT8_C( 1), INT8_C( 1), -INT8_C( 2), -INT8_C( 1), INT8_C( 2), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 1), -INT8_C( 2) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 2), INT8_C( 55), -INT8_C( 21), -INT8_C( 2), INT8_C( 86), -INT8_C( 98), -INT8_C( 61), -INT8_C( 87), INT8_C( 37), -INT8_C( 115), -INT8_C( 64), INT8_C( 54), -INT8_C( 50), INT8_C( 122), -INT8_C( 49), -INT8_C( 80) }, { -INT8_C( 1), 
INT8_C( 28), -INT8_C( 10), -INT8_C( 1), INT8_C( 43), -INT8_C( 49), -INT8_C( 30), -INT8_C( 43), INT8_C( 19), -INT8_C( 57), -INT8_C( 32), INT8_C( 27), -INT8_C( 25), INT8_C( 61), -INT8_C( 24), -INT8_C( 40) }, { INT8_C( 0), INT8_C( 7), -INT8_C( 3), INT8_C( 0), INT8_C( 11), -INT8_C( 12), -INT8_C( 8), -INT8_C( 11), INT8_C( 5), -INT8_C( 14), -INT8_C( 8), INT8_C( 7), -INT8_C( 6), INT8_C( 15), -INT8_C( 6), -INT8_C( 10) }, { INT8_C( 0), INT8_C( 2), -INT8_C( 1), INT8_C( 0), INT8_C( 3), -INT8_C( 3), -INT8_C( 2), -INT8_C( 3), INT8_C( 1), -INT8_C( 4), -INT8_C( 2), INT8_C( 2), -INT8_C( 2), INT8_C( 4), -INT8_C( 2), -INT8_C( 2) }, { INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 1), -INT8_C( 2), -INT8_C( 1), -INT8_C( 1), INT8_C( 1), -INT8_C( 2), -INT8_C( 1), INT8_C( 1), -INT8_C( 1), INT8_C( 2), -INT8_C( 1), -INT8_C( 1) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t r1 = simde_vrshrq_n_s8(a, 1); simde_int8x16_t r3 = simde_vrshrq_n_s8(a, 3); simde_int8x16_t r5 = simde_vrshrq_n_s8(a, 5); simde_int8x16_t r6 = simde_vrshrq_n_s8(a, 6); simde_int8x16_t r8 = simde_vrshrq_n_s8(a, 8); simde_test_arm_neon_assert_equal_i8x16(r1, simde_vld1q_s8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i8x16(r3, simde_vld1q_s8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i8x16(r5, simde_vld1q_s8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_i8x16(r6, simde_vld1q_s8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i8x16(r8, simde_vld1q_s8(test_vec[i].r8)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r1 = simde_vrshrq_n_s8(a, 1); simde_int8x16_t r3 = simde_vrshrq_n_s8(a, 3); simde_int8x16_t r5 
= simde_vrshrq_n_s8(a, 5); simde_int8x16_t r6 = simde_vrshrq_n_s8(a, 6); simde_int8x16_t r8 = simde_vrshrq_n_s8(a, 8); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, r1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r5, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r8, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshrq_n_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t r3[8]; int16_t r6[8]; int16_t r10[8]; int16_t r13[8]; int16_t r16[8]; } test_vec[] = { { { -INT16_C( 16620), INT16_C( 3863), -INT16_C( 26718), INT16_C( 477), INT16_C( 12323), -INT16_C( 31606), -INT16_C( 22951), -INT16_C( 13711) }, { -INT16_C( 2077), INT16_C( 483), -INT16_C( 3340), INT16_C( 60), INT16_C( 1540), -INT16_C( 3951), -INT16_C( 2869), -INT16_C( 1714) }, { -INT16_C( 260), INT16_C( 60), -INT16_C( 417), INT16_C( 7), INT16_C( 193), -INT16_C( 494), -INT16_C( 359), -INT16_C( 214) }, { -INT16_C( 16), INT16_C( 4), -INT16_C( 26), INT16_C( 0), INT16_C( 12), -INT16_C( 31), -INT16_C( 22), -INT16_C( 13) }, { -INT16_C( 2), INT16_C( 0), -INT16_C( 3), INT16_C( 0), INT16_C( 2), -INT16_C( 4), -INT16_C( 3), -INT16_C( 2) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 31302), INT16_C( 5069), INT16_C( 28315), -INT16_C( 5931), -INT16_C( 30492), INT16_C( 21319), -INT16_C( 13235), INT16_C( 25012) }, { -INT16_C( 3913), INT16_C( 634), INT16_C( 3539), -INT16_C( 741), -INT16_C( 3811), INT16_C( 2665), -INT16_C( 1654), INT16_C( 3127) }, { -INT16_C( 489), INT16_C( 79), INT16_C( 442), -INT16_C( 93), -INT16_C( 476), INT16_C( 333), -INT16_C( 207), INT16_C( 391) }, { -INT16_C( 31), INT16_C( 5), INT16_C( 28), -INT16_C( 6), -INT16_C( 30), INT16_C( 21), -INT16_C( 13), INT16_C( 
24) }, { -INT16_C( 4), INT16_C( 1), INT16_C( 3), -INT16_C( 1), -INT16_C( 4), INT16_C( 3), -INT16_C( 2), INT16_C( 3) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 13428), INT16_C( 11888), INT16_C( 20066), -INT16_C( 31441), -INT16_C( 17794), -INT16_C( 10487), INT16_C( 31328), INT16_C( 6817) }, { -INT16_C( 1678), INT16_C( 1486), INT16_C( 2508), -INT16_C( 3930), -INT16_C( 2224), -INT16_C( 1311), INT16_C( 3916), INT16_C( 852) }, { -INT16_C( 210), INT16_C( 186), INT16_C( 314), -INT16_C( 491), -INT16_C( 278), -INT16_C( 164), INT16_C( 490), INT16_C( 107) }, { -INT16_C( 13), INT16_C( 12), INT16_C( 20), -INT16_C( 31), -INT16_C( 17), -INT16_C( 10), INT16_C( 31), INT16_C( 7) }, { -INT16_C( 2), INT16_C( 1), INT16_C( 2), -INT16_C( 4), -INT16_C( 2), -INT16_C( 1), INT16_C( 4), INT16_C( 1) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 28160), -INT16_C( 25811), INT16_C( 732), -INT16_C( 15997), -INT16_C( 13430), -INT16_C( 10220), -INT16_C( 14185), INT16_C( 9017) }, { INT16_C( 3520), -INT16_C( 3226), INT16_C( 92), -INT16_C( 2000), -INT16_C( 1679), -INT16_C( 1277), -INT16_C( 1773), INT16_C( 1127) }, { INT16_C( 440), -INT16_C( 403), INT16_C( 11), -INT16_C( 250), -INT16_C( 210), -INT16_C( 160), -INT16_C( 222), INT16_C( 141) }, { INT16_C( 28), -INT16_C( 25), INT16_C( 1), -INT16_C( 16), -INT16_C( 13), -INT16_C( 10), -INT16_C( 14), INT16_C( 9) }, { INT16_C( 3), -INT16_C( 3), INT16_C( 0), -INT16_C( 2), -INT16_C( 2), -INT16_C( 1), -INT16_C( 2), INT16_C( 1) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 21869), -INT16_C( 2734), -INT16_C( 32264), INT16_C( 30330), -INT16_C( 31941), -INT16_C( 25523), -INT16_C( 4354), -INT16_C( 330) }, { -INT16_C( 2734), -INT16_C( 342), -INT16_C( 4033), INT16_C( 3791), -INT16_C( 3993), -INT16_C( 3190), -INT16_C( 544), -INT16_C( 
41) }, { -INT16_C( 342), -INT16_C( 43), -INT16_C( 504), INT16_C( 474), -INT16_C( 499), -INT16_C( 399), -INT16_C( 68), -INT16_C( 5) }, { -INT16_C( 21), -INT16_C( 3), -INT16_C( 32), INT16_C( 30), -INT16_C( 31), -INT16_C( 25), -INT16_C( 4), INT16_C( 0) }, { -INT16_C( 3), INT16_C( 0), -INT16_C( 4), INT16_C( 4), -INT16_C( 4), -INT16_C( 3), -INT16_C( 1), INT16_C( 0) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 7076), INT16_C( 14745), INT16_C( 7398), INT16_C( 29178), INT16_C( 3815), INT16_C( 32585), -INT16_C( 32042), INT16_C( 27042) }, { -INT16_C( 884), INT16_C( 1843), INT16_C( 925), INT16_C( 3647), INT16_C( 477), INT16_C( 4073), -INT16_C( 4005), INT16_C( 3380) }, { -INT16_C( 111), INT16_C( 230), INT16_C( 116), INT16_C( 456), INT16_C( 60), INT16_C( 509), -INT16_C( 501), INT16_C( 423) }, { -INT16_C( 7), INT16_C( 14), INT16_C( 7), INT16_C( 28), INT16_C( 4), INT16_C( 32), -INT16_C( 31), INT16_C( 26) }, { -INT16_C( 1), INT16_C( 2), INT16_C( 1), INT16_C( 4), INT16_C( 0), INT16_C( 4), -INT16_C( 4), INT16_C( 3) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 3028), INT16_C( 9310), -INT16_C( 10122), -INT16_C( 20070), -INT16_C( 6309), INT16_C( 22861), INT16_C( 1237), INT16_C( 12887) }, { -INT16_C( 378), INT16_C( 1164), -INT16_C( 1265), -INT16_C( 2509), -INT16_C( 789), INT16_C( 2858), INT16_C( 155), INT16_C( 1611) }, { -INT16_C( 47), INT16_C( 145), -INT16_C( 158), -INT16_C( 314), -INT16_C( 99), INT16_C( 357), INT16_C( 19), INT16_C( 201) }, { -INT16_C( 3), INT16_C( 9), -INT16_C( 10), -INT16_C( 20), -INT16_C( 6), INT16_C( 22), INT16_C( 1), INT16_C( 13) }, { INT16_C( 0), INT16_C( 1), -INT16_C( 1), -INT16_C( 2), -INT16_C( 1), INT16_C( 3), INT16_C( 0), INT16_C( 2) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 3864), -INT16_C( 12693), INT16_C( 
25869), -INT16_C( 3009), -INT16_C( 30605), INT16_C( 18803), INT16_C( 5643), INT16_C( 14258) }, { -INT16_C( 483), -INT16_C( 1587), INT16_C( 3234), -INT16_C( 376), -INT16_C( 3826), INT16_C( 2350), INT16_C( 705), INT16_C( 1782) }, { -INT16_C( 60), -INT16_C( 198), INT16_C( 404), -INT16_C( 47), -INT16_C( 478), INT16_C( 294), INT16_C( 88), INT16_C( 223) }, { -INT16_C( 4), -INT16_C( 12), INT16_C( 25), -INT16_C( 3), -INT16_C( 30), INT16_C( 18), INT16_C( 6), INT16_C( 14) }, { INT16_C( 0), -INT16_C( 2), INT16_C( 3), INT16_C( 0), -INT16_C( 4), INT16_C( 2), INT16_C( 1), INT16_C( 2) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t r3 = simde_vrshrq_n_s16(a, 3); simde_int16x8_t r6 = simde_vrshrq_n_s16(a, 6); simde_int16x8_t r10 = simde_vrshrq_n_s16(a, 10); simde_int16x8_t r13 = simde_vrshrq_n_s16(a, 13); simde_int16x8_t r16 = simde_vrshrq_n_s16(a, 16); simde_test_arm_neon_assert_equal_i16x8(r3, simde_vld1q_s16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i16x8(r6, simde_vld1q_s16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i16x8(r10, simde_vld1q_s16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_i16x8(r13, simde_vld1q_s16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i16x8(r16, simde_vld1q_s16(test_vec[i].r16)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r3 = simde_vrshrq_n_s16(a, 3); simde_int16x8_t r6 = simde_vrshrq_n_s16(a, 6); simde_int16x8_t r10 = simde_vrshrq_n_s16(a, 10); simde_int16x8_t r13 = simde_vrshrq_n_s16(a, 13); simde_int16x8_t r16 = simde_vrshrq_n_s16(a, 16); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r6, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r16, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshrq_n_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t r6[4]; int32_t r13[4]; int32_t r19[4]; int32_t r26[4]; int32_t r32[4]; } test_vec[] = { { { INT32_C( 1489106393), INT32_C( 1731098689), INT32_C( 131701187), INT32_C( 116014444) }, { INT32_C( 23267287), INT32_C( 27048417), INT32_C( 2057831), INT32_C( 1812726) }, { INT32_C( 181776), INT32_C( 211316), INT32_C( 16077), INT32_C( 14162) }, { INT32_C( 2840), INT32_C( 3302), INT32_C( 251), INT32_C( 221) }, { INT32_C( 22), INT32_C( 26), INT32_C( 2), INT32_C( 2) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 1295077241), -INT32_C( 2045704932), -INT32_C( 1346710917), -INT32_C( 1766006852) }, { INT32_C( 20235582), -INT32_C( 31964140), -INT32_C( 21042358), -INT32_C( 27593857) }, { INT32_C( 158090), -INT32_C( 249720), -INT32_C( 164393), -INT32_C( 215577) }, { INT32_C( 2470), -INT32_C( 3902), -INT32_C( 2569), -INT32_C( 3368) }, { INT32_C( 19), -INT32_C( 30), -INT32_C( 20), -INT32_C( 26) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 384728788), -INT32_C( 1182982922), INT32_C( 566318773), INT32_C( 203926419) }, { INT32_C( 6011387), -INT32_C( 18484108), INT32_C( 8848731), INT32_C( 3186350) }, { INT32_C( 46964), -INT32_C( 144407), INT32_C( 69131), INT32_C( 24893) }, { INT32_C( 734), -INT32_C( 2256), INT32_C( 1080), INT32_C( 389) }, { INT32_C( 6), -INT32_C( 18), INT32_C( 8), INT32_C( 3) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 375019770), -INT32_C( 660772003), -INT32_C( 91728067), INT32_C( 194004023) }, { INT32_C( 5859684), -INT32_C( 10324563), -INT32_C( 1433251), INT32_C( 3031313) }, { INT32_C( 45779), -INT32_C( 80661), -INT32_C( 11197), 
INT32_C( 23682) }, { INT32_C( 715), -INT32_C( 1260), -INT32_C( 175), INT32_C( 370) }, { INT32_C( 6), -INT32_C( 10), -INT32_C( 1), INT32_C( 3) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 1188987198), INT32_C( 1332911770), -INT32_C( 2005912587), -INT32_C( 661284898) }, { -INT32_C( 18577925), INT32_C( 20826746), -INT32_C( 31342384), -INT32_C( 10332577) }, { -INT32_C( 145140), INT32_C( 162709), -INT32_C( 244862), -INT32_C( 80723) }, { -INT32_C( 2268), INT32_C( 2542), -INT32_C( 3826), -INT32_C( 1261) }, { -INT32_C( 18), INT32_C( 20), -INT32_C( 30), -INT32_C( 10) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 1290792943), -INT32_C( 1759146918), INT32_C( 445754851), -INT32_C( 1272569359) }, { INT32_C( 20168640), -INT32_C( 27486671), INT32_C( 6964920), -INT32_C( 19883896) }, { INT32_C( 157567), -INT32_C( 214740), INT32_C( 54413), -INT32_C( 155343) }, { INT32_C( 2462), -INT32_C( 3355), INT32_C( 850), -INT32_C( 2427) }, { INT32_C( 19), -INT32_C( 26), INT32_C( 7), -INT32_C( 19) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 963463071), -INT32_C( 611786778), -INT32_C( 245106669), INT32_C( 2127231119) }, { INT32_C( 15054110), -INT32_C( 9559168), -INT32_C( 3829792), INT32_C( 33237986) }, { INT32_C( 117610), -INT32_C( 74681), -INT32_C( 29920), INT32_C( 259672) }, { INT32_C( 1838), -INT32_C( 1167), -INT32_C( 468), INT32_C( 4057) }, { INT32_C( 14), -INT32_C( 9), -INT32_C( 4), INT32_C( 32) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 1103870439), INT32_C( 685371461), -INT32_C( 1908184419), INT32_C( 725772684) }, { INT32_C( 17247976), INT32_C( 10708929), -INT32_C( 29815382), INT32_C( 11340198) }, { INT32_C( 134750), INT32_C( 83664), -INT32_C( 232933), INT32_C( 88595) }, { INT32_C( 2105), INT32_C( 1307), -INT32_C( 3640), INT32_C( 1384) }, { INT32_C( 16), INT32_C( 10), -INT32_C( 28), INT32_C( 11) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, }; for (size_t i 
= 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t r6 = simde_vrshrq_n_s32(a, 6); simde_int32x4_t r13 = simde_vrshrq_n_s32(a, 13); simde_int32x4_t r19 = simde_vrshrq_n_s32(a, 19); simde_int32x4_t r26 = simde_vrshrq_n_s32(a, 26); simde_int32x4_t r32 = simde_vrshrq_n_s32(a, 32); simde_test_arm_neon_assert_equal_i32x4(r6, simde_vld1q_s32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i32x4(r13, simde_vld1q_s32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i32x4(r19, simde_vld1q_s32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_i32x4(r26, simde_vld1q_s32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i32x4(r32, simde_vld1q_s32(test_vec[i].r32)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r6 = simde_vrshrq_n_s32(a, 6); simde_int32x4_t r13 = simde_vrshrq_n_s32(a, 13); simde_int32x4_t r19 = simde_vrshrq_n_s32(a, 19); simde_int32x4_t r26 = simde_vrshrq_n_s32(a, 26); simde_int32x4_t r32 = simde_vrshrq_n_s32(a, 32); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r19, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r26, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r32, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshrq_n_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t r13[2]; int64_t r26[2]; int64_t r39[2]; int64_t r52[2]; int64_t r64[2]; } test_vec[] = { { { -INT64_C( 155505297264585433), INT64_C( 1918057023215863005) }, { -INT64_C( 18982580232493), INT64_C( 234137820216780) }, { -INT64_C( 2317209501), INT64_C( 28581276882) }, { -INT64_C( 282862), INT64_C( 3488925) }, { -INT64_C( 35), INT64_C( 426) }, { 
INT64_C( 0), INT64_C( 0) } }, { { INT64_C( 137607243321795074), INT64_C( 8939380644695547651) }, { INT64_C( 16797759194555), INT64_C( 1091232988854437) }, { INT64_C( 2050507714), INT64_C( 133207151960) }, { INT64_C( 250306), INT64_C( 16260639) }, { INT64_C( 31), INT64_C( 1985) }, { INT64_C( 0), INT64_C( 0) } }, { { INT64_C( 5774704828559649380), INT64_C( 3256442460466306147) }, { INT64_C( 704920023017535), INT64_C( 397514948787391) }, { INT64_C( 86049807497), INT64_C( 48524774022) }, { INT64_C( 10504127), INT64_C( 5923434) }, { INT64_C( 1282), INT64_C( 723) }, { INT64_C( 0), INT64_C( 0) } }, { { -INT64_C( 3699674341953902441), INT64_C( 3498799708177743292) }, { -INT64_C( 451620403070545), INT64_C( 427099573752166) }, { -INT64_C( 55129443734), INT64_C( 52136178437) }, { -INT64_C( 6729668), INT64_C( 6364280) }, { -INT64_C( 821), INT64_C( 777) }, { INT64_C( 0), INT64_C( 0) } }, { { -INT64_C( 2816868020946299355), INT64_C( 5524826802489360213) }, { -INT64_C( 343855959588171), INT64_C( 674417334288252) }, { -INT64_C( 41974604442), INT64_C( 82326334752) }, { -INT64_C( 5123853), INT64_C( 10049601) }, { -INT64_C( 625), INT64_C( 1227) }, { INT64_C( 0), INT64_C( 0) } }, { { -INT64_C( 4927329423533547256), INT64_C( 3122501998596704856) }, { -INT64_C( 601480642521185), INT64_C( 381164794750574) }, { -INT64_C( 73422929995), INT64_C( 46528905609) }, { -INT64_C( 8962760), INT64_C( 5679798) }, { -INT64_C( 1094), INT64_C( 693) }, { INT64_C( 0), INT64_C( 0) } }, { { INT64_C( 8736889974953323505), INT64_C( 6281295867776871978) }, { INT64_C( 1066514889520669), INT64_C( 766759749484481) }, { INT64_C( 130189805850), INT64_C( 93598602232) }, { INT64_C( 15892310), INT64_C( 11425611) }, { INT64_C( 1940), INT64_C( 1395) }, { INT64_C( 0), INT64_C( 0) } }, { { INT64_C( 1315271790320754310), INT64_C( 6540505840195341714) }, { INT64_C( 160555638466889), INT64_C( 798401591820720) }, { INT64_C( 19599076961), INT64_C( 97461131814) }, { INT64_C( 2392465), INT64_C( 11897111) }, { INT64_C( 292), 
INT64_C( 1452) }, { INT64_C( 0), INT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t r13 = simde_vrshrq_n_s64(a, 13); simde_int64x2_t r26 = simde_vrshrq_n_s64(a, 26); simde_int64x2_t r39 = simde_vrshrq_n_s64(a, 39); simde_int64x2_t r52 = simde_vrshrq_n_s64(a, 52); simde_int64x2_t r64 = simde_vrshrq_n_s64(a, 64); simde_test_arm_neon_assert_equal_i64x2(r13, simde_vld1q_s64(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i64x2(r26, simde_vld1q_s64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i64x2(r39, simde_vld1q_s64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_i64x2(r52, simde_vld1q_s64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_i64x2(r64, simde_vld1q_s64(test_vec[i].r64)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int64x2_t r13 = simde_vrshrq_n_s64(a, 13); simde_int64x2_t r26 = simde_vrshrq_n_s64(a, 26); simde_int64x2_t r39 = simde_vrshrq_n_s64(a, 39); simde_int64x2_t r52 = simde_vrshrq_n_s64(a, 52); simde_int64x2_t r64 = simde_vrshrq_n_s64(a, 64); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r26, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r39, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r52, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r64, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshrq_n_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t r1[16]; uint8_t r3[16]; uint8_t r5[16]; uint8_t r6[16]; uint8_t r8[16]; } test_vec[] = { { { UINT8_C(165), UINT8_C(115), UINT8_C(214), UINT8_C(181), UINT8_C( 93), UINT8_C(223), UINT8_C(180), UINT8_C(227), UINT8_C( 52), UINT8_C( 30), UINT8_C( 57), UINT8_C(152), 
UINT8_C(142), UINT8_C(132), UINT8_C(131), UINT8_C( 10) }, { UINT8_C( 83), UINT8_C( 58), UINT8_C(107), UINT8_C( 91), UINT8_C( 47), UINT8_C(112), UINT8_C( 90), UINT8_C(114), UINT8_C( 26), UINT8_C( 15), UINT8_C( 29), UINT8_C( 76), UINT8_C( 71), UINT8_C( 66), UINT8_C( 66), UINT8_C( 5) }, { UINT8_C( 21), UINT8_C( 14), UINT8_C( 27), UINT8_C( 23), UINT8_C( 12), UINT8_C( 28), UINT8_C( 23), UINT8_C( 28), UINT8_C( 7), UINT8_C( 4), UINT8_C( 7), UINT8_C( 19), UINT8_C( 18), UINT8_C( 17), UINT8_C( 16), UINT8_C( 1) }, { UINT8_C( 5), UINT8_C( 4), UINT8_C( 7), UINT8_C( 6), UINT8_C( 3), UINT8_C( 7), UINT8_C( 6), UINT8_C( 7), UINT8_C( 2), UINT8_C( 1), UINT8_C( 2), UINT8_C( 5), UINT8_C( 4), UINT8_C( 4), UINT8_C( 4), UINT8_C( 0) }, { UINT8_C( 3), UINT8_C( 2), UINT8_C( 3), UINT8_C( 3), UINT8_C( 1), UINT8_C( 3), UINT8_C( 3), UINT8_C( 4), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 2), UINT8_C( 2), UINT8_C( 2), UINT8_C( 2), UINT8_C( 0) }, { UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0) } }, { { UINT8_C( 12), UINT8_C(192), UINT8_C(153), UINT8_C(117), UINT8_C(226), UINT8_C(110), UINT8_C( 85), UINT8_C(196), UINT8_C(134), UINT8_C( 57), UINT8_C(149), UINT8_C(137), UINT8_C( 23), UINT8_C( 24), UINT8_C( 88), UINT8_C(188) }, { UINT8_C( 6), UINT8_C( 96), UINT8_C( 77), UINT8_C( 59), UINT8_C(113), UINT8_C( 55), UINT8_C( 43), UINT8_C( 98), UINT8_C( 67), UINT8_C( 29), UINT8_C( 75), UINT8_C( 69), UINT8_C( 12), UINT8_C( 12), UINT8_C( 44), UINT8_C( 94) }, { UINT8_C( 2), UINT8_C( 24), UINT8_C( 19), UINT8_C( 15), UINT8_C( 28), UINT8_C( 14), UINT8_C( 11), UINT8_C( 25), UINT8_C( 17), UINT8_C( 7), UINT8_C( 19), UINT8_C( 17), UINT8_C( 3), UINT8_C( 3), UINT8_C( 11), UINT8_C( 24) }, { UINT8_C( 0), UINT8_C( 6), UINT8_C( 5), UINT8_C( 4), UINT8_C( 7), UINT8_C( 3), UINT8_C( 3), UINT8_C( 6), UINT8_C( 4), UINT8_C( 2), UINT8_C( 5), UINT8_C( 4), UINT8_C( 1), 
UINT8_C( 1), UINT8_C( 3), UINT8_C( 6) }, { UINT8_C( 0), UINT8_C( 3), UINT8_C( 2), UINT8_C( 2), UINT8_C( 4), UINT8_C( 2), UINT8_C( 1), UINT8_C( 3), UINT8_C( 2), UINT8_C( 1), UINT8_C( 2), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 3) }, { UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1) } }, { { UINT8_C(140), UINT8_C( 47), UINT8_C(113), UINT8_C(233), UINT8_C( 14), UINT8_C( 38), UINT8_C(204), UINT8_C( 67), UINT8_C( 68), UINT8_C( 5), UINT8_C(219), UINT8_C(210), UINT8_C(137), UINT8_C( 95), UINT8_C(220), UINT8_C(150) }, { UINT8_C( 70), UINT8_C( 24), UINT8_C( 57), UINT8_C(117), UINT8_C( 7), UINT8_C( 19), UINT8_C(102), UINT8_C( 34), UINT8_C( 34), UINT8_C( 3), UINT8_C(110), UINT8_C(105), UINT8_C( 69), UINT8_C( 48), UINT8_C(110), UINT8_C( 75) }, { UINT8_C( 18), UINT8_C( 6), UINT8_C( 14), UINT8_C( 29), UINT8_C( 2), UINT8_C( 5), UINT8_C( 26), UINT8_C( 8), UINT8_C( 9), UINT8_C( 1), UINT8_C( 27), UINT8_C( 26), UINT8_C( 17), UINT8_C( 12), UINT8_C( 28), UINT8_C( 19) }, { UINT8_C( 4), UINT8_C( 1), UINT8_C( 4), UINT8_C( 7), UINT8_C( 0), UINT8_C( 1), UINT8_C( 6), UINT8_C( 2), UINT8_C( 2), UINT8_C( 0), UINT8_C( 7), UINT8_C( 7), UINT8_C( 4), UINT8_C( 3), UINT8_C( 7), UINT8_C( 5) }, { UINT8_C( 2), UINT8_C( 1), UINT8_C( 2), UINT8_C( 4), UINT8_C( 0), UINT8_C( 1), UINT8_C( 3), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 3), UINT8_C( 3), UINT8_C( 2), UINT8_C( 1), UINT8_C( 3), UINT8_C( 2) }, { UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1) } }, { { UINT8_C( 31), UINT8_C(117), UINT8_C( 11), UINT8_C( 1), UINT8_C(227), UINT8_C( 96), UINT8_C(197), UINT8_C(106), UINT8_C(153), UINT8_C( 90), UINT8_C(243), UINT8_C(176), UINT8_C(115), UINT8_C( 75), UINT8_C(109), 
UINT8_MAX }, { UINT8_C( 16), UINT8_C( 59), UINT8_C( 6), UINT8_C( 1), UINT8_C(114), UINT8_C( 48), UINT8_C( 99), UINT8_C( 53), UINT8_C( 77), UINT8_C( 45), UINT8_C(122), UINT8_C( 88), UINT8_C( 58), UINT8_C( 38), UINT8_C( 55), UINT8_C(128) }, { UINT8_C( 4), UINT8_C( 15), UINT8_C( 1), UINT8_C( 0), UINT8_C( 28), UINT8_C( 12), UINT8_C( 25), UINT8_C( 13), UINT8_C( 19), UINT8_C( 11), UINT8_C( 30), UINT8_C( 22), UINT8_C( 14), UINT8_C( 9), UINT8_C( 14), UINT8_C( 32) }, { UINT8_C( 1), UINT8_C( 4), UINT8_C( 0), UINT8_C( 0), UINT8_C( 7), UINT8_C( 3), UINT8_C( 6), UINT8_C( 3), UINT8_C( 5), UINT8_C( 3), UINT8_C( 8), UINT8_C( 6), UINT8_C( 4), UINT8_C( 2), UINT8_C( 3), UINT8_C( 8) }, { UINT8_C( 0), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 4), UINT8_C( 2), UINT8_C( 3), UINT8_C( 2), UINT8_C( 2), UINT8_C( 1), UINT8_C( 4), UINT8_C( 3), UINT8_C( 2), UINT8_C( 1), UINT8_C( 2), UINT8_C( 4) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1) } }, { { UINT8_C(122), UINT8_C(222), UINT8_C(232), UINT8_C(137), UINT8_C( 4), UINT8_C(180), UINT8_C(204), UINT8_C( 72), UINT8_C(185), UINT8_C(167), UINT8_C( 26), UINT8_C( 66), UINT8_C( 6), UINT8_C(247), UINT8_C(216), UINT8_C( 38) }, { UINT8_C( 61), UINT8_C(111), UINT8_C(116), UINT8_C( 69), UINT8_C( 2), UINT8_C( 90), UINT8_C(102), UINT8_C( 36), UINT8_C( 93), UINT8_C( 84), UINT8_C( 13), UINT8_C( 33), UINT8_C( 3), UINT8_C(124), UINT8_C(108), UINT8_C( 19) }, { UINT8_C( 15), UINT8_C( 28), UINT8_C( 29), UINT8_C( 17), UINT8_C( 1), UINT8_C( 23), UINT8_C( 26), UINT8_C( 9), UINT8_C( 23), UINT8_C( 21), UINT8_C( 3), UINT8_C( 8), UINT8_C( 1), UINT8_C( 31), UINT8_C( 27), UINT8_C( 5) }, { UINT8_C( 4), UINT8_C( 7), UINT8_C( 7), UINT8_C( 4), UINT8_C( 0), UINT8_C( 6), UINT8_C( 6), UINT8_C( 2), UINT8_C( 6), UINT8_C( 5), UINT8_C( 1), UINT8_C( 2), UINT8_C( 0), UINT8_C( 8), UINT8_C( 7), UINT8_C( 1) }, { UINT8_C( 
2), UINT8_C( 3), UINT8_C( 4), UINT8_C( 2), UINT8_C( 0), UINT8_C( 3), UINT8_C( 3), UINT8_C( 1), UINT8_C( 3), UINT8_C( 3), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 4), UINT8_C( 3), UINT8_C( 1) }, { UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0) } }, { { UINT8_C(108), UINT8_C(227), UINT8_C( 39), UINT8_C( 80), UINT8_C( 67), UINT8_C(237), UINT8_C(186), UINT8_C(221), UINT8_C( 71), UINT8_C(173), UINT8_C(141), UINT8_C(186), UINT8_C(248), UINT8_C(250), UINT8_C(185), UINT8_C(115) }, { UINT8_C( 54), UINT8_C(114), UINT8_C( 20), UINT8_C( 40), UINT8_C( 34), UINT8_C(119), UINT8_C( 93), UINT8_C(111), UINT8_C( 36), UINT8_C( 87), UINT8_C( 71), UINT8_C( 93), UINT8_C(124), UINT8_C(125), UINT8_C( 93), UINT8_C( 58) }, { UINT8_C( 14), UINT8_C( 28), UINT8_C( 5), UINT8_C( 10), UINT8_C( 8), UINT8_C( 30), UINT8_C( 23), UINT8_C( 28), UINT8_C( 9), UINT8_C( 22), UINT8_C( 18), UINT8_C( 23), UINT8_C( 31), UINT8_C( 31), UINT8_C( 23), UINT8_C( 14) }, { UINT8_C( 3), UINT8_C( 7), UINT8_C( 1), UINT8_C( 3), UINT8_C( 2), UINT8_C( 7), UINT8_C( 6), UINT8_C( 7), UINT8_C( 2), UINT8_C( 5), UINT8_C( 4), UINT8_C( 6), UINT8_C( 8), UINT8_C( 8), UINT8_C( 6), UINT8_C( 4) }, { UINT8_C( 2), UINT8_C( 4), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 4), UINT8_C( 3), UINT8_C( 3), UINT8_C( 1), UINT8_C( 3), UINT8_C( 2), UINT8_C( 3), UINT8_C( 4), UINT8_C( 4), UINT8_C( 3), UINT8_C( 2) }, { UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0) } }, { { UINT8_C(217), UINT8_C(161), UINT8_C(252), UINT8_C(221), UINT8_C( 85), UINT8_C(200), UINT8_C( 38), UINT8_C( 14), UINT8_C(111), UINT8_C( 64), UINT8_C( 81), UINT8_C(118), UINT8_C( 55), UINT8_C( 41), UINT8_C(156), UINT8_C(164) }, { UINT8_C(109), UINT8_C( 81), 
UINT8_C(126), UINT8_C(111), UINT8_C( 43), UINT8_C(100), UINT8_C( 19), UINT8_C( 7), UINT8_C( 56), UINT8_C( 32), UINT8_C( 41), UINT8_C( 59), UINT8_C( 28), UINT8_C( 21), UINT8_C( 78), UINT8_C( 82) }, { UINT8_C( 27), UINT8_C( 20), UINT8_C( 32), UINT8_C( 28), UINT8_C( 11), UINT8_C( 25), UINT8_C( 5), UINT8_C( 2), UINT8_C( 14), UINT8_C( 8), UINT8_C( 10), UINT8_C( 15), UINT8_C( 7), UINT8_C( 5), UINT8_C( 20), UINT8_C( 21) }, { UINT8_C( 7), UINT8_C( 5), UINT8_C( 8), UINT8_C( 7), UINT8_C( 3), UINT8_C( 6), UINT8_C( 1), UINT8_C( 0), UINT8_C( 3), UINT8_C( 2), UINT8_C( 3), UINT8_C( 4), UINT8_C( 2), UINT8_C( 1), UINT8_C( 5), UINT8_C( 5) }, { UINT8_C( 3), UINT8_C( 3), UINT8_C( 4), UINT8_C( 3), UINT8_C( 1), UINT8_C( 3), UINT8_C( 1), UINT8_C( 0), UINT8_C( 2), UINT8_C( 1), UINT8_C( 1), UINT8_C( 2), UINT8_C( 1), UINT8_C( 1), UINT8_C( 2), UINT8_C( 3) }, { UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1) } }, { { UINT8_C( 13), UINT8_C(195), UINT8_C(244), UINT8_C( 80), UINT8_C(176), UINT8_C(174), UINT8_C( 45), UINT8_C(248), UINT8_C( 91), UINT8_C(187), UINT8_C(178), UINT8_C( 83), UINT8_C(181), UINT8_C(108), UINT8_C(198), UINT8_C(142) }, { UINT8_C( 7), UINT8_C( 98), UINT8_C(122), UINT8_C( 40), UINT8_C( 88), UINT8_C( 87), UINT8_C( 23), UINT8_C(124), UINT8_C( 46), UINT8_C( 94), UINT8_C( 89), UINT8_C( 42), UINT8_C( 91), UINT8_C( 54), UINT8_C( 99), UINT8_C( 71) }, { UINT8_C( 2), UINT8_C( 24), UINT8_C( 31), UINT8_C( 10), UINT8_C( 22), UINT8_C( 22), UINT8_C( 6), UINT8_C( 31), UINT8_C( 11), UINT8_C( 23), UINT8_C( 22), UINT8_C( 10), UINT8_C( 23), UINT8_C( 14), UINT8_C( 25), UINT8_C( 18) }, { UINT8_C( 0), UINT8_C( 6), UINT8_C( 8), UINT8_C( 3), UINT8_C( 6), UINT8_C( 5), UINT8_C( 1), UINT8_C( 8), UINT8_C( 3), UINT8_C( 6), UINT8_C( 6), UINT8_C( 3), UINT8_C( 6), UINT8_C( 3), UINT8_C( 6), UINT8_C( 4) }, { UINT8_C( 0), UINT8_C( 3), UINT8_C( 4), 
UINT8_C( 1), UINT8_C( 3), UINT8_C( 3), UINT8_C( 1), UINT8_C( 4), UINT8_C( 1), UINT8_C( 3), UINT8_C( 3), UINT8_C( 1), UINT8_C( 3), UINT8_C( 2), UINT8_C( 3), UINT8_C( 2) }, { UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t r1 = simde_vrshrq_n_u8(a, 1); simde_uint8x16_t r3 = simde_vrshrq_n_u8(a, 3); simde_uint8x16_t r5 = simde_vrshrq_n_u8(a, 5); simde_uint8x16_t r6 = simde_vrshrq_n_u8(a, 6); simde_uint8x16_t r8 = simde_vrshrq_n_u8(a, 8); simde_test_arm_neon_assert_equal_u8x16(r1, simde_vld1q_u8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u8x16(r3, simde_vld1q_u8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u8x16(r5, simde_vld1q_u8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_u8x16(r6, simde_vld1q_u8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u8x16(r8, simde_vld1q_u8(test_vec[i].r8)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r1 = simde_vrshrq_n_u8(a, 1); simde_uint8x16_t r3 = simde_vrshrq_n_u8(a, 3); simde_uint8x16_t r5 = simde_vrshrq_n_u8(a, 5); simde_uint8x16_t r6 = simde_vrshrq_n_u8(a, 6); simde_uint8x16_t r8 = simde_vrshrq_n_u8(a, 8); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, r1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r5, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r8, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshrq_n_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { 
uint16_t a[8]; uint16_t r3[8]; uint16_t r6[8]; uint16_t r10[8]; uint16_t r13[8]; uint16_t r16[8]; } test_vec[] = { { { UINT16_C( 6904), UINT16_C(18521), UINT16_C(38790), UINT16_C(17479), UINT16_C( 9022), UINT16_C(30369), UINT16_C(47080), UINT16_C(51744) }, { UINT16_C( 863), UINT16_C( 2315), UINT16_C( 4849), UINT16_C( 2185), UINT16_C( 1128), UINT16_C( 3796), UINT16_C( 5885), UINT16_C( 6468) }, { UINT16_C( 108), UINT16_C( 289), UINT16_C( 606), UINT16_C( 273), UINT16_C( 141), UINT16_C( 475), UINT16_C( 736), UINT16_C( 809) }, { UINT16_C( 7), UINT16_C( 18), UINT16_C( 38), UINT16_C( 17), UINT16_C( 9), UINT16_C( 30), UINT16_C( 46), UINT16_C( 51) }, { UINT16_C( 1), UINT16_C( 2), UINT16_C( 5), UINT16_C( 2), UINT16_C( 1), UINT16_C( 4), UINT16_C( 6), UINT16_C( 6) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 1) } }, { { UINT16_C(38955), UINT16_C(50982), UINT16_C(43727), UINT16_C(47830), UINT16_C(43384), UINT16_C(65088), UINT16_C(30354), UINT16_C(35491) }, { UINT16_C( 4869), UINT16_C( 6373), UINT16_C( 5466), UINT16_C( 5979), UINT16_C( 5423), UINT16_C( 8136), UINT16_C( 3794), UINT16_C( 4436) }, { UINT16_C( 609), UINT16_C( 797), UINT16_C( 683), UINT16_C( 747), UINT16_C( 678), UINT16_C( 1017), UINT16_C( 474), UINT16_C( 555) }, { UINT16_C( 38), UINT16_C( 50), UINT16_C( 43), UINT16_C( 47), UINT16_C( 42), UINT16_C( 64), UINT16_C( 30), UINT16_C( 35) }, { UINT16_C( 5), UINT16_C( 6), UINT16_C( 5), UINT16_C( 6), UINT16_C( 5), UINT16_C( 8), UINT16_C( 4), UINT16_C( 4) }, { UINT16_C( 1), UINT16_C( 1), UINT16_C( 1), UINT16_C( 1), UINT16_C( 1), UINT16_C( 1), UINT16_C( 0), UINT16_C( 1) } }, { { UINT16_C(64656), UINT16_C( 6098), UINT16_C( 6803), UINT16_C(53595), UINT16_C(64573), UINT16_C( 9799), UINT16_C(26548), UINT16_C(57328) }, { UINT16_C( 8082), UINT16_C( 762), UINT16_C( 850), UINT16_C( 6699), UINT16_C( 8072), UINT16_C( 1225), UINT16_C( 3319), UINT16_C( 7166) }, { UINT16_C( 1010), UINT16_C( 95), UINT16_C( 106), UINT16_C( 
837), UINT16_C( 1009), UINT16_C( 153), UINT16_C( 415), UINT16_C( 896) }, { UINT16_C( 63), UINT16_C( 6), UINT16_C( 7), UINT16_C( 52), UINT16_C( 63), UINT16_C( 10), UINT16_C( 26), UINT16_C( 56) }, { UINT16_C( 8), UINT16_C( 1), UINT16_C( 1), UINT16_C( 7), UINT16_C( 8), UINT16_C( 1), UINT16_C( 3), UINT16_C( 7) }, { UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1) } }, { { UINT16_C( 5632), UINT16_C(53159), UINT16_C(32193), UINT16_C(14729), UINT16_C(51494), UINT16_C(47415), UINT16_C(55871), UINT16_C(53315) }, { UINT16_C( 704), UINT16_C( 6645), UINT16_C( 4024), UINT16_C( 1841), UINT16_C( 6437), UINT16_C( 5927), UINT16_C( 6984), UINT16_C( 6664) }, { UINT16_C( 88), UINT16_C( 831), UINT16_C( 503), UINT16_C( 230), UINT16_C( 805), UINT16_C( 741), UINT16_C( 873), UINT16_C( 833) }, { UINT16_C( 6), UINT16_C( 52), UINT16_C( 31), UINT16_C( 14), UINT16_C( 50), UINT16_C( 46), UINT16_C( 55), UINT16_C( 52) }, { UINT16_C( 1), UINT16_C( 6), UINT16_C( 4), UINT16_C( 2), UINT16_C( 6), UINT16_C( 6), UINT16_C( 7), UINT16_C( 7) }, { UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 1), UINT16_C( 1), UINT16_C( 1) } }, { { UINT16_C( 5846), UINT16_C(27111), UINT16_C(16944), UINT16_C(27963), UINT16_C(33342), UINT16_C(62099), UINT16_C(34026), UINT16_C(60114) }, { UINT16_C( 731), UINT16_C( 3389), UINT16_C( 2118), UINT16_C( 3495), UINT16_C( 4168), UINT16_C( 7762), UINT16_C( 4253), UINT16_C( 7514) }, { UINT16_C( 91), UINT16_C( 424), UINT16_C( 265), UINT16_C( 437), UINT16_C( 521), UINT16_C( 970), UINT16_C( 532), UINT16_C( 939) }, { UINT16_C( 6), UINT16_C( 26), UINT16_C( 17), UINT16_C( 27), UINT16_C( 33), UINT16_C( 61), UINT16_C( 33), UINT16_C( 59) }, { UINT16_C( 1), UINT16_C( 3), UINT16_C( 2), UINT16_C( 3), UINT16_C( 4), UINT16_C( 8), UINT16_C( 4), UINT16_C( 7) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 1), UINT16_C( 1), UINT16_C( 1) } }, { { UINT16_C(31130), 
UINT16_C(23481), UINT16_C(17142), UINT16_C( 7573), UINT16_C(52236), UINT16_C(19414), UINT16_C( 6567), UINT16_C(32027) }, { UINT16_C( 3891), UINT16_C( 2935), UINT16_C( 2143), UINT16_C( 947), UINT16_C( 6530), UINT16_C( 2427), UINT16_C( 821), UINT16_C( 4003) }, { UINT16_C( 486), UINT16_C( 367), UINT16_C( 268), UINT16_C( 118), UINT16_C( 816), UINT16_C( 303), UINT16_C( 103), UINT16_C( 500) }, { UINT16_C( 30), UINT16_C( 23), UINT16_C( 17), UINT16_C( 7), UINT16_C( 51), UINT16_C( 19), UINT16_C( 6), UINT16_C( 31) }, { UINT16_C( 4), UINT16_C( 3), UINT16_C( 2), UINT16_C( 1), UINT16_C( 6), UINT16_C( 2), UINT16_C( 1), UINT16_C( 4) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C( 559), UINT16_C(24551), UINT16_C( 8772), UINT16_C(33741), UINT16_C(24740), UINT16_C(36469), UINT16_C(18404), UINT16_C(32632) }, { UINT16_C( 70), UINT16_C( 3069), UINT16_C( 1097), UINT16_C( 4218), UINT16_C( 3093), UINT16_C( 4559), UINT16_C( 2301), UINT16_C( 4079) }, { UINT16_C( 9), UINT16_C( 384), UINT16_C( 137), UINT16_C( 527), UINT16_C( 387), UINT16_C( 570), UINT16_C( 288), UINT16_C( 510) }, { UINT16_C( 1), UINT16_C( 24), UINT16_C( 9), UINT16_C( 33), UINT16_C( 24), UINT16_C( 36), UINT16_C( 18), UINT16_C( 32) }, { UINT16_C( 0), UINT16_C( 3), UINT16_C( 1), UINT16_C( 4), UINT16_C( 3), UINT16_C( 4), UINT16_C( 2), UINT16_C( 4) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(12736), UINT16_C(47066), UINT16_C(28532), UINT16_C(32980), UINT16_C(43580), UINT16_C(58315), UINT16_C(59331), UINT16_C(62304) }, { UINT16_C( 1592), UINT16_C( 5883), UINT16_C( 3567), UINT16_C( 4123), UINT16_C( 5448), UINT16_C( 7289), UINT16_C( 7416), UINT16_C( 7788) }, { UINT16_C( 199), UINT16_C( 735), UINT16_C( 446), UINT16_C( 515), UINT16_C( 681), UINT16_C( 911), UINT16_C( 927), UINT16_C( 974) }, { UINT16_C( 12), UINT16_C( 46), UINT16_C( 28), UINT16_C( 32), UINT16_C( 
43), UINT16_C( 57), UINT16_C( 58), UINT16_C( 61) }, { UINT16_C( 2), UINT16_C( 6), UINT16_C( 3), UINT16_C( 4), UINT16_C( 5), UINT16_C( 7), UINT16_C( 7), UINT16_C( 8) }, { UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 1), UINT16_C( 1), UINT16_C( 1), UINT16_C( 1), UINT16_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t r3 = simde_vrshrq_n_u16(a, 3); simde_uint16x8_t r6 = simde_vrshrq_n_u16(a, 6); simde_uint16x8_t r10 = simde_vrshrq_n_u16(a, 10); simde_uint16x8_t r13 = simde_vrshrq_n_u16(a, 13); simde_uint16x8_t r16 = simde_vrshrq_n_u16(a, 16); simde_test_arm_neon_assert_equal_u16x8(r3, simde_vld1q_u16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u16x8(r6, simde_vld1q_u16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u16x8(r10, simde_vld1q_u16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_u16x8(r13, simde_vld1q_u16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u16x8(r16, simde_vld1q_u16(test_vec[i].r16)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t r3 = simde_vrshrq_n_u16(a, 3); simde_uint16x8_t r6 = simde_vrshrq_n_u16(a, 6); simde_uint16x8_t r10 = simde_vrshrq_n_u16(a, 10); simde_uint16x8_t r13 = simde_vrshrq_n_u16(a, 13); simde_uint16x8_t r16 = simde_vrshrq_n_u16(a, 16); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r16, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshrq_n_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t r6[4]; uint32_t r13[4]; uint32_t 
r19[4]; uint32_t r26[4]; uint32_t r32[4]; } test_vec[] = { { { UINT32_C( 434564502), UINT32_C(1326181401), UINT32_C(2107734821), UINT32_C(2106644388) }, { UINT32_C( 6790070), UINT32_C( 20721584), UINT32_C( 32933357), UINT32_C( 32916319) }, { UINT32_C( 53047), UINT32_C( 161887), UINT32_C( 257292), UINT32_C( 257159) }, { UINT32_C( 829), UINT32_C( 2529), UINT32_C( 4020), UINT32_C( 4018) }, { UINT32_C( 6), UINT32_C( 20), UINT32_C( 31), UINT32_C( 31) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(1426567452), UINT32_C( 955711525), UINT32_C(3896099004), UINT32_C(3085639456) }, { UINT32_C( 22290116), UINT32_C( 14932993), UINT32_C( 60876547), UINT32_C( 48213117) }, { UINT32_C( 174142), UINT32_C( 116664), UINT32_C( 475598), UINT32_C( 376665) }, { UINT32_C( 2721), UINT32_C( 1823), UINT32_C( 7431), UINT32_C( 5885) }, { UINT32_C( 21), UINT32_C( 14), UINT32_C( 58), UINT32_C( 46) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 1), UINT32_C( 1) } }, { { UINT32_C( 634441996), UINT32_C(3799309245), UINT32_C(4200535382), UINT32_C( 158920685) }, { UINT32_C( 9913156), UINT32_C( 59364207), UINT32_C( 65633365), UINT32_C( 2483136) }, { UINT32_C( 77447), UINT32_C( 463783), UINT32_C( 512761), UINT32_C( 19399) }, { UINT32_C( 1210), UINT32_C( 7247), UINT32_C( 8012), UINT32_C( 303) }, { UINT32_C( 9), UINT32_C( 57), UINT32_C( 63), UINT32_C( 2) }, { UINT32_C( 0), UINT32_C( 1), UINT32_C( 1), UINT32_C( 0) } }, { { UINT32_C(3328081824), UINT32_C(1006523775), UINT32_C( 908277525), UINT32_C(1659702870) }, { UINT32_C( 52001279), UINT32_C( 15726934), UINT32_C( 14191836), UINT32_C( 25932857) }, { UINT32_C( 406260), UINT32_C( 122867), UINT32_C( 110874), UINT32_C( 202600) }, { UINT32_C( 6348), UINT32_C( 1920), UINT32_C( 1732), UINT32_C( 3166) }, { UINT32_C( 50), UINT32_C( 15), UINT32_C( 14), UINT32_C( 25) }, { UINT32_C( 1), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(2626141663), UINT32_C(4001299608), UINT32_C(4276674065), UINT32_C(1846042829) }, { UINT32_C( 
41033463), UINT32_C( 62520306), UINT32_C( 66823032), UINT32_C( 28844419) }, { UINT32_C( 320574), UINT32_C( 488440), UINT32_C( 522055), UINT32_C( 225347) }, { UINT32_C( 5009), UINT32_C( 7632), UINT32_C( 8157), UINT32_C( 3521) }, { UINT32_C( 39), UINT32_C( 60), UINT32_C( 64), UINT32_C( 28) }, { UINT32_C( 1), UINT32_C( 1), UINT32_C( 1), UINT32_C( 0) } }, { { UINT32_C(1580492511), UINT32_C(3516543676), UINT32_C(3221732713), UINT32_C(2871194828) }, { UINT32_C( 24695195), UINT32_C( 54945995), UINT32_C( 50339574), UINT32_C( 44862419) }, { UINT32_C( 192931), UINT32_C( 429266), UINT32_C( 393278), UINT32_C( 350488) }, { UINT32_C( 3015), UINT32_C( 6707), UINT32_C( 6145), UINT32_C( 5476) }, { UINT32_C( 24), UINT32_C( 52), UINT32_C( 48), UINT32_C( 43) }, { UINT32_C( 0), UINT32_C( 1), UINT32_C( 1), UINT32_C( 1) } }, { { UINT32_C(1229499057), UINT32_C(3073885862), UINT32_C(1924538532), UINT32_C(1625341568) }, { UINT32_C( 19210923), UINT32_C( 48029467), UINT32_C( 30070915), UINT32_C( 25395962) }, { UINT32_C( 150085), UINT32_C( 375230), UINT32_C( 234929), UINT32_C( 198406) }, { UINT32_C( 2345), UINT32_C( 5863), UINT32_C( 3671), UINT32_C( 3100) }, { UINT32_C( 18), UINT32_C( 46), UINT32_C( 29), UINT32_C( 24) }, { UINT32_C( 0), UINT32_C( 1), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(3770553380), UINT32_C(2947700806), UINT32_C(3798972694), UINT32_C(1603113646) }, { UINT32_C( 58914897), UINT32_C( 46057825), UINT32_C( 59358948), UINT32_C( 25048651) }, { UINT32_C( 460273), UINT32_C( 359827), UINT32_C( 463742), UINT32_C( 195693) }, { UINT32_C( 7192), UINT32_C( 5622), UINT32_C( 7246), UINT32_C( 3058) }, { UINT32_C( 56), UINT32_C( 44), UINT32_C( 57), UINT32_C( 24) }, { UINT32_C( 1), UINT32_C( 1), UINT32_C( 1), UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t r6 = simde_vrshrq_n_u32(a, 6); simde_uint32x4_t r13 = simde_vrshrq_n_u32(a, 13); simde_uint32x4_t r19 = 
simde_vrshrq_n_u32(a, 19); simde_uint32x4_t r26 = simde_vrshrq_n_u32(a, 26); simde_uint32x4_t r32 = simde_vrshrq_n_u32(a, 32); simde_test_arm_neon_assert_equal_u32x4(r6, simde_vld1q_u32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u32x4(r13, simde_vld1q_u32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u32x4(r19, simde_vld1q_u32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_u32x4(r26, simde_vld1q_u32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u32x4(r32, simde_vld1q_u32(test_vec[i].r32)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t r6 = simde_vrshrq_n_u32(a, 6); simde_uint32x4_t r13 = simde_vrshrq_n_u32(a, 13); simde_uint32x4_t r19 = simde_vrshrq_n_u32(a, 19); simde_uint32x4_t r26 = simde_vrshrq_n_u32(a, 26); simde_uint32x4_t r32 = simde_vrshrq_n_u32(a, 32); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r19, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r26, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r32, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vrshrq_n_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint64_t r13[2]; uint64_t r26[2]; uint64_t r39[2]; uint64_t r52[2]; uint64_t r64[2]; } test_vec[] = { { { UINT64_C(14268038043289444930), UINT64_C(16018600277606132720) }, { UINT64_C( 1741703862706231), UINT64_C( 1955395541699967) }, { UINT64_C( 212610334803), UINT64_C( 238695744836) }, { UINT64_C( 25953410), UINT64_C( 29137664) }, { UINT64_C( 3168), UINT64_C( 3557) }, { UINT64_C( 1), UINT64_C( 1) } }, { { UINT64_C( 1004212844393534372), UINT64_C(17706996720702848391) }, { UINT64_C( 122584575731633), UINT64_C( 2161498623132672) }, { UINT64_C( 14963937467), UINT64_C( 
263854812394) }, { UINT64_C( 1826653), UINT64_C( 32208839) }, { UINT64_C( 223), UINT64_C( 3932) }, { UINT64_C( 0), UINT64_C( 1) } }, { { UINT64_C(17549758105951322565), UINT64_C( 6999528119195679791) }, { UINT64_C( 2142304456292886), UINT64_C( 854434584862754) }, { UINT64_C( 261511774450), UINT64_C( 104301096785) }, { UINT64_C( 31922824), UINT64_C( 12732067) }, { UINT64_C( 3897), UINT64_C( 1554) }, { UINT64_C( 1), UINT64_C( 0) } }, { { UINT64_C( 5743283352518859108), UINT64_C(15916761319608890170) }, { UINT64_C( 701084393618025), UINT64_C( 1942964028272570) }, { UINT64_C( 85581591018), UINT64_C( 237178226107) }, { UINT64_C( 10446972), UINT64_C( 28952420) }, { UINT64_C( 1275), UINT64_C( 3534) }, { UINT64_C( 0), UINT64_C( 1) } }, { { UINT64_C(11054701464871183732), UINT64_C( 2633199228186095213) }, { UINT64_C( 1349450862411033), UINT64_C( 321435452659435) }, { UINT64_C( 164727888478), UINT64_C( 39237726155) }, { UINT64_C( 20108385), UINT64_C( 4789761) }, { UINT64_C( 2455), UINT64_C( 585) }, { UINT64_C( 1), UINT64_C( 0) } }, { { UINT64_C( 5706200459837691930), UINT64_C( 5666971830166825139) }, { UINT64_C( 696557673320031), UINT64_C( 691769022237161) }, { UINT64_C( 85029012856), UINT64_C( 84444460722) }, { UINT64_C( 10379518), UINT64_C( 10308162) }, { UINT64_C( 1267), UINT64_C( 1258) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(16593283551739594821), UINT64_C( 4576109281162569117) }, { UINT64_C( 2025547308561962), UINT64_C( 558607089985665) }, { UINT64_C( 247259192940), UINT64_C( 68189342039) }, { UINT64_C( 30183007), UINT64_C( 8323894) }, { UINT64_C( 3684), UINT64_C( 1016) }, { UINT64_C( 1), UINT64_C( 0) } }, { { UINT64_C( 9140145378369293427), UINT64_C( 8836886232157394297) }, { UINT64_C( 1115740402632970), UINT64_C( 1078721463886401) }, { UINT64_C( 136198779618), UINT64_C( 131679866197) }, { UINT64_C( 16625828), UINT64_C( 16074202) }, { UINT64_C( 2030), UINT64_C( 1962) }, { UINT64_C( 0), UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t r13 = simde_vrshrq_n_u64(a, 13); simde_uint64x2_t r26 = simde_vrshrq_n_u64(a, 26); simde_uint64x2_t r39 = simde_vrshrq_n_u64(a, 39); simde_uint64x2_t r52 = simde_vrshrq_n_u64(a, 52); simde_uint64x2_t r64 = simde_vrshrq_n_u64(a, 64); simde_test_arm_neon_assert_equal_u64x2(r13, simde_vld1q_u64(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u64x2(r26, simde_vld1q_u64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u64x2(r39, simde_vld1q_u64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_u64x2(r52, simde_vld1q_u64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_u64x2(r64, simde_vld1q_u64(test_vec[i].r64)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t r13 = simde_vrshrq_n_u64(a, 13); simde_uint64x2_t r26 = simde_vrshrq_n_u64(a, 26); simde_uint64x2_t r39 = simde_vrshrq_n_u64(a, 39); simde_uint64x2_t r52 = simde_vrshrq_n_u64(a, 52); simde_uint64x2_t r64 = simde_vrshrq_n_u64(a, 64); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r26, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r39, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r52, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r64, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vrshr_n_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vrshr_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vrshr_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vrshr_n_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vrshr_n_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vrshr_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vrshr_n_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vrshr_n_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vrshrq_n_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vrshrq_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vrshrq_n_s32) 
SIMDE_TEST_FUNC_LIST_ENTRY(vrshrq_n_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vrshrq_n_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vrshrq_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vrshrq_n_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vrshrq_n_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/rsra_n.c000066400000000000000000003460031400333146700167550ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN rsra_n #include /* Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/rsra_n.h" #else #include "../../../simde/arm/neon.h" #endif static int test_simde_vrsra_n_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r1[8]; int8_t r3[8]; int8_t r5[8]; int8_t r6[8]; int8_t r8[8]; } test_vec[] = { { { INT8_C( 53), -INT8_C( 70), INT8_C( 119), INT8_C( 8), INT8_C( 71), -INT8_C( 53), -INT8_C( 109), INT8_C( 51) }, { INT8_C( 60), -INT8_C( 71), -INT8_C( 27), -INT8_C( 112), -INT8_C( 21), INT8_C( 75), INT8_C( 108), -INT8_C( 57) }, { INT8_C( 83), -INT8_C( 105), INT8_C( 106), -INT8_C( 48), INT8_C( 61), -INT8_C( 15), -INT8_C( 55), INT8_C( 23) }, { INT8_C( 61), -INT8_C( 79), INT8_C( 116), -INT8_C( 6), INT8_C( 68), -INT8_C( 44), -INT8_C( 95), INT8_C( 44) }, { INT8_C( 55), -INT8_C( 72), INT8_C( 118), INT8_C( 5), INT8_C( 70), -INT8_C( 51), -INT8_C( 106), INT8_C( 49) }, { INT8_C( 54), -INT8_C( 71), INT8_C( 119), INT8_C( 6), INT8_C( 71), -INT8_C( 52), -INT8_C( 107), INT8_C( 50) }, { INT8_C( 53), -INT8_C( 70), INT8_C( 119), INT8_C( 8), INT8_C( 71), -INT8_C( 53), -INT8_C( 109), INT8_C( 51) } }, { { -INT8_C( 24), INT8_C( 15), INT8_C( 107), INT8_C( 16), INT8_C( 7), -INT8_C( 81), -INT8_C( 124), -INT8_C( 98) }, { INT8_C( 26), -INT8_C( 104), -INT8_C( 113), -INT8_C( 127), INT8_C( 68), INT8_C( 60), INT8_C( 111), INT8_C( 121) }, { -INT8_C( 11), -INT8_C( 37), INT8_C( 51), -INT8_C( 47), INT8_C( 41), -INT8_C( 51), -INT8_C( 68), -INT8_C( 37) }, { -INT8_C( 21), INT8_C( 2), INT8_C( 93), INT8_C( 0), INT8_C( 16), -INT8_C( 73), -INT8_C( 110), -INT8_C( 
83) }, { -INT8_C( 23), INT8_C( 12), INT8_C( 103), INT8_C( 12), INT8_C( 9), -INT8_C( 79), -INT8_C( 121), -INT8_C( 94) }, { -INT8_C( 24), INT8_C( 13), INT8_C( 105), INT8_C( 14), INT8_C( 8), -INT8_C( 80), -INT8_C( 122), -INT8_C( 96) }, { -INT8_C( 24), INT8_C( 15), INT8_C( 107), INT8_C( 16), INT8_C( 7), -INT8_C( 81), -INT8_C( 124), -INT8_C( 98) } }, { { -INT8_C( 10), -INT8_C( 26), -INT8_C( 127), INT8_C( 61), -INT8_C( 79), INT8_C( 20), INT8_C( 113), -INT8_C( 18) }, { -INT8_C( 50), INT8_C( 86), INT8_C( 126), -INT8_C( 71), -INT8_C( 95), -INT8_C( 21), INT8_MIN, -INT8_C( 119) }, { -INT8_C( 35), INT8_C( 17), -INT8_C( 64), INT8_C( 26), -INT8_C( 126), INT8_C( 10), INT8_C( 49), -INT8_C( 77) }, { -INT8_C( 16), -INT8_C( 15), -INT8_C( 111), INT8_C( 52), -INT8_C( 91), INT8_C( 17), INT8_C( 97), -INT8_C( 33) }, { -INT8_C( 12), -INT8_C( 23), -INT8_C( 123), INT8_C( 59), -INT8_C( 82), INT8_C( 19), INT8_C( 109), -INT8_C( 22) }, { -INT8_C( 11), -INT8_C( 25), -INT8_C( 125), INT8_C( 60), -INT8_C( 80), INT8_C( 20), INT8_C( 111), -INT8_C( 20) }, { -INT8_C( 10), -INT8_C( 26), -INT8_C( 127), INT8_C( 61), -INT8_C( 79), INT8_C( 20), INT8_C( 113), -INT8_C( 18) } }, { { -INT8_C( 6), -INT8_C( 21), -INT8_C( 102), INT8_C( 2), -INT8_C( 101), INT8_C( 30), -INT8_C( 96), -INT8_C( 75) }, { -INT8_C( 73), INT8_C( 47), INT8_C( 55), -INT8_C( 5), INT8_C( 107), -INT8_C( 90), INT8_C( 116), INT8_C( 98) }, { -INT8_C( 42), INT8_C( 3), -INT8_C( 74), INT8_C( 0), -INT8_C( 47), -INT8_C( 15), -INT8_C( 38), -INT8_C( 26) }, { -INT8_C( 15), -INT8_C( 15), -INT8_C( 95), INT8_C( 1), -INT8_C( 88), INT8_C( 19), -INT8_C( 81), -INT8_C( 63) }, { -INT8_C( 8), -INT8_C( 20), -INT8_C( 100), INT8_C( 2), -INT8_C( 98), INT8_C( 27), -INT8_C( 92), -INT8_C( 72) }, { -INT8_C( 7), -INT8_C( 20), -INT8_C( 101), INT8_C( 2), -INT8_C( 99), INT8_C( 29), -INT8_C( 94), -INT8_C( 73) }, { -INT8_C( 6), -INT8_C( 21), -INT8_C( 102), INT8_C( 2), -INT8_C( 101), INT8_C( 30), -INT8_C( 96), -INT8_C( 75) } }, { { -INT8_C( 116), -INT8_C( 11), -INT8_C( 97), 
INT8_C( 61), INT8_C( 10), INT8_C( 16), INT8_C( 43), -INT8_C( 40) }, { INT8_C( 102), -INT8_C( 86), -INT8_C( 111), INT8_C( 8), -INT8_C( 107), INT8_C( 17), -INT8_C( 111), -INT8_C( 113) }, { -INT8_C( 65), -INT8_C( 54), INT8_C( 104), INT8_C( 65), -INT8_C( 43), INT8_C( 25), -INT8_C( 12), -INT8_C( 96) }, { -INT8_C( 103), -INT8_C( 22), -INT8_C( 111), INT8_C( 62), -INT8_C( 3), INT8_C( 18), INT8_C( 29), -INT8_C( 54) }, { -INT8_C( 113), -INT8_C( 14), -INT8_C( 100), INT8_C( 61), INT8_C( 7), INT8_C( 17), INT8_C( 40), -INT8_C( 44) }, { -INT8_C( 114), -INT8_C( 12), -INT8_C( 99), INT8_C( 61), INT8_C( 8), INT8_C( 16), INT8_C( 41), -INT8_C( 42) }, { -INT8_C( 116), -INT8_C( 11), -INT8_C( 97), INT8_C( 61), INT8_C( 10), INT8_C( 16), INT8_C( 43), -INT8_C( 40) } }, { { -INT8_C( 3), INT8_C( 43), -INT8_C( 111), -INT8_C( 104), INT8_C( 74), INT8_C( 49), INT8_C( 77), INT8_C( 1) }, { INT8_C( 97), -INT8_C( 124), -INT8_C( 4), -INT8_C( 52), INT8_C( 42), INT8_C( 112), INT8_C( 46), -INT8_C( 74) }, { INT8_C( 46), -INT8_C( 19), -INT8_C( 113), INT8_C( 126), INT8_C( 95), INT8_C( 105), INT8_C( 100), -INT8_C( 36) }, { INT8_C( 9), INT8_C( 28), -INT8_C( 111), -INT8_C( 110), INT8_C( 79), INT8_C( 63), INT8_C( 83), -INT8_C( 8) }, { INT8_C( 0), INT8_C( 39), -INT8_C( 111), -INT8_C( 106), INT8_C( 75), INT8_C( 53), INT8_C( 78), -INT8_C( 1) }, { -INT8_C( 1), INT8_C( 41), -INT8_C( 111), -INT8_C( 105), INT8_C( 75), INT8_C( 51), INT8_C( 78), INT8_C( 0) }, { -INT8_C( 3), INT8_C( 43), -INT8_C( 111), -INT8_C( 104), INT8_C( 74), INT8_C( 49), INT8_C( 77), INT8_C( 1) } }, { { INT8_C( 101), -INT8_C( 50), -INT8_C( 12), INT8_C( 111), -INT8_C( 34), INT8_C( 31), INT8_C( 71), INT8_C( 69) }, { -INT8_C( 55), -INT8_C( 40), INT8_C( 77), INT8_C( 94), -INT8_C( 22), -INT8_C( 34), -INT8_C( 18), -INT8_C( 25) }, { INT8_C( 74), -INT8_C( 70), INT8_C( 27), -INT8_C( 98), -INT8_C( 45), INT8_C( 14), INT8_C( 62), INT8_C( 57) }, { INT8_C( 94), -INT8_C( 55), -INT8_C( 2), INT8_C( 123), -INT8_C( 37), INT8_C( 27), INT8_C( 69), INT8_C( 66) }, { 
INT8_C( 99), -INT8_C( 51), -INT8_C( 10), INT8_C( 114), -INT8_C( 35), INT8_C( 30), INT8_C( 70), INT8_C( 68) }, { INT8_C( 100), -INT8_C( 51), -INT8_C( 11), INT8_C( 112), -INT8_C( 34), INT8_C( 30), INT8_C( 71), INT8_C( 69) }, { INT8_C( 101), -INT8_C( 50), -INT8_C( 12), INT8_C( 111), -INT8_C( 34), INT8_C( 31), INT8_C( 71), INT8_C( 69) } }, { { INT8_C( 10), INT8_MAX, INT8_MAX, INT8_C( 84), -INT8_C( 79), -INT8_C( 52), INT8_C( 85), INT8_C( 18) }, { INT8_C( 81), INT8_C( 81), -INT8_C( 34), INT8_C( 123), -INT8_C( 63), INT8_C( 13), INT8_C( 50), INT8_C( 38) }, { INT8_C( 51), -INT8_C( 88), INT8_C( 110), -INT8_C( 110), -INT8_C( 110), -INT8_C( 45), INT8_C( 110), INT8_C( 37) }, { INT8_C( 20), -INT8_C( 119), INT8_C( 123), INT8_C( 99), -INT8_C( 87), -INT8_C( 50), INT8_C( 91), INT8_C( 23) }, { INT8_C( 13), -INT8_C( 126), INT8_C( 126), INT8_C( 88), -INT8_C( 81), -INT8_C( 52), INT8_C( 87), INT8_C( 19) }, { INT8_C( 11), INT8_MIN, INT8_C( 126), INT8_C( 86), -INT8_C( 80), -INT8_C( 52), INT8_C( 86), INT8_C( 19) }, { INT8_C( 10), INT8_MAX, INT8_MAX, INT8_C( 84), -INT8_C( 79), -INT8_C( 52), INT8_C( 85), INT8_C( 18) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r1 = simde_vrsra_n_s8(a, b, 1); simde_int8x8_t r3 = simde_vrsra_n_s8(a, b, 3); simde_int8x8_t r5 = simde_vrsra_n_s8(a, b, 5); simde_int8x8_t r6 = simde_vrsra_n_s8(a, b, 6); simde_int8x8_t r8 = simde_vrsra_n_s8(a, b, 8); simde_test_arm_neon_assert_equal_i8x8(r1, simde_vld1_s8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i8x8(r3, simde_vld1_s8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i8x8(r5, simde_vld1_s8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_i8x8(r6, simde_vld1_s8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i8x8(r8, simde_vld1_s8(test_vec[i].r8)); } return 0; } static int test_simde_vrsra_n_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t 
b[4]; int16_t r3[4]; int16_t r6[4]; int16_t r10[4]; int16_t r13[4]; int16_t r16[4]; } test_vec[] = { { { -INT16_C( 9517), -INT16_C( 14208), -INT16_C( 10515), INT16_C( 3914) }, { INT16_C( 9067), INT16_C( 28811), INT16_C( 28074), -INT16_C( 27330) }, { -INT16_C( 8384), -INT16_C( 10607), -INT16_C( 7006), INT16_C( 498) }, { -INT16_C( 9375), -INT16_C( 13758), -INT16_C( 10076), INT16_C( 3487) }, { -INT16_C( 9508), -INT16_C( 14180), -INT16_C( 10488), INT16_C( 3887) }, { -INT16_C( 9516), -INT16_C( 14204), -INT16_C( 10512), INT16_C( 3911) }, { -INT16_C( 9517), -INT16_C( 14208), -INT16_C( 10515), INT16_C( 3914) } }, { { -INT16_C( 12201), INT16_C( 22657), INT16_C( 17622), INT16_C( 1012) }, { INT16_C( 17717), -INT16_C( 17474), INT16_C( 4757), INT16_C( 26980) }, { -INT16_C( 9986), INT16_C( 20473), INT16_C( 18217), INT16_C( 4385) }, { -INT16_C( 11924), INT16_C( 22384), INT16_C( 17696), INT16_C( 1434) }, { -INT16_C( 12184), INT16_C( 22640), INT16_C( 17627), INT16_C( 1038) }, { -INT16_C( 12199), INT16_C( 22655), INT16_C( 17623), INT16_C( 1015) }, { -INT16_C( 12201), INT16_C( 22657), INT16_C( 17622), INT16_C( 1012) } }, { { -INT16_C( 6932), -INT16_C( 9935), INT16_C( 31674), INT16_C( 9960) }, { INT16_C( 29855), INT16_C( 18838), -INT16_C( 10783), INT16_C( 14558) }, { -INT16_C( 3200), -INT16_C( 7580), INT16_C( 30326), INT16_C( 11780) }, { -INT16_C( 6466), -INT16_C( 9641), INT16_C( 31506), INT16_C( 10187) }, { -INT16_C( 6903), -INT16_C( 9917), INT16_C( 31663), INT16_C( 9974) }, { -INT16_C( 6928), -INT16_C( 9933), INT16_C( 31673), INT16_C( 9962) }, { -INT16_C( 6932), -INT16_C( 9935), INT16_C( 31674), INT16_C( 9960) } }, { { INT16_C( 24485), INT16_C( 31632), -INT16_C( 31580), -INT16_C( 9858) }, { INT16_C( 15817), INT16_C( 24468), -INT16_C( 1713), INT16_C( 15304) }, { INT16_C( 26462), -INT16_C( 30845), -INT16_C( 31794), -INT16_C( 7945) }, { INT16_C( 24732), INT16_C( 32014), -INT16_C( 31607), -INT16_C( 9619) }, { INT16_C( 24500), INT16_C( 31656), -INT16_C( 31582), -INT16_C( 9843) }, { 
INT16_C( 24487), INT16_C( 31635), -INT16_C( 31580), -INT16_C( 9856) }, { INT16_C( 24485), INT16_C( 31632), -INT16_C( 31580), -INT16_C( 9858) } }, { { -INT16_C( 1571), -INT16_C( 26604), -INT16_C( 908), INT16_C( 5054) }, { INT16_C( 21616), INT16_C( 20828), INT16_C( 15145), -INT16_C( 12663) }, { INT16_C( 1131), -INT16_C( 24000), INT16_C( 985), INT16_C( 3471) }, { -INT16_C( 1233), -INT16_C( 26279), -INT16_C( 671), INT16_C( 4856) }, { -INT16_C( 1550), -INT16_C( 26584), -INT16_C( 893), INT16_C( 5042) }, { -INT16_C( 1568), -INT16_C( 26601), -INT16_C( 906), INT16_C( 5052) }, { -INT16_C( 1571), -INT16_C( 26604), -INT16_C( 908), INT16_C( 5054) } }, { { INT16_C( 6554), INT16_C( 15945), -INT16_C( 14178), INT16_C( 26392) }, { -INT16_C( 21499), INT16_C( 21702), -INT16_C( 29019), -INT16_C( 31857) }, { INT16_C( 3867), INT16_C( 18658), -INT16_C( 17805), INT16_C( 22410) }, { INT16_C( 6218), INT16_C( 16284), -INT16_C( 14631), INT16_C( 25894) }, { INT16_C( 6533), INT16_C( 15966), -INT16_C( 14206), INT16_C( 26361) }, { INT16_C( 6551), INT16_C( 15948), -INT16_C( 14182), INT16_C( 26388) }, { INT16_C( 6554), INT16_C( 15945), -INT16_C( 14178), INT16_C( 26392) } }, { { -INT16_C( 23673), -INT16_C( 997), -INT16_C( 9825), INT16_C( 4111) }, { INT16_C( 27693), INT16_C( 22369), -INT16_C( 5209), INT16_C( 16677) }, { -INT16_C( 20211), INT16_C( 1799), -INT16_C( 10476), INT16_C( 6196) }, { -INT16_C( 23240), -INT16_C( 647), -INT16_C( 9906), INT16_C( 4372) }, { -INT16_C( 23646), -INT16_C( 975), -INT16_C( 9830), INT16_C( 4127) }, { -INT16_C( 23670), -INT16_C( 994), -INT16_C( 9826), INT16_C( 4113) }, { -INT16_C( 23673), -INT16_C( 997), -INT16_C( 9825), INT16_C( 4111) } }, { { INT16_C( 28420), -INT16_C( 23936), -INT16_C( 26569), INT16_C( 15370) }, { -INT16_C( 12220), -INT16_C( 5488), INT16_C( 8031), -INT16_C( 6547) }, { INT16_C( 26893), -INT16_C( 24622), -INT16_C( 25565), INT16_C( 14552) }, { INT16_C( 28229), -INT16_C( 24022), -INT16_C( 26444), INT16_C( 15268) }, { INT16_C( 28408), -INT16_C( 23941), 
-INT16_C( 26561), INT16_C( 15364) }, { INT16_C( 28419), -INT16_C( 23937), -INT16_C( 26568), INT16_C( 15369) }, { INT16_C( 28420), -INT16_C( 23936), -INT16_C( 26569), INT16_C( 15370) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r3 = simde_vrsra_n_s16(a, b, 3); simde_int16x4_t r6 = simde_vrsra_n_s16(a, b, 6); simde_int16x4_t r10 = simde_vrsra_n_s16(a, b, 10); simde_int16x4_t r13 = simde_vrsra_n_s16(a, b, 13); simde_int16x4_t r16 = simde_vrsra_n_s16(a, b, 16); simde_test_arm_neon_assert_equal_i16x4(r3, simde_vld1_s16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i16x4(r6, simde_vld1_s16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i16x4(r10, simde_vld1_s16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_i16x4(r13, simde_vld1_s16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i16x4(r16, simde_vld1_s16(test_vec[i].r16)); } return 0; } static int test_simde_vrsra_n_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r6[2]; int32_t r13[2]; int32_t r19[2]; int32_t r26[2]; int32_t r32[2]; } test_vec[] = { { { INT32_C( 1934731521), INT32_C( 76811743) }, { INT32_C( 328250290), -INT32_C( 1106175686) }, { INT32_C( 1939860432), INT32_C( 59527748) }, { INT32_C( 1934771591), INT32_C( 76676712) }, { INT32_C( 1934732147), INT32_C( 76809633) }, { INT32_C( 1934731526), INT32_C( 76811727) }, { INT32_C( 1934731521), INT32_C( 76811743) } }, { { -INT32_C( 790704782), INT32_C( 798683025) }, { INT32_C( 833800098), INT32_C( 1575841628) }, { -INT32_C( 777676655), INT32_C( 823305550) }, { -INT32_C( 790603000), INT32_C( 798875388) }, { -INT32_C( 790703192), INT32_C( 798686031) }, { -INT32_C( 790704770), INT32_C( 798683048) }, { -INT32_C( 790704782), INT32_C( 798683025) } }, { { -INT32_C( 137347560), -INT32_C( 33790645) }, { INT32_C( 1376816152), INT32_C( 386933413) }, { -INT32_C( 115834808), 
-INT32_C( 27744810) }, { -INT32_C( 137179492), -INT32_C( 33743412) }, { -INT32_C( 137344934), -INT32_C( 33789907) }, { -INT32_C( 137347539), -INT32_C( 33790639) }, { -INT32_C( 137347560), -INT32_C( 33790645) } }, { { -INT32_C( 2132283409), -INT32_C( 2119138594) }, { -INT32_C( 1498258871), -INT32_C( 385638447) }, { INT32_C( 2139273592), -INT32_C( 2125164195) }, { -INT32_C( 2132466302), -INT32_C( 2119185669) }, { -INT32_C( 2132286267), -INT32_C( 2119139330) }, { -INT32_C( 2132283431), -INT32_C( 2119138600) }, { -INT32_C( 2132283409), -INT32_C( 2119138594) } }, { { INT32_C( 685823197), INT32_C( 1361501241) }, { INT32_C( 245642857), INT32_C( 1210496088) }, { INT32_C( 689661367), INT32_C( 1380415242) }, { INT32_C( 685853183), INT32_C( 1361649007) }, { INT32_C( 685823666), INT32_C( 1361503550) }, { INT32_C( 685823201), INT32_C( 1361501259) }, { INT32_C( 685823197), INT32_C( 1361501241) } }, { { -INT32_C( 2100818525), -INT32_C( 654083953) }, { -INT32_C( 1417693734), INT32_C( 831816276) }, { -INT32_C( 2122969990), -INT32_C( 641086824) }, { -INT32_C( 2100991583), -INT32_C( 653982413) }, { -INT32_C( 2100821229), -INT32_C( 654082366) }, { -INT32_C( 2100818546), -INT32_C( 654083941) }, { -INT32_C( 2100818525), -INT32_C( 654083953) } }, { { -INT32_C( 1889962666), -INT32_C( 1159626927) }, { INT32_C( 248087990), -INT32_C( 581505223) }, { -INT32_C( 1886086291), -INT32_C( 1168712946) }, { -INT32_C( 1889932382), -INT32_C( 1159697912) }, { -INT32_C( 1889962193), -INT32_C( 1159628036) }, { -INT32_C( 1889962662), -INT32_C( 1159626936) }, { -INT32_C( 1889962666), -INT32_C( 1159626927) } }, { { -INT32_C( 1939922948), INT32_C( 1919246999) }, { INT32_C( 1797121047), -INT32_C( 1113804186) }, { -INT32_C( 1911842932), INT32_C( 1901843809) }, { -INT32_C( 1939703573), INT32_C( 1919111037) }, { -INT32_C( 1939919520), INT32_C( 1919244875) }, { -INT32_C( 1939922921), INT32_C( 1919246982) }, { -INT32_C( 1939922948), INT32_C( 1919246999) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r6 = simde_vrsra_n_s32(a, b, 6); simde_int32x2_t r13 = simde_vrsra_n_s32(a, b, 13); simde_int32x2_t r19 = simde_vrsra_n_s32(a, b, 19); simde_int32x2_t r26 = simde_vrsra_n_s32(a, b, 26); simde_int32x2_t r32 = simde_vrsra_n_s32(a, b, 32); simde_test_arm_neon_assert_equal_i32x2(r6, simde_vld1_s32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i32x2(r13, simde_vld1_s32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i32x2(r19, simde_vld1_s32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_i32x2(r26, simde_vld1_s32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i32x2(r32, simde_vld1_s32(test_vec[i].r32)); } return 0; } static int test_simde_vrsra_n_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; int64_t b[1]; int64_t r13[1]; int64_t r26[1]; int64_t r39[1]; int64_t r52[1]; int64_t r64[1]; } test_vec[] = { { { INT64_C( 1447598319563480591) }, { -INT64_C( 2394531401172035068) }, { INT64_C( 1447306018367048458) }, { INT64_C( 1447598283882182198) }, { INT64_C( 1447598319559124964) }, { INT64_C( 1447598319563480059) }, { INT64_C( 1447598319563480591) } }, { { INT64_C( 8563258826769889006) }, { INT64_C( 6079905976082192743) }, { INT64_C( 8564001002792359977) }, { INT64_C( 8563258917367547999) }, { INT64_C( 8563258826780948291) }, { INT64_C( 8563258826769890356) }, { INT64_C( 8563258826769889006) } }, { { INT64_C( 6043243903393687752) }, { INT64_C( 6089910573593558084) }, { INT64_C( 6043987300680503372) }, { INT64_C( 6043243994140426865) }, { INT64_C( 6043243903404765235) }, { INT64_C( 6043243903393689104) }, { INT64_C( 6043243903393687752) } }, { { INT64_C( 1200934605591272363) }, { -INT64_C( 4285678638112321044) }, { INT64_C( 1200411451460643418) }, { INT64_C( 1200934541729684151) }, { INT64_C( 1200934605583476759) }, { INT64_C( 1200934605591271411) }, { INT64_C( 1200934605591272363) } }, { { -INT64_C( 
8701119115092469347) }, { -INT64_C( 8025221136613887770) }, { -INT64_C( 8702098756344497409) }, { -INT64_C( 8701119234677583120) }, { -INT64_C( 8701119115107067139) }, { -INT64_C( 8701119115092471129) }, { -INT64_C( 8701119115092469347) } }, { { -INT64_C( 2575091484474450550) }, { -INT64_C( 1584822534416754624) }, { -INT64_C( 2575284944256483845) }, { -INT64_C( 2575091508090146599) }, { -INT64_C( 2575091484477333325) }, { -INT64_C( 2575091484474450902) }, { -INT64_C( 2575091484474450550) } }, { { -INT64_C( 2398141546330943763) }, { INT64_C( 2142528781178037318) }, { -INT64_C( 2397880007173085116) }, { -INT64_C( 2398141514404777032) }, { -INT64_C( 2398141546327046526) }, { -INT64_C( 2398141546330943287) }, { -INT64_C( 2398141546330943763) } }, { { INT64_C( 3455269093732548647) }, { -INT64_C( 8942902469132025428) }, { INT64_C( 3454177430833484679) }, { INT64_C( 3455268960472917414) }, { INT64_C( 3455269093716281602) }, { INT64_C( 3455269093732546661) }, { INT64_C( 3455269093732548647) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r13 = simde_vrsra_n_s64(a, b, 13); simde_int64x1_t r26 = simde_vrsra_n_s64(a, b, 26); simde_int64x1_t r39 = simde_vrsra_n_s64(a, b, 39); simde_int64x1_t r52 = simde_vrsra_n_s64(a, b, 52); simde_int64x1_t r64 = simde_vrsra_n_s64(a, b, 64); simde_test_arm_neon_assert_equal_i64x1(r13, simde_vld1_s64(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i64x1(r26, simde_vld1_s64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i64x1(r39, simde_vld1_s64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_i64x1(r52, simde_vld1_s64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_i64x1(r64, simde_vld1_s64(test_vec[i].r64)); } return 0; } static int test_simde_vrsra_n_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t b[8]; uint8_t r1[8]; uint8_t r3[8]; uint8_t r5[8]; uint8_t r6[8]; 
uint8_t r8[8]; } test_vec[] = { { { UINT8_C( 5), UINT8_C( 68), UINT8_C( 51), UINT8_C(236), UINT8_C(172), UINT8_C(201), UINT8_C( 57), UINT8_C( 63) }, { UINT8_C(176), UINT8_C( 4), UINT8_C(154), UINT8_C( 70), UINT8_C( 36), UINT8_C(134), UINT8_C(245), UINT8_C(171) }, { UINT8_C( 93), UINT8_C( 70), UINT8_C(128), UINT8_C( 15), UINT8_C(190), UINT8_C( 12), UINT8_C(180), UINT8_C(149) }, { UINT8_C( 27), UINT8_C( 69), UINT8_C( 70), UINT8_C(245), UINT8_C(177), UINT8_C(218), UINT8_C( 88), UINT8_C( 84) }, { UINT8_C( 11), UINT8_C( 68), UINT8_C( 56), UINT8_C(238), UINT8_C(173), UINT8_C(205), UINT8_C( 65), UINT8_C( 68) }, { UINT8_C( 8), UINT8_C( 68), UINT8_C( 53), UINT8_C(237), UINT8_C(173), UINT8_C(203), UINT8_C( 61), UINT8_C( 66) }, { UINT8_C( 6), UINT8_C( 68), UINT8_C( 52), UINT8_C(236), UINT8_C(172), UINT8_C(202), UINT8_C( 58), UINT8_C( 64) } }, { { UINT8_C(209), UINT8_C(154), UINT8_C(217), UINT8_C(111), UINT8_C(135), UINT8_C(222), UINT8_C( 31), UINT8_C( 95) }, { UINT8_C( 21), UINT8_C(220), UINT8_C(219), UINT8_C( 41), UINT8_C( 25), UINT8_C(155), UINT8_C(210), UINT8_C( 30) }, { UINT8_C(220), UINT8_C( 8), UINT8_C( 71), UINT8_C(132), UINT8_C(148), UINT8_C( 44), UINT8_C(136), UINT8_C(110) }, { UINT8_C(212), UINT8_C(182), UINT8_C(244), UINT8_C(116), UINT8_C(138), UINT8_C(241), UINT8_C( 57), UINT8_C( 99) }, { UINT8_C(210), UINT8_C(161), UINT8_C(224), UINT8_C(112), UINT8_C(136), UINT8_C(227), UINT8_C( 38), UINT8_C( 96) }, { UINT8_C(209), UINT8_C(157), UINT8_C(220), UINT8_C(112), UINT8_C(135), UINT8_C(224), UINT8_C( 34), UINT8_C( 95) }, { UINT8_C(209), UINT8_C(155), UINT8_C(218), UINT8_C(111), UINT8_C(135), UINT8_C(223), UINT8_C( 32), UINT8_C( 95) } }, { { UINT8_C(223), UINT8_C( 6), UINT8_C( 10), UINT8_C(139), UINT8_C(207), UINT8_C( 68), UINT8_C(202), UINT8_C(127) }, { UINT8_C( 72), UINT8_C(101), UINT8_C(198), UINT8_C(108), UINT8_C(235), UINT8_C(187), UINT8_C( 23), UINT8_C(188) }, { UINT8_C( 3), UINT8_C( 57), UINT8_C(109), UINT8_C(193), UINT8_C( 69), UINT8_C(162), UINT8_C(214), 
UINT8_C(221) }, { UINT8_C(232), UINT8_C( 19), UINT8_C( 35), UINT8_C(153), UINT8_C(236), UINT8_C( 91), UINT8_C(205), UINT8_C(151) }, { UINT8_C(225), UINT8_C( 9), UINT8_C( 16), UINT8_C(142), UINT8_C(214), UINT8_C( 74), UINT8_C(203), UINT8_C(133) }, { UINT8_C(224), UINT8_C( 8), UINT8_C( 13), UINT8_C(141), UINT8_C(211), UINT8_C( 71), UINT8_C(202), UINT8_C(130) }, { UINT8_C(223), UINT8_C( 6), UINT8_C( 11), UINT8_C(139), UINT8_C(208), UINT8_C( 69), UINT8_C(202), UINT8_C(128) } }, { { UINT8_C( 86), UINT8_C(240), UINT8_C( 44), UINT8_C(221), UINT8_C(206), UINT8_C( 75), UINT8_C( 61), UINT8_C(227) }, { UINT8_C( 39), UINT8_C( 24), UINT8_C( 12), UINT8_C( 64), UINT8_C(179), UINT8_C(223), UINT8_C( 95), UINT8_C(147) }, { UINT8_C(106), UINT8_C(252), UINT8_C( 50), UINT8_C(253), UINT8_C( 40), UINT8_C(187), UINT8_C(109), UINT8_C( 45) }, { UINT8_C( 91), UINT8_C(243), UINT8_C( 46), UINT8_C(229), UINT8_C(228), UINT8_C(103), UINT8_C( 73), UINT8_C(245) }, { UINT8_C( 87), UINT8_C(241), UINT8_C( 44), UINT8_C(223), UINT8_C(212), UINT8_C( 82), UINT8_C( 64), UINT8_C(232) }, { UINT8_C( 87), UINT8_C(240), UINT8_C( 44), UINT8_C(222), UINT8_C(209), UINT8_C( 78), UINT8_C( 62), UINT8_C(229) }, { UINT8_C( 86), UINT8_C(240), UINT8_C( 44), UINT8_C(221), UINT8_C(207), UINT8_C( 76), UINT8_C( 61), UINT8_C(228) } }, { { UINT8_C(229), UINT8_C(105), UINT8_C( 30), UINT8_C(180), UINT8_C(173), UINT8_C(233), UINT8_C( 52), UINT8_C(245) }, { UINT8_C( 78), UINT8_C(250), UINT8_C( 97), UINT8_C( 57), UINT8_C(181), UINT8_C(120), UINT8_C(246), UINT8_C( 11) }, { UINT8_C( 12), UINT8_C(230), UINT8_C( 79), UINT8_C(209), UINT8_C( 8), UINT8_C( 37), UINT8_C(175), UINT8_C(251) }, { UINT8_C(239), UINT8_C(136), UINT8_C( 42), UINT8_C(187), UINT8_C(196), UINT8_C(248), UINT8_C( 83), UINT8_C(246) }, { UINT8_C(231), UINT8_C(113), UINT8_C( 33), UINT8_C(182), UINT8_C(179), UINT8_C(237), UINT8_C( 60), UINT8_C(245) }, { UINT8_C(230), UINT8_C(109), UINT8_C( 32), UINT8_C(181), UINT8_C(176), UINT8_C(235), UINT8_C( 56), UINT8_C(245) }, { 
UINT8_C(229), UINT8_C(106), UINT8_C( 30), UINT8_C(180), UINT8_C(174), UINT8_C(233), UINT8_C( 53), UINT8_C(245) } }, { { UINT8_C(104), UINT8_C( 34), UINT8_C(233), UINT8_C( 54), UINT8_C(109), UINT8_C( 38), UINT8_C( 26), UINT8_C(148) }, { UINT8_C( 62), UINT8_C( 38), UINT8_C(212), UINT8_C(241), UINT8_C( 5), UINT8_C( 51), UINT8_C(132), UINT8_C(234) }, { UINT8_C(135), UINT8_C( 53), UINT8_C( 83), UINT8_C(175), UINT8_C(112), UINT8_C( 64), UINT8_C( 92), UINT8_C( 9) }, { UINT8_C(112), UINT8_C( 39), UINT8_C( 4), UINT8_C( 84), UINT8_C(110), UINT8_C( 44), UINT8_C( 43), UINT8_C(177) }, { UINT8_C(106), UINT8_C( 35), UINT8_C(240), UINT8_C( 62), UINT8_C(109), UINT8_C( 40), UINT8_C( 30), UINT8_C(155) }, { UINT8_C(105), UINT8_C( 35), UINT8_C(236), UINT8_C( 58), UINT8_C(109), UINT8_C( 39), UINT8_C( 28), UINT8_C(152) }, { UINT8_C(104), UINT8_C( 34), UINT8_C(234), UINT8_C( 55), UINT8_C(109), UINT8_C( 38), UINT8_C( 27), UINT8_C(149) } }, { { UINT8_C(157), UINT8_C(163), UINT8_C(159), UINT8_C( 74), UINT8_C(140), UINT8_C(211), UINT8_C( 64), UINT8_C(218) }, { UINT8_C(205), UINT8_C(161), UINT8_C( 19), UINT8_C(130), UINT8_C( 26), UINT8_C( 9), UINT8_C(142), UINT8_C(130) }, { UINT8_C( 4), UINT8_C(244), UINT8_C(169), UINT8_C(139), UINT8_C(153), UINT8_C(216), UINT8_C(135), UINT8_C( 27) }, { UINT8_C(183), UINT8_C(183), UINT8_C(161), UINT8_C( 90), UINT8_C(143), UINT8_C(212), UINT8_C( 82), UINT8_C(234) }, { UINT8_C(163), UINT8_C(168), UINT8_C(160), UINT8_C( 78), UINT8_C(141), UINT8_C(211), UINT8_C( 68), UINT8_C(222) }, { UINT8_C(160), UINT8_C(166), UINT8_C(159), UINT8_C( 76), UINT8_C(140), UINT8_C(211), UINT8_C( 66), UINT8_C(220) }, { UINT8_C(158), UINT8_C(164), UINT8_C(159), UINT8_C( 75), UINT8_C(140), UINT8_C(211), UINT8_C( 65), UINT8_C(219) } }, { { UINT8_C( 43), UINT8_C(119), UINT8_C(185), UINT8_C(152), UINT8_C(157), UINT8_C(211), UINT8_C( 44), UINT8_C(219) }, { UINT8_C(249), UINT8_C( 1), UINT8_C(204), UINT8_MAX, UINT8_C( 52), UINT8_C( 81), UINT8_C(233), UINT8_C(209) }, { UINT8_C(168), 
UINT8_C(120), UINT8_C( 31), UINT8_C( 24), UINT8_C(183), UINT8_C(252), UINT8_C(161), UINT8_C( 68) }, { UINT8_C( 74), UINT8_C(119), UINT8_C(211), UINT8_C(184), UINT8_C(164), UINT8_C(221), UINT8_C( 73), UINT8_C(245) }, { UINT8_C( 51), UINT8_C(119), UINT8_C(191), UINT8_C(160), UINT8_C(159), UINT8_C(214), UINT8_C( 51), UINT8_C(226) }, { UINT8_C( 47), UINT8_C(119), UINT8_C(188), UINT8_C(156), UINT8_C(158), UINT8_C(212), UINT8_C( 48), UINT8_C(222) }, { UINT8_C( 44), UINT8_C(119), UINT8_C(186), UINT8_C(153), UINT8_C(157), UINT8_C(211), UINT8_C( 45), UINT8_C(220) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r1 = simde_vrsra_n_u8(a, b, 1); simde_uint8x8_t r3 = simde_vrsra_n_u8(a, b, 3); simde_uint8x8_t r5 = simde_vrsra_n_u8(a, b, 5); simde_uint8x8_t r6 = simde_vrsra_n_u8(a, b, 6); simde_uint8x8_t r8 = simde_vrsra_n_u8(a, b, 8); simde_test_arm_neon_assert_equal_u8x8(r1, simde_vld1_u8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u8x8(r3, simde_vld1_u8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u8x8(r5, simde_vld1_u8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_u8x8(r6, simde_vld1_u8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u8x8(r8, simde_vld1_u8(test_vec[i].r8)); } return 0; } static int test_simde_vrsra_n_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r3[4]; uint16_t r6[4]; uint16_t r10[4]; uint16_t r13[4]; uint16_t r16[4]; } test_vec[] = { { { UINT16_C( 9797), UINT16_C( 3708), UINT16_C( 3895), UINT16_C(56667) }, { UINT16_C(40416), UINT16_C(18906), UINT16_C(39683), UINT16_C(49020) }, { UINT16_C(14849), UINT16_C( 6071), UINT16_C( 8855), UINT16_C(62795) }, { UINT16_C(10429), UINT16_C( 4003), UINT16_C( 4515), UINT16_C(57433) }, { UINT16_C( 9836), UINT16_C( 3726), UINT16_C( 3934), UINT16_C(56715) }, { UINT16_C( 9802), UINT16_C( 3710), UINT16_C( 3900), UINT16_C(56673) }, 
{ UINT16_C( 9798), UINT16_C( 3708), UINT16_C( 3896), UINT16_C(56668) } }, { { UINT16_C(27645), UINT16_C( 2893), UINT16_C(44795), UINT16_C(45319) }, { UINT16_C(59352), UINT16_C(25814), UINT16_C(46436), UINT16_C(43587) }, { UINT16_C(35064), UINT16_C( 6120), UINT16_C(50600), UINT16_C(50767) }, { UINT16_C(28572), UINT16_C( 3296), UINT16_C(45521), UINT16_C(46000) }, { UINT16_C(27703), UINT16_C( 2918), UINT16_C(44840), UINT16_C(45362) }, { UINT16_C(27652), UINT16_C( 2896), UINT16_C(44801), UINT16_C(45324) }, { UINT16_C(27646), UINT16_C( 2893), UINT16_C(44796), UINT16_C(45320) } }, { { UINT16_C(49115), UINT16_C( 5048), UINT16_C( 5326), UINT16_C(44784) }, { UINT16_C(51889), UINT16_C(46327), UINT16_C(29797), UINT16_C(25459) }, { UINT16_C(55601), UINT16_C(10839), UINT16_C( 9051), UINT16_C(47966) }, { UINT16_C(49926), UINT16_C( 5772), UINT16_C( 5792), UINT16_C(45182) }, { UINT16_C(49166), UINT16_C( 5093), UINT16_C( 5355), UINT16_C(44809) }, { UINT16_C(49121), UINT16_C( 5054), UINT16_C( 5330), UINT16_C(44787) }, { UINT16_C(49116), UINT16_C( 5049), UINT16_C( 5326), UINT16_C(44784) } }, { { UINT16_C(49631), UINT16_C(55918), UINT16_C(30319), UINT16_C(18315) }, { UINT16_C(24925), UINT16_C(49579), UINT16_C(60951), UINT16_C(62059) }, { UINT16_C(52747), UINT16_C(62115), UINT16_C(37938), UINT16_C(26072) }, { UINT16_C(50020), UINT16_C(56693), UINT16_C(31271), UINT16_C(19285) }, { UINT16_C(49655), UINT16_C(55966), UINT16_C(30379), UINT16_C(18376) }, { UINT16_C(49634), UINT16_C(55924), UINT16_C(30326), UINT16_C(18323) }, { UINT16_C(49631), UINT16_C(55919), UINT16_C(30320), UINT16_C(18316) } }, { { UINT16_C( 9389), UINT16_C(31749), UINT16_C(62776), UINT16_C(59690) }, { UINT16_C( 8895), UINT16_C( 9630), UINT16_C( 4502), UINT16_C(30088) }, { UINT16_C(10501), UINT16_C(32953), UINT16_C(63339), UINT16_C(63451) }, { UINT16_C( 9528), UINT16_C(31899), UINT16_C(62846), UINT16_C(60160) }, { UINT16_C( 9398), UINT16_C(31758), UINT16_C(62780), UINT16_C(59719) }, { UINT16_C( 9390), UINT16_C(31750), 
UINT16_C(62777), UINT16_C(59694) }, { UINT16_C( 9389), UINT16_C(31749), UINT16_C(62776), UINT16_C(59690) } }, { { UINT16_C(63186), UINT16_C(16719), UINT16_C(55916), UINT16_C(51593) }, { UINT16_C(13371), UINT16_C(21131), UINT16_C(63011), UINT16_C(53317) }, { UINT16_C(64857), UINT16_C(19360), UINT16_C(63792), UINT16_C(58258) }, { UINT16_C(63395), UINT16_C(17049), UINT16_C(56901), UINT16_C(52426) }, { UINT16_C(63199), UINT16_C(16740), UINT16_C(55978), UINT16_C(51645) }, { UINT16_C(63188), UINT16_C(16722), UINT16_C(55924), UINT16_C(51600) }, { UINT16_C(63186), UINT16_C(16719), UINT16_C(55917), UINT16_C(51594) } }, { { UINT16_C(18970), UINT16_C(21068), UINT16_C(30528), UINT16_C(65340) }, { UINT16_C(55961), UINT16_C(12068), UINT16_C(44267), UINT16_C(48804) }, { UINT16_C(25965), UINT16_C(22577), UINT16_C(36061), UINT16_C( 5905) }, { UINT16_C(19844), UINT16_C(21257), UINT16_C(31220), UINT16_C( 567) }, { UINT16_C(19025), UINT16_C(21080), UINT16_C(30571), UINT16_C(65388) }, { UINT16_C(18977), UINT16_C(21069), UINT16_C(30533), UINT16_C(65346) }, { UINT16_C(18971), UINT16_C(21068), UINT16_C(30529), UINT16_C(65341) } }, { { UINT16_C(62371), UINT16_C( 4095), UINT16_C(35021), UINT16_C( 2265) }, { UINT16_C(25789), UINT16_C(57435), UINT16_C(41050), UINT16_C(30128) }, { UINT16_C( 59), UINT16_C(11274), UINT16_C(40152), UINT16_C( 6031) }, { UINT16_C(62774), UINT16_C( 4992), UINT16_C(35662), UINT16_C( 2736) }, { UINT16_C(62396), UINT16_C( 4151), UINT16_C(35061), UINT16_C( 2294) }, { UINT16_C(62374), UINT16_C( 4102), UINT16_C(35026), UINT16_C( 2269) }, { UINT16_C(62371), UINT16_C( 4096), UINT16_C(35022), UINT16_C( 2265) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r3 = simde_vrsra_n_u16(a, b, 3); simde_uint16x4_t r6 = simde_vrsra_n_u16(a, b, 6); simde_uint16x4_t r10 = simde_vrsra_n_u16(a, b, 10); simde_uint16x4_t r13 = 
simde_vrsra_n_u16(a, b, 13); simde_uint16x4_t r16 = simde_vrsra_n_u16(a, b, 16); simde_test_arm_neon_assert_equal_u16x4(r3, simde_vld1_u16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u16x4(r6, simde_vld1_u16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u16x4(r10, simde_vld1_u16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_u16x4(r13, simde_vld1_u16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u16x4(r16, simde_vld1_u16(test_vec[i].r16)); } return 0; } static int test_simde_vrsra_n_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r6[2]; uint32_t r13[2]; uint32_t r19[2]; uint32_t r26[2]; uint32_t r32[2]; } test_vec[] = { { { UINT32_C(2856545990), UINT32_C(3559013179) }, { UINT32_C(1802418243), UINT32_C(1802606704) }, { UINT32_C(2884708775), UINT32_C(3587178909) }, { UINT32_C(2856766012), UINT32_C(3559233224) }, { UINT32_C(2856549428), UINT32_C(3559016617) }, { UINT32_C(2856546017), UINT32_C(3559013206) }, { UINT32_C(2856545990), UINT32_C(3559013179) } }, { { UINT32_C( 120642863), UINT32_C(3323490355) }, { UINT32_C(1871857396), UINT32_C( 336916046) }, { UINT32_C( 149890635), UINT32_C(3328754668) }, { UINT32_C( 120871361), UINT32_C(3323531482) }, { UINT32_C( 120646433), UINT32_C(3323490998) }, { UINT32_C( 120642891), UINT32_C(3323490360) }, { UINT32_C( 120642863), UINT32_C(3323490355) } }, { { UINT32_C(2545899356), UINT32_C(3597394322) }, { UINT32_C( 155310745), UINT32_C(2708779890) }, { UINT32_C(2548326086), UINT32_C(3639719008) }, { UINT32_C(2545918315), UINT32_C(3597724984) }, { UINT32_C(2545899652), UINT32_C(3597399489) }, { UINT32_C(2545899358), UINT32_C(3597394362) }, { UINT32_C(2545899356), UINT32_C(3597394323) } }, { { UINT32_C(3282609296), UINT32_C( 160022804) }, { UINT32_C(1450711815), UINT32_C(1701481481) }, { UINT32_C(3305276668), UINT32_C( 186608452) }, { UINT32_C(3282786385), UINT32_C( 160230504) }, { UINT32_C(3282612063), UINT32_C( 160026049) }, { UINT32_C(3282609318), UINT32_C( 160022829) }, { 
UINT32_C(3282609296), UINT32_C( 160022804) } }, { { UINT32_C(1996302820), UINT32_C(2739693578) }, { UINT32_C(3048050242), UINT32_C(3512082753) }, { UINT32_C(2043928605), UINT32_C(2794569871) }, { UINT32_C(1996674896), UINT32_C(2740122299) }, { UINT32_C(1996308634), UINT32_C(2739700277) }, { UINT32_C(1996302865), UINT32_C(2739693630) }, { UINT32_C(1996302821), UINT32_C(2739693579) } }, { { UINT32_C(3667197894), UINT32_C(3353550272) }, { UINT32_C(1109220408), UINT32_C(3433531624) }, { UINT32_C(3684529463), UINT32_C(3407199204) }, { UINT32_C(3667333297), UINT32_C(3353969404) }, { UINT32_C(3667200010), UINT32_C(3353556821) }, { UINT32_C(3667197911), UINT32_C(3353550323) }, { UINT32_C(3667197894), UINT32_C(3353550273) } }, { { UINT32_C(3158549681), UINT32_C(1331662605) }, { UINT32_C(1577323549), UINT32_C(4096744238) }, { UINT32_C(3183195361), UINT32_C(1395674234) }, { UINT32_C(3158742225), UINT32_C(1332162696) }, { UINT32_C(3158552690), UINT32_C(1331670419) }, { UINT32_C(3158549705), UINT32_C(1331662666) }, { UINT32_C(3158549681), UINT32_C(1331662606) } }, { { UINT32_C( 449758042), UINT32_C( 434221792) }, { UINT32_C(4133224206), UINT32_C( 952304263) }, { UINT32_C( 514339670), UINT32_C( 449101546) }, { UINT32_C( 450262586), UINT32_C( 434338040) }, { UINT32_C( 449765925), UINT32_C( 434223608) }, { UINT32_C( 449758104), UINT32_C( 434221806) }, { UINT32_C( 449758043), UINT32_C( 434221792) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r6 = simde_vrsra_n_u32(a, b, 6); simde_uint32x2_t r13 = simde_vrsra_n_u32(a, b, 13); simde_uint32x2_t r19 = simde_vrsra_n_u32(a, b, 19); simde_uint32x2_t r26 = simde_vrsra_n_u32(a, b, 26); simde_uint32x2_t r32 = simde_vrsra_n_u32(a, b, 32); simde_test_arm_neon_assert_equal_u32x2(r6, simde_vld1_u32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u32x2(r13, 
simde_vld1_u32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u32x2(r19, simde_vld1_u32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_u32x2(r26, simde_vld1_u32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u32x2(r32, simde_vld1_u32(test_vec[i].r32)); } return 0; } static int test_simde_vrsra_n_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; uint64_t b[1]; uint64_t r13[1]; uint64_t r26[1]; uint64_t r39[1]; uint64_t r52[1]; uint64_t r64[1]; } test_vec[] = { { { UINT64_C(10648944995671283270) }, { UINT64_C(11505581380092416709) }, { UINT64_C(10650349485585845333) }, { UINT64_C(10648945167117806044) }, { UINT64_C(10648944995692211801) }, { UINT64_C(10648944995671285825) }, { UINT64_C(10648944995671283271) } }, { { UINT64_C( 1360706969982614345) }, { UINT64_C( 8710257663316107129) }, { UINT64_C( 1361770233857530862) }, { UINT64_C( 1360707099775567826) }, { UINT64_C( 1360706969998458211) }, { UINT64_C( 1360706969982616279) }, { UINT64_C( 1360706969982614345) } }, { { UINT64_C(17781570536506952227) }, { UINT64_C(15606091530443261350) }, { UINT64_C(17783475576976977039) }, { UINT64_C(17781570769055837728) }, { UINT64_C(17781570536535339542) }, { UINT64_C(17781570536506955692) }, { UINT64_C(17781570536506952228) } }, { { UINT64_C( 8578899884654840372) }, { UINT64_C( 6028486122644644862) }, { UINT64_C( 8579635783839733517) }, { UINT64_C( 8578899974486283840) }, { UINT64_C( 8578899884665806124) }, { UINT64_C( 8578899884654841711) }, { UINT64_C( 8578899884654840372) } }, { { UINT64_C( 3399734522907243782) }, { UINT64_C(12192853280877214670) }, { UINT64_C( 3401222908317507114) }, { UINT64_C( 3399734704594915933) }, { UINT64_C( 3399734522929422453) }, { UINT64_C( 3399734522907246489) }, { UINT64_C( 3399734522907243783) } }, { { UINT64_C( 1222362336126677633) }, { UINT64_C( 1394684376301801943) }, { UINT64_C( 1222532585684331662) }, { UINT64_C( 1222362356909094339) }, { UINT64_C( 1222362336129214549) }, { UINT64_C( 1222362336126677943) }, { UINT64_C( 
1222362336126677633) } }, { { UINT64_C(17679296120834744224) }, { UINT64_C(13120260273628937717) }, { UINT64_C(17680897715106427444) }, { UINT64_C(17679296316341857467) }, { UINT64_C(17679296120858609838) }, { UINT64_C(17679296120834747137) }, { UINT64_C(17679296120834744225) } }, { { UINT64_C( 4925578365873011001) }, { UINT64_C( 2800296166150610108) }, { UINT64_C( 4925920198901105558) }, { UINT64_C( 4925578407600675563) }, { UINT64_C( 4925578365878104710) }, { UINT64_C( 4925578365873011623) }, { UINT64_C( 4925578365873011001) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r13 = simde_vrsra_n_u64(a, b, 13); simde_uint64x1_t r26 = simde_vrsra_n_u64(a, b, 26); simde_uint64x1_t r39 = simde_vrsra_n_u64(a, b, 39); simde_uint64x1_t r52 = simde_vrsra_n_u64(a, b, 52); simde_uint64x1_t r64 = simde_vrsra_n_u64(a, b, 64); simde_test_arm_neon_assert_equal_u64x1(r13, simde_vld1_u64(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u64x1(r26, simde_vld1_u64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u64x1(r39, simde_vld1_u64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_u64x1(r52, simde_vld1_u64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_u64x1(r64, simde_vld1_u64(test_vec[i].r64)); } return 0; } static int test_simde_vrsraq_n_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r1[16]; int8_t r3[16]; int8_t r5[16]; int8_t r6[16]; int8_t r8[16]; } test_vec[] = { { { INT8_C( 105), -INT8_C( 86), INT8_C( 14), -INT8_C( 118), -INT8_C( 4), INT8_C( 43), INT8_C( 78), -INT8_C( 2), -INT8_C( 92), -INT8_C( 12), -INT8_C( 99), INT8_C( 44), INT8_C( 42), INT8_C( 9), INT8_C( 5), -INT8_C( 70) }, { -INT8_C( 84), INT8_C( 58), INT8_C( 42), -INT8_C( 63), INT8_C( 24), -INT8_C( 44), -INT8_C( 126), -INT8_C( 42), INT8_C( 85), INT8_C( 117), -INT8_C( 70), INT8_C( 115), INT8_C( 19), INT8_C( 117), INT8_C( 39), 
INT8_C( 124) }, { INT8_C( 63), -INT8_C( 57), INT8_C( 35), INT8_C( 107), INT8_C( 8), INT8_C( 21), INT8_C( 15), -INT8_C( 23), -INT8_C( 49), INT8_C( 47), INT8_C( 122), INT8_C( 102), INT8_C( 52), INT8_C( 68), INT8_C( 25), -INT8_C( 8) }, { INT8_C( 95), -INT8_C( 79), INT8_C( 19), -INT8_C( 126), -INT8_C( 1), INT8_C( 38), INT8_C( 62), -INT8_C( 7), -INT8_C( 81), INT8_C( 3), -INT8_C( 108), INT8_C( 58), INT8_C( 44), INT8_C( 24), INT8_C( 10), -INT8_C( 54) }, { INT8_C( 102), -INT8_C( 84), INT8_C( 15), -INT8_C( 120), -INT8_C( 3), INT8_C( 42), INT8_C( 74), -INT8_C( 3), -INT8_C( 89), -INT8_C( 8), -INT8_C( 101), INT8_C( 48), INT8_C( 43), INT8_C( 13), INT8_C( 6), -INT8_C( 66) }, { INT8_C( 104), -INT8_C( 85), INT8_C( 15), -INT8_C( 119), -INT8_C( 4), INT8_C( 42), INT8_C( 76), -INT8_C( 3), -INT8_C( 91), -INT8_C( 10), -INT8_C( 100), INT8_C( 46), INT8_C( 42), INT8_C( 11), INT8_C( 6), -INT8_C( 68) }, { INT8_C( 105), -INT8_C( 86), INT8_C( 14), -INT8_C( 118), -INT8_C( 4), INT8_C( 43), INT8_C( 78), -INT8_C( 2), -INT8_C( 92), -INT8_C( 12), -INT8_C( 99), INT8_C( 44), INT8_C( 42), INT8_C( 9), INT8_C( 5), -INT8_C( 70) } }, { { INT8_C( 32), INT8_C( 53), INT8_C( 7), INT8_C( 28), INT8_C( 97), INT8_C( 85), INT8_C( 26), INT8_C( 5), INT8_C( 73), -INT8_C( 73), INT8_C( 49), INT8_C( 115), -INT8_C( 63), INT8_C( 54), INT8_C( 45), INT8_C( 109) }, { INT8_C( 112), INT8_C( 87), INT8_C( 46), -INT8_C( 120), INT8_C( 43), -INT8_C( 80), INT8_C( 95), -INT8_C( 127), INT8_C( 37), INT8_C( 25), -INT8_C( 12), INT8_C( 57), -INT8_C( 114), INT8_C( 27), -INT8_C( 75), -INT8_C( 82) }, { INT8_C( 88), INT8_C( 97), INT8_C( 30), -INT8_C( 32), INT8_C( 119), INT8_C( 45), INT8_C( 74), -INT8_C( 58), INT8_C( 92), -INT8_C( 60), INT8_C( 43), -INT8_C( 112), -INT8_C( 120), INT8_C( 68), INT8_C( 8), INT8_C( 68) }, { INT8_C( 46), INT8_C( 64), INT8_C( 13), INT8_C( 13), INT8_C( 102), INT8_C( 75), INT8_C( 38), -INT8_C( 11), INT8_C( 78), -INT8_C( 70), INT8_C( 48), INT8_C( 122), -INT8_C( 77), INT8_C( 57), INT8_C( 36), INT8_C( 99) }, { INT8_C( 36), 
INT8_C( 56), INT8_C( 8), INT8_C( 24), INT8_C( 98), INT8_C( 83), INT8_C( 29), INT8_C( 1), INT8_C( 74), -INT8_C( 72), INT8_C( 49), INT8_C( 117), -INT8_C( 67), INT8_C( 55), INT8_C( 43), INT8_C( 106) }, { INT8_C( 34), INT8_C( 54), INT8_C( 8), INT8_C( 26), INT8_C( 98), INT8_C( 84), INT8_C( 27), INT8_C( 3), INT8_C( 74), -INT8_C( 73), INT8_C( 49), INT8_C( 116), -INT8_C( 65), INT8_C( 54), INT8_C( 44), INT8_C( 108) }, { INT8_C( 32), INT8_C( 53), INT8_C( 7), INT8_C( 28), INT8_C( 97), INT8_C( 85), INT8_C( 26), INT8_C( 5), INT8_C( 73), -INT8_C( 73), INT8_C( 49), INT8_C( 115), -INT8_C( 63), INT8_C( 54), INT8_C( 45), INT8_C( 109) } }, { { INT8_C( 80), -INT8_C( 68), -INT8_C( 54), -INT8_C( 79), INT8_C( 17), -INT8_C( 27), -INT8_C( 74), INT8_C( 90), -INT8_C( 100), -INT8_C( 25), -INT8_C( 51), INT8_C( 93), INT8_C( 29), -INT8_C( 5), -INT8_C( 54), -INT8_C( 115) }, { INT8_C( 82), -INT8_C( 8), INT8_C( 22), INT8_C( 126), -INT8_C( 87), INT8_C( 117), -INT8_C( 1), -INT8_C( 50), -INT8_C( 114), -INT8_C( 13), INT8_C( 7), INT8_C( 28), INT8_C( 14), -INT8_C( 67), -INT8_C( 53), INT8_C( 94) }, { INT8_C( 121), -INT8_C( 72), -INT8_C( 43), -INT8_C( 16), -INT8_C( 26), INT8_C( 32), -INT8_C( 74), INT8_C( 65), INT8_C( 99), -INT8_C( 31), -INT8_C( 47), INT8_C( 107), INT8_C( 36), -INT8_C( 38), -INT8_C( 80), -INT8_C( 68) }, { INT8_C( 90), -INT8_C( 69), -INT8_C( 51), -INT8_C( 63), INT8_C( 6), -INT8_C( 12), -INT8_C( 74), INT8_C( 84), -INT8_C( 114), -INT8_C( 27), -INT8_C( 50), INT8_C( 97), INT8_C( 31), -INT8_C( 13), -INT8_C( 61), -INT8_C( 103) }, { INT8_C( 83), -INT8_C( 68), -INT8_C( 53), -INT8_C( 75), INT8_C( 14), -INT8_C( 23), -INT8_C( 74), INT8_C( 88), -INT8_C( 104), -INT8_C( 25), -INT8_C( 51), INT8_C( 94), INT8_C( 29), -INT8_C( 7), -INT8_C( 56), -INT8_C( 112) }, { INT8_C( 81), -INT8_C( 68), -INT8_C( 54), -INT8_C( 77), INT8_C( 16), -INT8_C( 25), -INT8_C( 74), INT8_C( 89), -INT8_C( 102), -INT8_C( 25), -INT8_C( 51), INT8_C( 93), INT8_C( 29), -INT8_C( 6), -INT8_C( 55), -INT8_C( 114) }, { INT8_C( 80), -INT8_C( 68), 
-INT8_C( 54), -INT8_C( 79), INT8_C( 17), -INT8_C( 27), -INT8_C( 74), INT8_C( 90), -INT8_C( 100), -INT8_C( 25), -INT8_C( 51), INT8_C( 93), INT8_C( 29), -INT8_C( 5), -INT8_C( 54), -INT8_C( 115) } }, { { INT8_C( 121), -INT8_C( 107), INT8_C( 16), -INT8_C( 117), INT8_C( 122), -INT8_C( 58), -INT8_C( 27), INT8_C( 23), -INT8_C( 82), -INT8_C( 77), INT8_C( 116), -INT8_C( 53), -INT8_C( 82), INT8_C( 63), INT8_C( 89), INT8_C( 0) }, { INT8_C( 55), INT8_C( 111), INT8_C( 126), -INT8_C( 32), -INT8_C( 28), INT8_C( 125), -INT8_C( 81), INT8_C( 114), INT8_C( 112), -INT8_C( 74), -INT8_C( 114), INT8_C( 126), INT8_C( 115), INT8_C( 89), -INT8_C( 35), -INT8_C( 19) }, { -INT8_C( 107), -INT8_C( 51), INT8_C( 79), INT8_C( 123), INT8_C( 108), INT8_C( 5), -INT8_C( 67), INT8_C( 80), -INT8_C( 26), -INT8_C( 114), INT8_C( 59), INT8_C( 10), -INT8_C( 24), INT8_C( 108), INT8_C( 72), -INT8_C( 9) }, { INT8_MIN, -INT8_C( 93), INT8_C( 32), -INT8_C( 121), INT8_C( 119), -INT8_C( 42), -INT8_C( 37), INT8_C( 37), -INT8_C( 68), -INT8_C( 86), INT8_C( 102), -INT8_C( 37), -INT8_C( 68), INT8_C( 74), INT8_C( 85), -INT8_C( 2) }, { INT8_C( 123), -INT8_C( 104), INT8_C( 20), -INT8_C( 118), INT8_C( 121), -INT8_C( 54), -INT8_C( 30), INT8_C( 27), -INT8_C( 78), -INT8_C( 79), INT8_C( 112), -INT8_C( 49), -INT8_C( 78), INT8_C( 66), INT8_C( 88), -INT8_C( 1) }, { INT8_C( 122), -INT8_C( 105), INT8_C( 18), -INT8_C( 117), INT8_C( 122), -INT8_C( 56), -INT8_C( 28), INT8_C( 25), -INT8_C( 80), -INT8_C( 78), INT8_C( 114), -INT8_C( 51), -INT8_C( 80), INT8_C( 64), INT8_C( 88), INT8_C( 0) }, { INT8_C( 121), -INT8_C( 107), INT8_C( 16), -INT8_C( 117), INT8_C( 122), -INT8_C( 58), -INT8_C( 27), INT8_C( 23), -INT8_C( 82), -INT8_C( 77), INT8_C( 116), -INT8_C( 53), -INT8_C( 82), INT8_C( 63), INT8_C( 89), INT8_C( 0) } }, { { -INT8_C( 17), -INT8_C( 19), INT8_C( 120), INT8_C( 105), -INT8_C( 77), INT8_C( 93), INT8_MIN, INT8_C( 97), INT8_C( 16), -INT8_C( 11), INT8_C( 45), -INT8_C( 66), INT8_C( 52), -INT8_C( 122), -INT8_C( 65), INT8_C( 107) }, { -INT8_C( 
11), INT8_C( 61), INT8_C( 76), -INT8_C( 39), -INT8_C( 69), -INT8_C( 5), INT8_C( 75), INT8_C( 43), -INT8_C( 79), -INT8_C( 39), -INT8_C( 86), INT8_C( 37), INT8_C( 51), -INT8_C( 121), INT8_C( 18), INT8_C( 34) }, { -INT8_C( 22), INT8_C( 12), -INT8_C( 98), INT8_C( 86), -INT8_C( 111), INT8_C( 91), -INT8_C( 90), INT8_C( 119), -INT8_C( 23), -INT8_C( 30), INT8_C( 2), -INT8_C( 47), INT8_C( 78), INT8_C( 74), -INT8_C( 56), INT8_C( 124) }, { -INT8_C( 18), -INT8_C( 11), -INT8_C( 126), INT8_C( 100), -INT8_C( 86), INT8_C( 92), -INT8_C( 119), INT8_C( 102), INT8_C( 6), -INT8_C( 16), INT8_C( 34), -INT8_C( 61), INT8_C( 58), INT8_C( 119), -INT8_C( 63), INT8_C( 111) }, { -INT8_C( 17), -INT8_C( 17), INT8_C( 122), INT8_C( 104), -INT8_C( 79), INT8_C( 93), -INT8_C( 126), INT8_C( 98), INT8_C( 14), -INT8_C( 12), INT8_C( 42), -INT8_C( 65), INT8_C( 54), -INT8_C( 126), -INT8_C( 64), INT8_C( 108) }, { -INT8_C( 17), -INT8_C( 18), INT8_C( 121), INT8_C( 104), -INT8_C( 78), INT8_C( 93), -INT8_C( 127), INT8_C( 98), INT8_C( 15), -INT8_C( 12), INT8_C( 44), -INT8_C( 65), INT8_C( 53), -INT8_C( 124), -INT8_C( 65), INT8_C( 108) }, { -INT8_C( 17), -INT8_C( 19), INT8_C( 120), INT8_C( 105), -INT8_C( 77), INT8_C( 93), INT8_MIN, INT8_C( 97), INT8_C( 16), -INT8_C( 11), INT8_C( 45), -INT8_C( 66), INT8_C( 52), -INT8_C( 122), -INT8_C( 65), INT8_C( 107) } }, { { INT8_C( 116), -INT8_C( 118), -INT8_C( 117), INT8_C( 39), -INT8_C( 25), INT8_C( 12), -INT8_C( 119), -INT8_C( 8), INT8_C( 1), -INT8_C( 74), -INT8_C( 74), INT8_C( 53), INT8_C( 60), INT8_C( 117), -INT8_C( 96), INT8_C( 49) }, { -INT8_C( 77), -INT8_C( 20), INT8_C( 10), INT8_C( 110), -INT8_C( 25), INT8_C( 85), -INT8_C( 103), -INT8_C( 103), INT8_C( 46), INT8_C( 67), -INT8_C( 66), INT8_C( 97), -INT8_C( 54), -INT8_C( 48), -INT8_C( 125), INT8_C( 62) }, { INT8_C( 78), INT8_MIN, -INT8_C( 112), INT8_C( 94), -INT8_C( 37), INT8_C( 55), INT8_C( 86), -INT8_C( 59), INT8_C( 24), -INT8_C( 40), -INT8_C( 107), INT8_C( 102), INT8_C( 33), INT8_C( 93), INT8_C( 98), INT8_C( 80) }, { 
INT8_C( 106), -INT8_C( 120), -INT8_C( 116), INT8_C( 53), -INT8_C( 28), INT8_C( 23), INT8_C( 124), -INT8_C( 21), INT8_C( 7), -INT8_C( 66), -INT8_C( 82), INT8_C( 65), INT8_C( 53), INT8_C( 111), -INT8_C( 112), INT8_C( 57) }, { INT8_C( 114), -INT8_C( 119), -INT8_C( 117), INT8_C( 42), -INT8_C( 26), INT8_C( 15), -INT8_C( 122), -INT8_C( 11), INT8_C( 2), -INT8_C( 72), -INT8_C( 76), INT8_C( 56), INT8_C( 58), INT8_C( 116), -INT8_C( 100), INT8_C( 51) }, { INT8_C( 115), -INT8_C( 118), -INT8_C( 117), INT8_C( 41), -INT8_C( 25), INT8_C( 13), -INT8_C( 121), -INT8_C( 10), INT8_C( 2), -INT8_C( 73), -INT8_C( 75), INT8_C( 55), INT8_C( 59), INT8_C( 116), -INT8_C( 98), INT8_C( 50) }, { INT8_C( 116), -INT8_C( 118), -INT8_C( 117), INT8_C( 39), -INT8_C( 25), INT8_C( 12), -INT8_C( 119), -INT8_C( 8), INT8_C( 1), -INT8_C( 74), -INT8_C( 74), INT8_C( 53), INT8_C( 60), INT8_C( 117), -INT8_C( 96), INT8_C( 49) } }, { { INT8_C( 90), INT8_C( 15), INT8_C( 102), INT8_C( 65), INT8_C( 27), -INT8_C( 17), INT8_C( 57), INT8_C( 28), -INT8_C( 91), -INT8_C( 16), INT8_C( 81), -INT8_C( 31), INT8_C( 101), -INT8_C( 15), INT8_C( 18), INT8_C( 24) }, { -INT8_C( 34), INT8_C( 28), -INT8_C( 122), -INT8_C( 59), INT8_C( 113), INT8_C( 32), INT8_C( 94), -INT8_C( 97), INT8_C( 99), INT8_C( 28), INT8_C( 1), INT8_C( 46), -INT8_C( 20), -INT8_C( 124), INT8_C( 108), INT8_C( 70) }, { INT8_C( 73), INT8_C( 29), INT8_C( 41), INT8_C( 36), INT8_C( 84), -INT8_C( 1), INT8_C( 104), -INT8_C( 20), -INT8_C( 41), -INT8_C( 2), INT8_C( 82), -INT8_C( 8), INT8_C( 91), -INT8_C( 77), INT8_C( 72), INT8_C( 59) }, { INT8_C( 86), INT8_C( 19), INT8_C( 87), INT8_C( 58), INT8_C( 41), -INT8_C( 13), INT8_C( 69), INT8_C( 16), -INT8_C( 79), -INT8_C( 12), INT8_C( 81), -INT8_C( 25), INT8_C( 99), -INT8_C( 30), INT8_C( 32), INT8_C( 33) }, { INT8_C( 89), INT8_C( 16), INT8_C( 98), INT8_C( 63), INT8_C( 31), -INT8_C( 16), INT8_C( 60), INT8_C( 25), -INT8_C( 88), -INT8_C( 15), INT8_C( 81), -INT8_C( 30), INT8_C( 100), -INT8_C( 19), INT8_C( 21), INT8_C( 26) }, { INT8_C( 
89), INT8_C( 15), INT8_C( 100), INT8_C( 64), INT8_C( 29), -INT8_C( 16), INT8_C( 58), INT8_C( 26), -INT8_C( 89), -INT8_C( 16), INT8_C( 81), -INT8_C( 30), INT8_C( 101), -INT8_C( 17), INT8_C( 20), INT8_C( 25) }, { INT8_C( 90), INT8_C( 15), INT8_C( 102), INT8_C( 65), INT8_C( 27), -INT8_C( 17), INT8_C( 57), INT8_C( 28), -INT8_C( 91), -INT8_C( 16), INT8_C( 81), -INT8_C( 31), INT8_C( 101), -INT8_C( 15), INT8_C( 18), INT8_C( 24) } }, { { -INT8_C( 109), -INT8_C( 46), -INT8_C( 120), -INT8_C( 82), -INT8_C( 63), -INT8_C( 63), -INT8_C( 54), INT8_C( 102), -INT8_C( 79), INT8_C( 27), INT8_C( 71), INT8_C( 23), INT8_C( 13), INT8_C( 89), INT8_C( 47), -INT8_C( 21) }, { INT8_C( 117), -INT8_C( 74), -INT8_C( 80), -INT8_C( 26), -INT8_C( 42), INT8_C( 15), -INT8_C( 122), INT8_C( 57), INT8_C( 43), -INT8_C( 121), INT8_C( 103), INT8_C( 24), INT8_C( 11), -INT8_C( 44), INT8_C( 94), -INT8_C( 97) }, { -INT8_C( 50), -INT8_C( 83), INT8_C( 96), -INT8_C( 95), -INT8_C( 84), -INT8_C( 55), -INT8_C( 115), -INT8_C( 125), -INT8_C( 57), -INT8_C( 33), INT8_C( 123), INT8_C( 35), INT8_C( 19), INT8_C( 67), INT8_C( 94), -INT8_C( 69) }, { -INT8_C( 94), -INT8_C( 55), INT8_C( 126), -INT8_C( 85), -INT8_C( 68), -INT8_C( 61), -INT8_C( 69), INT8_C( 109), -INT8_C( 74), INT8_C( 12), INT8_C( 84), INT8_C( 26), INT8_C( 14), INT8_C( 84), INT8_C( 59), -INT8_C( 33) }, { -INT8_C( 105), -INT8_C( 48), -INT8_C( 122), -INT8_C( 83), -INT8_C( 64), -INT8_C( 63), -INT8_C( 58), INT8_C( 104), -INT8_C( 78), INT8_C( 23), INT8_C( 74), INT8_C( 24), INT8_C( 13), INT8_C( 88), INT8_C( 50), -INT8_C( 24) }, { -INT8_C( 107), -INT8_C( 47), -INT8_C( 121), -INT8_C( 82), -INT8_C( 64), -INT8_C( 63), -INT8_C( 56), INT8_C( 103), -INT8_C( 78), INT8_C( 25), INT8_C( 73), INT8_C( 23), INT8_C( 13), INT8_C( 88), INT8_C( 48), -INT8_C( 23) }, { -INT8_C( 109), -INT8_C( 46), -INT8_C( 120), -INT8_C( 82), -INT8_C( 63), -INT8_C( 63), -INT8_C( 54), INT8_C( 102), -INT8_C( 79), INT8_C( 27), INT8_C( 71), INT8_C( 23), INT8_C( 13), INT8_C( 89), INT8_C( 47), -INT8_C( 21) } 
}, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r1 = simde_vrsraq_n_s8(a, b, 1); simde_int8x16_t r3 = simde_vrsraq_n_s8(a, b, 3); simde_int8x16_t r5 = simde_vrsraq_n_s8(a, b, 5); simde_int8x16_t r6 = simde_vrsraq_n_s8(a, b, 6); simde_int8x16_t r8 = simde_vrsraq_n_s8(a, b, 8); simde_test_arm_neon_assert_equal_i8x16(r1, simde_vld1q_s8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i8x16(r3, simde_vld1q_s8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i8x16(r5, simde_vld1q_s8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_i8x16(r6, simde_vld1q_s8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i8x16(r8, simde_vld1q_s8(test_vec[i].r8)); } return 0; } static int test_simde_vrsraq_n_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r3[8]; int16_t r6[8]; int16_t r10[8]; int16_t r13[8]; int16_t r16[8]; } test_vec[] = { { { INT16_C( 27213), -INT16_C( 15621), INT16_C( 4695), -INT16_C( 22303), -INT16_C( 26590), -INT16_C( 9749), INT16_C( 9982), -INT16_C( 6778) }, { INT16_C( 19972), -INT16_C( 25942), -INT16_C( 22797), INT16_C( 18834), -INT16_C( 5405), -INT16_C( 30691), -INT16_C( 3556), INT16_C( 27120) }, { INT16_C( 29710), -INT16_C( 18864), INT16_C( 1845), -INT16_C( 19949), -INT16_C( 27266), -INT16_C( 13585), INT16_C( 9538), -INT16_C( 3388) }, { INT16_C( 27525), -INT16_C( 16026), INT16_C( 4339), -INT16_C( 22009), -INT16_C( 26674), -INT16_C( 10229), INT16_C( 9926), -INT16_C( 6354) }, { INT16_C( 27233), -INT16_C( 15646), INT16_C( 4673), -INT16_C( 22285), -INT16_C( 26595), -INT16_C( 9779), INT16_C( 9979), -INT16_C( 6752) }, { INT16_C( 27215), -INT16_C( 15624), INT16_C( 4692), -INT16_C( 22301), -INT16_C( 26591), -INT16_C( 9753), INT16_C( 9982), -INT16_C( 6775) }, { INT16_C( 27213), -INT16_C( 15621), INT16_C( 4695), -INT16_C( 22303), -INT16_C( 26590), -INT16_C( 9749), INT16_C( 9982), -INT16_C( 
6778) } }, { { -INT16_C( 5284), -INT16_C( 19669), INT16_C( 3582), INT16_C( 8283), INT16_C( 18085), -INT16_C( 23303), INT16_C( 32620), INT16_C( 28809) }, { INT16_C( 13261), -INT16_C( 16374), -INT16_C( 25382), -INT16_C( 17143), INT16_C( 9863), -INT16_C( 23738), INT16_C( 13849), INT16_C( 29965) }, { -INT16_C( 3626), -INT16_C( 21716), INT16_C( 409), INT16_C( 6140), INT16_C( 19318), -INT16_C( 26270), -INT16_C( 31185), INT16_C( 32555) }, { -INT16_C( 5077), -INT16_C( 19925), INT16_C( 3185), INT16_C( 8015), INT16_C( 18239), -INT16_C( 23674), -INT16_C( 32700), INT16_C( 29277) }, { -INT16_C( 5271), -INT16_C( 19685), INT16_C( 3557), INT16_C( 8266), INT16_C( 18095), -INT16_C( 23326), INT16_C( 32634), INT16_C( 28838) }, { -INT16_C( 5282), -INT16_C( 19671), INT16_C( 3579), INT16_C( 8281), INT16_C( 18086), -INT16_C( 23306), INT16_C( 32622), INT16_C( 28813) }, { -INT16_C( 5284), -INT16_C( 19669), INT16_C( 3582), INT16_C( 8283), INT16_C( 18085), -INT16_C( 23303), INT16_C( 32620), INT16_C( 28809) } }, { { INT16_C( 14370), INT16_C( 8233), -INT16_C( 31675), -INT16_C( 5312), INT16_C( 14795), INT16_C( 14223), INT16_C( 6328), -INT16_C( 31320) }, { -INT16_C( 19893), INT16_C( 9541), INT16_C( 20303), -INT16_C( 10525), INT16_C( 10613), -INT16_C( 29063), -INT16_C( 31137), -INT16_C( 32508) }, { INT16_C( 11883), INT16_C( 9426), -INT16_C( 29137), -INT16_C( 6628), INT16_C( 16122), INT16_C( 10590), INT16_C( 2436), INT16_C( 30153) }, { INT16_C( 14059), INT16_C( 8382), -INT16_C( 31358), -INT16_C( 5476), INT16_C( 14961), INT16_C( 13769), INT16_C( 5841), -INT16_C( 31828) }, { INT16_C( 14351), INT16_C( 8242), -INT16_C( 31655), -INT16_C( 5322), INT16_C( 14805), INT16_C( 14195), INT16_C( 6298), -INT16_C( 31352) }, { INT16_C( 14368), INT16_C( 8234), -INT16_C( 31673), -INT16_C( 5313), INT16_C( 14796), INT16_C( 14219), INT16_C( 6324), -INT16_C( 31324) }, { INT16_C( 14370), INT16_C( 8233), -INT16_C( 31675), -INT16_C( 5312), INT16_C( 14795), INT16_C( 14223), INT16_C( 6328), -INT16_C( 31320) } }, { { INT16_C( 
11711), INT16_C( 1185), -INT16_C( 7759), INT16_C( 31983), INT16_C( 32282), -INT16_C( 11596), INT16_C( 23702), -INT16_C( 7593) }, { -INT16_C( 25330), INT16_C( 23815), -INT16_C( 5396), INT16_C( 24883), -INT16_C( 21229), INT16_C( 29680), -INT16_C( 3021), -INT16_C( 3340) }, { INT16_C( 8545), INT16_C( 4162), -INT16_C( 8433), -INT16_C( 30443), INT16_C( 29628), -INT16_C( 7886), INT16_C( 23324), -INT16_C( 8010) }, { INT16_C( 11315), INT16_C( 1557), -INT16_C( 7843), INT16_C( 32372), INT16_C( 31950), -INT16_C( 11132), INT16_C( 23655), -INT16_C( 7645) }, { INT16_C( 11686), INT16_C( 1208), -INT16_C( 7764), INT16_C( 32007), INT16_C( 32261), -INT16_C( 11567), INT16_C( 23699), -INT16_C( 7596) }, { INT16_C( 11708), INT16_C( 1188), -INT16_C( 7760), INT16_C( 31986), INT16_C( 32279), -INT16_C( 11592), INT16_C( 23702), -INT16_C( 7593) }, { INT16_C( 11711), INT16_C( 1185), -INT16_C( 7759), INT16_C( 31983), INT16_C( 32282), -INT16_C( 11596), INT16_C( 23702), -INT16_C( 7593) } }, { { -INT16_C( 27103), -INT16_C( 11529), -INT16_C( 6537), -INT16_C( 28081), INT16_C( 869), -INT16_C( 1180), -INT16_C( 17313), INT16_C( 28125) }, { -INT16_C( 6823), INT16_C( 17867), -INT16_C( 305), -INT16_C( 7258), -INT16_C( 26965), -INT16_C( 8362), INT16_C( 19082), -INT16_C( 21551) }, { -INT16_C( 27956), -INT16_C( 9296), -INT16_C( 6575), -INT16_C( 28988), -INT16_C( 2502), -INT16_C( 2225), -INT16_C( 14928), INT16_C( 25431) }, { -INT16_C( 27210), -INT16_C( 11250), -INT16_C( 6542), -INT16_C( 28194), INT16_C( 448), -INT16_C( 1311), -INT16_C( 17015), INT16_C( 27788) }, { -INT16_C( 27110), -INT16_C( 11512), -INT16_C( 6537), -INT16_C( 28088), INT16_C( 843), -INT16_C( 1188), -INT16_C( 17294), INT16_C( 28104) }, { -INT16_C( 27104), -INT16_C( 11527), -INT16_C( 6537), -INT16_C( 28082), INT16_C( 866), -INT16_C( 1181), -INT16_C( 17311), INT16_C( 28122) }, { -INT16_C( 27103), -INT16_C( 11529), -INT16_C( 6537), -INT16_C( 28081), INT16_C( 869), -INT16_C( 1180), -INT16_C( 17313), INT16_C( 28125) } }, { { -INT16_C( 14112), 
INT16_C( 22654), -INT16_C( 12881), INT16_C( 5354), INT16_C( 20176), INT16_C( 12047), -INT16_C( 4854), INT16_C( 25500) }, { INT16_C( 26578), -INT16_C( 24152), INT16_C( 20326), INT16_C( 4484), -INT16_C( 9499), INT16_C( 28912), -INT16_C( 15835), INT16_C( 1307) }, { -INT16_C( 10790), INT16_C( 19635), -INT16_C( 10340), INT16_C( 5915), INT16_C( 18989), INT16_C( 15661), -INT16_C( 6833), INT16_C( 25663) }, { -INT16_C( 13697), INT16_C( 22277), -INT16_C( 12563), INT16_C( 5424), INT16_C( 20028), INT16_C( 12499), -INT16_C( 5101), INT16_C( 25520) }, { -INT16_C( 14086), INT16_C( 22630), -INT16_C( 12861), INT16_C( 5358), INT16_C( 20167), INT16_C( 12075), -INT16_C( 4869), INT16_C( 25501) }, { -INT16_C( 14109), INT16_C( 22651), -INT16_C( 12879), INT16_C( 5355), INT16_C( 20175), INT16_C( 12051), -INT16_C( 4856), INT16_C( 25500) }, { -INT16_C( 14112), INT16_C( 22654), -INT16_C( 12881), INT16_C( 5354), INT16_C( 20176), INT16_C( 12047), -INT16_C( 4854), INT16_C( 25500) } }, { { -INT16_C( 26230), INT16_C( 14685), INT16_C( 18278), INT16_C( 13901), INT16_C( 23958), -INT16_C( 24475), INT16_C( 586), INT16_C( 7172) }, { -INT16_C( 21399), -INT16_C( 12355), INT16_C( 17147), -INT16_C( 7711), -INT16_C( 12004), INT16_C( 16721), INT16_C( 27795), INT16_C( 7751) }, { -INT16_C( 28905), INT16_C( 13141), INT16_C( 20421), INT16_C( 12937), INT16_C( 22458), -INT16_C( 22385), INT16_C( 4060), INT16_C( 8141) }, { -INT16_C( 26564), INT16_C( 14492), INT16_C( 18546), INT16_C( 13781), INT16_C( 23770), -INT16_C( 24214), INT16_C( 1020), INT16_C( 7293) }, { -INT16_C( 26251), INT16_C( 14673), INT16_C( 18295), INT16_C( 13893), INT16_C( 23946), -INT16_C( 24459), INT16_C( 613), INT16_C( 7180) }, { -INT16_C( 26233), INT16_C( 14683), INT16_C( 18280), INT16_C( 13900), INT16_C( 23957), -INT16_C( 24473), INT16_C( 589), INT16_C( 7173) }, { -INT16_C( 26230), INT16_C( 14685), INT16_C( 18278), INT16_C( 13901), INT16_C( 23958), -INT16_C( 24475), INT16_C( 586), INT16_C( 7172) } }, { { -INT16_C( 23546), INT16_C( 27735), -INT16_C( 
23060), -INT16_C( 32093), INT16_C( 2050), INT16_C( 19490), INT16_C( 9738), INT16_C( 29800) }, { INT16_C( 9683), -INT16_C( 12733), INT16_C( 9319), -INT16_C( 31569), INT16_C( 246), -INT16_C( 30267), INT16_C( 3181), INT16_C( 29607) }, { -INT16_C( 22336), INT16_C( 26143), -INT16_C( 21895), INT16_C( 29497), INT16_C( 2081), INT16_C( 15707), INT16_C( 10136), -INT16_C( 32035) }, { -INT16_C( 23395), INT16_C( 27536), -INT16_C( 22914), -INT16_C( 32586), INT16_C( 2054), INT16_C( 19017), INT16_C( 9788), INT16_C( 30263) }, { -INT16_C( 23537), INT16_C( 27723), -INT16_C( 23051), -INT16_C( 32124), INT16_C( 2050), INT16_C( 19460), INT16_C( 9741), INT16_C( 29829) }, { -INT16_C( 23545), INT16_C( 27733), -INT16_C( 23059), -INT16_C( 32097), INT16_C( 2050), INT16_C( 19486), INT16_C( 9738), INT16_C( 29804) }, { -INT16_C( 23546), INT16_C( 27735), -INT16_C( 23060), -INT16_C( 32093), INT16_C( 2050), INT16_C( 19490), INT16_C( 9738), INT16_C( 29800) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r3 = simde_vrsraq_n_s16(a, b, 3); simde_int16x8_t r6 = simde_vrsraq_n_s16(a, b, 6); simde_int16x8_t r10 = simde_vrsraq_n_s16(a, b, 10); simde_int16x8_t r13 = simde_vrsraq_n_s16(a, b, 13); simde_int16x8_t r16 = simde_vrsraq_n_s16(a, b, 16); simde_test_arm_neon_assert_equal_i16x8(r3, simde_vld1q_s16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i16x8(r6, simde_vld1q_s16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i16x8(r10, simde_vld1q_s16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_i16x8(r13, simde_vld1q_s16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i16x8(r16, simde_vld1q_s16(test_vec[i].r16)); } return 0; } static int test_simde_vrsraq_n_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r6[4]; int32_t r13[4]; int32_t r19[4]; int32_t r26[4]; int32_t r32[4]; } test_vec[] = { { { INT32_C( 
1502172043), -INT32_C( 504641461), INT32_C( 1553063998), INT32_C( 1523463707) }, { INT32_C( 563728537), INT32_C( 1127529576), INT32_C( 252291388), INT32_C( 1861909731) }, { INT32_C( 1510980301), -INT32_C( 487023811), INT32_C( 1557006051), INT32_C( 1552556047) }, { INT32_C( 1502240858), -INT32_C( 504503823), INT32_C( 1553094795), INT32_C( 1523690991) }, { INT32_C( 1502173118), -INT32_C( 504639310), INT32_C( 1553064479), INT32_C( 1523467258) }, { INT32_C( 1502172051), -INT32_C( 504641444), INT32_C( 1553064002), INT32_C( 1523463735) }, { INT32_C( 1502172043), -INT32_C( 504641461), INT32_C( 1553063998), INT32_C( 1523463707) } }, { { INT32_C( 449283023), -INT32_C( 1963216308), -INT32_C( 1377399662), INT32_C( 1544074435) }, { -INT32_C( 327310972), -INT32_C( 1724927395), INT32_C( 1051212123), -INT32_C( 2069060939) }, { INT32_C( 444168789), -INT32_C( 1990168299), -INT32_C( 1360974473), INT32_C( 1511745358) }, { INT32_C( 449243068), -INT32_C( 1963426870), -INT32_C( 1377271340), INT32_C( 1543821864) }, { INT32_C( 449282399), -INT32_C( 1963219598), -INT32_C( 1377397657), INT32_C( 1544070489) }, { INT32_C( 449283018), -INT32_C( 1963216334), -INT32_C( 1377399646), INT32_C( 1544074404) }, { INT32_C( 449283023), -INT32_C( 1963216308), -INT32_C( 1377399662), INT32_C( 1544074435) } }, { { INT32_C( 1922986790), -INT32_C( 1191405018), -INT32_C( 379133402), INT32_C( 440823446) }, { INT32_C( 1829159695), -INT32_C( 804899211), INT32_C( 604942191), INT32_C( 2007546449) }, { INT32_C( 1951567410), -INT32_C( 1203981568), -INT32_C( 369681180), INT32_C( 472191359) }, { INT32_C( 1923210076), -INT32_C( 1191503272), -INT32_C( 379059557), INT32_C( 441068508) }, { INT32_C( 1922990279), -INT32_C( 1191406553), -INT32_C( 379132248), INT32_C( 440827275) }, { INT32_C( 1922986817), -INT32_C( 1191405030), -INT32_C( 379133393), INT32_C( 440823476) }, { INT32_C( 1922986790), -INT32_C( 1191405018), -INT32_C( 379133402), INT32_C( 440823446) } }, { { INT32_C( 1424573998), INT32_C( 118285792), INT32_C( 
1576039111), -INT32_C( 260622624) }, { INT32_C( 1868398330), INT32_C( 591422388), INT32_C( 1682394642), INT32_C( 937160457) }, { INT32_C( 1453767722), INT32_C( 127526767), INT32_C( 1602326527), -INT32_C( 245979492) }, { INT32_C( 1424802074), INT32_C( 118357987), INT32_C( 1576244481), -INT32_C( 260508225) }, { INT32_C( 1424577562), INT32_C( 118286920), INT32_C( 1576042320), -INT32_C( 260620837) }, { INT32_C( 1424574026), INT32_C( 118285801), INT32_C( 1576039136), -INT32_C( 260622610) }, { INT32_C( 1424573998), INT32_C( 118285792), INT32_C( 1576039111), -INT32_C( 260622624) } }, { { INT32_C( 378258741), INT32_C( 1914541994), -INT32_C( 355529462), INT32_C( 1054492484) }, { INT32_C( 2041395141), -INT32_C( 1382224485), INT32_C( 1158800188), INT32_C( 125627858) }, { INT32_C( 410155540), INT32_C( 1892944736), -INT32_C( 337423209), INT32_C( 1056455419) }, { INT32_C( 378507935), INT32_C( 1914373265), -INT32_C( 355388007), INT32_C( 1054507819) }, { INT32_C( 378262635), INT32_C( 1914539358), -INT32_C( 355527252), INT32_C( 1054492724) }, { INT32_C( 378258771), INT32_C( 1914541973), -INT32_C( 355529445), INT32_C( 1054492486) }, { INT32_C( 378258741), INT32_C( 1914541994), -INT32_C( 355529462), INT32_C( 1054492484) } }, { { INT32_C( 1545406386), -INT32_C( 1462879586), -INT32_C( 1936482744), -INT32_C( 1429574171) }, { INT32_C( 1076066213), -INT32_C( 1578254491), INT32_C( 1961295778), -INT32_C( 1636080916) }, { INT32_C( 1562219921), -INT32_C( 1487539812), -INT32_C( 1905837497), -INT32_C( 1455137935) }, { INT32_C( 1545537742), -INT32_C( 1463072244), -INT32_C( 1936243328), -INT32_C( 1429773888) }, { INT32_C( 1545408438), -INT32_C( 1462882596), -INT32_C( 1936479003), -INT32_C( 1429577292) }, { INT32_C( 1545406402), -INT32_C( 1462879610), -INT32_C( 1936482715), -INT32_C( 1429574195) }, { INT32_C( 1545406386), -INT32_C( 1462879586), -INT32_C( 1936482744), -INT32_C( 1429574171) } }, { { INT32_C( 133863785), INT32_C( 464570835), INT32_C( 1286030183), INT32_C( 1442214320) }, { INT32_C( 
1318394345), INT32_C( 2062517208), INT32_C( 1861145986), -INT32_C( 1609799369) }, { INT32_C( 154463697), INT32_C( 496797666), INT32_C( 1315110589), INT32_C( 1417061205) }, { INT32_C( 134024722), INT32_C( 464822607), INT32_C( 1286257374), INT32_C( 1442017811) }, { INT32_C( 133866300), INT32_C( 464574769), INT32_C( 1286033733), INT32_C( 1442211250) }, { INT32_C( 133863805), INT32_C( 464570866), INT32_C( 1286030211), INT32_C( 1442214296) }, { INT32_C( 133863785), INT32_C( 464570835), INT32_C( 1286030183), INT32_C( 1442214320) } }, { { -INT32_C( 693697022), INT32_C( 921786319), INT32_C( 1266850202), -INT32_C( 207587318) }, { INT32_C( 1765881489), INT32_C( 1004744889), INT32_C( 1017762053), INT32_C( 1037874491) }, { -INT32_C( 666105124), INT32_C( 937485458), INT32_C( 1282752734), -INT32_C( 191370529) }, { -INT32_C( 693481460), INT32_C( 921908969), INT32_C( 1266974441), -INT32_C( 207460624) }, { -INT32_C( 693693654), INT32_C( 921788235), INT32_C( 1266852143), -INT32_C( 207585338) }, { -INT32_C( 693696996), INT32_C( 921786334), INT32_C( 1266850217), -INT32_C( 207587303) }, { -INT32_C( 693697022), INT32_C( 921786319), INT32_C( 1266850202), -INT32_C( 207587318) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r6 = simde_vrsraq_n_s32(a, b, 6); simde_int32x4_t r13 = simde_vrsraq_n_s32(a, b, 13); simde_int32x4_t r19 = simde_vrsraq_n_s32(a, b, 19); simde_int32x4_t r26 = simde_vrsraq_n_s32(a, b, 26); simde_int32x4_t r32 = simde_vrsraq_n_s32(a, b, 32); simde_test_arm_neon_assert_equal_i32x4(r6, simde_vld1q_s32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i32x4(r13, simde_vld1q_s32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i32x4(r19, simde_vld1q_s32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_i32x4(r26, simde_vld1q_s32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i32x4(r32, 
simde_vld1q_s32(test_vec[i].r32)); } return 0; } static int test_simde_vrsraq_n_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t b[2]; int64_t r13[2]; int64_t r26[2]; int64_t r39[2]; int64_t r52[2]; int64_t r64[2]; } test_vec[] = { { { INT64_C( 4041353125507396115), -INT64_C( 5314258365561655216) }, { INT64_C( 7313830278785867243), INT64_C( 2743932506438344679) }, { INT64_C( 4042245927055099468), -INT64_C( 5313923412863115379) }, { INT64_C( 4041353234491960044), -INT64_C( 5314258324673874633) }, { INT64_C( 4041353125520699895), -INT64_C( 5314258365556664032) }, { INT64_C( 4041353125507397739), -INT64_C( 5314258365561654607) }, { INT64_C( 4041353125507396115), -INT64_C( 5314258365561655216) } }, { { -INT64_C( 1057029105044103974), INT64_C( 4845984625688142173) }, { -INT64_C( 6860630707947583614), -INT64_C( 793388787920162364) }, { -INT64_C( 1057866584378570232), INT64_C( 4845887776470866763) }, { -INT64_C( 1057029207275468045), INT64_C( 4845984613865727955) }, { -INT64_C( 1057029105056583389), INT64_C( 4845984625686699007) }, { -INT64_C( 1057029105044105497), INT64_C( 4845984625688141997) }, { -INT64_C( 1057029105044103974), INT64_C( 4845984625688142173) } }, { { -INT64_C( 6863375594008046291), -INT64_C( 3647799926837352763) }, { -INT64_C( 1330253858463349456), INT64_C( 5025104695941973679) }, { -INT64_C( 6863537978512253243), -INT64_C( 3647186510736773909) }, { -INT64_C( 6863375613830373465), -INT64_C( 3647799851957457673) }, { -INT64_C( 6863375594010466009), -INT64_C( 3647799926828212151) }, { -INT64_C( 6863375594008046586), -INT64_C( 3647799926837351647) }, { -INT64_C( 6863375594008046291), -INT64_C( 3647799926837352763) } }, { { -INT64_C( 704015982714893225), -INT64_C( 156654237620411461) }, { -INT64_C( 5508061753547291016), INT64_C( 7895716240167752758) }, { -INT64_C( 704688353534418041), -INT64_C( 155690405071562858) }, { -INT64_C( 704016064791409280), -INT64_C( 156654119965071025) }, { -INT64_C( 704015982724912331), -INT64_C( 156654237606049237) 
}, { -INT64_C( 704015982714894448), -INT64_C( 156654237620409708) }, { -INT64_C( 704015982714893225), -INT64_C( 156654237620411461) } }, { { INT64_C( 4047682171115644932), INT64_C( 3129739728459284389) }, { -INT64_C( 301919548837374440), -INT64_C( 8704088626041369935) }, { INT64_C( 4047645315701968495), INT64_C( 3128677217640675823) }, { INT64_C( 4047682166616693067), INT64_C( 3129739598758256727) }, { INT64_C( 4047682171115095744), INT64_C( 3129739728443451744) }, { INT64_C( 4047682171115644865), INT64_C( 3129739728459282456) }, { INT64_C( 4047682171115644932), INT64_C( 3129739728459284389) } }, { { INT64_C( 2466539369553214597), -INT64_C( 6843777593601918936) }, { -INT64_C( 2153973854152829862), -INT64_C( 4116046786669746985) }, { INT64_C( 2466276433291721332), -INT64_C( 6844280040719432333) }, { INT64_C( 2466539337456502989), -INT64_C( 6843777654935795586) }, { INT64_C( 2466539369549296541), -INT64_C( 6843777593609405981) }, { INT64_C( 2466539369553214119), -INT64_C( 6843777593601919850) }, { INT64_C( 2466539369553214597), -INT64_C( 6843777593601918936) } }, { { -INT64_C( 5928704661656134375), -INT64_C( 6006236945602556294) }, { -INT64_C( 4145872490280033437), INT64_C( 1608630576584073035) }, { -INT64_C( 5929210749606608012), -INT64_C( 6006040579565375621) }, { -INT64_C( 5928704723434448642), -INT64_C( 6006236921632092771) }, { -INT64_C( 5928704661663675673), -INT64_C( 6006236945599630212) }, { -INT64_C( 5928704661656135296), -INT64_C( 6006236945602555937) }, { -INT64_C( 5928704661656134375), -INT64_C( 6006236945602556294) } }, { { -INT64_C( 7435330705155089507), INT64_C( 5454495663009847732) }, { -INT64_C( 5810957003708727643), INT64_C( 1839315258154187636) }, { -INT64_C( 7436040050492456295), INT64_C( 5454720188798196632) }, { -INT64_C( 7435330791745096510), INT64_C( 5454495690417780880) }, { -INT64_C( 7435330705165659576), INT64_C( 5454495663013193427) }, { -INT64_C( 7435330705155090797), INT64_C( 5454495663009848140) }, { -INT64_C( 7435330705155089507), 
INT64_C( 5454495663009847732) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r13 = simde_vrsraq_n_s64(a, b, 13); simde_int64x2_t r26 = simde_vrsraq_n_s64(a, b, 26); simde_int64x2_t r39 = simde_vrsraq_n_s64(a, b, 39); simde_int64x2_t r52 = simde_vrsraq_n_s64(a, b, 52); simde_int64x2_t r64 = simde_vrsraq_n_s64(a, b, 64); simde_test_arm_neon_assert_equal_i64x2(r13, simde_vld1q_s64(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i64x2(r26, simde_vld1q_s64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i64x2(r39, simde_vld1q_s64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_i64x2(r52, simde_vld1q_s64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_i64x2(r64, simde_vld1q_s64(test_vec[i].r64)); } return 0; } static int test_simde_vrsraq_n_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r1[16]; uint8_t r3[16]; uint8_t r5[16]; uint8_t r6[16]; uint8_t r8[16]; } test_vec[] = { { { UINT8_C(218), UINT8_C(246), UINT8_C(246), UINT8_C( 23), UINT8_C(127), UINT8_C(186), UINT8_C(117), UINT8_C( 23), UINT8_C( 43), UINT8_C( 33), UINT8_C( 60), UINT8_C(103), UINT8_C(131), UINT8_C(168), UINT8_C(165), UINT8_C( 10) }, { UINT8_C( 62), UINT8_C(159), UINT8_C( 43), UINT8_C( 70), UINT8_C(213), UINT8_C( 69), UINT8_C(138), UINT8_C( 70), UINT8_C( 18), UINT8_C(191), UINT8_C( 88), UINT8_C(148), UINT8_C( 8), UINT8_C(195), UINT8_C( 63), UINT8_C(227) }, { UINT8_C(249), UINT8_C( 70), UINT8_C( 12), UINT8_C( 58), UINT8_C(234), UINT8_C(221), UINT8_C(186), UINT8_C( 58), UINT8_C( 52), UINT8_C(129), UINT8_C(104), UINT8_C(177), UINT8_C(135), UINT8_C( 10), UINT8_C(197), UINT8_C(124) }, { UINT8_C(226), UINT8_C( 10), UINT8_C(251), UINT8_C( 32), UINT8_C(154), UINT8_C(195), UINT8_C(134), UINT8_C( 32), UINT8_C( 45), UINT8_C( 57), UINT8_C( 71), UINT8_C(122), UINT8_C(132), UINT8_C(192), UINT8_C(173), UINT8_C( 38) }, { 
UINT8_C(220), UINT8_C(251), UINT8_C(247), UINT8_C( 25), UINT8_C(134), UINT8_C(188), UINT8_C(121), UINT8_C( 25), UINT8_C( 44), UINT8_C( 39), UINT8_C( 63), UINT8_C(108), UINT8_C(131), UINT8_C(174), UINT8_C(167), UINT8_C( 17) }, { UINT8_C(219), UINT8_C(248), UINT8_C(247), UINT8_C( 24), UINT8_C(130), UINT8_C(187), UINT8_C(119), UINT8_C( 24), UINT8_C( 43), UINT8_C( 36), UINT8_C( 61), UINT8_C(105), UINT8_C(131), UINT8_C(171), UINT8_C(166), UINT8_C( 14) }, { UINT8_C(218), UINT8_C(247), UINT8_C(246), UINT8_C( 23), UINT8_C(128), UINT8_C(186), UINT8_C(118), UINT8_C( 23), UINT8_C( 43), UINT8_C( 34), UINT8_C( 60), UINT8_C(104), UINT8_C(131), UINT8_C(169), UINT8_C(165), UINT8_C( 11) } }, { { UINT8_C(186), UINT8_C( 53), UINT8_C(250), UINT8_C( 57), UINT8_C(239), UINT8_C(112), UINT8_C( 80), UINT8_C( 27), UINT8_C(145), UINT8_C(141), UINT8_C(130), UINT8_C( 20), UINT8_C( 53), UINT8_C( 39), UINT8_C( 30), UINT8_C(115) }, { UINT8_C(199), UINT8_C( 73), UINT8_C(186), UINT8_C(156), UINT8_C(142), UINT8_C( 68), UINT8_C(226), UINT8_C(161), UINT8_C( 3), UINT8_C( 59), UINT8_C( 53), UINT8_C( 12), UINT8_C(254), UINT8_C(116), UINT8_C(239), UINT8_C(184) }, { UINT8_C( 30), UINT8_C( 90), UINT8_C( 87), UINT8_C(135), UINT8_C( 54), UINT8_C(146), UINT8_C(193), UINT8_C(108), UINT8_C(147), UINT8_C(171), UINT8_C(157), UINT8_C( 26), UINT8_C(180), UINT8_C( 97), UINT8_C(150), UINT8_C(207) }, { UINT8_C(211), UINT8_C( 62), UINT8_C( 17), UINT8_C( 77), UINT8_C( 1), UINT8_C(121), UINT8_C(108), UINT8_C( 47), UINT8_C(145), UINT8_C(148), UINT8_C(137), UINT8_C( 22), UINT8_C( 85), UINT8_C( 54), UINT8_C( 60), UINT8_C(138) }, { UINT8_C(192), UINT8_C( 55), UINT8_C( 0), UINT8_C( 62), UINT8_C(243), UINT8_C(114), UINT8_C( 87), UINT8_C( 32), UINT8_C(145), UINT8_C(143), UINT8_C(132), UINT8_C( 20), UINT8_C( 61), UINT8_C( 43), UINT8_C( 37), UINT8_C(121) }, { UINT8_C(189), UINT8_C( 54), UINT8_C(253), UINT8_C( 59), UINT8_C(241), UINT8_C(113), UINT8_C( 84), UINT8_C( 30), UINT8_C(145), UINT8_C(142), UINT8_C(131), UINT8_C( 20), 
UINT8_C( 57), UINT8_C( 41), UINT8_C( 34), UINT8_C(118) }, { UINT8_C(187), UINT8_C( 53), UINT8_C(251), UINT8_C( 58), UINT8_C(240), UINT8_C(112), UINT8_C( 81), UINT8_C( 28), UINT8_C(145), UINT8_C(141), UINT8_C(130), UINT8_C( 20), UINT8_C( 54), UINT8_C( 39), UINT8_C( 31), UINT8_C(116) } }, { { UINT8_C(170), UINT8_C(233), UINT8_C(241), UINT8_C(153), UINT8_C( 89), UINT8_C( 66), UINT8_C(180), UINT8_C(234), UINT8_C(207), UINT8_C( 54), UINT8_C(254), UINT8_C( 4), UINT8_C( 94), UINT8_C( 29), UINT8_C(120), UINT8_C( 37) }, { UINT8_C(102), UINT8_C( 50), UINT8_C(193), UINT8_C(245), UINT8_C(118), UINT8_C(163), UINT8_C(150), UINT8_C(122), UINT8_C(222), UINT8_C(203), UINT8_C(134), UINT8_C(221), UINT8_C( 64), UINT8_C(117), UINT8_C(149), UINT8_C(234) }, { UINT8_C(221), UINT8_C( 2), UINT8_C( 82), UINT8_C( 20), UINT8_C(148), UINT8_C(148), UINT8_MAX, UINT8_C( 39), UINT8_C( 62), UINT8_C(156), UINT8_C( 65), UINT8_C(115), UINT8_C(126), UINT8_C( 88), UINT8_C(195), UINT8_C(154) }, { UINT8_C(183), UINT8_C(239), UINT8_C( 9), UINT8_C(184), UINT8_C(104), UINT8_C( 86), UINT8_C(199), UINT8_C(249), UINT8_C(235), UINT8_C( 79), UINT8_C( 15), UINT8_C( 32), UINT8_C(102), UINT8_C( 44), UINT8_C(139), UINT8_C( 66) }, { UINT8_C(173), UINT8_C(235), UINT8_C(247), UINT8_C(161), UINT8_C( 93), UINT8_C( 71), UINT8_C(185), UINT8_C(238), UINT8_C(214), UINT8_C( 60), UINT8_C( 2), UINT8_C( 11), UINT8_C( 96), UINT8_C( 33), UINT8_C(125), UINT8_C( 44) }, { UINT8_C(172), UINT8_C(234), UINT8_C(244), UINT8_C(157), UINT8_C( 91), UINT8_C( 69), UINT8_C(182), UINT8_C(236), UINT8_C(210), UINT8_C( 57), UINT8_C( 0), UINT8_C( 7), UINT8_C( 95), UINT8_C( 31), UINT8_C(122), UINT8_C( 41) }, { UINT8_C(170), UINT8_C(233), UINT8_C(242), UINT8_C(154), UINT8_C( 89), UINT8_C( 67), UINT8_C(181), UINT8_C(234), UINT8_C(208), UINT8_C( 55), UINT8_MAX, UINT8_C( 5), UINT8_C( 94), UINT8_C( 29), UINT8_C(121), UINT8_C( 38) } }, { { UINT8_C( 94), UINT8_C(135), UINT8_C(131), UINT8_C(184), UINT8_C(201), UINT8_C( 56), UINT8_C(162), UINT8_C(152), 
UINT8_C(110), UINT8_C(161), UINT8_C(156), UINT8_C(204), UINT8_C(190), UINT8_C( 20), UINT8_C(241), UINT8_C( 36) }, { UINT8_C( 70), UINT8_C(178), UINT8_C( 25), UINT8_C(189), UINT8_C( 86), UINT8_C(175), UINT8_C( 55), UINT8_C( 52), UINT8_C(123), UINT8_C(189), UINT8_C( 17), UINT8_C(187), UINT8_C( 50), UINT8_C(167), UINT8_C(165), UINT8_C(144) }, { UINT8_C(129), UINT8_C(224), UINT8_C(144), UINT8_C( 23), UINT8_C(244), UINT8_C(144), UINT8_C(190), UINT8_C(178), UINT8_C(172), UINT8_C( 0), UINT8_C(165), UINT8_C( 42), UINT8_C(215), UINT8_C(104), UINT8_C( 68), UINT8_C(108) }, { UINT8_C(103), UINT8_C(157), UINT8_C(134), UINT8_C(208), UINT8_C(212), UINT8_C( 78), UINT8_C(169), UINT8_C(159), UINT8_C(125), UINT8_C(185), UINT8_C(158), UINT8_C(227), UINT8_C(196), UINT8_C( 41), UINT8_C( 6), UINT8_C( 54) }, { UINT8_C( 96), UINT8_C(141), UINT8_C(132), UINT8_C(190), UINT8_C(204), UINT8_C( 61), UINT8_C(164), UINT8_C(154), UINT8_C(114), UINT8_C(167), UINT8_C(157), UINT8_C(210), UINT8_C(192), UINT8_C( 25), UINT8_C(246), UINT8_C( 41) }, { UINT8_C( 95), UINT8_C(138), UINT8_C(131), UINT8_C(187), UINT8_C(202), UINT8_C( 59), UINT8_C(163), UINT8_C(153), UINT8_C(112), UINT8_C(164), UINT8_C(156), UINT8_C(207), UINT8_C(191), UINT8_C( 23), UINT8_C(244), UINT8_C( 38) }, { UINT8_C( 94), UINT8_C(136), UINT8_C(131), UINT8_C(185), UINT8_C(201), UINT8_C( 57), UINT8_C(162), UINT8_C(152), UINT8_C(110), UINT8_C(162), UINT8_C(156), UINT8_C(205), UINT8_C(190), UINT8_C( 21), UINT8_C(242), UINT8_C( 37) } }, { { UINT8_C( 46), UINT8_C( 40), UINT8_C( 72), UINT8_C(247), UINT8_C( 96), UINT8_C(235), UINT8_C(143), UINT8_C(207), UINT8_C(140), UINT8_C( 43), UINT8_C(155), UINT8_C( 74), UINT8_C( 64), UINT8_C(141), UINT8_C(110), UINT8_C(134) }, { UINT8_C( 63), UINT8_C(136), UINT8_C( 67), UINT8_C(149), UINT8_C( 55), UINT8_C(122), UINT8_C(202), UINT8_C(178), UINT8_C( 55), UINT8_C(219), UINT8_C(109), UINT8_C(105), UINT8_C(130), UINT8_C( 18), UINT8_C(250), UINT8_C(176) }, { UINT8_C( 78), UINT8_C(108), UINT8_C(106), UINT8_C( 66), 
UINT8_C(124), UINT8_C( 40), UINT8_C(244), UINT8_C( 40), UINT8_C(168), UINT8_C(153), UINT8_C(210), UINT8_C(127), UINT8_C(129), UINT8_C(150), UINT8_C(235), UINT8_C(222) }, { UINT8_C( 54), UINT8_C( 57), UINT8_C( 80), UINT8_C( 10), UINT8_C(103), UINT8_C(250), UINT8_C(168), UINT8_C(229), UINT8_C(147), UINT8_C( 70), UINT8_C(169), UINT8_C( 87), UINT8_C( 80), UINT8_C(143), UINT8_C(141), UINT8_C(156) }, { UINT8_C( 48), UINT8_C( 44), UINT8_C( 74), UINT8_C(252), UINT8_C( 98), UINT8_C(239), UINT8_C(149), UINT8_C(213), UINT8_C(142), UINT8_C( 50), UINT8_C(158), UINT8_C( 77), UINT8_C( 68), UINT8_C(142), UINT8_C(118), UINT8_C(140) }, { UINT8_C( 47), UINT8_C( 42), UINT8_C( 73), UINT8_C(249), UINT8_C( 97), UINT8_C(237), UINT8_C(146), UINT8_C(210), UINT8_C(141), UINT8_C( 46), UINT8_C(157), UINT8_C( 76), UINT8_C( 66), UINT8_C(141), UINT8_C(114), UINT8_C(137) }, { UINT8_C( 46), UINT8_C( 41), UINT8_C( 72), UINT8_C(248), UINT8_C( 96), UINT8_C(235), UINT8_C(144), UINT8_C(208), UINT8_C(140), UINT8_C( 44), UINT8_C(155), UINT8_C( 74), UINT8_C( 65), UINT8_C(141), UINT8_C(111), UINT8_C(135) } }, { { UINT8_C( 59), UINT8_C( 66), UINT8_C(167), UINT8_C(155), UINT8_C( 45), UINT8_C( 54), UINT8_C(106), UINT8_C(185), UINT8_C( 98), UINT8_C( 6), UINT8_C( 3), UINT8_C(162), UINT8_C(147), UINT8_C(114), UINT8_C( 40), UINT8_C(210) }, { UINT8_C(250), UINT8_C(108), UINT8_C(104), UINT8_C( 49), UINT8_C(230), UINT8_C( 50), UINT8_C(228), UINT8_C( 30), UINT8_C( 13), UINT8_C( 81), UINT8_C(135), UINT8_C(144), UINT8_C(100), UINT8_C(129), UINT8_C( 64), UINT8_C(159) }, { UINT8_C(184), UINT8_C(120), UINT8_C(219), UINT8_C(180), UINT8_C(160), UINT8_C( 79), UINT8_C(220), UINT8_C(200), UINT8_C(105), UINT8_C( 47), UINT8_C( 71), UINT8_C(234), UINT8_C(197), UINT8_C(179), UINT8_C( 72), UINT8_C( 34) }, { UINT8_C( 90), UINT8_C( 80), UINT8_C(180), UINT8_C(161), UINT8_C( 74), UINT8_C( 60), UINT8_C(135), UINT8_C(189), UINT8_C(100), UINT8_C( 16), UINT8_C( 20), UINT8_C(180), UINT8_C(160), UINT8_C(130), UINT8_C( 48), UINT8_C(230) }, { 
UINT8_C( 67), UINT8_C( 69), UINT8_C(170), UINT8_C(157), UINT8_C( 52), UINT8_C( 56), UINT8_C(113), UINT8_C(186), UINT8_C( 98), UINT8_C( 9), UINT8_C( 7), UINT8_C(167), UINT8_C(150), UINT8_C(118), UINT8_C( 42), UINT8_C(215) }, { UINT8_C( 63), UINT8_C( 68), UINT8_C(169), UINT8_C(156), UINT8_C( 49), UINT8_C( 55), UINT8_C(110), UINT8_C(185), UINT8_C( 98), UINT8_C( 7), UINT8_C( 5), UINT8_C(164), UINT8_C(149), UINT8_C(116), UINT8_C( 41), UINT8_C(212) }, { UINT8_C( 60), UINT8_C( 66), UINT8_C(167), UINT8_C(155), UINT8_C( 46), UINT8_C( 54), UINT8_C(107), UINT8_C(185), UINT8_C( 98), UINT8_C( 6), UINT8_C( 4), UINT8_C(163), UINT8_C(147), UINT8_C(115), UINT8_C( 40), UINT8_C(211) } }, { { UINT8_C(196), UINT8_C(232), UINT8_C( 58), UINT8_C(241), UINT8_C( 30), UINT8_C(165), UINT8_C(171), UINT8_C(128), UINT8_C(171), UINT8_C(174), UINT8_C( 34), UINT8_C( 62), UINT8_C( 32), UINT8_C( 75), UINT8_C( 16), UINT8_C( 26) }, { UINT8_C(183), UINT8_C(120), UINT8_C( 76), UINT8_C(157), UINT8_C(170), UINT8_C( 48), UINT8_C(187), UINT8_C(184), UINT8_C(129), UINT8_C( 67), UINT8_C( 72), UINT8_C(229), UINT8_C(196), UINT8_C(136), UINT8_C(132), UINT8_C(136) }, { UINT8_C( 32), UINT8_C( 36), UINT8_C( 96), UINT8_C( 64), UINT8_C(115), UINT8_C(189), UINT8_C( 9), UINT8_C(220), UINT8_C(236), UINT8_C(208), UINT8_C( 70), UINT8_C(177), UINT8_C(130), UINT8_C(143), UINT8_C( 82), UINT8_C( 94) }, { UINT8_C(219), UINT8_C(247), UINT8_C( 68), UINT8_C( 5), UINT8_C( 51), UINT8_C(171), UINT8_C(194), UINT8_C(151), UINT8_C(187), UINT8_C(182), UINT8_C( 43), UINT8_C( 91), UINT8_C( 57), UINT8_C( 92), UINT8_C( 33), UINT8_C( 43) }, { UINT8_C(202), UINT8_C(236), UINT8_C( 60), UINT8_C(246), UINT8_C( 35), UINT8_C(167), UINT8_C(177), UINT8_C(134), UINT8_C(175), UINT8_C(176), UINT8_C( 36), UINT8_C( 69), UINT8_C( 38), UINT8_C( 79), UINT8_C( 20), UINT8_C( 30) }, { UINT8_C(199), UINT8_C(234), UINT8_C( 59), UINT8_C(243), UINT8_C( 33), UINT8_C(166), UINT8_C(174), UINT8_C(131), UINT8_C(173), UINT8_C(175), UINT8_C( 35), UINT8_C( 66), UINT8_C( 
35), UINT8_C( 77), UINT8_C( 18), UINT8_C( 28) }, { UINT8_C(197), UINT8_C(232), UINT8_C( 58), UINT8_C(242), UINT8_C( 31), UINT8_C(165), UINT8_C(172), UINT8_C(129), UINT8_C(172), UINT8_C(174), UINT8_C( 34), UINT8_C( 63), UINT8_C( 33), UINT8_C( 76), UINT8_C( 17), UINT8_C( 27) } }, { { UINT8_C(112), UINT8_C(191), UINT8_C(122), UINT8_C(143), UINT8_C(100), UINT8_C( 37), UINT8_C( 15), UINT8_C( 15), UINT8_C(211), UINT8_C( 50), UINT8_C( 77), UINT8_C(244), UINT8_C(125), UINT8_C( 93), UINT8_C( 14), UINT8_C( 52) }, { UINT8_C(214), UINT8_C( 90), UINT8_C(209), UINT8_C(128), UINT8_C(138), UINT8_C(141), UINT8_C( 56), UINT8_C( 12), UINT8_C(208), UINT8_C(128), UINT8_C(241), UINT8_C(148), UINT8_C( 9), UINT8_C(118), UINT8_C( 29), UINT8_C(121) }, { UINT8_C(219), UINT8_C(236), UINT8_C(227), UINT8_C(207), UINT8_C(169), UINT8_C(108), UINT8_C( 43), UINT8_C( 21), UINT8_C( 59), UINT8_C(114), UINT8_C(198), UINT8_C( 62), UINT8_C(130), UINT8_C(152), UINT8_C( 29), UINT8_C(113) }, { UINT8_C(139), UINT8_C(202), UINT8_C(148), UINT8_C(159), UINT8_C(117), UINT8_C( 55), UINT8_C( 22), UINT8_C( 17), UINT8_C(237), UINT8_C( 66), UINT8_C(107), UINT8_C( 7), UINT8_C(126), UINT8_C(108), UINT8_C( 18), UINT8_C( 67) }, { UINT8_C(119), UINT8_C(194), UINT8_C(129), UINT8_C(147), UINT8_C(104), UINT8_C( 41), UINT8_C( 17), UINT8_C( 15), UINT8_C(218), UINT8_C( 54), UINT8_C( 85), UINT8_C(249), UINT8_C(125), UINT8_C( 97), UINT8_C( 15), UINT8_C( 56) }, { UINT8_C(115), UINT8_C(192), UINT8_C(125), UINT8_C(145), UINT8_C(102), UINT8_C( 39), UINT8_C( 16), UINT8_C( 15), UINT8_C(214), UINT8_C( 52), UINT8_C( 81), UINT8_C(246), UINT8_C(125), UINT8_C( 95), UINT8_C( 14), UINT8_C( 54) }, { UINT8_C(113), UINT8_C(191), UINT8_C(123), UINT8_C(144), UINT8_C(101), UINT8_C( 38), UINT8_C( 15), UINT8_C( 15), UINT8_C(212), UINT8_C( 51), UINT8_C( 78), UINT8_C(245), UINT8_C(125), UINT8_C( 93), UINT8_C( 14), UINT8_C( 52) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = 
simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r1 = simde_vrsraq_n_u8(a, b, 1); simde_uint8x16_t r3 = simde_vrsraq_n_u8(a, b, 3); simde_uint8x16_t r5 = simde_vrsraq_n_u8(a, b, 5); simde_uint8x16_t r6 = simde_vrsraq_n_u8(a, b, 6); simde_uint8x16_t r8 = simde_vrsraq_n_u8(a, b, 8); simde_test_arm_neon_assert_equal_u8x16(r1, simde_vld1q_u8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u8x16(r3, simde_vld1q_u8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u8x16(r5, simde_vld1q_u8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_u8x16(r6, simde_vld1q_u8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u8x16(r8, simde_vld1q_u8(test_vec[i].r8)); } return 0; } static int test_simde_vrsraq_n_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r3[8]; uint16_t r6[8]; uint16_t r10[8]; uint16_t r13[8]; uint16_t r16[8]; } test_vec[] = { { { UINT16_C(45310), UINT16_C(11477), UINT16_C(43813), UINT16_C( 3311), UINT16_C(63489), UINT16_C(61402), UINT16_C(30085), UINT16_C( 6127) }, { UINT16_C( 6624), UINT16_C(38234), UINT16_C(30401), UINT16_C(39166), UINT16_C(25325), UINT16_C(32420), UINT16_C(63541), UINT16_C(13097) }, { UINT16_C(46138), UINT16_C(16256), UINT16_C(47613), UINT16_C( 8207), UINT16_C( 1119), UINT16_C(65455), UINT16_C(38028), UINT16_C( 7764) }, { UINT16_C(45414), UINT16_C(12074), UINT16_C(44288), UINT16_C( 3923), UINT16_C(63885), UINT16_C(61909), UINT16_C(31078), UINT16_C( 6332) }, { UINT16_C(45316), UINT16_C(11514), UINT16_C(43843), UINT16_C( 3349), UINT16_C(63514), UINT16_C(61434), UINT16_C(30147), UINT16_C( 6140) }, { UINT16_C(45311), UINT16_C(11482), UINT16_C(43817), UINT16_C( 3316), UINT16_C(63492), UINT16_C(61406), UINT16_C(30093), UINT16_C( 6129) }, { UINT16_C(45310), UINT16_C(11478), UINT16_C(43813), UINT16_C( 3312), UINT16_C(63489), UINT16_C(61402), UINT16_C(30086), UINT16_C( 6127) } }, { { UINT16_C(65192), UINT16_C(52575), UINT16_C(20138), UINT16_C(43993), UINT16_C(45894), 
UINT16_C(52123), UINT16_C(35369), UINT16_C( 2530) }, { UINT16_C(15523), UINT16_C(26014), UINT16_C(40115), UINT16_C(41213), UINT16_C(41471), UINT16_C(13342), UINT16_C(18585), UINT16_C(16743) }, { UINT16_C( 1596), UINT16_C(55827), UINT16_C(25152), UINT16_C(49145), UINT16_C(51078), UINT16_C(53791), UINT16_C(37692), UINT16_C( 4623) }, { UINT16_C(65435), UINT16_C(52981), UINT16_C(20765), UINT16_C(44637), UINT16_C(46542), UINT16_C(52331), UINT16_C(35659), UINT16_C( 2792) }, { UINT16_C(65207), UINT16_C(52600), UINT16_C(20177), UINT16_C(44033), UINT16_C(45934), UINT16_C(52136), UINT16_C(35387), UINT16_C( 2546) }, { UINT16_C(65194), UINT16_C(52578), UINT16_C(20143), UINT16_C(43998), UINT16_C(45899), UINT16_C(52125), UINT16_C(35371), UINT16_C( 2532) }, { UINT16_C(65192), UINT16_C(52575), UINT16_C(20139), UINT16_C(43994), UINT16_C(45895), UINT16_C(52123), UINT16_C(35369), UINT16_C( 2530) } }, { { UINT16_C(50758), UINT16_C(61454), UINT16_C(59412), UINT16_C(23196), UINT16_C(14235), UINT16_C(50213), UINT16_C( 2241), UINT16_C(26061) }, { UINT16_C(27716), UINT16_C(63434), UINT16_C(50952), UINT16_C( 1943), UINT16_C(46697), UINT16_C( 571), UINT16_C(41726), UINT16_C(17476) }, { UINT16_C(54223), UINT16_C( 3847), UINT16_C( 245), UINT16_C(23439), UINT16_C(20072), UINT16_C(50284), UINT16_C( 7457), UINT16_C(28246) }, { UINT16_C(51191), UINT16_C(62445), UINT16_C(60208), UINT16_C(23226), UINT16_C(14965), UINT16_C(50222), UINT16_C( 2893), UINT16_C(26334) }, { UINT16_C(50785), UINT16_C(61516), UINT16_C(59462), UINT16_C(23198), UINT16_C(14281), UINT16_C(50214), UINT16_C( 2282), UINT16_C(26078) }, { UINT16_C(50761), UINT16_C(61462), UINT16_C(59418), UINT16_C(23196), UINT16_C(14241), UINT16_C(50213), UINT16_C( 2246), UINT16_C(26063) }, { UINT16_C(50758), UINT16_C(61455), UINT16_C(59413), UINT16_C(23196), UINT16_C(14236), UINT16_C(50213), UINT16_C( 2242), UINT16_C(26061) } }, { { UINT16_C(21096), UINT16_C(31797), UINT16_C(53562), UINT16_C(54998), UINT16_C(64520), UINT16_C(51610), UINT16_C(26628), 
UINT16_C(18478) }, { UINT16_C(63700), UINT16_C(56384), UINT16_C(55232), UINT16_C(10724), UINT16_C( 8077), UINT16_C(35627), UINT16_C(28610), UINT16_C(10960) }, { UINT16_C(29059), UINT16_C(38845), UINT16_C(60466), UINT16_C(56339), UINT16_C(65530), UINT16_C(56063), UINT16_C(30204), UINT16_C(19848) }, { UINT16_C(22091), UINT16_C(32678), UINT16_C(54425), UINT16_C(55166), UINT16_C(64646), UINT16_C(52167), UINT16_C(27075), UINT16_C(18649) }, { UINT16_C(21158), UINT16_C(31852), UINT16_C(53616), UINT16_C(55008), UINT16_C(64528), UINT16_C(51645), UINT16_C(26656), UINT16_C(18489) }, { UINT16_C(21104), UINT16_C(31804), UINT16_C(53569), UINT16_C(54999), UINT16_C(64521), UINT16_C(51614), UINT16_C(26631), UINT16_C(18479) }, { UINT16_C(21097), UINT16_C(31798), UINT16_C(53563), UINT16_C(54998), UINT16_C(64520), UINT16_C(51611), UINT16_C(26628), UINT16_C(18478) } }, { { UINT16_C( 1474), UINT16_C(64679), UINT16_C(32214), UINT16_C(57042), UINT16_C(28025), UINT16_C(32167), UINT16_C(54997), UINT16_C(43462) }, { UINT16_C( 1742), UINT16_C(36485), UINT16_C(27101), UINT16_C(27575), UINT16_C(58249), UINT16_C(19446), UINT16_C(50770), UINT16_C( 5237) }, { UINT16_C( 1692), UINT16_C( 3704), UINT16_C(35602), UINT16_C(60489), UINT16_C(35306), UINT16_C(34598), UINT16_C(61343), UINT16_C(44117) }, { UINT16_C( 1501), UINT16_C(65249), UINT16_C(32637), UINT16_C(57473), UINT16_C(28935), UINT16_C(32471), UINT16_C(55790), UINT16_C(43544) }, { UINT16_C( 1476), UINT16_C(64715), UINT16_C(32240), UINT16_C(57069), UINT16_C(28082), UINT16_C(32186), UINT16_C(55047), UINT16_C(43467) }, { UINT16_C( 1474), UINT16_C(64683), UINT16_C(32217), UINT16_C(57045), UINT16_C(28032), UINT16_C(32169), UINT16_C(55003), UINT16_C(43463) }, { UINT16_C( 1474), UINT16_C(64680), UINT16_C(32214), UINT16_C(57042), UINT16_C(28026), UINT16_C(32167), UINT16_C(54998), UINT16_C(43462) } }, { { UINT16_C( 7371), UINT16_C(41233), UINT16_C(58266), UINT16_C( 4991), UINT16_C(10064), UINT16_C( 9617), UINT16_C(22525), UINT16_C(52174) }, { 
UINT16_C(21597), UINT16_C(14938), UINT16_C( 4541), UINT16_C(18085), UINT16_C(40180), UINT16_C(18321), UINT16_C( 1890), UINT16_C(11867) }, { UINT16_C(10071), UINT16_C(43100), UINT16_C(58834), UINT16_C( 7252), UINT16_C(15087), UINT16_C(11907), UINT16_C(22761), UINT16_C(53657) }, { UINT16_C( 7708), UINT16_C(41466), UINT16_C(58337), UINT16_C( 5274), UINT16_C(10692), UINT16_C( 9903), UINT16_C(22555), UINT16_C(52359) }, { UINT16_C( 7392), UINT16_C(41248), UINT16_C(58270), UINT16_C( 5009), UINT16_C(10103), UINT16_C( 9635), UINT16_C(22527), UINT16_C(52186) }, { UINT16_C( 7374), UINT16_C(41235), UINT16_C(58267), UINT16_C( 4993), UINT16_C(10069), UINT16_C( 9619), UINT16_C(22525), UINT16_C(52175) }, { UINT16_C( 7371), UINT16_C(41233), UINT16_C(58266), UINT16_C( 4991), UINT16_C(10065), UINT16_C( 9617), UINT16_C(22525), UINT16_C(52174) } }, { { UINT16_C(27683), UINT16_C(48591), UINT16_C(20304), UINT16_C(41169), UINT16_C(25206), UINT16_C(29638), UINT16_C(38073), UINT16_C( 5694) }, { UINT16_C(39144), UINT16_C(42576), UINT16_C(63146), UINT16_C(40684), UINT16_C(32402), UINT16_C(62693), UINT16_C(16773), UINT16_C(43042) }, { UINT16_C(32576), UINT16_C(53913), UINT16_C(28197), UINT16_C(46255), UINT16_C(29256), UINT16_C(37475), UINT16_C(40170), UINT16_C(11074) }, { UINT16_C(28295), UINT16_C(49256), UINT16_C(21291), UINT16_C(41805), UINT16_C(25712), UINT16_C(30618), UINT16_C(38335), UINT16_C( 6367) }, { UINT16_C(27721), UINT16_C(48633), UINT16_C(20366), UINT16_C(41209), UINT16_C(25238), UINT16_C(29699), UINT16_C(38089), UINT16_C( 5736) }, { UINT16_C(27688), UINT16_C(48596), UINT16_C(20312), UINT16_C(41174), UINT16_C(25210), UINT16_C(29646), UINT16_C(38075), UINT16_C( 5699) }, { UINT16_C(27684), UINT16_C(48592), UINT16_C(20305), UINT16_C(41170), UINT16_C(25206), UINT16_C(29639), UINT16_C(38073), UINT16_C( 5695) } }, { { UINT16_C(62125), UINT16_C(64870), UINT16_C(14145), UINT16_C(47006), UINT16_C(25753), UINT16_C(21034), UINT16_C(26872), UINT16_C(57704) }, { UINT16_C(47105), 
UINT16_C(43911), UINT16_C(29614), UINT16_C(16457), UINT16_C(12273), UINT16_C(30261), UINT16_C(22384), UINT16_C( 7455) }, { UINT16_C( 2477), UINT16_C( 4823), UINT16_C(17847), UINT16_C(49063), UINT16_C(27287), UINT16_C(24817), UINT16_C(29670), UINT16_C(58636) }, { UINT16_C(62861), UINT16_C( 20), UINT16_C(14608), UINT16_C(47263), UINT16_C(25945), UINT16_C(21507), UINT16_C(27222), UINT16_C(57820) }, { UINT16_C(62171), UINT16_C(64913), UINT16_C(14174), UINT16_C(47022), UINT16_C(25765), UINT16_C(21064), UINT16_C(26894), UINT16_C(57711) }, { UINT16_C(62131), UINT16_C(64875), UINT16_C(14149), UINT16_C(47008), UINT16_C(25754), UINT16_C(21038), UINT16_C(26875), UINT16_C(57705) }, { UINT16_C(62126), UINT16_C(64871), UINT16_C(14145), UINT16_C(47006), UINT16_C(25753), UINT16_C(21034), UINT16_C(26872), UINT16_C(57704) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r3 = simde_vrsraq_n_u16(a, b, 3); simde_uint16x8_t r6 = simde_vrsraq_n_u16(a, b, 6); simde_uint16x8_t r10 = simde_vrsraq_n_u16(a, b, 10); simde_uint16x8_t r13 = simde_vrsraq_n_u16(a, b, 13); simde_uint16x8_t r16 = simde_vrsraq_n_u16(a, b, 16); simde_test_arm_neon_assert_equal_u16x8(r3, simde_vld1q_u16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u16x8(r6, simde_vld1q_u16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u16x8(r10, simde_vld1q_u16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_u16x8(r13, simde_vld1q_u16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u16x8(r16, simde_vld1q_u16(test_vec[i].r16)); } return 0; } static int test_simde_vrsraq_n_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r6[4]; uint32_t r13[4]; uint32_t r19[4]; uint32_t r26[4]; uint32_t r32[4]; } test_vec[] = { { { UINT32_C(2454946824), UINT32_C( 868430946), UINT32_C( 94292273), UINT32_C(4194192115) }, { UINT32_C(3439656751), 
UINT32_C(2286051294), UINT32_C(3408966289), UINT32_C(3316734140) }, { UINT32_C(2508691461), UINT32_C( 904150497), UINT32_C( 147557371), UINT32_C(4246016086) }, { UINT32_C(2455366704), UINT32_C( 868710005), UINT32_C( 94708407), UINT32_C(4194596990) }, { UINT32_C(2454953385), UINT32_C( 868435306), UINT32_C( 94298775), UINT32_C(4194198441) }, { UINT32_C(2454946875), UINT32_C( 868430980), UINT32_C( 94292324), UINT32_C(4194192164) }, { UINT32_C(2454946825), UINT32_C( 868430947), UINT32_C( 94292274), UINT32_C(4194192116) } }, { { UINT32_C(1062667484), UINT32_C(1769085496), UINT32_C(3597537507), UINT32_C(2312072282) }, { UINT32_C(1247269739), UINT32_C(3285358898), UINT32_C( 143524683), UINT32_C( 952975451) }, { UINT32_C(1082156074), UINT32_C(1820419229), UINT32_C(3599780080), UINT32_C(2326962523) }, { UINT32_C(1062819739), UINT32_C(1769486541), UINT32_C(3597555027), UINT32_C(2312188612) }, { UINT32_C(1062669863), UINT32_C(1769091762), UINT32_C(3597537781), UINT32_C(2312074100) }, { UINT32_C(1062667503), UINT32_C(1769085545), UINT32_C(3597537509), UINT32_C(2312072296) }, { UINT32_C(1062667484), UINT32_C(1769085497), UINT32_C(3597537507), UINT32_C(2312072282) } }, { { UINT32_C(2104960068), UINT32_C( 568781118), UINT32_C(1425495545), UINT32_C( 769509057) }, { UINT32_C(3413587097), UINT32_C( 428755405), UINT32_C(2820742220), UINT32_C(2715872860) }, { UINT32_C(2158297366), UINT32_C( 575480421), UINT32_C(1469569642), UINT32_C( 811944570) }, { UINT32_C(2105376766), UINT32_C( 568833456), UINT32_C(1425839874), UINT32_C( 769840584) }, { UINT32_C(2104966579), UINT32_C( 568781936), UINT32_C(1425500925), UINT32_C( 769514237) }, { UINT32_C(2104960119), UINT32_C( 568781124), UINT32_C(1425495587), UINT32_C( 769509097) }, { UINT32_C(2104960069), UINT32_C( 568781118), UINT32_C(1425495546), UINT32_C( 769509058) } }, { { UINT32_C(1344165650), UINT32_C( 963707968), UINT32_C( 462252121), UINT32_C(3343412014) }, { UINT32_C(1838333855), UINT32_C(1434853384), UINT32_C(2583537468), 
UINT32_C(2805652885) }, { UINT32_C(1372889616), UINT32_C( 986127552), UINT32_C( 502619894), UINT32_C(3387250340) }, { UINT32_C(1344390056), UINT32_C( 963883121), UINT32_C( 462567494), UINT32_C(3343754501) }, { UINT32_C(1344169156), UINT32_C( 963710705), UINT32_C( 462257049), UINT32_C(3343417365) }, { UINT32_C(1344165677), UINT32_C( 963707989), UINT32_C( 462252159), UINT32_C(3343412056) }, { UINT32_C(1344165650), UINT32_C( 963707968), UINT32_C( 462252122), UINT32_C(3343412015) } }, { { UINT32_C(1962367028), UINT32_C(3064817756), UINT32_C(4275125200), UINT32_C(1170545062) }, { UINT32_C(3769784280), UINT32_C(3006609527), UINT32_C(1951150815), UINT32_C(1125877263) }, { UINT32_C(2021269907), UINT32_C(3111796030), UINT32_C( 10644635), UINT32_C(1188136894) }, { UINT32_C(1962827207), UINT32_C(3065184774), UINT32_C(4275363378), UINT32_C(1170682498) }, { UINT32_C(1962374218), UINT32_C(3064823491), UINT32_C(4275128922), UINT32_C(1170547209) }, { UINT32_C(1962367084), UINT32_C(3064817801), UINT32_C(4275125229), UINT32_C(1170545079) }, { UINT32_C(1962367029), UINT32_C(3064817757), UINT32_C(4275125200), UINT32_C(1170545062) } }, { { UINT32_C(1001853662), UINT32_C(1257334138), UINT32_C(1179173536), UINT32_C(3012234715) }, { UINT32_C(3683860068), UINT32_C(1452263798), UINT32_C( 197843963), UINT32_C(1078912610) }, { UINT32_C(1059413976), UINT32_C(1280025760), UINT32_C(1182264848), UINT32_C(3029092725) }, { UINT32_C(1002303352), UINT32_C(1257511416), UINT32_C(1179197687), UINT32_C(3012366418) }, { UINT32_C(1001860688), UINT32_C(1257336908), UINT32_C(1179173913), UINT32_C(3012236773) }, { UINT32_C(1001853717), UINT32_C(1257334160), UINT32_C(1179173539), UINT32_C(3012234731) }, { UINT32_C(1001853663), UINT32_C(1257334138), UINT32_C(1179173536), UINT32_C(3012234715) } }, { { UINT32_C(1937442552), UINT32_C( 196963435), UINT32_C( 156304942), UINT32_C(2025643027) }, { UINT32_C(2438156314), UINT32_C( 350741017), UINT32_C( 538948030), UINT32_C(2422238871) }, { UINT32_C(1975538744), 
UINT32_C( 202443763), UINT32_C( 164726005), UINT32_C(2063490509) }, { UINT32_C(1937740179), UINT32_C( 197006250), UINT32_C( 156370732), UINT32_C(2025938710) }, { UINT32_C(1937447202), UINT32_C( 196964104), UINT32_C( 156305970), UINT32_C(2025647647) }, { UINT32_C(1937442588), UINT32_C( 196963440), UINT32_C( 156304950), UINT32_C(2025643063) }, { UINT32_C(1937442553), UINT32_C( 196963435), UINT32_C( 156304942), UINT32_C(2025643028) } }, { { UINT32_C(3741572212), UINT32_C(2011873352), UINT32_C(3665836998), UINT32_C( 844250391) }, { UINT32_C(2797839757), UINT32_C(1186638472), UINT32_C(4083604059), UINT32_C(3162752584) }, { UINT32_C(3785288458), UINT32_C(2030414578), UINT32_C(3729643311), UINT32_C( 893668400) }, { UINT32_C(3741913745), UINT32_C(2012018205), UINT32_C(3666335485), UINT32_C( 844636469) }, { UINT32_C(3741577548), UINT32_C(2011875615), UINT32_C(3665844787), UINT32_C( 844256423) }, { UINT32_C(3741572254), UINT32_C(2011873370), UINT32_C(3665837059), UINT32_C( 844250438) }, { UINT32_C(3741572213), UINT32_C(2011873352), UINT32_C(3665836999), UINT32_C( 844250392) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r6 = simde_vrsraq_n_u32(a, b, 6); simde_uint32x4_t r13 = simde_vrsraq_n_u32(a, b, 13); simde_uint32x4_t r19 = simde_vrsraq_n_u32(a, b, 19); simde_uint32x4_t r26 = simde_vrsraq_n_u32(a, b, 26); simde_uint32x4_t r32 = simde_vrsraq_n_u32(a, b, 32); simde_test_arm_neon_assert_equal_u32x4(r6, simde_vld1q_u32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u32x4(r13, simde_vld1q_u32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u32x4(r19, simde_vld1q_u32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_u32x4(r26, simde_vld1q_u32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u32x4(r32, simde_vld1q_u32(test_vec[i].r32)); } return 0; } static int test_simde_vrsraq_n_u64 (SIMDE_MUNIT_TEST_ARGS) { 
struct { uint64_t a[2]; uint64_t b[2]; uint64_t r13[2]; uint64_t r26[2]; uint64_t r39[2]; uint64_t r52[2]; uint64_t r64[2]; } test_vec[] = { { { UINT64_C( 2393586352393986628), UINT64_C( 3643855986487485346) }, { UINT64_C( 8639460913876601398), UINT64_C(13301034549434564982) }, { UINT64_C( 2394640974087575080), UINT64_C( 3645479647931508120) }, { UINT64_C( 2393586481131986334), UINT64_C( 3643856184688345212) }, { UINT64_C( 2393586352409701716), UINT64_C( 3643855986511679787) }, { UINT64_C( 2393586352393988546), UINT64_C( 3643855986487488299) }, { UINT64_C( 2393586352393986628), UINT64_C( 3643855986487485347) } }, { { UINT64_C(11358642286101086310), UINT64_C(12703782061007496804) }, { UINT64_C(14399410231188048240), UINT64_C( 4460751483934020284) }, { UINT64_C(11360400026607823132), UINT64_C(12704326586335125469) }, { UINT64_C(11358642500669019261), UINT64_C(12703782127477873712) }, { UINT64_C(11358642286127278685), UINT64_C(12703782061015610864) }, { UINT64_C(11358642286101089507), UINT64_C(12703782061007497794) }, { UINT64_C(11358642286101086311), UINT64_C(12703782061007496804) } }, { { UINT64_C( 1557508458989728006), UINT64_C( 5220372188488593992) }, { UINT64_C( 4565939203298658373), UINT64_C( 6314141585117187211) }, { UINT64_C( 1558065824615130674), UINT64_C( 5221142957725058492) }, { UINT64_C( 1557508527027524076), UINT64_C( 5220372282576635553) }, { UINT64_C( 1557508458998033401), UINT64_C( 5220372188500079349) }, { UINT64_C( 1557508458989729020), UINT64_C( 5220372188488595394) }, { UINT64_C( 1557508458989728006), UINT64_C( 5220372188488593992) } }, { { UINT64_C( 634714805581143815), UINT64_C(13837094878020566759) }, { UINT64_C(12792108764284497092), UINT64_C( 5974222042828109119) }, { UINT64_C( 636276342295534012), UINT64_C(13837824153172279175) }, { UINT64_C( 634714996198418521), UINT64_C(13837094967043412427) }, { UINT64_C( 634714805604412525), UINT64_C(13837094878031433806) }, { UINT64_C( 634714805581146655), UINT64_C(13837094878020568086) }, { UINT64_C( 
634714805581143816), UINT64_C(13837094878020566759) } }, { { UINT64_C( 2151550972685074450), UINT64_C(12038654939625203414) }, { UINT64_C( 6442246549575460752), UINT64_C( 4613886341815040618) }, { UINT64_C( 2152337379734583173), UINT64_C(12039218158172788258) }, { UINT64_C( 2151551068682028736), UINT64_C(12038655008377467523) }, { UINT64_C( 2151550972696792828), UINT64_C(12038654939633596024) }, { UINT64_C( 2151550972685075880), UINT64_C(12038654939625204438) }, { UINT64_C( 2151550972685074450), UINT64_C(12038654939625203414) } }, { { UINT64_C(13863469342447046929), UINT64_C(13050906310178062485) }, { UINT64_C(17076755345611361673), UINT64_C(14536397728258319948) }, { UINT64_C(13865553907308571753), UINT64_C(13052680772791375268) }, { UINT64_C(13863469596910531002), UINT64_C(13050906526787268212) }, { UINT64_C(13863469342478109366), UINT64_C(13050906310204504038) }, { UINT64_C(13863469342447050721), UINT64_C(13050906310178065713) }, { UINT64_C(13863469342447046930), UINT64_C(13050906310178062486) } }, { { UINT64_C( 5936998566614585017), UINT64_C( 2418125793131132340) }, { UINT64_C( 1685403765198682613), UINT64_C( 7361218993670194343) }, { UINT64_C( 5937204304378891497), UINT64_C( 2419024379434070596) }, { UINT64_C( 5936998591729058199), UINT64_C( 2418125902821843148) }, { UINT64_C( 5936998566617650749), UINT64_C( 2418125793144522319) }, { UINT64_C( 5936998566614585391), UINT64_C( 2418125793131133975) }, { UINT64_C( 5936998566614585017), UINT64_C( 2418125793131132340) } }, { { UINT64_C( 6929753040782702619), UINT64_C(15248282196733720035) }, { UINT64_C(14601705884549341632), UINT64_C(11368582454750819763) }, { UINT64_C( 6931535475583062646), UINT64_C(15249669963146653485) }, { UINT64_C( 6929753258365075710), UINT64_C(15248282366138799739) }, { UINT64_C( 6929753040809262967), UINT64_C(15248282196754399366) }, { UINT64_C( 6929753040782705861), UINT64_C(15248282196733722559) }, { UINT64_C( 6929753040782702620), UINT64_C(15248282196733720036) } }, }; for (size_t i = 0 ; 
i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r13 = simde_vrsraq_n_u64(a, b, 13); simde_uint64x2_t r26 = simde_vrsraq_n_u64(a, b, 26); simde_uint64x2_t r39 = simde_vrsraq_n_u64(a, b, 39); simde_uint64x2_t r52 = simde_vrsraq_n_u64(a, b, 52); simde_uint64x2_t r64 = simde_vrsraq_n_u64(a, b, 64); simde_test_arm_neon_assert_equal_u64x2(r13, simde_vld1q_u64(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u64x2(r26, simde_vld1q_u64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u64x2(r39, simde_vld1q_u64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_u64x2(r52, simde_vld1q_u64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_u64x2(r64, simde_vld1q_u64(test_vec[i].r64)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vrsra_n_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vrsra_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vrsra_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vrsra_n_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vrsra_n_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vrsra_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vrsra_n_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vrsra_n_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vrsraq_n_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vrsraq_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vrsraq_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vrsraq_n_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vrsraq_n_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vrsraq_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vrsraq_n_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vrsraq_n_u64) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/arm/neon/run-tests-neon.h000066400000000000000000000001221400333146700203640ustar00rootroot00000000000000#include "../../munit/munit.h" MunitSuite* simde_tests_arm_neon_get_suite(void); simde-0.7.2/test/arm/neon/run-tests.c000066400000000000000000000020761400333146700174340ustar00rootroot00000000000000#include "test-neon.h" #include "run-tests.h" static MunitSuite suites[] = { #define SIMDE_TEST_DECLARE_SUITE(name) \ { NULL, NULL, NULL, 1, 
MUNIT_SUITE_OPTION_NONE }, \ { NULL, NULL, NULL, 1, MUNIT_SUITE_OPTION_NONE }, \ { NULL, NULL, NULL, 1, MUNIT_SUITE_OPTION_NONE }, \ { NULL, NULL, NULL, 1, MUNIT_SUITE_OPTION_NONE }, #include "declare-suites.h" #undef SIMDE_TEST_DECLARE_SUITE { NULL, NULL, NULL, 0, MUNIT_SUITE_OPTION_NONE } }; static MunitSuite suite = { "/neon", NULL, suites, 1, MUNIT_SUITE_OPTION_NONE }; MunitSuite* simde_tests_arm_neon_get_suite(void) { int i = 0; #define SIMDE_TEST_DECLARE_SUITE(name) \ suites[i++] = *HEDLEY_CONCAT3(simde_test_arm_neon_get_suite_, name, _native_c)(); \ suites[i++] = *HEDLEY_CONCAT3(simde_test_arm_neon_get_suite_, name, _native_cpp)(); \ suites[i++] = *HEDLEY_CONCAT3(simde_test_arm_neon_get_suite_, name, _emul_c)(); \ suites[i++] = *HEDLEY_CONCAT3(simde_test_arm_neon_get_suite_, name, _emul_cpp)(); #include "declare-suites.h" #undef SIMDE_TEST_DECLARE_SUITE return &suite; } simde-0.7.2/test/arm/neon/run-tests.h000066400000000000000000000003211400333146700174300ustar00rootroot00000000000000#if defined(SIMDE_TESTS_ARM_NEON_RUN_TESTS_H) #error File already included. 
#endif #define SIMDE_TESTS_ARM_NEON_RUN_TESTS_H #include "../../munit/munit.h" MunitSuite* simde_tests_arm_neon_get_suite(void); simde-0.7.2/test/arm/neon/set_lane.c000066400000000000000000002037301400333146700172620ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN set_lane #include "test-neon.h" #include "../../../simde/arm/neon/set_lane.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ static int test_simde_vset_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a; simde_float32 v[2]; simde_float32 r[2]; int lane; } test_vec[] = { { SIMDE_FLOAT32_C( -373.45), { SIMDE_FLOAT32_C( -29.14), SIMDE_FLOAT32_C( -643.20) }, { SIMDE_FLOAT32_C( -373.45), SIMDE_FLOAT32_C( -643.20) }, INT32_C( 0) }, { SIMDE_FLOAT32_C( -393.25), { SIMDE_FLOAT32_C( 559.43), SIMDE_FLOAT32_C( 188.28) }, { SIMDE_FLOAT32_C( 559.43), SIMDE_FLOAT32_C( -393.25) }, INT32_C( 1) }, { SIMDE_FLOAT32_C( -417.57), { SIMDE_FLOAT32_C( -310.73), SIMDE_FLOAT32_C( -403.05) }, { SIMDE_FLOAT32_C( -417.57), SIMDE_FLOAT32_C( -403.05) }, INT32_C( 0) }, { SIMDE_FLOAT32_C( 648.73), { SIMDE_FLOAT32_C( -615.41), SIMDE_FLOAT32_C( -429.89) }, { SIMDE_FLOAT32_C( -615.41), SIMDE_FLOAT32_C( 648.73) }, INT32_C( 1) }, { SIMDE_FLOAT32_C( -883.49), { SIMDE_FLOAT32_C( 683.52), SIMDE_FLOAT32_C( -808.37) }, { SIMDE_FLOAT32_C( -883.49), SIMDE_FLOAT32_C( -808.37) }, INT32_C( 0) }, { SIMDE_FLOAT32_C( -958.79), { SIMDE_FLOAT32_C( -189.00), SIMDE_FLOAT32_C( -443.44) }, { SIMDE_FLOAT32_C( -189.00), SIMDE_FLOAT32_C( -958.79) }, INT32_C( 1) }, { SIMDE_FLOAT32_C( -384.49), { SIMDE_FLOAT32_C( -569.39), SIMDE_FLOAT32_C( 61.86) }, { SIMDE_FLOAT32_C( -384.49), SIMDE_FLOAT32_C( 61.86) }, INT32_C( 0) }, { SIMDE_FLOAT32_C( -147.57), { SIMDE_FLOAT32_C( -852.16), SIMDE_FLOAT32_C( 891.56) }, { SIMDE_FLOAT32_C( -852.16), SIMDE_FLOAT32_C( -147.57) }, INT32_C( 1) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32 a = test_vec[i].a; simde_float32x2_t v = 
simde_vld1_f32(test_vec[i].v); int lane = test_vec[i].lane; simde_float32x2_t r; SIMDE_CONSTIFY_2_(simde_vset_lane_f32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; for (int i = 0 ; i < 8 ; i++) { simde_float32 a = simde_test_codegen_random_f32(-1000.0f, 1000.0f); simde_float32x2_t v = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); int lane = lanes[i]; simde_float32x2_t r; SIMDE_CONSTIFY_2_(simde_vset_lane_f32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_codegen_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vset_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a; simde_float64 v[1]; simde_float64 r[1]; } test_vec[] = { { SIMDE_FLOAT64_C( -818.08), { SIMDE_FLOAT64_C( -697.80) }, { SIMDE_FLOAT64_C( -818.08) } }, { SIMDE_FLOAT64_C( 601.34), { SIMDE_FLOAT64_C( 459.90) }, { SIMDE_FLOAT64_C( 601.34) } }, { SIMDE_FLOAT64_C( 447.26), { SIMDE_FLOAT64_C( -758.22) }, { SIMDE_FLOAT64_C( 447.26) } }, { SIMDE_FLOAT64_C( 101.07), { SIMDE_FLOAT64_C( 15.34) }, { SIMDE_FLOAT64_C( 101.07) } }, { SIMDE_FLOAT64_C( 79.96), { SIMDE_FLOAT64_C( 259.71) }, { SIMDE_FLOAT64_C( 79.96) } }, { SIMDE_FLOAT64_C( -390.89), { SIMDE_FLOAT64_C( 316.98) }, { SIMDE_FLOAT64_C( -390.89) } }, { SIMDE_FLOAT64_C( 966.83), { SIMDE_FLOAT64_C( 767.40) }, { SIMDE_FLOAT64_C( 966.83) } }, { SIMDE_FLOAT64_C( -253.76), { SIMDE_FLOAT64_C( -391.61) }, { SIMDE_FLOAT64_C( -253.76) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64 a = test_vec[i].a; simde_float64x1_t v = simde_vld1_f64(test_vec[i].v); simde_float64x1_t r = 
simde_vset_lane_f64(a, v, 0); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64 a = simde_test_codegen_random_f64(-1000.0, 1000.0); simde_float64x1_t v = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t r = simde_vset_lane_f64(a, v, 0); simde_test_codegen_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vset_lane_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a; int8_t v[8]; int8_t r[8]; int lane; } test_vec[] = { { INT8_C( 70), { -INT8_C( 27), INT8_C( 121), -INT8_C( 72), -INT8_C( 82), INT8_C( 88), -INT8_C( 125), -INT8_C( 103), INT8_C( 16) }, { INT8_C( 70), INT8_C( 121), -INT8_C( 72), -INT8_C( 82), INT8_C( 88), -INT8_C( 125), -INT8_C( 103), INT8_C( 16) }, INT32_C( 0) }, { -INT8_C( 35), { INT8_C( 110), -INT8_C( 50), INT8_C( 63), INT8_C( 98), INT8_C( 28), INT8_C( 122), INT8_MAX, -INT8_C( 19) }, { INT8_C( 110), -INT8_C( 35), INT8_C( 63), INT8_C( 98), INT8_C( 28), INT8_C( 122), INT8_MAX, -INT8_C( 19) }, INT32_C( 1) }, { INT8_C( 7), { INT8_C( 91), INT8_C( 62), INT8_C( 61), -INT8_C( 17), INT8_C( 45), -INT8_C( 120), -INT8_C( 38), INT8_C( 107) }, { INT8_C( 91), INT8_C( 62), INT8_C( 7), -INT8_C( 17), INT8_C( 45), -INT8_C( 120), -INT8_C( 38), INT8_C( 107) }, INT32_C( 2) }, { -INT8_C( 111), { -INT8_C( 115), -INT8_C( 114), -INT8_C( 44), -INT8_C( 45), INT8_C( 115), INT8_C( 77), -INT8_C( 117), INT8_C( 33) }, { -INT8_C( 115), -INT8_C( 114), -INT8_C( 44), -INT8_C( 111), INT8_C( 115), INT8_C( 77), -INT8_C( 117), INT8_C( 33) }, INT32_C( 3) }, { -INT8_C( 91), { INT8_C( 15), -INT8_C( 70), -INT8_C( 74), -INT8_C( 20), INT8_C( 40), -INT8_C( 124), INT8_C( 44), -INT8_C( 118) }, { INT8_C( 15), -INT8_C( 70), -INT8_C( 74), -INT8_C( 20), -INT8_C( 91), -INT8_C( 124), INT8_C( 44), 
-INT8_C( 118) }, INT32_C( 4) }, { -INT8_C( 96), { -INT8_C( 90), INT8_C( 10), -INT8_C( 114), -INT8_C( 83), INT8_C( 101), -INT8_C( 52), -INT8_C( 22), INT8_C( 84) }, { -INT8_C( 90), INT8_C( 10), -INT8_C( 114), -INT8_C( 83), INT8_C( 101), -INT8_C( 96), -INT8_C( 22), INT8_C( 84) }, INT32_C( 5) }, { -INT8_C( 7), { INT8_C( 114), INT8_C( 47), INT8_C( 100), INT8_C( 3), -INT8_C( 68), -INT8_C( 14), -INT8_C( 41), -INT8_C( 112) }, { INT8_C( 114), INT8_C( 47), INT8_C( 100), INT8_C( 3), -INT8_C( 68), -INT8_C( 14), -INT8_C( 7), -INT8_C( 112) }, INT32_C( 6) }, { INT8_C( 101), { INT8_C( 37), INT8_C( 27), -INT8_C( 121), -INT8_C( 54), INT8_C( 42), INT8_C( 65), INT8_MIN, INT8_C( 23) }, { INT8_C( 37), INT8_C( 27), -INT8_C( 121), -INT8_C( 54), INT8_C( 42), INT8_C( 65), INT8_MIN, INT8_C( 101) }, INT32_C( 7) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int8_t a = test_vec[i].a; simde_int8x8_t v = simde_vld1_s8(test_vec[i].v); int lane = test_vec[i].lane; simde_int8x8_t r; SIMDE_CONSTIFY_8_(simde_vset_lane_s8, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int lanes[] = { 0, 1, 2, 3, 4, 5, 6, 7 }; for (int i = 0 ; i < 8 ; i++) { int8_t a = simde_test_codegen_random_i8(); simde_int8x8_t v = simde_test_arm_neon_random_i8x8(); int lane = lanes[i]; simde_int8x8_t r; SIMDE_CONSTIFY_8_(simde_vset_lane_s8, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_codegen_write_i8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vset_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a; int16_t v[4]; int16_t r[4]; int lane; } test_vec[] = { { INT16_C( 25881), { -INT16_C( 12793), -INT16_C( 20903), -INT16_C( 17144), -INT16_C( 
13187) }, { INT16_C( 25881), -INT16_C( 20903), -INT16_C( 17144), -INT16_C( 13187) }, INT32_C( 0) }, { INT16_C( 8494), { -INT16_C( 5273), INT16_C( 20093), -INT16_C( 7951), -INT16_C( 13815) }, { -INT16_C( 5273), INT16_C( 8494), -INT16_C( 7951), -INT16_C( 13815) }, INT32_C( 1) }, { -INT16_C( 6266), { -INT16_C( 23690), -INT16_C( 22694), INT16_C( 22066), -INT16_C( 28727) }, { -INT16_C( 23690), -INT16_C( 22694), -INT16_C( 6266), -INT16_C( 28727) }, INT32_C( 2) }, { -INT16_C( 7659), { INT16_C( 7412), INT16_C( 19888), -INT16_C( 17974), INT16_C( 18443) }, { INT16_C( 7412), INT16_C( 19888), -INT16_C( 17974), -INT16_C( 7659) }, INT32_C( 3) }, { INT16_C( 14725), { -INT16_C( 4759), -INT16_C( 6363), INT16_C( 5691), INT16_C( 17607) }, { INT16_C( 14725), -INT16_C( 6363), INT16_C( 5691), INT16_C( 17607) }, INT32_C( 0) }, { INT16_C( 20192), { INT16_C( 22316), -INT16_C( 30991), INT16_C( 9214), -INT16_C( 14372) }, { INT16_C( 22316), INT16_C( 20192), INT16_C( 9214), -INT16_C( 14372) }, INT32_C( 1) }, { -INT16_C( 3662), { -INT16_C( 22871), INT16_C( 23054), -INT16_C( 9996), -INT16_C( 237) }, { -INT16_C( 22871), INT16_C( 23054), -INT16_C( 3662), -INT16_C( 237) }, INT32_C( 2) }, { -INT16_C( 26592), { -INT16_C( 30152), INT16_C( 23941), -INT16_C( 16271), INT16_C( 14451) }, { -INT16_C( 30152), INT16_C( 23941), -INT16_C( 16271), -INT16_C( 26592) }, INT32_C( 3) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int16_t a = test_vec[i].a; simde_int16x4_t v = simde_vld1_s16(test_vec[i].v); int lane = test_vec[i].lane; simde_int16x4_t r; SIMDE_CONSTIFY_4_(simde_vset_lane_s16, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; for (int i = 0 ; i < 8 ; i++) { int16_t a = simde_test_codegen_random_i16(); simde_int16x4_t v = simde_test_arm_neon_random_i16x4(); int lane = lanes[i]; simde_int16x4_t r; 
SIMDE_CONSTIFY_4_(simde_vset_lane_s16, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_codegen_write_i16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vset_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a; int32_t v[2]; int32_t r[2]; int lane; } test_vec[] = { { INT32_C( 1781637630), { INT32_C( 1014922503), -INT32_C( 800151665) }, { INT32_C( 1781637630), -INT32_C( 800151665) }, INT32_C( 0) }, { -INT32_C( 1747077392), { -INT32_C( 1502972235), -INT32_C( 1101704828) }, { -INT32_C( 1502972235), -INT32_C( 1747077392) }, INT32_C( 1) }, { INT32_C( 1015661746), { INT32_C( 310432275), INT32_C( 2071769460) }, { INT32_C( 1015661746), INT32_C( 2071769460) }, INT32_C( 0) }, { -INT32_C( 1095239122), { -INT32_C( 1852963167), INT32_C( 1915317180) }, { -INT32_C( 1852963167), -INT32_C( 1095239122) }, INT32_C( 1) }, { INT32_C( 1696109537), { -INT32_C( 1775997212), INT32_C( 1171434802) }, { INT32_C( 1696109537), INT32_C( 1171434802) }, INT32_C( 0) }, { -INT32_C( 212381057), { INT32_C( 846123779), INT32_C( 1878009549) }, { INT32_C( 846123779), -INT32_C( 212381057) }, INT32_C( 1) }, { -INT32_C( 402620884), { -INT32_C( 900060695), -INT32_C( 1607437380) }, { -INT32_C( 402620884), -INT32_C( 1607437380) }, INT32_C( 0) }, { INT32_C( 322393313), { -INT32_C( 2141714175), INT32_C( 1601482844) }, { -INT32_C( 2141714175), INT32_C( 322393313) }, INT32_C( 1) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int32_t a = test_vec[i].a; simde_int32x2_t v = simde_vld1_s32(test_vec[i].v); int lane = test_vec[i].lane; simde_int32x2_t r; SIMDE_CONSTIFY_2_(simde_vset_lane_s32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', 
stdout); int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; for (int i = 0 ; i < 8 ; i++) { int32_t a = simde_test_codegen_random_i32(); simde_int32x2_t v = simde_test_arm_neon_random_i32x2(); int lane = lanes[i]; simde_int32x2_t r; SIMDE_CONSTIFY_2_(simde_vset_lane_s32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_codegen_write_i32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vset_lane_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a; int64_t v[1]; int64_t r[1]; } test_vec[] = { { INT64_C( 8132893339551323787), { -INT64_C( 2277587063383407450) }, { INT64_C( 8132893339551323787) } }, { -INT64_C( 1201043362115393959), { -INT64_C( 7370413409463753472) }, { -INT64_C( 1201043362115393959) } }, { INT64_C( 5599798045544665310), { INT64_C( 8341159202254534462) }, { INT64_C( 5599798045544665310) } }, { INT64_C( 1189717831917504294), { -INT64_C( 8320776697033939489) }, { INT64_C( 1189717831917504294) } }, { -INT64_C( 2563242400167091335), { -INT64_C( 5779385468923819713) }, { -INT64_C( 2563242400167091335) } }, { -INT64_C( 3915944839153045370), { -INT64_C( 4998366386176223127) }, { -INT64_C( 3915944839153045370) } }, { INT64_C( 4312839844605359200), { -INT64_C( 7112768106479183855) }, { INT64_C( 4312839844605359200) } }, { -INT64_C( 5096990314087609201), { -INT64_C( 7606735666318083725) }, { -INT64_C( 5096990314087609201) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int64_t a = test_vec[i].a; simde_int64x1_t v = simde_vld1_s64(test_vec[i].v); simde_int64x1_t r = simde_vset_lane_s64(a, v, 0); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int64_t a = simde_test_codegen_random_i64(); simde_int64x1_t v = 
simde_test_arm_neon_random_i64x1(); simde_int64x1_t r = simde_vset_lane_s64(a, v, 0); simde_test_codegen_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vset_lane_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a; uint8_t v[8]; uint8_t r[8]; int lane; } test_vec[] = { { UINT8_C(107), { UINT8_C(232), UINT8_C( 68), UINT8_C( 62), UINT8_C( 98), UINT8_C(251), UINT8_C(241), UINT8_C(101), UINT8_C( 54) }, { UINT8_C(107), UINT8_C( 68), UINT8_C( 62), UINT8_C( 98), UINT8_C(251), UINT8_C(241), UINT8_C(101), UINT8_C( 54) }, INT32_C( 0) }, { UINT8_C(150), { UINT8_C( 83), UINT8_C( 39), UINT8_C(121), UINT8_C(113), UINT8_C(247), UINT8_C( 41), UINT8_C(120), UINT8_C(159) }, { UINT8_C( 83), UINT8_C(150), UINT8_C(121), UINT8_C(113), UINT8_C(247), UINT8_C( 41), UINT8_C(120), UINT8_C(159) }, INT32_C( 1) }, { UINT8_C(173), { UINT8_C(167), UINT8_C( 57), UINT8_C(216), UINT8_C( 30), UINT8_C( 55), UINT8_C( 36), UINT8_C(109), UINT8_C(248) }, { UINT8_C(167), UINT8_C( 57), UINT8_C(173), UINT8_C( 30), UINT8_C( 55), UINT8_C( 36), UINT8_C(109), UINT8_C(248) }, INT32_C( 2) }, { UINT8_C( 60), { UINT8_C(242), UINT8_C(234), UINT8_C(103), UINT8_C( 93), UINT8_C(211), UINT8_C(171), UINT8_C(155), UINT8_C( 53) }, { UINT8_C(242), UINT8_C(234), UINT8_C(103), UINT8_C( 60), UINT8_C(211), UINT8_C(171), UINT8_C(155), UINT8_C( 53) }, INT32_C( 3) }, { UINT8_C(167), { UINT8_C(141), UINT8_C(154), UINT8_C(221), UINT8_C( 35), UINT8_C(237), UINT8_C( 5), UINT8_C(156), UINT8_C( 95) }, { UINT8_C(141), UINT8_C(154), UINT8_C(221), UINT8_C( 35), UINT8_C(167), UINT8_C( 5), UINT8_C(156), UINT8_C( 95) }, INT32_C( 4) }, { UINT8_C(252), { UINT8_C(198), UINT8_C(215), UINT8_C(155), UINT8_C(115), UINT8_C(126), UINT8_C(212), UINT8_C( 76), UINT8_C(156) }, { UINT8_C(198), UINT8_C(215), 
UINT8_C(155), UINT8_C(115), UINT8_C(126), UINT8_C(252), UINT8_C( 76), UINT8_C(156) }, INT32_C( 5) }, { UINT8_C( 11), { UINT8_C(112), UINT8_C( 10), UINT8_C( 4), UINT8_C(172), UINT8_C(252), UINT8_C(238), UINT8_C( 19), UINT8_C( 89) }, { UINT8_C(112), UINT8_C( 10), UINT8_C( 4), UINT8_C(172), UINT8_C(252), UINT8_C(238), UINT8_C( 11), UINT8_C( 89) }, INT32_C( 6) }, { UINT8_C(193), { UINT8_C(191), UINT8_C(245), UINT8_C(246), UINT8_C(102), UINT8_C(130), UINT8_C(144), UINT8_C( 67), UINT8_C(165) }, { UINT8_C(191), UINT8_C(245), UINT8_C(246), UINT8_C(102), UINT8_C(130), UINT8_C(144), UINT8_C( 67), UINT8_C(193) }, INT32_C( 7) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint8_t a = test_vec[i].a; simde_uint8x8_t v = simde_vld1_u8(test_vec[i].v); int lane = test_vec[i].lane; simde_uint8x8_t r; SIMDE_CONSTIFY_8_(simde_vset_lane_u8, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int lanes[] = { 0, 1, 2, 3, 4, 5, 6, 7 }; for (int i = 0 ; i < 8 ; i++) { uint8_t a = simde_test_codegen_random_u8(); simde_uint8x8_t v = simde_test_arm_neon_random_u8x8(); int lane = lanes[i]; simde_uint8x8_t r; SIMDE_CONSTIFY_8_(simde_vset_lane_u8, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_codegen_write_u8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vset_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a; uint16_t v[4]; uint16_t r[4]; int lane; } test_vec[] = { { UINT16_C(45765), { UINT16_C(30953), UINT16_C(24357), UINT16_C( 4588), UINT16_C(51685) }, { UINT16_C(45765), UINT16_C(24357), UINT16_C( 4588), UINT16_C(51685) }, INT32_C( 0) }, { UINT16_C( 3768), { UINT16_C(49965), UINT16_C(13884), UINT16_C( 6907), 
UINT16_C(15397) }, { UINT16_C(49965), UINT16_C( 3768), UINT16_C( 6907), UINT16_C(15397) }, INT32_C( 1) }, { UINT16_C(43298), { UINT16_C(51529), UINT16_C( 48), UINT16_C(59229), UINT16_C(37787) }, { UINT16_C(51529), UINT16_C( 48), UINT16_C(43298), UINT16_C(37787) }, INT32_C( 2) }, { UINT16_C(24647), { UINT16_C(12357), UINT16_C(27352), UINT16_C(50320), UINT16_C(30075) }, { UINT16_C(12357), UINT16_C(27352), UINT16_C(50320), UINT16_C(24647) }, INT32_C( 3) }, { UINT16_C(13197), { UINT16_C(47747), UINT16_C(49143), UINT16_C(62192), UINT16_C( 5594) }, { UINT16_C(13197), UINT16_C(49143), UINT16_C(62192), UINT16_C( 5594) }, INT32_C( 0) }, { UINT16_C(64558), { UINT16_C(30654), UINT16_C(61125), UINT16_C( 8823), UINT16_C( 4821) }, { UINT16_C(30654), UINT16_C(64558), UINT16_C( 8823), UINT16_C( 4821) }, INT32_C( 1) }, { UINT16_C( 7349), { UINT16_C(64114), UINT16_C(19021), UINT16_C(56676), UINT16_C(57358) }, { UINT16_C(64114), UINT16_C(19021), UINT16_C( 7349), UINT16_C(57358) }, INT32_C( 2) }, { UINT16_C(39762), { UINT16_C(54547), UINT16_C( 2645), UINT16_C(17812), UINT16_C(28412) }, { UINT16_C(54547), UINT16_C( 2645), UINT16_C(17812), UINT16_C(39762) }, INT32_C( 3) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint16_t a = test_vec[i].a; simde_uint16x4_t v = simde_vld1_u16(test_vec[i].v); int lane = test_vec[i].lane; simde_uint16x4_t r; SIMDE_CONSTIFY_4_(simde_vset_lane_u16, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; for (int i = 0 ; i < 8 ; i++) { uint16_t a = simde_test_codegen_random_u16(); simde_uint16x4_t v = simde_test_arm_neon_random_u16x4(); int lane = lanes[i]; simde_uint16x4_t r; SIMDE_CONSTIFY_4_(simde_vset_lane_u16, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_codegen_write_u16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, v, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vset_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a; uint32_t v[2]; uint32_t r[2]; int lane; } test_vec[] = { { UINT32_C(3686056461), { UINT32_C(3107362155), UINT32_C(1460699361) }, { UINT32_C(3686056461), UINT32_C(1460699361) }, INT32_C( 0) }, { UINT32_C(2717558461), { UINT32_C(4167524849), UINT32_C(3776814077) }, { UINT32_C(4167524849), UINT32_C(2717558461) }, INT32_C( 1) }, { UINT32_C(3227136249), { UINT32_C(2690237075), UINT32_C(4017819012) }, { UINT32_C(3227136249), UINT32_C(4017819012) }, INT32_C( 0) }, { UINT32_C(2208871074), { UINT32_C(4024088626), UINT32_C(1334891870) }, { UINT32_C(4024088626), UINT32_C(2208871074) }, INT32_C( 1) }, { UINT32_C( 860420150), { UINT32_C(2366924179), UINT32_C( 676163477) }, { UINT32_C( 860420150), UINT32_C( 676163477) }, INT32_C( 0) }, { UINT32_C(3049825841), { UINT32_C(1453606068), UINT32_C( 685395190) }, { UINT32_C(1453606068), UINT32_C(3049825841) }, INT32_C( 1) }, { UINT32_C(1645720580), { UINT32_C(3216156553), UINT32_C( 871561887) }, { UINT32_C(1645720580), UINT32_C( 871561887) }, INT32_C( 0) }, { UINT32_C(4106225503), { UINT32_C(2803699062), UINT32_C(1750918580) }, { UINT32_C(2803699062), UINT32_C(4106225503) }, INT32_C( 1) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint32_t a = test_vec[i].a; simde_uint32x2_t v = simde_vld1_u32(test_vec[i].v); int lane = test_vec[i].lane; simde_uint32x2_t r; SIMDE_CONSTIFY_2_(simde_vset_lane_u32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; for (int i = 0 ; i < 8 ; i++) { uint32_t a = simde_test_codegen_random_u32(); simde_uint32x2_t v = simde_test_arm_neon_random_u32x2(); int lane = lanes[i]; 
simde_uint32x2_t r; SIMDE_CONSTIFY_2_(simde_vset_lane_u32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_codegen_write_u32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vset_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a; uint64_t v[1]; uint64_t r[1]; int lane; } test_vec[] = { { UINT64_C( 6406044039131507225), { UINT64_C( 1703588918030527433) }, { UINT64_C( 6406044039131507225) }, INT32_C( 0) }, { UINT64_C( 6803362325393349286), { UINT64_C( 9198016635619938545) }, { UINT64_C( 6803362325393349286) }, INT32_C( 0) }, { UINT64_C( 4002095255047804694), { UINT64_C( 1073592190821075895) }, { UINT64_C( 4002095255047804694) }, INT32_C( 0) }, { UINT64_C(10745787330633285317), { UINT64_C( 7217255135707226948) }, { UINT64_C(10745787330633285317) }, INT32_C( 0) }, { UINT64_C(10597851576315383662), { UINT64_C( 9917851913051871532) }, { UINT64_C(10597851576315383662) }, INT32_C( 0) }, { UINT64_C(16555350570174246279), { UINT64_C( 3999680018493928142) }, { UINT64_C(16555350570174246279) }, INT32_C( 0) }, { UINT64_C( 7400582118781425734), { UINT64_C(16649137534843688896) }, { UINT64_C( 7400582118781425734) }, INT32_C( 0) }, { UINT64_C(10525426639353108619), { UINT64_C(11296488388647952325) }, { UINT64_C(10525426639353108619) }, INT32_C( 0) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint64_t a = test_vec[i].a; simde_uint64x1_t v = simde_vld1_u64(test_vec[i].v); simde_uint64x1_t r = simde_vset_lane_u64(a, v, 0); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a = simde_test_codegen_random_u64(); simde_uint64x1_t v = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t r = 
simde_vset_lane_u64(a, v, 0); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsetq_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a; simde_float32 v[4]; simde_float32 r[4]; int lane; } test_vec[] = { { SIMDE_FLOAT32_C( 571.97), { SIMDE_FLOAT32_C( -685.65), SIMDE_FLOAT32_C( 527.31), SIMDE_FLOAT32_C( 563.52), SIMDE_FLOAT32_C( -329.90) }, { SIMDE_FLOAT32_C( 571.97), SIMDE_FLOAT32_C( 527.31), SIMDE_FLOAT32_C( 563.52), SIMDE_FLOAT32_C( -329.90) }, INT32_C( 0) }, { SIMDE_FLOAT32_C( -413.88), { SIMDE_FLOAT32_C( 284.95), SIMDE_FLOAT32_C( -150.39), SIMDE_FLOAT32_C( -996.77), SIMDE_FLOAT32_C( -669.25) }, { SIMDE_FLOAT32_C( 284.95), SIMDE_FLOAT32_C( -413.88), SIMDE_FLOAT32_C( -996.77), SIMDE_FLOAT32_C( -669.25) }, INT32_C( 1) }, { SIMDE_FLOAT32_C( -28.31), { SIMDE_FLOAT32_C( -917.74), SIMDE_FLOAT32_C( 704.61), SIMDE_FLOAT32_C( -331.98), SIMDE_FLOAT32_C( 790.85) }, { SIMDE_FLOAT32_C( -917.74), SIMDE_FLOAT32_C( 704.61), SIMDE_FLOAT32_C( -28.31), SIMDE_FLOAT32_C( 790.85) }, INT32_C( 2) }, { SIMDE_FLOAT32_C( -568.07), { SIMDE_FLOAT32_C( 411.05), SIMDE_FLOAT32_C( -327.15), SIMDE_FLOAT32_C( -816.35), SIMDE_FLOAT32_C( -260.46) }, { SIMDE_FLOAT32_C( 411.05), SIMDE_FLOAT32_C( -327.15), SIMDE_FLOAT32_C( -816.35), SIMDE_FLOAT32_C( -568.07) }, INT32_C( 3) }, { SIMDE_FLOAT32_C( -649.49), { SIMDE_FLOAT32_C( 712.24), SIMDE_FLOAT32_C( -363.52), SIMDE_FLOAT32_C( -141.46), SIMDE_FLOAT32_C( -532.55) }, { SIMDE_FLOAT32_C( -649.49), SIMDE_FLOAT32_C( -363.52), SIMDE_FLOAT32_C( -141.46), SIMDE_FLOAT32_C( -532.55) }, INT32_C( 0) }, { SIMDE_FLOAT32_C( -673.98), { SIMDE_FLOAT32_C( -328.33), SIMDE_FLOAT32_C( -599.26), SIMDE_FLOAT32_C( 253.06), SIMDE_FLOAT32_C( 22.83) }, { SIMDE_FLOAT32_C( -328.33), SIMDE_FLOAT32_C( 
-673.98), SIMDE_FLOAT32_C( 253.06), SIMDE_FLOAT32_C( 22.83) }, INT32_C( 1) }, { SIMDE_FLOAT32_C( -654.71), { SIMDE_FLOAT32_C( -174.97), SIMDE_FLOAT32_C( 337.18), SIMDE_FLOAT32_C( 872.60), SIMDE_FLOAT32_C( -611.45) }, { SIMDE_FLOAT32_C( -174.97), SIMDE_FLOAT32_C( 337.18), SIMDE_FLOAT32_C( -654.71), SIMDE_FLOAT32_C( -611.45) }, INT32_C( 2) }, { SIMDE_FLOAT32_C( -992.72), { SIMDE_FLOAT32_C( -541.28), SIMDE_FLOAT32_C( 673.50), SIMDE_FLOAT32_C( -143.10), SIMDE_FLOAT32_C( -538.05) }, { SIMDE_FLOAT32_C( -541.28), SIMDE_FLOAT32_C( 673.50), SIMDE_FLOAT32_C( -143.10), SIMDE_FLOAT32_C( -992.72) }, INT32_C( 3) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32 a = test_vec[i].a; simde_float32x4_t v = simde_vld1q_f32(test_vec[i].v); int lane = test_vec[i].lane; simde_float32x4_t r; SIMDE_CONSTIFY_4_(simde_vsetq_lane_f32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; for (int i = 0 ; i < 8 ; i++) { simde_float32 a = simde_test_codegen_random_f32(-1000.0f, 1000.0f); simde_float32x4_t v = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); int lane = lanes[i]; simde_float32x4_t r; SIMDE_CONSTIFY_4_(simde_vsetq_lane_f32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_codegen_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsetq_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a; simde_float64 v[2]; simde_float64 r[2]; int lane; } test_vec[] = { { SIMDE_FLOAT64_C( 293.82), { SIMDE_FLOAT64_C( 297.03), SIMDE_FLOAT64_C( 420.15) }, { SIMDE_FLOAT64_C( 293.82), SIMDE_FLOAT64_C( 420.15) }, INT32_C( 0) }, { SIMDE_FLOAT64_C( 
-258.58), { SIMDE_FLOAT64_C( -13.57), SIMDE_FLOAT64_C( -351.16) }, { SIMDE_FLOAT64_C( -13.57), SIMDE_FLOAT64_C( -258.58) }, INT32_C( 1) }, { SIMDE_FLOAT64_C( 926.74), { SIMDE_FLOAT64_C( 398.53), SIMDE_FLOAT64_C( 288.03) }, { SIMDE_FLOAT64_C( 926.74), SIMDE_FLOAT64_C( 288.03) }, INT32_C( 0) }, { SIMDE_FLOAT64_C( 471.78), { SIMDE_FLOAT64_C( 53.12), SIMDE_FLOAT64_C( 54.20) }, { SIMDE_FLOAT64_C( 53.12), SIMDE_FLOAT64_C( 471.78) }, INT32_C( 1) }, { SIMDE_FLOAT64_C( -866.38), { SIMDE_FLOAT64_C( -780.75), SIMDE_FLOAT64_C( -71.08) }, { SIMDE_FLOAT64_C( -866.38), SIMDE_FLOAT64_C( -71.08) }, INT32_C( 0) }, { SIMDE_FLOAT64_C( 629.85), { SIMDE_FLOAT64_C( -883.80), SIMDE_FLOAT64_C( -25.61) }, { SIMDE_FLOAT64_C( -883.80), SIMDE_FLOAT64_C( 629.85) }, INT32_C( 1) }, { SIMDE_FLOAT64_C( -974.10), { SIMDE_FLOAT64_C( 564.55), SIMDE_FLOAT64_C( 339.50) }, { SIMDE_FLOAT64_C( -974.10), SIMDE_FLOAT64_C( 339.50) }, INT32_C( 0) }, { SIMDE_FLOAT64_C( -659.40), { SIMDE_FLOAT64_C( 273.60), SIMDE_FLOAT64_C( -218.27) }, { SIMDE_FLOAT64_C( 273.60), SIMDE_FLOAT64_C( -659.40) }, INT32_C( 1) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64 a = test_vec[i].a; simde_float64x2_t v = simde_vld1q_f64(test_vec[i].v); int lane = test_vec[i].lane; simde_float64x2_t r; SIMDE_CONSTIFY_2_(simde_vsetq_lane_f64, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; for (int i = 0 ; i < 8 ; i++) { simde_float64 a = simde_test_codegen_random_f64(-1000.0, 1000.0); simde_float64x2_t v = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); int lane = lanes[i]; simde_float64x2_t r; SIMDE_CONSTIFY_2_(simde_vsetq_lane_f64, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_codegen_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x2(2, r, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsetq_lane_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a; int8_t v[16]; int8_t r[16]; int lane; } test_vec[] = { { INT8_C( 105), { -INT8_C( 61), INT8_C( 86), -INT8_C( 122), -INT8_C( 74), -INT8_C( 57), -INT8_C( 79), -INT8_C( 121), INT8_C( 110), -INT8_C( 44), -INT8_C( 76), INT8_C( 11), INT8_C( 25), -INT8_C( 43), -INT8_C( 10), -INT8_C( 114), -INT8_C( 86) }, { INT8_C( 105), INT8_C( 86), -INT8_C( 122), -INT8_C( 74), -INT8_C( 57), -INT8_C( 79), -INT8_C( 121), INT8_C( 110), -INT8_C( 44), -INT8_C( 76), INT8_C( 11), INT8_C( 25), -INT8_C( 43), -INT8_C( 10), -INT8_C( 114), -INT8_C( 86) }, INT32_C( 0) }, { -INT8_C( 77), { -INT8_C( 110), -INT8_C( 116), INT8_C( 44), INT8_C( 10), -INT8_C( 39), INT8_C( 68), INT8_C( 90), INT8_C( 84), INT8_C( 29), INT8_C( 44), INT8_C( 113), -INT8_C( 70), -INT8_C( 40), -INT8_C( 38), INT8_C( 125), INT8_C( 47) }, { -INT8_C( 110), -INT8_C( 116), -INT8_C( 77), INT8_C( 10), -INT8_C( 39), INT8_C( 68), INT8_C( 90), INT8_C( 84), INT8_C( 29), INT8_C( 44), INT8_C( 113), -INT8_C( 70), -INT8_C( 40), -INT8_C( 38), INT8_C( 125), INT8_C( 47) }, INT32_C( 2) }, { INT8_C( 96), { INT8_C( 51), -INT8_C( 10), INT8_C( 17), -INT8_C( 70), INT8_C( 100), -INT8_C( 26), INT8_C( 111), INT8_C( 112), -INT8_C( 1), INT8_C( 68), INT8_C( 102), -INT8_C( 115), -INT8_C( 18), INT8_C( 25), INT8_C( 32), INT8_C( 123) }, { INT8_C( 51), -INT8_C( 10), INT8_C( 17), -INT8_C( 70), INT8_C( 96), -INT8_C( 26), INT8_C( 111), INT8_C( 112), -INT8_C( 1), INT8_C( 68), INT8_C( 102), -INT8_C( 115), -INT8_C( 18), INT8_C( 25), INT8_C( 32), INT8_C( 123) }, INT32_C( 4) }, { INT8_C( 70), { INT8_C( 42), INT8_C( 84), -INT8_C( 118), -INT8_C( 123), -INT8_C( 87), -INT8_C( 88), -INT8_C( 79), INT8_C( 26), INT8_C( 98), -INT8_C( 119), -INT8_C( 12), -INT8_C( 33), -INT8_C( 72), INT8_C( 85), INT8_C( 19), -INT8_C( 81) }, { INT8_C( 42), INT8_C( 84), -INT8_C( 118), -INT8_C( 
123), -INT8_C( 87), -INT8_C( 88), INT8_C( 70), INT8_C( 26), INT8_C( 98), -INT8_C( 119), -INT8_C( 12), -INT8_C( 33), -INT8_C( 72), INT8_C( 85), INT8_C( 19), -INT8_C( 81) }, INT32_C( 6) }, { INT8_C( 102), { -INT8_C( 51), INT8_C( 19), INT8_C( 76), INT8_C( 60), -INT8_C( 125), INT8_C( 75), -INT8_C( 127), -INT8_C( 23), -INT8_C( 39), INT8_C( 111), INT8_C( 3), -INT8_C( 7), -INT8_C( 22), INT8_C( 73), INT8_C( 35), INT8_C( 63) }, { -INT8_C( 51), INT8_C( 19), INT8_C( 76), INT8_C( 60), -INT8_C( 125), INT8_C( 75), -INT8_C( 127), -INT8_C( 23), -INT8_C( 39), INT8_C( 102), INT8_C( 3), -INT8_C( 7), -INT8_C( 22), INT8_C( 73), INT8_C( 35), INT8_C( 63) }, INT32_C( 9) }, { -INT8_C( 45), { -INT8_C( 88), -INT8_C( 24), INT8_C( 123), INT8_C( 89), INT8_C( 2), -INT8_C( 35), -INT8_C( 29), -INT8_C( 10), -INT8_C( 67), -INT8_C( 101), INT8_C( 75), -INT8_C( 48), INT8_C( 74), -INT8_C( 78), -INT8_C( 99), INT8_C( 94) }, { -INT8_C( 88), -INT8_C( 24), INT8_C( 123), INT8_C( 89), INT8_C( 2), -INT8_C( 35), -INT8_C( 29), -INT8_C( 10), -INT8_C( 67), -INT8_C( 101), -INT8_C( 45), -INT8_C( 48), INT8_C( 74), -INT8_C( 78), -INT8_C( 99), INT8_C( 94) }, INT32_C( 10) }, { -INT8_C( 2), { -INT8_C( 38), -INT8_C( 31), INT8_C( 74), INT8_C( 91), -INT8_C( 53), INT8_C( 35), -INT8_C( 54), -INT8_C( 50), INT8_C( 28), -INT8_C( 75), INT8_C( 23), INT8_C( 63), -INT8_C( 12), -INT8_C( 22), -INT8_C( 24), -INT8_C( 36) }, { -INT8_C( 38), -INT8_C( 31), INT8_C( 74), INT8_C( 91), -INT8_C( 53), INT8_C( 35), -INT8_C( 54), -INT8_C( 50), INT8_C( 28), -INT8_C( 75), INT8_C( 23), -INT8_C( 2), -INT8_C( 12), -INT8_C( 22), -INT8_C( 24), -INT8_C( 36) }, INT32_C( 11) }, { INT8_C( 102), { INT8_C( 65), -INT8_C( 34), INT8_C( 67), INT8_C( 36), -INT8_C( 44), INT8_C( 0), -INT8_C( 64), INT8_C( 32), -INT8_C( 48), INT8_C( 10), -INT8_C( 46), INT8_C( 110), INT8_C( 104), -INT8_C( 48), INT8_C( 72), INT8_C( 74) }, { INT8_C( 65), -INT8_C( 34), INT8_C( 67), INT8_C( 36), -INT8_C( 44), INT8_C( 0), -INT8_C( 64), INT8_C( 32), -INT8_C( 48), INT8_C( 10), -INT8_C( 46), 
INT8_C( 110), INT8_C( 102), -INT8_C( 48), INT8_C( 72), INT8_C( 74) }, INT32_C( 12) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int8_t a = test_vec[i].a; simde_int8x16_t v = simde_vld1q_s8(test_vec[i].v); int lane = test_vec[i].lane; simde_int8x16_t r; SIMDE_CONSTIFY_16_(simde_vsetq_lane_s8, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int lanes[] = { 0, 2, 4, 6, 9, 10, 11, 12 }; for (int i = 0 ; i < 8 ; i++) { int8_t a = simde_test_codegen_random_i8(); simde_int8x16_t v = simde_test_arm_neon_random_i8x16(); int lane = lanes[i]; simde_int8x16_t r; SIMDE_CONSTIFY_16_(simde_vsetq_lane_s8, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_codegen_write_i8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsetq_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a; int16_t v[8]; int16_t r[8]; int lane; } test_vec[] = { { INT16_C( 4491), { -INT16_C( 7694), INT16_C( 31799), INT16_C( 16694), -INT16_C( 15473), -INT16_C( 6552), -INT16_C( 9167), -INT16_C( 11511), -INT16_C( 19751) }, { INT16_C( 4491), INT16_C( 31799), INT16_C( 16694), -INT16_C( 15473), -INT16_C( 6552), -INT16_C( 9167), -INT16_C( 11511), -INT16_C( 19751) }, INT32_C( 0) }, { -INT16_C( 24147), { -INT16_C( 11711), INT16_C( 7395), -INT16_C( 25881), -INT16_C( 25843), -INT16_C( 25342), -INT16_C( 29124), INT16_C( 11951), -INT16_C( 6545) }, { -INT16_C( 11711), -INT16_C( 24147), -INT16_C( 25881), -INT16_C( 25843), -INT16_C( 25342), -INT16_C( 29124), INT16_C( 11951), -INT16_C( 6545) }, INT32_C( 1) }, { -INT16_C( 23125), { INT16_C( 14887), -INT16_C( 28567), -INT16_C( 26080), INT16_C( 10604), INT16_C( 18030), INT16_C( 7131), INT16_C( 
7655), -INT16_C( 13586) }, { INT16_C( 14887), -INT16_C( 28567), -INT16_C( 23125), INT16_C( 10604), INT16_C( 18030), INT16_C( 7131), INT16_C( 7655), -INT16_C( 13586) }, INT32_C( 2) }, { -INT16_C( 10951), { INT16_C( 18276), INT16_C( 26480), -INT16_C( 21020), -INT16_C( 27659), INT16_C( 25819), -INT16_C( 31110), -INT16_C( 24310), INT16_C( 29632) }, { INT16_C( 18276), INT16_C( 26480), -INT16_C( 21020), -INT16_C( 10951), INT16_C( 25819), -INT16_C( 31110), -INT16_C( 24310), INT16_C( 29632) }, INT32_C( 3) }, { -INT16_C( 8143), { -INT16_C( 25075), INT16_C( 31497), -INT16_C( 6684), -INT16_C( 13417), -INT16_C( 31486), INT16_C( 15254), -INT16_C( 1446), -INT16_C( 13694) }, { -INT16_C( 25075), INT16_C( 31497), -INT16_C( 6684), -INT16_C( 13417), -INT16_C( 8143), INT16_C( 15254), -INT16_C( 1446), -INT16_C( 13694) }, INT32_C( 4) }, { INT16_C( 26465), { INT16_C( 22135), INT16_C( 21498), INT16_C( 29883), -INT16_C( 14887), -INT16_C( 26090), INT16_C( 18232), INT16_C( 17786), -INT16_C( 31515) }, { INT16_C( 22135), INT16_C( 21498), INT16_C( 29883), -INT16_C( 14887), -INT16_C( 26090), INT16_C( 26465), INT16_C( 17786), -INT16_C( 31515) }, INT32_C( 5) }, { -INT16_C( 13887), { INT16_C( 22633), INT16_C( 27541), INT16_C( 11229), INT16_C( 14246), INT16_C( 10533), -INT16_C( 30975), INT16_C( 31120), -INT16_C( 29987) }, { INT16_C( 22633), INT16_C( 27541), INT16_C( 11229), INT16_C( 14246), INT16_C( 10533), -INT16_C( 30975), -INT16_C( 13887), -INT16_C( 29987) }, INT32_C( 6) }, { -INT16_C( 26420), { -INT16_C( 23041), INT16_C( 5469), -INT16_C( 27329), -INT16_C( 17828), INT16_C( 17115), -INT16_C( 25538), -INT16_C( 22773), -INT16_C( 24332) }, { -INT16_C( 23041), INT16_C( 5469), -INT16_C( 27329), -INT16_C( 17828), INT16_C( 17115), -INT16_C( 25538), -INT16_C( 22773), -INT16_C( 26420) }, INT32_C( 7) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int16_t a = test_vec[i].a; simde_int16x8_t v = simde_vld1q_s16(test_vec[i].v); int lane = test_vec[i].lane; simde_int16x8_t r; 
SIMDE_CONSTIFY_8_(simde_vsetq_lane_s16, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int lanes[] = { 0, 1, 2, 3, 4, 5, 6, 7 }; for (int i = 0 ; i < 8 ; i++) { int16_t a = simde_test_codegen_random_i16(); simde_int16x8_t v = simde_test_arm_neon_random_i16x8(); int lane = lanes[i]; simde_int16x8_t r; SIMDE_CONSTIFY_8_(simde_vsetq_lane_s16, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_codegen_write_i16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsetq_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a; int32_t v[4]; int32_t r[4]; int lane; } test_vec[] = { { INT32_C( 963230229), { -INT32_C( 66824687), -INT32_C( 1083756224), INT32_C( 656723093), -INT32_C( 1485455016) }, { INT32_C( 963230229), -INT32_C( 1083756224), INT32_C( 656723093), -INT32_C( 1485455016) }, INT32_C( 0) }, { -INT32_C( 554431575), { INT32_C( 944199529), INT32_C( 422117123), -INT32_C( 900558407), INT32_C( 684152807) }, { INT32_C( 944199529), -INT32_C( 554431575), -INT32_C( 900558407), INT32_C( 684152807) }, INT32_C( 1) }, { INT32_C( 501689992), { INT32_C( 1380191482), INT32_C( 1996143053), INT32_C( 760606404), INT32_C( 1231461445) }, { INT32_C( 1380191482), INT32_C( 1996143053), INT32_C( 501689992), INT32_C( 1231461445) }, INT32_C( 2) }, { INT32_C( 1432522395), { INT32_C( 119518239), -INT32_C( 1825577461), INT32_C( 246486548), -INT32_C( 278792926) }, { INT32_C( 119518239), -INT32_C( 1825577461), INT32_C( 246486548), INT32_C( 1432522395) }, INT32_C( 3) }, { INT32_C( 1936087983), { -INT32_C( 1885226167), -INT32_C( 220723369), -INT32_C( 1270400363), -INT32_C( 88381458) }, { INT32_C( 1936087983), -INT32_C( 220723369), -INT32_C( 1270400363), 
-INT32_C( 88381458) }, INT32_C( 0) }, { INT32_C( 1653467725), { INT32_C( 594558465), -INT32_C( 485240524), INT32_C( 1985378604), -INT32_C( 1962543308) }, { INT32_C( 594558465), INT32_C( 1653467725), INT32_C( 1985378604), -INT32_C( 1962543308) }, INT32_C( 1) }, { -INT32_C( 1820467714), { INT32_C( 88655127), INT32_C( 2046755628), -INT32_C( 270823954), -INT32_C( 15577909) }, { INT32_C( 88655127), INT32_C( 2046755628), -INT32_C( 1820467714), -INT32_C( 15577909) }, INT32_C( 2) }, { INT32_C( 1256334621), { -INT32_C( 759154274), INT32_C( 794674480), -INT32_C( 1178412126), -INT32_C( 859960672) }, { -INT32_C( 759154274), INT32_C( 794674480), -INT32_C( 1178412126), INT32_C( 1256334621) }, INT32_C( 3) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int32_t a = test_vec[i].a; simde_int32x4_t v = simde_vld1q_s32(test_vec[i].v); int lane = test_vec[i].lane; simde_int32x4_t r; SIMDE_CONSTIFY_4_(simde_vsetq_lane_s32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; for (int i = 0 ; i < 8 ; i++) { int32_t a = simde_test_codegen_random_i32(); simde_int32x4_t v = simde_test_arm_neon_random_i32x4(); int lane = lanes[i]; simde_int32x4_t r; SIMDE_CONSTIFY_4_(simde_vsetq_lane_s32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_codegen_write_i32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsetq_lane_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a; int64_t v[2]; int64_t r[2]; int lane; } test_vec[] = { { -INT64_C( 6275848456464942524), { -INT64_C( 1023053585428821105), -INT64_C( 1413666450029352452) }, { -INT64_C( 6275848456464942524), -INT64_C( 
1413666450029352452) }, INT32_C( 0) }, { INT64_C( 6704605406273618420), { -INT64_C( 8610410823455259451), INT64_C( 3681846863876511223) }, { -INT64_C( 8610410823455259451), INT64_C( 6704605406273618420) }, INT32_C( 1) }, { -INT64_C( 3586240954304351115), { -INT64_C( 6497837878180096675), INT64_C( 4382284209977202767) }, { -INT64_C( 3586240954304351115), INT64_C( 4382284209977202767) }, INT32_C( 0) }, { -INT64_C( 7980243183598333995), { INT64_C( 8455539187811154863), -INT64_C( 5694873878574442639) }, { INT64_C( 8455539187811154863), -INT64_C( 7980243183598333995) }, INT32_C( 1) }, { INT64_C( 5465340758585865814), { INT64_C( 7279672657599478617), INT64_C( 8367484709461026450) }, { INT64_C( 5465340758585865814), INT64_C( 8367484709461026450) }, INT32_C( 0) }, { INT64_C( 2211476088021191326), { -INT64_C( 3678153970188375757), INT64_C( 8568177333557611725) }, { -INT64_C( 3678153970188375757), INT64_C( 2211476088021191326) }, INT32_C( 1) }, { -INT64_C( 4853685099509524690), { INT64_C( 6570589300708895305), INT64_C( 5212064977732888853) }, { -INT64_C( 4853685099509524690), INT64_C( 5212064977732888853) }, INT32_C( 0) }, { INT64_C( 837586917696181829), { INT64_C( 5145047008699793291), -INT64_C( 6643223354434641333) }, { INT64_C( 5145047008699793291), INT64_C( 837586917696181829) }, INT32_C( 1) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int64_t a = test_vec[i].a; simde_int64x2_t v = simde_vld1q_s64(test_vec[i].v); int lane = test_vec[i].lane; simde_int64x2_t r; SIMDE_CONSTIFY_2_(simde_vsetq_lane_s64, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; for (int i = 0 ; i < 8 ; i++) { int64_t a = simde_test_codegen_random_i64(); simde_int64x2_t v = simde_test_arm_neon_random_i64x2(); int lane = lanes[i]; simde_int64x2_t r; SIMDE_CONSTIFY_2_(simde_vsetq_lane_s64, r, (HEDLEY_UNREACHABLE(), v), 
lane, a, v); simde_test_codegen_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsetq_lane_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a; uint8_t v[16]; uint8_t r[16]; int lane; } test_vec[] = { { UINT8_C(238), { UINT8_C( 34), UINT8_C( 29), UINT8_C(146), UINT8_C( 44), UINT8_C(194), UINT8_C( 98), UINT8_C(109), UINT8_C(200), UINT8_C( 43), UINT8_C(150), UINT8_C( 94), UINT8_MAX, UINT8_C(180), UINT8_C(219), UINT8_C( 87), UINT8_C(201) }, { UINT8_C(238), UINT8_C( 29), UINT8_C(146), UINT8_C( 44), UINT8_C(194), UINT8_C( 98), UINT8_C(109), UINT8_C(200), UINT8_C( 43), UINT8_C(150), UINT8_C( 94), UINT8_MAX, UINT8_C(180), UINT8_C(219), UINT8_C( 87), UINT8_C(201) }, INT32_C( 0) }, { UINT8_C( 62), { UINT8_C(200), UINT8_C( 35), UINT8_C( 96), UINT8_C( 42), UINT8_C( 87), UINT8_C(172), UINT8_C(236), UINT8_C( 73), UINT8_C( 8), UINT8_C( 33), UINT8_C(247), UINT8_C( 76), UINT8_C( 81), UINT8_C(229), UINT8_C(110), UINT8_C(110) }, { UINT8_C(200), UINT8_C( 35), UINT8_C( 62), UINT8_C( 42), UINT8_C( 87), UINT8_C(172), UINT8_C(236), UINT8_C( 73), UINT8_C( 8), UINT8_C( 33), UINT8_C(247), UINT8_C( 76), UINT8_C( 81), UINT8_C(229), UINT8_C(110), UINT8_C(110) }, INT32_C( 2) }, { UINT8_C(119), { UINT8_C(154), UINT8_C( 48), UINT8_C(217), UINT8_C( 7), UINT8_C(248), UINT8_C( 5), UINT8_C(158), UINT8_C( 86), UINT8_C( 4), UINT8_C( 82), UINT8_C( 49), UINT8_C( 91), UINT8_C( 27), UINT8_C(112), UINT8_C( 35), UINT8_C( 63) }, { UINT8_C(154), UINT8_C( 48), UINT8_C(217), UINT8_C( 7), UINT8_C(119), UINT8_C( 5), UINT8_C(158), UINT8_C( 86), UINT8_C( 4), UINT8_C( 82), UINT8_C( 49), UINT8_C( 91), UINT8_C( 27), UINT8_C(112), UINT8_C( 35), UINT8_C( 63) }, INT32_C( 4) }, { UINT8_C(208), { UINT8_C( 77), UINT8_C(150), UINT8_C(124), UINT8_C( 57), UINT8_C(223), UINT8_C(133), 
UINT8_C( 90), UINT8_C(214), UINT8_C(209), UINT8_C(172), UINT8_C(187), UINT8_C( 63), UINT8_C( 26), UINT8_C( 51), UINT8_C(218), UINT8_C( 75) }, { UINT8_C( 77), UINT8_C(150), UINT8_C(124), UINT8_C( 57), UINT8_C(223), UINT8_C(133), UINT8_C(208), UINT8_C(214), UINT8_C(209), UINT8_C(172), UINT8_C(187), UINT8_C( 63), UINT8_C( 26), UINT8_C( 51), UINT8_C(218), UINT8_C( 75) }, INT32_C( 6) }, { UINT8_C( 12), { UINT8_C(225), UINT8_C( 67), UINT8_C( 17), UINT8_C(127), UINT8_C(154), UINT8_C( 21), UINT8_C(210), UINT8_C(203), UINT8_C(112), UINT8_C(237), UINT8_C( 59), UINT8_C(147), UINT8_C( 44), UINT8_C( 11), UINT8_C(224), UINT8_C(194) }, { UINT8_C(225), UINT8_C( 67), UINT8_C( 17), UINT8_C(127), UINT8_C(154), UINT8_C( 21), UINT8_C(210), UINT8_C(203), UINT8_C( 12), UINT8_C(237), UINT8_C( 59), UINT8_C(147), UINT8_C( 44), UINT8_C( 11), UINT8_C(224), UINT8_C(194) }, INT32_C( 8) }, { UINT8_C(136), { UINT8_C( 25), UINT8_C(162), UINT8_C( 13), UINT8_C(116), UINT8_C(120), UINT8_C(222), UINT8_C( 32), UINT8_C( 52), UINT8_C( 29), UINT8_C( 58), UINT8_C(103), UINT8_C(247), UINT8_C(133), UINT8_C(115), UINT8_C(217), UINT8_C(201) }, { UINT8_C( 25), UINT8_C(162), UINT8_C( 13), UINT8_C(116), UINT8_C(120), UINT8_C(222), UINT8_C( 32), UINT8_C( 52), UINT8_C( 29), UINT8_C( 58), UINT8_C(136), UINT8_C(247), UINT8_C(133), UINT8_C(115), UINT8_C(217), UINT8_C(201) }, INT32_C( 10) }, { UINT8_C(133), { UINT8_C( 88), UINT8_C( 99), UINT8_C(154), UINT8_C( 42), UINT8_C( 46), UINT8_C( 11), UINT8_C( 24), UINT8_C(106), UINT8_C(158), UINT8_C( 68), UINT8_C(117), UINT8_C(127), UINT8_C( 7), UINT8_C(253), UINT8_C(152), UINT8_C(169) }, { UINT8_C( 88), UINT8_C( 99), UINT8_C(154), UINT8_C( 42), UINT8_C( 46), UINT8_C( 11), UINT8_C( 24), UINT8_C(106), UINT8_C(158), UINT8_C( 68), UINT8_C(117), UINT8_C(127), UINT8_C(133), UINT8_C(253), UINT8_C(152), UINT8_C(169) }, INT32_C( 12) }, { UINT8_C( 10), { UINT8_C( 12), UINT8_C( 33), UINT8_C(232), UINT8_C( 44), UINT8_C( 85), UINT8_C( 6), UINT8_C(103), UINT8_C(188), UINT8_C(253), 
UINT8_C(236), UINT8_C( 48), UINT8_C(214), UINT8_C(181), UINT8_C(181), UINT8_C( 47), UINT8_C( 24) }, { UINT8_C( 12), UINT8_C( 33), UINT8_C(232), UINT8_C( 44), UINT8_C( 85), UINT8_C( 6), UINT8_C(103), UINT8_C(188), UINT8_C(253), UINT8_C(236), UINT8_C( 48), UINT8_C(214), UINT8_C(181), UINT8_C(181), UINT8_C( 10), UINT8_C( 24) }, INT32_C( 14) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint8_t a = test_vec[i].a; simde_uint8x16_t v = simde_vld1q_u8(test_vec[i].v); int lane = test_vec[i].lane; simde_uint8x16_t r; SIMDE_CONSTIFY_16_(simde_vsetq_lane_u8, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int lanes[] = { 0, 2, 4, 6, 8, 10, 12, 14 }; for (int i = 0 ; i < 8 ; i++) { uint8_t a = simde_test_codegen_random_u8(); simde_uint8x16_t v = simde_test_arm_neon_random_u8x16(); int lane = lanes[i]; simde_uint8x16_t r; SIMDE_CONSTIFY_16_(simde_vsetq_lane_u8, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_codegen_write_u8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsetq_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a; uint16_t v[8]; uint16_t r[8]; int lane; } test_vec[] = { { UINT16_C(32446), { UINT16_C(19553), UINT16_C(61358), UINT16_C(14924), UINT16_C( 6732), UINT16_C(13757), UINT16_C(36062), UINT16_C(42840), UINT16_C(27459) }, { UINT16_C(32446), UINT16_C(61358), UINT16_C(14924), UINT16_C( 6732), UINT16_C(13757), UINT16_C(36062), UINT16_C(42840), UINT16_C(27459) }, INT32_C( 0) }, { UINT16_C(48560), { UINT16_C(32777), UINT16_C(43662), UINT16_C(65055), UINT16_C( 2648), UINT16_C(36938), UINT16_C( 2492), UINT16_C( 7695), UINT16_C(48469) }, { UINT16_C(32777), UINT16_C(48560), 
UINT16_C(65055), UINT16_C( 2648), UINT16_C(36938), UINT16_C( 2492), UINT16_C( 7695), UINT16_C(48469) }, INT32_C( 1) }, { UINT16_C(41229), { UINT16_C(23031), UINT16_C(46267), UINT16_C(39566), UINT16_C(58944), UINT16_C(33601), UINT16_C(61778), UINT16_C(23360), UINT16_C(53106) }, { UINT16_C(23031), UINT16_C(46267), UINT16_C(41229), UINT16_C(58944), UINT16_C(33601), UINT16_C(61778), UINT16_C(23360), UINT16_C(53106) }, INT32_C( 2) }, { UINT16_C(37126), { UINT16_C(24269), UINT16_C( 6043), UINT16_C(22767), UINT16_C(65056), UINT16_C(30326), UINT16_C(33723), UINT16_C(45591), UINT16_C(54237) }, { UINT16_C(24269), UINT16_C( 6043), UINT16_C(22767), UINT16_C(37126), UINT16_C(30326), UINT16_C(33723), UINT16_C(45591), UINT16_C(54237) }, INT32_C( 3) }, { UINT16_C(27494), { UINT16_C(42605), UINT16_C(44626), UINT16_C(42025), UINT16_C(27295), UINT16_C( 4607), UINT16_C( 1337), UINT16_C( 1698), UINT16_C(15972) }, { UINT16_C(42605), UINT16_C(44626), UINT16_C(42025), UINT16_C(27295), UINT16_C(27494), UINT16_C( 1337), UINT16_C( 1698), UINT16_C(15972) }, INT32_C( 4) }, { UINT16_C(21277), { UINT16_C(16022), UINT16_C( 3153), UINT16_C( 3252), UINT16_C(52111), UINT16_C(27838), UINT16_C( 9374), UINT16_C( 3032), UINT16_C(10954) }, { UINT16_C(16022), UINT16_C( 3153), UINT16_C( 3252), UINT16_C(52111), UINT16_C(27838), UINT16_C(21277), UINT16_C( 3032), UINT16_C(10954) }, INT32_C( 5) }, { UINT16_C(62393), { UINT16_C(22990), UINT16_C(52573), UINT16_C(38506), UINT16_C( 3539), UINT16_C(14236), UINT16_C(47691), UINT16_C(57738), UINT16_C(56312) }, { UINT16_C(22990), UINT16_C(52573), UINT16_C(38506), UINT16_C( 3539), UINT16_C(14236), UINT16_C(47691), UINT16_C(62393), UINT16_C(56312) }, INT32_C( 6) }, { UINT16_C(44269), { UINT16_C(31975), UINT16_C(42359), UINT16_C( 5865), UINT16_C(49609), UINT16_C(37665), UINT16_C(56299), UINT16_C(47494), UINT16_C(58420) }, { UINT16_C(31975), UINT16_C(42359), UINT16_C( 5865), UINT16_C(49609), UINT16_C(37665), UINT16_C(56299), UINT16_C(47494), UINT16_C(44269) }, INT32_C( 7) 
}, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint16_t a = test_vec[i].a; simde_uint16x8_t v = simde_vld1q_u16(test_vec[i].v); int lane = test_vec[i].lane; simde_uint16x8_t r; SIMDE_CONSTIFY_8_(simde_vsetq_lane_u16, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int lanes[] = { 0, 1, 2, 3, 4, 5, 6, 7 }; for (int i = 0 ; i < 8 ; i++) { uint16_t a = simde_test_codegen_random_u16(); simde_uint16x8_t v = simde_test_arm_neon_random_u16x8(); int lane = lanes[i]; simde_uint16x8_t r; SIMDE_CONSTIFY_8_(simde_vsetq_lane_u16, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_codegen_write_u16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsetq_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a; uint32_t v[4]; uint32_t r[4]; int lane; } test_vec[] = { { UINT32_C(1099264639), { UINT32_C(1082970218), UINT32_C(2894578869), UINT32_C(4293238305), UINT32_C(1201326841) }, { UINT32_C(1099264639), UINT32_C(2894578869), UINT32_C(4293238305), UINT32_C(1201326841) }, INT32_C( 0) }, { UINT32_C(2013389258), { UINT32_C( 494535141), UINT32_C(3369462601), UINT32_C(3339278941), UINT32_C(3741881642) }, { UINT32_C( 494535141), UINT32_C(2013389258), UINT32_C(3339278941), UINT32_C(3741881642) }, INT32_C( 1) }, { UINT32_C(2056032089), { UINT32_C( 645558573), UINT32_C( 107877435), UINT32_C(3682496502), UINT32_C(3136878705) }, { UINT32_C( 645558573), UINT32_C( 107877435), UINT32_C(2056032089), UINT32_C(3136878705) }, INT32_C( 2) }, { UINT32_C(1099091428), { UINT32_C(1376357160), UINT32_C(2050101537), UINT32_C(3472211616), UINT32_C(1811181359) }, { UINT32_C(1376357160), UINT32_C(2050101537), UINT32_C(3472211616), 
UINT32_C(1099091428) }, INT32_C( 3) }, { UINT32_C(2037473923), { UINT32_C(1129639890), UINT32_C(3422375399), UINT32_C(1108180762), UINT32_C( 731190794) }, { UINT32_C(2037473923), UINT32_C(3422375399), UINT32_C(1108180762), UINT32_C( 731190794) }, INT32_C( 0) }, { UINT32_C(3349595943), { UINT32_C(3029703557), UINT32_C(2367654410), UINT32_C(3188166892), UINT32_C(1728142207) }, { UINT32_C(3029703557), UINT32_C(3349595943), UINT32_C(3188166892), UINT32_C(1728142207) }, INT32_C( 1) }, { UINT32_C(3274899112), { UINT32_C(2282045309), UINT32_C(2092145237), UINT32_C(3863239009), UINT32_C(4271626740) }, { UINT32_C(2282045309), UINT32_C(2092145237), UINT32_C(3274899112), UINT32_C(4271626740) }, INT32_C( 2) }, { UINT32_C(1351400035), { UINT32_C(3389952843), UINT32_C(2536575214), UINT32_C(2354734094), UINT32_C(4178861987) }, { UINT32_C(3389952843), UINT32_C(2536575214), UINT32_C(2354734094), UINT32_C(1351400035) }, INT32_C( 3) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint32_t a = test_vec[i].a; simde_uint32x4_t v = simde_vld1q_u32(test_vec[i].v); int lane = test_vec[i].lane; simde_uint32x4_t r; SIMDE_CONSTIFY_4_(simde_vsetq_lane_u32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; for (int i = 0 ; i < 8 ; i++) { uint32_t a = simde_test_codegen_random_u32(); simde_uint32x4_t v = simde_test_arm_neon_random_u32x4(); int lane = lanes[i]; simde_uint32x4_t r; SIMDE_CONSTIFY_4_(simde_vsetq_lane_u32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_codegen_write_u32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsetq_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const 
struct { uint64_t a; uint64_t v[2]; uint64_t r[2]; int lane; } test_vec[] = { { UINT64_C( 5390401428211676195), { UINT64_C(14829740467812854006), UINT64_C( 6030377659942938448) }, { UINT64_C( 5390401428211676195), UINT64_C( 6030377659942938448) }, INT32_C( 0) }, { UINT64_C(16865715377620681228), { UINT64_C(16327746488354496450), UINT64_C(17394597901878546230) }, { UINT64_C(16327746488354496450), UINT64_C(16865715377620681228) }, INT32_C( 1) }, { UINT64_C(18413438712080547758), { UINT64_C( 5178889453871920686), UINT64_C(16377951018670153595) }, { UINT64_C(18413438712080547758), UINT64_C(16377951018670153595) }, INT32_C( 0) }, { UINT64_C( 4075162322782160122), { UINT64_C( 7175633015332112801), UINT64_C( 4123669697782129006) }, { UINT64_C( 7175633015332112801), UINT64_C( 4075162322782160122) }, INT32_C( 1) }, { UINT64_C(17557542605282201111), { UINT64_C(12047721669956871279), UINT64_C( 593579713905861028) }, { UINT64_C(17557542605282201111), UINT64_C( 593579713905861028) }, INT32_C( 0) }, { UINT64_C(16386687870275654770), { UINT64_C( 7345177435188955651), UINT64_C( 4477019592935308915) }, { UINT64_C( 7345177435188955651), UINT64_C(16386687870275654770) }, INT32_C( 1) }, { UINT64_C(11661319370131856691), { UINT64_C( 763142462340452840), UINT64_C( 188859325098639172) }, { UINT64_C(11661319370131856691), UINT64_C( 188859325098639172) }, INT32_C( 0) }, { UINT64_C(14000456004831804788), { UINT64_C(11513844660171662330), UINT64_C( 1749344802057787957) }, { UINT64_C(11513844660171662330), UINT64_C(14000456004831804788) }, INT32_C( 1) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint64_t a = test_vec[i].a; simde_uint64x2_t v = simde_vld1q_u64(test_vec[i].v); int lane = test_vec[i].lane; simde_uint64x2_t r; SIMDE_CONSTIFY_2_(simde_vsetq_lane_u64, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int 
lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; uint64_t a = simde_test_codegen_random_u64(); simde_uint64x2_t v = simde_test_arm_neon_random_u64x2(); int lane = lanes[i]; simde_uint64x2_t r; SIMDE_CONSTIFY_2_(simde_vsetq_lane_u64, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, v, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } HEDLEY_DIAGNOSTIC_POP SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vset_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vset_lane_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vset_lane_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vset_lane_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vset_lane_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vset_lane_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vset_lane_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vset_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vset_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vset_lane_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vsetq_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vsetq_lane_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vsetq_lane_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vsetq_lane_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vsetq_lane_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vsetq_lane_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vsetq_lane_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vsetq_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vsetq_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vsetq_lane_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/shl.c000066400000000000000000001442071400333146700162610ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN shl #include "test-neon.h" #include "../../../simde/arm/neon/shl.h" static int test_simde_vshl_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 50), -INT8_C( 52), -INT8_C( 3), -INT8_C( 40), INT8_C( 58), -INT8_C( 5), INT8_C( 35), INT8_C( 63) }, { INT8_C( 4), INT8_C( 6), -INT8_C( 40), -INT8_C( 26), 
INT8_C( 8), -INT8_C( 8), INT8_C( 1), -INT8_C( 6) }, { -INT8_C( 32), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 70), INT8_C( 0) } }, { { INT8_C( 97), INT8_C( 31), INT8_C( 25), INT8_C( 62), -INT8_C( 88), INT8_C( 87), -INT8_C( 43), INT8_C( 118) }, { INT8_C( 35), -INT8_C( 6), INT8_C( 78), INT8_C( 5), -INT8_C( 51), INT8_C( 7), -INT8_C( 7), -INT8_C( 6) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 64), -INT8_C( 1), INT8_MIN, -INT8_C( 1), INT8_C( 1) } }, { { INT8_C( 100), -INT8_C( 89), -INT8_C( 100), INT8_C( 15), -INT8_C( 42), -INT8_C( 88), INT8_C( 101), INT8_C( 56) }, { -INT8_C( 8), INT8_C( 7), INT8_C( 7), INT8_C( 6), -INT8_C( 6), INT8_C( 4), -INT8_C( 7), -INT8_C( 7) }, { INT8_C( 0), INT8_MIN, INT8_C( 0), -INT8_C( 64), -INT8_C( 1), INT8_MIN, INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 104), INT8_C( 126), -INT8_C( 112), -INT8_C( 51), INT8_C( 61), -INT8_C( 110), INT8_C( 116), -INT8_C( 95) }, { INT8_C( 7), INT8_C( 8), -INT8_C( 80), INT8_C( 7), -INT8_C( 5), INT8_C( 5), INT8_C( 8), INT8_C( 7) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_MIN, INT8_C( 1), INT8_C( 64), INT8_C( 0), INT8_MIN } }, { { INT8_C( 9), -INT8_C( 69), INT8_C( 17), -INT8_C( 81), -INT8_C( 82), -INT8_C( 46), INT8_C( 39), INT8_C( 22) }, { INT8_C( 81), -INT8_C( 5), -INT8_C( 8), -INT8_C( 8), INT8_C( 73), INT8_C( 5), INT8_C( 47), -INT8_C( 8) }, { INT8_C( 0), -INT8_C( 3), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 64), INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 105), -INT8_C( 113), -INT8_C( 13), -INT8_C( 97), INT8_C( 100), INT8_C( 87), -INT8_C( 59), INT8_C( 110) }, { INT8_C( 4), -INT8_C( 6), INT8_C( 7), -INT8_C( 8), -INT8_C( 6), INT8_C( 8), -INT8_C( 41), -INT8_C( 6) }, { INT8_C( 112), -INT8_C( 2), INT8_MIN, -INT8_C( 1), INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 1) } }, { { -INT8_C( 106), INT8_C( 90), -INT8_C( 100), -INT8_C( 118), INT8_C( 52), INT8_C( 61), INT8_C( 19), -INT8_C( 53) }, { -INT8_C( 7), INT8_C( 7), INT8_C( 6), INT8_C( 6), INT8_C( 5), INT8_C( 6), -INT8_C( 7), INT8_C( 7) 
}, { -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_MIN, INT8_C( 64), INT8_C( 0), INT8_MIN } }, { { -INT8_C( 60), INT8_C( 49), INT8_C( 68), -INT8_C( 41), -INT8_C( 24), INT8_C( 12), INT8_C( 83), INT8_C( 126) }, { INT8_C( 6), -INT8_C( 5), INT8_C( 4), -INT8_C( 7), INT8_C( 44), INT8_C( 7), INT8_C( 102), -INT8_C( 7) }, { INT8_C( 0), INT8_C( 1), INT8_C( 64), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vshl_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vshl_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 22332), -INT16_C( 2389), -INT16_C( 6176), INT16_C( 24298) }, { -INT16_C( 11), -INT16_C( 12), INT16_C( 535), INT16_C( 14) }, { INT16_C( 10), -INT16_C( 1), INT16_C( 0), INT16_MIN } }, { { -INT16_C( 30833), -INT16_C( 3392), INT16_C( 7263), INT16_C( 1769) }, { INT16_C( 15), -INT16_C( 10006), -INT16_C( 26926), -INT16_C( 19506) }, { INT16_MIN, -INT16_C( 1), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 24162), INT16_C( 31020), -INT16_C( 13216), -INT16_C( 28236) }, { INT16_C( 9), INT16_C( 11), INT16_C( 16), INT16_C( 2895) }, { -INT16_C( 15360), INT16_C( 24576), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 844), -INT16_C( 30766), -INT16_C( 24430), INT16_C( 3898) }, { INT16_C( 9), -INT16_C( 9), -INT16_C( 16), INT16_C( 2356) }, { INT16_C( 26624), -INT16_C( 61), -INT16_C( 1), INT16_C( 0) } }, { { INT16_C( 7438), -INT16_C( 17736), INT16_C( 2143), -INT16_C( 16698) }, { -INT16_C( 10), -INT16_C( 11), -INT16_C( 15), INT16_C( 12) }, { INT16_C( 7), -INT16_C( 9), INT16_C( 0), INT16_C( 24576) } }, { { INT16_C( 2818), -INT16_C( 21698), INT16_C( 29388), INT16_C( 22965) }, { INT16_C( 10), INT16_C( 27061), INT16_C( 13), -INT16_C( 13277) }, { 
INT16_C( 2048), -INT16_C( 1), INT16_MIN, INT16_C( 0) } }, { { INT16_C( 17345), -INT16_C( 26964), INT16_C( 10446), -INT16_C( 1539) }, { INT16_C( 14), -INT16_C( 7811), -INT16_C( 9), INT16_C( 19084) }, { INT16_C( 16384), INT16_C( 0), INT16_C( 20), -INT16_C( 1) } }, { { INT16_C( 22929), -INT16_C( 270), INT16_C( 5830), INT16_C( 15563) }, { INT16_C( 10), -INT16_C( 15886), -INT16_C( 13), INT16_C( 12) }, { INT16_C( 17408), -INT16_C( 1), INT16_C( 0), -INT16_C( 20480) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vshl_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vshl_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 1402414275), -INT32_C( 1645897146) }, { -INT32_C( 2041372483), INT32_C( 27) }, { INT32_C( 0), INT32_C( 805306368) } }, { { -INT32_C( 337674635), INT32_C( 1447925697) }, { -INT32_C( 22), INT32_C( 24) }, { -INT32_C( 81), -INT32_C( 1056964608) } }, { { -INT32_C( 1179497220), INT32_C( 2067727725) }, { INT32_C( 464721261), -INT32_C( 409327608) }, { INT32_C( 0), INT32_C( 1057320192) } }, { { INT32_C( 1773232254), -INT32_C( 1441594400) }, { INT32_C( 31), -INT32_C( 23) }, { INT32_C( 0), -INT32_C( 172) } }, { { INT32_C( 448004524), INT32_C( 506816278) }, { -INT32_C( 24), -INT32_C( 18) }, { INT32_C( 26), INT32_C( 1933) } }, { { -INT32_C( 141688609), -INT32_C( 680092959) }, { INT32_C( 24), INT32_C( 23) }, { -INT32_C( 553648128), INT32_C( 1887436800) } }, { { INT32_C( 614038165), INT32_C( 1976016714) }, { INT32_C( 32), INT32_C( 30) }, { INT32_C( 0), INT32_MIN } }, { { INT32_C( 2026479318), INT32_C( 1507608799) }, { -INT32_C( 18), INT32_C( 21) }, { INT32_C( 7730), INT32_C( 467664896) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t 
a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vshl_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vshl_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; int64_t b[1]; int64_t r[1]; } test_vec[] = { { { -INT64_C( 4131760704340726406) }, { -INT64_C( 51) }, { -INT64_C( 1835) } }, { { INT64_C( 4795347804945835666) }, { -INT64_C( 8617733424538507080) }, { INT64_C( 0) } }, { { -INT64_C( 2987477723010756817) }, { -INT64_C( 49) }, { -INT64_C( 5307) } }, { { -INT64_C( 834069088546172233) }, { INT64_C( 62) }, { -INT64_C( 4611686018427387904) } }, { { -INT64_C( 3249651920531950297) }, { INT64_C( 2418075451758470935) }, { INT64_C( 7338760155284111360) } }, { { INT64_C( 5243402359250859729) }, { -INT64_C( 47) }, { INT64_C( 37256) } }, { { -INT64_C( 5953442574285360237) }, { INT64_C( 33) }, { -INT64_C( 1003907829020491776) } }, { { INT64_C( 7218011237552599699) }, { -INT64_C( 36) }, { INT64_C( 105035887) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r = simde_vshl_s64(a, b); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; } static int test_simde_vshl_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; int8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(175), UINT8_C(152), UINT8_C(126), UINT8_C( 1), UINT8_C(164), UINT8_C( 17), UINT8_C(164), UINT8_C( 72) }, { -INT8_C( 8), INT8_C( 7), -INT8_C( 8), -INT8_C( 7), -INT8_C( 7), -INT8_C( 8), INT8_C( 7), INT8_C( 6) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(189), UINT8_C(130), UINT8_C(234), UINT8_C(197), UINT8_C(247), UINT8_C( 15), UINT8_C( 90), UINT8_C(166) }, { -INT8_C( 6), -INT8_C( 40), -INT8_C( 89), INT8_C( 4), -INT8_C( 
6), INT8_C( 4), -INT8_C( 7), -INT8_C( 5) }, { UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 80), UINT8_C( 3), UINT8_C(240), UINT8_C( 0), UINT8_C( 5) } }, { { UINT8_C(173), UINT8_C( 5), UINT8_C(173), UINT8_C(224), UINT8_C( 34), UINT8_C(193), UINT8_C(253), UINT8_C(223) }, { INT8_C( 68), -INT8_C( 6), -INT8_C( 6), INT8_C( 7), -INT8_C( 5), -INT8_C( 1), -INT8_C( 30), -INT8_C( 7) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 2), UINT8_C( 0), UINT8_C( 1), UINT8_C( 96), UINT8_C( 0), UINT8_C( 1) } }, { { UINT8_C(149), UINT8_C( 67), UINT8_C(249), UINT8_C( 57), UINT8_C( 39), UINT8_C(110), UINT8_C( 16), UINT8_C(213) }, { INT8_C( 7), -INT8_C( 5), -INT8_C( 75), -INT8_C( 7), INT8_MAX, -INT8_C( 5), INT8_C( 7), -INT8_C( 61) }, { UINT8_C(128), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 3), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(106), UINT8_C( 26), UINT8_C(178), UINT8_C( 63), UINT8_C(152), UINT8_C( 78), UINT8_C(222), UINT8_C( 45) }, { -INT8_C( 7), -INT8_C( 5), INT8_C( 6), -INT8_C( 5), INT8_C( 70), INT8_C( 119), -INT8_C( 8), -INT8_C( 5) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1) } }, { { UINT8_C(225), UINT8_C(119), UINT8_C( 36), UINT8_C(251), UINT8_C( 88), UINT8_C( 84), UINT8_C(236), UINT8_C(195) }, { INT8_C( 6), -INT8_C( 7), INT8_C( 2), INT8_C( 7), -INT8_C( 5), -INT8_C( 8), INT8_C( 52), INT8_C( 7) }, { UINT8_C( 64), UINT8_C( 0), UINT8_C(144), UINT8_C(128), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C(128) } }, { { UINT8_C( 11), UINT8_C( 11), UINT8_C(253), UINT8_C( 2), UINT8_C(210), UINT8_C(118), UINT8_C(148), UINT8_C(179) }, { -INT8_C( 19), -INT8_C( 5), -INT8_C( 82), INT8_C( 8), INT8_C( 12), -INT8_C( 7), INT8_C( 4), INT8_C( 7) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 64), UINT8_C(128) } }, { { UINT8_C( 82), UINT8_C(221), UINT8_C(165), UINT8_C(101), UINT8_C(164), UINT8_C( 95), UINT8_C(174), UINT8_C(175) }, { INT8_C( 106), -INT8_C( 6), -INT8_C( 5), 
INT8_C( 60), INT8_C( 8), INT8_C( 8), -INT8_C( 5), INT8_C( 7) }, { UINT8_C( 0), UINT8_C( 3), UINT8_C( 5), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 5), UINT8_C(128) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_uint8x8_t r = simde_vshl_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vshl_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; int16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(56931), UINT16_C(23193), UINT16_C( 9883), UINT16_C(43704) }, { -INT16_C( 3137), INT16_C( 14), INT16_C( 13), -INT16_C( 15) }, { UINT16_C( 0), UINT16_C(16384), UINT16_C(24576), UINT16_C( 1) } }, { { UINT16_C(29266), UINT16_C(21354), UINT16_C(31280), UINT16_C(41289) }, { -INT16_C( 9), INT16_C( 13), INT16_C( 16), INT16_C( 10) }, { UINT16_C( 57), UINT16_C(16384), UINT16_C( 0), UINT16_C( 9216) } }, { { UINT16_C(37644), UINT16_C( 2112), UINT16_C(22945), UINT16_C(21910) }, { INT16_C( 16), INT16_C( 21362), -INT16_C( 9070), -INT16_C( 16) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(60364), UINT16_C(34815), UINT16_C(24588), UINT16_C(21725) }, { -INT16_C( 9), -INT16_C( 31142), -INT16_C( 13), INT16_C( 8) }, { UINT16_C( 117), UINT16_C( 0), UINT16_C( 3), UINT16_C(56576) } }, { { UINT16_C(55365), UINT16_C(55369), UINT16_C(61620), UINT16_C( 2714) }, { -INT16_C( 10), -INT16_C( 11), INT16_C( 15), -INT16_C( 10) }, { UINT16_C( 54), UINT16_C( 27), UINT16_C( 0), UINT16_C( 2) } }, { { UINT16_C(42220), UINT16_C(23908), UINT16_C(62271), UINT16_C(13422) }, { -INT16_C( 11), INT16_C( 23850), INT16_C( 29613), INT16_C( 12) }, { UINT16_C( 20), UINT16_C( 0), UINT16_C( 0), UINT16_C(57344) } }, { { UINT16_C(28878), UINT16_C(47086), UINT16_C( 8563), UINT16_C(54957) }, { -INT16_C( 9), INT16_C( 14), INT16_C( 12), -INT16_C( 10) }, { UINT16_C( 56), UINT16_C(32768), 
UINT16_C(12288), UINT16_C( 53) } }, { { UINT16_C(14781), UINT16_C(27520), UINT16_C(46765), UINT16_C( 4301) }, { INT16_C( 14), INT16_C( 10), INT16_C( 16), INT16_C( 14) }, { UINT16_C(16384), UINT16_C( 0), UINT16_C( 0), UINT16_C(16384) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_uint16x4_t r = simde_vshl_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vshl_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; int32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(4031652633), UINT32_C( 311972277) }, { -INT32_C( 1862162264), -INT32_C( 27) }, { UINT32_C( 0), UINT32_C( 2) } }, { { UINT32_C(1875928802), UINT32_C(1012393445) }, { INT32_C( 22), INT32_C( 458250701) }, { UINT32_C(3095396352), UINT32_C( 0) } }, { { UINT32_C( 337109612), UINT32_C( 631576714) }, { -INT32_C( 22), -INT32_C( 28) }, { UINT32_C( 80), UINT32_C( 2) } }, { { UINT32_C(1865639127), UINT32_C(3265033205) }, { INT32_C( 400420045), INT32_C( 17) }, { UINT32_C( 0), UINT32_C(3890872320) } }, { { UINT32_C(2728985120), UINT32_C(3863832004) }, { INT32_C( 282615555), -INT32_C( 1988841386) }, { UINT32_C( 357044480), UINT32_C( 0) } }, { { UINT32_C(3391863805), UINT32_C(2564884647) }, { -INT32_C( 21), INT32_C( 17) }, { UINT32_C( 1617), UINT32_C( 290324480) } }, { { UINT32_C(1885057389), UINT32_C(1719677712) }, { INT32_C( 31), -INT32_C( 30) }, { UINT32_C(2147483648), UINT32_C( 1) } }, { { UINT32_C(3825112978), UINT32_C(3499270002) }, { -INT32_C( 31), INT32_C( 19) }, { UINT32_C( 1), UINT32_C(4220518400) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_uint32x2_t r = simde_vshl_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } 
return 0; } static int test_simde_vshl_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; int64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C(11696413056005045141) }, { -INT64_C( 44) }, { UINT64_C( 664864) } }, { { UINT64_C( 3547630782645285736) }, { INT64_C( 53) }, { UINT64_C( 7854277750134145024) } }, { { UINT64_C( 4095866130771274042) }, { INT64_C( 51) }, { UINT64_C( 5318751159924555776) } }, { { UINT64_C( 8075566222472937271) }, { INT64_C( 7690559925448101003) }, { UINT64_C( 0) } }, { { UINT64_C(15518374271611565638) }, { -INT64_C( 37) }, { UINT64_C( 112911033) } }, { { UINT64_C( 3189460018293476581) }, { INT64_C( 2719345347016508604) }, { UINT64_C( 0) } }, { { UINT64_C( 2268110154784134107) }, { INT64_C( 59) }, { UINT64_C(15564440312192434176) } }, { { UINT64_C(16898964182742939517) }, { INT64_C( 44) }, { UINT64_C( 1402818507367776256) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_uint64x1_t r = simde_vshl_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; } static int test_simde_vshlq_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 77), -INT8_C( 116), INT8_C( 46), -INT8_C( 111), -INT8_C( 34), INT8_C( 122), INT8_C( 15), -INT8_C( 95), -INT8_C( 37), -INT8_C( 96), -INT8_C( 54), -INT8_C( 88), -INT8_C( 94), INT8_C( 117), -INT8_C( 66), -INT8_C( 40) }, { -INT8_C( 8), -INT8_C( 7), -INT8_C( 5), -INT8_C( 111), -INT8_C( 7), INT8_C( 6), INT8_C( 11), -INT8_C( 92), INT8_C( 7), -INT8_C( 8), INT8_C( 126), INT8_C( 5), -INT8_C( 5), INT8_C( 5), INT8_C( 6), INT8_C( 5) }, { INT8_C( 0), -INT8_C( 1), INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_MIN, INT8_C( 0), -INT8_C( 1), INT8_MIN, -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 3), -INT8_C( 96), INT8_MIN, INT8_C( 0) } }, { { -INT8_C( 29), -INT8_C( 92), -INT8_C( 119), -INT8_C( 
77), INT8_C( 17), -INT8_C( 108), INT8_C( 87), -INT8_C( 119), INT8_C( 26), -INT8_C( 43), -INT8_C( 97), -INT8_C( 7), INT8_C( 2), INT8_C( 3), INT8_C( 37), -INT8_C( 69) }, { -INT8_C( 7), -INT8_C( 8), INT8_C( 5), -INT8_C( 6), -INT8_C( 5), -INT8_C( 8), -INT8_C( 8), INT8_C( 7), -INT8_C( 8), INT8_C( 6), INT8_C( 47), INT8_C( 6), INT8_C( 103), INT8_C( 5), -INT8_C( 2), INT8_C( 4) }, { -INT8_C( 1), -INT8_C( 1), INT8_C( 32), -INT8_C( 2), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 64), INT8_C( 0), INT8_C( 64), INT8_C( 0), INT8_C( 96), INT8_C( 9), -INT8_C( 80) } }, { { INT8_C( 56), INT8_C( 59), INT8_C( 49), -INT8_C( 23), -INT8_C( 57), -INT8_C( 69), INT8_C( 8), INT8_C( 85), -INT8_C( 44), INT8_C( 55), INT8_C( 92), INT8_C( 60), INT8_C( 79), INT8_C( 90), -INT8_C( 121), INT8_C( 10) }, { -INT8_C( 8), -INT8_C( 123), -INT8_C( 6), -INT8_C( 1), -INT8_C( 5), INT8_C( 5), INT8_C( 53), INT8_C( 4), INT8_C( 8), INT8_C( 101), INT8_C( 6), INT8_C( 6), -INT8_C( 5), INT8_C( 6), -INT8_C( 6), -INT8_C( 7) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 12), -INT8_C( 2), INT8_C( 96), INT8_C( 0), INT8_C( 80), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 2), INT8_MIN, -INT8_C( 2), INT8_C( 0) } }, { { -INT8_C( 78), INT8_C( 77), INT8_C( 10), -INT8_C( 116), INT8_C( 122), INT8_C( 63), -INT8_C( 112), -INT8_C( 99), -INT8_C( 92), -INT8_C( 62), -INT8_C( 72), INT8_C( 94), -INT8_C( 36), INT8_C( 97), INT8_C( 81), INT8_C( 49) }, { INT8_C( 60), INT8_C( 45), INT8_C( 4), -INT8_C( 6), INT8_C( 17), -INT8_C( 8), INT8_C( 7), INT8_C( 5), -INT8_C( 8), -INT8_C( 31), -INT8_C( 7), -INT8_C( 5), INT8_C( 7), INT8_C( 7), -INT8_C( 63), -INT8_C( 8) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 96), -INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 96), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 2), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 116), -INT8_C( 6), -INT8_C( 103), -INT8_C( 99), -INT8_C( 70), -INT8_C( 43), -INT8_C( 53), INT8_C( 72), -INT8_C( 74), INT8_C( 99), 
-INT8_C( 1), -INT8_C( 59), INT8_C( 113), -INT8_C( 64), -INT8_C( 123), -INT8_C( 52) }, { -INT8_C( 8), -INT8_C( 6), -INT8_C( 94), -INT8_C( 7), -INT8_C( 6), INT8_C( 5), INT8_C( 8), INT8_C( 4), INT8_C( 7), INT8_C( 5), -INT8_C( 7), -INT8_C( 7), -INT8_C( 5), INT8_C( 7), -INT8_C( 7), INT8_C( 7) }, { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 2), -INT8_C( 96), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 96), -INT8_C( 1), -INT8_C( 1), INT8_C( 3), INT8_C( 0), -INT8_C( 1), INT8_C( 0) } }, { { -INT8_C( 44), -INT8_C( 21), INT8_C( 85), -INT8_C( 125), INT8_C( 0), -INT8_C( 100), -INT8_C( 47), INT8_C( 63), -INT8_C( 16), -INT8_C( 102), INT8_C( 10), -INT8_C( 92), INT8_C( 16), -INT8_C( 99), -INT8_C( 29), INT8_MIN }, { -INT8_C( 7), -INT8_C( 8), -INT8_C( 6), -INT8_C( 7), INT8_C( 6), INT8_C( 7), -INT8_C( 125), INT8_C( 7), -INT8_C( 113), INT8_C( 0), -INT8_C( 8), -INT8_C( 8), INT8_C( 2), INT8_C( 7), -INT8_C( 8), -INT8_C( 6) }, { -INT8_C( 1), -INT8_C( 1), INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_MIN, -INT8_C( 1), -INT8_C( 102), INT8_C( 0), -INT8_C( 1), INT8_C( 64), INT8_MIN, -INT8_C( 1), -INT8_C( 2) } }, { { -INT8_C( 86), INT8_C( 1), -INT8_C( 96), INT8_C( 19), INT8_C( 30), INT8_C( 35), -INT8_C( 121), -INT8_C( 82), INT8_C( 35), INT8_C( 120), INT8_C( 112), INT8_C( 38), -INT8_C( 78), -INT8_C( 16), -INT8_C( 3), -INT8_C( 40) }, { -INT8_C( 8), INT8_C( 5), -INT8_C( 2), INT8_C( 7), -INT8_C( 8), INT8_C( 6), -INT8_C( 7), INT8_C( 4), -INT8_C( 6), -INT8_C( 6), INT8_C( 8), -INT8_C( 5), -INT8_C( 8), INT8_C( 7), -INT8_C( 5), INT8_C( 6) }, { -INT8_C( 1), INT8_C( 32), -INT8_C( 24), INT8_MIN, INT8_C( 0), -INT8_C( 64), -INT8_C( 1), -INT8_C( 32), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0) } }, { { INT8_C( 22), -INT8_C( 47), -INT8_C( 92), -INT8_C( 103), INT8_C( 53), INT8_C( 63), -INT8_C( 29), INT8_C( 7), -INT8_C( 32), INT8_C( 4), -INT8_C( 26), INT8_C( 108), INT8_C( 123), -INT8_C( 102), -INT8_C( 94), -INT8_C( 13) }, { 
-INT8_C( 18), -INT8_C( 6), -INT8_C( 8), INT8_C( 102), -INT8_C( 5), -INT8_C( 50), INT8_C( 2), INT8_C( 3), -INT8_C( 126), -INT8_C( 61), -INT8_C( 3), INT8_C( 4), -INT8_C( 8), -INT8_C( 48), -INT8_C( 7), -INT8_C( 7) }, { INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 1), INT8_C( 0), -INT8_C( 116), INT8_C( 56), -INT8_C( 1), INT8_C( 0), -INT8_C( 4), -INT8_C( 64), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vshlq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vshlq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 20268), INT16_C( 24220), INT16_C( 20072), -INT16_C( 27645), INT16_C( 1744), INT16_C( 22176), -INT16_C( 26671), INT16_C( 22566) }, { INT16_C( 11), -INT16_C( 13), -INT16_C( 14), -INT16_C( 13), -INT16_C( 14), INT16_C( 16), INT16_C( 10), INT16_C( 4205) }, { INT16_C( 24576), INT16_C( 2), INT16_C( 1), -INT16_C( 4), INT16_C( 0), INT16_C( 0), INT16_C( 17408), INT16_C( 0) } }, { { -INT16_C( 136), INT16_C( 18814), -INT16_C( 23402), INT16_C( 17825), -INT16_C( 12846), INT16_C( 12767), INT16_C( 24470), INT16_C( 16845) }, { -INT16_C( 12), INT16_C( 11), -INT16_C( 13), INT16_C( 12), -INT16_C( 15), INT16_C( 12), INT16_C( 8), INT16_C( 14) }, { -INT16_C( 1), -INT16_C( 4096), -INT16_C( 3), INT16_C( 4096), -INT16_C( 1), -INT16_C( 4096), -INT16_C( 27136), INT16_C( 16384) } }, { { -INT16_C( 469), -INT16_C( 16002), INT16_C( 19293), -INT16_C( 31486), INT16_C( 26094), -INT16_C( 23408), -INT16_C( 21452), INT16_C( 3518) }, { -INT16_C( 12), INT16_C( 12), -INT16_C( 26906), INT16_C( 10), INT16_C( 12), -INT16_C( 6652), INT16_C( 8), -INT16_C( 5838) }, { -INT16_C( 1), -INT16_C( 8192), INT16_C( 0), INT16_C( 2048), -INT16_C( 8192), INT16_C( 
18688), INT16_C( 13312), INT16_C( 0) } }, { { -INT16_C( 27631), INT16_C( 18062), INT16_C( 19520), INT16_C( 30035), -INT16_C( 28177), -INT16_C( 10842), INT16_C( 18727), -INT16_C( 27649) }, { INT16_C( 937), INT16_C( 12), -INT16_C( 11), INT16_C( 8), -INT16_C( 12), INT16_C( 22726), -INT16_C( 13913), -INT16_C( 9) }, { -INT16_C( 1), -INT16_C( 8192), INT16_C( 9), INT16_C( 21248), -INT16_C( 7), -INT16_C( 1), INT16_C( 0), -INT16_C( 55) } }, { { -INT16_C( 17949), INT16_C( 3042), -INT16_C( 7934), -INT16_C( 21346), INT16_C( 6373), INT16_C( 2835), INT16_C( 25796), INT16_C( 8275) }, { INT16_C( 12), INT16_C( 10), -INT16_C( 9), INT16_C( 16574), -INT16_C( 17268), -INT16_C( 21795), -INT16_C( 16), -INT16_C( 3402) }, { INT16_C( 12288), -INT16_C( 30720), -INT16_C( 16), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 27571), INT16_C( 30570), -INT16_C( 16945), INT16_C( 11928), INT16_C( 4566), -INT16_C( 18125), -INT16_C( 3636), INT16_C( 23033) }, { -INT16_C( 11), -INT16_C( 17405), -INT16_C( 9), INT16_C( 14), -INT16_C( 11), -INT16_C( 13), INT16_C( 14), -INT16_C( 12) }, { INT16_C( 13), -INT16_C( 17584), -INT16_C( 34), INT16_C( 0), INT16_C( 2), -INT16_C( 3), INT16_C( 0), INT16_C( 5) } }, { { INT16_C( 4379), -INT16_C( 6174), -INT16_C( 9470), -INT16_C( 20416), INT16_C( 17330), INT16_C( 31085), INT16_C( 7165), INT16_C( 20457) }, { INT16_C( 13), INT16_C( 14), INT16_C( 8), INT16_C( 11), INT16_C( 31451), INT16_C( 11), -INT16_C( 13), -INT16_C( 13) }, { INT16_C( 24576), INT16_MIN, INT16_C( 512), INT16_C( 0), INT16_C( 0), INT16_C( 26624), INT16_C( 0), INT16_C( 2) } }, { { INT16_C( 6155), INT16_C( 2113), INT16_C( 10804), -INT16_C( 1193), INT16_C( 29232), -INT16_C( 21751), INT16_C( 4536), -INT16_C( 27899) }, { INT16_C( 15), INT16_C( 16), INT16_C( 6903), -INT16_C( 9), INT16_C( 13), INT16_C( 27370), -INT16_C( 14), INT16_C( 2354) }, { INT16_MIN, INT16_C( 0), INT16_C( 21), -INT16_C( 3), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0) } }, }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vshlq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vshlq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 2041763791), -INT32_C( 1618428475), INT32_C( 828034002), INT32_C( 1472186433) }, { -INT32_C( 20), -INT32_C( 27), INT32_C( 19), -INT32_C( 23) }, { -INT32_C( 1948), -INT32_C( 13), INT32_C( 1586495488), INT32_C( 175) } }, { { INT32_C( 1298576763), -INT32_C( 1031880575), -INT32_C( 719766135), -INT32_C( 561639655) }, { INT32_C( 25), INT32_C( 28), -INT32_C( 639565865), INT32_C( 28) }, { -INT32_C( 167772160), INT32_C( 268435456), -INT32_C( 1), -INT32_C( 1879048192) } }, { { INT32_C( 1325794246), INT32_C( 1512382273), -INT32_C( 633820370), INT32_C( 71426368) }, { -INT32_C( 19), -INT32_C( 29), -INT32_C( 272289748), -INT32_C( 23) }, { INT32_C( 2528), INT32_C( 2), INT32_C( 0), INT32_C( 8) } }, { { -INT32_C( 1997967014), INT32_C( 1566712093), -INT32_C( 496917758), INT32_C( 1117429619) }, { -INT32_C( 23), -INT32_C( 28), INT32_C( 22), -INT32_C( 24) }, { -INT32_C( 239), INT32_C( 5), -INT32_C( 1065353216), INT32_C( 66) } }, { { -INT32_C( 943223611), -INT32_C( 1565906897), -INT32_C( 2065414969), -INT32_C( 915557522) }, { INT32_C( 18), -INT32_C( 26), INT32_C( 289016772), INT32_C( 29) }, { INT32_C( 856948736), -INT32_C( 24), -INT32_C( 1), -INT32_C( 1073741824) } }, { { INT32_C( 834856554), -INT32_C( 1732860374), -INT32_C( 1604181159), -INT32_C( 1127957143) }, { INT32_C( 26), -INT32_C( 24), -INT32_C( 28), -INT32_C( 24) }, { -INT32_C( 1476395008), -INT32_C( 104), -INT32_C( 6), -INT32_C( 68) } }, { { -INT32_C( 987133588), INT32_C( 375753645), -INT32_C( 288216504), INT32_C( 1599678527) }, { -INT32_C( 21), INT32_C( 18), INT32_C( 22), -INT32_C( 974423719) }, { -INT32_C( 
471), INT32_C( 783548416), -INT32_C( 1845493760), INT32_C( 0) } }, { { -INT32_C( 406392673), INT32_C( 1507236122), INT32_C( 2042113985), -INT32_C( 786044923) }, { INT32_C( 20), -INT32_C( 24), -INT32_C( 18), INT32_C( 1269297579) }, { INT32_C( 166723584), INT32_C( 89), INT32_C( 7790), -INT32_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vshlq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vshlq_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { INT64_C( 8363743883797716341), INT64_C( 8779737833653395887) }, { -INT64_C( 63), INT64_C( 246405564730123878) }, { INT64_C( 0), INT64_C( 0) } }, { { INT64_C( 8126799927234921685), -INT64_C( 6485624037076249828) }, { -INT64_C( 58), -INT64_C( 7250353646116027793) }, { INT64_C( 28), INT64_C( 0) } }, { { -INT64_C( 3234373439391986046), -INT64_C( 2587179382443185883) }, { -INT64_C( 33), INT64_C( 50) }, { -INT64_C( 376530625), -INT64_C( 6587640354936193024) } }, { { -INT64_C( 8631416662590524752), INT64_C( 3592062853739595312) }, { INT64_C( 45), INT64_C( 46) }, { -INT64_C( 1272829844685586432), -INT64_C( 7958986441470509056) } }, { { INT64_C( 8962794885418511845), INT64_C( 7398396818986337414) }, { INT64_C( 42), -INT64_C( 4848071295825756004) }, { INT64_C( 4303070696696709120), INT64_C( 0) } }, { { INT64_C( 2287417463048279256), INT64_C( 5354139880492463338) }, { INT64_C( 61), -INT64_C( 54) }, { INT64_C( 0), INT64_C( 297) } }, { { -INT64_C( 3385921522485940017), -INT64_C( 6409014629278950304) }, { INT64_C( 55), INT64_C( 38) }, { INT64_C( 7457960982925541376), INT64_C( 7131194923243536384) } }, { { -INT64_C( 596211911612917029), INT64_C( 5427564910413537154) }, { -INT64_C( 35), INT64_C( 61) }, { -INT64_C( 17352051), INT64_C( 
4611686018427387904) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_vshlq_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vshlq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; int8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(227), UINT8_C( 82), UINT8_C( 19), UINT8_C( 82), UINT8_C(244), UINT8_C(233), UINT8_C(145), UINT8_C(146), UINT8_C(188), UINT8_C(114), UINT8_C( 73), UINT8_C(201), UINT8_C(134), UINT8_C(245), UINT8_C(132), UINT8_C(232) }, { INT8_C( 120), -INT8_C( 6), -INT8_C( 7), -INT8_C( 4), -INT8_C( 7), INT8_C( 5), INT8_C( 7), -INT8_C( 8), INT8_C( 7), INT8_C( 7), -INT8_C( 5), -INT8_C( 8), -INT8_C( 8), INT8_C( 6), INT8_C( 8), -INT8_C( 88) }, { UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 5), UINT8_C( 1), UINT8_C( 32), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 64), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(207), UINT8_C( 84), UINT8_C(242), UINT8_C(158), UINT8_C(178), UINT8_C(112), UINT8_C(127), UINT8_C(239), UINT8_C(142), UINT8_C(110), UINT8_C(232), UINT8_C( 84), UINT8_C(213), UINT8_C( 8), UINT8_C(252), UINT8_C(142) }, { INT8_C( 7), -INT8_C( 5), INT8_C( 7), INT8_C( 5), -INT8_C( 8), INT8_C( 7), INT8_C( 6), -INT8_C( 8), INT8_C( 3), -INT8_C( 6), INT8_C( 4), -INT8_C( 8), -INT8_C( 1), INT8_C( 7), INT8_C( 118), -INT8_C( 7) }, { UINT8_C(128), UINT8_C( 2), UINT8_C( 0), UINT8_C(192), UINT8_C( 0), UINT8_C( 0), UINT8_C(192), UINT8_C( 0), UINT8_C(112), UINT8_C( 1), UINT8_C(128), UINT8_C( 0), UINT8_C(106), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1) } }, { { UINT8_C(175), UINT8_C(247), UINT8_C(237), UINT8_C( 49), UINT8_C(113), UINT8_C( 33), UINT8_C(178), UINT8_C(116), UINT8_C(249), UINT8_C(182), UINT8_C(245), UINT8_C(248), UINT8_C( 39), UINT8_C(107), UINT8_C(198), 
UINT8_C(236) }, { -INT8_C( 6), INT8_C( 6), INT8_C( 6), -INT8_C( 6), INT8_C( 7), -INT8_C( 7), INT8_C( 5), INT8_C( 7), INT8_C( 24), -INT8_C( 6), -INT8_C( 6), INT8_C( 6), -INT8_C( 8), INT8_C( 6), INT8_C( 3), INT8_C( 6) }, { UINT8_C( 2), UINT8_C(192), UINT8_C( 64), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 64), UINT8_C( 0), UINT8_C( 0), UINT8_C( 2), UINT8_C( 3), UINT8_C( 0), UINT8_C( 0), UINT8_C(192), UINT8_C( 48), UINT8_C( 0) } }, { { UINT8_C(251), UINT8_C( 72), UINT8_C( 30), UINT8_C( 24), UINT8_C( 18), UINT8_C( 51), UINT8_C(142), UINT8_C( 43), UINT8_C( 3), UINT8_C( 50), UINT8_C(154), UINT8_C(139), UINT8_C(146), UINT8_C(157), UINT8_C(194), UINT8_C(233) }, { -INT8_C( 8), INT8_C( 5), -INT8_C( 5), -INT8_C( 6), INT8_C( 8), -INT8_C( 5), -INT8_C( 6), INT8_C( 5), INT8_C( 8), -INT8_C( 82), INT8_C( 7), -INT8_C( 5), INT8_C( 7), -INT8_C( 7), INT8_C( 5), INT8_C( 7) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 2), UINT8_C( 96), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 4), UINT8_C( 0), UINT8_C( 1), UINT8_C( 64), UINT8_C(128) } }, { { UINT8_C(251), UINT8_C(118), UINT8_C(223), UINT8_C( 64), UINT8_C(101), UINT8_C(138), UINT8_C( 86), UINT8_C(133), UINT8_C( 56), UINT8_C(101), UINT8_C( 67), UINT8_C(175), UINT8_C( 88), UINT8_C(115), UINT8_C( 33), UINT8_C(148) }, { -INT8_C( 8), -INT8_C( 6), -INT8_C( 8), INT8_C( 8), -INT8_C( 8), INT8_C( 92), -INT8_C( 8), INT8_C( 6), INT8_C( 7), -INT8_C( 43), -INT8_C( 23), INT8_C( 8), -INT8_C( 6), -INT8_C( 6), INT8_C( 6), -INT8_C( 6) }, { UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 64), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 64), UINT8_C( 2) } }, { { UINT8_C(128), UINT8_C(109), UINT8_C(201), UINT8_C( 68), UINT8_C(202), UINT8_C(143), UINT8_C( 81), UINT8_C( 58), UINT8_C(100), UINT8_C( 59), UINT8_C( 92), UINT8_C( 10), UINT8_C(233), UINT8_C(190), UINT8_C(171), UINT8_C( 14) }, { -INT8_C( 1), -INT8_C( 
8), -INT8_C( 7), -INT8_C( 7), -INT8_C( 58), -INT8_C( 6), -INT8_C( 7), INT8_C( 6), -INT8_C( 5), -INT8_C( 8), INT8_C( 5), -INT8_C( 8), INT8_C( 5), -INT8_C( 7), INT8_C( 4), -INT8_C( 6) }, { UINT8_C( 64), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 2), UINT8_C( 0), UINT8_C(128), UINT8_C( 3), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 32), UINT8_C( 1), UINT8_C(176), UINT8_C( 0) } }, { { UINT8_C( 71), UINT8_C( 56), UINT8_C( 58), UINT8_C( 13), UINT8_C(224), UINT8_C( 8), UINT8_C(112), UINT8_C(219), UINT8_C(138), UINT8_C(202), UINT8_C(156), UINT8_C(225), UINT8_C(174), UINT8_C(228), UINT8_C(185), UINT8_MAX }, { -INT8_C( 6), -INT8_C( 6), INT8_C( 6), -INT8_C( 7), INT8_C( 8), INT8_C( 113), -INT8_C( 7), -INT8_C( 23), INT8_C( 35), -INT8_C( 6), INT8_C( 7), -INT8_C( 7), INT8_C( 6), INT8_C( 6), INT8_C( 3), -INT8_C( 6) }, { UINT8_C( 1), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 3), UINT8_C( 0), UINT8_C( 1), UINT8_C(128), UINT8_C( 0), UINT8_C(200), UINT8_C( 3) } }, { { UINT8_C( 63), UINT8_C(184), UINT8_C( 27), UINT8_C(129), UINT8_C( 41), UINT8_C(181), UINT8_C(106), UINT8_C( 76), UINT8_C( 92), UINT8_C(229), UINT8_C(224), UINT8_C(189), UINT8_C( 0), UINT8_C(228), UINT8_C(101), UINT8_C( 84) }, { INT8_C( 33), INT8_C( 6), -INT8_C( 8), INT8_C( 6), INT8_C( 8), -INT8_C( 7), INT8_C( 7), INT8_C( 5), INT8_C( 8), -INT8_C( 6), -INT8_C( 7), -INT8_C( 6), INT8_C( 5), INT8_C( 6), INT8_C( 5), -INT8_C( 7) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 64), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 3), UINT8_C( 1), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C(160), UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_uint8x16_t r = simde_vshlq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, 
simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vshlq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; int16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(26316), UINT16_C(46829), UINT16_C(17801), UINT16_C( 9381), UINT16_C(58471), UINT16_C(14280), UINT16_C(43915), UINT16_C(18443) }, { -INT16_C( 13), -INT16_C( 15), INT16_C( 8), INT16_C( 28360), INT16_C( 14), -INT16_C( 31360), INT16_C( 10), -INT16_C( 31767) }, { UINT16_C( 3), UINT16_C( 1), UINT16_C(35072), UINT16_C( 0), UINT16_C(49152), UINT16_C( 0), UINT16_C(11264), UINT16_C( 0) } }, { { UINT16_C(12995), UINT16_C(20154), UINT16_C(50653), UINT16_C(13207), UINT16_C(34146), UINT16_C(19137), UINT16_C(35274), UINT16_C(49848) }, { INT16_C( 14589), -INT16_C( 10), INT16_C( 12), INT16_C( 19512), INT16_C( 14), INT16_C( 9), -INT16_C( 11), INT16_C( 5031) }, { UINT16_C( 1624), UINT16_C( 19), UINT16_C(53248), UINT16_C( 0), UINT16_C(32768), UINT16_C(33280), UINT16_C( 17), UINT16_C( 0) } }, { { UINT16_C( 638), UINT16_C(18898), UINT16_C(35467), UINT16_C(35083), UINT16_C(21186), UINT16_C(20798), UINT16_C(30339), UINT16_C(35741) }, { INT16_C( 12520), INT16_C( 14), INT16_C( 10), INT16_C( 11), -INT16_C( 11), -INT16_C( 9), -INT16_C( 11), INT16_C( 9829) }, { UINT16_C( 0), UINT16_C(32768), UINT16_C(11264), UINT16_C(22528), UINT16_C( 10), UINT16_C( 40), UINT16_C( 14), UINT16_C( 0) } }, { { UINT16_C( 204), UINT16_C(20692), UINT16_C(29303), UINT16_C(24539), UINT16_C(35746), UINT16_C(53400), UINT16_C(58849), UINT16_C(39214) }, { -INT16_C( 13), INT16_C( 14), -INT16_C( 9), -INT16_C( 12), -INT16_C( 12269), -INT16_C( 11), -INT16_C( 16), INT16_C( 5720) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 57), UINT16_C( 5), UINT16_C( 0), UINT16_C( 26), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(24525), UINT16_C(44562), UINT16_C(16452), UINT16_C(55367), UINT16_C(48858), UINT16_C( 8211), UINT16_C(29850), UINT16_C(44308) }, { INT16_C( 16), -INT16_C( 15), -INT16_C( 9422), INT16_C( 8868), INT16_C( 11), -INT16_C( 12), INT16_C( 10), 
INT16_C( 6888) }, { UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C(53248), UINT16_C( 2), UINT16_C(26624), UINT16_C( 0) } }, { { UINT16_C(57550), UINT16_C(26678), UINT16_C(19028), UINT16_C(38933), UINT16_C(39052), UINT16_C(48678), UINT16_C(51827), UINT16_C(31713) }, { INT16_C( 27605), INT16_C( 8738), INT16_C( 10), INT16_C( 13), INT16_C( 1285), INT16_C( 16481), INT16_C( 11797), -INT16_C( 15) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C(20480), UINT16_C(40960), UINT16_C( 4480), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C( 8440), UINT16_C(27425), UINT16_C( 747), UINT16_C(49382), UINT16_C( 2157), UINT16_C(10723), UINT16_C( 7955), UINT16_C( 6293) }, { -INT16_C( 9), INT16_C( 14), -INT16_C( 11), INT16_C( 13340), INT16_C( 12), INT16_C( 15), -INT16_C( 14), -INT16_C( 16) }, { UINT16_C( 16), UINT16_C(16384), UINT16_C( 0), UINT16_C( 0), UINT16_C(53248), UINT16_C(32768), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(16411), UINT16_C(11818), UINT16_C(49247), UINT16_C(33862), UINT16_C(40631), UINT16_C(56509), UINT16_C(55883), UINT16_C(33808) }, { -INT16_C( 12), INT16_C( 9), INT16_C( 1336), -INT16_C( 12), -INT16_C( 10), INT16_C( 11), -INT16_C( 15), INT16_C( 8) }, { UINT16_C( 4), UINT16_C(21504), UINT16_C( 0), UINT16_C( 8), UINT16_C( 39), UINT16_C(59392), UINT16_C( 1), UINT16_C( 4096) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_uint16x8_t r = simde_vshlq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vshlq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; int32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(3874822276), UINT32_C( 973838889), UINT32_C(1360305370), UINT32_C(2317166544) }, { -INT32_C( 1429912463), -INT32_C( 22), INT32_C( 24), INT32_C( 32) }, { UINT32_C( 0), UINT32_C( 232), UINT32_C(3657433088), UINT32_C( 0) } 
}, { { UINT32_C(1926759064), UINT32_C(1875176862), UINT32_C(2247745812), UINT32_C(4079992614) }, { -INT32_C( 1633882781), INT32_C( 26), -INT32_C( 28), -INT32_C( 24) }, { UINT32_C( 0), UINT32_C(2013265920), UINT32_C( 8), UINT32_C( 243) } }, { { UINT32_C( 176216566), UINT32_C(3767499194), UINT32_C(2631188024), UINT32_C(2268754091) }, { INT32_C( 19), INT32_C( 20), INT32_C( 19), INT32_C( 26) }, { UINT32_C(3484418048), UINT32_C(2610954240), UINT32_C(4055891968), UINT32_C(2885681152) } }, { { UINT32_C(2424153687), UINT32_C( 254562660), UINT32_C(4053231298), UINT32_C(3946678179) }, { -INT32_C( 26), INT32_C( 1092170026), INT32_C( 29), -INT32_C( 21) }, { UINT32_C( 36), UINT32_C( 0), UINT32_C(1073741824), UINT32_C( 1881) } }, { { UINT32_C(1266772105), UINT32_C(1916606671), UINT32_C(2438822559), UINT32_C(3227359894) }, { INT32_C( 24), INT32_C( 16), -INT32_C( 30), -INT32_C( 24) }, { UINT32_C(2298478592), UINT32_C( 416219136), UINT32_C( 2), UINT32_C( 192) } }, { { UINT32_C(1859397838), UINT32_C( 973025699), UINT32_C(2549701835), UINT32_C(3214801618) }, { INT32_C( 32), -INT32_C( 1228476512), -INT32_C( 1370031334), -INT32_C( 32) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 738197504), UINT32_C( 0) } }, { { UINT32_C(3796299030), UINT32_C( 460930889), UINT32_C(2245662266), UINT32_C(2022169560) }, { -INT32_C( 27), -INT32_C( 22), INT32_C( 24), -INT32_C( 25) }, { UINT32_C( 28), UINT32_C( 109), UINT32_C( 973078528), UINT32_C( 60) } }, { { UINT32_C(4290575045), UINT32_C(3917780497), UINT32_C(1029770098), UINT32_C(2686619480) }, { -INT32_C( 19), -INT32_C( 29), -INT32_C( 21), -INT32_C( 27) }, { UINT32_C( 8183), UINT32_C( 7), UINT32_C( 491), UINT32_C( 20) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_uint32x4_t r = simde_vshlq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int 
test_simde_vshlq_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; int64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 4829422187629488727), UINT64_C( 2605387542525908668) }, { -INT64_C( 1643974313470714674), INT64_C( 44) }, { UINT64_C( 4289), UINT64_C(13937444578060664832) } }, { { UINT64_C(13970315877439767237), UINT64_C(16522513161567537670) }, { -INT64_C( 58), INT64_C( 61) }, { UINT64_C( 48), UINT64_C(13835058055282163712) } }, { { UINT64_C( 1925351683289998797), UINT64_C( 2238587861661968232) }, { -INT64_C( 58), INT64_C( 38) }, { UINT64_C( 6), UINT64_C( 3979451439293399040) } }, { { UINT64_C(11860546040509214202), UINT64_C( 4340667968086369747) }, { INT64_C( 41), -INT64_C( 60) }, { UINT64_C( 8024275441927847936), UINT64_C( 3) } }, { { UINT64_C( 8910991743644541611), UINT64_C(14516822791415737880) }, { -INT64_C( 45), INT64_C( 47) }, { UINT64_C( 253265), UINT64_C(10884074399447646208) } }, { { UINT64_C(12933258487992063907), UINT64_C(11128579057712752255) }, { -INT64_C( 44), -INT64_C( 61) }, { UINT64_C( 735170), UINT64_C( 4) } }, { { UINT64_C(10921609281154912814), UINT64_C( 2427142075779531086) }, { -INT64_C( 57), -INT64_C( 38) }, { UINT64_C( 75), UINT64_C( 8829891) } }, { { UINT64_C(15939054506333309024), UINT64_C( 5590987143679708579) }, { -INT64_C( 4843971537307842889), INT64_C( 49) }, { UINT64_C( 0), UINT64_C( 1965258287393800192) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_uint64x2_t r = simde_vshlq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vshl_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vshl_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vshl_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vshl_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vshl_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vshl_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vshl_u32) 
SIMDE_TEST_FUNC_LIST_ENTRY(vshl_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vshlq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vshlq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vshlq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vshlq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vshlq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vshlq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vshlq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vshlq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/shl_n.c000066400000000000000000003067551400333146700166060ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN shl_n #include "test-neon.h" #include "../../../simde/arm/neon/shl_n.h" static int test_simde_vshl_n_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t r1[8]; int8_t r3[8]; int8_t r5[8]; int8_t r6[8]; int8_t r7[8]; } test_vec[] = { { { -INT8_C( 125), INT8_C( 122), -INT8_C( 9), INT8_C( 63), INT8_C( 82), INT8_C( 28), INT8_C( 109), INT8_C( 1) }, { INT8_C( 6), -INT8_C( 12), -INT8_C( 18), INT8_C( 126), -INT8_C( 92), INT8_C( 56), -INT8_C( 38), INT8_C( 2) }, { INT8_C( 24), -INT8_C( 48), -INT8_C( 72), -INT8_C( 8), -INT8_C( 112), -INT8_C( 32), INT8_C( 104), INT8_C( 8) }, { INT8_C( 96), INT8_C( 64), -INT8_C( 32), -INT8_C( 32), INT8_C( 64), INT8_MIN, -INT8_C( 96), INT8_C( 32) }, { -INT8_C( 64), INT8_MIN, -INT8_C( 64), -INT8_C( 64), INT8_MIN, INT8_C( 0), INT8_C( 64), INT8_C( 64) }, { INT8_MIN, INT8_C( 0), INT8_MIN, INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_MIN } }, { { -INT8_C( 58), INT8_C( 26), INT8_C( 83), INT8_C( 124), -INT8_C( 27), INT8_C( 84), -INT8_C( 4), -INT8_C( 94) }, { -INT8_C( 116), INT8_C( 52), -INT8_C( 90), -INT8_C( 8), -INT8_C( 54), -INT8_C( 88), -INT8_C( 8), INT8_C( 68) }, { INT8_C( 48), -INT8_C( 48), -INT8_C( 104), -INT8_C( 32), INT8_C( 40), -INT8_C( 96), -INT8_C( 32), INT8_C( 16) }, { -INT8_C( 64), INT8_C( 64), INT8_C( 96), INT8_MIN, -INT8_C( 96), INT8_MIN, INT8_MIN, INT8_C( 64) }, { INT8_MIN, INT8_MIN, -INT8_C( 64), INT8_C( 0), INT8_C( 64), INT8_C( 0), INT8_C( 0), INT8_MIN }, { INT8_C( 0), INT8_C( 0), INT8_MIN, 
INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 67), INT8_C( 73), INT8_C( 79), INT8_C( 101), INT8_C( 112), -INT8_C( 36), -INT8_C( 81), INT8_C( 33) }, { INT8_C( 122), -INT8_C( 110), -INT8_C( 98), -INT8_C( 54), -INT8_C( 32), -INT8_C( 72), INT8_C( 94), INT8_C( 66) }, { -INT8_C( 24), INT8_C( 72), INT8_C( 120), INT8_C( 40), INT8_MIN, -INT8_C( 32), INT8_C( 120), INT8_C( 8) }, { -INT8_C( 96), INT8_C( 32), -INT8_C( 32), -INT8_C( 96), INT8_C( 0), INT8_MIN, -INT8_C( 32), INT8_C( 32) }, { INT8_C( 64), INT8_C( 64), -INT8_C( 64), INT8_C( 64), INT8_C( 0), INT8_C( 0), -INT8_C( 64), INT8_C( 64) }, { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_MIN } }, { { -INT8_C( 17), INT8_C( 6), -INT8_C( 9), INT8_C( 126), INT8_C( 7), -INT8_C( 51), -INT8_C( 80), -INT8_C( 118) }, { -INT8_C( 34), INT8_C( 12), -INT8_C( 18), -INT8_C( 4), INT8_C( 14), -INT8_C( 102), INT8_C( 96), INT8_C( 20) }, { INT8_C( 120), INT8_C( 48), -INT8_C( 72), -INT8_C( 16), INT8_C( 56), INT8_C( 104), INT8_MIN, INT8_C( 80) }, { -INT8_C( 32), -INT8_C( 64), -INT8_C( 32), -INT8_C( 64), -INT8_C( 32), -INT8_C( 96), INT8_C( 0), INT8_C( 64) }, { -INT8_C( 64), INT8_MIN, -INT8_C( 64), INT8_MIN, -INT8_C( 64), INT8_C( 64), INT8_C( 0), INT8_MIN }, { INT8_MIN, INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_MIN, INT8_MIN, INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 71), -INT8_C( 89), -INT8_C( 55), -INT8_C( 102), -INT8_C( 60), INT8_C( 54), -INT8_C( 101), -INT8_C( 118) }, { -INT8_C( 114), INT8_C( 78), -INT8_C( 110), INT8_C( 52), -INT8_C( 120), INT8_C( 108), INT8_C( 54), INT8_C( 20) }, { INT8_C( 56), INT8_C( 56), INT8_C( 72), -INT8_C( 48), INT8_C( 32), -INT8_C( 80), -INT8_C( 40), INT8_C( 80) }, { -INT8_C( 32), -INT8_C( 32), INT8_C( 32), INT8_C( 64), INT8_MIN, -INT8_C( 64), INT8_C( 96), INT8_C( 64) }, { -INT8_C( 64), -INT8_C( 64), INT8_C( 64), INT8_MIN, INT8_C( 0), INT8_MIN, -INT8_C( 64), INT8_MIN }, { INT8_MIN, INT8_MIN, INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_C( 0) } }, { 
{ INT8_C( 80), -INT8_C( 17), INT8_C( 6), INT8_C( 53), INT8_C( 67), INT8_C( 3), -INT8_C( 40), INT8_C( 0) }, { -INT8_C( 96), -INT8_C( 34), INT8_C( 12), INT8_C( 106), -INT8_C( 122), INT8_C( 6), -INT8_C( 80), INT8_C( 0) }, { INT8_MIN, INT8_C( 120), INT8_C( 48), -INT8_C( 88), INT8_C( 24), INT8_C( 24), -INT8_C( 64), INT8_C( 0) }, { INT8_C( 0), -INT8_C( 32), -INT8_C( 64), -INT8_C( 96), INT8_C( 96), INT8_C( 96), INT8_C( 0), INT8_C( 0) }, { INT8_C( 0), -INT8_C( 64), INT8_MIN, INT8_C( 64), -INT8_C( 64), -INT8_C( 64), INT8_C( 0), INT8_C( 0) }, { INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_MIN, INT8_MIN, INT8_MIN, INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 76), INT8_C( 39), INT8_C( 101), -INT8_C( 68), INT8_C( 3), INT8_C( 20), -INT8_C( 35), -INT8_C( 14) }, { -INT8_C( 104), INT8_C( 78), -INT8_C( 54), INT8_C( 120), INT8_C( 6), INT8_C( 40), -INT8_C( 70), -INT8_C( 28) }, { INT8_C( 96), INT8_C( 56), INT8_C( 40), -INT8_C( 32), INT8_C( 24), -INT8_C( 96), -INT8_C( 24), -INT8_C( 112) }, { INT8_MIN, -INT8_C( 32), -INT8_C( 96), INT8_MIN, INT8_C( 96), INT8_MIN, -INT8_C( 96), INT8_C( 64) }, { INT8_C( 0), -INT8_C( 64), INT8_C( 64), INT8_C( 0), -INT8_C( 64), INT8_C( 0), INT8_C( 64), INT8_MIN }, { INT8_C( 0), INT8_MIN, INT8_MIN, INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_MIN, INT8_C( 0) } }, { { INT8_C( 26), -INT8_C( 44), INT8_C( 112), INT8_C( 33), -INT8_C( 94), INT8_C( 33), -INT8_C( 85), -INT8_C( 23) }, { INT8_C( 52), -INT8_C( 88), -INT8_C( 32), INT8_C( 66), INT8_C( 68), INT8_C( 66), INT8_C( 86), -INT8_C( 46) }, { -INT8_C( 48), -INT8_C( 96), INT8_MIN, INT8_C( 8), INT8_C( 16), INT8_C( 8), INT8_C( 88), INT8_C( 72) }, { INT8_C( 64), INT8_MIN, INT8_C( 0), INT8_C( 32), INT8_C( 64), INT8_C( 32), INT8_C( 96), INT8_C( 32) }, { INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 64), INT8_MIN, INT8_C( 64), -INT8_C( 64), INT8_C( 64) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_MIN, INT8_MIN, INT8_MIN } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = 
simde_vld1_s8(test_vec[i].a); simde_int8x8_t r1 = simde_vshl_n_s8(a, 1); simde_int8x8_t r3 = simde_vshl_n_s8(a, 3); simde_int8x8_t r5 = simde_vshl_n_s8(a, 5); simde_int8x8_t r6 = simde_vshl_n_s8(a, 6); simde_int8x8_t r7 = simde_vshl_n_s8(a, 7); simde_test_arm_neon_assert_equal_i8x8(r1, simde_vld1_s8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i8x8(r3, simde_vld1_s8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i8x8(r5, simde_vld1_s8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_i8x8(r6, simde_vld1_s8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i8x8(r7, simde_vld1_s8(test_vec[i].r7)); } return 0; } static int test_simde_vshl_n_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t r3[4]; int16_t r6[4]; int16_t r10[4]; int16_t r13[4]; int16_t r15[4]; } test_vec[] = { { { INT16_C( 18668), INT16_C( 18204), -INT16_C( 3037), -INT16_C( 20293) }, { INT16_C( 18272), INT16_C( 14560), -INT16_C( 24296), -INT16_C( 31272) }, { INT16_C( 15104), -INT16_C( 14592), INT16_C( 2240), INT16_C( 11968) }, { -INT16_C( 20480), INT16_C( 28672), -INT16_C( 29696), -INT16_C( 5120) }, { INT16_MIN, INT16_MIN, INT16_C( 24576), INT16_C( 24576) }, { INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_MIN } }, { { INT16_C( 27569), INT16_C( 5736), -INT16_C( 20588), -INT16_C( 30218) }, { INT16_C( 23944), -INT16_C( 19648), INT16_C( 31904), INT16_C( 20400) }, { -INT16_C( 5056), -INT16_C( 26112), -INT16_C( 6912), INT16_C( 32128) }, { -INT16_C( 15360), -INT16_C( 24576), INT16_C( 20480), -INT16_C( 10240) }, { INT16_C( 8192), INT16_C( 0), INT16_MIN, -INT16_C( 16384) }, { INT16_MIN, INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 9118), INT16_C( 22490), INT16_C( 22818), INT16_C( 31244) }, { -INT16_C( 7408), -INT16_C( 16688), -INT16_C( 14064), -INT16_C( 12192) }, { INT16_C( 6272), -INT16_C( 2432), INT16_C( 18560), -INT16_C( 32000) }, { -INT16_C( 30720), INT16_C( 26624), -INT16_C( 30720), INT16_C( 12288) }, { INT16_C( 16384), INT16_C( 16384), INT16_C( 16384), INT16_MIN }, { 
INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 18368), INT16_C( 10674), -INT16_C( 29712), -INT16_C( 9127) }, { -INT16_C( 15872), INT16_C( 19856), INT16_C( 24448), -INT16_C( 7480) }, { INT16_C( 4096), INT16_C( 27776), -INT16_C( 1024), INT16_C( 5696) }, { INT16_C( 0), -INT16_C( 14336), -INT16_C( 16384), INT16_C( 25600) }, { INT16_C( 0), INT16_C( 16384), INT16_C( 0), INT16_C( 8192) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_MIN } }, { { INT16_C( 30163), -INT16_C( 2525), -INT16_C( 8343), INT16_C( 7079) }, { -INT16_C( 20840), -INT16_C( 20200), -INT16_C( 1208), -INT16_C( 8904) }, { INT16_C( 29888), -INT16_C( 30528), -INT16_C( 9664), -INT16_C( 5696) }, { INT16_C( 19456), -INT16_C( 29696), -INT16_C( 23552), -INT16_C( 25600) }, { INT16_C( 24576), INT16_C( 24576), INT16_C( 8192), -INT16_C( 8192) }, { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN } }, { { INT16_C( 3914), -INT16_C( 8399), INT16_C( 10174), INT16_C( 8296) }, { INT16_C( 31312), -INT16_C( 1656), INT16_C( 15856), INT16_C( 832) }, { -INT16_C( 11648), -INT16_C( 13248), -INT16_C( 4224), INT16_C( 6656) }, { INT16_C( 10240), -INT16_C( 15360), -INT16_C( 2048), -INT16_C( 24576) }, { INT16_C( 16384), INT16_C( 8192), -INT16_C( 16384), INT16_C( 0) }, { INT16_C( 0), INT16_MIN, INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 16900), INT16_C( 9848), -INT16_C( 31589), -INT16_C( 9312) }, { INT16_C( 4128), INT16_C( 13248), INT16_C( 9432), -INT16_C( 8960) }, { -INT16_C( 32512), -INT16_C( 25088), INT16_C( 9920), -INT16_C( 6144) }, { INT16_C( 4096), -INT16_C( 8192), INT16_C( 27648), INT16_MIN }, { INT16_MIN, INT16_C( 0), INT16_C( 24576), INT16_C( 0) }, { INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_C( 0) } }, { { INT16_C( 21052), INT16_C( 11268), INT16_C( 24029), -INT16_C( 20471) }, { -INT16_C( 28192), INT16_C( 24608), -INT16_C( 4376), -INT16_C( 32696) }, { -INT16_C( 28928), INT16_C( 256), INT16_C( 30528), INT16_C( 576) }, { -INT16_C( 4096), INT16_C( 4096), INT16_C( 29696), INT16_C( 9216) }, { INT16_MIN, 
INT16_MIN, -INT16_C( 24576), INT16_C( 8192) }, { INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_MIN } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t r3 = simde_vshl_n_s16(a, 3); simde_int16x4_t r6 = simde_vshl_n_s16(a, 6); simde_int16x4_t r10 = simde_vshl_n_s16(a, 10); simde_int16x4_t r13 = simde_vshl_n_s16(a, 13); simde_int16x4_t r15 = simde_vshl_n_s16(a, 15); simde_test_arm_neon_assert_equal_i16x4(r3, simde_vld1_s16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i16x4(r6, simde_vld1_s16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i16x4(r10, simde_vld1_s16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_i16x4(r13, simde_vld1_s16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i16x4(r15, simde_vld1_s16(test_vec[i].r15)); } return 0; } static int test_simde_vshl_n_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t r6[2]; int32_t r13[2]; int32_t r19[2]; int32_t r26[2]; int32_t r31[2]; } test_vec[] = { { { -INT32_C( 635750729), INT32_C( 1289635738) }, { -INT32_C( 2033340992), INT32_C( 932308608) }, { INT32_C( 1725358080), -INT32_C( 923582464) }, { -INT32_C( 1246232576), INT32_C( 1020264448) }, { -INT32_C( 603979776), INT32_C( 1744830464) }, { INT32_MIN, INT32_C( 0) } }, { { -INT32_C( 1230867210), INT32_C( 1451555070) }, { -INT32_C( 1466090112), -INT32_C( 1589756032) }, { INT32_C( 1319026688), -INT32_C( 1625309184) }, { -INT32_C( 1481637888), -INT32_C( 940572672) }, { -INT32_C( 671088640), -INT32_C( 134217728) }, { INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 1859799447), INT32_C( 1800629968) }, { -INT32_C( 1231919680), -INT32_C( 723799040) }, { INT32_C( 1228070912), INT32_C( 1843003392) }, { INT32_C( 1287127040), INT32_C( 1988100096) }, { INT32_C( 1543503872), INT32_C( 1073741824) }, { INT32_MIN, INT32_C( 0) } }, { { -INT32_C( 1341981360), -INT32_C( 2026618416) }, { INT32_C( 12538880), -INT32_C( 854559744) }, { INT32_C( 1604976640), -INT32_C( 
2009464832) }, { -INT32_C( 360710144), INT32_C( 243269632) }, { INT32_C( 1073741824), INT32_C( 1073741824) }, { INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 308367223), -INT32_C( 1939980394) }, { -INT32_C( 1739334208), INT32_C( 395306368) }, { INT32_C( 703520768), -INT32_C( 940392448) }, { INT32_C( 2075656192), -INT32_C( 55574528) }, { -INT32_C( 603979776), INT32_C( 1476395008) }, { INT32_MIN, INT32_C( 0) } }, { { -INT32_C( 1304297292), -INT32_C( 1878472711) }, { -INT32_C( 1870648064), INT32_C( 36830784) }, { INT32_C( 1075216384), INT32_C( 419373056) }, { INT32_C( 94371840), INT32_C( 1070071808) }, { -INT32_C( 805306368), -INT32_C( 469762048) }, { INT32_C( 0), INT32_MIN } }, { { -INT32_C( 503389424), -INT32_C( 1572056495) }, { INT32_C( 2142815232), -INT32_C( 1827367872) }, { -INT32_C( 597557248), -INT32_C( 1974853632) }, { INT32_C( 411041792), -INT32_C( 1836580864) }, { INT32_C( 1073741824), INT32_C( 1140850688) }, { INT32_C( 0), INT32_MIN } }, { { INT32_C( 525487695), INT32_C( 128419471) }, { -INT32_C( 728525888), -INT32_C( 371088448) }, { INT32_C( 1237966848), -INT32_C( 254681088) }, { INT32_C( 1920466944), INT32_C( 880279552) }, { INT32_C( 1006632960), INT32_C( 1006632960) }, { INT32_MIN, INT32_MIN } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t r6 = simde_vshl_n_s32(a, 6); simde_int32x2_t r13 = simde_vshl_n_s32(a, 13); simde_int32x2_t r19 = simde_vshl_n_s32(a, 19); simde_int32x2_t r26 = simde_vshl_n_s32(a, 26); simde_int32x2_t r31 = simde_vshl_n_s32(a, 31); simde_test_arm_neon_assert_equal_i32x2(r6, simde_vld1_s32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i32x2(r13, simde_vld1_s32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i32x2(r19, simde_vld1_s32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_i32x2(r26, simde_vld1_s32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i32x2(r31, simde_vld1_s32(test_vec[i].r31)); } return 0; } static int 
test_simde_vshl_n_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; int64_t r13[1]; int64_t r26[1]; int64_t r39[1]; int64_t r52[1]; int64_t r63[1]; } test_vec[] = { { { -INT64_C( 8607823705097870458) }, { INT64_C( 6610801629861036032) }, { -INT64_C( 3953648589636370432) }, { INT64_C( 4193347132826058752) }, { INT64_C( 4062246863888187392) }, { INT64_C( 0) } }, { { -INT64_C( 1048272605653827765) }, { INT64_C( 8733552832494002176) }, { INT64_C( 8791285945224658944) }, { INT64_C( 2125599518316560384) }, { -INT64_C( 815151532554059776) }, { INT64_MIN } }, { { -INT64_C( 484364469342631954) }, { -INT64_C( 1863757007287369728) }, { INT64_C( 6006689333375926272) }, { -INT64_C( 9114169641495691264) }, { INT64_C( 9142307243562106880) }, { INT64_C( 0) } }, { { INT64_C( 4956995058058425785) }, { INT64_C( 6419809379900923904) }, { -INT64_C( 588913997563035648) }, { INT64_C( 8663479275514494976) }, { INT64_C( 6597773454097776640) }, { INT64_MIN } }, { { INT64_C( 7396623743718276651) }, { -INT64_C( 4412573595754733568) }, { INT64_C( 7815488047943778304) }, { -INT64_C( 4170591090421792768) }, { -INT64_C( 2112188225236762624) }, { INT64_MIN } }, { { -INT64_C( 7519505417405800427) }, { -INT64_C( 6109917272124252160) }, { -INT64_C( 6425621267860160512) }, { INT64_C( 8318160056625397760) }, { INT64_C( 94575592174780416) }, { INT64_MIN } }, { { -INT64_C( 7751056859285444256) }, { -INT64_C( 2964689558082682880) }, { INT64_C( 7625085262141325312) }, { INT64_C( 4023033881195184128) }, { -INT64_C( 7638104968020361216) }, { INT64_C( 0) } }, { { INT64_C( 5849140343110917675) }, { -INT64_C( 8483412732777504768) }, { -INT64_C( 7232181249438121984) }, { INT64_C( 4913169357984497664) }, { -INT64_C( 2112188225236762624) }, { INT64_MIN } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t r13 = simde_vshl_n_s64(a, 13); simde_int64x1_t r26 = simde_vshl_n_s64(a, 26); simde_int64x1_t r39 = 
simde_vshl_n_s64(a, 39); simde_int64x1_t r52 = simde_vshl_n_s64(a, 52); simde_int64x1_t r63 = simde_vshl_n_s64(a, 63); simde_test_arm_neon_assert_equal_i64x1(r13, simde_vld1_s64(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i64x1(r26, simde_vld1_s64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i64x1(r39, simde_vld1_s64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_i64x1(r52, simde_vld1_s64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_i64x1(r63, simde_vld1_s64(test_vec[i].r63)); } return 0; } static int test_simde_vshl_n_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t r1[8]; uint8_t r3[8]; uint8_t r5[8]; uint8_t r6[8]; uint8_t r7[8]; } test_vec[] = { { { UINT8_C(132), UINT8_C(230), UINT8_C(170), UINT8_C(137), UINT8_C( 6), UINT8_C(148), UINT8_C( 45), UINT8_C( 13) }, { UINT8_C( 8), UINT8_C(204), UINT8_C( 84), UINT8_C( 18), UINT8_C( 12), UINT8_C( 40), UINT8_C( 90), UINT8_C( 26) }, { UINT8_C( 32), UINT8_C( 48), UINT8_C( 80), UINT8_C( 72), UINT8_C( 48), UINT8_C(160), UINT8_C(104), UINT8_C(104) }, { UINT8_C(128), UINT8_C(192), UINT8_C( 64), UINT8_C( 32), UINT8_C(192), UINT8_C(128), UINT8_C(160), UINT8_C(160) }, { UINT8_C( 0), UINT8_C(128), UINT8_C(128), UINT8_C( 64), UINT8_C(128), UINT8_C( 0), UINT8_C( 64), UINT8_C( 64) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C(128) } }, { { UINT8_C( 64), UINT8_C( 29), UINT8_C( 42), UINT8_C( 83), UINT8_C(183), UINT8_C(157), UINT8_C(126), UINT8_C( 88) }, { UINT8_C(128), UINT8_C( 58), UINT8_C( 84), UINT8_C(166), UINT8_C(110), UINT8_C( 58), UINT8_C(252), UINT8_C(176) }, { UINT8_C( 0), UINT8_C(232), UINT8_C( 80), UINT8_C(152), UINT8_C(184), UINT8_C(232), UINT8_C(240), UINT8_C(192) }, { UINT8_C( 0), UINT8_C(160), UINT8_C( 64), UINT8_C( 96), UINT8_C(224), UINT8_C(160), UINT8_C(192), UINT8_C( 0) }, { UINT8_C( 0), UINT8_C( 64), UINT8_C(128), UINT8_C(192), UINT8_C(192), UINT8_C( 64), UINT8_C(128), UINT8_C( 0) }, { UINT8_C( 0), UINT8_C(128), 
UINT8_C( 0), UINT8_C(128), UINT8_C(128), UINT8_C(128), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(251), UINT8_C(190), UINT8_C( 95), UINT8_C(143), UINT8_C(141), UINT8_C( 17), UINT8_C( 26), UINT8_C(234) }, { UINT8_C(246), UINT8_C(124), UINT8_C(190), UINT8_C( 30), UINT8_C( 26), UINT8_C( 34), UINT8_C( 52), UINT8_C(212) }, { UINT8_C(216), UINT8_C(240), UINT8_C(248), UINT8_C(120), UINT8_C(104), UINT8_C(136), UINT8_C(208), UINT8_C( 80) }, { UINT8_C( 96), UINT8_C(192), UINT8_C(224), UINT8_C(224), UINT8_C(160), UINT8_C( 32), UINT8_C( 64), UINT8_C( 64) }, { UINT8_C(192), UINT8_C(128), UINT8_C(192), UINT8_C(192), UINT8_C( 64), UINT8_C( 64), UINT8_C(128), UINT8_C(128) }, { UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C(128), UINT8_C(128), UINT8_C(128), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 6), UINT8_C( 50), UINT8_C( 2), UINT8_C( 36), UINT8_C( 3), UINT8_C(123), UINT8_C(190), UINT8_C(136) }, { UINT8_C( 12), UINT8_C(100), UINT8_C( 4), UINT8_C( 72), UINT8_C( 6), UINT8_C(246), UINT8_C(124), UINT8_C( 16) }, { UINT8_C( 48), UINT8_C(144), UINT8_C( 16), UINT8_C( 32), UINT8_C( 24), UINT8_C(216), UINT8_C(240), UINT8_C( 64) }, { UINT8_C(192), UINT8_C( 64), UINT8_C( 64), UINT8_C(128), UINT8_C( 96), UINT8_C( 96), UINT8_C(192), UINT8_C( 0) }, { UINT8_C(128), UINT8_C(128), UINT8_C(128), UINT8_C( 0), UINT8_C(192), UINT8_C(192), UINT8_C(128), UINT8_C( 0) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C(128), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 97), UINT8_C(104), UINT8_C( 17), UINT8_C(104), UINT8_C(252), UINT8_C( 62), UINT8_C(117), UINT8_C( 60) }, { UINT8_C(194), UINT8_C(208), UINT8_C( 34), UINT8_C(208), UINT8_C(248), UINT8_C(124), UINT8_C(234), UINT8_C(120) }, { UINT8_C( 8), UINT8_C( 64), UINT8_C(136), UINT8_C( 64), UINT8_C(224), UINT8_C(240), UINT8_C(168), UINT8_C(224) }, { UINT8_C( 32), UINT8_C( 0), UINT8_C( 32), UINT8_C( 0), UINT8_C(128), UINT8_C(192), UINT8_C(160), UINT8_C(128) }, { UINT8_C( 64), UINT8_C( 0), UINT8_C( 64), UINT8_C( 0), UINT8_C( 0), 
UINT8_C(128), UINT8_C( 64), UINT8_C( 0) }, { UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C( 0) } }, { { UINT8_C( 91), UINT8_C(160), UINT8_C(144), UINT8_C( 18), UINT8_C( 61), UINT8_C( 14), UINT8_C(106), UINT8_C( 57) }, { UINT8_C(182), UINT8_C( 64), UINT8_C( 32), UINT8_C( 36), UINT8_C(122), UINT8_C( 28), UINT8_C(212), UINT8_C(114) }, { UINT8_C(216), UINT8_C( 0), UINT8_C(128), UINT8_C(144), UINT8_C(232), UINT8_C(112), UINT8_C( 80), UINT8_C(200) }, { UINT8_C( 96), UINT8_C( 0), UINT8_C( 0), UINT8_C( 64), UINT8_C(160), UINT8_C(192), UINT8_C( 64), UINT8_C( 32) }, { UINT8_C(192), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C( 64), UINT8_C(128), UINT8_C(128), UINT8_C( 64) }, { UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C(128) } }, { { UINT8_C(204), UINT8_C(201), UINT8_C(200), UINT8_C( 90), UINT8_C(219), UINT8_C(226), UINT8_C( 68), UINT8_C(225) }, { UINT8_C(152), UINT8_C(146), UINT8_C(144), UINT8_C(180), UINT8_C(182), UINT8_C(196), UINT8_C(136), UINT8_C(194) }, { UINT8_C( 96), UINT8_C( 72), UINT8_C( 64), UINT8_C(208), UINT8_C(216), UINT8_C( 16), UINT8_C( 32), UINT8_C( 8) }, { UINT8_C(128), UINT8_C( 32), UINT8_C( 0), UINT8_C( 64), UINT8_C( 96), UINT8_C( 64), UINT8_C(128), UINT8_C( 32) }, { UINT8_C( 0), UINT8_C( 64), UINT8_C( 0), UINT8_C(128), UINT8_C(192), UINT8_C(128), UINT8_C( 0), UINT8_C( 64) }, { UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C(128) } }, { { UINT8_C( 20), UINT8_C( 70), UINT8_C( 5), UINT8_C( 23), UINT8_C(194), UINT8_C(195), UINT8_C(159), UINT8_C( 35) }, { UINT8_C( 40), UINT8_C(140), UINT8_C( 10), UINT8_C( 46), UINT8_C(132), UINT8_C(134), UINT8_C( 62), UINT8_C( 70) }, { UINT8_C(160), UINT8_C( 48), UINT8_C( 40), UINT8_C(184), UINT8_C( 16), UINT8_C( 24), UINT8_C(248), UINT8_C( 24) }, { UINT8_C(128), UINT8_C(192), UINT8_C(160), UINT8_C(224), UINT8_C( 64), UINT8_C( 96), UINT8_C(224), UINT8_C( 96) 
}, { UINT8_C( 0), UINT8_C(128), UINT8_C( 64), UINT8_C(192), UINT8_C(128), UINT8_C(192), UINT8_C(192), UINT8_C(192) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C(128), UINT8_C(128) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t r1 = simde_vshl_n_u8(a, 1); simde_uint8x8_t r3 = simde_vshl_n_u8(a, 3); simde_uint8x8_t r5 = simde_vshl_n_u8(a, 5); simde_uint8x8_t r6 = simde_vshl_n_u8(a, 6); simde_uint8x8_t r7 = simde_vshl_n_u8(a, 7); simde_test_arm_neon_assert_equal_u8x8(r1, simde_vld1_u8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u8x8(r3, simde_vld1_u8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u8x8(r5, simde_vld1_u8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_u8x8(r6, simde_vld1_u8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u8x8(r7, simde_vld1_u8(test_vec[i].r7)); } return 0; } static int test_simde_vshl_n_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t r3[4]; uint16_t r6[4]; uint16_t r10[4]; uint16_t r13[4]; uint16_t r15[4]; } test_vec[] = { { { UINT16_C(12081), UINT16_C(51160), UINT16_C(44606), UINT16_C(13550) }, { UINT16_C(31112), UINT16_C(16064), UINT16_C(29168), UINT16_C(42864) }, { UINT16_C(52288), UINT16_C(62976), UINT16_C(36736), UINT16_C(15232) }, { UINT16_C(50176), UINT16_C(24576), UINT16_C(63488), UINT16_C(47104) }, { UINT16_C( 8192), UINT16_C( 0), UINT16_C(49152), UINT16_C(49152) }, { UINT16_C(32768), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(47809), UINT16_C(24193), UINT16_C(31527), UINT16_C(28814) }, { UINT16_C(54792), UINT16_C(62472), UINT16_C(55608), UINT16_C(33904) }, { UINT16_C(45120), UINT16_C(41024), UINT16_C(51648), UINT16_C( 9088) }, { UINT16_C( 1024), UINT16_C( 1024), UINT16_C(39936), UINT16_C(14336) }, { UINT16_C( 8192), UINT16_C( 8192), UINT16_C(57344), UINT16_C(49152) }, { UINT16_C(32768), UINT16_C(32768), UINT16_C(32768), UINT16_C( 0) 
} }, { { UINT16_C(53303), UINT16_C(53033), UINT16_C( 960), UINT16_C(31544) }, { UINT16_C(33208), UINT16_C(31048), UINT16_C( 7680), UINT16_C(55744) }, { UINT16_C( 3520), UINT16_C(51776), UINT16_C(61440), UINT16_C(52736) }, { UINT16_C(56320), UINT16_C(41984), UINT16_C( 0), UINT16_C(57344) }, { UINT16_C(57344), UINT16_C( 8192), UINT16_C( 0), UINT16_C( 0) }, { UINT16_C(32768), UINT16_C(32768), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(64480), UINT16_C(18150), UINT16_C(24610), UINT16_C(21645) }, { UINT16_C(57088), UINT16_C(14128), UINT16_C( 272), UINT16_C(42088) }, { UINT16_C(63488), UINT16_C(47488), UINT16_C( 2176), UINT16_C( 9024) }, { UINT16_C(32768), UINT16_C(38912), UINT16_C(34816), UINT16_C(13312) }, { UINT16_C( 0), UINT16_C(49152), UINT16_C(16384), UINT16_C(40960) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(32768) } }, { { UINT16_C(25999), UINT16_C(52507), UINT16_C( 2323), UINT16_C(54273) }, { UINT16_C(11384), UINT16_C(26840), UINT16_C(18584), UINT16_C(40968) }, { UINT16_C(25536), UINT16_C(18112), UINT16_C(17600), UINT16_C( 64) }, { UINT16_C(15360), UINT16_C(27648), UINT16_C(19456), UINT16_C( 1024) }, { UINT16_C(57344), UINT16_C(24576), UINT16_C(24576), UINT16_C( 8192) }, { UINT16_C(32768), UINT16_C(32768), UINT16_C(32768), UINT16_C(32768) } }, { { UINT16_C(33731), UINT16_C(60211), UINT16_C(49662), UINT16_C(13659) }, { UINT16_C( 7704), UINT16_C(22936), UINT16_C( 4080), UINT16_C(43736) }, { UINT16_C(61632), UINT16_C(52416), UINT16_C(32640), UINT16_C(22208) }, { UINT16_C( 3072), UINT16_C(52224), UINT16_C(63488), UINT16_C(27648) }, { UINT16_C(24576), UINT16_C(24576), UINT16_C(49152), UINT16_C(24576) }, { UINT16_C(32768), UINT16_C(32768), UINT16_C( 0), UINT16_C(32768) } }, { { UINT16_C(34193), UINT16_C(20740), UINT16_C(15496), UINT16_C(26829) }, { UINT16_C(11400), UINT16_C(34848), UINT16_C(58432), UINT16_C(18024) }, { UINT16_C(25664), UINT16_C(16640), UINT16_C( 8704), UINT16_C(13120) }, { UINT16_C(17408), UINT16_C( 4096), UINT16_C( 8192), 
UINT16_C(13312) }, { UINT16_C( 8192), UINT16_C(32768), UINT16_C( 0), UINT16_C(40960) }, { UINT16_C(32768), UINT16_C( 0), UINT16_C( 0), UINT16_C(32768) } }, { { UINT16_C(45880), UINT16_C(23215), UINT16_C(15379), UINT16_C(41646) }, { UINT16_C(39360), UINT16_C(54648), UINT16_C(57496), UINT16_C( 5488) }, { UINT16_C(52736), UINT16_C(43968), UINT16_C( 1216), UINT16_C(43904) }, { UINT16_C(57344), UINT16_C(48128), UINT16_C(19456), UINT16_C(47104) }, { UINT16_C( 0), UINT16_C(57344), UINT16_C(24576), UINT16_C(49152) }, { UINT16_C( 0), UINT16_C(32768), UINT16_C(32768), UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t r3 = simde_vshl_n_u16(a, 3); simde_uint16x4_t r6 = simde_vshl_n_u16(a, 6); simde_uint16x4_t r10 = simde_vshl_n_u16(a, 10); simde_uint16x4_t r13 = simde_vshl_n_u16(a, 13); simde_uint16x4_t r15 = simde_vshl_n_u16(a, 15); simde_test_arm_neon_assert_equal_u16x4(r3, simde_vld1_u16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u16x4(r6, simde_vld1_u16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u16x4(r10, simde_vld1_u16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_u16x4(r13, simde_vld1_u16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u16x4(r15, simde_vld1_u16(test_vec[i].r15)); } return 0; } static int test_simde_vshl_n_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t r6[2]; uint32_t r13[2]; uint32_t r19[2]; uint32_t r26[2]; uint32_t r31[2]; } test_vec[] = { { { UINT32_C(2997814030), UINT32_C(3814135967) }, { UINT32_C(2881536896), UINT32_C(3586533312) }, { UINT32_C(3764502528), UINT32_C(3809730560) }, { UINT32_C( 409993216), UINT32_C(3304587264) }, { UINT32_C( 939524096), UINT32_C(2080374784) }, { UINT32_C( 0), UINT32_C(2147483648) } }, { { UINT32_C( 787617874), UINT32_C(2139507274) }, { UINT32_C(3162903680), UINT32_C(3784479360) }, { UINT32_C(1124745216), UINT32_C(3377020928) }, { UINT32_C(3264217088), 
UINT32_C(1380974592) }, { UINT32_C(1207959552), UINT32_C( 671088640) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 843447853), UINT32_C( 839632241) }, { UINT32_C(2441055040), UINT32_C(2196855872) }, { UINT32_C(3217399808), UINT32_C(2024677376) }, { UINT32_C(4050124800), UINT32_C( 730333184) }, { UINT32_C(3019898880), UINT32_C(3288334336) }, { UINT32_C(2147483648), UINT32_C(2147483648) } }, { { UINT32_C( 654285750), UINT32_C(3336382136) }, { UINT32_C(3219582336), UINT32_C(3075059200) }, { UINT32_C(4084645888), UINT32_C(2765553664) }, { UINT32_C(3719299072), UINT32_C( 901775360) }, { UINT32_C(3623878656), UINT32_C(3758096384) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(3363408937), UINT32_C(4138455204) }, { UINT32_C( 509807168), UINT32_C(2868128000) }, { UINT32_C( 830808064), UINT32_C(2048163840) }, { UINT32_C(1632108544), UINT32_C(2233466880) }, { UINT32_C(2751463424), UINT32_C(2415919104) }, { UINT32_C(2147483648), UINT32_C( 0) } }, { { UINT32_C( 841260776), UINT32_C( 364030952) }, { UINT32_C(2301082112), UINT32_C(1823144448) }, { UINT32_C(2480734208), UINT32_C(1434255360) }, { UINT32_C(4148166656), UINT32_C(1598029824) }, { UINT32_C(2684354560), UINT32_C(2684354560) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 440924073), UINT32_C(1934381756) }, { UINT32_C(2449336896), UINT32_C(3541348096) }, { UINT32_C(4277477376), UINT32_C(2320990208) }, { UINT32_C(3175612416), UINT32_C(2514485248) }, { UINT32_C(2751463424), UINT32_C(4026531840) }, { UINT32_C(2147483648), UINT32_C( 0) } }, { { UINT32_C(2778287341), UINT32_C(2624353906) }, { UINT32_C(1716730688), UINT32_C( 454925440) }, { UINT32_C( 698195968), UINT32_C(2395881472) }, { UINT32_C(1734868992), UINT32_C(3012558848) }, { UINT32_C(3019898880), UINT32_C(3355443200) }, { UINT32_C(2147483648), UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t r6 = simde_vshl_n_u32(a, 6); simde_uint32x2_t r13 = 
simde_vshl_n_u32(a, 13); simde_uint32x2_t r19 = simde_vshl_n_u32(a, 19); simde_uint32x2_t r26 = simde_vshl_n_u32(a, 26); simde_uint32x2_t r31 = simde_vshl_n_u32(a, 31); simde_test_arm_neon_assert_equal_u32x2(r6, simde_vld1_u32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u32x2(r13, simde_vld1_u32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u32x2(r19, simde_vld1_u32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_u32x2(r26, simde_vld1_u32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u32x2(r31, simde_vld1_u32(test_vec[i].r31)); } return 0; } static int test_simde_vshl_n_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; uint64_t r13[1]; uint64_t r26[1]; uint64_t r39[1]; uint64_t r52[1]; uint64_t r63[1]; } test_vec[] = { { { UINT64_C(10695459920550031284) }, { UINT64_C(13620063099195654144) }, { UINT64_C( 9648750815430574080) }, { UINT64_C(16715068235543740416) }, { UINT64_C(18104470502029393920) }, { UINT64_C( 0) } }, { { UINT64_C( 3462228061674520113) }, { UINT64_C( 9926639946087931904) }, { UINT64_C( 5786561440634634240) }, { UINT64_C(13825796319085592576) }, { UINT64_C(16361577446237011968) }, { UINT64_C( 9223372036854775808) } }, { { UINT64_C(16074483975084028448) }, { UINT64_C( 9313525749581611008) }, { UINT64_C( 669451709851893760) }, { UINT64_C( 5465417214976851968) }, { UINT64_C( 2449958197289549824) }, { UINT64_C( 0) } }, { { UINT64_C( 7661638601150101201) }, { UINT64_C( 8320081861734440960) }, { UINT64_C(15838003045456674816) }, { UINT64_C( 8969877981803577344) }, { UINT64_C( 7858781349761515520) }, { UINT64_C( 9223372036854775808) } }, { { UINT64_C( 4341277672083759754) }, { UINT64_C(16870859671853940736) }, { UINT64_C( 3075831595521802240) }, { UINT64_C(17406769901065994240) }, { UINT64_C( 2927339757790822400) }, { UINT64_C( 0) } }, { { UINT64_C( 3266055366270500623) }, { UINT64_C( 7746653609091260416) }, { UINT64_C( 3786752114747768832) }, { UINT64_C(12096536107966005248) }, { UINT64_C(17361376563513262080) }, { UINT64_C( 
9223372036854775808) } }, { { UINT64_C(14335121394201783054) }, { UINT64_C( 1341688066001190912) }, { UINT64_C(15295912824572739584) }, { UINT64_C(13832110264608096256) }, { UINT64_C(12745186945458503680) }, { UINT64_C( 0) } }, { { UINT64_C( 1056952299709655178) }, { UINT64_C( 7030268651715510272) }, { UINT64_C( 1225796732240003072) }, { UINT64_C( 6698054412109086720) }, { UINT64_C( 9844868785431904256) }, { UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t r13 = simde_vshl_n_u64(a, 13); simde_uint64x1_t r26 = simde_vshl_n_u64(a, 26); simde_uint64x1_t r39 = simde_vshl_n_u64(a, 39); simde_uint64x1_t r52 = simde_vshl_n_u64(a, 52); simde_uint64x1_t r63 = simde_vshl_n_u64(a, 63); simde_test_arm_neon_assert_equal_u64x1(r13, simde_vld1_u64(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u64x1(r26, simde_vld1_u64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u64x1(r39, simde_vld1_u64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_u64x1(r52, simde_vld1_u64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_u64x1(r63, simde_vld1_u64(test_vec[i].r63)); } return 0; } static int test_simde_vshlq_n_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t r1[16]; int8_t r3[16]; int8_t r5[16]; int8_t r6[16]; int8_t r7[16]; } test_vec[] = { { { INT8_C( 122), -INT8_C( 111), INT8_C( 30), INT8_C( 69), -INT8_C( 48), -INT8_C( 44), -INT8_C( 67), INT8_C( 98), -INT8_C( 116), INT8_C( 119), -INT8_C( 109), -INT8_C( 89), INT8_C( 71), -INT8_C( 122), INT8_C( 32), INT8_C( 41) }, { -INT8_C( 12), INT8_C( 34), INT8_C( 60), -INT8_C( 118), -INT8_C( 96), -INT8_C( 88), INT8_C( 122), -INT8_C( 60), INT8_C( 24), -INT8_C( 18), INT8_C( 38), INT8_C( 78), -INT8_C( 114), INT8_C( 12), INT8_C( 64), INT8_C( 82) }, { -INT8_C( 48), -INT8_C( 120), -INT8_C( 16), INT8_C( 40), INT8_MIN, -INT8_C( 96), -INT8_C( 24), INT8_C( 16), INT8_C( 96), -INT8_C( 72), -INT8_C( 104), INT8_C( 56), INT8_C( 
56), INT8_C( 48), INT8_C( 0), INT8_C( 72) }, { INT8_C( 64), INT8_C( 32), -INT8_C( 64), -INT8_C( 96), INT8_C( 0), INT8_MIN, -INT8_C( 96), INT8_C( 64), INT8_MIN, -INT8_C( 32), INT8_C( 96), -INT8_C( 32), -INT8_C( 32), -INT8_C( 64), INT8_C( 0), INT8_C( 32) }, { INT8_MIN, INT8_C( 64), INT8_MIN, INT8_C( 64), INT8_C( 0), INT8_C( 0), INT8_C( 64), INT8_MIN, INT8_C( 0), -INT8_C( 64), -INT8_C( 64), -INT8_C( 64), -INT8_C( 64), INT8_MIN, INT8_C( 0), INT8_C( 64) }, { INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_MIN } }, { { INT8_C( 100), -INT8_C( 96), INT8_C( 79), -INT8_C( 90), INT8_C( 47), INT8_C( 78), -INT8_C( 14), -INT8_C( 62), -INT8_C( 112), INT8_C( 52), INT8_C( 8), -INT8_C( 107), -INT8_C( 10), INT8_C( 96), INT8_C( 94), INT8_C( 113) }, { -INT8_C( 56), INT8_C( 64), -INT8_C( 98), INT8_C( 76), INT8_C( 94), -INT8_C( 100), -INT8_C( 28), -INT8_C( 124), INT8_C( 32), INT8_C( 104), INT8_C( 16), INT8_C( 42), -INT8_C( 20), -INT8_C( 64), -INT8_C( 68), -INT8_C( 30) }, { INT8_C( 32), INT8_C( 0), INT8_C( 120), INT8_C( 48), INT8_C( 120), INT8_C( 112), -INT8_C( 112), INT8_C( 16), INT8_MIN, -INT8_C( 96), INT8_C( 64), -INT8_C( 88), -INT8_C( 80), INT8_C( 0), -INT8_C( 16), -INT8_C( 120) }, { INT8_MIN, INT8_C( 0), -INT8_C( 32), -INT8_C( 64), -INT8_C( 32), -INT8_C( 64), INT8_C( 64), INT8_C( 64), INT8_C( 0), INT8_MIN, INT8_C( 0), -INT8_C( 96), -INT8_C( 64), INT8_C( 0), -INT8_C( 64), INT8_C( 32) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 64), INT8_MIN, -INT8_C( 64), INT8_MIN, INT8_MIN, INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 64), INT8_MIN, INT8_C( 0), INT8_MIN, INT8_C( 64) }, { INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_MIN } }, { { -INT8_C( 15), INT8_C( 124), -INT8_C( 74), -INT8_C( 63), INT8_C( 80), INT8_C( 115), INT8_C( 36), 
-INT8_C( 35), -INT8_C( 21), -INT8_C( 73), -INT8_C( 124), INT8_C( 50), INT8_C( 62), -INT8_C( 92), INT8_C( 92), -INT8_C( 94) }, { -INT8_C( 30), -INT8_C( 8), INT8_C( 108), -INT8_C( 126), -INT8_C( 96), -INT8_C( 26), INT8_C( 72), -INT8_C( 70), -INT8_C( 42), INT8_C( 110), INT8_C( 8), INT8_C( 100), INT8_C( 124), INT8_C( 72), -INT8_C( 72), INT8_C( 68) }, { -INT8_C( 120), -INT8_C( 32), -INT8_C( 80), INT8_C( 8), INT8_MIN, -INT8_C( 104), INT8_C( 32), -INT8_C( 24), INT8_C( 88), -INT8_C( 72), INT8_C( 32), -INT8_C( 112), -INT8_C( 16), INT8_C( 32), -INT8_C( 32), INT8_C( 16) }, { INT8_C( 32), INT8_MIN, -INT8_C( 64), INT8_C( 32), INT8_C( 0), INT8_C( 96), INT8_MIN, -INT8_C( 96), INT8_C( 96), -INT8_C( 32), INT8_MIN, INT8_C( 64), -INT8_C( 64), INT8_MIN, INT8_MIN, INT8_C( 64) }, { INT8_C( 64), INT8_C( 0), INT8_MIN, INT8_C( 64), INT8_C( 0), -INT8_C( 64), INT8_C( 0), INT8_C( 64), -INT8_C( 64), -INT8_C( 64), INT8_C( 0), INT8_MIN, INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_MIN }, { INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_MIN, INT8_MIN, INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 68), -INT8_C( 85), INT8_C( 72), INT8_C( 116), -INT8_C( 7), INT8_C( 59), INT8_C( 54), -INT8_C( 119), INT8_C( 111), INT8_C( 62), INT8_C( 31), INT8_C( 101), -INT8_C( 98), INT8_C( 125), -INT8_C( 42), -INT8_C( 112) }, { -INT8_C( 120), INT8_C( 86), -INT8_C( 112), -INT8_C( 24), -INT8_C( 14), INT8_C( 118), INT8_C( 108), INT8_C( 18), -INT8_C( 34), INT8_C( 124), INT8_C( 62), -INT8_C( 54), INT8_C( 60), -INT8_C( 6), -INT8_C( 84), INT8_C( 32) }, { INT8_C( 32), INT8_C( 88), INT8_C( 64), -INT8_C( 96), -INT8_C( 56), -INT8_C( 40), -INT8_C( 80), INT8_C( 72), INT8_C( 120), -INT8_C( 16), -INT8_C( 8), INT8_C( 40), -INT8_C( 16), -INT8_C( 24), -INT8_C( 80), INT8_MIN }, { INT8_MIN, INT8_C( 96), INT8_C( 0), INT8_MIN, INT8_C( 32), INT8_C( 96), -INT8_C( 64), INT8_C( 32), -INT8_C( 32), -INT8_C( 64), -INT8_C( 32), -INT8_C( 96), -INT8_C( 64), -INT8_C( 
96), -INT8_C( 64), INT8_C( 0) }, { INT8_C( 0), -INT8_C( 64), INT8_C( 0), INT8_C( 0), INT8_C( 64), -INT8_C( 64), INT8_MIN, INT8_C( 64), -INT8_C( 64), INT8_MIN, -INT8_C( 64), INT8_C( 64), INT8_MIN, INT8_C( 64), INT8_MIN, INT8_C( 0) }, { INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_MIN, INT8_C( 0), INT8_MIN, INT8_MIN, INT8_C( 0), INT8_MIN, INT8_MIN, INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 7), -INT8_C( 115), INT8_C( 81), INT8_C( 73), INT8_C( 0), INT8_C( 117), INT8_C( 38), -INT8_C( 21), INT8_C( 45), -INT8_C( 85), INT8_C( 30), INT8_C( 107), INT8_C( 79), INT8_C( 122), INT8_C( 13), -INT8_C( 108) }, { -INT8_C( 14), INT8_C( 26), -INT8_C( 94), -INT8_C( 110), INT8_C( 0), -INT8_C( 22), INT8_C( 76), -INT8_C( 42), INT8_C( 90), INT8_C( 86), INT8_C( 60), -INT8_C( 42), -INT8_C( 98), -INT8_C( 12), INT8_C( 26), INT8_C( 40) }, { -INT8_C( 56), INT8_C( 104), -INT8_C( 120), INT8_C( 72), INT8_C( 0), -INT8_C( 88), INT8_C( 48), INT8_C( 88), INT8_C( 104), INT8_C( 88), -INT8_C( 16), INT8_C( 88), INT8_C( 120), -INT8_C( 48), INT8_C( 104), -INT8_C( 96) }, { INT8_C( 32), -INT8_C( 96), INT8_C( 32), INT8_C( 32), INT8_C( 0), -INT8_C( 96), -INT8_C( 64), INT8_C( 96), -INT8_C( 96), INT8_C( 96), -INT8_C( 64), INT8_C( 96), -INT8_C( 32), INT8_C( 64), -INT8_C( 96), INT8_MIN }, { INT8_C( 64), INT8_C( 64), INT8_C( 64), INT8_C( 64), INT8_C( 0), INT8_C( 64), INT8_MIN, -INT8_C( 64), INT8_C( 64), -INT8_C( 64), INT8_MIN, -INT8_C( 64), -INT8_C( 64), INT8_MIN, INT8_C( 64), INT8_C( 0) }, { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_MIN, INT8_MIN, INT8_MIN, INT8_C( 0), INT8_MIN, INT8_MIN, INT8_C( 0), INT8_MIN, INT8_C( 0) } }, { { INT8_C( 37), INT8_C( 86), INT8_C( 8), INT8_C( 30), -INT8_C( 111), INT8_C( 62), -INT8_C( 88), INT8_C( 0), INT8_C( 125), -INT8_C( 57), INT8_C( 101), INT8_C( 27), INT8_C( 68), INT8_C( 60), -INT8_C( 85), INT8_C( 61) }, { INT8_C( 74), -INT8_C( 84), INT8_C( 16), INT8_C( 60), INT8_C( 34), INT8_C( 124), INT8_C( 80), INT8_C( 
0), -INT8_C( 6), -INT8_C( 114), -INT8_C( 54), INT8_C( 54), -INT8_C( 120), INT8_C( 120), INT8_C( 86), INT8_C( 122) }, { INT8_C( 40), -INT8_C( 80), INT8_C( 64), -INT8_C( 16), -INT8_C( 120), -INT8_C( 16), INT8_C( 64), INT8_C( 0), -INT8_C( 24), INT8_C( 56), INT8_C( 40), -INT8_C( 40), INT8_C( 32), -INT8_C( 32), INT8_C( 88), -INT8_C( 24) }, { -INT8_C( 96), -INT8_C( 64), INT8_C( 0), -INT8_C( 64), INT8_C( 32), -INT8_C( 64), INT8_C( 0), INT8_C( 0), -INT8_C( 96), -INT8_C( 32), -INT8_C( 96), INT8_C( 96), INT8_MIN, INT8_MIN, INT8_C( 96), -INT8_C( 96) }, { INT8_C( 64), INT8_MIN, INT8_C( 0), INT8_MIN, INT8_C( 64), INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 64), -INT8_C( 64), INT8_C( 64), -INT8_C( 64), INT8_C( 0), INT8_C( 0), -INT8_C( 64), INT8_C( 64) }, { INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_MIN } }, { { -INT8_C( 55), -INT8_C( 3), -INT8_C( 122), -INT8_C( 55), INT8_C( 114), -INT8_C( 83), -INT8_C( 75), -INT8_C( 97), INT8_C( 88), -INT8_C( 45), INT8_C( 10), -INT8_C( 89), INT8_C( 77), INT8_C( 24), INT8_C( 59), INT8_C( 114) }, { -INT8_C( 110), -INT8_C( 6), INT8_C( 12), -INT8_C( 110), -INT8_C( 28), INT8_C( 90), INT8_C( 106), INT8_C( 62), -INT8_C( 80), -INT8_C( 90), INT8_C( 20), INT8_C( 78), -INT8_C( 102), INT8_C( 48), INT8_C( 118), -INT8_C( 28) }, { INT8_C( 72), -INT8_C( 24), INT8_C( 48), INT8_C( 72), -INT8_C( 112), INT8_C( 104), -INT8_C( 88), -INT8_C( 8), -INT8_C( 64), -INT8_C( 104), INT8_C( 80), INT8_C( 56), INT8_C( 104), -INT8_C( 64), -INT8_C( 40), -INT8_C( 112) }, { INT8_C( 32), -INT8_C( 96), -INT8_C( 64), INT8_C( 32), INT8_C( 64), -INT8_C( 96), -INT8_C( 96), -INT8_C( 32), INT8_C( 0), INT8_C( 96), INT8_C( 64), -INT8_C( 32), -INT8_C( 96), INT8_C( 0), INT8_C( 96), INT8_C( 64) }, { INT8_C( 64), INT8_C( 64), INT8_MIN, INT8_C( 64), INT8_MIN, INT8_C( 64), INT8_C( 64), -INT8_C( 64), INT8_C( 0), -INT8_C( 64), INT8_MIN, -INT8_C( 64), INT8_C( 64), INT8_C( 0), 
-INT8_C( 64), INT8_MIN }, { INT8_MIN, INT8_MIN, INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_MIN, INT8_MIN, INT8_MIN, INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_MIN, INT8_MIN, INT8_C( 0), INT8_MIN, INT8_C( 0) } }, { { INT8_C( 110), INT8_C( 67), -INT8_C( 112), -INT8_C( 1), -INT8_C( 126), INT8_C( 56), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 100), INT8_C( 26), INT8_C( 67), -INT8_C( 96), -INT8_C( 58), INT8_MIN, INT8_C( 105) }, { -INT8_C( 36), -INT8_C( 122), INT8_C( 32), -INT8_C( 2), INT8_C( 4), INT8_C( 112), -INT8_C( 2), -INT8_C( 2), -INT8_C( 2), -INT8_C( 56), INT8_C( 52), -INT8_C( 122), INT8_C( 64), -INT8_C( 116), INT8_C( 0), -INT8_C( 46) }, { INT8_C( 112), INT8_C( 24), INT8_MIN, -INT8_C( 8), INT8_C( 16), -INT8_C( 64), -INT8_C( 8), -INT8_C( 8), -INT8_C( 8), INT8_C( 32), -INT8_C( 48), INT8_C( 24), INT8_C( 0), INT8_C( 48), INT8_C( 0), INT8_C( 72) }, { -INT8_C( 64), INT8_C( 96), INT8_C( 0), -INT8_C( 32), INT8_C( 64), INT8_C( 0), -INT8_C( 32), -INT8_C( 32), -INT8_C( 32), INT8_MIN, INT8_C( 64), INT8_C( 96), INT8_C( 0), -INT8_C( 64), INT8_C( 0), INT8_C( 32) }, { INT8_MIN, -INT8_C( 64), INT8_C( 0), -INT8_C( 64), INT8_MIN, INT8_C( 0), -INT8_C( 64), -INT8_C( 64), -INT8_C( 64), INT8_C( 0), INT8_MIN, -INT8_C( 64), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 64) }, { INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_MIN, INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_MIN } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t r1 = simde_vshlq_n_s8(a, 1); simde_int8x16_t r3 = simde_vshlq_n_s8(a, 3); simde_int8x16_t r5 = simde_vshlq_n_s8(a, 5); simde_int8x16_t r6 = simde_vshlq_n_s8(a, 6); simde_int8x16_t r7 = simde_vshlq_n_s8(a, 7); simde_test_arm_neon_assert_equal_i8x16(r1, simde_vld1q_s8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i8x16(r3, simde_vld1q_s8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i8x16(r5, 
simde_vld1q_s8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_i8x16(r6, simde_vld1q_s8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i8x16(r7, simde_vld1q_s8(test_vec[i].r7)); } return 0; } static int test_simde_vshlq_n_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t r3[8]; int16_t r6[8]; int16_t r10[8]; int16_t r13[8]; int16_t r15[8]; } test_vec[] = { { { INT16_C( 15177), INT16_C( 24595), -INT16_C( 14772), -INT16_C( 6844), INT16_C( 20381), INT16_C( 7805), -INT16_C( 16411), -INT16_C( 7709) }, { -INT16_C( 9656), INT16_C( 152), INT16_C( 12896), INT16_C( 10784), INT16_C( 31976), -INT16_C( 3096), -INT16_C( 216), INT16_C( 3864) }, { -INT16_C( 11712), INT16_C( 1216), -INT16_C( 27904), INT16_C( 20736), -INT16_C( 6336), -INT16_C( 24768), -INT16_C( 1728), INT16_C( 30912) }, { INT16_C( 9216), INT16_C( 19456), INT16_C( 12288), INT16_C( 4096), INT16_C( 29696), -INT16_C( 3072), -INT16_C( 27648), -INT16_C( 29696) }, { INT16_C( 8192), INT16_C( 24576), INT16_MIN, INT16_MIN, -INT16_C( 24576), -INT16_C( 24576), -INT16_C( 24576), INT16_C( 24576) }, { INT16_MIN, INT16_MIN, INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN } }, { { INT16_C( 13395), INT16_C( 32618), -INT16_C( 6463), -INT16_C( 11506), INT16_C( 27627), -INT16_C( 6770), INT16_C( 1102), -INT16_C( 26764) }, { -INT16_C( 23912), -INT16_C( 1200), INT16_C( 13832), -INT16_C( 26512), INT16_C( 24408), INT16_C( 11376), INT16_C( 8816), -INT16_C( 17504) }, { INT16_C( 5312), -INT16_C( 9600), -INT16_C( 20416), -INT16_C( 15488), -INT16_C( 1344), INT16_C( 25472), INT16_C( 4992), -INT16_C( 8960) }, { INT16_C( 19456), -INT16_C( 22528), INT16_C( 1024), INT16_C( 14336), -INT16_C( 21504), INT16_C( 14336), INT16_C( 14336), -INT16_C( 12288) }, { INT16_C( 24576), INT16_C( 16384), INT16_C( 8192), -INT16_C( 16384), INT16_C( 24576), -INT16_C( 16384), -INT16_C( 16384), INT16_MIN }, { INT16_MIN, INT16_C( 0), INT16_MIN, INT16_C( 0), INT16_MIN, INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 30912), 
-INT16_C( 29449), INT16_C( 15438), -INT16_C( 5263), -INT16_C( 4213), INT16_C( 28681), -INT16_C( 4946), INT16_C( 337) }, { INT16_C( 14848), INT16_C( 26552), -INT16_C( 7568), INT16_C( 23432), INT16_C( 31832), -INT16_C( 32696), INT16_C( 25968), INT16_C( 2696) }, { -INT16_C( 12288), INT16_C( 15808), INT16_C( 4992), -INT16_C( 9152), -INT16_C( 7488), INT16_C( 576), INT16_C( 11136), INT16_C( 21568) }, { INT16_C( 0), -INT16_C( 9216), INT16_C( 14336), -INT16_C( 15360), INT16_C( 11264), INT16_C( 9216), -INT16_C( 18432), INT16_C( 17408) }, { INT16_C( 0), -INT16_C( 8192), -INT16_C( 16384), INT16_C( 8192), INT16_C( 24576), INT16_C( 8192), -INT16_C( 16384), INT16_C( 8192) }, { INT16_C( 0), INT16_MIN, INT16_C( 0), INT16_MIN, INT16_MIN, INT16_MIN, INT16_C( 0), INT16_MIN } }, { { -INT16_C( 17631), -INT16_C( 7552), -INT16_C( 28766), -INT16_C( 29258), INT16_C( 17658), INT16_C( 18547), -INT16_C( 6327), -INT16_C( 30240) }, { -INT16_C( 9976), INT16_C( 5120), INT16_C( 32016), INT16_C( 28080), INT16_C( 10192), INT16_C( 17304), INT16_C( 14920), INT16_C( 20224) }, { -INT16_C( 14272), -INT16_C( 24576), -INT16_C( 6016), INT16_C( 28032), INT16_C( 16000), INT16_C( 7360), -INT16_C( 11712), INT16_C( 30720) }, { -INT16_C( 31744), INT16_C( 0), -INT16_C( 30720), -INT16_C( 10240), -INT16_C( 6144), -INT16_C( 13312), INT16_C( 9216), INT16_MIN }, { INT16_C( 8192), INT16_C( 0), INT16_C( 16384), -INT16_C( 16384), INT16_C( 16384), INT16_C( 24576), INT16_C( 8192), INT16_C( 0) }, { INT16_MIN, INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_MIN, INT16_C( 0) } }, { { -INT16_C( 10386), -INT16_C( 17387), -INT16_C( 31213), -INT16_C( 24921), -INT16_C( 20363), INT16_C( 8974), INT16_C( 24733), -INT16_C( 16859) }, { -INT16_C( 17552), -INT16_C( 8024), INT16_C( 12440), -INT16_C( 2760), -INT16_C( 31832), INT16_C( 6256), INT16_C( 1256), -INT16_C( 3800) }, { -INT16_C( 9344), INT16_C( 1344), -INT16_C( 31552), -INT16_C( 22080), INT16_C( 7488), -INT16_C( 15488), INT16_C( 10048), -INT16_C( 30400) }, { 
-INT16_C( 18432), INT16_C( 21504), INT16_C( 19456), -INT16_C( 25600), -INT16_C( 11264), INT16_C( 14336), INT16_C( 29696), -INT16_C( 27648) }, { -INT16_C( 16384), -INT16_C( 24576), INT16_C( 24576), -INT16_C( 8192), -INT16_C( 24576), -INT16_C( 16384), -INT16_C( 24576), -INT16_C( 24576) }, { INT16_C( 0), INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_C( 0), INT16_MIN, INT16_MIN } }, { { -INT16_C( 23269), -INT16_C( 16992), INT16_C( 22068), INT16_C( 11851), -INT16_C( 16741), -INT16_C( 7049), INT16_C( 22437), INT16_C( 4973) }, { INT16_C( 10456), -INT16_C( 4864), -INT16_C( 20064), INT16_C( 29272), -INT16_C( 2856), INT16_C( 9144), -INT16_C( 17112), -INT16_C( 25752) }, { INT16_C( 18112), INT16_C( 26624), -INT16_C( 29440), -INT16_C( 27968), -INT16_C( 22848), INT16_C( 7616), -INT16_C( 5824), -INT16_C( 9408) }, { INT16_C( 27648), INT16_MIN, -INT16_C( 12288), INT16_C( 11264), INT16_C( 27648), -INT16_C( 9216), -INT16_C( 27648), -INT16_C( 19456) }, { INT16_C( 24576), INT16_C( 0), INT16_MIN, INT16_C( 24576), INT16_C( 24576), -INT16_C( 8192), -INT16_C( 24576), -INT16_C( 24576) }, { INT16_MIN, INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN } }, { { -INT16_C( 32210), INT16_C( 17104), INT16_C( 30472), INT16_C( 32480), -INT16_C( 4312), -INT16_C( 14943), -INT16_C( 14769), INT16_C( 27267) }, { INT16_C( 4464), INT16_C( 5760), -INT16_C( 18368), -INT16_C( 2304), INT16_C( 31040), INT16_C( 11528), INT16_C( 12920), INT16_C( 21528) }, { -INT16_C( 29824), -INT16_C( 19456), -INT16_C( 15872), -INT16_C( 18432), -INT16_C( 13824), INT16_C( 26688), -INT16_C( 27712), -INT16_C( 24384) }, { -INT16_C( 18432), INT16_C( 16384), INT16_C( 8192), INT16_MIN, -INT16_C( 24576), -INT16_C( 31744), INT16_C( 15360), INT16_C( 3072) }, { -INT16_C( 16384), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 8192), -INT16_C( 8192), INT16_C( 24576) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_MIN, INT16_MIN } }, { { INT16_C( 9068), 
-INT16_C( 24536), INT16_C( 29562), INT16_C( 5583), INT16_C( 17969), -INT16_C( 10503), INT16_C( 26269), -INT16_C( 13335) }, { INT16_C( 7008), INT16_C( 320), -INT16_C( 25648), -INT16_C( 20872), INT16_C( 12680), -INT16_C( 18488), INT16_C( 13544), INT16_C( 24392) }, { -INT16_C( 9472), INT16_C( 2560), -INT16_C( 8576), INT16_C( 29632), -INT16_C( 29632), -INT16_C( 16832), -INT16_C( 22720), -INT16_C( 1472) }, { -INT16_C( 20480), -INT16_C( 24576), -INT16_C( 6144), INT16_C( 15360), -INT16_C( 15360), -INT16_C( 7168), INT16_C( 29696), -INT16_C( 23552) }, { INT16_MIN, INT16_C( 0), INT16_C( 16384), -INT16_C( 8192), INT16_C( 8192), INT16_C( 8192), -INT16_C( 24576), INT16_C( 8192) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t r3 = simde_vshlq_n_s16(a, 3); simde_int16x8_t r6 = simde_vshlq_n_s16(a, 6); simde_int16x8_t r10 = simde_vshlq_n_s16(a, 10); simde_int16x8_t r13 = simde_vshlq_n_s16(a, 13); simde_int16x8_t r15 = simde_vshlq_n_s16(a, 15); simde_test_arm_neon_assert_equal_i16x8(r3, simde_vld1q_s16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i16x8(r6, simde_vld1q_s16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i16x8(r10, simde_vld1q_s16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_i16x8(r13, simde_vld1q_s16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i16x8(r15, simde_vld1q_s16(test_vec[i].r15)); } return 0; } static int test_simde_vshlq_n_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t r6[4]; int32_t r13[4]; int32_t r19[4]; int32_t r26[4]; int32_t r31[4]; } test_vec[] = { { { -INT32_C( 625232409), -INT32_C( 1671477481), -INT32_C( 1663805479), INT32_C( 344789064) }, { -INT32_C( 1360168512), INT32_C( 399623616), INT32_C( 890631744), INT32_C( 591663616) }, { INT32_C( 1992089600), -INT32_C( 387784704), -INT32_C( 1963253760), -INT32_C( 
1576468480) }, { -INT32_C( 1355284480), INT32_C( 951582720), -INT32_C( 1094189056), -INT32_C( 2109734912) }, { -INT32_C( 1677721600), INT32_C( 1543503872), INT32_C( 1677721600), INT32_C( 536870912) }, { INT32_MIN, INT32_MIN, INT32_MIN, INT32_C( 0) } }, { { INT32_C( 563748864), INT32_C( 1054148742), INT32_C( 1642772687), INT32_C( 746101317) }, { INT32_C( 1720188928), -INT32_C( 1253957248), INT32_C( 2058236864), INT32_C( 505844032) }, { INT32_C( 1140850688), -INT32_C( 1592737792), INT32_C( 1461313536), INT32_C( 323526656) }, { INT32_C( 0), INT32_C( 1143996416), -INT32_C( 965214208), -INT32_C( 769130496) }, { INT32_C( 0), INT32_C( 402653184), INT32_C( 1006632960), INT32_C( 335544320) }, { INT32_C( 0), INT32_C( 0), INT32_MIN, INT32_MIN } }, { { INT32_C( 1711682383), INT32_C( 1392665978), INT32_C( 99604413), -INT32_C( 417760025) }, { -INT32_C( 2121477184), -INT32_C( 1063690624), INT32_C( 2079715136), -INT32_C( 966837824) }, { -INT32_C( 966139904), INT32_C( 1286553600), -INT32_C( 84434944), INT32_C( 798810112) }, { -INT32_C( 1703411712), INT32_C( 735051776), -INT32_C( 1108869120), -INT32_C( 415760384) }, { INT32_C( 1006632960), -INT32_C( 402653184), -INT32_C( 201326592), -INT32_C( 1677721600) }, { INT32_MIN, INT32_C( 0), INT32_MIN, INT32_MIN } }, { { INT32_C( 587772828), -INT32_C( 1956520516), -INT32_C( 622048363), INT32_C( 872834277) }, { -INT32_C( 1037244672), -INT32_C( 663261440), -INT32_C( 1156389568), INT32_C( 26818880) }, { INT32_C( 376668160), INT32_C( 1001881600), -INT32_C( 1988976640), -INT32_C( 862150656) }, { -INT32_C( 1663041536), -INT32_C( 304087040), INT32_C( 1554513920), INT32_C( 656932864) }, { INT32_C( 1879048192), -INT32_C( 268435456), INT32_C( 1409286144), -INT32_C( 1811939328) }, { INT32_C( 0), INT32_C( 0), INT32_MIN, INT32_MIN } }, { { INT32_C( 295308695), INT32_C( 795122802), INT32_C( 1513378675), INT32_C( 1816219088) }, { INT32_C( 1719887296), -INT32_C( 651748224), -INT32_C( 1928012608), INT32_C( 273904640) }, { INT32_C( 1102241792), -INT32_C( 
1819394048), -INT32_C( 1972477952), INT32_C( 700055552) }, { INT32_C( 1823997952), -INT32_C( 477102080), -INT32_C( 1684537344), INT32_C( 1853882368) }, { INT32_C( 1543503872), -INT32_C( 939524096), -INT32_C( 872415232), INT32_C( 1073741824) }, { INT32_MIN, INT32_C( 0), INT32_MIN, INT32_C( 0) } }, { { -INT32_C( 1114683135), -INT32_C( 1152847834), INT32_C( 546649147), INT32_C( 794074264) }, { INT32_C( 1674723392), -INT32_C( 767817344), INT32_C( 625807040), -INT32_C( 718854656) }, { -INT32_C( 383770624), INT32_C( 503627776), -INT32_C( 1501077504), -INT32_C( 1819082752) }, { INT32_C( 1208483840), -INT32_C( 2127560704), -INT32_C( 1579679744), -INT32_C( 457179136) }, { INT32_C( 67108864), -INT32_C( 1744830464), -INT32_C( 335544320), INT32_C( 1610612736) }, { INT32_MIN, INT32_C( 0), INT32_MIN, INT32_C( 0) } }, { { INT32_C( 457240233), -INT32_C( 28597109), -INT32_C( 950435849), -INT32_C( 835413299) }, { -INT32_C( 801396160), -INT32_C( 1830214976), -INT32_C( 698352192), -INT32_C( 1926843584) }, { INT32_C( 500506624), INT32_C( 1955684352), INT32_C( 805232640), -INT32_C( 1822842880) }, { INT32_C( 1967652864), INT32_C( 609746944), -INT32_C( 4718592), -INT32_C( 697827328) }, { -INT32_C( 1543503872), INT32_C( 738197504), -INT32_C( 603979776), INT32_C( 872415232) }, { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN } }, { { INT32_C( 176931812), -INT32_C( 272182348), -INT32_C( 1626318073), -INT32_C( 1597086473) }, { -INT32_C( 1561265920), -INT32_C( 239801088), -INT32_C( 1005141568), INT32_C( 865680832) }, { INT32_C( 2021425152), -INT32_C( 629768192), INT32_C( 190898176), -INT32_C( 862003200) }, { INT32_C( 522190848), -INT32_C( 1650458624), -INT32_C( 667418624), INT32_C( 666370048) }, { -INT32_C( 1879048192), -INT32_C( 805306368), INT32_C( 469762048), -INT32_C( 603979776) }, { INT32_C( 0), INT32_C( 0), INT32_MIN, INT32_MIN } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t r6 = 
simde_vshlq_n_s32(a, 6); simde_int32x4_t r13 = simde_vshlq_n_s32(a, 13); simde_int32x4_t r19 = simde_vshlq_n_s32(a, 19); simde_int32x4_t r26 = simde_vshlq_n_s32(a, 26); simde_int32x4_t r31 = simde_vshlq_n_s32(a, 31); simde_test_arm_neon_assert_equal_i32x4(r6, simde_vld1q_s32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i32x4(r13, simde_vld1q_s32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i32x4(r19, simde_vld1q_s32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_i32x4(r26, simde_vld1q_s32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i32x4(r31, simde_vld1q_s32(test_vec[i].r31)); } return 0; } static int test_simde_vshlq_n_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t r13[2]; int64_t r26[2]; int64_t r39[2]; int64_t r52[2]; int64_t r63[2]; } test_vec[] = { { { -INT64_C( 3916933561112073211), -INT64_C( 4395184176717254386) }, { -INT64_C( 8631788449193484288), INT64_C( 2695656213296824320) }, { -INT64_C( 5240941264311943168), INT64_C( 2063043097251545088) }, { -INT64_C( 8217377721311821824), INT64_C( 3231481166708080640) }, { -INT64_C( 4589168020290535424), INT64_C( 1215971899390033920) }, { INT64_MIN, INT64_C( 0) } }, { { -INT64_C( 5027807624829575335), INT64_C( 4868113288224494141) }, { INT64_C( 3779453989547614208), -INT64_C( 2276630224994590720) }, { INT64_C( 7650526689427980288), -INT64_C( 496544635330494464) }, { -INT64_C( 8921722671041871872), INT64_C( 9036787662400258048) }, { -INT64_C( 752101137770872832), INT64_C( 2580562586483294208) }, { INT64_MIN, INT64_MIN } }, { { -INT64_C( 475489388565583121), INT64_C( 4860549388740706153) }, { -INT64_C( 2946071576541536256), -INT64_C( 8899862575057133568) }, { -INT64_C( 5877106616171495424), -INT64_C( 6141635567890202624) }, { INT64_C( 744632705039204352), -INT64_C( 8007483150592638976) }, { -INT64_C( 5841168716699533312), -INT64_C( 680043543732944896) }, { INT64_MIN, INT64_MIN } }, { { INT64_C( 6761600824558648450), -INT64_C( 4297785532867161638) }, { -INT64_C( 
4538498565335400448), INT64_C( 7375351463745896448) }, { -INT64_C( 9190938702853963776), INT64_C( 5792349607602159616) }, { INT64_C( 7439455102718443520), INT64_C( 5902227895924817920) }, { -INT64_C( 4026218066869223424), INT64_C( 2134706223373615104) }, { INT64_C( 0), INT64_C( 0) } }, { { -INT64_C( 7943850393553834020), -INT64_C( 8668384723283549563) }, { INT64_C( 4090668054289809408), INT64_C( 8557030642935701504) }, { -INT64_C( 6981281188136615936), INT64_C( 1567546832970579968) }, { -INT64_C( 5748864715547738112), INT64_C( 2409780393143173120) }, { -INT64_C( 162129586585337856), INT64_C( 2904821759653969920) }, { INT64_C( 0), INT64_MIN } }, { { INT64_C( 7255433734221234739), INT64_C( 7054000982656373579) }, { INT64_C( 1103745248179675136), -INT64_C( 7273133011012853760) }, { INT64_C( 2976476970218422272), INT64_C( 1477731864553717760) }, { -INT64_C( 3296325414711984128), INT64_C( 4515322070590029824) }, { INT64_C( 2535526590209589248), INT64_C( 3796534485873328128) }, { INT64_MIN, INT64_MIN } }, { { -INT64_C( 8070194296929106872), INT64_C( 3195784395642389196) }, { INT64_C( 2099079731789496320), INT64_C( 3935928508598550528) }, { INT64_C( 3295686122251747328), -INT64_C( 1782298404970299392) }, { -INT64_C( 7772610424469454848), -INT64_C( 9213971212437291008) }, { INT64_C( 4935945191598063616), INT64_C( 3224577333197275136) }, { INT64_C( 0), INT64_C( 0) } }, { { -INT64_C( 4385327945774793418), -INT64_C( 4438078105841327742) }, { -INT64_C( 8795820274610683904), INT64_C( 1796726229369372672) }, { -INT64_C( 2377337701213929472), -INT64_C( 1720499826321260544) }, { INT64_C( 4611293492776271872), -INT64_C( 1022104909668941824) }, { -INT64_C( 3215570133942534144), INT64_C( 1738389456165011456) }, { INT64_C( 0), INT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t r13 = simde_vshlq_n_s64(a, 13); simde_int64x2_t r26 = simde_vshlq_n_s64(a, 26); simde_int64x2_t r39 
= simde_vshlq_n_s64(a, 39); simde_int64x2_t r52 = simde_vshlq_n_s64(a, 52); simde_int64x2_t r63 = simde_vshlq_n_s64(a, 63); simde_test_arm_neon_assert_equal_i64x2(r13, simde_vld1q_s64(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i64x2(r26, simde_vld1q_s64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i64x2(r39, simde_vld1q_s64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_i64x2(r52, simde_vld1q_s64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_i64x2(r63, simde_vld1q_s64(test_vec[i].r63)); } return 0; } static int test_simde_vshlq_n_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t r1[16]; uint8_t r3[16]; uint8_t r5[16]; uint8_t r6[16]; uint8_t r7[16]; } test_vec[] = { { { UINT8_C( 23), UINT8_C( 50), UINT8_C(143), UINT8_C(105), UINT8_C(115), UINT8_C( 68), UINT8_C(175), UINT8_C( 96), UINT8_C(181), UINT8_C(142), UINT8_C(239), UINT8_C(220), UINT8_C( 83), UINT8_C(110), UINT8_C(136), UINT8_C( 92) }, { UINT8_C( 46), UINT8_C(100), UINT8_C( 30), UINT8_C(210), UINT8_C(230), UINT8_C(136), UINT8_C( 94), UINT8_C(192), UINT8_C(106), UINT8_C( 28), UINT8_C(222), UINT8_C(184), UINT8_C(166), UINT8_C(220), UINT8_C( 16), UINT8_C(184) }, { UINT8_C(184), UINT8_C(144), UINT8_C(120), UINT8_C( 72), UINT8_C(152), UINT8_C( 32), UINT8_C(120), UINT8_C( 0), UINT8_C(168), UINT8_C(112), UINT8_C(120), UINT8_C(224), UINT8_C(152), UINT8_C(112), UINT8_C( 64), UINT8_C(224) }, { UINT8_C(224), UINT8_C( 64), UINT8_C(224), UINT8_C( 32), UINT8_C( 96), UINT8_C(128), UINT8_C(224), UINT8_C( 0), UINT8_C(160), UINT8_C(192), UINT8_C(224), UINT8_C(128), UINT8_C( 96), UINT8_C(192), UINT8_C( 0), UINT8_C(128) }, { UINT8_C(192), UINT8_C(128), UINT8_C(192), UINT8_C( 64), UINT8_C(192), UINT8_C( 0), UINT8_C(192), UINT8_C( 0), UINT8_C( 64), UINT8_C(128), UINT8_C(192), UINT8_C( 0), UINT8_C(192), UINT8_C(128), UINT8_C( 0), UINT8_C( 0) }, { UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C(128), UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C(128), 
UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 21), UINT8_C( 4), UINT8_C( 13), UINT8_C(229), UINT8_C(186), UINT8_C( 69), UINT8_C(150), UINT8_C(162), UINT8_C(189), UINT8_C(194), UINT8_C(194), UINT8_C( 36), UINT8_C(186), UINT8_C( 97), UINT8_C(180), UINT8_C(210) }, { UINT8_C( 42), UINT8_C( 8), UINT8_C( 26), UINT8_C(202), UINT8_C(116), UINT8_C(138), UINT8_C( 44), UINT8_C( 68), UINT8_C(122), UINT8_C(132), UINT8_C(132), UINT8_C( 72), UINT8_C(116), UINT8_C(194), UINT8_C(104), UINT8_C(164) }, { UINT8_C(168), UINT8_C( 32), UINT8_C(104), UINT8_C( 40), UINT8_C(208), UINT8_C( 40), UINT8_C(176), UINT8_C( 16), UINT8_C(232), UINT8_C( 16), UINT8_C( 16), UINT8_C( 32), UINT8_C(208), UINT8_C( 8), UINT8_C(160), UINT8_C(144) }, { UINT8_C(160), UINT8_C(128), UINT8_C(160), UINT8_C(160), UINT8_C( 64), UINT8_C(160), UINT8_C(192), UINT8_C( 64), UINT8_C(160), UINT8_C( 64), UINT8_C( 64), UINT8_C(128), UINT8_C( 64), UINT8_C( 32), UINT8_C(128), UINT8_C( 64) }, { UINT8_C( 64), UINT8_C( 0), UINT8_C( 64), UINT8_C( 64), UINT8_C(128), UINT8_C( 64), UINT8_C(128), UINT8_C(128), UINT8_C( 64), UINT8_C(128), UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C( 64), UINT8_C( 0), UINT8_C(128) }, { UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(147), UINT8_C( 67), UINT8_C( 59), UINT8_C( 7), UINT8_C(135), UINT8_C(234), UINT8_C(103), UINT8_C( 61), UINT8_C(120), UINT8_C( 86), UINT8_C( 25), UINT8_C(203), UINT8_C(196), UINT8_C(161), UINT8_C( 39), UINT8_C(217) }, { UINT8_C( 38), UINT8_C(134), UINT8_C(118), UINT8_C( 14), UINT8_C( 14), UINT8_C(212), UINT8_C(206), UINT8_C(122), UINT8_C(240), UINT8_C(172), UINT8_C( 50), UINT8_C(150), UINT8_C(136), UINT8_C( 66), UINT8_C( 78), UINT8_C(178) }, { UINT8_C(152), UINT8_C( 24), UINT8_C(216), UINT8_C( 56), UINT8_C( 56), UINT8_C( 80), UINT8_C( 56), UINT8_C(232), 
UINT8_C(192), UINT8_C(176), UINT8_C(200), UINT8_C( 88), UINT8_C( 32), UINT8_C( 8), UINT8_C( 56), UINT8_C(200) }, { UINT8_C( 96), UINT8_C( 96), UINT8_C( 96), UINT8_C(224), UINT8_C(224), UINT8_C( 64), UINT8_C(224), UINT8_C(160), UINT8_C( 0), UINT8_C(192), UINT8_C( 32), UINT8_C( 96), UINT8_C(128), UINT8_C( 32), UINT8_C(224), UINT8_C( 32) }, { UINT8_C(192), UINT8_C(192), UINT8_C(192), UINT8_C(192), UINT8_C(192), UINT8_C(128), UINT8_C(192), UINT8_C( 64), UINT8_C( 0), UINT8_C(128), UINT8_C( 64), UINT8_C(192), UINT8_C( 0), UINT8_C( 64), UINT8_C(192), UINT8_C( 64) }, { UINT8_C(128), UINT8_C(128), UINT8_C(128), UINT8_C(128), UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C(128), UINT8_C(128) } }, { { UINT8_C(165), UINT8_C( 53), UINT8_C(190), UINT8_C( 95), UINT8_C(122), UINT8_C( 85), UINT8_C( 1), UINT8_C( 56), UINT8_C( 23), UINT8_C(196), UINT8_C( 92), UINT8_C(209), UINT8_C( 37), UINT8_C( 16), UINT8_C(163), UINT8_C(185) }, { UINT8_C( 74), UINT8_C(106), UINT8_C(124), UINT8_C(190), UINT8_C(244), UINT8_C(170), UINT8_C( 2), UINT8_C(112), UINT8_C( 46), UINT8_C(136), UINT8_C(184), UINT8_C(162), UINT8_C( 74), UINT8_C( 32), UINT8_C( 70), UINT8_C(114) }, { UINT8_C( 40), UINT8_C(168), UINT8_C(240), UINT8_C(248), UINT8_C(208), UINT8_C(168), UINT8_C( 8), UINT8_C(192), UINT8_C(184), UINT8_C( 32), UINT8_C(224), UINT8_C(136), UINT8_C( 40), UINT8_C(128), UINT8_C( 24), UINT8_C(200) }, { UINT8_C(160), UINT8_C(160), UINT8_C(192), UINT8_C(224), UINT8_C( 64), UINT8_C(160), UINT8_C( 32), UINT8_C( 0), UINT8_C(224), UINT8_C(128), UINT8_C(128), UINT8_C( 32), UINT8_C(160), UINT8_C( 0), UINT8_C( 96), UINT8_C( 32) }, { UINT8_C( 64), UINT8_C( 64), UINT8_C(128), UINT8_C(192), UINT8_C(128), UINT8_C( 64), UINT8_C( 64), UINT8_C( 0), UINT8_C(192), UINT8_C( 0), UINT8_C( 0), UINT8_C( 64), UINT8_C( 64), UINT8_C( 0), UINT8_C(192), UINT8_C( 64) }, { UINT8_C(128), UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), 
UINT8_C(128), UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C(128) } }, { { UINT8_C( 84), UINT8_C(222), UINT8_C(192), UINT8_C(219), UINT8_C(200), UINT8_C( 39), UINT8_C( 24), UINT8_C( 65), UINT8_C(126), UINT8_C( 49), UINT8_C( 12), UINT8_C( 66), UINT8_C(210), UINT8_C( 52), UINT8_C( 28), UINT8_C(119) }, { UINT8_C(168), UINT8_C(188), UINT8_C(128), UINT8_C(182), UINT8_C(144), UINT8_C( 78), UINT8_C( 48), UINT8_C(130), UINT8_C(252), UINT8_C( 98), UINT8_C( 24), UINT8_C(132), UINT8_C(164), UINT8_C(104), UINT8_C( 56), UINT8_C(238) }, { UINT8_C(160), UINT8_C(240), UINT8_C( 0), UINT8_C(216), UINT8_C( 64), UINT8_C( 56), UINT8_C(192), UINT8_C( 8), UINT8_C(240), UINT8_C(136), UINT8_C( 96), UINT8_C( 16), UINT8_C(144), UINT8_C(160), UINT8_C(224), UINT8_C(184) }, { UINT8_C(128), UINT8_C(192), UINT8_C( 0), UINT8_C( 96), UINT8_C( 0), UINT8_C(224), UINT8_C( 0), UINT8_C( 32), UINT8_C(192), UINT8_C( 32), UINT8_C(128), UINT8_C( 64), UINT8_C( 64), UINT8_C(128), UINT8_C(128), UINT8_C(224) }, { UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C(192), UINT8_C( 0), UINT8_C(192), UINT8_C( 0), UINT8_C( 64), UINT8_C(128), UINT8_C( 64), UINT8_C( 0), UINT8_C(128), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C(192) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(128) } }, { { UINT8_C(105), UINT8_C(218), UINT8_C(214), UINT8_C(227), UINT8_C( 47), UINT8_C(216), UINT8_C( 27), UINT8_C( 70), UINT8_C(156), UINT8_C(120), UINT8_C( 24), UINT8_C(193), UINT8_C(136), UINT8_C(187), UINT8_C(122), UINT8_C(220) }, { UINT8_C(210), UINT8_C(180), UINT8_C(172), UINT8_C(198), UINT8_C( 94), UINT8_C(176), UINT8_C( 54), UINT8_C(140), UINT8_C( 56), UINT8_C(240), UINT8_C( 48), UINT8_C(130), UINT8_C( 16), UINT8_C(118), UINT8_C(244), UINT8_C(184) }, { UINT8_C( 72), UINT8_C(208), 
UINT8_C(176), UINT8_C( 24), UINT8_C(120), UINT8_C(192), UINT8_C(216), UINT8_C( 48), UINT8_C(224), UINT8_C(192), UINT8_C(192), UINT8_C( 8), UINT8_C( 64), UINT8_C(216), UINT8_C(208), UINT8_C(224) }, { UINT8_C( 32), UINT8_C( 64), UINT8_C(192), UINT8_C( 96), UINT8_C(224), UINT8_C( 0), UINT8_C( 96), UINT8_C(192), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C( 32), UINT8_C( 0), UINT8_C( 96), UINT8_C( 64), UINT8_C(128) }, { UINT8_C( 64), UINT8_C(128), UINT8_C(128), UINT8_C(192), UINT8_C(192), UINT8_C( 0), UINT8_C(192), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 64), UINT8_C( 0), UINT8_C(192), UINT8_C(128), UINT8_C( 0) }, { UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(154), UINT8_C( 58), UINT8_C(184), UINT8_C( 98), UINT8_C( 98), UINT8_C(208), UINT8_C(163), UINT8_C(224), UINT8_C( 2), UINT8_C(176), UINT8_C( 34), UINT8_C(212), UINT8_C(228), UINT8_C( 62), UINT8_C( 76), UINT8_C( 77) }, { UINT8_C( 52), UINT8_C(116), UINT8_C(112), UINT8_C(196), UINT8_C(196), UINT8_C(160), UINT8_C( 70), UINT8_C(192), UINT8_C( 4), UINT8_C( 96), UINT8_C( 68), UINT8_C(168), UINT8_C(200), UINT8_C(124), UINT8_C(152), UINT8_C(154) }, { UINT8_C(208), UINT8_C(208), UINT8_C(192), UINT8_C( 16), UINT8_C( 16), UINT8_C(128), UINT8_C( 24), UINT8_C( 0), UINT8_C( 16), UINT8_C(128), UINT8_C( 16), UINT8_C(160), UINT8_C( 32), UINT8_C(240), UINT8_C( 96), UINT8_C(104) }, { UINT8_C( 64), UINT8_C( 64), UINT8_C( 0), UINT8_C( 64), UINT8_C( 64), UINT8_C( 0), UINT8_C( 96), UINT8_C( 0), UINT8_C( 64), UINT8_C( 0), UINT8_C( 64), UINT8_C(128), UINT8_C(128), UINT8_C(192), UINT8_C(128), UINT8_C(160) }, { UINT8_C(128), UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C(128), UINT8_C( 0), UINT8_C(192), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 64) }, { 
UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(128) } }, { { UINT8_C( 25), UINT8_C( 34), UINT8_C( 48), UINT8_C( 72), UINT8_C(250), UINT8_C( 76), UINT8_C(143), UINT8_C(150), UINT8_C(196), UINT8_C(167), UINT8_C( 88), UINT8_C( 76), UINT8_C( 98), UINT8_C(210), UINT8_C( 41), UINT8_C(252) }, { UINT8_C( 50), UINT8_C( 68), UINT8_C( 96), UINT8_C(144), UINT8_C(244), UINT8_C(152), UINT8_C( 30), UINT8_C( 44), UINT8_C(136), UINT8_C( 78), UINT8_C(176), UINT8_C(152), UINT8_C(196), UINT8_C(164), UINT8_C( 82), UINT8_C(248) }, { UINT8_C(200), UINT8_C( 16), UINT8_C(128), UINT8_C( 64), UINT8_C(208), UINT8_C( 96), UINT8_C(120), UINT8_C(176), UINT8_C( 32), UINT8_C( 56), UINT8_C(192), UINT8_C( 96), UINT8_C( 16), UINT8_C(144), UINT8_C( 72), UINT8_C(224) }, { UINT8_C( 32), UINT8_C( 64), UINT8_C( 0), UINT8_C( 0), UINT8_C( 64), UINT8_C(128), UINT8_C(224), UINT8_C(192), UINT8_C(128), UINT8_C(224), UINT8_C( 0), UINT8_C(128), UINT8_C( 64), UINT8_C( 64), UINT8_C( 32), UINT8_C(128) }, { UINT8_C( 64), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C(192), UINT8_C(128), UINT8_C( 0), UINT8_C(192), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C(128), UINT8_C( 64), UINT8_C( 0) }, { UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(128), UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t r1 = simde_vshlq_n_u8(a, 1); simde_uint8x16_t r3 = simde_vshlq_n_u8(a, 3); simde_uint8x16_t r5 = simde_vshlq_n_u8(a, 5); simde_uint8x16_t r6 = simde_vshlq_n_u8(a, 6); simde_uint8x16_t r7 = simde_vshlq_n_u8(a, 7); simde_test_arm_neon_assert_equal_u8x16(r1, simde_vld1q_u8(test_vec[i].r1)); 
simde_test_arm_neon_assert_equal_u8x16(r3, simde_vld1q_u8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u8x16(r5, simde_vld1q_u8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_u8x16(r6, simde_vld1q_u8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u8x16(r7, simde_vld1q_u8(test_vec[i].r7)); } return 0; } static int test_simde_vshlq_n_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t r3[8]; uint16_t r6[8]; uint16_t r10[8]; uint16_t r13[8]; uint16_t r15[8]; } test_vec[] = { { { UINT16_C(26792), UINT16_C(21973), UINT16_C(45271), UINT16_C( 606), UINT16_C(50076), UINT16_C( 1066), UINT16_C(38310), UINT16_C(50221) }, { UINT16_C(17728), UINT16_C(44712), UINT16_C(34488), UINT16_C( 4848), UINT16_C( 7392), UINT16_C( 8528), UINT16_C(44336), UINT16_C( 8552) }, { UINT16_C(10752), UINT16_C(30016), UINT16_C(13760), UINT16_C(38784), UINT16_C(59136), UINT16_C( 2688), UINT16_C(27008), UINT16_C( 2880) }, { UINT16_C(40960), UINT16_C(21504), UINT16_C(23552), UINT16_C(30720), UINT16_C(28672), UINT16_C(43008), UINT16_C(38912), UINT16_C(46080) }, { UINT16_C( 0), UINT16_C(40960), UINT16_C(57344), UINT16_C(49152), UINT16_C(32768), UINT16_C(16384), UINT16_C(49152), UINT16_C(40960) }, { UINT16_C( 0), UINT16_C(32768), UINT16_C(32768), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(32768) } }, { { UINT16_C(31245), UINT16_C(30511), UINT16_C(48494), UINT16_C(60175), UINT16_C(39412), UINT16_C(16395), UINT16_C(18323), UINT16_C(15357) }, { UINT16_C(53352), UINT16_C(47480), UINT16_C(60272), UINT16_C(22648), UINT16_C(53152), UINT16_C( 88), UINT16_C(15512), UINT16_C(57320) }, { UINT16_C(33600), UINT16_C(52160), UINT16_C(23424), UINT16_C(50112), UINT16_C(32000), UINT16_C( 704), UINT16_C(58560), UINT16_C(65344) }, { UINT16_C(13312), UINT16_C(48128), UINT16_C(47104), UINT16_C(15360), UINT16_C(53248), UINT16_C(11264), UINT16_C(19456), UINT16_C(62464) }, { UINT16_C(40960), UINT16_C(57344), UINT16_C(49152), UINT16_C(57344), UINT16_C(32768), UINT16_C(24576), 
UINT16_C(24576), UINT16_C(40960) }, { UINT16_C(32768), UINT16_C(32768), UINT16_C( 0), UINT16_C(32768), UINT16_C( 0), UINT16_C(32768), UINT16_C(32768), UINT16_C(32768) } }, { { UINT16_C(53935), UINT16_C(34449), UINT16_C(61314), UINT16_C( 7817), UINT16_C(46003), UINT16_C(22818), UINT16_C(20552), UINT16_C(22046) }, { UINT16_C(38264), UINT16_C(13448), UINT16_C(31760), UINT16_C(62536), UINT16_C(40344), UINT16_C(51472), UINT16_C(33344), UINT16_C(45296) }, { UINT16_C(43968), UINT16_C(42048), UINT16_C(57472), UINT16_C(41536), UINT16_C(60608), UINT16_C(18560), UINT16_C( 4608), UINT16_C(34688) }, { UINT16_C(48128), UINT16_C(17408), UINT16_C( 2048), UINT16_C( 9216), UINT16_C(52224), UINT16_C(34816), UINT16_C( 8192), UINT16_C(30720) }, { UINT16_C(57344), UINT16_C( 8192), UINT16_C(16384), UINT16_C( 8192), UINT16_C(24576), UINT16_C(16384), UINT16_C( 0), UINT16_C(49152) }, { UINT16_C(32768), UINT16_C(32768), UINT16_C( 0), UINT16_C(32768), UINT16_C(32768), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(19914), UINT16_C(14541), UINT16_C(56587), UINT16_C(65315), UINT16_C(11894), UINT16_C( 2368), UINT16_C(15733), UINT16_C( 9541) }, { UINT16_C(28240), UINT16_C(50792), UINT16_C(59480), UINT16_C(63768), UINT16_C(29616), UINT16_C(18944), UINT16_C(60328), UINT16_C(10792) }, { UINT16_C(29312), UINT16_C(13120), UINT16_C(17088), UINT16_C(51392), UINT16_C(40320), UINT16_C(20480), UINT16_C(23872), UINT16_C(20800) }, { UINT16_C(10240), UINT16_C(13312), UINT16_C(11264), UINT16_C(35840), UINT16_C(55296), UINT16_C( 0), UINT16_C(54272), UINT16_C( 5120) }, { UINT16_C(16384), UINT16_C(40960), UINT16_C(24576), UINT16_C(24576), UINT16_C(49152), UINT16_C( 0), UINT16_C(40960), UINT16_C(40960) }, { UINT16_C( 0), UINT16_C(32768), UINT16_C(32768), UINT16_C(32768), UINT16_C( 0), UINT16_C( 0), UINT16_C(32768), UINT16_C(32768) } }, { { UINT16_C(54799), UINT16_C(37291), UINT16_C(13509), UINT16_C(30896), UINT16_C(53991), UINT16_C(12498), UINT16_C(61474), UINT16_C(60806) }, { UINT16_C(45176), 
UINT16_C(36184), UINT16_C(42536), UINT16_C(50560), UINT16_C(38712), UINT16_C(34448), UINT16_C(33040), UINT16_C(27696) }, { UINT16_C(33728), UINT16_C(27328), UINT16_C(12608), UINT16_C(11264), UINT16_C(47552), UINT16_C(13440), UINT16_C( 2176), UINT16_C(24960) }, { UINT16_C(15360), UINT16_C(44032), UINT16_C( 5120), UINT16_C(49152), UINT16_C(39936), UINT16_C(18432), UINT16_C(34816), UINT16_C( 6144) }, { UINT16_C(57344), UINT16_C(24576), UINT16_C(40960), UINT16_C( 0), UINT16_C(57344), UINT16_C(16384), UINT16_C(16384), UINT16_C(49152) }, { UINT16_C(32768), UINT16_C(32768), UINT16_C(32768), UINT16_C( 0), UINT16_C(32768), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(21309), UINT16_C(18469), UINT16_C(18736), UINT16_C(42824), UINT16_C(34935), UINT16_C(60848), UINT16_C(62917), UINT16_C(54290) }, { UINT16_C(39400), UINT16_C(16680), UINT16_C(18816), UINT16_C(14912), UINT16_C(17336), UINT16_C(28032), UINT16_C(44584), UINT16_C(41104) }, { UINT16_C(53056), UINT16_C( 2368), UINT16_C(19456), UINT16_C(53760), UINT16_C( 7616), UINT16_C(27648), UINT16_C(28992), UINT16_C( 1152) }, { UINT16_C(62464), UINT16_C(37888), UINT16_C(49152), UINT16_C( 8192), UINT16_C(56320), UINT16_C(49152), UINT16_C( 5120), UINT16_C(18432) }, { UINT16_C(40960), UINT16_C(40960), UINT16_C( 0), UINT16_C( 0), UINT16_C(57344), UINT16_C( 0), UINT16_C(40960), UINT16_C(16384) }, { UINT16_C(32768), UINT16_C(32768), UINT16_C( 0), UINT16_C( 0), UINT16_C(32768), UINT16_C( 0), UINT16_C(32768), UINT16_C( 0) } }, { { UINT16_C(48587), UINT16_C(37221), UINT16_C( 5618), UINT16_C(55561), UINT16_C(56296), UINT16_C( 2569), UINT16_C(36811), UINT16_C( 2551) }, { UINT16_C(61016), UINT16_C(35624), UINT16_C(44944), UINT16_C(51272), UINT16_C(57152), UINT16_C(20552), UINT16_C(32344), UINT16_C(20408) }, { UINT16_C(29376), UINT16_C(22848), UINT16_C(31872), UINT16_C(16960), UINT16_C(64000), UINT16_C(33344), UINT16_C(62144), UINT16_C(32192) }, { UINT16_C(11264), UINT16_C(37888), UINT16_C(51200), UINT16_C( 9216), UINT16_C(40960), 
UINT16_C( 9216), UINT16_C(11264), UINT16_C(56320) }, { UINT16_C(24576), UINT16_C(40960), UINT16_C(16384), UINT16_C( 8192), UINT16_C( 0), UINT16_C( 8192), UINT16_C(24576), UINT16_C(57344) }, { UINT16_C(32768), UINT16_C(32768), UINT16_C( 0), UINT16_C(32768), UINT16_C( 0), UINT16_C(32768), UINT16_C(32768), UINT16_C(32768) } }, { { UINT16_C( 7651), UINT16_C( 4945), UINT16_C(39270), UINT16_C(56762), UINT16_C(27425), UINT16_C(59082), UINT16_C(56416), UINT16_C(11450) }, { UINT16_C(61208), UINT16_C(39560), UINT16_C(52016), UINT16_C(60880), UINT16_C(22792), UINT16_C(13904), UINT16_C(58112), UINT16_C(26064) }, { UINT16_C(30912), UINT16_C(54336), UINT16_C(22912), UINT16_C(28288), UINT16_C(51264), UINT16_C(45696), UINT16_C( 6144), UINT16_C(11904) }, { UINT16_C(35840), UINT16_C(17408), UINT16_C(38912), UINT16_C(59392), UINT16_C(33792), UINT16_C(10240), UINT16_C(32768), UINT16_C(59392) }, { UINT16_C(24576), UINT16_C( 8192), UINT16_C(49152), UINT16_C(16384), UINT16_C( 8192), UINT16_C(16384), UINT16_C( 0), UINT16_C(16384) }, { UINT16_C(32768), UINT16_C(32768), UINT16_C( 0), UINT16_C( 0), UINT16_C(32768), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t r3 = simde_vshlq_n_u16(a, 3); simde_uint16x8_t r6 = simde_vshlq_n_u16(a, 6); simde_uint16x8_t r10 = simde_vshlq_n_u16(a, 10); simde_uint16x8_t r13 = simde_vshlq_n_u16(a, 13); simde_uint16x8_t r15 = simde_vshlq_n_u16(a, 15); simde_test_arm_neon_assert_equal_u16x8(r3, simde_vld1q_u16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u16x8(r6, simde_vld1q_u16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u16x8(r10, simde_vld1q_u16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_u16x8(r13, simde_vld1q_u16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u16x8(r15, simde_vld1q_u16(test_vec[i].r15)); } return 0; } static int test_simde_vshlq_n_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { 
uint32_t a[4]; uint32_t r6[4]; uint32_t r13[4]; uint32_t r19[4]; uint32_t r26[4]; uint32_t r31[4]; } test_vec[] = { { { UINT32_C( 354535181), UINT32_C( 492657404), UINT32_C(2730110454), UINT32_C( 949699748) }, { UINT32_C(1215415104), UINT32_C(1465302784), UINT32_C(2928377216), UINT32_C( 651241728) }, { UINT32_C( 954310656), UINT32_C(2875162624), UINT32_C(1170128896), UINT32_C(1754562560) }, { UINT32_C( 946339840), UINT32_C(3621781504), UINT32_C(1873805312), UINT32_C( 622854144) }, { UINT32_C( 872415232), UINT32_C(4026531840), UINT32_C(3623878656), UINT32_C(2415919104) }, { UINT32_C(2147483648), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 408290964), UINT32_C( 736719627), UINT32_C(1309181443), UINT32_C(3275223222) }, { UINT32_C( 360817920), UINT32_C(4200383168), UINT32_C(2183233728), UINT32_C(3455856000) }, { UINT32_C(3235020800), UINT32_C( 778133504), UINT32_C( 281042944), UINT32_C(4262903808) }, { UINT32_C( 882900992), UINT32_C(2555904000), UINT32_C( 806879232), UINT32_C(2242904064) }, { UINT32_C(1342177280), UINT32_C( 738197504), UINT32_C( 201326592), UINT32_C(3623878656) }, { UINT32_C( 0), UINT32_C(2147483648), UINT32_C(2147483648), UINT32_C( 0) } }, { { UINT32_C(3034142904), UINT32_C(2832283314), UINT32_C( 139103075), UINT32_C(1681974992) }, { UINT32_C( 911617536), UINT32_C( 877505664), UINT32_C( 312662208), UINT32_C( 272217088) }, { UINT32_C( 722927616), UINT32_C( 651575296), UINT32_C(1366056960), UINT32_C( 484048896) }, { UINT32_C(3317694464), UINT32_C(3046113280), UINT32_C(1528299520), UINT32_C( 914358272) }, { UINT32_C(3758096384), UINT32_C(3355443200), UINT32_C(2348810240), UINT32_C(1073741824) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C(2147483648), UINT32_C( 0) } }, { { UINT32_C(4168980204), UINT32_C( 220423690), UINT32_C(2723884012), UINT32_C(3563426332) }, { UINT32_C( 526760704), UINT32_C(1222214272), UINT32_C(2529884928), UINT32_C( 426018560) }, { UINT32_C(3000860672), UINT32_C(1824604160), UINT32_C(1702723584), UINT32_C(2990768128) }, { 
UINT32_C(3076521984), UINT32_C( 810549248), UINT32_C(1600126976), UINT32_C(2430599168) }, { UINT32_C(2952790016), UINT32_C( 671088640), UINT32_C(2952790016), UINT32_C(1879048192) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(2642951915), UINT32_C(3628489077), UINT32_C(3051393253), UINT32_C(1662591094) }, { UINT32_C(1645198016), UINT32_C( 295066944), UINT32_C(2015639872), UINT32_C(3326614912) }, { UINT32_C( 131948544), UINT32_C(3408830464), UINT32_C( 303865856), UINT32_C( 604946432) }, { UINT32_C(4149739520), UINT32_C(3416784896), UINT32_C(2267545600), UINT32_C( 61865984) }, { UINT32_C(2885681152), UINT32_C(3556769792), UINT32_C(2483027968), UINT32_C(3623878656) }, { UINT32_C(2147483648), UINT32_C(2147483648), UINT32_C(2147483648), UINT32_C( 0) } }, { { UINT32_C(3244005047), UINT32_C(3905847036), UINT32_C(3314166185), UINT32_C(2795106491) }, { UINT32_C(1457892800), UINT32_C( 866107136), UINT32_C(1653238336), UINT32_C(2793156288) }, { UINT32_C(1926684672), UINT32_C(3487531008), UINT32_C(1161109504), UINT32_C(1041719296) }, { UINT32_C(3048734720), UINT32_C(4158652416), UINT32_C(1296564224), UINT32_C(2245525504) }, { UINT32_C(3690987520), UINT32_C(4026531840), UINT32_C(2751463424), UINT32_C(3959422976) }, { UINT32_C(2147483648), UINT32_C( 0), UINT32_C(2147483648), UINT32_C(2147483648) } }, { { UINT32_C(2739151150), UINT32_C(1618774651), UINT32_C(2434096154), UINT32_C( 888417917) }, { UINT32_C(3506981760), UINT32_C( 522362560), UINT32_C(1163331200), UINT32_C(1024171840) }, { UINT32_C(2217066496), UINT32_C(2437898240), UINT32_C(2877505536), UINT32_C(2244976640) }, { UINT32_C( 158334976), UINT32_C(1406664704), UINT32_C(3771727872), UINT32_C(1944584192) }, { UINT32_C(3087007744), UINT32_C(3959422976), UINT32_C(1744830464), UINT32_C(4093640704) }, { UINT32_C( 0), UINT32_C(2147483648), UINT32_C( 0), UINT32_C(2147483648) } }, { { UINT32_C(3254079429), UINT32_C(1990902733), UINT32_C(2805740780), UINT32_C(1397675300) }, { UINT32_C(2102653248), 
UINT32_C(2863723328), UINT32_C(3473750784), UINT32_C(3551873280) }, { UINT32_C(2851643392), UINT32_C(1484365824), UINT32_C(2258468864), UINT32_C(3668213760) }, { UINT32_C(2116550656), UINT32_C( 510132224), UINT32_C(2808086528), UINT32_C(2837446656) }, { UINT32_C( 335544320), UINT32_C( 872415232), UINT32_C(2952790016), UINT32_C(2415919104) }, { UINT32_C(2147483648), UINT32_C(2147483648), UINT32_C( 0), UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t r6 = simde_vshlq_n_u32(a, 6); simde_uint32x4_t r13 = simde_vshlq_n_u32(a, 13); simde_uint32x4_t r19 = simde_vshlq_n_u32(a, 19); simde_uint32x4_t r26 = simde_vshlq_n_u32(a, 26); simde_uint32x4_t r31 = simde_vshlq_n_u32(a, 31); simde_test_arm_neon_assert_equal_u32x4(r6, simde_vld1q_u32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u32x4(r13, simde_vld1q_u32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u32x4(r19, simde_vld1q_u32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_u32x4(r26, simde_vld1q_u32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u32x4(r31, simde_vld1q_u32(test_vec[i].r31)); } return 0; } static int test_simde_vshlq_n_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t r13[2]; uint64_t r26[2]; uint64_t r39[2]; uint64_t r52[2]; uint64_t r63[2]; } test_vec[] = { { { UINT64_C(13917611607917708200), UINT64_C(17112162255472035057) }, { UINT64_C(12195916536836587520), UINT64_C( 6024980708028456960) }, { UINT64_C( 1382366554393411584), UINT64_C(11601562996068843520) }, { UINT64_C(16492696406872555520), UINT64_C( 2378596044356190208) }, { UINT64_C( 4215369251218784256), UINT64_C( 5697053528623677440) }, { UINT64_C( 0), UINT64_C( 9223372036854775808) } }, { { UINT64_C( 8092300662856394291), UINT64_C(10892570253513852943) }, { UINT64_C(12975573281163075584), UINT64_C( 5034432252382142464) }, { UINT64_C( 5756966573478772736), UINT64_C(13596006773663203328) }, { 
UINT64_C(11192317536492322816), UINT64_C(15493516864398557184) }, { UINT64_C( 7147212608636977152), UINT64_C( 9290926031265333248) }, { UINT64_C( 9223372036854775808), UINT64_C( 9223372036854775808) } }, { { UINT64_C( 3366027571197309937), UINT64_C(14439360183432903052) }, { UINT64_C(15062217126292889600), UINT64_C( 6715622056696840192) }, { UINT64_C(17858333621870395392), UINT64_C( 6185060658631933952) }, { UINT64_C(12788525845534736384), UINT64_C(13257689106374197248) }, { UINT64_C( 4544132024016830464), UINT64_C(11006797489293492224) }, { UINT64_C( 9223372036854775808), UINT64_C( 0) } }, { { UINT64_C( 5157481632533420175), UINT64_C( 6961892599648108823) }, { UINT64_C( 7045604918904872960), UINT64_C(12938244481083432960) }, { UINT64_C(16180033105241833472), UINT64_C(13554085574108774400) }, { UINT64_C( 6975028537971441664), UINT64_C( 4116443441288708096) }, { UINT64_C( 9867386783568756736), UINT64_C( 1256504296036368384) }, { UINT64_C( 9223372036854775808), UINT64_C( 9223372036854775808) } }, { { UINT64_C( 1511815881612918927), UINT64_C(16312859544596867589) }, { UINT64_C( 7030428713922715648), UINT64_C( 6731319385547382784) }, { UINT64_C( 2537026333666443264), UINT64_C( 5650370086309986304) }, { UINT64_C(12285898398548099072), UINT64_C( 4950866114142797824) }, { UINT64_C( 644014746713980928), UINT64_C(11551733044205322240) }, { UINT64_C( 9223372036854775808), UINT64_C( 9223372036854775808) } }, { { UINT64_C( 5261216133352334211), UINT64_C(11648819999460521433) }, { UINT64_C( 8288408236809281536), UINT64_C( 2126342281081069568) }, { UINT64_C(14622084690484396032), UINT64_C( 5269561034305175552) }, { UINT64_C( 9408513852053651456), UINT64_C( 2862860547647340544) }, { UINT64_C( 4048736065006075904), UINT64_C( 6741888642173632512) }, { UINT64_C( 9223372036854775808), UINT64_C( 9223372036854775808) } }, { { UINT64_C( 1047484977181031916), UINT64_C( 9298256467379884756) }, { UINT64_C( 3260938792071954432), UINT64_C( 4710700429277298688) }, { UINT64_C( 
2725165922019966976), UINT64_C(17916058512958423040) }, { UINT64_C( 3998903999012012032), UINT64_C( 6055487722208886784) }, { UINT64_C(16050829071948447744), UINT64_C( 3260606130216239104) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 9271012395537207086), UINT64_C(15089716154388248757) }, { UINT64_C( 2888192778576445440), UINT64_C( 3322698820828438528) }, { UINT64_C(11349339602595872768), UINT64_C(10601231504979787776) }, { UINT64_C( 2199892969249570816), UINT64_C(16464133843562004480) }, { UINT64_C(17500988151961747456), UINT64_C(10038523569408835584) }, { UINT64_C( 0), UINT64_C( 9223372036854775808) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t r13 = simde_vshlq_n_u64(a, 13); simde_uint64x2_t r26 = simde_vshlq_n_u64(a, 26); simde_uint64x2_t r39 = simde_vshlq_n_u64(a, 39); simde_uint64x2_t r52 = simde_vshlq_n_u64(a, 52); simde_uint64x2_t r63 = simde_vshlq_n_u64(a, 63); simde_test_arm_neon_assert_equal_u64x2(r13, simde_vld1q_u64(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u64x2(r26, simde_vld1q_u64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u64x2(r39, simde_vld1q_u64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_u64x2(r52, simde_vld1q_u64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_u64x2(r63, simde_vld1q_u64(test_vec[i].r63)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vshl_n_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vshl_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vshl_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vshl_n_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vshl_n_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vshl_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vshl_n_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vshl_n_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vshlq_n_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vshlq_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vshlq_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vshlq_n_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vshlq_n_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vshlq_n_u16) 
SIMDE_TEST_FUNC_LIST_ENTRY(vshlq_n_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vshlq_n_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/shr_n.c000066400000000000000000003071611400333146700166040ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN shr_n #include "test-neon.h" #include "../../../simde/arm/neon/shr_n.h" static int test_simde_vshr_n_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t r1[8]; int8_t r3[8]; int8_t r5[8]; int8_t r6[8]; int8_t r8[8]; } test_vec[] = { { { INT8_C( 14), -INT8_C( 74), -INT8_C( 83), -INT8_C( 76), -INT8_C( 68), INT8_C( 105), INT8_C( 53), INT8_C( 43) }, { INT8_C( 7), -INT8_C( 37), -INT8_C( 42), -INT8_C( 38), -INT8_C( 34), INT8_C( 52), INT8_C( 26), INT8_C( 21) }, { INT8_C( 1), -INT8_C( 10), -INT8_C( 11), -INT8_C( 10), -INT8_C( 9), INT8_C( 13), INT8_C( 6), INT8_C( 5) }, { INT8_C( 0), -INT8_C( 3), -INT8_C( 3), -INT8_C( 3), -INT8_C( 3), INT8_C( 3), INT8_C( 1), INT8_C( 1) }, { INT8_C( 0), -INT8_C( 2), -INT8_C( 2), -INT8_C( 2), -INT8_C( 2), INT8_C( 1), INT8_C( 0), INT8_C( 0) }, { INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 57), INT8_C( 96), -INT8_C( 64), -INT8_C( 76), INT8_C( 99), -INT8_C( 51), -INT8_C( 25), -INT8_C( 73) }, { INT8_C( 28), INT8_C( 48), -INT8_C( 32), -INT8_C( 38), INT8_C( 49), -INT8_C( 26), -INT8_C( 13), -INT8_C( 37) }, { INT8_C( 7), INT8_C( 12), -INT8_C( 8), -INT8_C( 10), INT8_C( 12), -INT8_C( 7), -INT8_C( 4), -INT8_C( 10) }, { INT8_C( 1), INT8_C( 3), -INT8_C( 2), -INT8_C( 3), INT8_C( 3), -INT8_C( 2), -INT8_C( 1), -INT8_C( 3) }, { INT8_C( 0), INT8_C( 1), -INT8_C( 1), -INT8_C( 2), INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 2) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } }, { { INT8_C( 107), INT8_C( 102), INT8_C( 97), -INT8_C( 115), -INT8_C( 78), -INT8_C( 41), -INT8_C( 109), INT8_C( 42) }, { INT8_C( 53), INT8_C( 51), INT8_C( 48), -INT8_C( 58), 
-INT8_C( 39), -INT8_C( 21), -INT8_C( 55), INT8_C( 21) }, { INT8_C( 13), INT8_C( 12), INT8_C( 12), -INT8_C( 15), -INT8_C( 10), -INT8_C( 6), -INT8_C( 14), INT8_C( 5) }, { INT8_C( 3), INT8_C( 3), INT8_C( 3), -INT8_C( 4), -INT8_C( 3), -INT8_C( 2), -INT8_C( 4), INT8_C( 1) }, { INT8_C( 1), INT8_C( 1), INT8_C( 1), -INT8_C( 2), -INT8_C( 2), -INT8_C( 1), -INT8_C( 2), INT8_C( 0) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0) } }, { { INT8_C( 47), -INT8_C( 23), INT8_C( 0), -INT8_C( 58), INT8_C( 15), INT8_C( 122), INT8_C( 37), INT8_C( 30) }, { INT8_C( 23), -INT8_C( 12), INT8_C( 0), -INT8_C( 29), INT8_C( 7), INT8_C( 61), INT8_C( 18), INT8_C( 15) }, { INT8_C( 5), -INT8_C( 3), INT8_C( 0), -INT8_C( 8), INT8_C( 1), INT8_C( 15), INT8_C( 4), INT8_C( 3) }, { INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 2), INT8_C( 0), INT8_C( 3), INT8_C( 1), INT8_C( 0) }, { INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 0) }, { INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 48), -INT8_C( 46), -INT8_C( 46), -INT8_C( 20), INT8_C( 59), INT8_C( 7), INT8_C( 23), INT8_C( 116) }, { INT8_C( 24), -INT8_C( 23), -INT8_C( 23), -INT8_C( 10), INT8_C( 29), INT8_C( 3), INT8_C( 11), INT8_C( 58) }, { INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 3), INT8_C( 7), INT8_C( 0), INT8_C( 2), INT8_C( 14) }, { INT8_C( 1), -INT8_C( 2), -INT8_C( 2), -INT8_C( 1), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 3) }, { INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 1) }, { INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 104), -INT8_C( 41), INT8_C( 40), -INT8_C( 53), -INT8_C( 91), INT8_C( 16), -INT8_C( 126), INT8_C( 16) }, { INT8_C( 52), -INT8_C( 21), INT8_C( 20), -INT8_C( 27), -INT8_C( 46), INT8_C( 8), -INT8_C( 63), INT8_C( 8) }, { INT8_C( 13), -INT8_C( 
6), INT8_C( 5), -INT8_C( 7), -INT8_C( 12), INT8_C( 2), -INT8_C( 16), INT8_C( 2) }, { INT8_C( 3), -INT8_C( 2), INT8_C( 1), -INT8_C( 2), -INT8_C( 3), INT8_C( 0), -INT8_C( 4), INT8_C( 0) }, { INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 2), INT8_C( 0), -INT8_C( 2), INT8_C( 0) }, { INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0) } }, { { INT8_C( 118), -INT8_C( 29), -INT8_C( 98), INT8_C( 40), -INT8_C( 69), INT8_C( 49), INT8_C( 83), -INT8_C( 22) }, { INT8_C( 59), -INT8_C( 15), -INT8_C( 49), INT8_C( 20), -INT8_C( 35), INT8_C( 24), INT8_C( 41), -INT8_C( 11) }, { INT8_C( 14), -INT8_C( 4), -INT8_C( 13), INT8_C( 5), -INT8_C( 9), INT8_C( 6), INT8_C( 10), -INT8_C( 3) }, { INT8_C( 3), -INT8_C( 1), -INT8_C( 4), INT8_C( 1), -INT8_C( 3), INT8_C( 1), INT8_C( 2), -INT8_C( 1) }, { INT8_C( 1), -INT8_C( 1), -INT8_C( 2), INT8_C( 0), -INT8_C( 2), INT8_C( 0), INT8_C( 1), -INT8_C( 1) }, { INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1) } }, { { INT8_C( 26), INT8_C( 83), -INT8_C( 79), INT8_C( 42), -INT8_C( 50), -INT8_C( 42), INT8_C( 72), -INT8_C( 2) }, { INT8_C( 13), INT8_C( 41), -INT8_C( 40), INT8_C( 21), -INT8_C( 25), -INT8_C( 21), INT8_C( 36), -INT8_C( 1) }, { INT8_C( 3), INT8_C( 10), -INT8_C( 10), INT8_C( 5), -INT8_C( 7), -INT8_C( 6), INT8_C( 9), -INT8_C( 1) }, { INT8_C( 0), INT8_C( 2), -INT8_C( 3), INT8_C( 1), -INT8_C( 2), -INT8_C( 2), INT8_C( 2), -INT8_C( 1) }, { INT8_C( 0), INT8_C( 1), -INT8_C( 2), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 1), -INT8_C( 1) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t r1 = simde_vshr_n_s8(a, 1); simde_int8x8_t r3 = simde_vshr_n_s8(a, 3); simde_int8x8_t r5 = simde_vshr_n_s8(a, 5); simde_int8x8_t r6 = simde_vshr_n_s8(a, 6); 
simde_int8x8_t r8 = simde_vshr_n_s8(a, 8); simde_test_arm_neon_assert_equal_i8x8(r1, simde_vld1_s8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i8x8(r3, simde_vld1_s8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i8x8(r5, simde_vld1_s8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_i8x8(r6, simde_vld1_s8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i8x8(r8, simde_vld1_s8(test_vec[i].r8)); } return 0; } static int test_simde_vshr_n_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t r3[4]; int16_t r6[4]; int16_t r10[4]; int16_t r13[4]; int16_t r16[4]; } test_vec[] = { { { INT16_C( 2391), -INT16_C( 30287), INT16_C( 21648), -INT16_C( 9648) }, { INT16_C( 298), -INT16_C( 3786), INT16_C( 2706), -INT16_C( 1206) }, { INT16_C( 37), -INT16_C( 474), INT16_C( 338), -INT16_C( 151) }, { INT16_C( 2), -INT16_C( 30), INT16_C( 21), -INT16_C( 10) }, { INT16_C( 0), -INT16_C( 4), INT16_C( 2), -INT16_C( 2) }, { INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1) } }, { { -INT16_C( 32696), -INT16_C( 7749), INT16_C( 3517), -INT16_C( 3032) }, { -INT16_C( 4087), -INT16_C( 969), INT16_C( 439), -INT16_C( 379) }, { -INT16_C( 511), -INT16_C( 122), INT16_C( 54), -INT16_C( 48) }, { -INT16_C( 32), -INT16_C( 8), INT16_C( 3), -INT16_C( 3) }, { -INT16_C( 4), -INT16_C( 1), INT16_C( 0), -INT16_C( 1) }, { -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1) } }, { { -INT16_C( 25896), -INT16_C( 19991), -INT16_C( 18945), INT16_C( 17860) }, { -INT16_C( 3237), -INT16_C( 2499), -INT16_C( 2369), INT16_C( 2232) }, { -INT16_C( 405), -INT16_C( 313), -INT16_C( 297), INT16_C( 279) }, { -INT16_C( 26), -INT16_C( 20), -INT16_C( 19), INT16_C( 17) }, { -INT16_C( 4), -INT16_C( 3), -INT16_C( 3), INT16_C( 2) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0) } }, { { -INT16_C( 15514), -INT16_C( 7567), -INT16_C( 4534), -INT16_C( 24034) }, { -INT16_C( 1940), -INT16_C( 946), -INT16_C( 567), -INT16_C( 3005) }, { -INT16_C( 243), -INT16_C( 119), -INT16_C( 71), -INT16_C( 376) }, { 
-INT16_C( 16), -INT16_C( 8), -INT16_C( 5), -INT16_C( 24) }, { -INT16_C( 2), -INT16_C( 1), -INT16_C( 1), -INT16_C( 3) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } }, { { -INT16_C( 12297), -INT16_C( 30933), INT16_C( 31524), INT16_C( 27746) }, { -INT16_C( 1538), -INT16_C( 3867), INT16_C( 3940), INT16_C( 3468) }, { -INT16_C( 193), -INT16_C( 484), INT16_C( 492), INT16_C( 433) }, { -INT16_C( 13), -INT16_C( 31), INT16_C( 30), INT16_C( 27) }, { -INT16_C( 2), -INT16_C( 4), INT16_C( 3), INT16_C( 3) }, { -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 7675), -INT16_C( 18355), INT16_C( 30250), INT16_C( 940) }, { INT16_C( 959), -INT16_C( 2295), INT16_C( 3781), INT16_C( 117) }, { INT16_C( 119), -INT16_C( 287), INT16_C( 472), INT16_C( 14) }, { INT16_C( 7), -INT16_C( 18), INT16_C( 29), INT16_C( 0) }, { INT16_C( 0), -INT16_C( 3), INT16_C( 3), INT16_C( 0) }, { INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 27376), INT16_C( 4020), INT16_C( 30795), -INT16_C( 20139) }, { -INT16_C( 3422), INT16_C( 502), INT16_C( 3849), -INT16_C( 2518) }, { -INT16_C( 428), INT16_C( 62), INT16_C( 481), -INT16_C( 315) }, { -INT16_C( 27), INT16_C( 3), INT16_C( 30), -INT16_C( 20) }, { -INT16_C( 4), INT16_C( 0), INT16_C( 3), -INT16_C( 3) }, { -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1) } }, { { -INT16_C( 14789), -INT16_C( 31085), -INT16_C( 20044), -INT16_C( 21720) }, { -INT16_C( 1849), -INT16_C( 3886), -INT16_C( 2506), -INT16_C( 2715) }, { -INT16_C( 232), -INT16_C( 486), -INT16_C( 314), -INT16_C( 340) }, { -INT16_C( 15), -INT16_C( 31), -INT16_C( 20), -INT16_C( 22) }, { -INT16_C( 2), -INT16_C( 4), -INT16_C( 3), -INT16_C( 3) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t r3 = simde_vshr_n_s16(a, 3); simde_int16x4_t r6 = simde_vshr_n_s16(a, 6); simde_int16x4_t r10 = 
simde_vshr_n_s16(a, 10); simde_int16x4_t r13 = simde_vshr_n_s16(a, 13); simde_int16x4_t r16 = simde_vshr_n_s16(a, 16); simde_test_arm_neon_assert_equal_i16x4(r3, simde_vld1_s16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i16x4(r6, simde_vld1_s16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i16x4(r10, simde_vld1_s16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_i16x4(r13, simde_vld1_s16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i16x4(r16, simde_vld1_s16(test_vec[i].r16)); } return 0; } static int test_simde_vshr_n_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t r6[2]; int32_t r13[2]; int32_t r19[2]; int32_t r26[2]; int32_t r32[2]; } test_vec[] = { { { INT32_C( 1708457718), -INT32_C( 2095866327) }, { INT32_C( 26694651), -INT32_C( 32747912) }, { INT32_C( 208551), -INT32_C( 255844) }, { INT32_C( 3258), -INT32_C( 3998) }, { INT32_C( 25), -INT32_C( 32) }, { INT32_C( 0), -INT32_C( 1) } }, { { -INT32_C( 614449246), -INT32_C( 1580529702) }, { -INT32_C( 9600770), -INT32_C( 24695777) }, { -INT32_C( 75007), -INT32_C( 192936) }, { -INT32_C( 1172), -INT32_C( 3015) }, { -INT32_C( 10), -INT32_C( 24) }, { -INT32_C( 1), -INT32_C( 1) } }, { { INT32_C( 1039885425), -INT32_C( 887448430) }, { INT32_C( 16248209), -INT32_C( 13866382) }, { INT32_C( 126939), -INT32_C( 108332) }, { INT32_C( 1983), -INT32_C( 1693) }, { INT32_C( 15), -INT32_C( 14) }, { INT32_C( 0), -INT32_C( 1) } }, { { INT32_C( 1148170012), INT32_C( 1346167130) }, { INT32_C( 17940156), INT32_C( 21033861) }, { INT32_C( 140157), INT32_C( 164327) }, { INT32_C( 2189), INT32_C( 2567) }, { INT32_C( 17), INT32_C( 20) }, { INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 162926816), INT32_C( 1351403950) }, { INT32_C( 2545731), INT32_C( 21115686) }, { INT32_C( 19888), INT32_C( 164966) }, { INT32_C( 310), INT32_C( 2577) }, { INT32_C( 2), INT32_C( 20) }, { INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 500438008), INT32_C( 1686370035) }, { -INT32_C( 7819344), INT32_C( 26349531) }, { -INT32_C( 61089), 
INT32_C( 205855) }, { -INT32_C( 955), INT32_C( 3216) }, { -INT32_C( 8), INT32_C( 25) }, { -INT32_C( 1), INT32_C( 0) } }, { { -INT32_C( 308117925), INT32_C( 934919195) }, { -INT32_C( 4814343), INT32_C( 14608112) }, { -INT32_C( 37613), INT32_C( 114125) }, { -INT32_C( 588), INT32_C( 1783) }, { -INT32_C( 5), INT32_C( 13) }, { -INT32_C( 1), INT32_C( 0) } }, { { -INT32_C( 964941717), -INT32_C( 384387063) }, { -INT32_C( 15077215), -INT32_C( 6006048) }, { -INT32_C( 117791), -INT32_C( 46923) }, { -INT32_C( 1841), -INT32_C( 734) }, { -INT32_C( 15), -INT32_C( 6) }, { -INT32_C( 1), -INT32_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t r6 = simde_vshr_n_s32(a, 6); simde_int32x2_t r13 = simde_vshr_n_s32(a, 13); simde_int32x2_t r19 = simde_vshr_n_s32(a, 19); simde_int32x2_t r26 = simde_vshr_n_s32(a, 26); simde_int32x2_t r32 = simde_vshr_n_s32(a, 32); simde_test_arm_neon_assert_equal_i32x2(r6, simde_vld1_s32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i32x2(r13, simde_vld1_s32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i32x2(r19, simde_vld1_s32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_i32x2(r26, simde_vld1_s32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i32x2(r32, simde_vld1_s32(test_vec[i].r32)); } return 0; } static int test_simde_vshr_n_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; int64_t r13[1]; int64_t r26[1]; int64_t r39[1]; int64_t r52[1]; int64_t r64[1]; } test_vec[] = { { { -INT64_C( 3995365984096218581) }, { -INT64_C( 487715574230496) }, { -INT64_C( 59535592558) }, { -INT64_C( 7267529) }, { -INT64_C( 888) }, { -INT64_C( 1) } }, { { INT64_C( 1985728392210632478) }, { INT64_C( 242398485377274) }, { INT64_C( 29589658859) }, { INT64_C( 3612018) }, { INT64_C( 440) }, { INT64_C( 0) } }, { { INT64_C( 1822468078428861345) }, { INT64_C( 222469247855085) }, { INT64_C( 27156890607) }, { INT64_C( 3315050) }, { INT64_C( 404) }, { 
INT64_C( 0) } }, { { INT64_C( 3171718320008749032) }, { INT64_C( 387172646485442) }, { INT64_C( 47262285947) }, { INT64_C( 5769322) }, { INT64_C( 704) }, { INT64_C( 0) } }, { { INT64_C( 206608010581592391) }, { INT64_C( 25220704416698) }, { INT64_C( 3078699269) }, { INT64_C( 375817) }, { INT64_C( 45) }, { INT64_C( 0) } }, { { INT64_C( 6405032054101179732) }, { INT64_C( 781864264416647) }, { INT64_C( 95442415089) }, { INT64_C( 11650685) }, { INT64_C( 1422) }, { INT64_C( 0) } }, { { -INT64_C( 4748376384868048655) }, { -INT64_C( 579635789168463) }, { -INT64_C( 70756321920) }, { -INT64_C( 8637247) }, { -INT64_C( 1055) }, { -INT64_C( 1) } }, { { INT64_C( 5759454149627204287) }, { INT64_C( 703058367874414) }, { INT64_C( 85822554672) }, { INT64_C( 10476386) }, { INT64_C( 1278) }, { INT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t r13 = simde_vshr_n_s64(a, 13); simde_int64x1_t r26 = simde_vshr_n_s64(a, 26); simde_int64x1_t r39 = simde_vshr_n_s64(a, 39); simde_int64x1_t r52 = simde_vshr_n_s64(a, 52); simde_int64x1_t r64 = simde_vshr_n_s64(a, 64); simde_test_arm_neon_assert_equal_i64x1(r13, simde_vld1_s64(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i64x1(r26, simde_vld1_s64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i64x1(r39, simde_vld1_s64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_i64x1(r52, simde_vld1_s64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_i64x1(r64, simde_vld1_s64(test_vec[i].r64)); } return 0; } static int test_simde_vshr_n_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t r1[8]; uint8_t r3[8]; uint8_t r5[8]; uint8_t r6[8]; uint8_t r8[8]; } test_vec[] = { { { UINT8_C( 81), UINT8_C(224), UINT8_C( 99), UINT8_C(157), UINT8_C(155), UINT8_C( 57), UINT8_C(165), UINT8_C(241) }, { UINT8_C( 40), UINT8_C(112), UINT8_C( 49), UINT8_C( 78), UINT8_C( 77), UINT8_C( 28), UINT8_C( 82), UINT8_C(120) }, { UINT8_C( 10), 
UINT8_C( 28), UINT8_C( 12), UINT8_C( 19), UINT8_C( 19), UINT8_C( 7), UINT8_C( 20), UINT8_C( 30) }, { UINT8_C( 2), UINT8_C( 7), UINT8_C( 3), UINT8_C( 4), UINT8_C( 4), UINT8_C( 1), UINT8_C( 5), UINT8_C( 7) }, { UINT8_C( 1), UINT8_C( 3), UINT8_C( 1), UINT8_C( 2), UINT8_C( 2), UINT8_C( 0), UINT8_C( 2), UINT8_C( 3) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(206), UINT8_C( 99), UINT8_C(157), UINT8_C( 5), UINT8_C( 72), UINT8_C(119), UINT8_C(134), UINT8_C( 48) }, { UINT8_C(103), UINT8_C( 49), UINT8_C( 78), UINT8_C( 2), UINT8_C( 36), UINT8_C( 59), UINT8_C( 67), UINT8_C( 24) }, { UINT8_C( 25), UINT8_C( 12), UINT8_C( 19), UINT8_C( 0), UINT8_C( 9), UINT8_C( 14), UINT8_C( 16), UINT8_C( 6) }, { UINT8_C( 6), UINT8_C( 3), UINT8_C( 4), UINT8_C( 0), UINT8_C( 2), UINT8_C( 3), UINT8_C( 4), UINT8_C( 1) }, { UINT8_C( 3), UINT8_C( 1), UINT8_C( 2), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 2), UINT8_C( 0) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(191), UINT8_C(148), UINT8_C(238), UINT8_C(173), UINT8_C(114), UINT8_C(252), UINT8_C( 44), UINT8_C( 59) }, { UINT8_C( 95), UINT8_C( 74), UINT8_C(119), UINT8_C( 86), UINT8_C( 57), UINT8_C(126), UINT8_C( 22), UINT8_C( 29) }, { UINT8_C( 23), UINT8_C( 18), UINT8_C( 29), UINT8_C( 21), UINT8_C( 14), UINT8_C( 31), UINT8_C( 5), UINT8_C( 7) }, { UINT8_C( 5), UINT8_C( 4), UINT8_C( 7), UINT8_C( 5), UINT8_C( 3), UINT8_C( 7), UINT8_C( 1), UINT8_C( 1) }, { UINT8_C( 2), UINT8_C( 2), UINT8_C( 3), UINT8_C( 2), UINT8_C( 1), UINT8_C( 3), UINT8_C( 0), UINT8_C( 0) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 56), UINT8_C(111), UINT8_C(144), UINT8_C(208), UINT8_C( 21), UINT8_C(102), UINT8_C(139), UINT8_C(102) }, { UINT8_C( 28), UINT8_C( 55), UINT8_C( 72), UINT8_C(104), UINT8_C( 10), UINT8_C( 51), UINT8_C( 69), UINT8_C( 
51) }, { UINT8_C( 7), UINT8_C( 13), UINT8_C( 18), UINT8_C( 26), UINT8_C( 2), UINT8_C( 12), UINT8_C( 17), UINT8_C( 12) }, { UINT8_C( 1), UINT8_C( 3), UINT8_C( 4), UINT8_C( 6), UINT8_C( 0), UINT8_C( 3), UINT8_C( 4), UINT8_C( 3) }, { UINT8_C( 0), UINT8_C( 1), UINT8_C( 2), UINT8_C( 3), UINT8_C( 0), UINT8_C( 1), UINT8_C( 2), UINT8_C( 1) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 70), UINT8_C(238), UINT8_C( 3), UINT8_C(225), UINT8_C( 39), UINT8_C(168), UINT8_C(211), UINT8_C(245) }, { UINT8_C( 35), UINT8_C(119), UINT8_C( 1), UINT8_C(112), UINT8_C( 19), UINT8_C( 84), UINT8_C(105), UINT8_C(122) }, { UINT8_C( 8), UINT8_C( 29), UINT8_C( 0), UINT8_C( 28), UINT8_C( 4), UINT8_C( 21), UINT8_C( 26), UINT8_C( 30) }, { UINT8_C( 2), UINT8_C( 7), UINT8_C( 0), UINT8_C( 7), UINT8_C( 1), UINT8_C( 5), UINT8_C( 6), UINT8_C( 7) }, { UINT8_C( 1), UINT8_C( 3), UINT8_C( 0), UINT8_C( 3), UINT8_C( 0), UINT8_C( 2), UINT8_C( 3), UINT8_C( 3) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 11), UINT8_C(112), UINT8_C(251), UINT8_C( 83), UINT8_C(231), UINT8_C(129), UINT8_C(131), UINT8_C(167) }, { UINT8_C( 5), UINT8_C( 56), UINT8_C(125), UINT8_C( 41), UINT8_C(115), UINT8_C( 64), UINT8_C( 65), UINT8_C( 83) }, { UINT8_C( 1), UINT8_C( 14), UINT8_C( 31), UINT8_C( 10), UINT8_C( 28), UINT8_C( 16), UINT8_C( 16), UINT8_C( 20) }, { UINT8_C( 0), UINT8_C( 3), UINT8_C( 7), UINT8_C( 2), UINT8_C( 7), UINT8_C( 4), UINT8_C( 4), UINT8_C( 5) }, { UINT8_C( 0), UINT8_C( 1), UINT8_C( 3), UINT8_C( 1), UINT8_C( 3), UINT8_C( 2), UINT8_C( 2), UINT8_C( 2) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 21), UINT8_C(114), UINT8_C( 84), UINT8_C(135), UINT8_C(110), UINT8_C(128), UINT8_C(195), UINT8_C(166) }, { UINT8_C( 10), UINT8_C( 57), UINT8_C( 42), UINT8_C( 67), UINT8_C( 55), UINT8_C( 64), 
UINT8_C( 97), UINT8_C( 83) }, { UINT8_C( 2), UINT8_C( 14), UINT8_C( 10), UINT8_C( 16), UINT8_C( 13), UINT8_C( 16), UINT8_C( 24), UINT8_C( 20) }, { UINT8_C( 0), UINT8_C( 3), UINT8_C( 2), UINT8_C( 4), UINT8_C( 3), UINT8_C( 4), UINT8_C( 6), UINT8_C( 5) }, { UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 2), UINT8_C( 1), UINT8_C( 2), UINT8_C( 3), UINT8_C( 2) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(240), UINT8_C( 83), UINT8_C(119), UINT8_C( 5), UINT8_C(185), UINT8_C( 2), UINT8_C(107), UINT8_MAX }, { UINT8_C(120), UINT8_C( 41), UINT8_C( 59), UINT8_C( 2), UINT8_C( 92), UINT8_C( 1), UINT8_C( 53), UINT8_C(127) }, { UINT8_C( 30), UINT8_C( 10), UINT8_C( 14), UINT8_C( 0), UINT8_C( 23), UINT8_C( 0), UINT8_C( 13), UINT8_C( 31) }, { UINT8_C( 7), UINT8_C( 2), UINT8_C( 3), UINT8_C( 0), UINT8_C( 5), UINT8_C( 0), UINT8_C( 3), UINT8_C( 7) }, { UINT8_C( 3), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 2), UINT8_C( 0), UINT8_C( 1), UINT8_C( 3) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t r1 = simde_vshr_n_u8(a, 1); simde_uint8x8_t r3 = simde_vshr_n_u8(a, 3); simde_uint8x8_t r5 = simde_vshr_n_u8(a, 5); simde_uint8x8_t r6 = simde_vshr_n_u8(a, 6); simde_uint8x8_t r8 = simde_vshr_n_u8(a, 8); simde_test_arm_neon_assert_equal_u8x8(r1, simde_vld1_u8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u8x8(r3, simde_vld1_u8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u8x8(r5, simde_vld1_u8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_u8x8(r6, simde_vld1_u8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u8x8(r8, simde_vld1_u8(test_vec[i].r8)); } return 0; } static int test_simde_vshr_n_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t r3[4]; uint16_t r6[4]; uint16_t 
r10[4]; uint16_t r13[4]; uint16_t r16[4]; } test_vec[] = { { { UINT16_C(18082), UINT16_C(57692), UINT16_C(41793), UINT16_C(56495) }, { UINT16_C( 2260), UINT16_C( 7211), UINT16_C( 5224), UINT16_C( 7061) }, { UINT16_C( 282), UINT16_C( 901), UINT16_C( 653), UINT16_C( 882) }, { UINT16_C( 17), UINT16_C( 56), UINT16_C( 40), UINT16_C( 55) }, { UINT16_C( 2), UINT16_C( 7), UINT16_C( 5), UINT16_C( 6) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C( 8780), UINT16_C(52988), UINT16_C(13539), UINT16_C(19184) }, { UINT16_C( 1097), UINT16_C( 6623), UINT16_C( 1692), UINT16_C( 2398) }, { UINT16_C( 137), UINT16_C( 827), UINT16_C( 211), UINT16_C( 299) }, { UINT16_C( 8), UINT16_C( 51), UINT16_C( 13), UINT16_C( 18) }, { UINT16_C( 1), UINT16_C( 6), UINT16_C( 1), UINT16_C( 2) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(63422), UINT16_C(13365), UINT16_C(41288), UINT16_C(19151) }, { UINT16_C( 7927), UINT16_C( 1670), UINT16_C( 5161), UINT16_C( 2393) }, { UINT16_C( 990), UINT16_C( 208), UINT16_C( 645), UINT16_C( 299) }, { UINT16_C( 61), UINT16_C( 13), UINT16_C( 40), UINT16_C( 18) }, { UINT16_C( 7), UINT16_C( 1), UINT16_C( 5), UINT16_C( 2) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(52253), UINT16_C( 3308), UINT16_C(26061), UINT16_C(28915) }, { UINT16_C( 6531), UINT16_C( 413), UINT16_C( 3257), UINT16_C( 3614) }, { UINT16_C( 816), UINT16_C( 51), UINT16_C( 407), UINT16_C( 451) }, { UINT16_C( 51), UINT16_C( 3), UINT16_C( 25), UINT16_C( 28) }, { UINT16_C( 6), UINT16_C( 0), UINT16_C( 3), UINT16_C( 3) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(20395), UINT16_C(60753), UINT16_C( 242), UINT16_C(16329) }, { UINT16_C( 2549), UINT16_C( 7594), UINT16_C( 30), UINT16_C( 2041) }, { UINT16_C( 318), UINT16_C( 949), UINT16_C( 3), UINT16_C( 255) }, { UINT16_C( 19), UINT16_C( 59), UINT16_C( 0), UINT16_C( 15) }, { UINT16_C( 2), UINT16_C( 7), UINT16_C( 0), UINT16_C( 1) }, { UINT16_C( 
0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(50722), UINT16_C( 1293), UINT16_C(65018), UINT16_C(47184) }, { UINT16_C( 6340), UINT16_C( 161), UINT16_C( 8127), UINT16_C( 5898) }, { UINT16_C( 792), UINT16_C( 20), UINT16_C( 1015), UINT16_C( 737) }, { UINT16_C( 49), UINT16_C( 1), UINT16_C( 63), UINT16_C( 46) }, { UINT16_C( 6), UINT16_C( 0), UINT16_C( 7), UINT16_C( 5) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(34292), UINT16_C(15596), UINT16_C(47910), UINT16_C(17286) }, { UINT16_C( 4286), UINT16_C( 1949), UINT16_C( 5988), UINT16_C( 2160) }, { UINT16_C( 535), UINT16_C( 243), UINT16_C( 748), UINT16_C( 270) }, { UINT16_C( 33), UINT16_C( 15), UINT16_C( 46), UINT16_C( 16) }, { UINT16_C( 4), UINT16_C( 1), UINT16_C( 5), UINT16_C( 2) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(29575), UINT16_C(21839), UINT16_C(17112), UINT16_C(33989) }, { UINT16_C( 3696), UINT16_C( 2729), UINT16_C( 2139), UINT16_C( 4248) }, { UINT16_C( 462), UINT16_C( 341), UINT16_C( 267), UINT16_C( 531) }, { UINT16_C( 28), UINT16_C( 21), UINT16_C( 16), UINT16_C( 33) }, { UINT16_C( 3), UINT16_C( 2), UINT16_C( 2), UINT16_C( 4) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t r3 = simde_vshr_n_u16(a, 3); simde_uint16x4_t r6 = simde_vshr_n_u16(a, 6); simde_uint16x4_t r10 = simde_vshr_n_u16(a, 10); simde_uint16x4_t r13 = simde_vshr_n_u16(a, 13); simde_uint16x4_t r16 = simde_vshr_n_u16(a, 16); simde_test_arm_neon_assert_equal_u16x4(r3, simde_vld1_u16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u16x4(r6, simde_vld1_u16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u16x4(r10, simde_vld1_u16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_u16x4(r13, simde_vld1_u16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u16x4(r16, simde_vld1_u16(test_vec[i].r16)); 
} return 0; } static int test_simde_vshr_n_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t r6[2]; uint32_t r13[2]; uint32_t r19[2]; uint32_t r26[2]; uint32_t r32[2]; } test_vec[] = { { { UINT32_C( 696026497), UINT32_C(3975571712) }, { UINT32_C( 10875414), UINT32_C( 62118308) }, { UINT32_C( 84964), UINT32_C( 485299) }, { UINT32_C( 1327), UINT32_C( 7582) }, { UINT32_C( 10), UINT32_C( 59) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(2582254795), UINT32_C(3772650691) }, { UINT32_C( 40347731), UINT32_C( 58947667) }, { UINT32_C( 315216), UINT32_C( 460528) }, { UINT32_C( 4925), UINT32_C( 7195) }, { UINT32_C( 38), UINT32_C( 56) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 283642668), UINT32_C(2746048542) }, { UINT32_C( 4431916), UINT32_C( 42907008) }, { UINT32_C( 34624), UINT32_C( 335211) }, { UINT32_C( 541), UINT32_C( 5237) }, { UINT32_C( 4), UINT32_C( 40) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 505097202), UINT32_C(2508049172) }, { UINT32_C( 7892143), UINT32_C( 39188268) }, { UINT32_C( 61657), UINT32_C( 306158) }, { UINT32_C( 963), UINT32_C( 4783) }, { UINT32_C( 7), UINT32_C( 37) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(1354692944), UINT32_C( 775795811) }, { UINT32_C( 21167077), UINT32_C( 12121809) }, { UINT32_C( 165367), UINT32_C( 94701) }, { UINT32_C( 2583), UINT32_C( 1479) }, { UINT32_C( 20), UINT32_C( 11) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(2345084872), UINT32_C(1735173691) }, { UINT32_C( 36641951), UINT32_C( 27112088) }, { UINT32_C( 286265), UINT32_C( 211813) }, { UINT32_C( 4472), UINT32_C( 3309) }, { UINT32_C( 34), UINT32_C( 25) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(3497481393), UINT32_C(2809341108) }, { UINT32_C( 54648146), UINT32_C( 43895954) }, { UINT32_C( 426938), UINT32_C( 342937) }, { UINT32_C( 6670), UINT32_C( 5358) }, { UINT32_C( 52), UINT32_C( 41) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(1673891407), UINT32_C(2851619417) }, { UINT32_C( 26154553), UINT32_C( 44556553) }, { 
UINT32_C( 204332), UINT32_C( 348098) }, { UINT32_C( 3192), UINT32_C( 5439) }, { UINT32_C( 24), UINT32_C( 42) }, { UINT32_C( 0), UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t r6 = simde_vshr_n_u32(a, 6); simde_uint32x2_t r13 = simde_vshr_n_u32(a, 13); simde_uint32x2_t r19 = simde_vshr_n_u32(a, 19); simde_uint32x2_t r26 = simde_vshr_n_u32(a, 26); simde_uint32x2_t r32 = simde_vshr_n_u32(a, 32); simde_test_arm_neon_assert_equal_u32x2(r6, simde_vld1_u32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u32x2(r13, simde_vld1_u32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u32x2(r19, simde_vld1_u32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_u32x2(r26, simde_vld1_u32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u32x2(r32, simde_vld1_u32(test_vec[i].r32)); } return 0; } static int test_simde_vshr_n_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; uint64_t r13[1]; uint64_t r26[1]; uint64_t r39[1]; uint64_t r52[1]; uint64_t r64[1]; } test_vec[] = { { { UINT64_C(17160806007911097636) }, { UINT64_C( 2094824952137585) }, { UINT64_C( 255715936540) }, { UINT64_C( 31215324) }, { UINT64_C( 3810) }, { UINT64_C( 0) } }, { { UINT64_C(13009184620787666765) }, { UINT64_C( 1588035232029744) }, { UINT64_C( 193851957034) }, { UINT64_C( 23663568) }, { UINT64_C( 2888) }, { UINT64_C( 0) } }, { { UINT64_C(16411739272322698297) }, { UINT64_C( 2003386141640954) }, { UINT64_C( 244553972368) }, { UINT64_C( 29852779) }, { UINT64_C( 3644) }, { UINT64_C( 0) } }, { { UINT64_C( 3898171221045080521) }, { UINT64_C( 475850979131479) }, { UINT64_C( 58087277726) }, { UINT64_C( 7090732) }, { UINT64_C( 865) }, { UINT64_C( 0) } }, { { UINT64_C(14073411793873930043) }, { UINT64_C( 1717945775619376) }, { UINT64_C( 209710177687) }, { UINT64_C( 25599386) }, { UINT64_C( 3124) }, { UINT64_C( 0) } }, { { UINT64_C(17658043201827558621) }, { UINT64_C( 2155522851785590) }, { 
UINT64_C( 263125348118) }, { UINT64_C( 32119793) }, { UINT64_C( 3920) }, { UINT64_C( 0) } }, { { UINT64_C( 3103817293289106500) }, { UINT64_C( 378883946934705) }, { UINT64_C( 46250481803) }, { UINT64_C( 5645810) }, { UINT64_C( 689) }, { UINT64_C( 0) } }, { { UINT64_C( 7940616020180540650) }, { UINT64_C( 969313479025944) }, { UINT64_C( 118324399295) }, { UINT64_C( 14443896) }, { UINT64_C( 1763) }, { UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t r13 = simde_vshr_n_u64(a, 13); simde_uint64x1_t r26 = simde_vshr_n_u64(a, 26); simde_uint64x1_t r39 = simde_vshr_n_u64(a, 39); simde_uint64x1_t r52 = simde_vshr_n_u64(a, 52); simde_uint64x1_t r64 = simde_vshr_n_u64(a, 64); simde_test_arm_neon_assert_equal_u64x1(r13, simde_vld1_u64(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u64x1(r26, simde_vld1_u64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u64x1(r39, simde_vld1_u64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_u64x1(r52, simde_vld1_u64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_u64x1(r64, simde_vld1_u64(test_vec[i].r64)); } return 0; } static int test_simde_vshrq_n_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t r1[16]; int8_t r3[16]; int8_t r5[16]; int8_t r6[16]; int8_t r8[16]; } test_vec[] = { { { -INT8_C( 72), -INT8_C( 70), INT8_C( 111), INT8_C( 46), -INT8_C( 126), -INT8_C( 55), INT8_C( 9), -INT8_C( 8), -INT8_C( 118), INT8_C( 77), -INT8_C( 85), INT8_C( 57), -INT8_C( 77), INT8_C( 54), -INT8_C( 40), -INT8_C( 61) }, { -INT8_C( 36), -INT8_C( 35), INT8_C( 55), INT8_C( 23), -INT8_C( 63), -INT8_C( 28), INT8_C( 4), -INT8_C( 4), -INT8_C( 59), INT8_C( 38), -INT8_C( 43), INT8_C( 28), -INT8_C( 39), INT8_C( 27), -INT8_C( 20), -INT8_C( 31) }, { -INT8_C( 9), -INT8_C( 9), INT8_C( 13), INT8_C( 5), -INT8_C( 16), -INT8_C( 7), INT8_C( 1), -INT8_C( 1), -INT8_C( 15), INT8_C( 9), -INT8_C( 11), INT8_C( 7), -INT8_C( 10), INT8_C( 6), 
-INT8_C( 5), -INT8_C( 8) }, { -INT8_C( 3), -INT8_C( 3), INT8_C( 3), INT8_C( 1), -INT8_C( 4), -INT8_C( 2), INT8_C( 0), -INT8_C( 1), -INT8_C( 4), INT8_C( 2), -INT8_C( 3), INT8_C( 1), -INT8_C( 3), INT8_C( 1), -INT8_C( 2), -INT8_C( 2) }, { -INT8_C( 2), -INT8_C( 2), INT8_C( 1), INT8_C( 0), -INT8_C( 2), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 2), INT8_C( 1), -INT8_C( 2), INT8_C( 0), -INT8_C( 2), INT8_C( 0), -INT8_C( 1), -INT8_C( 1) }, { -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1) } }, { { INT8_C( 31), INT8_C( 20), -INT8_C( 87), INT8_C( 65), INT8_C( 111), INT8_C( 32), INT8_C( 124), -INT8_C( 48), -INT8_C( 20), INT8_C( 13), INT8_C( 53), -INT8_C( 120), -INT8_C( 14), INT8_C( 11), INT8_C( 9), -INT8_C( 86) }, { INT8_C( 15), INT8_C( 10), -INT8_C( 44), INT8_C( 32), INT8_C( 55), INT8_C( 16), INT8_C( 62), -INT8_C( 24), -INT8_C( 10), INT8_C( 6), INT8_C( 26), -INT8_C( 60), -INT8_C( 7), INT8_C( 5), INT8_C( 4), -INT8_C( 43) }, { INT8_C( 3), INT8_C( 2), -INT8_C( 11), INT8_C( 8), INT8_C( 13), INT8_C( 4), INT8_C( 15), -INT8_C( 6), -INT8_C( 3), INT8_C( 1), INT8_C( 6), -INT8_C( 15), -INT8_C( 2), INT8_C( 1), INT8_C( 1), -INT8_C( 11) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 3), INT8_C( 2), INT8_C( 3), INT8_C( 1), INT8_C( 3), -INT8_C( 2), -INT8_C( 1), INT8_C( 0), INT8_C( 1), -INT8_C( 4), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 3) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 2), INT8_C( 1), INT8_C( 1), INT8_C( 0), INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 2), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 2) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1) } }, { { -INT8_C( 59), INT8_C( 120), -INT8_C( 40), INT8_C( 72), INT8_C( 65), -INT8_C( 30), INT8_C( 64), 
-INT8_C( 53), INT8_C( 47), -INT8_C( 20), INT8_C( 4), -INT8_C( 30), INT8_C( 34), -INT8_C( 36), -INT8_C( 91), INT8_C( 65) }, { -INT8_C( 30), INT8_C( 60), -INT8_C( 20), INT8_C( 36), INT8_C( 32), -INT8_C( 15), INT8_C( 32), -INT8_C( 27), INT8_C( 23), -INT8_C( 10), INT8_C( 2), -INT8_C( 15), INT8_C( 17), -INT8_C( 18), -INT8_C( 46), INT8_C( 32) }, { -INT8_C( 8), INT8_C( 15), -INT8_C( 5), INT8_C( 9), INT8_C( 8), -INT8_C( 4), INT8_C( 8), -INT8_C( 7), INT8_C( 5), -INT8_C( 3), INT8_C( 0), -INT8_C( 4), INT8_C( 4), -INT8_C( 5), -INT8_C( 12), INT8_C( 8) }, { -INT8_C( 2), INT8_C( 3), -INT8_C( 2), INT8_C( 2), INT8_C( 2), -INT8_C( 1), INT8_C( 2), -INT8_C( 2), INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 1), -INT8_C( 2), -INT8_C( 3), INT8_C( 2) }, { -INT8_C( 1), INT8_C( 1), -INT8_C( 1), INT8_C( 1), INT8_C( 1), -INT8_C( 1), INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 2), INT8_C( 1) }, { -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0) } }, { { -INT8_C( 16), INT8_C( 78), -INT8_C( 126), INT8_C( 95), INT8_C( 111), -INT8_C( 1), INT8_C( 47), INT8_C( 91), INT8_C( 12), INT8_C( 100), -INT8_C( 28), -INT8_C( 2), INT8_C( 112), -INT8_C( 19), -INT8_C( 87), INT8_C( 53) }, { -INT8_C( 8), INT8_C( 39), -INT8_C( 63), INT8_C( 47), INT8_C( 55), -INT8_C( 1), INT8_C( 23), INT8_C( 45), INT8_C( 6), INT8_C( 50), -INT8_C( 14), -INT8_C( 1), INT8_C( 56), -INT8_C( 10), -INT8_C( 44), INT8_C( 26) }, { -INT8_C( 2), INT8_C( 9), -INT8_C( 16), INT8_C( 11), INT8_C( 13), -INT8_C( 1), INT8_C( 5), INT8_C( 11), INT8_C( 1), INT8_C( 12), -INT8_C( 4), -INT8_C( 1), INT8_C( 14), -INT8_C( 3), -INT8_C( 11), INT8_C( 6) }, { -INT8_C( 1), INT8_C( 2), -INT8_C( 4), INT8_C( 2), INT8_C( 3), -INT8_C( 1), INT8_C( 1), INT8_C( 2), INT8_C( 0), INT8_C( 3), -INT8_C( 1), -INT8_C( 1), INT8_C( 3), -INT8_C( 1), -INT8_C( 3), 
INT8_C( 1) }, { -INT8_C( 1), INT8_C( 1), -INT8_C( 2), INT8_C( 1), INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 1), -INT8_C( 1), -INT8_C( 2), INT8_C( 0) }, { -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0) } }, { { INT8_C( 101), -INT8_C( 127), INT8_C( 125), -INT8_C( 90), INT8_C( 99), -INT8_C( 66), INT8_C( 113), -INT8_C( 110), -INT8_C( 86), INT8_C( 117), INT8_C( 116), -INT8_C( 52), INT8_C( 82), INT8_C( 25), INT8_C( 13), INT8_C( 66) }, { INT8_C( 50), -INT8_C( 64), INT8_C( 62), -INT8_C( 45), INT8_C( 49), -INT8_C( 33), INT8_C( 56), -INT8_C( 55), -INT8_C( 43), INT8_C( 58), INT8_C( 58), -INT8_C( 26), INT8_C( 41), INT8_C( 12), INT8_C( 6), INT8_C( 33) }, { INT8_C( 12), -INT8_C( 16), INT8_C( 15), -INT8_C( 12), INT8_C( 12), -INT8_C( 9), INT8_C( 14), -INT8_C( 14), -INT8_C( 11), INT8_C( 14), INT8_C( 14), -INT8_C( 7), INT8_C( 10), INT8_C( 3), INT8_C( 1), INT8_C( 8) }, { INT8_C( 3), -INT8_C( 4), INT8_C( 3), -INT8_C( 3), INT8_C( 3), -INT8_C( 3), INT8_C( 3), -INT8_C( 4), -INT8_C( 3), INT8_C( 3), INT8_C( 3), -INT8_C( 2), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 2) }, { INT8_C( 1), -INT8_C( 2), INT8_C( 1), -INT8_C( 2), INT8_C( 1), -INT8_C( 2), INT8_C( 1), -INT8_C( 2), -INT8_C( 2), INT8_C( 1), INT8_C( 1), -INT8_C( 1), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 1) }, { INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 104), -INT8_C( 112), -INT8_C( 94), -INT8_C( 41), -INT8_C( 113), -INT8_C( 47), INT8_C( 50), -INT8_C( 101), INT8_C( 54), INT8_C( 22), -INT8_C( 103), -INT8_C( 90), INT8_C( 3), INT8_C( 66), -INT8_C( 37), INT8_C( 104) }, { INT8_C( 52), -INT8_C( 56), -INT8_C( 47), -INT8_C( 21), -INT8_C( 57), -INT8_C( 24), 
INT8_C( 25), -INT8_C( 51), INT8_C( 27), INT8_C( 11), -INT8_C( 52), -INT8_C( 45), INT8_C( 1), INT8_C( 33), -INT8_C( 19), INT8_C( 52) }, { INT8_C( 13), -INT8_C( 14), -INT8_C( 12), -INT8_C( 6), -INT8_C( 15), -INT8_C( 6), INT8_C( 6), -INT8_C( 13), INT8_C( 6), INT8_C( 2), -INT8_C( 13), -INT8_C( 12), INT8_C( 0), INT8_C( 8), -INT8_C( 5), INT8_C( 13) }, { INT8_C( 3), -INT8_C( 4), -INT8_C( 3), -INT8_C( 2), -INT8_C( 4), -INT8_C( 2), INT8_C( 1), -INT8_C( 4), INT8_C( 1), INT8_C( 0), -INT8_C( 4), -INT8_C( 3), INT8_C( 0), INT8_C( 2), -INT8_C( 2), INT8_C( 3) }, { INT8_C( 1), -INT8_C( 2), -INT8_C( 2), -INT8_C( 1), -INT8_C( 2), -INT8_C( 1), INT8_C( 0), -INT8_C( 2), INT8_C( 0), INT8_C( 0), -INT8_C( 2), -INT8_C( 2), INT8_C( 0), INT8_C( 1), -INT8_C( 1), INT8_C( 1) }, { INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0) } }, { { -INT8_C( 60), INT8_C( 89), INT8_C( 14), INT8_C( 39), INT8_C( 23), INT8_MAX, -INT8_C( 70), -INT8_C( 63), -INT8_C( 11), INT8_C( 46), -INT8_C( 115), INT8_C( 71), INT8_C( 72), -INT8_C( 102), -INT8_C( 119), -INT8_C( 80) }, { -INT8_C( 30), INT8_C( 44), INT8_C( 7), INT8_C( 19), INT8_C( 11), INT8_C( 63), -INT8_C( 35), -INT8_C( 32), -INT8_C( 6), INT8_C( 23), -INT8_C( 58), INT8_C( 35), INT8_C( 36), -INT8_C( 51), -INT8_C( 60), -INT8_C( 40) }, { -INT8_C( 8), INT8_C( 11), INT8_C( 1), INT8_C( 4), INT8_C( 2), INT8_C( 15), -INT8_C( 9), -INT8_C( 8), -INT8_C( 2), INT8_C( 5), -INT8_C( 15), INT8_C( 8), INT8_C( 9), -INT8_C( 13), -INT8_C( 15), -INT8_C( 10) }, { -INT8_C( 2), INT8_C( 2), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 3), -INT8_C( 3), -INT8_C( 2), -INT8_C( 1), INT8_C( 1), -INT8_C( 4), INT8_C( 2), INT8_C( 2), -INT8_C( 4), -INT8_C( 4), -INT8_C( 3) }, { -INT8_C( 1), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 1), -INT8_C( 2), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 2), INT8_C( 1), INT8_C( 1), -INT8_C( 2), 
-INT8_C( 2), -INT8_C( 2) }, { -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } }, { { INT8_C( 42), INT8_C( 43), -INT8_C( 121), -INT8_C( 71), -INT8_C( 3), -INT8_C( 71), INT8_C( 84), INT8_C( 51), -INT8_C( 48), -INT8_C( 18), -INT8_C( 39), -INT8_C( 45), INT8_C( 48), -INT8_C( 76), INT8_C( 60), -INT8_C( 12) }, { INT8_C( 21), INT8_C( 21), -INT8_C( 61), -INT8_C( 36), -INT8_C( 2), -INT8_C( 36), INT8_C( 42), INT8_C( 25), -INT8_C( 24), -INT8_C( 9), -INT8_C( 20), -INT8_C( 23), INT8_C( 24), -INT8_C( 38), INT8_C( 30), -INT8_C( 6) }, { INT8_C( 5), INT8_C( 5), -INT8_C( 16), -INT8_C( 9), -INT8_C( 1), -INT8_C( 9), INT8_C( 10), INT8_C( 6), -INT8_C( 6), -INT8_C( 3), -INT8_C( 5), -INT8_C( 6), INT8_C( 6), -INT8_C( 10), INT8_C( 7), -INT8_C( 2) }, { INT8_C( 1), INT8_C( 1), -INT8_C( 4), -INT8_C( 3), -INT8_C( 1), -INT8_C( 3), INT8_C( 2), INT8_C( 1), -INT8_C( 2), -INT8_C( 1), -INT8_C( 2), -INT8_C( 2), INT8_C( 1), -INT8_C( 3), INT8_C( 1), -INT8_C( 1) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 2), -INT8_C( 2), -INT8_C( 1), -INT8_C( 2), INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 2), INT8_C( 0), -INT8_C( 1) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t r1 = simde_vshrq_n_s8(a, 1); simde_int8x16_t r3 = simde_vshrq_n_s8(a, 3); simde_int8x16_t r5 = simde_vshrq_n_s8(a, 5); simde_int8x16_t r6 = simde_vshrq_n_s8(a, 6); simde_int8x16_t r8 = simde_vshrq_n_s8(a, 8); simde_test_arm_neon_assert_equal_i8x16(r1, simde_vld1q_s8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i8x16(r3, 
simde_vld1q_s8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i8x16(r5, simde_vld1q_s8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_i8x16(r6, simde_vld1q_s8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i8x16(r8, simde_vld1q_s8(test_vec[i].r8)); } return 0; } static int test_simde_vshrq_n_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t r3[8]; int16_t r6[8]; int16_t r10[8]; int16_t r13[8]; int16_t r16[8]; } test_vec[] = { { { -INT16_C( 14981), INT16_C( 10413), -INT16_C( 8036), -INT16_C( 13777), INT16_C( 6499), -INT16_C( 22969), INT16_C( 4904), INT16_C( 5266) }, { -INT16_C( 1873), INT16_C( 1301), -INT16_C( 1005), -INT16_C( 1723), INT16_C( 812), -INT16_C( 2872), INT16_C( 613), INT16_C( 658) }, { -INT16_C( 235), INT16_C( 162), -INT16_C( 126), -INT16_C( 216), INT16_C( 101), -INT16_C( 359), INT16_C( 76), INT16_C( 82) }, { -INT16_C( 15), INT16_C( 10), -INT16_C( 8), -INT16_C( 14), INT16_C( 6), -INT16_C( 23), INT16_C( 4), INT16_C( 5) }, { -INT16_C( 2), INT16_C( 1), -INT16_C( 1), -INT16_C( 2), INT16_C( 0), -INT16_C( 3), INT16_C( 0), INT16_C( 0) }, { -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 9136), -INT16_C( 5468), INT16_C( 31374), -INT16_C( 435), -INT16_C( 18326), INT16_C( 7725), INT16_C( 28282), -INT16_C( 2743) }, { INT16_C( 1142), -INT16_C( 684), INT16_C( 3921), -INT16_C( 55), -INT16_C( 2291), INT16_C( 965), INT16_C( 3535), -INT16_C( 343) }, { INT16_C( 142), -INT16_C( 86), INT16_C( 490), -INT16_C( 7), -INT16_C( 287), INT16_C( 120), INT16_C( 441), -INT16_C( 43) }, { INT16_C( 8), -INT16_C( 6), INT16_C( 30), -INT16_C( 1), -INT16_C( 18), INT16_C( 7), INT16_C( 27), -INT16_C( 3) }, { INT16_C( 1), -INT16_C( 1), INT16_C( 3), -INT16_C( 1), -INT16_C( 3), INT16_C( 0), INT16_C( 3), -INT16_C( 1) }, { INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1) } }, { { -INT16_C( 2253), -INT16_C( 12258), INT16_C( 19927), INT16_C( 
15002), -INT16_C( 7833), -INT16_C( 28704), INT16_C( 29684), -INT16_C( 23389) }, { -INT16_C( 282), -INT16_C( 1533), INT16_C( 2490), INT16_C( 1875), -INT16_C( 980), -INT16_C( 3588), INT16_C( 3710), -INT16_C( 2924) }, { -INT16_C( 36), -INT16_C( 192), INT16_C( 311), INT16_C( 234), -INT16_C( 123), -INT16_C( 449), INT16_C( 463), -INT16_C( 366) }, { -INT16_C( 3), -INT16_C( 12), INT16_C( 19), INT16_C( 14), -INT16_C( 8), -INT16_C( 29), INT16_C( 28), -INT16_C( 23) }, { -INT16_C( 1), -INT16_C( 2), INT16_C( 2), INT16_C( 1), -INT16_C( 1), -INT16_C( 4), INT16_C( 3), -INT16_C( 3) }, { -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1) } }, { { INT16_C( 18326), INT16_C( 9358), -INT16_C( 9278), INT16_C( 11299), INT16_C( 20627), INT16_C( 3658), -INT16_C( 27458), -INT16_C( 3581) }, { INT16_C( 2290), INT16_C( 1169), -INT16_C( 1160), INT16_C( 1412), INT16_C( 2578), INT16_C( 457), -INT16_C( 3433), -INT16_C( 448) }, { INT16_C( 286), INT16_C( 146), -INT16_C( 145), INT16_C( 176), INT16_C( 322), INT16_C( 57), -INT16_C( 430), -INT16_C( 56) }, { INT16_C( 17), INT16_C( 9), -INT16_C( 10), INT16_C( 11), INT16_C( 20), INT16_C( 3), -INT16_C( 27), -INT16_C( 4) }, { INT16_C( 2), INT16_C( 1), -INT16_C( 2), INT16_C( 1), INT16_C( 2), INT16_C( 0), -INT16_C( 4), -INT16_C( 1) }, { INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1) } }, { { INT16_C( 8587), INT16_C( 25282), INT16_C( 23663), -INT16_C( 10595), INT16_C( 32061), INT16_C( 12901), INT16_C( 2288), -INT16_C( 31018) }, { INT16_C( 1073), INT16_C( 3160), INT16_C( 2957), -INT16_C( 1325), INT16_C( 4007), INT16_C( 1612), INT16_C( 286), -INT16_C( 3878) }, { INT16_C( 134), INT16_C( 395), INT16_C( 369), -INT16_C( 166), INT16_C( 500), INT16_C( 201), INT16_C( 35), -INT16_C( 485) }, { INT16_C( 8), INT16_C( 24), INT16_C( 23), -INT16_C( 11), INT16_C( 31), INT16_C( 12), INT16_C( 2), -INT16_C( 31) }, { INT16_C( 1), INT16_C( 3), INT16_C( 2), -INT16_C( 2), 
INT16_C( 3), INT16_C( 1), INT16_C( 0), -INT16_C( 4) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1) } }, { { INT16_C( 25935), INT16_C( 4523), -INT16_C( 12736), -INT16_C( 11203), -INT16_C( 30690), -INT16_C( 8990), -INT16_C( 6884), -INT16_C( 22578) }, { INT16_C( 3241), INT16_C( 565), -INT16_C( 1592), -INT16_C( 1401), -INT16_C( 3837), -INT16_C( 1124), -INT16_C( 861), -INT16_C( 2823) }, { INT16_C( 405), INT16_C( 70), -INT16_C( 199), -INT16_C( 176), -INT16_C( 480), -INT16_C( 141), -INT16_C( 108), -INT16_C( 353) }, { INT16_C( 25), INT16_C( 4), -INT16_C( 13), -INT16_C( 11), -INT16_C( 30), -INT16_C( 9), -INT16_C( 7), -INT16_C( 23) }, { INT16_C( 3), INT16_C( 0), -INT16_C( 2), -INT16_C( 2), -INT16_C( 4), -INT16_C( 2), -INT16_C( 1), -INT16_C( 3) }, { INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } }, { { -INT16_C( 28665), INT16_C( 30217), -INT16_C( 22804), INT16_C( 10828), -INT16_C( 20188), INT16_C( 5212), INT16_C( 12985), INT16_C( 2203) }, { -INT16_C( 3584), INT16_C( 3777), -INT16_C( 2851), INT16_C( 1353), -INT16_C( 2524), INT16_C( 651), INT16_C( 1623), INT16_C( 275) }, { -INT16_C( 448), INT16_C( 472), -INT16_C( 357), INT16_C( 169), -INT16_C( 316), INT16_C( 81), INT16_C( 202), INT16_C( 34) }, { -INT16_C( 28), INT16_C( 29), -INT16_C( 23), INT16_C( 10), -INT16_C( 20), INT16_C( 5), INT16_C( 12), INT16_C( 2) }, { -INT16_C( 4), INT16_C( 3), -INT16_C( 3), INT16_C( 1), -INT16_C( 3), INT16_C( 0), INT16_C( 1), INT16_C( 0) }, { -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 18071), -INT16_C( 10214), INT16_C( 22292), INT16_C( 12972), -INT16_C( 28961), -INT16_C( 1266), -INT16_C( 8845), INT16_C( 31394) }, { INT16_C( 2258), -INT16_C( 1277), INT16_C( 2786), INT16_C( 1621), -INT16_C( 3621), -INT16_C( 159), -INT16_C( 1106), INT16_C( 3924) }, { INT16_C( 282), -INT16_C( 160), INT16_C( 348), 
INT16_C( 202), -INT16_C( 453), -INT16_C( 20), -INT16_C( 139), INT16_C( 490) }, { INT16_C( 17), -INT16_C( 10), INT16_C( 21), INT16_C( 12), -INT16_C( 29), -INT16_C( 2), -INT16_C( 9), INT16_C( 30) }, { INT16_C( 2), -INT16_C( 2), INT16_C( 2), INT16_C( 1), -INT16_C( 4), -INT16_C( 1), -INT16_C( 2), INT16_C( 3) }, { INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t r3 = simde_vshrq_n_s16(a, 3); simde_int16x8_t r6 = simde_vshrq_n_s16(a, 6); simde_int16x8_t r10 = simde_vshrq_n_s16(a, 10); simde_int16x8_t r13 = simde_vshrq_n_s16(a, 13); simde_int16x8_t r16 = simde_vshrq_n_s16(a, 16); simde_test_arm_neon_assert_equal_i16x8(r3, simde_vld1q_s16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i16x8(r6, simde_vld1q_s16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i16x8(r10, simde_vld1q_s16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_i16x8(r13, simde_vld1q_s16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i16x8(r16, simde_vld1q_s16(test_vec[i].r16)); } return 0; } static int test_simde_vshrq_n_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t r6[4]; int32_t r13[4]; int32_t r19[4]; int32_t r26[4]; int32_t r32[4]; } test_vec[] = { { { INT32_C( 125514276), INT32_C( 1669026353), -INT32_C( 1870822169), INT32_C( 1046181581) }, { INT32_C( 1961160), INT32_C( 26078536), -INT32_C( 29231597), INT32_C( 16346587) }, { INT32_C( 15321), INT32_C( 203738), -INT32_C( 228372), INT32_C( 127707) }, { INT32_C( 239), INT32_C( 3183), -INT32_C( 3569), INT32_C( 1995) }, { INT32_C( 1), INT32_C( 24), -INT32_C( 28), INT32_C( 15) }, { INT32_C( 0), INT32_C( 0), -INT32_C( 1), INT32_C( 0) } }, { { INT32_C( 123929250), INT32_C( 1830231953), -INT32_C( 1397146087), INT32_C( 193304550) }, { INT32_C( 1936394), INT32_C( 28597374), -INT32_C( 21830408), INT32_C( 3020383) }, { INT32_C( 
15128), INT32_C( 223416), -INT32_C( 170551), INT32_C( 23596) }, { INT32_C( 236), INT32_C( 3490), -INT32_C( 2665), INT32_C( 368) }, { INT32_C( 1), INT32_C( 27), -INT32_C( 21), INT32_C( 2) }, { INT32_C( 0), INT32_C( 0), -INT32_C( 1), INT32_C( 0) } }, { { -INT32_C( 82705975), INT32_C( 979275347), -INT32_C( 540353518), -INT32_C( 182639278) }, { -INT32_C( 1292281), INT32_C( 15301177), -INT32_C( 8443024), -INT32_C( 2853739) }, { -INT32_C( 10096), INT32_C( 119540), -INT32_C( 65962), -INT32_C( 22295) }, { -INT32_C( 158), INT32_C( 1867), -INT32_C( 1031), -INT32_C( 349) }, { -INT32_C( 2), INT32_C( 14), -INT32_C( 9), -INT32_C( 3) }, { -INT32_C( 1), INT32_C( 0), -INT32_C( 1), -INT32_C( 1) } }, { { -INT32_C( 1174634200), -INT32_C( 1188621408), INT32_C( 946200658), INT32_C( 1078193015) }, { -INT32_C( 18353660), -INT32_C( 18572210), INT32_C( 14784385), INT32_C( 16846765) }, { -INT32_C( 143388), -INT32_C( 145096), INT32_C( 115503), INT32_C( 131615) }, { -INT32_C( 2241), -INT32_C( 2268), INT32_C( 1804), INT32_C( 2056) }, { -INT32_C( 18), -INT32_C( 18), INT32_C( 14), INT32_C( 16) }, { -INT32_C( 1), -INT32_C( 1), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 1060853484), -INT32_C( 159737116), -INT32_C( 925547402), -INT32_C( 1833045142) }, { INT32_C( 16575835), -INT32_C( 2495893), -INT32_C( 14461679), -INT32_C( 28641331) }, { INT32_C( 129498), -INT32_C( 19500), -INT32_C( 112982), -INT32_C( 223761) }, { INT32_C( 2023), -INT32_C( 305), -INT32_C( 1766), -INT32_C( 3497) }, { INT32_C( 15), -INT32_C( 3), -INT32_C( 14), -INT32_C( 28) }, { INT32_C( 0), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { INT32_C( 340506996), INT32_C( 533623501), -INT32_C( 917032110), INT32_C( 168467230) }, { INT32_C( 5320421), INT32_C( 8337867), -INT32_C( 14328627), INT32_C( 2632300) }, { INT32_C( 41565), INT32_C( 65139), -INT32_C( 111943), INT32_C( 20564) }, { INT32_C( 649), INT32_C( 1017), -INT32_C( 1750), INT32_C( 321) }, { INT32_C( 5), INT32_C( 7), -INT32_C( 14), INT32_C( 2) }, { INT32_C( 0), INT32_C( 0), 
-INT32_C( 1), INT32_C( 0) } }, { { -INT32_C( 716552719), INT32_C( 1439417567), INT32_C( 1914609672), INT32_C( 117758867) }, { -INT32_C( 11196137), INT32_C( 22490899), INT32_C( 29915776), INT32_C( 1839982) }, { -INT32_C( 87470), INT32_C( 175710), INT32_C( 233717), INT32_C( 14374) }, { -INT32_C( 1367), INT32_C( 2745), INT32_C( 3651), INT32_C( 224) }, { -INT32_C( 11), INT32_C( 21), INT32_C( 28), INT32_C( 1) }, { -INT32_C( 1), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 1646022805), INT32_C( 360835778), INT32_C( 1021237277), INT32_C( 1682368627) }, { INT32_C( 25719106), INT32_C( 5638059), INT32_C( 15956832), INT32_C( 26287009) }, { INT32_C( 200930), INT32_C( 44047), INT32_C( 124662), INT32_C( 205367) }, { INT32_C( 3139), INT32_C( 688), INT32_C( 1947), INT32_C( 3208) }, { INT32_C( 24), INT32_C( 5), INT32_C( 15), INT32_C( 25) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t r6 = simde_vshrq_n_s32(a, 6); simde_int32x4_t r13 = simde_vshrq_n_s32(a, 13); simde_int32x4_t r19 = simde_vshrq_n_s32(a, 19); simde_int32x4_t r26 = simde_vshrq_n_s32(a, 26); simde_int32x4_t r32 = simde_vshrq_n_s32(a, 32); simde_test_arm_neon_assert_equal_i32x4(r6, simde_vld1q_s32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i32x4(r13, simde_vld1q_s32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i32x4(r19, simde_vld1q_s32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_i32x4(r26, simde_vld1q_s32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i32x4(r32, simde_vld1q_s32(test_vec[i].r32)); } return 0; } static int test_simde_vshrq_n_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t r13[2]; int64_t r26[2]; int64_t r39[2]; int64_t r52[2]; int64_t r64[2]; } test_vec[] = { { { INT64_C( 7258000848010340907), INT64_C( 6356962164739953033) }, { INT64_C( 885986431641887), INT64_C( 775996358000482) }, { INT64_C( 
108152640581), INT64_C( 94726117919) }, { INT64_C( 13202226), INT64_C( 11563246) }, { INT64_C( 1611), INT64_C( 1411) }, { INT64_C( 0), INT64_C( 0) } }, { { -INT64_C( 4083202087668540142), -INT64_C( 666128355631102955) }, { -INT64_C( 498437754842352), -INT64_C( 81314496537000) }, { -INT64_C( 60844452496), -INT64_C( 9926086004) }, { -INT64_C( 7427302), -INT64_C( 1211681) }, { -INT64_C( 907), -INT64_C( 148) }, { -INT64_C( 1), -INT64_C( 1) } }, { { INT64_C( 508841331230762074), -INT64_C( 8802639018395763947) }, { INT64_C( 62114420316255), -INT64_C( 1074540895800265) }, { INT64_C( 7582326698), -INT64_C( 131169542945) }, { INT64_C( 925576), -INT64_C( 16011908) }, { INT64_C( 112), -INT64_C( 1955) }, { INT64_C( 0), -INT64_C( 1) } }, { { -INT64_C( 2596152810134882085), -INT64_C( 8259092421874367328) }, { -INT64_C( 316913184830919), -INT64_C( 1008189992904586) }, { -INT64_C( 38685691508), -INT64_C( 123070067494) }, { -INT64_C( 4722375), -INT64_C( 15023202) }, { -INT64_C( 577), -INT64_C( 1834) }, { -INT64_C( 1), -INT64_C( 1) } }, { { INT64_C( 5060155032460684468), INT64_C( 6358126638685011780) }, { INT64_C( 617694706110923), INT64_C( 776138505698853) }, { INT64_C( 75402185804), INT64_C( 94743469933) }, { INT64_C( 9204368), INT64_C( 11565364) }, { INT64_C( 1123), INT64_C( 1411) }, { INT64_C( 0), INT64_C( 0) } }, { { -INT64_C( 4506834250564533037), INT64_C( 434471274424336966) }, { -INT64_C( 550150665352116), INT64_C( 53036044241252) }, { -INT64_C( 67157063642), INT64_C( 6474126494) }, { -INT64_C( 8197884), INT64_C( 790298) }, { -INT64_C( 1001), INT64_C( 96) }, { -INT64_C( 1), INT64_C( 0) } }, { { -INT64_C( 3715063387966127879), -INT64_C( 6699736868146957008) }, { -INT64_C( 453498948726334), -INT64_C( 817838973162471) }, { -INT64_C( 55358758390), -INT64_C( 99833859029) }, { -INT64_C( 6757661), -INT64_C( 12186751) }, { -INT64_C( 825), -INT64_C( 1488) }, { -INT64_C( 1), -INT64_C( 1) } }, { { INT64_C( 6279618844180647748), -INT64_C( 2843222864020090574) }, { INT64_C( 
766555034690020), -INT64_C( 347073103518078) }, { INT64_C( 93573612633), -INT64_C( 42367322207) }, { INT64_C( 11422560), -INT64_C( 5171793) }, { INT64_C( 1394), -INT64_C( 632) }, { INT64_C( 0), -INT64_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t r13 = simde_vshrq_n_s64(a, 13); simde_int64x2_t r26 = simde_vshrq_n_s64(a, 26); simde_int64x2_t r39 = simde_vshrq_n_s64(a, 39); simde_int64x2_t r52 = simde_vshrq_n_s64(a, 52); simde_int64x2_t r64 = simde_vshrq_n_s64(a, 64); simde_test_arm_neon_assert_equal_i64x2(r13, simde_vld1q_s64(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i64x2(r26, simde_vld1q_s64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i64x2(r39, simde_vld1q_s64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_i64x2(r52, simde_vld1q_s64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_i64x2(r64, simde_vld1q_s64(test_vec[i].r64)); } return 0; } static int test_simde_vshrq_n_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t r1[16]; uint8_t r3[16]; uint8_t r5[16]; uint8_t r6[16]; uint8_t r8[16]; } test_vec[] = { { { UINT8_C(152), UINT8_C( 87), UINT8_C( 17), UINT8_C(199), UINT8_C(188), UINT8_C(234), UINT8_C( 66), UINT8_C(209), UINT8_C(194), UINT8_C(193), UINT8_C(190), UINT8_C( 69), UINT8_C(146), UINT8_C(243), UINT8_C(209), UINT8_C( 73) }, { UINT8_C( 76), UINT8_C( 43), UINT8_C( 8), UINT8_C( 99), UINT8_C( 94), UINT8_C(117), UINT8_C( 33), UINT8_C(104), UINT8_C( 97), UINT8_C( 96), UINT8_C( 95), UINT8_C( 34), UINT8_C( 73), UINT8_C(121), UINT8_C(104), UINT8_C( 36) }, { UINT8_C( 19), UINT8_C( 10), UINT8_C( 2), UINT8_C( 24), UINT8_C( 23), UINT8_C( 29), UINT8_C( 8), UINT8_C( 26), UINT8_C( 24), UINT8_C( 24), UINT8_C( 23), UINT8_C( 8), UINT8_C( 18), UINT8_C( 30), UINT8_C( 26), UINT8_C( 9) }, { UINT8_C( 4), UINT8_C( 2), UINT8_C( 0), UINT8_C( 6), UINT8_C( 5), UINT8_C( 7), UINT8_C( 2), UINT8_C( 6), UINT8_C( 6), UINT8_C( 6), UINT8_C( 5), 
UINT8_C( 2), UINT8_C( 4), UINT8_C( 7), UINT8_C( 6), UINT8_C( 2) }, { UINT8_C( 2), UINT8_C( 1), UINT8_C( 0), UINT8_C( 3), UINT8_C( 2), UINT8_C( 3), UINT8_C( 1), UINT8_C( 3), UINT8_C( 3), UINT8_C( 3), UINT8_C( 2), UINT8_C( 1), UINT8_C( 2), UINT8_C( 3), UINT8_C( 3), UINT8_C( 1) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(140), UINT8_C( 32), UINT8_C( 70), UINT8_C(100), UINT8_C( 70), UINT8_C(102), UINT8_C(246), UINT8_C(204), UINT8_C(207), UINT8_C(243), UINT8_C(234), UINT8_C( 39), UINT8_C(159), UINT8_C( 16), UINT8_C(196), UINT8_C( 55) }, { UINT8_C( 70), UINT8_C( 16), UINT8_C( 35), UINT8_C( 50), UINT8_C( 35), UINT8_C( 51), UINT8_C(123), UINT8_C(102), UINT8_C(103), UINT8_C(121), UINT8_C(117), UINT8_C( 19), UINT8_C( 79), UINT8_C( 8), UINT8_C( 98), UINT8_C( 27) }, { UINT8_C( 17), UINT8_C( 4), UINT8_C( 8), UINT8_C( 12), UINT8_C( 8), UINT8_C( 12), UINT8_C( 30), UINT8_C( 25), UINT8_C( 25), UINT8_C( 30), UINT8_C( 29), UINT8_C( 4), UINT8_C( 19), UINT8_C( 2), UINT8_C( 24), UINT8_C( 6) }, { UINT8_C( 4), UINT8_C( 1), UINT8_C( 2), UINT8_C( 3), UINT8_C( 2), UINT8_C( 3), UINT8_C( 7), UINT8_C( 6), UINT8_C( 6), UINT8_C( 7), UINT8_C( 7), UINT8_C( 1), UINT8_C( 4), UINT8_C( 0), UINT8_C( 6), UINT8_C( 1) }, { UINT8_C( 2), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 3), UINT8_C( 3), UINT8_C( 3), UINT8_C( 3), UINT8_C( 3), UINT8_C( 0), UINT8_C( 2), UINT8_C( 0), UINT8_C( 3), UINT8_C( 0) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(103), UINT8_C(213), UINT8_MAX, UINT8_C( 36), UINT8_C(191), UINT8_C( 65), UINT8_C(245), UINT8_C(130), UINT8_C( 2), UINT8_C(179), UINT8_C(199), UINT8_C(148), UINT8_C(167), 
UINT8_C(152), UINT8_C(221), UINT8_C( 51) }, { UINT8_C( 51), UINT8_C(106), UINT8_C(127), UINT8_C( 18), UINT8_C( 95), UINT8_C( 32), UINT8_C(122), UINT8_C( 65), UINT8_C( 1), UINT8_C( 89), UINT8_C( 99), UINT8_C( 74), UINT8_C( 83), UINT8_C( 76), UINT8_C(110), UINT8_C( 25) }, { UINT8_C( 12), UINT8_C( 26), UINT8_C( 31), UINT8_C( 4), UINT8_C( 23), UINT8_C( 8), UINT8_C( 30), UINT8_C( 16), UINT8_C( 0), UINT8_C( 22), UINT8_C( 24), UINT8_C( 18), UINT8_C( 20), UINT8_C( 19), UINT8_C( 27), UINT8_C( 6) }, { UINT8_C( 3), UINT8_C( 6), UINT8_C( 7), UINT8_C( 1), UINT8_C( 5), UINT8_C( 2), UINT8_C( 7), UINT8_C( 4), UINT8_C( 0), UINT8_C( 5), UINT8_C( 6), UINT8_C( 4), UINT8_C( 5), UINT8_C( 4), UINT8_C( 6), UINT8_C( 1) }, { UINT8_C( 1), UINT8_C( 3), UINT8_C( 3), UINT8_C( 0), UINT8_C( 2), UINT8_C( 1), UINT8_C( 3), UINT8_C( 2), UINT8_C( 0), UINT8_C( 2), UINT8_C( 3), UINT8_C( 2), UINT8_C( 2), UINT8_C( 2), UINT8_C( 3), UINT8_C( 0) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(184), UINT8_C( 36), UINT8_C(151), UINT8_MAX, UINT8_C(138), UINT8_C(142), UINT8_C(203), UINT8_C( 89), UINT8_C(129), UINT8_C(181), UINT8_C(128), UINT8_C( 32), UINT8_C(198), UINT8_C( 69), UINT8_C( 87), UINT8_C( 45) }, { UINT8_C( 92), UINT8_C( 18), UINT8_C( 75), UINT8_C(127), UINT8_C( 69), UINT8_C( 71), UINT8_C(101), UINT8_C( 44), UINT8_C( 64), UINT8_C( 90), UINT8_C( 64), UINT8_C( 16), UINT8_C( 99), UINT8_C( 34), UINT8_C( 43), UINT8_C( 22) }, { UINT8_C( 23), UINT8_C( 4), UINT8_C( 18), UINT8_C( 31), UINT8_C( 17), UINT8_C( 17), UINT8_C( 25), UINT8_C( 11), UINT8_C( 16), UINT8_C( 22), UINT8_C( 16), UINT8_C( 4), UINT8_C( 24), UINT8_C( 8), UINT8_C( 10), UINT8_C( 5) }, { UINT8_C( 5), UINT8_C( 1), UINT8_C( 4), UINT8_C( 7), UINT8_C( 4), UINT8_C( 4), UINT8_C( 6), UINT8_C( 2), UINT8_C( 4), UINT8_C( 5), UINT8_C( 4), UINT8_C( 1), UINT8_C( 6), UINT8_C( 2), 
UINT8_C( 2), UINT8_C( 1) }, { UINT8_C( 2), UINT8_C( 0), UINT8_C( 2), UINT8_C( 3), UINT8_C( 2), UINT8_C( 2), UINT8_C( 3), UINT8_C( 1), UINT8_C( 2), UINT8_C( 2), UINT8_C( 2), UINT8_C( 0), UINT8_C( 3), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 26), UINT8_C( 86), UINT8_C( 81), UINT8_C(218), UINT8_C(151), UINT8_C( 70), UINT8_C( 92), UINT8_C(153), UINT8_C(250), UINT8_C( 35), UINT8_C( 46), UINT8_C(161), UINT8_C(187), UINT8_C( 11), UINT8_C(212), UINT8_C(115) }, { UINT8_C( 13), UINT8_C( 43), UINT8_C( 40), UINT8_C(109), UINT8_C( 75), UINT8_C( 35), UINT8_C( 46), UINT8_C( 76), UINT8_C(125), UINT8_C( 17), UINT8_C( 23), UINT8_C( 80), UINT8_C( 93), UINT8_C( 5), UINT8_C(106), UINT8_C( 57) }, { UINT8_C( 3), UINT8_C( 10), UINT8_C( 10), UINT8_C( 27), UINT8_C( 18), UINT8_C( 8), UINT8_C( 11), UINT8_C( 19), UINT8_C( 31), UINT8_C( 4), UINT8_C( 5), UINT8_C( 20), UINT8_C( 23), UINT8_C( 1), UINT8_C( 26), UINT8_C( 14) }, { UINT8_C( 0), UINT8_C( 2), UINT8_C( 2), UINT8_C( 6), UINT8_C( 4), UINT8_C( 2), UINT8_C( 2), UINT8_C( 4), UINT8_C( 7), UINT8_C( 1), UINT8_C( 1), UINT8_C( 5), UINT8_C( 5), UINT8_C( 0), UINT8_C( 6), UINT8_C( 3) }, { UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 3), UINT8_C( 2), UINT8_C( 1), UINT8_C( 1), UINT8_C( 2), UINT8_C( 3), UINT8_C( 0), UINT8_C( 0), UINT8_C( 2), UINT8_C( 2), UINT8_C( 0), UINT8_C( 3), UINT8_C( 1) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 47), UINT8_C(107), UINT8_C(114), UINT8_C(185), UINT8_C(249), UINT8_C( 62), UINT8_C( 18), UINT8_C(122), UINT8_C(243), UINT8_C(147), UINT8_C(154), UINT8_C(185), UINT8_C(216), UINT8_C(242), UINT8_C(231), UINT8_C(242) 
}, { UINT8_C( 23), UINT8_C( 53), UINT8_C( 57), UINT8_C( 92), UINT8_C(124), UINT8_C( 31), UINT8_C( 9), UINT8_C( 61), UINT8_C(121), UINT8_C( 73), UINT8_C( 77), UINT8_C( 92), UINT8_C(108), UINT8_C(121), UINT8_C(115), UINT8_C(121) }, { UINT8_C( 5), UINT8_C( 13), UINT8_C( 14), UINT8_C( 23), UINT8_C( 31), UINT8_C( 7), UINT8_C( 2), UINT8_C( 15), UINT8_C( 30), UINT8_C( 18), UINT8_C( 19), UINT8_C( 23), UINT8_C( 27), UINT8_C( 30), UINT8_C( 28), UINT8_C( 30) }, { UINT8_C( 1), UINT8_C( 3), UINT8_C( 3), UINT8_C( 5), UINT8_C( 7), UINT8_C( 1), UINT8_C( 0), UINT8_C( 3), UINT8_C( 7), UINT8_C( 4), UINT8_C( 4), UINT8_C( 5), UINT8_C( 6), UINT8_C( 7), UINT8_C( 7), UINT8_C( 7) }, { UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 2), UINT8_C( 3), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 3), UINT8_C( 2), UINT8_C( 2), UINT8_C( 2), UINT8_C( 3), UINT8_C( 3), UINT8_C( 3), UINT8_C( 3) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 72), UINT8_C( 56), UINT8_C(204), UINT8_C(224), UINT8_C(127), UINT8_C( 40), UINT8_C(121), UINT8_C(121), UINT8_C( 75), UINT8_C(167), UINT8_C( 26), UINT8_C( 6), UINT8_C(179), UINT8_C(238), UINT8_C(122), UINT8_C(226) }, { UINT8_C( 36), UINT8_C( 28), UINT8_C(102), UINT8_C(112), UINT8_C( 63), UINT8_C( 20), UINT8_C( 60), UINT8_C( 60), UINT8_C( 37), UINT8_C( 83), UINT8_C( 13), UINT8_C( 3), UINT8_C( 89), UINT8_C(119), UINT8_C( 61), UINT8_C(113) }, { UINT8_C( 9), UINT8_C( 7), UINT8_C( 25), UINT8_C( 28), UINT8_C( 15), UINT8_C( 5), UINT8_C( 15), UINT8_C( 15), UINT8_C( 9), UINT8_C( 20), UINT8_C( 3), UINT8_C( 0), UINT8_C( 22), UINT8_C( 29), UINT8_C( 15), UINT8_C( 28) }, { UINT8_C( 2), UINT8_C( 1), UINT8_C( 6), UINT8_C( 7), UINT8_C( 3), UINT8_C( 1), UINT8_C( 3), UINT8_C( 3), UINT8_C( 2), UINT8_C( 5), UINT8_C( 0), UINT8_C( 0), UINT8_C( 5), UINT8_C( 7), UINT8_C( 3), UINT8_C( 7) }, { UINT8_C( 1), 
UINT8_C( 0), UINT8_C( 3), UINT8_C( 3), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 2), UINT8_C( 3), UINT8_C( 1), UINT8_C( 3) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 89), UINT8_C(236), UINT8_C(156), UINT8_C( 83), UINT8_C( 42), UINT8_C(174), UINT8_C(205), UINT8_C( 30), UINT8_C( 65), UINT8_C(104), UINT8_C(215), UINT8_C( 25), UINT8_C( 90), UINT8_C(190), UINT8_C( 12), UINT8_C(162) }, { UINT8_C( 44), UINT8_C(118), UINT8_C( 78), UINT8_C( 41), UINT8_C( 21), UINT8_C( 87), UINT8_C(102), UINT8_C( 15), UINT8_C( 32), UINT8_C( 52), UINT8_C(107), UINT8_C( 12), UINT8_C( 45), UINT8_C( 95), UINT8_C( 6), UINT8_C( 81) }, { UINT8_C( 11), UINT8_C( 29), UINT8_C( 19), UINT8_C( 10), UINT8_C( 5), UINT8_C( 21), UINT8_C( 25), UINT8_C( 3), UINT8_C( 8), UINT8_C( 13), UINT8_C( 26), UINT8_C( 3), UINT8_C( 11), UINT8_C( 23), UINT8_C( 1), UINT8_C( 20) }, { UINT8_C( 2), UINT8_C( 7), UINT8_C( 4), UINT8_C( 2), UINT8_C( 1), UINT8_C( 5), UINT8_C( 6), UINT8_C( 0), UINT8_C( 2), UINT8_C( 3), UINT8_C( 6), UINT8_C( 0), UINT8_C( 2), UINT8_C( 5), UINT8_C( 0), UINT8_C( 5) }, { UINT8_C( 1), UINT8_C( 3), UINT8_C( 2), UINT8_C( 1), UINT8_C( 0), UINT8_C( 2), UINT8_C( 3), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 3), UINT8_C( 0), UINT8_C( 1), UINT8_C( 2), UINT8_C( 0), UINT8_C( 2) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t r1 = simde_vshrq_n_u8(a, 1); simde_uint8x16_t r3 = simde_vshrq_n_u8(a, 3); simde_uint8x16_t r5 = simde_vshrq_n_u8(a, 5); 
simde_uint8x16_t r6 = simde_vshrq_n_u8(a, 6); simde_uint8x16_t r8 = simde_vshrq_n_u8(a, 8); simde_test_arm_neon_assert_equal_u8x16(r1, simde_vld1q_u8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u8x16(r3, simde_vld1q_u8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u8x16(r5, simde_vld1q_u8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_u8x16(r6, simde_vld1q_u8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u8x16(r8, simde_vld1q_u8(test_vec[i].r8)); } return 0; } static int test_simde_vshrq_n_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t r3[8]; uint16_t r6[8]; uint16_t r10[8]; uint16_t r13[8]; uint16_t r16[8]; } test_vec[] = { { { UINT16_C(24398), UINT16_C(48722), UINT16_C(55176), UINT16_C( 3988), UINT16_C(49143), UINT16_C( 3217), UINT16_C(60719), UINT16_C(38374) }, { UINT16_C( 3049), UINT16_C( 6090), UINT16_C( 6897), UINT16_C( 498), UINT16_C( 6142), UINT16_C( 402), UINT16_C( 7589), UINT16_C( 4796) }, { UINT16_C( 381), UINT16_C( 761), UINT16_C( 862), UINT16_C( 62), UINT16_C( 767), UINT16_C( 50), UINT16_C( 948), UINT16_C( 599) }, { UINT16_C( 23), UINT16_C( 47), UINT16_C( 53), UINT16_C( 3), UINT16_C( 47), UINT16_C( 3), UINT16_C( 59), UINT16_C( 37) }, { UINT16_C( 2), UINT16_C( 5), UINT16_C( 6), UINT16_C( 0), UINT16_C( 5), UINT16_C( 0), UINT16_C( 7), UINT16_C( 4) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(65497), UINT16_C(64072), UINT16_C( 6860), UINT16_C( 4494), UINT16_C(32051), UINT16_C(16486), UINT16_C(63839), UINT16_C(44397) }, { UINT16_C( 8187), UINT16_C( 8009), UINT16_C( 857), UINT16_C( 561), UINT16_C( 4006), UINT16_C( 2060), UINT16_C( 7979), UINT16_C( 5549) }, { UINT16_C( 1023), UINT16_C( 1001), UINT16_C( 107), UINT16_C( 70), UINT16_C( 500), UINT16_C( 257), UINT16_C( 997), UINT16_C( 693) }, { UINT16_C( 63), UINT16_C( 62), UINT16_C( 6), UINT16_C( 4), UINT16_C( 31), UINT16_C( 16), UINT16_C( 62), UINT16_C( 43) }, { UINT16_C( 7), UINT16_C( 
7), UINT16_C( 0), UINT16_C( 0), UINT16_C( 3), UINT16_C( 2), UINT16_C( 7), UINT16_C( 5) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(48984), UINT16_C(57451), UINT16_C( 151), UINT16_C(36591), UINT16_C(32959), UINT16_C(61338), UINT16_C(32877), UINT16_C(18052) }, { UINT16_C( 6123), UINT16_C( 7181), UINT16_C( 18), UINT16_C( 4573), UINT16_C( 4119), UINT16_C( 7667), UINT16_C( 4109), UINT16_C( 2256) }, { UINT16_C( 765), UINT16_C( 897), UINT16_C( 2), UINT16_C( 571), UINT16_C( 514), UINT16_C( 958), UINT16_C( 513), UINT16_C( 282) }, { UINT16_C( 47), UINT16_C( 56), UINT16_C( 0), UINT16_C( 35), UINT16_C( 32), UINT16_C( 59), UINT16_C( 32), UINT16_C( 17) }, { UINT16_C( 5), UINT16_C( 7), UINT16_C( 0), UINT16_C( 4), UINT16_C( 4), UINT16_C( 7), UINT16_C( 4), UINT16_C( 2) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(52351), UINT16_C(19264), UINT16_C(52966), UINT16_C( 6493), UINT16_C(49995), UINT16_C(43865), UINT16_C(50876), UINT16_C( 5208) }, { UINT16_C( 6543), UINT16_C( 2408), UINT16_C( 6620), UINT16_C( 811), UINT16_C( 6249), UINT16_C( 5483), UINT16_C( 6359), UINT16_C( 651) }, { UINT16_C( 817), UINT16_C( 301), UINT16_C( 827), UINT16_C( 101), UINT16_C( 781), UINT16_C( 685), UINT16_C( 794), UINT16_C( 81) }, { UINT16_C( 51), UINT16_C( 18), UINT16_C( 51), UINT16_C( 6), UINT16_C( 48), UINT16_C( 42), UINT16_C( 49), UINT16_C( 5) }, { UINT16_C( 6), UINT16_C( 2), UINT16_C( 6), UINT16_C( 0), UINT16_C( 6), UINT16_C( 5), UINT16_C( 6), UINT16_C( 0) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(50310), UINT16_C( 7668), UINT16_C(58308), UINT16_C(33707), UINT16_C(17763), UINT16_C(53362), UINT16_C(63173), UINT16_C(17686) }, { UINT16_C( 6288), UINT16_C( 958), UINT16_C( 7288), UINT16_C( 4213), UINT16_C( 2220), UINT16_C( 6670), UINT16_C( 
7896), UINT16_C( 2210) }, { UINT16_C( 786), UINT16_C( 119), UINT16_C( 911), UINT16_C( 526), UINT16_C( 277), UINT16_C( 833), UINT16_C( 987), UINT16_C( 276) }, { UINT16_C( 49), UINT16_C( 7), UINT16_C( 56), UINT16_C( 32), UINT16_C( 17), UINT16_C( 52), UINT16_C( 61), UINT16_C( 17) }, { UINT16_C( 6), UINT16_C( 0), UINT16_C( 7), UINT16_C( 4), UINT16_C( 2), UINT16_C( 6), UINT16_C( 7), UINT16_C( 2) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(22211), UINT16_C(43408), UINT16_C(60708), UINT16_C(28867), UINT16_C( 7345), UINT16_C(27931), UINT16_C(29667), UINT16_C(27010) }, { UINT16_C( 2776), UINT16_C( 5426), UINT16_C( 7588), UINT16_C( 3608), UINT16_C( 918), UINT16_C( 3491), UINT16_C( 3708), UINT16_C( 3376) }, { UINT16_C( 347), UINT16_C( 678), UINT16_C( 948), UINT16_C( 451), UINT16_C( 114), UINT16_C( 436), UINT16_C( 463), UINT16_C( 422) }, { UINT16_C( 21), UINT16_C( 42), UINT16_C( 59), UINT16_C( 28), UINT16_C( 7), UINT16_C( 27), UINT16_C( 28), UINT16_C( 26) }, { UINT16_C( 2), UINT16_C( 5), UINT16_C( 7), UINT16_C( 3), UINT16_C( 0), UINT16_C( 3), UINT16_C( 3), UINT16_C( 3) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(30263), UINT16_C(64390), UINT16_C(12634), UINT16_C(48511), UINT16_C(61814), UINT16_C(15246), UINT16_C(42216), UINT16_C(43904) }, { UINT16_C( 3782), UINT16_C( 8048), UINT16_C( 1579), UINT16_C( 6063), UINT16_C( 7726), UINT16_C( 1905), UINT16_C( 5277), UINT16_C( 5488) }, { UINT16_C( 472), UINT16_C( 1006), UINT16_C( 197), UINT16_C( 757), UINT16_C( 965), UINT16_C( 238), UINT16_C( 659), UINT16_C( 686) }, { UINT16_C( 29), UINT16_C( 62), UINT16_C( 12), UINT16_C( 47), UINT16_C( 60), UINT16_C( 14), UINT16_C( 41), UINT16_C( 42) }, { UINT16_C( 3), UINT16_C( 7), UINT16_C( 1), UINT16_C( 5), UINT16_C( 7), UINT16_C( 1), UINT16_C( 5), UINT16_C( 5) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), 
UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C( 4603), UINT16_C( 8020), UINT16_C( 6142), UINT16_C(44943), UINT16_C(43572), UINT16_C( 5917), UINT16_C(40734), UINT16_C(21888) }, { UINT16_C( 575), UINT16_C( 1002), UINT16_C( 767), UINT16_C( 5617), UINT16_C( 5446), UINT16_C( 739), UINT16_C( 5091), UINT16_C( 2736) }, { UINT16_C( 71), UINT16_C( 125), UINT16_C( 95), UINT16_C( 702), UINT16_C( 680), UINT16_C( 92), UINT16_C( 636), UINT16_C( 342) }, { UINT16_C( 4), UINT16_C( 7), UINT16_C( 5), UINT16_C( 43), UINT16_C( 42), UINT16_C( 5), UINT16_C( 39), UINT16_C( 21) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 5), UINT16_C( 5), UINT16_C( 0), UINT16_C( 4), UINT16_C( 2) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t r3 = simde_vshrq_n_u16(a, 3); simde_uint16x8_t r6 = simde_vshrq_n_u16(a, 6); simde_uint16x8_t r10 = simde_vshrq_n_u16(a, 10); simde_uint16x8_t r13 = simde_vshrq_n_u16(a, 13); simde_uint16x8_t r16 = simde_vshrq_n_u16(a, 16); simde_test_arm_neon_assert_equal_u16x8(r3, simde_vld1q_u16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u16x8(r6, simde_vld1q_u16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u16x8(r10, simde_vld1q_u16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_u16x8(r13, simde_vld1q_u16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u16x8(r16, simde_vld1q_u16(test_vec[i].r16)); } return 0; } static int test_simde_vshrq_n_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t r6[4]; uint32_t r13[4]; uint32_t r19[4]; uint32_t r26[4]; uint32_t r32[4]; } test_vec[] = { { { UINT32_C(1301916297), UINT32_C(2359195108), UINT32_C(3209238876), UINT32_C(3414373642) }, { UINT32_C( 20342442), UINT32_C( 36862423), UINT32_C( 50144357), UINT32_C( 53349588) }, { UINT32_C( 158925), UINT32_C( 
287987), UINT32_C( 391752), UINT32_C( 416793) }, { UINT32_C( 2483), UINT32_C( 4499), UINT32_C( 6121), UINT32_C( 6512) }, { UINT32_C( 19), UINT32_C( 35), UINT32_C( 47), UINT32_C( 50) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 841976147), UINT32_C( 375262057), UINT32_C(4032949437), UINT32_C(2942361382) }, { UINT32_C( 13155877), UINT32_C( 5863469), UINT32_C( 63014834), UINT32_C( 45974396) }, { UINT32_C( 102780), UINT32_C( 45808), UINT32_C( 492303), UINT32_C( 359174) }, { UINT32_C( 1605), UINT32_C( 715), UINT32_C( 7692), UINT32_C( 5612) }, { UINT32_C( 12), UINT32_C( 5), UINT32_C( 60), UINT32_C( 43) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(1862072714), UINT32_C(3405421166), UINT32_C(3196666803), UINT32_C(3414756472) }, { UINT32_C( 29094886), UINT32_C( 53209705), UINT32_C( 49947918), UINT32_C( 53355569) }, { UINT32_C( 227303), UINT32_C( 415700), UINT32_C( 390218), UINT32_C( 416840) }, { UINT32_C( 3551), UINT32_C( 6495), UINT32_C( 6097), UINT32_C( 6513) }, { UINT32_C( 27), UINT32_C( 50), UINT32_C( 47), UINT32_C( 50) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(4294883734), UINT32_C(2165660868), UINT32_C(1852995144), UINT32_C(3743339093) }, { UINT32_C( 67107558), UINT32_C( 33838451), UINT32_C( 28953049), UINT32_C( 58489673) }, { UINT32_C( 524277), UINT32_C( 264362), UINT32_C( 226195), UINT32_C( 456950) }, { UINT32_C( 8191), UINT32_C( 4130), UINT32_C( 3534), UINT32_C( 7139) }, { UINT32_C( 63), UINT32_C( 32), UINT32_C( 27), UINT32_C( 55) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 961354443), UINT32_C(1745111221), UINT32_C( 69635467), UINT32_C( 818917530) }, { UINT32_C( 15021163), UINT32_C( 27267362), UINT32_C( 1088054), UINT32_C( 12795586) }, { UINT32_C( 117352), UINT32_C( 213026), UINT32_C( 8500), UINT32_C( 99965) }, { UINT32_C( 1833), UINT32_C( 3328), UINT32_C( 132), UINT32_C( 1561) }, { UINT32_C( 14), UINT32_C( 26), UINT32_C( 1), 
UINT32_C( 12) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 758107497), UINT32_C(1924023338), UINT32_C( 266412218), UINT32_C(3186622450) }, { UINT32_C( 11845429), UINT32_C( 30062864), UINT32_C( 4162690), UINT32_C( 49790975) }, { UINT32_C( 92542), UINT32_C( 234866), UINT32_C( 32521), UINT32_C( 388991) }, { UINT32_C( 1445), UINT32_C( 3669), UINT32_C( 508), UINT32_C( 6077) }, { UINT32_C( 11), UINT32_C( 28), UINT32_C( 3), UINT32_C( 47) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(3472309273), UINT32_C( 272104324), UINT32_C( 571760008), UINT32_C(1985143565) }, { UINT32_C( 54254832), UINT32_C( 4251630), UINT32_C( 8933750), UINT32_C( 31017868) }, { UINT32_C( 423865), UINT32_C( 33215), UINT32_C( 69794), UINT32_C( 242327) }, { UINT32_C( 6622), UINT32_C( 518), UINT32_C( 1090), UINT32_C( 3786) }, { UINT32_C( 51), UINT32_C( 4), UINT32_C( 8), UINT32_C( 29) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(3684925873), UINT32_C(2135773893), UINT32_C(1703882354), UINT32_C(1193442861) }, { UINT32_C( 57576966), UINT32_C( 33371467), UINT32_C( 26623161), UINT32_C( 18647544) }, { UINT32_C( 449820), UINT32_C( 260714), UINT32_C( 207993), UINT32_C( 145683) }, { UINT32_C( 7028), UINT32_C( 4073), UINT32_C( 3249), UINT32_C( 2276) }, { UINT32_C( 54), UINT32_C( 31), UINT32_C( 25), UINT32_C( 17) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t r6 = simde_vshrq_n_u32(a, 6); simde_uint32x4_t r13 = simde_vshrq_n_u32(a, 13); simde_uint32x4_t r19 = simde_vshrq_n_u32(a, 19); simde_uint32x4_t r26 = simde_vshrq_n_u32(a, 26); simde_uint32x4_t r32 = simde_vshrq_n_u32(a, 32); simde_test_arm_neon_assert_equal_u32x4(r6, simde_vld1q_u32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u32x4(r13, simde_vld1q_u32(test_vec[i].r13)); 
simde_test_arm_neon_assert_equal_u32x4(r19, simde_vld1q_u32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_u32x4(r26, simde_vld1q_u32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u32x4(r32, simde_vld1q_u32(test_vec[i].r32)); } return 0; } static int test_simde_vshrq_n_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t r13[2]; uint64_t r26[2]; uint64_t r39[2]; uint64_t r52[2]; uint64_t r64[2]; } test_vec[] = { { { UINT64_C( 4114336365591682096), UINT64_C(10892621211583185910) }, { UINT64_C( 502238325877890), UINT64_C( 1329665675242088) }, { UINT64_C( 61308389389), UINT64_C( 162312704497) }, { UINT64_C( 7483934), UINT64_C( 19813562) }, { UINT64_C( 913), UINT64_C( 2418) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 62757455042096864), UINT64_C( 2063308251897106318) }, { UINT64_C( 7660822148693), UINT64_C( 251868683092908) }, { UINT64_C( 935158953), UINT64_C( 30745688854) }, { UINT64_C( 114155), UINT64_C( 3753135) }, { UINT64_C( 13), UINT64_C( 458) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 4039001276083680180), UINT64_C(10902133567454599789) }, { UINT64_C( 493042147959433), UINT64_C( 1330826851495922) }, { UINT64_C( 60185809077), UINT64_C( 162454449645) }, { UINT64_C( 7346900), UINT64_C( 19830865) }, { UINT64_C( 896), UINT64_C( 2420) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(11054679070670198487), UINT64_C(16912305462898253740) }, { UINT64_C( 1349448128743920), UINT64_C( 2064490412951446) }, { UINT64_C( 164727554778), UINT64_C( 252012989862) }, { UINT64_C( 20108344), UINT64_C( 30763304) }, { UINT64_C( 2454), UINT64_C( 3755) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(15943530412416828871), UINT64_C( 4572805538656698406) }, { UINT64_C( 1946231739796976), UINT64_C( 558203801105554) }, { UINT64_C( 237577116674), UINT64_C( 68140112439) }, { UINT64_C( 29001112), UINT64_C( 8317884) }, { UINT64_C( 3540), UINT64_C( 1015) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 8978229116225041396), UINT64_C(16650872287755799538) }, 
{ UINT64_C( 1095975233914189), UINT64_C( 2032577183563940) }, { UINT64_C( 133786039296), UINT64_C( 248117331978) }, { UINT64_C( 16331303), UINT64_C( 30287760) }, { UINT64_C( 1993), UINT64_C( 3697) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(14130945006247203277), UINT64_C( 9230997754973695347) }, { UINT64_C( 1724968872832910), UINT64_C( 1126830780636437) }, { UINT64_C( 210567489359), UINT64_C( 137552585526) }, { UINT64_C( 25704039), UINT64_C( 16791087) }, { UINT64_C( 3137), UINT64_C( 2049) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(14490079839417058507), UINT64_C(13883758671297588133) }, { UINT64_C( 1768808574147590), UINT64_C( 1694794759679881) }, { UINT64_C( 215919015398), UINT64_C( 206884125937) }, { UINT64_C( 26357301), UINT64_C( 25254409) }, { UINT64_C( 3217), UINT64_C( 3082) }, { UINT64_C( 0), UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t r13 = simde_vshrq_n_u64(a, 13); simde_uint64x2_t r26 = simde_vshrq_n_u64(a, 26); simde_uint64x2_t r39 = simde_vshrq_n_u64(a, 39); simde_uint64x2_t r52 = simde_vshrq_n_u64(a, 52); simde_uint64x2_t r64 = simde_vshrq_n_u64(a, 64); simde_test_arm_neon_assert_equal_u64x2(r13, simde_vld1q_u64(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u64x2(r26, simde_vld1q_u64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u64x2(r39, simde_vld1q_u64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_u64x2(r52, simde_vld1q_u64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_u64x2(r64, simde_vld1q_u64(test_vec[i].r64)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vshr_n_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vshr_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vshr_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vshr_n_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vshr_n_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vshr_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vshr_n_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vshr_n_u64) 
SIMDE_TEST_FUNC_LIST_ENTRY(vshrq_n_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vshrq_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vshrq_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vshrq_n_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vshrq_n_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vshrq_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vshrq_n_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vshrq_n_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/skel-single.c000066400000000000000000000373671400333146700177200ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN xxx #include "test-neon.h" #include "../../../simde/arm/neon/xxx.h" static int test_simde_vxxx_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { simde_float32 a[2]; simde_float32 r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t r = simde_vxxx_f32(a); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vxxx_f32(a); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { simde_float64 a[1]; simde_float64 r[1]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t r = simde_vxxx_f64(a); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t r = simde_vxxx_f64(a); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, r, 
SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int8_t a[8]; int8_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t r = simde_vxxx_s8(a); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vxxx_s8(a); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int16_t a[4]; int16_t r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t r = simde_vxxx_s16(a); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vxxx_s16(a); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int32_t a[2]; int32_t r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t r = simde_vxxx_s32(a); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vxxx_s32(a); simde_test_arm_neon_write_i32x2(2, a, 
SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int64_t a[1]; int64_t r[1]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t r = simde_vxxx_s64(a); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); simde_int64x1_t r = simde_vxxx_s64(a); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint8_t a[8]; uint8_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t r = simde_vxxx_u8(a); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vxxx_u8(a); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint16_t a[4]; uint16_t r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t r = simde_vxxx_u16(a); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t r 
= simde_vxxx_u16(a); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint32_t a[2]; uint32_t r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t r = simde_vxxx_u32(a); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t r = simde_vxxx_u32(a); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint64_t a[1]; uint64_t r[1]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t r = simde_vxxx_u64(a); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t r = simde_vxxx_u64(a); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { simde_float32 a[4]; simde_float32 r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t r = simde_vxxxq_f32(a); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int 
i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vxxxq_f32(a); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { simde_float64 a[2]; simde_float64 r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t r = simde_vxxxq_f64(a); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t r = simde_vxxxq_f64(a); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int8_t a[16]; int8_t r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t r = simde_vxxxq_s8(a); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vxxxq_s8(a); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int16_t a[8]; int16_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t r = 
simde_vxxxq_s16(a); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vxxxq_s16(a); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int32_t a[4]; int32_t r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t r = simde_vxxxq_s32(a); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vxxxq_s32(a); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int64_t a[2]; int64_t r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t r = simde_vxxxq_s64(a); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int64x2_t r = simde_vxxxq_s64(a); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint8_t a[16]; uint8_t r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; 
i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t r = simde_vxxxq_u8(a); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vxxxq_u8(a); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint16_t a[8]; uint16_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t r = simde_vxxxq_u16(a); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t r = simde_vxxxq_u16(a); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint32_t a[4]; uint32_t r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t r = simde_vxxxq_u32(a); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t r = simde_vxxxq_u32(a); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint64_t a[2]; uint64_t 
r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t r = simde_vxxxq_u64(a); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t r = simde_vxxxq_u64(a); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/skel-triple.c000066400000000000000000000602271400333146700177250ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN xxx #include "test-neon.h" #include "../../../simde/arm/neon/xxx.h" static int test_simde_vxxx_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 c[2]; simde_float32 r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = 
simde_vld1_f32(test_vec[i].b); simde_float32x2_t c = simde_vld1_f32(test_vec[i].c); simde_float32x2_t r = simde_vxxx_f32(a, b, c); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t c = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vxxx_f32(a, b, c); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { simde_float64 a[1]; simde_float64 b[1]; simde_float64 c[1]; simde_float64 r[1]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_float64x1_t c = simde_vld1_f64(test_vec[i].c); simde_float64x1_t r = simde_vxxx_f64(a, b, c); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t b = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t c = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t r = simde_vxxx_f64(a, b, c); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x1(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } 
static int test_simde_vxxx_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int8_t a[8]; int8_t b[8]; int8_t c[8]; int8_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t c = simde_vld1_s8(test_vec[i].c); simde_int8x8_t r = simde_vxxx_s8(a, b, c); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int8x8_t c = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vxxx_s8(a, b, c); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int16_t a[4]; int16_t b[4]; int16_t c[4]; int16_t r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t c = simde_vld1_s16(test_vec[i].c); simde_int16x4_t r = simde_vxxx_s16(a, b, c); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int16x4_t c = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vxxx_s16(a, b, c); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_arm_neon_write_i16x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int32_t a[2]; int32_t b[2]; int32_t c[2]; int32_t r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t c = simde_vld1_s32(test_vec[i].c); simde_int32x2_t r = simde_vxxx_s32(a, b, c); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int32x2_t c = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vxxx_s32(a, b, c); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int64_t a[1]; int64_t b[1]; int64_t c[1]; int64_t r[1]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t c = simde_vld1_s64(test_vec[i].c); simde_int64x1_t r = simde_vxxx_s64(a, b, c); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); simde_int64x1_t b = simde_test_arm_neon_random_i64x1(); simde_int64x1_t c = simde_test_arm_neon_random_i64x1(); simde_int64x1_t r = 
simde_vxxx_s64(a, b, c); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint8_t a[8]; uint8_t b[8]; uint8_t c[8]; uint8_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t c = simde_vld1_u8(test_vec[i].c); simde_uint8x8_t r = simde_vxxx_u8(a, b, c); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t c = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vxxx_u8(a, b, c); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t c[4]; uint16_t r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t c = simde_vld1_u16(test_vec[i].c); simde_uint16x4_t r = simde_vxxx_u16(a, b, c); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = 
simde_test_arm_neon_random_u16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t c = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t r = simde_vxxx_u16(a, b, c); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t c[2]; uint32_t r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t c = simde_vld1_u32(test_vec[i].c); simde_uint32x2_t r = simde_vxxx_u32(a, b, c); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t c = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t r = simde_vxxx_u32(a, b, c); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint64_t a[1]; uint64_t b[1]; uint64_t c[1]; uint64_t r[1]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t c = simde_vld1_u64(test_vec[i].c); simde_uint64x1_t r = simde_vxxx_u64(a, b, 
c); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t b = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t c = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t r = simde_vxxx_u64(a, b, c); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 c[4]; simde_float32 r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t c = simde_vld1q_f32(test_vec[i].c); simde_float32x4_t r = simde_vxxxq_f32(a, b, c); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t c = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vxxxq_f32(a, b, c); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 c[2]; simde_float64 r[2]; } 
test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t c = simde_vld1q_f64(test_vec[i].c); simde_float64x2_t r = simde_vxxxq_f64(a, b, c); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t b = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t c = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t r = simde_vxxxq_f64(a, b, c); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int8_t a[16]; int8_t b[16]; int8_t c[16]; int8_t r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t c = simde_vld1q_s8(test_vec[i].c); simde_int8x16_t r = simde_vxxxq_s8(a, b, c); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int8x16_t c = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vxxxq_s8(a, b, c); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int16_t a[8]; int16_t b[8]; int16_t c[8]; int16_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t c = simde_vld1q_s16(test_vec[i].c); simde_int16x8_t r = simde_vxxxq_s16(a, b, c); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int16x8_t c = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vxxxq_s16(a, b, c); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int32_t a[4]; int32_t b[4]; int32_t c[4]; int32_t r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t c = simde_vld1q_s32(test_vec[i].c); simde_int32x4_t r = simde_vxxxq_s32(a, b, c); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int32x4_t c = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vxxxq_s32(a, b, c); simde_test_arm_neon_write_i32x4(2, 
a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int64_t a[2]; int64_t b[2]; int64_t c[2]; int64_t r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t c = simde_vld1q_s64(test_vec[i].c); simde_int64x2_t r = simde_vxxxq_s64(a, b, c); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); simde_int64x2_t c = simde_test_arm_neon_random_i64x2(); simde_int64x2_t r = simde_vxxxq_s64(a, b, c); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint8_t a[16]; uint8_t b[16]; uint8_t c[16]; uint8_t r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t c = simde_vld1q_u8(test_vec[i].c); simde_uint8x16_t r = simde_vxxxq_u8(a, b, c); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t b = 
simde_test_arm_neon_random_u8x16(); simde_uint8x16_t c = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vxxxq_u8(a, b, c); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint16_t a[8]; uint16_t b[8]; uint16_t c[8]; uint16_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t c = simde_vld1q_u16(test_vec[i].c); simde_uint16x8_t r = simde_vxxxq_u16(a, b, c); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t c = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t r = simde_vxxxq_u16(a, b, c); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t c[4]; uint32_t r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t c = simde_vld1q_u32(test_vec[i].c); simde_uint32x4_t r = simde_vxxxq_u32(a, b, c); 
simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t c = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t r = simde_vxxxq_u32(a, b, c); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint64_t a[2]; uint64_t b[2]; uint64_t c[2]; uint64_t r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t c = simde_vld1q_u64(test_vec[i].c); simde_uint64x2_t r = simde_vxxxq_u64(a, b, c); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t c = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t r = simde_vxxxq_u64(a, b, c); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_s64) 
SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/skel.c000066400000000000000000000477071400333146700164400ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN xxx #include "test-neon.h" #include "../../../simde/arm/neon/xxx.h" static int test_simde_vxxx_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vxxx_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t r = simde_vxxx_f32(a, b); simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { simde_float64 a[1]; simde_float64 b[1]; simde_float64 r[1]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a 
= simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_float64x1_t r = simde_vxxx_f64(a, b); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t b = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_float64x1_t r = simde_vxxx_f64(a, b); simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vxxx_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int8x8_t r = simde_vxxx_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vxxx_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else 
fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int16x4_t r = simde_vxxx_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vxxx_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int32x2_t r = simde_vxxx_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int64_t a[1]; int64_t b[1]; int64_t r[1]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r = simde_vxxx_s64(a, b); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); simde_int64x1_t b = simde_test_arm_neon_random_i64x1(); simde_int64x1_t r = simde_vxxx_s64(a, b); 
simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vxxx_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t r = simde_vxxx_u8(a, b); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vxxx_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t r = simde_vxxx_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int 
test_simde_vxxx_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vxxx_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t r = simde_vxxx_u32(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxx_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_vxxx_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t b = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t r = simde_vxxx_u64(a, b); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; 
i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vxxxq_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t r = simde_vxxxq_f32(a, b); simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vxxxq_f64(a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t b = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); simde_float64x2_t r = simde_vxxxq_f64(a, b); simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); 
simde_int8x16_t r = simde_vxxxq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int8x16_t r = simde_vxxxq_s8(a, b); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vxxxq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int16x8_t r = simde_vxxxq_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vxxxq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = 
simde_test_arm_neon_random_i32x4(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int32x4_t r = simde_vxxxq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_vxxxq_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); simde_int64x2_t r = simde_vxxxq_s64(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vxxxq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t r = simde_vxxxq_u8(a, b); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); 
simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vxxxq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t r = simde_vxxxq_u16(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vxxxq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t r = simde_vxxxq_u32(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vxxxq_u64 (SIMDE_MUNIT_TEST_ARGS) { 
#if 0 static const struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vxxxq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t r = simde_vxxxq_u64(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vxxx_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vxxxq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/sra_n.c000066400000000000000000003454241400333146700166010ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN sra_n #include #include static int test_simde_vsra_n_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r1[8]; int8_t r3[8]; int8_t r5[8]; int8_t r6[8]; int8_t r8[8]; } 
test_vec[] = { { { -INT8_C( 110), -INT8_C( 96), -INT8_C( 68), INT8_C( 99), INT8_C( 115), INT8_C( 79), INT8_C( 90), -INT8_C( 125) }, { INT8_C( 44), INT8_C( 15), INT8_C( 85), INT8_C( 89), INT8_C( 6), -INT8_C( 90), -INT8_C( 51), -INT8_C( 80) }, { -INT8_C( 88), -INT8_C( 89), -INT8_C( 26), -INT8_C( 113), INT8_C( 118), INT8_C( 34), INT8_C( 64), INT8_C( 91) }, { -INT8_C( 105), -INT8_C( 95), -INT8_C( 58), INT8_C( 110), INT8_C( 115), INT8_C( 67), INT8_C( 83), INT8_C( 121) }, { -INT8_C( 109), -INT8_C( 96), -INT8_C( 66), INT8_C( 101), INT8_C( 115), INT8_C( 76), INT8_C( 88), INT8_MIN }, { -INT8_C( 110), -INT8_C( 96), -INT8_C( 67), INT8_C( 100), INT8_C( 115), INT8_C( 77), INT8_C( 89), -INT8_C( 127) }, { -INT8_C( 110), -INT8_C( 96), -INT8_C( 68), INT8_C( 99), INT8_C( 115), INT8_C( 78), INT8_C( 89), -INT8_C( 126) } }, { { INT8_C( 22), -INT8_C( 35), INT8_C( 93), -INT8_C( 94), -INT8_C( 68), INT8_C( 118), INT8_C( 60), -INT8_C( 98) }, { -INT8_C( 104), INT8_C( 7), INT8_C( 42), INT8_C( 107), -INT8_C( 70), INT8_C( 83), -INT8_C( 72), INT8_C( 76) }, { -INT8_C( 30), -INT8_C( 32), INT8_C( 114), -INT8_C( 41), -INT8_C( 103), -INT8_C( 97), INT8_C( 24), -INT8_C( 60) }, { INT8_C( 9), -INT8_C( 35), INT8_C( 98), -INT8_C( 81), -INT8_C( 77), INT8_MIN, INT8_C( 51), -INT8_C( 89) }, { INT8_C( 18), -INT8_C( 35), INT8_C( 94), -INT8_C( 91), -INT8_C( 71), INT8_C( 120), INT8_C( 57), -INT8_C( 96) }, { INT8_C( 20), -INT8_C( 35), INT8_C( 93), -INT8_C( 93), -INT8_C( 70), INT8_C( 119), INT8_C( 58), -INT8_C( 97) }, { INT8_C( 21), -INT8_C( 35), INT8_C( 93), -INT8_C( 94), -INT8_C( 69), INT8_C( 118), INT8_C( 59), -INT8_C( 98) } }, { { -INT8_C( 13), INT8_C( 116), -INT8_C( 81), INT8_C( 102), -INT8_C( 60), INT8_C( 10), -INT8_C( 23), -INT8_C( 16) }, { INT8_C( 25), INT8_C( 63), INT8_C( 74), INT8_C( 31), -INT8_C( 27), INT8_C( 23), -INT8_C( 49), -INT8_C( 4) }, { -INT8_C( 1), -INT8_C( 109), -INT8_C( 44), INT8_C( 117), -INT8_C( 74), INT8_C( 21), -INT8_C( 48), -INT8_C( 18) }, { -INT8_C( 10), INT8_C( 123), -INT8_C( 72), 
INT8_C( 105), -INT8_C( 64), INT8_C( 12), -INT8_C( 30), -INT8_C( 17) }, { -INT8_C( 13), INT8_C( 117), -INT8_C( 79), INT8_C( 102), -INT8_C( 61), INT8_C( 10), -INT8_C( 25), -INT8_C( 17) }, { -INT8_C( 13), INT8_C( 116), -INT8_C( 80), INT8_C( 102), -INT8_C( 61), INT8_C( 10), -INT8_C( 24), -INT8_C( 17) }, { -INT8_C( 13), INT8_C( 116), -INT8_C( 81), INT8_C( 102), -INT8_C( 61), INT8_C( 10), -INT8_C( 24), -INT8_C( 17) } }, { { -INT8_C( 12), INT8_C( 44), -INT8_C( 98), -INT8_C( 80), -INT8_C( 93), -INT8_C( 38), INT8_C( 79), INT8_C( 59) }, { -INT8_C( 31), INT8_C( 121), -INT8_C( 90), -INT8_C( 100), -INT8_C( 52), INT8_C( 95), -INT8_C( 24), -INT8_C( 64) }, { -INT8_C( 28), INT8_C( 104), INT8_C( 113), INT8_C( 126), -INT8_C( 119), INT8_C( 9), INT8_C( 67), INT8_C( 27) }, { -INT8_C( 16), INT8_C( 59), -INT8_C( 110), -INT8_C( 93), -INT8_C( 100), -INT8_C( 27), INT8_C( 76), INT8_C( 51) }, { -INT8_C( 13), INT8_C( 47), -INT8_C( 101), -INT8_C( 84), -INT8_C( 95), -INT8_C( 36), INT8_C( 78), INT8_C( 57) }, { -INT8_C( 13), INT8_C( 45), -INT8_C( 100), -INT8_C( 82), -INT8_C( 94), -INT8_C( 37), INT8_C( 78), INT8_C( 58) }, { -INT8_C( 13), INT8_C( 44), -INT8_C( 99), -INT8_C( 81), -INT8_C( 94), -INT8_C( 38), INT8_C( 78), INT8_C( 58) } }, { { -INT8_C( 45), -INT8_C( 104), INT8_C( 38), -INT8_C( 105), -INT8_C( 94), INT8_C( 16), -INT8_C( 120), -INT8_C( 69) }, { INT8_C( 79), -INT8_C( 46), -INT8_C( 37), INT8_C( 52), -INT8_C( 23), -INT8_C( 86), INT8_C( 48), -INT8_C( 34) }, { -INT8_C( 6), -INT8_C( 127), INT8_C( 19), -INT8_C( 79), -INT8_C( 106), -INT8_C( 27), -INT8_C( 96), -INT8_C( 86) }, { -INT8_C( 36), -INT8_C( 110), INT8_C( 33), -INT8_C( 99), -INT8_C( 97), INT8_C( 5), -INT8_C( 114), -INT8_C( 74) }, { -INT8_C( 43), -INT8_C( 106), INT8_C( 36), -INT8_C( 104), -INT8_C( 95), INT8_C( 13), -INT8_C( 119), -INT8_C( 71) }, { -INT8_C( 44), -INT8_C( 105), INT8_C( 37), -INT8_C( 105), -INT8_C( 95), INT8_C( 14), -INT8_C( 120), -INT8_C( 70) }, { -INT8_C( 45), -INT8_C( 105), INT8_C( 37), -INT8_C( 105), -INT8_C( 95), INT8_C( 
15), -INT8_C( 120), -INT8_C( 70) } }, { { -INT8_C( 41), -INT8_C( 49), -INT8_C( 114), INT8_C( 122), -INT8_C( 87), -INT8_C( 35), -INT8_C( 75), -INT8_C( 117) }, { INT8_C( 87), INT8_C( 92), INT8_C( 39), INT8_C( 35), -INT8_C( 69), INT8_C( 15), -INT8_C( 29), -INT8_C( 114) }, { INT8_C( 2), -INT8_C( 3), -INT8_C( 95), -INT8_C( 117), -INT8_C( 122), -INT8_C( 28), -INT8_C( 90), INT8_C( 82) }, { -INT8_C( 31), -INT8_C( 38), -INT8_C( 110), INT8_C( 126), -INT8_C( 96), -INT8_C( 34), -INT8_C( 79), INT8_C( 124) }, { -INT8_C( 39), -INT8_C( 47), -INT8_C( 113), INT8_C( 123), -INT8_C( 90), -INT8_C( 35), -INT8_C( 76), -INT8_C( 121) }, { -INT8_C( 40), -INT8_C( 48), -INT8_C( 114), INT8_C( 122), -INT8_C( 89), -INT8_C( 35), -INT8_C( 76), -INT8_C( 119) }, { -INT8_C( 41), -INT8_C( 49), -INT8_C( 114), INT8_C( 122), -INT8_C( 88), -INT8_C( 35), -INT8_C( 76), -INT8_C( 118) } }, { { -INT8_C( 89), INT8_C( 10), INT8_C( 38), INT8_C( 73), INT8_C( 26), -INT8_C( 82), INT8_C( 5), INT8_C( 105) }, { INT8_MIN, -INT8_C( 32), -INT8_C( 99), INT8_C( 105), -INT8_C( 118), -INT8_C( 50), INT8_C( 71), INT8_C( 97) }, { INT8_C( 103), -INT8_C( 6), -INT8_C( 12), INT8_C( 125), -INT8_C( 33), -INT8_C( 107), INT8_C( 40), -INT8_C( 103) }, { -INT8_C( 105), INT8_C( 6), INT8_C( 25), INT8_C( 86), INT8_C( 11), -INT8_C( 89), INT8_C( 13), INT8_C( 117) }, { -INT8_C( 93), INT8_C( 9), INT8_C( 34), INT8_C( 76), INT8_C( 22), -INT8_C( 84), INT8_C( 7), INT8_C( 108) }, { -INT8_C( 91), INT8_C( 9), INT8_C( 36), INT8_C( 74), INT8_C( 24), -INT8_C( 83), INT8_C( 6), INT8_C( 106) }, { -INT8_C( 90), INT8_C( 9), INT8_C( 37), INT8_C( 73), INT8_C( 25), -INT8_C( 83), INT8_C( 5), INT8_C( 105) } }, { { -INT8_C( 99), -INT8_C( 42), -INT8_C( 37), INT8_C( 70), -INT8_C( 77), -INT8_C( 111), -INT8_C( 47), INT8_C( 10) }, { -INT8_C( 19), -INT8_C( 8), INT8_C( 46), -INT8_C( 88), INT8_C( 8), INT8_C( 17), INT8_C( 54), -INT8_C( 81) }, { -INT8_C( 109), -INT8_C( 46), -INT8_C( 14), INT8_C( 26), -INT8_C( 73), -INT8_C( 103), -INT8_C( 20), -INT8_C( 31) }, { -INT8_C( 102), 
-INT8_C( 43), -INT8_C( 32), INT8_C( 59), -INT8_C( 76), -INT8_C( 109), -INT8_C( 41), -INT8_C( 1) }, { -INT8_C( 100), -INT8_C( 43), -INT8_C( 36), INT8_C( 67), -INT8_C( 77), -INT8_C( 111), -INT8_C( 46), INT8_C( 7) }, { -INT8_C( 100), -INT8_C( 43), -INT8_C( 37), INT8_C( 68), -INT8_C( 77), -INT8_C( 111), -INT8_C( 47), INT8_C( 8) }, { -INT8_C( 100), -INT8_C( 43), -INT8_C( 37), INT8_C( 69), -INT8_C( 77), -INT8_C( 111), -INT8_C( 47), INT8_C( 9) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r1 = simde_vsra_n_s8(a, b, 1); simde_int8x8_t r3 = simde_vsra_n_s8(a, b, 3); simde_int8x8_t r5 = simde_vsra_n_s8(a, b, 5); simde_int8x8_t r6 = simde_vsra_n_s8(a, b, 6); simde_int8x8_t r8 = simde_vsra_n_s8(a, b, 8); simde_test_arm_neon_assert_equal_i8x8(r1, simde_vld1_s8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i8x8(r3, simde_vld1_s8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i8x8(r5, simde_vld1_s8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_i8x8(r6, simde_vld1_s8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i8x8(r8, simde_vld1_s8(test_vec[i].r8)); } return 0; } static int test_simde_vsra_n_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r3[4]; int16_t r6[4]; int16_t r10[4]; int16_t r13[4]; int16_t r16[4]; } test_vec[] = { { { INT16_C( 16993), -INT16_C( 28686), INT16_C( 3619), INT16_C( 18657) }, { INT16_C( 1775), INT16_C( 16485), INT16_C( 22371), INT16_C( 8441) }, { INT16_C( 17214), -INT16_C( 26626), INT16_C( 6415), INT16_C( 19712) }, { INT16_C( 17020), -INT16_C( 28429), INT16_C( 3968), INT16_C( 18788) }, { INT16_C( 16994), -INT16_C( 28670), INT16_C( 3640), INT16_C( 18665) }, { INT16_C( 16993), -INT16_C( 28684), INT16_C( 3621), INT16_C( 18658) }, { INT16_C( 16993), -INT16_C( 28686), INT16_C( 3619), INT16_C( 18657) } }, { { -INT16_C( 20281), INT16_C( 10068), INT16_C( 32106), INT16_C( 
23214) }, { -INT16_C( 24984), -INT16_C( 5113), -INT16_C( 5004), -INT16_C( 10556) }, { -INT16_C( 23404), INT16_C( 9428), INT16_C( 31480), INT16_C( 21894) }, { -INT16_C( 20672), INT16_C( 9988), INT16_C( 32027), INT16_C( 23049) }, { -INT16_C( 20306), INT16_C( 10063), INT16_C( 32101), INT16_C( 23203) }, { -INT16_C( 20285), INT16_C( 10067), INT16_C( 32105), INT16_C( 23212) }, { -INT16_C( 20282), INT16_C( 10067), INT16_C( 32105), INT16_C( 23213) } }, { { -INT16_C( 18897), INT16_C( 21093), INT16_C( 18372), -INT16_C( 19302) }, { -INT16_C( 179), -INT16_C( 20236), -INT16_C( 4521), INT16_C( 7888) }, { -INT16_C( 18920), INT16_C( 18563), INT16_C( 17806), -INT16_C( 18316) }, { -INT16_C( 18900), INT16_C( 20776), INT16_C( 18301), -INT16_C( 19179) }, { -INT16_C( 18898), INT16_C( 21073), INT16_C( 18367), -INT16_C( 19295) }, { -INT16_C( 18898), INT16_C( 21090), INT16_C( 18371), -INT16_C( 19302) }, { -INT16_C( 18898), INT16_C( 21092), INT16_C( 18371), -INT16_C( 19302) } }, { { INT16_C( 9630), INT16_C( 2117), -INT16_C( 3166), INT16_C( 2658) }, { INT16_C( 27025), INT16_C( 1782), -INT16_C( 17834), -INT16_C( 31268) }, { INT16_C( 13008), INT16_C( 2339), -INT16_C( 5396), -INT16_C( 1251) }, { INT16_C( 10052), INT16_C( 2144), -INT16_C( 3445), INT16_C( 2169) }, { INT16_C( 9656), INT16_C( 2118), -INT16_C( 3184), INT16_C( 2627) }, { INT16_C( 9633), INT16_C( 2117), -INT16_C( 3169), INT16_C( 2654) }, { INT16_C( 9630), INT16_C( 2117), -INT16_C( 3167), INT16_C( 2657) } }, { { INT16_C( 16752), INT16_C( 13783), INT16_C( 29064), -INT16_C( 10775) }, { -INT16_C( 8847), -INT16_C( 14203), INT16_C( 22219), INT16_C( 27110) }, { INT16_C( 15646), INT16_C( 12007), INT16_C( 31841), -INT16_C( 7387) }, { INT16_C( 16613), INT16_C( 13561), INT16_C( 29411), -INT16_C( 10352) }, { INT16_C( 16743), INT16_C( 13769), INT16_C( 29085), -INT16_C( 10749) }, { INT16_C( 16750), INT16_C( 13781), INT16_C( 29066), -INT16_C( 10772) }, { INT16_C( 16751), INT16_C( 13782), INT16_C( 29064), -INT16_C( 10775) } }, { { INT16_C( 11387), 
INT16_C( 7538), -INT16_C( 11233), -INT16_C( 20185) }, { INT16_C( 7742), -INT16_C( 27465), -INT16_C( 27688), INT16_C( 18713) }, { INT16_C( 12354), INT16_C( 4104), -INT16_C( 14694), -INT16_C( 17846) }, { INT16_C( 11507), INT16_C( 7108), -INT16_C( 11666), -INT16_C( 19893) }, { INT16_C( 11394), INT16_C( 7511), -INT16_C( 11261), -INT16_C( 20167) }, { INT16_C( 11387), INT16_C( 7534), -INT16_C( 11237), -INT16_C( 20183) }, { INT16_C( 11387), INT16_C( 7537), -INT16_C( 11234), -INT16_C( 20185) } }, { { -INT16_C( 3884), INT16_C( 23934), INT16_C( 26465), -INT16_C( 11726) }, { -INT16_C( 18364), INT16_C( 4250), -INT16_C( 32498), -INT16_C( 30343) }, { -INT16_C( 6180), INT16_C( 24465), INT16_C( 22402), -INT16_C( 15519) }, { -INT16_C( 4171), INT16_C( 24000), INT16_C( 25957), -INT16_C( 12201) }, { -INT16_C( 3902), INT16_C( 23938), INT16_C( 26433), -INT16_C( 11756) }, { -INT16_C( 3887), INT16_C( 23934), INT16_C( 26461), -INT16_C( 11730) }, { -INT16_C( 3885), INT16_C( 23934), INT16_C( 26464), -INT16_C( 11727) } }, { { -INT16_C( 5203), -INT16_C( 13146), -INT16_C( 12864), -INT16_C( 387) }, { INT16_C( 13547), -INT16_C( 15214), -INT16_C( 21561), -INT16_C( 25587) }, { -INT16_C( 3510), -INT16_C( 15048), -INT16_C( 15560), -INT16_C( 3586) }, { -INT16_C( 4992), -INT16_C( 13384), -INT16_C( 13201), -INT16_C( 787) }, { -INT16_C( 5190), -INT16_C( 13161), -INT16_C( 12886), -INT16_C( 412) }, { -INT16_C( 5202), -INT16_C( 13148), -INT16_C( 12867), -INT16_C( 391) }, { -INT16_C( 5203), -INT16_C( 13147), -INT16_C( 12865), -INT16_C( 388) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r3 = simde_vsra_n_s16(a, b, 3); simde_int16x4_t r6 = simde_vsra_n_s16(a, b, 6); simde_int16x4_t r10 = simde_vsra_n_s16(a, b, 10); simde_int16x4_t r13 = simde_vsra_n_s16(a, b, 13); simde_int16x4_t r16 = simde_vsra_n_s16(a, b, 16); simde_test_arm_neon_assert_equal_i16x4(r3, 
simde_vld1_s16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i16x4(r6, simde_vld1_s16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i16x4(r10, simde_vld1_s16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_i16x4(r13, simde_vld1_s16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i16x4(r16, simde_vld1_s16(test_vec[i].r16)); } return 0; } static int test_simde_vsra_n_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r6[2]; int32_t r13[2]; int32_t r19[2]; int32_t r26[2]; int32_t r32[2]; } test_vec[] = { { { INT32_C( 1211834128), INT32_C( 1573037445) }, { INT32_C( 1562803979), -INT32_C( 2071099084) }, { INT32_C( 1236252940), INT32_C( 1540676521) }, { INT32_C( 1212024899), INT32_C( 1572784625) }, { INT32_C( 1211837108), INT32_C( 1573033494) }, { INT32_C( 1211834151), INT32_C( 1573037414) }, { INT32_C( 1211834128), INT32_C( 1573037444) } }, { { -INT32_C( 1716331456), INT32_C( 1015424990) }, { INT32_C( 665328187), -INT32_C( 1086393426) }, { -INT32_C( 1705935704), INT32_C( 998450092) }, { -INT32_C( 1716250240), INT32_C( 1015292373) }, { -INT32_C( 1716330187), INT32_C( 1015422917) }, { -INT32_C( 1716331447), INT32_C( 1015424973) }, { -INT32_C( 1716331456), INT32_C( 1015424989) } }, { { -INT32_C( 1744340462), INT32_C( 720750879) }, { INT32_C( 2106071880), -INT32_C( 452913755) }, { -INT32_C( 1711433089), INT32_C( 713674101) }, { -INT32_C( 1744083374), INT32_C( 720695591) }, { -INT32_C( 1744336445), INT32_C( 720750015) }, { -INT32_C( 1744340431), INT32_C( 720750872) }, { -INT32_C( 1744340462), INT32_C( 720750878) } }, { { -INT32_C( 797002767), INT32_C( 369886426) }, { -INT32_C( 851593954), -INT32_C( 1165198424) }, { -INT32_C( 810308923), INT32_C( 351680200) }, { -INT32_C( 797106722), INT32_C( 369744189) }, { -INT32_C( 797004392), INT32_C( 369884203) }, { -INT32_C( 797002780), INT32_C( 369886408) }, { -INT32_C( 797002768), INT32_C( 369886425) } }, { { INT32_C( 357733365), -INT32_C( 1539356580) }, { INT32_C( 136431459), -INT32_C( 
823319843) }, { INT32_C( 359865106), -INT32_C( 1552220953) }, { INT32_C( 357750019), -INT32_C( 1539457083) }, { INT32_C( 357733625), -INT32_C( 1539358151) }, { INT32_C( 357733367), -INT32_C( 1539356593) }, { INT32_C( 357733365), -INT32_C( 1539356581) } }, { { -INT32_C( 1331794731), -INT32_C( 1882805392) }, { INT32_C( 140247904), INT32_C( 1958930558) }, { -INT32_C( 1329603358), -INT32_C( 1852197103) }, { -INT32_C( 1331777611), -INT32_C( 1882566265) }, { -INT32_C( 1331794464), -INT32_C( 1882801656) }, { -INT32_C( 1331794729), -INT32_C( 1882805363) }, { -INT32_C( 1331794731), -INT32_C( 1882805392) } }, { { -INT32_C( 678881925), -INT32_C( 1065629603) }, { INT32_C( 1841929616), -INT32_C( 1791248705) }, { -INT32_C( 650101775), -INT32_C( 1093617865) }, { -INT32_C( 678657081), -INT32_C( 1065848262) }, { -INT32_C( 678878412), -INT32_C( 1065633020) }, { -INT32_C( 678881898), -INT32_C( 1065629630) }, { -INT32_C( 678881925), -INT32_C( 1065629604) } }, { { -INT32_C( 1824138718), -INT32_C( 450753659) }, { -INT32_C( 1930592754), -INT32_C( 520048794) }, { -INT32_C( 1854304230), -INT32_C( 458879422) }, { -INT32_C( 1824374387), -INT32_C( 450817142) }, { -INT32_C( 1824142401), -INT32_C( 450754651) }, { -INT32_C( 1824138747), -INT32_C( 450753667) }, { -INT32_C( 1824138719), -INT32_C( 450753660) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r6 = simde_vsra_n_s32(a, b, 6); simde_int32x2_t r13 = simde_vsra_n_s32(a, b, 13); simde_int32x2_t r19 = simde_vsra_n_s32(a, b, 19); simde_int32x2_t r26 = simde_vsra_n_s32(a, b, 26); simde_int32x2_t r32 = simde_vsra_n_s32(a, b, 32); simde_test_arm_neon_assert_equal_i32x2(r6, simde_vld1_s32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i32x2(r13, simde_vld1_s32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i32x2(r19, simde_vld1_s32(test_vec[i].r19)); 
simde_test_arm_neon_assert_equal_i32x2(r26, simde_vld1_s32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i32x2(r32, simde_vld1_s32(test_vec[i].r32)); } return 0; } static int test_simde_vsra_n_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; int64_t b[1]; int64_t r13[1]; int64_t r26[1]; int64_t r39[1]; int64_t r52[1]; int64_t r64[1]; } test_vec[] = { { { INT64_C( 881464614002129577) }, { INT64_C( 7580105769023611175) }, { INT64_C( 882389919882137342) }, { INT64_C( 881464726954507507) }, { INT64_C( 881464614015917709) }, { INT64_C( 881464614002131260) }, { INT64_C( 881464614002129577) } }, { { -INT64_C( 6596341350465540194) }, { -INT64_C( 4922836740847382444) }, { -INT64_C( 6596942282684881916) }, { -INT64_C( 6596341423821524001) }, { -INT64_C( 6596341350474494782) }, { -INT64_C( 6596341350465541288) }, { -INT64_C( 6596341350465540195) } }, { { INT64_C( 1764960191635937804) }, { -INT64_C( 7459521215422330873) }, { INT64_C( 1764049605550070820) }, { INT64_C( 1764960080480409744) }, { INT64_C( 1764960191622369013) }, { INT64_C( 1764960191635936147) }, { INT64_C( 1764960191635937803) } }, { { INT64_C( 3135817985201324387) }, { -INT64_C( 5816714875682811526) }, { INT64_C( 3135107936998726387) }, { INT64_C( 3135817898525518405) }, { INT64_C( 3135817985190743844) }, { INT64_C( 3135817985201323095) }, { INT64_C( 3135817985201324386) } }, { { -INT64_C( 2502418275722235589) }, { INT64_C( 6861912416510183591) }, { -INT64_C( 2501580639929204561) }, { -INT64_C( 2502418173471772573) }, { -INT64_C( 2502418275709753844) }, { -INT64_C( 2502418275722234066) }, { -INT64_C( 2502418275722235589) } }, { { -INT64_C( 7913734837817545175) }, { INT64_C( 8802395695456542421) }, { -INT64_C( 7912660326624252141) }, { -INT64_C( 7913734706651628025) }, { -INT64_C( 7913734837801533711) }, { -INT64_C( 7913734837817543221) }, { -INT64_C( 7913734837817545175) } }, { { -INT64_C( 8103686669931755051) }, { INT64_C( 634843580358227827) }, { -INT64_C( 8103609174377512104) }, { -INT64_C( 
8103686660471848528) }, { -INT64_C( 8103686669930600278) }, { -INT64_C( 8103686669931754911) }, { -INT64_C( 8103686669931755051) } }, { { -INT64_C( 6232077175440732199) }, { -INT64_C( 2788492309201575788) }, { -INT64_C( 6232417567568320282) }, { -INT64_C( 6232077216992505587) }, { -INT64_C( 6232077175445804437) }, { -INT64_C( 6232077175440732819) }, { -INT64_C( 6232077175440732200) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r13 = simde_vsra_n_s64(a, b, 13); simde_int64x1_t r26 = simde_vsra_n_s64(a, b, 26); simde_int64x1_t r39 = simde_vsra_n_s64(a, b, 39); simde_int64x1_t r52 = simde_vsra_n_s64(a, b, 52); simde_int64x1_t r64 = simde_vsra_n_s64(a, b, 64); simde_test_arm_neon_assert_equal_i64x1(r13, simde_vld1_s64(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i64x1(r26, simde_vld1_s64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i64x1(r39, simde_vld1_s64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_i64x1(r52, simde_vld1_s64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_i64x1(r64, simde_vld1_s64(test_vec[i].r64)); } return 0; } static int test_simde_vsra_n_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t b[8]; uint8_t r1[8]; uint8_t r3[8]; uint8_t r5[8]; uint8_t r6[8]; uint8_t r8[8]; } test_vec[] = { { { UINT8_C(141), UINT8_C(222), UINT8_C(212), UINT8_C( 6), UINT8_C(198), UINT8_C( 82), UINT8_C(111), UINT8_C(181) }, { UINT8_C(141), UINT8_C(166), UINT8_C( 70), UINT8_C(142), UINT8_C(210), UINT8_C(176), UINT8_C(168), UINT8_C(170) }, { UINT8_C(211), UINT8_C( 49), UINT8_C(247), UINT8_C( 77), UINT8_C( 47), UINT8_C(170), UINT8_C(195), UINT8_C( 10) }, { UINT8_C(158), UINT8_C(242), UINT8_C(220), UINT8_C( 23), UINT8_C(224), UINT8_C(104), UINT8_C(132), UINT8_C(202) }, { UINT8_C(145), UINT8_C(227), UINT8_C(214), UINT8_C( 10), UINT8_C(204), UINT8_C( 87), UINT8_C(116), UINT8_C(186) }, { 
UINT8_C(143), UINT8_C(224), UINT8_C(213), UINT8_C( 8), UINT8_C(201), UINT8_C( 84), UINT8_C(113), UINT8_C(183) }, { UINT8_C(141), UINT8_C(222), UINT8_C(212), UINT8_C( 6), UINT8_C(198), UINT8_C( 82), UINT8_C(111), UINT8_C(181) } }, { { UINT8_C(183), UINT8_C( 33), UINT8_C( 32), UINT8_C( 32), UINT8_C( 41), UINT8_C(119), UINT8_C(166), UINT8_C(158) }, { UINT8_C(237), UINT8_C( 83), UINT8_C( 80), UINT8_C(142), UINT8_C(111), UINT8_C(137), UINT8_C( 95), UINT8_C(252) }, { UINT8_C( 45), UINT8_C( 74), UINT8_C( 72), UINT8_C(103), UINT8_C( 96), UINT8_C(187), UINT8_C(213), UINT8_C( 28) }, { UINT8_C(212), UINT8_C( 43), UINT8_C( 42), UINT8_C( 49), UINT8_C( 54), UINT8_C(136), UINT8_C(177), UINT8_C(189) }, { UINT8_C(190), UINT8_C( 35), UINT8_C( 34), UINT8_C( 36), UINT8_C( 44), UINT8_C(123), UINT8_C(168), UINT8_C(165) }, { UINT8_C(186), UINT8_C( 34), UINT8_C( 33), UINT8_C( 34), UINT8_C( 42), UINT8_C(121), UINT8_C(167), UINT8_C(161) }, { UINT8_C(183), UINT8_C( 33), UINT8_C( 32), UINT8_C( 32), UINT8_C( 41), UINT8_C(119), UINT8_C(166), UINT8_C(158) } }, { { UINT8_C(103), UINT8_C( 51), UINT8_C( 2), UINT8_C( 45), UINT8_C(133), UINT8_C(113), UINT8_C(226), UINT8_C( 19) }, { UINT8_C( 23), UINT8_C( 40), UINT8_C(161), UINT8_C(233), UINT8_C(217), UINT8_C( 73), UINT8_C(148), UINT8_C(144) }, { UINT8_C(114), UINT8_C( 71), UINT8_C( 82), UINT8_C(161), UINT8_C(241), UINT8_C(149), UINT8_C( 44), UINT8_C( 91) }, { UINT8_C(105), UINT8_C( 56), UINT8_C( 22), UINT8_C( 74), UINT8_C(160), UINT8_C(122), UINT8_C(244), UINT8_C( 37) }, { UINT8_C(103), UINT8_C( 52), UINT8_C( 7), UINT8_C( 52), UINT8_C(139), UINT8_C(115), UINT8_C(230), UINT8_C( 23) }, { UINT8_C(103), UINT8_C( 51), UINT8_C( 4), UINT8_C( 48), UINT8_C(136), UINT8_C(114), UINT8_C(228), UINT8_C( 21) }, { UINT8_C(103), UINT8_C( 51), UINT8_C( 2), UINT8_C( 45), UINT8_C(133), UINT8_C(113), UINT8_C(226), UINT8_C( 19) } }, { { UINT8_C(107), UINT8_C(180), UINT8_C(176), UINT8_C(148), UINT8_C( 43), UINT8_C( 86), UINT8_C( 50), UINT8_C( 25) }, { UINT8_C(169), 
UINT8_C(130), UINT8_C(167), UINT8_C( 24), UINT8_C( 11), UINT8_C( 6), UINT8_C( 20), UINT8_C(114) }, { UINT8_C(191), UINT8_C(245), UINT8_C( 3), UINT8_C(160), UINT8_C( 48), UINT8_C( 89), UINT8_C( 60), UINT8_C( 82) }, { UINT8_C(128), UINT8_C(196), UINT8_C(196), UINT8_C(151), UINT8_C( 44), UINT8_C( 86), UINT8_C( 52), UINT8_C( 39) }, { UINT8_C(112), UINT8_C(184), UINT8_C(181), UINT8_C(148), UINT8_C( 43), UINT8_C( 86), UINT8_C( 50), UINT8_C( 28) }, { UINT8_C(109), UINT8_C(182), UINT8_C(178), UINT8_C(148), UINT8_C( 43), UINT8_C( 86), UINT8_C( 50), UINT8_C( 26) }, { UINT8_C(107), UINT8_C(180), UINT8_C(176), UINT8_C(148), UINT8_C( 43), UINT8_C( 86), UINT8_C( 50), UINT8_C( 25) } }, { { UINT8_C( 57), UINT8_C( 22), UINT8_C(160), UINT8_C(191), UINT8_C(135), UINT8_C(130), UINT8_C(210), UINT8_C(158) }, { UINT8_C(171), UINT8_C(115), UINT8_C(136), UINT8_C(132), UINT8_C(188), UINT8_C( 28), UINT8_C( 20), UINT8_C( 39) }, { UINT8_C(142), UINT8_C( 79), UINT8_C(228), UINT8_C( 1), UINT8_C(229), UINT8_C(144), UINT8_C(220), UINT8_C(177) }, { UINT8_C( 78), UINT8_C( 36), UINT8_C(177), UINT8_C(207), UINT8_C(158), UINT8_C(133), UINT8_C(212), UINT8_C(162) }, { UINT8_C( 62), UINT8_C( 25), UINT8_C(164), UINT8_C(195), UINT8_C(140), UINT8_C(130), UINT8_C(210), UINT8_C(159) }, { UINT8_C( 59), UINT8_C( 23), UINT8_C(162), UINT8_C(193), UINT8_C(137), UINT8_C(130), UINT8_C(210), UINT8_C(158) }, { UINT8_C( 57), UINT8_C( 22), UINT8_C(160), UINT8_C(191), UINT8_C(135), UINT8_C(130), UINT8_C(210), UINT8_C(158) } }, { { UINT8_C(208), UINT8_C(196), UINT8_C(187), UINT8_C(252), UINT8_C( 26), UINT8_C(238), UINT8_C( 21), UINT8_C(195) }, { UINT8_C(112), UINT8_C(188), UINT8_C(219), UINT8_C(124), UINT8_C(194), UINT8_C(239), UINT8_C(238), UINT8_C(251) }, { UINT8_C( 8), UINT8_C( 34), UINT8_C( 40), UINT8_C( 58), UINT8_C(123), UINT8_C(101), UINT8_C(140), UINT8_C( 64) }, { UINT8_C(222), UINT8_C(219), UINT8_C(214), UINT8_C( 11), UINT8_C( 50), UINT8_C( 11), UINT8_C( 50), UINT8_C(226) }, { UINT8_C(211), UINT8_C(201), 
UINT8_C(193), UINT8_MAX, UINT8_C( 32), UINT8_C(245), UINT8_C( 28), UINT8_C(202) }, { UINT8_C(209), UINT8_C(198), UINT8_C(190), UINT8_C(253), UINT8_C( 29), UINT8_C(241), UINT8_C( 24), UINT8_C(198) }, { UINT8_C(208), UINT8_C(196), UINT8_C(187), UINT8_C(252), UINT8_C( 26), UINT8_C(238), UINT8_C( 21), UINT8_C(195) } }, { { UINT8_C( 5), UINT8_C(142), UINT8_C(186), UINT8_C(140), UINT8_C( 17), UINT8_C(140), UINT8_C( 42), UINT8_C(188) }, { UINT8_MAX, UINT8_C(178), UINT8_C( 64), UINT8_C(188), UINT8_C(206), UINT8_C( 84), UINT8_C(227), UINT8_C(159) }, { UINT8_C(132), UINT8_C(231), UINT8_C(218), UINT8_C(234), UINT8_C(120), UINT8_C(182), UINT8_C(155), UINT8_C( 11) }, { UINT8_C( 36), UINT8_C(164), UINT8_C(194), UINT8_C(163), UINT8_C( 42), UINT8_C(150), UINT8_C( 70), UINT8_C(207) }, { UINT8_C( 12), UINT8_C(147), UINT8_C(188), UINT8_C(145), UINT8_C( 23), UINT8_C(142), UINT8_C( 49), UINT8_C(192) }, { UINT8_C( 8), UINT8_C(144), UINT8_C(187), UINT8_C(142), UINT8_C( 20), UINT8_C(141), UINT8_C( 45), UINT8_C(190) }, { UINT8_C( 5), UINT8_C(142), UINT8_C(186), UINT8_C(140), UINT8_C( 17), UINT8_C(140), UINT8_C( 42), UINT8_C(188) } }, { { UINT8_C( 24), UINT8_C(159), UINT8_C(155), UINT8_C( 50), UINT8_C(141), UINT8_C(176), UINT8_C(245), UINT8_C(253) }, { UINT8_C(108), UINT8_C(208), UINT8_C(121), UINT8_C( 46), UINT8_C(191), UINT8_C(104), UINT8_C( 41), UINT8_C(196) }, { UINT8_C( 78), UINT8_C( 7), UINT8_C(215), UINT8_C( 73), UINT8_C(236), UINT8_C(228), UINT8_C( 9), UINT8_C( 95) }, { UINT8_C( 37), UINT8_C(185), UINT8_C(170), UINT8_C( 55), UINT8_C(164), UINT8_C(189), UINT8_C(250), UINT8_C( 21) }, { UINT8_C( 27), UINT8_C(165), UINT8_C(158), UINT8_C( 51), UINT8_C(146), UINT8_C(179), UINT8_C(246), UINT8_C( 3) }, { UINT8_C( 25), UINT8_C(162), UINT8_C(156), UINT8_C( 50), UINT8_C(143), UINT8_C(177), UINT8_C(245), UINT8_C( 0) }, { UINT8_C( 24), UINT8_C(159), UINT8_C(155), UINT8_C( 50), UINT8_C(141), UINT8_C(176), UINT8_C(245), UINT8_C(253) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r1 = simde_vsra_n_u8(a, b, 1); simde_uint8x8_t r3 = simde_vsra_n_u8(a, b, 3); simde_uint8x8_t r5 = simde_vsra_n_u8(a, b, 5); simde_uint8x8_t r6 = simde_vsra_n_u8(a, b, 6); simde_uint8x8_t r8 = simde_vsra_n_u8(a, b, 8); simde_test_arm_neon_assert_equal_u8x8(r1, simde_vld1_u8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u8x8(r3, simde_vld1_u8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u8x8(r5, simde_vld1_u8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_u8x8(r6, simde_vld1_u8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u8x8(r8, simde_vld1_u8(test_vec[i].r8)); } return 0; } static int test_simde_vsra_n_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r3[4]; uint16_t r6[4]; uint16_t r10[4]; uint16_t r13[4]; uint16_t r16[4]; } test_vec[] = { { { UINT16_C(20498), UINT16_C(25529), UINT16_C( 4469), UINT16_C( 6127) }, { UINT16_C(49386), UINT16_C( 6749), UINT16_C(16746), UINT16_C(54822) }, { UINT16_C(26671), UINT16_C(26372), UINT16_C( 6562), UINT16_C(12979) }, { UINT16_C(21269), UINT16_C(25634), UINT16_C( 4730), UINT16_C( 6983) }, { UINT16_C(20546), UINT16_C(25535), UINT16_C( 4485), UINT16_C( 6180) }, { UINT16_C(20504), UINT16_C(25529), UINT16_C( 4471), UINT16_C( 6133) }, { UINT16_C(20498), UINT16_C(25529), UINT16_C( 4469), UINT16_C( 6127) } }, { { UINT16_C(61768), UINT16_C(49523), UINT16_C(29425), UINT16_C(46079) }, { UINT16_C(25128), UINT16_C(13143), UINT16_C( 8747), UINT16_C(15851) }, { UINT16_C(64909), UINT16_C(51165), UINT16_C(30518), UINT16_C(48060) }, { UINT16_C(62160), UINT16_C(49728), UINT16_C(29561), UINT16_C(46326) }, { UINT16_C(61792), UINT16_C(49535), UINT16_C(29433), UINT16_C(46094) }, { UINT16_C(61771), UINT16_C(49524), UINT16_C(29426), UINT16_C(46080) }, { UINT16_C(61768), UINT16_C(49523), UINT16_C(29425), UINT16_C(46079) } }, { { UINT16_C(42098), 
UINT16_C(59296), UINT16_C(36790), UINT16_C(41214) }, { UINT16_C(23631), UINT16_C(47802), UINT16_C(57757), UINT16_C(58768) }, { UINT16_C(45051), UINT16_C(65271), UINT16_C(44009), UINT16_C(48560) }, { UINT16_C(42467), UINT16_C(60042), UINT16_C(37692), UINT16_C(42132) }, { UINT16_C(42121), UINT16_C(59342), UINT16_C(36846), UINT16_C(41271) }, { UINT16_C(42100), UINT16_C(59301), UINT16_C(36797), UINT16_C(41221) }, { UINT16_C(42098), UINT16_C(59296), UINT16_C(36790), UINT16_C(41214) } }, { { UINT16_C( 1234), UINT16_C(50342), UINT16_C(42614), UINT16_C(40567) }, { UINT16_C(52744), UINT16_C(13521), UINT16_C(48368), UINT16_C(25457) }, { UINT16_C( 7827), UINT16_C(52032), UINT16_C(48660), UINT16_C(43749) }, { UINT16_C( 2058), UINT16_C(50553), UINT16_C(43369), UINT16_C(40964) }, { UINT16_C( 1285), UINT16_C(50355), UINT16_C(42661), UINT16_C(40591) }, { UINT16_C( 1240), UINT16_C(50343), UINT16_C(42619), UINT16_C(40570) }, { UINT16_C( 1234), UINT16_C(50342), UINT16_C(42614), UINT16_C(40567) } }, { { UINT16_C( 4705), UINT16_C( 5962), UINT16_C(18849), UINT16_C(61879) }, { UINT16_C(29093), UINT16_C(17067), UINT16_C(15186), UINT16_C( 9511) }, { UINT16_C( 8341), UINT16_C( 8095), UINT16_C(20747), UINT16_C(63067) }, { UINT16_C( 5159), UINT16_C( 6228), UINT16_C(19086), UINT16_C(62027) }, { UINT16_C( 4733), UINT16_C( 5978), UINT16_C(18863), UINT16_C(61888) }, { UINT16_C( 4708), UINT16_C( 5964), UINT16_C(18850), UINT16_C(61880) }, { UINT16_C( 4705), UINT16_C( 5962), UINT16_C(18849), UINT16_C(61879) } }, { { UINT16_C(52543), UINT16_C(46569), UINT16_C(24691), UINT16_C(31828) }, { UINT16_C( 9519), UINT16_C( 8112), UINT16_C( 8674), UINT16_C(17282) }, { UINT16_C(53732), UINT16_C(47583), UINT16_C(25775), UINT16_C(33988) }, { UINT16_C(52691), UINT16_C(46695), UINT16_C(24826), UINT16_C(32098) }, { UINT16_C(52552), UINT16_C(46576), UINT16_C(24699), UINT16_C(31844) }, { UINT16_C(52544), UINT16_C(46569), UINT16_C(24692), UINT16_C(31830) }, { UINT16_C(52543), UINT16_C(46569), UINT16_C(24691), 
UINT16_C(31828) } }, { { UINT16_C(52531), UINT16_C(54618), UINT16_C( 4374), UINT16_C(48070) }, { UINT16_C(29058), UINT16_C(54781), UINT16_C( 9388), UINT16_C(60666) }, { UINT16_C(56163), UINT16_C(61465), UINT16_C( 5547), UINT16_C(55653) }, { UINT16_C(52985), UINT16_C(55473), UINT16_C( 4520), UINT16_C(49017) }, { UINT16_C(52559), UINT16_C(54671), UINT16_C( 4383), UINT16_C(48129) }, { UINT16_C(52534), UINT16_C(54624), UINT16_C( 4375), UINT16_C(48077) }, { UINT16_C(52531), UINT16_C(54618), UINT16_C( 4374), UINT16_C(48070) } }, { { UINT16_C(58353), UINT16_C(26017), UINT16_C(62787), UINT16_C(29409) }, { UINT16_C(37147), UINT16_C(64914), UINT16_C( 5298), UINT16_C(58944) }, { UINT16_C(62996), UINT16_C(34131), UINT16_C(63449), UINT16_C(36777) }, { UINT16_C(58933), UINT16_C(27031), UINT16_C(62869), UINT16_C(30330) }, { UINT16_C(58389), UINT16_C(26080), UINT16_C(62792), UINT16_C(29466) }, { UINT16_C(58357), UINT16_C(26024), UINT16_C(62787), UINT16_C(29416) }, { UINT16_C(58353), UINT16_C(26017), UINT16_C(62787), UINT16_C(29409) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r3 = simde_vsra_n_u16(a, b, 3); simde_uint16x4_t r6 = simde_vsra_n_u16(a, b, 6); simde_uint16x4_t r10 = simde_vsra_n_u16(a, b, 10); simde_uint16x4_t r13 = simde_vsra_n_u16(a, b, 13); simde_uint16x4_t r16 = simde_vsra_n_u16(a, b, 16); simde_test_arm_neon_assert_equal_u16x4(r3, simde_vld1_u16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u16x4(r6, simde_vld1_u16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u16x4(r10, simde_vld1_u16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_u16x4(r13, simde_vld1_u16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u16x4(r16, simde_vld1_u16(test_vec[i].r16)); } return 0; } static int test_simde_vsra_n_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r6[2]; uint32_t r13[2]; 
uint32_t r19[2]; uint32_t r26[2]; uint32_t r32[2]; } test_vec[] = { { { UINT32_C(3659509063), UINT32_C(2568259154) }, { UINT32_C(2718098257), UINT32_C(1952907512) }, { UINT32_C(3701979348), UINT32_C(2598773333) }, { UINT32_C(3659840862), UINT32_C(2568497546) }, { UINT32_C(3659514247), UINT32_C(2568262878) }, { UINT32_C(3659509103), UINT32_C(2568259183) }, { UINT32_C(3659509063), UINT32_C(2568259154) } }, { { UINT32_C(1073588353), UINT32_C(2728011964) }, { UINT32_C(3612159475), UINT32_C(4002580135) }, { UINT32_C(1130028344), UINT32_C(2790552278) }, { UINT32_C(1074029290), UINT32_C(2728500560) }, { UINT32_C(1073595242), UINT32_C(2728019598) }, { UINT32_C(1073588406), UINT32_C(2728012023) }, { UINT32_C(1073588353), UINT32_C(2728011964) } }, { { UINT32_C(2261299508), UINT32_C(2300566584) }, { UINT32_C(3073057215), UINT32_C(2737541922) }, { UINT32_C(2309316026), UINT32_C(2343340676) }, { UINT32_C(2261674637), UINT32_C(2300900756) }, { UINT32_C(2261305369), UINT32_C(2300571805) }, { UINT32_C(2261299553), UINT32_C(2300566624) }, { UINT32_C(2261299508), UINT32_C(2300566584) } }, { { UINT32_C(4175571259), UINT32_C(1167752529) }, { UINT32_C(1360849066), UINT32_C(2722082670) }, { UINT32_C(4196834525), UINT32_C(1210285070) }, { UINT32_C(4175737378), UINT32_C(1168084814) }, { UINT32_C(4175573854), UINT32_C(1167757720) }, { UINT32_C(4175571279), UINT32_C(1167752569) }, { UINT32_C(4175571259), UINT32_C(1167752529) } }, { { UINT32_C(2552760160), UINT32_C(2736932836) }, { UINT32_C(2338016617), UINT32_C( 472811232) }, { UINT32_C(2589291669), UINT32_C(2744320511) }, { UINT32_C(2553045562), UINT32_C(2736990552) }, { UINT32_C(2552764619), UINT32_C(2736933737) }, { UINT32_C(2552760194), UINT32_C(2736932843) }, { UINT32_C(2552760160), UINT32_C(2736932836) } }, { { UINT32_C( 18092207), UINT32_C( 944156301) }, { UINT32_C( 92889750), UINT32_C(1923598609) }, { UINT32_C( 19543609), UINT32_C( 974212529) }, { UINT32_C( 18103546), UINT32_C( 944391115) }, { UINT32_C( 18092384), UINT32_C( 
944159969) }, { UINT32_C( 18092208), UINT32_C( 944156329) }, { UINT32_C( 18092207), UINT32_C( 944156301) } }, { { UINT32_C(3020607696), UINT32_C(2153262103) }, { UINT32_C(1510716282), UINT32_C(3916839225) }, { UINT32_C(3044212637), UINT32_C(2214462715) }, { UINT32_C(3020792109), UINT32_C(2153740232) }, { UINT32_C(3020610577), UINT32_C(2153269573) }, { UINT32_C(3020607718), UINT32_C(2153262161) }, { UINT32_C(3020607696), UINT32_C(2153262103) } }, { { UINT32_C(3622472266), UINT32_C(3473879097) }, { UINT32_C(2765396370), UINT32_C( 840334434) }, { UINT32_C(3665681584), UINT32_C(3487009322) }, { UINT32_C(3622809838), UINT32_C(3473981676) }, { UINT32_C(3622477540), UINT32_C(3473880699) }, { UINT32_C(3622472307), UINT32_C(3473879109) }, { UINT32_C(3622472266), UINT32_C(3473879097) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r6 = simde_vsra_n_u32(a, b, 6); simde_uint32x2_t r13 = simde_vsra_n_u32(a, b, 13); simde_uint32x2_t r19 = simde_vsra_n_u32(a, b, 19); simde_uint32x2_t r26 = simde_vsra_n_u32(a, b, 26); simde_uint32x2_t r32 = simde_vsra_n_u32(a, b, 32); simde_test_arm_neon_assert_equal_u32x2(r6, simde_vld1_u32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u32x2(r13, simde_vld1_u32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u32x2(r19, simde_vld1_u32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_u32x2(r26, simde_vld1_u32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u32x2(r32, simde_vld1_u32(test_vec[i].r32)); } return 0; } static int test_simde_vsra_n_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; uint64_t b[1]; uint64_t r13[1]; uint64_t r26[1]; uint64_t r39[1]; uint64_t r52[1]; uint64_t r64[1]; } test_vec[] = { { { UINT64_C(13919412844583952900) }, { UINT64_C( 1584289983876654346) }, { UINT64_C(13919606239357375343) }, { UINT64_C(13919412868191713327) }, { 
UINT64_C(13919412844586834706) }, { UINT64_C(13919412844583953251) }, { UINT64_C(13919412844583952900) } }, { { UINT64_C( 3471010265826057447) }, { UINT64_C( 6591570382045680709) }, { UINT64_C( 3471814900882459507) }, { UINT64_C( 3471010364048110230) }, { UINT64_C( 3471010265838047443) }, { UINT64_C( 3471010265826058910) }, { UINT64_C( 3471010265826057447) } }, { { UINT64_C(11715387210285901976) }, { UINT64_C( 3547795583048729295) }, { UINT64_C(11715820290801410854) }, { UINT64_C(11715387263152175841) }, { UINT64_C(11715387210292355378) }, { UINT64_C(11715387210285902763) }, { UINT64_C(11715387210285901976) } }, { { UINT64_C( 5606046988161484585) }, { UINT64_C( 3167302207410969923) }, { UINT64_C( 5606433621731725181) }, { UINT64_C( 5606047035357965327) }, { UINT64_C( 5606046988167245874) }, { UINT64_C( 5606046988161485288) }, { UINT64_C( 5606046988161484585) } }, { { UINT64_C( 7829389879338283884) }, { UINT64_C( 8057449110591240278) }, { UINT64_C( 7830373454669166603) }, { UINT64_C( 7829389999403631892) }, { UINT64_C( 7829389879352940298) }, { UINT64_C( 7829389879338285673) }, { UINT64_C( 7829389879338283884) } }, { { UINT64_C( 5092400213135152214) }, { UINT64_C(11975867704836863604) }, { UINT64_C( 5093862111048340307) }, { UINT64_C( 5092400391589487319) }, { UINT64_C( 5092400213156936190) }, { UINT64_C( 5092400213135154873) }, { UINT64_C( 5092400213135152214) } }, { { UINT64_C( 2022208341025698055) }, { UINT64_C(14411955805773144907) }, { UINT64_C( 2023967612974644972) }, { UINT64_C( 2022208555780574635) }, { UINT64_C( 2022208341051913249) }, { UINT64_C( 2022208341025701255) }, { UINT64_C( 2022208341025698055) } }, { { UINT64_C( 6281063407037058273) }, { UINT64_C(16530621718664710791) }, { UINT64_C( 6283081305196074961) }, { UINT64_C( 6281063653362517137) }, { UINT64_C( 6281063407067127298) }, { UINT64_C( 6281063407037061943) }, { UINT64_C( 6281063407037058273) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = 
simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r13 = simde_vsra_n_u64(a, b, 13); simde_uint64x1_t r26 = simde_vsra_n_u64(a, b, 26); simde_uint64x1_t r39 = simde_vsra_n_u64(a, b, 39); simde_uint64x1_t r52 = simde_vsra_n_u64(a, b, 52); simde_uint64x1_t r64 = simde_vsra_n_u64(a, b, 64); simde_test_arm_neon_assert_equal_u64x1(r13, simde_vld1_u64(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u64x1(r26, simde_vld1_u64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u64x1(r39, simde_vld1_u64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_u64x1(r52, simde_vld1_u64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_u64x1(r64, simde_vld1_u64(test_vec[i].r64)); } return 0; } static int test_simde_vsraq_n_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r1[16]; int8_t r3[16]; int8_t r5[16]; int8_t r6[16]; int8_t r8[16]; } test_vec[] = { { { INT8_C( 110), INT8_C( 63), -INT8_C( 32), INT8_C( 64), -INT8_C( 122), -INT8_C( 34), INT8_C( 93), -INT8_C( 112), INT8_C( 37), -INT8_C( 2), INT8_C( 120), -INT8_C( 61), -INT8_C( 24), INT8_C( 68), -INT8_C( 47), -INT8_C( 81) }, { INT8_C( 67), INT8_C( 4), INT8_C( 19), -INT8_C( 68), -INT8_C( 34), INT8_C( 22), -INT8_C( 66), INT8_MAX, -INT8_C( 50), -INT8_C( 39), -INT8_C( 16), -INT8_C( 65), INT8_C( 92), -INT8_C( 85), INT8_C( 75), -INT8_C( 54) }, { -INT8_C( 113), INT8_C( 65), -INT8_C( 23), INT8_C( 30), INT8_C( 117), -INT8_C( 23), INT8_C( 60), -INT8_C( 49), INT8_C( 12), -INT8_C( 22), INT8_C( 112), -INT8_C( 94), INT8_C( 22), INT8_C( 25), -INT8_C( 10), -INT8_C( 108) }, { INT8_C( 118), INT8_C( 63), -INT8_C( 30), INT8_C( 55), -INT8_C( 127), -INT8_C( 32), INT8_C( 84), -INT8_C( 97), INT8_C( 30), -INT8_C( 7), INT8_C( 118), -INT8_C( 70), -INT8_C( 13), INT8_C( 57), -INT8_C( 38), -INT8_C( 88) }, { INT8_C( 112), INT8_C( 63), -INT8_C( 32), INT8_C( 61), -INT8_C( 124), -INT8_C( 34), INT8_C( 90), -INT8_C( 109), INT8_C( 35), -INT8_C( 4), INT8_C( 119), -INT8_C( 64), -INT8_C( 
22), INT8_C( 65), -INT8_C( 45), -INT8_C( 83) }, { INT8_C( 111), INT8_C( 63), -INT8_C( 32), INT8_C( 62), -INT8_C( 123), -INT8_C( 34), INT8_C( 91), -INT8_C( 111), INT8_C( 36), -INT8_C( 3), INT8_C( 119), -INT8_C( 63), -INT8_C( 23), INT8_C( 66), -INT8_C( 46), -INT8_C( 82) }, { INT8_C( 110), INT8_C( 63), -INT8_C( 32), INT8_C( 63), -INT8_C( 123), -INT8_C( 34), INT8_C( 92), -INT8_C( 112), INT8_C( 36), -INT8_C( 3), INT8_C( 119), -INT8_C( 62), -INT8_C( 24), INT8_C( 67), -INT8_C( 47), -INT8_C( 82) } }, { { -INT8_C( 22), INT8_C( 43), INT8_C( 10), INT8_C( 112), INT8_C( 10), INT8_C( 104), INT8_C( 0), INT8_C( 47), INT8_C( 102), INT8_C( 121), -INT8_C( 14), INT8_C( 78), -INT8_C( 67), -INT8_C( 61), -INT8_C( 3), INT8_C( 1) }, { -INT8_C( 57), INT8_C( 16), -INT8_C( 67), -INT8_C( 91), INT8_C( 38), INT8_C( 123), INT8_C( 36), -INT8_C( 12), INT8_C( 84), INT8_C( 20), -INT8_C( 77), -INT8_C( 80), -INT8_C( 64), -INT8_C( 2), INT8_C( 122), -INT8_C( 86) }, { -INT8_C( 51), INT8_C( 51), -INT8_C( 24), INT8_C( 66), INT8_C( 29), -INT8_C( 91), INT8_C( 18), INT8_C( 41), -INT8_C( 112), -INT8_C( 125), -INT8_C( 53), INT8_C( 38), -INT8_C( 99), -INT8_C( 62), INT8_C( 58), -INT8_C( 42) }, { -INT8_C( 30), INT8_C( 45), INT8_C( 1), INT8_C( 100), INT8_C( 14), INT8_C( 119), INT8_C( 4), INT8_C( 45), INT8_C( 112), INT8_C( 123), -INT8_C( 24), INT8_C( 68), -INT8_C( 75), -INT8_C( 62), INT8_C( 12), -INT8_C( 10) }, { -INT8_C( 24), INT8_C( 43), INT8_C( 7), INT8_C( 109), INT8_C( 11), INT8_C( 107), INT8_C( 1), INT8_C( 46), INT8_C( 104), INT8_C( 121), -INT8_C( 17), INT8_C( 75), -INT8_C( 69), -INT8_C( 62), INT8_C( 0), -INT8_C( 2) }, { -INT8_C( 23), INT8_C( 43), INT8_C( 8), INT8_C( 110), INT8_C( 10), INT8_C( 105), INT8_C( 0), INT8_C( 46), INT8_C( 103), INT8_C( 121), -INT8_C( 16), INT8_C( 76), -INT8_C( 68), -INT8_C( 62), -INT8_C( 2), -INT8_C( 1) }, { -INT8_C( 23), INT8_C( 43), INT8_C( 9), INT8_C( 111), INT8_C( 10), INT8_C( 104), INT8_C( 0), INT8_C( 46), INT8_C( 102), INT8_C( 121), -INT8_C( 15), INT8_C( 77), -INT8_C( 68), 
-INT8_C( 62), -INT8_C( 3), INT8_C( 0) } }, { { INT8_C( 42), -INT8_C( 123), INT8_C( 27), INT8_C( 52), -INT8_C( 19), INT8_C( 27), INT8_C( 99), INT8_C( 83), -INT8_C( 108), INT8_C( 85), -INT8_C( 94), INT8_C( 82), INT8_C( 25), -INT8_C( 97), INT8_C( 83), -INT8_C( 32) }, { -INT8_C( 80), INT8_C( 16), -INT8_C( 122), -INT8_C( 42), -INT8_C( 117), -INT8_C( 86), -INT8_C( 53), -INT8_C( 33), -INT8_C( 65), INT8_C( 126), -INT8_C( 112), INT8_MAX, INT8_C( 125), INT8_C( 10), INT8_C( 41), -INT8_C( 89) }, { INT8_C( 2), -INT8_C( 115), -INT8_C( 34), INT8_C( 31), -INT8_C( 78), -INT8_C( 16), INT8_C( 72), INT8_C( 66), INT8_C( 115), -INT8_C( 108), INT8_C( 106), -INT8_C( 111), INT8_C( 87), -INT8_C( 92), INT8_C( 103), -INT8_C( 77) }, { INT8_C( 32), -INT8_C( 121), INT8_C( 11), INT8_C( 46), -INT8_C( 34), INT8_C( 16), INT8_C( 92), INT8_C( 78), -INT8_C( 117), INT8_C( 100), -INT8_C( 108), INT8_C( 97), INT8_C( 40), -INT8_C( 96), INT8_C( 88), -INT8_C( 44) }, { INT8_C( 39), -INT8_C( 123), INT8_C( 23), INT8_C( 50), -INT8_C( 23), INT8_C( 24), INT8_C( 97), INT8_C( 81), -INT8_C( 111), INT8_C( 88), -INT8_C( 98), INT8_C( 85), INT8_C( 28), -INT8_C( 97), INT8_C( 84), -INT8_C( 35) }, { INT8_C( 40), -INT8_C( 123), INT8_C( 25), INT8_C( 51), -INT8_C( 21), INT8_C( 25), INT8_C( 98), INT8_C( 82), -INT8_C( 110), INT8_C( 86), -INT8_C( 96), INT8_C( 83), INT8_C( 26), -INT8_C( 97), INT8_C( 83), -INT8_C( 34) }, { INT8_C( 41), -INT8_C( 123), INT8_C( 26), INT8_C( 51), -INT8_C( 20), INT8_C( 26), INT8_C( 98), INT8_C( 82), -INT8_C( 109), INT8_C( 85), -INT8_C( 95), INT8_C( 82), INT8_C( 25), -INT8_C( 97), INT8_C( 83), -INT8_C( 33) } }, { { -INT8_C( 113), INT8_C( 68), -INT8_C( 37), INT8_C( 124), INT8_C( 96), INT8_C( 62), -INT8_C( 48), -INT8_C( 12), -INT8_C( 109), INT8_C( 114), INT8_C( 70), -INT8_C( 84), INT8_C( 17), -INT8_C( 103), -INT8_C( 115), -INT8_C( 63) }, { -INT8_C( 87), INT8_C( 19), -INT8_C( 104), INT8_C( 52), -INT8_C( 67), INT8_C( 99), INT8_C( 20), INT8_C( 124), -INT8_C( 31), -INT8_C( 92), -INT8_C( 5), INT8_C( 94), 
-INT8_C( 82), INT8_C( 37), INT8_C( 5), INT8_C( 62) }, { INT8_C( 99), INT8_C( 77), -INT8_C( 89), -INT8_C( 106), INT8_C( 62), INT8_C( 111), -INT8_C( 38), INT8_C( 50), -INT8_C( 125), INT8_C( 68), INT8_C( 67), -INT8_C( 37), -INT8_C( 24), -INT8_C( 85), -INT8_C( 113), -INT8_C( 32) }, { -INT8_C( 124), INT8_C( 70), -INT8_C( 50), -INT8_C( 126), INT8_C( 87), INT8_C( 74), -INT8_C( 46), INT8_C( 3), -INT8_C( 113), INT8_C( 102), INT8_C( 69), -INT8_C( 73), INT8_C( 6), -INT8_C( 99), -INT8_C( 115), -INT8_C( 56) }, { -INT8_C( 116), INT8_C( 68), -INT8_C( 41), INT8_C( 125), INT8_C( 93), INT8_C( 65), -INT8_C( 48), -INT8_C( 9), -INT8_C( 110), INT8_C( 111), INT8_C( 69), -INT8_C( 82), INT8_C( 14), -INT8_C( 102), -INT8_C( 115), -INT8_C( 62) }, { -INT8_C( 115), INT8_C( 68), -INT8_C( 39), INT8_C( 124), INT8_C( 94), INT8_C( 63), -INT8_C( 48), -INT8_C( 11), -INT8_C( 110), INT8_C( 112), INT8_C( 69), -INT8_C( 83), INT8_C( 15), -INT8_C( 103), -INT8_C( 115), -INT8_C( 63) }, { -INT8_C( 114), INT8_C( 68), -INT8_C( 38), INT8_C( 124), INT8_C( 95), INT8_C( 62), -INT8_C( 48), -INT8_C( 12), -INT8_C( 110), INT8_C( 113), INT8_C( 69), -INT8_C( 84), INT8_C( 16), -INT8_C( 103), -INT8_C( 115), -INT8_C( 63) } }, { { INT8_C( 105), -INT8_C( 32), -INT8_C( 70), -INT8_C( 55), INT8_C( 30), -INT8_C( 118), -INT8_C( 66), -INT8_C( 78), -INT8_C( 4), INT8_C( 4), INT8_C( 94), INT8_C( 14), -INT8_C( 98), -INT8_C( 21), -INT8_C( 49), INT8_C( 71) }, { -INT8_C( 2), INT8_C( 103), INT8_C( 124), -INT8_C( 68), -INT8_C( 54), -INT8_C( 112), INT8_C( 56), -INT8_C( 84), INT8_C( 52), INT8_C( 52), INT8_C( 10), -INT8_C( 30), INT8_C( 89), INT8_C( 16), INT8_C( 32), -INT8_C( 62) }, { INT8_C( 104), INT8_C( 19), -INT8_C( 8), -INT8_C( 89), INT8_C( 3), INT8_C( 82), -INT8_C( 38), -INT8_C( 120), INT8_C( 22), INT8_C( 30), INT8_C( 99), -INT8_C( 1), -INT8_C( 54), -INT8_C( 13), -INT8_C( 33), INT8_C( 40) }, { INT8_C( 104), -INT8_C( 20), -INT8_C( 55), -INT8_C( 64), INT8_C( 23), INT8_C( 124), -INT8_C( 59), -INT8_C( 89), INT8_C( 2), INT8_C( 10), INT8_C( 95), 
INT8_C( 10), -INT8_C( 87), -INT8_C( 19), -INT8_C( 45), INT8_C( 63) }, { INT8_C( 104), -INT8_C( 29), -INT8_C( 67), -INT8_C( 58), INT8_C( 28), -INT8_C( 122), -INT8_C( 65), -INT8_C( 81), -INT8_C( 3), INT8_C( 5), INT8_C( 94), INT8_C( 13), -INT8_C( 96), -INT8_C( 21), -INT8_C( 48), INT8_C( 69) }, { INT8_C( 104), -INT8_C( 31), -INT8_C( 69), -INT8_C( 57), INT8_C( 29), -INT8_C( 120), -INT8_C( 66), -INT8_C( 80), -INT8_C( 4), INT8_C( 4), INT8_C( 94), INT8_C( 13), -INT8_C( 97), -INT8_C( 21), -INT8_C( 49), INT8_C( 70) }, { INT8_C( 104), -INT8_C( 32), -INT8_C( 70), -INT8_C( 56), INT8_C( 29), -INT8_C( 119), -INT8_C( 66), -INT8_C( 79), -INT8_C( 4), INT8_C( 4), INT8_C( 94), INT8_C( 13), -INT8_C( 98), -INT8_C( 21), -INT8_C( 49), INT8_C( 70) } }, { { -INT8_C( 16), -INT8_C( 37), -INT8_C( 116), INT8_C( 15), INT8_C( 101), INT8_C( 74), -INT8_C( 63), INT8_C( 98), INT8_C( 78), INT8_C( 31), INT8_C( 112), -INT8_C( 20), INT8_C( 11), INT8_C( 63), INT8_C( 52), INT8_C( 9) }, { -INT8_C( 89), -INT8_C( 80), -INT8_C( 59), INT8_C( 113), INT8_C( 64), -INT8_C( 2), INT8_C( 29), INT8_C( 116), INT8_C( 50), INT8_C( 40), INT8_C( 86), -INT8_C( 117), INT8_C( 56), INT8_C( 119), INT8_C( 77), INT8_C( 40) }, { -INT8_C( 61), -INT8_C( 77), INT8_C( 110), INT8_C( 71), -INT8_C( 123), INT8_C( 73), -INT8_C( 49), -INT8_C( 100), INT8_C( 103), INT8_C( 51), -INT8_C( 101), -INT8_C( 79), INT8_C( 39), INT8_C( 122), INT8_C( 90), INT8_C( 29) }, { -INT8_C( 28), -INT8_C( 47), -INT8_C( 124), INT8_C( 29), INT8_C( 109), INT8_C( 73), -INT8_C( 60), INT8_C( 112), INT8_C( 84), INT8_C( 36), INT8_C( 122), -INT8_C( 35), INT8_C( 18), INT8_C( 77), INT8_C( 61), INT8_C( 14) }, { -INT8_C( 19), -INT8_C( 40), -INT8_C( 118), INT8_C( 18), INT8_C( 103), INT8_C( 73), -INT8_C( 63), INT8_C( 101), INT8_C( 79), INT8_C( 32), INT8_C( 114), -INT8_C( 24), INT8_C( 12), INT8_C( 66), INT8_C( 54), INT8_C( 10) }, { -INT8_C( 18), -INT8_C( 39), -INT8_C( 117), INT8_C( 16), INT8_C( 102), INT8_C( 73), -INT8_C( 63), INT8_C( 99), INT8_C( 78), INT8_C( 31), INT8_C( 113), 
-INT8_C( 22), INT8_C( 11), INT8_C( 64), INT8_C( 53), INT8_C( 9) }, { -INT8_C( 17), -INT8_C( 38), -INT8_C( 117), INT8_C( 15), INT8_C( 101), INT8_C( 73), -INT8_C( 63), INT8_C( 98), INT8_C( 78), INT8_C( 31), INT8_C( 112), -INT8_C( 21), INT8_C( 11), INT8_C( 63), INT8_C( 52), INT8_C( 9) } }, { { INT8_C( 82), -INT8_C( 39), INT8_C( 55), -INT8_C( 73), INT8_C( 35), -INT8_C( 8), INT8_C( 25), INT8_C( 114), INT8_C( 24), -INT8_C( 119), INT8_C( 94), INT8_C( 35), -INT8_C( 55), -INT8_C( 110), INT8_C( 44), INT8_C( 112) }, { INT8_C( 66), -INT8_C( 14), -INT8_C( 31), -INT8_C( 126), -INT8_C( 16), -INT8_C( 1), -INT8_C( 10), INT8_C( 34), INT8_C( 39), INT8_C( 77), -INT8_C( 83), INT8_C( 95), -INT8_C( 60), -INT8_C( 6), -INT8_C( 121), INT8_C( 22) }, { INT8_C( 115), -INT8_C( 46), INT8_C( 39), INT8_C( 120), INT8_C( 27), -INT8_C( 9), INT8_C( 20), -INT8_C( 125), INT8_C( 43), -INT8_C( 81), INT8_C( 52), INT8_C( 82), -INT8_C( 85), -INT8_C( 113), -INT8_C( 17), INT8_C( 123) }, { INT8_C( 90), -INT8_C( 41), INT8_C( 51), -INT8_C( 89), INT8_C( 33), -INT8_C( 9), INT8_C( 23), INT8_C( 118), INT8_C( 28), -INT8_C( 110), INT8_C( 83), INT8_C( 46), -INT8_C( 63), -INT8_C( 111), INT8_C( 28), INT8_C( 114) }, { INT8_C( 84), -INT8_C( 40), INT8_C( 54), -INT8_C( 77), INT8_C( 34), -INT8_C( 9), INT8_C( 24), INT8_C( 115), INT8_C( 25), -INT8_C( 117), INT8_C( 91), INT8_C( 37), -INT8_C( 57), -INT8_C( 111), INT8_C( 40), INT8_C( 112) }, { INT8_C( 83), -INT8_C( 40), INT8_C( 54), -INT8_C( 75), INT8_C( 34), -INT8_C( 9), INT8_C( 24), INT8_C( 114), INT8_C( 24), -INT8_C( 118), INT8_C( 92), INT8_C( 36), -INT8_C( 56), -INT8_C( 111), INT8_C( 42), INT8_C( 112) }, { INT8_C( 82), -INT8_C( 40), INT8_C( 54), -INT8_C( 74), INT8_C( 34), -INT8_C( 9), INT8_C( 24), INT8_C( 114), INT8_C( 24), -INT8_C( 119), INT8_C( 93), INT8_C( 35), -INT8_C( 56), -INT8_C( 111), INT8_C( 43), INT8_C( 112) } }, { { -INT8_C( 44), -INT8_C( 65), -INT8_C( 51), -INT8_C( 9), -INT8_C( 73), -INT8_C( 25), INT8_C( 105), -INT8_C( 49), INT8_C( 112), -INT8_C( 56), -INT8_C( 14), 
INT8_C( 57), INT8_C( 90), INT8_C( 31), -INT8_C( 87), -INT8_C( 99) }, { INT8_C( 17), -INT8_C( 117), INT8_C( 31), INT8_C( 1), -INT8_C( 118), INT8_C( 22), INT8_C( 35), -INT8_C( 79), INT8_C( 99), -INT8_C( 48), INT8_C( 16), INT8_C( 39), -INT8_C( 54), -INT8_C( 105), INT8_C( 61), -INT8_C( 98) }, { -INT8_C( 36), -INT8_C( 124), -INT8_C( 36), -INT8_C( 9), INT8_C( 124), -INT8_C( 14), INT8_C( 122), -INT8_C( 89), -INT8_C( 95), -INT8_C( 80), -INT8_C( 6), INT8_C( 76), INT8_C( 63), -INT8_C( 22), -INT8_C( 57), INT8_C( 108) }, { -INT8_C( 42), -INT8_C( 80), -INT8_C( 48), -INT8_C( 9), -INT8_C( 88), -INT8_C( 23), INT8_C( 109), -INT8_C( 59), INT8_C( 124), -INT8_C( 62), -INT8_C( 12), INT8_C( 61), INT8_C( 83), INT8_C( 17), -INT8_C( 80), -INT8_C( 112) }, { -INT8_C( 44), -INT8_C( 69), -INT8_C( 51), -INT8_C( 9), -INT8_C( 77), -INT8_C( 25), INT8_C( 106), -INT8_C( 52), INT8_C( 115), -INT8_C( 58), -INT8_C( 14), INT8_C( 58), INT8_C( 88), INT8_C( 27), -INT8_C( 86), -INT8_C( 103) }, { -INT8_C( 44), -INT8_C( 67), -INT8_C( 51), -INT8_C( 9), -INT8_C( 75), -INT8_C( 25), INT8_C( 105), -INT8_C( 51), INT8_C( 113), -INT8_C( 57), -INT8_C( 14), INT8_C( 57), INT8_C( 89), INT8_C( 29), -INT8_C( 87), -INT8_C( 101) }, { -INT8_C( 44), -INT8_C( 66), -INT8_C( 51), -INT8_C( 9), -INT8_C( 74), -INT8_C( 25), INT8_C( 105), -INT8_C( 50), INT8_C( 112), -INT8_C( 57), -INT8_C( 14), INT8_C( 57), INT8_C( 89), INT8_C( 30), -INT8_C( 87), -INT8_C( 100) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r1 = simde_vsraq_n_s8(a, b, 1); simde_int8x16_t r3 = simde_vsraq_n_s8(a, b, 3); simde_int8x16_t r5 = simde_vsraq_n_s8(a, b, 5); simde_int8x16_t r6 = simde_vsraq_n_s8(a, b, 6); simde_int8x16_t r8 = simde_vsraq_n_s8(a, b, 8); simde_test_arm_neon_assert_equal_i8x16(r1, simde_vld1q_s8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i8x16(r3, simde_vld1q_s8(test_vec[i].r3)); 
simde_test_arm_neon_assert_equal_i8x16(r5, simde_vld1q_s8(test_vec[i].r5)); simde_test_arm_neon_assert_equal_i8x16(r6, simde_vld1q_s8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i8x16(r8, simde_vld1q_s8(test_vec[i].r8)); } return 0; } static int test_simde_vsraq_n_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r3[8]; int16_t r6[8]; int16_t r10[8]; int16_t r13[8]; int16_t r16[8]; } test_vec[] = { { { INT16_C( 19770), INT16_C( 15436), -INT16_C( 22964), INT16_C( 18988), INT16_C( 13523), -INT16_C( 16063), -INT16_C( 25309), -INT16_C( 5024) }, { INT16_C( 15790), -INT16_C( 31011), -INT16_C( 5854), -INT16_C( 25855), INT16_C( 22081), INT16_C( 11412), -INT16_C( 32023), INT16_C( 9002) }, { INT16_C( 21743), INT16_C( 11559), -INT16_C( 23696), INT16_C( 15756), INT16_C( 16283), -INT16_C( 14637), -INT16_C( 29312), -INT16_C( 3899) }, { INT16_C( 20016), INT16_C( 14951), -INT16_C( 23056), INT16_C( 18584), INT16_C( 13868), -INT16_C( 15885), -INT16_C( 25810), -INT16_C( 4884) }, { INT16_C( 19785), INT16_C( 15405), -INT16_C( 22970), INT16_C( 18962), INT16_C( 13544), -INT16_C( 16052), -INT16_C( 25341), -INT16_C( 5016) }, { INT16_C( 19771), INT16_C( 15432), -INT16_C( 22965), INT16_C( 18984), INT16_C( 13525), -INT16_C( 16062), -INT16_C( 25313), -INT16_C( 5023) }, { INT16_C( 19770), INT16_C( 15435), -INT16_C( 22965), INT16_C( 18987), INT16_C( 13523), -INT16_C( 16063), -INT16_C( 25310), -INT16_C( 5024) } }, { { INT16_C( 30671), INT16_C( 7008), -INT16_C( 29667), -INT16_C( 3995), -INT16_C( 22848), -INT16_C( 6991), INT16_C( 4419), -INT16_C( 3376) }, { -INT16_C( 21169), INT16_C( 29048), INT16_C( 31126), -INT16_C( 10483), -INT16_C( 24112), -INT16_C( 18172), INT16_C( 11812), -INT16_C( 3107) }, { INT16_C( 28024), INT16_C( 10639), -INT16_C( 25777), -INT16_C( 5306), -INT16_C( 25862), -INT16_C( 9263), INT16_C( 5895), -INT16_C( 3765) }, { INT16_C( 30340), INT16_C( 7461), -INT16_C( 29181), -INT16_C( 4159), -INT16_C( 23225), -INT16_C( 7275), INT16_C( 4603), -INT16_C( 
3425) }, { INT16_C( 30650), INT16_C( 7036), -INT16_C( 29637), -INT16_C( 4006), -INT16_C( 22872), -INT16_C( 7009), INT16_C( 4430), -INT16_C( 3380) }, { INT16_C( 30668), INT16_C( 7011), -INT16_C( 29664), -INT16_C( 3997), -INT16_C( 22851), -INT16_C( 6994), INT16_C( 4420), -INT16_C( 3377) }, { INT16_C( 30670), INT16_C( 7008), -INT16_C( 29667), -INT16_C( 3996), -INT16_C( 22849), -INT16_C( 6992), INT16_C( 4419), -INT16_C( 3377) } }, { { INT16_C( 15781), -INT16_C( 15857), INT16_C( 29897), -INT16_C( 30030), INT16_C( 25627), INT16_C( 24174), INT16_C( 15989), -INT16_C( 15280) }, { -INT16_C( 13845), -INT16_C( 32458), INT16_C( 17218), INT16_C( 4696), INT16_C( 23780), INT16_C( 2252), -INT16_C( 22133), INT16_C( 12540) }, { INT16_C( 14050), -INT16_C( 19915), INT16_C( 32049), -INT16_C( 29443), INT16_C( 28599), INT16_C( 24455), INT16_C( 13222), -INT16_C( 13713) }, { INT16_C( 15564), -INT16_C( 16365), INT16_C( 30166), -INT16_C( 29957), INT16_C( 25998), INT16_C( 24209), INT16_C( 15643), -INT16_C( 15085) }, { INT16_C( 15767), -INT16_C( 15889), INT16_C( 29913), -INT16_C( 30026), INT16_C( 25650), INT16_C( 24176), INT16_C( 15967), -INT16_C( 15268) }, { INT16_C( 15779), -INT16_C( 15861), INT16_C( 29899), -INT16_C( 30030), INT16_C( 25629), INT16_C( 24174), INT16_C( 15986), -INT16_C( 15279) }, { INT16_C( 15780), -INT16_C( 15858), INT16_C( 29897), -INT16_C( 30030), INT16_C( 25627), INT16_C( 24174), INT16_C( 15988), -INT16_C( 15280) } }, { { INT16_C( 3046), -INT16_C( 20493), -INT16_C( 23169), -INT16_C( 26055), -INT16_C( 22775), INT16_C( 32761), INT16_C( 18917), -INT16_C( 12221) }, { INT16_C( 30994), INT16_C( 21841), -INT16_C( 21828), -INT16_C( 24217), INT16_C( 13062), -INT16_C( 28247), -INT16_C( 23076), -INT16_C( 15678) }, { INT16_C( 6920), -INT16_C( 17763), -INT16_C( 25898), -INT16_C( 29083), -INT16_C( 21143), INT16_C( 29230), INT16_C( 16032), -INT16_C( 14181) }, { INT16_C( 3530), -INT16_C( 20152), -INT16_C( 23511), -INT16_C( 26434), -INT16_C( 22571), INT16_C( 32319), INT16_C( 18556), 
-INT16_C( 12466) }, { INT16_C( 3076), -INT16_C( 20472), -INT16_C( 23191), -INT16_C( 26079), -INT16_C( 22763), INT16_C( 32733), INT16_C( 18894), -INT16_C( 12237) }, { INT16_C( 3049), -INT16_C( 20491), -INT16_C( 23172), -INT16_C( 26058), -INT16_C( 22774), INT16_C( 32757), INT16_C( 18914), -INT16_C( 12223) }, { INT16_C( 3046), -INT16_C( 20493), -INT16_C( 23170), -INT16_C( 26056), -INT16_C( 22775), INT16_C( 32760), INT16_C( 18916), -INT16_C( 12222) } }, { { -INT16_C( 19024), INT16_C( 12402), -INT16_C( 21670), INT16_C( 25802), -INT16_C( 15533), INT16_C( 14563), INT16_C( 9741), INT16_C( 7945) }, { INT16_C( 23200), INT16_C( 23668), -INT16_C( 9212), INT16_C( 3069), -INT16_C( 22769), -INT16_C( 4964), INT16_C( 24140), -INT16_C( 594) }, { -INT16_C( 16124), INT16_C( 15360), -INT16_C( 22822), INT16_C( 26185), -INT16_C( 18380), INT16_C( 13942), INT16_C( 12758), INT16_C( 7870) }, { -INT16_C( 18662), INT16_C( 12771), -INT16_C( 21814), INT16_C( 25849), -INT16_C( 15889), INT16_C( 14485), INT16_C( 10118), INT16_C( 7935) }, { -INT16_C( 19002), INT16_C( 12425), -INT16_C( 21679), INT16_C( 25804), -INT16_C( 15556), INT16_C( 14558), INT16_C( 9764), INT16_C( 7944) }, { -INT16_C( 19022), INT16_C( 12404), -INT16_C( 21672), INT16_C( 25802), -INT16_C( 15536), INT16_C( 14562), INT16_C( 9743), INT16_C( 7944) }, { -INT16_C( 19024), INT16_C( 12402), -INT16_C( 21671), INT16_C( 25802), -INT16_C( 15534), INT16_C( 14562), INT16_C( 9741), INT16_C( 7944) } }, { { INT16_C( 8211), INT16_C( 28205), -INT16_C( 2100), INT16_C( 8146), -INT16_C( 19013), -INT16_C( 14249), INT16_C( 24795), INT16_C( 31719) }, { INT16_C( 23739), -INT16_C( 16424), -INT16_C( 10952), INT16_C( 18378), INT16_C( 26492), -INT16_C( 14029), -INT16_C( 7483), -INT16_C( 9786) }, { INT16_C( 11178), INT16_C( 26152), -INT16_C( 3469), INT16_C( 10443), -INT16_C( 15702), -INT16_C( 16003), INT16_C( 23859), INT16_C( 30495) }, { INT16_C( 8581), INT16_C( 27948), -INT16_C( 2272), INT16_C( 8433), -INT16_C( 18600), -INT16_C( 14469), INT16_C( 24678), 
INT16_C( 31566) }, { INT16_C( 8234), INT16_C( 28188), -INT16_C( 2111), INT16_C( 8163), -INT16_C( 18988), -INT16_C( 14263), INT16_C( 24787), INT16_C( 31709) }, { INT16_C( 8213), INT16_C( 28202), -INT16_C( 2102), INT16_C( 8148), -INT16_C( 19010), -INT16_C( 14251), INT16_C( 24794), INT16_C( 31717) }, { INT16_C( 8211), INT16_C( 28204), -INT16_C( 2101), INT16_C( 8146), -INT16_C( 19013), -INT16_C( 14250), INT16_C( 24794), INT16_C( 31718) } }, { { -INT16_C( 3326), -INT16_C( 12729), INT16_C( 6634), -INT16_C( 23059), INT16_C( 17870), -INT16_C( 22163), INT16_C( 21925), INT16_C( 24613) }, { -INT16_C( 591), -INT16_C( 5856), -INT16_C( 5422), INT16_C( 20272), INT16_C( 25681), INT16_C( 5912), -INT16_C( 8634), INT16_C( 18672) }, { -INT16_C( 3400), -INT16_C( 13461), INT16_C( 5956), -INT16_C( 20525), INT16_C( 21080), -INT16_C( 21424), INT16_C( 20845), INT16_C( 26947) }, { -INT16_C( 3336), -INT16_C( 12821), INT16_C( 6549), -INT16_C( 22743), INT16_C( 18271), -INT16_C( 22071), INT16_C( 21790), INT16_C( 24904) }, { -INT16_C( 3327), -INT16_C( 12735), INT16_C( 6628), -INT16_C( 23040), INT16_C( 17895), -INT16_C( 22158), INT16_C( 21916), INT16_C( 24631) }, { -INT16_C( 3327), -INT16_C( 12730), INT16_C( 6633), -INT16_C( 23057), INT16_C( 17873), -INT16_C( 22163), INT16_C( 21923), INT16_C( 24615) }, { -INT16_C( 3327), -INT16_C( 12730), INT16_C( 6633), -INT16_C( 23059), INT16_C( 17870), -INT16_C( 22163), INT16_C( 21924), INT16_C( 24613) } }, { { INT16_C( 14289), -INT16_C( 17641), INT16_C( 1104), INT16_C( 7777), -INT16_C( 12727), -INT16_C( 4153), -INT16_C( 5085), -INT16_C( 11185) }, { INT16_C( 28649), -INT16_C( 17219), -INT16_C( 4518), -INT16_C( 21749), INT16_C( 9042), -INT16_C( 26430), -INT16_C( 19967), -INT16_C( 11552) }, { INT16_C( 17870), -INT16_C( 19794), INT16_C( 539), INT16_C( 5058), -INT16_C( 11597), -INT16_C( 7457), -INT16_C( 7581), -INT16_C( 12629) }, { INT16_C( 14736), -INT16_C( 17911), INT16_C( 1033), INT16_C( 7437), -INT16_C( 12586), -INT16_C( 4566), -INT16_C( 5397), -INT16_C( 11366) 
}, { INT16_C( 14316), -INT16_C( 17658), INT16_C( 1099), INT16_C( 7755), -INT16_C( 12719), -INT16_C( 4179), -INT16_C( 5105), -INT16_C( 11197) }, { INT16_C( 14292), -INT16_C( 17644), INT16_C( 1103), INT16_C( 7774), -INT16_C( 12726), -INT16_C( 4157), -INT16_C( 5088), -INT16_C( 11187) }, { INT16_C( 14289), -INT16_C( 17642), INT16_C( 1103), INT16_C( 7776), -INT16_C( 12727), -INT16_C( 4154), -INT16_C( 5086), -INT16_C( 11186) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r3 = simde_vsraq_n_s16(a, b, 3); simde_int16x8_t r6 = simde_vsraq_n_s16(a, b, 6); simde_int16x8_t r10 = simde_vsraq_n_s16(a, b, 10); simde_int16x8_t r13 = simde_vsraq_n_s16(a, b, 13); simde_int16x8_t r16 = simde_vsraq_n_s16(a, b, 16); simde_test_arm_neon_assert_equal_i16x8(r3, simde_vld1q_s16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i16x8(r6, simde_vld1q_s16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i16x8(r10, simde_vld1q_s16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_i16x8(r13, simde_vld1q_s16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i16x8(r16, simde_vld1q_s16(test_vec[i].r16)); } return 0; } static int test_simde_vsraq_n_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r6[4]; int32_t r13[4]; int32_t r19[4]; int32_t r26[4]; int32_t r32[4]; } test_vec[] = { { { -INT32_C( 1161302666), -INT32_C( 522417114), INT32_C( 348743160), INT32_C( 1895160957) }, { -INT32_C( 1197267636), -INT32_C( 1580672420), INT32_C( 1226420721), INT32_C( 1011036358) }, { -INT32_C( 1180009973), -INT32_C( 547115121), INT32_C( 367905983), INT32_C( 1910958400) }, { -INT32_C( 1161448817), -INT32_C( 522610068), INT32_C( 348892869), INT32_C( 1895284374) }, { -INT32_C( 1161304950), -INT32_C( 522420129), INT32_C( 348745499), INT32_C( 1895162885) }, { -INT32_C( 1161302684), -INT32_C( 522417138), INT32_C( 348743178), INT32_C( 
1895160972) }, { -INT32_C( 1161302667), -INT32_C( 522417115), INT32_C( 348743160), INT32_C( 1895160957) } }, { { INT32_C( 1140198173), -INT32_C( 1893477481), -INT32_C( 1230771144), INT32_C( 354851016) }, { INT32_C( 432916925), -INT32_C( 1782933852), INT32_C( 232707143), INT32_C( 558506500) }, { INT32_C( 1146962499), -INT32_C( 1921335823), -INT32_C( 1227135095), INT32_C( 363577680) }, { INT32_C( 1140251019), -INT32_C( 1893695125), -INT32_C( 1230742738), INT32_C( 354919193) }, { INT32_C( 1140198998), -INT32_C( 1893480882), -INT32_C( 1230770701), INT32_C( 354852081) }, { INT32_C( 1140198179), -INT32_C( 1893477508), -INT32_C( 1230771141), INT32_C( 354851024) }, { INT32_C( 1140198173), -INT32_C( 1893477482), -INT32_C( 1230771144), INT32_C( 354851016) } }, { { -INT32_C( 999997395), INT32_C( 1280608275), INT32_C( 1023604597), INT32_C( 1297229968) }, { -INT32_C( 1771626254), -INT32_C( 47504970), -INT32_C( 116716811), INT32_C( 1494963500) }, { -INT32_C( 1027679056), INT32_C( 1279866009), INT32_C( 1021780896), INT32_C( 1320588772) }, { -INT32_C( 1000213658), INT32_C( 1280602476), INT32_C( 1023590349), INT32_C( 1297412458) }, { -INT32_C( 1000000775), INT32_C( 1280608184), INT32_C( 1023604374), INT32_C( 1297232819) }, { -INT32_C( 999997422), INT32_C( 1280608274), INT32_C( 1023604595), INT32_C( 1297229990) }, { -INT32_C( 999997396), INT32_C( 1280608274), INT32_C( 1023604596), INT32_C( 1297229968) } }, { { -INT32_C( 1457684331), INT32_C( 2113237256), -INT32_C( 105121943), INT32_C( 289803551) }, { -INT32_C( 475550419), -INT32_C( 991833137), INT32_C( 163507421), -INT32_C( 698164927) }, { -INT32_C( 1465114807), INT32_C( 2097739863), -INT32_C( 102567140), INT32_C( 278894724) }, { -INT32_C( 1457742382), INT32_C( 2113116182), -INT32_C( 105101984), INT32_C( 289718325) }, { -INT32_C( 1457685239), INT32_C( 2113235364), -INT32_C( 105121632), INT32_C( 289802219) }, { -INT32_C( 1457684339), INT32_C( 2113237241), -INT32_C( 105121941), INT32_C( 289803540) }, { -INT32_C( 1457684332), INT32_C( 
2113237255), -INT32_C( 105121943), INT32_C( 289803550) } }, { { INT32_C( 1635745625), INT32_C( 1524593905), -INT32_C( 1957455253), -INT32_C( 711157337) }, { INT32_C( 381174855), -INT32_C( 187000553), -INT32_C( 956458875), -INT32_C( 895656079) }, { INT32_C( 1641701482), INT32_C( 1521672021), -INT32_C( 1972399923), -INT32_C( 725151964) }, { INT32_C( 1635792155), INT32_C( 1524571077), -INT32_C( 1957572009), -INT32_C( 711266671) }, { INT32_C( 1635746352), INT32_C( 1524593548), -INT32_C( 1957457078), -INT32_C( 711159046) }, { INT32_C( 1635745630), INT32_C( 1524593902), -INT32_C( 1957455268), -INT32_C( 711157351) }, { INT32_C( 1635745625), INT32_C( 1524593904), -INT32_C( 1957455254), -INT32_C( 711157338) } }, { { -INT32_C( 819192610), -INT32_C( 64418927), INT32_C( 1283947685), INT32_C( 1562453014) }, { INT32_C( 2138298984), -INT32_C( 109884045), INT32_C( 1472164070), -INT32_C( 1390256945) }, { -INT32_C( 785781689), -INT32_C( 66135866), INT32_C( 1306950248), INT32_C( 1540730249) }, { -INT32_C( 818931588), -INT32_C( 64432341), INT32_C( 1284127392), INT32_C( 1562283304) }, { -INT32_C( 819188532), -INT32_C( 64419137), INT32_C( 1283950492), INT32_C( 1562450362) }, { -INT32_C( 819192579), -INT32_C( 64418929), INT32_C( 1283947706), INT32_C( 1562452993) }, { -INT32_C( 819192610), -INT32_C( 64418928), INT32_C( 1283947685), INT32_C( 1562453013) } }, { { INT32_C( 175984249), -INT32_C( 33118631), INT32_C( 961187363), INT32_C( 446065842) }, { -INT32_C( 1181152954), INT32_C( 1018301526), INT32_C( 1268019836), INT32_C( 1207482062) }, { INT32_C( 157528734), -INT32_C( 17207670), INT32_C( 981000172), INT32_C( 464932749) }, { INT32_C( 175840065), -INT32_C( 32994327), INT32_C( 961342150), INT32_C( 446213239) }, { INT32_C( 175981996), -INT32_C( 33116689), INT32_C( 961189781), INT32_C( 446068145) }, { INT32_C( 175984231), -INT32_C( 33118616), INT32_C( 961187381), INT32_C( 446065859) }, { INT32_C( 175984248), -INT32_C( 33118631), INT32_C( 961187363), INT32_C( 446065842) } }, { { INT32_C( 
1565619460), INT32_C( 1062950940), -INT32_C( 1736923674), INT32_C( 1471286801) }, { INT32_C( 1829849879), -INT32_C( 743783593), INT32_C( 69090869), -INT32_C( 129296652) }, { INT32_C( 1594210864), INT32_C( 1051329321), -INT32_C( 1735844130), INT32_C( 1469266540) }, { INT32_C( 1565842830), INT32_C( 1062860146), -INT32_C( 1736915241), INT32_C( 1471271017) }, { INT32_C( 1565622950), INT32_C( 1062949521), -INT32_C( 1736923543), INT32_C( 1471286554) }, { INT32_C( 1565619487), INT32_C( 1062950928), -INT32_C( 1736923673), INT32_C( 1471286799) }, { INT32_C( 1565619460), INT32_C( 1062950939), -INT32_C( 1736923674), INT32_C( 1471286800) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r6 = simde_vsraq_n_s32(a, b, 6); simde_int32x4_t r13 = simde_vsraq_n_s32(a, b, 13); simde_int32x4_t r19 = simde_vsraq_n_s32(a, b, 19); simde_int32x4_t r26 = simde_vsraq_n_s32(a, b, 26); simde_int32x4_t r32 = simde_vsraq_n_s32(a, b, 32); simde_test_arm_neon_assert_equal_i32x4(r6, simde_vld1q_s32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i32x4(r13, simde_vld1q_s32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_i32x4(r19, simde_vld1q_s32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_i32x4(r26, simde_vld1q_s32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i32x4(r32, simde_vld1q_s32(test_vec[i].r32)); } return 0; } static int test_simde_vsraq_n_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t b[2]; int64_t r13[2]; int64_t r26[2]; int64_t r39[2]; int64_t r52[2]; int64_t r64[2]; } test_vec[] = { { { -INT64_C( 1487247765215587762), -INT64_C( 2795427882804660423) }, { -INT64_C( 6291787764080571768), -INT64_C( 9040106312942818602) }, { -INT64_C( 1488015805714132754), -INT64_C( 2796531411407314576) }, { -INT64_C( 1487247858970531433), -INT64_C( 2795428017512741802) }, { -INT64_C( 1487247765227032458), -INT64_C( 
2795427882821104281) }, { -INT64_C( 1487247765215589160), -INT64_C( 2795427882804662431) }, { -INT64_C( 1487247765215587763), -INT64_C( 2795427882804660424) } }, { { INT64_C( 6187220334653006057), INT64_C( 4549844769850200356) }, { -INT64_C( 2064032859925579690), -INT64_C( 6793991508038475265) }, { INT64_C( 6186968377516784672), INT64_C( 4549015425183691753) }, { INT64_C( 6187220303896519701), INT64_C( 4549844668611837745) }, { INT64_C( 6187220334649251603), INT64_C( 4549844769837842157) }, { INT64_C( 6187220334653005598), INT64_C( 4549844769850198847) }, { INT64_C( 6187220334653006056), INT64_C( 4549844769850200355) } }, { { INT64_C( 6284333698155529924), -INT64_C( 1191266314545443454) }, { -INT64_C( 959831922041496130), -INT64_C( 5838954393325140322) }, { INT64_C( 6284216531172858842), -INT64_C( 1191979077532909902) }, { INT64_C( 6284333683852919734), -INT64_C( 1191266401552644073) }, { INT64_C( 6284333698153783999), -INT64_C( 1191266314556064451) }, { INT64_C( 6284333698155529710), -INT64_C( 1191266314545444751) }, { INT64_C( 6284333698155529923), -INT64_C( 1191266314545443455) } }, { { INT64_C( 8948024616275577507), INT64_C( 5952118874571132690) }, { -INT64_C( 2886866485712700406), -INT64_C( 7608790941223282992) }, { INT64_C( 8947672215581520780), INT64_C( 5951190067083190394) }, { INT64_C( 8948024573257914658), INT64_C( 5952118761191312384) }, { INT64_C( 8948024616270326327), INT64_C( 5952118874557292379) }, { INT64_C( 8948024616275576865), INT64_C( 5952118874571131000) }, { INT64_C( 8948024616275577506), INT64_C( 5952118874571132689) } }, { { -INT64_C( 3664352591020689490), INT64_C( 1175459323992136297) }, { INT64_C( 603197819340025166), INT64_C( 6745314372626909971) }, { -INT64_C( 3664278958474383335), INT64_C( 1176282726625513605) }, { -INT64_C( 3664352582032341553), INT64_C( 1175459424505153066) }, { -INT64_C( 3664352591019592280), INT64_C( 1175459324004405952) }, { -INT64_C( 3664352591020689357), INT64_C( 1175459323992137794) }, { -INT64_C( 
3664352591020689490), INT64_C( 1175459323992136297) } }, { { INT64_C( 984928930610153762), -INT64_C( 8855101380133968891) }, { INT64_C( 4110215420899502537), INT64_C( 1946642424453385718) }, { INT64_C( 985430665891025283), -INT64_C( 8854863752884890109) }, { INT64_C( 984928991857136290), -INT64_C( 8855101351126736338) }, { INT64_C( 984928930617630200), -INT64_C( 8855101380130427970) }, { INT64_C( 984928930610154674), -INT64_C( 8855101380133968459) }, { INT64_C( 984928930610153762), -INT64_C( 8855101380133968891) } }, { { INT64_C( 4149966127205255653), -INT64_C( 3997962821365816632) }, { -INT64_C( 942708094007668598), -INT64_C( 1462373526339573088) }, { INT64_C( 4149851050533623857), -INT64_C( 3998141333759168631) }, { INT64_C( 4149966113157810385), -INT64_C( 3997962843156880274) }, { INT64_C( 4149966127203540876), -INT64_C( 3997962821368476674) }, { INT64_C( 4149966127205255443), -INT64_C( 3997962821365816957) }, { INT64_C( 4149966127205255652), -INT64_C( 3997962821365816633) } }, { { INT64_C( 1878859015223881125), INT64_C( 4860964793614948526) }, { -INT64_C( 5612647241480913363), -INT64_C( 866099267586247927) }, { INT64_C( 1878173877621161286), INT64_C( 4860859068606698251) }, { INT64_C( 1878858931588919855), INT64_C( 4860964780709063729) }, { INT64_C( 1878859015213671779), INT64_C( 4860964793613373100) }, { INT64_C( 1878859015223879878), INT64_C( 4860964793614948333) }, { INT64_C( 1878859015223881124), INT64_C( 4860964793614948525) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r13 = simde_vsraq_n_s64(a, b, 13); simde_int64x2_t r26 = simde_vsraq_n_s64(a, b, 26); simde_int64x2_t r39 = simde_vsraq_n_s64(a, b, 39); simde_int64x2_t r52 = simde_vsraq_n_s64(a, b, 52); simde_int64x2_t r64 = simde_vsraq_n_s64(a, b, 64); simde_test_arm_neon_assert_equal_i64x2(r13, simde_vld1q_s64(test_vec[i].r13)); 
simde_test_arm_neon_assert_equal_i64x2(r26, simde_vld1q_s64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_i64x2(r39, simde_vld1q_s64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_i64x2(r52, simde_vld1q_s64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_i64x2(r64, simde_vld1q_s64(test_vec[i].r64)); } return 0; } static int test_simde_vsraq_n_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r1[16]; uint8_t r3[16]; uint8_t r5[16]; uint8_t r6[16]; uint8_t r8[16]; } test_vec[] = { { { UINT8_C(207), UINT8_C(199), UINT8_C(161), UINT8_C( 7), UINT8_C( 24), UINT8_C(229), UINT8_C( 6), UINT8_C(223), UINT8_C( 93), UINT8_C( 54), UINT8_C( 83), UINT8_C( 89), UINT8_C( 94), UINT8_C(134), UINT8_C(165), UINT8_C(240) }, { UINT8_C(171), UINT8_C(176), UINT8_C( 43), UINT8_C( 82), UINT8_C(162), UINT8_C(176), UINT8_C(226), UINT8_C(136), UINT8_C(199), UINT8_C(117), UINT8_MAX, UINT8_C(140), UINT8_C( 70), UINT8_C(104), UINT8_C(145), UINT8_C( 22) }, { UINT8_C( 36), UINT8_C( 31), UINT8_C(182), UINT8_C( 48), UINT8_C(105), UINT8_C( 61), UINT8_C(119), UINT8_C( 35), UINT8_C(192), UINT8_C(112), UINT8_C(210), UINT8_C(159), UINT8_C(129), UINT8_C(186), UINT8_C(237), UINT8_C(251) }, { UINT8_C(228), UINT8_C(221), UINT8_C(166), UINT8_C( 17), UINT8_C( 44), UINT8_C(251), UINT8_C( 34), UINT8_C(240), UINT8_C(117), UINT8_C( 68), UINT8_C(114), UINT8_C(106), UINT8_C(102), UINT8_C(147), UINT8_C(183), UINT8_C(242) }, { UINT8_C(212), UINT8_C(204), UINT8_C(162), UINT8_C( 9), UINT8_C( 29), UINT8_C(234), UINT8_C( 13), UINT8_C(227), UINT8_C( 99), UINT8_C( 57), UINT8_C( 90), UINT8_C( 93), UINT8_C( 96), UINT8_C(137), UINT8_C(169), UINT8_C(240) }, { UINT8_C(209), UINT8_C(201), UINT8_C(161), UINT8_C( 8), UINT8_C( 26), UINT8_C(231), UINT8_C( 9), UINT8_C(225), UINT8_C( 96), UINT8_C( 55), UINT8_C( 86), UINT8_C( 91), UINT8_C( 95), UINT8_C(135), UINT8_C(167), UINT8_C(240) }, { UINT8_C(207), UINT8_C(199), UINT8_C(161), UINT8_C( 7), UINT8_C( 24), UINT8_C(229), UINT8_C( 6), UINT8_C(223), 
UINT8_C( 93), UINT8_C( 54), UINT8_C( 83), UINT8_C( 89), UINT8_C( 94), UINT8_C(134), UINT8_C(165), UINT8_C(240) } }, { { UINT8_C( 47), UINT8_C( 50), UINT8_C( 29), UINT8_C( 71), UINT8_C( 24), UINT8_C( 35), UINT8_C( 38), UINT8_C(117), UINT8_C( 90), UINT8_C(121), UINT8_C(206), UINT8_C(184), UINT8_MAX, UINT8_C(116), UINT8_C(168), UINT8_C(171) }, { UINT8_C( 36), UINT8_C(211), UINT8_C(253), UINT8_C(199), UINT8_C(132), UINT8_C(224), UINT8_C( 79), UINT8_C( 75), UINT8_C( 85), UINT8_C( 78), UINT8_C(215), UINT8_C(156), UINT8_C(182), UINT8_C(104), UINT8_C(178), UINT8_C(229) }, { UINT8_C( 65), UINT8_C(155), UINT8_C(155), UINT8_C(170), UINT8_C( 90), UINT8_C(147), UINT8_C( 77), UINT8_C(154), UINT8_C(132), UINT8_C(160), UINT8_C( 57), UINT8_C( 6), UINT8_C( 90), UINT8_C(168), UINT8_C( 1), UINT8_C( 29) }, { UINT8_C( 51), UINT8_C( 76), UINT8_C( 60), UINT8_C( 95), UINT8_C( 40), UINT8_C( 63), UINT8_C( 47), UINT8_C(126), UINT8_C(100), UINT8_C(130), UINT8_C(232), UINT8_C(203), UINT8_C( 21), UINT8_C(129), UINT8_C(190), UINT8_C(199) }, { UINT8_C( 48), UINT8_C( 56), UINT8_C( 36), UINT8_C( 77), UINT8_C( 28), UINT8_C( 42), UINT8_C( 40), UINT8_C(119), UINT8_C( 92), UINT8_C(123), UINT8_C(212), UINT8_C(188), UINT8_C( 4), UINT8_C(119), UINT8_C(173), UINT8_C(178) }, { UINT8_C( 47), UINT8_C( 53), UINT8_C( 32), UINT8_C( 74), UINT8_C( 26), UINT8_C( 38), UINT8_C( 39), UINT8_C(118), UINT8_C( 91), UINT8_C(122), UINT8_C(209), UINT8_C(186), UINT8_C( 1), UINT8_C(117), UINT8_C(170), UINT8_C(174) }, { UINT8_C( 47), UINT8_C( 50), UINT8_C( 29), UINT8_C( 71), UINT8_C( 24), UINT8_C( 35), UINT8_C( 38), UINT8_C(117), UINT8_C( 90), UINT8_C(121), UINT8_C(206), UINT8_C(184), UINT8_MAX, UINT8_C(116), UINT8_C(168), UINT8_C(171) } }, { { UINT8_C(155), UINT8_C(207), UINT8_C( 44), UINT8_C(179), UINT8_C(243), UINT8_C( 82), UINT8_C( 40), UINT8_C( 77), UINT8_C(204), UINT8_C(247), UINT8_C( 5), UINT8_C(203), UINT8_C(107), UINT8_C(173), UINT8_C(118), UINT8_C(143) }, { UINT8_C(129), UINT8_C(116), UINT8_C( 86), UINT8_C( 5), 
UINT8_C( 84), UINT8_C(165), UINT8_C( 80), UINT8_C(169), UINT8_C(244), UINT8_C( 39), UINT8_C( 69), UINT8_C(170), UINT8_C(144), UINT8_C(247), UINT8_C(144), UINT8_C( 43) }, { UINT8_C(219), UINT8_C( 9), UINT8_C( 87), UINT8_C(181), UINT8_C( 29), UINT8_C(164), UINT8_C( 80), UINT8_C(161), UINT8_C( 70), UINT8_C( 10), UINT8_C( 39), UINT8_C( 32), UINT8_C(179), UINT8_C( 40), UINT8_C(190), UINT8_C(164) }, { UINT8_C(171), UINT8_C(221), UINT8_C( 54), UINT8_C(179), UINT8_C(253), UINT8_C(102), UINT8_C( 50), UINT8_C( 98), UINT8_C(234), UINT8_C(251), UINT8_C( 13), UINT8_C(224), UINT8_C(125), UINT8_C(203), UINT8_C(136), UINT8_C(148) }, { UINT8_C(159), UINT8_C(210), UINT8_C( 46), UINT8_C(179), UINT8_C(245), UINT8_C( 87), UINT8_C( 42), UINT8_C( 82), UINT8_C(211), UINT8_C(248), UINT8_C( 7), UINT8_C(208), UINT8_C(111), UINT8_C(180), UINT8_C(122), UINT8_C(144) }, { UINT8_C(157), UINT8_C(208), UINT8_C( 45), UINT8_C(179), UINT8_C(244), UINT8_C( 84), UINT8_C( 41), UINT8_C( 79), UINT8_C(207), UINT8_C(247), UINT8_C( 6), UINT8_C(205), UINT8_C(109), UINT8_C(176), UINT8_C(120), UINT8_C(143) }, { UINT8_C(155), UINT8_C(207), UINT8_C( 44), UINT8_C(179), UINT8_C(243), UINT8_C( 82), UINT8_C( 40), UINT8_C( 77), UINT8_C(204), UINT8_C(247), UINT8_C( 5), UINT8_C(203), UINT8_C(107), UINT8_C(173), UINT8_C(118), UINT8_C(143) } }, { { UINT8_C(199), UINT8_C(188), UINT8_C(222), UINT8_C(186), UINT8_C( 15), UINT8_C( 6), UINT8_C( 7), UINT8_C(219), UINT8_C(253), UINT8_C( 12), UINT8_C(166), UINT8_C(104), UINT8_C(185), UINT8_C( 29), UINT8_C(248), UINT8_C( 58) }, { UINT8_C(145), UINT8_C( 78), UINT8_C( 63), UINT8_C(229), UINT8_C(244), UINT8_C(143), UINT8_C(142), UINT8_C(232), UINT8_C(183), UINT8_C(212), UINT8_C(146), UINT8_C( 71), UINT8_C(203), UINT8_C( 34), UINT8_C(114), UINT8_C(146) }, { UINT8_C( 15), UINT8_C(227), UINT8_C(253), UINT8_C( 44), UINT8_C(137), UINT8_C( 77), UINT8_C( 78), UINT8_C( 79), UINT8_C( 88), UINT8_C(118), UINT8_C(239), UINT8_C(139), UINT8_C( 30), UINT8_C( 46), UINT8_C( 49), UINT8_C(131) }, { 
UINT8_C(217), UINT8_C(197), UINT8_C(229), UINT8_C(214), UINT8_C( 45), UINT8_C( 23), UINT8_C( 24), UINT8_C(248), UINT8_C( 19), UINT8_C( 38), UINT8_C(184), UINT8_C(112), UINT8_C(210), UINT8_C( 33), UINT8_C( 6), UINT8_C( 76) }, { UINT8_C(203), UINT8_C(190), UINT8_C(223), UINT8_C(193), UINT8_C( 22), UINT8_C( 10), UINT8_C( 11), UINT8_C(226), UINT8_C( 2), UINT8_C( 18), UINT8_C(170), UINT8_C(106), UINT8_C(191), UINT8_C( 30), UINT8_C(251), UINT8_C( 62) }, { UINT8_C(201), UINT8_C(189), UINT8_C(222), UINT8_C(189), UINT8_C( 18), UINT8_C( 8), UINT8_C( 9), UINT8_C(222), UINT8_MAX, UINT8_C( 15), UINT8_C(168), UINT8_C(105), UINT8_C(188), UINT8_C( 29), UINT8_C(249), UINT8_C( 60) }, { UINT8_C(199), UINT8_C(188), UINT8_C(222), UINT8_C(186), UINT8_C( 15), UINT8_C( 6), UINT8_C( 7), UINT8_C(219), UINT8_C(253), UINT8_C( 12), UINT8_C(166), UINT8_C(104), UINT8_C(185), UINT8_C( 29), UINT8_C(248), UINT8_C( 58) } }, { { UINT8_C(223), UINT8_C( 80), UINT8_C( 76), UINT8_C(238), UINT8_C( 86), UINT8_C( 83), UINT8_C(201), UINT8_C( 84), UINT8_C( 95), UINT8_C(111), UINT8_C(188), UINT8_C( 25), UINT8_C(140), UINT8_C(180), UINT8_C( 83), UINT8_C( 29) }, { UINT8_C( 3), UINT8_C(147), UINT8_C( 2), UINT8_C(247), UINT8_C( 34), UINT8_C(145), UINT8_C(223), UINT8_C(217), UINT8_C(101), UINT8_C(113), UINT8_C( 32), UINT8_C( 48), UINT8_C(148), UINT8_C(146), UINT8_C(195), UINT8_C(115) }, { UINT8_C(224), UINT8_C(153), UINT8_C( 77), UINT8_C(105), UINT8_C(103), UINT8_C(155), UINT8_C( 56), UINT8_C(192), UINT8_C(145), UINT8_C(167), UINT8_C(204), UINT8_C( 49), UINT8_C(214), UINT8_C(253), UINT8_C(180), UINT8_C( 86) }, { UINT8_C(223), UINT8_C( 98), UINT8_C( 76), UINT8_C( 12), UINT8_C( 90), UINT8_C(101), UINT8_C(228), UINT8_C(111), UINT8_C(107), UINT8_C(125), UINT8_C(192), UINT8_C( 31), UINT8_C(158), UINT8_C(198), UINT8_C(107), UINT8_C( 43) }, { UINT8_C(223), UINT8_C( 84), UINT8_C( 76), UINT8_C(245), UINT8_C( 87), UINT8_C( 87), UINT8_C(207), UINT8_C( 90), UINT8_C( 98), UINT8_C(114), UINT8_C(189), UINT8_C( 26), UINT8_C(144), 
UINT8_C(184), UINT8_C( 89), UINT8_C( 32) }, { UINT8_C(223), UINT8_C( 82), UINT8_C( 76), UINT8_C(241), UINT8_C( 86), UINT8_C( 85), UINT8_C(204), UINT8_C( 87), UINT8_C( 96), UINT8_C(112), UINT8_C(188), UINT8_C( 25), UINT8_C(142), UINT8_C(182), UINT8_C( 86), UINT8_C( 30) }, { UINT8_C(223), UINT8_C( 80), UINT8_C( 76), UINT8_C(238), UINT8_C( 86), UINT8_C( 83), UINT8_C(201), UINT8_C( 84), UINT8_C( 95), UINT8_C(111), UINT8_C(188), UINT8_C( 25), UINT8_C(140), UINT8_C(180), UINT8_C( 83), UINT8_C( 29) } }, { { UINT8_C(226), UINT8_C( 15), UINT8_C( 97), UINT8_C( 57), UINT8_C( 99), UINT8_C( 42), UINT8_C(141), UINT8_C(194), UINT8_C(153), UINT8_C( 73), UINT8_C(219), UINT8_C( 38), UINT8_C(254), UINT8_C( 47), UINT8_C( 67), UINT8_C( 1) }, { UINT8_C(194), UINT8_C( 70), UINT8_C(248), UINT8_C(228), UINT8_C(215), UINT8_C(215), UINT8_C(190), UINT8_C( 60), UINT8_C( 72), UINT8_C(222), UINT8_C(108), UINT8_C(220), UINT8_C(113), UINT8_C( 47), UINT8_C( 79), UINT8_C( 83) }, { UINT8_C( 67), UINT8_C( 50), UINT8_C(221), UINT8_C(171), UINT8_C(206), UINT8_C(149), UINT8_C(236), UINT8_C(224), UINT8_C(189), UINT8_C(184), UINT8_C( 17), UINT8_C(148), UINT8_C( 54), UINT8_C( 70), UINT8_C(106), UINT8_C( 42) }, { UINT8_C(250), UINT8_C( 23), UINT8_C(128), UINT8_C( 85), UINT8_C(125), UINT8_C( 68), UINT8_C(164), UINT8_C(201), UINT8_C(162), UINT8_C(100), UINT8_C(232), UINT8_C( 65), UINT8_C( 12), UINT8_C( 52), UINT8_C( 76), UINT8_C( 11) }, { UINT8_C(232), UINT8_C( 17), UINT8_C(104), UINT8_C( 64), UINT8_C(105), UINT8_C( 48), UINT8_C(146), UINT8_C(195), UINT8_C(155), UINT8_C( 79), UINT8_C(222), UINT8_C( 44), UINT8_C( 1), UINT8_C( 48), UINT8_C( 69), UINT8_C( 3) }, { UINT8_C(229), UINT8_C( 16), UINT8_C(100), UINT8_C( 60), UINT8_C(102), UINT8_C( 45), UINT8_C(143), UINT8_C(194), UINT8_C(154), UINT8_C( 76), UINT8_C(220), UINT8_C( 41), UINT8_MAX, UINT8_C( 47), UINT8_C( 68), UINT8_C( 2) }, { UINT8_C(226), UINT8_C( 15), UINT8_C( 97), UINT8_C( 57), UINT8_C( 99), UINT8_C( 42), UINT8_C(141), UINT8_C(194), UINT8_C(153), 
UINT8_C( 73), UINT8_C(219), UINT8_C( 38), UINT8_C(254), UINT8_C( 47), UINT8_C( 67), UINT8_C( 1) } }, { { UINT8_C( 63), UINT8_C(176), UINT8_C(140), UINT8_C(162), UINT8_C(218), UINT8_C( 25), UINT8_C(100), UINT8_C(116), UINT8_C( 99), UINT8_C( 64), UINT8_C(154), UINT8_C( 97), UINT8_C(111), UINT8_C(221), UINT8_C( 98), UINT8_C( 49) }, { UINT8_C( 35), UINT8_C( 90), UINT8_C( 21), UINT8_C(250), UINT8_C( 49), UINT8_C(211), UINT8_C( 54), UINT8_C(121), UINT8_C(178), UINT8_C(163), UINT8_C( 86), UINT8_C( 35), UINT8_C(210), UINT8_C(165), UINT8_C(118), UINT8_C( 17) }, { UINT8_C( 80), UINT8_C(221), UINT8_C(150), UINT8_C( 31), UINT8_C(242), UINT8_C(130), UINT8_C(127), UINT8_C(176), UINT8_C(188), UINT8_C(145), UINT8_C(197), UINT8_C(114), UINT8_C(216), UINT8_C( 47), UINT8_C(157), UINT8_C( 57) }, { UINT8_C( 67), UINT8_C(187), UINT8_C(142), UINT8_C(193), UINT8_C(224), UINT8_C( 51), UINT8_C(106), UINT8_C(131), UINT8_C(121), UINT8_C( 84), UINT8_C(164), UINT8_C(101), UINT8_C(137), UINT8_C(241), UINT8_C(112), UINT8_C( 51) }, { UINT8_C( 64), UINT8_C(178), UINT8_C(140), UINT8_C(169), UINT8_C(219), UINT8_C( 31), UINT8_C(101), UINT8_C(119), UINT8_C(104), UINT8_C( 69), UINT8_C(156), UINT8_C( 98), UINT8_C(117), UINT8_C(226), UINT8_C(101), UINT8_C( 49) }, { UINT8_C( 63), UINT8_C(177), UINT8_C(140), UINT8_C(165), UINT8_C(218), UINT8_C( 28), UINT8_C(100), UINT8_C(117), UINT8_C(101), UINT8_C( 66), UINT8_C(155), UINT8_C( 97), UINT8_C(114), UINT8_C(223), UINT8_C( 99), UINT8_C( 49) }, { UINT8_C( 63), UINT8_C(176), UINT8_C(140), UINT8_C(162), UINT8_C(218), UINT8_C( 25), UINT8_C(100), UINT8_C(116), UINT8_C( 99), UINT8_C( 64), UINT8_C(154), UINT8_C( 97), UINT8_C(111), UINT8_C(221), UINT8_C( 98), UINT8_C( 49) } }, { { UINT8_C( 86), UINT8_C( 3), UINT8_C(179), UINT8_C( 48), UINT8_C( 28), UINT8_C( 24), UINT8_C(164), UINT8_C(127), UINT8_C( 88), UINT8_C( 62), UINT8_C(224), UINT8_C(199), UINT8_C( 28), UINT8_C( 66), UINT8_C(248), UINT8_C( 63) }, { UINT8_C(156), UINT8_C( 13), UINT8_C( 58), UINT8_C(205), 
UINT8_C(225), UINT8_C(112), UINT8_C( 71), UINT8_C(147), UINT8_C( 19), UINT8_C(157), UINT8_C(182), UINT8_C(230), UINT8_C( 66), UINT8_C( 44), UINT8_C(247), UINT8_C(152) }, { UINT8_C(164), UINT8_C( 9), UINT8_C(208), UINT8_C(150), UINT8_C(140), UINT8_C( 80), UINT8_C(199), UINT8_C(200), UINT8_C( 97), UINT8_C(140), UINT8_C( 59), UINT8_C( 58), UINT8_C( 61), UINT8_C( 88), UINT8_C(115), UINT8_C(139) }, { UINT8_C(105), UINT8_C( 4), UINT8_C(186), UINT8_C( 73), UINT8_C( 56), UINT8_C( 38), UINT8_C(172), UINT8_C(145), UINT8_C( 90), UINT8_C( 81), UINT8_C(246), UINT8_C(227), UINT8_C( 36), UINT8_C( 71), UINT8_C( 22), UINT8_C( 82) }, { UINT8_C( 90), UINT8_C( 3), UINT8_C(180), UINT8_C( 54), UINT8_C( 35), UINT8_C( 27), UINT8_C(166), UINT8_C(131), UINT8_C( 88), UINT8_C( 66), UINT8_C(229), UINT8_C(206), UINT8_C( 30), UINT8_C( 67), UINT8_MAX, UINT8_C( 67) }, { UINT8_C( 88), UINT8_C( 3), UINT8_C(179), UINT8_C( 51), UINT8_C( 31), UINT8_C( 25), UINT8_C(165), UINT8_C(129), UINT8_C( 88), UINT8_C( 64), UINT8_C(226), UINT8_C(202), UINT8_C( 29), UINT8_C( 66), UINT8_C(251), UINT8_C( 65) }, { UINT8_C( 86), UINT8_C( 3), UINT8_C(179), UINT8_C( 48), UINT8_C( 28), UINT8_C( 24), UINT8_C(164), UINT8_C(127), UINT8_C( 88), UINT8_C( 62), UINT8_C(224), UINT8_C(199), UINT8_C( 28), UINT8_C( 66), UINT8_C(248), UINT8_C( 63) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r1 = simde_vsraq_n_u8(a, b, 1); simde_uint8x16_t r3 = simde_vsraq_n_u8(a, b, 3); simde_uint8x16_t r5 = simde_vsraq_n_u8(a, b, 5); simde_uint8x16_t r6 = simde_vsraq_n_u8(a, b, 6); simde_uint8x16_t r8 = simde_vsraq_n_u8(a, b, 8); simde_test_arm_neon_assert_equal_u8x16(r1, simde_vld1q_u8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u8x16(r3, simde_vld1q_u8(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u8x16(r5, simde_vld1q_u8(test_vec[i].r5)); 
simde_test_arm_neon_assert_equal_u8x16(r6, simde_vld1q_u8(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u8x16(r8, simde_vld1q_u8(test_vec[i].r8)); } return 0; } static int test_simde_vsraq_n_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r3[8]; uint16_t r6[8]; uint16_t r10[8]; uint16_t r13[8]; uint16_t r16[8]; } test_vec[] = { { { UINT16_C(29593), UINT16_C(36810), UINT16_C(36769), UINT16_C(28231), UINT16_C( 6255), UINT16_C(25272), UINT16_C(37052), UINT16_C(22697) }, { UINT16_C( 6890), UINT16_C(46431), UINT16_C(39077), UINT16_C(28289), UINT16_C(61361), UINT16_C( 1761), UINT16_C(17578), UINT16_C(17438) }, { UINT16_C(30454), UINT16_C(42613), UINT16_C(41653), UINT16_C(31767), UINT16_C(13925), UINT16_C(25492), UINT16_C(39249), UINT16_C(24876) }, { UINT16_C(29700), UINT16_C(37535), UINT16_C(37379), UINT16_C(28673), UINT16_C( 7213), UINT16_C(25299), UINT16_C(37326), UINT16_C(22969) }, { UINT16_C(29599), UINT16_C(36855), UINT16_C(36807), UINT16_C(28258), UINT16_C( 6314), UINT16_C(25273), UINT16_C(37069), UINT16_C(22714) }, { UINT16_C(29593), UINT16_C(36815), UINT16_C(36773), UINT16_C(28234), UINT16_C( 6262), UINT16_C(25272), UINT16_C(37054), UINT16_C(22699) }, { UINT16_C(29593), UINT16_C(36810), UINT16_C(36769), UINT16_C(28231), UINT16_C( 6255), UINT16_C(25272), UINT16_C(37052), UINT16_C(22697) } }, { { UINT16_C(59575), UINT16_C(22739), UINT16_C( 6775), UINT16_C(59079), UINT16_C(32562), UINT16_C(61000), UINT16_C(61711), UINT16_C(64070) }, { UINT16_C(42508), UINT16_C(45487), UINT16_C(12350), UINT16_C(61215), UINT16_C( 287), UINT16_C(51957), UINT16_C( 4933), UINT16_C(64782) }, { UINT16_C(64888), UINT16_C(28424), UINT16_C( 8318), UINT16_C( 1194), UINT16_C(32597), UINT16_C( 1958), UINT16_C(62327), UINT16_C( 6631) }, { UINT16_C(60239), UINT16_C(23449), UINT16_C( 6967), UINT16_C(60035), UINT16_C(32566), UINT16_C(61811), UINT16_C(61788), UINT16_C(65082) }, { UINT16_C(59616), UINT16_C(22783), UINT16_C( 6787), UINT16_C(59138), UINT16_C(32562), 
UINT16_C(61050), UINT16_C(61715), UINT16_C(64133) }, { UINT16_C(59580), UINT16_C(22744), UINT16_C( 6776), UINT16_C(59086), UINT16_C(32562), UINT16_C(61006), UINT16_C(61711), UINT16_C(64077) }, { UINT16_C(59575), UINT16_C(22739), UINT16_C( 6775), UINT16_C(59079), UINT16_C(32562), UINT16_C(61000), UINT16_C(61711), UINT16_C(64070) } }, { { UINT16_C(57851), UINT16_C(29269), UINT16_C( 7419), UINT16_C(11608), UINT16_C(41371), UINT16_C(43803), UINT16_C(24978), UINT16_C(40613) }, { UINT16_C(21511), UINT16_C(17744), UINT16_C(28548), UINT16_C(42036), UINT16_C(10864), UINT16_C(46702), UINT16_C(31805), UINT16_C(14771) }, { UINT16_C(60539), UINT16_C(31487), UINT16_C(10987), UINT16_C(16862), UINT16_C(42729), UINT16_C(49640), UINT16_C(28953), UINT16_C(42459) }, { UINT16_C(58187), UINT16_C(29546), UINT16_C( 7865), UINT16_C(12264), UINT16_C(41540), UINT16_C(44532), UINT16_C(25474), UINT16_C(40843) }, { UINT16_C(57872), UINT16_C(29286), UINT16_C( 7446), UINT16_C(11649), UINT16_C(41381), UINT16_C(43848), UINT16_C(25009), UINT16_C(40627) }, { UINT16_C(57853), UINT16_C(29271), UINT16_C( 7422), UINT16_C(11613), UINT16_C(41372), UINT16_C(43808), UINT16_C(24981), UINT16_C(40614) }, { UINT16_C(57851), UINT16_C(29269), UINT16_C( 7419), UINT16_C(11608), UINT16_C(41371), UINT16_C(43803), UINT16_C(24978), UINT16_C(40613) } }, { { UINT16_C( 2141), UINT16_C(22699), UINT16_C( 1061), UINT16_C(49285), UINT16_C(41125), UINT16_C(14187), UINT16_C( 4097), UINT16_C( 2518) }, { UINT16_C( 9828), UINT16_C(59726), UINT16_C(33685), UINT16_C( 1677), UINT16_C(64429), UINT16_C(60092), UINT16_C(28535), UINT16_C(54307) }, { UINT16_C( 3369), UINT16_C(30164), UINT16_C( 5271), UINT16_C(49494), UINT16_C(49178), UINT16_C(21698), UINT16_C( 7663), UINT16_C( 9306) }, { UINT16_C( 2294), UINT16_C(23632), UINT16_C( 1587), UINT16_C(49311), UINT16_C(42131), UINT16_C(15125), UINT16_C( 4542), UINT16_C( 3366) }, { UINT16_C( 2150), UINT16_C(22757), UINT16_C( 1093), UINT16_C(49286), UINT16_C(41187), UINT16_C(14245), UINT16_C( 
4124), UINT16_C( 2571) }, { UINT16_C( 2142), UINT16_C(22706), UINT16_C( 1065), UINT16_C(49285), UINT16_C(41132), UINT16_C(14194), UINT16_C( 4100), UINT16_C( 2524) }, { UINT16_C( 2141), UINT16_C(22699), UINT16_C( 1061), UINT16_C(49285), UINT16_C(41125), UINT16_C(14187), UINT16_C( 4097), UINT16_C( 2518) } }, { { UINT16_C(53111), UINT16_C(39980), UINT16_C(45523), UINT16_C(30813), UINT16_C(51281), UINT16_C(21167), UINT16_C(34265), UINT16_C(15707) }, { UINT16_C(43691), UINT16_C(16678), UINT16_C(45869), UINT16_C(55879), UINT16_C( 942), UINT16_C( 9668), UINT16_C(59506), UINT16_C(59897) }, { UINT16_C(58572), UINT16_C(42064), UINT16_C(51256), UINT16_C(37797), UINT16_C(51398), UINT16_C(22375), UINT16_C(41703), UINT16_C(23194) }, { UINT16_C(53793), UINT16_C(40240), UINT16_C(46239), UINT16_C(31686), UINT16_C(51295), UINT16_C(21318), UINT16_C(35194), UINT16_C(16642) }, { UINT16_C(53153), UINT16_C(39996), UINT16_C(45567), UINT16_C(30867), UINT16_C(51281), UINT16_C(21176), UINT16_C(34323), UINT16_C(15765) }, { UINT16_C(53116), UINT16_C(39982), UINT16_C(45528), UINT16_C(30819), UINT16_C(51281), UINT16_C(21168), UINT16_C(34272), UINT16_C(15714) }, { UINT16_C(53111), UINT16_C(39980), UINT16_C(45523), UINT16_C(30813), UINT16_C(51281), UINT16_C(21167), UINT16_C(34265), UINT16_C(15707) } }, { { UINT16_C( 9655), UINT16_C(35462), UINT16_C(58326), UINT16_C( 9986), UINT16_C(45483), UINT16_C(33914), UINT16_C(54583), UINT16_C(58050) }, { UINT16_C(59519), UINT16_C(44067), UINT16_C(27292), UINT16_C(19078), UINT16_C(19309), UINT16_C(57200), UINT16_C(26931), UINT16_C(60105) }, { UINT16_C(17094), UINT16_C(40970), UINT16_C(61737), UINT16_C(12370), UINT16_C(47896), UINT16_C(41064), UINT16_C(57949), UINT16_C( 27) }, { UINT16_C(10584), UINT16_C(36150), UINT16_C(58752), UINT16_C(10284), UINT16_C(45784), UINT16_C(34807), UINT16_C(55003), UINT16_C(58989) }, { UINT16_C( 9713), UINT16_C(35505), UINT16_C(58352), UINT16_C(10004), UINT16_C(45501), UINT16_C(33969), UINT16_C(54609), UINT16_C(58108) }, { 
UINT16_C( 9662), UINT16_C(35467), UINT16_C(58329), UINT16_C( 9988), UINT16_C(45485), UINT16_C(33920), UINT16_C(54586), UINT16_C(58057) }, { UINT16_C( 9655), UINT16_C(35462), UINT16_C(58326), UINT16_C( 9986), UINT16_C(45483), UINT16_C(33914), UINT16_C(54583), UINT16_C(58050) } }, { { UINT16_C(20367), UINT16_C(25972), UINT16_C(30258), UINT16_C(56717), UINT16_C( 1831), UINT16_C(24162), UINT16_C( 9436), UINT16_C(23617) }, { UINT16_C(25612), UINT16_C(43016), UINT16_C(36815), UINT16_C(15603), UINT16_C(25562), UINT16_C( 3356), UINT16_C(58828), UINT16_C(23543) }, { UINT16_C(23568), UINT16_C(31349), UINT16_C(34859), UINT16_C(58667), UINT16_C( 5026), UINT16_C(24581), UINT16_C(16789), UINT16_C(26559) }, { UINT16_C(20767), UINT16_C(26644), UINT16_C(30833), UINT16_C(56960), UINT16_C( 2230), UINT16_C(24214), UINT16_C(10355), UINT16_C(23984) }, { UINT16_C(20392), UINT16_C(26014), UINT16_C(30293), UINT16_C(56732), UINT16_C( 1855), UINT16_C(24165), UINT16_C( 9493), UINT16_C(23639) }, { UINT16_C(20370), UINT16_C(25977), UINT16_C(30262), UINT16_C(56718), UINT16_C( 1834), UINT16_C(24162), UINT16_C( 9443), UINT16_C(23619) }, { UINT16_C(20367), UINT16_C(25972), UINT16_C(30258), UINT16_C(56717), UINT16_C( 1831), UINT16_C(24162), UINT16_C( 9436), UINT16_C(23617) } }, { { UINT16_C(27444), UINT16_C(26305), UINT16_C(20193), UINT16_C( 2115), UINT16_C(42325), UINT16_C(12647), UINT16_C(43209), UINT16_C(54925) }, { UINT16_C(38412), UINT16_C(56190), UINT16_C(28965), UINT16_C(65304), UINT16_C(13524), UINT16_C(41228), UINT16_C( 793), UINT16_C(19964) }, { UINT16_C(32245), UINT16_C(33328), UINT16_C(23813), UINT16_C(10278), UINT16_C(44015), UINT16_C(17800), UINT16_C(43308), UINT16_C(57420) }, { UINT16_C(28044), UINT16_C(27182), UINT16_C(20645), UINT16_C( 3135), UINT16_C(42536), UINT16_C(13291), UINT16_C(43221), UINT16_C(55236) }, { UINT16_C(27481), UINT16_C(26359), UINT16_C(20221), UINT16_C( 2178), UINT16_C(42338), UINT16_C(12687), UINT16_C(43209), UINT16_C(54944) }, { UINT16_C(27448), 
UINT16_C(26311), UINT16_C(20196), UINT16_C( 2122), UINT16_C(42326), UINT16_C(12652), UINT16_C(43209), UINT16_C(54927) }, { UINT16_C(27444), UINT16_C(26305), UINT16_C(20193), UINT16_C( 2115), UINT16_C(42325), UINT16_C(12647), UINT16_C(43209), UINT16_C(54925) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r3 = simde_vsraq_n_u16(a, b, 3); simde_uint16x8_t r6 = simde_vsraq_n_u16(a, b, 6); simde_uint16x8_t r10 = simde_vsraq_n_u16(a, b, 10); simde_uint16x8_t r13 = simde_vsraq_n_u16(a, b, 13); simde_uint16x8_t r16 = simde_vsraq_n_u16(a, b, 16); simde_test_arm_neon_assert_equal_u16x8(r3, simde_vld1q_u16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u16x8(r6, simde_vld1q_u16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u16x8(r10, simde_vld1q_u16(test_vec[i].r10)); simde_test_arm_neon_assert_equal_u16x8(r13, simde_vld1q_u16(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u16x8(r16, simde_vld1q_u16(test_vec[i].r16)); } return 0; } static int test_simde_vsraq_n_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r6[4]; uint32_t r13[4]; uint32_t r19[4]; uint32_t r26[4]; uint32_t r32[4]; } test_vec[] = { { { UINT32_C(1979807148), UINT32_C(3348053427), UINT32_C(1572125531), UINT32_C( 385796267) }, { UINT32_C( 891890417), UINT32_C(3586666332), UINT32_C(3784648901), UINT32_C(1110980757) }, { UINT32_C(1993742935), UINT32_C(3404095088), UINT32_C(1631260670), UINT32_C( 403155341) }, { UINT32_C(1979916021), UINT32_C(3348491252), UINT32_C(1572587524), UINT32_C( 385931884) }, { UINT32_C(1979808849), UINT32_C(3348060268), UINT32_C(1572132749), UINT32_C( 385798386) }, { UINT32_C(1979807161), UINT32_C(3348053480), UINT32_C(1572125587), UINT32_C( 385796283) }, { UINT32_C(1979807148), UINT32_C(3348053427), UINT32_C(1572125531), UINT32_C( 385796267) } }, { { UINT32_C(1622686125), 
UINT32_C(3525789558), UINT32_C(2905594882), UINT32_C(2529373604) }, { UINT32_C(3033263192), UINT32_C(3767112475), UINT32_C(1371676347), UINT32_C( 60029526) }, { UINT32_C(1670080862), UINT32_C(3584650690), UINT32_C(2927027324), UINT32_C(2530311565) }, { UINT32_C(1623056396), UINT32_C(3526249410), UINT32_C(2905762322), UINT32_C(2529380931) }, { UINT32_C(1622691910), UINT32_C(3525796743), UINT32_C(2905597498), UINT32_C(2529373718) }, { UINT32_C(1622686170), UINT32_C(3525789614), UINT32_C(2905594902), UINT32_C(2529373604) }, { UINT32_C(1622686125), UINT32_C(3525789558), UINT32_C(2905594882), UINT32_C(2529373604) } }, { { UINT32_C(2858699572), UINT32_C(2491190162), UINT32_C( 205630567), UINT32_C( 832701913) }, { UINT32_C( 233139697), UINT32_C(3152899840), UINT32_C(3826036621), UINT32_C(3739721642) }, { UINT32_C(2862342379), UINT32_C(2540454222), UINT32_C( 265412389), UINT32_C( 891135063) }, { UINT32_C(2858728031), UINT32_C(2491575037), UINT32_C( 206097612), UINT32_C( 833158421) }, { UINT32_C(2858700016), UINT32_C(2491196175), UINT32_C( 205637864), UINT32_C( 832709045) }, { UINT32_C(2858699575), UINT32_C(2491190208), UINT32_C( 205630624), UINT32_C( 832701968) }, { UINT32_C(2858699572), UINT32_C(2491190162), UINT32_C( 205630567), UINT32_C( 832701913) } }, { { UINT32_C(2089307114), UINT32_C(1041237463), UINT32_C(2320126641), UINT32_C(1220340823) }, { UINT32_C(1498784089), UINT32_C(2652193552), UINT32_C(2625774066), UINT32_C(2876926401) }, { UINT32_C(2112725615), UINT32_C(1082677987), UINT32_C(2361154360), UINT32_C(1265292798) }, { UINT32_C(2089490071), UINT32_C(1041561217), UINT32_C(2320447170), UINT32_C(1220692010) }, { UINT32_C(2089309972), UINT32_C(1041242521), UINT32_C(2320131649), UINT32_C(1220346310) }, { UINT32_C(2089307136), UINT32_C(1041237502), UINT32_C(2320126680), UINT32_C(1220340865) }, { UINT32_C(2089307114), UINT32_C(1041237463), UINT32_C(2320126641), UINT32_C(1220340823) } }, { { UINT32_C(2351432629), UINT32_C(3117037576), UINT32_C(3779270026), 
UINT32_C(1529544449) }, { UINT32_C(2981396385), UINT32_C(3041905090), UINT32_C(2891043307), UINT32_C(4032285755) }, { UINT32_C(2398016947), UINT32_C(3164567343), UINT32_C(3824442577), UINT32_C(1592548913) }, { UINT32_C(2351796568), UINT32_C(3117408902), UINT32_C(3779622936), UINT32_C(1530036671) }, { UINT32_C(2351438315), UINT32_C(3117043377), UINT32_C(3779275540), UINT32_C(1529552139) }, { UINT32_C(2351432673), UINT32_C(3117037621), UINT32_C(3779270069), UINT32_C(1529544509) }, { UINT32_C(2351432629), UINT32_C(3117037576), UINT32_C(3779270026), UINT32_C(1529544449) } }, { { UINT32_C(3615260623), UINT32_C(1116751544), UINT32_C(1562694491), UINT32_C(1958235859) }, { UINT32_C(2418371789), UINT32_C( 558200118), UINT32_C(2177734214), UINT32_C( 829498466) }, { UINT32_C(3653047682), UINT32_C(1125473420), UINT32_C(1596721588), UINT32_C(1971196772) }, { UINT32_C(3615555834), UINT32_C(1116819683), UINT32_C(1562960327), UINT32_C(1958337116) }, { UINT32_C(3615265235), UINT32_C(1116752608), UINT32_C(1562698644), UINT32_C(1958237441) }, { UINT32_C(3615260659), UINT32_C(1116751552), UINT32_C(1562694523), UINT32_C(1958235871) }, { UINT32_C(3615260623), UINT32_C(1116751544), UINT32_C(1562694491), UINT32_C(1958235859) } }, { { UINT32_C(1544088996), UINT32_C(2409535540), UINT32_C(1072480876), UINT32_C(3736314896) }, { UINT32_C(1198446609), UINT32_C(2489889613), UINT32_C(2887071049), UINT32_C(4259153753) }, { UINT32_C(1562814724), UINT32_C(2448440065), UINT32_C(1117591361), UINT32_C(3802864173) }, { UINT32_C(1544235290), UINT32_C(2409839481), UINT32_C(1072833301), UINT32_C(3736834812) }, { UINT32_C(1544091281), UINT32_C(2409540289), UINT32_C(1072486382), UINT32_C(3736323019) }, { UINT32_C(1544089013), UINT32_C(2409535577), UINT32_C(1072480919), UINT32_C(3736314959) }, { UINT32_C(1544088996), UINT32_C(2409535540), UINT32_C(1072480876), UINT32_C(3736314896) } }, { { UINT32_C(2824463988), UINT32_C(3929602174), UINT32_C(3408471226), UINT32_C(3668565193) }, { UINT32_C( 35723189), 
UINT32_C( 328632778), UINT32_C( 398437566), UINT32_C(2803211571) }, { UINT32_C(2825022162), UINT32_C(3934737061), UINT32_C(3414696812), UINT32_C(3712365373) }, { UINT32_C(2824468348), UINT32_C(3929642290), UINT32_C(3408519863), UINT32_C(3668907381) }, { UINT32_C(2824464056), UINT32_C(3929602800), UINT32_C(3408471985), UINT32_C(3668570539) }, { UINT32_C(2824463988), UINT32_C(3929602178), UINT32_C(3408471231), UINT32_C(3668565234) }, { UINT32_C(2824463988), UINT32_C(3929602174), UINT32_C(3408471226), UINT32_C(3668565193) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r6 = simde_vsraq_n_u32(a, b, 6); simde_uint32x4_t r13 = simde_vsraq_n_u32(a, b, 13); simde_uint32x4_t r19 = simde_vsraq_n_u32(a, b, 19); simde_uint32x4_t r26 = simde_vsraq_n_u32(a, b, 26); simde_uint32x4_t r32 = simde_vsraq_n_u32(a, b, 32); simde_test_arm_neon_assert_equal_u32x4(r6, simde_vld1q_u32(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u32x4(r13, simde_vld1q_u32(test_vec[i].r13)); simde_test_arm_neon_assert_equal_u32x4(r19, simde_vld1q_u32(test_vec[i].r19)); simde_test_arm_neon_assert_equal_u32x4(r26, simde_vld1q_u32(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u32x4(r32, simde_vld1q_u32(test_vec[i].r32)); } return 0; } static int test_simde_vsraq_n_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t b[2]; uint64_t r13[2]; uint64_t r26[2]; uint64_t r39[2]; uint64_t r52[2]; uint64_t r64[2]; } test_vec[] = { { { UINT64_C(17874051099209012816), UINT64_C( 8625195464406836271) }, { UINT64_C(16725053657440447411), UINT64_C( 14759499813092458) }, { UINT64_C(17876092731735555839), UINT64_C( 8625197266103590798) }, { UINT64_C(17874051348431733341), UINT64_C( 8625195464626769956) }, { UINT64_C(17874051099239435511), UINT64_C( 8625195464406863118) }, { UINT64_C(17874051099209016529), UINT64_C( 8625195464406836274) }, { 
UINT64_C(17874051099209012816), UINT64_C( 8625195464406836271) } }, { { UINT64_C( 5675430669503806469), UINT64_C(10942796997914856402) }, { UINT64_C(12619904489882514058), UINT64_C( 113922584089103844) }, { UINT64_C( 5676971185188606580), UINT64_C(10942810904480296966) }, { UINT64_C( 5675430857555037523), UINT64_C(10942796999612435191) }, { UINT64_C( 5675430669526761941), UINT64_C(10942796997915063625) }, { UINT64_C( 5675430669503809271), UINT64_C(10942796997914856427) }, { UINT64_C( 5675430669503806469), UINT64_C(10942796997914856402) } }, { { UINT64_C(13107040498182042999), UINT64_C(18092920619843375770) }, { UINT64_C(12744720525670343657), UINT64_C(10944266200905180388) }, { UINT64_C(13108596250199336742), UINT64_C(18094256589838603453) }, { UINT64_C(13107040688093177922), UINT64_C(18092920782925650578) }, { UINT64_C(13107040498205225510), UINT64_C(18092920619863283274) }, { UINT64_C(13107040498182045828), UINT64_C(18092920619843378200) }, { UINT64_C(13107040498182042999), UINT64_C(18092920619843375770) } }, { { UINT64_C(15502817343082179981), UINT64_C( 2256422393928368100) }, { UINT64_C(13892246799416984912), UINT64_C( 8378808463846805392) }, { UINT64_C(15504513173990311937), UINT64_C( 2257445197695927524) }, { UINT64_C(15502817550092788883), UINT64_C( 2256422518782343632) }, { UINT64_C(15502817343107449830), UINT64_C( 2256422393943609063) }, { UINT64_C(15502817343082183065), UINT64_C( 2256422393928369960) }, { UINT64_C(15502817343082179981), UINT64_C( 2256422393928368100) } }, { { UINT64_C( 7728750387756561454), UINT64_C( 6097611131503876169) }, { UINT64_C(12192435071695777175), UINT64_C( 9558785026189546571) }, { UINT64_C( 7730238722115899317), UINT64_C( 6098777975379143447) }, { UINT64_C( 7728750569438001802), UINT64_C( 6097611273940872661) }, { UINT64_C( 7728750387778739364), UINT64_C( 6097611131521263497) }, { UINT64_C( 7728750387756564161), UINT64_C( 6097611131503878291) }, { UINT64_C( 7728750387756561454), UINT64_C( 6097611131503876169) } }, { { 
UINT64_C(15998810719909808917), UINT64_C( 8152356292189341897) }, { UINT64_C( 1849880820033295081), UINT64_C(13531050401365427693) }, { UINT64_C(15999036535439598137), UINT64_C( 8154008031740289825) }, { UINT64_C(15998810747475181205), UINT64_C( 8152356493817705049) }, { UINT64_C(15998810719913173830), UINT64_C( 8152356292213954734) }, { UINT64_C(15998810719909809327), UINT64_C( 8152356292189344901) }, { UINT64_C(15998810719909808917), UINT64_C( 8152356292189341897) } }, { { UINT64_C(12894191558988380030), UINT64_C( 3779261480348700199) }, { UINT64_C( 6637430016772101685), UINT64_C(15585100524618213352) }, { UINT64_C(12895001792144724280), UINT64_C( 3781163958440084258) }, { UINT64_C(12894191657893794622), UINT64_C( 3779261712584795338) }, { UINT64_C(12894191559000453444), UINT64_C( 3779261480377049331) }, { UINT64_C(12894191558988381503), UINT64_C( 3779261480348703659) }, { UINT64_C(12894191558988380030), UINT64_C( 3779261480348700199) } }, { { UINT64_C( 2790222957046898717), UINT64_C( 1128883820157155370) }, { UINT64_C(12446311941118550511), UINT64_C(15981169241923213718) }, { UINT64_C( 2791742282235023540), UINT64_C( 1130834646480632324) }, { UINT64_C( 2790223142511399220), UINT64_C( 1128884058295134310) }, { UINT64_C( 2790222957069538426), UINT64_C( 1128883820186224947) }, { UINT64_C( 2790222957046901480), UINT64_C( 1128883820157158918) }, { UINT64_C( 2790222957046898717), UINT64_C( 1128883820157155370) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r13 = simde_vsraq_n_u64(a, b, 13); simde_uint64x2_t r26 = simde_vsraq_n_u64(a, b, 26); simde_uint64x2_t r39 = simde_vsraq_n_u64(a, b, 39); simde_uint64x2_t r52 = simde_vsraq_n_u64(a, b, 52); simde_uint64x2_t r64 = simde_vsraq_n_u64(a, b, 64); simde_test_arm_neon_assert_equal_u64x2(r13, simde_vld1q_u64(test_vec[i].r13)); 
simde_test_arm_neon_assert_equal_u64x2(r26, simde_vld1q_u64(test_vec[i].r26)); simde_test_arm_neon_assert_equal_u64x2(r39, simde_vld1q_u64(test_vec[i].r39)); simde_test_arm_neon_assert_equal_u64x2(r52, simde_vld1q_u64(test_vec[i].r52)); simde_test_arm_neon_assert_equal_u64x2(r64, simde_vld1q_u64(test_vec[i].r64)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vsra_n_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vsra_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vsra_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vsra_n_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vsra_n_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vsra_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vsra_n_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vsra_n_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vsraq_n_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vsraq_n_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vsraq_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vsraq_n_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vsraq_n_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vsraq_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vsraq_n_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vsraq_n_u64) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/arm/neon/st1_lane.c000066400000000000000000001316611400333146700172010ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN st1_lane #include "test-neon.h" #include "../../../simde/arm/neon/st1_lane.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ static int test_simde_vst1_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a; simde_float32 val[2]; int lane; } test_vec[] = { { SIMDE_FLOAT32_C( 758.86), { SIMDE_FLOAT32_C( 758.86), SIMDE_FLOAT32_C( 425.86) }, INT32_C( 0) }, { SIMDE_FLOAT32_C( -723.17), { SIMDE_FLOAT32_C( -381.17), SIMDE_FLOAT32_C( -723.17) }, INT32_C( 1) }, { SIMDE_FLOAT32_C( -445.02), { SIMDE_FLOAT32_C( -445.02), SIMDE_FLOAT32_C( -699.72) }, INT32_C( 0) }, { SIMDE_FLOAT32_C( -700.54), { SIMDE_FLOAT32_C( -207.44), SIMDE_FLOAT32_C( -700.54) }, INT32_C( 1) }, { SIMDE_FLOAT32_C( -873.60), { SIMDE_FLOAT32_C( -873.60), SIMDE_FLOAT32_C( -557.72) }, INT32_C( 0) }, { SIMDE_FLOAT32_C( 122.92), { 
SIMDE_FLOAT32_C( 178.64), SIMDE_FLOAT32_C( 122.92) }, INT32_C( 1) }, { SIMDE_FLOAT32_C( 972.30), { SIMDE_FLOAT32_C( 972.30), SIMDE_FLOAT32_C( 277.03) }, INT32_C( 0) }, { SIMDE_FLOAT32_C( 73.76), { SIMDE_FLOAT32_C( 826.05), SIMDE_FLOAT32_C( 73.76) }, INT32_C( 1) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t val = simde_vld1_f32(test_vec[i].val); simde_float32 a; SIMDE_CONSTIFY_2_NO_RESULT_(simde_vst1_lane_f32, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); simde_assert_equal_f32(a, test_vec[i].a, 1); } return 0; #else fputc('\n', stdout); int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; for (int i = 0 ; i < 8 ; i++) { simde_float32 a; simde_float32x2_t val = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_vst1_lane_f32(&a, val, lanes[i]); simde_test_codegen_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, val, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lanes[i], SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a; simde_float64 val[1]; } test_vec[] = { { SIMDE_FLOAT64_C( -672.00), { SIMDE_FLOAT64_C( -672.00) } }, { SIMDE_FLOAT64_C( 684.22), { SIMDE_FLOAT64_C( 684.22) } }, { SIMDE_FLOAT64_C( -14.73), { SIMDE_FLOAT64_C( -14.73) } }, { SIMDE_FLOAT64_C( -277.48), { SIMDE_FLOAT64_C( -277.48) } }, { SIMDE_FLOAT64_C( -464.79), { SIMDE_FLOAT64_C( -464.79) } }, { SIMDE_FLOAT64_C( -299.87), { SIMDE_FLOAT64_C( -299.87) } }, { SIMDE_FLOAT64_C( -770.81), { SIMDE_FLOAT64_C( -770.81) } }, { SIMDE_FLOAT64_C( -134.46), { SIMDE_FLOAT64_C( -134.46) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t val = simde_vld1_f64(test_vec[i].val); simde_float64 a; simde_vst1_lane_f64(&a, val, 0); simde_assert_equal_f64(a, test_vec[i].a, 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64 a; simde_float64x1_t 
val = simde_test_arm_neon_random_f64x1(-1000.0, 1000.0); simde_vst1_lane_f64(&a, val, 0); int lane = 0; simde_test_codegen_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, val, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1_lane_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a; int8_t val[8]; int lane; } test_vec[] = { { INT8_C( 9), { INT8_C( 9), INT8_C( 57), INT8_C( 8), -INT8_C( 94), INT8_C( 72), INT8_C( 37), -INT8_C( 66), INT8_C( 77) }, INT32_C( 0) }, { INT8_C( 114), { -INT8_C( 76), INT8_C( 114), INT8_C( 11), -INT8_C( 90), -INT8_C( 75), INT8_C( 3), -INT8_C( 20), -INT8_C( 3) }, INT32_C( 1) }, { -INT8_C( 5), { INT8_C( 90), INT8_C( 34), -INT8_C( 5), -INT8_C( 23), -INT8_C( 121), -INT8_C( 42), INT8_C( 41), -INT8_C( 74) }, INT32_C( 2) }, { INT8_C( 15), { INT8_C( 123), INT8_C( 18), -INT8_C( 2), INT8_C( 15), INT8_C( 126), -INT8_C( 126), INT8_C( 31), -INT8_C( 121) }, INT32_C( 3) }, { INT8_C( 76), { -INT8_C( 69), INT8_C( 39), INT8_C( 42), INT8_C( 3), INT8_C( 76), -INT8_C( 24), INT8_C( 80), INT8_C( 1) }, INT32_C( 4) }, { -INT8_C( 109), { INT8_C( 90), INT8_C( 91), -INT8_C( 89), INT8_C( 16), INT8_C( 94), -INT8_C( 109), INT8_C( 13), -INT8_C( 72) }, INT32_C( 5) }, { -INT8_C( 13), { -INT8_C( 75), INT8_C( 8), -INT8_C( 94), INT8_C( 60), -INT8_C( 34), -INT8_C( 53), -INT8_C( 13), INT8_C( 90) }, INT32_C( 6) }, { INT8_C( 46), { -INT8_C( 35), -INT8_C( 15), INT8_C( 105), INT8_C( 91), INT8_C( 115), -INT8_C( 120), -INT8_C( 30), INT8_C( 46) }, INT32_C( 7) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int8_t a; simde_int8x8_t val = simde_vld1_s8(test_vec[i].val); SIMDE_CONSTIFY_8_NO_RESULT_(simde_vst1_lane_s8, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); simde_assert_equal_i8(a, test_vec[i].a); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 4, 5, 6, 7 }; for (int i = 0 ; i < 8 ; i++) { 
int8_t a; simde_int8x8_t val = simde_test_arm_neon_random_i8x8(); const int lane = lanes[i]; simde_vst1_lane_s8(&a, val, lane); simde_test_codegen_write_i8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, val, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a; int16_t val[4]; int lane; } test_vec[] = { { INT16_C( 25194), { INT16_C( 25194), INT16_C( 22539), INT16_C( 14552), INT16_C( 12907) }, INT32_C( 0) }, { -INT16_C( 28985), { -INT16_C( 7314), -INT16_C( 28985), INT16_C( 29951), -INT16_C( 32699) }, INT32_C( 1) }, { -INT16_C( 15453), { -INT16_C( 32139), -INT16_C( 19838), -INT16_C( 15453), -INT16_C( 18078) }, INT32_C( 2) }, { -INT16_C( 834), { -INT16_C( 3979), INT16_C( 28323), -INT16_C( 12399), -INT16_C( 834) }, INT32_C( 3) }, { -INT16_C( 14031), { -INT16_C( 14031), INT16_C( 2644), -INT16_C( 16639), INT16_C( 28476) }, INT32_C( 0) }, { -INT16_C( 24067), { INT16_C( 930), -INT16_C( 24067), INT16_C( 17271), -INT16_C( 5086) }, INT32_C( 1) }, { INT16_C( 359), { -INT16_C( 23355), INT16_C( 26783), INT16_C( 359), -INT16_C( 9183) }, INT32_C( 2) }, { -INT16_C( 14978), { -INT16_C( 14863), -INT16_C( 32181), INT16_C( 2452), -INT16_C( 14978) }, INT32_C( 3) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t val = simde_vld1_s16(test_vec[i].val); int16_t a; SIMDE_CONSTIFY_4_NO_RESULT_(simde_vst1_lane_s16, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); simde_assert_equal_i16(a, test_vec[i].a); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; for (int i = 0 ; i < 8 ; i++) { int16_t a; simde_int16x4_t val = simde_test_arm_neon_random_i16x4(); const int lane = lanes[i]; simde_vst1_lane_s16(&a, val, lane); simde_test_codegen_write_i16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, val, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a; int32_t val[2]; int lane; } test_vec[] = { { -INT32_C( 1176692915), { -INT32_C( 1176692915), -INT32_C( 1019739805) }, INT32_C( 0) }, { INT32_C( 639152599), { INT32_C( 1743701338), INT32_C( 639152599) }, INT32_C( 1) }, { -INT32_C( 2131206463), { -INT32_C( 2131206463), -INT32_C( 691177546) }, INT32_C( 0) }, { INT32_C( 773618656), { -INT32_C( 1531866460), INT32_C( 773618656) }, INT32_C( 1) }, { -INT32_C( 152569453), { -INT32_C( 152569453), INT32_C( 1438261498) }, INT32_C( 0) }, { INT32_C( 451073113), { -INT32_C( 1128486683), INT32_C( 451073113) }, INT32_C( 1) }, { -INT32_C( 392439246), { -INT32_C( 392439246), -INT32_C( 155228079) }, INT32_C( 0) }, { INT32_C( 2131343340), { -INT32_C( 610635525), INT32_C( 2131343340) }, INT32_C( 1) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t val = simde_vld1_s32(test_vec[i].val); int32_t a; SIMDE_CONSTIFY_2_NO_RESULT_(simde_vst1_lane_s32, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); simde_assert_equal_i32(a, test_vec[i].a); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; for (int i = 0 ; i < 8 ; i++) { int32_t a; simde_int32x2_t val = simde_test_arm_neon_random_i32x2(); const int lane = lanes[i]; simde_vst1_lane_s32(&a, val, lane); simde_test_codegen_write_i32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, val, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1_lane_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a; int64_t val[1]; } test_vec[] = { { -INT64_C( 3973240137489820217), { -INT64_C( 3973240137489820217) } }, { -INT64_C( 194556375924638676), { -INT64_C( 194556375924638676) } }, { INT64_C( 
2167462674658928341), { INT64_C( 2167462674658928341) } }, { INT64_C( 140538722373190919), { INT64_C( 140538722373190919) } }, { -INT64_C( 1185217718330684256), { -INT64_C( 1185217718330684256) } }, { INT64_C( 7547189185119307710), { INT64_C( 7547189185119307710) } }, { INT64_C( 1843413786645214118), { INT64_C( 1843413786645214118) } }, { -INT64_C( 4747008286953647387), { -INT64_C( 4747008286953647387) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t val = simde_vld1_s64(test_vec[i].val); int64_t a; simde_vst1_lane_s64(&a, val, 0); simde_assert_equal_i64(a, test_vec[i].a); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; for (int i = 0 ; i < 8 ; i++) { int64_t a; simde_int64x1_t val = simde_test_arm_neon_random_i64x1(); const int lane = lanes[i]; simde_vst1_lane_s64(&a, val, lane); simde_test_codegen_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, val, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1_lane_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a; uint8_t val[8]; int lane; } test_vec[] = { { UINT8_C(144), { UINT8_C(144), UINT8_C(105), UINT8_C( 20), UINT8_C(162), UINT8_C( 36), UINT8_C( 2), UINT8_C(118), UINT8_C(233) }, INT32_C( 0) }, { UINT8_C(133), { UINT8_C( 80), UINT8_C(133), UINT8_C(190), UINT8_C( 62), UINT8_C(191), UINT8_C(138), UINT8_C( 84), UINT8_C(132) }, INT32_C( 1) }, { UINT8_C(209), { UINT8_C( 52), UINT8_C( 89), UINT8_C(209), UINT8_C(104), UINT8_C( 51), UINT8_C(132), UINT8_C(163), UINT8_C( 59) }, INT32_C( 2) }, { UINT8_C( 59), { UINT8_C( 57), UINT8_C( 21), UINT8_C(200), UINT8_C( 59), UINT8_C(214), UINT8_C(197), UINT8_C( 18), UINT8_C(102) }, INT32_C( 3) }, { UINT8_C( 40), { UINT8_C( 47), UINT8_C( 38), UINT8_C( 8), UINT8_C( 83), UINT8_C( 40), UINT8_C(127), UINT8_C( 60), UINT8_C(120) }, INT32_C( 4) }, { UINT8_C( 11), { UINT8_C( 
4), UINT8_C(250), UINT8_C(183), UINT8_C(196), UINT8_C(133), UINT8_C( 11), UINT8_C( 72), UINT8_C(185) }, INT32_C( 5) }, { UINT8_C(210), { UINT8_C(100), UINT8_C( 25), UINT8_C( 33), UINT8_C(151), UINT8_C(158), UINT8_C(197), UINT8_C(210), UINT8_C(215) }, INT32_C( 6) }, { UINT8_C(143), { UINT8_C(218), UINT8_C(154), UINT8_C( 18), UINT8_C(177), UINT8_C( 96), UINT8_C( 36), UINT8_C( 23), UINT8_C(143) }, INT32_C( 7) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t val = simde_vld1_u8(test_vec[i].val); uint8_t a; SIMDE_CONSTIFY_8_NO_RESULT_(simde_vst1_lane_u8, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); simde_assert_equal_u8(a, test_vec[i].a); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 4, 5, 6, 7 }; for (int i = 0 ; i < 8 ; i++) { uint8_t a; simde_uint8x8_t val = simde_test_arm_neon_random_u8x8(); const uint lane = lanes[i]; simde_vst1_lane_u8(&a, val, lane); simde_test_codegen_write_u8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, val, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a; uint16_t val[4]; int lane; } test_vec[] = { { UINT16_C(52711), { UINT16_C(52711), UINT16_C(64187), UINT16_C(20378), UINT16_C(54290) }, INT32_C( 0) }, { UINT16_C(52158), { UINT16_C(59808), UINT16_C(52158), UINT16_C(18507), UINT16_C(50802) }, INT32_C( 1) }, { UINT16_C(27778), { UINT16_C(16782), UINT16_C(16914), UINT16_C(27778), UINT16_C(16423) }, INT32_C( 2) }, { UINT16_C(26042), { UINT16_C(44278), UINT16_C(62894), UINT16_C(31614), UINT16_C(26042) }, INT32_C( 3) }, { UINT16_C(30281), { UINT16_C(30281), UINT16_C(58207), UINT16_C(29381), UINT16_C(26039) }, INT32_C( 0) }, { UINT16_C(42544), { UINT16_C(30299), UINT16_C(42544), UINT16_C(41662), UINT16_C(19564) }, INT32_C( 1) }, { UINT16_C(46827), { UINT16_C(32483), UINT16_C(25998), 
UINT16_C(46827), UINT16_C(57765) }, INT32_C( 2) }, { UINT16_C( 6214), { UINT16_C(21602), UINT16_C(57558), UINT16_C(37071), UINT16_C( 6214) }, INT32_C( 3) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t val = simde_vld1_u16(test_vec[i].val); uint16_t a; SIMDE_CONSTIFY_4_NO_RESULT_(simde_vst1_lane_u16, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); simde_assert_equal_u16(a, test_vec[i].a); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; for (int i = 0 ; i < 8 ; i++) { uint16_t a; simde_uint16x4_t val = simde_test_arm_neon_random_u16x4(); const uint lane = lanes[i]; simde_vst1_lane_u16(&a, val, lane); simde_test_codegen_write_u16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, val, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a; uint32_t val[2]; int lane; } test_vec[] = { { UINT32_C( 481195642), { UINT32_C( 481195642), UINT32_C(2023252945) }, INT32_C( 0) }, { UINT32_C(1656647457), { UINT32_C(1504231974), UINT32_C(1656647457) }, INT32_C( 1) }, { UINT32_C( 583019691), { UINT32_C( 583019691), UINT32_C(1078655062) }, INT32_C( 0) }, { UINT32_C(3518892630), { UINT32_C(1791365296), UINT32_C(3518892630) }, INT32_C( 1) }, { UINT32_C(1374514304), { UINT32_C(1374514304), UINT32_C(4123624911) }, INT32_C( 0) }, { UINT32_C(2378960353), { UINT32_C(1766814280), UINT32_C(2378960353) }, INT32_C( 1) }, { UINT32_C(2410646585), { UINT32_C(2410646585), UINT32_C( 886045060) }, INT32_C( 0) }, { UINT32_C( 523328671), { UINT32_C(1621005578), UINT32_C( 523328671) }, INT32_C( 1) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t val = simde_vld1_u32(test_vec[i].val); uint32_t a; SIMDE_CONSTIFY_2_NO_RESULT_(simde_vst1_lane_u32, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); 
simde_assert_equal_u32(a, test_vec[i].a); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; for (int i = 0 ; i < 8 ; i++) { uint32_t a; simde_uint32x2_t val = simde_test_arm_neon_random_u32x2(); const uint lane = lanes[i]; simde_vst1_lane_u32(&a, val, lane); simde_test_codegen_write_u32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, val, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a; uint64_t val[1]; } test_vec[] = { { UINT64_C( 5250363356913382472), { UINT64_C( 5250363356913382472) } }, { UINT64_C( 6309684774697879748), { UINT64_C( 6309684774697879748) } }, { UINT64_C( 2553653612082696328), { UINT64_C( 2553653612082696328) } }, { UINT64_C( 687680480654673178), { UINT64_C( 687680480654673178) } }, { UINT64_C(16556837661035665621), { UINT64_C(16556837661035665621) } }, { UINT64_C( 6679908984284861564), { UINT64_C( 6679908984284861564) } }, { UINT64_C(14442753382987286749), { UINT64_C(14442753382987286749) } }, { UINT64_C(14485341070503236157), { UINT64_C(14485341070503236157) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t val = simde_vld1_u64(test_vec[i].val); uint64_t a; simde_vst1_lane_u64(&a, val, 0); simde_assert_equal_u64(a, test_vec[i].a); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a; simde_uint64x1_t val = simde_test_arm_neon_random_u64x1(); simde_vst1_lane_u64(&a, val, 0); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, val, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, 0, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1q_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 a; simde_float32 val[4]; int lane; } test_vec[] = { { 
SIMDE_FLOAT32_C( 455.33), { SIMDE_FLOAT32_C( 455.33), SIMDE_FLOAT32_C( -829.93), SIMDE_FLOAT32_C( 456.39), SIMDE_FLOAT32_C( -14.07) }, INT32_C( 0) }, { SIMDE_FLOAT32_C( 915.54), { SIMDE_FLOAT32_C( 138.27), SIMDE_FLOAT32_C( 915.54), SIMDE_FLOAT32_C( -484.19), SIMDE_FLOAT32_C( -949.94) }, INT32_C( 1) }, { SIMDE_FLOAT32_C( -971.20), { SIMDE_FLOAT32_C( 540.73), SIMDE_FLOAT32_C( 942.66), SIMDE_FLOAT32_C( -971.20), SIMDE_FLOAT32_C( 91.92) }, INT32_C( 2) }, { SIMDE_FLOAT32_C( -278.01), { SIMDE_FLOAT32_C( -834.19), SIMDE_FLOAT32_C( 557.12), SIMDE_FLOAT32_C( -444.80), SIMDE_FLOAT32_C( -278.01) }, INT32_C( 3) }, { SIMDE_FLOAT32_C( -88.43), { SIMDE_FLOAT32_C( -88.43), SIMDE_FLOAT32_C( -162.64), SIMDE_FLOAT32_C( 136.08), SIMDE_FLOAT32_C( 284.45) }, INT32_C( 0) }, { SIMDE_FLOAT32_C( -56.88), { SIMDE_FLOAT32_C( 241.27), SIMDE_FLOAT32_C( -56.88), SIMDE_FLOAT32_C( -1.53), SIMDE_FLOAT32_C( -841.12) }, INT32_C( 1) }, { SIMDE_FLOAT32_C( 514.21), { SIMDE_FLOAT32_C( -687.80), SIMDE_FLOAT32_C( -889.79), SIMDE_FLOAT32_C( 514.21), SIMDE_FLOAT32_C( -216.22) }, INT32_C( 2) }, { SIMDE_FLOAT32_C( -681.62), { SIMDE_FLOAT32_C( -136.95), SIMDE_FLOAT32_C( 169.15), SIMDE_FLOAT32_C( -293.70), SIMDE_FLOAT32_C( -681.62) }, INT32_C( 3) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t val = simde_vld1q_f32(test_vec[i].val); simde_float32 a; SIMDE_CONSTIFY_4_NO_RESULT_(simde_vst1q_lane_f32, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); simde_assert_equal_f32(a, test_vec[i].a, 1); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 }; for (int i = 0 ; i < 8 ; i++) { simde_float32 a; simde_float32x4_t val = simde_test_arm_neon_random_f32x4(-1000.0, 1000.0); const uint lane = lanes[i]; simde_vst1q_lane_f32(&a, val, lane); simde_test_codegen_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, val, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, 
SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1q_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 a; simde_float64 val[2]; int lane; } test_vec[] = { { SIMDE_FLOAT64_C( 521.90), { SIMDE_FLOAT64_C( 521.90), SIMDE_FLOAT64_C( -472.06) }, INT32_C( 0) }, { SIMDE_FLOAT64_C( -745.36), { SIMDE_FLOAT64_C( -586.76), SIMDE_FLOAT64_C( -745.36) }, INT32_C( 1) }, { SIMDE_FLOAT64_C( 833.21), { SIMDE_FLOAT64_C( 833.21), SIMDE_FLOAT64_C( -230.32) }, INT32_C( 0) }, { SIMDE_FLOAT64_C( 228.92), { SIMDE_FLOAT64_C( 948.92), SIMDE_FLOAT64_C( 228.92) }, INT32_C( 1) }, { SIMDE_FLOAT64_C( 102.77), { SIMDE_FLOAT64_C( 102.77), SIMDE_FLOAT64_C( 74.33) }, INT32_C( 0) }, { SIMDE_FLOAT64_C( 972.13), { SIMDE_FLOAT64_C( 437.97), SIMDE_FLOAT64_C( 972.13) }, INT32_C( 1) }, { SIMDE_FLOAT64_C( 896.14), { SIMDE_FLOAT64_C( 896.14), SIMDE_FLOAT64_C( 91.47) }, INT32_C( 0) }, { SIMDE_FLOAT64_C( -462.82), { SIMDE_FLOAT64_C( -392.14), SIMDE_FLOAT64_C( -462.82) }, INT32_C( 1) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t val = simde_vld1q_f64(test_vec[i].val); simde_float64 a; SIMDE_CONSTIFY_2_NO_RESULT_(simde_vst1q_lane_f64, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); simde_assert_equal_f64(a, test_vec[i].a, 1); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 }; for (int i = 0 ; i < 8 ; i++) { simde_float64 a; simde_float64x2_t val = simde_test_arm_neon_random_f64x2(-1000.0, 1000.0); const uint lane = lanes[i]; simde_vst1q_lane_f64(&a, val, lane); simde_test_codegen_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, val, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1q_lane_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a; int8_t val[16]; int lane; } test_vec[] = { { -INT8_C( 2), { INT8_C( 74), INT8_C( 57), 
-INT8_C( 2), -INT8_C( 13), INT8_C( 54), INT8_C( 95), INT8_C( 36), -INT8_C( 117), -INT8_C( 126), -INT8_C( 27), -INT8_C( 19), -INT8_C( 94), -INT8_C( 73), INT8_C( 84), INT8_C( 45), -INT8_C( 105) }, INT32_C( 2) }, { -INT8_C( 47), { INT8_C( 46), INT8_C( 81), -INT8_C( 75), INT8_C( 63), -INT8_C( 47), INT8_C( 32), INT8_C( 57), -INT8_C( 115), INT8_C( 108), INT8_C( 37), INT8_C( 83), -INT8_C( 59), -INT8_C( 67), INT8_C( 34), -INT8_C( 112), INT8_C( 7) }, INT32_C( 4) }, { INT8_C( 29), { INT8_C( 91), -INT8_C( 114), -INT8_C( 5), -INT8_C( 111), -INT8_C( 19), INT8_C( 31), INT8_C( 29), INT8_C( 111), INT8_C( 4), INT8_C( 10), INT8_C( 18), -INT8_C( 68), INT8_C( 94), INT8_C( 63), INT8_C( 83), -INT8_C( 115) }, INT32_C( 6) }, { INT8_C( 42), { -INT8_C( 112), INT8_C( 8), -INT8_C( 52), INT8_C( 97), INT8_C( 40), INT8_C( 5), -INT8_C( 18), -INT8_C( 107), INT8_C( 42), INT8_C( 66), INT8_C( 90), -INT8_C( 24), INT8_C( 100), -INT8_C( 22), -INT8_C( 17), -INT8_C( 65) }, INT32_C( 8) }, { -INT8_C( 54), { INT8_C( 120), -INT8_C( 22), INT8_C( 80), INT8_C( 101), INT8_C( 9), INT8_C( 109), -INT8_C( 43), INT8_C( 14), INT8_C( 119), -INT8_C( 25), -INT8_C( 54), -INT8_C( 42), INT8_C( 38), INT8_C( 29), INT8_C( 99), -INT8_C( 74) }, INT32_C( 10) }, { INT8_C( 39), { INT8_C( 37), INT8_C( 47), INT8_C( 23), INT8_C( 77), INT8_C( 53), INT8_C( 5), -INT8_C( 30), INT8_C( 95), INT8_C( 71), INT8_C( 61), INT8_C( 71), -INT8_C( 85), INT8_C( 39), INT8_C( 55), INT8_C( 106), -INT8_C( 96) }, INT32_C( 12) }, { -INT8_C( 99), { INT8_C( 33), -INT8_C( 69), INT8_C( 5), INT8_C( 43), INT8_C( 40), -INT8_C( 38), INT8_C( 57), -INT8_C( 96), -INT8_C( 63), INT8_C( 3), INT8_C( 118), -INT8_C( 25), INT8_C( 32), -INT8_C( 39), -INT8_C( 99), INT8_C( 69) }, INT32_C( 14) }, { INT8_C( 61), { INT8_C( 8), -INT8_C( 76), -INT8_C( 110), INT8_C( 61), -INT8_C( 70), INT8_C( 117), -INT8_C( 99), INT8_C( 1), -INT8_C( 78), -INT8_C( 28), -INT8_C( 83), -INT8_C( 39), INT8_C( 27), INT8_C( 23), INT8_C( 121), INT8_C( 61) }, INT32_C( 15) }, }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t val = simde_vld1q_s8(test_vec[i].val); int8_t a; SIMDE_CONSTIFY_16_NO_RESULT_(simde_vst1q_lane_s8, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); simde_assert_equal_i8(a, test_vec[i].a); } return 0; #else fputc('\n', stdout); const int lanes[] = { 2, 4, 6, 8, 10, 12, 14, 15 }; for (int i = 0 ; i < 8 ; i++) { int8_t a; simde_int8x16_t val = simde_test_arm_neon_random_i8x16(); const uint lane = lanes[i]; simde_vst1q_lane_s8(&a, val, lane); simde_test_codegen_write_i8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, val, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1q_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a; int16_t val[8]; int lane; } test_vec[] = { { INT16_C( 23780), { INT16_C( 23780), INT16_C( 19170), INT16_C( 16396), -INT16_C( 25092), INT16_C( 4214), -INT16_C( 32078), -INT16_C( 29052), INT16_C( 30694) }, INT32_C( 0) }, { -INT16_C( 28904), { INT16_C( 511), -INT16_C( 28904), -INT16_C( 22919), -INT16_C( 13843), INT16_C( 6751), -INT16_C( 1510), -INT16_C( 5103), -INT16_C( 2704) }, INT32_C( 1) }, { INT16_C( 15507), { INT16_C( 21320), INT16_C( 21568), INT16_C( 15507), INT16_C( 2801), -INT16_C( 23476), -INT16_C( 12148), INT16_C( 29234), INT16_C( 12872) }, INT32_C( 2) }, { INT16_C( 26038), { INT16_C( 24692), -INT16_C( 4671), -INT16_C( 20730), INT16_C( 26038), -INT16_C( 12087), -INT16_C( 9377), -INT16_C( 12100), INT16_C( 1232) }, INT32_C( 3) }, { INT16_C( 19694), { INT16_C( 4131), -INT16_C( 18855), INT16_C( 19020), -INT16_C( 26432), INT16_C( 19694), INT16_C( 8553), -INT16_C( 20033), INT16_C( 13139) }, INT32_C( 4) }, { INT16_C( 25192), { INT16_C( 5137), INT16_C( 6176), -INT16_C( 10557), -INT16_C( 29315), -INT16_C( 8794), INT16_C( 25192), INT16_C( 14509), -INT16_C( 12186) }, INT32_C( 5) }, { INT16_C( 27720), { -INT16_C( 16567), -INT16_C( 27258), INT16_C( 18186), 
-INT16_C( 2002), -INT16_C( 26733), INT16_C( 21017), INT16_C( 27720), INT16_C( 22917) }, INT32_C( 6) }, { -INT16_C( 17591), { -INT16_C( 23167), INT16_C( 17521), -INT16_C( 4229), INT16_C( 8657), INT16_C( 14796), INT16_C( 31107), -INT16_C( 5518), -INT16_C( 17591) }, INT32_C( 7) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t val = simde_vld1q_s16(test_vec[i].val); int16_t a; SIMDE_CONSTIFY_8_NO_RESULT_(simde_vst1q_lane_s16, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); simde_assert_equal_i16(a, test_vec[i].a); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 4, 5, 6, 7 }; for (int i = 0 ; i < 8 ; i++) { int16_t a; simde_int16x8_t val = simde_test_arm_neon_random_i16x8(); const uint lane = lanes[i]; simde_vst1q_lane_s16(&a, val, lane); simde_test_codegen_write_i16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, val, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1q_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a; int32_t val[4]; int lane; } test_vec[] = { { -INT32_C( 407901645), { -INT32_C( 407901645), -INT32_C( 1768410816), INT32_C( 630594925), -INT32_C( 1378185327) }, INT32_C( 0) }, { -INT32_C( 1750011511), { INT32_C( 11366593), -INT32_C( 1750011511), INT32_C( 2023889963), INT32_C( 1010411273) }, INT32_C( 1) }, { INT32_C( 1789722329), { -INT32_C( 769398639), -INT32_C( 2023179239), INT32_C( 1789722329), INT32_C( 1310164621) }, INT32_C( 2) }, { -INT32_C( 228551840), { INT32_C( 2135868662), -INT32_C( 518586698), INT32_C( 609859867), -INT32_C( 228551840) }, INT32_C( 3) }, { -INT32_C( 1782283397), { -INT32_C( 1782283397), INT32_C( 404499519), -INT32_C( 1199388630), INT32_C( 1158060622) }, INT32_C( 0) }, { INT32_C( 1861606227), { INT32_C( 348411230), INT32_C( 1861606227), -INT32_C( 191737964), INT32_C( 1575416546) }, INT32_C( 1) }, { INT32_C( 632901846), { 
-INT32_C( 1242387850), INT32_C( 30281430), INT32_C( 632901846), INT32_C( 1231732714) }, INT32_C( 2) }, { -INT32_C( 773752741), { INT32_C( 1750937108), -INT32_C( 1646898423), -INT32_C( 2070779486), -INT32_C( 773752741) }, INT32_C( 3) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t val = simde_vld1q_s32(test_vec[i].val); int32_t a; SIMDE_CONSTIFY_4_NO_RESULT_(simde_vst1q_lane_s32, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); simde_assert_equal_i32(a, test_vec[i].a); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; for (int i = 0 ; i < 8 ; i++) { int32_t a; simde_int32x4_t val = simde_test_arm_neon_random_i32x4(); const uint lane = lanes[i]; simde_vst1q_lane_s32(&a, val, lane); simde_test_codegen_write_i32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, val, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1q_lane_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a; int64_t val[2]; int lane; } test_vec[] = { { -INT64_C( 2426938743921840642), { -INT64_C( 2426938743921840642), INT64_C( 7093888712037390708) }, INT32_C( 0) }, { INT64_C( 8177834832853814489), { INT64_C( 7893852987986178362), INT64_C( 8177834832853814489) }, INT32_C( 1) }, { -INT64_C( 6818477128198757266), { -INT64_C( 6818477128198757266), INT64_C( 5536305007872607040) }, INT32_C( 0) }, { INT64_C( 4621725464521429311), { INT64_C( 6431125428008052569), INT64_C( 4621725464521429311) }, INT32_C( 1) }, { -INT64_C( 2171543568441558769), { -INT64_C( 2171543568441558769), -INT64_C( 7709555375194483805) }, INT32_C( 0) }, { -INT64_C( 3527884484289751561), { -INT64_C( 7284389485093623793), -INT64_C( 3527884484289751561) }, INT32_C( 1) }, { -INT64_C( 8166177399901533399), { -INT64_C( 8166177399901533399), INT64_C( 3966341036426723131) }, INT32_C( 0) }, { -INT64_C( 8623016287588913211), { INT64_C( 
1198994447883793070), -INT64_C( 8623016287588913211) }, INT32_C( 1) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t val = simde_vld1q_s64(test_vec[i].val); int64_t a; SIMDE_CONSTIFY_2_NO_RESULT_(simde_vst1q_lane_s64, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); simde_assert_equal_i64(a, test_vec[i].a); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; for (int i = 0 ; i < 8 ; i++) { int64_t a; simde_int64x2_t val = simde_test_arm_neon_random_i64x2(); const uint lane = lanes[i]; simde_vst1q_lane_s64(&a, val, lane); simde_test_codegen_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, val, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1q_lane_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a; uint8_t val[16]; int lane; } test_vec[] = { { UINT8_C(151), { UINT8_C(151), UINT8_C( 83), UINT8_C(125), UINT8_C(109), UINT8_C(174), UINT8_C(214), UINT8_C( 29), UINT8_C(179), UINT8_C( 77), UINT8_C(149), UINT8_C(157), UINT8_C(193), UINT8_C( 32), UINT8_C( 6), UINT8_C(198), UINT8_C(243) }, INT32_C( 0) }, { UINT8_C(115), { UINT8_C(227), UINT8_C(151), UINT8_C(115), UINT8_C( 95), UINT8_C( 96), UINT8_C( 23), UINT8_C(103), UINT8_C( 20), UINT8_C( 9), UINT8_C(192), UINT8_C(138), UINT8_C( 13), UINT8_C(134), UINT8_C(240), UINT8_C(243), UINT8_C( 29) }, INT32_C( 2) }, { UINT8_C( 71), { UINT8_C( 67), UINT8_C(113), UINT8_C(139), UINT8_C(241), UINT8_C( 71), UINT8_C(168), UINT8_C(164), UINT8_C(149), UINT8_C( 61), UINT8_C( 66), UINT8_C( 86), UINT8_C( 93), UINT8_C( 72), UINT8_C( 28), UINT8_C( 80), UINT8_C( 43) }, INT32_C( 4) }, { UINT8_C( 40), { UINT8_C(179), UINT8_C(195), UINT8_C(139), UINT8_C( 20), UINT8_C(218), UINT8_C(242), UINT8_C( 40), UINT8_C(227), UINT8_C(178), UINT8_C(178), UINT8_C(241), UINT8_C( 56), UINT8_C(162), UINT8_C(228), UINT8_C( 86), UINT8_C(229) }, INT32_C( 
6) }, { UINT8_C(189), { UINT8_C( 85), UINT8_C(225), UINT8_C(215), UINT8_C(157), UINT8_C(137), UINT8_C(123), UINT8_C( 50), UINT8_C(198), UINT8_C(189), UINT8_C(136), UINT8_C( 35), UINT8_C( 5), UINT8_C(165), UINT8_C(115), UINT8_C( 49), UINT8_C( 88) }, INT32_C( 8) }, { UINT8_C(152), { UINT8_C( 55), UINT8_C(188), UINT8_C(108), UINT8_C( 17), UINT8_C(174), UINT8_C(148), UINT8_C(245), UINT8_C( 96), UINT8_C( 70), UINT8_C(230), UINT8_C(152), UINT8_C(232), UINT8_C(202), UINT8_C(238), UINT8_C(206), UINT8_C( 32) }, INT32_C( 10) }, { UINT8_C(181), { UINT8_C(207), UINT8_C(165), UINT8_C(189), UINT8_C( 88), UINT8_C( 32), UINT8_C(239), UINT8_C( 30), UINT8_C(222), UINT8_C(119), UINT8_C( 65), UINT8_C(227), UINT8_C( 28), UINT8_C(181), UINT8_C( 20), UINT8_C(117), UINT8_C(236) }, INT32_C( 12) }, { UINT8_C(195), { UINT8_C(208), UINT8_C(225), UINT8_C(253), UINT8_C(126), UINT8_C(118), UINT8_C(242), UINT8_C(222), UINT8_C(188), UINT8_C(216), UINT8_C(119), UINT8_C(165), UINT8_C(163), UINT8_C(101), UINT8_C(115), UINT8_C(195), UINT8_C( 53) }, INT32_C( 14) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t val = simde_vld1q_u8(test_vec[i].val); uint8_t a; SIMDE_CONSTIFY_16_NO_RESULT_(simde_vst1q_lane_u8, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); simde_assert_equal_u8(a, test_vec[i].a); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 2, 4, 6, 8, 10, 12, 14 }; for (int i = 0 ; i < 8 ; i++) { uint8_t a; simde_uint8x16_t val = simde_test_arm_neon_random_u8x16(); const uint lane = lanes[i]; simde_vst1q_lane_u8(&a, val, lane); simde_test_codegen_write_u8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, val, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1q_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a; uint16_t val[8]; int lane; } test_vec[] = { { UINT16_C(43458), { UINT16_C(43458), 
UINT16_C(13306), UINT16_C(35769), UINT16_C(22726), UINT16_C(32286), UINT16_C( 4435), UINT16_C(23748), UINT16_C(58650) }, INT32_C( 0) }, { UINT16_C(23157), { UINT16_C(41658), UINT16_C(23157), UINT16_C(39294), UINT16_C(28061), UINT16_C(23415), UINT16_C(19135), UINT16_C(31134), UINT16_C(24696) }, INT32_C( 1) }, { UINT16_C(23037), { UINT16_C(29218), UINT16_C(56211), UINT16_C(23037), UINT16_C( 6964), UINT16_C(34775), UINT16_C(39725), UINT16_C(18403), UINT16_C(40576) }, INT32_C( 2) }, { UINT16_C( 2004), { UINT16_C(63209), UINT16_C(26616), UINT16_C(38287), UINT16_C( 2004), UINT16_C(37873), UINT16_C(36689), UINT16_C(51468), UINT16_C(12015) }, INT32_C( 3) }, { UINT16_C(33221), { UINT16_C(33339), UINT16_C(14602), UINT16_C(16091), UINT16_C(45652), UINT16_C(33221), UINT16_C(43086), UINT16_C(52936), UINT16_C(45382) }, INT32_C( 4) }, { UINT16_C(36180), { UINT16_C(16068), UINT16_C(21528), UINT16_C(60884), UINT16_C(50523), UINT16_C(44160), UINT16_C(36180), UINT16_C(17270), UINT16_C(45499) }, INT32_C( 5) }, { UINT16_C(46959), { UINT16_C(50629), UINT16_C(41194), UINT16_C(16131), UINT16_C(51282), UINT16_C(41152), UINT16_C(35185), UINT16_C(46959), UINT16_C(13114) }, INT32_C( 6) }, { UINT16_C(60342), { UINT16_C(21494), UINT16_C(51847), UINT16_C(57920), UINT16_C(49295), UINT16_C(58255), UINT16_C( 1357), UINT16_C( 2342), UINT16_C(60342) }, INT32_C( 7) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t val = simde_vld1q_u16(test_vec[i].val); uint16_t a; SIMDE_CONSTIFY_8_NO_RESULT_(simde_vst1q_lane_u16, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); simde_assert_equal_u16(a, test_vec[i].a); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 4, 5, 6, 7 }; for (int i = 0 ; i < 8 ; i++) { uint16_t a; simde_uint16x8_t val = simde_test_arm_neon_random_u16x8(); const uint lane = lanes[i]; simde_vst1q_lane_u16(&a, val, lane); simde_test_codegen_write_u16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, val, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1q_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a; uint32_t val[4]; int lane; } test_vec[] = { { UINT32_C(2806064924), { UINT32_C(2806064924), UINT32_C(2135519540), UINT32_C(1714780091), UINT32_C( 822981959) }, INT32_C( 0) }, { UINT32_C( 476712139), { UINT32_C(3079758904), UINT32_C( 476712139), UINT32_C(1175560326), UINT32_C(2229864040) }, INT32_C( 1) }, { UINT32_C( 868095724), { UINT32_C(1630218797), UINT32_C(1474327708), UINT32_C( 868095724), UINT32_C( 57002955) }, INT32_C( 2) }, { UINT32_C(1538373934), { UINT32_C(4206556719), UINT32_C(2299995139), UINT32_C( 768551109), UINT32_C(1538373934) }, INT32_C( 3) }, { UINT32_C(2143149283), { UINT32_C(2143149283), UINT32_C(1037540689), UINT32_C(2121307571), UINT32_C(2407650656) }, INT32_C( 0) }, { UINT32_C( 626565216), { UINT32_C(3481877708), UINT32_C( 626565216), UINT32_C(4149356489), UINT32_C(3293774816) }, INT32_C( 1) }, { UINT32_C( 266329775), { UINT32_C( 826478816), UINT32_C(1634605741), UINT32_C( 266329775), UINT32_C(2157863348) }, INT32_C( 2) }, { UINT32_C(1500765305), { UINT32_C(4249823389), UINT32_C(2435033032), UINT32_C(2945021390), UINT32_C(1500765305) }, INT32_C( 3) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t val = simde_vld1q_u32(test_vec[i].val); uint32_t a; SIMDE_CONSTIFY_4_NO_RESULT_(simde_vst1q_lane_u32, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); simde_assert_equal_u32(a, test_vec[i].a); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 2, 3, 0, 1, 2, 3 }; for (int i = 0 ; i < 8 ; i++) { uint32_t a; simde_uint32x4_t val = simde_test_arm_neon_random_u32x4(); const uint lane = lanes[i]; simde_vst1q_lane_u32(&a, val, lane); simde_test_codegen_write_u32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, val, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vst1q_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a; uint64_t val[2]; int lane; } test_vec[] = { { UINT64_C(16971928657155608481), { UINT64_C(16971928657155608481), UINT64_C( 4844228207813433367) }, INT32_C( 0) }, { UINT64_C(16041346042065173678), { UINT64_C( 1292742978285177726), UINT64_C(16041346042065173678) }, INT32_C( 1) }, { UINT64_C( 2718961477228275359), { UINT64_C( 2718961477228275359), UINT64_C( 391814847681421386) }, INT32_C( 0) }, { UINT64_C(15529282571677133160), { UINT64_C( 7962554083699327628), UINT64_C(15529282571677133160) }, INT32_C( 1) }, { UINT64_C(12101844403231370431), { UINT64_C(12101844403231370431), UINT64_C( 4994568648897444035) }, INT32_C( 0) }, { UINT64_C(14352207887664082717), { UINT64_C( 2410891140534635591), UINT64_C(14352207887664082717) }, INT32_C( 1) }, { UINT64_C(18156972424159352309), { UINT64_C(18156972424159352309), UINT64_C( 6646634741389250366) }, INT32_C( 0) }, { UINT64_C( 9338151759985132488), { UINT64_C( 4935339525449970339), UINT64_C( 9338151759985132488) }, INT32_C( 1) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t val = simde_vld1q_u64(test_vec[i].val); uint64_t a; SIMDE_CONSTIFY_2_NO_RESULT_(simde_vst1q_lane_u64, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); simde_assert_equal_u64(a, test_vec[i].a); } return 0; #else fputc('\n', stdout); const int lanes[] = { 0, 1, 0, 1, 0, 1, 0, 1 }; for (int i = 0 ; i < 8 ; i++) { uint64_t a; simde_uint64x2_t val = simde_test_arm_neon_random_u64x2(); const uint lane = lanes[i]; simde_vst1q_lane_u64(&a, val, lane); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, val, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, lane, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } HEDLEY_DIAGNOSTIC_POP SIMDE_TEST_FUNC_LIST_BEGIN 
SIMDE_TEST_FUNC_LIST_ENTRY(vst1_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_lane_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_lane_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_lane_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_lane_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_lane_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_lane_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_lane_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_lane_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_lane_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_lane_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_lane_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_lane_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_lane_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_lane_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/st3.c000066400000000000000000004372671400333146700162170ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN st3 #include "test-neon.h" #include "../../../simde/arm/neon/st3.h" #include "../../../simde/arm/neon/ld3.h" #include "../../../simde/arm/neon/get_lane.h" /* Implementor notes (seanptmaher): * * the way that I'm opting to test this is that I'll have a r[3][x] * array of vectors, as well as a flat buffer of results. The flat * buffer contains the expected result of a st3, done on arm hardware, * which will be compared against a st3 done at test time. * * To test the ld3, the stored st3 will be ld3'd, and then * individually compared against the r[3][x] vector. 
*/ #if !defined(SIMDE_BUG_INTEL_857088) static int test_simde_vst3_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 r0[2]; simde_float32 r1[2]; simde_float32 r2[2]; simde_float32 a[6]; } test_vec[] = { { { SIMDE_FLOAT32_C( 966.95), SIMDE_FLOAT32_C( -826.32) }, { SIMDE_FLOAT32_C( -900.75), SIMDE_FLOAT32_C( 795.70) }, { SIMDE_FLOAT32_C( -772.41), SIMDE_FLOAT32_C( 343.84) }, { SIMDE_FLOAT32_C( 966.95), SIMDE_FLOAT32_C( -900.75), SIMDE_FLOAT32_C( -772.41), SIMDE_FLOAT32_C( -826.32), SIMDE_FLOAT32_C( 795.70), SIMDE_FLOAT32_C( 343.84), } }, { { SIMDE_FLOAT32_C( -783.51), SIMDE_FLOAT32_C( 493.54) }, { SIMDE_FLOAT32_C( -971.39), SIMDE_FLOAT32_C( 895.53) }, { SIMDE_FLOAT32_C( 208.75), SIMDE_FLOAT32_C( 253.35) }, { SIMDE_FLOAT32_C( -783.51), SIMDE_FLOAT32_C( -971.39), SIMDE_FLOAT32_C( 208.75), SIMDE_FLOAT32_C( 493.54), SIMDE_FLOAT32_C( 895.53), SIMDE_FLOAT32_C( 253.35), } }, { { SIMDE_FLOAT32_C( 867.44), SIMDE_FLOAT32_C( 363.06) }, { SIMDE_FLOAT32_C( 749.57), SIMDE_FLOAT32_C( -109.51) }, { SIMDE_FLOAT32_C( -81.12), SIMDE_FLOAT32_C( -786.46) }, { SIMDE_FLOAT32_C( 867.44), SIMDE_FLOAT32_C( 749.57), SIMDE_FLOAT32_C( -81.12), SIMDE_FLOAT32_C( 363.06), SIMDE_FLOAT32_C( -109.51), SIMDE_FLOAT32_C( -786.46), } }, { { SIMDE_FLOAT32_C( 687.74), SIMDE_FLOAT32_C( -617.77) }, { SIMDE_FLOAT32_C( 482.69), SIMDE_FLOAT32_C( 219.53) }, { SIMDE_FLOAT32_C( 980.32), SIMDE_FLOAT32_C( -243.91) }, { SIMDE_FLOAT32_C( 687.74), SIMDE_FLOAT32_C( 482.69), SIMDE_FLOAT32_C( 980.32), SIMDE_FLOAT32_C( -617.77), SIMDE_FLOAT32_C( 219.53), SIMDE_FLOAT32_C( -243.91), } }, { { SIMDE_FLOAT32_C( -516.59), SIMDE_FLOAT32_C( -483.00) }, { SIMDE_FLOAT32_C( 444.86), SIMDE_FLOAT32_C( 663.90) }, { SIMDE_FLOAT32_C( 259.84), SIMDE_FLOAT32_C( -602.70) }, { SIMDE_FLOAT32_C( -516.59), SIMDE_FLOAT32_C( 444.86), SIMDE_FLOAT32_C( 259.84), SIMDE_FLOAT32_C( -483.00), SIMDE_FLOAT32_C( 663.90), SIMDE_FLOAT32_C( -602.70), } }, { { SIMDE_FLOAT32_C( -83.32), SIMDE_FLOAT32_C( 226.79) }, { SIMDE_FLOAT32_C( 
-429.02), SIMDE_FLOAT32_C( 15.93) }, { SIMDE_FLOAT32_C( 22.49), SIMDE_FLOAT32_C( -201.43) }, { SIMDE_FLOAT32_C( -83.32), SIMDE_FLOAT32_C( -429.02), SIMDE_FLOAT32_C( 22.49), SIMDE_FLOAT32_C( 226.79), SIMDE_FLOAT32_C( 15.93), SIMDE_FLOAT32_C( -201.43), } }, { { SIMDE_FLOAT32_C( -640.23), SIMDE_FLOAT32_C( 238.98) }, { SIMDE_FLOAT32_C( -707.89), SIMDE_FLOAT32_C( -611.62) }, { SIMDE_FLOAT32_C( 134.51), SIMDE_FLOAT32_C( 500.86) }, { SIMDE_FLOAT32_C( -640.23), SIMDE_FLOAT32_C( -707.89), SIMDE_FLOAT32_C( 134.51), SIMDE_FLOAT32_C( 238.98), SIMDE_FLOAT32_C( -611.62), SIMDE_FLOAT32_C( 500.86), } }, { { SIMDE_FLOAT32_C( 641.73), SIMDE_FLOAT32_C( 1.95) }, { SIMDE_FLOAT32_C( -136.08), SIMDE_FLOAT32_C( 391.30) }, { SIMDE_FLOAT32_C( 892.44), SIMDE_FLOAT32_C( 782.80) }, { SIMDE_FLOAT32_C( 641.73), SIMDE_FLOAT32_C( -136.08), SIMDE_FLOAT32_C( 892.44), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 391.30), SIMDE_FLOAT32_C( 782.80), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2x3_t r_ = { { simde_vld1_f32(test_vec[i].r0), simde_vld1_f32(test_vec[i].r1), simde_vld1_f32(test_vec[i].r2), } }; simde_float32 a_[6]; simde_vst3_f32(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld3_f32(a_); simde_test_arm_neon_assert_equal_f32x2(r_.val[0], simde_vld1_f32(test_vec[i].r0), 1); simde_test_arm_neon_assert_equal_f32x2(r_.val[1], simde_vld1_f32(test_vec[i].r1), 1); simde_test_arm_neon_assert_equal_f32x2(r_.val[2], simde_vld1_f32(test_vec[i].r2), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a0 = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a1 = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a2 = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_f32x2(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, a1, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_float32_t buf[6]; simde_vst3_f32(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_f32(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst3_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 r0[1]; simde_float64 r1[1]; simde_float64 r2[1]; simde_float64 a[3]; } test_vec[] = { { { SIMDE_FLOAT64_C( -319.19) }, { SIMDE_FLOAT64_C( 569.35) }, { SIMDE_FLOAT64_C( -226.94) }, { SIMDE_FLOAT64_C( -319.19), SIMDE_FLOAT64_C( 569.35), SIMDE_FLOAT64_C( -226.94), } }, { { SIMDE_FLOAT64_C( -79.23) }, { SIMDE_FLOAT64_C( -856.19) }, { SIMDE_FLOAT64_C( 982.37) }, { SIMDE_FLOAT64_C( -79.23), SIMDE_FLOAT64_C( -856.19), SIMDE_FLOAT64_C( 982.37), } }, { { SIMDE_FLOAT64_C( -330.57) }, { SIMDE_FLOAT64_C( 730.79) }, { SIMDE_FLOAT64_C( -392.36) }, { SIMDE_FLOAT64_C( -330.57), SIMDE_FLOAT64_C( 730.79), SIMDE_FLOAT64_C( -392.36), } }, { { SIMDE_FLOAT64_C( -114.08) }, { SIMDE_FLOAT64_C( -298.63) }, { SIMDE_FLOAT64_C( 209.96) }, { SIMDE_FLOAT64_C( -114.08), SIMDE_FLOAT64_C( -298.63), SIMDE_FLOAT64_C( 209.96), } }, { { SIMDE_FLOAT64_C( -867.54) }, { SIMDE_FLOAT64_C( -281.79) }, { SIMDE_FLOAT64_C( 780.67) }, { SIMDE_FLOAT64_C( -867.54), SIMDE_FLOAT64_C( -281.79), SIMDE_FLOAT64_C( 780.67), } }, { { SIMDE_FLOAT64_C( 665.37) }, { SIMDE_FLOAT64_C( 109.67) }, { SIMDE_FLOAT64_C( 882.86) }, { SIMDE_FLOAT64_C( 665.37), SIMDE_FLOAT64_C( 109.67), SIMDE_FLOAT64_C( 882.86), } }, { { SIMDE_FLOAT64_C( -7.69) }, { SIMDE_FLOAT64_C( -441.62) }, { SIMDE_FLOAT64_C( 205.44) }, { SIMDE_FLOAT64_C( -7.69), SIMDE_FLOAT64_C( -441.62), SIMDE_FLOAT64_C( 205.44), } }, { { SIMDE_FLOAT64_C( 761.86) }, { SIMDE_FLOAT64_C( 259.74) }, { SIMDE_FLOAT64_C( 953.99) }, { SIMDE_FLOAT64_C( 761.86), SIMDE_FLOAT64_C( 259.74), SIMDE_FLOAT64_C( 953.99), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_float64x1x3_t r_ = { { simde_vld1_f64(test_vec[i].r0), simde_vld1_f64(test_vec[i].r1), simde_vld1_f64(test_vec[i].r2), } }; simde_float64_t a_[3]; simde_vst3_f64(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld3_f64(a_); simde_test_arm_neon_assert_equal_f64x1(r_.val[0], simde_vld1_f64(test_vec[i].r0), 1); simde_test_arm_neon_assert_equal_f64x1(r_.val[1], simde_vld1_f64(test_vec[i].r1), 1); simde_test_arm_neon_assert_equal_f64x1(r_.val[2], simde_vld1_f64(test_vec[i].r2), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a0 = simde_test_arm_neon_random_f64x1(-1000.0f, 1000.0f); simde_float64x1_t a1 = simde_test_arm_neon_random_f64x1(-1000.0f, 1000.0f); simde_float64x1_t a2 = simde_test_arm_neon_random_f64x1(-1000.0f, 1000.0f); simde_float64x1x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_f64x1(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x1(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_float64_t buf[3]; simde_vst3_f64(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_f64(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst3_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t r0[8]; int8_t r1[8]; int8_t r2[8]; int8_t a[24]; } test_vec[] = { { { -INT8_C( 125), -INT8_C( 82), -INT8_C( 51), -INT8_C( 114), INT8_C( 105), INT8_C( 0), -INT8_C( 73), INT8_C( 100) }, { INT8_C( 63), -INT8_C( 12), INT8_C( 81), INT8_C( 72), -INT8_C( 10), INT8_C( 76), INT8_C( 20), -INT8_C( 89) }, { -INT8_C( 63), -INT8_C( 118), INT8_C( 94), INT8_C( 42), -INT8_C( 88), -INT8_C( 57), -INT8_C( 6), INT8_C( 16) }, { -INT8_C( 125), INT8_C( 63), -INT8_C( 63), -INT8_C( 82), -INT8_C( 12), -INT8_C( 118), -INT8_C( 51), INT8_C( 81), INT8_C( 94), -INT8_C( 114), 
INT8_C( 72), INT8_C( 42), INT8_C( 105), -INT8_C( 10), -INT8_C( 88), INT8_C( 0), INT8_C( 76), -INT8_C( 57), -INT8_C( 73), INT8_C( 20), -INT8_C( 6), INT8_C( 100), -INT8_C( 89), INT8_C( 16), } }, { { INT8_C( 37), INT8_C( 25), -INT8_C( 10), INT8_C( 35), INT8_C( 77), INT8_C( 32), -INT8_C( 94), -INT8_C( 48) }, { -INT8_C( 49), INT8_C( 111), INT8_C( 94), INT8_C( 56), INT8_C( 112), INT8_C( 21), -INT8_C( 100), -INT8_C( 81) }, { INT8_C( 9), -INT8_C( 18), -INT8_C( 9), -INT8_C( 1), INT8_C( 58), INT8_C( 12), -INT8_C( 90), -INT8_C( 4) }, { INT8_C( 37), -INT8_C( 49), INT8_C( 9), INT8_C( 25), INT8_C( 111), -INT8_C( 18), -INT8_C( 10), INT8_C( 94), -INT8_C( 9), INT8_C( 35), INT8_C( 56), -INT8_C( 1), INT8_C( 77), INT8_C( 112), INT8_C( 58), INT8_C( 32), INT8_C( 21), INT8_C( 12), -INT8_C( 94), -INT8_C( 100), -INT8_C( 90), -INT8_C( 48), -INT8_C( 81), -INT8_C( 4), } }, { { -INT8_C( 106), INT8_C( 5), INT8_C( 38), INT8_C( 63), -INT8_C( 52), INT8_C( 32), INT8_C( 79), -INT8_C( 14) }, { INT8_C( 57), INT8_C( 69), INT8_C( 21), -INT8_C( 122), INT8_C( 102), -INT8_C( 73), INT8_C( 86), INT8_C( 53) }, { INT8_C( 39), -INT8_C( 76), INT8_C( 109), -INT8_C( 105), -INT8_C( 54), INT8_C( 10), INT8_C( 70), -INT8_C( 45) }, { -INT8_C( 106), INT8_C( 57), INT8_C( 39), INT8_C( 5), INT8_C( 69), -INT8_C( 76), INT8_C( 38), INT8_C( 21), INT8_C( 109), INT8_C( 63), -INT8_C( 122), -INT8_C( 105), -INT8_C( 52), INT8_C( 102), -INT8_C( 54), INT8_C( 32), -INT8_C( 73), INT8_C( 10), INT8_C( 79), INT8_C( 86), INT8_C( 70), -INT8_C( 14), INT8_C( 53), -INT8_C( 45), } }, { { -INT8_C( 8), INT8_C( 62), -INT8_C( 45), INT8_C( 50), INT8_C( 74), INT8_C( 121), INT8_C( 46), -INT8_C( 32) }, { INT8_C( 126), INT8_C( 84), INT8_C( 31), INT8_C( 75), INT8_C( 116), INT8_C( 111), INT8_C( 61), -INT8_C( 83) }, { -INT8_C( 76), INT8_C( 82), INT8_C( 51), INT8_C( 26), INT8_C( 10), -INT8_C( 119), INT8_C( 79), INT8_C( 49) }, { -INT8_C( 8), INT8_C( 126), -INT8_C( 76), INT8_C( 62), INT8_C( 84), INT8_C( 82), -INT8_C( 45), INT8_C( 31), INT8_C( 51), INT8_C( 50), 
INT8_C( 75), INT8_C( 26), INT8_C( 74), INT8_C( 116), INT8_C( 10), INT8_C( 121), INT8_C( 111), -INT8_C( 119), INT8_C( 46), INT8_C( 61), INT8_C( 79), -INT8_C( 32), -INT8_C( 83), INT8_C( 49), } }, { { INT8_C( 62), -INT8_C( 67), -INT8_C( 56), INT8_C( 8), -INT8_C( 57), INT8_C( 14), -INT8_C( 37), -INT8_C( 65) }, { INT8_C( 76), -INT8_C( 82), -INT8_C( 15), -INT8_C( 106), INT8_C( 40), INT8_C( 32), INT8_C( 119), -INT8_C( 90) }, { INT8_C( 116), -INT8_C( 106), -INT8_C( 15), -INT8_C( 23), INT8_C( 5), INT8_C( 46), -INT8_C( 106), -INT8_C( 70) }, { INT8_C( 62), INT8_C( 76), INT8_C( 116), -INT8_C( 67), -INT8_C( 82), -INT8_C( 106), -INT8_C( 56), -INT8_C( 15), -INT8_C( 15), INT8_C( 8), -INT8_C( 106), -INT8_C( 23), -INT8_C( 57), INT8_C( 40), INT8_C( 5), INT8_C( 14), INT8_C( 32), INT8_C( 46), -INT8_C( 37), INT8_C( 119), -INT8_C( 106), -INT8_C( 65), -INT8_C( 90), -INT8_C( 70), } }, { { -INT8_C( 127), -INT8_C( 54), -INT8_C( 44), -INT8_C( 117), INT8_C( 83), INT8_C( 36), -INT8_C( 68), -INT8_C( 111) }, { -INT8_C( 31), -INT8_C( 124), -INT8_C( 103), -INT8_C( 88), -INT8_C( 110), INT8_C( 117), INT8_C( 103), -INT8_C( 33) }, { INT8_C( 35), INT8_C( 88), INT8_C( 117), INT8_C( 75), INT8_C( 120), -INT8_C( 20), -INT8_C( 14), -INT8_C( 19) }, { -INT8_C( 127), -INT8_C( 31), INT8_C( 35), -INT8_C( 54), -INT8_C( 124), INT8_C( 88), -INT8_C( 44), -INT8_C( 103), INT8_C( 117), -INT8_C( 117), -INT8_C( 88), INT8_C( 75), INT8_C( 83), -INT8_C( 110), INT8_C( 120), INT8_C( 36), INT8_C( 117), -INT8_C( 20), -INT8_C( 68), INT8_C( 103), -INT8_C( 14), -INT8_C( 111), -INT8_C( 33), -INT8_C( 19), } }, { { -INT8_C( 125), -INT8_C( 29), -INT8_C( 42), -INT8_C( 120), INT8_C( 18), INT8_C( 108), INT8_C( 66), -INT8_C( 109) }, { INT8_C( 54), INT8_C( 23), INT8_C( 30), -INT8_C( 118), INT8_C( 59), -INT8_C( 38), INT8_C( 27), INT8_C( 28) }, { INT8_C( 94), -INT8_C( 75), -INT8_C( 60), -INT8_C( 16), INT8_C( 42), INT8_C( 43), -INT8_C( 49), INT8_C( 77) }, { -INT8_C( 125), INT8_C( 54), INT8_C( 94), -INT8_C( 29), INT8_C( 23), -INT8_C( 75), 
-INT8_C( 42), INT8_C( 30), -INT8_C( 60), -INT8_C( 120), -INT8_C( 118), -INT8_C( 16), INT8_C( 18), INT8_C( 59), INT8_C( 42), INT8_C( 108), -INT8_C( 38), INT8_C( 43), INT8_C( 66), INT8_C( 27), -INT8_C( 49), -INT8_C( 109), INT8_C( 28), INT8_C( 77), } }, { { -INT8_C( 125), INT8_C( 69), -INT8_C( 103), -INT8_C( 4), INT8_C( 49), -INT8_C( 117), -INT8_C( 23), -INT8_C( 76) }, { INT8_C( 110), -INT8_C( 65), INT8_C( 61), INT8_MIN, INT8_C( 43), INT8_MAX, INT8_C( 19), INT8_C( 98) }, { -INT8_C( 106), INT8_C( 49), -INT8_C( 20), -INT8_C( 47), INT8_C( 11), INT8_C( 7), -INT8_C( 19), INT8_C( 105) }, { -INT8_C( 125), INT8_C( 110), -INT8_C( 106), INT8_C( 69), -INT8_C( 65), INT8_C( 49), -INT8_C( 103), INT8_C( 61), -INT8_C( 20), -INT8_C( 4), INT8_MIN, -INT8_C( 47), INT8_C( 49), INT8_C( 43), INT8_C( 11), -INT8_C( 117), INT8_MAX, INT8_C( 7), -INT8_C( 23), INT8_C( 19), -INT8_C( 19), -INT8_C( 76), INT8_C( 98), INT8_C( 105), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8x3_t r_ = { { simde_vld1_s8(test_vec[i].r0), simde_vld1_s8(test_vec[i].r1), simde_vld1_s8(test_vec[i].r2), } }; int8_t a_[24]; simde_vst3_s8(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(a_))); r_ = simde_vld3_s8(a_); simde_test_arm_neon_assert_equal_i8x8(r_.val[0], simde_vld1_s8(test_vec[i].r0)); simde_test_arm_neon_assert_equal_i8x8(r_.val[1], simde_vld1_s8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i8x8(r_.val[2], simde_vld1_s8(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a0 = simde_test_arm_neon_random_i8x8(); simde_int8x8_t a1 = simde_test_arm_neon_random_i8x8(); simde_int8x8_t a2 = simde_test_arm_neon_random_i8x8(); simde_int8x8x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_i8x8(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); int8_t buf[24]; simde_vst3_s8(buf, 
a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_i8(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst3_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t r0[4]; int16_t r1[4]; int16_t r2[4]; int16_t a[12]; } test_vec[] = { { { -INT16_C( 12961), -INT16_C( 6596), INT16_C( 18449), INT16_C( 9642) }, { -INT16_C( 26186), INT16_C( 4954), -INT16_C( 25970), INT16_C( 13983) }, { INT16_C( 14092), -INT16_C( 14144), INT16_C( 792), INT16_C( 6146) }, { -INT16_C( 12961), -INT16_C( 26186), INT16_C( 14092), -INT16_C( 6596), INT16_C( 4954), -INT16_C( 14144), INT16_C( 18449), -INT16_C( 25970), INT16_C( 792), INT16_C( 9642), INT16_C( 13983), INT16_C( 6146), } }, { { -INT16_C( 19593), -INT16_C( 4112), -INT16_C( 6692), INT16_C( 15284) }, { -INT16_C( 3918), -INT16_C( 15327), -INT16_C( 13512), -INT16_C( 4375) }, { INT16_C( 17252), -INT16_C( 3583), -INT16_C( 24355), -INT16_C( 5592) }, { -INT16_C( 19593), -INT16_C( 3918), INT16_C( 17252), -INT16_C( 4112), -INT16_C( 15327), -INT16_C( 3583), -INT16_C( 6692), -INT16_C( 13512), -INT16_C( 24355), INT16_C( 15284), -INT16_C( 4375), -INT16_C( 5592), } }, { { -INT16_C( 5673), -INT16_C( 3918), -INT16_C( 19220), INT16_C( 25352) }, { -INT16_C( 1945), INT16_C( 17234), INT16_C( 1757), -INT16_C( 28545) }, { -INT16_C( 24330), INT16_C( 11860), INT16_C( 15724), -INT16_C( 12260) }, { -INT16_C( 5673), -INT16_C( 1945), -INT16_C( 24330), -INT16_C( 3918), INT16_C( 17234), INT16_C( 11860), -INT16_C( 19220), INT16_C( 1757), INT16_C( 15724), INT16_C( 25352), -INT16_C( 28545), -INT16_C( 12260), } }, { { INT16_C( 7552), INT16_C( 24003), -INT16_C( 5186), -INT16_C( 27321) }, { -INT16_C( 1580), -INT16_C( 15995), -INT16_C( 29267), INT16_C( 5156) }, { INT16_C( 30598), INT16_C( 25432), -INT16_C( 10371), INT16_C( 29939) }, { INT16_C( 7552), -INT16_C( 1580), INT16_C( 30598), INT16_C( 24003), -INT16_C( 15995), INT16_C( 25432), -INT16_C( 5186), 
-INT16_C( 29267), -INT16_C( 10371), -INT16_C( 27321), INT16_C( 5156), INT16_C( 29939), } }, { { INT16_C( 18295), -INT16_C( 7262), -INT16_C( 16508), INT16_C( 1204) }, { INT16_C( 30684), -INT16_C( 26014), -INT16_C( 22174), INT16_C( 14128) }, { -INT16_C( 19037), INT16_C( 20728), INT16_C( 7235), -INT16_C( 13979) }, { INT16_C( 18295), INT16_C( 30684), -INT16_C( 19037), -INT16_C( 7262), -INT16_C( 26014), INT16_C( 20728), -INT16_C( 16508), -INT16_C( 22174), INT16_C( 7235), INT16_C( 1204), INT16_C( 14128), -INT16_C( 13979), } }, { { -INT16_C( 17005), INT16_C( 4396), INT16_C( 8340), INT16_C( 2949) }, { INT16_C( 10087), -INT16_C( 4881), -INT16_C( 23578), -INT16_C( 15376) }, { INT16_C( 21018), INT16_C( 31837), -INT16_C( 29188), -INT16_C( 24653) }, { -INT16_C( 17005), INT16_C( 10087), INT16_C( 21018), INT16_C( 4396), -INT16_C( 4881), INT16_C( 31837), INT16_C( 8340), -INT16_C( 23578), -INT16_C( 29188), INT16_C( 2949), -INT16_C( 15376), -INT16_C( 24653), } }, { { -INT16_C( 21693), -INT16_C( 30993), INT16_C( 21704), INT16_C( 23375) }, { INT16_C( 31505), -INT16_C( 23188), -INT16_C( 3685), INT16_C( 945) }, { -INT16_C( 24551), -INT16_C( 17), -INT16_C( 8381), INT16_C( 24002) }, { -INT16_C( 21693), INT16_C( 31505), -INT16_C( 24551), -INT16_C( 30993), -INT16_C( 23188), -INT16_C( 17), INT16_C( 21704), -INT16_C( 3685), -INT16_C( 8381), INT16_C( 23375), INT16_C( 945), INT16_C( 24002), } }, { { INT16_C( 8242), INT16_C( 11993), -INT16_C( 29267), -INT16_C( 3891) }, { -INT16_C( 17352), INT16_C( 118), -INT16_C( 15087), INT16_C( 8796) }, { -INT16_C( 14271), -INT16_C( 9016), INT16_C( 31162), -INT16_C( 11297) }, { INT16_C( 8242), -INT16_C( 17352), -INT16_C( 14271), INT16_C( 11993), INT16_C( 118), -INT16_C( 9016), -INT16_C( 29267), -INT16_C( 15087), INT16_C( 31162), -INT16_C( 3891), INT16_C( 8796), -INT16_C( 11297), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4x3_t r_ = { { simde_vld1_s16(test_vec[i].r0), simde_vld1_s16(test_vec[i].r1), 
simde_vld1_s16(test_vec[i].r2), } }; int16_t a_[12]; simde_vst3_s16(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld3_s16(a_); simde_test_arm_neon_assert_equal_i16x4(r_.val[0], simde_vld1_s16(test_vec[i].r0)); simde_test_arm_neon_assert_equal_i16x4(r_.val[1], simde_vld1_s16(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i16x4(r_.val[2], simde_vld1_s16(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a0 = simde_test_arm_neon_random_i16x4(); simde_int16x4_t a1 = simde_test_arm_neon_random_i16x4(); simde_int16x4_t a2 = simde_test_arm_neon_random_i16x4(); simde_int16x4x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_i16x4(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); int16_t buf[12]; simde_vst3_s16(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_i16(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst3_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t r0[2]; int32_t r1[2]; int32_t r2[2]; int32_t a[6]; } test_vec[] = { { { INT32_C( 1914427947), INT32_C( 1925674776) }, { INT32_C( 1209568771), INT32_C( 1487047557) }, { INT32_C( 1914078099), -INT32_C( 432278526) }, { INT32_C( 1914427947), INT32_C( 1209568771), INT32_C( 1914078099), INT32_C( 1925674776), INT32_C( 1487047557), -INT32_C( 432278526), } }, { { INT32_C( 1941302689), -INT32_C( 1422572160) }, { INT32_C( 924733471), -INT32_C( 894769721) }, { -INT32_C( 116145548), -INT32_C( 531516083) }, { INT32_C( 1941302689), INT32_C( 924733471), -INT32_C( 116145548), -INT32_C( 1422572160), -INT32_C( 894769721), -INT32_C( 531516083), } }, { { INT32_C( 995256376), -INT32_C( 48132772) }, { -INT32_C( 76491141), INT32_C( 983999771) }, { -INT32_C( 1116551947), 
INT32_C( 512171178) }, { INT32_C( 995256376), -INT32_C( 76491141), -INT32_C( 1116551947), -INT32_C( 48132772), INT32_C( 983999771), INT32_C( 512171178), } }, { { INT32_C( 722967262), -INT32_C( 2012452784) }, { INT32_C( 751001296), INT32_C( 1730799084) }, { -INT32_C( 681403717), INT32_C( 890308671) }, { INT32_C( 722967262), INT32_C( 751001296), -INT32_C( 681403717), -INT32_C( 2012452784), INT32_C( 1730799084), INT32_C( 890308671), } }, { { INT32_C( 2012382157), INT32_C( 2106947999) }, { INT32_C( 1688841236), -INT32_C( 437471980) }, { -INT32_C( 15618029), INT32_C( 1348877205) }, { INT32_C( 2012382157), INT32_C( 1688841236), -INT32_C( 15618029), INT32_C( 2106947999), -INT32_C( 437471980), INT32_C( 1348877205), } }, { { INT32_C( 338151637), -INT32_C( 1639368239) }, { INT32_C( 1544895420), -INT32_C( 908481867) }, { INT32_C( 1781367382), INT32_C( 1263474999) }, { INT32_C( 338151637), INT32_C( 1544895420), INT32_C( 1781367382), -INT32_C( 1639368239), -INT32_C( 908481867), INT32_C( 1263474999), } }, { { INT32_C( 1581933001), INT32_C( 1907339676) }, { INT32_C( 1250285177), -INT32_C( 857157873) }, { -INT32_C( 1087832822), -INT32_C( 41418329) }, { INT32_C( 1581933001), INT32_C( 1250285177), -INT32_C( 1087832822), INT32_C( 1907339676), -INT32_C( 857157873), -INT32_C( 41418329), } }, { { -INT32_C( 1150765692), -INT32_C( 1744390193) }, { -INT32_C( 1258860264), INT32_C( 2066064898) }, { -INT32_C( 1933137028), -INT32_C( 2074562950) }, { -INT32_C( 1150765692), -INT32_C( 1258860264), -INT32_C( 1933137028), -INT32_C( 1744390193), INT32_C( 2066064898), -INT32_C( 2074562950), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2x3_t r_ = { { simde_vld1_s32(test_vec[i].r0), simde_vld1_s32(test_vec[i].r1), simde_vld1_s32(test_vec[i].r2), } }; int32_t a_[6]; simde_vst3_s32(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld3_s32(a_); simde_test_arm_neon_assert_equal_i32x2(r_.val[0], 
simde_vld1_s32(test_vec[i].r0)); simde_test_arm_neon_assert_equal_i32x2(r_.val[1], simde_vld1_s32(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i32x2(r_.val[2], simde_vld1_s32(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a0 = simde_test_arm_neon_random_i32x2(); simde_int32x2_t a1 = simde_test_arm_neon_random_i32x2(); simde_int32x2_t a2 = simde_test_arm_neon_random_i32x2(); simde_int32x2x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_i32x2(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); int32_t buf[6]; simde_vst3_s32(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_i32(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst3_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t r0[1]; int64_t r1[1]; int64_t r2[1]; int64_t a[3]; } test_vec[] = { { { INT64_C( 3572290692995033026) }, { INT64_C( 3052598082801596878) }, { INT64_C( 2809764841870853617) }, { INT64_C( 3572290692995033026), INT64_C( 3052598082801596878), INT64_C( 2809764841870853617), } }, { { -INT64_C( 8572706592134440696) }, { -INT64_C( 7967295716091007032) }, { -INT64_C( 3537755775352906508) }, { -INT64_C( 8572706592134440696), -INT64_C( 7967295716091007032), -INT64_C( 3537755775352906508), } }, { { INT64_C( 5892241348017586019) }, { INT64_C( 6492965336728335051) }, { INT64_C( 8099449588095058452) }, { INT64_C( 5892241348017586019), INT64_C( 6492965336728335051), INT64_C( 8099449588095058452), } }, { { INT64_C( 2922014572781856738) }, { INT64_C( 4516420428428149011) }, { INT64_C( 246550016633621118) }, { INT64_C( 2922014572781856738), INT64_C( 4516420428428149011), INT64_C( 246550016633621118), } }, { { -INT64_C( 1523834019503275283) }, { INT64_C( 3356930063699871657) }, { INT64_C( 
7487577332208262455) }, { -INT64_C( 1523834019503275283), INT64_C( 3356930063699871657), INT64_C( 7487577332208262455), } }, { { -INT64_C( 7893153571695773310) }, { INT64_C( 4168318472599392229) }, { -INT64_C( 1213652234887790114) }, { -INT64_C( 7893153571695773310), INT64_C( 4168318472599392229), -INT64_C( 1213652234887790114), } }, { { INT64_C( 7848230343900050330) }, { INT64_C( 2987104008165810748) }, { INT64_C( 2768850959988125858) }, { INT64_C( 7848230343900050330), INT64_C( 2987104008165810748), INT64_C( 2768850959988125858), } }, { { -INT64_C( 1506374561952689296) }, { INT64_C( 8165687162410349307) }, { INT64_C( 2677890818908565691) }, { -INT64_C( 1506374561952689296), INT64_C( 8165687162410349307), INT64_C( 2677890818908565691), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1x3_t r_ = { { simde_vld1_s64(test_vec[i].r0), simde_vld1_s64(test_vec[i].r1), simde_vld1_s64(test_vec[i].r2), } }; int64_t a_[3]; simde_vst3_s64(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld3_s64(a_); simde_test_arm_neon_assert_equal_i64x1(r_.val[0], simde_vld1_s64(test_vec[i].r0)); simde_test_arm_neon_assert_equal_i64x1(r_.val[1], simde_vld1_s64(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i64x1(r_.val[2], simde_vld1_s64(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a0 = simde_test_arm_neon_random_i64x1(); simde_int64x1_t a1 = simde_test_arm_neon_random_i64x1(); simde_int64x1_t a2 = simde_test_arm_neon_random_i64x1(); simde_int64x1x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_i64x1(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); int64_t buf[3]; simde_vst3_s64(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_i64(2, buf[j], 
SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst3_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t r0[8]; uint8_t r1[8]; uint8_t r2[8]; uint8_t a[24]; } test_vec[] = { { { UINT8_C( 12), UINT8_C( 67), UINT8_C(124), UINT8_C(237), UINT8_C(151), UINT8_C(131), UINT8_C(124), UINT8_C(175) }, { UINT8_C(205), UINT8_C(128), UINT8_C(134), UINT8_C(187), UINT8_C(236), UINT8_C( 2), UINT8_C( 61), UINT8_C( 50) }, { UINT8_C( 18), UINT8_C(148), UINT8_C(148), UINT8_C( 14), UINT8_C(133), UINT8_C( 73), UINT8_C( 44), UINT8_C(182) }, { UINT8_C( 12), UINT8_C(205), UINT8_C( 18), UINT8_C( 67), UINT8_C(128), UINT8_C(148), UINT8_C(124), UINT8_C(134), UINT8_C(148), UINT8_C(237), UINT8_C(187), UINT8_C( 14), UINT8_C(151), UINT8_C(236), UINT8_C(133), UINT8_C(131), UINT8_C( 2), UINT8_C( 73), UINT8_C(124), UINT8_C( 61), UINT8_C( 44), UINT8_C(175), UINT8_C( 50), UINT8_C(182), } }, { { UINT8_C( 10), UINT8_C(157), UINT8_C(245), UINT8_C( 84), UINT8_C(206), UINT8_C( 89), UINT8_C(211), UINT8_C(218) }, { UINT8_C(156), UINT8_C( 79), UINT8_C(199), UINT8_C( 51), UINT8_C(210), UINT8_C( 67), UINT8_C(226), UINT8_C(159) }, { UINT8_C(196), UINT8_C(104), UINT8_C( 90), UINT8_C(176), UINT8_C(106), UINT8_C(152), UINT8_C(226), UINT8_C(125) }, { UINT8_C( 10), UINT8_C(156), UINT8_C(196), UINT8_C(157), UINT8_C( 79), UINT8_C(104), UINT8_C(245), UINT8_C(199), UINT8_C( 90), UINT8_C( 84), UINT8_C( 51), UINT8_C(176), UINT8_C(206), UINT8_C(210), UINT8_C(106), UINT8_C( 89), UINT8_C( 67), UINT8_C(152), UINT8_C(211), UINT8_C(226), UINT8_C(226), UINT8_C(218), UINT8_C(159), UINT8_C(125), } }, { { UINT8_C( 44), UINT8_C(119), UINT8_C(139), UINT8_C(177), UINT8_C(192), UINT8_C(183), UINT8_C(103), UINT8_C(202) }, { UINT8_C( 85), UINT8_C( 92), UINT8_C( 30), UINT8_C( 35), UINT8_C(181), UINT8_C(241), UINT8_C(253), UINT8_C( 81) }, { UINT8_C( 64), UINT8_C(196), UINT8_C(132), UINT8_C( 19), UINT8_C( 8), UINT8_C(103), UINT8_C(178), UINT8_C(204) }, { UINT8_C( 44), UINT8_C( 85), 
UINT8_C( 64), UINT8_C(119), UINT8_C( 92), UINT8_C(196), UINT8_C(139), UINT8_C( 30), UINT8_C(132), UINT8_C(177), UINT8_C( 35), UINT8_C( 19), UINT8_C(192), UINT8_C(181), UINT8_C( 8), UINT8_C(183), UINT8_C(241), UINT8_C(103), UINT8_C(103), UINT8_C(253), UINT8_C(178), UINT8_C(202), UINT8_C( 81), UINT8_C(204), } }, { { UINT8_C(207), UINT8_C( 13), UINT8_C(124), UINT8_C( 58), UINT8_C(165), UINT8_C( 94), UINT8_C(183), UINT8_C(209) }, { UINT8_C(213), UINT8_C( 66), UINT8_C(130), UINT8_C(150), UINT8_C(250), UINT8_C(233), UINT8_C( 96), UINT8_C( 79) }, { UINT8_C( 69), UINT8_C(127), UINT8_C(114), UINT8_C(250), UINT8_C(112), UINT8_C(111), UINT8_C( 75), UINT8_C(177) }, { UINT8_C(207), UINT8_C(213), UINT8_C( 69), UINT8_C( 13), UINT8_C( 66), UINT8_C(127), UINT8_C(124), UINT8_C(130), UINT8_C(114), UINT8_C( 58), UINT8_C(150), UINT8_C(250), UINT8_C(165), UINT8_C(250), UINT8_C(112), UINT8_C( 94), UINT8_C(233), UINT8_C(111), UINT8_C(183), UINT8_C( 96), UINT8_C( 75), UINT8_C(209), UINT8_C( 79), UINT8_C(177), } }, { { UINT8_C( 51), UINT8_C(207), UINT8_C(196), UINT8_C( 59), UINT8_C( 54), UINT8_C(118), UINT8_C( 7), UINT8_C( 6) }, { UINT8_C(131), UINT8_C(131), UINT8_C( 64), UINT8_C( 40), UINT8_C(226), UINT8_C(247), UINT8_C(249), UINT8_C(183) }, { UINT8_C( 57), UINT8_C(123), UINT8_C( 77), UINT8_C( 51), UINT8_C(100), UINT8_C(174), UINT8_C(130), UINT8_C(169) }, { UINT8_C( 51), UINT8_C(131), UINT8_C( 57), UINT8_C(207), UINT8_C(131), UINT8_C(123), UINT8_C(196), UINT8_C( 64), UINT8_C( 77), UINT8_C( 59), UINT8_C( 40), UINT8_C( 51), UINT8_C( 54), UINT8_C(226), UINT8_C(100), UINT8_C(118), UINT8_C(247), UINT8_C(174), UINT8_C( 7), UINT8_C(249), UINT8_C(130), UINT8_C( 6), UINT8_C(183), UINT8_C(169), } }, { { UINT8_C( 45), UINT8_C(244), UINT8_C(163), UINT8_C(157), UINT8_C( 99), UINT8_C(238), UINT8_C( 78), UINT8_C(151) }, { UINT8_C(190), UINT8_C( 18), UINT8_C(210), UINT8_C(244), UINT8_C(137), UINT8_C(218), UINT8_C(250), UINT8_C( 12) }, { UINT8_C( 93), UINT8_C( 58), UINT8_C( 53), UINT8_C( 63), UINT8_C( 49), 
UINT8_C( 46), UINT8_C(247), UINT8_C(107) }, { UINT8_C( 45), UINT8_C(190), UINT8_C( 93), UINT8_C(244), UINT8_C( 18), UINT8_C( 58), UINT8_C(163), UINT8_C(210), UINT8_C( 53), UINT8_C(157), UINT8_C(244), UINT8_C( 63), UINT8_C( 99), UINT8_C(137), UINT8_C( 49), UINT8_C(238), UINT8_C(218), UINT8_C( 46), UINT8_C( 78), UINT8_C(250), UINT8_C(247), UINT8_C(151), UINT8_C( 12), UINT8_C(107), } }, { { UINT8_C(170), UINT8_C( 68), UINT8_C(158), UINT8_C( 14), UINT8_C(242), UINT8_C( 33), UINT8_C(184), UINT8_C( 31) }, { UINT8_C( 21), UINT8_C( 91), UINT8_C(189), UINT8_C(121), UINT8_C( 74), UINT8_C( 11), UINT8_C( 16), UINT8_C( 8) }, { UINT8_C( 30), UINT8_C(226), UINT8_C(252), UINT8_C(167), UINT8_C(188), UINT8_C(247), UINT8_C(179), UINT8_C( 26) }, { UINT8_C(170), UINT8_C( 21), UINT8_C( 30), UINT8_C( 68), UINT8_C( 91), UINT8_C(226), UINT8_C(158), UINT8_C(189), UINT8_C(252), UINT8_C( 14), UINT8_C(121), UINT8_C(167), UINT8_C(242), UINT8_C( 74), UINT8_C(188), UINT8_C( 33), UINT8_C( 11), UINT8_C(247), UINT8_C(184), UINT8_C( 16), UINT8_C(179), UINT8_C( 31), UINT8_C( 8), UINT8_C( 26), } }, { { UINT8_C( 49), UINT8_C(232), UINT8_C( 89), UINT8_C( 99), UINT8_C( 23), UINT8_C( 80), UINT8_C(206), UINT8_C(193) }, { UINT8_C(149), UINT8_C(108), UINT8_C(207), UINT8_C(135), UINT8_C(141), UINT8_C(135), UINT8_C(167), UINT8_C(163) }, { UINT8_C(227), UINT8_C(100), UINT8_C( 28), UINT8_C( 45), UINT8_C(111), UINT8_C( 44), UINT8_C( 53), UINT8_C(141) }, { UINT8_C( 49), UINT8_C(149), UINT8_C(227), UINT8_C(232), UINT8_C(108), UINT8_C(100), UINT8_C( 89), UINT8_C(207), UINT8_C( 28), UINT8_C( 99), UINT8_C(135), UINT8_C( 45), UINT8_C( 23), UINT8_C(141), UINT8_C(111), UINT8_C( 80), UINT8_C(135), UINT8_C( 44), UINT8_C(206), UINT8_C(167), UINT8_C( 53), UINT8_C(193), UINT8_C(163), UINT8_C(141), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8x3_t r_ = { { simde_vld1_u8(test_vec[i].r0), simde_vld1_u8(test_vec[i].r1), simde_vld1_u8(test_vec[i].r2), } }; uint8_t a_[24]; 
simde_vst3_u8(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld3_u8(a_); simde_test_arm_neon_assert_equal_u8x8(r_.val[0], simde_vld1_u8(test_vec[i].r0)); simde_test_arm_neon_assert_equal_u8x8(r_.val[1], simde_vld1_u8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u8x8(r_.val[2], simde_vld1_u8(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (size_t i = 0 ; i < 8 ; i++) { simde_uint8x8_t a0 = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t a1 = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t a2 = simde_test_arm_neon_random_u8x8(); simde_uint8x8x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_u8x8(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); uint8_t buf[24]; simde_vst3_u8(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_u8(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst3_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t r0[4]; uint16_t r1[4]; uint16_t r2[4]; uint16_t a[12]; } test_vec[] = { { { UINT16_C( 9344), UINT16_C(14996), UINT16_C( 7435), UINT16_C(59276) }, { UINT16_C(26277), UINT16_C(23774), UINT16_C(21915), UINT16_C( 425) }, { UINT16_C(18737), UINT16_C(57584), UINT16_C(38525), UINT16_C(39425) }, { UINT16_C( 9344), UINT16_C(26277), UINT16_C(18737), UINT16_C(14996), UINT16_C(23774), UINT16_C(57584), UINT16_C( 7435), UINT16_C(21915), UINT16_C(38525), UINT16_C(59276), UINT16_C( 425), UINT16_C(39425), } }, { { UINT16_C(31766), UINT16_C(36501), UINT16_C(13366), UINT16_C(47054) }, { UINT16_C(25177), UINT16_C(25841), UINT16_C(32127), UINT16_C( 9547) }, { UINT16_C(10724), UINT16_C(32641), UINT16_C(10878), UINT16_C(45184) }, { UINT16_C(31766), UINT16_C(25177), UINT16_C(10724), UINT16_C(36501), UINT16_C(25841), UINT16_C(32641), 
UINT16_C(13366), UINT16_C(32127), UINT16_C(10878), UINT16_C(47054), UINT16_C( 9547), UINT16_C(45184), } }, { { UINT16_C(29044), UINT16_C(61840), UINT16_C(37127), UINT16_C( 7563) }, { UINT16_C( 8206), UINT16_C(17579), UINT16_C(31317), UINT16_C(44795) }, { UINT16_C(60892), UINT16_C(23570), UINT16_C(23914), UINT16_C(20097) }, { UINT16_C(29044), UINT16_C( 8206), UINT16_C(60892), UINT16_C(61840), UINT16_C(17579), UINT16_C(23570), UINT16_C(37127), UINT16_C(31317), UINT16_C(23914), UINT16_C( 7563), UINT16_C(44795), UINT16_C(20097), } }, { { UINT16_C( 646), UINT16_C( 1230), UINT16_C(20013), UINT16_C(41396) }, { UINT16_C(17599), UINT16_C(51090), UINT16_C( 7638), UINT16_C(58596) }, { UINT16_C(36926), UINT16_C(37672), UINT16_C( 9226), UINT16_C(58945) }, { UINT16_C( 646), UINT16_C(17599), UINT16_C(36926), UINT16_C( 1230), UINT16_C(51090), UINT16_C(37672), UINT16_C(20013), UINT16_C( 7638), UINT16_C( 9226), UINT16_C(41396), UINT16_C(58596), UINT16_C(58945), } }, { { UINT16_C(21265), UINT16_C(31554), UINT16_C(50096), UINT16_C(14026) }, { UINT16_C(39110), UINT16_C(62266), UINT16_C(61414), UINT16_C(42644) }, { UINT16_C( 9779), UINT16_C( 2413), UINT16_C(20803), UINT16_C(33261) }, { UINT16_C(21265), UINT16_C(39110), UINT16_C( 9779), UINT16_C(31554), UINT16_C(62266), UINT16_C( 2413), UINT16_C(50096), UINT16_C(61414), UINT16_C(20803), UINT16_C(14026), UINT16_C(42644), UINT16_C(33261), } }, { { UINT16_C( 5857), UINT16_C(60180), UINT16_C(21818), UINT16_C(19410) }, { UINT16_C( 5288), UINT16_C(22726), UINT16_C(37080), UINT16_C(40590) }, { UINT16_C(51496), UINT16_C( 3985), UINT16_C( 9656), UINT16_C(60341) }, { UINT16_C( 5857), UINT16_C( 5288), UINT16_C(51496), UINT16_C(60180), UINT16_C(22726), UINT16_C( 3985), UINT16_C(21818), UINT16_C(37080), UINT16_C( 9656), UINT16_C(19410), UINT16_C(40590), UINT16_C(60341), } }, { { UINT16_C( 8779), UINT16_C(36597), UINT16_C(57971), UINT16_C(21776) }, { UINT16_C( 9464), UINT16_C(12864), UINT16_C( 4730), UINT16_C( 8829) }, { UINT16_C(17447), 
UINT16_C(65403), UINT16_C( 2516), UINT16_C(64925) }, { UINT16_C( 8779), UINT16_C( 9464), UINT16_C(17447), UINT16_C(36597), UINT16_C(12864), UINT16_C(65403), UINT16_C(57971), UINT16_C( 4730), UINT16_C( 2516), UINT16_C(21776), UINT16_C( 8829), UINT16_C(64925), } }, { { UINT16_C(11986), UINT16_C(35340), UINT16_C(49491), UINT16_C(40566) }, { UINT16_C(27619), UINT16_C(22060), UINT16_C(15437), UINT16_C(18091) }, { UINT16_C(60513), UINT16_C(56184), UINT16_C(63230), UINT16_C( 9725) }, { UINT16_C(11986), UINT16_C(27619), UINT16_C(60513), UINT16_C(35340), UINT16_C(22060), UINT16_C(56184), UINT16_C(49491), UINT16_C(15437), UINT16_C(63230), UINT16_C(40566), UINT16_C(18091), UINT16_C( 9725), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4x3_t r_ = { { simde_vld1_u16(test_vec[i].r0), simde_vld1_u16(test_vec[i].r1), simde_vld1_u16(test_vec[i].r2), } }; uint16_t a_[12]; simde_vst3_u16(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld3_u16(a_); simde_test_arm_neon_assert_equal_u16x4(r_.val[0], simde_vld1_u16(test_vec[i].r0)); simde_test_arm_neon_assert_equal_u16x4(r_.val[1], simde_vld1_u16(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u16x4(r_.val[2], simde_vld1_u16(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (size_t i = 0 ; i < 8 ; i++) { simde_uint16x4_t a0 = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t a1 = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t a2 = simde_test_arm_neon_random_u16x4(); simde_uint16x4x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_u16x4(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); uint16_t buf[12]; simde_vst3_u16(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_u16(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } 
return 1; #endif } static int test_simde_vst3_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t r0[2]; uint32_t r1[2]; uint32_t r2[2]; uint32_t a[6]; } test_vec[] = { { { UINT32_C(1250214441), UINT32_C(1747430402) }, { UINT32_C( 407036501), UINT32_C( 975185396) }, { UINT32_C(3833042844), UINT32_C( 714403033) }, { UINT32_C(1250214441), UINT32_C( 407036501), UINT32_C(3833042844), UINT32_C(1747430402), UINT32_C( 975185396), UINT32_C( 714403033), } }, { { UINT32_C( 284881893), UINT32_C(1679415099) }, { UINT32_C(2628689306), UINT32_C(2667894090) }, { UINT32_C(2897692599), UINT32_C( 149346156) }, { UINT32_C( 284881893), UINT32_C(2628689306), UINT32_C(2897692599), UINT32_C(1679415099), UINT32_C(2667894090), UINT32_C( 149346156), } }, { { UINT32_C(1206738286), UINT32_C( 779190601) }, { UINT32_C(2956880757), UINT32_C(3709098051) }, { UINT32_C(1064944373), UINT32_C(1323203991) }, { UINT32_C(1206738286), UINT32_C(2956880757), UINT32_C(1064944373), UINT32_C( 779190601), UINT32_C(3709098051), UINT32_C(1323203991), } }, { { UINT32_C( 838505924), UINT32_C(3661226348) }, { UINT32_C(2283939390), UINT32_C( 498504616) }, { UINT32_C(1120794111), UINT32_C(1109385549) }, { UINT32_C( 838505924), UINT32_C(2283939390), UINT32_C(1120794111), UINT32_C(3661226348), UINT32_C( 498504616), UINT32_C(1109385549), } }, { { UINT32_C( 981637283), UINT32_C(3666370581) }, { UINT32_C(1644921846), UINT32_C(2721924196) }, { UINT32_C( 321544043), UINT32_C(4046512626) }, { UINT32_C( 981637283), UINT32_C(1644921846), UINT32_C( 321544043), UINT32_C(3666370581), UINT32_C(2721924196), UINT32_C(4046512626), } }, { { UINT32_C( 590609878), UINT32_C(2170901214) }, { UINT32_C( 12314602), UINT32_C(1054491464) }, { UINT32_C( 715187654), UINT32_C(2496519465) }, { UINT32_C( 590609878), UINT32_C( 12314602), UINT32_C( 715187654), UINT32_C(2170901214), UINT32_C(1054491464), UINT32_C(2496519465), } }, { { UINT32_C( 799536956), UINT32_C(2921387992) }, { UINT32_C(3000063188), UINT32_C(2436052902) }, { 
UINT32_C(1720839710), UINT32_C(4171524914) }, { UINT32_C( 799536956), UINT32_C(3000063188), UINT32_C(1720839710), UINT32_C(2921387992), UINT32_C(2436052902), UINT32_C(4171524914), } }, { { UINT32_C(2032354640), UINT32_C(1594814498) }, { UINT32_C(3230578151), UINT32_C(1634643597) }, { UINT32_C(2836676610), UINT32_C(2503624567) }, { UINT32_C(2032354640), UINT32_C(3230578151), UINT32_C(2836676610), UINT32_C(1594814498), UINT32_C(1634643597), UINT32_C(2503624567), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2x3_t r_ = { { simde_vld1_u32(test_vec[i].r0), simde_vld1_u32(test_vec[i].r1), simde_vld1_u32(test_vec[i].r2), } }; uint32_t a_[6]; simde_vst3_u32(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld3_u32(a_); simde_test_arm_neon_assert_equal_u32x2(r_.val[0], simde_vld1_u32(test_vec[i].r0)); simde_test_arm_neon_assert_equal_u32x2(r_.val[1], simde_vld1_u32(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u32x2(r_.val[2], simde_vld1_u32(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (size_t i = 0 ; i < 8 ; i++) { simde_uint32x2_t a0 = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t a1 = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t a2 = simde_test_arm_neon_random_u32x2(); simde_uint32x2x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_u32x2(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); uint32_t buf[6]; simde_vst3_u32(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_u32(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst3_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t r0[1]; uint64_t r1[1]; uint64_t r2[1]; uint64_t a[3]; } test_vec[] = { { { UINT64_C(12422824856183467782) }, { 
UINT64_C( 7004324963154263795) }, { UINT64_C( 9338655374062009309) }, { UINT64_C(12422824856183467782), UINT64_C( 7004324963154263795), UINT64_C( 9338655374062009309), } }, { { UINT64_C( 6390283869042032365) }, { UINT64_C( 1465642543073747784) }, { UINT64_C(14254966942331603968) }, { UINT64_C( 6390283869042032365), UINT64_C( 1465642543073747784), UINT64_C(14254966942331603968), } }, { { UINT64_C( 1378937280728634636) }, { UINT64_C( 7737497671101794998) }, { UINT64_C(18425754395142978945) }, { UINT64_C( 1378937280728634636), UINT64_C( 7737497671101794998), UINT64_C(18425754395142978945), } }, { { UINT64_C( 3361450187992158969) }, { UINT64_C(13048320093653501658) }, { UINT64_C( 2506568346036438075) }, { UINT64_C( 3361450187992158969), UINT64_C(13048320093653501658), UINT64_C( 2506568346036438075), } }, { { UINT64_C( 4979425705785089819) }, { UINT64_C(12329956177065970635) }, { UINT64_C( 1839369556765502675) }, { UINT64_C( 4979425705785089819), UINT64_C(12329956177065970635), UINT64_C( 1839369556765502675), } }, { { UINT64_C( 9081580541621781572) }, { UINT64_C( 1275277660055456800) }, { UINT64_C( 8771073248822548798) }, { UINT64_C( 9081580541621781572), UINT64_C( 1275277660055456800), UINT64_C( 8771073248822548798), } }, { { UINT64_C( 7709829577090688264) }, { UINT64_C(15561442731964266005) }, { UINT64_C( 9114713076917374501) }, { UINT64_C( 7709829577090688264), UINT64_C(15561442731964266005), UINT64_C( 9114713076917374501), } }, { { UINT64_C(11596565186140273872) }, { UINT64_C( 2250819095794956716) }, { UINT64_C( 831350528676433402) }, { UINT64_C(11596565186140273872), UINT64_C( 2250819095794956716), UINT64_C( 831350528676433402), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1x3_t r_ = { { simde_vld1_u64(test_vec[i].r0), simde_vld1_u64(test_vec[i].r1), simde_vld1_u64(test_vec[i].r2), } }; uint64_t a_[3]; simde_vst3_u64(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = 
simde_vld3_u64(a_); simde_test_arm_neon_assert_equal_u64x1(r_.val[0], simde_vld1_u64(test_vec[i].r0)); simde_test_arm_neon_assert_equal_u64x1(r_.val[1], simde_vld1_u64(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u64x1(r_.val[2], simde_vld1_u64(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (size_t i = 0 ; i < 8 ; i++) { simde_uint64x1_t a0 = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t a1 = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t a2 = simde_test_arm_neon_random_u64x1(); simde_uint64x1x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_u64x1(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); uint64_t buf[3]; simde_vst3_u64(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_u64(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst3q_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 r0[4]; simde_float32 r1[4]; simde_float32 r2[4]; simde_float32 a[12]; } test_vec[] = { { { SIMDE_FLOAT32_C( -985.78), SIMDE_FLOAT32_C( -181.56), SIMDE_FLOAT32_C( -910.82), SIMDE_FLOAT32_C( -732.91) }, { SIMDE_FLOAT32_C( -353.18), SIMDE_FLOAT32_C( -450.78), SIMDE_FLOAT32_C( -83.42), SIMDE_FLOAT32_C( -225.91) }, { SIMDE_FLOAT32_C( -974.60), SIMDE_FLOAT32_C( -205.59), SIMDE_FLOAT32_C( -350.99), SIMDE_FLOAT32_C( -219.80) }, { SIMDE_FLOAT32_C( -985.78), SIMDE_FLOAT32_C( -353.18), SIMDE_FLOAT32_C( -974.60), SIMDE_FLOAT32_C( -181.56), SIMDE_FLOAT32_C( -450.78), SIMDE_FLOAT32_C( -205.59), SIMDE_FLOAT32_C( -910.82), SIMDE_FLOAT32_C( -83.42), SIMDE_FLOAT32_C( -350.99), SIMDE_FLOAT32_C( -732.91), SIMDE_FLOAT32_C( -225.91), SIMDE_FLOAT32_C( -219.80), } }, { { SIMDE_FLOAT32_C( 583.13), SIMDE_FLOAT32_C( 717.32), SIMDE_FLOAT32_C( -138.31), SIMDE_FLOAT32_C( 428.23) }, { SIMDE_FLOAT32_C( -15.12), 
SIMDE_FLOAT32_C( -349.44), SIMDE_FLOAT32_C( 838.60), SIMDE_FLOAT32_C( 645.26) }, { SIMDE_FLOAT32_C( 917.57), SIMDE_FLOAT32_C( -862.79), SIMDE_FLOAT32_C( 403.85), SIMDE_FLOAT32_C( 196.39) }, { SIMDE_FLOAT32_C( 583.13), SIMDE_FLOAT32_C( -15.12), SIMDE_FLOAT32_C( 917.57), SIMDE_FLOAT32_C( 717.32), SIMDE_FLOAT32_C( -349.44), SIMDE_FLOAT32_C( -862.79), SIMDE_FLOAT32_C( -138.31), SIMDE_FLOAT32_C( 838.60), SIMDE_FLOAT32_C( 403.85), SIMDE_FLOAT32_C( 428.23), SIMDE_FLOAT32_C( 645.26), SIMDE_FLOAT32_C( 196.39), } }, { { SIMDE_FLOAT32_C( -768.47), SIMDE_FLOAT32_C( -949.16), SIMDE_FLOAT32_C( 42.85), SIMDE_FLOAT32_C( -252.64) }, { SIMDE_FLOAT32_C( -690.55), SIMDE_FLOAT32_C( -771.86), SIMDE_FLOAT32_C( -903.80), SIMDE_FLOAT32_C( -676.33) }, { SIMDE_FLOAT32_C( 46.58), SIMDE_FLOAT32_C( -814.62), SIMDE_FLOAT32_C( -409.24), SIMDE_FLOAT32_C( 693.40) }, { SIMDE_FLOAT32_C( -768.47), SIMDE_FLOAT32_C( -690.55), SIMDE_FLOAT32_C( 46.58), SIMDE_FLOAT32_C( -949.16), SIMDE_FLOAT32_C( -771.86), SIMDE_FLOAT32_C( -814.62), SIMDE_FLOAT32_C( 42.85), SIMDE_FLOAT32_C( -903.80), SIMDE_FLOAT32_C( -409.24), SIMDE_FLOAT32_C( -252.64), SIMDE_FLOAT32_C( -676.33), SIMDE_FLOAT32_C( 693.40), } }, { { SIMDE_FLOAT32_C( -265.40), SIMDE_FLOAT32_C( 507.34), SIMDE_FLOAT32_C( -532.51), SIMDE_FLOAT32_C( -240.00) }, { SIMDE_FLOAT32_C( -698.24), SIMDE_FLOAT32_C( 116.50), SIMDE_FLOAT32_C( 540.20), SIMDE_FLOAT32_C( 884.89) }, { SIMDE_FLOAT32_C( -166.18), SIMDE_FLOAT32_C( -598.11), SIMDE_FLOAT32_C( 313.12), SIMDE_FLOAT32_C( 818.70) }, { SIMDE_FLOAT32_C( -265.40), SIMDE_FLOAT32_C( -698.24), SIMDE_FLOAT32_C( -166.18), SIMDE_FLOAT32_C( 507.34), SIMDE_FLOAT32_C( 116.50), SIMDE_FLOAT32_C( -598.11), SIMDE_FLOAT32_C( -532.51), SIMDE_FLOAT32_C( 540.20), SIMDE_FLOAT32_C( 313.12), SIMDE_FLOAT32_C( -240.00), SIMDE_FLOAT32_C( 884.89), SIMDE_FLOAT32_C( 818.70), } }, { { SIMDE_FLOAT32_C( 52.45), SIMDE_FLOAT32_C( 151.71), SIMDE_FLOAT32_C( 463.96), SIMDE_FLOAT32_C( -29.98) }, { SIMDE_FLOAT32_C( 288.92), SIMDE_FLOAT32_C( -132.19), 
SIMDE_FLOAT32_C( -833.58), SIMDE_FLOAT32_C( 520.45) }, { SIMDE_FLOAT32_C( -81.36), SIMDE_FLOAT32_C( 209.27), SIMDE_FLOAT32_C( -732.19), SIMDE_FLOAT32_C( 228.09) }, { SIMDE_FLOAT32_C( 52.45), SIMDE_FLOAT32_C( 288.92), SIMDE_FLOAT32_C( -81.36), SIMDE_FLOAT32_C( 151.71), SIMDE_FLOAT32_C( -132.19), SIMDE_FLOAT32_C( 209.27), SIMDE_FLOAT32_C( 463.96), SIMDE_FLOAT32_C( -833.58), SIMDE_FLOAT32_C( -732.19), SIMDE_FLOAT32_C( -29.98), SIMDE_FLOAT32_C( 520.45), SIMDE_FLOAT32_C( 228.09), } }, { { SIMDE_FLOAT32_C( 437.41), SIMDE_FLOAT32_C( -635.99), SIMDE_FLOAT32_C( 551.76), SIMDE_FLOAT32_C( -516.01) }, { SIMDE_FLOAT32_C( -450.62), SIMDE_FLOAT32_C( -857.48), SIMDE_FLOAT32_C( -822.60), SIMDE_FLOAT32_C( 283.98) }, { SIMDE_FLOAT32_C( 649.87), SIMDE_FLOAT32_C( -355.11), SIMDE_FLOAT32_C( -956.02), SIMDE_FLOAT32_C( 951.62) }, { SIMDE_FLOAT32_C( 437.41), SIMDE_FLOAT32_C( -450.62), SIMDE_FLOAT32_C( 649.87), SIMDE_FLOAT32_C( -635.99), SIMDE_FLOAT32_C( -857.48), SIMDE_FLOAT32_C( -355.11), SIMDE_FLOAT32_C( 551.76), SIMDE_FLOAT32_C( -822.60), SIMDE_FLOAT32_C( -956.02), SIMDE_FLOAT32_C( -516.01), SIMDE_FLOAT32_C( 283.98), SIMDE_FLOAT32_C( 951.62), } }, { { SIMDE_FLOAT32_C( 761.38), SIMDE_FLOAT32_C( 584.18), SIMDE_FLOAT32_C( 836.51), SIMDE_FLOAT32_C( -404.80) }, { SIMDE_FLOAT32_C( 986.07), SIMDE_FLOAT32_C( 149.63), SIMDE_FLOAT32_C( -586.10), SIMDE_FLOAT32_C( 38.52) }, { SIMDE_FLOAT32_C( -698.66), SIMDE_FLOAT32_C( 877.86), SIMDE_FLOAT32_C( -991.46), SIMDE_FLOAT32_C( 590.26) }, { SIMDE_FLOAT32_C( 761.38), SIMDE_FLOAT32_C( 986.07), SIMDE_FLOAT32_C( -698.66), SIMDE_FLOAT32_C( 584.18), SIMDE_FLOAT32_C( 149.63), SIMDE_FLOAT32_C( 877.86), SIMDE_FLOAT32_C( 836.51), SIMDE_FLOAT32_C( -586.10), SIMDE_FLOAT32_C( -991.46), SIMDE_FLOAT32_C( -404.80), SIMDE_FLOAT32_C( 38.52), SIMDE_FLOAT32_C( 590.26), } }, { { SIMDE_FLOAT32_C( -254.33), SIMDE_FLOAT32_C( -825.04), SIMDE_FLOAT32_C( 110.71), SIMDE_FLOAT32_C( 664.31) }, { SIMDE_FLOAT32_C( 384.23), SIMDE_FLOAT32_C( 378.52), SIMDE_FLOAT32_C( -107.59), 
SIMDE_FLOAT32_C( -178.36) }, { SIMDE_FLOAT32_C( 742.53), SIMDE_FLOAT32_C( -555.83), SIMDE_FLOAT32_C( 305.63), SIMDE_FLOAT32_C( -708.09) }, { SIMDE_FLOAT32_C( -254.33), SIMDE_FLOAT32_C( 384.23), SIMDE_FLOAT32_C( 742.53), SIMDE_FLOAT32_C( -825.04), SIMDE_FLOAT32_C( 378.52), SIMDE_FLOAT32_C( -555.83), SIMDE_FLOAT32_C( 110.71), SIMDE_FLOAT32_C( -107.59), SIMDE_FLOAT32_C( 305.63), SIMDE_FLOAT32_C( 664.31), SIMDE_FLOAT32_C( -178.36), SIMDE_FLOAT32_C( -708.09), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4x3_t r_ = { { simde_vld1q_f32(test_vec[i].r0), simde_vld1q_f32(test_vec[i].r1), simde_vld1q_f32(test_vec[i].r2), } }; simde_float32 a_[12]; simde_vst3q_f32(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld3q_f32(a_); simde_test_arm_neon_assert_equal_f32x4(r_.val[0], simde_vld1q_f32(test_vec[i].r0), 1); simde_test_arm_neon_assert_equal_f32x4(r_.val[1], simde_vld1q_f32(test_vec[i].r1), 1); simde_test_arm_neon_assert_equal_f32x4(r_.val[2], simde_vld1q_f32(test_vec[i].r2), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x4_t a0 = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t a1 = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4_t a2 = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); simde_float32x4x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_f32x4(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x4(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x4(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_float32_t buf[12]; simde_vst3q_f32(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_f32(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst3q_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 r0[2]; simde_float64 
r1[2]; simde_float64 r2[2]; simde_float64 a[6]; } test_vec[] = { { { SIMDE_FLOAT64_C( 758.07), SIMDE_FLOAT64_C( 84.29) }, { SIMDE_FLOAT64_C( -600.63), SIMDE_FLOAT64_C( -472.51) }, { SIMDE_FLOAT64_C( 725.40), SIMDE_FLOAT64_C( 903.42) }, { SIMDE_FLOAT64_C( 758.07), SIMDE_FLOAT64_C( -600.63), SIMDE_FLOAT64_C( 725.40), SIMDE_FLOAT64_C( 84.29), SIMDE_FLOAT64_C( -472.51), SIMDE_FLOAT64_C( 903.42), } }, { { SIMDE_FLOAT64_C( -564.41), SIMDE_FLOAT64_C( -453.39) }, { SIMDE_FLOAT64_C( -311.50), SIMDE_FLOAT64_C( -928.88) }, { SIMDE_FLOAT64_C( 670.94), SIMDE_FLOAT64_C( -991.23) }, { SIMDE_FLOAT64_C( -564.41), SIMDE_FLOAT64_C( -311.50), SIMDE_FLOAT64_C( 670.94), SIMDE_FLOAT64_C( -453.39), SIMDE_FLOAT64_C( -928.88), SIMDE_FLOAT64_C( -991.23), } }, { { SIMDE_FLOAT64_C( 354.81), SIMDE_FLOAT64_C( 507.91) }, { SIMDE_FLOAT64_C( 71.00), SIMDE_FLOAT64_C( 659.02) }, { SIMDE_FLOAT64_C( 105.92), SIMDE_FLOAT64_C( 373.60) }, { SIMDE_FLOAT64_C( 354.81), SIMDE_FLOAT64_C( 71.00), SIMDE_FLOAT64_C( 105.92), SIMDE_FLOAT64_C( 507.91), SIMDE_FLOAT64_C( 659.02), SIMDE_FLOAT64_C( 373.60), } }, { { SIMDE_FLOAT64_C( -261.08), SIMDE_FLOAT64_C( -713.36) }, { SIMDE_FLOAT64_C( -70.21), SIMDE_FLOAT64_C( 417.30) }, { SIMDE_FLOAT64_C( -815.10), SIMDE_FLOAT64_C( 451.49) }, { SIMDE_FLOAT64_C( -261.08), SIMDE_FLOAT64_C( -70.21), SIMDE_FLOAT64_C( -815.10), SIMDE_FLOAT64_C( -713.36), SIMDE_FLOAT64_C( 417.30), SIMDE_FLOAT64_C( 451.49), } }, { { SIMDE_FLOAT64_C( 701.42), SIMDE_FLOAT64_C( -485.46) }, { SIMDE_FLOAT64_C( -486.64), SIMDE_FLOAT64_C( 421.84) }, { SIMDE_FLOAT64_C( -955.83), SIMDE_FLOAT64_C( -150.69) }, { SIMDE_FLOAT64_C( 701.42), SIMDE_FLOAT64_C( -486.64), SIMDE_FLOAT64_C( -955.83), SIMDE_FLOAT64_C( -485.46), SIMDE_FLOAT64_C( 421.84), SIMDE_FLOAT64_C( -150.69), } }, { { SIMDE_FLOAT64_C( 498.32), SIMDE_FLOAT64_C( 802.24) }, { SIMDE_FLOAT64_C( 933.60), SIMDE_FLOAT64_C( 897.69) }, { SIMDE_FLOAT64_C( -670.27), SIMDE_FLOAT64_C( 659.00) }, { SIMDE_FLOAT64_C( 498.32), SIMDE_FLOAT64_C( 933.60), SIMDE_FLOAT64_C( 
-670.27), SIMDE_FLOAT64_C( 802.24), SIMDE_FLOAT64_C( 897.69), SIMDE_FLOAT64_C( 659.00), } }, { { SIMDE_FLOAT64_C( 801.12), SIMDE_FLOAT64_C( -234.68) }, { SIMDE_FLOAT64_C( -794.39), SIMDE_FLOAT64_C( -510.38) }, { SIMDE_FLOAT64_C( -163.56), SIMDE_FLOAT64_C( 876.55) }, { SIMDE_FLOAT64_C( 801.12), SIMDE_FLOAT64_C( -794.39), SIMDE_FLOAT64_C( -163.56), SIMDE_FLOAT64_C( -234.68), SIMDE_FLOAT64_C( -510.38), SIMDE_FLOAT64_C( 876.55), } }, { { SIMDE_FLOAT64_C( -501.62), SIMDE_FLOAT64_C( -808.75) }, { SIMDE_FLOAT64_C( 384.46), SIMDE_FLOAT64_C( 569.38) }, { SIMDE_FLOAT64_C( 850.27), SIMDE_FLOAT64_C( -509.62) }, { SIMDE_FLOAT64_C( -501.62), SIMDE_FLOAT64_C( 384.46), SIMDE_FLOAT64_C( 850.27), SIMDE_FLOAT64_C( -808.75), SIMDE_FLOAT64_C( 569.38), SIMDE_FLOAT64_C( -509.62), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2x3_t r_ = { { simde_vld1q_f64(test_vec[i].r0), simde_vld1q_f64(test_vec[i].r1), simde_vld1q_f64(test_vec[i].r2), } }; simde_float64_t a_[6]; simde_vst3q_f64(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld3q_f64(a_); simde_test_arm_neon_assert_equal_f64x2(r_.val[0], simde_vld1q_f64(test_vec[i].r0), 1); simde_test_arm_neon_assert_equal_f64x2(r_.val[1], simde_vld1q_f64(test_vec[i].r1), 1); simde_test_arm_neon_assert_equal_f64x2(r_.val[2], simde_vld1q_f64(test_vec[i].r2), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x2_t a0 = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f); simde_float64x2_t a1 = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f); simde_float64x2_t a2 = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f); simde_float64x2x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_f64x2(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x2(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x2(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_float64_t buf[6]; simde_vst3q_f64(buf, a); printf(" 
{\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_f64(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst3q_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t r0[16]; int8_t r1[16]; int8_t r2[16]; int8_t a[48]; } test_vec[] = { { { INT8_C( 38), -INT8_C( 65), INT8_C( 42), INT8_C( 56), INT8_C( 36), -INT8_C( 28), -INT8_C( 63), -INT8_C( 86), INT8_C( 15), INT8_C( 115), INT8_C( 26), INT8_C( 40), -INT8_C( 25), INT8_C( 24), -INT8_C( 121), INT8_C( 86) }, { INT8_C( 63), -INT8_C( 66), -INT8_C( 76), -INT8_C( 106), INT8_C( 57), -INT8_C( 69), -INT8_C( 57), -INT8_C( 91), -INT8_C( 60), -INT8_C( 43), -INT8_C( 102), -INT8_C( 91), -INT8_C( 16), -INT8_C( 95), INT8_MIN, INT8_C( 22) }, { INT8_C( 97), -INT8_C( 86), INT8_C( 79), -INT8_C( 123), -INT8_C( 114), INT8_C( 16), INT8_C( 47), -INT8_C( 99), -INT8_C( 124), INT8_C( 73), -INT8_C( 58), INT8_C( 107), INT8_C( 97), INT8_C( 77), -INT8_C( 63), -INT8_C( 96) }, { INT8_C( 38), INT8_C( 63), INT8_C( 97), -INT8_C( 65), -INT8_C( 66), -INT8_C( 86), INT8_C( 42), -INT8_C( 76), INT8_C( 79), INT8_C( 56), -INT8_C( 106), -INT8_C( 123), INT8_C( 36), INT8_C( 57), -INT8_C( 114), -INT8_C( 28), -INT8_C( 69), INT8_C( 16), -INT8_C( 63), -INT8_C( 57), INT8_C( 47), -INT8_C( 86), -INT8_C( 91), -INT8_C( 99), INT8_C( 15), -INT8_C( 60), -INT8_C( 124), INT8_C( 115), -INT8_C( 43), INT8_C( 73), INT8_C( 26), -INT8_C( 102), -INT8_C( 58), INT8_C( 40), -INT8_C( 91), INT8_C( 107), -INT8_C( 25), -INT8_C( 16), INT8_C( 97), INT8_C( 24), -INT8_C( 95), INT8_C( 77), -INT8_C( 121), INT8_MIN, -INT8_C( 63), INT8_C( 86), INT8_C( 22), -INT8_C( 96), } }, { { INT8_C( 11), INT8_C( 117), INT8_C( 54), INT8_C( 69), INT8_C( 48), -INT8_C( 2), -INT8_C( 22), -INT8_C( 11), -INT8_C( 45), -INT8_C( 124), -INT8_C( 102), -INT8_C( 60), INT8_C( 38), INT8_C( 26), -INT8_C( 38), -INT8_C( 121) }, { -INT8_C( 60), INT8_C( 41), INT8_C( 12), INT8_C( 83), INT8_C( 58), INT8_C( 59), -INT8_C( 16), 
-INT8_C( 66), -INT8_C( 123), -INT8_C( 74), INT8_C( 41), -INT8_C( 26), INT8_C( 4), -INT8_C( 21), -INT8_C( 121), INT8_C( 15) }, { INT8_C( 96), -INT8_C( 67), INT8_C( 84), -INT8_C( 111), -INT8_C( 69), INT8_C( 62), -INT8_C( 122), -INT8_C( 113), -INT8_C( 61), INT8_C( 32), INT8_C( 83), -INT8_C( 23), INT8_C( 58), INT8_C( 45), INT8_C( 112), -INT8_C( 2) }, { INT8_C( 11), -INT8_C( 60), INT8_C( 96), INT8_C( 117), INT8_C( 41), -INT8_C( 67), INT8_C( 54), INT8_C( 12), INT8_C( 84), INT8_C( 69), INT8_C( 83), -INT8_C( 111), INT8_C( 48), INT8_C( 58), -INT8_C( 69), -INT8_C( 2), INT8_C( 59), INT8_C( 62), -INT8_C( 22), -INT8_C( 16), -INT8_C( 122), -INT8_C( 11), -INT8_C( 66), -INT8_C( 113), -INT8_C( 45), -INT8_C( 123), -INT8_C( 61), -INT8_C( 124), -INT8_C( 74), INT8_C( 32), -INT8_C( 102), INT8_C( 41), INT8_C( 83), -INT8_C( 60), -INT8_C( 26), -INT8_C( 23), INT8_C( 38), INT8_C( 4), INT8_C( 58), INT8_C( 26), -INT8_C( 21), INT8_C( 45), -INT8_C( 38), -INT8_C( 121), INT8_C( 112), -INT8_C( 121), INT8_C( 15), -INT8_C( 2), } }, { { INT8_C( 87), INT8_C( 124), INT8_C( 81), -INT8_C( 111), -INT8_C( 73), INT8_C( 66), INT8_C( 79), INT8_C( 60), -INT8_C( 8), INT8_C( 120), INT8_C( 35), -INT8_C( 4), INT8_C( 99), -INT8_C( 86), INT8_C( 12), -INT8_C( 60) }, { INT8_C( 103), INT8_C( 96), INT8_C( 85), INT8_C( 35), -INT8_C( 97), -INT8_C( 37), -INT8_C( 78), INT8_C( 98), -INT8_C( 5), INT8_C( 5), INT8_C( 75), INT8_C( 53), INT8_C( 50), -INT8_C( 69), INT8_C( 51), -INT8_C( 119) }, { INT8_C( 55), -INT8_C( 123), INT8_C( 26), -INT8_C( 18), -INT8_C( 57), INT8_C( 105), INT8_C( 43), -INT8_C( 65), -INT8_C( 30), INT8_C( 78), -INT8_C( 68), INT8_C( 69), -INT8_C( 8), -INT8_C( 56), INT8_C( 9), INT8_C( 95) }, { INT8_C( 87), INT8_C( 103), INT8_C( 55), INT8_C( 124), INT8_C( 96), -INT8_C( 123), INT8_C( 81), INT8_C( 85), INT8_C( 26), -INT8_C( 111), INT8_C( 35), -INT8_C( 18), -INT8_C( 73), -INT8_C( 97), -INT8_C( 57), INT8_C( 66), -INT8_C( 37), INT8_C( 105), INT8_C( 79), -INT8_C( 78), INT8_C( 43), INT8_C( 60), INT8_C( 98), -INT8_C( 65), 
-INT8_C( 8), -INT8_C( 5), -INT8_C( 30), INT8_C( 120), INT8_C( 5), INT8_C( 78), INT8_C( 35), INT8_C( 75), -INT8_C( 68), -INT8_C( 4), INT8_C( 53), INT8_C( 69), INT8_C( 99), INT8_C( 50), -INT8_C( 8), -INT8_C( 86), -INT8_C( 69), -INT8_C( 56), INT8_C( 12), INT8_C( 51), INT8_C( 9), -INT8_C( 60), -INT8_C( 119), INT8_C( 95), } }, { { INT8_C( 40), INT8_C( 94), -INT8_C( 126), -INT8_C( 57), INT8_C( 57), INT8_C( 52), INT8_C( 41), INT8_C( 52), INT8_C( 57), INT8_C( 116), INT8_C( 105), INT8_C( 108), INT8_C( 47), -INT8_C( 99), -INT8_C( 11), INT8_C( 102) }, { INT8_C( 34), INT8_C( 16), INT8_C( 85), -INT8_C( 23), INT8_C( 121), INT8_MIN, -INT8_C( 88), INT8_C( 91), -INT8_C( 50), INT8_C( 100), -INT8_C( 95), -INT8_C( 58), INT8_C( 44), -INT8_C( 86), INT8_C( 37), INT8_C( 85) }, { INT8_C( 9), -INT8_C( 88), INT8_C( 28), INT8_C( 66), -INT8_C( 36), INT8_C( 70), INT8_C( 119), INT8_C( 22), -INT8_C( 70), -INT8_C( 32), -INT8_C( 126), -INT8_C( 22), INT8_C( 125), INT8_C( 119), INT8_C( 80), -INT8_C( 97) }, { INT8_C( 40), INT8_C( 34), INT8_C( 9), INT8_C( 94), INT8_C( 16), -INT8_C( 88), -INT8_C( 126), INT8_C( 85), INT8_C( 28), -INT8_C( 57), -INT8_C( 23), INT8_C( 66), INT8_C( 57), INT8_C( 121), -INT8_C( 36), INT8_C( 52), INT8_MIN, INT8_C( 70), INT8_C( 41), -INT8_C( 88), INT8_C( 119), INT8_C( 52), INT8_C( 91), INT8_C( 22), INT8_C( 57), -INT8_C( 50), -INT8_C( 70), INT8_C( 116), INT8_C( 100), -INT8_C( 32), INT8_C( 105), -INT8_C( 95), -INT8_C( 126), INT8_C( 108), -INT8_C( 58), -INT8_C( 22), INT8_C( 47), INT8_C( 44), INT8_C( 125), -INT8_C( 99), -INT8_C( 86), INT8_C( 119), -INT8_C( 11), INT8_C( 37), INT8_C( 80), INT8_C( 102), INT8_C( 85), -INT8_C( 97), } }, { { -INT8_C( 121), -INT8_C( 91), -INT8_C( 120), INT8_C( 1), INT8_C( 37), INT8_C( 49), INT8_C( 92), -INT8_C( 13), -INT8_C( 107), -INT8_C( 3), -INT8_C( 71), -INT8_C( 62), -INT8_C( 88), -INT8_C( 33), INT8_C( 23), -INT8_C( 79) }, { -INT8_C( 121), INT8_C( 51), -INT8_C( 13), INT8_C( 99), INT8_C( 121), INT8_C( 106), INT8_C( 121), INT8_C( 52), INT8_C( 75), 
-INT8_C( 5), INT8_C( 30), -INT8_C( 56), INT8_C( 115), INT8_C( 110), INT8_C( 104), -INT8_C( 6) }, { INT8_C( 20), -INT8_C( 16), -INT8_C( 5), INT8_C( 57), INT8_C( 33), INT8_C( 88), INT8_C( 45), -INT8_C( 73), INT8_C( 85), -INT8_C( 26), INT8_C( 121), -INT8_C( 3), -INT8_C( 59), -INT8_C( 112), -INT8_C( 82), INT8_C( 76) }, { -INT8_C( 121), -INT8_C( 121), INT8_C( 20), -INT8_C( 91), INT8_C( 51), -INT8_C( 16), -INT8_C( 120), -INT8_C( 13), -INT8_C( 5), INT8_C( 1), INT8_C( 99), INT8_C( 57), INT8_C( 37), INT8_C( 121), INT8_C( 33), INT8_C( 49), INT8_C( 106), INT8_C( 88), INT8_C( 92), INT8_C( 121), INT8_C( 45), -INT8_C( 13), INT8_C( 52), -INT8_C( 73), -INT8_C( 107), INT8_C( 75), INT8_C( 85), -INT8_C( 3), -INT8_C( 5), -INT8_C( 26), -INT8_C( 71), INT8_C( 30), INT8_C( 121), -INT8_C( 62), -INT8_C( 56), -INT8_C( 3), -INT8_C( 88), INT8_C( 115), -INT8_C( 59), -INT8_C( 33), INT8_C( 110), -INT8_C( 112), INT8_C( 23), INT8_C( 104), -INT8_C( 82), -INT8_C( 79), -INT8_C( 6), INT8_C( 76), } }, { { -INT8_C( 61), -INT8_C( 94), -INT8_C( 80), INT8_C( 61), INT8_C( 12), INT8_C( 41), INT8_C( 113), INT8_C( 87), INT8_C( 37), -INT8_C( 113), INT8_C( 32), -INT8_C( 104), -INT8_C( 3), -INT8_C( 120), -INT8_C( 110), INT8_C( 17) }, { INT8_C( 120), -INT8_C( 114), INT8_C( 75), -INT8_C( 102), -INT8_C( 26), INT8_C( 120), INT8_C( 81), INT8_C( 59), INT8_C( 94), -INT8_C( 54), INT8_C( 57), INT8_C( 36), INT8_C( 90), -INT8_C( 25), INT8_C( 112), INT8_C( 29) }, { -INT8_C( 119), INT8_C( 32), INT8_C( 90), -INT8_C( 106), INT8_C( 74), -INT8_C( 53), -INT8_C( 19), INT8_C( 111), INT8_C( 90), INT8_C( 13), INT8_C( 7), INT8_C( 88), -INT8_C( 107), -INT8_C( 103), INT8_C( 105), INT8_C( 14) }, { -INT8_C( 61), INT8_C( 120), -INT8_C( 119), -INT8_C( 94), -INT8_C( 114), INT8_C( 32), -INT8_C( 80), INT8_C( 75), INT8_C( 90), INT8_C( 61), -INT8_C( 102), -INT8_C( 106), INT8_C( 12), -INT8_C( 26), INT8_C( 74), INT8_C( 41), INT8_C( 120), -INT8_C( 53), INT8_C( 113), INT8_C( 81), -INT8_C( 19), INT8_C( 87), INT8_C( 59), INT8_C( 111), INT8_C( 37), 
INT8_C( 94), INT8_C( 90), -INT8_C( 113), -INT8_C( 54), INT8_C( 13), INT8_C( 32), INT8_C( 57), INT8_C( 7), -INT8_C( 104), INT8_C( 36), INT8_C( 88), -INT8_C( 3), INT8_C( 90), -INT8_C( 107), -INT8_C( 120), -INT8_C( 25), -INT8_C( 103), -INT8_C( 110), INT8_C( 112), INT8_C( 105), INT8_C( 17), INT8_C( 29), INT8_C( 14), } }, { { INT8_C( 39), -INT8_C( 76), -INT8_C( 88), INT8_C( 13), INT8_C( 44), -INT8_C( 7), INT8_C( 73), -INT8_C( 117), -INT8_C( 61), -INT8_C( 126), -INT8_C( 81), INT8_C( 29), INT8_C( 105), INT8_C( 31), INT8_C( 58), -INT8_C( 13) }, { INT8_C( 64), -INT8_C( 107), -INT8_C( 119), -INT8_C( 118), INT8_C( 96), INT8_C( 118), -INT8_C( 7), -INT8_C( 69), -INT8_C( 124), INT8_C( 0), INT8_C( 19), INT8_C( 25), -INT8_C( 103), INT8_C( 124), INT8_C( 39), -INT8_C( 63) }, { INT8_C( 49), -INT8_C( 49), -INT8_C( 50), INT8_C( 93), -INT8_C( 56), INT8_C( 23), -INT8_C( 24), -INT8_C( 117), -INT8_C( 103), -INT8_C( 105), -INT8_C( 88), INT8_C( 3), -INT8_C( 73), -INT8_C( 29), -INT8_C( 10), -INT8_C( 9) }, { INT8_C( 39), INT8_C( 64), INT8_C( 49), -INT8_C( 76), -INT8_C( 107), -INT8_C( 49), -INT8_C( 88), -INT8_C( 119), -INT8_C( 50), INT8_C( 13), -INT8_C( 118), INT8_C( 93), INT8_C( 44), INT8_C( 96), -INT8_C( 56), -INT8_C( 7), INT8_C( 118), INT8_C( 23), INT8_C( 73), -INT8_C( 7), -INT8_C( 24), -INT8_C( 117), -INT8_C( 69), -INT8_C( 117), -INT8_C( 61), -INT8_C( 124), -INT8_C( 103), -INT8_C( 126), INT8_C( 0), -INT8_C( 105), -INT8_C( 81), INT8_C( 19), -INT8_C( 88), INT8_C( 29), INT8_C( 25), INT8_C( 3), INT8_C( 105), -INT8_C( 103), -INT8_C( 73), INT8_C( 31), INT8_C( 124), -INT8_C( 29), INT8_C( 58), INT8_C( 39), -INT8_C( 10), -INT8_C( 13), -INT8_C( 63), -INT8_C( 9), } }, { { INT8_C( 120), INT8_MAX, -INT8_C( 127), -INT8_C( 40), -INT8_C( 11), INT8_C( 122), -INT8_C( 109), INT8_C( 121), INT8_C( 122), -INT8_C( 90), -INT8_C( 109), INT8_C( 19), INT8_C( 35), -INT8_C( 70), -INT8_C( 44), INT8_C( 84) }, { -INT8_C( 118), -INT8_C( 93), -INT8_C( 79), INT8_C( 82), -INT8_C( 70), -INT8_C( 102), -INT8_C( 34), INT8_C( 84), 
INT8_C( 49), -INT8_C( 122), INT8_C( 87), -INT8_C( 24), INT8_C( 105), INT8_C( 77), -INT8_C( 33), -INT8_C( 31) }, { -INT8_C( 52), INT8_C( 96), -INT8_C( 70), -INT8_C( 63), -INT8_C( 38), INT8_C( 77), INT8_C( 59), INT8_C( 84), -INT8_C( 12), -INT8_C( 50), INT8_C( 104), INT8_C( 23), -INT8_C( 120), INT8_C( 60), INT8_C( 107), INT8_C( 18) }, { INT8_C( 120), -INT8_C( 118), -INT8_C( 52), INT8_MAX, -INT8_C( 93), INT8_C( 96), -INT8_C( 127), -INT8_C( 79), -INT8_C( 70), -INT8_C( 40), INT8_C( 82), -INT8_C( 63), -INT8_C( 11), -INT8_C( 70), -INT8_C( 38), INT8_C( 122), -INT8_C( 102), INT8_C( 77), -INT8_C( 109), -INT8_C( 34), INT8_C( 59), INT8_C( 121), INT8_C( 84), INT8_C( 84), INT8_C( 122), INT8_C( 49), -INT8_C( 12), -INT8_C( 90), -INT8_C( 122), -INT8_C( 50), -INT8_C( 109), INT8_C( 87), INT8_C( 104), INT8_C( 19), -INT8_C( 24), INT8_C( 23), INT8_C( 35), INT8_C( 105), -INT8_C( 120), -INT8_C( 70), INT8_C( 77), INT8_C( 60), -INT8_C( 44), -INT8_C( 33), INT8_C( 107), INT8_C( 84), -INT8_C( 31), INT8_C( 18), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16x3_t r_ = { { simde_vld1q_s8(test_vec[i].r0), simde_vld1q_s8(test_vec[i].r1), simde_vld1q_s8(test_vec[i].r2), } }; int8_t a_[48]; simde_vst3q_s8(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld3q_s8(a_); simde_test_arm_neon_assert_equal_i8x16(r_.val[0], simde_vld1q_s8(test_vec[i].r0)); simde_test_arm_neon_assert_equal_i8x16(r_.val[1], simde_vld1q_s8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i8x16(r_.val[2], simde_vld1q_s8(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a0 = simde_test_arm_neon_random_i8x16(); simde_int8x16_t a1 = simde_test_arm_neon_random_i8x16(); simde_int8x16_t a2 = simde_test_arm_neon_random_i8x16(); simde_int8x16x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_i8x16(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, a1, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); int8_t buf[48]; simde_vst3q_s8(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_i8(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst3q_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t r0[8]; int16_t r1[8]; int16_t r2[8]; int16_t a[24]; } test_vec[] = { { { -INT16_C( 7093), INT16_C( 1303), -INT16_C( 17439), -INT16_C( 22859), INT16_C( 31645), INT16_C( 1370), INT16_C( 32280), -INT16_C( 15440) }, { INT16_C( 13391), INT16_C( 3162), INT16_C( 2283), INT16_C( 24508), -INT16_C( 9937), INT16_C( 27470), INT16_C( 29591), -INT16_C( 7429) }, { INT16_C( 4695), INT16_C( 14823), -INT16_C( 25139), INT16_C( 27359), INT16_C( 14616), INT16_C( 12399), INT16_C( 8375), INT16_C( 1779) }, { -INT16_C( 7093), INT16_C( 13391), INT16_C( 4695), INT16_C( 1303), INT16_C( 3162), INT16_C( 14823), -INT16_C( 17439), INT16_C( 2283), -INT16_C( 25139), -INT16_C( 22859), INT16_C( 24508), INT16_C( 27359), INT16_C( 31645), -INT16_C( 9937), INT16_C( 14616), INT16_C( 1370), INT16_C( 27470), INT16_C( 12399), INT16_C( 32280), INT16_C( 29591), INT16_C( 8375), -INT16_C( 15440), -INT16_C( 7429), INT16_C( 1779), } }, { { INT16_C( 19796), INT16_C( 16147), -INT16_C( 12459), -INT16_C( 31586), -INT16_C( 4952), INT16_C( 16368), -INT16_C( 5280), -INT16_C( 18655) }, { INT16_C( 2557), -INT16_C( 13328), -INT16_C( 12378), -INT16_C( 16843), -INT16_C( 23288), -INT16_C( 16402), -INT16_C( 7483), INT16_C( 6598) }, { -INT16_C( 9937), -INT16_C( 31400), -INT16_C( 2392), INT16_C( 20489), -INT16_C( 1566), INT16_C( 17040), -INT16_C( 19996), -INT16_C( 7430) }, { INT16_C( 19796), INT16_C( 2557), -INT16_C( 9937), INT16_C( 16147), -INT16_C( 13328), -INT16_C( 31400), -INT16_C( 12459), -INT16_C( 12378), -INT16_C( 2392), -INT16_C( 31586), -INT16_C( 16843), INT16_C( 20489), -INT16_C( 4952), -INT16_C( 23288), 
-INT16_C( 1566), INT16_C( 16368), -INT16_C( 16402), INT16_C( 17040), -INT16_C( 5280), -INT16_C( 7483), -INT16_C( 19996), -INT16_C( 18655), INT16_C( 6598), -INT16_C( 7430), } }, { { -INT16_C( 5446), INT16_C( 24749), -INT16_C( 7494), -INT16_C( 15842), INT16_C( 3463), INT16_C( 19586), INT16_C( 18671), INT16_C( 7781) }, { -INT16_C( 17119), -INT16_C( 13917), -INT16_C( 21069), -INT16_C( 27111), -INT16_C( 22106), -INT16_C( 29736), -INT16_C( 11685), INT16_C( 5485) }, { INT16_C( 6845), INT16_C( 30582), -INT16_C( 27396), -INT16_C( 31687), -INT16_C( 17503), -INT16_C( 28464), INT16_C( 13827), INT16_C( 9391) }, { -INT16_C( 5446), -INT16_C( 17119), INT16_C( 6845), INT16_C( 24749), -INT16_C( 13917), INT16_C( 30582), -INT16_C( 7494), -INT16_C( 21069), -INT16_C( 27396), -INT16_C( 15842), -INT16_C( 27111), -INT16_C( 31687), INT16_C( 3463), -INT16_C( 22106), -INT16_C( 17503), INT16_C( 19586), -INT16_C( 29736), -INT16_C( 28464), INT16_C( 18671), -INT16_C( 11685), INT16_C( 13827), INT16_C( 7781), INT16_C( 5485), INT16_C( 9391), } }, { { INT16_C( 21235), -INT16_C( 22547), INT16_C( 2047), -INT16_C( 22979), INT16_C( 5552), INT16_C( 2865), -INT16_C( 24856), -INT16_C( 23263) }, { -INT16_C( 26696), -INT16_C( 19428), INT16_C( 21803), -INT16_C( 13000), INT16_C( 2321), INT16_C( 5213), INT16_C( 3135), INT16_C( 12857) }, { INT16_C( 9823), INT16_C( 24281), INT16_C( 5677), -INT16_C( 8700), INT16_C( 13612), INT16_C( 5353), INT16_C( 2771), -INT16_C( 29767) }, { INT16_C( 21235), -INT16_C( 26696), INT16_C( 9823), -INT16_C( 22547), -INT16_C( 19428), INT16_C( 24281), INT16_C( 2047), INT16_C( 21803), INT16_C( 5677), -INT16_C( 22979), -INT16_C( 13000), -INT16_C( 8700), INT16_C( 5552), INT16_C( 2321), INT16_C( 13612), INT16_C( 2865), INT16_C( 5213), INT16_C( 5353), -INT16_C( 24856), INT16_C( 3135), INT16_C( 2771), -INT16_C( 23263), INT16_C( 12857), -INT16_C( 29767), } }, { { -INT16_C( 10847), -INT16_C( 12992), INT16_C( 30762), INT16_C( 15258), -INT16_C( 2175), -INT16_C( 16304), -INT16_C( 30460), INT16_C( 
25587) }, { -INT16_C( 13137), -INT16_C( 8767), -INT16_C( 14621), INT16_C( 4027), -INT16_C( 23301), -INT16_C( 12509), -INT16_C( 9041), INT16_C( 20570) }, { -INT16_C( 25935), -INT16_C( 9443), -INT16_C( 18669), -INT16_C( 27625), INT16_C( 26543), -INT16_C( 19627), INT16_C( 18672), -INT16_C( 24810) }, { -INT16_C( 10847), -INT16_C( 13137), -INT16_C( 25935), -INT16_C( 12992), -INT16_C( 8767), -INT16_C( 9443), INT16_C( 30762), -INT16_C( 14621), -INT16_C( 18669), INT16_C( 15258), INT16_C( 4027), -INT16_C( 27625), -INT16_C( 2175), -INT16_C( 23301), INT16_C( 26543), -INT16_C( 16304), -INT16_C( 12509), -INT16_C( 19627), -INT16_C( 30460), -INT16_C( 9041), INT16_C( 18672), INT16_C( 25587), INT16_C( 20570), -INT16_C( 24810), } }, { { -INT16_C( 10476), -INT16_C( 2180), INT16_C( 14237), -INT16_C( 26362), INT16_C( 10716), -INT16_C( 29848), -INT16_C( 15867), -INT16_C( 18725) }, { -INT16_C( 1699), INT16_C( 28818), -INT16_C( 22096), INT16_C( 24324), INT16_C( 22800), INT16_C( 18), INT16_C( 10401), -INT16_C( 18785) }, { INT16_C( 7168), -INT16_C( 25171), -INT16_C( 19373), INT16_C( 12086), -INT16_C( 24867), -INT16_C( 7238), -INT16_C( 27039), -INT16_C( 16743) }, { -INT16_C( 10476), -INT16_C( 1699), INT16_C( 7168), -INT16_C( 2180), INT16_C( 28818), -INT16_C( 25171), INT16_C( 14237), -INT16_C( 22096), -INT16_C( 19373), -INT16_C( 26362), INT16_C( 24324), INT16_C( 12086), INT16_C( 10716), INT16_C( 22800), -INT16_C( 24867), -INT16_C( 29848), INT16_C( 18), -INT16_C( 7238), -INT16_C( 15867), INT16_C( 10401), -INT16_C( 27039), -INT16_C( 18725), -INT16_C( 18785), -INT16_C( 16743), } }, { { INT16_C( 11151), INT16_C( 16174), INT16_C( 13012), -INT16_C( 7009), -INT16_C( 20084), INT16_C( 11748), -INT16_C( 31526), -INT16_C( 9501) }, { -INT16_C( 28256), -INT16_C( 3209), -INT16_C( 20923), INT16_C( 8739), -INT16_C( 8884), -INT16_C( 21243), -INT16_C( 24717), INT16_C( 619) }, { -INT16_C( 26166), -INT16_C( 24766), -INT16_C( 7732), INT16_C( 22659), INT16_C( 26770), INT16_C( 27781), INT16_C( 27116), -INT16_C( 
29626) }, { INT16_C( 11151), -INT16_C( 28256), -INT16_C( 26166), INT16_C( 16174), -INT16_C( 3209), -INT16_C( 24766), INT16_C( 13012), -INT16_C( 20923), -INT16_C( 7732), -INT16_C( 7009), INT16_C( 8739), INT16_C( 22659), -INT16_C( 20084), -INT16_C( 8884), INT16_C( 26770), INT16_C( 11748), -INT16_C( 21243), INT16_C( 27781), -INT16_C( 31526), -INT16_C( 24717), INT16_C( 27116), -INT16_C( 9501), INT16_C( 619), -INT16_C( 29626), } }, { { -INT16_C( 16646), INT16_C( 16255), -INT16_C( 23956), -INT16_C( 18335), INT16_C( 26496), -INT16_C( 3226), -INT16_C( 12026), -INT16_C( 12042) }, { INT16_C( 14443), INT16_C( 14191), -INT16_C( 3303), -INT16_C( 21617), INT16_C( 5211), INT16_C( 18200), INT16_C( 24189), INT16_C( 30675) }, { INT16_C( 21020), -INT16_C( 30538), INT16_C( 6389), INT16_C( 30017), -INT16_C( 22657), -INT16_C( 31384), INT16_C( 24184), -INT16_C( 7339) }, { -INT16_C( 16646), INT16_C( 14443), INT16_C( 21020), INT16_C( 16255), INT16_C( 14191), -INT16_C( 30538), -INT16_C( 23956), -INT16_C( 3303), INT16_C( 6389), -INT16_C( 18335), -INT16_C( 21617), INT16_C( 30017), INT16_C( 26496), INT16_C( 5211), -INT16_C( 22657), -INT16_C( 3226), INT16_C( 18200), -INT16_C( 31384), -INT16_C( 12026), INT16_C( 24189), INT16_C( 24184), -INT16_C( 12042), INT16_C( 30675), -INT16_C( 7339), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8x3_t r_ = { { simde_vld1q_s16(test_vec[i].r0), simde_vld1q_s16(test_vec[i].r1), simde_vld1q_s16(test_vec[i].r2), } }; int16_t a_[24]; simde_vst3q_s16(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld3q_s16(a_); simde_test_arm_neon_assert_equal_i16x8(r_.val[0], simde_vld1q_s16(test_vec[i].r0)); simde_test_arm_neon_assert_equal_i16x8(r_.val[1], simde_vld1q_s16(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i16x8(r_.val[2], simde_vld1q_s16(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a0 = 
simde_test_arm_neon_random_i16x8(); simde_int16x8_t a1 = simde_test_arm_neon_random_i16x8(); simde_int16x8_t a2 = simde_test_arm_neon_random_i16x8(); simde_int16x8x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_i16x8(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); int16_t buf[24]; simde_vst3q_s16(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_i16(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst3q_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t r0[4]; int32_t r1[4]; int32_t r2[4]; int32_t a[12]; } test_vec[] = { { { -INT32_C( 1750261198), INT32_C( 365683672), INT32_C( 720805854), INT32_C( 97011150) }, { INT32_C( 497640506), INT32_C( 1227936960), INT32_C( 1527923171), -INT32_C( 1488084108) }, { -INT32_C( 1640039482), -INT32_C( 1112340002), INT32_C( 1994893736), INT32_C( 696037359) }, { -INT32_C( 1750261198), INT32_C( 497640506), -INT32_C( 1640039482), INT32_C( 365683672), INT32_C( 1227936960), -INT32_C( 1112340002), INT32_C( 720805854), INT32_C( 1527923171), INT32_C( 1994893736), INT32_C( 97011150), -INT32_C( 1488084108), INT32_C( 696037359), } }, { { -INT32_C( 733600492), -INT32_C( 602048519), INT32_C( 741814200), -INT32_C( 1714190893) }, { INT32_C( 1580667264), -INT32_C( 1021580774), -INT32_C( 2110192749), -INT32_C( 961825358) }, { -INT32_C( 744820006), INT32_C( 565164137), -INT32_C( 1169299481), -INT32_C( 330096276) }, { -INT32_C( 733600492), INT32_C( 1580667264), -INT32_C( 744820006), -INT32_C( 602048519), -INT32_C( 1021580774), INT32_C( 565164137), INT32_C( 741814200), -INT32_C( 2110192749), -INT32_C( 1169299481), -INT32_C( 1714190893), -INT32_C( 961825358), -INT32_C( 330096276), } }, { { INT32_C( 1296730674), INT32_C( 135292532), INT32_C( 462047593), -INT32_C( 639486209) }, { -INT32_C( 
1850901464), INT32_C( 481451060), -INT32_C( 1344864445), INT32_C( 1402677792) }, { INT32_C( 698410676), -INT32_C( 1255034804), -INT32_C( 120538119), INT32_C( 433238769) }, { INT32_C( 1296730674), -INT32_C( 1850901464), INT32_C( 698410676), INT32_C( 135292532), INT32_C( 481451060), -INT32_C( 1255034804), INT32_C( 462047593), -INT32_C( 1344864445), -INT32_C( 120538119), -INT32_C( 639486209), INT32_C( 1402677792), INT32_C( 433238769), } }, { { INT32_C( 1672118063), INT32_C( 528440539), INT32_C( 2093897308), INT32_C( 886008448) }, { -INT32_C( 1671598256), INT32_C( 424775199), INT32_C( 990978378), INT32_C( 55960532) }, { INT32_C( 1046937442), -INT32_C( 1201805732), -INT32_C( 1137431748), -INT32_C( 437255019) }, { INT32_C( 1672118063), -INT32_C( 1671598256), INT32_C( 1046937442), INT32_C( 528440539), INT32_C( 424775199), -INT32_C( 1201805732), INT32_C( 2093897308), INT32_C( 990978378), -INT32_C( 1137431748), INT32_C( 886008448), INT32_C( 55960532), -INT32_C( 437255019), } }, { { -INT32_C( 1820242317), INT32_C( 648860380), -INT32_C( 933052940), INT32_C( 63682465) }, { INT32_C( 306262454), INT32_C( 1405787671), INT32_C( 1594884042), INT32_C( 1984233475) }, { INT32_C( 705283662), -INT32_C( 1924024935), INT32_C( 341160819), INT32_C( 538386538) }, { -INT32_C( 1820242317), INT32_C( 306262454), INT32_C( 705283662), INT32_C( 648860380), INT32_C( 1405787671), -INT32_C( 1924024935), -INT32_C( 933052940), INT32_C( 1594884042), INT32_C( 341160819), INT32_C( 63682465), INT32_C( 1984233475), INT32_C( 538386538), } }, { { INT32_C( 1764972881), -INT32_C( 1044578825), -INT32_C( 14562052), INT32_C( 443967180) }, { -INT32_C( 985366740), -INT32_C( 1470982859), -INT32_C( 1296259256), INT32_C( 433312711) }, { INT32_C( 612501036), INT32_C( 15023619), -INT32_C( 687929846), -INT32_C( 1712294548) }, { INT32_C( 1764972881), -INT32_C( 985366740), INT32_C( 612501036), -INT32_C( 1044578825), -INT32_C( 1470982859), INT32_C( 15023619), -INT32_C( 14562052), -INT32_C( 1296259256), -INT32_C( 687929846), 
INT32_C( 443967180), INT32_C( 433312711), -INT32_C( 1712294548), } }, { { INT32_C( 710817269), INT32_C( 332575178), INT32_C( 549817944), -INT32_C( 1908828063) }, { -INT32_C( 1565344866), INT32_C( 77764601), INT32_C( 182100382), INT32_C( 212060951) }, { -INT32_C( 902430208), INT32_C( 199035059), -INT32_C( 148135018), -INT32_C( 628792261) }, { INT32_C( 710817269), -INT32_C( 1565344866), -INT32_C( 902430208), INT32_C( 332575178), INT32_C( 77764601), INT32_C( 199035059), INT32_C( 549817944), INT32_C( 182100382), -INT32_C( 148135018), -INT32_C( 1908828063), INT32_C( 212060951), -INT32_C( 628792261), } }, { { INT32_C( 427570975), INT32_C( 1830624975), -INT32_C( 696780865), -INT32_C( 1025369150) }, { -INT32_C( 796059619), -INT32_C( 1227134432), INT32_C( 1236141837), -INT32_C( 1960627349) }, { INT32_C( 983867243), INT32_C( 2091368893), INT32_C( 2069044920), INT32_C( 1446851897) }, { INT32_C( 427570975), -INT32_C( 796059619), INT32_C( 983867243), INT32_C( 1830624975), -INT32_C( 1227134432), INT32_C( 2091368893), -INT32_C( 696780865), INT32_C( 1236141837), INT32_C( 2069044920), -INT32_C( 1025369150), -INT32_C( 1960627349), INT32_C( 1446851897), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4x3_t r_ = { { simde_vld1q_s32(test_vec[i].r0), simde_vld1q_s32(test_vec[i].r1), simde_vld1q_s32(test_vec[i].r2), } }; int32_t a_[12]; simde_vst3q_s32(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld3q_s32(a_); simde_test_arm_neon_assert_equal_i32x4(r_.val[0], simde_vld1q_s32(test_vec[i].r0)); simde_test_arm_neon_assert_equal_i32x4(r_.val[1], simde_vld1q_s32(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i32x4(r_.val[2], simde_vld1q_s32(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a0 = simde_test_arm_neon_random_i32x4(); simde_int32x4_t a1 = simde_test_arm_neon_random_i32x4(); simde_int32x4_t a2 = 
simde_test_arm_neon_random_i32x4(); simde_int32x4x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_i32x4(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); int32_t buf[12]; simde_vst3q_s32(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_i32(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst3q_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t r0[2]; int64_t r1[2]; int64_t r2[2]; int64_t a[6]; } test_vec[] = { { { -INT64_C( 635012862125708663), -INT64_C( 4551834836964699426) }, { -INT64_C( 6853432729242667566), -INT64_C( 286868685105955221) }, { INT64_C( 1108378301783684416), INT64_C( 5650045543870290514) }, { -INT64_C( 635012862125708663), -INT64_C( 6853432729242667566), INT64_C( 1108378301783684416), -INT64_C( 4551834836964699426), -INT64_C( 286868685105955221), INT64_C( 5650045543870290514), } }, { { -INT64_C( 49088157496910733), INT64_C( 8829424968667718680) }, { INT64_C( 3306001507121699489), INT64_C( 9073411890563255072) }, { INT64_C( 6245507785642976025), INT64_C( 2349307249855435749) }, { -INT64_C( 49088157496910733), INT64_C( 3306001507121699489), INT64_C( 6245507785642976025), INT64_C( 8829424968667718680), INT64_C( 9073411890563255072), INT64_C( 2349307249855435749), } }, { { -INT64_C( 1833858103574888572), -INT64_C( 5303429886675558177) }, { -INT64_C( 313828010536757999), INT64_C( 8557601191857129251) }, { INT64_C( 7412750748315137075), -INT64_C( 2155897315551911231) }, { -INT64_C( 1833858103574888572), -INT64_C( 313828010536757999), INT64_C( 7412750748315137075), -INT64_C( 5303429886675558177), INT64_C( 8557601191857129251), -INT64_C( 2155897315551911231), } }, { { -INT64_C( 6170821512464861365), INT64_C( 6900656108198459229) }, { -INT64_C( 4850315250566129214), INT64_C( 93220673490190233) }, { 
-INT64_C( 5775937637409719130), -INT64_C( 882492009611386058) }, { -INT64_C( 6170821512464861365), -INT64_C( 4850315250566129214), -INT64_C( 5775937637409719130), INT64_C( 6900656108198459229), INT64_C( 93220673490190233), -INT64_C( 882492009611386058), } }, { { -INT64_C( 1724054782324434852), INT64_C( 190168630465539564) }, { -INT64_C( 424455724669598438), INT64_C( 4057774997028739627) }, { INT64_C( 7380732911912193333), INT64_C( 8515650882758033358) }, { -INT64_C( 1724054782324434852), -INT64_C( 424455724669598438), INT64_C( 7380732911912193333), INT64_C( 190168630465539564), INT64_C( 4057774997028739627), INT64_C( 8515650882758033358), } }, { { -INT64_C( 8110702121954615887), INT64_C( 2102685047284616217) }, { INT64_C( 3290177900306939337), INT64_C( 1054269323458622158) }, { -INT64_C( 7145536389092440863), -INT64_C( 4123397312635354688) }, { -INT64_C( 8110702121954615887), INT64_C( 3290177900306939337), -INT64_C( 7145536389092440863), INT64_C( 2102685047284616217), INT64_C( 1054269323458622158), -INT64_C( 4123397312635354688), } }, { { INT64_C( 4974423115788019836), -INT64_C( 4192551203404554395) }, { INT64_C( 699150071208039828), -INT64_C( 9122473133432814174) }, { -INT64_C( 8603172560225668897), -INT64_C( 5733896944988058685) }, { INT64_C( 4974423115788019836), INT64_C( 699150071208039828), -INT64_C( 8603172560225668897), -INT64_C( 4192551203404554395), -INT64_C( 9122473133432814174), -INT64_C( 5733896944988058685), } }, { { -INT64_C( 233488605370681232), -INT64_C( 3799627498977987392) }, { -INT64_C( 3954797066086932110), -INT64_C( 823070748352419640) }, { -INT64_C( 7589848679630152874), -INT64_C( 4856393004998834204) }, { -INT64_C( 233488605370681232), -INT64_C( 3954797066086932110), -INT64_C( 7589848679630152874), -INT64_C( 3799627498977987392), -INT64_C( 823070748352419640), -INT64_C( 4856393004998834204), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2x3_t r_ = { { simde_vld1q_s64(test_vec[i].r0), 
simde_vld1q_s64(test_vec[i].r1), simde_vld1q_s64(test_vec[i].r2), } }; int64_t a_[6]; simde_vst3q_s64(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld3q_s64(a_); simde_test_arm_neon_assert_equal_i64x2(r_.val[0], simde_vld1q_s64(test_vec[i].r0)); simde_test_arm_neon_assert_equal_i64x2(r_.val[1], simde_vld1q_s64(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i64x2(r_.val[2], simde_vld1q_s64(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a0 = simde_test_arm_neon_random_i64x2(); simde_int64x2_t a1 = simde_test_arm_neon_random_i64x2(); simde_int64x2_t a2 = simde_test_arm_neon_random_i64x2(); simde_int64x2x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_i64x2(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); int64_t buf[6]; simde_vst3q_s64(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_i64(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst3q_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t r0[16]; uint8_t r1[16]; uint8_t r2[16]; uint8_t a[48]; } test_vec[] = { { { UINT8_C(195), UINT8_C( 18), UINT8_C( 66), UINT8_C( 58), UINT8_C(252), UINT8_C(168), UINT8_C( 43), UINT8_C( 6), UINT8_C(179), UINT8_C( 14), UINT8_C(200), UINT8_C( 15), UINT8_C( 92), UINT8_C(143), UINT8_C( 19), UINT8_C( 5) }, { UINT8_C(122), UINT8_C(198), UINT8_C(166), UINT8_C(235), UINT8_C(197), UINT8_C(222), UINT8_C(151), UINT8_C(111), UINT8_C( 19), UINT8_C(223), UINT8_C(248), UINT8_C(105), UINT8_C( 28), UINT8_C( 2), UINT8_C(173), UINT8_C(223) }, { UINT8_C( 20), UINT8_C(240), UINT8_C( 26), UINT8_C( 16), UINT8_C(152), UINT8_C( 69), UINT8_C( 23), UINT8_C( 76), UINT8_C( 84), UINT8_C(223), UINT8_C( 91), UINT8_C(176), UINT8_C(111), 
UINT8_C(110), UINT8_C(181), UINT8_C(233) }, { UINT8_C(195), UINT8_C(122), UINT8_C( 20), UINT8_C( 18), UINT8_C(198), UINT8_C(240), UINT8_C( 66), UINT8_C(166), UINT8_C( 26), UINT8_C( 58), UINT8_C(235), UINT8_C( 16), UINT8_C(252), UINT8_C(197), UINT8_C(152), UINT8_C(168), UINT8_C(222), UINT8_C( 69), UINT8_C( 43), UINT8_C(151), UINT8_C( 23), UINT8_C( 6), UINT8_C(111), UINT8_C( 76), UINT8_C(179), UINT8_C( 19), UINT8_C( 84), UINT8_C( 14), UINT8_C(223), UINT8_C(223), UINT8_C(200), UINT8_C(248), UINT8_C( 91), UINT8_C( 15), UINT8_C(105), UINT8_C(176), UINT8_C( 92), UINT8_C( 28), UINT8_C(111), UINT8_C(143), UINT8_C( 2), UINT8_C(110), UINT8_C( 19), UINT8_C(173), UINT8_C(181), UINT8_C( 5), UINT8_C(223), UINT8_C(233), } }, { { UINT8_C( 53), UINT8_C( 92), UINT8_C(212), UINT8_C(250), UINT8_C( 58), UINT8_C(108), UINT8_C(105), UINT8_C( 78), UINT8_C( 75), UINT8_C( 97), UINT8_C(183), UINT8_C(103), UINT8_C( 99), UINT8_C(100), UINT8_C( 71), UINT8_C(119) }, { UINT8_C( 84), UINT8_C( 97), UINT8_C(136), UINT8_C(237), UINT8_C(166), UINT8_C(159), UINT8_C( 57), UINT8_C(250), UINT8_C(126), UINT8_C(148), UINT8_C(171), UINT8_C(237), UINT8_C( 2), UINT8_C( 96), UINT8_C(215), UINT8_C( 55) }, { UINT8_C(188), UINT8_C(171), UINT8_C( 49), UINT8_C(247), UINT8_C( 23), UINT8_C(154), UINT8_C( 69), UINT8_C( 98), UINT8_C(252), UINT8_C(252), UINT8_C(202), UINT8_C( 95), UINT8_C( 96), UINT8_C( 17), UINT8_C(215), UINT8_C(181) }, { UINT8_C( 53), UINT8_C( 84), UINT8_C(188), UINT8_C( 92), UINT8_C( 97), UINT8_C(171), UINT8_C(212), UINT8_C(136), UINT8_C( 49), UINT8_C(250), UINT8_C(237), UINT8_C(247), UINT8_C( 58), UINT8_C(166), UINT8_C( 23), UINT8_C(108), UINT8_C(159), UINT8_C(154), UINT8_C(105), UINT8_C( 57), UINT8_C( 69), UINT8_C( 78), UINT8_C(250), UINT8_C( 98), UINT8_C( 75), UINT8_C(126), UINT8_C(252), UINT8_C( 97), UINT8_C(148), UINT8_C(252), UINT8_C(183), UINT8_C(171), UINT8_C(202), UINT8_C(103), UINT8_C(237), UINT8_C( 95), UINT8_C( 99), UINT8_C( 2), UINT8_C( 96), UINT8_C(100), UINT8_C( 96), UINT8_C( 17), 
UINT8_C( 71), UINT8_C(215), UINT8_C(215), UINT8_C(119), UINT8_C( 55), UINT8_C(181), } }, { { UINT8_C(114), UINT8_C( 95), UINT8_C(162), UINT8_C( 24), UINT8_C(254), UINT8_C(219), UINT8_C( 19), UINT8_C(124), UINT8_C(111), UINT8_C(190), UINT8_C(106), UINT8_C(113), UINT8_C( 30), UINT8_C( 65), UINT8_C(169), UINT8_C(219) }, { UINT8_C(236), UINT8_C(218), UINT8_C(210), UINT8_C( 4), UINT8_C(117), UINT8_C( 23), UINT8_C(102), UINT8_C(113), UINT8_C( 19), UINT8_C( 48), UINT8_C(208), UINT8_C(115), UINT8_C( 65), UINT8_C(167), UINT8_C( 40), UINT8_C(179) }, { UINT8_C( 6), UINT8_C(202), UINT8_C(204), UINT8_C( 4), UINT8_C(165), UINT8_C(223), UINT8_C(129), UINT8_C( 20), UINT8_C(157), UINT8_C(235), UINT8_C(134), UINT8_C(187), UINT8_C( 44), UINT8_C( 47), UINT8_C(150), UINT8_C( 24) }, { UINT8_C(114), UINT8_C(236), UINT8_C( 6), UINT8_C( 95), UINT8_C(218), UINT8_C(202), UINT8_C(162), UINT8_C(210), UINT8_C(204), UINT8_C( 24), UINT8_C( 4), UINT8_C( 4), UINT8_C(254), UINT8_C(117), UINT8_C(165), UINT8_C(219), UINT8_C( 23), UINT8_C(223), UINT8_C( 19), UINT8_C(102), UINT8_C(129), UINT8_C(124), UINT8_C(113), UINT8_C( 20), UINT8_C(111), UINT8_C( 19), UINT8_C(157), UINT8_C(190), UINT8_C( 48), UINT8_C(235), UINT8_C(106), UINT8_C(208), UINT8_C(134), UINT8_C(113), UINT8_C(115), UINT8_C(187), UINT8_C( 30), UINT8_C( 65), UINT8_C( 44), UINT8_C( 65), UINT8_C(167), UINT8_C( 47), UINT8_C(169), UINT8_C( 40), UINT8_C(150), UINT8_C(219), UINT8_C(179), UINT8_C( 24), } }, { { UINT8_C( 9), UINT8_C(104), UINT8_C( 28), UINT8_C(126), UINT8_C(127), UINT8_C(131), UINT8_C(239), UINT8_C(146), UINT8_C(179), UINT8_C(192), UINT8_C( 6), UINT8_C(245), UINT8_C(103), UINT8_C( 46), UINT8_C(168), UINT8_C(110) }, { UINT8_C(249), UINT8_C(116), UINT8_C(114), UINT8_C(158), UINT8_C( 83), UINT8_C(243), UINT8_C(179), UINT8_C(240), UINT8_C(222), UINT8_C( 57), UINT8_C(172), UINT8_C( 10), UINT8_C(104), UINT8_C( 66), UINT8_C( 35), UINT8_C(113) }, { UINT8_C(171), UINT8_C( 63), UINT8_C(240), UINT8_C( 42), UINT8_C(194), UINT8_C(223), 
UINT8_C(189), UINT8_C(118), UINT8_C(159), UINT8_C(195), UINT8_C(107), UINT8_C( 7), UINT8_C(241), UINT8_C( 19), UINT8_C(117), UINT8_C(234) }, { UINT8_C( 9), UINT8_C(249), UINT8_C(171), UINT8_C(104), UINT8_C(116), UINT8_C( 63), UINT8_C( 28), UINT8_C(114), UINT8_C(240), UINT8_C(126), UINT8_C(158), UINT8_C( 42), UINT8_C(127), UINT8_C( 83), UINT8_C(194), UINT8_C(131), UINT8_C(243), UINT8_C(223), UINT8_C(239), UINT8_C(179), UINT8_C(189), UINT8_C(146), UINT8_C(240), UINT8_C(118), UINT8_C(179), UINT8_C(222), UINT8_C(159), UINT8_C(192), UINT8_C( 57), UINT8_C(195), UINT8_C( 6), UINT8_C(172), UINT8_C(107), UINT8_C(245), UINT8_C( 10), UINT8_C( 7), UINT8_C(103), UINT8_C(104), UINT8_C(241), UINT8_C( 46), UINT8_C( 66), UINT8_C( 19), UINT8_C(168), UINT8_C( 35), UINT8_C(117), UINT8_C(110), UINT8_C(113), UINT8_C(234), } }, { { UINT8_C(136), UINT8_C(231), UINT8_C(137), UINT8_C(219), UINT8_C(219), UINT8_C( 60), UINT8_C(204), UINT8_C(185), UINT8_C(117), UINT8_C(120), UINT8_C(196), UINT8_C(221), UINT8_C(186), UINT8_C(231), UINT8_C( 78), UINT8_C(101) }, { UINT8_C( 38), UINT8_C( 62), UINT8_C(144), UINT8_C(233), UINT8_C( 30), UINT8_C( 77), UINT8_C( 95), UINT8_C(189), UINT8_C( 16), UINT8_C(202), UINT8_C(196), UINT8_C( 1), UINT8_C(221), UINT8_C( 57), UINT8_C(236), UINT8_C(101) }, { UINT8_C( 33), UINT8_C(117), UINT8_C( 65), UINT8_C(252), UINT8_C(177), UINT8_C( 13), UINT8_C(181), UINT8_C( 38), UINT8_C(133), UINT8_C(121), UINT8_C( 3), UINT8_C( 63), UINT8_C( 96), UINT8_C( 81), UINT8_C(165), UINT8_C(135) }, { UINT8_C(136), UINT8_C( 38), UINT8_C( 33), UINT8_C(231), UINT8_C( 62), UINT8_C(117), UINT8_C(137), UINT8_C(144), UINT8_C( 65), UINT8_C(219), UINT8_C(233), UINT8_C(252), UINT8_C(219), UINT8_C( 30), UINT8_C(177), UINT8_C( 60), UINT8_C( 77), UINT8_C( 13), UINT8_C(204), UINT8_C( 95), UINT8_C(181), UINT8_C(185), UINT8_C(189), UINT8_C( 38), UINT8_C(117), UINT8_C( 16), UINT8_C(133), UINT8_C(120), UINT8_C(202), UINT8_C(121), UINT8_C(196), UINT8_C(196), UINT8_C( 3), UINT8_C(221), UINT8_C( 1), UINT8_C( 
63), UINT8_C(186), UINT8_C(221), UINT8_C( 96), UINT8_C(231), UINT8_C( 57), UINT8_C( 81), UINT8_C( 78), UINT8_C(236), UINT8_C(165), UINT8_C(101), UINT8_C(101), UINT8_C(135), } }, { { UINT8_C(144), UINT8_C( 53), UINT8_C(112), UINT8_C(174), UINT8_C(130), UINT8_C(207), UINT8_C(107), UINT8_C(146), UINT8_C(153), UINT8_C( 48), UINT8_C(147), UINT8_C(118), UINT8_C(105), UINT8_C(127), UINT8_C(220), UINT8_C(138) }, { UINT8_C(244), UINT8_C( 29), UINT8_C(134), UINT8_C(165), UINT8_C( 42), UINT8_C( 60), UINT8_C(203), UINT8_C(175), UINT8_C(181), UINT8_C(206), UINT8_C(238), UINT8_C( 22), UINT8_C( 32), UINT8_C(147), UINT8_C(157), UINT8_C(176) }, { UINT8_C(200), UINT8_C( 13), UINT8_C( 94), UINT8_C( 74), UINT8_C(220), UINT8_C(201), UINT8_C(220), UINT8_C(117), UINT8_C(249), UINT8_C(112), UINT8_C(235), UINT8_C( 99), UINT8_C(239), UINT8_C(199), UINT8_C(237), UINT8_C(228) }, { UINT8_C(144), UINT8_C(244), UINT8_C(200), UINT8_C( 53), UINT8_C( 29), UINT8_C( 13), UINT8_C(112), UINT8_C(134), UINT8_C( 94), UINT8_C(174), UINT8_C(165), UINT8_C( 74), UINT8_C(130), UINT8_C( 42), UINT8_C(220), UINT8_C(207), UINT8_C( 60), UINT8_C(201), UINT8_C(107), UINT8_C(203), UINT8_C(220), UINT8_C(146), UINT8_C(175), UINT8_C(117), UINT8_C(153), UINT8_C(181), UINT8_C(249), UINT8_C( 48), UINT8_C(206), UINT8_C(112), UINT8_C(147), UINT8_C(238), UINT8_C(235), UINT8_C(118), UINT8_C( 22), UINT8_C( 99), UINT8_C(105), UINT8_C( 32), UINT8_C(239), UINT8_C(127), UINT8_C(147), UINT8_C(199), UINT8_C(220), UINT8_C(157), UINT8_C(237), UINT8_C(138), UINT8_C(176), UINT8_C(228), } }, { { UINT8_C(228), UINT8_C(116), UINT8_C(137), UINT8_C( 14), UINT8_C(176), UINT8_C( 85), UINT8_C(189), UINT8_C(101), UINT8_C( 35), UINT8_C(172), UINT8_C(123), UINT8_C( 67), UINT8_C( 63), UINT8_C( 24), UINT8_C(243), UINT8_C( 8) }, { UINT8_C( 37), UINT8_C( 81), UINT8_C( 82), UINT8_C( 1), UINT8_C( 27), UINT8_C( 47), UINT8_C(118), UINT8_C( 20), UINT8_C(159), UINT8_C( 98), UINT8_C(119), UINT8_C(142), UINT8_C( 41), UINT8_C(101), UINT8_C(114), UINT8_C( 14) }, 
{ UINT8_C(217), UINT8_C(252), UINT8_C( 28), UINT8_C(137), UINT8_C( 81), UINT8_C(218), UINT8_C(238), UINT8_C(116), UINT8_C(134), UINT8_C(106), UINT8_C(184), UINT8_C(197), UINT8_C(130), UINT8_C(171), UINT8_C(205), UINT8_C(168) }, { UINT8_C(228), UINT8_C( 37), UINT8_C(217), UINT8_C(116), UINT8_C( 81), UINT8_C(252), UINT8_C(137), UINT8_C( 82), UINT8_C( 28), UINT8_C( 14), UINT8_C( 1), UINT8_C(137), UINT8_C(176), UINT8_C( 27), UINT8_C( 81), UINT8_C( 85), UINT8_C( 47), UINT8_C(218), UINT8_C(189), UINT8_C(118), UINT8_C(238), UINT8_C(101), UINT8_C( 20), UINT8_C(116), UINT8_C( 35), UINT8_C(159), UINT8_C(134), UINT8_C(172), UINT8_C( 98), UINT8_C(106), UINT8_C(123), UINT8_C(119), UINT8_C(184), UINT8_C( 67), UINT8_C(142), UINT8_C(197), UINT8_C( 63), UINT8_C( 41), UINT8_C(130), UINT8_C( 24), UINT8_C(101), UINT8_C(171), UINT8_C(243), UINT8_C(114), UINT8_C(205), UINT8_C( 8), UINT8_C( 14), UINT8_C(168), } }, { { UINT8_C(253), UINT8_C( 32), UINT8_C(169), UINT8_C( 24), UINT8_C( 79), UINT8_C( 32), UINT8_C( 44), UINT8_C(238), UINT8_C(130), UINT8_C(164), UINT8_C(124), UINT8_C(171), UINT8_C( 9), UINT8_C(239), UINT8_C(185), UINT8_C(226) }, { UINT8_C(235), UINT8_C(214), UINT8_C(107), UINT8_C( 60), UINT8_C(176), UINT8_C( 89), UINT8_C(176), UINT8_C( 54), UINT8_C(195), UINT8_C(104), UINT8_C(251), UINT8_C( 70), UINT8_C( 20), UINT8_C(201), UINT8_C(238), UINT8_C( 17) }, { UINT8_C(233), UINT8_C(151), UINT8_C( 41), UINT8_C( 56), UINT8_C(183), UINT8_C( 85), UINT8_C( 38), UINT8_C( 57), UINT8_C(249), UINT8_C(162), UINT8_C(229), UINT8_C( 2), UINT8_C(145), UINT8_C(158), UINT8_C(228), UINT8_C(124) }, { UINT8_C(253), UINT8_C(235), UINT8_C(233), UINT8_C( 32), UINT8_C(214), UINT8_C(151), UINT8_C(169), UINT8_C(107), UINT8_C( 41), UINT8_C( 24), UINT8_C( 60), UINT8_C( 56), UINT8_C( 79), UINT8_C(176), UINT8_C(183), UINT8_C( 32), UINT8_C( 89), UINT8_C( 85), UINT8_C( 44), UINT8_C(176), UINT8_C( 38), UINT8_C(238), UINT8_C( 54), UINT8_C( 57), UINT8_C(130), UINT8_C(195), UINT8_C(249), UINT8_C(164), UINT8_C(104), 
UINT8_C(162), UINT8_C(124), UINT8_C(251), UINT8_C(229), UINT8_C(171), UINT8_C( 70), UINT8_C( 2), UINT8_C( 9), UINT8_C( 20), UINT8_C(145), UINT8_C(239), UINT8_C(201), UINT8_C(158), UINT8_C(185), UINT8_C(238), UINT8_C(228), UINT8_C(226), UINT8_C( 17), UINT8_C(124), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16x3_t r_ = { { simde_vld1q_u8(test_vec[i].r0), simde_vld1q_u8(test_vec[i].r1), simde_vld1q_u8(test_vec[i].r2), } }; uint8_t a_[48]; simde_vst3q_u8(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld3q_u8(a_); simde_test_arm_neon_assert_equal_u8x16(r_.val[0], simde_vld1q_u8(test_vec[i].r0)); simde_test_arm_neon_assert_equal_u8x16(r_.val[1], simde_vld1q_u8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u8x16(r_.val[2], simde_vld1q_u8(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (size_t i = 0 ; i < 8 ; i++) { simde_uint8x16_t a0 = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t a1 = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t a2 = simde_test_arm_neon_random_u8x16(); simde_uint8x16x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_u8x16(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); uint8_t buf[48]; simde_vst3q_u8(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_u8(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst3q_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t r0[8]; uint16_t r1[8]; uint16_t r2[8]; uint16_t a[24]; } test_vec[] = { { { UINT16_C(28854), UINT16_C(51168), UINT16_C(18132), UINT16_C(49742), UINT16_C(35222), UINT16_C(24102), UINT16_C(16864), UINT16_C( 8805) }, { UINT16_C(42923), UINT16_C(13774), UINT16_C(28620), UINT16_C(22537), UINT16_C(57874), 
UINT16_C(53954), UINT16_C(25731), UINT16_C(14804) }, { UINT16_C(46548), UINT16_C(43008), UINT16_C(20475), UINT16_C(37226), UINT16_C(37080), UINT16_C(47343), UINT16_C(21714), UINT16_C(32218) }, { UINT16_C(28854), UINT16_C(42923), UINT16_C(46548), UINT16_C(51168), UINT16_C(13774), UINT16_C(43008), UINT16_C(18132), UINT16_C(28620), UINT16_C(20475), UINT16_C(49742), UINT16_C(22537), UINT16_C(37226), UINT16_C(35222), UINT16_C(57874), UINT16_C(37080), UINT16_C(24102), UINT16_C(53954), UINT16_C(47343), UINT16_C(16864), UINT16_C(25731), UINT16_C(21714), UINT16_C( 8805), UINT16_C(14804), UINT16_C(32218), } }, { { UINT16_C(43516), UINT16_C(51378), UINT16_C(48152), UINT16_C(11041), UINT16_C(58270), UINT16_C( 8701), UINT16_C(53831), UINT16_C( 7259) }, { UINT16_C(23431), UINT16_C(33476), UINT16_C(12202), UINT16_C(33299), UINT16_C( 959), UINT16_C(37178), UINT16_C( 5463), UINT16_C(21262) }, { UINT16_C(49598), UINT16_C(54812), UINT16_C(15741), UINT16_C( 6913), UINT16_C(65312), UINT16_C(26684), UINT16_C(38865), UINT16_C(22660) }, { UINT16_C(43516), UINT16_C(23431), UINT16_C(49598), UINT16_C(51378), UINT16_C(33476), UINT16_C(54812), UINT16_C(48152), UINT16_C(12202), UINT16_C(15741), UINT16_C(11041), UINT16_C(33299), UINT16_C( 6913), UINT16_C(58270), UINT16_C( 959), UINT16_C(65312), UINT16_C( 8701), UINT16_C(37178), UINT16_C(26684), UINT16_C(53831), UINT16_C( 5463), UINT16_C(38865), UINT16_C( 7259), UINT16_C(21262), UINT16_C(22660), } }, { { UINT16_C(18675), UINT16_C(40410), UINT16_C(60791), UINT16_C(14112), UINT16_C(23280), UINT16_C(18632), UINT16_C(55151), UINT16_C(11675) }, { UINT16_C(47000), UINT16_C( 5380), UINT16_C( 1524), UINT16_C( 5424), UINT16_C(27652), UINT16_C(54653), UINT16_C( 260), UINT16_C(63277) }, { UINT16_C( 1865), UINT16_C(49556), UINT16_C(46325), UINT16_C(58872), UINT16_C(49167), UINT16_C(32301), UINT16_C(51607), UINT16_C(12204) }, { UINT16_C(18675), UINT16_C(47000), UINT16_C( 1865), UINT16_C(40410), UINT16_C( 5380), UINT16_C(49556), UINT16_C(60791), UINT16_C( 
1524), UINT16_C(46325), UINT16_C(14112), UINT16_C( 5424), UINT16_C(58872), UINT16_C(23280), UINT16_C(27652), UINT16_C(49167), UINT16_C(18632), UINT16_C(54653), UINT16_C(32301), UINT16_C(55151), UINT16_C( 260), UINT16_C(51607), UINT16_C(11675), UINT16_C(63277), UINT16_C(12204), } }, { { UINT16_C(45184), UINT16_C(30020), UINT16_C(29877), UINT16_C(47754), UINT16_C( 2017), UINT16_C(58767), UINT16_C(48392), UINT16_C(20956) }, { UINT16_C(28868), UINT16_C(47378), UINT16_C( 2597), UINT16_C(13471), UINT16_C(52427), UINT16_C(25266), UINT16_C(24213), UINT16_C( 5778) }, { UINT16_C(54798), UINT16_C(50315), UINT16_C( 5451), UINT16_C(11390), UINT16_C( 3356), UINT16_C( 9233), UINT16_C(60874), UINT16_C(36725) }, { UINT16_C(45184), UINT16_C(28868), UINT16_C(54798), UINT16_C(30020), UINT16_C(47378), UINT16_C(50315), UINT16_C(29877), UINT16_C( 2597), UINT16_C( 5451), UINT16_C(47754), UINT16_C(13471), UINT16_C(11390), UINT16_C( 2017), UINT16_C(52427), UINT16_C( 3356), UINT16_C(58767), UINT16_C(25266), UINT16_C( 9233), UINT16_C(48392), UINT16_C(24213), UINT16_C(60874), UINT16_C(20956), UINT16_C( 5778), UINT16_C(36725), } }, { { UINT16_C(34909), UINT16_C(33352), UINT16_C(59282), UINT16_C(23990), UINT16_C(27060), UINT16_C(18880), UINT16_C(21191), UINT16_C(54879) }, { UINT16_C(59944), UINT16_C(29594), UINT16_C( 6399), UINT16_C( 7071), UINT16_C(45093), UINT16_C(61503), UINT16_C(46493), UINT16_C(64383) }, { UINT16_C(51005), UINT16_C(53117), UINT16_C(13487), UINT16_C(25389), UINT16_C(60829), UINT16_C(25772), UINT16_C( 3135), UINT16_C(26426) }, { UINT16_C(34909), UINT16_C(59944), UINT16_C(51005), UINT16_C(33352), UINT16_C(29594), UINT16_C(53117), UINT16_C(59282), UINT16_C( 6399), UINT16_C(13487), UINT16_C(23990), UINT16_C( 7071), UINT16_C(25389), UINT16_C(27060), UINT16_C(45093), UINT16_C(60829), UINT16_C(18880), UINT16_C(61503), UINT16_C(25772), UINT16_C(21191), UINT16_C(46493), UINT16_C( 3135), UINT16_C(54879), UINT16_C(64383), UINT16_C(26426), } }, { { UINT16_C(54518), UINT16_C(63195), 
UINT16_C(31468), UINT16_C( 4625), UINT16_C(20779), UINT16_C(51202), UINT16_C(33030), UINT16_C(17347) }, { UINT16_C(16712), UINT16_C(63250), UINT16_C(16245), UINT16_C( 4698), UINT16_C( 1836), UINT16_C(27510), UINT16_C(45331), UINT16_C( 2515) }, { UINT16_C(44677), UINT16_C(29439), UINT16_C( 4392), UINT16_C(21380), UINT16_C(34402), UINT16_C(26652), UINT16_C(57095), UINT16_C(20395) }, { UINT16_C(54518), UINT16_C(16712), UINT16_C(44677), UINT16_C(63195), UINT16_C(63250), UINT16_C(29439), UINT16_C(31468), UINT16_C(16245), UINT16_C( 4392), UINT16_C( 4625), UINT16_C( 4698), UINT16_C(21380), UINT16_C(20779), UINT16_C( 1836), UINT16_C(34402), UINT16_C(51202), UINT16_C(27510), UINT16_C(26652), UINT16_C(33030), UINT16_C(45331), UINT16_C(57095), UINT16_C(17347), UINT16_C( 2515), UINT16_C(20395), } }, { { UINT16_C(48416), UINT16_C(38215), UINT16_C(41469), UINT16_C(10663), UINT16_C( 7848), UINT16_C(48021), UINT16_C(26831), UINT16_C(21701) }, { UINT16_C(50198), UINT16_C(16070), UINT16_C(19157), UINT16_C(14226), UINT16_C(44752), UINT16_C(55199), UINT16_C(19085), UINT16_C(44583) }, { UINT16_C(28168), UINT16_C( 1347), UINT16_C(60175), UINT16_C(47150), UINT16_C(49929), UINT16_C(55411), UINT16_C(14379), UINT16_C(16684) }, { UINT16_C(48416), UINT16_C(50198), UINT16_C(28168), UINT16_C(38215), UINT16_C(16070), UINT16_C( 1347), UINT16_C(41469), UINT16_C(19157), UINT16_C(60175), UINT16_C(10663), UINT16_C(14226), UINT16_C(47150), UINT16_C( 7848), UINT16_C(44752), UINT16_C(49929), UINT16_C(48021), UINT16_C(55199), UINT16_C(55411), UINT16_C(26831), UINT16_C(19085), UINT16_C(14379), UINT16_C(21701), UINT16_C(44583), UINT16_C(16684), } }, { { UINT16_C(62461), UINT16_C(53888), UINT16_C( 4669), UINT16_C( 3594), UINT16_C(43456), UINT16_C(19941), UINT16_C( 3316), UINT16_C(64763) }, { UINT16_C(16250), UINT16_C(35329), UINT16_C(12074), UINT16_C(13122), UINT16_C(46579), UINT16_C( 7691), UINT16_C(14318), UINT16_C(60256) }, { UINT16_C(57386), UINT16_C(26813), UINT16_C(51186), UINT16_C(45686), 
UINT16_C(23409), UINT16_C(26111), UINT16_C(64360), UINT16_C(57953) }, { UINT16_C(62461), UINT16_C(16250), UINT16_C(57386), UINT16_C(53888), UINT16_C(35329), UINT16_C(26813), UINT16_C( 4669), UINT16_C(12074), UINT16_C(51186), UINT16_C( 3594), UINT16_C(13122), UINT16_C(45686), UINT16_C(43456), UINT16_C(46579), UINT16_C(23409), UINT16_C(19941), UINT16_C( 7691), UINT16_C(26111), UINT16_C( 3316), UINT16_C(14318), UINT16_C(64360), UINT16_C(64763), UINT16_C(60256), UINT16_C(57953), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8x3_t r_ = { { simde_vld1q_u16(test_vec[i].r0), simde_vld1q_u16(test_vec[i].r1), simde_vld1q_u16(test_vec[i].r2), } }; uint16_t a_[24]; simde_vst3q_u16(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld3q_u16(a_); simde_test_arm_neon_assert_equal_u16x8(r_.val[0], simde_vld1q_u16(test_vec[i].r0)); simde_test_arm_neon_assert_equal_u16x8(r_.val[1], simde_vld1q_u16(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u16x8(r_.val[2], simde_vld1q_u16(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (size_t i = 0 ; i < 8 ; i++) { simde_uint16x8_t a0 = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t a1 = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t a2 = simde_test_arm_neon_random_u16x8(); simde_uint16x8x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_u16x8(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); uint16_t buf[24]; simde_vst3q_u16(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_u16(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst3q_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t r0[4]; uint32_t r1[4]; uint32_t r2[4]; uint32_t a[12]; } test_vec[] = { { { 
UINT32_C(2644289703), UINT32_C(4217165007), UINT32_C(4135406643), UINT32_C(1998399494) }, { UINT32_C( 139488877), UINT32_C(1168216955), UINT32_C(3519844586), UINT32_C(1657552315) }, { UINT32_C(3036637413), UINT32_C(1890540348), UINT32_C(2925931687), UINT32_C(3240461140) }, { UINT32_C(2644289703), UINT32_C( 139488877), UINT32_C(3036637413), UINT32_C(4217165007), UINT32_C(1168216955), UINT32_C(1890540348), UINT32_C(4135406643), UINT32_C(3519844586), UINT32_C(2925931687), UINT32_C(1998399494), UINT32_C(1657552315), UINT32_C(3240461140), } }, { { UINT32_C(1841985266), UINT32_C(4088621833), UINT32_C(3066396667), UINT32_C(2702741948) }, { UINT32_C( 894769145), UINT32_C( 447022195), UINT32_C(2227702832), UINT32_C(2168909455) }, { UINT32_C(1844383844), UINT32_C(1986110075), UINT32_C(3727500833), UINT32_C(2961130935) }, { UINT32_C(1841985266), UINT32_C( 894769145), UINT32_C(1844383844), UINT32_C(4088621833), UINT32_C( 447022195), UINT32_C(1986110075), UINT32_C(3066396667), UINT32_C(2227702832), UINT32_C(3727500833), UINT32_C(2702741948), UINT32_C(2168909455), UINT32_C(2961130935), } }, { { UINT32_C(3504723293), UINT32_C( 183143385), UINT32_C( 646886295), UINT32_C( 94950561) }, { UINT32_C(1601345508), UINT32_C(1524028217), UINT32_C(2956461049), UINT32_C(2774579272) }, { UINT32_C(1718961805), UINT32_C(1752195281), UINT32_C(3029270291), UINT32_C(3099145939) }, { UINT32_C(3504723293), UINT32_C(1601345508), UINT32_C(1718961805), UINT32_C( 183143385), UINT32_C(1524028217), UINT32_C(1752195281), UINT32_C( 646886295), UINT32_C(2956461049), UINT32_C(3029270291), UINT32_C( 94950561), UINT32_C(2774579272), UINT32_C(3099145939), } }, { { UINT32_C( 102181837), UINT32_C(4167167487), UINT32_C( 967416304), UINT32_C(3739093329) }, { UINT32_C( 541414479), UINT32_C(3347625396), UINT32_C(2289768372), UINT32_C( 457192525) }, { UINT32_C(1579243359), UINT32_C( 894927429), UINT32_C(1835925532), UINT32_C(1498172681) }, { UINT32_C( 102181837), UINT32_C( 541414479), UINT32_C(1579243359), 
UINT32_C(4167167487), UINT32_C(3347625396), UINT32_C( 894927429), UINT32_C( 967416304), UINT32_C(2289768372), UINT32_C(1835925532), UINT32_C(3739093329), UINT32_C( 457192525), UINT32_C(1498172681), } }, { { UINT32_C(1434030497), UINT32_C(4212916806), UINT32_C(1719899929), UINT32_C( 713147339) }, { UINT32_C(1602855706), UINT32_C(1100341285), UINT32_C(3920561120), UINT32_C(4047698768) }, { UINT32_C(3527851148), UINT32_C(3620561598), UINT32_C(3292352761), UINT32_C( 787463955) }, { UINT32_C(1434030497), UINT32_C(1602855706), UINT32_C(3527851148), UINT32_C(4212916806), UINT32_C(1100341285), UINT32_C(3620561598), UINT32_C(1719899929), UINT32_C(3920561120), UINT32_C(3292352761), UINT32_C( 713147339), UINT32_C(4047698768), UINT32_C( 787463955), } }, { { UINT32_C(2274195554), UINT32_C( 952705624), UINT32_C(1981904934), UINT32_C(4285031539) }, { UINT32_C(3738283552), UINT32_C( 179674897), UINT32_C( 63959791), UINT32_C( 322027185) }, { UINT32_C(2392571446), UINT32_C( 130442465), UINT32_C(1333651420), UINT32_C(1800332619) }, { UINT32_C(2274195554), UINT32_C(3738283552), UINT32_C(2392571446), UINT32_C( 952705624), UINT32_C( 179674897), UINT32_C( 130442465), UINT32_C(1981904934), UINT32_C( 63959791), UINT32_C(1333651420), UINT32_C(4285031539), UINT32_C( 322027185), UINT32_C(1800332619), } }, { { UINT32_C(2773032852), UINT32_C(2930769598), UINT32_C(2729541361), UINT32_C(1924588092) }, { UINT32_C(2164281760), UINT32_C(2441660085), UINT32_C(4192208558), UINT32_C(2137337579) }, { UINT32_C( 203730509), UINT32_C(2663044269), UINT32_C(2403363666), UINT32_C(3976328781) }, { UINT32_C(2773032852), UINT32_C(2164281760), UINT32_C( 203730509), UINT32_C(2930769598), UINT32_C(2441660085), UINT32_C(2663044269), UINT32_C(2729541361), UINT32_C(4192208558), UINT32_C(2403363666), UINT32_C(1924588092), UINT32_C(2137337579), UINT32_C(3976328781), } }, { { UINT32_C(4235133511), UINT32_C(1989015496), UINT32_C(3916459517), UINT32_C(3915961755) }, { UINT32_C( 821398915), UINT32_C(3016666977), 
UINT32_C(1732382490), UINT32_C(1297368069) }, { UINT32_C( 239715142), UINT32_C(3095779259), UINT32_C(3768710468), UINT32_C(1305021130) }, { UINT32_C(4235133511), UINT32_C( 821398915), UINT32_C( 239715142), UINT32_C(1989015496), UINT32_C(3016666977), UINT32_C(3095779259), UINT32_C(3916459517), UINT32_C(1732382490), UINT32_C(3768710468), UINT32_C(3915961755), UINT32_C(1297368069), UINT32_C(1305021130), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4x3_t r_ = { { simde_vld1q_u32(test_vec[i].r0), simde_vld1q_u32(test_vec[i].r1), simde_vld1q_u32(test_vec[i].r2), } }; uint32_t a_[12]; simde_vst3q_u32(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld3q_u32(a_); simde_test_arm_neon_assert_equal_u32x4(r_.val[0], simde_vld1q_u32(test_vec[i].r0)); simde_test_arm_neon_assert_equal_u32x4(r_.val[1], simde_vld1q_u32(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u32x4(r_.val[2], simde_vld1q_u32(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (size_t i = 0 ; i < 8 ; i++) { simde_uint32x4_t a0 = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t a1 = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t a2 = simde_test_arm_neon_random_u32x4(); simde_uint32x4x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_u32x4(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); uint32_t buf[12]; simde_vst3q_u32(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])); j++) { simde_test_codegen_write_u32(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst3q_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t r0[2]; uint64_t r1[2]; uint64_t r2[2]; uint64_t a[6]; } test_vec[] = { { { UINT64_C(18060883008768479347), UINT64_C( 5018011187030834501) }, { 
UINT64_C(11625139375786132363), UINT64_C( 5059326699072687921) }, { UINT64_C( 2974855029542469350), UINT64_C( 5361348160817082507) }, { UINT64_C(18060883008768479347), UINT64_C(11625139375786132363), UINT64_C( 2974855029542469350), UINT64_C( 5018011187030834501), UINT64_C( 5059326699072687921), UINT64_C( 5361348160817082507), } }, { { UINT64_C( 9666145736659077849), UINT64_C( 4878956801423443611) }, { UINT64_C( 4131166095989137223), UINT64_C( 3824614212338648586) }, { UINT64_C( 8298128430671186765), UINT64_C( 5441640290016459493) }, { UINT64_C( 9666145736659077849), UINT64_C( 4131166095989137223), UINT64_C( 8298128430671186765), UINT64_C( 4878956801423443611), UINT64_C( 3824614212338648586), UINT64_C( 5441640290016459493), } }, { { UINT64_C(17968741216735860086), UINT64_C(14366999007002278096) }, { UINT64_C(16134869119939451808), UINT64_C(15734070097341369356) }, { UINT64_C( 7755862228120555706), UINT64_C(15660697304851506195) }, { UINT64_C(17968741216735860086), UINT64_C(16134869119939451808), UINT64_C( 7755862228120555706), UINT64_C(14366999007002278096), UINT64_C(15734070097341369356), UINT64_C(15660697304851506195), } }, { { UINT64_C( 6696073435993456404), UINT64_C( 7874199468129389832) }, { UINT64_C(13014481120863339927), UINT64_C( 4697420632340706333) }, { UINT64_C(14196257235655426390), UINT64_C(14801492728372883273) }, { UINT64_C( 6696073435993456404), UINT64_C(13014481120863339927), UINT64_C(14196257235655426390), UINT64_C( 7874199468129389832), UINT64_C( 4697420632340706333), UINT64_C(14801492728372883273), } }, { { UINT64_C( 6201908828263317436), UINT64_C( 4701157587375969231) }, { UINT64_C( 1288570213115661407), UINT64_C( 5070620563681907291) }, { UINT64_C(14304757290167917814), UINT64_C( 7940689536953233671) }, { UINT64_C( 6201908828263317436), UINT64_C( 1288570213115661407), UINT64_C(14304757290167917814), UINT64_C( 4701157587375969231), UINT64_C( 5070620563681907291), UINT64_C( 7940689536953233671), } }, { { UINT64_C( 7018133269242321547), 
UINT64_C(16090324530462093949) }, { UINT64_C(12279788565508779948), UINT64_C(13276838393395555779) }, { UINT64_C(12848893113834130153), UINT64_C( 4515709179427111651) }, { UINT64_C( 7018133269242321547), UINT64_C(12279788565508779948), UINT64_C(12848893113834130153), UINT64_C(16090324530462093949), UINT64_C(13276838393395555779), UINT64_C( 4515709179427111651), } }, { { UINT64_C( 1027750122758519029), UINT64_C(10835134783212806010) }, { UINT64_C(13678511626096306923), UINT64_C( 9793135194231376919) }, { UINT64_C(10467880760741186285), UINT64_C( 3640481109499192897) }, { UINT64_C( 1027750122758519029), UINT64_C(13678511626096306923), UINT64_C(10467880760741186285), UINT64_C(10835134783212806010), UINT64_C( 9793135194231376919), UINT64_C( 3640481109499192897), } }, { { UINT64_C(17913111364571958016), UINT64_C( 4885430699418526387) }, { UINT64_C(10447718153612684372), UINT64_C(14726776052189705702) }, { UINT64_C(10297550757555581711), UINT64_C( 7492758411626645857) }, { UINT64_C(17913111364571958016), UINT64_C(10447718153612684372), UINT64_C(10297550757555581711), UINT64_C( 4885430699418526387), UINT64_C(14726776052189705702), UINT64_C( 7492758411626645857), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2x3_t r_ = { { simde_vld1q_u64(test_vec[i].r0), simde_vld1q_u64(test_vec[i].r1), simde_vld1q_u64(test_vec[i].r2), } }; uint64_t a_[6]; simde_vst3q_u64(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld3q_u64(a_); simde_test_arm_neon_assert_equal_u64x2(r_.val[0], simde_vld1q_u64(test_vec[i].r0)); simde_test_arm_neon_assert_equal_u64x2(r_.val[1], simde_vld1q_u64(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u64x2(r_.val[2], simde_vld1q_u64(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (size_t i = 0 ; i < 8 ; i++) { simde_uint64x2_t a0 = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t a1 = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t a2 = 
simde_test_arm_neon_random_u64x2(); simde_uint64x2x3_t a = { { a0, a1, a2 } }; simde_test_arm_neon_write_u64x2(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); uint64_t buf[6]; simde_vst3q_u64(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])); j++) { simde_test_codegen_write_u64(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_u64) #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/st4.c000066400000000000000000005566721400333146700162220ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN st4 #include "test-neon.h" #include "../../../simde/arm/neon/st4.h" #include "../../../simde/arm/neon/ld4.h" #include "../../../simde/arm/neon/get_lane.h" /* Implementor notes (seanptmaher): * * the way that I'm opting to test this is that I'll have a r[4][x] * array of vectors, as well as a flat buffer of results. 
The flat * buffer contains the expected result of a st4, done on arm hardware, * which will be compared against a st4 done at test time. * * To test the ld4, the stored st4 will be ld4'd, and then * individually compared against the r[4][x] vector. */ #if !defined(SIMDE_BUG_INTEL_857088) static int test_simde_vst4_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 r0[2]; simde_float32 r1[2]; simde_float32 r2[2]; simde_float32 r3[2]; simde_float32 a[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 77.38), SIMDE_FLOAT32_C( 677.34) }, { SIMDE_FLOAT32_C( -113.63), SIMDE_FLOAT32_C( -677.73) }, { SIMDE_FLOAT32_C( -489.72), SIMDE_FLOAT32_C( 841.42) }, { SIMDE_FLOAT32_C( -32.02), SIMDE_FLOAT32_C( 794.35) }, { SIMDE_FLOAT32_C( 77.38), SIMDE_FLOAT32_C( -113.63), SIMDE_FLOAT32_C( -489.72), SIMDE_FLOAT32_C( -32.02), SIMDE_FLOAT32_C( 677.34), SIMDE_FLOAT32_C( -677.73), SIMDE_FLOAT32_C( 841.42), SIMDE_FLOAT32_C( 794.35), } }, { { SIMDE_FLOAT32_C( -290.22), SIMDE_FLOAT32_C( -690.11) }, { SIMDE_FLOAT32_C( -998.58), SIMDE_FLOAT32_C( -130.70) }, { SIMDE_FLOAT32_C( -215.14), SIMDE_FLOAT32_C( 735.01) }, { SIMDE_FLOAT32_C( 247.54), SIMDE_FLOAT32_C( 37.32) }, { SIMDE_FLOAT32_C( -290.22), SIMDE_FLOAT32_C( -998.58), SIMDE_FLOAT32_C( -215.14), SIMDE_FLOAT32_C( 247.54), SIMDE_FLOAT32_C( -690.11), SIMDE_FLOAT32_C( -130.70), SIMDE_FLOAT32_C( 735.01), SIMDE_FLOAT32_C( 37.32), } }, { { SIMDE_FLOAT32_C( 469.24), SIMDE_FLOAT32_C( 265.50) }, { SIMDE_FLOAT32_C( 319.83), SIMDE_FLOAT32_C( -183.68) }, { SIMDE_FLOAT32_C( -305.25), SIMDE_FLOAT32_C( -951.48) }, { SIMDE_FLOAT32_C( 557.71), SIMDE_FLOAT32_C( -797.73) }, { SIMDE_FLOAT32_C( 469.24), SIMDE_FLOAT32_C( 319.83), SIMDE_FLOAT32_C( -305.25), SIMDE_FLOAT32_C( 557.71), SIMDE_FLOAT32_C( 265.50), SIMDE_FLOAT32_C( -183.68), SIMDE_FLOAT32_C( -951.48), SIMDE_FLOAT32_C( -797.73), } }, { { SIMDE_FLOAT32_C( 453.48), SIMDE_FLOAT32_C( -751.30) }, { SIMDE_FLOAT32_C( -461.55), SIMDE_FLOAT32_C( -927.67) }, { SIMDE_FLOAT32_C( -520.41), 
SIMDE_FLOAT32_C( 745.85) }, { SIMDE_FLOAT32_C( -100.92), SIMDE_FLOAT32_C( 556.97) }, { SIMDE_FLOAT32_C( 453.48), SIMDE_FLOAT32_C( -461.55), SIMDE_FLOAT32_C( -520.41), SIMDE_FLOAT32_C( -100.92), SIMDE_FLOAT32_C( -751.30), SIMDE_FLOAT32_C( -927.67), SIMDE_FLOAT32_C( 745.85), SIMDE_FLOAT32_C( 556.97), } }, { { SIMDE_FLOAT32_C( 423.19), SIMDE_FLOAT32_C( 785.44) }, { SIMDE_FLOAT32_C( 879.23), SIMDE_FLOAT32_C( 933.48) }, { SIMDE_FLOAT32_C( 626.86), SIMDE_FLOAT32_C( -152.79) }, { SIMDE_FLOAT32_C( 727.83), SIMDE_FLOAT32_C( -663.36) }, { SIMDE_FLOAT32_C( 423.19), SIMDE_FLOAT32_C( 879.23), SIMDE_FLOAT32_C( 626.86), SIMDE_FLOAT32_C( 727.83), SIMDE_FLOAT32_C( 785.44), SIMDE_FLOAT32_C( 933.48), SIMDE_FLOAT32_C( -152.79), SIMDE_FLOAT32_C( -663.36), } }, { { SIMDE_FLOAT32_C( 157.09), SIMDE_FLOAT32_C( 729.25) }, { SIMDE_FLOAT32_C( 205.94), SIMDE_FLOAT32_C( 941.95) }, { SIMDE_FLOAT32_C( 464.26), SIMDE_FLOAT32_C( -546.52) }, { SIMDE_FLOAT32_C( -20.73), SIMDE_FLOAT32_C( -66.51) }, { SIMDE_FLOAT32_C( 157.09), SIMDE_FLOAT32_C( 205.94), SIMDE_FLOAT32_C( 464.26), SIMDE_FLOAT32_C( -20.73), SIMDE_FLOAT32_C( 729.25), SIMDE_FLOAT32_C( 941.95), SIMDE_FLOAT32_C( -546.52), SIMDE_FLOAT32_C( -66.51), } }, { { SIMDE_FLOAT32_C( 718.98), SIMDE_FLOAT32_C( -700.91) }, { SIMDE_FLOAT32_C( 749.81), SIMDE_FLOAT32_C( -586.28) }, { SIMDE_FLOAT32_C( -652.38), SIMDE_FLOAT32_C( 307.52) }, { SIMDE_FLOAT32_C( -384.00), SIMDE_FLOAT32_C( 801.10) }, { SIMDE_FLOAT32_C( 718.98), SIMDE_FLOAT32_C( 749.81), SIMDE_FLOAT32_C( -652.38), SIMDE_FLOAT32_C( -384.00), SIMDE_FLOAT32_C( -700.91), SIMDE_FLOAT32_C( -586.28), SIMDE_FLOAT32_C( 307.52), SIMDE_FLOAT32_C( 801.10), } }, { { SIMDE_FLOAT32_C( 556.22), SIMDE_FLOAT32_C( 154.44) }, { SIMDE_FLOAT32_C( 873.42), SIMDE_FLOAT32_C( -964.19) }, { SIMDE_FLOAT32_C( -99.70), SIMDE_FLOAT32_C( -227.50) }, { SIMDE_FLOAT32_C( 592.78), SIMDE_FLOAT32_C( -676.51) }, { SIMDE_FLOAT32_C( 556.22), SIMDE_FLOAT32_C( 873.42), SIMDE_FLOAT32_C( -99.70), SIMDE_FLOAT32_C( 592.78), SIMDE_FLOAT32_C( 
154.44), SIMDE_FLOAT32_C( -964.19), SIMDE_FLOAT32_C( -227.50), SIMDE_FLOAT32_C( -676.51), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2x4_t r_ = { { simde_vld1_f32(test_vec[i].r0), simde_vld1_f32(test_vec[i].r1), simde_vld1_f32(test_vec[i].r2), simde_vld1_f32(test_vec[i].r3), } }; simde_float32 a_[8]; simde_vst4_f32(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld4_f32(a_); simde_test_arm_neon_assert_equal_f32x2(r_.val[0], simde_vld1_f32(test_vec[i].r0), 1); simde_test_arm_neon_assert_equal_f32x2(r_.val[1], simde_vld1_f32(test_vec[i].r1), 1); simde_test_arm_neon_assert_equal_f32x2(r_.val[2], simde_vld1_f32(test_vec[i].r2), 1); simde_test_arm_neon_assert_equal_f32x2(r_.val[3], simde_vld1_f32(test_vec[i].r3), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32x2_t a0 = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a1 = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a2 = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2_t a3 = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); simde_float32x2x4_t a = { { a0, a1, a2, a3 } }; simde_test_arm_neon_write_f32x2(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f32x2(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f32x2(2, a3, SIMDE_TEST_VEC_POS_MIDDLE); simde_float32_t buf[8]; simde_vst4_f32(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_f32(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst4_f64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float64 r0[1]; simde_float64 r1[1]; simde_float64 r2[1]; simde_float64 r3[1]; simde_float64 a[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 624.60) }, { 
SIMDE_FLOAT64_C( 658.33) }, { SIMDE_FLOAT64_C( 990.90) }, { SIMDE_FLOAT64_C( -345.25) }, { SIMDE_FLOAT64_C( 624.60), SIMDE_FLOAT64_C( 658.33), SIMDE_FLOAT64_C( 990.90), SIMDE_FLOAT64_C( -345.25), } }, { { SIMDE_FLOAT64_C( 563.48) }, { SIMDE_FLOAT64_C( 508.25) }, { SIMDE_FLOAT64_C( 888.63) }, { SIMDE_FLOAT64_C( -575.04) }, { SIMDE_FLOAT64_C( 563.48), SIMDE_FLOAT64_C( 508.25), SIMDE_FLOAT64_C( 888.63), SIMDE_FLOAT64_C( -575.04), } }, { { SIMDE_FLOAT64_C( -646.15) }, { SIMDE_FLOAT64_C( 389.76) }, { SIMDE_FLOAT64_C( 742.88) }, { SIMDE_FLOAT64_C( -788.98) }, { SIMDE_FLOAT64_C( -646.15), SIMDE_FLOAT64_C( 389.76), SIMDE_FLOAT64_C( 742.88), SIMDE_FLOAT64_C( -788.98), } }, { { SIMDE_FLOAT64_C( 856.06) }, { SIMDE_FLOAT64_C( 722.96) }, { SIMDE_FLOAT64_C( -525.80) }, { SIMDE_FLOAT64_C( -581.83) }, { SIMDE_FLOAT64_C( 856.06), SIMDE_FLOAT64_C( 722.96), SIMDE_FLOAT64_C( -525.80), SIMDE_FLOAT64_C( -581.83), } }, { { SIMDE_FLOAT64_C( 936.61) }, { SIMDE_FLOAT64_C( -287.02) }, { SIMDE_FLOAT64_C( 260.93) }, { SIMDE_FLOAT64_C( 698.72) }, { SIMDE_FLOAT64_C( 936.61), SIMDE_FLOAT64_C( -287.02), SIMDE_FLOAT64_C( 260.93), SIMDE_FLOAT64_C( 698.72), } }, { { SIMDE_FLOAT64_C( -737.87) }, { SIMDE_FLOAT64_C( 387.05) }, { SIMDE_FLOAT64_C( 511.74) }, { SIMDE_FLOAT64_C( -545.62) }, { SIMDE_FLOAT64_C( -737.87), SIMDE_FLOAT64_C( 387.05), SIMDE_FLOAT64_C( 511.74), SIMDE_FLOAT64_C( -545.62), } }, { { SIMDE_FLOAT64_C( -116.76) }, { SIMDE_FLOAT64_C( 449.01) }, { SIMDE_FLOAT64_C( 535.14) }, { SIMDE_FLOAT64_C( 592.89) }, { SIMDE_FLOAT64_C( -116.76), SIMDE_FLOAT64_C( 449.01), SIMDE_FLOAT64_C( 535.14), SIMDE_FLOAT64_C( 592.89), } }, { { SIMDE_FLOAT64_C( -783.83) }, { SIMDE_FLOAT64_C( 958.98) }, { SIMDE_FLOAT64_C( -896.71) }, { SIMDE_FLOAT64_C( 840.77) }, { SIMDE_FLOAT64_C( -783.83), SIMDE_FLOAT64_C( 958.98), SIMDE_FLOAT64_C( -896.71), SIMDE_FLOAT64_C( 840.77), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1x4_t r_ = { { simde_vld1_f64(test_vec[i].r0), 
simde_vld1_f64(test_vec[i].r1), simde_vld1_f64(test_vec[i].r2), simde_vld1_f64(test_vec[i].r3), } }; simde_float64_t a_[4]; simde_vst4_f64(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld4_f64(a_); simde_test_arm_neon_assert_equal_f64x1(r_.val[0], simde_vld1_f64(test_vec[i].r0), 1); simde_test_arm_neon_assert_equal_f64x1(r_.val[1], simde_vld1_f64(test_vec[i].r1), 1); simde_test_arm_neon_assert_equal_f64x1(r_.val[2], simde_vld1_f64(test_vec[i].r2), 1); simde_test_arm_neon_assert_equal_f64x1(r_.val[3], simde_vld1_f64(test_vec[i].r3), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float64x1_t a0 = simde_test_arm_neon_random_f64x1(-1000.0f, 1000.0f); simde_float64x1_t a1 = simde_test_arm_neon_random_f64x1(-1000.0f, 1000.0f); simde_float64x1_t a2 = simde_test_arm_neon_random_f64x1(-1000.0f, 1000.0f); simde_float64x1_t a3 = simde_test_arm_neon_random_f64x1(-1000.0f, 1000.0f); simde_float64x1x4_t a = { { a0, a1, a2, a3 } }; simde_test_arm_neon_write_f64x1(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_f64x1(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x1(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_f64x1(2, a3, SIMDE_TEST_VEC_POS_MIDDLE); simde_float64_t buf[4]; simde_vst4_f64(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_f64(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst4_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t r0[8]; int8_t r1[8]; int8_t r2[8]; int8_t r3[8]; int8_t a[32]; } test_vec[] = { { { -INT8_C( 32), -INT8_C( 94), -INT8_C( 106), INT8_C( 115), -INT8_C( 56), INT8_C( 118), -INT8_C( 83), INT8_C( 12) }, { -INT8_C( 18), -INT8_C( 2), -INT8_C( 8), INT8_C( 95), -INT8_C( 117), INT8_C( 56), INT8_C( 92), -INT8_C( 94) }, { -INT8_C( 45), -INT8_C( 37), -INT8_C( 110), -INT8_C( 10), INT8_MAX, 
INT8_C( 100), -INT8_C( 25), INT8_C( 115) }, { -INT8_C( 6), INT8_C( 60), -INT8_C( 25), INT8_C( 92), -INT8_C( 89), -INT8_C( 118), -INT8_C( 50), -INT8_C( 120) }, { -INT8_C( 32), -INT8_C( 18), -INT8_C( 45), -INT8_C( 6), -INT8_C( 94), -INT8_C( 2), -INT8_C( 37), INT8_C( 60), -INT8_C( 106), -INT8_C( 8), -INT8_C( 110), -INT8_C( 25), INT8_C( 115), INT8_C( 95), -INT8_C( 10), INT8_C( 92), -INT8_C( 56), -INT8_C( 117), INT8_MAX, -INT8_C( 89), INT8_C( 118), INT8_C( 56), INT8_C( 100), -INT8_C( 118), -INT8_C( 83), INT8_C( 92), -INT8_C( 25), -INT8_C( 50), INT8_C( 12), -INT8_C( 94), INT8_C( 115), -INT8_C( 120), } }, { { INT8_C( 44), INT8_C( 100), -INT8_C( 5), -INT8_C( 11), -INT8_C( 38), -INT8_C( 87), INT8_C( 1), -INT8_C( 55) }, { -INT8_C( 89), -INT8_C( 7), INT8_C( 40), INT8_C( 50), INT8_C( 49), -INT8_C( 123), -INT8_C( 44), INT8_C( 5) }, { INT8_C( 96), INT8_C( 102), -INT8_C( 5), -INT8_C( 33), -INT8_C( 53), -INT8_C( 30), INT8_C( 82), -INT8_C( 59) }, { INT8_C( 30), INT8_C( 57), INT8_C( 34), -INT8_C( 58), -INT8_C( 61), -INT8_C( 16), INT8_C( 78), -INT8_C( 16) }, { INT8_C( 44), -INT8_C( 89), INT8_C( 96), INT8_C( 30), INT8_C( 100), -INT8_C( 7), INT8_C( 102), INT8_C( 57), -INT8_C( 5), INT8_C( 40), -INT8_C( 5), INT8_C( 34), -INT8_C( 11), INT8_C( 50), -INT8_C( 33), -INT8_C( 58), -INT8_C( 38), INT8_C( 49), -INT8_C( 53), -INT8_C( 61), -INT8_C( 87), -INT8_C( 123), -INT8_C( 30), -INT8_C( 16), INT8_C( 1), -INT8_C( 44), INT8_C( 82), INT8_C( 78), -INT8_C( 55), INT8_C( 5), -INT8_C( 59), -INT8_C( 16), } }, { { INT8_C( 84), INT8_C( 73), -INT8_C( 27), INT8_C( 46), -INT8_C( 14), -INT8_C( 26), -INT8_C( 9), -INT8_C( 103) }, { -INT8_C( 33), INT8_C( 32), -INT8_C( 52), INT8_C( 17), -INT8_C( 91), -INT8_C( 96), INT8_C( 22), INT8_C( 5) }, { INT8_C( 7), INT8_C( 17), -INT8_C( 28), -INT8_C( 46), -INT8_C( 13), INT8_C( 55), -INT8_C( 105), INT8_C( 17) }, { INT8_C( 112), -INT8_C( 71), -INT8_C( 41), INT8_C( 52), -INT8_C( 87), INT8_C( 37), INT8_C( 36), -INT8_C( 3) }, { INT8_C( 84), -INT8_C( 33), INT8_C( 7), INT8_C( 112), 
INT8_C( 73), INT8_C( 32), INT8_C( 17), -INT8_C( 71), -INT8_C( 27), -INT8_C( 52), -INT8_C( 28), -INT8_C( 41), INT8_C( 46), INT8_C( 17), -INT8_C( 46), INT8_C( 52), -INT8_C( 14), -INT8_C( 91), -INT8_C( 13), -INT8_C( 87), -INT8_C( 26), -INT8_C( 96), INT8_C( 55), INT8_C( 37), -INT8_C( 9), INT8_C( 22), -INT8_C( 105), INT8_C( 36), -INT8_C( 103), INT8_C( 5), INT8_C( 17), -INT8_C( 3), } }, { { INT8_C( 111), INT8_C( 9), INT8_C( 44), INT8_C( 97), -INT8_C( 17), INT8_C( 35), -INT8_C( 5), -INT8_C( 50) }, { INT8_C( 67), -INT8_C( 57), -INT8_C( 33), -INT8_C( 24), INT8_C( 103), -INT8_C( 11), -INT8_C( 19), INT8_C( 110) }, { INT8_C( 6), -INT8_C( 46), INT8_C( 64), -INT8_C( 7), INT8_C( 9), -INT8_C( 40), INT8_C( 11), INT8_C( 121) }, { -INT8_C( 111), -INT8_C( 30), -INT8_C( 83), INT8_C( 59), INT8_C( 8), -INT8_C( 47), INT8_C( 56), INT8_C( 119) }, { INT8_C( 111), INT8_C( 67), INT8_C( 6), -INT8_C( 111), INT8_C( 9), -INT8_C( 57), -INT8_C( 46), -INT8_C( 30), INT8_C( 44), -INT8_C( 33), INT8_C( 64), -INT8_C( 83), INT8_C( 97), -INT8_C( 24), -INT8_C( 7), INT8_C( 59), -INT8_C( 17), INT8_C( 103), INT8_C( 9), INT8_C( 8), INT8_C( 35), -INT8_C( 11), -INT8_C( 40), -INT8_C( 47), -INT8_C( 5), -INT8_C( 19), INT8_C( 11), INT8_C( 56), -INT8_C( 50), INT8_C( 110), INT8_C( 121), INT8_C( 119), } }, { { -INT8_C( 38), INT8_C( 100), -INT8_C( 40), -INT8_C( 55), -INT8_C( 120), -INT8_C( 45), -INT8_C( 104), -INT8_C( 53) }, { -INT8_C( 102), INT8_C( 119), -INT8_C( 76), INT8_C( 2), INT8_C( 109), -INT8_C( 95), INT8_C( 112), INT8_C( 115) }, { INT8_C( 115), -INT8_C( 79), INT8_C( 109), INT8_C( 124), -INT8_C( 119), INT8_C( 120), -INT8_C( 10), INT8_C( 26) }, { INT8_C( 90), -INT8_C( 93), INT8_C( 85), INT8_C( 98), INT8_C( 117), -INT8_C( 114), -INT8_C( 39), INT8_C( 79) }, { -INT8_C( 38), -INT8_C( 102), INT8_C( 115), INT8_C( 90), INT8_C( 100), INT8_C( 119), -INT8_C( 79), -INT8_C( 93), -INT8_C( 40), -INT8_C( 76), INT8_C( 109), INT8_C( 85), -INT8_C( 55), INT8_C( 2), INT8_C( 124), INT8_C( 98), -INT8_C( 120), INT8_C( 109), -INT8_C( 
119), INT8_C( 117), -INT8_C( 45), -INT8_C( 95), INT8_C( 120), -INT8_C( 114), -INT8_C( 104), INT8_C( 112), -INT8_C( 10), -INT8_C( 39), -INT8_C( 53), INT8_C( 115), INT8_C( 26), INT8_C( 79), } }, { { -INT8_C( 14), -INT8_C( 78), INT8_C( 25), INT8_C( 122), -INT8_C( 123), -INT8_C( 79), INT8_C( 70), INT8_C( 32) }, { INT8_C( 40), -INT8_C( 6), INT8_C( 34), -INT8_C( 107), -INT8_C( 101), -INT8_C( 110), INT8_C( 9), INT8_C( 15) }, { INT8_C( 67), INT8_C( 118), -INT8_C( 117), -INT8_C( 52), -INT8_C( 18), -INT8_C( 127), -INT8_C( 25), INT8_C( 72) }, { INT8_C( 37), INT8_C( 60), -INT8_C( 85), -INT8_C( 102), -INT8_C( 54), -INT8_C( 124), -INT8_C( 23), -INT8_C( 67) }, { -INT8_C( 14), INT8_C( 40), INT8_C( 67), INT8_C( 37), -INT8_C( 78), -INT8_C( 6), INT8_C( 118), INT8_C( 60), INT8_C( 25), INT8_C( 34), -INT8_C( 117), -INT8_C( 85), INT8_C( 122), -INT8_C( 107), -INT8_C( 52), -INT8_C( 102), -INT8_C( 123), -INT8_C( 101), -INT8_C( 18), -INT8_C( 54), -INT8_C( 79), -INT8_C( 110), -INT8_C( 127), -INT8_C( 124), INT8_C( 70), INT8_C( 9), -INT8_C( 25), -INT8_C( 23), INT8_C( 32), INT8_C( 15), INT8_C( 72), -INT8_C( 67), } }, { { INT8_C( 54), INT8_C( 2), INT8_C( 55), -INT8_C( 68), -INT8_C( 77), INT8_C( 125), -INT8_C( 36), -INT8_C( 36) }, { INT8_C( 119), -INT8_C( 2), INT8_C( 113), INT8_C( 19), -INT8_C( 112), INT8_C( 122), INT8_C( 34), -INT8_C( 44) }, { -INT8_C( 16), -INT8_C( 83), -INT8_C( 96), -INT8_C( 34), INT8_C( 47), -INT8_C( 121), INT8_C( 39), INT8_C( 84) }, { -INT8_C( 60), -INT8_C( 46), -INT8_C( 18), -INT8_C( 114), INT8_C( 86), -INT8_C( 41), INT8_C( 75), -INT8_C( 115) }, { INT8_C( 54), INT8_C( 119), -INT8_C( 16), -INT8_C( 60), INT8_C( 2), -INT8_C( 2), -INT8_C( 83), -INT8_C( 46), INT8_C( 55), INT8_C( 113), -INT8_C( 96), -INT8_C( 18), -INT8_C( 68), INT8_C( 19), -INT8_C( 34), -INT8_C( 114), -INT8_C( 77), -INT8_C( 112), INT8_C( 47), INT8_C( 86), INT8_C( 125), INT8_C( 122), -INT8_C( 121), -INT8_C( 41), -INT8_C( 36), INT8_C( 34), INT8_C( 39), INT8_C( 75), -INT8_C( 36), -INT8_C( 44), INT8_C( 84), -INT8_C( 
115), } }, { { -INT8_C( 38), -INT8_C( 125), INT8_C( 73), -INT8_C( 115), INT8_C( 0), INT8_C( 37), INT8_C( 105), INT8_C( 120) }, { INT8_C( 35), -INT8_C( 37), -INT8_C( 117), -INT8_C( 77), INT8_C( 85), -INT8_C( 83), -INT8_C( 121), INT8_C( 70) }, { INT8_C( 90), INT8_C( 40), INT8_C( 36), -INT8_C( 119), -INT8_C( 81), INT8_C( 75), -INT8_C( 35), INT8_C( 115) }, { INT8_C( 29), -INT8_C( 53), INT8_C( 2), INT8_C( 116), -INT8_C( 93), INT8_C( 77), INT8_C( 1), INT8_C( 125) }, { -INT8_C( 38), INT8_C( 35), INT8_C( 90), INT8_C( 29), -INT8_C( 125), -INT8_C( 37), INT8_C( 40), -INT8_C( 53), INT8_C( 73), -INT8_C( 117), INT8_C( 36), INT8_C( 2), -INT8_C( 115), -INT8_C( 77), -INT8_C( 119), INT8_C( 116), INT8_C( 0), INT8_C( 85), -INT8_C( 81), -INT8_C( 93), INT8_C( 37), -INT8_C( 83), INT8_C( 75), INT8_C( 77), INT8_C( 105), -INT8_C( 121), -INT8_C( 35), INT8_C( 1), INT8_C( 120), INT8_C( 70), INT8_C( 115), INT8_C( 125), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8x4_t r_ = { { simde_vld1_s8(test_vec[i].r0), simde_vld1_s8(test_vec[i].r1), simde_vld1_s8(test_vec[i].r2), simde_vld1_s8(test_vec[i].r3), } }; int8_t a_[32]; simde_vst4_s8(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(a_))); r_ = simde_vld4_s8(a_); simde_test_arm_neon_assert_equal_i8x8(r_.val[0], simde_vld1_s8(test_vec[i].r0)); simde_test_arm_neon_assert_equal_i8x8(r_.val[1], simde_vld1_s8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i8x8(r_.val[2], simde_vld1_s8(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a0 = simde_test_arm_neon_random_i8x8(); simde_int8x8_t a1 = simde_test_arm_neon_random_i8x8(); simde_int8x8_t a2 = simde_test_arm_neon_random_i8x8(); simde_int8x8_t a3 = simde_test_arm_neon_random_i8x8(); simde_int8x8x4_t a = { { a0, a1, a2, a3 } }; simde_test_arm_neon_write_i8x8(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_arm_neon_write_i8x8(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, a3, SIMDE_TEST_VEC_POS_MIDDLE); int8_t buf[32]; simde_vst4_s8(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_i8(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst4_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t r0[4]; int16_t r1[4]; int16_t r2[4]; int16_t r3[4]; int16_t a[16]; } test_vec[] = { { { INT16_C( 5398), -INT16_C( 25729), INT16_C( 15178), INT16_C( 458) }, { INT16_C( 10086), INT16_C( 28789), -INT16_C( 32315), -INT16_C( 3249) }, { INT16_C( 31188), INT16_C( 24333), INT16_C( 18929), -INT16_C( 14162) }, { -INT16_C( 4559), INT16_C( 31053), INT16_C( 11632), -INT16_C( 30981) }, { INT16_C( 5398), INT16_C( 10086), INT16_C( 31188), -INT16_C( 4559), -INT16_C( 25729), INT16_C( 28789), INT16_C( 24333), INT16_C( 31053), INT16_C( 15178), -INT16_C( 32315), INT16_C( 18929), INT16_C( 11632), INT16_C( 458), -INT16_C( 3249), -INT16_C( 14162), -INT16_C( 30981), } }, { { INT16_C( 31298), -INT16_C( 29663), -INT16_C( 5194), INT16_C( 7309) }, { INT16_C( 786), -INT16_C( 10356), -INT16_C( 9340), INT16_C( 22730) }, { -INT16_C( 10156), INT16_C( 17848), INT16_C( 26145), INT16_C( 21005) }, { INT16_C( 23381), -INT16_C( 14900), -INT16_C( 14456), -INT16_C( 13493) }, { INT16_C( 31298), INT16_C( 786), -INT16_C( 10156), INT16_C( 23381), -INT16_C( 29663), -INT16_C( 10356), INT16_C( 17848), -INT16_C( 14900), -INT16_C( 5194), -INT16_C( 9340), INT16_C( 26145), -INT16_C( 14456), INT16_C( 7309), INT16_C( 22730), INT16_C( 21005), -INT16_C( 13493), } }, { { INT16_C( 27713), -INT16_C( 2217), -INT16_C( 6824), INT16_C( 27155) }, { -INT16_C( 24600), INT16_C( 27714), INT16_C( 3195), -INT16_C( 12347) }, { INT16_C( 32228), INT16_C( 1301), INT16_C( 8931), INT16_C( 14424) }, { INT16_C( 9341), INT16_C( 1789), INT16_C( 18667), INT16_C( 11473) }, { INT16_C( 27713), -INT16_C( 
24600), INT16_C( 32228), INT16_C( 9341), -INT16_C( 2217), INT16_C( 27714), INT16_C( 1301), INT16_C( 1789), -INT16_C( 6824), INT16_C( 3195), INT16_C( 8931), INT16_C( 18667), INT16_C( 27155), -INT16_C( 12347), INT16_C( 14424), INT16_C( 11473), } }, { { INT16_C( 10421), INT16_C( 3364), INT16_C( 14093), -INT16_C( 2697) }, { -INT16_C( 17961), INT16_C( 21090), INT16_C( 10182), -INT16_C( 21983) }, { INT16_C( 13988), -INT16_C( 30800), INT16_C( 2137), -INT16_C( 10560) }, { -INT16_C( 17108), INT16_C( 6108), -INT16_C( 21242), -INT16_C( 17597) }, { INT16_C( 10421), -INT16_C( 17961), INT16_C( 13988), -INT16_C( 17108), INT16_C( 3364), INT16_C( 21090), -INT16_C( 30800), INT16_C( 6108), INT16_C( 14093), INT16_C( 10182), INT16_C( 2137), -INT16_C( 21242), -INT16_C( 2697), -INT16_C( 21983), -INT16_C( 10560), -INT16_C( 17597), } }, { { INT16_C( 26582), -INT16_C( 7224), INT16_C( 16287), INT16_C( 30425) }, { INT16_C( 15353), -INT16_C( 16440), -INT16_C( 5790), INT16_C( 1641) }, { INT16_C( 6432), INT16_C( 31117), INT16_C( 19745), INT16_C( 19791) }, { INT16_C( 11275), INT16_C( 4452), -INT16_C( 22311), -INT16_C( 20532) }, { INT16_C( 26582), INT16_C( 15353), INT16_C( 6432), INT16_C( 11275), -INT16_C( 7224), -INT16_C( 16440), INT16_C( 31117), INT16_C( 4452), INT16_C( 16287), -INT16_C( 5790), INT16_C( 19745), -INT16_C( 22311), INT16_C( 30425), INT16_C( 1641), INT16_C( 19791), -INT16_C( 20532), } }, { { -INT16_C( 27633), -INT16_C( 20845), INT16_C( 27859), -INT16_C( 13276) }, { -INT16_C( 4953), INT16_C( 2443), -INT16_C( 2602), -INT16_C( 2545) }, { -INT16_C( 25586), INT16_C( 12399), -INT16_C( 16662), -INT16_C( 2691) }, { -INT16_C( 7446), -INT16_C( 15354), -INT16_C( 11638), -INT16_C( 26253) }, { -INT16_C( 27633), -INT16_C( 4953), -INT16_C( 25586), -INT16_C( 7446), -INT16_C( 20845), INT16_C( 2443), INT16_C( 12399), -INT16_C( 15354), INT16_C( 27859), -INT16_C( 2602), -INT16_C( 16662), -INT16_C( 11638), -INT16_C( 13276), -INT16_C( 2545), -INT16_C( 2691), -INT16_C( 26253), } }, { { INT16_C( 1638), 
INT16_C( 14664), INT16_C( 27762), INT16_C( 6406) }, { -INT16_C( 28327), INT16_C( 12066), INT16_C( 12678), -INT16_C( 27355) }, { -INT16_C( 27442), -INT16_C( 18235), INT16_C( 16978), INT16_C( 15789) }, { -INT16_C( 19676), -INT16_C( 20991), INT16_C( 29829), -INT16_C( 5304) }, { INT16_C( 1638), -INT16_C( 28327), -INT16_C( 27442), -INT16_C( 19676), INT16_C( 14664), INT16_C( 12066), -INT16_C( 18235), -INT16_C( 20991), INT16_C( 27762), INT16_C( 12678), INT16_C( 16978), INT16_C( 29829), INT16_C( 6406), -INT16_C( 27355), INT16_C( 15789), -INT16_C( 5304), } }, { { -INT16_C( 28549), -INT16_C( 4828), INT16_C( 11004), INT16_C( 21767) }, { INT16_C( 10684), INT16_C( 17028), -INT16_C( 22181), INT16_C( 10711) }, { -INT16_C( 25539), -INT16_C( 28447), -INT16_C( 28961), INT16_C( 973) }, { -INT16_C( 12735), -INT16_C( 14670), -INT16_C( 1470), -INT16_C( 16975) }, { -INT16_C( 28549), INT16_C( 10684), -INT16_C( 25539), -INT16_C( 12735), -INT16_C( 4828), INT16_C( 17028), -INT16_C( 28447), -INT16_C( 14670), INT16_C( 11004), -INT16_C( 22181), -INT16_C( 28961), -INT16_C( 1470), INT16_C( 21767), INT16_C( 10711), INT16_C( 973), -INT16_C( 16975), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4x4_t r_ = { { simde_vld1_s16(test_vec[i].r0), simde_vld1_s16(test_vec[i].r1), simde_vld1_s16(test_vec[i].r2), simde_vld1_s16(test_vec[i].r3), } }; int16_t a_[16]; simde_vst4_s16(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld4_s16(a_); simde_test_arm_neon_assert_equal_i16x4(r_.val[0], simde_vld1_s16(test_vec[i].r0)); simde_test_arm_neon_assert_equal_i16x4(r_.val[1], simde_vld1_s16(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i16x4(r_.val[2], simde_vld1_s16(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a0 = simde_test_arm_neon_random_i16x4(); simde_int16x4_t a1 = simde_test_arm_neon_random_i16x4(); simde_int16x4_t a2 = 
simde_test_arm_neon_random_i16x4(); simde_int16x4_t a3 = simde_test_arm_neon_random_i16x4(); simde_int16x4x4_t a = { { a0, a1, a2, a3 } }; simde_test_arm_neon_write_i16x4(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, a3, SIMDE_TEST_VEC_POS_MIDDLE); int16_t buf[16]; simde_vst4_s16(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_i16(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst4_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t r0[2]; int32_t r1[2]; int32_t r2[2]; int32_t r3[2]; int32_t a[8]; } test_vec[] = { { { -INT32_C( 1421389733), INT32_C( 1091510019) }, { -INT32_C( 855567543), INT32_C( 1758550749) }, { -INT32_C( 548252799), INT32_C( 1558828652) }, { -INT32_C( 1985916028), -INT32_C( 288439918) }, { -INT32_C( 1421389733), -INT32_C( 855567543), -INT32_C( 548252799), -INT32_C( 1985916028), INT32_C( 1091510019), INT32_C( 1758550749), INT32_C( 1558828652), -INT32_C( 288439918), } }, { { INT32_C( 362354193), INT32_C( 2119608373) }, { -INT32_C( 1723049796), INT32_C( 872488370) }, { -INT32_C( 585936016), -INT32_C( 1371931606) }, { -INT32_C( 499655856), -INT32_C( 1378876004) }, { INT32_C( 362354193), -INT32_C( 1723049796), -INT32_C( 585936016), -INT32_C( 499655856), INT32_C( 2119608373), INT32_C( 872488370), -INT32_C( 1371931606), -INT32_C( 1378876004), } }, { { INT32_C( 1371695387), -INT32_C( 825288430) }, { INT32_C( 593959793), -INT32_C( 1453889479) }, { -INT32_C( 427398468), -INT32_C( 1214988185) }, { INT32_C( 932826011), -INT32_C( 320574768) }, { INT32_C( 1371695387), INT32_C( 593959793), -INT32_C( 427398468), INT32_C( 932826011), -INT32_C( 825288430), -INT32_C( 1453889479), -INT32_C( 1214988185), -INT32_C( 320574768), } }, { { -INT32_C( 448944173), INT32_C( 833817792) }, 
{ INT32_C( 1632901672), INT32_C( 1057664131) }, { INT32_C( 2099613974), -INT32_C( 332089007) }, { INT32_C( 1411632772), INT32_C( 188745528) }, { -INT32_C( 448944173), INT32_C( 1632901672), INT32_C( 2099613974), INT32_C( 1411632772), INT32_C( 833817792), INT32_C( 1057664131), -INT32_C( 332089007), INT32_C( 188745528), } }, { { INT32_C( 1861320110), -INT32_C( 1298160502) }, { INT32_C( 1108604095), -INT32_C( 1233052256) }, { -INT32_C( 13326674), -INT32_C( 471111585) }, { INT32_C( 1849101878), -INT32_C( 998606826) }, { INT32_C( 1861320110), INT32_C( 1108604095), -INT32_C( 13326674), INT32_C( 1849101878), -INT32_C( 1298160502), -INT32_C( 1233052256), -INT32_C( 471111585), -INT32_C( 998606826), } }, { { INT32_C( 2134076405), -INT32_C( 835595761) }, { INT32_C( 1712342214), INT32_C( 270373218) }, { -INT32_C( 1777315529), -INT32_C( 260441159) }, { INT32_C( 543076618), INT32_C( 518314025) }, { INT32_C( 2134076405), INT32_C( 1712342214), -INT32_C( 1777315529), INT32_C( 543076618), -INT32_C( 835595761), INT32_C( 270373218), -INT32_C( 260441159), INT32_C( 518314025), } }, { { INT32_C( 1402869571), -INT32_C( 1339961366) }, { INT32_C( 1981231636), -INT32_C( 75090749) }, { INT32_C( 1049728645), -INT32_C( 1674703982) }, { -INT32_C( 440627780), -INT32_C( 1459380123) }, { INT32_C( 1402869571), INT32_C( 1981231636), INT32_C( 1049728645), -INT32_C( 440627780), -INT32_C( 1339961366), -INT32_C( 75090749), -INT32_C( 1674703982), -INT32_C( 1459380123), } }, { { -INT32_C( 1560501832), -INT32_C( 2058216079) }, { INT32_C( 335243599), INT32_C( 571376029) }, { -INT32_C( 1436442856), INT32_C( 1715900330) }, { -INT32_C( 2109013476), INT32_C( 1512787874) }, { -INT32_C( 1560501832), INT32_C( 335243599), -INT32_C( 1436442856), -INT32_C( 2109013476), -INT32_C( 2058216079), INT32_C( 571376029), INT32_C( 1715900330), INT32_C( 1512787874), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2x4_t r_ = { { simde_vld1_s32(test_vec[i].r0), 
simde_vld1_s32(test_vec[i].r1), simde_vld1_s32(test_vec[i].r2), simde_vld1_s32(test_vec[i].r3), } }; int32_t a_[8]; simde_vst4_s32(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld4_s32(a_); simde_test_arm_neon_assert_equal_i32x2(r_.val[0], simde_vld1_s32(test_vec[i].r0)); simde_test_arm_neon_assert_equal_i32x2(r_.val[1], simde_vld1_s32(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i32x2(r_.val[2], simde_vld1_s32(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a0 = simde_test_arm_neon_random_i32x2(); simde_int32x2_t a1 = simde_test_arm_neon_random_i32x2(); simde_int32x2_t a2 = simde_test_arm_neon_random_i32x2(); simde_int32x2_t a3 = simde_test_arm_neon_random_i32x2(); simde_int32x2x4_t a = { { a0, a1, a2, a3 } }; simde_test_arm_neon_write_i32x2(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, a3, SIMDE_TEST_VEC_POS_MIDDLE); int32_t buf[8]; simde_vst4_s32(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_i32(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst4_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t r0[1]; int64_t r1[1]; int64_t r2[1]; int64_t r3[1]; int64_t a[4]; } test_vec[] = { { { -INT64_C( 1447547888260701651) }, { -INT64_C( 3827526438922877263) }, { INT64_C( 524167343945907796) }, { -INT64_C( 7563624692450300060) }, { -INT64_C( 1447547888260701651), -INT64_C( 3827526438922877263), INT64_C( 524167343945907796), -INT64_C( 7563624692450300060), } }, { { -INT64_C( 2808569240011087299) }, { -INT64_C( 3116109957209416452) }, { INT64_C( 2183170197427815128) }, { INT64_C( 5016629588355083676) }, { -INT64_C( 2808569240011087299), -INT64_C( 
3116109957209416452), INT64_C( 2183170197427815128), INT64_C( 5016629588355083676), } }, { { -INT64_C( 7166522641185002871) }, { INT64_C( 3915129315457696769) }, { INT64_C( 481824040889826519) }, { INT64_C( 9043980067198029365) }, { -INT64_C( 7166522641185002871), INT64_C( 3915129315457696769), INT64_C( 481824040889826519), INT64_C( 9043980067198029365), } }, { { INT64_C( 5157389337233449302) }, { INT64_C( 5931837169018086846) }, { -INT64_C( 3762032262956986790) }, { -INT64_C( 2858789043925624863) }, { INT64_C( 5157389337233449302), INT64_C( 5931837169018086846), -INT64_C( 3762032262956986790), -INT64_C( 2858789043925624863), } }, { { -INT64_C( 362152481212431765) }, { INT64_C( 4202536713573548678) }, { -INT64_C( 6154229596725299658) }, { -INT64_C( 7639562743766343777) }, { -INT64_C( 362152481212431765), INT64_C( 4202536713573548678), -INT64_C( 6154229596725299658), -INT64_C( 7639562743766343777), } }, { { -INT64_C( 5797751023027631788) }, { INT64_C( 4547793440195256322) }, { -INT64_C( 8958886834058028232) }, { INT64_C( 5909181196396735299) }, { -INT64_C( 5797751023027631788), INT64_C( 4547793440195256322), -INT64_C( 8958886834058028232), INT64_C( 5909181196396735299), } }, { { -INT64_C( 8412040716332414871) }, { INT64_C( 3266952460168524301) }, { INT64_C( 5192327137732747942) }, { -INT64_C( 4876072028891401467) }, { -INT64_C( 8412040716332414871), INT64_C( 3266952460168524301), INT64_C( 5192327137732747942), -INT64_C( 4876072028891401467), } }, { { -INT64_C( 5687041768591247361) }, { -INT64_C( 5502239805841833597) }, { -INT64_C( 5562425281119136896) }, { INT64_C( 4039457532579972095) }, { -INT64_C( 5687041768591247361), -INT64_C( 5502239805841833597), -INT64_C( 5562425281119136896), INT64_C( 4039457532579972095), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1x4_t r_ = { { simde_vld1_s64(test_vec[i].r0), simde_vld1_s64(test_vec[i].r1), simde_vld1_s64(test_vec[i].r2), simde_vld1_s64(test_vec[i].r3), } }; int64_t 
a_[4]; simde_vst4_s64(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld4_s64(a_); simde_test_arm_neon_assert_equal_i64x1(r_.val[0], simde_vld1_s64(test_vec[i].r0)); simde_test_arm_neon_assert_equal_i64x1(r_.val[1], simde_vld1_s64(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i64x1(r_.val[2], simde_vld1_s64(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a0 = simde_test_arm_neon_random_i64x1(); simde_int64x1_t a1 = simde_test_arm_neon_random_i64x1(); simde_int64x1_t a2 = simde_test_arm_neon_random_i64x1(); simde_int64x1_t a3 = simde_test_arm_neon_random_i64x1(); simde_int64x1x4_t a = { { a0, a1, a2, a3 } }; simde_test_arm_neon_write_i64x1(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, a3, SIMDE_TEST_VEC_POS_MIDDLE); int64_t buf[4]; simde_vst4_s64(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_i64(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst4_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t r0[8]; uint8_t r1[8]; uint8_t r2[8]; uint8_t r3[8]; uint8_t a[32]; } test_vec[] = { { { UINT8_C(195), UINT8_C(195), UINT8_C( 72), UINT8_C(180), UINT8_C(152), UINT8_C(218), UINT8_C( 85), UINT8_C(252) }, { UINT8_C(167), UINT8_C( 84), UINT8_C(101), UINT8_C(151), UINT8_C( 48), UINT8_C(151), UINT8_C( 79), UINT8_C( 76) }, { UINT8_C(221), UINT8_C( 97), UINT8_C( 95), UINT8_C(196), UINT8_C( 26), UINT8_C( 93), UINT8_C(153), UINT8_C(174) }, { UINT8_C(240), UINT8_C( 96), UINT8_C( 57), UINT8_C( 51), UINT8_C(183), UINT8_C( 88), UINT8_C(175), UINT8_C(122) }, { UINT8_C(195), UINT8_C(167), UINT8_C(221), UINT8_C(240), UINT8_C(195), UINT8_C( 84), UINT8_C( 97), UINT8_C( 96), UINT8_C( 72), 
UINT8_C(101), UINT8_C( 95), UINT8_C( 57), UINT8_C(180), UINT8_C(151), UINT8_C(196), UINT8_C( 51), UINT8_C(152), UINT8_C( 48), UINT8_C( 26), UINT8_C(183), UINT8_C(218), UINT8_C(151), UINT8_C( 93), UINT8_C( 88), UINT8_C( 85), UINT8_C( 79), UINT8_C(153), UINT8_C(175), UINT8_C(252), UINT8_C( 76), UINT8_C(174), UINT8_C(122), } }, { { UINT8_C( 27), UINT8_C(247), UINT8_C( 47), UINT8_C(180), UINT8_C(209), UINT8_C(132), UINT8_C(176), UINT8_C(120) }, { UINT8_C(216), UINT8_C( 22), UINT8_C( 15), UINT8_C( 9), UINT8_C(173), UINT8_C( 95), UINT8_C( 85), UINT8_C(138) }, { UINT8_C(192), UINT8_C(180), UINT8_C( 79), UINT8_C(219), UINT8_C( 17), UINT8_C(232), UINT8_C(137), UINT8_C( 2) }, { UINT8_C( 73), UINT8_C(195), UINT8_C( 53), UINT8_C( 0), UINT8_C( 27), UINT8_C(228), UINT8_C(123), UINT8_C( 54) }, { UINT8_C( 27), UINT8_C(216), UINT8_C(192), UINT8_C( 73), UINT8_C(247), UINT8_C( 22), UINT8_C(180), UINT8_C(195), UINT8_C( 47), UINT8_C( 15), UINT8_C( 79), UINT8_C( 53), UINT8_C(180), UINT8_C( 9), UINT8_C(219), UINT8_C( 0), UINT8_C(209), UINT8_C(173), UINT8_C( 17), UINT8_C( 27), UINT8_C(132), UINT8_C( 95), UINT8_C(232), UINT8_C(228), UINT8_C(176), UINT8_C( 85), UINT8_C(137), UINT8_C(123), UINT8_C(120), UINT8_C(138), UINT8_C( 2), UINT8_C( 54), } }, { { UINT8_C(219), UINT8_C(170), UINT8_C(234), UINT8_C(172), UINT8_C( 46), UINT8_C(155), UINT8_C( 36), UINT8_C( 6) }, { UINT8_C(177), UINT8_C( 52), UINT8_C( 15), UINT8_C( 94), UINT8_C(147), UINT8_C(101), UINT8_C(233), UINT8_C( 83) }, { UINT8_C( 25), UINT8_C( 56), UINT8_C( 46), UINT8_C( 43), UINT8_C( 32), UINT8_C(184), UINT8_C( 45), UINT8_C(105) }, { UINT8_C(123), UINT8_C( 98), UINT8_C(106), UINT8_C(150), UINT8_C( 71), UINT8_C(229), UINT8_C(204), UINT8_C( 34) }, { UINT8_C(219), UINT8_C(177), UINT8_C( 25), UINT8_C(123), UINT8_C(170), UINT8_C( 52), UINT8_C( 56), UINT8_C( 98), UINT8_C(234), UINT8_C( 15), UINT8_C( 46), UINT8_C(106), UINT8_C(172), UINT8_C( 94), UINT8_C( 43), UINT8_C(150), UINT8_C( 46), UINT8_C(147), UINT8_C( 32), UINT8_C( 71), 
UINT8_C(155), UINT8_C(101), UINT8_C(184), UINT8_C(229), UINT8_C( 36), UINT8_C(233), UINT8_C( 45), UINT8_C(204), UINT8_C( 6), UINT8_C( 83), UINT8_C(105), UINT8_C( 34), } }, { { UINT8_C(143), UINT8_C(183), UINT8_C(207), UINT8_C(189), UINT8_C( 82), UINT8_C(243), UINT8_C(195), UINT8_C( 3) }, { UINT8_C( 39), UINT8_C(211), UINT8_C( 97), UINT8_C(186), UINT8_C( 56), UINT8_C( 74), UINT8_C( 14), UINT8_C( 81) }, { UINT8_C(130), UINT8_C( 60), UINT8_C(124), UINT8_C(163), UINT8_C(244), UINT8_C(169), UINT8_C( 12), UINT8_C(111) }, { UINT8_C( 12), UINT8_C(118), UINT8_C( 5), UINT8_C( 83), UINT8_C( 91), UINT8_C(210), UINT8_C(117), UINT8_C(234) }, { UINT8_C(143), UINT8_C( 39), UINT8_C(130), UINT8_C( 12), UINT8_C(183), UINT8_C(211), UINT8_C( 60), UINT8_C(118), UINT8_C(207), UINT8_C( 97), UINT8_C(124), UINT8_C( 5), UINT8_C(189), UINT8_C(186), UINT8_C(163), UINT8_C( 83), UINT8_C( 82), UINT8_C( 56), UINT8_C(244), UINT8_C( 91), UINT8_C(243), UINT8_C( 74), UINT8_C(169), UINT8_C(210), UINT8_C(195), UINT8_C( 14), UINT8_C( 12), UINT8_C(117), UINT8_C( 3), UINT8_C( 81), UINT8_C(111), UINT8_C(234), } }, { { UINT8_C(137), UINT8_C( 68), UINT8_C(167), UINT8_C(219), UINT8_C( 56), UINT8_C(107), UINT8_C(222), UINT8_C( 95) }, { UINT8_C( 62), UINT8_C( 63), UINT8_C( 26), UINT8_C(118), UINT8_C(138), UINT8_C( 40), UINT8_C(199), UINT8_C( 12) }, { UINT8_C(100), UINT8_C( 68), UINT8_C(175), UINT8_C( 89), UINT8_C(237), UINT8_C(188), UINT8_C(200), UINT8_C(249) }, { UINT8_C( 50), UINT8_C(206), UINT8_C( 76), UINT8_C(142), UINT8_C(160), UINT8_C(194), UINT8_C(120), UINT8_C( 41) }, { UINT8_C(137), UINT8_C( 62), UINT8_C(100), UINT8_C( 50), UINT8_C( 68), UINT8_C( 63), UINT8_C( 68), UINT8_C(206), UINT8_C(167), UINT8_C( 26), UINT8_C(175), UINT8_C( 76), UINT8_C(219), UINT8_C(118), UINT8_C( 89), UINT8_C(142), UINT8_C( 56), UINT8_C(138), UINT8_C(237), UINT8_C(160), UINT8_C(107), UINT8_C( 40), UINT8_C(188), UINT8_C(194), UINT8_C(222), UINT8_C(199), UINT8_C(200), UINT8_C(120), UINT8_C( 95), UINT8_C( 12), UINT8_C(249), UINT8_C( 
41), } }, { { UINT8_C( 6), UINT8_C( 32), UINT8_C( 4), UINT8_C( 62), UINT8_C(139), UINT8_C(226), UINT8_C(158), UINT8_C(201) }, { UINT8_C( 33), UINT8_C(184), UINT8_C( 63), UINT8_C(171), UINT8_C(224), UINT8_C( 6), UINT8_C(184), UINT8_C( 68) }, { UINT8_C( 74), UINT8_C(103), UINT8_C(157), UINT8_C( 56), UINT8_C( 35), UINT8_C(102), UINT8_C( 49), UINT8_C( 86) }, { UINT8_C( 52), UINT8_C(126), UINT8_C(228), UINT8_C(212), UINT8_C( 64), UINT8_C( 92), UINT8_C(253), UINT8_C( 70) }, { UINT8_C( 6), UINT8_C( 33), UINT8_C( 74), UINT8_C( 52), UINT8_C( 32), UINT8_C(184), UINT8_C(103), UINT8_C(126), UINT8_C( 4), UINT8_C( 63), UINT8_C(157), UINT8_C(228), UINT8_C( 62), UINT8_C(171), UINT8_C( 56), UINT8_C(212), UINT8_C(139), UINT8_C(224), UINT8_C( 35), UINT8_C( 64), UINT8_C(226), UINT8_C( 6), UINT8_C(102), UINT8_C( 92), UINT8_C(158), UINT8_C(184), UINT8_C( 49), UINT8_C(253), UINT8_C(201), UINT8_C( 68), UINT8_C( 86), UINT8_C( 70), } }, { { UINT8_C(124), UINT8_C( 1), UINT8_C(133), UINT8_C( 7), UINT8_C(227), UINT8_C( 35), UINT8_C(208), UINT8_C( 4) }, { UINT8_C(219), UINT8_C( 15), UINT8_C(176), UINT8_C(187), UINT8_C( 22), UINT8_C(104), UINT8_MAX, UINT8_C( 96) }, { UINT8_C(207), UINT8_C(157), UINT8_C(152), UINT8_C(243), UINT8_C( 3), UINT8_C(202), UINT8_C( 73), UINT8_C( 55) }, { UINT8_C( 72), UINT8_C( 45), UINT8_C( 11), UINT8_C(136), UINT8_C(137), UINT8_C( 8), UINT8_C(206), UINT8_C( 6) }, { UINT8_C(124), UINT8_C(219), UINT8_C(207), UINT8_C( 72), UINT8_C( 1), UINT8_C( 15), UINT8_C(157), UINT8_C( 45), UINT8_C(133), UINT8_C(176), UINT8_C(152), UINT8_C( 11), UINT8_C( 7), UINT8_C(187), UINT8_C(243), UINT8_C(136), UINT8_C(227), UINT8_C( 22), UINT8_C( 3), UINT8_C(137), UINT8_C( 35), UINT8_C(104), UINT8_C(202), UINT8_C( 8), UINT8_C(208), UINT8_MAX, UINT8_C( 73), UINT8_C(206), UINT8_C( 4), UINT8_C( 96), UINT8_C( 55), UINT8_C( 6), } }, { { UINT8_C( 9), UINT8_C( 83), UINT8_C( 13), UINT8_C(236), UINT8_C(118), UINT8_C(222), UINT8_C(240), UINT8_C( 81) }, { UINT8_C(237), UINT8_C(160), UINT8_C( 12), UINT8_C( 
3), UINT8_C( 8), UINT8_C( 12), UINT8_C(100), UINT8_C(216) }, { UINT8_C(169), UINT8_C(252), UINT8_C(203), UINT8_C(172), UINT8_C(198), UINT8_C( 20), UINT8_C(227), UINT8_C( 14) }, { UINT8_C( 65), UINT8_C(238), UINT8_C(150), UINT8_C(202), UINT8_C(246), UINT8_C(101), UINT8_C(208), UINT8_MAX }, { UINT8_C( 9), UINT8_C(237), UINT8_C(169), UINT8_C( 65), UINT8_C( 83), UINT8_C(160), UINT8_C(252), UINT8_C(238), UINT8_C( 13), UINT8_C( 12), UINT8_C(203), UINT8_C(150), UINT8_C(236), UINT8_C( 3), UINT8_C(172), UINT8_C(202), UINT8_C(118), UINT8_C( 8), UINT8_C(198), UINT8_C(246), UINT8_C(222), UINT8_C( 12), UINT8_C( 20), UINT8_C(101), UINT8_C(240), UINT8_C(100), UINT8_C(227), UINT8_C(208), UINT8_C( 81), UINT8_C(216), UINT8_C( 14), UINT8_MAX, } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8x4_t r_ = { { simde_vld1_u8(test_vec[i].r0), simde_vld1_u8(test_vec[i].r1), simde_vld1_u8(test_vec[i].r2), simde_vld1_u8(test_vec[i].r3), } }; uint8_t a_[32]; simde_vst4_u8(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld4_u8(a_); simde_test_arm_neon_assert_equal_u8x8(r_.val[0], simde_vld1_u8(test_vec[i].r0)); simde_test_arm_neon_assert_equal_u8x8(r_.val[1], simde_vld1_u8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u8x8(r_.val[2], simde_vld1_u8(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (size_t i = 0 ; i < 8 ; i++) { simde_uint8x8_t a0 = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t a1 = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t a2 = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t a3 = simde_test_arm_neon_random_u8x8(); simde_uint8x8x4_t a = { { a0, a1, a2, a3 } }; simde_test_arm_neon_write_u8x8(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, a3, SIMDE_TEST_VEC_POS_MIDDLE); uint8_t buf[32]; 
simde_vst4_u8(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_u8(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst4_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t r0[4]; uint16_t r1[4]; uint16_t r2[4]; uint16_t r3[4]; uint16_t a[16]; } test_vec[] = { { { UINT16_C( 7053), UINT16_C(18194), UINT16_C(41501), UINT16_C( 5028) }, { UINT16_C(23380), UINT16_C(23265), UINT16_C(39297), UINT16_C(36422) }, { UINT16_C(64123), UINT16_C(47306), UINT16_C(63985), UINT16_C(40762) }, { UINT16_C(48714), UINT16_C(44810), UINT16_C(41814), UINT16_C(58351) }, { UINT16_C( 7053), UINT16_C(23380), UINT16_C(64123), UINT16_C(48714), UINT16_C(18194), UINT16_C(23265), UINT16_C(47306), UINT16_C(44810), UINT16_C(41501), UINT16_C(39297), UINT16_C(63985), UINT16_C(41814), UINT16_C( 5028), UINT16_C(36422), UINT16_C(40762), UINT16_C(58351), } }, { { UINT16_C( 446), UINT16_C(56106), UINT16_C(53155), UINT16_C(63470) }, { UINT16_C(53034), UINT16_C(44114), UINT16_C(39016), UINT16_C(58170) }, { UINT16_C( 1171), UINT16_C(33948), UINT16_C(55037), UINT16_C(18211) }, { UINT16_C(11669), UINT16_C(60407), UINT16_C(59088), UINT16_C(36558) }, { UINT16_C( 446), UINT16_C(53034), UINT16_C( 1171), UINT16_C(11669), UINT16_C(56106), UINT16_C(44114), UINT16_C(33948), UINT16_C(60407), UINT16_C(53155), UINT16_C(39016), UINT16_C(55037), UINT16_C(59088), UINT16_C(63470), UINT16_C(58170), UINT16_C(18211), UINT16_C(36558), } }, { { UINT16_C(63719), UINT16_C(35434), UINT16_C(22727), UINT16_C(62081) }, { UINT16_C(54056), UINT16_C(37022), UINT16_C(55404), UINT16_C(65396) }, { UINT16_C( 4316), UINT16_C(55939), UINT16_C(42726), UINT16_C(31521) }, { UINT16_C( 6355), UINT16_C(42086), UINT16_C(13566), UINT16_C(58674) }, { UINT16_C(63719), UINT16_C(54056), UINT16_C( 4316), UINT16_C( 6355), UINT16_C(35434), UINT16_C(37022), UINT16_C(55939), UINT16_C(42086), UINT16_C(22727), UINT16_C(55404), 
UINT16_C(42726), UINT16_C(13566), UINT16_C(62081), UINT16_C(65396), UINT16_C(31521), UINT16_C(58674), } }, { { UINT16_C(39981), UINT16_C(62575), UINT16_C(61941), UINT16_C( 7654) }, { UINT16_C(33988), UINT16_C(12461), UINT16_C( 8540), UINT16_C(14639) }, { UINT16_C(45617), UINT16_C( 6163), UINT16_C(13400), UINT16_C(11411) }, { UINT16_C(64077), UINT16_C(19408), UINT16_C( 558), UINT16_C(23345) }, { UINT16_C(39981), UINT16_C(33988), UINT16_C(45617), UINT16_C(64077), UINT16_C(62575), UINT16_C(12461), UINT16_C( 6163), UINT16_C(19408), UINT16_C(61941), UINT16_C( 8540), UINT16_C(13400), UINT16_C( 558), UINT16_C( 7654), UINT16_C(14639), UINT16_C(11411), UINT16_C(23345), } }, { { UINT16_C(41119), UINT16_C(37968), UINT16_C(13969), UINT16_C(22193) }, { UINT16_C(24251), UINT16_C( 6022), UINT16_C(46720), UINT16_C(45392) }, { UINT16_C(25448), UINT16_C(49609), UINT16_C(23960), UINT16_C(58861) }, { UINT16_C(48471), UINT16_C(34096), UINT16_C(25023), UINT16_C(24289) }, { UINT16_C(41119), UINT16_C(24251), UINT16_C(25448), UINT16_C(48471), UINT16_C(37968), UINT16_C( 6022), UINT16_C(49609), UINT16_C(34096), UINT16_C(13969), UINT16_C(46720), UINT16_C(23960), UINT16_C(25023), UINT16_C(22193), UINT16_C(45392), UINT16_C(58861), UINT16_C(24289), } }, { { UINT16_C(12546), UINT16_C(37874), UINT16_C(41831), UINT16_C( 8937) }, { UINT16_C(28674), UINT16_C(33338), UINT16_C(35366), UINT16_C(36403) }, { UINT16_C(65006), UINT16_C(34383), UINT16_C(15450), UINT16_C(45419) }, { UINT16_C(39929), UINT16_C(47414), UINT16_C( 6141), UINT16_C(65303) }, { UINT16_C(12546), UINT16_C(28674), UINT16_C(65006), UINT16_C(39929), UINT16_C(37874), UINT16_C(33338), UINT16_C(34383), UINT16_C(47414), UINT16_C(41831), UINT16_C(35366), UINT16_C(15450), UINT16_C( 6141), UINT16_C( 8937), UINT16_C(36403), UINT16_C(45419), UINT16_C(65303), } }, { { UINT16_C( 2632), UINT16_C(45202), UINT16_C(31917), UINT16_C(45010) }, { UINT16_C( 3308), UINT16_C( 4657), UINT16_C(26007), UINT16_C(34208) }, { UINT16_C(61538), UINT16_C(48139), 
UINT16_C(30252), UINT16_C( 9837) }, { UINT16_C(41745), UINT16_C( 3807), UINT16_C(63163), UINT16_C( 781) }, { UINT16_C( 2632), UINT16_C( 3308), UINT16_C(61538), UINT16_C(41745), UINT16_C(45202), UINT16_C( 4657), UINT16_C(48139), UINT16_C( 3807), UINT16_C(31917), UINT16_C(26007), UINT16_C(30252), UINT16_C(63163), UINT16_C(45010), UINT16_C(34208), UINT16_C( 9837), UINT16_C( 781), } }, { { UINT16_C(40960), UINT16_C(44723), UINT16_C(34332), UINT16_C( 2141) }, { UINT16_C(36754), UINT16_C(10522), UINT16_C(47860), UINT16_C(22190) }, { UINT16_C(47530), UINT16_C(55058), UINT16_C(32559), UINT16_C(16893) }, { UINT16_C(56354), UINT16_C(56655), UINT16_C(24018), UINT16_C(54241) }, { UINT16_C(40960), UINT16_C(36754), UINT16_C(47530), UINT16_C(56354), UINT16_C(44723), UINT16_C(10522), UINT16_C(55058), UINT16_C(56655), UINT16_C(34332), UINT16_C(47860), UINT16_C(32559), UINT16_C(24018), UINT16_C( 2141), UINT16_C(22190), UINT16_C(16893), UINT16_C(54241), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4x4_t r_ = { { simde_vld1_u16(test_vec[i].r0), simde_vld1_u16(test_vec[i].r1), simde_vld1_u16(test_vec[i].r2), simde_vld1_u16(test_vec[i].r3), } }; uint16_t a_[16]; simde_vst4_u16(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld4_u16(a_); simde_test_arm_neon_assert_equal_u16x4(r_.val[0], simde_vld1_u16(test_vec[i].r0)); simde_test_arm_neon_assert_equal_u16x4(r_.val[1], simde_vld1_u16(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u16x4(r_.val[2], simde_vld1_u16(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (size_t i = 0 ; i < 8 ; i++) { simde_uint16x4_t a0 = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t a1 = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t a2 = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t a3 = simde_test_arm_neon_random_u16x4(); simde_uint16x4x4_t a = { { a0, a1, a2, a3 } }; simde_test_arm_neon_write_u16x4(2, a0, 
SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, a3, SIMDE_TEST_VEC_POS_MIDDLE); uint16_t buf[16]; simde_vst4_u16(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_u16(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst4_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t r0[2]; uint32_t r1[2]; uint32_t r2[2]; uint32_t r3[2]; uint32_t a[8]; } test_vec[] = { { { UINT32_C( 160598487), UINT32_C(3431268484) }, { UINT32_C( 277942869), UINT32_C(3957648286) }, { UINT32_C(3792231046), UINT32_C(2849176706) }, { UINT32_C(4241723360), UINT32_C(2166330026) }, { UINT32_C( 160598487), UINT32_C( 277942869), UINT32_C(3792231046), UINT32_C(4241723360), UINT32_C(3431268484), UINT32_C(3957648286), UINT32_C(2849176706), UINT32_C(2166330026), } }, { { UINT32_C(2676666651), UINT32_C( 124456881) }, { UINT32_C(3222797601), UINT32_C(1856765160) }, { UINT32_C(1481683926), UINT32_C(2399216303) }, { UINT32_C(1535956145), UINT32_C(2178788198) }, { UINT32_C(2676666651), UINT32_C(3222797601), UINT32_C(1481683926), UINT32_C(1535956145), UINT32_C( 124456881), UINT32_C(1856765160), UINT32_C(2399216303), UINT32_C(2178788198), } }, { { UINT32_C( 237004636), UINT32_C(2551548791) }, { UINT32_C(1884826760), UINT32_C(4275962920) }, { UINT32_C(1733701303), UINT32_C( 49698640) }, { UINT32_C(2438824491), UINT32_C(2316450349) }, { UINT32_C( 237004636), UINT32_C(1884826760), UINT32_C(1733701303), UINT32_C(2438824491), UINT32_C(2551548791), UINT32_C(4275962920), UINT32_C( 49698640), UINT32_C(2316450349), } }, { { UINT32_C( 429404834), UINT32_C(1186049470) }, { UINT32_C( 45550297), UINT32_C(3305149710) }, { UINT32_C( 338450372), UINT32_C(3658884014) }, { UINT32_C(3547034789), UINT32_C(1348304558) }, { UINT32_C( 429404834), UINT32_C( 
45550297), UINT32_C( 338450372), UINT32_C(3547034789), UINT32_C(1186049470), UINT32_C(3305149710), UINT32_C(3658884014), UINT32_C(1348304558), } }, { { UINT32_C(1852437936), UINT32_C(2075466658) }, { UINT32_C( 863857701), UINT32_C(3321396737) }, { UINT32_C(2212111829), UINT32_C(3982356552) }, { UINT32_C( 331401572), UINT32_C(4150467911) }, { UINT32_C(1852437936), UINT32_C( 863857701), UINT32_C(2212111829), UINT32_C( 331401572), UINT32_C(2075466658), UINT32_C(3321396737), UINT32_C(3982356552), UINT32_C(4150467911), } }, { { UINT32_C(3026636050), UINT32_C( 221256680) }, { UINT32_C(2285940103), UINT32_C( 5126187) }, { UINT32_C(2776901725), UINT32_C(2106843416) }, { UINT32_C(4052767658), UINT32_C(2213147505) }, { UINT32_C(3026636050), UINT32_C(2285940103), UINT32_C(2776901725), UINT32_C(4052767658), UINT32_C( 221256680), UINT32_C( 5126187), UINT32_C(2106843416), UINT32_C(2213147505), } }, { { UINT32_C(2822262720), UINT32_C(4055197802) }, { UINT32_C(1098511637), UINT32_C(2336343854) }, { UINT32_C( 137479663), UINT32_C(1367721127) }, { UINT32_C(2286097687), UINT32_C(3373018120) }, { UINT32_C(2822262720), UINT32_C(1098511637), UINT32_C( 137479663), UINT32_C(2286097687), UINT32_C(4055197802), UINT32_C(2336343854), UINT32_C(1367721127), UINT32_C(3373018120), } }, { { UINT32_C(3849405563), UINT32_C(3252037548) }, { UINT32_C(1241665308), UINT32_C( 114705431) }, { UINT32_C(2953709321), UINT32_C(3791819723) }, { UINT32_C(2976597416), UINT32_C(3967448945) }, { UINT32_C(3849405563), UINT32_C(1241665308), UINT32_C(2953709321), UINT32_C(2976597416), UINT32_C(3252037548), UINT32_C( 114705431), UINT32_C(3791819723), UINT32_C(3967448945), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2x4_t r_ = { { simde_vld1_u32(test_vec[i].r0), simde_vld1_u32(test_vec[i].r1), simde_vld1_u32(test_vec[i].r2), simde_vld1_u32(test_vec[i].r3), } }; uint32_t a_[8]; simde_vst4_u32(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, 
sizeof(test_vec[i].a))); r_ = simde_vld4_u32(a_); simde_test_arm_neon_assert_equal_u32x2(r_.val[0], simde_vld1_u32(test_vec[i].r0)); simde_test_arm_neon_assert_equal_u32x2(r_.val[1], simde_vld1_u32(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u32x2(r_.val[2], simde_vld1_u32(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (size_t i = 0 ; i < 8 ; i++) { simde_uint32x2_t a0 = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t a1 = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t a2 = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t a3 = simde_test_arm_neon_random_u32x2(); simde_uint32x2x4_t a = { { a0, a1, a2, a3 } }; simde_test_arm_neon_write_u32x2(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, a3, SIMDE_TEST_VEC_POS_MIDDLE); uint32_t buf[8]; simde_vst4_u32(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_u32(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst4_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t r0[1]; uint64_t r1[1]; uint64_t r2[1]; uint64_t r3[1]; uint64_t a[4]; } test_vec[] = { { { UINT64_C( 5721188545113066611) }, { UINT64_C(14561941195432479727) }, { UINT64_C(18383632674776778752) }, { UINT64_C(15980372823853894486) }, { UINT64_C( 5721188545113066611), UINT64_C(14561941195432479727), UINT64_C(18383632674776778752), UINT64_C(15980372823853894486), } }, { { UINT64_C(17453329832926135356) }, { UINT64_C( 9305115780059437306) }, { UINT64_C(10769792532928690161) }, { UINT64_C( 9174047135429923592) }, { UINT64_C(17453329832926135356), UINT64_C( 9305115780059437306), UINT64_C(10769792532928690161), UINT64_C( 9174047135429923592), } }, { { UINT64_C( 4971021025742920199) }, { UINT64_C(12229453011547344821) }, { 
UINT64_C(11171622612186862901) }, { UINT64_C(15125726142156113446) }, { UINT64_C( 4971021025742920199), UINT64_C(12229453011547344821), UINT64_C(11171622612186862901), UINT64_C(15125726142156113446), } }, { { UINT64_C(11783624364083859705) }, { UINT64_C( 1623425490471166758) }, { UINT64_C( 6869277207765361957) }, { UINT64_C(13489603327554052761) }, { UINT64_C(11783624364083859705), UINT64_C( 1623425490471166758), UINT64_C( 6869277207765361957), UINT64_C(13489603327554052761), } }, { { UINT64_C( 905933701970267906) }, { UINT64_C( 2422780449713187752) }, { UINT64_C( 6174203900964972822) }, { UINT64_C( 3174171503893182608) }, { UINT64_C( 905933701970267906), UINT64_C( 2422780449713187752), UINT64_C( 6174203900964972822), UINT64_C( 3174171503893182608), } }, { { UINT64_C( 4030069174546664187) }, { UINT64_C( 6860775845552772889) }, { UINT64_C(14815256441915314056) }, { UINT64_C( 4191491432061489877) }, { UINT64_C( 4030069174546664187), UINT64_C( 6860775845552772889), UINT64_C(14815256441915314056), UINT64_C( 4191491432061489877), } }, { { UINT64_C( 1079748961840678709) }, { UINT64_C(11927881590574923233) }, { UINT64_C( 7670619264820787049) }, { UINT64_C(17460338915384332758) }, { UINT64_C( 1079748961840678709), UINT64_C(11927881590574923233), UINT64_C( 7670619264820787049), UINT64_C(17460338915384332758), } }, { { UINT64_C( 6259919325679283164) }, { UINT64_C(14209428967911261040) }, { UINT64_C( 8448153254045096767) }, { UINT64_C( 1738065358680139632) }, { UINT64_C( 6259919325679283164), UINT64_C(14209428967911261040), UINT64_C( 8448153254045096767), UINT64_C( 1738065358680139632), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1x4_t r_ = { { simde_vld1_u64(test_vec[i].r0), simde_vld1_u64(test_vec[i].r1), simde_vld1_u64(test_vec[i].r2), simde_vld1_u64(test_vec[i].r3), } }; uint64_t a_[4]; simde_vst4_u64(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld4_u64(a_); 
simde_test_arm_neon_assert_equal_u64x1(r_.val[0], simde_vld1_u64(test_vec[i].r0)); simde_test_arm_neon_assert_equal_u64x1(r_.val[1], simde_vld1_u64(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u64x1(r_.val[2], simde_vld1_u64(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (size_t i = 0 ; i < 8 ; i++) { simde_uint64x1_t a0 = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t a1 = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t a2 = simde_test_arm_neon_random_u64x1(); simde_uint64x1_t a3 = simde_test_arm_neon_random_u64x1(); simde_uint64x1x4_t a = { { a0, a1, a2, a3 } }; simde_test_arm_neon_write_u64x1(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, a3, SIMDE_TEST_VEC_POS_MIDDLE); uint64_t buf[4]; simde_vst4_u64(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_u64(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst4q_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { simde_float32 r0[4]; simde_float32 r1[4]; simde_float32 r2[4]; simde_float32 r3[4]; simde_float32 a[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -988.70), SIMDE_FLOAT32_C( 709.53), SIMDE_FLOAT32_C( -895.28), SIMDE_FLOAT32_C( -307.54) }, { SIMDE_FLOAT32_C( -137.31), SIMDE_FLOAT32_C( 947.38), SIMDE_FLOAT32_C( 965.29), SIMDE_FLOAT32_C( -533.59) }, { SIMDE_FLOAT32_C( -896.71), SIMDE_FLOAT32_C( 365.46), SIMDE_FLOAT32_C( -161.98), SIMDE_FLOAT32_C( -31.53) }, { SIMDE_FLOAT32_C( -334.29), SIMDE_FLOAT32_C( -613.71), SIMDE_FLOAT32_C( 414.87), SIMDE_FLOAT32_C( -872.42) }, { SIMDE_FLOAT32_C( -988.70), SIMDE_FLOAT32_C( -137.31), SIMDE_FLOAT32_C( -896.71), SIMDE_FLOAT32_C( -334.29), SIMDE_FLOAT32_C( 709.53), SIMDE_FLOAT32_C( 947.38), SIMDE_FLOAT32_C( 365.46), SIMDE_FLOAT32_C( -613.71), SIMDE_FLOAT32_C( -895.28), 
SIMDE_FLOAT32_C( 965.29), SIMDE_FLOAT32_C( -161.98), SIMDE_FLOAT32_C( 414.87), SIMDE_FLOAT32_C( -307.54), SIMDE_FLOAT32_C( -533.59), SIMDE_FLOAT32_C( -31.53), SIMDE_FLOAT32_C( -872.42), } }, { { SIMDE_FLOAT32_C( -175.89), SIMDE_FLOAT32_C( -87.40), SIMDE_FLOAT32_C( -428.46), SIMDE_FLOAT32_C( -598.83) }, { SIMDE_FLOAT32_C( -917.69), SIMDE_FLOAT32_C( -293.34), SIMDE_FLOAT32_C( 593.88), SIMDE_FLOAT32_C( -196.10) }, { SIMDE_FLOAT32_C( 921.26), SIMDE_FLOAT32_C( 218.55), SIMDE_FLOAT32_C( -799.38), SIMDE_FLOAT32_C( -616.04) }, { SIMDE_FLOAT32_C( 227.38), SIMDE_FLOAT32_C( 356.05), SIMDE_FLOAT32_C( 155.96), SIMDE_FLOAT32_C( 238.69) }, { SIMDE_FLOAT32_C( -175.89), SIMDE_FLOAT32_C( -917.69), SIMDE_FLOAT32_C( 921.26), SIMDE_FLOAT32_C( 227.38), SIMDE_FLOAT32_C( -87.40), SIMDE_FLOAT32_C( -293.34), SIMDE_FLOAT32_C( 218.55), SIMDE_FLOAT32_C( 356.05), SIMDE_FLOAT32_C( -428.46), SIMDE_FLOAT32_C( 593.88), SIMDE_FLOAT32_C( -799.38), SIMDE_FLOAT32_C( 155.96), SIMDE_FLOAT32_C( -598.83), SIMDE_FLOAT32_C( -196.10), SIMDE_FLOAT32_C( -616.04), SIMDE_FLOAT32_C( 238.69), } }, { { SIMDE_FLOAT32_C( 65.58), SIMDE_FLOAT32_C( 260.68), SIMDE_FLOAT32_C( 931.15), SIMDE_FLOAT32_C( 928.27) }, { SIMDE_FLOAT32_C( 208.06), SIMDE_FLOAT32_C( 896.44), SIMDE_FLOAT32_C( -605.32), SIMDE_FLOAT32_C( 311.35) }, { SIMDE_FLOAT32_C( 261.91), SIMDE_FLOAT32_C( 232.70), SIMDE_FLOAT32_C( -720.18), SIMDE_FLOAT32_C( 927.62) }, { SIMDE_FLOAT32_C( 618.99), SIMDE_FLOAT32_C( 694.69), SIMDE_FLOAT32_C( -944.80), SIMDE_FLOAT32_C( -556.90) }, { SIMDE_FLOAT32_C( 65.58), SIMDE_FLOAT32_C( 208.06), SIMDE_FLOAT32_C( 261.91), SIMDE_FLOAT32_C( 618.99), SIMDE_FLOAT32_C( 260.68), SIMDE_FLOAT32_C( 896.44), SIMDE_FLOAT32_C( 232.70), SIMDE_FLOAT32_C( 694.69), SIMDE_FLOAT32_C( 931.15), SIMDE_FLOAT32_C( -605.32), SIMDE_FLOAT32_C( -720.18), SIMDE_FLOAT32_C( -944.80), SIMDE_FLOAT32_C( 928.27), SIMDE_FLOAT32_C( 311.35), SIMDE_FLOAT32_C( 927.62), SIMDE_FLOAT32_C( -556.90), } }, { { SIMDE_FLOAT32_C( -392.71), SIMDE_FLOAT32_C( -373.26), 
SIMDE_FLOAT32_C( -155.73), SIMDE_FLOAT32_C( -310.40) }, { SIMDE_FLOAT32_C( 333.40), SIMDE_FLOAT32_C( -561.84), SIMDE_FLOAT32_C( 493.50), SIMDE_FLOAT32_C( 254.66) }, { SIMDE_FLOAT32_C( 656.70), SIMDE_FLOAT32_C( 694.12), SIMDE_FLOAT32_C( 638.62), SIMDE_FLOAT32_C( -115.91) }, { SIMDE_FLOAT32_C( 50.17), SIMDE_FLOAT32_C( -205.42), SIMDE_FLOAT32_C( -877.23), SIMDE_FLOAT32_C( -884.25) }, { SIMDE_FLOAT32_C( -392.71), SIMDE_FLOAT32_C( 333.40), SIMDE_FLOAT32_C( 656.70), SIMDE_FLOAT32_C( 50.17), SIMDE_FLOAT32_C( -373.26), SIMDE_FLOAT32_C( -561.84), SIMDE_FLOAT32_C( 694.12), SIMDE_FLOAT32_C( -205.42), SIMDE_FLOAT32_C( -155.73), SIMDE_FLOAT32_C( 493.50), SIMDE_FLOAT32_C( 638.62), SIMDE_FLOAT32_C( -877.23), SIMDE_FLOAT32_C( -310.40), SIMDE_FLOAT32_C( 254.66), SIMDE_FLOAT32_C( -115.91), SIMDE_FLOAT32_C( -884.25), } }, { { SIMDE_FLOAT32_C( -944.74), SIMDE_FLOAT32_C( -946.08), SIMDE_FLOAT32_C( -955.97), SIMDE_FLOAT32_C( 263.32) }, { SIMDE_FLOAT32_C( 950.37), SIMDE_FLOAT32_C( -561.29), SIMDE_FLOAT32_C( -425.33), SIMDE_FLOAT32_C( 212.28) }, { SIMDE_FLOAT32_C( 671.41), SIMDE_FLOAT32_C( -145.51), SIMDE_FLOAT32_C( 139.89), SIMDE_FLOAT32_C( 290.40) }, { SIMDE_FLOAT32_C( -450.81), SIMDE_FLOAT32_C( 195.09), SIMDE_FLOAT32_C( 733.51), SIMDE_FLOAT32_C( 156.48) }, { SIMDE_FLOAT32_C( -944.74), SIMDE_FLOAT32_C( 950.37), SIMDE_FLOAT32_C( 671.41), SIMDE_FLOAT32_C( -450.81), SIMDE_FLOAT32_C( -946.08), SIMDE_FLOAT32_C( -561.29), SIMDE_FLOAT32_C( -145.51), SIMDE_FLOAT32_C( 195.09), SIMDE_FLOAT32_C( -955.97), SIMDE_FLOAT32_C( -425.33), SIMDE_FLOAT32_C( 139.89), SIMDE_FLOAT32_C( 733.51), SIMDE_FLOAT32_C( 263.32), SIMDE_FLOAT32_C( 212.28), SIMDE_FLOAT32_C( 290.40), SIMDE_FLOAT32_C( 156.48), } }, { { SIMDE_FLOAT32_C( 821.83), SIMDE_FLOAT32_C( -422.22), SIMDE_FLOAT32_C( 846.08), SIMDE_FLOAT32_C( 155.24) }, { SIMDE_FLOAT32_C( 15.94), SIMDE_FLOAT32_C( 339.58), SIMDE_FLOAT32_C( -590.10), SIMDE_FLOAT32_C( -327.36) }, { SIMDE_FLOAT32_C( 33.70), SIMDE_FLOAT32_C( -951.48), SIMDE_FLOAT32_C( 556.72), 
SIMDE_FLOAT32_C( -916.13) }, { SIMDE_FLOAT32_C( -156.90), SIMDE_FLOAT32_C( 679.50), SIMDE_FLOAT32_C( -800.38), SIMDE_FLOAT32_C( -101.64) }, { SIMDE_FLOAT32_C( 821.83), SIMDE_FLOAT32_C( 15.94), SIMDE_FLOAT32_C( 33.70), SIMDE_FLOAT32_C( -156.90), SIMDE_FLOAT32_C( -422.22), SIMDE_FLOAT32_C( 339.58), SIMDE_FLOAT32_C( -951.48), SIMDE_FLOAT32_C( 679.50), SIMDE_FLOAT32_C( 846.08), SIMDE_FLOAT32_C( -590.10), SIMDE_FLOAT32_C( 556.72), SIMDE_FLOAT32_C( -800.38), SIMDE_FLOAT32_C( 155.24), SIMDE_FLOAT32_C( -327.36), SIMDE_FLOAT32_C( -916.13), SIMDE_FLOAT32_C( -101.64), } }, { { SIMDE_FLOAT32_C( 733.42), SIMDE_FLOAT32_C( -756.36), SIMDE_FLOAT32_C( -838.32), SIMDE_FLOAT32_C( 683.79) }, { SIMDE_FLOAT32_C( -317.65), SIMDE_FLOAT32_C( -263.65), SIMDE_FLOAT32_C( -103.93), SIMDE_FLOAT32_C( -646.24) }, { SIMDE_FLOAT32_C( 590.85), SIMDE_FLOAT32_C( -964.04), SIMDE_FLOAT32_C( 644.16), SIMDE_FLOAT32_C( -859.96) }, { SIMDE_FLOAT32_C( 231.05), SIMDE_FLOAT32_C( 377.67), SIMDE_FLOAT32_C( 296.52), SIMDE_FLOAT32_C( 52.89) }, { SIMDE_FLOAT32_C( 733.42), SIMDE_FLOAT32_C( -317.65), SIMDE_FLOAT32_C( 590.85), SIMDE_FLOAT32_C( 231.05), SIMDE_FLOAT32_C( -756.36), SIMDE_FLOAT32_C( -263.65), SIMDE_FLOAT32_C( -964.04), SIMDE_FLOAT32_C( 377.67), SIMDE_FLOAT32_C( -838.32), SIMDE_FLOAT32_C( -103.93), SIMDE_FLOAT32_C( 644.16), SIMDE_FLOAT32_C( 296.52), SIMDE_FLOAT32_C( 683.79), SIMDE_FLOAT32_C( -646.24), SIMDE_FLOAT32_C( -859.96), SIMDE_FLOAT32_C( 52.89), } }, { { SIMDE_FLOAT32_C( 955.45), SIMDE_FLOAT32_C( 142.60), SIMDE_FLOAT32_C( -791.88), SIMDE_FLOAT32_C( -28.62) }, { SIMDE_FLOAT32_C( -517.82), SIMDE_FLOAT32_C( -381.98), SIMDE_FLOAT32_C( 644.02), SIMDE_FLOAT32_C( 515.88) }, { SIMDE_FLOAT32_C( -333.46), SIMDE_FLOAT32_C( 200.74), SIMDE_FLOAT32_C( 599.74), SIMDE_FLOAT32_C( 509.64) }, { SIMDE_FLOAT32_C( -119.76), SIMDE_FLOAT32_C( 799.36), SIMDE_FLOAT32_C( -592.00), SIMDE_FLOAT32_C( -386.34) }, { SIMDE_FLOAT32_C( 955.45), SIMDE_FLOAT32_C( -517.82), SIMDE_FLOAT32_C( -333.46), SIMDE_FLOAT32_C( -119.76), 
SIMDE_FLOAT32_C( 142.60), SIMDE_FLOAT32_C( -381.98), SIMDE_FLOAT32_C( 200.74), SIMDE_FLOAT32_C( 799.36), SIMDE_FLOAT32_C( -791.88), SIMDE_FLOAT32_C( 644.02), SIMDE_FLOAT32_C( 599.74), SIMDE_FLOAT32_C( -592.00), SIMDE_FLOAT32_C( -28.62), SIMDE_FLOAT32_C( 515.88), SIMDE_FLOAT32_C( 509.64), SIMDE_FLOAT32_C( -386.34), } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    /* Assemble the four source vectors r0..r3 into one x4 tuple. */
    simde_float32x4x4_t r_ = { { simde_vld1q_f32(test_vec[i].r0),
                                 simde_vld1q_f32(test_vec[i].r1),
                                 simde_vld1q_f32(test_vec[i].r2),
                                 simde_vld1q_f32(test_vec[i].r3), } };

    /* Store the tuple and compare the memory image byte-for-byte with the
     * expected interleaved array `a`. */
    simde_float32 a_[16];
    simde_vst4q_f32(a_, r_);
    simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a)));

    /* Round trip: load the buffer back with vld4q and check all four vectors.
     * NOTE(review): the trailing `1` is presumably a precision argument of the
     * float comparison helper -- confirm against its definition. */
    r_ = simde_vld4q_f32(a_);
    simde_test_arm_neon_assert_equal_f32x4(r_.val[0], simde_vld1q_f32(test_vec[i].r0), 1);
    simde_test_arm_neon_assert_equal_f32x4(r_.val[1], simde_vld1q_f32(test_vec[i].r1), 1);
    simde_test_arm_neon_assert_equal_f32x4(r_.val[2], simde_vld1q_f32(test_vec[i].r2), 1);
    simde_test_arm_neon_assert_equal_f32x4(r_.val[3], simde_vld1q_f32(test_vec[i].r3), 1);
  }

  return 0;
#else
  /* Vector generator (compiled out by the `#if 1` of this function): prints
   * eight sets of random inputs drawn with bounds -1000.0f and 1000.0f,
   * followed by the buffer produced by simde_vst4q_f32, then returns 1. */
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_float32x4_t a0 = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f);
    simde_float32x4_t a1 = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f);
    simde_float32x4_t a2 = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f);
    simde_float32x4_t a3 = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f);
    simde_float32x4x4_t a = { { a0, a1, a2, a3 } };

    simde_test_arm_neon_write_f32x4(2, a0, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_f32x4(2, a1, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_f32x4(2, a2, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_f32x4(2, a3, SIMDE_TEST_VEC_POS_MIDDLE);

    simde_float32_t buf[16];
    simde_vst4q_f32(buf, a);

    printf(" {\n");
    for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) {
      simde_test_codegen_write_f32(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE);
    }
    printf(" }\n },\n");
  }
  return 1;
#endif
}
/* Round-trip test for simde_vst4q_f64.
 *
 * Each test_vec entry holds four 2-lane source vectors (r0..r3) and the
 * expected memory image `a`, in which the lanes are interleaved as
 * r0[0], r1[0], r2[0], r3[0], r0[1], r1[1], r2[1], r3[1].  For every entry
 * the test stores the tuple with simde_vst4q_f64, compares the buffer with
 * `a` via simde_memcmp, reloads it with simde_vld4q_f64 and checks all four
 * vectors against r0..r3.  Returns 0 once every entry has been processed.
 *
 * The `#else` branch is a vector generator that is compiled out by `#if 1`;
 * it prints random inputs plus the stored buffer and returns 1. */
static int test_simde_vst4q_f64 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    simde_float64 r0[2];
    simde_float64 r1[2];
    simde_float64 r2[2];
    simde_float64 r3[2];
    simde_float64 a[8];
  } test_vec[] = {
    { { SIMDE_FLOAT64_C( -818.28), SIMDE_FLOAT64_C( -30.31) },
      { SIMDE_FLOAT64_C( 709.73), SIMDE_FLOAT64_C( -485.43) },
      { SIMDE_FLOAT64_C( 859.77), SIMDE_FLOAT64_C( -908.87) },
      { SIMDE_FLOAT64_C( 467.51), SIMDE_FLOAT64_C( 159.39) },
      { SIMDE_FLOAT64_C( -818.28), SIMDE_FLOAT64_C( 709.73), SIMDE_FLOAT64_C( 859.77), SIMDE_FLOAT64_C( 467.51), SIMDE_FLOAT64_C( -30.31), SIMDE_FLOAT64_C( -485.43), SIMDE_FLOAT64_C( -908.87), SIMDE_FLOAT64_C( 159.39), } },
    { { SIMDE_FLOAT64_C( -267.86), SIMDE_FLOAT64_C( -945.08) },
      { SIMDE_FLOAT64_C( 703.56), SIMDE_FLOAT64_C( -499.98) },
      { SIMDE_FLOAT64_C( 229.30), SIMDE_FLOAT64_C( -572.30) },
      { SIMDE_FLOAT64_C( 838.48), SIMDE_FLOAT64_C( -276.60) },
      { SIMDE_FLOAT64_C( -267.86), SIMDE_FLOAT64_C( 703.56), SIMDE_FLOAT64_C( 229.30), SIMDE_FLOAT64_C( 838.48), SIMDE_FLOAT64_C( -945.08), SIMDE_FLOAT64_C( -499.98), SIMDE_FLOAT64_C( -572.30), SIMDE_FLOAT64_C( -276.60), } },
    { { SIMDE_FLOAT64_C( 417.49), SIMDE_FLOAT64_C( -54.87) },
      { SIMDE_FLOAT64_C( 57.43), SIMDE_FLOAT64_C( 286.91) },
      { SIMDE_FLOAT64_C( -186.44), SIMDE_FLOAT64_C( 308.53) },
      { SIMDE_FLOAT64_C( 898.55), SIMDE_FLOAT64_C( -632.39) },
      { SIMDE_FLOAT64_C( 417.49), SIMDE_FLOAT64_C( 57.43), SIMDE_FLOAT64_C( -186.44), SIMDE_FLOAT64_C( 898.55), SIMDE_FLOAT64_C( -54.87), SIMDE_FLOAT64_C( 286.91), SIMDE_FLOAT64_C( 308.53), SIMDE_FLOAT64_C( -632.39), } },
    { { SIMDE_FLOAT64_C( 104.89), SIMDE_FLOAT64_C( -475.34) },
      { SIMDE_FLOAT64_C( -11.27), SIMDE_FLOAT64_C( 402.75) },
      { SIMDE_FLOAT64_C( -840.97), SIMDE_FLOAT64_C( -286.94) },
      { SIMDE_FLOAT64_C( -305.37), SIMDE_FLOAT64_C( -659.25) },
      { SIMDE_FLOAT64_C( 104.89), SIMDE_FLOAT64_C( -11.27), SIMDE_FLOAT64_C( -840.97), SIMDE_FLOAT64_C( -305.37), SIMDE_FLOAT64_C( -475.34), SIMDE_FLOAT64_C( 402.75), SIMDE_FLOAT64_C( -286.94), SIMDE_FLOAT64_C( -659.25), } },
    { { SIMDE_FLOAT64_C( 682.75), SIMDE_FLOAT64_C( -595.64) },
      { SIMDE_FLOAT64_C( -144.67), SIMDE_FLOAT64_C( 542.52) },
      { SIMDE_FLOAT64_C( -504.51), SIMDE_FLOAT64_C( -677.16) },
      { SIMDE_FLOAT64_C( -298.09), SIMDE_FLOAT64_C( 227.63) },
      { SIMDE_FLOAT64_C( 682.75), SIMDE_FLOAT64_C( -144.67), SIMDE_FLOAT64_C( -504.51), SIMDE_FLOAT64_C( -298.09), SIMDE_FLOAT64_C( -595.64), SIMDE_FLOAT64_C( 542.52), SIMDE_FLOAT64_C( -677.16), SIMDE_FLOAT64_C( 227.63), } },
    { { SIMDE_FLOAT64_C( -622.24), SIMDE_FLOAT64_C( -594.53) },
      { SIMDE_FLOAT64_C( 727.66), SIMDE_FLOAT64_C( 607.06) },
      { SIMDE_FLOAT64_C( -166.83), SIMDE_FLOAT64_C( 566.14) },
      { SIMDE_FLOAT64_C( -669.54), SIMDE_FLOAT64_C( -749.34) },
      { SIMDE_FLOAT64_C( -622.24), SIMDE_FLOAT64_C( 727.66), SIMDE_FLOAT64_C( -166.83), SIMDE_FLOAT64_C( -669.54), SIMDE_FLOAT64_C( -594.53), SIMDE_FLOAT64_C( 607.06), SIMDE_FLOAT64_C( 566.14), SIMDE_FLOAT64_C( -749.34), } },
    { { SIMDE_FLOAT64_C( -488.73), SIMDE_FLOAT64_C( 387.89) },
      { SIMDE_FLOAT64_C( 537.58), SIMDE_FLOAT64_C( 324.83) },
      { SIMDE_FLOAT64_C( -303.58), SIMDE_FLOAT64_C( 436.13) },
      { SIMDE_FLOAT64_C( 692.45), SIMDE_FLOAT64_C( 801.32) },
      { SIMDE_FLOAT64_C( -488.73), SIMDE_FLOAT64_C( 537.58), SIMDE_FLOAT64_C( -303.58), SIMDE_FLOAT64_C( 692.45), SIMDE_FLOAT64_C( 387.89), SIMDE_FLOAT64_C( 324.83), SIMDE_FLOAT64_C( 436.13), SIMDE_FLOAT64_C( 801.32), } },
    { { SIMDE_FLOAT64_C( 960.79), SIMDE_FLOAT64_C( -318.82) },
      { SIMDE_FLOAT64_C( 204.07), SIMDE_FLOAT64_C( -880.18) },
      { SIMDE_FLOAT64_C( 394.24), SIMDE_FLOAT64_C( 898.70) },
      { SIMDE_FLOAT64_C( -539.42), SIMDE_FLOAT64_C( 76.99) },
      { SIMDE_FLOAT64_C( 960.79), SIMDE_FLOAT64_C( 204.07), SIMDE_FLOAT64_C( 394.24), SIMDE_FLOAT64_C( -539.42), SIMDE_FLOAT64_C( -318.82), SIMDE_FLOAT64_C( -880.18), SIMDE_FLOAT64_C( 898.70), SIMDE_FLOAT64_C( 76.99), } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    /* Assemble the four source vectors r0..r3 into one x4 tuple. */
    simde_float64x2x4_t r_ = { { simde_vld1q_f64(test_vec[i].r0),
                                 simde_vld1q_f64(test_vec[i].r1),
                                 simde_vld1q_f64(test_vec[i].r2),
                                 simde_vld1q_f64(test_vec[i].r3), } };

    /* Store the tuple and compare the memory image byte-for-byte with the
     * expected interleaved array `a`. */
    simde_float64_t a_[8];
    simde_vst4q_f64(a_, r_);
    simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a)));

    /* Round trip: load the buffer back with vld4q and check all four vectors.
     * NOTE(review): the trailing `1` is presumably a precision argument of the
     * float comparison helper -- confirm against its definition. */
    r_ = simde_vld4q_f64(a_);
    simde_test_arm_neon_assert_equal_f64x2(r_.val[0], simde_vld1q_f64(test_vec[i].r0), 1);
    simde_test_arm_neon_assert_equal_f64x2(r_.val[1], simde_vld1q_f64(test_vec[i].r1), 1);
    simde_test_arm_neon_assert_equal_f64x2(r_.val[2], simde_vld1q_f64(test_vec[i].r2), 1);
    simde_test_arm_neon_assert_equal_f64x2(r_.val[3], simde_vld1q_f64(test_vec[i].r3), 1);
  }

  return 0;
#else
  /* Vector generator (compiled out by `#if 1`): prints eight sets of random
   * inputs followed by the buffer produced by simde_vst4q_f64. */
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_float64x2_t a0 = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f);
    simde_float64x2_t a1 = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f);
    simde_float64x2_t a2 = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f);
    simde_float64x2_t a3 = simde_test_arm_neon_random_f64x2(-1000.0f, 1000.0f);
    simde_float64x2x4_t a = { { a0, a1, a2, a3 } };

    simde_test_arm_neon_write_f64x2(2, a0, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_f64x2(2, a1, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_f64x2(2, a2, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_f64x2(2, a3, SIMDE_TEST_VEC_POS_MIDDLE);

    simde_float64_t buf[8];
    simde_vst4q_f64(buf, a);

    printf(" {\n");
    for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) {
      simde_test_codegen_write_f64(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE);
    }
    printf(" }\n },\n");
  }
  return 1;
#endif
}

static int test_simde_vst4q_s8 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    int8_t r0[16];
    int8_t r1[16];
    int8_t r2[16];
    int8_t r3[16];
    int8_t a[64];
  } test_vec[] = {
    { { -INT8_C( 104), INT8_C( 69), -INT8_C( 31), INT8_C( 55), -INT8_C( 109), INT8_C( 42), -INT8_C( 54), -INT8_C( 38), INT8_C( 7), -INT8_C( 16), INT8_C( 6), -INT8_C( 40), -INT8_C( 55), INT8_C( 115), INT8_C( 4), -INT8_C( 113) },
      { -INT8_C( 64), INT8_C( 107), -INT8_C( 83), INT8_C( 34), INT8_C( 99), INT8_C( 1), -INT8_C( 67), INT8_C( 53), -INT8_C( 37),
-INT8_C( 78), -INT8_C( 45), -INT8_C( 28), INT8_C( 122), INT8_MIN, -INT8_C( 26), INT8_C( 19) }, { -INT8_C( 59), -INT8_C( 57), INT8_C( 74), INT8_C( 89), -INT8_C( 15), INT8_C( 20), INT8_C( 51), -INT8_C( 8), INT8_C( 5), INT8_C( 57), -INT8_C( 47), -INT8_C( 50), -INT8_C( 84), -INT8_C( 43), INT8_C( 93), INT8_C( 108) }, { INT8_C( 64), INT8_C( 10), -INT8_C( 113), -INT8_C( 93), INT8_C( 11), INT8_C( 76), -INT8_C( 39), -INT8_C( 25), -INT8_C( 2), -INT8_C( 84), -INT8_C( 53), INT8_C( 121), INT8_C( 45), -INT8_C( 79), -INT8_C( 116), -INT8_C( 14) }, { -INT8_C( 104), -INT8_C( 64), -INT8_C( 59), INT8_C( 64), INT8_C( 69), INT8_C( 107), -INT8_C( 57), INT8_C( 10), -INT8_C( 31), -INT8_C( 83), INT8_C( 74), -INT8_C( 113), INT8_C( 55), INT8_C( 34), INT8_C( 89), -INT8_C( 93), -INT8_C( 109), INT8_C( 99), -INT8_C( 15), INT8_C( 11), INT8_C( 42), INT8_C( 1), INT8_C( 20), INT8_C( 76), -INT8_C( 54), -INT8_C( 67), INT8_C( 51), -INT8_C( 39), -INT8_C( 38), INT8_C( 53), -INT8_C( 8), -INT8_C( 25), INT8_C( 7), -INT8_C( 37), INT8_C( 5), -INT8_C( 2), -INT8_C( 16), -INT8_C( 78), INT8_C( 57), -INT8_C( 84), INT8_C( 6), -INT8_C( 45), -INT8_C( 47), -INT8_C( 53), -INT8_C( 40), -INT8_C( 28), -INT8_C( 50), INT8_C( 121), -INT8_C( 55), INT8_C( 122), -INT8_C( 84), INT8_C( 45), INT8_C( 115), INT8_MIN, -INT8_C( 43), -INT8_C( 79), INT8_C( 4), -INT8_C( 26), INT8_C( 93), -INT8_C( 116), -INT8_C( 113), INT8_C( 19), INT8_C( 108), -INT8_C( 14), } }, { { INT8_C( 120), -INT8_C( 42), INT8_C( 75), INT8_C( 106), -INT8_C( 21), INT8_MAX, INT8_C( 98), -INT8_C( 16), -INT8_C( 72), INT8_C( 51), -INT8_C( 66), INT8_C( 101), INT8_C( 8), INT8_C( 27), -INT8_C( 47), INT8_C( 72) }, { INT8_C( 37), INT8_C( 96), -INT8_C( 20), INT8_C( 48), -INT8_C( 83), -INT8_C( 59), INT8_C( 23), -INT8_C( 85), INT8_C( 113), -INT8_C( 30), INT8_C( 36), -INT8_C( 98), -INT8_C( 109), -INT8_C( 80), -INT8_C( 111), INT8_C( 12) }, { -INT8_C( 121), -INT8_C( 36), INT8_C( 118), INT8_C( 114), INT8_C( 91), -INT8_C( 40), INT8_C( 98), INT8_C( 20), INT8_C( 12), INT8_C( 32), 
INT8_C( 121), INT8_C( 20), INT8_C( 59), INT8_C( 74), INT8_C( 93), INT8_C( 96) }, { -INT8_C( 85), INT8_C( 73), -INT8_C( 112), INT8_C( 88), INT8_C( 14), -INT8_C( 88), INT8_C( 3), INT8_MAX, -INT8_C( 118), INT8_C( 40), INT8_C( 30), INT8_C( 30), -INT8_C( 40), -INT8_C( 81), INT8_C( 42), INT8_C( 95) }, { INT8_C( 120), INT8_C( 37), -INT8_C( 121), -INT8_C( 85), -INT8_C( 42), INT8_C( 96), -INT8_C( 36), INT8_C( 73), INT8_C( 75), -INT8_C( 20), INT8_C( 118), -INT8_C( 112), INT8_C( 106), INT8_C( 48), INT8_C( 114), INT8_C( 88), -INT8_C( 21), -INT8_C( 83), INT8_C( 91), INT8_C( 14), INT8_MAX, -INT8_C( 59), -INT8_C( 40), -INT8_C( 88), INT8_C( 98), INT8_C( 23), INT8_C( 98), INT8_C( 3), -INT8_C( 16), -INT8_C( 85), INT8_C( 20), INT8_MAX, -INT8_C( 72), INT8_C( 113), INT8_C( 12), -INT8_C( 118), INT8_C( 51), -INT8_C( 30), INT8_C( 32), INT8_C( 40), -INT8_C( 66), INT8_C( 36), INT8_C( 121), INT8_C( 30), INT8_C( 101), -INT8_C( 98), INT8_C( 20), INT8_C( 30), INT8_C( 8), -INT8_C( 109), INT8_C( 59), -INT8_C( 40), INT8_C( 27), -INT8_C( 80), INT8_C( 74), -INT8_C( 81), -INT8_C( 47), -INT8_C( 111), INT8_C( 93), INT8_C( 42), INT8_C( 72), INT8_C( 12), INT8_C( 96), INT8_C( 95), } }, { { -INT8_C( 117), -INT8_C( 96), -INT8_C( 47), -INT8_C( 25), INT8_C( 120), INT8_C( 51), -INT8_C( 5), -INT8_C( 124), INT8_C( 83), INT8_C( 116), -INT8_C( 103), -INT8_C( 114), -INT8_C( 66), -INT8_C( 10), -INT8_C( 18), INT8_C( 105) }, { INT8_C( 63), INT8_MAX, -INT8_C( 63), INT8_C( 77), INT8_C( 39), -INT8_C( 59), -INT8_C( 52), -INT8_C( 79), -INT8_C( 19), -INT8_C( 22), -INT8_C( 49), -INT8_C( 59), -INT8_C( 103), -INT8_C( 7), INT8_C( 37), INT8_C( 37) }, { -INT8_C( 103), -INT8_C( 10), INT8_C( 12), INT8_C( 18), INT8_C( 42), INT8_C( 7), -INT8_C( 106), INT8_C( 125), INT8_C( 123), INT8_C( 47), INT8_C( 12), INT8_C( 57), INT8_C( 37), -INT8_C( 6), -INT8_C( 93), INT8_C( 100) }, { INT8_C( 121), INT8_C( 100), -INT8_C( 79), -INT8_C( 96), INT8_C( 41), INT8_C( 126), INT8_C( 82), INT8_C( 22), INT8_C( 104), INT8_C( 33), -INT8_C( 36), INT8_C( 2), 
INT8_C( 27), INT8_C( 1), INT8_C( 39), -INT8_C( 76) }, { -INT8_C( 117), INT8_C( 63), -INT8_C( 103), INT8_C( 121), -INT8_C( 96), INT8_MAX, -INT8_C( 10), INT8_C( 100), -INT8_C( 47), -INT8_C( 63), INT8_C( 12), -INT8_C( 79), -INT8_C( 25), INT8_C( 77), INT8_C( 18), -INT8_C( 96), INT8_C( 120), INT8_C( 39), INT8_C( 42), INT8_C( 41), INT8_C( 51), -INT8_C( 59), INT8_C( 7), INT8_C( 126), -INT8_C( 5), -INT8_C( 52), -INT8_C( 106), INT8_C( 82), -INT8_C( 124), -INT8_C( 79), INT8_C( 125), INT8_C( 22), INT8_C( 83), -INT8_C( 19), INT8_C( 123), INT8_C( 104), INT8_C( 116), -INT8_C( 22), INT8_C( 47), INT8_C( 33), -INT8_C( 103), -INT8_C( 49), INT8_C( 12), -INT8_C( 36), -INT8_C( 114), -INT8_C( 59), INT8_C( 57), INT8_C( 2), -INT8_C( 66), -INT8_C( 103), INT8_C( 37), INT8_C( 27), -INT8_C( 10), -INT8_C( 7), -INT8_C( 6), INT8_C( 1), -INT8_C( 18), INT8_C( 37), -INT8_C( 93), INT8_C( 39), INT8_C( 105), INT8_C( 37), INT8_C( 100), -INT8_C( 76), } }, { { -INT8_C( 9), INT8_C( 51), -INT8_C( 58), INT8_C( 33), INT8_C( 58), INT8_C( 93), -INT8_C( 97), -INT8_C( 75), -INT8_C( 116), -INT8_C( 85), -INT8_C( 18), -INT8_C( 78), -INT8_C( 91), -INT8_C( 111), INT8_C( 22), INT8_C( 31) }, { -INT8_C( 10), -INT8_C( 56), -INT8_C( 65), INT8_C( 31), INT8_C( 70), INT8_C( 17), INT8_C( 54), -INT8_C( 82), INT8_C( 51), INT8_C( 18), -INT8_C( 80), INT8_C( 78), INT8_C( 19), -INT8_C( 41), INT8_C( 2), INT8_C( 10) }, { INT8_C( 10), -INT8_C( 55), INT8_C( 44), INT8_C( 68), INT8_C( 38), -INT8_C( 53), -INT8_C( 7), -INT8_C( 78), INT8_C( 118), -INT8_C( 24), INT8_C( 100), INT8_C( 27), INT8_C( 121), INT8_C( 123), INT8_C( 58), INT8_C( 111) }, { INT8_C( 67), -INT8_C( 6), -INT8_C( 113), -INT8_C( 119), INT8_C( 11), -INT8_C( 59), INT8_C( 55), INT8_C( 62), -INT8_C( 41), -INT8_C( 24), -INT8_C( 116), -INT8_C( 22), -INT8_C( 65), -INT8_C( 113), -INT8_C( 12), -INT8_C( 54) }, { -INT8_C( 9), -INT8_C( 10), INT8_C( 10), INT8_C( 67), INT8_C( 51), -INT8_C( 56), -INT8_C( 55), -INT8_C( 6), -INT8_C( 58), -INT8_C( 65), INT8_C( 44), -INT8_C( 113), INT8_C( 33), 
INT8_C( 31), INT8_C( 68), -INT8_C( 119), INT8_C( 58), INT8_C( 70), INT8_C( 38), INT8_C( 11), INT8_C( 93), INT8_C( 17), -INT8_C( 53), -INT8_C( 59), -INT8_C( 97), INT8_C( 54), -INT8_C( 7), INT8_C( 55), -INT8_C( 75), -INT8_C( 82), -INT8_C( 78), INT8_C( 62), -INT8_C( 116), INT8_C( 51), INT8_C( 118), -INT8_C( 41), -INT8_C( 85), INT8_C( 18), -INT8_C( 24), -INT8_C( 24), -INT8_C( 18), -INT8_C( 80), INT8_C( 100), -INT8_C( 116), -INT8_C( 78), INT8_C( 78), INT8_C( 27), -INT8_C( 22), -INT8_C( 91), INT8_C( 19), INT8_C( 121), -INT8_C( 65), -INT8_C( 111), -INT8_C( 41), INT8_C( 123), -INT8_C( 113), INT8_C( 22), INT8_C( 2), INT8_C( 58), -INT8_C( 12), INT8_C( 31), INT8_C( 10), INT8_C( 111), -INT8_C( 54), } }, { { INT8_C( 88), INT8_C( 32), INT8_C( 14), INT8_C( 126), -INT8_C( 21), INT8_C( 8), INT8_C( 48), INT8_C( 97), -INT8_C( 16), -INT8_C( 107), INT8_C( 125), INT8_C( 105), INT8_C( 16), -INT8_C( 73), -INT8_C( 39), INT8_C( 83) }, { -INT8_C( 79), INT8_C( 104), -INT8_C( 36), -INT8_C( 67), INT8_C( 45), INT8_C( 19), -INT8_C( 5), INT8_C( 4), -INT8_C( 5), -INT8_C( 120), -INT8_C( 18), -INT8_C( 69), INT8_C( 23), -INT8_C( 30), -INT8_C( 123), INT8_C( 111) }, { INT8_C( 3), -INT8_C( 109), -INT8_C( 19), -INT8_C( 18), -INT8_C( 101), INT8_C( 29), INT8_C( 80), -INT8_C( 117), -INT8_C( 78), -INT8_C( 51), -INT8_C( 11), -INT8_C( 62), -INT8_C( 124), -INT8_C( 50), INT8_C( 21), INT8_C( 54) }, { INT8_C( 54), -INT8_C( 15), -INT8_C( 13), INT8_C( 99), INT8_C( 5), -INT8_C( 18), INT8_C( 103), INT8_C( 0), INT8_C( 118), INT8_C( 85), -INT8_C( 69), -INT8_C( 115), INT8_C( 55), INT8_C( 64), -INT8_C( 4), INT8_C( 58) }, { INT8_C( 88), -INT8_C( 79), INT8_C( 3), INT8_C( 54), INT8_C( 32), INT8_C( 104), -INT8_C( 109), -INT8_C( 15), INT8_C( 14), -INT8_C( 36), -INT8_C( 19), -INT8_C( 13), INT8_C( 126), -INT8_C( 67), -INT8_C( 18), INT8_C( 99), -INT8_C( 21), INT8_C( 45), -INT8_C( 101), INT8_C( 5), INT8_C( 8), INT8_C( 19), INT8_C( 29), -INT8_C( 18), INT8_C( 48), -INT8_C( 5), INT8_C( 80), INT8_C( 103), INT8_C( 97), INT8_C( 4), 
-INT8_C( 117), INT8_C( 0), -INT8_C( 16), -INT8_C( 5), -INT8_C( 78), INT8_C( 118), -INT8_C( 107), -INT8_C( 120), -INT8_C( 51), INT8_C( 85), INT8_C( 125), -INT8_C( 18), -INT8_C( 11), -INT8_C( 69), INT8_C( 105), -INT8_C( 69), -INT8_C( 62), -INT8_C( 115), INT8_C( 16), INT8_C( 23), -INT8_C( 124), INT8_C( 55), -INT8_C( 73), -INT8_C( 30), -INT8_C( 50), INT8_C( 64), -INT8_C( 39), -INT8_C( 123), INT8_C( 21), -INT8_C( 4), INT8_C( 83), INT8_C( 111), INT8_C( 54), INT8_C( 58), } }, { { -INT8_C( 44), -INT8_C( 23), INT8_C( 41), INT8_C( 111), INT8_C( 7), INT8_C( 121), -INT8_C( 5), -INT8_C( 71), INT8_C( 70), -INT8_C( 16), INT8_C( 124), -INT8_C( 54), -INT8_C( 66), -INT8_C( 111), INT8_C( 0), -INT8_C( 12) }, { -INT8_C( 125), -INT8_C( 13), INT8_C( 87), -INT8_C( 120), -INT8_C( 30), -INT8_C( 66), -INT8_C( 120), INT8_C( 88), INT8_C( 19), INT8_C( 68), -INT8_C( 26), INT8_C( 74), -INT8_C( 124), -INT8_C( 30), -INT8_C( 123), INT8_C( 88) }, { -INT8_C( 52), -INT8_C( 82), -INT8_C( 56), -INT8_C( 45), INT8_C( 39), -INT8_C( 61), -INT8_C( 116), INT8_C( 109), -INT8_C( 77), INT8_C( 8), INT8_C( 55), INT8_C( 113), -INT8_C( 102), INT8_C( 56), INT8_C( 101), INT8_C( 29) }, { INT8_C( 43), -INT8_C( 68), -INT8_C( 91), INT8_C( 13), INT8_C( 122), INT8_C( 45), INT8_C( 102), -INT8_C( 115), INT8_C( 113), INT8_C( 76), -INT8_C( 41), -INT8_C( 10), INT8_C( 46), INT8_C( 92), INT8_C( 78), -INT8_C( 6) }, { -INT8_C( 44), -INT8_C( 125), -INT8_C( 52), INT8_C( 43), -INT8_C( 23), -INT8_C( 13), -INT8_C( 82), -INT8_C( 68), INT8_C( 41), INT8_C( 87), -INT8_C( 56), -INT8_C( 91), INT8_C( 111), -INT8_C( 120), -INT8_C( 45), INT8_C( 13), INT8_C( 7), -INT8_C( 30), INT8_C( 39), INT8_C( 122), INT8_C( 121), -INT8_C( 66), -INT8_C( 61), INT8_C( 45), -INT8_C( 5), -INT8_C( 120), -INT8_C( 116), INT8_C( 102), -INT8_C( 71), INT8_C( 88), INT8_C( 109), -INT8_C( 115), INT8_C( 70), INT8_C( 19), -INT8_C( 77), INT8_C( 113), -INT8_C( 16), INT8_C( 68), INT8_C( 8), INT8_C( 76), INT8_C( 124), -INT8_C( 26), INT8_C( 55), -INT8_C( 41), -INT8_C( 54), INT8_C( 
74), INT8_C( 113), -INT8_C( 10), -INT8_C( 66), -INT8_C( 124), -INT8_C( 102), INT8_C( 46), -INT8_C( 111), -INT8_C( 30), INT8_C( 56), INT8_C( 92), INT8_C( 0), -INT8_C( 123), INT8_C( 101), INT8_C( 78), -INT8_C( 12), INT8_C( 88), INT8_C( 29), -INT8_C( 6), } }, { { INT8_C( 10), INT8_C( 22), -INT8_C( 51), INT8_C( 49), -INT8_C( 39), INT8_C( 90), -INT8_C( 98), -INT8_C( 116), INT8_C( 98), -INT8_C( 42), -INT8_C( 3), -INT8_C( 4), INT8_C( 14), INT8_C( 98), INT8_C( 25), INT8_C( 57) }, { INT8_C( 30), -INT8_C( 66), INT8_C( 71), -INT8_C( 104), -INT8_C( 20), -INT8_C( 83), INT8_C( 37), INT8_C( 93), -INT8_C( 7), -INT8_C( 3), INT8_C( 83), INT8_C( 39), INT8_C( 89), -INT8_C( 94), INT8_C( 34), INT8_C( 100) }, { -INT8_C( 72), -INT8_C( 17), -INT8_C( 107), -INT8_C( 110), INT8_C( 73), INT8_C( 52), INT8_C( 30), -INT8_C( 84), INT8_C( 10), INT8_C( 28), -INT8_C( 88), INT8_C( 24), INT8_C( 126), -INT8_C( 62), INT8_C( 81), -INT8_C( 99) }, { INT8_MIN, -INT8_C( 104), INT8_C( 53), INT8_C( 108), INT8_C( 69), INT8_C( 91), -INT8_C( 54), INT8_C( 62), INT8_C( 88), INT8_C( 29), INT8_C( 102), -INT8_C( 79), -INT8_C( 65), -INT8_C( 120), INT8_C( 21), INT8_C( 120) }, { INT8_C( 10), INT8_C( 30), -INT8_C( 72), INT8_MIN, INT8_C( 22), -INT8_C( 66), -INT8_C( 17), -INT8_C( 104), -INT8_C( 51), INT8_C( 71), -INT8_C( 107), INT8_C( 53), INT8_C( 49), -INT8_C( 104), -INT8_C( 110), INT8_C( 108), -INT8_C( 39), -INT8_C( 20), INT8_C( 73), INT8_C( 69), INT8_C( 90), -INT8_C( 83), INT8_C( 52), INT8_C( 91), -INT8_C( 98), INT8_C( 37), INT8_C( 30), -INT8_C( 54), -INT8_C( 116), INT8_C( 93), -INT8_C( 84), INT8_C( 62), INT8_C( 98), -INT8_C( 7), INT8_C( 10), INT8_C( 88), -INT8_C( 42), -INT8_C( 3), INT8_C( 28), INT8_C( 29), -INT8_C( 3), INT8_C( 83), -INT8_C( 88), INT8_C( 102), -INT8_C( 4), INT8_C( 39), INT8_C( 24), -INT8_C( 79), INT8_C( 14), INT8_C( 89), INT8_C( 126), -INT8_C( 65), INT8_C( 98), -INT8_C( 94), -INT8_C( 62), -INT8_C( 120), INT8_C( 25), INT8_C( 34), INT8_C( 81), INT8_C( 21), INT8_C( 57), INT8_C( 100), -INT8_C( 99), INT8_C( 
120), } }, { { INT8_C( 119), -INT8_C( 85), INT8_C( 10), -INT8_C( 63), -INT8_C( 33), INT8_C( 40), INT8_C( 109), -INT8_C( 23), INT8_C( 68), INT8_C( 21), INT8_C( 1), -INT8_C( 61), -INT8_C( 41), INT8_C( 82), INT8_C( 96), INT8_C( 88) }, { -INT8_C( 21), -INT8_C( 107), -INT8_C( 60), INT8_C( 48), -INT8_C( 16), -INT8_C( 114), INT8_C( 111), INT8_C( 72), -INT8_C( 84), -INT8_C( 43), -INT8_C( 6), INT8_C( 107), INT8_C( 93), INT8_C( 15), -INT8_C( 29), -INT8_C( 44) }, { -INT8_C( 70), -INT8_C( 19), -INT8_C( 107), -INT8_C( 103), INT8_C( 22), INT8_C( 2), -INT8_C( 126), INT8_C( 90), INT8_C( 24), -INT8_C( 125), INT8_C( 29), -INT8_C( 17), -INT8_C( 42), INT8_C( 125), INT8_C( 71), -INT8_C( 63) }, { INT8_C( 19), INT8_C( 12), -INT8_C( 15), INT8_C( 3), -INT8_C( 102), INT8_C( 96), INT8_C( 76), INT8_C( 70), INT8_C( 53), INT8_C( 70), -INT8_C( 78), -INT8_C( 110), INT8_C( 85), -INT8_C( 107), INT8_C( 103), INT8_C( 16) }, { INT8_C( 119), -INT8_C( 21), -INT8_C( 70), INT8_C( 19), -INT8_C( 85), -INT8_C( 107), -INT8_C( 19), INT8_C( 12), INT8_C( 10), -INT8_C( 60), -INT8_C( 107), -INT8_C( 15), -INT8_C( 63), INT8_C( 48), -INT8_C( 103), INT8_C( 3), -INT8_C( 33), -INT8_C( 16), INT8_C( 22), -INT8_C( 102), INT8_C( 40), -INT8_C( 114), INT8_C( 2), INT8_C( 96), INT8_C( 109), INT8_C( 111), -INT8_C( 126), INT8_C( 76), -INT8_C( 23), INT8_C( 72), INT8_C( 90), INT8_C( 70), INT8_C( 68), -INT8_C( 84), INT8_C( 24), INT8_C( 53), INT8_C( 21), -INT8_C( 43), -INT8_C( 125), INT8_C( 70), INT8_C( 1), -INT8_C( 6), INT8_C( 29), -INT8_C( 78), -INT8_C( 61), INT8_C( 107), -INT8_C( 17), -INT8_C( 110), -INT8_C( 41), INT8_C( 93), -INT8_C( 42), INT8_C( 85), INT8_C( 82), INT8_C( 15), INT8_C( 125), -INT8_C( 107), INT8_C( 96), -INT8_C( 29), INT8_C( 71), INT8_C( 103), INT8_C( 88), -INT8_C( 44), -INT8_C( 63), INT8_C( 16), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16x4_t r_ = { { simde_vld1q_s8(test_vec[i].r0), simde_vld1q_s8(test_vec[i].r1), simde_vld1q_s8(test_vec[i].r2), 
simde_vld1q_s8(test_vec[i].r3), } }; int8_t a_[64]; simde_vst4q_s8(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld4q_s8(a_); simde_test_arm_neon_assert_equal_i8x16(r_.val[0], simde_vld1q_s8(test_vec[i].r0)); simde_test_arm_neon_assert_equal_i8x16(r_.val[1], simde_vld1q_s8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i8x16(r_.val[2], simde_vld1q_s8(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a0 = simde_test_arm_neon_random_i8x16(); simde_int8x16_t a1 = simde_test_arm_neon_random_i8x16(); simde_int8x16_t a2 = simde_test_arm_neon_random_i8x16(); simde_int8x16_t a3 = simde_test_arm_neon_random_i8x16(); simde_int8x16x4_t a = { { a0, a1, a2, a3 } }; simde_test_arm_neon_write_i8x16(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, a3, SIMDE_TEST_VEC_POS_MIDDLE); int8_t buf[64]; simde_vst4q_s8(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_i8(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst4q_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t r0[8]; int16_t r1[8]; int16_t r2[8]; int16_t r3[8]; int16_t a[32]; } test_vec[] = { { { -INT16_C( 5888), INT16_C( 846), INT16_C( 30499), -INT16_C( 13089), INT16_C( 15915), INT16_C( 21143), INT16_C( 25352), -INT16_C( 16725) }, { -INT16_C( 30030), -INT16_C( 28847), -INT16_C( 11856), -INT16_C( 11620), INT16_C( 12783), INT16_C( 25877), INT16_C( 2830), INT16_C( 3662) }, { -INT16_C( 25356), INT16_C( 6161), -INT16_C( 4077), INT16_C( 16100), INT16_C( 31791), INT16_C( 14225), INT16_C( 15583), -INT16_C( 28171) }, { INT16_C( 18119), INT16_C( 30496), -INT16_C( 17385), INT16_C( 1609), INT16_C( 24301), -INT16_C( 1172), -INT16_C( 17814), 
INT16_C( 24074) }, { -INT16_C( 5888), -INT16_C( 30030), -INT16_C( 25356), INT16_C( 18119), INT16_C( 846), -INT16_C( 28847), INT16_C( 6161), INT16_C( 30496), INT16_C( 30499), -INT16_C( 11856), -INT16_C( 4077), -INT16_C( 17385), -INT16_C( 13089), -INT16_C( 11620), INT16_C( 16100), INT16_C( 1609), INT16_C( 15915), INT16_C( 12783), INT16_C( 31791), INT16_C( 24301), INT16_C( 21143), INT16_C( 25877), INT16_C( 14225), -INT16_C( 1172), INT16_C( 25352), INT16_C( 2830), INT16_C( 15583), -INT16_C( 17814), -INT16_C( 16725), INT16_C( 3662), -INT16_C( 28171), INT16_C( 24074), } }, { { INT16_C( 6999), INT16_C( 27254), INT16_C( 23308), INT16_C( 15273), INT16_C( 15063), -INT16_C( 18830), INT16_C( 26486), INT16_C( 15687) }, { INT16_C( 26542), -INT16_C( 14924), -INT16_C( 733), INT16_C( 4300), INT16_C( 14428), -INT16_C( 14836), INT16_C( 5874), INT16_C( 18724) }, { -INT16_C( 25807), INT16_C( 15796), INT16_C( 24054), -INT16_C( 12936), -INT16_C( 5481), INT16_C( 3459), -INT16_C( 13742), INT16_C( 75) }, { -INT16_C( 207), INT16_C( 21701), -INT16_C( 28163), INT16_C( 22884), INT16_C( 28873), -INT16_C( 17377), INT16_C( 17286), -INT16_C( 18427) }, { INT16_C( 6999), INT16_C( 26542), -INT16_C( 25807), -INT16_C( 207), INT16_C( 27254), -INT16_C( 14924), INT16_C( 15796), INT16_C( 21701), INT16_C( 23308), -INT16_C( 733), INT16_C( 24054), -INT16_C( 28163), INT16_C( 15273), INT16_C( 4300), -INT16_C( 12936), INT16_C( 22884), INT16_C( 15063), INT16_C( 14428), -INT16_C( 5481), INT16_C( 28873), -INT16_C( 18830), -INT16_C( 14836), INT16_C( 3459), -INT16_C( 17377), INT16_C( 26486), INT16_C( 5874), -INT16_C( 13742), INT16_C( 17286), INT16_C( 15687), INT16_C( 18724), INT16_C( 75), -INT16_C( 18427), } }, { { -INT16_C( 17954), -INT16_C( 11019), INT16_C( 28182), -INT16_C( 21087), INT16_C( 9304), -INT16_C( 21829), INT16_C( 1774), INT16_C( 8106) }, { INT16_C( 28677), INT16_C( 627), -INT16_C( 10239), -INT16_C( 13477), INT16_C( 31304), -INT16_C( 12409), -INT16_C( 29506), -INT16_C( 25465) }, { INT16_C( 31814), 
INT16_C( 23665), INT16_C( 4842), INT16_C( 17162), -INT16_C( 15049), INT16_C( 9709), -INT16_C( 26421), -INT16_C( 12219) }, { -INT16_C( 18424), INT16_C( 2515), INT16_C( 11920), -INT16_C( 9772), INT16_C( 23465), INT16_C( 26536), INT16_C( 12264), INT16_C( 11779) }, { -INT16_C( 17954), INT16_C( 28677), INT16_C( 31814), -INT16_C( 18424), -INT16_C( 11019), INT16_C( 627), INT16_C( 23665), INT16_C( 2515), INT16_C( 28182), -INT16_C( 10239), INT16_C( 4842), INT16_C( 11920), -INT16_C( 21087), -INT16_C( 13477), INT16_C( 17162), -INT16_C( 9772), INT16_C( 9304), INT16_C( 31304), -INT16_C( 15049), INT16_C( 23465), -INT16_C( 21829), -INT16_C( 12409), INT16_C( 9709), INT16_C( 26536), INT16_C( 1774), -INT16_C( 29506), -INT16_C( 26421), INT16_C( 12264), INT16_C( 8106), -INT16_C( 25465), -INT16_C( 12219), INT16_C( 11779), } }, { { INT16_C( 29867), -INT16_C( 26998), -INT16_C( 27513), -INT16_C( 16679), -INT16_C( 14759), INT16_C( 9443), INT16_C( 10334), INT16_C( 26357) }, { -INT16_C( 14111), INT16_C( 29040), INT16_C( 17654), -INT16_C( 24758), -INT16_C( 3424), -INT16_C( 30714), INT16_C( 2593), -INT16_C( 12874) }, { INT16_C( 16510), INT16_C( 1379), INT16_C( 15573), INT16_C( 11971), -INT16_C( 22782), INT16_C( 24915), INT16_C( 18639), -INT16_C( 20281) }, { INT16_C( 14096), INT16_C( 1570), INT16_C( 27772), INT16_C( 7334), -INT16_C( 21409), -INT16_C( 32604), INT16_C( 23222), INT16_C( 13645) }, { INT16_C( 29867), -INT16_C( 14111), INT16_C( 16510), INT16_C( 14096), -INT16_C( 26998), INT16_C( 29040), INT16_C( 1379), INT16_C( 1570), -INT16_C( 27513), INT16_C( 17654), INT16_C( 15573), INT16_C( 27772), -INT16_C( 16679), -INT16_C( 24758), INT16_C( 11971), INT16_C( 7334), -INT16_C( 14759), -INT16_C( 3424), -INT16_C( 22782), -INT16_C( 21409), INT16_C( 9443), -INT16_C( 30714), INT16_C( 24915), -INT16_C( 32604), INT16_C( 10334), INT16_C( 2593), INT16_C( 18639), INT16_C( 23222), INT16_C( 26357), -INT16_C( 12874), -INT16_C( 20281), INT16_C( 13645), } }, { { -INT16_C( 20326), INT16_C( 28474), -INT16_C( 276), 
-INT16_C( 4194), -INT16_C( 3675), INT16_C( 29776), INT16_C( 5945), INT16_C( 18725) }, { INT16_C( 18255), -INT16_C( 13489), -INT16_C( 2637), INT16_C( 4839), -INT16_C( 29790), INT16_C( 22675), -INT16_C( 7963), INT16_C( 32653) }, { -INT16_C( 14191), INT16_C( 32239), -INT16_C( 29242), INT16_C( 27500), -INT16_C( 17282), -INT16_C( 18465), INT16_C( 1236), INT16_C( 8960) }, { INT16_C( 20299), -INT16_C( 18), -INT16_C( 10939), -INT16_C( 6383), -INT16_C( 23456), INT16_C( 17727), -INT16_C( 12923), INT16_C( 5828) }, { -INT16_C( 20326), INT16_C( 18255), -INT16_C( 14191), INT16_C( 20299), INT16_C( 28474), -INT16_C( 13489), INT16_C( 32239), -INT16_C( 18), -INT16_C( 276), -INT16_C( 2637), -INT16_C( 29242), -INT16_C( 10939), -INT16_C( 4194), INT16_C( 4839), INT16_C( 27500), -INT16_C( 6383), -INT16_C( 3675), -INT16_C( 29790), -INT16_C( 17282), -INT16_C( 23456), INT16_C( 29776), INT16_C( 22675), -INT16_C( 18465), INT16_C( 17727), INT16_C( 5945), -INT16_C( 7963), INT16_C( 1236), -INT16_C( 12923), INT16_C( 18725), INT16_C( 32653), INT16_C( 8960), INT16_C( 5828), } }, { { -INT16_C( 19563), INT16_C( 23443), INT16_C( 64), -INT16_C( 16698), -INT16_C( 23108), -INT16_C( 28555), INT16_C( 30122), -INT16_C( 2637) }, { -INT16_C( 24123), INT16_C( 2804), INT16_C( 1654), -INT16_C( 10511), INT16_C( 12458), INT16_C( 12059), -INT16_C( 7939), -INT16_C( 28091) }, { -INT16_C( 9837), -INT16_C( 11027), -INT16_C( 19495), -INT16_C( 27246), INT16_C( 2137), INT16_C( 806), -INT16_C( 9859), INT16_C( 17144) }, { -INT16_C( 4741), -INT16_C( 3764), INT16_C( 15859), -INT16_C( 25144), -INT16_C( 7314), INT16_C( 27597), INT16_C( 4803), INT16_C( 22526) }, { -INT16_C( 19563), -INT16_C( 24123), -INT16_C( 9837), -INT16_C( 4741), INT16_C( 23443), INT16_C( 2804), -INT16_C( 11027), -INT16_C( 3764), INT16_C( 64), INT16_C( 1654), -INT16_C( 19495), INT16_C( 15859), -INT16_C( 16698), -INT16_C( 10511), -INT16_C( 27246), -INT16_C( 25144), -INT16_C( 23108), INT16_C( 12458), INT16_C( 2137), -INT16_C( 7314), -INT16_C( 28555), INT16_C( 
12059), INT16_C( 806), INT16_C( 27597), INT16_C( 30122), -INT16_C( 7939), -INT16_C( 9859), INT16_C( 4803), -INT16_C( 2637), -INT16_C( 28091), INT16_C( 17144), INT16_C( 22526), } }, { { -INT16_C( 5141), -INT16_C( 15317), -INT16_C( 16993), -INT16_C( 1958), -INT16_C( 32571), INT16_C( 17403), -INT16_C( 3239), -INT16_C( 11131) }, { -INT16_C( 11552), -INT16_C( 11322), -INT16_C( 29169), INT16_C( 32113), INT16_C( 15985), INT16_C( 13801), -INT16_C( 6320), INT16_C( 15500) }, { -INT16_C( 18478), INT16_C( 28928), INT16_C( 23156), INT16_C( 14953), INT16_C( 25818), INT16_C( 13437), INT16_C( 600), INT16_C( 14344) }, { -INT16_C( 12588), -INT16_C( 7156), INT16_C( 32092), -INT16_C( 12703), INT16_C( 19131), INT16_C( 2819), -INT16_C( 28879), INT16_C( 1095) }, { -INT16_C( 5141), -INT16_C( 11552), -INT16_C( 18478), -INT16_C( 12588), -INT16_C( 15317), -INT16_C( 11322), INT16_C( 28928), -INT16_C( 7156), -INT16_C( 16993), -INT16_C( 29169), INT16_C( 23156), INT16_C( 32092), -INT16_C( 1958), INT16_C( 32113), INT16_C( 14953), -INT16_C( 12703), -INT16_C( 32571), INT16_C( 15985), INT16_C( 25818), INT16_C( 19131), INT16_C( 17403), INT16_C( 13801), INT16_C( 13437), INT16_C( 2819), -INT16_C( 3239), -INT16_C( 6320), INT16_C( 600), -INT16_C( 28879), -INT16_C( 11131), INT16_C( 15500), INT16_C( 14344), INT16_C( 1095), } }, { { INT16_C( 18502), -INT16_C( 17803), -INT16_C( 8286), INT16_C( 32244), INT16_C( 28995), -INT16_C( 25679), -INT16_C( 18060), INT16_C( 18644) }, { -INT16_C( 8056), -INT16_C( 7124), -INT16_C( 29091), INT16_C( 6322), -INT16_C( 18984), INT16_C( 2595), INT16_C( 27460), -INT16_C( 30194) }, { -INT16_C( 31821), INT16_C( 21829), INT16_C( 14690), -INT16_C( 22830), -INT16_C( 31829), INT16_C( 8001), INT16_C( 5437), -INT16_C( 15001) }, { -INT16_C( 27403), INT16_C( 21161), INT16_C( 23586), -INT16_C( 1430), -INT16_C( 29167), INT16_C( 22020), INT16_C( 4857), -INT16_C( 21280) }, { INT16_C( 18502), -INT16_C( 8056), -INT16_C( 31821), -INT16_C( 27403), -INT16_C( 17803), -INT16_C( 7124), INT16_C( 
21829), INT16_C( 21161), -INT16_C( 8286), -INT16_C( 29091), INT16_C( 14690), INT16_C( 23586), INT16_C( 32244), INT16_C( 6322), -INT16_C( 22830), -INT16_C( 1430), INT16_C( 28995), -INT16_C( 18984), -INT16_C( 31829), -INT16_C( 29167), -INT16_C( 25679), INT16_C( 2595), INT16_C( 8001), INT16_C( 22020), -INT16_C( 18060), INT16_C( 27460), INT16_C( 5437), INT16_C( 4857), INT16_C( 18644), -INT16_C( 30194), -INT16_C( 15001), -INT16_C( 21280), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8x4_t r_ = { { simde_vld1q_s16(test_vec[i].r0), simde_vld1q_s16(test_vec[i].r1), simde_vld1q_s16(test_vec[i].r2), simde_vld1q_s16(test_vec[i].r3), } }; int16_t a_[32]; simde_vst4q_s16(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld4q_s16(a_); simde_test_arm_neon_assert_equal_i16x8(r_.val[0], simde_vld1q_s16(test_vec[i].r0)); simde_test_arm_neon_assert_equal_i16x8(r_.val[1], simde_vld1q_s16(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i16x8(r_.val[2], simde_vld1q_s16(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a0 = simde_test_arm_neon_random_i16x8(); simde_int16x8_t a1 = simde_test_arm_neon_random_i16x8(); simde_int16x8_t a2 = simde_test_arm_neon_random_i16x8(); simde_int16x8_t a3 = simde_test_arm_neon_random_i16x8(); simde_int16x8x4_t a = { { a0, a1, a2, a3 } }; simde_test_arm_neon_write_i16x8(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, a3, SIMDE_TEST_VEC_POS_MIDDLE); int16_t buf[32]; simde_vst4q_s16(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_i16(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst4q_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static 
const struct { int32_t r0[4]; int32_t r1[4]; int32_t r2[4]; int32_t r3[4]; int32_t a[16]; } test_vec[] = { { { INT32_C( 828023628), -INT32_C( 521468002), INT32_C( 799178535), INT32_C( 116664159) }, { -INT32_C( 1033881595), INT32_C( 549532486), -INT32_C( 1241418747), -INT32_C( 1122383247) }, { INT32_C( 200242029), -INT32_C( 1561535877), -INT32_C( 1177448871), -INT32_C( 1161837131) }, { INT32_C( 1132273661), INT32_C( 1482899027), INT32_C( 588145842), -INT32_C( 1662965969) }, { INT32_C( 828023628), -INT32_C( 1033881595), INT32_C( 200242029), INT32_C( 1132273661), -INT32_C( 521468002), INT32_C( 549532486), -INT32_C( 1561535877), INT32_C( 1482899027), INT32_C( 799178535), -INT32_C( 1241418747), -INT32_C( 1177448871), INT32_C( 588145842), INT32_C( 116664159), -INT32_C( 1122383247), -INT32_C( 1161837131), -INT32_C( 1662965969), } }, { { INT32_C( 380162203), INT32_C( 62428330), -INT32_C( 675509982), INT32_C( 1284602702) }, { -INT32_C( 292614501), -INT32_C( 12127411), -INT32_C( 2044504745), INT32_C( 388170876) }, { INT32_C( 2116930516), -INT32_C( 2122193569), -INT32_C( 1118290322), INT32_C( 1409935801) }, { INT32_C( 1161992440), -INT32_C( 482047605), INT32_C( 1516857310), INT32_C( 1064471659) }, { INT32_C( 380162203), -INT32_C( 292614501), INT32_C( 2116930516), INT32_C( 1161992440), INT32_C( 62428330), -INT32_C( 12127411), -INT32_C( 2122193569), -INT32_C( 482047605), -INT32_C( 675509982), -INT32_C( 2044504745), -INT32_C( 1118290322), INT32_C( 1516857310), INT32_C( 1284602702), INT32_C( 388170876), INT32_C( 1409935801), INT32_C( 1064471659), } }, { { -INT32_C( 1229086889), -INT32_C( 214483067), INT32_C( 917540733), INT32_C( 1904982393) }, { -INT32_C( 575222446), INT32_C( 885062230), -INT32_C( 846255518), INT32_C( 235733430) }, { INT32_C( 633653920), -INT32_C( 2045117431), INT32_C( 79546763), -INT32_C( 713734013) }, { INT32_C( 1823615765), -INT32_C( 2002750682), INT32_C( 1398091677), -INT32_C( 782147024) }, { -INT32_C( 1229086889), -INT32_C( 575222446), INT32_C( 633653920), 
INT32_C( 1823615765), -INT32_C( 214483067), INT32_C( 885062230), -INT32_C( 2045117431), -INT32_C( 2002750682), INT32_C( 917540733), -INT32_C( 846255518), INT32_C( 79546763), INT32_C( 1398091677), INT32_C( 1904982393), INT32_C( 235733430), -INT32_C( 713734013), -INT32_C( 782147024), } }, { { INT32_C( 922101293), -INT32_C( 1380118750), INT32_C( 1555200729), -INT32_C( 684644414) }, { INT32_C( 2034492243), -INT32_C( 217979818), INT32_C( 1145525779), -INT32_C( 434788167) }, { -INT32_C( 266597426), -INT32_C( 190916325), INT32_C( 357584979), -INT32_C( 890404489) }, { -INT32_C( 1153224604), INT32_C( 682509332), INT32_C( 1416426907), INT32_C( 1815773598) }, { INT32_C( 922101293), INT32_C( 2034492243), -INT32_C( 266597426), -INT32_C( 1153224604), -INT32_C( 1380118750), -INT32_C( 217979818), -INT32_C( 190916325), INT32_C( 682509332), INT32_C( 1555200729), INT32_C( 1145525779), INT32_C( 357584979), INT32_C( 1416426907), -INT32_C( 684644414), -INT32_C( 434788167), -INT32_C( 890404489), INT32_C( 1815773598), } }, { { -INT32_C( 1487055220), -INT32_C( 2086929616), -INT32_C( 1030100149), -INT32_C( 779254164) }, { -INT32_C( 879963978), -INT32_C( 1326237163), -INT32_C( 838574288), INT32_C( 1815756512) }, { -INT32_C( 1005283436), -INT32_C( 582439022), INT32_C( 127984027), INT32_C( 500706663) }, { INT32_C( 317220093), -INT32_C( 809313377), INT32_C( 446547514), -INT32_C( 1719150844) }, { -INT32_C( 1487055220), -INT32_C( 879963978), -INT32_C( 1005283436), INT32_C( 317220093), -INT32_C( 2086929616), -INT32_C( 1326237163), -INT32_C( 582439022), -INT32_C( 809313377), -INT32_C( 1030100149), -INT32_C( 838574288), INT32_C( 127984027), INT32_C( 446547514), -INT32_C( 779254164), INT32_C( 1815756512), INT32_C( 500706663), -INT32_C( 1719150844), } }, { { INT32_C( 22911855), -INT32_C( 438327990), -INT32_C( 303202426), -INT32_C( 1458846292) }, { -INT32_C( 910363862), INT32_C( 160988879), INT32_C( 1227109701), INT32_C( 2078452492) }, { -INT32_C( 1870839738), INT32_C( 1819696357), -INT32_C( 
2024184869), INT32_C( 1378903080) }, { INT32_C( 656141400), -INT32_C( 1338985621), -INT32_C( 168209176), INT32_C( 1165024511) }, { INT32_C( 22911855), -INT32_C( 910363862), -INT32_C( 1870839738), INT32_C( 656141400), -INT32_C( 438327990), INT32_C( 160988879), INT32_C( 1819696357), -INT32_C( 1338985621), -INT32_C( 303202426), INT32_C( 1227109701), -INT32_C( 2024184869), -INT32_C( 168209176), -INT32_C( 1458846292), INT32_C( 2078452492), INT32_C( 1378903080), INT32_C( 1165024511), } }, { { INT32_C( 30862620), INT32_C( 611142729), -INT32_C( 676608081), -INT32_C( 2094343125) }, { INT32_C( 866797000), -INT32_C( 505160711), INT32_C( 802610479), -INT32_C( 713799751) }, { INT32_C( 2127972916), INT32_C( 1168262294), INT32_C( 907890187), -INT32_C( 222673110) }, { -INT32_C( 2061081460), INT32_C( 1869023551), -INT32_C( 1617019418), -INT32_C( 1183575420) }, { INT32_C( 30862620), INT32_C( 866797000), INT32_C( 2127972916), -INT32_C( 2061081460), INT32_C( 611142729), -INT32_C( 505160711), INT32_C( 1168262294), INT32_C( 1869023551), -INT32_C( 676608081), INT32_C( 802610479), INT32_C( 907890187), -INT32_C( 1617019418), -INT32_C( 2094343125), -INT32_C( 713799751), -INT32_C( 222673110), -INT32_C( 1183575420), } }, { { -INT32_C( 214480035), -INT32_C( 1707484785), INT32_C( 1372608039), INT32_C( 692357789) }, { INT32_C( 783248111), INT32_C( 1520244339), -INT32_C( 654754989), -INT32_C( 1416532402) }, { INT32_C( 1218365625), -INT32_C( 907880543), -INT32_C( 904220115), INT32_C( 754212413) }, { INT32_C( 1012573128), INT32_C( 211220665), -INT32_C( 2115727565), -INT32_C( 1238600195) }, { -INT32_C( 214480035), INT32_C( 783248111), INT32_C( 1218365625), INT32_C( 1012573128), -INT32_C( 1707484785), INT32_C( 1520244339), -INT32_C( 907880543), INT32_C( 211220665), INT32_C( 1372608039), -INT32_C( 654754989), -INT32_C( 904220115), -INT32_C( 2115727565), INT32_C( 692357789), -INT32_C( 1416532402), INT32_C( 754212413), -INT32_C( 1238600195), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_int32x4x4_t r_ = { { simde_vld1q_s32(test_vec[i].r0), simde_vld1q_s32(test_vec[i].r1), simde_vld1q_s32(test_vec[i].r2), simde_vld1q_s32(test_vec[i].r3), } }; int32_t a_[16]; simde_vst4q_s32(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld4q_s32(a_); simde_test_arm_neon_assert_equal_i32x4(r_.val[0], simde_vld1q_s32(test_vec[i].r0)); simde_test_arm_neon_assert_equal_i32x4(r_.val[1], simde_vld1q_s32(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i32x4(r_.val[2], simde_vld1q_s32(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a0 = simde_test_arm_neon_random_i32x4(); simde_int32x4_t a1 = simde_test_arm_neon_random_i32x4(); simde_int32x4_t a2 = simde_test_arm_neon_random_i32x4(); simde_int32x4_t a3 = simde_test_arm_neon_random_i32x4(); simde_int32x4x4_t a = { { a0, a1, a2, a3 } }; simde_test_arm_neon_write_i32x4(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, a3, SIMDE_TEST_VEC_POS_MIDDLE); int32_t buf[16]; simde_vst4q_s32(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_i32(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst4q_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t r0[2]; int64_t r1[2]; int64_t r2[2]; int64_t r3[2]; int64_t a[8]; } test_vec[] = { { { -INT64_C( 8541829943489260097), INT64_C( 3564085409604377743) }, { INT64_C( 3948167333166941086), -INT64_C( 5324707249671790809) }, { -INT64_C( 8979769786078813461), -INT64_C( 3779948158193827698) }, { -INT64_C( 6995210109496671887), INT64_C( 815857554770381220) }, { -INT64_C( 8541829943489260097), INT64_C( 3948167333166941086), -INT64_C( 8979769786078813461), -INT64_C( 
6995210109496671887), INT64_C( 3564085409604377743), -INT64_C( 5324707249671790809), -INT64_C( 3779948158193827698), INT64_C( 815857554770381220), } }, { { INT64_C( 5664476923655507492), INT64_C( 8231509200709555012) }, { INT64_C( 2308117329321948915), INT64_C( 1695339124149153885) }, { -INT64_C( 1050521044186535332), INT64_C( 4058480232873726687) }, { -INT64_C( 1463481250095138283), -INT64_C( 3087517191304090597) }, { INT64_C( 5664476923655507492), INT64_C( 2308117329321948915), -INT64_C( 1050521044186535332), -INT64_C( 1463481250095138283), INT64_C( 8231509200709555012), INT64_C( 1695339124149153885), INT64_C( 4058480232873726687), -INT64_C( 3087517191304090597), } }, { { INT64_C( 141965789005764185), INT64_C( 1075292658120940399) }, { INT64_C( 6136523074534608559), -INT64_C( 2735222639929617989) }, { -INT64_C( 3962084270849000225), -INT64_C( 4018221210646223564) }, { -INT64_C( 1835727935554266121), INT64_C( 5916083336943233472) }, { INT64_C( 141965789005764185), INT64_C( 6136523074534608559), -INT64_C( 3962084270849000225), -INT64_C( 1835727935554266121), INT64_C( 1075292658120940399), -INT64_C( 2735222639929617989), -INT64_C( 4018221210646223564), INT64_C( 5916083336943233472), } }, { { INT64_C( 668036694159981346), INT64_C( 2458530176053002812) }, { INT64_C( 1396636312019346511), -INT64_C( 4212995808429900605) }, { INT64_C( 8018806691179125865), INT64_C( 4870049206829940552) }, { -INT64_C( 8832369474108790521), -INT64_C( 7493351864596184933) }, { INT64_C( 668036694159981346), INT64_C( 1396636312019346511), INT64_C( 8018806691179125865), -INT64_C( 8832369474108790521), INT64_C( 2458530176053002812), -INT64_C( 4212995808429900605), INT64_C( 4870049206829940552), -INT64_C( 7493351864596184933), } }, { { INT64_C( 1279900569163940640), INT64_C( 8274267464691648670) }, { INT64_C( 5425493607493440003), INT64_C( 7764358908210644730) }, { INT64_C( 6028774767652214735), INT64_C( 5470106943301692939) }, { -INT64_C( 1637237753729927861), INT64_C( 3105463238554407010) }, { 
INT64_C( 1279900569163940640), INT64_C( 5425493607493440003), INT64_C( 6028774767652214735), -INT64_C( 1637237753729927861), INT64_C( 8274267464691648670), INT64_C( 7764358908210644730), INT64_C( 5470106943301692939), INT64_C( 3105463238554407010), } }, { { INT64_C( 7237967780673476457), INT64_C( 6643025027209582492) }, { -INT64_C( 49082622515068551), -INT64_C( 6991904161123245200) }, { -INT64_C( 4298353615892071781), INT64_C( 8282921226878858373) }, { INT64_C( 6464403697443686937), -INT64_C( 4439955325513801073) }, { INT64_C( 7237967780673476457), -INT64_C( 49082622515068551), -INT64_C( 4298353615892071781), INT64_C( 6464403697443686937), INT64_C( 6643025027209582492), -INT64_C( 6991904161123245200), INT64_C( 8282921226878858373), -INT64_C( 4439955325513801073), } }, { { -INT64_C( 3259742678585762089), INT64_C( 6750995090579680069) }, { INT64_C( 5037217593653758628), INT64_C( 6310274718567724) }, { INT64_C( 2641856918791900554), INT64_C( 1283264675808678446) }, { -INT64_C( 2695256297990790244), -INT64_C( 7512774877096598893) }, { -INT64_C( 3259742678585762089), INT64_C( 5037217593653758628), INT64_C( 2641856918791900554), -INT64_C( 2695256297990790244), INT64_C( 6750995090579680069), INT64_C( 6310274718567724), INT64_C( 1283264675808678446), -INT64_C( 7512774877096598893), } }, { { -INT64_C( 5975405342833591023), INT64_C( 2641343072094352682) }, { INT64_C( 6942879150843053778), -INT64_C( 2569624869237061711) }, { INT64_C( 560099869357491608), -INT64_C( 6485106687171763116) }, { INT64_C( 4087439451308324747), INT64_C( 5309266436860401159) }, { -INT64_C( 5975405342833591023), INT64_C( 6942879150843053778), INT64_C( 560099869357491608), INT64_C( 4087439451308324747), INT64_C( 2641343072094352682), -INT64_C( 2569624869237061711), -INT64_C( 6485106687171763116), INT64_C( 5309266436860401159), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2x4_t r_ = { { simde_vld1q_s64(test_vec[i].r0), simde_vld1q_s64(test_vec[i].r1), 
simde_vld1q_s64(test_vec[i].r2), simde_vld1q_s64(test_vec[i].r3), } }; int64_t a_[8]; simde_vst4q_s64(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld4q_s64(a_); simde_test_arm_neon_assert_equal_i64x2(r_.val[0], simde_vld1q_s64(test_vec[i].r0)); simde_test_arm_neon_assert_equal_i64x2(r_.val[1], simde_vld1q_s64(test_vec[i].r1)); simde_test_arm_neon_assert_equal_i64x2(r_.val[2], simde_vld1q_s64(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a0 = simde_test_arm_neon_random_i64x2(); simde_int64x2_t a1 = simde_test_arm_neon_random_i64x2(); simde_int64x2_t a2 = simde_test_arm_neon_random_i64x2(); simde_int64x2_t a3 = simde_test_arm_neon_random_i64x2(); simde_int64x2x4_t a = { { a0, a1, a2, a3 } }; simde_test_arm_neon_write_i64x2(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, a3, SIMDE_TEST_VEC_POS_MIDDLE); int64_t buf[8]; simde_vst4q_s64(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_i64(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst4q_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t r0[16]; uint8_t r1[16]; uint8_t r2[16]; uint8_t r3[16]; uint8_t a[64]; } test_vec[] = { { { UINT8_C( 22), UINT8_C(175), UINT8_C(135), UINT8_C( 79), UINT8_C(237), UINT8_C(179), UINT8_C( 98), UINT8_C(110), UINT8_C( 15), UINT8_C( 62), UINT8_C(209), UINT8_C(178), UINT8_C(179), UINT8_C(169), UINT8_C( 95), UINT8_C( 83) }, { UINT8_C( 45), UINT8_C(101), UINT8_C(113), UINT8_C(245), UINT8_C( 76), UINT8_C(222), UINT8_C(163), UINT8_C(217), UINT8_C(132), UINT8_C(175), UINT8_C(183), UINT8_C(169), UINT8_C(170), UINT8_C(171), UINT8_C(187), UINT8_C(192) }, { UINT8_C( 90), UINT8_C( 66), UINT8_C( 16), 
UINT8_C( 71), UINT8_C(246), UINT8_C(114), UINT8_C(182), UINT8_C( 5), UINT8_C(176), UINT8_C(135), UINT8_C(183), UINT8_C( 99), UINT8_C( 48), UINT8_C( 22), UINT8_C(183), UINT8_C( 94) }, { UINT8_C(124), UINT8_C( 40), UINT8_C( 83), UINT8_C(200), UINT8_C( 6), UINT8_C(246), UINT8_C(162), UINT8_C(139), UINT8_C(165), UINT8_C( 89), UINT8_C( 52), UINT8_C( 80), UINT8_C( 4), UINT8_C(240), UINT8_C( 16), UINT8_C( 95) }, { UINT8_C( 22), UINT8_C( 45), UINT8_C( 90), UINT8_C(124), UINT8_C(175), UINT8_C(101), UINT8_C( 66), UINT8_C( 40), UINT8_C(135), UINT8_C(113), UINT8_C( 16), UINT8_C( 83), UINT8_C( 79), UINT8_C(245), UINT8_C( 71), UINT8_C(200), UINT8_C(237), UINT8_C( 76), UINT8_C(246), UINT8_C( 6), UINT8_C(179), UINT8_C(222), UINT8_C(114), UINT8_C(246), UINT8_C( 98), UINT8_C(163), UINT8_C(182), UINT8_C(162), UINT8_C(110), UINT8_C(217), UINT8_C( 5), UINT8_C(139), UINT8_C( 15), UINT8_C(132), UINT8_C(176), UINT8_C(165), UINT8_C( 62), UINT8_C(175), UINT8_C(135), UINT8_C( 89), UINT8_C(209), UINT8_C(183), UINT8_C(183), UINT8_C( 52), UINT8_C(178), UINT8_C(169), UINT8_C( 99), UINT8_C( 80), UINT8_C(179), UINT8_C(170), UINT8_C( 48), UINT8_C( 4), UINT8_C(169), UINT8_C(171), UINT8_C( 22), UINT8_C(240), UINT8_C( 95), UINT8_C(187), UINT8_C(183), UINT8_C( 16), UINT8_C( 83), UINT8_C(192), UINT8_C( 94), UINT8_C( 95), } }, { { UINT8_C( 50), UINT8_C( 32), UINT8_C(166), UINT8_C( 40), UINT8_C(147), UINT8_C( 92), UINT8_C( 45), UINT8_C( 67), UINT8_C(227), UINT8_C(229), UINT8_C(167), UINT8_C( 20), UINT8_C(251), UINT8_C( 94), UINT8_C(114), UINT8_C(119) }, { UINT8_C(134), UINT8_C(197), UINT8_C( 64), UINT8_C(141), UINT8_C(188), UINT8_C(226), UINT8_C( 24), UINT8_C( 97), UINT8_C( 59), UINT8_C( 76), UINT8_C(177), UINT8_C( 64), UINT8_C( 60), UINT8_C(194), UINT8_C(159), UINT8_C(111) }, { UINT8_C(226), UINT8_C( 69), UINT8_C(151), UINT8_C(117), UINT8_C(162), UINT8_C(197), UINT8_C(185), UINT8_C(133), UINT8_C(170), UINT8_C( 96), UINT8_C(153), UINT8_C(165), UINT8_C(190), UINT8_C( 11), UINT8_C( 29), UINT8_C( 68) }, { 
UINT8_C(209), UINT8_C( 93), UINT8_C(209), UINT8_C(141), UINT8_C( 63), UINT8_C(233), UINT8_C(238), UINT8_C(122), UINT8_C( 54), UINT8_C(160), UINT8_C(186), UINT8_C(114), UINT8_C( 98), UINT8_C( 89), UINT8_C(225), UINT8_C( 68) }, { UINT8_C( 50), UINT8_C(134), UINT8_C(226), UINT8_C(209), UINT8_C( 32), UINT8_C(197), UINT8_C( 69), UINT8_C( 93), UINT8_C(166), UINT8_C( 64), UINT8_C(151), UINT8_C(209), UINT8_C( 40), UINT8_C(141), UINT8_C(117), UINT8_C(141), UINT8_C(147), UINT8_C(188), UINT8_C(162), UINT8_C( 63), UINT8_C( 92), UINT8_C(226), UINT8_C(197), UINT8_C(233), UINT8_C( 45), UINT8_C( 24), UINT8_C(185), UINT8_C(238), UINT8_C( 67), UINT8_C( 97), UINT8_C(133), UINT8_C(122), UINT8_C(227), UINT8_C( 59), UINT8_C(170), UINT8_C( 54), UINT8_C(229), UINT8_C( 76), UINT8_C( 96), UINT8_C(160), UINT8_C(167), UINT8_C(177), UINT8_C(153), UINT8_C(186), UINT8_C( 20), UINT8_C( 64), UINT8_C(165), UINT8_C(114), UINT8_C(251), UINT8_C( 60), UINT8_C(190), UINT8_C( 98), UINT8_C( 94), UINT8_C(194), UINT8_C( 11), UINT8_C( 89), UINT8_C(114), UINT8_C(159), UINT8_C( 29), UINT8_C(225), UINT8_C(119), UINT8_C(111), UINT8_C( 68), UINT8_C( 68), } }, { { UINT8_C(159), UINT8_C(121), UINT8_C(186), UINT8_C( 65), UINT8_C( 62), UINT8_C(115), UINT8_C(198), UINT8_C(232), UINT8_C(211), UINT8_C( 96), UINT8_C(141), UINT8_C(145), UINT8_C(107), UINT8_C(170), UINT8_C(213), UINT8_C( 60) }, { UINT8_C( 7), UINT8_C(167), UINT8_C(201), UINT8_C( 70), UINT8_C(144), UINT8_C(184), UINT8_C(193), UINT8_C(198), UINT8_C( 88), UINT8_C(123), UINT8_C( 57), UINT8_C(186), UINT8_C(213), UINT8_C( 26), UINT8_C(254), UINT8_C(116) }, { UINT8_C(147), UINT8_C(184), UINT8_C(181), UINT8_C(209), UINT8_C( 43), UINT8_C(123), UINT8_C(185), UINT8_C(254), UINT8_C(219), UINT8_C( 71), UINT8_C(143), UINT8_C( 71), UINT8_C(241), UINT8_C(101), UINT8_C(131), UINT8_C(249) }, { UINT8_C( 12), UINT8_C( 77), UINT8_C( 63), UINT8_C(156), UINT8_C( 5), UINT8_C( 0), UINT8_C( 99), UINT8_C( 93), UINT8_C(124), UINT8_C(156), UINT8_C( 23), UINT8_C( 81), UINT8_C(182), 
UINT8_C( 21), UINT8_C(197), UINT8_C( 74) }, { UINT8_C(159), UINT8_C( 7), UINT8_C(147), UINT8_C( 12), UINT8_C(121), UINT8_C(167), UINT8_C(184), UINT8_C( 77), UINT8_C(186), UINT8_C(201), UINT8_C(181), UINT8_C( 63), UINT8_C( 65), UINT8_C( 70), UINT8_C(209), UINT8_C(156), UINT8_C( 62), UINT8_C(144), UINT8_C( 43), UINT8_C( 5), UINT8_C(115), UINT8_C(184), UINT8_C(123), UINT8_C( 0), UINT8_C(198), UINT8_C(193), UINT8_C(185), UINT8_C( 99), UINT8_C(232), UINT8_C(198), UINT8_C(254), UINT8_C( 93), UINT8_C(211), UINT8_C( 88), UINT8_C(219), UINT8_C(124), UINT8_C( 96), UINT8_C(123), UINT8_C( 71), UINT8_C(156), UINT8_C(141), UINT8_C( 57), UINT8_C(143), UINT8_C( 23), UINT8_C(145), UINT8_C(186), UINT8_C( 71), UINT8_C( 81), UINT8_C(107), UINT8_C(213), UINT8_C(241), UINT8_C(182), UINT8_C(170), UINT8_C( 26), UINT8_C(101), UINT8_C( 21), UINT8_C(213), UINT8_C(254), UINT8_C(131), UINT8_C(197), UINT8_C( 60), UINT8_C(116), UINT8_C(249), UINT8_C( 74), } }, { { UINT8_C(206), UINT8_C(122), UINT8_C( 27), UINT8_C(249), UINT8_C(245), UINT8_C(213), UINT8_C(248), UINT8_C(209), UINT8_C( 28), UINT8_C(135), UINT8_C( 24), UINT8_C( 13), UINT8_C(236), UINT8_C(155), UINT8_C( 6), UINT8_C(248) }, { UINT8_C(232), UINT8_C( 70), UINT8_C(149), UINT8_C(237), UINT8_C( 70), UINT8_C(248), UINT8_C( 74), UINT8_C(194), UINT8_C(148), UINT8_C( 97), UINT8_C( 19), UINT8_C( 74), UINT8_C(119), UINT8_C(216), UINT8_C(148), UINT8_C( 69) }, { UINT8_C( 82), UINT8_C(176), UINT8_C( 62), UINT8_C( 72), UINT8_C(133), UINT8_C( 54), UINT8_C( 25), UINT8_C(161), UINT8_C(190), UINT8_C( 49), UINT8_C(174), UINT8_C(170), UINT8_C(204), UINT8_C(181), UINT8_C(163), UINT8_C(181) }, { UINT8_C(251), UINT8_C( 56), UINT8_C(162), UINT8_C( 65), UINT8_C( 48), UINT8_C(237), UINT8_C( 4), UINT8_C(196), UINT8_C( 78), UINT8_C( 23), UINT8_C( 14), UINT8_C(197), UINT8_C(240), UINT8_C(163), UINT8_C( 10), UINT8_C( 66) }, { UINT8_C(206), UINT8_C(232), UINT8_C( 82), UINT8_C(251), UINT8_C(122), UINT8_C( 70), UINT8_C(176), UINT8_C( 56), UINT8_C( 27), UINT8_C(149), 
UINT8_C( 62), UINT8_C(162), UINT8_C(249), UINT8_C(237), UINT8_C( 72), UINT8_C( 65), UINT8_C(245), UINT8_C( 70), UINT8_C(133), UINT8_C( 48), UINT8_C(213), UINT8_C(248), UINT8_C( 54), UINT8_C(237), UINT8_C(248), UINT8_C( 74), UINT8_C( 25), UINT8_C( 4), UINT8_C(209), UINT8_C(194), UINT8_C(161), UINT8_C(196), UINT8_C( 28), UINT8_C(148), UINT8_C(190), UINT8_C( 78), UINT8_C(135), UINT8_C( 97), UINT8_C( 49), UINT8_C( 23), UINT8_C( 24), UINT8_C( 19), UINT8_C(174), UINT8_C( 14), UINT8_C( 13), UINT8_C( 74), UINT8_C(170), UINT8_C(197), UINT8_C(236), UINT8_C(119), UINT8_C(204), UINT8_C(240), UINT8_C(155), UINT8_C(216), UINT8_C(181), UINT8_C(163), UINT8_C( 6), UINT8_C(148), UINT8_C(163), UINT8_C( 10), UINT8_C(248), UINT8_C( 69), UINT8_C(181), UINT8_C( 66), } }, { { UINT8_C( 83), UINT8_C( 73), UINT8_C(138), UINT8_C(216), UINT8_C(127), UINT8_C(163), UINT8_C(121), UINT8_C( 61), UINT8_C(212), UINT8_C( 39), UINT8_C(232), UINT8_C(161), UINT8_C(220), UINT8_C(139), UINT8_C( 86), UINT8_C(215) }, { UINT8_C(195), UINT8_C(248), UINT8_C( 25), UINT8_C(243), UINT8_C(229), UINT8_C( 29), UINT8_C(183), UINT8_C( 52), UINT8_C( 52), UINT8_C(197), UINT8_C(249), UINT8_C( 36), UINT8_C(104), UINT8_C( 4), UINT8_C(103), UINT8_C(187) }, { UINT8_C( 77), UINT8_C(241), UINT8_C(147), UINT8_C(204), UINT8_C(149), UINT8_C( 12), UINT8_C( 10), UINT8_C(105), UINT8_C( 52), UINT8_C(242), UINT8_C( 10), UINT8_C( 16), UINT8_C(125), UINT8_C( 96), UINT8_C(232), UINT8_C( 64) }, { UINT8_C( 89), UINT8_C( 1), UINT8_C( 51), UINT8_C( 62), UINT8_C( 30), UINT8_C(234), UINT8_C(114), UINT8_C( 82), UINT8_C(175), UINT8_C(108), UINT8_C(119), UINT8_C( 24), UINT8_C(112), UINT8_C(222), UINT8_C(211), UINT8_C(189) }, { UINT8_C( 83), UINT8_C(195), UINT8_C( 77), UINT8_C( 89), UINT8_C( 73), UINT8_C(248), UINT8_C(241), UINT8_C( 1), UINT8_C(138), UINT8_C( 25), UINT8_C(147), UINT8_C( 51), UINT8_C(216), UINT8_C(243), UINT8_C(204), UINT8_C( 62), UINT8_C(127), UINT8_C(229), UINT8_C(149), UINT8_C( 30), UINT8_C(163), UINT8_C( 29), UINT8_C( 12), 
UINT8_C(234), UINT8_C(121), UINT8_C(183), UINT8_C( 10), UINT8_C(114), UINT8_C( 61), UINT8_C( 52), UINT8_C(105), UINT8_C( 82), UINT8_C(212), UINT8_C( 52), UINT8_C( 52), UINT8_C(175), UINT8_C( 39), UINT8_C(197), UINT8_C(242), UINT8_C(108), UINT8_C(232), UINT8_C(249), UINT8_C( 10), UINT8_C(119), UINT8_C(161), UINT8_C( 36), UINT8_C( 16), UINT8_C( 24), UINT8_C(220), UINT8_C(104), UINT8_C(125), UINT8_C(112), UINT8_C(139), UINT8_C( 4), UINT8_C( 96), UINT8_C(222), UINT8_C( 86), UINT8_C(103), UINT8_C(232), UINT8_C(211), UINT8_C(215), UINT8_C(187), UINT8_C( 64), UINT8_C(189), } }, { { UINT8_C(207), UINT8_C(103), UINT8_C(137), UINT8_C(100), UINT8_C(115), UINT8_C(147), UINT8_C(206), UINT8_C(167), UINT8_C(133), UINT8_C(216), UINT8_C(184), UINT8_C( 2), UINT8_C( 57), UINT8_C(160), UINT8_C( 66), UINT8_C(146) }, { UINT8_C(161), UINT8_C(117), UINT8_C(208), UINT8_C(191), UINT8_C( 95), UINT8_C( 67), UINT8_C( 17), UINT8_C( 15), UINT8_C(175), UINT8_C(136), UINT8_C( 39), UINT8_C( 31), UINT8_C(102), UINT8_C(250), UINT8_C(220), UINT8_C( 54) }, { UINT8_C( 97), UINT8_C(101), UINT8_C(154), UINT8_C(213), UINT8_C(249), UINT8_C(104), UINT8_C(124), UINT8_C(126), UINT8_C( 65), UINT8_C( 52), UINT8_C(129), UINT8_C(122), UINT8_C(212), UINT8_C(195), UINT8_C( 12), UINT8_C(117) }, { UINT8_C( 57), UINT8_C(220), UINT8_C( 52), UINT8_C(152), UINT8_C( 31), UINT8_C( 70), UINT8_C(167), UINT8_C(206), UINT8_C(206), UINT8_C(206), UINT8_C(237), UINT8_C( 53), UINT8_C(201), UINT8_C(201), UINT8_C(107), UINT8_C( 42) }, { UINT8_C(207), UINT8_C(161), UINT8_C( 97), UINT8_C( 57), UINT8_C(103), UINT8_C(117), UINT8_C(101), UINT8_C(220), UINT8_C(137), UINT8_C(208), UINT8_C(154), UINT8_C( 52), UINT8_C(100), UINT8_C(191), UINT8_C(213), UINT8_C(152), UINT8_C(115), UINT8_C( 95), UINT8_C(249), UINT8_C( 31), UINT8_C(147), UINT8_C( 67), UINT8_C(104), UINT8_C( 70), UINT8_C(206), UINT8_C( 17), UINT8_C(124), UINT8_C(167), UINT8_C(167), UINT8_C( 15), UINT8_C(126), UINT8_C(206), UINT8_C(133), UINT8_C(175), UINT8_C( 65), UINT8_C(206), 
UINT8_C(216), UINT8_C(136), UINT8_C( 52), UINT8_C(206), UINT8_C(184), UINT8_C( 39), UINT8_C(129), UINT8_C(237), UINT8_C( 2), UINT8_C( 31), UINT8_C(122), UINT8_C( 53), UINT8_C( 57), UINT8_C(102), UINT8_C(212), UINT8_C(201), UINT8_C(160), UINT8_C(250), UINT8_C(195), UINT8_C(201), UINT8_C( 66), UINT8_C(220), UINT8_C( 12), UINT8_C(107), UINT8_C(146), UINT8_C( 54), UINT8_C(117), UINT8_C( 42), } }, { { UINT8_C( 47), UINT8_C( 5), UINT8_MAX, UINT8_C( 40), UINT8_C(110), UINT8_C(124), UINT8_C(166), UINT8_C(175), UINT8_C(176), UINT8_C( 39), UINT8_C( 41), UINT8_C(133), UINT8_C(235), UINT8_C( 53), UINT8_C(250), UINT8_C( 36) }, { UINT8_C( 17), UINT8_C( 47), UINT8_C(188), UINT8_C( 49), UINT8_C(117), UINT8_C(100), UINT8_MAX, UINT8_C( 67), UINT8_C( 50), UINT8_C(237), UINT8_C(120), UINT8_C(251), UINT8_C(182), UINT8_C(227), UINT8_C( 38), UINT8_C(229) }, { UINT8_C(233), UINT8_C( 37), UINT8_C( 13), UINT8_C( 87), UINT8_C(161), UINT8_C(180), UINT8_C( 6), UINT8_C( 82), UINT8_C(219), UINT8_C( 47), UINT8_C(215), UINT8_C(198), UINT8_C(100), UINT8_C(209), UINT8_C(234), UINT8_C(117) }, { UINT8_C( 0), UINT8_C(167), UINT8_C(166), UINT8_C(117), UINT8_C( 11), UINT8_C(166), UINT8_C(185), UINT8_C( 61), UINT8_C(147), UINT8_C( 49), UINT8_C( 57), UINT8_C( 73), UINT8_C( 21), UINT8_C( 95), UINT8_C( 47), UINT8_C(254) }, { UINT8_C( 47), UINT8_C( 17), UINT8_C(233), UINT8_C( 0), UINT8_C( 5), UINT8_C( 47), UINT8_C( 37), UINT8_C(167), UINT8_MAX, UINT8_C(188), UINT8_C( 13), UINT8_C(166), UINT8_C( 40), UINT8_C( 49), UINT8_C( 87), UINT8_C(117), UINT8_C(110), UINT8_C(117), UINT8_C(161), UINT8_C( 11), UINT8_C(124), UINT8_C(100), UINT8_C(180), UINT8_C(166), UINT8_C(166), UINT8_MAX, UINT8_C( 6), UINT8_C(185), UINT8_C(175), UINT8_C( 67), UINT8_C( 82), UINT8_C( 61), UINT8_C(176), UINT8_C( 50), UINT8_C(219), UINT8_C(147), UINT8_C( 39), UINT8_C(237), UINT8_C( 47), UINT8_C( 49), UINT8_C( 41), UINT8_C(120), UINT8_C(215), UINT8_C( 57), UINT8_C(133), UINT8_C(251), UINT8_C(198), UINT8_C( 73), UINT8_C(235), UINT8_C(182), 
UINT8_C(100), UINT8_C( 21), UINT8_C( 53), UINT8_C(227), UINT8_C(209), UINT8_C( 95), UINT8_C(250), UINT8_C( 38), UINT8_C(234), UINT8_C( 47), UINT8_C( 36), UINT8_C(229), UINT8_C(117), UINT8_C(254), } }, { { UINT8_C(132), UINT8_C( 60), UINT8_C( 85), UINT8_C( 38), UINT8_C(240), UINT8_C( 91), UINT8_C(120), UINT8_C(204), UINT8_C(138), UINT8_C( 79), UINT8_C(146), UINT8_C(238), UINT8_C( 32), UINT8_C(125), UINT8_C( 99), UINT8_C( 33) }, { UINT8_C( 36), UINT8_C( 10), UINT8_C(150), UINT8_C( 47), UINT8_C(176), UINT8_C( 79), UINT8_C(108), UINT8_C( 67), UINT8_C(129), UINT8_C(165), UINT8_C(140), UINT8_C(150), UINT8_C( 4), UINT8_C(187), UINT8_C(148), UINT8_C(137) }, { UINT8_C(248), UINT8_C(233), UINT8_C(175), UINT8_C(232), UINT8_C( 68), UINT8_C( 39), UINT8_C(180), UINT8_C(206), UINT8_C(118), UINT8_C( 71), UINT8_C(188), UINT8_C(150), UINT8_C(196), UINT8_C( 31), UINT8_C(183), UINT8_C(232) }, { UINT8_C( 41), UINT8_C( 78), UINT8_C( 23), UINT8_C(217), UINT8_C(157), UINT8_C(131), UINT8_C( 28), UINT8_C( 30), UINT8_C( 41), UINT8_C(169), UINT8_C(180), UINT8_C( 45), UINT8_C(100), UINT8_C( 72), UINT8_C(182), UINT8_C( 92) }, { UINT8_C(132), UINT8_C( 36), UINT8_C(248), UINT8_C( 41), UINT8_C( 60), UINT8_C( 10), UINT8_C(233), UINT8_C( 78), UINT8_C( 85), UINT8_C(150), UINT8_C(175), UINT8_C( 23), UINT8_C( 38), UINT8_C( 47), UINT8_C(232), UINT8_C(217), UINT8_C(240), UINT8_C(176), UINT8_C( 68), UINT8_C(157), UINT8_C( 91), UINT8_C( 79), UINT8_C( 39), UINT8_C(131), UINT8_C(120), UINT8_C(108), UINT8_C(180), UINT8_C( 28), UINT8_C(204), UINT8_C( 67), UINT8_C(206), UINT8_C( 30), UINT8_C(138), UINT8_C(129), UINT8_C(118), UINT8_C( 41), UINT8_C( 79), UINT8_C(165), UINT8_C( 71), UINT8_C(169), UINT8_C(146), UINT8_C(140), UINT8_C(188), UINT8_C(180), UINT8_C(238), UINT8_C(150), UINT8_C(150), UINT8_C( 45), UINT8_C( 32), UINT8_C( 4), UINT8_C(196), UINT8_C(100), UINT8_C(125), UINT8_C(187), UINT8_C( 31), UINT8_C( 72), UINT8_C( 99), UINT8_C(148), UINT8_C(183), UINT8_C(182), UINT8_C( 33), UINT8_C(137), UINT8_C(232), 
UINT8_C( 92), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16x4_t r_ = { { simde_vld1q_u8(test_vec[i].r0), simde_vld1q_u8(test_vec[i].r1), simde_vld1q_u8(test_vec[i].r2), simde_vld1q_u8(test_vec[i].r3), } }; uint8_t a_[64]; simde_vst4q_u8(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld4q_u8(a_); simde_test_arm_neon_assert_equal_u8x16(r_.val[0], simde_vld1q_u8(test_vec[i].r0)); simde_test_arm_neon_assert_equal_u8x16(r_.val[1], simde_vld1q_u8(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u8x16(r_.val[2], simde_vld1q_u8(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (size_t i = 0 ; i < 8 ; i++) { simde_uint8x16_t a0 = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t a1 = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t a2 = simde_test_arm_neon_random_u8x16(); simde_uint8x16_t a3 = simde_test_arm_neon_random_u8x16(); simde_uint8x16x4_t a = { { a0, a1, a2, a3 } }; simde_test_arm_neon_write_u8x16(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, a3, SIMDE_TEST_VEC_POS_MIDDLE); uint8_t buf[64]; simde_vst4q_u8(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; j++) { simde_test_codegen_write_u8(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst4q_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t r0[8]; uint16_t r1[8]; uint16_t r2[8]; uint16_t r3[8]; uint16_t a[32]; } test_vec[] = { { { UINT16_C(15069), UINT16_C(48098), UINT16_C(50460), UINT16_C(57042), UINT16_C(60925), UINT16_C(31976), UINT16_C(20855), UINT16_C(12914) }, { UINT16_C(15353), UINT16_C(16232), UINT16_C(47274), UINT16_C(46855), UINT16_C(31075), UINT16_C(46180), UINT16_C(53651), UINT16_C(28697) }, { UINT16_C(64523), 
UINT16_C(10027), UINT16_C(64961), UINT16_C(48645), UINT16_C(60906), UINT16_C(24890), UINT16_C(44095), UINT16_C(14483) }, { UINT16_C(64487), UINT16_C(37239), UINT16_C(32435), UINT16_C( 5705), UINT16_C(44535), UINT16_C(35531), UINT16_C(58494), UINT16_C(35322) }, { UINT16_C(15069), UINT16_C(15353), UINT16_C(64523), UINT16_C(64487), UINT16_C(48098), UINT16_C(16232), UINT16_C(10027), UINT16_C(37239), UINT16_C(50460), UINT16_C(47274), UINT16_C(64961), UINT16_C(32435), UINT16_C(57042), UINT16_C(46855), UINT16_C(48645), UINT16_C( 5705), UINT16_C(60925), UINT16_C(31075), UINT16_C(60906), UINT16_C(44535), UINT16_C(31976), UINT16_C(46180), UINT16_C(24890), UINT16_C(35531), UINT16_C(20855), UINT16_C(53651), UINT16_C(44095), UINT16_C(58494), UINT16_C(12914), UINT16_C(28697), UINT16_C(14483), UINT16_C(35322), } }, { { UINT16_C( 9696), UINT16_C(41393), UINT16_C(46626), UINT16_C( 3424), UINT16_C(39588), UINT16_C(58222), UINT16_C( 583), UINT16_C(11803) }, { UINT16_C(37629), UINT16_C(45504), UINT16_C( 2321), UINT16_C( 2247), UINT16_C(37558), UINT16_C(13459), UINT16_C(36215), UINT16_C(22461) }, { UINT16_C(28339), UINT16_C(54777), UINT16_C(22821), UINT16_C(51682), UINT16_C(20979), UINT16_C(15020), UINT16_C(51027), UINT16_C(20585) }, { UINT16_C(10585), UINT16_C(27137), UINT16_C(51506), UINT16_C(59507), UINT16_C( 1627), UINT16_C(53788), UINT16_C(55699), UINT16_C(17962) }, { UINT16_C( 9696), UINT16_C(37629), UINT16_C(28339), UINT16_C(10585), UINT16_C(41393), UINT16_C(45504), UINT16_C(54777), UINT16_C(27137), UINT16_C(46626), UINT16_C( 2321), UINT16_C(22821), UINT16_C(51506), UINT16_C( 3424), UINT16_C( 2247), UINT16_C(51682), UINT16_C(59507), UINT16_C(39588), UINT16_C(37558), UINT16_C(20979), UINT16_C( 1627), UINT16_C(58222), UINT16_C(13459), UINT16_C(15020), UINT16_C(53788), UINT16_C( 583), UINT16_C(36215), UINT16_C(51027), UINT16_C(55699), UINT16_C(11803), UINT16_C(22461), UINT16_C(20585), UINT16_C(17962), } }, { { UINT16_C( 9032), UINT16_C(27932), UINT16_C(65148), UINT16_C(28470), 
UINT16_C(57935), UINT16_C(41642), UINT16_C( 5033), UINT16_C( 755) }, { UINT16_C(62524), UINT16_C(28269), UINT16_C(57533), UINT16_C( 6486), UINT16_C(29414), UINT16_C(31211), UINT16_C( 5451), UINT16_C(37824) }, { UINT16_C(56376), UINT16_C(46080), UINT16_C(14042), UINT16_C(10788), UINT16_C(52760), UINT16_C(49612), UINT16_C(49121), UINT16_C( 7620) }, { UINT16_C(12724), UINT16_C(29067), UINT16_C(57617), UINT16_C(63370), UINT16_C(30291), UINT16_C(40560), UINT16_C(12427), UINT16_C(50226) }, { UINT16_C( 9032), UINT16_C(62524), UINT16_C(56376), UINT16_C(12724), UINT16_C(27932), UINT16_C(28269), UINT16_C(46080), UINT16_C(29067), UINT16_C(65148), UINT16_C(57533), UINT16_C(14042), UINT16_C(57617), UINT16_C(28470), UINT16_C( 6486), UINT16_C(10788), UINT16_C(63370), UINT16_C(57935), UINT16_C(29414), UINT16_C(52760), UINT16_C(30291), UINT16_C(41642), UINT16_C(31211), UINT16_C(49612), UINT16_C(40560), UINT16_C( 5033), UINT16_C( 5451), UINT16_C(49121), UINT16_C(12427), UINT16_C( 755), UINT16_C(37824), UINT16_C( 7620), UINT16_C(50226), } }, { { UINT16_C(12812), UINT16_C(59256), UINT16_C(40041), UINT16_C(33041), UINT16_C(56682), UINT16_C(19267), UINT16_C( 1949), UINT16_C(20840) }, { UINT16_C(62264), UINT16_C(18882), UINT16_C(19924), UINT16_C(10048), UINT16_C(45251), UINT16_C(20166), UINT16_C(63713), UINT16_C(60690) }, { UINT16_C(35626), UINT16_C(37844), UINT16_C(58663), UINT16_C(37397), UINT16_C(22723), UINT16_C(24797), UINT16_C(18015), UINT16_C(38833) }, { UINT16_C(29497), UINT16_C( 3808), UINT16_C( 8384), UINT16_C(33589), UINT16_C(64464), UINT16_C(45522), UINT16_C(58611), UINT16_C( 7839) }, { UINT16_C(12812), UINT16_C(62264), UINT16_C(35626), UINT16_C(29497), UINT16_C(59256), UINT16_C(18882), UINT16_C(37844), UINT16_C( 3808), UINT16_C(40041), UINT16_C(19924), UINT16_C(58663), UINT16_C( 8384), UINT16_C(33041), UINT16_C(10048), UINT16_C(37397), UINT16_C(33589), UINT16_C(56682), UINT16_C(45251), UINT16_C(22723), UINT16_C(64464), UINT16_C(19267), UINT16_C(20166), UINT16_C(24797), 
UINT16_C(45522), UINT16_C( 1949), UINT16_C(63713), UINT16_C(18015), UINT16_C(58611), UINT16_C(20840), UINT16_C(60690), UINT16_C(38833), UINT16_C( 7839), } }, { { UINT16_C(29551), UINT16_C(38833), UINT16_C(50777), UINT16_C( 7209), UINT16_C( 1566), UINT16_C(32124), UINT16_C(11596), UINT16_C(34324) }, { UINT16_C(62624), UINT16_C(24980), UINT16_C(51476), UINT16_C(58852), UINT16_C(46789), UINT16_C(47254), UINT16_C(13723), UINT16_C( 2774) }, { UINT16_C(34985), UINT16_C( 673), UINT16_C(51790), UINT16_C(27934), UINT16_C(39633), UINT16_C( 7658), UINT16_C(65479), UINT16_C(26531) }, { UINT16_C(14323), UINT16_C( 2248), UINT16_C(44289), UINT16_C(50925), UINT16_C(33635), UINT16_C(65150), UINT16_C(21945), UINT16_C(25097) }, { UINT16_C(29551), UINT16_C(62624), UINT16_C(34985), UINT16_C(14323), UINT16_C(38833), UINT16_C(24980), UINT16_C( 673), UINT16_C( 2248), UINT16_C(50777), UINT16_C(51476), UINT16_C(51790), UINT16_C(44289), UINT16_C( 7209), UINT16_C(58852), UINT16_C(27934), UINT16_C(50925), UINT16_C( 1566), UINT16_C(46789), UINT16_C(39633), UINT16_C(33635), UINT16_C(32124), UINT16_C(47254), UINT16_C( 7658), UINT16_C(65150), UINT16_C(11596), UINT16_C(13723), UINT16_C(65479), UINT16_C(21945), UINT16_C(34324), UINT16_C( 2774), UINT16_C(26531), UINT16_C(25097), } }, { { UINT16_C(43741), UINT16_C(11108), UINT16_C(33397), UINT16_C(18072), UINT16_C(33564), UINT16_C(58211), UINT16_C( 1922), UINT16_C(30026) }, { UINT16_C( 4926), UINT16_C(16253), UINT16_C(27328), UINT16_C( 8965), UINT16_C(34030), UINT16_C(42786), UINT16_C(11225), UINT16_C(46601) }, { UINT16_C(28117), UINT16_C(19169), UINT16_C(31471), UINT16_C( 2960), UINT16_C(62717), UINT16_C(32750), UINT16_C(14587), UINT16_C(14836) }, { UINT16_C(29259), UINT16_C( 2937), UINT16_C(32476), UINT16_C(51759), UINT16_C(20738), UINT16_C(56177), UINT16_C(31356), UINT16_C(20881) }, { UINT16_C(43741), UINT16_C( 4926), UINT16_C(28117), UINT16_C(29259), UINT16_C(11108), UINT16_C(16253), UINT16_C(19169), UINT16_C( 2937), UINT16_C(33397), 
UINT16_C(27328), UINT16_C(31471), UINT16_C(32476), UINT16_C(18072), UINT16_C( 8965), UINT16_C( 2960), UINT16_C(51759), UINT16_C(33564), UINT16_C(34030), UINT16_C(62717), UINT16_C(20738), UINT16_C(58211), UINT16_C(42786), UINT16_C(32750), UINT16_C(56177), UINT16_C( 1922), UINT16_C(11225), UINT16_C(14587), UINT16_C(31356), UINT16_C(30026), UINT16_C(46601), UINT16_C(14836), UINT16_C(20881), } }, { { UINT16_C(29671), UINT16_C(54940), UINT16_C(11501), UINT16_C(60129), UINT16_C(53024), UINT16_C( 7017), UINT16_C(23816), UINT16_C(21333) }, { UINT16_C(52943), UINT16_C(44127), UINT16_C(36428), UINT16_C(20342), UINT16_C(59615), UINT16_C(23338), UINT16_C(48226), UINT16_C(19116) }, { UINT16_C(18479), UINT16_C( 7200), UINT16_C( 629), UINT16_C(38150), UINT16_C(28625), UINT16_C(55729), UINT16_C( 1740), UINT16_C(39981) }, { UINT16_C(36052), UINT16_C( 8264), UINT16_C(48666), UINT16_C(63855), UINT16_C(39590), UINT16_C( 2388), UINT16_C( 86), UINT16_C(34131) }, { UINT16_C(29671), UINT16_C(52943), UINT16_C(18479), UINT16_C(36052), UINT16_C(54940), UINT16_C(44127), UINT16_C( 7200), UINT16_C( 8264), UINT16_C(11501), UINT16_C(36428), UINT16_C( 629), UINT16_C(48666), UINT16_C(60129), UINT16_C(20342), UINT16_C(38150), UINT16_C(63855), UINT16_C(53024), UINT16_C(59615), UINT16_C(28625), UINT16_C(39590), UINT16_C( 7017), UINT16_C(23338), UINT16_C(55729), UINT16_C( 2388), UINT16_C(23816), UINT16_C(48226), UINT16_C( 1740), UINT16_C( 86), UINT16_C(21333), UINT16_C(19116), UINT16_C(39981), UINT16_C(34131), } }, { { UINT16_C(29513), UINT16_C(48801), UINT16_C(42869), UINT16_C(18259), UINT16_C( 1046), UINT16_C(57888), UINT16_C(19722), UINT16_C(56958) }, { UINT16_C(50905), UINT16_C(62463), UINT16_C(28293), UINT16_C(11244), UINT16_C(16392), UINT16_C(24116), UINT16_C(34625), UINT16_C(35555) }, { UINT16_C(34043), UINT16_C(28744), UINT16_C(39723), UINT16_C(16823), UINT16_C(55456), UINT16_C(43556), UINT16_C(41509), UINT16_C(65417) }, { UINT16_C(34921), UINT16_C(61170), UINT16_C(57334), UINT16_C(65305), 
UINT16_C(19999), UINT16_C(24669), UINT16_C(16853), UINT16_C(53482) }, { UINT16_C(29513), UINT16_C(50905), UINT16_C(34043), UINT16_C(34921), UINT16_C(48801), UINT16_C(62463), UINT16_C(28744), UINT16_C(61170), UINT16_C(42869), UINT16_C(28293), UINT16_C(39723), UINT16_C(57334), UINT16_C(18259), UINT16_C(11244), UINT16_C(16823), UINT16_C(65305), UINT16_C( 1046), UINT16_C(16392), UINT16_C(55456), UINT16_C(19999), UINT16_C(57888), UINT16_C(24116), UINT16_C(43556), UINT16_C(24669), UINT16_C(19722), UINT16_C(34625), UINT16_C(41509), UINT16_C(16853), UINT16_C(56958), UINT16_C(35555), UINT16_C(65417), UINT16_C(53482), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8x4_t r_ = { { simde_vld1q_u16(test_vec[i].r0), simde_vld1q_u16(test_vec[i].r1), simde_vld1q_u16(test_vec[i].r2), simde_vld1q_u16(test_vec[i].r3), } }; uint16_t a_[32]; simde_vst4q_u16(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld4q_u16(a_); simde_test_arm_neon_assert_equal_u16x8(r_.val[0], simde_vld1q_u16(test_vec[i].r0)); simde_test_arm_neon_assert_equal_u16x8(r_.val[1], simde_vld1q_u16(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u16x8(r_.val[2], simde_vld1q_u16(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (size_t i = 0 ; i < 8 ; i++) { simde_uint16x8_t a0 = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t a1 = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t a2 = simde_test_arm_neon_random_u16x8(); simde_uint16x8_t a3 = simde_test_arm_neon_random_u16x8(); simde_uint16x8x4_t a = { { a0, a1, a2, a3 } }; simde_test_arm_neon_write_u16x8(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, a3, SIMDE_TEST_VEC_POS_MIDDLE); uint16_t buf[32]; simde_vst4q_u16(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])) ; 
j++) { simde_test_codegen_write_u16(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst4q_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t r0[4]; uint32_t r1[4]; uint32_t r2[4]; uint32_t r3[4]; uint32_t a[16]; } test_vec[] = { { { UINT32_C( 816928038), UINT32_C(4024508959), UINT32_C(2589801649), UINT32_C(2940729031) }, { UINT32_C(3956440918), UINT32_C( 899659297), UINT32_C(1093083275), UINT32_C( 598365436) }, { UINT32_C(3377683625), UINT32_C( 800601470), UINT32_C( 969545329), UINT32_C(1659375884) }, { UINT32_C(2991438481), UINT32_C(4243057777), UINT32_C( 154996236), UINT32_C( 204269667) }, { UINT32_C( 816928038), UINT32_C(3956440918), UINT32_C(3377683625), UINT32_C(2991438481), UINT32_C(4024508959), UINT32_C( 899659297), UINT32_C( 800601470), UINT32_C(4243057777), UINT32_C(2589801649), UINT32_C(1093083275), UINT32_C( 969545329), UINT32_C( 154996236), UINT32_C(2940729031), UINT32_C( 598365436), UINT32_C(1659375884), UINT32_C( 204269667), } }, { { UINT32_C(3268771652), UINT32_C( 653364916), UINT32_C(2959063972), UINT32_C(1578321869) }, { UINT32_C(1930452994), UINT32_C(1500510029), UINT32_C(1751297285), UINT32_C(3648360085) }, { UINT32_C(3264956942), UINT32_C(2095615192), UINT32_C( 355288904), UINT32_C(2440249487) }, { UINT32_C(3976496032), UINT32_C(2135389050), UINT32_C(3051923744), UINT32_C(1183735096) }, { UINT32_C(3268771652), UINT32_C(1930452994), UINT32_C(3264956942), UINT32_C(3976496032), UINT32_C( 653364916), UINT32_C(1500510029), UINT32_C(2095615192), UINT32_C(2135389050), UINT32_C(2959063972), UINT32_C(1751297285), UINT32_C( 355288904), UINT32_C(3051923744), UINT32_C(1578321869), UINT32_C(3648360085), UINT32_C(2440249487), UINT32_C(1183735096), } }, { { UINT32_C(2148018599), UINT32_C(4277989814), UINT32_C(3339921720), UINT32_C( 173573737) }, { UINT32_C(2214026249), UINT32_C(4026678992), UINT32_C( 547810024), UINT32_C(4016452679) }, { UINT32_C( 342847070), UINT32_C(2551343967), 
UINT32_C(4267648405), UINT32_C(3020470443) }, { UINT32_C(3828809748), UINT32_C( 651508030), UINT32_C(1799781156), UINT32_C( 224046255) }, { UINT32_C(2148018599), UINT32_C(2214026249), UINT32_C( 342847070), UINT32_C(3828809748), UINT32_C(4277989814), UINT32_C(4026678992), UINT32_C(2551343967), UINT32_C( 651508030), UINT32_C(3339921720), UINT32_C( 547810024), UINT32_C(4267648405), UINT32_C(1799781156), UINT32_C( 173573737), UINT32_C(4016452679), UINT32_C(3020470443), UINT32_C( 224046255), } }, { { UINT32_C(2049034523), UINT32_C(3390190389), UINT32_C( 63468120), UINT32_C(1052234026) }, { UINT32_C( 254013137), UINT32_C(1278670888), UINT32_C( 582450291), UINT32_C(1144001065) }, { UINT32_C( 280908251), UINT32_C(3722105221), UINT32_C(1843503939), UINT32_C(1168873588) }, { UINT32_C(2941570695), UINT32_C( 972786374), UINT32_C( 811381255), UINT32_C(2691992772) }, { UINT32_C(2049034523), UINT32_C( 254013137), UINT32_C( 280908251), UINT32_C(2941570695), UINT32_C(3390190389), UINT32_C(1278670888), UINT32_C(3722105221), UINT32_C( 972786374), UINT32_C( 63468120), UINT32_C( 582450291), UINT32_C(1843503939), UINT32_C( 811381255), UINT32_C(1052234026), UINT32_C(1144001065), UINT32_C(1168873588), UINT32_C(2691992772), } }, { { UINT32_C(1655714525), UINT32_C(1178635011), UINT32_C(2729648430), UINT32_C(1088905145) }, { UINT32_C(4109318957), UINT32_C(3442338502), UINT32_C(1644005789), UINT32_C(4076957973) }, { UINT32_C(2807411363), UINT32_C(1810732349), UINT32_C(1879941558), UINT32_C( 766571520) }, { UINT32_C(4112621615), UINT32_C( 667045770), UINT32_C(4002004952), UINT32_C(3571550768) }, { UINT32_C(1655714525), UINT32_C(4109318957), UINT32_C(2807411363), UINT32_C(4112621615), UINT32_C(1178635011), UINT32_C(3442338502), UINT32_C(1810732349), UINT32_C( 667045770), UINT32_C(2729648430), UINT32_C(1644005789), UINT32_C(1879941558), UINT32_C(4002004952), UINT32_C(1088905145), UINT32_C(4076957973), UINT32_C( 766571520), UINT32_C(3571550768), } }, { { UINT32_C(2038117948), 
UINT32_C(2196007116), UINT32_C( 166916361), UINT32_C( 355967973) }, { UINT32_C(3440007235), UINT32_C(2163592615), UINT32_C(3178135180), UINT32_C(1167150856) }, { UINT32_C(1371409541), UINT32_C(2127864692), UINT32_C(2055718548), UINT32_C(2895101545) }, { UINT32_C(3195705623), UINT32_C(4080955238), UINT32_C(4121996525), UINT32_C(2168078843) }, { UINT32_C(2038117948), UINT32_C(3440007235), UINT32_C(1371409541), UINT32_C(3195705623), UINT32_C(2196007116), UINT32_C(2163592615), UINT32_C(2127864692), UINT32_C(4080955238), UINT32_C( 166916361), UINT32_C(3178135180), UINT32_C(2055718548), UINT32_C(4121996525), UINT32_C( 355967973), UINT32_C(1167150856), UINT32_C(2895101545), UINT32_C(2168078843), } }, { { UINT32_C(3251829069), UINT32_C( 809477788), UINT32_C(3601516397), UINT32_C(2625845637) }, { UINT32_C( 962330067), UINT32_C(1496095084), UINT32_C(1095687238), UINT32_C(1791133981) }, { UINT32_C( 506238338), UINT32_C(2823711547), UINT32_C(3095394610), UINT32_C( 89391666) }, { UINT32_C(1799335935), UINT32_C(2412014409), UINT32_C(1708134984), UINT32_C( 500208539) }, { UINT32_C(3251829069), UINT32_C( 962330067), UINT32_C( 506238338), UINT32_C(1799335935), UINT32_C( 809477788), UINT32_C(1496095084), UINT32_C(2823711547), UINT32_C(2412014409), UINT32_C(3601516397), UINT32_C(1095687238), UINT32_C(3095394610), UINT32_C(1708134984), UINT32_C(2625845637), UINT32_C(1791133981), UINT32_C( 89391666), UINT32_C( 500208539), } }, { { UINT32_C(1664875560), UINT32_C(2584513127), UINT32_C(3042085763), UINT32_C(2361108109) }, { UINT32_C(2683828822), UINT32_C(2905520997), UINT32_C(1762918093), UINT32_C(3112625041) }, { UINT32_C(1176355551), UINT32_C(3487574348), UINT32_C(1099182772), UINT32_C( 801980377) }, { UINT32_C(2681127993), UINT32_C(1280113791), UINT32_C(2360696826), UINT32_C( 558185538) }, { UINT32_C(1664875560), UINT32_C(2683828822), UINT32_C(1176355551), UINT32_C(2681127993), UINT32_C(2584513127), UINT32_C(2905520997), UINT32_C(3487574348), UINT32_C(1280113791), UINT32_C(3042085763), 
UINT32_C(1762918093), UINT32_C(1099182772), UINT32_C(2360696826), UINT32_C(2361108109), UINT32_C(3112625041), UINT32_C( 801980377), UINT32_C( 558185538), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4x4_t r_ = { { simde_vld1q_u32(test_vec[i].r0), simde_vld1q_u32(test_vec[i].r1), simde_vld1q_u32(test_vec[i].r2), simde_vld1q_u32(test_vec[i].r3), } }; uint32_t a_[16]; simde_vst4q_u32(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld4q_u32(a_); simde_test_arm_neon_assert_equal_u32x4(r_.val[0], simde_vld1q_u32(test_vec[i].r0)); simde_test_arm_neon_assert_equal_u32x4(r_.val[1], simde_vld1q_u32(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u32x4(r_.val[2], simde_vld1q_u32(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (size_t i = 0 ; i < 8 ; i++) { simde_uint32x4_t a0 = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t a1 = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t a2 = simde_test_arm_neon_random_u32x4(); simde_uint32x4_t a3 = simde_test_arm_neon_random_u32x4(); simde_uint32x4x4_t a = { { a0, a1, a2, a3 } }; simde_test_arm_neon_write_u32x4(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, a3, SIMDE_TEST_VEC_POS_MIDDLE); uint32_t buf[16]; simde_vst4q_u32(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])); j++) { simde_test_codegen_write_u32(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } static int test_simde_vst4q_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t r0[2]; uint64_t r1[2]; uint64_t r2[2]; uint64_t r3[2]; uint64_t a[8]; } test_vec[] = { { { UINT64_C(10037403692393182480), UINT64_C( 8361711425298189690) }, { UINT64_C(12218369200614863841), UINT64_C( 2381342151739302120) }, { 
UINT64_C(15453747378136373104), UINT64_C( 6105776580658310922) }, { UINT64_C( 4376731735500977414), UINT64_C( 8959977481816756774) }, { UINT64_C(10037403692393182480), UINT64_C(12218369200614863841), UINT64_C(15453747378136373104), UINT64_C( 4376731735500977414), UINT64_C( 8361711425298189690), UINT64_C( 2381342151739302120), UINT64_C( 6105776580658310922), UINT64_C( 8959977481816756774), } }, { { UINT64_C(10431987683485534611), UINT64_C(16741170512735555212) }, { UINT64_C( 8510035628354374282), UINT64_C( 1734736352801278346) }, { UINT64_C(16421474816743668430), UINT64_C(15880597538443869594) }, { UINT64_C(11675532657163853633), UINT64_C( 9779932425969316892) }, { UINT64_C(10431987683485534611), UINT64_C( 8510035628354374282), UINT64_C(16421474816743668430), UINT64_C(11675532657163853633), UINT64_C(16741170512735555212), UINT64_C( 1734736352801278346), UINT64_C(15880597538443869594), UINT64_C( 9779932425969316892), } }, { { UINT64_C( 2738962131928727751), UINT64_C( 6749212612852049276) }, { UINT64_C( 5989212698123854948), UINT64_C( 7703879554845398697) }, { UINT64_C( 8878023709869165), UINT64_C( 3019797439513018479) }, { UINT64_C( 8387887008481400541), UINT64_C( 8441644595801381657) }, { UINT64_C( 2738962131928727751), UINT64_C( 5989212698123854948), UINT64_C( 8878023709869165), UINT64_C( 8387887008481400541), UINT64_C( 6749212612852049276), UINT64_C( 7703879554845398697), UINT64_C( 3019797439513018479), UINT64_C( 8441644595801381657), } }, { { UINT64_C(12107801399286214275), UINT64_C( 3115396163888797005) }, { UINT64_C(13633423723477328371), UINT64_C( 5512018679744235777) }, { UINT64_C(14352790015444249677), UINT64_C( 8262441203209495344) }, { UINT64_C(10473512980685384184), UINT64_C(13727844063193584237) }, { UINT64_C(12107801399286214275), UINT64_C(13633423723477328371), UINT64_C(14352790015444249677), UINT64_C(10473512980685384184), UINT64_C( 3115396163888797005), UINT64_C( 5512018679744235777), UINT64_C( 8262441203209495344), UINT64_C(13727844063193584237), } 
}, { { UINT64_C( 7009417625986258438), UINT64_C( 3443434260875124951) }, { UINT64_C(14669389872461773427), UINT64_C( 5703111053441904630) }, { UINT64_C(13109158135934642779), UINT64_C(14527439590170348853) }, { UINT64_C( 9293285077180769999), UINT64_C(15791886936142510736) }, { UINT64_C( 7009417625986258438), UINT64_C(14669389872461773427), UINT64_C(13109158135934642779), UINT64_C( 9293285077180769999), UINT64_C( 3443434260875124951), UINT64_C( 5703111053441904630), UINT64_C(14527439590170348853), UINT64_C(15791886936142510736), } }, { { UINT64_C( 4686933348686755703), UINT64_C(13493428713541211938) }, { UINT64_C( 52883688490798768), UINT64_C( 2825448768405086939) }, { UINT64_C( 1461005702183886329), UINT64_C(17170201446903051424) }, { UINT64_C(17099573023644070109), UINT64_C( 2403935012266714758) }, { UINT64_C( 4686933348686755703), UINT64_C( 52883688490798768), UINT64_C( 1461005702183886329), UINT64_C(17099573023644070109), UINT64_C(13493428713541211938), UINT64_C( 2825448768405086939), UINT64_C(17170201446903051424), UINT64_C( 2403935012266714758), } }, { { UINT64_C( 153805605195667739), UINT64_C( 6669222462536724321) }, { UINT64_C( 2235463927463523847), UINT64_C(16674210156985535772) }, { UINT64_C( 6624539943386844555), UINT64_C( 6602708424234597831) }, { UINT64_C( 6771013884191132222), UINT64_C( 9905958744852942022) }, { UINT64_C( 153805605195667739), UINT64_C( 2235463927463523847), UINT64_C( 6624539943386844555), UINT64_C( 6771013884191132222), UINT64_C( 6669222462536724321), UINT64_C(16674210156985535772), UINT64_C( 6602708424234597831), UINT64_C( 9905958744852942022), } }, { { UINT64_C( 5542635390721486485), UINT64_C(12506834188639594210) }, { UINT64_C( 4907653102754399614), UINT64_C( 6579846770763152841) }, { UINT64_C( 977235125474150077), UINT64_C(15264904023753630140) }, { UINT64_C(11717712954493328602), UINT64_C( 9262982781576246122) }, { UINT64_C( 5542635390721486485), UINT64_C( 4907653102754399614), UINT64_C( 977235125474150077), 
UINT64_C(11717712954493328602), UINT64_C(12506834188639594210), UINT64_C( 6579846770763152841), UINT64_C(15264904023753630140), UINT64_C( 9262982781576246122), } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2x4_t r_ = { { simde_vld1q_u64(test_vec[i].r0), simde_vld1q_u64(test_vec[i].r1), simde_vld1q_u64(test_vec[i].r2), simde_vld1q_u64(test_vec[i].r3), } }; uint64_t a_[8]; simde_vst4q_u64(a_, r_); simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); r_ = simde_vld4q_u64(a_); simde_test_arm_neon_assert_equal_u64x2(r_.val[0], simde_vld1q_u64(test_vec[i].r0)); simde_test_arm_neon_assert_equal_u64x2(r_.val[1], simde_vld1q_u64(test_vec[i].r1)); simde_test_arm_neon_assert_equal_u64x2(r_.val[2], simde_vld1q_u64(test_vec[i].r2)); } return 0; #else fputc('\n', stdout); for (size_t i = 0 ; i < 8 ; i++) { simde_uint64x2_t a0 = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t a1 = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t a2 = simde_test_arm_neon_random_u64x2(); simde_uint64x2_t a3 = simde_test_arm_neon_random_u64x2(); simde_uint64x2x4_t a = { { a0, a1, a2, a3 } }; simde_test_arm_neon_write_u64x2(2, a0, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, a1, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, a2, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, a3, SIMDE_TEST_VEC_POS_MIDDLE); uint64_t buf[8]; simde_vst4q_u64(buf, a); printf(" {\n"); for (size_t j = 0; j < (sizeof(buf) / sizeof(buf[0])); j++) { simde_test_codegen_write_u64(2, buf[j], SIMDE_TEST_VEC_POS_MIDDLE); } printf(" }\n },\n"); } return 1; #endif } #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_s64) 
SIMDE_TEST_FUNC_LIST_ENTRY(vst4_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_u64) #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/sub.c000066400000000000000000001654301400333146700162650ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN sub #include "test-neon.h" #include "../../../simde/arm/neon/sub.h" static int test_simde_vsub_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 360.99), SIMDE_FLOAT32_C( 722.92) }, { SIMDE_FLOAT32_C( 682.14), SIMDE_FLOAT32_C( 426.96) }, { SIMDE_FLOAT32_C( -321.15), SIMDE_FLOAT32_C( 295.96) } }, { { SIMDE_FLOAT32_C( -11.59), SIMDE_FLOAT32_C( 342.55) }, { SIMDE_FLOAT32_C( 590.06), SIMDE_FLOAT32_C( 965.15) }, { SIMDE_FLOAT32_C( -601.65), SIMDE_FLOAT32_C( -622.60) } }, { { SIMDE_FLOAT32_C( 547.91), SIMDE_FLOAT32_C( -219.14) }, { SIMDE_FLOAT32_C( -886.88), SIMDE_FLOAT32_C( 282.47) }, { SIMDE_FLOAT32_C( 1434.79), SIMDE_FLOAT32_C( -501.61) } }, { { SIMDE_FLOAT32_C( 13.06), SIMDE_FLOAT32_C( 946.64) }, { SIMDE_FLOAT32_C( 963.27), SIMDE_FLOAT32_C( 959.34) }, { SIMDE_FLOAT32_C( -950.22), SIMDE_FLOAT32_C( -12.70) } }, { { SIMDE_FLOAT32_C( 829.13), SIMDE_FLOAT32_C( -382.80) }, { SIMDE_FLOAT32_C( -854.14), SIMDE_FLOAT32_C( 146.76) }, { SIMDE_FLOAT32_C( 1683.27), SIMDE_FLOAT32_C( -529.55) } }, { { SIMDE_FLOAT32_C( 73.22), SIMDE_FLOAT32_C( -493.34) }, { SIMDE_FLOAT32_C( 912.86), SIMDE_FLOAT32_C( 976.77) }, { SIMDE_FLOAT32_C( -839.64), 
SIMDE_FLOAT32_C( -1470.11) } }, { { SIMDE_FLOAT32_C( 314.03), SIMDE_FLOAT32_C( -507.33) }, { SIMDE_FLOAT32_C( 923.09), SIMDE_FLOAT32_C( -827.02) }, { SIMDE_FLOAT32_C( -609.06), SIMDE_FLOAT32_C( 319.69) } }, { { SIMDE_FLOAT32_C( -988.72), SIMDE_FLOAT32_C( -917.98) }, { SIMDE_FLOAT32_C( -195.14), SIMDE_FLOAT32_C( 372.28) }, { SIMDE_FLOAT32_C( -793.58), SIMDE_FLOAT32_C( -1290.26) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a, b, r; a = simde_vld1_f32(test_vec[i].a); b = simde_vld1_f32(test_vec[i].b); r = simde_vsub_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vsub_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[1]; simde_float64 b[1]; simde_float64 r[1]; } test_vec[] = { { { SIMDE_FLOAT64_C( -464.36) }, { SIMDE_FLOAT64_C( 759.96) }, { SIMDE_FLOAT64_C( -1224.33) } }, { { SIMDE_FLOAT64_C( 959.10) }, { SIMDE_FLOAT64_C( 297.97) }, { SIMDE_FLOAT64_C( 661.13) } }, { { SIMDE_FLOAT64_C( 736.45) }, { SIMDE_FLOAT64_C( -164.58) }, { SIMDE_FLOAT64_C( 901.03) } }, { { SIMDE_FLOAT64_C( 849.22) }, { SIMDE_FLOAT64_C( 985.27) }, { SIMDE_FLOAT64_C( -136.05) } }, { { SIMDE_FLOAT64_C( 917.24) }, { SIMDE_FLOAT64_C( 640.75) }, { SIMDE_FLOAT64_C( 276.49) } }, { { SIMDE_FLOAT64_C( 422.18) }, { SIMDE_FLOAT64_C( 767.10) }, { SIMDE_FLOAT64_C( -344.92) } }, { { SIMDE_FLOAT64_C( -365.38) }, { SIMDE_FLOAT64_C( 691.56) }, { SIMDE_FLOAT64_C( -1056.93) } }, { { SIMDE_FLOAT64_C( 655.10) }, { SIMDE_FLOAT64_C( 559.29) }, { SIMDE_FLOAT64_C( 95.82) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); simde_float64x1_t r = simde_vsub_f64(a, b); simde_test_arm_neon_assert_equal_f64x1(r, simde_vld1_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vsub_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; 
} test_vec[] = { { { -INT8_C( 86), INT8_C( 59), -INT8_C( 116), -INT8_C( 123), -INT8_C( 44), -INT8_C( 120), INT8_C( 28), -INT8_C( 104) }, { -INT8_C( 122), INT8_C( 123), -INT8_C( 26), INT8_C( 51), INT8_C( 71), -INT8_C( 64), INT8_C( 123), INT8_C( 12) }, { INT8_C( 36), -INT8_C( 64), -INT8_C( 90), INT8_C( 82), -INT8_C( 115), -INT8_C( 56), -INT8_C( 95), -INT8_C( 116) } }, { { -INT8_C( 23), -INT8_C( 9), -INT8_C( 81), -INT8_C( 94), INT8_C( 57), -INT8_C( 16), -INT8_C( 81), -INT8_C( 110) }, { -INT8_C( 28), INT8_C( 125), -INT8_C( 21), -INT8_C( 64), INT8_C( 43), INT8_C( 80), INT8_C( 73), -INT8_C( 43) }, { INT8_C( 5), INT8_C( 122), -INT8_C( 60), -INT8_C( 30), INT8_C( 14), -INT8_C( 96), INT8_C( 102), -INT8_C( 67) } }, { { -INT8_C( 116), -INT8_C( 43), INT8_C( 91), INT8_C( 96), INT8_C( 94), INT8_C( 119), -INT8_C( 8), -INT8_C( 28) }, { -INT8_C( 14), -INT8_C( 34), INT8_C( 24), INT8_C( 57), -INT8_C( 98), -INT8_C( 109), INT8_C( 69), -INT8_C( 121) }, { -INT8_C( 102), -INT8_C( 9), INT8_C( 67), INT8_C( 39), -INT8_C( 64), -INT8_C( 28), -INT8_C( 77), INT8_C( 93) } }, { { -INT8_C( 118), -INT8_C( 12), INT8_C( 42), -INT8_C( 60), -INT8_C( 28), -INT8_C( 39), INT8_C( 86), -INT8_C( 55) }, { INT8_C( 86), INT8_C( 65), -INT8_C( 119), -INT8_C( 126), -INT8_C( 110), -INT8_C( 46), INT8_C( 87), INT8_C( 30) }, { INT8_C( 52), -INT8_C( 77), -INT8_C( 95), INT8_C( 66), INT8_C( 82), INT8_C( 7), -INT8_C( 1), -INT8_C( 85) } }, { { -INT8_C( 89), -INT8_C( 78), INT8_C( 126), INT8_C( 5), INT8_C( 41), INT8_C( 118), -INT8_C( 22), INT8_C( 27) }, { INT8_C( 84), INT8_C( 2), INT8_C( 84), -INT8_C( 13), -INT8_C( 107), -INT8_C( 102), INT8_C( 122), INT8_C( 31) }, { INT8_C( 83), -INT8_C( 80), INT8_C( 42), INT8_C( 18), -INT8_C( 108), -INT8_C( 36), INT8_C( 112), -INT8_C( 4) } }, { { -INT8_C( 114), -INT8_C( 92), -INT8_C( 29), INT8_C( 115), INT8_C( 126), INT8_C( 57), INT8_C( 60), -INT8_C( 44) }, { INT8_C( 123), -INT8_C( 59), INT8_C( 86), INT8_C( 13), -INT8_C( 105), -INT8_C( 82), INT8_C( 43), INT8_C( 62) }, { INT8_C( 19), -INT8_C( 
33), -INT8_C( 115), INT8_C( 102), -INT8_C( 25), -INT8_C( 117), INT8_C( 17), -INT8_C( 106) } }, { { INT8_C( 96), -INT8_C( 87), INT8_C( 68), -INT8_C( 118), INT8_C( 31), INT8_C( 46), -INT8_C( 91), INT8_C( 115) }, { INT8_C( 48), -INT8_C( 6), INT8_C( 102), -INT8_C( 59), -INT8_C( 108), -INT8_C( 31), -INT8_C( 28), INT8_C( 34) }, { INT8_C( 48), -INT8_C( 81), -INT8_C( 34), -INT8_C( 59), -INT8_C( 117), INT8_C( 77), -INT8_C( 63), INT8_C( 81) } }, { { -INT8_C( 123), -INT8_C( 56), -INT8_C( 107), INT8_C( 3), INT8_C( 1), -INT8_C( 47), -INT8_C( 40), INT8_C( 124) }, { -INT8_C( 106), INT8_C( 46), -INT8_C( 119), INT8_C( 45), -INT8_C( 36), -INT8_C( 76), INT8_C( 108), INT8_C( 61) }, { -INT8_C( 17), -INT8_C( 102), INT8_C( 12), -INT8_C( 42), INT8_C( 37), INT8_C( 29), INT8_C( 108), INT8_C( 63) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vsub_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vsub_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 24371), INT16_C( 25777), -INT16_C( 6138), -INT16_C( 26958) }, { INT16_C( 31782), -INT16_C( 21475), -INT16_C( 30815), INT16_C( 16870) }, { INT16_C( 9383), -INT16_C( 18284), INT16_C( 24677), INT16_C( 21708) } }, { { INT16_C( 30389), INT16_C( 1729), -INT16_C( 20391), INT16_C( 4694) }, { INT16_C( 28093), -INT16_C( 9420), INT16_C( 22129), INT16_C( 16024) }, { INT16_C( 2296), INT16_C( 11149), INT16_C( 23016), -INT16_C( 11330) } }, { { INT16_C( 19190), -INT16_C( 606), INT16_C( 21810), INT16_C( 22675) }, { -INT16_C( 20271), INT16_C( 29444), -INT16_C( 5321), -INT16_C( 4684) }, { -INT16_C( 26075), -INT16_C( 30050), INT16_C( 27131), INT16_C( 27359) } }, { { INT16_C( 30049), -INT16_C( 17677), INT16_C( 18982), -INT16_C( 7220) }, { INT16_C( 183), INT16_C( 10686), INT16_C( 
22358), INT16_C( 19815) }, { INT16_C( 29866), -INT16_C( 28363), -INT16_C( 3376), -INT16_C( 27035) } }, { { INT16_C( 2721), -INT16_C( 11446), -INT16_C( 8865), INT16_C( 12331) }, { INT16_C( 12429), -INT16_C( 15197), INT16_C( 22555), INT16_C( 31921) }, { -INT16_C( 9708), INT16_C( 3751), -INT16_C( 31420), -INT16_C( 19590) } }, { { -INT16_C( 23091), -INT16_C( 3274), INT16_C( 751), -INT16_C( 22825) }, { -INT16_C( 27389), INT16_C( 22991), INT16_C( 14316), -INT16_C( 29274) }, { INT16_C( 4298), -INT16_C( 26265), -INT16_C( 13565), INT16_C( 6449) } }, { { -INT16_C( 4031), -INT16_C( 24480), -INT16_C( 29491), INT16_C( 23248) }, { INT16_C( 29884), -INT16_C( 10465), -INT16_C( 12084), -INT16_C( 26285) }, { INT16_C( 31621), -INT16_C( 14015), -INT16_C( 17407), -INT16_C( 16003) } }, { { -INT16_C( 30347), INT16_C( 25741), INT16_C( 25739), -INT16_C( 29173) }, { -INT16_C( 9479), -INT16_C( 6424), -INT16_C( 29167), INT16_C( 21107) }, { -INT16_C( 20868), INT16_C( 32165), -INT16_C( 10630), INT16_C( 15256) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vsub_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vsub_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 2124589663), -INT32_C( 360776502) }, { INT32_C( 2043609956), -INT32_C( 1270893877) }, { INT32_C( 126767677), INT32_C( 910117375) } }, { { INT32_C( 343997614), INT32_C( 705859580) }, { INT32_C( 1578649653), INT32_C( 517998717) }, { -INT32_C( 1234652039), INT32_C( 187860863) } }, { { INT32_C( 664747613), -INT32_C( 1642980038) }, { -INT32_C( 350756576), INT32_C( 1100961427) }, { INT32_C( 1015504189), INT32_C( 1551025831) } }, { { INT32_C( 1314267218), -INT32_C( 461870929) }, { INT32_C( 759337136), -INT32_C( 162847847) }, { INT32_C( 554930082), 
-INT32_C( 299023082) } }, { { -INT32_C( 1692472479), INT32_C( 691613704) }, { -INT32_C( 1525395439), -INT32_C( 102320986) }, { -INT32_C( 167077040), INT32_C( 793934690) } }, { { -INT32_C( 2092483372), INT32_C( 1432862885) }, { -INT32_C( 377247152), INT32_C( 786484941) }, { -INT32_C( 1715236220), INT32_C( 646377944) } }, { { -INT32_C( 1026949447), INT32_C( 1072366126) }, { -INT32_C( 119210158), -INT32_C( 2014196813) }, { -INT32_C( 907739289), -INT32_C( 1208404357) } }, { { -INT32_C( 1408616185), INT32_C( 1224897273) }, { -INT32_C( 382499556), INT32_C( 219616083) }, { -INT32_C( 1026116629), INT32_C( 1005281190) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vsub_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vsub_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[1]; int64_t b[1]; int64_t r[1]; } test_vec[] = { { { -INT64_C( 657685634693326830) }, { -INT64_C( 8547862028071945079) }, { INT64_C( 7890176393378618249) } }, { { INT64_C( 410282319565482483) }, { -INT64_C( 8894323999304500444) }, { -INT64_C( 9142137754839568689) } }, { { -INT64_C( 4433867253433316581) }, { INT64_C( 561055581874761594) }, { -INT64_C( 4994922835308078175) } }, { { -INT64_C( 2464801910170707154) }, { INT64_C( 2554893422116537470) }, { -INT64_C( 5019695332287244624) } }, { { -INT64_C( 8461106941343280231) }, { -INT64_C( 1340220289429308853) }, { -INT64_C( 7120886651913971378) } }, { { -INT64_C( 4952413417304229456) }, { -INT64_C( 1464478769234607229) }, { -INT64_C( 3487934648069622227) } }, { { -INT64_C( 5112057892408831373) }, { INT64_C( 2123587332767078860) }, { -INT64_C( 7235645225175910233) } }, { { INT64_C( 3202283264815803256) }, { -INT64_C( 5869487482085106866) }, { INT64_C( 9071770746900910122) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_int64x1_t r = simde_vsub_s64(a, b); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; } static int test_simde_vsub_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(226), UINT8_C(152), UINT8_C(175), UINT8_C( 29), UINT8_C(233), UINT8_C( 81), UINT8_C( 65), UINT8_C( 7) }, { UINT8_C(166), UINT8_C(215), UINT8_C(155), UINT8_C( 54), UINT8_C(226), UINT8_C(113), UINT8_C( 20), UINT8_C(177) }, { UINT8_C( 60), UINT8_C(193), UINT8_C( 20), UINT8_C(231), UINT8_C( 7), UINT8_C(224), UINT8_C( 45), UINT8_C( 86) } }, { { UINT8_C( 32), UINT8_C(227), UINT8_C( 13), UINT8_C(184), UINT8_C(211), UINT8_C(108), UINT8_C(142), UINT8_C( 85) }, { UINT8_C( 75), UINT8_C(149), UINT8_C( 97), UINT8_C( 87), UINT8_C(227), UINT8_C(174), UINT8_C( 4), UINT8_C(197) }, { UINT8_C(213), UINT8_C( 78), UINT8_C(172), UINT8_C( 97), UINT8_C(240), UINT8_C(190), UINT8_C(138), UINT8_C(144) } }, { { UINT8_C( 70), UINT8_C(180), UINT8_C(227), UINT8_C( 48), UINT8_C( 5), UINT8_C( 36), UINT8_C( 55), UINT8_C(171) }, { UINT8_C(251), UINT8_C(210), UINT8_C(226), UINT8_C(222), UINT8_C( 67), UINT8_C(246), UINT8_C(143), UINT8_C(100) }, { UINT8_C( 75), UINT8_C(226), UINT8_C( 1), UINT8_C( 82), UINT8_C(194), UINT8_C( 46), UINT8_C(168), UINT8_C( 71) } }, { { UINT8_C(218), UINT8_C(156), UINT8_C( 28), UINT8_C(173), UINT8_C( 9), UINT8_C(171), UINT8_C( 2), UINT8_C( 84) }, { UINT8_C( 64), UINT8_C( 99), UINT8_C(171), UINT8_C( 36), UINT8_C( 17), UINT8_C(175), UINT8_C(233), UINT8_C( 88) }, { UINT8_C(154), UINT8_C( 57), UINT8_C(113), UINT8_C(137), UINT8_C(248), UINT8_C(252), UINT8_C( 25), UINT8_C(252) } }, { { UINT8_C( 99), UINT8_C(204), UINT8_C(136), UINT8_C(105), UINT8_C(240), UINT8_C(191), UINT8_C( 20), UINT8_C(236) }, { UINT8_C(145), UINT8_C(246), UINT8_C(202), UINT8_C(212), UINT8_C(237), UINT8_C( 89), 
UINT8_C( 56), UINT8_C(199) }, { UINT8_C(210), UINT8_C(214), UINT8_C(190), UINT8_C(149), UINT8_C( 3), UINT8_C(102), UINT8_C(220), UINT8_C( 37) } }, { { UINT8_C(246), UINT8_C( 85), UINT8_C(116), UINT8_C(255), UINT8_C( 0), UINT8_C(118), UINT8_C( 83), UINT8_C( 64) }, { UINT8_C(218), UINT8_C(254), UINT8_C(100), UINT8_C(235), UINT8_C(173), UINT8_C( 78), UINT8_C( 67), UINT8_C( 17) }, { UINT8_C( 28), UINT8_C( 87), UINT8_C( 16), UINT8_C( 20), UINT8_C( 83), UINT8_C( 40), UINT8_C( 16), UINT8_C( 47) } }, { { UINT8_C( 26), UINT8_C(203), UINT8_C(122), UINT8_C( 11), UINT8_C(138), UINT8_C(142), UINT8_C(247), UINT8_C( 27) }, { UINT8_C(133), UINT8_C(193), UINT8_C(240), UINT8_C(114), UINT8_C( 26), UINT8_C( 40), UINT8_C( 57), UINT8_C( 16) }, { UINT8_C(149), UINT8_C( 10), UINT8_C(138), UINT8_C(153), UINT8_C(112), UINT8_C(102), UINT8_C(190), UINT8_C( 11) } }, { { UINT8_C(125), UINT8_C(173), UINT8_C( 15), UINT8_C(125), UINT8_C( 35), UINT8_C( 98), UINT8_C(190), UINT8_C(253) }, { UINT8_C( 96), UINT8_C( 34), UINT8_C(233), UINT8_C( 14), UINT8_C(112), UINT8_C( 44), UINT8_C( 31), UINT8_C(139) }, { UINT8_C( 29), UINT8_C(139), UINT8_C( 38), UINT8_C(111), UINT8_C(179), UINT8_C( 54), UINT8_C(159), UINT8_C(114) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vsub_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vsub_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(39416), UINT16_C(33430), UINT16_C(36135), UINT16_C(44190) }, { UINT16_C(36430), UINT16_C(26654), UINT16_C(22454), UINT16_C(13433) }, { UINT16_C( 2986), UINT16_C( 6776), UINT16_C(13681), UINT16_C(30757) } }, { { UINT16_C(34820), UINT16_C(10417), UINT16_C(28651), UINT16_C(19237) }, { UINT16_C( 3730), UINT16_C( 601), UINT16_C(30779), UINT16_C(13197) }, { 
UINT16_C(31090), UINT16_C( 9816), UINT16_C(63408), UINT16_C( 6040) } }, { { UINT16_C( 8977), UINT16_C(14773), UINT16_C(21424), UINT16_C(65253) }, { UINT16_C( 1249), UINT16_C(39015), UINT16_C(57435), UINT16_C(24780) }, { UINT16_C( 7728), UINT16_C(41294), UINT16_C(29525), UINT16_C(40473) } }, { { UINT16_C(32104), UINT16_C(21384), UINT16_C(44525), UINT16_C(32671) }, { UINT16_C(63676), UINT16_C(63361), UINT16_C( 3953), UINT16_C(33322) }, { UINT16_C(33964), UINT16_C(23559), UINT16_C(40572), UINT16_C(64885) } }, { { UINT16_C(57138), UINT16_C(58299), UINT16_C(41267), UINT16_C( 5345) }, { UINT16_C(18597), UINT16_C( 172), UINT16_C(30760), UINT16_C(37216) }, { UINT16_C(38541), UINT16_C(58127), UINT16_C(10507), UINT16_C(33665) } }, { { UINT16_C(59638), UINT16_C(58340), UINT16_C(33686), UINT16_C(21090) }, { UINT16_C(58236), UINT16_C(60745), UINT16_C(29682), UINT16_C( 9583) }, { UINT16_C( 1402), UINT16_C(63131), UINT16_C( 4004), UINT16_C(11507) } }, { { UINT16_C(11090), UINT16_C(34056), UINT16_C(59852), UINT16_C(29082) }, { UINT16_C(17970), UINT16_C(23153), UINT16_C(53951), UINT16_C(46571) }, { UINT16_C(58656), UINT16_C(10903), UINT16_C( 5901), UINT16_C(48047) } }, { { UINT16_C(53434), UINT16_C(20632), UINT16_C(64083), UINT16_C(53154) }, { UINT16_C(60381), UINT16_C(53436), UINT16_C(11358), UINT16_C(45557) }, { UINT16_C(58589), UINT16_C(32732), UINT16_C(52725), UINT16_C( 7597) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vsub_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vsub_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C( 590806359), UINT32_C( 412405990) }, { UINT32_C(3597862167), UINT32_C(2458607319) }, { UINT32_C(1287911488), UINT32_C(2248765967) } }, { { 
UINT32_C(2195858222), UINT32_C(4199646493) }, { UINT32_C(3486125680), UINT32_C(2441133882) }, { UINT32_C(3004699838), UINT32_C(1758512611) } }, { { UINT32_C(2746529468), UINT32_C(2663073927) }, { UINT32_C( 628371021), UINT32_C(3149397901) }, { UINT32_C(2118158447), UINT32_C(3808643322) } }, { { UINT32_C(1061001506), UINT32_C(2402914078) }, { UINT32_C(3613263005), UINT32_C(2154356419) }, { UINT32_C(1742705797), UINT32_C( 248557659) } }, { { UINT32_C( 455285908), UINT32_C(2981748324) }, { UINT32_C(2597727501), UINT32_C(1314229548) }, { UINT32_C(2152525703), UINT32_C(1667518776) } }, { { UINT32_C(1166906151), UINT32_C(3218392866) }, { UINT32_C(2392208075), UINT32_C(2769223184) }, { UINT32_C(3069665372), UINT32_C( 449169682) } }, { { UINT32_C(2126524698), UINT32_C( 489650704) }, { UINT32_C(3568764583), UINT32_C(3122793619) }, { UINT32_C(2852727411), UINT32_C(1661824381) } }, { { UINT32_C(3238047903), UINT32_C(1115739255) }, { UINT32_C( 399513095), UINT32_C( 784129812) }, { UINT32_C(2838534808), UINT32_C( 331609443) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vsub_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vsub_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C(11402512785535171856) }, { UINT64_C(11542889678448555234) }, { UINT64_C(18306367180796168238) } }, { { UINT64_C( 864376688470011973) }, { UINT64_C(13708077300151734009) }, { UINT64_C( 5603043462027829580) } }, { { UINT64_C(12029709184206301276) }, { UINT64_C( 4517758008823669776) }, { UINT64_C( 7511951175382631500) } }, { { UINT64_C(17261445209413784700) }, { UINT64_C(16947671631759519687) }, { UINT64_C( 313773577654265013) } }, { { UINT64_C( 4434972706747584801) }, { UINT64_C( 8128155052344846485) }, { 
UINT64_C(14753561728112289932) } }, { { UINT64_C( 9798563678062233365) }, { UINT64_C( 5955216873392699436) }, { UINT64_C( 3843346804669533929) } }, { { UINT64_C(12225519354979607615) }, { UINT64_C( 1067922630293125229) }, { UINT64_C(11157596724686482386) } }, { { UINT64_C( 7787591547626374092) }, { UINT64_C( 8975898620757217037) }, { UINT64_C(17258437000578708671) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_vsub_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; } static int test_simde_vsubq_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 313.36), SIMDE_FLOAT32_C( 130.95), SIMDE_FLOAT32_C( 558.01), SIMDE_FLOAT32_C( -224.30) }, { SIMDE_FLOAT32_C( -431.17), SIMDE_FLOAT32_C( -838.29), SIMDE_FLOAT32_C( 973.18), SIMDE_FLOAT32_C( -249.71) }, { SIMDE_FLOAT32_C( 744.52), SIMDE_FLOAT32_C( 969.24), SIMDE_FLOAT32_C( -415.17), SIMDE_FLOAT32_C( 25.41) } }, { { SIMDE_FLOAT32_C( -619.93), SIMDE_FLOAT32_C( -195.67), SIMDE_FLOAT32_C( 784.76), SIMDE_FLOAT32_C( 593.47) }, { SIMDE_FLOAT32_C( -109.46), SIMDE_FLOAT32_C( -51.93), SIMDE_FLOAT32_C( -685.95), SIMDE_FLOAT32_C( 54.63) }, { SIMDE_FLOAT32_C( -510.46), SIMDE_FLOAT32_C( -143.74), SIMDE_FLOAT32_C( 1470.70), SIMDE_FLOAT32_C( 538.84) } }, { { SIMDE_FLOAT32_C( 238.71), SIMDE_FLOAT32_C( -726.25), SIMDE_FLOAT32_C( -693.46), SIMDE_FLOAT32_C( 265.88) }, { SIMDE_FLOAT32_C( 740.54), SIMDE_FLOAT32_C( 826.68), SIMDE_FLOAT32_C( -296.56), SIMDE_FLOAT32_C( -34.22) }, { SIMDE_FLOAT32_C( -501.83), SIMDE_FLOAT32_C( -1552.93), SIMDE_FLOAT32_C( -396.90), SIMDE_FLOAT32_C( 300.10) } }, { { SIMDE_FLOAT32_C( 788.21), SIMDE_FLOAT32_C( 906.36), SIMDE_FLOAT32_C( -290.22), SIMDE_FLOAT32_C( 38.77) }, { SIMDE_FLOAT32_C( 692.11), SIMDE_FLOAT32_C( 669.99), 
SIMDE_FLOAT32_C( 83.43), SIMDE_FLOAT32_C( 5.46) }, { SIMDE_FLOAT32_C( 96.10), SIMDE_FLOAT32_C( 236.38), SIMDE_FLOAT32_C( -373.65), SIMDE_FLOAT32_C( 33.31) } }, { { SIMDE_FLOAT32_C( -199.06), SIMDE_FLOAT32_C( -358.56), SIMDE_FLOAT32_C( 781.16), SIMDE_FLOAT32_C( 369.77) }, { SIMDE_FLOAT32_C( -196.85), SIMDE_FLOAT32_C( 754.34), SIMDE_FLOAT32_C( -879.94), SIMDE_FLOAT32_C( 183.22) }, { SIMDE_FLOAT32_C( -2.21), SIMDE_FLOAT32_C( -1112.90), SIMDE_FLOAT32_C( 1661.10), SIMDE_FLOAT32_C( 186.55) } }, { { SIMDE_FLOAT32_C( -441.33), SIMDE_FLOAT32_C( 904.82), SIMDE_FLOAT32_C( -223.31), SIMDE_FLOAT32_C( 449.20) }, { SIMDE_FLOAT32_C( -147.11), SIMDE_FLOAT32_C( 90.74), SIMDE_FLOAT32_C( -496.16), SIMDE_FLOAT32_C( -908.40) }, { SIMDE_FLOAT32_C( -294.22), SIMDE_FLOAT32_C( 814.08), SIMDE_FLOAT32_C( 272.85), SIMDE_FLOAT32_C( 1357.61) } }, { { SIMDE_FLOAT32_C( 364.49), SIMDE_FLOAT32_C( -189.62), SIMDE_FLOAT32_C( 357.48), SIMDE_FLOAT32_C( 105.03) }, { SIMDE_FLOAT32_C( -362.94), SIMDE_FLOAT32_C( -939.08), SIMDE_FLOAT32_C( -929.19), SIMDE_FLOAT32_C( -574.73) }, { SIMDE_FLOAT32_C( 727.43), SIMDE_FLOAT32_C( 749.46), SIMDE_FLOAT32_C( 1286.67), SIMDE_FLOAT32_C( 679.76) } }, { { SIMDE_FLOAT32_C( 967.28), SIMDE_FLOAT32_C( -219.41), SIMDE_FLOAT32_C( 464.05), SIMDE_FLOAT32_C( 659.39) }, { SIMDE_FLOAT32_C( -549.43), SIMDE_FLOAT32_C( -452.53), SIMDE_FLOAT32_C( -335.15), SIMDE_FLOAT32_C( 251.51) }, { SIMDE_FLOAT32_C( 1516.71), SIMDE_FLOAT32_C( 233.11), SIMDE_FLOAT32_C( 799.20), SIMDE_FLOAT32_C( 407.88) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vsubq_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vsubq_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 188.91), 
SIMDE_FLOAT64_C( -553.99) }, { SIMDE_FLOAT64_C( -378.72), SIMDE_FLOAT64_C( 992.06) }, { SIMDE_FLOAT64_C( 567.63), SIMDE_FLOAT64_C( -1546.05) } }, { { SIMDE_FLOAT64_C( -799.65), SIMDE_FLOAT64_C( -258.66) }, { SIMDE_FLOAT64_C( 175.28), SIMDE_FLOAT64_C( -240.98) }, { SIMDE_FLOAT64_C( -974.93), SIMDE_FLOAT64_C( -17.68) } }, { { SIMDE_FLOAT64_C( -353.84), SIMDE_FLOAT64_C( 951.96) }, { SIMDE_FLOAT64_C( -791.78), SIMDE_FLOAT64_C( 499.05) }, { SIMDE_FLOAT64_C( 437.94), SIMDE_FLOAT64_C( 452.91) } }, { { SIMDE_FLOAT64_C( 42.70), SIMDE_FLOAT64_C( -287.94) }, { SIMDE_FLOAT64_C( 590.65), SIMDE_FLOAT64_C( -592.80) }, { SIMDE_FLOAT64_C( -547.94), SIMDE_FLOAT64_C( 304.86) } }, { { SIMDE_FLOAT64_C( 522.44), SIMDE_FLOAT64_C( -51.88) }, { SIMDE_FLOAT64_C( 512.22), SIMDE_FLOAT64_C( -840.50) }, { SIMDE_FLOAT64_C( 10.21), SIMDE_FLOAT64_C( 788.62) } }, { { SIMDE_FLOAT64_C( 9.04), SIMDE_FLOAT64_C( 583.04) }, { SIMDE_FLOAT64_C( -415.23), SIMDE_FLOAT64_C( -23.68) }, { SIMDE_FLOAT64_C( 424.27), SIMDE_FLOAT64_C( 606.72) } }, { { SIMDE_FLOAT64_C( -636.38), SIMDE_FLOAT64_C( -951.18) }, { SIMDE_FLOAT64_C( -364.30), SIMDE_FLOAT64_C( -185.81) }, { SIMDE_FLOAT64_C( -272.08), SIMDE_FLOAT64_C( -765.37) } }, { { SIMDE_FLOAT64_C( -403.71), SIMDE_FLOAT64_C( 300.55) }, { SIMDE_FLOAT64_C( -934.30), SIMDE_FLOAT64_C( 785.21) }, { SIMDE_FLOAT64_C( 530.59), SIMDE_FLOAT64_C( -484.65) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vsubq_f64(a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vsubq_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 49), INT8_C( 77), INT8_C( 3), -INT8_C( 119), INT8_C( 60), INT8_C( 2), -INT8_C( 57), -INT8_C( 36), INT8_C( 62), -INT8_C( 100), INT8_C( 15), INT8_C( 100), INT8_C( 13), INT8_C( 
79), -INT8_C( 52), INT8_C( 90) }, { -INT8_C( 83), INT8_C( 119), -INT8_C( 39), INT8_C( 82), INT8_C( 122), INT8_C( 105), -INT8_C( 39), INT8_C( 96), INT8_C( 58), INT8_C( 116), -INT8_C( 80), -INT8_C( 127), -INT8_C( 85), INT8_C( 24), -INT8_C( 118), -INT8_C( 35) }, { -INT8_C( 124), -INT8_C( 42), INT8_C( 42), INT8_C( 55), -INT8_C( 62), -INT8_C( 103), -INT8_C( 18), INT8_C( 124), INT8_C( 4), INT8_C( 40), INT8_C( 95), -INT8_C( 29), INT8_C( 98), INT8_C( 55), INT8_C( 66), INT8_C( 125) } }, { { INT8_C( 101), -INT8_C( 115), INT8_C( 102), -INT8_C( 94), -INT8_C( 113), INT8_C( 46), INT8_C( 126), -INT8_C( 50), -INT8_C( 54), -INT8_C( 115), INT8_C( 50), -INT8_C( 40), -INT8_C( 35), -INT8_C( 2), INT8_C( 50), -INT8_C( 118) }, { INT8_C( 117), INT8_C( 11), -INT8_C( 35), -INT8_C( 17), INT8_C( 116), -INT8_C( 74), INT8_C( 79), -INT8_C( 82), INT8_C( 42), -INT8_C( 1), INT8_C( 47), -INT8_C( 42), INT8_C( 23), -INT8_C( 70), -INT8_C( 77), INT8_C( 125) }, { -INT8_C( 16), -INT8_C( 126), -INT8_C( 119), -INT8_C( 77), INT8_C( 27), INT8_C( 120), INT8_C( 47), INT8_C( 32), -INT8_C( 96), -INT8_C( 114), INT8_C( 3), INT8_C( 2), -INT8_C( 58), INT8_C( 68), INT8_MAX, INT8_C( 13) } }, { { INT8_C( 71), INT8_C( 25), INT8_C( 31), -INT8_C( 41), INT8_C( 71), -INT8_C( 99), -INT8_C( 91), INT8_C( 18), INT8_C( 42), -INT8_C( 41), -INT8_C( 22), INT8_C( 7), -INT8_C( 43), INT8_C( 28), -INT8_C( 110), INT8_C( 74) }, { INT8_C( 39), INT8_C( 111), INT8_C( 57), -INT8_C( 101), INT8_C( 37), -INT8_C( 120), INT8_C( 73), INT8_C( 80), -INT8_C( 120), INT8_C( 121), INT8_C( 38), -INT8_C( 97), INT8_C( 51), -INT8_C( 39), INT8_C( 28), INT8_C( 122) }, { INT8_C( 32), -INT8_C( 86), -INT8_C( 26), INT8_C( 60), INT8_C( 34), INT8_C( 21), INT8_C( 92), -INT8_C( 62), -INT8_C( 94), INT8_C( 94), -INT8_C( 60), INT8_C( 104), -INT8_C( 94), INT8_C( 67), INT8_C( 118), -INT8_C( 48) } }, { { -INT8_C( 14), INT8_C( 59), INT8_C( 81), INT8_C( 58), -INT8_C( 40), -INT8_C( 10), INT8_C( 76), INT8_C( 3), -INT8_C( 51), INT8_C( 54), INT8_C( 10), -INT8_C( 94), INT8_C( 82), 
-INT8_C( 100), -INT8_C( 20), INT8_C( 121) }, { INT8_C( 11), INT8_C( 37), INT8_C( 20), INT8_C( 49), -INT8_C( 82), INT8_C( 93), -INT8_C( 127), INT8_C( 54), -INT8_C( 42), -INT8_C( 89), -INT8_C( 43), INT8_C( 9), INT8_MIN, -INT8_C( 14), -INT8_C( 124), INT8_C( 114) }, { -INT8_C( 25), INT8_C( 22), INT8_C( 61), INT8_C( 9), INT8_C( 42), -INT8_C( 103), -INT8_C( 53), -INT8_C( 51), -INT8_C( 9), -INT8_C( 113), INT8_C( 53), -INT8_C( 103), -INT8_C( 46), -INT8_C( 86), INT8_C( 104), INT8_C( 7) } }, { { INT8_C( 45), -INT8_C( 43), -INT8_C( 84), INT8_C( 6), -INT8_C( 52), -INT8_C( 8), INT8_C( 9), -INT8_C( 103), INT8_C( 46), INT8_C( 19), INT8_C( 60), INT8_MIN, -INT8_C( 80), INT8_C( 40), -INT8_C( 7), -INT8_C( 69) }, { INT8_C( 78), INT8_C( 13), -INT8_C( 20), -INT8_C( 4), INT8_C( 107), INT8_C( 109), INT8_C( 50), INT8_C( 65), INT8_C( 20), INT8_C( 7), INT8_C( 75), -INT8_C( 108), -INT8_C( 7), -INT8_C( 49), INT8_C( 7), INT8_C( 39) }, { -INT8_C( 33), -INT8_C( 56), -INT8_C( 64), INT8_C( 10), INT8_C( 97), -INT8_C( 117), -INT8_C( 41), INT8_C( 88), INT8_C( 26), INT8_C( 12), -INT8_C( 15), -INT8_C( 20), -INT8_C( 73), INT8_C( 89), -INT8_C( 14), -INT8_C( 108) } }, { { -INT8_C( 92), -INT8_C( 77), INT8_C( 45), INT8_C( 112), -INT8_C( 84), INT8_C( 54), INT8_C( 10), -INT8_C( 38), INT8_C( 73), INT8_C( 70), INT8_C( 91), -INT8_C( 7), INT8_C( 110), INT8_C( 84), -INT8_C( 75), -INT8_C( 68) }, { INT8_C( 98), -INT8_C( 95), -INT8_C( 72), -INT8_C( 51), INT8_C( 15), -INT8_C( 22), INT8_C( 14), INT8_C( 35), -INT8_C( 14), INT8_C( 89), -INT8_C( 72), -INT8_C( 21), INT8_C( 40), -INT8_C( 65), INT8_C( 18), -INT8_C( 51) }, { INT8_C( 66), INT8_C( 18), INT8_C( 117), -INT8_C( 93), -INT8_C( 99), INT8_C( 76), -INT8_C( 4), -INT8_C( 73), INT8_C( 87), -INT8_C( 19), -INT8_C( 93), INT8_C( 14), INT8_C( 70), -INT8_C( 107), -INT8_C( 93), -INT8_C( 17) } }, { { INT8_C( 114), INT8_C( 63), INT8_C( 61), INT8_C( 30), INT8_C( 117), INT8_C( 71), -INT8_C( 7), -INT8_C( 65), -INT8_C( 115), INT8_C( 84), -INT8_C( 72), -INT8_C( 4), -INT8_C( 88), INT8_C( 
109), -INT8_C( 72), INT8_C( 10) }, { INT8_C( 15), INT8_C( 113), -INT8_C( 41), INT8_C( 30), INT8_C( 91), -INT8_C( 26), INT8_C( 65), INT8_C( 77), INT8_C( 63), -INT8_C( 7), INT8_C( 57), INT8_C( 104), -INT8_C( 72), INT8_C( 75), INT8_C( 53), INT8_C( 43) }, { INT8_C( 99), -INT8_C( 50), INT8_C( 102), INT8_C( 0), INT8_C( 26), INT8_C( 97), -INT8_C( 72), INT8_C( 114), INT8_C( 78), INT8_C( 91), INT8_MAX, -INT8_C( 108), -INT8_C( 16), INT8_C( 34), -INT8_C( 125), -INT8_C( 33) } }, { { -INT8_C( 117), INT8_C( 114), INT8_C( 73), INT8_C( 0), -INT8_C( 70), INT8_C( 66), -INT8_C( 65), INT8_C( 71), -INT8_C( 106), INT8_C( 120), INT8_C( 67), INT8_C( 63), -INT8_C( 27), -INT8_C( 4), INT8_C( 73), -INT8_C( 12) }, { INT8_C( 109), INT8_C( 33), INT8_C( 18), -INT8_C( 56), INT8_C( 7), INT8_C( 84), INT8_C( 22), INT8_C( 70), INT8_C( 77), INT8_C( 79), -INT8_C( 82), INT8_C( 6), -INT8_C( 102), -INT8_C( 29), INT8_C( 49), INT8_C( 37) }, { INT8_C( 30), INT8_C( 81), INT8_C( 55), INT8_C( 56), -INT8_C( 77), -INT8_C( 18), -INT8_C( 87), INT8_C( 1), INT8_C( 73), INT8_C( 41), -INT8_C( 107), INT8_C( 57), INT8_C( 75), INT8_C( 25), INT8_C( 24), -INT8_C( 49) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vsubq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vsubq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 31318), INT16_C( 4134), -INT16_C( 6723), INT16_C( 21335), -INT16_C( 25763), INT16_C( 17298), -INT16_C( 9065), INT16_C( 1079) }, { INT16_C( 19197), INT16_C( 1228), -INT16_C( 7522), -INT16_C( 5302), -INT16_C( 1743), -INT16_C( 13071), INT16_C( 8924), INT16_C( 13041) }, { INT16_C( 12121), INT16_C( 2906), INT16_C( 799), INT16_C( 26637), -INT16_C( 24020), INT16_C( 30369), -INT16_C( 17989), -INT16_C( 11962) } }, { { INT16_C( 
6045), INT16_C( 23106), -INT16_C( 25859), INT16_C( 23213), INT16_C( 16437), -INT16_C( 13155), -INT16_C( 10980), INT16_C( 6608) }, { -INT16_C( 25569), -INT16_C( 17123), INT16_C( 26495), -INT16_C( 20312), -INT16_C( 26016), INT16_C( 15740), INT16_C( 28348), INT16_C( 22895) }, { INT16_C( 31614), -INT16_C( 25307), INT16_C( 13182), -INT16_C( 22011), -INT16_C( 23083), -INT16_C( 28895), INT16_C( 26208), -INT16_C( 16287) } }, { { -INT16_C( 19835), -INT16_C( 32077), INT16_C( 24908), -INT16_C( 32291), INT16_C( 31393), -INT16_C( 17075), INT16_C( 7503), INT16_C( 28374) }, { -INT16_C( 3143), INT16_C( 14379), -INT16_C( 11174), -INT16_C( 17431), INT16_C( 25966), INT16_C( 11000), INT16_C( 26579), INT16_C( 22916) }, { -INT16_C( 16692), INT16_C( 19080), -INT16_C( 29454), -INT16_C( 14860), INT16_C( 5427), -INT16_C( 28075), -INT16_C( 19076), INT16_C( 5458) } }, { { INT16_C( 14105), INT16_C( 26075), -INT16_C( 18280), INT16_C( 14822), INT16_C( 13107), -INT16_C( 32010), -INT16_C( 13232), INT16_C( 2801) }, { INT16_C( 7359), INT16_C( 6722), INT16_C( 11248), INT16_C( 24277), -INT16_C( 12911), INT16_C( 25737), INT16_C( 3380), INT16_C( 20157) }, { INT16_C( 6746), INT16_C( 19353), -INT16_C( 29528), -INT16_C( 9455), INT16_C( 26018), INT16_C( 7789), -INT16_C( 16612), -INT16_C( 17356) } }, { { -INT16_C( 26300), -INT16_C( 8781), -INT16_C( 26031), -INT16_C( 31722), INT16_C( 3533), INT16_C( 7687), -INT16_C( 1831), -INT16_C( 26328) }, { INT16_C( 27156), INT16_C( 1459), -INT16_C( 30570), INT16_C( 10083), -INT16_C( 5035), -INT16_C( 30325), INT16_C( 18937), INT16_C( 16087) }, { INT16_C( 12080), -INT16_C( 10240), INT16_C( 4539), INT16_C( 23731), INT16_C( 8568), -INT16_C( 27524), -INT16_C( 20768), INT16_C( 23121) } }, { { -INT16_C( 29726), INT16_C( 13083), INT16_C( 12581), -INT16_C( 3400), -INT16_C( 16578), INT16_C( 6160), INT16_C( 14519), -INT16_C( 13391) }, { INT16_C( 25763), INT16_C( 14800), INT16_C( 13548), INT16_C( 16736), -INT16_C( 5344), INT16_C( 6858), -INT16_C( 24012), INT16_C( 5720) }, { INT16_C( 
10047), -INT16_C( 1717), -INT16_C( 967), -INT16_C( 20136), -INT16_C( 11234), -INT16_C( 698), -INT16_C( 27005), -INT16_C( 19111) } }, { { INT16_C( 29485), INT16_C( 21066), INT16_C( 676), -INT16_C( 7356), INT16_C( 21953), INT16_C( 30971), -INT16_C( 21363), INT16_C( 12355) }, { INT16_C( 5136), -INT16_C( 919), -INT16_C( 14008), INT16_C( 26685), INT16_C( 1973), -INT16_C( 5758), -INT16_C( 9559), -INT16_C( 10752) }, { INT16_C( 24349), INT16_C( 21985), INT16_C( 14684), INT16_C( 31495), INT16_C( 19980), -INT16_C( 28807), -INT16_C( 11804), INT16_C( 23107) } }, { { INT16_C( 19021), -INT16_C( 3544), INT16_C( 27980), INT16_C( 3541), -INT16_C( 12094), INT16_C( 20357), -INT16_C( 14212), -INT16_C( 29568) }, { -INT16_C( 5668), INT16_C( 9352), -INT16_C( 14925), INT16_C( 26765), INT16_C( 4044), INT16_C( 30289), INT16_C( 20970), INT16_C( 14156) }, { INT16_C( 24689), -INT16_C( 12896), -INT16_C( 22631), -INT16_C( 23224), -INT16_C( 16138), -INT16_C( 9932), INT16_C( 30354), INT16_C( 21812) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vsubq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vsubq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 416713317), -INT32_C( 1527447838), INT32_C( 1257470414), INT32_C( 517370690) }, { INT32_C( 272850525), -INT32_C( 260517853), -INT32_C( 916010529), -INT32_C( 1241402853) }, { -INT32_C( 689563842), -INT32_C( 1266929985), -INT32_C( 2121486353), INT32_C( 1758773543) } }, { { INT32_C( 161360423), -INT32_C( 139619799), INT32_C( 1312989452), INT32_C( 1902909460) }, { -INT32_C( 1702776969), INT32_C( 1602943359), -INT32_C( 567742269), -INT32_C( 896259677) }, { INT32_C( 1864137392), -INT32_C( 1742563158), INT32_C( 1880731721), -INT32_C( 1495798159) } }, { { 
INT32_C( 2111058516), -INT32_C( 780893755), INT32_C( 924825122), INT32_C( 1185450959) }, { -INT32_C( 1159714245), -INT32_C( 434541789), -INT32_C( 20692389), -INT32_C( 1077323413) }, { -INT32_C( 1024194535), -INT32_C( 346351966), INT32_C( 945517511), -INT32_C( 2032192924) } }, { { INT32_C( 1346149771), INT32_C( 1092727070), INT32_C( 913850471), INT32_C( 125575372) }, { INT32_C( 1858166090), INT32_C( 592763848), -INT32_C( 2011031267), -INT32_C( 45552782) }, { -INT32_C( 512016319), INT32_C( 499963222), -INT32_C( 1370085558), INT32_C( 171128154) } }, { { -INT32_C( 1504803704), -INT32_C( 1645777099), INT32_C( 2094227376), -INT32_C( 897363840) }, { INT32_C( 1966621869), INT32_C( 1016630559), INT32_C( 415611558), INT32_C( 756354469) }, { INT32_C( 823541723), INT32_C( 1632559638), INT32_C( 1678615818), -INT32_C( 1653718309) } }, { { -INT32_C( 942382191), -INT32_C( 2090550317), -INT32_C( 1677773029), INT32_C( 879067783) }, { -INT32_C( 425091386), -INT32_C( 786283989), -INT32_C( 1561729028), -INT32_C( 2033189132) }, { -INT32_C( 517290805), -INT32_C( 1304266328), -INT32_C( 116044001), -INT32_C( 1382710381) } }, { { INT32_C( 894280546), INT32_C( 2058924383), INT32_C( 1880471528), -INT32_C( 5998023) }, { INT32_C( 1139101208), -INT32_C( 1944844144), -INT32_C( 466682385), INT32_C( 1584070396) }, { -INT32_C( 244820662), -INT32_C( 291198769), -INT32_C( 1947813383), -INT32_C( 1590068419) } }, { { INT32_C( 9680801), INT32_C( 1350191976), INT32_C( 1002475266), INT32_C( 574252298) }, { INT32_C( 1130766515), INT32_C( 399473192), INT32_C( 1962671736), -INT32_C( 1647155716) }, { -INT32_C( 1121085714), INT32_C( 950718784), -INT32_C( 960196470), -INT32_C( 2073559282) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vsubq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int 
test_simde_vsubq_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { -INT64_C( 5560511018500725476), -INT64_C( 5921063652388137560) }, { INT64_C( 3280802739062323783), INT64_C( 312846142526292670) }, { -INT64_C( 8841313757563049259), -INT64_C( 6233909794914430230) } }, { { -INT64_C( 5315832993644153384), -INT64_C( 7594410036925290509) }, { INT64_C( 745781472090492023), INT64_C( 5042654366893668879) }, { -INT64_C( 6061614465734645407), INT64_C( 5809679669890592228) } }, { { -INT64_C( 2966456111776037871), -INT64_C( 1147749979637138130) }, { -INT64_C( 5530009519274583829), INT64_C( 3612351040494110063) }, { INT64_C( 2563553407498545958), -INT64_C( 4760101020131248193) } }, { { INT64_C( 2585953444973112874), -INT64_C( 950908203764242589) }, { -INT64_C( 3593191495057151542), -INT64_C( 7078589392559026832) }, { INT64_C( 6179144940030264416), INT64_C( 6127681188794784243) } }, { { -INT64_C( 7073873831480650565), -INT64_C( 1068119307084729292) }, { INT64_C( 3629482117552952880), -INT64_C( 8043279582318360241) }, { INT64_C( 7743388124675948171), INT64_C( 6975160275233630949) } }, { { -INT64_C( 3714601521476728165), INT64_C( 7322910394983902601) }, { -INT64_C( 3279092754793889653), INT64_C( 7260354318767047259) }, { -INT64_C( 435508766682838512), INT64_C( 62556076216855342) } }, { { -INT64_C( 6889190113625373953), INT64_C( 3113659290876930971) }, { -INT64_C( 2934387092794955279), -INT64_C( 3430112546453398728) }, { -INT64_C( 3954803020830418674), INT64_C( 6543771837330329699) } }, { { -INT64_C( 1619946426246651700), -INT64_C( 5331436475357998025) }, { -INT64_C( 6382884422338053864), INT64_C( 787019694814557260) }, { INT64_C( 4762937996091402164), -INT64_C( 6118456170172555285) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_vsubq_s64(a, b); 
simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vsubq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C( 46), UINT8_C(195), UINT8_C( 10), UINT8_C(219), UINT8_C(166), UINT8_C( 78), UINT8_C(182), UINT8_C(237), UINT8_C(213), UINT8_C(145), UINT8_C( 21), UINT8_C(119), UINT8_C(125), UINT8_C(251), UINT8_C( 49), UINT8_C(192) }, { UINT8_C( 82), UINT8_C(167), UINT8_C( 39), UINT8_C(113), UINT8_C( 53), UINT8_C( 0), UINT8_C(195), UINT8_C( 43), UINT8_C( 66), UINT8_C(194), UINT8_C( 13), UINT8_C(182), UINT8_C( 87), UINT8_C( 48), UINT8_C( 39), UINT8_C(134) }, { UINT8_C(220), UINT8_C( 28), UINT8_C(227), UINT8_C(106), UINT8_C(113), UINT8_C( 78), UINT8_C(243), UINT8_C(194), UINT8_C(147), UINT8_C(207), UINT8_C( 8), UINT8_C(193), UINT8_C( 38), UINT8_C(203), UINT8_C( 10), UINT8_C( 58) } }, { { UINT8_C(243), UINT8_C( 49), UINT8_C( 97), UINT8_C(154), UINT8_C(127), UINT8_C( 23), UINT8_C(135), UINT8_C( 84), UINT8_C(169), UINT8_C(156), UINT8_C(203), UINT8_C( 38), UINT8_C(152), UINT8_C(252), UINT8_C(231), UINT8_C(234) }, { UINT8_C(163), UINT8_C( 14), UINT8_C( 91), UINT8_C(217), UINT8_C( 15), UINT8_C( 30), UINT8_C( 4), UINT8_C( 81), UINT8_C(225), UINT8_C( 17), UINT8_C( 7), UINT8_C( 56), UINT8_C( 65), UINT8_C( 46), UINT8_C(190), UINT8_C( 53) }, { UINT8_C( 80), UINT8_C( 35), UINT8_C( 6), UINT8_C(193), UINT8_C(112), UINT8_C(249), UINT8_C(131), UINT8_C( 3), UINT8_C(200), UINT8_C(139), UINT8_C(196), UINT8_C(238), UINT8_C( 87), UINT8_C(206), UINT8_C( 41), UINT8_C(181) } }, { { UINT8_C( 95), UINT8_C( 32), UINT8_C(207), UINT8_C(223), UINT8_C( 55), UINT8_C( 86), UINT8_C( 51), UINT8_C(224), UINT8_C(243), UINT8_MAX, UINT8_C( 7), UINT8_C(139), UINT8_C(251), UINT8_C(238), UINT8_C(117), UINT8_C(159) }, { UINT8_C(252), UINT8_C(208), UINT8_C(120), UINT8_C( 11), UINT8_C(238), UINT8_C(124), UINT8_C( 93), UINT8_C(207), UINT8_C(141), UINT8_C(100), UINT8_C( 8), UINT8_C(206), UINT8_C(147), 
UINT8_C(198), UINT8_C( 3), UINT8_C(242) }, { UINT8_C( 99), UINT8_C( 80), UINT8_C( 87), UINT8_C(212), UINT8_C( 73), UINT8_C(218), UINT8_C(214), UINT8_C( 17), UINT8_C(102), UINT8_C(155), UINT8_MAX, UINT8_C(189), UINT8_C(104), UINT8_C( 40), UINT8_C(114), UINT8_C(173) } }, { { UINT8_C(230), UINT8_C(210), UINT8_C(209), UINT8_C( 30), UINT8_C( 41), UINT8_C( 5), UINT8_C(254), UINT8_C( 28), UINT8_C( 4), UINT8_C( 5), UINT8_C(167), UINT8_MAX, UINT8_C(243), UINT8_C( 28), UINT8_C(158), UINT8_C(240) }, { UINT8_C(236), UINT8_C( 22), UINT8_C(251), UINT8_C(218), UINT8_C(146), UINT8_C( 88), UINT8_C(170), UINT8_C( 31), UINT8_C(189), UINT8_C(178), UINT8_C(238), UINT8_C( 80), UINT8_C(120), UINT8_C(241), UINT8_C( 66), UINT8_C( 95) }, { UINT8_C(250), UINT8_C(188), UINT8_C(214), UINT8_C( 68), UINT8_C(151), UINT8_C(173), UINT8_C( 84), UINT8_C(253), UINT8_C( 71), UINT8_C( 83), UINT8_C(185), UINT8_C(175), UINT8_C(123), UINT8_C( 43), UINT8_C( 92), UINT8_C(145) } }, { { UINT8_C(196), UINT8_C( 20), UINT8_C(125), UINT8_C(237), UINT8_C( 25), UINT8_C(123), UINT8_C( 9), UINT8_C( 29), UINT8_C(129), UINT8_C(176), UINT8_C( 28), UINT8_C(116), UINT8_C(204), UINT8_C(187), UINT8_C(100), UINT8_C(184) }, { UINT8_C(209), UINT8_C( 96), UINT8_C(146), UINT8_C(100), UINT8_C(184), UINT8_C( 60), UINT8_C(131), UINT8_C(117), UINT8_C(238), UINT8_C(113), UINT8_C(197), UINT8_C(103), UINT8_C( 99), UINT8_C( 8), UINT8_C(198), UINT8_C( 39) }, { UINT8_C(243), UINT8_C(180), UINT8_C(235), UINT8_C(137), UINT8_C( 97), UINT8_C( 63), UINT8_C(134), UINT8_C(168), UINT8_C(147), UINT8_C( 63), UINT8_C( 87), UINT8_C( 13), UINT8_C(105), UINT8_C(179), UINT8_C(158), UINT8_C(145) } }, { { UINT8_C( 28), UINT8_C( 67), UINT8_C( 20), UINT8_C( 53), UINT8_C(190), UINT8_C( 29), UINT8_C( 82), UINT8_C( 63), UINT8_C(205), UINT8_C(110), UINT8_C(180), UINT8_C(153), UINT8_C( 41), UINT8_C( 24), UINT8_C( 81), UINT8_C(251) }, { UINT8_C(120), UINT8_C(227), UINT8_C( 95), UINT8_C( 49), UINT8_C( 32), UINT8_C(226), UINT8_C(166), UINT8_C( 14), UINT8_C( 84), 
UINT8_C(108), UINT8_C(117), UINT8_C(183), UINT8_C(116), UINT8_C( 59), UINT8_C(222), UINT8_C(144) }, { UINT8_C(164), UINT8_C( 96), UINT8_C(181), UINT8_C( 4), UINT8_C(158), UINT8_C( 59), UINT8_C(172), UINT8_C( 49), UINT8_C(121), UINT8_C( 2), UINT8_C( 63), UINT8_C(226), UINT8_C(181), UINT8_C(221), UINT8_C(115), UINT8_C(107) } }, { { UINT8_C(126), UINT8_C(242), UINT8_C(197), UINT8_C( 61), UINT8_C( 15), UINT8_C( 23), UINT8_C(124), UINT8_C(220), UINT8_C(133), UINT8_C( 48), UINT8_C(117), UINT8_C(175), UINT8_C( 73), UINT8_C(198), UINT8_C(170), UINT8_C(193) }, { UINT8_C(169), UINT8_C( 9), UINT8_C(242), UINT8_C(201), UINT8_C(235), UINT8_C(153), UINT8_C(216), UINT8_C( 63), UINT8_C( 5), UINT8_C( 77), UINT8_C(246), UINT8_C(121), UINT8_C(137), UINT8_C(212), UINT8_C( 9), UINT8_C( 7) }, { UINT8_C(213), UINT8_C(233), UINT8_C(211), UINT8_C(116), UINT8_C( 36), UINT8_C(126), UINT8_C(164), UINT8_C(157), UINT8_C(128), UINT8_C(227), UINT8_C(127), UINT8_C( 54), UINT8_C(192), UINT8_C(242), UINT8_C(161), UINT8_C(186) } }, { { UINT8_C(198), UINT8_C(206), UINT8_C( 68), UINT8_C(213), UINT8_C(229), UINT8_C(193), UINT8_C(177), UINT8_C(106), UINT8_C(241), UINT8_C( 38), UINT8_C( 25), UINT8_C( 58), UINT8_C(236), UINT8_C(195), UINT8_C(252), UINT8_C(150) }, { UINT8_C(204), UINT8_C(238), UINT8_C( 95), UINT8_C(184), UINT8_C(135), UINT8_C( 55), UINT8_C(247), UINT8_C(140), UINT8_C(133), UINT8_C(238), UINT8_C( 5), UINT8_C( 14), UINT8_C(194), UINT8_C( 14), UINT8_C( 21), UINT8_C(137) }, { UINT8_C(250), UINT8_C(224), UINT8_C(229), UINT8_C( 29), UINT8_C( 94), UINT8_C(138), UINT8_C(186), UINT8_C(222), UINT8_C(108), UINT8_C( 56), UINT8_C( 20), UINT8_C( 44), UINT8_C( 42), UINT8_C(181), UINT8_C(231), UINT8_C( 13) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vsubq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } 
return 0; } static int test_simde_vsubq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(43750), UINT16_C(51981), UINT16_C(43836), UINT16_C(49690), UINT16_C(27185), UINT16_C(26458), UINT16_C(47312), UINT16_C(58715) }, { UINT16_C( 9265), UINT16_C(46000), UINT16_C(44438), UINT16_C( 9529), UINT16_C(38462), UINT16_C(62869), UINT16_C(57124), UINT16_C( 2719) }, { UINT16_C(34485), UINT16_C( 5981), UINT16_C(64934), UINT16_C(40161), UINT16_C(54259), UINT16_C(29125), UINT16_C(55724), UINT16_C(55996) } }, { { UINT16_C(44170), UINT16_C(50902), UINT16_C(61527), UINT16_C(34952), UINT16_C(57946), UINT16_C(11248), UINT16_C(19354), UINT16_C(52240) }, { UINT16_C(49519), UINT16_C( 1407), UINT16_C(47214), UINT16_C(44331), UINT16_C(49231), UINT16_C(29602), UINT16_C(16800), UINT16_C(10877) }, { UINT16_C(60187), UINT16_C(49495), UINT16_C(14313), UINT16_C(56157), UINT16_C( 8715), UINT16_C(47182), UINT16_C( 2554), UINT16_C(41363) } }, { { UINT16_C(21485), UINT16_C(17648), UINT16_C(30787), UINT16_C(40653), UINT16_C(48474), UINT16_C(62665), UINT16_C(55560), UINT16_C(30656) }, { UINT16_C(16282), UINT16_C( 2429), UINT16_C(43256), UINT16_C(18358), UINT16_C(22632), UINT16_C( 2234), UINT16_C(14233), UINT16_C(34354) }, { UINT16_C( 5203), UINT16_C(15219), UINT16_C(53067), UINT16_C(22295), UINT16_C(25842), UINT16_C(60431), UINT16_C(41327), UINT16_C(61838) } }, { { UINT16_C( 8843), UINT16_C(52938), UINT16_C(38810), UINT16_C(62572), UINT16_C(13652), UINT16_C(23785), UINT16_C(43279), UINT16_C(43476) }, { UINT16_C(20969), UINT16_C(57778), UINT16_C(26873), UINT16_C(24872), UINT16_C(58048), UINT16_C(22890), UINT16_C(39961), UINT16_C(42207) }, { UINT16_C(53410), UINT16_C(60696), UINT16_C(11937), UINT16_C(37700), UINT16_C(21140), UINT16_C( 895), UINT16_C( 3318), UINT16_C( 1269) } }, { { UINT16_C(43711), UINT16_C(22899), UINT16_C(57153), UINT16_C(38478), UINT16_C(14101), UINT16_C( 9458), UINT16_C(50912), UINT16_C(51661) }, { UINT16_C(32791), 
UINT16_C( 4266), UINT16_C(53992), UINT16_C(43378), UINT16_C(56500), UINT16_C(52738), UINT16_C(57976), UINT16_C(14194) }, { UINT16_C(10920), UINT16_C(18633), UINT16_C( 3161), UINT16_C(60636), UINT16_C(23137), UINT16_C(22256), UINT16_C(58472), UINT16_C(37467) } }, { { UINT16_C(58764), UINT16_C(52625), UINT16_C(57285), UINT16_C(55907), UINT16_C(22038), UINT16_C(63230), UINT16_C(51996), UINT16_C(13504) }, { UINT16_C(27211), UINT16_C(13380), UINT16_C(46653), UINT16_C(61917), UINT16_C(57234), UINT16_C( 3007), UINT16_C(12993), UINT16_C(19778) }, { UINT16_C(31553), UINT16_C(39245), UINT16_C(10632), UINT16_C(59526), UINT16_C(30340), UINT16_C(60223), UINT16_C(39003), UINT16_C(59262) } }, { { UINT16_C(54039), UINT16_C(56347), UINT16_C(32434), UINT16_C(51382), UINT16_C(46292), UINT16_C(61887), UINT16_C(32640), UINT16_C(52005) }, { UINT16_C(27113), UINT16_C( 9983), UINT16_C(56352), UINT16_C(45592), UINT16_C(55228), UINT16_C(32189), UINT16_C( 9), UINT16_C( 8651) }, { UINT16_C(26926), UINT16_C(46364), UINT16_C(41618), UINT16_C( 5790), UINT16_C(56600), UINT16_C(29698), UINT16_C(32631), UINT16_C(43354) } }, { { UINT16_C(59091), UINT16_C(34557), UINT16_C(46180), UINT16_C(14670), UINT16_C( 3432), UINT16_C(59434), UINT16_C(20364), UINT16_C(30388) }, { UINT16_C(46008), UINT16_C(55452), UINT16_C(46224), UINT16_C(19595), UINT16_C(18572), UINT16_C(38345), UINT16_C(37960), UINT16_C( 7350) }, { UINT16_C(13083), UINT16_C(44641), UINT16_C(65492), UINT16_C(60611), UINT16_C(50396), UINT16_C(21089), UINT16_C(47940), UINT16_C(23038) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vsubq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vsubq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { 
UINT32_C(3751982202), UINT32_C(3491295336), UINT32_C(2327397118), UINT32_C(1224764817) }, { UINT32_C(2955058464), UINT32_C(3724324177), UINT32_C(1047774965), UINT32_C(3579455834) }, { UINT32_C( 796923738), UINT32_C(4061938455), UINT32_C(1279622153), UINT32_C(1940276279) } }, { { UINT32_C(1169489117), UINT32_C(3927362796), UINT32_C(2675298062), UINT32_C(1558738236) }, { UINT32_C(1678576146), UINT32_C(2906720695), UINT32_C( 720090319), UINT32_C(3154068958) }, { UINT32_C(3785880267), UINT32_C(1020642101), UINT32_C(1955207743), UINT32_C(2699636574) } }, { { UINT32_C( 755086145), UINT32_C(2367166335), UINT32_C( 573345254), UINT32_C( 360584194) }, { UINT32_C(3598289695), UINT32_C(1686354581), UINT32_C(1301180015), UINT32_C(4094201267) }, { UINT32_C(1451763746), UINT32_C( 680811754), UINT32_C(3567132535), UINT32_C( 561350223) } }, { { UINT32_C(3206678848), UINT32_C( 105658912), UINT32_C(3374872775), UINT32_C(2900273037) }, { UINT32_C(3347207986), UINT32_C(2167146002), UINT32_C( 684636788), UINT32_C(2283591240) }, { UINT32_C(4154438158), UINT32_C(2233480206), UINT32_C(2690235987), UINT32_C( 616681797) } }, { { UINT32_C( 4734688), UINT32_C(1057461368), UINT32_C(2584293133), UINT32_C( 155641814) }, { UINT32_C(1372637247), UINT32_C(1137900750), UINT32_C(4285243575), UINT32_C(1451722614) }, { UINT32_C(2927064737), UINT32_C(4214527914), UINT32_C(2594016854), UINT32_C(2998886496) } }, { { UINT32_C(1045942214), UINT32_C(1904107108), UINT32_C(1678477197), UINT32_C(2909622638) }, { UINT32_C(3908975897), UINT32_C(4046180410), UINT32_C(3891304048), UINT32_C(3812456221) }, { UINT32_C(1431933613), UINT32_C(2152893994), UINT32_C(2082140445), UINT32_C(3392133713) } }, { { UINT32_C(2871170119), UINT32_C(2149359858), UINT32_C(2514757415), UINT32_C(2437108088) }, { UINT32_C(3363389838), UINT32_C(2193204242), UINT32_C(1483319610), UINT32_C(1748739873) }, { UINT32_C(3802747577), UINT32_C(4251122912), UINT32_C(1031437805), UINT32_C( 688368215) } }, { { UINT32_C( 773020987), UINT32_C( 
615395325), UINT32_C(3468333654), UINT32_C(1902116323) }, { UINT32_C(1346033982), UINT32_C(3100898173), UINT32_C(3188735133), UINT32_C( 522603491) }, { UINT32_C(3721954301), UINT32_C(1809464448), UINT32_C( 279598521), UINT32_C(1379512832) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vsubq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vsubq_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(13748358714363820457), UINT64_C(13898648870508594573) }, { UINT64_C(12464525672885328836), UINT64_C(18312639204639246880) }, { UINT64_C( 1283833041478491621), UINT64_C(14032753739578899309) } }, { { UINT64_C(17937679446536712393), UINT64_C( 3114038371788815349) }, { UINT64_C( 5557555380804733029), UINT64_C(16412652075366845041) }, { UINT64_C(12380124065731979364), UINT64_C( 5148130370131521924) } }, { { UINT64_C(14935910925362031332), UINT64_C( 6705716878113156990) }, { UINT64_C( 6070836784052403398), UINT64_C(16751385124290662523) }, { UINT64_C( 8865074141309627934), UINT64_C( 8401075827532046083) } }, { { UINT64_C( 1005286295086795850), UINT64_C( 8972016764501244205) }, { UINT64_C(14030593306424989054), UINT64_C(18099070462252798271) }, { UINT64_C( 5421437062371358412), UINT64_C( 9319690375957997550) } }, { { UINT64_C( 1150971945603261276), UINT64_C( 7745832836702530379) }, { UINT64_C(15714891553410182414), UINT64_C(11168727231119650387) }, { UINT64_C( 3882824465902630478), UINT64_C(15023849679292431608) } }, { { UINT64_C( 8907979534016847533), UINT64_C(11224091280831009899) }, { UINT64_C( 7376339916272210152), UINT64_C( 8308414289197825140) }, { UINT64_C( 1531639617744637381), UINT64_C( 2915676991633184759) } }, { { UINT64_C(13777815640503210888), 
UINT64_C(10709519970759848310) }, { UINT64_C( 784601148808531819), UINT64_C( 4623801230915620594) }, { UINT64_C(12993214491694679069), UINT64_C( 6085718739844227716) } }, { { UINT64_C( 5183694642829930205), UINT64_C( 5557700198912427999) }, { UINT64_C(11536725458092243066), UINT64_C(13832107935466575868) }, { UINT64_C(12093713258447238755), UINT64_C(10172336337155403747) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vsubq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vsub_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vsub_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vsub_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vsub_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vsub_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vsub_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vsub_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vsub_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vsub_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vsub_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vsubq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vsubq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vsubq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vsubq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vsubq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vsubq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vsubq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vsubq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vsubq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vsubq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/subl.c000066400000000000000000000466311400333146700164420ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN subl #include "test-neon.h" #include "../../../simde/arm/neon/subl.h" static int test_simde_vsubl_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; int16_t r[8]; } test_vec[] = { { { INT8_C( 73), INT8_C( 90), INT8_C( 68), INT8_C( 85), -INT8_C( 7), INT8_C( 13), INT8_C( 88), INT8_C( 3) }, { INT8_C( 
50), INT8_C( 45), INT8_C( 6), -INT8_C( 8), -INT8_C( 12), INT8_C( 115), INT8_C( 13), -INT8_C( 98) }, { INT16_C( 23), INT16_C( 45), INT16_C( 62), INT16_C( 93), INT16_C( 5), -INT16_C( 102), INT16_C( 75), INT16_C( 101) } }, { { INT8_C( 105), -INT8_C( 61), INT8_C( 109), INT8_C( 79), -INT8_C( 55), INT8_C( 109), INT8_C( 100), INT8_C( 54) }, { -INT8_C( 24), INT8_C( 73), -INT8_C( 65), INT8_C( 38), INT8_C( 60), -INT8_C( 21), -INT8_C( 75), -INT8_C( 123) }, { INT16_C( 129), -INT16_C( 134), INT16_C( 174), INT16_C( 41), -INT16_C( 115), INT16_C( 130), INT16_C( 175), INT16_C( 177) } }, { { INT8_C( 69), -INT8_C( 6), -INT8_C( 38), INT8_C( 62), INT8_C( 7), INT8_C( 50), INT8_C( 65), INT8_C( 58) }, { INT8_C( 96), INT8_C( 71), INT8_C( 50), INT8_C( 84), -INT8_C( 70), INT8_C( 63), -INT8_C( 14), INT8_C( 36) }, { -INT16_C( 27), -INT16_C( 77), -INT16_C( 88), -INT16_C( 22), INT16_C( 77), -INT16_C( 13), INT16_C( 79), INT16_C( 22) } }, { { INT8_C( 2), INT8_C( 95), INT8_C( 115), -INT8_C( 53), -INT8_C( 51), -INT8_C( 41), INT8_C( 2), -INT8_C( 75) }, { INT8_C( 32), -INT8_C( 63), -INT8_C( 37), INT8_C( 92), -INT8_C( 84), -INT8_C( 111), -INT8_C( 31), -INT8_C( 15) }, { -INT16_C( 30), INT16_C( 158), INT16_C( 152), -INT16_C( 145), INT16_C( 33), INT16_C( 70), INT16_C( 33), -INT16_C( 60) } }, { { -INT8_C( 117), -INT8_C( 68), INT8_C( 47), -INT8_C( 110), -INT8_C( 18), INT8_C( 112), -INT8_C( 52), INT8_C( 78) }, { -INT8_C( 73), -INT8_C( 2), -INT8_C( 94), INT8_C( 113), INT8_C( 61), -INT8_C( 107), -INT8_C( 107), INT8_C( 63) }, { -INT16_C( 44), -INT16_C( 66), INT16_C( 141), -INT16_C( 223), -INT16_C( 79), INT16_C( 219), INT16_C( 55), INT16_C( 15) } }, { { -INT8_C( 12), INT8_C( 8), INT8_C( 11), -INT8_C( 63), -INT8_C( 33), INT8_C( 13), INT8_C( 118), -INT8_C( 1) }, { -INT8_C( 50), INT8_C( 82), INT8_C( 91), INT8_C( 122), -INT8_C( 29), INT8_C( 61), INT8_C( 107), INT8_C( 110) }, { INT16_C( 38), -INT16_C( 74), -INT16_C( 80), -INT16_C( 185), -INT16_C( 4), -INT16_C( 48), INT16_C( 11), -INT16_C( 111) } }, { { -INT8_C( 7), 
-INT8_C( 102), INT8_C( 0), -INT8_C( 25), INT8_C( 10), -INT8_C( 51), INT8_C( 54), -INT8_C( 63) }, { -INT8_C( 53), -INT8_C( 40), INT8_C( 50), INT8_C( 9), INT8_C( 109), -INT8_C( 56), INT8_C( 72), INT8_C( 98) }, { INT16_C( 46), -INT16_C( 62), -INT16_C( 50), -INT16_C( 34), -INT16_C( 99), INT16_C( 5), -INT16_C( 18), -INT16_C( 161) } }, { { -INT8_C( 48), INT8_C( 83), INT8_C( 35), -INT8_C( 80), INT8_C( 96), -INT8_C( 102), -INT8_C( 81), INT8_C( 46) }, { -INT8_C( 20), INT8_C( 11), -INT8_C( 88), -INT8_C( 49), INT8_C( 72), INT8_C( 19), INT8_C( 61), INT8_C( 65) }, { -INT16_C( 28), INT16_C( 72), INT16_C( 123), -INT16_C( 31), INT16_C( 24), -INT16_C( 121), -INT16_C( 142), -INT16_C( 19) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int16x8_t r = simde_vsubl_s8(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int16x8_t r = simde_vsubl_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsubl_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; int32_t r[4]; } test_vec[] = { { { -INT16_C( 9069), INT16_C( 13208), INT16_C( 2333), INT16_C( 207) }, { INT16_C( 11583), -INT16_C( 25535), -INT16_C( 7178), -INT16_C( 12996) }, { -INT32_C( 20652), INT32_C( 38743), INT32_C( 9511), INT32_C( 13203) } }, { { INT16_C( 18526), -INT16_C( 21718), INT16_C( 19358), -INT16_C( 20790) }, { INT16_C( 23289), -INT16_C( 25899), -INT16_C( 6854), -INT16_C( 12968) }, { -INT32_C( 4763), INT32_C( 4181), INT32_C( 26212), -INT32_C( 7822) } }, { 
{ -INT16_C( 3647), -INT16_C( 8703), -INT16_C( 12038), INT16_C( 14815) }, { INT16_C( 8445), -INT16_C( 2859), INT16_C( 4611), INT16_C( 25025) }, { -INT32_C( 12092), -INT32_C( 5844), -INT32_C( 16649), -INT32_C( 10210) } }, { { -INT16_C( 5286), -INT16_C( 2035), -INT16_C( 10442), INT16_C( 12198) }, { INT16_C( 31537), INT16_C( 27593), INT16_C( 8545), INT16_C( 8761) }, { -INT32_C( 36823), -INT32_C( 29628), -INT32_C( 18987), INT32_C( 3437) } }, { { INT16_C( 14866), INT16_C( 3329), -INT16_C( 8182), INT16_C( 2118) }, { INT16_C( 7168), INT16_C( 1276), -INT16_C( 17106), -INT16_C( 30619) }, { INT32_C( 7698), INT32_C( 2053), INT32_C( 8924), INT32_C( 32737) } }, { { INT16_C( 29352), -INT16_C( 8575), INT16_C( 10057), INT16_C( 31245) }, { -INT16_C( 10589), INT16_C( 1254), INT16_C( 8183), INT16_C( 2598) }, { INT32_C( 39941), -INT32_C( 9829), INT32_C( 1874), INT32_C( 28647) } }, { { INT16_C( 10073), INT16_C( 25367), INT16_C( 23815), INT16_C( 2155) }, { INT16_C( 26489), -INT16_C( 22772), INT16_C( 28964), -INT16_C( 13264) }, { -INT32_C( 16416), INT32_C( 48139), -INT32_C( 5149), INT32_C( 15419) } }, { { -INT16_C( 19996), INT16_C( 11690), -INT16_C( 18472), INT16_C( 31656) }, { -INT16_C( 29043), -INT16_C( 31361), -INT16_C( 22867), INT16_C( 1679) }, { INT32_C( 9047), INT32_C( 43051), INT32_C( 4395), INT32_C( 29977) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int32x4_t r = simde_vsubl_s16(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int32x4_t r = simde_vsubl_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsubl_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; int64_t r[2]; } test_vec[] = { { { -INT32_C( 1313200066), INT32_C( 576591193) }, { INT32_C( 2041640284), INT32_C( 1738130097) }, { -INT64_C( 3354840350), -INT64_C( 1161538904) } }, { { INT32_C( 120563334), -INT32_C( 648305082) }, { -INT32_C( 104614879), -INT32_C( 998726778) }, { INT64_C( 225178213), INT64_C( 350421696) } }, { { INT32_C( 611660747), -INT32_C( 1538862008) }, { INT32_C( 2048849609), INT32_C( 1071757241) }, { -INT64_C( 1437188862), -INT64_C( 2610619249) } }, { { -INT32_C( 1538911906), -INT32_C( 662854985) }, { -INT32_C( 590266282), -INT32_C( 1348449564) }, { -INT64_C( 948645624), INT64_C( 685594579) } }, { { -INT32_C( 975956611), -INT32_C( 1301669143) }, { -INT32_C( 902985711), -INT32_C( 1660350913) }, { -INT64_C( 72970900), INT64_C( 358681770) } }, { { -INT32_C( 683585761), INT32_C( 1202700017) }, { -INT32_C( 484212225), INT32_C( 1217577931) }, { -INT64_C( 199373536), -INT64_C( 14877914) } }, { { -INT32_C( 1039309095), -INT32_C( 1837795455) }, { INT32_C( 1063035647), -INT32_C( 790862416) }, { -INT64_C( 2102344742), -INT64_C( 1046933039) } }, { { -INT32_C( 1498997068), -INT32_C( 605202724) }, { -INT32_C( 1564601897), -INT32_C( 1377152556) }, { INT64_C( 65604829), INT64_C( 771949832) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int64x2_t r = simde_vsubl_s32(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int64x2_t r = simde_vsubl_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, 
SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsubl_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT8_C(183), UINT8_C(248), UINT8_C(112), UINT8_C( 56), UINT8_C(111), UINT8_C(229), UINT8_C(202), UINT8_C(111) }, { UINT8_C(135), UINT8_C( 38), UINT8_C(174), UINT8_C( 55), UINT8_C(139), UINT8_C(138), UINT8_C( 7), UINT8_C( 64) }, { UINT16_C( 48), UINT16_C( 210), UINT16_C(65474), UINT16_C( 1), UINT16_C(65508), UINT16_C( 91), UINT16_C( 195), UINT16_C( 47) } }, { { UINT8_C(168), UINT8_C(174), UINT8_C(230), UINT8_C(133), UINT8_C( 5), UINT8_C(211), UINT8_C( 96), UINT8_C(220) }, { UINT8_C(228), UINT8_C( 31), UINT8_C(127), UINT8_C(185), UINT8_C(112), UINT8_C(105), UINT8_C(102), UINT8_C( 39) }, { UINT16_C(65476), UINT16_C( 143), UINT16_C( 103), UINT16_C(65484), UINT16_C(65429), UINT16_C( 106), UINT16_C(65530), UINT16_C( 181) } }, { { UINT8_C( 97), UINT8_C(214), UINT8_C( 96), UINT8_C(209), UINT8_C(187), UINT8_C( 42), UINT8_C( 64), UINT8_C( 66) }, { UINT8_C( 81), UINT8_C(238), UINT8_C(122), UINT8_C(220), UINT8_C(120), UINT8_C(129), UINT8_C( 28), UINT8_C( 33) }, { UINT16_C( 16), UINT16_C(65512), UINT16_C(65510), UINT16_C(65525), UINT16_C( 67), UINT16_C(65449), UINT16_C( 36), UINT16_C( 33) } }, { { UINT8_C( 48), UINT8_C( 2), UINT8_C(166), UINT8_C( 53), UINT8_C(214), UINT8_C( 6), UINT8_C( 17), UINT8_C(186) }, { UINT8_C( 37), UINT8_C(144), UINT8_C(115), UINT8_C(149), UINT8_C(250), UINT8_C(218), UINT8_C(189), UINT8_C( 91) }, { UINT16_C( 11), UINT16_C(65394), UINT16_C( 51), UINT16_C(65440), UINT16_C(65500), UINT16_C(65324), UINT16_C(65364), UINT16_C( 95) } }, { { UINT8_C(176), UINT8_C( 29), UINT8_C( 44), UINT8_C(108), UINT8_C( 71), UINT8_C(108), UINT8_C(174), UINT8_C(152) }, { UINT8_C( 90), UINT8_C( 40), UINT8_C(117), UINT8_C(211), UINT8_C(170), UINT8_C(145), 
UINT8_C(244), UINT8_C(218) }, { UINT16_C( 86), UINT16_C(65525), UINT16_C(65463), UINT16_C(65433), UINT16_C(65437), UINT16_C(65499), UINT16_C(65466), UINT16_C(65470) } }, { { UINT8_C(148), UINT8_C(154), UINT8_C( 15), UINT8_C(106), UINT8_C(160), UINT8_C( 32), UINT8_C( 36), UINT8_C(198) }, { UINT8_C(177), UINT8_C(152), UINT8_C( 91), UINT8_C(171), UINT8_C(114), UINT8_C( 24), UINT8_C( 6), UINT8_C( 34) }, { UINT16_C(65507), UINT16_C( 2), UINT16_C(65460), UINT16_C(65471), UINT16_C( 46), UINT16_C( 8), UINT16_C( 30), UINT16_C( 164) } }, { { UINT8_C( 53), UINT8_C( 51), UINT8_C(142), UINT8_C(125), UINT8_C(159), UINT8_C( 61), UINT8_C( 21), UINT8_C(250) }, { UINT8_C(101), UINT8_C(138), UINT8_C(205), UINT8_C( 15), UINT8_C( 28), UINT8_C(193), UINT8_C(233), UINT8_C(176) }, { UINT16_C(65488), UINT16_C(65449), UINT16_C(65473), UINT16_C( 110), UINT16_C( 131), UINT16_C(65404), UINT16_C(65324), UINT16_C( 74) } }, { { UINT8_C( 91), UINT8_C(248), UINT8_C( 26), UINT8_C(251), UINT8_C( 25), UINT8_C( 62), UINT8_C(193), UINT8_C(202) }, { UINT8_C(214), UINT8_C( 29), UINT8_C(117), UINT8_C( 72), UINT8_C( 53), UINT8_C(123), UINT8_C(107), UINT8_C(107) }, { UINT16_C(65413), UINT16_C( 219), UINT16_C(65445), UINT16_C( 179), UINT16_C(65508), UINT16_C(65475), UINT16_C( 86), UINT16_C( 95) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint16x8_t r = simde_vsubl_u8(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint16x8_t r = simde_vsubl_u8(a, b); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; 
#endif } static int test_simde_vsubl_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT16_C(63918), UINT16_C(20200), UINT16_C(64822), UINT16_C(40008) }, { UINT16_C( 5512), UINT16_C(42155), UINT16_C(38358), UINT16_C(12628) }, { UINT32_C( 58406), UINT32_C(4294945341), UINT32_C( 26464), UINT32_C( 27380) } }, { { UINT16_C(28301), UINT16_C(42540), UINT16_C(61100), UINT16_C(33648) }, { UINT16_C(58635), UINT16_C(16587), UINT16_C(13921), UINT16_C( 4011) }, { UINT32_C(4294936962), UINT32_C( 25953), UINT32_C( 47179), UINT32_C( 29637) } }, { { UINT16_C(37680), UINT16_C(26205), UINT16_C(42385), UINT16_C( 6402) }, { UINT16_C(44730), UINT16_C(37053), UINT16_C( 4419), UINT16_C(53441) }, { UINT32_C(4294960246), UINT32_C(4294956448), UINT32_C( 37966), UINT32_C(4294920257) } }, { { UINT16_C(61055), UINT16_C(11127), UINT16_C(59356), UINT16_C(59310) }, { UINT16_C(31437), UINT16_C(11815), UINT16_C(54192), UINT16_C(57405) }, { UINT32_C( 29618), UINT32_C(4294966608), UINT32_C( 5164), UINT32_C( 1905) } }, { { UINT16_C(39782), UINT16_C(63303), UINT16_C(18752), UINT16_C(64272) }, { UINT16_C(52727), UINT16_C(14987), UINT16_C(19934), UINT16_C(23819) }, { UINT32_C(4294954351), UINT32_C( 48316), UINT32_C(4294966114), UINT32_C( 40453) } }, { { UINT16_C(33339), UINT16_C( 6025), UINT16_C(14185), UINT16_C(14078) }, { UINT16_C( 9649), UINT16_C(25188), UINT16_C(41720), UINT16_C(24386) }, { UINT32_C( 23690), UINT32_C(4294948133), UINT32_C(4294939761), UINT32_C(4294956988) } }, { { UINT16_C(35133), UINT16_C(32086), UINT16_C(26579), UINT16_C(51832) }, { UINT16_C( 1076), UINT16_C( 4869), UINT16_C( 4177), UINT16_C(35952) }, { UINT32_C( 34057), UINT32_C( 27217), UINT32_C( 22402), UINT32_C( 15880) } }, { { UINT16_C(63890), UINT16_C(64419), UINT16_C(41265), UINT16_C(57906) }, { UINT16_C(38598), UINT16_C(48964), UINT16_C(34616), UINT16_C(29982) }, { UINT32_C( 25292), UINT32_C( 15455), UINT32_C( 6649), UINT32_C( 27924) } } }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint32x4_t r = simde_vsubl_u16(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint32x4_t r = simde_vsubl_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsubl_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT32_C(3824382992), UINT32_C( 279866331) }, { UINT32_C(3223565167), UINT32_C(1431081923) }, { UINT64_C( 600817825), UINT64_C(18446744072558336024) } }, { { UINT32_C(3192975245), UINT32_C(1470136976) }, { UINT32_C(1360454937), UINT32_C(2093429868) }, { UINT64_C( 1832520308), UINT64_C(18446744073086258724) } }, { { UINT32_C(2220931752), UINT32_C(2509508133) }, { UINT32_C(2220210113), UINT32_C(3621364298) }, { UINT64_C( 721639), UINT64_C(18446744072597695451) } }, { { UINT32_C( 580200849), UINT32_C(3313055404) }, { UINT32_C(2266402587), UINT32_C(1795415491) }, { UINT64_C(18446744072023349878), UINT64_C( 1517639913) } }, { { UINT32_C(3186582423), UINT32_C( 844268401) }, { UINT32_C(2243340090), UINT32_C(3680276297) }, { UINT64_C( 943242333), UINT64_C(18446744070873543720) } }, { { UINT32_C(1711141561), UINT32_C(1126856232) }, { UINT32_C(3368697861), UINT32_C(3040070942) }, { UINT64_C(18446744072051995316), UINT64_C(18446744071796336906) } }, { { UINT32_C(2725389105), UINT32_C(3788883110) }, { UINT32_C(3043396460), UINT32_C(3566256667) }, { UINT64_C(18446744073391544261), UINT64_C( 222626443) } 
}, { { UINT32_C(3694759348), UINT32_C( 136274691) }, { UINT32_C(3251693987), UINT32_C(3900114103) }, { UINT64_C( 443065361), UINT64_C(18446744069945712204) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint64x2_t r = simde_vsubl_u32(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint64x2_t r = simde_vsubl_u32(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vsubl_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vsubl_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vsubl_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vsubl_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vsubl_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vsubl_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/subw.c000066400000000000000000000506541400333146700164550ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN subw #include "test-neon.h" #include "../../../simde/arm/neon/subw.h" static int test_simde_vsubw_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int8_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 6820), INT16_C( 27304), INT16_C( 25938), INT16_C( 515), -INT16_C( 18451), INT16_C( 12057), INT16_C( 26265), -INT16_C( 7182) }, { INT8_C( 5), -INT8_C( 1), INT8_C( 74), -INT8_C( 78), -INT8_C( 49), -INT8_C( 82), -INT8_C( 53), INT8_C( 15) }, { INT16_C( 6815), INT16_C( 27305), INT16_C( 25864), INT16_C( 593), -INT16_C( 18402), INT16_C( 12139), INT16_C( 26318), -INT16_C( 7197) } }, { { INT16_C( 21054), INT16_C( 20717), 
INT16_C( 5564), INT16_C( 24729), INT16_C( 16687), -INT16_C( 32053), -INT16_C( 12633), -INT16_C( 27516) }, { -INT8_C( 123), -INT8_C( 98), -INT8_C( 61), INT8_C( 30), INT8_C( 4), -INT8_C( 74), INT8_C( 2), INT8_C( 10) }, { INT16_C( 21177), INT16_C( 20815), INT16_C( 5625), INT16_C( 24699), INT16_C( 16683), -INT16_C( 31979), -INT16_C( 12635), -INT16_C( 27526) } }, { { INT16_C( 19637), -INT16_C( 31556), -INT16_C( 30726), INT16_C( 14483), -INT16_C( 32550), -INT16_C( 27000), INT16_C( 8854), -INT16_C( 14858) }, { INT8_C( 99), -INT8_C( 63), INT8_C( 71), INT8_C( 10), -INT8_C( 112), -INT8_C( 52), -INT8_C( 97), INT8_C( 21) }, { INT16_C( 19538), -INT16_C( 31493), -INT16_C( 30797), INT16_C( 14473), -INT16_C( 32438), -INT16_C( 26948), INT16_C( 8951), -INT16_C( 14879) } }, { { INT16_C( 25194), INT16_C( 28212), INT16_C( 13848), -INT16_C( 12936), INT16_C( 13442), INT16_C( 31825), -INT16_C( 6724), -INT16_C( 26956) }, { INT8_C( 101), INT8_C( 60), INT8_C( 44), -INT8_C( 5), INT8_C( 94), INT8_C( 34), -INT8_C( 63), -INT8_C( 62) }, { INT16_C( 25093), INT16_C( 28152), INT16_C( 13804), -INT16_C( 12931), INT16_C( 13348), INT16_C( 31791), -INT16_C( 6661), -INT16_C( 26894) } }, { { INT16_C( 2276), INT16_C( 29900), INT16_C( 27604), INT16_C( 16009), -INT16_C( 16946), -INT16_C( 6483), INT16_C( 9715), INT16_C( 30132) }, { INT8_C( 90), INT8_C( 5), -INT8_C( 15), INT8_C( 22), -INT8_C( 22), -INT8_C( 91), -INT8_C( 84), INT8_C( 80) }, { INT16_C( 2186), INT16_C( 29895), INT16_C( 27619), INT16_C( 15987), -INT16_C( 16924), -INT16_C( 6392), INT16_C( 9799), INT16_C( 30052) } }, { { -INT16_C( 10014), INT16_C( 16459), INT16_C( 3322), -INT16_C( 8702), -INT16_C( 12523), -INT16_C( 5806), -INT16_C( 9158), INT16_C( 2088) }, { -INT8_C( 103), -INT8_C( 43), -INT8_C( 17), -INT8_C( 115), -INT8_C( 6), -INT8_C( 93), INT8_C( 2), INT8_C( 84) }, { -INT16_C( 9911), INT16_C( 16502), INT16_C( 3339), -INT16_C( 8587), -INT16_C( 12517), -INT16_C( 5713), -INT16_C( 9160), INT16_C( 2004) } }, { { -INT16_C( 2904), -INT16_C( 27798), 
INT16_C( 5785), INT16_C( 31715), INT16_C( 12014), -INT16_C( 5700), -INT16_C( 16837), INT16_C( 20679) }, { -INT8_C( 115), INT8_C( 26), INT8_C( 57), -INT8_C( 56), -INT8_C( 10), INT8_C( 97), -INT8_C( 48), -INT8_C( 113) }, { -INT16_C( 2789), -INT16_C( 27824), INT16_C( 5728), INT16_C( 31771), INT16_C( 12024), -INT16_C( 5797), -INT16_C( 16789), INT16_C( 20792) } }, { { -INT16_C( 16586), INT16_C( 12572), INT16_C( 8034), INT16_C( 2949), -INT16_C( 4077), -INT16_C( 21346), -INT16_C( 32506), -INT16_C( 2776) }, { -INT8_C( 81), -INT8_C( 28), -INT8_C( 34), -INT8_C( 22), -INT8_C( 94), -INT8_C( 91), INT8_C( 58), INT8_C( 48) }, { -INT16_C( 16505), INT16_C( 12600), INT16_C( 8068), INT16_C( 2971), -INT16_C( 3983), -INT16_C( 21255), -INT16_C( 32564), -INT16_C( 2824) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int16x8_t r = simde_vsubw_s8(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); simde_int16x8_t r = simde_vsubw_s8(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsubw_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int16_t b[4]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 1242008385), INT32_C( 205899989), -INT32_C( 365076088), -INT32_C( 1812610432) }, { -INT16_C( 27726), -INT16_C( 18112), INT16_C( 26644), -INT16_C( 15186) }, { -INT32_C( 1241980659), INT32_C( 205918101), -INT32_C( 365102732), -INT32_C( 1812595246) } }, { { -INT32_C( 290550708), -INT32_C( 249632463), INT32_C( 849745501), INT32_C( 1732176863) }, { 
INT16_C( 31565), -INT16_C( 12975), INT16_C( 18238), -INT16_C( 3999) }, { -INT32_C( 290582273), -INT32_C( 249619488), INT32_C( 849727263), INT32_C( 1732180862) } }, { { -INT32_C( 274095654), INT32_C( 1437816585), INT32_C( 356737507), -INT32_C( 1492753846) }, { -INT16_C( 21384), INT16_C( 22490), INT16_C( 6296), -INT16_C( 6722) }, { -INT32_C( 274074270), INT32_C( 1437794095), INT32_C( 356731211), -INT32_C( 1492747124) } }, { { -INT32_C( 760082284), INT32_C( 834802519), -INT32_C( 1121948492), -INT32_C( 1491938365) }, { INT16_C( 22069), INT16_C( 32700), -INT16_C( 15688), INT16_C( 12327) }, { -INT32_C( 760104353), INT32_C( 834769819), -INT32_C( 1121932804), -INT32_C( 1491950692) } }, { { INT32_C( 109576558), -INT32_C( 1377090023), -INT32_C( 1384145322), INT32_C( 1725907633) }, { -INT16_C( 82), INT16_C( 28963), INT16_C( 14035), INT16_C( 2072) }, { INT32_C( 109576640), -INT32_C( 1377118986), -INT32_C( 1384159357), INT32_C( 1725905561) } }, { { INT32_C( 1149752460), INT32_C( 91532950), -INT32_C( 921961297), -INT32_C( 1720256701) }, { -INT16_C( 2411), INT16_C( 17991), INT16_C( 9784), -INT16_C( 6484) }, { INT32_C( 1149754871), INT32_C( 91514959), -INT32_C( 921971081), -INT32_C( 1720250217) } }, { { -INT32_C( 128462811), -INT32_C( 1845465082), -INT32_C( 606697404), -INT32_C( 421508554) }, { -INT16_C( 5305), -INT16_C( 30033), INT16_C( 9698), INT16_C( 30499) }, { -INT32_C( 128457506), -INT32_C( 1845435049), -INT32_C( 606707102), -INT32_C( 421539053) } }, { { INT32_C( 1404987931), -INT32_C( 1237751152), INT32_C( 1085182266), INT32_C( 1171435265) }, { -INT16_C( 22473), INT16_C( 27936), INT16_C( 243), INT16_C( 14931) }, { INT32_C( 1405010404), -INT32_C( 1237779088), INT32_C( 1085182023), INT32_C( 1171420334) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int32x4_t r = simde_vsubw_s16(a, b); simde_test_arm_neon_assert_equal_i32x4(r, 
simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); simde_int32x4_t r = simde_vsubw_s16(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsubw_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int32_t b[2]; int64_t r[2]; } test_vec[] = { { { INT64_C( 4847816211364578028), -INT64_C( 6225997310173182894) }, { INT32_C( 1659455329), INT32_C( 766033142) }, { INT64_C( 4847816209705122699), -INT64_C( 6225997310939216036) } }, { { -INT64_C( 5363243132695951260), -INT64_C( 8116391164187027984) }, { INT32_C( 1014100941), INT32_C( 635767491) }, { -INT64_C( 5363243133710052201), -INT64_C( 8116391164822795475) } }, { { -INT64_C( 1192275730394263983), -INT64_C( 1335641659382821128) }, { INT32_C( 1778776365), -INT32_C( 1862704701) }, { -INT64_C( 1192275732173040348), -INT64_C( 1335641657520116427) } }, { { -INT64_C( 4161975364135130284), -INT64_C( 1370921726416927616) }, { -INT32_C( 1934802801), INT32_C( 1165566744) }, { -INT64_C( 4161975362200327483), -INT64_C( 1370921727582494360) } }, { { INT64_C( 3778706870654500380), INT64_C( 7948723524930911252) }, { INT32_C( 1115184206), INT32_C( 1831760605) }, { INT64_C( 3778706869539316174), INT64_C( 7948723523099150647) } }, { { INT64_C( 1877846008289418162), INT64_C( 9008723632334749680) }, { -INT32_C( 1811459674), INT32_C( 688019163) }, { INT64_C( 1877846010100877836), INT64_C( 9008723631646730517) } }, { { -INT64_C( 6918204206961951821), INT64_C( 6452664756106557044) }, { -INT32_C( 1591114184), -INT32_C( 1759629584) }, { -INT64_C( 6918204205370837637), INT64_C( 6452664757866186628) } }, { { INT64_C( 3423912728348796288), -INT64_C( 273103808043683927) }, { 
INT32_C( 477011635), INT32_C( 1316354582) }, { INT64_C( 3423912727871784653), -INT64_C( 273103809360038509) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int64x2_t r = simde_vsubw_s32(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); simde_int64x2_t r = simde_vsubw_s32(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsubw_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint8_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(40577), UINT16_C(29167), UINT16_C( 3789), UINT16_C(19720), UINT16_C(13363), UINT16_C(44968), UINT16_C(11362), UINT16_C( 3038) }, { UINT8_C( 27), UINT8_C(158), UINT8_C(161), UINT8_C(163), UINT8_C( 91), UINT8_C(214), UINT8_C(159), UINT8_C( 14) }, { UINT16_C(40550), UINT16_C(29009), UINT16_C( 3628), UINT16_C(19557), UINT16_C(13272), UINT16_C(44754), UINT16_C(11203), UINT16_C( 3024) } }, { { UINT16_C( 3444), UINT16_C(35370), UINT16_C(40711), UINT16_C(35033), UINT16_C(51262), UINT16_C( 3066), UINT16_C( 726), UINT16_C( 2648) }, { UINT8_C( 54), UINT8_C( 0), UINT8_C(185), UINT8_C(152), UINT8_C( 44), UINT8_C(152), UINT8_C(163), UINT8_C( 71) }, { UINT16_C( 3390), UINT16_C(35370), UINT16_C(40526), UINT16_C(34881), UINT16_C(51218), UINT16_C( 2914), UINT16_C( 563), UINT16_C( 2577) } }, { { UINT16_C(17462), UINT16_C(37354), UINT16_C(35098), UINT16_C(36511), UINT16_C(51862), UINT16_C(40217), UINT16_C(62057), UINT16_C(42790) }, { UINT8_C(186), UINT8_C( 32), UINT8_C(178), UINT8_C(145), 
UINT8_C( 34), UINT8_C( 10), UINT8_C(155), UINT8_C( 89) }, { UINT16_C(17276), UINT16_C(37322), UINT16_C(34920), UINT16_C(36366), UINT16_C(51828), UINT16_C(40207), UINT16_C(61902), UINT16_C(42701) } }, { { UINT16_C(21514), UINT16_C(14065), UINT16_C(38380), UINT16_C( 8829), UINT16_C(26585), UINT16_C(62643), UINT16_C(21488), UINT16_C(34434) }, { UINT8_C( 29), UINT8_C(155), UINT8_C( 36), UINT8_C(134), UINT8_C(141), UINT8_C( 74), UINT8_C( 46), UINT8_C( 72) }, { UINT16_C(21485), UINT16_C(13910), UINT16_C(38344), UINT16_C( 8695), UINT16_C(26444), UINT16_C(62569), UINT16_C(21442), UINT16_C(34362) } }, { { UINT16_C(57450), UINT16_C(36057), UINT16_C(29931), UINT16_C(62949), UINT16_C(55240), UINT16_C(46380), UINT16_C(43372), UINT16_C(17879) }, { UINT8_C( 17), UINT8_C(139), UINT8_C( 57), UINT8_C( 1), UINT8_C(222), UINT8_C(188), UINT8_C(136), UINT8_C(251) }, { UINT16_C(57433), UINT16_C(35918), UINT16_C(29874), UINT16_C(62948), UINT16_C(55018), UINT16_C(46192), UINT16_C(43236), UINT16_C(17628) } }, { { UINT16_C(44119), UINT16_C(58753), UINT16_C(45046), UINT16_C(24621), UINT16_C( 1680), UINT16_C(31724), UINT16_C(53882), UINT16_C(17008) }, { UINT8_C(169), UINT8_C(156), UINT8_C(247), UINT8_C( 21), UINT8_C( 70), UINT8_C(207), UINT8_C( 90), UINT8_C( 87) }, { UINT16_C(43950), UINT16_C(58597), UINT16_C(44799), UINT16_C(24600), UINT16_C( 1610), UINT16_C(31517), UINT16_C(53792), UINT16_C(16921) } }, { { UINT16_C(37978), UINT16_C(14424), UINT16_C(57424), UINT16_C(42803), UINT16_C(46220), UINT16_C(33420), UINT16_C(47460), UINT16_C(62690) }, { UINT8_C(191), UINT8_C(207), UINT8_C(111), UINT8_C( 57), UINT8_C(161), UINT8_C(223), UINT8_C(124), UINT8_C( 74) }, { UINT16_C(37787), UINT16_C(14217), UINT16_C(57313), UINT16_C(42746), UINT16_C(46059), UINT16_C(33197), UINT16_C(47336), UINT16_C(62616) } }, { { UINT16_C(29564), UINT16_C(49759), UINT16_C(47426), UINT16_C(39961), UINT16_C(29005), UINT16_C(40404), UINT16_C( 1874), UINT16_C(56901) }, { UINT8_C(188), UINT8_C(209), UINT8_C( 97), UINT8_C( 32), 
UINT8_C(139), UINT8_C( 67), UINT8_C( 20), UINT8_C( 74) }, { UINT16_C(29376), UINT16_C(49550), UINT16_C(47329), UINT16_C(39929), UINT16_C(28866), UINT16_C(40337), UINT16_C( 1854), UINT16_C(56827) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint16x8_t r = simde_vsubw_u8(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_uint16x8_t r = simde_vsubw_u8(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsubw_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint16_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(3011805970), UINT32_C(3741122658), UINT32_C(3063962739), UINT32_C(1666365718) }, { UINT16_C(10027), UINT16_C(32001), UINT16_C(17966), UINT16_C(59995) }, { UINT32_C(3011795943), UINT32_C(3741090657), UINT32_C(3063944773), UINT32_C(1666305723) } }, { { UINT32_C(2718612503), UINT32_C( 317529600), UINT32_C( 80114081), UINT32_C(3840066417) }, { UINT16_C(33568), UINT16_C(13978), UINT16_C(60732), UINT16_C(26521) }, { UINT32_C(2718578935), UINT32_C( 317515622), UINT32_C( 80053349), UINT32_C(3840039896) } }, { { UINT32_C(1122277908), UINT32_C(4163715296), UINT32_C(4237965308), UINT32_C(4144990038) }, { UINT16_C(54776), UINT16_C(27131), UINT16_C(56984), UINT16_C(47182) }, { UINT32_C(1122223132), UINT32_C(4163688165), UINT32_C(4237908324), UINT32_C(4144942856) } }, { { UINT32_C(2649679969), UINT32_C(3909454037), UINT32_C( 53274914), UINT32_C( 654006569) }, { UINT16_C(38288), UINT16_C(58914), 
UINT16_C(12573), UINT16_C( 5598) }, { UINT32_C(2649641681), UINT32_C(3909395123), UINT32_C( 53262341), UINT32_C( 654000971) } }, { { UINT32_C(2675955974), UINT32_C( 408407479), UINT32_C(2343978677), UINT32_C(4034182094) }, { UINT16_C(41124), UINT16_C(52979), UINT16_C(61177), UINT16_C(35572) }, { UINT32_C(2675914850), UINT32_C( 408354500), UINT32_C(2343917500), UINT32_C(4034146522) } }, { { UINT32_C(2708477572), UINT32_C(1320570440), UINT32_C(3756864808), UINT32_C(3103278338) }, { UINT16_C(44683), UINT16_C(22851), UINT16_C(46953), UINT16_C( 3401) }, { UINT32_C(2708432889), UINT32_C(1320547589), UINT32_C(3756817855), UINT32_C(3103274937) } }, { { UINT32_C(1373322584), UINT32_C(2950418219), UINT32_C( 777014502), UINT32_C(3262908314) }, { UINT16_C(27196), UINT16_C(16290), UINT16_C(39599), UINT16_C(15095) }, { UINT32_C(1373295388), UINT32_C(2950401929), UINT32_C( 776974903), UINT32_C(3262893219) } }, { { UINT32_C(2979215944), UINT32_C(1237245169), UINT32_C(1167825433), UINT32_C(1341421161) }, { UINT16_C(17858), UINT16_C(23933), UINT16_C(64076), UINT16_C(34847) }, { UINT32_C(2979198086), UINT32_C(1237221236), UINT32_C(1167761357), UINT32_C(1341386314) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint32x4_t r = simde_vsubw_u16(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_uint32x4_t r = simde_vsubw_u16(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsubw_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { 
uint64_t a[2]; uint32_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(11767270699772133732), UINT64_C(15434702050035032312) }, { UINT32_C( 370921133), UINT32_C( 124129093) }, { UINT64_C(11767270699401212599), UINT64_C(15434702049910903219) } }, { { UINT64_C( 4695430173924057940), UINT64_C(12052936278448336965) }, { UINT32_C(1049729409), UINT32_C(1494533548) }, { UINT64_C( 4695430172874328531), UINT64_C(12052936276953803417) } }, { { UINT64_C(10655751184254447507), UINT64_C( 1069706928083649721) }, { UINT32_C(4239338573), UINT32_C(1336144846) }, { UINT64_C(10655751180015108934), UINT64_C( 1069706926747504875) } }, { { UINT64_C(10201112727033558412), UINT64_C(10349912778846110160) }, { UINT32_C(1395053962), UINT32_C(2137128498) }, { UINT64_C(10201112725638504450), UINT64_C(10349912776708981662) } }, { { UINT64_C(10396592742187339818), UINT64_C( 5177831665045132627) }, { UINT32_C( 827736411), UINT32_C( 297924743) }, { UINT64_C(10396592741359603407), UINT64_C( 5177831664747207884) } }, { { UINT64_C( 1116828819955967950), UINT64_C( 7811863096301517525) }, { UINT32_C(2629382437), UINT32_C(3873739915) }, { UINT64_C( 1116828817326585513), UINT64_C( 7811863092427777610) } }, { { UINT64_C( 103258656087751381), UINT64_C( 7905084639033807552) }, { UINT32_C(2454109306), UINT32_C( 855551756) }, { UINT64_C( 103258653633642075), UINT64_C( 7905084638178255796) } }, { { UINT64_C( 2473516661669279969), UINT64_C( 297778827730250733) }, { UINT32_C(1470702271), UINT32_C( 482631074) }, { UINT64_C( 2473516660198577698), UINT64_C( 297778827247619659) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint64x2_t r = simde_vsubw_u32(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); 
simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_uint64x2_t r = simde_vsubw_u32(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vsubw_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vsubw_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vsubw_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vsubw_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vsubw_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vsubw_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/subw_high.c000066400000000000000000000604751400333146700174560ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN subw_high #include "test-neon.h" #include "../../../simde/arm/neon/subw_high.h" static int test_simde_vsubw_high_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int8_t b[16]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 8634), INT16_C( 8369), INT16_C( 24454), -INT16_C( 16207), -INT16_C( 12454), -INT16_C( 19437), INT16_C( 30243), -INT16_C( 2979) }, { INT8_C( 49), INT8_C( 50), INT8_C( 31), -INT8_C( 122), INT8_C( 92), -INT8_C( 76), -INT8_C( 99), -INT8_C( 69), INT8_C( 107), -INT8_C( 98), -INT8_C( 15), INT8_C( 98), INT8_C( 45), INT8_C( 125), INT8_C( 117), INT8_C( 115) }, { -INT16_C( 8741), INT16_C( 8467), INT16_C( 24469), -INT16_C( 16305), -INT16_C( 12499), -INT16_C( 19562), INT16_C( 30126), -INT16_C( 3094) } }, { { INT16_C( 9820), -INT16_C( 7532), INT16_C( 17798), -INT16_C( 8029), -INT16_C( 18924), INT16_C( 14484), -INT16_C( 3796), INT16_C( 23852) }, { INT8_C( 36), INT8_C( 75), -INT8_C( 29), INT8_MIN, -INT8_C( 1), -INT8_C( 127), INT8_C( 59), INT8_C( 106), INT8_C( 31), INT8_C( 44), -INT8_C( 52), INT8_C( 77), -INT8_C( 86), INT8_C( 66), -INT8_C( 64), INT8_C( 6) }, { INT16_C( 9789), -INT16_C( 7576), INT16_C( 17850), -INT16_C( 8106), -INT16_C( 18838), INT16_C( 14418), -INT16_C( 3732), 
INT16_C( 23846) } }, { { INT16_C( 21608), -INT16_C( 4376), -INT16_C( 29799), -INT16_C( 20785), INT16_C( 25410), INT16_C( 28390), INT16_C( 4693), INT16_C( 31180) }, { INT8_C( 93), -INT8_C( 81), -INT8_C( 7), INT8_C( 92), INT8_C( 48), INT8_C( 52), -INT8_C( 57), INT8_C( 80), INT8_C( 97), -INT8_C( 109), -INT8_C( 99), INT8_C( 11), -INT8_C( 43), INT8_C( 93), INT8_C( 17), INT8_C( 62) }, { INT16_C( 21511), -INT16_C( 4267), -INT16_C( 29700), -INT16_C( 20796), INT16_C( 25453), INT16_C( 28297), INT16_C( 4676), INT16_C( 31118) } }, { { -INT16_C( 1614), INT16_C( 19244), -INT16_C( 1147), -INT16_C( 14343), -INT16_C( 8353), -INT16_C( 19403), INT16_C( 497), INT16_C( 20013) }, { -INT8_C( 79), INT8_C( 38), -INT8_C( 85), -INT8_C( 31), INT8_C( 90), INT8_C( 114), INT8_C( 49), -INT8_C( 69), INT8_C( 5), -INT8_C( 50), -INT8_C( 58), -INT8_C( 37), INT8_C( 44), -INT8_C( 41), INT8_C( 25), -INT8_C( 34) }, { -INT16_C( 1619), INT16_C( 19294), -INT16_C( 1089), -INT16_C( 14306), -INT16_C( 8397), -INT16_C( 19362), INT16_C( 472), INT16_C( 20047) } }, { { INT16_C( 17873), INT16_C( 22057), INT16_C( 9025), -INT16_C( 24547), INT16_C( 20994), -INT16_C( 2988), -INT16_C( 32428), INT16_C( 1346) }, { -INT8_C( 89), -INT8_C( 19), -INT8_C( 26), INT8_C( 1), INT8_C( 95), INT8_C( 24), -INT8_C( 67), INT8_C( 101), -INT8_C( 26), -INT8_C( 125), INT8_C( 64), INT8_C( 18), INT8_C( 91), INT8_C( 89), -INT8_C( 16), INT8_C( 44) }, { INT16_C( 17899), INT16_C( 22182), INT16_C( 8961), -INT16_C( 24565), INT16_C( 20903), -INT16_C( 3077), -INT16_C( 32412), INT16_C( 1302) } }, { { INT16_C( 6814), -INT16_C( 8318), -INT16_C( 24771), INT16_C( 16255), -INT16_C( 11279), INT16_C( 17715), INT16_C( 30292), -INT16_C( 1206) }, { INT8_C( 99), INT8_C( 49), -INT8_C( 3), -INT8_C( 61), INT8_C( 73), -INT8_C( 70), INT8_C( 40), INT8_C( 47), INT8_C( 61), INT8_C( 104), INT8_C( 66), -INT8_C( 104), -INT8_C( 63), INT8_C( 50), -INT8_C( 60), INT8_C( 95) }, { INT16_C( 6753), -INT16_C( 8422), -INT16_C( 24837), INT16_C( 16359), -INT16_C( 11216), INT16_C( 
17665), INT16_C( 30352), -INT16_C( 1301) } }, { { INT16_C( 17996), -INT16_C( 30401), -INT16_C( 16667), -INT16_C( 10295), -INT16_C( 878), -INT16_C( 6628), INT16_C( 26482), -INT16_C( 10526) }, { -INT8_C( 104), -INT8_C( 33), -INT8_C( 103), -INT8_C( 31), -INT8_C( 103), -INT8_C( 63), INT8_C( 16), -INT8_C( 42), INT8_C( 41), INT8_C( 82), INT8_C( 111), -INT8_C( 22), -INT8_C( 123), INT8_C( 51), INT8_C( 73), -INT8_C( 47) }, { INT16_C( 17955), -INT16_C( 30483), -INT16_C( 16778), -INT16_C( 10273), -INT16_C( 755), -INT16_C( 6679), INT16_C( 26409), -INT16_C( 10479) } }, { { -INT16_C( 30598), INT16_C( 24411), INT16_C( 9287), -INT16_C( 9930), INT16_C( 21280), -INT16_C( 27713), -INT16_C( 24134), INT16_C( 21097) }, { INT8_MIN, INT8_C( 2), INT8_C( 51), INT8_C( 25), -INT8_C( 61), INT8_C( 67), -INT8_C( 16), -INT8_C( 20), -INT8_C( 106), INT8_C( 95), -INT8_C( 42), INT8_C( 27), -INT8_C( 110), INT8_C( 31), -INT8_C( 20), INT8_C( 12) }, { -INT16_C( 30492), INT16_C( 24316), INT16_C( 9329), -INT16_C( 9957), INT16_C( 21390), -INT16_C( 27744), -INT16_C( 24114), INT16_C( 21085) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int16x8_t r = simde_vsubw_high_s8(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); simde_int16x8_t r = simde_vsubw_high_s8(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsubw_high_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int16_t b[8]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 278116440), 
-INT32_C( 1933008277), -INT32_C( 1356888075), -INT32_C( 1459517399) }, { INT16_C( 13450), INT16_C( 19907), -INT16_C( 19592), INT16_C( 3641), INT16_C( 3858), -INT16_C( 23511), INT16_C( 5422), -INT16_C( 10575) }, { -INT32_C( 278120298), -INT32_C( 1932984766), -INT32_C( 1356893497), -INT32_C( 1459506824) } }, { { -INT32_C( 926605987), -INT32_C( 1252749889), INT32_C( 1046770453), -INT32_C( 2048432389) }, { -INT16_C( 21862), INT16_C( 4818), INT16_C( 2909), INT16_C( 28448), INT16_C( 18714), INT16_C( 18708), -INT16_C( 15009), -INT16_C( 17377) }, { -INT32_C( 926624701), -INT32_C( 1252768597), INT32_C( 1046785462), -INT32_C( 2048415012) } }, { { -INT32_C( 1585125918), -INT32_C( 2024351374), INT32_C( 1220918092), -INT32_C( 1144148703) }, { -INT16_C( 24489), -INT16_C( 18994), -INT16_C( 4437), -INT16_C( 14812), INT16_C( 14392), -INT16_C( 26865), INT16_C( 12029), -INT16_C( 8365) }, { -INT32_C( 1585140310), -INT32_C( 2024324509), INT32_C( 1220906063), -INT32_C( 1144140338) } }, { { -INT32_C( 2038311149), -INT32_C( 49424464), -INT32_C( 1287269486), -INT32_C( 680586624) }, { INT16_C( 15794), INT16_C( 24204), -INT16_C( 20181), INT16_C( 25380), INT16_C( 13289), -INT16_C( 6150), INT16_C( 19809), INT16_C( 30150) }, { -INT32_C( 2038324438), -INT32_C( 49418314), -INT32_C( 1287289295), -INT32_C( 680616774) } }, { { -INT32_C( 704952539), -INT32_C( 1311635425), INT32_C( 1533351899), -INT32_C( 600583126) }, { -INT16_C( 16623), INT16_C( 15418), INT16_C( 24176), INT16_C( 23200), -INT16_C( 25967), -INT16_C( 3263), INT16_C( 2024), INT16_C( 3432) }, { -INT32_C( 704926572), -INT32_C( 1311632162), INT32_C( 1533349875), -INT32_C( 600586558) } }, { { INT32_C( 1860330319), INT32_C( 1193260395), -INT32_C( 157121332), INT32_C( 1775490392) }, { INT16_C( 3477), INT16_C( 1446), INT16_C( 18028), -INT16_C( 673), -INT16_C( 24352), -INT16_C( 14096), INT16_C( 22696), -INT16_C( 2091) }, { INT32_C( 1860354671), INT32_C( 1193274491), -INT32_C( 157144028), INT32_C( 1775492483) } }, { { INT32_C( 660977851), 
INT32_C( 963544173), INT32_C( 1630539785), INT32_C( 2076902374) }, { INT16_C( 28944), INT16_C( 31872), -INT16_C( 8009), -INT16_C( 26758), INT16_C( 27264), INT16_C( 10336), INT16_C( 13763), INT16_C( 32287) }, { INT32_C( 660950587), INT32_C( 963533837), INT32_C( 1630526022), INT32_C( 2076870087) } }, { { INT32_C( 1520796909), INT32_C( 311694089), INT32_C( 175359012), -INT32_C( 679133497) }, { INT16_C( 1455), INT16_C( 26196), -INT16_C( 12571), INT16_C( 26366), INT16_C( 24120), -INT16_C( 1138), -INT16_C( 20845), -INT16_C( 32390) }, { INT32_C( 1520772789), INT32_C( 311695227), INT32_C( 175379857), -INT32_C( 679101107) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int32x4_t r = simde_vsubw_high_s16(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); simde_int32x4_t r = simde_vsubw_high_s16(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsubw_high_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int32_t b[4]; int64_t r[2]; } test_vec[] = { { { INT64_C( 6290806520344682290), -INT64_C( 5777301384871165645) }, { -INT32_C( 787142933), INT32_C( 741807092), INT32_C( 86558065), -INT32_C( 1501126029) }, { INT64_C( 6290806520258124225), -INT64_C( 5777301383370039616) } }, { { INT64_C( 309393578208092609), INT64_C( 9123960324121144560) }, { -INT32_C( 347097865), INT32_C( 957843143), -INT32_C( 1086439605), -INT32_C( 1553611551) }, { INT64_C( 309393579294532214), INT64_C( 9123960325674756111) } }, { { INT64_C( 
7348717312772884005), -INT64_C( 4422266780297856369) }, { INT32_C( 1873670055), -INT32_C( 1062681483), -INT32_C( 444602876), -INT32_C( 813112150) }, { INT64_C( 7348717313217486881), -INT64_C( 4422266779484744219) } }, { { -INT64_C( 7924434272985407445), -INT64_C( 700217045811962949) }, { -INT32_C( 1201277629), -INT32_C( 1116140103), -INT32_C( 1650198285), INT32_C( 141306845) }, { -INT64_C( 7924434271335209160), -INT64_C( 700217045953269794) } }, { { -INT64_C( 5845198481330261429), INT64_C( 3264684125834220809) }, { INT32_C( 736539505), -INT32_C( 1276616768), INT32_C( 877693783), INT32_C( 37534903) }, { -INT64_C( 5845198482207955212), INT64_C( 3264684125796685906) } }, { { -INT64_C( 7092832327393221137), -INT64_C( 5485591867381398073) }, { -INT32_C( 1226914314), INT32_C( 2070529828), INT32_C( 162576722), INT32_C( 1678568565) }, { -INT64_C( 7092832327555797859), -INT64_C( 5485591869059966638) } }, { { INT64_C( 6197754147606322641), INT64_C( 3500395575433033629) }, { -INT32_C( 790203988), -INT32_C( 1957998792), INT32_C( 2106915592), -INT32_C( 1176330008) }, { INT64_C( 6197754145499407049), INT64_C( 3500395576609363637) } }, { { -INT64_C( 6997714204985775619), INT64_C( 9115586715782671127) }, { -INT32_C( 1152489598), -INT32_C( 1102669386), INT32_C( 2101140117), INT32_C( 2016812667) }, { -INT64_C( 6997714207086915736), INT64_C( 9115586713765858460) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int64x2_t r = simde_vsubw_high_s32(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); simde_int64x2_t r = simde_vsubw_high_s32(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); 
simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsubw_high_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint8_t b[16]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(21831), UINT16_C(18437), UINT16_C(59509), UINT16_C(36326), UINT16_C(25811), UINT16_C(42462), UINT16_C(24693), UINT16_C(63523) }, { UINT8_C(199), UINT8_C(113), UINT8_C(179), UINT8_C(126), UINT8_C( 10), UINT8_C(249), UINT8_C( 60), UINT8_C(159), UINT8_C(211), UINT8_C(120), UINT8_C( 28), UINT8_C( 78), UINT8_C(150), UINT8_C( 83), UINT8_C(199), UINT8_C(221) }, { UINT16_C(21620), UINT16_C(18317), UINT16_C(59481), UINT16_C(36248), UINT16_C(25661), UINT16_C(42379), UINT16_C(24494), UINT16_C(63302) } }, { { UINT16_C(52392), UINT16_C( 7461), UINT16_C( 3252), UINT16_C(34730), UINT16_C(35184), UINT16_C(58924), UINT16_C(20457), UINT16_C(45278) }, { UINT8_C(192), UINT8_C(145), UINT8_C( 46), UINT8_C(202), UINT8_C(138), UINT8_C(107), UINT8_C(106), UINT8_C( 93), UINT8_C(227), UINT8_C(134), UINT8_C(172), UINT8_C(122), UINT8_C(217), UINT8_C(115), UINT8_C( 87), UINT8_C(129) }, { UINT16_C(52165), UINT16_C( 7327), UINT16_C( 3080), UINT16_C(34608), UINT16_C(34967), UINT16_C(58809), UINT16_C(20370), UINT16_C(45149) } }, { { UINT16_C(32063), UINT16_C(62367), UINT16_C(18825), UINT16_C(63866), UINT16_C(42706), UINT16_C(48095), UINT16_C(48629), UINT16_C(46444) }, { UINT8_C( 78), UINT8_C(154), UINT8_C(127), UINT8_C(216), UINT8_C( 5), UINT8_C(233), UINT8_C( 54), UINT8_C(233), UINT8_C(112), UINT8_C(226), UINT8_C( 99), UINT8_C( 73), UINT8_C( 85), UINT8_C(186), UINT8_C(203), UINT8_C(148) }, { UINT16_C(31951), UINT16_C(62141), UINT16_C(18726), UINT16_C(63793), UINT16_C(42621), UINT16_C(47909), UINT16_C(48426), UINT16_C(46296) } }, { { UINT16_C(27191), UINT16_C(49287), UINT16_C( 435), UINT16_C(34490), UINT16_C(39335), UINT16_C(40001), UINT16_C(44375), UINT16_C(42321) }, { UINT8_C( 72), 
UINT8_C(208), UINT8_C(126), UINT8_C( 77), UINT8_C(186), UINT8_C(180), UINT8_C( 54), UINT8_C( 42), UINT8_C(150), UINT8_C(153), UINT8_C(115), UINT8_C(235), UINT8_C( 84), UINT8_C( 62), UINT8_C(127), UINT8_C(139) }, { UINT16_C(27041), UINT16_C(49134), UINT16_C( 320), UINT16_C(34255), UINT16_C(39251), UINT16_C(39939), UINT16_C(44248), UINT16_C(42182) } }, { { UINT16_C( 1704), UINT16_C(23628), UINT16_C( 1543), UINT16_C(44770), UINT16_C( 9119), UINT16_C(63050), UINT16_C(39889), UINT16_C( 6556) }, { UINT8_C(107), UINT8_C( 26), UINT8_C(102), UINT8_C( 37), UINT8_C(206), UINT8_C(157), UINT8_C( 79), UINT8_C(100), UINT8_C( 54), UINT8_C(195), UINT8_C( 79), UINT8_C(138), UINT8_C( 1), UINT8_C(206), UINT8_C( 22), UINT8_C(170) }, { UINT16_C( 1650), UINT16_C(23433), UINT16_C( 1464), UINT16_C(44632), UINT16_C( 9118), UINT16_C(62844), UINT16_C(39867), UINT16_C( 6386) } }, { { UINT16_C(25300), UINT16_C(56070), UINT16_C(59496), UINT16_C( 1929), UINT16_C(54027), UINT16_C(56574), UINT16_C(39534), UINT16_C(55797) }, { UINT8_C(180), UINT8_C( 92), UINT8_MAX, UINT8_C(130), UINT8_C(249), UINT8_C( 78), UINT8_C(230), UINT8_C( 47), UINT8_C( 17), UINT8_C( 53), UINT8_C(186), UINT8_C( 19), UINT8_C( 3), UINT8_C(208), UINT8_C(189), UINT8_C(215) }, { UINT16_C(25283), UINT16_C(56017), UINT16_C(59310), UINT16_C( 1910), UINT16_C(54024), UINT16_C(56366), UINT16_C(39345), UINT16_C(55582) } }, { { UINT16_C(49970), UINT16_C(39602), UINT16_C(15275), UINT16_C(46753), UINT16_C(40718), UINT16_C(31891), UINT16_C(34873), UINT16_C(60757) }, { UINT8_C(228), UINT8_C( 84), UINT8_C(111), UINT8_C(221), UINT8_C(163), UINT8_C( 85), UINT8_C( 13), UINT8_C(180), UINT8_C(138), UINT8_C(199), UINT8_C(199), UINT8_C(141), UINT8_C(151), UINT8_C(132), UINT8_C(100), UINT8_C(201) }, { UINT16_C(49832), UINT16_C(39403), UINT16_C(15076), UINT16_C(46612), UINT16_C(40567), UINT16_C(31759), UINT16_C(34773), UINT16_C(60556) } }, { { UINT16_C( 5703), UINT16_C(62051), UINT16_C( 1105), UINT16_C(24489), UINT16_C(15524), UINT16_C(56795), 
UINT16_C(12740), UINT16_C(43467) }, { UINT8_C(133), UINT8_C( 58), UINT8_C(134), UINT8_C( 40), UINT8_C(144), UINT8_C(147), UINT8_C(221), UINT8_C( 26), UINT8_C( 90), UINT8_C(164), UINT8_C(168), UINT8_C(241), UINT8_C( 41), UINT8_C( 12), UINT8_C(186), UINT8_C(112) }, { UINT16_C( 5613), UINT16_C(61887), UINT16_C( 937), UINT16_C(24248), UINT16_C(15483), UINT16_C(56783), UINT16_C(12554), UINT16_C(43355) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint16x8_t r = simde_vsubw_high_u8(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_uint16x8_t r = simde_vsubw_high_u8(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsubw_high_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint16_t b[8]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(1952652579), UINT32_C(3335785506), UINT32_C( 212053832), UINT32_C(1723166432) }, { UINT16_C(15529), UINT16_C(14734), UINT16_C(27599), UINT16_C(10835), UINT16_C(64272), UINT16_C(14619), UINT16_C(54792), UINT16_C(11177) }, { UINT32_C(1952588307), UINT32_C(3335770887), UINT32_C( 211999040), UINT32_C(1723155255) } }, { { UINT32_C( 362745075), UINT32_C(1624994584), UINT32_C( 57507619), UINT32_C(2523472621) }, { UINT16_C(63582), UINT16_C(11983), UINT16_C( 9059), UINT16_C(29528), UINT16_C(29470), UINT16_C( 9900), UINT16_C(22089), UINT16_C(15697) }, { UINT32_C( 362715605), UINT32_C(1624984684), UINT32_C( 57485530), UINT32_C(2523456924) } }, { { UINT32_C(2069033314), 
UINT32_C(2279288420), UINT32_C(2592819373), UINT32_C(3375494251) }, { UINT16_C( 236), UINT16_C(20727), UINT16_C(20259), UINT16_C(17091), UINT16_C(28867), UINT16_C( 3176), UINT16_C(47814), UINT16_C(10313) }, { UINT32_C(2069004447), UINT32_C(2279285244), UINT32_C(2592771559), UINT32_C(3375483938) } }, { { UINT32_C( 262380715), UINT32_C(2006417354), UINT32_C( 839983815), UINT32_C( 66863638) }, { UINT16_C(62275), UINT16_C(26195), UINT16_C( 5699), UINT16_C( 1704), UINT16_C( 4486), UINT16_C(19474), UINT16_C(23755), UINT16_C(30325) }, { UINT32_C( 262376229), UINT32_C(2006397880), UINT32_C( 839960060), UINT32_C( 66833313) } }, { { UINT32_C(3263502584), UINT32_C(1597578391), UINT32_C(1435585086), UINT32_C(3495464333) }, { UINT16_C(43905), UINT16_C(50230), UINT16_C(57281), UINT16_C(18634), UINT16_C(56560), UINT16_C(48020), UINT16_C( 2360), UINT16_C(12337) }, { UINT32_C(3263446024), UINT32_C(1597530371), UINT32_C(1435582726), UINT32_C(3495451996) } }, { { UINT32_C(3119691298), UINT32_C( 286796755), UINT32_C( 57059958), UINT32_C(3100884535) }, { UINT16_C( 2409), UINT16_C(11132), UINT16_C(18152), UINT16_C(55411), UINT16_C( 1827), UINT16_C(23443), UINT16_C(50193), UINT16_C(13196) }, { UINT32_C(3119689471), UINT32_C( 286773312), UINT32_C( 57009765), UINT32_C(3100871339) } }, { { UINT32_C(1324121723), UINT32_C( 543098282), UINT32_C(3861104303), UINT32_C(4003460740) }, { UINT16_C( 7167), UINT16_C(59417), UINT16_C(35938), UINT16_C(34240), UINT16_C(21651), UINT16_C(42208), UINT16_C(27672), UINT16_C(37847) }, { UINT32_C(1324100072), UINT32_C( 543056074), UINT32_C(3861076631), UINT32_C(4003422893) } }, { { UINT32_C(2514601195), UINT32_C(2025144777), UINT32_C(2338248711), UINT32_C(3447324110) }, { UINT16_C(37401), UINT16_C(31669), UINT16_C(30238), UINT16_C(45568), UINT16_C(57546), UINT16_C(57942), UINT16_C(11853), UINT16_C(14454) }, { UINT32_C(2514543649), UINT32_C(2025086835), UINT32_C(2338236858), UINT32_C(3447309656) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint32x4_t r = simde_vsubw_high_u16(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_uint32x4_t r = simde_vsubw_high_u16(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vsubw_high_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint32_t b[4]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(11471656268340221938), UINT64_C(12174818102427226458) }, { UINT32_C(1428400951), UINT32_C(3943113505), UINT32_C(1355636227), UINT32_C(2122859404) }, { UINT64_C(11471656266984585711), UINT64_C(12174818100304367054) } }, { { UINT64_C( 3590332999477581211), UINT64_C(15651222164577320701) }, { UINT32_C( 456087546), UINT32_C(2114336378), UINT32_C( 550425492), UINT32_C(2996721431) }, { UINT64_C( 3590332998927155719), UINT64_C(15651222161580599270) } }, { { UINT64_C( 4734893183381919660), UINT64_C( 6355144628429983670) }, { UINT32_C(3564331353), UINT32_C( 743602584), UINT32_C(1665998924), UINT32_C( 605416311) }, { UINT64_C( 4734893181715920736), UINT64_C( 6355144627824567359) } }, { { UINT64_C( 7658192714859084739), UINT64_C( 8080290163990926700) }, { UINT32_C(4014249303), UINT32_C(1528534542), UINT32_C( 784230583), UINT32_C( 391304276) }, { UINT64_C( 7658192714074854156), UINT64_C( 8080290163599622424) } }, { { UINT64_C(14189827956752120527), UINT64_C(18158990497204383610) }, { UINT32_C(1441482311), UINT32_C(2477786844), UINT32_C(3284299631), UINT32_C( 316281923) }, { UINT64_C(14189827953467820896), 
UINT64_C(18158990496888101687) } }, { { UINT64_C(15432570781989926671), UINT64_C(12957253519079356976) }, { UINT32_C(2332605615), UINT32_C( 840939971), UINT32_C(1811276072), UINT32_C( 75354101) }, { UINT64_C(15432570780178650599), UINT64_C(12957253519004002875) } }, { { UINT64_C(14121484760512139718), UINT64_C( 8750611494977691985) }, { UINT32_C(3909384486), UINT32_C(1511727922), UINT32_C(4207218692), UINT32_C(2801681119) }, { UINT64_C(14121484756304921026), UINT64_C( 8750611492176010866) } }, { { UINT64_C( 5956505295596907091), UINT64_C(15160946671494381859) }, { UINT32_C( 733703161), UINT32_C(2475022222), UINT32_C(3347925735), UINT32_C(3765275532) }, { UINT64_C( 5956505292248981356), UINT64_C(15160946667729106327) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint64x2_t r = simde_vsubw_high_u32(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_uint64x2_t r = simde_vsubw_high_u32(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vsubw_high_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vsubw_high_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vsubw_high_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vsubw_high_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vsubw_high_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vsubw_high_u32) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/tbl.c000066400000000000000000001375461400333146700162640ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN tbl #include "test-neon.h" /* 
Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/tbl.h" #else #include "../../../simde/arm/neon.h" #endif #if 0 #define PROBABILITY 80 #define probability(p) (rand() < ((HEDLEY_STATIC_CAST(int64_t, RAND_MAX) * (p)) / 100)) #endif static int test_simde_vtbl1_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t a[8]; SIMDE_ALIGN_TO_16 int8_t b[8]; SIMDE_ALIGN_TO_16 int8_t r[8]; } test_vec[] = { { { INT8_C( 99), -INT8_C( 37), -INT8_C( 95), INT8_MAX, -INT8_C( 56), -INT8_C( 46), -INT8_C( 47), -INT8_C( 86) }, { INT8_C( 2), -INT8_C( 89), INT8_C( 7), INT8_C( 1), INT8_C( 116), INT8_C( 7), INT8_C( 5), INT8_C( 3) }, { -INT8_C( 95), INT8_C( 0), -INT8_C( 86), -INT8_C( 37), INT8_C( 0), -INT8_C( 86), -INT8_C( 46), INT8_MAX } }, { { INT8_C( 121), -INT8_C( 56), -INT8_C( 37), -INT8_C( 111), -INT8_C( 103), -INT8_C( 111), INT8_C( 17), -INT8_C( 3) }, { INT8_C( 4), INT8_C( 2), INT8_C( 4), INT8_C( 4), INT8_C( 5), INT8_C( 5), INT8_C( 6), INT8_C( 7) }, { -INT8_C( 103), -INT8_C( 37), -INT8_C( 103), -INT8_C( 103), -INT8_C( 111), -INT8_C( 111), INT8_C( 17), -INT8_C( 3) } }, { { -INT8_C( 81), -INT8_C( 56), -INT8_C( 30), INT8_C( 116), INT8_C( 86), -INT8_C( 72), -INT8_C( 31), -INT8_C( 48) }, { INT8_MIN, INT8_C( 4), INT8_C( 1), INT8_C( 2), INT8_C( 5), INT8_C( 2), INT8_C( 7), INT8_C( 1) }, { INT8_C( 0), INT8_C( 86), -INT8_C( 56), -INT8_C( 30), -INT8_C( 72), -INT8_C( 30), -INT8_C( 48), -INT8_C( 56) } }, { { -INT8_C( 127), INT8_C( 17), INT8_C( 63), INT8_C( 79), INT8_MAX, INT8_C( 35), INT8_C( 42), INT8_C( 46) }, { INT8_C( 3), INT8_C( 5), INT8_C( 3), INT8_C( 1), -INT8_C( 59), INT8_C( 4), INT8_C( 1), INT8_C( 70) }, { INT8_C( 79), INT8_C( 35), INT8_C( 79), INT8_C( 17), INT8_C( 0), INT8_MAX, INT8_C( 17), INT8_C( 0) } }, { { INT8_C( 10), INT8_C( 51), -INT8_C( 76), -INT8_C( 21), -INT8_C( 2), -INT8_C( 11), -INT8_C( 63), INT8_MIN }, { INT8_C( 7), INT8_C( 0), -INT8_C( 49), INT8_C( 6), INT8_C( 3), INT8_C( 1), INT8_C( 5), INT8_C( 6) }, { INT8_MIN, 
INT8_C( 10), INT8_C( 0), -INT8_C( 63), -INT8_C( 21), INT8_C( 51), -INT8_C( 11), -INT8_C( 63) } }, { { -INT8_C( 44), INT8_C( 114), -INT8_C( 87), -INT8_C( 70), -INT8_C( 23), -INT8_C( 17), -INT8_C( 60), -INT8_C( 13) }, { INT8_C( 2), INT8_C( 1), -INT8_C( 33), INT8_C( 0), INT8_C( 6), INT8_C( 0), INT8_C( 0), INT8_C( 5) }, { -INT8_C( 87), INT8_C( 114), INT8_C( 0), -INT8_C( 44), -INT8_C( 60), -INT8_C( 44), -INT8_C( 44), -INT8_C( 17) } }, { { INT8_C( 9), INT8_C( 35), INT8_C( 59), -INT8_C( 27), -INT8_C( 124), INT8_C( 77), INT8_C( 1), INT8_C( 89) }, { INT8_C( 7), INT8_C( 2), INT8_C( 3), INT8_C( 0), INT8_C( 1), INT8_C( 7), INT8_C( 4), INT8_C( 3) }, { INT8_C( 89), INT8_C( 59), -INT8_C( 27), INT8_C( 9), INT8_C( 35), INT8_C( 89), -INT8_C( 124), -INT8_C( 27) } }, { { -INT8_C( 21), INT8_C( 48), -INT8_C( 127), INT8_C( 84), -INT8_C( 31), INT8_C( 84), -INT8_C( 60), -INT8_C( 22) }, { INT8_C( 7), INT8_C( 7), INT8_C( 7), INT8_C( 3), INT8_C( 5), INT8_C( 0), INT8_C( 4), INT8_C( 4) }, { -INT8_C( 22), -INT8_C( 22), -INT8_C( 22), INT8_C( 84), INT8_C( 84), -INT8_C( 21), -INT8_C( 31), -INT8_C( 31) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vtbl1_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8_private b_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { b_.values[j] &= 7; } } simde_int8x8_t b = simde_int8x8_from_private(b_); simde_int8x8_t r = simde_vtbl1_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, 
SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtbl1_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t a[8]; SIMDE_ALIGN_TO_16 uint8_t b[8]; SIMDE_ALIGN_TO_16 uint8_t r[8]; } test_vec[] = { { { UINT8_C( 30), UINT8_C( 72), UINT8_C(249), UINT8_C( 35), UINT8_C( 96), UINT8_C( 70), UINT8_C(235), UINT8_C( 89) }, { UINT8_C(225), UINT8_C( 4), UINT8_C(200), UINT8_C( 3), UINT8_C( 0), UINT8_C( 39), UINT8_C( 3), UINT8_C( 1) }, { UINT8_C( 0), UINT8_C( 96), UINT8_C( 0), UINT8_C( 35), UINT8_C( 30), UINT8_C( 0), UINT8_C( 35), UINT8_C( 72) } }, { { UINT8_C(107), UINT8_C( 14), UINT8_C(200), UINT8_C( 75), UINT8_C(110), UINT8_C( 50), UINT8_C(145), UINT8_C(140) }, { UINT8_C(122), UINT8_C( 2), UINT8_C( 0), UINT8_C( 2), UINT8_C( 0), UINT8_C( 3), UINT8_C( 4), UINT8_C( 2) }, { UINT8_C( 0), UINT8_C(200), UINT8_C(107), UINT8_C(200), UINT8_C(107), UINT8_C( 75), UINT8_C(110), UINT8_C(200) } }, { { UINT8_C(243), UINT8_C( 22), UINT8_C( 24), UINT8_C(214), UINT8_C(109), UINT8_C(109), UINT8_C(151), UINT8_C(216) }, { UINT8_C( 3), UINT8_C( 96), UINT8_C( 4), UINT8_C( 1), UINT8_C( 2), UINT8_C( 5), UINT8_C( 6), UINT8_C( 4) }, { UINT8_C(214), UINT8_C( 0), UINT8_C(109), UINT8_C( 22), UINT8_C( 24), UINT8_C(109), UINT8_C(151), UINT8_C(109) } }, { { UINT8_C( 23), UINT8_C(159), UINT8_C(185), UINT8_C( 58), UINT8_C( 55), UINT8_C(242), UINT8_C( 49), UINT8_C( 43) }, { UINT8_C( 8), UINT8_C( 2), UINT8_C( 1), UINT8_C( 5), UINT8_C( 7), UINT8_C( 0), UINT8_C( 6), UINT8_C( 51) }, { UINT8_C( 0), UINT8_C(185), UINT8_C(159), UINT8_C(242), UINT8_C( 43), UINT8_C( 23), UINT8_C( 49), UINT8_C( 0) } }, { { UINT8_C(184), UINT8_C(126), UINT8_C(117), UINT8_C(122), UINT8_C(153), UINT8_C( 55), UINT8_C(155), UINT8_C(176) }, { UINT8_C( 6), UINT8_C( 4), UINT8_C(234), UINT8_C( 5), UINT8_C( 6), UINT8_C( 3), UINT8_C( 0), UINT8_C( 7) }, { UINT8_C(155), UINT8_C(153), UINT8_C( 0), UINT8_C( 55), UINT8_C(155), UINT8_C(122), UINT8_C(184), UINT8_C(176) } }, { { UINT8_C(132), UINT8_C(108), UINT8_C( 85), 
UINT8_C(171), UINT8_MAX, UINT8_C(236), UINT8_C( 17), UINT8_C(183) }, { UINT8_C( 2), UINT8_C( 7), UINT8_C( 49), UINT8_C( 3), UINT8_C(190), UINT8_C( 4), UINT8_C( 3), UINT8_C( 4) }, { UINT8_C( 85), UINT8_C(183), UINT8_C( 0), UINT8_C(171), UINT8_C( 0), UINT8_MAX, UINT8_C(171), UINT8_MAX } }, { { UINT8_C( 19), UINT8_C(122), UINT8_C( 59), UINT8_C(229), UINT8_C(141), UINT8_C(139), UINT8_C(176), UINT8_C( 17) }, { UINT8_C( 7), UINT8_C( 5), UINT8_C(189), UINT8_C( 6), UINT8_C( 1), UINT8_C(206), UINT8_C( 6), UINT8_C( 3) }, { UINT8_C( 17), UINT8_C(139), UINT8_C( 0), UINT8_C(176), UINT8_C(122), UINT8_C( 0), UINT8_C(176), UINT8_C(229) } }, { { UINT8_C(174), UINT8_C( 73), UINT8_C( 51), UINT8_C(102), UINT8_C( 35), UINT8_C(233), UINT8_C(132), UINT8_C( 54) }, { UINT8_C(100), UINT8_C( 7), UINT8_C( 4), UINT8_C( 1), UINT8_C( 2), UINT8_C( 4), UINT8_C( 2), UINT8_C( 2) }, { UINT8_C( 0), UINT8_C( 54), UINT8_C( 35), UINT8_C( 73), UINT8_C( 51), UINT8_C( 35), UINT8_C( 51), UINT8_C( 51) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vtbl1_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8_private b_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { b_.values[j] &= 7; } } simde_uint8x8_t b = simde_uint8x8_from_private(b_); simde_uint8x8_t r = simde_vtbl1_u8(a, b); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #if !defined(SIMDE_BUG_INTEL_857088) static int test_simde_vtbl2_s8 
(SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t a[2][8]; SIMDE_ALIGN_TO_16 int8_t b[8]; SIMDE_ALIGN_TO_16 int8_t r[8]; } test_vec[] = { { { { -INT8_C( 96), INT8_C( 73), -INT8_C( 46), -INT8_C( 52), INT8_C( 62), INT8_C( 65), INT8_C( 82), -INT8_C( 124) }, { -INT8_C( 84), INT8_C( 54), INT8_C( 67), INT8_C( 104), -INT8_C( 32), -INT8_C( 89), INT8_C( 38), -INT8_C( 123) } }, { INT8_C( 8), INT8_C( 0), INT8_C( 9), -INT8_C( 5), INT8_C( 110), INT8_C( 5), INT8_C( 104), INT8_C( 88) }, { -INT8_C( 84), -INT8_C( 96), INT8_C( 54), INT8_C( 0), INT8_C( 0), INT8_C( 65), INT8_C( 0), INT8_C( 0) } }, { { { -INT8_C( 28), INT8_C( 46), -INT8_C( 69), INT8_C( 34), INT8_C( 111), INT8_C( 13), -INT8_C( 89), INT8_C( 27) }, { INT8_C( 67), -INT8_C( 22), -INT8_C( 124), INT8_C( 35), -INT8_C( 110), -INT8_C( 86), -INT8_C( 87), INT8_C( 106) } }, { INT8_C( 10), INT8_C( 2), INT8_C( 6), INT8_C( 9), INT8_C( 7), INT8_C( 14), INT8_C( 1), INT8_C( 3) }, { -INT8_C( 124), -INT8_C( 69), -INT8_C( 89), -INT8_C( 22), INT8_C( 27), -INT8_C( 87), INT8_C( 46), INT8_C( 34) } }, { { { -INT8_C( 98), INT8_C( 79), INT8_C( 126), INT8_C( 14), INT8_C( 92), INT8_C( 37), INT8_C( 41), -INT8_C( 97) }, { INT8_C( 15), -INT8_C( 83), -INT8_C( 62), -INT8_C( 95), INT8_C( 87), INT8_C( 107), INT8_C( 12), -INT8_C( 46) } }, { INT8_C( 13), INT8_C( 2), INT8_C( 11), INT8_C( 5), INT8_C( 0), -INT8_C( 4), INT8_C( 8), INT8_C( 6) }, { INT8_C( 107), INT8_C( 126), -INT8_C( 95), INT8_C( 37), -INT8_C( 98), INT8_C( 0), INT8_C( 15), INT8_C( 41) } }, { { { INT8_C( 30), INT8_C( 42), INT8_C( 25), INT8_C( 122), INT8_C( 79), INT8_C( 67), INT8_C( 25), INT8_C( 95) }, { -INT8_C( 16), -INT8_C( 36), INT8_C( 0), INT8_C( 72), INT8_C( 71), INT8_C( 12), INT8_C( 26), -INT8_C( 11) } }, { INT8_C( 14), INT8_C( 5), INT8_C( 10), -INT8_C( 65), INT8_C( 1), INT8_C( 2), INT8_C( 85), INT8_C( 11) }, { INT8_C( 26), INT8_C( 67), INT8_C( 0), INT8_C( 0), INT8_C( 42), INT8_C( 25), INT8_C( 0), INT8_C( 72) } }, { { { -INT8_C( 45), INT8_MIN, -INT8_C( 7), INT8_C( 34), 
-INT8_C( 61), INT8_C( 19), -INT8_C( 127), -INT8_C( 76) }, { -INT8_C( 17), -INT8_C( 126), -INT8_C( 4), INT8_C( 54), -INT8_C( 114), INT8_C( 22), INT8_C( 43), INT8_C( 13) } }, { INT8_C( 11), INT8_C( 5), INT8_C( 12), INT8_C( 12), INT8_C( 7), INT8_C( 1), INT8_C( 103), INT8_C( 6) }, { INT8_C( 54), INT8_C( 19), -INT8_C( 114), -INT8_C( 114), -INT8_C( 76), INT8_MIN, INT8_C( 0), -INT8_C( 127) } }, { { { INT8_C( 89), -INT8_C( 117), INT8_C( 1), INT8_C( 28), -INT8_C( 98), -INT8_C( 125), -INT8_C( 48), -INT8_C( 115) }, { INT8_C( 5), -INT8_C( 52), -INT8_C( 60), -INT8_C( 109), -INT8_C( 30), -INT8_C( 17), -INT8_C( 96), -INT8_C( 51) } }, { INT8_C( 5), INT8_C( 12), INT8_C( 10), INT8_C( 12), INT8_C( 14), INT8_C( 1), INT8_C( 98), INT8_C( 0) }, { -INT8_C( 125), -INT8_C( 30), -INT8_C( 60), -INT8_C( 30), -INT8_C( 96), -INT8_C( 117), INT8_C( 0), INT8_C( 89) } }, { { { INT8_C( 113), INT8_C( 65), INT8_C( 34), INT8_C( 15), -INT8_C( 60), -INT8_C( 14), -INT8_C( 99), -INT8_C( 55) }, { -INT8_C( 65), INT8_C( 97), INT8_C( 93), -INT8_C( 95), INT8_C( 80), -INT8_C( 3), INT8_C( 111), INT8_C( 117) } }, { INT8_C( 106), INT8_C( 9), INT8_C( 2), -INT8_C( 8), INT8_C( 10), INT8_C( 4), INT8_C( 8), INT8_C( 15) }, { INT8_C( 0), INT8_C( 97), INT8_C( 34), INT8_C( 0), INT8_C( 93), -INT8_C( 60), -INT8_C( 65), INT8_C( 117) } }, { { { INT8_C( 34), -INT8_C( 52), -INT8_C( 14), -INT8_C( 26), -INT8_C( 65), -INT8_C( 113), -INT8_C( 80), INT8_C( 126) }, { -INT8_C( 16), INT8_C( 13), INT8_C( 31), INT8_C( 64), INT8_C( 10), -INT8_C( 114), -INT8_C( 74), INT8_C( 116) } }, { INT8_C( 7), INT8_C( 8), INT8_C( 12), INT8_C( 2), -INT8_C( 100), INT8_C( 5), -INT8_C( 79), -INT8_C( 107) }, { INT8_C( 126), -INT8_C( 16), INT8_C( 10), -INT8_C( 14), INT8_C( 0), -INT8_C( 113), INT8_C( 0), INT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8x2_t a; a.val[0] = simde_vld1_s8(test_vec[i].a[0]); a.val[1] = simde_vld1_s8(test_vec[i].a[1]); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); 
simde_int8x8_t r = simde_vtbl2_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8x2_t a = simde_test_arm_neon_random_i8x8x2(); simde_int8x8_private b_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { b_.values[j] &= 15; } } simde_int8x8_t b = simde_int8x8_from_private(b_); simde_int8x8_t r = simde_vtbl2_s8(a, b); simde_test_arm_neon_write_i8x8x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtbl2_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t a[2][8]; SIMDE_ALIGN_TO_16 uint8_t b[8]; SIMDE_ALIGN_TO_16 uint8_t r[8]; } test_vec[] = { { { { UINT8_C(134), UINT8_C(191), UINT8_C(113), UINT8_C( 91), UINT8_C( 5), UINT8_C(138), UINT8_C( 44), UINT8_C(141) }, { UINT8_C( 43), UINT8_C( 29), UINT8_C( 89), UINT8_C(116), UINT8_C(215), UINT8_C(159), UINT8_C(180), UINT8_C(156) } }, { UINT8_C( 13), UINT8_C( 7), UINT8_C( 1), UINT8_C( 12), UINT8_C( 7), UINT8_C( 2), UINT8_C( 7), UINT8_C( 5) }, { UINT8_C(159), UINT8_C(141), UINT8_C(191), UINT8_C(215), UINT8_C(141), UINT8_C(113), UINT8_C(141), UINT8_C(138) } }, { { { UINT8_C( 93), UINT8_C( 44), UINT8_C( 91), UINT8_C( 98), UINT8_C(182), UINT8_C(136), UINT8_C(239), UINT8_C(225) }, { UINT8_C(165), UINT8_C( 73), UINT8_C( 85), UINT8_C(125), UINT8_C(232), UINT8_C( 10), UINT8_C( 25), UINT8_C( 69) } }, { UINT8_C( 1), UINT8_C( 10), UINT8_C( 1), UINT8_C( 8), UINT8_C(204), UINT8_C(249), UINT8_C( 13), UINT8_C( 11) }, { UINT8_C( 44), UINT8_C( 85), UINT8_C( 44), UINT8_C(165), UINT8_C( 0), UINT8_C( 0), UINT8_C( 10), UINT8_C(125) } }, { { { UINT8_C(197), UINT8_C(163), UINT8_C(238), UINT8_C(123), UINT8_C( 43), UINT8_C(221), UINT8_C( 92), 
UINT8_C(209) }, { UINT8_C( 38), UINT8_C(178), UINT8_C( 78), UINT8_C( 14), UINT8_C(188), UINT8_C(103), UINT8_C( 83), UINT8_C(221) } }, { UINT8_C( 2), UINT8_C( 5), UINT8_C(197), UINT8_C( 14), UINT8_C( 14), UINT8_C( 2), UINT8_C( 41), UINT8_C( 11) }, { UINT8_C(238), UINT8_C(221), UINT8_C( 0), UINT8_C( 83), UINT8_C( 83), UINT8_C(238), UINT8_C( 0), UINT8_C( 14) } }, { { { UINT8_C(206), UINT8_C(236), UINT8_C(225), UINT8_C(250), UINT8_C(202), UINT8_C( 61), UINT8_C(203), UINT8_C(240) }, { UINT8_C(239), UINT8_C( 25), UINT8_MAX, UINT8_C(171), UINT8_C(128), UINT8_C( 82), UINT8_C(136), UINT8_C( 2) } }, { UINT8_C( 39), UINT8_C( 13), UINT8_C( 81), UINT8_C( 5), UINT8_C( 15), UINT8_C( 10), UINT8_C( 1), UINT8_C( 67) }, { UINT8_C( 0), UINT8_C( 82), UINT8_C( 0), UINT8_C( 61), UINT8_C( 2), UINT8_MAX, UINT8_C(236), UINT8_C( 0) } }, { { { UINT8_C(160), UINT8_C(106), UINT8_C(103), UINT8_C(106), UINT8_C(168), UINT8_C( 50), UINT8_C( 91), UINT8_C(151) }, { UINT8_C( 75), UINT8_C( 90), UINT8_C( 67), UINT8_C(204), UINT8_C(172), UINT8_C(203), UINT8_C(206), UINT8_C(212) } }, { UINT8_C( 9), UINT8_C( 15), UINT8_C(201), UINT8_C( 8), UINT8_C( 10), UINT8_C( 10), UINT8_C( 11), UINT8_C( 12) }, { UINT8_C( 90), UINT8_C(212), UINT8_C( 0), UINT8_C( 75), UINT8_C( 67), UINT8_C( 67), UINT8_C(204), UINT8_C(172) } }, { { { UINT8_C( 52), UINT8_C( 66), UINT8_C(208), UINT8_C(220), UINT8_C(117), UINT8_C( 43), UINT8_C(115), UINT8_C(192) }, { UINT8_C(133), UINT8_C(182), UINT8_C(140), UINT8_C( 49), UINT8_C(130), UINT8_C( 91), UINT8_C( 5), UINT8_C(155) } }, { UINT8_C( 10), UINT8_C( 15), UINT8_C(179), UINT8_C( 4), UINT8_C( 9), UINT8_C( 15), UINT8_C( 49), UINT8_C( 8) }, { UINT8_C(140), UINT8_C(155), UINT8_C( 0), UINT8_C(117), UINT8_C(182), UINT8_C(155), UINT8_C( 0), UINT8_C(133) } }, { { { UINT8_C(131), UINT8_C( 26), UINT8_C( 74), UINT8_C(248), UINT8_C( 69), UINT8_C(190), UINT8_C(184), UINT8_C(202) }, { UINT8_C(116), UINT8_C( 69), UINT8_C(252), UINT8_C(246), UINT8_C(160), UINT8_C( 1), UINT8_C(145), UINT8_C( 26) } }, { 
UINT8_C( 0), UINT8_C( 5), UINT8_C( 15), UINT8_C( 10), UINT8_C( 4), UINT8_C( 0), UINT8_C( 2), UINT8_C(116) }, { UINT8_C(131), UINT8_C(190), UINT8_C( 26), UINT8_C(252), UINT8_C( 69), UINT8_C(131), UINT8_C( 74), UINT8_C( 0) } }, { { { UINT8_C(190), UINT8_C(196), UINT8_C( 19), UINT8_C( 3), UINT8_C(130), UINT8_C(203), UINT8_C(206), UINT8_C(247) }, { UINT8_C( 16), UINT8_C(202), UINT8_C(237), UINT8_C(176), UINT8_C(203), UINT8_C(127), UINT8_C(203), UINT8_C(156) } }, { UINT8_C( 4), UINT8_C( 10), UINT8_C(150), UINT8_C( 8), UINT8_C( 10), UINT8_C( 8), UINT8_C(140), UINT8_C( 11) }, { UINT8_C(130), UINT8_C(237), UINT8_C( 0), UINT8_C( 16), UINT8_C(237), UINT8_C( 16), UINT8_C( 0), UINT8_C(176) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8x2_t a; a.val[0] = simde_vld1_u8(test_vec[i].a[0]); a.val[1] = simde_vld1_u8(test_vec[i].a[1]); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vtbl2_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8x2_t a = simde_test_arm_neon_random_u8x8x2(); simde_uint8x8_private b_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { b_.values[j] &= 15; } } simde_uint8x8_t b = simde_uint8x8_from_private(b_); simde_uint8x8_t r = simde_vtbl2_u8(a, b); simde_test_arm_neon_write_u8x8x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtbl3_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t a[3][8]; SIMDE_ALIGN_TO_16 int8_t b[8]; SIMDE_ALIGN_TO_16 int8_t r[8]; } test_vec[] = { { { { -INT8_C( 89), -INT8_C( 72), -INT8_C( 99), -INT8_C( 2), -INT8_C( 113), -INT8_C( 51), INT8_C( 112), INT8_C( 
1) }, { INT8_C( 55), -INT8_C( 3), INT8_C( 112), -INT8_C( 71), -INT8_C( 65), -INT8_C( 106), -INT8_C( 111), -INT8_C( 9) }, { -INT8_C( 42), INT8_C( 59), INT8_C( 19), -INT8_C( 33), INT8_C( 14), -INT8_C( 42), -INT8_C( 60), INT8_C( 2) } }, { -INT8_C( 71), INT8_C( 8), INT8_C( 119), INT8_C( 10), INT8_C( 22), INT8_C( 20), INT8_C( 16), INT8_C( 5) }, { INT8_C( 0), INT8_C( 55), INT8_C( 0), INT8_C( 112), -INT8_C( 60), INT8_C( 14), -INT8_C( 42), -INT8_C( 51) } }, { { { -INT8_C( 23), -INT8_C( 28), INT8_C( 108), -INT8_C( 88), INT8_C( 123), -INT8_C( 2), -INT8_C( 96), INT8_C( 81) }, { INT8_C( 57), -INT8_C( 77), INT8_C( 48), INT8_C( 71), -INT8_C( 119), -INT8_C( 12), INT8_C( 73), INT8_C( 67) }, { -INT8_C( 44), -INT8_C( 63), -INT8_C( 35), -INT8_C( 86), -INT8_C( 19), -INT8_C( 18), INT8_C( 39), -INT8_C( 47) } }, { INT8_C( 12), INT8_C( 19), INT8_C( 20), INT8_C( 23), INT8_C( 23), INT8_C( 16), -INT8_C( 54), INT8_C( 0) }, { -INT8_C( 119), -INT8_C( 86), -INT8_C( 19), -INT8_C( 47), -INT8_C( 47), -INT8_C( 44), INT8_C( 0), -INT8_C( 23) } }, { { { INT8_C( 116), -INT8_C( 102), -INT8_C( 74), -INT8_C( 2), -INT8_C( 114), -INT8_C( 1), INT8_C( 65), INT8_C( 99) }, { -INT8_C( 64), INT8_C( 30), INT8_C( 13), -INT8_C( 83), INT8_C( 12), INT8_C( 53), INT8_C( 126), -INT8_C( 88) }, { -INT8_C( 40), -INT8_C( 62), -INT8_C( 64), INT8_C( 103), INT8_C( 123), -INT8_C( 118), -INT8_C( 33), INT8_C( 24) } }, { INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 6), INT8_C( 1), -INT8_C( 103), INT8_C( 5), INT8_C( 6) }, { -INT8_C( 102), INT8_C( 116), INT8_C( 116), INT8_C( 65), -INT8_C( 102), INT8_C( 0), -INT8_C( 1), INT8_C( 65) } }, { { { -INT8_C( 109), INT8_C( 50), -INT8_C( 120), -INT8_C( 96), INT8_C( 103), INT8_C( 7), INT8_C( 72), INT8_C( 63) }, { -INT8_C( 55), INT8_C( 8), -INT8_C( 90), INT8_C( 68), -INT8_C( 109), -INT8_C( 122), INT8_C( 92), INT8_C( 84) }, { -INT8_C( 122), -INT8_C( 116), INT8_C( 75), INT8_C( 72), INT8_C( 38), -INT8_C( 80), INT8_C( 126), INT8_C( 89) } }, { INT8_C( 11), -INT8_C( 78), INT8_C( 3), INT8_C( 13), INT8_C( 
15), INT8_C( 16), INT8_C( 0), INT8_C( 18) }, { INT8_C( 68), INT8_C( 0), -INT8_C( 96), -INT8_C( 122), INT8_C( 84), -INT8_C( 122), -INT8_C( 109), INT8_C( 75) } }, { { { -INT8_C( 85), -INT8_C( 64), INT8_C( 94), INT8_C( 62), INT8_C( 70), -INT8_C( 70), -INT8_C( 109), -INT8_C( 51) }, { INT8_C( 71), -INT8_C( 34), INT8_C( 21), INT8_C( 109), -INT8_C( 114), -INT8_C( 109), -INT8_C( 58), INT8_C( 89) }, { INT8_C( 69), -INT8_C( 30), INT8_C( 62), INT8_C( 108), INT8_C( 34), -INT8_C( 1), INT8_C( 38), -INT8_C( 107) } }, { INT8_C( 0), INT8_C( 9), INT8_C( 16), INT8_C( 8), INT8_C( 12), INT8_C( 18), INT8_C( 9), INT8_C( 15) }, { -INT8_C( 85), -INT8_C( 34), INT8_C( 69), INT8_C( 71), -INT8_C( 114), INT8_C( 62), -INT8_C( 34), INT8_C( 89) } }, { { { INT8_MAX, INT8_C( 115), INT8_C( 126), INT8_C( 13), INT8_C( 6), INT8_C( 68), INT8_C( 102), INT8_C( 75) }, { INT8_C( 38), -INT8_C( 92), -INT8_C( 73), INT8_C( 73), -INT8_C( 93), -INT8_C( 35), -INT8_C( 34), -INT8_C( 21) }, { INT8_C( 94), INT8_C( 78), -INT8_C( 125), -INT8_C( 126), -INT8_C( 40), INT8_C( 53), INT8_C( 82), INT8_C( 35) } }, { INT8_C( 20), INT8_C( 0), INT8_C( 12), INT8_C( 14), INT8_C( 1), INT8_C( 18), INT8_C( 7), INT8_C( 8) }, { -INT8_C( 40), INT8_MAX, -INT8_C( 93), -INT8_C( 34), INT8_C( 115), -INT8_C( 125), INT8_C( 75), INT8_C( 38) } }, { { { -INT8_C( 105), -INT8_C( 115), INT8_C( 81), INT8_C( 59), INT8_C( 106), INT8_C( 48), INT8_C( 38), -INT8_C( 55) }, { INT8_C( 126), -INT8_C( 86), INT8_C( 75), INT8_C( 87), -INT8_C( 33), -INT8_C( 99), INT8_C( 122), INT8_C( 35) }, { -INT8_C( 3), INT8_C( 46), INT8_C( 50), -INT8_C( 2), INT8_C( 64), INT8_C( 81), INT8_C( 126), -INT8_C( 59) } }, { INT8_C( 23), INT8_C( 11), INT8_C( 8), INT8_C( 17), -INT8_C( 2), INT8_C( 38), INT8_C( 1), INT8_C( 6) }, { -INT8_C( 59), INT8_C( 87), INT8_C( 126), INT8_C( 46), INT8_C( 0), INT8_C( 0), -INT8_C( 115), INT8_C( 38) } }, { { { -INT8_C( 95), INT8_C( 50), INT8_C( 48), INT8_MIN, -INT8_C( 49), -INT8_C( 86), -INT8_C( 92), -INT8_C( 51) }, { -INT8_C( 40), -INT8_C( 42), -INT8_C( 
53), INT8_C( 24), INT8_C( 39), INT8_C( 74), -INT8_C( 35), INT8_C( 22) }, { INT8_C( 85), INT8_C( 45), -INT8_C( 25), INT8_C( 84), INT8_C( 83), -INT8_C( 63), -INT8_C( 22), INT8_C( 6) } }, { -INT8_C( 20), INT8_C( 19), INT8_C( 36), INT8_C( 23), INT8_C( 10), INT8_C( 10), INT8_C( 8), INT8_C( 12) }, { INT8_C( 0), INT8_C( 84), INT8_C( 0), INT8_C( 6), -INT8_C( 53), -INT8_C( 53), -INT8_C( 40), INT8_C( 39) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8x3_t a; a.val[0] = simde_vld1_s8(test_vec[i].a[0]); a.val[1] = simde_vld1_s8(test_vec[i].a[1]); a.val[2] = simde_vld1_s8(test_vec[i].a[2]); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vtbl3_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8x3_t a = simde_test_arm_neon_random_i8x8x3(); simde_int8x8_private b_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { b_.values[j] = HEDLEY_STATIC_CAST(int8_t, HEDLEY_STATIC_CAST(uint8_t, b_.values[j]) % 24); } } simde_int8x8_t b = simde_int8x8_from_private(b_); simde_int8x8_t r = simde_vtbl3_s8(a, b); simde_test_arm_neon_write_i8x8x3(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtbl3_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t a[3][8]; SIMDE_ALIGN_TO_16 uint8_t b[8]; SIMDE_ALIGN_TO_16 uint8_t r[8]; } test_vec[] = { { { { UINT8_C( 83), UINT8_C(122), UINT8_C(109), UINT8_C(136), UINT8_C(147), UINT8_C( 79), UINT8_C(110), UINT8_C(111) }, { UINT8_C(199), UINT8_C(218), UINT8_C( 64), UINT8_C(161), UINT8_C(211), UINT8_C( 77), UINT8_C(142), UINT8_C(130) }, { UINT8_C(195), UINT8_C(231), UINT8_C(103), UINT8_C(249), 
UINT8_C(173), UINT8_C( 87), UINT8_C(179), UINT8_C(160) } }, { UINT8_C(174), UINT8_C(220), UINT8_C( 16), UINT8_C(209), UINT8_C( 22), UINT8_C(221), UINT8_C( 2), UINT8_C( 10) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C(195), UINT8_C( 0), UINT8_C(179), UINT8_C( 0), UINT8_C(109), UINT8_C( 64) } }, { { { UINT8_C(106), UINT8_C(153), UINT8_C(184), UINT8_C( 62), UINT8_C(230), UINT8_C( 70), UINT8_C(192), UINT8_C(169) }, { UINT8_C( 45), UINT8_C( 39), UINT8_C(162), UINT8_C(218), UINT8_C(126), UINT8_C( 85), UINT8_C(123), UINT8_C( 45) }, { UINT8_C( 50), UINT8_C(235), UINT8_C(254), UINT8_C(120), UINT8_C(200), UINT8_C(145), UINT8_C( 18), UINT8_C( 31) } }, { UINT8_C( 0), UINT8_C( 5), UINT8_C( 9), UINT8_C( 7), UINT8_C( 5), UINT8_C( 2), UINT8_C(246), UINT8_C( 0) }, { UINT8_C(106), UINT8_C( 70), UINT8_C( 39), UINT8_C(169), UINT8_C( 70), UINT8_C(184), UINT8_C( 0), UINT8_C(106) } }, { { { UINT8_C( 85), UINT8_C( 46), UINT8_C(252), UINT8_C(211), UINT8_C(132), UINT8_C(119), UINT8_C( 0), UINT8_C(182) }, { UINT8_C( 98), UINT8_MAX, UINT8_C( 46), UINT8_C( 42), UINT8_C(144), UINT8_C( 65), UINT8_C( 73), UINT8_C( 32) }, { UINT8_C(118), UINT8_C( 82), UINT8_C( 0), UINT8_C( 59), UINT8_C(180), UINT8_C(246), UINT8_C(107), UINT8_C(176) } }, { UINT8_C( 20), UINT8_C( 1), UINT8_C(147), UINT8_C( 8), UINT8_C( 7), UINT8_C( 7), UINT8_C( 17), UINT8_C( 92) }, { UINT8_C(180), UINT8_C( 46), UINT8_C( 0), UINT8_C( 98), UINT8_C(182), UINT8_C(182), UINT8_C( 82), UINT8_C( 0) } }, { { { UINT8_C( 47), UINT8_C(182), UINT8_C(184), UINT8_C(191), UINT8_C(247), UINT8_C( 1), UINT8_C(224), UINT8_C(109) }, { UINT8_C( 83), UINT8_C(224), UINT8_C(168), UINT8_C( 8), UINT8_C(214), UINT8_C( 20), UINT8_C(184), UINT8_C(122) }, { UINT8_C(237), UINT8_C( 75), UINT8_C( 18), UINT8_C(245), UINT8_C(106), UINT8_C(203), UINT8_C( 81), UINT8_C(184) } }, { UINT8_C( 9), UINT8_C(129), UINT8_C( 17), UINT8_C( 5), UINT8_C( 10), UINT8_C( 17), UINT8_C( 12), UINT8_C( 9) }, { UINT8_C(224), UINT8_C( 0), UINT8_C( 75), UINT8_C( 1), UINT8_C(168), UINT8_C( 75), 
UINT8_C(214), UINT8_C(224) } }, { { { UINT8_C( 97), UINT8_C(211), UINT8_C( 81), UINT8_C( 55), UINT8_C(231), UINT8_C( 10), UINT8_C(177), UINT8_C(213) }, { UINT8_C( 85), UINT8_C(195), UINT8_C(202), UINT8_C(192), UINT8_C(142), UINT8_C( 27), UINT8_C(120), UINT8_C( 15) }, { UINT8_C(157), UINT8_C( 1), UINT8_C(189), UINT8_C( 79), UINT8_C( 18), UINT8_C(249), UINT8_C( 48), UINT8_C(217) } }, { UINT8_C( 21), UINT8_C( 17), UINT8_C(151), UINT8_C( 11), UINT8_C( 82), UINT8_C( 2), UINT8_C( 45), UINT8_C(179) }, { UINT8_C(249), UINT8_C( 1), UINT8_C( 0), UINT8_C(192), UINT8_C( 0), UINT8_C( 81), UINT8_C( 0), UINT8_C( 0) } }, { { { UINT8_C( 94), UINT8_C( 28), UINT8_C(158), UINT8_C(237), UINT8_C( 56), UINT8_C( 22), UINT8_C(252), UINT8_C(213) }, { UINT8_C( 23), UINT8_C(185), UINT8_C( 36), UINT8_C( 42), UINT8_C(178), UINT8_C( 84), UINT8_C( 3), UINT8_C(160) }, { UINT8_C( 38), UINT8_C(155), UINT8_C(131), UINT8_C(120), UINT8_C( 93), UINT8_C(176), UINT8_C( 44), UINT8_C(243) } }, { UINT8_C( 23), UINT8_C( 22), UINT8_C( 17), UINT8_C( 15), UINT8_C( 10), UINT8_C( 3), UINT8_C( 5), UINT8_C( 16) }, { UINT8_C(243), UINT8_C( 44), UINT8_C(155), UINT8_C(160), UINT8_C( 36), UINT8_C(237), UINT8_C( 22), UINT8_C( 38) } }, { { { UINT8_C(179), UINT8_C( 17), UINT8_C(139), UINT8_C(102), UINT8_C(101), UINT8_C(142), UINT8_C( 6), UINT8_C(139) }, { UINT8_C( 41), UINT8_C(137), UINT8_C( 4), UINT8_C(135), UINT8_C( 58), UINT8_C( 48), UINT8_C(122), UINT8_C(105) }, { UINT8_C( 70), UINT8_C(235), UINT8_C( 32), UINT8_C(248), UINT8_C(175), UINT8_C(182), UINT8_C( 9), UINT8_C(143) } }, { UINT8_C( 17), UINT8_C( 6), UINT8_C( 23), UINT8_C( 51), UINT8_C( 0), UINT8_C( 4), UINT8_C( 4), UINT8_C( 12) }, { UINT8_C(235), UINT8_C( 6), UINT8_C(143), UINT8_C( 0), UINT8_C(179), UINT8_C(101), UINT8_C(101), UINT8_C( 58) } }, { { { UINT8_C(169), UINT8_C(154), UINT8_C( 94), UINT8_C(227), UINT8_C(202), UINT8_C(216), UINT8_C( 76), UINT8_C( 16) }, { UINT8_C(196), UINT8_C(109), UINT8_C( 9), UINT8_C(115), UINT8_C( 35), UINT8_C( 18), UINT8_C( 2), 
UINT8_C( 12) }, { UINT8_C( 24), UINT8_C(169), UINT8_C( 63), UINT8_C( 25), UINT8_C( 61), UINT8_C(211), UINT8_C(205), UINT8_C(226) } }, { UINT8_C( 2), UINT8_C( 15), UINT8_C( 20), UINT8_C( 16), UINT8_C( 7), UINT8_C( 10), UINT8_C( 23), UINT8_C( 8) }, { UINT8_C( 94), UINT8_C( 12), UINT8_C( 61), UINT8_C( 24), UINT8_C( 16), UINT8_C( 9), UINT8_C(226), UINT8_C(196) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8x3_t a; a.val[0] = simde_vld1_u8(test_vec[i].a[0]); a.val[1] = simde_vld1_u8(test_vec[i].a[1]); a.val[2] = simde_vld1_u8(test_vec[i].a[2]); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vtbl3_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8x3_t a = simde_test_arm_neon_random_u8x8x3(); simde_uint8x8_private b_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { b_.values[j] %= 24; } } simde_uint8x8_t b = simde_uint8x8_from_private(b_); simde_uint8x8_t r = simde_vtbl3_u8(a, b); simde_test_arm_neon_write_u8x8x3(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtbl4_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t a[4][8]; SIMDE_ALIGN_TO_16 int8_t b[8]; SIMDE_ALIGN_TO_16 int8_t r[8]; } test_vec[] = { { { { INT8_C( 80), INT8_C( 36), INT8_C( 51), INT8_C( 30), INT8_C( 72), INT8_C( 86), -INT8_C( 48), INT8_C( 15) }, { -INT8_C( 40), INT8_C( 67), INT8_C( 47), -INT8_C( 91), -INT8_C( 107), -INT8_C( 100), -INT8_C( 70), INT8_C( 120) }, { INT8_C( 84), INT8_C( 18), INT8_C( 9), INT8_C( 40), -INT8_C( 83), INT8_C( 41), -INT8_C( 95), INT8_C( 102) }, { -INT8_C( 53), INT8_C( 17), INT8_C( 57), INT8_C( 104), 
INT8_C( 105), -INT8_C( 103), INT8_MIN, -INT8_C( 71) } }, { -INT8_C( 67), INT8_C( 19), INT8_C( 23), INT8_C( 5), INT8_C( 9), INT8_C( 7), INT8_C( 20), INT8_C( 1) }, { INT8_C( 0), INT8_C( 40), INT8_C( 102), INT8_C( 86), INT8_C( 67), INT8_C( 15), -INT8_C( 83), INT8_C( 36) } }, { { { INT8_C( 83), INT8_C( 1), INT8_C( 92), INT8_C( 0), INT8_C( 42), -INT8_C( 3), INT8_C( 103), -INT8_C( 10) }, { INT8_C( 15), -INT8_C( 96), INT8_C( 94), INT8_C( 120), INT8_C( 57), -INT8_C( 34), INT8_C( 49), -INT8_C( 10) }, { -INT8_C( 111), INT8_C( 8), -INT8_C( 5), -INT8_C( 102), -INT8_C( 80), INT8_C( 16), INT8_C( 123), -INT8_C( 101) }, { INT8_C( 84), INT8_C( 2), INT8_C( 27), INT8_C( 52), INT8_C( 67), INT8_C( 20), INT8_C( 104), -INT8_C( 106) } }, { INT8_C( 21), INT8_C( 4), INT8_C( 23), INT8_C( 0), INT8_C( 1), -INT8_C( 2), INT8_C( 22), -INT8_C( 48) }, { INT8_C( 16), INT8_C( 42), -INT8_C( 101), INT8_C( 83), INT8_C( 1), INT8_C( 0), INT8_C( 123), INT8_C( 0) } }, { { { -INT8_C( 126), -INT8_C( 54), -INT8_C( 99), INT8_C( 50), -INT8_C( 38), INT8_C( 24), -INT8_C( 51), INT8_C( 46) }, { INT8_C( 27), -INT8_C( 24), INT8_C( 98), INT8_C( 94), -INT8_C( 4), -INT8_C( 54), -INT8_C( 11), INT8_C( 18) }, { -INT8_C( 114), -INT8_C( 116), INT8_C( 82), INT8_C( 79), -INT8_C( 118), -INT8_C( 120), INT8_C( 32), INT8_C( 40) }, { INT8_C( 28), INT8_C( 104), INT8_C( 0), -INT8_C( 114), -INT8_C( 30), -INT8_C( 49), -INT8_C( 111), INT8_C( 100) } }, { INT8_C( 25), INT8_C( 14), INT8_C( 22), INT8_C( 19), INT8_C( 70), INT8_C( 3), -INT8_C( 95), INT8_C( 97) }, { INT8_C( 104), -INT8_C( 11), INT8_C( 32), INT8_C( 79), INT8_C( 0), INT8_C( 50), INT8_C( 0), INT8_C( 0) } }, { { { INT8_C( 65), -INT8_C( 84), -INT8_C( 86), -INT8_C( 53), INT8_C( 52), -INT8_C( 54), -INT8_C( 13), INT8_C( 80) }, { INT8_C( 51), -INT8_C( 12), -INT8_C( 34), INT8_C( 21), -INT8_C( 61), INT8_C( 111), INT8_C( 121), INT8_C( 92) }, { -INT8_C( 99), INT8_C( 15), -INT8_C( 49), -INT8_C( 29), INT8_C( 114), INT8_C( 112), INT8_C( 69), -INT8_C( 67) }, { INT8_C( 115), INT8_C( 5), INT8_C( 
5), INT8_C( 64), -INT8_C( 70), INT8_C( 95), -INT8_C( 101), -INT8_C( 5) } }, { INT8_C( 11), INT8_C( 5), INT8_C( 6), INT8_C( 31), INT8_C( 16), INT8_C( 25), INT8_C( 15), INT8_C( 3) }, { INT8_C( 21), -INT8_C( 54), -INT8_C( 13), -INT8_C( 5), -INT8_C( 99), INT8_C( 5), INT8_C( 92), -INT8_C( 53) } }, { { { -INT8_C( 32), -INT8_C( 101), INT8_C( 93), INT8_C( 82), INT8_C( 11), -INT8_C( 94), INT8_C( 15), INT8_C( 126) }, { -INT8_C( 89), INT8_C( 21), -INT8_C( 66), INT8_C( 97), INT8_C( 116), INT8_C( 89), INT8_C( 92), INT8_MIN }, { -INT8_C( 97), INT8_C( 34), -INT8_C( 65), -INT8_C( 81), -INT8_C( 37), INT8_C( 79), -INT8_C( 14), -INT8_C( 119) }, { -INT8_C( 68), INT8_C( 74), -INT8_C( 7), -INT8_C( 103), INT8_C( 27), -INT8_C( 58), INT8_C( 18), -INT8_C( 5) } }, { INT8_C( 1), INT8_C( 15), INT8_C( 13), INT8_C( 13), INT8_C( 17), INT8_C( 28), INT8_C( 11), INT8_C( 24) }, { -INT8_C( 101), INT8_MIN, INT8_C( 89), INT8_C( 89), INT8_C( 34), INT8_C( 27), INT8_C( 97), -INT8_C( 68) } }, { { { -INT8_C( 105), INT8_C( 37), INT8_C( 81), INT8_C( 115), INT8_C( 116), INT8_C( 67), -INT8_C( 4), INT8_C( 49) }, { -INT8_C( 115), -INT8_C( 11), -INT8_C( 54), -INT8_C( 88), -INT8_C( 69), -INT8_C( 36), -INT8_C( 93), INT8_C( 29) }, { INT8_C( 76), -INT8_C( 16), -INT8_C( 118), INT8_C( 93), INT8_C( 77), INT8_C( 117), INT8_C( 22), -INT8_C( 66) }, { INT8_C( 31), INT8_C( 47), -INT8_C( 92), INT8_C( 35), -INT8_C( 91), INT8_C( 10), -INT8_C( 59), INT8_C( 60) } }, { INT8_C( 16), INT8_C( 23), INT8_C( 15), INT8_C( 4), INT8_C( 26), -INT8_C( 85), INT8_C( 21), INT8_C( 8) }, { INT8_C( 76), -INT8_C( 66), INT8_C( 29), INT8_C( 116), -INT8_C( 92), INT8_C( 0), INT8_C( 117), -INT8_C( 115) } }, { { { INT8_C( 36), INT8_C( 3), INT8_C( 37), INT8_C( 113), INT8_C( 121), INT8_C( 59), INT8_C( 48), -INT8_C( 104) }, { INT8_C( 107), -INT8_C( 44), -INT8_C( 69), INT8_C( 16), -INT8_C( 33), -INT8_C( 127), INT8_C( 76), INT8_C( 15) }, { -INT8_C( 104), -INT8_C( 4), -INT8_C( 77), -INT8_C( 14), -INT8_C( 89), -INT8_C( 119), -INT8_C( 38), INT8_C( 72) }, { INT8_C( 
40), INT8_C( 107), -INT8_C( 91), -INT8_C( 92), -INT8_C( 97), INT8_C( 30), INT8_C( 108), -INT8_C( 61) } }, { INT8_C( 2), INT8_C( 18), INT8_C( 53), INT8_C( 27), INT8_C( 13), INT8_C( 5), INT8_C( 19), INT8_C( 24) }, { INT8_C( 37), -INT8_C( 77), INT8_C( 0), -INT8_C( 92), -INT8_C( 127), INT8_C( 59), -INT8_C( 14), INT8_C( 40) } }, { { { -INT8_C( 111), -INT8_C( 37), -INT8_C( 6), INT8_C( 56), INT8_C( 100), -INT8_C( 43), -INT8_C( 127), -INT8_C( 116) }, { INT8_C( 64), INT8_C( 38), INT8_C( 49), -INT8_C( 33), INT8_C( 68), -INT8_C( 99), -INT8_C( 94), INT8_C( 102) }, { INT8_C( 47), -INT8_C( 41), INT8_C( 1), -INT8_C( 3), INT8_C( 60), INT8_C( 53), INT8_C( 53), INT8_C( 118) }, { INT8_C( 36), INT8_C( 126), -INT8_C( 114), -INT8_C( 108), INT8_C( 19), -INT8_C( 74), -INT8_C( 100), -INT8_C( 92) } }, { INT8_C( 17), INT8_C( 22), INT8_C( 28), INT8_C( 21), INT8_C( 11), INT8_C( 29), INT8_C( 1), INT8_C( 11) }, { -INT8_C( 41), INT8_C( 53), INT8_C( 19), INT8_C( 53), -INT8_C( 33), -INT8_C( 74), -INT8_C( 37), -INT8_C( 33) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8x4_t a; a.val[0] = simde_vld1_s8(test_vec[i].a[0]); a.val[1] = simde_vld1_s8(test_vec[i].a[1]); a.val[2] = simde_vld1_s8(test_vec[i].a[2]); a.val[3] = simde_vld1_s8(test_vec[i].a[3]); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vtbl4_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8x4_t a = simde_test_arm_neon_random_i8x8x4(); simde_int8x8_private b_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { b_.values[j] &= 31; } } simde_int8x8_t b = simde_int8x8_from_private(b_); simde_int8x8_t r = simde_vtbl4_s8(a, b); simde_test_arm_neon_write_i8x8x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtbl4_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t a[4][8]; SIMDE_ALIGN_TO_16 uint8_t b[8]; SIMDE_ALIGN_TO_16 uint8_t r[8]; } test_vec[] = { { { { UINT8_C(202), UINT8_C(241), UINT8_C(181), UINT8_C( 46), UINT8_C( 29), UINT8_C( 91), UINT8_C(135), UINT8_C(225) }, { UINT8_C(183), UINT8_C(167), UINT8_C(250), UINT8_C( 29), UINT8_C( 81), UINT8_C(210), UINT8_C( 74), UINT8_C(199) }, { UINT8_C( 26), UINT8_C(215), UINT8_C( 11), UINT8_C(119), UINT8_C( 22), UINT8_C(138), UINT8_C(201), UINT8_C(123) }, { UINT8_C(187), UINT8_C(202), UINT8_C(201), UINT8_C( 90), UINT8_C( 24), UINT8_C( 18), UINT8_C(225), UINT8_C(227) } }, { UINT8_C( 3), UINT8_C( 22), UINT8_C( 17), UINT8_C( 1), UINT8_C(241), UINT8_C( 25), UINT8_C( 2), UINT8_C(169) }, { UINT8_C( 46), UINT8_C(201), UINT8_C(215), UINT8_C(241), UINT8_C( 0), UINT8_C(202), UINT8_C(181), UINT8_C( 0) } }, { { { UINT8_C(232), UINT8_C( 99), UINT8_C( 96), UINT8_C(254), UINT8_C(237), UINT8_C( 41), UINT8_C(121), UINT8_C(168) }, { UINT8_C(243), UINT8_C( 67), UINT8_C( 3), UINT8_C( 12), UINT8_C( 85), UINT8_C(228), UINT8_C(239), UINT8_C( 88) }, { UINT8_C(123), UINT8_C( 0), UINT8_C(121), UINT8_C(108), UINT8_C(153), UINT8_C(124), UINT8_C( 21), UINT8_C(217) }, { UINT8_C(121), UINT8_C(219), UINT8_C(106), UINT8_C( 72), UINT8_C(236), UINT8_C(194), UINT8_C( 49), UINT8_C(212) } }, { UINT8_C( 5), UINT8_C( 17), UINT8_C( 18), UINT8_C( 18), UINT8_C( 26), UINT8_C( 12), UINT8_C( 27), UINT8_C( 13) }, { UINT8_C( 41), UINT8_C( 0), UINT8_C(121), UINT8_C(121), UINT8_C(106), UINT8_C( 85), UINT8_C( 72), UINT8_C(228) } }, { { { UINT8_C(169), UINT8_C(182), UINT8_C(138), UINT8_C( 66), UINT8_C( 50), UINT8_C(159), UINT8_C( 28), UINT8_C(171) }, { UINT8_C(123), UINT8_C(134), UINT8_C(243), UINT8_C(103), UINT8_C( 73), UINT8_C( 36), UINT8_C( 59), UINT8_C(110) }, { UINT8_C(181), UINT8_C( 13), UINT8_C(129), UINT8_C(111), 
UINT8_C( 89), UINT8_C( 60), UINT8_C( 28), UINT8_C(232) }, { UINT8_C(250), UINT8_C(214), UINT8_C(204), UINT8_C(156), UINT8_C(126), UINT8_C( 9), UINT8_C(186), UINT8_C( 39) } }, { UINT8_C( 31), UINT8_C( 4), UINT8_C(106), UINT8_C( 17), UINT8_C( 3), UINT8_C( 6), UINT8_C( 28), UINT8_C( 30) }, { UINT8_C( 39), UINT8_C( 50), UINT8_C( 0), UINT8_C( 13), UINT8_C( 66), UINT8_C( 28), UINT8_C(126), UINT8_C(186) } }, { { { UINT8_C( 14), UINT8_C( 69), UINT8_C(215), UINT8_C(103), UINT8_C(129), UINT8_C(243), UINT8_C( 80), UINT8_C(123) }, { UINT8_C(201), UINT8_C( 28), UINT8_C( 23), UINT8_C( 72), UINT8_C( 37), UINT8_C(209), UINT8_C(111), UINT8_C(228) }, { UINT8_C( 21), UINT8_C(217), UINT8_C(213), UINT8_C(249), UINT8_C( 95), UINT8_C(113), UINT8_C( 87), UINT8_C(108) }, { UINT8_C( 0), UINT8_C( 29), UINT8_C(193), UINT8_C(179), UINT8_C( 29), UINT8_C(133), UINT8_C( 27), UINT8_C( 43) } }, { UINT8_C( 10), UINT8_C(242), UINT8_C( 19), UINT8_C( 75), UINT8_C( 6), UINT8_C(227), UINT8_C(198), UINT8_C( 15) }, { UINT8_C( 23), UINT8_C( 0), UINT8_C(249), UINT8_C( 0), UINT8_C( 80), UINT8_C( 0), UINT8_C( 0), UINT8_C(228) } }, { { { UINT8_C( 64), UINT8_C(223), UINT8_C(190), UINT8_C(160), UINT8_C( 80), UINT8_C( 21), UINT8_C( 12), UINT8_C( 81) }, { UINT8_C( 50), UINT8_C(205), UINT8_C( 4), UINT8_C( 80), UINT8_C( 83), UINT8_C( 32), UINT8_C(123), UINT8_C( 29) }, { UINT8_C( 18), UINT8_C( 14), UINT8_C(105), UINT8_C(248), UINT8_C(241), UINT8_C( 47), UINT8_C(168), UINT8_C(241) }, { UINT8_C( 13), UINT8_C(159), UINT8_C( 22), UINT8_C(189), UINT8_C( 6), UINT8_C( 31), UINT8_C(130), UINT8_C( 71) } }, { UINT8_C(254), UINT8_C( 0), UINT8_C( 7), UINT8_C( 15), UINT8_C( 21), UINT8_C( 19), UINT8_C( 0), UINT8_C( 8) }, { UINT8_C( 0), UINT8_C( 64), UINT8_C( 81), UINT8_C( 29), UINT8_C( 47), UINT8_C(248), UINT8_C( 64), UINT8_C( 50) } }, { { { UINT8_C( 98), UINT8_C(154), UINT8_C(207), UINT8_C( 83), UINT8_C(201), UINT8_C(119), UINT8_C( 68), UINT8_C(215) }, { UINT8_C( 23), UINT8_C( 90), UINT8_C(148), UINT8_C( 29), UINT8_C(122), 
UINT8_C( 22), UINT8_C(100), UINT8_C(120) }, { UINT8_C( 86), UINT8_C( 75), UINT8_C(199), UINT8_C(171), UINT8_C( 62), UINT8_C(103), UINT8_C( 51), UINT8_MAX }, { UINT8_C( 12), UINT8_C( 11), UINT8_C( 18), UINT8_C(208), UINT8_C( 95), UINT8_C( 67), UINT8_C(167), UINT8_C(193) } }, { UINT8_C( 29), UINT8_C( 23), UINT8_C( 20), UINT8_C( 7), UINT8_C( 14), UINT8_C( 25), UINT8_C( 30), UINT8_C( 5) }, { UINT8_C( 67), UINT8_MAX, UINT8_C( 62), UINT8_C(215), UINT8_C(100), UINT8_C( 11), UINT8_C(167), UINT8_C(119) } }, { { { UINT8_C(211), UINT8_C(109), UINT8_C( 41), UINT8_C( 17), UINT8_C(213), UINT8_C( 93), UINT8_C( 16), UINT8_C(225) }, { UINT8_C(104), UINT8_C( 35), UINT8_C(177), UINT8_C(199), UINT8_C(102), UINT8_C( 89), UINT8_C(136), UINT8_C( 68) }, { UINT8_C(208), UINT8_C(157), UINT8_C(235), UINT8_C(190), UINT8_C(246), UINT8_C(105), UINT8_C(196), UINT8_C(169) }, { UINT8_C(123), UINT8_C(231), UINT8_C(215), UINT8_C(163), UINT8_C(110), UINT8_C(125), UINT8_C( 33), UINT8_C( 65) } }, { UINT8_C( 10), UINT8_C( 10), UINT8_C( 19), UINT8_C( 31), UINT8_C(167), UINT8_C( 3), UINT8_C(160), UINT8_C( 16) }, { UINT8_C(177), UINT8_C(177), UINT8_C(190), UINT8_C( 65), UINT8_C( 0), UINT8_C( 17), UINT8_C( 0), UINT8_C(208) } }, { { { UINT8_C(253), UINT8_C( 28), UINT8_C( 57), UINT8_C(243), UINT8_C(133), UINT8_C(253), UINT8_C(156), UINT8_C( 0) }, { UINT8_C(228), UINT8_C(115), UINT8_C(163), UINT8_C( 83), UINT8_C(240), UINT8_C(196), UINT8_C(148), UINT8_C(219) }, { UINT8_C( 14), UINT8_C(231), UINT8_C(154), UINT8_C(182), UINT8_C( 75), UINT8_C( 59), UINT8_C(198), UINT8_C(209) }, { UINT8_C(141), UINT8_C(157), UINT8_C(190), UINT8_C( 56), UINT8_C(253), UINT8_C(239), UINT8_C(179), UINT8_C(250) } }, { UINT8_C( 11), UINT8_C( 12), UINT8_C( 13), UINT8_C( 16), UINT8_C(234), UINT8_C(138), UINT8_C( 16), UINT8_C( 14) }, { UINT8_C( 83), UINT8_C(240), UINT8_C(196), UINT8_C( 14), UINT8_C( 0), UINT8_C( 0), UINT8_C( 14), UINT8_C(148) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde_uint8x8x4_t a; a.val[0] = simde_vld1_u8(test_vec[i].a[0]); a.val[1] = simde_vld1_u8(test_vec[i].a[1]); a.val[2] = simde_vld1_u8(test_vec[i].a[2]); a.val[3] = simde_vld1_u8(test_vec[i].a[3]); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vtbl4_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8x4_t a = simde_test_arm_neon_random_u8x8x4(); simde_uint8x8_private b_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(b_.values) / sizeof(b_.values[0])) ; j++) { if (probability(PROBABILITY)) { b_.values[j] &= 31; } } simde_uint8x8_t b = simde_uint8x8_from_private(b_); simde_uint8x8_t r = simde_vtbl4_u8(a, b); simde_test_arm_neon_write_u8x8x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vtbl1_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtbl1_u8) #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_TEST_FUNC_LIST_ENTRY(vtbl2_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtbl2_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vtbl3_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtbl3_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vtbl4_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtbl4_u8) #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/tbx.c000066400000000000000000001622721400333146700162720ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN tbx #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/tbx.h" #else #include "../../../simde/arm/neon.h" #endif #if 0 #define PROBABILITY 80 #define probability(p) (rand() < ((HEDLEY_STATIC_CAST(int64_t, RAND_MAX) * (p)) / 100)) #endif static int 
test_simde_vtbx1_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t a[8]; SIMDE_ALIGN_TO_16 int8_t b[8]; SIMDE_ALIGN_TO_16 int8_t c[8]; SIMDE_ALIGN_TO_16 int8_t r[8]; } test_vec[] = { { { INT8_C( 37), -INT8_C( 53), INT8_C( 27), INT8_C( 42), -INT8_C( 10), -INT8_C( 65), INT8_C( 122), -INT8_C( 112) }, { -INT8_C( 51), -INT8_C( 51), INT8_C( 80), INT8_C( 6), -INT8_C( 1), -INT8_C( 100), -INT8_C( 123), INT8_C( 47) }, { INT8_C( 1), INT8_C( 112), INT8_C( 7), INT8_C( 3), -INT8_C( 80), -INT8_C( 60), INT8_C( 3), -INT8_C( 89) }, { -INT8_C( 51), -INT8_C( 53), INT8_C( 47), INT8_C( 6), -INT8_C( 10), -INT8_C( 65), INT8_C( 6), -INT8_C( 112) } }, { { INT8_C( 123), INT8_C( 67), -INT8_C( 58), INT8_C( 113), INT8_C( 2), INT8_C( 64), INT8_C( 1), -INT8_C( 48) }, { INT8_C( 13), INT8_C( 81), -INT8_C( 42), INT8_C( 12), -INT8_C( 18), INT8_C( 91), INT8_C( 60), -INT8_C( 17) }, { INT8_C( 3), INT8_C( 3), INT8_C( 2), INT8_C( 4), INT8_C( 0), INT8_C( 5), INT8_C( 3), INT8_C( 7) }, { INT8_C( 12), INT8_C( 12), -INT8_C( 42), -INT8_C( 18), INT8_C( 13), INT8_C( 91), INT8_C( 12), -INT8_C( 17) } }, { { -INT8_C( 59), -INT8_C( 85), INT8_C( 93), -INT8_C( 57), -INT8_C( 21), INT8_C( 94), -INT8_C( 105), -INT8_C( 7) }, { -INT8_C( 80), INT8_C( 109), INT8_C( 5), -INT8_C( 98), -INT8_C( 55), INT8_C( 65), -INT8_C( 115), -INT8_C( 108) }, { INT8_C( 5), INT8_C( 0), INT8_C( 0), INT8_C( 5), INT8_C( 5), INT8_C( 3), INT8_C( 4), INT8_C( 1) }, { INT8_C( 65), -INT8_C( 80), -INT8_C( 80), INT8_C( 65), INT8_C( 65), -INT8_C( 98), -INT8_C( 55), INT8_C( 109) } }, { { INT8_C( 42), -INT8_C( 79), INT8_C( 11), INT8_C( 22), INT8_C( 15), -INT8_C( 93), INT8_C( 15), -INT8_C( 65) }, { INT8_C( 16), INT8_C( 20), INT8_C( 93), -INT8_C( 39), INT8_C( 86), -INT8_C( 21), INT8_C( 110), -INT8_C( 101) }, { INT8_C( 19), INT8_C( 6), INT8_C( 0), -INT8_C( 112), INT8_C( 2), INT8_C( 4), INT8_C( 1), -INT8_C( 89) }, { INT8_C( 42), INT8_C( 110), INT8_C( 16), INT8_C( 22), INT8_C( 93), INT8_C( 86), INT8_C( 20), -INT8_C( 65) } }, { { INT8_C( 19), 
INT8_C( 24), INT8_C( 60), INT8_C( 34), -INT8_C( 69), INT8_C( 75), -INT8_C( 30), -INT8_C( 53) }, { INT8_C( 95), INT8_C( 63), -INT8_C( 91), -INT8_C( 75), INT8_C( 42), INT8_C( 19), INT8_C( 80), INT8_C( 61) }, { INT8_C( 1), -INT8_C( 8), INT8_C( 6), INT8_C( 3), INT8_C( 4), INT8_C( 7), INT8_C( 3), INT8_C( 6) }, { INT8_C( 63), INT8_C( 24), INT8_C( 80), -INT8_C( 75), INT8_C( 42), INT8_C( 61), -INT8_C( 75), INT8_C( 80) } }, { { INT8_C( 123), -INT8_C( 119), -INT8_C( 111), INT8_C( 54), -INT8_C( 44), INT8_C( 115), INT8_C( 1), INT8_C( 51) }, { -INT8_C( 78), -INT8_C( 90), -INT8_C( 23), -INT8_C( 35), -INT8_C( 71), INT8_C( 57), INT8_C( 26), INT8_C( 75) }, { INT8_C( 50), INT8_C( 0), INT8_C( 6), INT8_C( 6), -INT8_C( 104), INT8_C( 1), INT8_C( 4), -INT8_C( 61) }, { INT8_C( 123), -INT8_C( 78), INT8_C( 26), INT8_C( 26), -INT8_C( 44), -INT8_C( 90), -INT8_C( 71), INT8_C( 51) } }, { { -INT8_C( 64), -INT8_C( 50), INT8_C( 111), -INT8_C( 108), INT8_C( 65), INT8_C( 112), -INT8_C( 56), -INT8_C( 13) }, { INT8_C( 23), -INT8_C( 79), -INT8_C( 48), -INT8_C( 48), -INT8_C( 22), -INT8_C( 21), INT8_C( 27), INT8_C( 28) }, { INT8_C( 3), INT8_C( 2), -INT8_C( 109), INT8_C( 3), INT8_C( 35), INT8_C( 7), INT8_C( 7), INT8_C( 6) }, { -INT8_C( 48), -INT8_C( 48), INT8_C( 111), -INT8_C( 48), INT8_C( 65), INT8_C( 28), INT8_C( 28), INT8_C( 27) } }, { { INT8_C( 112), INT8_C( 88), -INT8_C( 90), -INT8_C( 79), -INT8_C( 55), INT8_C( 110), -INT8_C( 92), -INT8_C( 32) }, { INT8_C( 31), INT8_C( 117), -INT8_C( 80), INT8_C( 9), INT8_C( 96), -INT8_C( 52), INT8_C( 38), INT8_C( 51) }, { INT8_C( 118), INT8_C( 1), INT8_C( 7), -INT8_C( 103), INT8_C( 80), INT8_C( 6), INT8_C( 0), INT8_C( 3) }, { INT8_C( 112), INT8_C( 117), INT8_C( 51), -INT8_C( 79), -INT8_C( 55), INT8_C( 38), INT8_C( 31), INT8_C( 9) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t c = simde_vld1_s8(test_vec[i].c); 
simde_int8x8_t r = simde_vtbx1_s8(a, b, c);
    simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r));
  }

  return 0;
#else
  /* Generator branch of the preceding s8 test (compiled out by `#if 1`). */
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_int8x8_t a = simde_test_arm_neon_random_i8x8();
    simde_int8x8_t b = simde_test_arm_neon_random_i8x8();
    simde_int8x8_private c_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8());
    for (size_t j = 0 ; j < (sizeof(c_.values) / sizeof(c_.values[0])) ; j++) {
      /* When probability(PROBABILITY) fires, clamp the index into 0..7. */
      if (probability(PROBABILITY)) {
        c_.values[j] &= 7;
      }
    }
    simde_int8x8_t c = simde_int8x8_from_private(c_);
    simde_int8x8_t r = simde_vtbx1_s8(a, b, c);

    simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_i8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Test for simde_vtbx1_u8.
 *
 * With the `#if 1` branch active, every test_vec entry holds three 8-lane
 * inputs (a, b, c) and the expected result r.  In the rows below, r[i] is
 * b[c[i]] whenever c[i] is a valid index into b (0..7) and a[i] otherwise.
 * Each entry is loaded with simde_vld1_u8, passed to simde_vtbx1_u8(a, b, c)
 * and compared with simde_test_arm_neon_assert_equal_u8x8.  Returns 0 after
 * all entries have been processed.
 *
 * The `#else` branch (compiled out) builds 8 random input sets, runs the
 * intrinsic, hands inputs and result to the simde_test_arm_neon_write_u8x8
 * helper (presumably to print new vectors -- confirm against the test
 * harness) and returns 1. */
static int
test_simde_vtbx1_u8 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  struct {
    SIMDE_ALIGN_TO_16 uint8_t a[8]; /* values kept where c[i] is out of range */
    SIMDE_ALIGN_TO_16 uint8_t b[8]; /* lookup table */
    SIMDE_ALIGN_TO_16 uint8_t c[8]; /* per-lane indices */
    SIMDE_ALIGN_TO_16 uint8_t r[8]; /* expected result */
  } test_vec[] = {
    { { UINT8_C( 43), UINT8_C(177), UINT8_C(250), UINT8_C(224), UINT8_C(205), UINT8_C( 54), UINT8_C(248), UINT8_C(150) },
      { UINT8_C(177), UINT8_C( 61), UINT8_C(200), UINT8_C( 40), UINT8_C( 83), UINT8_C( 15), UINT8_C( 13), UINT8_C(128) },
      { UINT8_C( 5), UINT8_C( 7), UINT8_C( 3), UINT8_C( 4), UINT8_C( 0), UINT8_C( 5), UINT8_C( 2), UINT8_C( 7) },
      { UINT8_C( 15), UINT8_C(128), UINT8_C( 40), UINT8_C( 83), UINT8_C(177), UINT8_C( 15), UINT8_C(200), UINT8_C(128) } },
    { { UINT8_C( 88), UINT8_C( 17), UINT8_C(177), UINT8_C( 37), UINT8_C( 71), UINT8_C(170), UINT8_C(187), UINT8_C(249) },
      { UINT8_C(231), UINT8_C(131), UINT8_C( 33), UINT8_C( 59), UINT8_C(146), UINT8_C( 47), UINT8_C(187), UINT8_C(240) },
      { UINT8_C( 6), UINT8_C( 6), UINT8_C( 4), UINT8_C( 6), UINT8_C( 11), UINT8_C( 6), UINT8_C( 6), UINT8_C( 6) },
      { UINT8_C(187), UINT8_C(187), UINT8_C(146), UINT8_C(187), UINT8_C( 71), UINT8_C(187), UINT8_C(187), UINT8_C(187) } },
    { { UINT8_C(231), UINT8_C(161), UINT8_C( 89), UINT8_C( 46), UINT8_C( 75), UINT8_C( 21), UINT8_C( 39), UINT8_C( 50) },
      { UINT8_C(152), UINT8_C( 73), UINT8_C(109), UINT8_C( 43), UINT8_C(120), UINT8_C( 40), UINT8_C( 27), UINT8_C(254) },
      { UINT8_C( 30), UINT8_C(127), UINT8_C( 4), UINT8_C( 41), UINT8_C( 6), UINT8_C( 2), UINT8_C( 7), UINT8_C( 7) },
      { UINT8_C(231), UINT8_C(161), UINT8_C(120), UINT8_C( 46), UINT8_C( 27), UINT8_C(109), UINT8_C(254), UINT8_C(254) } },
    { { UINT8_C(174), UINT8_C(225), UINT8_C( 10), UINT8_C(249), UINT8_C(246), UINT8_C( 49), UINT8_C( 43), UINT8_C(142) },
      { UINT8_C(122), UINT8_C(153), UINT8_C(185), UINT8_C(242), UINT8_C(193), UINT8_C(212), UINT8_C(240), UINT8_C(224) },
      { UINT8_C( 84), UINT8_C( 5), UINT8_C( 9), UINT8_C( 2), UINT8_C( 71), UINT8_C( 1), UINT8_C(217), UINT8_C( 7) },
      { UINT8_C(174), UINT8_C(212), UINT8_C( 10), UINT8_C(185), UINT8_C(246), UINT8_C(153), UINT8_C( 43), UINT8_C(224) } },
    { { UINT8_C(123), UINT8_C( 97), UINT8_C(170), UINT8_C(113), UINT8_C(147), UINT8_C(214), UINT8_MAX, UINT8_C( 13) },
      { UINT8_C(111), UINT8_C(185), UINT8_C( 0), UINT8_C( 48), UINT8_C(141), UINT8_C(240), UINT8_C( 16), UINT8_C(225) },
      { UINT8_C( 5), UINT8_C( 2), UINT8_C(187), UINT8_C( 5), UINT8_C( 3), UINT8_C(148), UINT8_C( 4), UINT8_C( 10) },
      { UINT8_C(240), UINT8_C( 0), UINT8_C(170), UINT8_C(240), UINT8_C( 48), UINT8_C(214), UINT8_C(141), UINT8_C( 13) } },
    { { UINT8_C( 63), UINT8_C(234), UINT8_C(148), UINT8_C(210), UINT8_C(192), UINT8_C(148), UINT8_C(224), UINT8_C( 47) },
      { UINT8_C( 77), UINT8_C(224), UINT8_C( 96), UINT8_C(218), UINT8_C(208), UINT8_C(112), UINT8_C(188), UINT8_C(198) },
      { UINT8_C( 2), UINT8_C(119), UINT8_C( 3), UINT8_C( 5), UINT8_C( 4), UINT8_C( 7), UINT8_C( 24), UINT8_C( 7) },
      { UINT8_C( 96), UINT8_C(234), UINT8_C(218), UINT8_C(112), UINT8_C(208), UINT8_C(198), UINT8_C(224), UINT8_C(198) } },
    { { UINT8_C( 80), UINT8_C(110), UINT8_C(142), UINT8_C( 16), UINT8_C( 2), UINT8_C(110), UINT8_C( 64), UINT8_C( 79) },
      { UINT8_C( 78), UINT8_C(160), UINT8_C( 41), UINT8_C( 31), UINT8_C( 16), UINT8_C(229), UINT8_C(229), UINT8_C(155) },
      { UINT8_C( 5), UINT8_C( 0), UINT8_C( 0), UINT8_C(105), UINT8_C( 7), UINT8_C( 0), UINT8_C( 0), UINT8_C( 4) },
      { UINT8_C(229), UINT8_C( 78), UINT8_C( 78), UINT8_C( 16), UINT8_C(155), UINT8_C( 78), UINT8_C( 78), UINT8_C( 16) } },
    { { UINT8_C(242), UINT8_C( 38), UINT8_C(235), UINT8_C(244), UINT8_C(148), UINT8_C( 43), UINT8_C( 67), UINT8_C(227) },
      { UINT8_C(203), UINT8_C(108), UINT8_C( 2), UINT8_C(220), UINT8_C( 82), UINT8_C(231), UINT8_C(119), UINT8_C(175) },
      { UINT8_C( 7), UINT8_C( 7), UINT8_C( 0), UINT8_C(182), UINT8_C( 0), UINT8_C( 0), UINT8_C(170), UINT8_C( 7) },
      { UINT8_C(175), UINT8_C(175), UINT8_C(203), UINT8_C(244), UINT8_C(203), UINT8_C(203), UINT8_C( 67), UINT8_C(175) } },
  };

  /* Load each row, run the intrinsic and compare with the expected lanes. */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a);
    simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b);
    simde_uint8x8_t c = simde_vld1_u8(test_vec[i].c);
    simde_uint8x8_t r = simde_vtbx1_u8(a, b, c);
    simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r));
  }

  return 0;
#else
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_uint8x8_t a = simde_test_arm_neon_random_u8x8();
    simde_uint8x8_t b = simde_test_arm_neon_random_u8x8();
    simde_uint8x8_private c_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8());
    for (size_t j = 0 ; j < (sizeof(c_.values) / sizeof(c_.values[0])) ; j++) {
      /* When probability(PROBABILITY) fires, clamp the index into 0..7 so
       * it selects from b; other lanes keep their random value. */
      if (probability(PROBABILITY)) {
        c_.values[j] &= 7;
      }
    }
    simde_uint8x8_t c = simde_uint8x8_from_private(c_);
    simde_uint8x8_t r = simde_vtbx1_u8(a, b, c);

    simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

#if !defined(SIMDE_BUG_INTEL_857088)
static int
test_simde_vtbx2_s8
(SIMDE_MUNIT_TEST_ARGS) {
  /* Body of the simde_vtbx2_s8 test.
   *
   * With `#if 1` active, each test_vec entry holds a, a two-part table
   * b[0]/b[1], the indices c and the expected result r.  In the rows below,
   * r[i] is taken from the 16-entry concatenation of b[0] and b[1] when
   * c[i] is in 0..15, and is a[i] otherwise (including negative c[i]).
   * Returns 0 after all entries have been processed.
   *
   * The `#else` branch (compiled out) builds 8 random input sets, runs the
   * intrinsic, hands everything to the simde_test_arm_neon_write_* helpers
   * (presumably to print new vectors -- confirm) and returns 1. */
#if 1
  struct {
    SIMDE_ALIGN_TO_16 int8_t a[8];
    SIMDE_ALIGN_TO_16 int8_t b[2][8];
    SIMDE_ALIGN_TO_16 int8_t c[8];
    SIMDE_ALIGN_TO_16 int8_t r[8];
  } test_vec[] = {
    { { INT8_C( 43), -INT8_C( 44), -INT8_C( 14), -INT8_C( 19), INT8_C( 84), INT8_C( 72), INT8_C( 125), INT8_C( 94) },
      { { INT8_C( 106), -INT8_C( 98), INT8_C( 24), -INT8_C( 9), -INT8_C( 40), INT8_C( 36), -INT8_C( 25), INT8_C( 3) },
        { INT8_C( 113), INT8_C( 40), -INT8_C( 48), -INT8_C( 2), -INT8_C( 25), -INT8_C( 61), INT8_C( 90), -INT8_C( 92) } },
      { INT8_C( 13), INT8_C( 8), INT8_C( 0), INT8_C( 9), INT8_C( 7), INT8_C( 3), INT8_C( 5), INT8_C( 2) },
      { -INT8_C( 61), INT8_C( 113), INT8_C( 106), INT8_C( 40), INT8_C( 3), -INT8_C( 9), INT8_C( 36), INT8_C( 24) } },
    { { INT8_C( 75), -INT8_C( 61), -INT8_C( 95), INT8_C( 35), -INT8_C( 25), -INT8_C( 120), INT8_C( 38), INT8_C( 89) },
      { { -INT8_C( 80), -INT8_C( 9), INT8_C( 87), -INT8_C( 105), -INT8_C( 70), -INT8_C( 79), INT8_C( 59), -INT8_C( 57) },
        { -INT8_C( 103), -INT8_C( 21), -INT8_C( 111), -INT8_C( 80), INT8_C( 15), -INT8_C( 106), -INT8_C( 14), INT8_C( 7) } },
      { INT8_C( 14), INT8_C( 33), INT8_C( 3), INT8_C( 14), -INT8_C( 50), INT8_C( 14), INT8_C( 8), INT8_C( 25) },
      { -INT8_C( 14), -INT8_C( 61), -INT8_C( 105), -INT8_C( 14), -INT8_C( 25), -INT8_C( 14), -INT8_C( 103), INT8_C( 89) } },
    { { INT8_C( 90), INT8_C( 89), -INT8_C( 22), INT8_C( 20), INT8_C( 11), INT8_C( 37), -INT8_C( 37), -INT8_C( 92) },
      { { INT8_C( 17), INT8_C( 108), INT8_C( 85), INT8_C( 32), INT8_C( 3), INT8_C( 71), INT8_C( 39), -INT8_C( 111) },
        { INT8_C( 105), INT8_C( 122), INT8_C( 96), INT8_C( 55), INT8_C( 121), -INT8_C( 40), INT8_C( 80), INT8_C( 58) } },
      { INT8_C( 2), INT8_C( 12), INT8_C( 3), INT8_C( 4), INT8_C( 15), INT8_C( 5), INT8_C( 6), INT8_C( 73) },
      { INT8_C( 85), INT8_C( 121), INT8_C( 32), INT8_C( 3), INT8_C( 58), INT8_C( 71), INT8_C( 39), -INT8_C( 92) } },
    { { -INT8_C( 91), INT8_C( 67), INT8_C( 39), -INT8_C( 88), -INT8_C( 117), INT8_C( 78), INT8_C( 58), -INT8_C( 12) },
      { { -INT8_C( 56), -INT8_C( 102), INT8_C( 43), INT8_C( 65), INT8_C( 114), INT8_C( 123), INT8_C( 124), INT8_C( 100) },
        { INT8_C( 7), INT8_C( 95), -INT8_C( 8), -INT8_C( 9), INT8_C( 69), -INT8_C( 33), INT8_C( 64), -INT8_C( 124) } },
      { INT8_C( 15), INT8_C( 14), INT8_C( 14), INT8_C( 5), -INT8_C( 41), INT8_C( 12), INT8_C( 12), INT8_C( 124) },
      { -INT8_C( 124), INT8_C( 64), INT8_C( 64), INT8_C( 123), -INT8_C( 117), INT8_C( 69), INT8_C( 69), -INT8_C( 12) } },
    { { -INT8_C( 7), -INT8_C( 86), INT8_C( 43), INT8_C( 107), INT8_C( 37), -INT8_C( 89), -INT8_C( 48), INT8_C( 44) },
      { { INT8_C( 7), -INT8_C( 56), INT8_C( 35), INT8_C( 76), -INT8_C( 89), INT8_C( 100), -INT8_C( 48), INT8_C( 87) },
        { INT8_C( 2), -INT8_C( 98), -INT8_C( 4), -INT8_C( 39), INT8_C( 90), -INT8_C( 87), INT8_C( 85), INT8_C( 90) } },
      { INT8_C( 124), INT8_C( 10), -INT8_C( 27), INT8_C( 14), INT8_C( 9), INT8_C( 4), INT8_C( 8), INT8_C( 2) },
      { -INT8_C( 7), -INT8_C( 4), INT8_C( 43), INT8_C( 85), -INT8_C( 98), -INT8_C( 89), INT8_C( 2), INT8_C( 35) } },
    { { -INT8_C( 42), -INT8_C( 125), -INT8_C( 82), INT8_C( 126), -INT8_C( 25), INT8_C( 126), -INT8_C( 43), -INT8_C( 23) },
      { { INT8_C( 28), -INT8_C( 47), -INT8_C( 62), INT8_C( 118), INT8_C( 122), INT8_C( 24), -INT8_C( 47), -INT8_C( 9) },
        { -INT8_C( 110), -INT8_C( 74), -INT8_C( 107), INT8_C( 108), INT8_C( 27), INT8_C( 29), INT8_C( 62), INT8_C( 41) } },
      { INT8_C( 0), INT8_C( 12), INT8_C( 13), INT8_C( 11), -INT8_C( 118), INT8_C( 13), INT8_C( 13), INT8_C( 1) },
      { INT8_C( 28), INT8_C( 27), INT8_C( 29), INT8_C( 108), -INT8_C( 25), INT8_C( 29), INT8_C( 29), -INT8_C( 47) } },
    { { -INT8_C( 123), -INT8_C( 44), INT8_C( 76), INT8_C( 0), -INT8_C( 20), INT8_C( 29), -INT8_C( 9), INT8_C( 126) },
      { { -INT8_C( 45), -INT8_C( 116), -INT8_C( 22), -INT8_C( 18), -INT8_C( 87), INT8_C( 41), INT8_C( 24), INT8_C( 121) },
        { -INT8_C( 91), INT8_C( 117), -INT8_C( 91), INT8_C( 48), INT8_C( 50), INT8_C( 50), -INT8_C( 111), INT8_C( 114) } },
      { INT8_C( 110), INT8_C( 112), -INT8_C( 102), INT8_C( 7), INT8_C( 4), INT8_C( 12), INT8_C( 13), INT8_C( 9) },
      { -INT8_C( 123), -INT8_C( 44), INT8_C( 76), INT8_C( 121), -INT8_C( 87), INT8_C( 50), INT8_C( 50), INT8_C( 117) } },
    { { INT8_C( 44), -INT8_C( 43), INT8_C( 40), -INT8_C( 43), -INT8_C( 2), INT8_C( 64), INT8_C( 79), -INT8_C( 93) },
      { { -INT8_C( 75), -INT8_C( 12), -INT8_C( 45), -INT8_C( 25), INT8_C( 38), INT8_C( 100), INT8_C( 89), -INT8_C( 108) },
        { -INT8_C( 44), -INT8_C( 12), -INT8_C( 68), -INT8_C( 8), -INT8_C( 96), -INT8_C( 71), -INT8_C( 94), INT8_C( 32) } },
      { INT8_C( 2), INT8_C( 11), INT8_C( 12), INT8_C( 8), -INT8_C( 20), INT8_C( 6), INT8_C( 1), INT8_C( 8) },
      { -INT8_C( 45), -INT8_C( 8), -INT8_C( 96), -INT8_C( 44), -INT8_C( 2), INT8_C( 89), -INT8_C( 12), -INT8_C( 44) } },
  };

  /* Load each row (both table halves), run the intrinsic and compare. */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_int8x8_t a = simde_vld1_s8(test_vec[i].a);
    simde_int8x8x2_t b;
    b.val[0] = simde_vld1_s8(test_vec[i].b[0]);
    b.val[1] = simde_vld1_s8(test_vec[i].b[1]);
    simde_int8x8_t c = simde_vld1_s8(test_vec[i].c);
    simde_int8x8_t r = simde_vtbx2_s8(a, b, c);
    simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r));
  }

  return 0;
#else
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_int8x8_t a = simde_test_arm_neon_random_i8x8();
    simde_int8x8x2_t b = simde_test_arm_neon_random_i8x8x2();
    simde_int8x8_private c_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8());
    for (size_t j = 0 ; j < (sizeof(c_.values) / sizeof(c_.values[0])) ; j++) {
      /* When probability(PROBABILITY) fires, clamp the index into 0..15 so
       * it selects from the two-part table. */
      if (probability(PROBABILITY)) {
        c_.values[j] &= 15;
      }
    }
    simde_int8x8_t c = simde_int8x8_from_private(c_);
    simde_int8x8_t r = simde_vtbx2_s8(a, b, c);

    simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_i8x8x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_i8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Test for simde_vtbx2_u8: unsigned counterpart of the test above, checking
 * simde_vtbx2_u8(a, b, c) against precomputed rows. */
static int
test_simde_vtbx2_u8 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  struct {
    SIMDE_ALIGN_TO_16 uint8_t a[8];
/* Remaining fields of the test_vec struct used by the simde_vtbx2_u8 test:
 * b is the two-part lookup table, c the per-lane indices, r the expected
 * result.  In the rows below, r[i] comes from the 16-entry concatenation of
 * b[0] and b[1] when c[i] is in 0..15 and is a[i] otherwise. */
    SIMDE_ALIGN_TO_16 uint8_t b[2][8];
    SIMDE_ALIGN_TO_16 uint8_t c[8];
    SIMDE_ALIGN_TO_16 uint8_t r[8];
  } test_vec[] = {
    { { UINT8_C(243), UINT8_C(177), UINT8_C( 55), UINT8_C( 17), UINT8_C(166), UINT8_C(193), UINT8_C(205), UINT8_C( 24) },
      { { UINT8_C(241), UINT8_C(110), UINT8_C( 3), UINT8_C(203), UINT8_C( 42), UINT8_C(156), UINT8_C( 41), UINT8_C( 49) },
        { UINT8_C(205), UINT8_C( 78), UINT8_C( 20), UINT8_C( 3), UINT8_C(123), UINT8_C(104), UINT8_C(193), UINT8_C( 77) } },
      { UINT8_C( 12), UINT8_C( 0), UINT8_C(230), UINT8_C( 7), UINT8_C( 2), UINT8_C( 8), UINT8_C( 14), UINT8_C( 6) },
      { UINT8_C(123), UINT8_C(241), UINT8_C( 55), UINT8_C( 49), UINT8_C( 3), UINT8_C(205), UINT8_C(193), UINT8_C( 41) } },
    { { UINT8_C(130), UINT8_C( 43), UINT8_C( 82), UINT8_C(172), UINT8_C(199), UINT8_C(123), UINT8_C(222), UINT8_C(148) },
      { { UINT8_C(201), UINT8_C(242), UINT8_C(151), UINT8_C( 68), UINT8_C( 90), UINT8_C( 88), UINT8_C(145), UINT8_C( 22) },
        { UINT8_C(168), UINT8_C(119), UINT8_C(190), UINT8_C(235), UINT8_C( 48), UINT8_C( 92), UINT8_C( 33), UINT8_C(154) } },
      { UINT8_C( 1), UINT8_C( 8), UINT8_C( 10), UINT8_C( 7), UINT8_C( 12), UINT8_C(210), UINT8_C( 14), UINT8_C( 14) },
      { UINT8_C(242), UINT8_C(168), UINT8_C(190), UINT8_C( 22), UINT8_C( 48), UINT8_C(123), UINT8_C( 33), UINT8_C( 33) } },
    { { UINT8_C(123), UINT8_C(240), UINT8_C( 40), UINT8_C(213), UINT8_C( 72), UINT8_C(186), UINT8_C(236), UINT8_C(241) },
      { { UINT8_C( 49), UINT8_C(170), UINT8_C(220), UINT8_C( 97), UINT8_C( 6), UINT8_C(253), UINT8_C(251), UINT8_C( 55) },
        { UINT8_C(101), UINT8_C(165), UINT8_C(254), UINT8_C(225), UINT8_C(119), UINT8_C( 76), UINT8_C(223), UINT8_C(116) } },
      { UINT8_C( 12), UINT8_C( 10), UINT8_C( 8), UINT8_C( 7), UINT8_C( 19), UINT8_C(145), UINT8_C(235), UINT8_C(142) },
      { UINT8_C(119), UINT8_C(254), UINT8_C(101), UINT8_C( 55), UINT8_C( 72), UINT8_C(186), UINT8_C(236), UINT8_C(241) } },
    { { UINT8_C(250), UINT8_C(150), UINT8_C( 96), UINT8_C( 0), UINT8_C(147), UINT8_C( 92), UINT8_C( 55), UINT8_C(248) },
      { { UINT8_C( 1), UINT8_C( 53), UINT8_C(217), UINT8_C(121), UINT8_C(129), UINT8_C(185), UINT8_C(237), UINT8_C(109) },
        { UINT8_C( 67), UINT8_C( 38), UINT8_C(116), UINT8_C( 87), UINT8_C(183), UINT8_C( 95), UINT8_C(229), UINT8_C( 56) } },
      { UINT8_C( 2), UINT8_C( 9), UINT8_C( 1), UINT8_C( 0), UINT8_C( 9), UINT8_C( 12), UINT8_C( 15), UINT8_C( 3) },
      { UINT8_C(217), UINT8_C( 38), UINT8_C( 53), UINT8_C( 1), UINT8_C( 38), UINT8_C(183), UINT8_C( 56), UINT8_C(121) } },
    { { UINT8_MAX, UINT8_C(184), UINT8_C(118), UINT8_C(128), UINT8_C(113), UINT8_C( 99), UINT8_C(237), UINT8_C(180) },
      { { UINT8_C(137), UINT8_C( 97), UINT8_C( 11), UINT8_C( 64), UINT8_C(192), UINT8_C(241), UINT8_C(120), UINT8_C( 51) },
        { UINT8_C( 58), UINT8_C(122), UINT8_C(115), UINT8_C(212), UINT8_C( 54), UINT8_C(178), UINT8_C(103), UINT8_C(136) } },
      { UINT8_C( 1), UINT8_C( 11), UINT8_C( 14), UINT8_C( 77), UINT8_C( 5), UINT8_C( 13), UINT8_C( 74), UINT8_C( 5) },
      { UINT8_C( 97), UINT8_C(212), UINT8_C(103), UINT8_C(128), UINT8_C(241), UINT8_C(178), UINT8_C(237), UINT8_C(241) } },
    { { UINT8_C(148), UINT8_C( 54), UINT8_C(237), UINT8_C( 85), UINT8_C( 39), UINT8_C(102), UINT8_C(136), UINT8_C( 97) },
      { { UINT8_C(224), UINT8_C(251), UINT8_C( 53), UINT8_C( 22), UINT8_C(173), UINT8_C(157), UINT8_C(158), UINT8_C(254) },
        { UINT8_C(152), UINT8_C( 13), UINT8_C( 75), UINT8_C( 93), UINT8_C( 90), UINT8_C(149), UINT8_C( 34), UINT8_C( 95) } },
      { UINT8_C( 85), UINT8_C( 8), UINT8_C( 5), UINT8_C(121), UINT8_C( 11), UINT8_C( 15), UINT8_C( 38), UINT8_C( 15) },
      { UINT8_C(148), UINT8_C(152), UINT8_C(157), UINT8_C( 85), UINT8_C( 93), UINT8_C( 95), UINT8_C(136), UINT8_C( 95) } },
    { { UINT8_C( 7), UINT8_C(243), UINT8_C(111), UINT8_C(180), UINT8_C(144), UINT8_C( 14), UINT8_C(179), UINT8_C( 40) },
      { { UINT8_C( 27), UINT8_C(254), UINT8_C(134), UINT8_C(117), UINT8_C(148), UINT8_C(168), UINT8_C(212), UINT8_C(233) },
        { UINT8_C( 16), UINT8_C(169), UINT8_C( 98), UINT8_C(171), UINT8_C(168), UINT8_C(136), UINT8_C(219), UINT8_C(222) } },
      { UINT8_C( 12), UINT8_C( 15), UINT8_C( 10), UINT8_C( 5), UINT8_C( 12), UINT8_C( 8), UINT8_C( 15), UINT8_C( 3) },
      { UINT8_C(168), UINT8_C(222), UINT8_C( 98), UINT8_C(168), UINT8_C(168), UINT8_C( 16), UINT8_C(222), UINT8_C(117) } },
    { { UINT8_C(217), UINT8_C( 43), UINT8_C(124), UINT8_C(109), UINT8_C(211), UINT8_C( 80), UINT8_C( 87), UINT8_C(228) },
      { { UINT8_C(249), UINT8_C(185), UINT8_C(143), UINT8_C(162), UINT8_C( 66), UINT8_C(106), UINT8_C(128), UINT8_C(222) },
        { UINT8_C(202), UINT8_C(186), UINT8_C(243), UINT8_C( 54), UINT8_C(179), UINT8_C( 98), UINT8_C(169), UINT8_C(159) } },
      { UINT8_C( 1), UINT8_C( 1), UINT8_C( 11), UINT8_C( 13), UINT8_C( 12), UINT8_C( 0), UINT8_C( 53), UINT8_C(134) },
      { UINT8_C(185), UINT8_C(185), UINT8_C( 54), UINT8_C( 98), UINT8_C(179), UINT8_C(249), UINT8_C( 87), UINT8_C(228) } },
  };

  /* Load each row (both table halves), run the intrinsic and compare. */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a);
    simde_uint8x8x2_t b;
    b.val[0] = simde_vld1_u8(test_vec[i].b[0]);
    b.val[1] = simde_vld1_u8(test_vec[i].b[1]);
    simde_uint8x8_t c = simde_vld1_u8(test_vec[i].c);
    simde_uint8x8_t r = simde_vtbx2_u8(a, b, c);
    simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r));
  }

  /* Reached only after every row has been processed. */
  return 0;
#else
  /* Generator branch (compiled out): builds 8 random input sets and passes
   * inputs and result to the write helpers (presumably to print new
   * vectors -- confirm against the test harness). */
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_uint8x8_t a = simde_test_arm_neon_random_u8x8();
    simde_uint8x8x2_t b = simde_test_arm_neon_random_u8x8x2();
    simde_uint8x8_private c_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8());
    for (size_t j = 0 ; j < (sizeof(c_.values) / sizeof(c_.values[0])) ; j++) {
      /* When probability(PROBABILITY) fires, clamp the index into 0..15. */
      if (probability(PROBABILITY)) {
        c_.values[j] &= 15;
      }
    }
    simde_uint8x8_t c = simde_uint8x8_from_private(c_);
    simde_uint8x8_t r = simde_vtbx2_u8(a, b, c);

    simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_u8x8x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

static int
test_simde_vtbx3_s8 (SIMDE_MUNIT_TEST_ARGS) {
  /* Test for simde_vtbx3_s8.
   *
   * With `#if 1` active, each test_vec entry holds a, a three-part table
   * b[0..2], the indices c and the expected result r.  In the rows below,
   * r[i] is taken from the 24-entry concatenation of b[0], b[1] and b[2]
   * when c[i] is in 0..23, and is a[i] otherwise (including negative c[i]).
   * Returns 0 after all entries have been processed.
   *
   * The `#else` branch (compiled out) builds 8 random input sets, runs the
   * intrinsic, hands everything to the simde_test_arm_neon_write_* helpers
   * (presumably to print new vectors -- confirm) and returns 1. */
#if 1
  struct {
    SIMDE_ALIGN_TO_16 int8_t a[8];
    SIMDE_ALIGN_TO_16 int8_t b[3][8];
    SIMDE_ALIGN_TO_16 int8_t c[8];
    SIMDE_ALIGN_TO_16 int8_t r[8];
  } test_vec[] = {
    { { -INT8_C( 119), -INT8_C( 54), -INT8_C( 9), -INT8_C( 83), -INT8_C( 77), INT8_C( 35), -INT8_C( 42), INT8_C( 12) },
      { { INT8_C( 101), -INT8_C( 93), INT8_C( 97), -INT8_C( 22), INT8_C( 120), -INT8_C( 98), -INT8_C( 56), INT8_C( 75) },
        { -INT8_C( 122), INT8_C( 27), -INT8_C( 103), -INT8_C( 64), -INT8_C( 41), INT8_C( 6), INT8_C( 109), -INT8_C( 56) },
        { INT8_C( 13), INT8_C( 42), -INT8_C( 24), INT8_C( 48), -INT8_C( 33), -INT8_C( 106), -INT8_C( 21), INT8_C( 104) } },
      { INT8_C( 97), -INT8_C( 29), INT8_C( 21), INT8_C( 20), INT8_C( 6), INT8_C( 19), INT8_C( 8), INT8_C( 11) },
      { -INT8_C( 119), -INT8_C( 54), -INT8_C( 106), -INT8_C( 33), -INT8_C( 56), INT8_C( 48), -INT8_C( 122), -INT8_C( 64) } },
    { { INT8_C( 57), -INT8_C( 20), INT8_C( 103), INT8_C( 16), -INT8_C( 14), -INT8_C( 44), -INT8_C( 40), -INT8_C( 1) },
      { { -INT8_C( 2), -INT8_C( 63), INT8_C( 48), -INT8_C( 34), INT8_C( 87), INT8_C( 27), INT8_C( 70), -INT8_C( 72) },
        { -INT8_C( 2), INT8_C( 92), -INT8_C( 52), INT8_C( 4), INT8_C( 71), -INT8_C( 19), INT8_C( 111), -INT8_C( 42) },
        { INT8_C( 111), -INT8_C( 59), -INT8_C( 35), -INT8_C( 113), -INT8_C( 29), INT8_C( 48), INT8_C( 53), INT8_C( 28) } },
      { INT8_C( 28), INT8_C( 12), INT8_C( 20), INT8_C( 14), INT8_C( 16), INT8_C( 5), INT8_C( 14), INT8_C( 15) },
      { INT8_C( 57), INT8_C( 71), -INT8_C( 29), INT8_C( 111), INT8_C( 111), INT8_C( 27), INT8_C( 111), -INT8_C( 42) } },
    { { -INT8_C( 17), -INT8_C( 94), INT8_C( 92), INT8_C( 55), -INT8_C( 113), -INT8_C( 52), INT8_C( 13), -INT8_C( 2) },
      { { -INT8_C( 111), -INT8_C( 21), -INT8_C( 115), INT8_C( 116), INT8_C( 27), -INT8_C( 61), -INT8_C( 112), INT8_C( 55) },
        { INT8_C( 95), -INT8_C( 68), INT8_C( 69), -INT8_C( 48), -INT8_C( 63), INT8_C( 83), INT8_C( 63), -INT8_C( 121) },
        { -INT8_C( 111), -INT8_C( 116), -INT8_C( 91), -INT8_C( 21), INT8_C( 31), INT8_C( 123), INT8_C( 67), INT8_C( 15) } },
      { INT8_C( 29), INT8_C( 15), INT8_C( 22), INT8_C( 5), INT8_C( 107), INT8_C( 11), INT8_C( 3), INT8_C( 12) },
      { -INT8_C( 17), -INT8_C( 121), INT8_C( 67), -INT8_C( 61), -INT8_C( 113), -INT8_C( 48), INT8_C( 116), -INT8_C( 63) } },
    { { -INT8_C( 67), -INT8_C( 42), INT8_C( 43), INT8_C( 126), INT8_C( 41), INT8_C( 106), INT8_C( 6), -INT8_C( 69) },
      { { -INT8_C( 10), -INT8_C( 85), -INT8_C( 90), INT8_C( 22), INT8_C( 38), -INT8_C( 23), INT8_C( 37), INT8_C( 67) },
        { -INT8_C( 120), INT8_C( 107), -INT8_C( 16), -INT8_C( 12), -INT8_C( 66), -INT8_C( 100), -INT8_C( 16), -INT8_C( 3) },
        { -INT8_C( 43), INT8_C( 97), INT8_C( 86), -INT8_C( 47), INT8_C( 97), -INT8_C( 25), INT8_C( 44), INT8_C( 30) } },
      { -INT8_C( 67), INT8_C( 16), INT8_C( 13), INT8_C( 14), INT8_C( 2), INT8_C( 19), INT8_C( 17), -INT8_C( 71) },
      { -INT8_C( 67), -INT8_C( 43), -INT8_C( 100), -INT8_C( 16), -INT8_C( 90), -INT8_C( 47), INT8_C( 97), -INT8_C( 69) } },
    { { INT8_C( 95), -INT8_C( 88), -INT8_C( 83), INT8_C( 29), INT8_C( 68), -INT8_C( 99), INT8_C( 26), INT8_C( 25) },
      { { -INT8_C( 2), INT8_C( 113), -INT8_C( 22), INT8_C( 96), INT8_C( 88), INT8_C( 22), INT8_C( 126), INT8_C( 21) },
        { INT8_C( 110), INT8_C( 27), -INT8_C( 5), INT8_C( 49), -INT8_C( 66), -INT8_C( 99), -INT8_C( 22), INT8_C( 12) },
        { -INT8_C( 28), -INT8_C( 71), INT8_MIN, INT8_C( 21), -INT8_C( 83), INT8_C( 56), -INT8_C( 50), INT8_C( 12) } },
      { INT8_C( 8), INT8_C( 3), INT8_C( 17), INT8_C( 12), INT8_C( 0), INT8_C( 20), INT8_C( 13), INT8_C( 23) },
      { INT8_C( 110), INT8_C( 96), -INT8_C( 71), -INT8_C( 66), -INT8_C( 2), -INT8_C( 83), -INT8_C( 99), INT8_C( 12) } },
    { { INT8_C( 17), INT8_C( 29), -INT8_C( 35), -INT8_C( 49), -INT8_C( 70), -INT8_C( 57), -INT8_C( 36), -INT8_C( 97) },
      { { INT8_MIN, INT8_C( 92), -INT8_C( 76), INT8_C( 45), -INT8_C( 108), -INT8_C( 126), INT8_C( 57), INT8_C( 116) },
        { -INT8_C( 3), INT8_C( 98), -INT8_C( 104), INT8_C( 21), -INT8_C( 90), -INT8_C( 43), INT8_C( 44), INT8_C( 91) },
        { -INT8_C( 4), -INT8_C( 93), INT8_C( 104), INT8_C( 58), -INT8_C( 103), -INT8_C( 118), -INT8_C( 26), -INT8_C( 86) } },
      { INT8_C( 0), INT8_C( 3), INT8_C( 1), INT8_C( 2), INT8_C( 18), INT8_C( 85), INT8_C( 1), INT8_C( 10) },
      { INT8_MIN, INT8_C( 45), INT8_C( 92), -INT8_C( 76), INT8_C( 104), -INT8_C( 57), INT8_C( 92), -INT8_C( 104) } },
    { { -INT8_C( 46), INT8_C( 83), INT8_C( 74), INT8_C( 121), INT8_C( 41), INT8_C( 118), -INT8_C( 44), INT8_C( 37) },
      { { INT8_C( 26), INT8_C( 61), INT8_C( 95), -INT8_C( 77), -INT8_C( 57), INT8_C( 69), INT8_C( 93), INT8_C( 111) },
        { INT8_C( 8), -INT8_C( 42), -INT8_C( 46), -INT8_C( 110), INT8_C( 44), -INT8_C( 45), -INT8_C( 100), -INT8_C( 34) },
        { -INT8_C( 119), -INT8_C( 45), INT8_C( 36), -INT8_C( 64), INT8_C( 67), -INT8_C( 33), -INT8_C( 11), INT8_C( 22) } },
      { INT8_C( 3), INT8_C( 15), INT8_C( 23), INT8_C( 92), INT8_C( 13), INT8_C( 3), INT8_C( 9), -INT8_C( 49) },
      { -INT8_C( 77), -INT8_C( 34), INT8_C( 22), INT8_C( 121), -INT8_C( 45), -INT8_C( 77), -INT8_C( 42), INT8_C( 37) } },
    { { -INT8_C( 74), -INT8_C( 87), -INT8_C( 63), -INT8_C( 30), INT8_C( 125), INT8_C( 94), -INT8_C( 64), INT8_C( 6) },
      { { INT8_C( 49), -INT8_C( 28), -INT8_C( 58), INT8_C( 117), -INT8_C( 60), -INT8_C( 69), -INT8_C( 117), -INT8_C( 9) },
        { -INT8_C( 6), INT8_C( 26), INT8_C( 83), -INT8_C( 80), INT8_C( 125), -INT8_C( 44), INT8_MAX, INT8_C( 30) },
        { -INT8_C( 75), INT8_C( 2), -INT8_C( 122), -INT8_C( 36), -INT8_C( 31), INT8_C( 93), INT8_C( 11), -INT8_C( 105) } },
      { INT8_C( 7), INT8_C( 12), INT8_C( 1), -INT8_C( 124), INT8_C( 18), INT8_C( 9), INT8_C( 18), INT8_C( 20) },
      { -INT8_C( 9), INT8_C( 125), -INT8_C( 28), -INT8_C( 30), -INT8_C( 122), INT8_C( 26), -INT8_C( 122), -INT8_C( 31) } },
  };

  /* Load each row (all three table parts), run the intrinsic and compare. */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_int8x8_t a = simde_vld1_s8(test_vec[i].a);
    simde_int8x8x3_t b;
    b.val[0] = simde_vld1_s8(test_vec[i].b[0]);
    b.val[1] = simde_vld1_s8(test_vec[i].b[1]);
    b.val[2] = simde_vld1_s8(test_vec[i].b[2]);
    simde_int8x8_t c = simde_vld1_s8(test_vec[i].c);
    simde_int8x8_t r = simde_vtbx3_s8(a, b, c);
    simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r));
  }

  return 0;
#else
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_int8x8_t a = simde_test_arm_neon_random_i8x8();
    simde_int8x8x3_t b = simde_test_arm_neon_random_i8x8x3();
    simde_int8x8_private c_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8());
    for (size_t j = 0 ; j < (sizeof(c_.values) / sizeof(c_.values[0])) ; j++) {
      /* 24 is not a power of two, so a mask cannot be used here; the value
       * is reduced modulo 24 as unsigned so the result lands in 0..23. */
      if (probability(PROBABILITY)) {
        c_.values[j] = HEDLEY_STATIC_CAST(int8_t, HEDLEY_STATIC_CAST(uint8_t, c_.values[j]) % 24);
      }
    }
    simde_int8x8_t c = simde_int8x8_from_private(c_);
    simde_int8x8_t r = simde_vtbx3_s8(a, b, c);

    simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_i8x8x3(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_i8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Test for simde_vtbx3_u8: unsigned counterpart of the test above, checking
 * simde_vtbx3_u8(a, b, c) against precomputed rows. */
static int
test_simde_vtbx3_u8 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  struct {
    SIMDE_ALIGN_TO_16 uint8_t a[8];
    SIMDE_ALIGN_TO_16 uint8_t b[3][8];
    SIMDE_ALIGN_TO_16 uint8_t c[8];
    SIMDE_ALIGN_TO_16 uint8_t r[8];
  } test_vec[] = {
    { { UINT8_C( 32), UINT8_C(243), UINT8_C(142), UINT8_C( 4), UINT8_C( 43), UINT8_C(102), UINT8_C(233), UINT8_C( 31) },
      { { UINT8_C(210), UINT8_C(176), UINT8_C(234), UINT8_C(207), UINT8_C( 39), UINT8_C(178), UINT8_C( 37), UINT8_C(233) },
        { UINT8_C(129), UINT8_C(164), UINT8_C(149), UINT8_C(232), UINT8_C(245), UINT8_C(178), UINT8_C( 90), UINT8_C( 42) },
        { UINT8_C(189), UINT8_C(140), UINT8_C(136), UINT8_C( 81), UINT8_C(218), UINT8_C(117), UINT8_C(123), UINT8_C(251) } },
      { UINT8_C( 8), UINT8_C( 10), UINT8_C( 15), UINT8_C( 4), UINT8_C( 16), UINT8_C(232), UINT8_C( 11), UINT8_C( 19) },
      { UINT8_C(129), UINT8_C(149), UINT8_C( 42), UINT8_C( 39), UINT8_C(189), UINT8_C(102), UINT8_C(232), UINT8_C( 81) } },
    { { UINT8_C(219), UINT8_C( 62), UINT8_C(185), UINT8_C(208), UINT8_C(241), UINT8_C( 19), UINT8_C(251), UINT8_C(174) },
      { { UINT8_C(159),
/* Continuation of the test_vec initializer for the simde_vtbx3_u8 test
 * (this picks up inside the first table part of the second row).  In these
 * rows, r[i] comes from the 24-entry concatenation of b[0], b[1] and b[2]
 * when c[i] is in 0..23 and is a[i] otherwise. */
          UINT8_C(131), UINT8_MAX, UINT8_C(122), UINT8_C(248), UINT8_C(123), UINT8_C(117), UINT8_C( 97) },
        { UINT8_C(133), UINT8_C(116), UINT8_C(245), UINT8_C(245), UINT8_C( 93), UINT8_C(168), UINT8_C( 56), UINT8_C(245) },
        { UINT8_C( 69), UINT8_C( 75), UINT8_C(181), UINT8_C(149), UINT8_C(130), UINT8_C( 94), UINT8_C(102), UINT8_C( 94) } },
      { UINT8_C(156), UINT8_C( 7), UINT8_C( 46), UINT8_C( 21), UINT8_C( 2), UINT8_C( 17), UINT8_C( 12), UINT8_C( 18) },
      { UINT8_C(219), UINT8_C( 97), UINT8_C(185), UINT8_C( 94), UINT8_MAX, UINT8_C( 75), UINT8_C( 93), UINT8_C(181) } },
    { { UINT8_C( 53), UINT8_C(251), UINT8_C( 49), UINT8_C(146), UINT8_C(163), UINT8_C(105), UINT8_C(136), UINT8_C(233) },
      { { UINT8_C(180), UINT8_C( 61), UINT8_C(126), UINT8_C( 55), UINT8_C(155), UINT8_C(228), UINT8_C(149), UINT8_C( 55) },
        { UINT8_C( 3), UINT8_C(195), UINT8_C(197), UINT8_C( 53), UINT8_C(237), UINT8_C( 1), UINT8_C( 7), UINT8_C(154) },
        { UINT8_C( 60), UINT8_C( 83), UINT8_C( 63), UINT8_C(243), UINT8_C( 20), UINT8_C( 70), UINT8_C( 46), UINT8_C( 74) } },
      { UINT8_C( 17), UINT8_C( 23), UINT8_C( 4), UINT8_C( 13), UINT8_C( 9), UINT8_C( 4), UINT8_C( 14), UINT8_C(125) },
      { UINT8_C( 83), UINT8_C( 74), UINT8_C(155), UINT8_C( 1), UINT8_C(195), UINT8_C(155), UINT8_C( 7), UINT8_C(233) } },
    { { UINT8_C( 13), UINT8_C( 57), UINT8_C(104), UINT8_C(250), UINT8_C( 58), UINT8_C(112), UINT8_C(148), UINT8_C(118) },
      { { UINT8_C(195), UINT8_C(211), UINT8_C(105), UINT8_C(216), UINT8_C( 25), UINT8_C(152), UINT8_C( 34), UINT8_C( 91) },
        { UINT8_C(247), UINT8_C(254), UINT8_C( 64), UINT8_C(192), UINT8_C( 99), UINT8_C( 14), UINT8_C( 62), UINT8_C( 4) },
        { UINT8_C( 90), UINT8_C(242), UINT8_C( 65), UINT8_C(138), UINT8_C( 60), UINT8_C(181), UINT8_C(189), UINT8_C( 73) } },
      { UINT8_C( 22), UINT8_C( 13), UINT8_C( 19), UINT8_C( 16), UINT8_C(149), UINT8_C( 23), UINT8_C( 14), UINT8_C( 17) },
      { UINT8_C(189), UINT8_C( 14), UINT8_C(138), UINT8_C( 90), UINT8_C( 58), UINT8_C( 73), UINT8_C( 62), UINT8_C(242) } },
    { { UINT8_C( 81), UINT8_C( 95), UINT8_C( 88), UINT8_C(180), UINT8_C(109), UINT8_C(150), UINT8_C(185), UINT8_C(199) },
      { { UINT8_C(136), UINT8_C(250), UINT8_C( 81), UINT8_C(196), UINT8_C(175), UINT8_C( 14), UINT8_C( 13), UINT8_C(157) },
        { UINT8_C( 51), UINT8_C( 80), UINT8_C(197), UINT8_C(201), UINT8_C( 39), UINT8_C( 99), UINT8_C( 34), UINT8_C(210) },
        { UINT8_C(107), UINT8_C( 83), UINT8_C(150), UINT8_C( 11), UINT8_C(166), UINT8_C(181), UINT8_C(163), UINT8_C(247) } },
      { UINT8_C( 20), UINT8_C( 11), UINT8_C( 4), UINT8_C( 9), UINT8_C( 1), UINT8_C( 5), UINT8_C( 0), UINT8_C( 1) },
      { UINT8_C(166), UINT8_C(201), UINT8_C(175), UINT8_C( 80), UINT8_C(250), UINT8_C( 14), UINT8_C(136), UINT8_C(250) } },
    { { UINT8_C( 60), UINT8_C(112), UINT8_C(163), UINT8_C( 99), UINT8_C(211), UINT8_C(197), UINT8_C( 53), UINT8_C( 63) },
      { { UINT8_C( 24), UINT8_C(203), UINT8_C( 74), UINT8_C(190), UINT8_C(128), UINT8_C(237), UINT8_C(182), UINT8_C(148) },
        { UINT8_C(232), UINT8_C( 98), UINT8_C( 21), UINT8_C(121), UINT8_C(199), UINT8_C( 93), UINT8_C(147), UINT8_C( 38) },
        { UINT8_C(246), UINT8_C(113), UINT8_C( 52), UINT8_C(157), UINT8_C( 92), UINT8_C(223), UINT8_C(120), UINT8_C(152) } },
      { UINT8_C( 7), UINT8_C( 3), UINT8_C( 12), UINT8_C( 10), UINT8_C( 9), UINT8_C( 1), UINT8_C( 1), UINT8_C( 9) },
      { UINT8_C(148), UINT8_C(190), UINT8_C(199), UINT8_C( 21), UINT8_C( 98), UINT8_C(203), UINT8_C(203), UINT8_C( 98) } },
    { { UINT8_C(208), UINT8_C( 39), UINT8_C(251), UINT8_C(151), UINT8_C(133), UINT8_C(142), UINT8_C(189), UINT8_C(123) },
      { { UINT8_MAX, UINT8_C(241), UINT8_C( 25), UINT8_C( 92), UINT8_C(208), UINT8_C(145), UINT8_C(244), UINT8_C( 31) },
        { UINT8_C(172), UINT8_C(240), UINT8_C( 65), UINT8_C(141), UINT8_C( 34), UINT8_C(163), UINT8_C(135), UINT8_C( 31) },
        { UINT8_C( 79), UINT8_C( 63), UINT8_C(156), UINT8_C(232), UINT8_C(173), UINT8_C(174), UINT8_C(106), UINT8_C(125) } },
      { UINT8_C( 22), UINT8_C( 6), UINT8_C( 20), UINT8_C( 19), UINT8_C( 4), UINT8_C( 17), UINT8_C( 22), UINT8_C( 4) },
      { UINT8_C(106), UINT8_C(244), UINT8_C(173), UINT8_C(232), UINT8_C(208), UINT8_C( 63), UINT8_C(106), UINT8_C(208) } },
    { { UINT8_C( 53), UINT8_C(242), UINT8_C(186), UINT8_C( 87), UINT8_C(149), UINT8_C( 65), UINT8_C(118), UINT8_C(228) },
      { { UINT8_C(128), UINT8_C( 18), UINT8_C(205), UINT8_C( 45), UINT8_C(193), UINT8_C( 55), UINT8_C(170), UINT8_C(151) },
        { UINT8_C(157), UINT8_C(190), UINT8_C(242), UINT8_C(146), UINT8_C(143), UINT8_C(200), UINT8_C(134), UINT8_C( 81) },
        { UINT8_C(184), UINT8_C(214), UINT8_C(227), UINT8_C( 56), UINT8_C( 26), UINT8_C(148), UINT8_C(101), UINT8_C( 79) } },
      { UINT8_C( 15), UINT8_C( 32), UINT8_C( 22), UINT8_C( 4), UINT8_C( 97), UINT8_C( 4), UINT8_C( 1), UINT8_C( 10) },
      { UINT8_C( 81), UINT8_C(242), UINT8_C(101), UINT8_C(193), UINT8_C(149), UINT8_C(193), UINT8_C( 18), UINT8_C(242) } },
  };

  /* Load each row (all three table parts), run the intrinsic and compare. */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a);
    simde_uint8x8x3_t b;
    b.val[0] = simde_vld1_u8(test_vec[i].b[0]);
    b.val[1] = simde_vld1_u8(test_vec[i].b[1]);
    b.val[2] = simde_vld1_u8(test_vec[i].b[2]);
    simde_uint8x8_t c = simde_vld1_u8(test_vec[i].c);
    simde_uint8x8_t r = simde_vtbx3_u8(a, b, c);
    simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r));
  }

  /* Reached only after every row has been processed. */
  return 0;
#else
  /* Generator branch (compiled out): builds 8 random input sets and passes
   * inputs and result to the write helpers (presumably to print new
   * vectors -- confirm against the test harness). */
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde_uint8x8_t a = simde_test_arm_neon_random_u8x8();
    simde_uint8x8x3_t b = simde_test_arm_neon_random_u8x8x3();
    simde_uint8x8_private c_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8());
    for (size_t j = 0 ; j < (sizeof(c_.values) / sizeof(c_.values[0])) ; j++) {
      /* 24 is not a power of two, so the index is reduced with a modulo
       * rather than a mask; the result lands in 0..23. */
      if (probability(PROBABILITY)) {
        c_.values[j] %= 24;
      }
    }
    simde_uint8x8_t c = simde_uint8x8_from_private(c_);
    simde_uint8x8_t r = simde_vtbx3_u8(a, b, c);

    simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_arm_neon_write_u8x8x3(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

static
int test_simde_vtbx4_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 int8_t a[8]; SIMDE_ALIGN_TO_16 int8_t b[4][8]; SIMDE_ALIGN_TO_16 int8_t c[8]; SIMDE_ALIGN_TO_16 int8_t r[8]; } test_vec[] = { { { -INT8_C( 5), INT8_C( 1), INT8_C( 61), -INT8_C( 12), INT8_C( 126), -INT8_C( 85), INT8_C( 88), INT8_C( 104) }, { { INT8_C( 87), -INT8_C( 84), INT8_C( 12), INT8_C( 71), -INT8_C( 6), INT8_C( 22), INT8_C( 6), INT8_C( 106) }, { -INT8_C( 2), INT8_C( 85), INT8_C( 63), -INT8_C( 77), INT8_C( 5), -INT8_C( 22), INT8_C( 81), -INT8_C( 33) }, { -INT8_C( 13), -INT8_C( 70), INT8_C( 54), INT8_C( 26), INT8_C( 35), INT8_C( 67), INT8_C( 109), INT8_C( 30) }, { INT8_C( 68), -INT8_C( 86), INT8_C( 18), -INT8_C( 61), INT8_C( 86), INT8_C( 106), INT8_C( 43), -INT8_C( 83) } }, { INT8_C( 22), INT8_C( 23), INT8_C( 21), INT8_C( 16), INT8_C( 13), INT8_C( 27), INT8_C( 26), INT8_C( 11) }, { INT8_C( 109), INT8_C( 30), INT8_C( 67), -INT8_C( 13), -INT8_C( 22), -INT8_C( 61), INT8_C( 18), -INT8_C( 77) } }, { { INT8_C( 11), INT8_C( 107), -INT8_C( 79), INT8_C( 46), -INT8_C( 82), INT8_C( 31), INT8_C( 76), -INT8_C( 13) }, { { -INT8_C( 55), INT8_C( 94), -INT8_C( 74), INT8_C( 31), -INT8_C( 56), -INT8_C( 31), -INT8_C( 51), -INT8_C( 33) }, { INT8_C( 24), -INT8_C( 62), -INT8_C( 17), INT8_C( 101), -INT8_C( 67), INT8_C( 106), -INT8_C( 80), INT8_C( 14) }, { INT8_C( 36), -INT8_C( 81), INT8_C( 101), -INT8_C( 56), INT8_C( 0), -INT8_C( 102), INT8_C( 95), INT8_C( 11) }, { INT8_C( 6), INT8_C( 16), INT8_C( 57), -INT8_C( 76), INT8_C( 47), -INT8_C( 123), -INT8_C( 89), -INT8_C( 7) } }, { INT8_C( 3), INT8_C( 93), INT8_C( 24), INT8_C( 11), INT8_C( 30), INT8_C( 5), INT8_C( 10), INT8_C( 22) }, { INT8_C( 31), INT8_C( 107), INT8_C( 6), INT8_C( 101), -INT8_C( 89), -INT8_C( 31), -INT8_C( 17), INT8_C( 95) } }, { { INT8_C( 27), -INT8_C( 40), -INT8_C( 48), INT8_C( 27), INT8_C( 115), INT8_C( 47), INT8_C( 38), INT8_C( 121) }, { { INT8_C( 63), INT8_C( 95), INT8_C( 45), INT8_C( 111), -INT8_C( 28), -INT8_C( 43), INT8_C( 104), 
-INT8_C( 57) }, { INT8_C( 50), INT8_MIN, INT8_C( 115), INT8_C( 113), INT8_C( 102), -INT8_C( 3), -INT8_C( 57), INT8_C( 13) }, { INT8_C( 119), -INT8_C( 125), INT8_C( 114), INT8_C( 91), -INT8_C( 17), -INT8_C( 26), INT8_C( 99), INT8_C( 10) }, { -INT8_C( 66), INT8_C( 51), INT8_C( 38), INT8_C( 49), INT8_C( 98), INT8_C( 76), -INT8_C( 86), -INT8_C( 94) } }, { INT8_C( 12), INT8_C( 24), INT8_C( 17), INT8_C( 16), INT8_C( 13), INT8_C( 25), INT8_C( 24), INT8_C( 31) }, { INT8_C( 102), -INT8_C( 66), -INT8_C( 125), INT8_C( 119), -INT8_C( 3), INT8_C( 51), -INT8_C( 66), -INT8_C( 94) } }, { { -INT8_C( 101), -INT8_C( 33), -INT8_C( 101), -INT8_C( 118), -INT8_C( 59), -INT8_C( 1), -INT8_C( 108), -INT8_C( 124) }, { { INT8_C( 50), -INT8_C( 70), -INT8_C( 75), -INT8_C( 107), INT8_C( 7), INT8_C( 96), INT8_C( 55), -INT8_C( 77) }, { INT8_C( 56), INT8_C( 72), INT8_C( 67), -INT8_C( 27), -INT8_C( 63), -INT8_C( 101), -INT8_C( 60), -INT8_C( 70) }, { INT8_C( 102), INT8_C( 21), INT8_C( 26), INT8_C( 47), INT8_C( 45), -INT8_C( 121), INT8_C( 111), -INT8_C( 56) }, { INT8_C( 102), INT8_C( 10), INT8_C( 82), INT8_C( 44), INT8_C( 9), -INT8_C( 26), -INT8_C( 80), INT8_C( 60) } }, { INT8_C( 1), INT8_C( 5), INT8_C( 17), INT8_C( 8), INT8_C( 5), INT8_C( 8), INT8_C( 27), -INT8_C( 3) }, { -INT8_C( 70), INT8_C( 96), INT8_C( 21), INT8_C( 56), INT8_C( 96), INT8_C( 56), INT8_C( 44), -INT8_C( 124) } }, { { -INT8_C( 68), -INT8_C( 27), -INT8_C( 49), -INT8_C( 23), INT8_C( 108), INT8_C( 62), -INT8_C( 79), -INT8_C( 45) }, { { INT8_C( 73), INT8_C( 3), -INT8_C( 1), INT8_C( 82), -INT8_C( 23), -INT8_C( 81), -INT8_C( 114), -INT8_C( 118) }, { INT8_C( 20), INT8_C( 95), INT8_C( 50), -INT8_C( 38), INT8_C( 103), -INT8_C( 115), -INT8_C( 41), -INT8_C( 73) }, { INT8_C( 44), -INT8_C( 70), -INT8_C( 56), INT8_C( 102), INT8_C( 97), -INT8_C( 108), INT8_C( 6), INT8_C( 29) }, { INT8_C( 121), -INT8_C( 42), INT8_C( 6), -INT8_C( 26), INT8_C( 20), -INT8_C( 73), -INT8_C( 71), INT8_C( 93) } }, { INT8_C( 26), -INT8_C( 72), INT8_C( 16), INT8_C( 3), 
INT8_C( 7), INT8_C( 30), INT8_C( 14), INT8_C( 27) }, { INT8_C( 6), -INT8_C( 27), INT8_C( 44), INT8_C( 82), -INT8_C( 118), -INT8_C( 71), -INT8_C( 41), -INT8_C( 26) } }, { { -INT8_C( 25), -INT8_C( 123), INT8_MIN, INT8_C( 72), INT8_C( 25), -INT8_C( 122), INT8_C( 101), -INT8_C( 109) }, { { INT8_C( 92), INT8_C( 107), INT8_C( 121), INT8_C( 113), INT8_C( 34), INT8_C( 50), -INT8_C( 50), -INT8_C( 36) }, { -INT8_C( 22), INT8_C( 126), INT8_MAX, INT8_C( 81), -INT8_C( 67), -INT8_C( 83), -INT8_C( 52), INT8_C( 91) }, { INT8_C( 14), INT8_C( 34), INT8_C( 96), -INT8_C( 4), INT8_C( 79), INT8_C( 29), INT8_C( 22), INT8_C( 54) }, { -INT8_C( 93), -INT8_C( 106), INT8_C( 126), -INT8_C( 68), INT8_C( 28), -INT8_C( 29), INT8_C( 79), INT8_C( 121) } }, { INT8_C( 14), -INT8_C( 56), INT8_C( 10), INT8_C( 16), INT8_C( 26), -INT8_C( 72), INT8_C( 76), INT8_C( 4) }, { -INT8_C( 52), -INT8_C( 123), INT8_MAX, INT8_C( 14), INT8_C( 126), -INT8_C( 122), INT8_C( 101), INT8_C( 34) } }, { { INT8_C( 36), -INT8_C( 81), -INT8_C( 125), INT8_C( 115), -INT8_C( 51), -INT8_C( 103), -INT8_C( 87), INT8_C( 112) }, { { INT8_C( 47), INT8_C( 39), INT8_C( 44), INT8_C( 75), INT8_C( 10), INT8_C( 124), -INT8_C( 60), INT8_C( 88) }, { INT8_C( 68), -INT8_C( 82), -INT8_C( 56), INT8_C( 63), INT8_C( 103), INT8_C( 20), INT8_C( 35), -INT8_C( 98) }, { -INT8_C( 33), INT8_C( 89), -INT8_C( 110), INT8_C( 88), INT8_C( 91), -INT8_C( 31), -INT8_C( 33), INT8_MAX }, { -INT8_C( 112), INT8_C( 98), -INT8_C( 14), INT8_C( 93), -INT8_C( 5), -INT8_C( 101), -INT8_C( 51), INT8_C( 42) } }, { INT8_C( 2), -INT8_C( 6), INT8_C( 118), -INT8_C( 52), INT8_C( 22), INT8_C( 26), INT8_C( 4), INT8_C( 26) }, { INT8_C( 44), -INT8_C( 81), -INT8_C( 125), INT8_C( 115), -INT8_C( 33), -INT8_C( 14), INT8_C( 10), -INT8_C( 14) } }, { { INT8_C( 118), INT8_MIN, INT8_C( 56), -INT8_C( 47), INT8_C( 97), INT8_C( 23), INT8_C( 80), -INT8_C( 15) }, { { INT8_C( 122), INT8_C( 66), INT8_C( 79), INT8_C( 117), -INT8_C( 35), INT8_C( 28), -INT8_C( 96), -INT8_C( 97) }, { INT8_C( 22), INT8_C( 
22), INT8_C( 107), -INT8_C( 116), INT8_C( 80), -INT8_C( 113), INT8_C( 71), INT8_C( 57) }, { INT8_C( 123), INT8_C( 64), -INT8_C( 119), INT8_C( 123), INT8_C( 93), INT8_C( 119), INT8_C( 90), -INT8_C( 45) }, { -INT8_C( 9), -INT8_C( 110), -INT8_C( 92), INT8_C( 88), -INT8_C( 86), -INT8_C( 12), INT8_C( 74), INT8_C( 36) } }, { INT8_C( 22), INT8_C( 25), INT8_C( 25), INT8_C( 19), INT8_C( 21), INT8_C( 57), INT8_C( 18), INT8_C( 12) }, { INT8_C( 90), -INT8_C( 110), -INT8_C( 110), INT8_C( 123), INT8_C( 119), INT8_C( 23), -INT8_C( 119), INT8_C( 80) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8x4_t b; b.val[0] = simde_vld1_s8(test_vec[i].b[0]); b.val[1] = simde_vld1_s8(test_vec[i].b[1]); b.val[2] = simde_vld1_s8(test_vec[i].b[2]); b.val[3] = simde_vld1_s8(test_vec[i].b[3]); simde_int8x8_t c = simde_vld1_s8(test_vec[i].c); simde_int8x8_t r = simde_vtbx4_s8(a, b, c); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_int8x8x4_t b = simde_test_arm_neon_random_i8x8x4(); simde_int8x8_private c_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); for (size_t j = 0 ; j < (sizeof(c_.values) / sizeof(c_.values[0])) ; j++) { if (probability(PROBABILITY)) { c_.values[j] &= 31; } } simde_int8x8_t c = simde_int8x8_from_private(c_); simde_int8x8_t r = simde_vtbx4_s8(a, b, c); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtbx4_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 struct { SIMDE_ALIGN_TO_16 uint8_t a[8]; SIMDE_ALIGN_TO_16 uint8_t b[4][8]; SIMDE_ALIGN_TO_16 uint8_t c[8]; SIMDE_ALIGN_TO_16 
uint8_t r[8]; } test_vec[] = { { { UINT8_C(242), UINT8_C(225), UINT8_C(193), UINT8_C( 59), UINT8_C(239), UINT8_C(138), UINT8_C(184), UINT8_C(104) }, { { UINT8_C( 77), UINT8_C( 36), UINT8_C( 30), UINT8_C( 81), UINT8_C(112), UINT8_C(130), UINT8_C( 60), UINT8_C(231) }, { UINT8_C(122), UINT8_C( 21), UINT8_C( 38), UINT8_C( 44), UINT8_C( 62), UINT8_C( 98), UINT8_C(206), UINT8_C(238) }, { UINT8_C(114), UINT8_C( 68), UINT8_C(207), UINT8_C( 74), UINT8_C(164), UINT8_C(109), UINT8_C( 79), UINT8_C(150) }, { UINT8_C( 78), UINT8_C( 16), UINT8_C(209), UINT8_C( 61), UINT8_C(155), UINT8_C(137), UINT8_C(166), UINT8_C(232) } }, { UINT8_C( 14), UINT8_C(196), UINT8_C( 25), UINT8_C( 30), UINT8_C( 6), UINT8_C(117), UINT8_C( 5), UINT8_C( 1) }, { UINT8_C(206), UINT8_C(225), UINT8_C( 16), UINT8_C(166), UINT8_C( 60), UINT8_C(138), UINT8_C(130), UINT8_C( 36) } }, { { UINT8_C( 0), UINT8_C(133), UINT8_C( 73), UINT8_C(164), UINT8_C(242), UINT8_C(152), UINT8_C( 58), UINT8_C( 65) }, { { UINT8_C(169), UINT8_C( 11), UINT8_C(126), UINT8_C( 68), UINT8_C(149), UINT8_C( 36), UINT8_C( 44), UINT8_C( 67) }, { UINT8_C(233), UINT8_C(101), UINT8_C( 97), UINT8_C( 47), UINT8_C(218), UINT8_C(102), UINT8_C(240), UINT8_C(100) }, { UINT8_C(145), UINT8_C(222), UINT8_C( 45), UINT8_C( 30), UINT8_C(154), UINT8_C(227), UINT8_C( 30), UINT8_C(154) }, { UINT8_C(105), UINT8_C(103), UINT8_C( 62), UINT8_C( 91), UINT8_C( 0), UINT8_C(120), UINT8_C(156), UINT8_C(169) } }, { UINT8_C( 3), UINT8_C( 27), UINT8_C( 13), UINT8_C( 24), UINT8_C( 31), UINT8_C( 25), UINT8_C( 91), UINT8_C( 8) }, { UINT8_C( 68), UINT8_C( 91), UINT8_C(102), UINT8_C(105), UINT8_C(169), UINT8_C(103), UINT8_C( 58), UINT8_C(233) } }, { { UINT8_C( 38), UINT8_C(233), UINT8_C(210), UINT8_C(192), UINT8_C(205), UINT8_C(240), UINT8_C( 90), UINT8_C( 54) }, { { UINT8_C( 87), UINT8_C(152), UINT8_C(145), UINT8_C( 87), UINT8_C( 16), UINT8_C( 46), UINT8_C( 0), UINT8_C(148) }, { UINT8_C( 73), UINT8_C(237), UINT8_C(172), UINT8_C(136), UINT8_C( 6), UINT8_C( 8), UINT8_C(177), 
UINT8_C(132) }, { UINT8_C(196), UINT8_C( 9), UINT8_C(220), UINT8_C(231), UINT8_C( 81), UINT8_C(153), UINT8_C(154), UINT8_C(120) }, { UINT8_C(130), UINT8_C(108), UINT8_C( 56), UINT8_C( 79), UINT8_C( 92), UINT8_C(147), UINT8_C(133), UINT8_C(180) } }, { UINT8_C( 11), UINT8_C( 23), UINT8_C( 11), UINT8_C( 60), UINT8_C( 5), UINT8_C( 12), UINT8_C( 16), UINT8_C( 14) }, { UINT8_C(136), UINT8_C(120), UINT8_C(136), UINT8_C(192), UINT8_C( 46), UINT8_C( 6), UINT8_C(196), UINT8_C(177) } }, { { UINT8_C(208), UINT8_C( 97), UINT8_C( 48), UINT8_C( 34), UINT8_C(250), UINT8_C(202), UINT8_C(154), UINT8_C(124) }, { { UINT8_C( 55), UINT8_C(210), UINT8_C(204), UINT8_C(147), UINT8_C(101), UINT8_C( 81), UINT8_C( 71), UINT8_C(145) }, { UINT8_C(104), UINT8_C( 83), UINT8_C(205), UINT8_C(173), UINT8_C( 95), UINT8_C(157), UINT8_C( 59), UINT8_C( 88) }, { UINT8_C( 25), UINT8_C( 82), UINT8_C( 88), UINT8_C(158), UINT8_C( 25), UINT8_C(221), UINT8_C(231), UINT8_C(234) }, { UINT8_C( 62), UINT8_C( 23), UINT8_C( 12), UINT8_C( 56), UINT8_C(225), UINT8_C(166), UINT8_C(180), UINT8_C( 24) } }, { UINT8_C( 24), UINT8_C( 0), UINT8_C(172), UINT8_C( 30), UINT8_C( 18), UINT8_C(243), UINT8_C( 15), UINT8_C( 26) }, { UINT8_C( 62), UINT8_C( 55), UINT8_C( 48), UINT8_C(180), UINT8_C( 88), UINT8_C(202), UINT8_C( 88), UINT8_C( 12) } }, { { UINT8_C(117), UINT8_C( 86), UINT8_C(144), UINT8_C(143), UINT8_C( 51), UINT8_C(119), UINT8_C(121), UINT8_C(113) }, { { UINT8_C(142), UINT8_C(133), UINT8_C(169), UINT8_C(112), UINT8_C( 43), UINT8_C( 94), UINT8_C(136), UINT8_C(163) }, { UINT8_C(222), UINT8_C( 52), UINT8_C(129), UINT8_C(176), UINT8_C( 40), UINT8_C(240), UINT8_C(235), UINT8_C(110) }, { UINT8_C( 44), UINT8_C(211), UINT8_C( 20), UINT8_C( 5), UINT8_C(246), UINT8_C( 18), UINT8_C(248), UINT8_C(108) }, { UINT8_C(104), UINT8_C(136), UINT8_C(251), UINT8_C(156), UINT8_C( 0), UINT8_C(116), UINT8_C( 13), UINT8_C(142) } }, { UINT8_C( 25), UINT8_C(183), UINT8_C( 30), UINT8_C( 4), UINT8_C( 21), UINT8_C(135), UINT8_C(199), UINT8_C( 19) }, 
{ UINT8_C(136), UINT8_C( 86), UINT8_C( 13), UINT8_C( 43), UINT8_C( 18), UINT8_C(119), UINT8_C(121), UINT8_C( 5) } }, { { UINT8_C( 98), UINT8_C(102), UINT8_C(107), UINT8_C( 88), UINT8_C(120), UINT8_C( 99), UINT8_C(196), UINT8_C(224) }, { { UINT8_C(236), UINT8_C(191), UINT8_C(124), UINT8_C(236), UINT8_C( 51), UINT8_C(138), UINT8_C(122), UINT8_C( 44) }, { UINT8_C( 65), UINT8_C(121), UINT8_C( 80), UINT8_C( 86), UINT8_C( 0), UINT8_C( 24), UINT8_C( 73), UINT8_C(187) }, { UINT8_C( 97), UINT8_C(237), UINT8_C(159), UINT8_C(154), UINT8_C(124), UINT8_C(241), UINT8_C( 0), UINT8_C(222) }, { UINT8_C( 87), UINT8_C(108), UINT8_C( 55), UINT8_C(207), UINT8_C(207), UINT8_C(251), UINT8_C(175), UINT8_C(187) } }, { UINT8_C( 27), UINT8_C( 12), UINT8_C( 7), UINT8_C(238), UINT8_C( 22), UINT8_C( 2), UINT8_C( 27), UINT8_C( 23) }, { UINT8_C(207), UINT8_C( 0), UINT8_C( 44), UINT8_C( 88), UINT8_C( 0), UINT8_C(124), UINT8_C(207), UINT8_C(222) } }, { { UINT8_C(132), UINT8_C(245), UINT8_C(127), UINT8_C( 0), UINT8_C(230), UINT8_C(127), UINT8_C(223), UINT8_C( 61) }, { { UINT8_C(235), UINT8_C( 22), UINT8_C( 12), UINT8_C(187), UINT8_C( 17), UINT8_C(188), UINT8_C(118), UINT8_C(204) }, { UINT8_C(232), UINT8_C( 30), UINT8_C(187), UINT8_C(158), UINT8_C( 64), UINT8_C(214), UINT8_C(149), UINT8_C(219) }, { UINT8_C( 65), UINT8_C(226), UINT8_C(118), UINT8_C(197), UINT8_C(120), UINT8_C(204), UINT8_C(169), UINT8_C(252) }, { UINT8_C(194), UINT8_C( 40), UINT8_C(253), UINT8_C(168), UINT8_C(168), UINT8_C(220), UINT8_C(230), UINT8_C(147) } }, { UINT8_C( 18), UINT8_C( 18), UINT8_C( 14), UINT8_C( 3), UINT8_C( 14), UINT8_C( 5), UINT8_C( 16), UINT8_C( 22) }, { UINT8_C(118), UINT8_C(118), UINT8_C(149), UINT8_C(187), UINT8_C(149), UINT8_C(188), UINT8_C( 65), UINT8_C(169) } }, { { UINT8_C(171), UINT8_C(116), UINT8_C(103), UINT8_C( 36), UINT8_C( 64), UINT8_C( 17), UINT8_C( 32), UINT8_C( 2) }, { { UINT8_C( 57), UINT8_C( 29), UINT8_C(171), UINT8_C(225), UINT8_C(249), UINT8_C(145), UINT8_C(117), UINT8_C(235) }, { UINT8_C(131), 
UINT8_C(195), UINT8_C(239), UINT8_C( 50), UINT8_C(136), UINT8_C(191), UINT8_C(200), UINT8_C(107) }, { UINT8_C( 74), UINT8_C(253), UINT8_C(142), UINT8_C(171), UINT8_C(198), UINT8_C(140), UINT8_C( 77), UINT8_C(114) }, { UINT8_C( 0), UINT8_C(181), UINT8_C(150), UINT8_C( 65), UINT8_C(198), UINT8_C(182), UINT8_C( 67), UINT8_MAX } }, { UINT8_C( 20), UINT8_C( 14), UINT8_C( 1), UINT8_C( 13), UINT8_C(127), UINT8_C( 22), UINT8_C( 25), UINT8_C( 3) }, { UINT8_C(198), UINT8_C(200), UINT8_C( 29), UINT8_C(191), UINT8_C( 64), UINT8_C( 77), UINT8_C(181), UINT8_C(225) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8x4_t b; b.val[0] = simde_vld1_u8(test_vec[i].b[0]); b.val[1] = simde_vld1_u8(test_vec[i].b[1]); b.val[2] = simde_vld1_u8(test_vec[i].b[2]); b.val[3] = simde_vld1_u8(test_vec[i].b[3]); simde_uint8x8_t c = simde_vld1_u8(test_vec[i].c); simde_uint8x8_t r = simde_vtbx4_u8(a, b, c); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); simde_uint8x8x4_t b = simde_test_arm_neon_random_u8x8x4(); simde_uint8x8_private c_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); for (size_t j = 0 ; j < (sizeof(c_.values) / sizeof(c_.values[0])) ; j++) { if (probability(PROBABILITY)) { c_.values[j] &= 31; } } simde_uint8x8_t c = simde_uint8x8_from_private(c_); simde_uint8x8_t r = simde_vtbx4_u8(a, b, c); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vtbx1_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtbx1_u8) #if 
!defined(SIMDE_BUG_INTEL_857088) SIMDE_TEST_FUNC_LIST_ENTRY(vtbx2_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtbx2_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vtbx3_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtbx3_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vtbx4_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtbx4_u8) #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/test-neon-footer.h000066400000000000000000000022451400333146700207030ustar00rootroot00000000000000#if defined(SIMDE_TEST_BARE) int main(void) { int retval = EXIT_SUCCESS; fprintf(stdout, "1..%zu\n", (sizeof(test_suite_tests) / sizeof(test_suite_tests[0]))); for (size_t i = 0 ; i < (sizeof(test_suite_tests) / sizeof(test_suite_tests[0])) ; i++) { int res = test_suite_tests[i].func(); if (res != 0) { retval = EXIT_FAILURE; fprintf(stdout, "not ok %zu " HEDLEY_STRINGIFY(SIMDE_TEST_ARM_NEON_INSN) "/%s\n", i + 1, test_suite_tests[i].name); } else { fprintf(stdout, "ok %zu " HEDLEY_STRINGIFY(SIMDE_TEST_ARM_NEON_INSN) "/%s\n", i + 1, test_suite_tests[i].name); } } return retval; } #else #if defined(__cplusplus) static MunitSuite suite = { const_cast("/" HEDLEY_STRINGIFY(SIMDE_TEST_ARM_NEON_INSN)), test_suite_tests, NULL, 1, MUNIT_SUITE_OPTION_NONE }; #else static MunitSuite suite = { (char*) "/" HEDLEY_STRINGIFY(SIMDE_TEST_ARM_NEON_INSN), test_suite_tests, NULL, 1, MUNIT_SUITE_OPTION_NONE }; #endif HEDLEY_C_DECL MunitSuite* SIMDE_TEST_GENERATE_VARIANT_SYMBOL_CURRENT(HEDLEY_CONCAT(simde_test_arm_neon_get_suite_,SIMDE_TEST_ARM_NEON_INSN)) (void) { return &suite; } #endif simde-0.7.2/test/arm/neon/test-neon.h000066400000000000000000001045601400333146700174120ustar00rootroot00000000000000#if !defined(SIMDE_TEST_ARM_NEON_TEST_NEON_H) #define SIMDE_TEST_ARM_NEON_TEST_NEON_H #include "../../test.h" #include "../../../simde/arm/neon/ld1.h" #include "../../../simde/arm/neon/st1.h" #define SIMDE_TEST_ARM_NEON_GENERATE_INT_TYPE_FUNCS_(NT, ET, element_count, modifier, symbol_identifier, neon_identifier) \ 
static simde_##NT \ simde_test_arm_neon_random_##symbol_identifier##x##element_count(void) { \ simde_##NT v; \ simde_test_codegen_random_memory(sizeof(v), HEDLEY_REINTERPRET_CAST(uint8_t*, &v)); \ return v; \ } \ \ static void \ simde_test_arm_neon_write_##symbol_identifier##x##element_count(int indent, simde_##NT value, SimdeTestVecPos pos) { \ ET value_[sizeof(value) / sizeof(ET)]; \ \ simde_vst1##modifier##_##neon_identifier(value_, value); \ \ simde_test_codegen_write_v##symbol_identifier(indent, sizeof(value) / sizeof(ET), value_, pos); \ \ } \ \ static int \ simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##_(simde_##NT a, simde_##NT b, \ const char* filename, int line, const char* astr, const char* bstr) { \ ET a_[sizeof(a) / sizeof(ET)], b_[sizeof(b) / sizeof(ET)]; \ \ simde_vst1##modifier##_##neon_identifier(a_, a); \ simde_vst1##modifier##_##neon_identifier(b_, b); \ \ return simde_assert_equal_v##symbol_identifier##_(sizeof(a_) / sizeof(a_[0]), a_, b_, filename, line, astr, bstr); \ } #define SIMDE_TEST_ARM_NEON_GENERATE_FLOAT_TYPE_FUNCS_(NT, ET, SET, element_count, modifier, symbol_identifier) \ static simde_##NT \ simde_test_arm_neon_random_##symbol_identifier##x##element_count(ET min, ET max) { \ SET v[sizeof(simde_##NT) / sizeof(ET)]; \ simde_test_codegen_random_v##symbol_identifier(sizeof(v) / sizeof(v[0]), v, HEDLEY_STATIC_CAST(SET, min), HEDLEY_STATIC_CAST(SET, max)); \ return simde_vld1##modifier##_##symbol_identifier(HEDLEY_REINTERPRET_CAST(ET*, v)); \ } \ \ static void \ simde_test_arm_neon_write_##symbol_identifier##x##element_count(int indent, simde_##NT value, SimdeTestVecPos pos) { \ ET value_[sizeof(value) / sizeof(ET)]; \ simde_vst1##modifier##_##symbol_identifier(value_, value); \ simde_test_codegen_write_v##symbol_identifier(indent, sizeof(value_) / sizeof(value_[0]), value_, pos); \ } \ \ static int \ simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##_(simde_##NT a, simde_##NT b, ET slop, \ 
const char* filename, int line, const char* astr, const char* bstr) { \ SET a_[sizeof(a) / sizeof(ET)], b_[sizeof(b) / sizeof(ET)]; \ \ simde_vst1##modifier##_##symbol_identifier(a_, a); \ simde_vst1##modifier##_##symbol_identifier(b_, b); \ \ return simde_assert_equal_v##symbol_identifier##_(sizeof(a_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a_), HEDLEY_REINTERPRET_CAST(SET*, b_), slop, filename, line, astr, bstr); \ } \ \ static void \ simde_test_arm_neon_random_##symbol_identifier##x##element_count##_full( \ size_t test_sets, size_t vectors_per_set, \ ET values[HEDLEY_ARRAY_PARAM(test_sets * vectors_per_set * (sizeof(simde_##NT) / sizeof(ET)))], \ ET min, ET max, SimdeTestVecFloatType type) { \ simde_test_codegen_random_v##symbol_identifier##_full(test_sets, vectors_per_set, sizeof(simde_##NT) / sizeof(ET), values, HEDLEY_STATIC_CAST(SET, min), HEDLEY_STATIC_CAST(SET, max), type); \ } \ \ static simde_##NT \ simde_test_arm_neon_random_extract_##symbol_identifier##x##element_count(size_t set_num, size_t vectors_per_set, size_t vector_num, ET* values) { \ const size_t elements_per_vector = sizeof(simde_##NT) / sizeof(ET); \ const size_t elements_per_set = elements_per_vector * vectors_per_set; \ const size_t pos = (elements_per_set * set_num) + (elements_per_vector * vector_num); \ return simde_vld1##modifier##_##symbol_identifier(&(values[pos])); \ } HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ SIMDE_TEST_ARM_NEON_GENERATE_INT_TYPE_FUNCS_( int8x8_t, int8_t, 8, , i8, s8) SIMDE_TEST_ARM_NEON_GENERATE_INT_TYPE_FUNCS_( int16x4_t, int16_t, 4, , i16, s16) SIMDE_TEST_ARM_NEON_GENERATE_INT_TYPE_FUNCS_( int32x2_t, int32_t, 2, , i32, s32) SIMDE_TEST_ARM_NEON_GENERATE_INT_TYPE_FUNCS_( int64x1_t, int64_t, 1, , i64, s64) SIMDE_TEST_ARM_NEON_GENERATE_INT_TYPE_FUNCS_( uint8x8_t, uint8_t, 8, , u8, u8) SIMDE_TEST_ARM_NEON_GENERATE_INT_TYPE_FUNCS_( uint16x4_t, uint16_t, 4, , u16, u16) SIMDE_TEST_ARM_NEON_GENERATE_INT_TYPE_FUNCS_( uint32x2_t, uint32_t, 
2, , u32, u32) SIMDE_TEST_ARM_NEON_GENERATE_INT_TYPE_FUNCS_( uint64x1_t, uint64_t, 1, , u64, u64) SIMDE_TEST_ARM_NEON_GENERATE_FLOAT_TYPE_FUNCS_(float32x2_t, simde_float32_t, simde_float32, 2, , f32) SIMDE_TEST_ARM_NEON_GENERATE_FLOAT_TYPE_FUNCS_(float64x1_t, simde_float64_t, simde_float64, 1, , f64) SIMDE_TEST_ARM_NEON_GENERATE_INT_TYPE_FUNCS_( int8x16_t, int8_t, 16, q, i8, s8) SIMDE_TEST_ARM_NEON_GENERATE_INT_TYPE_FUNCS_( int16x8_t, int16_t, 8, q, i16, s16) SIMDE_TEST_ARM_NEON_GENERATE_INT_TYPE_FUNCS_( int32x4_t, int32_t, 4, q, i32, s32) SIMDE_TEST_ARM_NEON_GENERATE_INT_TYPE_FUNCS_( int64x2_t, int64_t, 2, q, i64, s64) SIMDE_TEST_ARM_NEON_GENERATE_INT_TYPE_FUNCS_( uint8x16_t, uint8_t, 16, q, u8, u8) SIMDE_TEST_ARM_NEON_GENERATE_INT_TYPE_FUNCS_( uint16x8_t, uint16_t, 8, q, u16, u16) SIMDE_TEST_ARM_NEON_GENERATE_INT_TYPE_FUNCS_( uint32x4_t, uint32_t, 4, q, u32, u32) SIMDE_TEST_ARM_NEON_GENERATE_INT_TYPE_FUNCS_( uint64x2_t, uint64_t, 2, q, u64, u64) SIMDE_TEST_ARM_NEON_GENERATE_FLOAT_TYPE_FUNCS_(float32x4_t, simde_float32_t, simde_float32, 4, q, f32) SIMDE_TEST_ARM_NEON_GENERATE_FLOAT_TYPE_FUNCS_(float64x2_t, simde_float64_t, simde_float64, 2, q, f64) HEDLEY_DIAGNOSTIC_POP #define SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_(NT, ET, element_count, modifier, symbol_identifier, neon_identifier) \ static simde_##NT \ simde_test_arm_neon_random_##symbol_identifier##x##element_count##x2(void) { \ simde_##NT v; \ simde_test_codegen_random_memory(sizeof(v), HEDLEY_REINTERPRET_CAST(uint8_t*, &v)); \ return v; \ } \ \ static void \ simde_test_arm_neon_write_##symbol_identifier##x##element_count##x2(int indent, simde_##NT value, SimdeTestVecPos pos) { \ if (pos == SIMDE_TEST_VEC_POS_FIRST) { \ simde_test_codegen_write_indent(indent); \ fputs("{\n", stdout); \ } \ ET value0_[sizeof(value) / sizeof(ET) / 2]; \ ET value1_[sizeof(value) / sizeof(ET) / 2]; \ \ simde_vst1##modifier##_##neon_identifier(value0_, value.val[0]); \ 
simde_vst1##modifier##_##neon_identifier(value1_, value.val[1]); \ \ simde_test_codegen_write_v##symbol_identifier(indent+2, sizeof(value0_) / sizeof(ET), value0_, SIMDE_TEST_VEC_POS_FIRST); \ simde_test_codegen_write_v##symbol_identifier(indent+2, sizeof(value1_) / sizeof(ET), value1_, SIMDE_TEST_VEC_POS_LAST); \ if (pos == SIMDE_TEST_VEC_POS_LAST) { \ simde_test_codegen_write_indent(indent); \ fputs("},\n", stdout); \ } \ \ } \ \ static int \ simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##x2_(simde_##NT a, simde_##NT b, \ const char* filename, int line, const char* astr, const char* bstr) { \ ET a0_[sizeof(a.val[0]) / sizeof(ET)], b0_[sizeof(b.val[0]) / sizeof(ET)]; \ ET a1_[sizeof(a.val[1]) / sizeof(ET)], b1_[sizeof(b.val[1]) / sizeof(ET)]; \ \ simde_vst1##modifier##_##neon_identifier(a0_, a.val[0]); \ simde_vst1##modifier##_##neon_identifier(b0_, b.val[0]); \ simde_vst1##modifier##_##neon_identifier(a1_, a.val[1]); \ simde_vst1##modifier##_##neon_identifier(b1_, b.val[1]); \ \ return simde_assert_equal_v##symbol_identifier##_(sizeof(a0_) / sizeof(a0_[0]), a0_, b0_, filename, line, astr, bstr) \ && simde_assert_equal_v##symbol_identifier##_(sizeof(a1_) / sizeof(a1_[0]), a1_, b1_, filename, line, astr, bstr); \ } #define SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(NT, ET, SET, element_count, modifier, symbol_identifier) \ static simde_##NT \ simde_test_arm_neon_random_##symbol_identifier##x##element_count##x2(ET min, ET max) { \ SET v0[sizeof(simde_##NT) / sizeof(ET) / 2]; \ SET v1[sizeof(simde_##NT) / sizeof(ET) / 2]; \ simde_test_codegen_random_v##symbol_identifier(sizeof(v0) / sizeof(v0[0]), v0, HEDLEY_STATIC_CAST(SET, min), HEDLEY_STATIC_CAST(SET, max)); \ simde_test_codegen_random_v##symbol_identifier(sizeof(v1) / sizeof(v1[0]), v1, HEDLEY_STATIC_CAST(SET, min), HEDLEY_STATIC_CAST(SET, max)); \ simde_##NT r; \ r.val[0] = simde_vld1##modifier##_##symbol_identifier(HEDLEY_REINTERPRET_CAST(ET*, v0)); \ r.val[1] = 
simde_vld1##modifier##_##symbol_identifier(HEDLEY_REINTERPRET_CAST(ET*, v1)); \ return r; \ } \ \ static void \ simde_test_arm_neon_write_##symbol_identifier##x##element_count##x2(int indent, simde_##NT value, SimdeTestVecPos pos) { \ if (pos == SIMDE_TEST_VEC_POS_FIRST) { \ simde_test_codegen_write_indent(indent); \ fputs("{\n", stdout); \ } \ \ ET value0_[sizeof(value) / sizeof(ET) / 2]; \ ET value1_[sizeof(value) / sizeof(ET) / 2]; \ simde_vst1##modifier##_##symbol_identifier(value0_, value.val[0]); \ simde_vst1##modifier##_##symbol_identifier(value1_, value.val[1]); \ simde_test_codegen_write_v##symbol_identifier(indent + 2, sizeof(value0_) / sizeof(value0_[0]), value0_, SIMDE_TEST_VEC_POS_FIRST); \ simde_test_codegen_write_v##symbol_identifier(indent + 2, sizeof(value1_) / sizeof(value1_[0]), value1_, SIMDE_TEST_VEC_POS_LAST); \ if (pos == SIMDE_TEST_VEC_POS_LAST) { \ simde_test_codegen_write_indent(indent); \ fputs("},\n", stdout); \ } \ } \ \ static int \ simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##x2_(simde_##NT a, simde_##NT b, ET slop, \ const char* filename, int line, const char* astr, const char* bstr) { \ SET a0_[sizeof(a) / sizeof(ET)], b0_[sizeof(b) / sizeof(ET)]; \ SET a1_[sizeof(a) / sizeof(ET)], b1_[sizeof(b) / sizeof(ET)]; \ \ simde_vst1##modifier##_##symbol_identifier(a0_, a.val[0]); \ simde_vst1##modifier##_##symbol_identifier(b0_, b.val[0]); \ simde_vst1##modifier##_##symbol_identifier(a1_, a.val[1]); \ simde_vst1##modifier##_##symbol_identifier(b1_, b.val[1]); \ \ return simde_assert_equal_v##symbol_identifier##_(sizeof(a0_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a0_), HEDLEY_REINTERPRET_CAST(SET*, b0_), slop, filename, line, astr, bstr) && \ simde_assert_equal_v##symbol_identifier##_(sizeof(a1_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a1_), HEDLEY_REINTERPRET_CAST(SET*, b1_), slop, filename, line, astr, bstr); \ } #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( 
int8x8x2_t, int8_t, 8, , i8, s8) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( int16x4x2_t, int16_t, 4, , i16, s16) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( int32x2x2_t, int32_t, 2, , i32, s32) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( int64x1x2_t, int64_t, 1, , i64, s64) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( uint8x8x2_t, uint8_t, 8, , u8, u8) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( uint16x4x2_t, uint16_t, 4, , u16, u16) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( uint32x2x2_t, uint32_t, 2, , u32, u32) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( uint64x1x2_t, uint64_t, 1, , u64, u64) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(float32x2x2_t, simde_float32_t, simde_float32, 2, , f32) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(float64x1x2_t, simde_float64_t, simde_float64, 1, , f64) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( int8x16x2_t, int8_t, 16, q, i8, s8) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( int16x8x2_t, int16_t, 8, q, i16, s16) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( int32x4x2_t, int32_t, 4, q, i32, s32) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( int64x2x2_t, int64_t, 2, q, i64, s64) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( uint8x16x2_t, uint8_t, 16, q, u8, u8) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( uint16x8x2_t, uint16_t, 8, q, u16, u16) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( uint32x4x2_t, uint32_t, 4, q, u32, u32) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( uint64x2x2_t, uint64_t, 2, q, u64, u64) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(float32x4x2_t, simde_float32_t, simde_float32, 4, q, f32) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(float64x2x2_t, simde_float64_t, simde_float64, 2, q, f64) #endif #define SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_INT_TYPE_FUNCS_(NT, ET, element_count, 
modifier, symbol_identifier, neon_identifier) \ static simde_##NT \ simde_test_arm_neon_random_##symbol_identifier##x##element_count##x3(void) { \ simde_##NT v; \ simde_test_codegen_random_memory(sizeof(v), HEDLEY_REINTERPRET_CAST(uint8_t*, &v)); \ return v; \ } \ \ static void \ simde_test_arm_neon_write_##symbol_identifier##x##element_count##x3(int indent, simde_##NT value, SimdeTestVecPos pos) { \ if (pos == SIMDE_TEST_VEC_POS_FIRST) { \ simde_test_codegen_write_indent(indent); \ fputs("{\n", stdout); \ } \ ET value0_[sizeof(value) / sizeof(ET) / 3]; \ ET value1_[sizeof(value) / sizeof(ET) / 3]; \ ET value2_[sizeof(value) / sizeof(ET) / 3]; \ \ simde_vst1##modifier##_##neon_identifier(value0_, value.val[0]); \ simde_vst1##modifier##_##neon_identifier(value1_, value.val[1]); \ simde_vst1##modifier##_##neon_identifier(value2_, value.val[2]); \ \ simde_test_codegen_write_v##symbol_identifier(indent+2, sizeof(value0_) / sizeof(ET), value0_, SIMDE_TEST_VEC_POS_FIRST); \ simde_test_codegen_write_v##symbol_identifier(indent+2, sizeof(value1_) / sizeof(ET), value1_, SIMDE_TEST_VEC_POS_MIDDLE); \ simde_test_codegen_write_v##symbol_identifier(indent+2, sizeof(value2_) / sizeof(ET), value2_, SIMDE_TEST_VEC_POS_LAST); \ if (pos == SIMDE_TEST_VEC_POS_LAST) { \ simde_test_codegen_write_indent(indent); \ fputs("},\n", stdout); \ } \ \ } \ \ static int \ simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##x3_(simde_##NT a, simde_##NT b, \ const char* filename, int line, const char* astr, const char* bstr) { \ ET a0_[sizeof(a.val[0]) / sizeof(ET)], b0_[sizeof(b.val[0]) / sizeof(ET)]; \ ET a1_[sizeof(a.val[1]) / sizeof(ET)], b1_[sizeof(b.val[1]) / sizeof(ET)]; \ ET a2_[sizeof(a.val[2]) / sizeof(ET)], b2_[sizeof(b.val[2]) / sizeof(ET)]; \ \ simde_vst1##modifier##_##neon_identifier(a0_, a.val[0]); \ simde_vst1##modifier##_##neon_identifier(b0_, b.val[0]); \ simde_vst1##modifier##_##neon_identifier(a1_, a.val[1]); \ simde_vst1##modifier##_##neon_identifier(b1_, 
/* Generates the three test helpers for a floating-point NEON aggregate made
 * of three vectors (simde_<NT>, accessed through val[0..2]).
 *
 *   NT                 aggregate type name without the simde_ prefix
 *   ET                 element type accepted by the vld1/vst1 intrinsics
 *   SET                element type used by the simde_test_codegen_* and
 *                      simde_assert_equal_v* helpers
 *   element_count      lanes per vector; only used to build function names
 *   modifier           empty for 64-bit vectors, q for 128-bit vectors
 *   symbol_identifier  type suffix shared by the helpers and intrinsics
 *
 * Generated functions:
 *   simde_test_arm_neon_random_<sym>x<n>x3(min, max)
 *       fills one scratch array per vector through
 *       simde_test_codegen_random_v<sym> (forwarding min and max) and loads
 *       each array into the matching val[] slot.
 *   simde_test_arm_neon_write_<sym>x<n>x3(indent, value, pos)
 *       stores the three vectors and prints them through
 *       simde_test_codegen_write_v<sym>, wrapped in "{" / "}," when pos is
 *       SIMDE_TEST_VEC_POS_FIRST / SIMDE_TEST_VEC_POS_LAST.
 *   simde_test_arm_neon_assert_equal_<sym>x<n>x3_(a, b, slop, ...)
 *       compares a and b vector by vector and returns nonzero as soon as one
 *       pair differs.
 *
 * NOTE(review): the "nonzero means mismatch" convention is taken from the
 * single-vector wrapper macros in this header, which return 1 from the test
 * when their helper yields nonzero; confirm against
 * simde_assert_equal_v<sym>_.
 */
#define SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_FUNCS_(NT, ET, SET, element_count, modifier, symbol_identifier) \
  static simde_##NT \
  simde_test_arm_neon_random_##symbol_identifier##x##element_count##x3(ET min, ET max) { \
    SET v0[sizeof(simde_##NT) / sizeof(ET) / 3]; \
    SET v1[sizeof(simde_##NT) / sizeof(ET) / 3]; \
    SET v2[sizeof(simde_##NT) / sizeof(ET) / 3]; \
    simde_test_codegen_random_v##symbol_identifier(sizeof(v0) / sizeof(v0[0]), v0, HEDLEY_STATIC_CAST(SET, min), HEDLEY_STATIC_CAST(SET, max)); \
    simde_test_codegen_random_v##symbol_identifier(sizeof(v1) / sizeof(v1[0]), v1, HEDLEY_STATIC_CAST(SET, min), HEDLEY_STATIC_CAST(SET, max)); \
    simde_test_codegen_random_v##symbol_identifier(sizeof(v2) / sizeof(v2[0]), v2, HEDLEY_STATIC_CAST(SET, min), HEDLEY_STATIC_CAST(SET, max)); \
    simde_##NT r; \
    r.val[0] = simde_vld1##modifier##_##symbol_identifier(HEDLEY_REINTERPRET_CAST(ET*, v0)); \
    r.val[1] = simde_vld1##modifier##_##symbol_identifier(HEDLEY_REINTERPRET_CAST(ET*, v1)); \
    r.val[2] = simde_vld1##modifier##_##symbol_identifier(HEDLEY_REINTERPRET_CAST(ET*, v2)); \
    return r; \
  } \
  \
  static void \
  simde_test_arm_neon_write_##symbol_identifier##x##element_count##x3(int indent, simde_##NT value, SimdeTestVecPos pos) { \
    if (pos == SIMDE_TEST_VEC_POS_FIRST) { \
      simde_test_codegen_write_indent(indent); \
      fputs("{\n", stdout); \
    } \
    \
    ET value0_[sizeof(value) / sizeof(ET) / 3]; \
    ET value1_[sizeof(value) / sizeof(ET) / 3]; \
    ET value2_[sizeof(value) / sizeof(ET) / 3]; \
    simde_vst1##modifier##_##symbol_identifier(value0_, value.val[0]); \
    simde_vst1##modifier##_##symbol_identifier(value1_, value.val[1]); \
    simde_vst1##modifier##_##symbol_identifier(value2_, value.val[2]); \
    simde_test_codegen_write_v##symbol_identifier(indent + 2, sizeof(value0_) / sizeof(value0_[0]), value0_, SIMDE_TEST_VEC_POS_FIRST); \
    simde_test_codegen_write_v##symbol_identifier(indent + 2, sizeof(value1_) / sizeof(value1_[0]), value1_, SIMDE_TEST_VEC_POS_MIDDLE); \
    simde_test_codegen_write_v##symbol_identifier(indent + 2, sizeof(value2_) / sizeof(value2_[0]), value2_, SIMDE_TEST_VEC_POS_LAST); \
    if (pos == SIMDE_TEST_VEC_POS_LAST) { \
      simde_test_codegen_write_indent(indent); \
      fputs("},\n", stdout); \
    } \
  } \
  \
  static int \
  simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##x3_(simde_##NT a, simde_##NT b, ET slop, \
      const char* filename, int line, const char* astr, const char* bstr) { \
    /* One vector per scratch array: vst1 writes exactly sizeof(a.val[k]) */ \
    /* bytes, so sizing from the whole aggregate would leave two thirds   */ \
    /* of every array uninitialised and then compare that garbage.        */ \
    SET a0_[sizeof(a.val[0]) / sizeof(ET)], b0_[sizeof(b.val[0]) / sizeof(ET)]; \
    SET a1_[sizeof(a.val[1]) / sizeof(ET)], b1_[sizeof(b.val[1]) / sizeof(ET)]; \
    SET a2_[sizeof(a.val[2]) / sizeof(ET)], b2_[sizeof(b.val[2]) / sizeof(ET)]; \
    \
    simde_vst1##modifier##_##symbol_identifier(a0_, a.val[0]); \
    simde_vst1##modifier##_##symbol_identifier(b0_, b.val[0]); \
    simde_vst1##modifier##_##symbol_identifier(a1_, a.val[1]); \
    simde_vst1##modifier##_##symbol_identifier(b1_, b.val[1]); \
    simde_vst1##modifier##_##symbol_identifier(a2_, a.val[2]); \
    simde_vst1##modifier##_##symbol_identifier(b2_, b.val[2]); \
    \
    /* Any single mismatch must fail the assertion, hence || and not &&. */ \
    return simde_assert_equal_v##symbol_identifier##_(sizeof(a0_) / sizeof(a0_[0]), HEDLEY_REINTERPRET_CAST(SET*, a0_), HEDLEY_REINTERPRET_CAST(SET*, b0_), slop, filename, line, astr, bstr) || \
           simde_assert_equal_v##symbol_identifier##_(sizeof(a1_) / sizeof(a1_[0]), HEDLEY_REINTERPRET_CAST(SET*, a1_), HEDLEY_REINTERPRET_CAST(SET*, b1_), slop, filename, line, astr, bstr) || \
           simde_assert_equal_v##symbol_identifier##_(sizeof(a2_) / sizeof(a2_[0]), HEDLEY_REINTERPRET_CAST(SET*, a2_), HEDLEY_REINTERPRET_CAST(SET*, b2_), slop, filename, line, astr, bstr); \
  }
simde_float64_t, simde_float64, 2, q, f64) #endif #define SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_INT_TYPE_FUNCS_(NT, ET, element_count, modifier, symbol_identifier, neon_identifier) \ static simde_##NT \ simde_test_arm_neon_random_##symbol_identifier##x##element_count##x4(void) { \ simde_##NT v; \ simde_test_codegen_random_memory(sizeof(v), HEDLEY_REINTERPRET_CAST(uint8_t*, &v)); \ return v; \ } \ \ static void \ simde_test_arm_neon_write_##symbol_identifier##x##element_count##x4(int indent, simde_##NT value, SimdeTestVecPos pos) { \ if (pos == SIMDE_TEST_VEC_POS_FIRST) { \ simde_test_codegen_write_indent(indent); \ fputs("{\n", stdout); \ } \ ET value0_[sizeof(value) / sizeof(ET) / 4]; \ ET value1_[sizeof(value) / sizeof(ET) / 4]; \ ET value2_[sizeof(value) / sizeof(ET) / 4]; \ ET value3_[sizeof(value) / sizeof(ET) / 4]; \ \ simde_vst1##modifier##_##neon_identifier(value0_, value.val[0]); \ simde_vst1##modifier##_##neon_identifier(value1_, value.val[1]); \ simde_vst1##modifier##_##neon_identifier(value2_, value.val[2]); \ simde_vst1##modifier##_##neon_identifier(value3_, value.val[3]); \ \ simde_test_codegen_write_v##symbol_identifier(indent+2, sizeof(value0_) / sizeof(ET), value0_, SIMDE_TEST_VEC_POS_FIRST); \ simde_test_codegen_write_v##symbol_identifier(indent+2, sizeof(value1_) / sizeof(ET), value1_, SIMDE_TEST_VEC_POS_MIDDLE); \ simde_test_codegen_write_v##symbol_identifier(indent+2, sizeof(value2_) / sizeof(ET), value2_, SIMDE_TEST_VEC_POS_MIDDLE); \ simde_test_codegen_write_v##symbol_identifier(indent+2, sizeof(value3_) / sizeof(ET), value3_, SIMDE_TEST_VEC_POS_LAST); \ if (pos == SIMDE_TEST_VEC_POS_LAST) { \ simde_test_codegen_write_indent(indent); \ fputs("},\n", stdout); \ } \ \ } \ \ static int \ simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##x4_(simde_##NT a, simde_##NT b, \ const char* filename, int line, const char* astr, const char* bstr) { \ ET a0_[sizeof(a.val[0]) / sizeof(ET)], b0_[sizeof(b.val[0]) / sizeof(ET)]; \ ET 
/* Generates the three test helpers for a floating-point NEON aggregate made
 * of four vectors (simde_<NT>, accessed through val[0..3]).
 *
 *   NT                 aggregate type name without the simde_ prefix
 *   ET                 element type accepted by the vld1/vst1 intrinsics
 *   SET                element type used by the simde_test_codegen_* and
 *                      simde_assert_equal_v* helpers
 *   element_count      lanes per vector; only used to build function names
 *   modifier           empty for 64-bit vectors, q for 128-bit vectors
 *   symbol_identifier  type suffix shared by the helpers and intrinsics
 *
 * Generated functions:
 *   simde_test_arm_neon_random_<sym>x<n>x4(min, max)
 *       fills one scratch array per vector through
 *       simde_test_codegen_random_v<sym> (forwarding min and max) and loads
 *       each array into the matching val[] slot.
 *   simde_test_arm_neon_write_<sym>x<n>x4(indent, value, pos)
 *       stores the four vectors and prints them through
 *       simde_test_codegen_write_v<sym>, wrapped in "{" / "}," when pos is
 *       SIMDE_TEST_VEC_POS_FIRST / SIMDE_TEST_VEC_POS_LAST.
 *   simde_test_arm_neon_assert_equal_<sym>x<n>x4_(a, b, slop, ...)
 *       compares a and b vector by vector and returns nonzero as soon as one
 *       pair differs.
 *
 * NOTE(review): the "nonzero means mismatch" convention is taken from the
 * single-vector wrapper macros in this header, which return 1 from the test
 * when their helper yields nonzero; confirm against
 * simde_assert_equal_v<sym>_.
 */
#define SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_FUNCS_(NT, ET, SET, element_count, modifier, symbol_identifier) \
  static simde_##NT \
  simde_test_arm_neon_random_##symbol_identifier##x##element_count##x4(ET min, ET max) { \
    SET v0[sizeof(simde_##NT) / sizeof(ET) / 4]; \
    SET v1[sizeof(simde_##NT) / sizeof(ET) / 4]; \
    SET v2[sizeof(simde_##NT) / sizeof(ET) / 4]; \
    SET v3[sizeof(simde_##NT) / sizeof(ET) / 4]; \
    simde_test_codegen_random_v##symbol_identifier(sizeof(v0) / sizeof(v0[0]), v0, HEDLEY_STATIC_CAST(SET, min), HEDLEY_STATIC_CAST(SET, max)); \
    simde_test_codegen_random_v##symbol_identifier(sizeof(v1) / sizeof(v1[0]), v1, HEDLEY_STATIC_CAST(SET, min), HEDLEY_STATIC_CAST(SET, max)); \
    simde_test_codegen_random_v##symbol_identifier(sizeof(v2) / sizeof(v2[0]), v2, HEDLEY_STATIC_CAST(SET, min), HEDLEY_STATIC_CAST(SET, max)); \
    simde_test_codegen_random_v##symbol_identifier(sizeof(v3) / sizeof(v3[0]), v3, HEDLEY_STATIC_CAST(SET, min), HEDLEY_STATIC_CAST(SET, max)); \
    simde_##NT r; \
    r.val[0] = simde_vld1##modifier##_##symbol_identifier(HEDLEY_REINTERPRET_CAST(ET*, v0)); \
    r.val[1] = simde_vld1##modifier##_##symbol_identifier(HEDLEY_REINTERPRET_CAST(ET*, v1)); \
    r.val[2] = simde_vld1##modifier##_##symbol_identifier(HEDLEY_REINTERPRET_CAST(ET*, v2)); \
    r.val[3] = simde_vld1##modifier##_##symbol_identifier(HEDLEY_REINTERPRET_CAST(ET*, v3)); \
    return r; \
  } \
  \
  static void \
  simde_test_arm_neon_write_##symbol_identifier##x##element_count##x4(int indent, simde_##NT value, SimdeTestVecPos pos) { \
    if (pos == SIMDE_TEST_VEC_POS_FIRST) { \
      simde_test_codegen_write_indent(indent); \
      fputs("{\n", stdout); \
    } \
    \
    ET value0_[sizeof(value) / sizeof(ET) / 4]; \
    ET value1_[sizeof(value) / sizeof(ET) / 4]; \
    ET value2_[sizeof(value) / sizeof(ET) / 4]; \
    ET value3_[sizeof(value) / sizeof(ET) / 4]; \
    simde_vst1##modifier##_##symbol_identifier(value0_, value.val[0]); \
    simde_vst1##modifier##_##symbol_identifier(value1_, value.val[1]); \
    simde_vst1##modifier##_##symbol_identifier(value2_, value.val[2]); \
    simde_vst1##modifier##_##symbol_identifier(value3_, value.val[3]); \
    simde_test_codegen_write_v##symbol_identifier(indent + 2, sizeof(value0_) / sizeof(value0_[0]), value0_, SIMDE_TEST_VEC_POS_FIRST); \
    simde_test_codegen_write_v##symbol_identifier(indent + 2, sizeof(value1_) / sizeof(value1_[0]), value1_, SIMDE_TEST_VEC_POS_MIDDLE); \
    simde_test_codegen_write_v##symbol_identifier(indent + 2, sizeof(value2_) / sizeof(value2_[0]), value2_, SIMDE_TEST_VEC_POS_MIDDLE); \
    simde_test_codegen_write_v##symbol_identifier(indent + 2, sizeof(value3_) / sizeof(value3_[0]), value3_, SIMDE_TEST_VEC_POS_LAST); \
    if (pos == SIMDE_TEST_VEC_POS_LAST) { \
      simde_test_codegen_write_indent(indent); \
      fputs("},\n", stdout); \
    } \
  } \
  \
  static int \
  simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##x4_(simde_##NT a, simde_##NT b, ET slop, \
      const char* filename, int line, const char* astr, const char* bstr) { \
    /* One vector per scratch array: vst1 writes exactly sizeof(a.val[k]) */ \
    /* bytes, so sizing from the whole aggregate would leave three        */ \
    /* quarters of every array uninitialised and then compare that.       */ \
    SET a0_[sizeof(a.val[0]) / sizeof(ET)], b0_[sizeof(b.val[0]) / sizeof(ET)]; \
    SET a1_[sizeof(a.val[1]) / sizeof(ET)], b1_[sizeof(b.val[1]) / sizeof(ET)]; \
    SET a2_[sizeof(a.val[2]) / sizeof(ET)], b2_[sizeof(b.val[2]) / sizeof(ET)]; \
    SET a3_[sizeof(a.val[3]) / sizeof(ET)], b3_[sizeof(b.val[3]) / sizeof(ET)]; \
    \
    simde_vst1##modifier##_##symbol_identifier(a0_, a.val[0]); \
    simde_vst1##modifier##_##symbol_identifier(b0_, b.val[0]); \
    simde_vst1##modifier##_##symbol_identifier(a1_, a.val[1]); \
    simde_vst1##modifier##_##symbol_identifier(b1_, b.val[1]); \
    simde_vst1##modifier##_##symbol_identifier(a2_, a.val[2]); \
    simde_vst1##modifier##_##symbol_identifier(b2_, b.val[2]); \
    simde_vst1##modifier##_##symbol_identifier(a3_, a.val[3]); \
    simde_vst1##modifier##_##symbol_identifier(b3_, b.val[3]); \
    \
    /* Any single mismatch must fail the assertion, hence || and not &&. */ \
    return simde_assert_equal_v##symbol_identifier##_(sizeof(a0_) / sizeof(a0_[0]), HEDLEY_REINTERPRET_CAST(SET*, a0_), HEDLEY_REINTERPRET_CAST(SET*, b0_), slop, filename, line, astr, bstr) || \
           simde_assert_equal_v##symbol_identifier##_(sizeof(a1_) / sizeof(a1_[0]), HEDLEY_REINTERPRET_CAST(SET*, a1_), HEDLEY_REINTERPRET_CAST(SET*, b1_), slop, filename, line, astr, bstr) || \
           simde_assert_equal_v##symbol_identifier##_(sizeof(a2_) / sizeof(a2_[0]), HEDLEY_REINTERPRET_CAST(SET*, a2_), HEDLEY_REINTERPRET_CAST(SET*, b2_), slop, filename, line, astr, bstr) || \
           simde_assert_equal_v##symbol_identifier##_(sizeof(a3_) / sizeof(a3_[0]), HEDLEY_REINTERPRET_CAST(SET*, a3_), HEDLEY_REINTERPRET_CAST(SET*, b3_), slop, filename, line, astr, bstr); \
  }
/* Assertion wrappers for single NEON vectors.
 *
 * Each wrapper forwards both operands, the call site (__FILE__/__LINE__) and
 * the operands' source spelling to the matching ..._() helper, and makes the
 * enclosing function return 1 when that helper yields a nonzero result.
 *
 * The floating-point variants take an extra `precision` token that is pasted
 * into the literal 1e-<precision> (with an f suffix for the f32 variants) and
 * passed to the helper as the third argument.
 *
 * Every wrapper is deliberately spelled out on its own instead of sharing a
 * helper macro: routing a and b through another macro layer would expand them
 * before #a / #b are applied and change the text handed to the helper.
 */

/* 64-bit vectors */
#define simde_test_arm_neon_assert_equal_i8x8(a, b) \
  do { \
    if (simde_test_arm_neon_assert_equal_i8x8_(a, b, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
#define simde_test_arm_neon_assert_equal_i16x4(a, b) \
  do { \
    if (simde_test_arm_neon_assert_equal_i16x4_(a, b, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
#define simde_test_arm_neon_assert_equal_i32x2(a, b) \
  do { \
    if (simde_test_arm_neon_assert_equal_i32x2_(a, b, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
#define simde_test_arm_neon_assert_equal_i64x1(a, b) \
  do { \
    if (simde_test_arm_neon_assert_equal_i64x1_(a, b, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
#define simde_test_arm_neon_assert_equal_u8x8(a, b) \
  do { \
    if (simde_test_arm_neon_assert_equal_u8x8_(a, b, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
#define simde_test_arm_neon_assert_equal_u16x4(a, b) \
  do { \
    if (simde_test_arm_neon_assert_equal_u16x4_(a, b, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
#define simde_test_arm_neon_assert_equal_u32x2(a, b) \
  do { \
    if (simde_test_arm_neon_assert_equal_u32x2_(a, b, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
#define simde_test_arm_neon_assert_equal_u64x1(a, b) \
  do { \
    if (simde_test_arm_neon_assert_equal_u64x1_(a, b, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
#define simde_test_arm_neon_assert_equal_f32x2(a, b, precision) \
  do { \
    if (simde_test_arm_neon_assert_equal_f32x2_(a, b, 1e-##precision##f, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
#define simde_test_arm_neon_assert_equal_f64x1(a, b, precision) \
  do { \
    if (simde_test_arm_neon_assert_equal_f64x1_(a, b, 1e-##precision, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)

/* 128-bit vectors */
#define simde_test_arm_neon_assert_equal_i8x16(a, b) \
  do { \
    if (simde_test_arm_neon_assert_equal_i8x16_(a, b, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
#define simde_test_arm_neon_assert_equal_i16x8(a, b) \
  do { \
    if (simde_test_arm_neon_assert_equal_i16x8_(a, b, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
#define simde_test_arm_neon_assert_equal_i32x4(a, b) \
  do { \
    if (simde_test_arm_neon_assert_equal_i32x4_(a, b, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
#define simde_test_arm_neon_assert_equal_i64x2(a, b) \
  do { \
    if (simde_test_arm_neon_assert_equal_i64x2_(a, b, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
#define simde_test_arm_neon_assert_equal_u8x16(a, b) \
  do { \
    if (simde_test_arm_neon_assert_equal_u8x16_(a, b, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
#define simde_test_arm_neon_assert_equal_u16x8(a, b) \
  do { \
    if (simde_test_arm_neon_assert_equal_u16x8_(a, b, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
#define simde_test_arm_neon_assert_equal_u32x4(a, b) \
  do { \
    if (simde_test_arm_neon_assert_equal_u32x4_(a, b, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
#define simde_test_arm_neon_assert_equal_u64x2(a, b) \
  do { \
    if (simde_test_arm_neon_assert_equal_u64x2_(a, b, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
#define simde_test_arm_neon_assert_equal_f32x4(a, b, precision) \
  do { \
    if (simde_test_arm_neon_assert_equal_f32x4_(a, b, 1e-##precision##f, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
#define simde_test_arm_neon_assert_equal_f64x2(a, b, precision) \
  do { \
    if (simde_test_arm_neon_assert_equal_f64x2_(a, b, 1e-##precision, __FILE__, __LINE__, #a, #b) != 0) \
      return 1; \
  } while (0)
SIMDE_FLOAT32_C( 147.69), SIMDE_FLOAT32_C( 533.92) }, { { SIMDE_FLOAT32_C( -579.97), SIMDE_FLOAT32_C( 147.69) }, { SIMDE_FLOAT32_C( -698.40), SIMDE_FLOAT32_C( 533.92) }, }, }, { { SIMDE_FLOAT32_C( -647.47), SIMDE_FLOAT32_C( 944.41) }, { SIMDE_FLOAT32_C( -570.83), SIMDE_FLOAT32_C( -698.09) }, { { SIMDE_FLOAT32_C( -647.47), SIMDE_FLOAT32_C( -570.83) }, { SIMDE_FLOAT32_C( 944.41), SIMDE_FLOAT32_C( -698.09) }, }, }, { { SIMDE_FLOAT32_C( -636.98), SIMDE_FLOAT32_C( 269.75) }, { SIMDE_FLOAT32_C( 457.30), SIMDE_FLOAT32_C( -517.32) }, { { SIMDE_FLOAT32_C( -636.98), SIMDE_FLOAT32_C( 457.30) }, { SIMDE_FLOAT32_C( 269.75), SIMDE_FLOAT32_C( -517.32) }, }, }, { { SIMDE_FLOAT32_C( 625.59), SIMDE_FLOAT32_C( 316.45) }, { SIMDE_FLOAT32_C( -969.65), SIMDE_FLOAT32_C( 377.99) }, { { SIMDE_FLOAT32_C( 625.59), SIMDE_FLOAT32_C( -969.65) }, { SIMDE_FLOAT32_C( 316.45), SIMDE_FLOAT32_C( 377.99) }, }, }, { { SIMDE_FLOAT32_C( 858.52), SIMDE_FLOAT32_C( -465.14) }, { SIMDE_FLOAT32_C( 468.83), SIMDE_FLOAT32_C( -15.96) }, { { SIMDE_FLOAT32_C( 858.52), SIMDE_FLOAT32_C( 468.83) }, { SIMDE_FLOAT32_C( -465.14), SIMDE_FLOAT32_C( -15.96) }, }, }, { { SIMDE_FLOAT32_C( -52.83), SIMDE_FLOAT32_C( 881.31) }, { SIMDE_FLOAT32_C( -110.73), SIMDE_FLOAT32_C( 761.20) }, { { SIMDE_FLOAT32_C( -52.83), SIMDE_FLOAT32_C( -110.73) }, { SIMDE_FLOAT32_C( 881.31), SIMDE_FLOAT32_C( 761.20) }, }, }, { { SIMDE_FLOAT32_C( -940.87), SIMDE_FLOAT32_C( 486.49) }, { SIMDE_FLOAT32_C( 618.70), SIMDE_FLOAT32_C( 454.05) }, { { SIMDE_FLOAT32_C( -940.87), SIMDE_FLOAT32_C( 618.70) }, { SIMDE_FLOAT32_C( 486.49), SIMDE_FLOAT32_C( 454.05) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2x2_t r = simde_vtrn_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r.val[0], simde_vld1_f32(test_vec[i].r[0]), 1); simde_test_arm_neon_assert_equal_f32x2(r.val[1], simde_vld1_f32(test_vec[i].r[1]), 
1); } return 0; } static int test_simde_vtrn_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[2][8]; } test_vec[] = { { { -INT8_C( 66), INT8_C( 38), -INT8_C( 90), INT8_C( 86), INT8_C( 96), INT8_C( 65), INT8_C( 68), -INT8_C( 61) }, { INT8_C( 123), INT8_C( 78), -INT8_C( 60), INT8_C( 111), INT8_C( 17), INT8_C( 8), INT8_C( 37), INT8_C( 95) }, { { -INT8_C( 66), INT8_C( 123), -INT8_C( 90), -INT8_C( 60), INT8_C( 96), INT8_C( 17), INT8_C( 68), INT8_C( 37) }, { INT8_C( 38), INT8_C( 78), INT8_C( 86), INT8_C( 111), INT8_C( 65), INT8_C( 8), -INT8_C( 61), INT8_C( 95) }, }, }, { { -INT8_C( 36), INT8_C( 105), INT8_C( 18), INT8_C( 56), -INT8_C( 95), INT8_C( 16), -INT8_C( 54), INT8_C( 86) }, { -INT8_C( 27), -INT8_C( 28), -INT8_C( 104), -INT8_C( 70), -INT8_C( 36), INT8_C( 120), -INT8_C( 63), -INT8_C( 102) }, { { -INT8_C( 36), -INT8_C( 27), INT8_C( 18), -INT8_C( 104), -INT8_C( 95), -INT8_C( 36), -INT8_C( 54), -INT8_C( 63) }, { INT8_C( 105), -INT8_C( 28), INT8_C( 56), -INT8_C( 70), INT8_C( 16), INT8_C( 120), INT8_C( 86), -INT8_C( 102) }, }, }, { { -INT8_C( 98), INT8_C( 103), -INT8_C( 16), -INT8_C( 1), -INT8_C( 88), INT8_C( 52), -INT8_C( 62), INT8_C( 35) }, { -INT8_C( 126), -INT8_C( 121), -INT8_C( 109), -INT8_C( 108), -INT8_C( 113), -INT8_C( 72), -INT8_C( 13), INT8_C( 107) }, { { -INT8_C( 98), -INT8_C( 126), -INT8_C( 16), -INT8_C( 109), -INT8_C( 88), -INT8_C( 113), -INT8_C( 62), -INT8_C( 13) }, { INT8_C( 103), -INT8_C( 121), -INT8_C( 1), -INT8_C( 108), INT8_C( 52), -INT8_C( 72), INT8_C( 35), INT8_C( 107) }, }, }, { { INT8_C( 33), INT8_C( 5), -INT8_C( 93), -INT8_C( 62), INT8_C( 21), INT8_C( 110), INT8_C( 24), -INT8_C( 5) }, { INT8_C( 82), -INT8_C( 80), -INT8_C( 75), INT8_C( 46), INT8_C( 40), INT8_C( 118), -INT8_C( 56), -INT8_C( 57) }, { { INT8_C( 33), INT8_C( 82), -INT8_C( 93), -INT8_C( 75), INT8_C( 21), INT8_C( 40), INT8_C( 24), -INT8_C( 56) }, { INT8_C( 5), -INT8_C( 80), -INT8_C( 62), INT8_C( 46), INT8_C( 110), INT8_C( 118), -INT8_C( 5), -INT8_C( 57) }, }, }, { 
{ -INT8_C( 35), -INT8_C( 72), -INT8_C( 58), -INT8_C( 123), -INT8_C( 19), -INT8_C( 120), -INT8_C( 87), INT8_C( 111) }, { INT8_C( 15), INT8_C( 60), INT8_C( 3), -INT8_C( 98), -INT8_C( 12), -INT8_C( 10), INT8_C( 9), INT8_C( 21) }, { { -INT8_C( 35), INT8_C( 15), -INT8_C( 58), INT8_C( 3), -INT8_C( 19), -INT8_C( 12), -INT8_C( 87), INT8_C( 9) }, { -INT8_C( 72), INT8_C( 60), -INT8_C( 123), -INT8_C( 98), -INT8_C( 120), -INT8_C( 10), INT8_C( 111), INT8_C( 21) }, }, }, { { -INT8_C( 5), -INT8_C( 83), -INT8_C( 41), INT8_C( 17), INT8_C( 27), -INT8_C( 17), INT8_C( 12), INT8_C( 109) }, { -INT8_C( 97), -INT8_C( 63), -INT8_C( 101), -INT8_C( 56), INT8_C( 55), INT8_C( 99), -INT8_C( 113), INT8_C( 20) }, { { -INT8_C( 5), -INT8_C( 97), -INT8_C( 41), -INT8_C( 101), INT8_C( 27), INT8_C( 55), INT8_C( 12), -INT8_C( 113) }, { -INT8_C( 83), -INT8_C( 63), INT8_C( 17), -INT8_C( 56), -INT8_C( 17), INT8_C( 99), INT8_C( 109), INT8_C( 20) }, }, }, { { INT8_C( 27), INT8_C( 85), -INT8_C( 103), INT8_C( 8), -INT8_C( 35), INT8_C( 66), INT8_C( 120), -INT8_C( 19) }, { INT8_C( 126), INT8_C( 123), -INT8_C( 117), INT8_C( 114), INT8_C( 114), -INT8_C( 107), -INT8_C( 121), INT8_C( 109) }, { { INT8_C( 27), INT8_C( 126), -INT8_C( 103), -INT8_C( 117), -INT8_C( 35), INT8_C( 114), INT8_C( 120), -INT8_C( 121) }, { INT8_C( 85), INT8_C( 123), INT8_C( 8), INT8_C( 114), INT8_C( 66), -INT8_C( 107), -INT8_C( 19), INT8_C( 109) }, }, }, { { INT8_C( 66), INT8_C( 94), INT8_C( 126), INT8_C( 93), INT8_C( 77), -INT8_C( 118), -INT8_C( 54), -INT8_C( 19) }, { INT8_C( 75), INT8_C( 101), -INT8_C( 75), -INT8_C( 126), -INT8_C( 56), INT8_C( 68), -INT8_C( 106), -INT8_C( 29) }, { { INT8_C( 66), INT8_C( 75), INT8_C( 126), -INT8_C( 75), INT8_C( 77), -INT8_C( 56), -INT8_C( 54), -INT8_C( 106) }, { INT8_C( 94), INT8_C( 101), INT8_C( 93), -INT8_C( 126), -INT8_C( 118), INT8_C( 68), -INT8_C( 19), -INT8_C( 29) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); 
simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8x2_t r = simde_vtrn_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r.val[0], simde_vld1_s8(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_i8x8(r.val[1], simde_vld1_s8(test_vec[i].r[1])); } return 0; } static int test_simde_vtrn_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[2][4]; } test_vec[] = { { { INT16_C( 29957), -INT16_C( 14479), -INT16_C( 14585), INT16_C( 20407) }, { -INT16_C( 13813), -INT16_C( 5946), -INT16_C( 9907), INT16_C( 28653) }, { { INT16_C( 29957), -INT16_C( 13813), -INT16_C( 14585), -INT16_C( 9907) }, { -INT16_C( 14479), -INT16_C( 5946), INT16_C( 20407), INT16_C( 28653) }, }, }, { { -INT16_C( 5745), INT16_C( 30710), INT16_C( 14724), -INT16_C( 32756) }, { INT16_C( 19325), -INT16_C( 32716), -INT16_C( 11477), INT16_C( 12334) }, { { -INT16_C( 5745), INT16_C( 19325), INT16_C( 14724), -INT16_C( 11477) }, { INT16_C( 30710), -INT16_C( 32716), -INT16_C( 32756), INT16_C( 12334) }, }, }, { { -INT16_C( 24760), INT16_C( 20728), -INT16_C( 20633), INT16_C( 29343) }, { INT16_C( 25978), -INT16_C( 14502), INT16_C( 18494), -INT16_C( 12746) }, { { -INT16_C( 24760), INT16_C( 25978), -INT16_C( 20633), INT16_C( 18494) }, { INT16_C( 20728), -INT16_C( 14502), INT16_C( 29343), -INT16_C( 12746) }, }, }, { { INT16_C( 11313), -INT16_C( 18875), INT16_C( 21093), -INT16_C( 7626) }, { INT16_C( 27293), -INT16_C( 14238), -INT16_C( 28355), -INT16_C( 30983) }, { { INT16_C( 11313), INT16_C( 27293), INT16_C( 21093), -INT16_C( 28355) }, { -INT16_C( 18875), -INT16_C( 14238), -INT16_C( 7626), -INT16_C( 30983) }, }, }, { { -INT16_C( 3792), -INT16_C( 26666), INT16_C( 30112), INT16_C( 6666) }, { INT16_C( 25818), INT16_C( 6625), INT16_C( 6060), -INT16_C( 8473) }, { { -INT16_C( 3792), INT16_C( 25818), INT16_C( 30112), INT16_C( 6060) }, { -INT16_C( 26666), INT16_C( 6625), INT16_C( 6666), -INT16_C( 8473) }, }, }, { { INT16_C( 11331), -INT16_C( 22380), -INT16_C( 13698), INT16_C( 7051) }, { -INT16_C( 
4811), INT16_C( 29412), -INT16_C( 8834), -INT16_C( 20488) }, { { INT16_C( 11331), -INT16_C( 4811), -INT16_C( 13698), -INT16_C( 8834) }, { -INT16_C( 22380), INT16_C( 29412), INT16_C( 7051), -INT16_C( 20488) }, }, }, { { -INT16_C( 12594), INT16_C( 28230), INT16_C( 20547), INT16_C( 7817) }, { INT16_C( 27317), INT16_C( 24887), INT16_C( 7810), -INT16_C( 15041) }, { { -INT16_C( 12594), INT16_C( 27317), INT16_C( 20547), INT16_C( 7810) }, { INT16_C( 28230), INT16_C( 24887), INT16_C( 7817), -INT16_C( 15041) }, }, }, { { -INT16_C( 11446), -INT16_C( 13970), -INT16_C( 1634), -INT16_C( 11292) }, { -INT16_C( 14106), INT16_C( 25925), INT16_C( 16037), INT16_C( 29460) }, { { -INT16_C( 11446), -INT16_C( 14106), -INT16_C( 1634), INT16_C( 16037) }, { -INT16_C( 13970), INT16_C( 25925), -INT16_C( 11292), INT16_C( 29460) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4x2_t r = simde_vtrn_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r.val[0], simde_vld1_s16(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_i16x4(r.val[1], simde_vld1_s16(test_vec[i].r[1])); } return 0; } static int test_simde_vtrn_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2][2]; } test_vec[] = { { { INT32_C( 2034330713), -INT32_C( 659240214) }, { -INT32_C( 236237116), INT32_C( 331646780) }, { { INT32_C( 2034330713), -INT32_C( 236237116) }, { -INT32_C( 659240214), INT32_C( 331646780) }, }, }, { { -INT32_C( 1506908108), INT32_C( 1079419151) }, { -INT32_C( 1477742611), INT32_C( 506021572) }, { { -INT32_C( 1506908108), -INT32_C( 1477742611) }, { INT32_C( 1079419151), INT32_C( 506021572) }, }, }, { { -INT32_C( 1667798350), -INT32_C( 126596300) }, { -INT32_C( 689348711), INT32_C( 468299494) }, { { -INT32_C( 1667798350), -INT32_C( 689348711) }, { -INT32_C( 126596300), INT32_C( 468299494) }, }, }, { { INT32_C( 633411606), -INT32_C( 
1503323975) }, { INT32_C( 1280135559), INT32_C( 1231713943) }, { { INT32_C( 633411606), INT32_C( 1280135559) }, { -INT32_C( 1503323975), INT32_C( 1231713943) }, }, }, { { INT32_C( 367329760), -INT32_C( 452110004) }, { -INT32_C( 1615071303), -INT32_C( 1145395803) }, { { INT32_C( 367329760), -INT32_C( 1615071303) }, { -INT32_C( 452110004), -INT32_C( 1145395803) }, }, }, { { INT32_C( 1994423485), INT32_C( 454837908) }, { INT32_C( 778529175), -INT32_C( 1082666529) }, { { INT32_C( 1994423485), INT32_C( 778529175) }, { INT32_C( 454837908), -INT32_C( 1082666529) }, }, }, { { INT32_C( 517233874), INT32_C( 1845813941) }, { INT32_C( 2114895833), INT32_C( 557434980) }, { { INT32_C( 517233874), INT32_C( 2114895833) }, { INT32_C( 1845813941), INT32_C( 557434980) }, }, }, { { -INT32_C( 661186236), -INT32_C( 151735457) }, { -INT32_C( 81503460), -INT32_C( 4482259) }, { { -INT32_C( 661186236), -INT32_C( 81503460) }, { -INT32_C( 151735457), -INT32_C( 4482259) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2x2_t r = simde_vtrn_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r.val[0], simde_vld1_s32(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_i32x2(r.val[1], simde_vld1_s32(test_vec[i].r[1])); } return 0; } static int test_simde_vtrn_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[2][8]; } test_vec[] = { { { UINT8_C(156), UINT8_C(211), UINT8_C(239), UINT8_C(201), UINT8_C(251), UINT8_C( 68), UINT8_C(216), UINT8_C(173) }, { UINT8_C(147), UINT8_C( 21), UINT8_C( 33), UINT8_C(140), UINT8_C(167), UINT8_C(115), UINT8_C( 36), UINT8_C( 31) }, { { UINT8_C(156), UINT8_C(147), UINT8_C(239), UINT8_C( 33), UINT8_C(251), UINT8_C(167), UINT8_C(216), UINT8_C( 36) }, { UINT8_C(211), UINT8_C( 21), UINT8_C(201), UINT8_C(140), UINT8_C( 68), UINT8_C(115), UINT8_C(173), UINT8_C( 31) }, }, }, { { UINT8_C( 53), UINT8_C( 
85), UINT8_C(184), UINT8_C( 44), UINT8_C( 3), UINT8_C( 4), UINT8_C(166), UINT8_C( 52) }, { UINT8_C( 61), UINT8_C(108), UINT8_C( 17), UINT8_C( 42), UINT8_C( 56), UINT8_C(179), UINT8_C(216), UINT8_C(212) }, { { UINT8_C( 53), UINT8_C( 61), UINT8_C(184), UINT8_C( 17), UINT8_C( 3), UINT8_C( 56), UINT8_C(166), UINT8_C(216) }, { UINT8_C( 85), UINT8_C(108), UINT8_C( 44), UINT8_C( 42), UINT8_C( 4), UINT8_C(179), UINT8_C( 52), UINT8_C(212) }, }, }, { { UINT8_C(134), UINT8_C(199), UINT8_C(157), UINT8_C(129), UINT8_C( 11), UINT8_C(118), UINT8_C( 46), UINT8_C(158) }, { UINT8_C(139), UINT8_C( 80), UINT8_C( 43), UINT8_C( 50), UINT8_C(195), UINT8_C( 79), UINT8_C( 81), UINT8_C(249) }, { { UINT8_C(134), UINT8_C(139), UINT8_C(157), UINT8_C( 43), UINT8_C( 11), UINT8_C(195), UINT8_C( 46), UINT8_C( 81) }, { UINT8_C(199), UINT8_C( 80), UINT8_C(129), UINT8_C( 50), UINT8_C(118), UINT8_C( 79), UINT8_C(158), UINT8_C(249) }, }, }, { { UINT8_C(164), UINT8_C( 9), UINT8_C( 37), UINT8_C(167), UINT8_C( 13), UINT8_C(203), UINT8_C(219), UINT8_C( 75) }, { UINT8_C( 56), UINT8_C(236), UINT8_C(117), UINT8_C(112), UINT8_C(159), UINT8_C( 77), UINT8_C( 69), UINT8_C( 37) }, { { UINT8_C(164), UINT8_C( 56), UINT8_C( 37), UINT8_C(117), UINT8_C( 13), UINT8_C(159), UINT8_C(219), UINT8_C( 69) }, { UINT8_C( 9), UINT8_C(236), UINT8_C(167), UINT8_C(112), UINT8_C(203), UINT8_C( 77), UINT8_C( 75), UINT8_C( 37) }, }, }, { { UINT8_C( 21), UINT8_C(226), UINT8_C(166), UINT8_C( 32), UINT8_C( 88), UINT8_C(213), UINT8_C(191), UINT8_C(227) }, { UINT8_C( 37), UINT8_C(234), UINT8_C( 22), UINT8_C(232), UINT8_C( 57), UINT8_C(103), UINT8_C(225), UINT8_C(221) }, { { UINT8_C( 21), UINT8_C( 37), UINT8_C(166), UINT8_C( 22), UINT8_C( 88), UINT8_C( 57), UINT8_C(191), UINT8_C(225) }, { UINT8_C(226), UINT8_C(234), UINT8_C( 32), UINT8_C(232), UINT8_C(213), UINT8_C(103), UINT8_C(227), UINT8_C(221) }, }, }, { { UINT8_C(113), UINT8_C( 6), UINT8_C(132), UINT8_C(126), UINT8_C(210), UINT8_C( 95), UINT8_C(201), UINT8_C( 10) }, { UINT8_C( 75), 
UINT8_C( 63), UINT8_C(122), UINT8_C(235), UINT8_C(140), UINT8_C(191), UINT8_C( 16), UINT8_C(161) }, { { UINT8_C(113), UINT8_C( 75), UINT8_C(132), UINT8_C(122), UINT8_C(210), UINT8_C(140), UINT8_C(201), UINT8_C( 16) }, { UINT8_C( 6), UINT8_C( 63), UINT8_C(126), UINT8_C(235), UINT8_C( 95), UINT8_C(191), UINT8_C( 10), UINT8_C(161) }, }, }, { { UINT8_C(162), UINT8_C(183), UINT8_C(194), UINT8_C(250), UINT8_C(140), UINT8_C(129), UINT8_C(222), UINT8_C(177) }, { UINT8_C(107), UINT8_C(244), UINT8_C(153), UINT8_C(164), UINT8_C( 91), UINT8_C(123), UINT8_C(129), UINT8_C(204) }, { { UINT8_C(162), UINT8_C(107), UINT8_C(194), UINT8_C(153), UINT8_C(140), UINT8_C( 91), UINT8_C(222), UINT8_C(129) }, { UINT8_C(183), UINT8_C(244), UINT8_C(250), UINT8_C(164), UINT8_C(129), UINT8_C(123), UINT8_C(177), UINT8_C(204) }, }, }, { { UINT8_C(129), UINT8_C( 5), UINT8_C( 75), UINT8_C( 83), UINT8_C(100), UINT8_C( 20), UINT8_C( 93), UINT8_C(175) }, { UINT8_C( 83), UINT8_C(216), UINT8_C(154), UINT8_C(224), UINT8_C(151), UINT8_C(171), UINT8_C(129), UINT8_C( 57) }, { { UINT8_C(129), UINT8_C( 83), UINT8_C( 75), UINT8_C(154), UINT8_C(100), UINT8_C(151), UINT8_C( 93), UINT8_C(129) }, { UINT8_C( 5), UINT8_C(216), UINT8_C( 83), UINT8_C(224), UINT8_C( 20), UINT8_C(171), UINT8_C(175), UINT8_C( 57) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8x2_t r = simde_vtrn_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r.val[0], simde_vld1_u8(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_u8x8(r.val[1], simde_vld1_u8(test_vec[i].r[1])); } return 0; } static int test_simde_vtrn_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[2][4]; } test_vec[] = { { { UINT16_C(39077), UINT16_C(38350), UINT16_C( 1512), UINT16_C(63202) }, { UINT16_C(45315), UINT16_C( 4567), UINT16_C(18974), UINT16_C(10275) }, { { UINT16_C(39077), UINT16_C(45315), 
UINT16_C( 1512), UINT16_C(18974) }, { UINT16_C(38350), UINT16_C( 4567), UINT16_C(63202), UINT16_C(10275) }, }, }, { { UINT16_C(14563), UINT16_C(36858), UINT16_C(33303), UINT16_C(63726) }, { UINT16_C(24487), UINT16_C(30580), UINT16_C(30894), UINT16_C(21608) }, { { UINT16_C(14563), UINT16_C(24487), UINT16_C(33303), UINT16_C(30894) }, { UINT16_C(36858), UINT16_C(30580), UINT16_C(63726), UINT16_C(21608) }, }, }, { { UINT16_C(13841), UINT16_C(63977), UINT16_C(52284), UINT16_C(16367) }, { UINT16_C(50813), UINT16_C(39761), UINT16_C(29712), UINT16_C(62660) }, { { UINT16_C(13841), UINT16_C(50813), UINT16_C(52284), UINT16_C(29712) }, { UINT16_C(63977), UINT16_C(39761), UINT16_C(16367), UINT16_C(62660) }, }, }, { { UINT16_C(48812), UINT16_C(50051), UINT16_C(28993), UINT16_C(59580) }, { UINT16_C(12497), UINT16_C(32608), UINT16_C(51369), UINT16_C(47827) }, { { UINT16_C(48812), UINT16_C(12497), UINT16_C(28993), UINT16_C(51369) }, { UINT16_C(50051), UINT16_C(32608), UINT16_C(59580), UINT16_C(47827) }, }, }, { { UINT16_C(48639), UINT16_C(15283), UINT16_C(41609), UINT16_C( 1658) }, { UINT16_C(52073), UINT16_C(31138), UINT16_C(26175), UINT16_C(60269) }, { { UINT16_C(48639), UINT16_C(52073), UINT16_C(41609), UINT16_C(26175) }, { UINT16_C(15283), UINT16_C(31138), UINT16_C( 1658), UINT16_C(60269) }, }, }, { { UINT16_C(61732), UINT16_C(26031), UINT16_C(27490), UINT16_C(13134) }, { UINT16_C(44699), UINT16_C(17587), UINT16_C(34422), UINT16_C(30206) }, { { UINT16_C(61732), UINT16_C(44699), UINT16_C(27490), UINT16_C(34422) }, { UINT16_C(26031), UINT16_C(17587), UINT16_C(13134), UINT16_C(30206) }, }, }, { { UINT16_C(45379), UINT16_C(52400), UINT16_C(11092), UINT16_C(48595) }, { UINT16_C(30198), UINT16_C(13878), UINT16_C(42203), UINT16_C(65313) }, { { UINT16_C(45379), UINT16_C(30198), UINT16_C(11092), UINT16_C(42203) }, { UINT16_C(52400), UINT16_C(13878), UINT16_C(48595), UINT16_C(65313) }, }, }, { { UINT16_C(53397), UINT16_C(63333), UINT16_C(45883), UINT16_C(55083) }, { UINT16_C(56929), 
UINT16_C(55067), UINT16_C( 6756), UINT16_C(43085) }, { { UINT16_C(53397), UINT16_C(56929), UINT16_C(45883), UINT16_C( 6756) }, { UINT16_C(63333), UINT16_C(55067), UINT16_C(55083), UINT16_C(43085) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4x2_t r = simde_vtrn_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r.val[0], simde_vld1_u16(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_u16x4(r.val[1], simde_vld1_u16(test_vec[i].r[1])); } return 0; } static int test_simde_vtrn_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2][2]; } test_vec[] = { { { UINT32_C(4008918152), UINT32_C(1302258451) }, { UINT32_C( 362140121), UINT32_C(3609334470) }, { { UINT32_C(4008918152), UINT32_C( 362140121) }, { UINT32_C(1302258451), UINT32_C(3609334470) }, }, }, { { UINT32_C(2784007120), UINT32_C( 955628426) }, { UINT32_C(1766220593), UINT32_C(4102521708) }, { { UINT32_C(2784007120), UINT32_C(1766220593) }, { UINT32_C( 955628426), UINT32_C(4102521708) }, }, }, { { UINT32_C(3538057919), UINT32_C( 924877150) }, { UINT32_C( 407745874), UINT32_C(2565828552) }, { { UINT32_C(3538057919), UINT32_C( 407745874) }, { UINT32_C( 924877150), UINT32_C(2565828552) }, }, }, { { UINT32_C(2369707779), UINT32_C(3435541403) }, { UINT32_C( 37031062), UINT32_C(1324792975) }, { { UINT32_C(2369707779), UINT32_C( 37031062) }, { UINT32_C(3435541403), UINT32_C(1324792975) }, }, }, { { UINT32_C(2502023479), UINT32_C(2899067226) }, { UINT32_C(3200588278), UINT32_C(2354558089) }, { { UINT32_C(2502023479), UINT32_C(3200588278) }, { UINT32_C(2899067226), UINT32_C(2354558089) }, }, }, { { UINT32_C( 790205844), UINT32_C(1610342344) }, { UINT32_C(2069967084), UINT32_C( 600463596) }, { { UINT32_C( 790205844), UINT32_C(2069967084) }, { UINT32_C(1610342344), UINT32_C( 600463596) }, }, }, { { UINT32_C(2344151857), UINT32_C( 
574063916) }, { UINT32_C( 669121694), UINT32_C(1169373361) }, { { UINT32_C(2344151857), UINT32_C( 669121694) }, { UINT32_C( 574063916), UINT32_C(1169373361) }, }, }, { { UINT32_C(2507460045), UINT32_C(2566156204) }, { UINT32_C(2333365919), UINT32_C(3752844974) }, { { UINT32_C(2507460045), UINT32_C(2333365919) }, { UINT32_C(2566156204), UINT32_C(3752844974) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2x2_t r = simde_vtrn_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r.val[0], simde_vld1_u32(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_u32x2(r.val[1], simde_vld1_u32(test_vec[i].r[1])); } return 0; } static int test_simde_vtrnq_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[2][4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -21.16), SIMDE_FLOAT32_C( -415.28), SIMDE_FLOAT32_C( 122.64), SIMDE_FLOAT32_C( -871.38) }, { SIMDE_FLOAT32_C( 823.55), SIMDE_FLOAT32_C( 479.12), SIMDE_FLOAT32_C( -299.37), SIMDE_FLOAT32_C( -925.94) }, { { SIMDE_FLOAT32_C( -21.16), SIMDE_FLOAT32_C( 823.55), SIMDE_FLOAT32_C( 122.64), SIMDE_FLOAT32_C( -299.37) }, { SIMDE_FLOAT32_C( -415.28), SIMDE_FLOAT32_C( 479.12), SIMDE_FLOAT32_C( -871.38), SIMDE_FLOAT32_C( -925.94) }, }, }, { { SIMDE_FLOAT32_C( -127.06), SIMDE_FLOAT32_C( -474.56), SIMDE_FLOAT32_C( -700.09), SIMDE_FLOAT32_C( 980.65) }, { SIMDE_FLOAT32_C( -759.85), SIMDE_FLOAT32_C( 613.71), SIMDE_FLOAT32_C( 336.49), SIMDE_FLOAT32_C( 564.00) }, { { SIMDE_FLOAT32_C( -127.06), SIMDE_FLOAT32_C( -759.85), SIMDE_FLOAT32_C( -700.09), SIMDE_FLOAT32_C( 336.49) }, { SIMDE_FLOAT32_C( -474.56), SIMDE_FLOAT32_C( 613.71), SIMDE_FLOAT32_C( 980.65), SIMDE_FLOAT32_C( 564.00) }, }, }, { { SIMDE_FLOAT32_C( -466.35), SIMDE_FLOAT32_C( 378.63), SIMDE_FLOAT32_C( -371.33), SIMDE_FLOAT32_C( 706.88) }, { SIMDE_FLOAT32_C( -64.44), SIMDE_FLOAT32_C( 420.87), 
SIMDE_FLOAT32_C( -117.58), SIMDE_FLOAT32_C( 403.73) }, { { SIMDE_FLOAT32_C( -466.35), SIMDE_FLOAT32_C( -64.44), SIMDE_FLOAT32_C( -371.33), SIMDE_FLOAT32_C( -117.58) }, { SIMDE_FLOAT32_C( 378.63), SIMDE_FLOAT32_C( 420.87), SIMDE_FLOAT32_C( 706.88), SIMDE_FLOAT32_C( 403.73) }, }, }, { { SIMDE_FLOAT32_C( 250.86), SIMDE_FLOAT32_C( 812.70), SIMDE_FLOAT32_C( 383.33), SIMDE_FLOAT32_C( 70.53) }, { SIMDE_FLOAT32_C( -526.93), SIMDE_FLOAT32_C( -755.27), SIMDE_FLOAT32_C( -138.40), SIMDE_FLOAT32_C( 451.91) }, { { SIMDE_FLOAT32_C( 250.86), SIMDE_FLOAT32_C( -526.93), SIMDE_FLOAT32_C( 383.33), SIMDE_FLOAT32_C( -138.40) }, { SIMDE_FLOAT32_C( 812.70), SIMDE_FLOAT32_C( -755.27), SIMDE_FLOAT32_C( 70.53), SIMDE_FLOAT32_C( 451.91) }, }, }, { { SIMDE_FLOAT32_C( -170.55), SIMDE_FLOAT32_C( 984.25), SIMDE_FLOAT32_C( 580.53), SIMDE_FLOAT32_C( -347.00) }, { SIMDE_FLOAT32_C( 463.36), SIMDE_FLOAT32_C( -718.84), SIMDE_FLOAT32_C( -272.94), SIMDE_FLOAT32_C( -663.70) }, { { SIMDE_FLOAT32_C( -170.55), SIMDE_FLOAT32_C( 463.36), SIMDE_FLOAT32_C( 580.53), SIMDE_FLOAT32_C( -272.94) }, { SIMDE_FLOAT32_C( 984.25), SIMDE_FLOAT32_C( -718.84), SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( -663.70) }, }, }, { { SIMDE_FLOAT32_C( -193.40), SIMDE_FLOAT32_C( 26.97), SIMDE_FLOAT32_C( -683.05), SIMDE_FLOAT32_C( 46.75) }, { SIMDE_FLOAT32_C( -359.31), SIMDE_FLOAT32_C( 653.44), SIMDE_FLOAT32_C( -389.25), SIMDE_FLOAT32_C( 174.33) }, { { SIMDE_FLOAT32_C( -193.40), SIMDE_FLOAT32_C( -359.31), SIMDE_FLOAT32_C( -683.05), SIMDE_FLOAT32_C( -389.25) }, { SIMDE_FLOAT32_C( 26.97), SIMDE_FLOAT32_C( 653.44), SIMDE_FLOAT32_C( 46.75), SIMDE_FLOAT32_C( 174.33) }, }, }, { { SIMDE_FLOAT32_C( 32.08), SIMDE_FLOAT32_C( 239.42), SIMDE_FLOAT32_C( -118.78), SIMDE_FLOAT32_C( 967.64) }, { SIMDE_FLOAT32_C( -339.71), SIMDE_FLOAT32_C( 763.63), SIMDE_FLOAT32_C( 371.37), SIMDE_FLOAT32_C( 911.15) }, { { SIMDE_FLOAT32_C( 32.08), SIMDE_FLOAT32_C( -339.71), SIMDE_FLOAT32_C( -118.78), SIMDE_FLOAT32_C( 371.37) }, { SIMDE_FLOAT32_C( 239.42), 
SIMDE_FLOAT32_C( 763.63), SIMDE_FLOAT32_C( 967.64), SIMDE_FLOAT32_C( 911.15) }, }, }, { { SIMDE_FLOAT32_C( 576.34), SIMDE_FLOAT32_C( -245.30), SIMDE_FLOAT32_C( -18.32), SIMDE_FLOAT32_C( -950.59) }, { SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( 843.29), SIMDE_FLOAT32_C( 501.32), SIMDE_FLOAT32_C( 828.88) }, { { SIMDE_FLOAT32_C( 576.34), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -18.32), SIMDE_FLOAT32_C( 501.32) }, { SIMDE_FLOAT32_C( -245.30), SIMDE_FLOAT32_C( 843.29), SIMDE_FLOAT32_C( -950.59), SIMDE_FLOAT32_C( 828.88) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4x2_t r = simde_vtrnq_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r.val[0], simde_vld1q_f32(test_vec[i].r[0]), 1); simde_test_arm_neon_assert_equal_f32x4(r.val[1], simde_vld1q_f32(test_vec[i].r[1]), 1); } return 0; } static int test_simde_vtrnq_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[2][16]; } test_vec[] = { { { -INT8_C( 33), -INT8_C( 2), INT8_C( 49), INT8_C( 24), INT8_C( 54), INT8_C( 26), INT8_C( 79), -INT8_C( 31), INT8_C( 21), -INT8_C( 121), INT8_C( 14), -INT8_C( 47), INT8_C( 102), -INT8_C( 38), INT8_C( 73), INT8_C( 24) }, { -INT8_C( 8), INT8_C( 7), INT8_C( 22), -INT8_C( 62), INT8_C( 52), -INT8_C( 22), -INT8_C( 122), INT8_C( 110), INT8_C( 20), -INT8_C( 71), INT8_C( 94), -INT8_C( 2), -INT8_C( 12), -INT8_C( 71), -INT8_C( 73), -INT8_C( 44) }, { { -INT8_C( 33), -INT8_C( 8), INT8_C( 49), INT8_C( 22), INT8_C( 54), INT8_C( 52), INT8_C( 79), -INT8_C( 122), INT8_C( 21), INT8_C( 20), INT8_C( 14), INT8_C( 94), INT8_C( 102), -INT8_C( 12), INT8_C( 73), -INT8_C( 73) }, { -INT8_C( 2), INT8_C( 7), INT8_C( 24), -INT8_C( 62), INT8_C( 26), -INT8_C( 22), -INT8_C( 31), INT8_C( 110), -INT8_C( 121), -INT8_C( 71), -INT8_C( 47), -INT8_C( 2), -INT8_C( 38), -INT8_C( 71), INT8_C( 24), -INT8_C( 44) }, }, }, { { -INT8_C( 73), -INT8_C( 
24), -INT8_C( 20), -INT8_C( 19), INT8_C( 2), INT8_C( 59), -INT8_C( 49), INT8_C( 23), -INT8_C( 62), -INT8_C( 35), -INT8_C( 23), INT8_C( 40), -INT8_C( 73), INT8_C( 50), INT8_C( 64), -INT8_C( 81) }, { INT8_C( 58), INT8_C( 86), INT8_C( 114), INT8_C( 110), INT8_C( 64), -INT8_C( 8), -INT8_C( 36), INT8_C( 85), -INT8_C( 79), INT8_C( 58), INT8_C( 83), -INT8_C( 90), -INT8_C( 13), INT8_C( 10), INT8_C( 122), -INT8_C( 85) }, { { -INT8_C( 73), INT8_C( 58), -INT8_C( 20), INT8_C( 114), INT8_C( 2), INT8_C( 64), -INT8_C( 49), -INT8_C( 36), -INT8_C( 62), -INT8_C( 79), -INT8_C( 23), INT8_C( 83), -INT8_C( 73), -INT8_C( 13), INT8_C( 64), INT8_C( 122) }, { -INT8_C( 24), INT8_C( 86), -INT8_C( 19), INT8_C( 110), INT8_C( 59), -INT8_C( 8), INT8_C( 23), INT8_C( 85), -INT8_C( 35), INT8_C( 58), INT8_C( 40), -INT8_C( 90), INT8_C( 50), INT8_C( 10), -INT8_C( 81), -INT8_C( 85) }, }, }, { { -INT8_C( 14), INT8_C( 102), -INT8_C( 104), -INT8_C( 11), -INT8_C( 95), INT8_C( 103), INT8_C( 12), INT8_C( 99), INT8_C( 68), -INT8_C( 11), -INT8_C( 117), -INT8_C( 5), INT8_C( 40), -INT8_C( 52), -INT8_C( 85), INT8_C( 98) }, { INT8_C( 34), INT8_C( 29), -INT8_C( 48), INT8_C( 99), INT8_C( 21), -INT8_C( 84), -INT8_C( 72), -INT8_C( 58), -INT8_C( 25), INT8_C( 11), INT8_C( 108), -INT8_C( 38), INT8_C( 22), -INT8_C( 26), -INT8_C( 123), INT8_C( 8) }, { { -INT8_C( 14), INT8_C( 34), -INT8_C( 104), -INT8_C( 48), -INT8_C( 95), INT8_C( 21), INT8_C( 12), -INT8_C( 72), INT8_C( 68), -INT8_C( 25), -INT8_C( 117), INT8_C( 108), INT8_C( 40), INT8_C( 22), -INT8_C( 85), -INT8_C( 123) }, { INT8_C( 102), INT8_C( 29), -INT8_C( 11), INT8_C( 99), INT8_C( 103), -INT8_C( 84), INT8_C( 99), -INT8_C( 58), -INT8_C( 11), INT8_C( 11), -INT8_C( 5), -INT8_C( 38), -INT8_C( 52), -INT8_C( 26), INT8_C( 98), INT8_C( 8) }, }, }, { { INT8_C( 76), INT8_C( 30), -INT8_C( 3), -INT8_C( 19), -INT8_C( 123), INT8_C( 10), INT8_C( 80), -INT8_C( 54), -INT8_C( 1), -INT8_C( 36), -INT8_C( 59), INT8_C( 39), -INT8_C( 88), INT8_C( 112), -INT8_C( 119), -INT8_C( 54) }, { 
-INT8_C( 115), INT8_C( 89), INT8_C( 45), -INT8_C( 94), INT8_C( 6), -INT8_C( 27), INT8_C( 105), -INT8_C( 19), -INT8_C( 15), -INT8_C( 43), -INT8_C( 57), INT8_C( 7), -INT8_C( 68), INT8_C( 77), INT8_C( 15), INT8_C( 8) }, { { INT8_C( 76), -INT8_C( 115), -INT8_C( 3), INT8_C( 45), -INT8_C( 123), INT8_C( 6), INT8_C( 80), INT8_C( 105), -INT8_C( 1), -INT8_C( 15), -INT8_C( 59), -INT8_C( 57), -INT8_C( 88), -INT8_C( 68), -INT8_C( 119), INT8_C( 15) }, { INT8_C( 30), INT8_C( 89), -INT8_C( 19), -INT8_C( 94), INT8_C( 10), -INT8_C( 27), -INT8_C( 54), -INT8_C( 19), -INT8_C( 36), -INT8_C( 43), INT8_C( 39), INT8_C( 7), INT8_C( 112), INT8_C( 77), -INT8_C( 54), INT8_C( 8) }, }, }, { { INT8_C( 107), INT8_C( 13), -INT8_C( 10), -INT8_C( 16), INT8_C( 23), INT8_C( 70), -INT8_C( 70), INT8_C( 22), INT8_C( 34), INT8_MIN, INT8_C( 62), -INT8_C( 54), -INT8_C( 16), -INT8_C( 57), -INT8_C( 107), INT8_C( 126) }, { INT8_C( 33), -INT8_C( 62), INT8_C( 32), INT8_C( 39), -INT8_C( 88), -INT8_C( 119), INT8_C( 20), -INT8_C( 103), INT8_C( 95), -INT8_C( 37), -INT8_C( 96), INT8_C( 27), INT8_C( 40), -INT8_C( 81), INT8_C( 35), -INT8_C( 109) }, { { INT8_C( 107), INT8_C( 33), -INT8_C( 10), INT8_C( 32), INT8_C( 23), -INT8_C( 88), -INT8_C( 70), INT8_C( 20), INT8_C( 34), INT8_C( 95), INT8_C( 62), -INT8_C( 96), -INT8_C( 16), INT8_C( 40), -INT8_C( 107), INT8_C( 35) }, { INT8_C( 13), -INT8_C( 62), -INT8_C( 16), INT8_C( 39), INT8_C( 70), -INT8_C( 119), INT8_C( 22), -INT8_C( 103), INT8_MIN, -INT8_C( 37), -INT8_C( 54), INT8_C( 27), -INT8_C( 57), -INT8_C( 81), INT8_C( 126), -INT8_C( 109) }, }, }, { { -INT8_C( 68), INT8_C( 25), -INT8_C( 124), -INT8_C( 45), INT8_C( 96), INT8_C( 62), -INT8_C( 22), -INT8_C( 126), -INT8_C( 66), INT8_C( 40), INT8_C( 77), -INT8_C( 81), -INT8_C( 17), -INT8_C( 30), INT8_C( 45), INT8_C( 16) }, { -INT8_C( 92), INT8_C( 77), INT8_C( 55), INT8_C( 76), -INT8_C( 41), INT8_C( 75), -INT8_C( 27), INT8_C( 54), INT8_C( 39), -INT8_C( 123), INT8_C( 81), INT8_C( 79), INT8_C( 53), INT8_C( 116), -INT8_C( 29), -INT8_C( 
15) }, { { -INT8_C( 68), -INT8_C( 92), -INT8_C( 124), INT8_C( 55), INT8_C( 96), -INT8_C( 41), -INT8_C( 22), -INT8_C( 27), -INT8_C( 66), INT8_C( 39), INT8_C( 77), INT8_C( 81), -INT8_C( 17), INT8_C( 53), INT8_C( 45), -INT8_C( 29) }, { INT8_C( 25), INT8_C( 77), -INT8_C( 45), INT8_C( 76), INT8_C( 62), INT8_C( 75), -INT8_C( 126), INT8_C( 54), INT8_C( 40), -INT8_C( 123), -INT8_C( 81), INT8_C( 79), -INT8_C( 30), INT8_C( 116), INT8_C( 16), -INT8_C( 15) }, }, }, { { -INT8_C( 114), INT8_C( 103), -INT8_C( 59), -INT8_C( 18), -INT8_C( 91), -INT8_C( 81), INT8_C( 112), INT8_C( 100), -INT8_C( 41), -INT8_C( 67), INT8_C( 19), -INT8_C( 58), -INT8_C( 97), INT8_C( 64), -INT8_C( 41), INT8_C( 68) }, { -INT8_C( 115), INT8_C( 14), -INT8_C( 112), INT8_C( 100), INT8_C( 90), INT8_C( 118), -INT8_C( 102), -INT8_C( 127), -INT8_C( 5), -INT8_C( 21), -INT8_C( 48), INT8_C( 48), INT8_C( 96), -INT8_C( 77), INT8_C( 34), -INT8_C( 18) }, { { -INT8_C( 114), -INT8_C( 115), -INT8_C( 59), -INT8_C( 112), -INT8_C( 91), INT8_C( 90), INT8_C( 112), -INT8_C( 102), -INT8_C( 41), -INT8_C( 5), INT8_C( 19), -INT8_C( 48), -INT8_C( 97), INT8_C( 96), -INT8_C( 41), INT8_C( 34) }, { INT8_C( 103), INT8_C( 14), -INT8_C( 18), INT8_C( 100), -INT8_C( 81), INT8_C( 118), INT8_C( 100), -INT8_C( 127), -INT8_C( 67), -INT8_C( 21), -INT8_C( 58), INT8_C( 48), INT8_C( 64), -INT8_C( 77), INT8_C( 68), -INT8_C( 18) }, }, }, { { INT8_C( 26), -INT8_C( 25), -INT8_C( 36), -INT8_C( 64), -INT8_C( 106), INT8_C( 76), INT8_C( 36), INT8_C( 109), INT8_C( 10), INT8_C( 55), INT8_C( 51), -INT8_C( 87), INT8_C( 119), INT8_C( 10), -INT8_C( 19), INT8_C( 4) }, { INT8_C( 25), INT8_C( 126), INT8_C( 105), INT8_C( 115), -INT8_C( 12), INT8_C( 3), -INT8_C( 12), -INT8_C( 17), -INT8_C( 17), -INT8_C( 60), INT8_C( 32), INT8_C( 79), INT8_C( 120), INT8_C( 66), INT8_C( 61), -INT8_C( 110) }, { { INT8_C( 26), INT8_C( 25), -INT8_C( 36), INT8_C( 105), -INT8_C( 106), -INT8_C( 12), INT8_C( 36), -INT8_C( 12), INT8_C( 10), -INT8_C( 17), INT8_C( 51), INT8_C( 32), INT8_C( 119), 
INT8_C( 120), -INT8_C( 19), INT8_C( 61) }, { -INT8_C( 25), INT8_C( 126), -INT8_C( 64), INT8_C( 115), INT8_C( 76), INT8_C( 3), INT8_C( 109), -INT8_C( 17), INT8_C( 55), -INT8_C( 60), -INT8_C( 87), INT8_C( 79), INT8_C( 10), INT8_C( 66), INT8_C( 4), -INT8_C( 110) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16x2_t r = simde_vtrnq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r.val[0], simde_vld1q_s8(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_i8x16(r.val[1], simde_vld1q_s8(test_vec[i].r[1])); } return 0; } static int test_simde_vtrnq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[2][8]; } test_vec[] = { { { INT16_C( 10915), -INT16_C( 16438), INT16_C( 7957), -INT16_C( 18291), -INT16_C( 19156), INT16_C( 21219), -INT16_C( 21359), INT16_C( 13451) }, { -INT16_C( 24388), INT16_C( 21235), INT16_C( 19711), -INT16_C( 13138), INT16_C( 19613), INT16_C( 3470), INT16_C( 9286), -INT16_C( 5705) }, { { INT16_C( 10915), -INT16_C( 24388), INT16_C( 7957), INT16_C( 19711), -INT16_C( 19156), INT16_C( 19613), -INT16_C( 21359), INT16_C( 9286) }, { -INT16_C( 16438), INT16_C( 21235), -INT16_C( 18291), -INT16_C( 13138), INT16_C( 21219), INT16_C( 3470), INT16_C( 13451), -INT16_C( 5705) }, }, }, { { -INT16_C( 32434), INT16_C( 25769), INT16_C( 13984), -INT16_C( 13284), -INT16_C( 20), INT16_C( 32031), -INT16_C( 21844), INT16_C( 26801) }, { -INT16_C( 23222), INT16_C( 19130), INT16_C( 27121), -INT16_C( 29162), -INT16_C( 23115), -INT16_C( 1125), INT16_C( 21193), INT16_C( 6117) }, { { -INT16_C( 32434), -INT16_C( 23222), INT16_C( 13984), INT16_C( 27121), -INT16_C( 20), -INT16_C( 23115), -INT16_C( 21844), INT16_C( 21193) }, { INT16_C( 25769), INT16_C( 19130), -INT16_C( 13284), -INT16_C( 29162), INT16_C( 32031), -INT16_C( 1125), INT16_C( 26801), INT16_C( 6117) }, }, }, { { -INT16_C( 28972), INT16_C( 29819), 
-INT16_C( 26428), -INT16_C( 20415), INT16_C( 24727), INT16_C( 17198), -INT16_C( 8438), INT16_C( 21676) }, { INT16_C( 26244), INT16_C( 30110), -INT16_C( 18993), -INT16_C( 31741), -INT16_C( 24998), INT16_C( 9088), INT16_C( 26097), -INT16_C( 15046) }, { { -INT16_C( 28972), INT16_C( 26244), -INT16_C( 26428), -INT16_C( 18993), INT16_C( 24727), -INT16_C( 24998), -INT16_C( 8438), INT16_C( 26097) }, { INT16_C( 29819), INT16_C( 30110), -INT16_C( 20415), -INT16_C( 31741), INT16_C( 17198), INT16_C( 9088), INT16_C( 21676), -INT16_C( 15046) }, }, }, { { -INT16_C( 18701), -INT16_C( 18631), INT16_C( 31310), -INT16_C( 6808), -INT16_C( 26918), -INT16_C( 7127), -INT16_C( 10891), -INT16_C( 1479) }, { -INT16_C( 10437), INT16_C( 2927), INT16_C( 29580), -INT16_C( 6513), INT16_C( 3857), INT16_C( 521), INT16_C( 17524), INT16_C( 26567) }, { { -INT16_C( 18701), -INT16_C( 10437), INT16_C( 31310), INT16_C( 29580), -INT16_C( 26918), INT16_C( 3857), -INT16_C( 10891), INT16_C( 17524) }, { -INT16_C( 18631), INT16_C( 2927), -INT16_C( 6808), -INT16_C( 6513), -INT16_C( 7127), INT16_C( 521), -INT16_C( 1479), INT16_C( 26567) }, }, }, { { INT16_C( 506), INT16_C( 18463), -INT16_C( 30853), INT16_C( 22061), INT16_C( 22045), -INT16_C( 28102), INT16_C( 29483), INT16_C( 26508) }, { -INT16_C( 949), -INT16_C( 10382), INT16_C( 367), -INT16_C( 32578), -INT16_C( 14575), -INT16_C( 31357), INT16_C( 18955), INT16_C( 1517) }, { { INT16_C( 506), -INT16_C( 949), -INT16_C( 30853), INT16_C( 367), INT16_C( 22045), -INT16_C( 14575), INT16_C( 29483), INT16_C( 18955) }, { INT16_C( 18463), -INT16_C( 10382), INT16_C( 22061), -INT16_C( 32578), -INT16_C( 28102), -INT16_C( 31357), INT16_C( 26508), INT16_C( 1517) }, }, }, { { INT16_C( 3147), -INT16_C( 14515), INT16_C( 31635), -INT16_C( 20451), INT16_C( 22481), -INT16_C( 702), -INT16_C( 12341), INT16_C( 5732) }, { -INT16_C( 10549), INT16_C( 15085), -INT16_C( 21545), -INT16_C( 5958), INT16_C( 15731), INT16_C( 32366), INT16_C( 23432), -INT16_C( 11388) }, { { INT16_C( 3147), -INT16_C( 
10549), INT16_C( 31635), -INT16_C( 21545), INT16_C( 22481), INT16_C( 15731), -INT16_C( 12341), INT16_C( 23432) }, { -INT16_C( 14515), INT16_C( 15085), -INT16_C( 20451), -INT16_C( 5958), -INT16_C( 702), INT16_C( 32366), INT16_C( 5732), -INT16_C( 11388) }, }, }, { { -INT16_C( 11929), -INT16_C( 1382), -INT16_C( 18612), INT16_C( 7850), -INT16_C( 5105), -INT16_C( 9701), INT16_C( 32699), -INT16_C( 30992) }, { -INT16_C( 8875), INT16_C( 11456), INT16_C( 31625), -INT16_C( 1003), -INT16_C( 31816), INT16_C( 16506), -INT16_C( 290), INT16_C( 17684) }, { { -INT16_C( 11929), -INT16_C( 8875), -INT16_C( 18612), INT16_C( 31625), -INT16_C( 5105), -INT16_C( 31816), INT16_C( 32699), -INT16_C( 290) }, { -INT16_C( 1382), INT16_C( 11456), INT16_C( 7850), -INT16_C( 1003), -INT16_C( 9701), INT16_C( 16506), -INT16_C( 30992), INT16_C( 17684) }, }, }, { { -INT16_C( 20784), INT16_C( 7231), -INT16_C( 5786), INT16_C( 30010), INT16_C( 21973), -INT16_C( 28337), INT16_C( 16340), INT16_C( 10519) }, { -INT16_C( 10212), -INT16_C( 23210), INT16_C( 27475), INT16_C( 2977), INT16_C( 7406), -INT16_C( 13236), INT16_C( 24602), -INT16_C( 5615) }, { { -INT16_C( 20784), -INT16_C( 10212), -INT16_C( 5786), INT16_C( 27475), INT16_C( 21973), INT16_C( 7406), INT16_C( 16340), INT16_C( 24602) }, { INT16_C( 7231), -INT16_C( 23210), INT16_C( 30010), INT16_C( 2977), -INT16_C( 28337), -INT16_C( 13236), INT16_C( 10519), -INT16_C( 5615) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8x2_t r = simde_vtrnq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r.val[0], simde_vld1q_s16(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_i16x8(r.val[1], simde_vld1q_s16(test_vec[i].r[1])); } return 0; } static int test_simde_vtrnq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[2][4]; } test_vec[] = { { { INT32_C( 1703801270), -INT32_C( 232086468), 
INT32_C( 5800579), -INT32_C( 1062703425) }, { -INT32_C( 1987166982), INT32_C( 1610152052), INT32_C( 676881874), INT32_C( 1462420385) }, { { INT32_C( 1703801270), -INT32_C( 1987166982), INT32_C( 5800579), INT32_C( 676881874) }, { -INT32_C( 232086468), INT32_C( 1610152052), -INT32_C( 1062703425), INT32_C( 1462420385) }, }, }, { { -INT32_C( 239290187), -INT32_C( 538712484), INT32_C( 685784169), -INT32_C( 1528198998) }, { INT32_C( 942503876), INT32_C( 1100490351), INT32_C( 745205899), INT32_C( 1753519283) }, { { -INT32_C( 239290187), INT32_C( 942503876), INT32_C( 685784169), INT32_C( 745205899) }, { -INT32_C( 538712484), INT32_C( 1100490351), -INT32_C( 1528198998), INT32_C( 1753519283) }, }, }, { { -INT32_C( 1453768628), -INT32_C( 1870119641), INT32_C( 599287929), -INT32_C( 1261985296) }, { -INT32_C( 1997671400), -INT32_C( 1513519846), INT32_C( 684864373), INT32_C( 345069256) }, { { -INT32_C( 1453768628), -INT32_C( 1997671400), INT32_C( 599287929), INT32_C( 684864373) }, { -INT32_C( 1870119641), -INT32_C( 1513519846), -INT32_C( 1261985296), INT32_C( 345069256) }, }, }, { { -INT32_C( 1111627114), -INT32_C( 1605548505), -INT32_C( 1614608722), -INT32_C( 1068266841) }, { -INT32_C( 1723318145), INT32_C( 977211845), INT32_C( 224596293), -INT32_C( 48106393) }, { { -INT32_C( 1111627114), -INT32_C( 1723318145), -INT32_C( 1614608722), INT32_C( 224596293) }, { -INT32_C( 1605548505), INT32_C( 977211845), -INT32_C( 1068266841), -INT32_C( 48106393) }, }, }, { { INT32_C( 112975838), -INT32_C( 744093659), -INT32_C( 1234015730), INT32_C( 1937164020) }, { -INT32_C( 871514618), INT32_C( 335957199), -INT32_C( 1004443299), INT32_C( 1019298653) }, { { INT32_C( 112975838), -INT32_C( 871514618), -INT32_C( 1234015730), -INT32_C( 1004443299) }, { -INT32_C( 744093659), INT32_C( 335957199), INT32_C( 1937164020), INT32_C( 1019298653) }, }, }, { { INT32_C( 1195539490), -INT32_C( 1826953083), INT32_C( 1196002642), INT32_C( 1505410899) }, { INT32_C( 1294321533), INT32_C( 1885416467), -INT32_C( 
214662251), -INT32_C( 399509818) }, { { INT32_C( 1195539490), INT32_C( 1294321533), INT32_C( 1196002642), -INT32_C( 214662251) }, { -INT32_C( 1826953083), INT32_C( 1885416467), INT32_C( 1505410899), -INT32_C( 399509818) }, }, }, { { -INT32_C( 147885710), -INT32_C( 1400157863), INT32_C( 703845590), INT32_C( 293842324) }, { -INT32_C( 2007062411), INT32_C( 1794752724), INT32_C( 157101379), -INT32_C( 1762554845) }, { { -INT32_C( 147885710), -INT32_C( 2007062411), INT32_C( 703845590), INT32_C( 157101379) }, { -INT32_C( 1400157863), INT32_C( 1794752724), INT32_C( 293842324), -INT32_C( 1762554845) }, }, }, { { INT32_C( 1452089597), INT32_C( 1073879145), -INT32_C( 2123762195), INT32_C( 412282019) }, { INT32_C( 1772155285), -INT32_C( 187459151), -INT32_C( 352505657), -INT32_C( 1182732612) }, { { INT32_C( 1452089597), INT32_C( 1772155285), -INT32_C( 2123762195), -INT32_C( 352505657) }, { INT32_C( 1073879145), -INT32_C( 187459151), INT32_C( 412282019), -INT32_C( 1182732612) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4x2_t r = simde_vtrnq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r.val[0], simde_vld1q_s32(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_i32x4(r.val[1], simde_vld1q_s32(test_vec[i].r[1])); } return 0; } static int test_simde_vtrnq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[2][16]; } test_vec[] = { { { UINT8_C(125), UINT8_C( 52), UINT8_C(219), UINT8_C( 49), UINT8_C(184), UINT8_C( 70), UINT8_C( 81), UINT8_C( 21), UINT8_C(251), UINT8_C(161), UINT8_C( 96), UINT8_C( 40), UINT8_C( 3), UINT8_C(143), UINT8_C(153), UINT8_C(129) }, { UINT8_C( 6), UINT8_C(209), UINT8_C(203), UINT8_C( 43), UINT8_C(151), UINT8_C( 96), UINT8_C(213), UINT8_C(182), UINT8_C( 61), UINT8_C(132), UINT8_C( 40), UINT8_C( 31), UINT8_C(188), UINT8_C(204), UINT8_C(218), UINT8_C( 57) }, { { 
UINT8_C(125), UINT8_C( 6), UINT8_C(219), UINT8_C(203), UINT8_C(184), UINT8_C(151), UINT8_C( 81), UINT8_C(213), UINT8_C(251), UINT8_C( 61), UINT8_C( 96), UINT8_C( 40), UINT8_C( 3), UINT8_C(188), UINT8_C(153), UINT8_C(218) }, { UINT8_C( 52), UINT8_C(209), UINT8_C( 49), UINT8_C( 43), UINT8_C( 70), UINT8_C( 96), UINT8_C( 21), UINT8_C(182), UINT8_C(161), UINT8_C(132), UINT8_C( 40), UINT8_C( 31), UINT8_C(143), UINT8_C(204), UINT8_C(129), UINT8_C( 57) }, }, }, { { UINT8_C( 0), UINT8_C(182), UINT8_C(106), UINT8_C(185), UINT8_C(252), UINT8_C(188), UINT8_C(206), UINT8_C(247), UINT8_C( 93), UINT8_C( 46), UINT8_C( 32), UINT8_C( 97), UINT8_C(189), UINT8_C(185), UINT8_C(226), UINT8_C(196) }, { UINT8_C(138), UINT8_C(173), UINT8_C(239), UINT8_C( 33), UINT8_C( 14), UINT8_C(196), UINT8_C(216), UINT8_C( 75), UINT8_C( 72), UINT8_C( 0), UINT8_C(107), UINT8_C( 5), UINT8_C(204), UINT8_C( 69), UINT8_C( 62), UINT8_C(204) }, { { UINT8_C( 0), UINT8_C(138), UINT8_C(106), UINT8_C(239), UINT8_C(252), UINT8_C( 14), UINT8_C(206), UINT8_C(216), UINT8_C( 93), UINT8_C( 72), UINT8_C( 32), UINT8_C(107), UINT8_C(189), UINT8_C(204), UINT8_C(226), UINT8_C( 62) }, { UINT8_C(182), UINT8_C(173), UINT8_C(185), UINT8_C( 33), UINT8_C(188), UINT8_C(196), UINT8_C(247), UINT8_C( 75), UINT8_C( 46), UINT8_C( 0), UINT8_C( 97), UINT8_C( 5), UINT8_C(185), UINT8_C( 69), UINT8_C(196), UINT8_C(204) }, }, }, { { UINT8_C(251), UINT8_C(169), UINT8_C(133), UINT8_C(248), UINT8_C(101), UINT8_C( 83), UINT8_C(239), UINT8_C(194), UINT8_C(130), UINT8_C( 15), UINT8_C( 35), UINT8_C( 63), UINT8_C(200), UINT8_C( 5), UINT8_C( 3), UINT8_C( 82) }, { UINT8_C(179), UINT8_C(243), UINT8_C(116), UINT8_C(193), UINT8_C(183), UINT8_C( 76), UINT8_C( 12), UINT8_C( 0), UINT8_C( 76), UINT8_C(119), UINT8_C( 5), UINT8_C( 24), UINT8_C(189), UINT8_C( 67), UINT8_C(228), UINT8_C(184) }, { { UINT8_C(251), UINT8_C(179), UINT8_C(133), UINT8_C(116), UINT8_C(101), UINT8_C(183), UINT8_C(239), UINT8_C( 12), UINT8_C(130), UINT8_C( 76), UINT8_C( 35), UINT8_C( 5), 
UINT8_C(200), UINT8_C(189), UINT8_C( 3), UINT8_C(228) }, { UINT8_C(169), UINT8_C(243), UINT8_C(248), UINT8_C(193), UINT8_C( 83), UINT8_C( 76), UINT8_C(194), UINT8_C( 0), UINT8_C( 15), UINT8_C(119), UINT8_C( 63), UINT8_C( 24), UINT8_C( 5), UINT8_C( 67), UINT8_C( 82), UINT8_C(184) }, }, }, { { UINT8_C(236), UINT8_C(106), UINT8_C(176), UINT8_C( 81), UINT8_C(189), UINT8_C(160), UINT8_C( 20), UINT8_C( 63), UINT8_C(175), UINT8_C( 55), UINT8_C(127), UINT8_C(120), UINT8_C( 61), UINT8_C(130), UINT8_C(202), UINT8_C(240) }, { UINT8_C(117), UINT8_C( 62), UINT8_C(177), UINT8_C( 45), UINT8_C(138), UINT8_C(189), UINT8_C( 45), UINT8_C(214), UINT8_C( 53), UINT8_C( 50), UINT8_C(238), UINT8_C(242), UINT8_C(117), UINT8_C(211), UINT8_C(170), UINT8_C( 98) }, { { UINT8_C(236), UINT8_C(117), UINT8_C(176), UINT8_C(177), UINT8_C(189), UINT8_C(138), UINT8_C( 20), UINT8_C( 45), UINT8_C(175), UINT8_C( 53), UINT8_C(127), UINT8_C(238), UINT8_C( 61), UINT8_C(117), UINT8_C(202), UINT8_C(170) }, { UINT8_C(106), UINT8_C( 62), UINT8_C( 81), UINT8_C( 45), UINT8_C(160), UINT8_C(189), UINT8_C( 63), UINT8_C(214), UINT8_C( 55), UINT8_C( 50), UINT8_C(120), UINT8_C(242), UINT8_C(130), UINT8_C(211), UINT8_C(240), UINT8_C( 98) }, }, }, { { UINT8_C( 61), UINT8_C( 91), UINT8_C(179), UINT8_C(250), UINT8_C(251), UINT8_C(199), UINT8_C( 58), UINT8_C(170), UINT8_MAX, UINT8_C(185), UINT8_C( 34), UINT8_C( 60), UINT8_C( 59), UINT8_C(237), UINT8_C( 44), UINT8_C(177) }, { UINT8_C( 43), UINT8_C(221), UINT8_C(222), UINT8_C(182), UINT8_C(154), UINT8_C( 11), UINT8_C(140), UINT8_C(207), UINT8_C( 61), UINT8_C(123), UINT8_C(193), UINT8_C(178), UINT8_C( 78), UINT8_C(108), UINT8_C( 20), UINT8_C(139) }, { { UINT8_C( 61), UINT8_C( 43), UINT8_C(179), UINT8_C(222), UINT8_C(251), UINT8_C(154), UINT8_C( 58), UINT8_C(140), UINT8_MAX, UINT8_C( 61), UINT8_C( 34), UINT8_C(193), UINT8_C( 59), UINT8_C( 78), UINT8_C( 44), UINT8_C( 20) }, { UINT8_C( 91), UINT8_C(221), UINT8_C(250), UINT8_C(182), UINT8_C(199), UINT8_C( 11), UINT8_C(170), 
UINT8_C(207), UINT8_C(185), UINT8_C(123), UINT8_C( 60), UINT8_C(178), UINT8_C(237), UINT8_C(108), UINT8_C(177), UINT8_C(139) }, }, }, { { UINT8_C(199), UINT8_C(200), UINT8_C(133), UINT8_C(194), UINT8_C(143), UINT8_C(191), UINT8_C(108), UINT8_C(142), UINT8_C(120), UINT8_C(143), UINT8_C(202), UINT8_C(180), UINT8_C(124), UINT8_C(246), UINT8_C(101), UINT8_C(167) }, { UINT8_C(211), UINT8_C( 67), UINT8_C( 93), UINT8_C(110), UINT8_C( 78), UINT8_C(234), UINT8_C( 61), UINT8_C(139), UINT8_C(101), UINT8_MAX, UINT8_C( 61), UINT8_C(179), UINT8_C(107), UINT8_C( 82), UINT8_C( 62), UINT8_C( 50) }, { { UINT8_C(199), UINT8_C(211), UINT8_C(133), UINT8_C( 93), UINT8_C(143), UINT8_C( 78), UINT8_C(108), UINT8_C( 61), UINT8_C(120), UINT8_C(101), UINT8_C(202), UINT8_C( 61), UINT8_C(124), UINT8_C(107), UINT8_C(101), UINT8_C( 62) }, { UINT8_C(200), UINT8_C( 67), UINT8_C(194), UINT8_C(110), UINT8_C(191), UINT8_C(234), UINT8_C(142), UINT8_C(139), UINT8_C(143), UINT8_MAX, UINT8_C(180), UINT8_C(179), UINT8_C(246), UINT8_C( 82), UINT8_C(167), UINT8_C( 50) }, }, }, { { UINT8_C( 26), UINT8_C(195), UINT8_C(244), UINT8_C(169), UINT8_C(131), UINT8_C( 96), UINT8_C( 56), UINT8_C(251), UINT8_C(239), UINT8_C( 2), UINT8_C(175), UINT8_C(107), UINT8_C(249), UINT8_C( 20), UINT8_C( 19), UINT8_C(204) }, { UINT8_C( 87), UINT8_C(112), UINT8_C( 58), UINT8_C(165), UINT8_C( 90), UINT8_C(120), UINT8_C( 48), UINT8_C(191), UINT8_C(119), UINT8_C(110), UINT8_C(114), UINT8_C(226), UINT8_C(192), UINT8_C(176), UINT8_C( 20), UINT8_C(218) }, { { UINT8_C( 26), UINT8_C( 87), UINT8_C(244), UINT8_C( 58), UINT8_C(131), UINT8_C( 90), UINT8_C( 56), UINT8_C( 48), UINT8_C(239), UINT8_C(119), UINT8_C(175), UINT8_C(114), UINT8_C(249), UINT8_C(192), UINT8_C( 19), UINT8_C( 20) }, { UINT8_C(195), UINT8_C(112), UINT8_C(169), UINT8_C(165), UINT8_C( 96), UINT8_C(120), UINT8_C(251), UINT8_C(191), UINT8_C( 2), UINT8_C(110), UINT8_C(107), UINT8_C(226), UINT8_C( 20), UINT8_C(176), UINT8_C(204), UINT8_C(218) }, }, }, { { UINT8_C(116), UINT8_C( 
8), UINT8_C(131), UINT8_C(247), UINT8_C(104), UINT8_C(187), UINT8_C(242), UINT8_C( 88), UINT8_C(190), UINT8_C(162), UINT8_C(195), UINT8_C(183), UINT8_C(182), UINT8_C(214), UINT8_C(131), UINT8_C( 14) }, { UINT8_C( 71), UINT8_C(190), UINT8_C(179), UINT8_C(161), UINT8_C( 54), UINT8_C(228), UINT8_C( 97), UINT8_C(173), UINT8_C( 82), UINT8_C(211), UINT8_C(143), UINT8_C( 18), UINT8_C(132), UINT8_C(163), UINT8_C(236), UINT8_C(248) }, { { UINT8_C(116), UINT8_C( 71), UINT8_C(131), UINT8_C(179), UINT8_C(104), UINT8_C( 54), UINT8_C(242), UINT8_C( 97), UINT8_C(190), UINT8_C( 82), UINT8_C(195), UINT8_C(143), UINT8_C(182), UINT8_C(132), UINT8_C(131), UINT8_C(236) }, { UINT8_C( 8), UINT8_C(190), UINT8_C(247), UINT8_C(161), UINT8_C(187), UINT8_C(228), UINT8_C( 88), UINT8_C(173), UINT8_C(162), UINT8_C(211), UINT8_C(183), UINT8_C( 18), UINT8_C(214), UINT8_C(163), UINT8_C( 14), UINT8_C(248) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16x2_t r = simde_vtrnq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r.val[0], simde_vld1q_u8(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_u8x16(r.val[1], simde_vld1q_u8(test_vec[i].r[1])); } return 0; } static int test_simde_vtrnq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[2][8]; } test_vec[] = { { { UINT16_C(60557), UINT16_C( 2003), UINT16_C(18326), UINT16_C(50944), UINT16_C(20033), UINT16_C(41177), UINT16_C(55949), UINT16_C(55500) }, { UINT16_C(24134), UINT16_C(17611), UINT16_C(40546), UINT16_C(12683), UINT16_C(16844), UINT16_C(15740), UINT16_C(47346), UINT16_C(32687) }, { { UINT16_C(60557), UINT16_C(24134), UINT16_C(18326), UINT16_C(40546), UINT16_C(20033), UINT16_C(16844), UINT16_C(55949), UINT16_C(47346) }, { UINT16_C( 2003), UINT16_C(17611), UINT16_C(50944), UINT16_C(12683), UINT16_C(41177), UINT16_C(15740), UINT16_C(55500), UINT16_C(32687) }, }, 
}, { { UINT16_C(33444), UINT16_C(14982), UINT16_C(34505), UINT16_C( 2818), UINT16_C(56277), UINT16_C(25259), UINT16_C(30645), UINT16_C(64314) }, { UINT16_C( 1493), UINT16_C(14399), UINT16_C(52132), UINT16_C(28777), UINT16_C(58636), UINT16_C(65453), UINT16_C(23709), UINT16_C(16766) }, { { UINT16_C(33444), UINT16_C( 1493), UINT16_C(34505), UINT16_C(52132), UINT16_C(56277), UINT16_C(58636), UINT16_C(30645), UINT16_C(23709) }, { UINT16_C(14982), UINT16_C(14399), UINT16_C( 2818), UINT16_C(28777), UINT16_C(25259), UINT16_C(65453), UINT16_C(64314), UINT16_C(16766) }, }, }, { { UINT16_C( 1502), UINT16_C(43132), UINT16_C(32395), UINT16_C(24755), UINT16_C(24153), UINT16_C( 3779), UINT16_C(64981), UINT16_C(43786) }, { UINT16_C(18691), UINT16_C(42979), UINT16_C(19476), UINT16_C( 8471), UINT16_C(50225), UINT16_C(53024), UINT16_C(40480), UINT16_C(65040) }, { { UINT16_C( 1502), UINT16_C(18691), UINT16_C(32395), UINT16_C(19476), UINT16_C(24153), UINT16_C(50225), UINT16_C(64981), UINT16_C(40480) }, { UINT16_C(43132), UINT16_C(42979), UINT16_C(24755), UINT16_C( 8471), UINT16_C( 3779), UINT16_C(53024), UINT16_C(43786), UINT16_C(65040) }, }, }, { { UINT16_C(36003), UINT16_C(12198), UINT16_C(22794), UINT16_C(25487), UINT16_C(21175), UINT16_C(36210), UINT16_C(31824), UINT16_C(21304) }, { UINT16_C( 7109), UINT16_C(56058), UINT16_C( 4455), UINT16_C(39163), UINT16_C( 7125), UINT16_C(62823), UINT16_C(30905), UINT16_C(24051) }, { { UINT16_C(36003), UINT16_C( 7109), UINT16_C(22794), UINT16_C( 4455), UINT16_C(21175), UINT16_C( 7125), UINT16_C(31824), UINT16_C(30905) }, { UINT16_C(12198), UINT16_C(56058), UINT16_C(25487), UINT16_C(39163), UINT16_C(36210), UINT16_C(62823), UINT16_C(21304), UINT16_C(24051) }, }, }, { { UINT16_C(39428), UINT16_C( 3980), UINT16_C( 7155), UINT16_C(43890), UINT16_C(58478), UINT16_C(48696), UINT16_C(28768), UINT16_C( 9745) }, { UINT16_C( 2955), UINT16_C(61952), UINT16_C(64284), UINT16_C(61834), UINT16_C(61974), UINT16_C(53222), UINT16_C(55658), UINT16_C(28204) }, { { 
UINT16_C(39428), UINT16_C( 2955), UINT16_C( 7155), UINT16_C(64284), UINT16_C(58478), UINT16_C(61974), UINT16_C(28768), UINT16_C(55658) }, { UINT16_C( 3980), UINT16_C(61952), UINT16_C(43890), UINT16_C(61834), UINT16_C(48696), UINT16_C(53222), UINT16_C( 9745), UINT16_C(28204) }, }, }, { { UINT16_C(47219), UINT16_C(26493), UINT16_C(61652), UINT16_C(16914), UINT16_C(19156), UINT16_C(13568), UINT16_C( 4538), UINT16_C(17755) }, { UINT16_C(23324), UINT16_C(14391), UINT16_C(49494), UINT16_C(27689), UINT16_C( 4019), UINT16_C( 7483), UINT16_C(26856), UINT16_C(23692) }, { { UINT16_C(47219), UINT16_C(23324), UINT16_C(61652), UINT16_C(49494), UINT16_C(19156), UINT16_C( 4019), UINT16_C( 4538), UINT16_C(26856) }, { UINT16_C(26493), UINT16_C(14391), UINT16_C(16914), UINT16_C(27689), UINT16_C(13568), UINT16_C( 7483), UINT16_C(17755), UINT16_C(23692) }, }, }, { { UINT16_C( 2336), UINT16_C(62659), UINT16_C(54777), UINT16_C(52790), UINT16_C(13855), UINT16_C(55555), UINT16_C(24135), UINT16_C(25374) }, { UINT16_C(21945), UINT16_C( 3995), UINT16_C(50198), UINT16_C(51835), UINT16_C(46803), UINT16_C(48359), UINT16_C(29470), UINT16_C(16152) }, { { UINT16_C( 2336), UINT16_C(21945), UINT16_C(54777), UINT16_C(50198), UINT16_C(13855), UINT16_C(46803), UINT16_C(24135), UINT16_C(29470) }, { UINT16_C(62659), UINT16_C( 3995), UINT16_C(52790), UINT16_C(51835), UINT16_C(55555), UINT16_C(48359), UINT16_C(25374), UINT16_C(16152) }, }, }, { { UINT16_C(56189), UINT16_C(30259), UINT16_C(27312), UINT16_C(53060), UINT16_C(18336), UINT16_C(59560), UINT16_C(50853), UINT16_C(24139) }, { UINT16_C(59163), UINT16_C(12653), UINT16_C(59563), UINT16_C(32763), UINT16_C(58271), UINT16_C(48443), UINT16_C(21334), UINT16_C(54268) }, { { UINT16_C(56189), UINT16_C(59163), UINT16_C(27312), UINT16_C(59563), UINT16_C(18336), UINT16_C(58271), UINT16_C(50853), UINT16_C(21334) }, { UINT16_C(30259), UINT16_C(12653), UINT16_C(53060), UINT16_C(32763), UINT16_C(59560), UINT16_C(48443), UINT16_C(24139), UINT16_C(54268) }, }, }, }; 
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8x2_t r = simde_vtrnq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r.val[0], simde_vld1q_u16(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_u16x8(r.val[1], simde_vld1q_u16(test_vec[i].r[1])); } return 0; } static int test_simde_vtrnq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[2][4]; } test_vec[] = { { { UINT32_C(1309098840), UINT32_C( 298067663), UINT32_C( 699572772), UINT32_C( 489541366) }, { UINT32_C(2219089279), UINT32_C(2283573183), UINT32_C(1328363319), UINT32_C( 765180373) }, { { UINT32_C(1309098840), UINT32_C(2219089279), UINT32_C( 699572772), UINT32_C(1328363319) }, { UINT32_C( 298067663), UINT32_C(2283573183), UINT32_C( 489541366), UINT32_C( 765180373) }, }, }, { { UINT32_C(3430654716), UINT32_C(3973922760), UINT32_C(3608514529), UINT32_C(3723838301) }, { UINT32_C(2673949152), UINT32_C( 2588105), UINT32_C(2303743156), UINT32_C( 179759885) }, { { UINT32_C(3430654716), UINT32_C(2673949152), UINT32_C(3608514529), UINT32_C(2303743156) }, { UINT32_C(3973922760), UINT32_C( 2588105), UINT32_C(3723838301), UINT32_C( 179759885) }, }, }, { { UINT32_C(1456877965), UINT32_C(1380103024), UINT32_C(2670286914), UINT32_C(2071731867) }, { UINT32_C( 555408728), UINT32_C( 237060698), UINT32_C(2761388438), UINT32_C(3937291612) }, { { UINT32_C(1456877965), UINT32_C( 555408728), UINT32_C(2670286914), UINT32_C(2761388438) }, { UINT32_C(1380103024), UINT32_C( 237060698), UINT32_C(2071731867), UINT32_C(3937291612) }, }, }, { { UINT32_C(4013982846), UINT32_C(2034336311), UINT32_C(1964534490), UINT32_C(3790640521) }, { UINT32_C(3439463282), UINT32_C(3822789453), UINT32_C(4052186005), UINT32_C(1071330752) }, { { UINT32_C(4013982846), UINT32_C(3439463282), UINT32_C(1964534490), UINT32_C(4052186005) }, { UINT32_C(2034336311), UINT32_C(3822789453), 
UINT32_C(3790640521), UINT32_C(1071330752) }, }, }, { { UINT32_C(4029553593), UINT32_C(2020175774), UINT32_C(1659798233), UINT32_C(2302926359) }, { UINT32_C( 911623657), UINT32_C(4263129705), UINT32_C(1710203301), UINT32_C(2426719191) }, { { UINT32_C(4029553593), UINT32_C( 911623657), UINT32_C(1659798233), UINT32_C(1710203301) }, { UINT32_C(2020175774), UINT32_C(4263129705), UINT32_C(2302926359), UINT32_C(2426719191) }, }, }, { { UINT32_C(2223100646), UINT32_C( 469625409), UINT32_C(2206067564), UINT32_C(3004023241) }, { UINT32_C(1877566214), UINT32_C( 980222869), UINT32_C(2090884517), UINT32_C( 235684904) }, { { UINT32_C(2223100646), UINT32_C(1877566214), UINT32_C(2206067564), UINT32_C(2090884517) }, { UINT32_C( 469625409), UINT32_C( 980222869), UINT32_C(3004023241), UINT32_C( 235684904) }, }, }, { { UINT32_C(1486064919), UINT32_C(3832778872), UINT32_C(1147728251), UINT32_C(3103225266) }, { UINT32_C(1848173016), UINT32_C(2309527012), UINT32_C( 436553970), UINT32_C(2754155149) }, { { UINT32_C(1486064919), UINT32_C(1848173016), UINT32_C(1147728251), UINT32_C( 436553970) }, { UINT32_C(3832778872), UINT32_C(2309527012), UINT32_C(3103225266), UINT32_C(2754155149) }, }, }, { { UINT32_C( 402439327), UINT32_C(3355209804), UINT32_C( 319513697), UINT32_C(2982872025) }, { UINT32_C(3357537252), UINT32_C(2069022857), UINT32_C(2643875600), UINT32_C( 155303785) }, { { UINT32_C( 402439327), UINT32_C(3357537252), UINT32_C( 319513697), UINT32_C(2643875600) }, { UINT32_C(3355209804), UINT32_C(2069022857), UINT32_C(2982872025), UINT32_C( 155303785) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4x2_t r = simde_vtrnq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r.val[0], simde_vld1q_u32(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_u32x4(r.val[1], simde_vld1q_u32(test_vec[i].r[1])); } return 0; } #endif /* 
!defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrnq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrnq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrnq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrnq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrnq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrnq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrnq_u32) #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/arm/neon/trn1.c000066400000000000000000001547001400333146700163560ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN trn1 #include #include static int test_simde_vtrn1_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -734.98), SIMDE_FLOAT32_C( 365.45) }, { SIMDE_FLOAT32_C( -158.72), SIMDE_FLOAT32_C( 801.49) }, { SIMDE_FLOAT32_C( -734.98), SIMDE_FLOAT32_C( -158.72) } }, { { SIMDE_FLOAT32_C( -244.32), SIMDE_FLOAT32_C( 246.47) }, { SIMDE_FLOAT32_C( -998.98), SIMDE_FLOAT32_C( -876.14) }, { SIMDE_FLOAT32_C( -244.32), SIMDE_FLOAT32_C( -998.98) } }, { { SIMDE_FLOAT32_C( 821.58), SIMDE_FLOAT32_C( 188.36) }, { SIMDE_FLOAT32_C( 526.46), SIMDE_FLOAT32_C( 806.12) }, { SIMDE_FLOAT32_C( 821.58), SIMDE_FLOAT32_C( 526.46) } }, { { SIMDE_FLOAT32_C( 231.60), SIMDE_FLOAT32_C( -192.02) }, { SIMDE_FLOAT32_C( -230.26), SIMDE_FLOAT32_C( -950.64) }, { SIMDE_FLOAT32_C( 231.60), SIMDE_FLOAT32_C( -230.26) } }, { { SIMDE_FLOAT32_C( 514.95), SIMDE_FLOAT32_C( -326.95) }, { SIMDE_FLOAT32_C( -930.10), SIMDE_FLOAT32_C( 113.30) }, { SIMDE_FLOAT32_C( 514.95), SIMDE_FLOAT32_C( -930.10) } }, { { SIMDE_FLOAT32_C( 562.02), SIMDE_FLOAT32_C( -958.09) }, { SIMDE_FLOAT32_C( 753.36), SIMDE_FLOAT32_C( 
-955.88) }, { SIMDE_FLOAT32_C( 562.02), SIMDE_FLOAT32_C( 753.36) } }, { { SIMDE_FLOAT32_C( 690.44), SIMDE_FLOAT32_C( -991.78) }, { SIMDE_FLOAT32_C( 276.78), SIMDE_FLOAT32_C( 537.69) }, { SIMDE_FLOAT32_C( 690.44), SIMDE_FLOAT32_C( 276.78) } }, { { SIMDE_FLOAT32_C( -86.37), SIMDE_FLOAT32_C( -105.16) }, { SIMDE_FLOAT32_C( 319.11), SIMDE_FLOAT32_C( 178.65) }, { SIMDE_FLOAT32_C( -86.37), SIMDE_FLOAT32_C( 319.11) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vtrn1_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vtrn1_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 60), -INT8_C( 45), -INT8_C( 88), -INT8_C( 68), -INT8_C( 80), INT8_C( 32), INT8_C( 27), -INT8_C( 59) }, { INT8_C( 0), INT8_C( 78), -INT8_C( 30), -INT8_C( 82), -INT8_C( 119), INT8_C( 46), INT8_C( 17), INT8_C( 49) }, { -INT8_C( 60), INT8_C( 0), -INT8_C( 88), -INT8_C( 30), -INT8_C( 80), -INT8_C( 119), INT8_C( 27), INT8_C( 17) } }, { { INT8_C( 101), INT8_C( 29), INT8_C( 88), INT8_C( 118), -INT8_C( 106), -INT8_C( 13), -INT8_C( 99), INT8_C( 125) }, { INT8_C( 71), -INT8_C( 38), INT8_C( 116), -INT8_C( 26), -INT8_C( 54), -INT8_C( 111), INT8_C( 13), -INT8_C( 114) }, { INT8_C( 101), INT8_C( 71), INT8_C( 88), INT8_C( 116), -INT8_C( 106), -INT8_C( 54), -INT8_C( 99), INT8_C( 13) } }, { { INT8_C( 101), -INT8_C( 75), INT8_C( 75), INT8_C( 21), -INT8_C( 43), INT8_C( 102), -INT8_C( 37), -INT8_C( 42) }, { -INT8_C( 75), -INT8_C( 67), -INT8_C( 124), INT8_C( 62), -INT8_C( 20), -INT8_C( 107), INT8_C( 112), INT8_C( 81) }, { INT8_C( 101), -INT8_C( 75), INT8_C( 75), -INT8_C( 124), -INT8_C( 43), -INT8_C( 20), -INT8_C( 37), INT8_C( 112) } }, { { -INT8_C( 77), -INT8_C( 56), -INT8_C( 57), INT8_C( 73), -INT8_C( 69), INT8_C( 100), -INT8_C( 58), 
INT8_C( 2) }, { INT8_C( 62), INT8_C( 58), -INT8_C( 23), INT8_C( 8), -INT8_C( 52), -INT8_C( 10), -INT8_C( 105), INT8_C( 49) }, { -INT8_C( 77), INT8_C( 62), -INT8_C( 57), -INT8_C( 23), -INT8_C( 69), -INT8_C( 52), -INT8_C( 58), -INT8_C( 105) } }, { { -INT8_C( 84), -INT8_C( 30), INT8_C( 70), -INT8_C( 127), INT8_C( 72), INT8_C( 33), INT8_C( 87), -INT8_C( 3) }, { -INT8_C( 33), -INT8_C( 37), INT8_C( 60), -INT8_C( 53), INT8_C( 113), -INT8_C( 84), INT8_C( 28), INT8_C( 36) }, { -INT8_C( 84), -INT8_C( 33), INT8_C( 70), INT8_C( 60), INT8_C( 72), INT8_C( 113), INT8_C( 87), INT8_C( 28) } }, { { INT8_C( 116), -INT8_C( 29), INT8_C( 109), INT8_C( 47), INT8_C( 71), INT8_C( 51), INT8_C( 50), -INT8_C( 123) }, { INT8_C( 110), INT8_C( 27), -INT8_C( 115), INT8_C( 58), INT8_C( 17), INT8_C( 36), INT8_C( 107), -INT8_C( 67) }, { INT8_C( 116), INT8_C( 110), INT8_C( 109), -INT8_C( 115), INT8_C( 71), INT8_C( 17), INT8_C( 50), INT8_C( 107) } }, { { INT8_C( 6), -INT8_C( 79), INT8_C( 63), INT8_C( 79), -INT8_C( 45), -INT8_C( 106), INT8_C( 76), -INT8_C( 78) }, { INT8_C( 114), -INT8_C( 120), INT8_C( 125), -INT8_C( 29), INT8_C( 52), -INT8_C( 103), INT8_C( 7), -INT8_C( 88) }, { INT8_C( 6), INT8_C( 114), INT8_C( 63), INT8_C( 125), -INT8_C( 45), INT8_C( 52), INT8_C( 76), INT8_C( 7) } }, { { INT8_C( 124), INT8_C( 116), -INT8_C( 40), -INT8_C( 61), -INT8_C( 89), INT8_C( 10), INT8_C( 72), INT8_C( 21) }, { INT8_C( 37), -INT8_C( 43), INT8_C( 79), INT8_C( 54), -INT8_C( 6), -INT8_C( 70), -INT8_C( 12), INT8_C( 0) }, { INT8_C( 124), INT8_C( 37), -INT8_C( 40), INT8_C( 79), -INT8_C( 89), -INT8_C( 6), INT8_C( 72), -INT8_C( 12) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vtrn1_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vtrn1_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t 
b[4]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 12600), INT16_C( 17726), INT16_C( 25964), -INT16_C( 31246) }, { INT16_C( 29446), INT16_C( 19730), INT16_C( 5918), INT16_C( 11654) }, { -INT16_C( 12600), INT16_C( 29446), INT16_C( 25964), INT16_C( 5918) } }, { { INT16_C( 20514), INT16_C( 22734), -INT16_C( 2490), -INT16_C( 23811) }, { -INT16_C( 13108), INT16_C( 28476), -INT16_C( 13288), -INT16_C( 8175) }, { INT16_C( 20514), -INT16_C( 13108), -INT16_C( 2490), -INT16_C( 13288) } }, { { INT16_C( 20378), INT16_C( 1574), INT16_C( 6324), -INT16_C( 17781) }, { -INT16_C( 24948), -INT16_C( 22008), -INT16_C( 29003), -INT16_C( 10280) }, { INT16_C( 20378), -INT16_C( 24948), INT16_C( 6324), -INT16_C( 29003) } }, { { -INT16_C( 22817), INT16_C( 9519), INT16_C( 11676), INT16_C( 26823) }, { INT16_C( 1273), INT16_C( 4823), -INT16_C( 5936), INT16_C( 27378) }, { -INT16_C( 22817), INT16_C( 1273), INT16_C( 11676), -INT16_C( 5936) } }, { { INT16_C( 6199), -INT16_C( 5264), -INT16_C( 975), -INT16_C( 16986) }, { -INT16_C( 20838), INT16_C( 20327), INT16_C( 16188), INT16_C( 6951) }, { INT16_C( 6199), -INT16_C( 20838), -INT16_C( 975), INT16_C( 16188) } }, { { INT16_C( 22245), -INT16_C( 32447), INT16_C( 2179), INT16_C( 32233) }, { -INT16_C( 16372), -INT16_C( 9073), -INT16_C( 32344), -INT16_C( 8378) }, { INT16_C( 22245), -INT16_C( 16372), INT16_C( 2179), -INT16_C( 32344) } }, { { -INT16_C( 18534), -INT16_C( 13365), INT16_C( 29107), INT16_C( 19848) }, { -INT16_C( 4321), INT16_C( 23452), -INT16_C( 15569), INT16_C( 5239) }, { -INT16_C( 18534), -INT16_C( 4321), INT16_C( 29107), -INT16_C( 15569) } }, { { -INT16_C( 18406), -INT16_C( 25194), INT16_C( 32704), -INT16_C( 13030) }, { -INT16_C( 22208), -INT16_C( 5975), -INT16_C( 4053), -INT16_C( 14904) }, { -INT16_C( 18406), -INT16_C( 22208), INT16_C( 32704), -INT16_C( 4053) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); 
simde_int16x4_t r = simde_vtrn1_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vtrn1_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 2113992025), INT32_C( 1335496731) }, { -INT32_C( 441354692), -INT32_C( 150353327) }, { -INT32_C( 2113992025), -INT32_C( 441354692) } }, { { INT32_C( 131230526), -INT32_C( 1810676118) }, { INT32_C( 2093845), -INT32_C( 1520903682) }, { INT32_C( 131230526), INT32_C( 2093845) } }, { { -INT32_C( 14264092), -INT32_C( 1538342552) }, { -INT32_C( 2004287433), INT32_C( 159421386) }, { -INT32_C( 14264092), -INT32_C( 2004287433) } }, { { INT32_C( 1762677502), -INT32_C( 1375919208) }, { INT32_C( 363666710), -INT32_C( 625342986) }, { INT32_C( 1762677502), INT32_C( 363666710) } }, { { -INT32_C( 975511203), -INT32_C( 647419742) }, { -INT32_C( 211684568), -INT32_C( 2063801978) }, { -INT32_C( 975511203), -INT32_C( 211684568) } }, { { -INT32_C( 873657293), INT32_C( 1182329647) }, { -INT32_C( 27581176), -INT32_C( 1999039189) }, { -INT32_C( 873657293), -INT32_C( 27581176) } }, { { -INT32_C( 1739672586), INT32_C( 74561499) }, { INT32_C( 821547946), -INT32_C( 390728780) }, { -INT32_C( 1739672586), INT32_C( 821547946) } }, { { INT32_C( 783524607), -INT32_C( 1787548531) }, { INT32_C( 2090127185), -INT32_C( 620401179) }, { INT32_C( 783524607), INT32_C( 2090127185) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vtrn1_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vtrn1_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(180), UINT8_C(234), UINT8_C( 38), UINT8_C(184), UINT8_C(112), UINT8_C( 40), UINT8_C(124), UINT8_C( 20) }, { UINT8_C( 
30), UINT8_C(250), UINT8_C( 91), UINT8_C( 78), UINT8_C(166), UINT8_C(229), UINT8_C(128), UINT8_C(117) }, { UINT8_C(180), UINT8_C( 30), UINT8_C( 38), UINT8_C( 91), UINT8_C(112), UINT8_C(166), UINT8_C(124), UINT8_C(128) } }, { { UINT8_C( 51), UINT8_C(233), UINT8_C(232), UINT8_C( 60), UINT8_C(220), UINT8_C( 27), UINT8_C(182), UINT8_C(252) }, { UINT8_C( 75), UINT8_C( 64), UINT8_C(212), UINT8_C(237), UINT8_C( 88), UINT8_C( 50), UINT8_C( 94), UINT8_C( 12) }, { UINT8_C( 51), UINT8_C( 75), UINT8_C(232), UINT8_C(212), UINT8_C(220), UINT8_C( 88), UINT8_C(182), UINT8_C( 94) } }, { { UINT8_C( 28), UINT8_C(133), UINT8_C(196), UINT8_C(140), UINT8_C(173), UINT8_C( 64), UINT8_C(161), UINT8_C(204) }, { UINT8_C( 59), UINT8_C(252), UINT8_C( 26), UINT8_C(225), UINT8_C(225), UINT8_C(154), UINT8_C( 86), UINT8_C( 21) }, { UINT8_C( 28), UINT8_C( 59), UINT8_C(196), UINT8_C( 26), UINT8_C(173), UINT8_C(225), UINT8_C(161), UINT8_C( 86) } }, { { UINT8_C(132), UINT8_C( 62), UINT8_C( 81), UINT8_C( 96), UINT8_C( 89), UINT8_C( 7), UINT8_C( 92), UINT8_C(164) }, { UINT8_C( 71), UINT8_C( 49), UINT8_C(145), UINT8_C(159), UINT8_C( 99), UINT8_C(240), UINT8_C(171), UINT8_C(127) }, { UINT8_C(132), UINT8_C( 71), UINT8_C( 81), UINT8_C(145), UINT8_C( 89), UINT8_C( 99), UINT8_C( 92), UINT8_C(171) } }, { { UINT8_C(117), UINT8_C(111), UINT8_C( 12), UINT8_C( 34), UINT8_C(176), UINT8_C(173), UINT8_C(238), UINT8_C(235) }, { UINT8_C(169), UINT8_C( 9), UINT8_C(204), UINT8_C(138), UINT8_C(163), UINT8_C( 34), UINT8_C(159), UINT8_C( 39) }, { UINT8_C(117), UINT8_C(169), UINT8_C( 12), UINT8_C(204), UINT8_C(176), UINT8_C(163), UINT8_C(238), UINT8_C(159) } }, { { UINT8_C( 97), UINT8_C(240), UINT8_C(136), UINT8_C(186), UINT8_C(247), UINT8_C(228), UINT8_C( 95), UINT8_C( 62) }, { UINT8_C( 21), UINT8_C(240), UINT8_C(221), UINT8_C(120), UINT8_C(224), UINT8_C(136), UINT8_C(248), UINT8_C( 85) }, { UINT8_C( 97), UINT8_C( 21), UINT8_C(136), UINT8_C(221), UINT8_C(247), UINT8_C(224), UINT8_C( 95), UINT8_C(248) } }, { { UINT8_C(248), 
UINT8_C( 4), UINT8_C(120), UINT8_C(168), UINT8_C(177), UINT8_C(102), UINT8_C(147), UINT8_C( 90) }, { UINT8_C(111), UINT8_C( 95), UINT8_C(228), UINT8_C( 19), UINT8_C(129), UINT8_C(132), UINT8_C( 58), UINT8_C(226) }, { UINT8_C(248), UINT8_C(111), UINT8_C(120), UINT8_C(228), UINT8_C(177), UINT8_C(129), UINT8_C(147), UINT8_C( 58) } }, { { UINT8_C(116), UINT8_C(194), UINT8_C(157), UINT8_C(108), UINT8_C(167), UINT8_C(252), UINT8_C(170), UINT8_C(188) }, { UINT8_C(236), UINT8_C(136), UINT8_C( 53), UINT8_C(205), UINT8_C( 16), UINT8_C( 45), UINT8_C( 34), UINT8_C( 8) }, { UINT8_C(116), UINT8_C(236), UINT8_C(157), UINT8_C( 53), UINT8_C(167), UINT8_C( 16), UINT8_C(170), UINT8_C( 34) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vtrn1_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vtrn1_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(60788), UINT16_C(40466), UINT16_C(30557), UINT16_C(47308) }, { UINT16_C(27512), UINT16_C( 3061), UINT16_C(41915), UINT16_C(15633) }, { UINT16_C(60788), UINT16_C(27512), UINT16_C(30557), UINT16_C(41915) } }, { { UINT16_C(31826), UINT16_C(40911), UINT16_C(11794), UINT16_C( 5983) }, { UINT16_C(15515), UINT16_C(15720), UINT16_C(44460), UINT16_C( 8198) }, { UINT16_C(31826), UINT16_C(15515), UINT16_C(11794), UINT16_C(44460) } }, { { UINT16_C( 6298), UINT16_C(63678), UINT16_C(35471), UINT16_C( 1968) }, { UINT16_C(42485), UINT16_C(45331), UINT16_C( 9289), UINT16_C(39918) }, { UINT16_C( 6298), UINT16_C(42485), UINT16_C(35471), UINT16_C( 9289) } }, { { UINT16_C(48544), UINT16_C(45627), UINT16_C(39659), UINT16_C(34762) }, { UINT16_C(13015), UINT16_C(33732), UINT16_C(51936), UINT16_C(31395) }, { UINT16_C(48544), UINT16_C(13015), UINT16_C(39659), UINT16_C(51936) } }, { 
{ UINT16_C(25058), UINT16_C(29298), UINT16_C( 8940), UINT16_C(57721) }, { UINT16_C(36040), UINT16_C( 4498), UINT16_C(32944), UINT16_C(20908) }, { UINT16_C(25058), UINT16_C(36040), UINT16_C( 8940), UINT16_C(32944) } }, { { UINT16_C(59197), UINT16_C(10499), UINT16_C(52610), UINT16_C(22960) }, { UINT16_C(29696), UINT16_C(57564), UINT16_C(32575), UINT16_C( 8538) }, { UINT16_C(59197), UINT16_C(29696), UINT16_C(52610), UINT16_C(32575) } }, { { UINT16_C(52704), UINT16_C(52371), UINT16_C( 3567), UINT16_C(47022) }, { UINT16_C(16537), UINT16_C(19144), UINT16_C(30145), UINT16_C(65179) }, { UINT16_C(52704), UINT16_C(16537), UINT16_C( 3567), UINT16_C(30145) } }, { { UINT16_C(40540), UINT16_C(56871), UINT16_C(55148), UINT16_C(27703) }, { UINT16_C( 4940), UINT16_C(35660), UINT16_C(42642), UINT16_C(29612) }, { UINT16_C(40540), UINT16_C( 4940), UINT16_C(55148), UINT16_C(42642) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vtrn1_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vtrn1_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C( 434122982), UINT32_C(2452681820) }, { UINT32_C(2633101907), UINT32_C(3594086343) }, { UINT32_C( 434122982), UINT32_C(2633101907) } }, { { UINT32_C(3976061469), UINT32_C(1299237744) }, { UINT32_C(3511335039), UINT32_C(2688013498) }, { UINT32_C(3976061469), UINT32_C(3511335039) } }, { { UINT32_C(1622808580), UINT32_C(1626532364) }, { UINT32_C(2768036828), UINT32_C(1736062282) }, { UINT32_C(1622808580), UINT32_C(2768036828) } }, { { UINT32_C(2270394135), UINT32_C(3251946562) }, { UINT32_C( 915545980), UINT32_C(4158049011) }, { UINT32_C(2270394135), UINT32_C( 915545980) } }, { { UINT32_C(3998716130), UINT32_C(1464748410) }, { UINT32_C(1996180012), 
UINT32_C(2547873151) }, { UINT32_C(3998716130), UINT32_C(1996180012) } }, { { UINT32_C( 773730540), UINT32_C(1894773748) }, { UINT32_C( 94798354), UINT32_C( 788299084) }, { UINT32_C( 773730540), UINT32_C( 94798354) } }, { { UINT32_C(2283557645), UINT32_C(3370085276) }, { UINT32_C( 893311669), UINT32_C(1003232079) }, { UINT32_C(2283557645), UINT32_C( 893311669) } }, { { UINT32_C(1080683083), UINT32_C(4021311709) }, { UINT32_C( 653547482), UINT32_C(3780440276) }, { UINT32_C(1080683083), UINT32_C( 653547482) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vtrn1_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vtrn1q_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -522.18), SIMDE_FLOAT32_C( 365.36), SIMDE_FLOAT32_C( -879.25), SIMDE_FLOAT32_C( -911.32) }, { SIMDE_FLOAT32_C( 508.26), SIMDE_FLOAT32_C( 535.56), SIMDE_FLOAT32_C( -927.14), SIMDE_FLOAT32_C( -145.00) }, { SIMDE_FLOAT32_C( -522.18), SIMDE_FLOAT32_C( 508.26), SIMDE_FLOAT32_C( -879.25), SIMDE_FLOAT32_C( -927.14) } }, { { SIMDE_FLOAT32_C( -644.24), SIMDE_FLOAT32_C( 758.58), SIMDE_FLOAT32_C( 592.05), SIMDE_FLOAT32_C( 670.85) }, { SIMDE_FLOAT32_C( 408.23), SIMDE_FLOAT32_C( -493.59), SIMDE_FLOAT32_C( -660.12), SIMDE_FLOAT32_C( 163.68) }, { SIMDE_FLOAT32_C( -644.24), SIMDE_FLOAT32_C( 408.23), SIMDE_FLOAT32_C( 592.05), SIMDE_FLOAT32_C( -660.12) } }, { { SIMDE_FLOAT32_C( -787.94), SIMDE_FLOAT32_C( -882.32), SIMDE_FLOAT32_C( 367.24), SIMDE_FLOAT32_C( -16.83) }, { SIMDE_FLOAT32_C( -482.56), SIMDE_FLOAT32_C( -96.42), SIMDE_FLOAT32_C( -722.75), SIMDE_FLOAT32_C( -446.06) }, { SIMDE_FLOAT32_C( -787.94), SIMDE_FLOAT32_C( -482.56), SIMDE_FLOAT32_C( 367.24), SIMDE_FLOAT32_C( -722.75) } }, { { SIMDE_FLOAT32_C( 
-23.04), SIMDE_FLOAT32_C( 914.12), SIMDE_FLOAT32_C( 495.70), SIMDE_FLOAT32_C( 665.01) }, { SIMDE_FLOAT32_C( 292.03), SIMDE_FLOAT32_C( -265.04), SIMDE_FLOAT32_C( 537.61), SIMDE_FLOAT32_C( 769.84) }, { SIMDE_FLOAT32_C( -23.04), SIMDE_FLOAT32_C( 292.03), SIMDE_FLOAT32_C( 495.70), SIMDE_FLOAT32_C( 537.61) } }, { { SIMDE_FLOAT32_C( -899.68), SIMDE_FLOAT32_C( 658.36), SIMDE_FLOAT32_C( 858.52), SIMDE_FLOAT32_C( 608.58) }, { SIMDE_FLOAT32_C( 193.92), SIMDE_FLOAT32_C( 931.38), SIMDE_FLOAT32_C( -536.43), SIMDE_FLOAT32_C( 549.69) }, { SIMDE_FLOAT32_C( -899.68), SIMDE_FLOAT32_C( 193.92), SIMDE_FLOAT32_C( 858.52), SIMDE_FLOAT32_C( -536.43) } }, { { SIMDE_FLOAT32_C( 689.97), SIMDE_FLOAT32_C( -944.37), SIMDE_FLOAT32_C( 220.54), SIMDE_FLOAT32_C( 98.20) }, { SIMDE_FLOAT32_C( -437.96), SIMDE_FLOAT32_C( 560.42), SIMDE_FLOAT32_C( -738.12), SIMDE_FLOAT32_C( -225.90) }, { SIMDE_FLOAT32_C( 689.97), SIMDE_FLOAT32_C( -437.96), SIMDE_FLOAT32_C( 220.54), SIMDE_FLOAT32_C( -738.12) } }, { { SIMDE_FLOAT32_C( 678.09), SIMDE_FLOAT32_C( 629.12), SIMDE_FLOAT32_C( 757.27), SIMDE_FLOAT32_C( -804.46) }, { SIMDE_FLOAT32_C( -467.30), SIMDE_FLOAT32_C( -965.48), SIMDE_FLOAT32_C( -250.52), SIMDE_FLOAT32_C( 509.67) }, { SIMDE_FLOAT32_C( 678.09), SIMDE_FLOAT32_C( -467.30), SIMDE_FLOAT32_C( 757.27), SIMDE_FLOAT32_C( -250.52) } }, { { SIMDE_FLOAT32_C( 948.64), SIMDE_FLOAT32_C( -754.82), SIMDE_FLOAT32_C( 174.67), SIMDE_FLOAT32_C( 240.67) }, { SIMDE_FLOAT32_C( -19.85), SIMDE_FLOAT32_C( -287.72), SIMDE_FLOAT32_C( 10.52), SIMDE_FLOAT32_C( 80.47) }, { SIMDE_FLOAT32_C( 948.64), SIMDE_FLOAT32_C( -19.85), SIMDE_FLOAT32_C( 174.67), SIMDE_FLOAT32_C( 10.52) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vtrn1q_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vtrn1q_f64 
(SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -499.12), SIMDE_FLOAT64_C( -611.84) }, { SIMDE_FLOAT64_C( 618.92), SIMDE_FLOAT64_C( -353.88) }, { SIMDE_FLOAT64_C( -499.12), SIMDE_FLOAT64_C( 618.92) } }, { { SIMDE_FLOAT64_C( 542.91), SIMDE_FLOAT64_C( 443.42) }, { SIMDE_FLOAT64_C( -774.93), SIMDE_FLOAT64_C( 219.66) }, { SIMDE_FLOAT64_C( 542.91), SIMDE_FLOAT64_C( -774.93) } }, { { SIMDE_FLOAT64_C( 43.30), SIMDE_FLOAT64_C( 437.28) }, { SIMDE_FLOAT64_C( 412.68), SIMDE_FLOAT64_C( -401.41) }, { SIMDE_FLOAT64_C( 43.30), SIMDE_FLOAT64_C( 412.68) } }, { { SIMDE_FLOAT64_C( 823.43), SIMDE_FLOAT64_C( 254.13) }, { SIMDE_FLOAT64_C( -356.45), SIMDE_FLOAT64_C( 862.90) }, { SIMDE_FLOAT64_C( 823.43), SIMDE_FLOAT64_C( -356.45) } }, { { SIMDE_FLOAT64_C( -30.73), SIMDE_FLOAT64_C( 631.32) }, { SIMDE_FLOAT64_C( 321.40), SIMDE_FLOAT64_C( 984.72) }, { SIMDE_FLOAT64_C( -30.73), SIMDE_FLOAT64_C( 321.40) } }, { { SIMDE_FLOAT64_C( 283.84), SIMDE_FLOAT64_C( -391.31) }, { SIMDE_FLOAT64_C( 957.98), SIMDE_FLOAT64_C( 515.26) }, { SIMDE_FLOAT64_C( 283.84), SIMDE_FLOAT64_C( 957.98) } }, { { SIMDE_FLOAT64_C( -327.25), SIMDE_FLOAT64_C( -666.99) }, { SIMDE_FLOAT64_C( -844.37), SIMDE_FLOAT64_C( -877.10) }, { SIMDE_FLOAT64_C( -327.25), SIMDE_FLOAT64_C( -844.37) } }, { { SIMDE_FLOAT64_C( 397.41), SIMDE_FLOAT64_C( -596.55) }, { SIMDE_FLOAT64_C( 732.55), SIMDE_FLOAT64_C( 898.29) }, { SIMDE_FLOAT64_C( 397.41), SIMDE_FLOAT64_C( 732.55) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vtrn1q_f64(a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vtrn1q_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { -INT8_C( 5), INT8_C( 91), -INT8_C( 64), 
-INT8_C( 51), -INT8_C( 31), -INT8_C( 105), -INT8_C( 45), INT8_C( 96), INT8_C( 5), -INT8_C( 120), INT8_C( 11), INT8_C( 81), -INT8_C( 40), INT8_C( 33), INT8_C( 126), -INT8_C( 48) }, { -INT8_C( 4), -INT8_C( 12), INT8_C( 64), -INT8_C( 88), INT8_C( 17), INT8_C( 67), -INT8_C( 83), INT8_C( 106), INT8_C( 71), -INT8_C( 114), INT8_C( 1), INT8_C( 60), INT8_C( 54), INT8_C( 81), INT8_C( 123), INT8_C( 49) }, { -INT8_C( 5), -INT8_C( 4), -INT8_C( 64), INT8_C( 64), -INT8_C( 31), INT8_C( 17), -INT8_C( 45), -INT8_C( 83), INT8_C( 5), INT8_C( 71), INT8_C( 11), INT8_C( 1), -INT8_C( 40), INT8_C( 54), INT8_C( 126), INT8_C( 123) } }, { { -INT8_C( 84), INT8_C( 59), -INT8_C( 2), -INT8_C( 115), -INT8_C( 46), -INT8_C( 47), -INT8_C( 19), -INT8_C( 40), INT8_C( 89), -INT8_C( 8), INT8_C( 41), INT8_C( 50), INT8_C( 25), -INT8_C( 89), INT8_C( 2), INT8_C( 21) }, { -INT8_C( 101), INT8_C( 67), -INT8_C( 67), -INT8_C( 83), -INT8_C( 122), INT8_C( 107), INT8_C( 23), -INT8_C( 50), -INT8_C( 7), INT8_C( 25), INT8_C( 10), INT8_C( 47), INT8_C( 106), -INT8_C( 123), INT8_C( 97), INT8_C( 22) }, { -INT8_C( 84), -INT8_C( 101), -INT8_C( 2), -INT8_C( 67), -INT8_C( 46), -INT8_C( 122), -INT8_C( 19), INT8_C( 23), INT8_C( 89), -INT8_C( 7), INT8_C( 41), INT8_C( 10), INT8_C( 25), INT8_C( 106), INT8_C( 2), INT8_C( 97) } }, { { -INT8_C( 64), INT8_C( 95), -INT8_C( 93), -INT8_C( 110), INT8_C( 49), -INT8_C( 111), INT8_C( 106), -INT8_C( 118), -INT8_C( 119), -INT8_C( 109), -INT8_C( 68), -INT8_C( 93), INT8_C( 59), -INT8_C( 65), -INT8_C( 72), -INT8_C( 42) }, { INT8_C( 2), INT8_C( 118), -INT8_C( 125), -INT8_C( 120), -INT8_C( 31), -INT8_C( 101), INT8_C( 86), -INT8_C( 38), -INT8_C( 76), INT8_C( 96), INT8_C( 9), INT8_C( 30), -INT8_C( 27), INT8_C( 106), INT8_C( 52), -INT8_C( 91) }, { -INT8_C( 64), INT8_C( 2), -INT8_C( 93), -INT8_C( 125), INT8_C( 49), -INT8_C( 31), INT8_C( 106), INT8_C( 86), -INT8_C( 119), -INT8_C( 76), -INT8_C( 68), INT8_C( 9), INT8_C( 59), -INT8_C( 27), -INT8_C( 72), INT8_C( 52) } }, { { -INT8_C( 54), -INT8_C( 41), 
INT8_C( 56), -INT8_C( 5), INT8_C( 104), -INT8_C( 94), -INT8_C( 123), -INT8_C( 14), INT8_C( 54), INT8_C( 66), -INT8_C( 107), INT8_C( 113), INT8_C( 1), INT8_C( 77), INT8_C( 71), INT8_C( 3) }, { -INT8_C( 61), -INT8_C( 53), -INT8_C( 117), -INT8_C( 92), INT8_C( 102), -INT8_C( 30), INT8_C( 126), INT8_C( 26), INT8_C( 66), -INT8_C( 120), INT8_C( 56), INT8_C( 40), -INT8_C( 14), INT8_C( 108), -INT8_C( 51), -INT8_C( 68) }, { -INT8_C( 54), -INT8_C( 61), INT8_C( 56), -INT8_C( 117), INT8_C( 104), INT8_C( 102), -INT8_C( 123), INT8_C( 126), INT8_C( 54), INT8_C( 66), -INT8_C( 107), INT8_C( 56), INT8_C( 1), -INT8_C( 14), INT8_C( 71), -INT8_C( 51) } }, { { INT8_C( 67), INT8_C( 5), -INT8_C( 73), -INT8_C( 84), -INT8_C( 88), INT8_C( 61), -INT8_C( 98), -INT8_C( 34), INT8_MAX, INT8_C( 51), INT8_C( 79), INT8_MIN, INT8_MIN, -INT8_C( 106), -INT8_C( 125), INT8_C( 68) }, { INT8_C( 97), INT8_C( 14), -INT8_C( 24), -INT8_C( 57), -INT8_C( 16), INT8_C( 103), -INT8_C( 31), INT8_C( 51), -INT8_C( 17), INT8_C( 25), INT8_C( 91), -INT8_C( 31), -INT8_C( 123), INT8_C( 40), -INT8_C( 98), -INT8_C( 55) }, { INT8_C( 67), INT8_C( 97), -INT8_C( 73), -INT8_C( 24), -INT8_C( 88), -INT8_C( 16), -INT8_C( 98), -INT8_C( 31), INT8_MAX, -INT8_C( 17), INT8_C( 79), INT8_C( 91), INT8_MIN, -INT8_C( 123), -INT8_C( 125), -INT8_C( 98) } }, { { INT8_C( 46), INT8_C( 85), INT8_C( 117), -INT8_C( 42), -INT8_C( 110), INT8_C( 19), -INT8_C( 76), INT8_C( 17), INT8_C( 70), INT8_C( 3), -INT8_C( 111), -INT8_C( 58), -INT8_C( 103), INT8_C( 20), INT8_C( 10), -INT8_C( 5) }, { INT8_C( 35), -INT8_C( 13), -INT8_C( 62), INT8_C( 19), INT8_C( 90), -INT8_C( 92), INT8_C( 70), INT8_C( 73), -INT8_C( 67), -INT8_C( 95), INT8_C( 42), INT8_C( 67), -INT8_C( 54), -INT8_C( 56), INT8_C( 12), -INT8_C( 8) }, { INT8_C( 46), INT8_C( 35), INT8_C( 117), -INT8_C( 62), -INT8_C( 110), INT8_C( 90), -INT8_C( 76), INT8_C( 70), INT8_C( 70), -INT8_C( 67), -INT8_C( 111), INT8_C( 42), -INT8_C( 103), -INT8_C( 54), INT8_C( 10), INT8_C( 12) } }, { { INT8_C( 30), -INT8_C( 127), 
-INT8_C( 50), -INT8_C( 80), -INT8_C( 108), -INT8_C( 126), -INT8_C( 62), -INT8_C( 38), -INT8_C( 123), INT8_C( 83), -INT8_C( 96), INT8_C( 30), INT8_C( 104), -INT8_C( 85), INT8_C( 25), -INT8_C( 117) }, { -INT8_C( 98), -INT8_C( 36), -INT8_C( 98), -INT8_C( 8), INT8_MIN, -INT8_C( 27), INT8_C( 65), INT8_C( 61), -INT8_C( 122), INT8_C( 107), INT8_MIN, INT8_C( 80), INT8_C( 52), -INT8_C( 116), INT8_C( 72), INT8_C( 82) }, { INT8_C( 30), -INT8_C( 98), -INT8_C( 50), -INT8_C( 98), -INT8_C( 108), INT8_MIN, -INT8_C( 62), INT8_C( 65), -INT8_C( 123), -INT8_C( 122), -INT8_C( 96), INT8_MIN, INT8_C( 104), INT8_C( 52), INT8_C( 25), INT8_C( 72) } }, { { INT8_C( 13), INT8_C( 22), INT8_C( 2), -INT8_C( 95), -INT8_C( 104), -INT8_C( 60), INT8_C( 123), INT8_C( 29), INT8_C( 24), INT8_C( 28), INT8_C( 60), INT8_MIN, -INT8_C( 57), INT8_C( 85), INT8_C( 11), INT8_C( 101) }, { INT8_C( 49), -INT8_C( 87), INT8_C( 93), -INT8_C( 79), -INT8_C( 114), -INT8_C( 98), -INT8_C( 17), INT8_C( 21), INT8_C( 9), INT8_C( 111), INT8_C( 101), INT8_C( 61), -INT8_C( 4), -INT8_C( 82), -INT8_C( 113), INT8_C( 9) }, { INT8_C( 13), INT8_C( 49), INT8_C( 2), INT8_C( 93), -INT8_C( 104), -INT8_C( 114), INT8_C( 123), -INT8_C( 17), INT8_C( 24), INT8_C( 9), INT8_C( 60), INT8_C( 101), -INT8_C( 57), -INT8_C( 4), INT8_C( 11), -INT8_C( 113) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vtrn1q_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vtrn1q_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 14817), -INT16_C( 15912), INT16_C( 6827), -INT16_C( 16841), -INT16_C( 15885), INT16_C( 6412), INT16_C( 12676), INT16_C( 26747) }, { INT16_C( 31336), INT16_C( 23842), INT16_C( 32211), -INT16_C( 23641), INT16_C( 22041), -INT16_C( 28634), -INT16_C( 6631), 
-INT16_C( 1058) }, { INT16_C( 14817), INT16_C( 31336), INT16_C( 6827), INT16_C( 32211), -INT16_C( 15885), INT16_C( 22041), INT16_C( 12676), -INT16_C( 6631) } }, { { -INT16_C( 18913), -INT16_C( 13636), -INT16_C( 3119), -INT16_C( 15224), -INT16_C( 27468), INT16_C( 14558), INT16_C( 22981), INT16_C( 11680) }, { -INT16_C( 15405), -INT16_C( 22902), INT16_C( 12864), INT16_C( 22857), INT16_C( 28552), -INT16_C( 24086), -INT16_C( 14251), INT16_C( 29852) }, { -INT16_C( 18913), -INT16_C( 15405), -INT16_C( 3119), INT16_C( 12864), -INT16_C( 27468), INT16_C( 28552), INT16_C( 22981), -INT16_C( 14251) } }, { { INT16_C( 22655), INT16_C( 20543), -INT16_C( 14517), INT16_C( 20), -INT16_C( 3492), INT16_C( 8504), -INT16_C( 9908), INT16_C( 8015) }, { -INT16_C( 9828), -INT16_C( 9018), INT16_C( 3851), -INT16_C( 27850), INT16_C( 8319), -INT16_C( 11211), -INT16_C( 11800), INT16_C( 26441) }, { INT16_C( 22655), -INT16_C( 9828), -INT16_C( 14517), INT16_C( 3851), -INT16_C( 3492), INT16_C( 8319), -INT16_C( 9908), -INT16_C( 11800) } }, { { -INT16_C( 30678), INT16_C( 30135), -INT16_C( 13233), -INT16_C( 21643), -INT16_C( 20802), INT16_C( 2765), INT16_C( 7303), INT16_C( 9002) }, { -INT16_C( 3851), INT16_C( 511), INT16_C( 13823), INT16_C( 32404), -INT16_C( 13995), INT16_C( 15955), -INT16_C( 25445), -INT16_C( 14939) }, { -INT16_C( 30678), -INT16_C( 3851), -INT16_C( 13233), INT16_C( 13823), -INT16_C( 20802), -INT16_C( 13995), INT16_C( 7303), -INT16_C( 25445) } }, { { INT16_C( 23844), INT16_C( 29498), -INT16_C( 20439), -INT16_C( 6369), -INT16_C( 5026), -INT16_C( 6670), INT16_C( 7176), -INT16_C( 760) }, { INT16_C( 1804), INT16_C( 3070), -INT16_C( 27843), -INT16_C( 28022), -INT16_C( 8868), -INT16_C( 2096), INT16_C( 30329), -INT16_C( 25156) }, { INT16_C( 23844), INT16_C( 1804), -INT16_C( 20439), -INT16_C( 27843), -INT16_C( 5026), -INT16_C( 8868), INT16_C( 7176), INT16_C( 30329) } }, { { -INT16_C( 2093), -INT16_C( 1008), INT16_C( 12199), INT16_C( 1507), -INT16_C( 10981), INT16_C( 9194), -INT16_C( 3343), 
-INT16_C( 735) }, { INT16_C( 8185), INT16_C( 13833), -INT16_C( 27726), INT16_C( 4041), -INT16_C( 26256), -INT16_C( 5882), -INT16_C( 15601), -INT16_C( 7546) }, { -INT16_C( 2093), INT16_C( 8185), INT16_C( 12199), -INT16_C( 27726), -INT16_C( 10981), -INT16_C( 26256), -INT16_C( 3343), -INT16_C( 15601) } }, { { -INT16_C( 26950), INT16_C( 25054), -INT16_C( 15674), -INT16_C( 7834), INT16_C( 20631), -INT16_C( 30459), INT16_C( 9794), INT16_C( 15238) }, { -INT16_C( 28859), -INT16_C( 1934), INT16_C( 15138), -INT16_C( 28153), INT16_C( 3540), -INT16_C( 7045), INT16_C( 464), -INT16_C( 30010) }, { -INT16_C( 26950), -INT16_C( 28859), -INT16_C( 15674), INT16_C( 15138), INT16_C( 20631), INT16_C( 3540), INT16_C( 9794), INT16_C( 464) } }, { { -INT16_C( 23144), INT16_C( 24299), INT16_C( 20839), -INT16_C( 449), INT16_C( 17569), -INT16_C( 7289), INT16_C( 3690), -INT16_C( 20449) }, { -INT16_C( 28259), -INT16_C( 16216), -INT16_C( 20532), -INT16_C( 24494), -INT16_C( 12612), -INT16_C( 29308), INT16_C( 19407), INT16_C( 26391) }, { -INT16_C( 23144), -INT16_C( 28259), INT16_C( 20839), -INT16_C( 20532), INT16_C( 17569), -INT16_C( 12612), INT16_C( 3690), INT16_C( 19407) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vtrn1q_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vtrn1q_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 1341418916), INT32_C( 777003733), -INT32_C( 1026009579), INT32_C( 1018759377) }, { -INT32_C( 587710840), -INT32_C( 1130803257), INT32_C( 296467978), INT32_C( 730158286) }, { -INT32_C( 1341418916), -INT32_C( 587710840), -INT32_C( 1026009579), INT32_C( 296467978) } }, { { -INT32_C( 1126461209), -INT32_C( 924111949), INT32_C( 1401668482), INT32_C( 1418675403) }, { INT32_C( 
1227917186), -INT32_C( 536426026), INT32_C( 1441902983), -INT32_C( 310348026) }, { -INT32_C( 1126461209), INT32_C( 1227917186), INT32_C( 1401668482), INT32_C( 1441902983) } }, { { -INT32_C( 1163306233), INT32_C( 159618183), INT32_C( 593235544), -INT32_C( 730338478) }, { INT32_C( 1209968498), -INT32_C( 148364176), -INT32_C( 615704107), -INT32_C( 1731670640) }, { -INT32_C( 1163306233), INT32_C( 1209968498), INT32_C( 593235544), -INT32_C( 615704107) } }, { { -INT32_C( 1336774103), INT32_C( 1589236998), INT32_C( 897717731), INT32_C( 1913321728) }, { INT32_C( 297412769), INT32_C( 554295884), -INT32_C( 1929554181), INT32_C( 1277478179) }, { -INT32_C( 1336774103), INT32_C( 297412769), INT32_C( 897717731), -INT32_C( 1929554181) } }, { { INT32_C( 1056732727), INT32_C( 798799180), -INT32_C( 899342646), -INT32_C( 1187221736) }, { -INT32_C( 473237865), -INT32_C( 737881128), INT32_C( 1298137385), -INT32_C( 23493433) }, { INT32_C( 1056732727), -INT32_C( 473237865), -INT32_C( 899342646), INT32_C( 1298137385) } }, { { INT32_C( 1178375930), INT32_C( 376887627), INT32_C( 266394615), -INT32_C( 506978998) }, { -INT32_C( 322661613), -INT32_C( 1866413978), -INT32_C( 1847779126), -INT32_C( 1634765148) }, { INT32_C( 1178375930), -INT32_C( 322661613), INT32_C( 266394615), -INT32_C( 1847779126) } }, { { INT32_C( 1491454988), -INT32_C( 1670489179), -INT32_C( 2136191434), INT32_C( 2137093227) }, { INT32_C( 1852515591), -INT32_C( 1208079379), -INT32_C( 280437941), INT32_C( 1586354257) }, { INT32_C( 1491454988), INT32_C( 1852515591), -INT32_C( 2136191434), -INT32_C( 280437941) } }, { { INT32_C( 1236693668), INT32_C( 65348813), -INT32_C( 561802894), INT32_C( 224257030) }, { -INT32_C( 142882807), INT32_C( 1051621875), -INT32_C( 1506937004), INT32_C( 1929689807) }, { INT32_C( 1236693668), -INT32_C( 142882807), -INT32_C( 561802894), -INT32_C( 1506937004) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); 
simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vtrn1q_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vtrn1q_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { INT64_C( 2370942768217045703), INT64_C( 3834060604335488599) }, { INT64_C( 5962793575161641133), INT64_C( 2451362664542500656) }, { INT64_C( 2370942768217045703), INT64_C( 5962793575161641133) } }, { { -INT64_C( 91528125595492425), INT64_C( 8454808771016685138) }, { INT64_C( 1997428809620116027), INT64_C( 2503757759122402625) }, { -INT64_C( 91528125595492425), INT64_C( 1997428809620116027) } }, { { -INT64_C( 3375599107157742718), INT64_C( 3758894877307401709) }, { INT64_C( 4548726874596619289), -INT64_C( 2068388762230178113) }, { -INT64_C( 3375599107157742718), INT64_C( 4548726874596619289) } }, { { INT64_C( 7752440428129683270), -INT64_C( 1741121954605135960) }, { -INT64_C( 9134413012119359234), -INT64_C( 464162460032952246) }, { INT64_C( 7752440428129683270), -INT64_C( 9134413012119359234) } }, { { INT64_C( 8495572088113830653), INT64_C( 5733906210614609629) }, { -INT64_C( 3197405612916048547), INT64_C( 4488976398999767712) }, { INT64_C( 8495572088113830653), -INT64_C( 3197405612916048547) } }, { { INT64_C( 4533049365362183078), INT64_C( 5280325345817001894) }, { -INT64_C( 4948321176345078792), -INT64_C( 8876213478574075309) }, { INT64_C( 4533049365362183078), -INT64_C( 4948321176345078792) } }, { { -INT64_C( 3385053066255955098), INT64_C( 6995515642783330015) }, { -INT64_C( 2776126643429474653), INT64_C( 5146839089142400778) }, { -INT64_C( 3385053066255955098), -INT64_C( 2776126643429474653) } }, { { INT64_C( 2649826617627403465), INT64_C( 4813228284004306689) }, { -INT64_C( 1088892655976271997), INT64_C( 6352686333166178304) }, { INT64_C( 2649826617627403465), -INT64_C( 1088892655976271997) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_vtrn1q_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vtrn1q_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C( 32), UINT8_C(105), UINT8_C( 60), UINT8_C( 17), UINT8_C(121), UINT8_C(145), UINT8_C( 45), UINT8_C(135), UINT8_C(225), UINT8_C( 38), UINT8_C(228), UINT8_C( 88), UINT8_C(200), UINT8_C(213), UINT8_C(147), UINT8_C(151) }, { UINT8_C(177), UINT8_C(122), UINT8_C(147), UINT8_C( 63), UINT8_C(245), UINT8_C(244), UINT8_C(140), UINT8_C(165), UINT8_C(180), UINT8_C( 76), UINT8_C( 77), UINT8_C(249), UINT8_C( 12), UINT8_C( 91), UINT8_C( 53), UINT8_C( 45) }, { UINT8_C( 32), UINT8_C(177), UINT8_C( 60), UINT8_C(147), UINT8_C(121), UINT8_C(245), UINT8_C( 45), UINT8_C(140), UINT8_C(225), UINT8_C(180), UINT8_C(228), UINT8_C( 77), UINT8_C(200), UINT8_C( 12), UINT8_C(147), UINT8_C( 53) } }, { { UINT8_C(196), UINT8_C(113), UINT8_C( 62), UINT8_C( 61), UINT8_C( 2), UINT8_C(108), UINT8_C(196), UINT8_C(228), UINT8_C(146), UINT8_C(168), UINT8_C( 60), UINT8_C( 90), UINT8_C(126), UINT8_C(207), UINT8_C(241), UINT8_C( 47) }, { UINT8_C( 73), UINT8_C(133), UINT8_C(110), UINT8_C( 62), UINT8_C(121), UINT8_C(251), UINT8_C(227), UINT8_C( 45), UINT8_C( 71), UINT8_C( 49), UINT8_C( 38), UINT8_C( 83), UINT8_C(140), UINT8_C( 91), UINT8_C(128), UINT8_C( 80) }, { UINT8_C(196), UINT8_C( 73), UINT8_C( 62), UINT8_C(110), UINT8_C( 2), UINT8_C(121), UINT8_C(196), UINT8_C(227), UINT8_C(146), UINT8_C( 71), UINT8_C( 60), UINT8_C( 38), UINT8_C(126), UINT8_C(140), UINT8_C(241), UINT8_C(128) } }, { { UINT8_C(204), UINT8_C(191), UINT8_C(141), UINT8_C(206), UINT8_C( 43), UINT8_C( 81), UINT8_C(178), UINT8_C(189), UINT8_C(250), UINT8_C(238), UINT8_C( 23), UINT8_C(120), UINT8_C(189), UINT8_C( 9), UINT8_C(167), UINT8_C( 7) }, { 
UINT8_C(142), UINT8_C( 21), UINT8_C( 69), UINT8_C( 7), UINT8_C( 16), UINT8_C( 41), UINT8_C( 52), UINT8_C( 87), UINT8_C( 90), UINT8_C( 90), UINT8_C(171), UINT8_C(230), UINT8_C(181), UINT8_C( 43), UINT8_C( 54), UINT8_C(129) }, { UINT8_C(204), UINT8_C(142), UINT8_C(141), UINT8_C( 69), UINT8_C( 43), UINT8_C( 16), UINT8_C(178), UINT8_C( 52), UINT8_C(250), UINT8_C( 90), UINT8_C( 23), UINT8_C(171), UINT8_C(189), UINT8_C(181), UINT8_C(167), UINT8_C( 54) } }, { { UINT8_C(234), UINT8_C(195), UINT8_C( 79), UINT8_C( 21), UINT8_C( 20), UINT8_C( 2), UINT8_C(210), UINT8_C( 14), UINT8_C(240), UINT8_C(234), UINT8_C(134), UINT8_C(174), UINT8_C(243), UINT8_C( 45), UINT8_C(181), UINT8_C(129) }, { UINT8_C( 67), UINT8_C(250), UINT8_C(136), UINT8_C( 83), UINT8_C( 35), UINT8_C(188), UINT8_C(171), UINT8_C(125), UINT8_C( 22), UINT8_C( 86), UINT8_C( 99), UINT8_C(203), UINT8_C(129), UINT8_C(153), UINT8_C( 76), UINT8_C(108) }, { UINT8_C(234), UINT8_C( 67), UINT8_C( 79), UINT8_C(136), UINT8_C( 20), UINT8_C( 35), UINT8_C(210), UINT8_C(171), UINT8_C(240), UINT8_C( 22), UINT8_C(134), UINT8_C( 99), UINT8_C(243), UINT8_C(129), UINT8_C(181), UINT8_C( 76) } }, { { UINT8_C( 92), UINT8_C(155), UINT8_C(129), UINT8_C(113), UINT8_C(157), UINT8_C( 84), UINT8_C(127), UINT8_C(142), UINT8_C( 62), UINT8_C( 6), UINT8_C( 60), UINT8_C( 49), UINT8_C( 51), UINT8_C(241), UINT8_C(178), UINT8_C(118) }, { UINT8_C(235), UINT8_C( 58), UINT8_C(202), UINT8_C( 15), UINT8_C(246), UINT8_C(117), UINT8_C(140), UINT8_C( 12), UINT8_C(203), UINT8_C(240), UINT8_C(215), UINT8_C( 76), UINT8_C(137), UINT8_C( 35), UINT8_C(184), UINT8_C(230) }, { UINT8_C( 92), UINT8_C(235), UINT8_C(129), UINT8_C(202), UINT8_C(157), UINT8_C(246), UINT8_C(127), UINT8_C(140), UINT8_C( 62), UINT8_C(203), UINT8_C( 60), UINT8_C(215), UINT8_C( 51), UINT8_C(137), UINT8_C(178), UINT8_C(184) } }, { { UINT8_C(190), UINT8_C( 58), UINT8_C( 87), UINT8_C( 92), UINT8_C(142), UINT8_C(214), UINT8_C(234), UINT8_C(204), UINT8_C(220), UINT8_C( 38), UINT8_C(253), UINT8_C( 
16), UINT8_C( 23), UINT8_C(175), UINT8_C(134), UINT8_C( 2) }, { UINT8_C(233), UINT8_C( 80), UINT8_C( 17), UINT8_C(223), UINT8_C(197), UINT8_C(158), UINT8_C(235), UINT8_C(144), UINT8_C(142), UINT8_C(194), UINT8_C(221), UINT8_C( 23), UINT8_C(229), UINT8_C(149), UINT8_C(253), UINT8_C(163) }, { UINT8_C(190), UINT8_C(233), UINT8_C( 87), UINT8_C( 17), UINT8_C(142), UINT8_C(197), UINT8_C(234), UINT8_C(235), UINT8_C(220), UINT8_C(142), UINT8_C(253), UINT8_C(221), UINT8_C( 23), UINT8_C(229), UINT8_C(134), UINT8_C(253) } }, { { UINT8_C(207), UINT8_C( 84), UINT8_MAX, UINT8_C( 93), UINT8_C( 43), UINT8_C(233), UINT8_C( 41), UINT8_C( 7), UINT8_C( 15), UINT8_C( 38), UINT8_C( 23), UINT8_C( 38), UINT8_C(213), UINT8_C(158), UINT8_C( 41), UINT8_C(190) }, { UINT8_C(238), UINT8_C( 58), UINT8_C(157), UINT8_C(180), UINT8_C(216), UINT8_C(136), UINT8_C( 68), UINT8_C(102), UINT8_C( 74), UINT8_C( 33), UINT8_C(126), UINT8_C( 47), UINT8_C(183), UINT8_C(123), UINT8_C(211), UINT8_C(134) }, { UINT8_C(207), UINT8_C(238), UINT8_MAX, UINT8_C(157), UINT8_C( 43), UINT8_C(216), UINT8_C( 41), UINT8_C( 68), UINT8_C( 15), UINT8_C( 74), UINT8_C( 23), UINT8_C(126), UINT8_C(213), UINT8_C(183), UINT8_C( 41), UINT8_C(211) } }, { { UINT8_C(208), UINT8_C(210), UINT8_C(228), UINT8_C(251), UINT8_C(188), UINT8_C( 13), UINT8_C( 2), UINT8_C(203), UINT8_C( 52), UINT8_C( 26), UINT8_C(242), UINT8_C( 9), UINT8_C(184), UINT8_C( 27), UINT8_C(200), UINT8_C(166) }, { UINT8_C( 85), UINT8_C(101), UINT8_C( 90), UINT8_C( 46), UINT8_C(238), UINT8_C(159), UINT8_C(148), UINT8_C( 56), UINT8_C(192), UINT8_C( 18), UINT8_C(104), UINT8_C(119), UINT8_C(142), UINT8_C( 59), UINT8_C(254), UINT8_C( 94) }, { UINT8_C(208), UINT8_C( 85), UINT8_C(228), UINT8_C( 90), UINT8_C(188), UINT8_C(238), UINT8_C( 2), UINT8_C(148), UINT8_C( 52), UINT8_C(192), UINT8_C(242), UINT8_C(104), UINT8_C(184), UINT8_C(142), UINT8_C(200), UINT8_C(254) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = 
simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vtrn1q_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vtrn1q_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(14167), UINT16_C(21279), UINT16_C(48551), UINT16_C(14943), UINT16_C(56959), UINT16_C( 3699), UINT16_C(63440), UINT16_C(55030) }, { UINT16_C(40429), UINT16_C(42656), UINT16_C(10703), UINT16_C(50757), UINT16_C(39485), UINT16_C(24810), UINT16_C(45053), UINT16_C(22004) }, { UINT16_C(14167), UINT16_C(40429), UINT16_C(48551), UINT16_C(10703), UINT16_C(56959), UINT16_C(39485), UINT16_C(63440), UINT16_C(45053) } }, { { UINT16_C( 5094), UINT16_C(36264), UINT16_C( 2001), UINT16_C(20680), UINT16_C(15333), UINT16_C(46430), UINT16_C(21555), UINT16_C( 8331) }, { UINT16_C(11505), UINT16_C(49351), UINT16_C( 3157), UINT16_C(37510), UINT16_C(28839), UINT16_C(42226), UINT16_C(59167), UINT16_C( 1785) }, { UINT16_C( 5094), UINT16_C(11505), UINT16_C( 2001), UINT16_C( 3157), UINT16_C(15333), UINT16_C(28839), UINT16_C(21555), UINT16_C(59167) } }, { { UINT16_C(41466), UINT16_C(52115), UINT16_C(23464), UINT16_C(36124), UINT16_C(31383), UINT16_C(51778), UINT16_C(52943), UINT16_C(49386) }, { UINT16_C(45562), UINT16_C(20353), UINT16_C( 1982), UINT16_C(26082), UINT16_C(54392), UINT16_C(38665), UINT16_C( 955), UINT16_C(46749) }, { UINT16_C(41466), UINT16_C(45562), UINT16_C(23464), UINT16_C( 1982), UINT16_C(31383), UINT16_C(54392), UINT16_C(52943), UINT16_C( 955) } }, { { UINT16_C(12708), UINT16_C(19841), UINT16_C(40332), UINT16_C( 9178), UINT16_C( 7448), UINT16_C(59373), UINT16_C(55531), UINT16_C(58791) }, { UINT16_C(10377), UINT16_C(18228), UINT16_C( 5680), UINT16_C(43180), UINT16_C(46827), UINT16_C(42559), UINT16_C(56761), UINT16_C(23900) }, { UINT16_C(12708), UINT16_C(10377), UINT16_C(40332), UINT16_C( 5680), UINT16_C( 7448), 
UINT16_C(46827), UINT16_C(55531), UINT16_C(56761) } }, { { UINT16_C(56846), UINT16_C(39594), UINT16_C(34171), UINT16_C(37822), UINT16_C(43938), UINT16_C(36218), UINT16_C( 8835), UINT16_C( 3442) }, { UINT16_C(42570), UINT16_C(31316), UINT16_C( 445), UINT16_C(43042), UINT16_C(25271), UINT16_C(28750), UINT16_C(43839), UINT16_C(19917) }, { UINT16_C(56846), UINT16_C(42570), UINT16_C(34171), UINT16_C( 445), UINT16_C(43938), UINT16_C(25271), UINT16_C( 8835), UINT16_C(43839) } }, { { UINT16_C(30857), UINT16_C( 1255), UINT16_C(42493), UINT16_C(40856), UINT16_C( 4689), UINT16_C(54316), UINT16_C(40500), UINT16_C(32737) }, { UINT16_C(13892), UINT16_C( 505), UINT16_C( 7223), UINT16_C(61097), UINT16_C(63614), UINT16_C(48478), UINT16_C(11171), UINT16_C(11274) }, { UINT16_C(30857), UINT16_C(13892), UINT16_C(42493), UINT16_C( 7223), UINT16_C( 4689), UINT16_C(63614), UINT16_C(40500), UINT16_C(11171) } }, { { UINT16_C(61859), UINT16_C(41008), UINT16_C(51351), UINT16_C(59455), UINT16_C(27611), UINT16_C( 4028), UINT16_C(40457), UINT16_C(20110) }, { UINT16_C(35028), UINT16_C( 2895), UINT16_C(63908), UINT16_C( 8953), UINT16_C(22513), UINT16_C(38111), UINT16_C(59778), UINT16_C( 9920) }, { UINT16_C(61859), UINT16_C(35028), UINT16_C(51351), UINT16_C(63908), UINT16_C(27611), UINT16_C(22513), UINT16_C(40457), UINT16_C(59778) } }, { { UINT16_C(61658), UINT16_C(29126), UINT16_C( 1721), UINT16_C(37977), UINT16_C( 5745), UINT16_C(31651), UINT16_C(12980), UINT16_C(35017) }, { UINT16_C( 6330), UINT16_C(24211), UINT16_C(35857), UINT16_C( 640), UINT16_C(24547), UINT16_C(26006), UINT16_C(22088), UINT16_C( 8843) }, { UINT16_C(61658), UINT16_C( 6330), UINT16_C( 1721), UINT16_C(35857), UINT16_C( 5745), UINT16_C(24547), UINT16_C(12980), UINT16_C(22088) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vtrn1q_u16(a, b); 
simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vtrn1q_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C( 497895404), UINT32_C(1842434419), UINT32_C(2057525496), UINT32_C(2082404662) }, { UINT32_C( 744362374), UINT32_C(1722726623), UINT32_C(2688313178), UINT32_C(3645033197) }, { UINT32_C( 497895404), UINT32_C( 744362374), UINT32_C(2057525496), UINT32_C(2688313178) } }, { { UINT32_C(2230775569), UINT32_C( 955369280), UINT32_C(1454544160), UINT32_C( 366203278) }, { UINT32_C(3242275298), UINT32_C(1143468009), UINT32_C(1155818583), UINT32_C(1058874926) }, { UINT32_C(2230775569), UINT32_C(3242275298), UINT32_C(1454544160), UINT32_C(1155818583) } }, { { UINT32_C(1438848021), UINT32_C(4220433883), UINT32_C(3629269066), UINT32_C(4092405009) }, { UINT32_C(1068772950), UINT32_C(1971575838), UINT32_C(1857644608), UINT32_C(2762856078) }, { UINT32_C(1438848021), UINT32_C(1068772950), UINT32_C(3629269066), UINT32_C(1857644608) } }, { { UINT32_C(3338236138), UINT32_C(1874954021), UINT32_C(3645379528), UINT32_C(2395813176) }, { UINT32_C(2194571620), UINT32_C(2650231133), UINT32_C(1208725689), UINT32_C(1911339142) }, { UINT32_C(3338236138), UINT32_C(2194571620), UINT32_C(3645379528), UINT32_C(1208725689) } }, { { UINT32_C(1312285993), UINT32_C( 901707885), UINT32_C(1141769740), UINT32_C(2681461563) }, { UINT32_C(3122766173), UINT32_C(2891454706), UINT32_C(1341416392), UINT32_C(1170268188) }, { UINT32_C(1312285993), UINT32_C(3122766173), UINT32_C(1141769740), UINT32_C(1341416392) } }, { { UINT32_C( 848558021), UINT32_C(4217852399), UINT32_C(2470475351), UINT32_C(2922517329) }, { UINT32_C(2791920820), UINT32_C( 894615916), UINT32_C(1082410532), UINT32_C(3968156710) }, { UINT32_C( 848558021), UINT32_C(2791920820), UINT32_C(2470475351), UINT32_C(1082410532) } }, { { UINT32_C( 706615611), UINT32_C(3257304682), UINT32_C(1297442556), UINT32_C( 771524473) }, { 
UINT32_C(1221813723), UINT32_C(1249715750), UINT32_C(2475360620), UINT32_C(2155810885) }, { UINT32_C( 706615611), UINT32_C(1221813723), UINT32_C(1297442556), UINT32_C(2475360620) } }, { { UINT32_C(2494209321), UINT32_C( 525783075), UINT32_C(2943200054), UINT32_C( 249325874) }, { UINT32_C(4099322062), UINT32_C(1111413718), UINT32_C( 433441236), UINT32_C( 60380378) }, { UINT32_C(2494209321), UINT32_C(4099322062), UINT32_C(2943200054), UINT32_C( 433441236) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vtrn1q_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vtrn1q_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 4815427970974768462), UINT64_C( 6946629186605919678) }, { UINT64_C(17577554379567712573), UINT64_C( 9507926976227586244) }, { UINT64_C( 4815427970974768462), UINT64_C(17577554379567712573) } }, { { UINT64_C(17783107680982229326), UINT64_C( 7694430944701500407) }, { UINT64_C(14946435657532704485), UINT64_C( 1392019949421056920) }, { UINT64_C(17783107680982229326), UINT64_C(14946435657532704485) } }, { { UINT64_C( 720685969029384403), UINT64_C(13170277731653498159) }, { UINT64_C( 4546836441613680191), UINT64_C(15944534928651032686) }, { UINT64_C( 720685969029384403), UINT64_C( 4546836441613680191) } }, { { UINT64_C( 8377513640867717158), UINT64_C( 3252891876890621085) }, { UINT64_C( 6754432899425457110), UINT64_C( 5553173804405016872) }, { UINT64_C( 8377513640867717158), UINT64_C( 6754432899425457110) } }, { { UINT64_C( 9111002919100414135), UINT64_C(12652601303620206972) }, { UINT64_C(14317480457102167653), UINT64_C(14926910591795282610) }, { UINT64_C( 9111002919100414135), UINT64_C(14317480457102167653) } }, { { UINT64_C(17465588781926821122), 
UINT64_C(14197020995446656893) }, { UINT64_C(14728401471583891713), UINT64_C( 7779348683327627021) }, { UINT64_C(17465588781926821122), UINT64_C(14728401471583891713) } }, { { UINT64_C( 8854554042291961977), UINT64_C( 3541753660244507904) }, { UINT64_C( 1724092549986008839), UINT64_C(15666108166636934036) }, { UINT64_C( 8854554042291961977), UINT64_C( 1724092549986008839) } }, { { UINT64_C( 7595788518996621554), UINT64_C( 1677572234304633318) }, { UINT64_C(16235800240466695983), UINT64_C(12894166098514972345) }, { UINT64_C( 7595788518996621554), UINT64_C(16235800240466695983) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vtrn1q_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1q_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1q_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1q_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1q_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1q_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1q_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1q_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1q_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1q_u64) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/arm/neon/trn2.c000066400000000000000000001547001400333146700163570ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN trn2 #include #include static int test_simde_vtrn2_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 83.42), SIMDE_FLOAT32_C( 803.99) }, { 
SIMDE_FLOAT32_C( 636.36), SIMDE_FLOAT32_C( 295.59) }, { SIMDE_FLOAT32_C( 803.99), SIMDE_FLOAT32_C( 295.59) } }, { { SIMDE_FLOAT32_C( 877.14), SIMDE_FLOAT32_C( -285.61) }, { SIMDE_FLOAT32_C( 689.26), SIMDE_FLOAT32_C( 398.34) }, { SIMDE_FLOAT32_C( -285.61), SIMDE_FLOAT32_C( 398.34) } }, { { SIMDE_FLOAT32_C( -478.64), SIMDE_FLOAT32_C( -23.77) }, { SIMDE_FLOAT32_C( 747.86), SIMDE_FLOAT32_C( 759.56) }, { SIMDE_FLOAT32_C( -23.77), SIMDE_FLOAT32_C( 759.56) } }, { { SIMDE_FLOAT32_C( -573.98), SIMDE_FLOAT32_C( 874.62) }, { SIMDE_FLOAT32_C( 584.77), SIMDE_FLOAT32_C( 826.51) }, { SIMDE_FLOAT32_C( 874.62), SIMDE_FLOAT32_C( 826.51) } }, { { SIMDE_FLOAT32_C( -73.10), SIMDE_FLOAT32_C( -203.77) }, { SIMDE_FLOAT32_C( 333.96), SIMDE_FLOAT32_C( -54.11) }, { SIMDE_FLOAT32_C( -203.77), SIMDE_FLOAT32_C( -54.11) } }, { { SIMDE_FLOAT32_C( 552.91), SIMDE_FLOAT32_C( -34.42) }, { SIMDE_FLOAT32_C( -526.96), SIMDE_FLOAT32_C( -200.73) }, { SIMDE_FLOAT32_C( -34.42), SIMDE_FLOAT32_C( -200.73) } }, { { SIMDE_FLOAT32_C( 276.54), SIMDE_FLOAT32_C( -171.29) }, { SIMDE_FLOAT32_C( -906.91), SIMDE_FLOAT32_C( -286.58) }, { SIMDE_FLOAT32_C( -171.29), SIMDE_FLOAT32_C( -286.58) } }, { { SIMDE_FLOAT32_C( 665.38), SIMDE_FLOAT32_C( 440.56) }, { SIMDE_FLOAT32_C( 826.61), SIMDE_FLOAT32_C( -251.20) }, { SIMDE_FLOAT32_C( 440.56), SIMDE_FLOAT32_C( -251.20) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vtrn2_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vtrn2_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 106), INT8_C( 93), INT8_C( 108), INT8_C( 69), INT8_C( 23), INT8_C( 43), INT8_C( 82), -INT8_C( 47) }, { INT8_C( 44), INT8_C( 36), -INT8_C( 125), INT8_C( 77), INT8_C( 46), -INT8_C( 121), INT8_C( 2), INT8_C( 4) }, { 
INT8_C( 93), INT8_C( 36), INT8_C( 69), INT8_C( 77), INT8_C( 43), -INT8_C( 121), -INT8_C( 47), INT8_C( 4) } }, { { INT8_C( 71), INT8_C( 37), -INT8_C( 29), INT8_C( 29), -INT8_C( 88), INT8_C( 1), INT8_C( 106), -INT8_C( 41) }, { INT8_C( 123), -INT8_C( 54), -INT8_C( 57), INT8_C( 117), -INT8_C( 92), INT8_MIN, -INT8_C( 68), INT8_C( 58) }, { INT8_C( 37), -INT8_C( 54), INT8_C( 29), INT8_C( 117), INT8_C( 1), INT8_MIN, -INT8_C( 41), INT8_C( 58) } }, { { -INT8_C( 35), INT8_C( 40), INT8_MIN, -INT8_C( 11), INT8_C( 83), -INT8_C( 46), -INT8_C( 58), INT8_MAX }, { -INT8_C( 9), INT8_C( 73), -INT8_C( 52), INT8_C( 37), -INT8_C( 48), -INT8_C( 49), INT8_C( 41), INT8_C( 24) }, { INT8_C( 40), INT8_C( 73), -INT8_C( 11), INT8_C( 37), -INT8_C( 46), -INT8_C( 49), INT8_MAX, INT8_C( 24) } }, { { -INT8_C( 12), INT8_C( 13), INT8_C( 53), -INT8_C( 100), INT8_C( 14), -INT8_C( 96), INT8_C( 115), -INT8_C( 118) }, { INT8_C( 106), INT8_C( 59), -INT8_C( 1), INT8_C( 14), -INT8_C( 69), -INT8_C( 69), INT8_C( 72), -INT8_C( 104) }, { INT8_C( 13), INT8_C( 59), -INT8_C( 100), INT8_C( 14), -INT8_C( 96), -INT8_C( 69), -INT8_C( 118), -INT8_C( 104) } }, { { -INT8_C( 29), -INT8_C( 56), -INT8_C( 115), INT8_C( 55), -INT8_C( 101), INT8_C( 84), -INT8_C( 74), -INT8_C( 110) }, { -INT8_C( 99), -INT8_C( 125), -INT8_C( 73), INT8_C( 110), INT8_C( 82), -INT8_C( 31), -INT8_C( 122), INT8_C( 70) }, { -INT8_C( 56), -INT8_C( 125), INT8_C( 55), INT8_C( 110), INT8_C( 84), -INT8_C( 31), -INT8_C( 110), INT8_C( 70) } }, { { -INT8_C( 18), -INT8_C( 69), -INT8_C( 30), -INT8_C( 4), INT8_C( 91), INT8_C( 85), -INT8_C( 122), -INT8_C( 59) }, { -INT8_C( 112), -INT8_C( 122), -INT8_C( 45), INT8_C( 75), INT8_C( 65), INT8_C( 28), -INT8_C( 28), INT8_C( 37) }, { -INT8_C( 69), -INT8_C( 122), -INT8_C( 4), INT8_C( 75), INT8_C( 85), INT8_C( 28), -INT8_C( 59), INT8_C( 37) } }, { { -INT8_C( 28), INT8_C( 113), INT8_C( 92), INT8_MAX, -INT8_C( 59), INT8_C( 18), INT8_C( 17), INT8_C( 99) }, { -INT8_C( 107), -INT8_C( 55), -INT8_C( 47), -INT8_C( 25), -INT8_C( 86), 
INT8_C( 87), INT8_C( 45), -INT8_C( 104) }, { INT8_C( 113), -INT8_C( 55), INT8_MAX, -INT8_C( 25), INT8_C( 18), INT8_C( 87), INT8_C( 99), -INT8_C( 104) } }, { { INT8_C( 18), INT8_C( 15), -INT8_C( 108), INT8_C( 110), INT8_C( 101), INT8_C( 27), INT8_C( 51), -INT8_C( 11) }, { -INT8_C( 95), INT8_C( 7), INT8_C( 65), -INT8_C( 30), INT8_C( 35), INT8_C( 37), INT8_C( 7), INT8_C( 7) }, { INT8_C( 15), INT8_C( 7), INT8_C( 110), -INT8_C( 30), INT8_C( 27), INT8_C( 37), -INT8_C( 11), INT8_C( 7) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vtrn2_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vtrn2_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 21140), -INT16_C( 8901), -INT16_C( 7583), -INT16_C( 12924) }, { INT16_C( 7835), INT16_C( 4605), INT16_C( 31307), INT16_C( 32045) }, { -INT16_C( 8901), INT16_C( 4605), -INT16_C( 12924), INT16_C( 32045) } }, { { -INT16_C( 20882), INT16_C( 23307), -INT16_C( 2013), INT16_C( 19039) }, { -INT16_C( 26641), -INT16_C( 29962), INT16_C( 15491), -INT16_C( 4182) }, { INT16_C( 23307), -INT16_C( 29962), INT16_C( 19039), -INT16_C( 4182) } }, { { -INT16_C( 6679), INT16_C( 19149), INT16_C( 20935), INT16_C( 25111) }, { INT16_C( 5231), -INT16_C( 17804), -INT16_C( 24178), -INT16_C( 968) }, { INT16_C( 19149), -INT16_C( 17804), INT16_C( 25111), -INT16_C( 968) } }, { { INT16_C( 17232), INT16_C( 29527), -INT16_C( 18884), INT16_C( 11197) }, { -INT16_C( 19635), -INT16_C( 12107), INT16_C( 24559), -INT16_C( 10048) }, { INT16_C( 29527), -INT16_C( 12107), INT16_C( 11197), -INT16_C( 10048) } }, { { -INT16_C( 29372), INT16_C( 3106), INT16_C( 14814), INT16_C( 19822) }, { -INT16_C( 7603), -INT16_C( 9465), INT16_C( 16260), -INT16_C( 11049) }, { INT16_C( 3106), -INT16_C( 9465), INT16_C( 
19822), -INT16_C( 11049) } }, { { INT16_C( 11907), -INT16_C( 16569), INT16_C( 1252), INT16_C( 13034) }, { -INT16_C( 24649), -INT16_C( 22782), -INT16_C( 15617), INT16_C( 17279) }, { -INT16_C( 16569), -INT16_C( 22782), INT16_C( 13034), INT16_C( 17279) } }, { { -INT16_C( 23985), INT16_C( 11599), -INT16_C( 16677), INT16_C( 10618) }, { -INT16_C( 32096), INT16_C( 9220), -INT16_C( 9023), INT16_C( 17656) }, { INT16_C( 11599), INT16_C( 9220), INT16_C( 10618), INT16_C( 17656) } }, { { INT16_C( 16138), -INT16_C( 4349), -INT16_C( 4797), -INT16_C( 1247) }, { INT16_C( 9101), -INT16_C( 29534), INT16_C( 8678), INT16_C( 13775) }, { -INT16_C( 4349), -INT16_C( 29534), -INT16_C( 1247), INT16_C( 13775) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vtrn2_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vtrn2_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 10645001), INT32_C( 1062972149) }, { -INT32_C( 1146901624), INT32_C( 636172842) }, { INT32_C( 1062972149), INT32_C( 636172842) } }, { { INT32_C( 1131938959), -INT32_C( 1546593877) }, { INT32_C( 1194148703), -INT32_C( 460102437) }, { -INT32_C( 1546593877), -INT32_C( 460102437) } }, { { -INT32_C( 924568109), INT32_C( 1728528351) }, { INT32_C( 337816298), INT32_C( 1933119204) }, { INT32_C( 1728528351), INT32_C( 1933119204) } }, { { -INT32_C( 1095323118), -INT32_C( 345930101) }, { -INT32_C( 1540190263), -INT32_C( 964114957) }, { -INT32_C( 345930101), -INT32_C( 964114957) } }, { { -INT32_C( 644977414), -INT32_C( 1790863957) }, { INT32_C( 598303551), -INT32_C( 2070486159) }, { -INT32_C( 1790863957), -INT32_C( 2070486159) } }, { { INT32_C( 541215893), -INT32_C( 1676958765) }, { INT32_C( 641809714), -INT32_C( 51590910) }, { -INT32_C( 1676958765), 
-INT32_C( 51590910) } }, { { -INT32_C( 506037450), INT32_C( 1349981968) }, { -INT32_C( 327999366), -INT32_C( 1737487869) }, { INT32_C( 1349981968), -INT32_C( 1737487869) } }, { { INT32_C( 700035670), -INT32_C( 2000239531) }, { INT32_C( 78513922), INT32_C( 100768464) }, { -INT32_C( 2000239531), INT32_C( 100768464) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vtrn2_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vtrn2_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 15), UINT8_C(216), UINT8_C( 0), UINT8_C( 22), UINT8_C( 58), UINT8_C(248), UINT8_C( 89), UINT8_C(225) }, { UINT8_C( 12), UINT8_C(184), UINT8_C(182), UINT8_C(163), UINT8_C( 1), UINT8_C(177), UINT8_C( 19), UINT8_C(121) }, { UINT8_C(216), UINT8_C(184), UINT8_C( 22), UINT8_C(163), UINT8_C(248), UINT8_C(177), UINT8_C(225), UINT8_C(121) } }, { { UINT8_C( 68), UINT8_C(225), UINT8_C(135), UINT8_C(204), UINT8_C(162), UINT8_C( 76), UINT8_C(186), UINT8_C(237) }, { UINT8_C( 95), UINT8_C(179), UINT8_C( 10), UINT8_C( 85), UINT8_C(205), UINT8_C( 94), UINT8_C( 62), UINT8_C(220) }, { UINT8_C(225), UINT8_C(179), UINT8_C(204), UINT8_C( 85), UINT8_C( 76), UINT8_C( 94), UINT8_C(237), UINT8_C(220) } }, { { UINT8_C( 54), UINT8_C( 62), UINT8_C(243), UINT8_C(112), UINT8_C( 54), UINT8_C( 76), UINT8_C( 81), UINT8_C( 66) }, { UINT8_C( 5), UINT8_C( 7), UINT8_C(229), UINT8_C( 6), UINT8_C(185), UINT8_C(248), UINT8_C(128), UINT8_C(253) }, { UINT8_C( 62), UINT8_C( 7), UINT8_C(112), UINT8_C( 6), UINT8_C( 76), UINT8_C(248), UINT8_C( 66), UINT8_C(253) } }, { { UINT8_C(218), UINT8_C( 7), UINT8_C(201), UINT8_C(124), UINT8_C( 83), UINT8_C(131), UINT8_C(105), UINT8_C(178) }, { UINT8_C( 55), UINT8_C(116), UINT8_C( 7), UINT8_C( 4), UINT8_C(210), UINT8_C( 
70), UINT8_C(224), UINT8_C( 8) }, { UINT8_C( 7), UINT8_C(116), UINT8_C(124), UINT8_C( 4), UINT8_C(131), UINT8_C( 70), UINT8_C(178), UINT8_C( 8) } }, { { UINT8_C(132), UINT8_C(211), UINT8_C(120), UINT8_C(187), UINT8_C( 32), UINT8_C(201), UINT8_C(253), UINT8_C( 37) }, { UINT8_C(208), UINT8_C(227), UINT8_C( 43), UINT8_C(137), UINT8_C(219), UINT8_C(171), UINT8_C(134), UINT8_C(181) }, { UINT8_C(211), UINT8_C(227), UINT8_C(187), UINT8_C(137), UINT8_C(201), UINT8_C(171), UINT8_C( 37), UINT8_C(181) } }, { { UINT8_C(179), UINT8_C( 79), UINT8_C( 50), UINT8_C( 6), UINT8_C(211), UINT8_C(155), UINT8_C(185), UINT8_C( 10) }, { UINT8_C( 15), UINT8_C(192), UINT8_C( 14), UINT8_C(225), UINT8_C( 6), UINT8_C(238), UINT8_C(233), UINT8_C(139) }, { UINT8_C( 79), UINT8_C(192), UINT8_C( 6), UINT8_C(225), UINT8_C(155), UINT8_C(238), UINT8_C( 10), UINT8_C(139) } }, { { UINT8_C(194), UINT8_C( 97), UINT8_C( 70), UINT8_C(226), UINT8_C( 42), UINT8_C( 67), UINT8_C( 7), UINT8_C(251) }, { UINT8_C( 38), UINT8_C( 50), UINT8_C(132), UINT8_C( 2), UINT8_C(222), UINT8_C( 11), UINT8_C(183), UINT8_C(145) }, { UINT8_C( 97), UINT8_C( 50), UINT8_C(226), UINT8_C( 2), UINT8_C( 67), UINT8_C( 11), UINT8_C(251), UINT8_C(145) } }, { { UINT8_C( 90), UINT8_C(233), UINT8_C(151), UINT8_C( 45), UINT8_C(133), UINT8_C( 80), UINT8_C( 55), UINT8_C(148) }, { UINT8_C( 17), UINT8_C( 69), UINT8_C(118), UINT8_C( 23), UINT8_C( 52), UINT8_C( 95), UINT8_C(162), UINT8_C(246) }, { UINT8_C(233), UINT8_C( 69), UINT8_C( 45), UINT8_C( 23), UINT8_C( 80), UINT8_C( 95), UINT8_C(148), UINT8_C(246) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vtrn2_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vtrn2_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C( 
5340), UINT16_C(40669), UINT16_C(17077), UINT16_C(60369) }, { UINT16_C( 8272), UINT16_C(45663), UINT16_C( 988), UINT16_C(27971) }, { UINT16_C(40669), UINT16_C(45663), UINT16_C(60369), UINT16_C(27971) } }, { { UINT16_C(17106), UINT16_C( 4797), UINT16_C( 3841), UINT16_C(52059) }, { UINT16_C(56359), UINT16_C(46318), UINT16_C(18079), UINT16_C(31782) }, { UINT16_C( 4797), UINT16_C(46318), UINT16_C(52059), UINT16_C(31782) } }, { { UINT16_C( 859), UINT16_C( 4122), UINT16_C(60229), UINT16_C(38651) }, { UINT16_C(23051), UINT16_C(59464), UINT16_C(35678), UINT16_C(12373) }, { UINT16_C( 4122), UINT16_C(59464), UINT16_C(38651), UINT16_C(12373) } }, { { UINT16_C( 5069), UINT16_C(53058), UINT16_C(40482), UINT16_C(18842) }, { UINT16_C(35194), UINT16_C( 6909), UINT16_C( 9167), UINT16_C(10902) }, { UINT16_C(53058), UINT16_C( 6909), UINT16_C(18842), UINT16_C(10902) } }, { { UINT16_C(45094), UINT16_C(27450), UINT16_C(13980), UINT16_C(42753) }, { UINT16_C(18832), UINT16_C(61071), UINT16_C(58836), UINT16_C(41503) }, { UINT16_C(27450), UINT16_C(61071), UINT16_C(42753), UINT16_C(41503) } }, { { UINT16_C(25080), UINT16_C( 6769), UINT16_C( 3071), UINT16_C(31331) }, { UINT16_C(24724), UINT16_C(25748), UINT16_C(10883), UINT16_C(43406) }, { UINT16_C( 6769), UINT16_C(25748), UINT16_C(31331), UINT16_C(43406) } }, { { UINT16_C(51674), UINT16_C(30228), UINT16_C( 5887), UINT16_C(36638) }, { UINT16_C(44383), UINT16_C(13438), UINT16_C(40338), UINT16_C(35542) }, { UINT16_C(30228), UINT16_C(13438), UINT16_C(36638), UINT16_C(35542) } }, { { UINT16_C(18430), UINT16_C(65188), UINT16_C( 1874), UINT16_C(59256) }, { UINT16_C( 3175), UINT16_C(59979), UINT16_C(55606), UINT16_C( 4243) }, { UINT16_C(65188), UINT16_C(59979), UINT16_C(59256), UINT16_C( 4243) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vtrn2_u16(a, b); 
simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vtrn2_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(4196725491), UINT32_C(3634714177) }, { UINT32_C(1414317671), UINT32_C(1496429915) }, { UINT32_C(3634714177), UINT32_C(1496429915) } }, { { UINT32_C(3347760164), UINT32_C(1815798194) }, { UINT32_C(3174009957), UINT32_C(4006824187) }, { UINT32_C(1815798194), UINT32_C(4006824187) } }, { { UINT32_C(2078930746), UINT32_C(3025374797) }, { UINT32_C(2936578132), UINT32_C(2030582357) }, { UINT32_C(3025374797), UINT32_C(2030582357) } }, { { UINT32_C(2957021950), UINT32_C(3642587763) }, { UINT32_C( 43404295), UINT32_C(3488704916) }, { UINT32_C(3642587763), UINT32_C(3488704916) } }, { { UINT32_C(2907363936), UINT32_C(3160514152) }, { UINT32_C(2473290302), UINT32_C(2718790820) }, { UINT32_C(3160514152), UINT32_C(2718790820) } }, { { UINT32_C(2052279558), UINT32_C(3478352072) }, { UINT32_C(1355934140), UINT32_C(3005203027) }, { UINT32_C(3478352072), UINT32_C(3005203027) } }, { { UINT32_C( 73493148), UINT32_C(1187037704) }, { UINT32_C(3503893548), UINT32_C(2792613536) }, { UINT32_C(1187037704), UINT32_C(2792613536) } }, { { UINT32_C(4230006324), UINT32_C(4073419574) }, { UINT32_C(2957155421), UINT32_C(4217594463) }, { UINT32_C(4073419574), UINT32_C(4217594463) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vtrn2_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vtrn2q_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -211.13), SIMDE_FLOAT32_C( -979.06), SIMDE_FLOAT32_C( 252.18), SIMDE_FLOAT32_C( 585.60) }, { SIMDE_FLOAT32_C( -353.90), 
SIMDE_FLOAT32_C( -614.27), SIMDE_FLOAT32_C( -898.32), SIMDE_FLOAT32_C( 395.91) }, { SIMDE_FLOAT32_C( -979.06), SIMDE_FLOAT32_C( -614.27), SIMDE_FLOAT32_C( 585.60), SIMDE_FLOAT32_C( 395.91) } }, { { SIMDE_FLOAT32_C( 662.54), SIMDE_FLOAT32_C( 470.15), SIMDE_FLOAT32_C( -417.90), SIMDE_FLOAT32_C( -256.21) }, { SIMDE_FLOAT32_C( -510.87), SIMDE_FLOAT32_C( 875.70), SIMDE_FLOAT32_C( -440.43), SIMDE_FLOAT32_C( 620.00) }, { SIMDE_FLOAT32_C( 470.15), SIMDE_FLOAT32_C( 875.70), SIMDE_FLOAT32_C( -256.21), SIMDE_FLOAT32_C( 620.00) } }, { { SIMDE_FLOAT32_C( -175.24), SIMDE_FLOAT32_C( 314.73), SIMDE_FLOAT32_C( -141.11), SIMDE_FLOAT32_C( -855.73) }, { SIMDE_FLOAT32_C( -301.48), SIMDE_FLOAT32_C( -992.19), SIMDE_FLOAT32_C( -362.14), SIMDE_FLOAT32_C( -921.90) }, { SIMDE_FLOAT32_C( 314.73), SIMDE_FLOAT32_C( -992.19), SIMDE_FLOAT32_C( -855.73), SIMDE_FLOAT32_C( -921.90) } }, { { SIMDE_FLOAT32_C( 931.20), SIMDE_FLOAT32_C( -308.80), SIMDE_FLOAT32_C( 750.81), SIMDE_FLOAT32_C( -865.99) }, { SIMDE_FLOAT32_C( -783.84), SIMDE_FLOAT32_C( -467.10), SIMDE_FLOAT32_C( -605.39), SIMDE_FLOAT32_C( 5.03) }, { SIMDE_FLOAT32_C( -308.80), SIMDE_FLOAT32_C( -467.10), SIMDE_FLOAT32_C( -865.99), SIMDE_FLOAT32_C( 5.03) } }, { { SIMDE_FLOAT32_C( -446.16), SIMDE_FLOAT32_C( 646.79), SIMDE_FLOAT32_C( -409.36), SIMDE_FLOAT32_C( 199.94) }, { SIMDE_FLOAT32_C( -967.48), SIMDE_FLOAT32_C( -307.68), SIMDE_FLOAT32_C( -404.15), SIMDE_FLOAT32_C( 695.06) }, { SIMDE_FLOAT32_C( 646.79), SIMDE_FLOAT32_C( -307.68), SIMDE_FLOAT32_C( 199.94), SIMDE_FLOAT32_C( 695.06) } }, { { SIMDE_FLOAT32_C( -837.54), SIMDE_FLOAT32_C( 177.95), SIMDE_FLOAT32_C( -561.15), SIMDE_FLOAT32_C( -348.41) }, { SIMDE_FLOAT32_C( 53.65), SIMDE_FLOAT32_C( -1.58), SIMDE_FLOAT32_C( -728.41), SIMDE_FLOAT32_C( 878.40) }, { SIMDE_FLOAT32_C( 177.95), SIMDE_FLOAT32_C( -1.58), SIMDE_FLOAT32_C( -348.41), SIMDE_FLOAT32_C( 878.40) } }, { { SIMDE_FLOAT32_C( -686.85), SIMDE_FLOAT32_C( 130.48), SIMDE_FLOAT32_C( -977.32), SIMDE_FLOAT32_C( 11.67) }, { SIMDE_FLOAT32_C( 138.29), 
SIMDE_FLOAT32_C( -339.46), SIMDE_FLOAT32_C( 89.77), SIMDE_FLOAT32_C( 69.49) }, { SIMDE_FLOAT32_C( 130.48), SIMDE_FLOAT32_C( -339.46), SIMDE_FLOAT32_C( 11.67), SIMDE_FLOAT32_C( 69.49) } }, { { SIMDE_FLOAT32_C( 351.74), SIMDE_FLOAT32_C( -159.41), SIMDE_FLOAT32_C( 203.49), SIMDE_FLOAT32_C( 567.89) }, { SIMDE_FLOAT32_C( 373.49), SIMDE_FLOAT32_C( 598.10), SIMDE_FLOAT32_C( -427.08), SIMDE_FLOAT32_C( 927.33) }, { SIMDE_FLOAT32_C( -159.41), SIMDE_FLOAT32_C( 598.10), SIMDE_FLOAT32_C( 567.89), SIMDE_FLOAT32_C( 927.33) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vtrn2q_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vtrn2q_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 808.16), SIMDE_FLOAT64_C( -66.58) }, { SIMDE_FLOAT64_C( 640.72), SIMDE_FLOAT64_C( -179.06) }, { SIMDE_FLOAT64_C( -66.58), SIMDE_FLOAT64_C( -179.06) } }, { { SIMDE_FLOAT64_C( -763.23), SIMDE_FLOAT64_C( 245.88) }, { SIMDE_FLOAT64_C( 64.27), SIMDE_FLOAT64_C( 245.45) }, { SIMDE_FLOAT64_C( 245.88), SIMDE_FLOAT64_C( 245.45) } }, { { SIMDE_FLOAT64_C( 533.54), SIMDE_FLOAT64_C( -357.67) }, { SIMDE_FLOAT64_C( -180.56), SIMDE_FLOAT64_C( -85.73) }, { SIMDE_FLOAT64_C( -357.67), SIMDE_FLOAT64_C( -85.73) } }, { { SIMDE_FLOAT64_C( 913.02), SIMDE_FLOAT64_C( -45.29) }, { SIMDE_FLOAT64_C( -710.20), SIMDE_FLOAT64_C( 600.50) }, { SIMDE_FLOAT64_C( -45.29), SIMDE_FLOAT64_C( 600.50) } }, { { SIMDE_FLOAT64_C( 103.86), SIMDE_FLOAT64_C( 824.18) }, { SIMDE_FLOAT64_C( 417.27), SIMDE_FLOAT64_C( 319.73) }, { SIMDE_FLOAT64_C( 824.18), SIMDE_FLOAT64_C( 319.73) } }, { { SIMDE_FLOAT64_C( 543.92), SIMDE_FLOAT64_C( 468.66) }, { SIMDE_FLOAT64_C( -370.42), SIMDE_FLOAT64_C( -598.68) }, { SIMDE_FLOAT64_C( 468.66), 
SIMDE_FLOAT64_C( -598.68) } }, { { SIMDE_FLOAT64_C( -891.19), SIMDE_FLOAT64_C( 383.96) }, { SIMDE_FLOAT64_C( 82.38), SIMDE_FLOAT64_C( -999.44) }, { SIMDE_FLOAT64_C( 383.96), SIMDE_FLOAT64_C( -999.44) } }, { { SIMDE_FLOAT64_C( 899.21), SIMDE_FLOAT64_C( -263.60) }, { SIMDE_FLOAT64_C( -211.00), SIMDE_FLOAT64_C( 707.37) }, { SIMDE_FLOAT64_C( -263.60), SIMDE_FLOAT64_C( 707.37) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vtrn2q_f64(a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vtrn2q_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 126), -INT8_C( 47), -INT8_C( 55), INT8_C( 7), -INT8_C( 42), -INT8_C( 98), INT8_C( 55), -INT8_C( 9), INT8_C( 121), INT8_C( 61), -INT8_C( 71), INT8_C( 49), -INT8_C( 54), -INT8_C( 19), INT8_C( 62), INT8_C( 54) }, { INT8_C( 113), INT8_C( 96), INT8_C( 112), INT8_C( 65), INT8_C( 76), -INT8_C( 70), INT8_C( 97), -INT8_C( 119), INT8_C( 42), INT8_C( 57), INT8_C( 15), -INT8_C( 105), INT8_C( 114), -INT8_C( 24), -INT8_C( 38), -INT8_C( 15) }, { -INT8_C( 47), INT8_C( 96), INT8_C( 7), INT8_C( 65), -INT8_C( 98), -INT8_C( 70), -INT8_C( 9), -INT8_C( 119), INT8_C( 61), INT8_C( 57), INT8_C( 49), -INT8_C( 105), -INT8_C( 19), -INT8_C( 24), INT8_C( 54), -INT8_C( 15) } }, { { -INT8_C( 71), -INT8_C( 92), -INT8_C( 8), -INT8_C( 112), INT8_C( 66), INT8_C( 48), -INT8_C( 121), -INT8_C( 69), INT8_C( 109), INT8_C( 65), -INT8_C( 20), INT8_C( 56), INT8_C( 46), INT8_C( 42), INT8_C( 110), -INT8_C( 97) }, { -INT8_C( 117), -INT8_C( 34), -INT8_C( 32), -INT8_C( 41), -INT8_C( 104), INT8_C( 66), INT8_C( 96), -INT8_C( 62), INT8_C( 123), INT8_C( 111), INT8_C( 90), -INT8_C( 18), INT8_C( 88), INT8_C( 52), -INT8_C( 33), INT8_C( 17) }, { -INT8_C( 92), -INT8_C( 34), -INT8_C( 112), -INT8_C( 
41), INT8_C( 48), INT8_C( 66), -INT8_C( 69), -INT8_C( 62), INT8_C( 65), INT8_C( 111), INT8_C( 56), -INT8_C( 18), INT8_C( 42), INT8_C( 52), -INT8_C( 97), INT8_C( 17) } }, { { -INT8_C( 40), -INT8_C( 41), -INT8_C( 95), INT8_C( 26), INT8_C( 7), INT8_C( 41), -INT8_C( 42), INT8_C( 117), INT8_C( 106), -INT8_C( 62), -INT8_C( 83), -INT8_C( 104), -INT8_C( 19), INT8_C( 27), INT8_C( 55), INT8_C( 120) }, { -INT8_C( 7), INT8_C( 24), INT8_C( 79), -INT8_C( 110), INT8_C( 90), -INT8_C( 81), INT8_C( 84), -INT8_C( 43), INT8_C( 31), -INT8_C( 82), -INT8_C( 61), INT8_C( 119), -INT8_C( 29), -INT8_C( 94), -INT8_C( 120), -INT8_C( 69) }, { -INT8_C( 41), INT8_C( 24), INT8_C( 26), -INT8_C( 110), INT8_C( 41), -INT8_C( 81), INT8_C( 117), -INT8_C( 43), -INT8_C( 62), -INT8_C( 82), -INT8_C( 104), INT8_C( 119), INT8_C( 27), -INT8_C( 94), INT8_C( 120), -INT8_C( 69) } }, { { INT8_C( 122), INT8_C( 42), -INT8_C( 42), -INT8_C( 127), INT8_C( 83), -INT8_C( 84), -INT8_C( 10), -INT8_C( 67), INT8_C( 110), -INT8_C( 93), INT8_C( 85), INT8_C( 91), -INT8_C( 66), -INT8_C( 116), -INT8_C( 45), -INT8_C( 72) }, { -INT8_C( 92), INT8_C( 34), INT8_C( 74), -INT8_C( 2), -INT8_C( 46), -INT8_C( 98), -INT8_C( 44), -INT8_C( 15), INT8_C( 77), -INT8_C( 105), INT8_C( 104), INT8_C( 48), INT8_C( 58), -INT8_C( 16), -INT8_C( 21), -INT8_C( 76) }, { INT8_C( 42), INT8_C( 34), -INT8_C( 127), -INT8_C( 2), -INT8_C( 84), -INT8_C( 98), -INT8_C( 67), -INT8_C( 15), -INT8_C( 93), -INT8_C( 105), INT8_C( 91), INT8_C( 48), -INT8_C( 116), -INT8_C( 16), -INT8_C( 72), -INT8_C( 76) } }, { { INT8_C( 26), -INT8_C( 63), INT8_C( 53), INT8_C( 109), INT8_C( 109), INT8_C( 44), INT8_C( 42), -INT8_C( 36), -INT8_C( 49), INT8_MAX, INT8_C( 55), -INT8_C( 114), INT8_C( 12), INT8_C( 11), INT8_C( 70), -INT8_C( 80) }, { INT8_C( 45), -INT8_C( 112), -INT8_C( 81), -INT8_C( 1), INT8_C( 46), -INT8_C( 125), -INT8_C( 16), INT8_C( 123), INT8_C( 26), INT8_C( 88), -INT8_C( 85), INT8_C( 84), INT8_C( 73), -INT8_C( 105), INT8_C( 8), INT8_C( 99) }, { -INT8_C( 63), -INT8_C( 112), 
INT8_C( 109), -INT8_C( 1), INT8_C( 44), -INT8_C( 125), -INT8_C( 36), INT8_C( 123), INT8_MAX, INT8_C( 88), -INT8_C( 114), INT8_C( 84), INT8_C( 11), -INT8_C( 105), -INT8_C( 80), INT8_C( 99) } }, { { INT8_C( 88), INT8_C( 62), -INT8_C( 47), -INT8_C( 58), INT8_C( 106), -INT8_C( 5), -INT8_C( 94), INT8_C( 57), INT8_C( 123), -INT8_C( 39), -INT8_C( 57), -INT8_C( 121), -INT8_C( 28), INT8_C( 13), INT8_C( 55), INT8_C( 18) }, { -INT8_C( 99), -INT8_C( 26), INT8_C( 17), -INT8_C( 52), INT8_C( 105), INT8_C( 2), INT8_C( 71), -INT8_C( 124), INT8_C( 90), -INT8_C( 13), -INT8_C( 40), -INT8_C( 93), -INT8_C( 118), -INT8_C( 31), INT8_C( 7), -INT8_C( 30) }, { INT8_C( 62), -INT8_C( 26), -INT8_C( 58), -INT8_C( 52), -INT8_C( 5), INT8_C( 2), INT8_C( 57), -INT8_C( 124), -INT8_C( 39), -INT8_C( 13), -INT8_C( 121), -INT8_C( 93), INT8_C( 13), -INT8_C( 31), INT8_C( 18), -INT8_C( 30) } }, { { INT8_C( 31), -INT8_C( 40), -INT8_C( 88), -INT8_C( 119), -INT8_C( 45), INT8_C( 74), -INT8_C( 62), INT8_C( 78), INT8_C( 36), -INT8_C( 118), -INT8_C( 43), INT8_C( 8), -INT8_C( 105), INT8_C( 13), INT8_C( 26), INT8_C( 53) }, { -INT8_C( 13), INT8_C( 44), INT8_C( 1), INT8_C( 93), INT8_C( 46), INT8_C( 72), -INT8_C( 31), -INT8_C( 120), INT8_C( 59), -INT8_C( 71), INT8_C( 44), -INT8_C( 59), -INT8_C( 102), INT8_C( 51), -INT8_C( 88), -INT8_C( 71) }, { -INT8_C( 40), INT8_C( 44), -INT8_C( 119), INT8_C( 93), INT8_C( 74), INT8_C( 72), INT8_C( 78), -INT8_C( 120), -INT8_C( 118), -INT8_C( 71), INT8_C( 8), -INT8_C( 59), INT8_C( 13), INT8_C( 51), INT8_C( 53), -INT8_C( 71) } }, { { INT8_C( 11), INT8_C( 80), INT8_C( 66), -INT8_C( 34), -INT8_C( 101), INT8_C( 5), INT8_C( 45), -INT8_C( 65), -INT8_C( 113), INT8_C( 2), -INT8_C( 57), INT8_C( 38), INT8_C( 15), -INT8_C( 30), INT8_C( 91), INT8_C( 3) }, { INT8_C( 14), INT8_C( 92), INT8_C( 96), INT8_C( 60), -INT8_C( 91), INT8_C( 65), -INT8_C( 60), -INT8_C( 32), -INT8_C( 6), -INT8_C( 16), -INT8_C( 90), -INT8_C( 107), INT8_C( 35), INT8_C( 78), INT8_C( 78), INT8_C( 46) }, { INT8_C( 80), INT8_C( 92), 
-INT8_C( 34), INT8_C( 60), INT8_C( 5), INT8_C( 65), -INT8_C( 65), -INT8_C( 32), INT8_C( 2), -INT8_C( 16), INT8_C( 38), -INT8_C( 107), -INT8_C( 30), INT8_C( 78), INT8_C( 3), INT8_C( 46) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vtrn2q_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vtrn2q_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 25616), INT16_C( 12174), -INT16_C( 26288), INT16_C( 19899), INT16_C( 18017), -INT16_C( 25514), -INT16_C( 3524), INT16_C( 25184) }, { -INT16_C( 5010), INT16_C( 21716), INT16_C( 8269), -INT16_C( 9581), INT16_C( 9727), -INT16_C( 17748), -INT16_C( 10274), -INT16_C( 12721) }, { INT16_C( 12174), INT16_C( 21716), INT16_C( 19899), -INT16_C( 9581), -INT16_C( 25514), -INT16_C( 17748), INT16_C( 25184), -INT16_C( 12721) } }, { { -INT16_C( 8846), -INT16_C( 15363), -INT16_C( 18313), -INT16_C( 10224), INT16_C( 26366), INT16_C( 14964), -INT16_C( 10919), -INT16_C( 14436) }, { INT16_C( 29121), INT16_C( 3611), -INT16_C( 20847), -INT16_C( 28183), -INT16_C( 27181), -INT16_C( 20149), -INT16_C( 26004), -INT16_C( 8321) }, { -INT16_C( 15363), INT16_C( 3611), -INT16_C( 10224), -INT16_C( 28183), INT16_C( 14964), -INT16_C( 20149), -INT16_C( 14436), -INT16_C( 8321) } }, { { INT16_C( 31863), -INT16_C( 4446), -INT16_C( 19916), INT16_C( 12999), INT16_C( 15128), INT16_C( 29036), INT16_C( 2320), -INT16_C( 11720) }, { INT16_C( 21626), INT16_C( 3040), -INT16_C( 14078), -INT16_C( 10596), -INT16_C( 6305), -INT16_C( 13433), INT16_C( 1921), -INT16_C( 1622) }, { -INT16_C( 4446), INT16_C( 3040), INT16_C( 12999), -INT16_C( 10596), INT16_C( 29036), -INT16_C( 13433), -INT16_C( 11720), -INT16_C( 1622) } }, { { INT16_C( 19587), -INT16_C( 18201), -INT16_C( 20738), INT16_C( 6122), 
INT16_C( 22506), -INT16_C( 1400), -INT16_C( 16032), -INT16_C( 9524) }, { -INT16_C( 21227), INT16_C( 6117), -INT16_C( 32138), -INT16_C( 10771), INT16_C( 30057), -INT16_C( 5215), INT16_C( 19324), -INT16_C( 28) }, { -INT16_C( 18201), INT16_C( 6117), INT16_C( 6122), -INT16_C( 10771), -INT16_C( 1400), -INT16_C( 5215), -INT16_C( 9524), -INT16_C( 28) } }, { { -INT16_C( 13416), -INT16_C( 26953), -INT16_C( 23942), INT16_C( 25773), INT16_C( 14073), INT16_C( 22878), INT16_C( 11255), INT16_C( 3123) }, { INT16_C( 6360), INT16_C( 20003), INT16_C( 4506), INT16_C( 1060), -INT16_C( 14970), INT16_C( 751), -INT16_C( 11504), -INT16_C( 22527) }, { -INT16_C( 26953), INT16_C( 20003), INT16_C( 25773), INT16_C( 1060), INT16_C( 22878), INT16_C( 751), INT16_C( 3123), -INT16_C( 22527) } }, { { -INT16_C( 18018), INT16_C( 6207), -INT16_C( 5029), INT16_C( 21628), -INT16_C( 9438), INT16_C( 6573), -INT16_C( 8186), -INT16_C( 8667) }, { INT16_C( 18936), -INT16_C( 27860), INT16_C( 20570), -INT16_C( 8041), -INT16_C( 31211), INT16_C( 9954), -INT16_C( 7335), -INT16_C( 2098) }, { INT16_C( 6207), -INT16_C( 27860), INT16_C( 21628), -INT16_C( 8041), INT16_C( 6573), INT16_C( 9954), -INT16_C( 8667), -INT16_C( 2098) } }, { { INT16_C( 3484), -INT16_C( 2288), -INT16_C( 29446), INT16_C( 7243), -INT16_C( 1945), INT16_C( 27958), INT16_C( 23512), -INT16_C( 11957) }, { INT16_C( 30884), -INT16_C( 412), -INT16_C( 1080), -INT16_C( 8482), -INT16_C( 16255), -INT16_C( 9724), -INT16_C( 11612), INT16_C( 16593) }, { -INT16_C( 2288), -INT16_C( 412), INT16_C( 7243), -INT16_C( 8482), INT16_C( 27958), -INT16_C( 9724), -INT16_C( 11957), INT16_C( 16593) } }, { { -INT16_C( 7712), -INT16_C( 9672), -INT16_C( 31890), -INT16_C( 10762), INT16_C( 11388), INT16_C( 21571), -INT16_C( 29048), INT16_C( 11301) }, { -INT16_C( 30458), -INT16_C( 12501), INT16_C( 2436), INT16_C( 1453), -INT16_C( 20022), INT16_C( 28383), -INT16_C( 20093), INT16_C( 25518) }, { -INT16_C( 9672), -INT16_C( 12501), -INT16_C( 10762), INT16_C( 1453), INT16_C( 21571), 
INT16_C( 28383), INT16_C( 11301), INT16_C( 25518) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vtrn2q_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vtrn2q_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 1320359767), INT32_C( 1637046083), INT32_C( 635603625), -INT32_C( 170216369) }, { -INT32_C( 1440222354), -INT32_C( 386047728), INT32_C( 1218036504), -INT32_C( 1417935102) }, { INT32_C( 1637046083), -INT32_C( 386047728), -INT32_C( 170216369), -INT32_C( 1417935102) } }, { { INT32_C( 861719023), -INT32_C( 912986080), -INT32_C( 890276230), -INT32_C( 1732261590) }, { -INT32_C( 851253572), INT32_C( 1605713735), INT32_C( 78073346), INT32_C( 1169106005) }, { -INT32_C( 912986080), INT32_C( 1605713735), -INT32_C( 1732261590), INT32_C( 1169106005) } }, { { INT32_C( 225971437), INT32_C( 1993739516), -INT32_C( 1405041278), INT32_C( 1262813070) }, { INT32_C( 739804901), -INT32_C( 930361915), INT32_C( 1909207836), INT32_C( 1152810071) }, { INT32_C( 1993739516), -INT32_C( 930361915), INT32_C( 1262813070), INT32_C( 1152810071) } }, { { -INT32_C( 2075054456), -INT32_C( 1124456646), INT32_C( 2070428653), INT32_C( 549891130) }, { -INT32_C( 129180110), -INT32_C( 926885716), INT32_C( 1647938571), -INT32_C( 1868107768) }, { -INT32_C( 1124456646), -INT32_C( 926885716), INT32_C( 549891130), -INT32_C( 1868107768) } }, { { INT32_C( 1494546206), INT32_C( 185929502), -INT32_C( 2071495094), INT32_C( 1571048746) }, { -INT32_C( 665456340), -INT32_C( 727706167), -INT32_C( 1439245919), -INT32_C( 398795575) }, { INT32_C( 185929502), -INT32_C( 727706167), INT32_C( 1571048746), -INT32_C( 398795575) } }, { { -INT32_C( 247377965), -INT32_C( 1459792290), -INT32_C( 13859628), -INT32_C( 44248623) }, { 
-INT32_C( 1948929598), INT32_C( 1734309318), INT32_C( 403805519), INT32_C( 1140870257) }, { -INT32_C( 1459792290), INT32_C( 1734309318), -INT32_C( 44248623), INT32_C( 1140870257) } }, { { -INT32_C( 113950309), INT32_C( 1822503576), -INT32_C( 2006200906), INT32_C( 1619380126) }, { INT32_C( 1055611768), INT32_C( 530991824), INT32_C( 1345894367), -INT32_C( 1634453501) }, { INT32_C( 1822503576), INT32_C( 530991824), INT32_C( 1619380126), -INT32_C( 1634453501) } }, { { INT32_C( 311937658), -INT32_C( 1283573508), -INT32_C( 1539577338), INT32_C( 705020081) }, { -INT32_C( 328667109), INT32_C( 436932155), -INT32_C( 915717178), -INT32_C( 160891012) }, { -INT32_C( 1283573508), INT32_C( 436932155), INT32_C( 705020081), -INT32_C( 160891012) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vtrn2q_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vtrn2q_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { INT64_C( 45048472413891075), INT64_C( 152343486260863284) }, { -INT64_C( 7730196300375534852), -INT64_C( 8005652403684821719) }, { INT64_C( 152343486260863284), -INT64_C( 8005652403684821719) } }, { { INT64_C( 1006278375945588127), -INT64_C( 7206005011547724955) }, { INT64_C( 1148738847977968370), -INT64_C( 2056190901628563624) }, { -INT64_C( 7206005011547724955), -INT64_C( 2056190901628563624) } }, { { INT64_C( 4946309049015261886), -INT64_C( 5085055702267746507) }, { INT64_C( 6144890095936133458), INT64_C( 89703941150560535) }, { -INT64_C( 5085055702267746507), INT64_C( 89703941150560535) } }, { { INT64_C( 2211054014804007052), -INT64_C( 8906606921584021020) }, { -INT64_C( 7238688109679051718), INT64_C( 331072265161983572) }, { -INT64_C( 8906606921584021020), INT64_C( 331072265161983572) } }, { { 
INT64_C( 5932400063550533708), -INT64_C( 2250282873078790641) }, { -INT64_C( 2810630427950743073), INT64_C( 8423146550228677737) }, { -INT64_C( 2250282873078790641), INT64_C( 8423146550228677737) } }, { { -INT64_C( 9009452761172257736), INT64_C( 4092514606168405316) }, { -INT64_C( 1340495419970625015), INT64_C( 1453734540291790223) }, { INT64_C( 4092514606168405316), INT64_C( 1453734540291790223) } }, { { INT64_C( 1800509477191603719), INT64_C( 5780595826556583847) }, { -INT64_C( 134753468448648017), -INT64_C( 6122395599108706281) }, { INT64_C( 5780595826556583847), -INT64_C( 6122395599108706281) } }, { { INT64_C( 1991472244662321847), INT64_C( 2555603067598513888) }, { -INT64_C( 5866650351400692696), -INT64_C( 2037973200194614680) }, { INT64_C( 2555603067598513888), -INT64_C( 2037973200194614680) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_vtrn2q_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vtrn2q_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C( 20), UINT8_C(238), UINT8_C(252), UINT8_C(213), UINT8_C( 66), UINT8_C(233), UINT8_C(216), UINT8_C( 19), UINT8_C(139), UINT8_C(224), UINT8_C( 95), UINT8_C( 61), UINT8_C(180), UINT8_C(215), UINT8_C(124), UINT8_C(191) }, { UINT8_C( 7), UINT8_C( 5), UINT8_C(234), UINT8_C(232), UINT8_C(111), UINT8_C(230), UINT8_C(156), UINT8_C( 66), UINT8_C( 82), UINT8_C( 3), UINT8_C(221), UINT8_C(221), UINT8_C(135), UINT8_C( 8), UINT8_C( 89), UINT8_C(155) }, { UINT8_C(238), UINT8_C( 5), UINT8_C(213), UINT8_C(232), UINT8_C(233), UINT8_C(230), UINT8_C( 19), UINT8_C( 66), UINT8_C(224), UINT8_C( 3), UINT8_C( 61), UINT8_C(221), UINT8_C(215), UINT8_C( 8), UINT8_C(191), UINT8_C(155) } }, { { UINT8_C(247), UINT8_C( 85), UINT8_C(112), UINT8_C( 57), 
UINT8_C( 62), UINT8_C( 73), UINT8_C( 76), UINT8_C(202), UINT8_C( 41), UINT8_C(171), UINT8_C( 7), UINT8_C(221), UINT8_C(130), UINT8_C(132), UINT8_C(156), UINT8_C(137) }, { UINT8_C(137), UINT8_C(135), UINT8_C(113), UINT8_C(249), UINT8_C(109), UINT8_C( 14), UINT8_C( 59), UINT8_C(191), UINT8_C( 17), UINT8_C( 25), UINT8_C(157), UINT8_C(152), UINT8_C( 33), UINT8_C(246), UINT8_C( 51), UINT8_C( 24) }, { UINT8_C( 85), UINT8_C(135), UINT8_C( 57), UINT8_C(249), UINT8_C( 73), UINT8_C( 14), UINT8_C(202), UINT8_C(191), UINT8_C(171), UINT8_C( 25), UINT8_C(221), UINT8_C(152), UINT8_C(132), UINT8_C(246), UINT8_C(137), UINT8_C( 24) } }, { { UINT8_C( 75), UINT8_C(163), UINT8_C( 81), UINT8_C(138), UINT8_C(236), UINT8_C(157), UINT8_C( 84), UINT8_C( 22), UINT8_C( 72), UINT8_C( 91), UINT8_C(243), UINT8_C(202), UINT8_C(223), UINT8_C(144), UINT8_C( 84), UINT8_C(105) }, { UINT8_C( 23), UINT8_C(197), UINT8_C( 98), UINT8_C(132), UINT8_C(211), UINT8_C(157), UINT8_C( 67), UINT8_C(228), UINT8_C(182), UINT8_C(224), UINT8_C(124), UINT8_C(216), UINT8_C(214), UINT8_C(175), UINT8_C(240), UINT8_C( 34) }, { UINT8_C(163), UINT8_C(197), UINT8_C(138), UINT8_C(132), UINT8_C(157), UINT8_C(157), UINT8_C( 22), UINT8_C(228), UINT8_C( 91), UINT8_C(224), UINT8_C(202), UINT8_C(216), UINT8_C(144), UINT8_C(175), UINT8_C(105), UINT8_C( 34) } }, { { UINT8_C( 83), UINT8_C( 66), UINT8_C(172), UINT8_C( 63), UINT8_C(223), UINT8_C( 0), UINT8_C( 85), UINT8_C( 40), UINT8_C( 91), UINT8_C( 73), UINT8_C(242), UINT8_C( 59), UINT8_C(217), UINT8_C( 70), UINT8_C(164), UINT8_C(240) }, { UINT8_C( 12), UINT8_C( 6), UINT8_C(116), UINT8_C(223), UINT8_C(163), UINT8_C(183), UINT8_C(196), UINT8_C( 90), UINT8_C(152), UINT8_C( 64), UINT8_C( 50), UINT8_C(110), UINT8_C(240), UINT8_C( 34), UINT8_C(144), UINT8_C( 67) }, { UINT8_C( 66), UINT8_C( 6), UINT8_C( 63), UINT8_C(223), UINT8_C( 0), UINT8_C(183), UINT8_C( 40), UINT8_C( 90), UINT8_C( 73), UINT8_C( 64), UINT8_C( 59), UINT8_C(110), UINT8_C( 70), UINT8_C( 34), UINT8_C(240), UINT8_C( 67) } }, 
{ { UINT8_C(100), UINT8_C( 60), UINT8_C(130), UINT8_C( 68), UINT8_C( 60), UINT8_C(216), UINT8_C(108), UINT8_C(152), UINT8_C( 33), UINT8_C( 94), UINT8_C(211), UINT8_C(250), UINT8_C(165), UINT8_C(119), UINT8_C(234), UINT8_C(177) }, { UINT8_C(125), UINT8_C( 94), UINT8_C(144), UINT8_C( 32), UINT8_C( 21), UINT8_C( 84), UINT8_C(122), UINT8_C(173), UINT8_C(149), UINT8_C(172), UINT8_C( 28), UINT8_C(133), UINT8_C(207), UINT8_C(172), UINT8_C(200), UINT8_C( 51) }, { UINT8_C( 60), UINT8_C( 94), UINT8_C( 68), UINT8_C( 32), UINT8_C(216), UINT8_C( 84), UINT8_C(152), UINT8_C(173), UINT8_C( 94), UINT8_C(172), UINT8_C(250), UINT8_C(133), UINT8_C(119), UINT8_C(172), UINT8_C(177), UINT8_C( 51) } }, { { UINT8_C(233), UINT8_C( 74), UINT8_C(119), UINT8_C( 37), UINT8_C( 34), UINT8_C(227), UINT8_C(189), UINT8_C( 67), UINT8_C( 66), UINT8_C(144), UINT8_C( 61), UINT8_C(231), UINT8_C( 7), UINT8_C( 39), UINT8_C(152), UINT8_C(132) }, { UINT8_C(133), UINT8_C( 40), UINT8_C(165), UINT8_C(155), UINT8_C(125), UINT8_C( 31), UINT8_C( 72), UINT8_C( 18), UINT8_C(204), UINT8_C(100), UINT8_C(151), UINT8_C(155), UINT8_C( 17), UINT8_C( 95), UINT8_C(206), UINT8_C(250) }, { UINT8_C( 74), UINT8_C( 40), UINT8_C( 37), UINT8_C(155), UINT8_C(227), UINT8_C( 31), UINT8_C( 67), UINT8_C( 18), UINT8_C(144), UINT8_C(100), UINT8_C(231), UINT8_C(155), UINT8_C( 39), UINT8_C( 95), UINT8_C(132), UINT8_C(250) } }, { { UINT8_C(169), UINT8_C( 70), UINT8_C( 31), UINT8_C(204), UINT8_C( 41), UINT8_C(221), UINT8_C( 15), UINT8_C(107), UINT8_C(109), UINT8_C( 77), UINT8_C( 82), UINT8_C(117), UINT8_C(116), UINT8_C(234), UINT8_C(249), UINT8_C(250) }, { UINT8_C( 19), UINT8_C(158), UINT8_C(149), UINT8_C(144), UINT8_C(190), UINT8_C(221), UINT8_C(162), UINT8_C(138), UINT8_C( 66), UINT8_C( 57), UINT8_C( 37), UINT8_C( 83), UINT8_C(152), UINT8_C(243), UINT8_C( 77), UINT8_C( 65) }, { UINT8_C( 70), UINT8_C(158), UINT8_C(204), UINT8_C(144), UINT8_C(221), UINT8_C(221), UINT8_C(107), UINT8_C(138), UINT8_C( 77), UINT8_C( 57), UINT8_C(117), UINT8_C( 
83), UINT8_C(234), UINT8_C(243), UINT8_C(250), UINT8_C( 65) } }, { { UINT8_C( 57), UINT8_C(108), UINT8_C( 13), UINT8_C( 99), UINT8_C( 73), UINT8_C( 29), UINT8_C(206), UINT8_C(183), UINT8_C(106), UINT8_C( 33), UINT8_C( 44), UINT8_C(222), UINT8_C( 11), UINT8_C( 37), UINT8_C(216), UINT8_C( 30) }, { UINT8_C(196), UINT8_C(109), UINT8_C(174), UINT8_C(130), UINT8_C( 75), UINT8_C( 80), UINT8_C( 12), UINT8_C(141), UINT8_C(137), UINT8_C( 49), UINT8_C(224), UINT8_C( 33), UINT8_C( 36), UINT8_C( 45), UINT8_C( 99), UINT8_C( 94) }, { UINT8_C(108), UINT8_C(109), UINT8_C( 99), UINT8_C(130), UINT8_C( 29), UINT8_C( 80), UINT8_C(183), UINT8_C(141), UINT8_C( 33), UINT8_C( 49), UINT8_C(222), UINT8_C( 33), UINT8_C( 37), UINT8_C( 45), UINT8_C( 30), UINT8_C( 94) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vtrn2q_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vtrn2q_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C( 871), UINT16_C(60974), UINT16_C(46339), UINT16_C(41413), UINT16_C(48049), UINT16_C(26805), UINT16_C( 6238), UINT16_C(17076) }, { UINT16_C(16789), UINT16_C(31129), UINT16_C(27483), UINT16_C(21460), UINT16_C(34444), UINT16_C(40210), UINT16_C(60228), UINT16_C(43796) }, { UINT16_C(60974), UINT16_C(31129), UINT16_C(41413), UINT16_C(21460), UINT16_C(26805), UINT16_C(40210), UINT16_C(17076), UINT16_C(43796) } }, { { UINT16_C(17391), UINT16_C(62105), UINT16_C(24568), UINT16_C(43411), UINT16_C(18714), UINT16_C(30993), UINT16_C(50529), UINT16_C(63163) }, { UINT16_C(21510), UINT16_C(24944), UINT16_C(17600), UINT16_C(19636), UINT16_C(50890), UINT16_C( 3817), UINT16_C(64945), UINT16_C(41145) }, { UINT16_C(62105), UINT16_C(24944), UINT16_C(43411), UINT16_C(19636), UINT16_C(30993), UINT16_C( 
3817), UINT16_C(63163), UINT16_C(41145) } }, { { UINT16_C(21312), UINT16_C(14483), UINT16_C( 9906), UINT16_C(52449), UINT16_C(62063), UINT16_C(53317), UINT16_C( 439), UINT16_C(48583) }, { UINT16_C(14165), UINT16_C( 5406), UINT16_C(53883), UINT16_C(17761), UINT16_C(19096), UINT16_C(19028), UINT16_C( 3400), UINT16_C(35050) }, { UINT16_C(14483), UINT16_C( 5406), UINT16_C(52449), UINT16_C(17761), UINT16_C(53317), UINT16_C(19028), UINT16_C(48583), UINT16_C(35050) } }, { { UINT16_C(32096), UINT16_C( 4801), UINT16_C(41636), UINT16_C( 5087), UINT16_C( 9365), UINT16_C(19684), UINT16_C(43813), UINT16_C(31498) }, { UINT16_C(10466), UINT16_C(23952), UINT16_C(62203), UINT16_C(37794), UINT16_C(63036), UINT16_C(34013), UINT16_C(51204), UINT16_C(25613) }, { UINT16_C( 4801), UINT16_C(23952), UINT16_C( 5087), UINT16_C(37794), UINT16_C(19684), UINT16_C(34013), UINT16_C(31498), UINT16_C(25613) } }, { { UINT16_C(52805), UINT16_C(59767), UINT16_C(22128), UINT16_C( 1533), UINT16_C(57722), UINT16_C(41042), UINT16_C(23692), UINT16_C(28187) }, { UINT16_C(43908), UINT16_C(32715), UINT16_C(28061), UINT16_C(55827), UINT16_C(61540), UINT16_C(26718), UINT16_C(27576), UINT16_C(65228) }, { UINT16_C(59767), UINT16_C(32715), UINT16_C( 1533), UINT16_C(55827), UINT16_C(41042), UINT16_C(26718), UINT16_C(28187), UINT16_C(65228) } }, { { UINT16_C(17209), UINT16_C(43751), UINT16_C(58521), UINT16_C( 5295), UINT16_C( 453), UINT16_C(20916), UINT16_C(53085), UINT16_C(58047) }, { UINT16_C(35450), UINT16_C( 6241), UINT16_C(29944), UINT16_C(23794), UINT16_C(20581), UINT16_C( 7620), UINT16_C(37052), UINT16_C(62747) }, { UINT16_C(43751), UINT16_C( 6241), UINT16_C( 5295), UINT16_C(23794), UINT16_C(20916), UINT16_C( 7620), UINT16_C(58047), UINT16_C(62747) } }, { { UINT16_C( 980), UINT16_C(28063), UINT16_C(20455), UINT16_C(44417), UINT16_C(13648), UINT16_C(44798), UINT16_C(48644), UINT16_C(32656) }, { UINT16_C(61768), UINT16_C(16535), UINT16_C(35174), UINT16_C(52124), UINT16_C(24793), UINT16_C(38376), UINT16_C( 
1265), UINT16_C(50571) }, { UINT16_C(28063), UINT16_C(16535), UINT16_C(44417), UINT16_C(52124), UINT16_C(44798), UINT16_C(38376), UINT16_C(32656), UINT16_C(50571) } }, { { UINT16_C(10759), UINT16_C(60978), UINT16_C(46201), UINT16_C(51867), UINT16_C(39657), UINT16_C(61048), UINT16_C( 2136), UINT16_C(41069) }, { UINT16_C( 1273), UINT16_C(24545), UINT16_C(32141), UINT16_C(26154), UINT16_C( 5086), UINT16_C(53244), UINT16_C(34583), UINT16_C( 7828) }, { UINT16_C(60978), UINT16_C(24545), UINT16_C(51867), UINT16_C(26154), UINT16_C(61048), UINT16_C(53244), UINT16_C(41069), UINT16_C( 7828) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vtrn2q_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vtrn2q_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(1786091532), UINT32_C(2913397648), UINT32_C(2660777932), UINT32_C(1683023251) }, { UINT32_C( 419814195), UINT32_C(3980578117), UINT32_C( 636435713), UINT32_C(2575415437) }, { UINT32_C(2913397648), UINT32_C(3980578117), UINT32_C(1683023251), UINT32_C(2575415437) } }, { { UINT32_C(3775198801), UINT32_C(2911808225), UINT32_C(2085299945), UINT32_C(1071684619) }, { UINT32_C(3159942775), UINT32_C(3031014067), UINT32_C(1691982295), UINT32_C(2734512721) }, { UINT32_C(2911808225), UINT32_C(3031014067), UINT32_C(1071684619), UINT32_C(2734512721) } }, { { UINT32_C( 847511889), UINT32_C(2514490027), UINT32_C(1141975865), UINT32_C(1065611975) }, { UINT32_C(2348538840), UINT32_C(1279239541), UINT32_C(2410682430), UINT32_C(3274878578) }, { UINT32_C(2514490027), UINT32_C(1279239541), UINT32_C(1065611975), UINT32_C(3274878578) } }, { { UINT32_C(1542895279), UINT32_C( 32560840), UINT32_C(3376808193), UINT32_C(3406350835) }, { UINT32_C( 441844645), 
UINT32_C(3865548200), UINT32_C( 544610221), UINT32_C(1977854149) }, { UINT32_C( 32560840), UINT32_C(3865548200), UINT32_C(3406350835), UINT32_C(1977854149) } }, { { UINT32_C( 651221342), UINT32_C(2972237999), UINT32_C(3044699841), UINT32_C(3699409463) }, { UINT32_C( 787994501), UINT32_C( 437542508), UINT32_C( 993692277), UINT32_C(2427460914) }, { UINT32_C(2972237999), UINT32_C( 437542508), UINT32_C(3699409463), UINT32_C(2427460914) } }, { { UINT32_C(2797043959), UINT32_C( 22536000), UINT32_C(2226573645), UINT32_C(3647027027) }, { UINT32_C(2047301646), UINT32_C( 731126710), UINT32_C(3630616230), UINT32_C(3815315180) }, { UINT32_C( 22536000), UINT32_C( 731126710), UINT32_C(3647027027), UINT32_C(3815315180) } }, { { UINT32_C(3599310998), UINT32_C(1289282047), UINT32_C( 114331314), UINT32_C(3554619845) }, { UINT32_C(1062135433), UINT32_C(2808865281), UINT32_C(2642465201), UINT32_C(2122377704) }, { UINT32_C(1289282047), UINT32_C(2808865281), UINT32_C(3554619845), UINT32_C(2122377704) } }, { { UINT32_C( 139790601), UINT32_C(2639539690), UINT32_C(2174952635), UINT32_C(3746857558) }, { UINT32_C(1763680872), UINT32_C( 907119237), UINT32_C(1137938779), UINT32_C(2210550650) }, { UINT32_C(2639539690), UINT32_C( 907119237), UINT32_C(3746857558), UINT32_C(2210550650) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vtrn2q_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vtrn2q_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 6945319424494524748), UINT64_C(16610643692567852571) }, { UINT64_C( 6955632144508350698), UINT64_C( 8616304367102141145) }, { UINT64_C(16610643692567852571), UINT64_C( 8616304367102141145) } }, { { UINT64_C( 483247366258044957), UINT64_C(10529975198939663308) 
}, { UINT64_C( 1036481247570548332), UINT64_C( 9734062660554794868) }, { UINT64_C(10529975198939663308), UINT64_C( 9734062660554794868) } }, { { UINT64_C( 3276246002131765894), UINT64_C( 6310820620303658843) }, { UINT64_C( 2711935197491257183), UINT64_C( 4743376430044571105) }, { UINT64_C( 6310820620303658843), UINT64_C( 4743376430044571105) } }, { { UINT64_C(11165594730051579749), UINT64_C( 4925304002160909336) }, { UINT64_C( 5649618867251098256), UINT64_C( 7843984154933472479) }, { UINT64_C( 4925304002160909336), UINT64_C( 7843984154933472479) } }, { { UINT64_C(14913150880268389661), UINT64_C( 6101299713457361539) }, { UINT64_C(16112188414821303774), UINT64_C(13807286187674121257) }, { UINT64_C( 6101299713457361539), UINT64_C(13807286187674121257) } }, { { UINT64_C( 8820038150351737058), UINT64_C( 4808593589038889379) }, { UINT64_C( 8715071445204291859), UINT64_C(11798381818995567451) }, { UINT64_C( 4808593589038889379), UINT64_C(11798381818995567451) } }, { { UINT64_C(10053903435184264981), UINT64_C( 757640439803308252) }, { UINT64_C(10647933628473741822), UINT64_C(14474090585097530747) }, { UINT64_C( 757640439803308252), UINT64_C(14474090585097530747) } }, { { UINT64_C(11139191600147929635), UINT64_C( 3874607743106779588) }, { UINT64_C( 7870963881294881648), UINT64_C( 4192999787675110324) }, { UINT64_C( 3874607743106779588), UINT64_C( 4192999787675110324) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vtrn2q_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2_u32) 
SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2q_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2q_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2q_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2q_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2q_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2q_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2q_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2q_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2q_u64) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/arm/neon/tst.c000066400000000000000000001766031400333146700163120ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN tst #include "test-neon.h" /* Check that both of these work */ #if defined(__cplusplus) #include "../../../simde/arm/neon/tst.h" #else #include "../../../simde/arm/neon.h" #endif static int test_simde_vtst_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; int8_t b[8]; uint8_t r[8]; } test_vec[] = { { { -INT8_C( 42), -INT8_C( 92), INT8_C( 8), INT8_C( 20), INT8_C( 123), -INT8_C( 127), -INT8_C( 20), INT8_C( 74) }, { -INT8_C( 48), INT8_C( 90), -INT8_C( 3), INT8_C( 68), INT8_C( 104), INT8_C( 126), -INT8_C( 103), INT8_C( 100) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { INT8_C( 58), INT8_C( 21), INT8_C( 103), -INT8_C( 65), -INT8_C( 93), INT8_C( 33), -INT8_C( 117), -INT8_C( 9) }, { -INT8_C( 59), -INT8_C( 108), INT8_C( 11), INT8_C( 64), INT8_C( 92), -INT8_C( 9), -INT8_C( 117), INT8_C( 44) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { INT8_C( 63), INT8_C( 126), -INT8_C( 127), -INT8_C( 6), INT8_C( 27), -INT8_C( 11), INT8_C( 62), INT8_C( 121) }, { -INT8_C( 88), -INT8_C( 127), INT8_C( 126), -INT8_C( 55), -INT8_C( 94), INT8_C( 10), -INT8_C( 63), INT8_C( 104) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { -INT8_C( 68), -INT8_C( 24), -INT8_C( 49), -INT8_C( 62), -INT8_C( 97), -INT8_C( 18), INT8_C( 26), -INT8_C( 84) }, { 
INT8_C( 80), INT8_C( 23), -INT8_C( 40), INT8_C( 108), -INT8_C( 86), -INT8_C( 14), -INT8_C( 27), INT8_C( 83) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 61), INT8_C( 38), INT8_C( 6), -INT8_C( 98), -INT8_C( 66), -INT8_C( 40), -INT8_C( 65), INT8_C( 22) }, { -INT8_C( 62), INT8_C( 83), -INT8_C( 40), INT8_C( 97), INT8_C( 65), INT8_C( 39), INT8_C( 64), -INT8_C( 111) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { -INT8_C( 58), INT8_C( 89), -INT8_C( 108), INT8_C( 108), INT8_C( 54), INT8_C( 45), INT8_C( 86), -INT8_C( 32) }, { INT8_C( 57), -INT8_C( 57), INT8_C( 78), -INT8_C( 109), -INT8_C( 13), -INT8_C( 46), -INT8_C( 87), -INT8_C( 75) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { INT8_C( 111), -INT8_C( 11), INT8_C( 43), INT8_C( 98), -INT8_C( 40), INT8_C( 14), -INT8_C( 127), INT8_C( 31) }, { INT8_C( 103), -INT8_C( 115), -INT8_C( 44), -INT8_C( 99), -INT8_C( 97), -INT8_C( 107), INT8_C( 126), -INT8_C( 40) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { -INT8_C( 106), INT8_C( 26), INT8_C( 41), INT8_C( 17), -INT8_C( 7), INT8_C( 33), INT8_C( 39), -INT8_C( 32) }, { INT8_C( 22), -INT8_C( 8), INT8_C( 90), -INT8_C( 18), INT8_C( 6), INT8_C( 82), INT8_C( 13), INT8_C( 109) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_uint8x8_t r = simde_vtst_s8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_private a_ = simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); simde_int8x8_private b_ = 
simde_int8x8_to_private(simde_test_arm_neon_random_i8x8()); /* Make some complements which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = ~b_.values[j]; } simde_int8x8_t a = simde_int8x8_from_private(a_); simde_int8x8_t b = simde_int8x8_from_private(b_); simde_uint8x8_t r = simde_vtst_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtst_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; int16_t b[4]; uint16_t r[4]; } test_vec[] = { { { INT16_C( 8893), -INT16_C( 30633), -INT16_C( 5180), -INT16_C( 21986) }, { -INT16_C( 8894), -INT16_C( 8410), INT16_C( 5179), INT16_C( 917) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { INT16_C( 22356), INT16_C( 13046), -INT16_C( 21246), -INT16_C( 17745) }, { INT16_C( 17277), -INT16_C( 20888), -INT16_C( 16629), INT16_C( 6966) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { -INT16_C( 5326), INT16_C( 28390), -INT16_C( 1563), -INT16_C( 6543) }, { -INT16_C( 15463), -INT16_C( 4592), INT16_C( 1562), INT16_C( 7200) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { INT16_C( 16147), INT16_C( 25142), INT16_C( 5887), -INT16_C( 15559) }, { -INT16_C( 149), -INT16_C( 25143), -INT16_C( 20502), -INT16_C( 5621) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { -INT16_C( 20131), INT16_C( 18995), INT16_C( 21617), -INT16_C( 25737) }, { INT16_C( 20130), -INT16_C( 18996), -INT16_C( 21618), -INT16_C( 29229) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { INT16_C( 19885), INT16_C( 27720), -INT16_C( 10807), -INT16_C( 31471) }, { -INT16_C( 19886), -INT16_C( 27721), INT16_C( 27027), INT16_C( 31470) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { 
-INT16_C( 7756), INT16_C( 17043), INT16_C( 26253), -INT16_C( 375) }, { INT16_C( 8051), INT16_C( 32635), INT16_C( 17720), INT16_C( 374) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { -INT16_C( 11348), -INT16_C( 25600), -INT16_C( 4586), INT16_C( 26038) }, { INT16_C( 11347), INT16_C( 1840), -INT16_C( 15603), -INT16_C( 26039) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_uint16x4_t r = simde_vtst_s16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_private a_ = simde_int16x4_to_private(simde_test_arm_neon_random_i16x4()); simde_int16x4_private b_ = simde_int16x4_to_private(simde_test_arm_neon_random_i16x4()); /* Make some complements which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = ~b_.values[j]; } simde_int16x4_t a = simde_int16x4_from_private(a_); simde_int16x4_t b = simde_int16x4_from_private(b_); simde_uint16x4_t r = simde_vtst_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtst_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; int32_t b[2]; uint32_t r[2]; } test_vec[] = { { { INT32_C( 2123657738), INT32_C( 1952785266) }, { -INT32_C( 968828515), -INT32_C( 1864600079) }, { UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 1508136360), -INT32_C( 427542109) }, { INT32_C( 1508136359), INT32_C( 1322053306) }, { UINT32_C( 0), UINT32_MAX } }, { { -INT32_C( 1076671081), -INT32_C( 484753708) }, { INT32_C( 1076671080), INT32_C( 484753707) }, { UINT32_C( 0), UINT32_C( 0) } }, { { INT32_C( 1378540991), -INT32_C( 2024385843) }, { -INT32_C( 1378540992), INT32_C( 310331232) }, { UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 1490709835), INT32_C( 162460638) }, { -INT32_C( 1490709836), -INT32_C( 2027980263) }, { UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 429832559), INT32_C( 1338460399) }, { -INT32_C( 429832560), INT32_C( 923953009) }, { UINT32_C( 0), UINT32_MAX } }, { { -INT32_C( 537296342), INT32_C( 780541460) }, { INT32_C( 280339865), INT32_C( 81628817) }, { UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 585404851), INT32_C( 650345653) }, { -INT32_C( 1201809693), -INT32_C( 1069905722) }, { UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_uint32x2_t r = simde_vtst_s32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; #else 
fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_private a_ = simde_int32x2_to_private(simde_test_arm_neon_random_i32x2()); simde_int32x2_private b_ = simde_int32x2_to_private(simde_test_arm_neon_random_i32x2()); /* Make some complements which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = ~b_.values[j]; } simde_int32x2_t a = simde_int32x2_from_private(a_); simde_int32x2_t b = simde_int32x2_from_private(b_); simde_uint32x2_t r = simde_vtst_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtst_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; int64_t b[1]; uint64_t r[1]; } test_vec[] = { { { -INT64_C( 1837781170212727101) }, { -INT64_C( 4247103962042125515) }, { UINT64_MAX } }, { { INT64_C( 4526798036743003940) }, { -INT64_C( 4526798036743003941) }, { UINT64_C( 0) } }, { { INT64_C( 327556247060509157) }, { -INT64_C( 327556247060509158) }, { UINT64_C( 0) } }, { { INT64_C( 4814377440359057512) }, { -INT64_C( 4814377440359057513) }, { UINT64_C( 0) } }, { { -INT64_C( 1890867640475375592) }, { INT64_C( 511230107150325363) }, { UINT64_MAX } }, { { INT64_C( 1701538502363315684) }, { -INT64_C( 1701538502363315685) }, { UINT64_C( 0) } }, { { INT64_C( 5053963978680335937) }, { -INT64_C( 5053963978680335938) }, { UINT64_C( 0) } }, { { INT64_C( 2736194239582835245) }, { INT64_C( 7939050786364962868) }, { UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); simde_uint64x1_t r = simde_vtst_s64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); } return 0; #else 
fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_private a_ = simde_int64x1_to_private(simde_test_arm_neon_random_i64x1()); simde_int64x1_private b_ = simde_int64x1_to_private(simde_test_arm_neon_random_i64x1()); /* Make some complements which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = ~b_.values[j]; } simde_int64x1_t a = simde_int64x1_from_private(a_); simde_int64x1_t b = simde_int64x1_from_private(b_); simde_uint64x1_t r = simde_vtst_s64(a, b); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtst_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 89), UINT8_C(123), UINT8_C( 24), UINT8_C( 18), UINT8_C( 98), UINT8_C(227), UINT8_C( 29), UINT8_C( 73) }, { UINT8_C( 57), UINT8_C(132), UINT8_C(155), UINT8_C(237), UINT8_C( 99), UINT8_C(140), UINT8_C( 26), UINT8_C( 6) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C( 31), UINT8_C(154), UINT8_C(207), UINT8_C( 7), UINT8_C( 69), UINT8_C(110), UINT8_C(116), UINT8_C(225) }, { UINT8_C(224), UINT8_C( 2), UINT8_C( 48), UINT8_C( 66), UINT8_C(229), UINT8_C( 77), UINT8_C(139), UINT8_C( 30) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(117), UINT8_C( 3), UINT8_C(142), UINT8_C(199), UINT8_C(118), UINT8_C(239), UINT8_C(116), UINT8_C( 67) }, { UINT8_C(138), UINT8_C( 99), UINT8_C( 74), UINT8_C(207), UINT8_C(210), UINT8_C( 52), UINT8_C(110), UINT8_C(178) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(166), UINT8_C( 70), UINT8_C(242), UINT8_C( 90), 
UINT8_C(108), UINT8_C( 45), UINT8_C(157), UINT8_C( 72) }, { UINT8_C( 48), UINT8_C( 43), UINT8_C( 15), UINT8_C(166), UINT8_C( 26), UINT8_C(131), UINT8_C(233), UINT8_C(164) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C(128), UINT8_C( 95), UINT8_C( 32), UINT8_C(108), UINT8_C(222), UINT8_C(131), UINT8_C( 42), UINT8_C(133) }, { UINT8_C( 61), UINT8_C( 28), UINT8_C(223), UINT8_C(169), UINT8_C( 74), UINT8_C(124), UINT8_C(241), UINT8_C(122) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C( 62), UINT8_C(218), UINT8_C( 36), UINT8_C(166), UINT8_C(188), UINT8_C(143), UINT8_C( 69), UINT8_C(203) }, { UINT8_C(238), UINT8_C( 0), UINT8_C(169), UINT8_C(204), UINT8_C(247), UINT8_C(211), UINT8_C( 81), UINT8_C( 52) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C(206), UINT8_C( 30), UINT8_C( 21), UINT8_C(238), UINT8_C(223), UINT8_C(247), UINT8_C(178), UINT8_C(161) }, { UINT8_C( 85), UINT8_C(225), UINT8_C(195), UINT8_C( 17), UINT8_C(112), UINT8_C( 8), UINT8_C( 77), UINT8_C( 94) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(173), UINT8_C( 61), UINT8_MAX, UINT8_C( 89), UINT8_C(194), UINT8_C(166), UINT8_C(135), UINT8_C(173) }, { UINT8_C(123), UINT8_C(194), UINT8_C( 0), UINT8_C( 91), UINT8_C( 61), UINT8_C(189), UINT8_C(120), UINT8_C(146) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vtst_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x8_private a_ = 
simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); simde_uint8x8_private b_ = simde_uint8x8_to_private(simde_test_arm_neon_random_u8x8()); /* Make some complements which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = ~b_.values[j]; } simde_uint8x8_t a = simde_uint8x8_from_private(a_); simde_uint8x8_t b = simde_uint8x8_from_private(b_); simde_uint8x8_t r = simde_vtst_u8(a, b); simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtst_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(17167), UINT16_C( 2845), UINT16_C(60553), UINT16_C(39974) }, { UINT16_C(48368), UINT16_C(51998), UINT16_C( 4982), UINT16_C(25561) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(27310), UINT16_C(11088), UINT16_C(18399), UINT16_C(51232) }, { UINT16_C(38225), UINT16_C(54447), UINT16_C(52558), UINT16_C(14303) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C(55890), UINT16_C(47439), UINT16_C(54509), UINT16_C(48385) }, { UINT16_C(11404), UINT16_C(18096), UINT16_C(49763), UINT16_C(17150) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C(14483), UINT16_C(57647), UINT16_C( 3589), UINT16_C(17689) }, { UINT16_C(54692), UINT16_C(63350), UINT16_C(29103), UINT16_C(40127) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(47595), UINT16_C(22531), UINT16_C( 5783), UINT16_C(31862) }, { UINT16_C(17940), UINT16_C(43004), UINT16_C(11134), UINT16_C(33673) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C(15991), UINT16_C( 9940), UINT16_C( 7749), UINT16_C(44782) }, { UINT16_C(59007), UINT16_C(38600), 
UINT16_C(57786), UINT16_C(20753) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C( 8481), UINT16_C(37811), UINT16_C(15385), UINT16_C(21014) }, { UINT16_C(57054), UINT16_C(21808), UINT16_C( 1053), UINT16_C(52603) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(35619), UINT16_C(57006), UINT16_C(49004), UINT16_C(25647) }, { UINT16_C( 8846), UINT16_C(41840), UINT16_C( 8976), UINT16_C(10806) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vtst_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x4_private a_ = simde_uint16x4_to_private(simde_test_arm_neon_random_u16x4()); simde_uint16x4_private b_ = simde_uint16x4_to_private(simde_test_arm_neon_random_u16x4()); /* Make some complements which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = ~b_.values[j]; } simde_uint16x4_t a = simde_uint16x4_from_private(a_); simde_uint16x4_t b = simde_uint16x4_from_private(b_); simde_uint16x4_t r = simde_vtst_u16(a, b); simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtst_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(1186490664), UINT32_C( 158677967) }, { UINT32_C(3108476631), UINT32_C(3180949110) }, { UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(2684851262), UINT32_C(2893485814) }, { UINT32_C(1446231173), UINT32_C( 357496804) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C( 673588817), UINT32_C(2227163532) }, { UINT32_C(2901375964), UINT32_C(2067803763) }, { UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(1090798578), UINT32_C(3849131521) }, { UINT32_C(3204168717), UINT32_C( 14961881) }, { UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(1094764559), UINT32_C(2952572818) }, { UINT32_C(3200202736), UINT32_C(1342394477) }, { UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(1961547111), UINT32_C(1781720465) }, { UINT32_C(2775193137), UINT32_C(2331695287) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C(1024399692), UINT32_C(3204173392) }, { UINT32_C(2249129758), UINT32_C(3982193923) }, { UINT32_MAX, UINT32_MAX } }, { { UINT32_C( 55975122), UINT32_C(3936928051) }, { UINT32_C(3161760665), UINT32_C(2111225966) }, { UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vtst_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; 
#else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x2_private a_ = simde_uint32x2_to_private(simde_test_arm_neon_random_u32x2()); simde_uint32x2_private b_ = simde_uint32x2_to_private(simde_test_arm_neon_random_u32x2()); /* Make some complements which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = ~b_.values[j]; } simde_uint32x2_t a = simde_uint32x2_from_private(a_); simde_uint32x2_t b = simde_uint32x2_from_private(b_); simde_uint32x2_t r = simde_vtst_u32(a, b); simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtst_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[1]; uint64_t b[1]; uint64_t r[1]; } test_vec[] = { { { UINT64_C( 1909471733032218373) }, { UINT64_C(16537272340677333242) }, { UINT64_C( 0) } }, { { UINT64_C(15227949905678791626) }, { UINT64_C( 9252676368976284696) }, { UINT64_MAX } }, { { UINT64_C(10377830220437337974) }, { UINT64_C(17301460315111856336) }, { UINT64_MAX } }, { { UINT64_C( 1759847872496586826) }, { UINT64_C(16686896201212964789) }, { UINT64_C( 0) } }, { { UINT64_C(13538799517923171882) }, { UINT64_C( 4907944555786379733) }, { UINT64_C( 0) } }, { { UINT64_C(16778310820077750940) }, { UINT64_C(17276065934006257829) }, { UINT64_MAX } }, { { UINT64_C(14488914339200827468) }, { UINT64_C(12871270039798326502) }, { UINT64_MAX } }, { { UINT64_C( 1200755092811906376) }, { UINT64_C( 591546085826762793) }, { UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_uint64x1_t r = simde_vtst_u64(a, b); simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); 
} return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x1_private a_ = simde_uint64x1_to_private(simde_test_arm_neon_random_u64x1()); simde_uint64x1_private b_ = simde_uint64x1_to_private(simde_test_arm_neon_random_u64x1()); /* Make some complements which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = ~b_.values[j]; } simde_uint64x1_t a = simde_uint64x1_from_private(a_); simde_uint64x1_t b = simde_uint64x1_from_private(b_); simde_uint64x1_t r = simde_vtst_u64(a, b); simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtstq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; int8_t b[16]; uint8_t r[16]; } test_vec[] = { { { -INT8_C( 4), INT8_C( 75), -INT8_C( 89), -INT8_C( 82), -INT8_C( 39), INT8_C( 33), INT8_C( 86), -INT8_C( 81), INT8_C( 87), -INT8_C( 64), INT8_C( 68), -INT8_C( 88), INT8_C( 116), -INT8_C( 54), -INT8_C( 114), INT8_C( 9) }, { INT8_MIN, INT8_C( 113), INT8_C( 88), INT8_C( 81), INT8_C( 119), -INT8_C( 34), -INT8_C( 85), INT8_C( 80), -INT8_C( 15), INT8_C( 56), -INT8_C( 114), INT8_C( 87), -INT8_C( 11), INT8_C( 53), -INT8_C( 104), -INT8_C( 14) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 45), -INT8_C( 61), -INT8_C( 5), -INT8_C( 23), -INT8_C( 95), INT8_C( 11), -INT8_C( 11), -INT8_C( 110), -INT8_C( 33), -INT8_C( 15), -INT8_C( 28), INT8_C( 60), -INT8_C( 72), -INT8_C( 126), -INT8_C( 110), INT8_C( 7) }, { -INT8_C( 94), -INT8_C( 99), -INT8_C( 108), INT8_C( 22), -INT8_C( 53), -INT8_C( 12), -INT8_C( 32), -INT8_C( 71), -INT8_C( 103), INT8_C( 14), INT8_C( 27), 
-INT8_C( 61), -INT8_C( 53), -INT8_C( 122), INT8_C( 109), -INT8_C( 8) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { INT8_C( 39), -INT8_C( 101), INT8_C( 125), -INT8_C( 14), INT8_C( 122), -INT8_C( 125), -INT8_C( 85), INT8_C( 101), -INT8_C( 111), INT8_C( 121), -INT8_C( 19), INT8_C( 92), INT8_C( 77), INT8_C( 90), INT8_C( 32), -INT8_C( 105) }, { -INT8_C( 61), -INT8_C( 14), -INT8_C( 126), -INT8_C( 45), -INT8_C( 123), INT8_C( 0), -INT8_C( 62), -INT8_C( 102), INT8_C( 104), -INT8_C( 122), INT8_C( 104), INT8_C( 82), INT8_C( 16), INT8_C( 112), -INT8_C( 33), INT8_C( 55) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { -INT8_C( 31), INT8_C( 2), INT8_C( 38), -INT8_C( 65), -INT8_C( 52), INT8_C( 102), INT8_C( 89), INT8_C( 107), -INT8_C( 20), -INT8_C( 62), -INT8_C( 66), -INT8_C( 4), INT8_C( 12), -INT8_C( 99), INT8_C( 51), -INT8_C( 46) }, { INT8_C( 30), INT8_C( 93), -INT8_C( 39), INT8_C( 35), INT8_C( 51), -INT8_C( 98), -INT8_C( 71), -INT8_C( 49), INT8_C( 80), -INT8_C( 85), -INT8_C( 72), INT8_C( 92), -INT8_C( 13), INT8_C( 56), INT8_C( 44), INT8_C( 45) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 104), INT8_C( 73), INT8_C( 69), INT8_C( 50), -INT8_C( 18), INT8_C( 61), INT8_C( 16), INT8_C( 62), -INT8_C( 42), INT8_C( 105), -INT8_C( 61), -INT8_C( 99), -INT8_C( 95), INT8_C( 76), -INT8_C( 54), -INT8_C( 14) }, { -INT8_C( 105), -INT8_C( 74), INT8_C( 25), -INT8_C( 51), -INT8_C( 5), -INT8_C( 62), -INT8_C( 17), INT8_C( 3), INT8_C( 41), INT8_C( 13), INT8_C( 60), INT8_C( 45), INT8_C( 94), -INT8_C( 77), INT8_C( 79), INT8_C( 13) }, 
{ UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 100), -INT8_C( 97), -INT8_C( 63), INT8_C( 86), INT8_C( 97), -INT8_C( 28), INT8_C( 89), -INT8_C( 118), -INT8_C( 15), -INT8_C( 72), -INT8_C( 73), INT8_C( 79), INT8_C( 20), INT8_C( 6), INT8_C( 93), -INT8_C( 71) }, { -INT8_C( 101), INT8_C( 76), INT8_C( 62), INT8_C( 47), -INT8_C( 19), INT8_C( 109), INT8_C( 108), -INT8_C( 10), INT8_C( 55), INT8_C( 71), -INT8_C( 95), -INT8_C( 56), -INT8_C( 21), INT8_C( 39), -INT8_C( 16), INT8_C( 70) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { -INT8_C( 77), INT8_C( 94), INT8_C( 57), -INT8_C( 96), -INT8_C( 53), -INT8_C( 91), -INT8_C( 38), INT8_C( 3), -INT8_C( 20), -INT8_C( 48), -INT8_C( 53), -INT8_C( 40), INT8_C( 95), -INT8_C( 68), INT8_C( 30), INT8_C( 37) }, { -INT8_C( 95), -INT8_C( 70), INT8_C( 76), INT8_C( 107), -INT8_C( 81), -INT8_C( 3), INT8_C( 37), INT8_C( 57), INT8_C( 101), INT8_C( 47), INT8_C( 13), -INT8_C( 45), -INT8_C( 106), INT8_C( 45), -INT8_C( 35), INT8_C( 74) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { INT8_C( 88), -INT8_C( 14), -INT8_C( 93), INT8_C( 7), INT8_C( 112), INT8_C( 73), -INT8_C( 24), -INT8_C( 73), INT8_C( 66), INT8_C( 103), -INT8_C( 65), INT8_C( 67), -INT8_C( 55), -INT8_C( 100), INT8_C( 35), INT8_C( 6) }, { -INT8_C( 77), INT8_C( 13), INT8_C( 92), INT8_C( 111), -INT8_C( 113), -INT8_C( 74), INT8_C( 23), INT8_C( 72), -INT8_C( 37), -INT8_C( 104), INT8_C( 97), -INT8_C( 68), INT8_C( 54), -INT8_C( 98), INT8_C( 62), -INT8_C( 113) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), 
UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_uint8x16_t r = simde_vtstq_s8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_private a_ = simde_int8x16_to_private(simde_test_arm_neon_random_i8x16()); simde_int8x16_private b_ = simde_int8x16_to_private(simde_test_arm_neon_random_i8x16()); /* Make some complements which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = ~b_.values[j]; } simde_int8x16_t a = simde_int8x16_from_private(a_); simde_int8x16_t b = simde_int8x16_from_private(b_); simde_uint8x16_t r = simde_vtstq_s8(a, b); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtstq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; int16_t b[8]; uint16_t r[8]; } test_vec[] = { { { INT16_C( 15409), -INT16_C( 4795), -INT16_C( 7107), -INT16_C( 17315), -INT16_C( 32198), -INT16_C( 14318), INT16_C( 2859), INT16_C( 31103) }, { INT16_C( 30163), INT16_C( 31895), -INT16_C( 30120), -INT16_C( 7879), INT16_C( 26743), INT16_C( 14317), -INT16_C( 29989), INT16_C( 3276) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { INT16_C( 23210), INT16_C( 1163), INT16_C( 2660), INT16_C( 14206), INT16_C( 5503), -INT16_C( 10317), -INT16_C( 4705), INT16_C( 6072) }, { -INT16_C( 23211), INT16_C( 12622), INT16_C( 6960), -INT16_C( 2499), INT16_C( 14125), 
INT16_C( 9210), -INT16_C( 18035), INT16_C( 26196) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { INT16_C( 25342), -INT16_C( 25279), -INT16_C( 1713), INT16_C( 29889), INT16_C( 22109), -INT16_C( 12331), INT16_C( 20077), INT16_C( 19397) }, { -INT16_C( 16566), -INT16_C( 10386), -INT16_C( 15495), -INT16_C( 29890), -INT16_C( 22110), -INT16_C( 29694), -INT16_C( 20078), -INT16_C( 28426) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { -INT16_C( 8987), INT16_C( 926), INT16_C( 25839), INT16_C( 14670), -INT16_C( 18396), -INT16_C( 3153), -INT16_C( 22314), INT16_C( 8744) }, { INT16_C( 10999), -INT16_C( 30290), -INT16_C( 23333), -INT16_C( 4583), INT16_C( 18395), INT16_C( 3152), INT16_C( 22313), INT16_C( 3803) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { -INT16_C( 2972), -INT16_C( 25698), -INT16_C( 14662), -INT16_C( 20035), INT16_C( 27632), -INT16_C( 13253), INT16_C( 21520), -INT16_C( 24109) }, { INT16_C( 2971), -INT16_C( 15113), -INT16_C( 11422), -INT16_C( 27182), -INT16_C( 7347), INT16_C( 11191), INT16_C( 4674), INT16_C( 24108) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { INT16_C( 16241), INT16_C( 12621), INT16_C( 1912), INT16_C( 5149), INT16_C( 5138), INT16_C( 29912), -INT16_C( 21529), INT16_C( 151) }, { -INT16_C( 16242), -INT16_C( 11937), -INT16_C( 29486), INT16_C( 20527), INT16_C( 10326), -INT16_C( 6265), INT16_C( 28894), -INT16_C( 152) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { -INT16_C( 1694), INT16_C( 19011), -INT16_C( 3881), INT16_C( 13182), -INT16_C( 27077), -INT16_C( 8188), INT16_C( 13162), -INT16_C( 6737) }, { -INT16_C( 18597), INT16_C( 14759), INT16_C( 3880), -INT16_C( 17352), INT16_C( 27076), -INT16_C( 32311), -INT16_C( 5449), 
INT16_C( 6736) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { -INT16_C( 16448), INT16_C( 7960), -INT16_C( 270), -INT16_C( 13051), -INT16_C( 24268), -INT16_C( 8826), -INT16_C( 16478), INT16_C( 26266) }, { INT16_C( 25384), -INT16_C( 7961), INT16_C( 14158), INT16_C( 13050), INT16_C( 24267), -INT16_C( 21574), INT16_C( 30272), INT16_C( 409) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_uint16x8_t r = simde_vtstq_s16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_private a_ = simde_int16x8_to_private(simde_test_arm_neon_random_i16x8()); simde_int16x8_private b_ = simde_int16x8_to_private(simde_test_arm_neon_random_i16x8()); /* Make some complements which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = ~b_.values[j]; } simde_int16x8_t a = simde_int16x8_from_private(a_); simde_int16x8_t b = simde_int16x8_from_private(b_); simde_uint16x8_t r = simde_vtstq_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtstq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; int32_t b[4]; uint32_t r[4]; } test_vec[] = { { { -INT32_C( 1407376814), -INT32_C( 1356235838), -INT32_C( 1757797231), INT32_C( 256404884) }, { INT32_C( 176509125), -INT32_C( 2017860982), -INT32_C( 2045646060), INT32_C( 2051097384) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { INT32_C( 1809273049), INT32_C( 134353524), INT32_C( 1159154303), INT32_C( 625974427) }, { -INT32_C( 1834219394), INT32_C( 471383540), INT32_C( 1721129216), -INT32_C( 1852916296) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 141951368), -INT32_C( 2010725056), -INT32_C( 1157002436), -INT32_C( 2008173164) }, { INT32_C( 1923376754), INT32_C( 2010725055), INT32_C( 67725303), INT32_C( 1166345677) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 349079122), INT32_C( 503604971), INT32_C( 1629615087), -INT32_C( 1055941776) }, { -INT32_C( 141513473), -INT32_C( 503604972), -INT32_C( 1557692001), INT32_C( 1055941775) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { INT32_C( 1249627995), -INT32_C( 1688291285), INT32_C( 1630895713), INT32_C( 1062782251) }, { -INT32_C( 31435937), INT32_C( 1688291284), -INT32_C( 1985834581), INT32_C( 225168306) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { -INT32_C( 445119869), INT32_C( 1266168829), -INT32_C( 756375949), -INT32_C( 1569528026) }, { -INT32_C( 30772910), -INT32_C( 
1266168830), INT32_C( 834794135), INT32_C( 1569528025) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { INT32_C( 642156723), INT32_C( 543444986), INT32_C( 499373623), INT32_C( 999032633) }, { INT32_C( 1794053075), -INT32_C( 543444987), -INT32_C( 499373624), -INT32_C( 495092177) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { INT32_C( 792920563), INT32_C( 62726602), -INT32_C( 62961368), INT32_C( 1121172021) }, { -INT32_C( 1488977185), INT32_C( 1519025195), INT32_C( 3934202), -INT32_C( 1121172022) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_uint32x4_t r = simde_vtstq_s32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_private a_ = simde_int32x4_to_private(simde_test_arm_neon_random_i32x4()); simde_int32x4_private b_ = simde_int32x4_to_private(simde_test_arm_neon_random_i32x4()); /* Make some complements which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = ~b_.values[j]; } simde_int32x4_t a = simde_int32x4_from_private(a_); simde_int32x4_t b = simde_int32x4_from_private(b_); simde_uint32x4_t r = simde_vtstq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtstq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; int64_t b[2]; uint64_t r[2]; } test_vec[] = { { { -INT64_C( 6906564504408707199), -INT64_C( 1146050713782097844) }, { INT64_C( 6906564504408707198), INT64_C( 7871827673557111993) }, { UINT64_C( 0), UINT64_MAX } }, { { -INT64_C( 9072437593107422918), INT64_C( 8307808349814397871) }, { -INT64_C( 1128897749981389977), -INT64_C( 8307808349814397872) }, { UINT64_MAX, UINT64_C( 0) } }, { { INT64_C( 3448291084656666900), -INT64_C( 4475145380862654405) }, { -INT64_C( 3448291084656666901), INT64_C( 4475145380862654404) }, { UINT64_C( 0), UINT64_C( 0) } }, { { INT64_C( 7730266204185906066), -INT64_C( 6692627503882292371) }, { INT64_C( 278832263600842306), -INT64_C( 5609719181803134700) }, { UINT64_MAX, UINT64_MAX } }, { { INT64_C( 4259197047024433569), INT64_C( 6126162669054319166) }, { INT64_C( 7757174925230701543), INT64_C( 1960548500776339251) }, { UINT64_MAX, UINT64_MAX } }, { { INT64_C( 1713351143557313590), -INT64_C( 4878520122427607347) }, { -INT64_C( 1713351143557313591), -INT64_C( 3458717455958101682) }, { UINT64_C( 0), UINT64_MAX } }, { { INT64_C( 7016167619831180015), -INT64_C( 6460363201520064191) }, { -INT64_C( 6591043928234225762), INT64_C( 1413314601794032747) }, { UINT64_MAX, UINT64_MAX } }, { { -INT64_C( 3349850824658449413), -INT64_C( 6384525313472305283) }, { INT64_C( 5642723914519993458), -INT64_C( 9204180589598903705) }, { UINT64_MAX, UINT64_MAX } }, }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_uint64x2_t r = simde_vtstq_s64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_private a_ = simde_int64x2_to_private(simde_test_arm_neon_random_i64x2()); simde_int64x2_private b_ = simde_int64x2_to_private(simde_test_arm_neon_random_i64x2()); /* Make some complements which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = ~b_.values[j]; } simde_int64x2_t a = simde_int64x2_from_private(a_); simde_int64x2_t b = simde_int64x2_from_private(b_); simde_uint64x2_t r = simde_vtstq_s64(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtstq_u8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(189), UINT8_C(210), UINT8_C(102), UINT8_C(249), UINT8_C(193), UINT8_C(191), UINT8_C(107), UINT8_C( 96), UINT8_C( 75), UINT8_C( 43), UINT8_C( 42), UINT8_C(225), UINT8_C(222), UINT8_C(196), UINT8_C(189), UINT8_C( 32) }, { UINT8_C( 96), UINT8_C( 45), UINT8_C(153), UINT8_C(254), UINT8_C( 8), UINT8_C( 64), UINT8_C(118), UINT8_C(199), UINT8_C(204), UINT8_C(246), UINT8_C(134), UINT8_C(155), UINT8_C( 33), UINT8_C( 24), UINT8_C(140), UINT8_C(223) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C( 29), UINT8_C(246), UINT8_C( 33), UINT8_C(126), UINT8_C( 54), UINT8_C(225), 
UINT8_C(133), UINT8_C(158), UINT8_C(215), UINT8_C( 11), UINT8_C(223), UINT8_C(175), UINT8_C( 35), UINT8_C(130), UINT8_C(215), UINT8_C( 32) }, { UINT8_C(226), UINT8_C(175), UINT8_C(222), UINT8_C(129), UINT8_C(243), UINT8_C(252), UINT8_C(107), UINT8_C( 97), UINT8_C( 68), UINT8_C( 54), UINT8_C( 32), UINT8_C( 80), UINT8_C(190), UINT8_C(125), UINT8_C(188), UINT8_C(116) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { UINT8_C(170), UINT8_C(226), UINT8_C(213), UINT8_C(163), UINT8_C(222), UINT8_C( 64), UINT8_C(196), UINT8_C( 34), UINT8_C(158), UINT8_C(211), UINT8_C(114), UINT8_C( 53), UINT8_C( 80), UINT8_C( 46), UINT8_C(169), UINT8_C(196) }, { UINT8_C( 85), UINT8_C(218), UINT8_C(109), UINT8_C( 92), UINT8_C(145), UINT8_C( 25), UINT8_C( 59), UINT8_C( 82), UINT8_C( 97), UINT8_C( 18), UINT8_C( 55), UINT8_C(211), UINT8_C(192), UINT8_C( 59), UINT8_C( 40), UINT8_C( 30) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(109), UINT8_C(119), UINT8_C( 34), UINT8_C(254), UINT8_C(144), UINT8_C(172), UINT8_C( 80), UINT8_C(219), UINT8_C(113), UINT8_C(135), UINT8_C(197), UINT8_C(127), UINT8_C(195), UINT8_C(140), UINT8_C(157), UINT8_C(225) }, { UINT8_C(234), UINT8_C( 13), UINT8_C(221), UINT8_C( 40), UINT8_C( 47), UINT8_C(252), UINT8_C(221), UINT8_C( 36), UINT8_C(142), UINT8_C(198), UINT8_C(105), UINT8_C( 78), UINT8_C( 89), UINT8_C(115), UINT8_C( 99), UINT8_C(198) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { UINT8_C(210), UINT8_C( 29), UINT8_C( 3), UINT8_C(152), UINT8_C( 26), UINT8_C( 13), UINT8_C( 17), UINT8_C(152), UINT8_C(212), 
UINT8_C( 37), UINT8_C( 57), UINT8_C( 73), UINT8_C(152), UINT8_C( 89), UINT8_C( 65), UINT8_C(142) }, { UINT8_C( 45), UINT8_C(183), UINT8_C(252), UINT8_C(174), UINT8_C(203), UINT8_C(104), UINT8_C(238), UINT8_C(103), UINT8_C(152), UINT8_C(172), UINT8_C(198), UINT8_C(182), UINT8_C( 8), UINT8_C( 6), UINT8_C(190), UINT8_C(113) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(154), UINT8_C( 2), UINT8_C( 17), UINT8_C(158), UINT8_C(106), UINT8_MAX, UINT8_C(206), UINT8_C( 32), UINT8_C(171), UINT8_C(160), UINT8_C(185), UINT8_C(180), UINT8_C(167), UINT8_C(119), UINT8_C( 37), UINT8_C( 58) }, { UINT8_C(101), UINT8_C( 47), UINT8_C( 9), UINT8_C( 97), UINT8_C(244), UINT8_C(239), UINT8_C( 49), UINT8_C(223), UINT8_C(203), UINT8_C( 46), UINT8_C( 99), UINT8_C( 0), UINT8_C( 30), UINT8_C(105), UINT8_C( 98), UINT8_C(197) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C( 66), UINT8_C(179), UINT8_C(247), UINT8_C(207), UINT8_C(161), UINT8_C( 30), UINT8_C(250), UINT8_C(239), UINT8_C(140), UINT8_C(223), UINT8_C(231), UINT8_C( 78), UINT8_C(198), UINT8_C(113), UINT8_C( 21), UINT8_C( 50) }, { UINT8_C(189), UINT8_C( 76), UINT8_C( 8), UINT8_C( 48), UINT8_C( 94), UINT8_C(225), UINT8_C( 78), UINT8_C( 16), UINT8_C(115), UINT8_C( 32), UINT8_C(106), UINT8_C(124), UINT8_C( 23), UINT8_C(142), UINT8_C(234), UINT8_C( 62) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { UINT8_C(217), UINT8_C( 4), UINT8_C(105), UINT8_C(248), UINT8_C(176), UINT8_C(169), UINT8_C( 9), UINT8_MAX, UINT8_C( 77), UINT8_C(115), UINT8_C(136), UINT8_C(100), 
UINT8_C( 1), UINT8_C(108), UINT8_C(162), UINT8_C( 13) }, { UINT8_C( 38), UINT8_C(251), UINT8_C(150), UINT8_C(226), UINT8_C( 79), UINT8_C( 86), UINT8_C(206), UINT8_C( 0), UINT8_C(253), UINT8_C( 9), UINT8_C(119), UINT8_C(239), UINT8_C( 87), UINT8_C( 33), UINT8_C(158), UINT8_C(242) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vtstq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint8x16_private a_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); simde_uint8x16_private b_ = simde_uint8x16_to_private(simde_test_arm_neon_random_u8x16()); /* Make some complements which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = ~b_.values[j]; } simde_uint8x16_t a = simde_uint8x16_from_private(a_); simde_uint8x16_t b = simde_uint8x16_from_private(b_); simde_uint8x16_t r = simde_vtstq_u8(a, b); simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtstq_u16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(11577), UINT16_C(56621), UINT16_C(19835), UINT16_C(50703), UINT16_C(34400), UINT16_C( 4561), UINT16_C(39613), UINT16_C(15587) }, { UINT16_C(53958), UINT16_C( 4816), UINT16_C(45700), UINT16_C(14816), UINT16_C(31135), UINT16_C( 383), UINT16_C(25922), UINT16_C(59203) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(15330), UINT16_C(19346), UINT16_C(18495), UINT16_C(57955), UINT16_C(40927), UINT16_C(40472), UINT16_C(63692), UINT16_C(27863) }, { UINT16_C(22130), UINT16_C(46189), UINT16_C(45244), UINT16_C( 7580), UINT16_C(24608), UINT16_C(40129), UINT16_C(11060), UINT16_C( 5872) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(50311), UINT16_C(21589), UINT16_C(11708), UINT16_C(20181), UINT16_C(45343), UINT16_C(16355), UINT16_C(32733), UINT16_C(48330) }, { UINT16_C( 7903), UINT16_C( 5274), UINT16_C(35402), UINT16_C(45354), UINT16_C(20192), UINT16_C(32087), UINT16_C( 956), UINT16_C(17205) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C(46974), UINT16_C(24954), UINT16_C(55060), UINT16_C(62559), UINT16_C(63990), UINT16_C(16392), UINT16_C(12931), UINT16_C(25585) }, { UINT16_C(18561), 
UINT16_C(15841), UINT16_C( 5708), UINT16_C( 4992), UINT16_C( 6304), UINT16_C(22679), UINT16_C(19055), UINT16_C(62611) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(61613), UINT16_C(12571), UINT16_C(59498), UINT16_C(41876), UINT16_C(30036), UINT16_C(41184), UINT16_C(24971), UINT16_C(11444) }, { UINT16_C(19321), UINT16_C(59524), UINT16_C( 6037), UINT16_C(30172), UINT16_C(12836), UINT16_C( 2409), UINT16_C(20966), UINT16_C(38116) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(35911), UINT16_C(23040), UINT16_C(46152), UINT16_C(64204), UINT16_C( 2560), UINT16_C(38314), UINT16_C(34337), UINT16_C(17930) }, { UINT16_C(29624), UINT16_C(40783), UINT16_C(13508), UINT16_C( 1331), UINT16_C(63539), UINT16_C(16233), UINT16_C(28753), UINT16_C( 8351) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(26091), UINT16_C(11753), UINT16_C(62610), UINT16_C(19059), UINT16_C(65111), UINT16_C( 1021), UINT16_C(40188), UINT16_C(61615) }, { UINT16_C(39444), UINT16_C(26217), UINT16_C( 2058), UINT16_C(25222), UINT16_C( 424), UINT16_C(64514), UINT16_C(25347), UINT16_C( 3920) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C( 1195), UINT16_C(43093), UINT16_C( 1440), UINT16_C(12697), UINT16_C(65165), UINT16_C(38860), UINT16_C(58797), UINT16_C(14857) }, { UINT16_C(64340), UINT16_C(22442), UINT16_C(64095), UINT16_C(52838), UINT16_C(41524), UINT16_C(25039), UINT16_C( 6738), UINT16_C(50678) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = 
simde_vtstq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint16x8_private a_ = simde_uint16x8_to_private(simde_test_arm_neon_random_u16x8()); simde_uint16x8_private b_ = simde_uint16x8_to_private(simde_test_arm_neon_random_u16x8()); /* Make some complements which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = ~b_.values[j]; } simde_uint16x8_t a = simde_uint16x8_from_private(a_); simde_uint16x8_t b = simde_uint16x8_from_private(b_); simde_uint16x8_t r = simde_vtstq_u16(a, b); simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtstq_u32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(3039705452), UINT32_C(2634367223), UINT32_C(3690212691), UINT32_C( 322215036) }, { UINT32_C( 809289908), UINT32_C(1288906048), UINT32_C( 604754604), UINT32_C( 686277563) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(3108576102), UINT32_C(2375368264), UINT32_C(3778502956), UINT32_C(1116440472) }, { UINT32_C(1186391193), UINT32_C(1919599031), UINT32_C(2157597316), UINT32_C(3178526823) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(3242058821), UINT32_C( 708879250), UINT32_C(1799599313), UINT32_C( 852380359) }, { UINT32_C(1052908474), UINT32_C(3586088045), UINT32_C( 798110676), UINT32_C(3442586936) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(2257597108), UINT32_C(2146554854), UINT32_C(2278466253), UINT32_C( 734433469) }, { UINT32_C(3238036973), UINT32_C(4042298040), UINT32_C(1287463239), UINT32_C( 755125624) }, { 
UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(4166130751), UINT32_C( 62567238), UINT32_C( 825131076), UINT32_C(3186765317) }, { UINT32_C( 128836544), UINT32_C(2102618628), UINT32_C( 933909943), UINT32_C(1302158823) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(2034907445), UINT32_C(3198843321), UINT32_C(4138918391), UINT32_C(2221877376) }, { UINT32_C(1241629587), UINT32_C( 8497944), UINT32_C( 156048904), UINT32_C(3595619488) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(3145660948), UINT32_C(3215279200), UINT32_C(3046871842), UINT32_C(1862245718) }, { UINT32_C(1366196552), UINT32_C(1079688095), UINT32_C(2383850376), UINT32_C(1515021638) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { UINT32_C(3876403653), UINT32_C(3559910682), UINT32_C(2391841606), UINT32_C(1362257174) }, { UINT32_C(1123826362), UINT32_C( 735056613), UINT32_C(3615825527), UINT32_C(2932710121) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vtstq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint32x4_private a_ = simde_uint32x4_to_private(simde_test_arm_neon_random_u32x4()); simde_uint32x4_private b_ = simde_uint32x4_to_private(simde_test_arm_neon_random_u32x4()); /* Make some complements which is unlikely to happen by accident. 
*/ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = ~b_.values[j]; } simde_uint32x4_t a = simde_uint32x4_from_private(a_); simde_uint32x4_t b = simde_uint32x4_from_private(b_); simde_uint32x4_t r = simde_vtstq_u32(a, b); simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vtstq_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(10528700444459983810), UINT64_C( 1761940834975752996) }, { UINT64_C( 9932437595201082497), UINT64_C(16684803238733798619) }, { UINT64_MAX, UINT64_C( 0) } }, { { UINT64_C( 5091877151210656604), UINT64_C( 1066874122888010824) }, { UINT64_C(13354866922498895011), UINT64_C(11382078829017005141) }, { UINT64_C( 0), UINT64_MAX } }, { { UINT64_C( 9063377370790874867), UINT64_C( 6050081726145094505) }, { UINT64_C(14319394822410993894), UINT64_C(12396662347564457110) }, { UINT64_MAX, UINT64_C( 0) } }, { { UINT64_C(10412951015911017111), UINT64_C( 1106603098493244346) }, { UINT64_C( 3239137215810657310), UINT64_C(17340140975216307269) }, { UINT64_MAX, UINT64_C( 0) } }, { { UINT64_C( 3135671028341776206), UINT64_C( 2938365675075114183) }, { UINT64_C( 982391747816647171), UINT64_C( 4155543630013790838) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 8944818937377997577), UINT64_C(10805608432536573718) }, { UINT64_C( 4412910401907889313), UINT64_C( 7641135641172977897) }, { UINT64_MAX, UINT64_C( 0) } }, { { UINT64_C(11115115817610934008), UINT64_C( 1547521650231446886) }, { UINT64_C( 616655974268410409), UINT64_C( 5913741270551752546) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 1961969470644239469), UINT64_C(17422221396708292660) }, { UINT64_C(17658369120158031761), UINT64_C( 2933273528357185933) }, { UINT64_MAX, 
UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vtstq_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_uint64x2_private a_ = simde_uint64x2_to_private(simde_test_arm_neon_random_u64x2()); simde_uint64x2_private b_ = simde_uint64x2_to_private(simde_test_arm_neon_random_u64x2()); /* Make some complements which is unlikely to happen by accident. */ for (size_t j = 0 ; j < (sizeof(a_.values) / sizeof(a_.values[0])) ; j++) { if (rand() < (RAND_MAX / 3)) a_.values[j] = ~b_.values[j]; } simde_uint64x2_t a = simde_uint64x2_from_private(a_); simde_uint64x2_t b = simde_uint64x2_from_private(b_); simde_uint64x2_t r = simde_vtstq_u64(a, b); simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vtst_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtst_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vtst_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vtst_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vtst_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vtst_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vtst_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vtst_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vtstq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtstq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vtstq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vtstq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vtstq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vtstq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vtstq_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vtstq_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/uqadd.c000066400000000000000000001064141400333146700165670ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN uqadd #include "test-neon.h" 
#include "../../../simde/arm/neon/uqadd.h" static int test_simde_vuqaddb_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a; uint8_t b; int8_t r; } test_vec[] = { { INT8_C( 63), UINT8_C(186), INT8_MAX }, { INT8_C( 46), UINT8_C(228), INT8_MAX }, { INT8_C( 4), UINT8_C( 92), INT8_C( 96) }, { INT8_C( 80), UINT8_C(144), INT8_MAX }, { -INT8_C( 91), UINT8_C(184), INT8_C( 93) }, { -INT8_C( 82), UINT8_C(209), INT8_MAX }, { INT8_C( 71), UINT8_C(212), INT8_MAX }, { INT8_C( 126), UINT8_C(232), INT8_MAX } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int8_t r = simde_vuqaddb_s8(test_vec[i].a, test_vec[i].b); simde_assert_equal_i8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int8_t a = simde_test_codegen_random_i8(); uint8_t b = simde_test_codegen_random_u8(); int8_t r = simde_vuqaddb_s8(a, b); simde_test_codegen_write_i8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vuqaddh_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a; uint16_t b; int16_t r; } test_vec[] = { { -INT16_C( 29377), UINT16_C( 4536), -INT16_C( 24841) }, { -INT16_C( 10071), UINT16_C( 5627), -INT16_C( 4444) }, { INT16_C( 1195), UINT16_C( 268), INT16_C( 1463) }, { -INT16_C( 21310), UINT16_C( 8582), -INT16_C( 12728) }, { INT16_C( 21719), UINT16_C(40392), INT16_MAX }, { -INT16_C( 13629), UINT16_C(23905), INT16_C( 10276) }, { -INT16_C( 17304), UINT16_C(51167), INT16_MAX }, { -INT16_C( 16371), UINT16_C(19572), INT16_C( 3201) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int16_t r = simde_vuqaddh_s16(test_vec[i].a, test_vec[i].b); simde_assert_equal_i16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int16_t a = simde_test_codegen_random_i16(); uint16_t b = simde_test_codegen_random_u16(); 
int16_t r = simde_vuqaddh_s16(a, b); simde_test_codegen_write_i16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vuqadds_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a; uint32_t b; int32_t r; } test_vec[] = { { -INT32_C( 161600179), UINT32_C(2970310917), INT32_MAX }, { INT32_C( 548542301), UINT32_C(2604742851), INT32_MAX }, { INT32_C( 1329072524), UINT32_C(1001167315), INT32_MAX }, { INT32_C( 1644399445), UINT32_C(2578413388), INT32_MAX }, { -INT32_C( 1433399900), UINT32_C(3277560678), INT32_C( 1844160778) }, { INT32_C( 1994591667), UINT32_C(3507561541), INT32_MAX }, { INT32_C( 18893101), UINT32_C( 943508707), INT32_C( 962401808) }, { -INT32_C( 1533329576), UINT32_C(1530743479), -INT32_C( 2586097) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int32_t r = simde_vuqadds_s32(test_vec[i].a, test_vec[i].b); simde_assert_equal_i32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int32_t a = simde_test_codegen_random_i32(); uint32_t b = simde_test_codegen_random_u32(); int32_t r = simde_vuqadds_s32(a, b); simde_test_codegen_write_i32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vuqaddd_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a; uint64_t b; int64_t r; } test_vec[] = { { INT64_C( 0), HEDLEY_STATIC_CAST(uint64_t, INT64_MAX) - 1, INT64_MAX - 1 }, { INT64_C( 0), HEDLEY_STATIC_CAST(uint64_t, INT64_MAX), INT64_MAX }, { INT64_C( 0), HEDLEY_STATIC_CAST(uint64_t, INT64_MAX) + 1, INT64_MAX }, { INT64_C( 2053747437340052823), UINT64_C(13151536243870426221), INT64_MAX }, { INT64_C( 2053747437340052823), UINT64_C(13151536243870426221), INT64_MAX }, { -INT64_C( 
4032424325469985811), UINT64_C( 4991582605610361907), INT64_C( 959158280140376096) }, { INT64_C( 1872515219788548983), UINT64_C( 2316344661394558694), INT64_C( 4188859881183107677) }, { INT64_C( 156211806211202290), UINT64_C(13245141756903462557), INT64_MAX }, { INT64_C( 4269855915686482557), UINT64_C( 5653371427721186593), INT64_MAX }, { -INT64_C( 324862399252175701), UINT64_C( 1831091054024551026), INT64_C( 1506228654772375325) }, { -INT64_C( 5423878131695943638), UINT64_C(16157363179885192592), INT64_MAX }, { INT64_C( 4123846759084190689), UINT64_C( 4897475589879611714), INT64_C( 9021322348963802403) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int64_t r = simde_vuqaddd_s64(test_vec[i].a, test_vec[i].b); simde_assert_equal_i64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int64_t a = simde_test_codegen_random_i64(); uint64_t b = simde_test_codegen_random_u64(); int64_t r = simde_vuqaddd_s64(a, b); simde_test_codegen_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vuqadd_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[8]; uint8_t b[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 84), INT8_C( 57), -INT8_C( 62), -INT8_C( 25), -INT8_C( 68), INT8_C( 124), -INT8_C( 101), INT8_C( 76) }, { UINT8_C(225), UINT8_C(164), UINT8_C( 56), UINT8_C( 22), UINT8_C( 32), UINT8_C(114), UINT8_C(246), UINT8_C( 1) }, { INT8_MAX, INT8_MAX, -INT8_C( 6), -INT8_C( 3), -INT8_C( 36), INT8_MAX, INT8_MAX, INT8_C( 77) } }, { { INT8_C( 94), INT8_C( 81), INT8_C( 64), INT8_C( 37), INT8_C( 41), INT8_C( 123), INT8_C( 94), INT8_C( 108) }, { UINT8_C( 60), UINT8_C( 73), UINT8_C( 74), UINT8_C( 85), UINT8_C(157), UINT8_C( 66), UINT8_C(152), UINT8_C(241) }, { INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 122), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX } }, { { 
INT8_C( 123), INT8_C( 90), -INT8_C( 40), INT8_C( 55), -INT8_C( 41), INT8_C( 115), -INT8_C( 125), -INT8_C( 72) }, { UINT8_C( 23), UINT8_C(187), UINT8_C(206), UINT8_C( 56), UINT8_C( 45), UINT8_C(196), UINT8_C( 57), UINT8_C(139) }, { INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 111), INT8_C( 4), INT8_MAX, -INT8_C( 68), INT8_C( 67) } }, { { INT8_C( 21), INT8_C( 122), -INT8_C( 80), INT8_C( 63), -INT8_C( 11), INT8_C( 14), -INT8_C( 85), INT8_C( 49) }, { UINT8_C( 87), UINT8_C(245), UINT8_C(135), UINT8_C(244), UINT8_C( 55), UINT8_C( 31), UINT8_C(229), UINT8_C(178) }, { INT8_C( 108), INT8_MAX, INT8_C( 55), INT8_MAX, INT8_C( 44), INT8_C( 45), INT8_MAX, INT8_MAX } }, { { INT8_C( 122), -INT8_C( 67), -INT8_C( 23), INT8_C( 81), INT8_C( 49), INT8_C( 108), INT8_C( 9), INT8_C( 72) }, { UINT8_C( 39), UINT8_C(216), UINT8_C(128), UINT8_C( 85), UINT8_C(156), UINT8_C(186), UINT8_C(224), UINT8_C(178) }, { INT8_MAX, INT8_MAX, INT8_C( 105), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX } }, { { INT8_C( 52), -INT8_C( 111), -INT8_C( 15), INT8_C( 41), -INT8_C( 97), -INT8_C( 100), INT8_C( 90), -INT8_C( 9) }, { UINT8_C(145), UINT8_C(225), UINT8_C(235), UINT8_C(201), UINT8_C( 1), UINT8_C(209), UINT8_C(123), UINT8_C(123) }, { INT8_MAX, INT8_C( 114), INT8_MAX, INT8_MAX, -INT8_C( 96), INT8_C( 109), INT8_MAX, INT8_C( 114) } }, { { -INT8_C( 114), INT8_C( 101), -INT8_C( 52), -INT8_C( 65), -INT8_C( 47), -INT8_C( 43), INT8_C( 8), -INT8_C( 7) }, { UINT8_C(173), UINT8_C(136), UINT8_C( 78), UINT8_C( 74), UINT8_C( 66), UINT8_C( 46), UINT8_C(252), UINT8_C(118) }, { INT8_C( 59), INT8_MAX, INT8_C( 26), INT8_C( 9), INT8_C( 19), INT8_C( 3), INT8_MAX, INT8_C( 111) } }, { { -INT8_C( 65), -INT8_C( 19), -INT8_C( 97), INT8_C( 95), -INT8_C( 119), -INT8_C( 6), INT8_C( 86), INT8_C( 26) }, { UINT8_C(219), UINT8_C( 65), UINT8_C(227), UINT8_C(220), UINT8_C( 18), UINT8_C( 95), UINT8_C( 87), UINT8_C(161) }, { INT8_MAX, INT8_C( 46), INT8_MAX, INT8_MAX, -INT8_C( 101), INT8_C( 89), INT8_MAX, INT8_MAX } } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_int8x8_t r = simde_vuqadd_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); simde_int8x8_t r = simde_vuqadd_s8(a, b); simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vuqadd_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[4]; uint16_t b[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 9156), -INT16_C( 27296), INT16_C( 26873), -INT16_C( 22898) }, { UINT16_C(56561), UINT16_C(13296), UINT16_C(60427), UINT16_C(51882) }, { INT16_MAX, -INT16_C( 14000), INT16_MAX, INT16_C( 28984) } }, { { INT16_C( 18905), INT16_C( 25129), INT16_C( 32579), INT16_C( 8061) }, { UINT16_C(24769), UINT16_C(54267), UINT16_C(21439), UINT16_C(33652) }, { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX } }, { { -INT16_C( 10890), INT16_C( 28441), -INT16_C( 22723), INT16_C( 11798) }, { UINT16_C( 1668), UINT16_C(36706), UINT16_C( 3315), UINT16_C(52313) }, { -INT16_C( 9222), INT16_MAX, -INT16_C( 19408), INT16_MAX } }, { { -INT16_C( 31915), -INT16_C( 26321), -INT16_C( 21502), -INT16_C( 15432) }, { UINT16_C(45836), UINT16_C(52375), UINT16_C( 2822), UINT16_C(32079) }, { INT16_C( 13921), INT16_C( 26054), -INT16_C( 18680), INT16_C( 16647) } }, { { INT16_C( 26848), INT16_C( 7916), INT16_C( 528), -INT16_C( 27572) }, { UINT16_C(44553), UINT16_C(64547), UINT16_C(31930), UINT16_C( 4296) }, { INT16_MAX, INT16_MAX, INT16_C( 32458), -INT16_C( 23276) } }, { { -INT16_C( 2049), INT16_C( 681), INT16_C( 24995), -INT16_C( 20283) }, { UINT16_C(23572), UINT16_C( 
7036), UINT16_C(52072), UINT16_C(18584) }, { INT16_C( 21523), INT16_C( 7717), INT16_MAX, -INT16_C( 1699) } }, { { -INT16_C( 31692), INT16_C( 17510), -INT16_C( 19577), -INT16_C( 28456) }, { UINT16_C(64353), UINT16_C( 7308), UINT16_C(21623), UINT16_C(30508) }, { INT16_C( 32661), INT16_C( 24818), INT16_C( 2046), INT16_C( 2052) } }, { { -INT16_C( 10932), -INT16_C( 4231), INT16_C( 15926), INT16_C( 19103) }, { UINT16_C( 7067), UINT16_C( 869), UINT16_C(64999), UINT16_C( 6987) }, { -INT16_C( 3865), -INT16_C( 3362), INT16_MAX, INT16_C( 26090) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_int16x4_t r = simde_vuqadd_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); simde_int16x4_t r = simde_vuqadd_s16(a, b); simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vuqadd_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[2]; uint32_t b[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 157266562), -INT32_C( 963037339) }, { UINT32_C(2850170162), UINT32_C(3307212409) }, { INT32_MAX, INT32_MAX } }, { { INT32_C( 431331811), INT32_C( 1935955160) }, { UINT32_C(1467402608), UINT32_C(1232257479) }, { INT32_C( 1898734419), INT32_MAX } }, { { -INT32_C( 665661069), INT32_C( 983558920) }, { UINT32_C(2313388304), UINT32_C(1934558352) }, { INT32_C( 1647727235), INT32_MAX } }, { { INT32_C( 1972176029), -INT32_C( 924257960) }, { UINT32_C(2166316730), UINT32_C(2479526176) }, { INT32_MAX, INT32_C( 1555268216) } }, { { INT32_C( 1785470050), 
INT32_C( 396626695) }, { UINT32_C( 480348300), UINT32_C( 697364620) }, { INT32_MAX, INT32_C( 1093991315) } }, { { INT32_C( 1285496308), -INT32_C( 938113266) }, { UINT32_C( 105526502), UINT32_C( 681120966) }, { INT32_C( 1391022810), -INT32_C( 256992300) } }, { { INT32_C( 949159217), -INT32_C( 1655687408) }, { UINT32_C(1270477247), UINT32_C(3581233633) }, { INT32_MAX, INT32_C( 1925546225) } }, { { INT32_C( 1948324966), -INT32_C( 2126694757) }, { UINT32_C( 830965611), UINT32_C(3428393371) }, { INT32_MAX, INT32_C( 1301698614) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_int32x2_t r = simde_vuqadd_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); simde_int32x2_t r = simde_vuqadd_s32(a, b); simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vuqadd_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[1]; uint64_t b[1]; int64_t r[1]; } test_vec[] = { { { -INT64_C( 2029903918006932442) }, { UINT64_C( 4466626174427041094) }, { INT64_C( 2436722256420108652) } }, { { -INT64_C( 4623526296965472840) }, { UINT64_C(11880013668570651766) }, { INT64_C( 7256487371605178926) } }, { { INT64_C( 9096619478935659318) }, { UINT64_C(14434495371120307260) }, { INT64_MAX } }, { { INT64_C( 7553084072065237950) }, { UINT64_C( 4715083043050209613) }, { INT64_MAX } }, { { INT64_C( 3886845044301974074) }, { UINT64_C(17662724346743461189) }, { INT64_MAX } }, { { INT64_C( 8716674384905321117) }, { UINT64_C( 209051007555170458) }, { INT64_C( 
8925725392460491575) } }, { { -INT64_C( 6977371756340870660) }, { UINT64_C(12332564481777628411) }, { INT64_C( 5355192725436757751) } }, { { -INT64_C( 6777863531057762740) }, { UINT64_C(16315466017688466337) }, { INT64_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); simde_int64x1_t r = simde_vuqadd_s64(a, b); simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); simde_uint64x1_t b = simde_test_arm_neon_random_u64x1(); simde_int64x1_t r = simde_vuqadd_s64(a, b); simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vuqaddq_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int8_t a[16]; uint8_t b[16]; int8_t r[16]; } test_vec[] = { { { -INT8_C( 81), -INT8_C( 31), -INT8_C( 40), INT8_C( 9), INT8_C( 70), INT8_C( 3), -INT8_C( 88), INT8_C( 65), INT8_C( 119), -INT8_C( 100), INT8_C( 115), -INT8_C( 123), -INT8_C( 81), -INT8_C( 103), INT8_C( 48), -INT8_C( 5) }, { UINT8_C(199), UINT8_C(106), UINT8_C(115), UINT8_C(206), UINT8_C(155), UINT8_C( 99), UINT8_C(112), UINT8_C( 60), UINT8_C(150), UINT8_C(183), UINT8_C(166), UINT8_C(124), UINT8_C(228), UINT8_C( 18), UINT8_C( 95), UINT8_C(147) }, { INT8_C( 118), INT8_C( 75), INT8_C( 75), INT8_MAX, INT8_MAX, INT8_C( 102), INT8_C( 24), INT8_C( 125), INT8_MAX, INT8_C( 83), INT8_MAX, INT8_C( 1), INT8_MAX, -INT8_C( 85), INT8_MAX, INT8_MAX } }, { { -INT8_C( 13), INT8_C( 55), -INT8_C( 100), INT8_C( 57), INT8_C( 58), INT8_C( 68), INT8_C( 123), -INT8_C( 79), -INT8_C( 31), -INT8_C( 18), INT8_C( 54), -INT8_C( 112), -INT8_C( 121), INT8_C( 102), -INT8_C( 117), INT8_C( 79) }, { 
UINT8_C(208), UINT8_C(254), UINT8_C( 29), UINT8_C(107), UINT8_C( 97), UINT8_C(141), UINT8_C(167), UINT8_C(248), UINT8_C( 68), UINT8_C( 77), UINT8_C(116), UINT8_C( 41), UINT8_C( 95), UINT8_C(211), UINT8_C(188), UINT8_C( 82) }, { INT8_MAX, INT8_MAX, -INT8_C( 71), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 37), INT8_C( 59), INT8_MAX, -INT8_C( 71), -INT8_C( 26), INT8_MAX, INT8_C( 71), INT8_MAX } }, { { INT8_C( 10), INT8_C( 89), -INT8_C( 116), INT8_C( 68), -INT8_C( 99), INT8_C( 7), -INT8_C( 11), INT8_C( 126), -INT8_C( 11), INT8_C( 43), INT8_C( 14), INT8_C( 124), -INT8_C( 111), -INT8_C( 102), -INT8_C( 53), INT8_C( 97) }, { UINT8_C(152), UINT8_C(233), UINT8_C(204), UINT8_C(250), UINT8_C(118), UINT8_C(116), UINT8_C(242), UINT8_C(187), UINT8_C(193), UINT8_C(102), UINT8_C(228), UINT8_C( 33), UINT8_C( 58), UINT8_C(160), UINT8_C(115), UINT8_C( 68) }, { INT8_MAX, INT8_MAX, INT8_C( 88), INT8_MAX, INT8_C( 19), INT8_C( 123), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, -INT8_C( 53), INT8_C( 58), INT8_C( 62), INT8_MAX } }, { { -INT8_C( 7), -INT8_C( 1), -INT8_C( 119), -INT8_C( 105), INT8_C( 6), INT8_C( 126), INT8_C( 21), -INT8_C( 5), -INT8_C( 86), INT8_C( 36), INT8_C( 120), INT8_C( 59), -INT8_C( 66), INT8_C( 67), -INT8_C( 99), INT8_C( 86) }, { UINT8_C( 44), UINT8_C(105), UINT8_C( 80), UINT8_C(163), UINT8_C(221), UINT8_C( 66), UINT8_C( 94), UINT8_C(159), UINT8_C(169), UINT8_C( 66), UINT8_C(192), UINT8_C(227), UINT8_C(226), UINT8_C( 51), UINT8_C( 39), UINT8_C(220) }, { INT8_C( 37), INT8_C( 104), -INT8_C( 39), INT8_C( 58), INT8_MAX, INT8_MAX, INT8_C( 115), INT8_MAX, INT8_C( 83), INT8_C( 102), INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 118), -INT8_C( 60), INT8_MAX } }, { { INT8_C( 51), -INT8_C( 80), INT8_C( 115), INT8_C( 57), INT8_C( 47), -INT8_C( 120), INT8_C( 53), -INT8_C( 39), -INT8_C( 84), -INT8_C( 83), INT8_C( 20), INT8_C( 106), -INT8_C( 16), -INT8_C( 79), -INT8_C( 63), INT8_C( 29) }, { UINT8_C( 27), UINT8_C( 17), UINT8_C(192), UINT8_C(248), UINT8_C( 84), 
UINT8_C( 30), UINT8_C(151), UINT8_C(253), UINT8_C( 96), UINT8_C( 87), UINT8_C(224), UINT8_C( 66), UINT8_C(139), UINT8_C( 7), UINT8_C( 30), UINT8_C(190) }, { INT8_C( 78), -INT8_C( 63), INT8_MAX, INT8_MAX, INT8_MAX, -INT8_C( 90), INT8_MAX, INT8_MAX, INT8_C( 12), INT8_C( 4), INT8_MAX, INT8_MAX, INT8_C( 123), -INT8_C( 72), -INT8_C( 33), INT8_MAX } }, { { -INT8_C( 72), -INT8_C( 111), -INT8_C( 9), -INT8_C( 25), INT8_C( 26), INT8_C( 44), -INT8_C( 64), -INT8_C( 58), -INT8_C( 39), -INT8_C( 44), INT8_C( 49), -INT8_C( 54), -INT8_C( 122), -INT8_C( 14), -INT8_C( 25), -INT8_C( 95) }, { UINT8_C( 3), UINT8_C(167), UINT8_C(153), UINT8_C( 87), UINT8_C(197), UINT8_C( 49), UINT8_C( 84), UINT8_C( 37), UINT8_C(136), UINT8_C( 52), UINT8_C(103), UINT8_C( 19), UINT8_C( 60), UINT8_C(134), UINT8_C(209), UINT8_C(244) }, { -INT8_C( 69), INT8_C( 56), INT8_MAX, INT8_C( 62), INT8_MAX, INT8_C( 93), INT8_C( 20), -INT8_C( 21), INT8_C( 97), INT8_C( 8), INT8_MAX, -INT8_C( 35), -INT8_C( 62), INT8_C( 120), INT8_MAX, INT8_MAX } }, { { INT8_C( 23), -INT8_C( 55), -INT8_C( 37), INT8_C( 49), -INT8_C( 11), -INT8_C( 101), -INT8_C( 8), -INT8_C( 49), INT8_C( 111), INT8_C( 41), -INT8_C( 103), -INT8_C( 11), INT8_C( 27), INT8_MIN, -INT8_C( 106), INT8_C( 30) }, { UINT8_C( 39), UINT8_C( 48), UINT8_C(118), UINT8_C(236), UINT8_C( 97), UINT8_C(202), UINT8_C( 17), UINT8_C(233), UINT8_MAX, UINT8_C(120), UINT8_C(253), UINT8_C( 59), UINT8_C(254), UINT8_C(206), UINT8_C( 47), UINT8_C( 22) }, { INT8_C( 62), -INT8_C( 7), INT8_C( 81), INT8_MAX, INT8_C( 86), INT8_C( 101), INT8_C( 9), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 48), INT8_MAX, INT8_C( 78), -INT8_C( 59), INT8_C( 52) } }, { { -INT8_C( 105), INT8_C( 10), INT8_C( 71), -INT8_C( 115), -INT8_C( 91), INT8_C( 63), INT8_C( 92), INT8_C( 20), INT8_C( 104), -INT8_C( 11), INT8_C( 10), -INT8_C( 125), INT8_C( 117), -INT8_C( 96), -INT8_C( 94), -INT8_C( 100) }, { UINT8_C(208), UINT8_C( 24), UINT8_C(136), UINT8_C( 49), UINT8_C(226), UINT8_C(153), UINT8_C( 27), UINT8_C(225), 
UINT8_C( 17), UINT8_C( 24), UINT8_C( 28), UINT8_C( 16), UINT8_C(230), UINT8_C( 75), UINT8_C( 38), UINT8_C(126) }, { INT8_C( 103), INT8_C( 34), INT8_MAX, -INT8_C( 66), INT8_MAX, INT8_MAX, INT8_C( 119), INT8_MAX, INT8_C( 121), INT8_C( 13), INT8_C( 38), -INT8_C( 109), INT8_MAX, -INT8_C( 21), -INT8_C( 56), INT8_C( 26) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_int8x16_t r = simde_vuqaddq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); simde_int8x16_t r = simde_vuqaddq_s8(a, b); simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vuqaddq_s16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int16_t a[8]; uint16_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 27989), -INT16_C( 1525), INT16_C( 26541), INT16_C( 5391), INT16_C( 6492), -INT16_C( 11879), INT16_C( 15289), -INT16_C( 30099) }, { UINT16_C(62803), UINT16_C(13755), UINT16_C(54926), UINT16_C(40727), UINT16_C(13294), UINT16_C(54703), UINT16_C(54655), UINT16_C(54355) }, { INT16_MAX, INT16_C( 12230), INT16_MAX, INT16_MAX, INT16_C( 19786), INT16_MAX, INT16_MAX, INT16_C( 24256) } }, { { INT16_C( 24131), -INT16_C( 3889), -INT16_C( 8507), INT16_C( 8453), -INT16_C( 24841), -INT16_C( 20238), INT16_C( 24537), INT16_C( 11322) }, { UINT16_C(63060), UINT16_C(57954), UINT16_C(31180), UINT16_C(48001), UINT16_C(12716), UINT16_C(11152), UINT16_C(58118), UINT16_C(18688) }, { INT16_MAX, INT16_MAX, INT16_C( 22673), INT16_MAX, -INT16_C( 12125), -INT16_C( 9086), INT16_MAX, INT16_C( 
30010) } }, { { -INT16_C( 12479), INT16_C( 1593), INT16_C( 16301), -INT16_C( 23513), INT16_C( 6621), -INT16_C( 18604), -INT16_C( 28808), -INT16_C( 13085) }, { UINT16_C(17797), UINT16_C(20910), UINT16_C(12222), UINT16_C(27404), UINT16_C(40032), UINT16_C(26518), UINT16_C(38527), UINT16_C(49328) }, { INT16_C( 5318), INT16_C( 22503), INT16_C( 28523), INT16_C( 3891), INT16_MAX, INT16_C( 7914), INT16_C( 9719), INT16_MAX } }, { { -INT16_C( 5531), INT16_C( 4806), -INT16_C( 4823), INT16_C( 1718), INT16_C( 2822), INT16_C( 32445), -INT16_C( 24166), INT16_C( 8010) }, { UINT16_C(63718), UINT16_C(42352), UINT16_C(32040), UINT16_C(34832), UINT16_C(42521), UINT16_C(39407), UINT16_C(41021), UINT16_C(41561) }, { INT16_MAX, INT16_MAX, INT16_C( 27217), INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 16855), INT16_MAX } }, { { INT16_C( 8330), -INT16_C( 19531), INT16_C( 27405), INT16_C( 5305), INT16_C( 30582), INT16_C( 4242), -INT16_C( 8936), -INT16_C( 465) }, { UINT16_C(41173), UINT16_C(64931), UINT16_C(45853), UINT16_C(13958), UINT16_C(30042), UINT16_C(38863), UINT16_C(10517), UINT16_C(40761) }, { INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 19263), INT16_MAX, INT16_MAX, INT16_C( 1581), INT16_MAX } }, { { -INT16_C( 4535), INT16_C( 22098), INT16_C( 3162), -INT16_C( 12182), -INT16_C( 637), -INT16_C( 25631), INT16_C( 4314), -INT16_C( 20583) }, { UINT16_C(15792), UINT16_C(52653), UINT16_C(13296), UINT16_C(18948), UINT16_C(54184), UINT16_C(48865), UINT16_C( 7164), UINT16_C(17757) }, { INT16_C( 11257), INT16_MAX, INT16_C( 16458), INT16_C( 6766), INT16_MAX, INT16_C( 23234), INT16_C( 11478), -INT16_C( 2826) } }, { { -INT16_C( 20471), INT16_C( 25500), INT16_C( 1724), INT16_C( 16180), INT16_C( 5379), -INT16_C( 8742), INT16_C( 29477), -INT16_C( 10611) }, { UINT16_C(15024), UINT16_C(41379), UINT16_C(42861), UINT16_C( 5611), UINT16_C(52603), UINT16_C(30675), UINT16_C(12776), UINT16_C(61885) }, { -INT16_C( 5447), INT16_MAX, INT16_MAX, INT16_C( 21791), INT16_MAX, INT16_C( 21933), INT16_MAX, INT16_MAX } }, { 
{ INT16_C( 23009), -INT16_C( 25259), -INT16_C( 30369), INT16_C( 25564), -INT16_C( 18786), -INT16_C( 15552), -INT16_C( 13015), -INT16_C( 9575) }, { UINT16_C(15623), UINT16_C(29819), UINT16_C(26340), UINT16_C(24458), UINT16_C(23859), UINT16_C( 7127), UINT16_C(38030), UINT16_C(28429) }, { INT16_MAX, INT16_C( 4560), -INT16_C( 4029), INT16_MAX, INT16_C( 5073), -INT16_C( 8425), INT16_C( 25015), INT16_C( 18854) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_int16x8_t r = simde_vuqaddq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); simde_int16x8_t r = simde_vuqaddq_s16(a, b); simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vuqaddq_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int32_t a[4]; uint32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 1275880173), -INT32_C( 1984960277), -INT32_C( 934481762), -INT32_C( 979179843) }, { UINT32_C( 121183523), UINT32_C(3077030787), UINT32_C(2949791265), UINT32_C(3206537170) }, { INT32_C( 1397063696), INT32_C( 1092070510), INT32_C( 2015309503), INT32_MAX } }, { { INT32_C( 738929473), -INT32_C( 1296712940), INT32_C( 1752826539), INT32_C( 187505896) }, { UINT32_C(3172099897), UINT32_C(1265924394), UINT32_C(2314946231), UINT32_C(1732778534) }, { INT32_MAX, -INT32_C( 30788546), INT32_MAX, INT32_C( 1920284430) } }, { { INT32_C( 1502893125), -INT32_C( 1173599985), INT32_C( 857900619), -INT32_C( 599895901) }, { UINT32_C(3784921527), UINT32_C(2183990730), UINT32_C(2047551572), 
UINT32_C(2279691330) }, { INT32_MAX, INT32_C( 1010390745), INT32_MAX, INT32_C( 1679795429) } }, { { -INT32_C( 1209961048), INT32_C( 175238591), INT32_C( 373199731), -INT32_C( 1695318813) }, { UINT32_C(2558299341), UINT32_C(3994724762), UINT32_C( 325592529), UINT32_C( 563759481) }, { INT32_C( 1348338293), INT32_MAX, INT32_C( 698792260), -INT32_C( 1131559332) } }, { { INT32_C( 2128116671), -INT32_C( 595048088), -INT32_C( 1057831203), INT32_C( 274457923) }, { UINT32_C( 212391794), UINT32_C(1375388288), UINT32_C(1633968872), UINT32_C(1787035307) }, { INT32_MAX, INT32_C( 780340200), INT32_C( 576137669), INT32_C( 2061493230) } }, { { -INT32_C( 488088710), -INT32_C( 2101448283), INT32_C( 2051191095), INT32_C( 143367574) }, { UINT32_C(4094964596), UINT32_C(3729067766), UINT32_C( 473934192), UINT32_C( 579257000) }, { INT32_MAX, INT32_C( 1627619483), INT32_MAX, INT32_C( 722624574) } }, { { -INT32_C( 1023119586), INT32_C( 390448096), INT32_C( 177375092), -INT32_C( 1726800603) }, { UINT32_C(1183721296), UINT32_C(2787431222), UINT32_C( 633496701), UINT32_C(1145522470) }, { INT32_C( 160601710), INT32_MAX, INT32_C( 810871793), -INT32_C( 581278133) } }, { { -INT32_C( 1744352328), -INT32_C( 2102440946), -INT32_C( 108183084), -INT32_C( 1349345186) }, { UINT32_C(4260700359), UINT32_C(1906580212), UINT32_C(2761320062), UINT32_C(1743379887) }, { INT32_MAX, -INT32_C( 195860734), INT32_MAX, INT32_C( 394034701) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_int32x4_t r = simde_vuqaddq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); simde_int32x4_t r = simde_vuqaddq_s32(a, b); simde_test_arm_neon_write_i32x4(2, a, 
SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_vuqaddq_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { int64_t a[2]; uint64_t b[2]; int64_t r[2]; } test_vec[] = { { { INT64_C( 1277244646803632168), -INT64_C( 5837055554043099408) }, { UINT64_C(10025662640714150845), UINT64_C(13676615772832577718) }, { INT64_MAX, INT64_C( 7839560218789478310) } }, { { INT64_C( 7806900323564702729), -INT64_C( 4933207412548345356) }, { UINT64_C( 4198918928307336364), UINT64_C(10995657851050882374) }, { INT64_MAX, INT64_C( 6062450438502537018) } }, { { INT64_C( 3395773749102808377), INT64_C( 4706423876967127959) }, { UINT64_C(10414081876204174536), UINT64_C(14689621110835324342) }, { INT64_MAX, INT64_MAX } }, { { -INT64_C( 8576084105018492015), INT64_C( 7012507698472027771) }, { UINT64_C( 4757546734726237227), UINT64_C( 483683368102752601) }, { -INT64_C( 3818537370292254788), INT64_C( 7496191066574780372) } }, { { INT64_C( 5879676728020972830), INT64_C( 5751011118699099860) }, { UINT64_C( 706444675669916926), UINT64_C(15472304225789432892) }, { INT64_C( 6586121403690889756), INT64_MAX } }, { { INT64_C( 3244418405824302047), INT64_C( 1704446541564634931) }, { UINT64_C( 8403556614726053094), UINT64_C( 2040374986822554562) }, { INT64_MAX, INT64_C( 3744821528387189493) } }, { { -INT64_C( 1805987758484671127), -INT64_C( 2779029987225859522) }, { UINT64_C( 4755254193201025106), UINT64_C( 7852882482205238433) }, { INT64_C( 2949266434716353979), INT64_C( 5073852494979378911) } }, { { -INT64_C( 2100332582411514816), INT64_C( 5869783500826745000) }, { UINT64_C(10698540553456051384), UINT64_C( 9109495888139727869) }, { INT64_C( 8598207971044536568), INT64_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_uint64x2_t b = 
simde_vld1q_u64(test_vec[i].b); simde_int64x2_t r = simde_vuqaddq_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); simde_int64x2_t r = simde_vuqaddq_s64(a, b); simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vuqaddb_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vuqaddh_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vuqadds_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vuqaddd_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vuqadd_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vuqadd_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vuqadd_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vuqadd_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vuqaddq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vuqaddq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vuqaddq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vuqaddq_s64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/uzp.c000066400000000000000000002042241400333146700163050ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN uzp #include "test-neon.h" #include #if !defined(SIMDE_BUG_INTEL_857088) static int test_simde_vuzp_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2][2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 929.29), SIMDE_FLOAT32_C( 951.10) }, { SIMDE_FLOAT32_C( 363.77), SIMDE_FLOAT32_C( 789.61) }, { { SIMDE_FLOAT32_C( 929.29), SIMDE_FLOAT32_C( 363.77) }, { SIMDE_FLOAT32_C( 951.10), SIMDE_FLOAT32_C( 789.61) }, }, }, { { SIMDE_FLOAT32_C( -759.57), SIMDE_FLOAT32_C( 685.98) }, { SIMDE_FLOAT32_C( 841.21), SIMDE_FLOAT32_C( 515.20) }, { { SIMDE_FLOAT32_C( -759.57), SIMDE_FLOAT32_C( 841.21) }, { SIMDE_FLOAT32_C( 685.98), SIMDE_FLOAT32_C( 515.20) }, }, }, { { 
SIMDE_FLOAT32_C( 69.74), SIMDE_FLOAT32_C( -338.66) }, { SIMDE_FLOAT32_C( 456.03), SIMDE_FLOAT32_C( -463.79) }, { { SIMDE_FLOAT32_C( 69.74), SIMDE_FLOAT32_C( 456.03) }, { SIMDE_FLOAT32_C( -338.66), SIMDE_FLOAT32_C( -463.79) }, }, }, { { SIMDE_FLOAT32_C( -878.94), SIMDE_FLOAT32_C( 168.02) }, { SIMDE_FLOAT32_C( -767.09), SIMDE_FLOAT32_C( 264.02) }, { { SIMDE_FLOAT32_C( -878.94), SIMDE_FLOAT32_C( -767.09) }, { SIMDE_FLOAT32_C( 168.02), SIMDE_FLOAT32_C( 264.02) }, }, }, { { SIMDE_FLOAT32_C( -711.03), SIMDE_FLOAT32_C( 572.52) }, { SIMDE_FLOAT32_C( 131.81), SIMDE_FLOAT32_C( -844.24) }, { { SIMDE_FLOAT32_C( -711.03), SIMDE_FLOAT32_C( 131.81) }, { SIMDE_FLOAT32_C( 572.52), SIMDE_FLOAT32_C( -844.24) }, }, }, { { SIMDE_FLOAT32_C( -898.82), SIMDE_FLOAT32_C( -685.93) }, { SIMDE_FLOAT32_C( -963.34), SIMDE_FLOAT32_C( -684.35) }, { { SIMDE_FLOAT32_C( -898.82), SIMDE_FLOAT32_C( -963.34) }, { SIMDE_FLOAT32_C( -685.93), SIMDE_FLOAT32_C( -684.35) }, }, }, { { SIMDE_FLOAT32_C( -332.36), SIMDE_FLOAT32_C( 141.67) }, { SIMDE_FLOAT32_C( -308.83), SIMDE_FLOAT32_C( 753.40) }, { { SIMDE_FLOAT32_C( -332.36), SIMDE_FLOAT32_C( -308.83) }, { SIMDE_FLOAT32_C( 141.67), SIMDE_FLOAT32_C( 753.40) }, }, }, { { SIMDE_FLOAT32_C( 573.65), SIMDE_FLOAT32_C( -669.39) }, { SIMDE_FLOAT32_C( 323.27), SIMDE_FLOAT32_C( 502.94) }, { { SIMDE_FLOAT32_C( 573.65), SIMDE_FLOAT32_C( 323.27) }, { SIMDE_FLOAT32_C( -669.39), SIMDE_FLOAT32_C( 502.94) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2x2_t r = simde_vuzp_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r.val[0], simde_vld1_f32(test_vec[i].r[0]), 1); simde_test_arm_neon_assert_equal_f32x2(r.val[1], simde_vld1_f32(test_vec[i].r[1]), 1); } return 0; } static int test_simde_vuzp_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[2][8]; } test_vec[] = { { { -INT8_C( 16), -INT8_C( 95), 
INT8_C( 126), INT8_C( 101), -INT8_C( 104), -INT8_C( 55), -INT8_C( 25), -INT8_C( 95) }, { -INT8_C( 93), INT8_C( 64), INT8_C( 101), INT8_C( 61), INT8_C( 13), -INT8_C( 69), -INT8_C( 19), -INT8_C( 4) }, { { -INT8_C( 16), INT8_C( 126), -INT8_C( 104), -INT8_C( 25), -INT8_C( 93), INT8_C( 101), INT8_C( 13), -INT8_C( 19) }, { -INT8_C( 95), INT8_C( 101), -INT8_C( 55), -INT8_C( 95), INT8_C( 64), INT8_C( 61), -INT8_C( 69), -INT8_C( 4) }, }, }, { { -INT8_C( 2), INT8_C( 116), INT8_C( 70), INT8_C( 52), INT8_C( 122), -INT8_C( 58), INT8_C( 86), INT8_C( 8) }, { INT8_C( 13), -INT8_C( 26), -INT8_C( 56), -INT8_C( 121), INT8_C( 18), -INT8_C( 54), -INT8_C( 106), INT8_C( 2) }, { { -INT8_C( 2), INT8_C( 70), INT8_C( 122), INT8_C( 86), INT8_C( 13), -INT8_C( 56), INT8_C( 18), -INT8_C( 106) }, { INT8_C( 116), INT8_C( 52), -INT8_C( 58), INT8_C( 8), -INT8_C( 26), -INT8_C( 121), -INT8_C( 54), INT8_C( 2) }, }, }, { { INT8_C( 107), INT8_C( 20), INT8_C( 104), INT8_C( 4), -INT8_C( 35), INT8_C( 79), -INT8_C( 91), INT8_MIN }, { -INT8_C( 113), INT8_C( 10), -INT8_C( 67), -INT8_C( 100), -INT8_C( 59), -INT8_C( 86), -INT8_C( 104), -INT8_C( 61) }, { { INT8_C( 107), INT8_C( 104), -INT8_C( 35), -INT8_C( 91), -INT8_C( 113), -INT8_C( 67), -INT8_C( 59), -INT8_C( 104) }, { INT8_C( 20), INT8_C( 4), INT8_C( 79), INT8_MIN, INT8_C( 10), -INT8_C( 100), -INT8_C( 86), -INT8_C( 61) }, }, }, { { INT8_C( 30), -INT8_C( 33), -INT8_C( 8), -INT8_C( 104), -INT8_C( 91), INT8_C( 78), -INT8_C( 96), -INT8_C( 78) }, { INT8_C( 52), INT8_C( 105), INT8_C( 58), INT8_C( 70), INT8_C( 51), -INT8_C( 48), INT8_C( 72), -INT8_C( 98) }, { { INT8_C( 30), -INT8_C( 8), -INT8_C( 91), -INT8_C( 96), INT8_C( 52), INT8_C( 58), INT8_C( 51), INT8_C( 72) }, { -INT8_C( 33), -INT8_C( 104), INT8_C( 78), -INT8_C( 78), INT8_C( 105), INT8_C( 70), -INT8_C( 48), -INT8_C( 98) }, }, }, { { -INT8_C( 28), -INT8_C( 80), -INT8_C( 94), -INT8_C( 62), -INT8_C( 1), INT8_C( 71), INT8_C( 66), -INT8_C( 114) }, { INT8_C( 81), INT8_C( 0), INT8_C( 43), INT8_C( 23), -INT8_C( 86), 
-INT8_C( 61), -INT8_C( 38), -INT8_C( 55) }, { { -INT8_C( 28), -INT8_C( 94), -INT8_C( 1), INT8_C( 66), INT8_C( 81), INT8_C( 43), -INT8_C( 86), -INT8_C( 38) }, { -INT8_C( 80), -INT8_C( 62), INT8_C( 71), -INT8_C( 114), INT8_C( 0), INT8_C( 23), -INT8_C( 61), -INT8_C( 55) }, }, }, { { -INT8_C( 94), -INT8_C( 46), INT8_C( 97), INT8_C( 71), INT8_C( 32), INT8_C( 2), -INT8_C( 6), INT8_C( 84) }, { INT8_C( 107), INT8_C( 52), -INT8_C( 102), -INT8_C( 98), INT8_C( 4), -INT8_C( 29), INT8_C( 60), -INT8_C( 24) }, { { -INT8_C( 94), INT8_C( 97), INT8_C( 32), -INT8_C( 6), INT8_C( 107), -INT8_C( 102), INT8_C( 4), INT8_C( 60) }, { -INT8_C( 46), INT8_C( 71), INT8_C( 2), INT8_C( 84), INT8_C( 52), -INT8_C( 98), -INT8_C( 29), -INT8_C( 24) }, }, }, { { -INT8_C( 109), -INT8_C( 33), -INT8_C( 86), -INT8_C( 109), INT8_C( 38), -INT8_C( 19), INT8_C( 33), INT8_C( 120) }, { -INT8_C( 19), INT8_C( 76), -INT8_C( 113), -INT8_C( 105), INT8_C( 16), INT8_C( 105), INT8_C( 96), -INT8_C( 78) }, { { -INT8_C( 109), -INT8_C( 86), INT8_C( 38), INT8_C( 33), -INT8_C( 19), -INT8_C( 113), INT8_C( 16), INT8_C( 96) }, { -INT8_C( 33), -INT8_C( 109), -INT8_C( 19), INT8_C( 120), INT8_C( 76), -INT8_C( 105), INT8_C( 105), -INT8_C( 78) }, }, }, { { INT8_C( 60), -INT8_C( 62), -INT8_C( 6), INT8_C( 92), -INT8_C( 60), -INT8_C( 12), -INT8_C( 79), INT8_C( 47) }, { INT8_C( 40), INT8_C( 75), -INT8_C( 51), INT8_C( 44), INT8_C( 46), INT8_C( 9), INT8_C( 20), -INT8_C( 62) }, { { INT8_C( 60), -INT8_C( 6), -INT8_C( 60), -INT8_C( 79), INT8_C( 40), -INT8_C( 51), INT8_C( 46), INT8_C( 20) }, { -INT8_C( 62), INT8_C( 92), -INT8_C( 12), INT8_C( 47), INT8_C( 75), INT8_C( 44), INT8_C( 9), -INT8_C( 62) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8x2_t r = simde_vuzp_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r.val[0], simde_vld1_s8(test_vec[i].r[0])); 
simde_test_arm_neon_assert_equal_i8x8(r.val[1], simde_vld1_s8(test_vec[i].r[1])); } return 0; } static int test_simde_vuzp_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[2][4]; } test_vec[] = { { { -INT16_C( 28880), -INT16_C( 22644), INT16_C( 8023), INT16_C( 26036) }, { -INT16_C( 28471), -INT16_C( 15106), INT16_C( 18561), INT16_C( 11767) }, { { -INT16_C( 28880), INT16_C( 8023), -INT16_C( 28471), INT16_C( 18561) }, { -INT16_C( 22644), INT16_C( 26036), -INT16_C( 15106), INT16_C( 11767) }, }, }, { { INT16_C( 13251), INT16_C( 27928), INT16_C( 6580), -INT16_C( 9190) }, { INT16_C( 31504), -INT16_C( 17767), -INT16_C( 13685), -INT16_C( 17646) }, { { INT16_C( 13251), INT16_C( 6580), INT16_C( 31504), -INT16_C( 13685) }, { INT16_C( 27928), -INT16_C( 9190), -INT16_C( 17767), -INT16_C( 17646) }, }, }, { { -INT16_C( 24999), -INT16_C( 20382), INT16_C( 5822), -INT16_C( 30955) }, { INT16_C( 5031), INT16_C( 10315), INT16_C( 16988), INT16_C( 8021) }, { { -INT16_C( 24999), INT16_C( 5822), INT16_C( 5031), INT16_C( 16988) }, { -INT16_C( 20382), -INT16_C( 30955), INT16_C( 10315), INT16_C( 8021) }, }, }, { { INT16_C( 28021), INT16_C( 10637), -INT16_C( 22650), -INT16_C( 27130) }, { -INT16_C( 24798), -INT16_C( 21167), INT16_C( 25449), -INT16_C( 15767) }, { { INT16_C( 28021), -INT16_C( 22650), -INT16_C( 24798), INT16_C( 25449) }, { INT16_C( 10637), -INT16_C( 27130), -INT16_C( 21167), -INT16_C( 15767) }, }, }, { { -INT16_C( 13567), -INT16_C( 16525), -INT16_C( 30494), -INT16_C( 30394) }, { -INT16_C( 28260), -INT16_C( 1871), INT16_C( 1747), INT16_C( 18711) }, { { -INT16_C( 13567), -INT16_C( 30494), -INT16_C( 28260), INT16_C( 1747) }, { -INT16_C( 16525), -INT16_C( 30394), -INT16_C( 1871), INT16_C( 18711) }, }, }, { { -INT16_C( 23436), -INT16_C( 1422), INT16_C( 30795), INT16_C( 28305) }, { -INT16_C( 7657), -INT16_C( 32741), -INT16_C( 31675), INT16_C( 17987) }, { { -INT16_C( 23436), INT16_C( 30795), -INT16_C( 7657), -INT16_C( 31675) }, { -INT16_C( 1422), INT16_C( 
28305), -INT16_C( 32741), INT16_C( 17987) }, }, }, { { -INT16_C( 18864), INT16_C( 12806), INT16_C( 19518), -INT16_C( 9541) }, { INT16_C( 27870), -INT16_C( 20014), -INT16_C( 5518), -INT16_C( 6406) }, { { -INT16_C( 18864), INT16_C( 19518), INT16_C( 27870), -INT16_C( 5518) }, { INT16_C( 12806), -INT16_C( 9541), -INT16_C( 20014), -INT16_C( 6406) }, }, }, { { INT16_C( 28046), -INT16_C( 9503), INT16_C( 29413), -INT16_C( 696) }, { INT16_C( 25428), -INT16_C( 26243), -INT16_C( 16152), INT16_C( 14559) }, { { INT16_C( 28046), INT16_C( 29413), INT16_C( 25428), -INT16_C( 16152) }, { -INT16_C( 9503), -INT16_C( 696), -INT16_C( 26243), INT16_C( 14559) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4x2_t r = simde_vuzp_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r.val[0], simde_vld1_s16(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_i16x4(r.val[1], simde_vld1_s16(test_vec[i].r[1])); } return 0; } static int test_simde_vuzp_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2][2]; } test_vec[] = { { { INT32_C( 1801551691), -INT32_C( 2123910970) }, { -INT32_C( 406438839), INT32_C( 1744642352) }, { { INT32_C( 1801551691), -INT32_C( 406438839) }, { -INT32_C( 2123910970), INT32_C( 1744642352) }, }, }, { { INT32_C( 1736006795), -INT32_C( 571974838) }, { -INT32_C( 1018486927), INT32_C( 227361474) }, { { INT32_C( 1736006795), -INT32_C( 1018486927) }, { -INT32_C( 571974838), INT32_C( 227361474) }, }, }, { { -INT32_C( 2022117695), -INT32_C( 402071650) }, { INT32_C( 1288687388), INT32_C( 2075380976) }, { { -INT32_C( 2022117695), INT32_C( 1288687388) }, { -INT32_C( 402071650), INT32_C( 2075380976) }, }, }, { { INT32_C( 1927490600), -INT32_C( 112145529) }, { -INT32_C( 1396925462), -INT32_C( 1615246882) }, { { INT32_C( 1927490600), -INT32_C( 1396925462) }, { -INT32_C( 112145529), -INT32_C( 1615246882) }, }, 
}, { { -INT32_C( 702074313), INT32_C( 767438609) }, { -INT32_C( 293958146), -INT32_C( 2106970791) }, { { -INT32_C( 702074313), -INT32_C( 293958146) }, { INT32_C( 767438609), -INT32_C( 2106970791) }, }, }, { { -INT32_C( 504083110), INT32_C( 47858712) }, { -INT32_C( 1095854112), INT32_C( 408774624) }, { { -INT32_C( 504083110), -INT32_C( 1095854112) }, { INT32_C( 47858712), INT32_C( 408774624) }, }, }, { { -INT32_C( 1410431847), -INT32_C( 1294422860) }, { -INT32_C( 1834921415), -INT32_C( 636220544) }, { { -INT32_C( 1410431847), -INT32_C( 1834921415) }, { -INT32_C( 1294422860), -INT32_C( 636220544) }, }, }, { { INT32_C( 1891305816), INT32_C( 762484301) }, { INT32_C( 233513005), INT32_C( 556091783) }, { { INT32_C( 1891305816), INT32_C( 233513005) }, { INT32_C( 762484301), INT32_C( 556091783) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2x2_t r = simde_vuzp_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r.val[0], simde_vld1_s32(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_i32x2(r.val[1], simde_vld1_s32(test_vec[i].r[1])); } return 0; } static int test_simde_vuzp_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[2][8]; } test_vec[] = { { { UINT8_C(221), UINT8_C(128), UINT8_C(185), UINT8_C(185), UINT8_C( 80), UINT8_C(201), UINT8_C( 16), UINT8_C(240) }, { UINT8_C(182), UINT8_C(179), UINT8_C(131), UINT8_C(144), UINT8_C( 26), UINT8_C(213), UINT8_C( 46), UINT8_C(116) }, { { UINT8_C(221), UINT8_C(185), UINT8_C( 80), UINT8_C( 16), UINT8_C(182), UINT8_C(131), UINT8_C( 26), UINT8_C( 46) }, { UINT8_C(128), UINT8_C(185), UINT8_C(201), UINT8_C(240), UINT8_C(179), UINT8_C(144), UINT8_C(213), UINT8_C(116) }, }, }, { { UINT8_C(128), UINT8_C(109), UINT8_C(182), UINT8_C( 21), UINT8_C(134), UINT8_C( 17), UINT8_C(237), UINT8_C(242) }, { UINT8_C(198), UINT8_C(216), UINT8_C(134), UINT8_C(196), UINT8_C( 
59), UINT8_C(141), UINT8_C(156), UINT8_C( 24) }, { { UINT8_C(128), UINT8_C(182), UINT8_C(134), UINT8_C(237), UINT8_C(198), UINT8_C(134), UINT8_C( 59), UINT8_C(156) }, { UINT8_C(109), UINT8_C( 21), UINT8_C( 17), UINT8_C(242), UINT8_C(216), UINT8_C(196), UINT8_C(141), UINT8_C( 24) }, }, }, { { UINT8_C( 14), UINT8_C( 85), UINT8_C(209), UINT8_C( 94), UINT8_C( 31), UINT8_C(225), UINT8_C( 78), UINT8_C(213) }, { UINT8_C(148), UINT8_C(210), UINT8_C(101), UINT8_C(174), UINT8_C(167), UINT8_C(148), UINT8_C( 34), UINT8_C( 39) }, { { UINT8_C( 14), UINT8_C(209), UINT8_C( 31), UINT8_C( 78), UINT8_C(148), UINT8_C(101), UINT8_C(167), UINT8_C( 34) }, { UINT8_C( 85), UINT8_C( 94), UINT8_C(225), UINT8_C(213), UINT8_C(210), UINT8_C(174), UINT8_C(148), UINT8_C( 39) }, }, }, { { UINT8_C( 1), UINT8_C(217), UINT8_C( 60), UINT8_C(135), UINT8_C(234), UINT8_C( 42), UINT8_C(121), UINT8_C(176) }, { UINT8_C( 2), UINT8_C( 0), UINT8_C(116), UINT8_C( 62), UINT8_C(141), UINT8_C( 16), UINT8_C( 86), UINT8_C(155) }, { { UINT8_C( 1), UINT8_C( 60), UINT8_C(234), UINT8_C(121), UINT8_C( 2), UINT8_C(116), UINT8_C(141), UINT8_C( 86) }, { UINT8_C(217), UINT8_C(135), UINT8_C( 42), UINT8_C(176), UINT8_C( 0), UINT8_C( 62), UINT8_C( 16), UINT8_C(155) }, }, }, { { UINT8_C(102), UINT8_C( 40), UINT8_C(249), UINT8_C(133), UINT8_C( 9), UINT8_C( 72), UINT8_C( 90), UINT8_C(158) }, { UINT8_C( 26), UINT8_C(192), UINT8_C( 76), UINT8_C(193), UINT8_C( 84), UINT8_C(111), UINT8_C(232), UINT8_C( 85) }, { { UINT8_C(102), UINT8_C(249), UINT8_C( 9), UINT8_C( 90), UINT8_C( 26), UINT8_C( 76), UINT8_C( 84), UINT8_C(232) }, { UINT8_C( 40), UINT8_C(133), UINT8_C( 72), UINT8_C(158), UINT8_C(192), UINT8_C(193), UINT8_C(111), UINT8_C( 85) }, }, }, { { UINT8_C( 72), UINT8_C( 37), UINT8_C(220), UINT8_C( 50), UINT8_C( 79), UINT8_C( 86), UINT8_C(226), UINT8_C( 81) }, { UINT8_C( 86), UINT8_C( 86), UINT8_C(143), UINT8_C(227), UINT8_C(103), UINT8_C(230), UINT8_C(127), UINT8_C(205) }, { { UINT8_C( 72), UINT8_C(220), UINT8_C( 79), UINT8_C(226), 
UINT8_C( 86), UINT8_C(143), UINT8_C(103), UINT8_C(127) }, { UINT8_C( 37), UINT8_C( 50), UINT8_C( 86), UINT8_C( 81), UINT8_C( 86), UINT8_C(227), UINT8_C(230), UINT8_C(205) }, }, }, { { UINT8_C( 14), UINT8_C(120), UINT8_C( 82), UINT8_C( 23), UINT8_C(192), UINT8_C(172), UINT8_C(181), UINT8_C(218) }, { UINT8_C(108), UINT8_C( 2), UINT8_C(155), UINT8_C(192), UINT8_C(113), UINT8_C(132), UINT8_C( 21), UINT8_C(185) }, { { UINT8_C( 14), UINT8_C( 82), UINT8_C(192), UINT8_C(181), UINT8_C(108), UINT8_C(155), UINT8_C(113), UINT8_C( 21) }, { UINT8_C(120), UINT8_C( 23), UINT8_C(172), UINT8_C(218), UINT8_C( 2), UINT8_C(192), UINT8_C(132), UINT8_C(185) }, }, }, { { UINT8_C(169), UINT8_C(242), UINT8_C(235), UINT8_C(248), UINT8_C( 72), UINT8_C(205), UINT8_C( 73), UINT8_C(158) }, { UINT8_C( 35), UINT8_C(217), UINT8_C(129), UINT8_C(138), UINT8_C(191), UINT8_C( 0), UINT8_C( 87), UINT8_C(205) }, { { UINT8_C(169), UINT8_C(235), UINT8_C( 72), UINT8_C( 73), UINT8_C( 35), UINT8_C(129), UINT8_C(191), UINT8_C( 87) }, { UINT8_C(242), UINT8_C(248), UINT8_C(205), UINT8_C(158), UINT8_C(217), UINT8_C(138), UINT8_C( 0), UINT8_C(205) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8x2_t r = simde_vuzp_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r.val[0], simde_vld1_u8(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_u8x8(r.val[1], simde_vld1_u8(test_vec[i].r[1])); } return 0; } static int test_simde_vuzp_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[2][4]; } test_vec[] = { { { UINT16_C(53183), UINT16_C( 8344), UINT16_C(47465), UINT16_C(51179) }, { UINT16_C(23788), UINT16_C( 1786), UINT16_C(23927), UINT16_C(30761) }, { { UINT16_C(53183), UINT16_C(47465), UINT16_C(23788), UINT16_C(23927) }, { UINT16_C( 8344), UINT16_C(51179), UINT16_C( 1786), UINT16_C(30761) }, }, }, { { UINT16_C(50756), UINT16_C(22956), UINT16_C( 
7637), UINT16_C(43027) }, { UINT16_C(20916), UINT16_C(48634), UINT16_C( 346), UINT16_C( 6692) }, { { UINT16_C(50756), UINT16_C( 7637), UINT16_C(20916), UINT16_C( 346) }, { UINT16_C(22956), UINT16_C(43027), UINT16_C(48634), UINT16_C( 6692) }, }, }, { { UINT16_C(48336), UINT16_C(14650), UINT16_C( 9590), UINT16_C(25089) }, { UINT16_C(64386), UINT16_C(63848), UINT16_C(37209), UINT16_C(40306) }, { { UINT16_C(48336), UINT16_C( 9590), UINT16_C(64386), UINT16_C(37209) }, { UINT16_C(14650), UINT16_C(25089), UINT16_C(63848), UINT16_C(40306) }, }, }, { { UINT16_C( 7768), UINT16_C(11767), UINT16_C( 2619), UINT16_C(61397) }, { UINT16_C(53340), UINT16_C(46765), UINT16_C(53713), UINT16_C(41424) }, { { UINT16_C( 7768), UINT16_C( 2619), UINT16_C(53340), UINT16_C(53713) }, { UINT16_C(11767), UINT16_C(61397), UINT16_C(46765), UINT16_C(41424) }, }, }, { { UINT16_C( 2701), UINT16_C( 986), UINT16_C(56112), UINT16_C(45669) }, { UINT16_C(52951), UINT16_C(12459), UINT16_C( 7519), UINT16_C(47053) }, { { UINT16_C( 2701), UINT16_C(56112), UINT16_C(52951), UINT16_C( 7519) }, { UINT16_C( 986), UINT16_C(45669), UINT16_C(12459), UINT16_C(47053) }, }, }, { { UINT16_C(50235), UINT16_C(30692), UINT16_C(47823), UINT16_C(11110) }, { UINT16_C( 5002), UINT16_C(23521), UINT16_C(45796), UINT16_C(29436) }, { { UINT16_C(50235), UINT16_C(47823), UINT16_C( 5002), UINT16_C(45796) }, { UINT16_C(30692), UINT16_C(11110), UINT16_C(23521), UINT16_C(29436) }, }, }, { { UINT16_C(54972), UINT16_C(60533), UINT16_C(56242), UINT16_C(35230) }, { UINT16_C(19113), UINT16_C( 2233), UINT16_C(34407), UINT16_C(41920) }, { { UINT16_C(54972), UINT16_C(56242), UINT16_C(19113), UINT16_C(34407) }, { UINT16_C(60533), UINT16_C(35230), UINT16_C( 2233), UINT16_C(41920) }, }, }, { { UINT16_C(42059), UINT16_C( 6682), UINT16_C(32862), UINT16_C(59461) }, { UINT16_C( 9876), UINT16_C(30787), UINT16_C(16344), UINT16_C(38378) }, { { UINT16_C(42059), UINT16_C(32862), UINT16_C( 9876), UINT16_C(16344) }, { UINT16_C( 6682), UINT16_C(59461), 
UINT16_C(30787), UINT16_C(38378) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4x2_t r = simde_vuzp_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r.val[0], simde_vld1_u16(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_u16x4(r.val[1], simde_vld1_u16(test_vec[i].r[1])); } return 0; } static int test_simde_vuzp_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2][2]; } test_vec[] = { { { UINT32_C(4244796768), UINT32_C(1520735748) }, { UINT32_C(1408214781), UINT32_C( 807312678) }, { { UINT32_C(4244796768), UINT32_C(1408214781) }, { UINT32_C(1520735748), UINT32_C( 807312678) }, }, }, { { UINT32_C(2360753501), UINT32_C(2681976087) }, { UINT32_C( 762530411), UINT32_C(1624729856) }, { { UINT32_C(2360753501), UINT32_C( 762530411) }, { UINT32_C(2681976087), UINT32_C(1624729856) }, }, }, { { UINT32_C(3797801438), UINT32_C(1832714608) }, { UINT32_C(3468766375), UINT32_C( 654237641) }, { { UINT32_C(3797801438), UINT32_C(3468766375) }, { UINT32_C(1832714608), UINT32_C( 654237641) }, }, }, { { UINT32_C( 867349532), UINT32_C(3570568553) }, { UINT32_C(3607185111), UINT32_C(2369247663) }, { { UINT32_C( 867349532), UINT32_C(3607185111) }, { UINT32_C(3570568553), UINT32_C(2369247663) }, }, }, { { UINT32_C( 577738162), UINT32_C(1049668758) }, { UINT32_C(2718716377), UINT32_C(1288243760) }, { { UINT32_C( 577738162), UINT32_C(2718716377) }, { UINT32_C(1049668758), UINT32_C(1288243760) }, }, }, { { UINT32_C( 662666174), UINT32_C(3774632201) }, { UINT32_C(1186528663), UINT32_C(2312368086) }, { { UINT32_C( 662666174), UINT32_C(1186528663) }, { UINT32_C(3774632201), UINT32_C(2312368086) }, }, }, { { UINT32_C( 464208772), UINT32_C(3361291247) }, { UINT32_C(3161154956), UINT32_C( 755512431) }, { { UINT32_C( 464208772), UINT32_C(3161154956) }, { UINT32_C(3361291247), UINT32_C( 755512431) }, }, }, { { 
UINT32_C(3092547503), UINT32_C(1889095897) }, { UINT32_C( 615993678), UINT32_C(3316484672) }, { { UINT32_C(3092547503), UINT32_C( 615993678) }, { UINT32_C(1889095897), UINT32_C(3316484672) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2x2_t r = simde_vuzp_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r.val[0], simde_vld1_u32(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_u32x2(r.val[1], simde_vld1_u32(test_vec[i].r[1])); } return 0; } static int test_simde_vuzpq_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[2][4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 336.86), SIMDE_FLOAT32_C( 940.59), SIMDE_FLOAT32_C( 624.20), SIMDE_FLOAT32_C( 198.24) }, { SIMDE_FLOAT32_C( -819.71), SIMDE_FLOAT32_C( 812.42), SIMDE_FLOAT32_C( -979.32), SIMDE_FLOAT32_C( -307.51) }, { { SIMDE_FLOAT32_C( 336.86), SIMDE_FLOAT32_C( 624.20), SIMDE_FLOAT32_C( -819.71), SIMDE_FLOAT32_C( -979.32) }, { SIMDE_FLOAT32_C( 940.59), SIMDE_FLOAT32_C( 198.24), SIMDE_FLOAT32_C( 812.42), SIMDE_FLOAT32_C( -307.51) }, }, }, { { SIMDE_FLOAT32_C( 894.01), SIMDE_FLOAT32_C( -916.74), SIMDE_FLOAT32_C( -332.33), SIMDE_FLOAT32_C( 0.85) }, { SIMDE_FLOAT32_C( 121.08), SIMDE_FLOAT32_C( 820.47), SIMDE_FLOAT32_C( -541.38), SIMDE_FLOAT32_C( 730.98) }, { { SIMDE_FLOAT32_C( 894.01), SIMDE_FLOAT32_C( -332.33), SIMDE_FLOAT32_C( 121.08), SIMDE_FLOAT32_C( -541.38) }, { SIMDE_FLOAT32_C( -916.74), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 820.47), SIMDE_FLOAT32_C( 730.98) }, }, }, { { SIMDE_FLOAT32_C( -88.10), SIMDE_FLOAT32_C( -685.13), SIMDE_FLOAT32_C( -502.77), SIMDE_FLOAT32_C( -970.47) }, { SIMDE_FLOAT32_C( 803.17), SIMDE_FLOAT32_C( 233.77), SIMDE_FLOAT32_C( 409.34), SIMDE_FLOAT32_C( 846.91) }, { { SIMDE_FLOAT32_C( -88.10), SIMDE_FLOAT32_C( -502.77), SIMDE_FLOAT32_C( 803.17), SIMDE_FLOAT32_C( 409.34) }, { SIMDE_FLOAT32_C( 
-685.13), SIMDE_FLOAT32_C( -970.47), SIMDE_FLOAT32_C( 233.77), SIMDE_FLOAT32_C( 846.91) }, }, }, { { SIMDE_FLOAT32_C( 785.89), SIMDE_FLOAT32_C( -295.61), SIMDE_FLOAT32_C( -65.85), SIMDE_FLOAT32_C( 708.46) }, { SIMDE_FLOAT32_C( -432.93), SIMDE_FLOAT32_C( 209.38), SIMDE_FLOAT32_C( 884.61), SIMDE_FLOAT32_C( 903.93) }, { { SIMDE_FLOAT32_C( 785.89), SIMDE_FLOAT32_C( -65.85), SIMDE_FLOAT32_C( -432.93), SIMDE_FLOAT32_C( 884.61) }, { SIMDE_FLOAT32_C( -295.61), SIMDE_FLOAT32_C( 708.46), SIMDE_FLOAT32_C( 209.38), SIMDE_FLOAT32_C( 903.93) }, }, }, { { SIMDE_FLOAT32_C( 149.97), SIMDE_FLOAT32_C( 508.80), SIMDE_FLOAT32_C( 102.16), SIMDE_FLOAT32_C( 330.26) }, { SIMDE_FLOAT32_C( 321.22), SIMDE_FLOAT32_C( 122.85), SIMDE_FLOAT32_C( -977.25), SIMDE_FLOAT32_C( 215.24) }, { { SIMDE_FLOAT32_C( 149.97), SIMDE_FLOAT32_C( 102.16), SIMDE_FLOAT32_C( 321.22), SIMDE_FLOAT32_C( -977.25) }, { SIMDE_FLOAT32_C( 508.80), SIMDE_FLOAT32_C( 330.26), SIMDE_FLOAT32_C( 122.85), SIMDE_FLOAT32_C( 215.24) }, }, }, { { SIMDE_FLOAT32_C( 206.11), SIMDE_FLOAT32_C( -309.58), SIMDE_FLOAT32_C( -783.92), SIMDE_FLOAT32_C( -672.81) }, { SIMDE_FLOAT32_C( -489.11), SIMDE_FLOAT32_C( -325.29), SIMDE_FLOAT32_C( -941.84), SIMDE_FLOAT32_C( 422.79) }, { { SIMDE_FLOAT32_C( 206.11), SIMDE_FLOAT32_C( -783.92), SIMDE_FLOAT32_C( -489.11), SIMDE_FLOAT32_C( -941.84) }, { SIMDE_FLOAT32_C( -309.58), SIMDE_FLOAT32_C( -672.81), SIMDE_FLOAT32_C( -325.29), SIMDE_FLOAT32_C( 422.79) }, }, }, { { SIMDE_FLOAT32_C( -10.42), SIMDE_FLOAT32_C( -444.60), SIMDE_FLOAT32_C( 452.32), SIMDE_FLOAT32_C( -207.25) }, { SIMDE_FLOAT32_C( 789.16), SIMDE_FLOAT32_C( -138.34), SIMDE_FLOAT32_C( -360.35), SIMDE_FLOAT32_C( 575.05) }, { { SIMDE_FLOAT32_C( -10.42), SIMDE_FLOAT32_C( 452.32), SIMDE_FLOAT32_C( 789.16), SIMDE_FLOAT32_C( -360.35) }, { SIMDE_FLOAT32_C( -444.60), SIMDE_FLOAT32_C( -207.25), SIMDE_FLOAT32_C( -138.34), SIMDE_FLOAT32_C( 575.05) }, }, }, { { SIMDE_FLOAT32_C( 566.05), SIMDE_FLOAT32_C( 573.80), SIMDE_FLOAT32_C( 283.51), SIMDE_FLOAT32_C( -866.88) 
}, { SIMDE_FLOAT32_C( -216.82), SIMDE_FLOAT32_C( 168.12), SIMDE_FLOAT32_C( -962.96), SIMDE_FLOAT32_C( 933.15) }, { { SIMDE_FLOAT32_C( 566.05), SIMDE_FLOAT32_C( 283.51), SIMDE_FLOAT32_C( -216.82), SIMDE_FLOAT32_C( -962.96) }, { SIMDE_FLOAT32_C( 573.80), SIMDE_FLOAT32_C( -866.88), SIMDE_FLOAT32_C( 168.12), SIMDE_FLOAT32_C( 933.15) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4x2_t r = simde_vuzpq_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r.val[0], simde_vld1q_f32(test_vec[i].r[0]), 1); simde_test_arm_neon_assert_equal_f32x4(r.val[1], simde_vld1q_f32(test_vec[i].r[1]), 1); } return 0; } static int test_simde_vuzpq_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[2][16]; } test_vec[] = { { { INT8_C( 122), INT8_C( 73), -INT8_C( 37), INT8_C( 66), INT8_C( 123), INT8_C( 122), INT8_C( 37), INT8_C( 13), -INT8_C( 49), -INT8_C( 9), INT8_C( 86), INT8_C( 52), INT8_C( 118), -INT8_C( 30), INT8_C( 83), -INT8_C( 46) }, { -INT8_C( 34), INT8_C( 15), INT8_C( 51), -INT8_C( 94), -INT8_C( 38), INT8_C( 91), INT8_C( 64), -INT8_C( 31), INT8_C( 90), -INT8_C( 46), INT8_C( 96), INT8_C( 12), INT8_C( 83), INT8_C( 13), INT8_C( 24), -INT8_C( 51) }, { { INT8_C( 122), -INT8_C( 37), INT8_C( 123), INT8_C( 37), -INT8_C( 49), INT8_C( 86), INT8_C( 118), INT8_C( 83), -INT8_C( 34), INT8_C( 51), -INT8_C( 38), INT8_C( 64), INT8_C( 90), INT8_C( 96), INT8_C( 83), INT8_C( 24) }, { INT8_C( 73), INT8_C( 66), INT8_C( 122), INT8_C( 13), -INT8_C( 9), INT8_C( 52), -INT8_C( 30), -INT8_C( 46), INT8_C( 15), -INT8_C( 94), INT8_C( 91), -INT8_C( 31), -INT8_C( 46), INT8_C( 12), INT8_C( 13), -INT8_C( 51) }, }, }, { { INT8_C( 87), -INT8_C( 12), INT8_C( 16), -INT8_C( 46), INT8_C( 110), INT8_C( 53), -INT8_C( 33), INT8_C( 61), INT8_C( 44), INT8_C( 53), INT8_C( 113), -INT8_C( 94), INT8_C( 23), -INT8_C( 60), INT8_C( 117), -INT8_C( 10) }, 
{ -INT8_C( 45), -INT8_C( 88), -INT8_C( 104), -INT8_C( 83), INT8_C( 3), -INT8_C( 39), -INT8_C( 113), INT8_C( 93), -INT8_C( 85), -INT8_C( 17), INT8_C( 105), -INT8_C( 1), -INT8_C( 3), -INT8_C( 126), -INT8_C( 52), INT8_C( 84) }, { { INT8_C( 87), INT8_C( 16), INT8_C( 110), -INT8_C( 33), INT8_C( 44), INT8_C( 113), INT8_C( 23), INT8_C( 117), -INT8_C( 45), -INT8_C( 104), INT8_C( 3), -INT8_C( 113), -INT8_C( 85), INT8_C( 105), -INT8_C( 3), -INT8_C( 52) }, { -INT8_C( 12), -INT8_C( 46), INT8_C( 53), INT8_C( 61), INT8_C( 53), -INT8_C( 94), -INT8_C( 60), -INT8_C( 10), -INT8_C( 88), -INT8_C( 83), -INT8_C( 39), INT8_C( 93), -INT8_C( 17), -INT8_C( 1), -INT8_C( 126), INT8_C( 84) }, }, }, { { INT8_C( 118), -INT8_C( 36), INT8_C( 38), -INT8_C( 28), INT8_C( 17), INT8_C( 5), INT8_C( 33), INT8_C( 61), INT8_C( 59), -INT8_C( 110), -INT8_C( 32), INT8_C( 82), INT8_C( 86), INT8_C( 85), INT8_C( 72), INT8_C( 41) }, { -INT8_C( 3), -INT8_C( 31), -INT8_C( 41), INT8_C( 0), -INT8_C( 70), INT8_C( 102), INT8_C( 93), INT8_C( 101), INT8_C( 85), -INT8_C( 57), INT8_C( 100), INT8_C( 82), INT8_C( 73), INT8_C( 49), -INT8_C( 90), -INT8_C( 65) }, { { INT8_C( 118), INT8_C( 38), INT8_C( 17), INT8_C( 33), INT8_C( 59), -INT8_C( 32), INT8_C( 86), INT8_C( 72), -INT8_C( 3), -INT8_C( 41), -INT8_C( 70), INT8_C( 93), INT8_C( 85), INT8_C( 100), INT8_C( 73), -INT8_C( 90) }, { -INT8_C( 36), -INT8_C( 28), INT8_C( 5), INT8_C( 61), -INT8_C( 110), INT8_C( 82), INT8_C( 85), INT8_C( 41), -INT8_C( 31), INT8_C( 0), INT8_C( 102), INT8_C( 101), -INT8_C( 57), INT8_C( 82), INT8_C( 49), -INT8_C( 65) }, }, }, { { INT8_C( 13), -INT8_C( 52), -INT8_C( 93), INT8_C( 31), -INT8_C( 46), -INT8_C( 60), INT8_C( 92), INT8_C( 13), INT8_C( 86), INT8_C( 60), INT8_C( 95), -INT8_C( 84), -INT8_C( 111), -INT8_C( 88), -INT8_C( 43), -INT8_C( 114) }, { -INT8_C( 119), -INT8_C( 84), -INT8_C( 114), INT8_C( 67), INT8_C( 18), -INT8_C( 20), -INT8_C( 88), INT8_C( 104), -INT8_C( 77), INT8_C( 13), -INT8_C( 70), -INT8_C( 4), INT8_C( 62), INT8_C( 97), -INT8_C( 69), 
INT8_C( 75) }, { { INT8_C( 13), -INT8_C( 93), -INT8_C( 46), INT8_C( 92), INT8_C( 86), INT8_C( 95), -INT8_C( 111), -INT8_C( 43), -INT8_C( 119), -INT8_C( 114), INT8_C( 18), -INT8_C( 88), -INT8_C( 77), -INT8_C( 70), INT8_C( 62), -INT8_C( 69) }, { -INT8_C( 52), INT8_C( 31), -INT8_C( 60), INT8_C( 13), INT8_C( 60), -INT8_C( 84), -INT8_C( 88), -INT8_C( 114), -INT8_C( 84), INT8_C( 67), -INT8_C( 20), INT8_C( 104), INT8_C( 13), -INT8_C( 4), INT8_C( 97), INT8_C( 75) }, }, }, { { INT8_C( 45), INT8_C( 94), INT8_C( 106), -INT8_C( 1), INT8_C( 34), -INT8_C( 57), INT8_C( 12), INT8_C( 120), INT8_C( 3), INT8_C( 108), INT8_C( 36), -INT8_C( 107), INT8_C( 20), -INT8_C( 7), INT8_C( 35), -INT8_C( 99) }, { -INT8_C( 90), -INT8_C( 78), -INT8_C( 32), -INT8_C( 72), -INT8_C( 98), -INT8_C( 120), INT8_C( 32), INT8_C( 81), -INT8_C( 107), -INT8_C( 37), INT8_C( 77), -INT8_C( 45), INT8_C( 60), INT8_C( 8), INT8_C( 31), INT8_C( 105) }, { { INT8_C( 45), INT8_C( 106), INT8_C( 34), INT8_C( 12), INT8_C( 3), INT8_C( 36), INT8_C( 20), INT8_C( 35), -INT8_C( 90), -INT8_C( 32), -INT8_C( 98), INT8_C( 32), -INT8_C( 107), INT8_C( 77), INT8_C( 60), INT8_C( 31) }, { INT8_C( 94), -INT8_C( 1), -INT8_C( 57), INT8_C( 120), INT8_C( 108), -INT8_C( 107), -INT8_C( 7), -INT8_C( 99), -INT8_C( 78), -INT8_C( 72), -INT8_C( 120), INT8_C( 81), -INT8_C( 37), -INT8_C( 45), INT8_C( 8), INT8_C( 105) }, }, }, { { INT8_C( 102), -INT8_C( 119), INT8_C( 105), -INT8_C( 120), INT8_C( 80), INT8_C( 117), INT8_C( 0), INT8_C( 84), -INT8_C( 31), INT8_C( 36), -INT8_C( 23), -INT8_C( 11), INT8_C( 29), INT8_C( 12), -INT8_C( 110), -INT8_C( 61) }, { -INT8_C( 66), INT8_C( 114), INT8_C( 124), INT8_C( 92), -INT8_C( 5), -INT8_C( 100), -INT8_C( 83), -INT8_C( 112), INT8_C( 119), -INT8_C( 6), INT8_C( 100), -INT8_C( 77), INT8_C( 2), -INT8_C( 125), INT8_C( 29), INT8_C( 104) }, { { INT8_C( 102), INT8_C( 105), INT8_C( 80), INT8_C( 0), -INT8_C( 31), -INT8_C( 23), INT8_C( 29), -INT8_C( 110), -INT8_C( 66), INT8_C( 124), -INT8_C( 5), -INT8_C( 83), INT8_C( 119), 
INT8_C( 100), INT8_C( 2), INT8_C( 29) }, { -INT8_C( 119), -INT8_C( 120), INT8_C( 117), INT8_C( 84), INT8_C( 36), -INT8_C( 11), INT8_C( 12), -INT8_C( 61), INT8_C( 114), INT8_C( 92), -INT8_C( 100), -INT8_C( 112), -INT8_C( 6), -INT8_C( 77), -INT8_C( 125), INT8_C( 104) }, }, }, { { INT8_C( 12), -INT8_C( 122), -INT8_C( 16), INT8_C( 93), -INT8_C( 5), -INT8_C( 16), -INT8_C( 79), -INT8_C( 35), INT8_C( 20), -INT8_C( 102), -INT8_C( 46), INT8_C( 50), -INT8_C( 90), INT8_C( 101), -INT8_C( 11), INT8_C( 101) }, { -INT8_C( 41), INT8_C( 113), -INT8_C( 63), -INT8_C( 46), INT8_C( 14), INT8_C( 111), INT8_C( 99), -INT8_C( 123), INT8_C( 105), -INT8_C( 57), INT8_C( 57), INT8_C( 108), INT8_C( 74), INT8_C( 86), -INT8_C( 44), INT8_C( 86) }, { { INT8_C( 12), -INT8_C( 16), -INT8_C( 5), -INT8_C( 79), INT8_C( 20), -INT8_C( 46), -INT8_C( 90), -INT8_C( 11), -INT8_C( 41), -INT8_C( 63), INT8_C( 14), INT8_C( 99), INT8_C( 105), INT8_C( 57), INT8_C( 74), -INT8_C( 44) }, { -INT8_C( 122), INT8_C( 93), -INT8_C( 16), -INT8_C( 35), -INT8_C( 102), INT8_C( 50), INT8_C( 101), INT8_C( 101), INT8_C( 113), -INT8_C( 46), INT8_C( 111), -INT8_C( 123), -INT8_C( 57), INT8_C( 108), INT8_C( 86), INT8_C( 86) }, }, }, { { -INT8_C( 36), -INT8_C( 59), -INT8_C( 77), -INT8_C( 41), -INT8_C( 75), INT8_C( 100), -INT8_C( 76), -INT8_C( 54), -INT8_C( 2), -INT8_C( 121), -INT8_C( 4), -INT8_C( 91), -INT8_C( 20), -INT8_C( 15), INT8_C( 10), -INT8_C( 61) }, { INT8_C( 99), -INT8_C( 53), -INT8_C( 106), INT8_C( 113), INT8_C( 58), -INT8_C( 7), -INT8_C( 10), -INT8_C( 92), -INT8_C( 64), INT8_C( 47), INT8_C( 16), INT8_C( 10), -INT8_C( 123), -INT8_C( 28), INT8_C( 96), INT8_C( 97) }, { { -INT8_C( 36), -INT8_C( 77), -INT8_C( 75), -INT8_C( 76), -INT8_C( 2), -INT8_C( 4), -INT8_C( 20), INT8_C( 10), INT8_C( 99), -INT8_C( 106), INT8_C( 58), -INT8_C( 10), -INT8_C( 64), INT8_C( 16), -INT8_C( 123), INT8_C( 96) }, { -INT8_C( 59), -INT8_C( 41), INT8_C( 100), -INT8_C( 54), -INT8_C( 121), -INT8_C( 91), -INT8_C( 15), -INT8_C( 61), -INT8_C( 53), INT8_C( 113), 
-INT8_C( 7), -INT8_C( 92), INT8_C( 47), INT8_C( 10), -INT8_C( 28), INT8_C( 97) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16x2_t r = simde_vuzpq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r.val[0], simde_vld1q_s8(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_i8x16(r.val[1], simde_vld1q_s8(test_vec[i].r[1])); } return 0; } static int test_simde_vuzpq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[2][8]; } test_vec[] = { { { INT16_C( 18981), -INT16_C( 19032), -INT16_C( 1653), -INT16_C( 25342), -INT16_C( 26301), INT16_C( 658), -INT16_C( 19301), -INT16_C( 11855) }, { -INT16_C( 13642), INT16_C( 19323), -INT16_C( 29207), -INT16_C( 12273), INT16_C( 24472), INT16_C( 16673), -INT16_C( 23665), -INT16_C( 18984) }, { { INT16_C( 18981), -INT16_C( 1653), -INT16_C( 26301), -INT16_C( 19301), -INT16_C( 13642), -INT16_C( 29207), INT16_C( 24472), -INT16_C( 23665) }, { -INT16_C( 19032), -INT16_C( 25342), INT16_C( 658), -INT16_C( 11855), INT16_C( 19323), -INT16_C( 12273), INT16_C( 16673), -INT16_C( 18984) }, }, }, { { -INT16_C( 32275), INT16_C( 30826), INT16_C( 28026), -INT16_C( 17130), -INT16_C( 22522), -INT16_C( 24129), INT16_C( 28765), INT16_C( 4979) }, { -INT16_C( 4550), INT16_C( 9055), INT16_C( 28283), INT16_C( 5364), INT16_C( 5581), INT16_C( 23893), INT16_C( 11960), -INT16_C( 23022) }, { { -INT16_C( 32275), INT16_C( 28026), -INT16_C( 22522), INT16_C( 28765), -INT16_C( 4550), INT16_C( 28283), INT16_C( 5581), INT16_C( 11960) }, { INT16_C( 30826), -INT16_C( 17130), -INT16_C( 24129), INT16_C( 4979), INT16_C( 9055), INT16_C( 5364), INT16_C( 23893), -INT16_C( 23022) }, }, }, { { INT16_C( 31919), INT16_C( 10526), INT16_C( 13545), -INT16_C( 4122), -INT16_C( 23075), INT16_C( 14993), INT16_C( 1046), INT16_C( 20557) }, { -INT16_C( 21262), INT16_C( 28276), INT16_C( 26650), -INT16_C( 6014), 
-INT16_C( 10371), INT16_C( 13637), INT16_C( 22277), -INT16_C( 19237) }, { { INT16_C( 31919), INT16_C( 13545), -INT16_C( 23075), INT16_C( 1046), -INT16_C( 21262), INT16_C( 26650), -INT16_C( 10371), INT16_C( 22277) }, { INT16_C( 10526), -INT16_C( 4122), INT16_C( 14993), INT16_C( 20557), INT16_C( 28276), -INT16_C( 6014), INT16_C( 13637), -INT16_C( 19237) }, }, }, { { -INT16_C( 1325), -INT16_C( 16931), -INT16_C( 15570), INT16_C( 2988), INT16_C( 15721), INT16_C( 32581), -INT16_C( 27839), INT16_C( 13519) }, { INT16_C( 17215), INT16_C( 23202), INT16_C( 9387), INT16_C( 10306), -INT16_C( 30725), INT16_C( 350), INT16_C( 14814), -INT16_C( 20043) }, { { -INT16_C( 1325), -INT16_C( 15570), INT16_C( 15721), -INT16_C( 27839), INT16_C( 17215), INT16_C( 9387), -INT16_C( 30725), INT16_C( 14814) }, { -INT16_C( 16931), INT16_C( 2988), INT16_C( 32581), INT16_C( 13519), INT16_C( 23202), INT16_C( 10306), INT16_C( 350), -INT16_C( 20043) }, }, }, { { -INT16_C( 27853), INT16_C( 25198), INT16_C( 6998), -INT16_C( 16531), -INT16_C( 19624), -INT16_C( 26050), INT16_C( 3654), -INT16_C( 31282) }, { INT16_C( 28753), -INT16_C( 545), INT16_C( 8596), -INT16_C( 28891), -INT16_C( 31832), -INT16_C( 31088), INT16_C( 18109), -INT16_C( 4040) }, { { -INT16_C( 27853), INT16_C( 6998), -INT16_C( 19624), INT16_C( 3654), INT16_C( 28753), INT16_C( 8596), -INT16_C( 31832), INT16_C( 18109) }, { INT16_C( 25198), -INT16_C( 16531), -INT16_C( 26050), -INT16_C( 31282), -INT16_C( 545), -INT16_C( 28891), -INT16_C( 31088), -INT16_C( 4040) }, }, }, { { -INT16_C( 22823), INT16_C( 12114), -INT16_C( 16191), INT16_C( 6895), INT16_C( 11635), -INT16_C( 17996), -INT16_C( 32197), -INT16_C( 29378) }, { INT16_C( 7922), -INT16_C( 31094), -INT16_C( 20673), -INT16_C( 6123), -INT16_C( 22989), -INT16_C( 3986), -INT16_C( 22804), -INT16_C( 14880) }, { { -INT16_C( 22823), -INT16_C( 16191), INT16_C( 11635), -INT16_C( 32197), INT16_C( 7922), -INT16_C( 20673), -INT16_C( 22989), -INT16_C( 22804) }, { INT16_C( 12114), INT16_C( 6895), -INT16_C( 
17996), -INT16_C( 29378), -INT16_C( 31094), -INT16_C( 6123), -INT16_C( 3986), -INT16_C( 14880) }, }, }, { { INT16_C( 13133), INT16_C( 3828), -INT16_C( 7181), INT16_C( 26152), -INT16_C( 9199), INT16_C( 19487), INT16_C( 23902), INT16_C( 20697) }, { INT16_C( 25467), -INT16_C( 17450), -INT16_C( 5101), INT16_C( 18083), INT16_C( 4498), INT16_C( 32310), INT16_C( 5816), INT16_C( 1347) }, { { INT16_C( 13133), -INT16_C( 7181), -INT16_C( 9199), INT16_C( 23902), INT16_C( 25467), -INT16_C( 5101), INT16_C( 4498), INT16_C( 5816) }, { INT16_C( 3828), INT16_C( 26152), INT16_C( 19487), INT16_C( 20697), -INT16_C( 17450), INT16_C( 18083), INT16_C( 32310), INT16_C( 1347) }, }, }, { { INT16_C( 14153), INT16_C( 15379), INT16_C( 15387), INT16_C( 11426), -INT16_C( 16104), INT16_C( 30584), INT16_C( 21023), -INT16_C( 25913) }, { -INT16_C( 24907), -INT16_C( 14251), -INT16_C( 1910), INT16_C( 7182), INT16_C( 17418), -INT16_C( 15718), -INT16_C( 8869), -INT16_C( 23353) }, { { INT16_C( 14153), INT16_C( 15387), -INT16_C( 16104), INT16_C( 21023), -INT16_C( 24907), -INT16_C( 1910), INT16_C( 17418), -INT16_C( 8869) }, { INT16_C( 15379), INT16_C( 11426), INT16_C( 30584), -INT16_C( 25913), -INT16_C( 14251), INT16_C( 7182), -INT16_C( 15718), -INT16_C( 23353) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8x2_t r = simde_vuzpq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r.val[0], simde_vld1q_s16(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_i16x8(r.val[1], simde_vld1q_s16(test_vec[i].r[1])); } return 0; } static int test_simde_vuzpq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[2][4]; } test_vec[] = { { { INT32_C( 1135740105), INT32_C( 497863278), INT32_C( 193109829), -INT32_C( 1962880757) }, { INT32_C( 2070865737), INT32_C( 111450563), -INT32_C( 1085389284), INT32_C( 1874856614) }, { { INT32_C( 1135740105), 
INT32_C( 193109829), INT32_C( 2070865737), -INT32_C( 1085389284) }, { INT32_C( 497863278), -INT32_C( 1962880757), INT32_C( 111450563), INT32_C( 1874856614) }, }, }, { { INT32_C( 2108912142), -INT32_C( 2120589508), INT32_C( 160242942), INT32_C( 915705325) }, { INT32_C( 934347380), -INT32_C( 1220717157), INT32_C( 1098288283), -INT32_C( 1531890026) }, { { INT32_C( 2108912142), INT32_C( 160242942), INT32_C( 934347380), INT32_C( 1098288283) }, { -INT32_C( 2120589508), INT32_C( 915705325), -INT32_C( 1220717157), -INT32_C( 1531890026) }, }, }, { { -INT32_C( 450796376), -INT32_C( 1050231869), -INT32_C( 976489513), -INT32_C( 184852608) }, { -INT32_C( 47403678), -INT32_C( 1632278270), -INT32_C( 1948308491), INT32_C( 170954850) }, { { -INT32_C( 450796376), -INT32_C( 976489513), -INT32_C( 47403678), -INT32_C( 1948308491) }, { -INT32_C( 1050231869), -INT32_C( 184852608), -INT32_C( 1632278270), INT32_C( 170954850) }, }, }, { { -INT32_C( 1209052684), -INT32_C( 461810163), -INT32_C( 894876599), INT32_C( 96380323) }, { INT32_C( 1409542738), INT32_C( 1240643668), INT32_C( 1171641059), INT32_C( 1464862050) }, { { -INT32_C( 1209052684), -INT32_C( 894876599), INT32_C( 1409542738), INT32_C( 1171641059) }, { -INT32_C( 461810163), INT32_C( 96380323), INT32_C( 1240643668), INT32_C( 1464862050) }, }, }, { { INT32_C( 1661878102), -INT32_C( 548894827), INT32_C( 1873408459), -INT32_C( 395024490) }, { -INT32_C( 1505921198), INT32_C( 334442287), INT32_C( 1683538945), INT32_C( 549169353) }, { { INT32_C( 1661878102), INT32_C( 1873408459), -INT32_C( 1505921198), INT32_C( 1683538945) }, { -INT32_C( 548894827), -INT32_C( 395024490), INT32_C( 334442287), INT32_C( 549169353) }, }, }, { { INT32_C( 2105788904), INT32_C( 475843409), INT32_C( 1401619901), -INT32_C( 1086586771) }, { -INT32_C( 1486521993), -INT32_C( 1430629208), -INT32_C( 502394343), -INT32_C( 1560098373) }, { { INT32_C( 2105788904), INT32_C( 1401619901), -INT32_C( 1486521993), -INT32_C( 502394343) }, { INT32_C( 475843409), -INT32_C( 
1086586771), -INT32_C( 1430629208), -INT32_C( 1560098373) }, }, }, { { -INT32_C( 484407662), INT32_C( 234913105), -INT32_C( 278754430), INT32_C( 61775499) }, { -INT32_C( 1079373033), -INT32_C( 2123799448), INT32_C( 828602230), -INT32_C( 741054912) }, { { -INT32_C( 484407662), -INT32_C( 278754430), -INT32_C( 1079373033), INT32_C( 828602230) }, { INT32_C( 234913105), INT32_C( 61775499), -INT32_C( 2123799448), -INT32_C( 741054912) }, }, }, { { INT32_C( 1035400684), -INT32_C( 196299150), -INT32_C( 840651198), INT32_C( 1674613324) }, { INT32_C( 237140646), INT32_C( 1435471070), INT32_C( 1149694467), INT32_C( 1142381400) }, { { INT32_C( 1035400684), -INT32_C( 840651198), INT32_C( 237140646), INT32_C( 1149694467) }, { -INT32_C( 196299150), INT32_C( 1674613324), INT32_C( 1435471070), INT32_C( 1142381400) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4x2_t r = simde_vuzpq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r.val[0], simde_vld1q_s32(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_i32x4(r.val[1], simde_vld1q_s32(test_vec[i].r[1])); } return 0; } static int test_simde_vuzpq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[2][16]; } test_vec[] = { { { UINT8_C( 32), UINT8_C(240), UINT8_C( 75), UINT8_C( 60), UINT8_C(238), UINT8_C(113), UINT8_C(237), UINT8_C( 92), UINT8_C( 44), UINT8_C(211), UINT8_C(242), UINT8_C(151), UINT8_C( 20), UINT8_C( 59), UINT8_C(160), UINT8_C( 43) }, { UINT8_C( 51), UINT8_C( 41), UINT8_C(155), UINT8_C(227), UINT8_C(199), UINT8_C(249), UINT8_C(233), UINT8_C(251), UINT8_C( 68), UINT8_C(216), UINT8_C(183), UINT8_C(229), UINT8_C(161), UINT8_C( 14), UINT8_C(130), UINT8_C(193) }, { { UINT8_C( 32), UINT8_C( 75), UINT8_C(238), UINT8_C(237), UINT8_C( 44), UINT8_C(242), UINT8_C( 20), UINT8_C(160), UINT8_C( 51), UINT8_C(155), UINT8_C(199), UINT8_C(233), UINT8_C( 
68), UINT8_C(183), UINT8_C(161), UINT8_C(130) }, { UINT8_C(240), UINT8_C( 60), UINT8_C(113), UINT8_C( 92), UINT8_C(211), UINT8_C(151), UINT8_C( 59), UINT8_C( 43), UINT8_C( 41), UINT8_C(227), UINT8_C(249), UINT8_C(251), UINT8_C(216), UINT8_C(229), UINT8_C( 14), UINT8_C(193) }, }, }, { { UINT8_MAX, UINT8_C(206), UINT8_C(253), UINT8_C(237), UINT8_C( 63), UINT8_C(234), UINT8_C( 73), UINT8_C(107), UINT8_C(189), UINT8_C( 59), UINT8_C( 3), UINT8_C(209), UINT8_C(119), UINT8_C(163), UINT8_C(252), UINT8_C(170) }, { UINT8_C(204), UINT8_C(152), UINT8_C(141), UINT8_C(147), UINT8_C(145), UINT8_C(118), UINT8_C(143), UINT8_C(213), UINT8_C( 79), UINT8_C( 70), UINT8_C(186), UINT8_C(240), UINT8_C( 85), UINT8_C( 60), UINT8_C(177), UINT8_C( 84) }, { { UINT8_MAX, UINT8_C(253), UINT8_C( 63), UINT8_C( 73), UINT8_C(189), UINT8_C( 3), UINT8_C(119), UINT8_C(252), UINT8_C(204), UINT8_C(141), UINT8_C(145), UINT8_C(143), UINT8_C( 79), UINT8_C(186), UINT8_C( 85), UINT8_C(177) }, { UINT8_C(206), UINT8_C(237), UINT8_C(234), UINT8_C(107), UINT8_C( 59), UINT8_C(209), UINT8_C(163), UINT8_C(170), UINT8_C(152), UINT8_C(147), UINT8_C(118), UINT8_C(213), UINT8_C( 70), UINT8_C(240), UINT8_C( 60), UINT8_C( 84) }, }, }, { { UINT8_C( 10), UINT8_C(174), UINT8_C( 65), UINT8_C( 74), UINT8_C(152), UINT8_C(139), UINT8_C(181), UINT8_C( 85), UINT8_C(198), UINT8_C(184), UINT8_C( 38), UINT8_C( 61), UINT8_C( 91), UINT8_C( 34), UINT8_C(231), UINT8_C( 40) }, { UINT8_C(186), UINT8_C(117), UINT8_C(187), UINT8_C( 75), UINT8_C(235), UINT8_C( 74), UINT8_C( 32), UINT8_C( 58), UINT8_C(145), UINT8_C(218), UINT8_C( 42), UINT8_C(230), UINT8_C( 23), UINT8_C(219), UINT8_C( 58), UINT8_C( 33) }, { { UINT8_C( 10), UINT8_C( 65), UINT8_C(152), UINT8_C(181), UINT8_C(198), UINT8_C( 38), UINT8_C( 91), UINT8_C(231), UINT8_C(186), UINT8_C(187), UINT8_C(235), UINT8_C( 32), UINT8_C(145), UINT8_C( 42), UINT8_C( 23), UINT8_C( 58) }, { UINT8_C(174), UINT8_C( 74), UINT8_C(139), UINT8_C( 85), UINT8_C(184), UINT8_C( 61), UINT8_C( 34), UINT8_C( 40), 
UINT8_C(117), UINT8_C( 75), UINT8_C( 74), UINT8_C( 58), UINT8_C(218), UINT8_C(230), UINT8_C(219), UINT8_C( 33) }, }, }, { { UINT8_C(137), UINT8_C(123), UINT8_C(107), UINT8_C( 33), UINT8_C( 6), UINT8_C( 33), UINT8_C(118), UINT8_C(205), UINT8_C(217), UINT8_C(156), UINT8_C( 10), UINT8_C( 53), UINT8_C(191), UINT8_C(242), UINT8_C( 93), UINT8_C(121) }, { UINT8_C(103), UINT8_C( 24), UINT8_C(197), UINT8_C( 82), UINT8_C( 99), UINT8_C(229), UINT8_C(141), UINT8_C(244), UINT8_C(192), UINT8_C(183), UINT8_C(218), UINT8_C(215), UINT8_C(147), UINT8_C( 20), UINT8_C(248), UINT8_C( 28) }, { { UINT8_C(137), UINT8_C(107), UINT8_C( 6), UINT8_C(118), UINT8_C(217), UINT8_C( 10), UINT8_C(191), UINT8_C( 93), UINT8_C(103), UINT8_C(197), UINT8_C( 99), UINT8_C(141), UINT8_C(192), UINT8_C(218), UINT8_C(147), UINT8_C(248) }, { UINT8_C(123), UINT8_C( 33), UINT8_C( 33), UINT8_C(205), UINT8_C(156), UINT8_C( 53), UINT8_C(242), UINT8_C(121), UINT8_C( 24), UINT8_C( 82), UINT8_C(229), UINT8_C(244), UINT8_C(183), UINT8_C(215), UINT8_C( 20), UINT8_C( 28) }, }, }, { { UINT8_C(143), UINT8_C(100), UINT8_C( 62), UINT8_C(150), UINT8_C(133), UINT8_C(180), UINT8_C( 99), UINT8_C( 94), UINT8_C( 81), UINT8_C(109), UINT8_C(147), UINT8_C( 16), UINT8_C( 95), UINT8_C(240), UINT8_C(137), UINT8_C(198) }, { UINT8_C( 9), UINT8_C( 78), UINT8_C( 25), UINT8_C(108), UINT8_C( 52), UINT8_C(166), UINT8_C( 96), UINT8_C(244), UINT8_C( 93), UINT8_C( 58), UINT8_C(203), UINT8_C(240), UINT8_C( 78), UINT8_C(195), UINT8_C( 13), UINT8_C(221) }, { { UINT8_C(143), UINT8_C( 62), UINT8_C(133), UINT8_C( 99), UINT8_C( 81), UINT8_C(147), UINT8_C( 95), UINT8_C(137), UINT8_C( 9), UINT8_C( 25), UINT8_C( 52), UINT8_C( 96), UINT8_C( 93), UINT8_C(203), UINT8_C( 78), UINT8_C( 13) }, { UINT8_C(100), UINT8_C(150), UINT8_C(180), UINT8_C( 94), UINT8_C(109), UINT8_C( 16), UINT8_C(240), UINT8_C(198), UINT8_C( 78), UINT8_C(108), UINT8_C(166), UINT8_C(244), UINT8_C( 58), UINT8_C(240), UINT8_C(195), UINT8_C(221) }, }, }, { { UINT8_C( 39), UINT8_C( 75), 
UINT8_C(115), UINT8_C(172), UINT8_MAX, UINT8_C(214), UINT8_C( 11), UINT8_C( 80), UINT8_C( 68), UINT8_C(158), UINT8_C( 96), UINT8_C(163), UINT8_C(143), UINT8_C(234), UINT8_C(106), UINT8_C(152) }, { UINT8_C( 56), UINT8_C(131), UINT8_C( 4), UINT8_C(108), UINT8_C( 41), UINT8_C(100), UINT8_C( 96), UINT8_C(134), UINT8_C(158), UINT8_C( 43), UINT8_C(119), UINT8_C(236), UINT8_C(239), UINT8_C(132), UINT8_C(201), UINT8_C( 22) }, { { UINT8_C( 39), UINT8_C(115), UINT8_MAX, UINT8_C( 11), UINT8_C( 68), UINT8_C( 96), UINT8_C(143), UINT8_C(106), UINT8_C( 56), UINT8_C( 4), UINT8_C( 41), UINT8_C( 96), UINT8_C(158), UINT8_C(119), UINT8_C(239), UINT8_C(201) }, { UINT8_C( 75), UINT8_C(172), UINT8_C(214), UINT8_C( 80), UINT8_C(158), UINT8_C(163), UINT8_C(234), UINT8_C(152), UINT8_C(131), UINT8_C(108), UINT8_C(100), UINT8_C(134), UINT8_C( 43), UINT8_C(236), UINT8_C(132), UINT8_C( 22) }, }, }, { { UINT8_C(207), UINT8_C( 61), UINT8_C(195), UINT8_C(206), UINT8_C( 19), UINT8_C(206), UINT8_C( 31), UINT8_C( 87), UINT8_C(108), UINT8_C(127), UINT8_C(251), UINT8_C(251), UINT8_C(105), UINT8_C(101), UINT8_C(147), UINT8_C(162) }, { UINT8_C(232), UINT8_C(151), UINT8_C( 14), UINT8_C( 17), UINT8_C(251), UINT8_C(111), UINT8_C(151), UINT8_C(153), UINT8_C(154), UINT8_C( 14), UINT8_C(133), UINT8_C(137), UINT8_C(146), UINT8_C( 79), UINT8_C(160), UINT8_C( 97) }, { { UINT8_C(207), UINT8_C(195), UINT8_C( 19), UINT8_C( 31), UINT8_C(108), UINT8_C(251), UINT8_C(105), UINT8_C(147), UINT8_C(232), UINT8_C( 14), UINT8_C(251), UINT8_C(151), UINT8_C(154), UINT8_C(133), UINT8_C(146), UINT8_C(160) }, { UINT8_C( 61), UINT8_C(206), UINT8_C(206), UINT8_C( 87), UINT8_C(127), UINT8_C(251), UINT8_C(101), UINT8_C(162), UINT8_C(151), UINT8_C( 17), UINT8_C(111), UINT8_C(153), UINT8_C( 14), UINT8_C(137), UINT8_C( 79), UINT8_C( 97) }, }, }, { { UINT8_C(140), UINT8_C( 99), UINT8_C( 48), UINT8_C(159), UINT8_C( 49), UINT8_C( 79), UINT8_C(247), UINT8_C(157), UINT8_C(206), UINT8_C(242), UINT8_C(153), UINT8_C( 56), UINT8_C( 87), UINT8_C( 
44), UINT8_C(218), UINT8_C( 63) }, { UINT8_C(196), UINT8_C(232), UINT8_C( 80), UINT8_C(191), UINT8_C( 87), UINT8_C(231), UINT8_C( 89), UINT8_C(242), UINT8_C(246), UINT8_C(222), UINT8_C(123), UINT8_C(136), UINT8_C( 45), UINT8_C( 27), UINT8_C(234), UINT8_C(185) }, { { UINT8_C(140), UINT8_C( 48), UINT8_C( 49), UINT8_C(247), UINT8_C(206), UINT8_C(153), UINT8_C( 87), UINT8_C(218), UINT8_C(196), UINT8_C( 80), UINT8_C( 87), UINT8_C( 89), UINT8_C(246), UINT8_C(123), UINT8_C( 45), UINT8_C(234) }, { UINT8_C( 99), UINT8_C(159), UINT8_C( 79), UINT8_C(157), UINT8_C(242), UINT8_C( 56), UINT8_C( 44), UINT8_C( 63), UINT8_C(232), UINT8_C(191), UINT8_C(231), UINT8_C(242), UINT8_C(222), UINT8_C(136), UINT8_C( 27), UINT8_C(185) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16x2_t r = simde_vuzpq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r.val[0], simde_vld1q_u8(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_u8x16(r.val[1], simde_vld1q_u8(test_vec[i].r[1])); } return 0; } static int test_simde_vuzpq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[2][8]; } test_vec[] = { { { UINT16_C(15666), UINT16_C( 9230), UINT16_C( 9080), UINT16_C(20406), UINT16_C(26210), UINT16_C( 1132), UINT16_C(52811), UINT16_C(62092) }, { UINT16_C( 2971), UINT16_C(24403), UINT16_C(65053), UINT16_C(13489), UINT16_C(33869), UINT16_C( 3101), UINT16_C(43650), UINT16_C(46245) }, { { UINT16_C(15666), UINT16_C( 9080), UINT16_C(26210), UINT16_C(52811), UINT16_C( 2971), UINT16_C(65053), UINT16_C(33869), UINT16_C(43650) }, { UINT16_C( 9230), UINT16_C(20406), UINT16_C( 1132), UINT16_C(62092), UINT16_C(24403), UINT16_C(13489), UINT16_C( 3101), UINT16_C(46245) }, }, }, { { UINT16_C(46055), UINT16_C(24536), UINT16_C(36567), UINT16_C(14766), UINT16_C( 6900), UINT16_C(16189), UINT16_C(51689), UINT16_C(33841) }, { UINT16_C(34260), 
UINT16_C(61923), UINT16_C(38019), UINT16_C(53285), UINT16_C(16920), UINT16_C(39900), UINT16_C(33261), UINT16_C(54351) }, { { UINT16_C(46055), UINT16_C(36567), UINT16_C( 6900), UINT16_C(51689), UINT16_C(34260), UINT16_C(38019), UINT16_C(16920), UINT16_C(33261) }, { UINT16_C(24536), UINT16_C(14766), UINT16_C(16189), UINT16_C(33841), UINT16_C(61923), UINT16_C(53285), UINT16_C(39900), UINT16_C(54351) }, }, }, { { UINT16_C(10293), UINT16_C( 3124), UINT16_C(58038), UINT16_C(43845), UINT16_C(33533), UINT16_C(59114), UINT16_C( 7243), UINT16_C( 8298) }, { UINT16_C(19873), UINT16_C( 9233), UINT16_C(14305), UINT16_C(64244), UINT16_C(53625), UINT16_C(26261), UINT16_C(58450), UINT16_C(34619) }, { { UINT16_C(10293), UINT16_C(58038), UINT16_C(33533), UINT16_C( 7243), UINT16_C(19873), UINT16_C(14305), UINT16_C(53625), UINT16_C(58450) }, { UINT16_C( 3124), UINT16_C(43845), UINT16_C(59114), UINT16_C( 8298), UINT16_C( 9233), UINT16_C(64244), UINT16_C(26261), UINT16_C(34619) }, }, }, { { UINT16_C(28428), UINT16_C(50067), UINT16_C(55377), UINT16_C(20078), UINT16_C(22618), UINT16_C(42548), UINT16_C(40564), UINT16_C( 5574) }, { UINT16_C(55275), UINT16_C(52537), UINT16_C(11790), UINT16_C(35015), UINT16_C(23807), UINT16_C(20974), UINT16_C(10560), UINT16_C(19929) }, { { UINT16_C(28428), UINT16_C(55377), UINT16_C(22618), UINT16_C(40564), UINT16_C(55275), UINT16_C(11790), UINT16_C(23807), UINT16_C(10560) }, { UINT16_C(50067), UINT16_C(20078), UINT16_C(42548), UINT16_C( 5574), UINT16_C(52537), UINT16_C(35015), UINT16_C(20974), UINT16_C(19929) }, }, }, { { UINT16_C(27800), UINT16_C(59920), UINT16_C(32325), UINT16_C(40760), UINT16_C(28118), UINT16_C(19269), UINT16_C( 2827), UINT16_C(63328) }, { UINT16_C(39651), UINT16_C(61892), UINT16_C(35784), UINT16_C(51065), UINT16_C(26855), UINT16_C(10008), UINT16_C(61841), UINT16_C(10868) }, { { UINT16_C(27800), UINT16_C(32325), UINT16_C(28118), UINT16_C( 2827), UINT16_C(39651), UINT16_C(35784), UINT16_C(26855), UINT16_C(61841) }, { UINT16_C(59920), 
UINT16_C(40760), UINT16_C(19269), UINT16_C(63328), UINT16_C(61892), UINT16_C(51065), UINT16_C(10008), UINT16_C(10868) }, }, }, { { UINT16_C(33886), UINT16_C(41748), UINT16_C(19458), UINT16_C(55618), UINT16_C(35001), UINT16_C(50468), UINT16_C(33939), UINT16_C(30396) }, { UINT16_C(32798), UINT16_C(58984), UINT16_C(57611), UINT16_C(62125), UINT16_C(50761), UINT16_C(56089), UINT16_C(36535), UINT16_C( 5381) }, { { UINT16_C(33886), UINT16_C(19458), UINT16_C(35001), UINT16_C(33939), UINT16_C(32798), UINT16_C(57611), UINT16_C(50761), UINT16_C(36535) }, { UINT16_C(41748), UINT16_C(55618), UINT16_C(50468), UINT16_C(30396), UINT16_C(58984), UINT16_C(62125), UINT16_C(56089), UINT16_C( 5381) }, }, }, { { UINT16_C( 6418), UINT16_C( 5560), UINT16_C(64357), UINT16_C( 8174), UINT16_C( 4739), UINT16_C( 5860), UINT16_C(41110), UINT16_C(46477) }, { UINT16_C(62752), UINT16_C(11163), UINT16_C(18902), UINT16_C( 8221), UINT16_C(13839), UINT16_C(50939), UINT16_C( 196), UINT16_C(55260) }, { { UINT16_C( 6418), UINT16_C(64357), UINT16_C( 4739), UINT16_C(41110), UINT16_C(62752), UINT16_C(18902), UINT16_C(13839), UINT16_C( 196) }, { UINT16_C( 5560), UINT16_C( 8174), UINT16_C( 5860), UINT16_C(46477), UINT16_C(11163), UINT16_C( 8221), UINT16_C(50939), UINT16_C(55260) }, }, }, { { UINT16_C(37913), UINT16_C(32492), UINT16_C(55951), UINT16_C( 4765), UINT16_C(33260), UINT16_C(33321), UINT16_C(46625), UINT16_C(16695) }, { UINT16_C(54187), UINT16_C(33132), UINT16_C(35100), UINT16_C(11169), UINT16_C(40128), UINT16_C(34033), UINT16_C(52636), UINT16_C(46427) }, { { UINT16_C(37913), UINT16_C(55951), UINT16_C(33260), UINT16_C(46625), UINT16_C(54187), UINT16_C(35100), UINT16_C(40128), UINT16_C(52636) }, { UINT16_C(32492), UINT16_C( 4765), UINT16_C(33321), UINT16_C(16695), UINT16_C(33132), UINT16_C(11169), UINT16_C(34033), UINT16_C(46427) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = 
simde_vld1q_u16(test_vec[i].b); simde_uint16x8x2_t r = simde_vuzpq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r.val[0], simde_vld1q_u16(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_u16x8(r.val[1], simde_vld1q_u16(test_vec[i].r[1])); } return 0; } static int test_simde_vuzpq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[2][4]; } test_vec[] = { { { UINT32_C(1622875691), UINT32_C( 117780204), UINT32_C(2996661462), UINT32_C( 959522199) }, { UINT32_C(2276532505), UINT32_C(2007160179), UINT32_C(2433206818), UINT32_C(3600242091) }, { { UINT32_C(1622875691), UINT32_C(2996661462), UINT32_C(2276532505), UINT32_C(2433206818) }, { UINT32_C( 117780204), UINT32_C( 959522199), UINT32_C(2007160179), UINT32_C(3600242091) }, }, }, { { UINT32_C(1597461107), UINT32_C(1466317953), UINT32_C(1057555368), UINT32_C(1115175465) }, { UINT32_C(3469289819), UINT32_C( 558263295), UINT32_C(3786558774), UINT32_C( 364333730) }, { { UINT32_C(1597461107), UINT32_C(1057555368), UINT32_C(3469289819), UINT32_C(3786558774) }, { UINT32_C(1466317953), UINT32_C(1115175465), UINT32_C( 558263295), UINT32_C( 364333730) }, }, }, { { UINT32_C( 494202524), UINT32_C(3530873642), UINT32_C( 118652638), UINT32_C( 340429496) }, { UINT32_C(3017937844), UINT32_C(3050580095), UINT32_C( 395741045), UINT32_C(1831620049) }, { { UINT32_C( 494202524), UINT32_C( 118652638), UINT32_C(3017937844), UINT32_C( 395741045) }, { UINT32_C(3530873642), UINT32_C( 340429496), UINT32_C(3050580095), UINT32_C(1831620049) }, }, }, { { UINT32_C(1720426812), UINT32_C(1513750396), UINT32_C( 912411517), UINT32_C(2303372501) }, { UINT32_C(1044131007), UINT32_C(3404927317), UINT32_C(1776454040), UINT32_C( 332795607) }, { { UINT32_C(1720426812), UINT32_C( 912411517), UINT32_C(1044131007), UINT32_C(1776454040) }, { UINT32_C(1513750396), UINT32_C(2303372501), UINT32_C(3404927317), UINT32_C( 332795607) }, }, }, { { UINT32_C( 729375151), UINT32_C(3733369441), UINT32_C(3541362941), UINT32_C(1398562452) }, 
{ UINT32_C(3767703947), UINT32_C(1118471594), UINT32_C(3870002191), UINT32_C(1257865627) }, { { UINT32_C( 729375151), UINT32_C(3541362941), UINT32_C(3767703947), UINT32_C(3870002191) }, { UINT32_C(3733369441), UINT32_C(1398562452), UINT32_C(1118471594), UINT32_C(1257865627) }, }, }, { { UINT32_C(1148613347), UINT32_C( 572718117), UINT32_C(2029336548), UINT32_C( 550195861) }, { UINT32_C(2499829227), UINT32_C(4074220515), UINT32_C(3537404471), UINT32_C(3860713731) }, { { UINT32_C(1148613347), UINT32_C(2029336548), UINT32_C(2499829227), UINT32_C(3537404471) }, { UINT32_C( 572718117), UINT32_C( 550195861), UINT32_C(4074220515), UINT32_C(3860713731) }, }, }, { { UINT32_C(1747620675), UINT32_C(1938509199), UINT32_C( 434864260), UINT32_C(3174741714) }, { UINT32_C(4149361172), UINT32_C( 501819877), UINT32_C(2951725483), UINT32_C(3583315090) }, { { UINT32_C(1747620675), UINT32_C( 434864260), UINT32_C(4149361172), UINT32_C(2951725483) }, { UINT32_C(1938509199), UINT32_C(3174741714), UINT32_C( 501819877), UINT32_C(3583315090) }, }, }, { { UINT32_C( 775864479), UINT32_C(2443299085), UINT32_C( 480939081), UINT32_C(1473897539) }, { UINT32_C( 72231967), UINT32_C( 18954069), UINT32_C(2326794744), UINT32_C(3177137437) }, { { UINT32_C( 775864479), UINT32_C( 480939081), UINT32_C( 72231967), UINT32_C(2326794744) }, { UINT32_C(2443299085), UINT32_C(1473897539), UINT32_C( 18954069), UINT32_C(3177137437) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4x2_t r = simde_vuzpq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r.val[0], simde_vld1q_u32(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_u32x4(r.val[1], simde_vld1q_u32(test_vec[i].r[1])); } return 0; } #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp_f32) 
SIMDE_TEST_FUNC_LIST_ENTRY(vuzp_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzpq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzpq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vuzpq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzpq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzpq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vuzpq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzpq_u32) #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/uzp1.c000066400000000000000000001546271400333146700164010ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN uzp1 #include "test-neon.h" #include "../../../simde/arm/neon/uzp1.h" static int test_simde_vuzp1_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 188.14), SIMDE_FLOAT32_C( -87.23) }, { SIMDE_FLOAT32_C( -966.94), SIMDE_FLOAT32_C( -341.57) }, { SIMDE_FLOAT32_C( 188.14), SIMDE_FLOAT32_C( -966.94) } }, { { SIMDE_FLOAT32_C( 682.15), SIMDE_FLOAT32_C( -964.96) }, { SIMDE_FLOAT32_C( 256.02), SIMDE_FLOAT32_C( 570.89) }, { SIMDE_FLOAT32_C( 682.15), SIMDE_FLOAT32_C( 256.02) } }, { { SIMDE_FLOAT32_C( -610.16), SIMDE_FLOAT32_C( -498.24) }, { SIMDE_FLOAT32_C( -566.43), SIMDE_FLOAT32_C( -742.76) }, { SIMDE_FLOAT32_C( -610.16), SIMDE_FLOAT32_C( -566.43) } }, { { SIMDE_FLOAT32_C( 804.69), SIMDE_FLOAT32_C( -146.31) }, { SIMDE_FLOAT32_C( -114.26), SIMDE_FLOAT32_C( 461.82) }, { SIMDE_FLOAT32_C( 804.69), SIMDE_FLOAT32_C( -114.26) } }, { { SIMDE_FLOAT32_C( -520.26), SIMDE_FLOAT32_C( 290.59) }, { SIMDE_FLOAT32_C( 275.50), SIMDE_FLOAT32_C( -573.98) }, { SIMDE_FLOAT32_C( -520.26), SIMDE_FLOAT32_C( 275.50) } }, { { SIMDE_FLOAT32_C( 376.60), SIMDE_FLOAT32_C( 324.13) }, { SIMDE_FLOAT32_C( 478.86), SIMDE_FLOAT32_C( 385.76) }, { SIMDE_FLOAT32_C( 376.60), SIMDE_FLOAT32_C( 478.86) } }, { { 
SIMDE_FLOAT32_C( -299.73), SIMDE_FLOAT32_C( 292.94) }, { SIMDE_FLOAT32_C( -120.55), SIMDE_FLOAT32_C( 448.35) }, { SIMDE_FLOAT32_C( -299.73), SIMDE_FLOAT32_C( -120.55) } }, { { SIMDE_FLOAT32_C( 411.84), SIMDE_FLOAT32_C( 757.01) }, { SIMDE_FLOAT32_C( -355.19), SIMDE_FLOAT32_C( -400.02) }, { SIMDE_FLOAT32_C( 411.84), SIMDE_FLOAT32_C( -355.19) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a, b, r; a = simde_vld1_f32(test_vec[i].a); b = simde_vld1_f32(test_vec[i].b); r = simde_vuzp1_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vuzp1_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { -INT8_C( 118), -INT8_C( 36), INT8_C( 91), -INT8_C( 109), -INT8_C( 14), INT8_C( 74), -INT8_C( 81), INT8_C( 58) }, { -INT8_C( 10), INT8_MIN, -INT8_C( 52), -INT8_C( 53), -INT8_C( 80), INT8_C( 35), -INT8_C( 119), INT8_C( 69) }, { -INT8_C( 118), INT8_C( 91), -INT8_C( 14), -INT8_C( 81), -INT8_C( 10), -INT8_C( 52), -INT8_C( 80), -INT8_C( 119) } }, { { -INT8_C( 37), INT8_C( 112), -INT8_C( 36), INT8_C( 99), INT8_C( 49), -INT8_C( 105), INT8_C( 96), INT8_C( 97) }, { INT8_C( 79), -INT8_C( 113), -INT8_C( 17), INT8_C( 16), -INT8_C( 104), INT8_C( 44), -INT8_C( 121), INT8_C( 35) }, { -INT8_C( 37), -INT8_C( 36), INT8_C( 49), INT8_C( 96), INT8_C( 79), -INT8_C( 17), -INT8_C( 104), -INT8_C( 121) } }, { { INT8_C( 8), -INT8_C( 30), -INT8_C( 74), -INT8_C( 5), INT8_C( 45), INT8_C( 101), INT8_C( 53), INT8_C( 35) }, { -INT8_C( 27), INT8_C( 1), -INT8_C( 18), -INT8_C( 107), INT8_C( 36), INT8_C( 119), -INT8_C( 37), -INT8_C( 1) }, { INT8_C( 8), -INT8_C( 74), INT8_C( 45), INT8_C( 53), -INT8_C( 27), -INT8_C( 18), INT8_C( 36), -INT8_C( 37) } }, { { -INT8_C( 25), -INT8_C( 73), INT8_C( 99), INT8_C( 25), INT8_C( 79), -INT8_C( 61), INT8_C( 122), -INT8_C( 98) }, { INT8_C( 83), INT8_C( 106), -INT8_C( 82), -INT8_C( 21), -INT8_C( 106), INT8_C( 53), 
INT8_C( 14), -INT8_C( 98) }, { -INT8_C( 25), INT8_C( 99), INT8_C( 79), INT8_C( 122), INT8_C( 83), -INT8_C( 82), -INT8_C( 106), INT8_C( 14) } }, { { INT8_C( 24), -INT8_C( 60), -INT8_C( 103), INT8_C( 69), INT8_C( 41), -INT8_C( 50), INT8_C( 104), INT8_C( 15) }, { -INT8_C( 49), INT8_C( 86), -INT8_C( 92), -INT8_C( 12), -INT8_C( 50), INT8_MAX, -INT8_C( 13), -INT8_C( 75) }, { INT8_C( 24), -INT8_C( 103), INT8_C( 41), INT8_C( 104), -INT8_C( 49), -INT8_C( 92), -INT8_C( 50), -INT8_C( 13) } }, { { INT8_C( 55), INT8_C( 86), -INT8_C( 50), -INT8_C( 122), INT8_C( 26), INT8_C( 73), INT8_C( 36), INT8_C( 109) }, { -INT8_C( 77), -INT8_C( 46), INT8_C( 88), INT8_C( 73), INT8_C( 7), INT8_C( 103), -INT8_C( 25), INT8_C( 31) }, { INT8_C( 55), -INT8_C( 50), INT8_C( 26), INT8_C( 36), -INT8_C( 77), INT8_C( 88), INT8_C( 7), -INT8_C( 25) } }, { { INT8_C( 43), -INT8_C( 127), INT8_C( 100), INT8_C( 85), INT8_C( 79), -INT8_C( 52), INT8_C( 100), INT8_C( 31) }, { INT8_C( 35), INT8_C( 8), INT8_C( 19), -INT8_C( 15), -INT8_C( 120), INT8_C( 6), -INT8_C( 90), -INT8_C( 65) }, { INT8_C( 43), INT8_C( 100), INT8_C( 79), INT8_C( 100), INT8_C( 35), INT8_C( 19), -INT8_C( 120), -INT8_C( 90) } }, { { INT8_C( 93), INT8_C( 117), INT8_C( 69), INT8_C( 119), -INT8_C( 66), INT8_C( 105), -INT8_C( 28), INT8_C( 113) }, { INT8_C( 59), INT8_C( 60), -INT8_C( 70), INT8_C( 66), -INT8_C( 93), -INT8_C( 95), INT8_C( 98), -INT8_C( 49) }, { INT8_C( 93), INT8_C( 69), -INT8_C( 66), -INT8_C( 28), INT8_C( 59), -INT8_C( 70), -INT8_C( 93), INT8_C( 98) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vuzp1_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vuzp1_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 4834), -INT16_C( 29643), -INT16_C( 562), INT16_C( 
6247) }, { -INT16_C( 31539), -INT16_C( 23021), -INT16_C( 19325), INT16_C( 7646) }, { -INT16_C( 4834), -INT16_C( 562), -INT16_C( 31539), -INT16_C( 19325) } }, { { -INT16_C( 24760), -INT16_C( 30696), -INT16_C( 19537), INT16_C( 32224) }, { INT16_C( 30005), -INT16_C( 11182), -INT16_C( 21755), INT16_C( 9051) }, { -INT16_C( 24760), -INT16_C( 19537), INT16_C( 30005), -INT16_C( 21755) } }, { { -INT16_C( 28520), INT16_C( 26287), INT16_C( 5773), INT16_C( 23422) }, { -INT16_C( 28262), INT16_C( 7681), -INT16_C( 8379), -INT16_C( 29381) }, { -INT16_C( 28520), INT16_C( 5773), -INT16_C( 28262), -INT16_C( 8379) } }, { { INT16_C( 21375), INT16_C( 11798), -INT16_C( 2553), INT16_C( 15531) }, { -INT16_C( 404), INT16_C( 28944), INT16_C( 27561), INT16_C( 16788) }, { INT16_C( 21375), -INT16_C( 2553), -INT16_C( 404), INT16_C( 27561) } }, { { INT16_C( 17404), -INT16_C( 30297), INT16_C( 9561), -INT16_C( 3100) }, { -INT16_C( 6474), -INT16_C( 1263), INT16_C( 19909), INT16_C( 17544) }, { INT16_C( 17404), INT16_C( 9561), -INT16_C( 6474), INT16_C( 19909) } }, { { -INT16_C( 24928), -INT16_C( 22670), INT16_C( 7829), INT16_C( 484) }, { -INT16_C( 3044), -INT16_C( 14990), INT16_C( 1632), INT16_C( 23558) }, { -INT16_C( 24928), INT16_C( 7829), -INT16_C( 3044), INT16_C( 1632) } }, { { -INT16_C( 21175), -INT16_C( 23835), -INT16_C( 13614), -INT16_C( 30571) }, { -INT16_C( 22608), INT16_C( 30083), INT16_C( 3060), -INT16_C( 27462) }, { -INT16_C( 21175), -INT16_C( 13614), -INT16_C( 22608), INT16_C( 3060) } }, { { INT16_C( 11434), INT16_C( 16188), INT16_C( 8266), INT16_C( 26176) }, { -INT16_C( 19948), INT16_C( 29739), INT16_C( 12728), INT16_C( 464) }, { INT16_C( 11434), INT16_C( 8266), -INT16_C( 19948), INT16_C( 12728) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vuzp1_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, 
simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vuzp1_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 1869944670), -INT32_C( 1067514177) }, { -INT32_C( 648958507), INT32_C( 857291726) }, { -INT32_C( 1869944670), -INT32_C( 648958507) } }, { { INT32_C( 1021903255), INT32_C( 2109018270) }, { -INT32_C( 1188524118), -INT32_C( 636618440) }, { INT32_C( 1021903255), -INT32_C( 1188524118) } }, { { -INT32_C( 1603627039), INT32_C( 1918945949) }, { INT32_C( 1162588535), -INT32_C( 2072484371) }, { -INT32_C( 1603627039), INT32_C( 1162588535) } }, { { INT32_C( 96493926), INT32_C( 478311794) }, { INT32_C( 1020635652), -INT32_C( 2062032221) }, { INT32_C( 96493926), INT32_C( 1020635652) } }, { { INT32_C( 371556729), -INT32_C( 1031240117) }, { INT32_C( 604492855), -INT32_C( 1616347335) }, { INT32_C( 371556729), INT32_C( 604492855) } }, { { INT32_C( 1403283681), -INT32_C( 496032034) }, { INT32_C( 1948206289), -INT32_C( 1611057626) }, { INT32_C( 1403283681), INT32_C( 1948206289) } }, { { INT32_C( 62201783), -INT32_C( 591053403) }, { INT32_C( 1241632017), INT32_C( 770353484) }, { INT32_C( 62201783), INT32_C( 1241632017) } }, { { -INT32_C( 260010478), -INT32_C( 2032996427) }, { INT32_C( 1509617971), -INT32_C( 537332697) }, { -INT32_C( 260010478), INT32_C( 1509617971) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vuzp1_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vuzp1_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(224), UINT8_C(140), UINT8_C(245), UINT8_C( 77), UINT8_C(190), UINT8_C( 25), UINT8_C(105), UINT8_C(180) }, { UINT8_C( 70), UINT8_C( 19), UINT8_C(231), UINT8_C(216), UINT8_C(120), UINT8_C( 45), 
UINT8_C(243), UINT8_C(217) }, { UINT8_C(224), UINT8_C(245), UINT8_C(190), UINT8_C(105), UINT8_C( 70), UINT8_C(231), UINT8_C(120), UINT8_C(243) } }, { { UINT8_C( 46), UINT8_C(188), UINT8_C(216), UINT8_C( 61), UINT8_C( 60), UINT8_C(108), UINT8_C(192), UINT8_C(214) }, { UINT8_C(216), UINT8_C( 93), UINT8_C( 95), UINT8_C( 31), UINT8_C( 37), UINT8_C(155), UINT8_C(131), UINT8_C( 6) }, { UINT8_C( 46), UINT8_C(216), UINT8_C( 60), UINT8_C(192), UINT8_C(216), UINT8_C( 95), UINT8_C( 37), UINT8_C(131) } }, { { UINT8_C( 39), UINT8_C(120), UINT8_C( 83), UINT8_C(229), UINT8_C(145), UINT8_C(188), UINT8_C(154), UINT8_C(216) }, { UINT8_C(207), UINT8_C(129), UINT8_C(176), UINT8_C( 71), UINT8_C(174), UINT8_C(164), UINT8_C( 32), UINT8_C(220) }, { UINT8_C( 39), UINT8_C( 83), UINT8_C(145), UINT8_C(154), UINT8_C(207), UINT8_C(176), UINT8_C(174), UINT8_C( 32) } }, { { UINT8_C( 96), UINT8_C(248), UINT8_C( 25), UINT8_C(157), UINT8_C(100), UINT8_C(217), UINT8_C(115), UINT8_C( 60) }, { UINT8_C( 54), UINT8_C(210), UINT8_C( 92), UINT8_C( 92), UINT8_C(109), UINT8_C(223), UINT8_C( 98), UINT8_C(149) }, { UINT8_C( 96), UINT8_C( 25), UINT8_C(100), UINT8_C(115), UINT8_C( 54), UINT8_C( 92), UINT8_C(109), UINT8_C( 98) } }, { { UINT8_C( 87), UINT8_C(181), UINT8_C(122), UINT8_C(232), UINT8_C(113), UINT8_C( 20), UINT8_C(192), UINT8_C( 64) }, { UINT8_C(149), UINT8_C(113), UINT8_C(135), UINT8_C( 68), UINT8_C( 21), UINT8_C(167), UINT8_C( 32), UINT8_C(117) }, { UINT8_C( 87), UINT8_C(122), UINT8_C(113), UINT8_C(192), UINT8_C(149), UINT8_C(135), UINT8_C( 21), UINT8_C( 32) } }, { { UINT8_C(159), UINT8_C( 58), UINT8_C( 18), UINT8_C( 3), UINT8_C( 19), UINT8_C(133), UINT8_C( 63), UINT8_C( 74) }, { UINT8_C( 88), UINT8_C(155), UINT8_C(166), UINT8_C(197), UINT8_C(122), UINT8_C( 8), UINT8_C( 90), UINT8_C(209) }, { UINT8_C(159), UINT8_C( 18), UINT8_C( 19), UINT8_C( 63), UINT8_C( 88), UINT8_C(166), UINT8_C(122), UINT8_C( 90) } }, { { UINT8_C(189), UINT8_C(213), UINT8_C(186), UINT8_C( 46), UINT8_C(233), UINT8_C(122), 
UINT8_C(110), UINT8_C(127) }, { UINT8_C(235), UINT8_C(245), UINT8_C(195), UINT8_C( 0), UINT8_C(156), UINT8_C(227), UINT8_C(118), UINT8_C( 59) }, { UINT8_C(189), UINT8_C(186), UINT8_C(233), UINT8_C(110), UINT8_C(235), UINT8_C(195), UINT8_C(156), UINT8_C(118) } }, { { UINT8_C( 29), UINT8_C(136), UINT8_C( 62), UINT8_C( 49), UINT8_C( 14), UINT8_C(125), UINT8_C(123), UINT8_C(102) }, { UINT8_C( 25), UINT8_C( 33), UINT8_C( 43), UINT8_C(147), UINT8_C( 41), UINT8_C(134), UINT8_C(101), UINT8_C(230) }, { UINT8_C( 29), UINT8_C( 62), UINT8_C( 14), UINT8_C(123), UINT8_C( 25), UINT8_C( 43), UINT8_C( 41), UINT8_C(101) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vuzp1_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vuzp1_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(38227), UINT16_C(36183), UINT16_C(19486), UINT16_C(49723) }, { UINT16_C(36067), UINT16_C(18109), UINT16_C( 2364), UINT16_C(26479) }, { UINT16_C(38227), UINT16_C(19486), UINT16_C(36067), UINT16_C( 2364) } }, { { UINT16_C(43498), UINT16_C(28068), UINT16_C(33746), UINT16_C(12344) }, { UINT16_C(23108), UINT16_C( 9831), UINT16_C(27483), UINT16_C(44840) }, { UINT16_C(43498), UINT16_C(33746), UINT16_C(23108), UINT16_C(27483) } }, { { UINT16_C(32512), UINT16_C( 7740), UINT16_C(30668), UINT16_C(45024) }, { UINT16_C(40195), UINT16_C(16629), UINT16_C(26022), UINT16_C(37031) }, { UINT16_C(32512), UINT16_C(30668), UINT16_C(40195), UINT16_C(26022) } }, { { UINT16_C(19470), UINT16_C(57598), UINT16_C(14031), UINT16_C( 4881) }, { UINT16_C(30864), UINT16_C(60217), UINT16_C(25059), UINT16_C(58266) }, { UINT16_C(19470), UINT16_C(14031), UINT16_C(30864), UINT16_C(25059) } }, { { UINT16_C(55265), UINT16_C(44289), UINT16_C(57934), UINT16_C(21084) 
}, { UINT16_C(20863), UINT16_C( 9874), UINT16_C(14774), UINT16_C(50358) }, { UINT16_C(55265), UINT16_C(57934), UINT16_C(20863), UINT16_C(14774) } }, { { UINT16_C(46213), UINT16_C(21669), UINT16_C(46826), UINT16_C(31335) }, { UINT16_C(41262), UINT16_C( 4454), UINT16_C( 2), UINT16_C(58356) }, { UINT16_C(46213), UINT16_C(46826), UINT16_C(41262), UINT16_C( 2) } }, { { UINT16_C(62935), UINT16_C( 9872), UINT16_C(60631), UINT16_C(22392) }, { UINT16_C( 2622), UINT16_C(62589), UINT16_C(13123), UINT16_C(51641) }, { UINT16_C(62935), UINT16_C(60631), UINT16_C( 2622), UINT16_C(13123) } }, { { UINT16_C(24296), UINT16_C(53789), UINT16_C(34068), UINT16_C(16973) }, { UINT16_C(45862), UINT16_C(10323), UINT16_C(18355), UINT16_C(35596) }, { UINT16_C(24296), UINT16_C(34068), UINT16_C(45862), UINT16_C(18355) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vuzp1_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vuzp1_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(1701003685), UINT32_C(3515233496) }, { UINT32_C(2993134067), UINT32_C(4134691062) }, { UINT32_C(1701003685), UINT32_C(2993134067) } }, { { UINT32_C(2824142393), UINT32_C(2314310222) }, { UINT32_C(3987908417), UINT32_C(2842669572) }, { UINT32_C(2824142393), UINT32_C(3987908417) } }, { { UINT32_C(3406811891), UINT32_C(4204565766) }, { UINT32_C( 615252782), UINT32_C(2619022947) }, { UINT32_C(3406811891), UINT32_C( 615252782) } }, { { UINT32_C(1782935324), UINT32_C(1123235329) }, { UINT32_C(3794839006), UINT32_C(1250729815) }, { UINT32_C(1782935324), UINT32_C(3794839006) } }, { { UINT32_C(2014681970), UINT32_C(1601352240) }, { UINT32_C( 428023477), UINT32_C(1488297532) }, { UINT32_C(2014681970), UINT32_C( 428023477) } }, { { UINT32_C( 
264436237), UINT32_C( 257013041) }, { UINT32_C(2985394522), UINT32_C(2482732321) }, { UINT32_C( 264436237), UINT32_C(2985394522) } }, { { UINT32_C(1225462040), UINT32_C(2024308419) }, { UINT32_C(3650169756), UINT32_C(3610331082) }, { UINT32_C(1225462040), UINT32_C(3650169756) } }, { { UINT32_C(1927738433), UINT32_C( 75577513) }, { UINT32_C(3669324729), UINT32_C( 158183920) }, { UINT32_C(1927738433), UINT32_C(3669324729) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vuzp1_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vuzp1q_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 44.64), SIMDE_FLOAT32_C( -905.59), SIMDE_FLOAT32_C( -768.48), SIMDE_FLOAT32_C( -818.27) }, { SIMDE_FLOAT32_C( 904.80), SIMDE_FLOAT32_C( -408.01), SIMDE_FLOAT32_C( 720.84), SIMDE_FLOAT32_C( 882.26) }, { SIMDE_FLOAT32_C( 44.64), SIMDE_FLOAT32_C( -768.48), SIMDE_FLOAT32_C( 904.80), SIMDE_FLOAT32_C( 720.84) } }, { { SIMDE_FLOAT32_C( -100.34), SIMDE_FLOAT32_C( -322.81), SIMDE_FLOAT32_C( 486.25), SIMDE_FLOAT32_C( -834.82) }, { SIMDE_FLOAT32_C( 981.41), SIMDE_FLOAT32_C( -245.05), SIMDE_FLOAT32_C( -204.05), SIMDE_FLOAT32_C( -317.62) }, { SIMDE_FLOAT32_C( -100.34), SIMDE_FLOAT32_C( 486.25), SIMDE_FLOAT32_C( 981.41), SIMDE_FLOAT32_C( -204.05) } }, { { SIMDE_FLOAT32_C( -41.99), SIMDE_FLOAT32_C( 859.14), SIMDE_FLOAT32_C( -108.92), SIMDE_FLOAT32_C( 737.09) }, { SIMDE_FLOAT32_C( 401.23), SIMDE_FLOAT32_C( -9.98), SIMDE_FLOAT32_C( -396.63), SIMDE_FLOAT32_C( 925.63) }, { SIMDE_FLOAT32_C( -41.99), SIMDE_FLOAT32_C( -108.92), SIMDE_FLOAT32_C( 401.23), SIMDE_FLOAT32_C( -396.63) } }, { { SIMDE_FLOAT32_C( -768.54), SIMDE_FLOAT32_C( 832.74), SIMDE_FLOAT32_C( 920.95), SIMDE_FLOAT32_C( -472.96) }, { 
SIMDE_FLOAT32_C( -329.25), SIMDE_FLOAT32_C( -890.65), SIMDE_FLOAT32_C( -956.60), SIMDE_FLOAT32_C( 715.39) }, { SIMDE_FLOAT32_C( -768.54), SIMDE_FLOAT32_C( 920.95), SIMDE_FLOAT32_C( -329.25), SIMDE_FLOAT32_C( -956.60) } }, { { SIMDE_FLOAT32_C( -796.25), SIMDE_FLOAT32_C( -725.08), SIMDE_FLOAT32_C( 897.12), SIMDE_FLOAT32_C( -891.44) }, { SIMDE_FLOAT32_C( -133.09), SIMDE_FLOAT32_C( 617.96), SIMDE_FLOAT32_C( 990.81), SIMDE_FLOAT32_C( 766.56) }, { SIMDE_FLOAT32_C( -796.25), SIMDE_FLOAT32_C( 897.12), SIMDE_FLOAT32_C( -133.09), SIMDE_FLOAT32_C( 990.81) } }, { { SIMDE_FLOAT32_C( -704.85), SIMDE_FLOAT32_C( 477.06), SIMDE_FLOAT32_C( 931.74), SIMDE_FLOAT32_C( -723.44) }, { SIMDE_FLOAT32_C( -767.99), SIMDE_FLOAT32_C( -272.30), SIMDE_FLOAT32_C( -41.06), SIMDE_FLOAT32_C( 190.02) }, { SIMDE_FLOAT32_C( -704.85), SIMDE_FLOAT32_C( 931.74), SIMDE_FLOAT32_C( -767.99), SIMDE_FLOAT32_C( -41.06) } }, { { SIMDE_FLOAT32_C( -413.17), SIMDE_FLOAT32_C( 850.02), SIMDE_FLOAT32_C( -72.89), SIMDE_FLOAT32_C( 988.07) }, { SIMDE_FLOAT32_C( -159.95), SIMDE_FLOAT32_C( 530.48), SIMDE_FLOAT32_C( 913.70), SIMDE_FLOAT32_C( 71.51) }, { SIMDE_FLOAT32_C( -413.17), SIMDE_FLOAT32_C( -72.89), SIMDE_FLOAT32_C( -159.95), SIMDE_FLOAT32_C( 913.70) } }, { { SIMDE_FLOAT32_C( 363.22), SIMDE_FLOAT32_C( 834.65), SIMDE_FLOAT32_C( 598.55), SIMDE_FLOAT32_C( -966.03) }, { SIMDE_FLOAT32_C( 943.99), SIMDE_FLOAT32_C( 641.95), SIMDE_FLOAT32_C( 749.36), SIMDE_FLOAT32_C( -852.25) }, { SIMDE_FLOAT32_C( 363.22), SIMDE_FLOAT32_C( 598.55), SIMDE_FLOAT32_C( 943.99), SIMDE_FLOAT32_C( 749.36) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vuzp1q_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vuzp1q_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 
r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 322.53), SIMDE_FLOAT64_C( 856.49) }, { SIMDE_FLOAT64_C( 802.23), SIMDE_FLOAT64_C( 452.91) }, { SIMDE_FLOAT64_C( 322.53), SIMDE_FLOAT64_C( 802.23) } }, { { SIMDE_FLOAT64_C( 459.71), SIMDE_FLOAT64_C( -770.97) }, { SIMDE_FLOAT64_C( 594.98), SIMDE_FLOAT64_C( 107.24) }, { SIMDE_FLOAT64_C( 459.71), SIMDE_FLOAT64_C( 594.98) } }, { { SIMDE_FLOAT64_C( 374.90), SIMDE_FLOAT64_C( 400.47) }, { SIMDE_FLOAT64_C( 344.98), SIMDE_FLOAT64_C( -972.39) }, { SIMDE_FLOAT64_C( 374.90), SIMDE_FLOAT64_C( 344.98) } }, { { SIMDE_FLOAT64_C( -625.14), SIMDE_FLOAT64_C( 7.91) }, { SIMDE_FLOAT64_C( 75.57), SIMDE_FLOAT64_C( 770.62) }, { SIMDE_FLOAT64_C( -625.14), SIMDE_FLOAT64_C( 75.57) } }, { { SIMDE_FLOAT64_C( 809.69), SIMDE_FLOAT64_C( -906.73) }, { SIMDE_FLOAT64_C( 321.38), SIMDE_FLOAT64_C( -635.84) }, { SIMDE_FLOAT64_C( 809.69), SIMDE_FLOAT64_C( 321.38) } }, { { SIMDE_FLOAT64_C( 759.95), SIMDE_FLOAT64_C( 514.69) }, { SIMDE_FLOAT64_C( -113.83), SIMDE_FLOAT64_C( 699.68) }, { SIMDE_FLOAT64_C( 759.95), SIMDE_FLOAT64_C( -113.83) } }, { { SIMDE_FLOAT64_C( 226.46), SIMDE_FLOAT64_C( 68.20) }, { SIMDE_FLOAT64_C( -55.03), SIMDE_FLOAT64_C( -667.20) }, { SIMDE_FLOAT64_C( 226.46), SIMDE_FLOAT64_C( -55.03) } }, { { SIMDE_FLOAT64_C( 435.49), SIMDE_FLOAT64_C( -654.23) }, { SIMDE_FLOAT64_C( -732.44), SIMDE_FLOAT64_C( -241.98) }, { SIMDE_FLOAT64_C( 435.49), SIMDE_FLOAT64_C( -732.44) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vuzp1q_f64(a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vuzp1q_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 38), INT8_C( 121), -INT8_C( 35), -INT8_C( 2), INT8_C( 99), -INT8_C( 33), INT8_C( 12), INT8_C( 51), INT8_C( 33), -INT8_C( 30), 
-INT8_C( 74), -INT8_C( 115), -INT8_C( 20), INT8_C( 79), INT8_C( 110), INT8_C( 93) }, { INT8_C( 31), INT8_C( 91), -INT8_C( 6), INT8_C( 32), INT8_C( 27), -INT8_C( 14), -INT8_C( 75), -INT8_C( 53), -INT8_C( 8), INT8_C( 39), -INT8_C( 107), -INT8_C( 36), -INT8_C( 81), -INT8_C( 19), -INT8_C( 26), -INT8_C( 43) }, { INT8_C( 38), -INT8_C( 35), INT8_C( 99), INT8_C( 12), INT8_C( 33), -INT8_C( 74), -INT8_C( 20), INT8_C( 110), INT8_C( 31), -INT8_C( 6), INT8_C( 27), -INT8_C( 75), -INT8_C( 8), -INT8_C( 107), -INT8_C( 81), -INT8_C( 26) } }, { { INT8_C( 102), -INT8_C( 61), -INT8_C( 45), -INT8_C( 55), -INT8_C( 94), -INT8_C( 33), -INT8_C( 4), -INT8_C( 61), -INT8_C( 62), -INT8_C( 78), INT8_C( 80), -INT8_C( 82), INT8_C( 1), -INT8_C( 66), INT8_C( 11), INT8_C( 33) }, { INT8_C( 26), INT8_C( 5), INT8_C( 65), INT8_C( 53), -INT8_C( 9), -INT8_C( 10), INT8_C( 0), -INT8_C( 16), INT8_C( 30), -INT8_C( 107), -INT8_C( 52), -INT8_C( 51), -INT8_C( 126), -INT8_C( 78), -INT8_C( 94), -INT8_C( 24) }, { INT8_C( 102), -INT8_C( 45), -INT8_C( 94), -INT8_C( 4), -INT8_C( 62), INT8_C( 80), INT8_C( 1), INT8_C( 11), INT8_C( 26), INT8_C( 65), -INT8_C( 9), INT8_C( 0), INT8_C( 30), -INT8_C( 52), -INT8_C( 126), -INT8_C( 94) } }, { { INT8_C( 117), INT8_C( 117), -INT8_C( 79), INT8_C( 23), INT8_C( 84), -INT8_C( 83), -INT8_C( 37), INT8_C( 22), INT8_C( 96), INT8_C( 43), -INT8_C( 59), INT8_C( 97), -INT8_C( 22), -INT8_C( 48), -INT8_C( 126), INT8_C( 4) }, { -INT8_C( 42), -INT8_C( 60), INT8_C( 57), -INT8_C( 51), -INT8_C( 70), INT8_C( 57), -INT8_C( 67), -INT8_C( 40), -INT8_C( 50), -INT8_C( 119), -INT8_C( 91), INT8_C( 81), INT8_C( 59), INT8_C( 71), INT8_C( 57), -INT8_C( 80) }, { INT8_C( 117), -INT8_C( 79), INT8_C( 84), -INT8_C( 37), INT8_C( 96), -INT8_C( 59), -INT8_C( 22), -INT8_C( 126), -INT8_C( 42), INT8_C( 57), -INT8_C( 70), -INT8_C( 67), -INT8_C( 50), -INT8_C( 91), INT8_C( 59), INT8_C( 57) } }, { { -INT8_C( 68), -INT8_C( 21), -INT8_C( 56), INT8_C( 17), -INT8_C( 104), -INT8_C( 93), INT8_C( 39), -INT8_C( 8), -INT8_C( 50), 
-INT8_C( 20), INT8_C( 90), -INT8_C( 72), -INT8_C( 67), -INT8_C( 36), -INT8_C( 68), -INT8_C( 109) }, { -INT8_C( 96), -INT8_C( 11), INT8_C( 96), INT8_C( 91), INT8_C( 46), INT8_C( 30), INT8_C( 51), -INT8_C( 3), -INT8_C( 89), -INT8_C( 39), INT8_C( 78), -INT8_C( 29), INT8_C( 32), -INT8_C( 121), -INT8_C( 109), -INT8_C( 35) }, { -INT8_C( 68), -INT8_C( 56), -INT8_C( 104), INT8_C( 39), -INT8_C( 50), INT8_C( 90), -INT8_C( 67), -INT8_C( 68), -INT8_C( 96), INT8_C( 96), INT8_C( 46), INT8_C( 51), -INT8_C( 89), INT8_C( 78), INT8_C( 32), -INT8_C( 109) } }, { { INT8_C( 114), INT8_C( 91), -INT8_C( 18), INT8_C( 11), -INT8_C( 2), INT8_C( 21), INT8_C( 3), -INT8_C( 51), INT8_C( 2), INT8_C( 93), -INT8_C( 123), -INT8_C( 65), INT8_C( 58), INT8_C( 66), INT8_C( 82), -INT8_C( 38) }, { INT8_C( 55), -INT8_C( 78), INT8_C( 53), INT8_C( 102), -INT8_C( 48), INT8_C( 105), INT8_C( 99), INT8_C( 120), INT8_C( 66), -INT8_C( 79), INT8_C( 91), INT8_C( 98), INT8_C( 56), -INT8_C( 18), INT8_C( 63), -INT8_C( 85) }, { INT8_C( 114), -INT8_C( 18), -INT8_C( 2), INT8_C( 3), INT8_C( 2), -INT8_C( 123), INT8_C( 58), INT8_C( 82), INT8_C( 55), INT8_C( 53), -INT8_C( 48), INT8_C( 99), INT8_C( 66), INT8_C( 91), INT8_C( 56), INT8_C( 63) } }, { { INT8_C( 74), INT8_C( 45), -INT8_C( 74), INT8_C( 72), INT8_C( 67), -INT8_C( 71), INT8_C( 21), INT8_C( 69), INT8_C( 23), -INT8_C( 101), INT8_C( 4), INT8_C( 81), -INT8_C( 35), INT8_C( 86), INT8_C( 43), INT8_C( 20) }, { INT8_C( 8), INT8_C( 97), INT8_C( 122), -INT8_C( 39), -INT8_C( 54), -INT8_C( 35), INT8_C( 81), INT8_C( 12), -INT8_C( 114), -INT8_C( 84), INT8_C( 110), -INT8_C( 57), -INT8_C( 102), -INT8_C( 82), INT8_C( 114), -INT8_C( 28) }, { INT8_C( 74), -INT8_C( 74), INT8_C( 67), INT8_C( 21), INT8_C( 23), INT8_C( 4), -INT8_C( 35), INT8_C( 43), INT8_C( 8), INT8_C( 122), -INT8_C( 54), INT8_C( 81), -INT8_C( 114), INT8_C( 110), -INT8_C( 102), INT8_C( 114) } }, { { -INT8_C( 37), INT8_C( 40), INT8_C( 45), INT8_C( 30), -INT8_C( 31), INT8_C( 66), INT8_C( 99), -INT8_C( 8), -INT8_C( 35), INT8_C( 
103), INT8_C( 73), -INT8_C( 70), -INT8_C( 67), INT8_C( 117), -INT8_C( 49), -INT8_C( 58) }, { -INT8_C( 42), INT8_C( 73), -INT8_C( 97), -INT8_C( 96), INT8_C( 39), -INT8_C( 16), -INT8_C( 84), -INT8_C( 75), -INT8_C( 100), INT8_C( 26), INT8_C( 124), INT8_C( 54), -INT8_C( 56), -INT8_C( 18), INT8_C( 27), -INT8_C( 92) }, { -INT8_C( 37), INT8_C( 45), -INT8_C( 31), INT8_C( 99), -INT8_C( 35), INT8_C( 73), -INT8_C( 67), -INT8_C( 49), -INT8_C( 42), -INT8_C( 97), INT8_C( 39), -INT8_C( 84), -INT8_C( 100), INT8_C( 124), -INT8_C( 56), INT8_C( 27) } }, { { INT8_C( 22), INT8_C( 72), -INT8_C( 62), -INT8_C( 8), -INT8_C( 118), INT8_C( 38), -INT8_C( 16), INT8_C( 104), -INT8_C( 115), INT8_C( 58), INT8_C( 34), INT8_C( 75), -INT8_C( 81), -INT8_C( 15), INT8_C( 17), -INT8_C( 123) }, { INT8_C( 59), -INT8_C( 80), INT8_C( 37), INT8_C( 98), -INT8_C( 96), -INT8_C( 47), INT8_C( 23), INT8_C( 60), -INT8_C( 21), -INT8_C( 108), INT8_C( 114), -INT8_C( 76), -INT8_C( 126), -INT8_C( 115), INT8_C( 88), -INT8_C( 103) }, { INT8_C( 22), -INT8_C( 62), -INT8_C( 118), -INT8_C( 16), -INT8_C( 115), INT8_C( 34), -INT8_C( 81), INT8_C( 17), INT8_C( 59), INT8_C( 37), -INT8_C( 96), INT8_C( 23), -INT8_C( 21), INT8_C( 114), -INT8_C( 126), INT8_C( 88) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vuzp1q_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vuzp1q_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 28802), -INT16_C( 8035), -INT16_C( 32633), -INT16_C( 25299), -INT16_C( 17451), -INT16_C( 23212), -INT16_C( 14430), -INT16_C( 10436) }, { INT16_C( 120), -INT16_C( 141), -INT16_C( 24402), INT16_C( 14459), INT16_C( 3588), -INT16_C( 13077), INT16_C( 29855), INT16_C( 8734) }, { INT16_C( 28802), -INT16_C( 32633), -INT16_C( 17451), 
-INT16_C( 14430), INT16_C( 120), -INT16_C( 24402), INT16_C( 3588), INT16_C( 29855) } }, { { -INT16_C( 17179), INT16_C( 27650), INT16_C( 12092), INT16_C( 4362), INT16_C( 24298), -INT16_C( 29514), -INT16_C( 3546), -INT16_C( 24989) }, { -INT16_C( 10254), -INT16_C( 24419), INT16_C( 6263), INT16_C( 31704), -INT16_C( 15322), -INT16_C( 14777), INT16_C( 25912), INT16_C( 7656) }, { -INT16_C( 17179), INT16_C( 12092), INT16_C( 24298), -INT16_C( 3546), -INT16_C( 10254), INT16_C( 6263), -INT16_C( 15322), INT16_C( 25912) } }, { { -INT16_C( 5599), INT16_C( 23946), -INT16_C( 27623), INT16_C( 878), INT16_C( 9458), INT16_C( 6287), -INT16_C( 3561), INT16_C( 2486) }, { INT16_C( 21449), INT16_C( 16554), -INT16_C( 32149), -INT16_C( 27973), INT16_C( 582), INT16_C( 32600), INT16_C( 16488), -INT16_C( 30308) }, { -INT16_C( 5599), -INT16_C( 27623), INT16_C( 9458), -INT16_C( 3561), INT16_C( 21449), -INT16_C( 32149), INT16_C( 582), INT16_C( 16488) } }, { { INT16_C( 9770), INT16_C( 17383), INT16_C( 21946), -INT16_C( 21178), -INT16_C( 10886), -INT16_C( 28219), INT16_C( 31943), -INT16_C( 28262) }, { INT16_C( 17615), INT16_C( 15313), -INT16_C( 29241), INT16_C( 3533), INT16_C( 9615), -INT16_C( 2164), INT16_C( 10597), -INT16_C( 28799) }, { INT16_C( 9770), INT16_C( 21946), -INT16_C( 10886), INT16_C( 31943), INT16_C( 17615), -INT16_C( 29241), INT16_C( 9615), INT16_C( 10597) } }, { { INT16_C( 26703), INT16_C( 2770), INT16_C( 6333), INT16_C( 14263), INT16_C( 31981), -INT16_C( 19256), INT16_C( 25592), -INT16_C( 14267) }, { INT16_C( 6055), INT16_C( 28163), -INT16_C( 12124), INT16_C( 13180), INT16_C( 2293), INT16_C( 23083), -INT16_C( 21455), -INT16_C( 32279) }, { INT16_C( 26703), INT16_C( 6333), INT16_C( 31981), INT16_C( 25592), INT16_C( 6055), -INT16_C( 12124), INT16_C( 2293), -INT16_C( 21455) } }, { { -INT16_C( 17644), -INT16_C( 11893), INT16_C( 17107), -INT16_C( 16375), -INT16_C( 11842), -INT16_C( 18572), -INT16_C( 17868), -INT16_C( 9089) }, { -INT16_C( 32047), INT16_C( 30026), -INT16_C( 14766), 
INT16_C( 18344), -INT16_C( 11313), INT16_C( 161), -INT16_C( 30081), -INT16_C( 27775) }, { -INT16_C( 17644), INT16_C( 17107), -INT16_C( 11842), -INT16_C( 17868), -INT16_C( 32047), -INT16_C( 14766), -INT16_C( 11313), -INT16_C( 30081) } }, { { INT16_C( 3141), INT16_C( 6245), INT16_C( 28238), INT16_C( 3544), INT16_C( 19519), INT16_C( 29892), INT16_C( 17158), -INT16_C( 10416) }, { -INT16_C( 25915), INT16_C( 5964), -INT16_C( 2719), INT16_C( 12382), -INT16_C( 56), INT16_C( 18480), -INT16_C( 19831), -INT16_C( 12581) }, { INT16_C( 3141), INT16_C( 28238), INT16_C( 19519), INT16_C( 17158), -INT16_C( 25915), -INT16_C( 2719), -INT16_C( 56), -INT16_C( 19831) } }, { { INT16_C( 16574), INT16_C( 3558), -INT16_C( 16722), -INT16_C( 4582), -INT16_C( 8694), INT16_C( 4450), -INT16_C( 19935), -INT16_C( 6424) }, { INT16_C( 13644), -INT16_C( 20995), INT16_C( 23338), -INT16_C( 3363), INT16_C( 3674), -INT16_C( 7366), INT16_C( 5824), INT16_C( 32433) }, { INT16_C( 16574), -INT16_C( 16722), -INT16_C( 8694), -INT16_C( 19935), INT16_C( 13644), INT16_C( 23338), INT16_C( 3674), INT16_C( 5824) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vuzp1q_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vuzp1q_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 1658835482), INT32_C( 1386526692), INT32_C( 457295551), -INT32_C( 1136371809) }, { -INT32_C( 238751028), -INT32_C( 509259920), -INT32_C( 1935223641), INT32_C( 1865224840) }, { -INT32_C( 1658835482), INT32_C( 457295551), -INT32_C( 238751028), -INT32_C( 1935223641) } }, { { INT32_C( 604785983), -INT32_C( 998854652), INT32_C( 383760502), -INT32_C( 606985457) }, { -INT32_C( 2033412330), -INT32_C( 1905823258), -INT32_C( 837153467), INT32_C( 1597851424) }, { 
INT32_C( 604785983), INT32_C( 383760502), -INT32_C( 2033412330), -INT32_C( 837153467) } }, { { -INT32_C( 1719449195), INT32_C( 1885207289), -INT32_C( 1048166990), INT32_C( 1989957728) }, { -INT32_C( 688035601), INT32_C( 526673113), -INT32_C( 1829929358), INT32_C( 1525754565) }, { -INT32_C( 1719449195), -INT32_C( 1048166990), -INT32_C( 688035601), -INT32_C( 1829929358) } }, { { INT32_C( 1827960179), INT32_C( 568086895), -INT32_C( 270376306), -INT32_C( 1436188997) }, { -INT32_C( 1082105114), INT32_C( 970908871), INT32_C( 684444514), INT32_C( 1753398517) }, { INT32_C( 1827960179), -INT32_C( 270376306), -INT32_C( 1082105114), INT32_C( 684444514) } }, { { -INT32_C( 1596623311), INT32_C( 1455534536), -INT32_C( 817519852), INT32_C( 125414177) }, { -INT32_C( 725091827), INT32_C( 1091413470), INT32_C( 1718212721), -INT32_C( 959452267) }, { -INT32_C( 1596623311), -INT32_C( 817519852), -INT32_C( 725091827), INT32_C( 1718212721) } }, { { INT32_C( 711435362), INT32_C( 1770006613), -INT32_C( 315046196), INT32_C( 2130031217) }, { -INT32_C( 1974223700), -INT32_C( 758423455), -INT32_C( 835111879), -INT32_C( 2104227808) }, { INT32_C( 711435362), -INT32_C( 315046196), -INT32_C( 1974223700), -INT32_C( 835111879) } }, { { INT32_C( 28113836), -INT32_C( 261411804), INT32_C( 1675469810), INT32_C( 31642197) }, { -INT32_C( 259246706), -INT32_C( 826124395), -INT32_C( 1398998132), -INT32_C( 1355927293) }, { INT32_C( 28113836), INT32_C( 1675469810), -INT32_C( 259246706), -INT32_C( 1398998132) } }, { { INT32_C( 1353833004), -INT32_C( 113239034), INT32_C( 358358719), INT32_C( 2132164336) }, { INT32_C( 158311027), -INT32_C( 2032717318), INT32_C( 808612909), -INT32_C( 773824347) }, { INT32_C( 1353833004), INT32_C( 358358719), INT32_C( 158311027), INT32_C( 808612909) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vuzp1q_s32(a, b); 
simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vuzp1q_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { INT64_C( 5827239787330973229), INT64_C( 2915356885086758845) }, { INT64_C( 5814266470607118820), -INT64_C( 6457386186900046585) }, { INT64_C( 5827239787330973229), INT64_C( 5814266470607118820) } }, { { -INT64_C( 8901666173858462609), -INT64_C( 2476975425049424684) }, { INT64_C( 1531584572990988644), -INT64_C( 4655526498762125499) }, { -INT64_C( 8901666173858462609), INT64_C( 1531584572990988644) } }, { { INT64_C( 3805361778811302787), -INT64_C( 4197832719869321752) }, { -INT64_C( 2012555291946232082), -INT64_C( 5650602259715461498) }, { INT64_C( 3805361778811302787), -INT64_C( 2012555291946232082) } }, { { -INT64_C( 4679296260350641286), -INT64_C( 553839359093064663) }, { -INT64_C( 8991430070435051851), INT64_C( 6761010124897770483) }, { -INT64_C( 4679296260350641286), -INT64_C( 8991430070435051851) } }, { { -INT64_C( 4179543197053563031), -INT64_C( 5547132927536554237) }, { -INT64_C( 1070378977852917595), -INT64_C( 3000552466135482853) }, { -INT64_C( 4179543197053563031), -INT64_C( 1070378977852917595) } }, { { -INT64_C( 2516079689914149963), -INT64_C( 6167699460678064714) }, { -INT64_C( 3975145004899471641), -INT64_C( 9064581884553008912) }, { -INT64_C( 2516079689914149963), -INT64_C( 3975145004899471641) } }, { { INT64_C( 1688260941073042357), INT64_C( 3313578703455999821) }, { -INT64_C( 6118448256376551264), -INT64_C( 5467584354072338993) }, { INT64_C( 1688260941073042357), -INT64_C( 6118448256376551264) } }, { { INT64_C( 2882500414803341959), -INT64_C( 6954552889939670007) }, { INT64_C( 4910053296832351934), -INT64_C( 1110982377861010208) }, { INT64_C( 2882500414803341959), INT64_C( 4910053296832351934) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); 
simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_vuzp1q_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vuzp1q_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C( 13), UINT8_C( 30), UINT8_C(195), UINT8_C(224), UINT8_C( 7), UINT8_C( 80), UINT8_C(120), UINT8_C( 64), UINT8_C(227), UINT8_C( 90), UINT8_C(198), UINT8_C( 93), UINT8_C( 80), UINT8_C( 77), UINT8_C( 92), UINT8_C( 51) }, { UINT8_C(139), UINT8_C( 36), UINT8_C( 9), UINT8_C( 63), UINT8_C(133), UINT8_C( 65), UINT8_C( 27), UINT8_C(182), UINT8_C(157), UINT8_C(178), UINT8_C(119), UINT8_C(192), UINT8_C(218), UINT8_C( 9), UINT8_C(107), UINT8_C(231) }, { UINT8_C( 13), UINT8_C(195), UINT8_C( 7), UINT8_C(120), UINT8_C(227), UINT8_C(198), UINT8_C( 80), UINT8_C( 92), UINT8_C(139), UINT8_C( 9), UINT8_C(133), UINT8_C( 27), UINT8_C(157), UINT8_C(119), UINT8_C(218), UINT8_C(107) } }, { { UINT8_C( 39), UINT8_C( 46), UINT8_C(199), UINT8_C( 46), UINT8_C(127), UINT8_C( 64), UINT8_C(110), UINT8_C( 98), UINT8_C(154), UINT8_C( 53), UINT8_C(191), UINT8_C(234), UINT8_C(130), UINT8_C( 27), UINT8_C( 29), UINT8_C( 14) }, { UINT8_C( 63), UINT8_C( 38), UINT8_C( 77), UINT8_C(197), UINT8_C(104), UINT8_C(104), UINT8_C(123), UINT8_C( 5), UINT8_C( 26), UINT8_C(242), UINT8_C(197), UINT8_C(244), UINT8_C(251), UINT8_C( 48), UINT8_C(220), UINT8_C( 35) }, { UINT8_C( 39), UINT8_C(199), UINT8_C(127), UINT8_C(110), UINT8_C(154), UINT8_C(191), UINT8_C(130), UINT8_C( 29), UINT8_C( 63), UINT8_C( 77), UINT8_C(104), UINT8_C(123), UINT8_C( 26), UINT8_C(197), UINT8_C(251), UINT8_C(220) } }, { { UINT8_C( 95), UINT8_C(163), UINT8_C( 81), UINT8_C(222), UINT8_C(227), UINT8_C(192), UINT8_C( 64), UINT8_C(126), UINT8_C(245), UINT8_MAX, UINT8_C(104), UINT8_C(119), UINT8_C( 26), UINT8_C(134), UINT8_C(133), UINT8_C( 89) }, { UINT8_C(172), UINT8_C(210), UINT8_C( 30), UINT8_C( 20), UINT8_C( 58), UINT8_C(153), 
UINT8_C( 26), UINT8_C( 85), UINT8_C(139), UINT8_C(223), UINT8_C( 73), UINT8_C(135), UINT8_C( 16), UINT8_C( 37), UINT8_C(170), UINT8_C(111) }, { UINT8_C( 95), UINT8_C( 81), UINT8_C(227), UINT8_C( 64), UINT8_C(245), UINT8_C(104), UINT8_C( 26), UINT8_C(133), UINT8_C(172), UINT8_C( 30), UINT8_C( 58), UINT8_C( 26), UINT8_C(139), UINT8_C( 73), UINT8_C( 16), UINT8_C(170) } }, { { UINT8_C(201), UINT8_C(251), UINT8_C( 77), UINT8_C(172), UINT8_C(187), UINT8_C(141), UINT8_C( 42), UINT8_C(176), UINT8_C(140), UINT8_C(147), UINT8_C( 40), UINT8_C(166), UINT8_C( 25), UINT8_C(173), UINT8_MAX, UINT8_C(197) }, { UINT8_C(128), UINT8_C( 30), UINT8_C(218), UINT8_C(186), UINT8_C(183), UINT8_C(244), UINT8_C( 15), UINT8_C( 67), UINT8_C(211), UINT8_C( 89), UINT8_C(202), UINT8_C(227), UINT8_C(126), UINT8_C(116), UINT8_C( 82), UINT8_C( 71) }, { UINT8_C(201), UINT8_C( 77), UINT8_C(187), UINT8_C( 42), UINT8_C(140), UINT8_C( 40), UINT8_C( 25), UINT8_MAX, UINT8_C(128), UINT8_C(218), UINT8_C(183), UINT8_C( 15), UINT8_C(211), UINT8_C(202), UINT8_C(126), UINT8_C( 82) } }, { { UINT8_C(111), UINT8_C(159), UINT8_C(244), UINT8_C( 43), UINT8_C( 44), UINT8_C( 30), UINT8_C(219), UINT8_C(184), UINT8_C(177), UINT8_C( 3), UINT8_C( 94), UINT8_C(202), UINT8_C(177), UINT8_C( 94), UINT8_C(144), UINT8_C( 49) }, { UINT8_C(124), UINT8_C(106), UINT8_C(235), UINT8_C( 51), UINT8_C( 94), UINT8_C(251), UINT8_C(118), UINT8_C( 49), UINT8_C( 84), UINT8_C( 64), UINT8_C( 21), UINT8_C(210), UINT8_C(180), UINT8_C(103), UINT8_C( 26), UINT8_C( 36) }, { UINT8_C(111), UINT8_C(244), UINT8_C( 44), UINT8_C(219), UINT8_C(177), UINT8_C( 94), UINT8_C(177), UINT8_C(144), UINT8_C(124), UINT8_C(235), UINT8_C( 94), UINT8_C(118), UINT8_C( 84), UINT8_C( 21), UINT8_C(180), UINT8_C( 26) } }, { { UINT8_C( 7), UINT8_C( 14), UINT8_C( 79), UINT8_C( 51), UINT8_C( 44), UINT8_C( 42), UINT8_C(236), UINT8_C(222), UINT8_C( 46), UINT8_C( 74), UINT8_C(168), UINT8_C(223), UINT8_C(168), UINT8_C( 56), UINT8_C( 16), UINT8_C( 36) }, { UINT8_C(162), UINT8_C(251), 
UINT8_C( 88), UINT8_C( 0), UINT8_C(246), UINT8_C(206), UINT8_C( 50), UINT8_C( 74), UINT8_C( 15), UINT8_C( 71), UINT8_C( 29), UINT8_C(195), UINT8_C(174), UINT8_C( 55), UINT8_C(231), UINT8_C(181) }, { UINT8_C( 7), UINT8_C( 79), UINT8_C( 44), UINT8_C(236), UINT8_C( 46), UINT8_C(168), UINT8_C(168), UINT8_C( 16), UINT8_C(162), UINT8_C( 88), UINT8_C(246), UINT8_C( 50), UINT8_C( 15), UINT8_C( 29), UINT8_C(174), UINT8_C(231) } }, { { UINT8_C( 69), UINT8_C( 54), UINT8_C(233), UINT8_C(113), UINT8_C( 97), UINT8_C(213), UINT8_C( 79), UINT8_C(143), UINT8_C( 31), UINT8_C(248), UINT8_C(110), UINT8_C(200), UINT8_C( 48), UINT8_C(126), UINT8_C(236), UINT8_C(211) }, { UINT8_C(121), UINT8_C( 68), UINT8_C(211), UINT8_C(112), UINT8_C( 19), UINT8_C( 5), UINT8_C(186), UINT8_C( 34), UINT8_C( 76), UINT8_C(215), UINT8_C(229), UINT8_C(251), UINT8_C( 14), UINT8_C(205), UINT8_C(176), UINT8_C( 83) }, { UINT8_C( 69), UINT8_C(233), UINT8_C( 97), UINT8_C( 79), UINT8_C( 31), UINT8_C(110), UINT8_C( 48), UINT8_C(236), UINT8_C(121), UINT8_C(211), UINT8_C( 19), UINT8_C(186), UINT8_C( 76), UINT8_C(229), UINT8_C( 14), UINT8_C(176) } }, { { UINT8_C( 3), UINT8_C(153), UINT8_C(197), UINT8_C(100), UINT8_C(110), UINT8_C( 20), UINT8_C(243), UINT8_C(142), UINT8_C( 12), UINT8_C( 97), UINT8_C( 86), UINT8_C( 61), UINT8_C(223), UINT8_C( 66), UINT8_C( 16), UINT8_C( 89) }, { UINT8_C(135), UINT8_C(227), UINT8_C(201), UINT8_C(154), UINT8_C(233), UINT8_C(131), UINT8_C(188), UINT8_C( 53), UINT8_C( 91), UINT8_C(161), UINT8_C( 48), UINT8_C(105), UINT8_C(110), UINT8_C(225), UINT8_C(189), UINT8_C(114) }, { UINT8_C( 3), UINT8_C(197), UINT8_C(110), UINT8_C(243), UINT8_C( 12), UINT8_C( 86), UINT8_C(223), UINT8_C( 16), UINT8_C(135), UINT8_C(201), UINT8_C(233), UINT8_C(188), UINT8_C( 91), UINT8_C( 48), UINT8_C(110), UINT8_C(189) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r 
= simde_vuzp1q_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vuzp1q_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C( 5468), UINT16_C(62275), UINT16_C(44648), UINT16_C(16027), UINT16_C(21390), UINT16_C( 3408), UINT16_C(46720), UINT16_C(12823) }, { UINT16_C(55771), UINT16_C(60892), UINT16_C(60923), UINT16_C(16505), UINT16_C(32383), UINT16_C(21278), UINT16_C(52886), UINT16_C(62044) }, { UINT16_C( 5468), UINT16_C(44648), UINT16_C(21390), UINT16_C(46720), UINT16_C(55771), UINT16_C(60923), UINT16_C(32383), UINT16_C(52886) } }, { { UINT16_C(40931), UINT16_C(19429), UINT16_C(33101), UINT16_C(56457), UINT16_C(55764), UINT16_C(21737), UINT16_C( 143), UINT16_C(27270) }, { UINT16_C(25306), UINT16_C(54615), UINT16_C(53327), UINT16_C(52757), UINT16_C(13390), UINT16_C(58402), UINT16_C(32258), UINT16_C(58839) }, { UINT16_C(40931), UINT16_C(33101), UINT16_C(55764), UINT16_C( 143), UINT16_C(25306), UINT16_C(53327), UINT16_C(13390), UINT16_C(32258) } }, { { UINT16_C(48157), UINT16_C(27441), UINT16_C(47677), UINT16_C( 4423), UINT16_C(12436), UINT16_C( 9061), UINT16_C(60209), UINT16_C( 2958) }, { UINT16_C(58702), UINT16_C(40416), UINT16_C(63158), UINT16_C( 1132), UINT16_C(36394), UINT16_C(11497), UINT16_C(49164), UINT16_C(10513) }, { UINT16_C(48157), UINT16_C(47677), UINT16_C(12436), UINT16_C(60209), UINT16_C(58702), UINT16_C(63158), UINT16_C(36394), UINT16_C(49164) } }, { { UINT16_C(17020), UINT16_C(47764), UINT16_C(56317), UINT16_C(37323), UINT16_C(12556), UINT16_C(15796), UINT16_C(16924), UINT16_C(27208) }, { UINT16_C(10280), UINT16_C(56840), UINT16_C(29726), UINT16_C(18658), UINT16_C(51970), UINT16_C( 3700), UINT16_C(34443), UINT16_C( 2103) }, { UINT16_C(17020), UINT16_C(56317), UINT16_C(12556), UINT16_C(16924), UINT16_C(10280), UINT16_C(29726), UINT16_C(51970), UINT16_C(34443) } }, { { UINT16_C(52424), UINT16_C(50626), UINT16_C(36263), 
UINT16_C(45910), UINT16_C( 3006), UINT16_C(56304), UINT16_C(14413), UINT16_C(30021) }, { UINT16_C(19809), UINT16_C(32595), UINT16_C(14017), UINT16_C(50120), UINT16_C(15361), UINT16_C(36305), UINT16_C( 2498), UINT16_C(35733) }, { UINT16_C(52424), UINT16_C(36263), UINT16_C( 3006), UINT16_C(14413), UINT16_C(19809), UINT16_C(14017), UINT16_C(15361), UINT16_C( 2498) } }, { { UINT16_C(22485), UINT16_C(31824), UINT16_C(42980), UINT16_C(41776), UINT16_C( 8370), UINT16_C(65406), UINT16_C(50009), UINT16_C(47733) }, { UINT16_C(51217), UINT16_C(53817), UINT16_C( 510), UINT16_C( 150), UINT16_C(26430), UINT16_C( 141), UINT16_C( 8816), UINT16_C(17803) }, { UINT16_C(22485), UINT16_C(42980), UINT16_C( 8370), UINT16_C(50009), UINT16_C(51217), UINT16_C( 510), UINT16_C(26430), UINT16_C( 8816) } }, { { UINT16_C(56441), UINT16_C(24002), UINT16_C(62083), UINT16_C(13568), UINT16_C(32274), UINT16_C(27444), UINT16_C(43330), UINT16_C(21285) }, { UINT16_C(24434), UINT16_C(28709), UINT16_C(47968), UINT16_C(40560), UINT16_C(64803), UINT16_C(37791), UINT16_C(10783), UINT16_C(39129) }, { UINT16_C(56441), UINT16_C(62083), UINT16_C(32274), UINT16_C(43330), UINT16_C(24434), UINT16_C(47968), UINT16_C(64803), UINT16_C(10783) } }, { { UINT16_C(39686), UINT16_C(35318), UINT16_C(63117), UINT16_C(40894), UINT16_C(62325), UINT16_C(46859), UINT16_C(12444), UINT16_C( 3594) }, { UINT16_C(12175), UINT16_C(61567), UINT16_C(61419), UINT16_C( 3726), UINT16_C(11757), UINT16_C( 3233), UINT16_C(31320), UINT16_C(24229) }, { UINT16_C(39686), UINT16_C(63117), UINT16_C(62325), UINT16_C(12444), UINT16_C(12175), UINT16_C(61419), UINT16_C(11757), UINT16_C(31320) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vuzp1q_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vuzp1q_u32 
(SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C( 676667838), UINT32_C(2715607902), UINT32_C(2426014406), UINT32_C(3446879341) }, { UINT32_C(3643743832), UINT32_C(2346026890), UINT32_C( 842992455), UINT32_C(2599884251) }, { UINT32_C( 676667838), UINT32_C(2426014406), UINT32_C(3643743832), UINT32_C( 842992455) } }, { { UINT32_C(2495761462), UINT32_C(4130709040), UINT32_C( 361156776), UINT32_C(1575221508) }, { UINT32_C(2721452568), UINT32_C(3761048729), UINT32_C(4061293591), UINT32_C(3096185217) }, { UINT32_C(2495761462), UINT32_C( 361156776), UINT32_C(2721452568), UINT32_C(4061293591) } }, { { UINT32_C(2253147734), UINT32_C(2507965164), UINT32_C(1453982546), UINT32_C( 347311612) }, { UINT32_C( 951577247), UINT32_C( 219735286), UINT32_C(3539937873), UINT32_C(2324335668) }, { UINT32_C(2253147734), UINT32_C(1453982546), UINT32_C( 951577247), UINT32_C(3539937873) } }, { { UINT32_C(3339769818), UINT32_C(2874969177), UINT32_C(2348877455), UINT32_C( 866170260) }, { UINT32_C(2506905503), UINT32_C(2376238140), UINT32_C(3814695343), UINT32_C( 141421101) }, { UINT32_C(3339769818), UINT32_C(2348877455), UINT32_C(2506905503), UINT32_C(3814695343) } }, { { UINT32_C( 449805761), UINT32_C(2579835657), UINT32_C(3307587121), UINT32_C( 452576635) }, { UINT32_C(1504666909), UINT32_C(2565231081), UINT32_C( 544949747), UINT32_C(4029212719) }, { UINT32_C( 449805761), UINT32_C(3307587121), UINT32_C(1504666909), UINT32_C( 544949747) } }, { { UINT32_C(1862989669), UINT32_C(1409863458), UINT32_C( 286862742), UINT32_C( 254546674) }, { UINT32_C(1634261879), UINT32_C( 553209389), UINT32_C(3275781524), UINT32_C(3283380573) }, { UINT32_C(1862989669), UINT32_C( 286862742), UINT32_C(1634261879), UINT32_C(3275781524) } }, { { UINT32_C(2201140832), UINT32_C( 618085006), UINT32_C(1496707175), UINT32_C(2053726467) }, { UINT32_C(1792790845), UINT32_C(3028997408), UINT32_C(2809645642), UINT32_C(2489985843) }, { UINT32_C(2201140832), 
UINT32_C(1496707175), UINT32_C(1792790845), UINT32_C(2809645642) } }, { { UINT32_C(2014813418), UINT32_C(1033694934), UINT32_C(3784823262), UINT32_C(1885077555) }, { UINT32_C(4057610193), UINT32_C(1453679628), UINT32_C(1660820782), UINT32_C( 855009352) }, { UINT32_C(2014813418), UINT32_C(3784823262), UINT32_C(4057610193), UINT32_C(1660820782) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vuzp1q_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vuzp1q_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(16117741059595622227), UINT64_C( 2565286127910827920) }, { UINT64_C( 500143091789421190), UINT64_C(10628186432806376979) }, { UINT64_C(16117741059595622227), UINT64_C( 500143091789421190) } }, { { UINT64_C(12950214132161407935), UINT64_C( 6153497975068038174) }, { UINT64_C( 5727240880030441463), UINT64_C( 3380185094979684373) }, { UINT64_C(12950214132161407935), UINT64_C( 5727240880030441463) } }, { { UINT64_C( 2816367189010391615), UINT64_C(12679549088990567123) }, { UINT64_C(11149117747892602157), UINT64_C( 2505647239719816999) }, { UINT64_C( 2816367189010391615), UINT64_C(11149117747892602157) } }, { { UINT64_C( 6821433410092059770), UINT64_C( 1088453712281605043) }, { UINT64_C( 385959220597046844), UINT64_C(11405191389216787266) }, { UINT64_C( 6821433410092059770), UINT64_C( 385959220597046844) } }, { { UINT64_C( 1736362389326187310), UINT64_C( 5391262915657668832) }, { UINT64_C(13888370619831765979), UINT64_C(16822401168388217266) }, { UINT64_C( 1736362389326187310), UINT64_C(13888370619831765979) } }, { { UINT64_C( 4860433358764808183), UINT64_C( 7938675798306059986) }, { UINT64_C( 5791880490741348130), UINT64_C( 7557775138006451518) }, { UINT64_C( 
4860433358764808183), UINT64_C( 5791880490741348130) } }, { { UINT64_C( 1286401389819561011), UINT64_C(15577703406384635693) }, { UINT64_C( 8738065454028314197), UINT64_C( 4176618091569113629) }, { UINT64_C( 1286401389819561011), UINT64_C( 8738065454028314197) } }, { { UINT64_C( 2173456247646370520), UINT64_C(10238950491402738569) }, { UINT64_C( 9735029126993121637), UINT64_C(13289855178218244911) }, { UINT64_C( 2173456247646370520), UINT64_C( 9735029126993121637) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vuzp1q_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1q_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1q_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1q_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1q_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1q_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1q_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1q_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1q_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1q_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/uzp2.c000066400000000000000000001546271400333146700164020ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN uzp2 #include "test-neon.h" #include "../../../simde/arm/neon/uzp2.h" static int test_simde_vuzp2_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( 343.73), SIMDE_FLOAT32_C( -384.82) }, { SIMDE_FLOAT32_C( 348.76), SIMDE_FLOAT32_C( 874.35) }, 
{ SIMDE_FLOAT32_C( -384.82), SIMDE_FLOAT32_C( 874.35) } }, { { SIMDE_FLOAT32_C( 328.67), SIMDE_FLOAT32_C( -556.62) }, { SIMDE_FLOAT32_C( -584.13), SIMDE_FLOAT32_C( 886.34) }, { SIMDE_FLOAT32_C( -556.62), SIMDE_FLOAT32_C( 886.34) } }, { { SIMDE_FLOAT32_C( -727.41), SIMDE_FLOAT32_C( 622.83) }, { SIMDE_FLOAT32_C( -328.14), SIMDE_FLOAT32_C( 552.33) }, { SIMDE_FLOAT32_C( 622.83), SIMDE_FLOAT32_C( 552.33) } }, { { SIMDE_FLOAT32_C( 169.12), SIMDE_FLOAT32_C( 762.41) }, { SIMDE_FLOAT32_C( -165.94), SIMDE_FLOAT32_C( 247.93) }, { SIMDE_FLOAT32_C( 762.41), SIMDE_FLOAT32_C( 247.93) } }, { { SIMDE_FLOAT32_C( 975.47), SIMDE_FLOAT32_C( 559.60) }, { SIMDE_FLOAT32_C( -75.67), SIMDE_FLOAT32_C( 142.37) }, { SIMDE_FLOAT32_C( 559.60), SIMDE_FLOAT32_C( 142.37) } }, { { SIMDE_FLOAT32_C( -342.52), SIMDE_FLOAT32_C( -803.88) }, { SIMDE_FLOAT32_C( -138.51), SIMDE_FLOAT32_C( -568.39) }, { SIMDE_FLOAT32_C( -803.88), SIMDE_FLOAT32_C( -568.39) } }, { { SIMDE_FLOAT32_C( 706.38), SIMDE_FLOAT32_C( -329.34) }, { SIMDE_FLOAT32_C( -697.68), SIMDE_FLOAT32_C( 236.14) }, { SIMDE_FLOAT32_C( -329.34), SIMDE_FLOAT32_C( 236.14) } }, { { SIMDE_FLOAT32_C( 537.08), SIMDE_FLOAT32_C( 467.12) }, { SIMDE_FLOAT32_C( 923.63), SIMDE_FLOAT32_C( -119.19) }, { SIMDE_FLOAT32_C( 467.12), SIMDE_FLOAT32_C( -119.19) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a, b, r; a = simde_vld1_f32(test_vec[i].a); b = simde_vld1_f32(test_vec[i].b); r = simde_vuzp2_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vuzp2_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 74), INT8_C( 41), -INT8_C( 101), -INT8_C( 39), INT8_C( 90), -INT8_C( 112), INT8_C( 51), INT8_C( 51) }, { -INT8_C( 123), -INT8_C( 118), -INT8_C( 2), INT8_C( 101), INT8_C( 68), -INT8_C( 103), -INT8_C( 57), -INT8_C( 67) }, { INT8_C( 41), -INT8_C( 39), -INT8_C( 112), INT8_C( 51), -INT8_C( 118), 
INT8_C( 101), -INT8_C( 103), -INT8_C( 67) } }, { { INT8_C( 20), INT8_C( 116), INT8_C( 51), INT8_C( 68), -INT8_C( 48), INT8_C( 60), INT8_C( 39), INT8_C( 78) }, { -INT8_C( 64), INT8_C( 38), INT8_C( 69), -INT8_C( 38), -INT8_C( 56), INT8_C( 75), -INT8_C( 7), INT8_C( 18) }, { INT8_C( 116), INT8_C( 68), INT8_C( 60), INT8_C( 78), INT8_C( 38), -INT8_C( 38), INT8_C( 75), INT8_C( 18) } }, { { INT8_C( 117), -INT8_C( 108), -INT8_C( 21), -INT8_C( 49), INT8_C( 37), INT8_C( 31), INT8_C( 2), -INT8_C( 86) }, { -INT8_C( 87), INT8_C( 0), INT8_C( 15), -INT8_C( 19), -INT8_C( 103), -INT8_C( 42), -INT8_C( 86), -INT8_C( 82) }, { -INT8_C( 108), -INT8_C( 49), INT8_C( 31), -INT8_C( 86), INT8_C( 0), -INT8_C( 19), -INT8_C( 42), -INT8_C( 82) } }, { { INT8_C( 74), -INT8_C( 34), -INT8_C( 14), INT8_C( 27), INT8_C( 26), INT8_C( 25), INT8_C( 105), -INT8_C( 38) }, { INT8_C( 63), -INT8_C( 82), -INT8_C( 75), INT8_C( 7), -INT8_C( 7), -INT8_C( 82), INT8_C( 25), INT8_C( 110) }, { -INT8_C( 34), INT8_C( 27), INT8_C( 25), -INT8_C( 38), -INT8_C( 82), INT8_C( 7), -INT8_C( 82), INT8_C( 110) } }, { { INT8_C( 66), INT8_C( 4), INT8_C( 61), INT8_C( 103), INT8_C( 35), INT8_C( 64), INT8_C( 17), -INT8_C( 52) }, { INT8_C( 64), INT8_C( 32), -INT8_C( 71), -INT8_C( 38), -INT8_C( 9), INT8_C( 100), -INT8_C( 120), INT8_C( 65) }, { INT8_C( 4), INT8_C( 103), INT8_C( 64), -INT8_C( 52), INT8_C( 32), -INT8_C( 38), INT8_C( 100), INT8_C( 65) } }, { { INT8_C( 66), INT8_C( 122), INT8_C( 92), INT8_C( 92), -INT8_C( 109), -INT8_C( 59), INT8_C( 55), -INT8_C( 46) }, { INT8_C( 115), -INT8_C( 20), -INT8_C( 39), INT8_C( 109), -INT8_C( 102), -INT8_C( 14), -INT8_C( 37), -INT8_C( 36) }, { INT8_C( 122), INT8_C( 92), -INT8_C( 59), -INT8_C( 46), -INT8_C( 20), INT8_C( 109), -INT8_C( 14), -INT8_C( 36) } }, { { -INT8_C( 10), INT8_C( 25), INT8_C( 68), INT8_C( 26), INT8_C( 89), INT8_C( 85), -INT8_C( 26), -INT8_C( 103) }, { INT8_C( 118), -INT8_C( 96), INT8_C( 115), INT8_C( 109), INT8_C( 4), -INT8_C( 5), -INT8_C( 82), INT8_C( 70) }, { INT8_C( 25), 
INT8_C( 26), INT8_C( 85), -INT8_C( 103), -INT8_C( 96), INT8_C( 109), -INT8_C( 5), INT8_C( 70) } }, { { INT8_C( 117), INT8_C( 11), -INT8_C( 94), INT8_C( 8), -INT8_C( 48), -INT8_C( 39), -INT8_C( 38), INT8_C( 68) }, { -INT8_C( 59), -INT8_C( 77), -INT8_C( 79), INT8_C( 95), -INT8_C( 91), -INT8_C( 116), INT8_C( 60), -INT8_C( 100) }, { INT8_C( 11), INT8_C( 8), -INT8_C( 39), INT8_C( 68), -INT8_C( 77), INT8_C( 95), -INT8_C( 116), -INT8_C( 100) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vuzp2_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vuzp2_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { -INT16_C( 8469), INT16_C( 4214), INT16_C( 6247), -INT16_C( 14565) }, { INT16_C( 17510), -INT16_C( 31761), -INT16_C( 26454), INT16_C( 6595) }, { INT16_C( 4214), -INT16_C( 14565), -INT16_C( 31761), INT16_C( 6595) } }, { { INT16_C( 15772), -INT16_C( 26012), INT16_C( 25267), INT16_C( 18574) }, { INT16_C( 26296), INT16_C( 18542), -INT16_C( 31381), INT16_C( 22512) }, { -INT16_C( 26012), INT16_C( 18574), INT16_C( 18542), INT16_C( 22512) } }, { { INT16_C( 26211), -INT16_C( 13465), -INT16_C( 32129), -INT16_C( 6766) }, { -INT16_C( 32313), INT16_C( 29032), INT16_C( 11289), -INT16_C( 19062) }, { -INT16_C( 13465), -INT16_C( 6766), INT16_C( 29032), -INT16_C( 19062) } }, { { -INT16_C( 4503), INT16_C( 7247), -INT16_C( 8624), INT16_C( 2405) }, { -INT16_C( 11452), -INT16_C( 20399), INT16_C( 16984), -INT16_C( 17401) }, { INT16_C( 7247), INT16_C( 2405), -INT16_C( 20399), -INT16_C( 17401) } }, { { INT16_C( 28328), INT16_C( 10119), INT16_C( 6640), -INT16_C( 18676) }, { INT16_C( 30106), -INT16_C( 19416), -INT16_C( 19551), INT16_C( 2665) }, { INT16_C( 10119), -INT16_C( 18676), -INT16_C( 19416), INT16_C( 2665) } }, { { -INT16_C( 
18015), -INT16_C( 3546), -INT16_C( 29801), -INT16_C( 9221) }, { INT16_C( 19550), -INT16_C( 18549), -INT16_C( 28018), INT16_C( 14195) }, { -INT16_C( 3546), -INT16_C( 9221), -INT16_C( 18549), INT16_C( 14195) } }, { { -INT16_C( 1536), -INT16_C( 3746), INT16_C( 27411), -INT16_C( 21080) }, { -INT16_C( 11808), -INT16_C( 32415), -INT16_C( 13436), INT16_C( 9611) }, { -INT16_C( 3746), -INT16_C( 21080), -INT16_C( 32415), INT16_C( 9611) } }, { { -INT16_C( 20092), INT16_C( 6935), INT16_C( 4669), -INT16_C( 25610) }, { -INT16_C( 32161), -INT16_C( 4782), -INT16_C( 15084), INT16_C( 5412) }, { INT16_C( 6935), -INT16_C( 25610), -INT16_C( 4782), INT16_C( 5412) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vuzp2_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vuzp2_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 700519944), INT32_C( 806752298) }, { -INT32_C( 1414503859), -INT32_C( 1821262512) }, { INT32_C( 806752298), -INT32_C( 1821262512) } }, { { -INT32_C( 1774050292), INT32_C( 309171441) }, { -INT32_C( 1372346662), INT32_C( 1901278057) }, { INT32_C( 309171441), INT32_C( 1901278057) } }, { { INT32_C( 1939477833), INT32_C( 1923330085) }, { INT32_C( 1646088978), INT32_C( 485854992) }, { INT32_C( 1923330085), INT32_C( 485854992) } }, { { -INT32_C( 1531824205), -INT32_C( 1498013749) }, { INT32_C( 794094022), INT32_C( 1705027612) }, { -INT32_C( 1498013749), INT32_C( 1705027612) } }, { { -INT32_C( 489145667), -INT32_C( 61506582) }, { -INT32_C( 564170034), -INT32_C( 1258662911) }, { -INT32_C( 61506582), -INT32_C( 1258662911) } }, { { INT32_C( 1465429388), -INT32_C( 1828909364) }, { INT32_C( 331436791), -INT32_C( 1216847366) }, { -INT32_C( 1828909364), -INT32_C( 1216847366) } }, { { -INT32_C( 
2053549925), -INT32_C( 1719472437) }, { INT32_C( 1652023649), -INT32_C( 1055428043) }, { -INT32_C( 1719472437), -INT32_C( 1055428043) } }, { { -INT32_C( 350654689), INT32_C( 1971197566) }, { INT32_C( 1653161832), INT32_C( 1008271777) }, { INT32_C( 1971197566), INT32_C( 1008271777) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vuzp2_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vuzp2_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 32), UINT8_C(115), UINT8_C( 94), UINT8_C(134), UINT8_C(188), UINT8_C( 5), UINT8_C( 28), UINT8_C( 50) }, { UINT8_C( 84), UINT8_C( 27), UINT8_C(113), UINT8_C(241), UINT8_MAX, UINT8_C( 18), UINT8_C(122), UINT8_C( 84) }, { UINT8_C(115), UINT8_C(134), UINT8_C( 5), UINT8_C( 50), UINT8_C( 27), UINT8_C(241), UINT8_C( 18), UINT8_C( 84) } }, { { UINT8_C( 24), UINT8_C( 59), UINT8_C(106), UINT8_C(201), UINT8_C( 70), UINT8_C(150), UINT8_C( 86), UINT8_C(152) }, { UINT8_C(111), UINT8_C(245), UINT8_C(121), UINT8_C(191), UINT8_C(114), UINT8_C(111), UINT8_C( 11), UINT8_C(146) }, { UINT8_C( 59), UINT8_C(201), UINT8_C(150), UINT8_C(152), UINT8_C(245), UINT8_C(191), UINT8_C(111), UINT8_C(146) } }, { { UINT8_C(226), UINT8_C(105), UINT8_C( 24), UINT8_C(158), UINT8_C(110), UINT8_C( 52), UINT8_C(208), UINT8_C(194) }, { UINT8_C( 79), UINT8_C( 66), UINT8_C(180), UINT8_C( 78), UINT8_C( 84), UINT8_C( 46), UINT8_C(162), UINT8_C(108) }, { UINT8_C(105), UINT8_C(158), UINT8_C( 52), UINT8_C(194), UINT8_C( 66), UINT8_C( 78), UINT8_C( 46), UINT8_C(108) } }, { { UINT8_C(105), UINT8_C( 12), UINT8_C( 53), UINT8_C(176), UINT8_C(162), UINT8_C(139), UINT8_C( 72), UINT8_C( 17) }, { UINT8_C(128), UINT8_C(193), UINT8_C(209), UINT8_C(242), UINT8_C( 48), UINT8_C(220), UINT8_C(132), UINT8_C( 
18) }, { UINT8_C( 12), UINT8_C(176), UINT8_C(139), UINT8_C( 17), UINT8_C(193), UINT8_C(242), UINT8_C(220), UINT8_C( 18) } }, { { UINT8_C( 69), UINT8_C(156), UINT8_C(176), UINT8_C(180), UINT8_C(209), UINT8_C(129), UINT8_C(118), UINT8_C( 32) }, { UINT8_C(195), UINT8_C( 42), UINT8_C(111), UINT8_C( 23), UINT8_C( 89), UINT8_C( 17), UINT8_C(131), UINT8_C(194) }, { UINT8_C(156), UINT8_C(180), UINT8_C(129), UINT8_C( 32), UINT8_C( 42), UINT8_C( 23), UINT8_C( 17), UINT8_C(194) } }, { { UINT8_C( 30), UINT8_C(184), UINT8_C(114), UINT8_C(192), UINT8_C( 67), UINT8_C(186), UINT8_C(210), UINT8_C(195) }, { UINT8_C(124), UINT8_C(163), UINT8_C(182), UINT8_C(172), UINT8_C(127), UINT8_C( 58), UINT8_C(191), UINT8_C(196) }, { UINT8_C(184), UINT8_C(192), UINT8_C(186), UINT8_C(195), UINT8_C(163), UINT8_C(172), UINT8_C( 58), UINT8_C(196) } }, { { UINT8_C(215), UINT8_C(111), UINT8_C(120), UINT8_C(168), UINT8_C(240), UINT8_C(239), UINT8_C(200), UINT8_C(179) }, { UINT8_C( 25), UINT8_C( 55), UINT8_C(202), UINT8_C(114), UINT8_C( 73), UINT8_C( 77), UINT8_C( 53), UINT8_C(103) }, { UINT8_C(111), UINT8_C(168), UINT8_C(239), UINT8_C(179), UINT8_C( 55), UINT8_C(114), UINT8_C( 77), UINT8_C(103) } }, { { UINT8_C( 5), UINT8_C(167), UINT8_C( 39), UINT8_C( 72), UINT8_C( 98), UINT8_C(249), UINT8_C( 12), UINT8_C(222) }, { UINT8_C(156), UINT8_C(194), UINT8_C(138), UINT8_C( 27), UINT8_C(252), UINT8_C( 73), UINT8_C(224), UINT8_C(211) }, { UINT8_C(167), UINT8_C( 72), UINT8_C(249), UINT8_C(222), UINT8_C(194), UINT8_C( 27), UINT8_C( 73), UINT8_C(211) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vuzp2_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vuzp2_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C( 5562), 
UINT16_C(29929), UINT16_C(59535), UINT16_C( 4681) }, { UINT16_C( 4562), UINT16_C(56009), UINT16_C(26174), UINT16_C(13284) }, { UINT16_C(29929), UINT16_C( 4681), UINT16_C(56009), UINT16_C(13284) } }, { { UINT16_C( 5231), UINT16_C( 5283), UINT16_C(50924), UINT16_C(52138) }, { UINT16_C(51348), UINT16_C(34623), UINT16_C(61476), UINT16_C(57252) }, { UINT16_C( 5283), UINT16_C(52138), UINT16_C(34623), UINT16_C(57252) } }, { { UINT16_C(36102), UINT16_C(38227), UINT16_C(40053), UINT16_C(18343) }, { UINT16_C(28845), UINT16_C(60193), UINT16_C( 1494), UINT16_C(17950) }, { UINT16_C(38227), UINT16_C(18343), UINT16_C(60193), UINT16_C(17950) } }, { { UINT16_C(49434), UINT16_C( 1626), UINT16_C( 1159), UINT16_C( 7122) }, { UINT16_C( 4556), UINT16_C(61603), UINT16_C(18177), UINT16_C( 1999) }, { UINT16_C( 1626), UINT16_C( 7122), UINT16_C(61603), UINT16_C( 1999) } }, { { UINT16_C( 8916), UINT16_C(18844), UINT16_C(17342), UINT16_C(27536) }, { UINT16_C(45748), UINT16_C(35414), UINT16_C(29879), UINT16_C(53712) }, { UINT16_C(18844), UINT16_C(27536), UINT16_C(35414), UINT16_C(53712) } }, { { UINT16_C(10805), UINT16_C(48344), UINT16_C(43566), UINT16_C(64216) }, { UINT16_C(31675), UINT16_C(48363), UINT16_C(47810), UINT16_C(38596) }, { UINT16_C(48344), UINT16_C(64216), UINT16_C(48363), UINT16_C(38596) } }, { { UINT16_C(24797), UINT16_C(39903), UINT16_C(28580), UINT16_C(22535) }, { UINT16_C(23841), UINT16_C(55778), UINT16_C(46034), UINT16_C( 1962) }, { UINT16_C(39903), UINT16_C(22535), UINT16_C(55778), UINT16_C( 1962) } }, { { UINT16_C(33501), UINT16_C( 3268), UINT16_C(39980), UINT16_C(59142) }, { UINT16_C(61719), UINT16_C(55716), UINT16_C(26796), UINT16_C(35183) }, { UINT16_C( 3268), UINT16_C(59142), UINT16_C(55716), UINT16_C(35183) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vuzp2_u16(a, b); 
simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vuzp2_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C(4094091955), UINT32_C(3041287499) }, { UINT32_C(2743144671), UINT32_C(3197617136) }, { UINT32_C(3041287499), UINT32_C(3197617136) } }, { { UINT32_C(2304641764), UINT32_C(2188309395) }, { UINT32_C(1096318907), UINT32_C( 611816304) }, { UINT32_C(2188309395), UINT32_C( 611816304) } }, { { UINT32_C(3222830453), UINT32_C(3128254171) }, { UINT32_C(1599993711), UINT32_C(2652828858) }, { UINT32_C(3128254171), UINT32_C(2652828858) } }, { { UINT32_C(2435284222), UINT32_C( 722769264) }, { UINT32_C(2221698068), UINT32_C(1957225727) }, { UINT32_C( 722769264), UINT32_C(1957225727) } }, { { UINT32_C(1010090337), UINT32_C(2398530079) }, { UINT32_C(1542345633), UINT32_C(1173949511) }, { UINT32_C(2398530079), UINT32_C(1173949511) } }, { { UINT32_C(4174848136), UINT32_C(3374640053) }, { UINT32_C(1464701015), UINT32_C(3603691124) }, { UINT32_C(3374640053), UINT32_C(3603691124) } }, { { UINT32_C(3591504055), UINT32_C(1264912810) }, { UINT32_C(2762363740), UINT32_C(3890847583) }, { UINT32_C(1264912810), UINT32_C(3890847583) } }, { { UINT32_C(1960886463), UINT32_C( 54330539) }, { UINT32_C( 156928660), UINT32_C( 937371008) }, { UINT32_C( 54330539), UINT32_C( 937371008) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vuzp2_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vuzp2q_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -781.39), SIMDE_FLOAT32_C( 88.58), SIMDE_FLOAT32_C( 100.41), SIMDE_FLOAT32_C( 266.53) }, { SIMDE_FLOAT32_C( 803.29), 
SIMDE_FLOAT32_C( -418.13), SIMDE_FLOAT32_C( 24.45), SIMDE_FLOAT32_C( 955.27) }, { SIMDE_FLOAT32_C( 88.58), SIMDE_FLOAT32_C( 266.53), SIMDE_FLOAT32_C( -418.13), SIMDE_FLOAT32_C( 955.27) } }, { { SIMDE_FLOAT32_C( -28.60), SIMDE_FLOAT32_C( -622.57), SIMDE_FLOAT32_C( 870.72), SIMDE_FLOAT32_C( 957.13) }, { SIMDE_FLOAT32_C( -115.95), SIMDE_FLOAT32_C( -205.52), SIMDE_FLOAT32_C( -866.47), SIMDE_FLOAT32_C( -650.57) }, { SIMDE_FLOAT32_C( -622.57), SIMDE_FLOAT32_C( 957.13), SIMDE_FLOAT32_C( -205.52), SIMDE_FLOAT32_C( -650.57) } }, { { SIMDE_FLOAT32_C( 110.44), SIMDE_FLOAT32_C( 300.16), SIMDE_FLOAT32_C( -300.84), SIMDE_FLOAT32_C( 217.46) }, { SIMDE_FLOAT32_C( -171.62), SIMDE_FLOAT32_C( 375.64), SIMDE_FLOAT32_C( 342.58), SIMDE_FLOAT32_C( 76.53) }, { SIMDE_FLOAT32_C( 300.16), SIMDE_FLOAT32_C( 217.46), SIMDE_FLOAT32_C( 375.64), SIMDE_FLOAT32_C( 76.53) } }, { { SIMDE_FLOAT32_C( -412.19), SIMDE_FLOAT32_C( -951.34), SIMDE_FLOAT32_C( 730.36), SIMDE_FLOAT32_C( 100.26) }, { SIMDE_FLOAT32_C( 107.62), SIMDE_FLOAT32_C( 786.75), SIMDE_FLOAT32_C( 778.67), SIMDE_FLOAT32_C( 326.23) }, { SIMDE_FLOAT32_C( -951.34), SIMDE_FLOAT32_C( 100.26), SIMDE_FLOAT32_C( 786.75), SIMDE_FLOAT32_C( 326.23) } }, { { SIMDE_FLOAT32_C( -124.68), SIMDE_FLOAT32_C( -120.92), SIMDE_FLOAT32_C( -407.25), SIMDE_FLOAT32_C( -321.39) }, { SIMDE_FLOAT32_C( 460.95), SIMDE_FLOAT32_C( 617.20), SIMDE_FLOAT32_C( -366.12), SIMDE_FLOAT32_C( -567.65) }, { SIMDE_FLOAT32_C( -120.92), SIMDE_FLOAT32_C( -321.39), SIMDE_FLOAT32_C( 617.20), SIMDE_FLOAT32_C( -567.65) } }, { { SIMDE_FLOAT32_C( 994.64), SIMDE_FLOAT32_C( -495.39), SIMDE_FLOAT32_C( -610.52), SIMDE_FLOAT32_C( -121.31) }, { SIMDE_FLOAT32_C( 299.08), SIMDE_FLOAT32_C( -477.00), SIMDE_FLOAT32_C( 228.11), SIMDE_FLOAT32_C( -590.48) }, { SIMDE_FLOAT32_C( -495.39), SIMDE_FLOAT32_C( -121.31), SIMDE_FLOAT32_C( -477.00), SIMDE_FLOAT32_C( -590.48) } }, { { SIMDE_FLOAT32_C( 823.17), SIMDE_FLOAT32_C( 927.28), SIMDE_FLOAT32_C( 626.99), SIMDE_FLOAT32_C( -348.45) }, { SIMDE_FLOAT32_C( 302.91), 
SIMDE_FLOAT32_C( -30.44), SIMDE_FLOAT32_C( 728.08), SIMDE_FLOAT32_C( 890.73) }, { SIMDE_FLOAT32_C( 927.28), SIMDE_FLOAT32_C( -348.45), SIMDE_FLOAT32_C( -30.44), SIMDE_FLOAT32_C( 890.73) } }, { { SIMDE_FLOAT32_C( 18.22), SIMDE_FLOAT32_C( 458.45), SIMDE_FLOAT32_C( -9.02), SIMDE_FLOAT32_C( -874.16) }, { SIMDE_FLOAT32_C( 245.19), SIMDE_FLOAT32_C( -230.35), SIMDE_FLOAT32_C( 452.07), SIMDE_FLOAT32_C( -879.48) }, { SIMDE_FLOAT32_C( 458.45), SIMDE_FLOAT32_C( -874.16), SIMDE_FLOAT32_C( -230.35), SIMDE_FLOAT32_C( -879.48) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vuzp2q_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vuzp2q_f64 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -339.85), SIMDE_FLOAT64_C( -574.12) }, { SIMDE_FLOAT64_C( 477.11), SIMDE_FLOAT64_C( -17.44) }, { SIMDE_FLOAT64_C( -574.12), SIMDE_FLOAT64_C( -17.44) } }, { { SIMDE_FLOAT64_C( -819.58), SIMDE_FLOAT64_C( -361.32) }, { SIMDE_FLOAT64_C( -283.52), SIMDE_FLOAT64_C( -25.85) }, { SIMDE_FLOAT64_C( -361.32), SIMDE_FLOAT64_C( -25.85) } }, { { SIMDE_FLOAT64_C( 881.34), SIMDE_FLOAT64_C( 140.92) }, { SIMDE_FLOAT64_C( -394.34), SIMDE_FLOAT64_C( 314.91) }, { SIMDE_FLOAT64_C( 140.92), SIMDE_FLOAT64_C( 314.91) } }, { { SIMDE_FLOAT64_C( -960.58), SIMDE_FLOAT64_C( 916.46) }, { SIMDE_FLOAT64_C( 40.77), SIMDE_FLOAT64_C( 550.34) }, { SIMDE_FLOAT64_C( 916.46), SIMDE_FLOAT64_C( 550.34) } }, { { SIMDE_FLOAT64_C( 784.55), SIMDE_FLOAT64_C( -320.66) }, { SIMDE_FLOAT64_C( 942.86), SIMDE_FLOAT64_C( 541.82) }, { SIMDE_FLOAT64_C( -320.66), SIMDE_FLOAT64_C( 541.82) } }, { { SIMDE_FLOAT64_C( 991.85), SIMDE_FLOAT64_C( 533.80) }, { SIMDE_FLOAT64_C( -388.26), SIMDE_FLOAT64_C( 204.04) }, { SIMDE_FLOAT64_C( 
533.80), SIMDE_FLOAT64_C( 204.04) } }, { { SIMDE_FLOAT64_C( -77.27), SIMDE_FLOAT64_C( 884.24) }, { SIMDE_FLOAT64_C( 202.14), SIMDE_FLOAT64_C( 892.43) }, { SIMDE_FLOAT64_C( 884.24), SIMDE_FLOAT64_C( 892.43) } }, { { SIMDE_FLOAT64_C( -326.62), SIMDE_FLOAT64_C( -793.35) }, { SIMDE_FLOAT64_C( -340.99), SIMDE_FLOAT64_C( 333.54) }, { SIMDE_FLOAT64_C( -793.35), SIMDE_FLOAT64_C( 333.54) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); simde_float64x2_t r = simde_vuzp2q_f64(a, b); simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); } return 0; } static int test_simde_vuzp2q_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 69), INT8_C( 59), -INT8_C( 19), INT8_C( 70), -INT8_C( 88), -INT8_C( 12), INT8_C( 31), -INT8_C( 75), -INT8_C( 45), -INT8_C( 113), -INT8_C( 84), -INT8_C( 102), INT8_C( 44), INT8_C( 120), INT8_C( 29), INT8_C( 35) }, { INT8_C( 95), -INT8_C( 119), INT8_C( 70), INT8_C( 64), -INT8_C( 30), INT8_C( 105), -INT8_C( 87), INT8_C( 30), -INT8_C( 21), INT8_C( 45), INT8_C( 82), INT8_C( 18), -INT8_C( 117), INT8_C( 116), INT8_C( 26), -INT8_C( 48) }, { INT8_C( 59), INT8_C( 70), -INT8_C( 12), -INT8_C( 75), -INT8_C( 113), -INT8_C( 102), INT8_C( 120), INT8_C( 35), -INT8_C( 119), INT8_C( 64), INT8_C( 105), INT8_C( 30), INT8_C( 45), INT8_C( 18), INT8_C( 116), -INT8_C( 48) } }, { { -INT8_C( 81), INT8_C( 7), INT8_C( 22), INT8_C( 87), -INT8_C( 4), INT8_C( 53), INT8_C( 12), -INT8_C( 49), -INT8_C( 59), -INT8_C( 71), INT8_C( 105), -INT8_C( 15), INT8_C( 49), -INT8_C( 121), INT8_C( 21), -INT8_C( 112) }, { INT8_C( 16), INT8_C( 91), -INT8_C( 48), -INT8_C( 14), -INT8_C( 59), INT8_C( 121), INT8_C( 16), -INT8_C( 80), -INT8_C( 90), INT8_C( 99), -INT8_C( 61), INT8_C( 49), -INT8_C( 41), -INT8_C( 35), INT8_C( 1), -INT8_C( 121) }, { INT8_C( 7), INT8_C( 87), INT8_C( 53), 
-INT8_C( 49), -INT8_C( 71), -INT8_C( 15), -INT8_C( 121), -INT8_C( 112), INT8_C( 91), -INT8_C( 14), INT8_C( 121), -INT8_C( 80), INT8_C( 99), INT8_C( 49), -INT8_C( 35), -INT8_C( 121) } }, { { -INT8_C( 28), INT8_C( 23), -INT8_C( 34), -INT8_C( 32), INT8_C( 77), -INT8_C( 21), -INT8_C( 80), INT8_C( 18), -INT8_C( 92), INT8_C( 25), INT8_C( 3), -INT8_C( 43), -INT8_C( 96), INT8_C( 24), INT8_C( 101), -INT8_C( 79) }, { INT8_C( 116), INT8_C( 53), -INT8_C( 93), INT8_C( 57), -INT8_C( 82), -INT8_C( 76), -INT8_C( 23), INT8_C( 84), INT8_C( 23), -INT8_C( 84), -INT8_C( 122), -INT8_C( 18), -INT8_C( 119), -INT8_C( 121), INT8_C( 117), INT8_C( 110) }, { INT8_C( 23), -INT8_C( 32), -INT8_C( 21), INT8_C( 18), INT8_C( 25), -INT8_C( 43), INT8_C( 24), -INT8_C( 79), INT8_C( 53), INT8_C( 57), -INT8_C( 76), INT8_C( 84), -INT8_C( 84), -INT8_C( 18), -INT8_C( 121), INT8_C( 110) } }, { { -INT8_C( 97), INT8_C( 84), INT8_C( 78), -INT8_C( 20), INT8_C( 63), -INT8_C( 2), -INT8_C( 2), -INT8_C( 29), INT8_C( 24), INT8_C( 1), -INT8_C( 72), -INT8_C( 72), INT8_C( 26), INT8_C( 29), INT8_C( 105), -INT8_C( 114) }, { INT8_C( 82), INT8_C( 13), -INT8_C( 57), INT8_C( 0), -INT8_C( 63), -INT8_C( 80), INT8_C( 84), -INT8_C( 40), INT8_C( 93), -INT8_C( 38), -INT8_C( 58), -INT8_C( 26), INT8_C( 98), INT8_C( 60), INT8_C( 84), INT8_C( 1) }, { INT8_C( 84), -INT8_C( 20), -INT8_C( 2), -INT8_C( 29), INT8_C( 1), -INT8_C( 72), INT8_C( 29), -INT8_C( 114), INT8_C( 13), INT8_C( 0), -INT8_C( 80), -INT8_C( 40), -INT8_C( 38), -INT8_C( 26), INT8_C( 60), INT8_C( 1) } }, { { -INT8_C( 112), -INT8_C( 93), -INT8_C( 19), -INT8_C( 49), -INT8_C( 95), -INT8_C( 21), -INT8_C( 78), -INT8_C( 71), -INT8_C( 20), INT8_C( 106), INT8_C( 114), INT8_C( 6), -INT8_C( 121), -INT8_C( 37), -INT8_C( 108), -INT8_C( 39) }, { -INT8_C( 24), INT8_C( 91), -INT8_C( 39), -INT8_C( 87), INT8_C( 12), INT8_C( 45), -INT8_C( 127), INT8_C( 105), INT8_C( 8), INT8_C( 72), INT8_C( 79), INT8_C( 106), -INT8_C( 124), -INT8_C( 92), INT8_C( 107), INT8_C( 20) }, { -INT8_C( 93), -INT8_C( 
49), -INT8_C( 21), -INT8_C( 71), INT8_C( 106), INT8_C( 6), -INT8_C( 37), -INT8_C( 39), INT8_C( 91), -INT8_C( 87), INT8_C( 45), INT8_C( 105), INT8_C( 72), INT8_C( 106), -INT8_C( 92), INT8_C( 20) } }, { { INT8_C( 71), INT8_C( 88), -INT8_C( 29), -INT8_C( 24), INT8_C( 67), -INT8_C( 107), -INT8_C( 94), INT8_C( 47), -INT8_C( 1), INT8_C( 20), INT8_C( 54), -INT8_C( 122), -INT8_C( 17), -INT8_C( 54), INT8_C( 95), -INT8_C( 40) }, { INT8_C( 38), INT8_C( 56), -INT8_C( 127), INT8_C( 50), INT8_C( 101), INT8_C( 3), -INT8_C( 101), INT8_C( 109), INT8_C( 75), -INT8_C( 22), -INT8_C( 41), -INT8_C( 49), -INT8_C( 114), INT8_C( 66), -INT8_C( 29), -INT8_C( 43) }, { INT8_C( 88), -INT8_C( 24), -INT8_C( 107), INT8_C( 47), INT8_C( 20), -INT8_C( 122), -INT8_C( 54), -INT8_C( 40), INT8_C( 56), INT8_C( 50), INT8_C( 3), INT8_C( 109), -INT8_C( 22), -INT8_C( 49), INT8_C( 66), -INT8_C( 43) } }, { { -INT8_C( 102), -INT8_C( 58), -INT8_C( 66), -INT8_C( 35), INT8_C( 91), INT8_C( 96), INT8_C( 13), INT8_C( 90), INT8_C( 116), INT8_C( 67), -INT8_C( 32), INT8_C( 99), INT8_C( 13), INT8_C( 63), INT8_C( 59), INT8_C( 51) }, { INT8_C( 119), -INT8_C( 67), INT8_C( 101), -INT8_C( 36), -INT8_C( 64), INT8_C( 0), INT8_C( 74), INT8_C( 11), -INT8_C( 21), INT8_C( 33), -INT8_C( 38), INT8_C( 121), INT8_C( 100), -INT8_C( 67), INT8_C( 79), -INT8_C( 2) }, { -INT8_C( 58), -INT8_C( 35), INT8_C( 96), INT8_C( 90), INT8_C( 67), INT8_C( 99), INT8_C( 63), INT8_C( 51), -INT8_C( 67), -INT8_C( 36), INT8_C( 0), INT8_C( 11), INT8_C( 33), INT8_C( 121), -INT8_C( 67), -INT8_C( 2) } }, { { -INT8_C( 125), INT8_C( 13), -INT8_C( 36), -INT8_C( 34), INT8_C( 109), -INT8_C( 23), INT8_C( 56), -INT8_C( 31), INT8_C( 44), INT8_C( 24), INT8_C( 68), INT8_C( 57), INT8_C( 87), INT8_MIN, INT8_C( 109), -INT8_C( 50) }, { INT8_C( 61), -INT8_C( 46), -INT8_C( 86), -INT8_C( 3), -INT8_C( 45), -INT8_C( 12), INT8_C( 8), -INT8_C( 66), INT8_C( 22), -INT8_C( 30), INT8_C( 55), INT8_C( 122), -INT8_C( 97), -INT8_C( 122), INT8_C( 120), INT8_C( 34) }, { INT8_C( 13), -INT8_C( 
34), -INT8_C( 23), -INT8_C( 31), INT8_C( 24), INT8_C( 57), INT8_MIN, -INT8_C( 50), -INT8_C( 46), -INT8_C( 3), -INT8_C( 12), -INT8_C( 66), -INT8_C( 30), INT8_C( 122), -INT8_C( 122), INT8_C( 34) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vuzp2q_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vuzp2q_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 22831), INT16_C( 10979), -INT16_C( 19770), -INT16_C( 5690), INT16_C( 6784), -INT16_C( 857), -INT16_C( 21684), INT16_C( 4304) }, { INT16_C( 17678), -INT16_C( 9320), -INT16_C( 16347), INT16_C( 18514), INT16_C( 28188), -INT16_C( 11922), -INT16_C( 28199), -INT16_C( 21770) }, { INT16_C( 10979), -INT16_C( 5690), -INT16_C( 857), INT16_C( 4304), -INT16_C( 9320), INT16_C( 18514), -INT16_C( 11922), -INT16_C( 21770) } }, { { -INT16_C( 9673), -INT16_C( 555), -INT16_C( 25716), INT16_C( 3303), -INT16_C( 29003), INT16_C( 520), -INT16_C( 10183), INT16_C( 18194) }, { -INT16_C( 21987), INT16_C( 17186), INT16_C( 29802), -INT16_C( 31093), -INT16_C( 1566), -INT16_C( 17321), INT16_C( 19850), -INT16_C( 16026) }, { -INT16_C( 555), INT16_C( 3303), INT16_C( 520), INT16_C( 18194), INT16_C( 17186), -INT16_C( 31093), -INT16_C( 17321), -INT16_C( 16026) } }, { { INT16_C( 15143), -INT16_C( 19266), -INT16_C( 23082), -INT16_C( 29504), -INT16_C( 14029), INT16_C( 27790), -INT16_C( 24415), -INT16_C( 16460) }, { -INT16_C( 10678), -INT16_C( 19454), -INT16_C( 29365), INT16_C( 11578), -INT16_C( 28282), INT16_C( 4329), INT16_C( 20702), INT16_C( 1745) }, { -INT16_C( 19266), -INT16_C( 29504), INT16_C( 27790), -INT16_C( 16460), -INT16_C( 19454), INT16_C( 11578), INT16_C( 4329), INT16_C( 1745) } }, { { -INT16_C( 28789), INT16_C( 25274), INT16_C( 31285), INT16_C( 26862), 
INT16_C( 31811), -INT16_C( 6699), -INT16_C( 30436), INT16_C( 26276) }, { -INT16_C( 22945), -INT16_C( 21990), INT16_C( 21555), -INT16_C( 17960), -INT16_C( 15899), -INT16_C( 15415), -INT16_C( 26095), -INT16_C( 25143) }, { INT16_C( 25274), INT16_C( 26862), -INT16_C( 6699), INT16_C( 26276), -INT16_C( 21990), -INT16_C( 17960), -INT16_C( 15415), -INT16_C( 25143) } }, { { -INT16_C( 31959), INT16_C( 24319), -INT16_C( 4610), INT16_C( 16839), -INT16_C( 25495), -INT16_C( 31450), -INT16_C( 13787), -INT16_C( 31509) }, { INT16_C( 1392), -INT16_C( 23761), INT16_C( 1881), INT16_C( 15964), INT16_C( 9672), -INT16_C( 9727), -INT16_C( 13377), -INT16_C( 5769) }, { INT16_C( 24319), INT16_C( 16839), -INT16_C( 31450), -INT16_C( 31509), -INT16_C( 23761), INT16_C( 15964), -INT16_C( 9727), -INT16_C( 5769) } }, { { INT16_C( 30286), INT16_C( 19527), INT16_C( 3683), -INT16_C( 13170), -INT16_C( 19286), -INT16_C( 12463), INT16_C( 15487), -INT16_C( 4268) }, { -INT16_C( 31935), -INT16_C( 25965), -INT16_C( 4214), INT16_C( 21208), -INT16_C( 9963), -INT16_C( 11220), -INT16_C( 23644), -INT16_C( 3139) }, { INT16_C( 19527), -INT16_C( 13170), -INT16_C( 12463), -INT16_C( 4268), -INT16_C( 25965), INT16_C( 21208), -INT16_C( 11220), -INT16_C( 3139) } }, { { INT16_C( 1305), INT16_C( 31807), -INT16_C( 13037), -INT16_C( 16824), -INT16_C( 26238), INT16_C( 397), -INT16_C( 7723), INT16_C( 5872) }, { -INT16_C( 31900), -INT16_C( 4432), -INT16_C( 30605), -INT16_C( 30655), INT16_C( 28002), INT16_C( 1628), INT16_C( 6673), INT16_C( 11001) }, { INT16_C( 31807), -INT16_C( 16824), INT16_C( 397), INT16_C( 5872), -INT16_C( 4432), -INT16_C( 30655), INT16_C( 1628), INT16_C( 11001) } }, { { INT16_C( 14623), INT16_C( 12967), -INT16_C( 4346), -INT16_C( 30480), INT16_C( 32393), INT16_C( 24201), INT16_C( 31327), -INT16_C( 15243) }, { INT16_C( 9725), INT16_C( 28850), -INT16_C( 3154), INT16_C( 4344), INT16_C( 21857), INT16_C( 29206), INT16_C( 4207), -INT16_C( 29028) }, { INT16_C( 12967), -INT16_C( 30480), INT16_C( 24201), -INT16_C( 
15243), INT16_C( 28850), INT16_C( 4344), INT16_C( 29206), -INT16_C( 29028) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vuzp2q_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vuzp2q_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 1680086568), INT32_C( 1994775530), -INT32_C( 313035908), -INT32_C( 1619858099) }, { INT32_C( 1449835667), INT32_C( 1456501112), -INT32_C( 78577876), -INT32_C( 1017570149) }, { INT32_C( 1994775530), -INT32_C( 1619858099), INT32_C( 1456501112), -INT32_C( 1017570149) } }, { { INT32_C( 489127474), -INT32_C( 711783335), -INT32_C( 842864001), INT32_C( 1802253528) }, { INT32_C( 1774311153), INT32_C( 1941934407), INT32_C( 745410705), INT32_C( 1609549868) }, { -INT32_C( 711783335), INT32_C( 1802253528), INT32_C( 1941934407), INT32_C( 1609549868) } }, { { -INT32_C( 1619257786), -INT32_C( 1586229470), -INT32_C( 781306119), INT32_C( 1547491947) }, { -INT32_C( 138019408), INT32_C( 543851919), -INT32_C( 1051928171), -INT32_C( 417317983) }, { -INT32_C( 1586229470), INT32_C( 1547491947), INT32_C( 543851919), -INT32_C( 417317983) } }, { { INT32_C( 1938201681), -INT32_C( 1525351765), -INT32_C( 1686731984), INT32_C( 251114333) }, { INT32_C( 1057340848), -INT32_C( 681611198), -INT32_C( 359093431), INT32_C( 936491494) }, { -INT32_C( 1525351765), INT32_C( 251114333), -INT32_C( 681611198), INT32_C( 936491494) } }, { { INT32_C( 27940693), -INT32_C( 2119778479), -INT32_C( 1625482174), -INT32_C( 2136140593) }, { INT32_C( 331330513), INT32_C( 1827282723), -INT32_C( 1319730230), -INT32_C( 1847056580) }, { -INT32_C( 2119778479), -INT32_C( 2136140593), INT32_C( 1827282723), -INT32_C( 1847056580) } }, { { -INT32_C( 812477570), -INT32_C( 1806682030), INT32_C( 607350101), 
INT32_C( 1403314562) }, { -INT32_C( 1217960812), INT32_C( 1294160259), INT32_C( 285112788), INT32_C( 513992608) }, { -INT32_C( 1806682030), INT32_C( 1403314562), INT32_C( 1294160259), INT32_C( 513992608) } }, { { -INT32_C( 856869766), -INT32_C( 1033880211), INT32_C( 770085803), INT32_C( 142642036) }, { INT32_C( 1925179375), INT32_C( 230679097), -INT32_C( 81871269), INT32_C( 521781413) }, { -INT32_C( 1033880211), INT32_C( 142642036), INT32_C( 230679097), INT32_C( 521781413) } }, { { INT32_C( 1642792692), -INT32_C( 282899644), INT32_C( 1394346718), -INT32_C( 2074370923) }, { -INT32_C( 1107944572), INT32_C( 1506457085), INT32_C( 408217715), -INT32_C( 1657311576) }, { -INT32_C( 282899644), -INT32_C( 2074370923), INT32_C( 1506457085), -INT32_C( 1657311576) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vuzp2q_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vuzp2q_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { -INT64_C( 1572565854752738975), INT64_C( 1665189159162091459) }, { INT64_C( 6552428771358605317), INT64_C( 3695040570659838411) }, { INT64_C( 1665189159162091459), INT64_C( 3695040570659838411) } }, { { -INT64_C( 5262955398493842340), -INT64_C( 7196877999693453743) }, { -INT64_C( 5608706773722595186), -INT64_C( 4035965878303981577) }, { -INT64_C( 7196877999693453743), -INT64_C( 4035965878303981577) } }, { { -INT64_C( 7720355845962003918), INT64_C( 7612134344586186096) }, { INT64_C( 7590360267460416444), -INT64_C( 175304159150248608) }, { INT64_C( 7612134344586186096), -INT64_C( 175304159150248608) } }, { { -INT64_C( 7056017815032929221), INT64_C( 2850576714195906379) }, { -INT64_C( 8667423604670282021), INT64_C( 6221128166867593869) }, { INT64_C( 2850576714195906379), INT64_C( 
6221128166867593869) } }, { { -INT64_C( 4606082402684143801), -INT64_C( 2582404268411144041) }, { -INT64_C( 6707583791840758725), INT64_C( 7295075065064471499) }, { -INT64_C( 2582404268411144041), INT64_C( 7295075065064471499) } }, { { -INT64_C( 7927198485548690745), -INT64_C( 7817196344013692249) }, { INT64_C( 6370587472936938929), INT64_C( 8835355148860482074) }, { -INT64_C( 7817196344013692249), INT64_C( 8835355148860482074) } }, { { INT64_C( 1016040448205420686), INT64_C( 2053704565793336725) }, { INT64_C( 7983837966002976230), INT64_C( 1574364892958690221) }, { INT64_C( 2053704565793336725), INT64_C( 1574364892958690221) } }, { { -INT64_C( 2868311088181702468), INT64_C( 498568190981695332) }, { -INT64_C( 4215856231531032901), INT64_C( 2732838800146637904) }, { INT64_C( 498568190981695332), INT64_C( 2732838800146637904) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_vuzp2q_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vuzp2q_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(222), UINT8_C(236), UINT8_C(203), UINT8_C(186), UINT8_C(216), UINT8_C(218), UINT8_C( 88), UINT8_C(216), UINT8_C(199), UINT8_C(141), UINT8_C( 87), UINT8_C(129), UINT8_C( 11), UINT8_C(113), UINT8_C(215), UINT8_C( 37) }, { UINT8_C(103), UINT8_C( 87), UINT8_C(221), UINT8_C( 35), UINT8_C(248), UINT8_C( 47), UINT8_C(244), UINT8_C(217), UINT8_C( 30), UINT8_C( 82), UINT8_C(106), UINT8_C(233), UINT8_C( 90), UINT8_C(182), UINT8_C(233), UINT8_C( 56) }, { UINT8_C(236), UINT8_C(186), UINT8_C(218), UINT8_C(216), UINT8_C(141), UINT8_C(129), UINT8_C(113), UINT8_C( 37), UINT8_C( 87), UINT8_C( 35), UINT8_C( 47), UINT8_C(217), UINT8_C( 82), UINT8_C(233), UINT8_C(182), UINT8_C( 56) } }, { { UINT8_C(162), 
UINT8_C(180), UINT8_C(242), UINT8_C(122), UINT8_C(143), UINT8_C( 74), UINT8_C( 83), UINT8_C( 86), UINT8_C(215), UINT8_C(170), UINT8_C(215), UINT8_C(226), UINT8_C( 27), UINT8_C(175), UINT8_C( 7), UINT8_C(131) }, { UINT8_C( 6), UINT8_C(229), UINT8_C(166), UINT8_C(254), UINT8_C( 20), UINT8_C(155), UINT8_C(216), UINT8_C( 50), UINT8_C(237), UINT8_C( 66), UINT8_C( 28), UINT8_C( 71), UINT8_C(248), UINT8_C( 5), UINT8_C(127), UINT8_C(154) }, { UINT8_C(180), UINT8_C(122), UINT8_C( 74), UINT8_C( 86), UINT8_C(170), UINT8_C(226), UINT8_C(175), UINT8_C(131), UINT8_C(229), UINT8_C(254), UINT8_C(155), UINT8_C( 50), UINT8_C( 66), UINT8_C( 71), UINT8_C( 5), UINT8_C(154) } }, { { UINT8_C(185), UINT8_C(113), UINT8_C( 21), UINT8_C( 72), UINT8_C(187), UINT8_C(104), UINT8_C(159), UINT8_C(146), UINT8_C( 18), UINT8_C(118), UINT8_C(116), UINT8_C( 45), UINT8_C( 37), UINT8_C(124), UINT8_C(176), UINT8_C( 44) }, { UINT8_C( 97), UINT8_C( 87), UINT8_C( 42), UINT8_C(117), UINT8_C(242), UINT8_C( 2), UINT8_C(168), UINT8_C(223), UINT8_C( 69), UINT8_C(196), UINT8_C( 39), UINT8_C( 61), UINT8_C(201), UINT8_C(166), UINT8_C(216), UINT8_C(130) }, { UINT8_C(113), UINT8_C( 72), UINT8_C(104), UINT8_C(146), UINT8_C(118), UINT8_C( 45), UINT8_C(124), UINT8_C( 44), UINT8_C( 87), UINT8_C(117), UINT8_C( 2), UINT8_C(223), UINT8_C(196), UINT8_C( 61), UINT8_C(166), UINT8_C(130) } }, { { UINT8_C( 24), UINT8_C(237), UINT8_C(203), UINT8_C(211), UINT8_C( 85), UINT8_C(106), UINT8_C(102), UINT8_C(103), UINT8_C(224), UINT8_C(218), UINT8_C(148), UINT8_C( 6), UINT8_C( 86), UINT8_C( 69), UINT8_C( 50), UINT8_C(183) }, { UINT8_C(156), UINT8_C( 92), UINT8_C( 45), UINT8_C(142), UINT8_C( 95), UINT8_C(213), UINT8_C(109), UINT8_C(164), UINT8_C(153), UINT8_C(148), UINT8_C(225), UINT8_C( 98), UINT8_C( 59), UINT8_C(185), UINT8_C(228), UINT8_C( 83) }, { UINT8_C(237), UINT8_C(211), UINT8_C(106), UINT8_C(103), UINT8_C(218), UINT8_C( 6), UINT8_C( 69), UINT8_C(183), UINT8_C( 92), UINT8_C(142), UINT8_C(213), UINT8_C(164), UINT8_C(148), 
UINT8_C( 98), UINT8_C(185), UINT8_C( 83) } }, { { UINT8_C(166), UINT8_C(175), UINT8_C( 38), UINT8_C(251), UINT8_C( 25), UINT8_C(140), UINT8_C( 98), UINT8_C(250), UINT8_C(103), UINT8_C(247), UINT8_C( 0), UINT8_C(189), UINT8_C( 60), UINT8_C( 50), UINT8_C(117), UINT8_C(216) }, { UINT8_C(142), UINT8_C(162), UINT8_C(102), UINT8_C(237), UINT8_C(119), UINT8_C(211), UINT8_C(145), UINT8_C( 16), UINT8_C(104), UINT8_C(115), UINT8_C(114), UINT8_C(163), UINT8_C( 44), UINT8_C( 86), UINT8_C(246), UINT8_C(211) }, { UINT8_C(175), UINT8_C(251), UINT8_C(140), UINT8_C(250), UINT8_C(247), UINT8_C(189), UINT8_C( 50), UINT8_C(216), UINT8_C(162), UINT8_C(237), UINT8_C(211), UINT8_C( 16), UINT8_C(115), UINT8_C(163), UINT8_C( 86), UINT8_C(211) } }, { { UINT8_C( 6), UINT8_C( 28), UINT8_C(206), UINT8_C( 31), UINT8_C(169), UINT8_C( 49), UINT8_C( 25), UINT8_C( 16), UINT8_C( 40), UINT8_C( 25), UINT8_C(205), UINT8_C(100), UINT8_C( 75), UINT8_C( 66), UINT8_C( 60), UINT8_C(218) }, { UINT8_C(228), UINT8_C(162), UINT8_C(199), UINT8_C( 91), UINT8_C(117), UINT8_C( 89), UINT8_C(107), UINT8_C(221), UINT8_C(204), UINT8_C(221), UINT8_C(128), UINT8_C(248), UINT8_C( 52), UINT8_C(118), UINT8_C(203), UINT8_C( 58) }, { UINT8_C( 28), UINT8_C( 31), UINT8_C( 49), UINT8_C( 16), UINT8_C( 25), UINT8_C(100), UINT8_C( 66), UINT8_C(218), UINT8_C(162), UINT8_C( 91), UINT8_C( 89), UINT8_C(221), UINT8_C(221), UINT8_C(248), UINT8_C(118), UINT8_C( 58) } }, { { UINT8_C(147), UINT8_C(154), UINT8_C( 89), UINT8_C( 60), UINT8_C(203), UINT8_C(115), UINT8_C( 76), UINT8_C(243), UINT8_C(140), UINT8_C( 25), UINT8_C( 87), UINT8_C(216), UINT8_C( 92), UINT8_C(147), UINT8_C(178), UINT8_C( 64) }, { UINT8_C( 53), UINT8_C(121), UINT8_C(156), UINT8_C(170), UINT8_C(210), UINT8_C( 7), UINT8_C(136), UINT8_C(158), UINT8_C(229), UINT8_C( 8), UINT8_C(151), UINT8_C( 25), UINT8_C(127), UINT8_C( 98), UINT8_C( 83), UINT8_C( 18) }, { UINT8_C(154), UINT8_C( 60), UINT8_C(115), UINT8_C(243), UINT8_C( 25), UINT8_C(216), UINT8_C(147), UINT8_C( 64), 
UINT8_C(121), UINT8_C(170), UINT8_C( 7), UINT8_C(158), UINT8_C( 8), UINT8_C( 25), UINT8_C( 98), UINT8_C( 18) } }, { { UINT8_C(252), UINT8_C(172), UINT8_C( 78), UINT8_C(199), UINT8_C( 31), UINT8_C(154), UINT8_C(186), UINT8_C(172), UINT8_C(179), UINT8_C( 17), UINT8_C(132), UINT8_C( 15), UINT8_C(164), UINT8_C( 54), UINT8_C( 80), UINT8_C(217) }, { UINT8_C(175), UINT8_C(236), UINT8_C(132), UINT8_C(130), UINT8_C(243), UINT8_C( 12), UINT8_C( 32), UINT8_C(216), UINT8_C( 20), UINT8_C(183), UINT8_C(241), UINT8_C(147), UINT8_C( 26), UINT8_C( 68), UINT8_C(165), UINT8_C( 22) }, { UINT8_C(172), UINT8_C(199), UINT8_C(154), UINT8_C(172), UINT8_C( 17), UINT8_C( 15), UINT8_C( 54), UINT8_C(217), UINT8_C(236), UINT8_C(130), UINT8_C( 12), UINT8_C(216), UINT8_C(183), UINT8_C(147), UINT8_C( 68), UINT8_C( 22) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vuzp2q_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vuzp2q_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(37226), UINT16_C(57568), UINT16_C(51742), UINT16_C(48634), UINT16_C(62066), UINT16_C( 8229), UINT16_C(12583), UINT16_C(19540) }, { UINT16_C(48673), UINT16_C(18245), UINT16_C(29777), UINT16_C(48054), UINT16_C(41040), UINT16_C(22301), UINT16_C(43109), UINT16_C(53223) }, { UINT16_C(57568), UINT16_C(48634), UINT16_C( 8229), UINT16_C(19540), UINT16_C(18245), UINT16_C(48054), UINT16_C(22301), UINT16_C(53223) } }, { { UINT16_C(51001), UINT16_C(22448), UINT16_C(43665), UINT16_C( 1044), UINT16_C(14748), UINT16_C(49956), UINT16_C(31082), UINT16_C(35599) }, { UINT16_C(21559), UINT16_C(35283), UINT16_C(35273), UINT16_C( 6468), UINT16_C(25129), UINT16_C(36465), UINT16_C(22538), UINT16_C(17246) }, { UINT16_C(22448), UINT16_C( 1044), 
UINT16_C(49956), UINT16_C(35599), UINT16_C(35283), UINT16_C( 6468), UINT16_C(36465), UINT16_C(17246) } }, { { UINT16_C( 3615), UINT16_C(45466), UINT16_C(44728), UINT16_C(21941), UINT16_C(55783), UINT16_C(21016), UINT16_C(10322), UINT16_C(35549) }, { UINT16_C(45180), UINT16_C(17683), UINT16_C(22329), UINT16_C(25183), UINT16_C(53433), UINT16_C(50161), UINT16_C(20264), UINT16_C(18182) }, { UINT16_C(45466), UINT16_C(21941), UINT16_C(21016), UINT16_C(35549), UINT16_C(17683), UINT16_C(25183), UINT16_C(50161), UINT16_C(18182) } }, { { UINT16_C(41053), UINT16_C( 5624), UINT16_C(44366), UINT16_C(13930), UINT16_C(33671), UINT16_C(55688), UINT16_C(26027), UINT16_C(10083) }, { UINT16_C(30230), UINT16_C(20333), UINT16_C(52430), UINT16_C(34738), UINT16_C(41884), UINT16_C(50251), UINT16_C(20978), UINT16_C(20235) }, { UINT16_C( 5624), UINT16_C(13930), UINT16_C(55688), UINT16_C(10083), UINT16_C(20333), UINT16_C(34738), UINT16_C(50251), UINT16_C(20235) } }, { { UINT16_C( 1266), UINT16_C(16484), UINT16_C(53169), UINT16_C(14454), UINT16_C(65106), UINT16_C(64786), UINT16_C(30052), UINT16_C(31268) }, { UINT16_C(37356), UINT16_C(47817), UINT16_C(31581), UINT16_C(63809), UINT16_C(35870), UINT16_C( 4285), UINT16_C(51678), UINT16_C(53343) }, { UINT16_C(16484), UINT16_C(14454), UINT16_C(64786), UINT16_C(31268), UINT16_C(47817), UINT16_C(63809), UINT16_C( 4285), UINT16_C(53343) } }, { { UINT16_C(50381), UINT16_C(32272), UINT16_C(34707), UINT16_C(58807), UINT16_C(51589), UINT16_C(59874), UINT16_C( 1598), UINT16_C(10851) }, { UINT16_C(11672), UINT16_C(62948), UINT16_C( 9896), UINT16_C(51183), UINT16_C(44210), UINT16_C(37079), UINT16_C(14197), UINT16_C(16992) }, { UINT16_C(32272), UINT16_C(58807), UINT16_C(59874), UINT16_C(10851), UINT16_C(62948), UINT16_C(51183), UINT16_C(37079), UINT16_C(16992) } }, { { UINT16_C(29179), UINT16_C(36545), UINT16_C(30968), UINT16_C(32115), UINT16_C(21825), UINT16_C(32615), UINT16_C(51803), UINT16_C(62378) }, { UINT16_C(36599), UINT16_C(41193), UINT16_C(55476), 
UINT16_C(26471), UINT16_C(16004), UINT16_C(64247), UINT16_C(22645), UINT16_C(28732) }, { UINT16_C(36545), UINT16_C(32115), UINT16_C(32615), UINT16_C(62378), UINT16_C(41193), UINT16_C(26471), UINT16_C(64247), UINT16_C(28732) } }, { { UINT16_C(64969), UINT16_C(49662), UINT16_C(29045), UINT16_C(46654), UINT16_C(42438), UINT16_C( 8758), UINT16_C(57456), UINT16_C(26389) }, { UINT16_C(65134), UINT16_C( 8967), UINT16_C(28374), UINT16_C(23434), UINT16_C(33197), UINT16_C( 8789), UINT16_C(37337), UINT16_C(41619) }, { UINT16_C(49662), UINT16_C(46654), UINT16_C( 8758), UINT16_C(26389), UINT16_C( 8967), UINT16_C(23434), UINT16_C( 8789), UINT16_C(41619) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vuzp2q_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vuzp2q_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(2900651911), UINT32_C(2436311970), UINT32_C(3817606193), UINT32_C(2719667300) }, { UINT32_C(2172837903), UINT32_C(3994048712), UINT32_C( 856971193), UINT32_C(2581448722) }, { UINT32_C(2436311970), UINT32_C(2719667300), UINT32_C(3994048712), UINT32_C(2581448722) } }, { { UINT32_C(3662070072), UINT32_C( 510361069), UINT32_C(3976329353), UINT32_C(3700366284) }, { UINT32_C(3294433787), UINT32_C( 598895978), UINT32_C(3596011204), UINT32_C(3329241998) }, { UINT32_C( 510361069), UINT32_C(3700366284), UINT32_C( 598895978), UINT32_C(3329241998) } }, { { UINT32_C(3802183413), UINT32_C(3170896947), UINT32_C(3500802308), UINT32_C( 413940253) }, { UINT32_C(3051096651), UINT32_C(1020890743), UINT32_C(3809619796), UINT32_C(1487503971) }, { UINT32_C(3170896947), UINT32_C( 413940253), UINT32_C(1020890743), UINT32_C(1487503971) } }, { { UINT32_C(1815759416), UINT32_C(1512651350), 
UINT32_C(1479201595), UINT32_C(1500632845) }, { UINT32_C(1477332449), UINT32_C( 815065052), UINT32_C(2048108311), UINT32_C(1657978153) }, { UINT32_C(1512651350), UINT32_C(1500632845), UINT32_C( 815065052), UINT32_C(1657978153) } }, { { UINT32_C(1573784583), UINT32_C(2176317254), UINT32_C(3638223306), UINT32_C(2570144696) }, { UINT32_C(1962033048), UINT32_C(1051035175), UINT32_C(1471723565), UINT32_C(2092534389) }, { UINT32_C(2176317254), UINT32_C(2570144696), UINT32_C(1051035175), UINT32_C(2092534389) } }, { { UINT32_C(3705243542), UINT32_C(1214091390), UINT32_C( 706754418), UINT32_C( 465850754) }, { UINT32_C(3096426129), UINT32_C(1794520124), UINT32_C(1656860397), UINT32_C(3470752312) }, { UINT32_C(1214091390), UINT32_C( 465850754), UINT32_C(1794520124), UINT32_C(3470752312) } }, { { UINT32_C(2141894657), UINT32_C(3150382921), UINT32_C(3253069887), UINT32_C(3403458873) }, { UINT32_C(2625793119), UINT32_C(2366011552), UINT32_C(1592837926), UINT32_C(1110232897) }, { UINT32_C(3150382921), UINT32_C(3403458873), UINT32_C(2366011552), UINT32_C(1110232897) } }, { { UINT32_C(3502364295), UINT32_C( 495683806), UINT32_C(2866704752), UINT32_C(2054470426) }, { UINT32_C(3340171047), UINT32_C(2522160239), UINT32_C( 619988451), UINT32_C(2607161620) }, { UINT32_C( 495683806), UINT32_C(2054470426), UINT32_C(2522160239), UINT32_C(2607161620) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vuzp2q_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vuzp2q_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C(10128971028910510737), UINT64_C(17729527479315999417) }, { UINT64_C( 3335331942345100554), UINT64_C(11269845798600382417) }, { UINT64_C(17729527479315999417), 
UINT64_C(11269845798600382417) } }, { { UINT64_C(16475121993802503579), UINT64_C(14326970468804119694) }, { UINT64_C(11373276584639549932), UINT64_C(18139705864536774166) }, { UINT64_C(14326970468804119694), UINT64_C(18139705864536774166) } }, { { UINT64_C( 9372473455084277507), UINT64_C( 3532002181234206082) }, { UINT64_C(14910203097960200578), UINT64_C(16492371921639289732) }, { UINT64_C( 3532002181234206082), UINT64_C(16492371921639289732) } }, { { UINT64_C( 3220394418092766011), UINT64_C(14069084986605703112) }, { UINT64_C( 5381235056558806823), UINT64_C( 9544269215124331695) }, { UINT64_C(14069084986605703112), UINT64_C( 9544269215124331695) } }, { { UINT64_C( 8203135926171567617), UINT64_C(15454311783178649971) }, { UINT64_C( 3883619683466119637), UINT64_C( 2579500605500691200) }, { UINT64_C(15454311783178649971), UINT64_C( 2579500605500691200) } }, { { UINT64_C( 6330836097350664128), UINT64_C(17108193226076674790) }, { UINT64_C( 9640075422859797005), UINT64_C( 9927792619412164225) }, { UINT64_C(17108193226076674790), UINT64_C( 9927792619412164225) } }, { { UINT64_C( 2401771254059341082), UINT64_C(17306527302493298728) }, { UINT64_C(11839765661985519172), UINT64_C(15144777308092964314) }, { UINT64_C(17306527302493298728), UINT64_C(15144777308092964314) } }, { { UINT64_C( 1656802321261076643), UINT64_C( 2042527392187463044) }, { UINT64_C( 144455106247973689), UINT64_C(18076835750634408275) }, { UINT64_C( 2042527392187463044), UINT64_C(18076835750634408275) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vuzp2q_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2_s32) 
SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2q_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2q_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2q_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2q_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2q_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2q_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2q_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2q_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2q_u64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" simde-0.7.2/test/arm/neon/zip.c000066400000000000000000002042601400333146700162710ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN zip #include #include #if !defined(SIMDE_BUG_INTEL_857088) static int test_simde_vzip_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2][2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -358.82), SIMDE_FLOAT32_C( 78.84) }, { SIMDE_FLOAT32_C( 983.85), SIMDE_FLOAT32_C( -720.74) }, { { SIMDE_FLOAT32_C( -358.82), SIMDE_FLOAT32_C( 983.85) }, { SIMDE_FLOAT32_C( 78.84), SIMDE_FLOAT32_C( -720.74) }, }, }, { { SIMDE_FLOAT32_C( 775.76), SIMDE_FLOAT32_C( 749.10) }, { SIMDE_FLOAT32_C( -498.97), SIMDE_FLOAT32_C( 508.88) }, { { SIMDE_FLOAT32_C( 775.76), SIMDE_FLOAT32_C( -498.97) }, { SIMDE_FLOAT32_C( 749.10), SIMDE_FLOAT32_C( 508.88) }, }, }, { { SIMDE_FLOAT32_C( -772.99), SIMDE_FLOAT32_C( -138.42) }, { SIMDE_FLOAT32_C( 768.93), SIMDE_FLOAT32_C( -389.94) }, { { SIMDE_FLOAT32_C( -772.99), SIMDE_FLOAT32_C( 768.93) }, { SIMDE_FLOAT32_C( -138.42), SIMDE_FLOAT32_C( -389.94) }, }, }, { { SIMDE_FLOAT32_C( 998.17), SIMDE_FLOAT32_C( -286.00) }, { SIMDE_FLOAT32_C( 648.24), SIMDE_FLOAT32_C( -347.96) }, { { SIMDE_FLOAT32_C( 998.17), SIMDE_FLOAT32_C( 648.24) }, { SIMDE_FLOAT32_C( -286.00), SIMDE_FLOAT32_C( -347.96) }, }, }, { { SIMDE_FLOAT32_C( -694.32), SIMDE_FLOAT32_C( -443.35) }, { SIMDE_FLOAT32_C( -763.65), SIMDE_FLOAT32_C( 250.75) }, { { SIMDE_FLOAT32_C( 
-694.32), SIMDE_FLOAT32_C( -763.65) }, { SIMDE_FLOAT32_C( -443.35), SIMDE_FLOAT32_C( 250.75) }, }, }, { { SIMDE_FLOAT32_C( -26.88), SIMDE_FLOAT32_C( -994.99) }, { SIMDE_FLOAT32_C( 145.10), SIMDE_FLOAT32_C( -814.15) }, { { SIMDE_FLOAT32_C( -26.88), SIMDE_FLOAT32_C( 145.10) }, { SIMDE_FLOAT32_C( -994.99), SIMDE_FLOAT32_C( -814.15) }, }, }, { { SIMDE_FLOAT32_C( 982.86), SIMDE_FLOAT32_C( -888.57) }, { SIMDE_FLOAT32_C( -651.00), SIMDE_FLOAT32_C( 623.08) }, { { SIMDE_FLOAT32_C( 982.86), SIMDE_FLOAT32_C( -651.00) }, { SIMDE_FLOAT32_C( -888.57), SIMDE_FLOAT32_C( 623.08) }, }, }, { { SIMDE_FLOAT32_C( -237.29), SIMDE_FLOAT32_C( 58.52) }, { SIMDE_FLOAT32_C( -15.19), SIMDE_FLOAT32_C( 403.89) }, { { SIMDE_FLOAT32_C( -237.29), SIMDE_FLOAT32_C( -15.19) }, { SIMDE_FLOAT32_C( 58.52), SIMDE_FLOAT32_C( 403.89) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2x2_t r = simde_vzip_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r.val[0], simde_vld1_f32(test_vec[i].r[0]), 1); simde_test_arm_neon_assert_equal_f32x2(r.val[1], simde_vld1_f32(test_vec[i].r[1]), 1); } return 0; } static int test_simde_vzip_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[2][8]; } test_vec[] = { { { -INT8_C( 26), -INT8_C( 14), -INT8_C( 73), -INT8_C( 66), INT8_C( 84), -INT8_C( 37), -INT8_C( 44), INT8_C( 48) }, { -INT8_C( 17), INT8_C( 122), -INT8_C( 84), -INT8_C( 31), -INT8_C( 118), INT8_C( 66), -INT8_C( 5), -INT8_C( 66) }, { { -INT8_C( 26), -INT8_C( 17), -INT8_C( 14), INT8_C( 122), -INT8_C( 73), -INT8_C( 84), -INT8_C( 66), -INT8_C( 31) }, { INT8_C( 84), -INT8_C( 118), -INT8_C( 37), INT8_C( 66), -INT8_C( 44), -INT8_C( 5), INT8_C( 48), -INT8_C( 66) }, }, }, { { -INT8_C( 65), -INT8_C( 32), INT8_C( 65), -INT8_C( 83), INT8_C( 64), INT8_C( 26), -INT8_C( 27), -INT8_C( 82) }, { INT8_C( 10), -INT8_C( 11), -INT8_C( 8), INT8_C( 126), 
INT8_MAX, INT8_C( 56), -INT8_C( 15), INT8_C( 101) }, { { -INT8_C( 65), INT8_C( 10), -INT8_C( 32), -INT8_C( 11), INT8_C( 65), -INT8_C( 8), -INT8_C( 83), INT8_C( 126) }, { INT8_C( 64), INT8_MAX, INT8_C( 26), INT8_C( 56), -INT8_C( 27), -INT8_C( 15), -INT8_C( 82), INT8_C( 101) }, }, }, { { INT8_C( 43), -INT8_C( 87), INT8_C( 35), INT8_MAX, -INT8_C( 124), -INT8_C( 9), -INT8_C( 80), INT8_C( 115) }, { INT8_C( 114), INT8_C( 92), INT8_C( 85), -INT8_C( 4), -INT8_C( 98), INT8_C( 80), -INT8_C( 70), INT8_C( 94) }, { { INT8_C( 43), INT8_C( 114), -INT8_C( 87), INT8_C( 92), INT8_C( 35), INT8_C( 85), INT8_MAX, -INT8_C( 4) }, { -INT8_C( 124), -INT8_C( 98), -INT8_C( 9), INT8_C( 80), -INT8_C( 80), -INT8_C( 70), INT8_C( 115), INT8_C( 94) }, }, }, { { INT8_C( 48), -INT8_C( 5), INT8_C( 11), INT8_C( 113), INT8_C( 21), -INT8_C( 16), INT8_C( 31), INT8_C( 32) }, { -INT8_C( 27), INT8_C( 24), -INT8_C( 98), INT8_C( 100), INT8_C( 80), -INT8_C( 112), -INT8_C( 55), INT8_C( 123) }, { { INT8_C( 48), -INT8_C( 27), -INT8_C( 5), INT8_C( 24), INT8_C( 11), -INT8_C( 98), INT8_C( 113), INT8_C( 100) }, { INT8_C( 21), INT8_C( 80), -INT8_C( 16), -INT8_C( 112), INT8_C( 31), -INT8_C( 55), INT8_C( 32), INT8_C( 123) }, }, }, { { INT8_C( 57), -INT8_C( 19), -INT8_C( 5), -INT8_C( 67), -INT8_C( 28), -INT8_C( 85), INT8_C( 49), INT8_C( 86) }, { INT8_C( 7), -INT8_C( 122), INT8_C( 82), -INT8_C( 90), -INT8_C( 42), INT8_C( 13), INT8_C( 4), INT8_C( 6) }, { { INT8_C( 57), INT8_C( 7), -INT8_C( 19), -INT8_C( 122), -INT8_C( 5), INT8_C( 82), -INT8_C( 67), -INT8_C( 90) }, { -INT8_C( 28), -INT8_C( 42), -INT8_C( 85), INT8_C( 13), INT8_C( 49), INT8_C( 4), INT8_C( 86), INT8_C( 6) }, }, }, { { INT8_C( 8), INT8_C( 15), INT8_C( 119), INT8_C( 30), -INT8_C( 1), -INT8_C( 105), INT8_C( 62), -INT8_C( 28) }, { -INT8_C( 81), -INT8_C( 36), INT8_C( 72), -INT8_C( 1), INT8_C( 108), INT8_C( 17), INT8_C( 123), -INT8_C( 91) }, { { INT8_C( 8), -INT8_C( 81), INT8_C( 15), -INT8_C( 36), INT8_C( 119), INT8_C( 72), INT8_C( 30), -INT8_C( 1) }, { -INT8_C( 1), 
INT8_C( 108), -INT8_C( 105), INT8_C( 17), INT8_C( 62), INT8_C( 123), -INT8_C( 28), -INT8_C( 91) }, }, }, { { -INT8_C( 2), INT8_C( 118), INT8_C( 99), -INT8_C( 29), INT8_C( 33), -INT8_C( 108), INT8_C( 57), INT8_C( 40) }, { INT8_C( 26), -INT8_C( 116), -INT8_C( 50), -INT8_C( 16), -INT8_C( 103), -INT8_C( 46), -INT8_C( 10), -INT8_C( 95) }, { { -INT8_C( 2), INT8_C( 26), INT8_C( 118), -INT8_C( 116), INT8_C( 99), -INT8_C( 50), -INT8_C( 29), -INT8_C( 16) }, { INT8_C( 33), -INT8_C( 103), -INT8_C( 108), -INT8_C( 46), INT8_C( 57), -INT8_C( 10), INT8_C( 40), -INT8_C( 95) }, }, }, { { -INT8_C( 31), INT8_C( 110), -INT8_C( 65), -INT8_C( 32), INT8_C( 5), -INT8_C( 3), -INT8_C( 60), -INT8_C( 76) }, { -INT8_C( 38), INT8_C( 12), -INT8_C( 77), INT8_C( 70), INT8_C( 30), INT8_C( 46), -INT8_C( 20), INT8_C( 28) }, { { -INT8_C( 31), -INT8_C( 38), INT8_C( 110), INT8_C( 12), -INT8_C( 65), -INT8_C( 77), -INT8_C( 32), INT8_C( 70) }, { INT8_C( 5), INT8_C( 30), -INT8_C( 3), INT8_C( 46), -INT8_C( 60), -INT8_C( 20), -INT8_C( 76), INT8_C( 28) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8x2_t r = simde_vzip_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r.val[0], simde_vld1_s8(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_i8x8(r.val[1], simde_vld1_s8(test_vec[i].r[1])); } return 0; } static int test_simde_vzip_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[2][4]; } test_vec[] = { { { -INT16_C( 26683), INT16_C( 14034), -INT16_C( 3551), INT16_C( 32721) }, { INT16_C( 2584), -INT16_C( 16387), -INT16_C( 1878), INT16_C( 21655) }, { { -INT16_C( 26683), INT16_C( 2584), INT16_C( 14034), -INT16_C( 16387) }, { -INT16_C( 3551), -INT16_C( 1878), INT16_C( 32721), INT16_C( 21655) }, }, }, { { -INT16_C( 3666), -INT16_C( 14488), INT16_C( 5375), INT16_C( 21517) }, { -INT16_C( 18844), INT16_C( 26070), -INT16_C( 14981), INT16_C( 16854) 
}, { { -INT16_C( 3666), -INT16_C( 18844), -INT16_C( 14488), INT16_C( 26070) }, { INT16_C( 5375), -INT16_C( 14981), INT16_C( 21517), INT16_C( 16854) }, }, }, { { -INT16_C( 22436), INT16_C( 32119), INT16_C( 18586), -INT16_C( 19716) }, { -INT16_C( 1454), -INT16_C( 910), INT16_C( 2546), -INT16_C( 24239) }, { { -INT16_C( 22436), -INT16_C( 1454), INT16_C( 32119), -INT16_C( 910) }, { INT16_C( 18586), INT16_C( 2546), -INT16_C( 19716), -INT16_C( 24239) }, }, }, { { -INT16_C( 17926), -INT16_C( 1688), INT16_C( 30157), INT16_C( 12621) }, { INT16_C( 9003), -INT16_C( 22633), INT16_C( 28136), INT16_C( 17640) }, { { -INT16_C( 17926), INT16_C( 9003), -INT16_C( 1688), -INT16_C( 22633) }, { INT16_C( 30157), INT16_C( 28136), INT16_C( 12621), INT16_C( 17640) }, }, }, { { INT16_C( 24341), -INT16_C( 20543), -INT16_C( 16728), -INT16_C( 1439) }, { -INT16_C( 11336), -INT16_C( 21769), INT16_C( 18653), -INT16_C( 10421) }, { { INT16_C( 24341), -INT16_C( 11336), -INT16_C( 20543), -INT16_C( 21769) }, { -INT16_C( 16728), INT16_C( 18653), -INT16_C( 1439), -INT16_C( 10421) }, }, }, { { -INT16_C( 19455), -INT16_C( 12335), INT16_C( 7721), INT16_C( 21760) }, { -INT16_C( 26814), INT16_C( 11004), -INT16_C( 7164), INT16_C( 6511) }, { { -INT16_C( 19455), -INT16_C( 26814), -INT16_C( 12335), INT16_C( 11004) }, { INT16_C( 7721), -INT16_C( 7164), INT16_C( 21760), INT16_C( 6511) }, }, }, { { INT16_C( 12355), -INT16_C( 5176), INT16_C( 10990), -INT16_C( 22810) }, { -INT16_C( 8707), -INT16_C( 9647), -INT16_C( 25563), INT16_C( 9906) }, { { INT16_C( 12355), -INT16_C( 8707), -INT16_C( 5176), -INT16_C( 9647) }, { INT16_C( 10990), -INT16_C( 25563), -INT16_C( 22810), INT16_C( 9906) }, }, }, { { -INT16_C( 31920), INT16_C( 31477), -INT16_C( 2399), -INT16_C( 7217) }, { -INT16_C( 13427), -INT16_C( 28146), INT16_C( 32175), -INT16_C( 3413) }, { { -INT16_C( 31920), -INT16_C( 13427), INT16_C( 31477), -INT16_C( 28146) }, { -INT16_C( 2399), INT16_C( 32175), -INT16_C( 7217), -INT16_C( 3413) }, }, }, }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4x2_t r = simde_vzip_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r.val[0], simde_vld1_s16(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_i16x4(r.val[1], simde_vld1_s16(test_vec[i].r[1])); } return 0; } static int test_simde_vzip_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2][2]; } test_vec[] = { { { -INT32_C( 960648692), -INT32_C( 1572658442) }, { -INT32_C( 8820380), -INT32_C( 932021964) }, { { -INT32_C( 960648692), -INT32_C( 8820380) }, { -INT32_C( 1572658442), -INT32_C( 932021964) }, }, }, { { -INT32_C( 1950299668), INT32_C( 91704458) }, { INT32_C( 1292135811), -INT32_C( 1098573390) }, { { -INT32_C( 1950299668), INT32_C( 1292135811) }, { INT32_C( 91704458), -INT32_C( 1098573390) }, }, }, { { -INT32_C( 1165737276), -INT32_C( 933443484) }, { INT32_C( 1724372273), INT32_C( 992885071) }, { { -INT32_C( 1165737276), INT32_C( 1724372273) }, { -INT32_C( 933443484), INT32_C( 992885071) }, }, }, { { -INT32_C( 1916342526), -INT32_C( 1097712325) }, { INT32_C( 1544263338), INT32_C( 1931120815) }, { { -INT32_C( 1916342526), INT32_C( 1544263338) }, { -INT32_C( 1097712325), INT32_C( 1931120815) }, }, }, { { INT32_C( 909025234), -INT32_C( 1728148889) }, { -INT32_C( 1342257824), INT32_C( 32124414) }, { { INT32_C( 909025234), -INT32_C( 1342257824) }, { -INT32_C( 1728148889), INT32_C( 32124414) }, }, }, { { INT32_C( 1468968988), -INT32_C( 1760222995) }, { INT32_C( 1710432438), -INT32_C( 2082927183) }, { { INT32_C( 1468968988), INT32_C( 1710432438) }, { -INT32_C( 1760222995), -INT32_C( 2082927183) }, }, }, { { INT32_C( 330958764), -INT32_C( 240338799) }, { INT32_C( 2090904190), -INT32_C( 209876265) }, { { INT32_C( 330958764), INT32_C( 2090904190) }, { -INT32_C( 240338799), -INT32_C( 209876265) }, }, }, { { INT32_C( 659163962), -INT32_C( 507617493) }, { INT32_C( 826782080), 
INT32_C( 1806966975) }, { { INT32_C( 659163962), INT32_C( 826782080) }, { -INT32_C( 507617493), INT32_C( 1806966975) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2x2_t r = simde_vzip_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r.val[0], simde_vld1_s32(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_i32x2(r.val[1], simde_vld1_s32(test_vec[i].r[1])); } return 0; } static int test_simde_vzip_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[2][8]; } test_vec[] = { { { UINT8_C( 45), UINT8_C(117), UINT8_C(190), UINT8_C(201), UINT8_C( 7), UINT8_C(205), UINT8_C( 18), UINT8_C(175) }, { UINT8_C( 25), UINT8_C( 6), UINT8_C(174), UINT8_C(106), UINT8_C( 95), UINT8_C( 1), UINT8_C( 19), UINT8_C(123) }, { { UINT8_C( 45), UINT8_C( 25), UINT8_C(117), UINT8_C( 6), UINT8_C(190), UINT8_C(174), UINT8_C(201), UINT8_C(106) }, { UINT8_C( 7), UINT8_C( 95), UINT8_C(205), UINT8_C( 1), UINT8_C( 18), UINT8_C( 19), UINT8_C(175), UINT8_C(123) }, }, }, { { UINT8_C( 28), UINT8_C( 48), UINT8_C( 25), UINT8_C( 23), UINT8_C( 60), UINT8_C(125), UINT8_C(102), UINT8_C( 12) }, { UINT8_C(248), UINT8_C( 49), UINT8_C( 20), UINT8_C(252), UINT8_C(118), UINT8_C(241), UINT8_C(127), UINT8_C(163) }, { { UINT8_C( 28), UINT8_C(248), UINT8_C( 48), UINT8_C( 49), UINT8_C( 25), UINT8_C( 20), UINT8_C( 23), UINT8_C(252) }, { UINT8_C( 60), UINT8_C(118), UINT8_C(125), UINT8_C(241), UINT8_C(102), UINT8_C(127), UINT8_C( 12), UINT8_C(163) }, }, }, { { UINT8_C(102), UINT8_C( 61), UINT8_C(109), UINT8_C(109), UINT8_C( 10), UINT8_C(127), UINT8_C( 28), UINT8_C( 36) }, { UINT8_C(134), UINT8_C(202), UINT8_C(142), UINT8_C(229), UINT8_C(204), UINT8_C(161), UINT8_C( 96), UINT8_C(232) }, { { UINT8_C(102), UINT8_C(134), UINT8_C( 61), UINT8_C(202), UINT8_C(109), UINT8_C(142), UINT8_C(109), UINT8_C(229) }, { UINT8_C( 10), UINT8_C(204), UINT8_C(127), 
UINT8_C(161), UINT8_C( 28), UINT8_C( 96), UINT8_C( 36), UINT8_C(232) }, }, }, { { UINT8_C(210), UINT8_C(121), UINT8_MAX, UINT8_C( 14), UINT8_C(246), UINT8_C(101), UINT8_C( 27), UINT8_C(238) }, { UINT8_C(150), UINT8_C( 47), UINT8_C(234), UINT8_C( 12), UINT8_C( 32), UINT8_C(105), UINT8_C(176), UINT8_C(135) }, { { UINT8_C(210), UINT8_C(150), UINT8_C(121), UINT8_C( 47), UINT8_MAX, UINT8_C(234), UINT8_C( 14), UINT8_C( 12) }, { UINT8_C(246), UINT8_C( 32), UINT8_C(101), UINT8_C(105), UINT8_C( 27), UINT8_C(176), UINT8_C(238), UINT8_C(135) }, }, }, { { UINT8_C(167), UINT8_C( 29), UINT8_C(244), UINT8_C(177), UINT8_C(156), UINT8_C( 17), UINT8_C(213), UINT8_C( 34) }, { UINT8_C(219), UINT8_C(100), UINT8_C( 7), UINT8_C(167), UINT8_C( 5), UINT8_C(103), UINT8_C(144), UINT8_C(215) }, { { UINT8_C(167), UINT8_C(219), UINT8_C( 29), UINT8_C(100), UINT8_C(244), UINT8_C( 7), UINT8_C(177), UINT8_C(167) }, { UINT8_C(156), UINT8_C( 5), UINT8_C( 17), UINT8_C(103), UINT8_C(213), UINT8_C(144), UINT8_C( 34), UINT8_C(215) }, }, }, { { UINT8_C(224), UINT8_C(143), UINT8_C(230), UINT8_C(214), UINT8_C(245), UINT8_C( 1), UINT8_C(196), UINT8_C(139) }, { UINT8_C( 48), UINT8_C(174), UINT8_C(152), UINT8_C( 81), UINT8_C( 24), UINT8_C( 72), UINT8_C(216), UINT8_C(191) }, { { UINT8_C(224), UINT8_C( 48), UINT8_C(143), UINT8_C(174), UINT8_C(230), UINT8_C(152), UINT8_C(214), UINT8_C( 81) }, { UINT8_C(245), UINT8_C( 24), UINT8_C( 1), UINT8_C( 72), UINT8_C(196), UINT8_C(216), UINT8_C(139), UINT8_C(191) }, }, }, { { UINT8_C(101), UINT8_C(204), UINT8_C(112), UINT8_C( 1), UINT8_C(221), UINT8_C( 70), UINT8_C( 36), UINT8_C(185) }, { UINT8_C(170), UINT8_C( 43), UINT8_C( 96), UINT8_C(175), UINT8_C(147), UINT8_C(240), UINT8_C(135), UINT8_C(115) }, { { UINT8_C(101), UINT8_C(170), UINT8_C(204), UINT8_C( 43), UINT8_C(112), UINT8_C( 96), UINT8_C( 1), UINT8_C(175) }, { UINT8_C(221), UINT8_C(147), UINT8_C( 70), UINT8_C(240), UINT8_C( 36), UINT8_C(135), UINT8_C(185), UINT8_C(115) }, }, }, { { UINT8_C(128), UINT8_C(109), 
UINT8_C( 74), UINT8_C(117), UINT8_C(110), UINT8_C( 14), UINT8_C( 0), UINT8_C(158) }, { UINT8_C(189), UINT8_C(152), UINT8_C(239), UINT8_C(213), UINT8_C(224), UINT8_C(199), UINT8_C(148), UINT8_C( 69) }, { { UINT8_C(128), UINT8_C(189), UINT8_C(109), UINT8_C(152), UINT8_C( 74), UINT8_C(239), UINT8_C(117), UINT8_C(213) }, { UINT8_C(110), UINT8_C(224), UINT8_C( 14), UINT8_C(199), UINT8_C( 0), UINT8_C(148), UINT8_C(158), UINT8_C( 69) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8x2_t r = simde_vzip_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r.val[0], simde_vld1_u8(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_u8x8(r.val[1], simde_vld1_u8(test_vec[i].r[1])); } return 0; } static int test_simde_vzip_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[2][4]; } test_vec[] = { { { UINT16_C(39442), UINT16_C(17230), UINT16_C(36311), UINT16_C(63975) }, { UINT16_C(17092), UINT16_C(21998), UINT16_C(48945), UINT16_C( 9146) }, { { UINT16_C(39442), UINT16_C(17092), UINT16_C(17230), UINT16_C(21998) }, { UINT16_C(36311), UINT16_C(48945), UINT16_C(63975), UINT16_C( 9146) }, }, }, { { UINT16_C(23687), UINT16_C(28300), UINT16_C(30888), UINT16_C(65167) }, { UINT16_C(17067), UINT16_C(55130), UINT16_C(36877), UINT16_C( 8076) }, { { UINT16_C(23687), UINT16_C(17067), UINT16_C(28300), UINT16_C(55130) }, { UINT16_C(30888), UINT16_C(36877), UINT16_C(65167), UINT16_C( 8076) }, }, }, { { UINT16_C(55850), UINT16_C( 611), UINT16_C(19047), UINT16_C(11515) }, { UINT16_C(60044), UINT16_C(48769), UINT16_C(15273), UINT16_C(12513) }, { { UINT16_C(55850), UINT16_C(60044), UINT16_C( 611), UINT16_C(48769) }, { UINT16_C(19047), UINT16_C(15273), UINT16_C(11515), UINT16_C(12513) }, }, }, { { UINT16_C(28055), UINT16_C(16542), UINT16_C(11749), UINT16_C(36926) }, { UINT16_C(39023), UINT16_C(32103), UINT16_C(62248), 
UINT16_C(21404) }, { { UINT16_C(28055), UINT16_C(39023), UINT16_C(16542), UINT16_C(32103) }, { UINT16_C(11749), UINT16_C(62248), UINT16_C(36926), UINT16_C(21404) }, }, }, { { UINT16_C(65485), UINT16_C(13653), UINT16_C(20553), UINT16_C(54881) }, { UINT16_C(57914), UINT16_C(58260), UINT16_C(29982), UINT16_C(46356) }, { { UINT16_C(65485), UINT16_C(57914), UINT16_C(13653), UINT16_C(58260) }, { UINT16_C(20553), UINT16_C(29982), UINT16_C(54881), UINT16_C(46356) }, }, }, { { UINT16_C(45794), UINT16_C(51445), UINT16_C(13280), UINT16_C(20312) }, { UINT16_C(49356), UINT16_C(62668), UINT16_C(27059), UINT16_C(33095) }, { { UINT16_C(45794), UINT16_C(49356), UINT16_C(51445), UINT16_C(62668) }, { UINT16_C(13280), UINT16_C(27059), UINT16_C(20312), UINT16_C(33095) }, }, }, { { UINT16_C(40040), UINT16_C(45750), UINT16_C( 6125), UINT16_C(10120) }, { UINT16_C( 7417), UINT16_C( 5899), UINT16_C( 8081), UINT16_C(29645) }, { { UINT16_C(40040), UINT16_C( 7417), UINT16_C(45750), UINT16_C( 5899) }, { UINT16_C( 6125), UINT16_C( 8081), UINT16_C(10120), UINT16_C(29645) }, }, }, { { UINT16_C(49873), UINT16_C(45371), UINT16_C(38134), UINT16_C(49665) }, { UINT16_C(52564), UINT16_C( 1974), UINT16_C(65078), UINT16_C(40840) }, { { UINT16_C(49873), UINT16_C(52564), UINT16_C(45371), UINT16_C( 1974) }, { UINT16_C(38134), UINT16_C(65078), UINT16_C(49665), UINT16_C(40840) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4x2_t r = simde_vzip_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r.val[0], simde_vld1_u16(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_u16x4(r.val[1], simde_vld1_u16(test_vec[i].r[1])); } return 0; } static int test_simde_vzip_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2][2]; } test_vec[] = { { { UINT32_C(2533684775), UINT32_C(2516841981) }, { UINT32_C( 791409457), UINT32_C(2232449215) }, 
{ { UINT32_C(2533684775), UINT32_C( 791409457) }, { UINT32_C(2516841981), UINT32_C(2232449215) }, }, }, { { UINT32_C(3580136835), UINT32_C(1752780447) }, { UINT32_C(2448707753), UINT32_C(1837365830) }, { { UINT32_C(3580136835), UINT32_C(2448707753) }, { UINT32_C(1752780447), UINT32_C(1837365830) }, }, }, { { UINT32_C(3976497136), UINT32_C(2927888252) }, { UINT32_C(3135090683), UINT32_C(2856316455) }, { { UINT32_C(3976497136), UINT32_C(3135090683) }, { UINT32_C(2927888252), UINT32_C(2856316455) }, }, }, { { UINT32_C( 444638075), UINT32_C(2592274929) }, { UINT32_C(2402056009), UINT32_C(1644015473) }, { { UINT32_C( 444638075), UINT32_C(2402056009) }, { UINT32_C(2592274929), UINT32_C(1644015473) }, }, }, { { UINT32_C(3008233783), UINT32_C( 73519625) }, { UINT32_C(2847817601), UINT32_C(2824076589) }, { { UINT32_C(3008233783), UINT32_C(2847817601) }, { UINT32_C( 73519625), UINT32_C(2824076589) }, }, }, { { UINT32_C(2462241697), UINT32_C( 355288268) }, { UINT32_C( 749033915), UINT32_C(1066246664) }, { { UINT32_C(2462241697), UINT32_C( 749033915) }, { UINT32_C( 355288268), UINT32_C(1066246664) }, }, }, { { UINT32_C(2901662883), UINT32_C( 816862382) }, { UINT32_C(3235475091), UINT32_C( 224930924) }, { { UINT32_C(2901662883), UINT32_C(3235475091) }, { UINT32_C( 816862382), UINT32_C( 224930924) }, }, }, { { UINT32_C(3432983040), UINT32_C( 719506543) }, { UINT32_C( 777488165), UINT32_C(3429753897) }, { { UINT32_C(3432983040), UINT32_C( 777488165) }, { UINT32_C( 719506543), UINT32_C(3429753897) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2x2_t r = simde_vzip_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r.val[0], simde_vld1_u32(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_u32x2(r.val[1], simde_vld1_u32(test_vec[i].r[1])); } return 0; } static int test_simde_vzipq_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { 
simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[2][4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -748.99), SIMDE_FLOAT32_C( -706.11), SIMDE_FLOAT32_C( 869.97), SIMDE_FLOAT32_C( 247.18) }, { SIMDE_FLOAT32_C( 281.30), SIMDE_FLOAT32_C( -318.87), SIMDE_FLOAT32_C( -402.07), SIMDE_FLOAT32_C( 850.87) }, { { SIMDE_FLOAT32_C( -748.99), SIMDE_FLOAT32_C( 281.30), SIMDE_FLOAT32_C( -706.11), SIMDE_FLOAT32_C( -318.87) }, { SIMDE_FLOAT32_C( 869.97), SIMDE_FLOAT32_C( -402.07), SIMDE_FLOAT32_C( 247.18), SIMDE_FLOAT32_C( 850.87) }, }, }, { { SIMDE_FLOAT32_C( -830.23), SIMDE_FLOAT32_C( -661.11), SIMDE_FLOAT32_C( -631.24), SIMDE_FLOAT32_C( 130.32) }, { SIMDE_FLOAT32_C( -203.49), SIMDE_FLOAT32_C( 634.65), SIMDE_FLOAT32_C( 396.96), SIMDE_FLOAT32_C( 711.13) }, { { SIMDE_FLOAT32_C( -830.23), SIMDE_FLOAT32_C( -203.49), SIMDE_FLOAT32_C( -661.11), SIMDE_FLOAT32_C( 634.65) }, { SIMDE_FLOAT32_C( -631.24), SIMDE_FLOAT32_C( 396.96), SIMDE_FLOAT32_C( 130.32), SIMDE_FLOAT32_C( 711.13) }, }, }, { { SIMDE_FLOAT32_C( 282.82), SIMDE_FLOAT32_C( 202.09), SIMDE_FLOAT32_C( -453.11), SIMDE_FLOAT32_C( 640.01) }, { SIMDE_FLOAT32_C( 822.42), SIMDE_FLOAT32_C( 187.30), SIMDE_FLOAT32_C( 18.58), SIMDE_FLOAT32_C( 809.63) }, { { SIMDE_FLOAT32_C( 282.82), SIMDE_FLOAT32_C( 822.42), SIMDE_FLOAT32_C( 202.09), SIMDE_FLOAT32_C( 187.30) }, { SIMDE_FLOAT32_C( -453.11), SIMDE_FLOAT32_C( 18.58), SIMDE_FLOAT32_C( 640.01), SIMDE_FLOAT32_C( 809.63) }, }, }, { { SIMDE_FLOAT32_C( -225.67), SIMDE_FLOAT32_C( 681.44), SIMDE_FLOAT32_C( 463.00), SIMDE_FLOAT32_C( 309.35) }, { SIMDE_FLOAT32_C( 589.93), SIMDE_FLOAT32_C( 877.78), SIMDE_FLOAT32_C( 246.23), SIMDE_FLOAT32_C( 840.94) }, { { SIMDE_FLOAT32_C( -225.67), SIMDE_FLOAT32_C( 589.93), SIMDE_FLOAT32_C( 681.44), SIMDE_FLOAT32_C( 877.78) }, { SIMDE_FLOAT32_C( 463.00), SIMDE_FLOAT32_C( 246.23), SIMDE_FLOAT32_C( 309.35), SIMDE_FLOAT32_C( 840.94) }, }, }, { { SIMDE_FLOAT32_C( -828.33), SIMDE_FLOAT32_C( 116.19), SIMDE_FLOAT32_C( 88.12), SIMDE_FLOAT32_C( 452.97) }, { SIMDE_FLOAT32_C( 
797.32), SIMDE_FLOAT32_C( 686.05), SIMDE_FLOAT32_C( 303.84), SIMDE_FLOAT32_C( 967.09) }, { { SIMDE_FLOAT32_C( -828.33), SIMDE_FLOAT32_C( 797.32), SIMDE_FLOAT32_C( 116.19), SIMDE_FLOAT32_C( 686.05) }, { SIMDE_FLOAT32_C( 88.12), SIMDE_FLOAT32_C( 303.84), SIMDE_FLOAT32_C( 452.97), SIMDE_FLOAT32_C( 967.09) }, }, }, { { SIMDE_FLOAT32_C( -975.06), SIMDE_FLOAT32_C( 672.60), SIMDE_FLOAT32_C( 97.41), SIMDE_FLOAT32_C( -178.55) }, { SIMDE_FLOAT32_C( 307.24), SIMDE_FLOAT32_C( -505.62), SIMDE_FLOAT32_C( -467.42), SIMDE_FLOAT32_C( -409.94) }, { { SIMDE_FLOAT32_C( -975.06), SIMDE_FLOAT32_C( 307.24), SIMDE_FLOAT32_C( 672.60), SIMDE_FLOAT32_C( -505.62) }, { SIMDE_FLOAT32_C( 97.41), SIMDE_FLOAT32_C( -467.42), SIMDE_FLOAT32_C( -178.55), SIMDE_FLOAT32_C( -409.94) }, }, }, { { SIMDE_FLOAT32_C( 696.47), SIMDE_FLOAT32_C( 79.47), SIMDE_FLOAT32_C( -769.92), SIMDE_FLOAT32_C( 518.88) }, { SIMDE_FLOAT32_C( -733.23), SIMDE_FLOAT32_C( 248.65), SIMDE_FLOAT32_C( 328.52), SIMDE_FLOAT32_C( 41.09) }, { { SIMDE_FLOAT32_C( 696.47), SIMDE_FLOAT32_C( -733.23), SIMDE_FLOAT32_C( 79.47), SIMDE_FLOAT32_C( 248.65) }, { SIMDE_FLOAT32_C( -769.92), SIMDE_FLOAT32_C( 328.52), SIMDE_FLOAT32_C( 518.88), SIMDE_FLOAT32_C( 41.09) }, }, }, { { SIMDE_FLOAT32_C( -69.90), SIMDE_FLOAT32_C( -208.48), SIMDE_FLOAT32_C( -649.56), SIMDE_FLOAT32_C( -479.97) }, { SIMDE_FLOAT32_C( -330.70), SIMDE_FLOAT32_C( 596.67), SIMDE_FLOAT32_C( -639.03), SIMDE_FLOAT32_C( -159.03) }, { { SIMDE_FLOAT32_C( -69.90), SIMDE_FLOAT32_C( -330.70), SIMDE_FLOAT32_C( -208.48), SIMDE_FLOAT32_C( 596.67) }, { SIMDE_FLOAT32_C( -649.56), SIMDE_FLOAT32_C( -639.03), SIMDE_FLOAT32_C( -479.97), SIMDE_FLOAT32_C( -159.03) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4x2_t r = simde_vzipq_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r.val[0], simde_vld1q_f32(test_vec[i].r[0]), 1); 
simde_test_arm_neon_assert_equal_f32x4(r.val[1], simde_vld1q_f32(test_vec[i].r[1]), 1); } return 0; } static int test_simde_vzipq_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[2][16]; } test_vec[] = { { { INT8_C( 106), -INT8_C( 117), -INT8_C( 106), -INT8_C( 113), -INT8_C( 27), -INT8_C( 54), INT8_C( 13), -INT8_C( 22), -INT8_C( 117), INT8_C( 99), INT8_C( 3), INT8_C( 35), INT8_C( 32), -INT8_C( 99), -INT8_C( 83), -INT8_C( 26) }, { INT8_C( 80), -INT8_C( 97), -INT8_C( 66), INT8_C( 69), INT8_C( 11), -INT8_C( 96), -INT8_C( 40), INT8_C( 91), -INT8_C( 122), INT8_C( 27), -INT8_C( 48), INT8_C( 8), INT8_C( 78), INT8_C( 103), -INT8_C( 89), -INT8_C( 72) }, { { INT8_C( 106), INT8_C( 80), -INT8_C( 117), -INT8_C( 97), -INT8_C( 106), -INT8_C( 66), -INT8_C( 113), INT8_C( 69), -INT8_C( 27), INT8_C( 11), -INT8_C( 54), -INT8_C( 96), INT8_C( 13), -INT8_C( 40), -INT8_C( 22), INT8_C( 91) }, { -INT8_C( 117), -INT8_C( 122), INT8_C( 99), INT8_C( 27), INT8_C( 3), -INT8_C( 48), INT8_C( 35), INT8_C( 8), INT8_C( 32), INT8_C( 78), -INT8_C( 99), INT8_C( 103), -INT8_C( 83), -INT8_C( 89), -INT8_C( 26), -INT8_C( 72) }, }, }, { { -INT8_C( 14), INT8_C( 61), INT8_C( 72), -INT8_C( 41), INT8_C( 7), INT8_C( 85), -INT8_C( 62), -INT8_C( 110), -INT8_C( 72), -INT8_C( 59), -INT8_C( 75), -INT8_C( 39), INT8_C( 98), INT8_C( 98), -INT8_C( 65), -INT8_C( 78) }, { INT8_C( 1), INT8_C( 126), -INT8_C( 8), INT8_C( 12), INT8_C( 30), -INT8_C( 48), INT8_C( 103), -INT8_C( 91), -INT8_C( 21), INT8_C( 55), -INT8_C( 83), INT8_C( 57), -INT8_C( 98), INT8_C( 84), -INT8_C( 14), -INT8_C( 112) }, { { -INT8_C( 14), INT8_C( 1), INT8_C( 61), INT8_C( 126), INT8_C( 72), -INT8_C( 8), -INT8_C( 41), INT8_C( 12), INT8_C( 7), INT8_C( 30), INT8_C( 85), -INT8_C( 48), -INT8_C( 62), INT8_C( 103), -INT8_C( 110), -INT8_C( 91) }, { -INT8_C( 72), -INT8_C( 21), -INT8_C( 59), INT8_C( 55), -INT8_C( 75), -INT8_C( 83), -INT8_C( 39), INT8_C( 57), INT8_C( 98), -INT8_C( 98), INT8_C( 98), INT8_C( 84), -INT8_C( 65), -INT8_C( 14), 
-INT8_C( 78), -INT8_C( 112) }, }, }, { { -INT8_C( 111), INT8_C( 58), INT8_C( 104), -INT8_C( 104), -INT8_C( 113), INT8_C( 42), INT8_C( 42), INT8_C( 71), -INT8_C( 17), -INT8_C( 33), INT8_C( 32), INT8_C( 82), INT8_C( 65), -INT8_C( 32), INT8_C( 4), INT8_C( 66) }, { INT8_C( 94), -INT8_C( 4), INT8_C( 78), INT8_C( 124), -INT8_C( 51), -INT8_C( 74), INT8_C( 33), -INT8_C( 72), -INT8_C( 19), -INT8_C( 50), -INT8_C( 14), -INT8_C( 116), INT8_C( 34), -INT8_C( 28), INT8_C( 28), -INT8_C( 77) }, { { -INT8_C( 111), INT8_C( 94), INT8_C( 58), -INT8_C( 4), INT8_C( 104), INT8_C( 78), -INT8_C( 104), INT8_C( 124), -INT8_C( 113), -INT8_C( 51), INT8_C( 42), -INT8_C( 74), INT8_C( 42), INT8_C( 33), INT8_C( 71), -INT8_C( 72) }, { -INT8_C( 17), -INT8_C( 19), -INT8_C( 33), -INT8_C( 50), INT8_C( 32), -INT8_C( 14), INT8_C( 82), -INT8_C( 116), INT8_C( 65), INT8_C( 34), -INT8_C( 32), -INT8_C( 28), INT8_C( 4), INT8_C( 28), INT8_C( 66), -INT8_C( 77) }, }, }, { { INT8_C( 30), -INT8_C( 124), INT8_C( 75), -INT8_C( 83), -INT8_C( 82), INT8_C( 117), -INT8_C( 12), -INT8_C( 98), INT8_C( 84), INT8_C( 21), -INT8_C( 16), -INT8_C( 107), -INT8_C( 11), -INT8_C( 12), -INT8_C( 41), INT8_C( 83) }, { -INT8_C( 15), INT8_C( 38), -INT8_C( 49), -INT8_C( 66), -INT8_C( 36), -INT8_C( 15), INT8_C( 118), -INT8_C( 55), -INT8_C( 65), INT8_C( 104), INT8_C( 85), -INT8_C( 30), INT8_C( 76), INT8_C( 114), -INT8_C( 107), INT8_C( 106) }, { { INT8_C( 30), -INT8_C( 15), -INT8_C( 124), INT8_C( 38), INT8_C( 75), -INT8_C( 49), -INT8_C( 83), -INT8_C( 66), -INT8_C( 82), -INT8_C( 36), INT8_C( 117), -INT8_C( 15), -INT8_C( 12), INT8_C( 118), -INT8_C( 98), -INT8_C( 55) }, { INT8_C( 84), -INT8_C( 65), INT8_C( 21), INT8_C( 104), -INT8_C( 16), INT8_C( 85), -INT8_C( 107), -INT8_C( 30), -INT8_C( 11), INT8_C( 76), -INT8_C( 12), INT8_C( 114), -INT8_C( 41), -INT8_C( 107), INT8_C( 83), INT8_C( 106) }, }, }, { { -INT8_C( 10), -INT8_C( 31), INT8_C( 23), -INT8_C( 91), INT8_C( 86), INT8_C( 12), INT8_C( 67), -INT8_C( 85), INT8_C( 33), INT8_C( 51), INT8_C( 64), 
INT8_C( 22), INT8_C( 39), INT8_C( 24), INT8_C( 105), INT8_C( 24) }, { INT8_C( 62), INT8_C( 56), -INT8_C( 42), INT8_C( 26), INT8_C( 41), INT8_C( 77), -INT8_C( 29), -INT8_C( 23), -INT8_C( 75), INT8_C( 57), -INT8_C( 53), INT8_C( 2), -INT8_C( 85), INT8_C( 96), INT8_C( 108), -INT8_C( 95) }, { { -INT8_C( 10), INT8_C( 62), -INT8_C( 31), INT8_C( 56), INT8_C( 23), -INT8_C( 42), -INT8_C( 91), INT8_C( 26), INT8_C( 86), INT8_C( 41), INT8_C( 12), INT8_C( 77), INT8_C( 67), -INT8_C( 29), -INT8_C( 85), -INT8_C( 23) }, { INT8_C( 33), -INT8_C( 75), INT8_C( 51), INT8_C( 57), INT8_C( 64), -INT8_C( 53), INT8_C( 22), INT8_C( 2), INT8_C( 39), -INT8_C( 85), INT8_C( 24), INT8_C( 96), INT8_C( 105), INT8_C( 108), INT8_C( 24), -INT8_C( 95) }, }, }, { { INT8_C( 65), -INT8_C( 124), INT8_C( 70), -INT8_C( 104), -INT8_C( 112), -INT8_C( 119), INT8_C( 67), -INT8_C( 79), -INT8_C( 68), -INT8_C( 125), -INT8_C( 57), -INT8_C( 28), -INT8_C( 101), INT8_C( 48), -INT8_C( 4), -INT8_C( 39) }, { INT8_C( 104), -INT8_C( 45), -INT8_C( 13), -INT8_C( 110), INT8_C( 32), -INT8_C( 41), INT8_C( 123), -INT8_C( 43), INT8_C( 16), INT8_C( 70), -INT8_C( 41), -INT8_C( 69), -INT8_C( 90), INT8_C( 68), INT8_C( 92), -INT8_C( 24) }, { { INT8_C( 65), INT8_C( 104), -INT8_C( 124), -INT8_C( 45), INT8_C( 70), -INT8_C( 13), -INT8_C( 104), -INT8_C( 110), -INT8_C( 112), INT8_C( 32), -INT8_C( 119), -INT8_C( 41), INT8_C( 67), INT8_C( 123), -INT8_C( 79), -INT8_C( 43) }, { -INT8_C( 68), INT8_C( 16), -INT8_C( 125), INT8_C( 70), -INT8_C( 57), -INT8_C( 41), -INT8_C( 28), -INT8_C( 69), -INT8_C( 101), -INT8_C( 90), INT8_C( 48), INT8_C( 68), -INT8_C( 4), INT8_C( 92), -INT8_C( 39), -INT8_C( 24) }, }, }, { { -INT8_C( 56), -INT8_C( 93), INT8_MIN, INT8_C( 88), INT8_C( 44), -INT8_C( 61), INT8_C( 9), -INT8_C( 23), INT8_C( 70), -INT8_C( 48), -INT8_C( 51), -INT8_C( 30), INT8_C( 0), -INT8_C( 55), -INT8_C( 69), INT8_C( 104) }, { -INT8_C( 100), -INT8_C( 81), -INT8_C( 6), -INT8_C( 68), -INT8_C( 122), INT8_C( 117), -INT8_C( 110), -INT8_C( 106), -INT8_C( 69), 
INT8_C( 105), INT8_C( 81), INT8_C( 98), -INT8_C( 83), -INT8_C( 83), INT8_C( 74), INT8_C( 117) }, { { -INT8_C( 56), -INT8_C( 100), -INT8_C( 93), -INT8_C( 81), INT8_MIN, -INT8_C( 6), INT8_C( 88), -INT8_C( 68), INT8_C( 44), -INT8_C( 122), -INT8_C( 61), INT8_C( 117), INT8_C( 9), -INT8_C( 110), -INT8_C( 23), -INT8_C( 106) }, { INT8_C( 70), -INT8_C( 69), -INT8_C( 48), INT8_C( 105), -INT8_C( 51), INT8_C( 81), -INT8_C( 30), INT8_C( 98), INT8_C( 0), -INT8_C( 83), -INT8_C( 55), -INT8_C( 83), -INT8_C( 69), INT8_C( 74), INT8_C( 104), INT8_C( 117) }, }, }, { { INT8_C( 80), -INT8_C( 54), -INT8_C( 51), INT8_C( 125), -INT8_C( 115), -INT8_C( 42), INT8_C( 102), -INT8_C( 45), -INT8_C( 90), INT8_C( 51), -INT8_C( 75), -INT8_C( 90), -INT8_C( 4), INT8_C( 113), INT8_C( 15), -INT8_C( 103) }, { INT8_C( 32), INT8_C( 9), INT8_C( 85), -INT8_C( 90), INT8_MAX, -INT8_C( 25), INT8_C( 60), INT8_C( 58), INT8_C( 81), -INT8_C( 115), -INT8_C( 100), -INT8_C( 2), INT8_C( 58), -INT8_C( 26), INT8_C( 116), -INT8_C( 117) }, { { INT8_C( 80), INT8_C( 32), -INT8_C( 54), INT8_C( 9), -INT8_C( 51), INT8_C( 85), INT8_C( 125), -INT8_C( 90), -INT8_C( 115), INT8_MAX, -INT8_C( 42), -INT8_C( 25), INT8_C( 102), INT8_C( 60), -INT8_C( 45), INT8_C( 58) }, { -INT8_C( 90), INT8_C( 81), INT8_C( 51), -INT8_C( 115), -INT8_C( 75), -INT8_C( 100), -INT8_C( 90), -INT8_C( 2), -INT8_C( 4), INT8_C( 58), INT8_C( 113), -INT8_C( 26), INT8_C( 15), INT8_C( 116), -INT8_C( 103), -INT8_C( 117) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16x2_t r = simde_vzipq_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r.val[0], simde_vld1q_s8(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_i8x16(r.val[1], simde_vld1q_s8(test_vec[i].r[1])); } return 0; } static int test_simde_vzipq_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[2][8]; } test_vec[] = { { { INT16_C( 
3008), -INT16_C( 17292), -INT16_C( 32024), -INT16_C( 11129), INT16_C( 23848), -INT16_C( 24759), INT16_C( 23386), INT16_C( 16016) }, { INT16_C( 20554), INT16_C( 14369), -INT16_C( 8685), -INT16_C( 28269), INT16_C( 23126), INT16_C( 29189), INT16_C( 8337), INT16_C( 21143) }, { { INT16_C( 3008), INT16_C( 20554), -INT16_C( 17292), INT16_C( 14369), -INT16_C( 32024), -INT16_C( 8685), -INT16_C( 11129), -INT16_C( 28269) }, { INT16_C( 23848), INT16_C( 23126), -INT16_C( 24759), INT16_C( 29189), INT16_C( 23386), INT16_C( 8337), INT16_C( 16016), INT16_C( 21143) }, }, }, { { INT16_C( 3116), INT16_C( 5134), -INT16_C( 26994), -INT16_C( 18456), INT16_C( 13043), INT16_C( 19798), -INT16_C( 6515), -INT16_C( 10100) }, { -INT16_C( 21194), INT16_C( 18704), -INT16_C( 23669), -INT16_C( 7718), -INT16_C( 8194), -INT16_C( 28844), -INT16_C( 5376), INT16_C( 11489) }, { { INT16_C( 3116), -INT16_C( 21194), INT16_C( 5134), INT16_C( 18704), -INT16_C( 26994), -INT16_C( 23669), -INT16_C( 18456), -INT16_C( 7718) }, { INT16_C( 13043), -INT16_C( 8194), INT16_C( 19798), -INT16_C( 28844), -INT16_C( 6515), -INT16_C( 5376), -INT16_C( 10100), INT16_C( 11489) }, }, }, { { -INT16_C( 3849), -INT16_C( 31168), INT16_C( 10630), INT16_C( 31037), -INT16_C( 27813), -INT16_C( 5946), INT16_C( 21113), -INT16_C( 20544) }, { -INT16_C( 11777), -INT16_C( 29703), -INT16_C( 11404), INT16_C( 29292), -INT16_C( 16205), -INT16_C( 19710), -INT16_C( 7252), -INT16_C( 23585) }, { { -INT16_C( 3849), -INT16_C( 11777), -INT16_C( 31168), -INT16_C( 29703), INT16_C( 10630), -INT16_C( 11404), INT16_C( 31037), INT16_C( 29292) }, { -INT16_C( 27813), -INT16_C( 16205), -INT16_C( 5946), -INT16_C( 19710), INT16_C( 21113), -INT16_C( 7252), -INT16_C( 20544), -INT16_C( 23585) }, }, }, { { INT16_C( 8147), INT16_C( 22825), INT16_C( 26184), -INT16_C( 23598), -INT16_C( 26119), INT16_C( 29324), INT16_C( 19691), -INT16_C( 5342) }, { INT16_C( 6941), -INT16_C( 28042), -INT16_C( 7442), -INT16_C( 24316), INT16_C( 1699), INT16_C( 20308), INT16_C( 13290), 
-INT16_C( 16910) }, { { INT16_C( 8147), INT16_C( 6941), INT16_C( 22825), -INT16_C( 28042), INT16_C( 26184), -INT16_C( 7442), -INT16_C( 23598), -INT16_C( 24316) }, { -INT16_C( 26119), INT16_C( 1699), INT16_C( 29324), INT16_C( 20308), INT16_C( 19691), INT16_C( 13290), -INT16_C( 5342), -INT16_C( 16910) }, }, }, { { INT16_C( 7251), -INT16_C( 25833), -INT16_C( 5758), INT16_C( 31807), -INT16_C( 13438), INT16_C( 28398), INT16_C( 4119), INT16_C( 13657) }, { -INT16_C( 12501), INT16_C( 6855), -INT16_C( 13391), INT16_C( 21691), INT16_C( 4306), -INT16_C( 17245), -INT16_C( 27069), -INT16_C( 27015) }, { { INT16_C( 7251), -INT16_C( 12501), -INT16_C( 25833), INT16_C( 6855), -INT16_C( 5758), -INT16_C( 13391), INT16_C( 31807), INT16_C( 21691) }, { -INT16_C( 13438), INT16_C( 4306), INT16_C( 28398), -INT16_C( 17245), INT16_C( 4119), -INT16_C( 27069), INT16_C( 13657), -INT16_C( 27015) }, }, }, { { -INT16_C( 28494), INT16_C( 13362), INT16_C( 29050), -INT16_C( 848), -INT16_C( 24772), INT16_C( 21354), -INT16_C( 15441), -INT16_C( 9336) }, { INT16_C( 20370), INT16_C( 17653), -INT16_C( 20453), -INT16_C( 4712), INT16_C( 15552), INT16_C( 1193), INT16_C( 8914), -INT16_C( 31590) }, { { -INT16_C( 28494), INT16_C( 20370), INT16_C( 13362), INT16_C( 17653), INT16_C( 29050), -INT16_C( 20453), -INT16_C( 848), -INT16_C( 4712) }, { -INT16_C( 24772), INT16_C( 15552), INT16_C( 21354), INT16_C( 1193), -INT16_C( 15441), INT16_C( 8914), -INT16_C( 9336), -INT16_C( 31590) }, }, }, { { -INT16_C( 13133), INT16_C( 11704), INT16_C( 26941), INT16_C( 31017), -INT16_C( 27640), -INT16_C( 18483), INT16_C( 21847), -INT16_C( 5486) }, { -INT16_C( 30811), -INT16_C( 16338), -INT16_C( 14792), -INT16_C( 1875), INT16_C( 22018), -INT16_C( 11012), -INT16_C( 26760), INT16_C( 11096) }, { { -INT16_C( 13133), -INT16_C( 30811), INT16_C( 11704), -INT16_C( 16338), INT16_C( 26941), -INT16_C( 14792), INT16_C( 31017), -INT16_C( 1875) }, { -INT16_C( 27640), INT16_C( 22018), -INT16_C( 18483), -INT16_C( 11012), INT16_C( 21847), -INT16_C( 
26760), -INT16_C( 5486), INT16_C( 11096) }, }, }, { { INT16_C( 4451), -INT16_C( 24232), -INT16_C( 32134), -INT16_C( 32230), -INT16_C( 6378), INT16_C( 27961), -INT16_C( 13251), -INT16_C( 7593) }, { -INT16_C( 31405), -INT16_C( 29790), INT16_C( 20300), INT16_C( 20100), -INT16_C( 32603), INT16_C( 7459), INT16_C( 31511), INT16_C( 31561) }, { { INT16_C( 4451), -INT16_C( 31405), -INT16_C( 24232), -INT16_C( 29790), -INT16_C( 32134), INT16_C( 20300), -INT16_C( 32230), INT16_C( 20100) }, { -INT16_C( 6378), -INT16_C( 32603), INT16_C( 27961), INT16_C( 7459), -INT16_C( 13251), INT16_C( 31511), -INT16_C( 7593), INT16_C( 31561) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8x2_t r = simde_vzipq_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r.val[0], simde_vld1q_s16(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_i16x8(r.val[1], simde_vld1q_s16(test_vec[i].r[1])); } return 0; } static int test_simde_vzipq_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[2][4]; } test_vec[] = { { { -INT32_C( 99917994), -INT32_C( 280558692), INT32_C( 308406472), -INT32_C( 1858489826) }, { INT32_C( 758961887), -INT32_C( 1062247511), -INT32_C( 830256915), INT32_C( 425864131) }, { { -INT32_C( 99917994), INT32_C( 758961887), -INT32_C( 280558692), -INT32_C( 1062247511) }, { INT32_C( 308406472), -INT32_C( 830256915), -INT32_C( 1858489826), INT32_C( 425864131) }, }, }, { { INT32_C( 638807690), INT32_C( 957700977), INT32_C( 1632400963), INT32_C( 133334312) }, { INT32_C( 87371355), INT32_C( 2126898321), -INT32_C( 280213460), -INT32_C( 49696909) }, { { INT32_C( 638807690), INT32_C( 87371355), INT32_C( 957700977), INT32_C( 2126898321) }, { INT32_C( 1632400963), -INT32_C( 280213460), INT32_C( 133334312), -INT32_C( 49696909) }, }, }, { { -INT32_C( 1910236131), -INT32_C( 1161348745), -INT32_C( 669314128), -INT32_C( 
186642791) }, { -INT32_C( 822536900), INT32_C( 642563833), INT32_C( 2048235783), INT32_C( 1702370888) }, { { -INT32_C( 1910236131), -INT32_C( 822536900), -INT32_C( 1161348745), INT32_C( 642563833) }, { -INT32_C( 669314128), INT32_C( 2048235783), -INT32_C( 186642791), INT32_C( 1702370888) }, }, }, { { -INT32_C( 1292657605), -INT32_C( 2056406315), INT32_C( 1734248654), -INT32_C( 748994922) }, { INT32_C( 1285641555), INT32_C( 443739411), -INT32_C( 829060986), -INT32_C( 516747866) }, { { -INT32_C( 1292657605), INT32_C( 1285641555), -INT32_C( 2056406315), INT32_C( 443739411) }, { INT32_C( 1734248654), -INT32_C( 829060986), -INT32_C( 748994922), -INT32_C( 516747866) }, }, }, { { INT32_C( 2123638441), -INT32_C( 1358691871), INT32_C( 538337929), -INT32_C( 202149472) }, { -INT32_C( 633367354), INT32_C( 150254209), -INT32_C( 522811078), INT32_C( 1069681302) }, { { INT32_C( 2123638441), -INT32_C( 633367354), -INT32_C( 1358691871), INT32_C( 150254209) }, { INT32_C( 538337929), -INT32_C( 522811078), -INT32_C( 202149472), INT32_C( 1069681302) }, }, }, { { INT32_C( 297686576), -INT32_C( 524238249), -INT32_C( 1006578140), INT32_C( 246936392) }, { INT32_C( 166262407), -INT32_C( 502145624), -INT32_C( 37492890), INT32_C( 574391793) }, { { INT32_C( 297686576), INT32_C( 166262407), -INT32_C( 524238249), -INT32_C( 502145624) }, { -INT32_C( 1006578140), -INT32_C( 37492890), INT32_C( 246936392), INT32_C( 574391793) }, }, }, { { INT32_C( 842267355), -INT32_C( 535628612), INT32_C( 312742858), -INT32_C( 1910416634) }, { -INT32_C( 90764974), INT32_C( 1306372326), -INT32_C( 2125815665), INT32_C( 10716709) }, { { INT32_C( 842267355), -INT32_C( 90764974), -INT32_C( 535628612), INT32_C( 1306372326) }, { INT32_C( 312742858), -INT32_C( 2125815665), -INT32_C( 1910416634), INT32_C( 10716709) }, }, }, { { INT32_C( 1026741889), -INT32_C( 1793178422), INT32_C( 1588052567), INT32_C( 1894565918) }, { -INT32_C( 1200978990), -INT32_C( 1174059221), INT32_C( 205213671), INT32_C( 1460461270) }, { { INT32_C( 
1026741889), -INT32_C( 1200978990), -INT32_C( 1793178422), -INT32_C( 1174059221) }, { INT32_C( 1588052567), INT32_C( 205213671), INT32_C( 1894565918), INT32_C( 1460461270) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4x2_t r = simde_vzipq_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r.val[0], simde_vld1q_s32(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_i32x4(r.val[1], simde_vld1q_s32(test_vec[i].r[1])); } return 0; } static int test_simde_vzipq_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[2][16]; } test_vec[] = { { { UINT8_C(183), UINT8_C(104), UINT8_C(229), UINT8_C(148), UINT8_C(145), UINT8_C( 93), UINT8_C(206), UINT8_C( 25), UINT8_C(179), UINT8_C(235), UINT8_C(112), UINT8_C( 10), UINT8_C( 54), UINT8_C(251), UINT8_C(230), UINT8_C(234) }, { UINT8_C(252), UINT8_C( 45), UINT8_C( 96), UINT8_C(130), UINT8_C(187), UINT8_C( 4), UINT8_C(186), UINT8_C( 78), UINT8_C( 95), UINT8_C( 69), UINT8_C(242), UINT8_C(216), UINT8_C(128), UINT8_C(201), UINT8_C(251), UINT8_C( 55) }, { { UINT8_C(183), UINT8_C(252), UINT8_C(104), UINT8_C( 45), UINT8_C(229), UINT8_C( 96), UINT8_C(148), UINT8_C(130), UINT8_C(145), UINT8_C(187), UINT8_C( 93), UINT8_C( 4), UINT8_C(206), UINT8_C(186), UINT8_C( 25), UINT8_C( 78) }, { UINT8_C(179), UINT8_C( 95), UINT8_C(235), UINT8_C( 69), UINT8_C(112), UINT8_C(242), UINT8_C( 10), UINT8_C(216), UINT8_C( 54), UINT8_C(128), UINT8_C(251), UINT8_C(201), UINT8_C(230), UINT8_C(251), UINT8_C(234), UINT8_C( 55) }, }, }, { { UINT8_C( 49), UINT8_C(224), UINT8_C(203), UINT8_C(195), UINT8_C( 61), UINT8_C(153), UINT8_C(220), UINT8_C(240), UINT8_C(132), UINT8_C( 76), UINT8_C(251), UINT8_C(187), UINT8_C( 71), UINT8_C(225), UINT8_C(165), UINT8_C( 68) }, { UINT8_C( 15), UINT8_C( 5), UINT8_C(198), UINT8_C(202), UINT8_C( 9), UINT8_C(128), UINT8_C( 24), UINT8_C(104), UINT8_C(198), 
UINT8_C( 10), UINT8_C( 64), UINT8_C( 70), UINT8_C(211), UINT8_C( 59), UINT8_C(125), UINT8_C( 5) }, { { UINT8_C( 49), UINT8_C( 15), UINT8_C(224), UINT8_C( 5), UINT8_C(203), UINT8_C(198), UINT8_C(195), UINT8_C(202), UINT8_C( 61), UINT8_C( 9), UINT8_C(153), UINT8_C(128), UINT8_C(220), UINT8_C( 24), UINT8_C(240), UINT8_C(104) }, { UINT8_C(132), UINT8_C(198), UINT8_C( 76), UINT8_C( 10), UINT8_C(251), UINT8_C( 64), UINT8_C(187), UINT8_C( 70), UINT8_C( 71), UINT8_C(211), UINT8_C(225), UINT8_C( 59), UINT8_C(165), UINT8_C(125), UINT8_C( 68), UINT8_C( 5) }, }, }, { { UINT8_C( 27), UINT8_C( 72), UINT8_C(200), UINT8_C( 88), UINT8_C(225), UINT8_C(164), UINT8_C( 73), UINT8_C(102), UINT8_C(240), UINT8_C( 68), UINT8_C( 33), UINT8_C( 55), UINT8_C( 37), UINT8_C(198), UINT8_C(123), UINT8_C( 52) }, { UINT8_C(203), UINT8_C( 66), UINT8_MAX, UINT8_C(213), UINT8_C(194), UINT8_C( 23), UINT8_C( 61), UINT8_C(136), UINT8_C( 34), UINT8_C(126), UINT8_C(206), UINT8_C(245), UINT8_C(185), UINT8_C( 75), UINT8_C(250), UINT8_C(213) }, { { UINT8_C( 27), UINT8_C(203), UINT8_C( 72), UINT8_C( 66), UINT8_C(200), UINT8_MAX, UINT8_C( 88), UINT8_C(213), UINT8_C(225), UINT8_C(194), UINT8_C(164), UINT8_C( 23), UINT8_C( 73), UINT8_C( 61), UINT8_C(102), UINT8_C(136) }, { UINT8_C(240), UINT8_C( 34), UINT8_C( 68), UINT8_C(126), UINT8_C( 33), UINT8_C(206), UINT8_C( 55), UINT8_C(245), UINT8_C( 37), UINT8_C(185), UINT8_C(198), UINT8_C( 75), UINT8_C(123), UINT8_C(250), UINT8_C( 52), UINT8_C(213) }, }, }, { { UINT8_C(147), UINT8_C(194), UINT8_C( 45), UINT8_C(117), UINT8_C(102), UINT8_C(118), UINT8_C(219), UINT8_C( 86), UINT8_C(186), UINT8_C(252), UINT8_C(142), UINT8_C(224), UINT8_C(194), UINT8_C( 9), UINT8_C( 20), UINT8_C(141) }, { UINT8_C( 75), UINT8_C( 19), UINT8_C( 98), UINT8_C( 14), UINT8_C( 43), UINT8_C(160), UINT8_C(150), UINT8_C( 77), UINT8_C( 30), UINT8_C(101), UINT8_C( 66), UINT8_C(215), UINT8_C(176), UINT8_C( 61), UINT8_C(172), UINT8_C( 68) }, { { UINT8_C(147), UINT8_C( 75), UINT8_C(194), UINT8_C( 19), 
UINT8_C( 45), UINT8_C( 98), UINT8_C(117), UINT8_C( 14), UINT8_C(102), UINT8_C( 43), UINT8_C(118), UINT8_C(160), UINT8_C(219), UINT8_C(150), UINT8_C( 86), UINT8_C( 77) }, { UINT8_C(186), UINT8_C( 30), UINT8_C(252), UINT8_C(101), UINT8_C(142), UINT8_C( 66), UINT8_C(224), UINT8_C(215), UINT8_C(194), UINT8_C(176), UINT8_C( 9), UINT8_C( 61), UINT8_C( 20), UINT8_C(172), UINT8_C(141), UINT8_C( 68) }, }, }, { { UINT8_MAX, UINT8_C(218), UINT8_C(185), UINT8_C(102), UINT8_C( 80), UINT8_C(148), UINT8_C(188), UINT8_C( 11), UINT8_C(144), UINT8_C( 74), UINT8_C(235), UINT8_C( 82), UINT8_C( 84), UINT8_MAX, UINT8_C(223), UINT8_C(159) }, { UINT8_C( 19), UINT8_C( 66), UINT8_C(173), UINT8_C( 62), UINT8_C(226), UINT8_C( 68), UINT8_C(139), UINT8_C( 0), UINT8_C(169), UINT8_C(205), UINT8_C(215), UINT8_C( 89), UINT8_C( 10), UINT8_C(132), UINT8_C(157), UINT8_C( 10) }, { { UINT8_MAX, UINT8_C( 19), UINT8_C(218), UINT8_C( 66), UINT8_C(185), UINT8_C(173), UINT8_C(102), UINT8_C( 62), UINT8_C( 80), UINT8_C(226), UINT8_C(148), UINT8_C( 68), UINT8_C(188), UINT8_C(139), UINT8_C( 11), UINT8_C( 0) }, { UINT8_C(144), UINT8_C(169), UINT8_C( 74), UINT8_C(205), UINT8_C(235), UINT8_C(215), UINT8_C( 82), UINT8_C( 89), UINT8_C( 84), UINT8_C( 10), UINT8_MAX, UINT8_C(132), UINT8_C(223), UINT8_C(157), UINT8_C(159), UINT8_C( 10) }, }, }, { { UINT8_C( 94), UINT8_C( 86), UINT8_C(112), UINT8_C(174), UINT8_C(234), UINT8_C( 44), UINT8_C(185), UINT8_C(122), UINT8_C(119), UINT8_C(164), UINT8_C(204), UINT8_C(203), UINT8_C(164), UINT8_C(172), UINT8_C(106), UINT8_C(183) }, { UINT8_C(238), UINT8_C( 24), UINT8_C(245), UINT8_C(208), UINT8_C( 92), UINT8_C(128), UINT8_C(208), UINT8_C( 5), UINT8_C( 77), UINT8_C(167), UINT8_C( 94), UINT8_C( 88), UINT8_C( 43), UINT8_C(252), UINT8_C( 98), UINT8_C(137) }, { { UINT8_C( 94), UINT8_C(238), UINT8_C( 86), UINT8_C( 24), UINT8_C(112), UINT8_C(245), UINT8_C(174), UINT8_C(208), UINT8_C(234), UINT8_C( 92), UINT8_C( 44), UINT8_C(128), UINT8_C(185), UINT8_C(208), UINT8_C(122), UINT8_C( 5) }, { 
UINT8_C(119), UINT8_C( 77), UINT8_C(164), UINT8_C(167), UINT8_C(204), UINT8_C( 94), UINT8_C(203), UINT8_C( 88), UINT8_C(164), UINT8_C( 43), UINT8_C(172), UINT8_C(252), UINT8_C(106), UINT8_C( 98), UINT8_C(183), UINT8_C(137) }, }, }, { { UINT8_C( 82), UINT8_C(210), UINT8_C( 56), UINT8_C( 61), UINT8_C(254), UINT8_C(241), UINT8_C(183), UINT8_C(117), UINT8_C(150), UINT8_C(132), UINT8_C( 64), UINT8_C( 58), UINT8_C( 48), UINT8_C(171), UINT8_C(241), UINT8_C( 30) }, { UINT8_C(195), UINT8_C(230), UINT8_C(238), UINT8_C( 31), UINT8_C(102), UINT8_C(190), UINT8_C( 36), UINT8_C(179), UINT8_C(101), UINT8_C(130), UINT8_C( 11), UINT8_C(145), UINT8_C(126), UINT8_C(109), UINT8_C( 26), UINT8_C(209) }, { { UINT8_C( 82), UINT8_C(195), UINT8_C(210), UINT8_C(230), UINT8_C( 56), UINT8_C(238), UINT8_C( 61), UINT8_C( 31), UINT8_C(254), UINT8_C(102), UINT8_C(241), UINT8_C(190), UINT8_C(183), UINT8_C( 36), UINT8_C(117), UINT8_C(179) }, { UINT8_C(150), UINT8_C(101), UINT8_C(132), UINT8_C(130), UINT8_C( 64), UINT8_C( 11), UINT8_C( 58), UINT8_C(145), UINT8_C( 48), UINT8_C(126), UINT8_C(171), UINT8_C(109), UINT8_C(241), UINT8_C( 26), UINT8_C( 30), UINT8_C(209) }, }, }, { { UINT8_C( 63), UINT8_C( 82), UINT8_C( 14), UINT8_C( 62), UINT8_C( 68), UINT8_C(197), UINT8_C(179), UINT8_C(218), UINT8_C( 73), UINT8_C(244), UINT8_C( 20), UINT8_C(121), UINT8_C(159), UINT8_C( 5), UINT8_C(151), UINT8_C( 98) }, { UINT8_C(235), UINT8_C(133), UINT8_C(129), UINT8_C( 81), UINT8_C( 67), UINT8_C(165), UINT8_C( 4), UINT8_C(169), UINT8_C( 39), UINT8_C( 16), UINT8_C( 58), UINT8_C(166), UINT8_C(125), UINT8_C( 84), UINT8_C(119), UINT8_C(189) }, { { UINT8_C( 63), UINT8_C(235), UINT8_C( 82), UINT8_C(133), UINT8_C( 14), UINT8_C(129), UINT8_C( 62), UINT8_C( 81), UINT8_C( 68), UINT8_C( 67), UINT8_C(197), UINT8_C(165), UINT8_C(179), UINT8_C( 4), UINT8_C(218), UINT8_C(169) }, { UINT8_C( 73), UINT8_C( 39), UINT8_C(244), UINT8_C( 16), UINT8_C( 20), UINT8_C( 58), UINT8_C(121), UINT8_C(166), UINT8_C(159), UINT8_C(125), UINT8_C( 5), 
UINT8_C( 84), UINT8_C(151), UINT8_C(119), UINT8_C( 98), UINT8_C(189) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16x2_t r = simde_vzipq_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r.val[0], simde_vld1q_u8(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_u8x16(r.val[1], simde_vld1q_u8(test_vec[i].r[1])); } return 0; } static int test_simde_vzipq_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[2][8]; } test_vec[] = { { { UINT16_C(36776), UINT16_C(50991), UINT16_C(55785), UINT16_C( 4670), UINT16_C(53567), UINT16_C(64246), UINT16_C(18557), UINT16_C(28447) }, { UINT16_C(38812), UINT16_C(39983), UINT16_C(53745), UINT16_C(29110), UINT16_C(47188), UINT16_C(39243), UINT16_C(50912), UINT16_C(34824) }, { { UINT16_C(36776), UINT16_C(38812), UINT16_C(50991), UINT16_C(39983), UINT16_C(55785), UINT16_C(53745), UINT16_C( 4670), UINT16_C(29110) }, { UINT16_C(53567), UINT16_C(47188), UINT16_C(64246), UINT16_C(39243), UINT16_C(18557), UINT16_C(50912), UINT16_C(28447), UINT16_C(34824) }, }, }, { { UINT16_C(14166), UINT16_C(16207), UINT16_C(36113), UINT16_C(20561), UINT16_C(18271), UINT16_C(56394), UINT16_C(27280), UINT16_C(11339) }, { UINT16_C(31233), UINT16_C(62408), UINT16_C(32331), UINT16_C(41060), UINT16_C(44854), UINT16_C( 5945), UINT16_C(16758), UINT16_C(52383) }, { { UINT16_C(14166), UINT16_C(31233), UINT16_C(16207), UINT16_C(62408), UINT16_C(36113), UINT16_C(32331), UINT16_C(20561), UINT16_C(41060) }, { UINT16_C(18271), UINT16_C(44854), UINT16_C(56394), UINT16_C( 5945), UINT16_C(27280), UINT16_C(16758), UINT16_C(11339), UINT16_C(52383) }, }, }, { { UINT16_C(61304), UINT16_C(35083), UINT16_C(23932), UINT16_C(56282), UINT16_C( 9380), UINT16_C(13496), UINT16_C( 910), UINT16_C(36961) }, { UINT16_C(10622), UINT16_C(51587), UINT16_C(59304), UINT16_C(56937), UINT16_C(41623), UINT16_C( 
3573), UINT16_C(38371), UINT16_C(23769) }, { { UINT16_C(61304), UINT16_C(10622), UINT16_C(35083), UINT16_C(51587), UINT16_C(23932), UINT16_C(59304), UINT16_C(56282), UINT16_C(56937) }, { UINT16_C( 9380), UINT16_C(41623), UINT16_C(13496), UINT16_C( 3573), UINT16_C( 910), UINT16_C(38371), UINT16_C(36961), UINT16_C(23769) }, }, }, { { UINT16_C(58500), UINT16_C( 229), UINT16_C(48961), UINT16_C(59100), UINT16_C(38116), UINT16_C(29210), UINT16_C(31639), UINT16_C( 5378) }, { UINT16_C(34213), UINT16_C(19935), UINT16_C(18541), UINT16_C( 1067), UINT16_C( 8683), UINT16_C(52753), UINT16_C(60086), UINT16_C(14890) }, { { UINT16_C(58500), UINT16_C(34213), UINT16_C( 229), UINT16_C(19935), UINT16_C(48961), UINT16_C(18541), UINT16_C(59100), UINT16_C( 1067) }, { UINT16_C(38116), UINT16_C( 8683), UINT16_C(29210), UINT16_C(52753), UINT16_C(31639), UINT16_C(60086), UINT16_C( 5378), UINT16_C(14890) }, }, }, { { UINT16_C( 4302), UINT16_C( 4154), UINT16_C( 5839), UINT16_C(46070), UINT16_C( 4266), UINT16_C(16934), UINT16_C(10380), UINT16_C(12631) }, { UINT16_C(13998), UINT16_C( 7038), UINT16_C(43391), UINT16_C(27167), UINT16_C(12490), UINT16_C(32824), UINT16_C(25370), UINT16_C(59578) }, { { UINT16_C( 4302), UINT16_C(13998), UINT16_C( 4154), UINT16_C( 7038), UINT16_C( 5839), UINT16_C(43391), UINT16_C(46070), UINT16_C(27167) }, { UINT16_C( 4266), UINT16_C(12490), UINT16_C(16934), UINT16_C(32824), UINT16_C(10380), UINT16_C(25370), UINT16_C(12631), UINT16_C(59578) }, }, }, { { UINT16_C(62835), UINT16_C(17144), UINT16_C(60939), UINT16_C(46838), UINT16_C( 7423), UINT16_C(35832), UINT16_C(20292), UINT16_C(62140) }, { UINT16_C(14982), UINT16_C( 1293), UINT16_C(11491), UINT16_C(44655), UINT16_C(42844), UINT16_C(30254), UINT16_C(59658), UINT16_C(32095) }, { { UINT16_C(62835), UINT16_C(14982), UINT16_C(17144), UINT16_C( 1293), UINT16_C(60939), UINT16_C(11491), UINT16_C(46838), UINT16_C(44655) }, { UINT16_C( 7423), UINT16_C(42844), UINT16_C(35832), UINT16_C(30254), UINT16_C(20292), UINT16_C(59658), 
UINT16_C(62140), UINT16_C(32095) }, }, }, { { UINT16_C(22494), UINT16_C(59840), UINT16_C(46662), UINT16_C(17823), UINT16_C(38866), UINT16_C( 5840), UINT16_C(36071), UINT16_C(27913) }, { UINT16_C( 5830), UINT16_C(43378), UINT16_C(57667), UINT16_C(40791), UINT16_C(34440), UINT16_C(37654), UINT16_C(30063), UINT16_C(19728) }, { { UINT16_C(22494), UINT16_C( 5830), UINT16_C(59840), UINT16_C(43378), UINT16_C(46662), UINT16_C(57667), UINT16_C(17823), UINT16_C(40791) }, { UINT16_C(38866), UINT16_C(34440), UINT16_C( 5840), UINT16_C(37654), UINT16_C(36071), UINT16_C(30063), UINT16_C(27913), UINT16_C(19728) }, }, }, { { UINT16_C(53452), UINT16_C( 4662), UINT16_C(54918), UINT16_C(22615), UINT16_C(10093), UINT16_C(21615), UINT16_C(30899), UINT16_C(31169) }, { UINT16_C(13198), UINT16_C(53539), UINT16_C(31252), UINT16_C(40305), UINT16_C(34560), UINT16_C(28464), UINT16_C(16636), UINT16_C(51388) }, { { UINT16_C(53452), UINT16_C(13198), UINT16_C( 4662), UINT16_C(53539), UINT16_C(54918), UINT16_C(31252), UINT16_C(22615), UINT16_C(40305) }, { UINT16_C(10093), UINT16_C(34560), UINT16_C(21615), UINT16_C(28464), UINT16_C(30899), UINT16_C(16636), UINT16_C(31169), UINT16_C(51388) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8x2_t r = simde_vzipq_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r.val[0], simde_vld1q_u16(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_u16x8(r.val[1], simde_vld1q_u16(test_vec[i].r[1])); } return 0; } static int test_simde_vzipq_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[2][4]; } test_vec[] = { { { UINT32_C(4087665949), UINT32_C(3816891322), UINT32_C(3003239570), UINT32_C(1218963958) }, { UINT32_C(3855336699), UINT32_C(2367470621), UINT32_C(2238585450), UINT32_C(2997669013) }, { { UINT32_C(4087665949), UINT32_C(3855336699), UINT32_C(3816891322), 
UINT32_C(2367470621) }, { UINT32_C(3003239570), UINT32_C(2238585450), UINT32_C(1218963958), UINT32_C(2997669013) }, }, }, { { UINT32_C(1554403489), UINT32_C( 104802163), UINT32_C(3921232115), UINT32_C( 607215657) }, { UINT32_C(1091239204), UINT32_C( 617555641), UINT32_C(3601415489), UINT32_C(2877904137) }, { { UINT32_C(1554403489), UINT32_C(1091239204), UINT32_C( 104802163), UINT32_C( 617555641) }, { UINT32_C(3921232115), UINT32_C(3601415489), UINT32_C( 607215657), UINT32_C(2877904137) }, }, }, { { UINT32_C( 419901350), UINT32_C(1243563606), UINT32_C(2939410566), UINT32_C(1574135096) }, { UINT32_C( 463396194), UINT32_C(1161784580), UINT32_C(3021728171), UINT32_C(3831473214) }, { { UINT32_C( 419901350), UINT32_C( 463396194), UINT32_C(1243563606), UINT32_C(1161784580) }, { UINT32_C(2939410566), UINT32_C(3021728171), UINT32_C(1574135096), UINT32_C(3831473214) }, }, }, { { UINT32_C( 721315539), UINT32_C( 846470572), UINT32_C( 786540534), UINT32_C(1854649612) }, { UINT32_C(2525637266), UINT32_C(1121700247), UINT32_C(4059559858), UINT32_C(1876252315) }, { { UINT32_C( 721315539), UINT32_C(2525637266), UINT32_C( 846470572), UINT32_C(1121700247) }, { UINT32_C( 786540534), UINT32_C(4059559858), UINT32_C(1854649612), UINT32_C(1876252315) }, }, }, { { UINT32_C(1771688893), UINT32_C(3885764081), UINT32_C(3239411124), UINT32_C(3308233010) }, { UINT32_C(1650178507), UINT32_C( 900020099), UINT32_C(3374750766), UINT32_C(2939747570) }, { { UINT32_C(1771688893), UINT32_C(1650178507), UINT32_C(3885764081), UINT32_C( 900020099) }, { UINT32_C(3239411124), UINT32_C(3374750766), UINT32_C(3308233010), UINT32_C(2939747570) }, }, }, { { UINT32_C(3222917583), UINT32_C(2477241822), UINT32_C(1700052274), UINT32_C( 690651998) }, { UINT32_C(3230369085), UINT32_C(3941937340), UINT32_C(3216252108), UINT32_C(3882806296) }, { { UINT32_C(3222917583), UINT32_C(3230369085), UINT32_C(2477241822), UINT32_C(3941937340) }, { UINT32_C(1700052274), UINT32_C(3216252108), UINT32_C( 690651998), 
UINT32_C(3882806296) }, }, }, { { UINT32_C(2628290494), UINT32_C(1865371452), UINT32_C(1792312076), UINT32_C(1150549511) }, { UINT32_C(1074012035), UINT32_C( 472578383), UINT32_C( 769383957), UINT32_C(2299873739) }, { { UINT32_C(2628290494), UINT32_C(1074012035), UINT32_C(1865371452), UINT32_C( 472578383) }, { UINT32_C(1792312076), UINT32_C( 769383957), UINT32_C(1150549511), UINT32_C(2299873739) }, }, }, { { UINT32_C( 220577233), UINT32_C( 427578636), UINT32_C(3749925080), UINT32_C(3525515086) }, { UINT32_C(2232559414), UINT32_C( 916536353), UINT32_C(3865345051), UINT32_C(2540665286) }, { { UINT32_C( 220577233), UINT32_C(2232559414), UINT32_C( 427578636), UINT32_C( 916536353) }, { UINT32_C(3749925080), UINT32_C(3865345051), UINT32_C(3525515086), UINT32_C(2540665286) }, }, }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4x2_t r = simde_vzipq_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r.val[0], simde_vld1q_u32(test_vec[i].r[0])); simde_test_arm_neon_assert_equal_u32x4(r.val[1], simde_vld1q_u32(test_vec[i].r[1])); } return 0; } #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_TEST_FUNC_LIST_ENTRY(vzip_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vzip_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vzip_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vzip_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vzip_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vzip_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vzip_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vzipq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vzipq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vzipq_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vzipq_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vzipq_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vzipq_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vzipq_u32) #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_END #include 
simde-0.7.2/test/arm/neon/zip1.c000066400000000000000000001505311400333146700163530ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN zip1 #include #include static int test_simde_vzip1_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -594.82), SIMDE_FLOAT32_C( -36.31) }, { SIMDE_FLOAT32_C( -44.16), SIMDE_FLOAT32_C( -416.70) }, { SIMDE_FLOAT32_C( -594.82), SIMDE_FLOAT32_C( -44.16) } }, { { SIMDE_FLOAT32_C( -546.79), SIMDE_FLOAT32_C( 545.60) }, { SIMDE_FLOAT32_C( -407.46), SIMDE_FLOAT32_C( 803.36) }, { SIMDE_FLOAT32_C( -546.79), SIMDE_FLOAT32_C( -407.46) } }, { { SIMDE_FLOAT32_C( -396.25), SIMDE_FLOAT32_C( -289.22) }, { SIMDE_FLOAT32_C( 158.83), SIMDE_FLOAT32_C( 797.31) }, { SIMDE_FLOAT32_C( -396.25), SIMDE_FLOAT32_C( 158.83) } }, { { SIMDE_FLOAT32_C( 984.23), SIMDE_FLOAT32_C( -68.91) }, { SIMDE_FLOAT32_C( 830.88), SIMDE_FLOAT32_C( -995.28) }, { SIMDE_FLOAT32_C( 984.23), SIMDE_FLOAT32_C( 830.88) } }, { { SIMDE_FLOAT32_C( -188.76), SIMDE_FLOAT32_C( 262.91) }, { SIMDE_FLOAT32_C( 778.87), SIMDE_FLOAT32_C( -450.18) }, { SIMDE_FLOAT32_C( -188.76), SIMDE_FLOAT32_C( 778.87) } }, { { SIMDE_FLOAT32_C( 915.01), SIMDE_FLOAT32_C( 593.75) }, { SIMDE_FLOAT32_C( 831.12), SIMDE_FLOAT32_C( -136.74) }, { SIMDE_FLOAT32_C( 915.01), SIMDE_FLOAT32_C( 831.12) } }, { { SIMDE_FLOAT32_C( 353.93), SIMDE_FLOAT32_C( 120.70) }, { SIMDE_FLOAT32_C( -711.16), SIMDE_FLOAT32_C( -974.32) }, { SIMDE_FLOAT32_C( 353.93), SIMDE_FLOAT32_C( -711.16) } }, { { SIMDE_FLOAT32_C( -502.47), SIMDE_FLOAT32_C( 570.22) }, { SIMDE_FLOAT32_C( 392.65), SIMDE_FLOAT32_C( -97.29) }, { SIMDE_FLOAT32_C( -502.47), SIMDE_FLOAT32_C( 392.65) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vzip1_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, 
simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vzip1_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 75), INT8_C( 64), INT8_C( 23), -INT8_C( 70), -INT8_C( 20), -INT8_C( 62), -INT8_C( 104), INT8_C( 0) }, { INT8_C( 77), -INT8_C( 69), -INT8_C( 118), INT8_C( 40), -INT8_C( 21), INT8_C( 81), INT8_C( 79), -INT8_C( 43) }, { INT8_C( 75), INT8_C( 77), INT8_C( 64), -INT8_C( 69), INT8_C( 23), -INT8_C( 118), -INT8_C( 70), INT8_C( 40) } }, { { -INT8_C( 10), INT8_C( 47), -INT8_C( 91), -INT8_C( 90), INT8_C( 57), -INT8_C( 23), -INT8_C( 71), -INT8_C( 7) }, { -INT8_C( 83), INT8_C( 112), INT8_C( 83), -INT8_C( 15), INT8_C( 26), -INT8_C( 11), -INT8_C( 125), INT8_C( 101) }, { -INT8_C( 10), -INT8_C( 83), INT8_C( 47), INT8_C( 112), -INT8_C( 91), INT8_C( 83), -INT8_C( 90), -INT8_C( 15) } }, { { INT8_C( 53), -INT8_C( 102), INT8_C( 32), INT8_C( 33), INT8_C( 93), -INT8_C( 72), INT8_C( 33), -INT8_C( 86) }, { INT8_C( 115), -INT8_C( 84), -INT8_C( 46), INT8_C( 95), -INT8_C( 3), INT8_C( 33), INT8_C( 52), -INT8_C( 13) }, { INT8_C( 53), INT8_C( 115), -INT8_C( 102), -INT8_C( 84), INT8_C( 32), -INT8_C( 46), INT8_C( 33), INT8_C( 95) } }, { { INT8_C( 81), -INT8_C( 39), -INT8_C( 103), -INT8_C( 118), -INT8_C( 62), INT8_C( 82), -INT8_C( 125), INT8_C( 111) }, { -INT8_C( 61), -INT8_C( 42), INT8_C( 97), -INT8_C( 35), -INT8_C( 53), -INT8_C( 28), INT8_C( 67), INT8_C( 1) }, { INT8_C( 81), -INT8_C( 61), -INT8_C( 39), -INT8_C( 42), -INT8_C( 103), INT8_C( 97), -INT8_C( 118), -INT8_C( 35) } }, { { INT8_MAX, INT8_C( 99), INT8_C( 34), -INT8_C( 36), INT8_C( 27), INT8_C( 68), -INT8_C( 122), -INT8_C( 114) }, { -INT8_C( 16), INT8_C( 88), -INT8_C( 19), -INT8_C( 19), INT8_C( 122), INT8_C( 33), -INT8_C( 31), -INT8_C( 53) }, { INT8_MAX, -INT8_C( 16), INT8_C( 99), INT8_C( 88), INT8_C( 34), -INT8_C( 19), -INT8_C( 36), -INT8_C( 19) } }, { { -INT8_C( 5), INT8_C( 122), INT8_C( 85), -INT8_C( 67), -INT8_C( 51), -INT8_C( 40), INT8_C( 45), -INT8_C( 112) 
}, { -INT8_C( 82), -INT8_C( 114), INT8_C( 109), INT8_C( 121), INT8_C( 114), -INT8_C( 80), INT8_C( 122), -INT8_C( 15) }, { -INT8_C( 5), -INT8_C( 82), INT8_C( 122), -INT8_C( 114), INT8_C( 85), INT8_C( 109), -INT8_C( 67), INT8_C( 121) } }, { { INT8_C( 19), -INT8_C( 99), -INT8_C( 51), INT8_C( 46), -INT8_C( 31), INT8_C( 83), -INT8_C( 67), -INT8_C( 47) }, { -INT8_C( 84), -INT8_C( 86), -INT8_C( 66), INT8_C( 38), -INT8_C( 52), -INT8_C( 97), -INT8_C( 15), -INT8_C( 57) }, { INT8_C( 19), -INT8_C( 84), -INT8_C( 99), -INT8_C( 86), -INT8_C( 51), -INT8_C( 66), INT8_C( 46), INT8_C( 38) } }, { { INT8_C( 26), INT8_C( 70), -INT8_C( 124), -INT8_C( 25), INT8_C( 30), -INT8_C( 79), INT8_C( 119), -INT8_C( 52) }, { INT8_C( 63), -INT8_C( 28), INT8_C( 69), -INT8_C( 78), -INT8_C( 107), -INT8_C( 64), -INT8_C( 93), -INT8_C( 88) }, { INT8_C( 26), INT8_C( 63), INT8_C( 70), -INT8_C( 28), -INT8_C( 124), INT8_C( 69), -INT8_C( 25), -INT8_C( 78) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vzip1_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vzip1_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 6394), -INT16_C( 7304), INT16_C( 4761), -INT16_C( 23200) }, { INT16_C( 27245), INT16_C( 9875), -INT16_C( 24091), -INT16_C( 25797) }, { INT16_C( 6394), INT16_C( 27245), -INT16_C( 7304), INT16_C( 9875) } }, { { -INT16_C( 916), -INT16_C( 24207), -INT16_C( 2865), -INT16_C( 14036) }, { -INT16_C( 31945), INT16_C( 23444), -INT16_C( 22348), -INT16_C( 20771) }, { -INT16_C( 916), -INT16_C( 31945), -INT16_C( 24207), INT16_C( 23444) } }, { { INT16_C( 21953), INT16_C( 23185), -INT16_C( 3736), -INT16_C( 10753) }, { -INT16_C( 27812), INT16_C( 16891), INT16_C( 14132), -INT16_C( 24355) }, { INT16_C( 21953), -INT16_C( 27812), INT16_C( 
23185), INT16_C( 16891) } }, { { INT16_C( 20019), INT16_C( 834), INT16_C( 28226), INT16_C( 31436) }, { INT16_C( 24817), -INT16_C( 23083), -INT16_C( 19959), -INT16_C( 13741) }, { INT16_C( 20019), INT16_C( 24817), INT16_C( 834), -INT16_C( 23083) } }, { { -INT16_C( 6905), INT16_C( 28452), INT16_C( 9174), INT16_C( 12868) }, { INT16_C( 16566), -INT16_C( 5260), INT16_C( 20855), -INT16_C( 21877) }, { -INT16_C( 6905), INT16_C( 16566), INT16_C( 28452), -INT16_C( 5260) } }, { { -INT16_C( 12897), -INT16_C( 7763), INT16_C( 31036), INT16_C( 11611) }, { INT16_C( 12506), -INT16_C( 7213), INT16_C( 9954), -INT16_C( 5459) }, { -INT16_C( 12897), INT16_C( 12506), -INT16_C( 7763), -INT16_C( 7213) } }, { { -INT16_C( 12021), -INT16_C( 7591), -INT16_C( 24844), -INT16_C( 21740) }, { -INT16_C( 30498), INT16_C( 21910), INT16_C( 8665), INT16_C( 30975) }, { -INT16_C( 12021), -INT16_C( 30498), -INT16_C( 7591), INT16_C( 21910) } }, { { -INT16_C( 21009), INT16_C( 11098), -INT16_C( 19162), INT16_C( 88) }, { INT16_C( 11238), -INT16_C( 14109), -INT16_C( 28590), INT16_C( 23986) }, { -INT16_C( 21009), INT16_C( 11238), INT16_C( 11098), -INT16_C( 14109) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vzip1_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vzip1_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { INT32_C( 1426738482), INT32_C( 30050468) }, { -INT32_C( 404588153), -INT32_C( 112205881) }, { INT32_C( 1426738482), -INT32_C( 404588153) } }, { { -INT32_C( 1485126862), -INT32_C( 789445372) }, { -INT32_C( 141244937), INT32_C( 1236357399) }, { -INT32_C( 1485126862), -INT32_C( 141244937) } }, { { INT32_C( 1117699230), -INT32_C( 884774844) }, { -INT32_C( 1447942686), INT32_C( 916586756) }, { INT32_C( 1117699230), 
-INT32_C( 1447942686) } }, { { -INT32_C( 908255803), INT32_C( 429510690) }, { -INT32_C( 1408225899), INT32_C( 485868158) }, { -INT32_C( 908255803), -INT32_C( 1408225899) } }, { { -INT32_C( 1033988994), -INT32_C( 561143300) }, { -INT32_C( 897105978), INT32_C( 83962432) }, { -INT32_C( 1033988994), -INT32_C( 897105978) } }, { { INT32_C( 1775164999), INT32_C( 1132685230) }, { INT32_C( 317690772), -INT32_C( 751901355) }, { INT32_C( 1775164999), INT32_C( 317690772) } }, { { INT32_C( 1972735097), -INT32_C( 212590035) }, { -INT32_C( 1564550303), INT32_C( 1286061829) }, { INT32_C( 1972735097), -INT32_C( 1564550303) } }, { { INT32_C( 1287026333), INT32_C( 1921989085) }, { INT32_C( 579108812), -INT32_C( 571100316) }, { INT32_C( 1287026333), INT32_C( 579108812) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = simde_vzip1_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vzip1_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C(207), UINT8_C(175), UINT8_C( 76), UINT8_C(120), UINT8_C(200), UINT8_C(106), UINT8_C(207), UINT8_C(167) }, { UINT8_C( 5), UINT8_C( 22), UINT8_C(155), UINT8_C( 62), UINT8_C(249), UINT8_C(184), UINT8_C(197), UINT8_C(224) }, { UINT8_C(207), UINT8_C( 5), UINT8_C(175), UINT8_C( 22), UINT8_C( 76), UINT8_C(155), UINT8_C(120), UINT8_C( 62) } }, { { UINT8_C( 52), UINT8_C( 5), UINT8_C( 80), UINT8_C( 18), UINT8_C(180), UINT8_C( 4), UINT8_C( 6), UINT8_C(243) }, { UINT8_C(213), UINT8_C( 60), UINT8_C(131), UINT8_C( 8), UINT8_C(202), UINT8_C( 69), UINT8_C( 73), UINT8_C(154) }, { UINT8_C( 52), UINT8_C(213), UINT8_C( 5), UINT8_C( 60), UINT8_C( 80), UINT8_C(131), UINT8_C( 18), UINT8_C( 8) } }, { { UINT8_C(244), UINT8_C(149), UINT8_C( 18), UINT8_C(188), UINT8_C( 0), UINT8_C(225), UINT8_C(100), 
UINT8_C( 5) }, { UINT8_C(248), UINT8_MAX, UINT8_C( 68), UINT8_C(241), UINT8_C(183), UINT8_C( 9), UINT8_C(209), UINT8_C(235) }, { UINT8_C(244), UINT8_C(248), UINT8_C(149), UINT8_MAX, UINT8_C( 18), UINT8_C( 68), UINT8_C(188), UINT8_C(241) } }, { { UINT8_C( 14), UINT8_C( 33), UINT8_C(254), UINT8_C(195), UINT8_C( 37), UINT8_C( 4), UINT8_C(182), UINT8_C(250) }, { UINT8_C( 65), UINT8_C( 57), UINT8_C( 3), UINT8_C( 11), UINT8_C(126), UINT8_C( 76), UINT8_C(165), UINT8_C(114) }, { UINT8_C( 14), UINT8_C( 65), UINT8_C( 33), UINT8_C( 57), UINT8_C(254), UINT8_C( 3), UINT8_C(195), UINT8_C( 11) } }, { { UINT8_C(225), UINT8_C(184), UINT8_C( 47), UINT8_C(225), UINT8_C(153), UINT8_C(147), UINT8_C(231), UINT8_C(145) }, { UINT8_C(146), UINT8_C( 43), UINT8_C(131), UINT8_C( 73), UINT8_C( 52), UINT8_C( 84), UINT8_C( 52), UINT8_C( 67) }, { UINT8_C(225), UINT8_C(146), UINT8_C(184), UINT8_C( 43), UINT8_C( 47), UINT8_C(131), UINT8_C(225), UINT8_C( 73) } }, { { UINT8_C(118), UINT8_C( 50), UINT8_C( 6), UINT8_C(155), UINT8_C( 55), UINT8_C(188), UINT8_C(150), UINT8_C(120) }, { UINT8_C(245), UINT8_C(153), UINT8_C(131), UINT8_C(116), UINT8_C(229), UINT8_C( 41), UINT8_C(230), UINT8_C(198) }, { UINT8_C(118), UINT8_C(245), UINT8_C( 50), UINT8_C(153), UINT8_C( 6), UINT8_C(131), UINT8_C(155), UINT8_C(116) } }, { { UINT8_C(225), UINT8_C( 21), UINT8_C(168), UINT8_C(122), UINT8_C(168), UINT8_C(143), UINT8_C( 12), UINT8_C( 58) }, { UINT8_C(186), UINT8_C(143), UINT8_C(131), UINT8_C(238), UINT8_C(227), UINT8_C(184), UINT8_C( 49), UINT8_C( 89) }, { UINT8_C(225), UINT8_C(186), UINT8_C( 21), UINT8_C(143), UINT8_C(168), UINT8_C(131), UINT8_C(122), UINT8_C(238) } }, { { UINT8_C(234), UINT8_C( 55), UINT8_C(245), UINT8_C( 33), UINT8_C(243), UINT8_C(139), UINT8_C(153), UINT8_C(233) }, { UINT8_C( 36), UINT8_C( 29), UINT8_C( 93), UINT8_C( 9), UINT8_C( 70), UINT8_C( 67), UINT8_C(207), UINT8_C( 39) }, { UINT8_C(234), UINT8_C( 36), UINT8_C( 55), UINT8_C( 29), UINT8_C(245), UINT8_C( 93), UINT8_C( 33), UINT8_C( 9) } }, }; 
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vzip1_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vzip1_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(54503), UINT16_C(20876), UINT16_C(27990), UINT16_C( 1388) }, { UINT16_C( 5134), UINT16_C(61930), UINT16_C(11315), UINT16_C(24246) }, { UINT16_C(54503), UINT16_C( 5134), UINT16_C(20876), UINT16_C(61930) } }, { { UINT16_C(54834), UINT16_C(28009), UINT16_C( 5311), UINT16_C(11808) }, { UINT16_C(44084), UINT16_C(65289), UINT16_C( 886), UINT16_C(24131) }, { UINT16_C(54834), UINT16_C(44084), UINT16_C(28009), UINT16_C(65289) } }, { { UINT16_C(53207), UINT16_C(11951), UINT16_C( 6972), UINT16_C(18995) }, { UINT16_C( 7471), UINT16_C(25147), UINT16_C(61770), UINT16_C(31937) }, { UINT16_C(53207), UINT16_C( 7471), UINT16_C(11951), UINT16_C(25147) } }, { { UINT16_C(10951), UINT16_C(34537), UINT16_C( 2622), UINT16_C(29364) }, { UINT16_C(48822), UINT16_C(11377), UINT16_C(46273), UINT16_C(39050) }, { UINT16_C(10951), UINT16_C(48822), UINT16_C(34537), UINT16_C(11377) } }, { { UINT16_C(14723), UINT16_C(49094), UINT16_C(63828), UINT16_C(33545) }, { UINT16_C(17431), UINT16_C(25062), UINT16_C(42805), UINT16_C(64989) }, { UINT16_C(14723), UINT16_C(17431), UINT16_C(49094), UINT16_C(25062) } }, { { UINT16_C(51153), UINT16_C( 3971), UINT16_C(14545), UINT16_C(34689) }, { UINT16_C(62198), UINT16_C(47027), UINT16_C(16039), UINT16_C(10831) }, { UINT16_C(51153), UINT16_C(62198), UINT16_C( 3971), UINT16_C(47027) } }, { { UINT16_C( 5751), UINT16_C(52458), UINT16_C(62223), UINT16_C( 9807) }, { UINT16_C(13624), UINT16_C(28039), UINT16_C(26076), UINT16_C(44394) }, { UINT16_C( 5751), UINT16_C(13624), UINT16_C(52458), UINT16_C(28039) } }, { { UINT16_C(60972), 
UINT16_C(64956), UINT16_C(15654), UINT16_C( 7300) }, { UINT16_C(14128), UINT16_C(55251), UINT16_C( 8821), UINT16_C(60673) }, { UINT16_C(60972), UINT16_C(14128), UINT16_C(64956), UINT16_C(55251) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vzip1_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vzip1_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C( 509442038), UINT32_C(1705505599) }, { UINT32_C(1361533953), UINT32_C(2713165570) }, { UINT32_C( 509442038), UINT32_C(1361533953) } }, { { UINT32_C(1841080646), UINT32_C(1488848664) }, { UINT32_C(1673216874), UINT32_C(1840895607) }, { UINT32_C(1841080646), UINT32_C(1673216874) } }, { { UINT32_C(2357990989), UINT32_C( 183644937) }, { UINT32_C(2455443855), UINT32_C(4281537464) }, { UINT32_C(2357990989), UINT32_C(2455443855) } }, { { UINT32_C(3563909052), UINT32_C(1630350070) }, { UINT32_C(3854887021), UINT32_C( 206732735) }, { UINT32_C(3563909052), UINT32_C(3854887021) } }, { { UINT32_C(2627329683), UINT32_C(2712046353) }, { UINT32_C(1563624100), UINT32_C(3512493589) }, { UINT32_C(2627329683), UINT32_C(1563624100) } }, { { UINT32_C(1269221461), UINT32_C(1621939186) }, { UINT32_C(2051371195), UINT32_C(2156369901) }, { UINT32_C(1269221461), UINT32_C(2051371195) } }, { { UINT32_C(2266767478), UINT32_C(1328071595) }, { UINT32_C(3668728773), UINT32_C( 380307649) }, { UINT32_C(2266767478), UINT32_C(3668728773) } }, { { UINT32_C(3278000592), UINT32_C(3760393764) }, { UINT32_C(1817864319), UINT32_C(1978524159) }, { UINT32_C(3278000592), UINT32_C(1817864319) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = 
simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vzip1_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vzip1q_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 269.04), SIMDE_FLOAT32_C( 671.35), SIMDE_FLOAT32_C( 766.94), SIMDE_FLOAT32_C( -408.87) }, { SIMDE_FLOAT32_C( -755.43), SIMDE_FLOAT32_C( -620.83), SIMDE_FLOAT32_C( -105.18), SIMDE_FLOAT32_C( 580.87) }, { SIMDE_FLOAT32_C( 269.04), SIMDE_FLOAT32_C( -755.43), SIMDE_FLOAT32_C( 671.35), SIMDE_FLOAT32_C( -620.83) } }, { { SIMDE_FLOAT32_C( -7.35), SIMDE_FLOAT32_C( -641.39), SIMDE_FLOAT32_C( -306.86), SIMDE_FLOAT32_C( -476.86) }, { SIMDE_FLOAT32_C( 983.02), SIMDE_FLOAT32_C( 145.77), SIMDE_FLOAT32_C( -393.28), SIMDE_FLOAT32_C( -514.11) }, { SIMDE_FLOAT32_C( -7.35), SIMDE_FLOAT32_C( 983.02), SIMDE_FLOAT32_C( -641.39), SIMDE_FLOAT32_C( 145.77) } }, { { SIMDE_FLOAT32_C( 201.77), SIMDE_FLOAT32_C( 778.35), SIMDE_FLOAT32_C( 479.04), SIMDE_FLOAT32_C( -404.39) }, { SIMDE_FLOAT32_C( 748.07), SIMDE_FLOAT32_C( -591.33), SIMDE_FLOAT32_C( 845.51), SIMDE_FLOAT32_C( -130.39) }, { SIMDE_FLOAT32_C( 201.77), SIMDE_FLOAT32_C( 748.07), SIMDE_FLOAT32_C( 778.35), SIMDE_FLOAT32_C( -591.33) } }, { { SIMDE_FLOAT32_C( 187.46), SIMDE_FLOAT32_C( 563.12), SIMDE_FLOAT32_C( -567.04), SIMDE_FLOAT32_C( 46.32) }, { SIMDE_FLOAT32_C( 549.79), SIMDE_FLOAT32_C( -703.64), SIMDE_FLOAT32_C( 919.82), SIMDE_FLOAT32_C( -181.17) }, { SIMDE_FLOAT32_C( 187.46), SIMDE_FLOAT32_C( 549.79), SIMDE_FLOAT32_C( 563.12), SIMDE_FLOAT32_C( -703.64) } }, { { SIMDE_FLOAT32_C( 967.71), SIMDE_FLOAT32_C( 686.76), SIMDE_FLOAT32_C( 409.96), SIMDE_FLOAT32_C( -787.73) }, { SIMDE_FLOAT32_C( -934.07), SIMDE_FLOAT32_C( -695.22), SIMDE_FLOAT32_C( 793.14), SIMDE_FLOAT32_C( 58.59) }, { SIMDE_FLOAT32_C( 967.71), SIMDE_FLOAT32_C( -934.07), SIMDE_FLOAT32_C( 686.76), SIMDE_FLOAT32_C( -695.22) } }, { { SIMDE_FLOAT32_C( -336.61), 
SIMDE_FLOAT32_C( -513.72), SIMDE_FLOAT32_C( 581.73), SIMDE_FLOAT32_C( -353.60) }, { SIMDE_FLOAT32_C( 632.05), SIMDE_FLOAT32_C( -811.55), SIMDE_FLOAT32_C( 132.29), SIMDE_FLOAT32_C( -166.18) }, { SIMDE_FLOAT32_C( -336.61), SIMDE_FLOAT32_C( 632.05), SIMDE_FLOAT32_C( -513.72), SIMDE_FLOAT32_C( -811.55) } }, { { SIMDE_FLOAT32_C( 966.80), SIMDE_FLOAT32_C( -388.67), SIMDE_FLOAT32_C( 429.43), SIMDE_FLOAT32_C( 714.87) }, { SIMDE_FLOAT32_C( 20.00), SIMDE_FLOAT32_C( 274.94), SIMDE_FLOAT32_C( -415.52), SIMDE_FLOAT32_C( -792.53) }, { SIMDE_FLOAT32_C( 966.80), SIMDE_FLOAT32_C( 20.00), SIMDE_FLOAT32_C( -388.67), SIMDE_FLOAT32_C( 274.94) } }, { { SIMDE_FLOAT32_C( -161.95), SIMDE_FLOAT32_C( 17.44), SIMDE_FLOAT32_C( 253.79), SIMDE_FLOAT32_C( -612.15) }, { SIMDE_FLOAT32_C( 313.79), SIMDE_FLOAT32_C( 173.61), SIMDE_FLOAT32_C( 206.68), SIMDE_FLOAT32_C( 281.50) }, { SIMDE_FLOAT32_C( -161.95), SIMDE_FLOAT32_C( 313.79), SIMDE_FLOAT32_C( 17.44), SIMDE_FLOAT32_C( 173.61) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vzip1q_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vzip1q_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { -INT8_C( 70), -INT8_C( 22), -INT8_C( 88), INT8_C( 42), -INT8_C( 9), -INT8_C( 4), -INT8_C( 126), INT8_C( 11), -INT8_C( 37), -INT8_C( 127), INT8_C( 7), -INT8_C( 109), INT8_C( 119), -INT8_C( 90), -INT8_C( 99), INT8_C( 0) }, { -INT8_C( 41), INT8_C( 47), INT8_C( 38), INT8_C( 5), INT8_C( 103), -INT8_C( 61), -INT8_C( 45), -INT8_C( 22), INT8_C( 43), -INT8_C( 92), INT8_C( 19), INT8_C( 114), INT8_C( 110), -INT8_C( 85), INT8_C( 62), INT8_C( 41) }, { -INT8_C( 70), -INT8_C( 41), -INT8_C( 22), INT8_C( 47), -INT8_C( 88), INT8_C( 38), INT8_C( 42), INT8_C( 5), -INT8_C( 9), INT8_C( 103), 
-INT8_C( 4), -INT8_C( 61), -INT8_C( 126), -INT8_C( 45), INT8_C( 11), -INT8_C( 22) } }, { { -INT8_C( 107), -INT8_C( 26), INT8_C( 83), -INT8_C( 116), -INT8_C( 30), -INT8_C( 43), -INT8_C( 104), -INT8_C( 66), INT8_C( 87), -INT8_C( 97), INT8_C( 81), -INT8_C( 50), INT8_C( 69), -INT8_C( 18), -INT8_C( 50), INT8_C( 28) }, { INT8_C( 30), -INT8_C( 11), INT8_C( 34), -INT8_C( 123), -INT8_C( 72), -INT8_C( 11), INT8_C( 111), -INT8_C( 29), -INT8_C( 103), -INT8_C( 125), INT8_C( 85), INT8_C( 7), INT8_C( 46), -INT8_C( 108), INT8_C( 48), -INT8_C( 61) }, { -INT8_C( 107), INT8_C( 30), -INT8_C( 26), -INT8_C( 11), INT8_C( 83), INT8_C( 34), -INT8_C( 116), -INT8_C( 123), -INT8_C( 30), -INT8_C( 72), -INT8_C( 43), -INT8_C( 11), -INT8_C( 104), INT8_C( 111), -INT8_C( 66), -INT8_C( 29) } }, { { INT8_C( 122), -INT8_C( 124), INT8_C( 79), INT8_C( 93), INT8_C( 89), -INT8_C( 25), INT8_C( 27), -INT8_C( 80), -INT8_C( 122), INT8_C( 108), INT8_C( 126), -INT8_C( 53), INT8_C( 90), INT8_C( 77), -INT8_C( 24), INT8_C( 120) }, { INT8_C( 66), INT8_C( 10), -INT8_C( 2), -INT8_C( 6), -INT8_C( 1), INT8_C( 109), -INT8_C( 35), -INT8_C( 104), -INT8_C( 16), INT8_C( 51), -INT8_C( 97), INT8_C( 30), -INT8_C( 57), -INT8_C( 48), -INT8_C( 31), INT8_C( 65) }, { INT8_C( 122), INT8_C( 66), -INT8_C( 124), INT8_C( 10), INT8_C( 79), -INT8_C( 2), INT8_C( 93), -INT8_C( 6), INT8_C( 89), -INT8_C( 1), -INT8_C( 25), INT8_C( 109), INT8_C( 27), -INT8_C( 35), -INT8_C( 80), -INT8_C( 104) } }, { { INT8_C( 84), INT8_C( 49), -INT8_C( 98), -INT8_C( 83), INT8_C( 24), -INT8_C( 71), INT8_C( 94), -INT8_C( 97), INT8_C( 37), -INT8_C( 36), INT8_C( 106), INT8_MIN, INT8_C( 41), INT8_C( 82), -INT8_C( 8), INT8_C( 107) }, { INT8_C( 92), -INT8_C( 10), INT8_C( 101), INT8_C( 91), INT8_C( 100), INT8_C( 67), -INT8_C( 13), INT8_C( 84), INT8_C( 118), -INT8_C( 109), INT8_C( 115), INT8_C( 61), INT8_C( 99), INT8_C( 84), INT8_C( 126), -INT8_C( 73) }, { INT8_C( 84), INT8_C( 92), INT8_C( 49), -INT8_C( 10), -INT8_C( 98), INT8_C( 101), -INT8_C( 83), INT8_C( 91), INT8_C( 
24), INT8_C( 100), -INT8_C( 71), INT8_C( 67), INT8_C( 94), -INT8_C( 13), -INT8_C( 97), INT8_C( 84) } }, { { -INT8_C( 123), INT8_C( 29), INT8_C( 100), -INT8_C( 98), -INT8_C( 42), -INT8_C( 62), INT8_C( 61), -INT8_C( 4), -INT8_C( 97), -INT8_C( 89), INT8_C( 124), -INT8_C( 56), -INT8_C( 6), INT8_C( 116), INT8_C( 52), INT8_C( 86) }, { INT8_C( 107), -INT8_C( 103), -INT8_C( 78), -INT8_C( 49), -INT8_C( 36), -INT8_C( 91), INT8_C( 35), INT8_C( 82), INT8_C( 56), -INT8_C( 106), -INT8_C( 113), -INT8_C( 101), -INT8_C( 21), INT8_C( 14), INT8_C( 82), INT8_C( 112) }, { -INT8_C( 123), INT8_C( 107), INT8_C( 29), -INT8_C( 103), INT8_C( 100), -INT8_C( 78), -INT8_C( 98), -INT8_C( 49), -INT8_C( 42), -INT8_C( 36), -INT8_C( 62), -INT8_C( 91), INT8_C( 61), INT8_C( 35), -INT8_C( 4), INT8_C( 82) } }, { { INT8_C( 43), -INT8_C( 73), INT8_C( 14), INT8_C( 1), INT8_C( 121), INT8_C( 75), -INT8_C( 3), INT8_C( 24), -INT8_C( 13), INT8_C( 121), -INT8_C( 31), -INT8_C( 19), -INT8_C( 18), INT8_C( 21), INT8_C( 67), INT8_C( 89) }, { -INT8_C( 82), -INT8_C( 11), INT8_C( 40), -INT8_C( 117), -INT8_C( 101), INT8_C( 75), -INT8_C( 35), -INT8_C( 45), -INT8_C( 30), INT8_C( 109), INT8_C( 111), -INT8_C( 51), INT8_C( 123), -INT8_C( 63), INT8_C( 61), -INT8_C( 90) }, { INT8_C( 43), -INT8_C( 82), -INT8_C( 73), -INT8_C( 11), INT8_C( 14), INT8_C( 40), INT8_C( 1), -INT8_C( 117), INT8_C( 121), -INT8_C( 101), INT8_C( 75), INT8_C( 75), -INT8_C( 3), -INT8_C( 35), INT8_C( 24), -INT8_C( 45) } }, { { INT8_C( 120), INT8_C( 76), -INT8_C( 89), -INT8_C( 14), -INT8_C( 105), -INT8_C( 91), INT8_C( 10), -INT8_C( 118), INT8_C( 30), -INT8_C( 21), INT8_C( 119), INT8_C( 12), INT8_C( 0), -INT8_C( 69), INT8_C( 101), -INT8_C( 81) }, { -INT8_C( 80), -INT8_C( 115), INT8_C( 58), INT8_C( 75), -INT8_C( 39), INT8_C( 23), INT8_C( 31), -INT8_C( 69), -INT8_C( 124), -INT8_C( 114), -INT8_C( 120), -INT8_C( 1), INT8_C( 79), -INT8_C( 59), -INT8_C( 91), -INT8_C( 56) }, { INT8_C( 120), -INT8_C( 80), INT8_C( 76), -INT8_C( 115), -INT8_C( 89), INT8_C( 58), -INT8_C( 
14), INT8_C( 75), -INT8_C( 105), -INT8_C( 39), -INT8_C( 91), INT8_C( 23), INT8_C( 10), INT8_C( 31), -INT8_C( 118), -INT8_C( 69) } }, { { INT8_C( 17), INT8_C( 77), -INT8_C( 70), -INT8_C( 87), -INT8_C( 14), -INT8_C( 60), INT8_C( 51), INT8_C( 16), -INT8_C( 80), -INT8_C( 85), INT8_C( 29), -INT8_C( 80), INT8_C( 102), -INT8_C( 126), INT8_C( 95), INT8_C( 22) }, { INT8_C( 16), -INT8_C( 103), INT8_C( 98), -INT8_C( 23), -INT8_C( 79), -INT8_C( 127), -INT8_C( 92), INT8_C( 53), INT8_C( 15), INT8_C( 44), INT8_C( 53), INT8_C( 94), -INT8_C( 15), -INT8_C( 38), INT8_C( 38), INT8_C( 3) }, { INT8_C( 17), INT8_C( 16), INT8_C( 77), -INT8_C( 103), -INT8_C( 70), INT8_C( 98), -INT8_C( 87), -INT8_C( 23), -INT8_C( 14), -INT8_C( 79), -INT8_C( 60), -INT8_C( 127), INT8_C( 51), -INT8_C( 92), INT8_C( 16), INT8_C( 53) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); simde_int8x16_t r = simde_vzip1q_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vzip1q_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 28283), -INT16_C( 8974), -INT16_C( 22113), INT16_C( 11484), -INT16_C( 30181), -INT16_C( 20779), -INT16_C( 25603), INT16_C( 17370) }, { INT16_C( 31538), -INT16_C( 31974), INT16_C( 9962), -INT16_C( 13557), INT16_C( 1664), INT16_C( 31891), -INT16_C( 5649), INT16_C( 27379) }, { INT16_C( 28283), INT16_C( 31538), -INT16_C( 8974), -INT16_C( 31974), -INT16_C( 22113), INT16_C( 9962), INT16_C( 11484), -INT16_C( 13557) } }, { { -INT16_C( 6568), -INT16_C( 2234), INT16_C( 8847), -INT16_C( 21981), -INT16_C( 1620), -INT16_C( 21927), INT16_C( 13204), -INT16_C( 14611) }, { INT16_C( 1966), -INT16_C( 26551), INT16_C( 21806), -INT16_C( 20893), -INT16_C( 2213), INT16_C( 18986), INT16_C( 7904), INT16_C( 14516) }, { -INT16_C( 6568), INT16_C( 1966), -INT16_C( 
2234), -INT16_C( 26551), INT16_C( 8847), INT16_C( 21806), -INT16_C( 21981), -INT16_C( 20893) } }, { { -INT16_C( 1276), -INT16_C( 27857), INT16_C( 21277), -INT16_C( 13762), -INT16_C( 26804), -INT16_C( 8076), INT16_C( 25034), INT16_C( 30886) }, { -INT16_C( 3992), -INT16_C( 27119), INT16_C( 29765), -INT16_C( 24507), INT16_C( 28523), INT16_C( 19690), -INT16_C( 24947), -INT16_C( 28284) }, { -INT16_C( 1276), -INT16_C( 3992), -INT16_C( 27857), -INT16_C( 27119), INT16_C( 21277), INT16_C( 29765), -INT16_C( 13762), -INT16_C( 24507) } }, { { -INT16_C( 19303), -INT16_C( 18651), INT16_C( 25351), INT16_C( 21377), -INT16_C( 2566), -INT16_C( 15309), -INT16_C( 9898), -INT16_C( 16836) }, { INT16_C( 19913), INT16_C( 3669), -INT16_C( 25918), INT16_C( 11694), -INT16_C( 26615), -INT16_C( 26759), -INT16_C( 457), -INT16_C( 12248) }, { -INT16_C( 19303), INT16_C( 19913), -INT16_C( 18651), INT16_C( 3669), INT16_C( 25351), -INT16_C( 25918), INT16_C( 21377), INT16_C( 11694) } }, { { INT16_C( 19890), -INT16_C( 18041), INT16_C( 2224), -INT16_C( 22004), INT16_C( 16381), INT16_C( 21358), -INT16_C( 21736), -INT16_C( 7662) }, { INT16_C( 26616), -INT16_C( 17680), -INT16_C( 24831), INT16_C( 2792), INT16_C( 24887), INT16_C( 28321), -INT16_C( 13729), INT16_C( 4415) }, { INT16_C( 19890), INT16_C( 26616), -INT16_C( 18041), -INT16_C( 17680), INT16_C( 2224), -INT16_C( 24831), -INT16_C( 22004), INT16_C( 2792) } }, { { -INT16_C( 14825), -INT16_C( 14134), -INT16_C( 10545), -INT16_C( 13198), -INT16_C( 7915), INT16_C( 11808), INT16_C( 12940), -INT16_C( 31728) }, { INT16_C( 153), -INT16_C( 26049), INT16_C( 10143), -INT16_C( 10332), INT16_C( 18056), -INT16_C( 6075), -INT16_C( 31728), INT16_C( 10233) }, { -INT16_C( 14825), INT16_C( 153), -INT16_C( 14134), -INT16_C( 26049), -INT16_C( 10545), INT16_C( 10143), -INT16_C( 13198), -INT16_C( 10332) } }, { { -INT16_C( 15285), INT16_C( 6895), INT16_C( 25242), -INT16_C( 20250), INT16_C( 1603), -INT16_C( 12322), -INT16_C( 4552), -INT16_C( 11949) }, { -INT16_C( 27922), 
-INT16_C( 29077), INT16_C( 4281), INT16_C( 16997), -INT16_C( 21930), INT16_C( 26154), INT16_C( 9007), INT16_C( 31373) }, { -INT16_C( 15285), -INT16_C( 27922), INT16_C( 6895), -INT16_C( 29077), INT16_C( 25242), INT16_C( 4281), -INT16_C( 20250), INT16_C( 16997) } }, { { INT16_C( 32231), -INT16_C( 32108), INT16_C( 31455), INT16_C( 8754), INT16_C( 4225), -INT16_C( 17935), INT16_C( 17662), -INT16_C( 4981) }, { -INT16_C( 2345), -INT16_C( 28550), -INT16_C( 8442), INT16_C( 23762), -INT16_C( 886), -INT16_C( 17982), INT16_C( 20512), INT16_C( 1843) }, { INT16_C( 32231), -INT16_C( 2345), -INT16_C( 32108), -INT16_C( 28550), INT16_C( 31455), -INT16_C( 8442), INT16_C( 8754), INT16_C( 23762) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vzip1q_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } return 0; } static int test_simde_vzip1q_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 1675320975), -INT32_C( 1896477137), INT32_C( 1084998411), -INT32_C( 1861441150) }, { -INT32_C( 22335743), -INT32_C( 1566060132), INT32_C( 509923909), INT32_C( 28916624) }, { -INT32_C( 1675320975), -INT32_C( 22335743), -INT32_C( 1896477137), -INT32_C( 1566060132) } }, { { INT32_C( 211672541), -INT32_C( 157641493), -INT32_C( 650689193), -INT32_C( 261471250) }, { INT32_C( 267261299), INT32_C( 733058534), -INT32_C( 146205337), INT32_C( 788005457) }, { INT32_C( 211672541), INT32_C( 267261299), -INT32_C( 157641493), INT32_C( 733058534) } }, { { -INT32_C( 902130209), -INT32_C( 2134780631), INT32_C( 156956698), -INT32_C( 1359363013) }, { -INT32_C( 1061230630), -INT32_C( 471044228), -INT32_C( 690342523), INT32_C( 386191928) }, { -INT32_C( 902130209), -INT32_C( 1061230630), -INT32_C( 2134780631), -INT32_C( 471044228) } }, { { -INT32_C( 1847443865), 
INT32_C( 772907795), -INT32_C( 701011045), INT32_C( 176500784) }, { -INT32_C( 1815461097), INT32_C( 930526898), INT32_C( 604852460), -INT32_C( 1992617694) }, { -INT32_C( 1847443865), -INT32_C( 1815461097), INT32_C( 772907795), INT32_C( 930526898) } }, { { INT32_C( 1662655824), INT32_C( 1536240832), -INT32_C( 952973161), INT32_C( 265402360) }, { -INT32_C( 1398629126), INT32_C( 1055135826), -INT32_C( 1973227160), INT32_C( 1393860099) }, { INT32_C( 1662655824), -INT32_C( 1398629126), INT32_C( 1536240832), INT32_C( 1055135826) } }, { { INT32_C( 2092314299), -INT32_C( 220772262), INT32_C( 163121424), -INT32_C( 1172796480) }, { INT32_C( 2036841255), INT32_C( 1018710995), INT32_C( 1069947452), INT32_C( 1955781304) }, { INT32_C( 2092314299), INT32_C( 2036841255), -INT32_C( 220772262), INT32_C( 1018710995) } }, { { INT32_C( 1676691721), -INT32_C( 1588213871), -INT32_C( 1851125807), -INT32_C( 1051933798) }, { INT32_C( 1362801534), INT32_C( 982381310), -INT32_C( 981838835), INT32_C( 926485550) }, { INT32_C( 1676691721), INT32_C( 1362801534), -INT32_C( 1588213871), INT32_C( 982381310) } }, { { -INT32_C( 426038955), -INT32_C( 1031212815), -INT32_C( 1722600705), INT32_C( 1935319029) }, { INT32_C( 1355126098), -INT32_C( 1802808697), -INT32_C( 715520602), INT32_C( 1728877329) }, { -INT32_C( 426038955), INT32_C( 1355126098), -INT32_C( 1031212815), -INT32_C( 1802808697) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vzip1q_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vzip1q_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { -INT64_C( 448570424052926050), -INT64_C( 8985443691791153961) }, { -INT64_C( 8040865633061995433), -INT64_C( 4704410991765481408) }, { -INT64_C( 448570424052926050), 
-INT64_C( 8040865633061995433) } }, { { INT64_C( 9032521722090244812), INT64_C( 9119651969885461627) }, { -INT64_C( 7142984589955546880), -INT64_C( 6918627632000909283) }, { INT64_C( 9032521722090244812), -INT64_C( 7142984589955546880) } }, { { -INT64_C( 7096168734649275294), INT64_C( 4600489963625823284) }, { -INT64_C( 7461780158734764678), INT64_C( 5701787941971809454) }, { -INT64_C( 7096168734649275294), -INT64_C( 7461780158734764678) } }, { { -INT64_C( 6071453218912133123), INT64_C( 1952555225160509337) }, { -INT64_C( 4438750342192912813), INT64_C( 753337045300230969) }, { -INT64_C( 6071453218912133123), -INT64_C( 4438750342192912813) } }, { { INT64_C( 4802781869398215811), -INT64_C( 2446524920958899121) }, { -INT64_C( 7228257185826013479), -INT64_C( 2637643357437693106) }, { INT64_C( 4802781869398215811), -INT64_C( 7228257185826013479) } }, { { -INT64_C( 4040717907954069503), INT64_C( 1338347791406597929) }, { -INT64_C( 6935349563607728190), -INT64_C( 5127804786686918237) }, { -INT64_C( 4040717907954069503), -INT64_C( 6935349563607728190) } }, { { INT64_C( 5849981475946147056), INT64_C( 7900044558967629654) }, { -INT64_C( 736248026253581609), INT64_C( 1315550417814340307) }, { INT64_C( 5849981475946147056), -INT64_C( 736248026253581609) } }, { { INT64_C( 5326820422534734962), INT64_C( 7037572101881524881) }, { INT64_C( 7017823791531768860), -INT64_C( 8266027145664703468) }, { INT64_C( 5326820422534734962), INT64_C( 7017823791531768860) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_vzip1q_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vzip1q_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(100), UINT8_C(203), UINT8_C(215), UINT8_C(200), UINT8_C(136), 
UINT8_C(145), UINT8_C(227), UINT8_C(166), UINT8_C( 22), UINT8_C(195), UINT8_C(118), UINT8_C(159), UINT8_C(105), UINT8_C(241), UINT8_C(107), UINT8_C(158) }, { UINT8_C( 53), UINT8_C( 73), UINT8_C(155), UINT8_C(249), UINT8_C(100), UINT8_C( 88), UINT8_C(190), UINT8_C( 51), UINT8_C( 43), UINT8_C(242), UINT8_C(209), UINT8_C( 1), UINT8_C(234), UINT8_C(166), UINT8_C( 44), UINT8_C( 78) }, { UINT8_C(100), UINT8_C( 53), UINT8_C(203), UINT8_C( 73), UINT8_C(215), UINT8_C(155), UINT8_C(200), UINT8_C(249), UINT8_C(136), UINT8_C(100), UINT8_C(145), UINT8_C( 88), UINT8_C(227), UINT8_C(190), UINT8_C(166), UINT8_C( 51) } }, { { UINT8_C(113), UINT8_C( 4), UINT8_C( 22), UINT8_C(250), UINT8_C(149), UINT8_C(249), UINT8_C(160), UINT8_C(171), UINT8_C(188), UINT8_C( 22), UINT8_C( 75), UINT8_C( 38), UINT8_C( 7), UINT8_C(182), UINT8_C(196), UINT8_C( 60) }, { UINT8_MAX, UINT8_C( 95), UINT8_C( 54), UINT8_C( 99), UINT8_C(184), UINT8_C(244), UINT8_C(151), UINT8_C(227), UINT8_C(230), UINT8_C(104), UINT8_C(229), UINT8_C(208), UINT8_C( 14), UINT8_C( 17), UINT8_C( 30), UINT8_C(128) }, { UINT8_C(113), UINT8_MAX, UINT8_C( 4), UINT8_C( 95), UINT8_C( 22), UINT8_C( 54), UINT8_C(250), UINT8_C( 99), UINT8_C(149), UINT8_C(184), UINT8_C(249), UINT8_C(244), UINT8_C(160), UINT8_C(151), UINT8_C(171), UINT8_C(227) } }, { { UINT8_C( 21), UINT8_C( 53), UINT8_C(122), UINT8_C(171), UINT8_C( 46), UINT8_C( 26), UINT8_C( 86), UINT8_C(235), UINT8_C( 48), UINT8_C(161), UINT8_C( 17), UINT8_C( 56), UINT8_C( 87), UINT8_C(213), UINT8_C(116), UINT8_C( 86) }, { UINT8_C( 53), UINT8_C(170), UINT8_C(186), UINT8_C(237), UINT8_C(158), UINT8_C( 81), UINT8_C(208), UINT8_C(132), UINT8_C(185), UINT8_C(181), UINT8_C( 84), UINT8_C(199), UINT8_C(199), UINT8_C(115), UINT8_C( 71), UINT8_C(220) }, { UINT8_C( 21), UINT8_C( 53), UINT8_C( 53), UINT8_C(170), UINT8_C(122), UINT8_C(186), UINT8_C(171), UINT8_C(237), UINT8_C( 46), UINT8_C(158), UINT8_C( 26), UINT8_C( 81), UINT8_C( 86), UINT8_C(208), UINT8_C(235), UINT8_C(132) } }, { { UINT8_C(168), 
UINT8_C(193), UINT8_C(135), UINT8_C(214), UINT8_C(219), UINT8_C(222), UINT8_C(193), UINT8_C( 12), UINT8_C(127), UINT8_C(210), UINT8_C( 68), UINT8_C(215), UINT8_C(168), UINT8_C(184), UINT8_C( 45), UINT8_C(221) }, { UINT8_C( 99), UINT8_C(231), UINT8_C(202), UINT8_C( 1), UINT8_C( 56), UINT8_C(154), UINT8_C(134), UINT8_C(241), UINT8_C( 80), UINT8_C(218), UINT8_C(185), UINT8_C( 23), UINT8_C( 77), UINT8_C( 0), UINT8_C(243), UINT8_C(245) }, { UINT8_C(168), UINT8_C( 99), UINT8_C(193), UINT8_C(231), UINT8_C(135), UINT8_C(202), UINT8_C(214), UINT8_C( 1), UINT8_C(219), UINT8_C( 56), UINT8_C(222), UINT8_C(154), UINT8_C(193), UINT8_C(134), UINT8_C( 12), UINT8_C(241) } }, { { UINT8_C(194), UINT8_C(123), UINT8_C(204), UINT8_C(157), UINT8_C( 89), UINT8_C(141), UINT8_C(169), UINT8_C(216), UINT8_C( 96), UINT8_C(237), UINT8_C(175), UINT8_C( 8), UINT8_C(166), UINT8_C(221), UINT8_C(229), UINT8_C( 9) }, { UINT8_C(196), UINT8_C(175), UINT8_C( 10), UINT8_C(253), UINT8_C( 73), UINT8_C(144), UINT8_C(238), UINT8_C(153), UINT8_C(107), UINT8_C(167), UINT8_C(176), UINT8_C(184), UINT8_C(168), UINT8_C(164), UINT8_C(174), UINT8_C(106) }, { UINT8_C(194), UINT8_C(196), UINT8_C(123), UINT8_C(175), UINT8_C(204), UINT8_C( 10), UINT8_C(157), UINT8_C(253), UINT8_C( 89), UINT8_C( 73), UINT8_C(141), UINT8_C(144), UINT8_C(169), UINT8_C(238), UINT8_C(216), UINT8_C(153) } }, { { UINT8_C( 31), UINT8_C(122), UINT8_C( 7), UINT8_C(120), UINT8_C( 7), UINT8_C(177), UINT8_C( 80), UINT8_C(103), UINT8_C(158), UINT8_C( 0), UINT8_C(111), UINT8_C( 68), UINT8_C(221), UINT8_C( 84), UINT8_C( 77), UINT8_C(161) }, { UINT8_C( 3), UINT8_C( 88), UINT8_C(158), UINT8_C( 77), UINT8_C(232), UINT8_C(141), UINT8_C(230), UINT8_C( 83), UINT8_C( 52), UINT8_C(151), UINT8_C( 12), UINT8_C(220), UINT8_C( 59), UINT8_C(186), UINT8_C( 70), UINT8_C( 90) }, { UINT8_C( 31), UINT8_C( 3), UINT8_C(122), UINT8_C( 88), UINT8_C( 7), UINT8_C(158), UINT8_C(120), UINT8_C( 77), UINT8_C( 7), UINT8_C(232), UINT8_C(177), UINT8_C(141), UINT8_C( 80), 
UINT8_C(230), UINT8_C(103), UINT8_C( 83) } }, { { UINT8_C( 52), UINT8_C( 78), UINT8_C(210), UINT8_C( 59), UINT8_MAX, UINT8_C( 34), UINT8_C(163), UINT8_C(157), UINT8_C( 34), UINT8_C( 18), UINT8_C(226), UINT8_MAX, UINT8_C(103), UINT8_C( 47), UINT8_C(161), UINT8_C(106) }, { UINT8_C(135), UINT8_C( 63), UINT8_C(183), UINT8_C(112), UINT8_C(204), UINT8_C(158), UINT8_C(195), UINT8_C( 1), UINT8_C( 53), UINT8_C(207), UINT8_C(221), UINT8_C(112), UINT8_C(137), UINT8_C( 36), UINT8_C(202), UINT8_C(189) }, { UINT8_C( 52), UINT8_C(135), UINT8_C( 78), UINT8_C( 63), UINT8_C(210), UINT8_C(183), UINT8_C( 59), UINT8_C(112), UINT8_MAX, UINT8_C(204), UINT8_C( 34), UINT8_C(158), UINT8_C(163), UINT8_C(195), UINT8_C(157), UINT8_C( 1) } }, { { UINT8_C(114), UINT8_C(156), UINT8_C(249), UINT8_C(113), UINT8_C(190), UINT8_C(156), UINT8_C( 14), UINT8_C(225), UINT8_C(174), UINT8_C(240), UINT8_C(224), UINT8_C( 21), UINT8_C( 32), UINT8_C(129), UINT8_C(128), UINT8_C(167) }, { UINT8_C(193), UINT8_C( 55), UINT8_C( 23), UINT8_C(141), UINT8_C(213), UINT8_C(219), UINT8_C(142), UINT8_C( 10), UINT8_C(170), UINT8_C(108), UINT8_C(122), UINT8_C( 52), UINT8_C(144), UINT8_C( 68), UINT8_C(241), UINT8_C( 2) }, { UINT8_C(114), UINT8_C(193), UINT8_C(156), UINT8_C( 55), UINT8_C(249), UINT8_C( 23), UINT8_C(113), UINT8_C(141), UINT8_C(190), UINT8_C(213), UINT8_C(156), UINT8_C(219), UINT8_C( 14), UINT8_C(142), UINT8_C(225), UINT8_C( 10) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vzip1q_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vzip1q_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(30177), UINT16_C(25079), UINT16_C(19204), UINT16_C( 1663), UINT16_C(38129), UINT16_C(42655), UINT16_C( 5776), UINT16_C( 1946) }, { 
UINT16_C( 7669), UINT16_C(25887), UINT16_C(18190), UINT16_C(29117), UINT16_C(38689), UINT16_C(11637), UINT16_C(17900), UINT16_C(52555) }, { UINT16_C(30177), UINT16_C( 7669), UINT16_C(25079), UINT16_C(25887), UINT16_C(19204), UINT16_C(18190), UINT16_C( 1663), UINT16_C(29117) } }, { { UINT16_C(17082), UINT16_C(48686), UINT16_C(44686), UINT16_C(32709), UINT16_C(25666), UINT16_C(53797), UINT16_C(49275), UINT16_C(28889) }, { UINT16_C(63709), UINT16_C(60374), UINT16_C(37696), UINT16_C(24924), UINT16_C(53802), UINT16_C( 6030), UINT16_C(55575), UINT16_C(53732) }, { UINT16_C(17082), UINT16_C(63709), UINT16_C(48686), UINT16_C(60374), UINT16_C(44686), UINT16_C(37696), UINT16_C(32709), UINT16_C(24924) } }, { { UINT16_C( 4892), UINT16_C(43663), UINT16_C(21697), UINT16_C( 809), UINT16_C(20409), UINT16_C(13526), UINT16_C(44815), UINT16_C(60580) }, { UINT16_C(31400), UINT16_C(59607), UINT16_C(13325), UINT16_C(14409), UINT16_C(55302), UINT16_C( 7503), UINT16_C(13233), UINT16_C(52718) }, { UINT16_C( 4892), UINT16_C(31400), UINT16_C(43663), UINT16_C(59607), UINT16_C(21697), UINT16_C(13325), UINT16_C( 809), UINT16_C(14409) } }, { { UINT16_C(32070), UINT16_C( 1911), UINT16_C(41426), UINT16_C(35595), UINT16_C(57840), UINT16_C(65471), UINT16_C(25488), UINT16_C(14571) }, { UINT16_C(49886), UINT16_C(60192), UINT16_C(27382), UINT16_C(64547), UINT16_C(29250), UINT16_C(62233), UINT16_C( 1958), UINT16_C(60609) }, { UINT16_C(32070), UINT16_C(49886), UINT16_C( 1911), UINT16_C(60192), UINT16_C(41426), UINT16_C(27382), UINT16_C(35595), UINT16_C(64547) } }, { { UINT16_C(14469), UINT16_C(22516), UINT16_C(65497), UINT16_C(51682), UINT16_C(41440), UINT16_C(28872), UINT16_C(45828), UINT16_C(58025) }, { UINT16_C(51574), UINT16_C(27854), UINT16_C(61747), UINT16_C(30057), UINT16_C(33380), UINT16_C( 2665), UINT16_C(10890), UINT16_C( 4086) }, { UINT16_C(14469), UINT16_C(51574), UINT16_C(22516), UINT16_C(27854), UINT16_C(65497), UINT16_C(61747), UINT16_C(51682), UINT16_C(30057) } }, { { UINT16_C(60002), 
UINT16_C(15462), UINT16_C(18665), UINT16_C(51461), UINT16_C(52969), UINT16_C(60730), UINT16_C(58241), UINT16_C(63440) }, { UINT16_C(40620), UINT16_C(57444), UINT16_C(52623), UINT16_C(62293), UINT16_C(48719), UINT16_C(55805), UINT16_C(62696), UINT16_C(19432) }, { UINT16_C(60002), UINT16_C(40620), UINT16_C(15462), UINT16_C(57444), UINT16_C(18665), UINT16_C(52623), UINT16_C(51461), UINT16_C(62293) } }, { { UINT16_C(20190), UINT16_C(51335), UINT16_C(35990), UINT16_C(32657), UINT16_C(52058), UINT16_C(56429), UINT16_C(15790), UINT16_C(23507) }, { UINT16_C(14299), UINT16_C(27195), UINT16_C(36868), UINT16_C(21598), UINT16_C(23375), UINT16_C(14125), UINT16_C( 5711), UINT16_C(11906) }, { UINT16_C(20190), UINT16_C(14299), UINT16_C(51335), UINT16_C(27195), UINT16_C(35990), UINT16_C(36868), UINT16_C(32657), UINT16_C(21598) } }, { { UINT16_C( 2404), UINT16_C(64502), UINT16_C(34710), UINT16_C(61562), UINT16_C(59219), UINT16_C( 460), UINT16_C(40996), UINT16_C(65372) }, { UINT16_C(38871), UINT16_C(56426), UINT16_C(51240), UINT16_C(30512), UINT16_C(23843), UINT16_C(29614), UINT16_C(12659), UINT16_C(55457) }, { UINT16_C( 2404), UINT16_C(38871), UINT16_C(64502), UINT16_C(56426), UINT16_C(34710), UINT16_C(51240), UINT16_C(61562), UINT16_C(30512) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vzip1q_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vzip1q_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C(2065304680), UINT32_C( 4706323), UINT32_C( 57610732), UINT32_C(1886136178) }, { UINT32_C(1747746222), UINT32_C(2734993287), UINT32_C(2980376650), UINT32_C(4253478037) }, { UINT32_C(2065304680), UINT32_C(1747746222), UINT32_C( 4706323), UINT32_C(2734993287) } }, { { UINT32_C( 192454904), 
UINT32_C(1561116784), UINT32_C(1130396625), UINT32_C(1370737826) }, { UINT32_C(3501842249), UINT32_C(3648306575), UINT32_C(1116412077), UINT32_C(4231991556) }, { UINT32_C( 192454904), UINT32_C(3501842249), UINT32_C(1561116784), UINT32_C(3648306575) } }, { { UINT32_C( 570996914), UINT32_C(1233065080), UINT32_C( 848158607), UINT32_C(4119019692) }, { UINT32_C(2949004320), UINT32_C(2793945849), UINT32_C(1458049874), UINT32_C(3612485413) }, { UINT32_C( 570996914), UINT32_C(2949004320), UINT32_C(1233065080), UINT32_C(2793945849) } }, { { UINT32_C(1475959519), UINT32_C(4271995246), UINT32_C( 70266456), UINT32_C(2398794606) }, { UINT32_C(3896361199), UINT32_C(1284425466), UINT32_C(4272060121), UINT32_C(2111173789) }, { UINT32_C(1475959519), UINT32_C(3896361199), UINT32_C(4271995246), UINT32_C(1284425466) } }, { { UINT32_C(3184840527), UINT32_C(2696639816), UINT32_C( 312863651), UINT32_C(2376114078) }, { UINT32_C(1500896863), UINT32_C(2107966372), UINT32_C( 394020729), UINT32_C(2324975931) }, { UINT32_C(3184840527), UINT32_C(1500896863), UINT32_C(2696639816), UINT32_C(2107966372) } }, { { UINT32_C(1749575712), UINT32_C(2164851678), UINT32_C(2375266031), UINT32_C(2887464013) }, { UINT32_C(3053817874), UINT32_C( 221489812), UINT32_C( 740601841), UINT32_C( 565688321) }, { UINT32_C(1749575712), UINT32_C(3053817874), UINT32_C(2164851678), UINT32_C( 221489812) } }, { { UINT32_C(4287299361), UINT32_C(4051735298), UINT32_C(2390692929), UINT32_C(1513790024) }, { UINT32_C(3188735786), UINT32_C(3670819817), UINT32_C(4094095603), UINT32_C(3390422441) }, { UINT32_C(4287299361), UINT32_C(3188735786), UINT32_C(4051735298), UINT32_C(3670819817) } }, { { UINT32_C(3217661884), UINT32_C(1940932914), UINT32_C(2768318301), UINT32_C(4110367689) }, { UINT32_C(1672613754), UINT32_C(1178435155), UINT32_C( 406471791), UINT32_C(3202502401) }, { UINT32_C(3217661884), UINT32_C(1672613754), UINT32_C(1940932914), UINT32_C(1178435155) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vzip1q_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vzip1q_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 7183801971137112578), UINT64_C(16626255232757114459) }, { UINT64_C( 8131359811310929053), UINT64_C(16147889417916265065) }, { UINT64_C( 7183801971137112578), UINT64_C( 8131359811310929053) } }, { { UINT64_C(15057051385318308029), UINT64_C( 6700214673640614922) }, { UINT64_C(15803371279345651253), UINT64_C( 1282336582655705100) }, { UINT64_C(15057051385318308029), UINT64_C(15803371279345651253) } }, { { UINT64_C(17115254058352082495), UINT64_C(16221964400771381764) }, { UINT64_C( 529755678047281420), UINT64_C( 1077977485933676945) }, { UINT64_C(17115254058352082495), UINT64_C( 529755678047281420) } }, { { UINT64_C( 3750451961221322812), UINT64_C( 1841996491325378050) }, { UINT64_C(17526587403435266359), UINT64_C( 2871552645751319803) }, { UINT64_C( 3750451961221322812), UINT64_C(17526587403435266359) } }, { { UINT64_C(16702253309784136805), UINT64_C( 1190680762967769696) }, { UINT64_C( 921966960516931190), UINT64_C(15400815873310041001) }, { UINT64_C(16702253309784136805), UINT64_C( 921966960516931190) } }, { { UINT64_C(12905124458874470477), UINT64_C( 5395495782249967352) }, { UINT64_C( 5251459535740216260), UINT64_C(13102825539333044113) }, { UINT64_C(12905124458874470477), UINT64_C( 5251459535740216260) } }, { { UINT64_C( 8114641523400131223), UINT64_C( 2795083225679051949) }, { UINT64_C( 7794846792540745028), UINT64_C( 3868666430013376659) }, { UINT64_C( 8114641523400131223), UINT64_C( 7794846792540745028) } }, { { UINT64_C( 4786688231835932293), UINT64_C( 1369531685565345400) }, { UINT64_C( 7692467946781080698), UINT64_C(17869743954202013226) }, { 
UINT64_C( 4786688231835932293), UINT64_C( 7692467946781080698) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vzip1q_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vzip1_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vzip1_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vzip1_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vzip1_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vzip1_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vzip1_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vzip1_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vzip1q_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vzip1q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vzip1q_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vzip1q_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vzip1q_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vzip1q_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vzip1q_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vzip1q_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vzip1q_u64) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/arm/neon/zip2.c000066400000000000000000001505311400333146700163540ustar00rootroot00000000000000#define SIMDE_TEST_ARM_NEON_INSN zip2 #include #include static int test_simde_vzip2_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[2]; simde_float32 b[2]; simde_float32 r[2]; } test_vec[] = { { { SIMDE_FLOAT32_C( -56.55), SIMDE_FLOAT32_C( 618.01) }, { SIMDE_FLOAT32_C( 834.26), SIMDE_FLOAT32_C( -509.20) }, { SIMDE_FLOAT32_C( 618.01), SIMDE_FLOAT32_C( -509.20) } }, { { SIMDE_FLOAT32_C( 293.29), SIMDE_FLOAT32_C( -837.93) }, { SIMDE_FLOAT32_C( -408.68), SIMDE_FLOAT32_C( 430.47) }, { SIMDE_FLOAT32_C( -837.93), SIMDE_FLOAT32_C( 430.47) } }, { { SIMDE_FLOAT32_C( 222.04), SIMDE_FLOAT32_C( -192.86) }, { SIMDE_FLOAT32_C( 566.42), SIMDE_FLOAT32_C( 826.36) }, { SIMDE_FLOAT32_C( -192.86), SIMDE_FLOAT32_C( 826.36) } }, { { SIMDE_FLOAT32_C( -395.53), SIMDE_FLOAT32_C( 599.03) }, { SIMDE_FLOAT32_C( -555.41), SIMDE_FLOAT32_C( 
22.87) }, { SIMDE_FLOAT32_C( 599.03), SIMDE_FLOAT32_C( 22.87) } }, { { SIMDE_FLOAT32_C( 968.44), SIMDE_FLOAT32_C( 95.81) }, { SIMDE_FLOAT32_C( -638.58), SIMDE_FLOAT32_C( 816.26) }, { SIMDE_FLOAT32_C( 95.81), SIMDE_FLOAT32_C( 816.26) } }, { { SIMDE_FLOAT32_C( -762.51), SIMDE_FLOAT32_C( 946.71) }, { SIMDE_FLOAT32_C( -370.92), SIMDE_FLOAT32_C( 887.37) }, { SIMDE_FLOAT32_C( 946.71), SIMDE_FLOAT32_C( 887.37) } }, { { SIMDE_FLOAT32_C( -457.71), SIMDE_FLOAT32_C( -445.84) }, { SIMDE_FLOAT32_C( -147.78), SIMDE_FLOAT32_C( -754.97) }, { SIMDE_FLOAT32_C( -445.84), SIMDE_FLOAT32_C( -754.97) } }, { { SIMDE_FLOAT32_C( 814.41), SIMDE_FLOAT32_C( 434.23) }, { SIMDE_FLOAT32_C( 418.66), SIMDE_FLOAT32_C( -242.14) }, { SIMDE_FLOAT32_C( 434.23), SIMDE_FLOAT32_C( -242.14) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); simde_float32x2_t r = simde_vzip2_f32(a, b); simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vzip2_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[8]; int8_t b[8]; int8_t r[8]; } test_vec[] = { { { INT8_C( 70), -INT8_C( 80), -INT8_C( 52), INT8_C( 94), INT8_C( 33), INT8_C( 70), -INT8_C( 2), INT8_C( 95) }, { -INT8_C( 15), INT8_C( 85), INT8_MAX, INT8_C( 75), -INT8_C( 126), INT8_C( 59), INT8_C( 49), -INT8_C( 48) }, { INT8_C( 33), -INT8_C( 126), INT8_C( 70), INT8_C( 59), -INT8_C( 2), INT8_C( 49), INT8_C( 95), -INT8_C( 48) } }, { { INT8_C( 95), INT8_C( 87), INT8_C( 16), INT8_C( 40), INT8_C( 102), -INT8_C( 8), INT8_C( 74), -INT8_C( 100) }, { -INT8_C( 117), -INT8_C( 49), INT8_C( 35), -INT8_C( 122), -INT8_C( 72), -INT8_C( 81), -INT8_C( 87), -INT8_C( 2) }, { INT8_C( 102), -INT8_C( 72), -INT8_C( 8), -INT8_C( 81), INT8_C( 74), -INT8_C( 87), -INT8_C( 100), -INT8_C( 2) } }, { { INT8_C( 95), INT8_C( 117), INT8_C( 93), -INT8_C( 127), -INT8_C( 68), INT8_C( 91), -INT8_C( 32), -INT8_C( 83) }, 
{ -INT8_C( 80), INT8_C( 96), -INT8_C( 8), INT8_C( 50), -INT8_C( 101), INT8_C( 42), INT8_C( 3), -INT8_C( 6) }, { -INT8_C( 68), -INT8_C( 101), INT8_C( 91), INT8_C( 42), -INT8_C( 32), INT8_C( 3), -INT8_C( 83), -INT8_C( 6) } }, { { -INT8_C( 127), INT8_C( 19), INT8_C( 34), -INT8_C( 25), INT8_C( 11), INT8_C( 109), -INT8_C( 125), -INT8_C( 106) }, { INT8_C( 60), -INT8_C( 89), INT8_C( 28), -INT8_C( 12), INT8_C( 86), -INT8_C( 59), -INT8_C( 13), -INT8_C( 75) }, { INT8_C( 11), INT8_C( 86), INT8_C( 109), -INT8_C( 59), -INT8_C( 125), -INT8_C( 13), -INT8_C( 106), -INT8_C( 75) } }, { { INT8_C( 59), INT8_C( 80), INT8_C( 54), -INT8_C( 9), -INT8_C( 85), INT8_C( 23), -INT8_C( 92), INT8_C( 91) }, { INT8_C( 119), -INT8_C( 100), -INT8_C( 114), INT8_C( 18), -INT8_C( 58), -INT8_C( 111), INT8_C( 12), INT8_C( 72) }, { -INT8_C( 85), -INT8_C( 58), INT8_C( 23), -INT8_C( 111), -INT8_C( 92), INT8_C( 12), INT8_C( 91), INT8_C( 72) } }, { { -INT8_C( 92), INT8_C( 47), INT8_C( 47), -INT8_C( 81), -INT8_C( 100), -INT8_C( 77), INT8_C( 69), -INT8_C( 40) }, { INT8_C( 90), INT8_C( 97), -INT8_C( 51), -INT8_C( 80), INT8_C( 38), -INT8_C( 64), INT8_C( 101), INT8_C( 97) }, { -INT8_C( 100), INT8_C( 38), -INT8_C( 77), -INT8_C( 64), INT8_C( 69), INT8_C( 101), -INT8_C( 40), INT8_C( 97) } }, { { INT8_C( 16), -INT8_C( 100), INT8_C( 88), -INT8_C( 69), -INT8_C( 77), -INT8_C( 4), INT8_C( 22), INT8_C( 42) }, { -INT8_C( 103), -INT8_C( 92), INT8_C( 60), INT8_C( 95), INT8_C( 53), INT8_C( 72), -INT8_C( 89), -INT8_C( 39) }, { -INT8_C( 77), INT8_C( 53), -INT8_C( 4), INT8_C( 72), INT8_C( 22), -INT8_C( 89), INT8_C( 42), -INT8_C( 39) } }, { { INT8_C( 119), -INT8_C( 41), -INT8_C( 120), INT8_C( 19), -INT8_C( 118), -INT8_C( 51), -INT8_C( 20), -INT8_C( 28) }, { INT8_C( 46), -INT8_C( 71), -INT8_C( 108), INT8_C( 85), INT8_C( 121), -INT8_C( 7), -INT8_C( 74), -INT8_C( 119) }, { -INT8_C( 118), INT8_C( 121), -INT8_C( 51), -INT8_C( 7), -INT8_C( 20), -INT8_C( 74), -INT8_C( 28), -INT8_C( 119) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); simde_int8x8_t r = simde_vzip2_s8(a, b); simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); } return 0; } static int test_simde_vzip2_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[4]; int16_t b[4]; int16_t r[4]; } test_vec[] = { { { INT16_C( 31586), -INT16_C( 31027), INT16_C( 5600), -INT16_C( 5698) }, { INT16_C( 32317), INT16_C( 13734), INT16_C( 18228), -INT16_C( 31524) }, { INT16_C( 5600), INT16_C( 18228), -INT16_C( 5698), -INT16_C( 31524) } }, { { INT16_C( 11907), INT16_C( 17733), INT16_C( 12727), -INT16_C( 17795) }, { INT16_C( 19182), -INT16_C( 25509), INT16_C( 32649), -INT16_C( 5297) }, { INT16_C( 12727), INT16_C( 32649), -INT16_C( 17795), -INT16_C( 5297) } }, { { INT16_C( 7419), -INT16_C( 9359), INT16_C( 12082), INT16_C( 28612) }, { INT16_C( 27310), -INT16_C( 7516), -INT16_C( 32590), INT16_C( 13671) }, { INT16_C( 12082), -INT16_C( 32590), INT16_C( 28612), INT16_C( 13671) } }, { { -INT16_C( 21329), INT16_C( 26234), -INT16_C( 2083), -INT16_C( 13280) }, { INT16_C( 31553), -INT16_C( 13464), -INT16_C( 18438), -INT16_C( 2634) }, { -INT16_C( 2083), -INT16_C( 18438), -INT16_C( 13280), -INT16_C( 2634) } }, { { INT16_C( 10452), INT16_C( 1744), -INT16_C( 27561), INT16_C( 1397) }, { INT16_C( 6655), -INT16_C( 19992), INT16_C( 20378), INT16_C( 18918) }, { -INT16_C( 27561), INT16_C( 20378), INT16_C( 1397), INT16_C( 18918) } }, { { INT16_C( 24827), -INT16_C( 9809), -INT16_C( 12457), -INT16_C( 26459) }, { INT16_C( 3402), INT16_C( 17507), INT16_C( 6853), -INT16_C( 26310) }, { -INT16_C( 12457), INT16_C( 6853), -INT16_C( 26459), -INT16_C( 26310) } }, { { INT16_C( 2626), -INT16_C( 26209), INT16_C( 5279), -INT16_C( 24929) }, { -INT16_C( 30931), -INT16_C( 14513), INT16_C( 13782), -INT16_C( 12016) }, { INT16_C( 5279), INT16_C( 13782), -INT16_C( 24929), -INT16_C( 12016) } }, { { -INT16_C( 16491), -INT16_C( 4950), INT16_C( 20366), 
-INT16_C( 10108) }, { -INT16_C( 6051), INT16_C( 8733), INT16_C( 22274), INT16_C( 17595) }, { INT16_C( 20366), INT16_C( 22274), -INT16_C( 10108), INT16_C( 17595) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); simde_int16x4_t r = simde_vzip2_s16(a, b); simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } return 0; } static int test_simde_vzip2_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; int32_t r[2]; } test_vec[] = { { { -INT32_C( 1128440363), INT32_C( 500102932) }, { -INT32_C( 826084822), INT32_C( 2023511124) }, { INT32_C( 500102932), INT32_C( 2023511124) } }, { { INT32_C( 1458785856), -INT32_C( 835584527) }, { -INT32_C( 586230260), INT32_C( 2044523428) }, { -INT32_C( 835584527), INT32_C( 2044523428) } }, { { INT32_C( 1681234256), -INT32_C( 1149172847) }, { INT32_C( 1250510070), -INT32_C( 658365032) }, { -INT32_C( 1149172847), -INT32_C( 658365032) } }, { { INT32_C( 1697559923), -INT32_C( 1087152205) }, { -INT32_C( 644070859), -INT32_C( 2058127307) }, { -INT32_C( 1087152205), -INT32_C( 2058127307) } }, { { -INT32_C( 1561753583), -INT32_C( 2107807092) }, { INT32_C( 1187833774), -INT32_C( 2145415668) }, { -INT32_C( 2107807092), -INT32_C( 2145415668) } }, { { -INT32_C( 152744637), -INT32_C( 491448147) }, { -INT32_C( 1883483814), -INT32_C( 619442231) }, { -INT32_C( 491448147), -INT32_C( 619442231) } }, { { INT32_C( 595459479), INT32_C( 379968360) }, { -INT32_C( 832736830), INT32_C( 1129217279) }, { INT32_C( 379968360), INT32_C( 1129217279) } }, { { INT32_C( 1983460297), -INT32_C( 1504055476) }, { INT32_C( 171316544), -INT32_C( 1142601180) }, { -INT32_C( 1504055476), -INT32_C( 1142601180) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); simde_int32x2_t r = 
simde_vzip2_s32(a, b); simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } return 0; } static int test_simde_vzip2_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[8]; uint8_t b[8]; uint8_t r[8]; } test_vec[] = { { { UINT8_C( 92), UINT8_C( 83), UINT8_C( 54), UINT8_C(128), UINT8_C(189), UINT8_C(177), UINT8_C(148), UINT8_C(204) }, { UINT8_C( 3), UINT8_C( 8), UINT8_C( 33), UINT8_C( 35), UINT8_C( 77), UINT8_C(187), UINT8_C(189), UINT8_C(155) }, { UINT8_C(189), UINT8_C( 77), UINT8_C(177), UINT8_C(187), UINT8_C(148), UINT8_C(189), UINT8_C(204), UINT8_C(155) } }, { { UINT8_C(183), UINT8_C( 14), UINT8_C( 96), UINT8_C(141), UINT8_C( 14), UINT8_C(153), UINT8_C(229), UINT8_C( 0) }, { UINT8_C(117), UINT8_C(187), UINT8_C(126), UINT8_C(177), UINT8_C(175), UINT8_C(115), UINT8_C(204), UINT8_C( 11) }, { UINT8_C( 14), UINT8_C(175), UINT8_C(153), UINT8_C(115), UINT8_C(229), UINT8_C(204), UINT8_C( 0), UINT8_C( 11) } }, { { UINT8_C(198), UINT8_C( 2), UINT8_C(139), UINT8_C(131), UINT8_C(179), UINT8_C( 31), UINT8_C( 80), UINT8_C(182) }, { UINT8_C( 40), UINT8_C(113), UINT8_C(218), UINT8_C(117), UINT8_C( 44), UINT8_C(151), UINT8_C( 16), UINT8_C(228) }, { UINT8_C(179), UINT8_C( 44), UINT8_C( 31), UINT8_C(151), UINT8_C( 80), UINT8_C( 16), UINT8_C(182), UINT8_C(228) } }, { { UINT8_C(166), UINT8_C(112), UINT8_C(113), UINT8_C(180), UINT8_C( 9), UINT8_C( 86), UINT8_C(181), UINT8_C(126) }, { UINT8_C( 17), UINT8_C( 51), UINT8_C( 47), UINT8_C(192), UINT8_C(166), UINT8_C(251), UINT8_C(203), UINT8_C(108) }, { UINT8_C( 9), UINT8_C(166), UINT8_C( 86), UINT8_C(251), UINT8_C(181), UINT8_C(203), UINT8_C(126), UINT8_C(108) } }, { { UINT8_C(254), UINT8_C( 86), UINT8_C(239), UINT8_C(177), UINT8_C(118), UINT8_C( 63), UINT8_C(104), UINT8_C(158) }, { UINT8_C(177), UINT8_C( 66), UINT8_C( 19), UINT8_C(221), UINT8_C(217), UINT8_C( 35), UINT8_C(193), UINT8_C(127) }, { UINT8_C(118), UINT8_C(217), UINT8_C( 63), UINT8_C( 35), UINT8_C(104), UINT8_C(193), UINT8_C(158), UINT8_C(127) } }, { { 
UINT8_C(147), UINT8_C( 50), UINT8_C( 52), UINT8_C(156), UINT8_C(136), UINT8_C(233), UINT8_C( 27), UINT8_C(154) }, { UINT8_C( 28), UINT8_C( 74), UINT8_C( 90), UINT8_C(194), UINT8_C( 70), UINT8_C( 38), UINT8_C( 46), UINT8_C( 68) }, { UINT8_C(136), UINT8_C( 70), UINT8_C(233), UINT8_C( 38), UINT8_C( 27), UINT8_C( 46), UINT8_C(154), UINT8_C( 68) } }, { { UINT8_C(124), UINT8_C( 29), UINT8_C(245), UINT8_C(242), UINT8_C( 93), UINT8_C( 93), UINT8_C(144), UINT8_C( 14) }, { UINT8_C(159), UINT8_C(163), UINT8_C(235), UINT8_C(121), UINT8_C(198), UINT8_C(173), UINT8_C(248), UINT8_C( 89) }, { UINT8_C( 93), UINT8_C(198), UINT8_C( 93), UINT8_C(173), UINT8_C(144), UINT8_C(248), UINT8_C( 14), UINT8_C( 89) } }, { { UINT8_C(223), UINT8_C( 44), UINT8_C(246), UINT8_C(104), UINT8_C( 21), UINT8_C( 17), UINT8_C( 2), UINT8_C( 49) }, { UINT8_C( 91), UINT8_C( 92), UINT8_C(243), UINT8_C(161), UINT8_C(130), UINT8_C( 33), UINT8_C(229), UINT8_MAX }, { UINT8_C( 21), UINT8_C(130), UINT8_C( 17), UINT8_C( 33), UINT8_C( 2), UINT8_C(229), UINT8_C( 49), UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); simde_uint8x8_t r = simde_vzip2_u8(a, b); simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); } return 0; } static int test_simde_vzip2_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[4]; uint16_t b[4]; uint16_t r[4]; } test_vec[] = { { { UINT16_C(17902), UINT16_C(53967), UINT16_C(20983), UINT16_C(42835) }, { UINT16_C(47613), UINT16_C(55873), UINT16_C(16028), UINT16_C(21060) }, { UINT16_C(20983), UINT16_C(16028), UINT16_C(42835), UINT16_C(21060) } }, { { UINT16_C(47255), UINT16_C(46865), UINT16_C(60625), UINT16_C(64413) }, { UINT16_C(53844), UINT16_C(24480), UINT16_C(58718), UINT16_C(19663) }, { UINT16_C(60625), UINT16_C(58718), UINT16_C(64413), UINT16_C(19663) } }, { { UINT16_C(40490), UINT16_C( 8478), UINT16_C(29168), UINT16_C(60872) }, { 
UINT16_C( 2346), UINT16_C(51143), UINT16_C( 3144), UINT16_C(57113) }, { UINT16_C(29168), UINT16_C( 3144), UINT16_C(60872), UINT16_C(57113) } }, { { UINT16_C(10948), UINT16_C(38295), UINT16_C(13334), UINT16_C(27536) }, { UINT16_C(12550), UINT16_C(25802), UINT16_C(39190), UINT16_C(16560) }, { UINT16_C(13334), UINT16_C(39190), UINT16_C(27536), UINT16_C(16560) } }, { { UINT16_C(52792), UINT16_C(10337), UINT16_C(10559), UINT16_C(27157) }, { UINT16_C(56626), UINT16_C(31281), UINT16_C(19177), UINT16_C(44378) }, { UINT16_C(10559), UINT16_C(19177), UINT16_C(27157), UINT16_C(44378) } }, { { UINT16_C(61813), UINT16_C(35650), UINT16_C(54053), UINT16_C(11254) }, { UINT16_C(49156), UINT16_C( 6800), UINT16_C(16474), UINT16_C(37466) }, { UINT16_C(54053), UINT16_C(16474), UINT16_C(11254), UINT16_C(37466) } }, { { UINT16_C(47887), UINT16_C(20154), UINT16_C(53220), UINT16_C( 5816) }, { UINT16_C(59820), UINT16_C(38289), UINT16_C(60212), UINT16_C(43330) }, { UINT16_C(53220), UINT16_C(60212), UINT16_C( 5816), UINT16_C(43330) } }, { { UINT16_C(34268), UINT16_C( 308), UINT16_C(11096), UINT16_C(23596) }, { UINT16_C(48363), UINT16_C(17782), UINT16_C(53501), UINT16_C( 3287) }, { UINT16_C(11096), UINT16_C(53501), UINT16_C(23596), UINT16_C( 3287) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); simde_uint16x4_t r = simde_vzip2_u16(a, b); simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } return 0; } static int test_simde_vzip2_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[2]; uint32_t b[2]; uint32_t r[2]; } test_vec[] = { { { UINT32_C( 711269039), UINT32_C(1500404369) }, { UINT32_C( 481603462), UINT32_C(3826687848) }, { UINT32_C(1500404369), UINT32_C(3826687848) } }, { { UINT32_C(2274796837), UINT32_C(1676736426) }, { UINT32_C(1950206382), UINT32_C(1098409105) }, { UINT32_C(1676736426), UINT32_C(1098409105) } }, { { UINT32_C( 
275504510), UINT32_C(3194608184) }, { UINT32_C(4057603465), UINT32_C(3805671612) }, { UINT32_C(3194608184), UINT32_C(3805671612) } }, { { UINT32_C(1080650902), UINT32_C( 295917923) }, { UINT32_C(3028672803), UINT32_C(3287678277) }, { UINT32_C( 295917923), UINT32_C(3287678277) } }, { { UINT32_C( 332620251), UINT32_C(3302046779) }, { UINT32_C( 381070169), UINT32_C( 855149468) }, { UINT32_C(3302046779), UINT32_C( 855149468) } }, { { UINT32_C(1517445623), UINT32_C(3714848186) }, { UINT32_C( 999485942), UINT32_C(3405744111) }, { UINT32_C(3714848186), UINT32_C(3405744111) } }, { { UINT32_C( 601740008), UINT32_C(1760079375) }, { UINT32_C(4118715993), UINT32_C( 556234281) }, { UINT32_C(1760079375), UINT32_C( 556234281) } }, { { UINT32_C(2457573847), UINT32_C(2775574447) }, { UINT32_C(3370189273), UINT32_C(1905451145) }, { UINT32_C(2775574447), UINT32_C(1905451145) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); simde_uint32x2_t r = simde_vzip2_u32(a, b); simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } return 0; } static int test_simde_vzip2q_f32 (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 991.39), SIMDE_FLOAT32_C( -819.33), SIMDE_FLOAT32_C( -284.09), SIMDE_FLOAT32_C( -188.89) }, { SIMDE_FLOAT32_C( -984.89), SIMDE_FLOAT32_C( -117.35), SIMDE_FLOAT32_C( 890.93), SIMDE_FLOAT32_C( 277.64) }, { SIMDE_FLOAT32_C( -284.09), SIMDE_FLOAT32_C( 890.93), SIMDE_FLOAT32_C( -188.89), SIMDE_FLOAT32_C( 277.64) } }, { { SIMDE_FLOAT32_C( 299.93), SIMDE_FLOAT32_C( 132.15), SIMDE_FLOAT32_C( -176.04), SIMDE_FLOAT32_C( 602.01) }, { SIMDE_FLOAT32_C( 395.84), SIMDE_FLOAT32_C( 482.13), SIMDE_FLOAT32_C( 505.78), SIMDE_FLOAT32_C( -624.38) }, { SIMDE_FLOAT32_C( -176.04), SIMDE_FLOAT32_C( 505.78), SIMDE_FLOAT32_C( 602.01), SIMDE_FLOAT32_C( -624.38) } }, 
{ { SIMDE_FLOAT32_C( -485.02), SIMDE_FLOAT32_C( 246.12), SIMDE_FLOAT32_C( -226.41), SIMDE_FLOAT32_C( -660.41) }, { SIMDE_FLOAT32_C( 304.88), SIMDE_FLOAT32_C( 546.13), SIMDE_FLOAT32_C( 742.69), SIMDE_FLOAT32_C( -648.05) }, { SIMDE_FLOAT32_C( -226.41), SIMDE_FLOAT32_C( 742.69), SIMDE_FLOAT32_C( -660.41), SIMDE_FLOAT32_C( -648.05) } }, { { SIMDE_FLOAT32_C( -973.16), SIMDE_FLOAT32_C( 350.44), SIMDE_FLOAT32_C( 651.48), SIMDE_FLOAT32_C( -776.37) }, { SIMDE_FLOAT32_C( -169.18), SIMDE_FLOAT32_C( 445.04), SIMDE_FLOAT32_C( -966.52), SIMDE_FLOAT32_C( -177.79) }, { SIMDE_FLOAT32_C( 651.48), SIMDE_FLOAT32_C( -966.52), SIMDE_FLOAT32_C( -776.37), SIMDE_FLOAT32_C( -177.79) } }, { { SIMDE_FLOAT32_C( 625.71), SIMDE_FLOAT32_C( -250.61), SIMDE_FLOAT32_C( 633.32), SIMDE_FLOAT32_C( 640.82) }, { SIMDE_FLOAT32_C( 632.04), SIMDE_FLOAT32_C( 524.25), SIMDE_FLOAT32_C( -81.55), SIMDE_FLOAT32_C( -68.03) }, { SIMDE_FLOAT32_C( 633.32), SIMDE_FLOAT32_C( -81.55), SIMDE_FLOAT32_C( 640.82), SIMDE_FLOAT32_C( -68.03) } }, { { SIMDE_FLOAT32_C( -343.59), SIMDE_FLOAT32_C( 742.42), SIMDE_FLOAT32_C( -466.02), SIMDE_FLOAT32_C( -947.75) }, { SIMDE_FLOAT32_C( 224.55), SIMDE_FLOAT32_C( -960.24), SIMDE_FLOAT32_C( -572.13), SIMDE_FLOAT32_C( 739.52) }, { SIMDE_FLOAT32_C( -466.02), SIMDE_FLOAT32_C( -572.13), SIMDE_FLOAT32_C( -947.75), SIMDE_FLOAT32_C( 739.52) } }, { { SIMDE_FLOAT32_C( 285.88), SIMDE_FLOAT32_C( 201.46), SIMDE_FLOAT32_C( -920.88), SIMDE_FLOAT32_C( -409.24) }, { SIMDE_FLOAT32_C( -252.40), SIMDE_FLOAT32_C( 821.81), SIMDE_FLOAT32_C( -57.30), SIMDE_FLOAT32_C( -225.56) }, { SIMDE_FLOAT32_C( -920.88), SIMDE_FLOAT32_C( -57.30), SIMDE_FLOAT32_C( -409.24), SIMDE_FLOAT32_C( -225.56) } }, { { SIMDE_FLOAT32_C( 172.25), SIMDE_FLOAT32_C( -405.81), SIMDE_FLOAT32_C( -1.92), SIMDE_FLOAT32_C( -996.93) }, { SIMDE_FLOAT32_C( -960.77), SIMDE_FLOAT32_C( 31.56), SIMDE_FLOAT32_C( -174.72), SIMDE_FLOAT32_C( 664.94) }, { SIMDE_FLOAT32_C( -1.92), SIMDE_FLOAT32_C( -174.72), SIMDE_FLOAT32_C( -996.93), SIMDE_FLOAT32_C( 664.94) } 
}, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); simde_float32x4_t r = simde_vzip2q_f32(a, b); simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; } static int test_simde_vzip2q_s8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 85), INT8_C( 12), INT8_C( 36), INT8_C( 28), INT8_C( 122), INT8_C( 111), INT8_C( 64), -INT8_C( 98), -INT8_C( 65), INT8_C( 75), INT8_C( 48), -INT8_C( 66), INT8_C( 91), -INT8_C( 17), -INT8_C( 68), -INT8_C( 84) }, { INT8_C( 66), -INT8_C( 115), -INT8_C( 31), -INT8_C( 33), INT8_C( 20), INT8_MIN, INT8_C( 7), -INT8_C( 96), -INT8_C( 89), -INT8_C( 66), -INT8_C( 35), INT8_C( 3), INT8_MAX, INT8_C( 12), INT8_C( 18), -INT8_C( 44) }, { -INT8_C( 65), -INT8_C( 89), INT8_C( 75), -INT8_C( 66), INT8_C( 48), -INT8_C( 35), -INT8_C( 66), INT8_C( 3), INT8_C( 91), INT8_MAX, -INT8_C( 17), INT8_C( 12), -INT8_C( 68), INT8_C( 18), -INT8_C( 84), -INT8_C( 44) } }, { { INT8_C( 25), INT8_C( 54), -INT8_C( 15), -INT8_C( 109), -INT8_C( 90), INT8_C( 49), INT8_C( 49), INT8_C( 101), INT8_C( 124), INT8_C( 97), INT8_C( 35), -INT8_C( 41), INT8_C( 80), -INT8_C( 32), -INT8_C( 125), -INT8_C( 109) }, { INT8_C( 109), INT8_C( 100), INT8_C( 114), -INT8_C( 126), -INT8_C( 28), INT8_C( 121), INT8_C( 34), -INT8_C( 116), INT8_C( 56), -INT8_C( 1), -INT8_C( 113), -INT8_C( 73), INT8_C( 12), -INT8_C( 94), -INT8_C( 117), INT8_C( 37) }, { INT8_C( 124), INT8_C( 56), INT8_C( 97), -INT8_C( 1), INT8_C( 35), -INT8_C( 113), -INT8_C( 41), -INT8_C( 73), INT8_C( 80), INT8_C( 12), -INT8_C( 32), -INT8_C( 94), -INT8_C( 125), -INT8_C( 117), -INT8_C( 109), INT8_C( 37) } }, { { -INT8_C( 40), INT8_C( 124), -INT8_C( 72), INT8_C( 126), -INT8_C( 83), -INT8_C( 23), -INT8_C( 28), INT8_C( 41), INT8_C( 74), INT8_C( 7), INT8_C( 0), -INT8_C( 102), -INT8_C( 25), -INT8_C( 125), INT8_C( 45), 
INT8_C( 85) }, { -INT8_C( 25), -INT8_C( 97), -INT8_C( 41), -INT8_C( 52), INT8_C( 25), -INT8_C( 7), INT8_C( 88), INT8_C( 81), -INT8_C( 7), -INT8_C( 25), INT8_C( 8), INT8_C( 5), -INT8_C( 119), -INT8_C( 109), INT8_C( 42), INT8_C( 98) }, { INT8_C( 74), -INT8_C( 7), INT8_C( 7), -INT8_C( 25), INT8_C( 0), INT8_C( 8), -INT8_C( 102), INT8_C( 5), -INT8_C( 25), -INT8_C( 119), -INT8_C( 125), -INT8_C( 109), INT8_C( 45), INT8_C( 42), INT8_C( 85), INT8_C( 98) } }, { { INT8_C( 16), -INT8_C( 30), -INT8_C( 32), -INT8_C( 67), -INT8_C( 53), -INT8_C( 60), -INT8_C( 25), INT8_C( 21), -INT8_C( 52), -INT8_C( 25), -INT8_C( 81), -INT8_C( 77), INT8_C( 107), -INT8_C( 35), INT8_C( 8), INT8_C( 82) }, { INT8_C( 124), -INT8_C( 33), INT8_C( 30), -INT8_C( 107), -INT8_C( 39), INT8_C( 118), -INT8_C( 26), -INT8_C( 46), INT8_C( 94), -INT8_C( 18), -INT8_C( 41), -INT8_C( 25), -INT8_C( 126), INT8_C( 1), INT8_C( 73), -INT8_C( 110) }, { -INT8_C( 52), INT8_C( 94), -INT8_C( 25), -INT8_C( 18), -INT8_C( 81), -INT8_C( 41), -INT8_C( 77), -INT8_C( 25), INT8_C( 107), -INT8_C( 126), -INT8_C( 35), INT8_C( 1), INT8_C( 8), INT8_C( 73), INT8_C( 82), -INT8_C( 110) } }, { { -INT8_C( 29), INT8_C( 42), INT8_C( 79), -INT8_C( 82), -INT8_C( 18), INT8_C( 54), -INT8_C( 61), -INT8_C( 70), INT8_C( 30), INT8_C( 114), INT8_C( 110), -INT8_C( 119), INT8_C( 79), INT8_C( 118), -INT8_C( 37), -INT8_C( 52) }, { INT8_C( 86), -INT8_C( 6), INT8_C( 97), INT8_C( 47), INT8_C( 112), INT8_C( 72), INT8_C( 1), -INT8_C( 50), INT8_C( 54), -INT8_C( 40), -INT8_C( 74), -INT8_C( 72), -INT8_C( 39), -INT8_C( 1), INT8_C( 74), -INT8_C( 68) }, { INT8_C( 30), INT8_C( 54), INT8_C( 114), -INT8_C( 40), INT8_C( 110), -INT8_C( 74), -INT8_C( 119), -INT8_C( 72), INT8_C( 79), -INT8_C( 39), INT8_C( 118), -INT8_C( 1), -INT8_C( 37), INT8_C( 74), -INT8_C( 52), -INT8_C( 68) } }, { { INT8_C( 41), -INT8_C( 102), INT8_C( 106), INT8_C( 24), -INT8_C( 48), INT8_C( 45), -INT8_C( 46), -INT8_C( 18), -INT8_C( 97), INT8_C( 64), INT8_C( 119), -INT8_C( 17), -INT8_C( 73), INT8_C( 83), 
-INT8_C( 69), INT8_C( 13) }, { INT8_C( 77), INT8_C( 28), INT8_C( 60), -INT8_C( 67), INT8_C( 100), INT8_C( 61), -INT8_C( 116), -INT8_C( 101), INT8_C( 21), INT8_C( 66), INT8_C( 83), -INT8_C( 18), INT8_C( 65), -INT8_C( 98), -INT8_C( 86), INT8_C( 107) }, { -INT8_C( 97), INT8_C( 21), INT8_C( 64), INT8_C( 66), INT8_C( 119), INT8_C( 83), -INT8_C( 17), -INT8_C( 18), -INT8_C( 73), INT8_C( 65), INT8_C( 83), -INT8_C( 98), -INT8_C( 69), -INT8_C( 86), INT8_C( 13), INT8_C( 107) } }, { { INT8_C( 56), INT8_C( 20), -INT8_C( 125), INT8_C( 8), INT8_C( 65), INT8_C( 85), -INT8_C( 9), -INT8_C( 32), -INT8_C( 106), INT8_C( 110), -INT8_C( 49), INT8_C( 77), -INT8_C( 63), -INT8_C( 118), INT8_C( 90), INT8_C( 14) }, { -INT8_C( 89), -INT8_C( 106), -INT8_C( 52), INT8_C( 11), -INT8_C( 45), INT8_C( 88), -INT8_C( 90), -INT8_C( 24), -INT8_C( 102), -INT8_C( 6), -INT8_C( 42), -INT8_C( 37), -INT8_C( 104), INT8_MIN, INT8_C( 70), -INT8_C( 48) }, { -INT8_C( 106), -INT8_C( 102), INT8_C( 110), -INT8_C( 6), -INT8_C( 49), -INT8_C( 42), INT8_C( 77), -INT8_C( 37), -INT8_C( 63), -INT8_C( 104), -INT8_C( 118), INT8_MIN, INT8_C( 90), INT8_C( 70), INT8_C( 14), -INT8_C( 48) } }, { { -INT8_C( 108), -INT8_C( 55), -INT8_C( 40), -INT8_C( 43), INT8_C( 31), -INT8_C( 49), -INT8_C( 75), -INT8_C( 75), INT8_C( 62), -INT8_C( 123), INT8_C( 2), -INT8_C( 1), INT8_C( 15), INT8_C( 92), INT8_C( 14), -INT8_C( 74) }, { -INT8_C( 14), -INT8_C( 38), -INT8_C( 62), -INT8_C( 59), INT8_C( 50), INT8_C( 104), -INT8_C( 83), -INT8_C( 52), INT8_C( 98), -INT8_C( 125), -INT8_C( 89), -INT8_C( 6), INT8_C( 3), -INT8_C( 18), -INT8_C( 54), -INT8_C( 105) }, { INT8_C( 62), INT8_C( 98), -INT8_C( 123), -INT8_C( 125), INT8_C( 2), -INT8_C( 89), -INT8_C( 1), -INT8_C( 6), INT8_C( 15), INT8_C( 3), INT8_C( 92), -INT8_C( 18), INT8_C( 14), -INT8_C( 54), -INT8_C( 74), -INT8_C( 105) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); 
simde_int8x16_t r = simde_vzip2q_s8(a, b); simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); } return 0; } static int test_simde_vzip2q_s16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { INT16_C( 29172), INT16_C( 1064), -INT16_C( 10040), -INT16_C( 22031), INT16_C( 23069), INT16_C( 25595), INT16_C( 31505), -INT16_C( 6235) }, { -INT16_C( 20461), -INT16_C( 29355), INT16_C( 11474), -INT16_C( 31606), INT16_C( 1073), -INT16_C( 19233), -INT16_C( 22064), -INT16_C( 15219) }, { INT16_C( 23069), INT16_C( 1073), INT16_C( 25595), -INT16_C( 19233), INT16_C( 31505), -INT16_C( 22064), -INT16_C( 6235), -INT16_C( 15219) } }, { { -INT16_C( 19174), -INT16_C( 7224), -INT16_C( 18034), -INT16_C( 21620), -INT16_C( 30957), INT16_C( 9231), -INT16_C( 19454), INT16_C( 5644) }, { INT16_C( 24932), INT16_C( 13987), INT16_C( 11661), -INT16_C( 16710), -INT16_C( 26319), INT16_C( 370), INT16_C( 67), INT16_C( 24006) }, { -INT16_C( 30957), -INT16_C( 26319), INT16_C( 9231), INT16_C( 370), -INT16_C( 19454), INT16_C( 67), INT16_C( 5644), INT16_C( 24006) } }, { { -INT16_C( 29003), INT16_C( 17216), -INT16_C( 13240), INT16_C( 23535), -INT16_C( 428), INT16_C( 22144), -INT16_C( 29518), INT16_C( 5740) }, { INT16_C( 4077), INT16_C( 31308), INT16_C( 1852), INT16_C( 27960), -INT16_C( 21856), -INT16_C( 7313), INT16_C( 13738), INT16_C( 24641) }, { -INT16_C( 428), -INT16_C( 21856), INT16_C( 22144), -INT16_C( 7313), -INT16_C( 29518), INT16_C( 13738), INT16_C( 5740), INT16_C( 24641) } }, { { -INT16_C( 32317), INT16_C( 2979), -INT16_C( 28082), -INT16_C( 23961), -INT16_C( 6256), INT16_C( 17144), INT16_C( 25971), INT16_C( 24664) }, { -INT16_C( 23180), -INT16_C( 20006), INT16_C( 4780), INT16_C( 19486), -INT16_C( 29252), INT16_C( 26416), INT16_C( 29122), -INT16_C( 31033) }, { -INT16_C( 6256), -INT16_C( 29252), INT16_C( 17144), INT16_C( 26416), INT16_C( 25971), INT16_C( 29122), INT16_C( 24664), -INT16_C( 31033) } }, { { INT16_C( 27378), INT16_C( 
16529), -INT16_C( 1795), -INT16_C( 29214), -INT16_C( 9249), INT16_C( 21200), INT16_C( 10304), -INT16_C( 19278) }, { -INT16_C( 29491), INT16_C( 31077), -INT16_C( 31586), INT16_C( 23494), -INT16_C( 2543), -INT16_C( 11070), -INT16_C( 30361), INT16_C( 22874) }, { -INT16_C( 9249), -INT16_C( 2543), INT16_C( 21200), -INT16_C( 11070), INT16_C( 10304), -INT16_C( 30361), -INT16_C( 19278), INT16_C( 22874) } }, { { -INT16_C( 5133), -INT16_C( 3942), INT16_C( 31972), -INT16_C( 15490), INT16_C( 20055), -INT16_C( 26858), -INT16_C( 14218), INT16_C( 17484) }, { -INT16_C( 20139), -INT16_C( 3139), -INT16_C( 31947), INT16_C( 18254), INT16_C( 4217), -INT16_C( 8165), INT16_C( 30105), -INT16_C( 29382) }, { INT16_C( 20055), INT16_C( 4217), -INT16_C( 26858), -INT16_C( 8165), -INT16_C( 14218), INT16_C( 30105), INT16_C( 17484), -INT16_C( 29382) } }, { { -INT16_C( 11168), INT16_C( 17533), -INT16_C( 1200), -INT16_C( 22520), INT16_C( 7753), -INT16_C( 16321), -INT16_C( 29722), INT16_C( 15108) }, { -INT16_C( 16067), INT16_C( 29231), INT16_C( 32069), -INT16_C( 16711), -INT16_C( 11122), INT16_C( 10143), -INT16_C( 9911), -INT16_C( 21836) }, { INT16_C( 7753), -INT16_C( 11122), -INT16_C( 16321), INT16_C( 10143), -INT16_C( 29722), -INT16_C( 9911), INT16_C( 15108), -INT16_C( 21836) } }, { { INT16_C( 12973), -INT16_C( 530), -INT16_C( 2515), INT16_C( 30629), -INT16_C( 6892), -INT16_C( 1225), INT16_C( 15216), -INT16_C( 21194) }, { INT16_C( 26108), INT16_C( 16672), -INT16_C( 9757), INT16_C( 28928), -INT16_C( 24658), -INT16_C( 2152), INT16_C( 19832), INT16_C( 9633) }, { -INT16_C( 6892), -INT16_C( 24658), -INT16_C( 1225), -INT16_C( 2152), INT16_C( 15216), INT16_C( 19832), -INT16_C( 21194), INT16_C( 9633) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); simde_int16x8_t r = simde_vzip2q_s16(a, b); simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } 
return 0; } static int test_simde_vzip2q_s32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { -INT32_C( 447870806), INT32_C( 1960556641), INT32_C( 1546653011), -INT32_C( 295568563) }, { -INT32_C( 145715186), INT32_C( 1570097258), -INT32_C( 1928396662), -INT32_C( 810854875) }, { INT32_C( 1546653011), -INT32_C( 1928396662), -INT32_C( 295568563), -INT32_C( 810854875) } }, { { -INT32_C( 1078593186), INT32_C( 70553777), -INT32_C( 345938787), INT32_C( 1842987615) }, { -INT32_C( 1134220974), INT32_C( 2065300209), INT32_C( 537471482), -INT32_C( 554716033) }, { -INT32_C( 345938787), INT32_C( 537471482), INT32_C( 1842987615), -INT32_C( 554716033) } }, { { INT32_C( 1604166829), -INT32_C( 765210316), -INT32_C( 1782725579), -INT32_C( 654141818) }, { -INT32_C( 1332385857), INT32_C( 1546366817), INT32_C( 1467757784), -INT32_C( 1774883864) }, { -INT32_C( 1782725579), INT32_C( 1467757784), -INT32_C( 654141818), -INT32_C( 1774883864) } }, { { INT32_C( 1156961040), -INT32_C( 636069724), -INT32_C( 1552952547), INT32_C( 679244137) }, { INT32_C( 987239129), -INT32_C( 1718221631), INT32_C( 569381432), -INT32_C( 1900599682) }, { -INT32_C( 1552952547), INT32_C( 569381432), INT32_C( 679244137), -INT32_C( 1900599682) } }, { { -INT32_C( 1647137543), INT32_C( 561506564), INT32_C( 650503868), INT32_C( 827212120) }, { INT32_C( 342566739), INT32_C( 1672282667), -INT32_C( 1836802540), -INT32_C( 1121961020) }, { INT32_C( 650503868), -INT32_C( 1836802540), INT32_C( 827212120), -INT32_C( 1121961020) } }, { { -INT32_C( 329583641), -INT32_C( 1743924516), INT32_C( 280941240), INT32_C( 1732316436) }, { INT32_C( 1602006324), -INT32_C( 1010685521), -INT32_C( 1957279801), INT32_C( 1783133826) }, { INT32_C( 280941240), -INT32_C( 1957279801), INT32_C( 1732316436), INT32_C( 1783133826) } }, { { INT32_C( 1163305833), INT32_C( 769549173), INT32_C( 1245617206), -INT32_C( 575570007) }, { -INT32_C( 616813268), INT32_C( 513802071), -INT32_C( 928320186), -INT32_C( 
718081429) }, { INT32_C( 1245617206), -INT32_C( 928320186), -INT32_C( 575570007), -INT32_C( 718081429) } }, { { INT32_C( 169511061), INT32_C( 574159084), INT32_C( 1047295637), INT32_C( 572268021) }, { -INT32_C( 1560455093), -INT32_C( 1648321449), -INT32_C( 43619695), -INT32_C( 221079459) }, { INT32_C( 1047295637), -INT32_C( 43619695), INT32_C( 572268021), -INT32_C( 221079459) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); simde_int32x4_t r = simde_vzip2q_s32(a, b); simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } return 0; } static int test_simde_vzip2q_s64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; int64_t b[2]; int64_t r[2]; } test_vec[] = { { { INT64_C( 3389154455161181546), INT64_C( 7789600881989832426) }, { -INT64_C( 6216138325886961127), INT64_C( 8128889602060271500) }, { INT64_C( 7789600881989832426), INT64_C( 8128889602060271500) } }, { { INT64_C( 4280588231775133491), -INT64_C( 3843400407768008318) }, { -INT64_C( 2503139029515142790), INT64_C( 4595617808483249743) }, { -INT64_C( 3843400407768008318), INT64_C( 4595617808483249743) } }, { { -INT64_C( 6190516375007038583), INT64_C( 8490571704889211736) }, { -INT64_C( 8558999586011666166), -INT64_C( 4175499598915218751) }, { INT64_C( 8490571704889211736), -INT64_C( 4175499598915218751) } }, { { -INT64_C( 4905907207152761576), INT64_C( 7900326818308542771) }, { -INT64_C( 4070128137537015102), INT64_C( 2402004138219365202) }, { INT64_C( 7900326818308542771), INT64_C( 2402004138219365202) } }, { { -INT64_C( 2286056552104069593), INT64_C( 7416498006436743965) }, { INT64_C( 518167619684185188), -INT64_C( 8109598195675904694) }, { INT64_C( 7416498006436743965), -INT64_C( 8109598195675904694) } }, { { INT64_C( 4498921349512353505), -INT64_C( 7820595108767695508) }, { INT64_C( 727944592766064991), INT64_C( 9001237270419583218) }, { -INT64_C( 
7820595108767695508), INT64_C( 9001237270419583218) } }, { { -INT64_C( 3312544423324482393), -INT64_C( 3478649799431698953) }, { -INT64_C( 3026702744038492620), -INT64_C( 3115054789748040787) }, { -INT64_C( 3478649799431698953), -INT64_C( 3115054789748040787) } }, { { INT64_C( 4780188694459830302), -INT64_C( 4059675570677991062) }, { INT64_C( 3931846471610764248), -INT64_C( 498416479562121196) }, { -INT64_C( 4059675570677991062), -INT64_C( 498416479562121196) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); simde_int64x2_t r = simde_vzip2q_s64(a, b); simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); } return 0; } static int test_simde_vzip2q_u8 (SIMDE_MUNIT_TEST_ARGS) { struct { uint8_t a[16]; uint8_t b[16]; uint8_t r[16]; } test_vec[] = { { { UINT8_C(127), UINT8_C( 19), UINT8_C(254), UINT8_C( 88), UINT8_C( 56), UINT8_C( 15), UINT8_C( 99), UINT8_C( 77), UINT8_C( 51), UINT8_C(112), UINT8_C( 89), UINT8_C(110), UINT8_C(170), UINT8_C( 43), UINT8_C( 97), UINT8_C( 38) }, { UINT8_C(185), UINT8_C(103), UINT8_C( 79), UINT8_C(147), UINT8_C( 2), UINT8_C( 18), UINT8_C(242), UINT8_C(119), UINT8_C( 51), UINT8_C( 19), UINT8_C( 24), UINT8_C( 45), UINT8_C(135), UINT8_C(119), UINT8_C( 0), UINT8_C( 6) }, { UINT8_C( 51), UINT8_C( 51), UINT8_C(112), UINT8_C( 19), UINT8_C( 89), UINT8_C( 24), UINT8_C(110), UINT8_C( 45), UINT8_C(170), UINT8_C(135), UINT8_C( 43), UINT8_C(119), UINT8_C( 97), UINT8_C( 0), UINT8_C( 38), UINT8_C( 6) } }, { { UINT8_C(139), UINT8_C(254), UINT8_C( 95), UINT8_C(195), UINT8_C( 14), UINT8_C(194), UINT8_C( 16), UINT8_C( 65), UINT8_C( 50), UINT8_C(105), UINT8_C(175), UINT8_C(220), UINT8_C(148), UINT8_C( 16), UINT8_C( 3), UINT8_C( 78) }, { UINT8_C(120), UINT8_C( 82), UINT8_C(225), UINT8_C(122), UINT8_C(100), UINT8_C(211), UINT8_C(241), UINT8_C(151), UINT8_C(230), UINT8_C( 10), UINT8_C(196), UINT8_C(109), 
UINT8_C(129), UINT8_C(196), UINT8_C(116), UINT8_C( 12) }, { UINT8_C( 50), UINT8_C(230), UINT8_C(105), UINT8_C( 10), UINT8_C(175), UINT8_C(196), UINT8_C(220), UINT8_C(109), UINT8_C(148), UINT8_C(129), UINT8_C( 16), UINT8_C(196), UINT8_C( 3), UINT8_C(116), UINT8_C( 78), UINT8_C( 12) } }, { { UINT8_C(195), UINT8_C(211), UINT8_C(207), UINT8_C(209), UINT8_C(149), UINT8_C(223), UINT8_C( 18), UINT8_C(199), UINT8_C( 72), UINT8_C(193), UINT8_C(163), UINT8_C(221), UINT8_C(210), UINT8_C(166), UINT8_C( 43), UINT8_C( 74) }, { UINT8_C(248), UINT8_C( 12), UINT8_C(196), UINT8_C( 93), UINT8_C(223), UINT8_C(182), UINT8_C(244), UINT8_C(197), UINT8_C(192), UINT8_C(185), UINT8_C( 51), UINT8_C( 65), UINT8_C(125), UINT8_C(167), UINT8_C( 78), UINT8_C( 64) }, { UINT8_C( 72), UINT8_C(192), UINT8_C(193), UINT8_C(185), UINT8_C(163), UINT8_C( 51), UINT8_C(221), UINT8_C( 65), UINT8_C(210), UINT8_C(125), UINT8_C(166), UINT8_C(167), UINT8_C( 43), UINT8_C( 78), UINT8_C( 74), UINT8_C( 64) } }, { { UINT8_C(122), UINT8_C( 29), UINT8_C( 17), UINT8_C( 15), UINT8_C(253), UINT8_C( 35), UINT8_C(214), UINT8_C( 69), UINT8_C(229), UINT8_C(121), UINT8_C( 34), UINT8_C(183), UINT8_C( 32), UINT8_C( 77), UINT8_C( 1), UINT8_C( 24) }, { UINT8_C( 89), UINT8_C(197), UINT8_C(117), UINT8_C( 56), UINT8_C(123), UINT8_C(106), UINT8_C(254), UINT8_C( 59), UINT8_C( 35), UINT8_C( 49), UINT8_C(125), UINT8_C(160), UINT8_C(216), UINT8_C(203), UINT8_C(225), UINT8_C( 82) }, { UINT8_C(229), UINT8_C( 35), UINT8_C(121), UINT8_C( 49), UINT8_C( 34), UINT8_C(125), UINT8_C(183), UINT8_C(160), UINT8_C( 32), UINT8_C(216), UINT8_C( 77), UINT8_C(203), UINT8_C( 1), UINT8_C(225), UINT8_C( 24), UINT8_C( 82) } }, { { UINT8_C(232), UINT8_C(242), UINT8_C( 97), UINT8_C(229), UINT8_C( 22), UINT8_C( 55), UINT8_C( 43), UINT8_C(251), UINT8_C(176), UINT8_C( 77), UINT8_C(178), UINT8_C(208), UINT8_C(155), UINT8_C(179), UINT8_C(233), UINT8_C(244) }, { UINT8_C(120), UINT8_C( 94), UINT8_C( 45), UINT8_C(244), UINT8_C(200), UINT8_C( 43), UINT8_C( 47), 
UINT8_C(235), UINT8_C( 92), UINT8_C(172), UINT8_C(140), UINT8_C( 52), UINT8_C(119), UINT8_C(109), UINT8_C(134), UINT8_C( 96) }, { UINT8_C(176), UINT8_C( 92), UINT8_C( 77), UINT8_C(172), UINT8_C(178), UINT8_C(140), UINT8_C(208), UINT8_C( 52), UINT8_C(155), UINT8_C(119), UINT8_C(179), UINT8_C(109), UINT8_C(233), UINT8_C(134), UINT8_C(244), UINT8_C( 96) } }, { { UINT8_C( 95), UINT8_C(231), UINT8_C( 69), UINT8_C(117), UINT8_C( 30), UINT8_C(112), UINT8_C(112), UINT8_C(206), UINT8_C(190), UINT8_C( 34), UINT8_C(159), UINT8_C( 89), UINT8_C(213), UINT8_C(136), UINT8_C( 77), UINT8_C( 78) }, { UINT8_C(230), UINT8_C(122), UINT8_C( 66), UINT8_C(175), UINT8_C(165), UINT8_C(113), UINT8_C(154), UINT8_C( 1), UINT8_C( 30), UINT8_C( 38), UINT8_C( 53), UINT8_C(149), UINT8_C(147), UINT8_C(187), UINT8_C(245), UINT8_C(243) }, { UINT8_C(190), UINT8_C( 30), UINT8_C( 34), UINT8_C( 38), UINT8_C(159), UINT8_C( 53), UINT8_C( 89), UINT8_C(149), UINT8_C(213), UINT8_C(147), UINT8_C(136), UINT8_C(187), UINT8_C( 77), UINT8_C(245), UINT8_C( 78), UINT8_C(243) } }, { { UINT8_C(162), UINT8_C( 59), UINT8_C(104), UINT8_C(192), UINT8_C(171), UINT8_C(217), UINT8_C(143), UINT8_C(105), UINT8_C(251), UINT8_C( 46), UINT8_C(194), UINT8_C(209), UINT8_C(182), UINT8_C( 16), UINT8_C( 31), UINT8_C(156) }, { UINT8_C(138), UINT8_C( 97), UINT8_C( 75), UINT8_C( 48), UINT8_C(210), UINT8_C(230), UINT8_C( 49), UINT8_C(240), UINT8_C( 12), UINT8_C(103), UINT8_C(134), UINT8_C(160), UINT8_C( 34), UINT8_C(123), UINT8_C(147), UINT8_C(197) }, { UINT8_C(251), UINT8_C( 12), UINT8_C( 46), UINT8_C(103), UINT8_C(194), UINT8_C(134), UINT8_C(209), UINT8_C(160), UINT8_C(182), UINT8_C( 34), UINT8_C( 16), UINT8_C(123), UINT8_C( 31), UINT8_C(147), UINT8_C(156), UINT8_C(197) } }, { { UINT8_C(182), UINT8_C(251), UINT8_C(133), UINT8_C( 98), UINT8_C(212), UINT8_C( 20), UINT8_C(203), UINT8_C(208), UINT8_C( 66), UINT8_C(142), UINT8_C(161), UINT8_C(248), UINT8_C(158), UINT8_C(192), UINT8_C(149), UINT8_C( 40) }, { UINT8_C( 33), UINT8_C(224), 
UINT8_C( 88), UINT8_C(243), UINT8_C(198), UINT8_C(138), UINT8_C(228), UINT8_C(211), UINT8_C(241), UINT8_C(106), UINT8_C(115), UINT8_C( 19), UINT8_C(229), UINT8_C( 6), UINT8_C(216), UINT8_C(156) }, { UINT8_C( 66), UINT8_C(241), UINT8_C(142), UINT8_C(106), UINT8_C(161), UINT8_C(115), UINT8_C(248), UINT8_C( 19), UINT8_C(158), UINT8_C(229), UINT8_C(192), UINT8_C( 6), UINT8_C(149), UINT8_C(216), UINT8_C( 40), UINT8_C(156) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); simde_uint8x16_t r = simde_vzip2q_u8(a, b); simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); } return 0; } static int test_simde_vzip2q_u16 (SIMDE_MUNIT_TEST_ARGS) { struct { uint16_t a[8]; uint16_t b[8]; uint16_t r[8]; } test_vec[] = { { { UINT16_C(56843), UINT16_C(48859), UINT16_C(62267), UINT16_C(16307), UINT16_C(14313), UINT16_C(51328), UINT16_C(63506), UINT16_C(34614) }, { UINT16_C(29688), UINT16_C(53182), UINT16_C(19541), UINT16_C(62702), UINT16_C(38625), UINT16_C( 4081), UINT16_C(60683), UINT16_C( 5668) }, { UINT16_C(14313), UINT16_C(38625), UINT16_C(51328), UINT16_C( 4081), UINT16_C(63506), UINT16_C(60683), UINT16_C(34614), UINT16_C( 5668) } }, { { UINT16_C(65484), UINT16_C( 2005), UINT16_C(35058), UINT16_C(56390), UINT16_C(51135), UINT16_C(53668), UINT16_C(55999), UINT16_C(46936) }, { UINT16_C( 5965), UINT16_C(41607), UINT16_C(30051), UINT16_C(17814), UINT16_C(34571), UINT16_C( 5716), UINT16_C(31093), UINT16_C(16685) }, { UINT16_C(51135), UINT16_C(34571), UINT16_C(53668), UINT16_C( 5716), UINT16_C(55999), UINT16_C(31093), UINT16_C(46936), UINT16_C(16685) } }, { { UINT16_C( 632), UINT16_C(27464), UINT16_C(36490), UINT16_C(19015), UINT16_C(60245), UINT16_C( 5403), UINT16_C(29893), UINT16_C( 5068) }, { UINT16_C(21387), UINT16_C(61109), UINT16_C(19657), UINT16_C(54323), UINT16_C(35027), UINT16_C(18667), UINT16_C( 6145), UINT16_C(31113) }, { 
UINT16_C(60245), UINT16_C(35027), UINT16_C( 5403), UINT16_C(18667), UINT16_C(29893), UINT16_C( 6145), UINT16_C( 5068), UINT16_C(31113) } }, { { UINT16_C(53530), UINT16_C(42212), UINT16_C(11104), UINT16_C(46574), UINT16_C( 2582), UINT16_C(56522), UINT16_C(38782), UINT16_C( 2543) }, { UINT16_C(42218), UINT16_C(46071), UINT16_C(11248), UINT16_C(50312), UINT16_C(29619), UINT16_C(46092), UINT16_C(38539), UINT16_C(42285) }, { UINT16_C( 2582), UINT16_C(29619), UINT16_C(56522), UINT16_C(46092), UINT16_C(38782), UINT16_C(38539), UINT16_C( 2543), UINT16_C(42285) } }, { { UINT16_C( 4711), UINT16_C(51017), UINT16_C(14397), UINT16_C(21629), UINT16_C(18242), UINT16_C(49200), UINT16_C( 8158), UINT16_C(51657) }, { UINT16_C(49347), UINT16_C(46204), UINT16_C( 1259), UINT16_C(40568), UINT16_C(33911), UINT16_C( 594), UINT16_C(32794), UINT16_C(33447) }, { UINT16_C(18242), UINT16_C(33911), UINT16_C(49200), UINT16_C( 594), UINT16_C( 8158), UINT16_C(32794), UINT16_C(51657), UINT16_C(33447) } }, { { UINT16_C(61842), UINT16_C(53065), UINT16_C(50729), UINT16_C(27427), UINT16_C(21262), UINT16_C(60459), UINT16_C(62578), UINT16_C(14005) }, { UINT16_C(12980), UINT16_C(41194), UINT16_C(25142), UINT16_C(44606), UINT16_C(37350), UINT16_C( 432), UINT16_C(22545), UINT16_C(41859) }, { UINT16_C(21262), UINT16_C(37350), UINT16_C(60459), UINT16_C( 432), UINT16_C(62578), UINT16_C(22545), UINT16_C(14005), UINT16_C(41859) } }, { { UINT16_C(52297), UINT16_C(29298), UINT16_C(38547), UINT16_C(41437), UINT16_C( 2281), UINT16_C(23693), UINT16_C(17404), UINT16_C(45202) }, { UINT16_C(31861), UINT16_C(43856), UINT16_C(36830), UINT16_C(50265), UINT16_C( 2592), UINT16_C(12741), UINT16_C(18530), UINT16_C(43988) }, { UINT16_C( 2281), UINT16_C( 2592), UINT16_C(23693), UINT16_C(12741), UINT16_C(17404), UINT16_C(18530), UINT16_C(45202), UINT16_C(43988) } }, { { UINT16_C(17941), UINT16_C(43037), UINT16_C(64220), UINT16_C(50761), UINT16_C(54786), UINT16_C(65058), UINT16_C(46105), UINT16_C(36526) }, { UINT16_C(65328), 
UINT16_C( 3642), UINT16_C(37774), UINT16_C(44754), UINT16_C(39069), UINT16_C(65503), UINT16_C(46048), UINT16_C(62890) }, { UINT16_C(54786), UINT16_C(39069), UINT16_C(65058), UINT16_C(65503), UINT16_C(46105), UINT16_C(46048), UINT16_C(36526), UINT16_C(62890) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); simde_uint16x8_t r = simde_vzip2q_u16(a, b); simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } return 0; } static int test_simde_vzip2q_u32 (SIMDE_MUNIT_TEST_ARGS) { struct { uint32_t a[4]; uint32_t b[4]; uint32_t r[4]; } test_vec[] = { { { UINT32_C( 478890778), UINT32_C(1273886894), UINT32_C( 298449113), UINT32_C(1553786033) }, { UINT32_C(3000604583), UINT32_C(2965413592), UINT32_C(2252713574), UINT32_C(1703353163) }, { UINT32_C( 298449113), UINT32_C(2252713574), UINT32_C(1553786033), UINT32_C(1703353163) } }, { { UINT32_C( 360780391), UINT32_C(3831524875), UINT32_C( 418720102), UINT32_C(3027538189) }, { UINT32_C( 6769960), UINT32_C(1387341804), UINT32_C( 551089877), UINT32_C(2038849298) }, { UINT32_C( 418720102), UINT32_C( 551089877), UINT32_C(3027538189), UINT32_C(2038849298) } }, { { UINT32_C(2106460018), UINT32_C(3697405814), UINT32_C( 653547032), UINT32_C( 265972199) }, { UINT32_C(2718974390), UINT32_C(1056227688), UINT32_C(3378433207), UINT32_C(2655183916) }, { UINT32_C( 653547032), UINT32_C(3378433207), UINT32_C( 265972199), UINT32_C(2655183916) } }, { { UINT32_C(1645990380), UINT32_C(3627973824), UINT32_C(3120444370), UINT32_C(1388894620) }, { UINT32_C(2213926938), UINT32_C(1371662745), UINT32_C(3793362870), UINT32_C(4034944260) }, { UINT32_C(3120444370), UINT32_C(3793362870), UINT32_C(1388894620), UINT32_C(4034944260) } }, { { UINT32_C(3998391086), UINT32_C(3922104343), UINT32_C(1604502979), UINT32_C(3098700446) }, { UINT32_C(3694896963), UINT32_C(1177418896), UINT32_C( 539510812), 
UINT32_C(3541084325) }, { UINT32_C(1604502979), UINT32_C( 539510812), UINT32_C(3098700446), UINT32_C(3541084325) } }, { { UINT32_C(1522623043), UINT32_C(3057879026), UINT32_C(3927303500), UINT32_C(2476984144) }, { UINT32_C(4285521518), UINT32_C(4148534747), UINT32_C(2316791525), UINT32_C(1516054294) }, { UINT32_C(3927303500), UINT32_C(2316791525), UINT32_C(2476984144), UINT32_C(1516054294) } }, { { UINT32_C(2075401865), UINT32_C(4063361189), UINT32_C( 769411037), UINT32_C(2109767438) }, { UINT32_C( 964440158), UINT32_C(2989539789), UINT32_C(1178355503), UINT32_C(4154497390) }, { UINT32_C( 769411037), UINT32_C(1178355503), UINT32_C(2109767438), UINT32_C(4154497390) } }, { { UINT32_C(1550996663), UINT32_C( 709796940), UINT32_C(4183239659), UINT32_C( 141957290) }, { UINT32_C( 356643400), UINT32_C(3821498804), UINT32_C( 640222136), UINT32_C(1394461084) }, { UINT32_C(4183239659), UINT32_C( 640222136), UINT32_C( 141957290), UINT32_C(1394461084) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); simde_uint32x4_t r = simde_vzip2q_u32(a, b); simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } return 0; } static int test_simde_vzip2q_u64 (SIMDE_MUNIT_TEST_ARGS) { struct { uint64_t a[2]; uint64_t b[2]; uint64_t r[2]; } test_vec[] = { { { UINT64_C( 4280958701235489246), UINT64_C(11535681782777318835) }, { UINT64_C( 6878166422813626130), UINT64_C(12381793737624657829) }, { UINT64_C(11535681782777318835), UINT64_C(12381793737624657829) } }, { { UINT64_C(14969386940695648064), UINT64_C( 3104490981576679679) }, { UINT64_C(13112230382089857197), UINT64_C( 8545878496494894880) }, { UINT64_C( 3104490981576679679), UINT64_C( 8545878496494894880) } }, { { UINT64_C( 1374078459952243238), UINT64_C( 7285027289849944533) }, { UINT64_C(14288505062639445382), UINT64_C(13466410036153779510) }, { UINT64_C( 7285027289849944533), 
UINT64_C(13466410036153779510) } }, { { UINT64_C( 7247714085351774830), UINT64_C( 4721016760819653991) }, { UINT64_C(12717357733718164241), UINT64_C(17002494307029758118) }, { UINT64_C( 4721016760819653991), UINT64_C(17002494307029758118) } }, { { UINT64_C( 7263466160932188377), UINT64_C(11249156841034424097) }, { UINT64_C(13453888246246536075), UINT64_C(12135486648795582720) }, { UINT64_C(11249156841034424097), UINT64_C(12135486648795582720) } }, { { UINT64_C(18103026683644860376), UINT64_C( 5847067349770288893) }, { UINT64_C( 5239669095272970216), UINT64_C( 7805915270211808220) }, { UINT64_C( 5847067349770288893), UINT64_C( 7805915270211808220) } }, { { UINT64_C( 4673466768805750021), UINT64_C(10327578197059285819) }, { UINT64_C(10372990280714201189), UINT64_C(17217137029729139438) }, { UINT64_C(10327578197059285819), UINT64_C(17217137029729139438) } }, { { UINT64_C(16823101498774663667), UINT64_C(10561851715167600732) }, { UINT64_C( 5847417664724475876), UINT64_C(16412457277402079725) }, { UINT64_C(10561851715167600732), UINT64_C(16412457277402079725) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); simde_uint64x2_t r = simde_vzip2q_u64(a, b); simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vzip2_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vzip2_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vzip2_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vzip2_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vzip2_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vzip2_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vzip2_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vzip2q_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vzip2q_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vzip2q_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vzip2q_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vzip2q_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vzip2q_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vzip2q_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vzip2q_u32) 
SIMDE_TEST_FUNC_LIST_ENTRY(vzip2q_u64) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/arm/run-tests.c000066400000000000000000000012021400333146700164630ustar00rootroot00000000000000#include "run-tests.h" #include "../../simde/hedley.h" static MunitSuite suites[] = { #define SIMDE_TEST_DECLARE_SUITE(name) { (char*) "/", NULL, NULL, 1, MUNIT_SUITE_OPTION_NONE }, #include "declare-suites.h" #undef SIMDE_TEST_DECLARE_SUITE { NULL, NULL, NULL, 0, MUNIT_SUITE_OPTION_NONE } }; static MunitSuite suite = { "/arm", NULL, suites, 1, MUNIT_SUITE_OPTION_NONE }; MunitSuite* simde_tests_arm_get_suite(void) { size_t i = 0; #define SIMDE_TEST_DECLARE_SUITE(name) suites[i++] = *HEDLEY_CONCAT3(simde_tests_arm_, name, _get_suite)(); #include "declare-suites.h" #undef SIMDE_TEST_DECLARE_SUITE return &suite; } simde-0.7.2/test/arm/run-tests.h000066400000000000000000000003241400333146700164740ustar00rootroot00000000000000#if defined(SIMDE_TESTS_ARM_RUN_TESTS_H) #error File already included. #endif #define SIMDE_TESTS_ARM_RUN_TESTS_H #include "../test.h" #include "neon/run-tests.h" MunitSuite* simde_tests_arm_get_suite(void); simde-0.7.2/test/arm/test-arm.h000066400000000000000000000000001400333146700162530ustar00rootroot00000000000000simde-0.7.2/test/cmake/000077500000000000000000000000001400333146700146615ustar00rootroot00000000000000simde-0.7.2/test/cmake/AddCompilerFlags.cmake000066400000000000000000000130761400333146700210320ustar00rootroot00000000000000# This module provides a convenient way to add C/C++ compiler flags if # the compiler supports them. include (CheckCCompilerFlag) include (CheckCXXCompilerFlag) cmake_policy(SET CMP0054 NEW) # Depending on the settings, some compilers will accept unknown flags. # We try to disable this behavior by also passing these flags when we # check if a flag is supported. 
set (ADD_COMPILER_FLAGS_PREPEND "") if (CMAKE_C_COMPILER_ID STREQUAL "GNU") set (ADD_COMPILER_FLAGS_PREPEND "-Wall -Wextra -Werror") elseif (CMAKE_C_COMPILER_ID STREQUAL "Clang") set (ADD_COMPILER_FLAGS_PREPEND "-Werror=unknown-warning-option") endif () ## # Set a variable to different flags, depending on which compiler is in # use. # # Example: # set_compiler_specific_flags(VARIABLE varname MSVC /wd666 INTEL /wd1729) # # This will set varname to /wd666 if the compiler is MSVC, and /wd1729 # if it is Intel. # # Possible compilers: # - GCC: GNU C Compiler # - GCCISH: A compiler that (tries to) be GCC-compatible on the CLI # (i.e., anything but MSVC). # - CLANG: clang # - MSVC: Microsoft Visual C++ compiler # - INTEL: Intel C Compiler # - PGI: PGI C Compiler # # Note: the compiler is determined based on the value of the # CMAKE_C_COMPILER_ID variable, not CMAKE_CXX_COMPILER_ID. ## function (set_compiler_specific_flags) set (oneValueArgs VARIABLE) set (multiValueArgs GCC GCCISH INTEL CLANG MSVC PGI) cmake_parse_arguments(COMPILER_SPECIFIC_FLAGS "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) unset (options) unset (oneValueArgs) unset (multiValueArgs) set (compiler_flags) if (CMAKE_C_COMPILER_ID STREQUAL "GNU") list (APPEND compiler_flags ${COMPILER_SPECIFIC_FLAGS_GCC}) elseif(CMAKE_C_COMPILER_ID STREQUAL "Clang") list (APPEND compiler_flags ${COMPILER_SPECIFIC_FLAGS_CLANG}) elseif(CMAKE_C_COMPILER_ID STREQUAL "Intel") list (APPEND compiler_flags ${COMPILER_SPECIFIC_FLAGS_INTEL}) elseif(CMAKE_C_COMPILER_ID STREQUAL "MSVC") list (APPEND compiler_flags ${COMPILER_SPECIFIC_FLAGS_MSVC}) elseif(CMAKE_C_COMPILER_ID STREQUAL "PGI") list (APPEND compiler_flags ${COMPILER_SPECIFIC_FLAGS_PGI}) endif() set(GCCISH_COMPILERS GNU Clang Intel) list(FIND GCCISH_COMPILERS "${CMAKE_C_COMPILER_ID}" IS_GCCISH) if (IS_GCCISH GREATER -1) list (APPEND compiler_flags ${COMPILER_SPECIFIC_FLAGS_GCCISH}) endif () set (${COMPILER_SPECIFIC_FLAGS_VARIABLE} "${compiler_flags}" 
PARENT_SCOPE) endfunction () function (source_file_add_compiler_flags_unchecked file) set (flags ${ARGV}) list (REMOVE_AT flags 0) get_source_file_property (sources ${file} SOURCES) foreach (flag ${flags}) get_source_file_property (existing ${file} COMPILE_FLAGS) if ("${existing}" STREQUAL "NOTFOUND") set_source_files_properties (${file} PROPERTIES COMPILE_FLAGS "${flag}") else () set_source_files_properties (${file} PROPERTIES COMPILE_FLAGS "${existing} ${flag}") endif () endforeach (flag) endfunction () function (source_file_add_compiler_flags file) set (flags ${ARGV}) list (REMOVE_AT flags 0) get_source_file_property (sources ${file} SOURCES) foreach (flag ${flags}) if (CMAKE_C_COMPILER_ID STREQUAL "GNU") # Because https://gcc.gnu.org/wiki/FAQ#wnowarning string (REGEX REPLACE "\\-Wno\\-(.+)" "-W\\1" flag_to_test "${flag}") else () set (flag_to_test ${flag}) endif () if (file MATCHES "\\.c$") string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" test_name "CFLAG_${flag_to_test}") CHECK_C_COMPILER_FLAG ("${ADD_COMPILER_FLAGS_PREPEND} ${flag_to_test}" ${test_name}) elseif (file MATCHES "\\.(cpp|cc|cxx)$") string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" test_name "CXXFLAG_${flag_to_test}") CHECK_CXX_COMPILER_FLAG ("${ADD_COMPILER_FLAGS_PREPEND} ${flag_to_test}" ${test_name}) endif () if (${test_name}) source_file_add_compiler_flags_unchecked (${file} ${flag}) endif () unset (test_name) unset (flag_to_test) endforeach (flag) unset (flags) endfunction () function (target_add_compiler_flags target) set (flags ${ARGV}) list (REMOVE_AT flags 0) get_target_property (sources ${target} SOURCES) foreach (source ${sources}) source_file_add_compiler_flags (${source} ${flags}) endforeach (source) unset (flags) unset (sources) endfunction (target_add_compiler_flags) # global_add_compiler_flags (flag1 [flag2 [flag3 ...]]): # # This just adds the requested compiler flags to # CMAKE_C/CXX_FLAGS variable if they work with the compiler. 
function (global_add_compiler_flags)
  # Probe every flag passed as an argument with both the C and the C++
  # compiler, and append each accepted flag to CMAKE_C_FLAGS and/or
  # CMAKE_CXX_FLAGS.  The updated values are exported to the caller's
  # scope at the end of the function.
  set (flags ${ARGV})

  foreach (flag ${flags})
    if ("GNU" STREQUAL "${CMAKE_C_COMPILER_ID}")
      # Because https://gcc.gnu.org/wiki/FAQ#wnowarning
      # (probe the positive -Wfoo form instead of -Wno-foo)
      string (REGEX REPLACE "\\-Wno\\-(.+)" "-W\\1" flag_to_test "${flag}")
    else ()
      set (flag_to_test "${flag}")
    endif ()

    # C compiler probe; the result variable is named after the flag.
    string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" c_test_name "CFLAG_${flag_to_test}")
    CHECK_C_COMPILER_FLAG ("${ADD_COMPILER_FLAGS_PREPEND} ${flag_to_test}" ${c_test_name})

    if (${c_test_name})
      set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${flag}")
    endif ()

    unset (c_test_name)

    # C++ compiler probe.  It needs its own result variable (CXXFLAG_
    # prefix, as in source_file_add_compiler_flags); sharing the CFLAG_
    # name with the C probe above would make the C++ result a copy of
    # the C one.
    string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" cxx_test_name "CXXFLAG_${flag_to_test}")
    CHECK_CXX_COMPILER_FLAG ("${ADD_COMPILER_FLAGS_PREPEND} ${flag_to_test}" ${cxx_test_name})

    if (${cxx_test_name})
      set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flag}")
    endif ()

    unset (cxx_test_name)
    unset (flag_to_test)
  endforeach (flag)

  unset (flags)

  # Publish the accumulated flags to the calling scope.
  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" PARENT_SCOPE)
  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" PARENT_SCOPE)
endfunction (global_add_compiler_flags)
simde-0.7.2/test/cmake/ExtraWarningFlags.cmake000066400000000000000000000032111400333146700212460ustar00rootroot00000000000000# This is basically supposed to be the CMake equivalent of # https://git.gnome.org/browse/gnome-common/tree/macros2/gnome-compiler-flags.m4 include (AddCompilerFlags) if (MSVC) set (EXTRA_WARNING_FLAGS /W4 /analyze) else () set (EXTRA_WARNING_FLAGS -Wall -Wcast-align -Wclobbered -Wempty-body -Werror=format=2 -Werror=format-security -Werror=implicit-function-declaration -Werror=init-self -Werror=missing-include-dirs -Werror=missing-prototypes -Werror=pointer-arith -Wextra -Wformat-nonliteral -Wformat-security -Wignored-qualifiers -Winit-self -Winvalid-pch -Wlogical-op -Wmissing-declarations -Wmissing-format-attribute -Wmissing-include-dirs -Wmissing-noreturn -Wmissing-parameter-type -Wmissing-prototypes -Wnested-externs -Wno-missing-field-initializers -Wno-strict-aliasing -Wno-uninitialized -Wno-unused-parameter
-Wold-style-definition -Woverride-init -Wpacked -Wpointer-arith
    -Wredundant-decls -Wreturn-type -Wshadow -Wsign-compare
    -Wstrict-prototypes -Wswitch-enum -Wsync-nand -Wtype-limits
    -Wundef -Wuninitialized -Wunsafe-loop-optimizations
    -Wwrite-strings -Wsuggest-attribute=format)
endif ()

mark_as_advanced (EXTRA_WARNING_FLAGS)

# Pass EXTRA_WARNING_FLAGS for <target> to target_add_compiler_flags.
function (target_add_extra_warning_flags target)
  target_add_compiler_flags (${target} ${EXTRA_WARNING_FLAGS})
endfunction (target_add_extra_warning_flags)

# Pass EXTRA_WARNING_FLAGS for a single source <file> to
# source_file_add_compiler_flags.
function (source_file_add_extra_warning_flags file)
  source_file_add_compiler_flags (${file} ${EXTRA_WARNING_FLAGS})
endfunction (source_file_add_extra_warning_flags)
simde-0.7.2/test/common/000077500000000000000000000000001400333146700150715ustar00rootroot00000000000000simde-0.7.2/test/common/common.c000066400000000000000000000062141400333146700165300ustar00rootroot00000000000000#include "../test.h" /* These tests are basically to verify assumptions we make about the * target platform. */ #if defined(SIMDE_IEEE754_STORAGE) static int test_simde_ieee754_storage_f32 (SIMDE_MUNIT_TEST_ARGS) { static const simde_float32 pif_as_f32 = SIMDE_MATH_PIF; uint32_t pif_as_u32; simde_memcpy(&pif_as_u32, &pif_as_f32, sizeof(simde_float32)); simde_assert_equal_u32(pif_as_u32, UINT32_C(0x40490fdb)); return 0; } static int test_simde_ieee754_storage_f64 (SIMDE_MUNIT_TEST_ARGS) { static const simde_float64 pid_as_f64 = SIMDE_MATH_PI; uint64_t pid_as_u64; simde_memcpy(&pid_as_u64, &pid_as_f64, sizeof(simde_float64)); simde_assert_equal_u64(pid_as_u64, UINT64_C(0x400921fb54442d18)); return 0; } #endif /* These next two make sure that all we need to do is flip a single * bit in order to flip the sign of a value without altering the * absolute value. i.e., we want to make sure the parts of the float * aren't stored as two's complement or something.
*/ static int test_simde_single_bit_sign_f32 (SIMDE_MUNIT_TEST_ARGS) { static const simde_float32 ppif_as_f32 = SIMDE_MATH_PIF; static const simde_float32 npif_as_f32 = -SIMDE_MATH_PIF; uint32_t ppif_as_u32, npif_as_u32, v; simde_memcpy(&ppif_as_u32, &ppif_as_f32, sizeof(uint32_t)); simde_memcpy(&npif_as_u32, &npif_as_f32, sizeof(uint32_t)); /* is_power_of_two(pi ^ -pi) */ v = ppif_as_u32 ^ npif_as_u32; v = (v & (v - 1)) == 0; simde_assert_equal_u32(v, UINT32_C(1)); return 0; } static int test_simde_single_bit_sign_f64 (SIMDE_MUNIT_TEST_ARGS) { static const simde_float64 ppif_as_f64 = SIMDE_MATH_PI; static const simde_float64 npif_as_f64 = -SIMDE_MATH_PI; uint64_t ppif_as_u64, npif_as_u64, v; simde_memcpy(&ppif_as_u64, &ppif_as_f64, sizeof(uint64_t)); simde_memcpy(&npif_as_u64, &npif_as_f64, sizeof(uint64_t)); /* is_power_of_two(pi ^ -pi) */ v = ppif_as_u64 ^ npif_as_u64; v = (v & (v - 1)) == 0; simde_assert_equal_u64(v, UINT64_C(1)); return 0; } /* We can handle little and big endian, but not PDP endian (or any * other endianness). 
*/ static int test_simde_endian (SIMDE_MUNIT_TEST_ARGS) { uint8_t a[] = { 1, 2, 3, 4 }; uint32_t v; simde_memcpy(&v, a, sizeof(v)); switch(v) { case UINT32_C(0x01020304): /* Big endian */ case UINT32_C(0x04030201): /* Little endian */ return 0; default: return 1; } } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(ieee754_storage_f32) SIMDE_TEST_FUNC_LIST_ENTRY(ieee754_storage_f64) SIMDE_TEST_FUNC_LIST_ENTRY(single_bit_sign_f32) SIMDE_TEST_FUNC_LIST_ENTRY(single_bit_sign_f64) SIMDE_TEST_FUNC_LIST_ENTRY(endian) SIMDE_TEST_FUNC_LIST_END int main(void) { int retval = EXIT_SUCCESS; fprintf(stdout, "1..%zu\n", (sizeof(test_suite_tests) / sizeof(test_suite_tests[0]))); for (size_t i = 0 ; i < (sizeof(test_suite_tests) / sizeof(test_suite_tests[0])) ; i++) { int res = test_suite_tests[i].func(); if (res != 0) { retval = EXIT_FAILURE; fprintf(stdout, "not ok %zu %s\n", i + 1, test_suite_tests[i].name); } else { fprintf(stdout, "ok %zu %s\n", i + 1, test_suite_tests[i].name); } } return retval; } simde-0.7.2/test/common/meson.build000066400000000000000000000015451400333146700172400ustar00rootroot00000000000000simde_test_common_tests = [ 'common' ] simde_test_common_sources = [] foreach name : simde_test_common_tests foreach lang : ['c', 'cpp'] source_file = name + '.c' if lang == 'cpp' source_file = configure_file(input: name + '.c', output: name + '.cpp', copy: true) endif simde_test_common_sources += source_file extra_flags = ['-DSIMDE_TEST_BARE'] x = executable(name + '-' + lang, source_file, c_args: simde_c_args + simde_c_defs + simde_native_c_flags + extra_flags, cpp_args: simde_c_args + simde_c_defs + simde_native_c_flags + extra_flags, include_directories: simde_include_dir, dependencies: simde_deps) test('common/' + name + '/' + lang, x, protocol: 'tap', # Emscripten tests must be run from builddir workdir: meson.current_build_dir()) endforeach endforeach 
simde-0.7.2/test/declare-suites.h000066400000000000000000000000741400333146700166640ustar00rootroot00000000000000SIMDE_TEST_DECLARE_SUITE(x86) SIMDE_TEST_DECLARE_SUITE(arm) simde-0.7.2/test/docker/000077500000000000000000000000001400333146700150505ustar00rootroot00000000000000simde-0.7.2/test/docker/Dockerfile.Ubuntu1404000066400000000000000000000015161400333146700206370ustar00rootroot00000000000000#trusty with gcc 4.8.4 FROM ubuntu:14.04 RUN apt-get update && apt-get install --no-install-recommends -y gcc g++ wget unzip software-properties-common RUN add-apt-repository ppa:deadsnakes/ppa && apt-get update && apt-get install -y --no-install-recommends python3.6 RUN wget https://bootstrap.pypa.io/get-pip.py && python3.6 get-pip.py && rm get-pip.py && python3.6 -m pip install meson RUN wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip && unzip ninja-linux.zip && mv ninja /usr/bin && rm -Rf ninja-linux* COPY . /simde WORKDIR /simde RUN mkdir -p build_ubuntu14.04 WORKDIR /simde/build_ubuntu14.04 RUN CC=/usr/bin/gcc CXX=/usr/bin/g++ CFLAGS="-Wall -Wextra -Werror -Werror=unused-but-set-variable" CXXFLAGS="-Wall -Wextra -Werror -Werror=unused-but-set-variable" meson .. \ && ninja -v && test/run-tests simde-0.7.2/test/docker/Dockerfile.Ubuntu1604000066400000000000000000000026361400333146700206450ustar00rootroot00000000000000# xenial with gcc 5.4 FROM ubuntu:16.04 RUN apt-get update && apt-get install --no-install-recommends -y \ clang-3.8 \ cmake \ gcc \ g++ \ libomp-dev \ libxml2-utils \ libc++-dev \ make \ ninja-build \ python3-pip \ python3-setuptools \ python3-wheel RUN pip3 install meson==0.50.0 COPY . 
/simde
WORKDIR /simde/test

# CMake build with clang 3.8, warnings as errors.
RUN mkdir -p /simde/test/build_ubuntu16.04_clang
WORKDIR /simde/test/build_ubuntu16.04_clang
RUN CC=/usr/bin/clang-3.8 CXX=/usr/bin/clang++-3.8 cmake -DCMAKE_C_FLAGS="-mavx2 -Weverything -Werror -Wno-c++98-compat-pedantic -Wno-newline-eof" -DCMAKE_CXX_FLAGS="-mavx2 -Weverything -Werror -Wno-c++98-compat-pedantic -Wno-newline-eof" .. \
  && make -j $(nproc) && ./run-tests

# CMake build with the distribution gcc, warnings as errors.
RUN mkdir -p /simde/test/build_ubuntu16.04_gcc
WORKDIR /simde/test/build_ubuntu16.04_gcc
RUN CC=/usr/bin/gcc CXX=/usr/bin/g++ cmake -DCMAKE_C_FLAGS="-Wall -Wextra -Werror -Werror=unused-but-set-variable" -DCMAKE_CXX_FLAGS="-Wall -Wextra -Werror -Werror=unused-but-set-variable" .. \
  && make -j $(nproc) && ./run-tests

# Meson build with native aliases enabled.  Uses the versioned clang-3.8
# binaries, the only clang package installed by this image (same paths
# as the CMake clang stage above).
WORKDIR /simde/
RUN bash ./test/native-aliases.sh
RUN mkdir -p /simde/build_ubuntu16.04_clang_native
WORKDIR /simde/build_ubuntu16.04_clang_native
RUN CC=/usr/bin/clang-3.8 CXX=/usr/bin/clang++-3.8 CFLAGS="-Wall -Wextra -DSIMDE_ENABLE_NATIVE_ALIASES -DSIMDE_NATIVE_ALIASES_TESTING" CXXFLAGS="-Wall -Wextra -DSIMDE_ENABLE_NATIVE_ALIASES -DSIMDE_NATIVE_ALIASES_TESTING" meson .. \
  && ninja && ./test/run-tests
simde-0.7.2/test/docker/Dockerfile.Ubuntu1804000066400000000000000000000006711400333146700206440ustar00rootroot00000000000000# bionic with gcc 7.4 FROM ubuntu:18.04 RUN apt-get update && apt-get install --no-install-recommends -y gcc g++ cmake make COPY . /simde WORKDIR /simde/test RUN mkdir -p build_ubuntu18.04 WORKDIR /simde/test/build_ubuntu18.04 RUN CC=/usr/bin/gcc CXX=/usr/bin/g++ cmake -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON \ -DCMAKE_C_FLAGS="-Wall -Wextra -Werror -O3" \ -DCMAKE_CXX_FLAGS="-Wall -Wextra -Werror -O3" ..
\ && make -j $(nproc) && ./run-tests
simde-0.7.2/test/docker/Dockerfile.arm64000066400000000000000000000020341400333146700177710ustar00rootroot00000000000000
# Cross-builds the test suite for aarch64 with gcc-9 and with clang, and
# runs each build under qemu-aarch64-static.
FROM debian:bullseye-slim

RUN apt-get update && apt-get install --no-install-recommends -y \
  binfmt-support \
  clang \
  cmake \
  gcc \
  gcc-9-aarch64-linux-gnu \
  g++-9-aarch64-linux-gnu \
  make \
  qemu-user-static

COPY . /simde

# Stage 1: aarch64 cross gcc.
RUN mkdir -p /simde/test/build_gcc_arm64
WORKDIR /simde/test/build_gcc_arm64
RUN CC=/usr/bin/aarch64-linux-gnu-gcc-9 CXX=/usr/bin/aarch64-linux-gnu-g++-9 \
  cmake -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_C_FLAGS="-march=armv8-a" \
  -DCMAKE_CXX_FLAGS="-march=armv8-a" ../ && make -j$(nproc)
RUN QEMU_LD_PREFIX=/usr/aarch64-linux-gnu/ /usr/bin/qemu-aarch64-static ./run-tests

# Stage 2: clang targeting aarch64-linux-gnu.  The resulting binary is
# an aarch64 executable, so it is run with the aarch64 emulator and
# sysroot, exactly like stage 1.
RUN mkdir -p /simde/test/build_arm64_clang
WORKDIR /simde/test/build_arm64_clang
RUN CC=/usr/bin/clang CXX=/usr/bin/clang++ cmake -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON \
  -DCMAKE_C_FLAGS="--target=aarch64-linux-gnu -I/usr/aarch64-linux-gnu/include" \
  -DCMAKE_CXX_FLAGS="--target=aarch64-linux-gnu -I/usr/aarch64-linux-gnu/include" \
  ../ && make -j$(nproc)
RUN QEMU_LD_PREFIX=/usr/aarch64-linux-gnu/ /usr/bin/qemu-aarch64-static ./run-tests
simde-0.7.2/test/docker/Dockerfile.arm7000066400000000000000000000024201400333146700177050ustar00rootroot00000000000000FROM debian:bullseye-slim RUN apt-get update && apt-get install --no-install-recommends -y \ binfmt-support \ clang-9 \ ninja-build \ python3-pip \ gcc \ gcc-10-arm-linux-gnueabihf \ g++-10-arm-linux-gnueabihf \ libstdc++-10-dev-armhf-cross \ make \ parallel \ qemu-user-static RUN pip3 install meson ENV QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf COPY . /simde RUN mkdir -p /simde/build_gcc_arm7 WORKDIR /simde/build_gcc_arm7 RUN CC=/usr/bin/arm-linux-gnueabihf-gcc-10 CXX=/usr/bin/arm-linux-gnueabihf-g++-10 CFLAGS="-march=armv8-a -mfpu=neon" \ CXXFLAGS="-march=armv7-a -mfpu=neon" \ meson ..
|| (cat meson-logs/meson-log.txt; false) && ninja -v && \ ./test/run-tests --list | grep -oP "^/([^/]+)/([^/]+)" | sort -u | xargs parallel ./test/run-tests --color always {} ::: RUN mkdir -p /simde/build_clang_arm7 WORKDIR /simde/build_clang_arm7 RUN CC=clang-9 CXX=clang++-9 CFLAGS="--target=arm-linux-gnueabihf -march=armv8-a -mfpu=neon -I/usr/arm-linux-gnueabihf/include" \ CXXFLAGS="--target=arm-linux-gnueabihf -march=armv7a -mfpu=neon -I/usr/arm-linux-gnueabihf/include" \ meson .. || (cat meson-logs/meson-log.txt; false) && ninja -v && \ ./test/run-tests --list | grep -oP "^/([^/]+)/([^/]+)" | sort -u | xargs parallel ./test/run-tests --color always {} ::: simde-0.7.2/test/docker/Dockerfile.icc000066400000000000000000000014371400333146700176040ustar00rootroot00000000000000FROM debian:bullseye-slim RUN apt-get update && apt-get install -y --no-install-recommends \ ca-certificates \ cmake \ gcc \ gnupg \ g++ \ libstdc++-9-dev \ make \ wget RUN wget -q https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB -O- | apt-key add - RUN echo deb https://apt.repos.intel.com/oneapi all main > /etc/apt/sources.list.d/inteloneapi.list RUN apt-get update && apt-get install -y intel-oneapi-icc COPY . /simde WORKDIR /simde RUN mkdir -p test/build_s390x WORKDIR /simde/test/build_s390x RUN bash -c 'source /opt/intel/inteloneapi/compiler/latest/env/vars.sh && \ CC=icc CXX=icpc cmake -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_C_FLAGS="-wd13200 -wd13203" -DCMAKE_CXX_FLAGS="-wd13200 -wd13203" ../ && \ make -j$(nproc)' RUN ./run-tests simde-0.7.2/test/docker/Dockerfile.mipsel000066400000000000000000000010431400333146700203300ustar00rootroot00000000000000FROM debian:buster-slim RUN apt-get update && apt-get install -y gcc-8-mipsel-linux-gnu g++-8-mipsel-linux-gnu qemu-user-static cmake binfmt-support COPY . 
/simde WORKDIR /simde RUN mkdir -p test/build_mipsel WORKDIR /simde/test/build_mipsel RUN CC=/usr/bin/mipsel-linux-gnu-gcc-8 CXX=/usr/bin/mipsel-linux-gnu-g++-8 cmake -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_C_FLAGS="-Wall -Wextra -Werror" -DCMAKE_CXX_FLAGS="-Wall -Wextra -Werror" ../ && \ make -j$(nproc) RUN QEMU_LD_PREFIX=/usr/mipsel-linux-gnu/ /usr/bin/qemu-mipsel-static ./run-tests simde-0.7.2/test/docker/Dockerfile.ppc64el000066400000000000000000000024451400333146700203230ustar00rootroot00000000000000FROM debian:bullseye-slim RUN apt-get update && apt-get install --no-install-recommends -y \ binfmt-support \ ca-certificates \ cmake \ curl \ binutils \ gcc-9-powerpc64le-linux-gnu \ g++-powerpc64le-linux-gnu \ libxml2-utils \ make \ qemu-user-static COPY . /simde WORKDIR /simde # RUN mkdir -p test/build_ppc64le # WORKDIR /simde/test/build_ppc64le # RUN CC=/usr/bin/powerpc64le-linux-gnu-gcc-9 CXX=/usr/bin/powerpc64le-linux-gnu-g++-9 cmake -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_C_FLAGS="-Wall -Wextra -Werror -mcpu=power8" -DCMAKE_CXX_FLAGS="-Wall -Wextra -Werror -mcpu=power8" ../ && \ # make -j$(nproc) # RUN QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu/ /usr/bin/qemu-ppc64le-static ./run-tests WORKDIR /simde RUN ./test/native-aliases.sh RUN mkdir -p test/build_ppc64le_native WORKDIR /simde/test/build_ppc64le_native RUN CC=/usr/bin/powerpc64le-linux-gnu-gcc-9 CXX=/usr/bin/powerpc64le-linux-gnu-g++-9 cmake -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_C_FLAGS="-Wall -Wextra -Werror -mcpu=power8 -DSIMDE_ENABLE_NATIVE_ALIASES -DSIMDE_NATIVE_ALIASES_TESTING" -DCMAKE_CXX_FLAGS="-Wall -Wextra -Werror -mcpu=power8 -DSIMDE_ENABLE_NATIVE_ALIASES -DSIMDE_NATIVE_ALIASES_TESTING" ../ && \ make -j$(nproc) RUN QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu/ /usr/bin/qemu-ppc64le-static ./run-tests simde-0.7.2/test/docker/Dockerfile.qemu5.s390x000066400000000000000000000021251400333146700207620ustar00rootroot00000000000000FROM debian:unstable-slim RUN apt-get update && apt-get install 
--no-install-recommends -y \ binfmt-support \ clang \ cmake \ gcc \ gcc-9-s390x-linux-gnu \ g++-9-s390x-linux-gnu \ make \ qemu-user-static COPY . /simde WORKDIR /simde RUN mkdir -p test/build_s390x_clang WORKDIR /simde/test/build_s390x_clang RUN CC=/usr/bin/clang CXX=/usr/bin/clang++ cmake -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_C_FLAGS="-Wall -Wextra -Werror --target=s390x-linux-gnu -march=z196 -I/usr/s390x-linux-gnu/include -O3" -DCMAKE_CXX_FLAGS="-Wall -Wextra -Werror --target=s390x-linux-gnu -march=z196 -I/usr/s390x-linux-gnu/include -O3" ../ && \ make -j$(nproc) RUN QEMU_LD_PREFIX=/usr/s390x-linux-gnu/ /usr/bin/qemu-s390x-static ./run-tests RUN mkdir -p test/build_s390x_gnu WORKDIR /simde/test/build_s390x_gnu RUN CC=/usr/bin/s390x-linux-gnu-gcc-9 CXX=/usr/bin/s390x-linux-gnu-g++-9 cmake -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_C_FLAGS="-Wall -Wextra -Werror" -DCMAKE_CXX_FLAGS="-Wall -Wextra -Werror" ../ && \ make -j$(nproc) RUN QEMU_LD_PREFIX=/usr/s390x-linux-gnu/ /usr/bin/qemu-s390x-static ./run-tests simde-0.7.2/test/docker/Dockerfile.s390x000066400000000000000000000021211400333146700177230ustar00rootroot00000000000000FROM debian:bullseye-slim RUN apt-get update && apt-get install --no-install-recommends -y \ binfmt-support \ clang \ cmake \ gcc \ gcc-9-s390x-linux-gnu \ g++-9-s390x-linux-gnu \ make \ qemu-user-static COPY . 
/simde RUN mkdir -p /simde/test/build_s390x_clang WORKDIR /simde/test/build_s390x_clang RUN CC=/usr/bin/clang CXX=/usr/bin/clang++ cmake -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON \ -DCMAKE_C_FLAGS="-Wall -Wextra -Werror --target=s390x-linux-gnu -march=z196 \ -I/usr/s390x-linux-gnu/include" -DCMAKE_CXX_FLAGS="-Wall -Wextra -Werror \ --target=s390x-linux-gnu -march=z196 -I/usr/s390x-linux-gnu/include" ../ && \ make -j$(nproc) RUN QEMU_LD_PREFIX=/usr/s390x-linux-gnu/ /usr/bin/qemu-s390x-static ./run-tests RUN mkdir -p test/build_s390x_gnu WORKDIR /simde/test/build_s390x_gnu RUN CC=/usr/bin/s390x-linux-gnu-gcc-9 CXX=/usr/bin/s390x-linux-gnu-g++-9 cmake -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_C_FLAGS="-Wall -Wextra -Werror" -DCMAKE_CXX_FLAGS="-Wall -Wextra -Werror" ../ && \ make -j$(nproc) RUN QEMU_LD_PREFIX=/usr/s390x-linux-gnu/ /usr/bin/qemu-s390x-static ./run-tests simde-0.7.2/test/download-iig.sh000077500000000000000000000005701400333146700165170ustar00rootroot00000000000000#!/bin/sh if [ "x$1" != "x" ]; then OUTFILE="$1" else OUTFILE="iig.xml" fi if [ ! -e "${OUTFILE}" ]; then IIG_VERSION="$(curl -s https://software.intel.com/sites/landingpage/IntrinsicsGuide/ | grep -Po '(?<=intrinsicsguide.min.js\?)([0-9\.]+)')" curl -s "https://software.intel.com/sites/landingpage/IntrinsicsGuide/files/data-${IIG_VERSION}.xml" > "${OUTFILE}"; fi simde-0.7.2/test/download-sde.sh000077500000000000000000000007221400333146700165210ustar00rootroot00000000000000#!/bin/sh FRAGMENT="$(curl -sL 'https://software.intel.com/content/www/us/en/develop/articles/pre-release-license-agreement-for-intel-software-development-emulator-accept-end-user-license-agreement-and-download.html' | \ grep -oP '/content/dam/develop/external/us/en/documents/sde-external-([0-9\.\-]+)-lin.tar.bz2' | head -n1)" if [ ! 
-e "$1" ]; then mkdir -p "$1" fi curl -sL "https://software.intel.com${FRAGMENT}" | \ tar --strip-components 1 -jxC "$1" simde-0.7.2/test/meson.build000066400000000000000000000034571400333146700157540ustar00rootroot00000000000000cc = meson.get_compiler('c') libm = cc.find_library('m', required : false) simde_c_defs = [] simde_cxx_defs = [] simde_c_args = [] simde_cxx_args = [] simde_deps = [libm] if get_option('sleef').enabled() or meson.get_external_property('sleef', false) sleef = cc.find_library('sleef', required: true) simde_deps += sleef simde_c_defs = ['-DSIMDE_MATH_SLEEF_ENABLE'] simde_cxx_defs = ['-DSIMDE_MATH_SLEEF_ENABLE'] endif c_openmp_simd = false cxx_openmp_simd = false foreach omp_arg : ['-fopenmp-simd', '-qopenmp-simd'] if (not c_openmp_simd) and cc.has_argument(omp_arg) simde_c_args += omp_arg simde_c_defs += '-DSIMDE_ENABLE_OPENMP' c_openmp_simd = true endif if (not cxx_openmp_simd) and cxx.has_argument(omp_arg) simde_cxx_args += omp_arg simde_cxx_defs += '-DSIMDE_ENABLE_OPENMP' cxx_openmp_simd = true endif endforeach simde_native_c_flags = [] if cc.has_argument('-Wpsabi') simde_native_c_flags += '-Wno-psabi' endif # if cc.has_argument('-march=native+simd') # simde_native_c_flags += '-march=native+simd' # elif cc.has_argument('-march=native') # simde_native_c_flags += '-march=native' # endif if cc.get_id() == 'intel' simde_native_c_flags += '-DSIMDE_FAST_MATH' endif simde_native_cxx_flags = [] if cxx.has_argument('-Wpsabi') simde_native_cxx_flags += '-Wno-psabi' endif # if cxx.has_argument('-march=native+simd') # simde_native_cxx_flags += '-march=native+simd' # elif cxx.has_argument('-march=native') # simde_native_cxx_flags += '-march=native' # endif if cxx.get_id() == 'intel' simde_native_cxx_flags += '-DSIMDE_FAST_MATH' endif if not c_openmp_simd simde_deps += dependency('openmp', required: false) endif simde_deps += cc.find_library('m', required: false) simde_include_dir = include_directories('..') subdir('common') subdir('x86') subdir('arm') 
simde-0.7.2/test/munit/000077500000000000000000000000001400333146700147355ustar00rootroot00000000000000simde-0.7.2/test/native-aliases.sh000077500000000000000000000016551400333146700170540ustar00rootroot00000000000000#!/bin/bash -e # Convert SIMDe test cases to strip the simde_ prefix to test native # aliases. # # Based on a script written by Michael R. Crusoe # # After this is done, you should be able to do something like # # cd path/to/simde/root # mkdir build && cd build # CFLAGS='-DSIMDE_ENABLE_NATIVE_ALIASES -DSIMDE_NATIVE_ALIASES_TESTING' CXXFLAGS='-DSIMDE_ENABLE_NATIVE_ALIASES -DSIMDE_NATIVE_ALIASES_TESTING' meson .. # ninja DIRNAME="$(realpath "$(dirname "${0}")")" cd "${DIRNAME}" # x86 ./download-iig.sh "${DIRNAME}/iig.xml" PATTERN="$(xmllint --xpath '//intrinsic/@name' "${DIRNAME}/iig.xml" | grep -Po '(?<=")[^"]+' | grep -Pv '^(_mm256_cvtsi256_si32|_mm512_loadu_epi.+)$' | xargs printf '%s|' | rev | cut -c 2- | rev)" echo "s/([^_])simde(${PATTERN})/\1\2/g" > pattern ls x86/*.c | xargs -n1 -P$(nproc) sed -i -E -f pattern # NEON perl -p -i -e 's/([^a-zA-Z0-9_])simde_v/$1v/g' arm/neon/*.{c,h} simde-0.7.2/test/run-tests.c000066400000000000000000000012741400333146700157150ustar00rootroot00000000000000#include "run-tests.h" #include "../simde/hedley.h" static MunitSuite suites[] = { #define SIMDE_TEST_DECLARE_SUITE(name) { (char*) "/", NULL, NULL, 1, MUNIT_SUITE_OPTION_NONE }, #include "declare-suites.h" #undef SIMDE_TEST_DECLARE_SUITE { NULL, NULL, NULL, 0, MUNIT_SUITE_OPTION_NONE } }; static MunitSuite suite = { "", NULL, suites, 1, MUNIT_SUITE_OPTION_NONE }; int main(int argc, char* argv[HEDLEY_ARRAY_PARAM(argc + 1)]) { { size_t i = 0; #define SIMDE_TEST_DECLARE_SUITE(name) suites[i++] = *HEDLEY_CONCAT3(simde_tests_, name, _get_suite)(); #include "declare-suites.h" #undef SIMDE_TEST_DECLARE_SUITE } return munit_suite_main(&suite, NULL, argc, argv); } 
simde-0.7.2/test/run-tests.h000066400000000000000000000002651400333146700157210ustar00rootroot00000000000000#if defined(SIMDE_TESTS_RUN_TESTS_H) #error File already included. #endif #define SIMDE_TESTS_RUN_TESTS_H #include "test.h" #include "arm/run-tests.h" #include "x86/run-tests.h" simde-0.7.2/test/test.h000066400000000000000000001021261400333146700147330ustar00rootroot00000000000000#if !defined(SIMDE_TESTS_H) #define SIMDE_TESTS_H #define SIMDE_NO_CHECK_IMMEDIATE_CONSTANT #include "../simde/simde-common.h" #include #include #include #include #include typedef enum SimdeTestVecPos { SIMDE_TEST_VEC_POS_FIRST = 1, SIMDE_TEST_VEC_POS_MIDDLE = 0, SIMDE_TEST_VEC_POS_LAST = -1 } SimdeTestVecPos; HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_VLA_ SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ SIMDE_DIAGNOSTIC_DISABLE_PADDED_ SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ #if \ HEDLEY_HAS_BUILTIN(__builtin_abort) || \ HEDLEY_GCC_VERSION_CHECK(3,4,6) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) #define simde_abort() __builtin_abort() #elif defined(SIMDE_HAVE_STDLIB_H) #define simde_abort() abort() #endif #define SIMDE_TEST_ASSERT_CONTINUE 0 #define SIMDE_TEST_ASSERT_TRAP 1 #define SIMDE_TEST_ASSERT_ABORT 2 #if !defined(SIMDE_TEST_ASSERT_FAILURE) #if defined(SIMDE_TEST_BARE) #define SIMDE_TEST_ASSERT_FAILURE SIMDE_TEST_ASSERT_CONTINUE #else #define SIMDE_TEST_ASSERT_FAILURE SIMDE_TEST_ASSERT_ABORT #endif #endif #if !defined(SIMDE_TEST_ASSERT_ABORT) && !defined(SIMDE_TEST_ASSERT_CONTINUE) && !defined(SIMDE_TEST_ASSERT_TRAP) #if defined(SIMDE_TEST_BARE) #define SIMDE_TEST_ASSERT_CONTINUE #else #define SIMDE_TEST_ASSERT_ABORT #endif #endif #if 
SIMDE_TEST_ASSERT_FAILURE == SIMDE_TEST_ASSERT_ABORT #define SIMDE_TEST_ASSERT_RETURN(value) ((void) 0) #else #define SIMDE_TEST_ASSERT_RETURN(value) return value #endif #if defined(SIMDE_TEST_BARE) #define SIMDE_CODEGEN_FP stderr #else #define SIMDE_CODEGEN_FP stdout #endif #if SIMDE_TEST_ASSERT_FAILURE == 2 HEDLEY_NO_RETURN #endif HEDLEY_PRINTF_FORMAT(1, 2) static void simde_test_debug_printf_(const char* format, ...) { va_list ap; va_start(ap, format); vfprintf(stderr, format, ap); va_end(ap); fflush(stderr); /* Debug trap is great for local development where you can attach a * debugger, but processes exiting with a SIGTRAP seem to be rather * confusing for CI. */ #if SIMDE_TEST_ASSERT_FAILURE == 1 simde_trap(); #elif SIMDE_TEST_ASSERT_FAILURE == 2 simde_abort(); #endif } HEDLEY_PRINTF_FORMAT(3, 4) static void simde_test_codegen_snprintf_(char* str, size_t size, const char* format, ...) { va_list ap; int w; va_start(ap, format); w = vsnprintf(str, size, format, ap); va_end(ap); if (w > HEDLEY_STATIC_CAST(int, size)) { simde_test_debug_printf_("Not enough space to write value (given %zu bytes, need %d bytes)\n", size, w + 1); } } static void simde_test_codegen_f32(size_t buf_len, char buf[HEDLEY_ARRAY_PARAM(buf_len)], simde_float32 value) { if (simde_math_isnan(value)) { simde_test_codegen_snprintf_(buf, buf_len, " SIMDE_MATH_NANF"); } else if (simde_math_isinf(value)) { simde_test_codegen_snprintf_(buf, buf_len, "%5cSIMDE_MATH_INFINITYF", value < 0 ? '-' : ' '); } else { simde_test_codegen_snprintf_(buf, buf_len, "SIMDE_FLOAT32_C(%9.2f)", HEDLEY_STATIC_CAST(double, value)); } } static void simde_test_codegen_f64(size_t buf_len, char buf[HEDLEY_ARRAY_PARAM(buf_len)], simde_float64 value) { if (simde_math_isnan(value)) { simde_test_codegen_snprintf_(buf, buf_len, " SIMDE_MATH_NAN"); } else if (simde_math_isinf(value)) { simde_test_codegen_snprintf_(buf, buf_len, "%7cSIMDE_MATH_INFINITY", value < 0 ? 
'-' : ' '); } else { simde_test_codegen_snprintf_(buf, buf_len, "SIMDE_FLOAT64_C(%9.2f)", HEDLEY_STATIC_CAST(double, value)); } } static void simde_test_codegen_i8(size_t buf_len, char buf[HEDLEY_ARRAY_PARAM(buf_len)], int8_t value) { if (value == INT8_MIN) { simde_test_codegen_snprintf_(buf, buf_len, " INT8_MIN"); } else if (value == INT8_MAX) { simde_test_codegen_snprintf_(buf, buf_len, " INT8_MAX"); } else { simde_test_codegen_snprintf_(buf, buf_len, "%cINT8_C(%4" PRId8 ")", (value < 0) ? '-' : ' ', HEDLEY_STATIC_CAST(int8_t, (value < 0) ? -value : value)); } } static void simde_test_codegen_i16(size_t buf_len, char buf[HEDLEY_ARRAY_PARAM(buf_len)], int16_t value) { if (value == INT16_MIN) { simde_test_codegen_snprintf_(buf, buf_len, "%15s", "INT16_MIN"); } else if (value == INT16_MAX) { simde_test_codegen_snprintf_(buf, buf_len, "%15s", "INT16_MAX"); } else { simde_test_codegen_snprintf_(buf, buf_len, "%cINT16_C(%6" PRId16 ")", (value < 0) ? '-' : ' ', HEDLEY_STATIC_CAST(int16_t, (value < 0) ? -value : value)); } } static void simde_test_codegen_i32(size_t buf_len, char buf[HEDLEY_ARRAY_PARAM(buf_len)], int32_t value) { if (value == INT32_MIN) { simde_test_codegen_snprintf_(buf, buf_len, "%20s", "INT32_MIN"); } else if (value == INT32_MAX) { simde_test_codegen_snprintf_(buf, buf_len, "%20s", "INT32_MAX"); } else { simde_test_codegen_snprintf_(buf, buf_len, "%cINT32_C(%12" PRId32 ")", (value < 0) ? '-' : ' ', HEDLEY_STATIC_CAST(int32_t, (value < 0) ? -value : value)); } } static void simde_test_codegen_i64(size_t buf_len, char buf[HEDLEY_ARRAY_PARAM(buf_len)], int64_t value) { if (value == INT64_MIN) { simde_test_codegen_snprintf_(buf, buf_len, "%29s", "INT64_MIN"); } else if (value == INT64_MAX) { simde_test_codegen_snprintf_(buf, buf_len, "%29s", "INT64_MAX"); } else { simde_test_codegen_snprintf_(buf, buf_len, "%cINT64_C(%20" PRId64 ")", (value < 0) ? '-' : ' ', HEDLEY_STATIC_CAST(int64_t, (value < 0) ? 
-value : value)); } } static void simde_test_codegen_u8(size_t buf_len, char buf[HEDLEY_ARRAY_PARAM(buf_len)], uint8_t value) { if (value == UINT8_MAX) { simde_test_codegen_snprintf_(buf, buf_len, " UINT8_MAX"); } else { simde_test_codegen_snprintf_(buf, buf_len, "UINT8_C(%3" PRIu8 ")", value); } } static void simde_test_codegen_u16(size_t buf_len, char buf[HEDLEY_ARRAY_PARAM(buf_len)], uint16_t value) { if (value == UINT16_MAX) { simde_test_codegen_snprintf_(buf, buf_len, "%15s", "UINT16_MAX"); } else { simde_test_codegen_snprintf_(buf, buf_len, "UINT16_C(%5" PRIu16 ")", value); } } static void simde_test_codegen_u32(size_t buf_len, char buf[HEDLEY_ARRAY_PARAM(buf_len)], uint32_t value) { if (value == UINT32_MAX) { simde_test_codegen_snprintf_(buf, buf_len, "%20s", "UINT32_MAX"); } else { simde_test_codegen_snprintf_(buf, buf_len, "UINT32_C(%10" PRIu32 ")", value); } } static void simde_test_codegen_u64(size_t buf_len, char buf[HEDLEY_ARRAY_PARAM(buf_len)], uint64_t value) { if (value == UINT64_MAX) { simde_test_codegen_snprintf_(buf, buf_len, "%29s", "UINT64_MAX"); } else { simde_test_codegen_snprintf_(buf, buf_len, "UINT64_C(%20" PRIu64 ")", value); } } static void simde_test_codegen_write_indent(int indent) { for (int i = 0 ; i < indent ; i++) { fputs(" ", SIMDE_CODEGEN_FP); } } static int simde_test_codegen_rand(void) { /* Single-threaded programs are so nice */ static int is_init = 0; if (HEDLEY_UNLIKELY(!is_init)) { FILE* fp = fopen("/dev/urandom", "r"); if (fp == NULL) fp = fopen("/dev/random", "r"); if (fp != NULL) { unsigned int seed; size_t nread = fread(&seed, sizeof(seed), 1, fp); fclose(fp); if (nread == 1) { srand(seed); is_init = 1; } } if (!is_init) { srand(HEDLEY_STATIC_CAST(unsigned int, time(NULL))); is_init = 1; } } return rand(); } static void simde_test_codegen_random_memory(size_t buf_len, uint8_t buf[HEDLEY_ARRAY_PARAM(buf_len)]) { for (size_t i = 0 ; i < buf_len ; i++) { buf[i] = HEDLEY_STATIC_CAST(uint8_t, simde_test_codegen_rand() & 
0xff); } } static simde_float32 simde_test_codegen_random_f32(simde_float32 min, simde_float32 max) { simde_float32 v = (HEDLEY_STATIC_CAST(simde_float32, simde_test_codegen_rand()) / (HEDLEY_STATIC_CAST(simde_float32, RAND_MAX) / (max - min))) + min; return simde_math_roundf(v * SIMDE_FLOAT32_C(100.0)) / SIMDE_FLOAT32_C(100.0); } static simde_float64 simde_test_codegen_random_f64(simde_float64 min, simde_float64 max) { simde_float64 v = (HEDLEY_STATIC_CAST(simde_float64, simde_test_codegen_rand()) / (HEDLEY_STATIC_CAST(simde_float64, RAND_MAX) / (max - min))) + min; return simde_math_round(v * SIMDE_FLOAT64_C(100.0)) / SIMDE_FLOAT64_C(100.0); } typedef enum SimdeTestVecFloatMask { SIMDE_TEST_VEC_FLOAT_DEFAULT = 0, SIMDE_TEST_VEC_FLOAT_PAIR = 1, SIMDE_TEST_VEC_FLOAT_NAN = 2, SIMDE_TEST_VEC_FLOAT_EQUAL = 4, SIMDE_TEST_VEC_FLOAT_ROUND = 8 } #if \ (HEDLEY_HAS_ATTRIBUTE(flag_enum) && !defined(HEDLEY_IBM_VERSION)) && \ (!defined(__cplusplus) || SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0)) __attribute__((__flag_enum__)) #endif SimdeTestVecFloatType; /* This is a bit messy, sorry. And I haven't really tested with * anything greater than 4-element vectors, there is no input * validation, etc. I'm not going to lose any sleep since it's * just a test harness, but you probably shouldn't use this API * directly since there is a good chance it will change. 
*/ static void simde_test_codegen_calc_pair(int pairwise, size_t test_sets, size_t vectors_per_set, size_t elements_per_vector, size_t pos, size_t* a, size_t* b) { (void) test_sets; // <- for validating ranges if (pairwise) { *a = (((pos * 2) + 0) % elements_per_vector) + ((((pos * 2) + 0) / elements_per_vector) * elements_per_vector); *b = (((pos * 2) + 1) % elements_per_vector) + ((((pos * 2) + 1) / elements_per_vector) * elements_per_vector); } else { size_t elements_per_set = elements_per_vector * vectors_per_set; size_t set_num = pos / elements_per_vector; size_t pos_in_set = pos % elements_per_vector; *a = (elements_per_set * set_num) + pos_in_set; *b = *a + elements_per_vector; } } static void simde_test_codegen_float_set_value_(size_t element_size, size_t pos, void* values, simde_float32 f32_val, simde_float64 f64_val) { switch (element_size) { case sizeof(simde_float32): HEDLEY_REINTERPRET_CAST(simde_float32*, values)[pos] = f32_val; break; case sizeof(simde_float64): HEDLEY_REINTERPRET_CAST(simde_float64*, values)[pos] = f64_val; break; } } static void simde_test_codegen_random_vfX_full_( size_t test_sets, size_t vectors_per_set, size_t elements_per_vector, size_t elem_size, void* values, simde_float64 min, simde_float64 max, SimdeTestVecFloatType vec_type) { for (size_t i = 0 ; i < (test_sets * vectors_per_set * elements_per_vector) ; i++) { simde_float64 v = simde_test_codegen_random_f64(min, max); if (vec_type & SIMDE_TEST_VEC_FLOAT_ROUND) { if (simde_test_codegen_rand() & 7) { do { v = HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, v)); if (simde_test_codegen_rand() & 7) v += 0.5; } while (v > max || v < min); } } simde_test_codegen_float_set_value_(elem_size, i, values, HEDLEY_STATIC_CAST(simde_float32, v), v); } int pairwise = !!(vec_type & SIMDE_TEST_VEC_FLOAT_PAIR); size_t pos = 0; size_t a, b; if (vec_type & SIMDE_TEST_VEC_FLOAT_NAN) { simde_test_codegen_calc_pair(pairwise, test_sets, vectors_per_set, elements_per_vector, pos++, 
&a, &b); simde_test_codegen_float_set_value_(elem_size, a, values, SIMDE_MATH_NANF, SIMDE_MATH_NAN); simde_test_codegen_calc_pair(pairwise, test_sets, vectors_per_set, elements_per_vector, pos++, &a, &b); simde_test_codegen_float_set_value_(elem_size, b, values, SIMDE_MATH_NANF, SIMDE_MATH_NAN); simde_test_codegen_calc_pair(pairwise, test_sets, vectors_per_set, elements_per_vector, pos++, &a, &b); simde_test_codegen_float_set_value_(elem_size, a, values, SIMDE_MATH_NANF, SIMDE_MATH_NAN); simde_test_codegen_float_set_value_(elem_size, b, values, SIMDE_MATH_NANF, SIMDE_MATH_NAN); } if (vec_type & SIMDE_TEST_VEC_FLOAT_EQUAL) { simde_test_codegen_calc_pair(pairwise, test_sets, vectors_per_set, elements_per_vector, pos++, &a, &b); simde_float64 v = simde_test_codegen_random_f64(min, max); simde_test_codegen_float_set_value_(elem_size, a, values, HEDLEY_STATIC_CAST(simde_float32, v), v); simde_test_codegen_float_set_value_(elem_size, b, values, HEDLEY_STATIC_CAST(simde_float32, v), v); } } static void simde_test_codegen_random_vf32_full( size_t test_sets, size_t vectors_per_set, size_t elements_per_vector, simde_float32 values[HEDLEY_ARRAY_PARAM(test_sets * vectors_per_set * elements_per_vector)], simde_float32 min, simde_float32 max, SimdeTestVecFloatType vec_type) { simde_test_codegen_random_vfX_full_(test_sets, vectors_per_set, elements_per_vector, sizeof(simde_float32), values, HEDLEY_STATIC_CAST(simde_float64, min), HEDLEY_STATIC_CAST(simde_float64, max), vec_type); } static void simde_test_codegen_random_vf64_full( size_t test_sets, size_t vectors_per_set, size_t elements_per_vector, simde_float64 values[HEDLEY_ARRAY_PARAM(test_sets * vectors_per_set * elements_per_vector)], simde_float64 min, simde_float64 max, SimdeTestVecFloatType vec_type) { simde_test_codegen_random_vfX_full_(test_sets, vectors_per_set, elements_per_vector, sizeof(simde_float64), values, min, max, vec_type); } static void simde_test_codegen_random_vf32(size_t elem_count, simde_float32 
values[HEDLEY_ARRAY_PARAM(elem_count)], simde_float32 min, simde_float32 max) { for (size_t i = 0 ; i < elem_count ; i++) { values[i] = simde_test_codegen_random_f32(min, max); } } static void simde_test_codegen_random_vf64(size_t elem_count, simde_float64 values[HEDLEY_ARRAY_PARAM(elem_count)], simde_float64 min, simde_float64 max) { for (size_t i = 0 ; i < elem_count ; i++) { values[i] = simde_test_codegen_random_f64(min, max); } } #define SIMDE_TEST_CODEGEN_GENERATE_RANDOM_INT_FUNC_(T, symbol_identifier) \ static T simde_test_codegen_random_##symbol_identifier(void) { \ T r; \ simde_test_codegen_random_memory(sizeof(r), HEDLEY_REINTERPRET_CAST(uint8_t*, &r)); \ return r; \ } SIMDE_TEST_CODEGEN_GENERATE_RANDOM_INT_FUNC_(int8_t, i8) SIMDE_TEST_CODEGEN_GENERATE_RANDOM_INT_FUNC_(int16_t, i16) SIMDE_TEST_CODEGEN_GENERATE_RANDOM_INT_FUNC_(int32_t, i32) SIMDE_TEST_CODEGEN_GENERATE_RANDOM_INT_FUNC_(int64_t, i64) SIMDE_TEST_CODEGEN_GENERATE_RANDOM_INT_FUNC_(uint8_t, u8) SIMDE_TEST_CODEGEN_GENERATE_RANDOM_INT_FUNC_(uint16_t, u16) SIMDE_TEST_CODEGEN_GENERATE_RANDOM_INT_FUNC_(uint32_t, u32) SIMDE_TEST_CODEGEN_GENERATE_RANDOM_INT_FUNC_(uint64_t, u64) #define SIMDE_TEST_CODEGEN_GENERATE_WRITE_VECTOR_FUNC_(T, symbol_identifier, elements_per_line) \ static void \ simde_test_codegen_write_v##symbol_identifier(int indent, size_t elem_count, T values[HEDLEY_ARRAY_PARAM(elem_count)], SimdeTestVecPos pos) { \ switch (pos) { \ case SIMDE_TEST_VEC_POS_FIRST: \ simde_test_codegen_write_indent(indent); \ indent++; \ fputs("{ ", SIMDE_CODEGEN_FP); \ break; \ case SIMDE_TEST_VEC_POS_MIDDLE: \ case SIMDE_TEST_VEC_POS_LAST: \ indent++; \ simde_test_codegen_write_indent(indent); \ break; \ } \ \ fputs("{ ", SIMDE_CODEGEN_FP); \ for (size_t i = 0 ; i < elem_count ; i++) { \ if (i != 0) { \ fputc(',', SIMDE_CODEGEN_FP); \ if ((i % elements_per_line) == 0) { \ fputc('\n', SIMDE_CODEGEN_FP); \ simde_test_codegen_write_indent(indent + 1); \ } else { \ fputc(' ', SIMDE_CODEGEN_FP); \ } \ } \ \ 
char buf[53]; \ simde_test_codegen_##symbol_identifier(sizeof(buf), buf, values[i]); \ fputs(buf, SIMDE_CODEGEN_FP); \ } \ fputs(" }", SIMDE_CODEGEN_FP); \ \ switch (pos) { \ case SIMDE_TEST_VEC_POS_FIRST: \ case SIMDE_TEST_VEC_POS_MIDDLE: \ fputc(',', SIMDE_CODEGEN_FP); \ break; \ case SIMDE_TEST_VEC_POS_LAST: \ fputs(" },", SIMDE_CODEGEN_FP); \ break; \ } \ \ fputc('\n', SIMDE_CODEGEN_FP); \ } SIMDE_TEST_CODEGEN_GENERATE_WRITE_VECTOR_FUNC_(simde_float32, f32, 4) SIMDE_TEST_CODEGEN_GENERATE_WRITE_VECTOR_FUNC_(simde_float64, f64, 4) SIMDE_TEST_CODEGEN_GENERATE_WRITE_VECTOR_FUNC_(int8_t, i8, 8) SIMDE_TEST_CODEGEN_GENERATE_WRITE_VECTOR_FUNC_(int16_t, i16, 8) SIMDE_TEST_CODEGEN_GENERATE_WRITE_VECTOR_FUNC_(int32_t, i32, 8) SIMDE_TEST_CODEGEN_GENERATE_WRITE_VECTOR_FUNC_(int64_t, i64, 4) SIMDE_TEST_CODEGEN_GENERATE_WRITE_VECTOR_FUNC_(uint8_t, u8, 8) SIMDE_TEST_CODEGEN_GENERATE_WRITE_VECTOR_FUNC_(uint16_t, u16, 8) SIMDE_TEST_CODEGEN_GENERATE_WRITE_VECTOR_FUNC_(uint32_t, u32, 8) SIMDE_TEST_CODEGEN_GENERATE_WRITE_VECTOR_FUNC_(uint64_t, u64, 4) #define SIMDE_TEST_CODEGEN_WRITE_SCALAR_FUNC_(T, symbol_identifier) \ static void \ simde_test_codegen_write_##symbol_identifier(int indent, T value, SimdeTestVecPos pos) { \ switch (pos) { \ case SIMDE_TEST_VEC_POS_FIRST: \ simde_test_codegen_write_indent(indent); \ indent++; \ fputs("{ ", SIMDE_CODEGEN_FP); \ break; \ case SIMDE_TEST_VEC_POS_MIDDLE: \ case SIMDE_TEST_VEC_POS_LAST: \ indent++; \ simde_test_codegen_write_indent(indent); \ break; \ } \ \ { \ char buf[53]; \ simde_test_codegen_##symbol_identifier(sizeof(buf), buf, value); \ fputs(buf, SIMDE_CODEGEN_FP); \ } \ \ switch (pos) { \ case SIMDE_TEST_VEC_POS_FIRST: \ case SIMDE_TEST_VEC_POS_MIDDLE: \ fputc(',', SIMDE_CODEGEN_FP); \ break; \ case SIMDE_TEST_VEC_POS_LAST: \ fputs(" },", SIMDE_CODEGEN_FP); \ break; \ } \ \ fputc('\n', SIMDE_CODEGEN_FP); \ } SIMDE_TEST_CODEGEN_WRITE_SCALAR_FUNC_(int8_t, i8) SIMDE_TEST_CODEGEN_WRITE_SCALAR_FUNC_(int16_t, i16) 
SIMDE_TEST_CODEGEN_WRITE_SCALAR_FUNC_(int32_t, i32) SIMDE_TEST_CODEGEN_WRITE_SCALAR_FUNC_(int64_t, i64) SIMDE_TEST_CODEGEN_WRITE_SCALAR_FUNC_(uint8_t, u8) SIMDE_TEST_CODEGEN_WRITE_SCALAR_FUNC_(uint16_t, u16) SIMDE_TEST_CODEGEN_WRITE_SCALAR_FUNC_(uint32_t, u32) SIMDE_TEST_CODEGEN_WRITE_SCALAR_FUNC_(uint64_t, u64) SIMDE_TEST_CODEGEN_WRITE_SCALAR_FUNC_(simde_float32, f32) SIMDE_TEST_CODEGEN_WRITE_SCALAR_FUNC_(simde_float64, f64) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ static int simde_test_equal_f32(simde_float32 a, simde_float32 b, simde_float32 slop) { if (simde_math_isnan(a)) { return simde_math_isnan(b); } else if (simde_math_isinf(a)) { return !((a < b) || (a > b)); } else { simde_float32 lo = a - slop; if (HEDLEY_UNLIKELY(lo == a)) lo = simde_math_nextafterf(a, -SIMDE_MATH_INFINITYF); simde_float32 hi = a + slop; if (HEDLEY_UNLIKELY(hi == a)) hi = simde_math_nextafterf(a, SIMDE_MATH_INFINITYF); return ((b >= lo) && (b <= hi)); } } static int simde_test_equal_f64(simde_float64 a, simde_float64 b, simde_float64 slop) { if (simde_math_isnan(a)) { return simde_math_isnan(b); } else if (simde_math_isinf(a)) { return !((a < b) || (a > b)); } else { simde_float64 lo = a - slop; if (HEDLEY_UNLIKELY(lo == a)) lo = simde_math_nextafter(a, -SIMDE_MATH_INFINITY); simde_float64 hi = a + slop; if (HEDLEY_UNLIKELY(hi == a)) hi = simde_math_nextafter(a, SIMDE_MATH_INFINITY); return ((b >= lo) && (b <= hi)); } } HEDLEY_DIAGNOSTIC_POP static float simde_test_f32_precision_to_slop(int precision) { return simde_math_powf(SIMDE_FLOAT32_C(10.0), -HEDLEY_STATIC_CAST(float, precision)); } static double simde_test_f64_precision_to_slop(int precision) { return simde_math_pow(SIMDE_FLOAT64_C(10.0), -HEDLEY_STATIC_CAST(double, precision)); } static int simde_assert_equal_vf32_( size_t vec_len, simde_float32 const a[HEDLEY_ARRAY_PARAM(vec_len)], simde_float32 const b[HEDLEY_ARRAY_PARAM(vec_len)], simde_float32 slop, const char* filename, int line, const char* astr, 
const char* bstr) { for (size_t i = 0 ; i < vec_len ; i++) { if (HEDLEY_UNLIKELY(!simde_test_equal_f32(a[i], b[i], slop))) { simde_test_debug_printf_("%s:%d: assertion failed: %s[%zu] ~= %s[%zu] (%f ~= %f)\n", filename, line, astr, i, bstr, i, HEDLEY_STATIC_CAST(double, a[i]), HEDLEY_STATIC_CAST(double, b[i])); SIMDE_TEST_ASSERT_RETURN(1); } } return 0; } #define simde_assert_equal_vf32(vec_len, a, b, precision) simde_assert_equal_vf32_(vec_len, a, b, simde_test_f32_precision_to_slop(precision), __FILE__, __LINE__, #a, #b) static int simde_assert_equal_f32_(simde_float32 a, simde_float32 b, simde_float32 slop, const char* filename, int line, const char* astr, const char* bstr) { if (HEDLEY_UNLIKELY(!simde_test_equal_f32(a, b, slop))) { simde_test_debug_printf_("%s:%d: assertion failed: %s ~= %s (%f ~= %f)\n", filename, line, astr, bstr, HEDLEY_STATIC_CAST(double, a), HEDLEY_STATIC_CAST(double, b)); SIMDE_TEST_ASSERT_RETURN(1); } return 0; } #define simde_assert_equal_f32(a, b, precision) simde_assert_equal_f32_(a, b, simde_test_f32_precision_to_slop(precision), __FILE__, __LINE__, #a, #b) static int simde_assert_equal_vf64_( size_t vec_len, simde_float64 const a[HEDLEY_ARRAY_PARAM(vec_len)], simde_float64 const b[HEDLEY_ARRAY_PARAM(vec_len)], simde_float64 slop, const char* filename, int line, const char* astr, const char* bstr) { for (size_t i = 0 ; i < vec_len ; i++) { if (HEDLEY_UNLIKELY(!simde_test_equal_f64(a[i], b[i], slop))) { simde_test_debug_printf_("%s:%d: assertion failed: %s[%zu] ~= %s[%zu] (%f ~= %f)\n", filename, line, astr, i, bstr, i, HEDLEY_STATIC_CAST(double, a[i]), HEDLEY_STATIC_CAST(double, b[i])); SIMDE_TEST_ASSERT_RETURN(1); } } return 0; } #define simde_assert_equal_vf64(vec_len, a, b, precision) simde_assert_equal_vf64_(vec_len, a, b, simde_test_f64_precision_to_slop(precision), __FILE__, __LINE__, #a, #b) static int simde_assert_equal_f64_(simde_float64 a, simde_float64 b, simde_float64 slop, const char* filename, int line, const char* 
astr, const char* bstr) { if (HEDLEY_UNLIKELY(!simde_test_equal_f64(a, b, slop))) { simde_test_debug_printf_("%s:%d: assertion failed: %s ~= %s (%f ~= %f)\n", filename, line, astr, bstr, a, b); SIMDE_TEST_ASSERT_RETURN(1); } return 0; } #define simde_assert_equal_f64(a, b, precision) simde_assert_equal_f64_(a, b, simde_test_f64_precision_to_slop(precision), __FILE__, __LINE__, #a, #b) #define SIMDE_TEST_GENERATE_ASSERT_EQUAL_FUNC_(T, symbol_identifier, fmt) \ static int \ simde_assert_equal_v##symbol_identifier##_( \ size_t vec_len, const T a[HEDLEY_ARRAY_PARAM(vec_len)], const T b[HEDLEY_ARRAY_PARAM(vec_len)], \ const char* filename, int line, const char* astr, const char* bstr) { \ for (size_t i = 0 ; i < vec_len ; i++) { \ if (HEDLEY_UNLIKELY(a[i] != b[i])) { \ simde_test_debug_printf_("%s:%d: assertion failed: %s[%zu] == %s[%zu] (%" fmt " == %" fmt ")\n", \ filename, line, astr, i, bstr, i, a[i], b[i]); \ SIMDE_TEST_ASSERT_RETURN(1); \ } \ } \ return 0; \ } \ \ static int \ simde_assert_equal_##symbol_identifier##_(T a, T b, \ const char* filename, int line, const char* astr, const char* bstr) { \ if (HEDLEY_UNLIKELY(a != b)) { \ simde_test_debug_printf_("%s:%d: assertion failed: %s == %s (%" fmt " == %" fmt ")\n", \ filename, line, astr, bstr, a, b); \ SIMDE_TEST_ASSERT_RETURN(1); \ } \ return 0; \ } \ \ static int \ simde_assert_close_v##symbol_identifier##_( \ size_t vec_len, const T a[HEDLEY_ARRAY_PARAM(vec_len)], const T b[HEDLEY_ARRAY_PARAM(vec_len)], const T slop, \ const char* filename, int line, const char* astr, const char* bstr) { \ for (size_t i = 0 ; i < vec_len ; i++) { \ if (((a[i] + slop) < b[i]) || ((a[i] - slop) > b[i])) { \ simde_test_debug_printf_("%s:%d: assertion failed: %s[%zu] == %s[%zu] (%" fmt " == %" fmt ")\n", \ filename, line, astr, i, bstr, i, a[i], b[i]); \ SIMDE_TEST_ASSERT_RETURN(1); \ } \ } \ return 0; \ } \ \ static int \ simde_assert_close_##symbol_identifier##_(T a, T b, T slop, \ const char* filename, int line, const char* 
astr, const char* bstr) { \ if (((a + slop) < b) || ((a - slop) > b)) { \ simde_test_debug_printf_("%s:%d: assertion failed: %s == %s +/- %" fmt " (%" fmt " == %" fmt ")\n", \ filename, line, astr, bstr, slop, a, b); \ SIMDE_TEST_ASSERT_RETURN(1); \ } \ return 0; \ } static int simde_assert_equal_i_(int a, int b, const char* filename, int line, const char* astr, const char* bstr) { if (HEDLEY_UNLIKELY(a != b)) { simde_test_debug_printf_("%s:%d: assertion failed: %s == %s (%d == %d)\n", filename, line, astr, bstr, a, b); SIMDE_TEST_ASSERT_RETURN(1); } return 0; } SIMDE_TEST_GENERATE_ASSERT_EQUAL_FUNC_(int8_t, i8, PRId8) SIMDE_TEST_GENERATE_ASSERT_EQUAL_FUNC_(int16_t, i16, PRId16) SIMDE_TEST_GENERATE_ASSERT_EQUAL_FUNC_(int32_t, i32, PRId32) SIMDE_TEST_GENERATE_ASSERT_EQUAL_FUNC_(int64_t, i64, PRId64) SIMDE_TEST_GENERATE_ASSERT_EQUAL_FUNC_(uint8_t, u8, PRIu8) SIMDE_TEST_GENERATE_ASSERT_EQUAL_FUNC_(uint16_t, u16, PRIu16) SIMDE_TEST_GENERATE_ASSERT_EQUAL_FUNC_(uint32_t, u32, PRIu32) SIMDE_TEST_GENERATE_ASSERT_EQUAL_FUNC_(uint64_t, u64, PRIu64) #define simde_assert_equal_vi8(vec_len, a, b) do { if (simde_assert_equal_vi8_(vec_len, a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_assert_equal_vi16(vec_len, a, b) do { if (simde_assert_equal_vi16_(vec_len, a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_assert_equal_vi32(vec_len, a, b) do { if (simde_assert_equal_vi32_(vec_len, a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_assert_equal_vi64(vec_len, a, b) do { if (simde_assert_equal_vi64_(vec_len, a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_assert_equal_vu8(vec_len, a, b) do { if (simde_assert_equal_vu8_(vec_len, a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_assert_equal_vu16(vec_len, a, b) do { if (simde_assert_equal_vu16_(vec_len, a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_assert_equal_vu32(vec_len, 
a, b) do { if (simde_assert_equal_vu32_(vec_len, a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_equal_vu64(vec_len, a, b) do { if (simde_assert_equal_vu64_(vec_len, a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)

/* Scalar exact-equality assertions.  Like the vector forms, each one
 * forwards __FILE__, __LINE__ and the stringified operands to the
 * underscore-suffixed helper and returns 1 from the enclosing function
 * when the helper returns non-zero. */
#define simde_assert_equal_i8(a, b) do { if (simde_assert_equal_i8_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_equal_i16(a, b) do { if (simde_assert_equal_i16_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_equal_i32(a, b) do { if (simde_assert_equal_i32_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_equal_i64(a, b) do { if (simde_assert_equal_i64_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_equal_u8(a, b) do { if (simde_assert_equal_u8_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_equal_u16(a, b) do { if (simde_assert_equal_u16_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_equal_u32(a, b) do { if (simde_assert_equal_u32_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_equal_u64(a, b) do { if (simde_assert_equal_u64_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_equal_i(a, b) do { if (simde_assert_equal_i_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)

/* Vector assertions with a tolerance (slop) passed through to the helper. */
#define simde_assert_close_vi8(vec_len, a, b, slop) do { if (simde_assert_close_vi8_(vec_len, a, b, slop, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_close_vi16(vec_len, a, b, slop) do { if (simde_assert_close_vi16_(vec_len, a, b, slop, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_close_vi32(vec_len, a, b, slop) do { if (simde_assert_close_vi32_(vec_len, a, b, slop, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_close_vi64(vec_len, a, b, slop) do { if (simde_assert_close_vi64_(vec_len, a, b, slop, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_close_vu8(vec_len, a, b, slop) do { if (simde_assert_close_vu8_(vec_len, a, b, slop, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_close_vu16(vec_len, a, b, slop) do { if (simde_assert_close_vu16_(vec_len, a, b, slop, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_close_vu32(vec_len, a, b, slop) do { if (simde_assert_close_vu32_(vec_len, a, b, slop, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_close_vu64(vec_len, a, b, slop) do { if (simde_assert_close_vu64_(vec_len, a, b, slop, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)

/* Scalar assertions with a tolerance. */
#define simde_assert_close_i8(a, b, slop) do { if (simde_assert_close_i8_(a, b, slop, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_close_i16(a, b, slop) do { if (simde_assert_close_i16_(a, b, slop, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_close_i32(a, b, slop) do { if (simde_assert_close_i32_(a, b, slop, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_close_i64(a, b, slop) do { if (simde_assert_close_i64_(a, b, slop, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_close_u8(a, b, slop) do { if (simde_assert_close_u8_(a, b, slop, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_close_u16(a, b, slop) do { if (simde_assert_close_u16_(a, b, slop, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_close_u32(a, b, slop) do { if (simde_assert_close_u32_(a, b, slop, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_close_u64(a, b, slop) do { if (simde_assert_close_u64_(a, b, slop, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)
#define simde_assert_close_i(a, b, slop) do { if (simde_assert_close_i_(a, b, slop, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0)

/* Since each test is compiled in 4 different versions (C/C++ and
 * native/emul), we need to be able to generate different symbols
 * depending on preprocessor macros.
 *
 * ..._SYMBOL_CURRENT(name) appends the variant suffix to an identifier;
 * ..._NAME_CURRENT(name) builds the matching "name/<variant>" string. */
#if defined(SIMDE_NO_NATIVE)
  #if defined(__cplusplus)
    #define SIMDE_TEST_GENERATE_VARIANT_SYMBOL_CURRENT(name) HEDLEY_CONCAT(name,_emul_cpp)
    #define SIMDE_TEST_GENERATE_VARIANT_NAME_CURRENT(name) #name "/emul/cpp"
  #else
    #define SIMDE_TEST_GENERATE_VARIANT_SYMBOL_CURRENT(name) HEDLEY_CONCAT(name,_emul_c)
    #define SIMDE_TEST_GENERATE_VARIANT_NAME_CURRENT(name) #name "/emul/c"
  #endif
#else
  #if defined(__cplusplus)
    #define SIMDE_TEST_GENERATE_VARIANT_SYMBOL_CURRENT(name) HEDLEY_CONCAT(name,_native_cpp)
    #define SIMDE_TEST_GENERATE_VARIANT_NAME_CURRENT(name) #name "/native/cpp"
  #else
    #define SIMDE_TEST_GENERATE_VARIANT_SYMBOL_CURRENT(name) HEDLEY_CONCAT(name,_native_c)
    #define SIMDE_TEST_GENERATE_VARIANT_NAME_CURRENT(name) #name "/native/c"
  #endif
#endif

/* The bare version basically assumes you just want to run a single
 * test suite.  It doesn't use munit, or any other dependencies so
 * it's easy to use with creduce. */
#if defined(SIMDE_TEST_BARE)
  /* Test list is a plain array of { function, name } pairs and test
   * functions take no arguments. */
  #define SIMDE_TEST_FUNC_LIST_BEGIN static const struct { int (* func)(void); const char* name; } test_suite_tests[] = {
  #define SIMDE_TEST_FUNC_LIST_ENTRY(name) { test_simde_##name, #name },
  #define SIMDE_TEST_FUNC_LIST_END };
  #define SIMDE_MUNIT_TEST_ARGS void
#else
  /* munit.h is included with several diagnostics silenced. */
  HEDLEY_DIAGNOSTIC_PUSH
  SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_
  SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_
  SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_
  SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_
  #include "munit/munit.h"
  HEDLEY_DIAGNOSTIC_POP

  /* When the unused attribute is available, test functions take the
   * munit parameters, both marked unused. */
  #if \
      HEDLEY_HAS_ATTRIBUTE(unused) || \
      HEDLEY_GCC_VERSION_CHECK(3,1,0)
    #define SIMDE_MUNIT_TEST_ARGS __attribute__((__unused__)) const MunitParameter params[], __attribute__((__unused__)) void* data
  #else
    /* Compilers other than emscripten are fine with casting away
     * arguments.
*/ #define SIMDE_MUNIT_TEST_ARGS void #endif #define SIMDE_TEST_FUNC_LIST_BEGIN static MunitTest test_suite_tests[] = { #if defined(__cplusplus) #define SIMDE_TEST_FUNC_LIST_ENTRY(name) { \ const_cast("/" SIMDE_TEST_GENERATE_VARIANT_NAME_CURRENT(name)), \ HEDLEY_REINTERPRET_CAST(MunitTestFunc, test_simde_##name), \ NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }, #else #define SIMDE_TEST_FUNC_LIST_ENTRY(name) { \ (char*) "/" SIMDE_TEST_GENERATE_VARIANT_NAME_CURRENT(name), \ HEDLEY_REINTERPRET_CAST(MunitTestFunc, test_simde_##name), \ NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }, #endif #define SIMDE_TEST_FUNC_LIST_END { NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL } }; #define SIMDE_TEST_SUITE_DECLARE_GETTERS(name) \ HEDLEY_C_DECL MunitSuite* HEDLEY_CONCAT(name, _native_c)(void); \ HEDLEY_C_DECL MunitSuite* HEDLEY_CONCAT(name, _emul_c)(void); \ HEDLEY_C_DECL MunitSuite* HEDLEY_CONCAT(name, _native_cpp)(void); \ HEDLEY_C_DECL MunitSuite* HEDLEY_CONCAT(name, _emul_cpp)(void); #endif #endif /* !defined(SIMDE_TESTS_H) */ simde-0.7.2/test/x86/000077500000000000000000000000001400333146700142265ustar00rootroot00000000000000simde-0.7.2/test/x86/avx.c000066400000000000000000036170611400333146700152060ustar00rootroot00000000000000/* Copyright (c) 2018, 2019 Evan Nemerson * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
*
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define SIMDE_TESTS_CURRENT_ISAX avx
/* NOTE(review): the targets of the two include directives below are
 * missing from this copy of the file.  Presumably they name the AVX
 * implementation header and the x86 test-support header -- confirm
 * against the upstream source before building. */
#include
#include

/* Reinterprets the bits of a 32-bit integer as a simde_float32. */
static simde_float32 u32_to_f32(uint32_t u32) {
  simde_float32 f32;
  simde_memcpy(&f32, &u32, sizeof(f32));
  return f32;
}

/* Reinterprets the bits of a 64-bit integer as a simde_float64. */
static simde_float64 u64_to_f64(uint64_t u64) {
  simde_float64 f64;
  simde_memcpy(&f64, &u64, sizeof(f64));
  return f64;
}

/* Floating-point values whose bit pattern is all ones. */
#define SIMDE_F32_ALL_SET (u32_to_f32(~UINT32_C(0)))
#define SIMDE_F64_ALL_SET (u64_to_f64(~UINT64_C(0)))

/* Fills a 32-element int8 array with random bytes, passes the elements
 * to simde_mm256_set_epi8() highest index first, and checks the result
 * against an unaligned load of the same array.  Repeated 32 times. */
static int
test_simde_mm256_set_epi8(SIMDE_MUNIT_TEST_ARGS) {
  for (size_t i = 0 ; i < 32 ; i++) {
    int8_t a[32];
    simde__m256i r;

    simde_test_codegen_random_memory(sizeof(a), HEDLEY_REINTERPRET_CAST(uint8_t*, a));

    r = simde_mm256_set_epi8(a[31], a[30], a[29], a[28], a[27], a[26], a[25], a[24],
                             a[23], a[22], a[21], a[20], a[19], a[18], a[17], a[16],
                             a[15], a[14], a[13], a[12], a[11], a[10], a[ 9], a[ 8],
                             a[ 7], a[ 6], a[ 5], a[ 4], a[ 3], a[ 2], a[ 1], a[ 0]);

    simde_test_x86_assert_equal_i8x32(simde_x_mm256_loadu_epi8(a), r);
  }

  return 0;
}

/* Same scheme as the epi8 test with sixteen 16-bit elements; 16 rounds. */
static int
test_simde_mm256_set_epi16(SIMDE_MUNIT_TEST_ARGS) {
  for (size_t i = 0 ; i < 16 ; i++) {
    int16_t a[16];
    simde__m256i r;

    simde_test_codegen_random_memory(sizeof(a), HEDLEY_REINTERPRET_CAST(uint8_t*, a));

    r = simde_mm256_set_epi16(a[15], a[14], a[13], a[12], a[11], a[10], a[ 9], a[ 8],
                              a[ 7], a[ 6], a[ 5], a[ 4], a[ 3], a[ 2], a[ 1], a[ 0]);

    simde_test_x86_assert_equal_i16x16(simde_x_mm256_loadu_epi16(a), r);
  }

  return 0;
}

/* Same scheme with eight 32-bit elements; 32 rounds. */
static int
test_simde_mm256_set_epi32(SIMDE_MUNIT_TEST_ARGS) {
  for (size_t i = 0 ; i < 32 ; i++) {
    int32_t a[8];
    simde__m256i r;

    simde_test_codegen_random_memory(sizeof(a), HEDLEY_REINTERPRET_CAST(uint8_t*, a));

    r = simde_mm256_set_epi32(a[7], a[6], a[5], a[4], a[3], a[2], a[1], a[0]);

    simde_test_x86_assert_equal_i32x8(simde_x_mm256_loadu_epi32(a), r);
  }

  return 0;
}

/* Same scheme with four 64-bit elements; 64 rounds. */
static int
test_simde_mm256_set_epi64x(SIMDE_MUNIT_TEST_ARGS) {
  for (size_t i = 0 ; i < 64 ; i++) {
    int64_t a[4];
    simde__m256i r;

    simde_test_codegen_random_memory(sizeof(a), HEDLEY_REINTERPRET_CAST(uint8_t*, a));

    r = simde_mm256_set_epi64x(a[3], a[2], a[1], a[0]);

    simde_test_x86_assert_equal_i64x4(simde_x_mm256_loadu_epi64(a), r);
  }

  return 0;
}

/* Table-driven test: each entry holds the eight inputs (a) and the
 * expected vector contents in memory order (r). */
static int
test_simde_mm256_set_ps(SIMDE_MUNIT_TEST_ARGS) {
  struct {
    simde_float32 a[sizeof(simde__m256) / sizeof(simde_float32)];
    simde_float32 r[sizeof(simde__m256) / sizeof(simde_float32)];
  } test_vec[] = {
    { { SIMDE_FLOAT32_C( -114.94), SIMDE_FLOAT32_C( -844.95), SIMDE_FLOAT32_C( 616.69), SIMDE_FLOAT32_C( -717.57),
        SIMDE_FLOAT32_C( 321.75), SIMDE_FLOAT32_C( 709.09), SIMDE_FLOAT32_C( -540.81), SIMDE_FLOAT32_C( 218.53) },
      { SIMDE_FLOAT32_C( -114.94), SIMDE_FLOAT32_C( -844.95), SIMDE_FLOAT32_C( 616.69), SIMDE_FLOAT32_C( -717.57),
        SIMDE_FLOAT32_C( 321.75), SIMDE_FLOAT32_C( 709.09), SIMDE_FLOAT32_C( -540.81), SIMDE_FLOAT32_C( 218.53) } },
    { { SIMDE_FLOAT32_C( -651.16), SIMDE_FLOAT32_C( -438.85), SIMDE_FLOAT32_C( 9.87), SIMDE_FLOAT32_C( 429.00),
        SIMDE_FLOAT32_C( 129.97), SIMDE_FLOAT32_C( 774.42), SIMDE_FLOAT32_C( -864.41), SIMDE_FLOAT32_C( -659.36) },
      { SIMDE_FLOAT32_C( -651.16), SIMDE_FLOAT32_C( -438.85), SIMDE_FLOAT32_C( 9.87), SIMDE_FLOAT32_C( 429.00),
        SIMDE_FLOAT32_C( 129.97), SIMDE_FLOAT32_C( 774.42), SIMDE_FLOAT32_C( -864.41), SIMDE_FLOAT32_C( -659.36) } },
    { { SIMDE_FLOAT32_C( 454.07), SIMDE_FLOAT32_C( -313.38), SIMDE_FLOAT32_C( 658.78), SIMDE_FLOAT32_C( 553.99),
        SIMDE_FLOAT32_C( 841.57), SIMDE_FLOAT32_C( -926.85), SIMDE_FLOAT32_C( -371.71), SIMDE_FLOAT32_C( -519.83) },
      { SIMDE_FLOAT32_C( 454.07), SIMDE_FLOAT32_C( -313.38), SIMDE_FLOAT32_C( 658.78), SIMDE_FLOAT32_C( 553.99),
SIMDE_FLOAT32_C( 841.57), SIMDE_FLOAT32_C( -926.85), SIMDE_FLOAT32_C( -371.71), SIMDE_FLOAT32_C( -519.83) } }, { { SIMDE_FLOAT32_C( -569.24), SIMDE_FLOAT32_C( -886.28), SIMDE_FLOAT32_C( 502.22), SIMDE_FLOAT32_C( 647.50), SIMDE_FLOAT32_C( 962.06), SIMDE_FLOAT32_C( 396.40), SIMDE_FLOAT32_C( 624.85), SIMDE_FLOAT32_C( -152.88) }, { SIMDE_FLOAT32_C( -569.24), SIMDE_FLOAT32_C( -886.28), SIMDE_FLOAT32_C( 502.22), SIMDE_FLOAT32_C( 647.50), SIMDE_FLOAT32_C( 962.06), SIMDE_FLOAT32_C( 396.40), SIMDE_FLOAT32_C( 624.85), SIMDE_FLOAT32_C( -152.88) } }, { { SIMDE_FLOAT32_C( 551.45), SIMDE_FLOAT32_C( 241.55), SIMDE_FLOAT32_C( 129.54), SIMDE_FLOAT32_C( -126.80), SIMDE_FLOAT32_C( -49.37), SIMDE_FLOAT32_C( 588.74), SIMDE_FLOAT32_C( -908.27), SIMDE_FLOAT32_C( 299.47) }, { SIMDE_FLOAT32_C( 551.45), SIMDE_FLOAT32_C( 241.55), SIMDE_FLOAT32_C( 129.54), SIMDE_FLOAT32_C( -126.80), SIMDE_FLOAT32_C( -49.37), SIMDE_FLOAT32_C( 588.74), SIMDE_FLOAT32_C( -908.27), SIMDE_FLOAT32_C( 299.47) } }, { { SIMDE_FLOAT32_C( -850.12), SIMDE_FLOAT32_C( 101.60), SIMDE_FLOAT32_C( -271.53), SIMDE_FLOAT32_C( 279.86), SIMDE_FLOAT32_C( -123.98), SIMDE_FLOAT32_C( -135.94), SIMDE_FLOAT32_C( 620.49), SIMDE_FLOAT32_C( -669.90) }, { SIMDE_FLOAT32_C( -850.12), SIMDE_FLOAT32_C( 101.60), SIMDE_FLOAT32_C( -271.53), SIMDE_FLOAT32_C( 279.86), SIMDE_FLOAT32_C( -123.98), SIMDE_FLOAT32_C( -135.94), SIMDE_FLOAT32_C( 620.49), SIMDE_FLOAT32_C( -669.90) } }, { { SIMDE_FLOAT32_C( 550.68), SIMDE_FLOAT32_C( 279.27), SIMDE_FLOAT32_C( 884.09), SIMDE_FLOAT32_C( 392.25), SIMDE_FLOAT32_C( 352.43), SIMDE_FLOAT32_C( -487.62), SIMDE_FLOAT32_C( 872.43), SIMDE_FLOAT32_C( 783.18) }, { SIMDE_FLOAT32_C( 550.68), SIMDE_FLOAT32_C( 279.27), SIMDE_FLOAT32_C( 884.09), SIMDE_FLOAT32_C( 392.25), SIMDE_FLOAT32_C( 352.43), SIMDE_FLOAT32_C( -487.62), SIMDE_FLOAT32_C( 872.43), SIMDE_FLOAT32_C( 783.18) } }, { { SIMDE_FLOAT32_C( -373.91), SIMDE_FLOAT32_C( 374.65), SIMDE_FLOAT32_C( 430.69), SIMDE_FLOAT32_C( -411.85), SIMDE_FLOAT32_C( -228.96), SIMDE_FLOAT32_C( 
55.54), SIMDE_FLOAT32_C( 435.27), SIMDE_FLOAT32_C( -677.51) }, { SIMDE_FLOAT32_C( -373.91), SIMDE_FLOAT32_C( 374.65), SIMDE_FLOAT32_C( 430.69), SIMDE_FLOAT32_C( -411.85), SIMDE_FLOAT32_C( -228.96), SIMDE_FLOAT32_C( 55.54), SIMDE_FLOAT32_C( 435.27), SIMDE_FLOAT32_C( -677.51) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32* a = test_vec[i].a; simde__m256 r = simde_mm256_set_ps(a[7], a[6], a[5], a[4], a[3], a[2], a[1], a[0]); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_set_pd(SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[sizeof(simde__m256) / sizeof(simde_float64)]; simde_float64 r[sizeof(simde__m256) / sizeof(simde_float64)]; } test_vec[] = { { { SIMDE_FLOAT64_C( 237.07), SIMDE_FLOAT64_C( -691.18), SIMDE_FLOAT64_C( -606.39), SIMDE_FLOAT64_C( 84.18) }, { SIMDE_FLOAT64_C( 237.07), SIMDE_FLOAT64_C( -691.18), SIMDE_FLOAT64_C( -606.39), SIMDE_FLOAT64_C( 84.18) } }, { { SIMDE_FLOAT64_C( 106.38), SIMDE_FLOAT64_C( -554.05), SIMDE_FLOAT64_C( 782.28), SIMDE_FLOAT64_C( -694.29) }, { SIMDE_FLOAT64_C( 106.38), SIMDE_FLOAT64_C( -554.05), SIMDE_FLOAT64_C( 782.28), SIMDE_FLOAT64_C( -694.29) } }, { { SIMDE_FLOAT64_C( 250.86), SIMDE_FLOAT64_C( -269.75), SIMDE_FLOAT64_C( -927.01), SIMDE_FLOAT64_C( 214.49) }, { SIMDE_FLOAT64_C( 250.86), SIMDE_FLOAT64_C( -269.75), SIMDE_FLOAT64_C( -927.01), SIMDE_FLOAT64_C( 214.49) } }, { { SIMDE_FLOAT64_C( 515.10), SIMDE_FLOAT64_C( -597.89), SIMDE_FLOAT64_C( 888.27), SIMDE_FLOAT64_C( -756.42) }, { SIMDE_FLOAT64_C( 515.10), SIMDE_FLOAT64_C( -597.89), SIMDE_FLOAT64_C( 888.27), SIMDE_FLOAT64_C( -756.42) } }, { { SIMDE_FLOAT64_C( 947.11), SIMDE_FLOAT64_C( -148.81), SIMDE_FLOAT64_C( 852.53), SIMDE_FLOAT64_C( 316.80) }, { SIMDE_FLOAT64_C( 947.11), SIMDE_FLOAT64_C( -148.81), SIMDE_FLOAT64_C( 852.53), SIMDE_FLOAT64_C( 316.80) } }, { { SIMDE_FLOAT64_C( -150.15), SIMDE_FLOAT64_C( -882.96), SIMDE_FLOAT64_C( -23.16), SIMDE_FLOAT64_C( 
367.96) }, { SIMDE_FLOAT64_C( -150.15), SIMDE_FLOAT64_C( -882.96), SIMDE_FLOAT64_C( -23.16), SIMDE_FLOAT64_C( 367.96) } }, { { SIMDE_FLOAT64_C( -169.14), SIMDE_FLOAT64_C( 745.70), SIMDE_FLOAT64_C( -976.16), SIMDE_FLOAT64_C( 641.22) }, { SIMDE_FLOAT64_C( -169.14), SIMDE_FLOAT64_C( 745.70), SIMDE_FLOAT64_C( -976.16), SIMDE_FLOAT64_C( 641.22) } }, { { SIMDE_FLOAT64_C( -923.21), SIMDE_FLOAT64_C( 559.44), SIMDE_FLOAT64_C( -648.43), SIMDE_FLOAT64_C( 313.86) }, { SIMDE_FLOAT64_C( -923.21), SIMDE_FLOAT64_C( 559.44), SIMDE_FLOAT64_C( -648.43), SIMDE_FLOAT64_C( 313.86) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float64* a = test_vec[i].a; simde__m256d r = simde_mm256_set_pd(a[3], a[2], a[1], a[0]); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_set_m128(SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 lo[sizeof(simde__m128) / sizeof(simde_float32)]; simde_float32 hi[sizeof(simde__m128) / sizeof(simde_float32)]; simde_float32 r[sizeof(simde__m256) / sizeof(simde_float32)]; } test_vec[] = { { { SIMDE_FLOAT32_C( 955.26), SIMDE_FLOAT32_C( -742.43), SIMDE_FLOAT32_C( -685.02), SIMDE_FLOAT32_C( 649.65) }, { SIMDE_FLOAT32_C( 76.57), SIMDE_FLOAT32_C( 826.18), SIMDE_FLOAT32_C( 499.67), SIMDE_FLOAT32_C( 965.25) }, { SIMDE_FLOAT32_C( 955.26), SIMDE_FLOAT32_C( -742.43), SIMDE_FLOAT32_C( -685.02), SIMDE_FLOAT32_C( 649.65), SIMDE_FLOAT32_C( 76.57), SIMDE_FLOAT32_C( 826.18), SIMDE_FLOAT32_C( 499.67), SIMDE_FLOAT32_C( 965.25) } }, { { SIMDE_FLOAT32_C( -924.37), SIMDE_FLOAT32_C( -870.83), SIMDE_FLOAT32_C( 379.26), SIMDE_FLOAT32_C( 192.12) }, { SIMDE_FLOAT32_C( 260.12), SIMDE_FLOAT32_C( -54.88), SIMDE_FLOAT32_C( -275.93), SIMDE_FLOAT32_C( 53.40) }, { SIMDE_FLOAT32_C( -924.37), SIMDE_FLOAT32_C( -870.83), SIMDE_FLOAT32_C( 379.26), SIMDE_FLOAT32_C( 192.12), SIMDE_FLOAT32_C( 260.12), SIMDE_FLOAT32_C( -54.88), SIMDE_FLOAT32_C( -275.93), SIMDE_FLOAT32_C( 53.40) } }, { { 
SIMDE_FLOAT32_C( 572.77), SIMDE_FLOAT32_C( -29.00), SIMDE_FLOAT32_C( 535.58), SIMDE_FLOAT32_C( -761.05) }, { SIMDE_FLOAT32_C( -139.13), SIMDE_FLOAT32_C( 923.28), SIMDE_FLOAT32_C( 643.94), SIMDE_FLOAT32_C( -866.67) }, { SIMDE_FLOAT32_C( 572.77), SIMDE_FLOAT32_C( -29.00), SIMDE_FLOAT32_C( 535.58), SIMDE_FLOAT32_C( -761.05), SIMDE_FLOAT32_C( -139.13), SIMDE_FLOAT32_C( 923.28), SIMDE_FLOAT32_C( 643.94), SIMDE_FLOAT32_C( -866.67) } }, { { SIMDE_FLOAT32_C( 430.48), SIMDE_FLOAT32_C( 138.84), SIMDE_FLOAT32_C( -254.67), SIMDE_FLOAT32_C( -492.95) }, { SIMDE_FLOAT32_C( -242.21), SIMDE_FLOAT32_C( -193.36), SIMDE_FLOAT32_C( -353.17), SIMDE_FLOAT32_C( -95.67) }, { SIMDE_FLOAT32_C( 430.48), SIMDE_FLOAT32_C( 138.84), SIMDE_FLOAT32_C( -254.67), SIMDE_FLOAT32_C( -492.95), SIMDE_FLOAT32_C( -242.21), SIMDE_FLOAT32_C( -193.36), SIMDE_FLOAT32_C( -353.17), SIMDE_FLOAT32_C( -95.67) } }, { { SIMDE_FLOAT32_C( -497.42), SIMDE_FLOAT32_C( -212.71), SIMDE_FLOAT32_C( -430.07), SIMDE_FLOAT32_C( 762.70) }, { SIMDE_FLOAT32_C( -34.98), SIMDE_FLOAT32_C( -754.99), SIMDE_FLOAT32_C( -527.69), SIMDE_FLOAT32_C( -79.72) }, { SIMDE_FLOAT32_C( -497.42), SIMDE_FLOAT32_C( -212.71), SIMDE_FLOAT32_C( -430.07), SIMDE_FLOAT32_C( 762.70), SIMDE_FLOAT32_C( -34.98), SIMDE_FLOAT32_C( -754.99), SIMDE_FLOAT32_C( -527.69), SIMDE_FLOAT32_C( -79.72) } }, { { SIMDE_FLOAT32_C( 423.17), SIMDE_FLOAT32_C( -804.64), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( -715.96) }, { SIMDE_FLOAT32_C( 732.41), SIMDE_FLOAT32_C( 294.00), SIMDE_FLOAT32_C( -183.90), SIMDE_FLOAT32_C( 808.04) }, { SIMDE_FLOAT32_C( 423.17), SIMDE_FLOAT32_C( -804.64), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( -715.96), SIMDE_FLOAT32_C( 732.41), SIMDE_FLOAT32_C( 294.00), SIMDE_FLOAT32_C( -183.90), SIMDE_FLOAT32_C( 808.04) } }, { { SIMDE_FLOAT32_C( 615.10), SIMDE_FLOAT32_C( 952.95), SIMDE_FLOAT32_C( 930.37), SIMDE_FLOAT32_C( -627.11) }, { SIMDE_FLOAT32_C( -881.36), SIMDE_FLOAT32_C( -355.90), SIMDE_FLOAT32_C( -582.63), SIMDE_FLOAT32_C( 691.42) }, { SIMDE_FLOAT32_C( 
615.10), SIMDE_FLOAT32_C( 952.95), SIMDE_FLOAT32_C( 930.37), SIMDE_FLOAT32_C( -627.11), SIMDE_FLOAT32_C( -881.36), SIMDE_FLOAT32_C( -355.90), SIMDE_FLOAT32_C( -582.63), SIMDE_FLOAT32_C( 691.42) } }, { { SIMDE_FLOAT32_C( 716.04), SIMDE_FLOAT32_C( -977.44), SIMDE_FLOAT32_C( -302.88), SIMDE_FLOAT32_C( -318.94) }, { SIMDE_FLOAT32_C( -240.41), SIMDE_FLOAT32_C( -422.80), SIMDE_FLOAT32_C( 277.23), SIMDE_FLOAT32_C( -809.93) }, { SIMDE_FLOAT32_C( 716.04), SIMDE_FLOAT32_C( -977.44), SIMDE_FLOAT32_C( -302.88), SIMDE_FLOAT32_C( -318.94), SIMDE_FLOAT32_C( -240.41), SIMDE_FLOAT32_C( -422.80), SIMDE_FLOAT32_C( 277.23), SIMDE_FLOAT32_C( -809.93) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 lo = simde_mm_loadu_ps(test_vec[i].lo), hi = simde_mm_loadu_ps(test_vec[i].hi); simde__m256 r = simde_mm256_set_m128(hi, lo); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_set_m128d(SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 lo[sizeof(simde__m128d) / sizeof(simde_float64)]; simde_float64 hi[sizeof(simde__m128d) / sizeof(simde_float64)]; simde_float64 r[sizeof(simde__m256) / sizeof(simde_float64)]; } test_vec[] = { { { SIMDE_FLOAT64_C( 102.87), SIMDE_FLOAT64_C( 237.14) }, { SIMDE_FLOAT64_C( 970.02), SIMDE_FLOAT64_C( 617.63) }, { SIMDE_FLOAT64_C( 102.87), SIMDE_FLOAT64_C( 237.14), SIMDE_FLOAT64_C( 970.02), SIMDE_FLOAT64_C( 617.63) } }, { { SIMDE_FLOAT64_C( -786.65), SIMDE_FLOAT64_C( 343.67) }, { SIMDE_FLOAT64_C( -249.99), SIMDE_FLOAT64_C( -396.81) }, { SIMDE_FLOAT64_C( -786.65), SIMDE_FLOAT64_C( 343.67), SIMDE_FLOAT64_C( -249.99), SIMDE_FLOAT64_C( -396.81) } }, { { SIMDE_FLOAT64_C( 334.77), SIMDE_FLOAT64_C( 890.51) }, { SIMDE_FLOAT64_C( -50.87), SIMDE_FLOAT64_C( -723.47) }, { SIMDE_FLOAT64_C( 334.77), SIMDE_FLOAT64_C( 890.51), SIMDE_FLOAT64_C( -50.87), SIMDE_FLOAT64_C( -723.47) } }, { { SIMDE_FLOAT64_C( -834.03), SIMDE_FLOAT64_C( -242.09) }, { SIMDE_FLOAT64_C( -638.54), 
SIMDE_FLOAT64_C( -98.76) }, { SIMDE_FLOAT64_C( -834.03), SIMDE_FLOAT64_C( -242.09), SIMDE_FLOAT64_C( -638.54), SIMDE_FLOAT64_C( -98.76) } }, { { SIMDE_FLOAT64_C( 453.20), SIMDE_FLOAT64_C( 958.59) }, { SIMDE_FLOAT64_C( 231.94), SIMDE_FLOAT64_C( -131.61) }, { SIMDE_FLOAT64_C( 453.20), SIMDE_FLOAT64_C( 958.59), SIMDE_FLOAT64_C( 231.94), SIMDE_FLOAT64_C( -131.61) } }, { { SIMDE_FLOAT64_C( -981.17), SIMDE_FLOAT64_C( -996.20) }, { SIMDE_FLOAT64_C( 210.92), SIMDE_FLOAT64_C( -800.51) }, { SIMDE_FLOAT64_C( -981.17), SIMDE_FLOAT64_C( -996.20), SIMDE_FLOAT64_C( 210.92), SIMDE_FLOAT64_C( -800.51) } }, { { SIMDE_FLOAT64_C( 774.24), SIMDE_FLOAT64_C( -383.33) }, { SIMDE_FLOAT64_C( -513.10), SIMDE_FLOAT64_C( -84.69) }, { SIMDE_FLOAT64_C( 774.24), SIMDE_FLOAT64_C( -383.33), SIMDE_FLOAT64_C( -513.10), SIMDE_FLOAT64_C( -84.69) } }, { { SIMDE_FLOAT64_C( 101.10), SIMDE_FLOAT64_C( -236.55) }, { SIMDE_FLOAT64_C( -206.58), SIMDE_FLOAT64_C( 21.06) }, { SIMDE_FLOAT64_C( 101.10), SIMDE_FLOAT64_C( -236.55), SIMDE_FLOAT64_C( -206.58), SIMDE_FLOAT64_C( 21.06) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d lo = simde_mm_loadu_pd(test_vec[i].lo), hi = simde_mm_loadu_pd(test_vec[i].hi); simde__m256d r = simde_mm256_set_m128d(hi, lo); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_set_m128i(SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[sizeof(simde__m256) / sizeof(simde_float32)]; simde_float32 r[sizeof(simde__m256) / sizeof(simde_float32)]; } test_vec[] = { { { SIMDE_FLOAT32_C( -862.79), SIMDE_FLOAT32_C( 575.51), SIMDE_FLOAT32_C( -229.06), SIMDE_FLOAT32_C( 41.19), SIMDE_FLOAT32_C( -323.42), SIMDE_FLOAT32_C( -210.46), SIMDE_FLOAT32_C( 770.69), SIMDE_FLOAT32_C( 519.80) }, { SIMDE_FLOAT32_C( -862.79), SIMDE_FLOAT32_C( 575.51), SIMDE_FLOAT32_C( -229.06), SIMDE_FLOAT32_C( 41.19), SIMDE_FLOAT32_C( -323.42), SIMDE_FLOAT32_C( -210.46), SIMDE_FLOAT32_C( 770.69), SIMDE_FLOAT32_C( 
519.80) } }, { { SIMDE_FLOAT32_C( 547.64), SIMDE_FLOAT32_C( -280.68), SIMDE_FLOAT32_C( -533.93), SIMDE_FLOAT32_C( -474.55), SIMDE_FLOAT32_C( 108.16), SIMDE_FLOAT32_C( 945.55), SIMDE_FLOAT32_C( -578.81), SIMDE_FLOAT32_C( 909.72) }, { SIMDE_FLOAT32_C( 547.64), SIMDE_FLOAT32_C( -280.68), SIMDE_FLOAT32_C( -533.93), SIMDE_FLOAT32_C( -474.55), SIMDE_FLOAT32_C( 108.16), SIMDE_FLOAT32_C( 945.55), SIMDE_FLOAT32_C( -578.81), SIMDE_FLOAT32_C( 909.72) } }, { { SIMDE_FLOAT32_C( 712.09), SIMDE_FLOAT32_C( 149.26), SIMDE_FLOAT32_C( -126.56), SIMDE_FLOAT32_C( -722.11), SIMDE_FLOAT32_C( 49.51), SIMDE_FLOAT32_C( 630.17), SIMDE_FLOAT32_C( 155.12), SIMDE_FLOAT32_C( 912.29) }, { SIMDE_FLOAT32_C( 712.09), SIMDE_FLOAT32_C( 149.26), SIMDE_FLOAT32_C( -126.56), SIMDE_FLOAT32_C( -722.11), SIMDE_FLOAT32_C( 49.51), SIMDE_FLOAT32_C( 630.17), SIMDE_FLOAT32_C( 155.12), SIMDE_FLOAT32_C( 912.29) } }, { { SIMDE_FLOAT32_C( -870.50), SIMDE_FLOAT32_C( -720.59), SIMDE_FLOAT32_C( 906.13), SIMDE_FLOAT32_C( -438.81), SIMDE_FLOAT32_C( 17.74), SIMDE_FLOAT32_C( 559.71), SIMDE_FLOAT32_C( -313.51), SIMDE_FLOAT32_C( 154.94) }, { SIMDE_FLOAT32_C( -870.50), SIMDE_FLOAT32_C( -720.59), SIMDE_FLOAT32_C( 906.13), SIMDE_FLOAT32_C( -438.81), SIMDE_FLOAT32_C( 17.74), SIMDE_FLOAT32_C( 559.71), SIMDE_FLOAT32_C( -313.51), SIMDE_FLOAT32_C( 154.94) } }, { { SIMDE_FLOAT32_C( 135.22), SIMDE_FLOAT32_C( 457.42), SIMDE_FLOAT32_C( -803.87), SIMDE_FLOAT32_C( 811.80), SIMDE_FLOAT32_C( -753.03), SIMDE_FLOAT32_C( 966.83), SIMDE_FLOAT32_C( 331.60), SIMDE_FLOAT32_C( 794.61) }, { SIMDE_FLOAT32_C( 135.22), SIMDE_FLOAT32_C( 457.42), SIMDE_FLOAT32_C( -803.87), SIMDE_FLOAT32_C( 811.80), SIMDE_FLOAT32_C( -753.03), SIMDE_FLOAT32_C( 966.83), SIMDE_FLOAT32_C( 331.60), SIMDE_FLOAT32_C( 794.61) } }, { { SIMDE_FLOAT32_C( -313.86), SIMDE_FLOAT32_C( 797.67), SIMDE_FLOAT32_C( -679.93), SIMDE_FLOAT32_C( 794.30), SIMDE_FLOAT32_C( 743.22), SIMDE_FLOAT32_C( -258.75), SIMDE_FLOAT32_C( 704.02), SIMDE_FLOAT32_C( 455.31) }, { SIMDE_FLOAT32_C( -313.86), 
SIMDE_FLOAT32_C( 797.67), SIMDE_FLOAT32_C( -679.93), SIMDE_FLOAT32_C( 794.30), SIMDE_FLOAT32_C( 743.22), SIMDE_FLOAT32_C( -258.75), SIMDE_FLOAT32_C( 704.02), SIMDE_FLOAT32_C( 455.31) } }, { { SIMDE_FLOAT32_C( 890.51), SIMDE_FLOAT32_C( -422.54), SIMDE_FLOAT32_C( 733.20), SIMDE_FLOAT32_C( -59.98), SIMDE_FLOAT32_C( -792.37), SIMDE_FLOAT32_C( -111.68), SIMDE_FLOAT32_C( -147.69), SIMDE_FLOAT32_C( -662.87) }, { SIMDE_FLOAT32_C( 890.51), SIMDE_FLOAT32_C( -422.54), SIMDE_FLOAT32_C( 733.20), SIMDE_FLOAT32_C( -59.98), SIMDE_FLOAT32_C( -792.37), SIMDE_FLOAT32_C( -111.68), SIMDE_FLOAT32_C( -147.69), SIMDE_FLOAT32_C( -662.87) } }, { { SIMDE_FLOAT32_C( 167.73), SIMDE_FLOAT32_C( -241.56), SIMDE_FLOAT32_C( -101.68), SIMDE_FLOAT32_C( -814.53), SIMDE_FLOAT32_C( -681.85), SIMDE_FLOAT32_C( 584.81), SIMDE_FLOAT32_C( 340.41), SIMDE_FLOAT32_C( 453.36) }, { SIMDE_FLOAT32_C( 167.73), SIMDE_FLOAT32_C( -241.56), SIMDE_FLOAT32_C( -101.68), SIMDE_FLOAT32_C( -814.53), SIMDE_FLOAT32_C( -681.85), SIMDE_FLOAT32_C( 584.81), SIMDE_FLOAT32_C( 340.41), SIMDE_FLOAT32_C( 453.36) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32* a = test_vec[i].a; simde__m256 r = simde_mm256_set_ps(a[7], a[6], a[5], a[4], a[3], a[2], a[1], a[0]); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_set1_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { int8_t a; simde__m256i r; } test_vec[8] = { { 41, simde_mm256_set_epi8(INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41), INT8_C( 41)) }, { -5, simde_mm256_set_epi8(INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), 
INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5), INT8_C( -5)) }, { -85, simde_mm256_set_epi8(INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85), INT8_C( -85)) }, { 61, simde_mm256_set_epi8(INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61), INT8_C( 61)) }, { -125, simde_mm256_set_epi8(INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125), INT8_C(-125)) }, { -117, simde_mm256_set_epi8(INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), 
INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117)) }, { 5, simde_mm256_set_epi8(INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5), INT8_C( 5)) }, { -41, simde_mm256_set_epi8(INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41), INT8_C( -41)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_set1_epi8(test_vec[i].a); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_set1_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { int16_t a; simde__m256i r; } test_vec[8] = { { INT16_C( -7117), simde_mm256_set_epi16(INT16_C( -7117), INT16_C( -7117), INT16_C( -7117), INT16_C( -7117), INT16_C( -7117), INT16_C( -7117), INT16_C( -7117), INT16_C( -7117), INT16_C( -7117), INT16_C( -7117), INT16_C( -7117), INT16_C( -7117), INT16_C( -7117), INT16_C( -7117), INT16_C( -7117), INT16_C( -7117)) }, { INT16_C( -4832), simde_mm256_set_epi16(INT16_C( -4832), INT16_C( -4832), INT16_C( -4832), INT16_C( -4832), INT16_C( -4832), INT16_C( -4832), INT16_C( -4832), INT16_C( -4832), INT16_C( -4832), INT16_C( -4832), INT16_C( 
-4832), INT16_C( -4832), INT16_C( -4832), INT16_C( -4832), INT16_C( -4832), INT16_C( -4832)) }, { INT16_C( 20615), simde_mm256_set_epi16(INT16_C( 20615), INT16_C( 20615), INT16_C( 20615), INT16_C( 20615), INT16_C( 20615), INT16_C( 20615), INT16_C( 20615), INT16_C( 20615), INT16_C( 20615), INT16_C( 20615), INT16_C( 20615), INT16_C( 20615), INT16_C( 20615), INT16_C( 20615), INT16_C( 20615), INT16_C( 20615)) }, { INT16_C(-30117), simde_mm256_set_epi16(INT16_C(-30117), INT16_C(-30117), INT16_C(-30117), INT16_C(-30117), INT16_C(-30117), INT16_C(-30117), INT16_C(-30117), INT16_C(-30117), INT16_C(-30117), INT16_C(-30117), INT16_C(-30117), INT16_C(-30117), INT16_C(-30117), INT16_C(-30117), INT16_C(-30117), INT16_C(-30117)) }, { INT16_C( 32088), simde_mm256_set_epi16(INT16_C( 32088), INT16_C( 32088), INT16_C( 32088), INT16_C( 32088), INT16_C( 32088), INT16_C( 32088), INT16_C( 32088), INT16_C( 32088), INT16_C( 32088), INT16_C( 32088), INT16_C( 32088), INT16_C( 32088), INT16_C( 32088), INT16_C( 32088), INT16_C( 32088), INT16_C( 32088)) }, { INT16_C(-14076), simde_mm256_set_epi16(INT16_C(-14076), INT16_C(-14076), INT16_C(-14076), INT16_C(-14076), INT16_C(-14076), INT16_C(-14076), INT16_C(-14076), INT16_C(-14076), INT16_C(-14076), INT16_C(-14076), INT16_C(-14076), INT16_C(-14076), INT16_C(-14076), INT16_C(-14076), INT16_C(-14076), INT16_C(-14076)) }, { INT16_C( -9132), simde_mm256_set_epi16(INT16_C( -9132), INT16_C( -9132), INT16_C( -9132), INT16_C( -9132), INT16_C( -9132), INT16_C( -9132), INT16_C( -9132), INT16_C( -9132), INT16_C( -9132), INT16_C( -9132), INT16_C( -9132), INT16_C( -9132), INT16_C( -9132), INT16_C( -9132), INT16_C( -9132), INT16_C( -9132)) }, { INT16_C(-24801), simde_mm256_set_epi16(INT16_C(-24801), INT16_C(-24801), INT16_C(-24801), INT16_C(-24801), INT16_C(-24801), INT16_C(-24801), INT16_C(-24801), INT16_C(-24801), INT16_C(-24801), INT16_C(-24801), INT16_C(-24801), INT16_C(-24801), INT16_C(-24801), INT16_C(-24801), INT16_C(-24801), INT16_C(-24801)) } }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_set1_epi16(test_vec[i].a); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_set1_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { int32_t a; simde__m256i r; } test_vec[8] = { { INT32_C( 1458307866), simde_mm256_set_epi32(INT32_C( 1458307866), INT32_C( 1458307866), INT32_C( 1458307866), INT32_C( 1458307866), INT32_C( 1458307866), INT32_C( 1458307866), INT32_C( 1458307866), INT32_C( 1458307866)) }, { INT32_C(-1231481357), simde_mm256_set_epi32(INT32_C(-1231481357), INT32_C(-1231481357), INT32_C(-1231481357), INT32_C(-1231481357), INT32_C(-1231481357), INT32_C(-1231481357), INT32_C(-1231481357), INT32_C(-1231481357)) }, { INT32_C( 1330347041), simde_mm256_set_epi32(INT32_C( 1330347041), INT32_C( 1330347041), INT32_C( 1330347041), INT32_C( 1330347041), INT32_C( 1330347041), INT32_C( 1330347041), INT32_C( 1330347041), INT32_C( 1330347041)) }, { INT32_C(-2031969158), simde_mm256_set_epi32(INT32_C(-2031969158), INT32_C(-2031969158), INT32_C(-2031969158), INT32_C(-2031969158), INT32_C(-2031969158), INT32_C(-2031969158), INT32_C(-2031969158), INT32_C(-2031969158)) }, { INT32_C( 138293031), simde_mm256_set_epi32(INT32_C( 138293031), INT32_C( 138293031), INT32_C( 138293031), INT32_C( 138293031), INT32_C( 138293031), INT32_C( 138293031), INT32_C( 138293031), INT32_C( 138293031)) }, { INT32_C( -870589211), simde_mm256_set_epi32(INT32_C( -870589211), INT32_C( -870589211), INT32_C( -870589211), INT32_C( -870589211), INT32_C( -870589211), INT32_C( -870589211), INT32_C( -870589211), INT32_C( -870589211)) }, { INT32_C( 1439279097), simde_mm256_set_epi32(INT32_C( 1439279097), INT32_C( 1439279097), INT32_C( 1439279097), INT32_C( 1439279097), INT32_C( 1439279097), INT32_C( 1439279097), INT32_C( 1439279097), INT32_C( 1439279097)) }, { INT32_C( -102725779), simde_mm256_set_epi32(INT32_C( -102725779), INT32_C( -102725779), INT32_C( -102725779), INT32_C( 
-102725779), INT32_C( -102725779), INT32_C( -102725779), INT32_C( -102725779), INT32_C( -102725779)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_set1_epi32(test_vec[i].a); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_set1_epi64x(SIMDE_MUNIT_TEST_ARGS) { const struct { int64_t a; simde__m256i r; } test_vec[8] = { { INT64_C( 5105791061004147197), simde_mm256_set_epi64x(INT64_C( 5105791061004147197), INT64_C( 5105791061004147197), INT64_C( 5105791061004147197), INT64_C( 5105791061004147197)) }, { INT64_C(-2078502026959165134), simde_mm256_set_epi64x(INT64_C(-2078502026959165134), INT64_C(-2078502026959165134), INT64_C(-2078502026959165134), INT64_C(-2078502026959165134)) }, { INT64_C( 3468007801991671414), simde_mm256_set_epi64x(INT64_C( 3468007801991671414), INT64_C( 3468007801991671414), INT64_C( 3468007801991671414), INT64_C( 3468007801991671414)) }, { INT64_C(-1026830878024479084), simde_mm256_set_epi64x(INT64_C(-1026830878024479084), INT64_C(-1026830878024479084), INT64_C(-1026830878024479084), INT64_C(-1026830878024479084)) }, { INT64_C(-5815393469667065909), simde_mm256_set_epi64x(INT64_C(-5815393469667065909), INT64_C(-5815393469667065909), INT64_C(-5815393469667065909), INT64_C(-5815393469667065909)) }, { INT64_C(-1858841878581774308), simde_mm256_set_epi64x(INT64_C(-1858841878581774308), INT64_C(-1858841878581774308), INT64_C(-1858841878581774308), INT64_C(-1858841878581774308)) }, { INT64_C( 6272125310275044670), simde_mm256_set_epi64x(INT64_C( 6272125310275044670), INT64_C( 6272125310275044670), INT64_C( 6272125310275044670), INT64_C( 6272125310275044670)) }, { INT64_C( 6393396529564376044), simde_mm256_set_epi64x(INT64_C( 6393396529564376044), INT64_C( 6393396529564376044), INT64_C( 6393396529564376044), INT64_C( 6393396529564376044)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = 
simde_mm256_set1_epi64x(test_vec[i].a); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_set1_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float32 a; simde__m256 r; } test_vec[8] = { { SIMDE_FLOAT32_C( -73.91), simde_mm256_set_ps(SIMDE_FLOAT32_C( -73.91), SIMDE_FLOAT32_C( -73.91), SIMDE_FLOAT32_C( -73.91), SIMDE_FLOAT32_C( -73.91), SIMDE_FLOAT32_C( -73.91), SIMDE_FLOAT32_C( -73.91), SIMDE_FLOAT32_C( -73.91), SIMDE_FLOAT32_C( -73.91)) }, { SIMDE_FLOAT32_C( 953.36), simde_mm256_set_ps(SIMDE_FLOAT32_C( 953.36), SIMDE_FLOAT32_C( 953.36), SIMDE_FLOAT32_C( 953.36), SIMDE_FLOAT32_C( 953.36), SIMDE_FLOAT32_C( 953.36), SIMDE_FLOAT32_C( 953.36), SIMDE_FLOAT32_C( 953.36), SIMDE_FLOAT32_C( 953.36)) }, { SIMDE_FLOAT32_C( 888.18), simde_mm256_set_ps(SIMDE_FLOAT32_C( 888.18), SIMDE_FLOAT32_C( 888.18), SIMDE_FLOAT32_C( 888.18), SIMDE_FLOAT32_C( 888.18), SIMDE_FLOAT32_C( 888.18), SIMDE_FLOAT32_C( 888.18), SIMDE_FLOAT32_C( 888.18), SIMDE_FLOAT32_C( 888.18)) }, { SIMDE_FLOAT32_C( 232.71), simde_mm256_set_ps(SIMDE_FLOAT32_C( 232.71), SIMDE_FLOAT32_C( 232.71), SIMDE_FLOAT32_C( 232.71), SIMDE_FLOAT32_C( 232.71), SIMDE_FLOAT32_C( 232.71), SIMDE_FLOAT32_C( 232.71), SIMDE_FLOAT32_C( 232.71), SIMDE_FLOAT32_C( 232.71)) }, { SIMDE_FLOAT32_C( -598.38), simde_mm256_set_ps(SIMDE_FLOAT32_C( -598.38), SIMDE_FLOAT32_C( -598.38), SIMDE_FLOAT32_C( -598.38), SIMDE_FLOAT32_C( -598.38), SIMDE_FLOAT32_C( -598.38), SIMDE_FLOAT32_C( -598.38), SIMDE_FLOAT32_C( -598.38), SIMDE_FLOAT32_C( -598.38)) }, { SIMDE_FLOAT32_C( 762.88), simde_mm256_set_ps(SIMDE_FLOAT32_C( 762.88), SIMDE_FLOAT32_C( 762.88), SIMDE_FLOAT32_C( 762.88), SIMDE_FLOAT32_C( 762.88), SIMDE_FLOAT32_C( 762.88), SIMDE_FLOAT32_C( 762.88), SIMDE_FLOAT32_C( 762.88), SIMDE_FLOAT32_C( 762.88)) }, { SIMDE_FLOAT32_C( -225.89), simde_mm256_set_ps(SIMDE_FLOAT32_C( -225.89), SIMDE_FLOAT32_C( -225.89), SIMDE_FLOAT32_C( -225.89), SIMDE_FLOAT32_C( -225.89), SIMDE_FLOAT32_C( -225.89), SIMDE_FLOAT32_C( -225.89), 
SIMDE_FLOAT32_C( -225.89), SIMDE_FLOAT32_C( -225.89)) }, { SIMDE_FLOAT32_C( -115.32), simde_mm256_set_ps(SIMDE_FLOAT32_C( -115.32), SIMDE_FLOAT32_C( -115.32), SIMDE_FLOAT32_C( -115.32), SIMDE_FLOAT32_C( -115.32), SIMDE_FLOAT32_C( -115.32), SIMDE_FLOAT32_C( -115.32), SIMDE_FLOAT32_C( -115.32), SIMDE_FLOAT32_C( -115.32)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_set1_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_set1_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float64 a; simde__m256d r; } test_vec[8] = { { SIMDE_FLOAT64_C( -494.25), simde_mm256_set_pd(SIMDE_FLOAT64_C( -494.25), SIMDE_FLOAT64_C( -494.25), SIMDE_FLOAT64_C( -494.25), SIMDE_FLOAT64_C( -494.25)) }, { SIMDE_FLOAT64_C( 610.20), simde_mm256_set_pd(SIMDE_FLOAT64_C( 610.20), SIMDE_FLOAT64_C( 610.20), SIMDE_FLOAT64_C( 610.20), SIMDE_FLOAT64_C( 610.20)) }, { SIMDE_FLOAT64_C( -471.35), simde_mm256_set_pd(SIMDE_FLOAT64_C( -471.35), SIMDE_FLOAT64_C( -471.35), SIMDE_FLOAT64_C( -471.35), SIMDE_FLOAT64_C( -471.35)) }, { SIMDE_FLOAT64_C( -211.28), simde_mm256_set_pd(SIMDE_FLOAT64_C( -211.28), SIMDE_FLOAT64_C( -211.28), SIMDE_FLOAT64_C( -211.28), SIMDE_FLOAT64_C( -211.28)) }, { SIMDE_FLOAT64_C( -27.80), simde_mm256_set_pd(SIMDE_FLOAT64_C( -27.80), SIMDE_FLOAT64_C( -27.80), SIMDE_FLOAT64_C( -27.80), SIMDE_FLOAT64_C( -27.80)) }, { SIMDE_FLOAT64_C( 866.02), simde_mm256_set_pd(SIMDE_FLOAT64_C( 866.02), SIMDE_FLOAT64_C( 866.02), SIMDE_FLOAT64_C( 866.02), SIMDE_FLOAT64_C( 866.02)) }, { SIMDE_FLOAT64_C( -278.22), simde_mm256_set_pd(SIMDE_FLOAT64_C( -278.22), SIMDE_FLOAT64_C( -278.22), SIMDE_FLOAT64_C( -278.22), SIMDE_FLOAT64_C( -278.22)) }, { SIMDE_FLOAT64_C( 868.90), simde_mm256_set_pd(SIMDE_FLOAT64_C( 868.90), SIMDE_FLOAT64_C( 868.90), SIMDE_FLOAT64_C( 868.90), SIMDE_FLOAT64_C( 868.90)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = 
simde_mm256_set1_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_x_mm256_deinterleaveeven_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { { -INT16_C( 29098), INT16_C( 21049), INT16_C( 28104), INT16_C( 2629), -INT16_C( 20812), -INT16_C( 9859), -INT16_C( 24603), -INT16_C( 22056), -INT16_C( 6828), INT16_C( 2737), INT16_C( 22872), -INT16_C( 974), -INT16_C( 27405), INT16_C( 22221), INT16_C( 1092), -INT16_C( 26097) }, { INT16_C( 18834), INT16_C( 23277), INT16_C( 12982), INT16_C( 27236), -INT16_C( 7711), -INT16_C( 14781), INT16_C( 7040), -INT16_C( 10896), INT16_C( 8448), INT16_C( 23007), INT16_C( 4731), INT16_C( 28245), INT16_C( 8870), -INT16_C( 5436), -INT16_C( 11226), -INT16_C( 18300) }, { -INT16_C( 29098), INT16_C( 28104), -INT16_C( 20812), -INT16_C( 24603), INT16_C( 18834), INT16_C( 12982), -INT16_C( 7711), INT16_C( 7040), -INT16_C( 6828), INT16_C( 22872), -INT16_C( 27405), INT16_C( 1092), INT16_C( 8448), INT16_C( 4731), INT16_C( 8870), -INT16_C( 11226) } }, { { INT16_C( 28957), -INT16_C( 11502), INT16_C( 30372), -INT16_C( 31427), -INT16_C( 32425), -INT16_C( 10165), -INT16_C( 17508), -INT16_C( 25171), -INT16_C( 29475), INT16_C( 22774), INT16_C( 19358), INT16_C( 17606), -INT16_C( 30099), -INT16_C( 27602), -INT16_C( 19618), INT16_C( 31564) }, { INT16_C( 24356), -INT16_C( 14258), -INT16_C( 29483), INT16_C( 11597), -INT16_C( 26355), -INT16_C( 22267), -INT16_C( 19884), INT16_C( 12614), INT16_C( 15422), -INT16_C( 8823), INT16_C( 20359), -INT16_C( 2783), INT16_C( 20698), INT16_C( 14473), -INT16_C( 11005), INT16_C( 10164) }, { INT16_C( 28957), INT16_C( 30372), -INT16_C( 32425), -INT16_C( 17508), INT16_C( 24356), -INT16_C( 29483), -INT16_C( 26355), -INT16_C( 19884), -INT16_C( 29475), INT16_C( 19358), -INT16_C( 30099), -INT16_C( 19618), INT16_C( 15422), INT16_C( 20359), INT16_C( 20698), -INT16_C( 11005) } }, { { INT16_C( 564), INT16_C( 
2800), INT16_C( 15758), -INT16_C( 25801), INT16_C( 15574), INT16_C( 11077), -INT16_C( 29714), INT16_C( 11356), -INT16_C( 6456), INT16_C( 20233), INT16_C( 11061), INT16_C( 3908), -INT16_C( 12933), INT16_C( 32328), -INT16_C( 861), -INT16_C( 10331) }, { -INT16_C( 27138), -INT16_C( 29215), INT16_C( 6355), -INT16_C( 22232), INT16_C( 27988), INT16_C( 17108), INT16_C( 12793), -INT16_C( 16017), INT16_C( 30743), INT16_C( 19472), INT16_C( 21923), INT16_C( 7772), -INT16_C( 23518), -INT16_C( 14948), INT16_C( 17056), -INT16_C( 24931) }, { INT16_C( 564), INT16_C( 15758), INT16_C( 15574), -INT16_C( 29714), -INT16_C( 27138), INT16_C( 6355), INT16_C( 27988), INT16_C( 12793), -INT16_C( 6456), INT16_C( 11061), -INT16_C( 12933), -INT16_C( 861), INT16_C( 30743), INT16_C( 21923), -INT16_C( 23518), INT16_C( 17056) } }, { { INT16_C( 32471), -INT16_C( 21973), INT16_C( 21655), -INT16_C( 5292), INT16_C( 10433), -INT16_C( 17874), -INT16_C( 25255), INT16_C( 28795), -INT16_C( 29675), -INT16_C( 17987), INT16_C( 6625), INT16_C( 983), INT16_C( 29885), INT16_C( 24009), INT16_C( 26294), -INT16_C( 29189) }, { INT16_C( 10212), INT16_C( 31544), -INT16_C( 29573), INT16_C( 15463), -INT16_C( 27212), INT16_C( 3831), INT16_C( 29234), INT16_C( 18302), INT16_C( 15358), -INT16_C( 8448), -INT16_C( 10156), INT16_C( 4579), -INT16_C( 21428), INT16_C( 622), INT16_C( 27154), -INT16_C( 2417) }, { INT16_C( 32471), INT16_C( 21655), INT16_C( 10433), -INT16_C( 25255), INT16_C( 10212), -INT16_C( 29573), -INT16_C( 27212), INT16_C( 29234), -INT16_C( 29675), INT16_C( 6625), INT16_C( 29885), INT16_C( 26294), INT16_C( 15358), -INT16_C( 10156), -INT16_C( 21428), INT16_C( 27154) } }, { { -INT16_C( 14447), INT16_C( 3186), -INT16_C( 9901), INT16_C( 2120), INT16_C( 16238), -INT16_C( 24554), -INT16_C( 27470), -INT16_C( 20249), -INT16_C( 5936), INT16_C( 9360), INT16_C( 29632), INT16_C( 3126), -INT16_C( 23521), INT16_C( 12558), -INT16_C( 25330), -INT16_C( 24793) }, { -INT16_C( 26267), -INT16_C( 18261), -INT16_C( 2958), -INT16_C( 
8000), -INT16_C( 10701), -INT16_C( 6784), INT16_C( 26731), INT16_C( 15254), INT16_C( 9808), INT16_C( 4191), -INT16_C( 27239), -INT16_C( 18404), INT16_C( 10810), INT16_C( 18665), INT16_C( 4295), INT16_C( 11496) }, { -INT16_C( 14447), -INT16_C( 9901), INT16_C( 16238), -INT16_C( 27470), -INT16_C( 26267), -INT16_C( 2958), -INT16_C( 10701), INT16_C( 26731), -INT16_C( 5936), INT16_C( 29632), -INT16_C( 23521), -INT16_C( 25330), INT16_C( 9808), -INT16_C( 27239), INT16_C( 10810), INT16_C( 4295) } }, { { -INT16_C( 27734), INT16_C( 7397), -INT16_C( 23161), -INT16_C( 17411), INT16_C( 32124), -INT16_C( 6240), INT16_C( 14053), INT16_C( 13602), -INT16_C( 32420), -INT16_C( 2747), INT16_C( 24855), INT16_C( 20909), -INT16_C( 26997), INT16_C( 21401), -INT16_C( 32345), INT16_C( 20863) }, { INT16_C( 25621), -INT16_C( 25491), INT16_C( 27146), -INT16_C( 31145), -INT16_C( 1816), -INT16_C( 12947), -INT16_C( 28882), -INT16_C( 29949), INT16_C( 18448), INT16_C( 10112), INT16_C( 11946), INT16_C( 13688), INT16_C( 4804), INT16_C( 27528), INT16_C( 2195), -INT16_C( 22340) }, { -INT16_C( 27734), -INT16_C( 23161), INT16_C( 32124), INT16_C( 14053), INT16_C( 25621), INT16_C( 27146), -INT16_C( 1816), -INT16_C( 28882), -INT16_C( 32420), INT16_C( 24855), -INT16_C( 26997), -INT16_C( 32345), INT16_C( 18448), INT16_C( 11946), INT16_C( 4804), INT16_C( 2195) } }, { { INT16_C( 10860), INT16_C( 30277), -INT16_C( 25452), INT16_C( 31996), INT16_C( 27028), -INT16_C( 15542), INT16_C( 19960), INT16_C( 2382), -INT16_C( 12651), INT16_C( 16176), -INT16_C( 22020), -INT16_C( 16011), -INT16_C( 581), INT16_C( 20012), -INT16_C( 5883), INT16_C( 29431) }, { INT16_C( 15379), -INT16_C( 22552), -INT16_C( 6696), INT16_C( 27940), INT16_C( 28238), INT16_C( 18224), INT16_C( 32443), INT16_C( 20560), -INT16_C( 32692), INT16_C( 18832), INT16_C( 1321), -INT16_C( 7158), INT16_C( 13826), INT16_C( 2099), INT16_C( 10783), INT16_C( 12922) }, { INT16_C( 10860), -INT16_C( 25452), INT16_C( 27028), INT16_C( 19960), INT16_C( 15379), -INT16_C( 
6696), INT16_C( 28238), INT16_C( 32443), -INT16_C( 12651), -INT16_C( 22020), -INT16_C( 581), -INT16_C( 5883), -INT16_C( 32692), INT16_C( 1321), INT16_C( 13826), INT16_C( 10783) } }, { { INT16_C( 25190), INT16_C( 16090), -INT16_C( 441), -INT16_C( 26965), -INT16_C( 9364), INT16_C( 10205), INT16_C( 11609), -INT16_C( 22921), INT16_C( 1965), -INT16_C( 10257), -INT16_C( 1780), INT16_C( 4027), -INT16_C( 4561), INT16_C( 20247), -INT16_C( 28392), INT16_C( 32385) }, { INT16_C( 23539), INT16_C( 15293), INT16_C( 26713), -INT16_C( 14895), -INT16_C( 20924), -INT16_C( 25108), INT16_C( 25819), -INT16_C( 30653), INT16_C( 12907), INT16_C( 30815), INT16_C( 6955), INT16_C( 23431), -INT16_C( 25079), INT16_C( 8874), INT16_C( 11055), INT16_C( 8864) }, { INT16_C( 25190), -INT16_C( 441), -INT16_C( 9364), INT16_C( 11609), INT16_C( 23539), INT16_C( 26713), -INT16_C( 20924), INT16_C( 25819), INT16_C( 1965), -INT16_C( 1780), -INT16_C( 4561), -INT16_C( 28392), INT16_C( 12907), INT16_C( 6955), -INT16_C( 25079), INT16_C( 11055) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_x_mm256_deinterleaveeven_epi16(a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_x_mm256_deinterleaveodd_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { { INT16_C( 15247), -INT16_C( 26208), -INT16_C( 25489), INT16_C( 16456), -INT16_C( 16003), INT16_C( 13526), INT16_C( 13559), -INT16_C( 25115), -INT16_C( 9262), INT16_C( 3203), -INT16_C( 31835), INT16_C( 158), INT16_C( 13038), -INT16_C( 14959), -INT16_C( 10715), -INT16_C( 19425) }, { -INT16_C( 16623), -INT16_C( 32435), -INT16_C( 27301), -INT16_C( 10047), -INT16_C( 26538), INT16_C( 19725), -INT16_C( 3380), -INT16_C( 24854), INT16_C( 28110), INT16_C( 
29610), INT16_C( 18929), -INT16_C( 8333), INT16_C( 1147), -INT16_C( 24412), -INT16_C( 15398), -INT16_C( 5292) }, { -INT16_C( 26208), INT16_C( 16456), INT16_C( 13526), -INT16_C( 25115), -INT16_C( 32435), -INT16_C( 10047), INT16_C( 19725), -INT16_C( 24854), INT16_C( 3203), INT16_C( 158), -INT16_C( 14959), -INT16_C( 19425), INT16_C( 29610), -INT16_C( 8333), -INT16_C( 24412), -INT16_C( 5292) } }, { { -INT16_C( 24190), -INT16_C( 8852), INT16_C( 11830), -INT16_C( 29258), -INT16_C( 15418), -INT16_C( 27942), -INT16_C( 14923), -INT16_C( 31952), -INT16_C( 9678), INT16_C( 9206), INT16_C( 26915), -INT16_C( 24829), -INT16_C( 22675), INT16_C( 18239), -INT16_C( 27541), -INT16_C( 4813) }, { -INT16_C( 24779), INT16_C( 27851), -INT16_C( 32307), -INT16_C( 27655), -INT16_C( 11452), -INT16_C( 1755), INT16_C( 21912), -INT16_C( 13443), INT16_C( 29488), INT16_C( 21486), -INT16_C( 3619), INT16_C( 19186), INT16_C( 12953), INT16_C( 1170), -INT16_C( 14906), -INT16_C( 1039) }, { -INT16_C( 8852), -INT16_C( 29258), -INT16_C( 27942), -INT16_C( 31952), INT16_C( 27851), -INT16_C( 27655), -INT16_C( 1755), -INT16_C( 13443), INT16_C( 9206), -INT16_C( 24829), INT16_C( 18239), -INT16_C( 4813), INT16_C( 21486), INT16_C( 19186), INT16_C( 1170), -INT16_C( 1039) } }, { { -INT16_C( 17308), INT16_C( 12903), INT16_C( 24637), -INT16_C( 32315), -INT16_C( 5324), -INT16_C( 13189), -INT16_C( 1984), INT16_C( 28823), -INT16_C( 31125), INT16_C( 18628), -INT16_C( 18825), INT16_C( 4243), INT16_C( 9704), -INT16_C( 20972), INT16_C( 1770), INT16_C( 20138) }, { INT16_C( 4546), INT16_C( 128), INT16_C( 18034), -INT16_C( 22911), -INT16_C( 975), INT16_C( 29042), INT16_C( 2804), INT16_C( 24802), -INT16_C( 22896), INT16_C( 1960), INT16_C( 15196), INT16_C( 17688), INT16_C( 11360), INT16_C( 19187), -INT16_C( 25294), -INT16_C( 2663) }, { INT16_C( 12903), -INT16_C( 32315), -INT16_C( 13189), INT16_C( 28823), INT16_C( 128), -INT16_C( 22911), INT16_C( 29042), INT16_C( 24802), INT16_C( 18628), INT16_C( 4243), -INT16_C( 20972), INT16_C( 
20138), INT16_C( 1960), INT16_C( 17688), INT16_C( 19187), -INT16_C( 2663) } }, { { INT16_C( 6575), INT16_C( 8693), INT16_C( 30303), -INT16_C( 28473), INT16_C( 14707), INT16_C( 26370), -INT16_C( 7101), -INT16_C( 11321), INT16_C( 28810), -INT16_C( 6437), -INT16_C( 3157), INT16_C( 3115), INT16_C( 7967), INT16_C( 21078), -INT16_C( 4164), INT16_C( 27463) }, { INT16_C( 15369), INT16_C( 26764), INT16_C( 21426), INT16_C( 9721), -INT16_C( 1139), -INT16_C( 12147), INT16_C( 21727), INT16_C( 27044), INT16_C( 32708), INT16_C( 28751), INT16_C( 31602), -INT16_C( 28292), -INT16_C( 11622), INT16_C( 22243), INT16_C( 10946), -INT16_C( 13374) }, { INT16_C( 8693), -INT16_C( 28473), INT16_C( 26370), -INT16_C( 11321), INT16_C( 26764), INT16_C( 9721), -INT16_C( 12147), INT16_C( 27044), -INT16_C( 6437), INT16_C( 3115), INT16_C( 21078), INT16_C( 27463), INT16_C( 28751), -INT16_C( 28292), INT16_C( 22243), -INT16_C( 13374) } }, { { INT16_C( 20070), INT16_C( 6451), INT16_C( 11426), INT16_C( 12094), -INT16_C( 13529), INT16_C( 1791), -INT16_C( 23776), -INT16_C( 7057), -INT16_C( 16606), -INT16_C( 27564), -INT16_C( 12230), -INT16_C( 11226), INT16_C( 2467), INT16_C( 25898), -INT16_C( 5068), -INT16_C( 26064) }, { INT16_C( 25403), -INT16_C( 8781), -INT16_C( 3440), -INT16_C( 18676), INT16_C( 3005), -INT16_C( 8770), INT16_C( 11695), -INT16_C( 11838), INT16_C( 5868), INT16_C( 9830), -INT16_C( 29465), -INT16_C( 29958), INT16_C( 9621), -INT16_C( 13841), INT16_C( 7953), INT16_C( 19556) }, { INT16_C( 6451), INT16_C( 12094), INT16_C( 1791), -INT16_C( 7057), -INT16_C( 8781), -INT16_C( 18676), -INT16_C( 8770), -INT16_C( 11838), -INT16_C( 27564), -INT16_C( 11226), INT16_C( 25898), -INT16_C( 26064), INT16_C( 9830), -INT16_C( 29958), -INT16_C( 13841), INT16_C( 19556) } }, { { INT16_C( 6018), INT16_C( 4649), INT16_C( 13577), -INT16_C( 14390), -INT16_C( 30655), -INT16_C( 3932), INT16_C( 26293), -INT16_C( 23871), INT16_C( 10109), INT16_C( 25800), -INT16_C( 15437), INT16_C( 18926), -INT16_C( 8728), -INT16_C( 1774), 
INT16_C( 30460), INT16_C( 32326) }, { INT16_C( 28558), -INT16_C( 26735), INT16_C( 23461), -INT16_C( 6562), INT16_C( 995), -INT16_C( 26410), -INT16_C( 26775), -INT16_C( 6598), INT16_C( 959), INT16_C( 29258), INT16_C( 14534), -INT16_C( 20805), -INT16_C( 12779), INT16_C( 4519), -INT16_C( 4796), -INT16_C( 11632) }, { INT16_C( 4649), -INT16_C( 14390), -INT16_C( 3932), -INT16_C( 23871), -INT16_C( 26735), -INT16_C( 6562), -INT16_C( 26410), -INT16_C( 6598), INT16_C( 25800), INT16_C( 18926), -INT16_C( 1774), INT16_C( 32326), INT16_C( 29258), -INT16_C( 20805), INT16_C( 4519), -INT16_C( 11632) } }, { { INT16_C( 8541), INT16_C( 618), -INT16_C( 14212), INT16_C( 24552), -INT16_C( 16693), INT16_C( 13815), INT16_C( 12885), INT16_C( 5147), INT16_C( 26165), -INT16_C( 1145), INT16_C( 17054), -INT16_C( 19287), INT16_C( 20496), INT16_C( 21957), INT16_C( 21822), -INT16_C( 25817) }, { -INT16_C( 28298), -INT16_C( 3427), -INT16_C( 31398), INT16_C( 9553), INT16_C( 18755), -INT16_C( 26534), INT16_C( 30331), -INT16_C( 20307), INT16_C( 13532), INT16_C( 31403), INT16_C( 21622), -INT16_C( 30930), -INT16_C( 2908), -INT16_C( 7460), INT16_C( 841), -INT16_C( 16259) }, { INT16_C( 618), INT16_C( 24552), INT16_C( 13815), INT16_C( 5147), -INT16_C( 3427), INT16_C( 9553), -INT16_C( 26534), -INT16_C( 20307), -INT16_C( 1145), -INT16_C( 19287), INT16_C( 21957), -INT16_C( 25817), INT16_C( 31403), -INT16_C( 30930), -INT16_C( 7460), -INT16_C( 16259) } }, { { INT16_C( 6805), -INT16_C( 4174), INT16_C( 1183), -INT16_C( 7660), INT16_C( 28493), -INT16_C( 14213), INT16_C( 10469), -INT16_C( 16008), INT16_C( 9052), -INT16_C( 11717), INT16_C( 27255), INT16_C( 7001), INT16_C( 13662), -INT16_C( 22530), INT16_C( 31545), -INT16_C( 12697) }, { INT16_C( 6806), INT16_C( 13757), -INT16_C( 12002), INT16_C( 27416), -INT16_C( 27840), INT16_C( 9523), -INT16_C( 21573), INT16_C( 6118), INT16_C( 8910), INT16_C( 17897), INT16_C( 17292), -INT16_C( 5536), INT16_C( 24184), -INT16_C( 20079), -INT16_C( 1574), INT16_C( 28799) }, { -INT16_C( 
4174), -INT16_C( 7660), -INT16_C( 14213), -INT16_C( 16008), INT16_C( 13757), INT16_C( 27416), INT16_C( 9523), INT16_C( 6118), -INT16_C( 11717), INT16_C( 7001), -INT16_C( 22530), -INT16_C( 12697), INT16_C( 17897), -INT16_C( 5536), -INT16_C( 20079), INT16_C( 28799) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_x_mm256_deinterleaveodd_epi16(a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_x_mm256_deinterleaveeven_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[8]; const int32_t b[8]; const int32_t r[8]; } test_vec[] = { { { INT32_C( 504841746), -INT32_C( 184668304), INT32_C( 2075750092), -INT32_C( 1750132198), -INT32_C( 1448517056), -INT32_C( 409880474), -INT32_C( 1199547026), INT32_C( 732426777) }, { -INT32_C( 1454784967), -INT32_C( 1164097298), -INT32_C( 785033546), -INT32_C( 1318525839), -INT32_C( 1386606010), INT32_C( 949283786), INT32_C( 1643124040), INT32_C( 1082955783) }, { INT32_C( 504841746), INT32_C( 2075750092), -INT32_C( 1454784967), -INT32_C( 785033546), -INT32_C( 1448517056), -INT32_C( 1199547026), -INT32_C( 1386606010), INT32_C( 1643124040) } }, { { INT32_C( 1156240982), -INT32_C( 721516770), INT32_C( 1336226782), INT32_C( 1577061911), -INT32_C( 368354784), -INT32_C( 1927045307), -INT32_C( 1125182540), INT32_C( 50101163) }, { INT32_C( 1866917457), INT32_C( 1279542638), -INT32_C( 1868830344), INT32_C( 401513463), INT32_C( 973273589), INT32_C( 1304896920), -INT32_C( 469125576), -INT32_C( 2082077390) }, { INT32_C( 1156240982), INT32_C( 1336226782), INT32_C( 1866917457), -INT32_C( 1868830344), -INT32_C( 368354784), -INT32_C( 1125182540), INT32_C( 973273589), -INT32_C( 469125576) } }, { { INT32_C( 1525886188), -INT32_C( 358205583), INT32_C( 410665248), -INT32_C( 785422116), -INT32_C( 
116706975), -INT32_C( 1891184042), -INT32_C( 1150070903), INT32_C( 1094605141) }, { -INT32_C( 140824187), -INT32_C( 1981726360), INT32_C( 1587633026), INT32_C( 607113411), INT32_C( 1478310402), -INT32_C( 1779997684), INT32_C( 139483827), INT32_C( 961122227) }, { INT32_C( 1525886188), INT32_C( 410665248), -INT32_C( 140824187), INT32_C( 1587633026), -INT32_C( 116706975), -INT32_C( 1150070903), INT32_C( 1478310402), INT32_C( 139483827) } }, { { INT32_C( 691070144), -INT32_C( 1481502427), INT32_C( 788878188), INT32_C( 626209827), INT32_C( 2071883886), -INT32_C( 2012191276), INT32_C( 1938842048), -INT32_C( 1330849040) }, { -INT32_C( 455484226), INT32_C( 1502317549), INT32_C( 42504670), INT32_C( 875027397), INT32_C( 548382028), -INT32_C( 878133493), INT32_C( 272513312), -INT32_C( 775820525) }, { INT32_C( 691070144), INT32_C( 788878188), -INT32_C( 455484226), INT32_C( 42504670), INT32_C( 2071883886), INT32_C( 1938842048), INT32_C( 548382028), INT32_C( 272513312) } }, { { -INT32_C( 1246389561), INT32_C( 68043046), -INT32_C( 1761175598), -INT32_C( 1093980558), -INT32_C( 555779373), INT32_C( 1521059642), -INT32_C( 747902784), -INT32_C( 1700451117) }, { -INT32_C( 330343738), INT32_C( 1844534939), INT32_C( 1728444405), -INT32_C( 114896859), -INT32_C( 2049505973), INT32_C( 1306493324), INT32_C( 1008749161), INT32_C( 1037485430) }, { -INT32_C( 1246389561), -INT32_C( 1761175598), -INT32_C( 330343738), INT32_C( 1728444405), -INT32_C( 555779373), -INT32_C( 747902784), -INT32_C( 2049505973), INT32_C( 1008749161) } }, { { -INT32_C( 1154931168), INT32_C( 2032736900), INT32_C( 937438738), INT32_C( 1227884286), -INT32_C( 1731327989), -INT32_C( 219828599), INT32_C( 1865287160), -INT32_C( 341048117) }, { -INT32_C( 1364798166), INT32_C( 36163568), -INT32_C( 63371011), INT32_C( 440756750), -INT32_C( 72215438), -INT32_C( 1158834238), INT32_C( 1730747292), INT32_C( 1246942495) }, { -INT32_C( 1154931168), INT32_C( 937438738), -INT32_C( 1364798166), -INT32_C( 63371011), -INT32_C( 1731327989), 
INT32_C( 1865287160), -INT32_C( 72215438), INT32_C( 1730747292) } }, { { -INT32_C( 1678181973), -INT32_C( 962780984), INT32_C( 918738472), -INT32_C( 1303378112), -INT32_C( 575864293), INT32_C( 915970713), -INT32_C( 711081547), INT32_C( 1109389463) }, { -INT32_C( 1310910487), INT32_C( 1601665591), -INT32_C( 1852425904), INT32_C( 1547953729), -INT32_C( 2110066199), INT32_C( 1102631563), INT32_C( 706107027), INT32_C( 795620678) }, { -INT32_C( 1678181973), INT32_C( 918738472), -INT32_C( 1310910487), -INT32_C( 1852425904), -INT32_C( 575864293), -INT32_C( 711081547), -INT32_C( 2110066199), INT32_C( 706107027) } }, { { -INT32_C( 2065675956), INT32_C( 350443715), -INT32_C( 760907375), INT32_C( 1227876448), INT32_C( 1707829721), -INT32_C( 827947973), INT32_C( 553237722), INT32_C( 1028613617) }, { INT32_C( 1925263279), INT32_C( 428254599), INT32_C( 2129341214), -INT32_C( 305718764), -INT32_C( 1101884541), -INT32_C( 242354153), -INT32_C( 1525578060), -INT32_C( 1696374549) }, { -INT32_C( 2065675956), -INT32_C( 760907375), INT32_C( 1925263279), INT32_C( 2129341214), INT32_C( 1707829721), INT32_C( 553237722), -INT32_C( 1101884541), -INT32_C( 1525578060) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi32(test_vec[i].b); simde__m256i r = simde_x_mm256_deinterleaveeven_epi32(a, b); simde_test_x86_assert_equal_i32x8(r, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_x_mm256_deinterleaveodd_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[8]; const int32_t b[8]; const int32_t r[8]; } test_vec[] = { { { INT32_C( 1074233397), INT32_C( 226594984), -INT32_C( 1186849467), INT32_C( 1628384119), -INT32_C( 330208028), INT32_C( 1002695508), INT32_C( 1501784430), -INT32_C( 932890733) }, { INT32_C( 1611230648), INT32_C( 1114540797), INT32_C( 536588456), -INT32_C( 1082061861), -INT32_C( 877931913), INT32_C( 
738619069), INT32_C( 1803913944), INT32_C( 2033511361) }, { INT32_C( 226594984), INT32_C( 1628384119), INT32_C( 1114540797), -INT32_C( 1082061861), INT32_C( 1002695508), -INT32_C( 932890733), INT32_C( 738619069), INT32_C( 2033511361) } }, { { INT32_C( 1440365912), INT32_C( 1872251079), -INT32_C( 745565192), INT32_C( 378671262), -INT32_C( 1595851294), -INT32_C( 2066945876), INT32_C( 871321970), -INT32_C( 1800592580) }, { INT32_C( 686458720), -INT32_C( 946371889), -INT32_C( 1264966123), INT32_C( 432679990), INT32_C( 364489577), INT32_C( 93947283), INT32_C( 322537942), INT32_C( 229107372) }, { INT32_C( 1872251079), INT32_C( 378671262), -INT32_C( 946371889), INT32_C( 432679990), -INT32_C( 2066945876), -INT32_C( 1800592580), INT32_C( 93947283), INT32_C( 229107372) } }, { { INT32_C( 1010143597), INT32_C( 688114707), INT32_C( 702389747), INT32_C( 843229129), -INT32_C( 431424686), INT32_C( 1475076480), INT32_C( 392832106), INT32_C( 1998852362) }, { -INT32_C( 1229760093), INT32_C( 417314341), INT32_C( 474135635), -INT32_C( 1236302749), INT32_C( 10262400), -INT32_C( 480802952), -INT32_C( 1225080404), INT32_C( 1982734035) }, { INT32_C( 688114707), INT32_C( 843229129), INT32_C( 417314341), -INT32_C( 1236302749), INT32_C( 1475076480), INT32_C( 1998852362), -INT32_C( 480802952), INT32_C( 1982734035) } }, { { -INT32_C( 1674780297), -INT32_C( 340456296), INT32_C( 738785224), -INT32_C( 69052549), INT32_C( 1727823598), -INT32_C( 1320594683), -INT32_C( 395820267), -INT32_C( 664889759) }, { INT32_C( 259361399), INT32_C( 1610295958), -INT32_C( 1668611295), INT32_C( 1217949018), -INT32_C( 256994069), -INT32_C( 56428313), -INT32_C( 1645999557), INT32_C( 393560736) }, { -INT32_C( 340456296), -INT32_C( 69052549), INT32_C( 1610295958), INT32_C( 1217949018), -INT32_C( 1320594683), -INT32_C( 664889759), -INT32_C( 56428313), INT32_C( 393560736) } }, { { INT32_C( 1663560397), INT32_C( 901915156), INT32_C( 2144488741), -INT32_C( 1513657670), -INT32_C( 443124226), -INT32_C( 1444792211), -INT32_C( 
498678206), -INT32_C( 688276727) }, { -INT32_C( 1170661210), INT32_C( 1743846466), INT32_C( 82231881), INT32_C( 715762988), -INT32_C( 1878048989), -INT32_C( 1187384969), -INT32_C( 1046773832), -INT32_C( 510159557) }, { INT32_C( 901915156), -INT32_C( 1513657670), INT32_C( 1743846466), INT32_C( 715762988), -INT32_C( 1444792211), -INT32_C( 688276727), -INT32_C( 1187384969), -INT32_C( 510159557) } }, { { -INT32_C( 124006219), INT32_C( 375360460), INT32_C( 2031765069), INT32_C( 379831283), INT32_C( 2057810691), INT32_C( 1546969252), -INT32_C( 1692545184), INT32_C( 444380260) }, { INT32_C( 1360140165), -INT32_C( 261656157), -INT32_C( 1419083337), INT32_C( 1220611397), INT32_C( 1707239616), -INT32_C( 1446906295), INT32_C( 709156806), INT32_C( 407158931) }, { INT32_C( 375360460), INT32_C( 379831283), -INT32_C( 261656157), INT32_C( 1220611397), INT32_C( 1546969252), INT32_C( 444380260), -INT32_C( 1446906295), INT32_C( 407158931) } }, { { INT32_C( 2053789399), INT32_C( 2137772488), -INT32_C( 1742023341), -INT32_C( 1545540382), -INT32_C( 1660378540), INT32_C( 1598474649), INT32_C( 1015646888), INT32_C( 575983178) }, { -INT32_C( 325272028), -INT32_C( 479459440), -INT32_C( 1082419492), -INT32_C( 681419902), -INT32_C( 1770755331), -INT32_C( 587875533), -INT32_C( 1877442747), INT32_C( 1907518541) }, { INT32_C( 2137772488), -INT32_C( 1545540382), -INT32_C( 479459440), -INT32_C( 681419902), INT32_C( 1598474649), INT32_C( 575983178), -INT32_C( 587875533), INT32_C( 1907518541) } }, { { -INT32_C( 1151447509), INT32_C( 849267286), -INT32_C( 470738592), INT32_C( 1908036468), -INT32_C( 251121987), INT32_C( 802029033), -INT32_C( 910170756), INT32_C( 2084270417) }, { INT32_C( 355965375), -INT32_C( 1018636957), INT32_C( 1655060974), INT32_C( 1255432333), INT32_C( 2017189007), INT32_C( 1453787353), INT32_C( 1059022573), -INT32_C( 1749329193) }, { INT32_C( 849267286), INT32_C( 1908036468), -INT32_C( 1018636957), INT32_C( 1255432333), INT32_C( 802029033), INT32_C( 2084270417), INT32_C( 
1453787353), -INT32_C( 1749329193) } } };
/* Check every table entry: load a and b, run simde_x_mm256_deinterleaveodd_epi32, compare with r. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi32(test_vec[i].b); simde__m256i r = simde_x_mm256_deinterleaveodd_epi32(a, b); simde_test_x86_assert_equal_i32x8(r, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; }
/*
 * Table-driven test for simde_x_mm256_deinterleaveeven_ps.
 *
 * Each test_vec entry holds two 8-lane simde_float32 inputs (a, b) and the
 * expected output (r).  The entries in the table are laid out as
 *   r = { a[0], a[2], b[0], b[2], a[4], a[6], b[4], b[6] }.
 * The loop that follows the table loads a and b with simde_mm256_loadu_ps,
 * calls the function under test and compares the result against r with
 * simde_test_x86_assert_equal_f32x8.
 *
 * The trailing argument 1 passed to the assert helper is presumably a
 * precision/tolerance setting -- TODO confirm against the helper's definition.
 * The parameter list is supplied by the SIMDE_MUNIT_TEST_ARGS macro, which is
 * defined outside this part of the file.  Returns 0 after the loop completes.
 */
static int test_simde_x_mm256_deinterleaveeven_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct {
/* first input vector */
const simde_float32 a[8];
/* second input vector */
const simde_float32 b[8];
/* expected result */
const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( -685.40), SIMDE_FLOAT32_C( -82.27), SIMDE_FLOAT32_C( -652.43), SIMDE_FLOAT32_C( 401.52), SIMDE_FLOAT32_C( 925.39), SIMDE_FLOAT32_C( -588.47), SIMDE_FLOAT32_C( -408.21), SIMDE_FLOAT32_C( 201.70) }, { SIMDE_FLOAT32_C( -2.27), SIMDE_FLOAT32_C( -451.03), SIMDE_FLOAT32_C( 989.34), SIMDE_FLOAT32_C( -840.15), SIMDE_FLOAT32_C( 879.25), SIMDE_FLOAT32_C( -69.37), SIMDE_FLOAT32_C( 209.43), SIMDE_FLOAT32_C( -340.80) }, { SIMDE_FLOAT32_C( -685.40), SIMDE_FLOAT32_C( -652.43), SIMDE_FLOAT32_C( -2.27), SIMDE_FLOAT32_C( 989.34), SIMDE_FLOAT32_C( 925.39), SIMDE_FLOAT32_C( -408.21), SIMDE_FLOAT32_C( 879.25), SIMDE_FLOAT32_C( 209.43) } }, { { SIMDE_FLOAT32_C( -774.02), SIMDE_FLOAT32_C( 172.30), SIMDE_FLOAT32_C( -637.75), SIMDE_FLOAT32_C( 965.23), SIMDE_FLOAT32_C( 366.44), SIMDE_FLOAT32_C( -40.30), SIMDE_FLOAT32_C( 124.64), SIMDE_FLOAT32_C( -125.21) }, { SIMDE_FLOAT32_C( 935.91), SIMDE_FLOAT32_C( 798.97), SIMDE_FLOAT32_C( -165.02), SIMDE_FLOAT32_C( 726.84), SIMDE_FLOAT32_C( 834.23), SIMDE_FLOAT32_C( 584.97), SIMDE_FLOAT32_C( 734.98), SIMDE_FLOAT32_C( -851.17) }, { SIMDE_FLOAT32_C( -774.02), SIMDE_FLOAT32_C( -637.75), SIMDE_FLOAT32_C( 935.91), SIMDE_FLOAT32_C( -165.02), SIMDE_FLOAT32_C( 366.44), SIMDE_FLOAT32_C( 124.64), SIMDE_FLOAT32_C( 834.23), SIMDE_FLOAT32_C( 734.98) } }, { { SIMDE_FLOAT32_C( -497.30), SIMDE_FLOAT32_C( -917.45), SIMDE_FLOAT32_C( 550.35), SIMDE_FLOAT32_C( -571.91),
SIMDE_FLOAT32_C( -505.92), SIMDE_FLOAT32_C( -857.86), SIMDE_FLOAT32_C( 629.79), SIMDE_FLOAT32_C( 491.80) }, { SIMDE_FLOAT32_C( -308.89), SIMDE_FLOAT32_C( 619.14), SIMDE_FLOAT32_C( 651.65), SIMDE_FLOAT32_C( -429.64), SIMDE_FLOAT32_C( -450.24), SIMDE_FLOAT32_C( -138.92), SIMDE_FLOAT32_C( 229.56), SIMDE_FLOAT32_C( -224.25) }, { SIMDE_FLOAT32_C( -497.30), SIMDE_FLOAT32_C( 550.35), SIMDE_FLOAT32_C( -308.89), SIMDE_FLOAT32_C( 651.65), SIMDE_FLOAT32_C( -505.92), SIMDE_FLOAT32_C( 629.79), SIMDE_FLOAT32_C( -450.24), SIMDE_FLOAT32_C( 229.56) } }, { { SIMDE_FLOAT32_C( -966.62), SIMDE_FLOAT32_C( 591.82), SIMDE_FLOAT32_C( -259.02), SIMDE_FLOAT32_C( 399.82), SIMDE_FLOAT32_C( -448.49), SIMDE_FLOAT32_C( 865.62), SIMDE_FLOAT32_C( -725.39), SIMDE_FLOAT32_C( -512.58) }, { SIMDE_FLOAT32_C( 664.59), SIMDE_FLOAT32_C( 109.59), SIMDE_FLOAT32_C( -785.74), SIMDE_FLOAT32_C( 498.82), SIMDE_FLOAT32_C( -305.44), SIMDE_FLOAT32_C( 949.24), SIMDE_FLOAT32_C( 647.65), SIMDE_FLOAT32_C( 197.26) }, { SIMDE_FLOAT32_C( -966.62), SIMDE_FLOAT32_C( -259.02), SIMDE_FLOAT32_C( 664.59), SIMDE_FLOAT32_C( -785.74), SIMDE_FLOAT32_C( -448.49), SIMDE_FLOAT32_C( -725.39), SIMDE_FLOAT32_C( -305.44), SIMDE_FLOAT32_C( 647.65) } }, { { SIMDE_FLOAT32_C( -968.21), SIMDE_FLOAT32_C( 198.01), SIMDE_FLOAT32_C( 625.35), SIMDE_FLOAT32_C( -474.13), SIMDE_FLOAT32_C( 340.14), SIMDE_FLOAT32_C( 255.15), SIMDE_FLOAT32_C( -982.32), SIMDE_FLOAT32_C( -968.75) }, { SIMDE_FLOAT32_C( -125.72), SIMDE_FLOAT32_C( 669.33), SIMDE_FLOAT32_C( -398.39), SIMDE_FLOAT32_C( 424.05), SIMDE_FLOAT32_C( -469.59), SIMDE_FLOAT32_C( 831.17), SIMDE_FLOAT32_C( -800.21), SIMDE_FLOAT32_C( -436.21) }, { SIMDE_FLOAT32_C( -968.21), SIMDE_FLOAT32_C( 625.35), SIMDE_FLOAT32_C( -125.72), SIMDE_FLOAT32_C( -398.39), SIMDE_FLOAT32_C( 340.14), SIMDE_FLOAT32_C( -982.32), SIMDE_FLOAT32_C( -469.59), SIMDE_FLOAT32_C( -800.21) } }, { { SIMDE_FLOAT32_C( 422.99), SIMDE_FLOAT32_C( -59.23), SIMDE_FLOAT32_C( 963.60), SIMDE_FLOAT32_C( 974.50), SIMDE_FLOAT32_C( -193.61), 
SIMDE_FLOAT32_C( -761.79), SIMDE_FLOAT32_C( -538.08), SIMDE_FLOAT32_C( -529.01) }, { SIMDE_FLOAT32_C( 347.80), SIMDE_FLOAT32_C( -323.81), SIMDE_FLOAT32_C( 969.81), SIMDE_FLOAT32_C( -957.64), SIMDE_FLOAT32_C( -374.57), SIMDE_FLOAT32_C( 617.46), SIMDE_FLOAT32_C( 239.62), SIMDE_FLOAT32_C( -342.78) }, { SIMDE_FLOAT32_C( 422.99), SIMDE_FLOAT32_C( 963.60), SIMDE_FLOAT32_C( 347.80), SIMDE_FLOAT32_C( 969.81), SIMDE_FLOAT32_C( -193.61), SIMDE_FLOAT32_C( -538.08), SIMDE_FLOAT32_C( -374.57), SIMDE_FLOAT32_C( 239.62) } }, { { SIMDE_FLOAT32_C( -184.53), SIMDE_FLOAT32_C( -135.02), SIMDE_FLOAT32_C( 183.10), SIMDE_FLOAT32_C( -844.38), SIMDE_FLOAT32_C( -879.88), SIMDE_FLOAT32_C( 200.77), SIMDE_FLOAT32_C( -813.13), SIMDE_FLOAT32_C( -5.59) }, { SIMDE_FLOAT32_C( -129.90), SIMDE_FLOAT32_C( -211.52), SIMDE_FLOAT32_C( -581.55), SIMDE_FLOAT32_C( 400.51), SIMDE_FLOAT32_C( -380.35), SIMDE_FLOAT32_C( -381.75), SIMDE_FLOAT32_C( 964.30), SIMDE_FLOAT32_C( -957.36) }, { SIMDE_FLOAT32_C( -184.53), SIMDE_FLOAT32_C( 183.10), SIMDE_FLOAT32_C( -129.90), SIMDE_FLOAT32_C( -581.55), SIMDE_FLOAT32_C( -879.88), SIMDE_FLOAT32_C( -813.13), SIMDE_FLOAT32_C( -380.35), SIMDE_FLOAT32_C( 964.30) } }, { { SIMDE_FLOAT32_C( 559.02), SIMDE_FLOAT32_C( 927.90), SIMDE_FLOAT32_C( -982.86), SIMDE_FLOAT32_C( -634.59), SIMDE_FLOAT32_C( -833.88), SIMDE_FLOAT32_C( -520.93), SIMDE_FLOAT32_C( -163.60), SIMDE_FLOAT32_C( 513.91) }, { SIMDE_FLOAT32_C( 155.25), SIMDE_FLOAT32_C( -193.79), SIMDE_FLOAT32_C( 556.27), SIMDE_FLOAT32_C( 780.68), SIMDE_FLOAT32_C( -576.33), SIMDE_FLOAT32_C( -204.10), SIMDE_FLOAT32_C( -562.10), SIMDE_FLOAT32_C( 239.14) }, { SIMDE_FLOAT32_C( 559.02), SIMDE_FLOAT32_C( -982.86), SIMDE_FLOAT32_C( 155.25), SIMDE_FLOAT32_C( 556.27), SIMDE_FLOAT32_C( -833.88), SIMDE_FLOAT32_C( -163.60), SIMDE_FLOAT32_C( -576.33), SIMDE_FLOAT32_C( -562.10) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 b = 
simde_mm256_loadu_ps(test_vec[i].b); simde__m256 r = simde_x_mm256_deinterleaveeven_ps(a, b);
/* Compare with the expected vector; the trailing 1 is presumably a precision argument -- TODO confirm. */
simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; }
/*
 * Table-driven test for simde_x_mm256_deinterleaveodd_ps.
 *
 * Each test_vec entry holds two 8-lane simde_float32 inputs (a, b) and the
 * expected output (r).  The entries in the table are laid out as
 *   r = { a[1], a[3], b[1], b[3], a[5], a[7], b[5], b[7] }.
 * The loop that follows the table loads a and b with simde_mm256_loadu_ps,
 * calls the function under test and compares the result against r with
 * simde_test_x86_assert_equal_f32x8.
 *
 * The parameter list is supplied by the SIMDE_MUNIT_TEST_ARGS macro, which is
 * defined outside this part of the file.  Returns 0 after the loop completes.
 */
static int test_simde_x_mm256_deinterleaveodd_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct {
/* first input vector */
const simde_float32 a[8];
/* second input vector */
const simde_float32 b[8];
/* expected result */
const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 646.61), SIMDE_FLOAT32_C( 356.61), SIMDE_FLOAT32_C( 18.50), SIMDE_FLOAT32_C( 854.67), SIMDE_FLOAT32_C( 135.33), SIMDE_FLOAT32_C( 893.27), SIMDE_FLOAT32_C( -572.24), SIMDE_FLOAT32_C( -566.66) }, { SIMDE_FLOAT32_C( 44.03), SIMDE_FLOAT32_C( 606.85), SIMDE_FLOAT32_C( 868.96), SIMDE_FLOAT32_C( -267.27), SIMDE_FLOAT32_C( 902.13), SIMDE_FLOAT32_C( -607.89), SIMDE_FLOAT32_C( -775.37), SIMDE_FLOAT32_C( 704.62) }, { SIMDE_FLOAT32_C( 356.61), SIMDE_FLOAT32_C( 854.67), SIMDE_FLOAT32_C( 606.85), SIMDE_FLOAT32_C( -267.27), SIMDE_FLOAT32_C( 893.27), SIMDE_FLOAT32_C( -566.66), SIMDE_FLOAT32_C( -607.89), SIMDE_FLOAT32_C( 704.62) } }, { { SIMDE_FLOAT32_C( -749.05), SIMDE_FLOAT32_C( 899.25), SIMDE_FLOAT32_C( -160.48), SIMDE_FLOAT32_C( -536.81), SIMDE_FLOAT32_C( -788.17), SIMDE_FLOAT32_C( 841.08), SIMDE_FLOAT32_C( -487.56), SIMDE_FLOAT32_C( 7.91) }, { SIMDE_FLOAT32_C( -727.94), SIMDE_FLOAT32_C( -117.44), SIMDE_FLOAT32_C( 410.25), SIMDE_FLOAT32_C( 324.46), SIMDE_FLOAT32_C( 559.11), SIMDE_FLOAT32_C( 628.63), SIMDE_FLOAT32_C( -801.85), SIMDE_FLOAT32_C( 205.72) }, { SIMDE_FLOAT32_C( 899.25), SIMDE_FLOAT32_C( -536.81), SIMDE_FLOAT32_C( -117.44), SIMDE_FLOAT32_C( 324.46), SIMDE_FLOAT32_C( 841.08), SIMDE_FLOAT32_C( 7.91), SIMDE_FLOAT32_C( 628.63), SIMDE_FLOAT32_C( 205.72) } }, { { SIMDE_FLOAT32_C( -14.76), SIMDE_FLOAT32_C( 216.65), SIMDE_FLOAT32_C( 60.39), SIMDE_FLOAT32_C( -879.43), SIMDE_FLOAT32_C( 109.91), SIMDE_FLOAT32_C( 488.15), SIMDE_FLOAT32_C( -446.09), SIMDE_FLOAT32_C( -846.06) }, { SIMDE_FLOAT32_C( 95.01), SIMDE_FLOAT32_C( -577.14), SIMDE_FLOAT32_C( -113.32), SIMDE_FLOAT32_C( -2.87),
SIMDE_FLOAT32_C( -185.03), SIMDE_FLOAT32_C( 111.30), SIMDE_FLOAT32_C( -298.24), SIMDE_FLOAT32_C( 65.92) }, { SIMDE_FLOAT32_C( 216.65), SIMDE_FLOAT32_C( -879.43), SIMDE_FLOAT32_C( -577.14), SIMDE_FLOAT32_C( -2.87), SIMDE_FLOAT32_C( 488.15), SIMDE_FLOAT32_C( -846.06), SIMDE_FLOAT32_C( 111.30), SIMDE_FLOAT32_C( 65.92) } }, { { SIMDE_FLOAT32_C( 10.55), SIMDE_FLOAT32_C( 541.28), SIMDE_FLOAT32_C( 529.11), SIMDE_FLOAT32_C( 222.38), SIMDE_FLOAT32_C( 382.36), SIMDE_FLOAT32_C( -958.44), SIMDE_FLOAT32_C( -769.71), SIMDE_FLOAT32_C( 654.42) }, { SIMDE_FLOAT32_C( -75.88), SIMDE_FLOAT32_C( 640.54), SIMDE_FLOAT32_C( -21.12), SIMDE_FLOAT32_C( -516.77), SIMDE_FLOAT32_C( 269.17), SIMDE_FLOAT32_C( 177.02), SIMDE_FLOAT32_C( 688.96), SIMDE_FLOAT32_C( -745.60) }, { SIMDE_FLOAT32_C( 541.28), SIMDE_FLOAT32_C( 222.38), SIMDE_FLOAT32_C( 640.54), SIMDE_FLOAT32_C( -516.77), SIMDE_FLOAT32_C( -958.44), SIMDE_FLOAT32_C( 654.42), SIMDE_FLOAT32_C( 177.02), SIMDE_FLOAT32_C( -745.60) } }, { { SIMDE_FLOAT32_C( -606.33), SIMDE_FLOAT32_C( -250.65), SIMDE_FLOAT32_C( -625.03), SIMDE_FLOAT32_C( 503.58), SIMDE_FLOAT32_C( -762.50), SIMDE_FLOAT32_C( -71.12), SIMDE_FLOAT32_C( 657.53), SIMDE_FLOAT32_C( 332.51) }, { SIMDE_FLOAT32_C( 351.74), SIMDE_FLOAT32_C( -455.80), SIMDE_FLOAT32_C( -670.36), SIMDE_FLOAT32_C( -833.29), SIMDE_FLOAT32_C( 655.50), SIMDE_FLOAT32_C( 31.40), SIMDE_FLOAT32_C( 232.63), SIMDE_FLOAT32_C( -333.95) }, { SIMDE_FLOAT32_C( -250.65), SIMDE_FLOAT32_C( 503.58), SIMDE_FLOAT32_C( -455.80), SIMDE_FLOAT32_C( -833.29), SIMDE_FLOAT32_C( -71.12), SIMDE_FLOAT32_C( 332.51), SIMDE_FLOAT32_C( 31.40), SIMDE_FLOAT32_C( -333.95) } }, { { SIMDE_FLOAT32_C( -427.32), SIMDE_FLOAT32_C( -238.26), SIMDE_FLOAT32_C( 888.43), SIMDE_FLOAT32_C( 955.03), SIMDE_FLOAT32_C( -196.70), SIMDE_FLOAT32_C( -881.28), SIMDE_FLOAT32_C( 609.45), SIMDE_FLOAT32_C( 727.42) }, { SIMDE_FLOAT32_C( 759.26), SIMDE_FLOAT32_C( -411.67), SIMDE_FLOAT32_C( -789.35), SIMDE_FLOAT32_C( 28.42), SIMDE_FLOAT32_C( 765.35), SIMDE_FLOAT32_C( 899.61), 
SIMDE_FLOAT32_C( 282.83), SIMDE_FLOAT32_C( -840.98) }, { SIMDE_FLOAT32_C( -238.26), SIMDE_FLOAT32_C( 955.03), SIMDE_FLOAT32_C( -411.67), SIMDE_FLOAT32_C( 28.42), SIMDE_FLOAT32_C( -881.28), SIMDE_FLOAT32_C( 727.42), SIMDE_FLOAT32_C( 899.61), SIMDE_FLOAT32_C( -840.98) } }, { { SIMDE_FLOAT32_C( -351.05), SIMDE_FLOAT32_C( 657.80), SIMDE_FLOAT32_C( 662.60), SIMDE_FLOAT32_C( -113.55), SIMDE_FLOAT32_C( -413.32), SIMDE_FLOAT32_C( 320.13), SIMDE_FLOAT32_C( -781.04), SIMDE_FLOAT32_C( 938.42) }, { SIMDE_FLOAT32_C( 864.33), SIMDE_FLOAT32_C( -451.40), SIMDE_FLOAT32_C( -894.87), SIMDE_FLOAT32_C( 519.84), SIMDE_FLOAT32_C( 580.00), SIMDE_FLOAT32_C( 337.76), SIMDE_FLOAT32_C( -814.11), SIMDE_FLOAT32_C( -847.32) }, { SIMDE_FLOAT32_C( 657.80), SIMDE_FLOAT32_C( -113.55), SIMDE_FLOAT32_C( -451.40), SIMDE_FLOAT32_C( 519.84), SIMDE_FLOAT32_C( 320.13), SIMDE_FLOAT32_C( 938.42), SIMDE_FLOAT32_C( 337.76), SIMDE_FLOAT32_C( -847.32) } }, { { SIMDE_FLOAT32_C( -900.50), SIMDE_FLOAT32_C( -925.68), SIMDE_FLOAT32_C( -892.29), SIMDE_FLOAT32_C( -97.20), SIMDE_FLOAT32_C( -806.96), SIMDE_FLOAT32_C( 717.16), SIMDE_FLOAT32_C( -369.78), SIMDE_FLOAT32_C( 952.30) }, { SIMDE_FLOAT32_C( -694.51), SIMDE_FLOAT32_C( -159.13), SIMDE_FLOAT32_C( -19.28), SIMDE_FLOAT32_C( -929.16), SIMDE_FLOAT32_C( -259.52), SIMDE_FLOAT32_C( -736.45), SIMDE_FLOAT32_C( -770.14), SIMDE_FLOAT32_C( 389.43) }, { SIMDE_FLOAT32_C( -925.68), SIMDE_FLOAT32_C( -97.20), SIMDE_FLOAT32_C( -159.13), SIMDE_FLOAT32_C( -929.16), SIMDE_FLOAT32_C( 717.16), SIMDE_FLOAT32_C( 952.30), SIMDE_FLOAT32_C( -736.45), SIMDE_FLOAT32_C( 389.43) } } };
/* Check every table entry: load a and b, run simde_x_mm256_deinterleaveodd_ps, compare with r. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 b = simde_mm256_loadu_ps(test_vec[i].b); simde__m256 r = simde_x_mm256_deinterleaveodd_ps(a, b); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; }
/*
 * Table-driven test for simde_x_mm256_deinterleaveeven_pd.
 *
 * Each test_vec entry holds two 4-lane simde_float64 inputs (a, b) and the
 * expected output (r).  The entries in the table are laid out as
 *   r = { a[0], b[0], a[2], b[2] }.
 * The loop that follows the table loads a and b with simde_mm256_loadu_pd,
 * calls the function under test and compares the result against r with
 * simde_test_x86_assert_equal_f64x4.
 *
 * The trailing argument 1 passed to the assert helper is presumably a
 * precision/tolerance setting -- TODO confirm against the helper's definition.
 * The parameter list is supplied by the SIMDE_MUNIT_TEST_ARGS macro, which is
 * defined outside this part of the file.  Returns 0 after the loop completes.
 */
static int test_simde_x_mm256_deinterleaveeven_pd (SIMDE_MUNIT_TEST_ARGS) { static const
struct { const simde_float64 a[4]; const simde_float64 b[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( -917.73), SIMDE_FLOAT64_C( -652.52), SIMDE_FLOAT64_C( 938.00), SIMDE_FLOAT64_C( 628.32) }, { SIMDE_FLOAT64_C( -379.39), SIMDE_FLOAT64_C( -366.67), SIMDE_FLOAT64_C( 60.53), SIMDE_FLOAT64_C( -36.15) }, { SIMDE_FLOAT64_C( -917.73), SIMDE_FLOAT64_C( -379.39), SIMDE_FLOAT64_C( 938.00), SIMDE_FLOAT64_C( 60.53) } }, { { SIMDE_FLOAT64_C( 719.20), SIMDE_FLOAT64_C( 657.85), SIMDE_FLOAT64_C( 649.12), SIMDE_FLOAT64_C( -199.69) }, { SIMDE_FLOAT64_C( -297.79), SIMDE_FLOAT64_C( -137.86), SIMDE_FLOAT64_C( -554.27), SIMDE_FLOAT64_C( -359.22) }, { SIMDE_FLOAT64_C( 719.20), SIMDE_FLOAT64_C( -297.79), SIMDE_FLOAT64_C( 649.12), SIMDE_FLOAT64_C( -554.27) } }, { { SIMDE_FLOAT64_C( -704.27), SIMDE_FLOAT64_C( -704.53), SIMDE_FLOAT64_C( 753.64), SIMDE_FLOAT64_C( -12.36) }, { SIMDE_FLOAT64_C( -714.88), SIMDE_FLOAT64_C( 976.56), SIMDE_FLOAT64_C( -603.38), SIMDE_FLOAT64_C( 193.76) }, { SIMDE_FLOAT64_C( -704.27), SIMDE_FLOAT64_C( -714.88), SIMDE_FLOAT64_C( 753.64), SIMDE_FLOAT64_C( -603.38) } }, { { SIMDE_FLOAT64_C( -307.02), SIMDE_FLOAT64_C( 740.87), SIMDE_FLOAT64_C( -356.03), SIMDE_FLOAT64_C( 819.67) }, { SIMDE_FLOAT64_C( -962.98), SIMDE_FLOAT64_C( 552.09), SIMDE_FLOAT64_C( -784.02), SIMDE_FLOAT64_C( -880.71) }, { SIMDE_FLOAT64_C( -307.02), SIMDE_FLOAT64_C( -962.98), SIMDE_FLOAT64_C( -356.03), SIMDE_FLOAT64_C( -784.02) } }, { { SIMDE_FLOAT64_C( 899.57), SIMDE_FLOAT64_C( -846.01), SIMDE_FLOAT64_C( 747.61), SIMDE_FLOAT64_C( -479.82) }, { SIMDE_FLOAT64_C( -212.68), SIMDE_FLOAT64_C( -191.86), SIMDE_FLOAT64_C( 484.03), SIMDE_FLOAT64_C( -493.48) }, { SIMDE_FLOAT64_C( 899.57), SIMDE_FLOAT64_C( -212.68), SIMDE_FLOAT64_C( 747.61), SIMDE_FLOAT64_C( 484.03) } }, { { SIMDE_FLOAT64_C( -534.01), SIMDE_FLOAT64_C( 133.14), SIMDE_FLOAT64_C( 306.83), SIMDE_FLOAT64_C( 168.20) }, { SIMDE_FLOAT64_C( 995.29), SIMDE_FLOAT64_C( 752.56), SIMDE_FLOAT64_C( 808.98), SIMDE_FLOAT64_C( -708.99) }, { 
SIMDE_FLOAT64_C( -534.01), SIMDE_FLOAT64_C( 995.29), SIMDE_FLOAT64_C( 306.83), SIMDE_FLOAT64_C( 808.98) } }, { { SIMDE_FLOAT64_C( -951.97), SIMDE_FLOAT64_C( 562.62), SIMDE_FLOAT64_C( 278.66), SIMDE_FLOAT64_C( -666.85) }, { SIMDE_FLOAT64_C( 539.18), SIMDE_FLOAT64_C( 675.28), SIMDE_FLOAT64_C( 526.91), SIMDE_FLOAT64_C( -767.85) }, { SIMDE_FLOAT64_C( -951.97), SIMDE_FLOAT64_C( 539.18), SIMDE_FLOAT64_C( 278.66), SIMDE_FLOAT64_C( 526.91) } }, { { SIMDE_FLOAT64_C( 416.15), SIMDE_FLOAT64_C( -829.13), SIMDE_FLOAT64_C( -948.18), SIMDE_FLOAT64_C( 453.17) }, { SIMDE_FLOAT64_C( 722.96), SIMDE_FLOAT64_C( -732.19), SIMDE_FLOAT64_C( 572.46), SIMDE_FLOAT64_C( 622.53) }, { SIMDE_FLOAT64_C( 416.15), SIMDE_FLOAT64_C( 722.96), SIMDE_FLOAT64_C( -948.18), SIMDE_FLOAT64_C( 572.46) } } };
/* Check every table entry: load a and b, run simde_x_mm256_deinterleaveeven_pd, compare with r. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d b = simde_mm256_loadu_pd(test_vec[i].b); simde__m256d r = simde_x_mm256_deinterleaveeven_pd(a, b); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; }
/*
 * Table-driven test for simde_x_mm256_deinterleaveodd_pd.
 *
 * Each test_vec entry holds two 4-lane simde_float64 inputs (a, b) and the
 * expected output (r).  The entries in the table are laid out as
 *   r = { a[1], b[1], a[3], b[3] }.
 * The loop that follows the table loads a and b with simde_mm256_loadu_pd,
 * calls the function under test and compares the result against r with
 * simde_test_x86_assert_equal_f64x4.
 *
 * The parameter list is supplied by the SIMDE_MUNIT_TEST_ARGS macro, which is
 * defined outside this part of the file.  Returns 0 after the loop completes.
 */
static int test_simde_x_mm256_deinterleaveodd_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct {
/* first input vector */
const simde_float64 a[4];
/* second input vector */
const simde_float64 b[4];
/* expected result */
const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( -648.87), SIMDE_FLOAT64_C( -409.81), SIMDE_FLOAT64_C( 258.85), SIMDE_FLOAT64_C( 552.93) }, { SIMDE_FLOAT64_C( 522.44), SIMDE_FLOAT64_C( 561.99), SIMDE_FLOAT64_C( 622.89), SIMDE_FLOAT64_C( -995.80) }, { SIMDE_FLOAT64_C( -409.81), SIMDE_FLOAT64_C( 561.99), SIMDE_FLOAT64_C( 552.93), SIMDE_FLOAT64_C( -995.80) } }, { { SIMDE_FLOAT64_C( -482.94), SIMDE_FLOAT64_C( -355.20), SIMDE_FLOAT64_C( 567.28), SIMDE_FLOAT64_C( 592.90) }, { SIMDE_FLOAT64_C( 321.96), SIMDE_FLOAT64_C( -278.34), SIMDE_FLOAT64_C( 707.76), SIMDE_FLOAT64_C( -993.72) }, { SIMDE_FLOAT64_C( -355.20), SIMDE_FLOAT64_C( -278.34), SIMDE_FLOAT64_C( 592.90), SIMDE_FLOAT64_C( -993.72) } }, { { SIMDE_FLOAT64_C( 24.59),
SIMDE_FLOAT64_C( 115.17), SIMDE_FLOAT64_C( -487.45), SIMDE_FLOAT64_C( -271.65) }, { SIMDE_FLOAT64_C( 637.48), SIMDE_FLOAT64_C( -438.22), SIMDE_FLOAT64_C( 252.23), SIMDE_FLOAT64_C( -247.08) }, { SIMDE_FLOAT64_C( 115.17), SIMDE_FLOAT64_C( -438.22), SIMDE_FLOAT64_C( -271.65), SIMDE_FLOAT64_C( -247.08) } }, { { SIMDE_FLOAT64_C( 513.68), SIMDE_FLOAT64_C( -28.49), SIMDE_FLOAT64_C( -561.16), SIMDE_FLOAT64_C( -840.08) }, { SIMDE_FLOAT64_C( 486.66), SIMDE_FLOAT64_C( -58.51), SIMDE_FLOAT64_C( -788.82), SIMDE_FLOAT64_C( 837.79) }, { SIMDE_FLOAT64_C( -28.49), SIMDE_FLOAT64_C( -58.51), SIMDE_FLOAT64_C( -840.08), SIMDE_FLOAT64_C( 837.79) } }, { { SIMDE_FLOAT64_C( 531.68), SIMDE_FLOAT64_C( 470.03), SIMDE_FLOAT64_C( 390.72), SIMDE_FLOAT64_C( 54.12) }, { SIMDE_FLOAT64_C( 32.02), SIMDE_FLOAT64_C( 13.61), SIMDE_FLOAT64_C( 58.32), SIMDE_FLOAT64_C( 549.08) }, { SIMDE_FLOAT64_C( 470.03), SIMDE_FLOAT64_C( 13.61), SIMDE_FLOAT64_C( 54.12), SIMDE_FLOAT64_C( 549.08) } }, { { SIMDE_FLOAT64_C( 658.41), SIMDE_FLOAT64_C( -374.40), SIMDE_FLOAT64_C( 141.98), SIMDE_FLOAT64_C( -19.63) }, { SIMDE_FLOAT64_C( 347.26), SIMDE_FLOAT64_C( -150.25), SIMDE_FLOAT64_C( -13.34), SIMDE_FLOAT64_C( -628.15) }, { SIMDE_FLOAT64_C( -374.40), SIMDE_FLOAT64_C( -150.25), SIMDE_FLOAT64_C( -19.63), SIMDE_FLOAT64_C( -628.15) } }, { { SIMDE_FLOAT64_C( 964.92), SIMDE_FLOAT64_C( 499.21), SIMDE_FLOAT64_C( 100.21), SIMDE_FLOAT64_C( 602.40) }, { SIMDE_FLOAT64_C( -939.01), SIMDE_FLOAT64_C( -647.56), SIMDE_FLOAT64_C( -644.68), SIMDE_FLOAT64_C( 574.67) }, { SIMDE_FLOAT64_C( 499.21), SIMDE_FLOAT64_C( -647.56), SIMDE_FLOAT64_C( 602.40), SIMDE_FLOAT64_C( 574.67) } }, { { SIMDE_FLOAT64_C( 323.95), SIMDE_FLOAT64_C( -205.84), SIMDE_FLOAT64_C( 734.60), SIMDE_FLOAT64_C( -189.39) }, { SIMDE_FLOAT64_C( 735.65), SIMDE_FLOAT64_C( 945.77), SIMDE_FLOAT64_C( -351.60), SIMDE_FLOAT64_C( 267.33) }, { SIMDE_FLOAT64_C( -205.84), SIMDE_FLOAT64_C( 945.77), SIMDE_FLOAT64_C( -189.39), SIMDE_FLOAT64_C( 267.33) } } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
/* Load both inputs, run simde_x_mm256_deinterleaveodd_pd and compare with the expected vector r. */
simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d b = simde_mm256_loadu_pd(test_vec[i].b); simde__m256d r = simde_x_mm256_deinterleaveodd_pd(a, b); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; }
/*
 * Table-driven test for simde_mm256_add_ps.
 *
 * Each test_vec entry holds two 8-lane simde_float32 inputs (a, b) and the
 * expected output (r).  In the table r is the lane-wise sum a + b written to
 * two decimals (e.g. 593.61 + 419.25 -> 1012.86); a few entries differ from
 * the exact decimal sum in the last digit.
 * The loop that follows the table loads a and b with simde_mm256_loadu_ps,
 * calls the function under test and compares the result against r with
 * simde_test_x86_assert_equal_f32x8.
 *
 * The trailing argument 1 passed to the assert helper is presumably a
 * precision/tolerance setting -- TODO confirm against the helper's definition.
 * NOTE(review): the table here is a plain local array (no static / const
 * qualifiers on the struct or its members).
 * The parameter list is supplied by the SIMDE_MUNIT_TEST_ARGS macro, which is
 * defined outside this part of the file.  Returns 0 after the loop completes.
 */
static int test_simde_mm256_add_ps (SIMDE_MUNIT_TEST_ARGS) { struct {
/* first addend */
simde_float32 a[8];
/* second addend */
simde_float32 b[8];
/* expected sum */
simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 593.61), SIMDE_FLOAT32_C( -557.89), SIMDE_FLOAT32_C( 791.53), SIMDE_FLOAT32_C( 154.11), SIMDE_FLOAT32_C( -737.14), SIMDE_FLOAT32_C( 771.63), SIMDE_FLOAT32_C( -560.40), SIMDE_FLOAT32_C( 722.01) }, { SIMDE_FLOAT32_C( 419.25), SIMDE_FLOAT32_C( 900.75), SIMDE_FLOAT32_C( -9.61), SIMDE_FLOAT32_C( -733.53), SIMDE_FLOAT32_C( -182.34), SIMDE_FLOAT32_C( -977.20), SIMDE_FLOAT32_C( 52.09), SIMDE_FLOAT32_C( -330.07) }, { SIMDE_FLOAT32_C( 1012.86), SIMDE_FLOAT32_C( 342.86), SIMDE_FLOAT32_C( 781.92), SIMDE_FLOAT32_C( -579.42), SIMDE_FLOAT32_C( -919.48), SIMDE_FLOAT32_C( -205.56), SIMDE_FLOAT32_C( -508.31), SIMDE_FLOAT32_C( 391.94) } }, { { SIMDE_FLOAT32_C( 931.06), SIMDE_FLOAT32_C( -653.59), SIMDE_FLOAT32_C( 236.39), SIMDE_FLOAT32_C( -464.67), SIMDE_FLOAT32_C( 290.33), SIMDE_FLOAT32_C( 742.49), SIMDE_FLOAT32_C( -756.45), SIMDE_FLOAT32_C( 594.08) }, { SIMDE_FLOAT32_C( -246.91), SIMDE_FLOAT32_C( 947.78), SIMDE_FLOAT32_C( -474.92), SIMDE_FLOAT32_C( -744.97), SIMDE_FLOAT32_C( 488.25), SIMDE_FLOAT32_C( 386.30), SIMDE_FLOAT32_C( 828.81), SIMDE_FLOAT32_C( 81.87) }, { SIMDE_FLOAT32_C( 684.15), SIMDE_FLOAT32_C( 294.20), SIMDE_FLOAT32_C( -238.53), SIMDE_FLOAT32_C( -1209.65), SIMDE_FLOAT32_C( 778.59), SIMDE_FLOAT32_C( 1128.78), SIMDE_FLOAT32_C( 72.36), SIMDE_FLOAT32_C( 675.95) } }, { { SIMDE_FLOAT32_C( 828.40), SIMDE_FLOAT32_C( 620.34), SIMDE_FLOAT32_C( -764.02), SIMDE_FLOAT32_C( -908.74), SIMDE_FLOAT32_C( 391.97), SIMDE_FLOAT32_C( -324.42), SIMDE_FLOAT32_C( 813.27), SIMDE_FLOAT32_C( -188.78) }, { SIMDE_FLOAT32_C( -423.67),
SIMDE_FLOAT32_C( -196.34), SIMDE_FLOAT32_C( 77.69), SIMDE_FLOAT32_C( 393.99), SIMDE_FLOAT32_C( -173.54), SIMDE_FLOAT32_C( -870.22), SIMDE_FLOAT32_C( -936.08), SIMDE_FLOAT32_C( -242.47) }, { SIMDE_FLOAT32_C( 404.74), SIMDE_FLOAT32_C( 424.00), SIMDE_FLOAT32_C( -686.33), SIMDE_FLOAT32_C( -514.74), SIMDE_FLOAT32_C( 218.44), SIMDE_FLOAT32_C( -1194.64), SIMDE_FLOAT32_C( -122.81), SIMDE_FLOAT32_C( -431.25) } }, { { SIMDE_FLOAT32_C( -523.81), SIMDE_FLOAT32_C( 300.32), SIMDE_FLOAT32_C( 292.85), SIMDE_FLOAT32_C( 766.52), SIMDE_FLOAT32_C( 42.80), SIMDE_FLOAT32_C( 536.40), SIMDE_FLOAT32_C( 360.60), SIMDE_FLOAT32_C( 795.89) }, { SIMDE_FLOAT32_C( 484.18), SIMDE_FLOAT32_C( 885.68), SIMDE_FLOAT32_C( -949.08), SIMDE_FLOAT32_C( -27.56), SIMDE_FLOAT32_C( 271.98), SIMDE_FLOAT32_C( 879.73), SIMDE_FLOAT32_C( -945.69), SIMDE_FLOAT32_C( 100.38) }, { SIMDE_FLOAT32_C( -39.63), SIMDE_FLOAT32_C( 1186.00), SIMDE_FLOAT32_C( -656.23), SIMDE_FLOAT32_C( 738.96), SIMDE_FLOAT32_C( 314.78), SIMDE_FLOAT32_C( 1416.13), SIMDE_FLOAT32_C( -585.09), SIMDE_FLOAT32_C( 896.27) } }, { { SIMDE_FLOAT32_C( 500.07), SIMDE_FLOAT32_C( -709.71), SIMDE_FLOAT32_C( 191.65), SIMDE_FLOAT32_C( -107.96), SIMDE_FLOAT32_C( -34.13), SIMDE_FLOAT32_C( 4.92), SIMDE_FLOAT32_C( 703.26), SIMDE_FLOAT32_C( 542.20) }, { SIMDE_FLOAT32_C( 808.58), SIMDE_FLOAT32_C( -219.05), SIMDE_FLOAT32_C( -63.81), SIMDE_FLOAT32_C( -364.96), SIMDE_FLOAT32_C( -89.27), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 392.56), SIMDE_FLOAT32_C( 386.92) }, { SIMDE_FLOAT32_C( 1308.64), SIMDE_FLOAT32_C( -928.76), SIMDE_FLOAT32_C( 127.84), SIMDE_FLOAT32_C( -472.92), SIMDE_FLOAT32_C( -123.40), SIMDE_FLOAT32_C( 5.03), SIMDE_FLOAT32_C( 1095.83), SIMDE_FLOAT32_C( 929.12) } }, { { SIMDE_FLOAT32_C( -699.57), SIMDE_FLOAT32_C( -314.58), SIMDE_FLOAT32_C( 153.44), SIMDE_FLOAT32_C( 343.24), SIMDE_FLOAT32_C( -778.18), SIMDE_FLOAT32_C( -485.96), SIMDE_FLOAT32_C( 139.13), SIMDE_FLOAT32_C( 706.00) }, { SIMDE_FLOAT32_C( -600.28), SIMDE_FLOAT32_C( 190.04), SIMDE_FLOAT32_C( -321.56), 
SIMDE_FLOAT32_C( 671.70), SIMDE_FLOAT32_C( 69.77), SIMDE_FLOAT32_C( -267.25), SIMDE_FLOAT32_C( -227.92), SIMDE_FLOAT32_C( -430.16) }, { SIMDE_FLOAT32_C( -1299.84), SIMDE_FLOAT32_C( -124.54), SIMDE_FLOAT32_C( -168.11), SIMDE_FLOAT32_C( 1014.94), SIMDE_FLOAT32_C( -708.41), SIMDE_FLOAT32_C( -753.20), SIMDE_FLOAT32_C( -88.79), SIMDE_FLOAT32_C( 275.85) } }, { { SIMDE_FLOAT32_C( 23.04), SIMDE_FLOAT32_C( 963.73), SIMDE_FLOAT32_C( 461.88), SIMDE_FLOAT32_C( 988.91), SIMDE_FLOAT32_C( -31.35), SIMDE_FLOAT32_C( 165.15), SIMDE_FLOAT32_C( 531.11), SIMDE_FLOAT32_C( -222.78) }, { SIMDE_FLOAT32_C( 946.10), SIMDE_FLOAT32_C( -532.70), SIMDE_FLOAT32_C( 412.26), SIMDE_FLOAT32_C( -143.17), SIMDE_FLOAT32_C( 467.42), SIMDE_FLOAT32_C( -195.17), SIMDE_FLOAT32_C( -756.25), SIMDE_FLOAT32_C( 767.85) }, { SIMDE_FLOAT32_C( 969.14), SIMDE_FLOAT32_C( 431.03), SIMDE_FLOAT32_C( 874.15), SIMDE_FLOAT32_C( 845.74), SIMDE_FLOAT32_C( 436.06), SIMDE_FLOAT32_C( -30.03), SIMDE_FLOAT32_C( -225.14), SIMDE_FLOAT32_C( 545.07) } }, { { SIMDE_FLOAT32_C( 490.24), SIMDE_FLOAT32_C( 397.20), SIMDE_FLOAT32_C( 111.09), SIMDE_FLOAT32_C( 712.06), SIMDE_FLOAT32_C( 911.24), SIMDE_FLOAT32_C( -749.78), SIMDE_FLOAT32_C( 418.07), SIMDE_FLOAT32_C( -689.03) }, { SIMDE_FLOAT32_C( 440.26), SIMDE_FLOAT32_C( -903.49), SIMDE_FLOAT32_C( 982.67), SIMDE_FLOAT32_C( -489.97), SIMDE_FLOAT32_C( -170.74), SIMDE_FLOAT32_C( -245.25), SIMDE_FLOAT32_C( 79.87), SIMDE_FLOAT32_C( 852.30) }, { SIMDE_FLOAT32_C( 930.50), SIMDE_FLOAT32_C( -506.29), SIMDE_FLOAT32_C( 1093.75), SIMDE_FLOAT32_C( 222.09), SIMDE_FLOAT32_C( 740.50), SIMDE_FLOAT32_C( -995.03), SIMDE_FLOAT32_C( 497.94), SIMDE_FLOAT32_C( 163.27) } } };
/* Check every table entry: load a and b, run simde_mm256_add_ps, compare with r. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 b = simde_mm256_loadu_ps(test_vec[i].b); simde__m256 r = simde_mm256_add_ps(a, b); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; }
/*
 * Table-driven test for simde_mm256_add_pd.
 *
 * Each test_vec entry holds two 4-lane simde_float64 inputs (a, b) and the
 * expected output (r).  In the table r is the lane-wise sum a + b written to
 * two decimals (e.g. 664.30 + -427.81 -> 236.49); a few entries differ from
 * the exact decimal sum in the last digit.
 * The loop that follows the table loads a and b with simde_mm256_loadu_pd,
 * calls the function under test and compares the result against r with
 * simde_test_x86_assert_equal_f64x4.
 *
 * The trailing argument 1 passed to the assert helper is presumably a
 * precision/tolerance setting -- TODO confirm against the helper's definition.
 * The parameter list is supplied by the SIMDE_MUNIT_TEST_ARGS macro, which is
 * defined outside this part of the file.  Returns 0 after the loop completes.
 */
static int test_simde_mm256_add_pd
(SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[4]; simde_float64 b[4]; simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 664.30), SIMDE_FLOAT64_C( 577.46), SIMDE_FLOAT64_C( 740.89), SIMDE_FLOAT64_C( -918.31) }, { SIMDE_FLOAT64_C( -427.81), SIMDE_FLOAT64_C( 139.30), SIMDE_FLOAT64_C( 658.39), SIMDE_FLOAT64_C( 247.77) }, { SIMDE_FLOAT64_C( 236.49), SIMDE_FLOAT64_C( 716.76), SIMDE_FLOAT64_C( 1399.29), SIMDE_FLOAT64_C( -670.54) } }, { { SIMDE_FLOAT64_C( -470.94), SIMDE_FLOAT64_C( -355.75), SIMDE_FLOAT64_C( 525.85), SIMDE_FLOAT64_C( 336.05) }, { SIMDE_FLOAT64_C( -706.73), SIMDE_FLOAT64_C( -71.41), SIMDE_FLOAT64_C( 586.58), SIMDE_FLOAT64_C( -587.59) }, { SIMDE_FLOAT64_C( -1177.66), SIMDE_FLOAT64_C( -427.15), SIMDE_FLOAT64_C( 1112.44), SIMDE_FLOAT64_C( -251.54) } }, { { SIMDE_FLOAT64_C( 109.20), SIMDE_FLOAT64_C( -848.44), SIMDE_FLOAT64_C( -937.49), SIMDE_FLOAT64_C( -391.24) }, { SIMDE_FLOAT64_C( 350.57), SIMDE_FLOAT64_C( -211.87), SIMDE_FLOAT64_C( 614.09), SIMDE_FLOAT64_C( 386.98) }, { SIMDE_FLOAT64_C( 459.78), SIMDE_FLOAT64_C( -1060.32), SIMDE_FLOAT64_C( -323.40), SIMDE_FLOAT64_C( -4.26) } }, { { SIMDE_FLOAT64_C( -377.97), SIMDE_FLOAT64_C( 804.63), SIMDE_FLOAT64_C( -715.40), SIMDE_FLOAT64_C( -782.75) }, { SIMDE_FLOAT64_C( -311.74), SIMDE_FLOAT64_C( 976.76), SIMDE_FLOAT64_C( 342.71), SIMDE_FLOAT64_C( -647.44) }, { SIMDE_FLOAT64_C( -689.71), SIMDE_FLOAT64_C( 1781.39), SIMDE_FLOAT64_C( -372.68), SIMDE_FLOAT64_C( -1430.19) } }, { { SIMDE_FLOAT64_C( 554.22), SIMDE_FLOAT64_C( 83.61), SIMDE_FLOAT64_C( -565.75), SIMDE_FLOAT64_C( -873.59) }, { SIMDE_FLOAT64_C( -777.09), SIMDE_FLOAT64_C( -907.36), SIMDE_FLOAT64_C( 374.18), SIMDE_FLOAT64_C( -248.03) }, { SIMDE_FLOAT64_C( -222.87), SIMDE_FLOAT64_C( -823.75), SIMDE_FLOAT64_C( -191.57), SIMDE_FLOAT64_C( -1121.62) } }, { { SIMDE_FLOAT64_C( -263.10), SIMDE_FLOAT64_C( -99.97), SIMDE_FLOAT64_C( -911.97), SIMDE_FLOAT64_C( 30.17) }, { SIMDE_FLOAT64_C( 828.63), SIMDE_FLOAT64_C( 674.61), SIMDE_FLOAT64_C( 442.58), SIMDE_FLOAT64_C( 
-62.17) }, { SIMDE_FLOAT64_C( 565.52), SIMDE_FLOAT64_C( 574.64), SIMDE_FLOAT64_C( -469.39), SIMDE_FLOAT64_C( -32.00) } }, { { SIMDE_FLOAT64_C( 826.16), SIMDE_FLOAT64_C( 505.09), SIMDE_FLOAT64_C( 546.59), SIMDE_FLOAT64_C( 176.74) }, { SIMDE_FLOAT64_C( -706.78), SIMDE_FLOAT64_C( 160.68), SIMDE_FLOAT64_C( -436.28), SIMDE_FLOAT64_C( -84.75) }, { SIMDE_FLOAT64_C( 119.38), SIMDE_FLOAT64_C( 665.77), SIMDE_FLOAT64_C( 110.30), SIMDE_FLOAT64_C( 91.98) } }, { { SIMDE_FLOAT64_C( -34.70), SIMDE_FLOAT64_C( -151.68), SIMDE_FLOAT64_C( 132.50), SIMDE_FLOAT64_C( 653.56) }, { SIMDE_FLOAT64_C( -174.92), SIMDE_FLOAT64_C( -524.79), SIMDE_FLOAT64_C( -993.87), SIMDE_FLOAT64_C( -620.70) }, { SIMDE_FLOAT64_C( -209.61), SIMDE_FLOAT64_C( -676.47), SIMDE_FLOAT64_C( -861.38), SIMDE_FLOAT64_C( 32.86) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d b = simde_mm256_loadu_pd(test_vec[i].b); simde__m256d r = simde_mm256_add_pd(a, b); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_addsub_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -665.97), SIMDE_FLOAT32_C( -119.17), SIMDE_FLOAT32_C( 98.44), SIMDE_FLOAT32_C( -870.79), SIMDE_FLOAT32_C( 715.06), SIMDE_FLOAT32_C( 168.23), SIMDE_FLOAT32_C( 291.85), SIMDE_FLOAT32_C( 803.77)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 437.26), SIMDE_FLOAT32_C( 621.28), SIMDE_FLOAT32_C( 727.27), SIMDE_FLOAT32_C( -902.73), SIMDE_FLOAT32_C( -279.74), SIMDE_FLOAT32_C( 960.47), SIMDE_FLOAT32_C( -437.81), SIMDE_FLOAT32_C( 516.31)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -228.71), SIMDE_FLOAT32_C( -740.45), SIMDE_FLOAT32_C( 825.71), SIMDE_FLOAT32_C( 31.94), SIMDE_FLOAT32_C( 435.32), SIMDE_FLOAT32_C( -792.24), SIMDE_FLOAT32_C( -145.96), SIMDE_FLOAT32_C( 287.46)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -441.72), 
SIMDE_FLOAT32_C( 881.45), SIMDE_FLOAT32_C( 512.79), SIMDE_FLOAT32_C( -201.00), SIMDE_FLOAT32_C( 709.10), SIMDE_FLOAT32_C( 130.51), SIMDE_FLOAT32_C( -836.20), SIMDE_FLOAT32_C( 276.45)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -410.67), SIMDE_FLOAT32_C( 408.11), SIMDE_FLOAT32_C( -371.59), SIMDE_FLOAT32_C( -135.76), SIMDE_FLOAT32_C( -896.75), SIMDE_FLOAT32_C( -185.21), SIMDE_FLOAT32_C( -154.35), SIMDE_FLOAT32_C( -995.14)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -852.39), SIMDE_FLOAT32_C( 473.34), SIMDE_FLOAT32_C( 141.20), SIMDE_FLOAT32_C( -65.24), SIMDE_FLOAT32_C( -187.65), SIMDE_FLOAT32_C( 315.72), SIMDE_FLOAT32_C( -990.55), SIMDE_FLOAT32_C( 1271.59)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 792.33), SIMDE_FLOAT32_C( -677.48), SIMDE_FLOAT32_C( -78.97), SIMDE_FLOAT32_C( 595.95), SIMDE_FLOAT32_C( 441.86), SIMDE_FLOAT32_C( 221.91), SIMDE_FLOAT32_C( 688.66), SIMDE_FLOAT32_C( -937.21)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 615.36), SIMDE_FLOAT32_C( -858.28), SIMDE_FLOAT32_C( 102.87), SIMDE_FLOAT32_C( -380.97), SIMDE_FLOAT32_C( -155.81), SIMDE_FLOAT32_C( -426.12), SIMDE_FLOAT32_C( -862.23), SIMDE_FLOAT32_C( -891.31)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1407.69), SIMDE_FLOAT32_C( 180.80), SIMDE_FLOAT32_C( 23.90), SIMDE_FLOAT32_C( 976.92), SIMDE_FLOAT32_C( 286.05), SIMDE_FLOAT32_C( 648.03), SIMDE_FLOAT32_C( -173.57), SIMDE_FLOAT32_C( -45.90)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -493.40), SIMDE_FLOAT32_C( -257.06), SIMDE_FLOAT32_C( -968.46), SIMDE_FLOAT32_C( 634.36), SIMDE_FLOAT32_C( -600.69), SIMDE_FLOAT32_C( -769.31), SIMDE_FLOAT32_C( 230.22), SIMDE_FLOAT32_C( -863.68)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -678.13), SIMDE_FLOAT32_C( 166.99), SIMDE_FLOAT32_C( 125.37), SIMDE_FLOAT32_C( -846.30), SIMDE_FLOAT32_C( 414.00), SIMDE_FLOAT32_C( -144.57), SIMDE_FLOAT32_C( -43.08), SIMDE_FLOAT32_C( 287.75)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-1171.53), SIMDE_FLOAT32_C( -424.05), SIMDE_FLOAT32_C( -843.09), SIMDE_FLOAT32_C( 1480.66), SIMDE_FLOAT32_C( -186.69), SIMDE_FLOAT32_C( -624.74), 
SIMDE_FLOAT32_C( 187.14), SIMDE_FLOAT32_C(-1151.43)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -449.73), SIMDE_FLOAT32_C( -350.27), SIMDE_FLOAT32_C( -591.20), SIMDE_FLOAT32_C( 571.44), SIMDE_FLOAT32_C( 787.95), SIMDE_FLOAT32_C( 514.78), SIMDE_FLOAT32_C( -355.32), SIMDE_FLOAT32_C( 545.86)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 341.77), SIMDE_FLOAT32_C( -839.01), SIMDE_FLOAT32_C( -9.88), SIMDE_FLOAT32_C( 193.01), SIMDE_FLOAT32_C( 871.45), SIMDE_FLOAT32_C( -840.06), SIMDE_FLOAT32_C( 689.52), SIMDE_FLOAT32_C( -902.32)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -107.96), SIMDE_FLOAT32_C( 488.74), SIMDE_FLOAT32_C( -601.08), SIMDE_FLOAT32_C( 378.43), SIMDE_FLOAT32_C( 1659.40), SIMDE_FLOAT32_C( 1354.84), SIMDE_FLOAT32_C( 334.20), SIMDE_FLOAT32_C( 1448.18)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -336.63), SIMDE_FLOAT32_C( 960.76), SIMDE_FLOAT32_C( 657.64), SIMDE_FLOAT32_C( -548.45), SIMDE_FLOAT32_C( -101.48), SIMDE_FLOAT32_C( -271.70), SIMDE_FLOAT32_C( 23.46), SIMDE_FLOAT32_C( 562.46)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -564.95), SIMDE_FLOAT32_C( -418.72), SIMDE_FLOAT32_C( -693.90), SIMDE_FLOAT32_C( -314.48), SIMDE_FLOAT32_C( -760.76), SIMDE_FLOAT32_C( 7.61), SIMDE_FLOAT32_C( 228.32), SIMDE_FLOAT32_C( -230.31)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -901.58), SIMDE_FLOAT32_C( 1379.48), SIMDE_FLOAT32_C( -36.26), SIMDE_FLOAT32_C( -233.97), SIMDE_FLOAT32_C( -862.24), SIMDE_FLOAT32_C( -279.31), SIMDE_FLOAT32_C( 251.78), SIMDE_FLOAT32_C( 792.77)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -451.91), SIMDE_FLOAT32_C( -184.14), SIMDE_FLOAT32_C( -772.77), SIMDE_FLOAT32_C( -278.00), SIMDE_FLOAT32_C( -640.89), SIMDE_FLOAT32_C( -35.35), SIMDE_FLOAT32_C( -518.18), SIMDE_FLOAT32_C( -851.69)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -152.24), SIMDE_FLOAT32_C( 636.58), SIMDE_FLOAT32_C( -808.67), SIMDE_FLOAT32_C( 166.94), SIMDE_FLOAT32_C( -359.45), SIMDE_FLOAT32_C( -888.48), SIMDE_FLOAT32_C( -553.07), SIMDE_FLOAT32_C( -570.58)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -604.15), SIMDE_FLOAT32_C( -820.72), 
SIMDE_FLOAT32_C(-1581.44), SIMDE_FLOAT32_C( -444.94), SIMDE_FLOAT32_C(-1000.34), SIMDE_FLOAT32_C( 853.13), SIMDE_FLOAT32_C(-1071.25), SIMDE_FLOAT32_C( -281.11)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -3.71), SIMDE_FLOAT32_C( -782.28), SIMDE_FLOAT32_C( 533.36), SIMDE_FLOAT32_C( 848.42), SIMDE_FLOAT32_C( 345.49), SIMDE_FLOAT32_C( -110.04), SIMDE_FLOAT32_C( -550.91), SIMDE_FLOAT32_C( -5.89)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 186.13), SIMDE_FLOAT32_C( -310.95), SIMDE_FLOAT32_C( 742.56), SIMDE_FLOAT32_C( -943.39), SIMDE_FLOAT32_C( -294.98), SIMDE_FLOAT32_C( -455.35), SIMDE_FLOAT32_C( 262.46), SIMDE_FLOAT32_C( 299.17)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 182.42), SIMDE_FLOAT32_C( -471.33), SIMDE_FLOAT32_C( 1275.92), SIMDE_FLOAT32_C( 1791.81), SIMDE_FLOAT32_C( 50.51), SIMDE_FLOAT32_C( 345.31), SIMDE_FLOAT32_C( -288.45), SIMDE_FLOAT32_C( -305.06)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_addsub_ps(test_vec[i].a, test_vec[i].b); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_addsub_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 715.06), SIMDE_FLOAT64_C( 168.23), SIMDE_FLOAT64_C( 291.85), SIMDE_FLOAT64_C( 803.77)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -279.74), SIMDE_FLOAT64_C( 960.47), SIMDE_FLOAT64_C( -437.81), SIMDE_FLOAT64_C( 516.31)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 435.32), SIMDE_FLOAT64_C( -792.24), SIMDE_FLOAT64_C( -145.96), SIMDE_FLOAT64_C( 287.46)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -665.97), SIMDE_FLOAT64_C( -119.17), SIMDE_FLOAT64_C( 98.44), SIMDE_FLOAT64_C( -870.79)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 437.26), SIMDE_FLOAT64_C( 621.28), SIMDE_FLOAT64_C( 727.27), SIMDE_FLOAT64_C( -902.73)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -228.71), SIMDE_FLOAT64_C( -740.45), SIMDE_FLOAT64_C( 825.71), SIMDE_FLOAT64_C( 31.94)) }, { 
simde_mm256_set_pd(SIMDE_FLOAT64_C( 709.10), SIMDE_FLOAT64_C( 130.51), SIMDE_FLOAT64_C( -836.20), SIMDE_FLOAT64_C( 276.45)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -896.75), SIMDE_FLOAT64_C( -185.21), SIMDE_FLOAT64_C( -154.35), SIMDE_FLOAT64_C( -995.14)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -187.65), SIMDE_FLOAT64_C( 315.72), SIMDE_FLOAT64_C( -990.55), SIMDE_FLOAT64_C( 1271.59)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -441.72), SIMDE_FLOAT64_C( 881.45), SIMDE_FLOAT64_C( 512.79), SIMDE_FLOAT64_C( -201.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -410.67), SIMDE_FLOAT64_C( 408.11), SIMDE_FLOAT64_C( -371.59), SIMDE_FLOAT64_C( -135.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -852.39), SIMDE_FLOAT64_C( 473.34), SIMDE_FLOAT64_C( 141.20), SIMDE_FLOAT64_C( -65.24)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 441.86), SIMDE_FLOAT64_C( 221.91), SIMDE_FLOAT64_C( 688.66), SIMDE_FLOAT64_C( -937.21)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -155.81), SIMDE_FLOAT64_C( -426.12), SIMDE_FLOAT64_C( -862.23), SIMDE_FLOAT64_C( -891.31)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 286.05), SIMDE_FLOAT64_C( 648.03), SIMDE_FLOAT64_C( -173.57), SIMDE_FLOAT64_C( -45.90)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 792.33), SIMDE_FLOAT64_C( -677.48), SIMDE_FLOAT64_C( -78.97), SIMDE_FLOAT64_C( 595.95)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 615.36), SIMDE_FLOAT64_C( -858.28), SIMDE_FLOAT64_C( 102.87), SIMDE_FLOAT64_C( -380.97)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1407.69), SIMDE_FLOAT64_C( 180.80), SIMDE_FLOAT64_C( 23.90), SIMDE_FLOAT64_C( 976.92)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -600.69), SIMDE_FLOAT64_C( -769.31), SIMDE_FLOAT64_C( 230.22), SIMDE_FLOAT64_C( -863.68)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 414.00), SIMDE_FLOAT64_C( -144.57), SIMDE_FLOAT64_C( -43.08), SIMDE_FLOAT64_C( 287.75)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -186.69), SIMDE_FLOAT64_C( -624.74), SIMDE_FLOAT64_C( 187.14), SIMDE_FLOAT64_C(-1151.43)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -493.40), SIMDE_FLOAT64_C( -257.06), SIMDE_FLOAT64_C( 
-968.46), SIMDE_FLOAT64_C( 634.36)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -678.13), SIMDE_FLOAT64_C( 166.99), SIMDE_FLOAT64_C( 125.37), SIMDE_FLOAT64_C( -846.30)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-1171.53), SIMDE_FLOAT64_C( -424.05), SIMDE_FLOAT64_C( -843.09), SIMDE_FLOAT64_C( 1480.66)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_addsub_pd(test_vec[i].a, test_vec[i].b); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_and_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -927.26), SIMDE_FLOAT32_C( -802.03), SIMDE_FLOAT32_C( -266.41), SIMDE_FLOAT32_C( -50.41), SIMDE_FLOAT32_C( -309.19), SIMDE_FLOAT32_C( -707.19), SIMDE_FLOAT32_C( -220.07), SIMDE_FLOAT32_C( 127.67)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -205.91), SIMDE_FLOAT32_C( -275.07), SIMDE_FLOAT32_C( -13.57), SIMDE_FLOAT32_C( 990.91), SIMDE_FLOAT32_C( -167.84), SIMDE_FLOAT32_C( 346.85), SIMDE_FLOAT32_C( 124.29), SIMDE_FLOAT32_C( 759.96)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -3.09), SIMDE_FLOAT32_C( -2.13), SIMDE_FLOAT32_C( -8.07), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( -130.56), SIMDE_FLOAT32_C( 2.50), SIMDE_FLOAT32_C( 54.02), SIMDE_FLOAT32_C( 2.96)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -39.85), SIMDE_FLOAT32_C( 325.28), SIMDE_FLOAT32_C( 658.09), SIMDE_FLOAT32_C( 797.01), SIMDE_FLOAT32_C( -512.38), SIMDE_FLOAT32_C( -352.93), SIMDE_FLOAT32_C( -357.04), SIMDE_FLOAT32_C( -456.94)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -802.91), SIMDE_FLOAT32_C( 574.48), SIMDE_FLOAT32_C( -277.42), SIMDE_FLOAT32_C( 470.68), SIMDE_FLOAT32_C( -575.30), SIMDE_FLOAT32_C( -85.17), SIMDE_FLOAT32_C( 928.67), SIMDE_FLOAT32_C( 878.11)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -2.13), SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( 2.01), SIMDE_FLOAT32_C( 3.05), SIMDE_FLOAT32_C( -512.25), SIMDE_FLOAT32_C( -80.17), SIMDE_FLOAT32_C( 2.50), SIMDE_FLOAT32_C( 
3.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 758.85), SIMDE_FLOAT32_C( 749.41), SIMDE_FLOAT32_C( 129.33), SIMDE_FLOAT32_C( -389.28), SIMDE_FLOAT32_C( -248.13), SIMDE_FLOAT32_C( 236.41), SIMDE_FLOAT32_C( 416.63), SIMDE_FLOAT32_C( -413.79)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -615.54), SIMDE_FLOAT32_C( 496.18), SIMDE_FLOAT32_C( -485.54), SIMDE_FLOAT32_C( -818.26), SIMDE_FLOAT32_C( -139.14), SIMDE_FLOAT32_C( 955.99), SIMDE_FLOAT32_C( 356.33), SIMDE_FLOAT32_C( -498.79)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 614.53), SIMDE_FLOAT32_C( 2.88), SIMDE_FLOAT32_C( 128.27), SIMDE_FLOAT32_C( -3.01), SIMDE_FLOAT32_C( -136.13), SIMDE_FLOAT32_C( 3.69), SIMDE_FLOAT32_C( 288.00), SIMDE_FLOAT32_C( -400.79)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -131.10), SIMDE_FLOAT32_C( -368.55), SIMDE_FLOAT32_C( 657.24), SIMDE_FLOAT32_C( 939.62), SIMDE_FLOAT32_C( -50.98), SIMDE_FLOAT32_C( 313.56), SIMDE_FLOAT32_C( -914.58), SIMDE_FLOAT32_C( -29.59)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -195.92), SIMDE_FLOAT32_C( -475.65), SIMDE_FLOAT32_C( -524.29), SIMDE_FLOAT32_C( 537.04), SIMDE_FLOAT32_C( -280.43), SIMDE_FLOAT32_C( 379.54), SIMDE_FLOAT32_C( -38.50), SIMDE_FLOAT32_C( -188.05)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -131.04), SIMDE_FLOAT32_C( -336.52), SIMDE_FLOAT32_C( 512.03), SIMDE_FLOAT32_C( 521.04), SIMDE_FLOAT32_C( -34.03), SIMDE_FLOAT32_C( 313.54), SIMDE_FLOAT32_C( -2.00), SIMDE_FLOAT32_C( -10.75)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 159.88), SIMDE_FLOAT32_C( -559.41), SIMDE_FLOAT32_C( -99.24), SIMDE_FLOAT32_C( -420.50), SIMDE_FLOAT32_C( -953.72), SIMDE_FLOAT32_C( 849.74), SIMDE_FLOAT32_C( 80.27), SIMDE_FLOAT32_C( -41.97)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 457.08), SIMDE_FLOAT32_C( -505.60), SIMDE_FLOAT32_C( -390.51), SIMDE_FLOAT32_C( -524.68), SIMDE_FLOAT32_C( -419.12), SIMDE_FLOAT32_C( 955.42), SIMDE_FLOAT32_C( 180.08), SIMDE_FLOAT32_C( 33.67)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 132.50), SIMDE_FLOAT32_C( -2.14), SIMDE_FLOAT32_C( -97.13), SIMDE_FLOAT32_C( -2.03), 
SIMDE_FLOAT32_C( -3.00), SIMDE_FLOAT32_C( 785.16), SIMDE_FLOAT32_C( 40.00), SIMDE_FLOAT32_C( 33.66)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 157.14), SIMDE_FLOAT32_C( -391.25), SIMDE_FLOAT32_C( -362.22), SIMDE_FLOAT32_C( -137.98), SIMDE_FLOAT32_C( -303.90), SIMDE_FLOAT32_C( 545.52), SIMDE_FLOAT32_C( -383.06), SIMDE_FLOAT32_C( 973.73)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -973.80), SIMDE_FLOAT32_C( 457.53), SIMDE_FLOAT32_C( -199.04), SIMDE_FLOAT32_C( 856.68), SIMDE_FLOAT32_C( -830.67), SIMDE_FLOAT32_C( -121.42), SIMDE_FLOAT32_C( 772.32), SIMDE_FLOAT32_C( 704.39)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.27), SIMDE_FLOAT32_C( 385.00), SIMDE_FLOAT32_C( -133.03), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -2.12), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( 704.14)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 942.14), SIMDE_FLOAT32_C( 176.96), SIMDE_FLOAT32_C( 525.37), SIMDE_FLOAT32_C( 924.18), SIMDE_FLOAT32_C( -300.50), SIMDE_FLOAT32_C( -450.02), SIMDE_FLOAT32_C( 708.11), SIMDE_FLOAT32_C( 742.76)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 647.68), SIMDE_FLOAT32_C( 632.57), SIMDE_FLOAT32_C( -972.04), SIMDE_FLOAT32_C( -483.76), SIMDE_FLOAT32_C( 274.26), SIMDE_FLOAT32_C( 570.79), SIMDE_FLOAT32_C( -945.60), SIMDE_FLOAT32_C( -519.05)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 646.13), SIMDE_FLOAT32_C( 2.25), SIMDE_FLOAT32_C( 524.04), SIMDE_FLOAT32_C( 3.52), SIMDE_FLOAT32_C( 256.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 640.09), SIMDE_FLOAT32_C( 518.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 361.93), SIMDE_FLOAT32_C( -223.38), SIMDE_FLOAT32_C( -51.19), SIMDE_FLOAT32_C( -300.56), SIMDE_FLOAT32_C( 363.29), SIMDE_FLOAT32_C( 804.04), SIMDE_FLOAT32_C( 99.54), SIMDE_FLOAT32_C( -622.58)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 446.79), SIMDE_FLOAT32_C( 300.89), SIMDE_FLOAT32_C( 727.13), SIMDE_FLOAT32_C( -671.93), SIMDE_FLOAT32_C( 708.76), SIMDE_FLOAT32_C( -834.24), SIMDE_FLOAT32_C( -881.82), SIMDE_FLOAT32_C( 430.54)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 296.79), 
SIMDE_FLOAT32_C( 150.38), SIMDE_FLOAT32_C( 2.07), SIMDE_FLOAT32_C( -2.10), SIMDE_FLOAT32_C( 2.77), SIMDE_FLOAT32_C( 768.03), SIMDE_FLOAT32_C( 3.06), SIMDE_FLOAT32_C( 2.30)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_and_ps(test_vec[i].a, test_vec[i].b); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_and_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -309.19), SIMDE_FLOAT64_C( -707.19), SIMDE_FLOAT64_C( -220.07), SIMDE_FLOAT64_C( 127.67)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -167.84), SIMDE_FLOAT64_C( 346.85), SIMDE_FLOAT64_C( 124.29), SIMDE_FLOAT64_C( 759.96)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -130.56), SIMDE_FLOAT64_C( 2.50), SIMDE_FLOAT64_C( 54.02), SIMDE_FLOAT64_C( 2.96)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -927.26), SIMDE_FLOAT64_C( -802.03), SIMDE_FLOAT64_C( -266.41), SIMDE_FLOAT64_C( -50.41)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -205.91), SIMDE_FLOAT64_C( -275.07), SIMDE_FLOAT64_C( -13.57), SIMDE_FLOAT64_C( 990.91)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -3.09), SIMDE_FLOAT64_C( -2.13), SIMDE_FLOAT64_C( -8.07), SIMDE_FLOAT64_C( 3.03)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -512.38), SIMDE_FLOAT64_C( -352.93), SIMDE_FLOAT64_C( -357.04), SIMDE_FLOAT64_C( -456.94)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -575.30), SIMDE_FLOAT64_C( -85.17), SIMDE_FLOAT64_C( 928.67), SIMDE_FLOAT64_C( 878.11)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -512.25), SIMDE_FLOAT64_C( -80.17), SIMDE_FLOAT64_C( 2.50), SIMDE_FLOAT64_C( 3.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -39.85), SIMDE_FLOAT64_C( 325.28), SIMDE_FLOAT64_C( 658.09), SIMDE_FLOAT64_C( 797.01)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -802.91), SIMDE_FLOAT64_C( 574.48), SIMDE_FLOAT64_C( -277.42), SIMDE_FLOAT64_C( 470.68)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -2.13), SIMDE_FLOAT64_C( 2.04), SIMDE_FLOAT64_C( 2.01), 
SIMDE_FLOAT64_C( 3.05)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -248.13), SIMDE_FLOAT64_C( 236.41), SIMDE_FLOAT64_C( 416.63), SIMDE_FLOAT64_C( -413.79)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -139.14), SIMDE_FLOAT64_C( 955.99), SIMDE_FLOAT64_C( 356.33), SIMDE_FLOAT64_C( -498.79)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -136.13), SIMDE_FLOAT64_C( 3.69), SIMDE_FLOAT64_C( 288.00), SIMDE_FLOAT64_C( -400.79)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 758.85), SIMDE_FLOAT64_C( 749.41), SIMDE_FLOAT64_C( 129.33), SIMDE_FLOAT64_C( -389.28)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -615.54), SIMDE_FLOAT64_C( 496.18), SIMDE_FLOAT64_C( -485.54), SIMDE_FLOAT64_C( -818.26)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 614.53), SIMDE_FLOAT64_C( 2.88), SIMDE_FLOAT64_C( 128.27), SIMDE_FLOAT64_C( -3.01)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -50.98), SIMDE_FLOAT64_C( 313.56), SIMDE_FLOAT64_C( -914.58), SIMDE_FLOAT64_C( -29.59)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -280.43), SIMDE_FLOAT64_C( 379.54), SIMDE_FLOAT64_C( -38.50), SIMDE_FLOAT64_C( -188.05)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -34.03), SIMDE_FLOAT64_C( 313.54), SIMDE_FLOAT64_C( -2.00), SIMDE_FLOAT64_C( -10.75)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -131.10), SIMDE_FLOAT64_C( -368.55), SIMDE_FLOAT64_C( 657.24), SIMDE_FLOAT64_C( 939.62)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -195.92), SIMDE_FLOAT64_C( -475.65), SIMDE_FLOAT64_C( -524.29), SIMDE_FLOAT64_C( 537.04)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -131.04), SIMDE_FLOAT64_C( -336.52), SIMDE_FLOAT64_C( 512.03), SIMDE_FLOAT64_C( 521.04)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_and_pd(test_vec[i].a, test_vec[i].b); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_andnot_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C(-1269691626), INT32_C(-1170050076), INT32_C( -309781764), INT32_C( 
-576457271), INT32_C( -634907762), INT32_C( 2098123667), INT32_C( -562209537), INT32_C( 5131913)), simde_mm256_set_epi32(INT32_C( 1938996560), INT32_C( -950945230), INT32_C( 1149254280), INT32_C( 44378753), INT32_C( 2057504516), INT32_C( -993118301), INT32_C( 1739559582), INT32_C( -627102775)), simde_mm256_set_epi32(INT32_C( 1132508224), INT32_C( 1158776850), INT32_C( 8192), INT32_C( 33819136), INT32_C( 545457152), INT32_C(-2134891488), INT32_C( 562201088), INT32_C( -628022976)) }, { simde_mm256_set_epi32(INT32_C( 1623231278), INT32_C( 1012862807), INT32_C(-1490329263), INT32_C( -978121763), INT32_C( 1155479170), INT32_C( 1443672525), INT32_C(-2006927873), INT32_C(-1659310326)), simde_mm256_set_epi32(INT32_C( 1899349789), INT32_C( -971300838), INT32_C(-1615418151), INT32_C( 1727488659), INT32_C( -760518716), INT32_C( -855193027), INT32_C(-1052461502), INT32_C( 1027044741)), simde_mm256_set_epi32(INT32_C( 288703505), INT32_C(-1040179192), INT32_C( 412388488), INT32_C( 574906370), INT32_C(-1843379900), INT32_C(-1996338640), INT32_C( 1090781696), INT32_C( 539431045)) }, { simde_mm256_set_epi32(INT32_C(-2122027976), INT32_C( -148145974), INT32_C( -727916), INT32_C( -26694351), INT32_C(-1526957699), INT32_C( -878651731), INT32_C( 1530541127), INT32_C(-1559858122)), simde_mm256_set_epi32(INT32_C( -534865702), INT32_C(-2025680665), INT32_C( 838358535), INT32_C( 1081018378), INT32_C(-1920205149), INT32_C( -965251235), INT32_C( 386054154), INT32_C(-2050252028)), simde_mm256_set_epi32(INT32_C( 1612353730), INT32_C( 4227109), INT32_C( 528899), INT32_C( 458762), INT32_C( 151225986), INT32_C( 72821072), INT32_C( 67143688), INT32_C( 80315136)) }, { simde_mm256_set_epi32(INT32_C( 677275617), INT32_C(-1590450048), INT32_C( 1228022027), INT32_C(-1869880241), INT32_C( -606167369), INT32_C( 1388808224), INT32_C( 759425545), INT32_C( 885944499)), simde_mm256_set_epi32(INT32_C( 148370019), INT32_C( 1263704), INT32_C( -153021241), INT32_C( 1201345211), INT32_C( 1277824171), 
INT32_C(-1597492935), INT32_C( 875204555), INT32_C( 494146139)), simde_mm256_set_epi32(INT32_C( 8491010), INT32_C( 16472), INT32_C(-1228860220), INT32_C( 1192235696), INT32_C( 69206024), INT32_C(-1610075879), INT32_C( 271058370), INT32_C( 154141256)) }, { simde_mm256_set_epi32(INT32_C(-1186629793), INT32_C(-1939451012), INT32_C( 1901741359), INT32_C( 320110090), INT32_C( 759094695), INT32_C(-2033042315), INT32_C(-1537152402), INT32_C( -430790655)), simde_mm256_set_epi32(INT32_C(-1742460754), INT32_C( 244002796), INT32_C( 1060007632), INT32_C( 2113362450), INT32_C( 1987282511), INT32_C(-1021964277), INT32_C( 124383649), INT32_C( -826572412)), simde_mm256_set_epi32(INT32_C( 2099360), INT32_C( 42544256), INT32_C( 237251280), INT32_C( 1826816016), INT32_C( 1379992136), INT32_C( 1090781194), INT32_C( 50925953), INT32_C( 145293700)) }, { simde_mm256_set_epi32(INT32_C( 1671320788), INT32_C( 16189529), INT32_C(-1485817573), INT32_C( 275783232), INT32_C( -537268511), INT32_C(-1218162385), INT32_C( 832178136), INT32_C(-1011542055)), simde_mm256_set_epi32(INT32_C( 1761443736), INT32_C( 53908736), INT32_C(-1387734476), INT32_C( 460708168), INT32_C( 1701279267), INT32_C( 1520265677), INT32_C(-1733909786), INT32_C( 106810720)), simde_mm256_set_epi32(INT32_C( 140585736), INT32_C( 50369792), INT32_C( 134791716), INT32_C( 184926472), INT32_C( 537268226), INT32_C( 1217996992), INT32_C(-2010865626), INT32_C( 71878688)) }, { simde_mm256_set_epi32(INT32_C( 1589201791), INT32_C(-1842375346), INT32_C(-2000660080), INT32_C( -325292371), INT32_C( 507717673), INT32_C(-1795722597), INT32_C( -367442910), INT32_C( -52157671)), simde_mm256_set_epi32(INT32_C( 948353791), INT32_C( -947869222), INT32_C( 1848909924), INT32_C( 1620600148), INT32_C(-1789584398), INT32_C(-1352425263), INT32_C( 1123687286), INT32_C( -367624963)), simde_mm256_set_epi32(INT32_C( 537309312), INT32_C( 1166026896), INT32_C( 1714692196), INT32_C( 336), INT32_C(-2129390638), INT32_C( 721453120), INT32_C( 14818132), INT32_C( 
34757860)) }, { simde_mm256_set_epi32(INT32_C( 296819835), INT32_C( 519012224), INT32_C( 1292929763), INT32_C( 1826612612), INT32_C(-1166644367), INT32_C(-1218677893), INT32_C( 1596508105), INT32_C( 479196973)), simde_mm256_set_epi32(INT32_C(-1798100031), INT32_C( 1081570687), INT32_C( -734752872), INT32_C( 1848654589), INT32_C(-1212254773), INT32_C( 1838985430), INT32_C( 1056167509), INT32_C(-1850230510)), simde_mm256_set_epi32(INT32_C(-2076040320), INT32_C( 1074790527), INT32_C(-1876687592), INT32_C( 35658361), INT32_C( 92804234), INT32_C( 1216381060), INT32_C( 550703124), INT32_C(-2127558638)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_castps_si256(simde_mm256_andnot_ps(simde_mm256_castsi256_ps(test_vec[i].a), simde_mm256_castsi256_ps(test_vec[i].b))); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_andnot_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C(-8439082616481350053), INT64_C(-1591722586538286382), INT64_C( 1596940992066035921), INT64_C( 7218980746644065590)), simde_mm256_set_epi64x(INT64_C(-2889835723484335944), INT64_C( 5702037989414933855), INT64_C(-7588441850660451968), INT64_C( 5622786757188964496)), simde_mm256_set_epi64x(INT64_C( 6126343173960302752), INT64_C( 432527267492795661), INT64_C(-9182839637781772032), INT64_C( 720582821084942464)) }, { simde_mm256_set_epi64x(INT64_C(-3971123103425330466), INT64_C( 247751845170013697), INT64_C( 5585328925838094706), INT64_C( 530115277368604043)), simde_mm256_set_epi64x(INT64_C( 4478919331480512906), INT64_C( 7586040465456902581), INT64_C(-3267189003236240754), INT64_C( 4065656935289916932)), simde_mm256_set_epi64x(INT64_C( 3893433775916460288), INT64_C( 7495967802570838452), INT64_C(-7914921412211473780), INT64_C( 4045363244402936324)) }, { simde_mm256_set_epi64x(INT64_C(-3585412687111992497), INT64_C( 
6297904517919445636), INT64_C( 3051626993143041669), INT64_C( 4507652791803794073)), simde_mm256_set_epi64x(INT64_C( 7790656180790731512), INT64_C(-2238278921609140434), INT64_C(-3659407193774134076), INT64_C(-7376863833910103250)), simde_mm256_set_epi64x(INT64_C( 2306388938282574000), INT64_C(-6876986580769954518), INT64_C(-4240666768871972800), INT64_C(-9142279579591991002)) }, { simde_mm256_set_epi64x(INT64_C( 7999698065344811916), INT64_C(-7341244500866629440), INT64_C(-7328850488568705697), INT64_C(-2323514350316525032)), simde_mm256_set_epi64x(INT64_C( -107804005911866164), INT64_C( 111123727598973975), INT64_C(-1111380546027591655), INT64_C( 4764066650640007854)), simde_mm256_set_epi64x(INT64_C(-8034139356564159424), INT64_C( 108159439879631895), INT64_C( 6958347297832099840), INT64_C( 7957580804685990)) }, { simde_mm256_set_epi64x(INT64_C( 3459048579714364836), INT64_C(-3973289916381214951), INT64_C(-7209230353107463172), INT64_C( 2930204184482970329)), simde_mm256_set_epi64x(INT64_C( 3010733025435453292), INT64_C( -745749005863907277), INT64_C(-3524732567674843907), INT64_C(-5826758923945327044)), simde_mm256_set_epi64x(INT64_C( 704889912361353800), INT64_C( 3828781103108395042), INT64_C( 4901042599637093377), INT64_C(-8718669257106881500)) }, { simde_mm256_set_epi64x(INT64_C(-7592832401699875339), INT64_C(-8023654109997292762), INT64_C(-2771476036962446296), INT64_C( 9102393041728593835)), simde_mm256_set_epi64x(INT64_C( 8663885080599503900), INT64_C(-7386785027439544145), INT64_C( 8164489019718037552), INT64_C( 750237314916317106)), simde_mm256_set_epi64x(INT64_C( 7501880507188158472), INT64_C( 673462032523630729), INT64_C( 2325549012813766672), INT64_C( 11611668710957072)) }, { simde_mm256_set_epi64x(INT64_C( 845408412650914951), INT64_C(-8878005092884591737), INT64_C( 8700006383250628168), INT64_C( 5164616504794613874)), simde_mm256_set_epi64x(INT64_C(-8091532354365363750), INT64_C(-8752462265867882388), INT64_C( 4660987830245455204), 
INT64_C(-5583031631743087362)), simde_mm256_set_epi64x(INT64_C(-8934015756479190696), INT64_C( 144397281535078504), INT64_C( 851058845950244), INT64_C(-5764308427349687156)) }, { simde_mm256_set_epi64x(INT64_C( 2555908784249146521), INT64_C(-8475938792743727314), INT64_C( 2225136272678261710), INT64_C( -363869634240011329)), simde_mm256_set_epi64x(INT64_C(-1884165091148513154), INT64_C(-7810796383227036924), INT64_C(-3384871664970291123), INT64_C( 3413049808639883718)), simde_mm256_set_epi64x(INT64_C(-4286845753352634266), INT64_C( 1261025490606501888), INT64_C(-4537798832575469567), INT64_C( 363824553704642624)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_castpd_si256(simde_mm256_andnot_pd(simde_mm256_castsi256_pd(test_vec[i].a), simde_mm256_castsi256_pd(test_vec[i].b))); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_castps_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 17.89), SIMDE_FLOAT32_C( -439.16), SIMDE_FLOAT32_C( 198.42), SIMDE_FLOAT32_C( 352.58), SIMDE_FLOAT32_C( 461.89), SIMDE_FLOAT32_C( -105.28), SIMDE_FLOAT32_C( 143.68), SIMDE_FLOAT32_C( 337.71)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 17.89), SIMDE_FLOAT32_C( -439.16), SIMDE_FLOAT32_C( 198.42), SIMDE_FLOAT32_C( 352.58), SIMDE_FLOAT32_C( 461.89), SIMDE_FLOAT32_C( -105.28), SIMDE_FLOAT32_C( 143.68), SIMDE_FLOAT32_C( 337.71)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 104.39), SIMDE_FLOAT32_C( 34.20), SIMDE_FLOAT32_C( 868.43), SIMDE_FLOAT32_C( -354.71), SIMDE_FLOAT32_C( 71.91), SIMDE_FLOAT32_C( -620.66), SIMDE_FLOAT32_C( -727.46), SIMDE_FLOAT32_C( 516.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 104.39), SIMDE_FLOAT32_C( 34.20), SIMDE_FLOAT32_C( 868.43), SIMDE_FLOAT32_C( -354.71), SIMDE_FLOAT32_C( 71.91), SIMDE_FLOAT32_C( -620.66), SIMDE_FLOAT32_C( -727.46), SIMDE_FLOAT32_C( 516.70)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 
57.24), SIMDE_FLOAT32_C( -937.39), SIMDE_FLOAT32_C( -902.27), SIMDE_FLOAT32_C( -540.76), SIMDE_FLOAT32_C( -319.95), SIMDE_FLOAT32_C( 472.57), SIMDE_FLOAT32_C( -514.05), SIMDE_FLOAT32_C( -395.89)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 57.24), SIMDE_FLOAT32_C( -937.39), SIMDE_FLOAT32_C( -902.27), SIMDE_FLOAT32_C( -540.76), SIMDE_FLOAT32_C( -319.95), SIMDE_FLOAT32_C( 472.57), SIMDE_FLOAT32_C( -514.05), SIMDE_FLOAT32_C( -395.89)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -289.46), SIMDE_FLOAT32_C( -292.09), SIMDE_FLOAT32_C( 300.75), SIMDE_FLOAT32_C( 515.47), SIMDE_FLOAT32_C( -443.08), SIMDE_FLOAT32_C( 738.56), SIMDE_FLOAT32_C( 388.31), SIMDE_FLOAT32_C( -979.02)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -289.46), SIMDE_FLOAT32_C( -292.09), SIMDE_FLOAT32_C( 300.75), SIMDE_FLOAT32_C( 515.47), SIMDE_FLOAT32_C( -443.08), SIMDE_FLOAT32_C( 738.56), SIMDE_FLOAT32_C( 388.31), SIMDE_FLOAT32_C( -979.02)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -416.84), SIMDE_FLOAT32_C( -145.54), SIMDE_FLOAT32_C( 814.92), SIMDE_FLOAT32_C( -389.04), SIMDE_FLOAT32_C( 271.28), SIMDE_FLOAT32_C( 795.75), SIMDE_FLOAT32_C( 715.64), SIMDE_FLOAT32_C( -282.83)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -416.84), SIMDE_FLOAT32_C( -145.54), SIMDE_FLOAT32_C( 814.92), SIMDE_FLOAT32_C( -389.04), SIMDE_FLOAT32_C( 271.28), SIMDE_FLOAT32_C( 795.75), SIMDE_FLOAT32_C( 715.64), SIMDE_FLOAT32_C( -282.83)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 36.85), SIMDE_FLOAT32_C( 358.48), SIMDE_FLOAT32_C( -119.62), SIMDE_FLOAT32_C( 444.11), SIMDE_FLOAT32_C( 221.60), SIMDE_FLOAT32_C( -300.30), SIMDE_FLOAT32_C( -48.26), SIMDE_FLOAT32_C( 969.79)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 36.85), SIMDE_FLOAT32_C( 358.48), SIMDE_FLOAT32_C( -119.62), SIMDE_FLOAT32_C( 444.11), SIMDE_FLOAT32_C( 221.60), SIMDE_FLOAT32_C( -300.30), SIMDE_FLOAT32_C( -48.26), SIMDE_FLOAT32_C( 969.79)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -242.13), SIMDE_FLOAT32_C( -172.63), SIMDE_FLOAT32_C( -556.49), SIMDE_FLOAT32_C( -637.53), SIMDE_FLOAT32_C( 39.04), 
SIMDE_FLOAT32_C( -822.45), SIMDE_FLOAT32_C( -881.86), SIMDE_FLOAT32_C( 639.51)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -242.13), SIMDE_FLOAT32_C( -172.63), SIMDE_FLOAT32_C( -556.49), SIMDE_FLOAT32_C( -637.53), SIMDE_FLOAT32_C( 39.04), SIMDE_FLOAT32_C( -822.45), SIMDE_FLOAT32_C( -881.86), SIMDE_FLOAT32_C( 639.51)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 262.21), SIMDE_FLOAT32_C( 189.91), SIMDE_FLOAT32_C( -147.18), SIMDE_FLOAT32_C( -378.37), SIMDE_FLOAT32_C( -474.15), SIMDE_FLOAT32_C( 728.93), SIMDE_FLOAT32_C( -779.84), SIMDE_FLOAT32_C( -836.44)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 262.21), SIMDE_FLOAT32_C( 189.91), SIMDE_FLOAT32_C( -147.18), SIMDE_FLOAT32_C( -378.37), SIMDE_FLOAT32_C( -474.15), SIMDE_FLOAT32_C( 728.93), SIMDE_FLOAT32_C( -779.84), SIMDE_FLOAT32_C( -836.44)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_castpd_ps(simde_mm256_castps_pd(test_vec[i].a)); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_castpd_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 866.38), SIMDE_FLOAT64_C( -294.05), SIMDE_FLOAT64_C( -595.07), SIMDE_FLOAT64_C( 30.82)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 866.38), SIMDE_FLOAT64_C( -294.05), SIMDE_FLOAT64_C( -595.07), SIMDE_FLOAT64_C( 30.82)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 967.90), SIMDE_FLOAT64_C( 598.86), SIMDE_FLOAT64_C( -336.28), SIMDE_FLOAT64_C( 771.72)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 967.90), SIMDE_FLOAT64_C( 598.86), SIMDE_FLOAT64_C( -336.28), SIMDE_FLOAT64_C( 771.72)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -949.57), SIMDE_FLOAT64_C( -900.42), SIMDE_FLOAT64_C( 702.02), SIMDE_FLOAT64_C( -980.34)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -949.57), SIMDE_FLOAT64_C( -900.42), SIMDE_FLOAT64_C( 702.02), SIMDE_FLOAT64_C( -980.34)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 969.39), SIMDE_FLOAT64_C( 513.49), SIMDE_FLOAT64_C( 950.16), 
SIMDE_FLOAT64_C( -812.08)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 969.39), SIMDE_FLOAT64_C( 513.49), SIMDE_FLOAT64_C( 950.16), SIMDE_FLOAT64_C( -812.08)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -507.62), SIMDE_FLOAT64_C( -207.64), SIMDE_FLOAT64_C( 179.82), SIMDE_FLOAT64_C( 43.82)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -507.62), SIMDE_FLOAT64_C( -207.64), SIMDE_FLOAT64_C( 179.82), SIMDE_FLOAT64_C( 43.82)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 461.29), SIMDE_FLOAT64_C( 530.93), SIMDE_FLOAT64_C( 26.40), SIMDE_FLOAT64_C( -295.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 461.29), SIMDE_FLOAT64_C( 530.93), SIMDE_FLOAT64_C( 26.40), SIMDE_FLOAT64_C( -295.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 195.05), SIMDE_FLOAT64_C( -631.18), SIMDE_FLOAT64_C( -125.11), SIMDE_FLOAT64_C( -657.02)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 195.05), SIMDE_FLOAT64_C( -631.18), SIMDE_FLOAT64_C( -125.11), SIMDE_FLOAT64_C( -657.02)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 839.46), SIMDE_FLOAT64_C( -787.94), SIMDE_FLOAT64_C( -273.33), SIMDE_FLOAT64_C( -261.67)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 839.46), SIMDE_FLOAT64_C( -787.94), SIMDE_FLOAT64_C( -273.33), SIMDE_FLOAT64_C( -261.67)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_castps_pd(simde_mm256_castpd_ps(test_vec[i].a)); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_castps128_ps256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m256 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 351.31), SIMDE_FLOAT32_C( 331.36), SIMDE_FLOAT32_C( 112.22), SIMDE_FLOAT32_C( -15.48)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 351.31), SIMDE_FLOAT32_C( 331.36), SIMDE_FLOAT32_C( 112.22), SIMDE_FLOAT32_C( -15.48)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 905.42), SIMDE_FLOAT32_C( -224.21), SIMDE_FLOAT32_C( -76.15), SIMDE_FLOAT32_C( 663.18)), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 905.42), SIMDE_FLOAT32_C( -224.21), SIMDE_FLOAT32_C( -76.15), SIMDE_FLOAT32_C( 663.18)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 830.23), SIMDE_FLOAT32_C( -5.95), SIMDE_FLOAT32_C( 918.64), SIMDE_FLOAT32_C( 777.19)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 830.23), SIMDE_FLOAT32_C( -5.95), SIMDE_FLOAT32_C( 918.64), SIMDE_FLOAT32_C( 777.19)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -168.85), SIMDE_FLOAT32_C( -871.57), SIMDE_FLOAT32_C( 40.10), SIMDE_FLOAT32_C( -558.27)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -168.85), SIMDE_FLOAT32_C( -871.57), SIMDE_FLOAT32_C( 40.10), SIMDE_FLOAT32_C( -558.27)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -791.93), SIMDE_FLOAT32_C( 293.41), SIMDE_FLOAT32_C( 390.85), SIMDE_FLOAT32_C( 476.42)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -791.93), SIMDE_FLOAT32_C( 293.41), SIMDE_FLOAT32_C( 390.85), SIMDE_FLOAT32_C( 476.42)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 481.03), SIMDE_FLOAT32_C( -319.83), SIMDE_FLOAT32_C( -437.10), SIMDE_FLOAT32_C( -775.98)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 481.03), SIMDE_FLOAT32_C( -319.83), SIMDE_FLOAT32_C( -437.10), SIMDE_FLOAT32_C( -775.98)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 193.26), SIMDE_FLOAT32_C( 702.14), SIMDE_FLOAT32_C( -820.75), SIMDE_FLOAT32_C( -158.78)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 193.26), SIMDE_FLOAT32_C( 702.14), SIMDE_FLOAT32_C( -820.75), SIMDE_FLOAT32_C( -158.78)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 
-995.99), SIMDE_FLOAT32_C( 290.31), SIMDE_FLOAT32_C( -219.12), SIMDE_FLOAT32_C( -837.21)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -995.99), SIMDE_FLOAT32_C( 290.31), SIMDE_FLOAT32_C( -219.12), SIMDE_FLOAT32_C( -837.21)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256_private r = simde__m256_to_private(simde_mm256_castps128_ps256(test_vec[i].a)); simde__m256_private expected = simde__m256_to_private(test_vec[i].r); simde_assert_m128_equal(r.m128[0], expected.m128[0]); } return 0; } static int test_simde_mm256_castps256_ps128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m128 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -556.83), SIMDE_FLOAT32_C( 534.45), SIMDE_FLOAT32_C( 421.40), SIMDE_FLOAT32_C( 932.30), SIMDE_FLOAT32_C( 169.92), SIMDE_FLOAT32_C( 399.10), SIMDE_FLOAT32_C( -742.08), SIMDE_FLOAT32_C( -830.66)), simde_mm_set_ps(SIMDE_FLOAT32_C( 169.92), SIMDE_FLOAT32_C( 399.10), SIMDE_FLOAT32_C( -742.08), SIMDE_FLOAT32_C( -830.66)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 802.82), SIMDE_FLOAT32_C( -21.64), SIMDE_FLOAT32_C( -298.77), SIMDE_FLOAT32_C( -723.27), SIMDE_FLOAT32_C( 42.85), SIMDE_FLOAT32_C( -154.70), SIMDE_FLOAT32_C( -285.18), SIMDE_FLOAT32_C( -881.89)), simde_mm_set_ps(SIMDE_FLOAT32_C( 42.85), SIMDE_FLOAT32_C( -154.70), SIMDE_FLOAT32_C( -285.18), SIMDE_FLOAT32_C( -881.89)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 606.40), SIMDE_FLOAT32_C( 978.13), SIMDE_FLOAT32_C( 281.04), SIMDE_FLOAT32_C( 316.13), SIMDE_FLOAT32_C( 8.74), SIMDE_FLOAT32_C( -824.14), SIMDE_FLOAT32_C( -338.77), SIMDE_FLOAT32_C( -977.64)), simde_mm_set_ps(SIMDE_FLOAT32_C( 8.74), SIMDE_FLOAT32_C( -824.14), SIMDE_FLOAT32_C( -338.77), SIMDE_FLOAT32_C( -977.64)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 500.38), SIMDE_FLOAT32_C( -378.47), SIMDE_FLOAT32_C( -151.95), SIMDE_FLOAT32_C( -513.15), SIMDE_FLOAT32_C( -509.66), SIMDE_FLOAT32_C( 
542.95), SIMDE_FLOAT32_C( -511.97), SIMDE_FLOAT32_C( 606.72)), simde_mm_set_ps(SIMDE_FLOAT32_C( -509.66), SIMDE_FLOAT32_C( 542.95), SIMDE_FLOAT32_C( -511.97), SIMDE_FLOAT32_C( 606.72)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -156.57), SIMDE_FLOAT32_C( -909.62), SIMDE_FLOAT32_C( 457.12), SIMDE_FLOAT32_C( -549.96), SIMDE_FLOAT32_C( 250.75), SIMDE_FLOAT32_C( -503.56), SIMDE_FLOAT32_C( -397.59), SIMDE_FLOAT32_C( 644.59)), simde_mm_set_ps(SIMDE_FLOAT32_C( 250.75), SIMDE_FLOAT32_C( -503.56), SIMDE_FLOAT32_C( -397.59), SIMDE_FLOAT32_C( 644.59)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 692.42), SIMDE_FLOAT32_C( 776.78), SIMDE_FLOAT32_C( -240.36), SIMDE_FLOAT32_C( -615.28), SIMDE_FLOAT32_C( -428.59), SIMDE_FLOAT32_C( 807.96), SIMDE_FLOAT32_C( -867.86), SIMDE_FLOAT32_C( 511.34)), simde_mm_set_ps(SIMDE_FLOAT32_C( -428.59), SIMDE_FLOAT32_C( 807.96), SIMDE_FLOAT32_C( -867.86), SIMDE_FLOAT32_C( 511.34)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 115.07), SIMDE_FLOAT32_C( -200.05), SIMDE_FLOAT32_C( -278.17), SIMDE_FLOAT32_C( 321.78), SIMDE_FLOAT32_C( 793.85), SIMDE_FLOAT32_C( 416.18), SIMDE_FLOAT32_C( -935.48), SIMDE_FLOAT32_C( -637.83)), simde_mm_set_ps(SIMDE_FLOAT32_C( 793.85), SIMDE_FLOAT32_C( 416.18), SIMDE_FLOAT32_C( -935.48), SIMDE_FLOAT32_C( -637.83)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 951.45), SIMDE_FLOAT32_C( 803.49), SIMDE_FLOAT32_C( -646.03), SIMDE_FLOAT32_C( -379.68), SIMDE_FLOAT32_C( 433.22), SIMDE_FLOAT32_C( 128.68), SIMDE_FLOAT32_C( 589.03), SIMDE_FLOAT32_C( 956.87)), simde_mm_set_ps(SIMDE_FLOAT32_C( 433.22), SIMDE_FLOAT32_C( 128.68), SIMDE_FLOAT32_C( 589.03), SIMDE_FLOAT32_C( 956.87)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm256_castps256_ps128(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_castps_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C(-1649031696), 
INT32_C( 834872153), INT32_C( 230986620), INT32_C( -480324866), INT32_C( 1237553077), INT32_C( 596539913), INT32_C( -724550399), INT32_C( -685617130)), simde_mm256_set_epi32(INT32_C(-1649031696), INT32_C( 834872153), INT32_C( 230986620), INT32_C( -480324866), INT32_C( 1237553077), INT32_C( 596539913), INT32_C( -724550399), INT32_C( -685617130)) }, { simde_mm256_set_epi32(INT32_C( 2090398598), INT32_C( 2109187943), INT32_C( 2052808539), INT32_C( 748311192), INT32_C( -177311449), INT32_C( 358911508), INT32_C( 1754057382), INT32_C(-1724435543)), simde_mm256_set_epi32(INT32_C( 2090398598), INT32_C( 2109187943), INT32_C( 2052808539), INT32_C( 748311192), INT32_C( -177311449), INT32_C( 358911508), INT32_C( 1754057382), INT32_C(-1724435543)) }, { simde_mm256_set_epi32(INT32_C(-1888707460), INT32_C( 492300795), INT32_C(-1881516103), INT32_C( 1522261816), INT32_C( 516271628), INT32_C( 1619360533), INT32_C( 585952460), INT32_C(-1270838330)), simde_mm256_set_epi32(INT32_C(-1888707460), INT32_C( 492300795), INT32_C(-1881516103), INT32_C( 1522261816), INT32_C( 516271628), INT32_C( 1619360533), INT32_C( 585952460), INT32_C(-1270838330)) }, { simde_mm256_set_epi32(INT32_C(-1165169384), INT32_C(-2002996511), INT32_C(-1387036009), INT32_C( 730682044), INT32_C( -504180431), INT32_C(-1682623046), INT32_C( 1968017036), INT32_C( 283253644)), simde_mm256_set_epi32(INT32_C(-1165169384), INT32_C(-2002996511), INT32_C(-1387036009), INT32_C( 730682044), INT32_C( -504180431), INT32_C(-1682623046), INT32_C( 1968017036), INT32_C( 283253644)) }, { simde_mm256_set_epi32(INT32_C( 1949666143), INT32_C( -888451700), INT32_C( 605916520), INT32_C(-1265057380), INT32_C( 1158984758), INT32_C( 710723273), INT32_C( -342604717), INT32_C(-1218392316)), simde_mm256_set_epi32(INT32_C( 1949666143), INT32_C( -888451700), INT32_C( 605916520), INT32_C(-1265057380), INT32_C( 1158984758), INT32_C( 710723273), INT32_C( -342604717), INT32_C(-1218392316)) }, { simde_mm256_set_epi32(INT32_C(-1870151604), 
INT32_C(-2002713920), INT32_C(-1131057702), INT32_C(-1611852985), INT32_C( 1725575775), INT32_C( -263968835), INT32_C( 26802813), INT32_C( -641556710)), simde_mm256_set_epi32(INT32_C(-1870151604), INT32_C(-2002713920), INT32_C(-1131057702), INT32_C(-1611852985), INT32_C( 1725575775), INT32_C( -263968835), INT32_C( 26802813), INT32_C( -641556710)) }, { simde_mm256_set_epi32(INT32_C( 938077299), INT32_C( 4161792), INT32_C( 1718084645), INT32_C( 1391219860), INT32_C( 1311036795), INT32_C( 132407700), INT32_C(-1161361885), INT32_C( -462662147)), simde_mm256_set_epi32(INT32_C( 938077299), INT32_C( 4161792), INT32_C( 1718084645), INT32_C( 1391219860), INT32_C( 1311036795), INT32_C( 132407700), INT32_C(-1161361885), INT32_C( -462662147)) }, { simde_mm256_set_epi32(INT32_C( 987097256), INT32_C( -835194619), INT32_C(-1566547652), INT32_C(-1345603026), INT32_C( 138933650), INT32_C(-1430090796), INT32_C(-1310267132), INT32_C( 1931451372)), simde_mm256_set_epi32(INT32_C( 987097256), INT32_C( -835194619), INT32_C(-1566547652), INT32_C(-1345603026), INT32_C( 138933650), INT32_C(-1430090796), INT32_C(-1310267132), INT32_C( 1931451372)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_castps_si256(simde_mm256_castsi256_ps(test_vec[i].a)); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_castpd_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C(-6436426043624243132), INT64_C( 2719911931068686329), INT64_C(-3355851641471628446), INT64_C(-4058286728495258453)), simde_mm256_set_epi64x(INT64_C(-6436426043624243132), INT64_C( 2719911931068686329), INT64_C(-3355851641471628446), INT64_C(-4058286728495258453)) }, { simde_mm256_set_epi64x(INT64_C(-6993645949082966147), INT64_C( 4041637144880323460), INT64_C( 2966258866008904789), INT64_C( 2735372768247448487)), simde_mm256_set_epi64x(INT64_C(-6993645949082966147), 
INT64_C( 4041637144880323460), INT64_C( 2966258866008904789), INT64_C( 2735372768247448487)) }, { simde_mm256_set_epi64x(INT64_C(-6467543300276167050), INT64_C(-7030233167547396539), INT64_C(-5268215840490095714), INT64_C( 8140300440770855984)), simde_mm256_set_epi64x(INT64_C(-6467543300276167050), INT64_C(-7030233167547396539), INT64_C(-5268215840490095714), INT64_C( 8140300440770855984)) }, { simde_mm256_set_epi64x(INT64_C(-3250744318785917277), INT64_C( 2545355707516900387), INT64_C( 2404409761557662509), INT64_C( 3863384403090649322)), simde_mm256_set_epi64x(INT64_C(-3250744318785917277), INT64_C( 2545355707516900387), INT64_C( 2404409761557662509), INT64_C( 3863384403090649322)) }, { simde_mm256_set_epi64x(INT64_C(-4992649395117694343), INT64_C( 2252708120662783492), INT64_C(-3886485865609467666), INT64_C( 4133517733748490879)), simde_mm256_set_epi64x(INT64_C(-4992649395117694343), INT64_C( 2252708120662783492), INT64_C(-3886485865609467666), INT64_C( 4133517733748490879)) }, { simde_mm256_set_epi64x(INT64_C(-7084284413768371436), INT64_C( 727608602759940145), INT64_C( 5594257850626695037), INT64_C(-7304190896383027628)), simde_mm256_set_epi64x(INT64_C(-7084284413768371436), INT64_C( 727608602759940145), INT64_C( 5594257850626695037), INT64_C(-7304190896383027628)) }, { simde_mm256_set_epi64x(INT64_C(-1886107943195258905), INT64_C(-7906247581446835510), INT64_C( 9068725184054777835), INT64_C( 3330105325701476873)), simde_mm256_set_epi64x(INT64_C(-1886107943195258905), INT64_C(-7906247581446835510), INT64_C( 9068725184054777835), INT64_C( 3330105325701476873)) }, { simde_mm256_set_epi64x(INT64_C( 1390912152688035821), INT64_C(-4783191750990221778), INT64_C(-4829331002619468971), INT64_C( 2643188978129753257)), simde_mm256_set_epi64x(INT64_C( 1390912152688035821), INT64_C(-4783191750990221778), INT64_C(-4829331002619468971), INT64_C( 2643188978129753257)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = 
simde_mm256_castpd_si256(simde_mm256_castsi256_pd(test_vec[i].a)); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_castsi128_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 1176995756), INT32_C(-1870675232), INT32_C( 996429243), INT32_C( 550488102)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1176995756), INT32_C(-1870675232), INT32_C( 996429243), INT32_C( 550488102)) }, { simde_mm_set_epi32(INT32_C( 1022574086), INT32_C( -246750524), INT32_C(-1886376341), INT32_C(-1870907175)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1022574086), INT32_C( -246750524), INT32_C(-1886376341), INT32_C(-1870907175)) }, { simde_mm_set_epi32(INT32_C( 1399644059), INT32_C(-2062431582), INT32_C( 861056404), INT32_C(-1456249685)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1399644059), INT32_C(-2062431582), INT32_C( 861056404), INT32_C(-1456249685)) }, { simde_mm_set_epi32(INT32_C(-1357701315), INT32_C( -200201270), INT32_C(-2128732965), INT32_C( -971656840)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1357701315), INT32_C( -200201270), INT32_C(-2128732965), INT32_C( -971656840)) }, { simde_mm_set_epi32(INT32_C( 1758025228), INT32_C( 19121992), INT32_C( 1973849856), INT32_C( -609470236)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1758025228), INT32_C( 19121992), INT32_C( 1973849856), INT32_C( -609470236)) }, { simde_mm_set_epi32(INT32_C(-1315323340), INT32_C( 1995350243), INT32_C(-1725897434), INT32_C( 791183816)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1315323340), INT32_C( 1995350243), INT32_C(-1725897434), INT32_C( 791183816)) }, { simde_mm_set_epi32(INT32_C(-2046705493), INT32_C( 441938624), INT32_C( -46886380), INT32_C( 
606821245)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-2046705493), INT32_C( 441938624), INT32_C( -46886380), INT32_C( 606821245)) }, { simde_mm_set_epi32(INT32_C( 1167221394), INT32_C( 739794596), INT32_C( 187704590), INT32_C( 1165304892)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1167221394), INT32_C( 739794596), INT32_C( 187704590), INT32_C( 1165304892)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm256_extractf128_si256(simde_mm256_castsi128_si256(test_vec[i].a), 0); simde__m128i e = simde_mm256_extractf128_si256(test_vec[i].r, 0); simde_assert_m128i_equal(r, e); } return 0; } static int test_simde_mm256_castsi256_si128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m128i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 1033968789), INT32_C( 712909368), INT32_C( -15382203), INT32_C( 726776461), INT32_C( 1212968394), INT32_C( -910350077), INT32_C(-1401880553), INT32_C(-1640064659)), simde_mm_set_epi32(INT32_C( 1212968394), INT32_C( -910350077), INT32_C(-1401880553), INT32_C(-1640064659)) }, { simde_mm256_set_epi32(INT32_C( -244971373), INT32_C( -839397474), INT32_C(-1281097070), INT32_C( 1259688200), INT32_C(-1678523239), INT32_C(-1335997395), INT32_C( 1104214719), INT32_C(-1646552356)), simde_mm_set_epi32(INT32_C(-1678523239), INT32_C(-1335997395), INT32_C( 1104214719), INT32_C(-1646552356)) }, { simde_mm256_set_epi32(INT32_C( 1339422473), INT32_C( -532071515), INT32_C(-1679156122), INT32_C( -104726847), INT32_C( -189233938), INT32_C(-1476384511), INT32_C( 59015981), INT32_C( -574854746)), simde_mm_set_epi32(INT32_C( -189233938), INT32_C(-1476384511), INT32_C( 59015981), INT32_C( -574854746)) }, { simde_mm256_set_epi32(INT32_C( 104804994), INT32_C(-1602912924), INT32_C(-1184587502), INT32_C( -929055139), INT32_C(-1913020666), INT32_C( 1485870300), INT32_C( -930325282), INT32_C( 971511935)), 
simde_mm_set_epi32(INT32_C(-1913020666), INT32_C( 1485870300), INT32_C( -930325282), INT32_C( 971511935)) }, { simde_mm256_set_epi32(INT32_C( -640493670), INT32_C( -513373085), INT32_C( 396752088), INT32_C( 1774159809), INT32_C(-1068197323), INT32_C( -727216092), INT32_C( 2046795601), INT32_C( -954579053)), simde_mm_set_epi32(INT32_C(-1068197323), INT32_C( -727216092), INT32_C( 2046795601), INT32_C( -954579053)) }, { simde_mm256_set_epi32(INT32_C( 968938230), INT32_C( 324986947), INT32_C( 1563795037), INT32_C( 1925209729), INT32_C(-1635044296), INT32_C( 685246103), INT32_C( 1765586923), INT32_C( -978308891)), simde_mm_set_epi32(INT32_C(-1635044296), INT32_C( 685246103), INT32_C( 1765586923), INT32_C( -978308891)) }, { simde_mm256_set_epi32(INT32_C(-1695851306), INT32_C(-2116140969), INT32_C( 146847367), INT32_C( -593023293), INT32_C( 1573776318), INT32_C(-1046034616), INT32_C( -645014018), INT32_C( 1014255016)), simde_mm_set_epi32(INT32_C( 1573776318), INT32_C(-1046034616), INT32_C( -645014018), INT32_C( 1014255016)) }, { simde_mm256_set_epi32(INT32_C( 1123223298), INT32_C( 1981751223), INT32_C( 1306799664), INT32_C(-1888445770), INT32_C( 1502796782), INT32_C( 1707347280), INT32_C( 760982117), INT32_C( 1124914897)), simde_mm_set_epi32(INT32_C( 1502796782), INT32_C( 1707347280), INT32_C( 760982117), INT32_C( 1124914897)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm256_castsi256_si128(test_vec[i].a); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_castsi256_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C(-1649031696), INT32_C( 834872153), INT32_C( 230986620), INT32_C( -480324866), INT32_C( 1237553077), INT32_C( 596539913), INT32_C( -724550399), INT32_C( -685617130)), simde_mm256_set_epi32(INT32_C(-1649031696), INT32_C( 834872153), INT32_C( 230986620), INT32_C( -480324866), INT32_C( 1237553077), 
INT32_C( 596539913), INT32_C( -724550399), INT32_C( -685617130)) }, { simde_mm256_set_epi32(INT32_C( 2090398598), INT32_C( 2109187943), INT32_C( 2052808539), INT32_C( 748311192), INT32_C( -177311449), INT32_C( 358911508), INT32_C( 1754057382), INT32_C(-1724435543)), simde_mm256_set_epi32(INT32_C( 2090398598), INT32_C( 2109187943), INT32_C( 2052808539), INT32_C( 748311192), INT32_C( -177311449), INT32_C( 358911508), INT32_C( 1754057382), INT32_C(-1724435543)) }, { simde_mm256_set_epi32(INT32_C(-1888707460), INT32_C( 492300795), INT32_C(-1881516103), INT32_C( 1522261816), INT32_C( 516271628), INT32_C( 1619360533), INT32_C( 585952460), INT32_C(-1270838330)), simde_mm256_set_epi32(INT32_C(-1888707460), INT32_C( 492300795), INT32_C(-1881516103), INT32_C( 1522261816), INT32_C( 516271628), INT32_C( 1619360533), INT32_C( 585952460), INT32_C(-1270838330)) }, { simde_mm256_set_epi32(INT32_C(-1165169384), INT32_C(-2002996511), INT32_C(-1387036009), INT32_C( 730682044), INT32_C( -504180431), INT32_C(-1682623046), INT32_C( 1968017036), INT32_C( 283253644)), simde_mm256_set_epi32(INT32_C(-1165169384), INT32_C(-2002996511), INT32_C(-1387036009), INT32_C( 730682044), INT32_C( -504180431), INT32_C(-1682623046), INT32_C( 1968017036), INT32_C( 283253644)) }, { simde_mm256_set_epi32(INT32_C( 1949666143), INT32_C( -888451700), INT32_C( 605916520), INT32_C(-1265057380), INT32_C( 1158984758), INT32_C( 710723273), INT32_C( -342604717), INT32_C(-1218392316)), simde_mm256_set_epi32(INT32_C( 1949666143), INT32_C( -888451700), INT32_C( 605916520), INT32_C(-1265057380), INT32_C( 1158984758), INT32_C( 710723273), INT32_C( -342604717), INT32_C(-1218392316)) }, { simde_mm256_set_epi32(INT32_C(-1870151604), INT32_C(-2002713920), INT32_C(-1131057702), INT32_C(-1611852985), INT32_C( 1725575775), INT32_C( -263968835), INT32_C( 26802813), INT32_C( -641556710)), simde_mm256_set_epi32(INT32_C(-1870151604), INT32_C(-2002713920), INT32_C(-1131057702), INT32_C(-1611852985), INT32_C( 1725575775), INT32_C( 
-263968835), INT32_C( 26802813), INT32_C( -641556710)) }, { simde_mm256_set_epi32(INT32_C( 938077299), INT32_C( 4161792), INT32_C( 1718084645), INT32_C( 1391219860), INT32_C( 1311036795), INT32_C( 132407700), INT32_C(-1161361885), INT32_C( -462662147)), simde_mm256_set_epi32(INT32_C( 938077299), INT32_C( 4161792), INT32_C( 1718084645), INT32_C( 1391219860), INT32_C( 1311036795), INT32_C( 132407700), INT32_C(-1161361885), INT32_C( -462662147)) }, { simde_mm256_set_epi32(INT32_C( 987097256), INT32_C( -835194619), INT32_C(-1566547652), INT32_C(-1345603026), INT32_C( 138933650), INT32_C(-1430090796), INT32_C(-1310267132), INT32_C( 1931451372)), simde_mm256_set_epi32(INT32_C( 987097256), INT32_C( -835194619), INT32_C(-1566547652), INT32_C(-1345603026), INT32_C( 138933650), INT32_C(-1430090796), INT32_C(-1310267132), INT32_C( 1931451372)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_castps_si256(simde_mm256_castsi256_ps(test_vec[i].a)); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_castsi256_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C(-6436426043624243132), INT64_C( 2719911931068686329), INT64_C(-3355851641471628446), INT64_C(-4058286728495258453)), simde_mm256_set_epi64x(INT64_C(-6436426043624243132), INT64_C( 2719911931068686329), INT64_C(-3355851641471628446), INT64_C(-4058286728495258453)) }, { simde_mm256_set_epi64x(INT64_C(-6993645949082966147), INT64_C( 4041637144880323460), INT64_C( 2966258866008904789), INT64_C( 2735372768247448487)), simde_mm256_set_epi64x(INT64_C(-6993645949082966147), INT64_C( 4041637144880323460), INT64_C( 2966258866008904789), INT64_C( 2735372768247448487)) }, { simde_mm256_set_epi64x(INT64_C(-6467543300276167050), INT64_C(-7030233167547396539), INT64_C(-5268215840490095714), INT64_C( 8140300440770855984)), 
simde_mm256_set_epi64x(INT64_C(-6467543300276167050), INT64_C(-7030233167547396539), INT64_C(-5268215840490095714), INT64_C( 8140300440770855984)) }, { simde_mm256_set_epi64x(INT64_C(-3250744318785917277), INT64_C( 2545355707516900387), INT64_C( 2404409761557662509), INT64_C( 3863384403090649322)), simde_mm256_set_epi64x(INT64_C(-3250744318785917277), INT64_C( 2545355707516900387), INT64_C( 2404409761557662509), INT64_C( 3863384403090649322)) }, { simde_mm256_set_epi64x(INT64_C(-4992649395117694343), INT64_C( 2252708120662783492), INT64_C(-3886485865609467666), INT64_C( 4133517733748490879)), simde_mm256_set_epi64x(INT64_C(-4992649395117694343), INT64_C( 2252708120662783492), INT64_C(-3886485865609467666), INT64_C( 4133517733748490879)) }, { simde_mm256_set_epi64x(INT64_C(-7084284413768371436), INT64_C( 727608602759940145), INT64_C( 5594257850626695037), INT64_C(-7304190896383027628)), simde_mm256_set_epi64x(INT64_C(-7084284413768371436), INT64_C( 727608602759940145), INT64_C( 5594257850626695037), INT64_C(-7304190896383027628)) }, { simde_mm256_set_epi64x(INT64_C(-1886107943195258905), INT64_C(-7906247581446835510), INT64_C( 9068725184054777835), INT64_C( 3330105325701476873)), simde_mm256_set_epi64x(INT64_C(-1886107943195258905), INT64_C(-7906247581446835510), INT64_C( 9068725184054777835), INT64_C( 3330105325701476873)) }, { simde_mm256_set_epi64x(INT64_C( 1390912152688035821), INT64_C(-4783191750990221778), INT64_C(-4829331002619468971), INT64_C( 2643188978129753257)), simde_mm256_set_epi64x(INT64_C( 1390912152688035821), INT64_C(-4783191750990221778), INT64_C(-4829331002619468971), INT64_C( 2643188978129753257)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_castpd_si256(simde_mm256_castsi256_pd(test_vec[i].a)); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_blend_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 r; } 
test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 61.35), SIMDE_FLOAT32_C( 540.33), SIMDE_FLOAT32_C( -888.48), SIMDE_FLOAT32_C( 570.09), SIMDE_FLOAT32_C( 312.02), SIMDE_FLOAT32_C( -960.46), SIMDE_FLOAT32_C( -440.55), SIMDE_FLOAT32_C( -796.55)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -286.42), SIMDE_FLOAT32_C( -835.51), SIMDE_FLOAT32_C( 177.42), SIMDE_FLOAT32_C( 142.03), SIMDE_FLOAT32_C( 501.36), SIMDE_FLOAT32_C( -894.74), SIMDE_FLOAT32_C( -798.77), SIMDE_FLOAT32_C( 511.25)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 61.35), SIMDE_FLOAT32_C( 540.33), SIMDE_FLOAT32_C( -888.48), SIMDE_FLOAT32_C( 570.09), SIMDE_FLOAT32_C( 501.36), SIMDE_FLOAT32_C( -894.74), SIMDE_FLOAT32_C( -440.55), SIMDE_FLOAT32_C( 511.25)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 783.21), SIMDE_FLOAT32_C( 251.09), SIMDE_FLOAT32_C( -929.85), SIMDE_FLOAT32_C( -378.97), SIMDE_FLOAT32_C( 496.83), SIMDE_FLOAT32_C( -643.84), SIMDE_FLOAT32_C( -785.45), SIMDE_FLOAT32_C( -676.85)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 375.42), SIMDE_FLOAT32_C( -689.69), SIMDE_FLOAT32_C( 240.54), SIMDE_FLOAT32_C( -955.13), SIMDE_FLOAT32_C( 82.52), SIMDE_FLOAT32_C( 210.36), SIMDE_FLOAT32_C( 621.75), SIMDE_FLOAT32_C( -780.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 783.21), SIMDE_FLOAT32_C( 251.09), SIMDE_FLOAT32_C( -929.85), SIMDE_FLOAT32_C( -378.97), SIMDE_FLOAT32_C( 82.52), SIMDE_FLOAT32_C( 210.36), SIMDE_FLOAT32_C( -785.45), SIMDE_FLOAT32_C( -780.72)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -747.80), SIMDE_FLOAT32_C( -376.86), SIMDE_FLOAT32_C( 238.38), SIMDE_FLOAT32_C( -668.84), SIMDE_FLOAT32_C( 238.09), SIMDE_FLOAT32_C( 936.53), SIMDE_FLOAT32_C( -693.41), SIMDE_FLOAT32_C( -381.56)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 83.85), SIMDE_FLOAT32_C( 559.24), SIMDE_FLOAT32_C( -896.35), SIMDE_FLOAT32_C( 225.46), SIMDE_FLOAT32_C( -243.15), SIMDE_FLOAT32_C( -714.74), SIMDE_FLOAT32_C( 388.91), SIMDE_FLOAT32_C( 608.15)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -747.80), SIMDE_FLOAT32_C( -376.86), SIMDE_FLOAT32_C( 238.38), SIMDE_FLOAT32_C( 
-668.84), SIMDE_FLOAT32_C( -243.15), SIMDE_FLOAT32_C( -714.74), SIMDE_FLOAT32_C( -693.41), SIMDE_FLOAT32_C( 608.15)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.67), SIMDE_FLOAT32_C( -361.23), SIMDE_FLOAT32_C( 362.97), SIMDE_FLOAT32_C( -860.62), SIMDE_FLOAT32_C( 518.00), SIMDE_FLOAT32_C( 985.53), SIMDE_FLOAT32_C( -40.74), SIMDE_FLOAT32_C( 246.28)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 236.67), SIMDE_FLOAT32_C( -133.38), SIMDE_FLOAT32_C( -240.09), SIMDE_FLOAT32_C( 681.13), SIMDE_FLOAT32_C( -437.53), SIMDE_FLOAT32_C( 645.53), SIMDE_FLOAT32_C( 472.51), SIMDE_FLOAT32_C( 30.02)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.67), SIMDE_FLOAT32_C( -361.23), SIMDE_FLOAT32_C( 362.97), SIMDE_FLOAT32_C( -860.62), SIMDE_FLOAT32_C( -437.53), SIMDE_FLOAT32_C( 645.53), SIMDE_FLOAT32_C( -40.74), SIMDE_FLOAT32_C( 30.02)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -759.07), SIMDE_FLOAT32_C( 240.96), SIMDE_FLOAT32_C( -743.41), SIMDE_FLOAT32_C( -766.95), SIMDE_FLOAT32_C( -733.55), SIMDE_FLOAT32_C( -798.68), SIMDE_FLOAT32_C( -189.75), SIMDE_FLOAT32_C( -424.58)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -396.91), SIMDE_FLOAT32_C( 509.13), SIMDE_FLOAT32_C( 462.02), SIMDE_FLOAT32_C( 520.45), SIMDE_FLOAT32_C( 948.24), SIMDE_FLOAT32_C( 730.18), SIMDE_FLOAT32_C( -709.02), SIMDE_FLOAT32_C( -858.64)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -759.07), SIMDE_FLOAT32_C( 240.96), SIMDE_FLOAT32_C( -743.41), SIMDE_FLOAT32_C( -766.95), SIMDE_FLOAT32_C( 948.24), SIMDE_FLOAT32_C( 730.18), SIMDE_FLOAT32_C( -189.75), SIMDE_FLOAT32_C( -858.64)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 624.31), SIMDE_FLOAT32_C( 375.12), SIMDE_FLOAT32_C( 629.27), SIMDE_FLOAT32_C( 901.24), SIMDE_FLOAT32_C( -896.96), SIMDE_FLOAT32_C( -769.47), SIMDE_FLOAT32_C( 452.93), SIMDE_FLOAT32_C( -251.17)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -148.06), SIMDE_FLOAT32_C( -474.61), SIMDE_FLOAT32_C( -146.01), SIMDE_FLOAT32_C( -955.67), SIMDE_FLOAT32_C( 931.60), SIMDE_FLOAT32_C( -773.07), SIMDE_FLOAT32_C( 626.99), SIMDE_FLOAT32_C( 431.72)), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( 624.31), SIMDE_FLOAT32_C( 375.12), SIMDE_FLOAT32_C( 629.27), SIMDE_FLOAT32_C( 901.24), SIMDE_FLOAT32_C( 931.60), SIMDE_FLOAT32_C( -773.07), SIMDE_FLOAT32_C( 452.93), SIMDE_FLOAT32_C( 431.72)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -812.40), SIMDE_FLOAT32_C( -551.18), SIMDE_FLOAT32_C( 389.71), SIMDE_FLOAT32_C( 667.74), SIMDE_FLOAT32_C( -2.37), SIMDE_FLOAT32_C( -129.53), SIMDE_FLOAT32_C( 860.16), SIMDE_FLOAT32_C( 760.95)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -172.95), SIMDE_FLOAT32_C( -591.71), SIMDE_FLOAT32_C( -137.29), SIMDE_FLOAT32_C( -689.63), SIMDE_FLOAT32_C( 644.48), SIMDE_FLOAT32_C( 859.03), SIMDE_FLOAT32_C( -963.16), SIMDE_FLOAT32_C( -158.52)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -812.40), SIMDE_FLOAT32_C( -551.18), SIMDE_FLOAT32_C( 389.71), SIMDE_FLOAT32_C( 667.74), SIMDE_FLOAT32_C( 644.48), SIMDE_FLOAT32_C( 859.03), SIMDE_FLOAT32_C( 860.16), SIMDE_FLOAT32_C( -158.52)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 53.03), SIMDE_FLOAT32_C( -493.76), SIMDE_FLOAT32_C( 623.37), SIMDE_FLOAT32_C( 296.00), SIMDE_FLOAT32_C( -416.71), SIMDE_FLOAT32_C( -539.89), SIMDE_FLOAT32_C( 210.88), SIMDE_FLOAT32_C( 585.18)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -34.37), SIMDE_FLOAT32_C( -267.21), SIMDE_FLOAT32_C( 411.37), SIMDE_FLOAT32_C( -265.51), SIMDE_FLOAT32_C( 345.42), SIMDE_FLOAT32_C( 252.46), SIMDE_FLOAT32_C( 286.48), SIMDE_FLOAT32_C( -858.99)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 53.03), SIMDE_FLOAT32_C( -493.76), SIMDE_FLOAT32_C( 623.37), SIMDE_FLOAT32_C( 296.00), SIMDE_FLOAT32_C( 345.42), SIMDE_FLOAT32_C( 252.46), SIMDE_FLOAT32_C( 210.88), SIMDE_FLOAT32_C( -858.99)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_blend_ps(test_vec[i].a, test_vec[i].b, 13); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_blend_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d r; } test_vec[8] = { { 
simde_mm256_set_pd(SIMDE_FLOAT64_C( 983.61), SIMDE_FLOAT64_C( -51.56), SIMDE_FLOAT64_C( 561.13), SIMDE_FLOAT64_C( -977.17)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 311.03), SIMDE_FLOAT64_C( -876.87), SIMDE_FLOAT64_C( 15.56), SIMDE_FLOAT64_C( 821.58)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 311.03), SIMDE_FLOAT64_C( -876.87), SIMDE_FLOAT64_C( 561.13), SIMDE_FLOAT64_C( 821.58)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 486.15), SIMDE_FLOAT64_C( -809.80), SIMDE_FLOAT64_C( -134.49), SIMDE_FLOAT64_C( -709.30)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -588.52), SIMDE_FLOAT64_C( -823.37), SIMDE_FLOAT64_C( -436.62), SIMDE_FLOAT64_C( -938.83)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -588.52), SIMDE_FLOAT64_C( -823.37), SIMDE_FLOAT64_C( -134.49), SIMDE_FLOAT64_C( -938.83)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 10.97), SIMDE_FLOAT64_C( -837.09), SIMDE_FLOAT64_C( -238.93), SIMDE_FLOAT64_C( -927.62)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -150.59), SIMDE_FLOAT64_C( 428.13), SIMDE_FLOAT64_C( 655.11), SIMDE_FLOAT64_C( -28.81)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -150.59), SIMDE_FLOAT64_C( 428.13), SIMDE_FLOAT64_C( -238.93), SIMDE_FLOAT64_C( -28.81)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -691.52), SIMDE_FLOAT64_C( -756.72), SIMDE_FLOAT64_C( 69.02), SIMDE_FLOAT64_C( -249.34)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -857.57), SIMDE_FLOAT64_C( -720.61), SIMDE_FLOAT64_C( 529.12), SIMDE_FLOAT64_C( 813.95)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -857.57), SIMDE_FLOAT64_C( -720.61), SIMDE_FLOAT64_C( 69.02), SIMDE_FLOAT64_C( 813.95)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 743.57), SIMDE_FLOAT64_C( 671.72), SIMDE_FLOAT64_C( 747.66), SIMDE_FLOAT64_C( 592.11)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -559.29), SIMDE_FLOAT64_C( 529.63), SIMDE_FLOAT64_C( 121.55), SIMDE_FLOAT64_C( -352.32)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -559.29), SIMDE_FLOAT64_C( 529.63), SIMDE_FLOAT64_C( 747.66), SIMDE_FLOAT64_C( -352.32)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -466.05), SIMDE_FLOAT64_C( -621.64), SIMDE_FLOAT64_C( 
113.70), SIMDE_FLOAT64_C( -906.12)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -919.48), SIMDE_FLOAT64_C( 972.84), SIMDE_FLOAT64_C( 378.79), SIMDE_FLOAT64_C( -196.68)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -919.48), SIMDE_FLOAT64_C( 972.84), SIMDE_FLOAT64_C( 113.70), SIMDE_FLOAT64_C( -196.68)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -749.13), SIMDE_FLOAT64_C( 30.92), SIMDE_FLOAT64_C( 753.62), SIMDE_FLOAT64_C( -864.28)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -997.13), SIMDE_FLOAT64_C( 675.64), SIMDE_FLOAT64_C( -135.93), SIMDE_FLOAT64_C( -647.89)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -997.13), SIMDE_FLOAT64_C( 675.64), SIMDE_FLOAT64_C( 753.62), SIMDE_FLOAT64_C( -647.89)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 757.64), SIMDE_FLOAT64_C( 119.62), SIMDE_FLOAT64_C( 682.14), SIMDE_FLOAT64_C( -348.74)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 467.61), SIMDE_FLOAT64_C( 532.40), SIMDE_FLOAT64_C( 959.59), SIMDE_FLOAT64_C( -392.58)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 467.61), SIMDE_FLOAT64_C( 532.40), SIMDE_FLOAT64_C( 682.14), SIMDE_FLOAT64_C( -392.58)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_blend_pd(test_vec[i].a, test_vec[i].b, 13); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_blendv_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 mask; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -169.19), SIMDE_FLOAT32_C( -303.51), SIMDE_FLOAT32_C( 280.62), SIMDE_FLOAT32_C( 971.56), SIMDE_FLOAT32_C( 558.62), SIMDE_FLOAT32_C( 244.31), SIMDE_FLOAT32_C( -482.20), SIMDE_FLOAT32_C( 526.92)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 779.01), SIMDE_FLOAT32_C( -628.61), SIMDE_FLOAT32_C( -781.26), SIMDE_FLOAT32_C( -923.79), SIMDE_FLOAT32_C( -624.75), SIMDE_FLOAT32_C( -481.19), SIMDE_FLOAT32_C( 750.60), SIMDE_FLOAT32_C( 693.30)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 392.97), SIMDE_FLOAT32_C( 752.07), SIMDE_FLOAT32_C( -74.68), SIMDE_FLOAT32_C( 
-769.29), SIMDE_FLOAT32_C( 600.30), SIMDE_FLOAT32_C( -577.83), SIMDE_FLOAT32_C( 257.89), SIMDE_FLOAT32_C( -759.37)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -169.19), SIMDE_FLOAT32_C( -303.51), SIMDE_FLOAT32_C( -781.26), SIMDE_FLOAT32_C( -923.79), SIMDE_FLOAT32_C( 558.62), SIMDE_FLOAT32_C( -481.19), SIMDE_FLOAT32_C( -482.20), SIMDE_FLOAT32_C( 693.30)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 436.77), SIMDE_FLOAT32_C( 265.17), SIMDE_FLOAT32_C( -598.85), SIMDE_FLOAT32_C( -424.56), SIMDE_FLOAT32_C( -24.79), SIMDE_FLOAT32_C( -558.99), SIMDE_FLOAT32_C( -299.03), SIMDE_FLOAT32_C( -367.92)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 434.09), SIMDE_FLOAT32_C( 46.59), SIMDE_FLOAT32_C( 85.98), SIMDE_FLOAT32_C( -164.97), SIMDE_FLOAT32_C( 72.68), SIMDE_FLOAT32_C( -140.26), SIMDE_FLOAT32_C( 458.69), SIMDE_FLOAT32_C( 804.02)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 696.57), SIMDE_FLOAT32_C( 799.50), SIMDE_FLOAT32_C( 216.00), SIMDE_FLOAT32_C( 812.94), SIMDE_FLOAT32_C( 321.91), SIMDE_FLOAT32_C( 497.67), SIMDE_FLOAT32_C( -321.87), SIMDE_FLOAT32_C( -96.28)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 436.77), SIMDE_FLOAT32_C( 265.17), SIMDE_FLOAT32_C( -598.85), SIMDE_FLOAT32_C( -424.56), SIMDE_FLOAT32_C( -24.79), SIMDE_FLOAT32_C( -558.99), SIMDE_FLOAT32_C( 458.69), SIMDE_FLOAT32_C( 804.02)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 593.15), SIMDE_FLOAT32_C( 822.85), SIMDE_FLOAT32_C( -843.43), SIMDE_FLOAT32_C( -486.43), SIMDE_FLOAT32_C( 259.42), SIMDE_FLOAT32_C( -708.30), SIMDE_FLOAT32_C( -398.61), SIMDE_FLOAT32_C( 689.88)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -402.96), SIMDE_FLOAT32_C( 346.21), SIMDE_FLOAT32_C( -865.06), SIMDE_FLOAT32_C( 330.41), SIMDE_FLOAT32_C( 355.72), SIMDE_FLOAT32_C( -380.53), SIMDE_FLOAT32_C( 702.28), SIMDE_FLOAT32_C( 6.18)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -441.80), SIMDE_FLOAT32_C( 453.07), SIMDE_FLOAT32_C( -312.81), SIMDE_FLOAT32_C( 655.80), SIMDE_FLOAT32_C( -443.61), SIMDE_FLOAT32_C( -292.20), SIMDE_FLOAT32_C( -429.77), SIMDE_FLOAT32_C( 815.31)), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( -402.96), SIMDE_FLOAT32_C( 822.85), SIMDE_FLOAT32_C( -865.06), SIMDE_FLOAT32_C( -486.43), SIMDE_FLOAT32_C( 355.72), SIMDE_FLOAT32_C( -380.53), SIMDE_FLOAT32_C( 702.28), SIMDE_FLOAT32_C( 689.88)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 627.12), SIMDE_FLOAT32_C( -903.61), SIMDE_FLOAT32_C( 802.66), SIMDE_FLOAT32_C( 393.79), SIMDE_FLOAT32_C( -634.24), SIMDE_FLOAT32_C( 782.19), SIMDE_FLOAT32_C( 964.94), SIMDE_FLOAT32_C( -554.43)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -249.23), SIMDE_FLOAT32_C( -358.11), SIMDE_FLOAT32_C( 69.71), SIMDE_FLOAT32_C( 774.95), SIMDE_FLOAT32_C( -447.84), SIMDE_FLOAT32_C( -947.94), SIMDE_FLOAT32_C( -908.85), SIMDE_FLOAT32_C( -21.47)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 241.23), SIMDE_FLOAT32_C( 27.04), SIMDE_FLOAT32_C( 341.47), SIMDE_FLOAT32_C( 482.33), SIMDE_FLOAT32_C( 411.77), SIMDE_FLOAT32_C( -282.69), SIMDE_FLOAT32_C( 915.57), SIMDE_FLOAT32_C( -213.96)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 627.12), SIMDE_FLOAT32_C( -903.61), SIMDE_FLOAT32_C( 802.66), SIMDE_FLOAT32_C( 393.79), SIMDE_FLOAT32_C( -634.24), SIMDE_FLOAT32_C( -947.94), SIMDE_FLOAT32_C( 964.94), SIMDE_FLOAT32_C( -21.47)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 365.56), SIMDE_FLOAT32_C( 297.70), SIMDE_FLOAT32_C( -723.56), SIMDE_FLOAT32_C( -52.07), SIMDE_FLOAT32_C( 692.93), SIMDE_FLOAT32_C( -882.05), SIMDE_FLOAT32_C( -424.36), SIMDE_FLOAT32_C( -366.57)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 744.52), SIMDE_FLOAT32_C( 387.36), SIMDE_FLOAT32_C( -311.40), SIMDE_FLOAT32_C( -280.68), SIMDE_FLOAT32_C( 556.91), SIMDE_FLOAT32_C( 703.77), SIMDE_FLOAT32_C( -828.92), SIMDE_FLOAT32_C( 893.13)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -152.86), SIMDE_FLOAT32_C( 793.25), SIMDE_FLOAT32_C( 667.69), SIMDE_FLOAT32_C( 940.95), SIMDE_FLOAT32_C( -273.21), SIMDE_FLOAT32_C( 148.61), SIMDE_FLOAT32_C( 420.18), SIMDE_FLOAT32_C( 4.30)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 744.52), SIMDE_FLOAT32_C( 297.70), SIMDE_FLOAT32_C( -723.56), SIMDE_FLOAT32_C( -52.07), SIMDE_FLOAT32_C( 
556.91), SIMDE_FLOAT32_C( -882.05), SIMDE_FLOAT32_C( -424.36), SIMDE_FLOAT32_C( -366.57)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 806.04), SIMDE_FLOAT32_C( 998.56), SIMDE_FLOAT32_C( 954.81), SIMDE_FLOAT32_C( -105.93), SIMDE_FLOAT32_C( 810.39), SIMDE_FLOAT32_C( -451.40), SIMDE_FLOAT32_C( -991.41), SIMDE_FLOAT32_C( 24.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 69.57), SIMDE_FLOAT32_C( -71.31), SIMDE_FLOAT32_C( -379.77), SIMDE_FLOAT32_C( -507.58), SIMDE_FLOAT32_C( -931.37), SIMDE_FLOAT32_C( -271.48), SIMDE_FLOAT32_C( 709.92), SIMDE_FLOAT32_C( -442.85)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -608.56), SIMDE_FLOAT32_C( -319.60), SIMDE_FLOAT32_C( -930.98), SIMDE_FLOAT32_C( -628.59), SIMDE_FLOAT32_C( 898.10), SIMDE_FLOAT32_C( -782.18), SIMDE_FLOAT32_C( -846.42), SIMDE_FLOAT32_C( 513.23)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 69.57), SIMDE_FLOAT32_C( -71.31), SIMDE_FLOAT32_C( -379.77), SIMDE_FLOAT32_C( -507.58), SIMDE_FLOAT32_C( 810.39), SIMDE_FLOAT32_C( -271.48), SIMDE_FLOAT32_C( 709.92), SIMDE_FLOAT32_C( 24.70)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -598.65), SIMDE_FLOAT32_C( -864.46), SIMDE_FLOAT32_C( -182.16), SIMDE_FLOAT32_C( 855.39), SIMDE_FLOAT32_C( -689.66), SIMDE_FLOAT32_C( 116.31), SIMDE_FLOAT32_C( -552.00), SIMDE_FLOAT32_C( 962.19)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.01), SIMDE_FLOAT32_C( 601.06), SIMDE_FLOAT32_C( 276.21), SIMDE_FLOAT32_C( -129.29), SIMDE_FLOAT32_C( -199.59), SIMDE_FLOAT32_C( -345.44), SIMDE_FLOAT32_C( -185.67), SIMDE_FLOAT32_C( 900.58)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 857.48), SIMDE_FLOAT32_C( -480.62), SIMDE_FLOAT32_C( -406.99), SIMDE_FLOAT32_C( -422.80), SIMDE_FLOAT32_C( 1.49), SIMDE_FLOAT32_C( 102.14), SIMDE_FLOAT32_C( 113.98), SIMDE_FLOAT32_C( -405.64)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -598.65), SIMDE_FLOAT32_C( 601.06), SIMDE_FLOAT32_C( 276.21), SIMDE_FLOAT32_C( -129.29), SIMDE_FLOAT32_C( -689.66), SIMDE_FLOAT32_C( 116.31), SIMDE_FLOAT32_C( -552.00), SIMDE_FLOAT32_C( 900.58)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 890.83), 
SIMDE_FLOAT32_C( 201.56), SIMDE_FLOAT32_C( 104.24), SIMDE_FLOAT32_C( 496.38), SIMDE_FLOAT32_C( 607.57), SIMDE_FLOAT32_C( 285.01), SIMDE_FLOAT32_C( 501.29), SIMDE_FLOAT32_C( -590.78)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -643.60), SIMDE_FLOAT32_C( 256.83), SIMDE_FLOAT32_C( 140.60), SIMDE_FLOAT32_C( 204.90), SIMDE_FLOAT32_C( 371.07), SIMDE_FLOAT32_C( -744.00), SIMDE_FLOAT32_C( 751.14), SIMDE_FLOAT32_C( 233.26)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -384.25), SIMDE_FLOAT32_C( -588.66), SIMDE_FLOAT32_C( -531.19), SIMDE_FLOAT32_C( -520.11), SIMDE_FLOAT32_C( 648.35), SIMDE_FLOAT32_C( -584.03), SIMDE_FLOAT32_C( 700.38), SIMDE_FLOAT32_C( 21.17)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -643.60), SIMDE_FLOAT32_C( 256.83), SIMDE_FLOAT32_C( 140.60), SIMDE_FLOAT32_C( 204.90), SIMDE_FLOAT32_C( 607.57), SIMDE_FLOAT32_C( -744.00), SIMDE_FLOAT32_C( 501.29), SIMDE_FLOAT32_C( -590.78)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_blendv_ps(test_vec[i].a, test_vec[i].b, test_vec[i].mask); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_blendv_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d mask; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -587.29), SIMDE_FLOAT64_C( 745.99), SIMDE_FLOAT64_C( 660.01), SIMDE_FLOAT64_C( -72.44)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 307.98), SIMDE_FLOAT64_C( 879.25), SIMDE_FLOAT64_C( 340.44), SIMDE_FLOAT64_C( -338.42)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -599.03), SIMDE_FLOAT64_C( 269.37), SIMDE_FLOAT64_C( -940.99), SIMDE_FLOAT64_C( -383.55)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 307.98), SIMDE_FLOAT64_C( 745.99), SIMDE_FLOAT64_C( 340.44), SIMDE_FLOAT64_C( -338.42)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -369.37), SIMDE_FLOAT64_C( 888.66), SIMDE_FLOAT64_C( -159.55), SIMDE_FLOAT64_C( -869.53)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 649.46), SIMDE_FLOAT64_C( 886.19), SIMDE_FLOAT64_C( 926.89), 
SIMDE_FLOAT64_C( -697.40)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -515.74), SIMDE_FLOAT64_C( -918.64), SIMDE_FLOAT64_C( 131.75), SIMDE_FLOAT64_C( 581.75)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 649.46), SIMDE_FLOAT64_C( 886.19), SIMDE_FLOAT64_C( -159.55), SIMDE_FLOAT64_C( -869.53)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -900.47), SIMDE_FLOAT64_C( 409.14), SIMDE_FLOAT64_C( -799.12), SIMDE_FLOAT64_C( -260.50)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -10.16), SIMDE_FLOAT64_C( 623.74), SIMDE_FLOAT64_C( -915.24), SIMDE_FLOAT64_C( -491.31)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 804.78), SIMDE_FLOAT64_C( -317.20), SIMDE_FLOAT64_C( -335.85), SIMDE_FLOAT64_C( -779.77)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -900.47), SIMDE_FLOAT64_C( 623.74), SIMDE_FLOAT64_C( -915.24), SIMDE_FLOAT64_C( -491.31)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -87.84), SIMDE_FLOAT64_C( 244.36), SIMDE_FLOAT64_C( -17.33), SIMDE_FLOAT64_C( 496.74)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 176.35), SIMDE_FLOAT64_C( 303.26), SIMDE_FLOAT64_C( -414.16), SIMDE_FLOAT64_C( -98.44)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -385.65), SIMDE_FLOAT64_C( -192.42), SIMDE_FLOAT64_C( 392.65), SIMDE_FLOAT64_C( -902.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 176.35), SIMDE_FLOAT64_C( 303.26), SIMDE_FLOAT64_C( -17.33), SIMDE_FLOAT64_C( -98.44)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -793.35), SIMDE_FLOAT64_C( 6.53), SIMDE_FLOAT64_C( 858.41), SIMDE_FLOAT64_C( 175.72)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 376.10), SIMDE_FLOAT64_C( -410.32), SIMDE_FLOAT64_C( -49.60), SIMDE_FLOAT64_C( -434.06)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -290.24), SIMDE_FLOAT64_C( 223.04), SIMDE_FLOAT64_C( 738.63), SIMDE_FLOAT64_C( -193.43)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 376.10), SIMDE_FLOAT64_C( 6.53), SIMDE_FLOAT64_C( 858.41), SIMDE_FLOAT64_C( -434.06)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -894.04), SIMDE_FLOAT64_C( -968.20), SIMDE_FLOAT64_C( 146.09), SIMDE_FLOAT64_C( -741.36)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -570.76), SIMDE_FLOAT64_C( 
-340.64), SIMDE_FLOAT64_C( 593.54), SIMDE_FLOAT64_C( -684.62)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 155.79), SIMDE_FLOAT64_C( 975.56), SIMDE_FLOAT64_C( 939.33), SIMDE_FLOAT64_C( 615.78)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -894.04), SIMDE_FLOAT64_C( -968.20), SIMDE_FLOAT64_C( 146.09), SIMDE_FLOAT64_C( -741.36)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -512.44), SIMDE_FLOAT64_C( 657.99), SIMDE_FLOAT64_C( 888.55), SIMDE_FLOAT64_C( 863.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 344.43), SIMDE_FLOAT64_C( 994.17), SIMDE_FLOAT64_C( -142.41), SIMDE_FLOAT64_C( -388.31)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -738.41), SIMDE_FLOAT64_C( 935.62), SIMDE_FLOAT64_C( -743.51), SIMDE_FLOAT64_C( -41.25)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 344.43), SIMDE_FLOAT64_C( 657.99), SIMDE_FLOAT64_C( -142.41), SIMDE_FLOAT64_C( -388.31)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -178.00), SIMDE_FLOAT64_C( -981.39), SIMDE_FLOAT64_C( -631.33), SIMDE_FLOAT64_C( 518.52)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 592.14), SIMDE_FLOAT64_C( -27.22), SIMDE_FLOAT64_C( 736.38), SIMDE_FLOAT64_C( 579.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 893.09), SIMDE_FLOAT64_C( 120.96), SIMDE_FLOAT64_C( 910.10), SIMDE_FLOAT64_C( -128.61)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -178.00), SIMDE_FLOAT64_C( -981.39), SIMDE_FLOAT64_C( -631.33), SIMDE_FLOAT64_C( 579.20)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_blendv_pd(test_vec[i].a, test_vec[i].b, test_vec[i].mask); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_broadcast_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 430.96), SIMDE_FLOAT32_C( 742.21), SIMDE_FLOAT32_C( 80.10), SIMDE_FLOAT32_C( -808.38) }, { SIMDE_FLOAT32_C( 430.96), SIMDE_FLOAT32_C( 742.21), SIMDE_FLOAT32_C( 80.10), SIMDE_FLOAT32_C( -808.38), SIMDE_FLOAT32_C( 430.96), SIMDE_FLOAT32_C( 742.21), SIMDE_FLOAT32_C( 
80.10), SIMDE_FLOAT32_C( -808.38) } }, { { SIMDE_FLOAT32_C( -52.11), SIMDE_FLOAT32_C( -124.82), SIMDE_FLOAT32_C( 257.00), SIMDE_FLOAT32_C( 583.90) }, { SIMDE_FLOAT32_C( -52.11), SIMDE_FLOAT32_C( -124.82), SIMDE_FLOAT32_C( 257.00), SIMDE_FLOAT32_C( 583.90), SIMDE_FLOAT32_C( -52.11), SIMDE_FLOAT32_C( -124.82), SIMDE_FLOAT32_C( 257.00), SIMDE_FLOAT32_C( 583.90) } }, { { SIMDE_FLOAT32_C( -353.01), SIMDE_FLOAT32_C( 791.63), SIMDE_FLOAT32_C( 304.14), SIMDE_FLOAT32_C( -431.18) }, { SIMDE_FLOAT32_C( -353.01), SIMDE_FLOAT32_C( 791.63), SIMDE_FLOAT32_C( 304.14), SIMDE_FLOAT32_C( -431.18), SIMDE_FLOAT32_C( -353.01), SIMDE_FLOAT32_C( 791.63), SIMDE_FLOAT32_C( 304.14), SIMDE_FLOAT32_C( -431.18) } }, { { SIMDE_FLOAT32_C( 839.02), SIMDE_FLOAT32_C( -532.53), SIMDE_FLOAT32_C( 12.86), SIMDE_FLOAT32_C( -518.51) }, { SIMDE_FLOAT32_C( 839.02), SIMDE_FLOAT32_C( -532.53), SIMDE_FLOAT32_C( 12.86), SIMDE_FLOAT32_C( -518.51), SIMDE_FLOAT32_C( 839.02), SIMDE_FLOAT32_C( -532.53), SIMDE_FLOAT32_C( 12.86), SIMDE_FLOAT32_C( -518.51) } }, { { SIMDE_FLOAT32_C( 815.19), SIMDE_FLOAT32_C( 919.67), SIMDE_FLOAT32_C( -404.62), SIMDE_FLOAT32_C( -140.25) }, { SIMDE_FLOAT32_C( 815.19), SIMDE_FLOAT32_C( 919.67), SIMDE_FLOAT32_C( -404.62), SIMDE_FLOAT32_C( -140.25), SIMDE_FLOAT32_C( 815.19), SIMDE_FLOAT32_C( 919.67), SIMDE_FLOAT32_C( -404.62), SIMDE_FLOAT32_C( -140.25) } }, { { SIMDE_FLOAT32_C( 487.65), SIMDE_FLOAT32_C( -341.39), SIMDE_FLOAT32_C( -448.94), SIMDE_FLOAT32_C( -588.75) }, { SIMDE_FLOAT32_C( 487.65), SIMDE_FLOAT32_C( -341.39), SIMDE_FLOAT32_C( -448.94), SIMDE_FLOAT32_C( -588.75), SIMDE_FLOAT32_C( 487.65), SIMDE_FLOAT32_C( -341.39), SIMDE_FLOAT32_C( -448.94), SIMDE_FLOAT32_C( -588.75) } }, { { SIMDE_FLOAT32_C( 22.82), SIMDE_FLOAT32_C( -871.28), SIMDE_FLOAT32_C( 241.67), SIMDE_FLOAT32_C( 474.50) }, { SIMDE_FLOAT32_C( 22.82), SIMDE_FLOAT32_C( -871.28), SIMDE_FLOAT32_C( 241.67), SIMDE_FLOAT32_C( 474.50), SIMDE_FLOAT32_C( 22.82), SIMDE_FLOAT32_C( -871.28), SIMDE_FLOAT32_C( 241.67), SIMDE_FLOAT32_C( 
474.50) } }, { { SIMDE_FLOAT32_C( -594.42), SIMDE_FLOAT32_C( -935.66), SIMDE_FLOAT32_C( -297.52), SIMDE_FLOAT32_C( 836.54) }, { SIMDE_FLOAT32_C( -594.42), SIMDE_FLOAT32_C( -935.66), SIMDE_FLOAT32_C( -297.52), SIMDE_FLOAT32_C( 836.54), SIMDE_FLOAT32_C( -594.42), SIMDE_FLOAT32_C( -935.66), SIMDE_FLOAT32_C( -297.52), SIMDE_FLOAT32_C( 836.54) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 const* a_ = SIMDE_ALIGN_CAST(simde__m128 const*, test_vec[i].a); simde__m256 r = simde_mm256_broadcast_ps(a_); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_broadcast_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( -644.23), SIMDE_FLOAT64_C( 202.59) }, { SIMDE_FLOAT64_C( -644.23), SIMDE_FLOAT64_C( 202.59), SIMDE_FLOAT64_C( -644.23), SIMDE_FLOAT64_C( 202.59) } }, { { SIMDE_FLOAT64_C( 46.64), SIMDE_FLOAT64_C( -251.29) }, { SIMDE_FLOAT64_C( 46.64), SIMDE_FLOAT64_C( -251.29), SIMDE_FLOAT64_C( 46.64), SIMDE_FLOAT64_C( -251.29) } }, { { SIMDE_FLOAT64_C( -358.76), SIMDE_FLOAT64_C( -455.49) }, { SIMDE_FLOAT64_C( -358.76), SIMDE_FLOAT64_C( -455.49), SIMDE_FLOAT64_C( -358.76), SIMDE_FLOAT64_C( -455.49) } }, { { SIMDE_FLOAT64_C( 621.72), SIMDE_FLOAT64_C( -738.99) }, { SIMDE_FLOAT64_C( 621.72), SIMDE_FLOAT64_C( -738.99), SIMDE_FLOAT64_C( 621.72), SIMDE_FLOAT64_C( -738.99) } }, { { SIMDE_FLOAT64_C( 354.14), SIMDE_FLOAT64_C( -365.61) }, { SIMDE_FLOAT64_C( 354.14), SIMDE_FLOAT64_C( -365.61), SIMDE_FLOAT64_C( 354.14), SIMDE_FLOAT64_C( -365.61) } }, { { SIMDE_FLOAT64_C( 5.77), SIMDE_FLOAT64_C( -199.92) }, { SIMDE_FLOAT64_C( 5.77), SIMDE_FLOAT64_C( -199.92), SIMDE_FLOAT64_C( 5.77), SIMDE_FLOAT64_C( -199.92) } }, { { SIMDE_FLOAT64_C( 814.08), SIMDE_FLOAT64_C( -186.66) }, { SIMDE_FLOAT64_C( 814.08), SIMDE_FLOAT64_C( -186.66), SIMDE_FLOAT64_C( 814.08), SIMDE_FLOAT64_C( -186.66) } }, { { 
SIMDE_FLOAT64_C( -805.67), SIMDE_FLOAT64_C( -248.07) }, { SIMDE_FLOAT64_C( -805.67), SIMDE_FLOAT64_C( -248.07), SIMDE_FLOAT64_C( -805.67), SIMDE_FLOAT64_C( -248.07) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d r = simde_mm256_broadcast_pd(SIMDE_ALIGN_CAST(const simde__m128d*, test_vec[i].a)); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } /* Test for simde_mm256_broadcast_sd. Each of the 8 cases pairs a scalar `a` with an expected vector `r` built by simde_mm256_set_pd from four copies of that same value. For every case the address of `a` is passed to simde_mm256_broadcast_sd and the result is checked against `r` with simde_assert_m256d_close; the function returns 0 after the loop. */ static int test_simde_mm256_broadcast_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float64 a; simde__m256d r; } test_vec[8] = { { SIMDE_FLOAT64_C( 800.84), simde_mm256_set_pd(SIMDE_FLOAT64_C( 800.84), SIMDE_FLOAT64_C( 800.84), SIMDE_FLOAT64_C( 800.84), SIMDE_FLOAT64_C( 800.84)) }, { SIMDE_FLOAT64_C( 700.06), simde_mm256_set_pd(SIMDE_FLOAT64_C( 700.06), SIMDE_FLOAT64_C( 700.06), SIMDE_FLOAT64_C( 700.06), SIMDE_FLOAT64_C( 700.06)) }, { SIMDE_FLOAT64_C( -801.66), simde_mm256_set_pd(SIMDE_FLOAT64_C( -801.66), SIMDE_FLOAT64_C( -801.66), SIMDE_FLOAT64_C( -801.66), SIMDE_FLOAT64_C( -801.66)) }, { SIMDE_FLOAT64_C( -941.38), simde_mm256_set_pd(SIMDE_FLOAT64_C( -941.38), SIMDE_FLOAT64_C( -941.38), SIMDE_FLOAT64_C( -941.38), SIMDE_FLOAT64_C( -941.38)) }, { SIMDE_FLOAT64_C( -346.77), simde_mm256_set_pd(SIMDE_FLOAT64_C( -346.77), SIMDE_FLOAT64_C( -346.77), SIMDE_FLOAT64_C( -346.77), SIMDE_FLOAT64_C( -346.77)) }, { SIMDE_FLOAT64_C( -833.73), simde_mm256_set_pd(SIMDE_FLOAT64_C( -833.73), SIMDE_FLOAT64_C( -833.73), SIMDE_FLOAT64_C( -833.73), SIMDE_FLOAT64_C( -833.73)) }, { SIMDE_FLOAT64_C( 315.88), simde_mm256_set_pd(SIMDE_FLOAT64_C( 315.88), SIMDE_FLOAT64_C( 315.88), SIMDE_FLOAT64_C( 315.88), SIMDE_FLOAT64_C( 315.88)) }, { SIMDE_FLOAT64_C( -868.73), simde_mm256_set_pd(SIMDE_FLOAT64_C( -868.73), SIMDE_FLOAT64_C( -868.73), SIMDE_FLOAT64_C( -868.73), SIMDE_FLOAT64_C( -868.73)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_broadcast_sd(&(test_vec[i].a)); /* NOTE(review): the trailing argument 1 is presumably the comparison precision; confirm against the simde_assert_m256d_close definition */ simde_assert_m256d_close(r,
test_vec[i].r, 1); } return 0; } /* Test for simde_mm_broadcast_ss. Each of the 8 cases pairs a scalar `a` with an expected vector `r` built by simde_mm_set_ps from four copies of that same value. For every case the address of `a` is passed to simde_mm_broadcast_ss and the result is checked against `r` with simde_assert_m128_close; the function returns 0 after the loop. */ static int test_simde_mm_broadcast_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float32 a; simde__m128 r; } test_vec[8] = { { SIMDE_FLOAT32_C( 137.82), simde_mm_set_ps(SIMDE_FLOAT32_C( 137.82), SIMDE_FLOAT32_C( 137.82), SIMDE_FLOAT32_C( 137.82), SIMDE_FLOAT32_C( 137.82)) }, { SIMDE_FLOAT32_C( -118.58), simde_mm_set_ps(SIMDE_FLOAT32_C( -118.58), SIMDE_FLOAT32_C( -118.58), SIMDE_FLOAT32_C( -118.58), SIMDE_FLOAT32_C( -118.58)) }, { SIMDE_FLOAT32_C( 963.02), simde_mm_set_ps(SIMDE_FLOAT32_C( 963.02), SIMDE_FLOAT32_C( 963.02), SIMDE_FLOAT32_C( 963.02), SIMDE_FLOAT32_C( 963.02)) }, { SIMDE_FLOAT32_C( 515.85), simde_mm_set_ps(SIMDE_FLOAT32_C( 515.85), SIMDE_FLOAT32_C( 515.85), SIMDE_FLOAT32_C( 515.85), SIMDE_FLOAT32_C( 515.85)) }, { SIMDE_FLOAT32_C( 110.78), simde_mm_set_ps(SIMDE_FLOAT32_C( 110.78), SIMDE_FLOAT32_C( 110.78), SIMDE_FLOAT32_C( 110.78), SIMDE_FLOAT32_C( 110.78)) }, { SIMDE_FLOAT32_C( -190.98), simde_mm_set_ps(SIMDE_FLOAT32_C( -190.98), SIMDE_FLOAT32_C( -190.98), SIMDE_FLOAT32_C( -190.98), SIMDE_FLOAT32_C( -190.98)) }, { SIMDE_FLOAT32_C( -429.63), simde_mm_set_ps(SIMDE_FLOAT32_C( -429.63), SIMDE_FLOAT32_C( -429.63), SIMDE_FLOAT32_C( -429.63), SIMDE_FLOAT32_C( -429.63)) }, { SIMDE_FLOAT32_C( -924.63), simde_mm_set_ps(SIMDE_FLOAT32_C( -924.63), SIMDE_FLOAT32_C( -924.63), SIMDE_FLOAT32_C( -924.63), SIMDE_FLOAT32_C( -924.63)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_broadcast_ss(&(test_vec[i].a)); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } /* Test for simde_mm256_broadcast_ss. Each of the 8 cases pairs a scalar `a` with an expected vector `r` built by simde_mm256_set_ps from eight copies of that same value. */ static int test_simde_mm256_broadcast_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float32 a; simde__m256 r; } test_vec[8] = { { SIMDE_FLOAT32_C( -970.00), simde_mm256_set_ps(SIMDE_FLOAT32_C( -970.00), SIMDE_FLOAT32_C( -970.00), SIMDE_FLOAT32_C( -970.00), SIMDE_FLOAT32_C( -970.00), SIMDE_FLOAT32_C( -970.00), SIMDE_FLOAT32_C( -970.00), SIMDE_FLOAT32_C( -970.00), SIMDE_FLOAT32_C( -970.00)) }, { SIMDE_FLOAT32_C(
425.08), simde_mm256_set_ps(SIMDE_FLOAT32_C( 425.08), SIMDE_FLOAT32_C( 425.08), SIMDE_FLOAT32_C( 425.08), SIMDE_FLOAT32_C( 425.08), SIMDE_FLOAT32_C( 425.08), SIMDE_FLOAT32_C( 425.08), SIMDE_FLOAT32_C( 425.08), SIMDE_FLOAT32_C( 425.08)) }, { SIMDE_FLOAT32_C( 814.32), simde_mm256_set_ps(SIMDE_FLOAT32_C( 814.32), SIMDE_FLOAT32_C( 814.32), SIMDE_FLOAT32_C( 814.32), SIMDE_FLOAT32_C( 814.32), SIMDE_FLOAT32_C( 814.32), SIMDE_FLOAT32_C( 814.32), SIMDE_FLOAT32_C( 814.32), SIMDE_FLOAT32_C( 814.32)) }, { SIMDE_FLOAT32_C( 309.83), simde_mm256_set_ps(SIMDE_FLOAT32_C( 309.83), SIMDE_FLOAT32_C( 309.83), SIMDE_FLOAT32_C( 309.83), SIMDE_FLOAT32_C( 309.83), SIMDE_FLOAT32_C( 309.83), SIMDE_FLOAT32_C( 309.83), SIMDE_FLOAT32_C( 309.83), SIMDE_FLOAT32_C( 309.83)) }, { SIMDE_FLOAT32_C( -410.17), simde_mm256_set_ps(SIMDE_FLOAT32_C( -410.17), SIMDE_FLOAT32_C( -410.17), SIMDE_FLOAT32_C( -410.17), SIMDE_FLOAT32_C( -410.17), SIMDE_FLOAT32_C( -410.17), SIMDE_FLOAT32_C( -410.17), SIMDE_FLOAT32_C( -410.17), SIMDE_FLOAT32_C( -410.17)) }, { SIMDE_FLOAT32_C( -592.37), simde_mm256_set_ps(SIMDE_FLOAT32_C( -592.37), SIMDE_FLOAT32_C( -592.37), SIMDE_FLOAT32_C( -592.37), SIMDE_FLOAT32_C( -592.37), SIMDE_FLOAT32_C( -592.37), SIMDE_FLOAT32_C( -592.37), SIMDE_FLOAT32_C( -592.37), SIMDE_FLOAT32_C( -592.37)) }, { SIMDE_FLOAT32_C( 297.30), simde_mm256_set_ps(SIMDE_FLOAT32_C( 297.30), SIMDE_FLOAT32_C( 297.30), SIMDE_FLOAT32_C( 297.30), SIMDE_FLOAT32_C( 297.30), SIMDE_FLOAT32_C( 297.30), SIMDE_FLOAT32_C( 297.30), SIMDE_FLOAT32_C( 297.30), SIMDE_FLOAT32_C( 297.30)) }, { SIMDE_FLOAT32_C( -549.85), simde_mm256_set_ps(SIMDE_FLOAT32_C( -549.85), SIMDE_FLOAT32_C( -549.85), SIMDE_FLOAT32_C( -549.85), SIMDE_FLOAT32_C( -549.85), SIMDE_FLOAT32_C( -549.85), SIMDE_FLOAT32_C( -549.85), SIMDE_FLOAT32_C( -549.85), SIMDE_FLOAT32_C( -549.85)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_broadcast_ss(&(test_vec[i].a)); simde_assert_m256_close(r, test_vec[i].r, 1); } 
return 0; } /* Test for simde_mm256_castpd128_pd256. Each of the 8 cases holds a 128-bit input `a` (two doubles) and a 256-bit `r` whose last two simde_mm256_set_pd arguments repeat those doubles and whose first two are 0.00. The result and `r` are both converted with simde__m256d_to_private and only their m128d[0] members are compared, so the 0.00 entries of `r` are never asserted. NOTE(review): presumably the other half is skipped because the cast leaves it undefined; confirm against the intrinsic's documentation. Returns 0 after the loop. */ static int test_simde_mm256_castpd128_pd256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m256d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -698.37), SIMDE_FLOAT64_C( 516.77)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -698.37), SIMDE_FLOAT64_C( 516.77)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -509.42), SIMDE_FLOAT64_C( -285.35)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -509.42), SIMDE_FLOAT64_C( -285.35)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -919.57), SIMDE_FLOAT64_C( 938.94)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -919.57), SIMDE_FLOAT64_C( 938.94)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 39.71), SIMDE_FLOAT64_C( 227.66)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 39.71), SIMDE_FLOAT64_C( 227.66)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 548.32), SIMDE_FLOAT64_C( -120.08)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 548.32), SIMDE_FLOAT64_C( -120.08)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -962.85), SIMDE_FLOAT64_C( 234.42)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -962.85), SIMDE_FLOAT64_C( 234.42)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -939.16), SIMDE_FLOAT64_C( -985.25)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -939.16), SIMDE_FLOAT64_C( -985.25)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 302.61), SIMDE_FLOAT64_C( 350.72)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 302.61), SIMDE_FLOAT64_C( 350.72)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d_private r = simde__m256d_to_private(simde_mm256_castpd128_pd256(test_vec[i].a)); simde__m256d_private expected = simde__m256d_to_private(test_vec[i].r); /* only m128d[0] of each private view is compared; m128d[1] is never inspected */ simde_assert_m128d_equal(r.m128d[0],
expected.m128d[0]); } return 0; } /* Test for simde_mm256_castpd256_pd128. Each of the 8 cases holds a 256-bit input `a` and a 128-bit `r` equal to the last two simde_mm256_set_pd arguments of `a`. The result and `r` are both reinterpreted with simde_mm_castpd_si128 and compared with simde_assert_m128i_equal (presumably an exact comparison of the integer view rather than a tolerance check; confirm against the macro). */ static int test_simde_mm256_castpd256_pd128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m128d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -956.85), SIMDE_FLOAT64_C( 625.41), SIMDE_FLOAT64_C( 728.85), SIMDE_FLOAT64_C( 239.74)), simde_mm_set_pd(SIMDE_FLOAT64_C( 728.85), SIMDE_FLOAT64_C( 239.74)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -642.31), SIMDE_FLOAT64_C( -953.04), SIMDE_FLOAT64_C( -288.66), SIMDE_FLOAT64_C( 999.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( -288.66), SIMDE_FLOAT64_C( 999.01)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -330.74), SIMDE_FLOAT64_C( 875.72), SIMDE_FLOAT64_C( -137.28), SIMDE_FLOAT64_C( -787.08)), simde_mm_set_pd(SIMDE_FLOAT64_C( -137.28), SIMDE_FLOAT64_C( -787.08)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -616.04), SIMDE_FLOAT64_C( -762.33), SIMDE_FLOAT64_C( 806.25), SIMDE_FLOAT64_C( -621.65)), simde_mm_set_pd(SIMDE_FLOAT64_C( 806.25), SIMDE_FLOAT64_C( -621.65)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -28.93), SIMDE_FLOAT64_C( 468.91), SIMDE_FLOAT64_C( 242.39), SIMDE_FLOAT64_C( -4.32)), simde_mm_set_pd(SIMDE_FLOAT64_C( 242.39), SIMDE_FLOAT64_C( -4.32)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -635.10), SIMDE_FLOAT64_C( -479.80), SIMDE_FLOAT64_C( 479.34), SIMDE_FLOAT64_C( 994.78)), simde_mm_set_pd(SIMDE_FLOAT64_C( 479.34), SIMDE_FLOAT64_C( 994.78)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 839.27), SIMDE_FLOAT64_C( -846.55), SIMDE_FLOAT64_C( -287.23), SIMDE_FLOAT64_C( 498.33)), simde_mm_set_pd(SIMDE_FLOAT64_C( -287.23), SIMDE_FLOAT64_C( 498.33)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -68.67), SIMDE_FLOAT64_C( 956.25), SIMDE_FLOAT64_C( 462.89), SIMDE_FLOAT64_C( -555.47)), simde_mm_set_pd(SIMDE_FLOAT64_C( 462.89), SIMDE_FLOAT64_C( -555.47)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm256_castpd256_pd128(test_vec[i].a); simde_assert_m128i_equal(simde_mm_castpd_si128(r), simde_mm_castpd_si128(test_vec[i].r)); } return
0; } static int test_simde_mm256_ceil_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -242.41), SIMDE_FLOAT64_C( -377.59), SIMDE_FLOAT64_C( 787.73), SIMDE_FLOAT64_C( 903.22)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -242.00), SIMDE_FLOAT64_C( -377.00), SIMDE_FLOAT64_C( 788.00), SIMDE_FLOAT64_C( 904.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -851.63), SIMDE_FLOAT64_C( -168.29), SIMDE_FLOAT64_C( -47.72), SIMDE_FLOAT64_C( -227.89)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -851.00), SIMDE_FLOAT64_C( -168.00), SIMDE_FLOAT64_C( -47.00), SIMDE_FLOAT64_C( -227.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 922.71), SIMDE_FLOAT64_C( -494.40), SIMDE_FLOAT64_C( -263.96), SIMDE_FLOAT64_C( -353.64)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 923.00), SIMDE_FLOAT64_C( -494.00), SIMDE_FLOAT64_C( -263.00), SIMDE_FLOAT64_C( -353.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -611.84), SIMDE_FLOAT64_C( 512.63), SIMDE_FLOAT64_C( -238.35), SIMDE_FLOAT64_C( -170.16)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -611.00), SIMDE_FLOAT64_C( 513.00), SIMDE_FLOAT64_C( -238.00), SIMDE_FLOAT64_C( -170.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 816.43), SIMDE_FLOAT64_C( 815.17), SIMDE_FLOAT64_C( 214.52), SIMDE_FLOAT64_C( -660.09)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 817.00), SIMDE_FLOAT64_C( 816.00), SIMDE_FLOAT64_C( 215.00), SIMDE_FLOAT64_C( -660.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 434.90), SIMDE_FLOAT64_C( 54.02), SIMDE_FLOAT64_C( -447.58), SIMDE_FLOAT64_C( 766.46)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 435.00), SIMDE_FLOAT64_C( 55.00), SIMDE_FLOAT64_C( -447.00), SIMDE_FLOAT64_C( 767.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 372.78), SIMDE_FLOAT64_C( -135.62), SIMDE_FLOAT64_C( 715.18), SIMDE_FLOAT64_C( -737.69)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 373.00), SIMDE_FLOAT64_C( -135.00), SIMDE_FLOAT64_C( 716.00), SIMDE_FLOAT64_C( -737.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 491.75), SIMDE_FLOAT64_C( 481.14), 
SIMDE_FLOAT64_C( 571.31), SIMDE_FLOAT64_C( 426.99)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 492.00), SIMDE_FLOAT64_C( 482.00), SIMDE_FLOAT64_C( 572.00), SIMDE_FLOAT64_C( 427.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_ceil_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_ceil_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 719.50), SIMDE_FLOAT32_C( 423.42), SIMDE_FLOAT32_C( -325.80), SIMDE_FLOAT32_C( -7.65), SIMDE_FLOAT32_C( 549.35), SIMDE_FLOAT32_C( 88.23), SIMDE_FLOAT32_C( 442.11), SIMDE_FLOAT32_C( 103.18)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 720.00), SIMDE_FLOAT32_C( 424.00), SIMDE_FLOAT32_C( -325.00), SIMDE_FLOAT32_C( -7.00), SIMDE_FLOAT32_C( 550.00), SIMDE_FLOAT32_C( 89.00), SIMDE_FLOAT32_C( 443.00), SIMDE_FLOAT32_C( 104.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 295.26), SIMDE_FLOAT32_C( 174.52), SIMDE_FLOAT32_C( 976.35), SIMDE_FLOAT32_C( -556.97), SIMDE_FLOAT32_C( -188.36), SIMDE_FLOAT32_C( -888.83), SIMDE_FLOAT32_C( -89.34), SIMDE_FLOAT32_C( 743.04)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 296.00), SIMDE_FLOAT32_C( 175.00), SIMDE_FLOAT32_C( 977.00), SIMDE_FLOAT32_C( -556.00), SIMDE_FLOAT32_C( -188.00), SIMDE_FLOAT32_C( -888.00), SIMDE_FLOAT32_C( -89.00), SIMDE_FLOAT32_C( 744.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -846.44), SIMDE_FLOAT32_C( 768.02), SIMDE_FLOAT32_C( 217.87), SIMDE_FLOAT32_C( 724.14), SIMDE_FLOAT32_C( -751.28), SIMDE_FLOAT32_C( 377.99), SIMDE_FLOAT32_C( -892.77), SIMDE_FLOAT32_C( -779.41)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -846.00), SIMDE_FLOAT32_C( 769.00), SIMDE_FLOAT32_C( 218.00), SIMDE_FLOAT32_C( 725.00), SIMDE_FLOAT32_C( -751.00), SIMDE_FLOAT32_C( 378.00), SIMDE_FLOAT32_C( -892.00), SIMDE_FLOAT32_C( -779.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -291.21), SIMDE_FLOAT32_C( -433.62), SIMDE_FLOAT32_C( 331.96), 
SIMDE_FLOAT32_C( 13.15), SIMDE_FLOAT32_C( -6.69), SIMDE_FLOAT32_C( -467.28), SIMDE_FLOAT32_C( -722.45), SIMDE_FLOAT32_C( -121.36)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -291.00), SIMDE_FLOAT32_C( -433.00), SIMDE_FLOAT32_C( 332.00), SIMDE_FLOAT32_C( 14.00), SIMDE_FLOAT32_C( -6.00), SIMDE_FLOAT32_C( -467.00), SIMDE_FLOAT32_C( -722.00), SIMDE_FLOAT32_C( -121.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 337.66), SIMDE_FLOAT32_C( 332.63), SIMDE_FLOAT32_C( 164.76), SIMDE_FLOAT32_C( 401.70), SIMDE_FLOAT32_C( -359.22), SIMDE_FLOAT32_C( -704.77), SIMDE_FLOAT32_C( 780.49), SIMDE_FLOAT32_C( -605.11)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 338.00), SIMDE_FLOAT32_C( 333.00), SIMDE_FLOAT32_C( 165.00), SIMDE_FLOAT32_C( 402.00), SIMDE_FLOAT32_C( -359.00), SIMDE_FLOAT32_C( -704.00), SIMDE_FLOAT32_C( 781.00), SIMDE_FLOAT32_C( -605.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -670.88), SIMDE_FLOAT32_C( 812.79), SIMDE_FLOAT32_C( -668.93), SIMDE_FLOAT32_C( 476.98), SIMDE_FLOAT32_C( 590.12), SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( -683.68), SIMDE_FLOAT32_C( -789.77)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -670.00), SIMDE_FLOAT32_C( 813.00), SIMDE_FLOAT32_C( -668.00), SIMDE_FLOAT32_C( 477.00), SIMDE_FLOAT32_C( 591.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -683.00), SIMDE_FLOAT32_C( -789.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -872.42), SIMDE_FLOAT32_C( -77.05), SIMDE_FLOAT32_C( -381.51), SIMDE_FLOAT32_C( -862.58), SIMDE_FLOAT32_C( -846.15), SIMDE_FLOAT32_C( -734.49), SIMDE_FLOAT32_C( -50.68), SIMDE_FLOAT32_C( 512.52)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -872.00), SIMDE_FLOAT32_C( -77.00), SIMDE_FLOAT32_C( -381.00), SIMDE_FLOAT32_C( -862.00), SIMDE_FLOAT32_C( -846.00), SIMDE_FLOAT32_C( -734.00), SIMDE_FLOAT32_C( -50.00), SIMDE_FLOAT32_C( 513.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -195.22), SIMDE_FLOAT32_C( -18.78), SIMDE_FLOAT32_C( 479.49), SIMDE_FLOAT32_C( 552.41), SIMDE_FLOAT32_C( 445.93), SIMDE_FLOAT32_C( -70.46), SIMDE_FLOAT32_C( -477.54), SIMDE_FLOAT32_C( 557.19)), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( -195.00), SIMDE_FLOAT32_C( -18.00), SIMDE_FLOAT32_C( 480.00), SIMDE_FLOAT32_C( 553.00), SIMDE_FLOAT32_C( 446.00), SIMDE_FLOAT32_C( -70.00), SIMDE_FLOAT32_C( -477.00), SIMDE_FLOAT32_C( 558.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_ceil_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cmp_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[] = { /* 0 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -529.39), SIMDE_FLOAT64_C( 120.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( -464.46), SIMDE_FLOAT64_C( 787.53)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 952.78), SIMDE_FLOAT64_C( -168.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -565.68), SIMDE_FLOAT64_C( -927.74)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 154.75), SIMDE_FLOAT64_C( 421.91)), simde_mm_set_pd(SIMDE_FLOAT64_C( 154.75), SIMDE_FLOAT64_C( 421.91)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 824.22), SIMDE_FLOAT64_C( 893.34)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( -441.87), SIMDE_FLOAT64_C( 716.23)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, /* 1 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( 696.22), SIMDE_FLOAT64_C( -762.68)), simde_mm_set_pd(SIMDE_FLOAT64_C( 919.14), SIMDE_FLOAT64_C( 193.81)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 633.35), SIMDE_FLOAT64_C( 108.68)), simde_mm_set_pd(SIMDE_FLOAT64_C( -781.31), SIMDE_FLOAT64_C( -480.81)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -959.59), SIMDE_FLOAT64_C( 537.83)), 
simde_mm_set_pd(SIMDE_FLOAT64_C( -959.59), SIMDE_FLOAT64_C( 537.83)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -264.04), SIMDE_FLOAT64_C( 375.43)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( -794.42), SIMDE_FLOAT64_C( 595.55)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, /* 2 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( 867.95), SIMDE_FLOAT64_C( 922.57)), simde_mm_set_pd(SIMDE_FLOAT64_C( 898.26), SIMDE_FLOAT64_C( 935.85)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 202.85), SIMDE_FLOAT64_C( -31.08)), simde_mm_set_pd(SIMDE_FLOAT64_C( -774.78), SIMDE_FLOAT64_C( -447.16)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -73.41), SIMDE_FLOAT64_C( -449.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( -73.41), SIMDE_FLOAT64_C( -449.20)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 566.15), SIMDE_FLOAT64_C( 58.04)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 933.00), SIMDE_FLOAT64_C( 506.59)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, /* 3 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -556.30), SIMDE_FLOAT64_C( -294.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( 925.83), SIMDE_FLOAT64_C( 574.06)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 845.95), SIMDE_FLOAT64_C( -455.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( 819.95), SIMDE_FLOAT64_C( -999.32)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 836.45), SIMDE_FLOAT64_C( 52.64)), simde_mm_set_pd(SIMDE_FLOAT64_C( 836.45), 
SIMDE_FLOAT64_C( 52.64)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 885.07), SIMDE_FLOAT64_C( 301.93)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_x_mm_setone_pd() }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 439.24), SIMDE_FLOAT64_C( -579.89)), simde_x_mm_setone_pd() }, /* 4 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -870.88), SIMDE_FLOAT64_C( 245.45)), simde_mm_set_pd(SIMDE_FLOAT64_C( -24.26), SIMDE_FLOAT64_C( 522.18)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 197.35), SIMDE_FLOAT64_C( 431.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( -239.79), SIMDE_FLOAT64_C( -923.26)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -817.59), SIMDE_FLOAT64_C( 878.84)), simde_mm_set_pd(SIMDE_FLOAT64_C( -817.59), SIMDE_FLOAT64_C( 878.84)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 732.05), SIMDE_FLOAT64_C( 358.02)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_x_mm_setone_pd() }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 376.41), SIMDE_FLOAT64_C( -150.39)), simde_x_mm_setone_pd() }, /* 5 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -752.97), SIMDE_FLOAT64_C( -94.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( 129.56), SIMDE_FLOAT64_C( 291.14)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -694.60), SIMDE_FLOAT64_C( 283.15)), simde_mm_set_pd(SIMDE_FLOAT64_C( -744.59), SIMDE_FLOAT64_C( -281.74)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -245.17), SIMDE_FLOAT64_C( 713.29)), simde_mm_set_pd(SIMDE_FLOAT64_C( -245.17), SIMDE_FLOAT64_C( 713.29)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 800.54), SIMDE_FLOAT64_C( 336.83)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_x_mm_setone_pd() }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), 
simde_mm_set_pd(SIMDE_FLOAT64_C( -984.20), SIMDE_FLOAT64_C( 916.79)), simde_x_mm_setone_pd() }, /* 6 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( 418.56), SIMDE_FLOAT64_C( 721.42)), simde_mm_set_pd(SIMDE_FLOAT64_C( 588.80), SIMDE_FLOAT64_C( 954.29)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -876.64), SIMDE_FLOAT64_C( 774.45)), simde_mm_set_pd(SIMDE_FLOAT64_C( -978.38), SIMDE_FLOAT64_C( -374.92)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 421.63), SIMDE_FLOAT64_C( 686.94)), simde_mm_set_pd(SIMDE_FLOAT64_C( 421.63), SIMDE_FLOAT64_C( 686.94)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 562.25), SIMDE_FLOAT64_C( 712.76)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_x_mm_setone_pd() }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( -34.73), SIMDE_FLOAT64_C( 32.85)), simde_x_mm_setone_pd() }, /* 7 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -970.08), SIMDE_FLOAT64_C( 342.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( -564.35), SIMDE_FLOAT64_C( 715.16)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 74.04), SIMDE_FLOAT64_C( 782.97)), simde_mm_set_pd(SIMDE_FLOAT64_C( -252.40), SIMDE_FLOAT64_C( -653.45)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -820.35), SIMDE_FLOAT64_C( 846.68)), simde_mm_set_pd(SIMDE_FLOAT64_C( -820.35), SIMDE_FLOAT64_C( 846.68)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 262.97), SIMDE_FLOAT64_C( -220.11)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 652.85), SIMDE_FLOAT64_C( 169.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, /* 8 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( 61.55), SIMDE_FLOAT64_C( -860.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( 131.29), 
SIMDE_FLOAT64_C( -493.79)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 793.95), SIMDE_FLOAT64_C( 172.98)), simde_mm_set_pd(SIMDE_FLOAT64_C( -232.93), SIMDE_FLOAT64_C( -38.95)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 900.57), SIMDE_FLOAT64_C( 468.19)), simde_mm_set_pd(SIMDE_FLOAT64_C( 900.57), SIMDE_FLOAT64_C( 468.19)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -955.72), SIMDE_FLOAT64_C( -966.92)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_x_mm_setone_pd() }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 955.81), SIMDE_FLOAT64_C( 521.19)), simde_x_mm_setone_pd() }, /* 9 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( 295.38), SIMDE_FLOAT64_C( -590.44)), simde_mm_set_pd(SIMDE_FLOAT64_C( 311.26), SIMDE_FLOAT64_C( 337.73)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -88.55), SIMDE_FLOAT64_C( -176.19)), simde_mm_set_pd(SIMDE_FLOAT64_C( -876.40), SIMDE_FLOAT64_C( -707.99)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 723.57), SIMDE_FLOAT64_C( 487.84)), simde_mm_set_pd(SIMDE_FLOAT64_C( 723.57), SIMDE_FLOAT64_C( 487.84)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -138.67), SIMDE_FLOAT64_C( -722.72)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_x_mm_setone_pd() }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 469.74), SIMDE_FLOAT64_C( -238.12)), simde_x_mm_setone_pd() }, /* 10 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -964.03), SIMDE_FLOAT64_C( -655.88)), simde_mm_set_pd(SIMDE_FLOAT64_C( 882.48), SIMDE_FLOAT64_C( -632.34)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -490.96), SIMDE_FLOAT64_C( -663.74)), simde_mm_set_pd(SIMDE_FLOAT64_C( -923.12), SIMDE_FLOAT64_C( -723.09)), 
simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -820.28), SIMDE_FLOAT64_C( 245.58)), simde_mm_set_pd(SIMDE_FLOAT64_C( -820.28), SIMDE_FLOAT64_C( 245.58)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -45.30), SIMDE_FLOAT64_C( -758.31)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_x_mm_setone_pd() }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( -336.62), SIMDE_FLOAT64_C( 364.29)), simde_x_mm_setone_pd() }, /* 11 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -362.47), SIMDE_FLOAT64_C( -433.79)), simde_mm_set_pd(SIMDE_FLOAT64_C( 48.67), SIMDE_FLOAT64_C( 358.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -861.52), SIMDE_FLOAT64_C( 340.11)), simde_mm_set_pd(SIMDE_FLOAT64_C( -862.78), SIMDE_FLOAT64_C( 207.12)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -51.45), SIMDE_FLOAT64_C( -796.45)), simde_mm_set_pd(SIMDE_FLOAT64_C( -51.45), SIMDE_FLOAT64_C( -796.45)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 402.78), SIMDE_FLOAT64_C( -860.46)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 908.56), SIMDE_FLOAT64_C( -80.23)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, /* 12 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -144.07), SIMDE_FLOAT64_C( 314.22)), simde_mm_set_pd(SIMDE_FLOAT64_C( -99.92), SIMDE_FLOAT64_C( 650.58)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -107.66), SIMDE_FLOAT64_C( 861.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( -934.47), SIMDE_FLOAT64_C( -412.02)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -526.73), SIMDE_FLOAT64_C( 343.68)), 
simde_mm_set_pd(SIMDE_FLOAT64_C( -526.73), SIMDE_FLOAT64_C( 343.68)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -938.85), SIMDE_FLOAT64_C( 543.93)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 129.05), SIMDE_FLOAT64_C( -765.37)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, /* 13 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( 61.28), SIMDE_FLOAT64_C( -648.14)), simde_mm_set_pd(SIMDE_FLOAT64_C( 99.59), SIMDE_FLOAT64_C( -325.86)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 939.35), SIMDE_FLOAT64_C( -697.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 734.09), SIMDE_FLOAT64_C( -772.91)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 218.18), SIMDE_FLOAT64_C( 171.79)), simde_mm_set_pd(SIMDE_FLOAT64_C( 218.18), SIMDE_FLOAT64_C( 171.79)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 886.78), SIMDE_FLOAT64_C( 528.38)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 952.68), SIMDE_FLOAT64_C( 182.51)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, /* 14 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -948.92), SIMDE_FLOAT64_C( 488.55)), simde_mm_set_pd(SIMDE_FLOAT64_C( 567.68), SIMDE_FLOAT64_C( 834.87)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -912.43), SIMDE_FLOAT64_C( -847.25)), simde_mm_set_pd(SIMDE_FLOAT64_C( -926.68), SIMDE_FLOAT64_C( -927.41)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -280.98), SIMDE_FLOAT64_C( -321.44)), simde_mm_set_pd(SIMDE_FLOAT64_C( -280.98), SIMDE_FLOAT64_C( -321.44)), 
simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 508.36), SIMDE_FLOAT64_C( -377.93)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 632.00), SIMDE_FLOAT64_C( 236.33)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, /* 15 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -534.25), SIMDE_FLOAT64_C( 462.64)), simde_mm_set_pd(SIMDE_FLOAT64_C( -400.74), SIMDE_FLOAT64_C( 786.60)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 22.21), SIMDE_FLOAT64_C( 3.85)), simde_mm_set_pd(SIMDE_FLOAT64_C( -806.06), SIMDE_FLOAT64_C( -774.50)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -135.43), SIMDE_FLOAT64_C( 5.39)), simde_mm_set_pd(SIMDE_FLOAT64_C( -135.43), SIMDE_FLOAT64_C( 5.39)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -328.86), SIMDE_FLOAT64_C( -1.43)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_x_mm_setone_pd() }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( -578.68), SIMDE_FLOAT64_C( -504.59)), simde_x_mm_setone_pd() }, /* 16 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -183.81), SIMDE_FLOAT64_C( 940.71)), simde_mm_set_pd(SIMDE_FLOAT64_C( 116.32), SIMDE_FLOAT64_C( 968.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 162.80), SIMDE_FLOAT64_C( 230.56)), simde_mm_set_pd(SIMDE_FLOAT64_C( 42.45), SIMDE_FLOAT64_C( -802.62)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 372.34), SIMDE_FLOAT64_C( 537.19)), simde_mm_set_pd(SIMDE_FLOAT64_C( 372.34), SIMDE_FLOAT64_C( 537.19)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -170.70), SIMDE_FLOAT64_C( 78.35)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 
0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( -787.71), SIMDE_FLOAT64_C( -624.37)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, /* 17 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( 571.06), SIMDE_FLOAT64_C( -858.35)), simde_mm_set_pd(SIMDE_FLOAT64_C( 935.34), SIMDE_FLOAT64_C( 933.83)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 66.28), SIMDE_FLOAT64_C( 64.16)), simde_mm_set_pd(SIMDE_FLOAT64_C( -291.80), SIMDE_FLOAT64_C( -28.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -336.17), SIMDE_FLOAT64_C( -225.92)), simde_mm_set_pd(SIMDE_FLOAT64_C( -336.17), SIMDE_FLOAT64_C( -225.92)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -780.19), SIMDE_FLOAT64_C( 709.48)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( -626.75), SIMDE_FLOAT64_C( -830.11)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, /* 18 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -366.69), SIMDE_FLOAT64_C( 668.86)), simde_mm_set_pd(SIMDE_FLOAT64_C( 797.88), SIMDE_FLOAT64_C( 703.26)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 789.39), SIMDE_FLOAT64_C( 864.32)), simde_mm_set_pd(SIMDE_FLOAT64_C( -300.63), SIMDE_FLOAT64_C( -736.94)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 83.76), SIMDE_FLOAT64_C( -578.41)), simde_mm_set_pd(SIMDE_FLOAT64_C( 83.76), SIMDE_FLOAT64_C( -578.41)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 92.61), SIMDE_FLOAT64_C( 780.89)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), 
simde_mm_set_pd(SIMDE_FLOAT64_C( 133.54), SIMDE_FLOAT64_C( 148.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, /* 19 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -871.45), SIMDE_FLOAT64_C( -75.05)), simde_mm_set_pd(SIMDE_FLOAT64_C( 790.19), SIMDE_FLOAT64_C( 387.76)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -715.95), SIMDE_FLOAT64_C( -242.69)), simde_mm_set_pd(SIMDE_FLOAT64_C( -870.55), SIMDE_FLOAT64_C( -419.34)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -163.63), SIMDE_FLOAT64_C( 323.84)), simde_mm_set_pd(SIMDE_FLOAT64_C( -163.63), SIMDE_FLOAT64_C( 323.84)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -855.58), SIMDE_FLOAT64_C( 821.79)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_x_mm_setone_pd() }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( -759.48), SIMDE_FLOAT64_C( -410.97)), simde_x_mm_setone_pd() }, /* 20 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( 608.05), SIMDE_FLOAT64_C( 305.86)), simde_mm_set_pd(SIMDE_FLOAT64_C( 786.81), SIMDE_FLOAT64_C( 485.46)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -332.74), SIMDE_FLOAT64_C( -240.33)), simde_mm_set_pd(SIMDE_FLOAT64_C( -490.37), SIMDE_FLOAT64_C( -482.95)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 959.57), SIMDE_FLOAT64_C( -245.61)), simde_mm_set_pd(SIMDE_FLOAT64_C( 959.57), SIMDE_FLOAT64_C( -245.61)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 471.52), SIMDE_FLOAT64_C( -254.36)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_x_mm_setone_pd() }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 805.34), SIMDE_FLOAT64_C( 47.10)), simde_x_mm_setone_pd() }, /* 21 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -883.05), SIMDE_FLOAT64_C( 
-685.82)), simde_mm_set_pd(SIMDE_FLOAT64_C( 437.76), SIMDE_FLOAT64_C( -261.16)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 25.17), SIMDE_FLOAT64_C( -978.34)), simde_mm_set_pd(SIMDE_FLOAT64_C( -316.84), SIMDE_FLOAT64_C( -988.59)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -278.35), SIMDE_FLOAT64_C( 800.33)), simde_mm_set_pd(SIMDE_FLOAT64_C( -278.35), SIMDE_FLOAT64_C( 800.33)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 504.01), SIMDE_FLOAT64_C( -993.03)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_x_mm_setone_pd() }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( -482.47), SIMDE_FLOAT64_C( -682.33)), simde_x_mm_setone_pd() }, /* 22 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -947.05), SIMDE_FLOAT64_C( 497.51)), simde_mm_set_pd(SIMDE_FLOAT64_C( -479.84), SIMDE_FLOAT64_C( 771.58)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -957.22), SIMDE_FLOAT64_C( -498.17)), simde_mm_set_pd(SIMDE_FLOAT64_C( -996.36), SIMDE_FLOAT64_C( -893.25)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -196.35), SIMDE_FLOAT64_C( 70.52)), simde_mm_set_pd(SIMDE_FLOAT64_C( -196.35), SIMDE_FLOAT64_C( 70.52)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -895.00), SIMDE_FLOAT64_C( -516.24)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_x_mm_setone_pd() }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( -605.59), SIMDE_FLOAT64_C( 95.56)), simde_x_mm_setone_pd() }, /* 23 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -277.10), SIMDE_FLOAT64_C( 593.77)), simde_mm_set_pd(SIMDE_FLOAT64_C( -225.52), SIMDE_FLOAT64_C( 955.22)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 210.46), SIMDE_FLOAT64_C( -668.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( -776.93)), 
simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 268.81), SIMDE_FLOAT64_C( 42.66)), simde_mm_set_pd(SIMDE_FLOAT64_C( 268.81), SIMDE_FLOAT64_C( 42.66)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -67.56), SIMDE_FLOAT64_C( -371.19)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( -744.56), SIMDE_FLOAT64_C( -864.69)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, /* 24 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( 175.90), SIMDE_FLOAT64_C( 695.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( 929.95), SIMDE_FLOAT64_C( 903.96)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -218.17), SIMDE_FLOAT64_C( -840.49)), simde_mm_set_pd(SIMDE_FLOAT64_C( -226.66), SIMDE_FLOAT64_C( -928.05)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 395.44), SIMDE_FLOAT64_C( 139.62)), simde_mm_set_pd(SIMDE_FLOAT64_C( 395.44), SIMDE_FLOAT64_C( 139.62)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -61.05), SIMDE_FLOAT64_C( 478.18)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_x_mm_setone_pd() }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( -577.95), SIMDE_FLOAT64_C( -574.53)), simde_x_mm_setone_pd() }, /* 25 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( 416.40), SIMDE_FLOAT64_C( -526.24)), simde_mm_set_pd(SIMDE_FLOAT64_C( 660.03), SIMDE_FLOAT64_C( -28.91)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -774.73), SIMDE_FLOAT64_C( 566.48)), simde_mm_set_pd(SIMDE_FLOAT64_C( -917.37), SIMDE_FLOAT64_C( -880.02)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.48), SIMDE_FLOAT64_C( -440.72)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.48), SIMDE_FLOAT64_C( -440.72)), 
simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -568.05), SIMDE_FLOAT64_C( -288.94)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_x_mm_setone_pd() }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 864.36), SIMDE_FLOAT64_C( 525.81)), simde_x_mm_setone_pd() }, /* 26 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -542.28), SIMDE_FLOAT64_C( 892.19)), simde_mm_set_pd(SIMDE_FLOAT64_C( -306.92), SIMDE_FLOAT64_C( 949.05)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 412.57), SIMDE_FLOAT64_C( 620.17)), simde_mm_set_pd(SIMDE_FLOAT64_C( 59.07), SIMDE_FLOAT64_C( 108.31)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -427.72), SIMDE_FLOAT64_C( 114.96)), simde_mm_set_pd(SIMDE_FLOAT64_C( -427.72), SIMDE_FLOAT64_C( 114.96)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 946.11), SIMDE_FLOAT64_C( 48.04)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_x_mm_setone_pd() }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 318.10), SIMDE_FLOAT64_C( -874.65)), simde_x_mm_setone_pd() }, /* 27 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( 143.93), SIMDE_FLOAT64_C( 507.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( 234.55), SIMDE_FLOAT64_C( 992.31)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 929.86), SIMDE_FLOAT64_C( 964.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -205.11), SIMDE_FLOAT64_C( 847.14)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -693.95), SIMDE_FLOAT64_C( 407.42)), simde_mm_set_pd(SIMDE_FLOAT64_C( -693.95), SIMDE_FLOAT64_C( 407.42)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -669.92), SIMDE_FLOAT64_C( -839.60)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), 
simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 788.01), SIMDE_FLOAT64_C( -470.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, /* 28 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( 979.33), SIMDE_FLOAT64_C( -879.66)), simde_mm_set_pd(SIMDE_FLOAT64_C( 997.49), SIMDE_FLOAT64_C( 973.15)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -307.60), SIMDE_FLOAT64_C( 589.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( -312.27), SIMDE_FLOAT64_C( -323.93)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -607.36), SIMDE_FLOAT64_C( 554.31)), simde_mm_set_pd(SIMDE_FLOAT64_C( -607.36), SIMDE_FLOAT64_C( 554.31)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 121.31), SIMDE_FLOAT64_C( -306.77)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( -196.08), SIMDE_FLOAT64_C( -442.55)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, /* 29 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( 205.23), SIMDE_FLOAT64_C( 463.92)), simde_mm_set_pd(SIMDE_FLOAT64_C( 891.83), SIMDE_FLOAT64_C( 478.06)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 83.52), SIMDE_FLOAT64_C( -712.98)), simde_mm_set_pd(SIMDE_FLOAT64_C( -494.14), SIMDE_FLOAT64_C( -713.70)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -225.77), SIMDE_FLOAT64_C( -14.65)), simde_mm_set_pd(SIMDE_FLOAT64_C( -225.77), SIMDE_FLOAT64_C( -14.65)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 277.42), SIMDE_FLOAT64_C( -194.72)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), 
simde_mm_set_pd(SIMDE_FLOAT64_C( 401.09), SIMDE_FLOAT64_C( -152.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, /* 30 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -392.42), SIMDE_FLOAT64_C( 780.51)), simde_mm_set_pd(SIMDE_FLOAT64_C( -149.10), SIMDE_FLOAT64_C( 880.51)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -834.34), SIMDE_FLOAT64_C( 188.96)), simde_mm_set_pd(SIMDE_FLOAT64_C( -969.00), SIMDE_FLOAT64_C( -426.74)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 862.72), SIMDE_FLOAT64_C( 212.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( 862.72), SIMDE_FLOAT64_C( 212.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 70.37), SIMDE_FLOAT64_C( 306.92)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 111.27), SIMDE_FLOAT64_C( 341.53)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, /* 31 */ { simde_mm_set_pd(SIMDE_FLOAT64_C( -438.67), SIMDE_FLOAT64_C( 106.33)), simde_mm_set_pd(SIMDE_FLOAT64_C( 49.90), SIMDE_FLOAT64_C( 670.54)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -363.04), SIMDE_FLOAT64_C( 742.23)), simde_mm_set_pd(SIMDE_FLOAT64_C( -538.59), SIMDE_FLOAT64_C( 410.25)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 183.16), SIMDE_FLOAT64_C( -831.08)), simde_mm_set_pd(SIMDE_FLOAT64_C( 183.16), SIMDE_FLOAT64_C( -831.08)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 429.68), SIMDE_FLOAT64_C( -7.96)), simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_x_mm_setone_pd() }, { simde_mm_set_pd( SIMDE_MATH_NAN , SIMDE_MATH_NAN ), simde_mm_set_pd(SIMDE_FLOAT64_C( 700.81), SIMDE_FLOAT64_C( -641.71)), simde_x_mm_setone_pd() }, }; for (size_t i = 0 ; i < 1 ; i++) { simde__m128d r = 
simde_mm_cmp_pd(test_vec[(0 * 5) + i].a, test_vec[(0 * 5) + i].b, 0); simde_assert_m128d_equal(r, test_vec[0 * 5].r); r = simde_mm_cmp_pd(test_vec[(1 * 5) + i].a, test_vec[(1 * 5) + i].b, 1); simde_assert_m128d_equal(r, test_vec[1 * 5].r); r = simde_mm_cmp_pd(test_vec[(2 * 5) + i].a, test_vec[(2 * 5) + i].b, 2); simde_assert_m128d_equal(r, test_vec[2 * 5].r); r = simde_mm_cmp_pd(test_vec[(3 * 5) + i].a, test_vec[(3 * 5) + i].b, 3); simde_assert_m128d_equal(r, test_vec[3 * 5].r); r = simde_mm_cmp_pd(test_vec[(4 * 5) + i].a, test_vec[(4 * 5) + i].b, 4); simde_assert_m128d_equal(r, test_vec[4 * 5].r); r = simde_mm_cmp_pd(test_vec[(5 * 5) + i].a, test_vec[(5 * 5) + i].b, 5); simde_assert_m128d_equal(r, test_vec[5 * 5].r); r = simde_mm_cmp_pd(test_vec[(6 * 5) + i].a, test_vec[(6 * 5) + i].b, 6); simde_assert_m128d_equal(r, test_vec[6 * 5].r); r = simde_mm_cmp_pd(test_vec[(7 * 5) + i].a, test_vec[(7 * 5) + i].b, 7); simde_assert_m128d_equal(r, test_vec[7 * 5].r); r = simde_mm_cmp_pd(test_vec[(8 * 5) + i].a, test_vec[(8 * 5) + i].b, 8); simde_assert_m128d_equal(r, test_vec[8 * 5].r); r = simde_mm_cmp_pd(test_vec[(9 * 5) + i].a, test_vec[(9 * 5) + i].b, 9); simde_assert_m128d_equal(r, test_vec[9 * 5].r); r = simde_mm_cmp_pd(test_vec[(10 * 5) + i].a, test_vec[(10 * 5) + i].b, 10); simde_assert_m128d_equal(r, test_vec[10 * 5].r); r = simde_mm_cmp_pd(test_vec[(11 * 5) + i].a, test_vec[(11 * 5) + i].b, 11); simde_assert_m128d_equal(r, test_vec[11 * 5].r); r = simde_mm_cmp_pd(test_vec[(12 * 5) + i].a, test_vec[(12 * 5) + i].b, 12); simde_assert_m128d_equal(r, test_vec[12 * 5].r); r = simde_mm_cmp_pd(test_vec[(13 * 5) + i].a, test_vec[(13 * 5) + i].b, 13); simde_assert_m128d_equal(r, test_vec[13 * 5].r); r = simde_mm_cmp_pd(test_vec[(14 * 5) + i].a, test_vec[(14 * 5) + i].b, 14); simde_assert_m128d_equal(r, test_vec[14 * 5].r); r = simde_mm_cmp_pd(test_vec[(15 * 5) + i].a, test_vec[(15 * 5) + i].b, 15); simde_assert_m128d_equal(r, test_vec[15 * 5].r); r = 
simde_mm_cmp_pd(test_vec[(16 * 5) + i].a, test_vec[(16 * 5) + i].b, 16); simde_assert_m128d_equal(r, test_vec[16 * 5].r); r = simde_mm_cmp_pd(test_vec[(17 * 5) + i].a, test_vec[(17 * 5) + i].b, 17); simde_assert_m128d_equal(r, test_vec[17 * 5].r); r = simde_mm_cmp_pd(test_vec[(18 * 5) + i].a, test_vec[(18 * 5) + i].b, 18); simde_assert_m128d_equal(r, test_vec[18 * 5].r); r = simde_mm_cmp_pd(test_vec[(19 * 5) + i].a, test_vec[(19 * 5) + i].b, 19); simde_assert_m128d_equal(r, test_vec[19 * 5].r); r = simde_mm_cmp_pd(test_vec[(20 * 5) + i].a, test_vec[(20 * 5) + i].b, 20); simde_assert_m128d_equal(r, test_vec[20 * 5].r); r = simde_mm_cmp_pd(test_vec[(21 * 5) + i].a, test_vec[(21 * 5) + i].b, 21); simde_assert_m128d_equal(r, test_vec[21 * 5].r); r = simde_mm_cmp_pd(test_vec[(22 * 5) + i].a, test_vec[(22 * 5) + i].b, 22); simde_assert_m128d_equal(r, test_vec[22 * 5].r); r = simde_mm_cmp_pd(test_vec[(23 * 5) + i].a, test_vec[(23 * 5) + i].b, 23); simde_assert_m128d_equal(r, test_vec[23 * 5].r); r = simde_mm_cmp_pd(test_vec[(24 * 5) + i].a, test_vec[(24 * 5) + i].b, SIMDE_CMP_EQ_US); simde_assert_m128d_equal(r, test_vec[24 * 5].r); r = simde_mm_cmp_pd(test_vec[(25 * 5) + i].a, test_vec[(25 * 5) + i].b, 25); simde_assert_m128d_equal(r, test_vec[25 * 5].r); r = simde_mm_cmp_pd(test_vec[(26 * 5) + i].a, test_vec[(26 * 5) + i].b, 26); simde_assert_m128d_equal(r, test_vec[26 * 5].r); r = simde_mm_cmp_pd(test_vec[(27 * 5) + i].a, test_vec[(27 * 5) + i].b, 27); simde_assert_m128d_equal(r, test_vec[27 * 5].r); r = simde_mm_cmp_pd(test_vec[(28 * 5) + i].a, test_vec[(28 * 5) + i].b, 28); simde_assert_m128d_equal(r, test_vec[28 * 5].r); r = simde_mm_cmp_pd(test_vec[(29 * 5) + i].a, test_vec[(29 * 5) + i].b, 29); simde_assert_m128d_equal(r, test_vec[29 * 5].r); r = simde_mm_cmp_pd(test_vec[(30 * 5) + i].a, test_vec[(30 * 5) + i].b, 30); simde_assert_m128d_equal(r, test_vec[30 * 5].r); r = simde_mm_cmp_pd(test_vec[(31 * 5) + i].a, test_vec[(31 * 5) + i].b, 31); 
simde_assert_m128d_equal(r, test_vec[31 * 5].r); } return 0; } static int test_simde_mm_cmp_ps(SIMDE_MUNIT_TEST_ARGS) { simde__m128 a, b, r, e; a = simde_mm_set_ps(SIMDE_FLOAT32_C( 78.15), SIMDE_FLOAT32_C( 891.26), SIMDE_FLOAT32_C( -343.00), SIMDE_FLOAT32_C( 84.56)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 698.49), SIMDE_FLOAT32_C( -963.46), SIMDE_FLOAT32_C( -116.01), SIMDE_FLOAT32_C( -590.30)); e = simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_OQ); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -265.69), SIMDE_FLOAT32_C( 933.07), SIMDE_FLOAT32_C( -527.28), SIMDE_FLOAT32_C( -86.99)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -890.35), SIMDE_FLOAT32_C( -111.28), SIMDE_FLOAT32_C( 338.07), SIMDE_FLOAT32_C( -617.73)); e = simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00)); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_LT_OS); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 768.00), SIMDE_FLOAT32_C( 58.55), SIMDE_FLOAT32_C( 317.58), SIMDE_FLOAT32_C( -318.11)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 357.58), SIMDE_FLOAT32_C( 682.66), SIMDE_FLOAT32_C( 366.57), SIMDE_FLOAT32_C( -244.60)); e = simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_LE_OS); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -194.29), SIMDE_FLOAT32_C( 977.31), SIMDE_FLOAT32_C( -40.23), SIMDE_FLOAT32_C( 277.84)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 740.22), SIMDE_FLOAT32_C( 146.59), SIMDE_FLOAT32_C( 780.20), SIMDE_FLOAT32_C( 471.27)); e = simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm_cmp_ps(a, b, 
SIMDE_CMP_UNORD_Q); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -120.65), SIMDE_FLOAT32_C( -397.11), SIMDE_FLOAT32_C( 121.30), SIMDE_FLOAT32_C( -296.83)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 705.83), SIMDE_FLOAT32_C( 29.37), SIMDE_FLOAT32_C( 519.72), SIMDE_FLOAT32_C( -103.90)); e = simde_mm_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_UQ); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 303.98), SIMDE_FLOAT32_C( 122.35), SIMDE_FLOAT32_C( -371.51), SIMDE_FLOAT32_C( -830.97)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 595.80), SIMDE_FLOAT32_C( -291.27), SIMDE_FLOAT32_C( 984.58), SIMDE_FLOAT32_C( -789.32)); e = simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_NLT_US); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 390.87), SIMDE_FLOAT32_C( -500.80), SIMDE_FLOAT32_C( 239.69), SIMDE_FLOAT32_C( -651.02)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -942.42), SIMDE_FLOAT32_C( -340.28), SIMDE_FLOAT32_C( 850.37), SIMDE_FLOAT32_C( 422.67)); e = simde_mm_set_ps(SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_NLE_US); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 241.90), SIMDE_FLOAT32_C( 133.36), SIMDE_FLOAT32_C( 343.70), SIMDE_FLOAT32_C( -917.50)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -715.40), SIMDE_FLOAT32_C( -986.32), SIMDE_FLOAT32_C( 113.89), SIMDE_FLOAT32_C( 742.57)); e = simde_mm_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_ORD_Q); simde_assert_m128i_u32(simde_mm_castps_si128(r), 
==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 230.45), SIMDE_FLOAT32_C( -311.44), SIMDE_FLOAT32_C( 132.25), SIMDE_FLOAT32_C( 566.85)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -731.72), SIMDE_FLOAT32_C( 889.65), SIMDE_FLOAT32_C( -586.14), SIMDE_FLOAT32_C( -778.06)); e = simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_UQ); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 702.60), SIMDE_FLOAT32_C( -355.63), SIMDE_FLOAT32_C( 969.32), SIMDE_FLOAT32_C( 907.68)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 987.82), SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( 651.63), SIMDE_FLOAT32_C( 179.23)); e = simde_mm_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_NGE_US); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -252.24), SIMDE_FLOAT32_C( 835.98), SIMDE_FLOAT32_C( -89.83), SIMDE_FLOAT32_C( -473.83)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 473.44), SIMDE_FLOAT32_C( -13.26), SIMDE_FLOAT32_C( 522.39), SIMDE_FLOAT32_C( -29.61)); e = simde_mm_set_ps(SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_NGT_US); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -250.48), SIMDE_FLOAT32_C( -560.42), SIMDE_FLOAT32_C( -229.52), SIMDE_FLOAT32_C( 960.81)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 541.13), SIMDE_FLOAT32_C( -966.78), SIMDE_FLOAT32_C( -436.47), SIMDE_FLOAT32_C( -463.72)); e = simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_FALSE_OQ); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = 
simde_mm_set_ps(SIMDE_FLOAT32_C( 178.14), SIMDE_FLOAT32_C( -394.90), SIMDE_FLOAT32_C( -713.61), SIMDE_FLOAT32_C( -298.00)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -303.13), SIMDE_FLOAT32_C( -717.53), SIMDE_FLOAT32_C( 358.73), SIMDE_FLOAT32_C( 268.02)); e = simde_mm_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_OQ); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 241.76), SIMDE_FLOAT32_C( 236.50), SIMDE_FLOAT32_C( 831.59), SIMDE_FLOAT32_C( -144.41)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -899.96), SIMDE_FLOAT32_C( -131.90), SIMDE_FLOAT32_C( -192.42), SIMDE_FLOAT32_C( 450.27)); e = simde_mm_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00)); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_GE_OS); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -203.58), SIMDE_FLOAT32_C( -605.68), SIMDE_FLOAT32_C( 670.89), SIMDE_FLOAT32_C( -47.94)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 952.52), SIMDE_FLOAT32_C( 790.50), SIMDE_FLOAT32_C( -574.64), SIMDE_FLOAT32_C( 891.61)); e = simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00)); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_GT_OS); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 598.32), SIMDE_FLOAT32_C( -203.48), SIMDE_FLOAT32_C( 825.79), SIMDE_FLOAT32_C( 618.02)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 823.42), SIMDE_FLOAT32_C( 557.19), SIMDE_FLOAT32_C( -969.07), SIMDE_FLOAT32_C( 94.87)); e = simde_mm_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_TRUE_UQ); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -576.38), SIMDE_FLOAT32_C( -929.43), 
SIMDE_FLOAT32_C( 533.57), SIMDE_FLOAT32_C( -129.85)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -792.74), SIMDE_FLOAT32_C( -537.71), SIMDE_FLOAT32_C( -20.65), SIMDE_FLOAT32_C( 249.94)); e = simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_OS); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 545.05), SIMDE_FLOAT32_C( -368.18), SIMDE_FLOAT32_C( -221.86), SIMDE_FLOAT32_C( -21.72)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 521.14), SIMDE_FLOAT32_C( 669.26), SIMDE_FLOAT32_C( -763.27), SIMDE_FLOAT32_C( 590.72)); e = simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_LT_OQ); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 763.29), SIMDE_FLOAT32_C( 255.26), SIMDE_FLOAT32_C( 981.60), SIMDE_FLOAT32_C( 404.33)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 913.52), SIMDE_FLOAT32_C( 284.87), SIMDE_FLOAT32_C( 420.08), SIMDE_FLOAT32_C( -817.56)); e = simde_mm_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_LE_OQ); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 989.95), SIMDE_FLOAT32_C( 638.39), SIMDE_FLOAT32_C( -397.70), SIMDE_FLOAT32_C( -288.05)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 358.68), SIMDE_FLOAT32_C( -151.14), SIMDE_FLOAT32_C( 886.12), SIMDE_FLOAT32_C( -860.69)); e = simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_UNORD_S); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 995.87), SIMDE_FLOAT32_C( -552.98), SIMDE_FLOAT32_C( -836.61), 
SIMDE_FLOAT32_C( 490.77)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 436.74), SIMDE_FLOAT32_C( 515.09), SIMDE_FLOAT32_C( -164.26), SIMDE_FLOAT32_C( 49.24)); e = simde_mm_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_US); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 827.88), SIMDE_FLOAT32_C( 337.02), SIMDE_FLOAT32_C( -301.47), SIMDE_FLOAT32_C( -546.27)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 388.16), SIMDE_FLOAT32_C( -352.07), SIMDE_FLOAT32_C( -527.92), SIMDE_FLOAT32_C( 674.15)); e = simde_mm_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00)); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_NLT_UQ); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 819.64), SIMDE_FLOAT32_C( 88.69), SIMDE_FLOAT32_C( -965.38), SIMDE_FLOAT32_C( 535.26)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -769.63), SIMDE_FLOAT32_C( 452.84), SIMDE_FLOAT32_C( -471.48), SIMDE_FLOAT32_C( 287.79)); e = simde_mm_set_ps(SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_NLE_UQ); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 936.69), SIMDE_FLOAT32_C( -942.15), SIMDE_FLOAT32_C( -816.57), SIMDE_FLOAT32_C( -442.35)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -647.50), SIMDE_FLOAT32_C( -658.30), SIMDE_FLOAT32_C( 279.90), SIMDE_FLOAT32_C( -609.51)); e = simde_mm_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_ORD_S); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 97.05), SIMDE_FLOAT32_C( 576.41), SIMDE_FLOAT32_C( -784.84), SIMDE_FLOAT32_C( 209.13)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 856.70), 
SIMDE_FLOAT32_C( -622.84), SIMDE_FLOAT32_C( -415.55), SIMDE_FLOAT32_C( 89.79)); e = simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_EQ_US); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 286.91), SIMDE_FLOAT32_C( -919.92), SIMDE_FLOAT32_C( -349.88), SIMDE_FLOAT32_C( -228.88)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -295.09), SIMDE_FLOAT32_C( 514.22), SIMDE_FLOAT32_C( 562.09), SIMDE_FLOAT32_C( 891.91)); e = simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_NGE_UQ); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -640.97), SIMDE_FLOAT32_C( -490.26), SIMDE_FLOAT32_C( -933.90), SIMDE_FLOAT32_C( -330.97)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 534.02), SIMDE_FLOAT32_C( -107.29), SIMDE_FLOAT32_C( -715.08), SIMDE_FLOAT32_C( 503.15)); e = simde_mm_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_NGT_UQ); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -792.34), SIMDE_FLOAT32_C( 644.13), SIMDE_FLOAT32_C( -103.88), SIMDE_FLOAT32_C( 651.28)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 95.97), SIMDE_FLOAT32_C( 33.42), SIMDE_FLOAT32_C( -634.97), SIMDE_FLOAT32_C( 883.69)); e = simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_FALSE_OS); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 218.19), SIMDE_FLOAT32_C( -253.10), SIMDE_FLOAT32_C( 639.91), SIMDE_FLOAT32_C( 539.70)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 363.48), SIMDE_FLOAT32_C( -215.54), SIMDE_FLOAT32_C( 
-953.32), SIMDE_FLOAT32_C( 216.26)); e = simde_mm_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_NEQ_OS); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -168.77), SIMDE_FLOAT32_C( -919.40), SIMDE_FLOAT32_C( 791.57), SIMDE_FLOAT32_C( -550.77)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 879.65), SIMDE_FLOAT32_C( 140.25), SIMDE_FLOAT32_C( -548.43), SIMDE_FLOAT32_C( -209.64)); e = simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00)); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_GE_OQ); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -239.98), SIMDE_FLOAT32_C( -569.53), SIMDE_FLOAT32_C( 225.34), SIMDE_FLOAT32_C( -242.34)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 514.36), SIMDE_FLOAT32_C( 873.84), SIMDE_FLOAT32_C( 151.62), SIMDE_FLOAT32_C( 886.55)); e = simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00)); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_GT_OQ); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -63.94), SIMDE_FLOAT32_C( 491.29), SIMDE_FLOAT32_C( -755.45), SIMDE_FLOAT32_C( 869.26)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 206.87), SIMDE_FLOAT32_C( -613.92), SIMDE_FLOAT32_C( 387.86), SIMDE_FLOAT32_C( -36.43)); e = simde_mm_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm_cmp_ps(a, b, SIMDE_CMP_TRUE_US); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); return 0; }

/* Exercises simde_mm_cmp_sd once with each of the 32 comparison predicates.
 *
 * Every case follows the same four steps:
 *   1. build the inputs `a` and `b` with simde_mm_set_pd;
 *   2. build the expected vector `e`, whose first simde_mm_set_pd argument
 *      repeats the first argument used for `a` and whose second argument is
 *      either 0.00 (predicate false) or SIMDE_F64_ALL_SET (predicate true);
 *   3. run simde_mm_cmp_sd(a, b, <predicate>);
 *   4. compare `r` and `e` as 64-bit integer lanes.
 *
 * The comparison is done on the integer reinterpretation rather than on the
 * doubles themselves; SIMDE_F64_ALL_SET is presumably an all-ones bit pattern
 * (a NaN when read as a double -- TODO confirm), which a floating-point
 * equality check could not match reliably.
 *
 * Predicates are spelled with the SIMDE_CMP_* constants, in ascending numeric
 * order (0 through 31), matching how test_simde_mm_cmp_ps names them.  The
 * predicate must stay a literal constant expression at each call site, which
 * is why the cases are written out instead of being driven by a loop.
 *
 * Returns 0 after all assertions have been executed.
 */
static int
test_simde_mm_cmp_sd(SIMDE_MUNIT_TEST_ARGS) {
  simde__m128d a, b, r, e;

  /* imm8 = 0 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( 107.30), SIMDE_FLOAT64_C( -206.83));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( 787.17), SIMDE_FLOAT64_C( -721.13));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( 107.30), SIMDE_FLOAT64_C( 0.00));
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_EQ_OQ);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 1 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( 33.46), SIMDE_FLOAT64_C( 248.77));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -730.30), SIMDE_FLOAT64_C( 751.84));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( 33.46), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_LT_OS);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 2 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -53.40), SIMDE_FLOAT64_C( 23.60));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -939.31), SIMDE_FLOAT64_C( -627.35));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -53.40), SIMDE_FLOAT64_C( 0.00));
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_LE_OS);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 3 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -636.30), SIMDE_FLOAT64_C( -129.84));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( 230.46), SIMDE_FLOAT64_C( -440.12));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -636.30), SIMDE_FLOAT64_C( 0.00));
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_UNORD_Q);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 4 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( 961.48), SIMDE_FLOAT64_C( 556.89));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( 374.50), SIMDE_FLOAT64_C( 904.56));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( 961.48), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_NEQ_UQ);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 5 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -605.79), SIMDE_FLOAT64_C( -288.15));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -656.14), SIMDE_FLOAT64_C( 374.06));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -605.79), SIMDE_FLOAT64_C( 0.00));
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_NLT_US);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 6 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -592.25), SIMDE_FLOAT64_C( -155.18));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -130.80), SIMDE_FLOAT64_C( 432.83));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -592.25), SIMDE_FLOAT64_C( 0.00));
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_NLE_US);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 7 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -431.55), SIMDE_FLOAT64_C( 636.53));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -452.34), SIMDE_FLOAT64_C( -728.36));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -431.55), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_ORD_Q);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 8 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -183.31), SIMDE_FLOAT64_C( -243.73));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( 442.30), SIMDE_FLOAT64_C( 450.60));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -183.31), SIMDE_FLOAT64_C( 0.00));
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_EQ_UQ);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 9 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -210.29), SIMDE_FLOAT64_C( -50.46));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -734.21), SIMDE_FLOAT64_C( 273.75));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -210.29), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_NGE_US);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 10 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -186.57), SIMDE_FLOAT64_C( -822.86));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -462.18), SIMDE_FLOAT64_C( -425.13));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -186.57), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_NGT_US);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 11 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( 968.62), SIMDE_FLOAT64_C( -745.37));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -464.28), SIMDE_FLOAT64_C( 713.42));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( 968.62), SIMDE_FLOAT64_C( 0.00));
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_FALSE_OQ);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 12 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( 671.94), SIMDE_FLOAT64_C( -405.59));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -842.74), SIMDE_FLOAT64_C( -483.02));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( 671.94), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_NEQ_OQ);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 13 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -228.63), SIMDE_FLOAT64_C( 298.91));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -708.14), SIMDE_FLOAT64_C( 189.31));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -228.63), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_GE_OS);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 14 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -797.77), SIMDE_FLOAT64_C( -286.96));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( 288.87), SIMDE_FLOAT64_C( 398.76));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -797.77), SIMDE_FLOAT64_C( 0.00));
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_GT_OS);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 15 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -248.44), SIMDE_FLOAT64_C( 191.43));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( 602.04), SIMDE_FLOAT64_C( 999.35));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -248.44), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_TRUE_UQ);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 16 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -614.65), SIMDE_FLOAT64_C( 963.28));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -385.61), SIMDE_FLOAT64_C( 770.12));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -614.65), SIMDE_FLOAT64_C( 0.00));
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_EQ_OS);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 17 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -717.70), SIMDE_FLOAT64_C( 750.92));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( 618.97), SIMDE_FLOAT64_C( 676.03));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -717.70), SIMDE_FLOAT64_C( 0.00));
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_LT_OQ);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 18 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( 561.50), SIMDE_FLOAT64_C( 549.62));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -454.02), SIMDE_FLOAT64_C( -961.18));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( 561.50), SIMDE_FLOAT64_C( 0.00));
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_LE_OQ);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 19 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -480.89), SIMDE_FLOAT64_C( -68.38));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( 293.48), SIMDE_FLOAT64_C( 459.12));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -480.89), SIMDE_FLOAT64_C( 0.00));
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_UNORD_S);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 20 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -600.32), SIMDE_FLOAT64_C( -105.54));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -623.63), SIMDE_FLOAT64_C( 235.12));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -600.32), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_NEQ_US);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 21 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( 548.84), SIMDE_FLOAT64_C( 411.69));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( 824.49), SIMDE_FLOAT64_C( -866.20));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( 548.84), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_NLT_UQ);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 22 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( 201.44), SIMDE_FLOAT64_C( 276.75));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -301.93), SIMDE_FLOAT64_C( -238.56));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( 201.44), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_NLE_UQ);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 23 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( 250.23), SIMDE_FLOAT64_C( -604.81));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( 557.49), SIMDE_FLOAT64_C( 137.99));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( 250.23), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_ORD_S);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 24 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -235.15), SIMDE_FLOAT64_C( -121.76));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -394.35), SIMDE_FLOAT64_C( 272.69));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -235.15), SIMDE_FLOAT64_C( 0.00));
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_EQ_US);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 25 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( 119.18), SIMDE_FLOAT64_C( 423.89));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( 610.02), SIMDE_FLOAT64_C( -437.27));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( 119.18), SIMDE_FLOAT64_C( 0.00));
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_NGE_UQ);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 26 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -156.34), SIMDE_FLOAT64_C( -571.13));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -665.54), SIMDE_FLOAT64_C( -18.98));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -156.34), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_NGT_UQ);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 27 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( 685.87), SIMDE_FLOAT64_C( -600.86));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -804.08), SIMDE_FLOAT64_C( -631.16));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( 685.87), SIMDE_FLOAT64_C( 0.00));
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_FALSE_OS);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 28 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( 959.19), SIMDE_FLOAT64_C( -478.47));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( -520.61), SIMDE_FLOAT64_C( -214.50));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( 959.19), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_NEQ_OS);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 29 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -527.73), SIMDE_FLOAT64_C( -211.69));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( 356.74), SIMDE_FLOAT64_C( 955.71));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -527.73), SIMDE_FLOAT64_C( 0.00));
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_GE_OQ);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 30 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( -426.25), SIMDE_FLOAT64_C( -493.55));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( 886.66), SIMDE_FLOAT64_C( 569.52));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( -426.25), SIMDE_FLOAT64_C( 0.00));
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_GT_OQ);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  /* imm8 = 31 */
  a = simde_mm_set_pd(SIMDE_FLOAT64_C( 981.35), SIMDE_FLOAT64_C( -586.10));
  b = simde_mm_set_pd(SIMDE_FLOAT64_C( 775.81), SIMDE_FLOAT64_C( 631.37));
  e = simde_mm_set_pd(SIMDE_FLOAT64_C( 981.35), SIMDE_F64_ALL_SET);
  r = simde_mm_cmp_sd(a, b, SIMDE_CMP_TRUE_US);
  simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(e));

  return 0;
}

static int test_simde_mm_cmp_ss(SIMDE_MUNIT_TEST_ARGS) { simde__m128 a, b, r, e; a = simde_mm_set_ps(SIMDE_FLOAT32_C( 29.86), SIMDE_FLOAT32_C( -506.56), SIMDE_FLOAT32_C( 52.70), SIMDE_FLOAT32_C( -451.19)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -773.77), SIMDE_FLOAT32_C( 947.93), SIMDE_FLOAT32_C( -234.67), SIMDE_FLOAT32_C( -847.97)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 29.86), SIMDE_FLOAT32_C( -506.56), SIMDE_FLOAT32_C( 52.70), SIMDE_FLOAT32_C( 0.00)), simde_mm_setzero_ps()); r = simde_mm_cmp_ss(a, b, 0); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -133.43), SIMDE_FLOAT32_C( 949.13), SIMDE_FLOAT32_C( 326.28), SIMDE_FLOAT32_C( 158.71)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 684.48), SIMDE_FLOAT32_C( 677.57), SIMDE_FLOAT32_C( -960.66), SIMDE_FLOAT32_C( -282.67)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -133.43), SIMDE_FLOAT32_C( 949.13), SIMDE_FLOAT32_C( 326.28), SIMDE_FLOAT32_C( 0.00)), simde_mm_setzero_ps()); r = simde_mm_cmp_ss(a, b, 1); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 193.40), SIMDE_FLOAT32_C( 779.62), SIMDE_FLOAT32_C( -982.70), SIMDE_FLOAT32_C( 733.89)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 820.93), SIMDE_FLOAT32_C( 29.11), SIMDE_FLOAT32_C( -999.26), SIMDE_FLOAT32_C( 78.74)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 193.40), SIMDE_FLOAT32_C( 779.62), SIMDE_FLOAT32_C( -982.70), SIMDE_FLOAT32_C( 0.00)), simde_mm_setzero_ps()); r = simde_mm_cmp_ss(a, b, 2);
simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 253.28), SIMDE_FLOAT32_C( 166.31), SIMDE_FLOAT32_C( 346.10), SIMDE_FLOAT32_C( 502.59)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 807.03), SIMDE_FLOAT32_C( 802.13), SIMDE_FLOAT32_C( 411.74), SIMDE_FLOAT32_C( 596.93)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 253.28), SIMDE_FLOAT32_C( 166.31), SIMDE_FLOAT32_C( 346.10), SIMDE_FLOAT32_C( 0.00)), simde_mm_setzero_ps()); r = simde_mm_cmp_ss(a, b, 3); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 820.59), SIMDE_FLOAT32_C( 257.56), SIMDE_FLOAT32_C( -468.51), SIMDE_FLOAT32_C( -573.74)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -952.67), SIMDE_FLOAT32_C( -547.39), SIMDE_FLOAT32_C( 82.21), SIMDE_FLOAT32_C( 55.32)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 820.59), SIMDE_FLOAT32_C( 257.56), SIMDE_FLOAT32_C( -468.51), SIMDE_FLOAT32_C( -573.74)), simde_x_mm_setone_ps()); r = simde_mm_cmp_ss(a, b, 4); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -856.96), SIMDE_FLOAT32_C( 380.95), SIMDE_FLOAT32_C( 354.16), SIMDE_FLOAT32_C( 933.34)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 937.78), SIMDE_FLOAT32_C( -846.91), SIMDE_FLOAT32_C( 481.96), SIMDE_FLOAT32_C( -401.55)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -856.96), SIMDE_FLOAT32_C( 380.95), SIMDE_FLOAT32_C( 354.16), SIMDE_FLOAT32_C( 0.0)), simde_x_mm_setone_ps()); r = simde_mm_cmp_ss(a, b, 5); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 701.49), SIMDE_FLOAT32_C( 203.70), SIMDE_FLOAT32_C( -473.49), SIMDE_FLOAT32_C( 919.68)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 917.48), SIMDE_FLOAT32_C( 496.48), SIMDE_FLOAT32_C( -380.99), SIMDE_FLOAT32_C( -612.04)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 701.49), SIMDE_FLOAT32_C( 
203.70), SIMDE_FLOAT32_C( -473.49), SIMDE_FLOAT32_C( 0.0)), simde_x_mm_setone_ps()); r = simde_mm_cmp_ss(a, b, 6); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -495.27), SIMDE_FLOAT32_C( 2.61), SIMDE_FLOAT32_C( 190.46), SIMDE_FLOAT32_C( -489.47)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 733.40), SIMDE_FLOAT32_C( -262.53), SIMDE_FLOAT32_C( -250.54), SIMDE_FLOAT32_C( 753.56)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -495.27), SIMDE_FLOAT32_C( 2.61), SIMDE_FLOAT32_C( 190.46), SIMDE_FLOAT32_C( 0.0)), simde_x_mm_setone_ps()); r = simde_mm_cmp_ss(a, b, 7); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -295.58), SIMDE_FLOAT32_C( 686.48), SIMDE_FLOAT32_C( -715.68), SIMDE_FLOAT32_C( -678.55)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -64.78), SIMDE_FLOAT32_C( -994.32), SIMDE_FLOAT32_C( 819.61), SIMDE_FLOAT32_C( -435.68)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -295.58), SIMDE_FLOAT32_C( 686.48), SIMDE_FLOAT32_C( -715.68), SIMDE_FLOAT32_C( 0.00)), simde_mm_setzero_ps()); r = simde_mm_cmp_ss(a, b, 8); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -446.36), SIMDE_FLOAT32_C( -630.25), SIMDE_FLOAT32_C( 895.61), SIMDE_FLOAT32_C( -359.83)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -712.74), SIMDE_FLOAT32_C( -683.20), SIMDE_FLOAT32_C( -684.03), SIMDE_FLOAT32_C( 476.51)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -446.36), SIMDE_FLOAT32_C( -630.25), SIMDE_FLOAT32_C( 895.61), SIMDE_FLOAT32_C( 0.0)), simde_x_mm_setone_ps()); r = simde_mm_cmp_ss(a, b, 9); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 453.59), SIMDE_FLOAT32_C( -704.68), SIMDE_FLOAT32_C( 968.54), SIMDE_FLOAT32_C( -73.76)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -793.56), SIMDE_FLOAT32_C( -626.26), SIMDE_FLOAT32_C( 
371.33), SIMDE_FLOAT32_C( 945.10)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 453.59), SIMDE_FLOAT32_C( -704.68), SIMDE_FLOAT32_C( 968.54), SIMDE_FLOAT32_C( 0.0)), simde_x_mm_setone_ps()); r = simde_mm_cmp_ss(a, b, 10); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 747.32), SIMDE_FLOAT32_C( 722.71), SIMDE_FLOAT32_C( 10.11), SIMDE_FLOAT32_C( -589.46)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -358.96), SIMDE_FLOAT32_C( -216.71), SIMDE_FLOAT32_C( -367.08), SIMDE_FLOAT32_C( 571.81)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 747.32), SIMDE_FLOAT32_C( 722.71), SIMDE_FLOAT32_C( 10.11), SIMDE_FLOAT32_C( -589.46)), simde_mm_setzero_ps()); r = simde_mm_cmp_ss(a, b, 11); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -393.04), SIMDE_FLOAT32_C( -521.21), SIMDE_FLOAT32_C( 315.85), SIMDE_FLOAT32_C( 101.30)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -254.13), SIMDE_FLOAT32_C( 380.76), SIMDE_FLOAT32_C( 862.95), SIMDE_FLOAT32_C( -994.97)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -393.04), SIMDE_FLOAT32_C( -521.21), SIMDE_FLOAT32_C( 315.85), SIMDE_FLOAT32_C( 0.0)), simde_x_mm_setone_ps()); r = simde_mm_cmp_ss(a, b, 12); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -429.51), SIMDE_FLOAT32_C( 950.04), SIMDE_FLOAT32_C( 770.94), SIMDE_FLOAT32_C( -853.08)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 178.64), SIMDE_FLOAT32_C( -50.79), SIMDE_FLOAT32_C( 741.69), SIMDE_FLOAT32_C( -786.81)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -429.51), SIMDE_FLOAT32_C( 950.04), SIMDE_FLOAT32_C( 770.94), SIMDE_FLOAT32_C( 0.00)), simde_mm_setzero_ps()); r = simde_mm_cmp_ss(a, b, 13); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 499.30), SIMDE_FLOAT32_C( 807.95), SIMDE_FLOAT32_C( 
-410.68), SIMDE_FLOAT32_C( 505.23)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -114.66), SIMDE_FLOAT32_C( 865.01), SIMDE_FLOAT32_C( -665.39), SIMDE_FLOAT32_C( 342.00)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 499.30), SIMDE_FLOAT32_C( 807.95), SIMDE_FLOAT32_C( -410.68), SIMDE_FLOAT32_C( 0.0)), simde_x_mm_setone_ps()); r = simde_mm_cmp_ss(a, b, 14); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 744.11), SIMDE_FLOAT32_C( 103.80), SIMDE_FLOAT32_C( 230.08), SIMDE_FLOAT32_C( -784.93)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -640.30), SIMDE_FLOAT32_C( 690.88), SIMDE_FLOAT32_C( -782.01), SIMDE_FLOAT32_C( -779.01)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 744.11), SIMDE_FLOAT32_C( 103.80), SIMDE_FLOAT32_C( 230.08), SIMDE_FLOAT32_C( 0.0)), simde_x_mm_setone_ps()); r = simde_mm_cmp_ss(a, b, 15); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 937.00), SIMDE_FLOAT32_C( -237.56), SIMDE_FLOAT32_C( 614.04), SIMDE_FLOAT32_C( -552.02)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 636.71), SIMDE_FLOAT32_C( 821.24), SIMDE_FLOAT32_C( 385.34), SIMDE_FLOAT32_C( -655.54)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 937.00), SIMDE_FLOAT32_C( -237.56), SIMDE_FLOAT32_C( 614.04), SIMDE_FLOAT32_C( 0.00)), simde_mm_setzero_ps()); r = simde_mm_cmp_ss(a, b, 16); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -840.84), SIMDE_FLOAT32_C( -184.65), SIMDE_FLOAT32_C( 856.01), SIMDE_FLOAT32_C( 182.80)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -54.26), SIMDE_FLOAT32_C( 831.01), SIMDE_FLOAT32_C( -693.60), SIMDE_FLOAT32_C( -149.67)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -840.84), SIMDE_FLOAT32_C( -184.65), SIMDE_FLOAT32_C( 856.01), SIMDE_FLOAT32_C( 0.00)), simde_mm_setzero_ps()); r = simde_mm_cmp_ss(a, b, 17); simde_assert_m128i_u32(simde_mm_castps_si128(r), 
==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 422.72), SIMDE_FLOAT32_C( -740.98), SIMDE_FLOAT32_C( -971.04), SIMDE_FLOAT32_C( 90.38)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 787.23), SIMDE_FLOAT32_C( -946.13), SIMDE_FLOAT32_C( 562.60), SIMDE_FLOAT32_C( 34.39)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 422.72), SIMDE_FLOAT32_C( -740.98), SIMDE_FLOAT32_C( -971.04), SIMDE_FLOAT32_C( 0.00)), simde_mm_setzero_ps()); r = simde_mm_cmp_ss(a, b, 18); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 379.97), SIMDE_FLOAT32_C( 252.47), SIMDE_FLOAT32_C( 573.41), SIMDE_FLOAT32_C( 371.64)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 86.67), SIMDE_FLOAT32_C( 230.06), SIMDE_FLOAT32_C( 816.36), SIMDE_FLOAT32_C( -574.41)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 379.97), SIMDE_FLOAT32_C( 252.47), SIMDE_FLOAT32_C( 573.41), SIMDE_FLOAT32_C( 0.00)), simde_mm_setzero_ps()); r = simde_mm_cmp_ss(a, b, 19); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( -409.94), SIMDE_FLOAT32_C( 339.05), SIMDE_FLOAT32_C( 567.23)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 548.12), SIMDE_FLOAT32_C( 482.27), SIMDE_FLOAT32_C( -877.01), SIMDE_FLOAT32_C( 105.90)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( -409.94), SIMDE_FLOAT32_C( 339.05), SIMDE_FLOAT32_C( 0.0)), simde_x_mm_setone_ps()); r = simde_mm_cmp_ss(a, b, 20); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 443.93), SIMDE_FLOAT32_C( 879.02), SIMDE_FLOAT32_C( 280.77), SIMDE_FLOAT32_C( 215.63)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 858.68), SIMDE_FLOAT32_C( 507.37), SIMDE_FLOAT32_C( 274.86), SIMDE_FLOAT32_C( -935.31)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 443.93), SIMDE_FLOAT32_C( 879.02), SIMDE_FLOAT32_C( 280.77), SIMDE_FLOAT32_C( 
0.0)), simde_x_mm_setone_ps()); r = simde_mm_cmp_ss(a, b, 21); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 417.98), SIMDE_FLOAT32_C( 304.47), SIMDE_FLOAT32_C( -370.04), SIMDE_FLOAT32_C( 620.00)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -491.67), SIMDE_FLOAT32_C( 756.54), SIMDE_FLOAT32_C( -538.71), SIMDE_FLOAT32_C( -880.53)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 417.98), SIMDE_FLOAT32_C( 304.47), SIMDE_FLOAT32_C( -370.04), SIMDE_FLOAT32_C( 0.0)), simde_x_mm_setone_ps()); r = simde_mm_cmp_ss(a, b, 22); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -778.82), SIMDE_FLOAT32_C( -277.37), SIMDE_FLOAT32_C( -561.10), SIMDE_FLOAT32_C( 913.75)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -799.75), SIMDE_FLOAT32_C( 322.89), SIMDE_FLOAT32_C( 168.49), SIMDE_FLOAT32_C( -586.31)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -778.82), SIMDE_FLOAT32_C( -277.37), SIMDE_FLOAT32_C( -561.10), SIMDE_FLOAT32_C( 0.0)), simde_x_mm_setone_ps()); r = simde_mm_cmp_ss(a, b, 23); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 19.63), SIMDE_FLOAT32_C( -796.59), SIMDE_FLOAT32_C( 829.80), SIMDE_FLOAT32_C( 577.01)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 825.83), SIMDE_FLOAT32_C( -702.38), SIMDE_FLOAT32_C( 909.63), SIMDE_FLOAT32_C( -668.68)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 19.63), SIMDE_FLOAT32_C( -796.59), SIMDE_FLOAT32_C( 829.80), SIMDE_FLOAT32_C( 0.00)), simde_mm_setzero_ps()); r = simde_mm_cmp_ss(a, b, 24); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -935.55), SIMDE_FLOAT32_C( -906.06), SIMDE_FLOAT32_C( 23.18), SIMDE_FLOAT32_C( -374.29)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 903.69), SIMDE_FLOAT32_C( -930.74), SIMDE_FLOAT32_C( -354.90), SIMDE_FLOAT32_C( -304.33)); e = 
simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -935.55), SIMDE_FLOAT32_C( -906.06), SIMDE_FLOAT32_C( 23.18), SIMDE_FLOAT32_C( 0.0)), simde_x_mm_setone_ps()); r = simde_mm_cmp_ss(a, b, 25); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -730.13), SIMDE_FLOAT32_C( -258.50), SIMDE_FLOAT32_C( -873.85), SIMDE_FLOAT32_C( -348.94)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 29.53), SIMDE_FLOAT32_C( 827.18), SIMDE_FLOAT32_C( 334.24), SIMDE_FLOAT32_C( -491.97)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -730.13), SIMDE_FLOAT32_C( -258.50), SIMDE_FLOAT32_C( -873.85), SIMDE_FLOAT32_C( 0.00)), simde_mm_setzero_ps()); r = simde_mm_cmp_ss(a, b, 26); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -381.04), SIMDE_FLOAT32_C( 669.32), SIMDE_FLOAT32_C( -574.70), SIMDE_FLOAT32_C( 440.24)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -837.63), SIMDE_FLOAT32_C( -877.92), SIMDE_FLOAT32_C( -226.69), SIMDE_FLOAT32_C( -557.27)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -381.04), SIMDE_FLOAT32_C( 669.32), SIMDE_FLOAT32_C( -574.70), SIMDE_FLOAT32_C( 0.00)), simde_mm_setzero_ps()); r = simde_mm_cmp_ss(a, b, 27); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( 239.21), SIMDE_FLOAT32_C( 527.22), SIMDE_FLOAT32_C( 489.56), SIMDE_FLOAT32_C( 238.84)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -364.20), SIMDE_FLOAT32_C( 722.98), SIMDE_FLOAT32_C( 475.77), SIMDE_FLOAT32_C( -967.04)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 239.21), SIMDE_FLOAT32_C( 527.22), SIMDE_FLOAT32_C( 489.56), SIMDE_FLOAT32_C( 0.0)), simde_x_mm_setone_ps()); r = simde_mm_cmp_ss(a, b, 28); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -308.58), SIMDE_FLOAT32_C( -108.66), SIMDE_FLOAT32_C( 857.88), SIMDE_FLOAT32_C( -131.87)); b = 
simde_mm_set_ps(SIMDE_FLOAT32_C( -505.11), SIMDE_FLOAT32_C( -213.47), SIMDE_FLOAT32_C( 71.14), SIMDE_FLOAT32_C( -330.60)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -308.58), SIMDE_FLOAT32_C( -108.66), SIMDE_FLOAT32_C( 857.88), SIMDE_FLOAT32_C( 0.0)), simde_x_mm_setone_ps()); r = simde_mm_cmp_ss(a, b, 29); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -236.42), SIMDE_FLOAT32_C( 925.60), SIMDE_FLOAT32_C( 252.25), SIMDE_FLOAT32_C( -546.68)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( 67.98), SIMDE_FLOAT32_C( -613.65), SIMDE_FLOAT32_C( -165.27), SIMDE_FLOAT32_C( 109.49)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -236.42), SIMDE_FLOAT32_C( 925.60), SIMDE_FLOAT32_C( 252.25), SIMDE_FLOAT32_C( 0.00)), simde_mm_setzero_ps()); r = simde_mm_cmp_ss(a, b, 30); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); a = simde_mm_set_ps(SIMDE_FLOAT32_C( -851.42), SIMDE_FLOAT32_C( 716.28), SIMDE_FLOAT32_C( 257.21), SIMDE_FLOAT32_C( 191.16)); b = simde_mm_set_ps(SIMDE_FLOAT32_C( -758.42), SIMDE_FLOAT32_C( 731.61), SIMDE_FLOAT32_C( 23.45), SIMDE_FLOAT32_C( 503.57)); e = simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -851.42), SIMDE_FLOAT32_C( 716.28), SIMDE_FLOAT32_C( 257.21), SIMDE_FLOAT32_C( 0.0)), simde_x_mm_setone_ps()); r = simde_mm_cmp_ss(a, b, 31); simde_assert_m128i_u32(simde_mm_castps_si128(r), ==, simde_mm_castps_si128(e)); return 0; }

/* Exercises simde_mm256_cmp_pd once for every immediate from 0 through 31.
 *
 * Each case loads two four-lane double vectors, runs the comparison with a
 * literal immediate, and checks the result against an expected mask after
 * casting both to integer vectors.  Expected lanes are either 0.0 or
 * SIMDE_F64_ALL_SET.  None of the inputs is NaN and no pair of lanes is
 * equal, so the per-case notes below describe only the relation that the
 * expected mask shows for this data.
 *
 * NOTE(review): the immediates presumably follow Intel's _CMP_* predicate
 * numbering; confirm against the implementation.  The immediate is kept as a
 * literal at every call site rather than a loop variable.
 *
 * Returns 0 once all cases have been checked. */
static int
test_simde_mm256_cmp_pd(SIMDE_MUNIT_TEST_ARGS) {
  simde__m256d lhs, rhs, actual, expected;

  /* imm8 = 0: every lane expected clear. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-357.57), SIMDE_FLOAT64_C(765.93), SIMDE_FLOAT64_C(-550.14), SIMDE_FLOAT64_C(-22.41));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(868.56), SIMDE_FLOAT64_C(688.68), SIMDE_FLOAT64_C(-724.59), SIMDE_FLOAT64_C(334.75));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 0);
  expected = simde_mm256_set_pd(SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00));
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 1: mask set where lhs < rhs. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-54.73), SIMDE_FLOAT64_C(28.08), SIMDE_FLOAT64_C(97.90), SIMDE_FLOAT64_C(892.29));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-838.35), SIMDE_FLOAT64_C(389.60), SIMDE_FLOAT64_C(-784.06), SIMDE_FLOAT64_C(-852.24));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 1);
  expected = simde_mm256_set_pd(SIMDE_FLOAT64_C(0.00), SIMDE_F64_ALL_SET, SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00));
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 2: mask set where lhs < rhs (no equal lanes in this data). */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-763.82), SIMDE_FLOAT64_C(-666.98), SIMDE_FLOAT64_C(95.45), SIMDE_FLOAT64_C(511.10));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(72.91), SIMDE_FLOAT64_C(842.59), SIMDE_FLOAT64_C(-301.64), SIMDE_FLOAT64_C(977.39));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 2);
  expected = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_FLOAT64_C(0.00), SIMDE_F64_ALL_SET);
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 3: every lane expected clear. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-891.91), SIMDE_FLOAT64_C(-127.40), SIMDE_FLOAT64_C(463.49), SIMDE_FLOAT64_C(177.91));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-341.50), SIMDE_FLOAT64_C(153.72), SIMDE_FLOAT64_C(151.04), SIMDE_FLOAT64_C(-348.13));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 3);
  expected = simde_mm256_set_pd(SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00));
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 4: every lane expected set. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(100.63), SIMDE_FLOAT64_C(228.90), SIMDE_FLOAT64_C(-642.19), SIMDE_FLOAT64_C(-977.08));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(565.32), SIMDE_FLOAT64_C(-745.60), SIMDE_FLOAT64_C(-937.14), SIMDE_FLOAT64_C(-396.84));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 4);
  expected = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 5: mask set where lhs > rhs (no equal lanes in this data). */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-698.69), SIMDE_FLOAT64_C(381.53), SIMDE_FLOAT64_C(-995.38), SIMDE_FLOAT64_C(904.84));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(610.68), SIMDE_FLOAT64_C(607.60), SIMDE_FLOAT64_C(346.14), SIMDE_FLOAT64_C(-567.77));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 5);
  expected = simde_mm256_set_pd(SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_F64_ALL_SET);
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 6: mask set where lhs > rhs. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-32.88), SIMDE_FLOAT64_C(456.08), SIMDE_FLOAT64_C(-158.08), SIMDE_FLOAT64_C(-924.19));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-46.81), SIMDE_FLOAT64_C(-380.35), SIMDE_FLOAT64_C(820.23), SIMDE_FLOAT64_C(-250.91));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 6);
  expected = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00));
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 7: every lane expected set. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-875.29), SIMDE_FLOAT64_C(57.11), SIMDE_FLOAT64_C(260.25), SIMDE_FLOAT64_C(164.20));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(715.03), SIMDE_FLOAT64_C(526.68), SIMDE_FLOAT64_C(724.10), SIMDE_FLOAT64_C(-661.45));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 7);
  expected = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 8: every lane expected clear. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-59.93), SIMDE_FLOAT64_C(531.74), SIMDE_FLOAT64_C(694.87), SIMDE_FLOAT64_C(114.93));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(415.34), SIMDE_FLOAT64_C(611.46), SIMDE_FLOAT64_C(-279.38), SIMDE_FLOAT64_C(402.62));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 8);
  expected = simde_mm256_set_pd(SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00));
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 9: mask set where lhs < rhs. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(414.18), SIMDE_FLOAT64_C(-763.39), SIMDE_FLOAT64_C(386.06), SIMDE_FLOAT64_C(874.65));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(544.80), SIMDE_FLOAT64_C(381.68), SIMDE_FLOAT64_C(466.15), SIMDE_FLOAT64_C(-212.81));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 9);
  expected = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_FLOAT64_C(0.00));
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 10: mask set where lhs < rhs (no equal lanes in this data). */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(970.77), SIMDE_FLOAT64_C(-757.81), SIMDE_FLOAT64_C(907.57), SIMDE_FLOAT64_C(981.95));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(537.33), SIMDE_FLOAT64_C(-552.73), SIMDE_FLOAT64_C(-335.84), SIMDE_FLOAT64_C(263.81));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 10);
  expected = simde_mm256_set_pd(SIMDE_FLOAT64_C(0.00), SIMDE_F64_ALL_SET, SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00));
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 11: every lane expected clear. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(792.31), SIMDE_FLOAT64_C(400.04), SIMDE_FLOAT64_C(-788.48), SIMDE_FLOAT64_C(167.61));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(212.33), SIMDE_FLOAT64_C(296.89), SIMDE_FLOAT64_C(622.33), SIMDE_FLOAT64_C(-766.53));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 11);
  expected = simde_mm256_set_pd(SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00));
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 12: every lane expected set. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(758.22), SIMDE_FLOAT64_C(-663.32), SIMDE_FLOAT64_C(-999.81), SIMDE_FLOAT64_C(133.54));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-236.40), SIMDE_FLOAT64_C(458.49), SIMDE_FLOAT64_C(-796.87), SIMDE_FLOAT64_C(971.44));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 12);
  expected = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 13: mask set where lhs > rhs (no equal lanes in this data). */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-918.24), SIMDE_FLOAT64_C(-553.29), SIMDE_FLOAT64_C(709.03), SIMDE_FLOAT64_C(-42.30));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-249.29), SIMDE_FLOAT64_C(-863.89), SIMDE_FLOAT64_C(838.41), SIMDE_FLOAT64_C(-285.41));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 13);
  expected = simde_mm256_set_pd(SIMDE_FLOAT64_C(0.00), SIMDE_F64_ALL_SET, SIMDE_FLOAT64_C(0.00), SIMDE_F64_ALL_SET);
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 14: mask set where lhs > rhs. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-668.93), SIMDE_FLOAT64_C(-420.18), SIMDE_FLOAT64_C(785.36), SIMDE_FLOAT64_C(-788.63));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(111.55), SIMDE_FLOAT64_C(-222.13), SIMDE_FLOAT64_C(-579.35), SIMDE_FLOAT64_C(-996.45));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 14);
  expected = simde_mm256_set_pd(SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 15: every lane expected set. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(841.84), SIMDE_FLOAT64_C(-686.81), SIMDE_FLOAT64_C(-199.31), SIMDE_FLOAT64_C(982.01));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(421.52), SIMDE_FLOAT64_C(488.69), SIMDE_FLOAT64_C(995.06), SIMDE_FLOAT64_C(-730.80));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 15);
  expected = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 16: every lane expected clear. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-95.24), SIMDE_FLOAT64_C(253.40), SIMDE_FLOAT64_C(-815.08), SIMDE_FLOAT64_C(358.42));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-399.53), SIMDE_FLOAT64_C(-710.61), SIMDE_FLOAT64_C(-422.64), SIMDE_FLOAT64_C(-148.83));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 16);
  expected = simde_mm256_set_pd(SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00));
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 17: mask set where lhs < rhs (no lane qualifies here). */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(77.77), SIMDE_FLOAT64_C(698.58), SIMDE_FLOAT64_C(-27.60), SIMDE_FLOAT64_C(435.81));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-150.12), SIMDE_FLOAT64_C(-751.03), SIMDE_FLOAT64_C(-597.97), SIMDE_FLOAT64_C(-937.82));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 17);
  expected = simde_mm256_set_pd(SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00));
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 18: mask set where lhs < rhs (every lane qualifies here). */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-838.78), SIMDE_FLOAT64_C(93.35), SIMDE_FLOAT64_C(-825.83), SIMDE_FLOAT64_C(-323.02));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-282.39), SIMDE_FLOAT64_C(572.90), SIMDE_FLOAT64_C(-581.23), SIMDE_FLOAT64_C(32.08));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 18);
  expected = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 19: every lane expected clear. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-807.33), SIMDE_FLOAT64_C(664.63), SIMDE_FLOAT64_C(982.61), SIMDE_FLOAT64_C(63.27));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(946.39), SIMDE_FLOAT64_C(207.32), SIMDE_FLOAT64_C(-9.66), SIMDE_FLOAT64_C(11.76));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 19);
  expected = simde_mm256_set_pd(SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00));
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 20: every lane expected set. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-738.69), SIMDE_FLOAT64_C(-322.11), SIMDE_FLOAT64_C(-163.93), SIMDE_FLOAT64_C(-138.57));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(684.55), SIMDE_FLOAT64_C(-319.23), SIMDE_FLOAT64_C(930.19), SIMDE_FLOAT64_C(517.01));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 20);
  expected = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 21: mask set where lhs > rhs (no equal lanes in this data). */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-76.84), SIMDE_FLOAT64_C(457.06), SIMDE_FLOAT64_C(575.12), SIMDE_FLOAT64_C(845.68));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(233.22), SIMDE_FLOAT64_C(-237.00), SIMDE_FLOAT64_C(-964.93), SIMDE_FLOAT64_C(750.37));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 21);
  expected = simde_mm256_set_pd(SIMDE_FLOAT64_C(0.00), SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 22: mask set where lhs > rhs. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(199.98), SIMDE_FLOAT64_C(741.05), SIMDE_FLOAT64_C(-723.44), SIMDE_FLOAT64_C(323.27));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(103.44), SIMDE_FLOAT64_C(-854.52), SIMDE_FLOAT64_C(244.92), SIMDE_FLOAT64_C(486.47));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 22);
  expected = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00));
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 23: every lane expected set. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-576.37), SIMDE_FLOAT64_C(750.85), SIMDE_FLOAT64_C(434.13), SIMDE_FLOAT64_C(344.29));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-968.83), SIMDE_FLOAT64_C(577.41), SIMDE_FLOAT64_C(995.59), SIMDE_FLOAT64_C(750.10));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 23);
  expected = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 24: every lane expected clear. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(526.25), SIMDE_FLOAT64_C(-57.74), SIMDE_FLOAT64_C(-432.94), SIMDE_FLOAT64_C(882.68));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-495.53), SIMDE_FLOAT64_C(-602.01), SIMDE_FLOAT64_C(-925.63), SIMDE_FLOAT64_C(123.17));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 24);
  expected = simde_mm256_set_pd(SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00));
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 25: mask set where lhs < rhs. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-567.21), SIMDE_FLOAT64_C(600.16), SIMDE_FLOAT64_C(-766.87), SIMDE_FLOAT64_C(11.16));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(695.58), SIMDE_FLOAT64_C(482.48), SIMDE_FLOAT64_C(350.48), SIMDE_FLOAT64_C(-969.97));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 25);
  expected = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_FLOAT64_C(0.00), SIMDE_F64_ALL_SET, SIMDE_FLOAT64_C(0.00));
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 26: mask set where lhs < rhs (no equal lanes in this data). */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-85.80), SIMDE_FLOAT64_C(500.17), SIMDE_FLOAT64_C(916.37), SIMDE_FLOAT64_C(398.15));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(745.00), SIMDE_FLOAT64_C(-144.13), SIMDE_FLOAT64_C(-516.66), SIMDE_FLOAT64_C(995.75));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 26);
  expected = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_F64_ALL_SET);
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 27: every lane expected clear. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(546.73), SIMDE_FLOAT64_C(-603.02), SIMDE_FLOAT64_C(-971.83), SIMDE_FLOAT64_C(389.90));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(776.68), SIMDE_FLOAT64_C(-130.82), SIMDE_FLOAT64_C(580.30), SIMDE_FLOAT64_C(704.29));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 27);
  expected = simde_mm256_set_pd(SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00), SIMDE_FLOAT64_C(0.00));
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 28: every lane expected set. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-164.05), SIMDE_FLOAT64_C(409.22), SIMDE_FLOAT64_C(-602.22), SIMDE_FLOAT64_C(375.71));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(363.61), SIMDE_FLOAT64_C(-315.81), SIMDE_FLOAT64_C(-199.39), SIMDE_FLOAT64_C(806.44));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 28);
  expected = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 29: mask set where lhs > rhs (no equal lanes in this data). */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(200.12), SIMDE_FLOAT64_C(648.82), SIMDE_FLOAT64_C(-75.31), SIMDE_FLOAT64_C(801.78));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(431.61), SIMDE_FLOAT64_C(123.43), SIMDE_FLOAT64_C(753.52), SIMDE_FLOAT64_C(-346.75));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 29);
  expected = simde_mm256_set_pd(SIMDE_FLOAT64_C(0.00), SIMDE_F64_ALL_SET, SIMDE_FLOAT64_C(0.00), SIMDE_F64_ALL_SET);
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 30: mask set where lhs > rhs. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-27.55), SIMDE_FLOAT64_C(895.94), SIMDE_FLOAT64_C(742.64), SIMDE_FLOAT64_C(-59.01));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(812.89), SIMDE_FLOAT64_C(-405.22), SIMDE_FLOAT64_C(782.32), SIMDE_FLOAT64_C(-131.42));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 30);
  expected = simde_mm256_set_pd(SIMDE_FLOAT64_C(0.00), SIMDE_F64_ALL_SET, SIMDE_FLOAT64_C(0.00), SIMDE_F64_ALL_SET);
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  /* imm8 = 31: every lane expected set. */
  lhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-68.17), SIMDE_FLOAT64_C(-246.45), SIMDE_FLOAT64_C(32.69), SIMDE_FLOAT64_C(-878.59));
  rhs      = simde_mm256_set_pd(SIMDE_FLOAT64_C(-82.57), SIMDE_FLOAT64_C(930.53), SIMDE_FLOAT64_C(-591.17), SIMDE_FLOAT64_C(-164.90));
  actual   = simde_mm256_cmp_pd(lhs, rhs, 31);
  expected = simde_mm256_set_pd(SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET, SIMDE_F64_ALL_SET);
  simde_assert_m256i_equal(simde_mm256_castpd_si256(actual), simde_mm256_castpd_si256(expected));

  return 0;
}

static int test_simde_mm256_cmp_ps(SIMDE_MUNIT_TEST_ARGS) { simde__m256 a, b, r, e; a = simde_mm256_set_ps(SIMDE_FLOAT32_C( 803.75), SIMDE_FLOAT32_C( 41.82), SIMDE_FLOAT32_C( -135.21), SIMDE_FLOAT32_C( -426.86), SIMDE_FLOAT32_C( -667.45), SIMDE_FLOAT32_C( -471.27), SIMDE_FLOAT32_C( 582.67), SIMDE_FLOAT32_C( -556.69)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( 947.23), SIMDE_FLOAT32_C( -284.56), SIMDE_FLOAT32_C( -696.40), SIMDE_FLOAT32_C( 148.35), SIMDE_FLOAT32_C( 759.58), SIMDE_FLOAT32_C( 941.33), SIMDE_FLOAT32_C( -734.70), SIMDE_FLOAT32_C( -491.66)); e = simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 
0.00)); r = simde_mm256_cmp_ps(a, b, 0); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( 631.77), SIMDE_FLOAT32_C( 277.52), SIMDE_FLOAT32_C( -622.54), SIMDE_FLOAT32_C( -161.68), SIMDE_FLOAT32_C( -898.32), SIMDE_FLOAT32_C( -313.15), SIMDE_FLOAT32_C( -233.08), SIMDE_FLOAT32_C( -49.93)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -870.15), SIMDE_FLOAT32_C( -811.55), SIMDE_FLOAT32_C( 94.74), SIMDE_FLOAT32_C( -36.95), SIMDE_FLOAT32_C( -368.63), SIMDE_FLOAT32_C( 744.09), SIMDE_FLOAT32_C( -903.28), SIMDE_FLOAT32_C( 297.64)); e = simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET); r = simde_mm256_cmp_ps(a, b, 1); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( -390.61), SIMDE_FLOAT32_C( 155.36), SIMDE_FLOAT32_C( -387.67), SIMDE_FLOAT32_C( -531.88), SIMDE_FLOAT32_C( -713.17), SIMDE_FLOAT32_C( 805.71), SIMDE_FLOAT32_C( 886.16), SIMDE_FLOAT32_C( 319.56)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -28.95), SIMDE_FLOAT32_C( -30.42), SIMDE_FLOAT32_C( 455.91), SIMDE_FLOAT32_C( -309.55), SIMDE_FLOAT32_C( 306.68), SIMDE_FLOAT32_C( 44.72), SIMDE_FLOAT32_C( 995.53), SIMDE_FLOAT32_C( -435.97)); e = simde_mm256_set_ps(SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00)); r = simde_mm256_cmp_ps(a, b, 2); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( 108.25), SIMDE_FLOAT32_C( -817.89), SIMDE_FLOAT32_C( 544.18), SIMDE_FLOAT32_C( -228.91), SIMDE_FLOAT32_C( 781.88), SIMDE_FLOAT32_C( -335.51), SIMDE_FLOAT32_C( 838.72), SIMDE_FLOAT32_C( 548.66)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -959.57), SIMDE_FLOAT32_C( -439.07), 
SIMDE_FLOAT32_C( 447.94), SIMDE_FLOAT32_C( -832.97), SIMDE_FLOAT32_C( 300.93), SIMDE_FLOAT32_C( 268.82), SIMDE_FLOAT32_C( -44.38), SIMDE_FLOAT32_C( -764.37)); e = simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm256_cmp_ps(a, b, 3); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( 681.83), SIMDE_FLOAT32_C( -98.68), SIMDE_FLOAT32_C( 576.60), SIMDE_FLOAT32_C( -825.51), SIMDE_FLOAT32_C( -981.15), SIMDE_FLOAT32_C( -781.62), SIMDE_FLOAT32_C( 238.97), SIMDE_FLOAT32_C( 708.96)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( 598.93), SIMDE_FLOAT32_C( 530.78), SIMDE_FLOAT32_C( 290.29), SIMDE_FLOAT32_C( 456.74), SIMDE_FLOAT32_C( -688.44), SIMDE_FLOAT32_C( 479.20), SIMDE_FLOAT32_C( -526.39), SIMDE_FLOAT32_C( 111.85)); e = simde_mm256_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm256_cmp_ps(a, b, 4); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.01), SIMDE_FLOAT32_C( 601.19), SIMDE_FLOAT32_C( -226.39), SIMDE_FLOAT32_C( -602.11), SIMDE_FLOAT32_C( -733.84), SIMDE_FLOAT32_C( 571.60), SIMDE_FLOAT32_C( 888.40), SIMDE_FLOAT32_C( 435.87)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -568.84), SIMDE_FLOAT32_C( -290.97), SIMDE_FLOAT32_C( -428.69), SIMDE_FLOAT32_C( 594.16), SIMDE_FLOAT32_C( -385.45), SIMDE_FLOAT32_C( 878.38), SIMDE_FLOAT32_C( 771.04), SIMDE_FLOAT32_C( -520.10)); e = simde_mm256_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm256_cmp_ps(a, b, 5); 
simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( -220.16), SIMDE_FLOAT32_C( -128.37), SIMDE_FLOAT32_C( -559.41), SIMDE_FLOAT32_C( -454.29), SIMDE_FLOAT32_C( 350.58), SIMDE_FLOAT32_C( 911.03), SIMDE_FLOAT32_C( 296.06), SIMDE_FLOAT32_C( 568.62)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -352.18), SIMDE_FLOAT32_C( -372.36), SIMDE_FLOAT32_C( 447.08), SIMDE_FLOAT32_C( 452.17), SIMDE_FLOAT32_C( -243.40), SIMDE_FLOAT32_C( 350.62), SIMDE_FLOAT32_C( 305.52), SIMDE_FLOAT32_C( -502.03)); e = simde_mm256_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET); r = simde_mm256_cmp_ps(a, b, 6); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( 615.60), SIMDE_FLOAT32_C( 619.40), SIMDE_FLOAT32_C( 792.77), SIMDE_FLOAT32_C( 901.05), SIMDE_FLOAT32_C( 333.13), SIMDE_FLOAT32_C( 48.96), SIMDE_FLOAT32_C( 132.32), SIMDE_FLOAT32_C( 26.71)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( 120.11), SIMDE_FLOAT32_C( 415.63), SIMDE_FLOAT32_C( -658.13), SIMDE_FLOAT32_C( 663.72), SIMDE_FLOAT32_C( 841.19), SIMDE_FLOAT32_C( -155.80), SIMDE_FLOAT32_C( 212.50), SIMDE_FLOAT32_C( -466.55)); e = simde_mm256_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm256_cmp_ps(a, b, 7); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( -433.41), SIMDE_FLOAT32_C( -764.18), SIMDE_FLOAT32_C( -961.58), SIMDE_FLOAT32_C( -874.94), SIMDE_FLOAT32_C( -163.70), SIMDE_FLOAT32_C( -839.31), SIMDE_FLOAT32_C( -667.08), SIMDE_FLOAT32_C( 337.92)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( 249.20), SIMDE_FLOAT32_C( -898.78), SIMDE_FLOAT32_C( -555.22), SIMDE_FLOAT32_C( 707.96), 
SIMDE_FLOAT32_C( 516.92), SIMDE_FLOAT32_C( -556.84), SIMDE_FLOAT32_C( -537.76), SIMDE_FLOAT32_C( -705.36)); e = simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm256_cmp_ps(a, b, 8); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( -696.67), SIMDE_FLOAT32_C( -489.80), SIMDE_FLOAT32_C( 200.42), SIMDE_FLOAT32_C( -399.90), SIMDE_FLOAT32_C( 153.90), SIMDE_FLOAT32_C( 505.83), SIMDE_FLOAT32_C( 799.31), SIMDE_FLOAT32_C( -15.86)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -405.31), SIMDE_FLOAT32_C( -462.02), SIMDE_FLOAT32_C( 631.03), SIMDE_FLOAT32_C( -295.26), SIMDE_FLOAT32_C( -48.91), SIMDE_FLOAT32_C( 969.53), SIMDE_FLOAT32_C( -852.51), SIMDE_FLOAT32_C( 559.42)); e = simde_mm256_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET); r = simde_mm256_cmp_ps(a, b, 9); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( -561.97), SIMDE_FLOAT32_C( -971.30), SIMDE_FLOAT32_C( -250.24), SIMDE_FLOAT32_C( -325.06), SIMDE_FLOAT32_C( -51.47), SIMDE_FLOAT32_C( -259.37), SIMDE_FLOAT32_C( -492.34), SIMDE_FLOAT32_C( 100.58)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -848.06), SIMDE_FLOAT32_C( -621.13), SIMDE_FLOAT32_C( -636.90), SIMDE_FLOAT32_C( 992.53), SIMDE_FLOAT32_C( -474.40), SIMDE_FLOAT32_C( -74.21), SIMDE_FLOAT32_C( -340.21), SIMDE_FLOAT32_C( -243.84)); e = simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00)); r = simde_mm256_cmp_ps(a, b, 10); simde_assert_m256i_equal(simde_mm256_castps_si256(r), 
simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( 239.07), SIMDE_FLOAT32_C( 874.09), SIMDE_FLOAT32_C( 973.63), SIMDE_FLOAT32_C( -326.74), SIMDE_FLOAT32_C( -105.33), SIMDE_FLOAT32_C( -113.10), SIMDE_FLOAT32_C( -907.53), SIMDE_FLOAT32_C( -299.90)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( 153.03), SIMDE_FLOAT32_C( 322.07), SIMDE_FLOAT32_C( -271.79), SIMDE_FLOAT32_C( 197.54), SIMDE_FLOAT32_C( 293.92), SIMDE_FLOAT32_C( 8.21), SIMDE_FLOAT32_C( 456.82), SIMDE_FLOAT32_C( -366.71)); e = simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm256_cmp_ps(a, b, 11); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( -287.90), SIMDE_FLOAT32_C( -229.67), SIMDE_FLOAT32_C( -929.73), SIMDE_FLOAT32_C( -179.99), SIMDE_FLOAT32_C( 848.21), SIMDE_FLOAT32_C( -997.57), SIMDE_FLOAT32_C( -656.96), SIMDE_FLOAT32_C( -862.20)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( 580.33), SIMDE_FLOAT32_C( -689.10), SIMDE_FLOAT32_C( 649.25), SIMDE_FLOAT32_C( 176.45), SIMDE_FLOAT32_C( 565.41), SIMDE_FLOAT32_C( -548.53), SIMDE_FLOAT32_C( -949.03), SIMDE_FLOAT32_C( 336.81)); e = simde_mm256_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm256_cmp_ps(a, b, 12); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( -617.06), SIMDE_FLOAT32_C( -436.28), SIMDE_FLOAT32_C( -774.87), SIMDE_FLOAT32_C( 811.68), SIMDE_FLOAT32_C( -458.23), SIMDE_FLOAT32_C( 612.49), SIMDE_FLOAT32_C( 857.84), SIMDE_FLOAT32_C( 49.61)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -70.22), SIMDE_FLOAT32_C( 598.15), SIMDE_FLOAT32_C( -715.51), SIMDE_FLOAT32_C( -654.30), SIMDE_FLOAT32_C( -597.85), 
SIMDE_FLOAT32_C( -502.38), SIMDE_FLOAT32_C( 175.10), SIMDE_FLOAT32_C( -126.08)); e = simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm256_cmp_ps(a, b, 13); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( 108.84), SIMDE_FLOAT32_C( 652.10), SIMDE_FLOAT32_C( -310.88), SIMDE_FLOAT32_C( -644.71), SIMDE_FLOAT32_C( -530.21), SIMDE_FLOAT32_C( 877.99), SIMDE_FLOAT32_C( -626.95), SIMDE_FLOAT32_C( -794.28)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -222.83), SIMDE_FLOAT32_C( -112.15), SIMDE_FLOAT32_C( 359.65), SIMDE_FLOAT32_C( 441.32), SIMDE_FLOAT32_C( -219.23), SIMDE_FLOAT32_C( 477.22), SIMDE_FLOAT32_C( -766.46), SIMDE_FLOAT32_C( -440.48)); e = simde_mm256_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00)); r = simde_mm256_cmp_ps(a, b, 14); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( -718.37), SIMDE_FLOAT32_C( 402.09), SIMDE_FLOAT32_C( 660.94), SIMDE_FLOAT32_C( 280.50), SIMDE_FLOAT32_C( -500.81), SIMDE_FLOAT32_C( -99.58), SIMDE_FLOAT32_C( 609.32), SIMDE_FLOAT32_C( -871.48)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -231.85), SIMDE_FLOAT32_C( -142.83), SIMDE_FLOAT32_C( -163.21), SIMDE_FLOAT32_C( -687.90), SIMDE_FLOAT32_C( -906.29), SIMDE_FLOAT32_C( 896.58), SIMDE_FLOAT32_C( 176.66), SIMDE_FLOAT32_C( -333.48)); e = simde_mm256_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm256_cmp_ps(a, b, 15); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( 116.16), 
SIMDE_FLOAT32_C( 859.97), SIMDE_FLOAT32_C( 226.33), SIMDE_FLOAT32_C( 8.76), SIMDE_FLOAT32_C( 653.49), SIMDE_FLOAT32_C( 911.94), SIMDE_FLOAT32_C( -467.83), SIMDE_FLOAT32_C( 901.04)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -984.25), SIMDE_FLOAT32_C( -389.87), SIMDE_FLOAT32_C( 153.18), SIMDE_FLOAT32_C( 268.28), SIMDE_FLOAT32_C( 389.59), SIMDE_FLOAT32_C( -887.68), SIMDE_FLOAT32_C( 270.01), SIMDE_FLOAT32_C( -864.64)); e = simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm256_cmp_ps(a, b, 16); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( 370.52), SIMDE_FLOAT32_C( 981.11), SIMDE_FLOAT32_C( 129.13), SIMDE_FLOAT32_C( -206.59), SIMDE_FLOAT32_C( 934.43), SIMDE_FLOAT32_C( 784.23), SIMDE_FLOAT32_C( -480.27), SIMDE_FLOAT32_C( -564.50)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -678.14), SIMDE_FLOAT32_C( 144.17), SIMDE_FLOAT32_C( -165.62), SIMDE_FLOAT32_C( 200.67), SIMDE_FLOAT32_C( -971.31), SIMDE_FLOAT32_C( -618.84), SIMDE_FLOAT32_C( -770.29), SIMDE_FLOAT32_C( 646.67)); e = simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET); r = simde_mm256_cmp_ps(a, b, 17); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( 163.45), SIMDE_FLOAT32_C( -974.16), SIMDE_FLOAT32_C( -872.88), SIMDE_FLOAT32_C( 866.32), SIMDE_FLOAT32_C( 589.17), SIMDE_FLOAT32_C( -436.70), SIMDE_FLOAT32_C( 651.94), SIMDE_FLOAT32_C( -800.52)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -321.81), SIMDE_FLOAT32_C( 575.56), SIMDE_FLOAT32_C( 664.13), SIMDE_FLOAT32_C( -598.78), SIMDE_FLOAT32_C( 405.66), SIMDE_FLOAT32_C( -161.92), SIMDE_FLOAT32_C( -284.93), 
SIMDE_FLOAT32_C( 894.57)); e = simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET); r = simde_mm256_cmp_ps(a, b, 18); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( -267.71), SIMDE_FLOAT32_C( 437.69), SIMDE_FLOAT32_C( -122.76), SIMDE_FLOAT32_C( -910.35), SIMDE_FLOAT32_C( -336.05), SIMDE_FLOAT32_C( -733.70), SIMDE_FLOAT32_C( -255.51), SIMDE_FLOAT32_C( 200.58)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( 866.33), SIMDE_FLOAT32_C( -54.50), SIMDE_FLOAT32_C( 991.32), SIMDE_FLOAT32_C( 618.32), SIMDE_FLOAT32_C( -309.87), SIMDE_FLOAT32_C( 601.57), SIMDE_FLOAT32_C( -408.16), SIMDE_FLOAT32_C( 18.48)); e = simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm256_cmp_ps(a, b, 19); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( -16.05), SIMDE_FLOAT32_C( -298.08), SIMDE_FLOAT32_C( 249.83), SIMDE_FLOAT32_C( 758.02), SIMDE_FLOAT32_C( 479.46), SIMDE_FLOAT32_C( 336.47), SIMDE_FLOAT32_C( 883.93), SIMDE_FLOAT32_C( 849.23)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( 527.85), SIMDE_FLOAT32_C( 873.42), SIMDE_FLOAT32_C( -416.27), SIMDE_FLOAT32_C( 38.97), SIMDE_FLOAT32_C( -132.92), SIMDE_FLOAT32_C( 674.04), SIMDE_FLOAT32_C( -271.02), SIMDE_FLOAT32_C( 227.53)); e = simde_mm256_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm256_cmp_ps(a, b, 20); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( 614.63), SIMDE_FLOAT32_C( 157.19), SIMDE_FLOAT32_C( 
417.58), SIMDE_FLOAT32_C( 646.09), SIMDE_FLOAT32_C( -575.70), SIMDE_FLOAT32_C( -968.11), SIMDE_FLOAT32_C( -412.85), SIMDE_FLOAT32_C( 552.77)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -891.04), SIMDE_FLOAT32_C( 82.95), SIMDE_FLOAT32_C( -16.42), SIMDE_FLOAT32_C( 297.67), SIMDE_FLOAT32_C( -699.86), SIMDE_FLOAT32_C( -654.27), SIMDE_FLOAT32_C( -999.41), SIMDE_FLOAT32_C( -517.03)); e = simde_mm256_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm256_cmp_ps(a, b, 21); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( -836.21), SIMDE_FLOAT32_C( -887.30), SIMDE_FLOAT32_C( -752.55), SIMDE_FLOAT32_C( -796.40), SIMDE_FLOAT32_C( 351.16), SIMDE_FLOAT32_C( -178.21), SIMDE_FLOAT32_C( -706.76), SIMDE_FLOAT32_C( 73.54)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -426.63), SIMDE_FLOAT32_C( 604.12), SIMDE_FLOAT32_C( 650.64), SIMDE_FLOAT32_C( 773.20), SIMDE_FLOAT32_C( 120.86), SIMDE_FLOAT32_C( -7.00), SIMDE_FLOAT32_C( 471.29), SIMDE_FLOAT32_C( 313.92)); e = simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm256_cmp_ps(a, b, 22); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( 116.65), SIMDE_FLOAT32_C( -494.31), SIMDE_FLOAT32_C( -707.48), SIMDE_FLOAT32_C( 709.57), SIMDE_FLOAT32_C( 910.05), SIMDE_FLOAT32_C( -151.47), SIMDE_FLOAT32_C( -953.89), SIMDE_FLOAT32_C( -496.46)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( 177.47), SIMDE_FLOAT32_C( -756.65), SIMDE_FLOAT32_C( 255.36), SIMDE_FLOAT32_C( -282.34), SIMDE_FLOAT32_C( -175.69), SIMDE_FLOAT32_C( -724.05), SIMDE_FLOAT32_C( 699.09), SIMDE_FLOAT32_C( 269.04)); e = simde_mm256_set_ps(SIMDE_F32_ALL_SET, 
SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm256_cmp_ps(a, b, 23); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( -903.19), SIMDE_FLOAT32_C( 746.12), SIMDE_FLOAT32_C( 342.73), SIMDE_FLOAT32_C( 795.69), SIMDE_FLOAT32_C( -643.05), SIMDE_FLOAT32_C( 437.97), SIMDE_FLOAT32_C( 559.94), SIMDE_FLOAT32_C( -748.98)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( 98.06), SIMDE_FLOAT32_C( 751.33), SIMDE_FLOAT32_C( 270.29), SIMDE_FLOAT32_C( 106.83), SIMDE_FLOAT32_C( 873.71), SIMDE_FLOAT32_C( -291.58), SIMDE_FLOAT32_C( -813.87), SIMDE_FLOAT32_C( 765.52)); e = simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm256_cmp_ps(a, b, 24); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( 510.52), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -126.10), SIMDE_FLOAT32_C( 409.54), SIMDE_FLOAT32_C( 995.44), SIMDE_FLOAT32_C( 315.04), SIMDE_FLOAT32_C( 962.60), SIMDE_FLOAT32_C( 941.69)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -187.35), SIMDE_FLOAT32_C( -452.95), SIMDE_FLOAT32_C( 721.01), SIMDE_FLOAT32_C( 306.97), SIMDE_FLOAT32_C( -164.06), SIMDE_FLOAT32_C( 498.34), SIMDE_FLOAT32_C( -327.01), SIMDE_FLOAT32_C( 70.94)); e = simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm256_cmp_ps(a, b, 25); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( 875.35), SIMDE_FLOAT32_C( 722.91), SIMDE_FLOAT32_C( 314.64), SIMDE_FLOAT32_C( 955.25), SIMDE_FLOAT32_C( -448.66), 
SIMDE_FLOAT32_C( -836.37), SIMDE_FLOAT32_C( -166.39), SIMDE_FLOAT32_C( 761.84)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( 219.22), SIMDE_FLOAT32_C( 980.19), SIMDE_FLOAT32_C( 521.60), SIMDE_FLOAT32_C( 169.38), SIMDE_FLOAT32_C( -454.09), SIMDE_FLOAT32_C( -679.40), SIMDE_FLOAT32_C( 296.09), SIMDE_FLOAT32_C( -189.79)); e = simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00)); r = simde_mm256_cmp_ps(a, b, 26); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( 364.98), SIMDE_FLOAT32_C( 544.21), SIMDE_FLOAT32_C( 898.24), SIMDE_FLOAT32_C( 705.23), SIMDE_FLOAT32_C( -349.55), SIMDE_FLOAT32_C( -507.73), SIMDE_FLOAT32_C( -126.59), SIMDE_FLOAT32_C( 632.35)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -343.37), SIMDE_FLOAT32_C( -456.75), SIMDE_FLOAT32_C( -23.18), SIMDE_FLOAT32_C( -118.51), SIMDE_FLOAT32_C( -748.39), SIMDE_FLOAT32_C( 12.78), SIMDE_FLOAT32_C( 746.33), SIMDE_FLOAT32_C( 804.05)); e = simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm256_cmp_ps(a, b, 27); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( 460.18), SIMDE_FLOAT32_C( 347.74), SIMDE_FLOAT32_C( -472.72), SIMDE_FLOAT32_C( 665.79), SIMDE_FLOAT32_C( 574.55), SIMDE_FLOAT32_C( -914.40), SIMDE_FLOAT32_C( 582.67), SIMDE_FLOAT32_C( -561.20)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -507.07), SIMDE_FLOAT32_C( 23.76), SIMDE_FLOAT32_C( -452.87), SIMDE_FLOAT32_C( -385.46), SIMDE_FLOAT32_C( -717.37), SIMDE_FLOAT32_C( 438.32), SIMDE_FLOAT32_C( 629.22), SIMDE_FLOAT32_C( 831.24)); e = simde_mm256_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, 
SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm256_cmp_ps(a, b, 28); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( -722.18), SIMDE_FLOAT32_C( -347.64), SIMDE_FLOAT32_C( -854.87), SIMDE_FLOAT32_C( -899.08), SIMDE_FLOAT32_C( 85.71), SIMDE_FLOAT32_C( -834.65), SIMDE_FLOAT32_C( 389.94), SIMDE_FLOAT32_C( 481.97)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( 314.14), SIMDE_FLOAT32_C( -60.49), SIMDE_FLOAT32_C( 531.62), SIMDE_FLOAT32_C( 990.93), SIMDE_FLOAT32_C( 96.46), SIMDE_FLOAT32_C( -19.75), SIMDE_FLOAT32_C( -147.53), SIMDE_FLOAT32_C( -991.98)); e = simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm256_cmp_ps(a, b, 29); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( -326.32), SIMDE_FLOAT32_C( 553.34), SIMDE_FLOAT32_C( 298.71), SIMDE_FLOAT32_C( -419.97), SIMDE_FLOAT32_C( 24.57), SIMDE_FLOAT32_C( 392.20), SIMDE_FLOAT32_C( -704.51), SIMDE_FLOAT32_C( 510.74)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( -609.44), SIMDE_FLOAT32_C( -312.06), SIMDE_FLOAT32_C( 205.60), SIMDE_FLOAT32_C( 160.74), SIMDE_FLOAT32_C( -717.63), SIMDE_FLOAT32_C( -124.46), SIMDE_FLOAT32_C( 761.31), SIMDE_FLOAT32_C( 650.10)); e = simde_mm256_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); r = simde_mm256_cmp_ps(a, b, 30); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); a = simde_mm256_set_ps(SIMDE_FLOAT32_C( 17.13), SIMDE_FLOAT32_C( 745.11), SIMDE_FLOAT32_C( -376.48), SIMDE_FLOAT32_C( 594.45), SIMDE_FLOAT32_C( -508.77), SIMDE_FLOAT32_C( -947.81), SIMDE_FLOAT32_C( 338.88), 
SIMDE_FLOAT32_C( 402.07)); b = simde_mm256_set_ps(SIMDE_FLOAT32_C( 621.66), SIMDE_FLOAT32_C( 302.73), SIMDE_FLOAT32_C( 326.55), SIMDE_FLOAT32_C( 551.02), SIMDE_FLOAT32_C( 772.13), SIMDE_FLOAT32_C( 272.66), SIMDE_FLOAT32_C( 449.95), SIMDE_FLOAT32_C( -910.83)); e = simde_mm256_set_ps(SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET, SIMDE_F32_ALL_SET); r = simde_mm256_cmp_ps(a, b, 31); simde_assert_m256i_equal(simde_mm256_castps_si256(r), simde_mm256_castps_si256(e)); return 0; }
/* Test for simde_mm256_cvtepi32_pd.
 * Each of the 8 cases passes a simde__m128i built from four int32 values and
 * expects a simde__m256d holding the same four values written as doubles.
 * Results are checked with simde_assert_m256d_close(r, expected, 1); the
 * trailing 1 is presumably a precision/tolerance setting -- TODO confirm
 * against the test harness.
 * Returns 0 after all cases have been checked. */
static int test_simde_mm256_cvtepi32_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256d r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 1957018358), INT32_C( 1074174472), INT32_C( 124397699), INT32_C( 1881644266)), simde_mm256_set_pd(SIMDE_FLOAT64_C(1957018358.00), SIMDE_FLOAT64_C(1074174472.00), SIMDE_FLOAT64_C(124397699.00), SIMDE_FLOAT64_C(1881644266.00)) }, { simde_mm_set_epi32(INT32_C( 2070107316), INT32_C( 1618653378), INT32_C( 1002292861), INT32_C( 2002977375)), simde_mm256_set_pd(SIMDE_FLOAT64_C(2070107316.00), SIMDE_FLOAT64_C(1618653378.00), SIMDE_FLOAT64_C(1002292861.00), SIMDE_FLOAT64_C(2002977375.00)) }, { simde_mm_set_epi32(INT32_C( 2031655643), INT32_C( 1380169755), INT32_C( 1722613954), INT32_C( 223100421)), simde_mm256_set_pd(SIMDE_FLOAT64_C(2031655643.00), SIMDE_FLOAT64_C(1380169755.00), SIMDE_FLOAT64_C(1722613954.00), SIMDE_FLOAT64_C(223100421.00)) }, { simde_mm_set_epi32(INT32_C(-1894427767), INT32_C(-1633274427), INT32_C(-2058387969), INT32_C(-1311515394)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-1894427767.00), SIMDE_FLOAT64_C(-1633274427.00), SIMDE_FLOAT64_C(-2058387969.00), SIMDE_FLOAT64_C(-1311515394.00)) }, { simde_mm_set_epi32(INT32_C(-1443374135), INT32_C( 1382394218), INT32_C( 1459905767), INT32_C( -756048058)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-1443374135.00), SIMDE_FLOAT64_C(1382394218.00), SIMDE_FLOAT64_C(1459905767.00), SIMDE_FLOAT64_C(-756048058.00)) }, { 
simde_mm_set_epi32(INT32_C( -781596301), INT32_C( 1840524706), INT32_C( 1502138952), INT32_C(-2118210723)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-781596301.00), SIMDE_FLOAT64_C(1840524706.00), SIMDE_FLOAT64_C(1502138952.00), SIMDE_FLOAT64_C(-2118210723.00)) }, { simde_mm_set_epi32(INT32_C( 52250967), INT32_C( 1988701031), INT32_C( 1592626424), INT32_C(-1778387557)), simde_mm256_set_pd(SIMDE_FLOAT64_C(52250967.00), SIMDE_FLOAT64_C(1988701031.00), SIMDE_FLOAT64_C(1592626424.00), SIMDE_FLOAT64_C(-1778387557.00)) }, { simde_mm_set_epi32(INT32_C( 30979646), INT32_C( 769206580), INT32_C(-2128276240), INT32_C( 1445709709)), simde_mm256_set_pd(SIMDE_FLOAT64_C(30979646.00), SIMDE_FLOAT64_C(769206580.00), SIMDE_FLOAT64_C(-2128276240.00), SIMDE_FLOAT64_C(1445709709.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_cvtepi32_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm256_cvtepi32_ps.
 * Each of the 8 cases passes a simde__m256i built from eight int32 values and
 * expects a simde__m256 holding the same eight values written as floats.
 * Results are checked with simde_assert_m256_close(r, expected, 1); the
 * trailing 1 is presumably a precision/tolerance setting -- TODO confirm.
 * Returns 0 after all cases have been checked. */
static int test_simde_mm256_cvtepi32_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( -6033), INT32_C( 15813), INT32_C( 12979), INT32_C( -31712), INT32_C( 18002), INT32_C( -6019), INT32_C( -26810), INT32_C( 14091)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-6033.00), SIMDE_FLOAT32_C(15813.00), SIMDE_FLOAT32_C(12979.00), SIMDE_FLOAT32_C(-31712.00), SIMDE_FLOAT32_C(18002.00), SIMDE_FLOAT32_C(-6019.00), SIMDE_FLOAT32_C(-26810.00), SIMDE_FLOAT32_C(14091.00)) }, { simde_mm256_set_epi32(INT32_C( 6359), INT32_C( 7786), INT32_C( 21856), INT32_C( -20706), INT32_C( -20652), INT32_C( 21040), INT32_C( -8561), INT32_C( -12779)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 6359.00), SIMDE_FLOAT32_C( 7786.00), SIMDE_FLOAT32_C(21856.00), SIMDE_FLOAT32_C(-20706.00), SIMDE_FLOAT32_C(-20652.00), SIMDE_FLOAT32_C(21040.00), SIMDE_FLOAT32_C(-8561.00), SIMDE_FLOAT32_C(-12779.00)) }, { simde_mm256_set_epi32(INT32_C( -28477), INT32_C( -21667), INT32_C( -16892), INT32_C( -16024), 
INT32_C( -11576), INT32_C( 602), INT32_C( 23902), INT32_C( 17547)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-28477.00), SIMDE_FLOAT32_C(-21667.00), SIMDE_FLOAT32_C(-16892.00), SIMDE_FLOAT32_C(-16024.00), SIMDE_FLOAT32_C(-11576.00), SIMDE_FLOAT32_C( 602.00), SIMDE_FLOAT32_C(23902.00), SIMDE_FLOAT32_C(17547.00)) }, { simde_mm256_set_epi32(INT32_C( 8732), INT32_C( 13948), INT32_C( 7489), INT32_C( 25724), INT32_C( 24561), INT32_C( 11189), INT32_C( 24773), INT32_C( -8467)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 8732.00), SIMDE_FLOAT32_C(13948.00), SIMDE_FLOAT32_C( 7489.00), SIMDE_FLOAT32_C(25724.00), SIMDE_FLOAT32_C(24561.00), SIMDE_FLOAT32_C(11189.00), SIMDE_FLOAT32_C(24773.00), SIMDE_FLOAT32_C(-8467.00)) }, { simde_mm256_set_epi32(INT32_C( -31943), INT32_C( 26870), INT32_C( -22515), INT32_C( 3030), INT32_C( 24358), INT32_C( 31924), INT32_C( 30771), INT32_C( 4777)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-31943.00), SIMDE_FLOAT32_C(26870.00), SIMDE_FLOAT32_C(-22515.00), SIMDE_FLOAT32_C( 3030.00), SIMDE_FLOAT32_C(24358.00), SIMDE_FLOAT32_C(31924.00), SIMDE_FLOAT32_C(30771.00), SIMDE_FLOAT32_C( 4777.00)) }, { simde_mm256_set_epi32(INT32_C( -1809), INT32_C( -14404), INT32_C( -32286), INT32_C( 25399), INT32_C( 29260), INT32_C( -23412), INT32_C( 12480), INT32_C( -23461)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-1809.00), SIMDE_FLOAT32_C(-14404.00), SIMDE_FLOAT32_C(-32286.00), SIMDE_FLOAT32_C(25399.00), SIMDE_FLOAT32_C(29260.00), SIMDE_FLOAT32_C(-23412.00), SIMDE_FLOAT32_C(12480.00), SIMDE_FLOAT32_C(-23461.00)) }, { simde_mm256_set_epi32(INT32_C( -2274), INT32_C( 23698), INT32_C( 4976), INT32_C( 13398), INT32_C( -8046), INT32_C( -16937), INT32_C( 19340), INT32_C( -6656)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-2274.00), SIMDE_FLOAT32_C(23698.00), SIMDE_FLOAT32_C( 4976.00), SIMDE_FLOAT32_C(13398.00), SIMDE_FLOAT32_C(-8046.00), SIMDE_FLOAT32_C(-16937.00), SIMDE_FLOAT32_C(19340.00), SIMDE_FLOAT32_C(-6656.00)) }, { simde_mm256_set_epi32(INT32_C( -29605), INT32_C( 8888), INT32_C( -8347), INT32_C( 
-8890), INT32_C( -8372), INT32_C( 7334), INT32_C( 14947), INT32_C( -5546)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-29605.00), SIMDE_FLOAT32_C( 8888.00), SIMDE_FLOAT32_C(-8347.00), SIMDE_FLOAT32_C(-8890.00), SIMDE_FLOAT32_C(-8372.00), SIMDE_FLOAT32_C( 7334.00), SIMDE_FLOAT32_C(14947.00), SIMDE_FLOAT32_C(-5546.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_cvtepi32_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm256_cvtpd_epi32.
 * Each of the 8 cases passes a simde__m256d of four doubles and expects a
 * simde__m128i of four int32 values. The expected integers are the inputs
 * rounded to the nearest integer (e.g. 823.92 -> 824, -889.53 -> -890).
 * Results are compared lane-by-lane for exact equality via
 * simde_assert_m128i_i32(r, ==, expected).
 * Returns 0 after all cases have been checked. */
static int test_simde_mm256_cvtpd_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m128i r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 823.92), SIMDE_FLOAT64_C( -252.31), SIMDE_FLOAT64_C( 311.42), SIMDE_FLOAT64_C( 639.08)), simde_mm_set_epi32(INT32_C( 824), INT32_C(-252), INT32_C( 311), INT32_C( 639)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 311.89), SIMDE_FLOAT64_C( -507.25), SIMDE_FLOAT64_C( 748.64), SIMDE_FLOAT64_C( -488.86)), simde_mm_set_epi32(INT32_C( 312), INT32_C(-507), INT32_C( 749), INT32_C(-489)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 478.57), SIMDE_FLOAT64_C( -328.29), SIMDE_FLOAT64_C( -289.22), SIMDE_FLOAT64_C( -586.95)), simde_mm_set_epi32(INT32_C( 479), INT32_C(-328), INT32_C(-289), INT32_C(-587)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 341.08), SIMDE_FLOAT64_C( 432.49), SIMDE_FLOAT64_C( 835.07), SIMDE_FLOAT64_C( -889.53)), simde_mm_set_epi32(INT32_C( 341), INT32_C( 432), INT32_C( 835), INT32_C(-890)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -811.21), SIMDE_FLOAT64_C( -487.29), SIMDE_FLOAT64_C( 852.90), SIMDE_FLOAT64_C( 970.07)), simde_mm_set_epi32(INT32_C(-811), INT32_C(-487), INT32_C( 853), INT32_C( 970)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 456.79), SIMDE_FLOAT64_C( -836.64), SIMDE_FLOAT64_C( -396.53), SIMDE_FLOAT64_C( 788.69)), simde_mm_set_epi32(INT32_C( 457), INT32_C(-837), INT32_C(-397), INT32_C( 789)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 538.31), SIMDE_FLOAT64_C( 630.62), SIMDE_FLOAT64_C( -811.65), 
SIMDE_FLOAT64_C( -175.08)), simde_mm_set_epi32(INT32_C( 538), INT32_C( 631), INT32_C(-812), INT32_C(-175)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -53.87), SIMDE_FLOAT64_C( -83.11), SIMDE_FLOAT64_C( -288.58), SIMDE_FLOAT64_C( -287.98)), simde_mm_set_epi32(INT32_C( -54), INT32_C( -83), INT32_C(-289), INT32_C(-288)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm256_cvtpd_epi32(test_vec[i].a); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; }
/* Test for simde_mm256_cvtpd_ps.
 * Each of the 8 cases passes a simde__m256d of four doubles and expects a
 * simde__m128 of four floats written with the same decimal literals.
 * Results are checked with simde_assert_m128_close(r, expected, 1), i.e. an
 * approximate rather than exact comparison; the trailing 1 is presumably a
 * precision/tolerance setting -- TODO confirm.
 * Returns 0 after all cases have been checked. */
static int test_simde_mm256_cvtpd_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m128 r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 375.90), SIMDE_FLOAT64_C( -889.76), SIMDE_FLOAT64_C( -974.31), SIMDE_FLOAT64_C( 373.58)), simde_mm_set_ps(SIMDE_FLOAT32_C( 375.90), SIMDE_FLOAT32_C( -889.76), SIMDE_FLOAT32_C( -974.31), SIMDE_FLOAT32_C( 373.58)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 84.47), SIMDE_FLOAT64_C( 744.53), SIMDE_FLOAT64_C( -27.43), SIMDE_FLOAT64_C( -51.38)), simde_mm_set_ps(SIMDE_FLOAT32_C( 84.47), SIMDE_FLOAT32_C( 744.53), SIMDE_FLOAT32_C( -27.43), SIMDE_FLOAT32_C( -51.38)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 66.02), SIMDE_FLOAT64_C( -438.12), SIMDE_FLOAT64_C( 408.44), SIMDE_FLOAT64_C( 197.54)), simde_mm_set_ps(SIMDE_FLOAT32_C( 66.02), SIMDE_FLOAT32_C( -438.12), SIMDE_FLOAT32_C( 408.44), SIMDE_FLOAT32_C( 197.54)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 80.23), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 928.38), SIMDE_FLOAT64_C( 305.66)), simde_mm_set_ps(SIMDE_FLOAT32_C( 80.23), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 928.38), SIMDE_FLOAT32_C( 305.66)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -511.50), SIMDE_FLOAT64_C( -503.26), SIMDE_FLOAT64_C( -164.88), SIMDE_FLOAT64_C( -10.16)), simde_mm_set_ps(SIMDE_FLOAT32_C( -511.50), SIMDE_FLOAT32_C( -503.26), SIMDE_FLOAT32_C( -164.88), SIMDE_FLOAT32_C( -10.16)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 99.85), SIMDE_FLOAT64_C( -538.53), SIMDE_FLOAT64_C( 17.38), 
SIMDE_FLOAT64_C( -161.67)), simde_mm_set_ps(SIMDE_FLOAT32_C( 99.85), SIMDE_FLOAT32_C( -538.53), SIMDE_FLOAT32_C( 17.38), SIMDE_FLOAT32_C( -161.67)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 891.01), SIMDE_FLOAT64_C( 865.63), SIMDE_FLOAT64_C( -39.82), SIMDE_FLOAT64_C( -446.03)), simde_mm_set_ps(SIMDE_FLOAT32_C( 891.01), SIMDE_FLOAT32_C( 865.63), SIMDE_FLOAT32_C( -39.82), SIMDE_FLOAT32_C( -446.03)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 394.04), SIMDE_FLOAT64_C( 238.42), SIMDE_FLOAT64_C( 746.10), SIMDE_FLOAT64_C( -8.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 394.04), SIMDE_FLOAT32_C( 238.42), SIMDE_FLOAT32_C( 746.10), SIMDE_FLOAT32_C( -8.70)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm256_cvtpd_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm256_cvtps_epi32.
 * Each of the 8 cases passes a simde__m256 of eight floats and expects a
 * simde__m256i of eight int32 values. The expected integers are the inputs
 * rounded to the nearest integer (e.g. 598.58 -> 599, -165.53 -> -166); the
 * two inputs ending in .50 (-997.50 and -865.50) are expected to give -998
 * and -866 respectively.
 * Results are compared lane-by-lane for exact equality via
 * simde_assert_m256i_i32(r, ==, expected).
 * Returns 0 after all cases have been checked. */
static int test_simde_mm256_cvtps_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 598.58), SIMDE_FLOAT32_C( 571.41), SIMDE_FLOAT32_C( -242.37), SIMDE_FLOAT32_C( -717.41), SIMDE_FLOAT32_C( 374.26), SIMDE_FLOAT32_C( -165.53), SIMDE_FLOAT32_C( -357.04), SIMDE_FLOAT32_C( -622.88)), simde_mm256_set_epi32(INT32_C( 599), INT32_C( 571), INT32_C(-242), INT32_C(-717), INT32_C( 374), INT32_C(-166), INT32_C(-357), INT32_C(-623)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 257.81), SIMDE_FLOAT32_C( -837.70), SIMDE_FLOAT32_C( 261.48), SIMDE_FLOAT32_C( 542.96), SIMDE_FLOAT32_C( 769.60), SIMDE_FLOAT32_C( -711.96), SIMDE_FLOAT32_C( -326.97), SIMDE_FLOAT32_C( -113.31)), simde_mm256_set_epi32(INT32_C( 258), INT32_C(-838), INT32_C( 261), INT32_C( 543), INT32_C( 770), INT32_C(-712), INT32_C(-327), INT32_C(-113)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -569.57), SIMDE_FLOAT32_C( 423.37), SIMDE_FLOAT32_C( -24.31), SIMDE_FLOAT32_C( 934.89), SIMDE_FLOAT32_C( 421.98), SIMDE_FLOAT32_C( 514.39), SIMDE_FLOAT32_C( 548.83), SIMDE_FLOAT32_C( 419.70)), simde_mm256_set_epi32(INT32_C(-570), 
INT32_C( 423), INT32_C( -24), INT32_C( 935), INT32_C( 422), INT32_C( 514), INT32_C( 549), INT32_C( 420)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -391.60), SIMDE_FLOAT32_C( -565.59), SIMDE_FLOAT32_C( -30.37), SIMDE_FLOAT32_C( -335.58), SIMDE_FLOAT32_C( 613.59), SIMDE_FLOAT32_C( -997.50), SIMDE_FLOAT32_C( -875.20), SIMDE_FLOAT32_C( 61.16)), simde_mm256_set_epi32(INT32_C(-392), INT32_C(-566), INT32_C( -30), INT32_C(-336), INT32_C( 614), INT32_C(-998), INT32_C(-875), INT32_C( 61)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -166.04), SIMDE_FLOAT32_C( -427.79), SIMDE_FLOAT32_C( 609.54), SIMDE_FLOAT32_C( -532.28), SIMDE_FLOAT32_C( -681.53), SIMDE_FLOAT32_C( 504.43), SIMDE_FLOAT32_C( -888.34), SIMDE_FLOAT32_C( 403.18)), simde_mm256_set_epi32(INT32_C(-166), INT32_C(-428), INT32_C( 610), INT32_C(-532), INT32_C(-682), INT32_C( 504), INT32_C(-888), INT32_C( 403)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 216.04), SIMDE_FLOAT32_C( -927.91), SIMDE_FLOAT32_C( 780.38), SIMDE_FLOAT32_C( 765.71), SIMDE_FLOAT32_C( -447.72), SIMDE_FLOAT32_C( -65.81), SIMDE_FLOAT32_C( 716.35), SIMDE_FLOAT32_C( 608.15)), simde_mm256_set_epi32(INT32_C( 216), INT32_C(-928), INT32_C( 780), INT32_C( 766), INT32_C(-448), INT32_C( -66), INT32_C( 716), INT32_C( 608)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 971.27), SIMDE_FLOAT32_C( -611.58), SIMDE_FLOAT32_C( -361.36), SIMDE_FLOAT32_C( -851.00), SIMDE_FLOAT32_C( 839.99), SIMDE_FLOAT32_C( 207.87), SIMDE_FLOAT32_C( -947.82), SIMDE_FLOAT32_C( -403.90)), simde_mm256_set_epi32(INT32_C( 971), INT32_C(-612), INT32_C(-361), INT32_C(-851), INT32_C( 840), INT32_C( 208), INT32_C(-948), INT32_C(-404)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -507.54), SIMDE_FLOAT32_C( 581.68), SIMDE_FLOAT32_C( -590.23), SIMDE_FLOAT32_C( 417.30), SIMDE_FLOAT32_C( -87.52), SIMDE_FLOAT32_C( -865.50), SIMDE_FLOAT32_C( 940.51), SIMDE_FLOAT32_C( 910.77)), simde_mm256_set_epi32(INT32_C(-508), INT32_C( 582), INT32_C(-590), INT32_C( 417), INT32_C( -88), INT32_C(-866), INT32_C( 941), INT32_C( 911)) } 
}; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cvtps_epi32(test_vec[i].a); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; }
/* Test for simde_mm256_cvtps_pd.
 * Each of the 8 cases passes a simde__m128 of four floats and expects a
 * simde__m256d of four doubles written with the same decimal literals.
 * Results are checked with simde_assert_m256d_close(r, expected, 1), i.e. an
 * approximate rather than exact comparison; the trailing 1 is presumably a
 * precision/tolerance setting -- TODO confirm.
 * Returns 0 after all cases have been checked. */
static int test_simde_mm256_cvtps_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m256d r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 846.20), SIMDE_FLOAT32_C( 685.37), SIMDE_FLOAT32_C( 660.41), SIMDE_FLOAT32_C( -309.12)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 846.20), SIMDE_FLOAT64_C( 685.37), SIMDE_FLOAT64_C( 660.41), SIMDE_FLOAT64_C( -309.12)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 900.54), SIMDE_FLOAT32_C( 555.77), SIMDE_FLOAT32_C( -412.48), SIMDE_FLOAT32_C( -684.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 900.54), SIMDE_FLOAT64_C( 555.77), SIMDE_FLOAT64_C( -412.48), SIMDE_FLOAT64_C( -684.76)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -530.00), SIMDE_FLOAT32_C( 516.66), SIMDE_FLOAT32_C( 969.93), SIMDE_FLOAT32_C( -956.57)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -530.00), SIMDE_FLOAT64_C( 516.66), SIMDE_FLOAT64_C( 969.93), SIMDE_FLOAT64_C( -956.57)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 876.84), SIMDE_FLOAT32_C( 972.29), SIMDE_FLOAT32_C( 715.44), SIMDE_FLOAT32_C( -66.38)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 876.84), SIMDE_FLOAT64_C( 972.29), SIMDE_FLOAT64_C( 715.44), SIMDE_FLOAT64_C( -66.38)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -684.20), SIMDE_FLOAT32_C( -317.33), SIMDE_FLOAT32_C( 88.04), SIMDE_FLOAT32_C( 992.34)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -684.20), SIMDE_FLOAT64_C( -317.33), SIMDE_FLOAT64_C( 88.04), SIMDE_FLOAT64_C( 992.34)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 481.41), SIMDE_FLOAT32_C( -117.39), SIMDE_FLOAT32_C( 583.52), SIMDE_FLOAT32_C( 373.09)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 481.41), SIMDE_FLOAT64_C( -117.39), SIMDE_FLOAT64_C( 583.52), SIMDE_FLOAT64_C( 373.09)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 798.14), SIMDE_FLOAT32_C( 486.93), SIMDE_FLOAT32_C( -832.59), SIMDE_FLOAT32_C( 796.43)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 798.14), 
SIMDE_FLOAT64_C( 486.93), SIMDE_FLOAT64_C( -832.59), SIMDE_FLOAT64_C( 796.43)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 75.03), SIMDE_FLOAT32_C( 634.86), SIMDE_FLOAT32_C( 319.54), SIMDE_FLOAT32_C( -801.15)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 75.03), SIMDE_FLOAT64_C( 634.86), SIMDE_FLOAT64_C( 319.54), SIMDE_FLOAT64_C( -801.15)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_cvtps_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm256_cvtsd_f64.
 * Each table entry holds four doubles (a) and one expected scalar (r). The
 * array is loaded with simde_mm256_loadu_pd, passed to
 * simde_mm256_cvtsd_f64, and the scalar result is checked with
 * simde_assert_equal_f64(r, expected, 1).
 * In every entry the expected value corresponds to a[0]; note that a[] is
 * written to two decimals while r carries six (e.g. a[0] = -819.83 vs
 * r = -819.834690), so the check depends on the third argument acting as a
 * tolerance/precision -- presumably decimal places, TODO confirm.
 * Returns 0 after all cases have been checked. */
static int test_simde_mm256_cvtsd_f64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r; } test_vec[] = { { { SIMDE_FLOAT64_C( -819.83), SIMDE_FLOAT64_C( 57.52), SIMDE_FLOAT64_C( 410.09), SIMDE_FLOAT64_C( -207.39) }, SIMDE_FLOAT64_C(-819.834690) }, { { SIMDE_FLOAT64_C( -757.62), SIMDE_FLOAT64_C( 670.35), SIMDE_FLOAT64_C( -81.02), SIMDE_FLOAT64_C( -418.01) }, SIMDE_FLOAT64_C(-757.623120) }, { { SIMDE_FLOAT64_C( 690.51), SIMDE_FLOAT64_C( -758.25), SIMDE_FLOAT64_C( 941.18), SIMDE_FLOAT64_C( 596.59) }, SIMDE_FLOAT64_C(690.512804) }, { { SIMDE_FLOAT64_C( 816.68), SIMDE_FLOAT64_C( 897.92), SIMDE_FLOAT64_C( 232.01), SIMDE_FLOAT64_C( 196.85) }, SIMDE_FLOAT64_C(816.682969) }, { { SIMDE_FLOAT64_C( -223.12), SIMDE_FLOAT64_C( -393.67), SIMDE_FLOAT64_C( 665.54), SIMDE_FLOAT64_C( -958.13) }, SIMDE_FLOAT64_C(-223.120246) }, { { SIMDE_FLOAT64_C( -129.08), SIMDE_FLOAT64_C( -576.73), SIMDE_FLOAT64_C( -588.43), SIMDE_FLOAT64_C( -263.46) }, SIMDE_FLOAT64_C(-129.075903) }, { { SIMDE_FLOAT64_C( 621.84), SIMDE_FLOAT64_C( 726.67), SIMDE_FLOAT64_C( 204.36), SIMDE_FLOAT64_C( -348.61) }, SIMDE_FLOAT64_C(621.835955) }, { { SIMDE_FLOAT64_C( 679.52), SIMDE_FLOAT64_C( -219.86), SIMDE_FLOAT64_C( 812.70), SIMDE_FLOAT64_C( 859.69) }, SIMDE_FLOAT64_C(679.523220) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde_float64 r = simde_mm256_cvtsd_f64(a); 
simde_assert_equal_f64(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm256_cvtsi256_si32.
 * Each table entry holds eight int32 values (a) and one expected int32 (r).
 * The array is loaded with simde_x_mm256_loadu_epi32, passed to
 * simde_mm256_cvtsi256_si32, and the result is compared with
 * simde_assert_equal_i32(r, expected).
 * In every entry the expected value equals a[0].
 * Returns 0 after all cases have been checked. */
static int test_simde_mm256_cvtsi256_si32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[8]; const int32_t r; } test_vec[] = { { { INT32_C( 1220605077), -INT32_C( 1798958528), INT32_C( 1922714309), INT32_C( 1773856549), -INT32_C( 1781872958), -INT32_C( 1646416163), INT32_C( 1580794507), -INT32_C( 381163180) }, INT32_C(1220605077) }, { { INT32_C( 506595294), -INT32_C( 541919207), INT32_C( 1733381442), -INT32_C( 137360588), -INT32_C( 1349739822), -INT32_C( 749967032), -INT32_C( 1070496148), INT32_C( 1302952047) }, INT32_C(506595294) }, { { -INT32_C( 1720919169), INT32_C( 360193747), -INT32_C( 1602434709), -INT32_C( 1500033580), INT32_C( 810951655), -INT32_C( 133979508), -INT32_C( 1732758232), INT32_C( 770007725) }, INT32_C(-1720919169) }, { { INT32_C( 298209597), -INT32_C( 618250640), -INT32_C( 612654329), -INT32_C( 679341328), -INT32_C( 1039673291), -INT32_C( 1548088454), -INT32_C( 314870976), INT32_C( 303702229) }, INT32_C(298209597) }, { { -INT32_C( 500965262), INT32_C( 633162270), -INT32_C( 587122195), -INT32_C( 2118876341), -INT32_C( 716981157), INT32_C( 125369799), INT32_C( 1173664624), INT32_C( 1180176340) }, INT32_C(-500965262) }, { { INT32_C( 237534191), -INT32_C( 1305221691), INT32_C( 1787769886), INT32_C( 317408439), -INT32_C( 941085184), -INT32_C( 1664196565), -INT32_C( 387857900), -INT32_C( 1053935151) }, INT32_C(237534191) }, { { INT32_C( 2060408501), INT32_C( 1512899131), -INT32_C( 289096649), INT32_C( 110336), INT32_C( 147319261), INT32_C( 1571067209), INT32_C( 692422232), INT32_C( 1961522111) }, INT32_C(2060408501) }, { { INT32_C( 99596745), -INT32_C( 211870532), -INT32_C( 639556648), -INT32_C( 1344675118), INT32_C( 364356043), -INT32_C( 1905107914), -INT32_C( 1581729566), -INT32_C( 183066069) }, INT32_C(99596745) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); int32_t r = 
simde_mm256_cvtsi256_si32(a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; }
/* Test for simde_mm256_cvtss_f32.
 * Each table entry holds eight floats (a) and one expected scalar (r). The
 * array is loaded with simde_mm256_loadu_ps, passed to
 * simde_mm256_cvtss_f32, and the scalar result is checked with
 * simde_assert_equal_f32(r, expected, 1).
 * In every entry the expected value corresponds to a[0]; a[] is written to
 * two decimals while r carries six (e.g. a[0] = -520.55 vs r = -520.547485),
 * so the check depends on the third argument acting as a
 * tolerance/precision -- presumably decimal places, TODO confirm.
 * Returns 0 after all cases have been checked. */
static int test_simde_mm256_cvtss_f32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r; } test_vec[] = { { { SIMDE_FLOAT32_C( -520.55), SIMDE_FLOAT32_C( -165.02), SIMDE_FLOAT32_C( 891.39), SIMDE_FLOAT32_C( 353.19), SIMDE_FLOAT32_C( 212.08), SIMDE_FLOAT32_C( -507.29), SIMDE_FLOAT32_C( 216.85), SIMDE_FLOAT32_C( -490.64) }, SIMDE_FLOAT32_C(-520.547485) }, { { SIMDE_FLOAT32_C( -205.41), SIMDE_FLOAT32_C( 942.67), SIMDE_FLOAT32_C( 205.37), SIMDE_FLOAT32_C( -558.52), SIMDE_FLOAT32_C( 450.12), SIMDE_FLOAT32_C( -888.95), SIMDE_FLOAT32_C( 95.41), SIMDE_FLOAT32_C( -731.65) }, SIMDE_FLOAT32_C(-205.409851) }, { { SIMDE_FLOAT32_C( 958.49), SIMDE_FLOAT32_C( 649.60), SIMDE_FLOAT32_C( 758.41), SIMDE_FLOAT32_C( -641.04), SIMDE_FLOAT32_C( 632.75), SIMDE_FLOAT32_C( -642.73), SIMDE_FLOAT32_C( -156.06), SIMDE_FLOAT32_C( 828.00) }, SIMDE_FLOAT32_C(958.494385) }, { { SIMDE_FLOAT32_C( -172.13), SIMDE_FLOAT32_C( 270.97), SIMDE_FLOAT32_C( -877.82), SIMDE_FLOAT32_C( -648.51), SIMDE_FLOAT32_C( 726.04), SIMDE_FLOAT32_C( 208.53), SIMDE_FLOAT32_C( 427.83), SIMDE_FLOAT32_C( -794.51) }, SIMDE_FLOAT32_C(-172.134399) }, { { SIMDE_FLOAT32_C( -956.50), SIMDE_FLOAT32_C( 319.22), SIMDE_FLOAT32_C( 558.68), SIMDE_FLOAT32_C( 255.58), SIMDE_FLOAT32_C( 811.93), SIMDE_FLOAT32_C( -224.47), SIMDE_FLOAT32_C( 764.95), SIMDE_FLOAT32_C( -393.48) }, SIMDE_FLOAT32_C(-956.495544) }, { { SIMDE_FLOAT32_C( -281.80), SIMDE_FLOAT32_C( -29.68), SIMDE_FLOAT32_C( 48.00), SIMDE_FLOAT32_C( -831.68), SIMDE_FLOAT32_C( 81.37), SIMDE_FLOAT32_C( -856.59), SIMDE_FLOAT32_C( -563.33), SIMDE_FLOAT32_C( 39.86) }, SIMDE_FLOAT32_C(-281.800598) }, { { SIMDE_FLOAT32_C( 793.01), SIMDE_FLOAT32_C( -804.92), SIMDE_FLOAT32_C( 398.82), SIMDE_FLOAT32_C( 425.76), SIMDE_FLOAT32_C( -447.64), SIMDE_FLOAT32_C( -757.23), SIMDE_FLOAT32_C( 253.75), SIMDE_FLOAT32_C( 380.22) }, SIMDE_FLOAT32_C(793.009399) }, { { SIMDE_FLOAT32_C( 
513.74), SIMDE_FLOAT32_C( 375.93), SIMDE_FLOAT32_C( 731.71), SIMDE_FLOAT32_C( 239.78), SIMDE_FLOAT32_C( -415.54), SIMDE_FLOAT32_C( 159.54), SIMDE_FLOAT32_C( 445.27), SIMDE_FLOAT32_C( -372.04) }, SIMDE_FLOAT32_C(513.740845) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde_float32 r = simde_mm256_cvtss_f32(a); simde_assert_equal_f32(r, test_vec[i].r, 1); } return 0; }
static int test_simde_mm256_cvttpd_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m128i r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -175.82), SIMDE_FLOAT64_C( -91.19), SIMDE_FLOAT64_C( -855.64), SIMDE_FLOAT64_C(-1000.00)), simde_mm_set_epi32(INT32_C(-175), INT32_C( -91), INT32_C(-855), INT32_C(-1000)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 296.54), SIMDE_FLOAT64_C( 312.22), SIMDE_FLOAT64_C( -648.31), SIMDE_FLOAT64_C( 586.65)), simde_mm_set_epi32(INT32_C( 296), INT32_C( 312), INT32_C(-648), INT32_C( 586)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 502.08), SIMDE_FLOAT64_C( -904.00), SIMDE_FLOAT64_C( 802.10), SIMDE_FLOAT64_C( 616.09)), simde_mm_set_epi32(INT32_C( 502), INT32_C(-904), INT32_C( 802), INT32_C( 616)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 808.28), SIMDE_FLOAT64_C( -212.29), SIMDE_FLOAT64_C( 75.93), SIMDE_FLOAT64_C( -979.81)), simde_mm_set_epi32(INT32_C( 808), INT32_C(-212), INT32_C( 75), INT32_C(-979)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -426.54), SIMDE_FLOAT64_C( 577.52), SIMDE_FLOAT64_C( 966.87), SIMDE_FLOAT64_C( 162.81)), simde_mm_set_epi32(INT32_C(-426), INT32_C( 577), INT32_C( 966), INT32_C( 162)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -50.83), SIMDE_FLOAT64_C( 928.40), SIMDE_FLOAT64_C( 392.36), SIMDE_FLOAT64_C( 469.60)), simde_mm_set_epi32(INT32_C( -50), INT32_C( 928), INT32_C( 392), INT32_C( 469)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 701.55), SIMDE_FLOAT64_C( -334.03), SIMDE_FLOAT64_C( 803.63), SIMDE_FLOAT64_C( -68.22)), simde_mm_set_epi32(INT32_C( 
701), INT32_C(-334), INT32_C( 803), INT32_C( -68)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -168.66), SIMDE_FLOAT64_C( -164.87), SIMDE_FLOAT64_C( 824.77), SIMDE_FLOAT64_C( -834.37)), simde_mm_set_epi32(INT32_C(-168), INT32_C(-164), INT32_C( 824), INT32_C(-834)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm256_cvttpd_epi32(test_vec[i].a); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cvttps_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -135.75), SIMDE_FLOAT32_C( 534.39), SIMDE_FLOAT32_C( -81.93), SIMDE_FLOAT32_C( -234.94), SIMDE_FLOAT32_C( -390.94), SIMDE_FLOAT32_C( -625.05), SIMDE_FLOAT32_C( 991.22), SIMDE_FLOAT32_C( 326.76)), simde_mm256_set_epi32(INT32_C(-135), INT32_C( 534), INT32_C( -81), INT32_C(-234), INT32_C(-390), INT32_C(-625), INT32_C( 991), INT32_C( 326)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 864.35), SIMDE_FLOAT32_C( 855.52), SIMDE_FLOAT32_C( -619.15), SIMDE_FLOAT32_C( -985.70), SIMDE_FLOAT32_C( -511.44), SIMDE_FLOAT32_C( 327.81), SIMDE_FLOAT32_C( 88.57), SIMDE_FLOAT32_C( 775.15)), simde_mm256_set_epi32(INT32_C( 864), INT32_C( 855), INT32_C(-619), INT32_C(-985), INT32_C(-511), INT32_C( 327), INT32_C( 88), INT32_C( 775)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 464.19), SIMDE_FLOAT32_C( -184.66), SIMDE_FLOAT32_C( 626.41), SIMDE_FLOAT32_C( -54.43), SIMDE_FLOAT32_C( 810.20), SIMDE_FLOAT32_C( 906.68), SIMDE_FLOAT32_C( -63.04), SIMDE_FLOAT32_C( -182.48)), simde_mm256_set_epi32(INT32_C( 464), INT32_C(-184), INT32_C( 626), INT32_C( -54), INT32_C( 810), INT32_C( 906), INT32_C( -63), INT32_C(-182)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 127.49), SIMDE_FLOAT32_C( -473.91), SIMDE_FLOAT32_C( -171.73), SIMDE_FLOAT32_C( -903.89), SIMDE_FLOAT32_C( 73.85), SIMDE_FLOAT32_C( -545.98), SIMDE_FLOAT32_C( -240.40), SIMDE_FLOAT32_C( 286.08)), simde_mm256_set_epi32(INT32_C( 127), 
INT32_C(-473), INT32_C(-171), INT32_C(-903), INT32_C( 73), INT32_C(-545), INT32_C(-240), INT32_C( 286)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -184.43), SIMDE_FLOAT32_C( 171.84), SIMDE_FLOAT32_C( -693.45), SIMDE_FLOAT32_C( -961.18), SIMDE_FLOAT32_C( -527.37), SIMDE_FLOAT32_C( 565.38), SIMDE_FLOAT32_C( 865.23), SIMDE_FLOAT32_C( 998.03)), simde_mm256_set_epi32(INT32_C(-184), INT32_C( 171), INT32_C(-693), INT32_C(-961), INT32_C(-527), INT32_C( 565), INT32_C( 865), INT32_C( 998)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 417.68), SIMDE_FLOAT32_C( 795.88), SIMDE_FLOAT32_C( -307.52), SIMDE_FLOAT32_C( 75.71), SIMDE_FLOAT32_C( -179.42), SIMDE_FLOAT32_C( -352.61), SIMDE_FLOAT32_C( -314.52), SIMDE_FLOAT32_C( 250.68)), simde_mm256_set_epi32(INT32_C( 417), INT32_C( 795), INT32_C(-307), INT32_C( 75), INT32_C(-179), INT32_C(-352), INT32_C(-314), INT32_C( 250)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 89.37), SIMDE_FLOAT32_C( -634.21), SIMDE_FLOAT32_C( 115.63), SIMDE_FLOAT32_C( 878.23), SIMDE_FLOAT32_C( -321.46), SIMDE_FLOAT32_C( 524.08), SIMDE_FLOAT32_C( 597.19), SIMDE_FLOAT32_C( 940.58)), simde_mm256_set_epi32(INT32_C( 89), INT32_C(-634), INT32_C( 115), INT32_C( 878), INT32_C(-321), INT32_C( 524), INT32_C( 597), INT32_C( 940)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -643.91), SIMDE_FLOAT32_C( 102.13), SIMDE_FLOAT32_C( 800.40), SIMDE_FLOAT32_C( -587.08), SIMDE_FLOAT32_C( -734.61), SIMDE_FLOAT32_C( 772.26), SIMDE_FLOAT32_C( -256.23), SIMDE_FLOAT32_C( -452.64)), simde_mm256_set_epi32(INT32_C(-643), INT32_C( 102), INT32_C( 800), INT32_C(-587), INT32_C(-734), INT32_C( 772), INT32_C(-256), INT32_C(-452)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cvttps_epi32(test_vec[i].a); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_div_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 675.83), 
SIMDE_FLOAT32_C( 732.26), SIMDE_FLOAT32_C( -4.57), SIMDE_FLOAT32_C( -168.80), SIMDE_FLOAT32_C( -520.00), SIMDE_FLOAT32_C( -692.17), SIMDE_FLOAT32_C( 934.56), SIMDE_FLOAT32_C( 631.79)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -378.09), SIMDE_FLOAT32_C( -656.19), SIMDE_FLOAT32_C( -265.99), SIMDE_FLOAT32_C( -457.08), SIMDE_FLOAT32_C( -481.51), SIMDE_FLOAT32_C( 732.73), SIMDE_FLOAT32_C( 321.36), SIMDE_FLOAT32_C( -269.65)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -1.79), SIMDE_FLOAT32_C( -1.12), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( 2.91), SIMDE_FLOAT32_C( -2.34)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 118.79), SIMDE_FLOAT32_C( 887.62), SIMDE_FLOAT32_C( 493.85), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( 954.00), SIMDE_FLOAT32_C( -438.20), SIMDE_FLOAT32_C( 457.40), SIMDE_FLOAT32_C( -597.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 136.05), SIMDE_FLOAT32_C( -648.50), SIMDE_FLOAT32_C( 975.99), SIMDE_FLOAT32_C( 125.14), SIMDE_FLOAT32_C( 391.49), SIMDE_FLOAT32_C( -989.28), SIMDE_FLOAT32_C( -980.53), SIMDE_FLOAT32_C( 107.28)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( -1.37), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( -4.43), SIMDE_FLOAT32_C( 2.44), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( -5.57)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 788.25), SIMDE_FLOAT32_C( -786.20), SIMDE_FLOAT32_C( -386.26), SIMDE_FLOAT32_C( -761.33), SIMDE_FLOAT32_C( 307.75), SIMDE_FLOAT32_C( 863.78), SIMDE_FLOAT32_C( 634.25), SIMDE_FLOAT32_C( 687.96)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 593.79), SIMDE_FLOAT32_C( -856.02), SIMDE_FLOAT32_C( 504.54), SIMDE_FLOAT32_C( 553.51), SIMDE_FLOAT32_C( 287.73), SIMDE_FLOAT32_C( -351.53), SIMDE_FLOAT32_C( -572.54), SIMDE_FLOAT32_C( 264.37)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.33), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -1.38), SIMDE_FLOAT32_C( 1.07), SIMDE_FLOAT32_C( -2.46), SIMDE_FLOAT32_C( -1.11), SIMDE_FLOAT32_C( 2.60)) 
}, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 139.28), SIMDE_FLOAT32_C( 906.17), SIMDE_FLOAT32_C( 486.23), SIMDE_FLOAT32_C( 556.78), SIMDE_FLOAT32_C( -178.50), SIMDE_FLOAT32_C( -222.99), SIMDE_FLOAT32_C( 642.44), SIMDE_FLOAT32_C( 839.86)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -272.29), SIMDE_FLOAT32_C( -752.32), SIMDE_FLOAT32_C( -335.75), SIMDE_FLOAT32_C( -704.13), SIMDE_FLOAT32_C( 526.15), SIMDE_FLOAT32_C( -407.90), SIMDE_FLOAT32_C( -13.13), SIMDE_FLOAT32_C( -893.18)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -1.20), SIMDE_FLOAT32_C( -1.45), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( -48.93), SIMDE_FLOAT32_C( -0.94)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -332.01), SIMDE_FLOAT32_C( -279.46), SIMDE_FLOAT32_C( 360.69), SIMDE_FLOAT32_C( -121.43), SIMDE_FLOAT32_C( 819.79), SIMDE_FLOAT32_C( 512.44), SIMDE_FLOAT32_C( -185.75), SIMDE_FLOAT32_C( 503.23)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 621.10), SIMDE_FLOAT32_C( 514.79), SIMDE_FLOAT32_C( -672.95), SIMDE_FLOAT32_C( -62.01), SIMDE_FLOAT32_C( -155.11), SIMDE_FLOAT32_C( 518.20), SIMDE_FLOAT32_C( -704.10), SIMDE_FLOAT32_C( -249.95)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( -5.29), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -2.01)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -784.88), SIMDE_FLOAT32_C( -855.22), SIMDE_FLOAT32_C( -347.04), SIMDE_FLOAT32_C( -241.02), SIMDE_FLOAT32_C( 748.57), SIMDE_FLOAT32_C( -179.02), SIMDE_FLOAT32_C( -995.77), SIMDE_FLOAT32_C( -927.09)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 868.43), SIMDE_FLOAT32_C( 9.90), SIMDE_FLOAT32_C( -308.85), SIMDE_FLOAT32_C( -944.06), SIMDE_FLOAT32_C( -323.62), SIMDE_FLOAT32_C( 739.02), SIMDE_FLOAT32_C( -61.38), SIMDE_FLOAT32_C( 426.14)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( -86.39), SIMDE_FLOAT32_C( 1.12), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -2.31), 
SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( 16.22), SIMDE_FLOAT32_C( -2.18)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 307.07), SIMDE_FLOAT32_C( 591.05), SIMDE_FLOAT32_C( -630.36), SIMDE_FLOAT32_C( 826.28), SIMDE_FLOAT32_C( -436.93), SIMDE_FLOAT32_C( 982.53), SIMDE_FLOAT32_C( -808.08), SIMDE_FLOAT32_C( 630.31)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -339.08), SIMDE_FLOAT32_C( -235.71), SIMDE_FLOAT32_C( -196.92), SIMDE_FLOAT32_C( 27.62), SIMDE_FLOAT32_C( -443.69), SIMDE_FLOAT32_C( 242.35), SIMDE_FLOAT32_C( 774.01), SIMDE_FLOAT32_C( 833.91)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( -2.51), SIMDE_FLOAT32_C( 3.20), SIMDE_FLOAT32_C( 29.92), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 4.05), SIMDE_FLOAT32_C( -1.04), SIMDE_FLOAT32_C( 0.76)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -830.19), SIMDE_FLOAT32_C( -620.56), SIMDE_FLOAT32_C( -306.38), SIMDE_FLOAT32_C( -602.04), SIMDE_FLOAT32_C( 183.46), SIMDE_FLOAT32_C( 824.79), SIMDE_FLOAT32_C( -492.06), SIMDE_FLOAT32_C( -609.65)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -335.15), SIMDE_FLOAT32_C( -390.73), SIMDE_FLOAT32_C( 951.40), SIMDE_FLOAT32_C( 398.19), SIMDE_FLOAT32_C( 181.71), SIMDE_FLOAT32_C( -932.03), SIMDE_FLOAT32_C( 887.77), SIMDE_FLOAT32_C( 257.75)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.48), SIMDE_FLOAT32_C( 1.59), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( -1.51), SIMDE_FLOAT32_C( 1.01), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( -2.37)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_div_ps(test_vec[i].a, test_vec[i].b); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_div_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 42.76), SIMDE_FLOAT64_C( 925.42), SIMDE_FLOAT64_C( 624.80), SIMDE_FLOAT64_C( 413.87)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -621.50), SIMDE_FLOAT64_C( -651.30), 
SIMDE_FLOAT64_C( -233.59), SIMDE_FLOAT64_C( -713.35)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.07), SIMDE_FLOAT64_C( -1.42), SIMDE_FLOAT64_C( -2.67), SIMDE_FLOAT64_C( -0.58)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 275.93), SIMDE_FLOAT64_C( 360.88), SIMDE_FLOAT64_C( -7.47), SIMDE_FLOAT64_C( -347.34)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 479.86), SIMDE_FLOAT64_C( 205.26), SIMDE_FLOAT64_C( 174.68), SIMDE_FLOAT64_C( 363.12)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 1.76), SIMDE_FLOAT64_C( -0.04), SIMDE_FLOAT64_C( -0.96)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 977.82), SIMDE_FLOAT64_C( 875.25), SIMDE_FLOAT64_C( 775.86), SIMDE_FLOAT64_C( 314.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -748.72), SIMDE_FLOAT64_C( 258.24), SIMDE_FLOAT64_C( -578.49), SIMDE_FLOAT64_C( -708.35)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -1.31), SIMDE_FLOAT64_C( 3.39), SIMDE_FLOAT64_C( -1.34), SIMDE_FLOAT64_C( -0.44)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 617.56), SIMDE_FLOAT64_C( -254.21), SIMDE_FLOAT64_C( -890.06), SIMDE_FLOAT64_C( -996.38)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -247.08), SIMDE_FLOAT64_C( 661.94), SIMDE_FLOAT64_C( -120.93), SIMDE_FLOAT64_C( -574.61)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -2.50), SIMDE_FLOAT64_C( -0.38), SIMDE_FLOAT64_C( 7.36), SIMDE_FLOAT64_C( 1.73)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -932.29), SIMDE_FLOAT64_C( -263.62), SIMDE_FLOAT64_C( -571.69), SIMDE_FLOAT64_C( -83.26)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 395.02), SIMDE_FLOAT64_C( 290.85), SIMDE_FLOAT64_C( -853.00), SIMDE_FLOAT64_C( 928.61)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -2.36), SIMDE_FLOAT64_C( -0.91), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( -0.09)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -109.60), SIMDE_FLOAT64_C( -812.14), SIMDE_FLOAT64_C( -474.36), SIMDE_FLOAT64_C( -732.62)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -982.62), SIMDE_FLOAT64_C( -995.34), SIMDE_FLOAT64_C( -51.94), SIMDE_FLOAT64_C( 973.17)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( 0.82), 
SIMDE_FLOAT64_C( 9.13), SIMDE_FLOAT64_C( -0.75)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -826.31), SIMDE_FLOAT64_C( -333.99), SIMDE_FLOAT64_C( -238.49), SIMDE_FLOAT64_C( -706.13)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 929.44), SIMDE_FLOAT64_C( 493.44), SIMDE_FLOAT64_C( -539.23), SIMDE_FLOAT64_C( -683.88)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.89), SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 1.03)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 419.98), SIMDE_FLOAT64_C( -51.88), SIMDE_FLOAT64_C( -580.15), SIMDE_FLOAT64_C( -198.88)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 246.03), SIMDE_FLOAT64_C( -149.94), SIMDE_FLOAT64_C( -107.67), SIMDE_FLOAT64_C( 875.62)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.71), SIMDE_FLOAT64_C( 0.35), SIMDE_FLOAT64_C( 5.39), SIMDE_FLOAT64_C( -0.23)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_div_pd(test_vec[i].a, test_vec[i].b); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_floor_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 520.72), SIMDE_FLOAT32_C( 834.16), SIMDE_FLOAT32_C( -945.36), SIMDE_FLOAT32_C( -135.41), SIMDE_FLOAT32_C( 289.19), SIMDE_FLOAT32_C( 462.54), SIMDE_FLOAT32_C( -937.67), SIMDE_FLOAT32_C( 706.09)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 520.00), SIMDE_FLOAT32_C( 834.00), SIMDE_FLOAT32_C( -946.00), SIMDE_FLOAT32_C( -136.00), SIMDE_FLOAT32_C( 289.00), SIMDE_FLOAT32_C( 462.00), SIMDE_FLOAT32_C( -938.00), SIMDE_FLOAT32_C( 706.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -9.00), SIMDE_FLOAT32_C( 401.24), SIMDE_FLOAT32_C( 899.70), SIMDE_FLOAT32_C( -258.03), SIMDE_FLOAT32_C( -634.92), SIMDE_FLOAT32_C( -438.26), SIMDE_FLOAT32_C( 433.94), SIMDE_FLOAT32_C( -170.51)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -9.00), SIMDE_FLOAT32_C( 401.00), SIMDE_FLOAT32_C( 899.00), SIMDE_FLOAT32_C( -259.00), SIMDE_FLOAT32_C( -635.00), SIMDE_FLOAT32_C( 
-439.00), SIMDE_FLOAT32_C( 433.00), SIMDE_FLOAT32_C( -171.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -908.95), SIMDE_FLOAT32_C( -376.33), SIMDE_FLOAT32_C( -639.88), SIMDE_FLOAT32_C( 40.45), SIMDE_FLOAT32_C( -431.46), SIMDE_FLOAT32_C( -404.49), SIMDE_FLOAT32_C( -411.60), SIMDE_FLOAT32_C( 531.65)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -909.00), SIMDE_FLOAT32_C( -377.00), SIMDE_FLOAT32_C( -640.00), SIMDE_FLOAT32_C( 40.00), SIMDE_FLOAT32_C( -432.00), SIMDE_FLOAT32_C( -405.00), SIMDE_FLOAT32_C( -412.00), SIMDE_FLOAT32_C( 531.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -394.79), SIMDE_FLOAT32_C( -654.48), SIMDE_FLOAT32_C( 223.95), SIMDE_FLOAT32_C( -557.45), SIMDE_FLOAT32_C( 908.61), SIMDE_FLOAT32_C( -493.34), SIMDE_FLOAT32_C( 466.68), SIMDE_FLOAT32_C( -301.36)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -395.00), SIMDE_FLOAT32_C( -655.00), SIMDE_FLOAT32_C( 223.00), SIMDE_FLOAT32_C( -558.00), SIMDE_FLOAT32_C( 908.00), SIMDE_FLOAT32_C( -494.00), SIMDE_FLOAT32_C( 466.00), SIMDE_FLOAT32_C( -302.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -5.40), SIMDE_FLOAT32_C( 828.84), SIMDE_FLOAT32_C( 468.99), SIMDE_FLOAT32_C( 665.66), SIMDE_FLOAT32_C( -648.14), SIMDE_FLOAT32_C( -841.90), SIMDE_FLOAT32_C( -380.33), SIMDE_FLOAT32_C( 740.32)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -6.00), SIMDE_FLOAT32_C( 828.00), SIMDE_FLOAT32_C( 468.00), SIMDE_FLOAT32_C( 665.00), SIMDE_FLOAT32_C( -649.00), SIMDE_FLOAT32_C( -842.00), SIMDE_FLOAT32_C( -381.00), SIMDE_FLOAT32_C( 740.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -752.16), SIMDE_FLOAT32_C( -655.98), SIMDE_FLOAT32_C( 902.13), SIMDE_FLOAT32_C( 972.30), SIMDE_FLOAT32_C( -497.57), SIMDE_FLOAT32_C( -530.16), SIMDE_FLOAT32_C( -966.55), SIMDE_FLOAT32_C( 570.95)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -753.00), SIMDE_FLOAT32_C( -656.00), SIMDE_FLOAT32_C( 902.00), SIMDE_FLOAT32_C( 972.00), SIMDE_FLOAT32_C( -498.00), SIMDE_FLOAT32_C( -531.00), SIMDE_FLOAT32_C( -967.00), SIMDE_FLOAT32_C( 570.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 723.18), 
SIMDE_FLOAT32_C( 202.51), SIMDE_FLOAT32_C( -41.39), SIMDE_FLOAT32_C( -372.98), SIMDE_FLOAT32_C( 697.91), SIMDE_FLOAT32_C( -303.11), SIMDE_FLOAT32_C( -180.07), SIMDE_FLOAT32_C( 941.44)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 723.00), SIMDE_FLOAT32_C( 202.00), SIMDE_FLOAT32_C( -42.00), SIMDE_FLOAT32_C( -373.00), SIMDE_FLOAT32_C( 697.00), SIMDE_FLOAT32_C( -304.00), SIMDE_FLOAT32_C( -181.00), SIMDE_FLOAT32_C( 941.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 995.24), SIMDE_FLOAT32_C( 68.12), SIMDE_FLOAT32_C( 284.41), SIMDE_FLOAT32_C( 723.96), SIMDE_FLOAT32_C( -373.95), SIMDE_FLOAT32_C( 15.43), SIMDE_FLOAT32_C( -498.85), SIMDE_FLOAT32_C( 581.12)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 995.00), SIMDE_FLOAT32_C( 68.00), SIMDE_FLOAT32_C( 284.00), SIMDE_FLOAT32_C( 723.00), SIMDE_FLOAT32_C( -374.00), SIMDE_FLOAT32_C( 15.00), SIMDE_FLOAT32_C( -499.00), SIMDE_FLOAT32_C( 581.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_floor_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_extractf128_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m128d ra; simde__m128d rb; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 115.05), SIMDE_FLOAT64_C( 580.50), SIMDE_FLOAT64_C( 784.61), SIMDE_FLOAT64_C( 6.02)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -748.60), SIMDE_FLOAT64_C( 328.25), SIMDE_FLOAT64_C( -515.20), SIMDE_FLOAT64_C( 761.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( 784.61), SIMDE_FLOAT64_C( 6.02)), simde_mm_set_pd(SIMDE_FLOAT64_C( -748.60), SIMDE_FLOAT64_C( 328.25)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -201.97), SIMDE_FLOAT64_C( -32.82), SIMDE_FLOAT64_C( 698.56), SIMDE_FLOAT64_C( -504.23)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -706.22), SIMDE_FLOAT64_C( 296.17), SIMDE_FLOAT64_C( 289.51), SIMDE_FLOAT64_C( -515.71)), simde_mm_set_pd(SIMDE_FLOAT64_C( 698.56), SIMDE_FLOAT64_C( -504.23)), simde_mm_set_pd(SIMDE_FLOAT64_C( -706.22), 
SIMDE_FLOAT64_C( 296.17)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 708.15), SIMDE_FLOAT64_C( -171.50), SIMDE_FLOAT64_C( 534.26), SIMDE_FLOAT64_C( -815.83)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -29.20), SIMDE_FLOAT64_C( -861.78), SIMDE_FLOAT64_C( -7.26), SIMDE_FLOAT64_C( 861.75)), simde_mm_set_pd(SIMDE_FLOAT64_C( 534.26), SIMDE_FLOAT64_C( -815.83)), simde_mm_set_pd(SIMDE_FLOAT64_C( -29.20), SIMDE_FLOAT64_C( -861.78)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 186.47), SIMDE_FLOAT64_C( 690.51), SIMDE_FLOAT64_C( -956.51), SIMDE_FLOAT64_C( 679.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 790.70), SIMDE_FLOAT64_C( 327.68), SIMDE_FLOAT64_C( -42.45), SIMDE_FLOAT64_C( 443.64)), simde_mm_set_pd(SIMDE_FLOAT64_C( -956.51), SIMDE_FLOAT64_C( 679.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 790.70), SIMDE_FLOAT64_C( 327.68)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -320.95), SIMDE_FLOAT64_C( 190.95), SIMDE_FLOAT64_C( -667.22), SIMDE_FLOAT64_C( -985.92)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -263.54), SIMDE_FLOAT64_C( 946.34), SIMDE_FLOAT64_C( 395.23), SIMDE_FLOAT64_C( 318.77)), simde_mm_set_pd(SIMDE_FLOAT64_C( -667.22), SIMDE_FLOAT64_C( -985.92)), simde_mm_set_pd(SIMDE_FLOAT64_C( -263.54), SIMDE_FLOAT64_C( 946.34)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 258.92), SIMDE_FLOAT64_C( -434.64), SIMDE_FLOAT64_C( 431.03), SIMDE_FLOAT64_C( -543.52)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 13.27), SIMDE_FLOAT64_C( -706.44), SIMDE_FLOAT64_C( 14.64), SIMDE_FLOAT64_C( -663.76)), simde_mm_set_pd(SIMDE_FLOAT64_C( 431.03), SIMDE_FLOAT64_C( -543.52)), simde_mm_set_pd(SIMDE_FLOAT64_C( 13.27), SIMDE_FLOAT64_C( -706.44)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -867.30), SIMDE_FLOAT64_C( 693.24), SIMDE_FLOAT64_C( -963.86), SIMDE_FLOAT64_C( 73.79)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -726.99), SIMDE_FLOAT64_C( 691.78), SIMDE_FLOAT64_C( 411.83), SIMDE_FLOAT64_C( 204.72)), simde_mm_set_pd(SIMDE_FLOAT64_C( -963.86), SIMDE_FLOAT64_C( 73.79)), simde_mm_set_pd(SIMDE_FLOAT64_C( -726.99), SIMDE_FLOAT64_C( 691.78)) }, { 
simde_mm256_set_pd(SIMDE_FLOAT64_C( -771.52), SIMDE_FLOAT64_C( -673.29), SIMDE_FLOAT64_C( -291.52), SIMDE_FLOAT64_C( -321.79)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 979.05), SIMDE_FLOAT64_C( 161.61), SIMDE_FLOAT64_C( 682.54), SIMDE_FLOAT64_C( 63.94)), simde_mm_set_pd(SIMDE_FLOAT64_C( -291.52), SIMDE_FLOAT64_C( -321.79)), simde_mm_set_pd(SIMDE_FLOAT64_C( 979.05), SIMDE_FLOAT64_C( 161.61)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d ra = simde_mm256_extractf128_pd(test_vec[i].a, 0); simde__m128d rb = simde_mm256_extractf128_pd(test_vec[i].b, 1); simde_assert_m128d_equal(ra, test_vec[i].ra); simde_assert_m128d_equal(rb, test_vec[i].rb); } return 0; } static int test_simde_mm256_extractf128_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m128 ra; simde__m128 rb; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -982.78), SIMDE_FLOAT32_C( 936.88), SIMDE_FLOAT32_C( 412.85), SIMDE_FLOAT32_C( -941.25), SIMDE_FLOAT32_C( 131.34), SIMDE_FLOAT32_C( 565.12), SIMDE_FLOAT32_C( -716.42), SIMDE_FLOAT32_C( -825.93)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 850.34), SIMDE_FLOAT32_C( -992.50), SIMDE_FLOAT32_C( 927.06), SIMDE_FLOAT32_C( -523.74), SIMDE_FLOAT32_C( -670.83), SIMDE_FLOAT32_C( 805.14), SIMDE_FLOAT32_C( -177.24), SIMDE_FLOAT32_C( -739.27)), simde_mm_set_ps(SIMDE_FLOAT32_C( 131.34), SIMDE_FLOAT32_C( 565.12), SIMDE_FLOAT32_C( -716.42), SIMDE_FLOAT32_C( -825.93)), simde_mm_set_ps(SIMDE_FLOAT32_C( -982.78), SIMDE_FLOAT32_C( 936.88), SIMDE_FLOAT32_C( 412.85), SIMDE_FLOAT32_C( -941.25)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -70.97), SIMDE_FLOAT32_C( -281.18), SIMDE_FLOAT32_C( 775.52), SIMDE_FLOAT32_C( -398.03), SIMDE_FLOAT32_C( 484.85), SIMDE_FLOAT32_C( -518.53), SIMDE_FLOAT32_C( -204.80), SIMDE_FLOAT32_C( -550.46)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -638.16), SIMDE_FLOAT32_C( -1.38), SIMDE_FLOAT32_C( -998.42), SIMDE_FLOAT32_C( 63.00), SIMDE_FLOAT32_C( 880.69), SIMDE_FLOAT32_C( 119.17), 
SIMDE_FLOAT32_C( 35.15), SIMDE_FLOAT32_C( -586.49)), simde_mm_set_ps(SIMDE_FLOAT32_C( 484.85), SIMDE_FLOAT32_C( -518.53), SIMDE_FLOAT32_C( -204.80), SIMDE_FLOAT32_C( -550.46)), simde_mm_set_ps(SIMDE_FLOAT32_C( -70.97), SIMDE_FLOAT32_C( -281.18), SIMDE_FLOAT32_C( 775.52), SIMDE_FLOAT32_C( -398.03)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 899.55), SIMDE_FLOAT32_C( 996.09), SIMDE_FLOAT32_C( -135.99), SIMDE_FLOAT32_C( 141.51), SIMDE_FLOAT32_C( -55.54), SIMDE_FLOAT32_C( 357.72), SIMDE_FLOAT32_C( -706.21), SIMDE_FLOAT32_C( 310.52)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 434.09), SIMDE_FLOAT32_C( 834.83), SIMDE_FLOAT32_C( -713.54), SIMDE_FLOAT32_C( -262.36), SIMDE_FLOAT32_C( 332.23), SIMDE_FLOAT32_C( 176.07), SIMDE_FLOAT32_C( -753.38), SIMDE_FLOAT32_C( -978.05)), simde_mm_set_ps(SIMDE_FLOAT32_C( -55.54), SIMDE_FLOAT32_C( 357.72), SIMDE_FLOAT32_C( -706.21), SIMDE_FLOAT32_C( 310.52)), simde_mm_set_ps(SIMDE_FLOAT32_C( 899.55), SIMDE_FLOAT32_C( 996.09), SIMDE_FLOAT32_C( -135.99), SIMDE_FLOAT32_C( 141.51)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 683.50), SIMDE_FLOAT32_C( -416.59), SIMDE_FLOAT32_C( 629.11), SIMDE_FLOAT32_C( 891.79), SIMDE_FLOAT32_C( -173.40), SIMDE_FLOAT32_C( -666.21), SIMDE_FLOAT32_C( -628.67), SIMDE_FLOAT32_C( 605.77)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -779.97), SIMDE_FLOAT32_C( 302.06), SIMDE_FLOAT32_C( 734.68), SIMDE_FLOAT32_C( -886.16), SIMDE_FLOAT32_C( 707.40), SIMDE_FLOAT32_C( 833.35), SIMDE_FLOAT32_C( 963.40), SIMDE_FLOAT32_C( 412.83)), simde_mm_set_ps(SIMDE_FLOAT32_C( -173.40), SIMDE_FLOAT32_C( -666.21), SIMDE_FLOAT32_C( -628.67), SIMDE_FLOAT32_C( 605.77)), simde_mm_set_ps(SIMDE_FLOAT32_C( 683.50), SIMDE_FLOAT32_C( -416.59), SIMDE_FLOAT32_C( 629.11), SIMDE_FLOAT32_C( 891.79)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -802.04), SIMDE_FLOAT32_C( 699.58), SIMDE_FLOAT32_C( 450.94), SIMDE_FLOAT32_C( -386.39), SIMDE_FLOAT32_C( 494.04), SIMDE_FLOAT32_C( 940.54), SIMDE_FLOAT32_C( -934.60), SIMDE_FLOAT32_C( -970.12)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 
648.13), SIMDE_FLOAT32_C( 946.05), SIMDE_FLOAT32_C( 808.22), SIMDE_FLOAT32_C( 517.90), SIMDE_FLOAT32_C( -691.53), SIMDE_FLOAT32_C( 525.44), SIMDE_FLOAT32_C( -474.29), SIMDE_FLOAT32_C( -454.31)), simde_mm_set_ps(SIMDE_FLOAT32_C( 494.04), SIMDE_FLOAT32_C( 940.54), SIMDE_FLOAT32_C( -934.60), SIMDE_FLOAT32_C( -970.12)), simde_mm_set_ps(SIMDE_FLOAT32_C( -802.04), SIMDE_FLOAT32_C( 699.58), SIMDE_FLOAT32_C( 450.94), SIMDE_FLOAT32_C( -386.39)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -543.05), SIMDE_FLOAT32_C( -31.72), SIMDE_FLOAT32_C( -407.93), SIMDE_FLOAT32_C( 926.97), SIMDE_FLOAT32_C( 179.76), SIMDE_FLOAT32_C( 712.03), SIMDE_FLOAT32_C( 463.85), SIMDE_FLOAT32_C( -838.23)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 673.45), SIMDE_FLOAT32_C( 368.84), SIMDE_FLOAT32_C( 678.61), SIMDE_FLOAT32_C( 857.34), SIMDE_FLOAT32_C( -482.39), SIMDE_FLOAT32_C( -94.31), SIMDE_FLOAT32_C( 471.32), SIMDE_FLOAT32_C( 173.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 179.76), SIMDE_FLOAT32_C( 712.03), SIMDE_FLOAT32_C( 463.85), SIMDE_FLOAT32_C( -838.23)), simde_mm_set_ps(SIMDE_FLOAT32_C( -543.05), SIMDE_FLOAT32_C( -31.72), SIMDE_FLOAT32_C( -407.93), SIMDE_FLOAT32_C( 926.97)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -929.66), SIMDE_FLOAT32_C( -223.82), SIMDE_FLOAT32_C( 340.48), SIMDE_FLOAT32_C( 717.03), SIMDE_FLOAT32_C( 895.13), SIMDE_FLOAT32_C( 964.64), SIMDE_FLOAT32_C( -654.82), SIMDE_FLOAT32_C( 74.87)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 665.16), SIMDE_FLOAT32_C( 209.26), SIMDE_FLOAT32_C( 133.64), SIMDE_FLOAT32_C( -42.02), SIMDE_FLOAT32_C( -424.16), SIMDE_FLOAT32_C( -122.50), SIMDE_FLOAT32_C( -788.87), SIMDE_FLOAT32_C( -239.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( 895.13), SIMDE_FLOAT32_C( 964.64), SIMDE_FLOAT32_C( -654.82), SIMDE_FLOAT32_C( 74.87)), simde_mm_set_ps(SIMDE_FLOAT32_C( -929.66), SIMDE_FLOAT32_C( -223.82), SIMDE_FLOAT32_C( 340.48), SIMDE_FLOAT32_C( 717.03)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 406.90), SIMDE_FLOAT32_C( -672.60), SIMDE_FLOAT32_C( 803.83), SIMDE_FLOAT32_C( -409.89), 
SIMDE_FLOAT32_C( 549.46), SIMDE_FLOAT32_C( 773.35), SIMDE_FLOAT32_C( -173.87), SIMDE_FLOAT32_C( 365.19)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 381.05), SIMDE_FLOAT32_C( -236.95), SIMDE_FLOAT32_C( -568.89), SIMDE_FLOAT32_C( 375.92), SIMDE_FLOAT32_C( 259.53), SIMDE_FLOAT32_C( -247.84), SIMDE_FLOAT32_C( 166.06), SIMDE_FLOAT32_C( -963.74)), simde_mm_set_ps(SIMDE_FLOAT32_C( 549.46), SIMDE_FLOAT32_C( 773.35), SIMDE_FLOAT32_C( -173.87), SIMDE_FLOAT32_C( 365.19)), simde_mm_set_ps(SIMDE_FLOAT32_C( 406.90), SIMDE_FLOAT32_C( -672.60), SIMDE_FLOAT32_C( 803.83), SIMDE_FLOAT32_C( -409.89)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 ra = simde_mm256_extractf128_ps(test_vec[i].a, 0); simde__m128 rb = simde_mm256_extractf128_ps(test_vec[i].a, 1); simde_assert_m128_close(ra, test_vec[i].ra, 1); simde_assert_m128_close(rb, test_vec[i].rb, 1); } return 0; } static int test_simde_mm256_extractf128_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m128i ra; simde__m128i rb; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 1229394801), INT32_C( 992221618), INT32_C(-1388107406), INT32_C( 780445625), INT32_C( 1795700153), INT32_C( -297324271), INT32_C( 1549329146), INT32_C( -534963225)), simde_mm256_set_epi32(INT32_C( -867719772), INT32_C(-1804212438), INT32_C( 1849818353), INT32_C( 405560893), INT32_C( 1351338386), INT32_C( -886724662), INT32_C( 1197680760), INT32_C( 1003042592)), simde_mm_set_epi32(INT32_C( 1795700153), INT32_C( -297324271), INT32_C( 1549329146), INT32_C( -534963225)), simde_mm_set_epi32(INT32_C( 1229394801), INT32_C( 992221618), INT32_C(-1388107406), INT32_C( 780445625)) }, { simde_mm256_set_epi32(INT32_C( 1839397279), INT32_C( -585358334), INT32_C( 779178160), INT32_C( -362976984), INT32_C(-1015866216), INT32_C( -556342867), INT32_C( -766379029), INT32_C( -130781639)), simde_mm256_set_epi32(INT32_C( 2021215895), INT32_C( -422044488), INT32_C(-1385706777), INT32_C( 22702781), INT32_C( 
1076807274), INT32_C(-1923875401), INT32_C( -554846936), INT32_C(-1008226174)), simde_mm_set_epi32(INT32_C(-1015866216), INT32_C( -556342867), INT32_C( -766379029), INT32_C( -130781639)), simde_mm_set_epi32(INT32_C( 1839397279), INT32_C( -585358334), INT32_C( 779178160), INT32_C( -362976984)) }, { simde_mm256_set_epi32(INT32_C( 23865749), INT32_C( -908972624), INT32_C(-1642418179), INT32_C( 111689864), INT32_C( -835154412), INT32_C( -431540196), INT32_C( -161564683), INT32_C( 204589457)), simde_mm256_set_epi32(INT32_C( 2088662618), INT32_C(-1671363325), INT32_C( -997695043), INT32_C( -809764814), INT32_C( 2092581708), INT32_C(-1073689737), INT32_C(-1556963227), INT32_C( -641330488)), simde_mm_set_epi32(INT32_C( -835154412), INT32_C( -431540196), INT32_C( -161564683), INT32_C( 204589457)), simde_mm_set_epi32(INT32_C( 23865749), INT32_C( -908972624), INT32_C(-1642418179), INT32_C( 111689864)) }, { simde_mm256_set_epi32(INT32_C( 1727711569), INT32_C(-1915329589), INT32_C( -979233658), INT32_C( -409203179), INT32_C( 1343207861), INT32_C(-1541174422), INT32_C(-2097250480), INT32_C(-1382492089)), simde_mm256_set_epi32(INT32_C( -42175512), INT32_C(-2146588690), INT32_C(-1902868938), INT32_C( 1919945739), INT32_C( 410749235), INT32_C(-1828962645), INT32_C( 525862553), INT32_C( -282512400)), simde_mm_set_epi32(INT32_C( 1343207861), INT32_C(-1541174422), INT32_C(-2097250480), INT32_C(-1382492089)), simde_mm_set_epi32(INT32_C( 1727711569), INT32_C(-1915329589), INT32_C( -979233658), INT32_C( -409203179)) }, { simde_mm256_set_epi32(INT32_C( 377369527), INT32_C( 1159197718), INT32_C( 288677560), INT32_C( 828517622), INT32_C( 1815109517), INT32_C( 1103735854), INT32_C( 1342116414), INT32_C( 1750949195)), simde_mm256_set_epi32(INT32_C( 1481955155), INT32_C( -119794855), INT32_C(-2109995042), INT32_C( 582656481), INT32_C( 1178951500), INT32_C( 762286037), INT32_C( 628377158), INT32_C( -188026020)), simde_mm_set_epi32(INT32_C( 1815109517), INT32_C( 1103735854), INT32_C( 
1342116414), INT32_C( 1750949195)), simde_mm_set_epi32(INT32_C( 377369527), INT32_C( 1159197718), INT32_C( 288677560), INT32_C( 828517622)) }, { simde_mm256_set_epi32(INT32_C(-1996051424), INT32_C( -314294760), INT32_C( -770521150), INT32_C( 508113145), INT32_C( -677093043), INT32_C( -527636644), INT32_C( 1238565466), INT32_C(-1592387355)), simde_mm256_set_epi32(INT32_C(-1510707643), INT32_C( 1988531398), INT32_C(-1182276921), INT32_C( 363503044), INT32_C( 2086268932), INT32_C( -428647595), INT32_C( 1685321543), INT32_C( 1979089365)), simde_mm_set_epi32(INT32_C( -677093043), INT32_C( -527636644), INT32_C( 1238565466), INT32_C(-1592387355)), simde_mm_set_epi32(INT32_C(-1996051424), INT32_C( -314294760), INT32_C( -770521150), INT32_C( 508113145)) }, { simde_mm256_set_epi32(INT32_C( 4593159), INT32_C( 1779671737), INT32_C( -569674634), INT32_C( -184254965), INT32_C( -665786654), INT32_C( 663766301), INT32_C(-1237697897), INT32_C( -260948936)), simde_mm256_set_epi32(INT32_C( -575114102), INT32_C( -399786699), INT32_C(-1468780124), INT32_C( 2032090700), INT32_C( 723386747), INT32_C(-1766232746), INT32_C( 73837413), INT32_C( 496540408)), simde_mm_set_epi32(INT32_C( -665786654), INT32_C( 663766301), INT32_C(-1237697897), INT32_C( -260948936)), simde_mm_set_epi32(INT32_C( 4593159), INT32_C( 1779671737), INT32_C( -569674634), INT32_C( -184254965)) }, { simde_mm256_set_epi32(INT32_C( -328197013), INT32_C( 1036318270), INT32_C(-1930293157), INT32_C( 1948339432), INT32_C( 1903716614), INT32_C(-1951673698), INT32_C(-1858071379), INT32_C( 2070124471)), simde_mm256_set_epi32(INT32_C(-1815372819), INT32_C( -102535612), INT32_C( 115383384), INT32_C( 1004544095), INT32_C( 1506420054), INT32_C(-1014523798), INT32_C(-1776388104), INT32_C( 1550371104)), simde_mm_set_epi32(INT32_C( 1903716614), INT32_C(-1951673698), INT32_C(-1858071379), INT32_C( 2070124471)), simde_mm_set_epi32(INT32_C( -328197013), INT32_C( 1036318270), INT32_C(-1930293157), INT32_C( 1948339432)) } }; for (size_t i = 
0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i ra = simde_mm256_extractf128_si256(test_vec[i].a, 0); simde__m128i rb = simde_mm256_extractf128_si256(test_vec[i].a, 1); simde_assert_m128i_i32(ra, ==, test_vec[i].ra); simde_assert_m128i_i32(rb, ==, test_vec[i].rb); } return 0; } /* Test for simde_mm256_floor_pd. Each of the 8 table entries pairs an input vector `a` with the expected vector `r`, whose elements are the inputs rounded down to whole numbers (e.g. -86.60 becomes -87.00 and 880.65 becomes 880.00). The loop after the table calls simde_mm256_floor_pd on every `a` and checks the result against `r` with simde_assert_m256d_close; the final argument 1 is presumably the comparison precision (confirm against the macro definition). Returns 0 once all entries have been checked. */ static int test_simde_mm256_floor_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -86.60), SIMDE_FLOAT64_C( -29.62), SIMDE_FLOAT64_C( 880.65), SIMDE_FLOAT64_C( 474.01)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -87.00), SIMDE_FLOAT64_C( -30.00), SIMDE_FLOAT64_C( 880.00), SIMDE_FLOAT64_C( 474.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 335.44), SIMDE_FLOAT64_C( 87.17), SIMDE_FLOAT64_C( 264.70), SIMDE_FLOAT64_C( 435.92)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 335.00), SIMDE_FLOAT64_C( 87.00), SIMDE_FLOAT64_C( 264.00), SIMDE_FLOAT64_C( 435.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 657.73), SIMDE_FLOAT64_C( -255.01), SIMDE_FLOAT64_C( -236.61), SIMDE_FLOAT64_C( 198.74)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 657.00), SIMDE_FLOAT64_C( -256.00), SIMDE_FLOAT64_C( -237.00), SIMDE_FLOAT64_C( 198.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -459.75), SIMDE_FLOAT64_C( 234.86), SIMDE_FLOAT64_C( -517.66), SIMDE_FLOAT64_C( -561.05)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -460.00), SIMDE_FLOAT64_C( 234.00), SIMDE_FLOAT64_C( -518.00), SIMDE_FLOAT64_C( -562.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -815.90), SIMDE_FLOAT64_C( -973.26), SIMDE_FLOAT64_C( -704.97), SIMDE_FLOAT64_C( 629.57)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -816.00), SIMDE_FLOAT64_C( -974.00), SIMDE_FLOAT64_C( -705.00), SIMDE_FLOAT64_C( 629.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -147.09), SIMDE_FLOAT64_C( -283.85), SIMDE_FLOAT64_C( 91.60), SIMDE_FLOAT64_C( -808.32)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -148.00), SIMDE_FLOAT64_C( -284.00), SIMDE_FLOAT64_C( 91.00), SIMDE_FLOAT64_C( -809.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 
346.15), SIMDE_FLOAT64_C( -862.92), SIMDE_FLOAT64_C( -616.19), SIMDE_FLOAT64_C( -434.01)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 346.00), SIMDE_FLOAT64_C( -863.00), SIMDE_FLOAT64_C( -617.00), SIMDE_FLOAT64_C( -435.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -593.06), SIMDE_FLOAT64_C( -286.08), SIMDE_FLOAT64_C( 351.32), SIMDE_FLOAT64_C( -29.06)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -594.00), SIMDE_FLOAT64_C( -287.00), SIMDE_FLOAT64_C( 351.00), SIMDE_FLOAT64_C( -30.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_floor_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } /* Test for simde_mm256_hadd_ps. The table holds 8 entries of inputs `a` and `b` plus the expected vector `r`. Judging by the vectors, `r` holds sums of adjacent element pairs taken from `a` and from `b`; in the first entry the last two values of `a`, 212.95 and -488.35, add up to the last value of `r`, -275.40. The loop after the table compares simde_mm256_hadd_ps(a, b) with `r` through simde_assert_m256_close; the final argument 1 is presumably the comparison precision (confirm against the macro definition). Returns 0 once all entries have been checked. */ static int test_simde_mm256_hadd_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -626.68), SIMDE_FLOAT32_C( -596.09), SIMDE_FLOAT32_C( -988.19), SIMDE_FLOAT32_C( 961.65), SIMDE_FLOAT32_C( 518.43), SIMDE_FLOAT32_C( 334.09), SIMDE_FLOAT32_C( 212.95), SIMDE_FLOAT32_C( -488.35)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -361.77), SIMDE_FLOAT32_C( 121.01), SIMDE_FLOAT32_C( -252.45), SIMDE_FLOAT32_C( 920.40), SIMDE_FLOAT32_C( -660.15), SIMDE_FLOAT32_C( -869.23), SIMDE_FLOAT32_C( 372.46), SIMDE_FLOAT32_C( 408.66)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -240.76), SIMDE_FLOAT32_C( 667.95), SIMDE_FLOAT32_C(-1222.77), SIMDE_FLOAT32_C( -26.54), SIMDE_FLOAT32_C(-1529.38), SIMDE_FLOAT32_C( 781.12), SIMDE_FLOAT32_C( 852.52), SIMDE_FLOAT32_C( -275.40)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 781.58), SIMDE_FLOAT32_C( 111.71), SIMDE_FLOAT32_C( -214.03), SIMDE_FLOAT32_C( -280.14), SIMDE_FLOAT32_C( 285.11), SIMDE_FLOAT32_C( -159.71), SIMDE_FLOAT32_C( 737.74), SIMDE_FLOAT32_C( 159.06)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -542.95), SIMDE_FLOAT32_C( 311.99), SIMDE_FLOAT32_C( -695.66), SIMDE_FLOAT32_C( -563.82), SIMDE_FLOAT32_C( -378.07), SIMDE_FLOAT32_C( 160.57), SIMDE_FLOAT32_C( 591.32), SIMDE_FLOAT32_C( -15.88)), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( -230.96), SIMDE_FLOAT32_C(-1259.48), SIMDE_FLOAT32_C( 893.29), SIMDE_FLOAT32_C( -494.17), SIMDE_FLOAT32_C( -217.50), SIMDE_FLOAT32_C( 575.44), SIMDE_FLOAT32_C( 125.40), SIMDE_FLOAT32_C( 896.80)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -397.87), SIMDE_FLOAT32_C( 582.70), SIMDE_FLOAT32_C( 728.77), SIMDE_FLOAT32_C( 563.76), SIMDE_FLOAT32_C( -874.44), SIMDE_FLOAT32_C( -323.73), SIMDE_FLOAT32_C( 191.14), SIMDE_FLOAT32_C( -425.30)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 398.85), SIMDE_FLOAT32_C( 435.48), SIMDE_FLOAT32_C( -736.85), SIMDE_FLOAT32_C( -251.61), SIMDE_FLOAT32_C( 363.70), SIMDE_FLOAT32_C( -850.74), SIMDE_FLOAT32_C( 513.62), SIMDE_FLOAT32_C( 893.23)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 834.33), SIMDE_FLOAT32_C( -988.46), SIMDE_FLOAT32_C( 184.83), SIMDE_FLOAT32_C( 1292.53), SIMDE_FLOAT32_C( -487.04), SIMDE_FLOAT32_C( 1406.85), SIMDE_FLOAT32_C(-1198.17), SIMDE_FLOAT32_C( -234.16)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -659.61), SIMDE_FLOAT32_C( -996.22), SIMDE_FLOAT32_C( 426.97), SIMDE_FLOAT32_C( 60.91), SIMDE_FLOAT32_C( 175.17), SIMDE_FLOAT32_C( 226.61), SIMDE_FLOAT32_C( 234.99), SIMDE_FLOAT32_C( 755.38)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -152.71), SIMDE_FLOAT32_C( 857.34), SIMDE_FLOAT32_C( 403.84), SIMDE_FLOAT32_C( -862.22), SIMDE_FLOAT32_C( 782.97), SIMDE_FLOAT32_C( 437.87), SIMDE_FLOAT32_C( 825.47), SIMDE_FLOAT32_C( 915.28)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 704.63), SIMDE_FLOAT32_C( -458.38), SIMDE_FLOAT32_C(-1655.83), SIMDE_FLOAT32_C( 487.88), SIMDE_FLOAT32_C( 1220.84), SIMDE_FLOAT32_C( 1740.75), SIMDE_FLOAT32_C( 401.78), SIMDE_FLOAT32_C( 990.37)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -564.13), SIMDE_FLOAT32_C( -973.32), SIMDE_FLOAT32_C( 233.13), SIMDE_FLOAT32_C( 504.79), SIMDE_FLOAT32_C( 857.55), SIMDE_FLOAT32_C( 275.11), SIMDE_FLOAT32_C( 643.61), SIMDE_FLOAT32_C( -70.12)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -709.38), SIMDE_FLOAT32_C( 310.06), SIMDE_FLOAT32_C( -793.34), SIMDE_FLOAT32_C( -947.00), 
SIMDE_FLOAT32_C( -974.52), SIMDE_FLOAT32_C( 878.25), SIMDE_FLOAT32_C( -856.10), SIMDE_FLOAT32_C( 529.04)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -399.32), SIMDE_FLOAT32_C(-1740.34), SIMDE_FLOAT32_C(-1537.45), SIMDE_FLOAT32_C( 737.92), SIMDE_FLOAT32_C( -96.27), SIMDE_FLOAT32_C( -327.06), SIMDE_FLOAT32_C( 1132.66), SIMDE_FLOAT32_C( 573.49)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 733.99), SIMDE_FLOAT32_C( -737.47), SIMDE_FLOAT32_C( -603.71), SIMDE_FLOAT32_C( 863.52), SIMDE_FLOAT32_C( -639.50), SIMDE_FLOAT32_C( 474.16), SIMDE_FLOAT32_C( 816.39), SIMDE_FLOAT32_C( 75.56)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 412.95), SIMDE_FLOAT32_C( 396.26), SIMDE_FLOAT32_C( 519.43), SIMDE_FLOAT32_C( -413.53), SIMDE_FLOAT32_C( -676.69), SIMDE_FLOAT32_C( -335.15), SIMDE_FLOAT32_C( 961.37), SIMDE_FLOAT32_C( 820.10)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 809.21), SIMDE_FLOAT32_C( 105.90), SIMDE_FLOAT32_C( -3.48), SIMDE_FLOAT32_C( 259.81), SIMDE_FLOAT32_C(-1011.84), SIMDE_FLOAT32_C( 1781.47), SIMDE_FLOAT32_C( -165.34), SIMDE_FLOAT32_C( 891.95)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -149.16), SIMDE_FLOAT32_C( -430.29), SIMDE_FLOAT32_C( -817.18), SIMDE_FLOAT32_C( 272.68), SIMDE_FLOAT32_C( -899.55), SIMDE_FLOAT32_C( -654.95), SIMDE_FLOAT32_C( 148.93), SIMDE_FLOAT32_C( 957.05)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -940.60), SIMDE_FLOAT32_C( 528.45), SIMDE_FLOAT32_C( 574.67), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( -532.80), SIMDE_FLOAT32_C( -214.29), SIMDE_FLOAT32_C( -506.86), SIMDE_FLOAT32_C( 389.73)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -412.15), SIMDE_FLOAT32_C( 1568.57), SIMDE_FLOAT32_C( -579.45), SIMDE_FLOAT32_C( -544.50), SIMDE_FLOAT32_C( -747.09), SIMDE_FLOAT32_C( -117.13), SIMDE_FLOAT32_C(-1554.50), SIMDE_FLOAT32_C( 1105.98)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -426.98), SIMDE_FLOAT32_C( 653.68), SIMDE_FLOAT32_C( 854.01), SIMDE_FLOAT32_C( -871.75), SIMDE_FLOAT32_C( 60.41), SIMDE_FLOAT32_C( 197.76), SIMDE_FLOAT32_C( -611.16), SIMDE_FLOAT32_C( 848.76)), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( 243.99), SIMDE_FLOAT32_C( -967.54), SIMDE_FLOAT32_C( 893.59), SIMDE_FLOAT32_C( -630.65), SIMDE_FLOAT32_C( -132.65), SIMDE_FLOAT32_C( -434.90), SIMDE_FLOAT32_C( -516.97), SIMDE_FLOAT32_C( 151.24)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -723.55), SIMDE_FLOAT32_C( 262.94), SIMDE_FLOAT32_C( 226.70), SIMDE_FLOAT32_C( -17.74), SIMDE_FLOAT32_C( -567.55), SIMDE_FLOAT32_C( -365.73), SIMDE_FLOAT32_C( 258.17), SIMDE_FLOAT32_C( 237.60)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_hadd_ps(test_vec[i].a, test_vec[i].b); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } /* Test for simde_mm256_hadd_pd. The table holds 8 entries of inputs `a` and `b` plus the expected vector `r`. Judging by the vectors, `r` interleaves sums of adjacent element pairs of `a` and of `b`; in the first entry the last two values of `a`, -278.34 and 588.89, add up to the last value of `r`, 310.55, and the last two values of `b`, -646.84 and 150.94, add up to the value before it, -495.90. The loop after the table compares simde_mm256_hadd_pd(a, b) with `r` through simde_assert_m256d_close; the final argument 1 is presumably the comparison precision (confirm against the macro definition). Returns 0 once all entries have been checked. */ static int test_simde_mm256_hadd_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -436.68), SIMDE_FLOAT64_C( 480.99), SIMDE_FLOAT64_C( -278.34), SIMDE_FLOAT64_C( 588.89)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 130.14), SIMDE_FLOAT64_C( -927.67), SIMDE_FLOAT64_C( -646.84), SIMDE_FLOAT64_C( 150.94)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -797.53), SIMDE_FLOAT64_C( 44.31), SIMDE_FLOAT64_C( -495.90), SIMDE_FLOAT64_C( 310.55)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -433.18), SIMDE_FLOAT64_C( 708.48), SIMDE_FLOAT64_C( 534.86), SIMDE_FLOAT64_C( -929.94)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 826.52), SIMDE_FLOAT64_C( 36.50), SIMDE_FLOAT64_C( 561.99), SIMDE_FLOAT64_C( -293.03)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 863.02), SIMDE_FLOAT64_C( 275.30), SIMDE_FLOAT64_C( 268.96), SIMDE_FLOAT64_C( -395.08)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 255.04), SIMDE_FLOAT64_C( -637.84), SIMDE_FLOAT64_C( -513.11), SIMDE_FLOAT64_C( -599.83)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -910.36), SIMDE_FLOAT64_C( -799.15), SIMDE_FLOAT64_C( -982.23), SIMDE_FLOAT64_C( -206.95)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-1709.51), SIMDE_FLOAT64_C( -382.80), SIMDE_FLOAT64_C(-1189.18), SIMDE_FLOAT64_C(-1112.94)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 
149.87), SIMDE_FLOAT64_C( 274.56), SIMDE_FLOAT64_C( 400.10), SIMDE_FLOAT64_C( 410.40)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 883.42), SIMDE_FLOAT64_C( 46.03), SIMDE_FLOAT64_C( -600.38), SIMDE_FLOAT64_C( 131.34)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 929.45), SIMDE_FLOAT64_C( 424.43), SIMDE_FLOAT64_C( -469.04), SIMDE_FLOAT64_C( 810.50)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -461.17), SIMDE_FLOAT64_C( -757.61), SIMDE_FLOAT64_C( -114.45), SIMDE_FLOAT64_C( 853.48)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 397.09), SIMDE_FLOAT64_C( -444.66), SIMDE_FLOAT64_C( -909.26), SIMDE_FLOAT64_C( 102.16)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -47.57), SIMDE_FLOAT64_C(-1218.78), SIMDE_FLOAT64_C( -807.10), SIMDE_FLOAT64_C( 739.03)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -703.10), SIMDE_FLOAT64_C( -547.05), SIMDE_FLOAT64_C( -158.62), SIMDE_FLOAT64_C( -256.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 137.64), SIMDE_FLOAT64_C( -342.68), SIMDE_FLOAT64_C( 619.75), SIMDE_FLOAT64_C( 498.04)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -205.04), SIMDE_FLOAT64_C(-1250.15), SIMDE_FLOAT64_C( 1117.79), SIMDE_FLOAT64_C( -415.32)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -818.29), SIMDE_FLOAT64_C( 196.32), SIMDE_FLOAT64_C( -434.03), SIMDE_FLOAT64_C( 36.18)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -181.91), SIMDE_FLOAT64_C( 587.12), SIMDE_FLOAT64_C( -318.37), SIMDE_FLOAT64_C( -24.13)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 405.21), SIMDE_FLOAT64_C( -621.97), SIMDE_FLOAT64_C( -342.50), SIMDE_FLOAT64_C( -397.85)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 129.06), SIMDE_FLOAT64_C( -240.83), SIMDE_FLOAT64_C( -486.28), SIMDE_FLOAT64_C( 630.75)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 679.75), SIMDE_FLOAT64_C( -37.94), SIMDE_FLOAT64_C( 761.33), SIMDE_FLOAT64_C( -837.74)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 641.81), SIMDE_FLOAT64_C( -111.77), SIMDE_FLOAT64_C( -76.41), SIMDE_FLOAT64_C( 144.47)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_hadd_pd(test_vec[i].a, 
test_vec[i].b); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } /* Test for simde_mm256_hsub_ps. The table holds 8 entries of inputs `a` and `b` plus the expected vector `r`. Judging by the vectors, `r` holds differences of adjacent element pairs taken from `a` and from `b`; in the first entry the last value of `r`, -291.88, equals the last value of `a`, 164.33, minus the value before it, 456.21. The loop after the table compares simde_mm256_hsub_ps(a, b) with `r` through simde_assert_m256_close; the final argument 1 is presumably the comparison precision (confirm against the macro definition). Returns 0 once all entries have been checked. */ static int test_simde_mm256_hsub_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -183.85), SIMDE_FLOAT32_C( 905.07), SIMDE_FLOAT32_C( -962.47), SIMDE_FLOAT32_C( 739.25), SIMDE_FLOAT32_C( 13.54), SIMDE_FLOAT32_C( -172.40), SIMDE_FLOAT32_C( 456.21), SIMDE_FLOAT32_C( 164.33)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 959.87), SIMDE_FLOAT32_C( 500.02), SIMDE_FLOAT32_C( -991.36), SIMDE_FLOAT32_C( 373.08), SIMDE_FLOAT32_C( -962.56), SIMDE_FLOAT32_C( -502.91), SIMDE_FLOAT32_C( -108.93), SIMDE_FLOAT32_C( 403.37)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -459.85), SIMDE_FLOAT32_C( 1364.44), SIMDE_FLOAT32_C( 1088.92), SIMDE_FLOAT32_C( 1701.72), SIMDE_FLOAT32_C( 459.65), SIMDE_FLOAT32_C( 512.30), SIMDE_FLOAT32_C( -185.94), SIMDE_FLOAT32_C( -291.88)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 716.55), SIMDE_FLOAT32_C( -798.82), SIMDE_FLOAT32_C( -17.12), SIMDE_FLOAT32_C( 981.07), SIMDE_FLOAT32_C( -241.05), SIMDE_FLOAT32_C( 266.35), SIMDE_FLOAT32_C( 140.17), SIMDE_FLOAT32_C( 285.86)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 835.80), SIMDE_FLOAT32_C( 802.76), SIMDE_FLOAT32_C( -745.28), SIMDE_FLOAT32_C( -228.38), SIMDE_FLOAT32_C( -44.09), SIMDE_FLOAT32_C( 991.15), SIMDE_FLOAT32_C( 461.60), SIMDE_FLOAT32_C( 89.29)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -33.04), SIMDE_FLOAT32_C( 516.90), SIMDE_FLOAT32_C(-1515.37), SIMDE_FLOAT32_C( 998.19), SIMDE_FLOAT32_C( 1035.24), SIMDE_FLOAT32_C( -372.31), SIMDE_FLOAT32_C( 507.40), SIMDE_FLOAT32_C( 145.69)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -323.55), SIMDE_FLOAT32_C( -835.05), SIMDE_FLOAT32_C( -869.80), SIMDE_FLOAT32_C( -771.34), SIMDE_FLOAT32_C( -342.71), SIMDE_FLOAT32_C( 374.92), SIMDE_FLOAT32_C( -998.95), SIMDE_FLOAT32_C( 85.31)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -235.37), SIMDE_FLOAT32_C( -740.75), SIMDE_FLOAT32_C( 568.96), SIMDE_FLOAT32_C( 984.74), SIMDE_FLOAT32_C( 344.48), 
SIMDE_FLOAT32_C( -384.09), SIMDE_FLOAT32_C( -746.69), SIMDE_FLOAT32_C( 666.35)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -505.38), SIMDE_FLOAT32_C( 415.78), SIMDE_FLOAT32_C( -511.50), SIMDE_FLOAT32_C( 98.46), SIMDE_FLOAT32_C( -728.57), SIMDE_FLOAT32_C( 1413.04), SIMDE_FLOAT32_C( 717.63), SIMDE_FLOAT32_C( 1084.26)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 685.93), SIMDE_FLOAT32_C( 492.23), SIMDE_FLOAT32_C( 668.17), SIMDE_FLOAT32_C( -421.85), SIMDE_FLOAT32_C( -93.75), SIMDE_FLOAT32_C( -819.96), SIMDE_FLOAT32_C( -246.22), SIMDE_FLOAT32_C( -823.51)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 304.04), SIMDE_FLOAT32_C( -843.07), SIMDE_FLOAT32_C( -204.07), SIMDE_FLOAT32_C( -879.53), SIMDE_FLOAT32_C( -83.04), SIMDE_FLOAT32_C( -516.58), SIMDE_FLOAT32_C( 600.96), SIMDE_FLOAT32_C( 84.13)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-1147.11), SIMDE_FLOAT32_C( -675.46), SIMDE_FLOAT32_C( -193.70), SIMDE_FLOAT32_C(-1090.02), SIMDE_FLOAT32_C( -433.54), SIMDE_FLOAT32_C( -516.83), SIMDE_FLOAT32_C( -726.21), SIMDE_FLOAT32_C( -577.29)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 492.05), SIMDE_FLOAT32_C( -121.48), SIMDE_FLOAT32_C( 197.17), SIMDE_FLOAT32_C( 108.83), SIMDE_FLOAT32_C( -910.74), SIMDE_FLOAT32_C( -610.63), SIMDE_FLOAT32_C( -510.98), SIMDE_FLOAT32_C( 996.81)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -210.55), SIMDE_FLOAT32_C( 321.75), SIMDE_FLOAT32_C( -949.93), SIMDE_FLOAT32_C( 547.84), SIMDE_FLOAT32_C( 133.08), SIMDE_FLOAT32_C( -303.40), SIMDE_FLOAT32_C( 47.44), SIMDE_FLOAT32_C( -236.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 532.30), SIMDE_FLOAT32_C( 1497.77), SIMDE_FLOAT32_C( -613.53), SIMDE_FLOAT32_C( -88.34), SIMDE_FLOAT32_C( -436.48), SIMDE_FLOAT32_C( -283.94), SIMDE_FLOAT32_C( 300.11), SIMDE_FLOAT32_C( 1507.79)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 204.77), SIMDE_FLOAT32_C( -920.82), SIMDE_FLOAT32_C( -807.55), SIMDE_FLOAT32_C( 138.87), SIMDE_FLOAT32_C( 34.09), SIMDE_FLOAT32_C( -826.98), SIMDE_FLOAT32_C( -567.48), SIMDE_FLOAT32_C( 943.56)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -46.97), 
SIMDE_FLOAT32_C( 474.05), SIMDE_FLOAT32_C( -64.19), SIMDE_FLOAT32_C( -945.80), SIMDE_FLOAT32_C( -873.08), SIMDE_FLOAT32_C( -569.02), SIMDE_FLOAT32_C( -630.19), SIMDE_FLOAT32_C( -681.68)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 521.02), SIMDE_FLOAT32_C( -881.61), SIMDE_FLOAT32_C(-1125.59), SIMDE_FLOAT32_C( 946.42), SIMDE_FLOAT32_C( 304.06), SIMDE_FLOAT32_C( -51.49), SIMDE_FLOAT32_C( -861.07), SIMDE_FLOAT32_C( 1511.04)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 470.38), SIMDE_FLOAT32_C( -693.79), SIMDE_FLOAT32_C( -843.14), SIMDE_FLOAT32_C( -640.08), SIMDE_FLOAT32_C( 950.30), SIMDE_FLOAT32_C( 582.04), SIMDE_FLOAT32_C( -585.94), SIMDE_FLOAT32_C( 175.69)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 45.69), SIMDE_FLOAT32_C( 38.77), SIMDE_FLOAT32_C( 194.04), SIMDE_FLOAT32_C( 410.12), SIMDE_FLOAT32_C( -28.08), SIMDE_FLOAT32_C( -596.23), SIMDE_FLOAT32_C( -38.68), SIMDE_FLOAT32_C( -731.17)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -6.92), SIMDE_FLOAT32_C( 216.08), SIMDE_FLOAT32_C(-1164.17), SIMDE_FLOAT32_C( 203.06), SIMDE_FLOAT32_C( -568.15), SIMDE_FLOAT32_C( -692.49), SIMDE_FLOAT32_C( -368.26), SIMDE_FLOAT32_C( 761.63)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -171.77), SIMDE_FLOAT32_C( -878.69), SIMDE_FLOAT32_C( -337.47), SIMDE_FLOAT32_C( -864.26), SIMDE_FLOAT32_C( 976.73), SIMDE_FLOAT32_C( 253.08), SIMDE_FLOAT32_C( 134.24), SIMDE_FLOAT32_C( -737.89)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 167.53), SIMDE_FLOAT32_C( -453.33), SIMDE_FLOAT32_C( 11.70), SIMDE_FLOAT32_C( 471.07), SIMDE_FLOAT32_C( -269.64), SIMDE_FLOAT32_C( 547.27), SIMDE_FLOAT32_C( -313.69), SIMDE_FLOAT32_C( -333.24)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -620.86), SIMDE_FLOAT32_C( 459.37), SIMDE_FLOAT32_C( -706.92), SIMDE_FLOAT32_C( -526.79), SIMDE_FLOAT32_C( 816.91), SIMDE_FLOAT32_C( -19.55), SIMDE_FLOAT32_C( -723.65), SIMDE_FLOAT32_C( -872.13)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_hsub_ps(test_vec[i].a, test_vec[i].b); simde_assert_m256_close(r, 
test_vec[i].r, 1); } return 0; } /* Test for simde_mm256_hsub_pd. The table holds 8 entries of inputs `a` and `b` plus the expected vector `r`. Judging by the vectors, `r` interleaves differences of adjacent element pairs of `a` and of `b`; in the first entry the last value of `r`, -850.90, equals the last value of `a`, -18.37, minus the value before it, 832.53. The loop after the table compares simde_mm256_hsub_pd(a, b) with `r` through simde_assert_m256d_close; the final argument 1 is presumably the comparison precision (confirm against the macro definition). Returns 0 once all entries have been checked. */ static int test_simde_mm256_hsub_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -762.69), SIMDE_FLOAT64_C( 237.58), SIMDE_FLOAT64_C( 832.53), SIMDE_FLOAT64_C( -18.37)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 974.95), SIMDE_FLOAT64_C( -69.86), SIMDE_FLOAT64_C( 78.29), SIMDE_FLOAT64_C( -156.35)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-1044.81), SIMDE_FLOAT64_C( 1000.27), SIMDE_FLOAT64_C( -234.64), SIMDE_FLOAT64_C( -850.90)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 220.65), SIMDE_FLOAT64_C( -139.75), SIMDE_FLOAT64_C( -707.34), SIMDE_FLOAT64_C( -798.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 397.80), SIMDE_FLOAT64_C( -497.83), SIMDE_FLOAT64_C( 717.31), SIMDE_FLOAT64_C( -807.72)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -895.63), SIMDE_FLOAT64_C( -360.40), SIMDE_FLOAT64_C(-1525.03), SIMDE_FLOAT64_C( -91.42)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 105.63), SIMDE_FLOAT64_C( 306.00), SIMDE_FLOAT64_C( 281.00), SIMDE_FLOAT64_C( 310.89)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -325.57), SIMDE_FLOAT64_C( 534.52), SIMDE_FLOAT64_C( 987.26), SIMDE_FLOAT64_C( 787.06)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 860.09), SIMDE_FLOAT64_C( 200.37), SIMDE_FLOAT64_C( -200.20), SIMDE_FLOAT64_C( 29.89)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 967.71), SIMDE_FLOAT64_C( -641.51), SIMDE_FLOAT64_C( -759.32), SIMDE_FLOAT64_C( -97.44)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 348.76), SIMDE_FLOAT64_C( -255.68), SIMDE_FLOAT64_C( 982.70), SIMDE_FLOAT64_C( 155.49)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -604.44), SIMDE_FLOAT64_C(-1609.22), SIMDE_FLOAT64_C( -827.21), SIMDE_FLOAT64_C( 661.88)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 679.95), SIMDE_FLOAT64_C( -269.56), SIMDE_FLOAT64_C( -481.42), SIMDE_FLOAT64_C( 919.16)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -2.64), SIMDE_FLOAT64_C( 468.75), SIMDE_FLOAT64_C( -36.44), SIMDE_FLOAT64_C( 441.73)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 471.39), 
SIMDE_FLOAT64_C( -949.51), SIMDE_FLOAT64_C( 478.17), SIMDE_FLOAT64_C( 1400.58)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -496.34), SIMDE_FLOAT64_C( 144.53), SIMDE_FLOAT64_C( -0.78), SIMDE_FLOAT64_C( -49.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 551.16), SIMDE_FLOAT64_C( 1.32), SIMDE_FLOAT64_C( -388.16), SIMDE_FLOAT64_C( 219.25)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -549.84), SIMDE_FLOAT64_C( 640.87), SIMDE_FLOAT64_C( 607.41), SIMDE_FLOAT64_C( -48.92)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -33.11), SIMDE_FLOAT64_C( -186.08), SIMDE_FLOAT64_C( 701.92), SIMDE_FLOAT64_C( 14.26)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 92.65), SIMDE_FLOAT64_C( -100.54), SIMDE_FLOAT64_C( -271.34), SIMDE_FLOAT64_C( -61.14)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -193.19), SIMDE_FLOAT64_C( -152.97), SIMDE_FLOAT64_C( 210.20), SIMDE_FLOAT64_C( -687.66)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 116.05), SIMDE_FLOAT64_C( -582.94), SIMDE_FLOAT64_C( -9.93), SIMDE_FLOAT64_C( -395.51)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 109.95), SIMDE_FLOAT64_C( -493.57), SIMDE_FLOAT64_C( 927.71), SIMDE_FLOAT64_C( 40.21)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -603.52), SIMDE_FLOAT64_C( -698.99), SIMDE_FLOAT64_C( -887.50), SIMDE_FLOAT64_C( -385.58)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_hsub_pd(test_vec[i].a, test_vec[i].b); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } /* Test for simde_mm256_dp_ps. The table holds 8 entries of inputs `a` and `b` plus the expected vector `r`, and the loop after the table always calls simde_mm256_dp_ps with the immediate 47. Every expected vector repeats one value across its first four elements and another across its last four; in the first entry the first four values of `r`, -16755.34, match the product of the third values of `a` and `b`, 17.33 and -966.84. Results are compared through simde_assert_m256_close; the final argument 1 is presumably the comparison precision (confirm against the macro definition). Returns 0 once all entries have been checked. */ static int test_simde_mm256_dp_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -505.73), SIMDE_FLOAT32_C( -137.42), SIMDE_FLOAT32_C( 17.33), SIMDE_FLOAT32_C( 756.92), SIMDE_FLOAT32_C( -935.43), SIMDE_FLOAT32_C( 966.58), SIMDE_FLOAT32_C( -542.20), SIMDE_FLOAT32_C( -986.95)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -727.11), SIMDE_FLOAT32_C( 41.36), SIMDE_FLOAT32_C( -966.84), SIMDE_FLOAT32_C( -80.50), SIMDE_FLOAT32_C( 623.90), SIMDE_FLOAT32_C( -996.55), SIMDE_FLOAT32_C( -173.15), 
SIMDE_FLOAT32_C( -230.46)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -16755.34), SIMDE_FLOAT32_C( -16755.34), SIMDE_FLOAT32_C( -16755.34), SIMDE_FLOAT32_C( -16755.34), SIMDE_FLOAT32_C( 93881.93), SIMDE_FLOAT32_C( 93881.93), SIMDE_FLOAT32_C( 93881.93), SIMDE_FLOAT32_C( 93881.93)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 203.19), SIMDE_FLOAT32_C( -301.86), SIMDE_FLOAT32_C( -510.29), SIMDE_FLOAT32_C( -548.13), SIMDE_FLOAT32_C( 769.15), SIMDE_FLOAT32_C( 758.71), SIMDE_FLOAT32_C( 788.95), SIMDE_FLOAT32_C( -308.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -93.30), SIMDE_FLOAT32_C( -209.30), SIMDE_FLOAT32_C( 125.32), SIMDE_FLOAT32_C( -995.11), SIMDE_FLOAT32_C( 443.92), SIMDE_FLOAT32_C( 15.16), SIMDE_FLOAT32_C( 480.88), SIMDE_FLOAT32_C( -179.52)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -63949.54), SIMDE_FLOAT32_C( -63949.54), SIMDE_FLOAT32_C( -63949.54), SIMDE_FLOAT32_C( -63949.54), SIMDE_FLOAT32_C( 379390.28), SIMDE_FLOAT32_C( 379390.28), SIMDE_FLOAT32_C( 379390.28), SIMDE_FLOAT32_C( 379390.28)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -642.96), SIMDE_FLOAT32_C( 460.10), SIMDE_FLOAT32_C( 365.68), SIMDE_FLOAT32_C( 149.19), SIMDE_FLOAT32_C( -863.16), SIMDE_FLOAT32_C( 539.13), SIMDE_FLOAT32_C( -10.06), SIMDE_FLOAT32_C( -915.55)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -102.23), SIMDE_FLOAT32_C( 63.93), SIMDE_FLOAT32_C( 220.16), SIMDE_FLOAT32_C( -95.11), SIMDE_FLOAT32_C( 920.74), SIMDE_FLOAT32_C( -798.64), SIMDE_FLOAT32_C( 549.26), SIMDE_FLOAT32_C( 150.46)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 80508.11), SIMDE_FLOAT32_C( 80508.11), SIMDE_FLOAT32_C( 80508.11), SIMDE_FLOAT32_C( 80508.11), SIMDE_FLOAT32_C( -5525.56), SIMDE_FLOAT32_C( -5525.56), SIMDE_FLOAT32_C( -5525.56), SIMDE_FLOAT32_C( -5525.56)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -135.78), SIMDE_FLOAT32_C( -796.33), SIMDE_FLOAT32_C( -449.54), SIMDE_FLOAT32_C( -938.34), SIMDE_FLOAT32_C( 393.31), SIMDE_FLOAT32_C( -848.57), SIMDE_FLOAT32_C( -577.93), SIMDE_FLOAT32_C( -905.86)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 36.84), 
SIMDE_FLOAT32_C( 187.09), SIMDE_FLOAT32_C( -980.80), SIMDE_FLOAT32_C( -813.89), SIMDE_FLOAT32_C( -648.02), SIMDE_FLOAT32_C( 86.79), SIMDE_FLOAT32_C( 527.03), SIMDE_FLOAT32_C( -592.02)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 440908.84), SIMDE_FLOAT32_C( 440908.84), SIMDE_FLOAT32_C( 440908.84), SIMDE_FLOAT32_C( 440908.84), SIMDE_FLOAT32_C(-304586.47), SIMDE_FLOAT32_C(-304586.47), SIMDE_FLOAT32_C(-304586.47), SIMDE_FLOAT32_C(-304586.47)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 74.94), SIMDE_FLOAT32_C( 503.91), SIMDE_FLOAT32_C( -375.34), SIMDE_FLOAT32_C( -743.13), SIMDE_FLOAT32_C( 569.57), SIMDE_FLOAT32_C( 343.31), SIMDE_FLOAT32_C( -63.75), SIMDE_FLOAT32_C( -543.95)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -655.40), SIMDE_FLOAT32_C( -172.92), SIMDE_FLOAT32_C( 342.45), SIMDE_FLOAT32_C( 357.80), SIMDE_FLOAT32_C( 265.80), SIMDE_FLOAT32_C( -306.65), SIMDE_FLOAT32_C( 8.98), SIMDE_FLOAT32_C( 608.49)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-128535.19), SIMDE_FLOAT32_C(-128535.19), SIMDE_FLOAT32_C(-128535.19), SIMDE_FLOAT32_C(-128535.19), SIMDE_FLOAT32_C( -572.47), SIMDE_FLOAT32_C( -572.47), SIMDE_FLOAT32_C( -572.47), SIMDE_FLOAT32_C( -572.47)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 691.92), SIMDE_FLOAT32_C( -222.27), SIMDE_FLOAT32_C( -447.07), SIMDE_FLOAT32_C( 147.51), SIMDE_FLOAT32_C( 537.10), SIMDE_FLOAT32_C( 171.81), SIMDE_FLOAT32_C( 347.32), SIMDE_FLOAT32_C( -960.39)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 144.97), SIMDE_FLOAT32_C( -282.53), SIMDE_FLOAT32_C( 612.87), SIMDE_FLOAT32_C( -406.95), SIMDE_FLOAT32_C( -724.51), SIMDE_FLOAT32_C( -908.13), SIMDE_FLOAT32_C( 448.37), SIMDE_FLOAT32_C( -713.68)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-273995.78), SIMDE_FLOAT32_C(-273995.78), SIMDE_FLOAT32_C(-273995.78), SIMDE_FLOAT32_C(-273995.78), SIMDE_FLOAT32_C( 155727.88), SIMDE_FLOAT32_C( 155727.88), SIMDE_FLOAT32_C( 155727.88), SIMDE_FLOAT32_C( 155727.88)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -208.78), SIMDE_FLOAT32_C( 929.10), SIMDE_FLOAT32_C( -272.86), SIMDE_FLOAT32_C( 86.66), 
SIMDE_FLOAT32_C( -744.06), SIMDE_FLOAT32_C( -881.86), SIMDE_FLOAT32_C( -663.16), SIMDE_FLOAT32_C( 193.59)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -31.12), SIMDE_FLOAT32_C( -303.48), SIMDE_FLOAT32_C( -758.68), SIMDE_FLOAT32_C( -57.97), SIMDE_FLOAT32_C( -16.19), SIMDE_FLOAT32_C( 187.47), SIMDE_FLOAT32_C( 13.98), SIMDE_FLOAT32_C( 577.97)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 207013.41), SIMDE_FLOAT32_C( 207013.41), SIMDE_FLOAT32_C( 207013.41), SIMDE_FLOAT32_C( 207013.41), SIMDE_FLOAT32_C( -9270.98), SIMDE_FLOAT32_C( -9270.98), SIMDE_FLOAT32_C( -9270.98), SIMDE_FLOAT32_C( -9270.98)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 696.37), SIMDE_FLOAT32_C( 300.52), SIMDE_FLOAT32_C( -476.62), SIMDE_FLOAT32_C( 523.01), SIMDE_FLOAT32_C( -147.94), SIMDE_FLOAT32_C( -993.31), SIMDE_FLOAT32_C( 910.70), SIMDE_FLOAT32_C( -650.05)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -298.13), SIMDE_FLOAT32_C( -528.10), SIMDE_FLOAT32_C( 371.86), SIMDE_FLOAT32_C( -93.70), SIMDE_FLOAT32_C( -396.01), SIMDE_FLOAT32_C( -319.70), SIMDE_FLOAT32_C( -350.31), SIMDE_FLOAT32_C( -580.49)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-177235.91), SIMDE_FLOAT32_C(-177235.91), SIMDE_FLOAT32_C(-177235.91), SIMDE_FLOAT32_C(-177235.91), SIMDE_FLOAT32_C(-319027.31), SIMDE_FLOAT32_C(-319027.31), SIMDE_FLOAT32_C(-319027.31), SIMDE_FLOAT32_C(-319027.31)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_dp_ps(test_vec[i].a, test_vec[i].b, 47); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } /* Test for simde_mm256_extract_epi32. For each index 0 through 7 the test builds a fresh vector with simde_mm256_set_epi32 and asserts with simde_assert_equal_i32 that simde_mm256_extract_epi32 returns the expected element. As the expected values show, index 0 selects the last argument passed to simde_mm256_set_epi32 and index 7 selects the first. The index is written out as a literal in every call rather than driven by a loop. Returns 0 once all eight checks have run. */ static int test_simde_mm256_extract_epi32(SIMDE_MUNIT_TEST_ARGS) { simde__m256i a; a = simde_mm256_set_epi32(INT32_C( 1385655883), INT32_C(-1710235670), INT32_C( 1840319559), INT32_C( 46971468), INT32_C( 543693501), INT32_C( 1301344915), INT32_C( 1566637023), INT32_C( 2143093505)); simde_assert_equal_i32(simde_mm256_extract_epi32(a, 0), INT32_C(2143093505)); a = simde_mm256_set_epi32(INT32_C( -318872645), INT32_C( 154450085), INT32_C( 940983410), INT32_C(-2131037397), INT32_C( 
1379124831), INT32_C( 546028595), INT32_C( -61508185), INT32_C( 1543937407)); simde_assert_equal_i32(simde_mm256_extract_epi32(a, 1), INT32_C(-61508185)); a = simde_mm256_set_epi32(INT32_C(-1821994577), INT32_C( 1502812176), INT32_C( -436334344), INT32_C( 2003643735), INT32_C(-1707163842), INT32_C(-1393484615), INT32_C(-1517380673), INT32_C( -785551131)); simde_assert_equal_i32(simde_mm256_extract_epi32(a, 2), INT32_C(-1393484615)); a = simde_mm256_set_epi32(INT32_C(-1342559050), INT32_C( 1280753378), INT32_C( 519553994), INT32_C( 134336686), INT32_C(-1696266450), INT32_C( 1521778870), INT32_C(-1336686564), INT32_C( -138601679)); simde_assert_equal_i32(simde_mm256_extract_epi32(a, 3), INT32_C(-1696266450)); a = simde_mm256_set_epi32(INT32_C( -346178226), INT32_C(-1487718780), INT32_C( 1522880003), INT32_C( 588601981), INT32_C( 1931111095), INT32_C(-1466649812), INT32_C(-1669730912), INT32_C( 257973424)); simde_assert_equal_i32(simde_mm256_extract_epi32(a, 4), INT32_C(588601981)); a = simde_mm256_set_epi32(INT32_C( 564328837), INT32_C( 600105992), INT32_C(-1245299261), INT32_C( 186243465), INT32_C( 1677067524), INT32_C( -306576008), INT32_C(-1101353897), INT32_C( 733243889)); simde_assert_equal_i32(simde_mm256_extract_epi32(a, 5), INT32_C(-1245299261)); a = simde_mm256_set_epi32(INT32_C( -460169306), INT32_C( -732910055), INT32_C( 973235428), INT32_C( 784780564), INT32_C( -944171968), INT32_C(-1846057571), INT32_C( 1068840758), INT32_C( 786857160)); simde_assert_equal_i32(simde_mm256_extract_epi32(a, 6), INT32_C(-732910055)); a = simde_mm256_set_epi32(INT32_C( -541300758), INT32_C(-2119881925), INT32_C(-1597449913), INT32_C( 489702841), INT32_C(-1364763213), INT32_C( 217047214), INT32_C( 1964050600), INT32_C(-1022338452)); simde_assert_equal_i32(simde_mm256_extract_epi32(a, 7), INT32_C(-541300758)); return 0; } /* Test for simde_mm256_extract_epi64. Each check builds a fresh vector with simde_mm256_set_epi64x and asserts with simde_assert_equal_i64 that simde_mm256_extract_epi64 returns the expected element; indices 0 through 3 are each exercised twice with different data. As the expected values show, index 0 selects the last argument passed to simde_mm256_set_epi64x and index 3 selects the first. Returns 0 once all eight checks have run. */ static int test_simde_mm256_extract_epi64(SIMDE_MUNIT_TEST_ARGS) { simde__m256i a; a = simde_mm256_set_epi64x(INT64_C(-4660489839809071463), 
INT64_C(-3275104097342830593), INT64_C(-7646437287554588382), INT64_C(-1616723671742758177)); simde_assert_equal_i64(simde_mm256_extract_epi64(a, 0), INT64_C(-1616723671742758177)); a = simde_mm256_set_epi64x(INT64_C(-4654112182052362730), INT64_C( 4380015427737745916), INT64_C(-2237548103057162518), INT64_C( -717271117564457282)); simde_assert_equal_i64(simde_mm256_extract_epi64(a, 1), INT64_C(-2237548103057162518)); a = simde_mm256_set_epi64x(INT64_C(-9053768942501696029), INT64_C( -11082930357956064), INT64_C( 4721540626264374149), INT64_C(-2904931686008463967)); simde_assert_equal_i64(simde_mm256_extract_epi64(a, 2), INT64_C( -11082930357956064)); a = simde_mm256_set_epi64x(INT64_C( 952352888602265092), INT64_C(-6100868528376933823), INT64_C(-6435628201550193122), INT64_C(-1473382845492258957)); simde_assert_equal_i64(simde_mm256_extract_epi64(a, 3), INT64_C( 952352888602265092)); a = simde_mm256_set_epi64x(INT64_C( 3571656271242113686), INT64_C(-4496934129483038288), INT64_C(-1076704560171997551), INT64_C( 3873627956598886953)); simde_assert_equal_i64(simde_mm256_extract_epi64(a, 0), INT64_C( 3873627956598886953)); a = simde_mm256_set_epi64x(INT64_C( 4706661489617163532), INT64_C( 5127697303613520544), INT64_C(-7455940824973292748), INT64_C(-7733027913451150656)); simde_assert_equal_i64(simde_mm256_extract_epi64(a, 1), INT64_C(-7455940824973292748)); a = simde_mm256_set_epi64x(INT64_C( 5538080490384191196), INT64_C( -631116641013180065), INT64_C(-8091873078648501156), INT64_C(-6139846631858294273)); simde_assert_equal_i64(simde_mm256_extract_epi64(a, 2), INT64_C( -631116641013180065)); a = simde_mm256_set_epi64x(INT64_C(-5169157996131687226), INT64_C(-8345143279822783157), INT64_C( 28110904629261106), INT64_C(-2611039926221160636)); simde_assert_equal_i64(simde_mm256_extract_epi64(a, 3), INT64_C(-5169157996131687226)); return 0; } static int test_simde_mm256_insert_epi8(SIMDE_MUNIT_TEST_ARGS) { simde__m256i a, r, e; a = simde_mm256_set_epi8(INT8_C( 2), INT8_C( 
84), INT8_C( 6), INT8_C( -83), INT8_C( 118), INT8_C( 45), INT8_C( 46), INT8_C( -44), INT8_C(-115), INT8_C( -99), INT8_C( 35), INT8_C( 32), INT8_C( 111), INT8_C( 68), INT8_C( 103), INT8_C( 7), INT8_C( -73), INT8_C(-111), INT8_C(-113), INT8_C( 29), INT8_C( 25), INT8_C( 114), INT8_C(-122), INT8_C( 38), INT8_C( -96), INT8_C( 114), INT8_C( 39), INT8_C( 74), INT8_C( 99), INT8_C( 11), INT8_C( 76), INT8_C( 56)); r = simde_mm256_insert_epi8(a, INT8_C(-3), 5); e = simde_mm256_set_epi8(INT8_C( 2), INT8_C( 84), INT8_C( 6), INT8_C( -83), INT8_C( 118), INT8_C( 45), INT8_C( 46), INT8_C( -44), INT8_C(-115), INT8_C( -99), INT8_C( 35), INT8_C( 32), INT8_C( 111), INT8_C( 68), INT8_C( 103), INT8_C( 7), INT8_C( -73), INT8_C(-111), INT8_C(-113), INT8_C( 29), INT8_C( 25), INT8_C( 114), INT8_C(-122), INT8_C( 38), INT8_C( -96), INT8_C( 114), INT8_C( -3), INT8_C( 74), INT8_C( 99), INT8_C( 11), INT8_C( 76), INT8_C( 56)); simde_assert_m256i_i8(r, ==, e); a = simde_mm256_set_epi8(INT8_C(-119), INT8_C(-118), INT8_C( 60), INT8_C( 31), INT8_C(-126), INT8_C( -54), INT8_C( 51), INT8_C(-114), INT8_C(-128), INT8_C( -32), INT8_C( 25), INT8_C( 116), INT8_C( 103), INT8_C( -83), INT8_C( 64), INT8_C( 29), INT8_C( -50), INT8_C( -55), INT8_C( 114), INT8_C( 10), INT8_C( 44), INT8_C(-111), INT8_C(-114), INT8_C( -60), INT8_C(-128), INT8_C( -48), INT8_C( 15), INT8_C( 67), INT8_C( -42), INT8_C( -49), INT8_C( 99), INT8_C( 114)); r = simde_mm256_insert_epi8(a, INT8_C(59), 9); e = simde_mm256_set_epi8(INT8_C(-119), INT8_C(-118), INT8_C( 60), INT8_C( 31), INT8_C(-126), INT8_C( -54), INT8_C( 51), INT8_C(-114), INT8_C(-128), INT8_C( -32), INT8_C( 25), INT8_C( 116), INT8_C( 103), INT8_C( -83), INT8_C( 64), INT8_C( 29), INT8_C( -50), INT8_C( -55), INT8_C( 114), INT8_C( 10), INT8_C( 44), INT8_C(-111), INT8_C( 59), INT8_C( -60), INT8_C(-128), INT8_C( -48), INT8_C( 15), INT8_C( 67), INT8_C( -42), INT8_C( -49), INT8_C( 99), INT8_C( 114)); simde_assert_m256i_i8(r, ==, e); a = simde_mm256_set_epi8(INT8_C( -89), INT8_C( 90), 
INT8_C(-119), INT8_C( -38), INT8_C( -50), INT8_C( -6), INT8_C(-121), INT8_C( -93), INT8_C( 88), INT8_C( 83), INT8_C( -93), INT8_C( -96), INT8_C( -77), INT8_C( -84), INT8_C(-110), INT8_C( -16), INT8_C( -67), INT8_C( 18), INT8_C( 25), INT8_C( -38), INT8_C( -60), INT8_C( 59), INT8_C( 108), INT8_C( -22), INT8_C( 39), INT8_C( 101), INT8_C( 5), INT8_C( 105), INT8_C( 45), INT8_C( -43), INT8_C( 87), INT8_C( 73)); r = simde_mm256_insert_epi8(a, INT8_C(-38), 6); e = simde_mm256_set_epi8(INT8_C( -89), INT8_C( 90), INT8_C(-119), INT8_C( -38), INT8_C( -50), INT8_C( -6), INT8_C(-121), INT8_C( -93), INT8_C( 88), INT8_C( 83), INT8_C( -93), INT8_C( -96), INT8_C( -77), INT8_C( -84), INT8_C(-110), INT8_C( -16), INT8_C( -67), INT8_C( 18), INT8_C( 25), INT8_C( -38), INT8_C( -60), INT8_C( 59), INT8_C( 108), INT8_C( -22), INT8_C( 39), INT8_C( -38), INT8_C( 5), INT8_C( 105), INT8_C( 45), INT8_C( -43), INT8_C( 87), INT8_C( 73)); simde_assert_m256i_i8(r, ==, e); a = simde_mm256_set_epi8(INT8_C( 109), INT8_C( -4), INT8_C( 42), INT8_C( 13), INT8_C( 50), INT8_C( 75), INT8_C( 107), INT8_C( 56), INT8_C( 48), INT8_C( -32), INT8_C( -5), INT8_C( -89), INT8_C(-120), INT8_C( 97), INT8_C( 24), INT8_C( 11), INT8_C( -12), INT8_C( -17), INT8_C( 64), INT8_C( 44), INT8_C( 42), INT8_C( 13), INT8_C( 7), INT8_C( 86), INT8_C( -64), INT8_C( 9), INT8_C( -71), INT8_C( -23), INT8_C( 74), INT8_C( 49), INT8_C( 126), INT8_C( 43)); r = simde_mm256_insert_epi8(a, INT8_C(39), 10); e = simde_mm256_set_epi8(INT8_C( 109), INT8_C( -4), INT8_C( 42), INT8_C( 13), INT8_C( 50), INT8_C( 75), INT8_C( 107), INT8_C( 56), INT8_C( 48), INT8_C( -32), INT8_C( -5), INT8_C( -89), INT8_C(-120), INT8_C( 97), INT8_C( 24), INT8_C( 11), INT8_C( -12), INT8_C( -17), INT8_C( 64), INT8_C( 44), INT8_C( 42), INT8_C( 39), INT8_C( 7), INT8_C( 86), INT8_C( -64), INT8_C( 9), INT8_C( -71), INT8_C( -23), INT8_C( 74), INT8_C( 49), INT8_C( 126), INT8_C( 43)); simde_assert_m256i_i8(r, ==, e); a = simde_mm256_set_epi8(INT8_C( 19), INT8_C( -38), INT8_C( 78), 
INT8_C( -23), INT8_C( 62), INT8_C( -68), INT8_C(-102), INT8_C( 63), INT8_C( 124), INT8_C(-113), INT8_C( -21), INT8_C( 108), INT8_C( -68), INT8_C( -75), INT8_C( 51), INT8_C( 124), INT8_C( 76), INT8_C( -65), INT8_C( -80), INT8_C( 56), INT8_C( 41), INT8_C( 59), INT8_C( 106), INT8_C( -7), INT8_C( -87), INT8_C( 20), INT8_C( 3), INT8_C( 106), INT8_C( 72), INT8_C(-115), INT8_C(-103), INT8_C( 26)); r = simde_mm256_insert_epi8(a, INT8_C(-114), 10); e = simde_mm256_set_epi8(INT8_C( 19), INT8_C( -38), INT8_C( 78), INT8_C( -23), INT8_C( 62), INT8_C( -68), INT8_C(-102), INT8_C( 63), INT8_C( 124), INT8_C(-113), INT8_C( -21), INT8_C( 108), INT8_C( -68), INT8_C( -75), INT8_C( 51), INT8_C( 124), INT8_C( 76), INT8_C( -65), INT8_C( -80), INT8_C( 56), INT8_C( 41), INT8_C(-114), INT8_C( 106), INT8_C( -7), INT8_C( -87), INT8_C( 20), INT8_C( 3), INT8_C( 106), INT8_C( 72), INT8_C(-115), INT8_C(-103), INT8_C( 26)); simde_assert_m256i_i8(r, ==, e); a = simde_mm256_set_epi8(INT8_C( 124), INT8_C( -4), INT8_C( -73), INT8_C( 108), INT8_C( 66), INT8_C( -17), INT8_C(-121), INT8_C( 100), INT8_C( 124), INT8_C( -94), INT8_C( 17), INT8_C( -78), INT8_C( -99), INT8_C( 31), INT8_C( 28), INT8_C( 7), INT8_C(-108), INT8_C( -43), INT8_C( -23), INT8_C( 104), INT8_C( 122), INT8_C( 61), INT8_C( -93), INT8_C(-102), INT8_C( 125), INT8_C( -79), INT8_C( 24), INT8_C( 49), INT8_C( 9), INT8_C( -93), INT8_C( 36), INT8_C( -74)); r = simde_mm256_insert_epi8(a, INT8_C(-65), 2); e = simde_mm256_set_epi8(INT8_C( 124), INT8_C( -4), INT8_C( -73), INT8_C( 108), INT8_C( 66), INT8_C( -17), INT8_C(-121), INT8_C( 100), INT8_C( 124), INT8_C( -94), INT8_C( 17), INT8_C( -78), INT8_C( -99), INT8_C( 31), INT8_C( 28), INT8_C( 7), INT8_C(-108), INT8_C( -43), INT8_C( -23), INT8_C( 104), INT8_C( 122), INT8_C( 61), INT8_C( -93), INT8_C(-102), INT8_C( 125), INT8_C( -79), INT8_C( 24), INT8_C( 49), INT8_C( 9), INT8_C( -65), INT8_C( 36), INT8_C( -74)); simde_assert_m256i_i8(r, ==, e); a = simde_mm256_set_epi8(INT8_C(-100), INT8_C( 10), INT8_C( 
87), INT8_C( -57), INT8_C( -89), INT8_C( -66), INT8_C( -32), INT8_C( 104), INT8_C( 66), INT8_C( -81), INT8_C( -94), INT8_C( 71), INT8_C( 67), INT8_C( 108), INT8_C( -88), INT8_C( 53), INT8_C( 108), INT8_C( -39), INT8_C( -93), INT8_C( -2), INT8_C( 93), INT8_C( 67), INT8_C( -85), INT8_C( -42), INT8_C( 28), INT8_C( -64), INT8_C( 83), INT8_C( -9), INT8_C( -95), INT8_C( 36), INT8_C( 44), INT8_C( 112)); r = simde_mm256_insert_epi8(a, INT8_C(58), 8); e = simde_mm256_set_epi8(INT8_C(-100), INT8_C( 10), INT8_C( 87), INT8_C( -57), INT8_C( -89), INT8_C( -66), INT8_C( -32), INT8_C( 104), INT8_C( 66), INT8_C( -81), INT8_C( -94), INT8_C( 71), INT8_C( 67), INT8_C( 108), INT8_C( -88), INT8_C( 53), INT8_C( 108), INT8_C( -39), INT8_C( -93), INT8_C( -2), INT8_C( 93), INT8_C( 67), INT8_C( -85), INT8_C( 58), INT8_C( 28), INT8_C( -64), INT8_C( 83), INT8_C( -9), INT8_C( -95), INT8_C( 36), INT8_C( 44), INT8_C( 112)); simde_assert_m256i_i8(r, ==, e); a = simde_mm256_set_epi8(INT8_C( -16), INT8_C( -47), INT8_C( -77), INT8_C( 42), INT8_C( 89), INT8_C( -7), INT8_C( -62), INT8_C( 45), INT8_C( -28), INT8_C( 34), INT8_C( -59), INT8_C( -9), INT8_C( -38), INT8_C(-118), INT8_C( 83), INT8_C( 59), INT8_C( -16), INT8_C( 34), INT8_C( -64), INT8_C( 16), INT8_C( -15), INT8_C( -28), INT8_C( 47), INT8_C( 10), INT8_C(-112), INT8_C( -37), INT8_C( 71), INT8_C( -84), INT8_C( -78), INT8_C( -81), INT8_C( 40), INT8_C( -98)); r = simde_mm256_insert_epi8(a, INT8_C(121), 7); e = simde_mm256_set_epi8(INT8_C( -16), INT8_C( -47), INT8_C( -77), INT8_C( 42), INT8_C( 89), INT8_C( -7), INT8_C( -62), INT8_C( 45), INT8_C( -28), INT8_C( 34), INT8_C( -59), INT8_C( -9), INT8_C( -38), INT8_C(-118), INT8_C( 83), INT8_C( 59), INT8_C( -16), INT8_C( 34), INT8_C( -64), INT8_C( 16), INT8_C( -15), INT8_C( -28), INT8_C( 47), INT8_C( 10), INT8_C( 121), INT8_C( -37), INT8_C( 71), INT8_C( -84), INT8_C( -78), INT8_C( -81), INT8_C( 40), INT8_C( -98)); simde_assert_m256i_i8(r, ==, e); return 0; } static int 
test_simde_mm256_insert_epi16(SIMDE_MUNIT_TEST_ARGS) { simde__m256i a, r, e; a = simde_mm256_set_epi16(INT16_C(-21602), INT16_C( 27671), INT16_C(-26514), INT16_C( 32139), INT16_C( 27553), INT16_C( 3389), INT16_C( 26164), INT16_C( 3268), INT16_C( -3948), INT16_C( 26700), INT16_C( 31313), INT16_C( 27327), INT16_C(-25076), INT16_C( -6473), INT16_C(-27908), INT16_C(-18876)); r = simde_mm256_insert_epi16(a, INT16_C(13157), 5); e = simde_mm256_set_epi16(INT16_C(-21602), INT16_C( 27671), INT16_C(-26514), INT16_C( 32139), INT16_C( 27553), INT16_C( 3389), INT16_C( 26164), INT16_C( 3268), INT16_C( -3948), INT16_C( 26700), INT16_C( 13157), INT16_C( 27327), INT16_C(-25076), INT16_C( -6473), INT16_C(-27908), INT16_C(-18876)); simde_assert_m256i_i16(r, ==, e); a = simde_mm256_set_epi16(INT16_C(-15739), INT16_C(-22477), INT16_C( 24105), INT16_C( 1501), INT16_C(-14518), INT16_C( 18176), INT16_C( 14482), INT16_C( 20288), INT16_C(-15586), INT16_C( 12200), INT16_C( -9527), INT16_C( -9462), INT16_C(-20273), INT16_C(-22514), INT16_C( 1070), INT16_C(-15309)); r = simde_mm256_insert_epi16(a, INT16_C(369), 3); e = simde_mm256_set_epi16(INT16_C(-15739), INT16_C(-22477), INT16_C( 24105), INT16_C( 1501), INT16_C(-14518), INT16_C( 18176), INT16_C( 14482), INT16_C( 20288), INT16_C(-15586), INT16_C( 12200), INT16_C( -9527), INT16_C( -9462), INT16_C( 369), INT16_C(-22514), INT16_C( 1070), INT16_C(-15309)); simde_assert_m256i_i16(r, ==, e); a = simde_mm256_set_epi16(INT16_C(-20578), INT16_C( 31339), INT16_C(-21867), INT16_C( 6148), INT16_C(-32342), INT16_C(-12751), INT16_C(-22422), INT16_C( 12556), INT16_C(-31526), INT16_C(-24860), INT16_C( 25156), INT16_C( -4916), INT16_C(-20990), INT16_C(-13542), INT16_C(-25587), INT16_C(-20477)); r = simde_mm256_insert_epi16(a, INT16_C(-32428), 1); e = simde_mm256_set_epi16(INT16_C(-20578), INT16_C( 31339), INT16_C(-21867), INT16_C( 6148), INT16_C(-32342), INT16_C(-12751), INT16_C(-22422), INT16_C( 12556), INT16_C(-31526), INT16_C(-24860), INT16_C( 25156), 
INT16_C( -4916), INT16_C(-20990), INT16_C(-13542), INT16_C(-32428), INT16_C(-20477)); simde_assert_m256i_i16(r, ==, e); a = simde_mm256_set_epi16(INT16_C( 16682), INT16_C( 9974), INT16_C( -6779), INT16_C( 1747), INT16_C(-26827), INT16_C(-32182), INT16_C( 17867), INT16_C(-23355), INT16_C( 15404), INT16_C( -2091), INT16_C( -560), INT16_C(-24442), INT16_C( 12274), INT16_C(-19942), INT16_C( 8401), INT16_C(-15722)); r = simde_mm256_insert_epi16(a, INT16_C(16950), 12); e = simde_mm256_set_epi16(INT16_C( 16682), INT16_C( 9974), INT16_C( -6779), INT16_C( 16950), INT16_C(-26827), INT16_C(-32182), INT16_C( 17867), INT16_C(-23355), INT16_C( 15404), INT16_C( -2091), INT16_C( -560), INT16_C(-24442), INT16_C( 12274), INT16_C(-19942), INT16_C( 8401), INT16_C(-15722)); simde_assert_m256i_i16(r, ==, e); a = simde_mm256_set_epi16(INT16_C( 31205), INT16_C( 25676), INT16_C( 7342), INT16_C( 4880), INT16_C( -8533), INT16_C(-32080), INT16_C( -7595), INT16_C(-22500), INT16_C(-10840), INT16_C( 19996), INT16_C( -4449), INT16_C(-31416), INT16_C(-26476), INT16_C( -3822), INT16_C( 13156), INT16_C(-26200)); r = simde_mm256_insert_epi16(a, INT16_C(27229), 15); e = simde_mm256_set_epi16(INT16_C( 27229), INT16_C( 25676), INT16_C( 7342), INT16_C( 4880), INT16_C( -8533), INT16_C(-32080), INT16_C( -7595), INT16_C(-22500), INT16_C(-10840), INT16_C( 19996), INT16_C( -4449), INT16_C(-31416), INT16_C(-26476), INT16_C( -3822), INT16_C( 13156), INT16_C(-26200)); simde_assert_m256i_i16(r, ==, e); a = simde_mm256_set_epi16(INT16_C( -8749), INT16_C(-27202), INT16_C(-11704), INT16_C( 52), INT16_C(-10454), INT16_C( -3314), INT16_C( -8238), INT16_C(-18856), INT16_C( 6163), INT16_C(-27363), INT16_C( 1816), INT16_C(-31045), INT16_C( 28943), INT16_C(-22635), INT16_C( 1291), INT16_C(-31630)); r = simde_mm256_insert_epi16(a, INT16_C(25149), 9); e = simde_mm256_set_epi16(INT16_C( -8749), INT16_C(-27202), INT16_C(-11704), INT16_C( 52), INT16_C(-10454), INT16_C( -3314), INT16_C( 25149), INT16_C(-18856), INT16_C( 6163), 
INT16_C(-27363), INT16_C( 1816), INT16_C(-31045), INT16_C( 28943), INT16_C(-22635), INT16_C( 1291), INT16_C(-31630)); simde_assert_m256i_i16(r, ==, e); a = simde_mm256_set_epi16(INT16_C( -5789), INT16_C( 32645), INT16_C(-25474), INT16_C( -6052), INT16_C( 30501), INT16_C( 13572), INT16_C( 32362), INT16_C( 31220), INT16_C( 21812), INT16_C( 21730), INT16_C(-10684), INT16_C(-29591), INT16_C( 23321), INT16_C( 4014), INT16_C( 18929), INT16_C( -646)); r = simde_mm256_insert_epi16(a, INT16_C(10486), 11); e = simde_mm256_set_epi16(INT16_C( -5789), INT16_C( 32645), INT16_C(-25474), INT16_C( -6052), INT16_C( 10486), INT16_C( 13572), INT16_C( 32362), INT16_C( 31220), INT16_C( 21812), INT16_C( 21730), INT16_C(-10684), INT16_C(-29591), INT16_C( 23321), INT16_C( 4014), INT16_C( 18929), INT16_C( -646)); simde_assert_m256i_i16(r, ==, e); a = simde_mm256_set_epi16(INT16_C( -2271), INT16_C( 8016), INT16_C( 26327), INT16_C( 27397), INT16_C( 19036), INT16_C( 25193), INT16_C(-11253), INT16_C(-15734), INT16_C( -521), INT16_C( 20581), INT16_C(-18434), INT16_C( 4365), INT16_C(-18143), INT16_C( 23566), INT16_C(-32412), INT16_C(-20606)); r = simde_mm256_insert_epi16(a, INT16_C(4940), 6); e = simde_mm256_set_epi16(INT16_C( -2271), INT16_C( 8016), INT16_C( 26327), INT16_C( 27397), INT16_C( 19036), INT16_C( 25193), INT16_C(-11253), INT16_C(-15734), INT16_C( -521), INT16_C( 4940), INT16_C(-18434), INT16_C( 4365), INT16_C(-18143), INT16_C( 23566), INT16_C(-32412), INT16_C(-20606)); simde_assert_m256i_i16(r, ==, e); return 0; } static int test_simde_mm256_insert_epi32(SIMDE_MUNIT_TEST_ARGS) { simde__m256i a, r, e; a = simde_mm256_set_epi32(INT32_C( 1527893980), INT32_C( 272236058), INT32_C( 1771532776), INT32_C( -527129145), INT32_C(-1867900811), INT32_C( 1959964247), INT32_C( 1343894165), INT32_C( 1334695580)); r = simde_mm256_insert_epi32(a, INT32_C( -707274869), 0); e = simde_mm256_set_epi32(INT32_C( 1527893980), INT32_C( 272236058), INT32_C( 1771532776), INT32_C( -527129145), 
INT32_C(-1867900811), INT32_C( 1959964247), INT32_C( 1343894165), INT32_C( -707274869)); simde_assert_m256i_i32(r, ==, e); a = simde_mm256_set_epi32(INT32_C( -661063243), INT32_C(-1710175048), INT32_C( 930876847), INT32_C(-1520459634), INT32_C( -463408284), INT32_C( -440016671), INT32_C( 1321845686), INT32_C( 1111303375)); r = simde_mm256_insert_epi32(a, INT32_C( 1750585714), 6); e = simde_mm256_set_epi32(INT32_C( -661063243), INT32_C( 1750585714), INT32_C( 930876847), INT32_C(-1520459634), INT32_C( -463408284), INT32_C( -440016671), INT32_C( 1321845686), INT32_C( 1111303375)); simde_assert_m256i_i32(r, ==, e); a = simde_mm256_set_epi32(INT32_C( 922514807), INT32_C(-1345830052), INT32_C( 578930278), INT32_C( -474805558), INT32_C( 1483567706), INT32_C( 1465521628), INT32_C( 1619162073), INT32_C( -603337611)); r = simde_mm256_insert_epi32(a, INT32_C( 1709940880), 5); e = simde_mm256_set_epi32(INT32_C( 922514807), INT32_C(-1345830052), INT32_C( 1709940880), INT32_C( -474805558), INT32_C( 1483567706), INT32_C( 1465521628), INT32_C( 1619162073), INT32_C( -603337611)); simde_assert_m256i_i32(r, ==, e); a = simde_mm256_set_epi32(INT32_C( 1682496014), INT32_C( -265998243), INT32_C( -696943616), INT32_C( -723203182), INT32_C( 1593791374), INT32_C( -358344217), INT32_C( 813656782), INT32_C( 58704738)); r = simde_mm256_insert_epi32(a, INT32_C( 1488485361), 5); e = simde_mm256_set_epi32(INT32_C( 1682496014), INT32_C( -265998243), INT32_C( 1488485361), INT32_C( -723203182), INT32_C( 1593791374), INT32_C( -358344217), INT32_C( 813656782), INT32_C( 58704738)); simde_assert_m256i_i32(r, ==, e); a = simde_mm256_set_epi32(INT32_C( 1847223436), INT32_C( -901858482), INT32_C( 1721924326), INT32_C( 291173023), INT32_C(-1897007668), INT32_C(-1769936815), INT32_C(-1568319650), INT32_C( -5176498)); r = simde_mm256_insert_epi32(a, INT32_C(-1531789383), 1); e = simde_mm256_set_epi32(INT32_C( 1847223436), INT32_C( -901858482), INT32_C( 1721924326), INT32_C( 291173023), INT32_C(-1897007668), 
INT32_C(-1769936815), INT32_C(-1531789383), INT32_C( -5176498)); simde_assert_m256i_i32(r, ==, e); a = simde_mm256_set_epi32(INT32_C( 350203051), INT32_C( -910749534), INT32_C( 265750572), INT32_C( 1641173073), INT32_C( -538285717), INT32_C( -840003501), INT32_C( 1271510949), INT32_C(-1020673062)); r = simde_mm256_insert_epi32(a, INT32_C( 1836561709), 3); e = simde_mm256_set_epi32(INT32_C( 350203051), INT32_C( -910749534), INT32_C( 265750572), INT32_C( 1641173073), INT32_C( 1836561709), INT32_C( -840003501), INT32_C( 1271510949), INT32_C(-1020673062)); simde_assert_m256i_i32(r, ==, e); a = simde_mm256_set_epi32(INT32_C( 547373175), INT32_C(-1592451622), INT32_C( 2039829911), INT32_C( -457221951), INT32_C(-1618527211), INT32_C(-1978334272), INT32_C( 1765993380), INT32_C( 1580829317)); r = simde_mm256_insert_epi32(a, INT32_C( 1732675568), 7); e = simde_mm256_set_epi32(INT32_C( 1732675568), INT32_C(-1592451622), INT32_C( 2039829911), INT32_C( -457221951), INT32_C(-1618527211), INT32_C(-1978334272), INT32_C( 1765993380), INT32_C( 1580829317)); simde_assert_m256i_i32(r, ==, e); a = simde_mm256_set_epi32(INT32_C( 1958144037), INT32_C( 549134406), INT32_C( -361892467), INT32_C( -550141532), INT32_C(-2067091063), INT32_C( 1069015288), INT32_C(-1107603429), INT32_C( 1078737418)); r = simde_mm256_insert_epi32(a, INT32_C( 896343144), 1); e = simde_mm256_set_epi32(INT32_C( 1958144037), INT32_C( 549134406), INT32_C( -361892467), INT32_C( -550141532), INT32_C(-2067091063), INT32_C( 1069015288), INT32_C( 896343144), INT32_C( 1078737418)); simde_assert_m256i_i32(r, ==, e); return 0; } static int test_simde_mm256_insert_epi64(SIMDE_MUNIT_TEST_ARGS) { simde__m256i a, r, e; a = simde_mm256_set_epi64x(INT64_C(-4505752726775834758), INT64_C( 5929367822849325121), INT64_C( 7859909444158050752), INT64_C( 8378794972787494426)); r = simde_mm256_insert_epi64(a, INT64_C( 3154696592882520417), 1); e = simde_mm256_set_epi64x(INT64_C(-4505752726775834758), INT64_C( 5929367822849325121), 
INT64_C( 3154696592882520417), INT64_C( 8378794972787494426)); simde_assert_m256i_i64(r, ==, e); a = simde_mm256_set_epi64x(INT64_C( 396574839952628801), INT64_C( 4240491953132221671), INT64_C( -278319971650278791), INT64_C( 6491233263195366023)); r = simde_mm256_insert_epi64(a, INT64_C(-3297610962929123976), 3); e = simde_mm256_set_epi64x(INT64_C(-3297610962929123976), INT64_C( 4240491953132221671), INT64_C( -278319971650278791), INT64_C( 6491233263195366023)); simde_assert_m256i_i64(r, ==, e); a = simde_mm256_set_epi64x(INT64_C(-1728252678477676185), INT64_C( 4096939565061250649), INT64_C( -899841113114403992), INT64_C( 5786373883955623560)); r = simde_mm256_insert_epi64(a, INT64_C(-3905247737278663189), 0); e = simde_mm256_set_epi64x(INT64_C(-1728252678477676185), INT64_C( 4096939565061250649), INT64_C( -899841113114403992), INT64_C(-3905247737278663189)); simde_assert_m256i_i64(r, ==, e); a = simde_mm256_set_epi64x(INT64_C(-7506903298948604025), INT64_C(-5148494998623595939), INT64_C( 1486007124617083344), INT64_C( 4658534095800830357)); r = simde_mm256_insert_epi64(a, INT64_C(-7835784393738508471), 1); e = simde_mm256_set_epi64x(INT64_C(-7506903298948604025), INT64_C(-5148494998623595939), INT64_C(-7835784393738508471), INT64_C( 4658534095800830357)); simde_assert_m256i_i64(r, ==, e); a = simde_mm256_set_epi64x(INT64_C(-2191440769984549047), INT64_C( 3908463436719096448), INT64_C( 8526354170218817669), INT64_C(-3858780869273911597)); r = simde_mm256_insert_epi64(a, INT64_C( 4403056273253937364), 3); e = simde_mm256_set_epi64x(INT64_C( 4403056273253937364), INT64_C( 3908463436719096448), INT64_C( 8526354170218817669), INT64_C(-3858780869273911597)); simde_assert_m256i_i64(r, ==, e); a = simde_mm256_set_epi64x(INT64_C( 6661310305483280859), INT64_C(-6601747037924714764), INT64_C(-1630628469313698153), INT64_C( 1161250947816487188)); r = simde_mm256_insert_epi64(a, INT64_C(-7130294008098064663), 1); e = simde_mm256_set_epi64x(INT64_C( 6661310305483280859), 
INT64_C(-6601747037924714764), INT64_C(-7130294008098064663), INT64_C( 1161250947816487188)); simde_assert_m256i_i64(r, ==, e); a = simde_mm256_set_epi64x(INT64_C( 782834070832404014), INT64_C(-2125384153009736171), INT64_C(-8527778016730746462), INT64_C( 3421940001003476372)); r = simde_mm256_insert_epi64(a, INT64_C( 3833121528156448342), 2); e = simde_mm256_set_epi64x(INT64_C( 782834070832404014), INT64_C( 3833121528156448342), INT64_C(-8527778016730746462), INT64_C( 3421940001003476372)); simde_assert_m256i_i64(r, ==, e); a = simde_mm256_set_epi64x(INT64_C( -656366593936952908), INT64_C(-1584520372107281742), INT64_C( 8511510589800984870), INT64_C( 3076176925060453392)); r = simde_mm256_insert_epi64(a, INT64_C( 7232381223726455225), 0); e = simde_mm256_set_epi64x(INT64_C( -656366593936952908), INT64_C(-1584520372107281742), INT64_C( 8511510589800984870), INT64_C( 7232381223726455225)); simde_assert_m256i_i64(r, ==, e); return 0; } static int test_simde_mm256_insertf128_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m128 b; simde__m256 ra; simde__m256 rb; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 57.86), SIMDE_FLOAT32_C( 900.49), SIMDE_FLOAT32_C( 678.15), SIMDE_FLOAT32_C( -551.43), SIMDE_FLOAT32_C( 431.88), SIMDE_FLOAT32_C( -426.33), SIMDE_FLOAT32_C( -705.72), SIMDE_FLOAT32_C( 809.23)), simde_mm_set_ps (SIMDE_FLOAT32_C( -625.43), SIMDE_FLOAT32_C( -829.28), SIMDE_FLOAT32_C( -42.04), SIMDE_FLOAT32_C( -643.64)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 57.86), SIMDE_FLOAT32_C( 900.49), SIMDE_FLOAT32_C( 678.15), SIMDE_FLOAT32_C( -551.43), SIMDE_FLOAT32_C( -625.43), SIMDE_FLOAT32_C( -829.28), SIMDE_FLOAT32_C( -42.04), SIMDE_FLOAT32_C( -643.64)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -625.43), SIMDE_FLOAT32_C( -829.28), SIMDE_FLOAT32_C( -42.04), SIMDE_FLOAT32_C( -643.64), SIMDE_FLOAT32_C( 431.88), SIMDE_FLOAT32_C( -426.33), SIMDE_FLOAT32_C( -705.72), SIMDE_FLOAT32_C( 809.23)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -837.79), SIMDE_FLOAT32_C( 
338.83), SIMDE_FLOAT32_C( 296.45), SIMDE_FLOAT32_C( 172.80), SIMDE_FLOAT32_C( 220.09), SIMDE_FLOAT32_C( 171.14), SIMDE_FLOAT32_C( 492.30), SIMDE_FLOAT32_C( -224.75)), simde_mm_set_ps (SIMDE_FLOAT32_C( -479.01), SIMDE_FLOAT32_C( 686.13), SIMDE_FLOAT32_C( -518.69), SIMDE_FLOAT32_C( -606.38)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -837.79), SIMDE_FLOAT32_C( 338.83), SIMDE_FLOAT32_C( 296.45), SIMDE_FLOAT32_C( 172.80), SIMDE_FLOAT32_C( -479.01), SIMDE_FLOAT32_C( 686.13), SIMDE_FLOAT32_C( -518.69), SIMDE_FLOAT32_C( -606.38)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -479.01), SIMDE_FLOAT32_C( 686.13), SIMDE_FLOAT32_C( -518.69), SIMDE_FLOAT32_C( -606.38), SIMDE_FLOAT32_C( 220.09), SIMDE_FLOAT32_C( 171.14), SIMDE_FLOAT32_C( 492.30), SIMDE_FLOAT32_C( -224.75)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -665.08), SIMDE_FLOAT32_C( -599.25), SIMDE_FLOAT32_C( -107.42), SIMDE_FLOAT32_C( -565.87), SIMDE_FLOAT32_C( -588.15), SIMDE_FLOAT32_C( 906.13), SIMDE_FLOAT32_C( 481.87), SIMDE_FLOAT32_C( 540.93)), simde_mm_set_ps (SIMDE_FLOAT32_C( 308.44), SIMDE_FLOAT32_C( -387.39), SIMDE_FLOAT32_C( 312.59), SIMDE_FLOAT32_C( -811.76)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -665.08), SIMDE_FLOAT32_C( -599.25), SIMDE_FLOAT32_C( -107.42), SIMDE_FLOAT32_C( -565.87), SIMDE_FLOAT32_C( 308.44), SIMDE_FLOAT32_C( -387.39), SIMDE_FLOAT32_C( 312.59), SIMDE_FLOAT32_C( -811.76)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 308.44), SIMDE_FLOAT32_C( -387.39), SIMDE_FLOAT32_C( 312.59), SIMDE_FLOAT32_C( -811.76), SIMDE_FLOAT32_C( -588.15), SIMDE_FLOAT32_C( 906.13), SIMDE_FLOAT32_C( 481.87), SIMDE_FLOAT32_C( 540.93)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -499.97), SIMDE_FLOAT32_C( -474.63), SIMDE_FLOAT32_C( -449.49), SIMDE_FLOAT32_C( 941.31), SIMDE_FLOAT32_C( -102.84), SIMDE_FLOAT32_C( -165.66), SIMDE_FLOAT32_C( -680.74), SIMDE_FLOAT32_C( 98.73)), simde_mm_set_ps (SIMDE_FLOAT32_C( -600.34), SIMDE_FLOAT32_C( 321.05), SIMDE_FLOAT32_C( 438.78), SIMDE_FLOAT32_C( -70.17)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -499.97), 
SIMDE_FLOAT32_C( -474.63), SIMDE_FLOAT32_C( -449.49), SIMDE_FLOAT32_C( 941.31), SIMDE_FLOAT32_C( -600.34), SIMDE_FLOAT32_C( 321.05), SIMDE_FLOAT32_C( 438.78), SIMDE_FLOAT32_C( -70.17)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -600.34), SIMDE_FLOAT32_C( 321.05), SIMDE_FLOAT32_C( 438.78), SIMDE_FLOAT32_C( -70.17), SIMDE_FLOAT32_C( -102.84), SIMDE_FLOAT32_C( -165.66), SIMDE_FLOAT32_C( -680.74), SIMDE_FLOAT32_C( 98.73)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -402.57), SIMDE_FLOAT32_C( -372.27), SIMDE_FLOAT32_C( -839.54), SIMDE_FLOAT32_C( 507.35), SIMDE_FLOAT32_C( -596.72), SIMDE_FLOAT32_C( 333.88), SIMDE_FLOAT32_C( -839.21), SIMDE_FLOAT32_C( -624.72)), simde_mm_set_ps (SIMDE_FLOAT32_C( -109.04), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( 959.39), SIMDE_FLOAT32_C( -856.10)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -402.57), SIMDE_FLOAT32_C( -372.27), SIMDE_FLOAT32_C( -839.54), SIMDE_FLOAT32_C( 507.35), SIMDE_FLOAT32_C( -109.04), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( 959.39), SIMDE_FLOAT32_C( -856.10)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -109.04), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( 959.39), SIMDE_FLOAT32_C( -856.10), SIMDE_FLOAT32_C( -596.72), SIMDE_FLOAT32_C( 333.88), SIMDE_FLOAT32_C( -839.21), SIMDE_FLOAT32_C( -624.72)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -148.40), SIMDE_FLOAT32_C( -137.24), SIMDE_FLOAT32_C( 665.88), SIMDE_FLOAT32_C( -239.38), SIMDE_FLOAT32_C( 864.82), SIMDE_FLOAT32_C( 415.07), SIMDE_FLOAT32_C( 223.96), SIMDE_FLOAT32_C( 144.96)), simde_mm_set_ps (SIMDE_FLOAT32_C( -29.35), SIMDE_FLOAT32_C( -415.61), SIMDE_FLOAT32_C( 231.08), SIMDE_FLOAT32_C( -375.28)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -148.40), SIMDE_FLOAT32_C( -137.24), SIMDE_FLOAT32_C( 665.88), SIMDE_FLOAT32_C( -239.38), SIMDE_FLOAT32_C( -29.35), SIMDE_FLOAT32_C( -415.61), SIMDE_FLOAT32_C( 231.08), SIMDE_FLOAT32_C( -375.28)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -29.35), SIMDE_FLOAT32_C( -415.61), SIMDE_FLOAT32_C( 231.08), SIMDE_FLOAT32_C( -375.28), SIMDE_FLOAT32_C( 864.82), 
SIMDE_FLOAT32_C( 415.07), SIMDE_FLOAT32_C( 223.96), SIMDE_FLOAT32_C( 144.96)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 384.21), SIMDE_FLOAT32_C( -172.99), SIMDE_FLOAT32_C( -651.42), SIMDE_FLOAT32_C( 104.60), SIMDE_FLOAT32_C( -412.61), SIMDE_FLOAT32_C( -685.74), SIMDE_FLOAT32_C( 349.45), SIMDE_FLOAT32_C( 431.71)), simde_mm_set_ps (SIMDE_FLOAT32_C( 810.19), SIMDE_FLOAT32_C( 94.73), SIMDE_FLOAT32_C( 542.66), SIMDE_FLOAT32_C( 824.78)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 384.21), SIMDE_FLOAT32_C( -172.99), SIMDE_FLOAT32_C( -651.42), SIMDE_FLOAT32_C( 104.60), SIMDE_FLOAT32_C( 810.19), SIMDE_FLOAT32_C( 94.73), SIMDE_FLOAT32_C( 542.66), SIMDE_FLOAT32_C( 824.78)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 810.19), SIMDE_FLOAT32_C( 94.73), SIMDE_FLOAT32_C( 542.66), SIMDE_FLOAT32_C( 824.78), SIMDE_FLOAT32_C( -412.61), SIMDE_FLOAT32_C( -685.74), SIMDE_FLOAT32_C( 349.45), SIMDE_FLOAT32_C( 431.71)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 117.02), SIMDE_FLOAT32_C( 874.06), SIMDE_FLOAT32_C( -896.71), SIMDE_FLOAT32_C( 927.83), SIMDE_FLOAT32_C( -471.09), SIMDE_FLOAT32_C( 907.26), SIMDE_FLOAT32_C( 774.08), SIMDE_FLOAT32_C( 141.60)), simde_mm_set_ps (SIMDE_FLOAT32_C( 69.32), SIMDE_FLOAT32_C( 645.62), SIMDE_FLOAT32_C( 860.89), SIMDE_FLOAT32_C( 694.26)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 117.02), SIMDE_FLOAT32_C( 874.06), SIMDE_FLOAT32_C( -896.71), SIMDE_FLOAT32_C( 927.83), SIMDE_FLOAT32_C( 69.32), SIMDE_FLOAT32_C( 645.62), SIMDE_FLOAT32_C( 860.89), SIMDE_FLOAT32_C( 694.26)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 69.32), SIMDE_FLOAT32_C( 645.62), SIMDE_FLOAT32_C( 860.89), SIMDE_FLOAT32_C( 694.26), SIMDE_FLOAT32_C( -471.09), SIMDE_FLOAT32_C( 907.26), SIMDE_FLOAT32_C( 774.08), SIMDE_FLOAT32_C( 141.60)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 ra = simde_mm256_insertf128_ps(test_vec[i].a, test_vec[i].b, 0); simde__m256 rb = simde_mm256_insertf128_ps(test_vec[i].a, test_vec[i].b, 1); simde_assert_m256_close(ra, test_vec[i].ra, 1); 
simde_assert_m256_close(rb, test_vec[i].rb, 1); } return 0; } static int test_simde_mm256_insertf128_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m128d b; simde__m256d ra; simde__m256d rb; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 500.07), SIMDE_FLOAT64_C( 24.20), SIMDE_FLOAT64_C( -264.31), SIMDE_FLOAT64_C( 584.01)), simde_mm_set_pd (SIMDE_FLOAT64_C( 431.47), SIMDE_FLOAT64_C( 318.12)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 500.07), SIMDE_FLOAT64_C( 24.20), SIMDE_FLOAT64_C( 431.47), SIMDE_FLOAT64_C( 318.12)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 431.47), SIMDE_FLOAT64_C( 318.12), SIMDE_FLOAT64_C( -264.31), SIMDE_FLOAT64_C( 584.01)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 669.13), SIMDE_FLOAT64_C( -378.72), SIMDE_FLOAT64_C( -204.56), SIMDE_FLOAT64_C( 289.88)), simde_mm_set_pd (SIMDE_FLOAT64_C( 609.30), SIMDE_FLOAT64_C( 491.95)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 669.13), SIMDE_FLOAT64_C( -378.72), SIMDE_FLOAT64_C( 609.30), SIMDE_FLOAT64_C( 491.95)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 609.30), SIMDE_FLOAT64_C( 491.95), SIMDE_FLOAT64_C( -204.56), SIMDE_FLOAT64_C( 289.88)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -439.47), SIMDE_FLOAT64_C( 501.94), SIMDE_FLOAT64_C( -311.14), SIMDE_FLOAT64_C( -486.50)), simde_mm_set_pd (SIMDE_FLOAT64_C( 460.51), SIMDE_FLOAT64_C( 800.13)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -439.47), SIMDE_FLOAT64_C( 501.94), SIMDE_FLOAT64_C( 460.51), SIMDE_FLOAT64_C( 800.13)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 460.51), SIMDE_FLOAT64_C( 800.13), SIMDE_FLOAT64_C( -311.14), SIMDE_FLOAT64_C( -486.50)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -828.83), SIMDE_FLOAT64_C( 892.34), SIMDE_FLOAT64_C( 849.35), SIMDE_FLOAT64_C( 71.26)), simde_mm_set_pd (SIMDE_FLOAT64_C( 690.69), SIMDE_FLOAT64_C( -666.59)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -828.83), SIMDE_FLOAT64_C( 892.34), SIMDE_FLOAT64_C( 690.69), SIMDE_FLOAT64_C( -666.59)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 690.69), SIMDE_FLOAT64_C( -666.59), SIMDE_FLOAT64_C( 849.35), 
SIMDE_FLOAT64_C( 71.26)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -393.25), SIMDE_FLOAT64_C( -143.78), SIMDE_FLOAT64_C( 452.34), SIMDE_FLOAT64_C( 313.17)), simde_mm_set_pd (SIMDE_FLOAT64_C( 2.43), SIMDE_FLOAT64_C( -405.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -393.25), SIMDE_FLOAT64_C( -143.78), SIMDE_FLOAT64_C( 2.43), SIMDE_FLOAT64_C( -405.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 2.43), SIMDE_FLOAT64_C( -405.20), SIMDE_FLOAT64_C( 452.34), SIMDE_FLOAT64_C( 313.17)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -19.72), SIMDE_FLOAT64_C( -282.65), SIMDE_FLOAT64_C( -261.63), SIMDE_FLOAT64_C( -641.13)), simde_mm_set_pd (SIMDE_FLOAT64_C( 818.65), SIMDE_FLOAT64_C( -240.18)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -19.72), SIMDE_FLOAT64_C( -282.65), SIMDE_FLOAT64_C( 818.65), SIMDE_FLOAT64_C( -240.18)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 818.65), SIMDE_FLOAT64_C( -240.18), SIMDE_FLOAT64_C( -261.63), SIMDE_FLOAT64_C( -641.13)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 809.87), SIMDE_FLOAT64_C( 692.31), SIMDE_FLOAT64_C( 848.43), SIMDE_FLOAT64_C( -514.36)), simde_mm_set_pd (SIMDE_FLOAT64_C( -330.16), SIMDE_FLOAT64_C( 670.26)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 809.87), SIMDE_FLOAT64_C( 692.31), SIMDE_FLOAT64_C( -330.16), SIMDE_FLOAT64_C( 670.26)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -330.16), SIMDE_FLOAT64_C( 670.26), SIMDE_FLOAT64_C( 848.43), SIMDE_FLOAT64_C( -514.36)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -917.99), SIMDE_FLOAT64_C( 637.33), SIMDE_FLOAT64_C( 143.49), SIMDE_FLOAT64_C( 390.85)), simde_mm_set_pd (SIMDE_FLOAT64_C( -606.83), SIMDE_FLOAT64_C( 948.25)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -917.99), SIMDE_FLOAT64_C( 637.33), SIMDE_FLOAT64_C( -606.83), SIMDE_FLOAT64_C( 948.25)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -606.83), SIMDE_FLOAT64_C( 948.25), SIMDE_FLOAT64_C( 143.49), SIMDE_FLOAT64_C( 390.85)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d ra = simde_mm256_insertf128_pd(test_vec[i].a, test_vec[i].b, 0); simde__m256d 
rb = simde_mm256_insertf128_pd(test_vec[i].a, test_vec[i].b, 1); simde_assert_m256d_close(ra, test_vec[i].ra, 1); simde_assert_m256d_close(rb, test_vec[i].rb, 1); } return 0; } static int test_simde_mm256_insertf128_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m128i b; simde__m256i ra; simde__m256i rb; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 1732788931), INT32_C( -493919285), INT32_C( -171391193), INT32_C( 1397412103), INT32_C( -356536147), INT32_C(-1692932708), INT32_C(-1699348696), INT32_C( -647395099)), simde_mm_set_epi32 (INT32_C(-1522680411), INT32_C(-1731979321), INT32_C( 1240335413), INT32_C( 201854332)), simde_mm256_set_epi32(INT32_C( 1732788931), INT32_C( -493919285), INT32_C( -171391193), INT32_C( 1397412103), INT32_C(-1522680411), INT32_C(-1731979321), INT32_C( 1240335413), INT32_C( 201854332)), simde_mm256_set_epi32(INT32_C(-1522680411), INT32_C(-1731979321), INT32_C( 1240335413), INT32_C( 201854332), INT32_C( -356536147), INT32_C(-1692932708), INT32_C(-1699348696), INT32_C( -647395099)) }, { simde_mm256_set_epi32(INT32_C(-1444875329), INT32_C( 1610023191), INT32_C( -708588022), INT32_C( -172947680), INT32_C( 545675582), INT32_C( 1925063203), INT32_C( 200249152), INT32_C( 925361522)), simde_mm_set_epi32 (INT32_C( -719778838), INT32_C( -908663617), INT32_C(-1043096582), INT32_C( 2027106265)), simde_mm256_set_epi32(INT32_C(-1444875329), INT32_C( 1610023191), INT32_C( -708588022), INT32_C( -172947680), INT32_C( -719778838), INT32_C( -908663617), INT32_C(-1043096582), INT32_C( 2027106265)), simde_mm256_set_epi32(INT32_C( -719778838), INT32_C( -908663617), INT32_C(-1043096582), INT32_C( 2027106265), INT32_C( 545675582), INT32_C( 1925063203), INT32_C( 200249152), INT32_C( 925361522)) }, { simde_mm256_set_epi32(INT32_C( 819255641), INT32_C( 758383634), INT32_C( -712717178), INT32_C( 1831898363), INT32_C( -652589148), INT32_C( 437505059), INT32_C(-1426201125), INT32_C( 915542579)), simde_mm_set_epi32 (INT32_C( 1102980249), 
INT32_C( 1568821342), INT32_C( 1031497605), INT32_C( 1535564672)), simde_mm256_set_epi32(INT32_C( 819255641), INT32_C( 758383634), INT32_C( -712717178), INT32_C( 1831898363), INT32_C( 1102980249), INT32_C( 1568821342), INT32_C( 1031497605), INT32_C( 1535564672)), simde_mm256_set_epi32(INT32_C( 1102980249), INT32_C( 1568821342), INT32_C( 1031497605), INT32_C( 1535564672), INT32_C( -652589148), INT32_C( 437505059), INT32_C(-1426201125), INT32_C( 915542579)) }, { simde_mm256_set_epi32(INT32_C(-1576300711), INT32_C( 804080573), INT32_C(-1947930635), INT32_C( -773073118), INT32_C( -708044343), INT32_C( 1025803241), INT32_C(-1542400953), INT32_C( 1513652867)), simde_mm_set_epi32 (INT32_C( -630903986), INT32_C( 1210274072), INT32_C(-1479627472), INT32_C( 1540958491)), simde_mm256_set_epi32(INT32_C(-1576300711), INT32_C( 804080573), INT32_C(-1947930635), INT32_C( -773073118), INT32_C( -630903986), INT32_C( 1210274072), INT32_C(-1479627472), INT32_C( 1540958491)), simde_mm256_set_epi32(INT32_C( -630903986), INT32_C( 1210274072), INT32_C(-1479627472), INT32_C( 1540958491), INT32_C( -708044343), INT32_C( 1025803241), INT32_C(-1542400953), INT32_C( 1513652867)) }, { simde_mm256_set_epi32(INT32_C(-1474400259), INT32_C( 1988182849), INT32_C(-1345043070), INT32_C(-2043590369), INT32_C( -792511350), INT32_C(-1919476039), INT32_C( -711077027), INT32_C(-1924737697)), simde_mm_set_epi32 (INT32_C(-1229154872), INT32_C( 1506932355), INT32_C( 529233496), INT32_C( 900061932)), simde_mm256_set_epi32(INT32_C(-1474400259), INT32_C( 1988182849), INT32_C(-1345043070), INT32_C(-2043590369), INT32_C(-1229154872), INT32_C( 1506932355), INT32_C( 529233496), INT32_C( 900061932)), simde_mm256_set_epi32(INT32_C(-1229154872), INT32_C( 1506932355), INT32_C( 529233496), INT32_C( 900061932), INT32_C( -792511350), INT32_C(-1919476039), INT32_C( -711077027), INT32_C(-1924737697)) }, { simde_mm256_set_epi32(INT32_C( 1011012252), INT32_C(-1383487313), INT32_C( -799281089), INT32_C(-1421799289), 
INT32_C(-1020863292), INT32_C( -870274327), INT32_C( 767506840), INT32_C( 905532467)), simde_mm_set_epi32 (INT32_C( 535053718), INT32_C( 1571414305), INT32_C( 327456521), INT32_C( 562021450)), simde_mm256_set_epi32(INT32_C( 1011012252), INT32_C(-1383487313), INT32_C( -799281089), INT32_C(-1421799289), INT32_C( 535053718), INT32_C( 1571414305), INT32_C( 327456521), INT32_C( 562021450)), simde_mm256_set_epi32(INT32_C( 535053718), INT32_C( 1571414305), INT32_C( 327456521), INT32_C( 562021450), INT32_C(-1020863292), INT32_C( -870274327), INT32_C( 767506840), INT32_C( 905532467)) }, { simde_mm256_set_epi32(INT32_C(-1892816233), INT32_C( -258025342), INT32_C(-1474147149), INT32_C( 200557748), INT32_C( 863187861), INT32_C( 1974870245), INT32_C( 1114174400), INT32_C( -122006961)), simde_mm_set_epi32 (INT32_C( 1960728456), INT32_C(-1615388317), INT32_C( 728614642), INT32_C( 181559353)), simde_mm256_set_epi32(INT32_C(-1892816233), INT32_C( -258025342), INT32_C(-1474147149), INT32_C( 200557748), INT32_C( 1960728456), INT32_C(-1615388317), INT32_C( 728614642), INT32_C( 181559353)), simde_mm256_set_epi32(INT32_C( 1960728456), INT32_C(-1615388317), INT32_C( 728614642), INT32_C( 181559353), INT32_C( 863187861), INT32_C( 1974870245), INT32_C( 1114174400), INT32_C( -122006961)) }, { simde_mm256_set_epi32(INT32_C( -172940012), INT32_C(-1274554211), INT32_C( -855665209), INT32_C( 935611457), INT32_C( -592164168), INT32_C( 945068232), INT32_C( 755470781), INT32_C(-1762512447)), simde_mm_set_epi32 (INT32_C(-1172491108), INT32_C(-1413112125), INT32_C( 65588240), INT32_C(-1859214337)), simde_mm256_set_epi32(INT32_C( -172940012), INT32_C(-1274554211), INT32_C( -855665209), INT32_C( 935611457), INT32_C(-1172491108), INT32_C(-1413112125), INT32_C( 65588240), INT32_C(-1859214337)), simde_mm256_set_epi32(INT32_C(-1172491108), INT32_C(-1413112125), INT32_C( 65588240), INT32_C(-1859214337), INT32_C( -592164168), INT32_C( 945068232), INT32_C( 755470781), INT32_C(-1762512447)) } }; for (size_t i 
= 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i ra = simde_mm256_insertf128_si256(test_vec[i].a, test_vec[i].b, 0); simde__m256i rb = simde_mm256_insertf128_si256(test_vec[i].a, test_vec[i].b, 1); simde_assert_m256i_i32(ra, ==, test_vec[i].ra); simde_assert_m256i_i32(rb, ==, test_vec[i].rb); } return 0; } static int test_simde_mm256_lddqu_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( -208613396), INT32_C( 972060947), INT32_C( 1079690819), INT32_C(-1629141358), INT32_C( -291568998), INT32_C( -706346303), INT32_C( 1782265269), INT32_C( 663843445)), simde_mm256_set_epi32(INT32_C( -208613396), INT32_C( 972060947), INT32_C( 1079690819), INT32_C(-1629141358), INT32_C( -291568998), INT32_C( -706346303), INT32_C( 1782265269), INT32_C( 663843445)) }, { simde_mm256_set_epi32(INT32_C( -542385526), INT32_C(-1915647746), INT32_C( 251129882), INT32_C( 290247368), INT32_C( 363399145), INT32_C( 688121978), INT32_C( 600807845), INT32_C( 1456401224)), simde_mm256_set_epi32(INT32_C( -542385526), INT32_C(-1915647746), INT32_C( 251129882), INT32_C( 290247368), INT32_C( 363399145), INT32_C( 688121978), INT32_C( 600807845), INT32_C( 1456401224)) }, { simde_mm256_set_epi32(INT32_C( 862880243), INT32_C( 961555167), INT32_C( -704902562), INT32_C(-2017515450), INT32_C(-1906482322), INT32_C(-1699379933), INT32_C( 1894527886), INT32_C( 2049947519)), simde_mm256_set_epi32(INT32_C( 862880243), INT32_C( 961555167), INT32_C( -704902562), INT32_C(-2017515450), INT32_C(-1906482322), INT32_C(-1699379933), INT32_C( 1894527886), INT32_C( 2049947519)) }, { simde_mm256_set_epi32(INT32_C( 1564827830), INT32_C( -831950379), INT32_C( 815117120), INT32_C( -372364589), INT32_C(-1095370522), INT32_C( 1608512554), INT32_C( 1210942744), INT32_C( 816264608)), simde_mm256_set_epi32(INT32_C( 1564827830), INT32_C( -831950379), INT32_C( 815117120), INT32_C( -372364589), INT32_C(-1095370522), INT32_C( 
1608512554), INT32_C( 1210942744), INT32_C( 816264608)) }, { simde_mm256_set_epi32(INT32_C( 1014835213), INT32_C( 419509758), INT32_C( -940172407), INT32_C( 2075423717), INT32_C( -958302313), INT32_C( 2056263130), INT32_C( -179845947), INT32_C( -487391602)), simde_mm256_set_epi32(INT32_C( 1014835213), INT32_C( 419509758), INT32_C( -940172407), INT32_C( 2075423717), INT32_C( -958302313), INT32_C( 2056263130), INT32_C( -179845947), INT32_C( -487391602)) }, { simde_mm256_set_epi32(INT32_C( 750230136), INT32_C( 830844077), INT32_C( 1366738463), INT32_C( 1719449608), INT32_C( 953227083), INT32_C( -624601508), INT32_C( -983006206), INT32_C( 1138640848)), simde_mm256_set_epi32(INT32_C( 750230136), INT32_C( 830844077), INT32_C( 1366738463), INT32_C( 1719449608), INT32_C( 953227083), INT32_C( -624601508), INT32_C( -983006206), INT32_C( 1138640848)) }, { simde_mm256_set_epi32(INT32_C(-2045061394), INT32_C( -759814821), INT32_C( 1064937743), INT32_C(-1124388611), INT32_C( -168818003), INT32_C( -757055903), INT32_C(-1606176919), INT32_C( 254467933)), simde_mm256_set_epi32(INT32_C(-2045061394), INT32_C( -759814821), INT32_C( 1064937743), INT32_C(-1124388611), INT32_C( -168818003), INT32_C( -757055903), INT32_C(-1606176919), INT32_C( 254467933)) }, { simde_mm256_set_epi32(INT32_C(-1387663431), INT32_C( 2083885974), INT32_C( 524830617), INT32_C( 1548734942), INT32_C( 1378860315), INT32_C(-1149727640), INT32_C( 1373643603), INT32_C( 772353923)), simde_mm256_set_epi32(INT32_C(-1387663431), INT32_C( 2083885974), INT32_C( 524830617), INT32_C( 1548734942), INT32_C( 1378860315), INT32_C(-1149727640), INT32_C( 1373643603), INT32_C( 772353923)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_lddqu_si256(&(test_vec[i].a)); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; }

/* Test for simde_mm256_load_pd.
 *
 * Each case holds four doubles in `a` (memory order) and the expected vector
 * `r`, written as simde_mm256_set_pd() with the same four values listed in
 * reverse order.  The loop loads `a` with simde_mm256_load_pd and compares the
 * result against `r` using simde_assert_m256d_close.
 *
 * `a` is handed to the aligned-load intrinsic, so it is declared with
 * SIMDE_ALIGN_LIKE_32(simde__m256d), mirroring the float variant of this test
 * (test_simde_mm256_load_ps), instead of relying on the member's position in
 * the struct for its alignment.
 *
 * Returns 0 when the loop completes.
 */
static int
test_simde_mm256_load_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    SIMDE_ALIGN_LIKE_32(simde__m256d) simde_float64 a[sizeof(simde__m256d) / sizeof(simde_float64)];
    simde__m256d r;
  } test_vec[8] = {
    { { SIMDE_FLOAT64_C( -338.67), SIMDE_FLOAT64_C( 630.84), SIMDE_FLOAT64_C( -302.19), SIMDE_FLOAT64_C( -238.77) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -238.77), SIMDE_FLOAT64_C( -302.19), SIMDE_FLOAT64_C( 630.84), SIMDE_FLOAT64_C( -338.67)) },
    { { SIMDE_FLOAT64_C( 725.41), SIMDE_FLOAT64_C( -787.32), SIMDE_FLOAT64_C( -819.45), SIMDE_FLOAT64_C( 657.50) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( 657.50), SIMDE_FLOAT64_C( -819.45), SIMDE_FLOAT64_C( -787.32), SIMDE_FLOAT64_C( 725.41)) },
    { { SIMDE_FLOAT64_C( -519.61), SIMDE_FLOAT64_C( 692.74), SIMDE_FLOAT64_C( 96.96), SIMDE_FLOAT64_C( -63.30) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -63.30), SIMDE_FLOAT64_C( 96.96), SIMDE_FLOAT64_C( 692.74), SIMDE_FLOAT64_C( -519.61)) },
    { { SIMDE_FLOAT64_C( 577.54), SIMDE_FLOAT64_C( -524.47), SIMDE_FLOAT64_C( -254.05), SIMDE_FLOAT64_C( 614.55) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( 614.55), SIMDE_FLOAT64_C( -254.05), SIMDE_FLOAT64_C( -524.47), SIMDE_FLOAT64_C( 577.54)) },
    { { SIMDE_FLOAT64_C( -608.94), SIMDE_FLOAT64_C( 345.46), SIMDE_FLOAT64_C( -476.81), SIMDE_FLOAT64_C( -532.19) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -532.19), SIMDE_FLOAT64_C( -476.81), SIMDE_FLOAT64_C( 345.46), SIMDE_FLOAT64_C( -608.94)) },
    { { SIMDE_FLOAT64_C( 96.64), SIMDE_FLOAT64_C( -218.82), SIMDE_FLOAT64_C( -345.29), SIMDE_FLOAT64_C( -716.59) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -716.59), SIMDE_FLOAT64_C( -345.29), SIMDE_FLOAT64_C( -218.82), SIMDE_FLOAT64_C( 96.64)) },
    { { SIMDE_FLOAT64_C( 896.80), SIMDE_FLOAT64_C( -999.47), SIMDE_FLOAT64_C( 692.69), SIMDE_FLOAT64_C( 75.34) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( 75.34), SIMDE_FLOAT64_C( 692.69), SIMDE_FLOAT64_C( -999.47), SIMDE_FLOAT64_C( 896.80)) },
    { { SIMDE_FLOAT64_C( -936.41), SIMDE_FLOAT64_C( 832.42), SIMDE_FLOAT64_C( 861.03), SIMDE_FLOAT64_C( -909.25) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -909.25), SIMDE_FLOAT64_C( 861.03), SIMDE_FLOAT64_C( 832.42), SIMDE_FLOAT64_C( -936.41)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256d r = simde_mm256_load_pd(test_vec[i].a);
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

static int test_simde_mm256_load_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { SIMDE_ALIGN_LIKE_32(simde__m256) simde_float32 a[sizeof(simde__m256) / sizeof(simde_float32)]; simde__m256 r; } test_vec[8] = { { { SIMDE_FLOAT32_C( -651.15), SIMDE_FLOAT32_C( 486.09), SIMDE_FLOAT32_C( 809.52), SIMDE_FLOAT32_C( 897.18), SIMDE_FLOAT32_C( -164.76), SIMDE_FLOAT32_C( 925.08), SIMDE_FLOAT32_C( -141.17), SIMDE_FLOAT32_C( 524.77) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( 524.77), SIMDE_FLOAT32_C( -141.17), SIMDE_FLOAT32_C( 925.08), SIMDE_FLOAT32_C( -164.76), SIMDE_FLOAT32_C( 897.18), SIMDE_FLOAT32_C( 809.52), SIMDE_FLOAT32_C( 486.09), SIMDE_FLOAT32_C( -651.15)) }, { { SIMDE_FLOAT32_C( 154.61), SIMDE_FLOAT32_C( -436.96), SIMDE_FLOAT32_C( -109.54), SIMDE_FLOAT32_C( -422.39), SIMDE_FLOAT32_C( -113.81), SIMDE_FLOAT32_C( -740.60), SIMDE_FLOAT32_C( -581.05), SIMDE_FLOAT32_C( 534.88) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( 534.88), SIMDE_FLOAT32_C( -581.05), SIMDE_FLOAT32_C( -740.60), SIMDE_FLOAT32_C( -113.81), SIMDE_FLOAT32_C( -422.39), SIMDE_FLOAT32_C( -109.54), SIMDE_FLOAT32_C( -436.96), SIMDE_FLOAT32_C( 154.61)) }, { { SIMDE_FLOAT32_C( 689.49), SIMDE_FLOAT32_C( -831.99), SIMDE_FLOAT32_C( 872.86), SIMDE_FLOAT32_C( 554.28), SIMDE_FLOAT32_C( 799.73), SIMDE_FLOAT32_C( -331.18), SIMDE_FLOAT32_C( 338.85), SIMDE_FLOAT32_C( 425.19) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( 425.19), SIMDE_FLOAT32_C( 338.85), SIMDE_FLOAT32_C( -331.18), SIMDE_FLOAT32_C( 799.73), SIMDE_FLOAT32_C( 554.28), SIMDE_FLOAT32_C( 872.86), SIMDE_FLOAT32_C( -831.99), SIMDE_FLOAT32_C( 689.49)) }, { { SIMDE_FLOAT32_C( 22.85), SIMDE_FLOAT32_C( -436.81), SIMDE_FLOAT32_C( 473.32), SIMDE_FLOAT32_C( 132.51), SIMDE_FLOAT32_C( -295.42), SIMDE_FLOAT32_C( 74.04), SIMDE_FLOAT32_C( 445.74), SIMDE_FLOAT32_C( 574.68) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( 574.68), SIMDE_FLOAT32_C( 445.74), 
SIMDE_FLOAT32_C( 74.04), SIMDE_FLOAT32_C( -295.42), SIMDE_FLOAT32_C( 132.51), SIMDE_FLOAT32_C( 473.32), SIMDE_FLOAT32_C( -436.81), SIMDE_FLOAT32_C( 22.85)) }, { { SIMDE_FLOAT32_C( 105.79), SIMDE_FLOAT32_C( -21.01), SIMDE_FLOAT32_C( -754.65), SIMDE_FLOAT32_C( -355.76), SIMDE_FLOAT32_C( 716.76), SIMDE_FLOAT32_C( -141.32), SIMDE_FLOAT32_C( 300.83), SIMDE_FLOAT32_C( -21.61) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -21.61), SIMDE_FLOAT32_C( 300.83), SIMDE_FLOAT32_C( -141.32), SIMDE_FLOAT32_C( 716.76), SIMDE_FLOAT32_C( -355.76), SIMDE_FLOAT32_C( -754.65), SIMDE_FLOAT32_C( -21.01), SIMDE_FLOAT32_C( 105.79)) }, { { SIMDE_FLOAT32_C( -421.92), SIMDE_FLOAT32_C( 236.64), SIMDE_FLOAT32_C( -349.60), SIMDE_FLOAT32_C( 710.87), SIMDE_FLOAT32_C( -664.65), SIMDE_FLOAT32_C( 50.17), SIMDE_FLOAT32_C( 82.89), SIMDE_FLOAT32_C( -240.57) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -240.57), SIMDE_FLOAT32_C( 82.89), SIMDE_FLOAT32_C( 50.17), SIMDE_FLOAT32_C( -664.65), SIMDE_FLOAT32_C( 710.87), SIMDE_FLOAT32_C( -349.60), SIMDE_FLOAT32_C( 236.64), SIMDE_FLOAT32_C( -421.92)) }, { { SIMDE_FLOAT32_C( 68.48), SIMDE_FLOAT32_C( 518.42), SIMDE_FLOAT32_C( 968.06), SIMDE_FLOAT32_C( -197.34), SIMDE_FLOAT32_C( 351.10), SIMDE_FLOAT32_C( 113.17), SIMDE_FLOAT32_C( 713.12), SIMDE_FLOAT32_C( -462.23) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -462.23), SIMDE_FLOAT32_C( 713.12), SIMDE_FLOAT32_C( 113.17), SIMDE_FLOAT32_C( 351.10), SIMDE_FLOAT32_C( -197.34), SIMDE_FLOAT32_C( 968.06), SIMDE_FLOAT32_C( 518.42), SIMDE_FLOAT32_C( 68.48)) }, { { SIMDE_FLOAT32_C( -676.83), SIMDE_FLOAT32_C( 745.78), SIMDE_FLOAT32_C( -436.07), SIMDE_FLOAT32_C( 808.02), SIMDE_FLOAT32_C( 901.47), SIMDE_FLOAT32_C( -652.23), SIMDE_FLOAT32_C( -649.97), SIMDE_FLOAT32_C( -289.44) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -289.44), SIMDE_FLOAT32_C( -649.97), SIMDE_FLOAT32_C( -652.23), SIMDE_FLOAT32_C( 901.47), SIMDE_FLOAT32_C( 808.02), SIMDE_FLOAT32_C( -436.07), SIMDE_FLOAT32_C( 745.78), SIMDE_FLOAT32_C( -676.83)) } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_load_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_load_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 93433077), INT32_C( 912488615), INT32_C( -849505573), INT32_C( -538760324), INT32_C( 576018808), INT32_C( 306399285), INT32_C( 761465198), INT32_C( 67322681)), simde_mm256_set_epi32(INT32_C( 93433077), INT32_C( 912488615), INT32_C( -849505573), INT32_C( -538760324), INT32_C( 576018808), INT32_C( 306399285), INT32_C( 761465198), INT32_C( 67322681)) }, { simde_mm256_set_epi32(INT32_C( 1661040700), INT32_C(-1506281364), INT32_C( 769308925), INT32_C( -545741767), INT32_C(-1609914843), INT32_C(-1728610320), INT32_C( 1438363911), INT32_C(-1495474004)), simde_mm256_set_epi32(INT32_C( 1661040700), INT32_C(-1506281364), INT32_C( 769308925), INT32_C( -545741767), INT32_C(-1609914843), INT32_C(-1728610320), INT32_C( 1438363911), INT32_C(-1495474004)) }, { simde_mm256_set_epi32(INT32_C( -403469250), INT32_C( 1422195130), INT32_C( 1240509512), INT32_C(-1325093027), INT32_C( 1112848703), INT32_C( 757887555), INT32_C( -808479029), INT32_C( 1524821649)), simde_mm256_set_epi32(INT32_C( -403469250), INT32_C( 1422195130), INT32_C( 1240509512), INT32_C(-1325093027), INT32_C( 1112848703), INT32_C( 757887555), INT32_C( -808479029), INT32_C( 1524821649)) }, { simde_mm256_set_epi32(INT32_C( 419753251), INT32_C( 1133371811), INT32_C( 1920523876), INT32_C( 1566543302), INT32_C( 1608176387), INT32_C( 174748447), INT32_C(-1944132629), INT32_C(-1618941327)), simde_mm256_set_epi32(INT32_C( 419753251), INT32_C( 1133371811), INT32_C( 1920523876), INT32_C( 1566543302), INT32_C( 1608176387), INT32_C( 174748447), INT32_C(-1944132629), INT32_C(-1618941327)) }, { simde_mm256_set_epi32(INT32_C( 133578927), INT32_C( -89176331), INT32_C( 533976318), INT32_C( 686005880), INT32_C( 
1680867737), INT32_C( -633287306), INT32_C( -911734776), INT32_C( 1028891739)), simde_mm256_set_epi32(INT32_C( 133578927), INT32_C( -89176331), INT32_C( 533976318), INT32_C( 686005880), INT32_C( 1680867737), INT32_C( -633287306), INT32_C( -911734776), INT32_C( 1028891739)) }, { simde_mm256_set_epi32(INT32_C( 1968343895), INT32_C( 1991193919), INT32_C(-1412421123), INT32_C(-1413471204), INT32_C( 1571538617), INT32_C( 392630938), INT32_C( 44925707), INT32_C(-1288122501)), simde_mm256_set_epi32(INT32_C( 1968343895), INT32_C( 1991193919), INT32_C(-1412421123), INT32_C(-1413471204), INT32_C( 1571538617), INT32_C( 392630938), INT32_C( 44925707), INT32_C(-1288122501)) }, { simde_mm256_set_epi32(INT32_C( 932954327), INT32_C( 884951875), INT32_C(-1145840174), INT32_C( 2040117874), INT32_C( 39201359), INT32_C( -102892947), INT32_C( 740751736), INT32_C( 1598969461)), simde_mm256_set_epi32(INT32_C( 932954327), INT32_C( 884951875), INT32_C(-1145840174), INT32_C( 2040117874), INT32_C( 39201359), INT32_C( -102892947), INT32_C( 740751736), INT32_C( 1598969461)) }, { simde_mm256_set_epi32(INT32_C( -471731507), INT32_C( 1955207001), INT32_C(-1681640586), INT32_C( -304295513), INT32_C( 1688427496), INT32_C(-1852849481), INT32_C( -533311004), INT32_C( 263226824)), simde_mm256_set_epi32(INT32_C( -471731507), INT32_C( 1955207001), INT32_C(-1681640586), INT32_C( -304295513), INT32_C( 1688427496), INT32_C(-1852849481), INT32_C( -533311004), INT32_C( 263226824)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_load_si256(&(test_vec[i].a)); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; }

/* Test for simde_mm256_loadu_pd.
 *
 * Each case holds four doubles in `a` (memory order) and the expected vector
 * `r`, written as simde_mm256_set_pd() with the same four values listed in
 * reverse order.  The loop loads `a` and compares the result against `r`
 * using simde_assert_m256d_close.
 *
 * The load under test must be simde_mm256_loadu_pd: this test previously
 * called simde_mm256_load_pd, so the function it is named after was never
 * exercised.
 *
 * Returns 0 when the loop completes.
 */
static int
test_simde_mm256_loadu_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde_float64 a[sizeof(simde__m256d) / sizeof(simde_float64)];
    simde__m256d r;
  } test_vec[8] = {
    { { SIMDE_FLOAT64_C( -245.76), SIMDE_FLOAT64_C( -764.95), SIMDE_FLOAT64_C( 498.87), SIMDE_FLOAT64_C( -327.12) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -327.12), SIMDE_FLOAT64_C( 498.87), SIMDE_FLOAT64_C( -764.95), SIMDE_FLOAT64_C( -245.76)) },
    { { SIMDE_FLOAT64_C( -747.96), SIMDE_FLOAT64_C( 887.55), SIMDE_FLOAT64_C( -714.24), SIMDE_FLOAT64_C( 189.85) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( 189.85), SIMDE_FLOAT64_C( -714.24), SIMDE_FLOAT64_C( 887.55), SIMDE_FLOAT64_C( -747.96)) },
    { { SIMDE_FLOAT64_C( -816.60), SIMDE_FLOAT64_C( 548.05), SIMDE_FLOAT64_C( -852.03), SIMDE_FLOAT64_C( 683.50) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( 683.50), SIMDE_FLOAT64_C( -852.03), SIMDE_FLOAT64_C( 548.05), SIMDE_FLOAT64_C( -816.60)) },
    { { SIMDE_FLOAT64_C( 957.12), SIMDE_FLOAT64_C( 857.15), SIMDE_FLOAT64_C( -289.83), SIMDE_FLOAT64_C( -642.05) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -642.05), SIMDE_FLOAT64_C( -289.83), SIMDE_FLOAT64_C( 857.15), SIMDE_FLOAT64_C( 957.12)) },
    { { SIMDE_FLOAT64_C( 279.73), SIMDE_FLOAT64_C( 98.54), SIMDE_FLOAT64_C( 917.87), SIMDE_FLOAT64_C( -218.86) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -218.86), SIMDE_FLOAT64_C( 917.87), SIMDE_FLOAT64_C( 98.54), SIMDE_FLOAT64_C( 279.73)) },
    { { SIMDE_FLOAT64_C( -705.64), SIMDE_FLOAT64_C( -89.39), SIMDE_FLOAT64_C( -237.89), SIMDE_FLOAT64_C( 9.05) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( 9.05), SIMDE_FLOAT64_C( -237.89), SIMDE_FLOAT64_C( -89.39), SIMDE_FLOAT64_C( -705.64)) },
    { { SIMDE_FLOAT64_C( 359.06), SIMDE_FLOAT64_C( 630.19), SIMDE_FLOAT64_C( -718.76), SIMDE_FLOAT64_C( 263.72) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( 263.72), SIMDE_FLOAT64_C( -718.76), SIMDE_FLOAT64_C( 630.19), SIMDE_FLOAT64_C( 359.06)) },
    { { SIMDE_FLOAT64_C( 705.88), SIMDE_FLOAT64_C( 454.13), SIMDE_FLOAT64_C( 871.24), SIMDE_FLOAT64_C( -794.27) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -794.27), SIMDE_FLOAT64_C( 871.24), SIMDE_FLOAT64_C( 454.13), SIMDE_FLOAT64_C( 705.88)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    /* Use the unaligned-load variant; `a` carries no alignment specifier. */
    simde__m256d r = simde_mm256_loadu_pd(test_vec[i].a);
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

static int test_simde_mm256_loadu_ps(SIMDE_MUNIT_TEST_ARGS) 
{ const struct { simde_float32 a[sizeof(simde__m256) / sizeof(simde_float32)]; simde__m256 r; } test_vec[8] = { { { SIMDE_FLOAT32_C( 989.38), SIMDE_FLOAT32_C( -636.59), SIMDE_FLOAT32_C( 969.19), SIMDE_FLOAT32_C( 802.78), SIMDE_FLOAT32_C( -677.79), SIMDE_FLOAT32_C( 669.00), SIMDE_FLOAT32_C( -625.50), SIMDE_FLOAT32_C( -971.80) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -971.80), SIMDE_FLOAT32_C( -625.50), SIMDE_FLOAT32_C( 669.00), SIMDE_FLOAT32_C( -677.79), SIMDE_FLOAT32_C( 802.78), SIMDE_FLOAT32_C( 969.19), SIMDE_FLOAT32_C( -636.59), SIMDE_FLOAT32_C( 989.38)) }, { { SIMDE_FLOAT32_C( 483.87), SIMDE_FLOAT32_C( 313.54), SIMDE_FLOAT32_C( -722.81), SIMDE_FLOAT32_C( 175.58), SIMDE_FLOAT32_C( -520.14), SIMDE_FLOAT32_C( -222.39), SIMDE_FLOAT32_C( 889.56), SIMDE_FLOAT32_C( -141.86) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -141.86), SIMDE_FLOAT32_C( 889.56), SIMDE_FLOAT32_C( -222.39), SIMDE_FLOAT32_C( -520.14), SIMDE_FLOAT32_C( 175.58), SIMDE_FLOAT32_C( -722.81), SIMDE_FLOAT32_C( 313.54), SIMDE_FLOAT32_C( 483.87)) }, { { SIMDE_FLOAT32_C( 28.06), SIMDE_FLOAT32_C( 709.83), SIMDE_FLOAT32_C( -372.28), SIMDE_FLOAT32_C( 743.18), SIMDE_FLOAT32_C( -465.26), SIMDE_FLOAT32_C( -871.71), SIMDE_FLOAT32_C( 213.87), SIMDE_FLOAT32_C( 34.60) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( 34.60), SIMDE_FLOAT32_C( 213.87), SIMDE_FLOAT32_C( -871.71), SIMDE_FLOAT32_C( -465.26), SIMDE_FLOAT32_C( 743.18), SIMDE_FLOAT32_C( -372.28), SIMDE_FLOAT32_C( 709.83), SIMDE_FLOAT32_C( 28.06)) }, { { SIMDE_FLOAT32_C( 290.56), SIMDE_FLOAT32_C( 408.42), SIMDE_FLOAT32_C( -438.13), SIMDE_FLOAT32_C( -460.46), SIMDE_FLOAT32_C( -639.21), SIMDE_FLOAT32_C( -231.83), SIMDE_FLOAT32_C( 590.87), SIMDE_FLOAT32_C( -474.24) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -474.24), SIMDE_FLOAT32_C( 590.87), SIMDE_FLOAT32_C( -231.83), SIMDE_FLOAT32_C( -639.21), SIMDE_FLOAT32_C( -460.46), SIMDE_FLOAT32_C( -438.13), SIMDE_FLOAT32_C( 408.42), SIMDE_FLOAT32_C( 290.56)) }, { { SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( 108.23), SIMDE_FLOAT32_C( -73.19), 
SIMDE_FLOAT32_C( 188.25), SIMDE_FLOAT32_C( 420.93), SIMDE_FLOAT32_C( 522.97), SIMDE_FLOAT32_C( 234.89), SIMDE_FLOAT32_C( -731.34) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -731.34), SIMDE_FLOAT32_C( 234.89), SIMDE_FLOAT32_C( 522.97), SIMDE_FLOAT32_C( 420.93), SIMDE_FLOAT32_C( 188.25), SIMDE_FLOAT32_C( -73.19), SIMDE_FLOAT32_C( 108.23), SIMDE_FLOAT32_C( -304.73)) }, { { SIMDE_FLOAT32_C( 708.07), SIMDE_FLOAT32_C( 370.70), SIMDE_FLOAT32_C( -989.08), SIMDE_FLOAT32_C( -602.45), SIMDE_FLOAT32_C( -987.01), SIMDE_FLOAT32_C( 154.31), SIMDE_FLOAT32_C( -220.43), SIMDE_FLOAT32_C( 262.39) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( 262.39), SIMDE_FLOAT32_C( -220.43), SIMDE_FLOAT32_C( 154.31), SIMDE_FLOAT32_C( -987.01), SIMDE_FLOAT32_C( -602.45), SIMDE_FLOAT32_C( -989.08), SIMDE_FLOAT32_C( 370.70), SIMDE_FLOAT32_C( 708.07)) }, { { SIMDE_FLOAT32_C( 947.64), SIMDE_FLOAT32_C( -74.77), SIMDE_FLOAT32_C( 902.77), SIMDE_FLOAT32_C( -429.19), SIMDE_FLOAT32_C( -305.81), SIMDE_FLOAT32_C( 762.65), SIMDE_FLOAT32_C( -261.04), SIMDE_FLOAT32_C( -156.66) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -156.66), SIMDE_FLOAT32_C( -261.04), SIMDE_FLOAT32_C( 762.65), SIMDE_FLOAT32_C( -305.81), SIMDE_FLOAT32_C( -429.19), SIMDE_FLOAT32_C( 902.77), SIMDE_FLOAT32_C( -74.77), SIMDE_FLOAT32_C( 947.64)) }, { { SIMDE_FLOAT32_C( -313.48), SIMDE_FLOAT32_C( -237.38), SIMDE_FLOAT32_C( 572.62), SIMDE_FLOAT32_C( -800.42), SIMDE_FLOAT32_C( -6.98), SIMDE_FLOAT32_C( 968.23), SIMDE_FLOAT32_C( 417.54), SIMDE_FLOAT32_C( 107.47) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( 107.47), SIMDE_FLOAT32_C( 417.54), SIMDE_FLOAT32_C( 968.23), SIMDE_FLOAT32_C( -6.98), SIMDE_FLOAT32_C( -800.42), SIMDE_FLOAT32_C( 572.62), SIMDE_FLOAT32_C( -237.38), SIMDE_FLOAT32_C( -313.48)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_loadu_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_loadu_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i 
a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 93433077), INT32_C( 912488615), INT32_C( -849505573), INT32_C( -538760324), INT32_C( 576018808), INT32_C( 306399285), INT32_C( 761465198), INT32_C( 67322681)), simde_mm256_set_epi32(INT32_C( 93433077), INT32_C( 912488615), INT32_C( -849505573), INT32_C( -538760324), INT32_C( 576018808), INT32_C( 306399285), INT32_C( 761465198), INT32_C( 67322681)) }, { simde_mm256_set_epi32(INT32_C( 1661040700), INT32_C(-1506281364), INT32_C( 769308925), INT32_C( -545741767), INT32_C(-1609914843), INT32_C(-1728610320), INT32_C( 1438363911), INT32_C(-1495474004)), simde_mm256_set_epi32(INT32_C( 1661040700), INT32_C(-1506281364), INT32_C( 769308925), INT32_C( -545741767), INT32_C(-1609914843), INT32_C(-1728610320), INT32_C( 1438363911), INT32_C(-1495474004)) }, { simde_mm256_set_epi32(INT32_C( -403469250), INT32_C( 1422195130), INT32_C( 1240509512), INT32_C(-1325093027), INT32_C( 1112848703), INT32_C( 757887555), INT32_C( -808479029), INT32_C( 1524821649)), simde_mm256_set_epi32(INT32_C( -403469250), INT32_C( 1422195130), INT32_C( 1240509512), INT32_C(-1325093027), INT32_C( 1112848703), INT32_C( 757887555), INT32_C( -808479029), INT32_C( 1524821649)) }, { simde_mm256_set_epi32(INT32_C( 419753251), INT32_C( 1133371811), INT32_C( 1920523876), INT32_C( 1566543302), INT32_C( 1608176387), INT32_C( 174748447), INT32_C(-1944132629), INT32_C(-1618941327)), simde_mm256_set_epi32(INT32_C( 419753251), INT32_C( 1133371811), INT32_C( 1920523876), INT32_C( 1566543302), INT32_C( 1608176387), INT32_C( 174748447), INT32_C(-1944132629), INT32_C(-1618941327)) }, { simde_mm256_set_epi32(INT32_C( 133578927), INT32_C( -89176331), INT32_C( 533976318), INT32_C( 686005880), INT32_C( 1680867737), INT32_C( -633287306), INT32_C( -911734776), INT32_C( 1028891739)), simde_mm256_set_epi32(INT32_C( 133578927), INT32_C( -89176331), INT32_C( 533976318), INT32_C( 686005880), INT32_C( 1680867737), INT32_C( -633287306), INT32_C( -911734776), INT32_C( 
1028891739)) }, { simde_mm256_set_epi32(INT32_C( 1968343895), INT32_C( 1991193919), INT32_C(-1412421123), INT32_C(-1413471204), INT32_C( 1571538617), INT32_C( 392630938), INT32_C( 44925707), INT32_C(-1288122501)), simde_mm256_set_epi32(INT32_C( 1968343895), INT32_C( 1991193919), INT32_C(-1412421123), INT32_C(-1413471204), INT32_C( 1571538617), INT32_C( 392630938), INT32_C( 44925707), INT32_C(-1288122501)) }, { simde_mm256_set_epi32(INT32_C( 932954327), INT32_C( 884951875), INT32_C(-1145840174), INT32_C( 2040117874), INT32_C( 39201359), INT32_C( -102892947), INT32_C( 740751736), INT32_C( 1598969461)), simde_mm256_set_epi32(INT32_C( 932954327), INT32_C( 884951875), INT32_C(-1145840174), INT32_C( 2040117874), INT32_C( 39201359), INT32_C( -102892947), INT32_C( 740751736), INT32_C( 1598969461)) }, { simde_mm256_set_epi32(INT32_C( -471731507), INT32_C( 1955207001), INT32_C(-1681640586), INT32_C( -304295513), INT32_C( 1688427496), INT32_C(-1852849481), INT32_C( -533311004), INT32_C( 263226824)), simde_mm256_set_epi32(INT32_C( -471731507), INT32_C( 1955207001), INT32_C(-1681640586), INT32_C( -304295513), INT32_C( 1688427496), INT32_C(-1852849481), INT32_C( -533311004), INT32_C( 263226824)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_loadu_si256(&(test_vec[i].a)); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_loadu2_m128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float32 a[sizeof(simde__m128) / sizeof(simde_float32)]; simde_float32 b[sizeof(simde__m128) / sizeof(simde_float32)]; simde__m256 r; } test_vec[8] = { { { SIMDE_FLOAT32_C( 13.39), SIMDE_FLOAT32_C( 253.33), SIMDE_FLOAT32_C( 769.78), SIMDE_FLOAT32_C( 607.23) }, { SIMDE_FLOAT32_C( 382.59), SIMDE_FLOAT32_C( 295.37), SIMDE_FLOAT32_C( -847.51), SIMDE_FLOAT32_C( -193.22) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( 607.23), SIMDE_FLOAT32_C( 769.78), SIMDE_FLOAT32_C( 253.33), SIMDE_FLOAT32_C( 13.39), SIMDE_FLOAT32_C( 
-193.22), SIMDE_FLOAT32_C( -847.51), SIMDE_FLOAT32_C( 295.37), SIMDE_FLOAT32_C( 382.59)) }, { { SIMDE_FLOAT32_C( -621.90), SIMDE_FLOAT32_C( 305.75), SIMDE_FLOAT32_C( -907.35), SIMDE_FLOAT32_C( -378.43) }, { SIMDE_FLOAT32_C( 165.24), SIMDE_FLOAT32_C( 212.29), SIMDE_FLOAT32_C( 823.95), SIMDE_FLOAT32_C( 837.28) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -378.43), SIMDE_FLOAT32_C( -907.35), SIMDE_FLOAT32_C( 305.75), SIMDE_FLOAT32_C( -621.90), SIMDE_FLOAT32_C( 837.28), SIMDE_FLOAT32_C( 823.95), SIMDE_FLOAT32_C( 212.29), SIMDE_FLOAT32_C( 165.24)) }, { { SIMDE_FLOAT32_C( -207.02), SIMDE_FLOAT32_C( 949.44), SIMDE_FLOAT32_C( 953.63), SIMDE_FLOAT32_C( -540.83) }, { SIMDE_FLOAT32_C( -239.63), SIMDE_FLOAT32_C( -907.66), SIMDE_FLOAT32_C( -840.87), SIMDE_FLOAT32_C( 300.80) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -540.83), SIMDE_FLOAT32_C( 953.63), SIMDE_FLOAT32_C( 949.44), SIMDE_FLOAT32_C( -207.02), SIMDE_FLOAT32_C( 300.80), SIMDE_FLOAT32_C( -840.87), SIMDE_FLOAT32_C( -907.66), SIMDE_FLOAT32_C( -239.63)) }, { { SIMDE_FLOAT32_C( 568.29), SIMDE_FLOAT32_C( -558.59), SIMDE_FLOAT32_C( -1.20), SIMDE_FLOAT32_C( -521.17) }, { SIMDE_FLOAT32_C( 772.77), SIMDE_FLOAT32_C( -729.14), SIMDE_FLOAT32_C( -873.98), SIMDE_FLOAT32_C( 142.46) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -521.17), SIMDE_FLOAT32_C( -1.20), SIMDE_FLOAT32_C( -558.59), SIMDE_FLOAT32_C( 568.29), SIMDE_FLOAT32_C( 142.46), SIMDE_FLOAT32_C( -873.98), SIMDE_FLOAT32_C( -729.14), SIMDE_FLOAT32_C( 772.77)) }, { { SIMDE_FLOAT32_C( 499.82), SIMDE_FLOAT32_C( -346.37), SIMDE_FLOAT32_C( 357.98), SIMDE_FLOAT32_C( -982.20) }, { SIMDE_FLOAT32_C( 429.05), SIMDE_FLOAT32_C( 743.13), SIMDE_FLOAT32_C( 351.79), SIMDE_FLOAT32_C( -106.23) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -982.20), SIMDE_FLOAT32_C( 357.98), SIMDE_FLOAT32_C( -346.37), SIMDE_FLOAT32_C( 499.82), SIMDE_FLOAT32_C( -106.23), SIMDE_FLOAT32_C( 351.79), SIMDE_FLOAT32_C( 743.13), SIMDE_FLOAT32_C( 429.05)) }, { { SIMDE_FLOAT32_C( -764.00), SIMDE_FLOAT32_C( 204.78), SIMDE_FLOAT32_C( 842.05), 
SIMDE_FLOAT32_C( 473.10) }, { SIMDE_FLOAT32_C( -181.50), SIMDE_FLOAT32_C( -509.59), SIMDE_FLOAT32_C( 968.67), SIMDE_FLOAT32_C( 585.40) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( 473.10), SIMDE_FLOAT32_C( 842.05), SIMDE_FLOAT32_C( 204.78), SIMDE_FLOAT32_C( -764.00), SIMDE_FLOAT32_C( 585.40), SIMDE_FLOAT32_C( 968.67), SIMDE_FLOAT32_C( -509.59), SIMDE_FLOAT32_C( -181.50)) }, { { SIMDE_FLOAT32_C( -248.73), SIMDE_FLOAT32_C( -498.50), SIMDE_FLOAT32_C( -186.56), SIMDE_FLOAT32_C( 244.41) }, { SIMDE_FLOAT32_C( 987.29), SIMDE_FLOAT32_C( 541.99), SIMDE_FLOAT32_C( 577.71), SIMDE_FLOAT32_C( 147.41) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( 244.41), SIMDE_FLOAT32_C( -186.56), SIMDE_FLOAT32_C( -498.50), SIMDE_FLOAT32_C( -248.73), SIMDE_FLOAT32_C( 147.41), SIMDE_FLOAT32_C( 577.71), SIMDE_FLOAT32_C( 541.99), SIMDE_FLOAT32_C( 987.29)) }, { { SIMDE_FLOAT32_C( -53.98), SIMDE_FLOAT32_C( -59.84), SIMDE_FLOAT32_C( -791.34), SIMDE_FLOAT32_C( 7.53) }, { SIMDE_FLOAT32_C( 2.84), SIMDE_FLOAT32_C( 254.21), SIMDE_FLOAT32_C( 404.98), SIMDE_FLOAT32_C( -410.67) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( 7.53), SIMDE_FLOAT32_C( -791.34), SIMDE_FLOAT32_C( -59.84), SIMDE_FLOAT32_C( -53.98), SIMDE_FLOAT32_C( -410.67), SIMDE_FLOAT32_C( 404.98), SIMDE_FLOAT32_C( 254.21), SIMDE_FLOAT32_C( 2.84)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_loadu2_m128(test_vec[i].a, test_vec[i].b); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; }

/* Test for simde_mm256_loadu2_m128d.
 *
 * Every case supplies two pairs of doubles, `a` and `b`, plus the expected
 * 256-bit vector `r`.  In each expected value the first two arguments of
 * simde_mm256_set_pd() are `a` reversed and the last two are `b` reversed.
 * `a` is passed as the first argument of the function under test and `b` as
 * the second.
 *
 * Returns 0 when all cases have been checked.
 */
static int
test_simde_mm256_loadu2_m128d(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde_float64 a[sizeof(simde__m128d) / sizeof(simde_float64)];
    simde_float64 b[sizeof(simde__m128d) / sizeof(simde_float64)];
    simde__m256d r;
  } test_vec[8] = {
    { { SIMDE_FLOAT64_C( 193.14), SIMDE_FLOAT64_C( -237.27) },
      { SIMDE_FLOAT64_C( 826.89), SIMDE_FLOAT64_C( -516.49) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -237.27), SIMDE_FLOAT64_C( 193.14), SIMDE_FLOAT64_C( -516.49), SIMDE_FLOAT64_C( 826.89)) },
    { { SIMDE_FLOAT64_C( -640.74), SIMDE_FLOAT64_C( -449.08) },
      { SIMDE_FLOAT64_C( 244.98), SIMDE_FLOAT64_C( -467.92) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -449.08), SIMDE_FLOAT64_C( -640.74), SIMDE_FLOAT64_C( -467.92), SIMDE_FLOAT64_C( 244.98)) },
    { { SIMDE_FLOAT64_C( 384.40), SIMDE_FLOAT64_C( -595.56) },
      { SIMDE_FLOAT64_C( -808.24), SIMDE_FLOAT64_C( 198.37) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -595.56), SIMDE_FLOAT64_C( 384.40), SIMDE_FLOAT64_C( 198.37), SIMDE_FLOAT64_C( -808.24)) },
    { { SIMDE_FLOAT64_C( 647.94), SIMDE_FLOAT64_C( -628.27) },
      { SIMDE_FLOAT64_C( -496.78), SIMDE_FLOAT64_C( -569.08) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -628.27), SIMDE_FLOAT64_C( 647.94), SIMDE_FLOAT64_C( -569.08), SIMDE_FLOAT64_C( -496.78)) },
    { { SIMDE_FLOAT64_C( 911.82), SIMDE_FLOAT64_C( -491.30) },
      { SIMDE_FLOAT64_C( 365.77), SIMDE_FLOAT64_C( -898.74) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -491.30), SIMDE_FLOAT64_C( 911.82), SIMDE_FLOAT64_C( -898.74), SIMDE_FLOAT64_C( 365.77)) },
    { { SIMDE_FLOAT64_C( -297.53), SIMDE_FLOAT64_C( -521.34) },
      { SIMDE_FLOAT64_C( 145.28), SIMDE_FLOAT64_C( 488.58) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -521.34), SIMDE_FLOAT64_C( -297.53), SIMDE_FLOAT64_C( 488.58), SIMDE_FLOAT64_C( 145.28)) },
    { { SIMDE_FLOAT64_C( -224.71), SIMDE_FLOAT64_C( -7.50) },
      { SIMDE_FLOAT64_C( -86.35), SIMDE_FLOAT64_C( 810.88) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -7.50), SIMDE_FLOAT64_C( -224.71), SIMDE_FLOAT64_C( 810.88), SIMDE_FLOAT64_C( -86.35)) },
    { { SIMDE_FLOAT64_C( 885.68), SIMDE_FLOAT64_C( -940.09) },
      { SIMDE_FLOAT64_C( -481.99), SIMDE_FLOAT64_C( -433.50) },
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -940.09), SIMDE_FLOAT64_C( 885.68), SIMDE_FLOAT64_C( -433.50), SIMDE_FLOAT64_C( -481.99)) }
  };

  /* Number of entries in the table, computed once up front. */
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  /* The names `i` and `r` are kept as-is because the assertion macro receives
   * them as arguments and may embed their spelling in its failure output. */
  for (size_t i = 0 ; i != n_cases ; ++i) {
    simde__m256d r = simde_mm256_loadu2_m128d(test_vec[i].a, test_vec[i].b);
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

static int test_simde_mm256_loadu2_m128i(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i 
b; simde__m256i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 354008351), INT32_C( 1710178598), INT32_C( 1223789711), INT32_C(-1500329554)), simde_mm_set_epi32(INT32_C(-1388022686), INT32_C( -390861004), INT32_C( -560834160), INT32_C( 1618430517)), simde_mm256_set_epi32(INT32_C( 354008351), INT32_C( 1710178598), INT32_C( 1223789711), INT32_C(-1500329554), INT32_C(-1388022686), INT32_C( -390861004), INT32_C( -560834160), INT32_C( 1618430517)) }, { simde_mm_set_epi32(INT32_C(-2097010594), INT32_C(-1953861975), INT32_C( 1525655088), INT32_C(-1479248872)), simde_mm_set_epi32(INT32_C( -212387035), INT32_C( -783086135), INT32_C( -464607138), INT32_C( -807907186)), simde_mm256_set_epi32(INT32_C(-2097010594), INT32_C(-1953861975), INT32_C( 1525655088), INT32_C(-1479248872), INT32_C( -212387035), INT32_C( -783086135), INT32_C( -464607138), INT32_C( -807907186)) }, { simde_mm_set_epi32(INT32_C( 1556453306), INT32_C( -628648157), INT32_C(-1070645220), INT32_C( 1816365112)), simde_mm_set_epi32(INT32_C( -449670221), INT32_C( 758539132), INT32_C( 894912628), INT32_C( 2013246533)), simde_mm256_set_epi32(INT32_C( 1556453306), INT32_C( -628648157), INT32_C(-1070645220), INT32_C( 1816365112), INT32_C( -449670221), INT32_C( 758539132), INT32_C( 894912628), INT32_C( 2013246533)) }, { simde_mm_set_epi32(INT32_C( 973055118), INT32_C( 267011876), INT32_C( -970751985), INT32_C( -790620326)), simde_mm_set_epi32(INT32_C(-1774701032), INT32_C( 110651775), INT32_C(-2029162765), INT32_C( -644927818)), simde_mm256_set_epi32(INT32_C( 973055118), INT32_C( 267011876), INT32_C( -970751985), INT32_C( -790620326), INT32_C(-1774701032), INT32_C( 110651775), INT32_C(-2029162765), INT32_C( -644927818)) }, { simde_mm_set_epi32(INT32_C( 1343331807), INT32_C( -752743183), INT32_C( -212726727), INT32_C( 673547091)), simde_mm_set_epi32(INT32_C( 510472604), INT32_C( 30606375), INT32_C(-1460649586), INT32_C( -783315263)), simde_mm256_set_epi32(INT32_C( 1343331807), INT32_C( -752743183), INT32_C( 
-212726727), INT32_C( 673547091), INT32_C( 510472604), INT32_C( 30606375), INT32_C(-1460649586), INT32_C( -783315263)) }, { simde_mm_set_epi32(INT32_C( 1773008222), INT32_C( -172973908), INT32_C( -578745695), INT32_C( 1088863920)), simde_mm_set_epi32(INT32_C(-2064848056), INT32_C( 207858402), INT32_C(-1299831865), INT32_C(-1364624980)), simde_mm256_set_epi32(INT32_C( 1773008222), INT32_C( -172973908), INT32_C( -578745695), INT32_C( 1088863920), INT32_C(-2064848056), INT32_C( 207858402), INT32_C(-1299831865), INT32_C(-1364624980)) }, { simde_mm_set_epi32(INT32_C( -608977283), INT32_C(-1563798803), INT32_C(-1827655569), INT32_C( -382597224)), simde_mm_set_epi32(INT32_C(-1788804177), INT32_C(-1217503299), INT32_C( 57159833), INT32_C( -53652220)), simde_mm256_set_epi32(INT32_C( -608977283), INT32_C(-1563798803), INT32_C(-1827655569), INT32_C( -382597224), INT32_C(-1788804177), INT32_C(-1217503299), INT32_C( 57159833), INT32_C( -53652220)) }, { simde_mm_set_epi32(INT32_C( 2096190829), INT32_C( 255970451), INT32_C( 2016421031), INT32_C( -950647181)), simde_mm_set_epi32(INT32_C( -133085873), INT32_C(-1605552420), INT32_C( -147782601), INT32_C( -870212282)), simde_mm256_set_epi32(INT32_C( 2096190829), INT32_C( 255970451), INT32_C( 2016421031), INT32_C( -950647181), INT32_C( -133085873), INT32_C(-1605552420), INT32_C( -147782601), INT32_C( -870212282)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_loadu2_m128i(&(test_vec[i].a), &(test_vec[i].b)); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_maskload_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 mem_addr[2]; const int64_t mask[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -44.84), SIMDE_FLOAT64_C( -187.23) }, { INT64_C( 697350032114386965), -INT64_C( 6822977484778790260) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -187.23) } }, { { SIMDE_FLOAT64_C( -686.00), SIMDE_FLOAT64_C( 
-486.25) }, { -INT64_C( 9072093096164548123), INT64_C( 8577706021278762060) }, { SIMDE_FLOAT64_C( -686.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 345.89), SIMDE_FLOAT64_C( -846.86) }, { INT64_C( 7283870107845829619), INT64_C( 5554042763219526763) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -979.25), SIMDE_FLOAT64_C( -524.07) }, { INT64_C( 1733613083399169728), -INT64_C( 8664218374432089815) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -524.07) } }, { { SIMDE_FLOAT64_C( 327.25), SIMDE_FLOAT64_C( 112.82) }, { -INT64_C( 800106376127047672), INT64_C( 7751542069822355551) }, { SIMDE_FLOAT64_C( 327.25), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 407.41), SIMDE_FLOAT64_C( -401.19) }, { -INT64_C( 9026860482835374478), -INT64_C( 1097169102464975702) }, { SIMDE_FLOAT64_C( 407.41), SIMDE_FLOAT64_C( -401.19) } }, { { SIMDE_FLOAT64_C( -14.88), SIMDE_FLOAT64_C( 573.00) }, { -INT64_C( 3084833370581537693), -INT64_C( 2835100346349403270) }, { SIMDE_FLOAT64_C( -14.88), SIMDE_FLOAT64_C( 573.00) } }, { { SIMDE_FLOAT64_C( -778.55), SIMDE_FLOAT64_C( 193.17) }, { INT64_C( 672843420433189374), -INT64_C( 7606477107942056835) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 193.17) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i mask = simde_x_mm_loadu_epi64(test_vec[i].mask); simde__m128d r = simde_mm_maskload_pd(test_vec[i].mem_addr, mask); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_maskload_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 mem_addr[4]; const int64_t mask[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 845.03), SIMDE_FLOAT64_C( 274.61), SIMDE_FLOAT64_C( 515.17), SIMDE_FLOAT64_C( 654.86) }, { -INT64_C( 1562028826953646494), -INT64_C( 6547821859740641223), INT64_C( 5461221024099586812), -INT64_C( 6926067570004073380) }, { SIMDE_FLOAT64_C( 845.03), SIMDE_FLOAT64_C( 
274.61), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 654.86) } }, { { SIMDE_FLOAT64_C( 87.04), SIMDE_FLOAT64_C( -185.45), SIMDE_FLOAT64_C( 566.76), SIMDE_FLOAT64_C( -222.61) }, { -INT64_C( 8140185020693102094), -INT64_C( 7935186431243966026), -INT64_C( 3692834531731199052), -INT64_C( 687403654194683627) }, { SIMDE_FLOAT64_C( 87.04), SIMDE_FLOAT64_C( -185.45), SIMDE_FLOAT64_C( 566.76), SIMDE_FLOAT64_C( -222.61) } }, { { SIMDE_FLOAT64_C( -387.34), SIMDE_FLOAT64_C( 667.72), SIMDE_FLOAT64_C( 351.98), SIMDE_FLOAT64_C( 185.90) }, { INT64_C( 5746656153388198486), -INT64_C( 2698573944803254074), -INT64_C( 938136386737386456), -INT64_C( 2523130118312267541) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 667.72), SIMDE_FLOAT64_C( 351.98), SIMDE_FLOAT64_C( 185.90) } }, { { SIMDE_FLOAT64_C( -78.35), SIMDE_FLOAT64_C( -352.03), SIMDE_FLOAT64_C( 326.83), SIMDE_FLOAT64_C( 368.88) }, { -INT64_C( 7675339611453347526), INT64_C( 4744848230774212468), -INT64_C( 3847732952440777688), -INT64_C( 3251158471971203291) }, { SIMDE_FLOAT64_C( -78.35), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 326.83), SIMDE_FLOAT64_C( 368.88) } }, { { SIMDE_FLOAT64_C( -253.08), SIMDE_FLOAT64_C( 10.09), SIMDE_FLOAT64_C( 790.44), SIMDE_FLOAT64_C( -217.02) }, { -INT64_C( 6233112357282165138), INT64_C( 8276240822704953760), -INT64_C( 6505481490158291400), -INT64_C( 4741646846794426252) }, { SIMDE_FLOAT64_C( -253.08), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 790.44), SIMDE_FLOAT64_C( -217.02) } }, { { SIMDE_FLOAT64_C( 308.28), SIMDE_FLOAT64_C( -190.54), SIMDE_FLOAT64_C( -550.36), SIMDE_FLOAT64_C( -303.22) }, { -INT64_C( 6395814632349097515), -INT64_C( 7263366602557941603), -INT64_C( 3216775732650775751), -INT64_C( 5871229529546912511) }, { SIMDE_FLOAT64_C( 308.28), SIMDE_FLOAT64_C( -190.54), SIMDE_FLOAT64_C( -550.36), SIMDE_FLOAT64_C( -303.22) } }, { { SIMDE_FLOAT64_C( 657.64), SIMDE_FLOAT64_C( 674.06), SIMDE_FLOAT64_C( 624.26), SIMDE_FLOAT64_C( 941.85) }, { INT64_C( 508501554445574299), -INT64_C( 5706936849136467483), 
-INT64_C( 8199905151792502630), INT64_C( 8100955200803354953) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 674.06), SIMDE_FLOAT64_C( 624.26), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 517.21), SIMDE_FLOAT64_C( 15.58), SIMDE_FLOAT64_C( 172.93), SIMDE_FLOAT64_C( -730.24) }, { INT64_C( 2699593483387123569), INT64_C( 2376308967288947396), -INT64_C( 4803955517750890898), INT64_C( 1459965220665278538) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 172.93), SIMDE_FLOAT64_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i mask = simde_x_mm256_loadu_epi64(test_vec[i].mask); simde__m256d r = simde_mm256_maskload_pd(test_vec[i].mem_addr, mask); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_maskload_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 mem_addr[4]; const int32_t mask[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -560.98), SIMDE_FLOAT32_C( 330.23), SIMDE_FLOAT32_C( -571.08), SIMDE_FLOAT32_C( -900.52) }, { INT32_C( 552414127), -INT32_C( 630594570), -INT32_C( 1291956017), -INT32_C( 1030767749) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 330.23), SIMDE_FLOAT32_C( -571.08), SIMDE_FLOAT32_C( -900.52) } }, { { SIMDE_FLOAT32_C( -52.84), SIMDE_FLOAT32_C( -695.38), SIMDE_FLOAT32_C( -631.11), SIMDE_FLOAT32_C( 296.05) }, { -INT32_C( 1978494141), INT32_C( 1682818151), INT32_C( 2012703432), -INT32_C( 1952979819) }, { SIMDE_FLOAT32_C( -52.84), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 296.05) } }, { { SIMDE_FLOAT32_C( 460.61), SIMDE_FLOAT32_C( -394.20), SIMDE_FLOAT32_C( 440.23), SIMDE_FLOAT32_C( -609.13) }, { -INT32_C( 867605424), INT32_C( 42915871), -INT32_C( 268280147), INT32_C( 1501238513) }, { SIMDE_FLOAT32_C( 460.61), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 440.23), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 481.64), SIMDE_FLOAT32_C( 724.40), 
SIMDE_FLOAT32_C( -863.54), SIMDE_FLOAT32_C( 137.47) }, { -INT32_C( 954682062), INT32_C( 1632874393), -INT32_C( 542289), INT32_C( 1019953181) }, { SIMDE_FLOAT32_C( 481.64), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -863.54), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 811.63), SIMDE_FLOAT32_C( -828.49), SIMDE_FLOAT32_C( 881.09), SIMDE_FLOAT32_C( -936.46) }, { -INT32_C( 1447608137), INT32_C( 771895893), INT32_C( 768589818), INT32_C( 250931060) }, { SIMDE_FLOAT32_C( 811.63), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -741.71), SIMDE_FLOAT32_C( -343.75), SIMDE_FLOAT32_C( -821.30), SIMDE_FLOAT32_C( 50.98) }, { INT32_C( 491021824), -INT32_C( 1067904857), INT32_C( 562468969), INT32_C( 768228824) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -343.75), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 335.89), SIMDE_FLOAT32_C( 701.50), SIMDE_FLOAT32_C( -340.99), SIMDE_FLOAT32_C( -135.85) }, { INT32_C( 9906827), -INT32_C( 1492219119), -INT32_C( 739476013), -INT32_C( 1963904541) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 701.50), SIMDE_FLOAT32_C( -340.99), SIMDE_FLOAT32_C( -135.85) } }, { { SIMDE_FLOAT32_C( 167.10), SIMDE_FLOAT32_C( 398.88), SIMDE_FLOAT32_C( -514.86), SIMDE_FLOAT32_C( 423.86) }, { -INT32_C( 1144270366), INT32_C( 2129171726), -INT32_C( 303545247), -INT32_C( 2131918994) }, { SIMDE_FLOAT32_C( 167.10), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -514.86), SIMDE_FLOAT32_C( 423.86) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i mask = simde_x_mm_loadu_epi32(test_vec[i].mask); simde__m128 r = simde_mm_maskload_ps(test_vec[i].mem_addr, mask); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_maskload_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 mem_addr[8]; const int32_t mask[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 
-588.55), SIMDE_FLOAT32_C( -156.14), SIMDE_FLOAT32_C( 765.50), SIMDE_FLOAT32_C( -514.50), SIMDE_FLOAT32_C( 262.17), SIMDE_FLOAT32_C( -363.89), SIMDE_FLOAT32_C( -808.48), SIMDE_FLOAT32_C( 781.30) }, { -INT32_C( 576220470), -INT32_C( 1010639970), INT32_C( 1590785915), -INT32_C( 714613675), -INT32_C( 779948395), -INT32_C( 245937156), -INT32_C( 509561887), -INT32_C( 616351727) }, { SIMDE_FLOAT32_C( -588.55), SIMDE_FLOAT32_C( -156.14), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -514.50), SIMDE_FLOAT32_C( 262.17), SIMDE_FLOAT32_C( -363.89), SIMDE_FLOAT32_C( -808.48), SIMDE_FLOAT32_C( 781.30) } }, { { SIMDE_FLOAT32_C( 420.97), SIMDE_FLOAT32_C( -838.24), SIMDE_FLOAT32_C( -392.68), SIMDE_FLOAT32_C( 299.50), SIMDE_FLOAT32_C( 207.95), SIMDE_FLOAT32_C( 278.96), SIMDE_FLOAT32_C( -847.51), SIMDE_FLOAT32_C( 417.49) }, { INT32_C( 1184956145), INT32_C( 1880819674), -INT32_C( 247357707), -INT32_C( 891119127), INT32_C( 1571521100), -INT32_C( 2059800645), -INT32_C( 1594953254), INT32_C( 1575099244) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -392.68), SIMDE_FLOAT32_C( 299.50), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 278.96), SIMDE_FLOAT32_C( -847.51), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 165.20), SIMDE_FLOAT32_C( -625.22), SIMDE_FLOAT32_C( -748.60), SIMDE_FLOAT32_C( -111.47), SIMDE_FLOAT32_C( 463.62), SIMDE_FLOAT32_C( -236.00), SIMDE_FLOAT32_C( -639.66), SIMDE_FLOAT32_C( -407.20) }, { INT32_C( 1181787485), -INT32_C( 1978576322), -INT32_C( 1863795499), -INT32_C( 2062212693), INT32_C( 2116420626), INT32_C( 953944095), INT32_C( 338395275), -INT32_C( 1651273921) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -625.22), SIMDE_FLOAT32_C( -748.60), SIMDE_FLOAT32_C( -111.47), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -407.20) } }, { { SIMDE_FLOAT32_C( -291.43), SIMDE_FLOAT32_C( -760.80), SIMDE_FLOAT32_C( 348.26), SIMDE_FLOAT32_C( -222.79), SIMDE_FLOAT32_C( -485.28), SIMDE_FLOAT32_C( 543.93), SIMDE_FLOAT32_C( 
-34.22), SIMDE_FLOAT32_C( -759.27) }, { INT32_C( 1555752113), INT32_C( 417517573), -INT32_C( 208271148), -INT32_C( 1691651568), INT32_C( 850417394), INT32_C( 433013733), -INT32_C( 1685343674), INT32_C( 1489406119) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 348.26), SIMDE_FLOAT32_C( -222.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -34.22), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 321.91), SIMDE_FLOAT32_C( 814.50), SIMDE_FLOAT32_C( 803.71), SIMDE_FLOAT32_C( 833.48), SIMDE_FLOAT32_C( 558.02), SIMDE_FLOAT32_C( 442.93), SIMDE_FLOAT32_C( -87.03), SIMDE_FLOAT32_C( 798.77) }, { -INT32_C( 1357438818), -INT32_C( 2058730861), INT32_C( 2125986457), -INT32_C( 2087156163), -INT32_C( 534830279), INT32_C( 406381995), -INT32_C( 1232409499), INT32_C( 567940227) }, { SIMDE_FLOAT32_C( 321.91), SIMDE_FLOAT32_C( 814.50), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 833.48), SIMDE_FLOAT32_C( 558.02), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -87.03), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -551.06), SIMDE_FLOAT32_C( 227.15), SIMDE_FLOAT32_C( -106.44), SIMDE_FLOAT32_C( 271.24), SIMDE_FLOAT32_C( 412.99), SIMDE_FLOAT32_C( -27.66), SIMDE_FLOAT32_C( 187.06), SIMDE_FLOAT32_C( 798.87) }, { INT32_C( 1397490709), -INT32_C( 992549749), -INT32_C( 1281035001), INT32_C( 1087102170), INT32_C( 1274435016), -INT32_C( 1653747607), -INT32_C( 144359998), INT32_C( 1858384472) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 227.15), SIMDE_FLOAT32_C( -106.44), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -27.66), SIMDE_FLOAT32_C( 187.06), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 516.26), SIMDE_FLOAT32_C( -512.15), SIMDE_FLOAT32_C( 869.28), SIMDE_FLOAT32_C( -650.32), SIMDE_FLOAT32_C( -650.22), SIMDE_FLOAT32_C( -362.47), SIMDE_FLOAT32_C( 974.91), SIMDE_FLOAT32_C( 374.68) }, { INT32_C( 1739565453), INT32_C( 1319598725), INT32_C( 949591503), INT32_C( 819267182), -INT32_C( 1675150780), -INT32_C( 1559565076), -INT32_C( 
236663812), -INT32_C( 252843421) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -650.22), SIMDE_FLOAT32_C( -362.47), SIMDE_FLOAT32_C( 974.91), SIMDE_FLOAT32_C( 374.68) } }, { { SIMDE_FLOAT32_C( -61.68), SIMDE_FLOAT32_C( 929.24), SIMDE_FLOAT32_C( 912.15), SIMDE_FLOAT32_C( -644.38), SIMDE_FLOAT32_C( -898.12), SIMDE_FLOAT32_C( -627.24), SIMDE_FLOAT32_C( 292.92), SIMDE_FLOAT32_C( 202.99) }, { INT32_C( 169672860), INT32_C( 1262219783), INT32_C( 434594349), INT32_C( 1270673998), INT32_C( 540844477), INT32_C( 504375691), -INT32_C( 617125691), INT32_C( 62955111) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 292.92), SIMDE_FLOAT32_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i mask = simde_x_mm256_loadu_epi32(test_vec[i].mask); simde__m256 r = simde_mm256_maskload_ps(test_vec[i].mem_addr, mask); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_maskstore_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128i mask; double ri[2]; double ro[2]; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 291.40), SIMDE_FLOAT64_C( -747.46)), simde_mm_set_epi64x(INT64_C( -901196363302656956), INT64_C( 423467829629286510)), { SIMDE_FLOAT64_C( -279.11), SIMDE_FLOAT64_C( -707.31) }, { SIMDE_FLOAT64_C( -279.11), SIMDE_FLOAT64_C( 291.40) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 36.11), SIMDE_FLOAT64_C( -279.03)), simde_mm_set_epi64x(INT64_C(-5374148835716618800), INT64_C( 4687824648494664977)), { SIMDE_FLOAT64_C( -513.32), SIMDE_FLOAT64_C( 997.01) }, { SIMDE_FLOAT64_C( -513.32), SIMDE_FLOAT64_C( 36.11) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 252.18), SIMDE_FLOAT64_C( -396.70)), simde_mm_set_epi64x(INT64_C(-2340838553401196290), INT64_C(-8255671198755410933)), { 
SIMDE_FLOAT64_C( -313.87), SIMDE_FLOAT64_C( 648.77) }, { SIMDE_FLOAT64_C( -396.70), SIMDE_FLOAT64_C( 252.18) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -154.32), SIMDE_FLOAT64_C( 707.71)), simde_mm_set_epi64x(INT64_C( -336577207510206055), INT64_C(-8731515008786621717)), { SIMDE_FLOAT64_C( -956.34), SIMDE_FLOAT64_C( 661.79) }, { SIMDE_FLOAT64_C( 707.71), SIMDE_FLOAT64_C( -154.32) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -482.35), SIMDE_FLOAT64_C( 870.14)), simde_mm_set_epi64x(INT64_C(-6428326320006280400), INT64_C( 2370968363897859860)), { SIMDE_FLOAT64_C( -88.00), SIMDE_FLOAT64_C( -393.63) }, { SIMDE_FLOAT64_C( -88.00), SIMDE_FLOAT64_C( -482.35) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -798.76), SIMDE_FLOAT64_C( 378.53)), simde_mm_set_epi64x(INT64_C( 2158346412704669322), INT64_C(-1741710562990070947)), { SIMDE_FLOAT64_C( -459.19), SIMDE_FLOAT64_C( -261.60) }, { SIMDE_FLOAT64_C( 378.53), SIMDE_FLOAT64_C( -261.60) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 47.39), SIMDE_FLOAT64_C( 457.82)), simde_mm_set_epi64x(INT64_C( 3118706775454689373), INT64_C(-4723277293636004112)), { SIMDE_FLOAT64_C( -206.48), SIMDE_FLOAT64_C( 663.61) }, { SIMDE_FLOAT64_C( 457.82), SIMDE_FLOAT64_C( 663.61) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -497.78), SIMDE_FLOAT64_C( -27.40)), simde_mm_set_epi64x(INT64_C(-5481981628135809029), INT64_C(-7037919562781567894)), { SIMDE_FLOAT64_C( 54.28), SIMDE_FLOAT64_C( -36.05) }, { SIMDE_FLOAT64_C( -27.40), SIMDE_FLOAT64_C( -497.78) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { double r[2] = { test_vec[i].ri[0], test_vec[i].ri[1], }; simde_memcpy(r, test_vec[i].ri, sizeof(r)); simde_mm_maskstore_pd(r, test_vec[i].mask, test_vec[i].a); simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].ro, 1); } return 0; } static int test_simde_mm256_maskstore_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256i mask; simde_float64 ri[4]; simde_float64 ro[4]; } test_vec[8] = { { 
simde_mm256_set_pd(SIMDE_FLOAT64_C( 256.10), SIMDE_FLOAT64_C( 343.75), SIMDE_FLOAT64_C( -441.90), SIMDE_FLOAT64_C( 609.80)), simde_mm256_set_epi64x(INT64_C( 4260458650207424972), INT64_C( 7445494124920454187), INT64_C( 3286955945790099662), INT64_C(-7285974739268381254)), { SIMDE_FLOAT64_C( -289.65), SIMDE_FLOAT64_C( 426.76), SIMDE_FLOAT64_C( -9.11), SIMDE_FLOAT64_C( -274.93) }, { SIMDE_FLOAT64_C( 609.80), SIMDE_FLOAT64_C( 426.76), SIMDE_FLOAT64_C( -9.11), SIMDE_FLOAT64_C( -274.93) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 571.67), SIMDE_FLOAT64_C( 518.20), SIMDE_FLOAT64_C( -98.22), SIMDE_FLOAT64_C( -751.94)), simde_mm256_set_epi64x(INT64_C( 1638253588391173148), INT64_C( 1793291230565330203), INT64_C(-9111784699029565866), INT64_C( -227326109536357972)), { SIMDE_FLOAT64_C( -486.54), SIMDE_FLOAT64_C( 729.14), SIMDE_FLOAT64_C( -705.07), SIMDE_FLOAT64_C( -433.33) }, { SIMDE_FLOAT64_C( -751.94), SIMDE_FLOAT64_C( -98.22), SIMDE_FLOAT64_C( -705.07), SIMDE_FLOAT64_C( -433.33) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 102.13), SIMDE_FLOAT64_C( -818.78), SIMDE_FLOAT64_C( -736.90), SIMDE_FLOAT64_C( -616.20)), simde_mm256_set_epi64x(INT64_C(-2892759574131760065), INT64_C(-3440936018861750870), INT64_C( -525494054977382250), INT64_C(-2782562282709585632)), { SIMDE_FLOAT64_C( -465.13), SIMDE_FLOAT64_C( 232.40), SIMDE_FLOAT64_C( -478.53), SIMDE_FLOAT64_C( -53.86) }, { SIMDE_FLOAT64_C( -616.20), SIMDE_FLOAT64_C( -736.90), SIMDE_FLOAT64_C( -818.78), SIMDE_FLOAT64_C( 102.13) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 748.35), SIMDE_FLOAT64_C( 412.39), SIMDE_FLOAT64_C( 180.73), SIMDE_FLOAT64_C( -772.53)), simde_mm256_set_epi64x(INT64_C(-7252528024816875961), INT64_C( 3768666429054031776), INT64_C(-5502653220660844988), INT64_C( 6869378166726947276)), { SIMDE_FLOAT64_C( -276.66), SIMDE_FLOAT64_C( -248.39), SIMDE_FLOAT64_C( -589.21), SIMDE_FLOAT64_C( 826.33) }, { SIMDE_FLOAT64_C( -276.66), SIMDE_FLOAT64_C( 180.73), SIMDE_FLOAT64_C( -589.21), SIMDE_FLOAT64_C( 748.35) } }, { 
simde_mm256_set_pd(SIMDE_FLOAT64_C( 837.77), SIMDE_FLOAT64_C( 582.83), SIMDE_FLOAT64_C( 901.95), SIMDE_FLOAT64_C( 440.28)), simde_mm256_set_epi64x(INT64_C( 7132263712774217761), INT64_C( 7513359651930322343), INT64_C( 7271276353319921669), INT64_C(-6353645951073475265)), { SIMDE_FLOAT64_C( 194.29), SIMDE_FLOAT64_C( -702.43), SIMDE_FLOAT64_C( 663.08), SIMDE_FLOAT64_C( -837.37) }, { SIMDE_FLOAT64_C( 440.28), SIMDE_FLOAT64_C( -702.43), SIMDE_FLOAT64_C( 663.08), SIMDE_FLOAT64_C( -837.37) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -476.01), SIMDE_FLOAT64_C( 964.03), SIMDE_FLOAT64_C( -620.01), SIMDE_FLOAT64_C( -190.51)), simde_mm256_set_epi64x(INT64_C(-6904573933630117437), INT64_C( 8435505992452950995), INT64_C(-2030909113789010322), INT64_C(-3516031824252737762)), { SIMDE_FLOAT64_C( 412.73), SIMDE_FLOAT64_C( -375.82), SIMDE_FLOAT64_C( 493.97), SIMDE_FLOAT64_C( -325.91) }, { SIMDE_FLOAT64_C( -190.51), SIMDE_FLOAT64_C( -620.01), SIMDE_FLOAT64_C( 493.97), SIMDE_FLOAT64_C( -476.01) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 109.41), SIMDE_FLOAT64_C( -105.03), SIMDE_FLOAT64_C( 942.38), SIMDE_FLOAT64_C( 492.62)), simde_mm256_set_epi64x(INT64_C(-4507038716603653937), INT64_C(-5597740526711762453), INT64_C(-4892847490676269188), INT64_C(-2050275303632712946)), { SIMDE_FLOAT64_C( 999.52), SIMDE_FLOAT64_C( 91.29), SIMDE_FLOAT64_C( -389.17), SIMDE_FLOAT64_C( -828.90) }, { SIMDE_FLOAT64_C( 492.62), SIMDE_FLOAT64_C( 942.38), SIMDE_FLOAT64_C( -105.03), SIMDE_FLOAT64_C( 109.41) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -984.18), SIMDE_FLOAT64_C( 526.23), SIMDE_FLOAT64_C( 210.69), SIMDE_FLOAT64_C( -960.42)), simde_mm256_set_epi64x(INT64_C(-2327918596051776606), INT64_C(-6284034566091225578), INT64_C( 5326594562181579270), INT64_C( 7458656096830697285)), { SIMDE_FLOAT64_C( 719.28), SIMDE_FLOAT64_C( -272.00), SIMDE_FLOAT64_C( -305.09), SIMDE_FLOAT64_C( 995.54) }, { SIMDE_FLOAT64_C( 719.28), SIMDE_FLOAT64_C( -272.00), SIMDE_FLOAT64_C( 526.23), SIMDE_FLOAT64_C( -984.18) } } }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float64 r[4]; simde_memcpy(r, test_vec[i].ri, sizeof(r)); simde_mm256_maskstore_pd(r, test_vec[i].mask, test_vec[i].a); simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].ro, 1); } return 0; } static int test_simde_mm_maskstore_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128i mask; simde_float32 ri[4]; simde_float32 ro[4]; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 674.56), SIMDE_FLOAT32_C( -800.90), SIMDE_FLOAT32_C( -515.11), SIMDE_FLOAT32_C( 918.12)), simde_mm_set_epi32(INT32_C( 1108579007), INT32_C( 1980053353), INT32_C( 1803323457), INT32_C( -407836103)), { SIMDE_FLOAT32_C( -619.39), SIMDE_FLOAT32_C( -235.61), SIMDE_FLOAT32_C( -100.19), SIMDE_FLOAT32_C( 132.85) }, { SIMDE_FLOAT32_C( 918.12), SIMDE_FLOAT32_C( -235.61), SIMDE_FLOAT32_C( -100.19), SIMDE_FLOAT32_C( 132.85) } }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -921.95), SIMDE_FLOAT32_C( -150.48), SIMDE_FLOAT32_C( -68.61), SIMDE_FLOAT32_C( 516.22)), simde_mm_set_epi32(INT32_C( 992186029), INT32_C( 1116343160), INT32_C( 312629428), INT32_C( -102955009)), { SIMDE_FLOAT32_C( 590.22), SIMDE_FLOAT32_C( 90.53), SIMDE_FLOAT32_C( -38.56), SIMDE_FLOAT32_C( 730.28) }, { SIMDE_FLOAT32_C( 516.22), SIMDE_FLOAT32_C( 90.53), SIMDE_FLOAT32_C( -38.56), SIMDE_FLOAT32_C( 730.28) } }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -641.93), SIMDE_FLOAT32_C( 207.52), SIMDE_FLOAT32_C( -794.64), SIMDE_FLOAT32_C( 604.13)), simde_mm_set_epi32(INT32_C( -917957115), INT32_C( 1866845223), INT32_C( 844085971), INT32_C( -482023893)), { SIMDE_FLOAT32_C( -925.57), SIMDE_FLOAT32_C( 8.11), SIMDE_FLOAT32_C( -375.92), SIMDE_FLOAT32_C( -370.15) }, { SIMDE_FLOAT32_C( 604.13), SIMDE_FLOAT32_C( 8.11), SIMDE_FLOAT32_C( -375.92), SIMDE_FLOAT32_C( -641.93) } }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 805.10), SIMDE_FLOAT32_C( -153.46), SIMDE_FLOAT32_C( 308.00), SIMDE_FLOAT32_C( -159.40)), simde_mm_set_epi32(INT32_C( -582979650), INT32_C( 2118735836), 
INT32_C( -687047741), INT32_C( -848367450)), { SIMDE_FLOAT32_C( 678.23), SIMDE_FLOAT32_C( 517.86), SIMDE_FLOAT32_C( 930.59), SIMDE_FLOAT32_C( -376.44) }, { SIMDE_FLOAT32_C( -159.40), SIMDE_FLOAT32_C( 308.00), SIMDE_FLOAT32_C( 930.59), SIMDE_FLOAT32_C( 805.10) } }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 286.93), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( -966.59), SIMDE_FLOAT32_C( -652.85)), simde_mm_set_epi32(INT32_C( -773117066), INT32_C(-1995762340), INT32_C(-1086112436), INT32_C( 352565673)), { SIMDE_FLOAT32_C( -243.02), SIMDE_FLOAT32_C( -10.35), SIMDE_FLOAT32_C( -930.64), SIMDE_FLOAT32_C( -942.71) }, { SIMDE_FLOAT32_C( -243.02), SIMDE_FLOAT32_C( -966.59), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( 286.93) } }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 695.71), SIMDE_FLOAT32_C( 544.01), SIMDE_FLOAT32_C( -155.11), SIMDE_FLOAT32_C( 773.52)), simde_mm_set_epi32(INT32_C( 2130523937), INT32_C( 959365319), INT32_C( -87305215), INT32_C( 336137071)), { SIMDE_FLOAT32_C( 331.75), SIMDE_FLOAT32_C( 641.50), SIMDE_FLOAT32_C( -114.90), SIMDE_FLOAT32_C( 582.07) }, { SIMDE_FLOAT32_C( 331.75), SIMDE_FLOAT32_C( -155.11), SIMDE_FLOAT32_C( -114.90), SIMDE_FLOAT32_C( 582.07) } }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -51.27), SIMDE_FLOAT32_C( 493.10), SIMDE_FLOAT32_C( -182.48), SIMDE_FLOAT32_C( -902.32)), simde_mm_set_epi32(INT32_C( -776181519), INT32_C(-1636897440), INT32_C( -492655883), INT32_C(-1758902344)), { SIMDE_FLOAT32_C( -81.09), SIMDE_FLOAT32_C( -980.86), SIMDE_FLOAT32_C( -619.01), SIMDE_FLOAT32_C( -490.33) }, { SIMDE_FLOAT32_C( -902.32), SIMDE_FLOAT32_C( -182.48), SIMDE_FLOAT32_C( 493.10), SIMDE_FLOAT32_C( -51.27) } }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 432.35), SIMDE_FLOAT32_C( 341.26), SIMDE_FLOAT32_C( 183.18), SIMDE_FLOAT32_C( -136.49)), simde_mm_set_epi32(INT32_C( 1637126517), INT32_C(-1887081950), INT32_C( 1956680612), INT32_C( 1224753500)), { SIMDE_FLOAT32_C( -471.44), SIMDE_FLOAT32_C( 241.66), SIMDE_FLOAT32_C( -19.61), SIMDE_FLOAT32_C( -311.97) }, { SIMDE_FLOAT32_C( -471.44), 
SIMDE_FLOAT32_C( 241.66), SIMDE_FLOAT32_C( 341.26), SIMDE_FLOAT32_C( -311.97) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32 r[4]; simde_memcpy(r, test_vec[i].ri, sizeof(r)); simde_mm_maskstore_ps(r, test_vec[i].mask, test_vec[i].a); simde_assert_equal_vf32(sizeof(r) / sizeof(r[0]), r, test_vec[i].ro, 1); } return 0; } static int test_simde_mm256_maskstore_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256i mask; simde_float32 ri[8]; simde_float32 ro[8]; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 631.62), SIMDE_FLOAT32_C( -891.94), SIMDE_FLOAT32_C( -689.27), SIMDE_FLOAT32_C( 347.81), SIMDE_FLOAT32_C( -616.22), SIMDE_FLOAT32_C( 642.58), SIMDE_FLOAT32_C( 228.19), SIMDE_FLOAT32_C( -205.29)), simde_mm256_set_epi32(INT32_C( 295073064), INT32_C( 716384814), INT32_C( 546124227), INT32_C( -305648391), INT32_C(-1290025628), INT32_C(-1970079627), INT32_C( -571027584), INT32_C( 423261258)), { SIMDE_FLOAT32_C( -608.38), SIMDE_FLOAT32_C( 456.15), SIMDE_FLOAT32_C( 520.16), SIMDE_FLOAT32_C( 784.51), SIMDE_FLOAT32_C( 874.80), SIMDE_FLOAT32_C( -683.96), SIMDE_FLOAT32_C( -492.84), SIMDE_FLOAT32_C( -304.46) }, { SIMDE_FLOAT32_C( -608.38), SIMDE_FLOAT32_C( 228.19), SIMDE_FLOAT32_C( 642.58), SIMDE_FLOAT32_C( -616.22), SIMDE_FLOAT32_C( 347.81), SIMDE_FLOAT32_C( -683.96), SIMDE_FLOAT32_C( -492.84), SIMDE_FLOAT32_C( -304.46) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 410.61), SIMDE_FLOAT32_C( -548.69), SIMDE_FLOAT32_C( -844.69), SIMDE_FLOAT32_C( 115.46), SIMDE_FLOAT32_C( -883.40), SIMDE_FLOAT32_C( -942.83), SIMDE_FLOAT32_C( 299.17), SIMDE_FLOAT32_C( 463.83)), simde_mm256_set_epi32(INT32_C( 162007636), INT32_C( 1372837309), INT32_C( 1276687632), INT32_C(-1819337795), INT32_C(-2027923298), INT32_C( 531821300), INT32_C( 1062735782), INT32_C( -103135294)), { SIMDE_FLOAT32_C( -186.23), SIMDE_FLOAT32_C( 411.88), SIMDE_FLOAT32_C( -764.36), SIMDE_FLOAT32_C( 765.54), SIMDE_FLOAT32_C( -288.35), SIMDE_FLOAT32_C( 486.60), 
SIMDE_FLOAT32_C( -405.36), SIMDE_FLOAT32_C( 812.05) }, { SIMDE_FLOAT32_C( 463.83), SIMDE_FLOAT32_C( 411.88), SIMDE_FLOAT32_C( -764.36), SIMDE_FLOAT32_C( -883.40), SIMDE_FLOAT32_C( 115.46), SIMDE_FLOAT32_C( 486.60), SIMDE_FLOAT32_C( -405.36), SIMDE_FLOAT32_C( 812.05) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -783.01), SIMDE_FLOAT32_C( -741.62), SIMDE_FLOAT32_C( -384.48), SIMDE_FLOAT32_C( 584.48), SIMDE_FLOAT32_C( -995.40), SIMDE_FLOAT32_C( -545.74), SIMDE_FLOAT32_C( -304.24), SIMDE_FLOAT32_C( 611.74)), simde_mm256_set_epi32(INT32_C(-1838874946), INT32_C(-2030629043), INT32_C( 38494100), INT32_C( -7221084), INT32_C(-1092763998), INT32_C( 39946466), INT32_C( -239157020), INT32_C(-1142390879)), { SIMDE_FLOAT32_C( -415.76), SIMDE_FLOAT32_C( 542.61), SIMDE_FLOAT32_C( 568.26), SIMDE_FLOAT32_C( 280.16), SIMDE_FLOAT32_C( 662.20), SIMDE_FLOAT32_C( -14.92), SIMDE_FLOAT32_C( 639.71), SIMDE_FLOAT32_C( -773.09) }, { SIMDE_FLOAT32_C( 611.74), SIMDE_FLOAT32_C( -304.24), SIMDE_FLOAT32_C( 568.26), SIMDE_FLOAT32_C( -995.40), SIMDE_FLOAT32_C( 584.48), SIMDE_FLOAT32_C( -14.92), SIMDE_FLOAT32_C( -741.62), SIMDE_FLOAT32_C( -783.01) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 921.86), SIMDE_FLOAT32_C( -688.86), SIMDE_FLOAT32_C( 43.96), SIMDE_FLOAT32_C( 754.20), SIMDE_FLOAT32_C( -480.27), SIMDE_FLOAT32_C( -645.94), SIMDE_FLOAT32_C( 315.20), SIMDE_FLOAT32_C( 726.23)), simde_mm256_set_epi32(INT32_C( -956355020), INT32_C( -805184504), INT32_C( -5391233), INT32_C( 154150621), INT32_C( -322849130), INT32_C(-1596216639), INT32_C( -653101729), INT32_C( -652476461)), { SIMDE_FLOAT32_C( -76.23), SIMDE_FLOAT32_C( -18.44), SIMDE_FLOAT32_C( 771.31), SIMDE_FLOAT32_C( 206.00), SIMDE_FLOAT32_C( 650.40), SIMDE_FLOAT32_C( -69.08), SIMDE_FLOAT32_C( -737.29), SIMDE_FLOAT32_C( 222.97) }, { SIMDE_FLOAT32_C( 726.23), SIMDE_FLOAT32_C( 315.20), SIMDE_FLOAT32_C( -645.94), SIMDE_FLOAT32_C( -480.27), SIMDE_FLOAT32_C( 650.40), SIMDE_FLOAT32_C( 43.96), SIMDE_FLOAT32_C( -688.86), SIMDE_FLOAT32_C( 921.86) } }, { 
simde_mm256_set_ps(SIMDE_FLOAT32_C( 606.49), SIMDE_FLOAT32_C( 777.74), SIMDE_FLOAT32_C( -66.55), SIMDE_FLOAT32_C( -42.93), SIMDE_FLOAT32_C( 40.92), SIMDE_FLOAT32_C( -104.82), SIMDE_FLOAT32_C( 745.57), SIMDE_FLOAT32_C( -526.15)), simde_mm256_set_epi32(INT32_C( 1494400292), INT32_C( 898207849), INT32_C( 499297865), INT32_C( 32131455), INT32_C( 837039755), INT32_C( 686948685), INT32_C( 1413261791), INT32_C( 1091352937)), { SIMDE_FLOAT32_C( -911.46), SIMDE_FLOAT32_C( 181.07), SIMDE_FLOAT32_C( 60.18), SIMDE_FLOAT32_C( 299.13), SIMDE_FLOAT32_C( -412.14), SIMDE_FLOAT32_C( -496.33), SIMDE_FLOAT32_C( 300.62), SIMDE_FLOAT32_C( -738.40) }, { SIMDE_FLOAT32_C( -911.46), SIMDE_FLOAT32_C( 181.07), SIMDE_FLOAT32_C( 60.18), SIMDE_FLOAT32_C( 299.13), SIMDE_FLOAT32_C( -412.14), SIMDE_FLOAT32_C( -496.33), SIMDE_FLOAT32_C( 300.62), SIMDE_FLOAT32_C( -738.40) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -418.84), SIMDE_FLOAT32_C( 873.25), SIMDE_FLOAT32_C( 510.96), SIMDE_FLOAT32_C( 992.60), SIMDE_FLOAT32_C( -316.31), SIMDE_FLOAT32_C( 91.31), SIMDE_FLOAT32_C( 578.46), SIMDE_FLOAT32_C( -878.58)), simde_mm256_set_epi32(INT32_C( 957079452), INT32_C( 1280918142), INT32_C( 1743745557), INT32_C( 1819407670), INT32_C( 242366822), INT32_C( 1212388671), INT32_C(-1368211077), INT32_C( 1178548564)), { SIMDE_FLOAT32_C( 268.38), SIMDE_FLOAT32_C( 166.26), SIMDE_FLOAT32_C( -817.18), SIMDE_FLOAT32_C( -906.27), SIMDE_FLOAT32_C( 690.97), SIMDE_FLOAT32_C( 40.42), SIMDE_FLOAT32_C( 937.35), SIMDE_FLOAT32_C( -494.05) }, { SIMDE_FLOAT32_C( 268.38), SIMDE_FLOAT32_C( 578.46), SIMDE_FLOAT32_C( -817.18), SIMDE_FLOAT32_C( -906.27), SIMDE_FLOAT32_C( 690.97), SIMDE_FLOAT32_C( 40.42), SIMDE_FLOAT32_C( 937.35), SIMDE_FLOAT32_C( -494.05) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 991.62), SIMDE_FLOAT32_C( 121.11), SIMDE_FLOAT32_C( 504.72), SIMDE_FLOAT32_C( 982.21), SIMDE_FLOAT32_C( 263.79), SIMDE_FLOAT32_C( 803.03), SIMDE_FLOAT32_C( 92.44), SIMDE_FLOAT32_C( -807.69)), simde_mm256_set_epi32(INT32_C(-2097969116), 
INT32_C(-2051872419), INT32_C( 269695043), INT32_C( -952585033), INT32_C( 1293504381), INT32_C( -196806212), INT32_C(-2045108827), INT32_C( 1173779579)), { SIMDE_FLOAT32_C( 292.67), SIMDE_FLOAT32_C( 917.15), SIMDE_FLOAT32_C( 90.37), SIMDE_FLOAT32_C( 166.43), SIMDE_FLOAT32_C( 627.88), SIMDE_FLOAT32_C( -780.11), SIMDE_FLOAT32_C( -304.67), SIMDE_FLOAT32_C( -518.81) }, { SIMDE_FLOAT32_C( 292.67), SIMDE_FLOAT32_C( 92.44), SIMDE_FLOAT32_C( 803.03), SIMDE_FLOAT32_C( 166.43), SIMDE_FLOAT32_C( 982.21), SIMDE_FLOAT32_C( -780.11), SIMDE_FLOAT32_C( 121.11), SIMDE_FLOAT32_C( 991.62) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -433.27), SIMDE_FLOAT32_C( 892.50), SIMDE_FLOAT32_C( 314.40), SIMDE_FLOAT32_C( -419.48), SIMDE_FLOAT32_C( -384.30), SIMDE_FLOAT32_C( -314.92), SIMDE_FLOAT32_C( 743.09), SIMDE_FLOAT32_C( -477.54)), simde_mm256_set_epi32(INT32_C( 1543878346), INT32_C( -55266127), INT32_C(-1716646352), INT32_C( 1500867969), INT32_C( -538309268), INT32_C( 1738471819), INT32_C( -967093953), INT32_C( -389124917)), { SIMDE_FLOAT32_C( 553.15), SIMDE_FLOAT32_C( -107.56), SIMDE_FLOAT32_C( -195.96), SIMDE_FLOAT32_C( 763.27), SIMDE_FLOAT32_C( -256.55), SIMDE_FLOAT32_C( -826.51), SIMDE_FLOAT32_C( -168.36), SIMDE_FLOAT32_C( -340.90) }, { SIMDE_FLOAT32_C( -477.54), SIMDE_FLOAT32_C( 743.09), SIMDE_FLOAT32_C( -195.96), SIMDE_FLOAT32_C( -384.30), SIMDE_FLOAT32_C( -256.55), SIMDE_FLOAT32_C( 314.40), SIMDE_FLOAT32_C( 892.50), SIMDE_FLOAT32_C( -340.90) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32 r[8]; simde_memcpy(r, test_vec[i].ri, sizeof(r)); simde_mm256_maskstore_ps(r, test_vec[i].mask, test_vec[i].a); simde_assert_equal_vf32(sizeof(r) / sizeof(r[0]), r, test_vec[i].ro, 1); } return 0; } static int test_simde_mm256_min_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 620.32), SIMDE_FLOAT32_C( -596.35), SIMDE_FLOAT32_C( 174.72), SIMDE_FLOAT32_C( 
165.53), SIMDE_FLOAT32_C( 242.92), SIMDE_FLOAT32_C( 330.00), SIMDE_FLOAT32_C( -436.53), SIMDE_FLOAT32_C( -259.31)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 137.36), SIMDE_FLOAT32_C( -49.88), SIMDE_FLOAT32_C( 846.67), SIMDE_FLOAT32_C( 642.07), SIMDE_FLOAT32_C( 353.31), SIMDE_FLOAT32_C( -696.33), SIMDE_FLOAT32_C( -153.51), SIMDE_FLOAT32_C( -347.51)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 137.36), SIMDE_FLOAT32_C( -596.35), SIMDE_FLOAT32_C( 174.72), SIMDE_FLOAT32_C( 165.53), SIMDE_FLOAT32_C( 242.92), SIMDE_FLOAT32_C( -696.33), SIMDE_FLOAT32_C( -436.53), SIMDE_FLOAT32_C( -347.51)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 938.63), SIMDE_FLOAT32_C( 244.19), SIMDE_FLOAT32_C( 355.24), SIMDE_FLOAT32_C( 261.35), SIMDE_FLOAT32_C( 679.42), SIMDE_FLOAT32_C( -31.35), SIMDE_FLOAT32_C( 138.77), SIMDE_FLOAT32_C( -717.66)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -563.43), SIMDE_FLOAT32_C( 586.11), SIMDE_FLOAT32_C( -131.07), SIMDE_FLOAT32_C( 850.65), SIMDE_FLOAT32_C( 165.14), SIMDE_FLOAT32_C( -413.67), SIMDE_FLOAT32_C( -290.54), SIMDE_FLOAT32_C( 984.51)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -563.43), SIMDE_FLOAT32_C( 244.19), SIMDE_FLOAT32_C( -131.07), SIMDE_FLOAT32_C( 261.35), SIMDE_FLOAT32_C( 165.14), SIMDE_FLOAT32_C( -413.67), SIMDE_FLOAT32_C( -290.54), SIMDE_FLOAT32_C( -717.66)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -838.48), SIMDE_FLOAT32_C( 344.61), SIMDE_FLOAT32_C( -913.53), SIMDE_FLOAT32_C( 858.23), SIMDE_FLOAT32_C( -347.90), SIMDE_FLOAT32_C( -707.87), SIMDE_FLOAT32_C( -634.91), SIMDE_FLOAT32_C( -919.82)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -180.34), SIMDE_FLOAT32_C( -789.22), SIMDE_FLOAT32_C( 607.01), SIMDE_FLOAT32_C( 440.98), SIMDE_FLOAT32_C( 432.59), SIMDE_FLOAT32_C( -196.73), SIMDE_FLOAT32_C( 380.83), SIMDE_FLOAT32_C( 796.17)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -838.48), SIMDE_FLOAT32_C( -789.22), SIMDE_FLOAT32_C( -913.53), SIMDE_FLOAT32_C( 440.98), SIMDE_FLOAT32_C( -347.90), SIMDE_FLOAT32_C( -707.87), SIMDE_FLOAT32_C( -634.91), SIMDE_FLOAT32_C( -919.82)) }, { 
simde_mm256_set_ps(SIMDE_FLOAT32_C( -210.35), SIMDE_FLOAT32_C( -870.90), SIMDE_FLOAT32_C( 992.08), SIMDE_FLOAT32_C( -822.82), SIMDE_FLOAT32_C( -209.97), SIMDE_FLOAT32_C( -436.22), SIMDE_FLOAT32_C( 481.44), SIMDE_FLOAT32_C( 169.09)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -860.96), SIMDE_FLOAT32_C( 972.39), SIMDE_FLOAT32_C( 830.57), SIMDE_FLOAT32_C( -23.76), SIMDE_FLOAT32_C( 311.19), SIMDE_FLOAT32_C( 554.15), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( -247.41)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -860.96), SIMDE_FLOAT32_C( -870.90), SIMDE_FLOAT32_C( 830.57), SIMDE_FLOAT32_C( -822.82), SIMDE_FLOAT32_C( -209.97), SIMDE_FLOAT32_C( -436.22), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( -247.41)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -158.36), SIMDE_FLOAT32_C( -758.94), SIMDE_FLOAT32_C( 618.04), SIMDE_FLOAT32_C( 976.02), SIMDE_FLOAT32_C( -953.60), SIMDE_FLOAT32_C( 866.14), SIMDE_FLOAT32_C( 565.22), SIMDE_FLOAT32_C( 554.29)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -8.54), SIMDE_FLOAT32_C( -896.75), SIMDE_FLOAT32_C( 630.48), SIMDE_FLOAT32_C( 27.00), SIMDE_FLOAT32_C( 865.05), SIMDE_FLOAT32_C( -640.13), SIMDE_FLOAT32_C( -969.96), SIMDE_FLOAT32_C( -427.62)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -158.36), SIMDE_FLOAT32_C( -896.75), SIMDE_FLOAT32_C( 618.04), SIMDE_FLOAT32_C( 27.00), SIMDE_FLOAT32_C( -953.60), SIMDE_FLOAT32_C( -640.13), SIMDE_FLOAT32_C( -969.96), SIMDE_FLOAT32_C( -427.62)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -601.54), SIMDE_FLOAT32_C( -545.11), SIMDE_FLOAT32_C( -568.66), SIMDE_FLOAT32_C( 393.12), SIMDE_FLOAT32_C( -656.85), SIMDE_FLOAT32_C( 612.23), SIMDE_FLOAT32_C( 417.91), SIMDE_FLOAT32_C( -206.48)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -56.90), SIMDE_FLOAT32_C( -986.18), SIMDE_FLOAT32_C( 948.05), SIMDE_FLOAT32_C( -52.08), SIMDE_FLOAT32_C( -838.46), SIMDE_FLOAT32_C( -751.49), SIMDE_FLOAT32_C( 775.89), SIMDE_FLOAT32_C( 940.13)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -601.54), SIMDE_FLOAT32_C( -986.18), SIMDE_FLOAT32_C( -568.66), SIMDE_FLOAT32_C( -52.08), 
SIMDE_FLOAT32_C( -838.46), SIMDE_FLOAT32_C( -751.49), SIMDE_FLOAT32_C( 417.91), SIMDE_FLOAT32_C( -206.48)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 459.26), SIMDE_FLOAT32_C( -447.11), SIMDE_FLOAT32_C( 826.76), SIMDE_FLOAT32_C( -107.61), SIMDE_FLOAT32_C( -521.03), SIMDE_FLOAT32_C( -33.76), SIMDE_FLOAT32_C( 315.53), SIMDE_FLOAT32_C( -222.31)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -136.58), SIMDE_FLOAT32_C( 50.72), SIMDE_FLOAT32_C( 921.42), SIMDE_FLOAT32_C( 664.07), SIMDE_FLOAT32_C( 743.00), SIMDE_FLOAT32_C( -236.39), SIMDE_FLOAT32_C( 981.20), SIMDE_FLOAT32_C( 280.47)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -136.58), SIMDE_FLOAT32_C( -447.11), SIMDE_FLOAT32_C( 826.76), SIMDE_FLOAT32_C( -107.61), SIMDE_FLOAT32_C( -521.03), SIMDE_FLOAT32_C( -236.39), SIMDE_FLOAT32_C( 315.53), SIMDE_FLOAT32_C( -222.31)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 356.76), SIMDE_FLOAT32_C( -389.22), SIMDE_FLOAT32_C( -477.96), SIMDE_FLOAT32_C( -3.77), SIMDE_FLOAT32_C( -645.03), SIMDE_FLOAT32_C( -766.89), SIMDE_FLOAT32_C( 755.76), SIMDE_FLOAT32_C( 244.51)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 823.25), SIMDE_FLOAT32_C( 384.14), SIMDE_FLOAT32_C( 475.61), SIMDE_FLOAT32_C( -650.92), SIMDE_FLOAT32_C( -913.35), SIMDE_FLOAT32_C( -290.77), SIMDE_FLOAT32_C( 213.82), SIMDE_FLOAT32_C( -350.01)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 356.76), SIMDE_FLOAT32_C( -389.22), SIMDE_FLOAT32_C( -477.96), SIMDE_FLOAT32_C( -650.92), SIMDE_FLOAT32_C( -913.35), SIMDE_FLOAT32_C( -766.89), SIMDE_FLOAT32_C( 213.82), SIMDE_FLOAT32_C( -350.01)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_min_ps(test_vec[i].a, test_vec[i].b); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_min_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 207.41), SIMDE_FLOAT64_C( 328.63), SIMDE_FLOAT64_C( -694.69), SIMDE_FLOAT64_C( 687.63)), 
simde_mm256_set_pd(SIMDE_FLOAT64_C( 89.05), SIMDE_FLOAT64_C( 448.86), SIMDE_FLOAT64_C( 19.12), SIMDE_FLOAT64_C( -158.19)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 89.05), SIMDE_FLOAT64_C( 328.63), SIMDE_FLOAT64_C( -694.69), SIMDE_FLOAT64_C( -158.19)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 809.97), SIMDE_FLOAT64_C( -437.57), SIMDE_FLOAT64_C( -994.98), SIMDE_FLOAT64_C( -594.51)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 949.89), SIMDE_FLOAT64_C( -515.80), SIMDE_FLOAT64_C( -545.90), SIMDE_FLOAT64_C( 794.78)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 809.97), SIMDE_FLOAT64_C( -515.80), SIMDE_FLOAT64_C( -994.98), SIMDE_FLOAT64_C( -594.51)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -509.90), SIMDE_FLOAT64_C( -421.15), SIMDE_FLOAT64_C( -539.85), SIMDE_FLOAT64_C( -245.38)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -30.48), SIMDE_FLOAT64_C( -451.08), SIMDE_FLOAT64_C( -92.82), SIMDE_FLOAT64_C( -896.69)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -509.90), SIMDE_FLOAT64_C( -451.08), SIMDE_FLOAT64_C( -539.85), SIMDE_FLOAT64_C( -896.69)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 362.92), SIMDE_FLOAT64_C( 618.37), SIMDE_FLOAT64_C( -874.81), SIMDE_FLOAT64_C( 119.95)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 479.87), SIMDE_FLOAT64_C( 161.56), SIMDE_FLOAT64_C( 162.67), SIMDE_FLOAT64_C( -967.58)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 362.92), SIMDE_FLOAT64_C( 161.56), SIMDE_FLOAT64_C( -874.81), SIMDE_FLOAT64_C( -967.58)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -124.73), SIMDE_FLOAT64_C( 204.12), SIMDE_FLOAT64_C( -546.39), SIMDE_FLOAT64_C( -78.22)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 120.00), SIMDE_FLOAT64_C( 851.26), SIMDE_FLOAT64_C( 153.83), SIMDE_FLOAT64_C( 393.14)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -124.73), SIMDE_FLOAT64_C( 204.12), SIMDE_FLOAT64_C( -546.39), SIMDE_FLOAT64_C( -78.22)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -812.91), SIMDE_FLOAT64_C( 797.50), SIMDE_FLOAT64_C( 285.31), SIMDE_FLOAT64_C( 340.33)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -48.34), SIMDE_FLOAT64_C( -906.19), SIMDE_FLOAT64_C( 
314.61), SIMDE_FLOAT64_C( 602.47)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -812.91), SIMDE_FLOAT64_C( -906.19), SIMDE_FLOAT64_C( 285.31), SIMDE_FLOAT64_C( 340.33)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 376.08), SIMDE_FLOAT64_C( 847.57), SIMDE_FLOAT64_C( 979.10), SIMDE_FLOAT64_C( -602.74)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 307.85), SIMDE_FLOAT64_C( 571.44), SIMDE_FLOAT64_C( -621.81), SIMDE_FLOAT64_C( -12.29)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 307.85), SIMDE_FLOAT64_C( 571.44), SIMDE_FLOAT64_C( -621.81), SIMDE_FLOAT64_C( -602.74)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -682.10), SIMDE_FLOAT64_C( 349.84), SIMDE_FLOAT64_C( -943.34), SIMDE_FLOAT64_C( 341.61)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -89.49), SIMDE_FLOAT64_C( -236.51), SIMDE_FLOAT64_C( 632.28), SIMDE_FLOAT64_C( -535.13)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -682.10), SIMDE_FLOAT64_C( -236.51), SIMDE_FLOAT64_C( -943.34), SIMDE_FLOAT64_C( -535.13)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_min_pd(test_vec[i].a, test_vec[i].b); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_max_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 449.92), SIMDE_FLOAT32_C( 34.28), SIMDE_FLOAT32_C( -25.78), SIMDE_FLOAT32_C( 210.08), SIMDE_FLOAT32_C( 389.04), SIMDE_FLOAT32_C( -871.84), SIMDE_FLOAT32_C( -259.15), SIMDE_FLOAT32_C( -935.03)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -540.89), SIMDE_FLOAT32_C( 480.33), SIMDE_FLOAT32_C( 35.20), SIMDE_FLOAT32_C( 243.72), SIMDE_FLOAT32_C( 827.03), SIMDE_FLOAT32_C( -35.53), SIMDE_FLOAT32_C( -369.50), SIMDE_FLOAT32_C( -834.21)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 449.92), SIMDE_FLOAT32_C( 480.33), SIMDE_FLOAT32_C( 35.20), SIMDE_FLOAT32_C( 243.72), SIMDE_FLOAT32_C( 827.03), SIMDE_FLOAT32_C( -35.53), SIMDE_FLOAT32_C( -259.15), SIMDE_FLOAT32_C( -834.21)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 
489.45), SIMDE_FLOAT32_C( -170.78), SIMDE_FLOAT32_C( 183.48), SIMDE_FLOAT32_C( 307.64), SIMDE_FLOAT32_C( -977.66), SIMDE_FLOAT32_C( 745.18), SIMDE_FLOAT32_C( 561.96), SIMDE_FLOAT32_C( -868.28)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -220.92), SIMDE_FLOAT32_C( 742.39), SIMDE_FLOAT32_C( 682.97), SIMDE_FLOAT32_C( 319.92), SIMDE_FLOAT32_C( 734.45), SIMDE_FLOAT32_C( -490.69), SIMDE_FLOAT32_C( -753.68), SIMDE_FLOAT32_C( -110.63)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 489.45), SIMDE_FLOAT32_C( 742.39), SIMDE_FLOAT32_C( 682.97), SIMDE_FLOAT32_C( 319.92), SIMDE_FLOAT32_C( 734.45), SIMDE_FLOAT32_C( 745.18), SIMDE_FLOAT32_C( 561.96), SIMDE_FLOAT32_C( -110.63)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -934.39), SIMDE_FLOAT32_C( -436.28), SIMDE_FLOAT32_C( 572.10), SIMDE_FLOAT32_C( -111.64), SIMDE_FLOAT32_C( 551.18), SIMDE_FLOAT32_C( 829.61), SIMDE_FLOAT32_C( -107.94), SIMDE_FLOAT32_C( -864.62)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 797.77), SIMDE_FLOAT32_C( 207.18), SIMDE_FLOAT32_C( -690.52), SIMDE_FLOAT32_C( 861.15), SIMDE_FLOAT32_C( 110.08), SIMDE_FLOAT32_C( 67.85), SIMDE_FLOAT32_C( -389.66), SIMDE_FLOAT32_C( 867.20)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 797.77), SIMDE_FLOAT32_C( 207.18), SIMDE_FLOAT32_C( 572.10), SIMDE_FLOAT32_C( 861.15), SIMDE_FLOAT32_C( 551.18), SIMDE_FLOAT32_C( 829.61), SIMDE_FLOAT32_C( -107.94), SIMDE_FLOAT32_C( 867.20)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 481.13), SIMDE_FLOAT32_C( -53.37), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( 614.09), SIMDE_FLOAT32_C( 596.94), SIMDE_FLOAT32_C( 349.18), SIMDE_FLOAT32_C( 53.36), SIMDE_FLOAT32_C( 244.43)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -253.17), SIMDE_FLOAT32_C( -107.20), SIMDE_FLOAT32_C( -893.53), SIMDE_FLOAT32_C( 608.05), SIMDE_FLOAT32_C( 292.67), SIMDE_FLOAT32_C( 529.08), SIMDE_FLOAT32_C( 170.70), SIMDE_FLOAT32_C( -778.36)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 481.13), SIMDE_FLOAT32_C( -53.37), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( 614.09), SIMDE_FLOAT32_C( 596.94), SIMDE_FLOAT32_C( 529.08), 
SIMDE_FLOAT32_C( 170.70), SIMDE_FLOAT32_C( 244.43)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -125.79), SIMDE_FLOAT32_C( -776.79), SIMDE_FLOAT32_C( -268.99), SIMDE_FLOAT32_C( 991.82), SIMDE_FLOAT32_C( 880.36), SIMDE_FLOAT32_C( -906.06), SIMDE_FLOAT32_C( -550.03), SIMDE_FLOAT32_C( 415.17)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 733.77), SIMDE_FLOAT32_C( -207.30), SIMDE_FLOAT32_C( -944.02), SIMDE_FLOAT32_C( -591.97), SIMDE_FLOAT32_C( -584.21), SIMDE_FLOAT32_C( 271.28), SIMDE_FLOAT32_C( -845.79), SIMDE_FLOAT32_C( -155.27)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 733.77), SIMDE_FLOAT32_C( -207.30), SIMDE_FLOAT32_C( -268.99), SIMDE_FLOAT32_C( 991.82), SIMDE_FLOAT32_C( 880.36), SIMDE_FLOAT32_C( 271.28), SIMDE_FLOAT32_C( -550.03), SIMDE_FLOAT32_C( 415.17)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -136.67), SIMDE_FLOAT32_C( 221.00), SIMDE_FLOAT32_C( -864.98), SIMDE_FLOAT32_C( 290.97), SIMDE_FLOAT32_C( -704.76), SIMDE_FLOAT32_C( -867.43), SIMDE_FLOAT32_C( -323.83), SIMDE_FLOAT32_C( 74.81)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -127.03), SIMDE_FLOAT32_C( 135.62), SIMDE_FLOAT32_C( -308.59), SIMDE_FLOAT32_C( -352.70), SIMDE_FLOAT32_C( 883.68), SIMDE_FLOAT32_C( 134.86), SIMDE_FLOAT32_C( -894.89), SIMDE_FLOAT32_C( -737.05)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -127.03), SIMDE_FLOAT32_C( 221.00), SIMDE_FLOAT32_C( -308.59), SIMDE_FLOAT32_C( 290.97), SIMDE_FLOAT32_C( 883.68), SIMDE_FLOAT32_C( 134.86), SIMDE_FLOAT32_C( -323.83), SIMDE_FLOAT32_C( 74.81)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 780.81), SIMDE_FLOAT32_C( -76.69), SIMDE_FLOAT32_C( -213.47), SIMDE_FLOAT32_C( 296.05), SIMDE_FLOAT32_C( 129.81), SIMDE_FLOAT32_C( 95.07), SIMDE_FLOAT32_C( -493.97), SIMDE_FLOAT32_C( -309.39)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 224.91), SIMDE_FLOAT32_C( 545.10), SIMDE_FLOAT32_C( -928.25), SIMDE_FLOAT32_C( 373.14), SIMDE_FLOAT32_C( -710.01), SIMDE_FLOAT32_C( 166.25), SIMDE_FLOAT32_C( 729.82), SIMDE_FLOAT32_C( 996.22)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 780.81), SIMDE_FLOAT32_C( 545.10), 
SIMDE_FLOAT32_C( -213.47), SIMDE_FLOAT32_C( 373.14), SIMDE_FLOAT32_C( 129.81), SIMDE_FLOAT32_C( 166.25), SIMDE_FLOAT32_C( 729.82), SIMDE_FLOAT32_C( 996.22)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -153.19), SIMDE_FLOAT32_C( -809.42), SIMDE_FLOAT32_C( 665.06), SIMDE_FLOAT32_C( 966.51), SIMDE_FLOAT32_C( 515.00), SIMDE_FLOAT32_C( 239.45), SIMDE_FLOAT32_C( 878.48), SIMDE_FLOAT32_C( 83.52)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -855.75), SIMDE_FLOAT32_C( -288.00), SIMDE_FLOAT32_C( 118.38), SIMDE_FLOAT32_C( 373.81), SIMDE_FLOAT32_C( -507.54), SIMDE_FLOAT32_C( -677.13), SIMDE_FLOAT32_C( -680.61), SIMDE_FLOAT32_C( 934.18)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -153.19), SIMDE_FLOAT32_C( -288.00), SIMDE_FLOAT32_C( 665.06), SIMDE_FLOAT32_C( 966.51), SIMDE_FLOAT32_C( 515.00), SIMDE_FLOAT32_C( 239.45), SIMDE_FLOAT32_C( 878.48), SIMDE_FLOAT32_C( 934.18)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_max_ps(test_vec[i].a, test_vec[i].b); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_max_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -66.36), SIMDE_FLOAT64_C( -982.48), SIMDE_FLOAT64_C( -994.10), SIMDE_FLOAT64_C( 656.44)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 58.12), SIMDE_FLOAT64_C( 730.28), SIMDE_FLOAT64_C( 705.46), SIMDE_FLOAT64_C( 138.28)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 58.12), SIMDE_FLOAT64_C( 730.28), SIMDE_FLOAT64_C( 705.46), SIMDE_FLOAT64_C( 656.44)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 965.09), SIMDE_FLOAT64_C( -956.81), SIMDE_FLOAT64_C( -84.17), SIMDE_FLOAT64_C( -0.28)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -943.91), SIMDE_FLOAT64_C( -849.45), SIMDE_FLOAT64_C( 747.06), SIMDE_FLOAT64_C( 297.16)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 965.09), SIMDE_FLOAT64_C( -849.45), SIMDE_FLOAT64_C( 747.06), SIMDE_FLOAT64_C( 297.16)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 
-285.40), SIMDE_FLOAT64_C( 20.32), SIMDE_FLOAT64_C( -517.57), SIMDE_FLOAT64_C( 747.40)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -634.35), SIMDE_FLOAT64_C( -655.50), SIMDE_FLOAT64_C( 321.54), SIMDE_FLOAT64_C( -151.46)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -285.40), SIMDE_FLOAT64_C( 20.32), SIMDE_FLOAT64_C( 321.54), SIMDE_FLOAT64_C( 747.40)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 766.49), SIMDE_FLOAT64_C( -821.02), SIMDE_FLOAT64_C( 800.18), SIMDE_FLOAT64_C( -78.19)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -744.41), SIMDE_FLOAT64_C( 250.49), SIMDE_FLOAT64_C( 16.63), SIMDE_FLOAT64_C( 223.92)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 766.49), SIMDE_FLOAT64_C( 250.49), SIMDE_FLOAT64_C( 800.18), SIMDE_FLOAT64_C( 223.92)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 297.61), SIMDE_FLOAT64_C( -1.23), SIMDE_FLOAT64_C( -644.62), SIMDE_FLOAT64_C( -37.89)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -152.08), SIMDE_FLOAT64_C( 667.58), SIMDE_FLOAT64_C( 737.38), SIMDE_FLOAT64_C( 221.17)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 297.61), SIMDE_FLOAT64_C( 667.58), SIMDE_FLOAT64_C( 737.38), SIMDE_FLOAT64_C( 221.17)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 804.47), SIMDE_FLOAT64_C( -363.06), SIMDE_FLOAT64_C( -130.47), SIMDE_FLOAT64_C( -257.67)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -448.43), SIMDE_FLOAT64_C( 52.56), SIMDE_FLOAT64_C( -688.52), SIMDE_FLOAT64_C( 690.05)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 804.47), SIMDE_FLOAT64_C( 52.56), SIMDE_FLOAT64_C( -130.47), SIMDE_FLOAT64_C( 690.05)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 20.48), SIMDE_FLOAT64_C( 404.58), SIMDE_FLOAT64_C( -885.87), SIMDE_FLOAT64_C( 607.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -402.92), SIMDE_FLOAT64_C( 777.55), SIMDE_FLOAT64_C( -392.90), SIMDE_FLOAT64_C( 385.67)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 20.48), SIMDE_FLOAT64_C( 777.55), SIMDE_FLOAT64_C( -392.90), SIMDE_FLOAT64_C( 607.60)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -612.46), SIMDE_FLOAT64_C( -353.94), SIMDE_FLOAT64_C( -723.01), SIMDE_FLOAT64_C( -705.60)), 
simde_mm256_set_pd(SIMDE_FLOAT64_C( -638.30), SIMDE_FLOAT64_C( 67.95), SIMDE_FLOAT64_C( 537.38), SIMDE_FLOAT64_C( -280.17)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -612.46), SIMDE_FLOAT64_C( 67.95), SIMDE_FLOAT64_C( 537.38), SIMDE_FLOAT64_C( -280.17)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_max_pd(test_vec[i].a, test_vec[i].b); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_movedup_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -848.53), SIMDE_FLOAT64_C( -411.84), SIMDE_FLOAT64_C( -162.95), SIMDE_FLOAT64_C( 899.65)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -411.84), SIMDE_FLOAT64_C( -411.84), SIMDE_FLOAT64_C( 899.65), SIMDE_FLOAT64_C( 899.65)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -411.40), SIMDE_FLOAT64_C( -713.22), SIMDE_FLOAT64_C( -868.77), SIMDE_FLOAT64_C( -109.40)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -713.22), SIMDE_FLOAT64_C( -713.22), SIMDE_FLOAT64_C( -109.40), SIMDE_FLOAT64_C( -109.40)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -430.65), SIMDE_FLOAT64_C( 350.42), SIMDE_FLOAT64_C( -891.94), SIMDE_FLOAT64_C( -973.52)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 350.42), SIMDE_FLOAT64_C( 350.42), SIMDE_FLOAT64_C( -973.52), SIMDE_FLOAT64_C( -973.52)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -988.20), SIMDE_FLOAT64_C( -840.45), SIMDE_FLOAT64_C( 979.48), SIMDE_FLOAT64_C( -644.54)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -840.45), SIMDE_FLOAT64_C( -840.45), SIMDE_FLOAT64_C( -644.54), SIMDE_FLOAT64_C( -644.54)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 459.82), SIMDE_FLOAT64_C( 645.18), SIMDE_FLOAT64_C( -152.73), SIMDE_FLOAT64_C( 150.29)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 645.18), SIMDE_FLOAT64_C( 645.18), SIMDE_FLOAT64_C( 150.29), SIMDE_FLOAT64_C( 150.29)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -902.52), SIMDE_FLOAT64_C( -5.45), SIMDE_FLOAT64_C( -958.89), SIMDE_FLOAT64_C( -924.74)), 
simde_mm256_set_pd(SIMDE_FLOAT64_C( -5.45), SIMDE_FLOAT64_C( -5.45), SIMDE_FLOAT64_C( -924.74), SIMDE_FLOAT64_C( -924.74)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -582.37), SIMDE_FLOAT64_C( 163.98), SIMDE_FLOAT64_C( -479.86), SIMDE_FLOAT64_C( 420.17)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 163.98), SIMDE_FLOAT64_C( 163.98), SIMDE_FLOAT64_C( 420.17), SIMDE_FLOAT64_C( 420.17)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -319.68), SIMDE_FLOAT64_C( 791.82), SIMDE_FLOAT64_C( 388.17), SIMDE_FLOAT64_C( 537.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 791.82), SIMDE_FLOAT64_C( 791.82), SIMDE_FLOAT64_C( 537.00), SIMDE_FLOAT64_C( 537.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_movedup_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_movehdup_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 948.05), SIMDE_FLOAT32_C( -208.59), SIMDE_FLOAT32_C( -422.71), SIMDE_FLOAT32_C( -254.03), SIMDE_FLOAT32_C( 4.80), SIMDE_FLOAT32_C( -671.71), SIMDE_FLOAT32_C( 685.42), SIMDE_FLOAT32_C( -954.51)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 948.05), SIMDE_FLOAT32_C( 948.05), SIMDE_FLOAT32_C( -422.71), SIMDE_FLOAT32_C( -422.71), SIMDE_FLOAT32_C( 4.80), SIMDE_FLOAT32_C( 4.80), SIMDE_FLOAT32_C( 685.42), SIMDE_FLOAT32_C( 685.42)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 569.22), SIMDE_FLOAT32_C( 232.90), SIMDE_FLOAT32_C( 835.45), SIMDE_FLOAT32_C( 10.13), SIMDE_FLOAT32_C( 30.06), SIMDE_FLOAT32_C( 598.20), SIMDE_FLOAT32_C( 376.16), SIMDE_FLOAT32_C( -918.63)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 569.22), SIMDE_FLOAT32_C( 569.22), SIMDE_FLOAT32_C( 835.45), SIMDE_FLOAT32_C( 835.45), SIMDE_FLOAT32_C( 30.06), SIMDE_FLOAT32_C( 30.06), SIMDE_FLOAT32_C( 376.16), SIMDE_FLOAT32_C( 376.16)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 97.81), SIMDE_FLOAT32_C( 286.59), SIMDE_FLOAT32_C( -299.41), SIMDE_FLOAT32_C( 944.38), 
SIMDE_FLOAT32_C( -367.92), SIMDE_FLOAT32_C( 626.43), SIMDE_FLOAT32_C( 889.36), SIMDE_FLOAT32_C( 776.89)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 97.81), SIMDE_FLOAT32_C( 97.81), SIMDE_FLOAT32_C( -299.41), SIMDE_FLOAT32_C( -299.41), SIMDE_FLOAT32_C( -367.92), SIMDE_FLOAT32_C( -367.92), SIMDE_FLOAT32_C( 889.36), SIMDE_FLOAT32_C( 889.36)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -67.83), SIMDE_FLOAT32_C( -435.86), SIMDE_FLOAT32_C( -637.27), SIMDE_FLOAT32_C( -95.13), SIMDE_FLOAT32_C( 698.01), SIMDE_FLOAT32_C( 55.48), SIMDE_FLOAT32_C( 1.28), SIMDE_FLOAT32_C( -243.13)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -67.83), SIMDE_FLOAT32_C( -67.83), SIMDE_FLOAT32_C( -637.27), SIMDE_FLOAT32_C( -637.27), SIMDE_FLOAT32_C( 698.01), SIMDE_FLOAT32_C( 698.01), SIMDE_FLOAT32_C( 1.28), SIMDE_FLOAT32_C( 1.28)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -266.47), SIMDE_FLOAT32_C( 784.83), SIMDE_FLOAT32_C( -470.70), SIMDE_FLOAT32_C( 865.28), SIMDE_FLOAT32_C( 393.60), SIMDE_FLOAT32_C( -743.40), SIMDE_FLOAT32_C( 858.48), SIMDE_FLOAT32_C( 507.86)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -266.47), SIMDE_FLOAT32_C( -266.47), SIMDE_FLOAT32_C( -470.70), SIMDE_FLOAT32_C( -470.70), SIMDE_FLOAT32_C( 393.60), SIMDE_FLOAT32_C( 393.60), SIMDE_FLOAT32_C( 858.48), SIMDE_FLOAT32_C( 858.48)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 270.84), SIMDE_FLOAT32_C( 46.20), SIMDE_FLOAT32_C( -185.53), SIMDE_FLOAT32_C( 211.61), SIMDE_FLOAT32_C( -20.87), SIMDE_FLOAT32_C( -661.06), SIMDE_FLOAT32_C( 209.04), SIMDE_FLOAT32_C( 920.86)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 270.84), SIMDE_FLOAT32_C( 270.84), SIMDE_FLOAT32_C( -185.53), SIMDE_FLOAT32_C( -185.53), SIMDE_FLOAT32_C( -20.87), SIMDE_FLOAT32_C( -20.87), SIMDE_FLOAT32_C( 209.04), SIMDE_FLOAT32_C( 209.04)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -751.76), SIMDE_FLOAT32_C( -655.85), SIMDE_FLOAT32_C( 179.21), SIMDE_FLOAT32_C( 373.36), SIMDE_FLOAT32_C( 1.62), SIMDE_FLOAT32_C( -981.05), SIMDE_FLOAT32_C( 802.72), SIMDE_FLOAT32_C( -49.40)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 
-751.76), SIMDE_FLOAT32_C( -751.76), SIMDE_FLOAT32_C( 179.21), SIMDE_FLOAT32_C( 179.21), SIMDE_FLOAT32_C( 1.62), SIMDE_FLOAT32_C( 1.62), SIMDE_FLOAT32_C( 802.72), SIMDE_FLOAT32_C( 802.72)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 44.57), SIMDE_FLOAT32_C( -391.28), SIMDE_FLOAT32_C( 614.40), SIMDE_FLOAT32_C( -901.50), SIMDE_FLOAT32_C( 577.45), SIMDE_FLOAT32_C( -465.25), SIMDE_FLOAT32_C( -148.70), SIMDE_FLOAT32_C( -714.68)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 44.57), SIMDE_FLOAT32_C( 44.57), SIMDE_FLOAT32_C( 614.40), SIMDE_FLOAT32_C( 614.40), SIMDE_FLOAT32_C( 577.45), SIMDE_FLOAT32_C( 577.45), SIMDE_FLOAT32_C( -148.70), SIMDE_FLOAT32_C( -148.70)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_movehdup_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_moveldup_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 366.49), SIMDE_FLOAT32_C( -15.43), SIMDE_FLOAT32_C( -732.71), SIMDE_FLOAT32_C( 312.44), SIMDE_FLOAT32_C( -535.64), SIMDE_FLOAT32_C( -24.14), SIMDE_FLOAT32_C( -881.62), SIMDE_FLOAT32_C( 419.86)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -15.43), SIMDE_FLOAT32_C( -15.43), SIMDE_FLOAT32_C( 312.44), SIMDE_FLOAT32_C( 312.44), SIMDE_FLOAT32_C( -24.14), SIMDE_FLOAT32_C( -24.14), SIMDE_FLOAT32_C( 419.86), SIMDE_FLOAT32_C( 419.86)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -625.40), SIMDE_FLOAT32_C( -523.97), SIMDE_FLOAT32_C( 296.91), SIMDE_FLOAT32_C( 228.72), SIMDE_FLOAT32_C( 553.44), SIMDE_FLOAT32_C( -88.10), SIMDE_FLOAT32_C( -240.30), SIMDE_FLOAT32_C( 437.29)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -523.97), SIMDE_FLOAT32_C( -523.97), SIMDE_FLOAT32_C( 228.72), SIMDE_FLOAT32_C( 228.72), SIMDE_FLOAT32_C( -88.10), SIMDE_FLOAT32_C( -88.10), SIMDE_FLOAT32_C( 437.29), SIMDE_FLOAT32_C( 437.29)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -54.17), SIMDE_FLOAT32_C( -444.25), SIMDE_FLOAT32_C( 
-384.50), SIMDE_FLOAT32_C( 781.57), SIMDE_FLOAT32_C( 607.05), SIMDE_FLOAT32_C( -295.21), SIMDE_FLOAT32_C( 101.75), SIMDE_FLOAT32_C( -941.55)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -444.25), SIMDE_FLOAT32_C( -444.25), SIMDE_FLOAT32_C( 781.57), SIMDE_FLOAT32_C( 781.57), SIMDE_FLOAT32_C( -295.21), SIMDE_FLOAT32_C( -295.21), SIMDE_FLOAT32_C( -941.55), SIMDE_FLOAT32_C( -941.55)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -41.58), SIMDE_FLOAT32_C( -957.94), SIMDE_FLOAT32_C( 871.50), SIMDE_FLOAT32_C( -839.89), SIMDE_FLOAT32_C( 692.80), SIMDE_FLOAT32_C( -417.15), SIMDE_FLOAT32_C( -850.22), SIMDE_FLOAT32_C( 594.16)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -957.94), SIMDE_FLOAT32_C( -957.94), SIMDE_FLOAT32_C( -839.89), SIMDE_FLOAT32_C( -839.89), SIMDE_FLOAT32_C( -417.15), SIMDE_FLOAT32_C( -417.15), SIMDE_FLOAT32_C( 594.16), SIMDE_FLOAT32_C( 594.16)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -319.28), SIMDE_FLOAT32_C( -229.51), SIMDE_FLOAT32_C( -581.14), SIMDE_FLOAT32_C( 81.57), SIMDE_FLOAT32_C( 774.33), SIMDE_FLOAT32_C( -621.69), SIMDE_FLOAT32_C( -447.13), SIMDE_FLOAT32_C( 334.88)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -229.51), SIMDE_FLOAT32_C( -229.51), SIMDE_FLOAT32_C( 81.57), SIMDE_FLOAT32_C( 81.57), SIMDE_FLOAT32_C( -621.69), SIMDE_FLOAT32_C( -621.69), SIMDE_FLOAT32_C( 334.88), SIMDE_FLOAT32_C( 334.88)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 288.60), SIMDE_FLOAT32_C( 139.33), SIMDE_FLOAT32_C( 951.18), SIMDE_FLOAT32_C( -924.84), SIMDE_FLOAT32_C( -320.35), SIMDE_FLOAT32_C( -998.89), SIMDE_FLOAT32_C( 164.55), SIMDE_FLOAT32_C( 991.73)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 139.33), SIMDE_FLOAT32_C( 139.33), SIMDE_FLOAT32_C( -924.84), SIMDE_FLOAT32_C( -924.84), SIMDE_FLOAT32_C( -998.89), SIMDE_FLOAT32_C( -998.89), SIMDE_FLOAT32_C( 991.73), SIMDE_FLOAT32_C( 991.73)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -671.27), SIMDE_FLOAT32_C( 35.52), SIMDE_FLOAT32_C( 780.53), SIMDE_FLOAT32_C( -58.65), SIMDE_FLOAT32_C( 227.27), SIMDE_FLOAT32_C( -621.99), SIMDE_FLOAT32_C( -182.62), 
SIMDE_FLOAT32_C( 448.36)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 35.52), SIMDE_FLOAT32_C( 35.52), SIMDE_FLOAT32_C( -58.65), SIMDE_FLOAT32_C( -58.65), SIMDE_FLOAT32_C( -621.99), SIMDE_FLOAT32_C( -621.99), SIMDE_FLOAT32_C( 448.36), SIMDE_FLOAT32_C( 448.36)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -984.41), SIMDE_FLOAT32_C( -968.89), SIMDE_FLOAT32_C( -986.96), SIMDE_FLOAT32_C( -760.49), SIMDE_FLOAT32_C( 908.37), SIMDE_FLOAT32_C( 35.57), SIMDE_FLOAT32_C( -144.09), SIMDE_FLOAT32_C( -735.38)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -968.89), SIMDE_FLOAT32_C( -968.89), SIMDE_FLOAT32_C( -760.49), SIMDE_FLOAT32_C( -760.49), SIMDE_FLOAT32_C( 35.57), SIMDE_FLOAT32_C( 35.57), SIMDE_FLOAT32_C( -735.38), SIMDE_FLOAT32_C( -735.38)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_moveldup_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_movemask_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; int r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C(-1882468747), INT32_C( 687119108), INT32_C( 990615051), INT32_C(-1253009356), INT32_C( -617641993), INT32_C(-1788847115), INT32_C( 1286496634), INT32_C( -717001088)), 157 }, { simde_mm256_set_epi32(INT32_C(-1989003409), INT32_C(-1421574364), INT32_C( -382909714), INT32_C( 1867671217), INT32_C( -487294761), INT32_C( -454003817), INT32_C( -33445130), INT32_C( -757457478)), 239 }, { simde_mm256_set_epi32(INT32_C( 75750543), INT32_C( -792015200), INT32_C( -456366441), INT32_C( 1395825015), INT32_C( 1090607410), INT32_C( -644107645), INT32_C( 2024077315), INT32_C( 1442543377)), 100 }, { simde_mm256_set_epi32(INT32_C( 64842844), INT32_C(-1499432736), INT32_C( -849672143), INT32_C( 808386603), INT32_C( 1431766696), INT32_C( 778893676), INT32_C( -4359592), INT32_C( -465691700)), 99 }, { simde_mm256_set_epi32(INT32_C(-1142593030), INT32_C( 1697913004), INT32_C(-1241903623), INT32_C( 1420498198), INT32_C( 1725730120), 
INT32_C( 1757719770), INT32_C( 1834101516), INT32_C(-1076410946)), 161 }, { simde_mm256_set_epi32(INT32_C( -247458634), INT32_C( 497266182), INT32_C( 668953611), INT32_C( -198408792), INT32_C( 443042962), INT32_C( 174399567), INT32_C( 1910223665), INT32_C( 1408104689)), 144 }, { simde_mm256_set_epi32(INT32_C( 1266303831), INT32_C(-1981624404), INT32_C( 1894718767), INT32_C( 1471458198), INT32_C(-1698669031), INT32_C(-1514661026), INT32_C(-1880466849), INT32_C(-1226909311)), 79 }, { simde_mm256_set_epi32(INT32_C( 1314135938), INT32_C( 1476193225), INT32_C( 1478701126), INT32_C( 1097128360), INT32_C( -681267332), INT32_C( -854863432), INT32_C(-1814679036), INT32_C( 2077413591)), 14 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm256_movemask_ps(simde_mm256_castsi256_ps(test_vec[i].a)); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm256_movemask_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; int r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C(-3476114617639449125), INT64_C( 4174348817044283167), INT64_C( 2372823762134739460), INT64_C( 2922754125044459603)), 8 }, { simde_mm256_set_epi64x(INT64_C( 2977848152908728188), INT64_C(-9001314161736319662), INT64_C( 316358866512427816), INT64_C( 122734419977663898)), 4 }, { simde_mm256_set_epi64x(INT64_C( 1458362257601867464), INT64_C(-2204086314119824728), INT64_C( 4226262178485377739), INT64_C( 3412235452127467527)), 4 }, { simde_mm256_set_epi64x(INT64_C(-3206742534496437425), INT64_C( 4783689227782243759), INT64_C(-2969411634419391796), INT64_C( 6490543198836487087)), 10 }, { simde_mm256_set_epi64x(INT64_C( 5617129527752259343), INT64_C( 8836429733868806831), INT64_C(-3648376369890579220), INT64_C( 2974368927295586543)), 2 }, { simde_mm256_set_epi64x(INT64_C(-7634385432411504297), INT64_C( 7257505416389479780), INT64_C( 7838438756599446984), INT64_C( 8814240438147347165)), 8 }, { simde_mm256_set_epi64x(INT64_C( 
9071444056306285548), INT64_C( 8310979399473657009), INT64_C(-1777863912628705993), INT64_C( 4472167403670196676)), 2 }, { simde_mm256_set_epi64x(INT64_C(-6555517372143397251), INT64_C( 2575855241209659630), INT64_C(-8431720993701553511), INT64_C( -874276281105343662)), 11 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm256_movemask_pd(simde_mm256_castsi256_pd(test_vec[i].a)); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm256_mul_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -65.11), SIMDE_FLOAT32_C( 729.63), SIMDE_FLOAT32_C( 579.86), SIMDE_FLOAT32_C( 759.34), SIMDE_FLOAT32_C( 638.63), SIMDE_FLOAT32_C( 366.71), SIMDE_FLOAT32_C( -251.89), SIMDE_FLOAT32_C( -327.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -853.75), SIMDE_FLOAT32_C( 306.50), SIMDE_FLOAT32_C( -694.30), SIMDE_FLOAT32_C( 62.03), SIMDE_FLOAT32_C( -332.43), SIMDE_FLOAT32_C( -617.58), SIMDE_FLOAT32_C( -841.05), SIMDE_FLOAT32_C( -990.91)), simde_mm256_set_ps(SIMDE_FLOAT32_C(55587.66), SIMDE_FLOAT32_C(223631.59), SIMDE_FLOAT32_C(-402596.78), SIMDE_FLOAT32_C(47101.86), SIMDE_FLOAT32_C(-212299.77), SIMDE_FLOAT32_C(-226472.77), SIMDE_FLOAT32_C(211852.08), SIMDE_FLOAT32_C(324721.22)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -540.24), SIMDE_FLOAT32_C( 599.92), SIMDE_FLOAT32_C( 946.63), SIMDE_FLOAT32_C( 269.39), SIMDE_FLOAT32_C( 990.71), SIMDE_FLOAT32_C( 736.78), SIMDE_FLOAT32_C( 735.17), SIMDE_FLOAT32_C( -839.81)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -471.86), SIMDE_FLOAT32_C( 231.47), SIMDE_FLOAT32_C( 368.19), SIMDE_FLOAT32_C( -199.69), SIMDE_FLOAT32_C( -781.21), SIMDE_FLOAT32_C( -576.81), SIMDE_FLOAT32_C( -351.44), SIMDE_FLOAT32_C( 650.57)), simde_mm256_set_ps(SIMDE_FLOAT32_C(254917.64), SIMDE_FLOAT32_C(138863.48), SIMDE_FLOAT32_C(348539.72), SIMDE_FLOAT32_C(-53794.49), SIMDE_FLOAT32_C(-773952.62), SIMDE_FLOAT32_C(-424982.09), 
SIMDE_FLOAT32_C(-258368.14), SIMDE_FLOAT32_C(-546355.19)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 174.33), SIMDE_FLOAT32_C( -261.00), SIMDE_FLOAT32_C( 947.71), SIMDE_FLOAT32_C( -39.38), SIMDE_FLOAT32_C( -142.31), SIMDE_FLOAT32_C( -753.91), SIMDE_FLOAT32_C( -304.55), SIMDE_FLOAT32_C( 197.06)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -894.79), SIMDE_FLOAT32_C( 298.89), SIMDE_FLOAT32_C( 413.08), SIMDE_FLOAT32_C( 45.44), SIMDE_FLOAT32_C( -362.24), SIMDE_FLOAT32_C( 247.39), SIMDE_FLOAT32_C( -836.15), SIMDE_FLOAT32_C( 250.52)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-155988.73), SIMDE_FLOAT32_C(-78010.30), SIMDE_FLOAT32_C(391480.03), SIMDE_FLOAT32_C(-1789.43), SIMDE_FLOAT32_C(51550.37), SIMDE_FLOAT32_C(-186509.78), SIMDE_FLOAT32_C(254649.48), SIMDE_FLOAT32_C(49367.47)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -108.81), SIMDE_FLOAT32_C( -144.46), SIMDE_FLOAT32_C( -926.46), SIMDE_FLOAT32_C( -87.02), SIMDE_FLOAT32_C( -701.58), SIMDE_FLOAT32_C( 412.02), SIMDE_FLOAT32_C( -404.71), SIMDE_FLOAT32_C( -140.12)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -502.46), SIMDE_FLOAT32_C( -741.60), SIMDE_FLOAT32_C( 261.69), SIMDE_FLOAT32_C( -236.88), SIMDE_FLOAT32_C( 805.45), SIMDE_FLOAT32_C( 663.49), SIMDE_FLOAT32_C( 804.65), SIMDE_FLOAT32_C( -231.08)), simde_mm256_set_ps(SIMDE_FLOAT32_C(54672.67), SIMDE_FLOAT32_C(107131.54), SIMDE_FLOAT32_C(-242445.33), SIMDE_FLOAT32_C(20613.30), SIMDE_FLOAT32_C(-565087.62), SIMDE_FLOAT32_C(273371.12), SIMDE_FLOAT32_C(-325649.91), SIMDE_FLOAT32_C(32378.93)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 274.12), SIMDE_FLOAT32_C( 417.28), SIMDE_FLOAT32_C( 626.47), SIMDE_FLOAT32_C( 541.42), SIMDE_FLOAT32_C( -351.86), SIMDE_FLOAT32_C( 144.88), SIMDE_FLOAT32_C( -692.65), SIMDE_FLOAT32_C( 994.52)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -588.84), SIMDE_FLOAT32_C( 440.91), SIMDE_FLOAT32_C( -668.35), SIMDE_FLOAT32_C( 950.11), SIMDE_FLOAT32_C( 38.08), SIMDE_FLOAT32_C( -877.72), SIMDE_FLOAT32_C( 389.76), SIMDE_FLOAT32_C( 702.21)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-161412.83), 
SIMDE_FLOAT32_C(183982.92), SIMDE_FLOAT32_C(-418701.19), SIMDE_FLOAT32_C(514408.53), SIMDE_FLOAT32_C(-13398.83), SIMDE_FLOAT32_C(-127164.07), SIMDE_FLOAT32_C(-269967.28), SIMDE_FLOAT32_C(698361.94)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -72.46), SIMDE_FLOAT32_C( -957.98), SIMDE_FLOAT32_C( 872.62), SIMDE_FLOAT32_C( -600.00), SIMDE_FLOAT32_C( 937.99), SIMDE_FLOAT32_C( -698.26), SIMDE_FLOAT32_C( 895.96), SIMDE_FLOAT32_C( -799.53)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -864.38), SIMDE_FLOAT32_C( -416.69), SIMDE_FLOAT32_C( 894.78), SIMDE_FLOAT32_C( 968.43), SIMDE_FLOAT32_C( 609.48), SIMDE_FLOAT32_C( 317.20), SIMDE_FLOAT32_C( 767.37), SIMDE_FLOAT32_C( -51.68)), simde_mm256_set_ps(SIMDE_FLOAT32_C(62632.97), SIMDE_FLOAT32_C(399180.69), SIMDE_FLOAT32_C(780802.94), SIMDE_FLOAT32_C(-581058.00), SIMDE_FLOAT32_C(571686.12), SIMDE_FLOAT32_C(-221488.08), SIMDE_FLOAT32_C(687532.81), SIMDE_FLOAT32_C(41319.71)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -131.07), SIMDE_FLOAT32_C( -294.47), SIMDE_FLOAT32_C( -916.92), SIMDE_FLOAT32_C( 146.62), SIMDE_FLOAT32_C( 597.05), SIMDE_FLOAT32_C( 75.62), SIMDE_FLOAT32_C( 636.06), SIMDE_FLOAT32_C( 363.23)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 427.08), SIMDE_FLOAT32_C( -105.65), SIMDE_FLOAT32_C( 98.05), SIMDE_FLOAT32_C( 398.71), SIMDE_FLOAT32_C( -883.53), SIMDE_FLOAT32_C( -434.31), SIMDE_FLOAT32_C( 638.77), SIMDE_FLOAT32_C( -453.78)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-55977.38), SIMDE_FLOAT32_C(31110.76), SIMDE_FLOAT32_C(-89904.01), SIMDE_FLOAT32_C(58458.86), SIMDE_FLOAT32_C(-527511.56), SIMDE_FLOAT32_C(-32842.52), SIMDE_FLOAT32_C(406296.06), SIMDE_FLOAT32_C(-164826.52)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 401.83), SIMDE_FLOAT32_C( -101.21), SIMDE_FLOAT32_C( -199.00), SIMDE_FLOAT32_C( 823.62), SIMDE_FLOAT32_C( -930.84), SIMDE_FLOAT32_C( -154.42), SIMDE_FLOAT32_C( 958.59), SIMDE_FLOAT32_C( 853.69)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -449.55), SIMDE_FLOAT32_C( 354.78), SIMDE_FLOAT32_C( -131.96), SIMDE_FLOAT32_C( -519.93), SIMDE_FLOAT32_C( 
699.65), SIMDE_FLOAT32_C( 781.77), SIMDE_FLOAT32_C( -157.96), SIMDE_FLOAT32_C( -793.47)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-180642.67), SIMDE_FLOAT32_C(-35907.29), SIMDE_FLOAT32_C(26260.04), SIMDE_FLOAT32_C(-428224.75), SIMDE_FLOAT32_C(-651262.25), SIMDE_FLOAT32_C(-120720.92), SIMDE_FLOAT32_C(-151418.89), SIMDE_FLOAT32_C(-677377.38)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_mul_ps(test_vec[i].a, test_vec[i].b); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_mul_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -216.07), SIMDE_FLOAT64_C( -759.70), SIMDE_FLOAT64_C( -257.81), SIMDE_FLOAT64_C( 916.82)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -279.71), SIMDE_FLOAT64_C( 654.36), SIMDE_FLOAT64_C( -699.07), SIMDE_FLOAT64_C( 772.15)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 60436.94), SIMDE_FLOAT64_C(-497117.29), SIMDE_FLOAT64_C( 180227.24), SIMDE_FLOAT64_C( 707922.56)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -576.02), SIMDE_FLOAT64_C( -915.91), SIMDE_FLOAT64_C( 616.42), SIMDE_FLOAT64_C( 692.53)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 285.52), SIMDE_FLOAT64_C( -538.61), SIMDE_FLOAT64_C( 604.61), SIMDE_FLOAT64_C( -220.38)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-164465.23), SIMDE_FLOAT64_C( 493318.29), SIMDE_FLOAT64_C( 372693.70), SIMDE_FLOAT64_C(-152619.76)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -378.41), SIMDE_FLOAT64_C( -782.44), SIMDE_FLOAT64_C( -858.38), SIMDE_FLOAT64_C( -932.65)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 782.91), SIMDE_FLOAT64_C( 352.73), SIMDE_FLOAT64_C( 705.00), SIMDE_FLOAT64_C( -78.46)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-296260.97), SIMDE_FLOAT64_C(-275990.06), SIMDE_FLOAT64_C(-605157.90), SIMDE_FLOAT64_C( 73175.72)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -936.48), SIMDE_FLOAT64_C( 274.43), SIMDE_FLOAT64_C( 341.69), SIMDE_FLOAT64_C( 588.43)), 
simde_mm256_set_pd(SIMDE_FLOAT64_C( -915.35), SIMDE_FLOAT64_C( 625.98), SIMDE_FLOAT64_C( -66.28), SIMDE_FLOAT64_C( -474.34)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 857206.97), SIMDE_FLOAT64_C( 171787.69), SIMDE_FLOAT64_C( -22647.21), SIMDE_FLOAT64_C(-279115.89)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 371.75), SIMDE_FLOAT64_C( -392.05), SIMDE_FLOAT64_C( -730.60), SIMDE_FLOAT64_C( 399.39)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -409.43), SIMDE_FLOAT64_C( 920.37), SIMDE_FLOAT64_C( -56.28), SIMDE_FLOAT64_C( -779.31)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-152205.60), SIMDE_FLOAT64_C(-360831.06), SIMDE_FLOAT64_C( 41118.17), SIMDE_FLOAT64_C(-311248.62)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( 955.19), SIMDE_FLOAT64_C( 162.40), SIMDE_FLOAT64_C( -236.01)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -741.60), SIMDE_FLOAT64_C( 194.99), SIMDE_FLOAT64_C( 845.16), SIMDE_FLOAT64_C( 363.39)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 74.16), SIMDE_FLOAT64_C( 186252.50), SIMDE_FLOAT64_C( 137253.98), SIMDE_FLOAT64_C( -85763.67)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 18.23), SIMDE_FLOAT64_C( 296.33), SIMDE_FLOAT64_C( 628.43), SIMDE_FLOAT64_C( -660.29)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -24.23), SIMDE_FLOAT64_C( 573.95), SIMDE_FLOAT64_C( 350.37), SIMDE_FLOAT64_C( -979.36)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -441.71), SIMDE_FLOAT64_C( 170078.60), SIMDE_FLOAT64_C( 220183.02), SIMDE_FLOAT64_C( 646661.61)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 14.47), SIMDE_FLOAT64_C( 780.26), SIMDE_FLOAT64_C( -875.01), SIMDE_FLOAT64_C( 609.14)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 690.47), SIMDE_FLOAT64_C( -614.79), SIMDE_FLOAT64_C( 626.96), SIMDE_FLOAT64_C( -369.27)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 9991.10), SIMDE_FLOAT64_C(-479696.05), SIMDE_FLOAT64_C(-548596.27), SIMDE_FLOAT64_C(-224937.13)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_mul_pd(test_vec[i].a, test_vec[i].b); simde_assert_m256d_close(r, test_vec[i].r, 
1); } return 0; } static int test_simde_mm256_or_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -856.34), SIMDE_FLOAT32_C( -251.54), SIMDE_FLOAT32_C( 873.84), SIMDE_FLOAT32_C( 282.56), SIMDE_FLOAT32_C( -701.43), SIMDE_FLOAT32_C( 881.08), SIMDE_FLOAT32_C( 949.17), SIMDE_FLOAT32_C( -70.20)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 13.03), SIMDE_FLOAT32_C( -213.71), SIMDE_FLOAT32_C( -960.05), SIMDE_FLOAT32_C( 76.97), SIMDE_FLOAT32_C( 529.10), SIMDE_FLOAT32_C( -768.23), SIMDE_FLOAT32_C( 808.48), SIMDE_FLOAT32_C( -237.02)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -3431.99), SIMDE_FLOAT32_C( -255.75), SIMDE_FLOAT32_C( -1001.87), SIMDE_FLOAT32_C( 315.93), SIMDE_FLOAT32_C( -701.50), SIMDE_FLOAT32_C( -881.25), SIMDE_FLOAT32_C( 957.48), SIMDE_FLOAT32_C( -474.81)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 222.49), SIMDE_FLOAT32_C( 898.11), SIMDE_FLOAT32_C( -986.95), SIMDE_FLOAT32_C( -624.89), SIMDE_FLOAT32_C( -780.02), SIMDE_FLOAT32_C( 920.74), SIMDE_FLOAT32_C( 815.92), SIMDE_FLOAT32_C( 550.68)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -274.50), SIMDE_FLOAT32_C( 377.51), SIMDE_FLOAT32_C( 595.96), SIMDE_FLOAT32_C( 513.30), SIMDE_FLOAT32_C( 235.21), SIMDE_FLOAT32_C( -45.95), SIMDE_FLOAT32_C( 722.85), SIMDE_FLOAT32_C( 781.79)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -446.98), SIMDE_FLOAT32_C( 129422.58), SIMDE_FLOAT32_C( -987.97), SIMDE_FLOAT32_C( -625.94), SIMDE_FLOAT32_C( -60213.79), SIMDE_FLOAT32_C( -15867.97), SIMDE_FLOAT32_C( 1023.98), SIMDE_FLOAT32_C( 815.93)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -789.64), SIMDE_FLOAT32_C( 957.16), SIMDE_FLOAT32_C( -431.62), SIMDE_FLOAT32_C( 527.46), SIMDE_FLOAT32_C( -345.54), SIMDE_FLOAT32_C( -528.90), SIMDE_FLOAT32_C( -257.76), SIMDE_FLOAT32_C( 194.98)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 966.68), SIMDE_FLOAT32_C( 513.70), SIMDE_FLOAT32_C( 151.32), SIMDE_FLOAT32_C( -424.41), SIMDE_FLOAT32_C( -411.73), SIMDE_FLOAT32_C( 846.41), SIMDE_FLOAT32_C( 151.87), 
SIMDE_FLOAT32_C( -542.65)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -983.69), SIMDE_FLOAT32_C( 957.73), SIMDE_FLOAT32_C( -431.75), SIMDE_FLOAT32_C(-110586.96), SIMDE_FLOAT32_C( -475.73), SIMDE_FLOAT32_C( -862.93), SIMDE_FLOAT32_C( -304.00), SIMDE_FLOAT32_C( -51195.98)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -998.71), SIMDE_FLOAT32_C( -667.81), SIMDE_FLOAT32_C( 67.10), SIMDE_FLOAT32_C( -760.87), SIMDE_FLOAT32_C( -217.54), SIMDE_FLOAT32_C( 503.65), SIMDE_FLOAT32_C( 247.04), SIMDE_FLOAT32_C( 844.19)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 661.94), SIMDE_FLOAT32_C( -209.04), SIMDE_FLOAT32_C( -75.64), SIMDE_FLOAT32_C( -274.76), SIMDE_FLOAT32_C( -442.04), SIMDE_FLOAT32_C( -562.29), SIMDE_FLOAT32_C( -151.20), SIMDE_FLOAT32_C( 897.08)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -1015.96), SIMDE_FLOAT32_C( -63484.00), SIMDE_FLOAT32_C( -75.73), SIMDE_FLOAT32_C( -98031.86), SIMDE_FLOAT32_C( -443.12), SIMDE_FLOAT32_C(-130983.49), SIMDE_FLOAT32_C( -247.23), SIMDE_FLOAT32_C( 973.21)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 413.34), SIMDE_FLOAT32_C( 969.02), SIMDE_FLOAT32_C( -337.47), SIMDE_FLOAT32_C( -324.75), SIMDE_FLOAT32_C( -193.88), SIMDE_FLOAT32_C( -534.97), SIMDE_FLOAT32_C( 873.51), SIMDE_FLOAT32_C( -141.88)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -783.17), SIMDE_FLOAT32_C( 718.95), SIMDE_FLOAT32_C( 707.27), SIMDE_FLOAT32_C( -573.61), SIMDE_FLOAT32_C( -268.73), SIMDE_FLOAT32_C( 245.35), SIMDE_FLOAT32_C( 309.36), SIMDE_FLOAT32_C( -248.32)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-106455.79), SIMDE_FLOAT32_C( 975.97), SIMDE_FLOAT32_C( -94714.82), SIMDE_FLOAT32_C( -89806.08), SIMDE_FLOAT32_C( -399.98), SIMDE_FLOAT32_C( -62975.62), SIMDE_FLOAT32_C( 112093.41), SIMDE_FLOAT32_C( -253.95)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -788.80), SIMDE_FLOAT32_C( -153.54), SIMDE_FLOAT32_C( 281.95), SIMDE_FLOAT32_C( 256.34), SIMDE_FLOAT32_C( -635.81), SIMDE_FLOAT32_C( -147.12), SIMDE_FLOAT32_C( -744.53), SIMDE_FLOAT32_C( 799.41)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.25), SIMDE_FLOAT32_C( -53.24), 
SIMDE_FLOAT32_C( 690.38), SIMDE_FLOAT32_C( 776.59), SIMDE_FLOAT32_C( -713.17), SIMDE_FLOAT32_C( -856.98), SIMDE_FLOAT32_C( -302.74), SIMDE_FLOAT32_C( 409.28)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -852.80), SIMDE_FLOAT32_C( -222.00), SIMDE_FLOAT32_C( 88563.70), SIMDE_FLOAT32_C( 99423.55), SIMDE_FLOAT32_C( -763.94), SIMDE_FLOAT32_C( -55102.72), SIMDE_FLOAT32_C( -98047.97), SIMDE_FLOAT32_C( 106487.99)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -404.76), SIMDE_FLOAT32_C( -875.24), SIMDE_FLOAT32_C( 822.92), SIMDE_FLOAT32_C( 409.42), SIMDE_FLOAT32_C( 144.27), SIMDE_FLOAT32_C( 260.67), SIMDE_FLOAT32_C( 383.19), SIMDE_FLOAT32_C( 173.56)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -913.39), SIMDE_FLOAT32_C( -747.11), SIMDE_FLOAT32_C( 262.13), SIMDE_FLOAT32_C( 535.89), SIMDE_FLOAT32_C( -513.17), SIMDE_FLOAT32_C( 945.43), SIMDE_FLOAT32_C( 473.23), SIMDE_FLOAT32_C( 594.31)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-122099.98), SIMDE_FLOAT32_C( -1003.24), SIMDE_FLOAT32_C( 106357.79), SIMDE_FLOAT32_C( 105467.93), SIMDE_FLOAT32_C( -36944.00), SIMDE_FLOAT32_C( 122047.55), SIMDE_FLOAT32_C( 511.23), SIMDE_FLOAT32_C( 48543.87)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 255.64), SIMDE_FLOAT32_C( 352.54), SIMDE_FLOAT32_C( 905.17), SIMDE_FLOAT32_C( -97.04), SIMDE_FLOAT32_C( 635.19), SIMDE_FLOAT32_C( -653.37), SIMDE_FLOAT32_C( 871.87), SIMDE_FLOAT32_C( -471.42)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -534.86), SIMDE_FLOAT32_C( 20.68), SIMDE_FLOAT32_C( 963.33), SIMDE_FLOAT32_C( -431.12), SIMDE_FLOAT32_C( 199.08), SIMDE_FLOAT32_C( 684.85), SIMDE_FLOAT32_C( -893.07), SIMDE_FLOAT32_C( -817.23)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -65463.87), SIMDE_FLOAT32_C( 362.92), SIMDE_FLOAT32_C( 971.50), SIMDE_FLOAT32_C( -431.25), SIMDE_FLOAT32_C( 57308.48), SIMDE_FLOAT32_C( -685.87), SIMDE_FLOAT32_C( -895.87), SIMDE_FLOAT32_C(-122879.96)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_or_ps(test_vec[i].a, test_vec[i].b); simde_assert_m256_close(r, 
test_vec[i].r, 1); } return 0; } static int test_simde_mm256_or_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( -465239073), INT32_C( 1279184195), INT32_C( 2016764339), INT32_C(-2145324536), INT32_C(-1764212445), INT32_C( 366604460), INT32_C( 2076865232), INT32_C( -193563958)), simde_mm256_set_epi32(INT32_C( 541400396), INT32_C( -972933189), INT32_C( 510962050), INT32_C( -823731197), INT32_C( -98096262), INT32_C( -873938367), INT32_C( 832486509), INT32_C(-1258790386)), simde_mm256_set_epi32(INT32_C( -465231905), INT32_C( -834732037), INT32_C( 2121656755), INT32_C( -823722485), INT32_C( -16815237), INT32_C( -537267475), INT32_C( 2078211837), INT32_C( -184651058)) }, { simde_mm256_set_epi32(INT32_C( -357268810), INT32_C( -651759515), INT32_C( 235257662), INT32_C( -710907048), INT32_C(-1810919865), INT32_C( 1157547055), INT32_C(-1536937137), INT32_C( 1736416320)), simde_mm256_set_epi32(INT32_C( -849859400), INT32_C( -22261753), INT32_C(-1893529894), INT32_C( -986693653), INT32_C( -263776240), INT32_C( 502458571), INT32_C( -753857515), INT32_C(-1100300353)), simde_mm256_set_epi32(INT32_C( -268652866), INT32_C( -5312409), INT32_C(-1893220354), INT32_C( -709853189), INT32_C( -196108713), INT32_C( 1576987887), INT32_C( -143311009), INT32_C( -8404993)) }, { simde_mm256_set_epi32(INT32_C(-1390025255), INT32_C( 226124884), INT32_C( 1790052194), INT32_C( -108567495), INT32_C( 1678439305), INT32_C( 946188942), INT32_C(-2005442113), INT32_C(-1348041469)), simde_mm256_set_epi32(INT32_C( 575705169), INT32_C(-1737321258), INT32_C(-1640691781), INT32_C( -551468190), INT32_C( 1952646913), INT32_C( 1393751647), INT32_C(-1388496514), INT32_C( 590324192)), simde_mm256_set_epi32(INT32_C(-1351223847), INT32_C(-1652886314), INT32_C( -21559301), INT32_C( -5806213), INT32_C( 1953232777), INT32_C( 2071459551), INT32_C(-1384154625), INT32_C(-1347443229)) }, { 
simde_mm256_set_epi32(INT32_C(-1745763744), INT32_C( -289111572), INT32_C( -806274679), INT32_C( 1716472169), INT32_C( 1235124509), INT32_C( -2416200), INT32_C( -718461715), INT32_C(-2068168100)), simde_mm256_set_epi32(INT32_C( -747913617), INT32_C( 1661223449), INT32_C( 458600484), INT32_C(-1412075388), INT32_C( 1069408970), INT32_C( 1945090076), INT32_C( -553357992), INT32_C(-1440436522)), simde_mm256_set_epi32(INT32_C( -671351185), INT32_C( -272317443), INT32_C( -537544275), INT32_C( -270566931), INT32_C( 2143282143), INT32_C( -17988), INT32_C( -550670851), INT32_C(-1363218722)) }, { simde_mm256_set_epi32(INT32_C(-1244409590), INT32_C( 1807868489), INT32_C( -47548399), INT32_C( 1894098437), INT32_C(-1418958797), INT32_C( -739449954), INT32_C( 531303833), INT32_C( -582313126)), simde_mm256_set_epi32(INT32_C( 357521808), INT32_C( 1489594358), INT32_C(-1693320213), INT32_C( 1373628776), INT32_C( -817392977), INT32_C( -536101335), INT32_C( 1011260304), INT32_C( 796312463)), simde_mm256_set_epi32(INT32_C(-1243620966), INT32_C( 2076829695), INT32_C( -12846085), INT32_C( 1912598381), INT32_C( -277873985), INT32_C( -202375233), INT32_C( 1072668569), INT32_C( -8463393)) }, { simde_mm256_set_epi32(INT32_C(-1207931928), INT32_C( -834187886), INT32_C( -41133883), INT32_C( 1826519423), INT32_C(-1537225150), INT32_C(-1972742802), INT32_C(-1107144372), INT32_C( 2074601373)), simde_mm256_set_epi32(INT32_C( 1198219345), INT32_C( 680764868), INT32_C( 1929177864), INT32_C( 1458999019), INT32_C( 847246989), INT32_C( 902092277), INT32_C( 17517725), INT32_C( 814431018)), simde_mm256_set_epi32(INT32_C( -9737223), INT32_C( -287838762), INT32_C( -197683), INT32_C( 2130640895), INT32_C(-1233125681), INT32_C(-1074866689), INT32_C(-1089774115), INT32_C( 2075131839)) }, { simde_mm256_set_epi32(INT32_C( -137211173), INT32_C( -663911731), INT32_C( 1607469024), INT32_C( -419979231), INT32_C( 1165074553), INT32_C( 1528925488), INT32_C(-1523385372), INT32_C( -907053265)), 
simde_mm256_set_epi32(INT32_C( 994086113), INT32_C( 702992700), INT32_C( -270545927), INT32_C( -490915434), INT32_C( -743254762), INT32_C( 48033191), INT32_C(-2091867903), INT32_C( -756059930)), simde_mm256_set_epi32(INT32_C( -2957573), INT32_C( -101723139), INT32_C( -2109447), INT32_C( -419448393), INT32_C( -671877761), INT32_C( 1543368119), INT32_C(-1485636635), INT32_C( -605063185)) }, { simde_mm256_set_epi32(INT32_C( 1358203904), INT32_C( 915012873), INT32_C( 1992547669), INT32_C(-1717736064), INT32_C(-1421704847), INT32_C(-1530428724), INT32_C( 686978685), INT32_C( 376599363)), simde_mm256_set_epi32(INT32_C( 963177670), INT32_C( -310024670), INT32_C(-1429164258), INT32_C( 390666975), INT32_C( -901190881), INT32_C(-1166820314), INT32_C( 2002591871), INT32_C( -569097185)), simde_mm256_set_epi32(INT32_C( 2046618822), INT32_C( -7471317), INT32_C( -19660961), INT32_C(-1612873761), INT32_C( -347411585), INT32_C(-1091059986), INT32_C( 2147448447), INT32_C( -562661537)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_castpd_si256(simde_mm256_or_pd(simde_mm256_castsi256_pd(test_vec[i].a), simde_mm256_castsi256_pd(test_vec[i].b))); simde_assert_m256i_equal(r, test_vec[i].r); } return 0; } static int test_simde_mm256_permute_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { struct { simde__m256 a; simde__m256 r; } p[8]; } test_vec[1] = { { { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -139.04), SIMDE_FLOAT32_C( -911.51), SIMDE_FLOAT32_C( 580.20), SIMDE_FLOAT32_C( 982.03), SIMDE_FLOAT32_C( -878.62), SIMDE_FLOAT32_C( -797.57), SIMDE_FLOAT32_C( -426.64), SIMDE_FLOAT32_C( -818.52)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 982.03), SIMDE_FLOAT32_C( 982.03), SIMDE_FLOAT32_C( 982.03), SIMDE_FLOAT32_C( 982.03), SIMDE_FLOAT32_C( -818.52), SIMDE_FLOAT32_C( -818.52), SIMDE_FLOAT32_C( -818.52), SIMDE_FLOAT32_C( -818.52)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -946.18), SIMDE_FLOAT32_C( 580.43), SIMDE_FLOAT32_C( 276.12), SIMDE_FLOAT32_C( 
862.54), SIMDE_FLOAT32_C( 664.35), SIMDE_FLOAT32_C( 162.22), SIMDE_FLOAT32_C( 234.37), SIMDE_FLOAT32_C( 188.86)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 862.54), SIMDE_FLOAT32_C( 862.54), SIMDE_FLOAT32_C( 862.54), SIMDE_FLOAT32_C( 276.12), SIMDE_FLOAT32_C( 188.86), SIMDE_FLOAT32_C( 188.86), SIMDE_FLOAT32_C( 188.86), SIMDE_FLOAT32_C( 234.37)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -799.19), SIMDE_FLOAT32_C( -835.77), SIMDE_FLOAT32_C( -716.61), SIMDE_FLOAT32_C( 614.58), SIMDE_FLOAT32_C( 366.48), SIMDE_FLOAT32_C( 294.53), SIMDE_FLOAT32_C( -368.14), SIMDE_FLOAT32_C( 638.90)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 614.58), SIMDE_FLOAT32_C( 614.58), SIMDE_FLOAT32_C( 614.58), SIMDE_FLOAT32_C( -835.77), SIMDE_FLOAT32_C( 638.90), SIMDE_FLOAT32_C( 638.90), SIMDE_FLOAT32_C( 638.90), SIMDE_FLOAT32_C( 294.53)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 141.00), SIMDE_FLOAT32_C( -110.45), SIMDE_FLOAT32_C( -302.60), SIMDE_FLOAT32_C( -332.94), SIMDE_FLOAT32_C( 188.73), SIMDE_FLOAT32_C( 551.54), SIMDE_FLOAT32_C( -580.84), SIMDE_FLOAT32_C( 854.95)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -332.94), SIMDE_FLOAT32_C( -332.94), SIMDE_FLOAT32_C( -332.94), SIMDE_FLOAT32_C( 141.00), SIMDE_FLOAT32_C( 854.95), SIMDE_FLOAT32_C( 854.95), SIMDE_FLOAT32_C( 854.95), SIMDE_FLOAT32_C( 188.73)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 655.95), SIMDE_FLOAT32_C( -740.30), SIMDE_FLOAT32_C( -946.00), SIMDE_FLOAT32_C( -434.04), SIMDE_FLOAT32_C( 67.66), SIMDE_FLOAT32_C( 990.02), SIMDE_FLOAT32_C( 889.36), SIMDE_FLOAT32_C( 470.48)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -434.04), SIMDE_FLOAT32_C( -434.04), SIMDE_FLOAT32_C( -946.00), SIMDE_FLOAT32_C( -434.04), SIMDE_FLOAT32_C( 470.48), SIMDE_FLOAT32_C( 470.48), SIMDE_FLOAT32_C( 889.36), SIMDE_FLOAT32_C( 470.48)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 184.64), SIMDE_FLOAT32_C( 689.89), SIMDE_FLOAT32_C( 66.41), SIMDE_FLOAT32_C( 657.41), SIMDE_FLOAT32_C( -642.74), SIMDE_FLOAT32_C( 674.83), SIMDE_FLOAT32_C( -458.59), SIMDE_FLOAT32_C( -735.59)), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( 657.41), SIMDE_FLOAT32_C( 657.41), SIMDE_FLOAT32_C( 66.41), SIMDE_FLOAT32_C( 66.41), SIMDE_FLOAT32_C( -735.59), SIMDE_FLOAT32_C( -735.59), SIMDE_FLOAT32_C( -458.59), SIMDE_FLOAT32_C( -458.59)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -810.48), SIMDE_FLOAT32_C( -796.84), SIMDE_FLOAT32_C( 173.69), SIMDE_FLOAT32_C( -58.65), SIMDE_FLOAT32_C( -873.61), SIMDE_FLOAT32_C( -813.18), SIMDE_FLOAT32_C( -876.21), SIMDE_FLOAT32_C( -105.85)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -58.65), SIMDE_FLOAT32_C( -58.65), SIMDE_FLOAT32_C( 173.69), SIMDE_FLOAT32_C( -796.84), SIMDE_FLOAT32_C( -105.85), SIMDE_FLOAT32_C( -105.85), SIMDE_FLOAT32_C( -876.21), SIMDE_FLOAT32_C( -813.18)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -653.04), SIMDE_FLOAT32_C( -205.53), SIMDE_FLOAT32_C( -47.21), SIMDE_FLOAT32_C( -850.03), SIMDE_FLOAT32_C( -120.73), SIMDE_FLOAT32_C( -663.03), SIMDE_FLOAT32_C( 803.42), SIMDE_FLOAT32_C( 391.07)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -850.03), SIMDE_FLOAT32_C( -850.03), SIMDE_FLOAT32_C( -47.21), SIMDE_FLOAT32_C( -653.04), SIMDE_FLOAT32_C( 391.07), SIMDE_FLOAT32_C( 391.07), SIMDE_FLOAT32_C( 803.42), SIMDE_FLOAT32_C( -120.73)) }, }, } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r; r = simde_mm256_permute_ps(test_vec[i].p[0].a, 0); simde_assert_m256_close(r, test_vec[i].p[0].r, 1); r = simde_mm256_permute_ps(test_vec[i].p[1].a, 1); simde_assert_m256_close(r, test_vec[i].p[1].r, 1); r = simde_mm256_permute_ps(test_vec[i].p[2].a, 2); simde_assert_m256_close(r, test_vec[i].p[2].r, 1); r = simde_mm256_permute_ps(test_vec[i].p[3].a, 3); simde_assert_m256_close(r, test_vec[i].p[3].r, 1); r = simde_mm256_permute_ps(test_vec[i].p[4].a, 4); simde_assert_m256_close(r, test_vec[i].p[4].r, 1); r = simde_mm256_permute_ps(test_vec[i].p[5].a, 5); simde_assert_m256_close(r, test_vec[i].p[5].r, 1); r = simde_mm256_permute_ps(test_vec[i].p[6].a, 6); simde_assert_m256_close(r, test_vec[i].p[6].r, 1); r = 
simde_mm256_permute_ps(test_vec[i].p[7].a, 7); simde_assert_m256_close(r, test_vec[i].p[7].r, 1); } return 0; } static int test_simde_mm_permute_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { struct { simde__m128d a; simde__m128d r; } p[4]; } test_vec[8] = { { { { simde_mm_set_pd(SIMDE_FLOAT64_C( -586.66), SIMDE_FLOAT64_C( 759.07)), simde_mm_set_pd(SIMDE_FLOAT64_C( 759.07), SIMDE_FLOAT64_C( 759.07)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 312.86), SIMDE_FLOAT64_C( 489.25)), simde_mm_set_pd(SIMDE_FLOAT64_C( 489.25), SIMDE_FLOAT64_C( 312.86)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 510.93), SIMDE_FLOAT64_C( -731.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( 510.93), SIMDE_FLOAT64_C( -731.36)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 211.69), SIMDE_FLOAT64_C( 302.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( 211.69), SIMDE_FLOAT64_C( 211.69)) }, }, }, { { { simde_mm_set_pd(SIMDE_FLOAT64_C( 878.46), SIMDE_FLOAT64_C( 679.15)), simde_mm_set_pd(SIMDE_FLOAT64_C( 679.15), SIMDE_FLOAT64_C( 679.15)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -990.20), SIMDE_FLOAT64_C( -630.61)), simde_mm_set_pd(SIMDE_FLOAT64_C( -630.61), SIMDE_FLOAT64_C( -990.20)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 471.54), SIMDE_FLOAT64_C( -307.27)), simde_mm_set_pd(SIMDE_FLOAT64_C( 471.54), SIMDE_FLOAT64_C( -307.27)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 625.19), SIMDE_FLOAT64_C( 623.51)), simde_mm_set_pd(SIMDE_FLOAT64_C( 625.19), SIMDE_FLOAT64_C( 625.19)) }, }, }, { { { simde_mm_set_pd(SIMDE_FLOAT64_C( 637.56), SIMDE_FLOAT64_C( 480.86)), simde_mm_set_pd(SIMDE_FLOAT64_C( 480.86), SIMDE_FLOAT64_C( 480.86)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -358.97), SIMDE_FLOAT64_C( -958.25)), simde_mm_set_pd(SIMDE_FLOAT64_C( -958.25), SIMDE_FLOAT64_C( -358.97)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 867.67), SIMDE_FLOAT64_C( -39.52)), simde_mm_set_pd(SIMDE_FLOAT64_C( 867.67), SIMDE_FLOAT64_C( -39.52)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 596.21), SIMDE_FLOAT64_C( 558.65)), simde_mm_set_pd(SIMDE_FLOAT64_C( 596.21), SIMDE_FLOAT64_C( 596.21)) }, }, 
}, { { { simde_mm_set_pd(SIMDE_FLOAT64_C( 32.94), SIMDE_FLOAT64_C( -306.71)), simde_mm_set_pd(SIMDE_FLOAT64_C( -306.71), SIMDE_FLOAT64_C( -306.71)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 881.79), SIMDE_FLOAT64_C( 737.35)), simde_mm_set_pd(SIMDE_FLOAT64_C( 737.35), SIMDE_FLOAT64_C( 881.79)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 857.28), SIMDE_FLOAT64_C( 640.79)), simde_mm_set_pd(SIMDE_FLOAT64_C( 857.28), SIMDE_FLOAT64_C( 640.79)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 529.31), SIMDE_FLOAT64_C( 660.42)), simde_mm_set_pd(SIMDE_FLOAT64_C( 529.31), SIMDE_FLOAT64_C( 529.31)) }, }, }, { { { simde_mm_set_pd(SIMDE_FLOAT64_C( -645.32), SIMDE_FLOAT64_C( -500.25)), simde_mm_set_pd(SIMDE_FLOAT64_C( -500.25), SIMDE_FLOAT64_C( -500.25)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -862.75), SIMDE_FLOAT64_C( -773.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( -773.36), SIMDE_FLOAT64_C( -862.75)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 795.76), SIMDE_FLOAT64_C( 320.59)), simde_mm_set_pd(SIMDE_FLOAT64_C( 795.76), SIMDE_FLOAT64_C( 320.59)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -749.80), SIMDE_FLOAT64_C( 899.97)), simde_mm_set_pd(SIMDE_FLOAT64_C( -749.80), SIMDE_FLOAT64_C( -749.80)) }, }, }, { { { simde_mm_set_pd(SIMDE_FLOAT64_C( 711.35), SIMDE_FLOAT64_C( 304.45)), simde_mm_set_pd(SIMDE_FLOAT64_C( 304.45), SIMDE_FLOAT64_C( 304.45)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -92.64), SIMDE_FLOAT64_C( 45.68)), simde_mm_set_pd(SIMDE_FLOAT64_C( 45.68), SIMDE_FLOAT64_C( -92.64)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 261.59), SIMDE_FLOAT64_C( 207.13)), simde_mm_set_pd(SIMDE_FLOAT64_C( 261.59), SIMDE_FLOAT64_C( 207.13)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -540.32), SIMDE_FLOAT64_C( -11.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( -540.32), SIMDE_FLOAT64_C( -540.32)) }, }, }, { { { simde_mm_set_pd(SIMDE_FLOAT64_C( 11.20), SIMDE_FLOAT64_C( 175.87)), simde_mm_set_pd(SIMDE_FLOAT64_C( 175.87), SIMDE_FLOAT64_C( 175.87)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 274.15), SIMDE_FLOAT64_C( 152.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( 
152.30), SIMDE_FLOAT64_C( 274.15)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 673.19), SIMDE_FLOAT64_C( 979.94)), simde_mm_set_pd(SIMDE_FLOAT64_C( 673.19), SIMDE_FLOAT64_C( 979.94)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 36.65), SIMDE_FLOAT64_C( -391.85)), simde_mm_set_pd(SIMDE_FLOAT64_C( 36.65), SIMDE_FLOAT64_C( 36.65)) }, }, }, { { { simde_mm_set_pd(SIMDE_FLOAT64_C( -950.12), SIMDE_FLOAT64_C( -979.88)), simde_mm_set_pd(SIMDE_FLOAT64_C( -979.88), SIMDE_FLOAT64_C( -979.88)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 910.59), SIMDE_FLOAT64_C( 380.41)), simde_mm_set_pd(SIMDE_FLOAT64_C( 380.41), SIMDE_FLOAT64_C( 910.59)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -731.74), SIMDE_FLOAT64_C( 314.11)), simde_mm_set_pd(SIMDE_FLOAT64_C( -731.74), SIMDE_FLOAT64_C( 314.11)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -794.00), SIMDE_FLOAT64_C( -659.77)), simde_mm_set_pd(SIMDE_FLOAT64_C( -794.00), SIMDE_FLOAT64_C( -794.00)) }, }, } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r; r = simde_mm_permute_pd(test_vec[i].p[0].a, 0); simde_assert_m128d_close(r, test_vec[i].p[0].r, 1); r = simde_mm_permute_pd(test_vec[i].p[1].a, 1); simde_assert_m128d_close(r, test_vec[i].p[1].r, 1); r = simde_mm_permute_pd(test_vec[i].p[2].a, 2); simde_assert_m128d_close(r, test_vec[i].p[2].r, 1); r = simde_mm_permute_pd(test_vec[i].p[3].a, 3); simde_assert_m128d_close(r, test_vec[i].p[3].r, 1); } return 0; } static int test_simde_mm_permute_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -46.54), SIMDE_FLOAT32_C( -884.02), SIMDE_FLOAT32_C( -125.85), SIMDE_FLOAT32_C( -211.36)), // imm8 = 104 simde_mm_set_ps(SIMDE_FLOAT32_C( -125.85), SIMDE_FLOAT32_C( -884.02), SIMDE_FLOAT32_C( -884.02), SIMDE_FLOAT32_C( -211.36)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -126.12), SIMDE_FLOAT32_C( 654.39), SIMDE_FLOAT32_C( 987.87), SIMDE_FLOAT32_C( 213.63)), // imm8 = 15 simde_mm_set_ps(SIMDE_FLOAT32_C( 213.63), 
SIMDE_FLOAT32_C( 213.63), SIMDE_FLOAT32_C( -126.12), SIMDE_FLOAT32_C( -126.12)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 545.41), SIMDE_FLOAT32_C( -55.61), SIMDE_FLOAT32_C( 390.65), SIMDE_FLOAT32_C( -546.65)), // imm8 = 25 simde_mm_set_ps(SIMDE_FLOAT32_C( -546.65), SIMDE_FLOAT32_C( 390.65), SIMDE_FLOAT32_C( -55.61), SIMDE_FLOAT32_C( 390.65)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -570.93), SIMDE_FLOAT32_C( 337.51), SIMDE_FLOAT32_C( 48.49), SIMDE_FLOAT32_C( -941.32)), // imm8 = 21 simde_mm_set_ps(SIMDE_FLOAT32_C( -941.32), SIMDE_FLOAT32_C( 48.49), SIMDE_FLOAT32_C( 48.49), SIMDE_FLOAT32_C( 48.49)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 35.91), SIMDE_FLOAT32_C( -937.58), SIMDE_FLOAT32_C( 867.97), SIMDE_FLOAT32_C( -64.33)), // imm8 = 105 simde_mm_set_ps(SIMDE_FLOAT32_C( 867.97), SIMDE_FLOAT32_C( -937.58), SIMDE_FLOAT32_C( -937.58), SIMDE_FLOAT32_C( 867.97)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 592.52), SIMDE_FLOAT32_C( 823.80), SIMDE_FLOAT32_C( 377.28), SIMDE_FLOAT32_C( 174.06)), // imm8 = 246 simde_mm_set_ps(SIMDE_FLOAT32_C( 592.52), SIMDE_FLOAT32_C( 592.52), SIMDE_FLOAT32_C( 377.28), SIMDE_FLOAT32_C( 823.80)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -853.11), SIMDE_FLOAT32_C( 886.60), SIMDE_FLOAT32_C( -771.84), SIMDE_FLOAT32_C( -900.32)), // imm8 = 183 simde_mm_set_ps(SIMDE_FLOAT32_C( 886.60), SIMDE_FLOAT32_C( -853.11), SIMDE_FLOAT32_C( -771.84), SIMDE_FLOAT32_C( -853.11)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 59.21), SIMDE_FLOAT32_C( -625.11), SIMDE_FLOAT32_C( -350.66), SIMDE_FLOAT32_C( 873.23)), // imm8 = 169 simde_mm_set_ps(SIMDE_FLOAT32_C( -625.11), SIMDE_FLOAT32_C( -625.11), SIMDE_FLOAT32_C( -625.11), SIMDE_FLOAT32_C( -350.66)) } }; simde__m128 r; r = simde_mm_permute_ps(test_vec[0].a, 104); simde_assert_m128_equal(r, test_vec[0].r); r = simde_mm_permute_ps(test_vec[1].a, 15); simde_assert_m128_equal(r, test_vec[1].r); r = simde_mm_permute_ps(test_vec[2].a, 25); simde_assert_m128_equal(r, test_vec[2].r); r = simde_mm_permute_ps(test_vec[3].a, 21); 
simde_assert_m128_equal(r, test_vec[3].r);
  r = simde_mm_permute_ps(test_vec[4].a, 105);
  simde_assert_m128_equal(r, test_vec[4].r);
  r = simde_mm_permute_ps(test_vec[5].a, 246);
  simde_assert_m128_equal(r, test_vec[5].r);
  r = simde_mm_permute_ps(test_vec[6].a, 183);
  simde_assert_m128_equal(r, test_vec[6].r);
  r = simde_mm_permute_ps(test_vec[7].a, 169);
  simde_assert_m128_equal(r, test_vec[7].r);

  return 0;
}

/* Table-driven check of simde_mm256_permute_pd.
 *
 * Each of the 8 test_vec entries holds four inputs a[0..3] and four
 * expected results r[0..3].  Input a[k] is permuted with the literal
 * immediate k (0, 1, 2, 3) and the result is compared against r[k].
 * Because only immediates 0..3 are used, bits 2 and 3 of the immediate
 * are always zero in this test.
 *
 * Returns 0 after all comparisons have been executed. */
static int
test_simde_mm256_permute_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a[4];
    simde__m256d r[4];
  } test_vec[8] = {
    /* Layout of every entry: { { a[0], a[1], a[2], a[3] }, { r[0], r[1], r[2], r[3] } } */
    { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -307.33), SIMDE_FLOAT64_C( -277.83), SIMDE_FLOAT64_C( -811.26), SIMDE_FLOAT64_C( -340.98)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 520.01), SIMDE_FLOAT64_C( 20.96), SIMDE_FLOAT64_C( -217.27), SIMDE_FLOAT64_C( 475.98)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -236.99), SIMDE_FLOAT64_C( -515.97), SIMDE_FLOAT64_C( -252.10), SIMDE_FLOAT64_C( -367.50)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -740.61), SIMDE_FLOAT64_C( 459.66), SIMDE_FLOAT64_C( 780.67), SIMDE_FLOAT64_C( -928.66)) },
      { simde_mm256_set_pd(SIMDE_FLOAT64_C( -277.83), SIMDE_FLOAT64_C( -277.83), SIMDE_FLOAT64_C( -340.98), SIMDE_FLOAT64_C( -340.98)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 20.96), SIMDE_FLOAT64_C( 20.96), SIMDE_FLOAT64_C( 475.98), SIMDE_FLOAT64_C( -217.27)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -515.97), SIMDE_FLOAT64_C( -515.97), SIMDE_FLOAT64_C( -252.10), SIMDE_FLOAT64_C( -367.50)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 459.66), SIMDE_FLOAT64_C( 459.66), SIMDE_FLOAT64_C( 780.67), SIMDE_FLOAT64_C( 780.67)) } },
    { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 718.00), SIMDE_FLOAT64_C( -514.42), SIMDE_FLOAT64_C( -222.91), SIMDE_FLOAT64_C( -665.22)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 431.31), SIMDE_FLOAT64_C( -787.13), SIMDE_FLOAT64_C( -902.93), SIMDE_FLOAT64_C( -601.27)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -645.66), SIMDE_FLOAT64_C( 168.66), SIMDE_FLOAT64_C( 823.10), SIMDE_FLOAT64_C( 348.53)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -855.11), SIMDE_FLOAT64_C( 343.83), SIMDE_FLOAT64_C( 888.93), SIMDE_FLOAT64_C( 81.36)) },
      { simde_mm256_set_pd(SIMDE_FLOAT64_C( -514.42), SIMDE_FLOAT64_C( -514.42), SIMDE_FLOAT64_C( -665.22), SIMDE_FLOAT64_C( -665.22)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -787.13), SIMDE_FLOAT64_C( -787.13), SIMDE_FLOAT64_C( -601.27), SIMDE_FLOAT64_C( -902.93)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 168.66), SIMDE_FLOAT64_C( 168.66), SIMDE_FLOAT64_C( 823.10), SIMDE_FLOAT64_C( 348.53)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 343.83), SIMDE_FLOAT64_C( 343.83), SIMDE_FLOAT64_C( 888.93), SIMDE_FLOAT64_C( 888.93)) } },
    { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 482.80), SIMDE_FLOAT64_C( 651.20), SIMDE_FLOAT64_C( -299.11), SIMDE_FLOAT64_C( 660.92)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 562.16), SIMDE_FLOAT64_C( -407.46), SIMDE_FLOAT64_C( 470.74), SIMDE_FLOAT64_C( 663.86)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -609.81), SIMDE_FLOAT64_C( 224.41), SIMDE_FLOAT64_C( 638.76), SIMDE_FLOAT64_C( 609.66)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 950.61), SIMDE_FLOAT64_C( -221.62), SIMDE_FLOAT64_C( 198.64), SIMDE_FLOAT64_C( 472.05)) },
      { simde_mm256_set_pd(SIMDE_FLOAT64_C( 651.20), SIMDE_FLOAT64_C( 651.20), SIMDE_FLOAT64_C( 660.92), SIMDE_FLOAT64_C( 660.92)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -407.46), SIMDE_FLOAT64_C( -407.46), SIMDE_FLOAT64_C( 663.86), SIMDE_FLOAT64_C( 470.74)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 224.41), SIMDE_FLOAT64_C( 224.41), SIMDE_FLOAT64_C( 638.76), SIMDE_FLOAT64_C( 609.66)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -221.62), SIMDE_FLOAT64_C( -221.62), SIMDE_FLOAT64_C( 198.64), SIMDE_FLOAT64_C( 198.64)) } },
    { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -304.11), SIMDE_FLOAT64_C( 887.38), SIMDE_FLOAT64_C( -49.50), SIMDE_FLOAT64_C( -449.56)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -667.31), SIMDE_FLOAT64_C( 293.87), SIMDE_FLOAT64_C( -667.79), SIMDE_FLOAT64_C( 371.99)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 895.41), SIMDE_FLOAT64_C( 116.14), SIMDE_FLOAT64_C( 65.95), SIMDE_FLOAT64_C( -990.78)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -751.37), SIMDE_FLOAT64_C( -570.35), SIMDE_FLOAT64_C( -32.79), SIMDE_FLOAT64_C( 337.40)) },
      { simde_mm256_set_pd(SIMDE_FLOAT64_C( 887.38), SIMDE_FLOAT64_C( 887.38), SIMDE_FLOAT64_C( -449.56), SIMDE_FLOAT64_C( -449.56)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 293.87), SIMDE_FLOAT64_C( 293.87), SIMDE_FLOAT64_C( 371.99), SIMDE_FLOAT64_C( -667.79)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 116.14), SIMDE_FLOAT64_C( 116.14), SIMDE_FLOAT64_C( 65.95), SIMDE_FLOAT64_C( -990.78)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -570.35), SIMDE_FLOAT64_C( -570.35), SIMDE_FLOAT64_C( -32.79), SIMDE_FLOAT64_C( -32.79)) } },
    { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -78.53), SIMDE_FLOAT64_C( -723.45), SIMDE_FLOAT64_C( -594.84), SIMDE_FLOAT64_C( -504.83)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 599.38), SIMDE_FLOAT64_C( -102.58), SIMDE_FLOAT64_C( 369.99), SIMDE_FLOAT64_C( -58.86)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -705.85), SIMDE_FLOAT64_C( -561.88), SIMDE_FLOAT64_C( -855.33), SIMDE_FLOAT64_C( -876.41)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 200.30), SIMDE_FLOAT64_C( -816.59), SIMDE_FLOAT64_C( 495.88), SIMDE_FLOAT64_C( -20.39)) },
      { simde_mm256_set_pd(SIMDE_FLOAT64_C( -723.45), SIMDE_FLOAT64_C( -723.45), SIMDE_FLOAT64_C( -504.83), SIMDE_FLOAT64_C( -504.83)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -102.58), SIMDE_FLOAT64_C( -102.58), SIMDE_FLOAT64_C( -58.86), SIMDE_FLOAT64_C( 369.99)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -561.88), SIMDE_FLOAT64_C( -561.88), SIMDE_FLOAT64_C( -855.33), SIMDE_FLOAT64_C( -876.41)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -816.59), SIMDE_FLOAT64_C( -816.59), SIMDE_FLOAT64_C( 495.88), SIMDE_FLOAT64_C( 495.88)) } },
    { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -459.43), SIMDE_FLOAT64_C( 35.05), SIMDE_FLOAT64_C( -647.26), SIMDE_FLOAT64_C( -116.28)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 861.84), SIMDE_FLOAT64_C( 79.42), SIMDE_FLOAT64_C( -61.14), SIMDE_FLOAT64_C( -959.28)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -190.88), SIMDE_FLOAT64_C( 91.78), SIMDE_FLOAT64_C( 624.59), SIMDE_FLOAT64_C( -875.05)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -757.98), SIMDE_FLOAT64_C( -777.95), SIMDE_FLOAT64_C( -309.55), SIMDE_FLOAT64_C( 387.53)) },
      { simde_mm256_set_pd(SIMDE_FLOAT64_C( 35.05), SIMDE_FLOAT64_C( 35.05), SIMDE_FLOAT64_C( -116.28), SIMDE_FLOAT64_C( -116.28)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 79.42), SIMDE_FLOAT64_C( 79.42), SIMDE_FLOAT64_C( -959.28), SIMDE_FLOAT64_C( -61.14)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 91.78), SIMDE_FLOAT64_C( 91.78), SIMDE_FLOAT64_C( 624.59), SIMDE_FLOAT64_C( -875.05)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -777.95), SIMDE_FLOAT64_C( -777.95), SIMDE_FLOAT64_C( -309.55), SIMDE_FLOAT64_C( -309.55)) } },
    { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( -420.28), SIMDE_FLOAT64_C( -324.78), SIMDE_FLOAT64_C( -643.43)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 271.76), SIMDE_FLOAT64_C( -727.19), SIMDE_FLOAT64_C( 659.23), SIMDE_FLOAT64_C( 91.29)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 751.73), SIMDE_FLOAT64_C( 366.97), SIMDE_FLOAT64_C( 178.00), SIMDE_FLOAT64_C( -562.69)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 355.89), SIMDE_FLOAT64_C( 861.10), SIMDE_FLOAT64_C( 814.16), SIMDE_FLOAT64_C( 218.35)) },
      { simde_mm256_set_pd(SIMDE_FLOAT64_C( -420.28), SIMDE_FLOAT64_C( -420.28), SIMDE_FLOAT64_C( -643.43), SIMDE_FLOAT64_C( -643.43)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -727.19), SIMDE_FLOAT64_C( -727.19), SIMDE_FLOAT64_C( 91.29), SIMDE_FLOAT64_C( 659.23)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 366.97), SIMDE_FLOAT64_C( 366.97), SIMDE_FLOAT64_C( 178.00), SIMDE_FLOAT64_C( -562.69)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 861.10), SIMDE_FLOAT64_C( 861.10), SIMDE_FLOAT64_C( 814.16), SIMDE_FLOAT64_C( 814.16)) } },
    { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -344.58), SIMDE_FLOAT64_C( -961.29), SIMDE_FLOAT64_C( 602.43), SIMDE_FLOAT64_C( -99.06)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 915.17), SIMDE_FLOAT64_C( 886.67), SIMDE_FLOAT64_C( 631.07), SIMDE_FLOAT64_C( -393.04)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 815.36), SIMDE_FLOAT64_C( -920.33), SIMDE_FLOAT64_C( -701.98), SIMDE_FLOAT64_C( 230.05)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 408.01), SIMDE_FLOAT64_C( -369.61), SIMDE_FLOAT64_C( -195.80), SIMDE_FLOAT64_C( -161.14)) },
      { simde_mm256_set_pd(SIMDE_FLOAT64_C( -961.29), SIMDE_FLOAT64_C( -961.29), SIMDE_FLOAT64_C( -99.06), SIMDE_FLOAT64_C( -99.06)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( 886.67), SIMDE_FLOAT64_C( 886.67), SIMDE_FLOAT64_C( -393.04), SIMDE_FLOAT64_C( 631.07)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -920.33), SIMDE_FLOAT64_C( -920.33), SIMDE_FLOAT64_C( -701.98), SIMDE_FLOAT64_C( 230.05)),
        simde_mm256_set_pd(SIMDE_FLOAT64_C( -369.61), SIMDE_FLOAT64_C( -369.61), SIMDE_FLOAT64_C( -195.80), SIMDE_FLOAT64_C( -195.80)) } }
  };

  /* The immediates are spelled out as literals rather than derived from a
   * loop counter -- presumably because the immediate operand has to be a
   * compile-time constant (confirm against the simde_mm256_permute_pd
   * definition). */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256d r;
    /* NOTE(review): the trailing 1 is the precision argument of the _close
     * assertion; its exact unit is defined by the test helpers, not here. */
    r = simde_mm256_permute_pd(test_vec[i].a[0], 0);
    simde_assert_m256d_close(r, test_vec[i].r[0], 1);
    r = simde_mm256_permute_pd(test_vec[i].a[1], 1);
    simde_assert_m256d_close(r, test_vec[i].r[1], 1);
    r = simde_mm256_permute_pd(test_vec[i].a[2], 2);
    simde_assert_m256d_close(r, test_vec[i].r[2], 1);
    r = simde_mm256_permute_pd(test_vec[i].a[3], 3);
    simde_assert_m256d_close(r, test_vec[i].r[3], 1);
  }

  return 0;
}

static int
test_simde_mm_permutevar_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128 a;
    simde__m128i b;
    simde__m128 r;
  } test_vec[8] = {
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -220.03), SIMDE_FLOAT32_C( -102.32), SIMDE_FLOAT32_C( -878.65), SIMDE_FLOAT32_C( 736.04)),
      simde_mm_set_epi32(INT32_C( 1978615509), INT32_C( -298382064), INT32_C( 844888802), INT32_C(-1984258319)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -878.65), SIMDE_FLOAT32_C( 736.04), SIMDE_FLOAT32_C( -102.32), SIMDE_FLOAT32_C( -878.65)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -74.21), SIMDE_FLOAT32_C( 20.33), SIMDE_FLOAT32_C( -47.37), SIMDE_FLOAT32_C( -145.03)),
      simde_mm_set_epi32(INT32_C(-1739872531), INT32_C( 1398798289), INT32_C( 139283762), INT32_C( -468646578)),
      simde_mm_set_ps(SIMDE_FLOAT32_C(
-47.37), SIMDE_FLOAT32_C( -47.37), SIMDE_FLOAT32_C( 20.33), SIMDE_FLOAT32_C( 20.33)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 388.02), SIMDE_FLOAT32_C( 910.17), SIMDE_FLOAT32_C( -160.23), SIMDE_FLOAT32_C( -802.99)), simde_mm_set_epi32(INT32_C( 836573493), INT32_C(-1468644888), INT32_C( -506758879), INT32_C( -861763047)), simde_mm_set_ps(SIMDE_FLOAT32_C( -160.23), SIMDE_FLOAT32_C( -802.99), SIMDE_FLOAT32_C( -160.23), SIMDE_FLOAT32_C( -160.23)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 909.90), SIMDE_FLOAT32_C( 126.61), SIMDE_FLOAT32_C( 817.61), SIMDE_FLOAT32_C( 504.25)), simde_mm_set_epi32(INT32_C( 584238895), INT32_C( 928311120), INT32_C( -480157729), INT32_C( 870102815)), simde_mm_set_ps(SIMDE_FLOAT32_C( 909.90), SIMDE_FLOAT32_C( 504.25), SIMDE_FLOAT32_C( 909.90), SIMDE_FLOAT32_C( 909.90)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -446.77), SIMDE_FLOAT32_C( -55.86), SIMDE_FLOAT32_C( 873.78), SIMDE_FLOAT32_C( -757.74)), simde_mm_set_epi32(INT32_C(-1116848756), INT32_C( 67501238), INT32_C( 1151634701), INT32_C(-1833672337)), simde_mm_set_ps(SIMDE_FLOAT32_C( -757.74), SIMDE_FLOAT32_C( -55.86), SIMDE_FLOAT32_C( 873.78), SIMDE_FLOAT32_C( -446.77)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 17.75), SIMDE_FLOAT32_C( 909.79), SIMDE_FLOAT32_C( -854.76), SIMDE_FLOAT32_C( 845.21)), simde_mm_set_epi32(INT32_C( -650701315), INT32_C( -514022340), INT32_C( -456325153), INT32_C( 575846112)), simde_mm_set_ps(SIMDE_FLOAT32_C( -854.76), SIMDE_FLOAT32_C( 845.21), SIMDE_FLOAT32_C( 17.75), SIMDE_FLOAT32_C( 845.21)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -703.39), SIMDE_FLOAT32_C( 212.03), SIMDE_FLOAT32_C( 766.48), SIMDE_FLOAT32_C( 58.02)), simde_mm_set_epi32(INT32_C( 751532596), INT32_C( 2017282760), INT32_C( 1270374455), INT32_C( -795583425)), simde_mm_set_ps(SIMDE_FLOAT32_C( 58.02), SIMDE_FLOAT32_C( 58.02), SIMDE_FLOAT32_C( -703.39), SIMDE_FLOAT32_C( -703.39)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 101.14), SIMDE_FLOAT32_C( 114.68), SIMDE_FLOAT32_C( 986.99), SIMDE_FLOAT32_C( -651.72)), 
simde_mm_set_epi32(INT32_C( 1630473427), INT32_C( 1562779502), INT32_C( 1531074799), INT32_C(-1809767434)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 101.14), SIMDE_FLOAT32_C( 114.68), SIMDE_FLOAT32_C( 101.14), SIMDE_FLOAT32_C( 114.68)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m128 r = simde_mm_permutevar_ps(test_vec[i].a, test_vec[i].b);
    simde_assert_m128_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Table-driven check of simde_mm_permutevar_pd.
 *
 * Each of the 8 test_vec entries holds a source vector a, a 128-bit
 * integer control vector b (built from two 64-bit values), and the
 * expected result r.  The function under test is called with (a, b) and
 * its result must match r exactly (simde_assert_m128d_equal).
 *
 * NOTE(review): the expected values are consistent with bit 1 of each
 * 64-bit element of b choosing which element of a is copied; the other
 * control bits look like random filler.  Confirm against the intrinsic
 * reference before relying on this.
 *
 * Returns 0 after all entries have been checked. */
static int
test_simde_mm_permutevar_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128d a;
    simde__m128i b;
    simde__m128d r;
  } test_vec[8] = {
    /* Layout of every entry: { a, b, r } */
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -190.00), SIMDE_FLOAT64_C( -363.64)),
      simde_mm_set_epi64x(INT64_C(-3679719263685326635), INT64_C(-5014117432834044471)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -363.64), SIMDE_FLOAT64_C( -363.64)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( 107.93), SIMDE_FLOAT64_C( 497.72)),
      simde_mm_set_epi64x(INT64_C(1238725724228652833), INT64_C(2295575207610739945)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( 497.72), SIMDE_FLOAT64_C( 497.72)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -71.89), SIMDE_FLOAT64_C( -692.84)),
      simde_mm_set_epi64x(INT64_C(3235819707285929243), INT64_C(-60491261046190647)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -71.89), SIMDE_FLOAT64_C( -692.84)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -729.73), SIMDE_FLOAT64_C( 585.33)),
      simde_mm_set_epi64x(INT64_C(2481877852619205882), INT64_C(3545311239979806958)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -729.73), SIMDE_FLOAT64_C( -729.73)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -735.74), SIMDE_FLOAT64_C( 646.64)),
      simde_mm_set_epi64x(INT64_C(-8424077459294111103), INT64_C(-6727548776374001581)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( 646.64), SIMDE_FLOAT64_C( -735.74)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( 403.13), SIMDE_FLOAT64_C( -882.62)),
      simde_mm_set_epi64x(INT64_C(-731202554371506341), INT64_C(7811413526677278696)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( 403.13), SIMDE_FLOAT64_C( -882.62)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( 910.78), SIMDE_FLOAT64_C( 162.04)),
      simde_mm_set_epi64x(INT64_C(-7914651116933831795), INT64_C(-7291667702753737699)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( 162.04), SIMDE_FLOAT64_C( 162.04)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -194.64), SIMDE_FLOAT64_C( 21.61)),
      simde_mm_set_epi64x(INT64_C(-8162260555449998511), INT64_C(-7079262047989370805)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( 21.61), SIMDE_FLOAT64_C( -194.64)) }
  };

  /* Iterate over the whole table; the bound is derived from the array so
   * it follows the number of entries automatically. */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m128d r = simde_mm_permutevar_pd(test_vec[i].a, test_vec[i].b);
    /* Exact comparison, unlike the _close assertion used by the
     * neighbouring permutevar tests. */
    simde_assert_m128d_equal(r, test_vec[i].r);
  }

  return 0;
}

static int
test_simde_mm256_permutevar_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m256i b;
    simde__m256 r;
  } test_vec[8] = {
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( -581.11), SIMDE_FLOAT32_C( 662.67), SIMDE_FLOAT32_C( 749.10), SIMDE_FLOAT32_C( 794.46),
                         SIMDE_FLOAT32_C( 351.98), SIMDE_FLOAT32_C( 95.47), SIMDE_FLOAT32_C( -323.47), SIMDE_FLOAT32_C( 766.08)),
      simde_mm256_set_epi32(INT32_C(-1995089848), INT32_C(-1205354020), INT32_C( 942463332), INT32_C( 2042714882),
                            INT32_C( -3085894), INT32_C( 1293825925), INT32_C( 1293195492), INT32_C( -942672880)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( 794.46), SIMDE_FLOAT32_C( 794.46), SIMDE_FLOAT32_C( 794.46), SIMDE_FLOAT32_C( 662.67),
                         SIMDE_FLOAT32_C( 95.47), SIMDE_FLOAT32_C( -323.47), SIMDE_FLOAT32_C( 766.08), SIMDE_FLOAT32_C( 766.08)) },
    { simde_mm256_set_ps(SIMDE_FLOAT32_C( 553.57), SIMDE_FLOAT32_C( -638.78), SIMDE_FLOAT32_C( -812.35), SIMDE_FLOAT32_C( -453.08),
                         SIMDE_FLOAT32_C( 690.64), SIMDE_FLOAT32_C( -358.14), SIMDE_FLOAT32_C( 491.09), SIMDE_FLOAT32_C( 642.81)),
      simde_mm256_set_epi32(INT32_C( 74819979), INT32_C(-1724654262), INT32_C( 1654056695), INT32_C( 767929859),
                            INT32_C(-1773750147), INT32_C( 1504941571), INT32_C(-1277766239), INT32_C( -474163433)),
      simde_mm256_set_ps(SIMDE_FLOAT32_C( 553.57), SIMDE_FLOAT32_C( -638.78), SIMDE_FLOAT32_C( 553.57), SIMDE_FLOAT32_C( 553.57),
                         SIMDE_FLOAT32_C( 491.09), SIMDE_FLOAT32_C(
690.64), SIMDE_FLOAT32_C( 491.09), SIMDE_FLOAT32_C( 690.64)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -359.39), SIMDE_FLOAT32_C( -64.37), SIMDE_FLOAT32_C( -720.24), SIMDE_FLOAT32_C( -724.37), SIMDE_FLOAT32_C( -455.62), SIMDE_FLOAT32_C( 674.74), SIMDE_FLOAT32_C( -589.17), SIMDE_FLOAT32_C( -867.21)), simde_mm256_set_epi32(INT32_C(-1090291188), INT32_C(-2009955584), INT32_C(-1244380880), INT32_C( 2087210230), INT32_C( 1556610240), INT32_C( 347022662), INT32_C( -341820489), INT32_C( 209748637)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -724.37), SIMDE_FLOAT32_C( -724.37), SIMDE_FLOAT32_C( -724.37), SIMDE_FLOAT32_C( -64.37), SIMDE_FLOAT32_C( -867.21), SIMDE_FLOAT32_C( 674.74), SIMDE_FLOAT32_C( -455.62), SIMDE_FLOAT32_C( -589.17)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -688.17), SIMDE_FLOAT32_C( 265.16), SIMDE_FLOAT32_C( 223.77), SIMDE_FLOAT32_C( 457.91), SIMDE_FLOAT32_C( -449.18), SIMDE_FLOAT32_C( 418.80), SIMDE_FLOAT32_C( -219.32), SIMDE_FLOAT32_C( -798.45)), simde_mm256_set_epi32(INT32_C(-2022840556), INT32_C( 147366607), INT32_C( 1340044144), INT32_C(-1339633728), INT32_C(-2141656242), INT32_C( 179357343), INT32_C( -169847688), INT32_C( 944580448)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 457.91), SIMDE_FLOAT32_C( -688.17), SIMDE_FLOAT32_C( 457.91), SIMDE_FLOAT32_C( 457.91), SIMDE_FLOAT32_C( 418.80), SIMDE_FLOAT32_C( -449.18), SIMDE_FLOAT32_C( -798.45), SIMDE_FLOAT32_C( -798.45)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -894.16), SIMDE_FLOAT32_C( -261.98), SIMDE_FLOAT32_C( -668.79), SIMDE_FLOAT32_C( -535.27), SIMDE_FLOAT32_C( 295.60), SIMDE_FLOAT32_C( -624.10), SIMDE_FLOAT32_C( -218.41), SIMDE_FLOAT32_C( -239.98)), simde_mm256_set_epi32(INT32_C(-1987732124), INT32_C(-1043251572), INT32_C(-1915492365), INT32_C( 1239473734), INT32_C( 1924578330), INT32_C( 667857703), INT32_C( 1334096582), INT32_C(-1561092382)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -535.27), SIMDE_FLOAT32_C( -535.27), SIMDE_FLOAT32_C( -894.16), SIMDE_FLOAT32_C( -261.98), SIMDE_FLOAT32_C( -624.10), 
SIMDE_FLOAT32_C( 295.60), SIMDE_FLOAT32_C( -624.10), SIMDE_FLOAT32_C( -624.10)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 282.55), SIMDE_FLOAT32_C( 313.26), SIMDE_FLOAT32_C( -349.67), SIMDE_FLOAT32_C( -128.29), SIMDE_FLOAT32_C( 298.40), SIMDE_FLOAT32_C( -200.08), SIMDE_FLOAT32_C( -322.88), SIMDE_FLOAT32_C( 643.97)), simde_mm256_set_epi32(INT32_C(-1231918378), INT32_C(-1967971864), INT32_C( 1721865701), INT32_C( -553468547), INT32_C( -723509981), INT32_C( 1588622188), INT32_C( 1625856378), INT32_C(-1426622327)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 313.26), SIMDE_FLOAT32_C( -128.29), SIMDE_FLOAT32_C( -349.67), SIMDE_FLOAT32_C( -349.67), SIMDE_FLOAT32_C( 298.40), SIMDE_FLOAT32_C( 643.97), SIMDE_FLOAT32_C( -200.08), SIMDE_FLOAT32_C( -322.88)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -773.19), SIMDE_FLOAT32_C( 149.49), SIMDE_FLOAT32_C( -417.32), SIMDE_FLOAT32_C( -747.01), SIMDE_FLOAT32_C( 553.89), SIMDE_FLOAT32_C( -499.06), SIMDE_FLOAT32_C( -480.71), SIMDE_FLOAT32_C( -871.55)), simde_mm256_set_epi32(INT32_C(-1619504079), INT32_C( 234838625), INT32_C( 1611169016), INT32_C( 708864983), INT32_C( 595455017), INT32_C( -338961641), INT32_C( 1283075935), INT32_C( -90174648)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -417.32), SIMDE_FLOAT32_C( -417.32), SIMDE_FLOAT32_C( -747.01), SIMDE_FLOAT32_C( -773.19), SIMDE_FLOAT32_C( -480.71), SIMDE_FLOAT32_C( 553.89), SIMDE_FLOAT32_C( 553.89), SIMDE_FLOAT32_C( -871.55)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 770.41), SIMDE_FLOAT32_C( -575.75), SIMDE_FLOAT32_C( -694.46), SIMDE_FLOAT32_C( 878.16), SIMDE_FLOAT32_C( 230.89), SIMDE_FLOAT32_C( -700.74), SIMDE_FLOAT32_C( -243.26), SIMDE_FLOAT32_C( 192.97)), simde_mm256_set_epi32(INT32_C(-1612783450), INT32_C( 2104159364), INT32_C( -271090577), INT32_C( 962282198), INT32_C(-1614359330), INT32_C( -824400343), INT32_C( -259439032), INT32_C( -336808887)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -575.75), SIMDE_FLOAT32_C( 878.16), SIMDE_FLOAT32_C( 770.41), SIMDE_FLOAT32_C( -575.75), SIMDE_FLOAT32_C( 
-700.74), SIMDE_FLOAT32_C( -243.26), SIMDE_FLOAT32_C( 192.97), SIMDE_FLOAT32_C( -243.26)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256 r = simde_mm256_permutevar_ps(test_vec[i].a, test_vec[i].b);
    simde_assert_m256_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Table-driven check of simde_mm256_permutevar_pd.
 *
 * Each of the 8 test_vec entries holds a source vector a, a 256-bit
 * integer control vector b (built from four 64-bit values), and the
 * expected result r.  The function under test is called with (a, b) and
 * its result is compared against r with simde_assert_m256d_close.
 *
 * NOTE(review): the expected values are consistent with bit 1 of each
 * 64-bit element of b choosing between the two elements of the same
 * 128-bit half of a; confirm against the intrinsic reference.
 *
 * Returns 0 after all entries have been checked. */
static int
test_simde_mm256_permutevar_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;
    simde__m256i b;
    simde__m256d r;
  } test_vec[8] = {
    /* Layout of every entry: { a, b, r } */
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( 191.45), SIMDE_FLOAT64_C( 955.97), SIMDE_FLOAT64_C( -381.93), SIMDE_FLOAT64_C( -276.35)),
      simde_mm256_set_epi64x(INT64_C( 7847047898918917938), INT64_C(-2237739371695600451), INT64_C(-5921100696665465273), INT64_C(-3629132568613815239)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( 191.45), SIMDE_FLOAT64_C( 955.97), SIMDE_FLOAT64_C( -381.93), SIMDE_FLOAT64_C( -276.35)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -864.55), SIMDE_FLOAT64_C( 105.60), SIMDE_FLOAT64_C( 308.22), SIMDE_FLOAT64_C( -262.99)),
      simde_mm256_set_epi64x(INT64_C( 1954446392539316319), INT64_C(-5867362525432575314), INT64_C(-5609592881024898283), INT64_C(-2732169121859970729)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -864.55), SIMDE_FLOAT64_C( -864.55), SIMDE_FLOAT64_C( -262.99), SIMDE_FLOAT64_C( 308.22)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -415.61), SIMDE_FLOAT64_C( 408.71), SIMDE_FLOAT64_C( -24.71), SIMDE_FLOAT64_C( 850.59)),
      simde_mm256_set_epi64x(INT64_C( 8155867202589355926), INT64_C(-4551757813155184517), INT64_C( 4070473136336150836), INT64_C( 8294293362513343506)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -415.61), SIMDE_FLOAT64_C( -415.61), SIMDE_FLOAT64_C( 850.59), SIMDE_FLOAT64_C( -24.71)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -858.92), SIMDE_FLOAT64_C( -409.27), SIMDE_FLOAT64_C( 940.24), SIMDE_FLOAT64_C( 118.21)),
      simde_mm256_set_epi64x(INT64_C(-6819188498234901479), INT64_C( 2067633441850695354), INT64_C( 371129412881073798), INT64_C(-4745838610152722297)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -409.27), SIMDE_FLOAT64_C( -858.92), SIMDE_FLOAT64_C( 940.24), SIMDE_FLOAT64_C( 940.24)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -35.59), SIMDE_FLOAT64_C( 620.23), SIMDE_FLOAT64_C( 173.49), SIMDE_FLOAT64_C( -242.33)),
      simde_mm256_set_epi64x(INT64_C( 6267827345436252242), INT64_C( 7757337633506703794), INT64_C( 8397042844771135785), INT64_C( 4768191744605903319)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -35.59), SIMDE_FLOAT64_C( -35.59), SIMDE_FLOAT64_C( -242.33), SIMDE_FLOAT64_C( 173.49)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -615.22), SIMDE_FLOAT64_C( -205.25), SIMDE_FLOAT64_C( 427.82), SIMDE_FLOAT64_C( -695.42)),
      simde_mm256_set_epi64x(INT64_C(-4627283775150795805), INT64_C( 5796432689531982886), INT64_C(-5333403376253040789), INT64_C(-1223877538147285054)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -615.22), SIMDE_FLOAT64_C( -615.22), SIMDE_FLOAT64_C( 427.82), SIMDE_FLOAT64_C( 427.82)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( 461.58), SIMDE_FLOAT64_C( -322.38), SIMDE_FLOAT64_C( -747.07), SIMDE_FLOAT64_C( -350.25)),
      simde_mm256_set_epi64x(INT64_C(-4907910955860203917), INT64_C(-1172835446387939434), INT64_C( 6316430026104479052), INT64_C( 8850000004913574542)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( 461.58), SIMDE_FLOAT64_C( 461.58), SIMDE_FLOAT64_C( -350.25), SIMDE_FLOAT64_C( -747.07)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( 705.05), SIMDE_FLOAT64_C( 49.33), SIMDE_FLOAT64_C( -85.30), SIMDE_FLOAT64_C( 936.63)),
      simde_mm256_set_epi64x(INT64_C( 8506270823776015936), INT64_C(-7945266156798964263), INT64_C( 7749717350625346930), INT64_C( -406852585870799824)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( 49.33), SIMDE_FLOAT64_C( 49.33), SIMDE_FLOAT64_C( -85.30), SIMDE_FLOAT64_C( 936.63)) }
  };

  /* Iterate over the whole table; the bound is derived from the array so
   * it follows the number of entries automatically. */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256d r = simde_mm256_permutevar_pd(test_vec[i].a, test_vec[i].b);
    /* NOTE(review): the trailing 1 is the precision argument of the _close
     * assertion; its exact unit is defined by the test helpers, not here. */
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

static int
test_simde_mm256_permute2f128_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256 a;
    simde__m256 b;
simde__m256 r; } test_vec[] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -376.93), SIMDE_FLOAT32_C( -598.80), SIMDE_FLOAT32_C( 335.44), SIMDE_FLOAT32_C( -614.52), SIMDE_FLOAT32_C( 219.29), SIMDE_FLOAT32_C( -425.58), SIMDE_FLOAT32_C( 790.46), SIMDE_FLOAT32_C( 701.47)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -146.60), SIMDE_FLOAT32_C( 813.49), SIMDE_FLOAT32_C( -148.37), SIMDE_FLOAT32_C( -614.66), SIMDE_FLOAT32_C( 951.32), SIMDE_FLOAT32_C( -49.79), SIMDE_FLOAT32_C( 618.54), SIMDE_FLOAT32_C( -94.32)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 219.29), SIMDE_FLOAT32_C( -425.58), SIMDE_FLOAT32_C( 790.46), SIMDE_FLOAT32_C( 701.47), SIMDE_FLOAT32_C( 219.29), SIMDE_FLOAT32_C( -425.58), SIMDE_FLOAT32_C( 790.46), SIMDE_FLOAT32_C( 701.47)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C( -24.42), SIMDE_FLOAT32_C( 78.54), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 204.91), SIMDE_FLOAT32_C( 161.00), SIMDE_FLOAT32_C( 230.93), SIMDE_FLOAT32_C( 108.17), SIMDE_FLOAT32_C( 327.81), SIMDE_FLOAT32_C( -178.38), SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00), SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C( -24.42), SIMDE_FLOAT32_C( 78.54), SIMDE_FLOAT32_C( -19.08)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C( -24.42), SIMDE_FLOAT32_C( 78.54), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 204.91), SIMDE_FLOAT32_C( 161.00), SIMDE_FLOAT32_C( 230.93), SIMDE_FLOAT32_C( 108.17), SIMDE_FLOAT32_C( 327.81), SIMDE_FLOAT32_C( -178.38), SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( 
-917.00), SIMDE_FLOAT32_C( 327.81), SIMDE_FLOAT32_C( -178.38), SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C( -24.42), SIMDE_FLOAT32_C( 78.54), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 204.91), SIMDE_FLOAT32_C( 161.00), SIMDE_FLOAT32_C( 230.93), SIMDE_FLOAT32_C( 108.17), SIMDE_FLOAT32_C( 327.81), SIMDE_FLOAT32_C( -178.38), SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00), SIMDE_FLOAT32_C( 204.91), SIMDE_FLOAT32_C( 161.00), SIMDE_FLOAT32_C( 230.93), SIMDE_FLOAT32_C( 108.17)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C( -24.42), SIMDE_FLOAT32_C( 78.54), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 204.91), SIMDE_FLOAT32_C( 161.00), SIMDE_FLOAT32_C( 230.93), SIMDE_FLOAT32_C( 108.17), SIMDE_FLOAT32_C( 327.81), SIMDE_FLOAT32_C( -178.38), SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00), SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C( -24.42), SIMDE_FLOAT32_C( 78.54), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 204.91), SIMDE_FLOAT32_C( 161.00), SIMDE_FLOAT32_C( 230.93), SIMDE_FLOAT32_C( 108.17), SIMDE_FLOAT32_C( 327.81), SIMDE_FLOAT32_C( -178.38), SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00), SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C( -24.42), SIMDE_FLOAT32_C( 78.54), SIMDE_FLOAT32_C( -19.08)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C( -24.42), SIMDE_FLOAT32_C( 78.54), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 204.91), SIMDE_FLOAT32_C( 161.00), SIMDE_FLOAT32_C( 230.93), SIMDE_FLOAT32_C( 108.17), SIMDE_FLOAT32_C( 327.81), SIMDE_FLOAT32_C( -178.38), SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00), SIMDE_FLOAT32_C( 327.81), SIMDE_FLOAT32_C( -178.38), SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C( -24.42), SIMDE_FLOAT32_C( 78.54), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 204.91), SIMDE_FLOAT32_C( 161.00), SIMDE_FLOAT32_C( 230.93), SIMDE_FLOAT32_C( 108.17), SIMDE_FLOAT32_C( 327.81), SIMDE_FLOAT32_C( -178.38), SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00), SIMDE_FLOAT32_C( 204.91), SIMDE_FLOAT32_C( 161.00), SIMDE_FLOAT32_C( 230.93), SIMDE_FLOAT32_C( 108.17)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C( -24.42), SIMDE_FLOAT32_C( 78.54), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 204.91), SIMDE_FLOAT32_C( 161.00), SIMDE_FLOAT32_C( 230.93), SIMDE_FLOAT32_C( 108.17), SIMDE_FLOAT32_C( 327.81), 
SIMDE_FLOAT32_C( -178.38), SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C( -24.42), SIMDE_FLOAT32_C( 78.54), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 204.91), SIMDE_FLOAT32_C( 161.00), SIMDE_FLOAT32_C( 230.93), SIMDE_FLOAT32_C( 108.17), SIMDE_FLOAT32_C( 327.81), SIMDE_FLOAT32_C( -178.38), SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C( -24.42), SIMDE_FLOAT32_C( 78.54), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 204.91), SIMDE_FLOAT32_C( 161.00), SIMDE_FLOAT32_C( 230.93), SIMDE_FLOAT32_C( 108.17), SIMDE_FLOAT32_C( 327.81), SIMDE_FLOAT32_C( -178.38), SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C( -24.42), SIMDE_FLOAT32_C( 78.54), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 204.91), SIMDE_FLOAT32_C( 161.00), SIMDE_FLOAT32_C( 
230.93), SIMDE_FLOAT32_C( 108.17), SIMDE_FLOAT32_C( 327.81), SIMDE_FLOAT32_C( -178.38), SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C( -24.42), SIMDE_FLOAT32_C( 78.54), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 204.91), SIMDE_FLOAT32_C( 161.00), SIMDE_FLOAT32_C( 230.93), SIMDE_FLOAT32_C( 108.17), SIMDE_FLOAT32_C( 327.81), SIMDE_FLOAT32_C( -178.38), SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C( -24.42), SIMDE_FLOAT32_C( 78.54), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 204.91), SIMDE_FLOAT32_C( 161.00), SIMDE_FLOAT32_C( 230.93), SIMDE_FLOAT32_C( 108.17), SIMDE_FLOAT32_C( 327.81), SIMDE_FLOAT32_C( -178.38), SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C( -24.42), SIMDE_FLOAT32_C( 78.54), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00)), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( 204.91), SIMDE_FLOAT32_C( 161.00), SIMDE_FLOAT32_C( 230.93), SIMDE_FLOAT32_C( 108.17), SIMDE_FLOAT32_C( 327.81), SIMDE_FLOAT32_C( -178.38), SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.90), SIMDE_FLOAT32_C( -24.42), SIMDE_FLOAT32_C( 78.54), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 204.91), SIMDE_FLOAT32_C( 161.00), SIMDE_FLOAT32_C( 230.93), SIMDE_FLOAT32_C( 108.17), SIMDE_FLOAT32_C( 327.81), SIMDE_FLOAT32_C( -178.38), SIMDE_FLOAT32_C( -611.38), SIMDE_FLOAT32_C( -189.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 380.38), SIMDE_FLOAT32_C( -917.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) } }; simde__m256 r; r = simde_mm256_permute2f128_ps(test_vec[0x0].a, test_vec[0x0].b, 0x0); simde_assert_m256_close(r, test_vec[0x0].r, 1); r = simde_mm256_permute2f128_ps(test_vec[0x1].a, test_vec[0x1].b, 0x1); simde_assert_m256_close(r, test_vec[0x1].r, 1); r = simde_mm256_permute2f128_ps(test_vec[0x2].a, test_vec[0x2].b, 0x2); simde_assert_m256_close(r, test_vec[0x2].r, 1); r = simde_mm256_permute2f128_ps(test_vec[0x3].a, test_vec[0x3].b, 0x3); simde_assert_m256_close(r, test_vec[0x3].r, 1); r = simde_mm256_permute2f128_ps(test_vec[0x4].a, test_vec[0x4].b, 0x4); simde_assert_m256_close(r, test_vec[0x4].r, 1); r = simde_mm256_permute2f128_ps(test_vec[0x5].a, test_vec[0x5].b, 0x5); simde_assert_m256_close(r, test_vec[0x5].r, 1); r = simde_mm256_permute2f128_ps(test_vec[0x6].a, test_vec[0x6].b, 0x6); simde_assert_m256_close(r, test_vec[0x6].r, 1); r = 
simde_mm256_permute2f128_ps(test_vec[0x7].a, test_vec[0x7].b, 0x7); simde_assert_m256_close(r, test_vec[0x7].r, 1); r = simde_mm256_permute2f128_ps(test_vec[0x8].a, test_vec[0x8].b, 0x8); simde_assert_m256_close(r, test_vec[0x8].r, 1); r = simde_mm256_permute2f128_ps(test_vec[0x9].a, test_vec[0x9].b, 0x9); simde_assert_m256_close(r, test_vec[0x9].r, 1); r = simde_mm256_permute2f128_ps(test_vec[0xa].a, test_vec[0xa].b, 0xa); simde_assert_m256_close(r, test_vec[0xa].r, 1); r = simde_mm256_permute2f128_ps(test_vec[0xb].a, test_vec[0xb].b, 0xb); simde_assert_m256_close(r, test_vec[0xb].r, 1); r = simde_mm256_permute2f128_ps(test_vec[0xc].a, test_vec[0xc].b, 0xc); simde_assert_m256_close(r, test_vec[0xc].r, 1); r = simde_mm256_permute2f128_ps(test_vec[0xd].a, test_vec[0xd].b, 0xd); simde_assert_m256_close(r, test_vec[0xd].r, 1); r = simde_mm256_permute2f128_ps(test_vec[0xe].a, test_vec[0xe].b, 0xe); simde_assert_m256_close(r, test_vec[0xe].r, 1); r = simde_mm256_permute2f128_ps(test_vec[0xf].a, test_vec[0xf].b, 0xf); simde_assert_m256_close(r, test_vec[0xf].r, 1); return 0; } static int test_simde_mm256_rcp_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -908.92), SIMDE_FLOAT32_C( -201.59), SIMDE_FLOAT32_C( 3.47), SIMDE_FLOAT32_C( 829.08), SIMDE_FLOAT32_C( -86.36), SIMDE_FLOAT32_C( 780.02), SIMDE_FLOAT32_C( 13.29), SIMDE_FLOAT32_C( 492.53)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 745.72), SIMDE_FLOAT32_C( -860.90), SIMDE_FLOAT32_C( 647.35), SIMDE_FLOAT32_C( -932.06), SIMDE_FLOAT32_C( 782.22), SIMDE_FLOAT32_C( 232.69), SIMDE_FLOAT32_C( 88.27), SIMDE_FLOAT32_C( -882.29)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 
-0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 750.74), SIMDE_FLOAT32_C( -90.83), SIMDE_FLOAT32_C( 949.51), SIMDE_FLOAT32_C( 177.31), SIMDE_FLOAT32_C( -204.98), SIMDE_FLOAT32_C( 340.91), SIMDE_FLOAT32_C( -39.69), SIMDE_FLOAT32_C( -715.33)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( -0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -147.11), SIMDE_FLOAT32_C( 588.21), SIMDE_FLOAT32_C( 521.36), SIMDE_FLOAT32_C( -659.55), SIMDE_FLOAT32_C( 932.00), SIMDE_FLOAT32_C( 548.33), SIMDE_FLOAT32_C( 639.13), SIMDE_FLOAT32_C( -316.06)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -794.81), SIMDE_FLOAT32_C( 88.08), SIMDE_FLOAT32_C( -540.52), SIMDE_FLOAT32_C( 32.82), SIMDE_FLOAT32_C( -921.78), SIMDE_FLOAT32_C( -970.13), SIMDE_FLOAT32_C( 659.29), SIMDE_FLOAT32_C( -464.98)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -853.91), SIMDE_FLOAT32_C( 837.38), SIMDE_FLOAT32_C( -478.03), SIMDE_FLOAT32_C( 330.06), SIMDE_FLOAT32_C( 627.16), SIMDE_FLOAT32_C( 535.10), SIMDE_FLOAT32_C( -787.00), SIMDE_FLOAT32_C( 376.04)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -244.65), SIMDE_FLOAT32_C( 415.43), 
SIMDE_FLOAT32_C( 415.27), SIMDE_FLOAT32_C( 243.86), SIMDE_FLOAT32_C( 475.16), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -509.99), SIMDE_FLOAT32_C( -861.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.89), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -819.61), SIMDE_FLOAT32_C( -441.38), SIMDE_FLOAT32_C( -736.01), SIMDE_FLOAT32_C( 681.16), SIMDE_FLOAT32_C( -798.05), SIMDE_FLOAT32_C( 561.39), SIMDE_FLOAT32_C( 116.98), SIMDE_FLOAT32_C( -372.62)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_rcp_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_round_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 nearest; simde__m256 neg_inf; simde__m256 pos_inf; simde__m256 truncate; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -437.99), SIMDE_FLOAT32_C( 332.86), SIMDE_FLOAT32_C( 531.55), SIMDE_FLOAT32_C( 188.24), SIMDE_FLOAT32_C( 135.31), SIMDE_FLOAT32_C( -341.69), SIMDE_FLOAT32_C( -995.08), SIMDE_FLOAT32_C( -84.86)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -438.00), SIMDE_FLOAT32_C( 333.00), SIMDE_FLOAT32_C( 532.00), SIMDE_FLOAT32_C( 188.00), SIMDE_FLOAT32_C( 135.00), SIMDE_FLOAT32_C( -342.00), SIMDE_FLOAT32_C( -995.00), SIMDE_FLOAT32_C( -85.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -438.00), SIMDE_FLOAT32_C( 332.00), SIMDE_FLOAT32_C( 531.00), SIMDE_FLOAT32_C( 188.00), SIMDE_FLOAT32_C( 135.00), SIMDE_FLOAT32_C( -342.00), SIMDE_FLOAT32_C( -996.00), SIMDE_FLOAT32_C( -85.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -437.00), SIMDE_FLOAT32_C( 333.00), SIMDE_FLOAT32_C( 
532.00), SIMDE_FLOAT32_C( 189.00), SIMDE_FLOAT32_C( 136.00), SIMDE_FLOAT32_C( -341.00), SIMDE_FLOAT32_C( -995.00), SIMDE_FLOAT32_C( -84.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -437.00), SIMDE_FLOAT32_C( 332.00), SIMDE_FLOAT32_C( 531.00), SIMDE_FLOAT32_C( 188.00), SIMDE_FLOAT32_C( 135.00), SIMDE_FLOAT32_C( -341.00), SIMDE_FLOAT32_C( -995.00), SIMDE_FLOAT32_C( -84.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.77), SIMDE_FLOAT32_C( -902.26), SIMDE_FLOAT32_C( 960.96), SIMDE_FLOAT32_C( 885.00), SIMDE_FLOAT32_C( 184.99), SIMDE_FLOAT32_C( 273.55), SIMDE_FLOAT32_C( -508.74), SIMDE_FLOAT32_C( 304.51)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -272.00), SIMDE_FLOAT32_C( -902.00), SIMDE_FLOAT32_C( 961.00), SIMDE_FLOAT32_C( 885.00), SIMDE_FLOAT32_C( 185.00), SIMDE_FLOAT32_C( 274.00), SIMDE_FLOAT32_C( -509.00), SIMDE_FLOAT32_C( 305.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -272.00), SIMDE_FLOAT32_C( -903.00), SIMDE_FLOAT32_C( 960.00), SIMDE_FLOAT32_C( 885.00), SIMDE_FLOAT32_C( 184.00), SIMDE_FLOAT32_C( 273.00), SIMDE_FLOAT32_C( -509.00), SIMDE_FLOAT32_C( 304.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.00), SIMDE_FLOAT32_C( -902.00), SIMDE_FLOAT32_C( 961.00), SIMDE_FLOAT32_C( 885.00), SIMDE_FLOAT32_C( 185.00), SIMDE_FLOAT32_C( 274.00), SIMDE_FLOAT32_C( -508.00), SIMDE_FLOAT32_C( 305.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -271.00), SIMDE_FLOAT32_C( -902.00), SIMDE_FLOAT32_C( 960.00), SIMDE_FLOAT32_C( 885.00), SIMDE_FLOAT32_C( 184.00), SIMDE_FLOAT32_C( 273.00), SIMDE_FLOAT32_C( -508.00), SIMDE_FLOAT32_C( 304.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 970.76), SIMDE_FLOAT32_C( -833.91), SIMDE_FLOAT32_C( -187.58), SIMDE_FLOAT32_C( 27.59), SIMDE_FLOAT32_C( 181.38), SIMDE_FLOAT32_C( -399.46), SIMDE_FLOAT32_C( -127.86), SIMDE_FLOAT32_C( -393.23)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 971.00), SIMDE_FLOAT32_C( -834.00), SIMDE_FLOAT32_C( -188.00), SIMDE_FLOAT32_C( 28.00), SIMDE_FLOAT32_C( 181.00), SIMDE_FLOAT32_C( -399.00), SIMDE_FLOAT32_C( -128.00), SIMDE_FLOAT32_C( -393.00)), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( 970.00), SIMDE_FLOAT32_C( -834.00), SIMDE_FLOAT32_C( -188.00), SIMDE_FLOAT32_C( 27.00), SIMDE_FLOAT32_C( 181.00), SIMDE_FLOAT32_C( -400.00), SIMDE_FLOAT32_C( -128.00), SIMDE_FLOAT32_C( -394.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 971.00), SIMDE_FLOAT32_C( -833.00), SIMDE_FLOAT32_C( -187.00), SIMDE_FLOAT32_C( 28.00), SIMDE_FLOAT32_C( 182.00), SIMDE_FLOAT32_C( -399.00), SIMDE_FLOAT32_C( -127.00), SIMDE_FLOAT32_C( -393.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 970.00), SIMDE_FLOAT32_C( -833.00), SIMDE_FLOAT32_C( -187.00), SIMDE_FLOAT32_C( 27.00), SIMDE_FLOAT32_C( 181.00), SIMDE_FLOAT32_C( -399.00), SIMDE_FLOAT32_C( -127.00), SIMDE_FLOAT32_C( -393.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 259.90), SIMDE_FLOAT32_C( -282.33), SIMDE_FLOAT32_C( 766.19), SIMDE_FLOAT32_C( 948.74), SIMDE_FLOAT32_C( -533.05), SIMDE_FLOAT32_C( 397.75), SIMDE_FLOAT32_C( 998.83), SIMDE_FLOAT32_C( -841.13)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 260.00), SIMDE_FLOAT32_C( -282.00), SIMDE_FLOAT32_C( 766.00), SIMDE_FLOAT32_C( 949.00), SIMDE_FLOAT32_C( -533.00), SIMDE_FLOAT32_C( 398.00), SIMDE_FLOAT32_C( 999.00), SIMDE_FLOAT32_C( -841.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 259.00), SIMDE_FLOAT32_C( -283.00), SIMDE_FLOAT32_C( 766.00), SIMDE_FLOAT32_C( 948.00), SIMDE_FLOAT32_C( -534.00), SIMDE_FLOAT32_C( 397.00), SIMDE_FLOAT32_C( 998.00), SIMDE_FLOAT32_C( -842.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 260.00), SIMDE_FLOAT32_C( -282.00), SIMDE_FLOAT32_C( 767.00), SIMDE_FLOAT32_C( 949.00), SIMDE_FLOAT32_C( -533.00), SIMDE_FLOAT32_C( 398.00), SIMDE_FLOAT32_C( 999.00), SIMDE_FLOAT32_C( -841.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 259.00), SIMDE_FLOAT32_C( -282.00), SIMDE_FLOAT32_C( 766.00), SIMDE_FLOAT32_C( 948.00), SIMDE_FLOAT32_C( -533.00), SIMDE_FLOAT32_C( 397.00), SIMDE_FLOAT32_C( 998.00), SIMDE_FLOAT32_C( -841.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -178.69), SIMDE_FLOAT32_C( 880.71), SIMDE_FLOAT32_C( -928.72), SIMDE_FLOAT32_C( -201.24), 
SIMDE_FLOAT32_C( -99.45), SIMDE_FLOAT32_C( 785.84), SIMDE_FLOAT32_C( 542.02), SIMDE_FLOAT32_C( -81.93)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -179.00), SIMDE_FLOAT32_C( 881.00), SIMDE_FLOAT32_C( -929.00), SIMDE_FLOAT32_C( -201.00), SIMDE_FLOAT32_C( -99.00), SIMDE_FLOAT32_C( 786.00), SIMDE_FLOAT32_C( 542.00), SIMDE_FLOAT32_C( -82.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -179.00), SIMDE_FLOAT32_C( 880.00), SIMDE_FLOAT32_C( -929.00), SIMDE_FLOAT32_C( -202.00), SIMDE_FLOAT32_C( -100.00), SIMDE_FLOAT32_C( 785.00), SIMDE_FLOAT32_C( 542.00), SIMDE_FLOAT32_C( -82.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -178.00), SIMDE_FLOAT32_C( 881.00), SIMDE_FLOAT32_C( -928.00), SIMDE_FLOAT32_C( -201.00), SIMDE_FLOAT32_C( -99.00), SIMDE_FLOAT32_C( 786.00), SIMDE_FLOAT32_C( 543.00), SIMDE_FLOAT32_C( -81.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -178.00), SIMDE_FLOAT32_C( 880.00), SIMDE_FLOAT32_C( -928.00), SIMDE_FLOAT32_C( -201.00), SIMDE_FLOAT32_C( -99.00), SIMDE_FLOAT32_C( 785.00), SIMDE_FLOAT32_C( 542.00), SIMDE_FLOAT32_C( -81.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -937.16), SIMDE_FLOAT32_C( 854.52), SIMDE_FLOAT32_C( 980.48), SIMDE_FLOAT32_C( -86.24), SIMDE_FLOAT32_C( 473.38), SIMDE_FLOAT32_C( 104.75), SIMDE_FLOAT32_C( 14.33), SIMDE_FLOAT32_C( 91.35)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -937.00), SIMDE_FLOAT32_C( 855.00), SIMDE_FLOAT32_C( 980.00), SIMDE_FLOAT32_C( -86.00), SIMDE_FLOAT32_C( 473.00), SIMDE_FLOAT32_C( 105.00), SIMDE_FLOAT32_C( 14.00), SIMDE_FLOAT32_C( 91.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -938.00), SIMDE_FLOAT32_C( 854.00), SIMDE_FLOAT32_C( 980.00), SIMDE_FLOAT32_C( -87.00), SIMDE_FLOAT32_C( 473.00), SIMDE_FLOAT32_C( 104.00), SIMDE_FLOAT32_C( 14.00), SIMDE_FLOAT32_C( 91.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -937.00), SIMDE_FLOAT32_C( 855.00), SIMDE_FLOAT32_C( 981.00), SIMDE_FLOAT32_C( -86.00), SIMDE_FLOAT32_C( 474.00), SIMDE_FLOAT32_C( 105.00), SIMDE_FLOAT32_C( 15.00), SIMDE_FLOAT32_C( 92.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -937.00), 
SIMDE_FLOAT32_C( 854.00), SIMDE_FLOAT32_C( 980.00), SIMDE_FLOAT32_C( -86.00), SIMDE_FLOAT32_C( 473.00), SIMDE_FLOAT32_C( 104.00), SIMDE_FLOAT32_C( 14.00), SIMDE_FLOAT32_C( 91.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 529.30), SIMDE_FLOAT32_C( 148.64), SIMDE_FLOAT32_C( 820.35), SIMDE_FLOAT32_C( 265.99), SIMDE_FLOAT32_C( 701.82), SIMDE_FLOAT32_C( 479.73), SIMDE_FLOAT32_C( 432.96), SIMDE_FLOAT32_C( 276.42)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 529.00), SIMDE_FLOAT32_C( 149.00), SIMDE_FLOAT32_C( 820.00), SIMDE_FLOAT32_C( 266.00), SIMDE_FLOAT32_C( 702.00), SIMDE_FLOAT32_C( 480.00), SIMDE_FLOAT32_C( 433.00), SIMDE_FLOAT32_C( 276.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 529.00), SIMDE_FLOAT32_C( 148.00), SIMDE_FLOAT32_C( 820.00), SIMDE_FLOAT32_C( 265.00), SIMDE_FLOAT32_C( 701.00), SIMDE_FLOAT32_C( 479.00), SIMDE_FLOAT32_C( 432.00), SIMDE_FLOAT32_C( 276.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 530.00), SIMDE_FLOAT32_C( 149.00), SIMDE_FLOAT32_C( 821.00), SIMDE_FLOAT32_C( 266.00), SIMDE_FLOAT32_C( 702.00), SIMDE_FLOAT32_C( 480.00), SIMDE_FLOAT32_C( 433.00), SIMDE_FLOAT32_C( 277.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 529.00), SIMDE_FLOAT32_C( 148.00), SIMDE_FLOAT32_C( 820.00), SIMDE_FLOAT32_C( 265.00), SIMDE_FLOAT32_C( 701.00), SIMDE_FLOAT32_C( 479.00), SIMDE_FLOAT32_C( 432.00), SIMDE_FLOAT32_C( 276.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -33.90), SIMDE_FLOAT32_C( -334.90), SIMDE_FLOAT32_C( -399.58), SIMDE_FLOAT32_C( 824.28), SIMDE_FLOAT32_C( 442.40), SIMDE_FLOAT32_C( 699.22), SIMDE_FLOAT32_C( -143.02), SIMDE_FLOAT32_C( -465.79)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -34.00), SIMDE_FLOAT32_C( -335.00), SIMDE_FLOAT32_C( -400.00), SIMDE_FLOAT32_C( 824.00), SIMDE_FLOAT32_C( 442.00), SIMDE_FLOAT32_C( 699.00), SIMDE_FLOAT32_C( -143.00), SIMDE_FLOAT32_C( -466.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -34.00), SIMDE_FLOAT32_C( -335.00), SIMDE_FLOAT32_C( -400.00), SIMDE_FLOAT32_C( 824.00), SIMDE_FLOAT32_C( 442.00), SIMDE_FLOAT32_C( 699.00), SIMDE_FLOAT32_C( -144.00), 
SIMDE_FLOAT32_C( -466.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -33.00), SIMDE_FLOAT32_C( -334.00), SIMDE_FLOAT32_C( -399.00), SIMDE_FLOAT32_C( 825.00), SIMDE_FLOAT32_C( 443.00), SIMDE_FLOAT32_C( 700.00), SIMDE_FLOAT32_C( -143.00), SIMDE_FLOAT32_C( -465.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -33.00), SIMDE_FLOAT32_C( -334.00), SIMDE_FLOAT32_C( -399.00), SIMDE_FLOAT32_C( 824.00), SIMDE_FLOAT32_C( 442.00), SIMDE_FLOAT32_C( 699.00), SIMDE_FLOAT32_C( -143.00), SIMDE_FLOAT32_C( -465.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r; r = simde_mm256_round_ps(test_vec[i].a, SIMDE_MM_FROUND_TO_NEAREST_INT); simde_assert_m256_close(r, test_vec[i].nearest, 1); r = simde_mm256_round_ps(test_vec[i].a, SIMDE_MM_FROUND_TO_NEG_INF); simde_assert_m256_close(r, test_vec[i].neg_inf, 1); r = simde_mm256_round_ps(test_vec[i].a, SIMDE_MM_FROUND_TO_POS_INF); simde_assert_m256_close(r, test_vec[i].pos_inf, 1); r = simde_mm256_round_ps(test_vec[i].a, SIMDE_MM_FROUND_TO_ZERO); simde_assert_m256_close(r, test_vec[i].truncate, 1); } return 0; } static int test_simde_mm256_round_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d nearest; simde__m256d neg_inf; simde__m256d pos_inf; simde__m256d truncate; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 312.12), SIMDE_FLOAT64_C( 818.22), SIMDE_FLOAT64_C( 62.47), SIMDE_FLOAT64_C( 918.37)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 312.00), SIMDE_FLOAT64_C( 818.00), SIMDE_FLOAT64_C( 62.00), SIMDE_FLOAT64_C( 918.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 312.00), SIMDE_FLOAT64_C( 818.00), SIMDE_FLOAT64_C( 62.00), SIMDE_FLOAT64_C( 918.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 313.00), SIMDE_FLOAT64_C( 819.00), SIMDE_FLOAT64_C( 63.00), SIMDE_FLOAT64_C( 919.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 312.00), SIMDE_FLOAT64_C( 818.00), SIMDE_FLOAT64_C( 62.00), SIMDE_FLOAT64_C( 918.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 651.78), SIMDE_FLOAT64_C( -771.04), SIMDE_FLOAT64_C( 
544.48), SIMDE_FLOAT64_C( 333.27)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 652.00), SIMDE_FLOAT64_C( -771.00), SIMDE_FLOAT64_C( 544.00), SIMDE_FLOAT64_C( 333.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 651.00), SIMDE_FLOAT64_C( -772.00), SIMDE_FLOAT64_C( 544.00), SIMDE_FLOAT64_C( 333.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 652.00), SIMDE_FLOAT64_C( -771.00), SIMDE_FLOAT64_C( 545.00), SIMDE_FLOAT64_C( 334.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 651.00), SIMDE_FLOAT64_C( -771.00), SIMDE_FLOAT64_C( 544.00), SIMDE_FLOAT64_C( 333.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -661.62), SIMDE_FLOAT64_C( 921.42), SIMDE_FLOAT64_C( 23.03), SIMDE_FLOAT64_C( 143.14)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -662.00), SIMDE_FLOAT64_C( 921.00), SIMDE_FLOAT64_C( 23.00), SIMDE_FLOAT64_C( 143.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -662.00), SIMDE_FLOAT64_C( 921.00), SIMDE_FLOAT64_C( 23.00), SIMDE_FLOAT64_C( 143.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -661.00), SIMDE_FLOAT64_C( 922.00), SIMDE_FLOAT64_C( 24.00), SIMDE_FLOAT64_C( 144.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -661.00), SIMDE_FLOAT64_C( 921.00), SIMDE_FLOAT64_C( 23.00), SIMDE_FLOAT64_C( 143.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -70.00), SIMDE_FLOAT64_C( -189.29), SIMDE_FLOAT64_C( -644.20), SIMDE_FLOAT64_C( -788.03)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -70.00), SIMDE_FLOAT64_C( -189.00), SIMDE_FLOAT64_C( -644.00), SIMDE_FLOAT64_C( -788.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -70.00), SIMDE_FLOAT64_C( -190.00), SIMDE_FLOAT64_C( -645.00), SIMDE_FLOAT64_C( -789.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -70.00), SIMDE_FLOAT64_C( -189.00), SIMDE_FLOAT64_C( -644.00), SIMDE_FLOAT64_C( -788.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -70.00), SIMDE_FLOAT64_C( -189.00), SIMDE_FLOAT64_C( -644.00), SIMDE_FLOAT64_C( -788.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -999.31), SIMDE_FLOAT64_C( 917.83), SIMDE_FLOAT64_C( -173.85), SIMDE_FLOAT64_C( -622.25)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -999.00), SIMDE_FLOAT64_C( 
918.00), SIMDE_FLOAT64_C( -174.00), SIMDE_FLOAT64_C( -622.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-1000.00), SIMDE_FLOAT64_C( 917.00), SIMDE_FLOAT64_C( -174.00), SIMDE_FLOAT64_C( -623.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -999.00), SIMDE_FLOAT64_C( 918.00), SIMDE_FLOAT64_C( -173.00), SIMDE_FLOAT64_C( -622.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -999.00), SIMDE_FLOAT64_C( 917.00), SIMDE_FLOAT64_C( -173.00), SIMDE_FLOAT64_C( -622.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -905.08), SIMDE_FLOAT64_C( 96.40), SIMDE_FLOAT64_C( 481.12), SIMDE_FLOAT64_C( 989.53)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -905.00), SIMDE_FLOAT64_C( 96.00), SIMDE_FLOAT64_C( 481.00), SIMDE_FLOAT64_C( 990.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -906.00), SIMDE_FLOAT64_C( 96.00), SIMDE_FLOAT64_C( 481.00), SIMDE_FLOAT64_C( 989.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -905.00), SIMDE_FLOAT64_C( 97.00), SIMDE_FLOAT64_C( 482.00), SIMDE_FLOAT64_C( 990.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -905.00), SIMDE_FLOAT64_C( 96.00), SIMDE_FLOAT64_C( 481.00), SIMDE_FLOAT64_C( 989.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 380.53), SIMDE_FLOAT64_C( 251.75), SIMDE_FLOAT64_C( -843.75), SIMDE_FLOAT64_C( -890.74)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 381.00), SIMDE_FLOAT64_C( 252.00), SIMDE_FLOAT64_C( -844.00), SIMDE_FLOAT64_C( -891.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 380.00), SIMDE_FLOAT64_C( 251.00), SIMDE_FLOAT64_C( -844.00), SIMDE_FLOAT64_C( -891.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 381.00), SIMDE_FLOAT64_C( 252.00), SIMDE_FLOAT64_C( -843.00), SIMDE_FLOAT64_C( -890.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 380.00), SIMDE_FLOAT64_C( 251.00), SIMDE_FLOAT64_C( -843.00), SIMDE_FLOAT64_C( -890.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -382.52), SIMDE_FLOAT64_C( -590.14), SIMDE_FLOAT64_C( 3.25), SIMDE_FLOAT64_C( 599.23)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -383.00), SIMDE_FLOAT64_C( -590.00), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 599.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 
-383.00), SIMDE_FLOAT64_C( -591.00), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 599.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -382.00), SIMDE_FLOAT64_C( -590.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 600.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -382.00), SIMDE_FLOAT64_C( -590.00), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 599.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r; r = simde_mm256_round_pd(test_vec[i].a, SIMDE_MM_FROUND_TO_NEAREST_INT); simde_assert_m256d_close(r, test_vec[i].nearest, 1); r = simde_mm256_round_pd(test_vec[i].a, SIMDE_MM_FROUND_TO_NEG_INF); simde_assert_m256d_close(r, test_vec[i].neg_inf, 1); r = simde_mm256_round_pd(test_vec[i].a, SIMDE_MM_FROUND_TO_POS_INF); simde_assert_m256d_close(r, test_vec[i].pos_inf, 1); r = simde_mm256_round_pd(test_vec[i].a, SIMDE_MM_FROUND_TO_ZERO); simde_assert_m256d_close(r, test_vec[i].truncate, 1); } return 0; } static int test_simde_mm256_rsqrt_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 376.34), SIMDE_FLOAT32_C( 781.09), SIMDE_FLOAT32_C( 426.92), SIMDE_FLOAT32_C( 127.71), SIMDE_FLOAT32_C( 308.06), SIMDE_FLOAT32_C( 169.26), SIMDE_FLOAT32_C( 264.24), SIMDE_FLOAT32_C( 87.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.11)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 840.99), SIMDE_FLOAT32_C( 641.73), SIMDE_FLOAT32_C( 425.88), SIMDE_FLOAT32_C( 794.85), SIMDE_FLOAT32_C( 374.41), SIMDE_FLOAT32_C( 576.54), SIMDE_FLOAT32_C( 840.83), SIMDE_FLOAT32_C( 886.63)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.03)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 314.85), 
SIMDE_FLOAT32_C( 671.43), SIMDE_FLOAT32_C( 540.12), SIMDE_FLOAT32_C( 529.67), SIMDE_FLOAT32_C( 498.35), SIMDE_FLOAT32_C( 224.61), SIMDE_FLOAT32_C( 518.07), SIMDE_FLOAT32_C( 759.15)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 990.90), SIMDE_FLOAT32_C( 800.35), SIMDE_FLOAT32_C( 95.53), SIMDE_FLOAT32_C( 852.74), SIMDE_FLOAT32_C( 140.49), SIMDE_FLOAT32_C( 379.21), SIMDE_FLOAT32_C( 930.80), SIMDE_FLOAT32_C( 70.01)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.12)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 251.14), SIMDE_FLOAT32_C( 612.05), SIMDE_FLOAT32_C( 55.90), SIMDE_FLOAT32_C( 550.93), SIMDE_FLOAT32_C( 71.18), SIMDE_FLOAT32_C( 968.41), SIMDE_FLOAT32_C( 36.22), SIMDE_FLOAT32_C( 986.88)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.03)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 657.13), SIMDE_FLOAT32_C( 431.08), SIMDE_FLOAT32_C( 717.98), SIMDE_FLOAT32_C( 27.05), SIMDE_FLOAT32_C( 195.42), SIMDE_FLOAT32_C( 859.20), SIMDE_FLOAT32_C( 157.91), SIMDE_FLOAT32_C( 578.79)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.04)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 738.73), SIMDE_FLOAT32_C( 198.62), SIMDE_FLOAT32_C( 544.16), SIMDE_FLOAT32_C( 379.62), SIMDE_FLOAT32_C( 782.12), SIMDE_FLOAT32_C( 91.05), SIMDE_FLOAT32_C( 650.65), SIMDE_FLOAT32_C( 315.52)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.04), 
SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.06)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 788.79), SIMDE_FLOAT32_C( 929.94), SIMDE_FLOAT32_C( 55.55), SIMDE_FLOAT32_C( 137.30), SIMDE_FLOAT32_C( 612.48), SIMDE_FLOAT32_C( 6.47), SIMDE_FLOAT32_C( 828.22), SIMDE_FLOAT32_C( 971.40)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.03)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_rsqrt_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_setr_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { int8_t a[32]; simde__m256i r; } test_vec[8] = { { { INT8_C( -4), INT8_C( 97), INT8_C( -85), INT8_C( -82), INT8_C( 42), INT8_C( 35), INT8_C( 11), INT8_C( 62), INT8_C( -47), INT8_C( 10), INT8_C(-127), INT8_C( 56), INT8_C( 44), INT8_C( 59), INT8_C( -13), INT8_C( 22), INT8_C( -14), INT8_C( 77), INT8_C( -49), INT8_C( -46), INT8_C( 53), INT8_C(-109), INT8_C( 30), INT8_C( 70), INT8_C(-110), INT8_C( 70), INT8_C( 95), INT8_C( -22), INT8_C( 89), INT8_C( -50), INT8_C( -28), INT8_C(-122) }, simde_mm256_set_epi8(INT8_C(-122), INT8_C( -28), INT8_C( -50), INT8_C( 89), INT8_C( -22), INT8_C( 95), INT8_C( 70), INT8_C(-110), INT8_C( 70), INT8_C( 30), INT8_C(-109), INT8_C( 53), INT8_C( -46), INT8_C( -49), INT8_C( 77), INT8_C( -14), INT8_C( 22), INT8_C( -13), INT8_C( 59), INT8_C( 44), INT8_C( 56), INT8_C(-127), INT8_C( 10), INT8_C( -47), INT8_C( 62), INT8_C( 11), INT8_C( 35), INT8_C( 42), INT8_C( -82), INT8_C( -85), INT8_C( 97), INT8_C( -4)) }, { { INT8_C( 127), INT8_C( 42), INT8_C( -70), INT8_C( -73), INT8_C( -56), INT8_C(-116), INT8_C( -43), INT8_C( 20), INT8_C( -7), INT8_C(-120), INT8_C( -33), INT8_C( 3), INT8_C( 87), 
INT8_C( 58), INT8_C( 61), INT8_C( -32), INT8_C( 37), INT8_C(-114), INT8_C( 14), INT8_C( 80), INT8_C( -87), INT8_C( 43), INT8_C( -56), INT8_C( 51), INT8_C( 64), INT8_C(-107), INT8_C( 80), INT8_C( 59), INT8_C( -99), INT8_C( -49), INT8_C( 22), INT8_C( 109) }, simde_mm256_set_epi8(INT8_C( 109), INT8_C( 22), INT8_C( -49), INT8_C( -99), INT8_C( 59), INT8_C( 80), INT8_C(-107), INT8_C( 64), INT8_C( 51), INT8_C( -56), INT8_C( 43), INT8_C( -87), INT8_C( 80), INT8_C( 14), INT8_C(-114), INT8_C( 37), INT8_C( -32), INT8_C( 61), INT8_C( 58), INT8_C( 87), INT8_C( 3), INT8_C( -33), INT8_C(-120), INT8_C( -7), INT8_C( 20), INT8_C( -43), INT8_C(-116), INT8_C( -56), INT8_C( -73), INT8_C( -70), INT8_C( 42), INT8_C( 127)) }, { { INT8_C( 18), INT8_C( 106), INT8_C( -14), INT8_C( -63), INT8_C( 49), INT8_C( 26), INT8_C( 111), INT8_C( 121), INT8_C(-109), INT8_C( 19), INT8_C( 59), INT8_C( -42), INT8_C( -61), INT8_C( 44), INT8_C( 95), INT8_C( 0), INT8_C( 14), INT8_C( 62), INT8_C( 88), INT8_C( 98), INT8_C( 26), INT8_C( 58), INT8_C( 76), INT8_C( -18), INT8_C(-104), INT8_C( -9), INT8_C( -67), INT8_C( 20), INT8_C( -43), INT8_C( -63), INT8_C( 30), INT8_C( 123) }, simde_mm256_set_epi8(INT8_C( 123), INT8_C( 30), INT8_C( -63), INT8_C( -43), INT8_C( 20), INT8_C( -67), INT8_C( -9), INT8_C(-104), INT8_C( -18), INT8_C( 76), INT8_C( 58), INT8_C( 26), INT8_C( 98), INT8_C( 88), INT8_C( 62), INT8_C( 14), INT8_C( 0), INT8_C( 95), INT8_C( 44), INT8_C( -61), INT8_C( -42), INT8_C( 59), INT8_C( 19), INT8_C(-109), INT8_C( 121), INT8_C( 111), INT8_C( 26), INT8_C( 49), INT8_C( -63), INT8_C( -14), INT8_C( 106), INT8_C( 18)) }, { { INT8_C(-122), INT8_C( 106), INT8_C( -25), INT8_C( 57), INT8_C(-110), INT8_C( 80), INT8_C( 35), INT8_C( -81), INT8_C(-111), INT8_C( 7), INT8_C( 90), INT8_C( -14), INT8_C( 64), INT8_C( 90), INT8_C( -51), INT8_C( 87), INT8_C( 49), INT8_C( 77), INT8_C( 127), INT8_C( -93), INT8_C( -57), INT8_C( 112), INT8_C( -74), INT8_C( 26), INT8_C(-113), INT8_C( -77), INT8_C( -29), INT8_C( -29), INT8_C( 117), 
INT8_C( 31), INT8_C(-100), INT8_C( -86) }, simde_mm256_set_epi8(INT8_C( -86), INT8_C(-100), INT8_C( 31), INT8_C( 117), INT8_C( -29), INT8_C( -29), INT8_C( -77), INT8_C(-113), INT8_C( 26), INT8_C( -74), INT8_C( 112), INT8_C( -57), INT8_C( -93), INT8_C( 127), INT8_C( 77), INT8_C( 49), INT8_C( 87), INT8_C( -51), INT8_C( 90), INT8_C( 64), INT8_C( -14), INT8_C( 90), INT8_C( 7), INT8_C(-111), INT8_C( -81), INT8_C( 35), INT8_C( 80), INT8_C(-110), INT8_C( 57), INT8_C( -25), INT8_C( 106), INT8_C(-122)) }, { { INT8_C( 58), INT8_C( 6), INT8_C( 118), INT8_C( 36), INT8_C( 115), INT8_C( -15), INT8_C( 37), INT8_C( 119), INT8_C( -89), INT8_C( -10), INT8_C( -50), INT8_C(-119), INT8_C( 6), INT8_C( -77), INT8_C( 70), INT8_C( 117), INT8_C( -12), INT8_C( -22), INT8_C( 114), INT8_C( -39), INT8_C( 100), INT8_C( 122), INT8_C(-102), INT8_C( -55), INT8_C( 116), INT8_C( -23), INT8_C( -29), INT8_C( -57), INT8_C( -40), INT8_C( 41), INT8_C( 119), INT8_C( 121) }, simde_mm256_set_epi8(INT8_C( 121), INT8_C( 119), INT8_C( 41), INT8_C( -40), INT8_C( -57), INT8_C( -29), INT8_C( -23), INT8_C( 116), INT8_C( -55), INT8_C(-102), INT8_C( 122), INT8_C( 100), INT8_C( -39), INT8_C( 114), INT8_C( -22), INT8_C( -12), INT8_C( 117), INT8_C( 70), INT8_C( -77), INT8_C( 6), INT8_C(-119), INT8_C( -50), INT8_C( -10), INT8_C( -89), INT8_C( 119), INT8_C( 37), INT8_C( -15), INT8_C( 115), INT8_C( 36), INT8_C( 118), INT8_C( 6), INT8_C( 58)) }, { { INT8_C( 47), INT8_C( 33), INT8_C( -28), INT8_C(-105), INT8_C( -6), INT8_C( -69), INT8_C( 111), INT8_C( -17), INT8_C( 43), INT8_C(-123), INT8_C( 56), INT8_C( 119), INT8_C( 18), INT8_C( -6), INT8_C( -96), INT8_C(-126), INT8_C( 113), INT8_C(-107), INT8_C( 83), INT8_C( 24), INT8_C( -84), INT8_C(-124), INT8_C( -72), INT8_C( -86), INT8_C( 80), INT8_C( 33), INT8_C( -6), INT8_C( -30), INT8_C( 85), INT8_C( -74), INT8_C( 58), INT8_C( -88) }, simde_mm256_set_epi8(INT8_C( -88), INT8_C( 58), INT8_C( -74), INT8_C( 85), INT8_C( -30), INT8_C( -6), INT8_C( 33), INT8_C( 80), INT8_C( -86), INT8_C( 
-72), INT8_C(-124), INT8_C( -84), INT8_C( 24), INT8_C( 83), INT8_C(-107), INT8_C( 113), INT8_C(-126), INT8_C( -96), INT8_C( -6), INT8_C( 18), INT8_C( 119), INT8_C( 56), INT8_C(-123), INT8_C( 43), INT8_C( -17), INT8_C( 111), INT8_C( -69), INT8_C( -6), INT8_C(-105), INT8_C( -28), INT8_C( 33), INT8_C( 47)) }, { { INT8_C( 12), INT8_C( 93), INT8_C( -74), INT8_C( 117), INT8_C( -55), INT8_C( -56), INT8_C( 9), INT8_C( -48), INT8_C( 100), INT8_C( -4), INT8_C( 101), INT8_C( -1), INT8_C( -41), INT8_C( -98), INT8_C(-128), INT8_C( -73), INT8_C( -47), INT8_C( 35), INT8_C( -89), INT8_C( -36), INT8_C(-117), INT8_C( -95), INT8_C( -70), INT8_C( -94), INT8_C( -61), INT8_C( -88), INT8_C( -41), INT8_C( -56), INT8_C( -5), INT8_C( -90), INT8_C( -61), INT8_C( 58) }, simde_mm256_set_epi8(INT8_C( 58), INT8_C( -61), INT8_C( -90), INT8_C( -5), INT8_C( -56), INT8_C( -41), INT8_C( -88), INT8_C( -61), INT8_C( -94), INT8_C( -70), INT8_C( -95), INT8_C(-117), INT8_C( -36), INT8_C( -89), INT8_C( 35), INT8_C( -47), INT8_C( -73), INT8_C(-128), INT8_C( -98), INT8_C( -41), INT8_C( -1), INT8_C( 101), INT8_C( -4), INT8_C( 100), INT8_C( -48), INT8_C( 9), INT8_C( -56), INT8_C( -55), INT8_C( 117), INT8_C( -74), INT8_C( 93), INT8_C( 12)) }, { { INT8_C( -62), INT8_C( 106), INT8_C( 33), INT8_C( -86), INT8_C( 50), INT8_C( 51), INT8_C( 1), INT8_C( 83), INT8_C( -24), INT8_C( 24), INT8_C( 119), INT8_C( -35), INT8_C( 55), INT8_C( 109), INT8_C( -56), INT8_C( -46), INT8_C( -64), INT8_C( 124), INT8_C( -1), INT8_C( -71), INT8_C( 27), INT8_C( 108), INT8_C(-113), INT8_C( -86), INT8_C( 94), INT8_C( 46), INT8_C( -59), INT8_C( -32), INT8_C( -63), INT8_C( -39), INT8_C( -52), INT8_C( 101) }, simde_mm256_set_epi8(INT8_C( 101), INT8_C( -52), INT8_C( -39), INT8_C( -63), INT8_C( -32), INT8_C( -59), INT8_C( 46), INT8_C( 94), INT8_C( -86), INT8_C(-113), INT8_C( 108), INT8_C( 27), INT8_C( -71), INT8_C( -1), INT8_C( 124), INT8_C( -64), INT8_C( -46), INT8_C( -56), INT8_C( 109), INT8_C( 55), INT8_C( -35), INT8_C( 119), INT8_C( 24), 
INT8_C( -24), INT8_C( 83), INT8_C( 1), INT8_C( 51), INT8_C( 50), INT8_C( -86), INT8_C( 33), INT8_C( 106), INT8_C( -62)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_setr_epi8( test_vec[i].a[ 0], test_vec[i].a[ 1], test_vec[i].a[ 2], test_vec[i].a[ 3], test_vec[i].a[ 4], test_vec[i].a[ 5], test_vec[i].a[ 6], test_vec[i].a[ 7], test_vec[i].a[ 8], test_vec[i].a[ 9], test_vec[i].a[10], test_vec[i].a[11], test_vec[i].a[12], test_vec[i].a[13], test_vec[i].a[14], test_vec[i].a[15], test_vec[i].a[16], test_vec[i].a[17], test_vec[i].a[18], test_vec[i].a[19], test_vec[i].a[20], test_vec[i].a[21], test_vec[i].a[22], test_vec[i].a[23], test_vec[i].a[24], test_vec[i].a[25], test_vec[i].a[26], test_vec[i].a[27], test_vec[i].a[28], test_vec[i].a[29], test_vec[i].a[30], test_vec[i].a[31]); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_setr_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { int16_t a[16]; simde__m256i r; } test_vec[8] = { { { INT16_C(-20822), INT16_C( 4719), INT16_C( 13700), INT16_C( 26280), INT16_C( -8393), INT16_C( 13684), INT16_C(-27950), INT16_C(-18508), INT16_C( 32037), INT16_C(-24299), INT16_C(-21546), INT16_C( 1669), INT16_C( 957), INT16_C( 10001), INT16_C(-15549), INT16_C(-30917) }, simde_mm256_set_epi16(INT16_C(-30917), INT16_C(-15549), INT16_C( 10001), INT16_C( 957), INT16_C( 1669), INT16_C(-21546), INT16_C(-24299), INT16_C( 32037), INT16_C(-18508), INT16_C(-27950), INT16_C( 13684), INT16_C( -8393), INT16_C( 26280), INT16_C( 13700), INT16_C( 4719), INT16_C(-20822)) }, { { INT16_C(-21993), INT16_C(-20656), INT16_C( 29326), INT16_C( 28940), INT16_C(-31152), INT16_C(-19248), INT16_C(-26052), INT16_C(-19065), INT16_C(-11006), INT16_C( -7550), INT16_C( 14017), INT16_C( 19342), INT16_C(-32339), INT16_C( 3580), INT16_C( 28313), INT16_C(-13882) }, simde_mm256_set_epi16(INT16_C(-13882), INT16_C( 28313), INT16_C( 3580), INT16_C(-32339), INT16_C( 19342), INT16_C( 
14017), INT16_C( -7550), INT16_C(-11006), INT16_C(-19065), INT16_C(-26052), INT16_C(-19248), INT16_C(-31152), INT16_C( 28940), INT16_C( 29326), INT16_C(-20656), INT16_C(-21993)) }, { { INT16_C( 10866), INT16_C( 9786), INT16_C( 3944), INT16_C(-19272), INT16_C(-30670), INT16_C( 14551), INT16_C( 8410), INT16_C( -6777), INT16_C(-16568), INT16_C(-18107), INT16_C(-20605), INT16_C( 22309), INT16_C(-22975), INT16_C( 30575), INT16_C( -4285), INT16_C( 10884) }, simde_mm256_set_epi16(INT16_C( 10884), INT16_C( -4285), INT16_C( 30575), INT16_C(-22975), INT16_C( 22309), INT16_C(-20605), INT16_C(-18107), INT16_C(-16568), INT16_C( -6777), INT16_C( 8410), INT16_C( 14551), INT16_C(-30670), INT16_C(-19272), INT16_C( 3944), INT16_C( 9786), INT16_C( 10866)) }, { { INT16_C( 3382), INT16_C(-18461), INT16_C( 23033), INT16_C(-12757), INT16_C( -812), INT16_C( 15509), INT16_C(-23059), INT16_C( 2475), INT16_C(-26254), INT16_C(-14528), INT16_C(-12769), INT16_C( -6867), INT16_C(-17924), INT16_C( 22705), INT16_C(-26548), INT16_C(-32025) }, simde_mm256_set_epi16(INT16_C(-32025), INT16_C(-26548), INT16_C( 22705), INT16_C(-17924), INT16_C( -6867), INT16_C(-12769), INT16_C(-14528), INT16_C(-26254), INT16_C( 2475), INT16_C(-23059), INT16_C( 15509), INT16_C( -812), INT16_C(-12757), INT16_C( 23033), INT16_C(-18461), INT16_C( 3382)) }, { { INT16_C(-16227), INT16_C( 12780), INT16_C( 24958), INT16_C( 10168), INT16_C(-24922), INT16_C(-26733), INT16_C( 3884), INT16_C( 8130), INT16_C( -363), INT16_C( -2828), INT16_C( -7524), INT16_C( 28685), INT16_C( -7215), INT16_C( 7765), INT16_C( 25104), INT16_C(-23004) }, simde_mm256_set_epi16(INT16_C(-23004), INT16_C( 25104), INT16_C( 7765), INT16_C( -7215), INT16_C( 28685), INT16_C( -7524), INT16_C( -2828), INT16_C( -363), INT16_C( 8130), INT16_C( 3884), INT16_C(-26733), INT16_C(-24922), INT16_C( 10168), INT16_C( 24958), INT16_C( 12780), INT16_C(-16227)) }, { { INT16_C(-23604), INT16_C(-17002), INT16_C( -3804), INT16_C(-31486), INT16_C(-31316), INT16_C( 8603), INT16_C( 
13936), INT16_C(-30323), INT16_C( 17911), INT16_C(-25284), INT16_C( 11323), INT16_C( -3450), INT16_C( -2379), INT16_C( -4748), INT16_C( -1701), INT16_C(-14575) }, simde_mm256_set_epi16(INT16_C(-14575), INT16_C( -1701), INT16_C( -4748), INT16_C( -2379), INT16_C( -3450), INT16_C( 11323), INT16_C(-25284), INT16_C( 17911), INT16_C(-30323), INT16_C( 13936), INT16_C( 8603), INT16_C(-31316), INT16_C(-31486), INT16_C( -3804), INT16_C(-17002), INT16_C(-23604)) }, { { INT16_C( 2504), INT16_C( 3886), INT16_C( -8527), INT16_C( 15137), INT16_C( -2956), INT16_C( 3741), INT16_C(-30624), INT16_C(-26724), INT16_C( 6830), INT16_C( 31838), INT16_C( 31654), INT16_C(-13744), INT16_C( -1202), INT16_C( 10750), INT16_C( 5862), INT16_C(-29772) }, simde_mm256_set_epi16(INT16_C(-29772), INT16_C( 5862), INT16_C( 10750), INT16_C( -1202), INT16_C(-13744), INT16_C( 31654), INT16_C( 31838), INT16_C( 6830), INT16_C(-26724), INT16_C(-30624), INT16_C( 3741), INT16_C( -2956), INT16_C( 15137), INT16_C( -8527), INT16_C( 3886), INT16_C( 2504)) }, { { INT16_C( 9166), INT16_C( 24566), INT16_C(-20956), INT16_C( 25846), INT16_C( -9797), INT16_C(-30693), INT16_C( 17134), INT16_C(-20898), INT16_C(-11673), INT16_C( -1066), INT16_C( 24186), INT16_C( 15486), INT16_C( -6894), INT16_C( -1878), INT16_C(-19558), INT16_C( 25792) }, simde_mm256_set_epi16(INT16_C( 25792), INT16_C(-19558), INT16_C( -1878), INT16_C( -6894), INT16_C( 15486), INT16_C( 24186), INT16_C( -1066), INT16_C(-11673), INT16_C(-20898), INT16_C( 17134), INT16_C(-30693), INT16_C( -9797), INT16_C( 25846), INT16_C(-20956), INT16_C( 24566), INT16_C( 9166)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_setr_epi16( test_vec[i].a[ 0], test_vec[i].a[ 1], test_vec[i].a[ 2], test_vec[i].a[ 3], test_vec[i].a[ 4], test_vec[i].a[ 5], test_vec[i].a[ 6], test_vec[i].a[ 7], test_vec[i].a[ 8], test_vec[i].a[ 9], test_vec[i].a[10], test_vec[i].a[11], test_vec[i].a[12], test_vec[i].a[13], test_vec[i].a[14], 
test_vec[i].a[15]); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_setr_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { int32_t a[8]; simde__m256i r; } test_vec[8] = { { { INT32_C( 932849909), INT32_C( -456580424), INT32_C(-1072840342), INT32_C( 187025165), INT32_C( -54386372), INT32_C(-1527557226), INT32_C( 842765893), INT32_C(-1371730077) }, simde_mm256_set_epi32(INT32_C(-1371730077), INT32_C( 842765893), INT32_C(-1527557226), INT32_C( -54386372), INT32_C( 187025165), INT32_C(-1072840342), INT32_C( -456580424), INT32_C( 932849909)) }, { { INT32_C( 1893614455), INT32_C( 1294871072), INT32_C( 1552259151), INT32_C( 946045936), INT32_C( -7047247), INT32_C( 177282155), INT32_C( -581856304), INT32_C( 673832922) }, simde_mm256_set_epi32(INT32_C( 673832922), INT32_C( -581856304), INT32_C( 177282155), INT32_C( -7047247), INT32_C( 946045936), INT32_C( 1552259151), INT32_C( 1294871072), INT32_C( 1893614455)) }, { { INT32_C(-1610219922), INT32_C( -94583836), INT32_C( -424768577), INT32_C( -880788885), INT32_C( 602433069), INT32_C( -274391227), INT32_C( -328110003), INT32_C( 499660384) }, simde_mm256_set_epi32(INT32_C( 499660384), INT32_C( -328110003), INT32_C( -274391227), INT32_C( 602433069), INT32_C( -880788885), INT32_C( -424768577), INT32_C( -94583836), INT32_C(-1610219922)) }, { { INT32_C( 1302188877), INT32_C( -801832432), INT32_C( 1655080701), INT32_C(-1605614771), INT32_C( 1846614190), INT32_C( 1570676076), INT32_C( -68393412), INT32_C( 1031272058) }, simde_mm256_set_epi32(INT32_C( 1031272058), INT32_C( -68393412), INT32_C( 1570676076), INT32_C( 1846614190), INT32_C(-1605614771), INT32_C( 1655080701), INT32_C( -801832432), INT32_C( 1302188877)) }, { { INT32_C( 25897078), INT32_C(-1241591361), INT32_C( -592602700), INT32_C( -348865550), INT32_C( 1694164628), INT32_C( -856795223), INT32_C( -997978026), INT32_C( 1280081679) }, simde_mm256_set_epi32(INT32_C( 1280081679), INT32_C( -997978026), INT32_C( -856795223), INT32_C( 
1694164628), INT32_C( -348865550), INT32_C( -592602700), INT32_C(-1241591361), INT32_C( 25897078)) }, { { INT32_C( -87546396), INT32_C( 1852814507), INT32_C( -373825552), INT32_C( 1866208106), INT32_C( 910270627), INT32_C( 1550266609), INT32_C( 1485123950), INT32_C( -498285483) }, simde_mm256_set_epi32(INT32_C( -498285483), INT32_C( 1485123950), INT32_C( 1550266609), INT32_C( 910270627), INT32_C( 1866208106), INT32_C( -373825552), INT32_C( 1852814507), INT32_C( -87546396)) }, { { INT32_C( -786490570), INT32_C( -486650057), INT32_C(-1901610760), INT32_C(-1385527729), INT32_C( 1837621475), INT32_C( 362332872), INT32_C( 1409187239), INT32_C( -294514311) }, simde_mm256_set_epi32(INT32_C( -294514311), INT32_C( 1409187239), INT32_C( 362332872), INT32_C( 1837621475), INT32_C(-1385527729), INT32_C(-1901610760), INT32_C( -486650057), INT32_C( -786490570)) }, { { INT32_C(-2037006285), INT32_C(-1237137601), INT32_C(-1490902854), INT32_C(-1337182966), INT32_C( -732587886), INT32_C(-1907285545), INT32_C( 165118547), INT32_C(-1097315632) }, simde_mm256_set_epi32(INT32_C(-1097315632), INT32_C( 165118547), INT32_C(-1907285545), INT32_C( -732587886), INT32_C(-1337182966), INT32_C(-1490902854), INT32_C(-1237137601), INT32_C(-2037006285)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_setr_epi32( test_vec[i].a[ 0], test_vec[i].a[ 1], test_vec[i].a[ 2], test_vec[i].a[ 3], test_vec[i].a[ 4], test_vec[i].a[ 5], test_vec[i].a[ 6], test_vec[i].a[ 7]); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_setr_epi64x(SIMDE_MUNIT_TEST_ARGS) { const struct { int64_t a[4]; simde__m256i r; } test_vec[8] = { { { INT64_C( 3013620110861784505), INT64_C(-9156069624919168580), INT64_C( 1343723656449999612), INT64_C(-3830101585267880776) }, simde_mm256_set_epi64x(INT64_C(-3830101585267880776), INT64_C( 1343723656449999612), INT64_C(-9156069624919168580), INT64_C( 3013620110861784505)) }, { { INT64_C( 
-470898397325052178), INT64_C(-1684256644586675245), INT64_C(-8451403171467723697), INT64_C( 5467852576317781229) }, simde_mm256_set_epi64x(INT64_C( 5467852576317781229), INT64_C(-8451403171467723697), INT64_C(-1684256644586675245), INT64_C( -470898397325052178)) }, { { INT64_C(-8481091302015892038), INT64_C(-5840489628108018840), INT64_C( 8103807582752765524), INT64_C( 9048592296921391543) }, simde_mm256_set_epi64x(INT64_C( 9048592296921391543), INT64_C( 8103807582752765524), INT64_C(-5840489628108018840), INT64_C(-8481091302015892038)) }, { { INT64_C( 1422449841795305675), INT64_C( 2887994309822364165), INT64_C( 7807032920035636816), INT64_C(-3076736950419346689) }, simde_mm256_set_epi64x(INT64_C(-3076736950419346689), INT64_C( 7807032920035636816), INT64_C( 2887994309822364165), INT64_C( 1422449841795305675)) }, { { INT64_C( 3070310353568185156), INT64_C(-8852504885484410210), INT64_C( 8605078790751557478), INT64_C(-3993303917440615301) }, simde_mm256_set_epi64x(INT64_C(-3993303917440615301), INT64_C( 8605078790751557478), INT64_C(-8852504885484410210), INT64_C( 3070310353568185156)) }, { { INT64_C( 8628903781070638905), INT64_C( 7741876512722404057), INT64_C(-7211506260596057593), INT64_C( 4414889885954661792) }, simde_mm256_set_epi64x(INT64_C( 4414889885954661792), INT64_C(-7211506260596057593), INT64_C( 7741876512722404057), INT64_C( 8628903781070638905)) }, { { INT64_C( 5522184073273144975), INT64_C(-7081867462548166489), INT64_C( 9175542926859973104), INT64_C( 1769179143810464101) }, simde_mm256_set_epi64x(INT64_C( 1769179143810464101), INT64_C( 9175542926859973104), INT64_C(-7081867462548166489), INT64_C( 5522184073273144975)) }, { { INT64_C(-8500631716292798858), INT64_C( 4882720816332117442), INT64_C( 328133580565148934), INT64_C( 3537144852497440140) }, simde_mm256_set_epi64x(INT64_C( 3537144852497440140), INT64_C( 328133580565148934), INT64_C( 4882720816332117442), INT64_C(-8500631716292798858)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_setr_epi64x( test_vec[i].a[ 0], test_vec[i].a[ 1], test_vec[i].a[ 2], test_vec[i].a[ 3]); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_setr_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float32 a[8]; simde__m256 r; } test_vec[8] = { { { SIMDE_FLOAT32_C( -98.84), SIMDE_FLOAT32_C( 882.16), SIMDE_FLOAT32_C( 306.69), SIMDE_FLOAT32_C( -539.67), SIMDE_FLOAT32_C( -947.14), SIMDE_FLOAT32_C( -871.17), SIMDE_FLOAT32_C( -26.40), SIMDE_FLOAT32_C( -202.75) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -202.75), SIMDE_FLOAT32_C( -26.40), SIMDE_FLOAT32_C( -871.17), SIMDE_FLOAT32_C( -947.14), SIMDE_FLOAT32_C( -539.67), SIMDE_FLOAT32_C( 306.69), SIMDE_FLOAT32_C( 882.16), SIMDE_FLOAT32_C( -98.84)) }, { { SIMDE_FLOAT32_C( 499.74), SIMDE_FLOAT32_C( -810.04), SIMDE_FLOAT32_C( 499.26), SIMDE_FLOAT32_C( -519.32), SIMDE_FLOAT32_C( 852.97), SIMDE_FLOAT32_C( 119.58), SIMDE_FLOAT32_C( 88.58), SIMDE_FLOAT32_C( 364.48) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( 364.48), SIMDE_FLOAT32_C( 88.58), SIMDE_FLOAT32_C( 119.58), SIMDE_FLOAT32_C( 852.97), SIMDE_FLOAT32_C( -519.32), SIMDE_FLOAT32_C( 499.26), SIMDE_FLOAT32_C( -810.04), SIMDE_FLOAT32_C( 499.74)) }, { { SIMDE_FLOAT32_C( 127.60), SIMDE_FLOAT32_C( 904.28), SIMDE_FLOAT32_C( -45.75), SIMDE_FLOAT32_C( -900.72), SIMDE_FLOAT32_C( 277.91), SIMDE_FLOAT32_C( -221.10), SIMDE_FLOAT32_C( 935.26), SIMDE_FLOAT32_C( -125.20) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( 935.26), SIMDE_FLOAT32_C( -221.10), SIMDE_FLOAT32_C( 277.91), SIMDE_FLOAT32_C( -900.72), SIMDE_FLOAT32_C( -45.75), SIMDE_FLOAT32_C( 904.28), SIMDE_FLOAT32_C( 127.60)) }, { { SIMDE_FLOAT32_C( -252.48), SIMDE_FLOAT32_C( -889.53), SIMDE_FLOAT32_C( 628.46), SIMDE_FLOAT32_C( 326.01), SIMDE_FLOAT32_C( 211.05), SIMDE_FLOAT32_C( -703.39), SIMDE_FLOAT32_C( -581.63), SIMDE_FLOAT32_C( -367.12) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -367.12), SIMDE_FLOAT32_C( -581.63), SIMDE_FLOAT32_C( 
-703.39), SIMDE_FLOAT32_C( 211.05), SIMDE_FLOAT32_C( 326.01), SIMDE_FLOAT32_C( 628.46), SIMDE_FLOAT32_C( -889.53), SIMDE_FLOAT32_C( -252.48)) }, { { SIMDE_FLOAT32_C( -852.61), SIMDE_FLOAT32_C( 168.93), SIMDE_FLOAT32_C( -51.67), SIMDE_FLOAT32_C( -699.78), SIMDE_FLOAT32_C( -215.36), SIMDE_FLOAT32_C( 505.82), SIMDE_FLOAT32_C( -83.94), SIMDE_FLOAT32_C( -117.98) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -117.98), SIMDE_FLOAT32_C( -83.94), SIMDE_FLOAT32_C( 505.82), SIMDE_FLOAT32_C( -215.36), SIMDE_FLOAT32_C( -699.78), SIMDE_FLOAT32_C( -51.67), SIMDE_FLOAT32_C( 168.93), SIMDE_FLOAT32_C( -852.61)) }, { { SIMDE_FLOAT32_C( 422.80), SIMDE_FLOAT32_C( 684.40), SIMDE_FLOAT32_C( 497.91), SIMDE_FLOAT32_C( -511.24), SIMDE_FLOAT32_C( 504.14), SIMDE_FLOAT32_C( 871.91), SIMDE_FLOAT32_C( 175.65), SIMDE_FLOAT32_C( -754.38) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 175.65), SIMDE_FLOAT32_C( 871.91), SIMDE_FLOAT32_C( 504.14), SIMDE_FLOAT32_C( -511.24), SIMDE_FLOAT32_C( 497.91), SIMDE_FLOAT32_C( 684.40), SIMDE_FLOAT32_C( 422.80)) }, { { SIMDE_FLOAT32_C( -712.98), SIMDE_FLOAT32_C( 92.05), SIMDE_FLOAT32_C( -155.74), SIMDE_FLOAT32_C( 933.89), SIMDE_FLOAT32_C( 385.65), SIMDE_FLOAT32_C( -406.91), SIMDE_FLOAT32_C( -999.59), SIMDE_FLOAT32_C( -851.48) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -851.48), SIMDE_FLOAT32_C( -999.59), SIMDE_FLOAT32_C( -406.91), SIMDE_FLOAT32_C( 385.65), SIMDE_FLOAT32_C( 933.89), SIMDE_FLOAT32_C( -155.74), SIMDE_FLOAT32_C( 92.05), SIMDE_FLOAT32_C( -712.98)) }, { { SIMDE_FLOAT32_C( -182.06), SIMDE_FLOAT32_C( -447.19), SIMDE_FLOAT32_C( -170.21), SIMDE_FLOAT32_C( -504.91), SIMDE_FLOAT32_C( 448.84), SIMDE_FLOAT32_C( -232.24), SIMDE_FLOAT32_C( -688.18), SIMDE_FLOAT32_C( -405.72) }, simde_mm256_set_ps(SIMDE_FLOAT32_C( -405.72), SIMDE_FLOAT32_C( -688.18), SIMDE_FLOAT32_C( -232.24), SIMDE_FLOAT32_C( 448.84), SIMDE_FLOAT32_C( -504.91), SIMDE_FLOAT32_C( -170.21), SIMDE_FLOAT32_C( -447.19), SIMDE_FLOAT32_C( -182.06)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) 
/ sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_setr_ps( test_vec[i].a[ 0], test_vec[i].a[ 1], test_vec[i].a[ 2], test_vec[i].a[ 3], test_vec[i].a[ 4], test_vec[i].a[ 5], test_vec[i].a[ 6], test_vec[i].a[ 7]); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_setr_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float64 a[4]; simde__m256d r; } test_vec[8] = { { { SIMDE_FLOAT64_C( 648.06), SIMDE_FLOAT64_C( -427.64), SIMDE_FLOAT64_C( 870.51), SIMDE_FLOAT64_C( -400.08) }, simde_mm256_set_pd(SIMDE_FLOAT64_C( -400.08), SIMDE_FLOAT64_C( 870.51), SIMDE_FLOAT64_C( -427.64), SIMDE_FLOAT64_C( 648.06)) }, { { SIMDE_FLOAT64_C( 631.12), SIMDE_FLOAT64_C( 452.84), SIMDE_FLOAT64_C( 521.67), SIMDE_FLOAT64_C( 516.74) }, simde_mm256_set_pd(SIMDE_FLOAT64_C( 516.74), SIMDE_FLOAT64_C( 521.67), SIMDE_FLOAT64_C( 452.84), SIMDE_FLOAT64_C( 631.12)) }, { { SIMDE_FLOAT64_C( -967.92), SIMDE_FLOAT64_C( 20.70), SIMDE_FLOAT64_C( 301.61), SIMDE_FLOAT64_C( -721.26) }, simde_mm256_set_pd(SIMDE_FLOAT64_C( -721.26), SIMDE_FLOAT64_C( 301.61), SIMDE_FLOAT64_C( 20.70), SIMDE_FLOAT64_C( -967.92)) }, { { SIMDE_FLOAT64_C( 324.87), SIMDE_FLOAT64_C( -688.66), SIMDE_FLOAT64_C( -942.28), SIMDE_FLOAT64_C( -476.77) }, simde_mm256_set_pd(SIMDE_FLOAT64_C( -476.77), SIMDE_FLOAT64_C( -942.28), SIMDE_FLOAT64_C( -688.66), SIMDE_FLOAT64_C( 324.87)) }, { { SIMDE_FLOAT64_C( -951.83), SIMDE_FLOAT64_C( 77.38), SIMDE_FLOAT64_C( 95.18), SIMDE_FLOAT64_C( -682.02) }, simde_mm256_set_pd(SIMDE_FLOAT64_C( -682.02), SIMDE_FLOAT64_C( 95.18), SIMDE_FLOAT64_C( 77.38), SIMDE_FLOAT64_C( -951.83)) }, { { SIMDE_FLOAT64_C( -650.77), SIMDE_FLOAT64_C( -285.31), SIMDE_FLOAT64_C( 662.58), SIMDE_FLOAT64_C( 693.61) }, simde_mm256_set_pd(SIMDE_FLOAT64_C( 693.61), SIMDE_FLOAT64_C( 662.58), SIMDE_FLOAT64_C( -285.31), SIMDE_FLOAT64_C( -650.77)) }, { { SIMDE_FLOAT64_C( 209.43), SIMDE_FLOAT64_C( 188.93), SIMDE_FLOAT64_C( -264.78), SIMDE_FLOAT64_C( 938.62) }, simde_mm256_set_pd(SIMDE_FLOAT64_C( 938.62), 
SIMDE_FLOAT64_C( -264.78), SIMDE_FLOAT64_C( 188.93), SIMDE_FLOAT64_C( 209.43)) }, { { SIMDE_FLOAT64_C( 887.57), SIMDE_FLOAT64_C( 787.01), SIMDE_FLOAT64_C( -658.13), SIMDE_FLOAT64_C( 241.09) }, simde_mm256_set_pd(SIMDE_FLOAT64_C( 241.09), SIMDE_FLOAT64_C( -658.13), SIMDE_FLOAT64_C( 787.01), SIMDE_FLOAT64_C( 887.57)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_setr_pd( test_vec[i].a[ 0], test_vec[i].a[ 1], test_vec[i].a[ 2], test_vec[i].a[ 3]); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_setr_m128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m256 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -682.25), SIMDE_FLOAT32_C( -899.79), SIMDE_FLOAT32_C( -478.94), SIMDE_FLOAT32_C( 364.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( -650.11), SIMDE_FLOAT32_C( -192.16), SIMDE_FLOAT32_C( 808.30), SIMDE_FLOAT32_C( 519.14)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -650.11), SIMDE_FLOAT32_C( -192.16), SIMDE_FLOAT32_C( 808.30), SIMDE_FLOAT32_C( 519.14), SIMDE_FLOAT32_C( -682.25), SIMDE_FLOAT32_C( -899.79), SIMDE_FLOAT32_C( -478.94), SIMDE_FLOAT32_C( 364.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 16.48), SIMDE_FLOAT32_C( 517.23), SIMDE_FLOAT32_C( -546.20), SIMDE_FLOAT32_C( -61.05)), simde_mm_set_ps(SIMDE_FLOAT32_C( 715.06), SIMDE_FLOAT32_C( -476.50), SIMDE_FLOAT32_C( -479.17), SIMDE_FLOAT32_C( -869.09)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 715.06), SIMDE_FLOAT32_C( -476.50), SIMDE_FLOAT32_C( -479.17), SIMDE_FLOAT32_C( -869.09), SIMDE_FLOAT32_C( 16.48), SIMDE_FLOAT32_C( 517.23), SIMDE_FLOAT32_C( -546.20), SIMDE_FLOAT32_C( -61.05)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -779.37), SIMDE_FLOAT32_C( 30.06), SIMDE_FLOAT32_C( -690.77), SIMDE_FLOAT32_C( 921.96)), simde_mm_set_ps(SIMDE_FLOAT32_C( -173.53), SIMDE_FLOAT32_C( 887.42), SIMDE_FLOAT32_C( 309.36), SIMDE_FLOAT32_C( 929.48)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -173.53), SIMDE_FLOAT32_C( 887.42), 
SIMDE_FLOAT32_C( 309.36), SIMDE_FLOAT32_C( 929.48), SIMDE_FLOAT32_C( -779.37), SIMDE_FLOAT32_C( 30.06), SIMDE_FLOAT32_C( -690.77), SIMDE_FLOAT32_C( 921.96)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 766.53), SIMDE_FLOAT32_C( -675.92), SIMDE_FLOAT32_C( -948.96), SIMDE_FLOAT32_C( 521.94)), simde_mm_set_ps(SIMDE_FLOAT32_C( 725.37), SIMDE_FLOAT32_C( -802.67), SIMDE_FLOAT32_C( -800.62), SIMDE_FLOAT32_C( 419.68)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 725.37), SIMDE_FLOAT32_C( -802.67), SIMDE_FLOAT32_C( -800.62), SIMDE_FLOAT32_C( 419.68), SIMDE_FLOAT32_C( 766.53), SIMDE_FLOAT32_C( -675.92), SIMDE_FLOAT32_C( -948.96), SIMDE_FLOAT32_C( 521.94)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 133.15), SIMDE_FLOAT32_C( 853.30), SIMDE_FLOAT32_C( 295.19), SIMDE_FLOAT32_C( -233.49)), simde_mm_set_ps(SIMDE_FLOAT32_C( 973.48), SIMDE_FLOAT32_C( 235.18), SIMDE_FLOAT32_C( 111.09), SIMDE_FLOAT32_C( -515.37)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 973.48), SIMDE_FLOAT32_C( 235.18), SIMDE_FLOAT32_C( 111.09), SIMDE_FLOAT32_C( -515.37), SIMDE_FLOAT32_C( 133.15), SIMDE_FLOAT32_C( 853.30), SIMDE_FLOAT32_C( 295.19), SIMDE_FLOAT32_C( -233.49)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -131.35), SIMDE_FLOAT32_C( 737.21), SIMDE_FLOAT32_C( 816.16), SIMDE_FLOAT32_C( 442.16)), simde_mm_set_ps(SIMDE_FLOAT32_C( -614.82), SIMDE_FLOAT32_C( -170.44), SIMDE_FLOAT32_C( 851.94), SIMDE_FLOAT32_C( 235.41)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -614.82), SIMDE_FLOAT32_C( -170.44), SIMDE_FLOAT32_C( 851.94), SIMDE_FLOAT32_C( 235.41), SIMDE_FLOAT32_C( -131.35), SIMDE_FLOAT32_C( 737.21), SIMDE_FLOAT32_C( 816.16), SIMDE_FLOAT32_C( 442.16)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 291.38), SIMDE_FLOAT32_C( -442.22), SIMDE_FLOAT32_C( 756.36), SIMDE_FLOAT32_C( -768.65)), simde_mm_set_ps(SIMDE_FLOAT32_C( -266.91), SIMDE_FLOAT32_C( -275.67), SIMDE_FLOAT32_C( -687.10), SIMDE_FLOAT32_C( 236.32)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -266.91), SIMDE_FLOAT32_C( -275.67), SIMDE_FLOAT32_C( -687.10), SIMDE_FLOAT32_C( 236.32), SIMDE_FLOAT32_C( 
291.38), SIMDE_FLOAT32_C( -442.22), SIMDE_FLOAT32_C( 756.36), SIMDE_FLOAT32_C( -768.65)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 561.83), SIMDE_FLOAT32_C( 979.61), SIMDE_FLOAT32_C( 43.21), SIMDE_FLOAT32_C( -386.38)), simde_mm_set_ps(SIMDE_FLOAT32_C( -879.64), SIMDE_FLOAT32_C( -192.97), SIMDE_FLOAT32_C( -876.27), SIMDE_FLOAT32_C( -36.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -879.64), SIMDE_FLOAT32_C( -192.97), SIMDE_FLOAT32_C( -876.27), SIMDE_FLOAT32_C( -36.00), SIMDE_FLOAT32_C( 561.83), SIMDE_FLOAT32_C( 979.61), SIMDE_FLOAT32_C( 43.21), SIMDE_FLOAT32_C( -386.38)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_setr_m128(test_vec[i].a, test_vec[i].b); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_setr_m128d(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m256d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -456.75), SIMDE_FLOAT64_C( -671.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -831.34), SIMDE_FLOAT64_C( 280.05)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -831.34), SIMDE_FLOAT64_C( 280.05), SIMDE_FLOAT64_C( -456.75), SIMDE_FLOAT64_C( -671.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 937.15), SIMDE_FLOAT64_C( -608.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( -231.75), SIMDE_FLOAT64_C( -301.21)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -231.75), SIMDE_FLOAT64_C( -301.21), SIMDE_FLOAT64_C( 937.15), SIMDE_FLOAT64_C( -608.20)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -418.68), SIMDE_FLOAT64_C( -219.09)), simde_mm_set_pd(SIMDE_FLOAT64_C( -262.95), SIMDE_FLOAT64_C( -857.27)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -262.95), SIMDE_FLOAT64_C( -857.27), SIMDE_FLOAT64_C( -418.68), SIMDE_FLOAT64_C( -219.09)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 154.88), SIMDE_FLOAT64_C( 64.12)), simde_mm_set_pd(SIMDE_FLOAT64_C( 231.57), SIMDE_FLOAT64_C( 996.12)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 231.57), SIMDE_FLOAT64_C( 996.12), SIMDE_FLOAT64_C( 154.88), SIMDE_FLOAT64_C( 64.12)) 
}, { simde_mm_set_pd(SIMDE_FLOAT64_C( -46.48), SIMDE_FLOAT64_C( -511.22)), simde_mm_set_pd(SIMDE_FLOAT64_C( 951.46), SIMDE_FLOAT64_C( 771.21)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 951.46), SIMDE_FLOAT64_C( 771.21), SIMDE_FLOAT64_C( -46.48), SIMDE_FLOAT64_C( -511.22)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -492.75), SIMDE_FLOAT64_C( -725.08)), simde_mm_set_pd(SIMDE_FLOAT64_C( -545.59), SIMDE_FLOAT64_C( -960.12)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -545.59), SIMDE_FLOAT64_C( -960.12), SIMDE_FLOAT64_C( -492.75), SIMDE_FLOAT64_C( -725.08)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -9.74), SIMDE_FLOAT64_C( -44.37)), simde_mm_set_pd(SIMDE_FLOAT64_C( 854.62), SIMDE_FLOAT64_C( -942.41)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 854.62), SIMDE_FLOAT64_C( -942.41), SIMDE_FLOAT64_C( -9.74), SIMDE_FLOAT64_C( -44.37)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 575.33), SIMDE_FLOAT64_C( -493.55)), simde_mm_set_pd(SIMDE_FLOAT64_C( 73.10), SIMDE_FLOAT64_C( 90.67)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 73.10), SIMDE_FLOAT64_C( 90.67), SIMDE_FLOAT64_C( 575.33), SIMDE_FLOAT64_C( -493.55)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_setr_m128d(test_vec[i].a, test_vec[i].b); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_setr_m128i(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m256i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C(-1742712724), INT32_C( -314784100), INT32_C( 986737210), INT32_C( 1275380805)), simde_mm_set_epi32(INT32_C( -652328462), INT32_C(-1178876865), INT32_C(-2116026355), INT32_C( 283851183)), simde_mm256_set_epi32(INT32_C( -652328462), INT32_C(-1178876865), INT32_C(-2116026355), INT32_C( 283851183), INT32_C(-1742712724), INT32_C( -314784100), INT32_C( 986737210), INT32_C( 1275380805)) }, { simde_mm_set_epi32(INT32_C( 1950785462), INT32_C(-1647057227), INT32_C( 1610379205), INT32_C( -779524107)), simde_mm_set_epi32(INT32_C( 463748536), 
INT32_C( -797772071), INT32_C( 1736524491), INT32_C( 1281308863)), simde_mm256_set_epi32(INT32_C( 463748536), INT32_C( -797772071), INT32_C( 1736524491), INT32_C( 1281308863), INT32_C( 1950785462), INT32_C(-1647057227), INT32_C( 1610379205), INT32_C( -779524107)) }, { simde_mm_set_epi32(INT32_C(-2008212267), INT32_C(-2138916541), INT32_C(-1006728926), INT32_C(-1435438838)), simde_mm_set_epi32(INT32_C( -133349630), INT32_C(-1192564707), INT32_C(-2002224298), INT32_C( 917327905)), simde_mm256_set_epi32(INT32_C( -133349630), INT32_C(-1192564707), INT32_C(-2002224298), INT32_C( 917327905), INT32_C(-2008212267), INT32_C(-2138916541), INT32_C(-1006728926), INT32_C(-1435438838)) }, { simde_mm_set_epi32(INT32_C( -870567789), INT32_C(-1650884654), INT32_C( 516950890), INT32_C(-1478935293)), simde_mm_set_epi32(INT32_C( 346251173), INT32_C( 44036763), INT32_C( -453299917), INT32_C( 1313402969)), simde_mm256_set_epi32(INT32_C( 346251173), INT32_C( 44036763), INT32_C( -453299917), INT32_C( 1313402969), INT32_C( -870567789), INT32_C(-1650884654), INT32_C( 516950890), INT32_C(-1478935293)) }, { simde_mm_set_epi32(INT32_C( 1685745491), INT32_C(-1092039924), INT32_C( 1442866872), INT32_C(-1523614432)), simde_mm_set_epi32(INT32_C( 863519834), INT32_C( 230110187), INT32_C( 812017634), INT32_C( 1688191143)), simde_mm256_set_epi32(INT32_C( 863519834), INT32_C( 230110187), INT32_C( 812017634), INT32_C( 1688191143), INT32_C( 1685745491), INT32_C(-1092039924), INT32_C( 1442866872), INT32_C(-1523614432)) }, { simde_mm_set_epi32(INT32_C( 245453619), INT32_C( 1778016121), INT32_C( 58675090), INT32_C( 1219256368)), simde_mm_set_epi32(INT32_C( 1174470085), INT32_C( -388376691), INT32_C( -990477533), INT32_C( -476034642)), simde_mm256_set_epi32(INT32_C( 1174470085), INT32_C( -388376691), INT32_C( -990477533), INT32_C( -476034642), INT32_C( 245453619), INT32_C( 1778016121), INT32_C( 58675090), INT32_C( 1219256368)) }, { simde_mm_set_epi32(INT32_C( 4875253), INT32_C(-1938130041), INT32_C( 
-829985839), INT32_C( 1737785848)), simde_mm_set_epi32(INT32_C( 645358488), INT32_C( 69189244), INT32_C( 1744086784), INT32_C( 473310154)), simde_mm256_set_epi32(INT32_C( 645358488), INT32_C( 69189244), INT32_C( 1744086784), INT32_C( 473310154), INT32_C( 4875253), INT32_C(-1938130041), INT32_C( -829985839), INT32_C( 1737785848)) }, { simde_mm_set_epi32(INT32_C( 804470839), INT32_C(-1989324616), INT32_C( 2138294939), INT32_C( -20370473)), simde_mm_set_epi32(INT32_C( 587534668), INT32_C( 665646160), INT32_C(-1572975914), INT32_C( 1262599280)), simde_mm256_set_epi32(INT32_C( 587534668), INT32_C( 665646160), INT32_C(-1572975914), INT32_C( 1262599280), INT32_C( 804470839), INT32_C(-1989324616), INT32_C( 2138294939), INT32_C( -20370473)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_setr_m128i(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_shuffle_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 r1; simde__m256 r2; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -37.53), SIMDE_FLOAT32_C( 505.45), SIMDE_FLOAT32_C( -772.05), SIMDE_FLOAT32_C( -524.38), SIMDE_FLOAT32_C( 32.28), SIMDE_FLOAT32_C( 575.28), SIMDE_FLOAT32_C( 459.50), SIMDE_FLOAT32_C( -869.92)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 190.31), SIMDE_FLOAT32_C( -827.59), SIMDE_FLOAT32_C( -501.09), SIMDE_FLOAT32_C( 667.40), SIMDE_FLOAT32_C( -205.26), SIMDE_FLOAT32_C( 908.59), SIMDE_FLOAT32_C( 448.39), SIMDE_FLOAT32_C( -264.01)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -501.09), SIMDE_FLOAT32_C( -501.09), SIMDE_FLOAT32_C( -772.05), SIMDE_FLOAT32_C( -772.05), SIMDE_FLOAT32_C( 448.39), SIMDE_FLOAT32_C( 448.39), SIMDE_FLOAT32_C( 459.50), SIMDE_FLOAT32_C( 459.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -827.59), SIMDE_FLOAT32_C( -827.59), SIMDE_FLOAT32_C( 505.45), SIMDE_FLOAT32_C( 505.45), SIMDE_FLOAT32_C( 908.59), SIMDE_FLOAT32_C( 908.59), SIMDE_FLOAT32_C( 
575.28), SIMDE_FLOAT32_C( 575.28)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -651.57), SIMDE_FLOAT32_C( -282.66), SIMDE_FLOAT32_C( -530.16), SIMDE_FLOAT32_C( -552.16), SIMDE_FLOAT32_C( 586.68), SIMDE_FLOAT32_C( 706.29), SIMDE_FLOAT32_C( 537.21), SIMDE_FLOAT32_C( -450.04)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -642.32), SIMDE_FLOAT32_C( -725.64), SIMDE_FLOAT32_C( 162.03), SIMDE_FLOAT32_C( 624.91), SIMDE_FLOAT32_C( 415.33), SIMDE_FLOAT32_C( -62.25), SIMDE_FLOAT32_C( 445.83), SIMDE_FLOAT32_C( -888.88)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 162.03), SIMDE_FLOAT32_C( 162.03), SIMDE_FLOAT32_C( -530.16), SIMDE_FLOAT32_C( -530.16), SIMDE_FLOAT32_C( 445.83), SIMDE_FLOAT32_C( 445.83), SIMDE_FLOAT32_C( 537.21), SIMDE_FLOAT32_C( 537.21)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -725.64), SIMDE_FLOAT32_C( -725.64), SIMDE_FLOAT32_C( -282.66), SIMDE_FLOAT32_C( -282.66), SIMDE_FLOAT32_C( -62.25), SIMDE_FLOAT32_C( -62.25), SIMDE_FLOAT32_C( 706.29), SIMDE_FLOAT32_C( 706.29)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -586.61), SIMDE_FLOAT32_C( -444.58), SIMDE_FLOAT32_C( 804.56), SIMDE_FLOAT32_C( -661.78), SIMDE_FLOAT32_C( -398.96), SIMDE_FLOAT32_C( 555.99), SIMDE_FLOAT32_C( 255.05), SIMDE_FLOAT32_C( 326.05)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 59.74), SIMDE_FLOAT32_C( 544.15), SIMDE_FLOAT32_C( 414.09), SIMDE_FLOAT32_C( 11.73), SIMDE_FLOAT32_C( 678.11), SIMDE_FLOAT32_C( 264.09), SIMDE_FLOAT32_C( 492.67), SIMDE_FLOAT32_C( -690.47)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 414.09), SIMDE_FLOAT32_C( 414.09), SIMDE_FLOAT32_C( 804.56), SIMDE_FLOAT32_C( 804.56), SIMDE_FLOAT32_C( 492.67), SIMDE_FLOAT32_C( 492.67), SIMDE_FLOAT32_C( 255.05), SIMDE_FLOAT32_C( 255.05)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 544.15), SIMDE_FLOAT32_C( 544.15), SIMDE_FLOAT32_C( -444.58), SIMDE_FLOAT32_C( -444.58), SIMDE_FLOAT32_C( 264.09), SIMDE_FLOAT32_C( 264.09), SIMDE_FLOAT32_C( 555.99), SIMDE_FLOAT32_C( 555.99)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -738.46), SIMDE_FLOAT32_C( 931.97), SIMDE_FLOAT32_C( -722.34), 
SIMDE_FLOAT32_C( -600.75), SIMDE_FLOAT32_C( -215.41), SIMDE_FLOAT32_C( -472.40), SIMDE_FLOAT32_C( -60.64), SIMDE_FLOAT32_C( 120.78)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 773.86), SIMDE_FLOAT32_C( -298.57), SIMDE_FLOAT32_C( 440.00), SIMDE_FLOAT32_C( -205.51), SIMDE_FLOAT32_C( -237.18), SIMDE_FLOAT32_C( -760.22), SIMDE_FLOAT32_C( -446.09), SIMDE_FLOAT32_C( -381.56)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 440.00), SIMDE_FLOAT32_C( 440.00), SIMDE_FLOAT32_C( -722.34), SIMDE_FLOAT32_C( -722.34), SIMDE_FLOAT32_C( -446.09), SIMDE_FLOAT32_C( -446.09), SIMDE_FLOAT32_C( -60.64), SIMDE_FLOAT32_C( -60.64)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -298.57), SIMDE_FLOAT32_C( -298.57), SIMDE_FLOAT32_C( 931.97), SIMDE_FLOAT32_C( 931.97), SIMDE_FLOAT32_C( -760.22), SIMDE_FLOAT32_C( -760.22), SIMDE_FLOAT32_C( -472.40), SIMDE_FLOAT32_C( -472.40)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -868.52), SIMDE_FLOAT32_C( -561.35), SIMDE_FLOAT32_C( -571.37), SIMDE_FLOAT32_C( 511.95), SIMDE_FLOAT32_C( 794.40), SIMDE_FLOAT32_C( 468.29), SIMDE_FLOAT32_C( 949.07), SIMDE_FLOAT32_C( 504.01)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -600.88), SIMDE_FLOAT32_C( -128.06), SIMDE_FLOAT32_C( -3.70), SIMDE_FLOAT32_C( -620.63), SIMDE_FLOAT32_C( 888.33), SIMDE_FLOAT32_C( 864.93), SIMDE_FLOAT32_C( -548.30), SIMDE_FLOAT32_C( 693.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -3.70), SIMDE_FLOAT32_C( -3.70), SIMDE_FLOAT32_C( -571.37), SIMDE_FLOAT32_C( -571.37), SIMDE_FLOAT32_C( -548.30), SIMDE_FLOAT32_C( -548.30), SIMDE_FLOAT32_C( 949.07), SIMDE_FLOAT32_C( 949.07)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -128.06), SIMDE_FLOAT32_C( -128.06), SIMDE_FLOAT32_C( -561.35), SIMDE_FLOAT32_C( -561.35), SIMDE_FLOAT32_C( 864.93), SIMDE_FLOAT32_C( 864.93), SIMDE_FLOAT32_C( 468.29), SIMDE_FLOAT32_C( 468.29)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -274.24), SIMDE_FLOAT32_C( -910.17), SIMDE_FLOAT32_C( 548.80), SIMDE_FLOAT32_C( -838.00), SIMDE_FLOAT32_C( -379.63), SIMDE_FLOAT32_C( 775.00), SIMDE_FLOAT32_C( -238.61), SIMDE_FLOAT32_C( -278.26)), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( 28.03), SIMDE_FLOAT32_C( -206.70), SIMDE_FLOAT32_C( -80.05), SIMDE_FLOAT32_C( 380.68), SIMDE_FLOAT32_C( 342.48), SIMDE_FLOAT32_C( 525.81), SIMDE_FLOAT32_C( -202.62), SIMDE_FLOAT32_C( 412.48)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -80.05), SIMDE_FLOAT32_C( -80.05), SIMDE_FLOAT32_C( 548.80), SIMDE_FLOAT32_C( 548.80), SIMDE_FLOAT32_C( -202.62), SIMDE_FLOAT32_C( -202.62), SIMDE_FLOAT32_C( -238.61), SIMDE_FLOAT32_C( -238.61)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -206.70), SIMDE_FLOAT32_C( -206.70), SIMDE_FLOAT32_C( -910.17), SIMDE_FLOAT32_C( -910.17), SIMDE_FLOAT32_C( 525.81), SIMDE_FLOAT32_C( 525.81), SIMDE_FLOAT32_C( 775.00), SIMDE_FLOAT32_C( 775.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 270.95), SIMDE_FLOAT32_C( 727.79), SIMDE_FLOAT32_C( 361.48), SIMDE_FLOAT32_C( 843.29), SIMDE_FLOAT32_C( -519.67), SIMDE_FLOAT32_C( -181.50), SIMDE_FLOAT32_C( -112.74), SIMDE_FLOAT32_C( 545.62)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -132.55), SIMDE_FLOAT32_C( -718.86), SIMDE_FLOAT32_C( 142.59), SIMDE_FLOAT32_C( 742.01), SIMDE_FLOAT32_C( 593.39), SIMDE_FLOAT32_C( 515.42), SIMDE_FLOAT32_C( 897.24), SIMDE_FLOAT32_C( 759.74)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 142.59), SIMDE_FLOAT32_C( 142.59), SIMDE_FLOAT32_C( 361.48), SIMDE_FLOAT32_C( 361.48), SIMDE_FLOAT32_C( 897.24), SIMDE_FLOAT32_C( 897.24), SIMDE_FLOAT32_C( -112.74), SIMDE_FLOAT32_C( -112.74)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -718.86), SIMDE_FLOAT32_C( -718.86), SIMDE_FLOAT32_C( 727.79), SIMDE_FLOAT32_C( 727.79), SIMDE_FLOAT32_C( 515.42), SIMDE_FLOAT32_C( 515.42), SIMDE_FLOAT32_C( -181.50), SIMDE_FLOAT32_C( -181.50)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -964.03), SIMDE_FLOAT32_C( 334.31), SIMDE_FLOAT32_C( -520.63), SIMDE_FLOAT32_C( -60.01), SIMDE_FLOAT32_C( 788.31), SIMDE_FLOAT32_C( -532.00), SIMDE_FLOAT32_C( 146.02), SIMDE_FLOAT32_C( -45.94)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -225.66), SIMDE_FLOAT32_C( -255.31), SIMDE_FLOAT32_C( 440.71), SIMDE_FLOAT32_C( -673.25), SIMDE_FLOAT32_C( 
-649.50), SIMDE_FLOAT32_C( -704.29), SIMDE_FLOAT32_C( 340.20), SIMDE_FLOAT32_C( -395.47)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 440.71), SIMDE_FLOAT32_C( 440.71), SIMDE_FLOAT32_C( -520.63), SIMDE_FLOAT32_C( -520.63), SIMDE_FLOAT32_C( 340.20), SIMDE_FLOAT32_C( 340.20), SIMDE_FLOAT32_C( 146.02), SIMDE_FLOAT32_C( 146.02)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -255.31), SIMDE_FLOAT32_C( -255.31), SIMDE_FLOAT32_C( 334.31), SIMDE_FLOAT32_C( 334.31), SIMDE_FLOAT32_C( -704.29), SIMDE_FLOAT32_C( -704.29), SIMDE_FLOAT32_C( -532.00), SIMDE_FLOAT32_C( -532.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r; r = simde_mm256_shuffle_ps(test_vec[i].a, test_vec[i].b, 0x55); simde_assert_m256_close(r, test_vec[i].r1, 1); r = simde_mm256_shuffle_ps(test_vec[i].a, test_vec[i].b, 0xaa); simde_assert_m256_close(r, test_vec[i].r2, 1); } return 0; }
/* Table-driven check of simde_mm256_shuffle_pd().
 * Each of the 8 rows holds two inputs (a, b) and two expected vectors:
 * r1 for immediate 0x5 and r2 for immediate 0xa (see the loop after the table).
 * Reading the simde_mm256_set_pd argument lists left to right, r1 is built from
 * (b's 2nd, a's 1st, b's 4th, a's 3rd) argument and r2 from
 * (b's 1st, a's 2nd, b's 3rd, a's 4th) argument.
 * The body only uses its own locals; it returns 0 after every row was checked. */
static int test_simde_mm256_shuffle_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d r1; simde__m256d r2; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 934.66), SIMDE_FLOAT64_C( -881.67), SIMDE_FLOAT64_C( 836.94), SIMDE_FLOAT64_C( -777.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -249.31), SIMDE_FLOAT64_C( 364.30), SIMDE_FLOAT64_C( -553.11), SIMDE_FLOAT64_C( -269.32)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 364.30), SIMDE_FLOAT64_C( 934.66), SIMDE_FLOAT64_C( -269.32), SIMDE_FLOAT64_C( 836.94)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -249.31), SIMDE_FLOAT64_C( -881.67), SIMDE_FLOAT64_C( -553.11), SIMDE_FLOAT64_C( -777.20)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -570.38), SIMDE_FLOAT64_C( 768.57), SIMDE_FLOAT64_C( 912.15), SIMDE_FLOAT64_C( -23.81)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -107.98), SIMDE_FLOAT64_C( -226.33), SIMDE_FLOAT64_C( 924.14), SIMDE_FLOAT64_C( -792.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -226.33), SIMDE_FLOAT64_C( -570.38), SIMDE_FLOAT64_C( -792.70), SIMDE_FLOAT64_C( 912.15)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -107.98), SIMDE_FLOAT64_C( 
768.57), SIMDE_FLOAT64_C( 924.14), SIMDE_FLOAT64_C( -23.81)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -307.34), SIMDE_FLOAT64_C( 256.70), SIMDE_FLOAT64_C( 615.34), SIMDE_FLOAT64_C( 966.02)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 80.56), SIMDE_FLOAT64_C( -102.88), SIMDE_FLOAT64_C( 558.25), SIMDE_FLOAT64_C( 907.54)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -102.88), SIMDE_FLOAT64_C( -307.34), SIMDE_FLOAT64_C( 907.54), SIMDE_FLOAT64_C( 615.34)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 80.56), SIMDE_FLOAT64_C( 256.70), SIMDE_FLOAT64_C( 558.25), SIMDE_FLOAT64_C( 966.02)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -428.91), SIMDE_FLOAT64_C( -946.94), SIMDE_FLOAT64_C( -242.51), SIMDE_FLOAT64_C( 207.30)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 871.74), SIMDE_FLOAT64_C( 294.25), SIMDE_FLOAT64_C( -23.76), SIMDE_FLOAT64_C( 857.02)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 294.25), SIMDE_FLOAT64_C( -428.91), SIMDE_FLOAT64_C( 857.02), SIMDE_FLOAT64_C( -242.51)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 871.74), SIMDE_FLOAT64_C( -946.94), SIMDE_FLOAT64_C( -23.76), SIMDE_FLOAT64_C( 207.30)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 684.37), SIMDE_FLOAT64_C( -77.07), SIMDE_FLOAT64_C( -492.40), SIMDE_FLOAT64_C( -711.90)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 911.18), SIMDE_FLOAT64_C( -875.79), SIMDE_FLOAT64_C( 168.17), SIMDE_FLOAT64_C( -582.90)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -875.79), SIMDE_FLOAT64_C( 684.37), SIMDE_FLOAT64_C( -582.90), SIMDE_FLOAT64_C( -492.40)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 911.18), SIMDE_FLOAT64_C( -77.07), SIMDE_FLOAT64_C( 168.17), SIMDE_FLOAT64_C( -711.90)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 844.28), SIMDE_FLOAT64_C( -547.02), SIMDE_FLOAT64_C( -536.51), SIMDE_FLOAT64_C( -341.28)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -205.14), SIMDE_FLOAT64_C( 35.47), SIMDE_FLOAT64_C( 536.74), SIMDE_FLOAT64_C( 843.54)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 35.47), SIMDE_FLOAT64_C( 844.28), SIMDE_FLOAT64_C( 843.54), SIMDE_FLOAT64_C( -536.51)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 
-205.14), SIMDE_FLOAT64_C( -547.02), SIMDE_FLOAT64_C( 536.74), SIMDE_FLOAT64_C( -341.28)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 597.24), SIMDE_FLOAT64_C( 73.58), SIMDE_FLOAT64_C( 575.62), SIMDE_FLOAT64_C( -337.42)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 430.69), SIMDE_FLOAT64_C( -764.62), SIMDE_FLOAT64_C( 152.29), SIMDE_FLOAT64_C( 529.08)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -764.62), SIMDE_FLOAT64_C( 597.24), SIMDE_FLOAT64_C( 529.08), SIMDE_FLOAT64_C( 575.62)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 430.69), SIMDE_FLOAT64_C( 73.58), SIMDE_FLOAT64_C( 152.29), SIMDE_FLOAT64_C( -337.42)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 276.59), SIMDE_FLOAT64_C( 918.52), SIMDE_FLOAT64_C( 859.45), SIMDE_FLOAT64_C( 26.68)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -365.57), SIMDE_FLOAT64_C( 780.68), SIMDE_FLOAT64_C( 333.70), SIMDE_FLOAT64_C( -391.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 780.68), SIMDE_FLOAT64_C( 276.59), SIMDE_FLOAT64_C( -391.20), SIMDE_FLOAT64_C( 859.45)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -365.57), SIMDE_FLOAT64_C( 918.52), SIMDE_FLOAT64_C( 333.70), SIMDE_FLOAT64_C( 26.68)) } };
/* Run both immediates (0x5, then 0xa) on every row and compare with r1 / r2.
 * The trailing 1 goes to the close-comparison helper (presumably its
 * precision -- confirm against the helper's definition). */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r; r = simde_mm256_shuffle_pd(test_vec[i].a, test_vec[i].b, 0x5); simde_assert_m256d_close(r, test_vec[i].r1, 1); r = simde_mm256_shuffle_pd(test_vec[i].a, test_vec[i].b, 0xa); simde_assert_m256d_close(r, test_vec[i].r2, 1); } return 0; }
/* Table-driven check of simde_mm256_sqrt_ps().
 * Each of the 8 rows pairs an input vector a with the expected vector r; the
 * expected values are written with two decimals (e.g. 37.27 -> 6.10), and the
 * loop after the table compares via simde_assert_m256_close(..., 1).
 * Returns 0 after every row was checked. */
static int test_simde_mm256_sqrt_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 37.27), SIMDE_FLOAT32_C( 842.37), SIMDE_FLOAT32_C( 821.35), SIMDE_FLOAT32_C( 882.42), SIMDE_FLOAT32_C( 506.85), SIMDE_FLOAT32_C( 418.78), SIMDE_FLOAT32_C( 759.20), SIMDE_FLOAT32_C( 903.29)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 6.10), SIMDE_FLOAT32_C( 29.02), SIMDE_FLOAT32_C( 28.66), SIMDE_FLOAT32_C( 29.71), SIMDE_FLOAT32_C( 22.51), SIMDE_FLOAT32_C( 20.46), SIMDE_FLOAT32_C( 27.55), SIMDE_FLOAT32_C( 30.05)) }, { 
simde_mm256_set_ps(SIMDE_FLOAT32_C( 675.11), SIMDE_FLOAT32_C( 473.44), SIMDE_FLOAT32_C( 936.76), SIMDE_FLOAT32_C( 315.53), SIMDE_FLOAT32_C( 585.70), SIMDE_FLOAT32_C( 466.99), SIMDE_FLOAT32_C( 876.99), SIMDE_FLOAT32_C( 421.09)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 25.98), SIMDE_FLOAT32_C( 21.76), SIMDE_FLOAT32_C( 30.61), SIMDE_FLOAT32_C( 17.76), SIMDE_FLOAT32_C( 24.20), SIMDE_FLOAT32_C( 21.61), SIMDE_FLOAT32_C( 29.61), SIMDE_FLOAT32_C( 20.52)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 455.63), SIMDE_FLOAT32_C( 708.48), SIMDE_FLOAT32_C( 426.65), SIMDE_FLOAT32_C( 16.24), SIMDE_FLOAT32_C( 899.49), SIMDE_FLOAT32_C( 710.23), SIMDE_FLOAT32_C( 195.07), SIMDE_FLOAT32_C( 877.55)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 21.35), SIMDE_FLOAT32_C( 26.62), SIMDE_FLOAT32_C( 20.66), SIMDE_FLOAT32_C( 4.03), SIMDE_FLOAT32_C( 29.99), SIMDE_FLOAT32_C( 26.65), SIMDE_FLOAT32_C( 13.97), SIMDE_FLOAT32_C( 29.62)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 247.65), SIMDE_FLOAT32_C( 650.69), SIMDE_FLOAT32_C( 691.01), SIMDE_FLOAT32_C( 931.91), SIMDE_FLOAT32_C( 760.76), SIMDE_FLOAT32_C( 925.05), SIMDE_FLOAT32_C( 438.39), SIMDE_FLOAT32_C( 204.75)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 15.74), SIMDE_FLOAT32_C( 25.51), SIMDE_FLOAT32_C( 26.29), SIMDE_FLOAT32_C( 30.53), SIMDE_FLOAT32_C( 27.58), SIMDE_FLOAT32_C( 30.41), SIMDE_FLOAT32_C( 20.94), SIMDE_FLOAT32_C( 14.31)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 281.85), SIMDE_FLOAT32_C( 525.43), SIMDE_FLOAT32_C( 50.88), SIMDE_FLOAT32_C( 685.15), SIMDE_FLOAT32_C( 223.40), SIMDE_FLOAT32_C( 911.30), SIMDE_FLOAT32_C( 97.50), SIMDE_FLOAT32_C( 436.55)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 16.79), SIMDE_FLOAT32_C( 22.92), SIMDE_FLOAT32_C( 7.13), SIMDE_FLOAT32_C( 26.18), SIMDE_FLOAT32_C( 14.95), SIMDE_FLOAT32_C( 30.19), SIMDE_FLOAT32_C( 9.87), SIMDE_FLOAT32_C( 20.89)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 420.63), SIMDE_FLOAT32_C( 643.40), SIMDE_FLOAT32_C( 474.44), SIMDE_FLOAT32_C( 474.06), SIMDE_FLOAT32_C( 331.02), SIMDE_FLOAT32_C( 191.18), SIMDE_FLOAT32_C( 
614.70), SIMDE_FLOAT32_C( 135.59)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 20.51), SIMDE_FLOAT32_C( 25.37), SIMDE_FLOAT32_C( 21.78), SIMDE_FLOAT32_C( 21.77), SIMDE_FLOAT32_C( 18.19), SIMDE_FLOAT32_C( 13.83), SIMDE_FLOAT32_C( 24.79), SIMDE_FLOAT32_C( 11.64)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 777.57), SIMDE_FLOAT32_C( 684.09), SIMDE_FLOAT32_C( 183.20), SIMDE_FLOAT32_C( 761.60), SIMDE_FLOAT32_C( 226.72), SIMDE_FLOAT32_C( 710.47), SIMDE_FLOAT32_C( 853.48), SIMDE_FLOAT32_C( 115.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 27.88), SIMDE_FLOAT32_C( 26.16), SIMDE_FLOAT32_C( 13.54), SIMDE_FLOAT32_C( 27.60), SIMDE_FLOAT32_C( 15.06), SIMDE_FLOAT32_C( 26.65), SIMDE_FLOAT32_C( 29.21), SIMDE_FLOAT32_C( 10.76)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 797.47), SIMDE_FLOAT32_C( 634.08), SIMDE_FLOAT32_C( 881.12), SIMDE_FLOAT32_C( 697.30), SIMDE_FLOAT32_C( 189.46), SIMDE_FLOAT32_C( 47.99), SIMDE_FLOAT32_C( 85.88), SIMDE_FLOAT32_C( 938.36)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 28.24), SIMDE_FLOAT32_C( 25.18), SIMDE_FLOAT32_C( 29.68), SIMDE_FLOAT32_C( 26.41), SIMDE_FLOAT32_C( 13.76), SIMDE_FLOAT32_C( 6.93), SIMDE_FLOAT32_C( 9.27), SIMDE_FLOAT32_C( 30.63)) } };
/* Compute the square root of each row's input and compare it with the
 * tabulated expectation. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_sqrt_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; }
/* Checks that simde_mm256_setzero_ps() matches a vector built from eight
 * explicit 0.00 literals via simde_mm256_set_ps(). Returns 0. */
static int test_simde_mm256_setzero_ps(SIMDE_MUNIT_TEST_ARGS) { simde__m256 r = simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)); simde__m256 res = simde_mm256_setzero_ps(); simde_assert_m256_close(r, res, 1); return 0; }
/* Checks that simde_mm256_setzero_pd() matches a vector built from four
 * explicit 0.00 literals via simde_mm256_set_pd(). Returns 0. */
static int test_simde_mm256_setzero_pd(SIMDE_MUNIT_TEST_ARGS) { simde__m256d r = simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)); simde__m256d res = simde_mm256_setzero_pd(); simde_assert_m256d_close(r, 
res, 1); return 0; }
/* Checks that simde_mm256_setzero_si256() equals a vector of eight 32-bit
 * zeros built with simde_mm256_set_epi32(); compared with == as i32 values.
 * Returns 0. */
static int test_simde_mm256_setzero_si256(SIMDE_MUNIT_TEST_ARGS) { simde__m256i r = simde_mm256_set_epi32(INT32_C(0), INT32_C(0), INT32_C(0), INT32_C(0), INT32_C(0), INT32_C(0), INT32_C(0), INT32_C(0)); simde__m256i res = simde_mm256_setzero_si256(); simde_assert_m256i_i32(r, ==, res); return 0; }
/* Table-driven check of simde_mm256_sqrt_pd().
 * Each of the 8 rows pairs an input vector a with the expected vector r; the
 * expected values are written with two decimals (e.g. 121.95 -> 11.04).
 * Returns 0 after every row was checked. */
static int test_simde_mm256_sqrt_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 121.95), SIMDE_FLOAT64_C( 169.21), SIMDE_FLOAT64_C( 224.34), SIMDE_FLOAT64_C( 661.75)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 11.04), SIMDE_FLOAT64_C( 13.01), SIMDE_FLOAT64_C( 14.98), SIMDE_FLOAT64_C( 25.72)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 41.79), SIMDE_FLOAT64_C( 48.53), SIMDE_FLOAT64_C( 17.25), SIMDE_FLOAT64_C( 585.21)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 6.46), SIMDE_FLOAT64_C( 6.97), SIMDE_FLOAT64_C( 4.15), SIMDE_FLOAT64_C( 24.19)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 323.54), SIMDE_FLOAT64_C( 12.60), SIMDE_FLOAT64_C( 916.80), SIMDE_FLOAT64_C( 392.02)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 17.99), SIMDE_FLOAT64_C( 3.55), SIMDE_FLOAT64_C( 30.28), SIMDE_FLOAT64_C( 19.80)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 317.36), SIMDE_FLOAT64_C( 248.26), SIMDE_FLOAT64_C( 48.91), SIMDE_FLOAT64_C( 291.13)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 17.81), SIMDE_FLOAT64_C( 15.76), SIMDE_FLOAT64_C( 6.99), SIMDE_FLOAT64_C( 17.06)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 476.37), SIMDE_FLOAT64_C( 799.71), SIMDE_FLOAT64_C( 234.23), SIMDE_FLOAT64_C( 908.93)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 21.83), SIMDE_FLOAT64_C( 28.28), SIMDE_FLOAT64_C( 15.30), SIMDE_FLOAT64_C( 30.15)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 937.16), SIMDE_FLOAT64_C( 886.92), SIMDE_FLOAT64_C( 703.77), SIMDE_FLOAT64_C( 383.22)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 30.61), SIMDE_FLOAT64_C( 29.78), SIMDE_FLOAT64_C( 26.53), SIMDE_FLOAT64_C( 19.58)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 36.08), SIMDE_FLOAT64_C( 932.02), 
SIMDE_FLOAT64_C( 592.55), SIMDE_FLOAT64_C( 593.01)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 6.01), SIMDE_FLOAT64_C( 30.53), SIMDE_FLOAT64_C( 24.34), SIMDE_FLOAT64_C( 24.35)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 436.31), SIMDE_FLOAT64_C( 915.76), SIMDE_FLOAT64_C( 575.57), SIMDE_FLOAT64_C( 268.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 20.89), SIMDE_FLOAT64_C( 30.26), SIMDE_FLOAT64_C( 23.99), SIMDE_FLOAT64_C( 16.39)) } };
/* Compute the square root of each row's input and compare it with the
 * tabulated expectation. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_sqrt_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven check of simde_mm256_store_ps().
 * Each of the 8 rows holds a source vector a and the expected float array r;
 * r lists the simde_mm256_set_ps arguments of a in reverse order.
 * The expected array is declared with SIMDE_ALIGN_LIKE_32(simde__m256)
 * (presumably to satisfy the aligned-store requirement -- confirm).
 * Returns 0 after every row was checked. */
static int test_simde_mm256_store_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; SIMDE_ALIGN_LIKE_32(simde__m256) simde_float32 r[8]; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 256.09), SIMDE_FLOAT32_C( 768.79), SIMDE_FLOAT32_C( 201.90), SIMDE_FLOAT32_C( 339.33), SIMDE_FLOAT32_C( 957.46), SIMDE_FLOAT32_C( 728.44), SIMDE_FLOAT32_C( 73.67), SIMDE_FLOAT32_C( 440.11)), { SIMDE_FLOAT32_C( 440.11), SIMDE_FLOAT32_C( 73.67), SIMDE_FLOAT32_C( 728.44), SIMDE_FLOAT32_C( 957.46), SIMDE_FLOAT32_C( 339.33), SIMDE_FLOAT32_C( 201.90), SIMDE_FLOAT32_C( 768.79), SIMDE_FLOAT32_C( 256.09) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -72.04), SIMDE_FLOAT32_C( -425.25), SIMDE_FLOAT32_C( 471.77), SIMDE_FLOAT32_C( 976.75), SIMDE_FLOAT32_C( -510.20), SIMDE_FLOAT32_C( 696.54), SIMDE_FLOAT32_C( -843.54), SIMDE_FLOAT32_C( -868.41)), { SIMDE_FLOAT32_C( -868.41), SIMDE_FLOAT32_C( -843.54), SIMDE_FLOAT32_C( 696.54), SIMDE_FLOAT32_C( -510.20), SIMDE_FLOAT32_C( 976.75), SIMDE_FLOAT32_C( 471.77), SIMDE_FLOAT32_C( -425.25), SIMDE_FLOAT32_C( -72.04) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -304.23), SIMDE_FLOAT32_C( 477.48), SIMDE_FLOAT32_C( 356.58), SIMDE_FLOAT32_C( 955.81), SIMDE_FLOAT32_C( 999.99), SIMDE_FLOAT32_C( 487.33), SIMDE_FLOAT32_C( 633.61), SIMDE_FLOAT32_C( 518.11)), { SIMDE_FLOAT32_C( 518.11), SIMDE_FLOAT32_C( 633.61), SIMDE_FLOAT32_C( 487.33), SIMDE_FLOAT32_C( 
999.99), SIMDE_FLOAT32_C( 955.81), SIMDE_FLOAT32_C( 356.58), SIMDE_FLOAT32_C( 477.48), SIMDE_FLOAT32_C( -304.23) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 88.04), SIMDE_FLOAT32_C( -296.25), SIMDE_FLOAT32_C( 948.33), SIMDE_FLOAT32_C( -584.17), SIMDE_FLOAT32_C( -796.11), SIMDE_FLOAT32_C( -38.84), SIMDE_FLOAT32_C( -706.11), SIMDE_FLOAT32_C( 347.32)), { SIMDE_FLOAT32_C( 347.32), SIMDE_FLOAT32_C( -706.11), SIMDE_FLOAT32_C( -38.84), SIMDE_FLOAT32_C( -796.11), SIMDE_FLOAT32_C( -584.17), SIMDE_FLOAT32_C( 948.33), SIMDE_FLOAT32_C( -296.25), SIMDE_FLOAT32_C( 88.04) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -143.76), SIMDE_FLOAT32_C( 504.36), SIMDE_FLOAT32_C( -154.12), SIMDE_FLOAT32_C( 375.43), SIMDE_FLOAT32_C( -307.18), SIMDE_FLOAT32_C( 256.93), SIMDE_FLOAT32_C( 78.86), SIMDE_FLOAT32_C( 526.77)), { SIMDE_FLOAT32_C( 526.77), SIMDE_FLOAT32_C( 78.86), SIMDE_FLOAT32_C( 256.93), SIMDE_FLOAT32_C( -307.18), SIMDE_FLOAT32_C( 375.43), SIMDE_FLOAT32_C( -154.12), SIMDE_FLOAT32_C( 504.36), SIMDE_FLOAT32_C( -143.76) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 597.82), SIMDE_FLOAT32_C( 697.68), SIMDE_FLOAT32_C( -285.13), SIMDE_FLOAT32_C( 354.51), SIMDE_FLOAT32_C( 41.32), SIMDE_FLOAT32_C( -445.78), SIMDE_FLOAT32_C( -483.17), SIMDE_FLOAT32_C( -727.28)), { SIMDE_FLOAT32_C( -727.28), SIMDE_FLOAT32_C( -483.17), SIMDE_FLOAT32_C( -445.78), SIMDE_FLOAT32_C( 41.32), SIMDE_FLOAT32_C( 354.51), SIMDE_FLOAT32_C( -285.13), SIMDE_FLOAT32_C( 697.68), SIMDE_FLOAT32_C( 597.82) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -441.93), SIMDE_FLOAT32_C( -382.04), SIMDE_FLOAT32_C( 392.67), SIMDE_FLOAT32_C( 690.10), SIMDE_FLOAT32_C( -30.26), SIMDE_FLOAT32_C( -968.87), SIMDE_FLOAT32_C( -246.46), SIMDE_FLOAT32_C( 625.73)), { SIMDE_FLOAT32_C( 625.73), SIMDE_FLOAT32_C( -246.46), SIMDE_FLOAT32_C( -968.87), SIMDE_FLOAT32_C( -30.26), SIMDE_FLOAT32_C( 690.10), SIMDE_FLOAT32_C( 392.67), SIMDE_FLOAT32_C( -382.04), SIMDE_FLOAT32_C( -441.93) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 442.97), SIMDE_FLOAT32_C( -20.37), 
SIMDE_FLOAT32_C( 758.01), SIMDE_FLOAT32_C( -507.47), SIMDE_FLOAT32_C( 529.68), SIMDE_FLOAT32_C( -598.71), SIMDE_FLOAT32_C( 952.31), SIMDE_FLOAT32_C( -565.36)), { SIMDE_FLOAT32_C( -565.36), SIMDE_FLOAT32_C( 952.31), SIMDE_FLOAT32_C( -598.71), SIMDE_FLOAT32_C( 529.68), SIMDE_FLOAT32_C( -507.47), SIMDE_FLOAT32_C( 758.01), SIMDE_FLOAT32_C( -20.37), SIMDE_FLOAT32_C( 442.97) } } };
/* Store each row's vector into a local SIMDE_ALIGN_LIKE_32 buffer and compare
 * all 8 floats with the expected array. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { SIMDE_ALIGN_LIKE_32(simde__m256) simde_float32 r[8]; simde_mm256_store_ps(r, test_vec[i].a); simde_assert_equal_vf32(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 1); } return 0; }
/* Table-driven check of simde_mm256_store_pd().
 * Each of the 8 rows holds a source vector a and the expected double array r;
 * r lists the simde_mm256_set_pd arguments of a in reverse order.
 * Both the expected array and the destination buffer are declared with
 * SIMDE_ALIGN_LIKE_32(simde__m256d). Returns 0 after every row was checked. */
static int test_simde_mm256_store_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; SIMDE_ALIGN_LIKE_32(simde__m256d) simde_float64 r[4]; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 771.84), SIMDE_FLOAT64_C( 578.19), SIMDE_FLOAT64_C( 287.63), SIMDE_FLOAT64_C( 196.16)), { SIMDE_FLOAT64_C( 196.16), SIMDE_FLOAT64_C( 287.63), SIMDE_FLOAT64_C( 578.19), SIMDE_FLOAT64_C( 771.84) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -941.73), SIMDE_FLOAT64_C( -687.45), SIMDE_FLOAT64_C( -865.09), SIMDE_FLOAT64_C( 586.03)), { SIMDE_FLOAT64_C( 586.03), SIMDE_FLOAT64_C( -865.09), SIMDE_FLOAT64_C( -687.45), SIMDE_FLOAT64_C( -941.73) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -342.90), SIMDE_FLOAT64_C( -658.61), SIMDE_FLOAT64_C( 882.05), SIMDE_FLOAT64_C( -607.89)), { SIMDE_FLOAT64_C( -607.89), SIMDE_FLOAT64_C( 882.05), SIMDE_FLOAT64_C( -658.61), SIMDE_FLOAT64_C( -342.90) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -248.91), SIMDE_FLOAT64_C( -473.20), SIMDE_FLOAT64_C( 170.76), SIMDE_FLOAT64_C( -407.02)), { SIMDE_FLOAT64_C( -407.02), SIMDE_FLOAT64_C( 170.76), SIMDE_FLOAT64_C( -473.20), SIMDE_FLOAT64_C( -248.91) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 181.69), SIMDE_FLOAT64_C( 697.67), SIMDE_FLOAT64_C( 911.25), SIMDE_FLOAT64_C( -299.04)), { SIMDE_FLOAT64_C( -299.04), SIMDE_FLOAT64_C( 911.25), SIMDE_FLOAT64_C( 697.67), SIMDE_FLOAT64_C( 181.69) } }, { 
simde_mm256_set_pd(SIMDE_FLOAT64_C( -630.92), SIMDE_FLOAT64_C( -612.42), SIMDE_FLOAT64_C( 645.34), SIMDE_FLOAT64_C( -379.90)), { SIMDE_FLOAT64_C( -379.90), SIMDE_FLOAT64_C( 645.34), SIMDE_FLOAT64_C( -612.42), SIMDE_FLOAT64_C( -630.92) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -339.32), SIMDE_FLOAT64_C( 404.98), SIMDE_FLOAT64_C( -361.76), SIMDE_FLOAT64_C( 391.60)), { SIMDE_FLOAT64_C( 391.60), SIMDE_FLOAT64_C( -361.76), SIMDE_FLOAT64_C( 404.98), SIMDE_FLOAT64_C( -339.32) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 714.72), SIMDE_FLOAT64_C( 997.58), SIMDE_FLOAT64_C( 807.47), SIMDE_FLOAT64_C( 948.80)), { SIMDE_FLOAT64_C( 948.80), SIMDE_FLOAT64_C( 807.47), SIMDE_FLOAT64_C( 997.58), SIMDE_FLOAT64_C( 714.72) } } };
/* Store each row's vector into a local aligned buffer and compare all 4
 * doubles with the expected array. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { SIMDE_ALIGN_LIKE_32(simde__m256d) simde_float64 r[4]; simde_mm256_store_pd(r, test_vec[i].a); simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 1); } return 0; }
/* Table-driven check of simde_mm256_store_si256().
 * Each of the 8 rows holds a source vector a and an expected vector r; in the
 * table r repeats a's literals, so the test verifies that a store into a local
 * simde__m256i round-trips the value. Returns 0 after every row was checked. */
static int test_simde_mm256_store_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( -495387953), INT32_C( 1389422093), INT32_C( 1549613541), INT32_C( 1621396930), INT32_C( 431885981), INT32_C( -495493978), INT32_C( 957980176), INT32_C( -756622382)), simde_mm256_set_epi32(INT32_C( -495387953), INT32_C( 1389422093), INT32_C( 1549613541), INT32_C( 1621396930), INT32_C( 431885981), INT32_C( -495493978), INT32_C( 957980176), INT32_C( -756622382)) }, { simde_mm256_set_epi32(INT32_C(-1537563225), INT32_C( -111752931), INT32_C( 2068479386), INT32_C( 2004807479), INT32_C( 2110162316), INT32_C( 1854708769), INT32_C( -83811592), INT32_C( -892599604)), simde_mm256_set_epi32(INT32_C(-1537563225), INT32_C( -111752931), INT32_C( 2068479386), INT32_C( 2004807479), INT32_C( 2110162316), INT32_C( 1854708769), INT32_C( -83811592), INT32_C( -892599604)) }, { simde_mm256_set_epi32(INT32_C( 1682126570), INT32_C( 340006289), INT32_C( 149083722), INT32_C( 
1734045351), INT32_C(-1572830580), INT32_C( 1824421914), INT32_C( 1423738129), INT32_C( -492096848)), simde_mm256_set_epi32(INT32_C( 1682126570), INT32_C( 340006289), INT32_C( 149083722), INT32_C( 1734045351), INT32_C(-1572830580), INT32_C( 1824421914), INT32_C( 1423738129), INT32_C( -492096848)) }, { simde_mm256_set_epi32(INT32_C(-2079347795), INT32_C( -336455233), INT32_C( -761216501), INT32_C( 1784601451), INT32_C( 386854500), INT32_C( 177253765), INT32_C( 797476379), INT32_C( -848434210)), simde_mm256_set_epi32(INT32_C(-2079347795), INT32_C( -336455233), INT32_C( -761216501), INT32_C( 1784601451), INT32_C( 386854500), INT32_C( 177253765), INT32_C( 797476379), INT32_C( -848434210)) }, { simde_mm256_set_epi32(INT32_C(-1024397654), INT32_C( 273228891), INT32_C( -327343542), INT32_C( 941152347), INT32_C(-1943801853), INT32_C( -571606335), INT32_C(-1600503753), INT32_C( 3657489)), simde_mm256_set_epi32(INT32_C(-1024397654), INT32_C( 273228891), INT32_C( -327343542), INT32_C( 941152347), INT32_C(-1943801853), INT32_C( -571606335), INT32_C(-1600503753), INT32_C( 3657489)) }, { simde_mm256_set_epi32(INT32_C( 392666418), INT32_C(-1141495552), INT32_C(-1265226558), INT32_C( -341769681), INT32_C( 784056885), INT32_C( -8791049), INT32_C( -255179514), INT32_C( 1505512077)), simde_mm256_set_epi32(INT32_C( 392666418), INT32_C(-1141495552), INT32_C(-1265226558), INT32_C( -341769681), INT32_C( 784056885), INT32_C( -8791049), INT32_C( -255179514), INT32_C( 1505512077)) }, { simde_mm256_set_epi32(INT32_C(-2051731929), INT32_C( 1367430487), INT32_C( -142986568), INT32_C( 1381573702), INT32_C( -856833247), INT32_C( 665026143), INT32_C( 1196790497), INT32_C( -507260176)), simde_mm256_set_epi32(INT32_C(-2051731929), INT32_C( 1367430487), INT32_C( -142986568), INT32_C( 1381573702), INT32_C( -856833247), INT32_C( 665026143), INT32_C( 1196790497), INT32_C( -507260176)) }, { simde_mm256_set_epi32(INT32_C( -878005657), INT32_C( -546637276), INT32_C(-1056155816), INT32_C( 259320365), 
INT32_C(-1537222115), INT32_C( 895058753), INT32_C( -970999316), INT32_C( 224281618)), simde_mm256_set_epi32(INT32_C( -878005657), INT32_C( -546637276), INT32_C(-1056155816), INT32_C( 259320365), INT32_C(-1537222115), INT32_C( 895058753), INT32_C( -970999316), INT32_C( 224281618)) } };
/* Store each row's vector into a local simde__m256i and require equality with
 * the expected vector, compared as i32 values. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r; simde_mm256_store_si256(&r, test_vec[i].a); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; }
/* Table-driven check of simde_mm256_storeu_ps().
 * Each of the 8 rows holds a source vector a and the expected float array r;
 * r lists the simde_mm256_set_ps arguments of a in reverse order.
 * Unlike the store_ps test, the arrays carry no alignment macro.
 * Returns 0 after every row was checked. */
static int test_simde_mm256_storeu_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde_float32 r[8]; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 389.47), SIMDE_FLOAT32_C( -23.53), SIMDE_FLOAT32_C( 971.41), SIMDE_FLOAT32_C( 968.93), SIMDE_FLOAT32_C( 388.52), SIMDE_FLOAT32_C( 400.32), SIMDE_FLOAT32_C( -988.67), SIMDE_FLOAT32_C( 867.57)), { SIMDE_FLOAT32_C( 867.57), SIMDE_FLOAT32_C( -988.67), SIMDE_FLOAT32_C( 400.32), SIMDE_FLOAT32_C( 388.52), SIMDE_FLOAT32_C( 968.93), SIMDE_FLOAT32_C( 971.41), SIMDE_FLOAT32_C( -23.53), SIMDE_FLOAT32_C( 389.47) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -904.63), SIMDE_FLOAT32_C( 373.49), SIMDE_FLOAT32_C( -535.56), SIMDE_FLOAT32_C( 674.97), SIMDE_FLOAT32_C( -321.09), SIMDE_FLOAT32_C( -666.72), SIMDE_FLOAT32_C( 574.65), SIMDE_FLOAT32_C( -486.68)), { SIMDE_FLOAT32_C( -486.68), SIMDE_FLOAT32_C( 574.65), SIMDE_FLOAT32_C( -666.72), SIMDE_FLOAT32_C( -321.09), SIMDE_FLOAT32_C( 674.97), SIMDE_FLOAT32_C( -535.56), SIMDE_FLOAT32_C( 373.49), SIMDE_FLOAT32_C( -904.63) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 716.68), SIMDE_FLOAT32_C( -902.50), SIMDE_FLOAT32_C( 689.19), SIMDE_FLOAT32_C( 562.96), SIMDE_FLOAT32_C( 218.97), SIMDE_FLOAT32_C( -717.02), SIMDE_FLOAT32_C( 50.08), SIMDE_FLOAT32_C( 473.03)), { SIMDE_FLOAT32_C( 473.03), SIMDE_FLOAT32_C( 50.08), SIMDE_FLOAT32_C( -717.02), SIMDE_FLOAT32_C( 218.97), SIMDE_FLOAT32_C( 562.96), SIMDE_FLOAT32_C( 689.19), SIMDE_FLOAT32_C( -902.50), SIMDE_FLOAT32_C( 716.68) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 
-748.65), SIMDE_FLOAT32_C( -546.25), SIMDE_FLOAT32_C( -755.41), SIMDE_FLOAT32_C( 720.41), SIMDE_FLOAT32_C( 469.62), SIMDE_FLOAT32_C( -529.37), SIMDE_FLOAT32_C( 68.16), SIMDE_FLOAT32_C( -880.26)), { SIMDE_FLOAT32_C( -880.26), SIMDE_FLOAT32_C( 68.16), SIMDE_FLOAT32_C( -529.37), SIMDE_FLOAT32_C( 469.62), SIMDE_FLOAT32_C( 720.41), SIMDE_FLOAT32_C( -755.41), SIMDE_FLOAT32_C( -546.25), SIMDE_FLOAT32_C( -748.65) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -639.31), SIMDE_FLOAT32_C( 210.80), SIMDE_FLOAT32_C( -481.50), SIMDE_FLOAT32_C( -773.33), SIMDE_FLOAT32_C( 766.24), SIMDE_FLOAT32_C( 722.05), SIMDE_FLOAT32_C( 569.99), SIMDE_FLOAT32_C( 78.20)), { SIMDE_FLOAT32_C( 78.20), SIMDE_FLOAT32_C( 569.99), SIMDE_FLOAT32_C( 722.05), SIMDE_FLOAT32_C( 766.24), SIMDE_FLOAT32_C( -773.33), SIMDE_FLOAT32_C( -481.50), SIMDE_FLOAT32_C( 210.80), SIMDE_FLOAT32_C( -639.31) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 219.96), SIMDE_FLOAT32_C( -967.95), SIMDE_FLOAT32_C( -966.73), SIMDE_FLOAT32_C( -375.54), SIMDE_FLOAT32_C( -872.08), SIMDE_FLOAT32_C( -211.68), SIMDE_FLOAT32_C( 797.84), SIMDE_FLOAT32_C( 819.26)), { SIMDE_FLOAT32_C( 819.26), SIMDE_FLOAT32_C( 797.84), SIMDE_FLOAT32_C( -211.68), SIMDE_FLOAT32_C( -872.08), SIMDE_FLOAT32_C( -375.54), SIMDE_FLOAT32_C( -966.73), SIMDE_FLOAT32_C( -967.95), SIMDE_FLOAT32_C( 219.96) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 601.98), SIMDE_FLOAT32_C( -827.42), SIMDE_FLOAT32_C( 747.06), SIMDE_FLOAT32_C( -914.70), SIMDE_FLOAT32_C( 111.41), SIMDE_FLOAT32_C( 873.12), SIMDE_FLOAT32_C( 763.60), SIMDE_FLOAT32_C( 161.15)), { SIMDE_FLOAT32_C( 161.15), SIMDE_FLOAT32_C( 763.60), SIMDE_FLOAT32_C( 873.12), SIMDE_FLOAT32_C( 111.41), SIMDE_FLOAT32_C( -914.70), SIMDE_FLOAT32_C( 747.06), SIMDE_FLOAT32_C( -827.42), SIMDE_FLOAT32_C( 601.98) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 800.45), SIMDE_FLOAT32_C( 526.22), SIMDE_FLOAT32_C( 670.34), SIMDE_FLOAT32_C( 433.87), SIMDE_FLOAT32_C( 651.08), SIMDE_FLOAT32_C( -532.01), SIMDE_FLOAT32_C( -619.94), SIMDE_FLOAT32_C( 88.39)), { 
SIMDE_FLOAT32_C( 88.39), SIMDE_FLOAT32_C( -619.94), SIMDE_FLOAT32_C( -532.01), SIMDE_FLOAT32_C( 651.08), SIMDE_FLOAT32_C( 433.87), SIMDE_FLOAT32_C( 670.34), SIMDE_FLOAT32_C( 526.22), SIMDE_FLOAT32_C( 800.45) } } };
/* Store each row's vector into a plain local float[8] and compare all 8
 * floats with the expected array. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32 r[8]; simde_mm256_storeu_ps(r, test_vec[i].a); simde_assert_equal_vf32(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 1); } return 0; }
/* Table-driven check of simde_mm256_storeu_pd().
 * Each of the 8 rows holds a source vector a and the expected double array r;
 * r lists the simde_mm256_set_pd arguments of a in reverse order.
 * The arrays carry no alignment macro. Returns 0 after every row was checked. */
static int test_simde_mm256_storeu_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde_float64 r[4]; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -416.91), SIMDE_FLOAT64_C( -266.91), SIMDE_FLOAT64_C( 400.77), SIMDE_FLOAT64_C( 614.06)), { SIMDE_FLOAT64_C( 614.06), SIMDE_FLOAT64_C( 400.77), SIMDE_FLOAT64_C( -266.91), SIMDE_FLOAT64_C( -416.91) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -82.84), SIMDE_FLOAT64_C( 375.26), SIMDE_FLOAT64_C( -316.62), SIMDE_FLOAT64_C( 608.04)), { SIMDE_FLOAT64_C( 608.04), SIMDE_FLOAT64_C( -316.62), SIMDE_FLOAT64_C( 375.26), SIMDE_FLOAT64_C( -82.84) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 218.37), SIMDE_FLOAT64_C( -388.98), SIMDE_FLOAT64_C( -164.15), SIMDE_FLOAT64_C( 66.39)), { SIMDE_FLOAT64_C( 66.39), SIMDE_FLOAT64_C( -164.15), SIMDE_FLOAT64_C( -388.98), SIMDE_FLOAT64_C( 218.37) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 414.66), SIMDE_FLOAT64_C( 373.86), SIMDE_FLOAT64_C( 387.85), SIMDE_FLOAT64_C( 140.99)), { SIMDE_FLOAT64_C( 140.99), SIMDE_FLOAT64_C( 387.85), SIMDE_FLOAT64_C( 373.86), SIMDE_FLOAT64_C( 414.66) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 513.43), SIMDE_FLOAT64_C( -671.43), SIMDE_FLOAT64_C( -383.59), SIMDE_FLOAT64_C( 476.58)), { SIMDE_FLOAT64_C( 476.58), SIMDE_FLOAT64_C( -383.59), SIMDE_FLOAT64_C( -671.43), SIMDE_FLOAT64_C( 513.43) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -620.74), SIMDE_FLOAT64_C( 593.91), SIMDE_FLOAT64_C( 595.45), SIMDE_FLOAT64_C( 326.69)), { SIMDE_FLOAT64_C( 326.69), SIMDE_FLOAT64_C( 595.45), SIMDE_FLOAT64_C( 593.91), SIMDE_FLOAT64_C( -620.74) } }, { 
simde_mm256_set_pd(SIMDE_FLOAT64_C( 933.41), SIMDE_FLOAT64_C( -583.63), SIMDE_FLOAT64_C( 451.59), SIMDE_FLOAT64_C( 17.38)), { SIMDE_FLOAT64_C( 17.38), SIMDE_FLOAT64_C( 451.59), SIMDE_FLOAT64_C( -583.63), SIMDE_FLOAT64_C( 933.41) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -951.90), SIMDE_FLOAT64_C( -96.08), SIMDE_FLOAT64_C( -475.76), SIMDE_FLOAT64_C( -920.22)), { SIMDE_FLOAT64_C( -920.22), SIMDE_FLOAT64_C( -475.76), SIMDE_FLOAT64_C( -96.08), SIMDE_FLOAT64_C( -951.90) } } };
/* Store each row's vector into a plain local double[4] and compare all 4
 * doubles with the expected array. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float64 r[4]; simde_mm256_storeu_pd(r, test_vec[i].a); simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 1); } return 0; }
/* Table-driven check of simde_mm256_storeu_si256().
 * Each of the 8 rows holds a source vector a and an expected vector r; in the
 * table r repeats a's literals, so the test verifies that a store into a local
 * simde__m256i round-trips the value. Returns 0 after every row was checked. */
static int test_simde_mm256_storeu_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C(-1690076372), INT32_C( 273159718), INT32_C( 661600261), INT32_C( -431509063), INT32_C(-1410315245), INT32_C( 938478074), INT32_C( -325173074), INT32_C( -955068873)), simde_mm256_set_epi32(INT32_C(-1690076372), INT32_C( 273159718), INT32_C( 661600261), INT32_C( -431509063), INT32_C(-1410315245), INT32_C( 938478074), INT32_C( -325173074), INT32_C( -955068873)) }, { simde_mm256_set_epi32(INT32_C( 1349804757), INT32_C( -716302605), INT32_C( -407964159), INT32_C( 1438548072), INT32_C(-1765250498), INT32_C(-1398568590), INT32_C( -166142303), INT32_C(-1616708423)), simde_mm256_set_epi32(INT32_C( 1349804757), INT32_C( -716302605), INT32_C( -407964159), INT32_C( 1438548072), INT32_C(-1765250498), INT32_C(-1398568590), INT32_C( -166142303), INT32_C(-1616708423)) }, { simde_mm256_set_epi32(INT32_C( 1299583779), INT32_C( 1018639695), INT32_C( 378838390), INT32_C( 823542835), INT32_C( 107040332), INT32_C( 1160607838), INT32_C(-1073772085), INT32_C( 550633253)), simde_mm256_set_epi32(INT32_C( 1299583779), INT32_C( 1018639695), INT32_C( 378838390), INT32_C( 823542835), INT32_C( 107040332), INT32_C( 1160607838), INT32_C(-1073772085), 
INT32_C( 550633253)) }, { simde_mm256_set_epi32(INT32_C(-2020164947), INT32_C( 1097566074), INT32_C( 838709016), INT32_C(-1310900036), INT32_C( 205435207), INT32_C(-1570208834), INT32_C( -866634640), INT32_C(-1469292111)), simde_mm256_set_epi32(INT32_C(-2020164947), INT32_C( 1097566074), INT32_C( 838709016), INT32_C(-1310900036), INT32_C( 205435207), INT32_C(-1570208834), INT32_C( -866634640), INT32_C(-1469292111)) }, { simde_mm256_set_epi32(INT32_C( 1996197805), INT32_C(-1742000211), INT32_C( -252812234), INT32_C( 126353816), INT32_C(-1209789142), INT32_C(-1818044476), INT32_C(-1843734346), INT32_C( 1208980142)), simde_mm256_set_epi32(INT32_C( 1996197805), INT32_C(-1742000211), INT32_C( -252812234), INT32_C( 126353816), INT32_C(-1209789142), INT32_C(-1818044476), INT32_C(-1843734346), INT32_C( 1208980142)) }, { simde_mm256_set_epi32(INT32_C( 264233747), INT32_C( 65047959), INT32_C(-2078914587), INT32_C( -76956293), INT32_C(-1404669906), INT32_C(-1580100987), INT32_C(-1585720935), INT32_C( -962400296)), simde_mm256_set_epi32(INT32_C( 264233747), INT32_C( 65047959), INT32_C(-2078914587), INT32_C( -76956293), INT32_C(-1404669906), INT32_C(-1580100987), INT32_C(-1585720935), INT32_C( -962400296)) }, { simde_mm256_set_epi32(INT32_C(-2048043478), INT32_C( -746745729), INT32_C( 515423412), INT32_C(-1288873149), INT32_C( -50167373), INT32_C( 751152155), INT32_C( 63560258), INT32_C( 839041982)), simde_mm256_set_epi32(INT32_C(-2048043478), INT32_C( -746745729), INT32_C( 515423412), INT32_C(-1288873149), INT32_C( -50167373), INT32_C( 751152155), INT32_C( 63560258), INT32_C( 839041982)) }, { simde_mm256_set_epi32(INT32_C(-1066512345), INT32_C(-1608586608), INT32_C( 2001494396), INT32_C( 667109925), INT32_C( 134968304), INT32_C( 154885492), INT32_C( 204100449), INT32_C(-2103566856)), simde_mm256_set_epi32(INT32_C(-1066512345), INT32_C(-1608586608), INT32_C( 2001494396), INT32_C( 667109925), INT32_C( 134968304), INT32_C( 154885492), INT32_C( 204100449), INT32_C(-2103566856)) } 
};
/* Store each row's vector into a local simde__m256i and require equality with
 * the expected vector, compared as i32 values. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r; simde_mm256_storeu_si256(&r, test_vec[i].a); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; }
/* Table-driven check of simde_mm256_storeu2_m128d().
 * Each of the 8 rows holds a source vector a plus two expected double pairs:
 * lo holds the last two simde_mm256_set_pd arguments of a in reverse order,
 * hi holds the first two in reverse order. The loop after the table calls the
 * function with the hi buffer first and the lo buffer second.
 * Returns 0 after every row was checked. */
static int test_simde_mm256_storeu2_m128d(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde_float64 lo[2]; simde_float64 hi[2]; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -194.98), SIMDE_FLOAT64_C( 916.70), SIMDE_FLOAT64_C( 887.89), SIMDE_FLOAT64_C( -369.82)), { SIMDE_FLOAT64_C( -369.82), SIMDE_FLOAT64_C( 887.89) }, { SIMDE_FLOAT64_C( 916.70), SIMDE_FLOAT64_C( -194.98) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -641.49), SIMDE_FLOAT64_C( -881.69), SIMDE_FLOAT64_C( 627.53), SIMDE_FLOAT64_C( 575.62)), { SIMDE_FLOAT64_C( 575.62), SIMDE_FLOAT64_C( 627.53) }, { SIMDE_FLOAT64_C( -881.69), SIMDE_FLOAT64_C( -641.49) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 518.30), SIMDE_FLOAT64_C( -433.95), SIMDE_FLOAT64_C( -285.84), SIMDE_FLOAT64_C( -221.03)), { SIMDE_FLOAT64_C( -221.03), SIMDE_FLOAT64_C( -285.84) }, { SIMDE_FLOAT64_C( -433.95), SIMDE_FLOAT64_C( 518.30) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -662.71), SIMDE_FLOAT64_C( 122.75), SIMDE_FLOAT64_C( -172.81), SIMDE_FLOAT64_C( 852.48)), { SIMDE_FLOAT64_C( 852.48), SIMDE_FLOAT64_C( -172.81) }, { SIMDE_FLOAT64_C( 122.75), SIMDE_FLOAT64_C( -662.71) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 223.39), SIMDE_FLOAT64_C( 81.02), SIMDE_FLOAT64_C( -389.17), SIMDE_FLOAT64_C( -925.92)), { SIMDE_FLOAT64_C( -925.92), SIMDE_FLOAT64_C( -389.17) }, { SIMDE_FLOAT64_C( 81.02), SIMDE_FLOAT64_C( 223.39) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -179.05), SIMDE_FLOAT64_C( -523.56), SIMDE_FLOAT64_C( -888.14), SIMDE_FLOAT64_C( -890.71)), { SIMDE_FLOAT64_C( -890.71), SIMDE_FLOAT64_C( -888.14) }, { SIMDE_FLOAT64_C( -523.56), SIMDE_FLOAT64_C( -179.05) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 495.30), SIMDE_FLOAT64_C( 776.16), SIMDE_FLOAT64_C( 945.09), SIMDE_FLOAT64_C( -993.27)), { SIMDE_FLOAT64_C( -993.27), SIMDE_FLOAT64_C( 
945.09) }, { SIMDE_FLOAT64_C( 776.16), SIMDE_FLOAT64_C( 495.30) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -570.57), SIMDE_FLOAT64_C( -54.05), SIMDE_FLOAT64_C( 18.98), SIMDE_FLOAT64_C( 273.66)), { SIMDE_FLOAT64_C( 273.66), SIMDE_FLOAT64_C( 18.98) }, { SIMDE_FLOAT64_C( -54.05), SIMDE_FLOAT64_C( -570.57) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float64 lo[2]; simde_float64 hi[2]; simde_mm256_storeu2_m128d(hi, lo, test_vec[i].a); simde_assert_equal_vf64(sizeof(lo) / sizeof(lo[0]), lo, test_vec[i].lo, 1); simde_assert_equal_vf64(sizeof(hi) / sizeof(hi[0]), hi, test_vec[i].hi, 1); } return 0; } static int test_simde_mm256_storeu2_m128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde_float32 lo[4]; simde_float32 hi[4]; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 443.79), SIMDE_FLOAT32_C( -943.66), SIMDE_FLOAT32_C( -617.03), SIMDE_FLOAT32_C( 623.90), SIMDE_FLOAT32_C( 762.13), SIMDE_FLOAT32_C( -191.81), SIMDE_FLOAT32_C( -640.60), SIMDE_FLOAT32_C( -234.64)), { SIMDE_FLOAT32_C( -234.64), SIMDE_FLOAT32_C( -640.60), SIMDE_FLOAT32_C( -191.81), SIMDE_FLOAT32_C( 762.13) }, { SIMDE_FLOAT32_C( 623.90), SIMDE_FLOAT32_C( -617.03), SIMDE_FLOAT32_C( -943.66), SIMDE_FLOAT32_C( 443.79) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 182.89), SIMDE_FLOAT32_C( 873.05), SIMDE_FLOAT32_C( -272.49), SIMDE_FLOAT32_C( 736.89), SIMDE_FLOAT32_C( 847.09), SIMDE_FLOAT32_C( -558.86), SIMDE_FLOAT32_C( 408.24), SIMDE_FLOAT32_C( 859.69)), { SIMDE_FLOAT32_C( 859.69), SIMDE_FLOAT32_C( 408.24), SIMDE_FLOAT32_C( -558.86), SIMDE_FLOAT32_C( 847.09) }, { SIMDE_FLOAT32_C( 736.89), SIMDE_FLOAT32_C( -272.49), SIMDE_FLOAT32_C( 873.05), SIMDE_FLOAT32_C( 182.89) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -136.90), SIMDE_FLOAT32_C( 907.77), SIMDE_FLOAT32_C( 406.39), SIMDE_FLOAT32_C( -278.95), SIMDE_FLOAT32_C( -644.43), SIMDE_FLOAT32_C( -382.39), SIMDE_FLOAT32_C( -628.10), SIMDE_FLOAT32_C( 477.81)), { SIMDE_FLOAT32_C( 477.81), SIMDE_FLOAT32_C( -628.10), 
SIMDE_FLOAT32_C( -382.39), SIMDE_FLOAT32_C( -644.43) }, { SIMDE_FLOAT32_C( -278.95), SIMDE_FLOAT32_C( 406.39), SIMDE_FLOAT32_C( 907.77), SIMDE_FLOAT32_C( -136.90) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -403.18), SIMDE_FLOAT32_C( -297.11), SIMDE_FLOAT32_C( -839.59), SIMDE_FLOAT32_C( 925.44), SIMDE_FLOAT32_C( -4.12), SIMDE_FLOAT32_C( 223.77), SIMDE_FLOAT32_C( 124.27), SIMDE_FLOAT32_C( -589.05)), { SIMDE_FLOAT32_C( -589.05), SIMDE_FLOAT32_C( 124.27), SIMDE_FLOAT32_C( 223.77), SIMDE_FLOAT32_C( -4.12) }, { SIMDE_FLOAT32_C( 925.44), SIMDE_FLOAT32_C( -839.59), SIMDE_FLOAT32_C( -297.11), SIMDE_FLOAT32_C( -403.18) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 375.51), SIMDE_FLOAT32_C( 712.93), SIMDE_FLOAT32_C( 259.42), SIMDE_FLOAT32_C( -11.11), SIMDE_FLOAT32_C( -941.70), SIMDE_FLOAT32_C( 747.97), SIMDE_FLOAT32_C( -604.59), SIMDE_FLOAT32_C( 959.68)), { SIMDE_FLOAT32_C( 959.68), SIMDE_FLOAT32_C( -604.59), SIMDE_FLOAT32_C( 747.97), SIMDE_FLOAT32_C( -941.70) }, { SIMDE_FLOAT32_C( -11.11), SIMDE_FLOAT32_C( 259.42), SIMDE_FLOAT32_C( 712.93), SIMDE_FLOAT32_C( 375.51) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 836.60), SIMDE_FLOAT32_C( 294.15), SIMDE_FLOAT32_C( -609.69), SIMDE_FLOAT32_C( -302.95), SIMDE_FLOAT32_C( 53.57), SIMDE_FLOAT32_C( -19.22), SIMDE_FLOAT32_C( -957.62), SIMDE_FLOAT32_C( 661.17)), { SIMDE_FLOAT32_C( 661.17), SIMDE_FLOAT32_C( -957.62), SIMDE_FLOAT32_C( -19.22), SIMDE_FLOAT32_C( 53.57) }, { SIMDE_FLOAT32_C( -302.95), SIMDE_FLOAT32_C( -609.69), SIMDE_FLOAT32_C( 294.15), SIMDE_FLOAT32_C( 836.60) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -436.80), SIMDE_FLOAT32_C( -424.39), SIMDE_FLOAT32_C( 570.24), SIMDE_FLOAT32_C( -821.65), SIMDE_FLOAT32_C( -881.33), SIMDE_FLOAT32_C( -593.81), SIMDE_FLOAT32_C( -486.59), SIMDE_FLOAT32_C( 962.05)), { SIMDE_FLOAT32_C( 962.05), SIMDE_FLOAT32_C( -486.59), SIMDE_FLOAT32_C( -593.81), SIMDE_FLOAT32_C( -881.33) }, { SIMDE_FLOAT32_C( -821.65), SIMDE_FLOAT32_C( 570.24), SIMDE_FLOAT32_C( -424.39), SIMDE_FLOAT32_C( -436.80) } }, { 
simde_mm256_set_ps(SIMDE_FLOAT32_C( -876.51), SIMDE_FLOAT32_C( 339.14), SIMDE_FLOAT32_C( 805.13), SIMDE_FLOAT32_C( -98.57), SIMDE_FLOAT32_C( 486.75), SIMDE_FLOAT32_C( -826.76), SIMDE_FLOAT32_C( -802.86), SIMDE_FLOAT32_C( 864.61)), { SIMDE_FLOAT32_C( 864.61), SIMDE_FLOAT32_C( -802.86), SIMDE_FLOAT32_C( -826.76), SIMDE_FLOAT32_C( 486.75) }, { SIMDE_FLOAT32_C( -98.57), SIMDE_FLOAT32_C( 805.13), SIMDE_FLOAT32_C( 339.14), SIMDE_FLOAT32_C( -876.51) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32 lo[4]; simde_float32 hi[4]; simde_mm256_storeu2_m128(hi, lo, test_vec[i].a); simde_assert_equal_vf32(sizeof(lo) / sizeof(lo[0]), lo, test_vec[i].lo, 1); simde_assert_equal_vf32(sizeof(hi) / sizeof(hi[0]), hi, test_vec[i].hi, 1); } return 0; } static int test_simde_mm256_storeu2_m128i(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m128i lo; simde__m128i hi; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 393618896), INT32_C(-2001591323), INT32_C( 571243540), INT32_C( -991131551), INT32_C(-1628428871), INT32_C(-1785170070), INT32_C(-1344585991), INT32_C( 394549136)), simde_mm_set_epi32(INT32_C(-1628428871), INT32_C(-1785170070), INT32_C(-1344585991), INT32_C( 394549136)), simde_mm_set_epi32(INT32_C( 393618896), INT32_C(-2001591323), INT32_C( 571243540), INT32_C( -991131551)) }, { simde_mm256_set_epi32(INT32_C(-1934140687), INT32_C(-1810421284), INT32_C( 1425686798), INT32_C( -866335309), INT32_C( -95249521), INT32_C( 1994067869), INT32_C( -221558182), INT32_C(-1252244061)), simde_mm_set_epi32(INT32_C( -95249521), INT32_C( 1994067869), INT32_C( -221558182), INT32_C(-1252244061)), simde_mm_set_epi32(INT32_C(-1934140687), INT32_C(-1810421284), INT32_C( 1425686798), INT32_C( -866335309)) }, { simde_mm256_set_epi32(INT32_C( -246697484), INT32_C(-1956819884), INT32_C(-1699719942), INT32_C( -979363348), INT32_C( -887855577), INT32_C(-1489598373), INT32_C( -35728240), INT32_C(-1964323946)), simde_mm_set_epi32(INT32_C( 
-887855577), INT32_C(-1489598373), INT32_C( -35728240), INT32_C(-1964323946)), simde_mm_set_epi32(INT32_C( -246697484), INT32_C(-1956819884), INT32_C(-1699719942), INT32_C( -979363348)) }, { simde_mm256_set_epi32(INT32_C( 1386381884), INT32_C( 37784267), INT32_C(-1684378143), INT32_C( -594239665), INT32_C( 1400213774), INT32_C( -622529784), INT32_C( 401959227), INT32_C( 971273192)), simde_mm_set_epi32(INT32_C( 1400213774), INT32_C( -622529784), INT32_C( 401959227), INT32_C( 971273192)), simde_mm_set_epi32(INT32_C( 1386381884), INT32_C( 37784267), INT32_C(-1684378143), INT32_C( -594239665)) }, { simde_mm256_set_epi32(INT32_C(-1612998305), INT32_C( 1288430921), INT32_C(-1609225142), INT32_C( -537331182), INT32_C(-1310393347), INT32_C(-1547450104), INT32_C( 1731121387), INT32_C( 1158176208)), simde_mm_set_epi32(INT32_C(-1310393347), INT32_C(-1547450104), INT32_C( 1731121387), INT32_C( 1158176208)), simde_mm_set_epi32(INT32_C(-1612998305), INT32_C( 1288430921), INT32_C(-1609225142), INT32_C( -537331182)) }, { simde_mm256_set_epi32(INT32_C( 853720407), INT32_C( 1963572434), INT32_C(-1461958617), INT32_C( -948574), INT32_C( 440127588), INT32_C( -575649939), INT32_C( -647922801), INT32_C( 1292269475)), simde_mm_set_epi32(INT32_C( 440127588), INT32_C( -575649939), INT32_C( -647922801), INT32_C( 1292269475)), simde_mm_set_epi32(INT32_C( 853720407), INT32_C( 1963572434), INT32_C(-1461958617), INT32_C( -948574)) }, { simde_mm256_set_epi32(INT32_C( 1035304658), INT32_C(-1477195307), INT32_C( 1255581892), INT32_C( -187826179), INT32_C( -553614821), INT32_C(-1529210253), INT32_C( 524369675), INT32_C(-1409815299)), simde_mm_set_epi32(INT32_C( -553614821), INT32_C(-1529210253), INT32_C( 524369675), INT32_C(-1409815299)), simde_mm_set_epi32(INT32_C( 1035304658), INT32_C(-1477195307), INT32_C( 1255581892), INT32_C( -187826179)) }, { simde_mm256_set_epi32(INT32_C(-1147733206), INT32_C(-1982126005), INT32_C( 1233979928), INT32_C(-1413154458), INT32_C( 36276938), INT32_C( -66222229), 
INT32_C(-1746900300), INT32_C( 533326126)), simde_mm_set_epi32(INT32_C( 36276938), INT32_C( -66222229), INT32_C(-1746900300), INT32_C( 533326126)), simde_mm_set_epi32(INT32_C(-1147733206), INT32_C(-1982126005), INT32_C( 1233979928), INT32_C(-1413154458)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i lo, hi; simde_mm256_storeu2_m128i(&hi, &lo, test_vec[i].a); simde_test_x86_assert_equal_i32x4(lo, test_vec[i].lo); simde_test_x86_assert_equal_i32x4(hi, test_vec[i].hi); } return 0; } static int test_simde_mm256_stream_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; SIMDE_ALIGN_LIKE_32(simde__m256) simde_float32 r[8]; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 256.09), SIMDE_FLOAT32_C( 768.79), SIMDE_FLOAT32_C( 201.90), SIMDE_FLOAT32_C( 339.33), SIMDE_FLOAT32_C( 957.46), SIMDE_FLOAT32_C( 728.44), SIMDE_FLOAT32_C( 73.67), SIMDE_FLOAT32_C( 440.11)), { SIMDE_FLOAT32_C( 440.11), SIMDE_FLOAT32_C( 73.67), SIMDE_FLOAT32_C( 728.44), SIMDE_FLOAT32_C( 957.46), SIMDE_FLOAT32_C( 339.33), SIMDE_FLOAT32_C( 201.90), SIMDE_FLOAT32_C( 768.79), SIMDE_FLOAT32_C( 256.09) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -72.04), SIMDE_FLOAT32_C( -425.25), SIMDE_FLOAT32_C( 471.77), SIMDE_FLOAT32_C( 976.75), SIMDE_FLOAT32_C( -510.20), SIMDE_FLOAT32_C( 696.54), SIMDE_FLOAT32_C( -843.54), SIMDE_FLOAT32_C( -868.41)), { SIMDE_FLOAT32_C( -868.41), SIMDE_FLOAT32_C( -843.54), SIMDE_FLOAT32_C( 696.54), SIMDE_FLOAT32_C( -510.20), SIMDE_FLOAT32_C( 976.75), SIMDE_FLOAT32_C( 471.77), SIMDE_FLOAT32_C( -425.25), SIMDE_FLOAT32_C( -72.04) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -304.23), SIMDE_FLOAT32_C( 477.48), SIMDE_FLOAT32_C( 356.58), SIMDE_FLOAT32_C( 955.81), SIMDE_FLOAT32_C( 999.99), SIMDE_FLOAT32_C( 487.33), SIMDE_FLOAT32_C( 633.61), SIMDE_FLOAT32_C( 518.11)), { SIMDE_FLOAT32_C( 518.11), SIMDE_FLOAT32_C( 633.61), SIMDE_FLOAT32_C( 487.33), SIMDE_FLOAT32_C( 999.99), SIMDE_FLOAT32_C( 955.81), SIMDE_FLOAT32_C( 356.58), SIMDE_FLOAT32_C( 
477.48), SIMDE_FLOAT32_C( -304.23) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 88.04), SIMDE_FLOAT32_C( -296.25), SIMDE_FLOAT32_C( 948.33), SIMDE_FLOAT32_C( -584.17), SIMDE_FLOAT32_C( -796.11), SIMDE_FLOAT32_C( -38.84), SIMDE_FLOAT32_C( -706.11), SIMDE_FLOAT32_C( 347.32)), { SIMDE_FLOAT32_C( 347.32), SIMDE_FLOAT32_C( -706.11), SIMDE_FLOAT32_C( -38.84), SIMDE_FLOAT32_C( -796.11), SIMDE_FLOAT32_C( -584.17), SIMDE_FLOAT32_C( 948.33), SIMDE_FLOAT32_C( -296.25), SIMDE_FLOAT32_C( 88.04) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -143.76), SIMDE_FLOAT32_C( 504.36), SIMDE_FLOAT32_C( -154.12), SIMDE_FLOAT32_C( 375.43), SIMDE_FLOAT32_C( -307.18), SIMDE_FLOAT32_C( 256.93), SIMDE_FLOAT32_C( 78.86), SIMDE_FLOAT32_C( 526.77)), { SIMDE_FLOAT32_C( 526.77), SIMDE_FLOAT32_C( 78.86), SIMDE_FLOAT32_C( 256.93), SIMDE_FLOAT32_C( -307.18), SIMDE_FLOAT32_C( 375.43), SIMDE_FLOAT32_C( -154.12), SIMDE_FLOAT32_C( 504.36), SIMDE_FLOAT32_C( -143.76) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 597.82), SIMDE_FLOAT32_C( 697.68), SIMDE_FLOAT32_C( -285.13), SIMDE_FLOAT32_C( 354.51), SIMDE_FLOAT32_C( 41.32), SIMDE_FLOAT32_C( -445.78), SIMDE_FLOAT32_C( -483.17), SIMDE_FLOAT32_C( -727.28)), { SIMDE_FLOAT32_C( -727.28), SIMDE_FLOAT32_C( -483.17), SIMDE_FLOAT32_C( -445.78), SIMDE_FLOAT32_C( 41.32), SIMDE_FLOAT32_C( 354.51), SIMDE_FLOAT32_C( -285.13), SIMDE_FLOAT32_C( 697.68), SIMDE_FLOAT32_C( 597.82) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -441.93), SIMDE_FLOAT32_C( -382.04), SIMDE_FLOAT32_C( 392.67), SIMDE_FLOAT32_C( 690.10), SIMDE_FLOAT32_C( -30.26), SIMDE_FLOAT32_C( -968.87), SIMDE_FLOAT32_C( -246.46), SIMDE_FLOAT32_C( 625.73)), { SIMDE_FLOAT32_C( 625.73), SIMDE_FLOAT32_C( -246.46), SIMDE_FLOAT32_C( -968.87), SIMDE_FLOAT32_C( -30.26), SIMDE_FLOAT32_C( 690.10), SIMDE_FLOAT32_C( 392.67), SIMDE_FLOAT32_C( -382.04), SIMDE_FLOAT32_C( -441.93) } }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 442.97), SIMDE_FLOAT32_C( -20.37), SIMDE_FLOAT32_C( 758.01), SIMDE_FLOAT32_C( -507.47), SIMDE_FLOAT32_C( 529.68), 
SIMDE_FLOAT32_C( -598.71), SIMDE_FLOAT32_C( 952.31), SIMDE_FLOAT32_C( -565.36)), { SIMDE_FLOAT32_C( -565.36), SIMDE_FLOAT32_C( 952.31), SIMDE_FLOAT32_C( -598.71), SIMDE_FLOAT32_C( 529.68), SIMDE_FLOAT32_C( -507.47), SIMDE_FLOAT32_C( 758.01), SIMDE_FLOAT32_C( -20.37), SIMDE_FLOAT32_C( 442.97) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { SIMDE_ALIGN_LIKE_32(simde__m256) simde_float32 r[8]; simde_mm256_stream_ps(r, test_vec[i].a); simde_assert_equal_vf32(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_stream_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; SIMDE_ALIGN_LIKE_32(simde__m256d) simde_float64 r[4]; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 771.84), SIMDE_FLOAT64_C( 578.19), SIMDE_FLOAT64_C( 287.63), SIMDE_FLOAT64_C( 196.16)), { SIMDE_FLOAT64_C( 196.16), SIMDE_FLOAT64_C( 287.63), SIMDE_FLOAT64_C( 578.19), SIMDE_FLOAT64_C( 771.84) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -941.73), SIMDE_FLOAT64_C( -687.45), SIMDE_FLOAT64_C( -865.09), SIMDE_FLOAT64_C( 586.03)), { SIMDE_FLOAT64_C( 586.03), SIMDE_FLOAT64_C( -865.09), SIMDE_FLOAT64_C( -687.45), SIMDE_FLOAT64_C( -941.73) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -342.90), SIMDE_FLOAT64_C( -658.61), SIMDE_FLOAT64_C( 882.05), SIMDE_FLOAT64_C( -607.89)), { SIMDE_FLOAT64_C( -607.89), SIMDE_FLOAT64_C( 882.05), SIMDE_FLOAT64_C( -658.61), SIMDE_FLOAT64_C( -342.90) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -248.91), SIMDE_FLOAT64_C( -473.20), SIMDE_FLOAT64_C( 170.76), SIMDE_FLOAT64_C( -407.02)), { SIMDE_FLOAT64_C( -407.02), SIMDE_FLOAT64_C( 170.76), SIMDE_FLOAT64_C( -473.20), SIMDE_FLOAT64_C( -248.91) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 181.69), SIMDE_FLOAT64_C( 697.67), SIMDE_FLOAT64_C( 911.25), SIMDE_FLOAT64_C( -299.04)), { SIMDE_FLOAT64_C( -299.04), SIMDE_FLOAT64_C( 911.25), SIMDE_FLOAT64_C( 697.67), SIMDE_FLOAT64_C( 181.69) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -630.92), SIMDE_FLOAT64_C( -612.42), 
SIMDE_FLOAT64_C( 645.34), SIMDE_FLOAT64_C( -379.90)), { SIMDE_FLOAT64_C( -379.90), SIMDE_FLOAT64_C( 645.34), SIMDE_FLOAT64_C( -612.42), SIMDE_FLOAT64_C( -630.92) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -339.32), SIMDE_FLOAT64_C( 404.98), SIMDE_FLOAT64_C( -361.76), SIMDE_FLOAT64_C( 391.60)), { SIMDE_FLOAT64_C( 391.60), SIMDE_FLOAT64_C( -361.76), SIMDE_FLOAT64_C( 404.98), SIMDE_FLOAT64_C( -339.32) } }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 714.72), SIMDE_FLOAT64_C( 997.58), SIMDE_FLOAT64_C( 807.47), SIMDE_FLOAT64_C( 948.80)), { SIMDE_FLOAT64_C( 948.80), SIMDE_FLOAT64_C( 807.47), SIMDE_FLOAT64_C( 997.58), SIMDE_FLOAT64_C( 714.72) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { SIMDE_ALIGN_LIKE_32(simde__m256d) simde_float64 r[4]; simde_mm256_stream_pd(r, test_vec[i].a); simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_stream_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( -495387953), INT32_C( 1389422093), INT32_C( 1549613541), INT32_C( 1621396930), INT32_C( 431885981), INT32_C( -495493978), INT32_C( 957980176), INT32_C( -756622382)), simde_mm256_set_epi32(INT32_C( -495387953), INT32_C( 1389422093), INT32_C( 1549613541), INT32_C( 1621396930), INT32_C( 431885981), INT32_C( -495493978), INT32_C( 957980176), INT32_C( -756622382)) }, { simde_mm256_set_epi32(INT32_C(-1537563225), INT32_C( -111752931), INT32_C( 2068479386), INT32_C( 2004807479), INT32_C( 2110162316), INT32_C( 1854708769), INT32_C( -83811592), INT32_C( -892599604)), simde_mm256_set_epi32(INT32_C(-1537563225), INT32_C( -111752931), INT32_C( 2068479386), INT32_C( 2004807479), INT32_C( 2110162316), INT32_C( 1854708769), INT32_C( -83811592), INT32_C( -892599604)) }, { simde_mm256_set_epi32(INT32_C( 1682126570), INT32_C( 340006289), INT32_C( 149083722), INT32_C( 1734045351), INT32_C(-1572830580), INT32_C( 1824421914), INT32_C( 
1423738129), INT32_C( -492096848)), simde_mm256_set_epi32(INT32_C( 1682126570), INT32_C( 340006289), INT32_C( 149083722), INT32_C( 1734045351), INT32_C(-1572830580), INT32_C( 1824421914), INT32_C( 1423738129), INT32_C( -492096848)) }, { simde_mm256_set_epi32(INT32_C(-2079347795), INT32_C( -336455233), INT32_C( -761216501), INT32_C( 1784601451), INT32_C( 386854500), INT32_C( 177253765), INT32_C( 797476379), INT32_C( -848434210)), simde_mm256_set_epi32(INT32_C(-2079347795), INT32_C( -336455233), INT32_C( -761216501), INT32_C( 1784601451), INT32_C( 386854500), INT32_C( 177253765), INT32_C( 797476379), INT32_C( -848434210)) }, { simde_mm256_set_epi32(INT32_C(-1024397654), INT32_C( 273228891), INT32_C( -327343542), INT32_C( 941152347), INT32_C(-1943801853), INT32_C( -571606335), INT32_C(-1600503753), INT32_C( 3657489)), simde_mm256_set_epi32(INT32_C(-1024397654), INT32_C( 273228891), INT32_C( -327343542), INT32_C( 941152347), INT32_C(-1943801853), INT32_C( -571606335), INT32_C(-1600503753), INT32_C( 3657489)) }, { simde_mm256_set_epi32(INT32_C( 392666418), INT32_C(-1141495552), INT32_C(-1265226558), INT32_C( -341769681), INT32_C( 784056885), INT32_C( -8791049), INT32_C( -255179514), INT32_C( 1505512077)), simde_mm256_set_epi32(INT32_C( 392666418), INT32_C(-1141495552), INT32_C(-1265226558), INT32_C( -341769681), INT32_C( 784056885), INT32_C( -8791049), INT32_C( -255179514), INT32_C( 1505512077)) }, { simde_mm256_set_epi32(INT32_C(-2051731929), INT32_C( 1367430487), INT32_C( -142986568), INT32_C( 1381573702), INT32_C( -856833247), INT32_C( 665026143), INT32_C( 1196790497), INT32_C( -507260176)), simde_mm256_set_epi32(INT32_C(-2051731929), INT32_C( 1367430487), INT32_C( -142986568), INT32_C( 1381573702), INT32_C( -856833247), INT32_C( 665026143), INT32_C( 1196790497), INT32_C( -507260176)) }, { simde_mm256_set_epi32(INT32_C( -878005657), INT32_C( -546637276), INT32_C(-1056155816), INT32_C( 259320365), INT32_C(-1537222115), INT32_C( 895058753), INT32_C( -970999316), 
INT32_C( 224281618)), simde_mm256_set_epi32(INT32_C( -878005657), INT32_C( -546637276), INT32_C(-1056155816), INT32_C( 259320365), INT32_C(-1537222115), INT32_C( 895058753), INT32_C( -970999316), INT32_C( 224281618)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r; simde_mm256_stream_si256(&r, test_vec[i].a); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_sub_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 895.54), SIMDE_FLOAT32_C( -418.39), SIMDE_FLOAT32_C( -695.61), SIMDE_FLOAT32_C( -703.30), SIMDE_FLOAT32_C( -607.73), SIMDE_FLOAT32_C( 485.65), SIMDE_FLOAT32_C( 755.18), SIMDE_FLOAT32_C( 41.77)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -396.67), SIMDE_FLOAT32_C( -773.83), SIMDE_FLOAT32_C( -672.24), SIMDE_FLOAT32_C( 931.02), SIMDE_FLOAT32_C( -562.65), SIMDE_FLOAT32_C( -945.51), SIMDE_FLOAT32_C( 938.88), SIMDE_FLOAT32_C( 508.62)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1292.21), SIMDE_FLOAT32_C( 355.44), SIMDE_FLOAT32_C( -23.37), SIMDE_FLOAT32_C(-1634.32), SIMDE_FLOAT32_C( -45.08), SIMDE_FLOAT32_C( 1431.16), SIMDE_FLOAT32_C( -183.70), SIMDE_FLOAT32_C( -466.85)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -470.99), SIMDE_FLOAT32_C( -584.82), SIMDE_FLOAT32_C( 594.88), SIMDE_FLOAT32_C( -970.99), SIMDE_FLOAT32_C( -61.62), SIMDE_FLOAT32_C( 8.56), SIMDE_FLOAT32_C( -149.02), SIMDE_FLOAT32_C( 992.11)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -290.52), SIMDE_FLOAT32_C( 724.83), SIMDE_FLOAT32_C( -46.66), SIMDE_FLOAT32_C( -959.87), SIMDE_FLOAT32_C( -350.83), SIMDE_FLOAT32_C( -742.59), SIMDE_FLOAT32_C( -154.10), SIMDE_FLOAT32_C( 297.07)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -180.47), SIMDE_FLOAT32_C(-1309.65), SIMDE_FLOAT32_C( 641.54), SIMDE_FLOAT32_C( -11.12), SIMDE_FLOAT32_C( 289.21), SIMDE_FLOAT32_C( 751.15), SIMDE_FLOAT32_C( 5.08), SIMDE_FLOAT32_C( 695.04)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -970.60), 
SIMDE_FLOAT32_C( -80.01), SIMDE_FLOAT32_C( -52.72), SIMDE_FLOAT32_C( 126.54), SIMDE_FLOAT32_C( -671.14), SIMDE_FLOAT32_C( -767.97), SIMDE_FLOAT32_C( -218.43), SIMDE_FLOAT32_C( 532.20)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 605.24), SIMDE_FLOAT32_C( -39.09), SIMDE_FLOAT32_C( 863.80), SIMDE_FLOAT32_C( -150.76), SIMDE_FLOAT32_C( -836.71), SIMDE_FLOAT32_C( 584.17), SIMDE_FLOAT32_C( 207.01), SIMDE_FLOAT32_C( 321.52)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-1575.84), SIMDE_FLOAT32_C( -40.92), SIMDE_FLOAT32_C( -916.52), SIMDE_FLOAT32_C( 277.30), SIMDE_FLOAT32_C( 165.57), SIMDE_FLOAT32_C(-1352.14), SIMDE_FLOAT32_C( -425.44), SIMDE_FLOAT32_C( 210.68)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 342.27), SIMDE_FLOAT32_C( -662.10), SIMDE_FLOAT32_C( -934.33), SIMDE_FLOAT32_C( -742.68), SIMDE_FLOAT32_C( -973.49), SIMDE_FLOAT32_C( 11.46), SIMDE_FLOAT32_C( 125.25), SIMDE_FLOAT32_C( -964.36)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 604.39), SIMDE_FLOAT32_C( 789.04), SIMDE_FLOAT32_C( -177.72), SIMDE_FLOAT32_C( -86.37), SIMDE_FLOAT32_C( -125.89), SIMDE_FLOAT32_C( 264.15), SIMDE_FLOAT32_C( 690.78), SIMDE_FLOAT32_C( 529.89)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -262.12), SIMDE_FLOAT32_C(-1451.14), SIMDE_FLOAT32_C( -756.61), SIMDE_FLOAT32_C( -656.31), SIMDE_FLOAT32_C( -847.60), SIMDE_FLOAT32_C( -252.69), SIMDE_FLOAT32_C( -565.53), SIMDE_FLOAT32_C(-1494.25)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 25.31), SIMDE_FLOAT32_C( 539.95), SIMDE_FLOAT32_C( 423.13), SIMDE_FLOAT32_C( 781.76), SIMDE_FLOAT32_C( 417.34), SIMDE_FLOAT32_C( 842.38), SIMDE_FLOAT32_C( -253.59), SIMDE_FLOAT32_C( -916.97)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 772.83), SIMDE_FLOAT32_C( 517.22), SIMDE_FLOAT32_C( 908.12), SIMDE_FLOAT32_C( -320.25), SIMDE_FLOAT32_C( 304.42), SIMDE_FLOAT32_C( -692.34), SIMDE_FLOAT32_C( -714.07), SIMDE_FLOAT32_C( -793.24)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -747.52), SIMDE_FLOAT32_C( 22.73), SIMDE_FLOAT32_C( -484.99), SIMDE_FLOAT32_C( 1102.01), SIMDE_FLOAT32_C( 112.92), SIMDE_FLOAT32_C( 1534.72), 
SIMDE_FLOAT32_C( 460.48), SIMDE_FLOAT32_C( -123.73)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 393.40), SIMDE_FLOAT32_C( -394.71), SIMDE_FLOAT32_C( -86.25), SIMDE_FLOAT32_C( 978.22), SIMDE_FLOAT32_C( 983.88), SIMDE_FLOAT32_C( 704.75), SIMDE_FLOAT32_C( 982.77), SIMDE_FLOAT32_C( 368.33)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -436.22), SIMDE_FLOAT32_C( -898.24), SIMDE_FLOAT32_C( -332.96), SIMDE_FLOAT32_C( 552.30), SIMDE_FLOAT32_C( 889.88), SIMDE_FLOAT32_C( -709.13), SIMDE_FLOAT32_C( -308.74), SIMDE_FLOAT32_C( 240.99)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 829.62), SIMDE_FLOAT32_C( 503.53), SIMDE_FLOAT32_C( 246.71), SIMDE_FLOAT32_C( 425.92), SIMDE_FLOAT32_C( 94.00), SIMDE_FLOAT32_C( 1413.88), SIMDE_FLOAT32_C( 1291.51), SIMDE_FLOAT32_C( 127.34)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 338.33), SIMDE_FLOAT32_C( 742.66), SIMDE_FLOAT32_C( -662.36), SIMDE_FLOAT32_C( 193.25), SIMDE_FLOAT32_C( -791.36), SIMDE_FLOAT32_C( 864.59), SIMDE_FLOAT32_C( -234.67), SIMDE_FLOAT32_C( -697.85)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -154.56), SIMDE_FLOAT32_C( 577.30), SIMDE_FLOAT32_C( 801.65), SIMDE_FLOAT32_C( 520.59), SIMDE_FLOAT32_C( -742.87), SIMDE_FLOAT32_C( -868.99), SIMDE_FLOAT32_C( 16.41), SIMDE_FLOAT32_C( 377.46)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 492.89), SIMDE_FLOAT32_C( 165.36), SIMDE_FLOAT32_C(-1464.01), SIMDE_FLOAT32_C( -327.34), SIMDE_FLOAT32_C( -48.49), SIMDE_FLOAT32_C( 1733.58), SIMDE_FLOAT32_C( -251.08), SIMDE_FLOAT32_C(-1075.31)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 301.89), SIMDE_FLOAT32_C( 110.31), SIMDE_FLOAT32_C( -669.00), SIMDE_FLOAT32_C( -603.43), SIMDE_FLOAT32_C( 552.74), SIMDE_FLOAT32_C( 111.39), SIMDE_FLOAT32_C( 485.92), SIMDE_FLOAT32_C( 89.23)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 222.86), SIMDE_FLOAT32_C( 451.25), SIMDE_FLOAT32_C( 422.47), SIMDE_FLOAT32_C( -182.63), SIMDE_FLOAT32_C( -845.87), SIMDE_FLOAT32_C( -863.72), SIMDE_FLOAT32_C( -400.45), SIMDE_FLOAT32_C( 543.79)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 79.03), SIMDE_FLOAT32_C( -340.94), 
SIMDE_FLOAT32_C(-1091.47), SIMDE_FLOAT32_C( -420.80), SIMDE_FLOAT32_C( 1398.61), SIMDE_FLOAT32_C( 975.11), SIMDE_FLOAT32_C( 886.37), SIMDE_FLOAT32_C( -454.56)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_sub_ps(test_vec[i].a, test_vec[i].b); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_sub_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -472.29), SIMDE_FLOAT64_C( 818.19), SIMDE_FLOAT64_C( -310.33), SIMDE_FLOAT64_C( -307.48)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 349.11), SIMDE_FLOAT64_C( 984.47), SIMDE_FLOAT64_C( 184.55), SIMDE_FLOAT64_C( 274.42)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -821.40), SIMDE_FLOAT64_C( -166.28), SIMDE_FLOAT64_C( -494.88), SIMDE_FLOAT64_C( -581.90)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -992.33), SIMDE_FLOAT64_C( -503.60), SIMDE_FLOAT64_C( -575.41), SIMDE_FLOAT64_C( 497.58)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 203.77), SIMDE_FLOAT64_C( -296.83), SIMDE_FLOAT64_C( -970.76), SIMDE_FLOAT64_C( 837.97)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-1196.10), SIMDE_FLOAT64_C( -206.77), SIMDE_FLOAT64_C( 395.35), SIMDE_FLOAT64_C( -340.39)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 691.42), SIMDE_FLOAT64_C( 124.38), SIMDE_FLOAT64_C( -117.46), SIMDE_FLOAT64_C( -44.64)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 865.33), SIMDE_FLOAT64_C( -985.94), SIMDE_FLOAT64_C( -427.40), SIMDE_FLOAT64_C( 247.61)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -173.91), SIMDE_FLOAT64_C( 1110.32), SIMDE_FLOAT64_C( 309.94), SIMDE_FLOAT64_C( -292.25)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -811.54), SIMDE_FLOAT64_C( -682.27), SIMDE_FLOAT64_C( -138.35), SIMDE_FLOAT64_C( 149.95)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -356.84), SIMDE_FLOAT64_C( -870.72), SIMDE_FLOAT64_C( 278.05), SIMDE_FLOAT64_C( 725.66)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -454.70), SIMDE_FLOAT64_C( 188.45), SIMDE_FLOAT64_C( 
-416.40), SIMDE_FLOAT64_C( -575.71)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -372.68), SIMDE_FLOAT64_C( -973.76), SIMDE_FLOAT64_C( -655.95), SIMDE_FLOAT64_C( -13.49)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 132.41), SIMDE_FLOAT64_C( -317.72), SIMDE_FLOAT64_C( 685.31), SIMDE_FLOAT64_C( -205.90)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -505.09), SIMDE_FLOAT64_C( -656.04), SIMDE_FLOAT64_C(-1341.26), SIMDE_FLOAT64_C( 192.41)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -904.59), SIMDE_FLOAT64_C( 450.57), SIMDE_FLOAT64_C( 462.97), SIMDE_FLOAT64_C( -213.02)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 256.78), SIMDE_FLOAT64_C( 248.73), SIMDE_FLOAT64_C( -329.49), SIMDE_FLOAT64_C( 126.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-1161.37), SIMDE_FLOAT64_C( 201.84), SIMDE_FLOAT64_C( 792.46), SIMDE_FLOAT64_C( -339.78)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 425.49), SIMDE_FLOAT64_C( -968.50), SIMDE_FLOAT64_C( -130.01), SIMDE_FLOAT64_C( 908.84)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 529.10), SIMDE_FLOAT64_C( 40.66), SIMDE_FLOAT64_C( 677.87), SIMDE_FLOAT64_C( -349.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -103.61), SIMDE_FLOAT64_C(-1009.16), SIMDE_FLOAT64_C( -807.88), SIMDE_FLOAT64_C( 1258.04)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 385.72), SIMDE_FLOAT64_C( 135.17), SIMDE_FLOAT64_C( -935.76), SIMDE_FLOAT64_C( 307.38)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 799.62), SIMDE_FLOAT64_C( 219.20), SIMDE_FLOAT64_C( 812.15), SIMDE_FLOAT64_C( 33.47)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -413.90), SIMDE_FLOAT64_C( -84.03), SIMDE_FLOAT64_C(-1747.91), SIMDE_FLOAT64_C( 273.91)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_sub_pd(test_vec[i].a, test_vec[i].b); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_undefined_ps(SIMDE_MUNIT_TEST_ARGS) { simde__m256 r; simde__m256 e = simde_mm256_setzero_ps(); r = simde_mm256_undefined_ps(); r = simde_mm256_xor_ps(r, r); simde_assert_m256_close(r, e, 1); return 0; } 
static int test_simde_mm256_undefined_pd(SIMDE_MUNIT_TEST_ARGS) { simde__m256d r; simde__m256d e = simde_mm256_setzero_pd(); r = simde_mm256_undefined_pd(); r = simde_mm256_xor_pd(r, r); simde_assert_m256d_close(r, e, 1); return 0; } static int test_simde_mm256_undefined_si256(SIMDE_MUNIT_TEST_ARGS) { simde__m256d r; simde__m256d e = simde_mm256_setzero_pd(); r = simde_mm256_undefined_pd(); r = simde_mm256_xor_pd(r, r); simde_assert_m256i_equal(simde_mm256_castpd_si256(r), simde_mm256_castpd_si256(e)); return 0; } static int test_simde_mm256_unpackhi_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 807.72), SIMDE_FLOAT32_C( 100.14), SIMDE_FLOAT32_C( 187.05), SIMDE_FLOAT32_C( -298.31), SIMDE_FLOAT32_C( -34.37), SIMDE_FLOAT32_C( 964.34), SIMDE_FLOAT32_C( 191.73), SIMDE_FLOAT32_C( 188.36)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -734.87), SIMDE_FLOAT32_C( -828.29), SIMDE_FLOAT32_C( -280.75), SIMDE_FLOAT32_C( 955.06), SIMDE_FLOAT32_C( 436.60), SIMDE_FLOAT32_C( -584.60), SIMDE_FLOAT32_C( 158.18), SIMDE_FLOAT32_C( 60.28)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -734.87), SIMDE_FLOAT32_C( 807.72), SIMDE_FLOAT32_C( -828.29), SIMDE_FLOAT32_C( 100.14), SIMDE_FLOAT32_C( 436.60), SIMDE_FLOAT32_C( -34.37), SIMDE_FLOAT32_C( -584.60), SIMDE_FLOAT32_C( 964.34)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -931.86), SIMDE_FLOAT32_C( -611.79), SIMDE_FLOAT32_C( -274.19), SIMDE_FLOAT32_C( -226.15), SIMDE_FLOAT32_C( -145.52), SIMDE_FLOAT32_C( 128.94), SIMDE_FLOAT32_C( -102.01), SIMDE_FLOAT32_C( -500.66)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -438.58), SIMDE_FLOAT32_C( -771.61), SIMDE_FLOAT32_C( 661.02), SIMDE_FLOAT32_C( -824.40), SIMDE_FLOAT32_C( 837.69), SIMDE_FLOAT32_C( 62.03), SIMDE_FLOAT32_C( 350.22), SIMDE_FLOAT32_C( -94.71)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -438.58), SIMDE_FLOAT32_C( -931.86), SIMDE_FLOAT32_C( -771.61), SIMDE_FLOAT32_C( -611.79), SIMDE_FLOAT32_C( 837.69), SIMDE_FLOAT32_C( 
-145.52), SIMDE_FLOAT32_C( 62.03), SIMDE_FLOAT32_C( 128.94)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -514.10), SIMDE_FLOAT32_C( 798.26), SIMDE_FLOAT32_C( 453.29), SIMDE_FLOAT32_C( -723.59), SIMDE_FLOAT32_C( 488.64), SIMDE_FLOAT32_C( -551.49), SIMDE_FLOAT32_C( -160.63), SIMDE_FLOAT32_C( 78.66)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 525.47), SIMDE_FLOAT32_C( -613.64), SIMDE_FLOAT32_C( 666.94), SIMDE_FLOAT32_C( -806.63), SIMDE_FLOAT32_C( 409.97), SIMDE_FLOAT32_C( 221.24), SIMDE_FLOAT32_C( 721.07), SIMDE_FLOAT32_C( -434.84)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 525.47), SIMDE_FLOAT32_C( -514.10), SIMDE_FLOAT32_C( -613.64), SIMDE_FLOAT32_C( 798.26), SIMDE_FLOAT32_C( 409.97), SIMDE_FLOAT32_C( 488.64), SIMDE_FLOAT32_C( 221.24), SIMDE_FLOAT32_C( -551.49)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -951.19), SIMDE_FLOAT32_C( -28.42), SIMDE_FLOAT32_C( 106.10), SIMDE_FLOAT32_C( -926.30), SIMDE_FLOAT32_C( -891.33), SIMDE_FLOAT32_C( -910.68), SIMDE_FLOAT32_C( 859.23), SIMDE_FLOAT32_C( -534.18)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -774.25), SIMDE_FLOAT32_C( -509.69), SIMDE_FLOAT32_C( -863.30), SIMDE_FLOAT32_C( -245.43), SIMDE_FLOAT32_C( -949.52), SIMDE_FLOAT32_C( -135.53), SIMDE_FLOAT32_C( 390.61), SIMDE_FLOAT32_C( -616.91)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -774.25), SIMDE_FLOAT32_C( -951.19), SIMDE_FLOAT32_C( -509.69), SIMDE_FLOAT32_C( -28.42), SIMDE_FLOAT32_C( -949.52), SIMDE_FLOAT32_C( -891.33), SIMDE_FLOAT32_C( -135.53), SIMDE_FLOAT32_C( -910.68)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 854.52), SIMDE_FLOAT32_C( -781.40), SIMDE_FLOAT32_C( 848.96), SIMDE_FLOAT32_C( 755.16), SIMDE_FLOAT32_C( 991.03), SIMDE_FLOAT32_C( -213.85), SIMDE_FLOAT32_C( 907.55), SIMDE_FLOAT32_C( -711.16)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 456.01), SIMDE_FLOAT32_C( -206.18), SIMDE_FLOAT32_C( -786.69), SIMDE_FLOAT32_C( 940.75), SIMDE_FLOAT32_C( -664.64), SIMDE_FLOAT32_C( -614.97), SIMDE_FLOAT32_C( 524.63), SIMDE_FLOAT32_C( 291.78)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 456.01), SIMDE_FLOAT32_C( 
854.52), SIMDE_FLOAT32_C( -206.18), SIMDE_FLOAT32_C( -781.40), SIMDE_FLOAT32_C( -664.64), SIMDE_FLOAT32_C( 991.03), SIMDE_FLOAT32_C( -614.97), SIMDE_FLOAT32_C( -213.85)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -51.56), SIMDE_FLOAT32_C( 716.82), SIMDE_FLOAT32_C( 820.37), SIMDE_FLOAT32_C( -556.55), SIMDE_FLOAT32_C( -748.23), SIMDE_FLOAT32_C( -191.87), SIMDE_FLOAT32_C( 886.02), SIMDE_FLOAT32_C( -964.97)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 942.40), SIMDE_FLOAT32_C( -388.17), SIMDE_FLOAT32_C( 620.64), SIMDE_FLOAT32_C( 768.35), SIMDE_FLOAT32_C( -196.91), SIMDE_FLOAT32_C( -771.49), SIMDE_FLOAT32_C( -618.58), SIMDE_FLOAT32_C( -887.04)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 942.40), SIMDE_FLOAT32_C( -51.56), SIMDE_FLOAT32_C( -388.17), SIMDE_FLOAT32_C( 716.82), SIMDE_FLOAT32_C( -196.91), SIMDE_FLOAT32_C( -748.23), SIMDE_FLOAT32_C( -771.49), SIMDE_FLOAT32_C( -191.87)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 934.86), SIMDE_FLOAT32_C( -147.82), SIMDE_FLOAT32_C( 984.99), SIMDE_FLOAT32_C( 944.15), SIMDE_FLOAT32_C( 882.67), SIMDE_FLOAT32_C( 370.21), SIMDE_FLOAT32_C( 981.44), SIMDE_FLOAT32_C( -856.37)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 80.89), SIMDE_FLOAT32_C( -20.18), SIMDE_FLOAT32_C( -561.60), SIMDE_FLOAT32_C( 599.04), SIMDE_FLOAT32_C( -556.62), SIMDE_FLOAT32_C( -514.91), SIMDE_FLOAT32_C( -240.53), SIMDE_FLOAT32_C( -421.29)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 80.89), SIMDE_FLOAT32_C( 934.86), SIMDE_FLOAT32_C( -20.18), SIMDE_FLOAT32_C( -147.82), SIMDE_FLOAT32_C( -556.62), SIMDE_FLOAT32_C( 882.67), SIMDE_FLOAT32_C( -514.91), SIMDE_FLOAT32_C( 370.21)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -445.93), SIMDE_FLOAT32_C( 998.84), SIMDE_FLOAT32_C( -975.41), SIMDE_FLOAT32_C( 762.47), SIMDE_FLOAT32_C( 829.30), SIMDE_FLOAT32_C( -324.70), SIMDE_FLOAT32_C( 745.09), SIMDE_FLOAT32_C( 12.40)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 177.17), SIMDE_FLOAT32_C( 871.02), SIMDE_FLOAT32_C( 263.38), SIMDE_FLOAT32_C( -284.12), SIMDE_FLOAT32_C( -407.86), SIMDE_FLOAT32_C( -554.15), SIMDE_FLOAT32_C( 
-7.26), SIMDE_FLOAT32_C( -655.02)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 177.17), SIMDE_FLOAT32_C( -445.93), SIMDE_FLOAT32_C( 871.02), SIMDE_FLOAT32_C( 998.84), SIMDE_FLOAT32_C( -407.86), SIMDE_FLOAT32_C( 829.30), SIMDE_FLOAT32_C( -554.15), SIMDE_FLOAT32_C( -324.70)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_unpackhi_ps(test_vec[i].a, test_vec[i].b); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_unpackhi_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 474.20), SIMDE_FLOAT64_C( -84.92), SIMDE_FLOAT64_C( 521.98), SIMDE_FLOAT64_C( -506.09)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 547.06), SIMDE_FLOAT64_C( -105.08), SIMDE_FLOAT64_C( 810.31), SIMDE_FLOAT64_C( 175.50)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 547.06), SIMDE_FLOAT64_C( 474.20), SIMDE_FLOAT64_C( 810.31), SIMDE_FLOAT64_C( 521.98)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -738.63), SIMDE_FLOAT64_C( -207.02), SIMDE_FLOAT64_C( 624.23), SIMDE_FLOAT64_C( -787.13)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -734.37), SIMDE_FLOAT64_C( 16.93), SIMDE_FLOAT64_C( -235.12), SIMDE_FLOAT64_C( 261.95)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -734.37), SIMDE_FLOAT64_C( -738.63), SIMDE_FLOAT64_C( -235.12), SIMDE_FLOAT64_C( 624.23)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 713.19), SIMDE_FLOAT64_C( 906.18), SIMDE_FLOAT64_C( -969.47), SIMDE_FLOAT64_C( -953.69)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 182.58), SIMDE_FLOAT64_C( 575.97), SIMDE_FLOAT64_C( 870.00), SIMDE_FLOAT64_C( 681.86)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 182.58), SIMDE_FLOAT64_C( 713.19), SIMDE_FLOAT64_C( 870.00), SIMDE_FLOAT64_C( -969.47)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -478.99), SIMDE_FLOAT64_C( -198.61), SIMDE_FLOAT64_C( -506.78), SIMDE_FLOAT64_C( -744.29)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -105.58), SIMDE_FLOAT64_C( -594.58), SIMDE_FLOAT64_C( -326.65), 
SIMDE_FLOAT64_C( 744.36)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -105.58), SIMDE_FLOAT64_C( -478.99), SIMDE_FLOAT64_C( -326.65), SIMDE_FLOAT64_C( -506.78)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 456.60), SIMDE_FLOAT64_C( -346.15), SIMDE_FLOAT64_C( 230.97), SIMDE_FLOAT64_C( 246.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -895.16), SIMDE_FLOAT64_C( 675.27), SIMDE_FLOAT64_C( -175.34), SIMDE_FLOAT64_C( -350.18)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -895.16), SIMDE_FLOAT64_C( 456.60), SIMDE_FLOAT64_C( -175.34), SIMDE_FLOAT64_C( 230.97)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 517.62), SIMDE_FLOAT64_C( -985.29), SIMDE_FLOAT64_C( -720.18), SIMDE_FLOAT64_C( 52.87)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 519.19), SIMDE_FLOAT64_C( -56.82), SIMDE_FLOAT64_C( 611.63), SIMDE_FLOAT64_C( 463.94)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 519.19), SIMDE_FLOAT64_C( 517.62), SIMDE_FLOAT64_C( 611.63), SIMDE_FLOAT64_C( -720.18)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -839.87), SIMDE_FLOAT64_C( -23.85), SIMDE_FLOAT64_C( 777.18), SIMDE_FLOAT64_C( 867.39)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -606.42), SIMDE_FLOAT64_C( 503.57), SIMDE_FLOAT64_C( 891.19), SIMDE_FLOAT64_C( 137.98)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -606.42), SIMDE_FLOAT64_C( -839.87), SIMDE_FLOAT64_C( 891.19), SIMDE_FLOAT64_C( 777.18)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -362.78), SIMDE_FLOAT64_C( -942.23), SIMDE_FLOAT64_C( 577.23), SIMDE_FLOAT64_C( 747.66)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -987.53), SIMDE_FLOAT64_C( 145.74), SIMDE_FLOAT64_C( 948.38), SIMDE_FLOAT64_C( -772.85)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -987.53), SIMDE_FLOAT64_C( -362.78), SIMDE_FLOAT64_C( 948.38), SIMDE_FLOAT64_C( 577.23)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_unpackhi_pd(test_vec[i].a, test_vec[i].b); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_unpacklo_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; 
simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 476.45), SIMDE_FLOAT32_C( 703.11), SIMDE_FLOAT32_C( 221.80), SIMDE_FLOAT32_C( -361.45), SIMDE_FLOAT32_C( 645.73), SIMDE_FLOAT32_C( 420.76), SIMDE_FLOAT32_C( -23.12), SIMDE_FLOAT32_C( 96.33)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -773.95), SIMDE_FLOAT32_C( 586.39), SIMDE_FLOAT32_C( 118.66), SIMDE_FLOAT32_C( 5.14), SIMDE_FLOAT32_C( 9.63), SIMDE_FLOAT32_C( 896.46), SIMDE_FLOAT32_C( 121.67), SIMDE_FLOAT32_C( -134.20)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 118.66), SIMDE_FLOAT32_C( 221.80), SIMDE_FLOAT32_C( 5.14), SIMDE_FLOAT32_C( -361.45), SIMDE_FLOAT32_C( 121.67), SIMDE_FLOAT32_C( -23.12), SIMDE_FLOAT32_C( -134.20), SIMDE_FLOAT32_C( 96.33)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -605.12), SIMDE_FLOAT32_C( 481.83), SIMDE_FLOAT32_C( 757.82), SIMDE_FLOAT32_C( -782.53), SIMDE_FLOAT32_C( 546.60), SIMDE_FLOAT32_C( -405.06), SIMDE_FLOAT32_C( -413.20), SIMDE_FLOAT32_C( -645.12)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -646.85), SIMDE_FLOAT32_C( -619.64), SIMDE_FLOAT32_C( 4.92), SIMDE_FLOAT32_C( 816.93), SIMDE_FLOAT32_C( -509.30), SIMDE_FLOAT32_C( 435.06), SIMDE_FLOAT32_C( -11.96), SIMDE_FLOAT32_C( -147.23)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 4.92), SIMDE_FLOAT32_C( 757.82), SIMDE_FLOAT32_C( 816.93), SIMDE_FLOAT32_C( -782.53), SIMDE_FLOAT32_C( -11.96), SIMDE_FLOAT32_C( -413.20), SIMDE_FLOAT32_C( -147.23), SIMDE_FLOAT32_C( -645.12)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 850.74), SIMDE_FLOAT32_C( -85.68), SIMDE_FLOAT32_C( -210.45), SIMDE_FLOAT32_C( -73.45), SIMDE_FLOAT32_C( 532.06), SIMDE_FLOAT32_C( 709.20), SIMDE_FLOAT32_C( 882.78), SIMDE_FLOAT32_C( -768.32)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -922.57), SIMDE_FLOAT32_C( 42.72), SIMDE_FLOAT32_C( 514.25), SIMDE_FLOAT32_C( -144.83), SIMDE_FLOAT32_C( -74.58), SIMDE_FLOAT32_C( -573.39), SIMDE_FLOAT32_C( -176.90), SIMDE_FLOAT32_C( -171.10)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 514.25), SIMDE_FLOAT32_C( -210.45), SIMDE_FLOAT32_C( -144.83), SIMDE_FLOAT32_C( 
-73.45), SIMDE_FLOAT32_C( -176.90), SIMDE_FLOAT32_C( 882.78), SIMDE_FLOAT32_C( -171.10), SIMDE_FLOAT32_C( -768.32)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -573.82), SIMDE_FLOAT32_C( 486.51), SIMDE_FLOAT32_C( 332.79), SIMDE_FLOAT32_C( -446.48), SIMDE_FLOAT32_C( 110.94), SIMDE_FLOAT32_C( 515.10), SIMDE_FLOAT32_C( 513.81), SIMDE_FLOAT32_C( -806.87)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 206.71), SIMDE_FLOAT32_C( -250.67), SIMDE_FLOAT32_C( 127.20), SIMDE_FLOAT32_C( -93.11), SIMDE_FLOAT32_C( -774.94), SIMDE_FLOAT32_C( -230.88), SIMDE_FLOAT32_C( 631.35), SIMDE_FLOAT32_C( 231.15)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 127.20), SIMDE_FLOAT32_C( 332.79), SIMDE_FLOAT32_C( -93.11), SIMDE_FLOAT32_C( -446.48), SIMDE_FLOAT32_C( 631.35), SIMDE_FLOAT32_C( 513.81), SIMDE_FLOAT32_C( 231.15), SIMDE_FLOAT32_C( -806.87)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 149.45), SIMDE_FLOAT32_C( 726.97), SIMDE_FLOAT32_C( -55.39), SIMDE_FLOAT32_C( -82.15), SIMDE_FLOAT32_C( 549.77), SIMDE_FLOAT32_C( 954.11), SIMDE_FLOAT32_C( -93.98), SIMDE_FLOAT32_C( -820.28)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 456.52), SIMDE_FLOAT32_C( -978.21), SIMDE_FLOAT32_C( 560.56), SIMDE_FLOAT32_C( 178.87), SIMDE_FLOAT32_C( 916.04), SIMDE_FLOAT32_C( -801.57), SIMDE_FLOAT32_C( -369.99), SIMDE_FLOAT32_C( 24.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 560.56), SIMDE_FLOAT32_C( -55.39), SIMDE_FLOAT32_C( 178.87), SIMDE_FLOAT32_C( -82.15), SIMDE_FLOAT32_C( -369.99), SIMDE_FLOAT32_C( -93.98), SIMDE_FLOAT32_C( 24.70), SIMDE_FLOAT32_C( -820.28)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 683.93), SIMDE_FLOAT32_C( -442.40), SIMDE_FLOAT32_C( -321.81), SIMDE_FLOAT32_C( -47.02), SIMDE_FLOAT32_C( -854.54), SIMDE_FLOAT32_C( -65.39), SIMDE_FLOAT32_C( -879.02), SIMDE_FLOAT32_C( -144.43)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -204.43), SIMDE_FLOAT32_C( 186.43), SIMDE_FLOAT32_C( -537.46), SIMDE_FLOAT32_C( -851.25), SIMDE_FLOAT32_C( -312.32), SIMDE_FLOAT32_C( -630.06), SIMDE_FLOAT32_C( -737.72), SIMDE_FLOAT32_C( 475.72)), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( -537.46), SIMDE_FLOAT32_C( -321.81), SIMDE_FLOAT32_C( -851.25), SIMDE_FLOAT32_C( -47.02), SIMDE_FLOAT32_C( -737.72), SIMDE_FLOAT32_C( -879.02), SIMDE_FLOAT32_C( 475.72), SIMDE_FLOAT32_C( -144.43)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -948.98), SIMDE_FLOAT32_C( 497.67), SIMDE_FLOAT32_C( 650.06), SIMDE_FLOAT32_C( 741.02), SIMDE_FLOAT32_C( -984.88), SIMDE_FLOAT32_C( -952.48), SIMDE_FLOAT32_C( -355.06), SIMDE_FLOAT32_C( 845.88)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 984.39), SIMDE_FLOAT32_C( -147.65), SIMDE_FLOAT32_C( -608.72), SIMDE_FLOAT32_C( 798.45), SIMDE_FLOAT32_C( -191.22), SIMDE_FLOAT32_C( -819.43), SIMDE_FLOAT32_C( 651.13), SIMDE_FLOAT32_C( 878.58)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -608.72), SIMDE_FLOAT32_C( 650.06), SIMDE_FLOAT32_C( 798.45), SIMDE_FLOAT32_C( 741.02), SIMDE_FLOAT32_C( 651.13), SIMDE_FLOAT32_C( -355.06), SIMDE_FLOAT32_C( 878.58), SIMDE_FLOAT32_C( 845.88)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 860.59), SIMDE_FLOAT32_C( -10.76), SIMDE_FLOAT32_C( -198.30), SIMDE_FLOAT32_C( 77.85), SIMDE_FLOAT32_C( -62.31), SIMDE_FLOAT32_C( -4.21), SIMDE_FLOAT32_C( 365.71), SIMDE_FLOAT32_C( 937.33)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -309.45), SIMDE_FLOAT32_C( 297.29), SIMDE_FLOAT32_C( 986.26), SIMDE_FLOAT32_C( -531.67), SIMDE_FLOAT32_C( 648.25), SIMDE_FLOAT32_C( -225.86), SIMDE_FLOAT32_C( -897.83), SIMDE_FLOAT32_C( 816.08)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 986.26), SIMDE_FLOAT32_C( -198.30), SIMDE_FLOAT32_C( -531.67), SIMDE_FLOAT32_C( 77.85), SIMDE_FLOAT32_C( -897.83), SIMDE_FLOAT32_C( 365.71), SIMDE_FLOAT32_C( 816.08), SIMDE_FLOAT32_C( 937.33)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_unpacklo_ps(test_vec[i].a, test_vec[i].b); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_unpacklo_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d r; } test_vec[8] = { { 
simde_mm256_set_pd(SIMDE_FLOAT64_C( -600.90), SIMDE_FLOAT64_C( -534.18), SIMDE_FLOAT64_C( -294.96), SIMDE_FLOAT64_C( 194.68)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 653.08), SIMDE_FLOAT64_C( -555.28), SIMDE_FLOAT64_C( 745.15), SIMDE_FLOAT64_C( -216.67)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -555.28), SIMDE_FLOAT64_C( -534.18), SIMDE_FLOAT64_C( -216.67), SIMDE_FLOAT64_C( 194.68)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 421.87), SIMDE_FLOAT64_C( 397.23), SIMDE_FLOAT64_C( 303.53), SIMDE_FLOAT64_C( 285.42)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -720.94), SIMDE_FLOAT64_C( -472.31), SIMDE_FLOAT64_C( 488.28), SIMDE_FLOAT64_C( -308.40)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -472.31), SIMDE_FLOAT64_C( 397.23), SIMDE_FLOAT64_C( -308.40), SIMDE_FLOAT64_C( 285.42)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -109.61), SIMDE_FLOAT64_C( 180.86), SIMDE_FLOAT64_C( 399.64), SIMDE_FLOAT64_C( 594.61)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -866.29), SIMDE_FLOAT64_C( 935.36), SIMDE_FLOAT64_C( 680.83), SIMDE_FLOAT64_C( 371.83)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 935.36), SIMDE_FLOAT64_C( 180.86), SIMDE_FLOAT64_C( 371.83), SIMDE_FLOAT64_C( 594.61)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 90.32), SIMDE_FLOAT64_C( 277.68), SIMDE_FLOAT64_C( 879.43), SIMDE_FLOAT64_C( -100.14)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 615.95), SIMDE_FLOAT64_C( -913.08), SIMDE_FLOAT64_C( 109.38), SIMDE_FLOAT64_C( 735.81)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -913.08), SIMDE_FLOAT64_C( 277.68), SIMDE_FLOAT64_C( 735.81), SIMDE_FLOAT64_C( -100.14)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 932.30), SIMDE_FLOAT64_C( 361.71), SIMDE_FLOAT64_C( -106.90), SIMDE_FLOAT64_C( -236.07)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -388.60), SIMDE_FLOAT64_C( -254.78), SIMDE_FLOAT64_C( -8.15), SIMDE_FLOAT64_C( -517.38)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -254.78), SIMDE_FLOAT64_C( 361.71), SIMDE_FLOAT64_C( -517.38), SIMDE_FLOAT64_C( -236.07)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 420.24), SIMDE_FLOAT64_C( 915.08), SIMDE_FLOAT64_C( 
-310.09), SIMDE_FLOAT64_C( 924.29)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -6.45), SIMDE_FLOAT64_C( 943.38), SIMDE_FLOAT64_C( -842.17), SIMDE_FLOAT64_C( 303.10)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 943.38), SIMDE_FLOAT64_C( 915.08), SIMDE_FLOAT64_C( 303.10), SIMDE_FLOAT64_C( 924.29)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 198.81), SIMDE_FLOAT64_C( 823.67), SIMDE_FLOAT64_C( 21.23), SIMDE_FLOAT64_C( 275.37)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -789.28), SIMDE_FLOAT64_C( 222.39), SIMDE_FLOAT64_C( -125.82), SIMDE_FLOAT64_C( -521.52)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 222.39), SIMDE_FLOAT64_C( 823.67), SIMDE_FLOAT64_C( -521.52), SIMDE_FLOAT64_C( 275.37)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 71.55), SIMDE_FLOAT64_C( -915.84), SIMDE_FLOAT64_C( -246.51), SIMDE_FLOAT64_C( -206.93)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -663.61), SIMDE_FLOAT64_C( 763.05), SIMDE_FLOAT64_C( -365.16), SIMDE_FLOAT64_C( -475.21)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 763.05), SIMDE_FLOAT64_C( -915.84), SIMDE_FLOAT64_C( -475.21), SIMDE_FLOAT64_C( -206.93)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_unpacklo_pd(test_vec[i].a, test_vec[i].b); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_xor_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 125.09), SIMDE_FLOAT64_C( 533.33), SIMDE_FLOAT64_C( 190.03), SIMDE_FLOAT64_C( -352.74)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 208.67), SIMDE_FLOAT64_C( -937.37), SIMDE_FLOAT64_C( 842.10), SIMDE_FLOAT64_C( 692.29)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -785.19), SIMDE_FLOAT64_C( -227.52), SIMDE_FLOAT64_C( -675.22), SIMDE_FLOAT64_C( 927.05)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 333.25), SIMDE_FLOAT64_C( 653.61), SIMDE_FLOAT64_C( 853.07), 
SIMDE_FLOAT64_C( 580.55)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 846.59), SIMDE_FLOAT64_C( -306.35), SIMDE_FLOAT64_C( 201.33), SIMDE_FLOAT64_C( -591.74)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 539.65), SIMDE_FLOAT64_C( 901.07), SIMDE_FLOAT64_C( -281.86), SIMDE_FLOAT64_C( -385.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -208.55), SIMDE_FLOAT64_C( -102.16), SIMDE_FLOAT64_C( 741.78), SIMDE_FLOAT64_C( -841.86)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 42.61), SIMDE_FLOAT64_C( 866.30), SIMDE_FLOAT64_C( -734.78), SIMDE_FLOAT64_C( -363.89)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -80.10), SIMDE_FLOAT64_C( -6.79), SIMDE_FLOAT64_C( 45.81), SIMDE_FLOAT64_C( -402.99)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -431.36), SIMDE_FLOAT64_C( 229.84), SIMDE_FLOAT64_C( -298.07), SIMDE_FLOAT64_C( -459.61)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -252.25), SIMDE_FLOAT64_C( -434.28), SIMDE_FLOAT64_C( -802.07), SIMDE_FLOAT64_C( 931.61)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 494.93), SIMDE_FLOAT64_C( -455.26), SIMDE_FLOAT64_C( 781.09), SIMDE_FLOAT64_C( 819.90)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 114.61), SIMDE_FLOAT64_C( 574.95), SIMDE_FLOAT64_C( -900.56), SIMDE_FLOAT64_C( -784.93)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 405.36), SIMDE_FLOAT64_C( 885.34), SIMDE_FLOAT64_C( -606.18), SIMDE_FLOAT64_C( 785.23)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), 
SIMDE_FLOAT64_C( -0.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -477.77), SIMDE_FLOAT64_C( -377.20), SIMDE_FLOAT64_C( -207.72), SIMDE_FLOAT64_C( 319.51)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -449.36), SIMDE_FLOAT64_C( 638.76), SIMDE_FLOAT64_C( -315.99), SIMDE_FLOAT64_C( 136.54)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_xor_pd(test_vec[i].a, test_vec[i].b); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_xor_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 548.70), SIMDE_FLOAT32_C( -868.78), SIMDE_FLOAT32_C( -8.43), SIMDE_FLOAT32_C( -89.68), SIMDE_FLOAT32_C( -222.56), SIMDE_FLOAT32_C( 837.57), SIMDE_FLOAT32_C( -514.53), SIMDE_FLOAT32_C( 769.22)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -145.72), SIMDE_FLOAT32_C( -597.70), SIMDE_FLOAT32_C( -427.69), SIMDE_FLOAT32_C( -216.99), SIMDE_FLOAT32_C( 665.42), SIMDE_FLOAT32_C( -3.00), SIMDE_FLOAT32_C( -694.73), SIMDE_FLOAT32_C( 203.68)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 476.58), SIMDE_FLOAT32_C( -306.29), SIMDE_FLOAT32_C( 588.53), SIMDE_FLOAT32_C( 127.68), SIMDE_FLOAT32_C( -500.35), SIMDE_FLOAT32_C( 955.55), SIMDE_FLOAT32_C( 220.89), SIMDE_FLOAT32_C( -767.99)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 445.28), SIMDE_FLOAT32_C( 141.81), SIMDE_FLOAT32_C( -713.76), SIMDE_FLOAT32_C( -354.21), SIMDE_FLOAT32_C( 679.03), SIMDE_FLOAT32_C( -912.95), SIMDE_FLOAT32_C( 204.18), SIMDE_FLOAT32_C( 506.07)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 
-0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 933.45), SIMDE_FLOAT32_C( 5.37), SIMDE_FLOAT32_C( -777.73), SIMDE_FLOAT32_C( 798.82), SIMDE_FLOAT32_C( 443.43), SIMDE_FLOAT32_C( -5.06), SIMDE_FLOAT32_C( 288.87), SIMDE_FLOAT32_C( -504.88)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 239.90), SIMDE_FLOAT32_C( -955.68), SIMDE_FLOAT32_C( -266.36), SIMDE_FLOAT32_C( 865.21), SIMDE_FLOAT32_C( 416.82), SIMDE_FLOAT32_C( -51.58), SIMDE_FLOAT32_C( 122.82), SIMDE_FLOAT32_C( 125.02)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 802.73), SIMDE_FLOAT32_C( -683.98), SIMDE_FLOAT32_C( -844.98), SIMDE_FLOAT32_C( 391.53), SIMDE_FLOAT32_C( -895.03), SIMDE_FLOAT32_C( -743.00), SIMDE_FLOAT32_C( -811.16), SIMDE_FLOAT32_C( -926.14)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 136.72), SIMDE_FLOAT32_C( -962.56), SIMDE_FLOAT32_C( 589.78), SIMDE_FLOAT32_C( 684.43), SIMDE_FLOAT32_C( -510.18), SIMDE_FLOAT32_C( 881.66), SIMDE_FLOAT32_C( -753.43), SIMDE_FLOAT32_C( 713.54)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -483.96), SIMDE_FLOAT32_C( -109.20), SIMDE_FLOAT32_C( -641.20), SIMDE_FLOAT32_C( 454.31), SIMDE_FLOAT32_C( 511.51), SIMDE_FLOAT32_C( -732.98), SIMDE_FLOAT32_C( 946.00), SIMDE_FLOAT32_C( -922.25)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -819.03), SIMDE_FLOAT32_C( -496.99), SIMDE_FLOAT32_C( 85.62), SIMDE_FLOAT32_C( -569.10), SIMDE_FLOAT32_C( -386.34), SIMDE_FLOAT32_C( 216.80), SIMDE_FLOAT32_C( -244.13), SIMDE_FLOAT32_C( -77.34)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 
0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 727.77), SIMDE_FLOAT32_C( -477.08), SIMDE_FLOAT32_C( 7.90), SIMDE_FLOAT32_C( 499.42), SIMDE_FLOAT32_C( -256.10), SIMDE_FLOAT32_C( -756.97), SIMDE_FLOAT32_C( 383.44), SIMDE_FLOAT32_C( -510.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -270.31), SIMDE_FLOAT32_C( -502.66), SIMDE_FLOAT32_C( 467.87), SIMDE_FLOAT32_C( 445.96), SIMDE_FLOAT32_C( -357.92), SIMDE_FLOAT32_C( -586.04), SIMDE_FLOAT32_C( -63.05), SIMDE_FLOAT32_C( 391.11)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -426.87), SIMDE_FLOAT32_C( -96.04), SIMDE_FLOAT32_C( 625.98), SIMDE_FLOAT32_C( 704.53), SIMDE_FLOAT32_C( 227.86), SIMDE_FLOAT32_C( -411.45), SIMDE_FLOAT32_C( -17.45), SIMDE_FLOAT32_C( -10.30)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 456.59), SIMDE_FLOAT32_C( 656.00), SIMDE_FLOAT32_C( 17.64), SIMDE_FLOAT32_C( 602.75), SIMDE_FLOAT32_C( 153.79), SIMDE_FLOAT32_C( -466.92), SIMDE_FLOAT32_C( -648.89), SIMDE_FLOAT32_C( -965.57)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -404.61), SIMDE_FLOAT32_C( -361.93), SIMDE_FLOAT32_C( 8.96), SIMDE_FLOAT32_C( -813.96), SIMDE_FLOAT32_C( 760.95), SIMDE_FLOAT32_C( 953.74), SIMDE_FLOAT32_C( 288.70), SIMDE_FLOAT32_C( 347.99)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -441.96), SIMDE_FLOAT32_C( 232.27), SIMDE_FLOAT32_C( 51.60), SIMDE_FLOAT32_C( -387.30), SIMDE_FLOAT32_C( -615.38), SIMDE_FLOAT32_C( 162.60), SIMDE_FLOAT32_C( -145.41), SIMDE_FLOAT32_C( 683.61)), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_xor_ps(test_vec[i].a, test_vec[i].b); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_zextps128_ps256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m256 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -312.77), SIMDE_FLOAT32_C( 594.20), SIMDE_FLOAT32_C( -325.59), SIMDE_FLOAT32_C( -490.02)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -312.77), SIMDE_FLOAT32_C( 594.20), SIMDE_FLOAT32_C( -325.59), SIMDE_FLOAT32_C( -490.02)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -164.79), SIMDE_FLOAT32_C( -934.42), SIMDE_FLOAT32_C( 921.14), SIMDE_FLOAT32_C( 62.10)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -164.79), SIMDE_FLOAT32_C( -934.42), SIMDE_FLOAT32_C( 921.14), SIMDE_FLOAT32_C( 62.10)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -517.66), SIMDE_FLOAT32_C( 967.01), SIMDE_FLOAT32_C( 846.07), SIMDE_FLOAT32_C( 311.22)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -517.66), SIMDE_FLOAT32_C( 967.01), SIMDE_FLOAT32_C( 846.07), SIMDE_FLOAT32_C( 311.22)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 258.19), SIMDE_FLOAT32_C( 3.23), SIMDE_FLOAT32_C( -975.57), SIMDE_FLOAT32_C( -36.42)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 258.19), SIMDE_FLOAT32_C( 3.23), SIMDE_FLOAT32_C( -975.57), SIMDE_FLOAT32_C( -36.42)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -435.42), 
SIMDE_FLOAT32_C( 892.30), SIMDE_FLOAT32_C( -907.17), SIMDE_FLOAT32_C( -773.85)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -435.42), SIMDE_FLOAT32_C( 892.30), SIMDE_FLOAT32_C( -907.17), SIMDE_FLOAT32_C( -773.85)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 555.74), SIMDE_FLOAT32_C( -152.72), SIMDE_FLOAT32_C( 264.72), SIMDE_FLOAT32_C( 888.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 555.74), SIMDE_FLOAT32_C( -152.72), SIMDE_FLOAT32_C( 264.72), SIMDE_FLOAT32_C( 888.72)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -118.52), SIMDE_FLOAT32_C( -258.55), SIMDE_FLOAT32_C( -89.99), SIMDE_FLOAT32_C( -84.84)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -118.52), SIMDE_FLOAT32_C( -258.55), SIMDE_FLOAT32_C( -89.99), SIMDE_FLOAT32_C( -84.84)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 270.43), SIMDE_FLOAT32_C( -194.76), SIMDE_FLOAT32_C( -351.14), SIMDE_FLOAT32_C( 335.42)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 270.43), SIMDE_FLOAT32_C( -194.76), SIMDE_FLOAT32_C( -351.14), SIMDE_FLOAT32_C( 335.42)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_zextps128_ps256(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_zextpd128_pd256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m256d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 376.29), SIMDE_FLOAT64_C( -625.09)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 376.29), SIMDE_FLOAT64_C( -625.09)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -371.24), SIMDE_FLOAT64_C( -550.26)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), 
SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -371.24), SIMDE_FLOAT64_C( -550.26)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -354.92), SIMDE_FLOAT64_C( -801.74)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -354.92), SIMDE_FLOAT64_C( -801.74)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -686.18), SIMDE_FLOAT64_C( 492.11)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -686.18), SIMDE_FLOAT64_C( 492.11)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -504.19), SIMDE_FLOAT64_C( -186.92)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -504.19), SIMDE_FLOAT64_C( -186.92)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 116.22), SIMDE_FLOAT64_C( 481.86)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 116.22), SIMDE_FLOAT64_C( 481.86)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 436.67), SIMDE_FLOAT64_C( 524.04)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 436.67), SIMDE_FLOAT64_C( 524.04)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -466.66), SIMDE_FLOAT64_C( 855.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -466.66), SIMDE_FLOAT64_C( 855.76)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_zextpd128_pd256(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_zextsi128_si256 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[2]; const int64_t r[4]; } test_vec[] = { { { INT64_C( 3659960757603206795), INT64_C( 2308273045282314341) }, { INT64_C( 3659960757603206795), INT64_C( 2308273045282314341), INT64_C(0), INT64_C(0) } }, { { -INT64_C( 7909899133659387641), -INT64_C( 6643096323114624760) }, { -INT64_C( 7909899133659387641), -INT64_C( 6643096323114624760), INT64_C(0), INT64_C(0) } }, { { -INT64_C( 3738447939738821375), INT64_C( 8210257193021786038) }, { 
-INT64_C( 3738447939738821375), INT64_C( 8210257193021786038), INT64_C(0), INT64_C(0) } }, { { INT64_C( 7841089534433099674), -INT64_C( 2704280898271388226) }, { INT64_C( 7841089534433099674), -INT64_C( 2704280898271388226), INT64_C(0), INT64_C(0) } }, { { INT64_C( 2253740904739638291), INT64_C( 4217866929705054124) }, { INT64_C( 2253740904739638291), INT64_C( 4217866929705054124), INT64_C(0), INT64_C(0) } }, { { INT64_C( 6173955252198929118), -INT64_C( 2243738413454109144) }, { INT64_C( 6173955252198929118), -INT64_C( 2243738413454109144), INT64_C(0), INT64_C(0) } }, { { INT64_C( 3762935090286142127), -INT64_C( 1843430169138026352) }, { INT64_C( 3762935090286142127), -INT64_C( 1843430169138026352), INT64_C(0), INT64_C(0) } }, { { INT64_C( 6283319931180737605), -INT64_C( 5703187699811656404) }, { INT64_C( 6283319931180737605), -INT64_C( 5703187699811656404), INT64_C(0), INT64_C(0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a); simde__m256i r = simde_mm256_zextsi128_si256(a); simde_test_x86_assert_equal_i64x4(r, simde_x_mm256_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm_testc_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; int r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -975.49), SIMDE_FLOAT32_C( 483.21), SIMDE_FLOAT32_C( -728.28), SIMDE_FLOAT32_C( 87.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 838.35), SIMDE_FLOAT32_C( 502.70), SIMDE_FLOAT32_C( 259.53), SIMDE_FLOAT32_C( 492.16)), 1 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 145.89), SIMDE_FLOAT32_C( 703.10), SIMDE_FLOAT32_C( 934.08), SIMDE_FLOAT32_C( 486.65)), simde_mm_set_ps(SIMDE_FLOAT32_C( -729.11), SIMDE_FLOAT32_C( -285.01), SIMDE_FLOAT32_C( 936.71), SIMDE_FLOAT32_C( -581.95)), 0 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -465.31), SIMDE_FLOAT32_C( 947.68), SIMDE_FLOAT32_C( 581.66), SIMDE_FLOAT32_C( 632.88)), simde_mm_set_ps(SIMDE_FLOAT32_C( 892.71), SIMDE_FLOAT32_C( 965.49), 
SIMDE_FLOAT32_C( -562.05), SIMDE_FLOAT32_C( 23.24)), 0 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 527.52), SIMDE_FLOAT32_C( 684.72), SIMDE_FLOAT32_C( -444.91), SIMDE_FLOAT32_C( 864.11)), simde_mm_set_ps(SIMDE_FLOAT32_C( -726.99), SIMDE_FLOAT32_C( 870.43), SIMDE_FLOAT32_C( 880.94), SIMDE_FLOAT32_C( 503.59)), 0 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -313.88), SIMDE_FLOAT32_C( 663.71), SIMDE_FLOAT32_C( -545.28), SIMDE_FLOAT32_C( 409.96)), simde_mm_set_ps(SIMDE_FLOAT32_C( 31.21), SIMDE_FLOAT32_C( -81.33), SIMDE_FLOAT32_C( -792.56), SIMDE_FLOAT32_C( 868.13)), 0 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 844.22), SIMDE_FLOAT32_C( -506.15), SIMDE_FLOAT32_C( -527.87), SIMDE_FLOAT32_C( -352.42)), simde_mm_set_ps(SIMDE_FLOAT32_C( 765.09), SIMDE_FLOAT32_C( 315.48), SIMDE_FLOAT32_C( 792.46), SIMDE_FLOAT32_C( 202.31)), 1 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -116.04), SIMDE_FLOAT32_C( 223.43), SIMDE_FLOAT32_C( 582.43), SIMDE_FLOAT32_C( 806.86)), simde_mm_set_ps(SIMDE_FLOAT32_C( -409.50), SIMDE_FLOAT32_C( 334.07), SIMDE_FLOAT32_C( -959.71), SIMDE_FLOAT32_C( -395.22)), 0 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -17.46), SIMDE_FLOAT32_C( 569.91), SIMDE_FLOAT32_C( -620.83), SIMDE_FLOAT32_C( 411.71)), simde_mm_set_ps(SIMDE_FLOAT32_C( 294.19), SIMDE_FLOAT32_C( 545.09), SIMDE_FLOAT32_C( 315.96), SIMDE_FLOAT32_C( -698.39)), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_testc_ps(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_testc_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; int r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 403.40), SIMDE_FLOAT64_C( -277.35)), simde_mm_set_pd(SIMDE_FLOAT64_C( 458.69), SIMDE_FLOAT64_C( -453.43)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -528.69), SIMDE_FLOAT64_C( 40.46)), simde_mm_set_pd(SIMDE_FLOAT64_C( 548.92), SIMDE_FLOAT64_C( -42.14)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -931.64), SIMDE_FLOAT64_C( -909.64)), 
simde_mm_set_pd(SIMDE_FLOAT64_C( -112.03), SIMDE_FLOAT64_C( 413.67)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -611.37), SIMDE_FLOAT64_C( 85.99)), simde_mm_set_pd(SIMDE_FLOAT64_C( 527.48), SIMDE_FLOAT64_C( -378.25)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -343.28), SIMDE_FLOAT64_C( -471.39)), simde_mm_set_pd(SIMDE_FLOAT64_C( 984.73), SIMDE_FLOAT64_C( 518.21)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 965.66), SIMDE_FLOAT64_C( -647.86)), simde_mm_set_pd(SIMDE_FLOAT64_C( 395.59), SIMDE_FLOAT64_C( 961.89)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 763.28), SIMDE_FLOAT64_C( 421.62)), simde_mm_set_pd(SIMDE_FLOAT64_C( -682.67), SIMDE_FLOAT64_C( -348.51)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -947.12), SIMDE_FLOAT64_C( 147.81)), simde_mm_set_pd(SIMDE_FLOAT64_C( 745.97), SIMDE_FLOAT64_C( -540.47)), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_testc_pd(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm256_testc_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; int r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -169.00), SIMDE_FLOAT32_C( -295.41), SIMDE_FLOAT32_C( 260.09), SIMDE_FLOAT32_C( -617.68), SIMDE_FLOAT32_C( 318.47), SIMDE_FLOAT32_C( -889.00), SIMDE_FLOAT32_C( 991.56), SIMDE_FLOAT32_C( -25.06)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 252.34), SIMDE_FLOAT32_C( -77.52), SIMDE_FLOAT32_C( -724.69), SIMDE_FLOAT32_C( 823.10), SIMDE_FLOAT32_C( -653.61), SIMDE_FLOAT32_C( -673.14), SIMDE_FLOAT32_C( 294.16), SIMDE_FLOAT32_C( 969.47)), 0 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 269.05), SIMDE_FLOAT32_C( 535.88), SIMDE_FLOAT32_C( 534.89), SIMDE_FLOAT32_C( -793.57), SIMDE_FLOAT32_C( -723.99), SIMDE_FLOAT32_C( -951.14), SIMDE_FLOAT32_C( -834.84), SIMDE_FLOAT32_C( -924.19)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -369.85), SIMDE_FLOAT32_C( -791.09), SIMDE_FLOAT32_C( -28.29), SIMDE_FLOAT32_C( -28.76), SIMDE_FLOAT32_C( 912.84), SIMDE_FLOAT32_C( 
-660.86), SIMDE_FLOAT32_C( -511.48), SIMDE_FLOAT32_C( -116.65)), 0 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 491.90), SIMDE_FLOAT32_C( -371.75), SIMDE_FLOAT32_C( -693.29), SIMDE_FLOAT32_C( 554.30), SIMDE_FLOAT32_C( -859.01), SIMDE_FLOAT32_C( -958.17), SIMDE_FLOAT32_C( 272.98), SIMDE_FLOAT32_C( 829.99)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 783.82), SIMDE_FLOAT32_C( -735.12), SIMDE_FLOAT32_C( 265.82), SIMDE_FLOAT32_C( -598.04), SIMDE_FLOAT32_C( -693.48), SIMDE_FLOAT32_C( 798.61), SIMDE_FLOAT32_C( -618.96), SIMDE_FLOAT32_C( 625.43)), 0 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 321.41), SIMDE_FLOAT32_C( -827.42), SIMDE_FLOAT32_C( -235.24), SIMDE_FLOAT32_C( 914.82), SIMDE_FLOAT32_C( 3.35), SIMDE_FLOAT32_C( -99.95), SIMDE_FLOAT32_C( -932.57), SIMDE_FLOAT32_C( 846.75)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 827.87), SIMDE_FLOAT32_C( 782.35), SIMDE_FLOAT32_C( 364.26), SIMDE_FLOAT32_C( 589.04), SIMDE_FLOAT32_C( 72.17), SIMDE_FLOAT32_C( 906.51), SIMDE_FLOAT32_C( 816.64), SIMDE_FLOAT32_C( 975.90)), 1 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -175.70), SIMDE_FLOAT32_C( -858.68), SIMDE_FLOAT32_C( 205.72), SIMDE_FLOAT32_C( 623.26), SIMDE_FLOAT32_C( -971.72), SIMDE_FLOAT32_C( -925.85), SIMDE_FLOAT32_C( -832.18), SIMDE_FLOAT32_C( 290.40)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 995.07), SIMDE_FLOAT32_C( 482.43), SIMDE_FLOAT32_C( 148.85), SIMDE_FLOAT32_C( 851.08), SIMDE_FLOAT32_C( -260.21), SIMDE_FLOAT32_C( -679.27), SIMDE_FLOAT32_C( 612.96), SIMDE_FLOAT32_C( -131.17)), 0 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -930.60), SIMDE_FLOAT32_C( 310.01), SIMDE_FLOAT32_C( 768.42), SIMDE_FLOAT32_C( -620.68), SIMDE_FLOAT32_C( -106.57), SIMDE_FLOAT32_C( -657.44), SIMDE_FLOAT32_C( 384.33), SIMDE_FLOAT32_C( -279.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 43.01), SIMDE_FLOAT32_C( -312.85), SIMDE_FLOAT32_C( -161.92), SIMDE_FLOAT32_C( -359.59), SIMDE_FLOAT32_C( -839.05), SIMDE_FLOAT32_C( 39.24), SIMDE_FLOAT32_C( 321.97), SIMDE_FLOAT32_C( 303.19)), 0 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 58.24), 
SIMDE_FLOAT32_C( -990.13), SIMDE_FLOAT32_C( 132.06), SIMDE_FLOAT32_C( -797.37), SIMDE_FLOAT32_C( 843.65), SIMDE_FLOAT32_C( -987.25), SIMDE_FLOAT32_C( -376.56), SIMDE_FLOAT32_C( -319.36)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -543.47), SIMDE_FLOAT32_C( -525.74), SIMDE_FLOAT32_C( -613.35), SIMDE_FLOAT32_C( 390.35), SIMDE_FLOAT32_C( -782.19), SIMDE_FLOAT32_C( -13.25), SIMDE_FLOAT32_C( 978.40), SIMDE_FLOAT32_C( 796.97)), 0 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 904.12), SIMDE_FLOAT32_C( -477.03), SIMDE_FLOAT32_C( -234.30), SIMDE_FLOAT32_C( -407.00), SIMDE_FLOAT32_C( -205.27), SIMDE_FLOAT32_C( -89.25), SIMDE_FLOAT32_C( -245.34), SIMDE_FLOAT32_C( -973.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 359.42), SIMDE_FLOAT32_C( 340.47), SIMDE_FLOAT32_C( -928.36), SIMDE_FLOAT32_C( 988.69), SIMDE_FLOAT32_C( 898.92), SIMDE_FLOAT32_C( -682.31), SIMDE_FLOAT32_C( -259.92), SIMDE_FLOAT32_C( 333.26)), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm256_testc_ps(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm256_testc_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; int r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 123.47), SIMDE_FLOAT64_C( 212.54), SIMDE_FLOAT64_C( 522.75), SIMDE_FLOAT64_C( 1.15)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -709.99), SIMDE_FLOAT64_C( 514.03), SIMDE_FLOAT64_C( 845.48), SIMDE_FLOAT64_C( -789.13)), 0 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 684.95), SIMDE_FLOAT64_C( -284.02), SIMDE_FLOAT64_C( 731.17), SIMDE_FLOAT64_C( -676.64)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -937.45), SIMDE_FLOAT64_C( -128.99), SIMDE_FLOAT64_C( -272.42), SIMDE_FLOAT64_C( 828.88)), 0 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 792.09), SIMDE_FLOAT64_C( 380.65), SIMDE_FLOAT64_C( -640.40), SIMDE_FLOAT64_C( 320.89)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 952.36), SIMDE_FLOAT64_C( -153.20), SIMDE_FLOAT64_C( -728.44), SIMDE_FLOAT64_C( 534.46)), 0 }, { 
simde_mm256_set_pd(SIMDE_FLOAT64_C( -746.07), SIMDE_FLOAT64_C( -762.31), SIMDE_FLOAT64_C( -109.79), SIMDE_FLOAT64_C( 660.25)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -321.84), SIMDE_FLOAT64_C( 811.70), SIMDE_FLOAT64_C( -839.71), SIMDE_FLOAT64_C( 614.83)), 1 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 484.63), SIMDE_FLOAT64_C( 471.47), SIMDE_FLOAT64_C( -100.70), SIMDE_FLOAT64_C( 887.09)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -761.94), SIMDE_FLOAT64_C( -263.49), SIMDE_FLOAT64_C( -928.32), SIMDE_FLOAT64_C( -481.21)), 0 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 563.75), SIMDE_FLOAT64_C( -564.39), SIMDE_FLOAT64_C( 2.49), SIMDE_FLOAT64_C( 514.36)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 928.41), SIMDE_FLOAT64_C( 792.30), SIMDE_FLOAT64_C( -596.24), SIMDE_FLOAT64_C( 365.58)), 0 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -909.73), SIMDE_FLOAT64_C( 892.46), SIMDE_FLOAT64_C( -678.05), SIMDE_FLOAT64_C( 778.72)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -258.15), SIMDE_FLOAT64_C( 100.48), SIMDE_FLOAT64_C( -77.87), SIMDE_FLOAT64_C( -152.48)), 0 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -613.30), SIMDE_FLOAT64_C( -567.86), SIMDE_FLOAT64_C( 674.67), SIMDE_FLOAT64_C( -566.07)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -57.83), SIMDE_FLOAT64_C( -183.14), SIMDE_FLOAT64_C( 852.20), SIMDE_FLOAT64_C( -939.00)), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm256_testc_pd(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm256_testc_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; int r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 1590541233), INT32_C( -436989526), INT32_C(-1581572624), INT32_C(-1048507105), INT32_C(-1251227046), INT32_C( -111355701), INT32_C( 463981150), INT32_C(-1310282310)), simde_mm256_set_epi32(INT32_C(-1730174443), INT32_C( 962749992), INT32_C( 1889650969), INT32_C(-1644227432), INT32_C(-1044962626), INT32_C( 2047573026), INT32_C(-1475613534), 
INT32_C( -143917251)), 0 }, { simde_mm256_set_epi32(INT32_C(-1338083145), INT32_C( 1764771144), INT32_C( -397147050), INT32_C( -289476710), INT32_C( 1323271828), INT32_C( -86062147), INT32_C( -642595378), INT32_C( -876487591)), simde_mm256_set_epi32(INT32_C( -876677989), INT32_C( 1319440399), INT32_C( 561344787), INT32_C( 2014642071), INT32_C( 1196573650), INT32_C( 773018631), INT32_C( 1292104201), INT32_C( 1045703036)), 0 }, { simde_mm256_set_epi32(INT32_C( 283890165), INT32_C( -214227023), INT32_C( 601751308), INT32_C( -649446863), INT32_C( -948918925), INT32_C( 1931664941), INT32_C( -846451204), INT32_C( 1136409049)), simde_mm256_set_epi32(INT32_C( -384402282), INT32_C( -992732365), INT32_C(-1540963980), INT32_C( 244471001), INT32_C( -395648516), INT32_C( 1146402181), INT32_C( -520478107), INT32_C(-1866567951)), 0 }, { simde_mm256_set_epi32(INT32_C(-1798222531), INT32_C(-1196367171), INT32_C( 1622696128), INT32_C( 716668488), INT32_C( 1277881561), INT32_C(-1886059507), INT32_C(-1722396956), INT32_C( 904397943)), simde_mm256_set_epi32(INT32_C( 1590185315), INT32_C(-2054583206), INT32_C( -524141746), INT32_C( 1070740740), INT32_C( 228023403), INT32_C(-1312111237), INT32_C(-1647173119), INT32_C(-1984225652)), 0 }, { simde_mm256_set_epi32(INT32_C( -291109931), INT32_C( 864813403), INT32_C( 1389239783), INT32_C( 1410930820), INT32_C( 876721304), INT32_C( 1356075339), INT32_C( -969519815), INT32_C( 1884376513)), simde_mm256_set_epi32(INT32_C( -348088337), INT32_C( 1648834089), INT32_C( 799153644), INT32_C(-1690149060), INT32_C( -552425726), INT32_C( 889492544), INT32_C( -332273251), INT32_C(-1382843562)), 0 }, { simde_mm256_set_epi32(INT32_C( 1282291341), INT32_C( 1395600177), INT32_C( -618520147), INT32_C( 318386342), INT32_C(-1071446046), INT32_C( 1914859572), INT32_C(-1754705496), INT32_C( -643641727)), simde_mm256_set_epi32(INT32_C( 1620891909), INT32_C(-1744463022), INT32_C( 1083709334), INT32_C( 1908851820), INT32_C(-1141617057), INT32_C(-1138459296), INT32_C( 
-288617760), INT32_C(-1727368553)), 0 }, { simde_mm256_set_epi32(INT32_C( 1523147892), INT32_C( 1037444310), INT32_C( -23711686), INT32_C(-1269181771), INT32_C( 1945791614), INT32_C( -804519478), INT32_C( -20906646), INT32_C( 1310709876)), simde_mm256_set_epi32(INT32_C( -504237752), INT32_C( 883986365), INT32_C( 1802809300), INT32_C(-1859897822), INT32_C(-1272698163), INT32_C( -143410874), INT32_C( 638495924), INT32_C(-1299515093)), 0 }, { simde_mm256_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm256_testc_si256(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_testz_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; int r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -535.65), SIMDE_FLOAT32_C( -770.48), SIMDE_FLOAT32_C( 566.67), SIMDE_FLOAT32_C( 159.53)), simde_mm_set_ps(SIMDE_FLOAT32_C( 117.32), SIMDE_FLOAT32_C( -915.32), SIMDE_FLOAT32_C( -244.51), SIMDE_FLOAT32_C( 139.82)), 0 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 688.86), SIMDE_FLOAT32_C( 962.46), SIMDE_FLOAT32_C( 502.62), SIMDE_FLOAT32_C( -759.82)), simde_mm_set_ps(SIMDE_FLOAT32_C( 447.13), SIMDE_FLOAT32_C( 569.82), SIMDE_FLOAT32_C( 813.87), SIMDE_FLOAT32_C( -41.23)), 0 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 717.58), SIMDE_FLOAT32_C( -388.95), SIMDE_FLOAT32_C( 405.99), SIMDE_FLOAT32_C( -505.21)), simde_mm_set_ps(SIMDE_FLOAT32_C( 651.17), SIMDE_FLOAT32_C( 43.00), SIMDE_FLOAT32_C( -865.65), SIMDE_FLOAT32_C( 116.25)), 1 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 748.82), SIMDE_FLOAT32_C( -519.63), SIMDE_FLOAT32_C( 940.52), SIMDE_FLOAT32_C( -776.61)), simde_mm_set_ps(SIMDE_FLOAT32_C( -86.61), SIMDE_FLOAT32_C( 528.66), SIMDE_FLOAT32_C( 
315.29), SIMDE_FLOAT32_C( -836.37)), 0 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -930.47), SIMDE_FLOAT32_C( 385.27), SIMDE_FLOAT32_C( 328.25), SIMDE_FLOAT32_C( -891.42)), simde_mm_set_ps(SIMDE_FLOAT32_C( 769.90), SIMDE_FLOAT32_C( 203.87), SIMDE_FLOAT32_C( 70.77), SIMDE_FLOAT32_C( 153.64)), 1 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 110.09), SIMDE_FLOAT32_C( -474.31), SIMDE_FLOAT32_C( -205.82), SIMDE_FLOAT32_C( -912.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( 690.10), SIMDE_FLOAT32_C( -177.50), SIMDE_FLOAT32_C( 69.56), SIMDE_FLOAT32_C( -722.94)), 0 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -854.45), SIMDE_FLOAT32_C( -233.74), SIMDE_FLOAT32_C( 792.75), SIMDE_FLOAT32_C( 911.93)), simde_mm_set_ps(SIMDE_FLOAT32_C( 835.88), SIMDE_FLOAT32_C( -477.16), SIMDE_FLOAT32_C( 481.40), SIMDE_FLOAT32_C( -325.48)), 0 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 349.66), SIMDE_FLOAT32_C( 852.75), SIMDE_FLOAT32_C( 630.50), SIMDE_FLOAT32_C( 599.35)), simde_mm_set_ps(SIMDE_FLOAT32_C( -12.39), SIMDE_FLOAT32_C( 669.65), SIMDE_FLOAT32_C( 19.88), SIMDE_FLOAT32_C( -104.79)), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_testz_ps(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_testz_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; int r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -133.70), SIMDE_FLOAT64_C( -364.47)), simde_mm_set_pd(SIMDE_FLOAT64_C( 299.27), SIMDE_FLOAT64_C( 706.73)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 411.42), SIMDE_FLOAT64_C( -916.18)), simde_mm_set_pd(SIMDE_FLOAT64_C( 443.76), SIMDE_FLOAT64_C( 616.70)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 177.79), SIMDE_FLOAT64_C( -562.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( 491.38), SIMDE_FLOAT64_C( 437.67)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -601.90), SIMDE_FLOAT64_C( 130.89)), simde_mm_set_pd(SIMDE_FLOAT64_C( -297.66), SIMDE_FLOAT64_C( -243.36)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -461.95), 
SIMDE_FLOAT64_C( -708.04)), simde_mm_set_pd(SIMDE_FLOAT64_C( -179.91), SIMDE_FLOAT64_C( 436.91)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -477.48), SIMDE_FLOAT64_C( 546.05)), simde_mm_set_pd(SIMDE_FLOAT64_C( -804.65), SIMDE_FLOAT64_C( 660.18)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 366.33), SIMDE_FLOAT64_C( -393.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( 758.77), SIMDE_FLOAT64_C( -413.77)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -605.04), SIMDE_FLOAT64_C( 186.44)), simde_mm_set_pd(SIMDE_FLOAT64_C( -113.05), SIMDE_FLOAT64_C( 709.60)), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_testz_pd(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm256_testz_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; int r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 203.89), SIMDE_FLOAT32_C( 929.87), SIMDE_FLOAT32_C( -921.04), SIMDE_FLOAT32_C( -927.33), SIMDE_FLOAT32_C( 876.23), SIMDE_FLOAT32_C( 583.50), SIMDE_FLOAT32_C( 560.83), SIMDE_FLOAT32_C( -996.47)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 41.17), SIMDE_FLOAT32_C( 356.76), SIMDE_FLOAT32_C( -537.40), SIMDE_FLOAT32_C( -959.48), SIMDE_FLOAT32_C( -224.29), SIMDE_FLOAT32_C( -28.33), SIMDE_FLOAT32_C( -153.96), SIMDE_FLOAT32_C( -377.38)), 0 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 417.43), SIMDE_FLOAT32_C( -235.27), SIMDE_FLOAT32_C( -869.92), SIMDE_FLOAT32_C( -107.51), SIMDE_FLOAT32_C( 353.07), SIMDE_FLOAT32_C( 989.26), SIMDE_FLOAT32_C( 19.42), SIMDE_FLOAT32_C( 737.36)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -799.55), SIMDE_FLOAT32_C( -863.03), SIMDE_FLOAT32_C( 787.36), SIMDE_FLOAT32_C( 884.07), SIMDE_FLOAT32_C( -646.88), SIMDE_FLOAT32_C( 348.23), SIMDE_FLOAT32_C( -19.97), SIMDE_FLOAT32_C( 231.76)), 0 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -434.95), SIMDE_FLOAT32_C( 367.82), SIMDE_FLOAT32_C( -198.30), SIMDE_FLOAT32_C( 569.25), SIMDE_FLOAT32_C( 37.80), SIMDE_FLOAT32_C( 656.68), SIMDE_FLOAT32_C( 
-154.19), SIMDE_FLOAT32_C( -268.68)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 679.38), SIMDE_FLOAT32_C( 416.08), SIMDE_FLOAT32_C( 591.38), SIMDE_FLOAT32_C( -410.17), SIMDE_FLOAT32_C( -434.20), SIMDE_FLOAT32_C( -656.84), SIMDE_FLOAT32_C( -369.35), SIMDE_FLOAT32_C( -216.32)), 0 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -37.47), SIMDE_FLOAT32_C( -40.21), SIMDE_FLOAT32_C( 5.31), SIMDE_FLOAT32_C( 651.03), SIMDE_FLOAT32_C( 91.95), SIMDE_FLOAT32_C( 136.83), SIMDE_FLOAT32_C( 215.58), SIMDE_FLOAT32_C( 976.43)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 791.68), SIMDE_FLOAT32_C( -97.52), SIMDE_FLOAT32_C( 49.28), SIMDE_FLOAT32_C( -47.67), SIMDE_FLOAT32_C( -520.82), SIMDE_FLOAT32_C( 775.18), SIMDE_FLOAT32_C( -311.24), SIMDE_FLOAT32_C( 87.06)), 0 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 974.54), SIMDE_FLOAT32_C( -572.67), SIMDE_FLOAT32_C( 228.68), SIMDE_FLOAT32_C( -268.11), SIMDE_FLOAT32_C( 83.97), SIMDE_FLOAT32_C( -607.98), SIMDE_FLOAT32_C( 317.68), SIMDE_FLOAT32_C( -118.11)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -579.32), SIMDE_FLOAT32_C( -684.32), SIMDE_FLOAT32_C( -14.94), SIMDE_FLOAT32_C( 840.53), SIMDE_FLOAT32_C( -875.18), SIMDE_FLOAT32_C( -264.19), SIMDE_FLOAT32_C( -675.61), SIMDE_FLOAT32_C( 236.61)), 0 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 268.13), SIMDE_FLOAT32_C( 524.74), SIMDE_FLOAT32_C( -846.94), SIMDE_FLOAT32_C( 539.16), SIMDE_FLOAT32_C( -824.88), SIMDE_FLOAT32_C( 966.22), SIMDE_FLOAT32_C( -319.64), SIMDE_FLOAT32_C( 463.07)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -484.03), SIMDE_FLOAT32_C( -785.67), SIMDE_FLOAT32_C( 818.22), SIMDE_FLOAT32_C( 722.83), SIMDE_FLOAT32_C( -933.76), SIMDE_FLOAT32_C( -804.32), SIMDE_FLOAT32_C( -18.04), SIMDE_FLOAT32_C( 790.55)), 0 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 496.61), SIMDE_FLOAT32_C( 895.69), SIMDE_FLOAT32_C( 83.61), SIMDE_FLOAT32_C( -625.29), SIMDE_FLOAT32_C( -963.29), SIMDE_FLOAT32_C( -202.04), SIMDE_FLOAT32_C( -184.94), SIMDE_FLOAT32_C( 102.69)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -121.97), SIMDE_FLOAT32_C( -483.26), SIMDE_FLOAT32_C( 
-796.68), SIMDE_FLOAT32_C( 593.94), SIMDE_FLOAT32_C( -642.73), SIMDE_FLOAT32_C( -850.47), SIMDE_FLOAT32_C( -793.37), SIMDE_FLOAT32_C( -202.72)), 0 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 824.10), SIMDE_FLOAT32_C( -794.74), SIMDE_FLOAT32_C( -876.82), SIMDE_FLOAT32_C( 50.96), SIMDE_FLOAT32_C( -281.18), SIMDE_FLOAT32_C( -527.70), SIMDE_FLOAT32_C( -453.71), SIMDE_FLOAT32_C( -588.71)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 755.45), SIMDE_FLOAT32_C( -252.04), SIMDE_FLOAT32_C( -915.68), SIMDE_FLOAT32_C( -54.75), SIMDE_FLOAT32_C( -63.75), SIMDE_FLOAT32_C( 413.61), SIMDE_FLOAT32_C( -347.26), SIMDE_FLOAT32_C( 540.31)), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm256_testz_ps(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm256_testz_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; int r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 374.19), SIMDE_FLOAT64_C( -934.66), SIMDE_FLOAT64_C( 991.69), SIMDE_FLOAT64_C( 768.86)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 772.38), SIMDE_FLOAT64_C( 118.89), SIMDE_FLOAT64_C( -913.18), SIMDE_FLOAT64_C( 220.47)), 1 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -659.01), SIMDE_FLOAT64_C( -495.78), SIMDE_FLOAT64_C( 343.83), SIMDE_FLOAT64_C( -984.74)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 485.80), SIMDE_FLOAT64_C( 393.82), SIMDE_FLOAT64_C( -663.76), SIMDE_FLOAT64_C( -48.15)), 0 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 49.22), SIMDE_FLOAT64_C( -581.06), SIMDE_FLOAT64_C( 568.03), SIMDE_FLOAT64_C( -985.31)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -336.00), SIMDE_FLOAT64_C( -139.33), SIMDE_FLOAT64_C( 617.24), SIMDE_FLOAT64_C( 953.30)), 0 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -274.93), SIMDE_FLOAT64_C( -900.75), SIMDE_FLOAT64_C( -102.71), SIMDE_FLOAT64_C( -472.84)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -901.78), SIMDE_FLOAT64_C( 12.36), SIMDE_FLOAT64_C( 689.29), SIMDE_FLOAT64_C( -976.30)), 0 }, { 
simde_mm256_set_pd(SIMDE_FLOAT64_C( 995.09), SIMDE_FLOAT64_C( -313.13), SIMDE_FLOAT64_C( -440.17), SIMDE_FLOAT64_C( 189.57)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 355.51), SIMDE_FLOAT64_C( -932.21), SIMDE_FLOAT64_C( -616.46), SIMDE_FLOAT64_C( -552.77)), 0 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -528.18), SIMDE_FLOAT64_C( 45.66), SIMDE_FLOAT64_C( 363.59), SIMDE_FLOAT64_C( 611.34)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -921.78), SIMDE_FLOAT64_C( 818.71), SIMDE_FLOAT64_C( -177.51), SIMDE_FLOAT64_C( 690.85)), 0 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -323.41), SIMDE_FLOAT64_C( -699.06), SIMDE_FLOAT64_C( -250.77), SIMDE_FLOAT64_C( 136.95)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 793.97), SIMDE_FLOAT64_C( -124.81), SIMDE_FLOAT64_C( -222.21), SIMDE_FLOAT64_C( 0.47)), 0 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 499.90), SIMDE_FLOAT64_C( 53.18), SIMDE_FLOAT64_C( 122.29), SIMDE_FLOAT64_C( -348.92)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 656.41), SIMDE_FLOAT64_C( 401.79), SIMDE_FLOAT64_C( 913.30), SIMDE_FLOAT64_C( 939.03)), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm256_testz_pd(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm256_testz_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; int r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C( 6293711937966483210), INT64_C( 1880458700636896550), INT64_C(-2395812271494697349), INT64_C(-3219984426865676065)), simde_mm256_set_epi64x(INT64_C(-5944145716236985819), INT64_C( 3393778583556144207), INT64_C(-7902030010445655740), INT64_C( 1343831900549571141)), 0 }, { simde_mm256_set_epi64x(INT64_C(-4587433109256837328), INT64_C( 4600839100559586303), INT64_C(-4228750003309626741), INT64_C( 8478339671631213897)), simde_mm256_set_epi64x(INT64_C(-2398862596123305272), INT64_C( -195553556197471185), INT64_C(-1163267556607256299), INT64_C( 8724663478814299088)), 0 }, { simde_mm256_set_epi64x(INT64_C( 
6318565256204443110), INT64_C( 8197829483289572776), INT64_C(-5241286122396602839), INT64_C( 8034039929823396869)), simde_mm256_set_epi64x(INT64_C( 2316518055936499365), INT64_C(-3827103871459261221), INT64_C(-1074153741299789825), INT64_C(-5706377024354090462)), 0 }, { simde_mm256_set_epi64x(INT64_C( 6337382312890404250), INT64_C( 1040004396151504333), INT64_C(-6858337698693557354), INT64_C(-3290810792006167916)), simde_mm256_set_epi64x(INT64_C(-5980280665599952377), INT64_C(-4826455948616871632), INT64_C( 3955163730046162798), INT64_C(-5561662165703631134)), 0 }, { simde_mm256_set_epi64x(INT64_C(-3638435057324933583), INT64_C(-9111798730377633063), INT64_C(-2892856737256268180), INT64_C( 4241350171537373665)), simde_mm256_set_epi64x(INT64_C(-1517017303777947826), INT64_C( 1106019512787868766), INT64_C(-4154459162475494220), INT64_C( 2458562407439632505)), 0 }, { simde_mm256_set_epi64x(INT64_C(-8613849652924649480), INT64_C( 4502213447815150777), INT64_C( 964826197151294912), INT64_C(-2062422363128377394)), simde_mm256_set_epi64x(INT64_C( 420430988932892588), INT64_C( 7089052628145876495), INT64_C( 964826197151294912), INT64_C(-2062422363128377394)), 0 }, { simde_mm256_set_epi64x(INT64_C( 2313467387214959309), INT64_C( 1954089676203891706), INT64_C(-5992642054331042599), INT64_C( 2987244174038246250)), simde_mm256_set_epi64x(~INT64_C( 2313467387214959309), ~INT64_C( 1954089676203891706), ~INT64_C(-5992642054331042599), ~INT64_C( 2987244174038246250)), 1 }, { simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0)), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm256_testz_si256(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_testnzc_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; int r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 
-677.80), SIMDE_FLOAT32_C( 923.51), SIMDE_FLOAT32_C( 12.34), SIMDE_FLOAT32_C( 570.37)), simde_mm_set_ps(SIMDE_FLOAT32_C( -986.13), SIMDE_FLOAT32_C( 240.32), SIMDE_FLOAT32_C( 591.49), SIMDE_FLOAT32_C( 161.31)), 0 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -901.76), SIMDE_FLOAT32_C( -346.47), SIMDE_FLOAT32_C( 361.48), SIMDE_FLOAT32_C( 579.94)), simde_mm_set_ps(SIMDE_FLOAT32_C( 37.29), SIMDE_FLOAT32_C( 138.07), SIMDE_FLOAT32_C( -20.49), SIMDE_FLOAT32_C( 183.91)), 0 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -126.05), SIMDE_FLOAT32_C( -737.54), SIMDE_FLOAT32_C( 797.63), SIMDE_FLOAT32_C( -91.26)), simde_mm_set_ps(SIMDE_FLOAT32_C( 622.90), SIMDE_FLOAT32_C( 921.88), SIMDE_FLOAT32_C( -199.46), SIMDE_FLOAT32_C( -960.51)), 1 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -724.79), SIMDE_FLOAT32_C( 401.73), SIMDE_FLOAT32_C( 459.33), SIMDE_FLOAT32_C( -257.11)), simde_mm_set_ps(SIMDE_FLOAT32_C( -483.83), SIMDE_FLOAT32_C( -357.19), SIMDE_FLOAT32_C( 660.48), SIMDE_FLOAT32_C( -967.49)), 1 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 987.08), SIMDE_FLOAT32_C( -85.77), SIMDE_FLOAT32_C( 750.67), SIMDE_FLOAT32_C( -384.35)), simde_mm_set_ps(SIMDE_FLOAT32_C( -846.85), SIMDE_FLOAT32_C( 171.98), SIMDE_FLOAT32_C( 38.30), SIMDE_FLOAT32_C( -999.02)), 1 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -535.41), SIMDE_FLOAT32_C( 674.36), SIMDE_FLOAT32_C( 853.75), SIMDE_FLOAT32_C( 423.18)), simde_mm_set_ps(SIMDE_FLOAT32_C( 436.68), SIMDE_FLOAT32_C( -556.22), SIMDE_FLOAT32_C( -733.91), SIMDE_FLOAT32_C( -508.00)), 0 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 87.42), SIMDE_FLOAT32_C( -99.53), SIMDE_FLOAT32_C( -449.18), SIMDE_FLOAT32_C( 694.82)), simde_mm_set_ps(SIMDE_FLOAT32_C( -853.63), SIMDE_FLOAT32_C( -112.00), SIMDE_FLOAT32_C( 87.42), SIMDE_FLOAT32_C( -97.80)), 1 }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 342.14), SIMDE_FLOAT32_C( 347.96), SIMDE_FLOAT32_C( -638.14), SIMDE_FLOAT32_C( -357.36)), simde_mm_set_ps(SIMDE_FLOAT32_C( 103.59), SIMDE_FLOAT32_C( 685.51), SIMDE_FLOAT32_C( 13.48), SIMDE_FLOAT32_C( 108.92)), 0 } }; for (size_t i = 0 ; i 
< (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_testnzc_ps(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_testnzc_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; int r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -120.11), SIMDE_FLOAT64_C( 530.28)), simde_mm_set_pd(SIMDE_FLOAT64_C( -886.61), SIMDE_FLOAT64_C( 297.97)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 660.15), SIMDE_FLOAT64_C( 462.46)), simde_mm_set_pd(SIMDE_FLOAT64_C( 753.92), SIMDE_FLOAT64_C( -475.11)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 195.21), SIMDE_FLOAT64_C( 577.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( 977.83), SIMDE_FLOAT64_C( 562.50)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 184.20), SIMDE_FLOAT64_C( -531.42)), simde_mm_set_pd(SIMDE_FLOAT64_C( -597.14), SIMDE_FLOAT64_C( 63.31)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 591.68), SIMDE_FLOAT64_C( -753.83)), simde_mm_set_pd(SIMDE_FLOAT64_C( 42.69), SIMDE_FLOAT64_C( -626.35)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 799.46), SIMDE_FLOAT64_C( 415.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -755.76), SIMDE_FLOAT64_C( -637.19)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 708.86), SIMDE_FLOAT64_C( -480.39)), simde_mm_set_pd(SIMDE_FLOAT64_C( 417.06), SIMDE_FLOAT64_C( -687.07)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -48.08), SIMDE_FLOAT64_C( 617.37)), simde_mm_set_pd(SIMDE_FLOAT64_C( -937.79), SIMDE_FLOAT64_C( -565.45)), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_testnzc_pd(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm256_testnzc_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; int r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 583.89), SIMDE_FLOAT32_C( -712.24), SIMDE_FLOAT32_C( -125.89), SIMDE_FLOAT32_C( 188.79), SIMDE_FLOAT32_C( 520.73), SIMDE_FLOAT32_C( -68.12), SIMDE_FLOAT32_C( 822.52), 
SIMDE_FLOAT32_C( -595.06)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 671.97), SIMDE_FLOAT32_C( 390.57), SIMDE_FLOAT32_C( -318.49), SIMDE_FLOAT32_C( -885.66), SIMDE_FLOAT32_C( -314.30), SIMDE_FLOAT32_C( -285.04), SIMDE_FLOAT32_C( -162.81), SIMDE_FLOAT32_C( -410.54)), 1 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -820.54), SIMDE_FLOAT32_C( -790.46), SIMDE_FLOAT32_C( 711.91), SIMDE_FLOAT32_C( 907.30), SIMDE_FLOAT32_C( -112.02), SIMDE_FLOAT32_C( 599.13), SIMDE_FLOAT32_C( 409.13), SIMDE_FLOAT32_C( -352.81)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 176.49), SIMDE_FLOAT32_C( 775.58), SIMDE_FLOAT32_C( -44.34), SIMDE_FLOAT32_C( -968.49), SIMDE_FLOAT32_C( 67.85), SIMDE_FLOAT32_C( 437.32), SIMDE_FLOAT32_C( -839.45), SIMDE_FLOAT32_C( -726.44)), 1 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 967.58), SIMDE_FLOAT32_C( -926.13), SIMDE_FLOAT32_C( -667.55), SIMDE_FLOAT32_C( 983.87), SIMDE_FLOAT32_C( 566.68), SIMDE_FLOAT32_C( 720.39), SIMDE_FLOAT32_C( 81.27), SIMDE_FLOAT32_C( -180.83)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -556.40), SIMDE_FLOAT32_C( 58.13), SIMDE_FLOAT32_C( -255.30), SIMDE_FLOAT32_C( -733.19), SIMDE_FLOAT32_C( 141.91), SIMDE_FLOAT32_C( 83.86), SIMDE_FLOAT32_C( 265.25), SIMDE_FLOAT32_C( 380.17)), 1 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -290.71), SIMDE_FLOAT32_C( -838.97), SIMDE_FLOAT32_C( -21.50), SIMDE_FLOAT32_C( 222.89), SIMDE_FLOAT32_C( 710.43), SIMDE_FLOAT32_C( -683.80), SIMDE_FLOAT32_C( -751.33), SIMDE_FLOAT32_C( 356.34)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 328.43), SIMDE_FLOAT32_C( 897.92), SIMDE_FLOAT32_C( -115.62), SIMDE_FLOAT32_C( 410.36), SIMDE_FLOAT32_C( 613.67), SIMDE_FLOAT32_C( -980.39), SIMDE_FLOAT32_C( 791.41), SIMDE_FLOAT32_C( 271.47)), 0 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -54.57), SIMDE_FLOAT32_C( 595.88), SIMDE_FLOAT32_C( 92.56), SIMDE_FLOAT32_C( -69.35), SIMDE_FLOAT32_C( 525.25), SIMDE_FLOAT32_C( 150.31), SIMDE_FLOAT32_C( 507.37), SIMDE_FLOAT32_C( 171.79)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -147.77), SIMDE_FLOAT32_C( 635.99), SIMDE_FLOAT32_C( 922.13), 
SIMDE_FLOAT32_C( -150.43), SIMDE_FLOAT32_C( -599.09), SIMDE_FLOAT32_C( 969.81), SIMDE_FLOAT32_C( -52.12), SIMDE_FLOAT32_C( 931.26)), 1 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -925.77), SIMDE_FLOAT32_C( -819.13), SIMDE_FLOAT32_C( -387.03), SIMDE_FLOAT32_C( 369.31), SIMDE_FLOAT32_C( 816.32), SIMDE_FLOAT32_C( -110.60), SIMDE_FLOAT32_C( 155.71), SIMDE_FLOAT32_C( -467.73)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -249.07), SIMDE_FLOAT32_C( 262.13), SIMDE_FLOAT32_C( -955.31), SIMDE_FLOAT32_C( -680.63), SIMDE_FLOAT32_C( -662.36), SIMDE_FLOAT32_C( 38.93), SIMDE_FLOAT32_C( 136.68), SIMDE_FLOAT32_C( 432.80)), 1 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -753.72), SIMDE_FLOAT32_C( 933.35), SIMDE_FLOAT32_C( 780.23), SIMDE_FLOAT32_C( 299.81), SIMDE_FLOAT32_C( -790.25), SIMDE_FLOAT32_C( 868.49), SIMDE_FLOAT32_C( -966.55), SIMDE_FLOAT32_C( -856.58)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 584.14), SIMDE_FLOAT32_C( -395.66), SIMDE_FLOAT32_C( -690.53), SIMDE_FLOAT32_C( -230.79), SIMDE_FLOAT32_C( -409.16), SIMDE_FLOAT32_C( -954.27), SIMDE_FLOAT32_C( -286.31), SIMDE_FLOAT32_C( -72.81)), 1 }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -458.63), SIMDE_FLOAT32_C( -211.79), SIMDE_FLOAT32_C( 179.21), SIMDE_FLOAT32_C( -282.23), SIMDE_FLOAT32_C( 901.33), SIMDE_FLOAT32_C( 545.46), SIMDE_FLOAT32_C( 300.44), SIMDE_FLOAT32_C( 545.25)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 816.92), SIMDE_FLOAT32_C( -195.51), SIMDE_FLOAT32_C( -422.20), SIMDE_FLOAT32_C( 587.24), SIMDE_FLOAT32_C( -85.60), SIMDE_FLOAT32_C( 249.83), SIMDE_FLOAT32_C( -348.91), SIMDE_FLOAT32_C( 259.84)), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm256_testnzc_ps(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm256_testnzc_pd().
 *
 * Each of the 8 entries in test_vec holds two simde__m256d inputs (a, b),
 * built with simde_mm256_set_pd() from four double constants, and the
 * expected int result r.  The loop calls simde_mm256_testnzc_pd(a, b) for
 * every entry and asserts with simde_assert_equal_i() that the result
 * equals r.
 *
 * Returns 0 once every entry has been checked.
 *
 * NOTE(review): presumably mirrors Intel's _mm256_testnzc_pd (sign bits
 * only); the contract itself is not visible in this file -- confirm. */
static int test_simde_mm256_testnzc_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; int r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -740.05), SIMDE_FLOAT64_C( -803.89), SIMDE_FLOAT64_C( -738.69), SIMDE_FLOAT64_C( 
-907.97)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -601.20), SIMDE_FLOAT64_C( 873.56), SIMDE_FLOAT64_C( -427.28), SIMDE_FLOAT64_C( -539.59)), 0 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -679.84), SIMDE_FLOAT64_C( 334.20), SIMDE_FLOAT64_C( 374.46), SIMDE_FLOAT64_C( -17.90)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 933.19), SIMDE_FLOAT64_C( 255.92), SIMDE_FLOAT64_C( -527.33), SIMDE_FLOAT64_C( 651.28)), 0 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -960.47), SIMDE_FLOAT64_C( 47.50), SIMDE_FLOAT64_C( 839.01), SIMDE_FLOAT64_C( -388.45)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -230.23), SIMDE_FLOAT64_C( -286.70), SIMDE_FLOAT64_C( -578.79), SIMDE_FLOAT64_C( 287.52)), 1 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -128.67), SIMDE_FLOAT64_C( -282.41), SIMDE_FLOAT64_C( -741.53), SIMDE_FLOAT64_C( 405.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -927.14), SIMDE_FLOAT64_C( -105.58), SIMDE_FLOAT64_C( -674.42), SIMDE_FLOAT64_C( -434.93)), 1 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 469.51), SIMDE_FLOAT64_C( -726.27), SIMDE_FLOAT64_C( -57.54), SIMDE_FLOAT64_C( 10.85)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -629.68), SIMDE_FLOAT64_C( 193.64), SIMDE_FLOAT64_C( -188.44), SIMDE_FLOAT64_C( -942.28)), 1 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 717.44), SIMDE_FLOAT64_C( -428.23), SIMDE_FLOAT64_C( -903.34), SIMDE_FLOAT64_C( 963.44)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -806.29), SIMDE_FLOAT64_C( 928.14), SIMDE_FLOAT64_C( -419.31), SIMDE_FLOAT64_C( -536.05)), 1 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -682.64), SIMDE_FLOAT64_C( 364.27), SIMDE_FLOAT64_C( -11.12), SIMDE_FLOAT64_C( 923.42)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -496.96), SIMDE_FLOAT64_C( -698.68), SIMDE_FLOAT64_C( 762.99), SIMDE_FLOAT64_C( 104.59)), 1 }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -867.55), SIMDE_FLOAT64_C( 263.90), SIMDE_FLOAT64_C( -169.35), SIMDE_FLOAT64_C( 237.91)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 768.02), SIMDE_FLOAT64_C( 326.08), SIMDE_FLOAT64_C( 577.75), SIMDE_FLOAT64_C( -405.14)), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) 
/ sizeof(test_vec[0])); i++) { int r = simde_mm256_testnzc_pd(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm256_testnzc_si256().
 *
 * Each of the 8 entries in test_vec holds two simde__m256i inputs (a, b),
 * built with simde_mm256_set_epi64x() from four 64-bit constants, and the
 * expected int result r.  The loop calls simde_mm256_testnzc_si256(a, b)
 * for every entry and asserts with simde_assert_equal_i() that the result
 * equals r.
 *
 * The final entry is the only one expecting 0: a has every lane set to -1
 * and b has every lane set to 0.
 *
 * Returns 0 once every entry has been checked. */
static int test_simde_mm256_testnzc_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; int r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C(-6804708873655136040), INT64_C( 4446918229480945172), INT64_C(-6458803806102185271), INT64_C( 6419639704555297719)), simde_mm256_set_epi64x(INT64_C( 4086527184939990173), INT64_C(-4592254743728630867), INT64_C( 4970594815150486048), INT64_C(-7583682924010349946)), 1 }, { simde_mm256_set_epi64x(INT64_C(-1928057325376684019), INT64_C(-8724716645086732256), INT64_C(-3860992505389240967), INT64_C( 2464414912339664108)), simde_mm256_set_epi64x(INT64_C( 2743122637609340204), INT64_C( 1297961604072261704), INT64_C( 3905148821277274727), INT64_C( 7711768841031320482)), 1 }, { simde_mm256_set_epi64x(INT64_C(-1757300674109662463), INT64_C( 3540874093748815164), INT64_C( 6199345139492343278), INT64_C(-2392213781376855007)), simde_mm256_set_epi64x(INT64_C( 3074780231229279065), INT64_C( 3565435212917289013), INT64_C( 4586940771077894472), INT64_C(-7584245251433225890)), 1 }, { simde_mm256_set_epi64x(INT64_C( 494161565528569426), INT64_C( 6322507550162055397), INT64_C(-5323201274204502385), INT64_C(-1289213418743081892)), simde_mm256_set_epi64x(INT64_C(-6476107628412075124), INT64_C(-6021204385531569231), INT64_C(-1583794509252285729), INT64_C( 3089826828243401077)), 1 }, { simde_mm256_set_epi64x(INT64_C( 8385406147133094169), INT64_C( 8814670790512562044), INT64_C(-3816462967170746071), INT64_C( 6122654749309721394)), simde_mm256_set_epi64x(INT64_C( 3128481605987261169), INT64_C( 5408589980237811609), INT64_C( 884520455099049673), INT64_C( 8133398980467634343)), 1 }, { simde_mm256_set_epi64x(INT64_C(-8329161804206964235), INT64_C( 636927422382767873), INT64_C( 3009146061842021624), INT64_C(-1851032033415757843)), simde_mm256_set_epi64x(INT64_C( 
6472691381239458493), INT64_C( -333790812247230429), INT64_C(-8827165560999629213), INT64_C(-6808896659071721867)), 1 }, { simde_mm256_set_epi64x(INT64_C( 6104921182164936438), INT64_C(-1838247589228581946), INT64_C( 4047419838992777892), INT64_C(-7001360392396553117)), simde_mm256_set_epi64x(INT64_C( 5238813195851712113), INT64_C( -198251833482699615), INT64_C(-2396015894110422309), INT64_C(-6041072787160554283)), 1 }, { simde_mm256_set_epi64x(INT64_C( -1), INT64_C( -1), INT64_C( -1), INT64_C( -1)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0)), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm256_testnzc_si256(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; }
/* Test list for this file: one SIMDE_TEST_FUNC_LIST_ENTRY(name) per test,
 * bracketed by SIMDE_TEST_FUNC_LIST_BEGIN / SIMDE_TEST_FUNC_LIST_END.
 *
 * The entry names appear to be the test function names with the
 * "test_simde_" prefix dropped (for example, the mm256_testnzc_si256 entry
 * later in this list matches test_simde_mm256_testnzc_si256() defined just
 * above).
 *
 * NOTE(review): the SIMDE_TEST_FUNC_LIST_* macros are defined outside this
 * file; presumably they expand to the table the test runner iterates over
 * -- confirm in the test harness header. */
SIMDE_TEST_FUNC_LIST_BEGIN
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_set_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_set_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_set_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_set_epi64x)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_set_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_set_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_set_m128)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_set_m128i)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_set_m128d)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_set1_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_set1_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_set1_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_set1_epi64x)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_set1_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_set1_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm256_deinterleaveeven_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm256_deinterleaveodd_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm256_deinterleaveeven_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm256_deinterleaveodd_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm256_deinterleaveeven_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm256_deinterleaveodd_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm256_deinterleaveeven_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm256_deinterleaveodd_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_add_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_add_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm256_addsub_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_addsub_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_and_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_and_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_andnot_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_andnot_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_blend_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_blend_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_blendv_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_blendv_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_broadcast_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_broadcast_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_broadcast_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_broadcast_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_broadcast_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_castpd128_pd256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_castpd256_pd128) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_castps_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_castpd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_castps128_ps256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_castps256_ps128) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_castsi128_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_castsi256_si128) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_castps_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_castpd_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_castsi256_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_castsi256_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_ceil_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_ceil_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmp_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmp_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmp_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmp_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cmp_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cmp_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtepi32_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtepi32_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtpd_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtpd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtps_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtps_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvttpd_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvttps_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtsd_f64) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtsi256_si32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtss_f32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_div_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_div_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_dp_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_extract_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_extract_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_extractf128_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_extractf128_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_extractf128_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_floor_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_floor_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_hadd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_hadd_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_hsub_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_hsub_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_insert_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_insert_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_insert_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_insert_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_insertf128_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_insertf128_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_insertf128_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_lddqu_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_load_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_load_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_load_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_loadu_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_loadu_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_loadu_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_loadu2_m128) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_loadu2_m128d) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_loadu2_m128i) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskload_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskload_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskload_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskload_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskstore_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskstore_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskstore_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskstore_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_min_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_min_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_max_ps) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm256_max_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_movedup_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_movehdup_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_moveldup_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_movemask_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_movemask_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mul_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mul_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_or_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_or_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permute_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permute_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_permute_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_permute_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_permutevar_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_permutevar_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permutevar_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permutevar_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permute2f128_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_rcp_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_round_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_round_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_rsqrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_setr_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_setr_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_setr_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_setr_epi64x) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_setr_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_setr_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_setr_m128) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_setr_m128d) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_setr_m128i) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_setzero_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_setzero_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_setzero_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_shuffle_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_shuffle_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sqrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sqrt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_store_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_store_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_store_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_storeu_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_storeu_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_storeu_si256) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm256_storeu2_m128) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_storeu2_m128d) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_storeu2_m128i) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_stream_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_stream_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_stream_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sub_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sub_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_testc_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_testc_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_testc_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_testc_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_testc_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm_testz_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_testz_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_testz_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_testz_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_testz_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm_testnzc_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_testnzc_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_testnzc_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_testnzc_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_testnzc_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_undefined_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_undefined_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_undefined_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_unpackhi_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_unpackhi_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_unpacklo_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_unpacklo_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_xor_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_xor_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_zextps128_ps256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_zextpd128_pd256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_zextsi128_si256) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx2.c000066400000000000000000052751461400333146700152760ustar00rootroot00000000000000/* Copyright (c) 2018, 2019 Evan Nemerson * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, 
copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define SIMDE_TESTS_CURRENT_ISAX avx2 #include #include /* Table-driven test for simde_mm256_abs_epi8().
 *
 * Each of the 8 entries in test_vec pairs an input vector a with the
 * expected result r, both built with simde_mm256_set_epi8() from 32 int8
 * constants.  The loop computes simde_mm256_abs_epi8(a) for every entry and
 * compares it lane by lane with r via simde_assert_m256i_i8(r, ==, ...).
 *
 * The table pins the INT8_MIN edge case: inputs of INT8_C(-128) have an
 * expected output of INT8_C(-128), i.e. the result is not saturated to 127.
 *
 * Returns 0 once every entry has been checked. */ static int test_simde_mm256_abs_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( -27), INT8_C( 88), INT8_C(-122), INT8_C( -6), INT8_C( -23), INT8_C( 108), INT8_C(-103), INT8_C( 32), INT8_C( 43), INT8_C( 116), INT8_C( -6), INT8_C( -98), INT8_C( -62), INT8_C( -87), INT8_C( 90), INT8_C( 82), INT8_C( 86), INT8_C( 8), INT8_C(-126), INT8_C( -22), INT8_C( -80), INT8_C(-125), INT8_C( -5), INT8_C(-101), INT8_C( 36), INT8_C( 114), INT8_C( -51), INT8_C( 59), INT8_C( -97), INT8_C( 124), INT8_C( 25), INT8_C( 90)), simde_mm256_set_epi8(INT8_C( 27), INT8_C( 88), INT8_C( 122), INT8_C( 6), INT8_C( 23), INT8_C( 108), INT8_C( 103), INT8_C( 32), INT8_C( 43), INT8_C( 116), INT8_C( 6), INT8_C( 98), INT8_C( 62), INT8_C( 87), INT8_C( 90), INT8_C( 82), INT8_C( 86), INT8_C( 8), INT8_C( 126), INT8_C( 22), INT8_C( 80), INT8_C( 125), INT8_C( 5), INT8_C( 101), INT8_C( 36), INT8_C( 114), INT8_C( 51), INT8_C( 59), INT8_C( 97), INT8_C( 124), INT8_C( 25), INT8_C( 90)) }, { simde_mm256_set_epi8(INT8_C( 111), INT8_C( 46), INT8_C( -44), INT8_C( 36), INT8_C( -79), INT8_C( 
101), INT8_C( 0), INT8_C( 2), INT8_C( -69), INT8_C( 31), INT8_C( -68), INT8_C( -82), INT8_C( -45), INT8_C( 120), INT8_C( 39), INT8_C( 46), INT8_C( 66), INT8_C( 30), INT8_C(-106), INT8_C( 118), INT8_C( 61), INT8_C( 98), INT8_C( -61), INT8_C( 98), INT8_C( 49), INT8_C( -12), INT8_C(-117), INT8_C(-115), INT8_C( 63), INT8_C( -92), INT8_C(-102), INT8_C(-110)), simde_mm256_set_epi8(INT8_C( 111), INT8_C( 46), INT8_C( 44), INT8_C( 36), INT8_C( 79), INT8_C( 101), INT8_C( 0), INT8_C( 2), INT8_C( 69), INT8_C( 31), INT8_C( 68), INT8_C( 82), INT8_C( 45), INT8_C( 120), INT8_C( 39), INT8_C( 46), INT8_C( 66), INT8_C( 30), INT8_C( 106), INT8_C( 118), INT8_C( 61), INT8_C( 98), INT8_C( 61), INT8_C( 98), INT8_C( 49), INT8_C( 12), INT8_C( 117), INT8_C( 115), INT8_C( 63), INT8_C( 92), INT8_C( 102), INT8_C( 110)) }, { simde_mm256_set_epi8(INT8_C( 64), INT8_C( -84), INT8_C( 54), INT8_C(-102), INT8_C( -69), INT8_C( 12), INT8_C(-119), INT8_C( -19), INT8_C( 19), INT8_C( -55), INT8_C( -11), INT8_C(-117), INT8_C( -68), INT8_C( -51), INT8_C( 26), INT8_C( 72), INT8_C( -15), INT8_C( 108), INT8_C( -66), INT8_C( -24), INT8_C( -97), INT8_C( -48), INT8_C( 75), INT8_C( 35), INT8_C( 48), INT8_C( -25), INT8_C( -43), INT8_C( 2), INT8_C( -75), INT8_C( 28), INT8_C(-108), INT8_C( -43)), simde_mm256_set_epi8(INT8_C( 64), INT8_C( 84), INT8_C( 54), INT8_C( 102), INT8_C( 69), INT8_C( 12), INT8_C( 119), INT8_C( 19), INT8_C( 19), INT8_C( 55), INT8_C( 11), INT8_C( 117), INT8_C( 68), INT8_C( 51), INT8_C( 26), INT8_C( 72), INT8_C( 15), INT8_C( 108), INT8_C( 66), INT8_C( 24), INT8_C( 97), INT8_C( 48), INT8_C( 75), INT8_C( 35), INT8_C( 48), INT8_C( 25), INT8_C( 43), INT8_C( 2), INT8_C( 75), INT8_C( 28), INT8_C( 108), INT8_C( 43)) }, { simde_mm256_set_epi8(INT8_C( 8), INT8_C( -54), INT8_C( -1), INT8_C(-128), INT8_C( 118), INT8_C( -15), INT8_C( 125), INT8_C( 76), INT8_C( 47), INT8_C( 33), INT8_C( 69), INT8_C( 21), INT8_C(-116), INT8_C( 34), INT8_C( 36), INT8_C( 31), INT8_C( -32), INT8_C( -84), INT8_C( 23), INT8_C( -76), 
INT8_C( 82), INT8_C(-115), INT8_C( 74), INT8_C(-110), INT8_C( -46), INT8_C( 125), INT8_C( -52), INT8_C( -99), INT8_C( 30), INT8_C(-106), INT8_C( 66), INT8_C( 5)), simde_mm256_set_epi8(INT8_C( 8), INT8_C( 54), INT8_C( 1), INT8_C(-128), INT8_C( 118), INT8_C( 15), INT8_C( 125), INT8_C( 76), INT8_C( 47), INT8_C( 33), INT8_C( 69), INT8_C( 21), INT8_C( 116), INT8_C( 34), INT8_C( 36), INT8_C( 31), INT8_C( 32), INT8_C( 84), INT8_C( 23), INT8_C( 76), INT8_C( 82), INT8_C( 115), INT8_C( 74), INT8_C( 110), INT8_C( 46), INT8_C( 125), INT8_C( 52), INT8_C( 99), INT8_C( 30), INT8_C( 106), INT8_C( 66), INT8_C( 5)) }, { simde_mm256_set_epi8(INT8_C( 122), INT8_C( 42), INT8_C(-121), INT8_C(-106), INT8_C( 122), INT8_C( -8), INT8_C( 81), INT8_C(-109), INT8_C( 124), INT8_C( 32), INT8_C( 63), INT8_C( -21), INT8_C( -51), INT8_C( -42), INT8_C( 1), INT8_C( -78), INT8_C( 74), INT8_C( 8), INT8_C( 25), INT8_C( 10), INT8_C( 113), INT8_C( -75), INT8_C( -32), INT8_C( 126), INT8_C( -87), INT8_C( 67), INT8_C( 78), INT8_C( -64), INT8_C( 7), INT8_C( -40), INT8_C( -46), INT8_C( -59)), simde_mm256_set_epi8(INT8_C( 122), INT8_C( 42), INT8_C( 121), INT8_C( 106), INT8_C( 122), INT8_C( 8), INT8_C( 81), INT8_C( 109), INT8_C( 124), INT8_C( 32), INT8_C( 63), INT8_C( 21), INT8_C( 51), INT8_C( 42), INT8_C( 1), INT8_C( 78), INT8_C( 74), INT8_C( 8), INT8_C( 25), INT8_C( 10), INT8_C( 113), INT8_C( 75), INT8_C( 32), INT8_C( 126), INT8_C( 87), INT8_C( 67), INT8_C( 78), INT8_C( 64), INT8_C( 7), INT8_C( 40), INT8_C( 46), INT8_C( 59)) }, { simde_mm256_set_epi8(INT8_C( 10), INT8_C( 120), INT8_C( 81), INT8_C(-105), INT8_C( 73), INT8_C( -95), INT8_C( 79), INT8_C( -86), INT8_C( -93), INT8_C( -54), INT8_C( -43), INT8_C( -88), INT8_C( 59), INT8_C( -27), INT8_C( 12), INT8_C( 10), INT8_C( 73), INT8_C( -48), INT8_C( 112), INT8_C( 27), INT8_C(-113), INT8_C( -31), INT8_C( -56), INT8_C( -96), INT8_C( 48), INT8_C( -94), INT8_C(-111), INT8_C( 60), INT8_C(-116), INT8_C( -77), INT8_C( -70), INT8_C( 17)), simde_mm256_set_epi8(INT8_C( 
10), INT8_C( 120), INT8_C( 81), INT8_C( 105), INT8_C( 73), INT8_C( 95), INT8_C( 79), INT8_C( 86), INT8_C( 93), INT8_C( 54), INT8_C( 43), INT8_C( 88), INT8_C( 59), INT8_C( 27), INT8_C( 12), INT8_C( 10), INT8_C( 73), INT8_C( 48), INT8_C( 112), INT8_C( 27), INT8_C( 113), INT8_C( 31), INT8_C( 56), INT8_C( 96), INT8_C( 48), INT8_C( 94), INT8_C( 111), INT8_C( 60), INT8_C( 116), INT8_C( 77), INT8_C( 70), INT8_C( 17)) }, { simde_mm256_set_epi8(INT8_C( 61), INT8_C( -57), INT8_C( -99), INT8_C( 0), INT8_C( 98), INT8_C(-121), INT8_C( 67), INT8_C( -20), INT8_C( 44), INT8_C( 53), INT8_C(-128), INT8_C( 44), INT8_C( 127), INT8_C( 53), INT8_C(-127), INT8_C( 58), INT8_C( 35), INT8_C( 83), INT8_C( -56), INT8_C( 22), INT8_C( -4), INT8_C( -6), INT8_C( -7), INT8_C( 121), INT8_C( -22), INT8_C( -32), INT8_C( -52), INT8_C( 124), INT8_C( -93), INT8_C( 55), INT8_C( -23), INT8_C( -62)), simde_mm256_set_epi8(INT8_C( 61), INT8_C( 57), INT8_C( 99), INT8_C( 0), INT8_C( 98), INT8_C( 121), INT8_C( 67), INT8_C( 20), INT8_C( 44), INT8_C( 53), INT8_C(-128), INT8_C( 44), INT8_C( 127), INT8_C( 53), INT8_C( 127), INT8_C( 58), INT8_C( 35), INT8_C( 83), INT8_C( 56), INT8_C( 22), INT8_C( 4), INT8_C( 6), INT8_C( 7), INT8_C( 121), INT8_C( 22), INT8_C( 32), INT8_C( 52), INT8_C( 124), INT8_C( 93), INT8_C( 55), INT8_C( 23), INT8_C( 62)) }, { simde_mm256_set_epi8(INT8_C( 71), INT8_C( -58), INT8_C( 24), INT8_C( 117), INT8_C( 2), INT8_C( -31), INT8_C( -86), INT8_C( 101), INT8_C( 3), INT8_C( 63), INT8_C( 2), INT8_C( -30), INT8_C( -33), INT8_C( 51), INT8_C( 60), INT8_C( 81), INT8_C( -91), INT8_C( -73), INT8_C( 66), INT8_C( 67), INT8_C( 72), INT8_C( -7), INT8_C( 44), INT8_C( -32), INT8_C( -80), INT8_C( 101), INT8_C( -98), INT8_C( 89), INT8_C( 89), INT8_C( 94), INT8_C( 109), INT8_C(-109)), simde_mm256_set_epi8(INT8_C( 71), INT8_C( 58), INT8_C( 24), INT8_C( 117), INT8_C( 2), INT8_C( 31), INT8_C( 86), INT8_C( 101), INT8_C( 3), INT8_C( 63), INT8_C( 2), INT8_C( 30), INT8_C( 33), INT8_C( 51), INT8_C( 60), INT8_C( 81), 
INT8_C( 91), INT8_C( 73), INT8_C( 66), INT8_C( 67), INT8_C( 72), INT8_C( 7), INT8_C( 44), INT8_C( 32), INT8_C( 80), INT8_C( 101), INT8_C( 98), INT8_C( 89), INT8_C( 89), INT8_C( 94), INT8_C( 109), INT8_C( 109)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_abs_epi8(test_vec[i].a); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm256_abs_epi16().
 *
 * Each of the 8 entries in test_vec pairs an input vector a with the
 * expected result r, both built with simde_mm256_set_epi16() from 16 int16
 * constants.  The loop computes simde_mm256_abs_epi16(a) for every entry
 * and compares it lane by lane with r via simde_assert_m256i_i16(r, ==, ...).
 *
 * Returns 0 once every entry has been checked. */
static int test_simde_mm256_abs_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C( 9101), INT16_C( 13664), INT16_C( 14007), INT16_C( 17440), INT16_C( 21201), INT16_C(-16892), INT16_C(-22702), INT16_C(-11875), INT16_C( 9352), INT16_C( 21001), INT16_C( 15464), INT16_C( 27994), INT16_C( 12104), INT16_C(-22404), INT16_C(-21433), INT16_C( -4031)), simde_mm256_set_epi16(INT16_C( 9101), INT16_C( 13664), INT16_C( 14007), INT16_C( 17440), INT16_C( 21201), INT16_C( 16892), INT16_C( 22702), INT16_C( 11875), INT16_C( 9352), INT16_C( 21001), INT16_C( 15464), INT16_C( 27994), INT16_C( 12104), INT16_C( 22404), INT16_C( 21433), INT16_C( 4031)) }, { simde_mm256_set_epi16(INT16_C( 20289), INT16_C( -8788), INT16_C( 5917), INT16_C(-28916), INT16_C(-21540), INT16_C( -8179), INT16_C(-25392), INT16_C( 2609), INT16_C( 12609), INT16_C(-11367), INT16_C( -70), INT16_C( 28633), INT16_C(-21576), INT16_C(-23753), INT16_C(-11797), INT16_C(-17346)), simde_mm256_set_epi16(INT16_C( 20289), INT16_C( 8788), INT16_C( 5917), INT16_C( 28916), INT16_C( 21540), INT16_C( 8179), INT16_C( 25392), INT16_C( 2609), INT16_C( 12609), INT16_C( 11367), INT16_C( 70), INT16_C( 28633), INT16_C( 21576), INT16_C( 23753), INT16_C( 11797), INT16_C( 17346)) }, { simde_mm256_set_epi16(INT16_C( 11563), INT16_C(-31585), INT16_C( 24583), INT16_C( 2918), INT16_C( 5705), INT16_C( 31274), INT16_C(-12388), INT16_C( 31454), INT16_C( 5008), INT16_C( 10123), INT16_C( 28874), INT16_C(-27636), INT16_C( 1380), INT16_C(-32687), INT16_C( 24141), INT16_C( 11570)), 
simde_mm256_set_epi16(INT16_C( 11563), INT16_C( 31585), INT16_C( 24583), INT16_C( 2918), INT16_C( 5705), INT16_C( 31274), INT16_C( 12388), INT16_C( 31454), INT16_C( 5008), INT16_C( 10123), INT16_C( 28874), INT16_C( 27636), INT16_C( 1380), INT16_C( 32687), INT16_C( 24141), INT16_C( 11570)) }, { simde_mm256_set_epi16(INT16_C(-28981), INT16_C(-21254), INT16_C( 12206), INT16_C( 17751), INT16_C( 4887), INT16_C( 27025), INT16_C( 20436), INT16_C( -3143), INT16_C( 5806), INT16_C( 19398), INT16_C( 23890), INT16_C( -1841), INT16_C( -1212), INT16_C( -418), INT16_C( 2804), INT16_C(-24086)), simde_mm256_set_epi16(INT16_C( 28981), INT16_C( 21254), INT16_C( 12206), INT16_C( 17751), INT16_C( 4887), INT16_C( 27025), INT16_C( 20436), INT16_C( 3143), INT16_C( 5806), INT16_C( 19398), INT16_C( 23890), INT16_C( 1841), INT16_C( 1212), INT16_C( 418), INT16_C( 2804), INT16_C( 24086)) }, { simde_mm256_set_epi16(INT16_C(-32227), INT16_C( 26559), INT16_C( 32468), INT16_C( 9282), INT16_C( 10212), INT16_C( 7157), INT16_C(-18109), INT16_C(-13716), INT16_C( 3356), INT16_C( -6654), INT16_C( 3548), INT16_C(-31612), INT16_C( -3226), INT16_C(-30156), INT16_C(-15323), INT16_C( 8689)), simde_mm256_set_epi16(INT16_C( 32227), INT16_C( 26559), INT16_C( 32468), INT16_C( 9282), INT16_C( 10212), INT16_C( 7157), INT16_C( 18109), INT16_C( 13716), INT16_C( 3356), INT16_C( 6654), INT16_C( 3548), INT16_C( 31612), INT16_C( 3226), INT16_C( 30156), INT16_C( 15323), INT16_C( 8689)) }, { simde_mm256_set_epi16(INT16_C( 14337), INT16_C(-20237), INT16_C( 7001), INT16_C( 29027), INT16_C( -3029), INT16_C( 12894), INT16_C(-24482), INT16_C( -8195), INT16_C( -7637), INT16_C(-26436), INT16_C( 15950), INT16_C( 5319), INT16_C( 22977), INT16_C( -593), INT16_C(-29639), INT16_C( 23312)), simde_mm256_set_epi16(INT16_C( 14337), INT16_C( 20237), INT16_C( 7001), INT16_C( 29027), INT16_C( 3029), INT16_C( 12894), INT16_C( 24482), INT16_C( 8195), INT16_C( 7637), INT16_C( 26436), INT16_C( 15950), INT16_C( 5319), INT16_C( 22977), INT16_C( 
593), INT16_C( 29639), INT16_C( 23312)) }, { simde_mm256_set_epi16(INT16_C( 4249), INT16_C( -3888), INT16_C( 15630), INT16_C(-11095), INT16_C(-21648), INT16_C(-10947), INT16_C( -1651), INT16_C( 5821), INT16_C( 25032), INT16_C( 26383), INT16_C(-18726), INT16_C(-14746), INT16_C( 9694), INT16_C(-29231), INT16_C( 18526), INT16_C(-12816)), simde_mm256_set_epi16(INT16_C( 4249), INT16_C( 3888), INT16_C( 15630), INT16_C( 11095), INT16_C( 21648), INT16_C( 10947), INT16_C( 1651), INT16_C( 5821), INT16_C( 25032), INT16_C( 26383), INT16_C( 18726), INT16_C( 14746), INT16_C( 9694), INT16_C( 29231), INT16_C( 18526), INT16_C( 12816)) }, { simde_mm256_set_epi16(INT16_C( 6410), INT16_C( 4746), INT16_C( 16873), INT16_C(-29607), INT16_C( 21314), INT16_C(-32512), INT16_C(-23052), INT16_C( 20594), INT16_C( -1613), INT16_C( 26993), INT16_C( 28325), INT16_C( 406), INT16_C(-19031), INT16_C( 6060), INT16_C(-29650), INT16_C( 8164)), simde_mm256_set_epi16(INT16_C( 6410), INT16_C( 4746), INT16_C( 16873), INT16_C( 29607), INT16_C( 21314), INT16_C( 32512), INT16_C( 23052), INT16_C( 20594), INT16_C( 1613), INT16_C( 26993), INT16_C( 28325), INT16_C( 406), INT16_C( 19031), INT16_C( 6060), INT16_C( 29650), INT16_C( 8164)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_abs_epi16(test_vec[i].a); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm256_abs_epi32().
 *
 * Each of the 8 entries in test_vec pairs an input vector a with the
 * expected result r, both built with simde_mm256_set_epi32() from 8 int32
 * constants.  The loop computes simde_mm256_abs_epi32(a) for every entry
 * and compares it lane by lane with r via simde_assert_m256i_i32(r, ==, ...).
 *
 * Returns 0 once every entry has been checked. */
static int test_simde_mm256_abs_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 596456800), INT32_C( 917980192), INT32_C( 1389477380), INT32_C(-1487744611), INT32_C( 612913673), INT32_C( 1013476698), INT32_C( 793290876), INT32_C(-1404571583)), simde_mm256_set_epi32(INT32_C( 596456800), INT32_C( 917980192), INT32_C( 1389477380), INT32_C( 1487744611), INT32_C( 612913673), INT32_C( 1013476698), INT32_C( 793290876), INT32_C( 1404571583)) }, { simde_mm256_set_epi32(INT32_C( 1329716652), INT32_C( 387813132), 
INT32_C(-1411588083), INT32_C(-1664087503), INT32_C( 826397593), INT32_C( -4558887), INT32_C(-1413962953), INT32_C( -773080002)), simde_mm256_set_epi32(INT32_C( 1329716652), INT32_C( 387813132), INT32_C( 1411588083), INT32_C( 1664087503), INT32_C( 826397593), INT32_C( 4558887), INT32_C( 1413962953), INT32_C( 773080002)) }, { simde_mm256_set_epi32(INT32_C( 757826719), INT32_C( 1611074406), INT32_C( 373914154), INT32_C( -811828514), INT32_C( 328214411), INT32_C( 1892324364), INT32_C( 90472529), INT32_C( 1582116146)), simde_mm256_set_epi32(INT32_C( 757826719), INT32_C( 1611074406), INT32_C( 373914154), INT32_C( 811828514), INT32_C( 328214411), INT32_C( 1892324364), INT32_C( 90472529), INT32_C( 1582116146)) }, { simde_mm256_set_epi32(INT32_C(-1899254534), INT32_C( 799950167), INT32_C( 320301457), INT32_C( 1339356089), INT32_C( 380521414), INT32_C( 1565718735), INT32_C( -79364514), INT32_C( 183804394)), simde_mm256_set_epi32(INT32_C( 1899254534), INT32_C( 799950167), INT32_C( 320301457), INT32_C( 1339356089), INT32_C( 380521414), INT32_C( 1565718735), INT32_C( 79364514), INT32_C( 183804394)) }, { simde_mm256_set_epi32(INT32_C(-2112002113), INT32_C( 2127832130), INT32_C( 669260789), INT32_C(-1186739604), INT32_C( 219997698), INT32_C( 232555652), INT32_C( -211383756), INT32_C(-1004199439)), simde_mm256_set_epi32(INT32_C( 2112002113), INT32_C( 2127832130), INT32_C( 669260789), INT32_C( 1186739604), INT32_C( 219997698), INT32_C( 232555652), INT32_C( 211383756), INT32_C( 1004199439)) }, { simde_mm256_set_epi32(INT32_C( 939634931), INT32_C( 458846563), INT32_C( -198495650), INT32_C(-1604395011), INT32_C( -500459332), INT32_C( 1045304519), INT32_C( 1505885615), INT32_C(-1942398192)), simde_mm256_set_epi32(INT32_C( 939634931), INT32_C( 458846563), INT32_C( 198495650), INT32_C( 1604395011), INT32_C( 500459332), INT32_C( 1045304519), INT32_C( 1505885615), INT32_C( 1942398192)) }, { simde_mm256_set_epi32(INT32_C( 278524112), INT32_C( 1024382121), INT32_C(-1418668739), INT32_C( 
-108194115), INT32_C( 1640523535), INT32_C(-1227176346), INT32_C( 635342289), INT32_C( 1214172656)), simde_mm256_set_epi32(INT32_C( 278524112), INT32_C( 1024382121), INT32_C( 1418668739), INT32_C( 108194115), INT32_C( 1640523535), INT32_C( 1227176346), INT32_C( 635342289), INT32_C( 1214172656)) }, { simde_mm256_set_epi32(INT32_C( 420090506), INT32_C( 1105824857), INT32_C( 1396867328), INT32_C(-1510715278), INT32_C( -105682575), INT32_C( 1856307606), INT32_C(-1247209556), INT32_C(-1943134236)), simde_mm256_set_epi32(INT32_C( 420090506), INT32_C( 1105824857), INT32_C( 1396867328), INT32_C( 1510715278), INT32_C( 105682575), INT32_C( 1856307606), INT32_C( 1247209556), INT32_C( 1943134236)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_abs_epi32(test_vec[i].a); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_add_epi8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[32]; int8_t b[32]; int8_t r[32]; } test_vec[] = { { { -INT8_C( 31), INT8_C( 62), -INT8_C( 76), INT8_C( 4), -INT8_C( 86), INT8_C( 17), INT8_C( 123), INT8_C( 110), INT8_C( 62), -INT8_C( 91), -INT8_C( 124), INT8_C( 104), INT8_C( 48), -INT8_C( 40), -INT8_C( 103), -INT8_C( 47), INT8_C( 10), -INT8_C( 34), -INT8_C( 78), INT8_C( 22), INT8_C( 48), INT8_C( 43), INT8_C( 12), INT8_C( 72), INT8_C( 27), -INT8_C( 105), INT8_C( 29), INT8_C( 82), -INT8_C( 83), INT8_C( 94), INT8_C( 31), -INT8_C( 114) }, { -INT8_C( 99), -INT8_C( 44), -INT8_C( 109), INT8_C( 71), -INT8_C( 27), INT8_C( 14), -INT8_C( 75), INT8_C( 36), -INT8_C( 77), INT8_C( 58), -INT8_C( 116), -INT8_C( 29), INT8_C( 18), INT8_C( 38), -INT8_C( 76), INT8_C( 28), INT8_C( 4), INT8_C( 102), INT8_C( 51), INT8_C( 52), -INT8_C( 111), INT8_C( 63), INT8_C( 124), -INT8_C( 84), -INT8_C( 42), -INT8_C( 103), -INT8_C( 1), -INT8_C( 125), -INT8_C( 9), INT8_C( 30), INT8_C( 17), -INT8_C( 108) }, { INT8_C( 126), INT8_C( 18), INT8_C( 71), INT8_C( 75), -INT8_C( 113), INT8_C( 31), INT8_C( 48), 
-INT8_C( 110), -INT8_C( 15), -INT8_C( 33), INT8_C( 16), INT8_C( 75), INT8_C( 66), -INT8_C( 2), INT8_C( 77), -INT8_C( 19), INT8_C( 14), INT8_C( 68), -INT8_C( 27), INT8_C( 74), -INT8_C( 63), INT8_C( 106), -INT8_C( 120), -INT8_C( 12), -INT8_C( 15), INT8_C( 48), INT8_C( 28), -INT8_C( 43), -INT8_C( 92), INT8_C( 124), INT8_C( 48), INT8_C( 34) } }, { { -INT8_C( 14), -INT8_C( 92), -INT8_C( 37), -INT8_C( 40), -INT8_C( 78), -INT8_C( 111), -INT8_C( 4), INT8_C( 101), -INT8_C( 53), -INT8_C( 120), INT8_C( 73), -INT8_C( 35), -INT8_C( 82), -INT8_C( 3), -INT8_C( 7), -INT8_C( 78), INT8_C( 100), INT8_C( 44), -INT8_C( 26), -INT8_C( 11), INT8_C( 107), INT8_C( 98), -INT8_C( 94), INT8_C( 65), -INT8_C( 5), -INT8_C( 95), -INT8_C( 60), -INT8_C( 13), -INT8_C( 65), -INT8_C( 42), -INT8_C( 121), -INT8_C( 78) }, { INT8_C( 122), INT8_C( 99), -INT8_C( 118), INT8_C( 45), -INT8_C( 12), -INT8_C( 122), -INT8_C( 110), -INT8_C( 65), INT8_C( 14), -INT8_C( 37), -INT8_C( 100), -INT8_C( 67), -INT8_C( 39), -INT8_C( 107), INT8_C( 111), INT8_C( 61), -INT8_C( 62), INT8_C( 86), INT8_C( 50), INT8_C( 45), -INT8_C( 72), -INT8_C( 44), INT8_C( 111), -INT8_C( 76), INT8_C( 117), INT8_C( 51), -INT8_C( 89), INT8_C( 53), INT8_C( 9), INT8_C( 46), -INT8_C( 25), -INT8_C( 124) }, { INT8_C( 108), INT8_C( 7), INT8_C( 101), INT8_C( 5), -INT8_C( 90), INT8_C( 23), -INT8_C( 114), INT8_C( 36), -INT8_C( 39), INT8_C( 99), -INT8_C( 27), -INT8_C( 102), -INT8_C( 121), -INT8_C( 110), INT8_C( 104), -INT8_C( 17), INT8_C( 38), -INT8_C( 126), INT8_C( 24), INT8_C( 34), INT8_C( 35), INT8_C( 54), INT8_C( 17), -INT8_C( 11), INT8_C( 112), -INT8_C( 44), INT8_C( 107), INT8_C( 40), -INT8_C( 56), INT8_C( 4), INT8_C( 110), INT8_C( 54) } }, { { -INT8_C( 111), INT8_C( 113), -INT8_C( 79), -INT8_C( 123), -INT8_C( 9), INT8_C( 67), INT8_C( 68), INT8_C( 5), INT8_C( 31), -INT8_C( 32), -INT8_C( 62), -INT8_C( 8), INT8_C( 118), INT8_C( 50), INT8_C( 53), INT8_C( 56), -INT8_C( 120), INT8_C( 103), INT8_C( 101), INT8_C( 64), INT8_C( 60), -INT8_C( 44), -INT8_C( 12), 
-INT8_C( 79), INT8_C( 8), -INT8_C( 101), -INT8_C( 26), INT8_C( 17), -INT8_C( 54), -INT8_C( 51), -INT8_C( 107), INT8_C( 91) }, { INT8_C( 62), INT8_C( 70), -INT8_C( 31), INT8_C( 53), -INT8_C( 118), INT8_C( 37), INT8_C( 59), -INT8_C( 87), INT8_C( 6), -INT8_C( 3), -INT8_C( 95), INT8_C( 124), INT8_C( 47), -INT8_C( 42), -INT8_C( 76), -INT8_C( 73), INT8_C( 61), INT8_C( 25), -INT8_C( 8), INT8_C( 121), -INT8_C( 18), -INT8_C( 20), INT8_C( 43), -INT8_C( 10), -INT8_C( 120), INT8_C( 17), INT8_C( 7), INT8_C( 82), -INT8_C( 33), -INT8_C( 99), -INT8_C( 83), INT8_C( 29) }, { -INT8_C( 49), -INT8_C( 73), -INT8_C( 110), -INT8_C( 70), -INT8_C( 127), INT8_C( 104), INT8_MAX, -INT8_C( 82), INT8_C( 37), -INT8_C( 35), INT8_C( 99), INT8_C( 116), -INT8_C( 91), INT8_C( 8), -INT8_C( 23), -INT8_C( 17), -INT8_C( 59), INT8_MIN, INT8_C( 93), -INT8_C( 71), INT8_C( 42), -INT8_C( 64), INT8_C( 31), -INT8_C( 89), -INT8_C( 112), -INT8_C( 84), -INT8_C( 19), INT8_C( 99), -INT8_C( 87), INT8_C( 106), INT8_C( 66), INT8_C( 120) } }, { { -INT8_C( 29), -INT8_C( 114), INT8_C( 83), INT8_C( 109), -INT8_C( 76), -INT8_C( 114), INT8_C( 22), -INT8_C( 70), -INT8_C( 117), -INT8_C( 73), INT8_C( 54), -INT8_C( 69), -INT8_C( 115), -INT8_C( 22), INT8_C( 114), -INT8_C( 53), INT8_C( 3), INT8_C( 106), INT8_C( 68), -INT8_C( 15), INT8_C( 87), INT8_C( 111), -INT8_C( 25), -INT8_C( 33), -INT8_C( 127), -INT8_C( 17), INT8_C( 49), INT8_C( 96), -INT8_C( 116), -INT8_C( 34), INT8_C( 125), INT8_C( 111) }, { INT8_C( 109), -INT8_C( 48), -INT8_C( 35), INT8_C( 33), INT8_C( 94), -INT8_C( 13), -INT8_C( 37), -INT8_C( 22), -INT8_C( 85), INT8_C( 17), -INT8_C( 91), INT8_C( 56), -INT8_C( 5), INT8_C( 23), INT8_C( 3), -INT8_C( 2), -INT8_C( 126), INT8_C( 72), -INT8_C( 16), -INT8_C( 39), -INT8_C( 73), -INT8_C( 41), -INT8_C( 72), INT8_C( 56), -INT8_C( 58), -INT8_C( 23), -INT8_C( 104), INT8_C( 82), -INT8_C( 57), INT8_C( 22), -INT8_C( 62), INT8_C( 52) }, { INT8_C( 80), INT8_C( 94), INT8_C( 48), -INT8_C( 114), INT8_C( 18), -INT8_C( 127), -INT8_C( 15), -INT8_C( 
92), INT8_C( 54), -INT8_C( 56), -INT8_C( 37), -INT8_C( 13), -INT8_C( 120), INT8_C( 1), INT8_C( 117), -INT8_C( 55), -INT8_C( 123), -INT8_C( 78), INT8_C( 52), -INT8_C( 54), INT8_C( 14), INT8_C( 70), -INT8_C( 97), INT8_C( 23), INT8_C( 71), -INT8_C( 40), -INT8_C( 55), -INT8_C( 78), INT8_C( 83), -INT8_C( 12), INT8_C( 63), -INT8_C( 93) } }, { { -INT8_C( 26), -INT8_C( 97), INT8_C( 85), INT8_C( 69), -INT8_C( 110), INT8_C( 48), INT8_C( 47), INT8_C( 61), INT8_C( 65), -INT8_C( 44), INT8_C( 118), INT8_C( 60), -INT8_C( 21), INT8_C( 121), INT8_C( 59), INT8_C( 109), -INT8_C( 63), INT8_C( 43), INT8_C( 70), INT8_C( 121), INT8_C( 2), -INT8_C( 2), -INT8_C( 79), -INT8_C( 55), -INT8_C( 25), INT8_C( 74), INT8_C( 27), -INT8_C( 81), INT8_C( 96), -INT8_C( 35), -INT8_C( 29), INT8_C( 70) }, { INT8_C( 124), INT8_C( 57), -INT8_C( 117), INT8_C( 15), INT8_C( 105), -INT8_C( 70), INT8_C( 76), -INT8_C( 85), -INT8_C( 114), -INT8_C( 62), -INT8_C( 25), INT8_C( 122), INT8_C( 60), INT8_C( 34), -INT8_C( 25), -INT8_C( 3), INT8_C( 77), INT8_C( 46), INT8_C( 118), INT8_C( 80), INT8_C( 44), INT8_C( 40), INT8_C( 25), INT8_C( 20), INT8_C( 114), INT8_C( 52), -INT8_C( 61), -INT8_C( 46), INT8_C( 18), -INT8_C( 90), INT8_C( 24), -INT8_C( 114) }, { INT8_C( 98), -INT8_C( 40), -INT8_C( 32), INT8_C( 84), -INT8_C( 5), -INT8_C( 22), INT8_C( 123), -INT8_C( 24), -INT8_C( 49), -INT8_C( 106), INT8_C( 93), -INT8_C( 74), INT8_C( 39), -INT8_C( 101), INT8_C( 34), INT8_C( 106), INT8_C( 14), INT8_C( 89), -INT8_C( 68), -INT8_C( 55), INT8_C( 46), INT8_C( 38), -INT8_C( 54), -INT8_C( 35), INT8_C( 89), INT8_C( 126), -INT8_C( 34), -INT8_C( 127), INT8_C( 114), -INT8_C( 125), -INT8_C( 5), -INT8_C( 44) } }, { { -INT8_C( 33), -INT8_C( 92), -INT8_C( 99), INT8_C( 73), INT8_C( 94), -INT8_C( 22), -INT8_C( 12), -INT8_C( 19), -INT8_C( 84), -INT8_C( 37), INT8_C( 103), -INT8_C( 24), -INT8_C( 2), INT8_C( 78), -INT8_C( 26), INT8_C( 75), INT8_C( 124), INT8_C( 92), -INT8_C( 101), -INT8_C( 87), -INT8_C( 124), -INT8_C( 76), -INT8_C( 67), -INT8_C( 10), 
-INT8_C( 23), INT8_MIN, -INT8_C( 56), -INT8_C( 5), INT8_C( 38), -INT8_C( 31), -INT8_C( 119), INT8_C( 6) }, { -INT8_C( 123), INT8_C( 39), INT8_C( 79), -INT8_C( 29), INT8_C( 17), INT8_C( 67), -INT8_C( 48), -INT8_C( 67), INT8_C( 30), INT8_C( 55), -INT8_C( 90), INT8_C( 28), -INT8_C( 122), -INT8_C( 116), INT8_C( 104), INT8_C( 2), -INT8_C( 24), INT8_C( 3), -INT8_C( 85), INT8_C( 109), -INT8_C( 72), INT8_C( 104), INT8_C( 99), -INT8_C( 95), -INT8_C( 24), INT8_C( 44), -INT8_C( 100), INT8_C( 15), INT8_C( 13), INT8_C( 37), INT8_C( 21), -INT8_C( 110) }, { INT8_C( 100), -INT8_C( 53), -INT8_C( 20), INT8_C( 44), INT8_C( 111), INT8_C( 45), -INT8_C( 60), -INT8_C( 86), -INT8_C( 54), INT8_C( 18), INT8_C( 13), INT8_C( 4), -INT8_C( 124), -INT8_C( 38), INT8_C( 78), INT8_C( 77), INT8_C( 100), INT8_C( 95), INT8_C( 70), INT8_C( 22), INT8_C( 60), INT8_C( 28), INT8_C( 32), -INT8_C( 105), -INT8_C( 47), -INT8_C( 84), INT8_C( 100), INT8_C( 10), INT8_C( 51), INT8_C( 6), -INT8_C( 98), -INT8_C( 104) } }, { { INT8_C( 76), INT8_C( 100), INT8_C( 117), INT8_C( 93), -INT8_C( 89), INT8_C( 70), INT8_C( 27), -INT8_C( 59), INT8_C( 125), -INT8_C( 63), -INT8_C( 30), INT8_C( 3), INT8_C( 77), INT8_C( 74), INT8_C( 6), INT8_C( 53), INT8_C( 77), -INT8_C( 79), -INT8_C( 94), INT8_C( 5), INT8_C( 26), INT8_C( 6), -INT8_C( 90), INT8_C( 2), INT8_C( 50), INT8_C( 66), INT8_C( 17), INT8_C( 63), INT8_C( 104), INT8_C( 38), -INT8_C( 47), -INT8_C( 76) }, { -INT8_C( 118), INT8_C( 70), INT8_C( 18), INT8_C( 49), -INT8_C( 116), INT8_C( 45), -INT8_C( 9), INT8_C( 10), -INT8_C( 18), -INT8_C( 39), INT8_C( 13), INT8_C( 59), INT8_C( 35), INT8_C( 19), INT8_C( 112), INT8_C( 112), -INT8_C( 59), INT8_C( 19), INT8_C( 118), -INT8_C( 33), INT8_C( 25), INT8_C( 28), -INT8_C( 31), INT8_C( 75), INT8_C( 95), -INT8_C( 13), -INT8_C( 118), -INT8_C( 57), INT8_C( 25), INT8_C( 91), INT8_C( 123), -INT8_C( 92) }, { -INT8_C( 42), -INT8_C( 86), -INT8_C( 121), -INT8_C( 114), INT8_C( 51), INT8_C( 115), INT8_C( 18), -INT8_C( 49), INT8_C( 107), -INT8_C( 102), 
-INT8_C( 17), INT8_C( 62), INT8_C( 112), INT8_C( 93), INT8_C( 118), -INT8_C( 91), INT8_C( 18), -INT8_C( 60), INT8_C( 24), -INT8_C( 28), INT8_C( 51), INT8_C( 34), -INT8_C( 121), INT8_C( 77), -INT8_C( 111), INT8_C( 53), -INT8_C( 101), INT8_C( 6), -INT8_C( 127), -INT8_C( 127), INT8_C( 76), INT8_C( 88) } }, { { -INT8_C( 95), -INT8_C( 115), -INT8_C( 43), INT8_C( 46), -INT8_C( 70), -INT8_C( 52), INT8_C( 56), -INT8_C( 88), -INT8_C( 91), INT8_C( 69), -INT8_C( 29), -INT8_C( 56), INT8_C( 89), INT8_C( 84), INT8_C( 57), INT8_C( 30), INT8_C( 103), -INT8_C( 81), -INT8_C( 3), INT8_MIN, -INT8_C( 53), -INT8_C( 34), -INT8_C( 53), INT8_C( 42), -INT8_C( 47), INT8_C( 85), -INT8_C( 15), -INT8_C( 21), -INT8_C( 80), INT8_C( 109), -INT8_C( 113), INT8_C( 81) }, { -INT8_C( 6), INT8_C( 100), INT8_MAX, -INT8_C( 75), INT8_C( 49), -INT8_C( 73), INT8_C( 93), -INT8_C( 42), -INT8_C( 3), INT8_C( 65), -INT8_C( 97), INT8_C( 86), -INT8_C( 107), -INT8_C( 40), INT8_C( 116), -INT8_C( 4), -INT8_C( 121), INT8_C( 113), INT8_C( 124), INT8_C( 82), INT8_C( 79), INT8_C( 71), INT8_C( 125), INT8_C( 33), -INT8_C( 100), INT8_C( 110), INT8_C( 12), INT8_C( 76), -INT8_C( 37), -INT8_C( 101), -INT8_C( 99), -INT8_C( 42) }, { -INT8_C( 101), -INT8_C( 15), INT8_C( 84), -INT8_C( 29), -INT8_C( 21), -INT8_C( 125), -INT8_C( 107), INT8_C( 126), -INT8_C( 94), -INT8_C( 122), -INT8_C( 126), INT8_C( 30), -INT8_C( 18), INT8_C( 44), -INT8_C( 83), INT8_C( 26), -INT8_C( 18), INT8_C( 32), INT8_C( 121), -INT8_C( 46), INT8_C( 26), INT8_C( 37), INT8_C( 72), INT8_C( 75), INT8_C( 109), -INT8_C( 61), -INT8_C( 3), INT8_C( 55), -INT8_C( 117), INT8_C( 8), INT8_C( 44), INT8_C( 39) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi8(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi8(test_vec[i].b); simde__m256i r = simde_mm256_add_epi8(a, b); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm256_add_epi16 
(SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[16]; int16_t b[16]; int16_t r[16]; } test_vec[] = { { { INT16_C( 16774), INT16_C( 17194), INT16_C( 10451), INT16_C( 3235), INT16_C( 31051), -INT16_C( 23344), INT16_C( 16496), INT16_C( 29568), -INT16_C( 901), -INT16_C( 4920), -INT16_C( 5814), -INT16_C( 23726), INT16_C( 13824), INT16_C( 17418), INT16_C( 19901), INT16_C( 17354) }, { -INT16_C( 2930), INT16_C( 24966), INT16_C( 10524), INT16_C( 26734), INT16_C( 16034), INT16_C( 4876), -INT16_C( 29570), -INT16_C( 1658), INT16_C( 20104), -INT16_C( 11291), INT16_C( 14135), INT16_C( 14454), -INT16_C( 32658), INT16_C( 11132), INT16_C( 18125), INT16_C( 23662) }, { INT16_C( 13844), -INT16_C( 23376), INT16_C( 20975), INT16_C( 29969), -INT16_C( 18451), -INT16_C( 18468), -INT16_C( 13074), INT16_C( 27910), INT16_C( 19203), -INT16_C( 16211), INT16_C( 8321), -INT16_C( 9272), -INT16_C( 18834), INT16_C( 28550), -INT16_C( 27510), -INT16_C( 24520) } }, { { -INT16_C( 2758), INT16_C( 22461), INT16_C( 11038), -INT16_C( 15937), -INT16_C( 13463), -INT16_C( 6188), INT16_C( 23127), -INT16_C( 7968), -INT16_C( 14936), -INT16_C( 8269), INT16_C( 10749), INT16_C( 27415), -INT16_C( 27479), INT16_C( 30358), INT16_C( 1242), INT16_C( 5586) }, { -INT16_C( 28423), INT16_C( 6252), INT16_C( 11195), INT16_C( 9689), -INT16_C( 21002), INT16_C( 19724), -INT16_C( 4857), -INT16_C( 20691), -INT16_C( 8014), -INT16_C( 20594), -INT16_C( 23031), -INT16_C( 19942), -INT16_C( 20422), INT16_C( 5161), -INT16_C( 1099), -INT16_C( 20951) }, { -INT16_C( 31181), INT16_C( 28713), INT16_C( 22233), -INT16_C( 6248), INT16_C( 31071), INT16_C( 13536), INT16_C( 18270), -INT16_C( 28659), -INT16_C( 22950), -INT16_C( 28863), -INT16_C( 12282), INT16_C( 7473), INT16_C( 17635), -INT16_C( 30017), INT16_C( 143), -INT16_C( 15365) } }, { { -INT16_C( 27253), INT16_C( 18374), -INT16_C( 24640), -INT16_C( 18836), INT16_C( 30796), INT16_C( 21252), INT16_C( 12645), INT16_C( 6146), -INT16_C( 28398), INT16_C( 7111), -INT16_C( 7625), INT16_C( 29134), 
-INT16_C( 2158), INT16_C( 18309), -INT16_C( 20494), INT16_C( 32502) }, { -INT16_C( 17340), INT16_C( 1477), INT16_C( 12636), -INT16_C( 22341), -INT16_C( 16471), INT16_C( 4092), -INT16_C( 271), INT16_C( 807), -INT16_C( 4465), -INT16_C( 14818), -INT16_C( 4912), INT16_C( 25399), -INT16_C( 16925), -INT16_C( 10582), -INT16_C( 24468), -INT16_C( 20396) }, { INT16_C( 20943), INT16_C( 19851), -INT16_C( 12004), INT16_C( 24359), INT16_C( 14325), INT16_C( 25344), INT16_C( 12374), INT16_C( 6953), INT16_C( 32673), -INT16_C( 7707), -INT16_C( 12537), -INT16_C( 11003), -INT16_C( 19083), INT16_C( 7727), INT16_C( 20574), INT16_C( 12106) } }, { { INT16_C( 6493), -INT16_C( 17995), INT16_C( 29002), -INT16_C( 3231), INT16_C( 23856), INT16_C( 8450), INT16_C( 10588), -INT16_C( 5340), INT16_C( 17176), -INT16_C( 5966), -INT16_C( 5841), INT16_C( 4939), -INT16_C( 2394), INT16_C( 4841), INT16_C( 15766), -INT16_C( 3133) }, { INT16_C( 30806), -INT16_C( 24404), INT16_C( 3817), INT16_C( 6803), -INT16_C( 27029), -INT16_C( 14533), INT16_C( 24767), -INT16_C( 10317), INT16_C( 26019), -INT16_C( 11584), INT16_C( 2894), -INT16_C( 2587), -INT16_C( 12799), -INT16_C( 26617), -INT16_C( 13813), INT16_C( 24971) }, { -INT16_C( 28237), INT16_C( 23137), -INT16_C( 32717), INT16_C( 3572), -INT16_C( 3173), -INT16_C( 6083), -INT16_C( 30181), -INT16_C( 15657), -INT16_C( 22341), -INT16_C( 17550), -INT16_C( 2947), INT16_C( 2352), -INT16_C( 15193), -INT16_C( 21776), INT16_C( 1953), INT16_C( 21838) } }, { { INT16_C( 14403), INT16_C( 11265), -INT16_C( 27322), -INT16_C( 20154), -INT16_C( 32213), -INT16_C( 5511), INT16_C( 11490), -INT16_C( 31294), -INT16_C( 32111), -INT16_C( 8361), INT16_C( 15757), -INT16_C( 28716), -INT16_C( 9205), INT16_C( 5927), -INT16_C( 19802), -INT16_C( 5768) }, { INT16_C( 31466), INT16_C( 12310), INT16_C( 23567), INT16_C( 15074), INT16_C( 23518), -INT16_C( 16348), -INT16_C( 6521), INT16_C( 6213), -INT16_C( 25240), -INT16_C( 2313), -INT16_C( 13094), -INT16_C( 6779), -INT16_C( 21336), INT16_C( 20220), 
INT16_C( 30046), INT16_C( 18744) }, { -INT16_C( 19667), INT16_C( 23575), -INT16_C( 3755), -INT16_C( 5080), -INT16_C( 8695), -INT16_C( 21859), INT16_C( 4969), -INT16_C( 25081), INT16_C( 8185), -INT16_C( 10674), INT16_C( 2663), INT16_C( 30041), -INT16_C( 30541), INT16_C( 26147), INT16_C( 10244), INT16_C( 12976) } }, { { INT16_C( 20207), -INT16_C( 391), INT16_C( 23466), -INT16_C( 30408), INT16_C( 23734), INT16_C( 15689), -INT16_C( 28861), -INT16_C( 21675), INT16_C( 19756), INT16_C( 1697), INT16_C( 9753), -INT16_C( 15893), -INT16_C( 5934), INT16_C( 12559), INT16_C( 18269), INT16_C( 19578) }, { -INT16_C( 3179), INT16_C( 16458), -INT16_C( 32177), INT16_C( 1481), INT16_C( 4830), INT16_C( 8515), -INT16_C( 26463), -INT16_C( 12851), INT16_C( 28389), -INT16_C( 301), -INT16_C( 16491), INT16_C( 26559), -INT16_C( 12377), INT16_C( 1176), INT16_C( 4630), -INT16_C( 21424) }, { INT16_C( 17028), INT16_C( 16067), -INT16_C( 8711), -INT16_C( 28927), INT16_C( 28564), INT16_C( 24204), INT16_C( 10212), INT16_C( 31010), -INT16_C( 17391), INT16_C( 1396), -INT16_C( 6738), INT16_C( 10666), -INT16_C( 18311), INT16_C( 13735), INT16_C( 22899), -INT16_C( 1846) } }, { { -INT16_C( 26106), INT16_C( 21996), -INT16_C( 19172), -INT16_C( 1446), -INT16_C( 25145), INT16_C( 26908), -INT16_C( 5834), INT16_C( 6966), INT16_C( 2647), -INT16_C( 5094), -INT16_C( 9783), INT16_C( 28756), -INT16_C( 4952), -INT16_C( 16524), -INT16_C( 15105), INT16_C( 1387) }, { INT16_C( 22366), INT16_C( 31322), -INT16_C( 19444), -INT16_C( 11404), -INT16_C( 28590), -INT16_C( 30660), INT16_C( 29561), -INT16_C( 11869), -INT16_C( 17027), INT16_C( 18109), INT16_C( 4503), INT16_C( 16310), INT16_C( 11006), -INT16_C( 514), INT16_C( 27118), INT16_C( 19458) }, { -INT16_C( 3740), -INT16_C( 12218), INT16_C( 26920), -INT16_C( 12850), INT16_C( 11801), -INT16_C( 3752), INT16_C( 23727), -INT16_C( 4903), -INT16_C( 14380), INT16_C( 13015), -INT16_C( 5280), -INT16_C( 20470), INT16_C( 6054), -INT16_C( 17038), INT16_C( 12013), INT16_C( 20845) } }, { { 
INT16_C( 23744), -INT16_C( 13114), INT16_C( 14864), INT16_C( 25248), -INT16_C( 9013), INT16_C( 17642), -INT16_C( 29105), -INT16_C( 13291), -INT16_C( 11445), -INT16_C( 7662), -INT16_C( 14108), -INT16_C( 7646), INT16_C( 8434), -INT16_C( 7969), -INT16_C( 7798), INT16_C( 18988) }, { -INT16_C( 3523), INT16_C( 19991), -INT16_C( 18643), -INT16_C( 1872), -INT16_C( 25709), -INT16_C( 7364), INT16_C( 21033), INT16_C( 29871), -INT16_C( 15835), INT16_C( 2391), INT16_C( 31114), INT16_C( 32236), -INT16_C( 13415), INT16_C( 9053), -INT16_C( 30035), -INT16_C( 5522) }, { INT16_C( 20221), INT16_C( 6877), -INT16_C( 3779), INT16_C( 23376), INT16_C( 30814), INT16_C( 10278), -INT16_C( 8072), INT16_C( 16580), -INT16_C( 27280), -INT16_C( 5271), INT16_C( 17006), INT16_C( 24590), -INT16_C( 4981), INT16_C( 1084), INT16_C( 27703), INT16_C( 13466) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_add_epi16(a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm256_add_epi32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[8]; int32_t b[8]; int32_t r[8]; } test_vec[] = { { { -INT32_C( 162005335), -INT32_C( 622453682), -INT32_C( 1422873298), INT32_C( 732145866), -INT32_C( 1831844680), -INT32_C( 1222554996), -INT32_C( 916290153), -INT32_C( 253014202) }, { -INT32_C( 1679408307), -INT32_C( 1921659553), INT32_C( 1144563066), INT32_C( 91216973), -INT32_C( 1399308512), INT32_C( 543406473), -INT32_C( 2048211394), INT32_C( 1651889429) }, { -INT32_C( 1841413642), INT32_C( 1750854061), -INT32_C( 278310232), INT32_C( 823362839), INT32_C( 1063814104), -INT32_C( 679148523), INT32_C( 1330465749), INT32_C( 1398875227) } }, { { INT32_C( 2013092632), -INT32_C( 1576766936), INT32_C( 1709587735), INT32_C( 963269913), INT32_C( 518324884), -INT32_C( 96581189), INT32_C( 
612313103), INT32_C( 377943294) }, { INT32_C( 2022605647), INT32_C( 203068149), -INT32_C( 395247153), -INT32_C( 350102442), -INT32_C( 1710684194), INT32_C( 1603553104), INT32_C( 1854083952), INT32_C( 1451493639) }, { -INT32_C( 259269017), -INT32_C( 1373698787), INT32_C( 1314340582), INT32_C( 613167471), -INT32_C( 1192359310), INT32_C( 1506971915), -INT32_C( 1828570241), INT32_C( 1829436933) } }, { { -INT32_C( 2117201268), INT32_C( 1955457445), INT32_C( 1079902186), -INT32_C( 1188331813), -INT32_C( 715967355), -INT32_C( 332077188), INT32_C( 22722554), INT32_C( 1280892608) }, { -INT32_C( 1764940047), -INT32_C( 116761841), INT32_C( 893019994), INT32_C( 1810785766), INT32_C( 373375642), INT32_C( 604140841), -INT32_C( 316318675), INT32_C( 725253434) }, { INT32_C( 412825981), INT32_C( 1838695604), INT32_C( 1972922180), INT32_C( 622453953), -INT32_C( 342591713), INT32_C( 272063653), -INT32_C( 293596121), INT32_C( 2006146042) } }, { { -INT32_C( 1279195228), -INT32_C( 1129460638), INT32_C( 435283763), -INT32_C( 427433908), INT32_C( 1274856994), INT32_C( 1752170043), -INT32_C( 1789487782), -INT32_C( 1228894190) }, { -INT32_C( 93683049), -INT32_C( 2118772914), INT32_C( 1251715326), -INT32_C( 1439620984), INT32_C( 569716198), -INT32_C( 2037750484), INT32_C( 203153658), INT32_C( 130276464) }, { -INT32_C( 1372878277), INT32_C( 1046733744), INT32_C( 1686999089), -INT32_C( 1867054892), INT32_C( 1844573192), -INT32_C( 285580441), -INT32_C( 1586334124), -INT32_C( 1098617726) } }, { { -INT32_C( 1409208994), INT32_C( 1110292548), -INT32_C( 393426848), -INT32_C( 829243928), INT32_C( 401639403), -INT32_C( 425886996), -INT32_C( 889996966), -INT32_C( 204360043) }, { INT32_C( 664785891), -INT32_C( 345387893), INT32_C( 2111042965), -INT32_C( 1639160397), -INT32_C( 642433812), INT32_C( 280974006), -INT32_C( 1596280309), INT32_C( 1267968872) }, { -INT32_C( 744423103), INT32_C( 764904655), INT32_C( 1717616117), INT32_C( 1826562971), -INT32_C( 240794409), -INT32_C( 144912990), INT32_C( 
1808690021), INT32_C( 1063608829) } }, { { INT32_C( 158478974), -INT32_C( 1795892225), -INT32_C( 2079209519), INT32_C( 421682476), INT32_C( 1341314969), INT32_C( 895463721), -INT32_C( 858441372), INT32_C( 1662544357) }, { -INT32_C( 1704097125), INT32_C( 925786470), INT32_C( 1438335017), INT32_C( 930012573), -INT32_C( 578395980), INT32_C( 1980950034), INT32_C( 71493663), -INT32_C( 328705455) }, { -INT32_C( 1545618151), -INT32_C( 870105755), -INT32_C( 640874502), INT32_C( 1351695049), INT32_C( 762918989), -INT32_C( 1418553541), -INT32_C( 786947709), INT32_C( 1333838902) } }, { { INT32_C( 1267193317), INT32_C( 1602401590), -INT32_C( 1816838667), -INT32_C( 825613542), -INT32_C( 1767157628), INT32_C( 1443675702), -INT32_C( 145076570), -INT32_C( 1897676119) }, { -INT32_C( 824612201), INT32_C( 355293984), -INT32_C( 1297554792), -INT32_C( 1971293690), -INT32_C( 115332157), -INT32_C( 1890636567), INT32_C( 596028026), INT32_C( 78735980) }, { INT32_C( 442581116), INT32_C( 1957695574), INT32_C( 1180573837), INT32_C( 1498060064), -INT32_C( 1882489785), -INT32_C( 446960865), INT32_C( 450951456), -INT32_C( 1818940139) } }, { { -INT32_C( 187528492), INT32_C( 2097872869), -INT32_C( 399527198), -INT32_C( 395137243), -INT32_C( 991849765), INT32_C( 945041854), INT32_C( 1197267675), INT32_C( 424348996) }, { INT32_C( 2098011544), -INT32_C( 321763), -INT32_C( 270062902), -INT32_C( 1244177958), -INT32_C( 1451640597), -INT32_C( 991834647), -INT32_C( 334742104), -INT32_C( 486189237) }, { INT32_C( 1910483052), INT32_C( 2097551106), -INT32_C( 669590100), -INT32_C( 1639315201), INT32_C( 1851476934), -INT32_C( 46792793), INT32_C( 862525571), -INT32_C( 61840241) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi32(test_vec[i].b); simde__m256i r = simde_mm256_add_epi32(a, b); simde_test_x86_assert_equal_i32x8(r, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; } 
static int test_simde_mm256_add_epi64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[4]; int64_t b[4]; int64_t r[4]; } test_vec[] = { { { INT64_C( 6349431231190787759), -INT64_C( 254720912029419480), -INT64_C( 1585114888304005479), -INT64_C( 1993816771242646234) }, { INT64_C( 1718517911670765936), INT64_C( 2128417586150406366), -INT64_C( 5915363177252979306), -INT64_C( 4600475057435654898) }, { INT64_C( 8067949142861553695), INT64_C( 1873696674120986886), -INT64_C( 7500478065556984785), -INT64_C( 6594291828678301132) } }, { { -INT64_C( 5470999611667550195), -INT64_C( 9064397034655497070), INT64_C( 1368083854048468167), INT64_C( 6718945478753745967) }, { INT64_C( 3132092573034315757), INT64_C( 5341117986440919217), INT64_C( 4118987204768407570), -INT64_C( 1675067501828407790) }, { -INT64_C( 2338907038633234438), -INT64_C( 3723279048214577853), INT64_C( 5487071058816875737), INT64_C( 5043877976925338177) } }, { { INT64_C( 3979532279430715765), INT64_C( 335787494049348827), -INT64_C( 1668789448351202924), -INT64_C( 385608049042419774) }, { INT64_C( 5828296139942220324), INT64_C( 4591233640581478336), -INT64_C( 2785840211528068457), -INT64_C( 3331470012021497532) }, { -INT64_C( 8638915654336615527), INT64_C( 4927021134630827163), -INT64_C( 4454629659879271381), -INT64_C( 3717078061063917306) } }, { { -INT64_C( 3290620525541536369), -INT64_C( 1080797550094253291), -INT64_C( 2810608374329123313), INT64_C( 7121710342746510679) }, { -INT64_C( 4244998137797193421), -INT64_C( 1798196463016764481), INT64_C( 8705188840942103139), INT64_C( 708843972859818144) }, { -INT64_C( 7535618663338729790), -INT64_C( 2878994013111017772), INT64_C( 5894580466612979826), INT64_C( 7830554315606328823) } }, { { -INT64_C( 5181843299269526110), -INT64_C( 8809360683665548033), INT64_C( 8565929875506776903), INT64_C( 5736108927119678651) }, { INT64_C( 5453497739975387389), -INT64_C( 6486905996753797037), INT64_C( 9005490340975441324), -INT64_C( 6897921832086805687) }, { INT64_C( 271654440705861279), 
INT64_C( 3150477393290206546), -INT64_C( 875323857227333389), -INT64_C( 1161812904967127036) } }, { { -INT64_C( 1213926134046595696), -INT64_C( 2826187797014754619), INT64_C( 724891255407629523), INT64_C( 6159312156450889015) }, { INT64_C( 1853046892828626987), INT64_C( 5451329691363056547), -INT64_C( 8240199362465037608), INT64_C( 1523872447569830923) }, { INT64_C( 639120758782031291), INT64_C( 2625141894348301928), -INT64_C( 7515308107057408085), INT64_C( 7683184604020719938) } }, { { INT64_C( 5935625214915008241), INT64_C( 8607162423740308398), INT64_C( 5453775861294584267), -INT64_C( 1632482330388195751) }, { INT64_C( 8546019858262733976), INT64_C( 5566467293344283178), INT64_C( 6840156418577748321), -INT64_C( 2155261274678556364) }, { -INT64_C( 3965099000531809399), -INT64_C( 4273114356624960040), -INT64_C( 6152811793837219028), -INT64_C( 3787743605066752115) } }, { { -INT64_C( 41444076634421970), INT64_C( 8884486176460245628), INT64_C( 1062298490825980762), -INT64_C( 8700866135333380333) }, { -INT64_C( 6311257025808602730), -INT64_C( 7671331955779620109), -INT64_C( 4352814043531794769), -INT64_C( 5119924125424365294) }, { -INT64_C( 6352701102443024700), INT64_C( 1213154220680625519), -INT64_C( 3290515552705814007), INT64_C( 4625953812951805989) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi64(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi64(test_vec[i].b); simde__m256i r = simde_mm256_add_epi64(a, b); simde_test_x86_assert_equal_i64x4(r, simde_x_mm256_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm256_alignr_epi8_case0(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( 57), INT8_C( -47), INT8_C( -81), INT8_C( -95), INT8_C(-100), INT8_C( -69), INT8_C( -75), INT8_C(-127), INT8_C( -8), INT8_C( -83), INT8_C( -94), INT8_C(-113), INT8_C( 107), INT8_C( -39), INT8_C( -99), 
INT8_C( 46), INT8_C( -61), INT8_C( -72), INT8_C( -5), INT8_C(-126), INT8_C( 113), INT8_C(-120), INT8_C( -9), INT8_C( 117), INT8_C( 121), INT8_C( -14), INT8_C( -58), INT8_C(-118), INT8_C( -70), INT8_C( -35), INT8_C(-100), INT8_C( 48)), simde_mm256_set_epi8(INT8_C( -98), INT8_C( 80), INT8_C( -16), INT8_C( 73), INT8_C( 126), INT8_C(-115), INT8_C( 31), INT8_C( 80), INT8_C( -75), INT8_C( 117), INT8_C( -13), INT8_C( -70), INT8_C(-101), INT8_C( -10), INT8_C(-111), INT8_C( 96), INT8_C( -77), INT8_C( 0), INT8_C( 70), INT8_C(-121), INT8_C( 78), INT8_C( 54), INT8_C( 38), INT8_C( 18), INT8_C( -12), INT8_C( 68), INT8_C( 53), INT8_C( 45), INT8_C( 77), INT8_C( 103), INT8_C( 118), INT8_C( 116)), simde_mm256_set_epi8(INT8_C( -39), INT8_C( -99), INT8_C( 46), INT8_C( -98), INT8_C( 80), INT8_C( -16), INT8_C( 73), INT8_C( 126), INT8_C(-115), INT8_C( 31), INT8_C( 80), INT8_C( -75), INT8_C( 117), INT8_C( -13), INT8_C( -70), INT8_C(-101), INT8_C( -35), INT8_C(-100), INT8_C( 48), INT8_C( -77), INT8_C( 0), INT8_C( 70), INT8_C(-121), INT8_C( 78), INT8_C( 54), INT8_C( 38), INT8_C( 18), INT8_C( -12), INT8_C( 68), INT8_C( 53), INT8_C( 45), INT8_C( 77)) }, { simde_mm256_set_epi8(INT8_C( 8), INT8_C( 70), INT8_C( 96), INT8_C( 42), INT8_C( 73), INT8_C( -91), INT8_C( -93), INT8_C( -58), INT8_C( -35), INT8_C( -14), INT8_C( 118), INT8_C(-128), INT8_C( -97), INT8_C( 97), INT8_C( 22), INT8_C( 4), INT8_C( 24), INT8_C( 124), INT8_C( -32), INT8_C( -48), INT8_C( -74), INT8_C( 95), INT8_C( 95), INT8_C( -38), INT8_C( 54), INT8_C( 120), INT8_C( -65), INT8_C( -96), INT8_C( -44), INT8_C( -78), INT8_C( -95), INT8_C(-111)), simde_mm256_set_epi8(INT8_C( 66), INT8_C( 48), INT8_C( 60), INT8_C( -4), INT8_C( 33), INT8_C( 67), INT8_C( 97), INT8_C( 65), INT8_C( 80), INT8_C(-109), INT8_C( 88), INT8_C( -23), INT8_C( 107), INT8_C( -65), INT8_C( -65), INT8_C( 62), INT8_C( 90), INT8_C( 118), INT8_C( 25), INT8_C( 105), INT8_C( 109), INT8_C( -45), INT8_C( 4), INT8_C(-107), INT8_C( 55), INT8_C(-101), INT8_C( -65), INT8_C( 121), 
INT8_C( -76), INT8_C( 55), INT8_C( 6), INT8_C( 86)), simde_mm256_set_epi8(INT8_C( 97), INT8_C( 22), INT8_C( 4), INT8_C( 66), INT8_C( 48), INT8_C( 60), INT8_C( -4), INT8_C( 33), INT8_C( 67), INT8_C( 97), INT8_C( 65), INT8_C( 80), INT8_C(-109), INT8_C( 88), INT8_C( -23), INT8_C( 107), INT8_C( -78), INT8_C( -95), INT8_C(-111), INT8_C( 90), INT8_C( 118), INT8_C( 25), INT8_C( 105), INT8_C( 109), INT8_C( -45), INT8_C( 4), INT8_C(-107), INT8_C( 55), INT8_C(-101), INT8_C( -65), INT8_C( 121), INT8_C( -76)) }, { simde_mm256_set_epi8(INT8_C( -1), INT8_C( 93), INT8_C( 114), INT8_C( 49), INT8_C( 120), INT8_C( -70), INT8_C(-112), INT8_C( 19), INT8_C( 56), INT8_C( -12), INT8_C( -77), INT8_C( -85), INT8_C( -59), INT8_C( 80), INT8_C( -47), INT8_C( -28), INT8_C( -33), INT8_C( 73), INT8_C( -24), INT8_C(-106), INT8_C(-122), INT8_C(-111), INT8_C( -3), INT8_C( 57), INT8_C( 67), INT8_C( 107), INT8_C( -68), INT8_C(-101), INT8_C( -9), INT8_C( 5), INT8_C(-124), INT8_C( -69)), simde_mm256_set_epi8(INT8_C( -67), INT8_C( 65), INT8_C( -72), INT8_C( -90), INT8_C( -44), INT8_C( 2), INT8_C( 39), INT8_C( -45), INT8_C( 51), INT8_C( -30), INT8_C( -47), INT8_C( 1), INT8_C( -91), INT8_C( -40), INT8_C( -73), INT8_C( 33), INT8_C(-117), INT8_C( -31), INT8_C(-117), INT8_C( 60), INT8_C( -36), INT8_C(-120), INT8_C( -10), INT8_C( -12), INT8_C( -25), INT8_C(-103), INT8_C( 48), INT8_C( -50), INT8_C( 101), INT8_C( 93), INT8_C( -31), INT8_C( -33)), simde_mm256_set_epi8(INT8_C( 80), INT8_C( -47), INT8_C( -28), INT8_C( -67), INT8_C( 65), INT8_C( -72), INT8_C( -90), INT8_C( -44), INT8_C( 2), INT8_C( 39), INT8_C( -45), INT8_C( 51), INT8_C( -30), INT8_C( -47), INT8_C( 1), INT8_C( -91), INT8_C( 5), INT8_C(-124), INT8_C( -69), INT8_C(-117), INT8_C( -31), INT8_C(-117), INT8_C( 60), INT8_C( -36), INT8_C(-120), INT8_C( -10), INT8_C( -12), INT8_C( -25), INT8_C(-103), INT8_C( 48), INT8_C( -50), INT8_C( 101)) }, { simde_mm256_set_epi8(INT8_C( 114), INT8_C( -28), INT8_C( -81), INT8_C(-126), INT8_C(-114), INT8_C( -94), 
INT8_C(-101), INT8_C( 52), INT8_C( 30), INT8_C( 66), INT8_C(-105), INT8_C( 50), INT8_C( 48), INT8_C( 15), INT8_C( 1), INT8_C(-106), INT8_C( -36), INT8_C( 61), INT8_C( -18), INT8_C( 8), INT8_C( 89), INT8_C( -23), INT8_C( 58), INT8_C( -29), INT8_C( 110), INT8_C( 57), INT8_C( -39), INT8_C( 1), INT8_C( -60), INT8_C( 60), INT8_C( -80), INT8_C( -20)), simde_mm256_set_epi8(INT8_C( -60), INT8_C( 69), INT8_C( -69), INT8_C( 107), INT8_C( 2), INT8_C( -33), INT8_C( 14), INT8_C( 77), INT8_C( 15), INT8_C( 63), INT8_C( 63), INT8_C( -53), INT8_C( -9), INT8_C( 76), INT8_C( 7), INT8_C( -76), INT8_C( -23), INT8_C(-107), INT8_C( 84), INT8_C( 37), INT8_C( -18), INT8_C( 47), INT8_C( 58), INT8_C( 31), INT8_C( 69), INT8_C( 12), INT8_C( 46), INT8_C( 101), INT8_C( -38), INT8_C( 117), INT8_C(-113), INT8_C( -36)), simde_mm256_set_epi8(INT8_C( 15), INT8_C( 1), INT8_C(-106), INT8_C( -60), INT8_C( 69), INT8_C( -69), INT8_C( 107), INT8_C( 2), INT8_C( -33), INT8_C( 14), INT8_C( 77), INT8_C( 15), INT8_C( 63), INT8_C( 63), INT8_C( -53), INT8_C( -9), INT8_C( 60), INT8_C( -80), INT8_C( -20), INT8_C( -23), INT8_C(-107), INT8_C( 84), INT8_C( 37), INT8_C( -18), INT8_C( 47), INT8_C( 58), INT8_C( 31), INT8_C( 69), INT8_C( 12), INT8_C( 46), INT8_C( 101), INT8_C( -38)) }, { simde_mm256_set_epi8(INT8_C( 13), INT8_C( -32), INT8_C( -51), INT8_C( 49), INT8_C( 15), INT8_C( 15), INT8_C( -66), INT8_C( -38), INT8_C( 69), INT8_C( 1), INT8_C( -50), INT8_C(-103), INT8_C( 72), INT8_C( -87), INT8_C(-104), INT8_C( -58), INT8_C( 63), INT8_C(-120), INT8_C( 36), INT8_C( -61), INT8_C( 106), INT8_C( 97), INT8_C(-128), INT8_C( 26), INT8_C(-121), INT8_C( -27), INT8_C( 107), INT8_C(-112), INT8_C( 57), INT8_C( 39), INT8_C( 98), INT8_C( 126)), simde_mm256_set_epi8(INT8_C(-110), INT8_C( 50), INT8_C( 102), INT8_C( -85), INT8_C( -62), INT8_C( 74), INT8_C( 102), INT8_C( 36), INT8_C( -55), INT8_C(-104), INT8_C( 11), INT8_C( -70), INT8_C( -20), INT8_C(-104), INT8_C( -37), INT8_C( 122), INT8_C( -88), INT8_C( 70), INT8_C( 43), INT8_C( 
-34), INT8_C( 14), INT8_C( 55), INT8_C( -41), INT8_C( 33), INT8_C( 53), INT8_C(-102), INT8_C( 56), INT8_C( 26), INT8_C( -79), INT8_C( -30), INT8_C( 73), INT8_C( -53)), simde_mm256_set_epi8(INT8_C( -87), INT8_C(-104), INT8_C( -58), INT8_C(-110), INT8_C( 50), INT8_C( 102), INT8_C( -85), INT8_C( -62), INT8_C( 74), INT8_C( 102), INT8_C( 36), INT8_C( -55), INT8_C(-104), INT8_C( 11), INT8_C( -70), INT8_C( -20), INT8_C( 39), INT8_C( 98), INT8_C( 126), INT8_C( -88), INT8_C( 70), INT8_C( 43), INT8_C( -34), INT8_C( 14), INT8_C( 55), INT8_C( -41), INT8_C( 33), INT8_C( 53), INT8_C(-102), INT8_C( 56), INT8_C( 26), INT8_C( -79)) }, { simde_mm256_set_epi8(INT8_C(-103), INT8_C( 23), INT8_C( -90), INT8_C( 43), INT8_C( -8), INT8_C( 54), INT8_C( -93), INT8_C( 64), INT8_C(-116), INT8_C( -76), INT8_C( -53), INT8_C( 35), INT8_C( 51), INT8_C( -86), INT8_C( 8), INT8_C( 43), INT8_C( -60), INT8_C(-113), INT8_C( 44), INT8_C(-102), INT8_C( 77), INT8_C( 95), INT8_C( 57), INT8_C( 124), INT8_C( -91), INT8_C( -87), INT8_C( -54), INT8_C( -83), INT8_C( 108), INT8_C( 76), INT8_C( 90), INT8_C( -49)), simde_mm256_set_epi8(INT8_C( -43), INT8_C( -5), INT8_C( 94), INT8_C( 76), INT8_C( -9), INT8_C( 110), INT8_C( -82), INT8_C( 112), INT8_C( -25), INT8_C( 11), INT8_C( -46), INT8_C( -57), INT8_C( -13), INT8_C(-127), INT8_C( 10), INT8_C(-113), INT8_C( 55), INT8_C( 85), INT8_C( 75), INT8_C( 61), INT8_C( 49), INT8_C( -14), INT8_C( 44), INT8_C(-128), INT8_C(-102), INT8_C( -17), INT8_C( 64), INT8_C( -73), INT8_C( 14), INT8_C( 19), INT8_C( -2), INT8_C( 7)), simde_mm256_set_epi8(INT8_C( -86), INT8_C( 8), INT8_C( 43), INT8_C( -43), INT8_C( -5), INT8_C( 94), INT8_C( 76), INT8_C( -9), INT8_C( 110), INT8_C( -82), INT8_C( 112), INT8_C( -25), INT8_C( 11), INT8_C( -46), INT8_C( -57), INT8_C( -13), INT8_C( 76), INT8_C( 90), INT8_C( -49), INT8_C( 55), INT8_C( 85), INT8_C( 75), INT8_C( 61), INT8_C( 49), INT8_C( -14), INT8_C( 44), INT8_C(-128), INT8_C(-102), INT8_C( -17), INT8_C( 64), INT8_C( -73), INT8_C( 14)) }, { 
simde_mm256_set_epi8(INT8_C( -32), INT8_C( -82), INT8_C( 79), INT8_C( 79), INT8_C( 127), INT8_C( 16), INT8_C(-118), INT8_C( 113), INT8_C( -52), INT8_C( -25), INT8_C(-124), INT8_C( -77), INT8_C( 114), INT8_C(-118), INT8_C( -84), INT8_C( 73), INT8_C( -12), INT8_C( -67), INT8_C(-127), INT8_C( -69), INT8_C( 30), INT8_C( 14), INT8_C( -47), INT8_C(-119), INT8_C( -36), INT8_C( -97), INT8_C( -89), INT8_C( 24), INT8_C( -41), INT8_C( 117), INT8_C(-102), INT8_C( 54)), simde_mm256_set_epi8(INT8_C( -66), INT8_C( -50), INT8_C( 39), INT8_C( 30), INT8_C( 53), INT8_C( 108), INT8_C( -71), INT8_C( -62), INT8_C( 58), INT8_C( -69), INT8_C( -39), INT8_C( -28), INT8_C( 58), INT8_C( 54), INT8_C(-117), INT8_C( -49), INT8_C( 126), INT8_C( -71), INT8_C( -39), INT8_C(-120), INT8_C( 88), INT8_C( 41), INT8_C( 127), INT8_C( 111), INT8_C( -32), INT8_C( -18), INT8_C( 12), INT8_C( 97), INT8_C( 83), INT8_C( -96), INT8_C( 13), INT8_C(-101)), simde_mm256_set_epi8(INT8_C(-118), INT8_C( -84), INT8_C( 73), INT8_C( -66), INT8_C( -50), INT8_C( 39), INT8_C( 30), INT8_C( 53), INT8_C( 108), INT8_C( -71), INT8_C( -62), INT8_C( 58), INT8_C( -69), INT8_C( -39), INT8_C( -28), INT8_C( 58), INT8_C( 117), INT8_C(-102), INT8_C( 54), INT8_C( 126), INT8_C( -71), INT8_C( -39), INT8_C(-120), INT8_C( 88), INT8_C( 41), INT8_C( 127), INT8_C( 111), INT8_C( -32), INT8_C( -18), INT8_C( 12), INT8_C( 97), INT8_C( 83)) }, { simde_mm256_set_epi8(INT8_C(-105), INT8_C( 9), INT8_C( -52), INT8_C( 54), INT8_C( -11), INT8_C( -39), INT8_C( -64), INT8_C( -79), INT8_C( 88), INT8_C( 52), INT8_C( -95), INT8_C( -70), INT8_C( 46), INT8_C( 121), INT8_C( -22), INT8_C( 113), INT8_C( 26), INT8_C( 104), INT8_C( -52), INT8_C( 109), INT8_C( 5), INT8_C( 87), INT8_C( 90), INT8_C( -7), INT8_C( -35), INT8_C(-112), INT8_C( 20), INT8_C(-128), INT8_C( -21), INT8_C( -67), INT8_C( 37), INT8_C( -35)), simde_mm256_set_epi8(INT8_C( 66), INT8_C( -23), INT8_C( 45), INT8_C( -38), INT8_C( -94), INT8_C( 119), INT8_C( -23), INT8_C( 70), INT8_C( -75), INT8_C( 25), 
INT8_C( 79), INT8_C(-101), INT8_C(-122), INT8_C(-107), INT8_C( -10), INT8_C(-109), INT8_C( -96), INT8_C( 45), INT8_C( 31), INT8_C( -60), INT8_C( 100), INT8_C( 115), INT8_C( 79), INT8_C( 38), INT8_C( -35), INT8_C( 57), INT8_C( 13), INT8_C( 93), INT8_C( 54), INT8_C( -61), INT8_C( 101), INT8_C(-113)), simde_mm256_set_epi8(INT8_C( 121), INT8_C( -22), INT8_C( 113), INT8_C( 66), INT8_C( -23), INT8_C( 45), INT8_C( -38), INT8_C( -94), INT8_C( 119), INT8_C( -23), INT8_C( 70), INT8_C( -75), INT8_C( 25), INT8_C( 79), INT8_C(-101), INT8_C(-122), INT8_C( -67), INT8_C( 37), INT8_C( -35), INT8_C( -96), INT8_C( 45), INT8_C( 31), INT8_C( -60), INT8_C( 100), INT8_C( 115), INT8_C( 79), INT8_C( 38), INT8_C( -35), INT8_C( 57), INT8_C( 13), INT8_C( 93), INT8_C( 54)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_alignr_epi8(test_vec[i].a, test_vec[i].b, 3); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; }
/* Test for simde_mm256_alignr_epi8 with a literal byte count of 10.
 *
 * Holds eight precomputed (a, b, r) triples; for each one the function
 * calls simde_mm256_alignr_epi8(a, b, 10) and checks the result against r
 * with simde_assert_m256i_i8, then returns 0 once the loop has finished.
 *
 * Reading the table in simde_mm256_set_epi8 argument order, every group of
 * 16 expected values is the last ten arguments of the matching group of `a`
 * followed by the first six arguments of the matching group of `b`; the two
 * 16-value groups of each vector are therefore handled independently. */
static int test_simde_mm256_alignr_epi8_case1(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( 57), INT8_C( -47), INT8_C( -81), INT8_C( -95), INT8_C(-100), INT8_C( -69), INT8_C( -75), INT8_C(-127), INT8_C( -8), INT8_C( -83), INT8_C( -94), INT8_C(-113), INT8_C( 107), INT8_C( -39), INT8_C( -99), INT8_C( 46), INT8_C( -61), INT8_C( -72), INT8_C( -5), INT8_C(-126), INT8_C( 113), INT8_C(-120), INT8_C( -9), INT8_C( 117), INT8_C( 121), INT8_C( -14), INT8_C( -58), INT8_C(-118), INT8_C( -70), INT8_C( -35), INT8_C(-100), INT8_C( 48)), simde_mm256_set_epi8(INT8_C( -98), INT8_C( 80), INT8_C( -16), INT8_C( 73), INT8_C( 126), INT8_C(-115), INT8_C( 31), INT8_C( 80), INT8_C( -75), INT8_C( 117), INT8_C( -13), INT8_C( -70), INT8_C(-101), INT8_C( -10), INT8_C(-111), INT8_C( 96), INT8_C( -77), INT8_C( 0), INT8_C( 70), INT8_C(-121), INT8_C( 78), INT8_C( 54), INT8_C( 38), INT8_C( 18), INT8_C( -12), INT8_C( 68), INT8_C( 53), INT8_C( 45), INT8_C( 77), 
INT8_C( 103), INT8_C( 118), INT8_C( 116)), simde_mm256_set_epi8(INT8_C( -75), INT8_C(-127), INT8_C( -8), INT8_C( -83), INT8_C( -94), INT8_C(-113), INT8_C( 107), INT8_C( -39), INT8_C( -99), INT8_C( 46), INT8_C( -98), INT8_C( 80), INT8_C( -16), INT8_C( 73), INT8_C( 126), INT8_C(-115), INT8_C( -9), INT8_C( 117), INT8_C( 121), INT8_C( -14), INT8_C( -58), INT8_C(-118), INT8_C( -70), INT8_C( -35), INT8_C(-100), INT8_C( 48), INT8_C( -77), INT8_C( 0), INT8_C( 70), INT8_C(-121), INT8_C( 78), INT8_C( 54)) }, { simde_mm256_set_epi8(INT8_C( 8), INT8_C( 70), INT8_C( 96), INT8_C( 42), INT8_C( 73), INT8_C( -91), INT8_C( -93), INT8_C( -58), INT8_C( -35), INT8_C( -14), INT8_C( 118), INT8_C(-128), INT8_C( -97), INT8_C( 97), INT8_C( 22), INT8_C( 4), INT8_C( 24), INT8_C( 124), INT8_C( -32), INT8_C( -48), INT8_C( -74), INT8_C( 95), INT8_C( 95), INT8_C( -38), INT8_C( 54), INT8_C( 120), INT8_C( -65), INT8_C( -96), INT8_C( -44), INT8_C( -78), INT8_C( -95), INT8_C(-111)), simde_mm256_set_epi8(INT8_C( 66), INT8_C( 48), INT8_C( 60), INT8_C( -4), INT8_C( 33), INT8_C( 67), INT8_C( 97), INT8_C( 65), INT8_C( 80), INT8_C(-109), INT8_C( 88), INT8_C( -23), INT8_C( 107), INT8_C( -65), INT8_C( -65), INT8_C( 62), INT8_C( 90), INT8_C( 118), INT8_C( 25), INT8_C( 105), INT8_C( 109), INT8_C( -45), INT8_C( 4), INT8_C(-107), INT8_C( 55), INT8_C(-101), INT8_C( -65), INT8_C( 121), INT8_C( -76), INT8_C( 55), INT8_C( 6), INT8_C( 86)), simde_mm256_set_epi8(INT8_C( -93), INT8_C( -58), INT8_C( -35), INT8_C( -14), INT8_C( 118), INT8_C(-128), INT8_C( -97), INT8_C( 97), INT8_C( 22), INT8_C( 4), INT8_C( 66), INT8_C( 48), INT8_C( 60), INT8_C( -4), INT8_C( 33), INT8_C( 67), INT8_C( 95), INT8_C( -38), INT8_C( 54), INT8_C( 120), INT8_C( -65), INT8_C( -96), INT8_C( -44), INT8_C( -78), INT8_C( -95), INT8_C(-111), INT8_C( 90), INT8_C( 118), INT8_C( 25), INT8_C( 105), INT8_C( 109), INT8_C( -45)) }, { simde_mm256_set_epi8(INT8_C( -1), INT8_C( 93), INT8_C( 114), INT8_C( 49), INT8_C( 120), INT8_C( -70), INT8_C(-112), INT8_C( 
19), INT8_C( 56), INT8_C( -12), INT8_C( -77), INT8_C( -85), INT8_C( -59), INT8_C( 80), INT8_C( -47), INT8_C( -28), INT8_C( -33), INT8_C( 73), INT8_C( -24), INT8_C(-106), INT8_C(-122), INT8_C(-111), INT8_C( -3), INT8_C( 57), INT8_C( 67), INT8_C( 107), INT8_C( -68), INT8_C(-101), INT8_C( -9), INT8_C( 5), INT8_C(-124), INT8_C( -69)), simde_mm256_set_epi8(INT8_C( -67), INT8_C( 65), INT8_C( -72), INT8_C( -90), INT8_C( -44), INT8_C( 2), INT8_C( 39), INT8_C( -45), INT8_C( 51), INT8_C( -30), INT8_C( -47), INT8_C( 1), INT8_C( -91), INT8_C( -40), INT8_C( -73), INT8_C( 33), INT8_C(-117), INT8_C( -31), INT8_C(-117), INT8_C( 60), INT8_C( -36), INT8_C(-120), INT8_C( -10), INT8_C( -12), INT8_C( -25), INT8_C(-103), INT8_C( 48), INT8_C( -50), INT8_C( 101), INT8_C( 93), INT8_C( -31), INT8_C( -33)), simde_mm256_set_epi8(INT8_C(-112), INT8_C( 19), INT8_C( 56), INT8_C( -12), INT8_C( -77), INT8_C( -85), INT8_C( -59), INT8_C( 80), INT8_C( -47), INT8_C( -28), INT8_C( -67), INT8_C( 65), INT8_C( -72), INT8_C( -90), INT8_C( -44), INT8_C( 2), INT8_C( -3), INT8_C( 57), INT8_C( 67), INT8_C( 107), INT8_C( -68), INT8_C(-101), INT8_C( -9), INT8_C( 5), INT8_C(-124), INT8_C( -69), INT8_C(-117), INT8_C( -31), INT8_C(-117), INT8_C( 60), INT8_C( -36), INT8_C(-120)) }, { simde_mm256_set_epi8(INT8_C( 114), INT8_C( -28), INT8_C( -81), INT8_C(-126), INT8_C(-114), INT8_C( -94), INT8_C(-101), INT8_C( 52), INT8_C( 30), INT8_C( 66), INT8_C(-105), INT8_C( 50), INT8_C( 48), INT8_C( 15), INT8_C( 1), INT8_C(-106), INT8_C( -36), INT8_C( 61), INT8_C( -18), INT8_C( 8), INT8_C( 89), INT8_C( -23), INT8_C( 58), INT8_C( -29), INT8_C( 110), INT8_C( 57), INT8_C( -39), INT8_C( 1), INT8_C( -60), INT8_C( 60), INT8_C( -80), INT8_C( -20)), simde_mm256_set_epi8(INT8_C( -60), INT8_C( 69), INT8_C( -69), INT8_C( 107), INT8_C( 2), INT8_C( -33), INT8_C( 14), INT8_C( 77), INT8_C( 15), INT8_C( 63), INT8_C( 63), INT8_C( -53), INT8_C( -9), INT8_C( 76), INT8_C( 7), INT8_C( -76), INT8_C( -23), INT8_C(-107), INT8_C( 84), INT8_C( 37), 
INT8_C( -18), INT8_C( 47), INT8_C( 58), INT8_C( 31), INT8_C( 69), INT8_C( 12), INT8_C( 46), INT8_C( 101), INT8_C( -38), INT8_C( 117), INT8_C(-113), INT8_C( -36)), simde_mm256_set_epi8(INT8_C(-101), INT8_C( 52), INT8_C( 30), INT8_C( 66), INT8_C(-105), INT8_C( 50), INT8_C( 48), INT8_C( 15), INT8_C( 1), INT8_C(-106), INT8_C( -60), INT8_C( 69), INT8_C( -69), INT8_C( 107), INT8_C( 2), INT8_C( -33), INT8_C( 58), INT8_C( -29), INT8_C( 110), INT8_C( 57), INT8_C( -39), INT8_C( 1), INT8_C( -60), INT8_C( 60), INT8_C( -80), INT8_C( -20), INT8_C( -23), INT8_C(-107), INT8_C( 84), INT8_C( 37), INT8_C( -18), INT8_C( 47)) }, { simde_mm256_set_epi8(INT8_C( 13), INT8_C( -32), INT8_C( -51), INT8_C( 49), INT8_C( 15), INT8_C( 15), INT8_C( -66), INT8_C( -38), INT8_C( 69), INT8_C( 1), INT8_C( -50), INT8_C(-103), INT8_C( 72), INT8_C( -87), INT8_C(-104), INT8_C( -58), INT8_C( 63), INT8_C(-120), INT8_C( 36), INT8_C( -61), INT8_C( 106), INT8_C( 97), INT8_C(-128), INT8_C( 26), INT8_C(-121), INT8_C( -27), INT8_C( 107), INT8_C(-112), INT8_C( 57), INT8_C( 39), INT8_C( 98), INT8_C( 126)), simde_mm256_set_epi8(INT8_C(-110), INT8_C( 50), INT8_C( 102), INT8_C( -85), INT8_C( -62), INT8_C( 74), INT8_C( 102), INT8_C( 36), INT8_C( -55), INT8_C(-104), INT8_C( 11), INT8_C( -70), INT8_C( -20), INT8_C(-104), INT8_C( -37), INT8_C( 122), INT8_C( -88), INT8_C( 70), INT8_C( 43), INT8_C( -34), INT8_C( 14), INT8_C( 55), INT8_C( -41), INT8_C( 33), INT8_C( 53), INT8_C(-102), INT8_C( 56), INT8_C( 26), INT8_C( -79), INT8_C( -30), INT8_C( 73), INT8_C( -53)), simde_mm256_set_epi8(INT8_C( -66), INT8_C( -38), INT8_C( 69), INT8_C( 1), INT8_C( -50), INT8_C(-103), INT8_C( 72), INT8_C( -87), INT8_C(-104), INT8_C( -58), INT8_C(-110), INT8_C( 50), INT8_C( 102), INT8_C( -85), INT8_C( -62), INT8_C( 74), INT8_C(-128), INT8_C( 26), INT8_C(-121), INT8_C( -27), INT8_C( 107), INT8_C(-112), INT8_C( 57), INT8_C( 39), INT8_C( 98), INT8_C( 126), INT8_C( -88), INT8_C( 70), INT8_C( 43), INT8_C( -34), INT8_C( 14), INT8_C( 55)) }, { 
simde_mm256_set_epi8(INT8_C(-103), INT8_C( 23), INT8_C( -90), INT8_C( 43), INT8_C( -8), INT8_C( 54), INT8_C( -93), INT8_C( 64), INT8_C(-116), INT8_C( -76), INT8_C( -53), INT8_C( 35), INT8_C( 51), INT8_C( -86), INT8_C( 8), INT8_C( 43), INT8_C( -60), INT8_C(-113), INT8_C( 44), INT8_C(-102), INT8_C( 77), INT8_C( 95), INT8_C( 57), INT8_C( 124), INT8_C( -91), INT8_C( -87), INT8_C( -54), INT8_C( -83), INT8_C( 108), INT8_C( 76), INT8_C( 90), INT8_C( -49)), simde_mm256_set_epi8(INT8_C( -43), INT8_C( -5), INT8_C( 94), INT8_C( 76), INT8_C( -9), INT8_C( 110), INT8_C( -82), INT8_C( 112), INT8_C( -25), INT8_C( 11), INT8_C( -46), INT8_C( -57), INT8_C( -13), INT8_C(-127), INT8_C( 10), INT8_C(-113), INT8_C( 55), INT8_C( 85), INT8_C( 75), INT8_C( 61), INT8_C( 49), INT8_C( -14), INT8_C( 44), INT8_C(-128), INT8_C(-102), INT8_C( -17), INT8_C( 64), INT8_C( -73), INT8_C( 14), INT8_C( 19), INT8_C( -2), INT8_C( 7)), simde_mm256_set_epi8(INT8_C( -93), INT8_C( 64), INT8_C(-116), INT8_C( -76), INT8_C( -53), INT8_C( 35), INT8_C( 51), INT8_C( -86), INT8_C( 8), INT8_C( 43), INT8_C( -43), INT8_C( -5), INT8_C( 94), INT8_C( 76), INT8_C( -9), INT8_C( 110), INT8_C( 57), INT8_C( 124), INT8_C( -91), INT8_C( -87), INT8_C( -54), INT8_C( -83), INT8_C( 108), INT8_C( 76), INT8_C( 90), INT8_C( -49), INT8_C( 55), INT8_C( 85), INT8_C( 75), INT8_C( 61), INT8_C( 49), INT8_C( -14)) }, { simde_mm256_set_epi8(INT8_C( -32), INT8_C( -82), INT8_C( 79), INT8_C( 79), INT8_C( 127), INT8_C( 16), INT8_C(-118), INT8_C( 113), INT8_C( -52), INT8_C( -25), INT8_C(-124), INT8_C( -77), INT8_C( 114), INT8_C(-118), INT8_C( -84), INT8_C( 73), INT8_C( -12), INT8_C( -67), INT8_C(-127), INT8_C( -69), INT8_C( 30), INT8_C( 14), INT8_C( -47), INT8_C(-119), INT8_C( -36), INT8_C( -97), INT8_C( -89), INT8_C( 24), INT8_C( -41), INT8_C( 117), INT8_C(-102), INT8_C( 54)), simde_mm256_set_epi8(INT8_C( -66), INT8_C( -50), INT8_C( 39), INT8_C( 30), INT8_C( 53), INT8_C( 108), INT8_C( -71), INT8_C( -62), INT8_C( 58), INT8_C( -69), INT8_C( -39), 
INT8_C( -28), INT8_C( 58), INT8_C( 54), INT8_C(-117), INT8_C( -49), INT8_C( 126), INT8_C( -71), INT8_C( -39), INT8_C(-120), INT8_C( 88), INT8_C( 41), INT8_C( 127), INT8_C( 111), INT8_C( -32), INT8_C( -18), INT8_C( 12), INT8_C( 97), INT8_C( 83), INT8_C( -96), INT8_C( 13), INT8_C(-101)), simde_mm256_set_epi8(INT8_C(-118), INT8_C( 113), INT8_C( -52), INT8_C( -25), INT8_C(-124), INT8_C( -77), INT8_C( 114), INT8_C(-118), INT8_C( -84), INT8_C( 73), INT8_C( -66), INT8_C( -50), INT8_C( 39), INT8_C( 30), INT8_C( 53), INT8_C( 108), INT8_C( -47), INT8_C(-119), INT8_C( -36), INT8_C( -97), INT8_C( -89), INT8_C( 24), INT8_C( -41), INT8_C( 117), INT8_C(-102), INT8_C( 54), INT8_C( 126), INT8_C( -71), INT8_C( -39), INT8_C(-120), INT8_C( 88), INT8_C( 41)) }, { simde_mm256_set_epi8(INT8_C(-105), INT8_C( 9), INT8_C( -52), INT8_C( 54), INT8_C( -11), INT8_C( -39), INT8_C( -64), INT8_C( -79), INT8_C( 88), INT8_C( 52), INT8_C( -95), INT8_C( -70), INT8_C( 46), INT8_C( 121), INT8_C( -22), INT8_C( 113), INT8_C( 26), INT8_C( 104), INT8_C( -52), INT8_C( 109), INT8_C( 5), INT8_C( 87), INT8_C( 90), INT8_C( -7), INT8_C( -35), INT8_C(-112), INT8_C( 20), INT8_C(-128), INT8_C( -21), INT8_C( -67), INT8_C( 37), INT8_C( -35)), simde_mm256_set_epi8(INT8_C( 66), INT8_C( -23), INT8_C( 45), INT8_C( -38), INT8_C( -94), INT8_C( 119), INT8_C( -23), INT8_C( 70), INT8_C( -75), INT8_C( 25), INT8_C( 79), INT8_C(-101), INT8_C(-122), INT8_C(-107), INT8_C( -10), INT8_C(-109), INT8_C( -96), INT8_C( 45), INT8_C( 31), INT8_C( -60), INT8_C( 100), INT8_C( 115), INT8_C( 79), INT8_C( 38), INT8_C( -35), INT8_C( 57), INT8_C( 13), INT8_C( 93), INT8_C( 54), INT8_C( -61), INT8_C( 101), INT8_C(-113)), simde_mm256_set_epi8(INT8_C( -64), INT8_C( -79), INT8_C( 88), INT8_C( 52), INT8_C( -95), INT8_C( -70), INT8_C( 46), INT8_C( 121), INT8_C( -22), INT8_C( 113), INT8_C( 66), INT8_C( -23), INT8_C( 45), INT8_C( -38), INT8_C( -94), INT8_C( 119), INT8_C( 90), INT8_C( -7), INT8_C( -35), INT8_C(-112), INT8_C( 20), INT8_C(-128), INT8_C( 
-21), INT8_C( -67), INT8_C( 37), INT8_C( -35), INT8_C( -96), INT8_C( 45), INT8_C( 31), INT8_C( -60), INT8_C( 100), INT8_C( 115)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_alignr_epi8(test_vec[i].a, test_vec[i].b, 10); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; }
/* Test for simde_mm256_alignr_epi8 with a literal byte count of 17.
 *
 * Holds eight precomputed (a, b, r) triples; for each one the function
 * calls simde_mm256_alignr_epi8(a, b, 17) and checks the result against r
 * with simde_assert_m256i_i8, then returns 0 once the loop has finished.
 *
 * Reading the table in simde_mm256_set_epi8 argument order, every group of
 * 16 expected values is a single zero followed by the first fifteen
 * arguments of the matching group of `a`; no value from `b` appears in the
 * expected output for this count. */
static int test_simde_mm256_alignr_epi8_case2(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( 113), INT8_C( -69), INT8_C( 23), INT8_C( -66), INT8_C( 115), INT8_C( -83), INT8_C( -66), INT8_C( -71), INT8_C( 28), INT8_C( 74), INT8_C( -4), INT8_C( 16), INT8_C( 16), INT8_C( -20), INT8_C( 79), INT8_C( -49), INT8_C( 13), INT8_C( 59), INT8_C( -99), INT8_C( 73), INT8_C( 19), INT8_C( 31), INT8_C( -64), INT8_C( -94), INT8_C( 111), INT8_C( 121), INT8_C( -91), INT8_C( -99), INT8_C(-120), INT8_C( 24), INT8_C(-116), INT8_C( -29)), simde_mm256_set_epi8(INT8_C( 19), INT8_C( 117), INT8_C( 83), INT8_C( 54), INT8_C( -84), INT8_C(-110), INT8_C( 115), INT8_C( 25), INT8_C( 122), INT8_C( -20), INT8_C( 49), INT8_C( -87), INT8_C( 9), INT8_C( -79), INT8_C( 28), INT8_C( -93), INT8_C( -97), INT8_C( 38), INT8_C( 85), INT8_C( -70), INT8_C( 96), INT8_C( 7), INT8_C( -55), INT8_C( -8), INT8_C( 94), INT8_C( 69), INT8_C( 20), INT8_C( 64), INT8_C( 54), INT8_C( 88), INT8_C( -92), INT8_C( -89)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 113), INT8_C( -69), INT8_C( 23), INT8_C( -66), INT8_C( 115), INT8_C( -83), INT8_C( -66), INT8_C( -71), INT8_C( 28), INT8_C( 74), INT8_C( -4), INT8_C( 16), INT8_C( 16), INT8_C( -20), INT8_C( 79), INT8_C( 0), INT8_C( 13), INT8_C( 59), INT8_C( -99), INT8_C( 73), INT8_C( 19), INT8_C( 31), INT8_C( -64), INT8_C( -94), INT8_C( 111), INT8_C( 121), INT8_C( -91), INT8_C( -99), INT8_C(-120), INT8_C( 24), INT8_C(-116)) }, { simde_mm256_set_epi8(INT8_C( -7), INT8_C( -66), INT8_C(-120), INT8_C( 103), INT8_C( -48), INT8_C( -55), INT8_C(-109), INT8_C( 52), INT8_C( 
103), INT8_C( 30), INT8_C( -21), INT8_C(-107), INT8_C( 14), INT8_C( -9), INT8_C( 61), INT8_C(-114), INT8_C(-120), INT8_C(-116), INT8_C( -34), INT8_C( -96), INT8_C( 124), INT8_C( 29), INT8_C( -49), INT8_C( 74), INT8_C( -82), INT8_C( 66), INT8_C( 24), INT8_C( 47), INT8_C( -7), INT8_C( -96), INT8_C( 99), INT8_C( -53)), simde_mm256_set_epi8(INT8_C( 77), INT8_C( 25), INT8_C( 25), INT8_C( 69), INT8_C( -34), INT8_C( 2), INT8_C( 12), INT8_C( -5), INT8_C( -49), INT8_C( -58), INT8_C( -38), INT8_C( -80), INT8_C( 88), INT8_C( 48), INT8_C( 49), INT8_C( 73), INT8_C( 121), INT8_C( 39), INT8_C( -20), INT8_C( 28), INT8_C( 121), INT8_C( 0), INT8_C( -34), INT8_C( 24), INT8_C( -45), INT8_C(-121), INT8_C( -12), INT8_C( 37), INT8_C( 64), INT8_C( 31), INT8_C( 31), INT8_C( 41)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( -7), INT8_C( -66), INT8_C(-120), INT8_C( 103), INT8_C( -48), INT8_C( -55), INT8_C(-109), INT8_C( 52), INT8_C( 103), INT8_C( 30), INT8_C( -21), INT8_C(-107), INT8_C( 14), INT8_C( -9), INT8_C( 61), INT8_C( 0), INT8_C(-120), INT8_C(-116), INT8_C( -34), INT8_C( -96), INT8_C( 124), INT8_C( 29), INT8_C( -49), INT8_C( 74), INT8_C( -82), INT8_C( 66), INT8_C( 24), INT8_C( 47), INT8_C( -7), INT8_C( -96), INT8_C( 99)) }, { simde_mm256_set_epi8(INT8_C( -1), INT8_C( 20), INT8_C( -95), INT8_C( 112), INT8_C( 49), INT8_C(-103), INT8_C( 108), INT8_C( -46), INT8_C( -85), INT8_C( -99), INT8_C( 78), INT8_C( 125), INT8_C( -66), INT8_C( -37), INT8_C( 94), INT8_C( 112), INT8_C(-126), INT8_C( -37), INT8_C(-124), INT8_C( -71), INT8_C( -21), INT8_C( -79), INT8_C( -2), INT8_C( 34), INT8_C( -56), INT8_C( 22), INT8_C(-102), INT8_C( 53), INT8_C( -24), INT8_C( -87), INT8_C( 16), INT8_C( -85)), simde_mm256_set_epi8(INT8_C( 66), INT8_C( -80), INT8_C( 10), INT8_C( -95), INT8_C( -33), INT8_C( 65), INT8_C(-115), INT8_C( -97), INT8_C( -96), INT8_C( 17), INT8_C( 123), INT8_C( -81), INT8_C( -25), INT8_C( -92), INT8_C( 110), INT8_C( 74), INT8_C( 58), INT8_C( 126), INT8_C( 24), INT8_C( -87), INT8_C(-124), INT8_C( 
-7), INT8_C( -55), INT8_C( 112), INT8_C( 10), INT8_C(-117), INT8_C( -16), INT8_C( 34), INT8_C( -23), INT8_C( 105), INT8_C( 32), INT8_C(-122)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( -1), INT8_C( 20), INT8_C( -95), INT8_C( 112), INT8_C( 49), INT8_C(-103), INT8_C( 108), INT8_C( -46), INT8_C( -85), INT8_C( -99), INT8_C( 78), INT8_C( 125), INT8_C( -66), INT8_C( -37), INT8_C( 94), INT8_C( 0), INT8_C(-126), INT8_C( -37), INT8_C(-124), INT8_C( -71), INT8_C( -21), INT8_C( -79), INT8_C( -2), INT8_C( 34), INT8_C( -56), INT8_C( 22), INT8_C(-102), INT8_C( 53), INT8_C( -24), INT8_C( -87), INT8_C( 16)) }, { simde_mm256_set_epi8(INT8_C( 25), INT8_C( -64), INT8_C( 105), INT8_C( -19), INT8_C(-100), INT8_C( 119), INT8_C( 53), INT8_C(-128), INT8_C( 50), INT8_C(-115), INT8_C( 36), INT8_C( 126), INT8_C( 7), INT8_C( -21), INT8_C(-106), INT8_C(-120), INT8_C( 127), INT8_C( -66), INT8_C( -1), INT8_C( -93), INT8_C( -5), INT8_C( 40), INT8_C( 78), INT8_C(-119), INT8_C( 82), INT8_C( -17), INT8_C( 18), INT8_C( 47), INT8_C( 8), INT8_C(-108), INT8_C(-114), INT8_C( 65)), simde_mm256_set_epi8(INT8_C(-113), INT8_C( 113), INT8_C(-118), INT8_C( -40), INT8_C(-102), INT8_C( -62), INT8_C( 64), INT8_C( -33), INT8_C( 77), INT8_C( -83), INT8_C( -74), INT8_C( 19), INT8_C( 71), INT8_C( 17), INT8_C( -17), INT8_C( -35), INT8_C( 93), INT8_C( -41), INT8_C( 11), INT8_C( 0), INT8_C( -41), INT8_C( -66), INT8_C( -72), INT8_C( 117), INT8_C( 100), INT8_C(-122), INT8_C( 24), INT8_C(-112), INT8_C( 87), INT8_C( 37), INT8_C( -17), INT8_C( -51)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 25), INT8_C( -64), INT8_C( 105), INT8_C( -19), INT8_C(-100), INT8_C( 119), INT8_C( 53), INT8_C(-128), INT8_C( 50), INT8_C(-115), INT8_C( 36), INT8_C( 126), INT8_C( 7), INT8_C( -21), INT8_C(-106), INT8_C( 0), INT8_C( 127), INT8_C( -66), INT8_C( -1), INT8_C( -93), INT8_C( -5), INT8_C( 40), INT8_C( 78), INT8_C(-119), INT8_C( 82), INT8_C( -17), INT8_C( 18), INT8_C( 47), INT8_C( 8), INT8_C(-108), INT8_C(-114)) }, { simde_mm256_set_epi8(INT8_C( 
-71), INT8_C( 29), INT8_C( -63), INT8_C( -99), INT8_C( 16), INT8_C( 67), INT8_C(-118), INT8_C( 12), INT8_C( -46), INT8_C( 113), INT8_C(-128), INT8_C(-114), INT8_C( -51), INT8_C( -75), INT8_C( 97), INT8_C( -73), INT8_C( 24), INT8_C( -63), INT8_C( 20), INT8_C( -26), INT8_C( -44), INT8_C( 71), INT8_C( 109), INT8_C( 113), INT8_C( 36), INT8_C( 100), INT8_C( 42), INT8_C( -55), INT8_C( 12), INT8_C( -66), INT8_C( -91), INT8_C( 82)), simde_mm256_set_epi8(INT8_C( 81), INT8_C( 93), INT8_C( 76), INT8_C( 23), INT8_C( -32), INT8_C( 31), INT8_C( 25), INT8_C( -76), INT8_C( 38), INT8_C( 114), INT8_C( 74), INT8_C( -11), INT8_C( 55), INT8_C( -33), INT8_C( -78), INT8_C( -96), INT8_C( 106), INT8_C( 1), INT8_C( 32), INT8_C( -46), INT8_C( 124), INT8_C( 89), INT8_C( -80), INT8_C( -22), INT8_C(-117), INT8_C( -99), INT8_C( -39), INT8_C( -63), INT8_C( 12), INT8_C( 106), INT8_C( 120), INT8_C( 64)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( -71), INT8_C( 29), INT8_C( -63), INT8_C( -99), INT8_C( 16), INT8_C( 67), INT8_C(-118), INT8_C( 12), INT8_C( -46), INT8_C( 113), INT8_C(-128), INT8_C(-114), INT8_C( -51), INT8_C( -75), INT8_C( 97), INT8_C( 0), INT8_C( 24), INT8_C( -63), INT8_C( 20), INT8_C( -26), INT8_C( -44), INT8_C( 71), INT8_C( 109), INT8_C( 113), INT8_C( 36), INT8_C( 100), INT8_C( 42), INT8_C( -55), INT8_C( 12), INT8_C( -66), INT8_C( -91)) }, { simde_mm256_set_epi8(INT8_C( 91), INT8_C( -47), INT8_C( -11), INT8_C( 95), INT8_C(-101), INT8_C( 71), INT8_C( -99), INT8_C( 119), INT8_C( 53), INT8_C( -40), INT8_C( 93), INT8_C( -71), INT8_C( 53), INT8_C( 101), INT8_C( 91), INT8_C( 38), INT8_C( 101), INT8_C( 10), INT8_C( 7), INT8_C( 37), INT8_C( -66), INT8_C( 4), INT8_C( -96), INT8_C( -47), INT8_C( -72), INT8_C(-123), INT8_C( -18), INT8_C( -16), INT8_C( 57), INT8_C( -1), INT8_C(-122), INT8_C( 93)), simde_mm256_set_epi8(INT8_C( 125), INT8_C( -97), INT8_C( -63), INT8_C( 47), INT8_C( 76), INT8_C( -11), INT8_C( 32), INT8_C( 96), INT8_C( 121), INT8_C( -93), INT8_C( 72), INT8_C( -40), INT8_C( -11), 
INT8_C( -76), INT8_C( 51), INT8_C(-122), INT8_C( 108), INT8_C( -61), INT8_C( -9), INT8_C( -38), INT8_C( -65), INT8_C( -53), INT8_C( -36), INT8_C( 43), INT8_C( -52), INT8_C( -14), INT8_C( -72), INT8_C( 79), INT8_C( 4), INT8_C( 96), INT8_C( -88), INT8_C( 40)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 91), INT8_C( -47), INT8_C( -11), INT8_C( 95), INT8_C(-101), INT8_C( 71), INT8_C( -99), INT8_C( 119), INT8_C( 53), INT8_C( -40), INT8_C( 93), INT8_C( -71), INT8_C( 53), INT8_C( 101), INT8_C( 91), INT8_C( 0), INT8_C( 101), INT8_C( 10), INT8_C( 7), INT8_C( 37), INT8_C( -66), INT8_C( 4), INT8_C( -96), INT8_C( -47), INT8_C( -72), INT8_C(-123), INT8_C( -18), INT8_C( -16), INT8_C( 57), INT8_C( -1), INT8_C(-122)) }, { simde_mm256_set_epi8(INT8_C(-125), INT8_C( 50), INT8_C( 9), INT8_C(-114), INT8_C( 12), INT8_C(-115), INT8_C( 20), INT8_C( -81), INT8_C( 65), INT8_C( -9), INT8_C( 1), INT8_C( -7), INT8_C( 48), INT8_C( 81), INT8_C( -58), INT8_C( -29), INT8_C(-102), INT8_C( 98), INT8_C( 126), INT8_C( 49), INT8_C( 95), INT8_C( 45), INT8_C( -60), INT8_C( -92), INT8_C( -7), INT8_C( -56), INT8_C( -47), INT8_C( -85), INT8_C( -93), INT8_C( 33), INT8_C( -91), INT8_C( -34)), simde_mm256_set_epi8(INT8_C( 88), INT8_C( -22), INT8_C( -14), INT8_C( 72), INT8_C(-106), INT8_C( -75), INT8_C( -40), INT8_C( -29), INT8_C( 71), INT8_C( 52), INT8_C( -19), INT8_C( -85), INT8_C( 47), INT8_C( -19), INT8_C( -33), INT8_C( -99), INT8_C( -95), INT8_C(-103), INT8_C( 113), INT8_C( 75), INT8_C( 16), INT8_C(-125), INT8_C( 51), INT8_C( -20), INT8_C( -87), INT8_C( 79), INT8_C( 50), INT8_C( -45), INT8_C( -77), INT8_C( -89), INT8_C( 25), INT8_C( -53)), simde_mm256_set_epi8(INT8_C( 0), INT8_C(-125), INT8_C( 50), INT8_C( 9), INT8_C(-114), INT8_C( 12), INT8_C(-115), INT8_C( 20), INT8_C( -81), INT8_C( 65), INT8_C( -9), INT8_C( 1), INT8_C( -7), INT8_C( 48), INT8_C( 81), INT8_C( -58), INT8_C( 0), INT8_C(-102), INT8_C( 98), INT8_C( 126), INT8_C( 49), INT8_C( 95), INT8_C( 45), INT8_C( -60), INT8_C( -92), INT8_C( -7), INT8_C( 
-56), INT8_C( -47), INT8_C( -85), INT8_C( -93), INT8_C( 33), INT8_C( -91)) }, { simde_mm256_set_epi8(INT8_C( 108), INT8_C( -54), INT8_C( 79), INT8_C( -34), INT8_C( 79), INT8_C( 114), INT8_C( 95), INT8_C( -13), INT8_C( -32), INT8_C( 106), INT8_C( 6), INT8_C(-103), INT8_C( -17), INT8_C( 126), INT8_C( -76), INT8_C( 100), INT8_C( 103), INT8_C( -73), INT8_C(-120), INT8_C( 89), INT8_C( 92), INT8_C( 1), INT8_C( 24), INT8_C( -42), INT8_C( -87), INT8_C(-120), INT8_C( -69), INT8_C( 118), INT8_C( -71), INT8_C( 35), INT8_C( 31), INT8_C( -27)), simde_mm256_set_epi8(INT8_C( 106), INT8_C( 34), INT8_C( -93), INT8_C( -41), INT8_C( 92), INT8_C( 50), INT8_C(-109), INT8_C( 24), INT8_C( 49), INT8_C( -59), INT8_C( -13), INT8_C( 39), INT8_C( 111), INT8_C( -38), INT8_C( 26), INT8_C( -9), INT8_C(-106), INT8_C(-118), INT8_C( 104), INT8_C( -38), INT8_C( 127), INT8_C( -21), INT8_C( 72), INT8_C(-105), INT8_C( 100), INT8_C( 65), INT8_C( 70), INT8_C(-111), INT8_C( 93), INT8_C(-113), INT8_C( -69), INT8_C( -81)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 108), INT8_C( -54), INT8_C( 79), INT8_C( -34), INT8_C( 79), INT8_C( 114), INT8_C( 95), INT8_C( -13), INT8_C( -32), INT8_C( 106), INT8_C( 6), INT8_C(-103), INT8_C( -17), INT8_C( 126), INT8_C( -76), INT8_C( 0), INT8_C( 103), INT8_C( -73), INT8_C(-120), INT8_C( 89), INT8_C( 92), INT8_C( 1), INT8_C( 24), INT8_C( -42), INT8_C( -87), INT8_C(-120), INT8_C( -69), INT8_C( 118), INT8_C( -71), INT8_C( 35), INT8_C( 31)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_alignr_epi8(test_vec[i].a, test_vec[i].b, 17); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; }
/* Test for simde_mm256_alignr_epi8 with a literal byte count of 26.
 *
 * Holds eight precomputed (a, b, r) triples; for each one the function
 * calls simde_mm256_alignr_epi8(a, b, 26) and checks the result against r
 * with simde_assert_m256i_i8, then returns 0 once the loop has finished.
 *
 * Reading the table in simde_mm256_set_epi8 argument order, every group of
 * 16 expected values is ten zeros followed by the first six arguments of
 * the matching group of `a`; no value from `b` appears in the expected
 * output for this count. */
static int test_simde_mm256_alignr_epi8_case3(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( 57), INT8_C( -47), INT8_C( -81), INT8_C( -95), INT8_C(-100), INT8_C( -69), INT8_C( -75), INT8_C(-127), INT8_C( -8), INT8_C( -83), INT8_C( -94), 
INT8_C(-113), INT8_C( 107), INT8_C( -39), INT8_C( -99), INT8_C( 46), INT8_C( -61), INT8_C( -72), INT8_C( -5), INT8_C(-126), INT8_C( 113), INT8_C(-120), INT8_C( -9), INT8_C( 117), INT8_C( 121), INT8_C( -14), INT8_C( -58), INT8_C(-118), INT8_C( -70), INT8_C( -35), INT8_C(-100), INT8_C( 48)), simde_mm256_set_epi8(INT8_C( -98), INT8_C( 80), INT8_C( -16), INT8_C( 73), INT8_C( 126), INT8_C(-115), INT8_C( 31), INT8_C( 80), INT8_C( -75), INT8_C( 117), INT8_C( -13), INT8_C( -70), INT8_C(-101), INT8_C( -10), INT8_C(-111), INT8_C( 96), INT8_C( -77), INT8_C( 0), INT8_C( 70), INT8_C(-121), INT8_C( 78), INT8_C( 54), INT8_C( 38), INT8_C( 18), INT8_C( -12), INT8_C( 68), INT8_C( 53), INT8_C( 45), INT8_C( 77), INT8_C( 103), INT8_C( 118), INT8_C( 116)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 57), INT8_C( -47), INT8_C( -81), INT8_C( -95), INT8_C(-100), INT8_C( -69), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -61), INT8_C( -72), INT8_C( -5), INT8_C(-126), INT8_C( 113), INT8_C(-120)) }, { simde_mm256_set_epi8(INT8_C( 8), INT8_C( 70), INT8_C( 96), INT8_C( 42), INT8_C( 73), INT8_C( -91), INT8_C( -93), INT8_C( -58), INT8_C( -35), INT8_C( -14), INT8_C( 118), INT8_C(-128), INT8_C( -97), INT8_C( 97), INT8_C( 22), INT8_C( 4), INT8_C( 24), INT8_C( 124), INT8_C( -32), INT8_C( -48), INT8_C( -74), INT8_C( 95), INT8_C( 95), INT8_C( -38), INT8_C( 54), INT8_C( 120), INT8_C( -65), INT8_C( -96), INT8_C( -44), INT8_C( -78), INT8_C( -95), INT8_C(-111)), simde_mm256_set_epi8(INT8_C( 66), INT8_C( 48), INT8_C( 60), INT8_C( -4), INT8_C( 33), INT8_C( 67), INT8_C( 97), INT8_C( 65), INT8_C( 80), INT8_C(-109), INT8_C( 88), INT8_C( -23), INT8_C( 107), INT8_C( -65), INT8_C( -65), INT8_C( 62), INT8_C( 90), INT8_C( 118), INT8_C( 25), INT8_C( 105), INT8_C( 109), INT8_C( -45), INT8_C( 4), INT8_C(-107), INT8_C( 55), 
INT8_C(-101), INT8_C( -65), INT8_C( 121), INT8_C( -76), INT8_C( 55), INT8_C( 6), INT8_C( 86)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( 70), INT8_C( 96), INT8_C( 42), INT8_C( 73), INT8_C( -91), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 24), INT8_C( 124), INT8_C( -32), INT8_C( -48), INT8_C( -74), INT8_C( 95)) }, { simde_mm256_set_epi8(INT8_C( -1), INT8_C( 93), INT8_C( 114), INT8_C( 49), INT8_C( 120), INT8_C( -70), INT8_C(-112), INT8_C( 19), INT8_C( 56), INT8_C( -12), INT8_C( -77), INT8_C( -85), INT8_C( -59), INT8_C( 80), INT8_C( -47), INT8_C( -28), INT8_C( -33), INT8_C( 73), INT8_C( -24), INT8_C(-106), INT8_C(-122), INT8_C(-111), INT8_C( -3), INT8_C( 57), INT8_C( 67), INT8_C( 107), INT8_C( -68), INT8_C(-101), INT8_C( -9), INT8_C( 5), INT8_C(-124), INT8_C( -69)), simde_mm256_set_epi8(INT8_C( -67), INT8_C( 65), INT8_C( -72), INT8_C( -90), INT8_C( -44), INT8_C( 2), INT8_C( 39), INT8_C( -45), INT8_C( 51), INT8_C( -30), INT8_C( -47), INT8_C( 1), INT8_C( -91), INT8_C( -40), INT8_C( -73), INT8_C( 33), INT8_C(-117), INT8_C( -31), INT8_C(-117), INT8_C( 60), INT8_C( -36), INT8_C(-120), INT8_C( -10), INT8_C( -12), INT8_C( -25), INT8_C(-103), INT8_C( 48), INT8_C( -50), INT8_C( 101), INT8_C( 93), INT8_C( -31), INT8_C( -33)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 93), INT8_C( 114), INT8_C( 49), INT8_C( 120), INT8_C( -70), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -33), INT8_C( 73), INT8_C( -24), INT8_C(-106), INT8_C(-122), INT8_C(-111)) }, { simde_mm256_set_epi8(INT8_C( 114), INT8_C( -28), INT8_C( -81), INT8_C(-126), INT8_C(-114), INT8_C( -94), INT8_C(-101), INT8_C( 52), 
INT8_C( 30), INT8_C( 66), INT8_C(-105), INT8_C( 50), INT8_C( 48), INT8_C( 15), INT8_C( 1), INT8_C(-106), INT8_C( -36), INT8_C( 61), INT8_C( -18), INT8_C( 8), INT8_C( 89), INT8_C( -23), INT8_C( 58), INT8_C( -29), INT8_C( 110), INT8_C( 57), INT8_C( -39), INT8_C( 1), INT8_C( -60), INT8_C( 60), INT8_C( -80), INT8_C( -20)), simde_mm256_set_epi8(INT8_C( -60), INT8_C( 69), INT8_C( -69), INT8_C( 107), INT8_C( 2), INT8_C( -33), INT8_C( 14), INT8_C( 77), INT8_C( 15), INT8_C( 63), INT8_C( 63), INT8_C( -53), INT8_C( -9), INT8_C( 76), INT8_C( 7), INT8_C( -76), INT8_C( -23), INT8_C(-107), INT8_C( 84), INT8_C( 37), INT8_C( -18), INT8_C( 47), INT8_C( 58), INT8_C( 31), INT8_C( 69), INT8_C( 12), INT8_C( 46), INT8_C( 101), INT8_C( -38), INT8_C( 117), INT8_C(-113), INT8_C( -36)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 114), INT8_C( -28), INT8_C( -81), INT8_C(-126), INT8_C(-114), INT8_C( -94), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -36), INT8_C( 61), INT8_C( -18), INT8_C( 8), INT8_C( 89), INT8_C( -23)) }, { simde_mm256_set_epi8(INT8_C( 13), INT8_C( -32), INT8_C( -51), INT8_C( 49), INT8_C( 15), INT8_C( 15), INT8_C( -66), INT8_C( -38), INT8_C( 69), INT8_C( 1), INT8_C( -50), INT8_C(-103), INT8_C( 72), INT8_C( -87), INT8_C(-104), INT8_C( -58), INT8_C( 63), INT8_C(-120), INT8_C( 36), INT8_C( -61), INT8_C( 106), INT8_C( 97), INT8_C(-128), INT8_C( 26), INT8_C(-121), INT8_C( -27), INT8_C( 107), INT8_C(-112), INT8_C( 57), INT8_C( 39), INT8_C( 98), INT8_C( 126)), simde_mm256_set_epi8(INT8_C(-110), INT8_C( 50), INT8_C( 102), INT8_C( -85), INT8_C( -62), INT8_C( 74), INT8_C( 102), INT8_C( 36), INT8_C( -55), INT8_C(-104), INT8_C( 11), INT8_C( -70), INT8_C( -20), INT8_C(-104), INT8_C( -37), INT8_C( 122), INT8_C( -88), INT8_C( 70), INT8_C( 43), INT8_C( -34), INT8_C( 14), INT8_C( 55), INT8_C( -41), INT8_C( 
33), INT8_C( 53), INT8_C(-102), INT8_C( 56), INT8_C( 26), INT8_C( -79), INT8_C( -30), INT8_C( 73), INT8_C( -53)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 13), INT8_C( -32), INT8_C( -51), INT8_C( 49), INT8_C( 15), INT8_C( 15), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 63), INT8_C(-120), INT8_C( 36), INT8_C( -61), INT8_C( 106), INT8_C( 97)) }, { simde_mm256_set_epi8(INT8_C(-103), INT8_C( 23), INT8_C( -90), INT8_C( 43), INT8_C( -8), INT8_C( 54), INT8_C( -93), INT8_C( 64), INT8_C(-116), INT8_C( -76), INT8_C( -53), INT8_C( 35), INT8_C( 51), INT8_C( -86), INT8_C( 8), INT8_C( 43), INT8_C( -60), INT8_C(-113), INT8_C( 44), INT8_C(-102), INT8_C( 77), INT8_C( 95), INT8_C( 57), INT8_C( 124), INT8_C( -91), INT8_C( -87), INT8_C( -54), INT8_C( -83), INT8_C( 108), INT8_C( 76), INT8_C( 90), INT8_C( -49)), simde_mm256_set_epi8(INT8_C( -43), INT8_C( -5), INT8_C( 94), INT8_C( 76), INT8_C( -9), INT8_C( 110), INT8_C( -82), INT8_C( 112), INT8_C( -25), INT8_C( 11), INT8_C( -46), INT8_C( -57), INT8_C( -13), INT8_C(-127), INT8_C( 10), INT8_C(-113), INT8_C( 55), INT8_C( 85), INT8_C( 75), INT8_C( 61), INT8_C( 49), INT8_C( -14), INT8_C( 44), INT8_C(-128), INT8_C(-102), INT8_C( -17), INT8_C( 64), INT8_C( -73), INT8_C( 14), INT8_C( 19), INT8_C( -2), INT8_C( 7)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-103), INT8_C( 23), INT8_C( -90), INT8_C( 43), INT8_C( -8), INT8_C( 54), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -60), INT8_C(-113), INT8_C( 44), INT8_C(-102), INT8_C( 77), INT8_C( 95)) }, { simde_mm256_set_epi8(INT8_C( -32), INT8_C( -82), INT8_C( 79), INT8_C( 79), INT8_C( 127), INT8_C( 16), INT8_C(-118), INT8_C( 
113), INT8_C( -52), INT8_C( -25), INT8_C(-124), INT8_C( -77), INT8_C( 114), INT8_C(-118), INT8_C( -84), INT8_C( 73), INT8_C( -12), INT8_C( -67), INT8_C(-127), INT8_C( -69), INT8_C( 30), INT8_C( 14), INT8_C( -47), INT8_C(-119), INT8_C( -36), INT8_C( -97), INT8_C( -89), INT8_C( 24), INT8_C( -41), INT8_C( 117), INT8_C(-102), INT8_C( 54)), simde_mm256_set_epi8(INT8_C( -66), INT8_C( -50), INT8_C( 39), INT8_C( 30), INT8_C( 53), INT8_C( 108), INT8_C( -71), INT8_C( -62), INT8_C( 58), INT8_C( -69), INT8_C( -39), INT8_C( -28), INT8_C( 58), INT8_C( 54), INT8_C(-117), INT8_C( -49), INT8_C( 126), INT8_C( -71), INT8_C( -39), INT8_C(-120), INT8_C( 88), INT8_C( 41), INT8_C( 127), INT8_C( 111), INT8_C( -32), INT8_C( -18), INT8_C( 12), INT8_C( 97), INT8_C( 83), INT8_C( -96), INT8_C( 13), INT8_C(-101)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -32), INT8_C( -82), INT8_C( 79), INT8_C( 79), INT8_C( 127), INT8_C( 16), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -12), INT8_C( -67), INT8_C(-127), INT8_C( -69), INT8_C( 30), INT8_C( 14)) }, { simde_mm256_set_epi8(INT8_C(-105), INT8_C( 9), INT8_C( -52), INT8_C( 54), INT8_C( -11), INT8_C( -39), INT8_C( -64), INT8_C( -79), INT8_C( 88), INT8_C( 52), INT8_C( -95), INT8_C( -70), INT8_C( 46), INT8_C( 121), INT8_C( -22), INT8_C( 113), INT8_C( 26), INT8_C( 104), INT8_C( -52), INT8_C( 109), INT8_C( 5), INT8_C( 87), INT8_C( 90), INT8_C( -7), INT8_C( -35), INT8_C(-112), INT8_C( 20), INT8_C(-128), INT8_C( -21), INT8_C( -67), INT8_C( 37), INT8_C( -35)), simde_mm256_set_epi8(INT8_C( 66), INT8_C( -23), INT8_C( 45), INT8_C( -38), INT8_C( -94), INT8_C( 119), INT8_C( -23), INT8_C( 70), INT8_C( -75), INT8_C( 25), INT8_C( 79), INT8_C(-101), INT8_C(-122), INT8_C(-107), INT8_C( -10), INT8_C(-109), INT8_C( -96), INT8_C( 45), INT8_C( 31), INT8_C( -60), INT8_C( 100), INT8_C( 
115), INT8_C( 79), INT8_C( 38), INT8_C( -35), INT8_C( 57), INT8_C( 13), INT8_C( 93), INT8_C( 54), INT8_C( -61), INT8_C( 101), INT8_C(-113)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-105), INT8_C( 9), INT8_C( -52), INT8_C( 54), INT8_C( -11), INT8_C( -39), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 26), INT8_C( 104), INT8_C( -52), INT8_C( 109), INT8_C( 5), INT8_C( 87)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_alignr_epi8(test_vec[i].a, test_vec[i].b, 26); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_and_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C( 8722470578646828517), INT64_C( 891261850847437783), INT64_C( 8698554819020653857), INT64_C(-7282900013878242954)), simde_mm256_set_epi64x(INT64_C(-8128142018056442141), INT64_C( 5559182722028422309), INT64_C( 2093267872519066825), INT64_C(-7117023562774970023)), simde_mm256_set_epi64x(INT64_C( 648519197013312737), INT64_C( 866420841735143557), INT64_C( 1730587322060899329), INT64_C(-7482378910948097712)) }, { simde_mm256_set_epi64x(INT64_C(-2297219683620407228), INT64_C(-2314825045857877411), INT64_C(-2223407797787304327), INT64_C( 5408595704702705619)), simde_mm256_set_epi64x(INT64_C( 1902387556947256757), INT64_C(-4636290958455233996), INT64_C( -193279292138890017), INT64_C( 2387678637527501964)), simde_mm256_set_epi64x(INT64_C( 1867272746704900), INT64_C(-6944527661819330028), INT64_C(-2233693047608222631), INT64_C( 72674428659436672)) }, { simde_mm256_set_epi64x(INT64_C(-8083909718117301567), INT64_C( 11995607010100125), INT64_C(-6068617776224060223), INT64_C(-6387203967446836987)), 
simde_mm256_set_epi64x(INT64_C(-8320376883848651160), INT64_C(-4950145821323384534), INT64_C(-7969688999974624617), INT64_C( 659904372446782737)), simde_mm256_set_epi64x(INT64_C(-8322647438183611840), INT64_C( 2406350531494152), INT64_C(-9131628786599059327), INT64_C( 74330855942160641)) }, { simde_mm256_set_epi64x(INT64_C(-7862557356832127783), INT64_C(-5197238245936512816), INT64_C(-1440736387308233171), INT64_C( -422437923560182700)), simde_mm256_set_epi64x(INT64_C( 4501573497311276896), INT64_C( 1568099047173454230), INT64_C( 6784671475384752865), INT64_C(-5901872067663085826)), simde_mm256_set_epi64x(INT64_C( 1324204786773460032), INT64_C( 1568098471546732688), INT64_C( 5476791399028365857), INT64_C(-6196932668584612780)) }, { simde_mm256_set_epi64x(INT64_C( -83457062575009429), INT64_C(-7222721162513873213), INT64_C( 8275972355230696496), INT64_C( 5685146925209815999)), simde_mm256_set_epi64x(INT64_C( 7621095561231011691), INT64_C(-1384347240916299959), INT64_C( 8784701942784527649), INT64_C(-6329984144489188000)), simde_mm256_set_epi64x(INT64_C( 7549018173429252459), INT64_C(-8592431562369268159), INT64_C( 8126746635764630560), INT64_C( 586910516468318496)) }, { simde_mm256_set_epi64x(INT64_C( 5973184558080946927), INT64_C(-1786695518880322601), INT64_C( 564422817571527071), INT64_C( 4038585732338755869)), simde_mm256_set_epi64x(INT64_C(-8901168232869945121), INT64_C( 8118630853720063073), INT64_C( -228868271804772649), INT64_C(-6456700929251086932)), simde_mm256_set_epi64x(INT64_C( 27024505729917135), INT64_C( 6926573216261613633), INT64_C( 346814025888696471), INT64_C( 2306177340255840524)) }, { simde_mm256_set_epi64x(INT64_C( 4967668340414178010), INT64_C(-2410168209476403592), INT64_C(-3019436090811439415), INT64_C(-6965119139859890192)), simde_mm256_set_epi64x(INT64_C(-5120337331222163918), INT64_C(-1589564432494918546), INT64_C( 5292723257474752308), INT64_C( 2511807878775255697)), simde_mm256_set_epi64x(INT64_C( 67729921108361746), 
INT64_C(-3999169530918599576), INT64_C( 4616337787987166720), INT64_C( 167381957966049936)) }, { simde_mm256_set_epi64x(INT64_C(-6179811667909625694), INT64_C(-2471055444546593648), INT64_C( 7540412455883833292), INT64_C( 6654843089135720963)), simde_mm256_set_epi64x(INT64_C( -939588147635733509), INT64_C(-1340596046637757449), INT64_C(-1662948605324253370), INT64_C( 817158485966988858)), simde_mm256_set_epi64x(INT64_C(-6758530821969135454), INT64_C(-3664509346923870064), INT64_C( 7540157231680104260), INT64_C( 599541701488411138)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_and_si256(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_andnot_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 1296069903), INT32_C( -401713319), INT32_C( -398512257), INT32_C( 1831595067), INT32_C( -79935516), INT32_C(-1180021826), INT32_C( 1322035843), INT32_C( 1532358492)), simde_mm256_set_epi32(INT32_C( 1994603249), INT32_C(-1155877896), INT32_C( 1480474617), INT32_C( 1055447888), INT32_C( 1247599905), INT32_C( -290225910), INT32_C( -75325456), INT32_C( 1443351725)), simde_mm256_set_epi32(INT32_C( 849545968), INT32_C( 319856800), INT32_C( 268439680), INT32_C( 314577728), INT32_C( 4233217), INT32_C( 1175552000), INT32_C(-1325263504), INT32_C( 67240097)) }, { simde_mm256_set_epi32(INT32_C( 751085853), INT32_C( 464664830), INT32_C( 1171943738), INT32_C( 238039385), INT32_C( 447621112), INT32_C( -944668210), INT32_C( -26835475), INT32_C( -497913063)), simde_mm256_set_epi32(INT32_C( -346681351), INT32_C( 1057375625), INT32_C(-1640028951), INT32_C( 1365052187), INT32_C( 1146265967), INT32_C( -337636497), INT32_C(-2119055512), INT32_C(-1077772260)), simde_mm256_set_epi32(INT32_C(-1022228768), INT32_C( 604259585), INT32_C(-1708849983), INT32_C( 1364003330), INT32_C( 
1146126343), INT32_C( 675287585), INT32_C( 26296832), INT32_C( 494960644)) }, { simde_mm256_set_epi32(INT32_C( 1652916374), INT32_C( 430611066), INT32_C(-1135996673), INT32_C( 1059116837), INT32_C( 642663426), INT32_C(-2076745619), INT32_C(-1526373034), INT32_C(-1938949753)), simde_mm256_set_epi32(INT32_C( 21997480), INT32_C( 2006437191), INT32_C(-1106118986), INT32_C( -552923366), INT32_C( 1259738142), INT32_C(-1272739174), INT32_C( 256185924), INT32_C( 828965431)), simde_mm256_set_epi32(INT32_C( 21636904), INT32_C( 1712669957), INT32_C( 34727424), INT32_C(-1073018854), INT32_C( 1225789468), INT32_C( 805341842), INT32_C( 171971072), INT32_C( 822084144)) }, { simde_mm256_set_epi32(INT32_C( 1850172884), INT32_C( 347371552), INT32_C(-2124471172), INT32_C(-1122146912), INT32_C(-2084330368), INT32_C(-1474045155), INT32_C(-1097896595), INT32_C( -157279178)), simde_mm256_set_epi32(INT32_C( 1150160686), INT32_C( 1294066543), INT32_C(-1264650397), INT32_C( -958193094), INT32_C( 1906124487), INT32_C(-1106662757), INT32_C(-1281709058), INT32_C(-1981986383)), simde_mm256_set_epi32(INT32_C( 8915498), INT32_C( 1224835919), INT32_C( 880857859), INT32_C( 1122107930), INT32_C( 1880883783), INT32_C( 369631362), INT32_C( 17859218), INT32_C( 157106561)) }, { simde_mm256_set_epi32(INT32_C( 1083224354), INT32_C(-2047292862), INT32_C(-1063111811), INT32_C( 679392193), INT32_C(-1649939198), INT32_C( 165669772), INT32_C(-1686920475), INT32_C( -669365464)), simde_mm256_set_epi32(INT32_C( 1910614), INT32_C( 1742547828), INT32_C( 1824050734), INT32_C( 1245292821), INT32_C( 1373149632), INT32_C(-1705573089), INT32_C(-2120011139), INT32_C( 1659478998)), simde_mm256_set_epi32(INT32_C( 853588), INT32_C( 1644503348), INT32_C( 739822594), INT32_C( 1107363860), INT32_C( 1079513280), INT32_C(-1845492717), INT32_C( 8392728), INT32_C( 585212118)) }, { simde_mm256_set_epi32(INT32_C( -215087200), INT32_C( 439485206), INT32_C( 52069439), INT32_C( 1985678052), INT32_C( 236126003), INT32_C( 1543880509), 
INT32_C(-1076073317), INT32_C( 911942733)), simde_mm256_set_epi32(INT32_C( 289944943), INT32_C( 1791469709), INT32_C(-1356404241), INT32_C(-2084942503), INT32_C(-1405988531), INT32_C( 52512230), INT32_C( -191001241), INT32_C( 11336758)), simde_mm256_set_epi32(INT32_C( 4206671), INT32_C( 1623566473), INT32_C(-1406899776), INT32_C(-2120202983), INT32_C(-1608515508), INT32_C( 52445378), INT32_C( 1073840484), INT32_C( 10803250)) }, { simde_mm256_set_epi32(INT32_C( -255265247), INT32_C( 884745637), INT32_C(-1256896434), INT32_C( 1712403469), INT32_C(-1168205511), INT32_C( 1417172277), INT32_C( 6510726), INT32_C( 1896753771)), simde_mm256_set_epi32(INT32_C( 1154369104), INT32_C( 1355111439), INT32_C( 107073276), INT32_C( 1081379485), INT32_C( -791672049), INT32_C(-1430371667), INT32_C( 1251404018), INT32_C( 686255615)), simde_mm256_set_epi32(INT32_C( 67502160), INT32_C( 1078024202), INT32_C( 39880368), INT32_C( 6589072), INT32_C( 1082130950), INT32_C(-1434058104), INT32_C( 1251254384), INT32_C( 148981140)) }, { simde_mm256_set_epi32(INT32_C( 1670586553), INT32_C( 1338043318), INT32_C(-2053008274), INT32_C( 1109326069), INT32_C( 858036123), INT32_C( 201080868), INT32_C( -768148447), INT32_C( 1651890892)), simde_mm256_set_epi32(INT32_C( 1950737481), INT32_C(-1368157265), INT32_C( 504476696), INT32_C(-1751057511), INT32_C(-1576762410), INT32_C( 1167387907), INT32_C( 398094763), INT32_C( 1151527447)), simde_mm256_set_epi32(INT32_C( 340051008), INT32_C(-1607266295), INT32_C( 437264400), INT32_C(-1784675576), INT32_C(-2147483580), INT32_C( 1140894979), INT32_C( 92799370), INT32_C( 75636755)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_andnot_si256(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_adds_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C(-119), 
INT8_C( -56), INT8_C( 53), INT8_C(-117), INT8_C( 25), INT8_C( -8), INT8_C( -23), INT8_C( -22), INT8_C( 105), INT8_C( 21), INT8_C( -22), INT8_C(-123), INT8_C( -78), INT8_C( 1), INT8_C( 124), INT8_C( 72), INT8_C( -56), INT8_C( 100), INT8_C( -58), INT8_C( 52), INT8_C( 106), INT8_C(-112), INT8_C( -69), INT8_C(-107), INT8_C( 71), INT8_C( -50), INT8_C( 48), INT8_C( -51), INT8_C( -22), INT8_C( 119), INT8_C( 38), INT8_C(-116)), simde_mm256_set_epi8(INT8_C( -61), INT8_C( 39), INT8_C( 108), INT8_C( 116), INT8_C( 8), INT8_C( 57), INT8_C( -68), INT8_C(-111), INT8_C( 66), INT8_C(-114), INT8_C( 86), INT8_C( 70), INT8_C( 95), INT8_C( 18), INT8_C( 95), INT8_C( 15), INT8_C( -45), INT8_C( 91), INT8_C( 47), INT8_C( 120), INT8_C(-127), INT8_C( -1), INT8_C( 68), INT8_C(-116), INT8_C( 47), INT8_C( 11), INT8_C( -19), INT8_C( -17), INT8_C(-114), INT8_C( 40), INT8_C( -43), INT8_C( -57)), simde_mm256_set_epi8(INT8_C(-128), INT8_C( -17), INT8_C( 127), INT8_C( -1), INT8_C( 33), INT8_C( 49), INT8_C( -91), INT8_C(-128), INT8_C( 127), INT8_C( -93), INT8_C( 64), INT8_C( -53), INT8_C( 17), INT8_C( 19), INT8_C( 127), INT8_C( 87), INT8_C(-101), INT8_C( 127), INT8_C( -11), INT8_C( 127), INT8_C( -21), INT8_C(-113), INT8_C( -1), INT8_C(-128), INT8_C( 118), INT8_C( -39), INT8_C( 29), INT8_C( -68), INT8_C(-128), INT8_C( 127), INT8_C( -5), INT8_C(-128)) }, { simde_mm256_set_epi8(INT8_C( 39), INT8_C( 31), INT8_C( -61), INT8_C( -35), INT8_C( 33), INT8_C( 40), INT8_C( 126), INT8_C( 109), INT8_C( 9), INT8_C( 100), INT8_C(-107), INT8_C( 27), INT8_C( -15), INT8_C( 43), INT8_C(-116), INT8_C(-111), INT8_C( -44), INT8_C(-120), INT8_C( 17), INT8_C( -18), INT8_C( -10), INT8_C( -86), INT8_C( 7), INT8_C( 61), INT8_C( -46), INT8_C( -53), INT8_C( -3), INT8_C( 98), INT8_C( -75), INT8_C( -98), INT8_C( 77), INT8_C( 102)), simde_mm256_set_epi8(INT8_C(-107), INT8_C(-113), INT8_C( 9), INT8_C( 35), INT8_C( 62), INT8_C( -77), INT8_C( 0), INT8_C( 9), INT8_C( -7), INT8_C( 23), INT8_C( -88), INT8_C( -35), INT8_C( 78), INT8_C( 7), 
INT8_C( 79), INT8_C( -98), INT8_C( -54), INT8_C( 87), INT8_C( 123), INT8_C( 91), INT8_C( 116), INT8_C( 67), INT8_C( 110), INT8_C( 43), INT8_C( -68), INT8_C( -85), INT8_C( -23), INT8_C( -82), INT8_C( 66), INT8_C(-119), INT8_C( -51), INT8_C( 79)), simde_mm256_set_epi8(INT8_C( -68), INT8_C( -82), INT8_C( -52), INT8_C( 0), INT8_C( 95), INT8_C( -37), INT8_C( 126), INT8_C( 118), INT8_C( 2), INT8_C( 123), INT8_C(-128), INT8_C( -8), INT8_C( 63), INT8_C( 50), INT8_C( -37), INT8_C(-128), INT8_C( -98), INT8_C( -33), INT8_C( 127), INT8_C( 73), INT8_C( 106), INT8_C( -19), INT8_C( 117), INT8_C( 104), INT8_C(-114), INT8_C(-128), INT8_C( -26), INT8_C( 16), INT8_C( -9), INT8_C(-128), INT8_C( 26), INT8_C( 127)) }, { simde_mm256_set_epi8(INT8_C( -96), INT8_C( -2), INT8_C(-104), INT8_C( 30), INT8_C( -98), INT8_C(-110), INT8_C( -27), INT8_C( 8), INT8_C( 36), INT8_C( -65), INT8_C( 59), INT8_C( 66), INT8_C( -66), INT8_C( -58), INT8_C( 111), INT8_C( 36), INT8_C( 35), INT8_C( -4), INT8_C( -85), INT8_C( 66), INT8_C( 120), INT8_C( 62), INT8_C( 85), INT8_C( -9), INT8_C( -9), INT8_C( -49), INT8_C( 90), INT8_C( -80), INT8_C( -89), INT8_C( -62), INT8_C(-127), INT8_C(-100)), simde_mm256_set_epi8(INT8_C( -67), INT8_C( 11), INT8_C( -39), INT8_C( 56), INT8_C( -35), INT8_C( 114), INT8_C( 34), INT8_C( -29), INT8_C( -3), INT8_C( 101), INT8_C( 115), INT8_C( 44), INT8_C( 36), INT8_C( -77), INT8_C( 98), INT8_C( 105), INT8_C( 91), INT8_C( 26), INT8_C( 9), INT8_C( 8), INT8_C( 79), INT8_C( -2), INT8_C( -60), INT8_C( 45), INT8_C(-118), INT8_C( -81), INT8_C( 63), INT8_C( -54), INT8_C( -51), INT8_C( 41), INT8_C( 33), INT8_C( -19)), simde_mm256_set_epi8(INT8_C(-128), INT8_C( 9), INT8_C(-128), INT8_C( 86), INT8_C(-128), INT8_C( 4), INT8_C( 7), INT8_C( -21), INT8_C( 33), INT8_C( 36), INT8_C( 127), INT8_C( 110), INT8_C( -30), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 126), INT8_C( 22), INT8_C( -76), INT8_C( 74), INT8_C( 127), INT8_C( 60), INT8_C( 25), INT8_C( 36), INT8_C(-127), INT8_C(-128), INT8_C( 127), 
INT8_C(-128), INT8_C(-128), INT8_C( -21), INT8_C( -94), INT8_C(-119)) }, { simde_mm256_set_epi8(INT8_C( -17), INT8_C( -49), INT8_C( 22), INT8_C( -45), INT8_C( 54), INT8_C( 22), INT8_C( -24), INT8_C( 117), INT8_C( -78), INT8_C( 103), INT8_C(-105), INT8_C( 94), INT8_C(-107), INT8_C( 6), INT8_C(-126), INT8_C( 89), INT8_C( 17), INT8_C( 8), INT8_C( 15), INT8_C( -34), INT8_C( 29), INT8_C( 90), INT8_C( 96), INT8_C( -78), INT8_C( -87), INT8_C( -25), INT8_C( 62), INT8_C( 48), INT8_C(-125), INT8_C( -71), INT8_C( -72), INT8_C( 85)), simde_mm256_set_epi8(INT8_C( -54), INT8_C( 53), INT8_C( -73), INT8_C( -38), INT8_C(-124), INT8_C( 23), INT8_C( 96), INT8_C( -55), INT8_C( 34), INT8_C( 108), INT8_C( -62), INT8_C(-104), INT8_C( 81), INT8_C( -26), INT8_C( -9), INT8_C( -47), INT8_C( -73), INT8_C( 27), INT8_C( -45), INT8_C(-119), INT8_C(-120), INT8_C(-121), INT8_C( 67), INT8_C( -46), INT8_C( 17), INT8_C( 101), INT8_C( 16), INT8_C(-101), INT8_C( -58), INT8_C( 77), INT8_C( -17), INT8_C( -35)), simde_mm256_set_epi8(INT8_C( -71), INT8_C( 4), INT8_C( -51), INT8_C( -83), INT8_C( -70), INT8_C( 45), INT8_C( 72), INT8_C( 62), INT8_C( -44), INT8_C( 127), INT8_C(-128), INT8_C( -10), INT8_C( -26), INT8_C( -20), INT8_C(-128), INT8_C( 42), INT8_C( -56), INT8_C( 35), INT8_C( -30), INT8_C(-128), INT8_C( -91), INT8_C( -31), INT8_C( 127), INT8_C(-124), INT8_C( -70), INT8_C( 76), INT8_C( 78), INT8_C( -53), INT8_C(-128), INT8_C( 6), INT8_C( -89), INT8_C( 50)) }, { simde_mm256_set_epi8(INT8_C( 99), INT8_C( 94), INT8_C( -71), INT8_C( 34), INT8_C(-112), INT8_C( 96), INT8_C( -20), INT8_C( -14), INT8_C( 85), INT8_C( 101), INT8_C(-125), INT8_C( -64), INT8_C( 54), INT8_C( -63), INT8_C( -68), INT8_C( -91), INT8_C( 62), INT8_C( -51), INT8_C( 27), INT8_C( 22), INT8_C( -58), INT8_C( 12), INT8_C( 9), INT8_C( -36), INT8_C( 40), INT8_C( -72), INT8_C( -90), INT8_C( -3), INT8_C( -94), INT8_C( -54), INT8_C( 87), INT8_C( -43)), simde_mm256_set_epi8(INT8_C( 99), INT8_C( -12), INT8_C( 40), INT8_C( -99), INT8_C( 48), 
INT8_C(-109), INT8_C( 57), INT8_C( 112), INT8_C( -67), INT8_C( -21), INT8_C( 14), INT8_C( 22), INT8_C( -61), INT8_C( -64), INT8_C( 99), INT8_C( -14), INT8_C( 121), INT8_C( 116), INT8_C( 125), INT8_C( -80), INT8_C( -90), INT8_C( -41), INT8_C( 82), INT8_C( 106), INT8_C( -62), INT8_C( -13), INT8_C( -52), INT8_C( -14), INT8_C( -76), INT8_C(-109), INT8_C( 112), INT8_C( -92)), simde_mm256_set_epi8(INT8_C( 127), INT8_C( 82), INT8_C( -31), INT8_C( -65), INT8_C( -64), INT8_C( -13), INT8_C( 37), INT8_C( 98), INT8_C( 18), INT8_C( 80), INT8_C(-111), INT8_C( -42), INT8_C( -7), INT8_C(-127), INT8_C( 31), INT8_C(-105), INT8_C( 127), INT8_C( 65), INT8_C( 127), INT8_C( -58), INT8_C(-128), INT8_C( -29), INT8_C( 91), INT8_C( 70), INT8_C( -22), INT8_C( -85), INT8_C(-128), INT8_C( -17), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(-128)) }, { simde_mm256_set_epi8(INT8_C( -49), INT8_C( 105), INT8_C(-105), INT8_C( 27), INT8_C( -85), INT8_C( 38), INT8_C(-122), INT8_C( 31), INT8_C( -23), INT8_C( 94), INT8_C( 33), INT8_C( 112), INT8_C( 117), INT8_C( 95), INT8_C( 107), INT8_C( -60), INT8_C( 41), INT8_C( -75), INT8_C( 112), INT8_C(-110), INT8_C( 101), INT8_C( -13), INT8_C( 127), INT8_C( -84), INT8_C( -84), INT8_C( -70), INT8_C( -49), INT8_C( 84), INT8_C( -1), INT8_C( -17), INT8_C( -2), INT8_C( 84)), simde_mm256_set_epi8(INT8_C(-108), INT8_C( 63), INT8_C( 104), INT8_C( -9), INT8_C( 74), INT8_C( 114), INT8_C( -61), INT8_C( 39), INT8_C( 15), INT8_C( 6), INT8_C( 41), INT8_C( 125), INT8_C( 96), INT8_C( -39), INT8_C( -42), INT8_C( 75), INT8_C( -60), INT8_C( 18), INT8_C(-100), INT8_C( -10), INT8_C( -33), INT8_C( -88), INT8_C( -36), INT8_C( 79), INT8_C( 49), INT8_C( 63), INT8_C( -18), INT8_C( -83), INT8_C( 44), INT8_C( 75), INT8_C( -17), INT8_C( -9)), simde_mm256_set_epi8(INT8_C(-128), INT8_C( 127), INT8_C( -1), INT8_C( 18), INT8_C( -11), INT8_C( 127), INT8_C(-128), INT8_C( 70), INT8_C( -8), INT8_C( 100), INT8_C( 74), INT8_C( 127), INT8_C( 127), INT8_C( 56), INT8_C( 65), INT8_C( 15), INT8_C( 
-19), INT8_C( -57), INT8_C( 12), INT8_C(-120), INT8_C( 68), INT8_C(-101), INT8_C( 91), INT8_C( -5), INT8_C( -35), INT8_C( -7), INT8_C( -67), INT8_C( 1), INT8_C( 43), INT8_C( 58), INT8_C( -19), INT8_C( 75)) }, { simde_mm256_set_epi8(INT8_C( -41), INT8_C( -94), INT8_C( 69), INT8_C( 109), INT8_C(-102), INT8_C( 64), INT8_C( 116), INT8_C( 22), INT8_C( -63), INT8_C( 76), INT8_C( -59), INT8_C( -94), INT8_C( 69), INT8_C( 1), INT8_C( -30), INT8_C( 101), INT8_C( 121), INT8_C( 10), INT8_C( -82), INT8_C( -33), INT8_C( -83), INT8_C( -50), INT8_C(-111), INT8_C( -72), INT8_C( 42), INT8_C(-125), INT8_C(-128), INT8_C( -8), INT8_C( 27), INT8_C( -93), INT8_C(-126), INT8_C( -77)), simde_mm256_set_epi8(INT8_C( -9), INT8_C( -48), INT8_C( 73), INT8_C( 31), INT8_C( 127), INT8_C( 88), INT8_C( 20), INT8_C( -82), INT8_C( -19), INT8_C( 3), INT8_C( 83), INT8_C( 114), INT8_C( 1), INT8_C( 31), INT8_C( 44), INT8_C( 5), INT8_C( -63), INT8_C( -96), INT8_C( 2), INT8_C(-126), INT8_C( 96), INT8_C( -97), INT8_C( -87), INT8_C( -40), INT8_C(-112), INT8_C( 92), INT8_C( -98), INT8_C( -50), INT8_C( 63), INT8_C( -57), INT8_C( 24), INT8_C( -21)), simde_mm256_set_epi8(INT8_C( -50), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 25), INT8_C( 127), INT8_C( 127), INT8_C( -60), INT8_C( -82), INT8_C( 79), INT8_C( 24), INT8_C( 20), INT8_C( 70), INT8_C( 32), INT8_C( 14), INT8_C( 106), INT8_C( 58), INT8_C( -86), INT8_C( -80), INT8_C(-128), INT8_C( 13), INT8_C(-128), INT8_C(-128), INT8_C(-112), INT8_C( -70), INT8_C( -33), INT8_C(-128), INT8_C( -58), INT8_C( 90), INT8_C(-128), INT8_C(-102), INT8_C( -98)) }, { simde_mm256_set_epi8(INT8_C( 59), INT8_C( -47), INT8_C( -6), INT8_C( 114), INT8_C( 104), INT8_C( 53), INT8_C(-112), INT8_C( 19), INT8_C( 115), INT8_C( 22), INT8_C( 66), INT8_C( 27), INT8_C( -25), INT8_C( -41), INT8_C(-111), INT8_C( 115), INT8_C( -21), INT8_C( -32), INT8_C( -5), INT8_C( 11), INT8_C( -6), INT8_C( 110), INT8_C( -89), INT8_C( -64), INT8_C(-104), INT8_C( 74), INT8_C( -29), INT8_C( 87), INT8_C( -8), 
INT8_C( 96), INT8_C( 5), INT8_C( 122)), simde_mm256_set_epi8(INT8_C( 43), INT8_C( 18), INT8_C( 50), INT8_C(-115), INT8_C( 38), INT8_C( -78), INT8_C( -51), INT8_C( 97), INT8_C( 30), INT8_C( 7), INT8_C( -46), INT8_C( -16), INT8_C( 109), INT8_C(-103), INT8_C( -61), INT8_C( 64), INT8_C( 60), INT8_C( -63), INT8_C( -52), INT8_C( -15), INT8_C( 104), INT8_C( 105), INT8_C( 61), INT8_C( 106), INT8_C( -66), INT8_C( 111), INT8_C( -25), INT8_C( 24), INT8_C( 28), INT8_C( 102), INT8_C( 8), INT8_C( 93)), simde_mm256_set_epi8(INT8_C( 102), INT8_C( -29), INT8_C( 44), INT8_C( -1), INT8_C( 127), INT8_C( -25), INT8_C(-128), INT8_C( 116), INT8_C( 127), INT8_C( 29), INT8_C( 20), INT8_C( 11), INT8_C( 84), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 39), INT8_C( -95), INT8_C( -57), INT8_C( -4), INT8_C( 98), INT8_C( 127), INT8_C( -28), INT8_C( 42), INT8_C(-128), INT8_C( 127), INT8_C( -54), INT8_C( 111), INT8_C( 20), INT8_C( 127), INT8_C( 13), INT8_C( 127)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_adds_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_adds_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C( -962), INT16_C( 12004), INT16_C( 15276), INT16_C( 19344), INT16_C( 1682), INT16_C( 24393), INT16_C(-26791), INT16_C( 15115), INT16_C( 26019), INT16_C(-25175), INT16_C(-17857), INT16_C( 29245), INT16_C( -3095), INT16_C( -1043), INT16_C( 8684), INT16_C( -4510)), simde_mm256_set_epi16(INT16_C( 1387), INT16_C( 5995), INT16_C( 24092), INT16_C(-22020), INT16_C( 17334), INT16_C(-21406), INT16_C( 5443), INT16_C( -4682), INT16_C( 19873), INT16_C(-31638), INT16_C( 16556), INT16_C(-22745), INT16_C( 6086), INT16_C(-26599), INT16_C(-32635), INT16_C( 18742)), simde_mm256_set_epi16(INT16_C( 425), INT16_C( 17999), INT16_C( 32767), INT16_C( -2676), INT16_C( 19016), 
INT16_C( 2987), INT16_C(-21348), INT16_C( 10433), INT16_C( 32767), INT16_C(-32768), INT16_C( -1301), INT16_C( 6500), INT16_C( 2991), INT16_C(-27642), INT16_C(-23951), INT16_C( 14232)) }, { simde_mm256_set_epi16(INT16_C(-15667), INT16_C( -4604), INT16_C( -1424), INT16_C( 14196), INT16_C( -3271), INT16_C(-28350), INT16_C( 32228), INT16_C(-15812), INT16_C( -3284), INT16_C( -332), INT16_C(-21864), INT16_C(-23002), INT16_C( 15429), INT16_C( 22829), INT16_C( -2222), INT16_C( 22367)), simde_mm256_set_epi16(INT16_C( 2292), INT16_C( 20266), INT16_C( 19204), INT16_C(-18548), INT16_C(-10545), INT16_C( 20262), INT16_C( 18576), INT16_C( 18942), INT16_C( 30300), INT16_C( -3505), INT16_C( 25496), INT16_C(-21517), INT16_C( 23044), INT16_C( 26958), INT16_C( 16161), INT16_C( -8396)), simde_mm256_set_epi16(INT16_C(-13375), INT16_C( 15662), INT16_C( 17780), INT16_C( -4352), INT16_C(-13816), INT16_C( -8088), INT16_C( 32767), INT16_C( 3130), INT16_C( 27016), INT16_C( -3837), INT16_C( 3632), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 13939), INT16_C( 13971)) }, { simde_mm256_set_epi16(INT16_C(-13807), INT16_C(-17934), INT16_C( 30355), INT16_C( -764), INT16_C( -4690), INT16_C( 11643), INT16_C( 12025), INT16_C( 13748), INT16_C( 20416), INT16_C(-20138), INT16_C( -7279), INT16_C(-10075), INT16_C( 30794), INT16_C( -8219), INT16_C( 7665), INT16_C( 31634)), simde_mm256_set_epi16(INT16_C( 19174), INT16_C( 30134), INT16_C( -229), INT16_C( 19963), INT16_C( -2304), INT16_C( 30679), INT16_C( 27179), INT16_C(-10745), INT16_C(-27839), INT16_C( -4281), INT16_C( 23775), INT16_C( -9849), INT16_C(-26461), INT16_C(-25537), INT16_C( -6859), INT16_C(-14106)), simde_mm256_set_epi16(INT16_C( 5367), INT16_C( 12200), INT16_C( 30126), INT16_C( 19199), INT16_C( -6994), INT16_C( 32767), INT16_C( 32767), INT16_C( 3003), INT16_C( -7423), INT16_C(-24419), INT16_C( 16496), INT16_C(-19924), INT16_C( 4333), INT16_C(-32768), INT16_C( 806), INT16_C( 17528)) }, { simde_mm256_set_epi16(INT16_C( -5643), 
INT16_C( 16549), INT16_C( 27397), INT16_C(-21486), INT16_C( -4783), INT16_C( -4255), INT16_C( -9777), INT16_C( -2005), INT16_C( -5487), INT16_C( 4410), INT16_C( 9721), INT16_C(-18951), INT16_C( 27380), INT16_C( 27675), INT16_C(-18193), INT16_C( -8216)), simde_mm256_set_epi16(INT16_C( 3093), INT16_C(-27399), INT16_C( 25399), INT16_C( 21162), INT16_C(-20342), INT16_C(-26357), INT16_C( 20961), INT16_C(-29046), INT16_C(-12304), INT16_C(-23482), INT16_C( 31742), INT16_C( 26191), INT16_C( 32539), INT16_C( 28035), INT16_C( -8379), INT16_C( 32320)), simde_mm256_set_epi16(INT16_C( -2550), INT16_C(-10850), INT16_C( 32767), INT16_C( -324), INT16_C(-25125), INT16_C(-30612), INT16_C( 11184), INT16_C(-31051), INT16_C(-17791), INT16_C(-19072), INT16_C( 32767), INT16_C( 7240), INT16_C( 32767), INT16_C( 32767), INT16_C(-26572), INT16_C( 24104)) }, { simde_mm256_set_epi16(INT16_C( 8117), INT16_C( 765), INT16_C( -4891), INT16_C( -1773), INT16_C( 21984), INT16_C( 23512), INT16_C( 8564), INT16_C( 676), INT16_C(-22280), INT16_C( 5831), INT16_C(-15902), INT16_C( 13241), INT16_C( 6903), INT16_C( -2164), INT16_C(-27428), INT16_C(-27012)), simde_mm256_set_epi16(INT16_C( 12023), INT16_C(-19040), INT16_C( 31146), INT16_C(-18380), INT16_C(-24072), INT16_C( 14767), INT16_C( 22843), INT16_C( -2924), INT16_C( -5044), INT16_C(-32368), INT16_C( 21585), INT16_C( -7796), INT16_C( 32151), INT16_C( 8315), INT16_C( 19587), INT16_C(-17957)), simde_mm256_set_epi16(INT16_C( 20140), INT16_C(-18275), INT16_C( 26255), INT16_C(-20153), INT16_C( -2088), INT16_C( 32767), INT16_C( 31407), INT16_C( -2248), INT16_C(-27324), INT16_C(-26537), INT16_C( 5683), INT16_C( 5445), INT16_C( 32767), INT16_C( 6151), INT16_C( -7841), INT16_C(-32768)) }, { simde_mm256_set_epi16(INT16_C(-15432), INT16_C(-29555), INT16_C(-22086), INT16_C(-23352), INT16_C(-24272), INT16_C( 28442), INT16_C( -6183), INT16_C( 20311), INT16_C(-15448), INT16_C(-31565), INT16_C(-17613), INT16_C( -1655), INT16_C( -3795), INT16_C( 27576), INT16_C(-23497), 
INT16_C( -9670)), simde_mm256_set_epi16(INT16_C(-16830), INT16_C( -159), INT16_C(-22301), INT16_C(-32198), INT16_C( 2834), INT16_C( 201), INT16_C(-29264), INT16_C( -1240), INT16_C( 4796), INT16_C(-27180), INT16_C( 31617), INT16_C( -2210), INT16_C( -9712), INT16_C( 10057), INT16_C( -1995), INT16_C( 27783)), simde_mm256_set_epi16(INT16_C(-32262), INT16_C(-29714), INT16_C(-32768), INT16_C(-32768), INT16_C(-21438), INT16_C( 28643), INT16_C(-32768), INT16_C( 19071), INT16_C(-10652), INT16_C(-32768), INT16_C( 14004), INT16_C( -3865), INT16_C(-13507), INT16_C( 32767), INT16_C(-25492), INT16_C( 18113)) }, { simde_mm256_set_epi16(INT16_C(-15154), INT16_C( 9051), INT16_C( 6258), INT16_C( -1499), INT16_C(-27693), INT16_C( -8500), INT16_C( -882), INT16_C( 9147), INT16_C( 22175), INT16_C( 10790), INT16_C(-10382), INT16_C( 21347), INT16_C( -8836), INT16_C(-17904), INT16_C(-31823), INT16_C(-28959)), simde_mm256_set_epi16(INT16_C( 31738), INT16_C( 18200), INT16_C(-12479), INT16_C( 972), INT16_C(-30550), INT16_C(-19770), INT16_C( 32501), INT16_C( -4294), INT16_C(-22747), INT16_C(-27490), INT16_C(-20241), INT16_C(-31535), INT16_C(-31518), INT16_C(-18415), INT16_C( 12039), INT16_C(-31202)), simde_mm256_set_epi16(INT16_C( 16584), INT16_C( 27251), INT16_C( -6221), INT16_C( -527), INT16_C(-32768), INT16_C(-28270), INT16_C( 31619), INT16_C( 4853), INT16_C( -572), INT16_C(-16700), INT16_C(-30623), INT16_C(-10188), INT16_C(-32768), INT16_C(-32768), INT16_C(-19784), INT16_C(-32768)) }, { simde_mm256_set_epi16(INT16_C(-23218), INT16_C(-16504), INT16_C(-28974), INT16_C(-20854), INT16_C( 27922), INT16_C( 717), INT16_C(-10156), INT16_C( 13317), INT16_C( 12071), INT16_C(-29036), INT16_C( 16491), INT16_C( 6863), INT16_C( 3054), INT16_C( -8502), INT16_C(-19858), INT16_C(-20664)), simde_mm256_set_epi16(INT16_C(-22864), INT16_C(-13701), INT16_C( -9243), INT16_C( -4457), INT16_C( 16105), INT16_C( 3518), INT16_C(-11607), INT16_C(-19946), INT16_C( 5641), INT16_C( 8617), INT16_C(-22446), INT16_C( 2151), 
INT16_C( 16533), INT16_C(-18326), INT16_C( 16839), INT16_C( 26518)), simde_mm256_set_epi16(INT16_C(-32768), INT16_C(-30205), INT16_C(-32768), INT16_C(-25311), INT16_C( 32767), INT16_C( 4235), INT16_C(-21763), INT16_C( -6629), INT16_C( 17712), INT16_C(-20419), INT16_C( -5955), INT16_C( 9014), INT16_C( 19587), INT16_C(-26828), INT16_C( -3019), INT16_C( 5854)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_adds_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_adds_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu8(UINT8_C(253), UINT8_C(134), UINT8_C(240), UINT8_C(121), UINT8_C(194), UINT8_C( 6), UINT8_C( 90), UINT8_C(185), UINT8_C( 52), UINT8_C(188), UINT8_C(255), UINT8_C(213), UINT8_C( 70), UINT8_C(140), UINT8_C( 59), UINT8_C(206), UINT8_C( 91), UINT8_C( 56), UINT8_C(139), UINT8_C( 19), UINT8_C( 62), UINT8_C( 91), UINT8_C( 24), UINT8_C( 86), UINT8_C(156), UINT8_C( 89), UINT8_C( 98), UINT8_C(113), UINT8_C(237), UINT8_C( 2), UINT8_C(237), UINT8_C(177)), simde_x_mm256_set_epu8(UINT8_C(213), UINT8_C( 63), UINT8_C( 15), UINT8_C(166), UINT8_C( 63), UINT8_C(196), UINT8_C(141), UINT8_C(108), UINT8_C( 47), UINT8_C(216), UINT8_C( 17), UINT8_C(218), UINT8_C(111), UINT8_C(130), UINT8_C( 1), UINT8_C(159), UINT8_C( 50), UINT8_C(145), UINT8_C(171), UINT8_C( 70), UINT8_C( 84), UINT8_C(160), UINT8_C(222), UINT8_C(215), UINT8_C( 44), UINT8_C(139), UINT8_C( 68), UINT8_C( 49), UINT8_C(218), UINT8_C( 62), UINT8_C(151), UINT8_C(225)), simde_x_mm256_set_epu8(UINT8_C(255), UINT8_C(197), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(202), UINT8_C(231), UINT8_C(255), UINT8_C( 99), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(181), UINT8_C(255), UINT8_C( 60), UINT8_C(255), UINT8_C(141), UINT8_C(201), UINT8_C(255), UINT8_C( 89), UINT8_C(146), UINT8_C(251), 
UINT8_C(246), UINT8_C(255), UINT8_C(200), UINT8_C(228), UINT8_C(166), UINT8_C(162), UINT8_C(255), UINT8_C( 64), UINT8_C(255), UINT8_C(255)) }, { simde_x_mm256_set_epu8(UINT8_C( 46), UINT8_C( 60), UINT8_C( 83), UINT8_C( 69), UINT8_C( 75), UINT8_C( 52), UINT8_C(182), UINT8_C( 56), UINT8_C( 10), UINT8_C(180), UINT8_C(245), UINT8_C(208), UINT8_C(165), UINT8_C( 77), UINT8_C(214), UINT8_C(172), UINT8_C( 90), UINT8_C( 10), UINT8_C(190), UINT8_C(204), UINT8_C(174), UINT8_C(200), UINT8_C( 75), UINT8_C(188), UINT8_C(215), UINT8_C( 51), UINT8_C(188), UINT8_C( 47), UINT8_C( 17), UINT8_C(116), UINT8_C(116), UINT8_C(103)), simde_x_mm256_set_epu8(UINT8_C( 76), UINT8_C( 23), UINT8_C( 40), UINT8_C( 7), UINT8_C( 32), UINT8_C(238), UINT8_C(187), UINT8_C( 34), UINT8_C(130), UINT8_C(185), UINT8_C(135), UINT8_C( 64), UINT8_C(167), UINT8_C(215), UINT8_C(226), UINT8_C(221), UINT8_C( 22), UINT8_C( 21), UINT8_C( 86), UINT8_C(166), UINT8_C( 38), UINT8_C( 88), UINT8_C(194), UINT8_C( 97), UINT8_C( 79), UINT8_C(177), UINT8_C(251), UINT8_C(218), UINT8_C( 76), UINT8_C(121), UINT8_C(164), UINT8_C( 80)), simde_x_mm256_set_epu8(UINT8_C(122), UINT8_C( 83), UINT8_C(123), UINT8_C( 76), UINT8_C(107), UINT8_C(255), UINT8_C(255), UINT8_C( 90), UINT8_C(140), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(112), UINT8_C( 31), UINT8_C(255), UINT8_C(255), UINT8_C(212), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(228), UINT8_C(255), UINT8_C(255), UINT8_C( 93), UINT8_C(237), UINT8_C(255), UINT8_C(183)) }, { simde_x_mm256_set_epu8(UINT8_C( 62), UINT8_C(125), UINT8_C(167), UINT8_C( 69), UINT8_C(185), UINT8_C(211), UINT8_C(194), UINT8_C( 87), UINT8_C( 42), UINT8_C( 67), UINT8_C(196), UINT8_C(131), UINT8_C( 56), UINT8_C(103), UINT8_C( 93), UINT8_C(201), UINT8_C(200), UINT8_C( 67), UINT8_C( 5), UINT8_C(251), UINT8_C(171), UINT8_C( 0), UINT8_C(242), UINT8_C(219), UINT8_C( 52), UINT8_C( 56), UINT8_C(153), UINT8_C(193), UINT8_C(206), 
UINT8_C(184), UINT8_C(179), UINT8_C(112)), simde_x_mm256_set_epu8(UINT8_C( 3), UINT8_C( 94), UINT8_C( 2), UINT8_C( 80), UINT8_C( 75), UINT8_C( 54), UINT8_C(116), UINT8_C(175), UINT8_C(133), UINT8_C(183), UINT8_C(131), UINT8_C( 7), UINT8_C( 65), UINT8_C( 80), UINT8_C(192), UINT8_C( 97), UINT8_C(148), UINT8_C(232), UINT8_C(135), UINT8_C(251), UINT8_C(194), UINT8_C( 84), UINT8_C(121), UINT8_C( 35), UINT8_C( 55), UINT8_C(172), UINT8_C( 74), UINT8_C( 18), UINT8_C( 14), UINT8_C( 11), UINT8_C( 86), UINT8_C(161)), simde_x_mm256_set_epu8(UINT8_C( 65), UINT8_C(219), UINT8_C(169), UINT8_C(149), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(175), UINT8_C(250), UINT8_C(255), UINT8_C(138), UINT8_C(121), UINT8_C(183), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(140), UINT8_C(255), UINT8_C(255), UINT8_C( 84), UINT8_C(255), UINT8_C(254), UINT8_C(107), UINT8_C(228), UINT8_C(227), UINT8_C(211), UINT8_C(220), UINT8_C(195), UINT8_C(255), UINT8_C(255)) }, { simde_x_mm256_set_epu8(UINT8_C(180), UINT8_C(149), UINT8_C(241), UINT8_C( 31), UINT8_C( 45), UINT8_C(238), UINT8_C(127), UINT8_C( 41), UINT8_C( 61), UINT8_C( 40), UINT8_C(253), UINT8_C(133), UINT8_C(247), UINT8_C(164), UINT8_C(139), UINT8_C(228), UINT8_C( 62), UINT8_C(209), UINT8_C(132), UINT8_C( 80), UINT8_C(102), UINT8_C(192), UINT8_C(185), UINT8_C(191), UINT8_C(100), UINT8_C(154), UINT8_C( 33), UINT8_C( 61), UINT8_C( 93), UINT8_C(153), UINT8_C(220), UINT8_C(160)), simde_x_mm256_set_epu8(UINT8_C(230), UINT8_C( 84), UINT8_C( 78), UINT8_C(157), UINT8_C(192), UINT8_C( 80), UINT8_C(173), UINT8_C(216), UINT8_C( 47), UINT8_C(218), UINT8_C( 93), UINT8_C(143), UINT8_C( 45), UINT8_C( 59), UINT8_C(151), UINT8_C(134), UINT8_C( 70), UINT8_C( 90), UINT8_C( 58), UINT8_C(114), UINT8_C(223), UINT8_C(242), UINT8_C( 15), UINT8_C(131), UINT8_C( 82), UINT8_C(204), UINT8_C(173), UINT8_C( 5), UINT8_C( 58), UINT8_C(182), UINT8_C(252), UINT8_C(237)), simde_x_mm256_set_epu8(UINT8_C(255), UINT8_C(233), UINT8_C(255), 
UINT8_C(188), UINT8_C(237), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(108), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(223), UINT8_C(255), UINT8_C(255), UINT8_C(132), UINT8_C(255), UINT8_C(190), UINT8_C(194), UINT8_C(255), UINT8_C(255), UINT8_C(200), UINT8_C(255), UINT8_C(182), UINT8_C(255), UINT8_C(206), UINT8_C( 66), UINT8_C(151), UINT8_C(255), UINT8_C(255), UINT8_C(255)) }, { simde_x_mm256_set_epu8(UINT8_C(201), UINT8_C(214), UINT8_C(179), UINT8_C(233), UINT8_C(198), UINT8_C( 81), UINT8_C(243), UINT8_C(119), UINT8_C(160), UINT8_C(147), UINT8_C( 34), UINT8_C(138), UINT8_C(127), UINT8_C( 31), UINT8_C(145), UINT8_C( 40), UINT8_C(228), UINT8_C( 45), UINT8_C( 3), UINT8_C( 4), UINT8_C(249), UINT8_C(180), UINT8_C(210), UINT8_C( 7), UINT8_C(175), UINT8_C( 81), UINT8_C( 15), UINT8_C(137), UINT8_C( 20), UINT8_C( 57), UINT8_C( 3), UINT8_C(157)), simde_x_mm256_set_epu8(UINT8_C(209), UINT8_C( 16), UINT8_C(253), UINT8_C(199), UINT8_C(185), UINT8_C(238), UINT8_C( 28), UINT8_C( 87), UINT8_C( 41), UINT8_C(232), UINT8_C( 54), UINT8_C(100), UINT8_C(160), UINT8_C( 87), UINT8_C(101), UINT8_C(193), UINT8_C(173), UINT8_C(242), UINT8_C(182), UINT8_C( 0), UINT8_C(222), UINT8_C(142), UINT8_C(217), UINT8_C(177), UINT8_C(237), UINT8_C(196), UINT8_C(145), UINT8_C(208), UINT8_C( 95), UINT8_C(248), UINT8_C( 86), UINT8_C( 20)), simde_x_mm256_set_epu8(UINT8_C(255), UINT8_C(230), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(206), UINT8_C(201), UINT8_C(255), UINT8_C( 88), UINT8_C(238), UINT8_C(255), UINT8_C(118), UINT8_C(246), UINT8_C(233), UINT8_C(255), UINT8_C(255), UINT8_C(185), UINT8_C( 4), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(184), UINT8_C(255), UINT8_C(255), UINT8_C(160), UINT8_C(255), UINT8_C(115), UINT8_C(255), UINT8_C( 89), UINT8_C(177)) }, { simde_x_mm256_set_epu8(UINT8_C(117), UINT8_C(251), UINT8_C(156), UINT8_C( 72), UINT8_C(241), UINT8_C( 25), UINT8_C( 80), UINT8_C(195), UINT8_C(213), UINT8_C( 26), UINT8_C( 
44), UINT8_C(154), UINT8_C( 40), UINT8_C(201), UINT8_C(142), UINT8_C(110), UINT8_C( 17), UINT8_C(100), UINT8_C( 41), UINT8_C(223), UINT8_C(255), UINT8_C(232), UINT8_C(253), UINT8_C(190), UINT8_C(155), UINT8_C(178), UINT8_C(150), UINT8_C(248), UINT8_C(166), UINT8_C(223), UINT8_C( 92), UINT8_C(146)), simde_x_mm256_set_epu8(UINT8_C(164), UINT8_C(171), UINT8_C(230), UINT8_C(232), UINT8_C( 86), UINT8_C(220), UINT8_C(153), UINT8_C(194), UINT8_C( 83), UINT8_C( 81), UINT8_C( 80), UINT8_C( 93), UINT8_C( 96), UINT8_C( 68), UINT8_C( 55), UINT8_C(183), UINT8_C(110), UINT8_C(151), UINT8_C( 66), UINT8_C(168), UINT8_C(210), UINT8_C( 58), UINT8_C(169), UINT8_C(189), UINT8_C( 61), UINT8_C( 15), UINT8_C(124), UINT8_C(190), UINT8_C( 93), UINT8_C(242), UINT8_C( 80), UINT8_C(225)), simde_x_mm256_set_epu8(UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(245), UINT8_C(233), UINT8_C(255), UINT8_C(255), UINT8_C(107), UINT8_C(124), UINT8_C(247), UINT8_C(136), UINT8_C(255), UINT8_C(197), UINT8_C(255), UINT8_C(127), UINT8_C(251), UINT8_C(107), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(216), UINT8_C(193), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(172), UINT8_C(255)) }, { simde_x_mm256_set_epu8(UINT8_C(130), UINT8_C( 76), UINT8_C( 98), UINT8_C( 8), UINT8_C(243), UINT8_C(125), UINT8_C(189), UINT8_C(162), UINT8_C(107), UINT8_C( 98), UINT8_C(171), UINT8_C( 80), UINT8_C(243), UINT8_C(225), UINT8_C( 6), UINT8_C( 11), UINT8_C(250), UINT8_C(210), UINT8_C( 60), UINT8_C(230), UINT8_C( 17), UINT8_C(222), UINT8_C( 70), UINT8_C(180), UINT8_C( 28), UINT8_C( 96), UINT8_C(128), UINT8_C(195), UINT8_C(240), UINT8_C(119), UINT8_C(199), UINT8_C( 65)), simde_x_mm256_set_epu8(UINT8_C( 47), UINT8_C(225), UINT8_C( 74), UINT8_C(106), UINT8_C(149), UINT8_C(250), UINT8_C(221), UINT8_C( 24), UINT8_C(161), UINT8_C( 69), UINT8_C(209), UINT8_C(120), UINT8_C(116), UINT8_C(235), UINT8_C( 4), UINT8_C(161), UINT8_C(156), UINT8_C(193), 
UINT8_C(253), UINT8_C( 2), UINT8_C(168), UINT8_C( 52), UINT8_C(231), UINT8_C(201), UINT8_C(115), UINT8_C( 0), UINT8_C(176), UINT8_C(224), UINT8_C( 4), UINT8_C(219), UINT8_C(202), UINT8_C(177)), simde_x_mm256_set_epu8(UINT8_C(177), UINT8_C(255), UINT8_C(172), UINT8_C(114), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(186), UINT8_C(255), UINT8_C(167), UINT8_C(255), UINT8_C(200), UINT8_C(255), UINT8_C(255), UINT8_C( 10), UINT8_C(172), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(232), UINT8_C(185), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(143), UINT8_C( 96), UINT8_C(255), UINT8_C(255), UINT8_C(244), UINT8_C(255), UINT8_C(255), UINT8_C(242)) }, { simde_x_mm256_set_epu8(UINT8_C(144), UINT8_C( 62), UINT8_C(142), UINT8_C(211), UINT8_C( 41), UINT8_C(162), UINT8_C(184), UINT8_C(180), UINT8_C(242), UINT8_C( 80), UINT8_C( 73), UINT8_C( 63), UINT8_C(168), UINT8_C(176), UINT8_C( 12), UINT8_C( 85), UINT8_C( 62), UINT8_C( 83), UINT8_C( 52), UINT8_C( 39), UINT8_C( 56), UINT8_C(245), UINT8_C( 65), UINT8_C(213), UINT8_C( 94), UINT8_C( 88), UINT8_C(157), UINT8_C(124), UINT8_C(123), UINT8_C(196), UINT8_C( 79), UINT8_C( 49)), simde_x_mm256_set_epu8(UINT8_C(142), UINT8_C( 71), UINT8_C(103), UINT8_C(139), UINT8_C(236), UINT8_C(100), UINT8_C(139), UINT8_C(154), UINT8_C(203), UINT8_C(125), UINT8_C(237), UINT8_C(236), UINT8_C( 45), UINT8_C(103), UINT8_C( 56), UINT8_C( 68), UINT8_C( 26), UINT8_C( 73), UINT8_C(130), UINT8_C( 19), UINT8_C(222), UINT8_C(206), UINT8_C(129), UINT8_C(101), UINT8_C( 56), UINT8_C(145), UINT8_C(213), UINT8_C(249), UINT8_C(145), UINT8_C( 67), UINT8_C(236), UINT8_C(136)), simde_x_mm256_set_epu8(UINT8_C(255), UINT8_C(133), UINT8_C(245), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(205), UINT8_C(255), UINT8_C(255), UINT8_C(213), UINT8_C(255), UINT8_C( 68), UINT8_C(153), UINT8_C( 88), UINT8_C(156), UINT8_C(182), UINT8_C( 58), UINT8_C(255), UINT8_C(255), UINT8_C(194), UINT8_C(255), UINT8_C(150), 
UINT8_C(233), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(185)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_adds_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_adds_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu16(UINT16_C( 46173), UINT16_C( 51502), UINT16_C( 53334), UINT16_C( 43223), UINT16_C( 23928), UINT16_C( 20321), UINT16_C( 51743), UINT16_C( 37618), UINT16_C( 65078), UINT16_C( 7253), UINT16_C( 52827), UINT16_C( 55189), UINT16_C( 4987), UINT16_C( 17665), UINT16_C( 14350), UINT16_C( 38811)), simde_x_mm256_set_epu16(UINT16_C( 37136), UINT16_C( 48792), UINT16_C( 4303), UINT16_C( 5479), UINT16_C( 25191), UINT16_C( 58551), UINT16_C( 48285), UINT16_C( 4997), UINT16_C( 11428), UINT16_C( 55078), UINT16_C( 56047), UINT16_C( 52960), UINT16_C( 65202), UINT16_C( 38018), UINT16_C( 624), UINT16_C( 7777)), simde_x_mm256_set_epu16(UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 57637), UINT16_C( 48702), UINT16_C( 49119), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 42615), UINT16_C( 65535), UINT16_C( 62331), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 55683), UINT16_C( 14974), UINT16_C( 46588)) }, { simde_x_mm256_set_epu16(UINT16_C( 32326), UINT16_C( 24612), UINT16_C( 21846), UINT16_C( 55200), UINT16_C( 41586), UINT16_C( 19758), UINT16_C( 51650), UINT16_C( 38909), UINT16_C( 46063), UINT16_C( 28335), UINT16_C( 9134), UINT16_C( 41639), UINT16_C( 55060), UINT16_C( 39890), UINT16_C( 8293), UINT16_C( 53471)), simde_x_mm256_set_epu16(UINT16_C( 41234), UINT16_C( 2183), UINT16_C( 63540), UINT16_C( 1722), UINT16_C( 28608), UINT16_C( 25104), UINT16_C( 45712), UINT16_C( 60419), UINT16_C( 19160), UINT16_C( 3341), UINT16_C( 25253), UINT16_C( 20430), UINT16_C( 42207), UINT16_C( 24498), UINT16_C( 15365), UINT16_C( 
19541)), simde_x_mm256_set_epu16(UINT16_C( 65535), UINT16_C( 26795), UINT16_C( 65535), UINT16_C( 56922), UINT16_C( 65535), UINT16_C( 44862), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65223), UINT16_C( 31676), UINT16_C( 34387), UINT16_C( 62069), UINT16_C( 65535), UINT16_C( 64388), UINT16_C( 23658), UINT16_C( 65535)) }, { simde_x_mm256_set_epu16(UINT16_C( 26386), UINT16_C( 63006), UINT16_C( 58203), UINT16_C( 62377), UINT16_C( 47602), UINT16_C( 30067), UINT16_C( 23114), UINT16_C( 33473), UINT16_C( 53575), UINT16_C( 38408), UINT16_C( 17337), UINT16_C( 42126), UINT16_C( 5249), UINT16_C( 35315), UINT16_C( 42156), UINT16_C( 18091)), simde_x_mm256_set_epu16(UINT16_C( 52612), UINT16_C( 30768), UINT16_C( 53242), UINT16_C( 17367), UINT16_C( 55155), UINT16_C( 55208), UINT16_C( 40791), UINT16_C( 34106), UINT16_C( 38398), UINT16_C( 3526), UINT16_C( 48471), UINT16_C( 61865), UINT16_C( 26735), UINT16_C( 59797), UINT16_C( 61911), UINT16_C( 9267)), simde_x_mm256_set_epu16(UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 63905), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 41934), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 31984), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 27358)) }, { simde_x_mm256_set_epu16(UINT16_C( 52292), UINT16_C( 60997), UINT16_C( 31428), UINT16_C( 9305), UINT16_C( 57362), UINT16_C( 62823), UINT16_C( 6394), UINT16_C( 15984), UINT16_C( 50964), UINT16_C( 21850), UINT16_C( 44748), UINT16_C( 36872), UINT16_C( 14263), UINT16_C( 48235), UINT16_C( 9404), UINT16_C( 55410)), simde_x_mm256_set_epu16(UINT16_C( 5822), UINT16_C( 53891), UINT16_C( 53572), UINT16_C( 35299), UINT16_C( 32303), UINT16_C( 47360), UINT16_C( 2824), UINT16_C( 19749), UINT16_C( 6763), UINT16_C( 47400), UINT16_C( 29201), UINT16_C( 32332), UINT16_C( 24570), UINT16_C( 50755), UINT16_C( 22545), UINT16_C( 9382)), simde_x_mm256_set_epu16(UINT16_C( 58114), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 44604), UINT16_C( 65535), 
UINT16_C( 65535), UINT16_C( 9218), UINT16_C( 35733), UINT16_C( 57727), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 38833), UINT16_C( 65535), UINT16_C( 31949), UINT16_C( 64792)) }, { simde_x_mm256_set_epu16(UINT16_C( 50076), UINT16_C( 9743), UINT16_C( 10727), UINT16_C( 51592), UINT16_C( 28130), UINT16_C( 34226), UINT16_C( 44764), UINT16_C( 58424), UINT16_C( 15168), UINT16_C( 4051), UINT16_C( 54044), UINT16_C( 7020), UINT16_C( 31115), UINT16_C( 49299), UINT16_C( 40742), UINT16_C( 48855)), simde_x_mm256_set_epu16(UINT16_C( 50898), UINT16_C( 60971), UINT16_C( 2964), UINT16_C( 53140), UINT16_C( 39951), UINT16_C( 57637), UINT16_C( 63735), UINT16_C( 40101), UINT16_C( 37326), UINT16_C( 12531), UINT16_C( 29670), UINT16_C( 49503), UINT16_C( 64935), UINT16_C( 44011), UINT16_C( 59422), UINT16_C( 45053)), simde_x_mm256_set_epu16(UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 13691), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 52494), UINT16_C( 16582), UINT16_C( 65535), UINT16_C( 56523), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535)) }, { simde_x_mm256_set_epu16(UINT16_C( 12906), UINT16_C( 34342), UINT16_C( 49900), UINT16_C( 16373), UINT16_C( 41099), UINT16_C( 42680), UINT16_C( 11034), UINT16_C( 4050), UINT16_C( 41370), UINT16_C( 18241), UINT16_C( 60694), UINT16_C( 59842), UINT16_C( 7044), UINT16_C( 38288), UINT16_C( 18779), UINT16_C( 33204)), simde_x_mm256_set_epu16(UINT16_C( 39975), UINT16_C( 26379), UINT16_C( 49406), UINT16_C( 11197), UINT16_C( 25665), UINT16_C( 45876), UINT16_C( 8978), UINT16_C( 56112), UINT16_C( 5541), UINT16_C( 41816), UINT16_C( 1773), UINT16_C( 25366), UINT16_C( 51395), UINT16_C( 19553), UINT16_C( 37079), UINT16_C( 32528)), simde_x_mm256_set_epu16(UINT16_C( 52881), UINT16_C( 60721), UINT16_C( 65535), UINT16_C( 27570), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 20012), UINT16_C( 60162), UINT16_C( 46911), UINT16_C( 60057), UINT16_C( 62467), UINT16_C( 
65535), UINT16_C( 58439), UINT16_C( 57841), UINT16_C( 55858), UINT16_C( 65535)) }, { simde_x_mm256_set_epu16(UINT16_C( 54610), UINT16_C( 56071), UINT16_C( 54559), UINT16_C( 11899), UINT16_C( 32307), UINT16_C( 4962), UINT16_C( 27630), UINT16_C( 33600), UINT16_C( 31852), UINT16_C( 56358), UINT16_C( 35796), UINT16_C( 8024), UINT16_C( 46221), UINT16_C( 4529), UINT16_C( 49147), UINT16_C( 8518)), simde_x_mm256_set_epu16(UINT16_C( 57675), UINT16_C( 2883), UINT16_C( 55066), UINT16_C( 41648), UINT16_C( 12159), UINT16_C( 20265), UINT16_C( 47525), UINT16_C( 54059), UINT16_C( 12623), UINT16_C( 11063), UINT16_C( 34242), UINT16_C( 57692), UINT16_C( 8071), UINT16_C( 9806), UINT16_C( 30691), UINT16_C( 35776)), simde_x_mm256_set_epu16(UINT16_C( 65535), UINT16_C( 58954), UINT16_C( 65535), UINT16_C( 53547), UINT16_C( 44466), UINT16_C( 25227), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 44475), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 54292), UINT16_C( 14335), UINT16_C( 65535), UINT16_C( 44294)) }, { simde_x_mm256_set_epu16(UINT16_C( 56834), UINT16_C( 64885), UINT16_C( 9140), UINT16_C( 13056), UINT16_C( 40842), UINT16_C( 10347), UINT16_C( 7339), UINT16_C( 17877), UINT16_C( 14924), UINT16_C( 16868), UINT16_C( 50139), UINT16_C( 42854), UINT16_C( 20413), UINT16_C( 64148), UINT16_C( 24871), UINT16_C( 35734)), simde_x_mm256_set_epu16(UINT16_C( 20840), UINT16_C( 44144), UINT16_C( 58177), UINT16_C( 28709), UINT16_C( 14233), UINT16_C( 55224), UINT16_C( 50824), UINT16_C( 22009), UINT16_C( 46863), UINT16_C( 40997), UINT16_C( 17728), UINT16_C( 21679), UINT16_C( 9552), UINT16_C( 17236), UINT16_C( 5658), UINT16_C( 51223)), simde_x_mm256_set_epu16(UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 41765), UINT16_C( 55075), UINT16_C( 65535), UINT16_C( 58163), UINT16_C( 39886), UINT16_C( 61787), UINT16_C( 57865), UINT16_C( 65535), UINT16_C( 64533), UINT16_C( 29965), UINT16_C( 65535), UINT16_C( 30529), UINT16_C( 65535)) } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_adds_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_avg_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu8(UINT8_C(132), UINT8_C(185), UINT8_C( 15), UINT8_C(235), UINT8_C(102), UINT8_C( 8), UINT8_C(239), UINT8_C(181), UINT8_C( 81), UINT8_C(155), UINT8_C(236), UINT8_C(191), UINT8_C(133), UINT8_C( 65), UINT8_C( 73), UINT8_C( 40), UINT8_C(181), UINT8_C( 86), UINT8_C( 73), UINT8_C(213), UINT8_C( 85), UINT8_C( 28), UINT8_C( 53), UINT8_C(225), UINT8_C(217), UINT8_C(129), UINT8_C( 68), UINT8_C(183), UINT8_C(232), UINT8_C( 91), UINT8_C( 4), UINT8_C(129)), simde_x_mm256_set_epu8(UINT8_C(199), UINT8_C(146), UINT8_C( 73), UINT8_C(129), UINT8_C( 53), UINT8_C( 30), UINT8_C(178), UINT8_C(252), UINT8_C(125), UINT8_C( 44), UINT8_C( 67), UINT8_C( 83), UINT8_C( 52), UINT8_C( 79), UINT8_C(239), UINT8_C(118), UINT8_C(100), UINT8_C( 25), UINT8_C( 74), UINT8_C( 78), UINT8_C( 90), UINT8_C(145), UINT8_C(118), UINT8_C(211), UINT8_C( 29), UINT8_C( 31), UINT8_C(128), UINT8_C( 53), UINT8_C( 19), UINT8_C(239), UINT8_C(181), UINT8_C(108)), simde_x_mm256_set_epu8(UINT8_C(166), UINT8_C(166), UINT8_C( 44), UINT8_C(182), UINT8_C( 78), UINT8_C( 19), UINT8_C(209), UINT8_C(217), UINT8_C(103), UINT8_C(100), UINT8_C(152), UINT8_C(137), UINT8_C( 93), UINT8_C( 72), UINT8_C(156), UINT8_C( 79), UINT8_C(141), UINT8_C( 56), UINT8_C( 74), UINT8_C(146), UINT8_C( 88), UINT8_C( 87), UINT8_C( 86), UINT8_C(218), UINT8_C(123), UINT8_C( 80), UINT8_C( 98), UINT8_C(118), UINT8_C(126), UINT8_C(165), UINT8_C( 93), UINT8_C(119)) }, { simde_x_mm256_set_epu8(UINT8_C(209), UINT8_C(137), UINT8_C(159), UINT8_C(201), UINT8_C(142), UINT8_C(123), UINT8_C(233), UINT8_C(210), UINT8_C(180), UINT8_C( 1), UINT8_C( 19), UINT8_C( 5), UINT8_C( 35), UINT8_C(203), UINT8_C(189), UINT8_C( 26), 
UINT8_C(153), UINT8_C(140), UINT8_C( 62), UINT8_C(144), UINT8_C( 20), UINT8_C(125), UINT8_C(185), UINT8_C(133), UINT8_C( 90), UINT8_C(243), UINT8_C( 18), UINT8_C(117), UINT8_C(102), UINT8_C(222), UINT8_C( 27), UINT8_C( 12)), simde_x_mm256_set_epu8(UINT8_C(201), UINT8_C(154), UINT8_C( 75), UINT8_C(146), UINT8_C( 84), UINT8_C(138), UINT8_C(110), UINT8_C( 18), UINT8_C(142), UINT8_C(147), UINT8_C(192), UINT8_C(131), UINT8_C(151), UINT8_C(232), UINT8_C(176), UINT8_C( 8), UINT8_C(133), UINT8_C( 27), UINT8_C( 52), UINT8_C( 35), UINT8_C(114), UINT8_C(148), UINT8_C(237), UINT8_C(121), UINT8_C(209), UINT8_C( 97), UINT8_C(242), UINT8_C( 75), UINT8_C(194), UINT8_C( 62), UINT8_C(242), UINT8_C(214)), simde_x_mm256_set_epu8(UINT8_C(205), UINT8_C(146), UINT8_C(117), UINT8_C(174), UINT8_C(113), UINT8_C(131), UINT8_C(172), UINT8_C(114), UINT8_C(161), UINT8_C( 74), UINT8_C(106), UINT8_C( 68), UINT8_C( 93), UINT8_C(218), UINT8_C(183), UINT8_C( 17), UINT8_C(143), UINT8_C( 84), UINT8_C( 57), UINT8_C( 90), UINT8_C( 67), UINT8_C(137), UINT8_C(211), UINT8_C(127), UINT8_C(150), UINT8_C(170), UINT8_C(130), UINT8_C( 96), UINT8_C(148), UINT8_C(142), UINT8_C(135), UINT8_C(113)) }, { simde_x_mm256_set_epu8(UINT8_C(223), UINT8_C( 80), UINT8_C( 95), UINT8_C( 57), UINT8_C(173), UINT8_C( 14), UINT8_C( 75), UINT8_C( 79), UINT8_C(206), UINT8_C( 37), UINT8_C(125), UINT8_C(225), UINT8_C(172), UINT8_C(121), UINT8_C( 43), UINT8_C(249), UINT8_C(122), UINT8_C( 3), UINT8_C( 25), UINT8_C(204), UINT8_C(153), UINT8_C( 32), UINT8_C(245), UINT8_C( 58), UINT8_C(211), UINT8_C(116), UINT8_C( 87), UINT8_C(228), UINT8_C(139), UINT8_C(203), UINT8_C(106), UINT8_C(216)), simde_x_mm256_set_epu8(UINT8_C( 18), UINT8_C(169), UINT8_C( 46), UINT8_C(100), UINT8_C(246), UINT8_C( 6), UINT8_C(208), UINT8_C(224), UINT8_C(134), UINT8_C(248), UINT8_C( 90), UINT8_C(243), UINT8_C( 95), UINT8_C(216), UINT8_C(232), UINT8_C(245), UINT8_C( 35), UINT8_C(118), UINT8_C( 23), UINT8_C(111), UINT8_C(137), UINT8_C( 1), UINT8_C( 43), UINT8_C(212), 
UINT8_C(143), UINT8_C(143), UINT8_C(106), UINT8_C(242), UINT8_C(188), UINT8_C( 78), UINT8_C( 94), UINT8_C( 49)), simde_x_mm256_set_epu8(UINT8_C(121), UINT8_C(125), UINT8_C( 71), UINT8_C( 79), UINT8_C(210), UINT8_C( 10), UINT8_C(142), UINT8_C(152), UINT8_C(170), UINT8_C(143), UINT8_C(108), UINT8_C(234), UINT8_C(134), UINT8_C(169), UINT8_C(138), UINT8_C(247), UINT8_C( 79), UINT8_C( 61), UINT8_C( 24), UINT8_C(158), UINT8_C(145), UINT8_C( 17), UINT8_C(144), UINT8_C(135), UINT8_C(177), UINT8_C(130), UINT8_C( 97), UINT8_C(235), UINT8_C(164), UINT8_C(141), UINT8_C(100), UINT8_C(133)) }, { simde_x_mm256_set_epu8(UINT8_C(186), UINT8_C( 51), UINT8_C(166), UINT8_C(159), UINT8_C( 61), UINT8_C(189), UINT8_C(148), UINT8_C(156), UINT8_C(199), UINT8_C( 59), UINT8_C(214), UINT8_C( 21), UINT8_C( 92), UINT8_C( 24), UINT8_C( 35), UINT8_C( 33), UINT8_C( 27), UINT8_C(133), UINT8_C( 9), UINT8_C(114), UINT8_C(170), UINT8_C( 78), UINT8_C(149), UINT8_C(203), UINT8_C(212), UINT8_C(108), UINT8_C(116), UINT8_C(217), UINT8_C(102), UINT8_C(192), UINT8_C(223), UINT8_C( 98)), simde_x_mm256_set_epu8(UINT8_C( 51), UINT8_C(193), UINT8_C(129), UINT8_C(222), UINT8_C(147), UINT8_C( 49), UINT8_C(210), UINT8_C(198), UINT8_C(192), UINT8_C(158), UINT8_C( 49), UINT8_C(217), UINT8_C( 20), UINT8_C(183), UINT8_C(213), UINT8_C( 71), UINT8_C(164), UINT8_C( 92), UINT8_C(118), UINT8_C( 17), UINT8_C(236), UINT8_C( 27), UINT8_C(162), UINT8_C( 98), UINT8_C(196), UINT8_C(135), UINT8_C( 7), UINT8_C(172), UINT8_C(233), UINT8_C( 47), UINT8_C(151), UINT8_C(128)), simde_x_mm256_set_epu8(UINT8_C(119), UINT8_C(122), UINT8_C(148), UINT8_C(191), UINT8_C(104), UINT8_C(119), UINT8_C(179), UINT8_C(177), UINT8_C(196), UINT8_C(109), UINT8_C(132), UINT8_C(119), UINT8_C( 56), UINT8_C(104), UINT8_C(124), UINT8_C( 52), UINT8_C( 96), UINT8_C(113), UINT8_C( 64), UINT8_C( 66), UINT8_C(203), UINT8_C( 53), UINT8_C(156), UINT8_C(151), UINT8_C(204), UINT8_C(122), UINT8_C( 62), UINT8_C(195), UINT8_C(168), UINT8_C(120), UINT8_C(187), 
UINT8_C(113)) }, { simde_x_mm256_set_epu8(UINT8_C(182), UINT8_C(141), UINT8_C( 93), UINT8_C( 91), UINT8_C( 0), UINT8_C(189), UINT8_C(215), UINT8_C(221), UINT8_C(105), UINT8_C(231), UINT8_C( 61), UINT8_C(224), UINT8_C( 68), UINT8_C( 84), UINT8_C(247), UINT8_C(215), UINT8_C(125), UINT8_C(197), UINT8_C( 69), UINT8_C(102), UINT8_C(218), UINT8_C(120), UINT8_C(113), UINT8_C(175), UINT8_C(134), UINT8_C( 33), UINT8_C(106), UINT8_C(117), UINT8_C(129), UINT8_C(249), UINT8_C(194), UINT8_C( 70)), simde_x_mm256_set_epu8(UINT8_C(137), UINT8_C( 27), UINT8_C( 17), UINT8_C( 94), UINT8_C(244), UINT8_C(142), UINT8_C(142), UINT8_C( 48), UINT8_C( 54), UINT8_C(217), UINT8_C(209), UINT8_C(122), UINT8_C( 1), UINT8_C(190), UINT8_C( 59), UINT8_C(250), UINT8_C(179), UINT8_C(176), UINT8_C(167), UINT8_C( 57), UINT8_C( 90), UINT8_C( 15), UINT8_C( 31), UINT8_C(140), UINT8_C(209), UINT8_C(104), UINT8_C(139), UINT8_C(154), UINT8_C( 57), UINT8_C(248), UINT8_C(225), UINT8_C( 65)), simde_x_mm256_set_epu8(UINT8_C(160), UINT8_C( 84), UINT8_C( 55), UINT8_C( 93), UINT8_C(122), UINT8_C(166), UINT8_C(179), UINT8_C(135), UINT8_C( 80), UINT8_C(224), UINT8_C(135), UINT8_C(173), UINT8_C( 35), UINT8_C(137), UINT8_C(153), UINT8_C(233), UINT8_C(152), UINT8_C(187), UINT8_C(118), UINT8_C( 80), UINT8_C(154), UINT8_C( 68), UINT8_C( 72), UINT8_C(158), UINT8_C(172), UINT8_C( 69), UINT8_C(123), UINT8_C(136), UINT8_C( 93), UINT8_C(249), UINT8_C(210), UINT8_C( 68)) }, { simde_x_mm256_set_epu8(UINT8_C(125), UINT8_C(242), UINT8_C( 34), UINT8_C(120), UINT8_C(106), UINT8_C(202), UINT8_C(100), UINT8_C( 61), UINT8_C(105), UINT8_C(145), UINT8_C( 46), UINT8_C(129), UINT8_C(208), UINT8_C( 57), UINT8_C( 82), UINT8_C( 21), UINT8_C( 59), UINT8_C( 97), UINT8_C(206), UINT8_C( 4), UINT8_C(182), UINT8_C( 81), UINT8_C(203), UINT8_C(252), UINT8_C(111), UINT8_C( 28), UINT8_C(210), UINT8_C( 57), UINT8_C(214), UINT8_C(124), UINT8_C(137), UINT8_C(114)), simde_x_mm256_set_epu8(UINT8_C(208), UINT8_C( 60), UINT8_C( 51), UINT8_C( 61), 
UINT8_C(249), UINT8_C(203), UINT8_C( 69), UINT8_C(195), UINT8_C( 16), UINT8_C( 67), UINT8_C(241), UINT8_C(244), UINT8_C(217), UINT8_C(201), UINT8_C(104), UINT8_C( 80), UINT8_C( 30), UINT8_C( 40), UINT8_C( 69), UINT8_C( 88), UINT8_C( 83), UINT8_C(141), UINT8_C(221), UINT8_C(174), UINT8_C(165), UINT8_C(114), UINT8_C(107), UINT8_C( 42), UINT8_C( 83), UINT8_C( 1), UINT8_C( 95), UINT8_C( 89)), simde_x_mm256_set_epu8(UINT8_C(167), UINT8_C(151), UINT8_C( 43), UINT8_C( 91), UINT8_C(178), UINT8_C(203), UINT8_C( 85), UINT8_C(128), UINT8_C( 61), UINT8_C(106), UINT8_C(144), UINT8_C(187), UINT8_C(213), UINT8_C(129), UINT8_C( 93), UINT8_C( 51), UINT8_C( 45), UINT8_C( 69), UINT8_C(138), UINT8_C( 46), UINT8_C(133), UINT8_C(111), UINT8_C(212), UINT8_C(213), UINT8_C(138), UINT8_C( 71), UINT8_C(159), UINT8_C( 50), UINT8_C(149), UINT8_C( 63), UINT8_C(116), UINT8_C(102)) }, { simde_x_mm256_set_epu8(UINT8_C( 59), UINT8_C(202), UINT8_C( 28), UINT8_C( 65), UINT8_C( 60), UINT8_C( 92), UINT8_C(112), UINT8_C(105), UINT8_C(229), UINT8_C(116), UINT8_C(242), UINT8_C(217), UINT8_C(203), UINT8_C( 71), UINT8_C( 15), UINT8_C(143), UINT8_C( 58), UINT8_C(228), UINT8_C( 36), UINT8_C(154), UINT8_C( 96), UINT8_C( 2), UINT8_C( 86), UINT8_C( 36), UINT8_C( 93), UINT8_C( 29), UINT8_C( 70), UINT8_C( 20), UINT8_C(130), UINT8_C(172), UINT8_C(152), UINT8_C(189)), simde_x_mm256_set_epu8(UINT8_C(100), UINT8_C( 42), UINT8_C( 77), UINT8_C( 21), UINT8_C(144), UINT8_C(197), UINT8_C(242), UINT8_C(243), UINT8_C(205), UINT8_C(204), UINT8_C( 75), UINT8_C(102), UINT8_C( 21), UINT8_C(148), UINT8_C( 70), UINT8_C(128), UINT8_C( 95), UINT8_C(147), UINT8_C( 39), UINT8_C(190), UINT8_C( 20), UINT8_C(128), UINT8_C(196), UINT8_C(160), UINT8_C( 8), UINT8_C(206), UINT8_C( 13), UINT8_C(197), UINT8_C( 93), UINT8_C(253), UINT8_C( 16), UINT8_C( 27)), simde_x_mm256_set_epu8(UINT8_C( 80), UINT8_C(122), UINT8_C( 53), UINT8_C( 43), UINT8_C(102), UINT8_C(145), UINT8_C(177), UINT8_C(174), UINT8_C(217), UINT8_C(160), UINT8_C(159), 
UINT8_C(160), UINT8_C(112), UINT8_C(110), UINT8_C( 43), UINT8_C(136), UINT8_C( 77), UINT8_C(188), UINT8_C( 38), UINT8_C(172), UINT8_C( 58), UINT8_C( 65), UINT8_C(141), UINT8_C( 98), UINT8_C( 51), UINT8_C(118), UINT8_C( 42), UINT8_C(109), UINT8_C(112), UINT8_C(213), UINT8_C( 84), UINT8_C(108)) }, { simde_x_mm256_set_epu8(UINT8_C( 75), UINT8_C( 17), UINT8_C(162), UINT8_C( 64), UINT8_C(129), UINT8_C(250), UINT8_C(112), UINT8_C(166), UINT8_C( 98), UINT8_C(126), UINT8_C(129), UINT8_C(211), UINT8_C( 27), UINT8_C( 12), UINT8_C(183), UINT8_C(140), UINT8_C(106), UINT8_C(255), UINT8_C(252), UINT8_C(224), UINT8_C(116), UINT8_C(208), UINT8_C( 69), UINT8_C( 4), UINT8_C(193), UINT8_C( 46), UINT8_C(111), UINT8_C( 96), UINT8_C(101), UINT8_C(183), UINT8_C( 99), UINT8_C( 60)), simde_x_mm256_set_epu8(UINT8_C( 48), UINT8_C( 27), UINT8_C(253), UINT8_C(118), UINT8_C(225), UINT8_C(134), UINT8_C(250), UINT8_C(133), UINT8_C( 52), UINT8_C( 47), UINT8_C( 27), UINT8_C(213), UINT8_C( 28), UINT8_C(208), UINT8_C( 73), UINT8_C( 89), UINT8_C( 76), UINT8_C(160), UINT8_C( 57), UINT8_C(191), UINT8_C( 34), UINT8_C(121), UINT8_C(194), UINT8_C(205), UINT8_C(102), UINT8_C(106), UINT8_C(175), UINT8_C(219), UINT8_C(174), UINT8_C(128), UINT8_C(137), UINT8_C(235)), simde_x_mm256_set_epu8(UINT8_C( 62), UINT8_C( 22), UINT8_C(208), UINT8_C( 91), UINT8_C(177), UINT8_C(192), UINT8_C(181), UINT8_C(150), UINT8_C( 75), UINT8_C( 87), UINT8_C( 78), UINT8_C(212), UINT8_C( 28), UINT8_C(110), UINT8_C(128), UINT8_C(115), UINT8_C( 91), UINT8_C(208), UINT8_C(155), UINT8_C(208), UINT8_C( 75), UINT8_C(165), UINT8_C(132), UINT8_C(105), UINT8_C(148), UINT8_C( 76), UINT8_C(143), UINT8_C(158), UINT8_C(138), UINT8_C(156), UINT8_C(118), UINT8_C(148)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_avg_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_avg_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { 
simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu16(UINT16_C( 33977), UINT16_C( 4075), UINT16_C( 26120), UINT16_C( 61365), UINT16_C( 20891), UINT16_C( 60607), UINT16_C( 34113), UINT16_C( 18728), UINT16_C( 46422), UINT16_C( 18901), UINT16_C( 21788), UINT16_C( 13793), UINT16_C( 55681), UINT16_C( 17591), UINT16_C( 59483), UINT16_C( 1153)), simde_x_mm256_set_epu16(UINT16_C( 51090), UINT16_C( 18817), UINT16_C( 13598), UINT16_C( 45820), UINT16_C( 32044), UINT16_C( 17235), UINT16_C( 13391), UINT16_C( 61302), UINT16_C( 25625), UINT16_C( 19022), UINT16_C( 23185), UINT16_C( 30419), UINT16_C( 7455), UINT16_C( 32821), UINT16_C( 5103), UINT16_C( 46444)), simde_x_mm256_set_epu16(UINT16_C( 42534), UINT16_C( 11446), UINT16_C( 19859), UINT16_C( 53593), UINT16_C( 26468), UINT16_C( 38921), UINT16_C( 23752), UINT16_C( 40015), UINT16_C( 36024), UINT16_C( 18962), UINT16_C( 22487), UINT16_C( 22106), UINT16_C( 31568), UINT16_C( 25206), UINT16_C( 32293), UINT16_C( 23799)) }, { simde_x_mm256_set_epu16(UINT16_C( 53641), UINT16_C( 40905), UINT16_C( 36475), UINT16_C( 59858), UINT16_C( 46081), UINT16_C( 4869), UINT16_C( 9163), UINT16_C( 48410), UINT16_C( 39308), UINT16_C( 16016), UINT16_C( 5245), UINT16_C( 47493), UINT16_C( 23283), UINT16_C( 4725), UINT16_C( 26334), UINT16_C( 6924)), simde_x_mm256_set_epu16(UINT16_C( 51610), UINT16_C( 19346), UINT16_C( 21642), UINT16_C( 28178), UINT16_C( 36499), UINT16_C( 49283), UINT16_C( 38888), UINT16_C( 45064), UINT16_C( 34075), UINT16_C( 13347), UINT16_C( 29332), UINT16_C( 60793), UINT16_C( 53601), UINT16_C( 62027), UINT16_C( 49726), UINT16_C( 62166)), simde_x_mm256_set_epu16(UINT16_C( 52626), UINT16_C( 30126), UINT16_C( 29059), UINT16_C( 44018), UINT16_C( 41290), UINT16_C( 27076), UINT16_C( 24026), UINT16_C( 46737), UINT16_C( 36692), UINT16_C( 14682), UINT16_C( 17289), UINT16_C( 54143), UINT16_C( 38442), UINT16_C( 33376), UINT16_C( 38030), UINT16_C( 34545)) }, { simde_x_mm256_set_epu16(UINT16_C( 57168), UINT16_C( 
24377), UINT16_C( 44302), UINT16_C( 19279), UINT16_C( 52773), UINT16_C( 32225), UINT16_C( 44153), UINT16_C( 11257), UINT16_C( 31235), UINT16_C( 6604), UINT16_C( 39200), UINT16_C( 62778), UINT16_C( 54132), UINT16_C( 22500), UINT16_C( 35787), UINT16_C( 27352)), simde_x_mm256_set_epu16(UINT16_C( 4777), UINT16_C( 11876), UINT16_C( 62982), UINT16_C( 53472), UINT16_C( 34552), UINT16_C( 23283), UINT16_C( 24536), UINT16_C( 59637), UINT16_C( 9078), UINT16_C( 5999), UINT16_C( 35073), UINT16_C( 11220), UINT16_C( 36751), UINT16_C( 27378), UINT16_C( 48206), UINT16_C( 24113)), simde_x_mm256_set_epu16(UINT16_C( 30973), UINT16_C( 18127), UINT16_C( 53642), UINT16_C( 36376), UINT16_C( 43663), UINT16_C( 27754), UINT16_C( 34345), UINT16_C( 35447), UINT16_C( 20157), UINT16_C( 6302), UINT16_C( 37137), UINT16_C( 36999), UINT16_C( 45442), UINT16_C( 24939), UINT16_C( 41997), UINT16_C( 25733)) }, { simde_x_mm256_set_epu16(UINT16_C( 47667), UINT16_C( 42655), UINT16_C( 15805), UINT16_C( 38044), UINT16_C( 51003), UINT16_C( 54805), UINT16_C( 23576), UINT16_C( 8993), UINT16_C( 7045), UINT16_C( 2418), UINT16_C( 43598), UINT16_C( 38347), UINT16_C( 54380), UINT16_C( 29913), UINT16_C( 26304), UINT16_C( 57186)), simde_x_mm256_set_epu16(UINT16_C( 13249), UINT16_C( 33246), UINT16_C( 37681), UINT16_C( 53958), UINT16_C( 49310), UINT16_C( 12761), UINT16_C( 5303), UINT16_C( 54599), UINT16_C( 42076), UINT16_C( 30225), UINT16_C( 60443), UINT16_C( 41570), UINT16_C( 50311), UINT16_C( 1964), UINT16_C( 59695), UINT16_C( 38784)), simde_x_mm256_set_epu16(UINT16_C( 30458), UINT16_C( 37951), UINT16_C( 26743), UINT16_C( 46001), UINT16_C( 50157), UINT16_C( 33783), UINT16_C( 14440), UINT16_C( 31796), UINT16_C( 24561), UINT16_C( 16322), UINT16_C( 52021), UINT16_C( 39959), UINT16_C( 52346), UINT16_C( 15939), UINT16_C( 43000), UINT16_C( 47985)) }, { simde_x_mm256_set_epu16(UINT16_C( 46733), UINT16_C( 23899), UINT16_C( 189), UINT16_C( 55261), UINT16_C( 27111), UINT16_C( 15840), UINT16_C( 17492), UINT16_C( 63447), UINT16_C( 
32197), UINT16_C( 17766), UINT16_C( 55928), UINT16_C( 29103), UINT16_C( 34337), UINT16_C( 27253), UINT16_C( 33273), UINT16_C( 49734)), simde_x_mm256_set_epu16(UINT16_C( 35099), UINT16_C( 4446), UINT16_C( 62606), UINT16_C( 36400), UINT16_C( 14041), UINT16_C( 53626), UINT16_C( 446), UINT16_C( 15354), UINT16_C( 46000), UINT16_C( 42809), UINT16_C( 23055), UINT16_C( 8076), UINT16_C( 53608), UINT16_C( 35738), UINT16_C( 14840), UINT16_C( 57665)), simde_x_mm256_set_epu16(UINT16_C( 40916), UINT16_C( 14173), UINT16_C( 31398), UINT16_C( 45831), UINT16_C( 20576), UINT16_C( 34733), UINT16_C( 8969), UINT16_C( 39401), UINT16_C( 39099), UINT16_C( 30288), UINT16_C( 39492), UINT16_C( 18590), UINT16_C( 43973), UINT16_C( 31496), UINT16_C( 24057), UINT16_C( 53700)) }, { simde_x_mm256_set_epu16(UINT16_C( 32242), UINT16_C( 8824), UINT16_C( 27338), UINT16_C( 25661), UINT16_C( 27025), UINT16_C( 11905), UINT16_C( 53305), UINT16_C( 21013), UINT16_C( 15201), UINT16_C( 52740), UINT16_C( 46673), UINT16_C( 52220), UINT16_C( 28444), UINT16_C( 53817), UINT16_C( 54908), UINT16_C( 35186)), simde_x_mm256_set_epu16(UINT16_C( 53308), UINT16_C( 13117), UINT16_C( 63947), UINT16_C( 17859), UINT16_C( 4163), UINT16_C( 61940), UINT16_C( 55753), UINT16_C( 26704), UINT16_C( 7720), UINT16_C( 17752), UINT16_C( 21389), UINT16_C( 56750), UINT16_C( 42354), UINT16_C( 27434), UINT16_C( 21249), UINT16_C( 24409)), simde_x_mm256_set_epu16(UINT16_C( 42775), UINT16_C( 10971), UINT16_C( 45643), UINT16_C( 21760), UINT16_C( 15594), UINT16_C( 36923), UINT16_C( 54529), UINT16_C( 23859), UINT16_C( 11461), UINT16_C( 35246), UINT16_C( 34031), UINT16_C( 54485), UINT16_C( 35399), UINT16_C( 40626), UINT16_C( 38079), UINT16_C( 29798)) }, { simde_x_mm256_set_epu16(UINT16_C( 15306), UINT16_C( 7233), UINT16_C( 15452), UINT16_C( 28777), UINT16_C( 58740), UINT16_C( 62169), UINT16_C( 52039), UINT16_C( 3983), UINT16_C( 15076), UINT16_C( 9370), UINT16_C( 24578), UINT16_C( 22052), UINT16_C( 23837), UINT16_C( 17940), UINT16_C( 33452), 
UINT16_C( 39101)), simde_x_mm256_set_epu16(UINT16_C( 25642), UINT16_C( 19733), UINT16_C( 37061), UINT16_C( 62195), UINT16_C( 52684), UINT16_C( 19302), UINT16_C( 5524), UINT16_C( 18048), UINT16_C( 24467), UINT16_C( 10174), UINT16_C( 5248), UINT16_C( 50336), UINT16_C( 2254), UINT16_C( 3525), UINT16_C( 24061), UINT16_C( 4123)), simde_x_mm256_set_epu16(UINT16_C( 20474), UINT16_C( 13483), UINT16_C( 26257), UINT16_C( 45486), UINT16_C( 55712), UINT16_C( 40736), UINT16_C( 28782), UINT16_C( 11016), UINT16_C( 19772), UINT16_C( 9772), UINT16_C( 14913), UINT16_C( 36194), UINT16_C( 13046), UINT16_C( 10733), UINT16_C( 28757), UINT16_C( 21612)) }, { simde_x_mm256_set_epu16(UINT16_C( 19217), UINT16_C( 41536), UINT16_C( 33274), UINT16_C( 28838), UINT16_C( 25214), UINT16_C( 33235), UINT16_C( 6924), UINT16_C( 46988), UINT16_C( 27391), UINT16_C( 64736), UINT16_C( 29904), UINT16_C( 17668), UINT16_C( 49454), UINT16_C( 28512), UINT16_C( 26039), UINT16_C( 25404)), simde_x_mm256_set_epu16(UINT16_C( 12315), UINT16_C( 64886), UINT16_C( 57734), UINT16_C( 64133), UINT16_C( 13359), UINT16_C( 7125), UINT16_C( 7376), UINT16_C( 18777), UINT16_C( 19616), UINT16_C( 14783), UINT16_C( 8825), UINT16_C( 49869), UINT16_C( 26218), UINT16_C( 45019), UINT16_C( 44672), UINT16_C( 35307)), simde_x_mm256_set_epu16(UINT16_C( 15766), UINT16_C( 53211), UINT16_C( 45504), UINT16_C( 46486), UINT16_C( 19287), UINT16_C( 20180), UINT16_C( 7150), UINT16_C( 32883), UINT16_C( 23504), UINT16_C( 39760), UINT16_C( 19365), UINT16_C( 33769), UINT16_C( 37836), UINT16_C( 36766), UINT16_C( 35356), UINT16_C( 30356)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_avg_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_blend_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C( -9012), INT16_C( 17188), INT16_C( 
20170), INT16_C( -6948), INT16_C( 9138), INT16_C( 24690), INT16_C( -6761), INT16_C( -2618), INT16_C( 30583), INT16_C( 3343), INT16_C( -2458), INT16_C( 32235), INT16_C(-14188), INT16_C( 15906), INT16_C(-17823), INT16_C( 7319)), simde_mm256_set_epi16(INT16_C( -5752), INT16_C(-23668), INT16_C(-25159), INT16_C(-19782), INT16_C( 28185), INT16_C(-14343), INT16_C(-18599), INT16_C( 827), INT16_C( -4902), INT16_C( 2482), INT16_C( 14836), INT16_C(-27028), INT16_C( 23821), INT16_C( -1365), INT16_C( -4235), INT16_C( -888)), simde_mm256_set_epi16(INT16_C( -9012), INT16_C( 17188), INT16_C( 20170), INT16_C( -6948), INT16_C( 28185), INT16_C(-14343), INT16_C( -6761), INT16_C( 827), INT16_C( 30583), INT16_C( 3343), INT16_C( -2458), INT16_C( 32235), INT16_C( 23821), INT16_C( -1365), INT16_C(-17823), INT16_C( -888)) }, { simde_mm256_set_epi16(INT16_C( 2208), INT16_C( 24143), INT16_C( 20623), INT16_C( -5907), INT16_C( 4359), INT16_C( 6016), INT16_C( 2606), INT16_C(-17968), INT16_C( 24878), INT16_C(-20974), INT16_C( 11542), INT16_C( 18923), INT16_C( 32276), INT16_C(-26730), INT16_C( 20467), INT16_C(-30404)), simde_mm256_set_epi16(INT16_C( 27588), INT16_C(-23388), INT16_C( 31848), INT16_C( 22463), INT16_C(-31474), INT16_C(-14474), INT16_C( 2006), INT16_C(-32634), INT16_C( 32036), INT16_C(-13145), INT16_C(-30339), INT16_C(-22528), INT16_C( 17597), INT16_C( 17800), INT16_C( 16042), INT16_C( 23637)), simde_mm256_set_epi16(INT16_C( 2208), INT16_C( 24143), INT16_C( 20623), INT16_C( -5907), INT16_C(-31474), INT16_C(-14474), INT16_C( 2606), INT16_C(-32634), INT16_C( 24878), INT16_C(-20974), INT16_C( 11542), INT16_C( 18923), INT16_C( 17597), INT16_C( 17800), INT16_C( 20467), INT16_C( 23637)) }, { simde_mm256_set_epi16(INT16_C(-30302), INT16_C( -3491), INT16_C(-12187), INT16_C( -9390), INT16_C( -5875), INT16_C( 4739), INT16_C( 19577), INT16_C( 7526), INT16_C( 31347), INT16_C( 10086), INT16_C(-16595), INT16_C( 27410), INT16_C( 1134), INT16_C(-11806), INT16_C(-16010), INT16_C(-25451)), 
simde_mm256_set_epi16(INT16_C(-29901), INT16_C( 18434), INT16_C( -841), INT16_C( 28759), INT16_C( -1918), INT16_C( 2817), INT16_C(-31249), INT16_C( 6853), INT16_C( 24735), INT16_C(-25824), INT16_C( -1496), INT16_C( 12880), INT16_C( 11586), INT16_C( 24977), INT16_C( 22341), INT16_C(-21470)), simde_mm256_set_epi16(INT16_C(-30302), INT16_C( -3491), INT16_C(-12187), INT16_C( -9390), INT16_C( -1918), INT16_C( 2817), INT16_C( 19577), INT16_C( 6853), INT16_C( 31347), INT16_C( 10086), INT16_C(-16595), INT16_C( 27410), INT16_C( 11586), INT16_C( 24977), INT16_C(-16010), INT16_C(-21470)) }, { simde_mm256_set_epi16(INT16_C( 17074), INT16_C(-20924), INT16_C( 13898), INT16_C( 20227), INT16_C( 12334), INT16_C(-15702), INT16_C( 28564), INT16_C(-15082), INT16_C(-19676), INT16_C( 796), INT16_C( 13442), INT16_C( -9023), INT16_C( 10428), INT16_C( 21588), INT16_C(-25545), INT16_C( 22589)), simde_mm256_set_epi16(INT16_C( 13365), INT16_C(-16397), INT16_C(-14658), INT16_C( 8081), INT16_C( 4626), INT16_C(-31038), INT16_C(-27498), INT16_C( -1797), INT16_C(-14919), INT16_C( 31584), INT16_C( 32162), INT16_C( 21664), INT16_C( 32327), INT16_C( 9046), INT16_C( 29457), INT16_C( 18165)), simde_mm256_set_epi16(INT16_C( 17074), INT16_C(-20924), INT16_C( 13898), INT16_C( 20227), INT16_C( 4626), INT16_C(-31038), INT16_C( 28564), INT16_C( -1797), INT16_C(-19676), INT16_C( 796), INT16_C( 13442), INT16_C( -9023), INT16_C( 32327), INT16_C( 9046), INT16_C(-25545), INT16_C( 18165)) }, { simde_mm256_set_epi16(INT16_C(-28976), INT16_C(-17452), INT16_C(-30835), INT16_C(-11288), INT16_C( 23746), INT16_C(-12398), INT16_C( -9605), INT16_C( 914), INT16_C( -6067), INT16_C( 4660), INT16_C( 15780), INT16_C( 30375), INT16_C(-32484), INT16_C( 23271), INT16_C(-15980), INT16_C( 3969)), simde_mm256_set_epi16(INT16_C(-14502), INT16_C(-26489), INT16_C( -6738), INT16_C( -1193), INT16_C( 15756), INT16_C(-12605), INT16_C(-12710), INT16_C( -8558), INT16_C( 19027), INT16_C(-19772), INT16_C( 23814), INT16_C(-30071), 
INT16_C(-29678), INT16_C( 31649), INT16_C( 4669), INT16_C( -4491)), simde_mm256_set_epi16(INT16_C(-28976), INT16_C(-17452), INT16_C(-30835), INT16_C(-11288), INT16_C( 15756), INT16_C(-12605), INT16_C( -9605), INT16_C( -8558), INT16_C( -6067), INT16_C( 4660), INT16_C( 15780), INT16_C( 30375), INT16_C(-29678), INT16_C( 31649), INT16_C(-15980), INT16_C( -4491)) }, { simde_mm256_set_epi16(INT16_C( 16416), INT16_C(-25375), INT16_C(-21092), INT16_C(-20302), INT16_C(-10725), INT16_C(-20142), INT16_C( -4818), INT16_C(-14140), INT16_C(-13625), INT16_C(-24584), INT16_C( 6087), INT16_C(-31850), INT16_C(-29507), INT16_C( 7132), INT16_C( -6862), INT16_C( 26102)), simde_mm256_set_epi16(INT16_C( 3513), INT16_C(-30455), INT16_C(-14215), INT16_C(-31390), INT16_C( 22371), INT16_C(-30450), INT16_C(-14197), INT16_C( -3991), INT16_C( 25198), INT16_C( -1251), INT16_C( -4992), INT16_C(-16295), INT16_C( 23622), INT16_C( 28506), INT16_C(-16087), INT16_C(-18392)), simde_mm256_set_epi16(INT16_C( 16416), INT16_C(-25375), INT16_C(-21092), INT16_C(-20302), INT16_C( 22371), INT16_C(-30450), INT16_C( -4818), INT16_C( -3991), INT16_C(-13625), INT16_C(-24584), INT16_C( 6087), INT16_C(-31850), INT16_C( 23622), INT16_C( 28506), INT16_C( -6862), INT16_C(-18392)) }, { simde_mm256_set_epi16(INT16_C( -2375), INT16_C( 3031), INT16_C( 26231), INT16_C( 5999), INT16_C(-10519), INT16_C( 21791), INT16_C( 3889), INT16_C( 28062), INT16_C(-23674), INT16_C(-25444), INT16_C( 16907), INT16_C( 20389), INT16_C(-22712), INT16_C( 486), INT16_C( -2776), INT16_C(-21644)), simde_mm256_set_epi16(INT16_C(-29652), INT16_C( 489), INT16_C( -7346), INT16_C(-13391), INT16_C( 21827), INT16_C( 9877), INT16_C( 7842), INT16_C(-13219), INT16_C( 12847), INT16_C( 31187), INT16_C( -8174), INT16_C( -7953), INT16_C( 8071), INT16_C(-19051), INT16_C( 30976), INT16_C( 20848)), simde_mm256_set_epi16(INT16_C( -2375), INT16_C( 3031), INT16_C( 26231), INT16_C( 5999), INT16_C( 21827), INT16_C( 9877), INT16_C( 3889), INT16_C(-13219), 
INT16_C(-23674), INT16_C(-25444), INT16_C( 16907), INT16_C( 20389), INT16_C( 8071), INT16_C(-19051), INT16_C( -2776), INT16_C( 20848)) }, { simde_mm256_set_epi16(INT16_C( 13214), INT16_C(-27703), INT16_C( 6386), INT16_C( 5153), INT16_C( 26096), INT16_C( 8476), INT16_C( 10527), INT16_C(-23224), INT16_C( 23690), INT16_C( 9355), INT16_C( 1283), INT16_C(-29402), INT16_C( 22593), INT16_C(-12032), INT16_C( -8259), INT16_C( 13457)), simde_mm256_set_epi16(INT16_C(-25352), INT16_C( 21231), INT16_C(-11795), INT16_C( 17700), INT16_C(-24048), INT16_C(-11558), INT16_C( -1645), INT16_C( 21362), INT16_C( 18474), INT16_C( 30559), INT16_C( -790), INT16_C( 30067), INT16_C( 3488), INT16_C( 3834), INT16_C( 2645), INT16_C(-14787)), simde_mm256_set_epi16(INT16_C( 13214), INT16_C(-27703), INT16_C( 6386), INT16_C( 5153), INT16_C(-24048), INT16_C(-11558), INT16_C( 10527), INT16_C( 21362), INT16_C( 23690), INT16_C( 9355), INT16_C( 1283), INT16_C(-29402), INT16_C( 3488), INT16_C( 3834), INT16_C( -8259), INT16_C(-14787)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_blend_epi16(test_vec[i].a, test_vec[i].b, 13); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_blend_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 67571941), INT32_C(-1405773426), INT32_C( 1540271825), INT32_C( 2065572299), INT32_C( -582398487), INT32_C( 1269568238), INT32_C( -277360429), INT32_C( 355946014)), simde_mm256_set_epi32(INT32_C(-1175528322), INT32_C( -128390122), INT32_C( 1870386786), INT32_C( 1369967555), INT32_C( 417868105), INT32_C( 128490599), INT32_C( 1283738263), INT32_C( -937782732)), simde_mm256_set_epi32(INT32_C( 67571941), INT32_C(-1405773426), INT32_C( 1540271825), INT32_C( 2065572299), INT32_C( 417868105), INT32_C( 128490599), INT32_C( -277360429), INT32_C( -937782732)) }, { simde_mm256_set_epi32(INT32_C( 
1471616457), INT32_C( 153196965), INT32_C( 177786947), INT32_C(-1953704859), INT32_C(-2052334624), INT32_C(-1985445584), INT32_C( 389650479), INT32_C(-1304302924)), simde_mm256_set_epi32(INT32_C( -934897433), INT32_C( 1646862966), INT32_C(-1085276514), INT32_C( 112227015), INT32_C(-1389537102), INT32_C( 687724210), INT32_C( 1265543631), INT32_C( 346850755)), simde_mm256_set_epi32(INT32_C( 1471616457), INT32_C( 153196965), INT32_C( 177786947), INT32_C(-1953704859), INT32_C(-1389537102), INT32_C( 687724210), INT32_C( 389650479), INT32_C( 346850755)) }, { simde_mm256_set_epi32(INT32_C(-1682060225), INT32_C( 867867583), INT32_C( 925546319), INT32_C( 1379938785), INT32_C( 653018322), INT32_C( -687296073), INT32_C( -911101701), INT32_C( 1547072378)), simde_mm256_set_epi32(INT32_C( 1176167258), INT32_C( -301183666), INT32_C( -466020487), INT32_C( 52703344), INT32_C( 1233020389), INT32_C( 1117532027), INT32_C( 1899739665), INT32_C(-2043295118)), simde_mm256_set_epi32(INT32_C(-1682060225), INT32_C( 867867583), INT32_C( 925546319), INT32_C( 1379938785), INT32_C( 1233020389), INT32_C( 1117532027), INT32_C( -911101701), INT32_C(-2043295118)) }, { simde_mm256_set_epi32(INT32_C( 359138398), INT32_C( -860526519), INT32_C( 1692947884), INT32_C( 772823662), INT32_C( -270939677), INT32_C( 1412661540), INT32_C( 1070011153), INT32_C( 771375046)), simde_mm256_set_epi32(INT32_C( -974034130), INT32_C( 37087187), INT32_C( -871436522), INT32_C( 33095078), INT32_C( 715849450), INT32_C(-1345812415), INT32_C( -45115049), INT32_C( 1960320081)), simde_mm256_set_epi32(INT32_C( 359138398), INT32_C( -860526519), INT32_C( 1692947884), INT32_C( 772823662), INT32_C( 715849450), INT32_C(-1345812415), INT32_C( 1070011153), INT32_C( 1960320081)) }, { simde_mm256_set_epi32(INT32_C( -426383461), INT32_C( -768942960), INT32_C( -264677869), INT32_C( -822820045), INT32_C( 1890345084), INT32_C(-2046745025), INT32_C( -207573670), INT32_C( 1399666591)), simde_mm256_set_epi32(INT32_C( 232105709), 
INT32_C(-1583898310), INT32_C( 1161298300), INT32_C( 169359829), INT32_C( 621794425), INT32_C( 607256107), INT32_C( 1099667121), INT32_C( -184390486)), simde_mm256_set_epi32(INT32_C( -426383461), INT32_C( -768942960), INT32_C( -264677869), INT32_C( -822820045), INT32_C( 621794425), INT32_C( 607256107), INT32_C( -207573670), INT32_C( -184390486)) }, { simde_mm256_set_epi32(INT32_C(-1564290184), INT32_C( -240378472), INT32_C( 1142270593), INT32_C( 1000191111), INT32_C( 20701140), INT32_C( 37555352), INT32_C( -694404400), INT32_C( 1055280730)), simde_mm256_set_epi32(INT32_C( 1835031057), INT32_C( 2079483638), INT32_C( 1962415366), INT32_C( -373228817), INT32_C( 142245442), INT32_C( 51427720), INT32_C( 1717201652), INT32_C( 1177983710)), simde_mm256_set_epi32(INT32_C(-1564290184), INT32_C( -240378472), INT32_C( 1142270593), INT32_C( 1000191111), INT32_C( 142245442), INT32_C( 51427720), INT32_C( -694404400), INT32_C( 1177983710)) }, { simde_mm256_set_epi32(INT32_C(-1384452546), INT32_C( -108099055), INT32_C( -3256672), INT32_C(-2139665218), INT32_C( -280826539), INT32_C( -885573478), INT32_C( 2104257473), INT32_C( 1279376382)), simde_mm256_set_epi32(INT32_C( 1706246197), INT32_C(-1331652281), INT32_C( 1192842905), INT32_C( -885790109), INT32_C(-1010846518), INT32_C( -536721191), INT32_C( 1967911533), INT32_C( 1933417937)), simde_mm256_set_epi32(INT32_C(-1384452546), INT32_C( -108099055), INT32_C( -3256672), INT32_C(-2139665218), INT32_C(-1010846518), INT32_C( -536721191), INT32_C( 2104257473), INT32_C( 1933417937)) }, { simde_mm256_set_epi32(INT32_C( 1532802072), INT32_C( 125283422), INT32_C(-1578036874), INT32_C( 445027764), INT32_C( -409254011), INT32_C( 1098938926), INT32_C(-1086732528), INT32_C( -812360922)), simde_mm256_set_epi32(INT32_C( 1974078859), INT32_C( 2037136311), INT32_C( 1463878416), INT32_C( 122656324), INT32_C(-2126065903), INT32_C(-1726635542), INT32_C(-1755031182), INT32_C( 1725515904)), simde_mm256_set_epi32(INT32_C( 1532802072), INT32_C( 
125283422), INT32_C(-1578036874), INT32_C( 445027764), INT32_C(-2126065903), INT32_C(-1726635542), INT32_C(-1086732528), INT32_C( 1725515904)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_blend_epi32(test_vec[i].a, test_vec[i].b, 13); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_blendv_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i i; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( 119), INT8_C( 14), INT8_C( 127), INT8_C(-117), INT8_C( -92), INT8_C( 49), INT8_C( -15), INT8_C( -64), INT8_C( 66), INT8_C(-125), INT8_C( 43), INT8_C( 6), INT8_C(-117), INT8_C( 95), INT8_C( 4), INT8_C( 100), INT8_C( 82), INT8_C( -66), INT8_C( 57), INT8_C( -98), INT8_C( -94), INT8_C( 7), INT8_C( 58), INT8_C(-109), INT8_C( -21), INT8_C( -88), INT8_C( 12), INT8_C( 65), INT8_C(-108), INT8_C( -25), INT8_C( -96), INT8_C( -4)), simde_mm256_set_epi8(INT8_C( -68), INT8_C( -8), INT8_C( 57), INT8_C( 103), INT8_C( 55), INT8_C( 120), INT8_C( 1), INT8_C( 68), INT8_C( 85), INT8_C(-119), INT8_C( -3), INT8_C( 60), INT8_C(-115), INT8_C( -64), INT8_C( 126), INT8_C( -67), INT8_C( 11), INT8_C( 73), INT8_C( 97), INT8_C( -47), INT8_C( 73), INT8_C( -17), INT8_C( -4), INT8_C( 19), INT8_C( -66), INT8_C( -1), INT8_C( 84), INT8_C( -63), INT8_C( 58), INT8_C( 114), INT8_C( 107), INT8_C( -13)), simde_mm256_set_epi8(INT8_C( -94), INT8_C( -13), INT8_C( -63), INT8_C( 48), INT8_C( 72), INT8_C( -33), INT8_C(-118), INT8_C( 86), INT8_C( 17), INT8_C( 112), INT8_C( 9), INT8_C( 106), INT8_C( 115), INT8_C( -15), INT8_C( 74), INT8_C( 5), INT8_C( 17), INT8_C( -31), INT8_C(-120), INT8_C( 18), INT8_C( 82), INT8_C( -67), INT8_C(-101), INT8_C(-109), INT8_C( 81), INT8_C(-126), INT8_C( -8), INT8_C( 4), INT8_C( 123), INT8_C( 125), INT8_C( 70), INT8_C( 66)), simde_mm256_set_epi8(INT8_C( -68), INT8_C( -8), INT8_C( 57), INT8_C(-117), INT8_C( -92), INT8_C( 120), INT8_C( 1), 
INT8_C( -64), INT8_C( 66), INT8_C(-125), INT8_C( 43), INT8_C( 6), INT8_C(-117), INT8_C( -64), INT8_C( 4), INT8_C( 100), INT8_C( 82), INT8_C( 73), INT8_C( 97), INT8_C( -98), INT8_C( -94), INT8_C( -17), INT8_C( -4), INT8_C( 19), INT8_C( -21), INT8_C( -1), INT8_C( 84), INT8_C( 65), INT8_C(-108), INT8_C( -25), INT8_C( -96), INT8_C( -4)) }, { simde_mm256_set_epi8(INT8_C( -95), INT8_C( 48), INT8_C(-106), INT8_C( 2), INT8_C( -98), INT8_C( 42), INT8_C( 111), INT8_C( -63), INT8_C( 63), INT8_C( 60), INT8_C(-104), INT8_C(-108), INT8_C( -18), INT8_C( -88), INT8_C( 55), INT8_C( 21), INT8_C(-103), INT8_C( 114), INT8_C( -80), INT8_C( 89), INT8_C( 60), INT8_C( 45), INT8_C( -20), INT8_C( 79), INT8_C(-112), INT8_C( -14), INT8_C( -11), INT8_C( 117), INT8_C( 14), INT8_C( -21), INT8_C( 11), INT8_C(-119)), simde_mm256_set_epi8(INT8_C( 7), INT8_C( -82), INT8_C( -49), INT8_C( 6), INT8_C(-108), INT8_C(-115), INT8_C( -95), INT8_C( -53), INT8_C( 6), INT8_C( 28), INT8_C( 47), INT8_C( 86), INT8_C( -76), INT8_C(-114), INT8_C( 114), INT8_C( 94), INT8_C( 90), INT8_C( 13), INT8_C( 121), INT8_C(-123), INT8_C( -33), INT8_C( -67), INT8_C( 30), INT8_C( -90), INT8_C( -5), INT8_C( 42), INT8_C( 102), INT8_C( 53), INT8_C( 95), INT8_C( 56), INT8_C( 1), INT8_C( -15)), simde_mm256_set_epi8(INT8_C(-117), INT8_C( -35), INT8_C( 72), INT8_C( -41), INT8_C(-118), INT8_C( 110), INT8_C( 61), INT8_C( 45), INT8_C(-112), INT8_C( 112), INT8_C( 46), INT8_C( -75), INT8_C( 71), INT8_C( -85), INT8_C( -58), INT8_C( 104), INT8_C(-126), INT8_C(-109), INT8_C( 86), INT8_C( 70), INT8_C( -85), INT8_C( 113), INT8_C( 90), INT8_C( 35), INT8_C( -48), INT8_C( -83), INT8_C( 16), INT8_C( 55), INT8_C( -13), INT8_C( 42), INT8_C( -55), INT8_C( -15)), simde_mm256_set_epi8(INT8_C( 7), INT8_C( -82), INT8_C(-106), INT8_C( 6), INT8_C(-108), INT8_C( 42), INT8_C( 111), INT8_C( -63), INT8_C( 6), INT8_C( 60), INT8_C(-104), INT8_C( 86), INT8_C( -18), INT8_C(-114), INT8_C( 114), INT8_C( 21), INT8_C( 90), INT8_C( 13), INT8_C( -80), INT8_C( 89), INT8_C( 
-33), INT8_C( 45), INT8_C( -20), INT8_C( 79), INT8_C( -5), INT8_C( 42), INT8_C( -11), INT8_C( 117), INT8_C( 95), INT8_C( -21), INT8_C( 1), INT8_C( -15)) }, { simde_mm256_set_epi8(INT8_C( 12), INT8_C( 118), INT8_C(-120), INT8_C( 17), INT8_C( 61), INT8_C( 24), INT8_C(-114), INT8_C(-117), INT8_C( -20), INT8_C(-118), INT8_C( 93), INT8_C( 106), INT8_C( -99), INT8_C( -8), INT8_C( 114), INT8_C(-115), INT8_C( -65), INT8_C( -34), INT8_C( 66), INT8_C( 62), INT8_C( 113), INT8_C( -44), INT8_C( 109), INT8_C( 20), INT8_C( 122), INT8_C( -58), INT8_C( 7), INT8_C(-123), INT8_C( 18), INT8_C( 89), INT8_C( -84), INT8_C(-114)), simde_mm256_set_epi8(INT8_C( -67), INT8_C( -15), INT8_C( -72), INT8_C( -39), INT8_C( -90), INT8_C( 108), INT8_C( 93), INT8_C( -36), INT8_C( -64), INT8_C( 36), INT8_C( -98), INT8_C( 76), INT8_C( 123), INT8_C( -40), INT8_C( -48), INT8_C(-111), INT8_C(-100), INT8_C( 76), INT8_C( 114), INT8_C( 51), INT8_C( 72), INT8_C( 51), INT8_C( 101), INT8_C(-128), INT8_C( -68), INT8_C( 35), INT8_C( -50), INT8_C(-100), INT8_C( 85), INT8_C(-109), INT8_C( -86), INT8_C( 43)), simde_mm256_set_epi8(INT8_C( 21), INT8_C( 44), INT8_C( 93), INT8_C( -14), INT8_C( 76), INT8_C( 78), INT8_C( 101), INT8_C( 40), INT8_C( 126), INT8_C( -37), INT8_C( 115), INT8_C( 17), INT8_C( 43), INT8_C( -71), INT8_C(-123), INT8_C( -85), INT8_C( -19), INT8_C( 27), INT8_C( 72), INT8_C( 57), INT8_C( 118), INT8_C( -19), INT8_C( -66), INT8_C( 124), INT8_C( -52), INT8_C( -69), INT8_C( 32), INT8_C( 64), INT8_C( -16), INT8_C(-103), INT8_C( -39), INT8_C( 121)), simde_mm256_set_epi8(INT8_C( 12), INT8_C( 118), INT8_C(-120), INT8_C( -39), INT8_C( 61), INT8_C( 24), INT8_C(-114), INT8_C(-117), INT8_C( -20), INT8_C( 36), INT8_C( 93), INT8_C( 106), INT8_C( -99), INT8_C( -40), INT8_C( -48), INT8_C(-111), INT8_C(-100), INT8_C( -34), INT8_C( 66), INT8_C( 62), INT8_C( 113), INT8_C( 51), INT8_C( 101), INT8_C( 20), INT8_C( -68), INT8_C( 35), INT8_C( 7), INT8_C(-123), INT8_C( 85), INT8_C(-109), INT8_C( -86), INT8_C(-114)) }, { 
simde_mm256_set_epi8(INT8_C( -23), INT8_C( 109), INT8_C( 45), INT8_C( -63), INT8_C( 54), INT8_C( -58), INT8_C( -11), INT8_C( 18), INT8_C( 27), INT8_C( -68), INT8_C(-102), INT8_C( 76), INT8_C( -97), INT8_C( 26), INT8_C( 10), INT8_C( -41), INT8_C( 50), INT8_C( 54), INT8_C(-128), INT8_C( 77), INT8_C( -33), INT8_C( -46), INT8_C( 118), INT8_C( 76), INT8_C( -72), INT8_C( 106), INT8_C( -54), INT8_C( 28), INT8_C( 86), INT8_C( 37), INT8_C( -1), INT8_C( 83)), simde_mm256_set_epi8(INT8_C( 123), INT8_C( 58), INT8_C( 98), INT8_C( 125), INT8_C( 118), INT8_C(-119), INT8_C( -36), INT8_C( 78), INT8_C( 0), INT8_C( 77), INT8_C( 67), INT8_C( 118), INT8_C( 22), INT8_C( -94), INT8_C( -70), INT8_C( 21), INT8_C( 65), INT8_C( 89), INT8_C( 34), INT8_C( 21), INT8_C( 65), INT8_C( -53), INT8_C( -43), INT8_C( -55), INT8_C( -12), INT8_C( -79), INT8_C(-123), INT8_C( 80), INT8_C( -58), INT8_C( -76), INT8_C( -82), INT8_C( 16)), simde_mm256_set_epi8(INT8_C( -15), INT8_C( -31), INT8_C( 124), INT8_C( 18), INT8_C( 63), INT8_C(-119), INT8_C( -84), INT8_C( -47), INT8_C( 110), INT8_C( 99), INT8_C( -7), INT8_C( -78), INT8_C( 122), INT8_C( 9), INT8_C( 54), INT8_C(-112), INT8_C(-100), INT8_C( -9), INT8_C(-127), INT8_C( 67), INT8_C( -60), INT8_C( -43), INT8_C( 76), INT8_C( 10), INT8_C( 83), INT8_C( -83), INT8_C( -60), INT8_C( -94), INT8_C( -63), INT8_C( 113), INT8_C(-114), INT8_C( 124)), simde_mm256_set_epi8(INT8_C( 123), INT8_C( 58), INT8_C( 45), INT8_C( -63), INT8_C( 54), INT8_C(-119), INT8_C( -36), INT8_C( 78), INT8_C( 27), INT8_C( -68), INT8_C( 67), INT8_C( 118), INT8_C( -97), INT8_C( 26), INT8_C( 10), INT8_C( 21), INT8_C( 65), INT8_C( 89), INT8_C( 34), INT8_C( 77), INT8_C( 65), INT8_C( -53), INT8_C( 118), INT8_C( 76), INT8_C( -72), INT8_C( -79), INT8_C(-123), INT8_C( 80), INT8_C( -58), INT8_C( 37), INT8_C( -82), INT8_C( 83)) }, { simde_mm256_set_epi8(INT8_C( 71), INT8_C( 12), INT8_C( 123), INT8_C(-112), INT8_C(-126), INT8_C( 60), INT8_C( 108), INT8_C(-103), INT8_C( 103), INT8_C( 120), INT8_C( -99), 
INT8_C( 114), INT8_C( 34), INT8_C( -37), INT8_C( -98), INT8_C( -39), INT8_C(-107), INT8_C( -14), INT8_C( -8), INT8_C( -82), INT8_C( 123), INT8_C( 78), INT8_C( 27), INT8_C( 7), INT8_C( 90), INT8_C(-108), INT8_C( -54), INT8_C( 40), INT8_C( -9), INT8_C( -18), INT8_C( 12), INT8_C( 6)), simde_mm256_set_epi8(INT8_C( 20), INT8_C( -98), INT8_C( 96), INT8_C( 3), INT8_C( 27), INT8_C( 45), INT8_C( -97), INT8_C( -19), INT8_C( -40), INT8_C( -84), INT8_C( -70), INT8_C( -38), INT8_C( -57), INT8_C(-122), INT8_C( -18), INT8_C( -59), INT8_C( -66), INT8_C( 38), INT8_C( 28), INT8_C( -72), INT8_C( 41), INT8_C( -85), INT8_C(-107), INT8_C( 124), INT8_C(-120), INT8_C( 0), INT8_C( 52), INT8_C( -73), INT8_C( -94), INT8_C(-103), INT8_C( 104), INT8_C(-108)), simde_mm256_set_epi8(INT8_C( 114), INT8_C( 67), INT8_C( 87), INT8_C( 60), INT8_C( -47), INT8_C(-121), INT8_C( -54), INT8_C( 110), INT8_C(-100), INT8_C( 105), INT8_C( -23), INT8_C( 81), INT8_C(-121), INT8_C( 67), INT8_C( 97), INT8_C(-125), INT8_C( 78), INT8_C( 118), INT8_C( 46), INT8_C( -85), INT8_C( 78), INT8_C( -66), INT8_C( 11), INT8_C( 90), INT8_C( 11), INT8_C( 120), INT8_C( 20), INT8_C( 83), INT8_C( 103), INT8_C( -43), INT8_C( 72), INT8_C( -92)), simde_mm256_set_epi8(INT8_C( 71), INT8_C( 12), INT8_C( 123), INT8_C(-112), INT8_C( 27), INT8_C( 45), INT8_C( -97), INT8_C(-103), INT8_C( -40), INT8_C( 120), INT8_C( -70), INT8_C( 114), INT8_C( -57), INT8_C( -37), INT8_C( -98), INT8_C( -59), INT8_C(-107), INT8_C( -14), INT8_C( -8), INT8_C( -72), INT8_C( 123), INT8_C( -85), INT8_C( 27), INT8_C( 7), INT8_C( 90), INT8_C(-108), INT8_C( -54), INT8_C( 40), INT8_C( -9), INT8_C(-103), INT8_C( 12), INT8_C(-108)) }, { simde_mm256_set_epi8(INT8_C(-120), INT8_C( -17), INT8_C( 71), INT8_C( 108), INT8_C(-124), INT8_C( 8), INT8_C( 38), INT8_C(-124), INT8_C( -33), INT8_C( -80), INT8_C( 82), INT8_C( -52), INT8_C( -85), INT8_C( 20), INT8_C( -43), INT8_C( 49), INT8_C(-127), INT8_C( 82), INT8_C(-125), INT8_C(-107), INT8_C( 99), INT8_C( -94), INT8_C( 98), INT8_C( 
124), INT8_C( 23), INT8_C( -8), INT8_C( -30), INT8_C( 107), INT8_C( -17), INT8_C( -36), INT8_C( 65), INT8_C( 35)), simde_mm256_set_epi8(INT8_C( 50), INT8_C( -42), INT8_C( 70), INT8_C( 74), INT8_C( -25), INT8_C( 118), INT8_C( 53), INT8_C( 91), INT8_C( -24), INT8_C( 44), INT8_C(-107), INT8_C( 38), INT8_C( 62), INT8_C( 93), INT8_C( 84), INT8_C( 86), INT8_C( 25), INT8_C( 25), INT8_C( -1), INT8_C( -49), INT8_C( -69), INT8_C( -81), INT8_C( 33), INT8_C( -90), INT8_C( 118), INT8_C( 99), INT8_C( -68), INT8_C( 38), INT8_C( -18), INT8_C( -20), INT8_C( -37), INT8_C( -57)), simde_mm256_set_epi8(INT8_C( 36), INT8_C( 48), INT8_C(-118), INT8_C( 125), INT8_C( 12), INT8_C( 115), INT8_C( -38), INT8_C( 1), INT8_C( -43), INT8_C(-120), INT8_C( 119), INT8_C( 88), INT8_C( 19), INT8_C( 1), INT8_C( 40), INT8_C( -39), INT8_C( -67), INT8_C( 66), INT8_C( -86), INT8_C( 38), INT8_C( 40), INT8_C( -46), INT8_C( 104), INT8_C( -95), INT8_C( 115), INT8_C( -49), INT8_C( 15), INT8_C(-101), INT8_C( -79), INT8_C( -5), INT8_C( 112), INT8_C( -45)), simde_mm256_set_epi8(INT8_C(-120), INT8_C( -17), INT8_C( 70), INT8_C( 108), INT8_C(-124), INT8_C( 8), INT8_C( 53), INT8_C(-124), INT8_C( -24), INT8_C( 44), INT8_C( 82), INT8_C( -52), INT8_C( -85), INT8_C( 20), INT8_C( -43), INT8_C( 86), INT8_C( 25), INT8_C( 82), INT8_C( -1), INT8_C(-107), INT8_C( 99), INT8_C( -81), INT8_C( 98), INT8_C( -90), INT8_C( 23), INT8_C( 99), INT8_C( -30), INT8_C( 38), INT8_C( -18), INT8_C( -20), INT8_C( 65), INT8_C( -57)) }, { simde_mm256_set_epi8(INT8_C( 55), INT8_C( 33), INT8_C( 43), INT8_C( 65), INT8_C( -59), INT8_C( -95), INT8_C( 10), INT8_C( 11), INT8_C( 122), INT8_C( 35), INT8_C( 55), INT8_C( 107), INT8_C( -46), INT8_C(-119), INT8_C(-106), INT8_C( 77), INT8_C( -56), INT8_C( -78), INT8_C( -97), INT8_C(-126), INT8_C( -36), INT8_C( 17), INT8_C( 13), INT8_C( 38), INT8_C( 52), INT8_C( -31), INT8_C( 68), INT8_C( 51), INT8_C( -16), INT8_C( 61), INT8_C( -72), INT8_C( -48)), simde_mm256_set_epi8(INT8_C( 123), INT8_C(-111), INT8_C( -83), 
INT8_C( -8), INT8_C( 66), INT8_C( -69), INT8_C( -27), INT8_C(-103), INT8_C(-112), INT8_C( -80), INT8_C( 56), INT8_C( 60), INT8_C(-106), INT8_C( -4), INT8_C(-128), INT8_C(-122), INT8_C( 74), INT8_C( -55), INT8_C( -48), INT8_C(-128), INT8_C( -2), INT8_C( 63), INT8_C( -37), INT8_C( 18), INT8_C( -89), INT8_C( -76), INT8_C( 42), INT8_C( 62), INT8_C( 74), INT8_C( -87), INT8_C( 30), INT8_C(-103)), simde_mm256_set_epi8(INT8_C( 4), INT8_C( -10), INT8_C( -5), INT8_C( -1), INT8_C( -4), INT8_C( 87), INT8_C( -59), INT8_C( -9), INT8_C( -53), INT8_C( 83), INT8_C( -63), INT8_C( 99), INT8_C( 55), INT8_C( 119), INT8_C( 36), INT8_C( -96), INT8_C( -98), INT8_C( 22), INT8_C( 4), INT8_C(-128), INT8_C( 16), INT8_C( -72), INT8_C( 114), INT8_C( 35), INT8_C( -15), INT8_C( -10), INT8_C( 97), INT8_C(-112), INT8_C(-109), INT8_C( -68), INT8_C( 8), INT8_C( 101)), simde_mm256_set_epi8(INT8_C( 55), INT8_C(-111), INT8_C( -83), INT8_C( -8), INT8_C( 66), INT8_C( -95), INT8_C( -27), INT8_C(-103), INT8_C(-112), INT8_C( 35), INT8_C( 56), INT8_C( 107), INT8_C( -46), INT8_C(-119), INT8_C(-106), INT8_C(-122), INT8_C( 74), INT8_C( -78), INT8_C( -97), INT8_C(-128), INT8_C( -36), INT8_C( 63), INT8_C( 13), INT8_C( 38), INT8_C( -89), INT8_C( -76), INT8_C( 68), INT8_C( 62), INT8_C( 74), INT8_C( -87), INT8_C( -72), INT8_C( -48)) }, { simde_mm256_set_epi8(INT8_C( 69), INT8_C( -38), INT8_C(-105), INT8_C( -77), INT8_C( 3), INT8_C( -19), INT8_C( -91), INT8_C( 81), INT8_C( -61), INT8_C( 88), INT8_C( -52), INT8_C( -76), INT8_C(-100), INT8_C( -70), INT8_C( -64), INT8_C( 71), INT8_C( 82), INT8_C(-122), INT8_C( -41), INT8_C( -86), INT8_C( 9), INT8_C( 29), INT8_C( -31), INT8_C( -87), INT8_C(-113), INT8_C( 100), INT8_C( 100), INT8_C( -55), INT8_C( -11), INT8_C( 92), INT8_C( -87), INT8_C( 43)), simde_mm256_set_epi8(INT8_C( 96), INT8_C(-116), INT8_C( 5), INT8_C( 16), INT8_C( -66), INT8_C( 4), INT8_C( 8), INT8_C( 34), INT8_C( 93), INT8_C( 90), INT8_C( -27), INT8_C( 86), INT8_C( 115), INT8_C( 27), INT8_C( -30), INT8_C( 15), 
INT8_C( 94), INT8_C( -93), INT8_C( -55), INT8_C( -18), INT8_C(-122), INT8_C( 51), INT8_C( -22), INT8_C( -72), INT8_C( -80), INT8_C( 24), INT8_C( 38), INT8_C( 87), INT8_C( -35), INT8_C( -67), INT8_C( 94), INT8_C( -17)), simde_mm256_set_epi8(INT8_C( -19), INT8_C( -11), INT8_C( 77), INT8_C( -75), INT8_C( 98), INT8_C( -18), INT8_C( -28), INT8_C( 5), INT8_C( 86), INT8_C(-117), INT8_C( 114), INT8_C( -52), INT8_C( 63), INT8_C( -51), INT8_C( 110), INT8_C( -22), INT8_C( 54), INT8_C( 36), INT8_C( 44), INT8_C( 72), INT8_C( -14), INT8_C( -61), INT8_C( -76), INT8_C( 51), INT8_C( 40), INT8_C( -28), INT8_C( -97), INT8_C( 114), INT8_C(-128), INT8_C( 9), INT8_C( 94), INT8_C( -35)), simde_mm256_set_epi8(INT8_C( 96), INT8_C(-116), INT8_C(-105), INT8_C( 16), INT8_C( 3), INT8_C( 4), INT8_C( 8), INT8_C( 81), INT8_C( -61), INT8_C( 90), INT8_C( -52), INT8_C( 86), INT8_C(-100), INT8_C( 27), INT8_C( -64), INT8_C( 15), INT8_C( 82), INT8_C(-122), INT8_C( -41), INT8_C( -86), INT8_C(-122), INT8_C( 51), INT8_C( -22), INT8_C( -87), INT8_C(-113), INT8_C( 24), INT8_C( 38), INT8_C( -55), INT8_C( -35), INT8_C( 92), INT8_C( -87), INT8_C( -17)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_blendv_epi8(test_vec[i].a, test_vec[i].b, test_vec[i].i); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cmpeq_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8( INT8_C( -34), INT8_C( 65), INT8_C( -18), INT8_C( -94), INT8_C( 20), INT8_C(-105), INT8_C( 79), INT8_C( -26), INT8_C( 7), INT8_C( -36), INT8_C( 33), INT8_C( 123), INT8_C( 115), INT8_C( -52), INT8_C( -98), INT8_C( 62), INT8_C( 64), INT8_C( -91), INT8_C( 45), INT8_C( 102), INT8_C( 21), INT8_C(-111), INT8_C( -41), INT8_C( -14), INT8_C( 91), INT8_C(-106), INT8_C( 23), INT8_C( 86), INT8_C(-108), INT8_C( 34), INT8_C( -9), INT8_C( 42)), simde_mm256_set_epi8( INT8_C( -34), INT8_C( 
65), INT8_C(-119), INT8_C( -94), INT8_C( -15), INT8_C( 95), INT8_C( 72), INT8_C( 66), INT8_C(-113), INT8_C( -36), ~INT8_C( 0), INT8_C( 123), INT8_C( -47), INT8_C( -52), INT8_C( -7), INT8_C( 62), INT8_C( 31), INT8_C( 79), INT8_C( 20), INT8_C( 102), INT8_C( 21), INT8_C( 17), INT8_C(-125), INT8_C( -14), INT8_C( 22), INT8_C(-111), INT8_C( 23), INT8_C( 100), INT8_C(-122), INT8_C( 34), INT8_C( -26), INT8_C( 42)), simde_mm256_set_epi8(~INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0)) }, { simde_mm256_set_epi8( INT8_C( 61), INT8_C( 96), INT8_C( -54), INT8_C( 88), INT8_C(-115), INT8_C( 14), INT8_C( 100), INT8_C( -28), INT8_C( 78), INT8_C( -79), INT8_C( 80), INT8_C( 114), INT8_C( 29), INT8_C( -69), INT8_C( 6), INT8_C(-127), INT8_C( 116), INT8_C( 35), INT8_C( 49), INT8_C( -24), INT8_C( 4), INT8_C(-123), INT8_C( -95), INT8_C( -23), INT8_C( 44), INT8_C( 68), INT8_C( -84), INT8_C( -9), INT8_C( -86), INT8_C( 117), INT8_C( 55), INT8_C( 121)), simde_mm256_set_epi8( INT8_C( 61), INT8_C( 81), INT8_C( 33), INT8_C( 88), INT8_C( -34), INT8_C( 14), INT8_C( 111), INT8_C( -28), INT8_C( -70), INT8_C(-102), INT8_C( 116), INT8_C(-102), INT8_C( 29), INT8_C( -10), INT8_C( 13), INT8_C( 109), INT8_C( 116), INT8_C( -83), INT8_C( 49), INT8_C( 1), INT8_C( 4), INT8_C(-123), INT8_C( -18), INT8_C( -23), INT8_C( 44), INT8_C( 68), INT8_C(-109), INT8_C( 54), INT8_C( 123), INT8_C( 62), INT8_C( 71), INT8_C( 3)), simde_mm256_set_epi8(~INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), 
INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm256_set_epi8( INT8_C( -58), INT8_C( -55), INT8_C( 126), INT8_C( -17), INT8_C( -94), INT8_C( -45), INT8_C( -42), INT8_C( 39), INT8_C( -81), INT8_C( -73), INT8_C( -20), INT8_C( -73), INT8_C( 79), INT8_C( 25), INT8_C( -86), INT8_C( -52), INT8_C( 106), INT8_C( 58), INT8_C( 113), INT8_C( 61), INT8_C( -71), INT8_C( -3), INT8_C( 101), INT8_C(-102), INT8_C( -14), INT8_C( -40), INT8_C( 82), INT8_C( -84), INT8_C( 49), INT8_C( 46), INT8_C( 59), INT8_C( -33)), simde_mm256_set_epi8( INT8_C( 110), INT8_C( -67), INT8_C( 126), INT8_C( -17), INT8_C( -94), INT8_C( -46), INT8_C( -74), INT8_C( -37), INT8_C( -81), INT8_C( -93), INT8_C( 97), INT8_C( -73), INT8_C( -66), INT8_C( 25), INT8_C( 37), INT8_C( -52), INT8_C( 106), INT8_C( -80), INT8_C( 113), INT8_C( 61), INT8_C( -50), INT8_C( 68), INT8_C( 101), INT8_C(-102), INT8_C( -14), INT8_C( -35), INT8_C( 54), INT8_C( -2), INT8_C( 49), INT8_C( 46), INT8_C( 6), INT8_C( 4)), simde_mm256_set_epi8( INT8_C( 0), INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm256_set_epi8( INT8_C( -9), INT8_C( 28), INT8_C( 1), INT8_C( 0), INT8_C( 57), INT8_C( -72), INT8_C( -61), INT8_C( 74), INT8_C(-105), INT8_C( 58), INT8_C( 84), INT8_C( 60), INT8_C( 21), INT8_C( 38), INT8_C( 16), INT8_C( 25), INT8_C( 2), INT8_C( -63), INT8_C( 28), INT8_C( 93), INT8_C( -44), INT8_C( -78), INT8_C( -50), INT8_C( -21), INT8_C( 47), INT8_C( 30), INT8_C( 97), INT8_C( 18), INT8_C( -36), INT8_C( 72), INT8_C( -66), INT8_C( 124)), simde_mm256_set_epi8( INT8_C(-127), 
INT8_C( -27), INT8_C( 86), INT8_C( -50), INT8_C( 57), INT8_C( -49), INT8_C( -61), INT8_C( 74), INT8_C(-105), INT8_C( -45), INT8_C( 125), INT8_C( 18), INT8_C( -47), INT8_C( 70), INT8_C( 16), INT8_C( 90), INT8_C( 2), INT8_C( -44), INT8_C( 121), INT8_C( 48), INT8_C( -78), INT8_C( 120), INT8_C( -50), INT8_C( -21), INT8_C( 47), INT8_C( -59), INT8_C( 97), INT8_C( 21), INT8_C( -36), INT8_C( 72), INT8_C( -44), INT8_C( 124)), simde_mm256_set_epi8( INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0)) }, { simde_mm256_set_epi8( INT8_C( 75), INT8_C( -6), INT8_C( -43), INT8_C( 107), INT8_C( 28), INT8_C( 81), INT8_C( 82), INT8_C( 91), INT8_C( 66), INT8_C( 107), INT8_C( -41), INT8_C( -50), INT8_C( -5), INT8_C( 66), INT8_C(-100), INT8_C( 112), INT8_C(-109), INT8_C( 92), INT8_C( -50), INT8_C( 68), INT8_C( -54), INT8_C( 111), INT8_C( 25), INT8_C( 36), INT8_C( 30), INT8_C( -33), INT8_C( 28), INT8_C( 68), INT8_C( -69), INT8_C( 124), INT8_C(-124), INT8_C( 48)), simde_mm256_set_epi8( INT8_C( 26), INT8_C( 124), INT8_C( -85), INT8_C( 90), INT8_C( 49), INT8_C( -43), INT8_C( 82), INT8_C(-110), INT8_C( -4), INT8_C(-103), INT8_C(-116), INT8_C( -91), INT8_C( 101), INT8_C( -60), INT8_C( 28), INT8_C( -62), INT8_C(-109), INT8_C( 2), INT8_C( 71), INT8_C( 68), INT8_C( 92), INT8_C( -96), INT8_C( -47), INT8_C( 89), INT8_C(-111), INT8_C( -89), INT8_C( 38), INT8_C( 109), INT8_C( -69), INT8_C( 57), INT8_C( 87), INT8_C( 94)), simde_mm256_set_epi8( INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), 
INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm256_set_epi8( INT8_C( 78), INT8_C(-103), INT8_C( -83), INT8_C( -64), INT8_C( 86), INT8_C( 62), INT8_C( 40), INT8_C(-116), INT8_C( 49), INT8_C( 94), INT8_C( -31), INT8_C( 82), INT8_C( -55), INT8_C(-100), INT8_C( -61), INT8_C( -62), INT8_C(-101), INT8_C( 117), INT8_C( -20), INT8_C( 19), INT8_C( -97), INT8_C( -75), INT8_C(-101), INT8_C( -39), INT8_C( 28), INT8_C( 55), INT8_C( 85), INT8_C( 106), INT8_C( -97), INT8_C( -74), INT8_C( 51), INT8_C( -54)), simde_mm256_set_epi8( INT8_C(-116), INT8_C( -91), INT8_C( 105), INT8_C( -64), INT8_C( -6), INT8_C( -63), INT8_C( 82), INT8_C( 4), INT8_C( -7), INT8_C( -70), INT8_C( -31), INT8_C( -83), INT8_C( 22), INT8_C(-100), INT8_C( 96), INT8_C( -62), INT8_C(-120), INT8_C( 120), INT8_C( -20), INT8_C( -80), INT8_C( -97), INT8_C( -75), INT8_C( -29), INT8_C( 4), INT8_C( 28), INT8_C( 60), INT8_C( 79), INT8_C( 73), INT8_C( 17), INT8_C(-120), INT8_C( 110), INT8_C( 100)), simde_mm256_set_epi8( INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm256_set_epi8( INT8_C( 118), INT8_C( 88), INT8_C( 13), INT8_C( 39), INT8_C( -45), INT8_C( 46), INT8_C( -85), INT8_C( 17), INT8_C( -7), INT8_C( 75), INT8_C( -5), INT8_C(-120), INT8_C( -77), INT8_C( -49), INT8_C( 58), INT8_C( -50), INT8_C( 0), INT8_C( 21), INT8_C( -11), INT8_C( -48), INT8_C( -85), INT8_C( 55), INT8_C( 100), ~INT8_C( 0), INT8_C( 7), INT8_C( 72), INT8_C( 25), INT8_C(-126), INT8_C( -81), INT8_C( -15), INT8_C( -25), INT8_C( -55)), simde_mm256_set_epi8( INT8_C( 107), 
INT8_C( 67), INT8_C( -53), INT8_C( 30), INT8_C( 53), INT8_C( -25), INT8_C( 94), INT8_C( 90), INT8_C( -59), INT8_C( 75), INT8_C( 65), INT8_C(-120), INT8_C( 32), INT8_C( -11), INT8_C( 58), INT8_C( 33), INT8_C( 0), INT8_C( 44), INT8_C( -11), INT8_C( 49), INT8_C( -51), INT8_C( 55), INT8_C( 100), INT8_C( -67), INT8_C( -45), INT8_C( 89), INT8_C( 25), INT8_C( 110), INT8_C( 60), INT8_C( -42), INT8_C( -25), INT8_C( 1)), simde_mm256_set_epi8( INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0)) }, { simde_mm256_set_epi8( INT8_C( 47), INT8_C( 126), INT8_C( -95), INT8_C(-121), INT8_C( 84), INT8_C( 11), INT8_C( -4), INT8_C( -74), INT8_C(-124), INT8_C( 4), INT8_C( -12), INT8_C( 31), INT8_C( 41), INT8_C( 31), INT8_C( -42), INT8_C( 92), INT8_C( -62), INT8_C( -8), INT8_C( -73), INT8_C( 14), INT8_C( 22), INT8_C(-109), INT8_C( 103), INT8_C( 90), INT8_C( 98), INT8_C( -28), INT8_C( -11), INT8_C(-120), INT8_C( -81), INT8_C( 0), INT8_C( 34), INT8_C( 36)), simde_mm256_set_epi8( INT8_C( 47), INT8_C(-103), INT8_C( -95), INT8_C(-121), INT8_C( 84), INT8_C( 72), INT8_C( 125), INT8_C( -13), INT8_C( -49), INT8_C( 14), INT8_C( 44), INT8_C( 110), INT8_C( 41), INT8_C( 68), INT8_C( -42), INT8_C( 92), INT8_C( 107), INT8_C( -8), INT8_C( 127), INT8_C( 14), INT8_C( 22), INT8_C(-109), INT8_C( -52), INT8_C( -3), INT8_C( -78), INT8_C( 91), INT8_C( 8), INT8_C( 23), INT8_C( 110), INT8_C( -91), INT8_C( 34), INT8_C( 17)), simde_mm256_set_epi8(~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0), 
~INT8_C( 0), ~INT8_C( 0), ~INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), ~INT8_C( 0), INT8_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cmpeq_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cmpeq_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16( INT16_C( -5413), INT16_C( -8669), INT16_C(-20019), INT16_C( 13281), INT16_C( 31513), INT16_C( 29495), INT16_C( 24515), INT16_C( -4843), INT16_C(-25942), INT16_C(-22058), INT16_C( 25862), INT16_C( 17599), INT16_C( 3410), INT16_C(-25277), INT16_C( -9899), INT16_C( -1157)), simde_mm256_set_epi16( INT16_C(-24951), INT16_C( -8669), INT16_C(-20019), INT16_C(-25059), INT16_C(-16550), INT16_C( 29495), INT16_C( 14158), INT16_C( 15756), INT16_C(-12717), INT16_C( 16217), INT16_C( 23007), INT16_C(-29661), INT16_C( -4389), INT16_C( -6794), INT16_C( 24406), INT16_C( 3979)), simde_mm256_set_epi16( INT16_C( 0), ~INT16_C( 0), ~INT16_C( 0), INT16_C( 0), INT16_C( 0), ~INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm256_set_epi16( INT16_C(-24592), INT16_C(-14625), INT16_C(-22785), INT16_C( -539), INT16_C( 27023), INT16_C( 22410), INT16_C( 24512), INT16_C( 28286), INT16_C( -371), INT16_C(-19534), INT16_C( 20760), INT16_C( -2933), INT16_C( 19420), INT16_C( 28265), INT16_C( -411), INT16_C(-25192)), simde_mm256_set_epi16( INT16_C(-13031), INT16_C(-29481), INT16_C(-21398), INT16_C( 5762), INT16_C(-11151), INT16_C( 22410), INT16_C(-17819), INT16_C(-18343), INT16_C( -371), INT16_C(-18178), INT16_C( 32217), INT16_C(-13082), INT16_C( 19420), INT16_C(-14308), INT16_C( -411), INT16_C(-25192)), simde_mm256_set_epi16( INT16_C( 0), INT16_C( 0), INT16_C( 0), 
INT16_C( 0), INT16_C( 0), ~INT16_C( 0), INT16_C( 0), INT16_C( 0), ~INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), ~INT16_C( 0), INT16_C( 0), ~INT16_C( 0), ~INT16_C( 0)) }, { simde_mm256_set_epi16( INT16_C( -1787), INT16_C( 14472), INT16_C( 9645), INT16_C( 6467), INT16_C(-28251), INT16_C( -4608), INT16_C(-20138), INT16_C(-12375), INT16_C( -3139), INT16_C(-11799), INT16_C(-25694), INT16_C( -9505), INT16_C( 2016), INT16_C(-12994), INT16_C( 12032), INT16_C( -1625)), simde_mm256_set_epi16( INT16_C( 14082), INT16_C( 14472), INT16_C( 9645), INT16_C(-10341), INT16_C(-21377), INT16_C(-28844), INT16_C(-27109), INT16_C(-21859), INT16_C( -3139), INT16_C(-20139), INT16_C(-25694), INT16_C( 28917), INT16_C( 5339), INT16_C( 19240), INT16_C( 12032), INT16_C( 2212)), simde_mm256_set_epi16( INT16_C( 0), ~INT16_C( 0), ~INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), ~INT16_C( 0), INT16_C( 0), ~INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), ~INT16_C( 0), INT16_C( 0)) }, { simde_mm256_set_epi16( INT16_C( 29890), INT16_C( 31032), INT16_C(-29157), INT16_C(-14947), INT16_C(-10307), INT16_C(-20105), INT16_C( -2072), INT16_C( 29196), INT16_C( 1484), INT16_C(-11865), INT16_C(-30233), INT16_C( 30603), INT16_C( 20632), INT16_C(-27772), INT16_C(-32273), INT16_C( 4867)), simde_mm256_set_epi16( INT16_C(-21410), INT16_C( 9749), INT16_C(-29157), INT16_C(-14947), INT16_C( 13547), INT16_C( 28436), INT16_C( -2072), INT16_C( 29196), INT16_C( 1484), INT16_C(-14930), INT16_C( 6002), INT16_C( 30603), INT16_C( 27473), INT16_C( 2844), INT16_C(-19044), INT16_C( 4867)), simde_mm256_set_epi16( INT16_C( 0), INT16_C( 0), ~INT16_C( 0), ~INT16_C( 0), INT16_C( 0), INT16_C( 0), ~INT16_C( 0), ~INT16_C( 0), ~INT16_C( 0), INT16_C( 0), INT16_C( 0), ~INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), ~INT16_C( 0)) }, { simde_mm256_set_epi16( INT16_C( -2810), INT16_C(-12778), INT16_C( 16624), INT16_C( 2706), INT16_C( 14585), INT16_C( -6866), INT16_C( -582), INT16_C( 29880), INT16_C( 
20309), INT16_C( 25315), INT16_C( -5634), INT16_C(-21292), INT16_C( 11215), INT16_C( 13817), INT16_C( 26751), INT16_C( -8288)), simde_mm256_set_epi16( INT16_C(-11737), INT16_C(-19770), INT16_C( 8400), INT16_C( 2706), INT16_C( 14599), INT16_C( -6866), INT16_C( -582), INT16_C( 29880), INT16_C( 20309), INT16_C( 32436), INT16_C( -5634), INT16_C(-21292), INT16_C(-11784), INT16_C( 13817), INT16_C( 26751), INT16_C(-12877)), simde_mm256_set_epi16( INT16_C( 0), INT16_C( 0), INT16_C( 0), ~INT16_C( 0), INT16_C( 0), ~INT16_C( 0), ~INT16_C( 0), ~INT16_C( 0), ~INT16_C( 0), INT16_C( 0), ~INT16_C( 0), ~INT16_C( 0), INT16_C( 0), ~INT16_C( 0), ~INT16_C( 0), INT16_C( 0)) }, { simde_mm256_set_epi16( INT16_C( 11738), INT16_C(-30358), INT16_C(-23042), INT16_C( -2979), INT16_C( 32390), INT16_C( 22571), INT16_C( -5410), INT16_C( 14750), INT16_C(-16092), INT16_C( 27869), INT16_C( 28085), INT16_C( 15025), INT16_C(-11732), INT16_C( 28458), INT16_C( -3526), INT16_C(-21900)), simde_mm256_set_epi16( INT16_C( 11738), INT16_C(-30358), INT16_C(-12411), INT16_C( -2979), INT16_C(-27990), INT16_C( 25385), INT16_C( -5410), INT16_C( 13011), INT16_C(-30216), INT16_C(-15754), INT16_C( 28085), INT16_C( 142), INT16_C(-11865), INT16_C( 28458), INT16_C( 2605), INT16_C( 31867)), simde_mm256_set_epi16( ~INT16_C( 0), ~INT16_C( 0), INT16_C( 0), ~INT16_C( 0), INT16_C( 0), INT16_C( 0), ~INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), ~INT16_C( 0), INT16_C( 0), INT16_C( 0), ~INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm256_set_epi16( INT16_C(-15649), INT16_C( 19434), INT16_C( 30307), INT16_C( -2580), INT16_C(-17669), INT16_C(-12636), INT16_C(-26128), INT16_C( 1047), INT16_C( 31189), INT16_C( -4800), INT16_C(-21775), INT16_C( 9584), INT16_C(-10037), INT16_C(-12969), INT16_C( -2203), INT16_C( 7107)), simde_mm256_set_epi16( INT16_C(-14042), INT16_C( 19434), INT16_C( 12761), INT16_C( -2580), INT16_C(-14129), INT16_C(-21327), INT16_C( 8254), INT16_C( 17988), INT16_C( 31189), INT16_C(-12604), INT16_C(-31847), 
INT16_C( 9584), INT16_C( 2606), INT16_C(-15409), INT16_C( -2203), INT16_C( 7107)), simde_mm256_set_epi16( INT16_C( 0), ~INT16_C( 0), INT16_C( 0), ~INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), ~INT16_C( 0), INT16_C( 0), INT16_C( 0), ~INT16_C( 0), INT16_C( 0), INT16_C( 0), ~INT16_C( 0), ~INT16_C( 0)) }, { simde_mm256_set_epi16( INT16_C( 2093), INT16_C( 26175), INT16_C( 5850), INT16_C(-17892), INT16_C(-28618), INT16_C(-20409), INT16_C( 201), INT16_C(-27373), INT16_C(-25181), INT16_C( -3904), INT16_C( 26317), INT16_C( 18884), INT16_C( 15503), INT16_C( 21511), INT16_C(-24060), INT16_C( -1242)), simde_mm256_set_epi16( INT16_C(-27449), INT16_C( 26175), INT16_C( 26521), INT16_C(-16887), INT16_C(-13967), INT16_C(-16902), INT16_C(-25148), INT16_C( 11331), INT16_C(-22706), INT16_C( -3904), INT16_C( -3346), INT16_C( 18884), INT16_C( 2207), INT16_C( 11164), INT16_C( 22502), INT16_C(-11226)), simde_mm256_set_epi16( INT16_C( 0), ~INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), ~INT16_C( 0), INT16_C( 0), ~INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cmpeq_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cmpeq_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32( INT32_C( -566104414), INT32_C( 345460710), INT32_C( 131867003), INT32_C( 1942789694), INT32_C( 1084566886), INT32_C( 361879538), INT32_C( 1536563030), INT32_C(-1809647830)), simde_mm256_set_epi32( INT32_C( 1749780765), INT32_C( -245413822), INT32_C( 131867003), INT32_C( -777848334), INT32_C( 525276311), INT32_C( 361879538), INT32_C( 378618724), INT32_C(-1809647830)), simde_mm256_set_epi32( INT32_C( 0), INT32_C( 0), ~INT32_C( 0), INT32_C( 0), INT32_C( 0), ~INT32_C( 0), 
INT32_C( 0), ~INT32_C( 0)) }, { simde_mm256_set_epi32( INT32_C( 1746696722), INT32_C( 940817566), INT32_C( -345075038), INT32_C( 1008650721), INT32_C( 1982631981), INT32_C( 1481863730), INT32_C( 1367059109), INT32_C( -627319339)), simde_mm256_set_epi32( INT32_C( 1746696722), INT32_C( 940817566), INT32_C(-2066241582), INT32_C( 1008650721), INT32_C( 174011254), INT32_C(-1995628897), INT32_C( -710324691), INT32_C( -119123371)), simde_mm256_set_epi32( ~INT32_C( 0), ~INT32_C( 0), INT32_C( 0), ~INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm256_set_epi32( INT32_C( 1029753432), INT32_C(-1928436508), INT32_C( 1320243314), INT32_C( 498796161), INT32_C( 1948463592), INT32_C( 75866601), INT32_C( 742698231), INT32_C(-1435158663)), simde_mm256_set_epi32( INT32_C( 1029753432), INT32_C(-1928436508), INT32_C(-1164282726), INT32_C( 267783533), INT32_C( -156415231), INT32_C(-1409290651), INT32_C( -98856138), INT32_C( 2067678979)), simde_mm256_set_epi32( ~INT32_C( 0), ~INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm256_set_epi32( INT32_C( -346859314), INT32_C( 1692797387), INT32_C( 66311928), INT32_C( 460824773), INT32_C( -548661058), INT32_C( -254589283), INT32_C( 833835845), INT32_C(-1932331579)), simde_mm256_set_epi32( INT32_C( -346859314), INT32_C( 60539810), INT32_C( 1011930823), INT32_C( 460824773), INT32_C( 190841895), INT32_C( -254589283), INT32_C( 316499678), INT32_C(-1932331579)), simde_mm256_set_epi32( ~INT32_C( 0), INT32_C( 0), INT32_C( 0), ~INT32_C( 0), INT32_C( 0), ~INT32_C( 0), INT32_C( 0), ~INT32_C( 0)) }, { simde_mm256_set_epi32( INT32_C( -959873297), INT32_C(-1563175385), INT32_C(-1346900809), INT32_C( 1327082188), INT32_C( 1782214973), INT32_C(-1174575718), INT32_C( -220704084), INT32_C( 825113567)), simde_mm256_set_epi32( INT32_C( -959873297), INT32_C( 735229659), INT32_C( -660381298), INT32_C(-1091295976), INT32_C( 1782214973), INT32_C(-1174575718), INT32_C( 2094872318), 
INT32_C( 1028851204)), simde_mm256_set_epi32( ~INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), ~INT32_C( 0), ~INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm256_set_epi32( INT32_C( 1661232006), INT32_C( 1627888390), INT32_C( 757018179), INT32_C( -750573130), INT32_C(-1367421353), INT32_C( 198410180), INT32_C(-1240440841), INT32_C(-1417499113)), simde_mm256_set_epi32( INT32_C( -466269763), INT32_C( 49857616), INT32_C( 757018179), INT32_C( -750573130), INT32_C(-1367421353), INT32_C(-1831110733), INT32_C(-1233216938), INT32_C( 84780453)), simde_mm256_set_epi32( INT32_C( 0), INT32_C( 0), ~INT32_C( 0), ~INT32_C( 0), ~INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm256_set_epi32( INT32_C( -149159680), INT32_C( 968409930), INT32_C(-1757785028), INT32_C( 354816025), INT32_C( 46210141), INT32_C( -726479125), INT32_C( 790520082), INT32_C( -599212420)), simde_mm256_set_epi32( INT32_C( -149159680), INT32_C( 1775235902), INT32_C(-1757785028), INT32_C( -783938470), INT32_C( 46210141), INT32_C( -726479125), INT32_C( -272289003), INT32_C( -599212420)), simde_mm256_set_epi32( ~INT32_C( 0), INT32_C( 0), ~INT32_C( 0), INT32_C( 0), ~INT32_C( 0), ~INT32_C( 0), INT32_C( 0), ~INT32_C( 0)) }, { simde_mm256_set_epi32( INT32_C( 431380596), INT32_C( -800905622), INT32_C(-1090716510), INT32_C( 598832812), INT32_C( 183783146), INT32_C( 1473748330), INT32_C( 1405235121), INT32_C( 1193787762)), simde_mm256_set_epi32( INT32_C( 1093676520), INT32_C( -410887875), INT32_C( 1548812312), INT32_C( -913535121), INT32_C( 183783146), INT32_C(-1507745238), INT32_C( 1405235121), INT32_C( 1193787762)), simde_mm256_set_epi32( INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), ~INT32_C( 0), INT32_C( 0), ~INT32_C( 0), ~INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cmpeq_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int 
test_simde_mm256_cmpeq_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x( INT64_C( 5666986029204224795), INT64_C( 4139191150084672711), INT64_C(-2995542033801296871), INT64_C( 2916699395471658679)), simde_mm256_set_epi64x( INT64_C( 5666986029204224795), INT64_C(-4286656252876457389), INT64_C( 1630571993074201933), INT64_C(-4091102519963379575)), simde_mm256_set_epi64x( INT64_C( -1), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { simde_mm256_set_epi64x( INT64_C( 3487161978562406497), INT64_C( 3319820858778075593), INT64_C(-2061262812497087366), INT64_C( 3422398331177828808)), simde_mm256_set_epi64x( INT64_C( 3487161978562406497), INT64_C( 3319820858778075593), INT64_C( 4615290220825808616), INT64_C(-1811913409974062507)), simde_mm256_set_epi64x( INT64_C( -1), INT64_C( -1), INT64_C( 0), INT64_C( 0)) }, { simde_mm256_set_epi64x( INT64_C( 6876132350949566240), INT64_C( 286650737897020179), INT64_C( 8647803947797074216), INT64_C( 8045239586356137615)), simde_mm256_set_epi64x( INT64_C(-2007896190058863769), INT64_C( 1398080065885028992), INT64_C( 8575195519759543000), INT64_C( 3842062270653749921)), simde_mm256_set_epi64x( INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { simde_mm256_set_epi64x( INT64_C( -813114899828683326), INT64_C( 8993356532590363052), INT64_C( 1099980308418045920), INT64_C( -959713571456510913)), simde_mm256_set_epi64x( INT64_C( -813114899828683326), INT64_C( 8993356532590363052), INT64_C( 2705033811286181232), INT64_C(-9000650858577263758)), simde_mm256_set_epi64x( INT64_C( -1), INT64_C( -1), INT64_C( 0), INT64_C( 0)) }, { simde_mm256_set_epi64x( INT64_C(-2995469803746067901), INT64_C( 8439110829491825822), INT64_C( 622618816590200788), INT64_C(-4486881289174213355)), simde_mm256_set_epi64x( INT64_C(-2995469803746067901), INT64_C(-3762957269640528607), INT64_C( 622618816590200788), INT64_C(-3609138253339109026)), simde_mm256_set_epi64x( ~INT64_C( 0), INT64_C( 
0), ~INT64_C( 0), INT64_C( 0)) }, { simde_mm256_set_epi64x( INT64_C(-1016066772373646945), INT64_C( 3813602464259710788), INT64_C( 7563424360708952600), INT64_C(-5726115040178877821)), simde_mm256_set_epi64x( INT64_C(-1016066772373646945), INT64_C( 3813602464259710788), INT64_C(-7344230325683392237), INT64_C(-5726115040178877821)), simde_mm256_set_epi64x( ~INT64_C( 0), ~INT64_C( 0), INT64_C( 0), ~INT64_C( 0)) }, { simde_mm256_set_epi64x( INT64_C(-5638985149235693304), INT64_C( 3850441284517009347), INT64_C(-7932931861129304736), INT64_C(-7323455422359325640)), simde_mm256_set_epi64x( INT64_C(-6947747787688303029), INT64_C( 7867789037175719368), INT64_C(-7932931861129304736), INT64_C( 356641616463870387)), simde_mm256_set_epi64x( INT64_C( 0), INT64_C( 0), ~INT64_C( 0), INT64_C( 0)) }, { simde_mm256_set_epi64x( INT64_C(-3620459218438416224), INT64_C(-7960741818410807131), INT64_C( 2210317486113607969), INT64_C( 2056660070405601362)), simde_mm256_set_epi64x( INT64_C( 186345282622433582), INT64_C(-7960741818410807131), INT64_C( 2210317486113607969), INT64_C( 8434183884659739058)), simde_mm256_set_epi64x( INT64_C( 0), ~INT64_C( 0), ~INT64_C( 0), INT64_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cmpeq_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cmpgt_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( 97), INT8_C( 38), INT8_C( 50), INT8_C( -69), INT8_C(-120), INT8_C( 113), INT8_C( -33), INT8_C( -11), INT8_C( 68), INT8_C( 95), INT8_C( 30), INT8_C( 12), INT8_C( -73), INT8_C( -78), INT8_C( -31), INT8_C( -6), INT8_C( -85), INT8_C( 8), INT8_C( -57), INT8_C( 86), INT8_C(-111), INT8_C( 96), INT8_C( 27), INT8_C( -37), INT8_C( 36), INT8_C(-109), INT8_C( -29), INT8_C( 11), INT8_C( 121), INT8_C( 68), INT8_C( -8), INT8_C(-117)), 
simde_mm256_set_epi8(INT8_C( -11), INT8_C( -47), INT8_C( 44), INT8_C( 92), INT8_C(-103), INT8_C( 119), INT8_C( 74), INT8_C( -33), INT8_C( 46), INT8_C( -92), INT8_C( -68), INT8_C( 34), INT8_C( 87), INT8_C( -27), INT8_C( -38), INT8_C( 8), INT8_C( -43), INT8_C( 93), INT8_C( 8), INT8_C(-116), INT8_C(-110), INT8_C( 42), INT8_C( -23), INT8_C( 50), INT8_C( 67), INT8_C( -92), INT8_C( -32), INT8_C( 41), INT8_C( -11), INT8_C( 105), INT8_C( -20), INT8_C( 57)), simde_mm256_set_epi8(INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0)) }, { simde_mm256_set_epi8(INT8_C( -79), INT8_C( 114), INT8_C( 20), INT8_C( 6), INT8_C( -80), INT8_C( 63), INT8_C( 115), INT8_C( -39), INT8_C( 103), INT8_C(-111), INT8_C( -9), INT8_C( 41), INT8_C(-111), INT8_C( 96), INT8_C(-117), INT8_C( -5), INT8_C( -32), INT8_C( 112), INT8_C( 94), INT8_C( -30), INT8_C( 1), INT8_C( 15), INT8_C( 71), INT8_C( 106), INT8_C( 89), INT8_C( 14), INT8_C( -92), INT8_C( -5), INT8_C( 34), INT8_C( 41), INT8_C( 6), INT8_C( 46)), simde_mm256_set_epi8(INT8_C( 69), INT8_C( 68), INT8_C( -50), INT8_C( -53), INT8_C( 53), INT8_C(-115), INT8_C( -3), INT8_C( -63), INT8_C( 44), INT8_C( -26), INT8_C( -60), INT8_C( 77), INT8_C( -39), INT8_C( 69), INT8_C( -4), INT8_C( 67), INT8_C( 2), INT8_C( 127), INT8_C(-108), INT8_C( -32), INT8_C( -90), INT8_C( -18), INT8_C( 30), INT8_C( 126), INT8_C( 71), INT8_C( 87), INT8_C( 81), INT8_C( 8), INT8_C( 97), INT8_C( 48), INT8_C( 122), INT8_C( 19)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1)) }, { simde_mm256_set_epi8(INT8_C( 4), INT8_C( -93), INT8_C( 78), INT8_C( -99), INT8_C( -33), INT8_C( 93), INT8_C(-119), INT8_C( -26), INT8_C(-115), INT8_C( -63), INT8_C( -15), INT8_C( -15), INT8_C( 51), INT8_C( 56), INT8_C(-113), INT8_C( -56), INT8_C( 88), INT8_C( 97), INT8_C( 124), INT8_C( 124), INT8_C( -61), INT8_C( 109), INT8_C( 81), INT8_C( 37), INT8_C( 67), INT8_C( -23), INT8_C( 20), INT8_C( 101), INT8_C(-111), INT8_C( 20), INT8_C( -17), INT8_C( 88)), simde_mm256_set_epi8(INT8_C( -45), INT8_C( -79), INT8_C( 95), INT8_C( 42), INT8_C( -8), INT8_C( 12), INT8_C( 84), INT8_C( -28), INT8_C( -9), INT8_C( 73), INT8_C( 108), INT8_C( 19), INT8_C( -1), INT8_C( -28), INT8_C( 40), INT8_C( -89), INT8_C( 87), INT8_C(-118), INT8_C( 52), INT8_C( -73), INT8_C( 83), INT8_C( -87), INT8_C( -94), INT8_C( -66), INT8_C( 22), INT8_C( -68), INT8_C(-100), INT8_C( -90), INT8_C( 59), INT8_C(-103), INT8_C( -68), INT8_C( 90)), simde_mm256_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0)) }, { simde_mm256_set_epi8(INT8_C( 78), INT8_C( -98), INT8_C(-121), INT8_C( 92), INT8_C( -8), INT8_C( 96), INT8_C(-118), INT8_C( -55), INT8_C( -74), INT8_C( 105), INT8_C( -18), INT8_C( 33), INT8_C( 85), INT8_C( -52), INT8_C( 68), INT8_C( 9), INT8_C( -37), INT8_C( 70), INT8_C( 98), INT8_C( -80), INT8_C( -54), INT8_C(-128), INT8_C( -94), INT8_C(-119), INT8_C(-113), INT8_C(-115), INT8_C( 71), INT8_C( -46), INT8_C( 19), INT8_C( 125), INT8_C( 40), INT8_C( 3)), 
simde_mm256_set_epi8(INT8_C( -28), INT8_C(-110), INT8_C(-103), INT8_C( 3), INT8_C(-127), INT8_C( 24), INT8_C( 119), INT8_C( 81), INT8_C( -7), INT8_C( -15), INT8_C( 106), INT8_C( 4), INT8_C( -43), INT8_C( 98), INT8_C( -93), INT8_C( 36), INT8_C( -1), INT8_C( 44), INT8_C( 82), INT8_C( 14), INT8_C( 0), INT8_C( 125), INT8_C( 43), INT8_C( -25), INT8_C( 68), INT8_C( 74), INT8_C( 8), INT8_C( 53), INT8_C( -2), INT8_C( 27), INT8_C( -33), INT8_C( -9)), simde_mm256_set_epi8(INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1)) }, { simde_mm256_set_epi8(INT8_C( 95), INT8_C( -80), INT8_C( -95), INT8_C( 93), INT8_C( 6), INT8_C(-125), INT8_C( -26), INT8_C( 102), INT8_C( 60), INT8_C( -54), INT8_C( 123), INT8_C( -73), INT8_C( -99), INT8_C( 28), INT8_C( 26), INT8_C(-127), INT8_C( 12), INT8_C( -51), INT8_C( 90), INT8_C( -32), INT8_C( -10), INT8_C( 41), INT8_C( -30), INT8_C( 107), INT8_C( 23), INT8_C( 114), INT8_C( 116), INT8_C( -31), INT8_C( -33), INT8_C( -82), INT8_C( 21), INT8_C( 83)), simde_mm256_set_epi8(INT8_C( 49), INT8_C( -50), INT8_C( -70), INT8_C( -68), INT8_C( 71), INT8_C( -80), INT8_C( -71), INT8_C( 39), INT8_C( 88), INT8_C(-120), INT8_C( 70), INT8_C(-124), INT8_C( 40), INT8_C(-103), INT8_C( 95), INT8_C(-100), INT8_C( -5), INT8_C( 64), INT8_C( -11), INT8_C( -4), INT8_C( -28), INT8_C( -66), INT8_C( 41), INT8_C( 31), INT8_C( 82), INT8_C( 26), INT8_C( -64), INT8_C( 57), INT8_C( 27), INT8_C( 15), INT8_C( 57), INT8_C(-125)), simde_mm256_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 
0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1)) }, { simde_mm256_set_epi8(INT8_C( 57), INT8_C(-121), INT8_C( -6), INT8_C( -74), INT8_C( 22), INT8_C( -63), INT8_C( 17), INT8_C( 101), INT8_C( 43), INT8_C( -53), INT8_C(-112), INT8_C( -25), INT8_C( 85), INT8_C( 55), INT8_C( 77), INT8_C( -58), INT8_C( 73), INT8_C( 35), INT8_C( -70), INT8_C( 30), INT8_C( -78), INT8_C( 4), INT8_C( -97), INT8_C( 5), INT8_C( -36), INT8_C( 28), INT8_C( 95), INT8_C( 75), INT8_C( -88), INT8_C( 11), INT8_C( 45), INT8_C( -19)), simde_mm256_set_epi8(INT8_C( 85), INT8_C( 121), INT8_C( 8), INT8_C( 127), INT8_C(-122), INT8_C( -88), INT8_C( 107), INT8_C( 76), INT8_C(-128), INT8_C(-125), INT8_C( 90), INT8_C( 68), INT8_C( -36), INT8_C( 75), INT8_C( -58), INT8_C( 50), INT8_C( 69), INT8_C(-119), INT8_C(-102), INT8_C( -69), INT8_C( 16), INT8_C( 98), INT8_C( 34), INT8_C(-127), INT8_C( 10), INT8_C( 69), INT8_C( 121), INT8_C(-126), INT8_C( 88), INT8_C( -14), INT8_C(-110), INT8_C( -45)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1)) }, { simde_mm256_set_epi8(INT8_C( -41), INT8_C( -49), INT8_C( 85), INT8_C( 123), INT8_C( -68), INT8_C( -82), INT8_C( -28), INT8_C( -31), INT8_C( 16), INT8_C( 27), INT8_C( 9), INT8_C(-102), INT8_C( 78), INT8_C( 37), INT8_C( -66), INT8_C( 41), INT8_C( -11), INT8_C(-117), INT8_C( 83), INT8_C( 41), INT8_C(-105), INT8_C( 71), INT8_C( -12), INT8_C( 118), INT8_C( -49), INT8_C( -73), INT8_C( -68), INT8_C( -18), INT8_C( 118), INT8_C( 110), INT8_C( 85), INT8_C( 87)), 
simde_mm256_set_epi8(INT8_C( 117), INT8_C( -99), INT8_C( 113), INT8_C( 26), INT8_C( -41), INT8_C( 64), INT8_C( 93), INT8_C( 98), INT8_C( 100), INT8_C( 92), INT8_C( -14), INT8_C( 86), INT8_C( 83), INT8_C( 11), INT8_C( -24), INT8_C( 12), INT8_C( 30), INT8_C( 43), INT8_C( 100), INT8_C( 94), INT8_C( 19), INT8_C( 46), INT8_C( -17), INT8_C( 0), INT8_C( 9), INT8_C( 104), INT8_C( 26), INT8_C( 39), INT8_C( 102), INT8_C( -79), INT8_C( 114), INT8_C( 9)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1)) }, { simde_mm256_set_epi8(INT8_C( 102), INT8_C( -3), INT8_C( -18), INT8_C(-123), INT8_C( -72), INT8_C( 10), INT8_C( 86), INT8_C( 30), INT8_C( -77), INT8_C( 95), INT8_C( -77), INT8_C( 0), INT8_C( -69), INT8_C( -21), INT8_C( 52), INT8_C( -84), INT8_C( 45), INT8_C( -12), INT8_C( -37), INT8_C( 90), INT8_C( 28), INT8_C(-104), INT8_C( 106), INT8_C( 118), INT8_C( -89), INT8_C( 3), INT8_C( 10), INT8_C( 107), INT8_C( 119), INT8_C( 18), INT8_C( -95), INT8_C( 109)), simde_mm256_set_epi8(INT8_C( -86), INT8_C( 18), INT8_C( -58), INT8_C( 3), INT8_C( -64), INT8_C( -78), INT8_C( -50), INT8_C( 105), INT8_C( 15), INT8_C(-117), INT8_C( -59), INT8_C( 105), INT8_C( -20), INT8_C( -76), INT8_C( -88), INT8_C(-122), INT8_C( -16), INT8_C( -87), INT8_C( 108), INT8_C( -44), INT8_C( -34), INT8_C( -65), INT8_C( 125), INT8_C( -17), INT8_C(-119), INT8_C( 55), INT8_C( -39), INT8_C( -68), INT8_C(-116), INT8_C( 45), INT8_C(-101), INT8_C( 61)), simde_mm256_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), 
INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cmpgt_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cmpgt_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C( 22495), INT16_C( -4666), INT16_C( 11364), INT16_C( 3408), INT16_C( -7753), INT16_C( 22355), INT16_C( 25225), INT16_C(-16816), INT16_C( 30843), INT16_C( 15445), INT16_C(-14188), INT16_C( 18672), INT16_C(-17933), INT16_C( 19821), INT16_C( -7211), INT16_C( 10864)), simde_mm256_set_epi16(INT16_C( -752), INT16_C(-15840), INT16_C(-30268), INT16_C( 11179), INT16_C( 16785), INT16_C(-16319), INT16_C( 8147), INT16_C(-29538), INT16_C( 19069), INT16_C( 27101), INT16_C( 26817), INT16_C(-21010), INT16_C(-18350), INT16_C( 1385), INT16_C(-28379), INT16_C(-19254)), simde_mm256_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1)) }, { simde_mm256_set_epi16(INT16_C( 22399), INT16_C( -10), INT16_C( 26109), INT16_C( 19317), INT16_C( 26547), INT16_C( 21309), INT16_C(-21770), INT16_C(-29851), INT16_C( 8254), INT16_C( -6152), INT16_C( 14575), INT16_C(-26009), INT16_C( -9485), INT16_C( 5665), INT16_C( 19505), INT16_C( -7563)), simde_mm256_set_epi16(INT16_C( 14612), INT16_C(-31260), INT16_C(-18743), INT16_C( -4117), INT16_C( 18997), INT16_C(-26816), INT16_C( 13752), INT16_C( 24083), INT16_C(-17683), INT16_C(-12944), INT16_C(-20564), INT16_C( -4251), INT16_C( -8899), INT16_C( -5996), INT16_C( 20472), INT16_C( -9514)), 
simde_mm256_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1)) }, { simde_mm256_set_epi16(INT16_C( -1570), INT16_C(-21545), INT16_C(-26360), INT16_C( -8434), INT16_C( -3275), INT16_C(-16762), INT16_C( -5746), INT16_C( 30695), INT16_C( 2714), INT16_C( 10463), INT16_C( 9000), INT16_C( 9451), INT16_C( 23785), INT16_C( 12389), INT16_C(-26622), INT16_C(-15647)), simde_mm256_set_epi16(INT16_C( 1010), INT16_C(-25985), INT16_C( 16762), INT16_C( 1632), INT16_C(-18641), INT16_C( 15349), INT16_C(-16838), INT16_C(-20536), INT16_C(-25875), INT16_C( 26970), INT16_C(-23524), INT16_C( 11391), INT16_C( 3100), INT16_C(-31773), INT16_C( 26059), INT16_C( 11041)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0)) }, { simde_mm256_set_epi16(INT16_C( 23915), INT16_C( 959), INT16_C(-22514), INT16_C( 13747), INT16_C(-26649), INT16_C( 22646), INT16_C( 13432), INT16_C( 27482), INT16_C( 19401), INT16_C( 30181), INT16_C( 31161), INT16_C(-17967), INT16_C(-19058), INT16_C( 31935), INT16_C( 11181), INT16_C( 23239)), simde_mm256_set_epi16(INT16_C(-28186), INT16_C( 2084), INT16_C( 31597), INT16_C( 7566), INT16_C( 21799), INT16_C(-27106), INT16_C( 4736), INT16_C( 18793), INT16_C(-13965), INT16_C(-17482), INT16_C( 12657), INT16_C(-20379), INT16_C( 24896), INT16_C( 28918), INT16_C( 22577), INT16_C( 29712)), simde_mm256_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0)) }, { simde_mm256_set_epi16(INT16_C( 11952), INT16_C( 18081), INT16_C( 7197), INT16_C( -9524), 
INT16_C( 18521), INT16_C( 8735), INT16_C( 31233), INT16_C(-18919), INT16_C( 8906), INT16_C( 21111), INT16_C( 27720), INT16_C( 5158), INT16_C( 2673), INT16_C( 31824), INT16_C(-18067), INT16_C( -891)), simde_mm256_set_epi16(INT16_C(-18602), INT16_C( 15849), INT16_C(-28095), INT16_C( -3999), INT16_C( 13518), INT16_C(-13535), INT16_C(-30865), INT16_C( 675), INT16_C(-26284), INT16_C( -1429), INT16_C(-19333), INT16_C(-26108), INT16_C( 10384), INT16_C(-21020), INT16_C(-15076), INT16_C( 30415)), simde_mm256_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0)) }, { simde_mm256_set_epi16(INT16_C(-31253), INT16_C( -5290), INT16_C(-20180), INT16_C( 8300), INT16_C( 29534), INT16_C( 10169), INT16_C( 27568), INT16_C(-21872), INT16_C( 20546), INT16_C( 3171), INT16_C( -5551), INT16_C( 13838), INT16_C( 10100), INT16_C(-27385), INT16_C( 19172), INT16_C(-26206)), simde_mm256_set_epi16(INT16_C(-12916), INT16_C( 16100), INT16_C(-14860), INT16_C( 6730), INT16_C( 2608), INT16_C(-17352), INT16_C( 13860), INT16_C( 6700), INT16_C( -7357), INT16_C( -1733), INT16_C( 7387), INT16_C( 5378), INT16_C(-19225), INT16_C( -5805), INT16_C( 154), INT16_C(-20617)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0)) }, { simde_mm256_set_epi16(INT16_C( 13973), INT16_C( 21863), INT16_C( -4187), INT16_C( 31713), INT16_C( -2161), INT16_C(-12788), INT16_C( 13548), INT16_C( 19072), INT16_C(-23365), INT16_C( 2809), INT16_C( 16101), INT16_C( -9274), INT16_C( 12377), INT16_C( 26381), INT16_C( 29677), INT16_C( 4006)), simde_mm256_set_epi16(INT16_C( -9250), INT16_C(-11965), INT16_C(-16314), INT16_C( -8035), INT16_C(-12852), INT16_C( -3996), INT16_C( 8694), 
INT16_C( -8455), INT16_C( 12571), INT16_C(-15529), INT16_C( -8320), INT16_C( 14428), INT16_C( -5397), INT16_C( 17843), INT16_C( 7761), INT16_C( -1266)), simde_mm256_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1)) }, { simde_mm256_set_epi16(INT16_C( 7404), INT16_C( 10935), INT16_C(-26115), INT16_C( 8618), INT16_C(-27599), INT16_C( 31224), INT16_C( 4654), INT16_C(-18707), INT16_C( 12785), INT16_C( 22785), INT16_C( 25148), INT16_C(-31622), INT16_C( 17070), INT16_C(-22617), INT16_C( 6953), INT16_C(-14046)), simde_mm256_set_epi16(INT16_C(-11447), INT16_C( 18342), INT16_C( 563), INT16_C(-13432), INT16_C( 21117), INT16_C( 29427), INT16_C( 31704), INT16_C( 14377), INT16_C( 14770), INT16_C( -3712), INT16_C( 7229), INT16_C(-22634), INT16_C( 28493), INT16_C(-28170), INT16_C( 5912), INT16_C( 7039)), simde_mm256_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cmpgt_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cmpgt_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C(-1910963751), INT32_C(-1037527336), INT32_C( -581253082), INT32_C( -805157505), INT32_C( 1446966287), INT32_C( -8913681), INT32_C( -494526366), INT32_C(-1857474161)), simde_mm256_set_epi32(INT32_C(-1143050049), INT32_C(-1545949366), INT32_C( 1503277288), INT32_C(-1357138171), INT32_C( 1058844939), INT32_C( 309480335), INT32_C( 1825640960), INT32_C(-2083253752)), 
simde_mm256_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1)) }, { simde_mm256_set_epi32(INT32_C(-1125895018), INT32_C( -151199733), INT32_C(-1156572232), INT32_C(-1719423594), INT32_C(-1350415044), INT32_C( 1941048360), INT32_C( 1462693644), INT32_C( 157276070)), simde_mm256_set_epi32(INT32_C( 1685470989), INT32_C( -23549807), INT32_C( 2101982138), INT32_C(-1846214249), INT32_C(-1855916776), INT32_C( -540831398), INT32_C( 233301808), INT32_C( 1667068415)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( 0)) }, { simde_mm256_set_epi32(INT32_C(-2064202369), INT32_C( -937796718), INT32_C( 1860922821), INT32_C( -943515104), INT32_C( 1932702217), INT32_C(-1909235607), INT32_C( -701927701), INT32_C( 2122537608)), simde_mm256_set_epi32(INT32_C( 1359427872), INT32_C( 430067405), INT32_C(-2017266204), INT32_C( -112174075), INT32_C(-2036257158), INT32_C( 794990098), INT32_C( 1595368835), INT32_C(-2025748789)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1)) }, { simde_mm256_set_epi32(INT32_C( 101914014), INT32_C( 1252307439), INT32_C( -251229086), INT32_C( 1747655874), INT32_C( 1975156439), INT32_C( 244588822), INT32_C(-2030488384), INT32_C( -41784348)), simde_mm256_set_epi32(INT32_C( 126704158), INT32_C(-1817043660), INT32_C( 67229317), INT32_C( 1709840887), INT32_C( -401701180), INT32_C(-1816869894), INT32_C( -356935412), INT32_C( 1029497038)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0)) }, { simde_mm256_set_epi32(INT32_C(-1821031073), INT32_C(-1888821389), INT32_C( 1302274763), INT32_C( -889069785), INT32_C( 849224332), INT32_C( 1448513971), INT32_C(-1444618137), INT32_C( -857531383)), simde_mm256_set_epi32(INT32_C( -294182987), INT32_C(-1911426210), INT32_C( 
1273887477), INT32_C( -804272895), INT32_C(-1138168885), INT32_C(-1273049807), INT32_C( -920424822), INT32_C( 75748881)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0)) }, { simde_mm256_set_epi32(INT32_C( -145989095), INT32_C( 1224886052), INT32_C(-1270092347), INT32_C(-2004839127), INT32_C( -805168375), INT32_C(-2053011497), INT32_C( 926069792), INT32_C( -523797780)), simde_mm256_set_epi32(INT32_C( 1782736183), INT32_C( 1196895214), INT32_C( 2023829967), INT32_C( 431966763), INT32_C(-1279096565), INT32_C(-1568278654), INT32_C( -225755322), INT32_C( 800135833)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0)) }, { simde_mm256_set_epi32(INT32_C( 45181175), INT32_C( 2067174734), INT32_C(-1046962322), INT32_C( -770870625), INT32_C( 166196701), INT32_C(-1211694319), INT32_C( 1891438592), INT32_C(-1986783509)), simde_mm256_set_epi32(INT32_C( 1357811899), INT32_C( -666006860), INT32_C( -78093915), INT32_C(-1784875915), INT32_C(-1592390514), INT32_C( -295345562), INT32_C(-1194315003), INT32_C( -378378269)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0)) }, { simde_mm256_set_epi32(INT32_C( 2029570724), INT32_C( 593067919), INT32_C( 1378001411), INT32_C(-1490077172), INT32_C( -985008502), INT32_C( -780769236), INT32_C( 983706005), INT32_C( -145048806)), simde_mm256_set_epi32(INT32_C( -371614648), INT32_C( 790968401), INT32_C( 604322541), INT32_C( 1275463353), INT32_C( 507685025), INT32_C(-1785065497), INT32_C( -324987069), INT32_C( 1403024939)), simde_mm256_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cmpgt_epi32(test_vec[i].a, test_vec[i].b); 
simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cmpgt_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C( 2118945800826688975), INT64_C(-2048024407550915063), INT64_C( 5214505670652994271), INT64_C( -257856994503089701)), simde_mm256_set_epi64x(INT64_C( 5695011750507465952), INT64_C( -909596801509157384), INT64_C( 8030198869340496548), INT64_C( 154404577117411578)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { simde_mm256_set_epi64x(INT64_C( 5911350820320548021), INT64_C( 1593992970237440641), INT64_C( 1816057404225443518), INT64_C(-5807847363009183486)), simde_mm256_set_epi64x(INT64_C( 6716528373762493386), INT64_C(-8043125522610464194), INT64_C( 8780083716644576089), INT64_C( 1141738695323567528)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( -1), INT64_C( 0), INT64_C( 0)) }, { simde_mm256_set_epi64x(INT64_C( 2128200156105206458), INT64_C( 4268557007045133354), INT64_C( 922795411729807384), INT64_C( -370673782849908924)), simde_mm256_set_epi64x(INT64_C(-4606711496612992363), INT64_C( 5770563142009272389), INT64_C( 5439573582391425757), INT64_C( 3818549561656095124)), simde_mm256_set_epi64x(INT64_C( -1), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { simde_mm256_set_epi64x(INT64_C(-7763872078520682010), INT64_C( 8214461135943818795), INT64_C( 6913361685886042209), INT64_C( 4675504907089870645)), simde_mm256_set_epi64x(INT64_C( 4559571845325885325), INT64_C(-5258431881764629271), INT64_C( 5559569102824249097), INT64_C( 5799667037503013333)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( -1), INT64_C( -1), INT64_C( 0)) }, { simde_mm256_set_epi64x(INT64_C( 4800525853411880003), INT64_C( -184846840160593290), INT64_C(-4953203027402144401), INT64_C( 2572609663077992835)), simde_mm256_set_epi64x(INT64_C(-5431416973650143300), INT64_C( 5030179310017915920), INT64_C(-6125011459628645754), 
INT64_C(-8488302794525901027)), simde_mm256_set_epi64x(INT64_C( -1), INT64_C( 0), INT64_C( -1), INT64_C( -1)) }, { simde_mm256_set_epi64x(INT64_C(-3860235829589315850), INT64_C( 8837948345888719516), INT64_C( 3875197176959192831), INT64_C(-5180468743599698974)), simde_mm256_set_epi64x(INT64_C(-1290331525121336992), INT64_C(-2797732422128290053), INT64_C(-1679378228033738615), INT64_C(-7981001780127731934)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( -1), INT64_C( -1), INT64_C( -1)) }, { simde_mm256_set_epi64x(INT64_C(-1174099241428997420), INT64_C( -973237140110744253), INT64_C(-2478103055519574033), INT64_C(-4160968278707001293)), simde_mm256_set_epi64x(INT64_C(-5836788839917359935), INT64_C( 9029070361682465515), INT64_C( 7441474531728878768), INT64_C(-8910660672377811998)), simde_mm256_set_epi64x(INT64_C( -1), INT64_C( 0), INT64_C( 0), INT64_C( -1)) }, { simde_mm256_set_epi64x(INT64_C( 660412182468938166), INT64_C( 2628509021710534078), INT64_C(-4480617326594733579), INT64_C( 6124790654743076244)), simde_mm256_set_epi64x(INT64_C(-2572783283758925366), INT64_C( 8355777023791813662), INT64_C( 7632362282544129149), INT64_C(-5998036421176707691)), simde_mm256_set_epi64x(INT64_C( -1), INT64_C( 0), INT64_C( 0), INT64_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cmpgt_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_broadcastb_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( -20), INT8_C( 103), INT8_C( -20), INT8_C( 116), INT8_C( -9), INT8_C( 73), INT8_C( 44), INT8_C( 79), INT8_C( -20), INT8_C( -81), INT8_C(-114), INT8_C( -81), INT8_C( 10), INT8_C( 63), INT8_C( -41), INT8_C(-117)), simde_mm_set_epi8(INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), 
INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117)) }, { simde_mm_set_epi8(INT8_C( -22), INT8_C(-124), INT8_C( -97), INT8_C( -1), INT8_C( -4), INT8_C( -87), INT8_C( -49), INT8_C(-124), INT8_C( -37), INT8_C( -17), INT8_C( -57), INT8_C( 0), INT8_C(-121), INT8_C( 57), INT8_C( 49), INT8_C(-112)), simde_mm_set_epi8(INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112)) }, { simde_mm_set_epi8(INT8_C( -37), INT8_C( -43), INT8_C( -30), INT8_C( 45), INT8_C( 86), INT8_C(-100), INT8_C( -53), INT8_C( -4), INT8_C( 54), INT8_C(-107), INT8_C( 40), INT8_C( -68), INT8_C( 49), INT8_C( 59), INT8_C( -37), INT8_C( 60)), simde_mm_set_epi8(INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60)) }, { simde_mm_set_epi8(INT8_C( 35), INT8_C( -98), INT8_C( -73), INT8_C( 22), INT8_C(-127), INT8_C( 78), INT8_C( 125), INT8_C( 84), INT8_C(-115), INT8_C(-124), INT8_C(-103), INT8_C( 59), INT8_C( 15), INT8_C( -58), INT8_C( 81), INT8_C( 4)), simde_mm_set_epi8(INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4)) }, { simde_mm_set_epi8(INT8_C( -56), INT8_C( 88), INT8_C( 1), INT8_C( -59), INT8_C( -14), INT8_C( -27), INT8_C( -63), INT8_C( -89), INT8_C( -31), INT8_C( -81), INT8_C( -92), INT8_C( 115), INT8_C( 106), INT8_C( -49), INT8_C( -19), INT8_C( 92)), simde_mm_set_epi8(INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92)) }, { simde_mm_set_epi8(INT8_C( 7), INT8_C( 47), INT8_C( 55), 
INT8_C( 77), INT8_C( 16), INT8_C( -71), INT8_C(-122), INT8_C( -81), INT8_C(-120), INT8_C( -64), INT8_C( -77), INT8_C( 13), INT8_C( 80), INT8_C(-114), INT8_C(-121), INT8_C( 92)), simde_mm_set_epi8(INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92)) }, { simde_mm_set_epi8(INT8_C( 14), INT8_C( -85), INT8_C( -8), INT8_C( -80), INT8_C( -58), INT8_C( 84), INT8_C( 52), INT8_C(-106), INT8_C( -46), INT8_C( 118), INT8_C( -96), INT8_C( 88), INT8_C( 35), INT8_C( 50), INT8_C(-112), INT8_C(-112)), simde_mm_set_epi8(INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112)) }, { simde_mm_set_epi8(INT8_C( 17), INT8_C( 88), INT8_C( -82), INT8_C(-109), INT8_C( 56), INT8_C( 18), INT8_C( 73), INT8_C( 115), INT8_C( -99), INT8_C( -71), INT8_C( -92), INT8_C( 121), INT8_C( 93), INT8_C( 51), INT8_C( -65), INT8_C( 55)), simde_mm_set_epi8(INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_broadcastb_epi8(test_vec[i].a); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_broadcastb_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( -20), INT8_C( 103), INT8_C( -20), INT8_C( 116), INT8_C( -9), INT8_C( 73), INT8_C( 44), INT8_C( 79), INT8_C( -20), INT8_C( -81), INT8_C(-114), INT8_C( -81), INT8_C( 10), INT8_C( 63), INT8_C( -41), INT8_C(-117)), simde_mm256_set_epi8(INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), 
INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117)) }, { simde_mm_set_epi8(INT8_C( -22), INT8_C(-124), INT8_C( -97), INT8_C( -1), INT8_C( -4), INT8_C( -87), INT8_C( -49), INT8_C(-124), INT8_C( -37), INT8_C( -17), INT8_C( -57), INT8_C( 0), INT8_C(-121), INT8_C( 57), INT8_C( 49), INT8_C(-112)), simde_mm256_set_epi8(INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112)) }, { simde_mm_set_epi8(INT8_C( -37), INT8_C( -43), INT8_C( -30), INT8_C( 45), INT8_C( 86), INT8_C(-100), INT8_C( -53), INT8_C( -4), INT8_C( 54), INT8_C(-107), INT8_C( 40), INT8_C( -68), INT8_C( 49), INT8_C( 59), INT8_C( -37), INT8_C( 60)), simde_mm256_set_epi8(INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60)) }, { simde_mm_set_epi8(INT8_C( 35), INT8_C( -98), INT8_C( -73), INT8_C( 22), INT8_C(-127), INT8_C( 78), INT8_C( 125), INT8_C( 84), INT8_C(-115), INT8_C(-124), INT8_C(-103), INT8_C( 59), INT8_C( 15), INT8_C( -58), 
INT8_C( 81), INT8_C( 4)), simde_mm256_set_epi8(INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4), INT8_C( 4)) }, { simde_mm_set_epi8(INT8_C( -56), INT8_C( 88), INT8_C( 1), INT8_C( -59), INT8_C( -14), INT8_C( -27), INT8_C( -63), INT8_C( -89), INT8_C( -31), INT8_C( -81), INT8_C( -92), INT8_C( 115), INT8_C( 106), INT8_C( -49), INT8_C( -19), INT8_C( 92)), simde_mm256_set_epi8(INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92)) }, { simde_mm_set_epi8(INT8_C( 7), INT8_C( 47), INT8_C( 55), INT8_C( 77), INT8_C( 16), INT8_C( -71), INT8_C(-122), INT8_C( -81), INT8_C(-120), INT8_C( -64), INT8_C( -77), INT8_C( 13), INT8_C( 80), INT8_C(-114), INT8_C(-121), INT8_C( 92)), simde_mm256_set_epi8(INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92), INT8_C( 92)) }, { simde_mm_set_epi8(INT8_C( 14), INT8_C( -85), INT8_C( -8), INT8_C( -80), INT8_C( -58), INT8_C( 84), INT8_C( 52), INT8_C(-106), INT8_C( -46), INT8_C( 118), INT8_C( -96), INT8_C( 88), INT8_C( 35), INT8_C( 50), 
INT8_C(-112), INT8_C(-112)), simde_mm256_set_epi8(INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112), INT8_C(-112)) }, { simde_mm_set_epi8(INT8_C( 17), INT8_C( 88), INT8_C( -82), INT8_C(-109), INT8_C( 56), INT8_C( 18), INT8_C( 73), INT8_C( 115), INT8_C( -99), INT8_C( -71), INT8_C( -92), INT8_C( 121), INT8_C( 93), INT8_C( 51), INT8_C( -65), INT8_C( 55)), simde_mm256_set_epi8(INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_broadcastb_epi8(test_vec[i].a); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_broadcastw_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-14724), INT16_C(-11263), INT16_C(-14102), INT16_C( 6431), INT16_C( 11838), INT16_C( -2695), INT16_C(-11290), INT16_C( 22147)), simde_mm_set_epi16(INT16_C( 22147), INT16_C( 22147), INT16_C( 22147), INT16_C( 22147), INT16_C( 22147), INT16_C( 22147), INT16_C( 22147), INT16_C( 22147)) }, { simde_mm_set_epi16(INT16_C(-16582), INT16_C(-25927), INT16_C(-23837), INT16_C( 29287), INT16_C(-19772), INT16_C( 13452), INT16_C( -3861), INT16_C( -4316)), 
simde_mm_set_epi16(INT16_C( -4316), INT16_C( -4316), INT16_C( -4316), INT16_C( -4316), INT16_C( -4316), INT16_C( -4316), INT16_C( -4316), INT16_C( -4316)) }, { simde_mm_set_epi16(INT16_C(-22817), INT16_C(-13842), INT16_C(-28521), INT16_C( 14250), INT16_C( 16773), INT16_C(-17571), INT16_C( 16721), INT16_C(-24996)), simde_mm_set_epi16(INT16_C(-24996), INT16_C(-24996), INT16_C(-24996), INT16_C(-24996), INT16_C(-24996), INT16_C(-24996), INT16_C(-24996), INT16_C(-24996)) }, { simde_mm_set_epi16(INT16_C( -7252), INT16_C( -8727), INT16_C( 7341), INT16_C( 25148), INT16_C(-18544), INT16_C( 21940), INT16_C( 7393), INT16_C( -5844)), simde_mm_set_epi16(INT16_C( -5844), INT16_C( -5844), INT16_C( -5844), INT16_C( -5844), INT16_C( -5844), INT16_C( -5844), INT16_C( -5844), INT16_C( -5844)) }, { simde_mm_set_epi16(INT16_C(-20117), INT16_C(-17614), INT16_C( 27322), INT16_C(-16415), INT16_C( 21044), INT16_C( 32548), INT16_C( 27672), INT16_C( 26452)), simde_mm_set_epi16(INT16_C( 26452), INT16_C( 26452), INT16_C( 26452), INT16_C( 26452), INT16_C( 26452), INT16_C( 26452), INT16_C( 26452), INT16_C( 26452)) }, { simde_mm_set_epi16(INT16_C( 15651), INT16_C( -9541), INT16_C( -1245), INT16_C( 19639), INT16_C( -7877), INT16_C(-31496), INT16_C( -9293), INT16_C( 24923)), simde_mm_set_epi16(INT16_C( 24923), INT16_C( 24923), INT16_C( 24923), INT16_C( 24923), INT16_C( 24923), INT16_C( 24923), INT16_C( 24923), INT16_C( 24923)) }, { simde_mm_set_epi16(INT16_C( -1558), INT16_C( 24882), INT16_C(-27020), INT16_C( -1908), INT16_C( 5712), INT16_C( 21207), INT16_C( 4307), INT16_C( 9371)), simde_mm_set_epi16(INT16_C( 9371), INT16_C( 9371), INT16_C( 9371), INT16_C( 9371), INT16_C( 9371), INT16_C( 9371), INT16_C( 9371), INT16_C( 9371)) }, { simde_mm_set_epi16(INT16_C( 28347), INT16_C(-26396), INT16_C(-17550), INT16_C(-16266), INT16_C(-10219), INT16_C( 6683), INT16_C(-28102), INT16_C( 11110)), simde_mm_set_epi16(INT16_C( 11110), INT16_C( 11110), INT16_C( 11110), INT16_C( 11110), INT16_C( 11110), INT16_C( 
11110), INT16_C( 11110), INT16_C( 11110)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_broadcastw_epi16(test_vec[i].a); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_broadcastw_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-14724), INT16_C(-11263), INT16_C(-14102), INT16_C( 6431), INT16_C( 11838), INT16_C( -2695), INT16_C(-11290), INT16_C( 22147)), simde_mm256_set_epi16(INT16_C( 22147), INT16_C( 22147), INT16_C( 22147), INT16_C( 22147), INT16_C( 22147), INT16_C( 22147), INT16_C( 22147), INT16_C( 22147), INT16_C( 22147), INT16_C( 22147), INT16_C( 22147), INT16_C( 22147), INT16_C( 22147), INT16_C( 22147), INT16_C( 22147), INT16_C( 22147)) }, { simde_mm_set_epi16(INT16_C(-16582), INT16_C(-25927), INT16_C(-23837), INT16_C( 29287), INT16_C(-19772), INT16_C( 13452), INT16_C( -3861), INT16_C( -4316)), simde_mm256_set_epi16(INT16_C( -4316), INT16_C( -4316), INT16_C( -4316), INT16_C( -4316), INT16_C( -4316), INT16_C( -4316), INT16_C( -4316), INT16_C( -4316), INT16_C( -4316), INT16_C( -4316), INT16_C( -4316), INT16_C( -4316), INT16_C( -4316), INT16_C( -4316), INT16_C( -4316), INT16_C( -4316)) }, { simde_mm_set_epi16(INT16_C(-22817), INT16_C(-13842), INT16_C(-28521), INT16_C( 14250), INT16_C( 16773), INT16_C(-17571), INT16_C( 16721), INT16_C(-24996)), simde_mm256_set_epi16(INT16_C(-24996), INT16_C(-24996), INT16_C(-24996), INT16_C(-24996), INT16_C(-24996), INT16_C(-24996), INT16_C(-24996), INT16_C(-24996), INT16_C(-24996), INT16_C(-24996), INT16_C(-24996), INT16_C(-24996), INT16_C(-24996), INT16_C(-24996), INT16_C(-24996), INT16_C(-24996)) }, { simde_mm_set_epi16(INT16_C( -7252), INT16_C( -8727), INT16_C( 7341), INT16_C( 25148), INT16_C(-18544), INT16_C( 21940), INT16_C( 7393), INT16_C( -5844)), simde_mm256_set_epi16(INT16_C( -5844), INT16_C( -5844), INT16_C( -5844), INT16_C( -5844), INT16_C( 
-5844), INT16_C( -5844), INT16_C( -5844), INT16_C( -5844), INT16_C( -5844), INT16_C( -5844), INT16_C( -5844), INT16_C( -5844), INT16_C( -5844), INT16_C( -5844), INT16_C( -5844), INT16_C( -5844)) }, { simde_mm_set_epi16(INT16_C(-20117), INT16_C(-17614), INT16_C( 27322), INT16_C(-16415), INT16_C( 21044), INT16_C( 32548), INT16_C( 27672), INT16_C( 26452)), simde_mm256_set_epi16(INT16_C( 26452), INT16_C( 26452), INT16_C( 26452), INT16_C( 26452), INT16_C( 26452), INT16_C( 26452), INT16_C( 26452), INT16_C( 26452), INT16_C( 26452), INT16_C( 26452), INT16_C( 26452), INT16_C( 26452), INT16_C( 26452), INT16_C( 26452), INT16_C( 26452), INT16_C( 26452)) }, { simde_mm_set_epi16(INT16_C( 15651), INT16_C( -9541), INT16_C( -1245), INT16_C( 19639), INT16_C( -7877), INT16_C(-31496), INT16_C( -9293), INT16_C( 24923)), simde_mm256_set_epi16(INT16_C( 24923), INT16_C( 24923), INT16_C( 24923), INT16_C( 24923), INT16_C( 24923), INT16_C( 24923), INT16_C( 24923), INT16_C( 24923), INT16_C( 24923), INT16_C( 24923), INT16_C( 24923), INT16_C( 24923), INT16_C( 24923), INT16_C( 24923), INT16_C( 24923), INT16_C( 24923)) }, { simde_mm_set_epi16(INT16_C( -1558), INT16_C( 24882), INT16_C(-27020), INT16_C( -1908), INT16_C( 5712), INT16_C( 21207), INT16_C( 4307), INT16_C( 9371)), simde_mm256_set_epi16(INT16_C( 9371), INT16_C( 9371), INT16_C( 9371), INT16_C( 9371), INT16_C( 9371), INT16_C( 9371), INT16_C( 9371), INT16_C( 9371), INT16_C( 9371), INT16_C( 9371), INT16_C( 9371), INT16_C( 9371), INT16_C( 9371), INT16_C( 9371), INT16_C( 9371), INT16_C( 9371)) }, { simde_mm_set_epi16(INT16_C( 28347), INT16_C(-26396), INT16_C(-17550), INT16_C(-16266), INT16_C(-10219), INT16_C( 6683), INT16_C(-28102), INT16_C( 11110)), simde_mm256_set_epi16(INT16_C( 11110), INT16_C( 11110), INT16_C( 11110), INT16_C( 11110), INT16_C( 11110), INT16_C( 11110), INT16_C( 11110), INT16_C( 11110), INT16_C( 11110), INT16_C( 11110), INT16_C( 11110), INT16_C( 11110), INT16_C( 11110), INT16_C( 11110), INT16_C( 11110), INT16_C( 11110)) } }; 
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_broadcastw_epi16(test_vec[i].a); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_broadcastd_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( -964897791), INT32_C( -924182241), INT32_C( 775878009), INT32_C( -739879293)), simde_mm_set_epi32(INT32_C( -739879293), INT32_C( -739879293), INT32_C( -739879293), INT32_C( -739879293)) }, { simde_mm_set_epi32(INT32_C(-1086678343), INT32_C(-1562152345), INT32_C(-1295764340), INT32_C( -252973276)), simde_mm_set_epi32(INT32_C( -252973276), INT32_C( -252973276), INT32_C( -252973276), INT32_C( -252973276)) }, { simde_mm_set_epi32(INT32_C(-1495283218), INT32_C(-1869138006), INT32_C( 1099283293), INT32_C( 1095867996)), simde_mm_set_epi32(INT32_C( 1095867996), INT32_C( 1095867996), INT32_C( 1095867996), INT32_C( 1095867996)) }, { simde_mm_set_epi32(INT32_C( -475210263), INT32_C( 481124924), INT32_C(-1215277644), INT32_C( 484567340)), simde_mm_set_epi32(INT32_C( 484567340), INT32_C( 484567340), INT32_C( 484567340), INT32_C( 484567340)) }, { simde_mm_set_epi32(INT32_C(-1318339790), INT32_C( 1790623713), INT32_C( 1379172132), INT32_C( 1813538644)), simde_mm_set_epi32(INT32_C( 1813538644), INT32_C( 1813538644), INT32_C( 1813538644), INT32_C( 1813538644)) }, { simde_mm_set_epi32(INT32_C( 1025759931), INT32_C( -81572681), INT32_C( -516193032), INT32_C( -609001125)), simde_mm_set_epi32(INT32_C( -609001125), INT32_C( -609001125), INT32_C( -609001125), INT32_C( -609001125)) }, { simde_mm_set_epi32(INT32_C( -102080206), INT32_C(-1770719092), INT32_C( 374362839), INT32_C( 282272923)), simde_mm_set_epi32(INT32_C( 282272923), INT32_C( 282272923), INT32_C( 282272923), INT32_C( 282272923)) }, { simde_mm_set_epi32(INT32_C( 1857788132), INT32_C(-1150107530), INT32_C( -669705701), INT32_C(-1841681562)), 
simde_mm_set_epi32(INT32_C(-1841681562), INT32_C(-1841681562), INT32_C(-1841681562), INT32_C(-1841681562)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_broadcastd_epi32(test_vec[i].a); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_broadcastd_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( -964897791), INT32_C( -924182241), INT32_C( 775878009), INT32_C( -739879293)), simde_mm256_set_epi32(INT32_C( -739879293), INT32_C( -739879293), INT32_C( -739879293), INT32_C( -739879293), INT32_C( -739879293), INT32_C( -739879293), INT32_C( -739879293), INT32_C( -739879293)) }, { simde_mm_set_epi32(INT32_C(-1086678343), INT32_C(-1562152345), INT32_C(-1295764340), INT32_C( -252973276)), simde_mm256_set_epi32(INT32_C( -252973276), INT32_C( -252973276), INT32_C( -252973276), INT32_C( -252973276), INT32_C( -252973276), INT32_C( -252973276), INT32_C( -252973276), INT32_C( -252973276)) }, { simde_mm_set_epi32(INT32_C(-1495283218), INT32_C(-1869138006), INT32_C( 1099283293), INT32_C( 1095867996)), simde_mm256_set_epi32(INT32_C( 1095867996), INT32_C( 1095867996), INT32_C( 1095867996), INT32_C( 1095867996), INT32_C( 1095867996), INT32_C( 1095867996), INT32_C( 1095867996), INT32_C( 1095867996)) }, { simde_mm_set_epi32(INT32_C( -475210263), INT32_C( 481124924), INT32_C(-1215277644), INT32_C( 484567340)), simde_mm256_set_epi32(INT32_C( 484567340), INT32_C( 484567340), INT32_C( 484567340), INT32_C( 484567340), INT32_C( 484567340), INT32_C( 484567340), INT32_C( 484567340), INT32_C( 484567340)) }, { simde_mm_set_epi32(INT32_C(-1318339790), INT32_C( 1790623713), INT32_C( 1379172132), INT32_C( 1813538644)), simde_mm256_set_epi32(INT32_C( 1813538644), INT32_C( 1813538644), INT32_C( 1813538644), INT32_C( 1813538644), INT32_C( 1813538644), INT32_C( 1813538644), INT32_C( 1813538644), INT32_C( 1813538644)) }, { 
simde_mm_set_epi32(INT32_C( 1025759931), INT32_C( -81572681), INT32_C( -516193032), INT32_C( -609001125)), simde_mm256_set_epi32(INT32_C( -609001125), INT32_C( -609001125), INT32_C( -609001125), INT32_C( -609001125), INT32_C( -609001125), INT32_C( -609001125), INT32_C( -609001125), INT32_C( -609001125)) }, { simde_mm_set_epi32(INT32_C( -102080206), INT32_C(-1770719092), INT32_C( 374362839), INT32_C( 282272923)), simde_mm256_set_epi32(INT32_C( 282272923), INT32_C( 282272923), INT32_C( 282272923), INT32_C( 282272923), INT32_C( 282272923), INT32_C( 282272923), INT32_C( 282272923), INT32_C( 282272923)) }, { simde_mm_set_epi32(INT32_C( 1857788132), INT32_C(-1150107530), INT32_C( -669705701), INT32_C(-1841681562)), simde_mm256_set_epi32(INT32_C(-1841681562), INT32_C(-1841681562), INT32_C(-1841681562), INT32_C(-1841681562), INT32_C(-1841681562), INT32_C(-1841681562), INT32_C(-1841681562), INT32_C(-1841681562)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_broadcastd_epi32(test_vec[i].a); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } /* Table-driven test for simde_mm_broadcastq_epi64: in each of the 8 cases the expected vector r holds the second (last) argument of simde_mm_set_epi64x for a in both 64-bit elements. Returns 0 when every case compares equal. */ static int test_simde_mm_broadcastq_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C(-4144204452956858081), INT64_C( 3332370677895681667)), simde_mm_set_epi64x(INT64_C( 3332370677895681667), INT64_C( 3332370677895681667)) }, { simde_mm_set_epi64x(INT64_C(-4667247941723655577), INT64_C(-5565265459581030620)), simde_mm_set_epi64x(INT64_C(-5565265459581030620), INT64_C(-5565265459581030620)) }, { simde_mm_set_epi64x(INT64_C(-6422192517141809238), INT64_C( 4721385793570053724)), simde_mm_set_epi64x(INT64_C( 4721385793570053724), INT64_C( 4721385793570053724)) }, { simde_mm_set_epi64x(INT64_C(-2041012537827433924), INT64_C(-5219577736055363284)), simde_mm_set_epi64x(INT64_C(-5219577736055363284), INT64_C(-5219577736055363284)) }, { simde_mm_set_epi64x(INT64_C(-5662226281274884127), INT64_C(
5923499204308133716)), simde_mm_set_epi64x(INT64_C( 5923499204308133716), INT64_C( 5923499204308133716)) }, { simde_mm_set_epi64x(INT64_C( 4405605361405611191), INT64_C(-2217032187177115301)), simde_mm_set_epi64x(INT64_C(-2217032187177115301), INT64_C(-2217032187177115301)) }, { simde_mm_set_epi64x(INT64_C( -438431143814694772), INT64_C( 1607876150624986267)), simde_mm_set_epi64x(INT64_C( 1607876150624986267), INT64_C( 1607876150624986267)) }, { simde_mm_set_epi64x(INT64_C( 7979139272981790838), INT64_C(-2876364081286468762)), simde_mm_set_epi64x(INT64_C(-2876364081286468762), INT64_C(-2876364081286468762)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_broadcastq_epi64(test_vec[i].a); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } /* Table-driven test for simde_mm256_broadcastq_epi64: the 128-bit input a is built with simde_mm_set_epi64x and the expected 256-bit vector r repeats a's second (last) argument in all four 64-bit elements. Returns 0 when every case compares equal. */ static int test_simde_mm256_broadcastq_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C(-4144204452956858081), INT64_C( 3332370677895681667)), simde_mm256_set_epi64x(INT64_C( 3332370677895681667), INT64_C( 3332370677895681667), INT64_C( 3332370677895681667), INT64_C( 3332370677895681667)) }, { simde_mm_set_epi64x(INT64_C(-4667247941723655577), INT64_C(-5565265459581030620)), simde_mm256_set_epi64x(INT64_C(-5565265459581030620), INT64_C(-5565265459581030620), INT64_C(-5565265459581030620), INT64_C(-5565265459581030620)) }, { simde_mm_set_epi64x(INT64_C(-6422192517141809238), INT64_C( 4721385793570053724)), simde_mm256_set_epi64x(INT64_C( 4721385793570053724), INT64_C( 4721385793570053724), INT64_C( 4721385793570053724), INT64_C( 4721385793570053724)) }, { simde_mm_set_epi64x(INT64_C(-2041012537827433924), INT64_C(-5219577736055363284)), simde_mm256_set_epi64x(INT64_C(-5219577736055363284), INT64_C(-5219577736055363284), INT64_C(-5219577736055363284), INT64_C(-5219577736055363284)) }, { simde_mm_set_epi64x(INT64_C(-5662226281274884127), INT64_C( 5923499204308133716)),
simde_mm256_set_epi64x(INT64_C( 5923499204308133716), INT64_C( 5923499204308133716), INT64_C( 5923499204308133716), INT64_C( 5923499204308133716)) }, { simde_mm_set_epi64x(INT64_C( 4405605361405611191), INT64_C(-2217032187177115301)), simde_mm256_set_epi64x(INT64_C(-2217032187177115301), INT64_C(-2217032187177115301), INT64_C(-2217032187177115301), INT64_C(-2217032187177115301)) }, { simde_mm_set_epi64x(INT64_C( -438431143814694772), INT64_C( 1607876150624986267)), simde_mm256_set_epi64x(INT64_C( 1607876150624986267), INT64_C( 1607876150624986267), INT64_C( 1607876150624986267), INT64_C( 1607876150624986267)) }, { simde_mm_set_epi64x(INT64_C( 7979139272981790838), INT64_C(-2876364081286468762)), simde_mm256_set_epi64x(INT64_C(-2876364081286468762), INT64_C(-2876364081286468762), INT64_C(-2876364081286468762), INT64_C(-2876364081286468762)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_broadcastq_epi64(test_vec[i].a); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } /* Table-driven test for simde_mm_broadcastss_ps: in each of the 8 cases the expected vector r repeats the last argument given to simde_mm_set_ps for a in all four float elements. Results are compared with simde_assert_m128_close; NOTE(review): the trailing argument 1 looks like the comparison precision - confirm its units against the macro definition. Returns 0 when every case passes. */ static int test_simde_mm_broadcastss_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 550.68), SIMDE_FLOAT32_C( 569.64), SIMDE_FLOAT32_C( -638.70), SIMDE_FLOAT32_C( 655.47)), simde_mm_set_ps(SIMDE_FLOAT32_C( 655.47), SIMDE_FLOAT32_C( 655.47), SIMDE_FLOAT32_C( 655.47), SIMDE_FLOAT32_C( 655.47)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 493.98), SIMDE_FLOAT32_C( 272.57), SIMDE_FLOAT32_C( 396.61), SIMDE_FLOAT32_C( 882.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 882.20), SIMDE_FLOAT32_C( 882.20), SIMDE_FLOAT32_C( 882.20), SIMDE_FLOAT32_C( 882.20)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 303.70), SIMDE_FLOAT32_C( 129.61), SIMDE_FLOAT32_C( -488.11), SIMDE_FLOAT32_C( -489.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( -489.70), SIMDE_FLOAT32_C( -489.70), SIMDE_FLOAT32_C( -489.70), SIMDE_FLOAT32_C( -489.70)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 778.71), SIMDE_FLOAT32_C( -775.96),
SIMDE_FLOAT32_C( 434.09), SIMDE_FLOAT32_C( -774.36)), simde_mm_set_ps(SIMDE_FLOAT32_C( -774.36), SIMDE_FLOAT32_C( -774.36), SIMDE_FLOAT32_C( -774.36), SIMDE_FLOAT32_C( -774.36)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 386.10), SIMDE_FLOAT32_C( -166.18), SIMDE_FLOAT32_C( -357.77), SIMDE_FLOAT32_C( -155.51)), simde_mm_set_ps(SIMDE_FLOAT32_C( -155.51), SIMDE_FLOAT32_C( -155.51), SIMDE_FLOAT32_C( -155.51), SIMDE_FLOAT32_C( -155.51)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -522.34), SIMDE_FLOAT32_C( 962.01), SIMDE_FLOAT32_C( 759.63), SIMDE_FLOAT32_C( 716.41)), simde_mm_set_ps(SIMDE_FLOAT32_C( 716.41), SIMDE_FLOAT32_C( 716.41), SIMDE_FLOAT32_C( 716.41), SIMDE_FLOAT32_C( 716.41)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( 175.44), SIMDE_FLOAT32_C( -825.67), SIMDE_FLOAT32_C( -868.56)), simde_mm_set_ps(SIMDE_FLOAT32_C( -868.56), SIMDE_FLOAT32_C( -868.56), SIMDE_FLOAT32_C( -868.56), SIMDE_FLOAT32_C( -868.56)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -134.90), SIMDE_FLOAT32_C( 464.44), SIMDE_FLOAT32_C( 688.14), SIMDE_FLOAT32_C( 142.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( 142.40), SIMDE_FLOAT32_C( 142.40), SIMDE_FLOAT32_C( 142.40), SIMDE_FLOAT32_C( 142.40)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_broadcastss_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven test for simde_mm256_broadcastss_ps: the 128-bit input a is built with simde_mm_set_ps and the expected 256-bit vector r repeats a's last argument in all eight float elements. Results are compared with simde_assert_m256_close. Returns 0 when every case passes. */ static int test_simde_mm256_broadcastss_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m256 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 550.68), SIMDE_FLOAT32_C( 569.64), SIMDE_FLOAT32_C( -638.70), SIMDE_FLOAT32_C( 655.47)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 655.47), SIMDE_FLOAT32_C( 655.47), SIMDE_FLOAT32_C( 655.47), SIMDE_FLOAT32_C( 655.47), SIMDE_FLOAT32_C( 655.47), SIMDE_FLOAT32_C( 655.47), SIMDE_FLOAT32_C( 655.47), SIMDE_FLOAT32_C( 655.47)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 303.70), SIMDE_FLOAT32_C( 129.61), SIMDE_FLOAT32_C( -488.11), SIMDE_FLOAT32_C( -489.70)),
simde_mm256_set_ps(SIMDE_FLOAT32_C( -489.70), SIMDE_FLOAT32_C( -489.70), SIMDE_FLOAT32_C( -489.70), SIMDE_FLOAT32_C( -489.70), SIMDE_FLOAT32_C( -489.70), SIMDE_FLOAT32_C( -489.70), SIMDE_FLOAT32_C( -489.70), SIMDE_FLOAT32_C( -489.70)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 386.10), SIMDE_FLOAT32_C( -166.18), SIMDE_FLOAT32_C( -357.77), SIMDE_FLOAT32_C( -155.51)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -155.51), SIMDE_FLOAT32_C( -155.51), SIMDE_FLOAT32_C( -155.51), SIMDE_FLOAT32_C( -155.51), SIMDE_FLOAT32_C( -155.51), SIMDE_FLOAT32_C( -155.51), SIMDE_FLOAT32_C( -155.51), SIMDE_FLOAT32_C( -155.51)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( 175.44), SIMDE_FLOAT32_C( -825.67), SIMDE_FLOAT32_C( -868.56)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -868.56), SIMDE_FLOAT32_C( -868.56), SIMDE_FLOAT32_C( -868.56), SIMDE_FLOAT32_C( -868.56), SIMDE_FLOAT32_C( -868.56), SIMDE_FLOAT32_C( -868.56), SIMDE_FLOAT32_C( -868.56), SIMDE_FLOAT32_C( -868.56)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -766.17), SIMDE_FLOAT32_C( 812.79), SIMDE_FLOAT32_C( -842.83), SIMDE_FLOAT32_C( -99.19)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -99.19), SIMDE_FLOAT32_C( -99.19), SIMDE_FLOAT32_C( -99.19), SIMDE_FLOAT32_C( -99.19), SIMDE_FLOAT32_C( -99.19), SIMDE_FLOAT32_C( -99.19), SIMDE_FLOAT32_C( -99.19), SIMDE_FLOAT32_C( -99.19)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -434.84), SIMDE_FLOAT32_C( -337.86), SIMDE_FLOAT32_C( 127.40), SIMDE_FLOAT32_C( 235.90)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 235.90), SIMDE_FLOAT32_C( 235.90), SIMDE_FLOAT32_C( 235.90), SIMDE_FLOAT32_C( 235.90), SIMDE_FLOAT32_C( 235.90), SIMDE_FLOAT32_C( 235.90), SIMDE_FLOAT32_C( 235.90), SIMDE_FLOAT32_C( 235.90)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -349.94), SIMDE_FLOAT32_C( 403.87), SIMDE_FLOAT32_C( -69.00), SIMDE_FLOAT32_C( 494.83)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 494.83), SIMDE_FLOAT32_C( 494.83), SIMDE_FLOAT32_C( 494.83), SIMDE_FLOAT32_C( 494.83), SIMDE_FLOAT32_C( 494.83), SIMDE_FLOAT32_C( 494.83), SIMDE_FLOAT32_C( 494.83), 
SIMDE_FLOAT32_C( 494.83)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 86.40), SIMDE_FLOAT32_C( 641.98), SIMDE_FLOAT32_C( -789.02), SIMDE_FLOAT32_C( -272.67)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -272.67), SIMDE_FLOAT32_C( -272.67), SIMDE_FLOAT32_C( -272.67), SIMDE_FLOAT32_C( -272.67), SIMDE_FLOAT32_C( -272.67), SIMDE_FLOAT32_C( -272.67), SIMDE_FLOAT32_C( -272.67), SIMDE_FLOAT32_C( -272.67)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_broadcastss_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven test for simde_mm_broadcastsd_pd: in each of the 8 cases the expected vector r holds the second (last) argument of simde_mm_set_pd for a in both double elements. Returns 0 when every case passes. */ static int test_simde_mm_broadcastsd_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -638.70), SIMDE_FLOAT64_C( 655.47)), simde_mm_set_pd(SIMDE_FLOAT64_C( 655.47), SIMDE_FLOAT64_C( 655.47)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 550.68), SIMDE_FLOAT64_C( 569.64)), simde_mm_set_pd(SIMDE_FLOAT64_C( 569.64), SIMDE_FLOAT64_C( 569.64)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 396.61), SIMDE_FLOAT64_C( 882.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( 882.20), SIMDE_FLOAT64_C( 882.20)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 493.98), SIMDE_FLOAT64_C( 272.57)), simde_mm_set_pd(SIMDE_FLOAT64_C( 272.57), SIMDE_FLOAT64_C( 272.57)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -488.11), SIMDE_FLOAT64_C( -489.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -489.70), SIMDE_FLOAT64_C( -489.70)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 303.70), SIMDE_FLOAT64_C( 129.61)), simde_mm_set_pd(SIMDE_FLOAT64_C( 129.61), SIMDE_FLOAT64_C( 129.61)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 434.09), SIMDE_FLOAT64_C( -774.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( -774.36), SIMDE_FLOAT64_C( -774.36)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 778.71), SIMDE_FLOAT64_C( -775.96)), simde_mm_set_pd(SIMDE_FLOAT64_C( -775.96), SIMDE_FLOAT64_C( -775.96)) } }; /* The function under test is selected by SIMDE_NATIVE_ALIASES_TESTING: simde_mm_broadcastsd_pd when it is not defined, simde_mm_movedup_pd when it is. NOTE(review): presumably no native alias is available for the broadcastsd form - confirm. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { #if !defined(SIMDE_NATIVE_ALIASES_TESTING) simde__m128d r =
simde_mm_broadcastsd_pd(test_vec[i].a); #else simde__m128d r = simde_mm_movedup_pd(test_vec[i].a); #endif simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven test for simde_mm256_broadcastsd_pd: the 128-bit input a is built with simde_mm_set_pd and the expected 256-bit vector r repeats a's second (last) argument in all four double elements. Results are compared with simde_assert_m256d_close. Returns 0 when every case passes. */ static int test_simde_mm256_broadcastsd_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m256d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -638.70), SIMDE_FLOAT64_C( 655.47)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 655.47), SIMDE_FLOAT64_C( 655.47), SIMDE_FLOAT64_C( 655.47), SIMDE_FLOAT64_C( 655.47)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 396.61), SIMDE_FLOAT64_C( 882.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 882.20), SIMDE_FLOAT64_C( 882.20), SIMDE_FLOAT64_C( 882.20), SIMDE_FLOAT64_C( 882.20)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -488.11), SIMDE_FLOAT64_C( -489.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -489.70), SIMDE_FLOAT64_C( -489.70), SIMDE_FLOAT64_C( -489.70), SIMDE_FLOAT64_C( -489.70)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 434.09), SIMDE_FLOAT64_C( -774.36)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -774.36), SIMDE_FLOAT64_C( -774.36), SIMDE_FLOAT64_C( -774.36), SIMDE_FLOAT64_C( -774.36)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -357.77), SIMDE_FLOAT64_C( -155.51)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -155.51), SIMDE_FLOAT64_C( -155.51), SIMDE_FLOAT64_C( -155.51), SIMDE_FLOAT64_C( -155.51)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 759.63), SIMDE_FLOAT64_C( 716.41)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 716.41), SIMDE_FLOAT64_C( 716.41), SIMDE_FLOAT64_C( 716.41), SIMDE_FLOAT64_C( 716.41)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -825.67), SIMDE_FLOAT64_C( -868.56)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -868.56), SIMDE_FLOAT64_C( -868.56), SIMDE_FLOAT64_C( -868.56), SIMDE_FLOAT64_C( -868.56)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 688.14), SIMDE_FLOAT64_C( 142.40)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 142.40), SIMDE_FLOAT64_C( 142.40), SIMDE_FLOAT64_C( 142.40), SIMDE_FLOAT64_C( 142.40)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r =
simde_mm256_broadcastsd_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven test for simde_mm256_broadcastsi128_si256: the expected 256-bit vector r is built from the same pair of 64-bit values as the 128-bit input a, written twice, i.e. a copy of a in each 128-bit half. Results are compared as 64-bit integers. Returns 0 when every case compares equal. */ static int test_simde_mm256_broadcastsi128_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256i r; } test_vec[8] = { { simde_mm_set_epi64x (INT64_C( 3783485884510153131), INT64_C( 5322848723863790661)), simde_mm256_set_epi64x(INT64_C( 3783485884510153131), INT64_C( 5322848723863790661), INT64_C( 3783485884510153131), INT64_C( 5322848723863790661)) }, { simde_mm_set_epi64x (INT64_C( 3709494745882156155), INT64_C(-6688158465955285293)), simde_mm256_set_epi64x(INT64_C( 3709494745882156155), INT64_C(-6688158465955285293), INT64_C( 3709494745882156155), INT64_C(-6688158465955285293)) }, { simde_mm_set_epi64x (INT64_C(-8419237038829073489), INT64_C(-1707044583358729761)), simde_mm256_set_epi64x(INT64_C(-8419237038829073489), INT64_C(-1707044583358729761), INT64_C(-8419237038829073489), INT64_C(-1707044583358729761)) }, { simde_mm_set_epi64x (INT64_C( -168447772491176834), INT64_C(-4784151950425519944)), simde_mm256_set_epi64x(INT64_C( -168447772491176834), INT64_C(-4784151950425519944), INT64_C( -168447772491176834), INT64_C(-4784151950425519944)) }, { simde_mm_set_epi64x (INT64_C( 992172666234330248), INT64_C( 7115358650695109080)), simde_mm256_set_epi64x(INT64_C( 992172666234330248), INT64_C( 7115358650695109080), INT64_C( 992172666234330248), INT64_C( 7115358650695109080)) }, { simde_mm_set_epi64x (INT64_C( 6019682307676879898), INT64_C( -745177675559433697)), simde_mm256_set_epi64x(INT64_C( 6019682307676879898), INT64_C( -745177675559433697), INT64_C( 6019682307676879898), INT64_C( -745177675559433697)) }, { simde_mm_set_epi64x (INT64_C(-8364368011425569946), INT64_C( 1685080214233190853)), simde_mm256_set_epi64x(INT64_C(-8364368011425569946), INT64_C( 1685080214233190853), INT64_C(-8364368011425569946), INT64_C( 1685080214233190853)) }, { simde_mm_set_epi64x (INT64_C( 6712859087758853138), INT64_C( 2170550621944299043)),
simde_mm256_set_epi64x(INT64_C( 6712859087758853138), INT64_C( 2170550621944299043), INT64_C( 6712859087758853138), INT64_C( 2170550621944299043)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_broadcastsi128_si256(test_vec[i].a); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } /* Test for simde_mm256_bslli_epi128 using five input/expected byte-array pairs (a1/e1 .. a5/e5) with byte counts 0, 3, 13, 2 and 19. The expected arrays show each 16-byte half shifted independently toward higher indices by the given number of bytes, with zeros filling the vacated low positions of each half. The count is written as a literal at each call rather than looped over. Returns 0 when every case compares equal. */ static int test_simde_mm256_bslli_epi128(SIMDE_MUNIT_TEST_ARGS) { simde__m256i r; /* count 0: expected output is identical to the input */ const int8_t a1[32] = { -INT8_C( 101), -INT8_C( 22), INT8_C( 109), INT8_C( 62), -INT8_C( 16), INT8_C( 2), -INT8_C( 71), INT8_C( 107), -INT8_C( 122), INT8_C( 94), -INT8_C( 113), -INT8_C( 23), -INT8_C( 99), -INT8_C( 110), -INT8_C( 42), INT8_C( 105), INT8_C( 5), -INT8_C( 23), -INT8_C( 24), INT8_C( 36), -INT8_C( 20), INT8_C( 8), -INT8_C( 24), INT8_C( 71), -INT8_C( 117), INT8_C( 77), -INT8_C( 78), -INT8_C( 120), -INT8_C( 92), INT8_C( 20), INT8_C( 10), INT8_C( 64) }; const int8_t e1[32] = { -INT8_C( 101), -INT8_C( 22), INT8_C( 109), INT8_C( 62), -INT8_C( 16), INT8_C( 2), -INT8_C( 71), INT8_C( 107), -INT8_C( 122), INT8_C( 94), -INT8_C( 113), -INT8_C( 23), -INT8_C( 99), -INT8_C( 110), -INT8_C( 42), INT8_C( 105), INT8_C( 5), -INT8_C( 23), -INT8_C( 24), INT8_C( 36), -INT8_C( 20), INT8_C( 8), -INT8_C( 24), INT8_C( 71), -INT8_C( 117), INT8_C( 77), -INT8_C( 78), -INT8_C( 120), -INT8_C( 92), INT8_C( 20), INT8_C( 10), INT8_C( 64) }; r = simde_mm256_bslli_epi128(simde_x_mm256_loadu_epi8(a1), 0); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(e1)); /* count 3: the first three bytes of each 16-byte half become zero */ const int8_t a2[32] = { INT8_C( 62), INT8_C( 94), INT8_C( 58), -INT8_C( 15), INT8_C( 92), INT8_C( 124), INT8_C( 43), -INT8_C( 58), -INT8_C( 7), INT8_C( 63), INT8_C( 54), INT8_C( 76), -INT8_C( 88), -INT8_C( 90), -INT8_C( 2), -INT8_C( 57), INT8_C( 73), INT8_C( 106), INT8_C( 57), INT8_C( 44), INT8_C( 105), -INT8_C( 54), -INT8_C( 66), INT8_C( 62), -INT8_C( 71), -INT8_C( 67), -INT8_C( 106), -INT8_C( 54), INT8_C( 54), -INT8_C( 122), -INT8_C( 65), INT8_C( 116) }; const int8_t e2[32] = { INT8_C( 0),
INT8_C( 0), INT8_C( 0), INT8_C( 62), INT8_C( 94), INT8_C( 58), -INT8_C( 15), INT8_C( 92), INT8_C( 124), INT8_C( 43), -INT8_C( 58), -INT8_C( 7), INT8_C( 63), INT8_C( 54), INT8_C( 76), -INT8_C( 88), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 73), INT8_C( 106), INT8_C( 57), INT8_C( 44), INT8_C( 105), -INT8_C( 54), -INT8_C( 66), INT8_C( 62), -INT8_C( 71), -INT8_C( 67), -INT8_C( 106), -INT8_C( 54), INT8_C( 54) }; r = simde_mm256_bslli_epi128(simde_x_mm256_loadu_epi8(a2), 3); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(e2)); const int8_t a3[32] = { -INT8_C( 109), INT8_C( 56), INT8_C( 107), -INT8_C( 88), INT8_C( 124), INT8_C( 123), -INT8_C( 108), -INT8_C( 79), INT8_C( 10), -INT8_C( 123), -INT8_C( 57), -INT8_C( 23), -INT8_C( 32), -INT8_C( 10), -INT8_C( 85), -INT8_C( 22), INT8_C( 91), -INT8_C( 91), INT8_C( 0), -INT8_C( 47), INT8_C( 30), -INT8_C( 7), -INT8_C( 73), INT8_C( 17), INT8_C( 74), INT8_C( 6), -INT8_C( 73), -INT8_C( 13), INT8_C( 59), INT8_C( 44), -INT8_C( 34), -INT8_C( 49) }; const int8_t e3[32] = { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 109), INT8_C( 56), INT8_C( 107), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 91), -INT8_C( 91), INT8_C( 0) }; r = simde_mm256_bslli_epi128(simde_x_mm256_loadu_epi8(a3), 13); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(e3)); const int8_t a4[32] = { INT8_C( 74), -INT8_C( 109), -INT8_C( 57), INT8_C( 52), INT8_C( 64), -INT8_C( 101), INT8_C( 54), -INT8_C( 122), -INT8_C( 85), INT8_MAX, -INT8_C( 106), INT8_C( 80), -INT8_C( 15), -INT8_C( 65), INT8_C( 88), INT8_C( 92), -INT8_C( 12), INT8_C( 103), -INT8_C( 4), -INT8_C( 36), -INT8_C( 67), -INT8_C( 101), -INT8_C( 91), INT8_MAX, -INT8_C( 39), -INT8_C( 89), -INT8_C( 86), -INT8_C( 94), -INT8_C( 33), -INT8_C( 28), INT8_C( 
102), INT8_C( 41) }; const int8_t e4[32] = { INT8_C( 0), INT8_C( 0), INT8_C( 74), -INT8_C( 109), -INT8_C( 57), INT8_C( 52), INT8_C( 64), -INT8_C( 101), INT8_C( 54), -INT8_C( 122), -INT8_C( 85), INT8_MAX, -INT8_C( 106), INT8_C( 80), -INT8_C( 15), -INT8_C( 65), INT8_C( 0), INT8_C( 0), -INT8_C( 12), INT8_C( 103), -INT8_C( 4), -INT8_C( 36), -INT8_C( 67), -INT8_C( 101), -INT8_C( 91), INT8_MAX, -INT8_C( 39), -INT8_C( 89), -INT8_C( 86), -INT8_C( 94), -INT8_C( 33), -INT8_C( 28) }; r = simde_mm256_bslli_epi128(simde_x_mm256_loadu_epi8(a4), 2); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(e4)); const int8_t a5[32] = { INT8_C( 102), -INT8_C( 9), INT8_C( 57), -INT8_C( 65), -INT8_C( 51), INT8_C( 43), -INT8_C( 30), INT8_C( 53), INT8_C( 68), INT8_C( 76), -INT8_C( 34), -INT8_C( 69), INT8_C( 55), INT8_C( 91), -INT8_C( 101), -INT8_C( 32), INT8_C( 107), INT8_C( 28), INT8_C( 11), -INT8_C( 64), INT8_C( 104), -INT8_C( 26), INT8_C( 67), INT8_C( 120), INT8_C( 27), INT8_C( 100), INT8_C( 46), -INT8_C( 10), -INT8_C( 68), INT8_C( 86), INT8_C( 29), INT8_C( 34) }; const int8_t e5[32] = { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) }; r = simde_mm256_bslli_epi128(simde_x_mm256_loadu_epi8(a5), 19); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(e5)); return 0; } static int test_simde_mm256_bsrli_epi128(SIMDE_MUNIT_TEST_ARGS) { simde__m256i r; const int8_t a1[32] = { -INT8_C( 101), -INT8_C( 22), INT8_C( 109), INT8_C( 62), -INT8_C( 16), INT8_C( 2), -INT8_C( 71), INT8_C( 107), -INT8_C( 122), INT8_C( 94), -INT8_C( 113), -INT8_C( 23), -INT8_C( 99), -INT8_C( 110), -INT8_C( 42), INT8_C( 105), INT8_C( 5), -INT8_C( 23), -INT8_C( 24), INT8_C( 
36), -INT8_C( 20), INT8_C( 8), -INT8_C( 24), INT8_C( 71), -INT8_C( 117), INT8_C( 77), -INT8_C( 78), -INT8_C( 120), -INT8_C( 92), INT8_C( 20), INT8_C( 10), INT8_C( 64) }; const int8_t e1[32] = { -INT8_C( 101), -INT8_C( 22), INT8_C( 109), INT8_C( 62), -INT8_C( 16), INT8_C( 2), -INT8_C( 71), INT8_C( 107), -INT8_C( 122), INT8_C( 94), -INT8_C( 113), -INT8_C( 23), -INT8_C( 99), -INT8_C( 110), -INT8_C( 42), INT8_C( 105), INT8_C( 5), -INT8_C( 23), -INT8_C( 24), INT8_C( 36), -INT8_C( 20), INT8_C( 8), -INT8_C( 24), INT8_C( 71), -INT8_C( 117), INT8_C( 77), -INT8_C( 78), -INT8_C( 120), -INT8_C( 92), INT8_C( 20), INT8_C( 10), INT8_C( 64) }; r = simde_mm256_bsrli_epi128(simde_x_mm256_loadu_epi8(a1), 0); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(e1)); const int8_t a2[32] = { INT8_C( 16), -INT8_C( 10), -INT8_C( 116), INT8_C( 62), -INT8_C( 37), -INT8_C( 30), -INT8_C( 105), INT8_C( 80), INT8_C( 0), -INT8_C( 81), INT8_C( 44), INT8_C( 72), -INT8_C( 47), -INT8_C( 51), INT8_C( 72), -INT8_C( 102), INT8_C( 90), -INT8_C( 37), -INT8_C( 126), INT8_C( 111), INT8_C( 75), -INT8_C( 3), -INT8_C( 50), -INT8_C( 67), -INT8_C( 112), -INT8_C( 59), INT8_C( 80), INT8_C( 2), -INT8_C( 10), INT8_C( 8), -INT8_C( 72), INT8_C( 7) }; const int8_t e2[32] = { INT8_C( 62), -INT8_C( 37), -INT8_C( 30), -INT8_C( 105), INT8_C( 80), INT8_C( 0), -INT8_C( 81), INT8_C( 44), INT8_C( 72), -INT8_C( 47), -INT8_C( 51), INT8_C( 72), -INT8_C( 102), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 111), INT8_C( 75), -INT8_C( 3), -INT8_C( 50), -INT8_C( 67), -INT8_C( 112), -INT8_C( 59), INT8_C( 80), INT8_C( 2), -INT8_C( 10), INT8_C( 8), -INT8_C( 72), INT8_C( 7), INT8_C( 0), INT8_C( 0), INT8_C( 0) }; r = simde_mm256_bsrli_epi128(simde_x_mm256_loadu_epi8(a2), 3); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(e2)); const int8_t a3[32] = { -INT8_C( 90), INT8_C( 37), INT8_C( 2), INT8_C( 43), INT8_C( 12), INT8_C( 1), -INT8_C( 14), -INT8_C( 108), -INT8_C( 108), -INT8_C( 85), INT8_C( 63), INT8_C( 117), 
-INT8_C( 64), INT8_C( 115), -INT8_C( 42), -INT8_C( 20), INT8_C( 9), -INT8_C( 75), INT8_C( 98), INT8_C( 100), INT8_C( 13), -INT8_C( 115), INT8_C( 124), INT8_C( 78), INT8_C( 48), -INT8_C( 23), INT8_C( 73), INT8_C( 111), -INT8_C( 10), -INT8_C( 64), INT8_C( 14), -INT8_C( 100) }; const int8_t e3[32] = { INT8_C( 115), -INT8_C( 42), -INT8_C( 20), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 64), INT8_C( 14), -INT8_C( 100), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) }; r = simde_mm256_bsrli_epi128(simde_x_mm256_loadu_epi8(a3), 13); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(e3)); const int8_t a4[32] = { -INT8_C( 103), -INT8_C( 46), -INT8_C( 10), INT8_C( 82), -INT8_C( 74), -INT8_C( 94), -INT8_C( 122), -INT8_C( 9), INT8_C( 105), -INT8_C( 36), -INT8_C( 46), -INT8_C( 40), -INT8_C( 102), -INT8_C( 76), INT8_C( 23), -INT8_C( 45), INT8_C( 73), -INT8_C( 66), INT8_C( 102), INT8_C( 124), -INT8_C( 122), INT8_C( 8), INT8_C( 49), -INT8_C( 57), -INT8_C( 70), -INT8_C( 95), -INT8_C( 12), -INT8_C( 115), -INT8_C( 68), INT8_C( 77), INT8_C( 94), INT8_C( 85) }; const int8_t e4[32] = { -INT8_C( 10), INT8_C( 82), -INT8_C( 74), -INT8_C( 94), -INT8_C( 122), -INT8_C( 9), INT8_C( 105), -INT8_C( 36), -INT8_C( 46), -INT8_C( 40), -INT8_C( 102), -INT8_C( 76), INT8_C( 23), -INT8_C( 45), INT8_C( 0), INT8_C( 0), INT8_C( 102), INT8_C( 124), -INT8_C( 122), INT8_C( 8), INT8_C( 49), -INT8_C( 57), -INT8_C( 70), -INT8_C( 95), -INT8_C( 12), -INT8_C( 115), -INT8_C( 68), INT8_C( 77), INT8_C( 94), INT8_C( 85), INT8_C( 0), INT8_C( 0) }; r = simde_mm256_bsrli_epi128(simde_x_mm256_loadu_epi8(a4), 2); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(e4)); const int8_t a5[32] = { INT8_C( 3), -INT8_C( 11), INT8_C( 13), -INT8_C( 50), INT8_C( 113), -INT8_C( 14), 
-INT8_C( 91), -INT8_C( 97), -INT8_C( 82), -INT8_C( 41), -INT8_C( 48), INT8_C( 51), INT8_C( 0), INT8_C( 74), INT8_C( 95), -INT8_C( 86), -INT8_C( 107), INT8_C( 112), -INT8_C( 126), INT8_C( 116), INT8_C( 46), INT8_C( 35), INT8_C( 12), -INT8_C( 126), INT8_C( 51), INT8_C( 63), INT8_C( 58), INT8_C( 109), -INT8_C( 105), -INT8_C( 22), -INT8_C( 72), -INT8_C( 102) }; const int8_t e5[32] = { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) }; r = simde_mm256_bsrli_epi128(simde_x_mm256_loadu_epi8(a5), 19); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(e5)); return 0; } static int test_simde_mm256_cvtepi8_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( -64), INT8_C( -39), INT8_C( -1), INT8_C( 123), INT8_C( -41), INT8_C( 42), INT8_C( -42), INT8_C( 48), INT8_C(-101), INT8_C( -11), INT8_C( 78), INT8_C( -33), INT8_C( -4), INT8_C( -46), INT8_C(-128), INT8_C( 115)), simde_mm256_set_epi16(INT16_C( -64), INT16_C( -39), INT16_C( -1), INT16_C( 123), INT16_C( -41), INT16_C( 42), INT16_C( -42), INT16_C( 48), INT16_C( -101), INT16_C( -11), INT16_C( 78), INT16_C( -33), INT16_C( -4), INT16_C( -46), INT16_C( -128), INT16_C( 115)) }, { simde_mm_set_epi8(INT8_C( -82), INT8_C( 92), INT8_C( 55), INT8_C( -41), INT8_C( 61), INT8_C(-113), INT8_C( 108), INT8_C( 36), INT8_C( 115), INT8_C( 53), INT8_C( -46), INT8_C( -96), INT8_C( -71), INT8_C( -38), INT8_C( 25), INT8_C( 61)), simde_mm256_set_epi16(INT16_C( -82), INT16_C( 92), INT16_C( 55), INT16_C( -41), INT16_C( 61), INT16_C( -113), INT16_C( 108), INT16_C( 36), INT16_C( 115), INT16_C( 53), INT16_C( -46), INT16_C( -96), INT16_C( -71), 
INT16_C( -38), INT16_C( 25), INT16_C( 61)) }, { simde_mm_set_epi8(INT8_C( -73), INT8_C( 5), INT8_C( 10), INT8_C(-111), INT8_C(-127), INT8_C( 16), INT8_C( -16), INT8_C(-115), INT8_C( 94), INT8_C( 88), INT8_C( -20), INT8_C( -24), INT8_C( -27), INT8_C( -17), INT8_C( -31), INT8_C( 67)), simde_mm256_set_epi16(INT16_C( -73), INT16_C( 5), INT16_C( 10), INT16_C( -111), INT16_C( -127), INT16_C( 16), INT16_C( -16), INT16_C( -115), INT16_C( 94), INT16_C( 88), INT16_C( -20), INT16_C( -24), INT16_C( -27), INT16_C( -17), INT16_C( -31), INT16_C( 67)) }, { simde_mm_set_epi8(INT8_C( -17), INT8_C( 8), INT8_C( 67), INT8_C( -40), INT8_C( 99), INT8_C( 89), INT8_C( -60), INT8_C( -24), INT8_C( 120), INT8_C( -29), INT8_C(-127), INT8_C( 15), INT8_C( -40), INT8_C(-106), INT8_C( 13), INT8_C( -27)), simde_mm256_set_epi16(INT16_C( -17), INT16_C( 8), INT16_C( 67), INT16_C( -40), INT16_C( 99), INT16_C( 89), INT16_C( -60), INT16_C( -24), INT16_C( 120), INT16_C( -29), INT16_C( -127), INT16_C( 15), INT16_C( -40), INT16_C( -106), INT16_C( 13), INT16_C( -27)) }, { simde_mm_set_epi8(INT8_C( -5), INT8_C( 52), INT8_C( 112), INT8_C( -86), INT8_C( 7), INT8_C( 2), INT8_C( 89), INT8_C( 40), INT8_C( 34), INT8_C( -14), INT8_C(-119), INT8_C(-115), INT8_C(-103), INT8_C( 103), INT8_C( -15), INT8_C( -68)), simde_mm256_set_epi16(INT16_C( -5), INT16_C( 52), INT16_C( 112), INT16_C( -86), INT16_C( 7), INT16_C( 2), INT16_C( 89), INT16_C( 40), INT16_C( 34), INT16_C( -14), INT16_C( -119), INT16_C( -115), INT16_C( -103), INT16_C( 103), INT16_C( -15), INT16_C( -68)) }, { simde_mm_set_epi8(INT8_C( 34), INT8_C( 104), INT8_C(-116), INT8_C(-106), INT8_C(-122), INT8_C( 51), INT8_C( -86), INT8_C( 26), INT8_C( 57), INT8_C( 23), INT8_C( 125), INT8_C( 20), INT8_C( 40), INT8_C( -87), INT8_C( -60), INT8_C( -93)), simde_mm256_set_epi16(INT16_C( 34), INT16_C( 104), INT16_C( -116), INT16_C( -106), INT16_C( -122), INT16_C( 51), INT16_C( -86), INT16_C( 26), INT16_C( 57), INT16_C( 23), INT16_C( 125), INT16_C( 20), INT16_C( 40), INT16_C( 
-87), INT16_C( -60), INT16_C( -93)) }, { simde_mm_set_epi8(INT8_C( 35), INT8_C( 103), INT8_C( 83), INT8_C( 11), INT8_C( 5), INT8_C( -26), INT8_C( -34), INT8_C( -28), INT8_C( -15), INT8_C( -2), INT8_C( 10), INT8_C( -97), INT8_C( 35), INT8_C(-108), INT8_C( 38), INT8_C(-122)), simde_mm256_set_epi16(INT16_C( 35), INT16_C( 103), INT16_C( 83), INT16_C( 11), INT16_C( 5), INT16_C( -26), INT16_C( -34), INT16_C( -28), INT16_C( -15), INT16_C( -2), INT16_C( 10), INT16_C( -97), INT16_C( 35), INT16_C( -108), INT16_C( 38), INT16_C( -122)) }, { simde_mm_set_epi8(INT8_C( 44), INT8_C(-101), INT8_C(-122), INT8_C( 91), INT8_C( 60), INT8_C( 60), INT8_C( 22), INT8_C( 81), INT8_C( 13), INT8_C(-111), INT8_C(-125), INT8_C( -40), INT8_C( -72), INT8_C(-102), INT8_C( -87), INT8_C( 108)), simde_mm256_set_epi16(INT16_C( 44), INT16_C( -101), INT16_C( -122), INT16_C( 91), INT16_C( 60), INT16_C( 60), INT16_C( 22), INT16_C( 81), INT16_C( 13), INT16_C( -111), INT16_C( -125), INT16_C( -40), INT16_C( -72), INT16_C( -102), INT16_C( -87), INT16_C( 108)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cvtepi8_epi16(test_vec[i].a); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } /* Table-driven test for simde_mm256_cvtepi8_epi32: in each of the 8 cases the expected 256-bit vector r lists only the last eight arguments given to simde_mm_set_epi8 for a, in the same order, as 32-bit integers; the first eight arguments do not appear in the result. Returns 0 when every case compares equal. */ static int test_simde_mm256_cvtepi8_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( 123), INT8_C( -2), INT8_C( 102), INT8_C( -48), INT8_C( 12), INT8_C(-119), INT8_C( -32), INT8_C( 68), INT8_C( -88), INT8_C( 80), INT8_C( 32), INT8_C(-108), INT8_C( -66), INT8_C( 35), INT8_C( -12), INT8_C(-103)), simde_mm256_set_epi32(INT32_C( -88), INT32_C( 80), INT32_C( 32), INT32_C( -108), INT32_C( -66), INT32_C( 35), INT32_C( -12), INT32_C( -103)) }, { simde_mm_set_epi8(INT8_C( -12), INT8_C( -88), INT8_C( -20), INT8_C( -26), INT8_C( -69), INT8_C( -40), INT8_C( 122), INT8_C(-121), INT8_C( 47), INT8_C( 123), INT8_C( -15), INT8_C(-104), INT8_C( -91), INT8_C( -13), INT8_C( -61), INT8_C(-104)),
simde_mm256_set_epi32(INT32_C( 47), INT32_C( 123), INT32_C( -15), INT32_C( -104), INT32_C( -91), INT32_C( -13), INT32_C( -61), INT32_C( -104)) }, { simde_mm_set_epi8(INT8_C(-105), INT8_C( 35), INT8_C(-118), INT8_C( -44), INT8_C( -91), INT8_C( 43), INT8_C( 18), INT8_C( -70), INT8_C( 44), INT8_C( 98), INT8_C( -82), INT8_C(-126), INT8_C( -80), INT8_C(-109), INT8_C( 69), INT8_C(-116)), simde_mm256_set_epi32(INT32_C( 44), INT32_C( 98), INT32_C( -82), INT32_C( -126), INT32_C( -80), INT32_C( -109), INT32_C( 69), INT32_C( -116)) }, { simde_mm_set_epi8(INT8_C( 43), INT8_C( 92), INT8_C( -66), INT8_C( -32), INT8_C( 71), INT8_C( -63), INT8_C( 51), INT8_C( 89), INT8_C( 102), INT8_C( -4), INT8_C( -28), INT8_C( -5), INT8_C(-107), INT8_C( -25), INT8_C(-107), INT8_C( -15)), simde_mm256_set_epi32(INT32_C( 102), INT32_C( -4), INT32_C( -28), INT32_C( -5), INT32_C( -107), INT32_C( -25), INT32_C( -107), INT32_C( -15)) }, { simde_mm_set_epi8(INT8_C( -7), INT8_C( -22), INT8_C( -1), INT8_C( 29), INT8_C( -3), INT8_C( -97), INT8_C( 61), INT8_C( -36), INT8_C( -81), INT8_C( -83), INT8_C( 54), INT8_C( 37), INT8_C( -70), INT8_C( -70), INT8_C( 12), INT8_C( -61)), simde_mm256_set_epi32(INT32_C( -81), INT32_C( -83), INT32_C( 54), INT32_C( 37), INT32_C( -70), INT32_C( -70), INT32_C( 12), INT32_C( -61)) }, { simde_mm_set_epi8(INT8_C( 88), INT8_C( -30), INT8_C( 125), INT8_C( -22), INT8_C( -87), INT8_C(-109), INT8_C( -90), INT8_C( -86), INT8_C(-124), INT8_C( -10), INT8_C( 57), INT8_C( -62), INT8_C( -55), INT8_C( 40), INT8_C( -11), INT8_C(-100)), simde_mm256_set_epi32(INT32_C( -124), INT32_C( -10), INT32_C( 57), INT32_C( -62), INT32_C( -55), INT32_C( 40), INT32_C( -11), INT32_C( -100)) }, { simde_mm_set_epi8(INT8_C( 58), INT8_C(-110), INT8_C( -6), INT8_C( -58), INT8_C(-110), INT8_C( 125), INT8_C( 39), INT8_C( -82), INT8_C( -88), INT8_C( 107), INT8_C( 76), INT8_C( 58), INT8_C( -2), INT8_C(-119), INT8_C( -27), INT8_C( 40)), simde_mm256_set_epi32(INT32_C( -88), INT32_C( 107), INT32_C( 76), INT32_C( 58), 
INT32_C( -2), INT32_C( -119), INT32_C( -27), INT32_C( 40)) }, { simde_mm_set_epi8(INT8_C(-110), INT8_C( -24), INT8_C( -33), INT8_C(-117), INT8_C( 92), INT8_C( -35), INT8_C( 113), INT8_C( 13), INT8_C( 46), INT8_C(-107), INT8_C( 4), INT8_C( 8), INT8_C( 56), INT8_C( 9), INT8_C( 123), INT8_C( 36)), simde_mm256_set_epi32(INT32_C( 46), INT32_C( -107), INT32_C( 4), INT32_C( 8), INT32_C( 56), INT32_C( 9), INT32_C( 123), INT32_C( 36)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cvtepi8_epi32(test_vec[i].a); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cvtepi8_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( 9), INT8_C( -74), INT8_C( -52), INT8_C( -68), INT8_C( 95), INT8_C( -10), INT8_C( -99), INT8_C( 121), INT8_C( 1), INT8_C( 69), INT8_C( -36), INT8_C(-102), INT8_C(-124), INT8_C( -93), INT8_C(-101), INT8_C(-118)), simde_mm256_set_epi64x(INT64_C( -124), INT64_C( -93), INT64_C( -101), INT64_C( -118)) }, { simde_mm_set_epi8(INT8_C(-107), INT8_C( -34), INT8_C( -75), INT8_C( 119), INT8_C( -79), INT8_C(-103), INT8_C( -9), INT8_C(-121), INT8_C( -51), INT8_C( 4), INT8_C( 89), INT8_C( 110), INT8_C( 112), INT8_C( 67), INT8_C(-101), INT8_C( 87)), simde_mm256_set_epi64x(INT64_C( 112), INT64_C( 67), INT64_C( -101), INT64_C( 87)) }, { simde_mm_set_epi8(INT8_C( -85), INT8_C( 121), INT8_C(-116), INT8_C( 121), INT8_C( -43), INT8_C( -80), INT8_C( 1), INT8_C(-102), INT8_C( 39), INT8_C(-105), INT8_C( -26), INT8_C( 88), INT8_C( -52), INT8_C( 61), INT8_C(-127), INT8_C( 21)), simde_mm256_set_epi64x(INT64_C( -52), INT64_C( 61), INT64_C( -127), INT64_C( 21)) }, { simde_mm_set_epi8(INT8_C( 85), INT8_C( 1), INT8_C( 94), INT8_C( 60), INT8_C( 102), INT8_C( 9), INT8_C( -56), INT8_C( 64), INT8_C(-103), INT8_C( -86), INT8_C( 51), INT8_C(-126), INT8_C( -9), INT8_C( 88), INT8_C( -52), INT8_C( 13)), 
simde_mm256_set_epi64x(INT64_C( -9), INT64_C( 88), INT64_C( -52), INT64_C( 13)) }, { simde_mm_set_epi8(INT8_C( 0), INT8_C( -40), INT8_C( 42), INT8_C( 42), INT8_C( 109), INT8_C( -5), INT8_C( -28), INT8_C( -80), INT8_C( 70), INT8_C( 6), INT8_C( -51), INT8_C( 17), INT8_C(-105), INT8_C(-108), INT8_C( 14), INT8_C( -91)), simde_mm256_set_epi64x(INT64_C( -105), INT64_C( -108), INT64_C( 14), INT64_C( -91)) }, { simde_mm_set_epi8(INT8_C( 125), INT8_C(-114), INT8_C( -39), INT8_C(-118), INT8_C( 29), INT8_C( -81), INT8_C(-121), INT8_C( 115), INT8_C(-128), INT8_C( 99), INT8_C( 64), INT8_C( 7), INT8_C( 114), INT8_C(-101), INT8_C(-127), INT8_C( 30)), simde_mm256_set_epi64x(INT64_C( 114), INT64_C( -101), INT64_C( -127), INT64_C( 30)) }, { simde_mm_set_epi8(INT8_C(-126), INT8_C( 122), INT8_C( 50), INT8_C( 115), INT8_C( 72), INT8_C( -56), INT8_C( 52), INT8_C( 125), INT8_C(-104), INT8_C( -76), INT8_C( 65), INT8_C( 103), INT8_C(-122), INT8_C( -36), INT8_C( -93), INT8_C( 110)), simde_mm256_set_epi64x(INT64_C( -122), INT64_C( -36), INT64_C( -93), INT64_C( 110)) }, { simde_mm_set_epi8(INT8_C( -80), INT8_C( 98), INT8_C(-119), INT8_C( 51), INT8_C( -37), INT8_C( -17), INT8_C( -94), INT8_C(-122), INT8_C(-114), INT8_C( 13), INT8_C( -75), INT8_C( 111), INT8_C( -76), INT8_C( -15), INT8_C( -1), INT8_C( -12)), simde_mm256_set_epi64x(INT64_C( -76), INT64_C( -15), INT64_C( -1), INT64_C( -12)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cvtepi8_epi64(test_vec[i].a); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cvtepi16_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( 29201), INT16_C( 17763), INT16_C( 13480), INT16_C( 29487), INT16_C( -6581), INT16_C( 13446), INT16_C( 26538), INT16_C( -3936)), simde_mm256_set_epi32(INT32_C( 29201), INT32_C( 17763), INT32_C( 13480), INT32_C( 29487), INT32_C( -6581), INT32_C( 13446), 
INT32_C( 26538), INT32_C( -3936)) }, { simde_mm_set_epi16(INT16_C( -3829), INT16_C( 12503), INT16_C( 10513), INT16_C( 11407), INT16_C( 28611), INT16_C( 5727), INT16_C( 32199), INT16_C( 11434)), simde_mm256_set_epi32(INT32_C( -3829), INT32_C( 12503), INT32_C( 10513), INT32_C( 11407), INT32_C( 28611), INT32_C( 5727), INT32_C( 32199), INT32_C( 11434)) }, { simde_mm_set_epi16(INT16_C( -4989), INT16_C(-23450), INT16_C(-15441), INT16_C( 22895), INT16_C( 2927), INT16_C(-20709), INT16_C( -7582), INT16_C(-31347)), simde_mm256_set_epi32(INT32_C( -4989), INT32_C( -23450), INT32_C( -15441), INT32_C( 22895), INT32_C( 2927), INT32_C( -20709), INT32_C( -7582), INT32_C( -31347)) }, { simde_mm_set_epi16(INT16_C( 24635), INT16_C( 21162), INT16_C( 29781), INT16_C( -488), INT16_C( -6580), INT16_C(-27732), INT16_C( 607), INT16_C(-20782)), simde_mm256_set_epi32(INT32_C( 24635), INT32_C( 21162), INT32_C( 29781), INT32_C( -488), INT32_C( -6580), INT32_C( -27732), INT32_C( 607), INT32_C( -20782)) }, { simde_mm_set_epi16(INT16_C( -401), INT16_C( -5605), INT16_C(-28681), INT16_C( 4577), INT16_C( -9094), INT16_C( 6804), INT16_C( -9223), INT16_C( 10661)), simde_mm256_set_epi32(INT32_C( -401), INT32_C( -5605), INT32_C( -28681), INT32_C( 4577), INT32_C( -9094), INT32_C( 6804), INT32_C( -9223), INT32_C( 10661)) }, { simde_mm_set_epi16(INT16_C( 2053), INT16_C(-19489), INT16_C( 7360), INT16_C( 16515), INT16_C( 32080), INT16_C(-16555), INT16_C(-32752), INT16_C( 19001)), simde_mm256_set_epi32(INT32_C( 2053), INT32_C( -19489), INT32_C( 7360), INT32_C( 16515), INT32_C( 32080), INT32_C( -16555), INT32_C( -32752), INT32_C( 19001)) }, { simde_mm_set_epi16(INT16_C(-31610), INT16_C(-22910), INT16_C( 22850), INT16_C(-27168), INT16_C( 13092), INT16_C(-21449), INT16_C( 16024), INT16_C( 8597)), simde_mm256_set_epi32(INT32_C( -31610), INT32_C( -22910), INT32_C( 22850), INT32_C( -27168), INT32_C( 13092), INT32_C( -21449), INT32_C( 16024), INT32_C( 8597)) }, { simde_mm_set_epi16(INT16_C( 28081), INT16_C( -7416), 
INT16_C( 18632), INT16_C(-28896), INT16_C(-23156), INT16_C(-15424), INT16_C(-13082), INT16_C( 23555)), simde_mm256_set_epi32(INT32_C( 28081), INT32_C( -7416), INT32_C( 18632), INT32_C( -28896), INT32_C( -23156), INT32_C( -15424), INT32_C( -13082), INT32_C( 23555)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cvtepi16_epi32(test_vec[i].a); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cvtepi16_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-31485), INT16_C( 6767), INT16_C( -1054), INT16_C(-15390), INT16_C(-10897), INT16_C(-31336), INT16_C( -9551), INT16_C( 8617)), simde_mm256_set_epi64x(INT64_C( -10897), INT64_C( -31336), INT64_C( -9551), INT64_C( 8617)) }, { simde_mm_set_epi16(INT16_C( 30988), INT16_C( 5181), INT16_C( 18938), INT16_C(-32273), INT16_C( 5456), INT16_C( 19282), INT16_C(-21851), INT16_C( 27127)), simde_mm256_set_epi64x(INT64_C( 5456), INT64_C( 19282), INT64_C( -21851), INT64_C( 27127)) }, { simde_mm_set_epi16(INT16_C( -7527), INT16_C(-20831), INT16_C( 23786), INT16_C( 3131), INT16_C(-19886), INT16_C( -2045), INT16_C(-25062), INT16_C( -5779)), simde_mm256_set_epi64x(INT64_C( -19886), INT64_C( -2045), INT64_C( -25062), INT64_C( -5779)) }, { simde_mm_set_epi16(INT16_C( 31086), INT16_C( 22946), INT16_C( -1551), INT16_C( 9842), INT16_C(-22586), INT16_C( 5015), INT16_C( 24465), INT16_C(-15199)), simde_mm256_set_epi64x(INT64_C( -22586), INT64_C( 5015), INT64_C( 24465), INT64_C( -15199)) }, { simde_mm_set_epi16(INT16_C( 20641), INT16_C(-16277), INT16_C( 32328), INT16_C( 21501), INT16_C( 17537), INT16_C(-11745), INT16_C(-20371), INT16_C( 18200)), simde_mm256_set_epi64x(INT64_C( 17537), INT64_C( -11745), INT64_C( -20371), INT64_C( 18200)) }, { simde_mm_set_epi16(INT16_C(-26363), INT16_C(-18240), INT16_C( 21370), INT16_C(-23762), INT16_C( 2610), INT16_C( 7301), INT16_C(-13295), 
INT16_C( 15532)), simde_mm256_set_epi64x(INT64_C( 2610), INT64_C( 7301), INT64_C( -13295), INT64_C( 15532)) }, { simde_mm_set_epi16(INT16_C( -6112), INT16_C(-22921), INT16_C( -1544), INT16_C(-16225), INT16_C( 14287), INT16_C(-23793), INT16_C( 5660), INT16_C(-22213)), simde_mm256_set_epi64x(INT64_C( 14287), INT64_C( -23793), INT64_C( 5660), INT64_C( -22213)) }, { simde_mm_set_epi16(INT16_C( -130), INT16_C( -9648), INT16_C(-32446), INT16_C( 22661), INT16_C( 18414), INT16_C(-28168), INT16_C( 10429), INT16_C( 9914)), simde_mm256_set_epi64x(INT64_C( 18414), INT64_C( -28168), INT64_C( 10429), INT64_C( 9914)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cvtepi16_epi64(test_vec[i].a); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cvtepi32_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 2035310840), INT32_C( 817509407), INT32_C( 1530478738), INT32_C( 1934275633)), simde_mm256_set_epi64x(INT64_C( 2035310840), INT64_C( 817509407), INT64_C( 1530478738), INT64_C( 1934275633)) }, { simde_mm_set_epi32(INT32_C(-1872979628), INT32_C( 1510684699), INT32_C( -362741060), INT32_C( 56318596)), simde_mm256_set_epi64x(INT64_C( -1872979628), INT64_C( 1510684699), INT64_C( -362741060), INT64_C( 56318596)) }, { simde_mm_set_epi32(INT32_C( -15196779), INT32_C( 3808236), INT32_C( -178294426), INT32_C(-1675394154)), simde_mm256_set_epi64x(INT64_C( -15196779), INT64_C( 3808236), INT64_C( -178294426), INT64_C( -1675394154)) }, { simde_mm_set_epi32(INT32_C( 1742018933), INT32_C( -523666920), INT32_C(-1989565458), INT32_C( -138158906)), simde_mm256_set_epi64x(INT64_C( 1742018933), INT64_C( -523666920), INT64_C( -1989565458), INT64_C( -138158906)) }, { simde_mm_set_epi32(INT32_C( -851329384), INT32_C(-2094859646), INT32_C(-1382699819), INT32_C( 2104722305)), simde_mm256_set_epi64x(INT64_C( -851329384), INT64_C( 
-2094859646), INT64_C( -1382699819), INT64_C( 2104722305)) }, { simde_mm_set_epi32(INT32_C( 1596198624), INT32_C(-1923442761), INT32_C( 1335983203), INT32_C(-1180029731)), simde_mm256_set_epi64x(INT64_C( 1596198624), INT64_C( -1923442761), INT64_C( 1335983203), INT64_C( -1180029731)) }, { simde_mm_set_epi32(INT32_C( 599337487), INT32_C( 1023189876), INT32_C( 1555456525), INT32_C( -191457824)), simde_mm256_set_epi64x(INT64_C( 599337487), INT64_C( 1023189876), INT64_C( 1555456525), INT64_C( -191457824)) }, { simde_mm_set_epi32(INT32_C( 2100212902), INT32_C( -853030753), INT32_C( -606897046), INT32_C(-1882381199)), simde_mm256_set_epi64x(INT64_C( 2100212902), INT64_C( -853030753), INT64_C( -606897046), INT64_C( -1882381199)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cvtepi32_epi64(test_vec[i].a); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cvtepu8_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256i r; } test_vec[8] = { { simde_x_mm_set_epu8(UINT8_C(240), UINT8_C( 50), UINT8_C(144), UINT8_C( 4), UINT8_C( 7), UINT8_C(233), UINT8_C(157), UINT8_C( 74), UINT8_C(217), UINT8_C( 90), UINT8_C(141), UINT8_C(254), UINT8_C(115), UINT8_C( 96), UINT8_C(126), UINT8_C( 23)), simde_mm256_set_epi16(INT16_C( 240), INT16_C( 50), INT16_C( 144), INT16_C( 4), INT16_C( 7), INT16_C( 233), INT16_C( 157), INT16_C( 74), INT16_C( 217), INT16_C( 90), INT16_C( 141), INT16_C( 254), INT16_C( 115), INT16_C( 96), INT16_C( 126), INT16_C( 23)) }, { simde_x_mm_set_epu8(UINT8_C( 79), UINT8_C(126), UINT8_C(254), UINT8_C( 77), UINT8_C(225), UINT8_C( 76), UINT8_C( 61), UINT8_C(223), UINT8_C( 95), UINT8_C(244), UINT8_C( 66), UINT8_C( 4), UINT8_C(241), UINT8_C(112), UINT8_C(120), UINT8_C(166)), simde_mm256_set_epi16(INT16_C( 79), INT16_C( 126), INT16_C( 254), INT16_C( 77), INT16_C( 225), INT16_C( 76), INT16_C( 61), INT16_C( 223), INT16_C( 95), INT16_C( 244), INT16_C( 66), INT16_C( 
4), INT16_C( 241), INT16_C( 112), INT16_C( 120), INT16_C( 166)) }, { simde_x_mm_set_epu8(UINT8_C( 56), UINT8_C(107), UINT8_C(114), UINT8_C( 86), UINT8_C( 35), UINT8_C( 47), UINT8_C( 25), UINT8_C(187), UINT8_C(176), UINT8_C( 49), UINT8_C(164), UINT8_C(236), UINT8_C(110), UINT8_C( 61), UINT8_C( 64), UINT8_C( 42)), simde_mm256_set_epi16(INT16_C( 56), INT16_C( 107), INT16_C( 114), INT16_C( 86), INT16_C( 35), INT16_C( 47), INT16_C( 25), INT16_C( 187), INT16_C( 176), INT16_C( 49), INT16_C( 164), INT16_C( 236), INT16_C( 110), INT16_C( 61), INT16_C( 64), INT16_C( 42)) }, { simde_x_mm_set_epu8(UINT8_C( 54), UINT8_C(130), UINT8_C(171), UINT8_C( 76), UINT8_C( 75), UINT8_C(192), UINT8_C(152), UINT8_C(247), UINT8_C(165), UINT8_C(252), UINT8_C(115), UINT8_C( 73), UINT8_C( 86), UINT8_C( 52), UINT8_C( 29), UINT8_C(227)), simde_mm256_set_epi16(INT16_C( 54), INT16_C( 130), INT16_C( 171), INT16_C( 76), INT16_C( 75), INT16_C( 192), INT16_C( 152), INT16_C( 247), INT16_C( 165), INT16_C( 252), INT16_C( 115), INT16_C( 73), INT16_C( 86), INT16_C( 52), INT16_C( 29), INT16_C( 227)) }, { simde_x_mm_set_epu8(UINT8_C(175), UINT8_C(214), UINT8_C(138), UINT8_C(132), UINT8_C( 59), UINT8_C( 53), UINT8_C(175), UINT8_C( 98), UINT8_C(195), UINT8_C(230), UINT8_C(207), UINT8_C(189), UINT8_C(100), UINT8_C(255), UINT8_C( 84), UINT8_C(129)), simde_mm256_set_epi16(INT16_C( 175), INT16_C( 214), INT16_C( 138), INT16_C( 132), INT16_C( 59), INT16_C( 53), INT16_C( 175), INT16_C( 98), INT16_C( 195), INT16_C( 230), INT16_C( 207), INT16_C( 189), INT16_C( 100), INT16_C( 255), INT16_C( 84), INT16_C( 129)) }, { simde_x_mm_set_epu8(UINT8_C( 40), UINT8_C( 10), UINT8_C(243), UINT8_C( 28), UINT8_C( 48), UINT8_C(231), UINT8_C(240), UINT8_C( 2), UINT8_C( 1), UINT8_C( 20), UINT8_C(184), UINT8_C(244), UINT8_C(174), UINT8_C(138), UINT8_C( 47), UINT8_C(122)), simde_mm256_set_epi16(INT16_C( 40), INT16_C( 10), INT16_C( 243), INT16_C( 28), INT16_C( 48), INT16_C( 231), INT16_C( 240), INT16_C( 2), INT16_C( 1), INT16_C( 20), INT16_C( 
184), INT16_C( 244), INT16_C( 174), INT16_C( 138), INT16_C( 47), INT16_C( 122)) }, { simde_x_mm_set_epu8(UINT8_C( 90), UINT8_C(144), UINT8_C(116), UINT8_C( 64), UINT8_C(250), UINT8_C(233), UINT8_C(185), UINT8_C(193), UINT8_C(172), UINT8_C(128), UINT8_C( 92), UINT8_C(230), UINT8_C(153), UINT8_C( 17), UINT8_C( 8), UINT8_C(121)), simde_mm256_set_epi16(INT16_C( 90), INT16_C( 144), INT16_C( 116), INT16_C( 64), INT16_C( 250), INT16_C( 233), INT16_C( 185), INT16_C( 193), INT16_C( 172), INT16_C( 128), INT16_C( 92), INT16_C( 230), INT16_C( 153), INT16_C( 17), INT16_C( 8), INT16_C( 121)) }, { simde_x_mm_set_epu8(UINT8_C(135), UINT8_C(132), UINT8_C(184), UINT8_C(244), UINT8_C(164), UINT8_C( 94), UINT8_C(216), UINT8_C(238), UINT8_C(112), UINT8_C(252), UINT8_C( 78), UINT8_C(150), UINT8_C( 72), UINT8_C(215), UINT8_C(214), UINT8_C(215)), simde_mm256_set_epi16(INT16_C( 135), INT16_C( 132), INT16_C( 184), INT16_C( 244), INT16_C( 164), INT16_C( 94), INT16_C( 216), INT16_C( 238), INT16_C( 112), INT16_C( 252), INT16_C( 78), INT16_C( 150), INT16_C( 72), INT16_C( 215), INT16_C( 214), INT16_C( 215)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cvtepu8_epi16(test_vec[i].a); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cvtepu8_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256i r; } test_vec[8] = { { simde_x_mm_set_epu8(UINT8_C(240), UINT8_C( 50), UINT8_C(144), UINT8_C( 4), UINT8_C( 7), UINT8_C(233), UINT8_C(157), UINT8_C( 74), UINT8_C(217), UINT8_C( 90), UINT8_C(141), UINT8_C(254), UINT8_C(115), UINT8_C( 96), UINT8_C(126), UINT8_C( 23)), simde_mm256_set_epi32(INT32_C( 217), INT32_C( 90), INT32_C( 141), INT32_C( 254), INT32_C( 115), INT32_C( 96), INT32_C( 126), INT32_C( 23)) }, { simde_x_mm_set_epu8(UINT8_C( 79), UINT8_C(126), UINT8_C(254), UINT8_C( 77), UINT8_C(225), UINT8_C( 76), UINT8_C( 61), UINT8_C(223), UINT8_C( 95), UINT8_C(244), UINT8_C( 66), UINT8_C( 4), 
UINT8_C(241), UINT8_C(112), UINT8_C(120), UINT8_C(166)), simde_mm256_set_epi32(INT32_C( 95), INT32_C( 244), INT32_C( 66), INT32_C( 4), INT32_C( 241), INT32_C( 112), INT32_C( 120), INT32_C( 166)) }, { simde_x_mm_set_epu8(UINT8_C( 56), UINT8_C(107), UINT8_C(114), UINT8_C( 86), UINT8_C( 35), UINT8_C( 47), UINT8_C( 25), UINT8_C(187), UINT8_C(176), UINT8_C( 49), UINT8_C(164), UINT8_C(236), UINT8_C(110), UINT8_C( 61), UINT8_C( 64), UINT8_C( 42)), simde_mm256_set_epi32(INT32_C( 176), INT32_C( 49), INT32_C( 164), INT32_C( 236), INT32_C( 110), INT32_C( 61), INT32_C( 64), INT32_C( 42)) }, { simde_x_mm_set_epu8(UINT8_C( 54), UINT8_C(130), UINT8_C(171), UINT8_C( 76), UINT8_C( 75), UINT8_C(192), UINT8_C(152), UINT8_C(247), UINT8_C(165), UINT8_C(252), UINT8_C(115), UINT8_C( 73), UINT8_C( 86), UINT8_C( 52), UINT8_C( 29), UINT8_C(227)), simde_mm256_set_epi32(INT32_C( 165), INT32_C( 252), INT32_C( 115), INT32_C( 73), INT32_C( 86), INT32_C( 52), INT32_C( 29), INT32_C( 227)) }, { simde_x_mm_set_epu8(UINT8_C(175), UINT8_C(214), UINT8_C(138), UINT8_C(132), UINT8_C( 59), UINT8_C( 53), UINT8_C(175), UINT8_C( 98), UINT8_C(195), UINT8_C(230), UINT8_C(207), UINT8_C(189), UINT8_C(100), UINT8_C(255), UINT8_C( 84), UINT8_C(129)), simde_mm256_set_epi32(INT32_C( 195), INT32_C( 230), INT32_C( 207), INT32_C( 189), INT32_C( 100), INT32_C( 255), INT32_C( 84), INT32_C( 129)) }, { simde_x_mm_set_epu8(UINT8_C( 40), UINT8_C( 10), UINT8_C(243), UINT8_C( 28), UINT8_C( 48), UINT8_C(231), UINT8_C(240), UINT8_C( 2), UINT8_C( 1), UINT8_C( 20), UINT8_C(184), UINT8_C(244), UINT8_C(174), UINT8_C(138), UINT8_C( 47), UINT8_C(122)), simde_mm256_set_epi32(INT32_C( 1), INT32_C( 20), INT32_C( 184), INT32_C( 244), INT32_C( 174), INT32_C( 138), INT32_C( 47), INT32_C( 122)) }, { simde_x_mm_set_epu8(UINT8_C( 90), UINT8_C(144), UINT8_C(116), UINT8_C( 64), UINT8_C(250), UINT8_C(233), UINT8_C(185), UINT8_C(193), UINT8_C(172), UINT8_C(128), UINT8_C( 92), UINT8_C(230), UINT8_C(153), UINT8_C( 17), UINT8_C( 8), UINT8_C(121)), 
simde_mm256_set_epi32(INT32_C( 172), INT32_C( 128), INT32_C( 92), INT32_C( 230), INT32_C( 153), INT32_C( 17), INT32_C( 8), INT32_C( 121)) }, { simde_x_mm_set_epu8(UINT8_C(135), UINT8_C(132), UINT8_C(184), UINT8_C(244), UINT8_C(164), UINT8_C( 94), UINT8_C(216), UINT8_C(238), UINT8_C(112), UINT8_C(252), UINT8_C( 78), UINT8_C(150), UINT8_C( 72), UINT8_C(215), UINT8_C(214), UINT8_C(215)), simde_mm256_set_epi32(INT32_C( 112), INT32_C( 252), INT32_C( 78), INT32_C( 150), INT32_C( 72), INT32_C( 215), INT32_C( 214), INT32_C( 215)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cvtepu8_epi32(test_vec[i].a); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cvtepu8_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256i r; } test_vec[8] = { { simde_x_mm_set_epu8(UINT8_C( 9), UINT8_C(182), UINT8_C(204), UINT8_C(188), UINT8_C( 95), UINT8_C(246), UINT8_C(157), UINT8_C(121), UINT8_C( 1), UINT8_C( 69), UINT8_C(220), UINT8_C(154), UINT8_C(132), UINT8_C(163), UINT8_C(155), UINT8_C(138)), simde_mm256_set_epi64x(INT64_C( 132), INT64_C( 163), INT64_C( 155), INT64_C( 138)) }, { simde_x_mm_set_epu8(UINT8_C(149), UINT8_C(222), UINT8_C(181), UINT8_C(119), UINT8_C(177), UINT8_C(153), UINT8_C(247), UINT8_C(135), UINT8_C(205), UINT8_C( 4), UINT8_C( 89), UINT8_C(110), UINT8_C(112), UINT8_C( 67), UINT8_C(155), UINT8_C( 87)), simde_mm256_set_epi64x(INT64_C( 112), INT64_C( 67), INT64_C( 155), INT64_C( 87)) }, { simde_x_mm_set_epu8(UINT8_C(171), UINT8_C(121), UINT8_C(140), UINT8_C(121), UINT8_C(213), UINT8_C(176), UINT8_C( 1), UINT8_C(154), UINT8_C( 39), UINT8_C(151), UINT8_C(230), UINT8_C( 88), UINT8_C(204), UINT8_C( 61), UINT8_C(129), UINT8_C( 21)), simde_mm256_set_epi64x(INT64_C( 204), INT64_C( 61), INT64_C( 129), INT64_C( 21)) }, { simde_x_mm_set_epu8(UINT8_C( 85), UINT8_C( 1), UINT8_C( 94), UINT8_C( 60), UINT8_C(102), UINT8_C( 9), UINT8_C(200), UINT8_C( 64), UINT8_C(153), 
UINT8_C(170), UINT8_C( 51), UINT8_C(130), UINT8_C(247), UINT8_C( 88), UINT8_C(204), UINT8_C( 13)), simde_mm256_set_epi64x(INT64_C( 247), INT64_C( 88), INT64_C( 204), INT64_C( 13)) }, { simde_x_mm_set_epu8(UINT8_C( 0), UINT8_C(216), UINT8_C( 42), UINT8_C( 42), UINT8_C(109), UINT8_C(251), UINT8_C(228), UINT8_C(176), UINT8_C( 70), UINT8_C( 6), UINT8_C(205), UINT8_C( 17), UINT8_C(151), UINT8_C(148), UINT8_C( 14), UINT8_C(165)), simde_mm256_set_epi64x(INT64_C( 151), INT64_C( 148), INT64_C( 14), INT64_C( 165)) }, { simde_x_mm_set_epu8(UINT8_C(125), UINT8_C(142), UINT8_C(217), UINT8_C(138), UINT8_C( 29), UINT8_C(175), UINT8_C(135), UINT8_C(115), UINT8_C(128), UINT8_C( 99), UINT8_C( 64), UINT8_C( 7), UINT8_C(114), UINT8_C(155), UINT8_C(129), UINT8_C( 30)), simde_mm256_set_epi64x(INT64_C( 114), INT64_C( 155), INT64_C( 129), INT64_C( 30)) }, { simde_x_mm_set_epu8(UINT8_C(130), UINT8_C(122), UINT8_C( 50), UINT8_C(115), UINT8_C( 72), UINT8_C(200), UINT8_C( 52), UINT8_C(125), UINT8_C(152), UINT8_C(180), UINT8_C( 65), UINT8_C(103), UINT8_C(134), UINT8_C(220), UINT8_C(163), UINT8_C(110)), simde_mm256_set_epi64x(INT64_C( 134), INT64_C( 220), INT64_C( 163), INT64_C( 110)) }, { simde_x_mm_set_epu8(UINT8_C(176), UINT8_C( 98), UINT8_C(137), UINT8_C( 51), UINT8_C(219), UINT8_C(239), UINT8_C(162), UINT8_C(134), UINT8_C(142), UINT8_C( 13), UINT8_C(181), UINT8_C(111), UINT8_C(180), UINT8_C(241), UINT8_C(255), UINT8_C(244)), simde_mm256_set_epi64x(INT64_C( 180), INT64_C( 241), INT64_C( 255), INT64_C( 244)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cvtepu8_epi64(test_vec[i].a); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cvtepu16_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256i r; } test_vec[8] = { { simde_x_mm_set_epu16(UINT16_C(61490), UINT16_C(36868), UINT16_C( 2025), UINT16_C(40266), UINT16_C(55642), UINT16_C(36350), UINT16_C(29536), UINT16_C(32279)), 
simde_mm256_set_epi32(INT32_C( 61490), INT32_C( 36868), INT32_C( 2025), INT32_C( 40266), INT32_C( 55642), INT32_C( 36350), INT32_C( 29536), INT32_C( 32279)) }, { simde_x_mm_set_epu16(UINT16_C(20350), UINT16_C(65101), UINT16_C(57676), UINT16_C(15839), UINT16_C(24564), UINT16_C(16900), UINT16_C(61808), UINT16_C(30886)), simde_mm256_set_epi32(INT32_C( 20350), INT32_C( 65101), INT32_C( 57676), INT32_C( 15839), INT32_C( 24564), INT32_C( 16900), INT32_C( 61808), INT32_C( 30886)) }, { simde_x_mm_set_epu16(UINT16_C(14443), UINT16_C(29270), UINT16_C( 9007), UINT16_C( 6587), UINT16_C(45105), UINT16_C(42220), UINT16_C(28221), UINT16_C(16426)), simde_mm256_set_epi32(INT32_C( 14443), INT32_C( 29270), INT32_C( 9007), INT32_C( 6587), INT32_C( 45105), INT32_C( 42220), INT32_C( 28221), INT32_C( 16426)) }, { simde_x_mm_set_epu16(UINT16_C(13954), UINT16_C(43852), UINT16_C(19392), UINT16_C(39159), UINT16_C(42492), UINT16_C(29513), UINT16_C(22068), UINT16_C( 7651)), simde_mm256_set_epi32(INT32_C( 13954), INT32_C( 43852), INT32_C( 19392), INT32_C( 39159), INT32_C( 42492), INT32_C( 29513), INT32_C( 22068), INT32_C( 7651)) }, { simde_x_mm_set_epu16(UINT16_C(45014), UINT16_C(35460), UINT16_C(15157), UINT16_C(44898), UINT16_C(50150), UINT16_C(53181), UINT16_C(25855), UINT16_C(21633)), simde_mm256_set_epi32(INT32_C( 45014), INT32_C( 35460), INT32_C( 15157), INT32_C( 44898), INT32_C( 50150), INT32_C( 53181), INT32_C( 25855), INT32_C( 21633)) }, { simde_x_mm_set_epu16(UINT16_C(10250), UINT16_C(62236), UINT16_C(12519), UINT16_C(61442), UINT16_C( 276), UINT16_C(47348), UINT16_C(44682), UINT16_C(12154)), simde_mm256_set_epi32(INT32_C( 10250), INT32_C( 62236), INT32_C( 12519), INT32_C( 61442), INT32_C( 276), INT32_C( 47348), INT32_C( 44682), INT32_C( 12154)) }, { simde_x_mm_set_epu16(UINT16_C(23184), UINT16_C(29760), UINT16_C(64233), UINT16_C(47553), UINT16_C(44160), UINT16_C(23782), UINT16_C(39185), UINT16_C( 2169)), simde_mm256_set_epi32(INT32_C( 23184), INT32_C( 29760), INT32_C( 64233), 
INT32_C( 47553), INT32_C( 44160), INT32_C( 23782), INT32_C( 39185), INT32_C( 2169)) }, { simde_x_mm_set_epu16(UINT16_C(34692), UINT16_C(47348), UINT16_C(42078), UINT16_C(55534), UINT16_C(28924), UINT16_C(20118), UINT16_C(18647), UINT16_C(54999)), simde_mm256_set_epi32(INT32_C( 34692), INT32_C( 47348), INT32_C( 42078), INT32_C( 55534), INT32_C( 28924), INT32_C( 20118), INT32_C( 18647), INT32_C( 54999)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cvtepu16_epi32(test_vec[i].a); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cvtepu16_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256i r; } test_vec[8] = { { simde_x_mm_set_epu16(UINT16_C(61490), UINT16_C(36868), UINT16_C( 2025), UINT16_C(40266), UINT16_C(55642), UINT16_C(36350), UINT16_C(29536), UINT16_C(32279)), simde_mm256_set_epi64x(INT64_C( 55642), INT64_C( 36350), INT64_C( 29536), INT64_C( 32279)) }, { simde_x_mm_set_epu16(UINT16_C(20350), UINT16_C(65101), UINT16_C(57676), UINT16_C(15839), UINT16_C(24564), UINT16_C(16900), UINT16_C(61808), UINT16_C(30886)), simde_mm256_set_epi64x(INT64_C( 24564), INT64_C( 16900), INT64_C( 61808), INT64_C( 30886)) }, { simde_x_mm_set_epu16(UINT16_C(14443), UINT16_C(29270), UINT16_C( 9007), UINT16_C( 6587), UINT16_C(45105), UINT16_C(42220), UINT16_C(28221), UINT16_C(16426)), simde_mm256_set_epi64x(INT64_C( 45105), INT64_C( 42220), INT64_C( 28221), INT64_C( 16426)) }, { simde_x_mm_set_epu16(UINT16_C(13954), UINT16_C(43852), UINT16_C(19392), UINT16_C(39159), UINT16_C(42492), UINT16_C(29513), UINT16_C(22068), UINT16_C( 7651)), simde_mm256_set_epi64x(INT64_C( 42492), INT64_C( 29513), INT64_C( 22068), INT64_C( 7651)) }, { simde_x_mm_set_epu16(UINT16_C(45014), UINT16_C(35460), UINT16_C(15157), UINT16_C(44898), UINT16_C(50150), UINT16_C(53181), UINT16_C(25855), UINT16_C(21633)), simde_mm256_set_epi64x(INT64_C( 50150), INT64_C( 53181), INT64_C( 25855), INT64_C( 
21633)) }, { simde_x_mm_set_epu16(UINT16_C(10250), UINT16_C(62236), UINT16_C(12519), UINT16_C(61442), UINT16_C( 276), UINT16_C(47348), UINT16_C(44682), UINT16_C(12154)), simde_mm256_set_epi64x(INT64_C( 276), INT64_C( 47348), INT64_C( 44682), INT64_C( 12154)) }, { simde_x_mm_set_epu16(UINT16_C(23184), UINT16_C(29760), UINT16_C(64233), UINT16_C(47553), UINT16_C(44160), UINT16_C(23782), UINT16_C(39185), UINT16_C( 2169)), simde_mm256_set_epi64x(INT64_C( 44160), INT64_C( 23782), INT64_C( 39185), INT64_C( 2169)) }, { simde_x_mm_set_epu16(UINT16_C(34692), UINT16_C(47348), UINT16_C(42078), UINT16_C(55534), UINT16_C(28924), UINT16_C(20118), UINT16_C(18647), UINT16_C(54999)), simde_mm256_set_epi64x(INT64_C( 28924), INT64_C( 20118), INT64_C( 18647), INT64_C( 54999)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cvtepu16_epi64(test_vec[i].a); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cvtepu32_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m256i r; } test_vec[8] = { { simde_x_mm_set_epu32(UINT32_C(2027668512), UINT32_C(4262540660), UINT32_C(2279720356), UINT32_C( 579063940)), simde_mm256_set_epi64x(INT64_C( 2027668512), INT64_C( 4262540660), INT64_C( 2279720356), INT64_C( 579063940)) }, { simde_x_mm_set_epu32(UINT32_C( 214101781), UINT32_C(2515026933), UINT32_C( 10021235), UINT32_C(4284419101)), simde_mm256_set_epi64x(INT64_C( 214101781), INT64_C( 2515026933), INT64_C( 10021235), INT64_C( 4284419101)) }, { simde_x_mm_set_epu32(UINT32_C(2492492584), UINT32_C(1475674737), UINT32_C( 145233694), UINT32_C(2838555915)), simde_mm256_set_epi64x(INT64_C( 2492492584), INT64_C( 1475674737), INT64_C( 145233694), INT64_C( 2838555915)) }, { simde_x_mm_set_epu32(UINT32_C(3488754722), UINT32_C(2142666247), UINT32_C(4044693026), UINT32_C( 975481583)), simde_mm256_set_epi64x(INT64_C( 3488754722), INT64_C( 2142666247), INT64_C( 4044693026), INT64_C( 975481583)) }, { 
simde_x_mm_set_epu32(UINT32_C(3942926803), UINT32_C( 10053147), UINT32_C(3324554936), UINT32_C( 275092283)), simde_mm256_set_epi64x(INT64_C( 3942926803), INT64_C( 10053147), INT64_C( 3324554936), INT64_C( 275092283)) }, { simde_x_mm_set_epu32(UINT32_C( 609927901), UINT32_C(4032952140), UINT32_C(2163741382), UINT32_C(1197307836)), simde_mm256_set_epi64x(INT64_C( 609927901), INT64_C( 4032952140), INT64_C( 2163741382), INT64_C( 1197307836)) }, { simde_x_mm_set_epu32(UINT32_C( 667494753), UINT32_C( 25338810), UINT32_C( 229628292), UINT32_C(1413771580)), simde_mm256_set_epi64x(INT64_C( 667494753), INT64_C( 25338810), INT64_C( 229628292), INT64_C( 1413771580)) }, { simde_x_mm_set_epu32(UINT32_C(3461166204), UINT32_C(2882591041), UINT32_C(4038947223), UINT32_C(3672325978)), simde_mm256_set_epi64x(INT64_C( 3461166204), INT64_C( 2882591041), INT64_C( 4038947223), INT64_C( 3672325978)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_cvtepu32_epi64(test_vec[i].a); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_extract_epi8(SIMDE_MUNIT_TEST_ARGS) { simde__m256i a; a = simde_mm256_set_epi8(INT8_C( 109), INT8_C( -15), INT8_C( -7), INT8_C( 79), INT8_C( 63), INT8_C( 13), INT8_C( 55), INT8_C( 74), INT8_C( -46), INT8_C( 126), INT8_C(-104), INT8_C( 83), INT8_C( -94), INT8_C( 46), INT8_C( -10), INT8_C( -51), INT8_C( -63), INT8_C( 8), INT8_C( 47), INT8_C( 108), INT8_C( 38), INT8_C( -24), INT8_C( -54), INT8_C( 83), INT8_C( 41), INT8_C( 48), INT8_C(-118), INT8_C( 66), INT8_C( -73), INT8_C( 110), INT8_C( 47), INT8_C( 78)); simde_assert_equal_i8(HEDLEY_STATIC_CAST(int8_t, simde_mm256_extract_epi8(a, 1)), INT8_C( 47)); a = simde_mm256_set_epi8(INT8_C( 9), INT8_C( 60), INT8_C( -76), INT8_C( 57), INT8_C( 106), INT8_C( -35), INT8_C( -51), INT8_C( 33), INT8_C(-119), INT8_C( 27), INT8_C(-103), INT8_C( -96), INT8_C( 114), INT8_C( 4), INT8_C( -76), INT8_C( -65), INT8_C( 105), INT8_C( -42), INT8_C( 
71), INT8_C( 12), INT8_C(-113), INT8_C( 32), INT8_C(-107), INT8_C( -55), INT8_C( 87), INT8_C( 74), INT8_C( 33), INT8_C( 32), INT8_C( -83), INT8_C( -97), INT8_C( 40), INT8_C(-118)); simde_assert_equal_i8(HEDLEY_STATIC_CAST(int8_t, simde_mm256_extract_epi8(a, 12)), INT8_C( 12)); a = simde_mm256_set_epi8(INT8_C( 53), INT8_C( -56), INT8_C( -74), INT8_C( 14), INT8_C( 84), INT8_C( 78), INT8_C( 58), INT8_C( 56), INT8_C(-126), INT8_C( 20), INT8_C( 56), INT8_C( 20), INT8_C( 6), INT8_C( 127), INT8_C( 111), INT8_C(-117), INT8_C( -66), INT8_C(-101), INT8_C( 45), INT8_C( 7), INT8_C(-108), INT8_C( 110), INT8_C( -80), INT8_C( -37), INT8_C( -87), INT8_C( -76), INT8_C( -93), INT8_C( -39), INT8_C( 5), INT8_C( 40), INT8_C( 106), INT8_C( -44)); simde_assert_equal_i8(HEDLEY_STATIC_CAST(int8_t, simde_mm256_extract_epi8(a, 2)), INT8_C( 40)); a = simde_mm256_set_epi8(INT8_C( -54), INT8_C( -60), INT8_C( 8), INT8_C(-114), INT8_C( -71), INT8_C( 78), INT8_C( -79), INT8_C( 48), INT8_C( 112), INT8_C( 109), INT8_C( 51), INT8_C( -46), INT8_C( -65), INT8_C( -24), INT8_C( 81), INT8_C( 65), INT8_C( 88), INT8_C( 30), INT8_C( -69), INT8_C( -54), INT8_C( 87), INT8_C( 123), INT8_C( 124), INT8_C( -89), INT8_C( 14), INT8_C( -48), INT8_C( 73), INT8_C( 34), INT8_C( -22), INT8_C( -74), INT8_C( 4), INT8_C( -22)); simde_assert_equal_i8(HEDLEY_STATIC_CAST(int8_t, simde_mm256_extract_epi8(a, 1)), INT8_C( 4)); a = simde_mm256_set_epi8(INT8_C( -34), INT8_C( 20), INT8_C( 68), INT8_C( -53), INT8_C( 24), INT8_C( -70), INT8_C( -82), INT8_C( 20), INT8_C(-104), INT8_C( -97), INT8_C( 126), INT8_C(-128), INT8_C( 102), INT8_C( -37), INT8_C( -20), INT8_C( -7), INT8_C( -78), INT8_C( 110), INT8_C( -59), INT8_C( 89), INT8_C( -18), INT8_C( -26), INT8_C( -89), INT8_C( 39), INT8_C( -79), INT8_C( 100), INT8_C( -39), INT8_C( 76), INT8_C( -51), INT8_C( -31), INT8_C( 26), INT8_C( -70)); simde_assert_equal_i8(HEDLEY_STATIC_CAST(int8_t, simde_mm256_extract_epi8(a, 27)), INT8_C( 24)); a = simde_mm256_set_epi8(INT8_C( -85), INT8_C( -44), 
INT8_C( 101), INT8_C( 109), INT8_C( 58), INT8_C( 71), INT8_C( 75), INT8_C( 93), INT8_C( -37), INT8_C( 91), INT8_C( 6), INT8_C( 95), INT8_C( -47), INT8_C( 107), INT8_C( 114), INT8_C( -12), INT8_C( 86), INT8_C( 23), INT8_C( -82), INT8_C( 84), INT8_C( -80), INT8_C( -54), INT8_C(-107), INT8_C( -58), INT8_C( -42), INT8_C( -79), INT8_C( 59), INT8_C( -50), INT8_C( 63), INT8_C(-125), INT8_C( -96), INT8_C( -58)); simde_assert_equal_i8(HEDLEY_STATIC_CAST(int8_t, simde_mm256_extract_epi8(a, 11)), INT8_C( -80)); a = simde_mm256_set_epi8(INT8_C( 71), INT8_C( 18), INT8_C( 82), INT8_C( -74), INT8_C( 9), INT8_C( -6), INT8_C( -94), INT8_C( -46), INT8_C( 17), INT8_C( 25), INT8_C( -11), INT8_C(-128), INT8_C(-116), INT8_C( 77), INT8_C( 76), INT8_C( 8), INT8_C( 80), INT8_C( -50), INT8_C( 11), INT8_C( 10), INT8_C( 107), INT8_C( -99), INT8_C( 37), INT8_C( 5), INT8_C( 107), INT8_C(-118), INT8_C( 119), INT8_C( -11), INT8_C( 84), INT8_C( 8), INT8_C( 15), INT8_C(-102)); simde_assert_equal_i8(HEDLEY_STATIC_CAST(int8_t, simde_mm256_extract_epi8(a, 9)), INT8_C( 37)); a = simde_mm256_set_epi8(INT8_C( 84), INT8_C(-100), INT8_C( 116), INT8_C( -53), INT8_C(-104), INT8_C( 52), INT8_C( 51), INT8_C( 60), INT8_C( 91), INT8_C( 114), INT8_C(-106), INT8_C( -11), INT8_C( 83), INT8_C( 57), INT8_C( 4), INT8_C(-120), INT8_C( 77), INT8_C( 71), INT8_C( 1), INT8_C( 95), INT8_C( 23), INT8_C( 89), INT8_C( 112), INT8_C( -7), INT8_C( -66), INT8_C( 78), INT8_C( 88), INT8_C( 5), INT8_C( 75), INT8_C( 72), INT8_C( -87), INT8_C( 47)); simde_assert_equal_i8(HEDLEY_STATIC_CAST(int8_t, simde_mm256_extract_epi8(a, 27)), INT8_C(-104)); return 0; } static int test_simde_mm256_extract_epi16(SIMDE_MUNIT_TEST_ARGS) { simde__m256i a; a = simde_mm256_set_epi16(INT16_C( -9152), INT16_C(-17321), INT16_C( -3541), INT16_C( 31629), INT16_C( 4310), INT16_C(-20495), INT16_C(-28807), INT16_C(-17056), INT16_C(-19221), INT16_C( 32236), INT16_C(-26695), INT16_C( -3004), INT16_C( -4570), INT16_C(-14787), INT16_C( 7635), INT16_C(-23471)); 
simde_assert_equal_i16(HEDLEY_STATIC_CAST(int16_t, simde_mm256_extract_epi16(a, 4)), INT16_C( -3004)); a = simde_mm256_set_epi16(INT16_C( 10010), INT16_C( 6255), INT16_C(-20985), INT16_C( 25937), INT16_C( 187), INT16_C(-14746), INT16_C( 4260), INT16_C( -5314), INT16_C( 21499), INT16_C(-10726), INT16_C(-28550), INT16_C(-26957), INT16_C( 25383), INT16_C(-32368), INT16_C(-28215), INT16_C(-29614)); simde_assert_equal_i16(HEDLEY_STATIC_CAST(int16_t, simde_mm256_extract_epi16(a, 3)), INT16_C( 25383)); a = simde_mm256_set_epi16(INT16_C( -9332), INT16_C( -3776), INT16_C( -659), INT16_C(-11998), INT16_C(-22530), INT16_C( 30025), INT16_C( -7620), INT16_C( -4652), INT16_C( 12144), INT16_C( 2985), INT16_C(-29351), INT16_C( 7652), INT16_C( 29358), INT16_C( -2482), INT16_C( 2031), INT16_C(-18027)); simde_assert_equal_i16(HEDLEY_STATIC_CAST(int16_t, simde_mm256_extract_epi16(a, 12)), INT16_C(-11998)); a = simde_mm256_set_epi16(INT16_C( 5566), INT16_C( 26716), INT16_C(-28005), INT16_C(-20328), INT16_C(-22447), INT16_C(-29429), INT16_C( 18844), INT16_C(-28207), INT16_C( 13304), INT16_C( 29506), INT16_C( 26640), INT16_C(-30988), INT16_C( 7094), INT16_C( 15837), INT16_C( 17850), INT16_C( -4007)); simde_assert_equal_i16(HEDLEY_STATIC_CAST(int16_t, simde_mm256_extract_epi16(a, 8)), INT16_C(-28207)); a = simde_mm256_set_epi16(INT16_C(-22807), INT16_C( 19457), INT16_C(-27368), INT16_C(-30791), INT16_C( -9280), INT16_C(-12893), INT16_C(-14741), INT16_C( -5696), INT16_C( 4897), INT16_C( 24376), INT16_C(-27876), INT16_C( 29206), INT16_C(-29469), INT16_C(-22113), INT16_C( 23067), INT16_C( 6063)); simde_assert_equal_i16(HEDLEY_STATIC_CAST(int16_t, simde_mm256_extract_epi16(a, 3)), INT16_C(-29469)); a = simde_mm256_set_epi16(INT16_C( -6124), INT16_C( 1398), INT16_C( 383), INT16_C( 1785), INT16_C(-30725), INT16_C(-20290), INT16_C( 11961), INT16_C(-25919), INT16_C(-25742), INT16_C(-31321), INT16_C( -6067), INT16_C( 17059), INT16_C( 16943), INT16_C( 31917), INT16_C( 18056), INT16_C(-23274)); 
simde_assert_equal_i16(HEDLEY_STATIC_CAST(int16_t, simde_mm256_extract_epi16(a, 7)), INT16_C(-25742)); a = simde_mm256_set_epi16(INT16_C( 29739), INT16_C(-15442), INT16_C(-18760), INT16_C( 3107), INT16_C(-25745), INT16_C( -8), INT16_C( -5543), INT16_C( 3310), INT16_C( 13765), INT16_C(-29431), INT16_C( 31912), INT16_C( 20910), INT16_C(-32282), INT16_C( 8881), INT16_C(-19453), INT16_C(-25360)); simde_assert_equal_i16(HEDLEY_STATIC_CAST(int16_t, simde_mm256_extract_epi16(a, 0)), INT16_C(-25360)); a = simde_mm256_set_epi16(INT16_C( 19041), INT16_C( 28526), INT16_C( 632), INT16_C( 12890), INT16_C( -9054), INT16_C( 9044), INT16_C(-24624), INT16_C(-18390), INT16_C( -7692), INT16_C( 20879), INT16_C( 2760), INT16_C( 29306), INT16_C( 5480), INT16_C( 22577), INT16_C(-15668), INT16_C(-16497)); simde_assert_equal_i16(HEDLEY_STATIC_CAST(int16_t, simde_mm256_extract_epi16(a, 0)), INT16_C(-16497)); return 0; } static int test_simde_mm256_extracti128_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m128i ra; simde__m128i rb; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 1229394801), INT32_C( 992221618), INT32_C(-1388107406), INT32_C( 780445625), INT32_C( 1795700153), INT32_C( -297324271), INT32_C( 1549329146), INT32_C( -534963225)), simde_mm256_set_epi32(INT32_C( -867719772), INT32_C(-1804212438), INT32_C( 1849818353), INT32_C( 405560893), INT32_C( 1351338386), INT32_C( -886724662), INT32_C( 1197680760), INT32_C( 1003042592)), simde_mm_set_epi32(INT32_C( 1795700153), INT32_C( -297324271), INT32_C( 1549329146), INT32_C( -534963225)), simde_mm_set_epi32(INT32_C( 1229394801), INT32_C( 992221618), INT32_C(-1388107406), INT32_C( 780445625)) }, { simde_mm256_set_epi32(INT32_C( 1839397279), INT32_C( -585358334), INT32_C( 779178160), INT32_C( -362976984), INT32_C(-1015866216), INT32_C( -556342867), INT32_C( -766379029), INT32_C( -130781639)), simde_mm256_set_epi32(INT32_C( 2021215895), INT32_C( -422044488), INT32_C(-1385706777), INT32_C( 22702781), 
INT32_C( 1076807274), INT32_C(-1923875401), INT32_C( -554846936), INT32_C(-1008226174)), simde_mm_set_epi32(INT32_C(-1015866216), INT32_C( -556342867), INT32_C( -766379029), INT32_C( -130781639)), simde_mm_set_epi32(INT32_C( 1839397279), INT32_C( -585358334), INT32_C( 779178160), INT32_C( -362976984)) }, { simde_mm256_set_epi32(INT32_C( 23865749), INT32_C( -908972624), INT32_C(-1642418179), INT32_C( 111689864), INT32_C( -835154412), INT32_C( -431540196), INT32_C( -161564683), INT32_C( 204589457)), simde_mm256_set_epi32(INT32_C( 2088662618), INT32_C(-1671363325), INT32_C( -997695043), INT32_C( -809764814), INT32_C( 2092581708), INT32_C(-1073689737), INT32_C(-1556963227), INT32_C( -641330488)), simde_mm_set_epi32(INT32_C( -835154412), INT32_C( -431540196), INT32_C( -161564683), INT32_C( 204589457)), simde_mm_set_epi32(INT32_C( 23865749), INT32_C( -908972624), INT32_C(-1642418179), INT32_C( 111689864)) }, { simde_mm256_set_epi32(INT32_C( 1727711569), INT32_C(-1915329589), INT32_C( -979233658), INT32_C( -409203179), INT32_C( 1343207861), INT32_C(-1541174422), INT32_C(-2097250480), INT32_C(-1382492089)), simde_mm256_set_epi32(INT32_C( -42175512), INT32_C(-2146588690), INT32_C(-1902868938), INT32_C( 1919945739), INT32_C( 410749235), INT32_C(-1828962645), INT32_C( 525862553), INT32_C( -282512400)), simde_mm_set_epi32(INT32_C( 1343207861), INT32_C(-1541174422), INT32_C(-2097250480), INT32_C(-1382492089)), simde_mm_set_epi32(INT32_C( 1727711569), INT32_C(-1915329589), INT32_C( -979233658), INT32_C( -409203179)) }, { simde_mm256_set_epi32(INT32_C( 377369527), INT32_C( 1159197718), INT32_C( 288677560), INT32_C( 828517622), INT32_C( 1815109517), INT32_C( 1103735854), INT32_C( 1342116414), INT32_C( 1750949195)), simde_mm256_set_epi32(INT32_C( 1481955155), INT32_C( -119794855), INT32_C(-2109995042), INT32_C( 582656481), INT32_C( 1178951500), INT32_C( 762286037), INT32_C( 628377158), INT32_C( -188026020)), simde_mm_set_epi32(INT32_C( 1815109517), INT32_C( 1103735854), INT32_C( 
1342116414), INT32_C( 1750949195)), simde_mm_set_epi32(INT32_C( 377369527), INT32_C( 1159197718), INT32_C( 288677560), INT32_C( 828517622)) }, { simde_mm256_set_epi32(INT32_C(-1996051424), INT32_C( -314294760), INT32_C( -770521150), INT32_C( 508113145), INT32_C( -677093043), INT32_C( -527636644), INT32_C( 1238565466), INT32_C(-1592387355)), simde_mm256_set_epi32(INT32_C(-1510707643), INT32_C( 1988531398), INT32_C(-1182276921), INT32_C( 363503044), INT32_C( 2086268932), INT32_C( -428647595), INT32_C( 1685321543), INT32_C( 1979089365)), simde_mm_set_epi32(INT32_C( -677093043), INT32_C( -527636644), INT32_C( 1238565466), INT32_C(-1592387355)), simde_mm_set_epi32(INT32_C(-1996051424), INT32_C( -314294760), INT32_C( -770521150), INT32_C( 508113145)) }, { simde_mm256_set_epi32(INT32_C( 4593159), INT32_C( 1779671737), INT32_C( -569674634), INT32_C( -184254965), INT32_C( -665786654), INT32_C( 663766301), INT32_C(-1237697897), INT32_C( -260948936)), simde_mm256_set_epi32(INT32_C( -575114102), INT32_C( -399786699), INT32_C(-1468780124), INT32_C( 2032090700), INT32_C( 723386747), INT32_C(-1766232746), INT32_C( 73837413), INT32_C( 496540408)), simde_mm_set_epi32(INT32_C( -665786654), INT32_C( 663766301), INT32_C(-1237697897), INT32_C( -260948936)), simde_mm_set_epi32(INT32_C( 4593159), INT32_C( 1779671737), INT32_C( -569674634), INT32_C( -184254965)) }, { simde_mm256_set_epi32(INT32_C( -328197013), INT32_C( 1036318270), INT32_C(-1930293157), INT32_C( 1948339432), INT32_C( 1903716614), INT32_C(-1951673698), INT32_C(-1858071379), INT32_C( 2070124471)), simde_mm256_set_epi32(INT32_C(-1815372819), INT32_C( -102535612), INT32_C( 115383384), INT32_C( 1004544095), INT32_C( 1506420054), INT32_C(-1014523798), INT32_C(-1776388104), INT32_C( 1550371104)), simde_mm_set_epi32(INT32_C( 1903716614), INT32_C(-1951673698), INT32_C(-1858071379), INT32_C( 2070124471)), simde_mm_set_epi32(INT32_C( -328197013), INT32_C( 1036318270), INT32_C(-1930293157), INT32_C( 1948339432)) } }; for (size_t i = 
0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i ra = simde_mm256_extracti128_si256(test_vec[i].a, 0); simde__m128i rb = simde_mm256_extracti128_si256(test_vec[i].a, 1); simde_assert_m128i_i32(ra, ==, test_vec[i].ra); simde_assert_m128i_i32(rb, ==, test_vec[i].rb); } return 0; } static int test_simde_mm256_hadd_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C(-18444), INT16_C(-18028), INT16_C( 29026), INT16_C( 2084), INT16_C( 4112), INT16_C(-30013), INT16_C( 26536), INT16_C(-22613), INT16_C( 28256), INT16_C(-13992), INT16_C( 1054), INT16_C(-17387), INT16_C( 28361), INT16_C(-13892), INT16_C(-23621), INT16_C( -6864)), simde_mm256_set_epi16(INT16_C( -2077), INT16_C( 32093), INT16_C(-32096), INT16_C( -1919), INT16_C(-29576), INT16_C( 1752), INT16_C( -9460), INT16_C( 18637), INT16_C( 12808), INT16_C( 5796), INT16_C( 8151), INT16_C( -2897), INT16_C(-24515), INT16_C(-31464), INT16_C(-13532), INT16_C( -9881)), simde_mm256_set_epi16(INT16_C( 30016), INT16_C( 31521), INT16_C(-27824), INT16_C( 9177), INT16_C( 29064), INT16_C( 31110), INT16_C(-25901), INT16_C( 3923), INT16_C( 18604), INT16_C( 5254), INT16_C( 9557), INT16_C(-23413), INT16_C( 14264), INT16_C(-16333), INT16_C( 14469), INT16_C(-30485)) }, { simde_mm256_set_epi16(INT16_C( 9100), INT16_C( -1983), INT16_C( 28981), INT16_C( 31131), INT16_C( -8267), INT16_C( 9829), INT16_C(-19843), INT16_C( 28705), INT16_C( 15872), INT16_C( 21932), INT16_C( -9103), INT16_C( 20935), INT16_C(-24996), INT16_C( 13650), INT16_C( 30282), INT16_C(-21973)), simde_mm256_set_epi16(INT16_C( -9238), INT16_C(-15000), INT16_C( 28358), INT16_C(-31774), INT16_C( 20723), INT16_C( 27208), INT16_C(-20512), INT16_C( 10808), INT16_C( 17124), INT16_C( 20983), INT16_C( 113), INT16_C( 22835), INT16_C( -190), INT16_C(-28607), INT16_C( 22616), INT16_C( 4805)), simde_mm256_set_epi16(INT16_C(-24238), INT16_C( -3416), INT16_C(-17605), INT16_C( 
-9704), INT16_C( 7117), INT16_C( -5424), INT16_C( 1562), INT16_C( 8862), INT16_C(-27429), INT16_C( 22948), INT16_C(-28797), INT16_C( 27421), INT16_C(-27732), INT16_C( 11832), INT16_C(-11346), INT16_C( 8309)) }, { simde_mm256_set_epi16(INT16_C( 26837), INT16_C( 17774), INT16_C(-32310), INT16_C( -9579), INT16_C( 3637), INT16_C(-14106), INT16_C( 30046), INT16_C(-13930), INT16_C( -1887), INT16_C( 13772), INT16_C( 19874), INT16_C(-18102), INT16_C(-11204), INT16_C( -4897), INT16_C( 3260), INT16_C(-17962)), simde_mm256_set_epi16(INT16_C( 22389), INT16_C(-14730), INT16_C( 31871), INT16_C( 17642), INT16_C(-26185), INT16_C( -2982), INT16_C(-14158), INT16_C( 4590), INT16_C( 20601), INT16_C( 21976), INT16_C( 25432), INT16_C( 31010), INT16_C(-23314), INT16_C( -9816), INT16_C( 8140), INT16_C( -7858)), simde_mm256_set_epi16(INT16_C( 7659), INT16_C(-16023), INT16_C(-29167), INT16_C( -9568), INT16_C(-20925), INT16_C( 23647), INT16_C(-10469), INT16_C( 16116), INT16_C(-22959), INT16_C( -9094), INT16_C( 32406), INT16_C( 282), INT16_C( 11885), INT16_C( 1772), INT16_C(-16101), INT16_C(-14702)) }, { simde_mm256_set_epi16(INT16_C( 12057), INT16_C(-24517), INT16_C( -9967), INT16_C( -7239), INT16_C( -2143), INT16_C(-23349), INT16_C(-25096), INT16_C(-21587), INT16_C( -802), INT16_C( 25377), INT16_C(-22001), INT16_C( 28281), INT16_C(-28768), INT16_C(-19834), INT16_C( 8025), INT16_C(-23943)), simde_mm256_set_epi16(INT16_C( 6731), INT16_C(-13423), INT16_C( -6728), INT16_C(-19678), INT16_C(-29476), INT16_C( -9124), INT16_C(-21742), INT16_C( 14418), INT16_C( 27306), INT16_C( 25972), INT16_C(-12648), INT16_C(-16450), INT16_C(-25118), INT16_C( 4239), INT16_C(-17383), INT16_C(-20693)), simde_mm256_set_epi16(INT16_C( -6692), INT16_C(-26406), INT16_C( 26936), INT16_C( -7324), INT16_C(-12460), INT16_C(-17206), INT16_C(-25492), INT16_C( 18853), INT16_C(-12258), INT16_C(-29098), INT16_C(-20879), INT16_C( 27460), INT16_C( 24575), INT16_C( 6280), INT16_C( 16934), INT16_C(-15918)) }, { 
simde_mm256_set_epi16(INT16_C(-15868), INT16_C(-27060), INT16_C( 528), INT16_C(-29935), INT16_C(-12298), INT16_C( 18504), INT16_C( -5289), INT16_C( 430), INT16_C(-29328), INT16_C( 3228), INT16_C( 18568), INT16_C(-13568), INT16_C( -2471), INT16_C( -1530), INT16_C( 5334), INT16_C( 31888)), simde_mm256_set_epi16(INT16_C( -2903), INT16_C(-32300), INT16_C( 29546), INT16_C( 12096), INT16_C(-31860), INT16_C(-30618), INT16_C( 1236), INT16_C( -9131), INT16_C( -497), INT16_C(-22865), INT16_C( 30177), INT16_C( 29201), INT16_C(-26632), INT16_C(-25915), INT16_C( 14957), INT16_C( 6108)), simde_mm256_set_epi16(INT16_C( 30333), INT16_C(-23894), INT16_C( 3058), INT16_C( -7895), INT16_C( 22608), INT16_C(-29407), INT16_C( 6206), INT16_C( -4859), INT16_C(-23362), INT16_C( -6158), INT16_C( 12989), INT16_C( 21065), INT16_C(-26100), INT16_C( 5000), INT16_C( -4001), INT16_C(-28314)) }, { simde_mm256_set_epi16(INT16_C( 4205), INT16_C( 23268), INT16_C(-32329), INT16_C(-20305), INT16_C( -9331), INT16_C( -4387), INT16_C(-30285), INT16_C( 13621), INT16_C( 15818), INT16_C( 16370), INT16_C(-24200), INT16_C( 10901), INT16_C( 17049), INT16_C( 14552), INT16_C( -5992), INT16_C(-17203)), simde_mm256_set_epi16(INT16_C( 15660), INT16_C( 25069), INT16_C(-11965), INT16_C( -9994), INT16_C( -7271), INT16_C( 17410), INT16_C( 14381), INT16_C(-30587), INT16_C( 11396), INT16_C(-22996), INT16_C( 26099), INT16_C(-11680), INT16_C(-18604), INT16_C( 20100), INT16_C( 5959), INT16_C(-27899)), simde_mm256_set_epi16(INT16_C(-24807), INT16_C(-21959), INT16_C( 10139), INT16_C(-16206), INT16_C( 27473), INT16_C( 12902), INT16_C(-13718), INT16_C(-16664), INT16_C(-11600), INT16_C( 14419), INT16_C( 1496), INT16_C(-21940), INT16_C( 32188), INT16_C(-13299), INT16_C( 31601), INT16_C(-23195)) }, { simde_mm256_set_epi16(INT16_C( 27483), INT16_C( -502), INT16_C( 138), INT16_C( -7690), INT16_C( 19176), INT16_C(-26261), INT16_C(-11443), INT16_C(-23576), INT16_C( 22206), INT16_C(-25802), INT16_C( 16334), INT16_C(-18863), INT16_C( 
31357), INT16_C(-31063), INT16_C( 28401), INT16_C(-12622)), simde_mm256_set_epi16(INT16_C( 13717), INT16_C( 31993), INT16_C( 16367), INT16_C(-21175), INT16_C( 23569), INT16_C( -7701), INT16_C( -1073), INT16_C( 24510), INT16_C( 22852), INT16_C( 29964), INT16_C( -1882), INT16_C( -8354), INT16_C(-16212), INT16_C( 25899), INT16_C( 11699), INT16_C(-20150)), simde_mm256_set_epi16(INT16_C(-19826), INT16_C( -4808), INT16_C( 15868), INT16_C( 23437), INT16_C( 26981), INT16_C( -7552), INT16_C( -7085), INT16_C( 30517), INT16_C(-12720), INT16_C(-10236), INT16_C( 9687), INT16_C( -8451), INT16_C( -3596), INT16_C( -2529), INT16_C( 294), INT16_C( 15779)) }, { simde_mm256_set_epi16(INT16_C( -5381), INT16_C(-10578), INT16_C(-31523), INT16_C(-23458), INT16_C( 22155), INT16_C( 10052), INT16_C( 22251), INT16_C(-22857), INT16_C( -468), INT16_C( 4672), INT16_C(-23974), INT16_C(-31691), INT16_C( 11898), INT16_C( 4678), INT16_C( 5316), INT16_C(-17657)), simde_mm256_set_epi16(INT16_C( 15026), INT16_C(-17248), INT16_C(-13886), INT16_C( 26220), INT16_C( -1198), INT16_C( -7005), INT16_C(-21616), INT16_C(-30390), INT16_C(-30119), INT16_C(-22301), INT16_C(-29984), INT16_C( 6696), INT16_C( 12300), INT16_C( 16913), INT16_C( 12770), INT16_C( 3850)), simde_mm256_set_epi16(INT16_C( -2222), INT16_C( 12334), INT16_C( -8203), INT16_C( 13530), INT16_C(-15959), INT16_C( 10555), INT16_C( 32207), INT16_C( -606), INT16_C( 13116), INT16_C(-23288), INT16_C( 29213), INT16_C( 16620), INT16_C( 4204), INT16_C( 9871), INT16_C( 16576), INT16_C(-12341)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_hadd_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_hadd_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C(-1208698476), INT32_C( 1902250020), INT32_C( 269519555), INT32_C( 1739106219), INT32_C( 
1851836760), INT32_C( 69123093), INT32_C( 1858718140), INT32_C(-1547967184)), simde_mm256_set_epi32(INT32_C( -136086179), INT32_C(-2103379839), INT32_C(-1938290984), INT32_C( -619951923), INT32_C( 839390884), INT32_C( 534246575), INT32_C(-1606580968), INT32_C( -886777497)), simde_mm256_set_epi32(INT32_C( 2055501278), INT32_C( 1736724389), INT32_C( 693551544), INT32_C( 2008625774), INT32_C( 1373637459), INT32_C( 1801608831), INT32_C( 1920959853), INT32_C( 310750956)) }, { simde_mm256_set_epi32(INT32_C( 596441153), INT32_C( 1899329947), INT32_C( -541776283), INT32_C(-1300402143), INT32_C( 1040209324), INT32_C( -596553273), INT32_C(-1638124206), INT32_C( 1984604715)), simde_mm256_set_epi32(INT32_C( -605371032), INT32_C( 1858503650), INT32_C( 1358129736), INT32_C(-1344263624), INT32_C( 1122259447), INT32_C( 7428403), INT32_C( -12414911), INT32_C( 1482166981)), simde_mm256_set_epi32(INT32_C( 1253132618), INT32_C( 13866112), INT32_C(-1799196196), INT32_C(-1842178426), INT32_C( 1129687850), INT32_C( 1469752070), INT32_C( 443656051), INT32_C( 346480509)) }, { simde_mm256_set_epi32(INT32_C( 1758807406), INT32_C(-2117412203), INT32_C( 238405862), INT32_C( 1969146262), INT32_C( -123652660), INT32_C( 1302509898), INT32_C( -734204705), INT32_C( 213694934)), simde_mm256_set_epi32(INT32_C( 1467336310), INT32_C( 2088715498), INT32_C(-1715997606), INT32_C( -927854098), INT32_C( 1350129112), INT32_C( 1666742562), INT32_C(-1527850584), INT32_C( 533520718)), simde_mm256_set_epi32(INT32_C( -738915488), INT32_C( 1651115592), INT32_C( -358604797), INT32_C(-2087415172), INT32_C(-1278095622), INT32_C( -994329866), INT32_C( 1178857238), INT32_C( -520509771)) }, { simde_mm256_set_epi32(INT32_C( 790208571), INT32_C( -653139015), INT32_C( -140401461), INT32_C(-1644647507), INT32_C( -52534495), INT32_C(-1441829255), INT32_C(-1885293946), INT32_C( 525967993)), simde_mm256_set_epi32(INT32_C( 441174929), INT32_C( -440880350), INT32_C(-1931682724), INT32_C(-1424869294), INT32_C( 1789551988), 
INT32_C( -828850242), INT32_C(-1646129009), INT32_C(-1139167445)), simde_mm256_set_epi32(INT32_C( 294579), INT32_C( 938415278), INT32_C( 137069556), INT32_C(-1785048968), INT32_C( 960701746), INT32_C( 1509670842), INT32_C(-1494363750), INT32_C(-1359325953)) }, { simde_mm256_set_epi32(INT32_C(-1039886772), INT32_C( 34638609), INT32_C( -805943224), INT32_C( -346619474), INT32_C(-1922036580), INT32_C( 1216924416), INT32_C( -161875450), INT32_C( 349600912)), simde_mm256_set_epi32(INT32_C( -190217772), INT32_C( 1936338752), INT32_C(-2087942042), INT32_C( 81058901), INT32_C( -32528721), INT32_C( 1977709073), INT32_C(-1745315131), INT32_C( 980228060)), simde_mm256_set_epi32(INT32_C( 1746120980), INT32_C(-2006883141), INT32_C(-1005248163), INT32_C(-1152562698), INT32_C( 1945180352), INT32_C( -765087071), INT32_C( -705112164), INT32_C( 187725462)) }, { simde_mm256_set_epi32(INT32_C( 275602148), INT32_C(-2118668113), INT32_C( -611455267), INT32_C(-1984744139), INT32_C( 1036664818), INT32_C(-1585960299), INT32_C( 1117337816), INT32_C( -392643379)), simde_mm256_set_epi32(INT32_C( 1026318829), INT32_C( -784082698), INT32_C( -476494846), INT32_C( 942508165), INT32_C( 746890796), INT32_C( 1710477920), INT32_C(-1219211644), INT32_C( 390566661)), simde_mm256_set_epi32(INT32_C( 242236131), INT32_C( 466013319), INT32_C(-1843065965), INT32_C( 1698767890), INT32_C(-1837598580), INT32_C( -828644983), INT32_C( -549295481), INT32_C( 724694437)) }, { simde_mm256_set_epi32(INT32_C( 1801190922), INT32_C( 9101814), INT32_C( 1256757611), INT32_C( -749886488), INT32_C( 1455332150), INT32_C( 1070511697), INT32_C( 2055046825), INT32_C( 1861340850)), simde_mm256_set_epi32(INT32_C( 898989305), INT32_C( 1072672073), INT32_C( 1544675819), INT32_C( -70295618), INT32_C( 1497658636), INT32_C( -123281570), INT32_C(-1062443733), INT32_C( 766751050)), simde_mm256_set_epi32(INT32_C( 1971661378), INT32_C( 1474380201), INT32_C( 1810292736), INT32_C( 506871123), INT32_C( 1374377066), INT32_C( -295692683), 
INT32_C(-1769123449), INT32_C( -378579621)) }, { simde_mm256_set_epi32(INT32_C( -352594258), INT32_C(-2065849250), INT32_C( 1451960132), INT32_C( 1458284215), INT32_C( -30666176), INT32_C(-1571126219), INT32_C( 779752006), INT32_C( 348437255)), simde_mm256_set_epi32(INT32_C( 984792224), INT32_C( -910006676), INT32_C( -78453597), INT32_C(-1416591030), INT32_C(-1973835549), INT32_C(-1965024728), INT32_C( 806109713), INT32_C( 836898570)), simde_mm256_set_epi32(INT32_C( 74785548), INT32_C(-1495044627), INT32_C( 1876523788), INT32_C(-1384722949), INT32_C( 356107019), INT32_C( 1643008283), INT32_C(-1601792395), INT32_C( 1128189261)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_hadd_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_hadds_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { { -INT16_C( 10867), INT16_C( 3622), -INT16_C( 28779), -INT16_C( 15004), INT16_C( 21958), INT16_C( 28225), -INT16_C( 9443), -INT16_C( 17328), INT16_C( 9723), INT16_C( 22832), INT16_C( 14776), INT16_C( 12956), INT16_C( 11112), -INT16_C( 20463), INT16_C( 32154), INT16_C( 10109) }, { -INT16_C( 23726), -INT16_C( 6346), -INT16_C( 26062), -INT16_C( 1876), -INT16_C( 4369), INT16_C( 3175), -INT16_C( 18487), -INT16_C( 14904), -INT16_C( 1572), -INT16_C( 27362), -INT16_C( 17614), -INT16_C( 25913), -INT16_C( 10010), -INT16_C( 32693), -INT16_C( 14251), -INT16_C( 22360) }, { -INT16_C( 7245), INT16_MIN, INT16_MAX, -INT16_C( 26771), -INT16_C( 30072), -INT16_C( 27938), -INT16_C( 1194), INT16_MIN, INT16_C( 32555), INT16_C( 27732), -INT16_C( 9351), INT16_MAX, -INT16_C( 28934), INT16_MIN, INT16_MIN, INT16_MIN } }, { { -INT16_C( 8596), -INT16_C( 24945), INT16_C( 15480), INT16_C( 26519), -INT16_C( 470), -INT16_C( 3213), INT16_C( 15285), -INT16_C( 28232), -INT16_C( 10444), INT16_C( 26406), 
-INT16_C( 4718), INT16_C( 30721), INT16_C( 19654), INT16_C( 7160), -INT16_C( 24555), -INT16_C( 32317) }, { INT16_C( 21374), -INT16_C( 2529), -INT16_C( 18801), -INT16_C( 18083), -INT16_C( 12108), INT16_C( 27052), INT16_C( 25868), INT16_C( 16635), INT16_C( 8508), -INT16_C( 12633), -INT16_C( 22257), -INT16_C( 10938), INT16_C( 16117), INT16_C( 2800), -INT16_C( 19233), INT16_C( 23947) }, { INT16_MIN, INT16_MAX, -INT16_C( 3683), -INT16_C( 12947), INT16_C( 18845), INT16_MIN, INT16_C( 14944), INT16_MAX, INT16_C( 15962), INT16_C( 26003), INT16_C( 26814), INT16_MIN, -INT16_C( 4125), INT16_MIN, INT16_C( 18917), INT16_C( 4714) } }, { { -INT16_C( 21753), -INT16_C( 27052), -INT16_C( 20127), INT16_C( 5711), -INT16_C( 1150), -INT16_C( 29057), INT16_C( 31328), -INT16_C( 25394), INT16_C( 30364), -INT16_C( 21654), -INT16_C( 20449), INT16_C( 5248), INT16_C( 28911), -INT16_C( 12769), -INT16_C( 21980), INT16_C( 11051) }, { INT16_C( 32597), -INT16_C( 18495), INT16_C( 4145), -INT16_C( 19507), INT16_C( 19468), INT16_C( 27713), INT16_C( 4039), INT16_C( 25353), INT16_C( 29573), -INT16_C( 23538), -INT16_C( 29148), INT16_C( 5049), -INT16_C( 9986), INT16_C( 9185), INT16_C( 3202), -INT16_C( 10162) }, { INT16_MIN, -INT16_C( 14416), -INT16_C( 30207), INT16_C( 5934), INT16_C( 14102), -INT16_C( 15362), INT16_MAX, INT16_C( 29392), INT16_C( 8710), -INT16_C( 15201), INT16_C( 16142), -INT16_C( 10929), INT16_C( 6035), -INT16_C( 24099), -INT16_C( 801), -INT16_C( 6960) } }, { { INT16_C( 4236), -INT16_C( 17009), INT16_C( 23584), INT16_C( 11376), -INT16_C( 20056), INT16_C( 28569), -INT16_C( 23872), INT16_C( 18130), -INT16_C( 8171), INT16_C( 14826), -INT16_C( 23698), INT16_C( 27980), INT16_C( 11643), -INT16_C( 368), -INT16_C( 8646), -INT16_C( 14634) }, { INT16_C( 26094), INT16_C( 3971), -INT16_C( 3135), INT16_C( 26939), -INT16_C( 11100), INT16_C( 25817), -INT16_C( 21642), -INT16_C( 29526), -INT16_C( 27252), -INT16_C( 1339), INT16_C( 4664), -INT16_C( 19353), -INT16_C( 2241), INT16_C( 31154), -INT16_C( 30506), 
-INT16_C( 15297) }, { -INT16_C( 12773), INT16_MAX, INT16_C( 8513), -INT16_C( 5742), INT16_C( 30065), INT16_C( 23804), INT16_C( 14717), INT16_MIN, INT16_C( 6655), INT16_C( 4282), INT16_C( 11275), -INT16_C( 23280), -INT16_C( 28591), -INT16_C( 14689), INT16_C( 28913), INT16_MIN } }, { { -INT16_C( 15635), -INT16_C( 20781), INT16_C( 4021), INT16_C( 22807), -INT16_C( 3869), INT16_C( 23230), INT16_C( 26780), INT16_C( 10470), -INT16_C( 21507), INT16_C( 13858), -INT16_C( 30019), -INT16_C( 534), -INT16_C( 25471), INT16_C( 22390), -INT16_C( 18908), INT16_C( 4380) }, { -INT16_C( 4232), INT16_C( 11967), -INT16_C( 10498), -INT16_C( 7545), INT16_C( 17863), INT16_C( 25404), INT16_C( 8878), -INT16_C( 21621), -INT16_C( 21043), -INT16_C( 29727), -INT16_C( 13513), -INT16_C( 18040), -INT16_C( 409), -INT16_C( 29936), INT16_C( 11444), INT16_C( 11676) }, { INT16_MIN, INT16_C( 26828), INT16_C( 19361), INT16_MAX, INT16_C( 7735), -INT16_C( 18043), INT16_MAX, -INT16_C( 12743), -INT16_C( 7649), -INT16_C( 30553), -INT16_C( 3081), -INT16_C( 14528), INT16_MIN, -INT16_C( 31553), -INT16_C( 30345), INT16_C( 23120) } }, { { INT16_C( 23324), INT16_C( 6747), -INT16_C( 7630), -INT16_C( 1540), INT16_C( 14376), -INT16_C( 10660), -INT16_C( 6310), INT16_C( 10369), INT16_C( 25492), -INT16_C( 13133), INT16_C( 15150), -INT16_C( 27003), -INT16_C( 27335), -INT16_C( 4575), -INT16_C( 16702), -INT16_C( 8677) }, { INT16_C( 30233), INT16_C( 19448), -INT16_C( 2728), -INT16_C( 32700), -INT16_C( 24531), -INT16_C( 30634), -INT16_C( 10105), INT16_C( 7344), INT16_C( 25403), INT16_C( 27112), INT16_C( 28062), -INT16_C( 10241), INT16_C( 8450), -INT16_C( 15163), -INT16_C( 7969), -INT16_C( 1886) }, { INT16_C( 30071), -INT16_C( 9170), INT16_C( 3716), INT16_C( 4059), INT16_MAX, INT16_MIN, INT16_MIN, -INT16_C( 2761), INT16_C( 12359), -INT16_C( 11853), -INT16_C( 31910), -INT16_C( 25379), INT16_MAX, INT16_C( 17821), -INT16_C( 6713), -INT16_C( 9855) } }, { { -INT16_C( 25770), -INT16_C( 20668), -INT16_C( 30576), -INT16_C( 17105), 
-INT16_C( 31191), -INT16_C( 20411), -INT16_C( 2722), -INT16_C( 26164), -INT16_C( 19368), -INT16_C( 2558), INT16_C( 545), INT16_C( 9422), -INT16_C( 27869), INT16_C( 744), -INT16_C( 29836), -INT16_C( 13574) }, { INT16_C( 15910), -INT16_C( 18823), -INT16_C( 22073), -INT16_C( 3981), -INT16_C( 18129), -INT16_C( 29280), INT16_C( 28078), INT16_C( 1830), INT16_C( 10273), INT16_C( 17405), -INT16_C( 13526), INT16_C( 19815), INT16_C( 20319), -INT16_C( 11441), INT16_C( 19162), INT16_C( 157) }, { INT16_MIN, INT16_MIN, INT16_MIN, -INT16_C( 28886), -INT16_C( 2913), -INT16_C( 26054), INT16_MIN, INT16_C( 29908), -INT16_C( 21926), INT16_C( 9967), -INT16_C( 27125), INT16_MIN, INT16_C( 27678), INT16_C( 6289), INT16_C( 8878), INT16_C( 19319) } }, { { INT16_C( 6024), INT16_C( 20406), INT16_C( 10944), -INT16_C( 4289), -INT16_C( 7965), -INT16_C( 28292), -INT16_C( 23987), INT16_C( 28312), -INT16_C( 26934), -INT16_C( 2639), INT16_C( 6241), -INT16_C( 16318), -INT16_C( 28056), INT16_C( 17043), INT16_C( 12764), INT16_C( 25667) }, { -INT16_C( 1720), INT16_C( 2228), -INT16_C( 3293), INT16_C( 1783), INT16_C( 29651), INT16_C( 8344), INT16_C( 12309), -INT16_C( 8305), INT16_C( 16582), INT16_C( 10452), INT16_C( 5977), -INT16_C( 15896), INT16_C( 31913), -INT16_C( 31485), INT16_C( 18093), -INT16_C( 2583) }, { INT16_C( 26430), INT16_C( 6655), INT16_MIN, INT16_C( 4325), INT16_C( 508), -INT16_C( 1510), INT16_MAX, INT16_C( 4004), -INT16_C( 29573), -INT16_C( 10077), -INT16_C( 11013), INT16_MAX, INT16_C( 27034), -INT16_C( 9919), INT16_C( 428), INT16_C( 15510) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_hadds_epi16(a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm256_hsub_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; 
simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C(-18444), INT16_C(-18028), INT16_C( 29026), INT16_C( 2084), INT16_C( 4112), INT16_C(-30013), INT16_C( 26536), INT16_C(-22613), INT16_C( 28256), INT16_C(-13992), INT16_C( 1054), INT16_C(-17387), INT16_C( 28361), INT16_C(-13892), INT16_C(-23621), INT16_C( -6864)), simde_mm256_set_epi16(INT16_C( -2077), INT16_C( 32093), INT16_C(-32096), INT16_C( -1919), INT16_C(-29576), INT16_C( 1752), INT16_C( -9460), INT16_C( 18637), INT16_C( 12808), INT16_C( 5796), INT16_C( 8151), INT16_C( -2897), INT16_C(-24515), INT16_C(-31464), INT16_C(-13532), INT16_C( -9881)), simde_mm256_set_epi16(INT16_C(-31366), INT16_C( 30177), INT16_C( 31328), INT16_C( 28097), INT16_C( 416), INT16_C(-26942), INT16_C( 31411), INT16_C( 16387), INT16_C( -7012), INT16_C(-11048), INT16_C( -6949), INT16_C( 3651), INT16_C( 23288), INT16_C(-18441), INT16_C( 23283), INT16_C( 16757)) }, { simde_mm256_set_epi16(INT16_C( 9100), INT16_C( -1983), INT16_C( 28981), INT16_C( 31131), INT16_C( -8267), INT16_C( 9829), INT16_C(-19843), INT16_C( 28705), INT16_C( 15872), INT16_C( 21932), INT16_C( -9103), INT16_C( 20935), INT16_C(-24996), INT16_C( 13650), INT16_C( 30282), INT16_C(-21973)), simde_mm256_set_epi16(INT16_C( -9238), INT16_C(-15000), INT16_C( 28358), INT16_C(-31774), INT16_C( 20723), INT16_C( 27208), INT16_C(-20512), INT16_C( 10808), INT16_C( 17124), INT16_C( 20983), INT16_C( 113), INT16_C( 22835), INT16_C( -190), INT16_C(-28607), INT16_C( 22616), INT16_C( 4805)), simde_mm256_set_epi16(INT16_C( -5762), INT16_C( 5404), INT16_C( 6485), INT16_C( 31320), INT16_C(-11083), INT16_C( 2150), INT16_C( 18096), INT16_C(-16988), INT16_C( 3859), INT16_C( 22722), INT16_C(-28417), INT16_C(-17811), INT16_C( 6060), INT16_C( 30038), INT16_C(-26890), INT16_C( 13281)) }, { simde_mm256_set_epi16(INT16_C( 26837), INT16_C( 17774), INT16_C(-32310), INT16_C( -9579), INT16_C( 3637), INT16_C(-14106), INT16_C( 30046), INT16_C(-13930), INT16_C( -1887), INT16_C( 13772), INT16_C( 
19874), INT16_C(-18102), INT16_C(-11204), INT16_C( -4897), INT16_C( 3260), INT16_C(-17962)), simde_mm256_set_epi16(INT16_C( 22389), INT16_C(-14730), INT16_C( 31871), INT16_C( 17642), INT16_C(-26185), INT16_C( -2982), INT16_C(-14158), INT16_C( 4590), INT16_C( 20601), INT16_C( 21976), INT16_C( 25432), INT16_C( 31010), INT16_C(-23314), INT16_C( -9816), INT16_C( 8140), INT16_C( -7858)), simde_mm256_set_epi16(INT16_C( 28417), INT16_C(-14229), INT16_C( 23203), INT16_C( 18748), INT16_C( -9063), INT16_C( 22731), INT16_C(-17743), INT16_C( 21560), INT16_C( 1375), INT16_C( 5578), INT16_C( 13498), INT16_C(-15998), INT16_C( 15659), INT16_C( 27560), INT16_C( 6307), INT16_C(-21222)) }, { simde_mm256_set_epi16(INT16_C( 12057), INT16_C(-24517), INT16_C( -9967), INT16_C( -7239), INT16_C( -2143), INT16_C(-23349), INT16_C(-25096), INT16_C(-21587), INT16_C( -802), INT16_C( 25377), INT16_C(-22001), INT16_C( 28281), INT16_C(-28768), INT16_C(-19834), INT16_C( 8025), INT16_C(-23943)), simde_mm256_set_epi16(INT16_C( 6731), INT16_C(-13423), INT16_C( -6728), INT16_C(-19678), INT16_C(-29476), INT16_C( -9124), INT16_C(-21742), INT16_C( 14418), INT16_C( 27306), INT16_C( 25972), INT16_C(-12648), INT16_C(-16450), INT16_C(-25118), INT16_C( 4239), INT16_C(-17383), INT16_C(-20693)), simde_mm256_set_epi16(INT16_C(-20154), INT16_C(-12950), INT16_C( 20352), INT16_C(-29376), INT16_C( 28962), INT16_C( 2728), INT16_C(-21206), INT16_C( 3509), INT16_C( -1334), INT16_C( -3802), INT16_C( 29357), INT16_C( -3310), INT16_C( 26179), INT16_C(-15254), INT16_C( 8934), INT16_C(-31968)) }, { simde_mm256_set_epi16(INT16_C(-15868), INT16_C(-27060), INT16_C( 528), INT16_C(-29935), INT16_C(-12298), INT16_C( 18504), INT16_C( -5289), INT16_C( 430), INT16_C(-29328), INT16_C( 3228), INT16_C( 18568), INT16_C(-13568), INT16_C( -2471), INT16_C( -1530), INT16_C( 5334), INT16_C( 31888)), simde_mm256_set_epi16(INT16_C( -2903), INT16_C(-32300), INT16_C( 29546), INT16_C( 12096), INT16_C(-31860), INT16_C(-30618), INT16_C( 1236), 
INT16_C( -9131), INT16_C( -497), INT16_C(-22865), INT16_C( 30177), INT16_C( 29201), INT16_C(-26632), INT16_C(-25915), INT16_C( 14957), INT16_C( 6108)), simde_mm256_set_epi16(INT16_C(-29397), INT16_C(-17450), INT16_C( 1242), INT16_C(-10367), INT16_C(-11192), INT16_C(-30463), INT16_C( 30802), INT16_C( 5719), INT16_C(-22368), INT16_C( -976), INT16_C( 717), INT16_C( -8849), INT16_C( 32556), INT16_C(-32136), INT16_C( 941), INT16_C( 26554)) }, { simde_mm256_set_epi16(INT16_C( 4205), INT16_C( 23268), INT16_C(-32329), INT16_C(-20305), INT16_C( -9331), INT16_C( -4387), INT16_C(-30285), INT16_C( 13621), INT16_C( 15818), INT16_C( 16370), INT16_C(-24200), INT16_C( 10901), INT16_C( 17049), INT16_C( 14552), INT16_C( -5992), INT16_C(-17203)), simde_mm256_set_epi16(INT16_C( 15660), INT16_C( 25069), INT16_C(-11965), INT16_C( -9994), INT16_C( -7271), INT16_C( 17410), INT16_C( 14381), INT16_C(-30587), INT16_C( 11396), INT16_C(-22996), INT16_C( 26099), INT16_C(-11680), INT16_C(-18604), INT16_C( 20100), INT16_C( 5959), INT16_C(-27899)), simde_mm256_set_epi16(INT16_C( 9409), INT16_C( 1971), INT16_C( 24681), INT16_C( 20568), INT16_C( 19063), INT16_C( 12024), INT16_C( 4944), INT16_C(-21630), INT16_C( 31144), INT16_C( 27757), INT16_C(-26832), INT16_C( 31678), INT16_C( 552), INT16_C(-30435), INT16_C( -2497), INT16_C(-11211)) }, { simde_mm256_set_epi16(INT16_C( 27483), INT16_C( -502), INT16_C( 138), INT16_C( -7690), INT16_C( 19176), INT16_C(-26261), INT16_C(-11443), INT16_C(-23576), INT16_C( 22206), INT16_C(-25802), INT16_C( 16334), INT16_C(-18863), INT16_C( 31357), INT16_C(-31063), INT16_C( 28401), INT16_C(-12622)), simde_mm256_set_epi16(INT16_C( 13717), INT16_C( 31993), INT16_C( 16367), INT16_C(-21175), INT16_C( 23569), INT16_C( -7701), INT16_C( -1073), INT16_C( 24510), INT16_C( 22852), INT16_C( 29964), INT16_C( -1882), INT16_C( -8354), INT16_C(-16212), INT16_C( 25899), INT16_C( 11699), INT16_C(-20150)), simde_mm256_set_epi16(INT16_C( 18276), INT16_C( 27994), INT16_C(-31270), INT16_C( 
25583), INT16_C(-27985), INT16_C( -7828), INT16_C( 20099), INT16_C(-12133), INT16_C( 7112), INT16_C( -6472), INT16_C(-23425), INT16_C(-31849), INT16_C( 17528), INT16_C( 30339), INT16_C( 3116), INT16_C( 24513)) }, { simde_mm256_set_epi16(INT16_C( -5381), INT16_C(-10578), INT16_C(-31523), INT16_C(-23458), INT16_C( 22155), INT16_C( 10052), INT16_C( 22251), INT16_C(-22857), INT16_C( -468), INT16_C( 4672), INT16_C(-23974), INT16_C(-31691), INT16_C( 11898), INT16_C( 4678), INT16_C( 5316), INT16_C(-17657)), simde_mm256_set_epi16(INT16_C( 15026), INT16_C(-17248), INT16_C(-13886), INT16_C( 26220), INT16_C( -1198), INT16_C( -7005), INT16_C(-21616), INT16_C(-30390), INT16_C(-30119), INT16_C(-22301), INT16_C(-29984), INT16_C( 6696), INT16_C( 12300), INT16_C( 16913), INT16_C( 12770), INT16_C( 3850)), simde_mm256_set_epi16(INT16_C(-32274), INT16_C(-25430), INT16_C( -5807), INT16_C( -8774), INT16_C( -5197), INT16_C( 8065), INT16_C(-12103), INT16_C( 20428), INT16_C( 7818), INT16_C(-28856), INT16_C( 4613), INT16_C( -8920), INT16_C( 5140), INT16_C( -7717), INT16_C( -7220), INT16_C(-22973)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_hsub_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_hsub_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C(-1208698476), INT32_C( 1902250020), INT32_C( 269519555), INT32_C( 1739106219), INT32_C( 1851836760), INT32_C( 69123093), INT32_C( 1858718140), INT32_C(-1547967184)), simde_mm256_set_epi32(INT32_C( -136086179), INT32_C(-2103379839), INT32_C(-1938290984), INT32_C( -619951923), INT32_C( 839390884), INT32_C( 534246575), INT32_C(-1606580968), INT32_C( -886777497)), simde_mm256_set_epi32(INT32_C(-1967293660), INT32_C( 1318339061), INT32_C(-1184018800), INT32_C( 1469586664), INT32_C( -305144309), INT32_C( 719803471), 
INT32_C(-1782713667), INT32_C( 888281972)) }, { simde_mm256_set_epi32(INT32_C( 596441153), INT32_C( 1899329947), INT32_C( -541776283), INT32_C(-1300402143), INT32_C( 1040209324), INT32_C( -596553273), INT32_C(-1638124206), INT32_C( 1984604715)), simde_mm256_set_epi32(INT32_C( -605371032), INT32_C( 1858503650), INT32_C( 1358129736), INT32_C(-1344263624), INT32_C( 1122259447), INT32_C( 7428403), INT32_C( -12414911), INT32_C( 1482166981)), simde_mm256_set_epi32(INT32_C(-1831092614), INT32_C( 1592573936), INT32_C( 1302888794), INT32_C( -758625860), INT32_C(-1114831044), INT32_C( 1494581892), INT32_C(-1636762597), INT32_C( -672238375)) }, { simde_mm256_set_epi32(INT32_C( 1758807406), INT32_C(-2117412203), INT32_C( 238405862), INT32_C( 1969146262), INT32_C( -123652660), INT32_C( 1302509898), INT32_C( -734204705), INT32_C( 213694934)), simde_mm256_set_epi32(INT32_C( 1467336310), INT32_C( 2088715498), INT32_C(-1715997606), INT32_C( -927854098), INT32_C( 1350129112), INT32_C( 1666742562), INT32_C(-1527850584), INT32_C( 533520718)), simde_mm256_set_epi32(INT32_C( 621379188), INT32_C( 788143508), INT32_C( 418747687), INT32_C( 1730740400), INT32_C( 316613450), INT32_C( 2061371302), INT32_C( 1426162558), INT32_C( 947899639)) }, { simde_mm256_set_epi32(INT32_C( 790208571), INT32_C( -653139015), INT32_C( -140401461), INT32_C(-1644647507), INT32_C( -52534495), INT32_C(-1441829255), INT32_C(-1885293946), INT32_C( 525967993)), simde_mm256_set_epi32(INT32_C( 441174929), INT32_C( -440880350), INT32_C(-1931682724), INT32_C(-1424869294), INT32_C( 1789551988), INT32_C( -828850242), INT32_C(-1646129009), INT32_C(-1139167445)), simde_mm256_set_epi32(INT32_C( -882055279), INT32_C( 506813430), INT32_C(-1443347586), INT32_C(-1504246046), INT32_C( 1676565066), INT32_C( 506961564), INT32_C(-1389294760), INT32_C(-1883705357)) }, { simde_mm256_set_epi32(INT32_C(-1039886772), INT32_C( 34638609), INT32_C( -805943224), INT32_C( -346619474), INT32_C(-1922036580), INT32_C( 1216924416), INT32_C( 
-161875450), INT32_C( 349600912)), simde_mm256_set_epi32(INT32_C( -190217772), INT32_C( 1936338752), INT32_C(-2087942042), INT32_C( 81058901), INT32_C( -32528721), INT32_C( 1977709073), INT32_C(-1745315131), INT32_C( 980228060)), simde_mm256_set_epi32(INT32_C( 2126556524), INT32_C(-2125966353), INT32_C( 1074525381), INT32_C( 459323750), INT32_C( 2010237794), INT32_C(-1569424105), INT32_C(-1156006300), INT32_C( 511476362)) }, { simde_mm256_set_epi32(INT32_C( 275602148), INT32_C(-2118668113), INT32_C( -611455267), INT32_C(-1984744139), INT32_C( 1036664818), INT32_C(-1585960299), INT32_C( 1117337816), INT32_C( -392643379)), simde_mm256_set_epi32(INT32_C( 1026318829), INT32_C( -784082698), INT32_C( -476494846), INT32_C( 942508165), INT32_C( 746890796), INT32_C( 1710477920), INT32_C(-1219211644), INT32_C( 390566661)), simde_mm256_set_epi32(INT32_C(-1810401527), INT32_C( 1419003011), INT32_C( 1900697035), INT32_C(-1373288872), INT32_C( 963587124), INT32_C( 1609778305), INT32_C( 1672342179), INT32_C(-1509981195)) }, { simde_mm256_set_epi32(INT32_C( 1801190922), INT32_C( 9101814), INT32_C( 1256757611), INT32_C( -749886488), INT32_C( 1455332150), INT32_C( 1070511697), INT32_C( 2055046825), INT32_C( 1861340850)), simde_mm256_set_epi32(INT32_C( 898989305), INT32_C( 1072672073), INT32_C( 1544675819), INT32_C( -70295618), INT32_C( 1497658636), INT32_C( -123281570), INT32_C(-1062443733), INT32_C( 766751050)), simde_mm256_set_epi32(INT32_C( 173682768), INT32_C(-1614971437), INT32_C(-1792089108), INT32_C(-2006644099), INT32_C(-1620940206), INT32_C( 1829194783), INT32_C( -384820453), INT32_C( -193705975)) }, { simde_mm256_set_epi32(INT32_C( -352594258), INT32_C(-2065849250), INT32_C( 1451960132), INT32_C( 1458284215), INT32_C( -30666176), INT32_C(-1571126219), INT32_C( 779752006), INT32_C( 348437255)), simde_mm256_set_epi32(INT32_C( 984792224), INT32_C( -910006676), INT32_C( -78453597), INT32_C(-1416591030), INT32_C(-1973835549), INT32_C(-1965024728), INT32_C( 806109713), INT32_C( 
836898570)), simde_mm256_set_epi32(INT32_C(-1894798900), INT32_C(-1338137433), INT32_C(-1713254992), INT32_C( 6324083), INT32_C( 8810821), INT32_C( 30788857), INT32_C(-1540460043), INT32_C( -431314751)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_hsub_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_hsubs_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { { -INT16_C( 26245), -INT16_C( 12255), -INT16_C( 13259), -INT16_C( 32044), INT16_C( 31061), INT16_C( 23401), -INT16_C( 9352), -INT16_C( 31983), -INT16_C( 3563), -INT16_C( 3232), -INT16_C( 4207), INT16_C( 2011), -INT16_C( 23858), INT16_C( 31980), INT16_C( 8339), INT16_C( 3620) }, { INT16_C( 17849), -INT16_C( 4386), -INT16_C( 19950), INT16_C( 26481), -INT16_C( 9685), -INT16_C( 23357), -INT16_C( 11082), -INT16_C( 13529), -INT16_C( 30778), INT16_C( 22463), -INT16_C( 25994), INT16_C( 17502), INT16_C( 19261), -INT16_C( 12096), -INT16_C( 6805), INT16_C( 9438) }, { -INT16_C( 13990), INT16_C( 18785), INT16_C( 7660), INT16_C( 22631), INT16_C( 22235), INT16_MIN, INT16_C( 13672), INT16_C( 2447), -INT16_C( 331), -INT16_C( 6218), INT16_MIN, INT16_C( 4719), INT16_MIN, INT16_MIN, INT16_C( 31357), -INT16_C( 16243) } }, { { -INT16_C( 17366), INT16_C( 15379), -INT16_C( 31634), -INT16_C( 26204), INT16_C( 26462), INT16_C( 5181), INT16_C( 25659), INT16_C( 480), -INT16_C( 24597), INT16_C( 24921), -INT16_C( 18631), INT16_C( 30373), INT16_C( 26114), INT16_C( 27974), INT16_C( 9291), INT16_C( 30098) }, { -INT16_C( 23072), INT16_C( 20146), INT16_C( 22057), -INT16_C( 30744), INT16_C( 9661), -INT16_C( 1892), INT16_C( 31882), INT16_C( 30201), INT16_C( 21019), INT16_C( 21719), INT16_C( 31754), INT16_C( 3275), INT16_C( 4578), INT16_C( 11642), INT16_C( 3126), INT16_C( 5795) }, { -INT16_C( 32745), -INT16_C( 5430), INT16_C( 
21281), INT16_C( 25179), INT16_MIN, INT16_MAX, INT16_C( 11553), INT16_C( 1681), INT16_MIN, INT16_MIN, -INT16_C( 1860), -INT16_C( 20807), -INT16_C( 700), INT16_C( 28479), -INT16_C( 7064), -INT16_C( 2669) } }, { { INT16_C( 21937), -INT16_C( 9627), INT16_C( 19883), INT16_C( 26721), -INT16_C( 654), -INT16_C( 928), INT16_C( 22905), -INT16_C( 27534), INT16_C( 18860), -INT16_C( 18711), -INT16_C( 19259), -INT16_C( 22334), INT16_C( 15557), -INT16_C( 1067), INT16_C( 30792), -INT16_C( 1774) }, { INT16_C( 30669), INT16_C( 30931), INT16_C( 13764), INT16_C( 14048), INT16_C( 16434), -INT16_C( 21453), -INT16_C( 23142), INT16_C( 17984), INT16_C( 10734), -INT16_C( 19460), -INT16_C( 16675), -INT16_C( 23717), INT16_C( 12795), INT16_C( 17310), -INT16_C( 20311), INT16_C( 30525) }, { INT16_C( 31564), -INT16_C( 6838), INT16_C( 274), INT16_MAX, -INT16_C( 262), -INT16_C( 284), INT16_MAX, INT16_MIN, INT16_MAX, INT16_C( 3075), INT16_C( 16624), INT16_C( 32566), INT16_C( 30194), INT16_C( 7042), -INT16_C( 4515), INT16_MIN } }, { { INT16_C( 4135), -INT16_C( 5137), -INT16_C( 12219), INT16_C( 30754), INT16_C( 21776), -INT16_C( 21980), INT16_C( 25850), -INT16_C( 5904), -INT16_C( 4978), INT16_C( 27547), -INT16_C( 2133), -INT16_C( 23026), -INT16_C( 21208), -INT16_C( 11799), INT16_C( 9821), -INT16_C( 31416) }, { INT16_C( 14391), INT16_C( 31856), -INT16_C( 28152), INT16_C( 6388), INT16_C( 6375), -INT16_C( 7741), -INT16_C( 19587), INT16_C( 3017), INT16_C( 26016), INT16_C( 19318), -INT16_C( 31396), -INT16_C( 31503), -INT16_C( 9678), -INT16_C( 28843), -INT16_C( 25087), INT16_C( 14356) }, { INT16_C( 9272), INT16_MIN, INT16_MAX, INT16_C( 31754), -INT16_C( 17465), INT16_MIN, INT16_C( 14116), -INT16_C( 22604), -INT16_C( 32525), INT16_C( 20893), -INT16_C( 9409), INT16_MAX, INT16_C( 6698), INT16_C( 107), INT16_C( 19165), INT16_MIN } }, { { -INT16_C( 31274), -INT16_C( 8524), -INT16_C( 22249), -INT16_C( 10), -INT16_C( 17983), INT16_C( 16096), -INT16_C( 21907), INT16_C( 3401), -INT16_C( 16369), INT16_C( 27480), 
INT16_C( 18757), INT16_C( 30703), INT16_C( 17443), INT16_C( 9222), INT16_C( 7138), -INT16_C( 18340) }, { INT16_C( 4512), -INT16_C( 18538), -INT16_C( 29254), INT16_C( 31670), -INT16_C( 26810), -INT16_C( 19526), INT16_C( 833), INT16_C( 20672), INT16_C( 6339), INT16_C( 2235), -INT16_C( 21919), -INT16_C( 31361), -INT16_C( 30994), -INT16_C( 11863), INT16_C( 1697), INT16_C( 16777) }, { -INT16_C( 22750), -INT16_C( 22239), INT16_MIN, -INT16_C( 25308), INT16_C( 23050), INT16_MIN, -INT16_C( 7284), -INT16_C( 19839), INT16_MIN, -INT16_C( 11946), INT16_C( 8221), INT16_C( 25478), INT16_C( 4104), INT16_C( 9442), -INT16_C( 19131), -INT16_C( 15080) } }, { { INT16_C( 8215), -INT16_C( 11784), -INT16_C( 20563), -INT16_C( 3252), INT16_C( 1606), -INT16_C( 30809), INT16_C( 26378), -INT16_C( 12841), -INT16_C( 28032), -INT16_C( 7722), INT16_C( 21820), INT16_C( 10854), INT16_C( 4315), INT16_C( 31995), -INT16_C( 31466), INT16_C( 11709) }, { -INT16_C( 18779), INT16_C( 21246), INT16_C( 19045), -INT16_C( 21691), -INT16_C( 5039), INT16_C( 23346), INT16_C( 2388), -INT16_C( 11224), -INT16_C( 357), -INT16_C( 10315), INT16_C( 7252), INT16_C( 12033), -INT16_C( 724), INT16_C( 17068), INT16_C( 27010), INT16_C( 10095) }, { INT16_C( 19999), -INT16_C( 17311), INT16_C( 32415), INT16_MAX, INT16_MIN, INT16_MAX, -INT16_C( 28385), INT16_C( 13612), -INT16_C( 20310), INT16_C( 10966), -INT16_C( 27680), INT16_MIN, INT16_C( 9958), -INT16_C( 4781), -INT16_C( 17792), INT16_C( 16915) } }, { { INT16_C( 27935), -INT16_C( 31623), -INT16_C( 16713), INT16_C( 2095), INT16_C( 25003), -INT16_C( 157), -INT16_C( 29590), INT16_C( 1491), -INT16_C( 30582), -INT16_C( 8484), -INT16_C( 8540), -INT16_C( 12274), -INT16_C( 17701), INT16_C( 23826), -INT16_C( 32477), INT16_C( 17284) }, { -INT16_C( 530), -INT16_C( 22841), -INT16_C( 2117), INT16_C( 26286), INT16_C( 4696), -INT16_C( 15515), INT16_C( 14494), INT16_C( 10440), -INT16_C( 23103), INT16_C( 25863), INT16_C( 5507), INT16_C( 24118), INT16_C( 18639), -INT16_C( 3397), INT16_C( 16330), 
-INT16_C( 18379) }, { INT16_MAX, -INT16_C( 18808), INT16_C( 25160), -INT16_C( 31081), INT16_C( 22311), -INT16_C( 28403), INT16_C( 20211), INT16_C( 4054), -INT16_C( 22098), INT16_C( 3734), INT16_MIN, INT16_MIN, INT16_MIN, -INT16_C( 18611), INT16_C( 22036), INT16_MAX } }, { { -INT16_C( 708), -INT16_C( 2210), INT16_C( 3572), INT16_C( 19550), -INT16_C( 15585), -INT16_C( 17137), -INT16_C( 9988), -INT16_C( 16923), -INT16_C( 4995), INT16_C( 34), INT16_C( 22529), -INT16_C( 12194), INT16_C( 6561), INT16_C( 27587), -INT16_C( 1960), -INT16_C( 27613) }, { -INT16_C( 32011), -INT16_C( 5749), -INT16_C( 5745), -INT16_C( 20938), INT16_C( 17837), -INT16_C( 22165), INT16_C( 20509), -INT16_C( 26010), -INT16_C( 30659), INT16_C( 16026), -INT16_C( 1823), -INT16_C( 32241), -INT16_C( 11759), INT16_C( 27117), INT16_C( 4298), -INT16_C( 16131) }, { INT16_C( 1502), -INT16_C( 15978), INT16_C( 1552), INT16_C( 6935), -INT16_C( 26262), INT16_C( 15193), INT16_MAX, INT16_MAX, -INT16_C( 5029), INT16_MAX, -INT16_C( 21026), INT16_C( 25653), INT16_MIN, INT16_C( 30418), INT16_MIN, INT16_C( 20429) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_hsubs_epi16(a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int32_t i32gather_buffer[4096]; static int test_simde_mm_i32gather_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t vindex[4]; const int32_t r[4]; } test_vec[] = { { { INT32_C( 141), INT32_C( 78), INT32_C( 249), INT32_C( 88) }, { INT32_C( 141), INT32_C( 78), INT32_C( 249), INT32_C( 88) } }, { { INT32_C( 189), INT32_C( 204), INT32_C( 14), INT32_C( 231) }, { INT32_C( 189), INT32_C( 204), INT32_C( 14), INT32_C( 231) } }, { { INT32_C( 199), INT32_C( 52), INT32_C( 133), INT32_C( 101) }, { INT32_C( 199), INT32_C( 52), INT32_C( 133), INT32_C( 101) } }, { { 
INT32_C( 239), INT32_C( 12), INT32_C( 121), INT32_C( 226) }, { INT32_C( 239), INT32_C( 12), INT32_C( 121), INT32_C( 226) } }, { { INT32_C( 197), INT32_C( 167), INT32_C( 235), INT32_C( 15) }, { INT32_C( 197), INT32_C( 167), INT32_C( 235), INT32_C( 15) } }, { { INT32_C( 239), INT32_C( 157), INT32_C( 219), INT32_C( 83) }, { INT32_C( 239), INT32_C( 157), INT32_C( 219), INT32_C( 83) } }, { { INT32_C( 230), INT32_C( 67), INT32_C( 195), INT32_C( 27) }, { INT32_C( 230), INT32_C( 67), INT32_C( 195), INT32_C( 27) } }, { { INT32_C( 203), INT32_C( 150), INT32_C( 133), INT32_C( 68) }, { INT32_C( 203), INT32_C( 150), INT32_C( 133), INT32_C( 68) } } }; for (size_t i = 0 ; i < (sizeof(i32gather_buffer) / sizeof(i32gather_buffer[0])) ; i++) { i32gather_buffer[i] = HEDLEY_STATIC_CAST(int32_t, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i vindex = simde_x_mm_loadu_epi32(test_vec[i].vindex); simde__m128i r = simde_mm_i32gather_epi32(i32gather_buffer, vindex, 4); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm_mask_i32gather_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t src[4]; const int32_t vindex[4]; const int32_t mask[4]; const int32_t r[4]; } test_vec[] = { { { -INT32_C( 685361840), -INT32_C( 1057547638), -INT32_C( 1601608401), -INT32_C( 2110383967) }, { INT32_C( 85), INT32_C( 81), INT32_C( 250), INT32_C( 146) }, { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }, { INT32_C( 85), INT32_C( 81), INT32_C( 250), INT32_C( 146) } }, { { -INT32_C( 612533238), -INT32_C( 2144538583), -INT32_C( 1518687133), INT32_C( 1765241328) }, { INT32_C( 198), INT32_C( 202), INT32_C( 225), INT32_C( 124) }, { INT32_C( 0), INT32_C( 0), INT32_MIN, INT32_MIN }, { -INT32_C( 612533238), -INT32_C( 2144538583), INT32_C( 225), INT32_C( 124) } }, { { INT32_C( 1518663255), -INT32_C( 1557876442), INT32_C( 1485068261), INT32_C( 248810868) }, { INT32_C( 138), INT32_C( 226), INT32_C( 2), 
INT32_C( 239) }, { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }, { INT32_C( 138), INT32_C( 226), INT32_C( 2), INT32_C( 239) } }, { { -INT32_C( 436463052), INT32_C( 46620719), -INT32_C( 637220286), -INT32_C( 624310953) }, { INT32_C( 255), INT32_C( 71), INT32_C( 2), INT32_C( 72) }, { INT32_C( 0), INT32_MIN, INT32_C( 0), INT32_C( 0) }, { -INT32_C( 436463052), INT32_C( 71), -INT32_C( 637220286), -INT32_C( 624310953) } }, { { INT32_C( 1290237130), -INT32_C( 376192698), -INT32_C( 487909938), -INT32_C( 1020567585) }, { INT32_C( 161), INT32_C( 11), INT32_C( 227), INT32_C( 34) }, { INT32_MIN, INT32_MIN, INT32_C( 0), INT32_MIN }, { INT32_C( 161), INT32_C( 11), -INT32_C( 487909938), INT32_C( 34) } }, { { -INT32_C( 193884505), INT32_C( 2130650489), INT32_C( 526459079), -INT32_C( 1220465615) }, { INT32_C( 81), INT32_C( 199), INT32_C( 213), INT32_C( 171) }, { INT32_MIN, INT32_MIN, INT32_MIN, INT32_C( 0) }, { INT32_C( 81), INT32_C( 199), INT32_C( 213), -INT32_C( 1220465615) } }, { { -INT32_C( 1028183535), INT32_C( 931744097), INT32_C( 34424318), -INT32_C( 1095912774) }, { INT32_C( 3), INT32_C( 170), INT32_C( 26), INT32_C( 155) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_MIN }, { -INT32_C( 1028183535), INT32_C( 931744097), INT32_C( 34424318), INT32_C( 155) } }, { { INT32_C( 1720704891), -INT32_C( 1492085483), INT32_C( 1187097276), INT32_C( 1491223020) }, { INT32_C( 248), INT32_C( 82), INT32_C( 255), INT32_C( 161) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) }, { INT32_C( 1720704891), -INT32_C( 1492085483), INT32_C( 1187097276), INT32_C( 1491223020) } } }; for (size_t i = 0 ; i < (sizeof(i32gather_buffer) / sizeof(i32gather_buffer[0])) ; i++) { i32gather_buffer[i] = HEDLEY_STATIC_CAST(int32_t, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi32(test_vec[i].src); simde__m128i vindex = simde_x_mm_loadu_epi32(test_vec[i].vindex); simde__m128i mask = simde_x_mm_loadu_epi32(test_vec[i].mask); 
simde__m128i r = simde_mm_mask_i32gather_epi32(src, i32gather_buffer, vindex, mask, 4); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm256_i32gather_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t vindex[8]; const int32_t r[8]; } test_vec[] = { { { INT32_C( 27), INT32_C( 135), INT32_C( 222), INT32_C( 60), INT32_C( 120), INT32_C( 248), INT32_C( 252), INT32_C( 33) }, { INT32_C( 27), INT32_C( 135), INT32_C( 222), INT32_C( 60), INT32_C( 120), INT32_C( 248), INT32_C( 252), INT32_C( 33) } }, { { INT32_C( 94), INT32_C( 185), INT32_C( 221), INT32_C( 160), INT32_C( 28), INT32_C( 199), INT32_C( 216), INT32_C( 91) }, { INT32_C( 94), INT32_C( 185), INT32_C( 221), INT32_C( 160), INT32_C( 28), INT32_C( 199), INT32_C( 216), INT32_C( 91) } }, { { INT32_C( 198), INT32_C( 71), INT32_C( 176), INT32_C( 54), INT32_C( 235), INT32_C( 249), INT32_C( 5), INT32_C( 236) }, { INT32_C( 198), INT32_C( 71), INT32_C( 176), INT32_C( 54), INT32_C( 235), INT32_C( 249), INT32_C( 5), INT32_C( 236) } }, { { INT32_C( 152), INT32_C( 161), INT32_C( 168), INT32_C( 209), INT32_C( 201), INT32_C( 153), INT32_C( 8), INT32_C( 97) }, { INT32_C( 152), INT32_C( 161), INT32_C( 168), INT32_C( 209), INT32_C( 201), INT32_C( 153), INT32_C( 8), INT32_C( 97) } }, { { INT32_C( 210), INT32_C( 35), INT32_C( 29), INT32_C( 112), INT32_C( 115), INT32_C( 2), INT32_C( 240), INT32_C( 195) }, { INT32_C( 210), INT32_C( 35), INT32_C( 29), INT32_C( 112), INT32_C( 115), INT32_C( 2), INT32_C( 240), INT32_C( 195) } }, { { INT32_C( 126), INT32_C( 0), INT32_C( 49), INT32_C( 241), INT32_C( 211), INT32_C( 104), INT32_C( 198), INT32_C( 131) }, { INT32_C( 126), INT32_C( 0), INT32_C( 49), INT32_C( 241), INT32_C( 211), INT32_C( 104), INT32_C( 198), INT32_C( 131) } }, { { INT32_C( 25), INT32_C( 242), INT32_C( 37), INT32_C( 251), INT32_C( 120), INT32_C( 10), INT32_C( 98), INT32_C( 217) }, { INT32_C( 25), INT32_C( 242), INT32_C( 37), INT32_C( 251), INT32_C( 
120), INT32_C( 10), INT32_C( 98), INT32_C( 217) } }, { { INT32_C( 159), INT32_C( 152), INT32_C( 136), INT32_C( 27), INT32_C( 62), INT32_C( 120), INT32_C( 145), INT32_C( 235) }, { INT32_C( 159), INT32_C( 152), INT32_C( 136), INT32_C( 27), INT32_C( 62), INT32_C( 120), INT32_C( 145), INT32_C( 235) } } }; for (size_t i = 0 ; i < (sizeof(i32gather_buffer) / sizeof(i32gather_buffer[0])) ; i++) { i32gather_buffer[i] = HEDLEY_STATIC_CAST(int32_t, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i vindex = simde_x_mm256_loadu_epi32(test_vec[i].vindex); simde__m256i r = simde_mm256_i32gather_epi32(i32gather_buffer, vindex, 4); simde_test_x86_assert_equal_i32x8(r, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm256_mask_i32gather_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t src[8]; const int32_t vindex[8]; const int32_t mask[8]; const int32_t r[8]; } test_vec[] = { { { -INT32_C( 1086860273), -INT32_C( 849127954), -INT32_C( 348023026), -INT32_C( 1484367608), INT32_C( 287657063), -INT32_C( 380909789), INT32_C( 1004087424), -INT32_C( 1260891740) }, { INT32_C( 19), INT32_C( 114), INT32_C( 118), INT32_C( 226), INT32_C( 83), INT32_C( 138), INT32_C( 11), INT32_C( 140) }, { INT32_C( 0), INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN, INT32_C( 0), INT32_C( 0), INT32_MIN }, { -INT32_C( 1086860273), INT32_C( 114), INT32_C( 118), INT32_C( 226), INT32_C( 83), -INT32_C( 380909789), INT32_C( 1004087424), INT32_C( 140) } }, { { -INT32_C( 1577111400), INT32_C( 1410892465), INT32_C( 678561379), INT32_C( 1578988305), -INT32_C( 1665833368), -INT32_C( 2026400021), INT32_C( 2139872828), -INT32_C( 596763836) }, { INT32_C( 252), INT32_C( 84), INT32_C( 173), INT32_C( 158), INT32_C( 235), INT32_C( 148), INT32_C( 148), INT32_C( 100) }, { INT32_C( 0), INT32_MIN, INT32_C( 0), INT32_MIN, INT32_MIN, INT32_C( 0), INT32_C( 0), INT32_C( 0) }, { -INT32_C( 1577111400), INT32_C( 84), INT32_C( 678561379), INT32_C( 158), 
INT32_C( 235), -INT32_C( 2026400021), INT32_C( 2139872828), -INT32_C( 596763836) } }, { { -INT32_C( 1074690759), -INT32_C( 1478416843), INT32_C( 592490712), INT32_C( 695179148), INT32_C( 220112409), -INT32_C( 128800944), INT32_C( 1706634309), -INT32_C( 1334452618) }, { INT32_C( 152), INT32_C( 101), INT32_C( 140), INT32_C( 63), INT32_C( 238), INT32_C( 192), INT32_C( 66), INT32_C( 224) }, { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN, INT32_C( 0), INT32_MIN }, { INT32_C( 152), INT32_C( 101), INT32_C( 140), INT32_C( 63), INT32_C( 238), INT32_C( 192), INT32_C( 1706634309), INT32_C( 224) } }, { { -INT32_C( 1420665862), INT32_C( 1515996691), INT32_C( 969382596), INT32_C( 1431139470), INT32_C( 144133742), -INT32_C( 1133668042), -INT32_C( 2118118208), -INT32_C( 1488673875) }, { INT32_C( 18), INT32_C( 161), INT32_C( 215), INT32_C( 22), INT32_C( 172), INT32_C( 28), INT32_C( 76), INT32_C( 203) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_MIN, INT32_MIN, INT32_MIN, INT32_C( 0) }, { -INT32_C( 1420665862), INT32_C( 1515996691), INT32_C( 969382596), INT32_C( 1431139470), INT32_C( 172), INT32_C( 28), INT32_C( 76), -INT32_C( 1488673875) } }, { { -INT32_C( 20439286), -INT32_C( 449069208), -INT32_C( 2012655728), -INT32_C( 1829288817), -INT32_C( 2081353314), INT32_C( 1074228372), INT32_C( 2142754948), -INT32_C( 1477248355) }, { INT32_C( 130), INT32_C( 9), INT32_C( 190), INT32_C( 83), INT32_C( 47), INT32_C( 49), INT32_C( 83), INT32_C( 58) }, { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN, INT32_C( 0), INT32_C( 0) }, { INT32_C( 130), INT32_C( 9), INT32_C( 190), INT32_C( 83), INT32_C( 47), INT32_C( 49), INT32_C( 2142754948), -INT32_C( 1477248355) } }, { { INT32_C( 1885499633), INT32_C( 656294547), -INT32_C( 1351644492), INT32_C( 1128532806), -INT32_C( 873657988), -INT32_C( 1764702148), -INT32_C( 2096027564), INT32_C( 1796130170) }, { INT32_C( 201), INT32_C( 102), INT32_C( 201), INT32_C( 231), INT32_C( 159), INT32_C( 22), INT32_C( 
38), INT32_C( 66) }, { INT32_C( 0), INT32_MIN, INT32_C( 0), INT32_MIN, INT32_C( 0), INT32_MIN, INT32_C( 0), INT32_C( 0) }, { INT32_C( 1885499633), INT32_C( 102), -INT32_C( 1351644492), INT32_C( 231), -INT32_C( 873657988), INT32_C( 22), -INT32_C( 2096027564), INT32_C( 1796130170) } }, { { INT32_C( 515341239), -INT32_C( 737547912), -INT32_C( 778257104), INT32_C( 936725373), -INT32_C( 1833731923), INT32_C( 1914979922), INT32_C( 1184881778), -INT32_C( 583921882) }, { INT32_C( 2), INT32_C( 3), INT32_C( 63), INT32_C( 107), INT32_C( 115), INT32_C( 226), INT32_C( 35), INT32_C( 31) }, { INT32_MIN, INT32_C( 0), INT32_MIN, INT32_C( 0), INT32_MIN, INT32_MIN, INT32_C( 0), INT32_MIN }, { INT32_C( 2), -INT32_C( 737547912), INT32_C( 63), INT32_C( 936725373), INT32_C( 115), INT32_C( 226), INT32_C( 1184881778), INT32_C( 31) } }, { { -INT32_C( 724013614), -INT32_C( 778505370), INT32_C( 2063310050), -INT32_C( 1154183402), -INT32_C( 707698399), INT32_C( 210159988), INT32_C( 1698257641), INT32_C( 36013360) }, { INT32_C( 89), INT32_C( 212), INT32_C( 34), INT32_C( 214), INT32_C( 87), INT32_C( 97), INT32_C( 188), INT32_C( 22) }, { INT32_MIN, INT32_MIN, INT32_MIN, INT32_C( 0), INT32_C( 0), INT32_MIN, INT32_C( 0), INT32_C( 0) }, { INT32_C( 89), INT32_C( 212), INT32_C( 34), -INT32_C( 1154183402), -INT32_C( 707698399), INT32_C( 97), INT32_C( 1698257641), INT32_C( 36013360) } } }; for (size_t i = 0 ; i < (sizeof(i32gather_buffer) / sizeof(i32gather_buffer[0])) ; i++) { i32gather_buffer[i] = HEDLEY_STATIC_CAST(int32_t, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i src = simde_x_mm256_loadu_epi32(test_vec[i].src); simde__m256i vindex = simde_x_mm256_loadu_epi32(test_vec[i].vindex); simde__m256i mask = simde_x_mm256_loadu_epi32(test_vec[i].mask); simde__m256i r = simde_mm256_mask_i32gather_epi32(src, i32gather_buffer, vindex, mask, 4); simde_test_x86_assert_equal_i32x8(r, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; } static int 
test_simde_mm_i64gather_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t vindex[2]; const int32_t r[4]; } test_vec[] = { { { INT64_C( 136), INT64_C( 22) }, { INT32_C( 136), INT32_C( 22), INT32_C( 0), INT32_C( 0) } }, { { INT64_C( 173), INT64_C( 86) }, { INT32_C( 173), INT32_C( 86), INT32_C( 0), INT32_C( 0) } }, { { INT64_C( 157), INT64_C( 106) }, { INT32_C( 157), INT32_C( 106), INT32_C( 0), INT32_C( 0) } }, { { INT64_C( 81), INT64_C( 112) }, { INT32_C( 81), INT32_C( 112), INT32_C( 0), INT32_C( 0) } }, { { INT64_C( 42), INT64_C( 54) }, { INT32_C( 42), INT32_C( 54), INT32_C( 0), INT32_C( 0) } }, { { INT64_C( 75), INT64_C( 158) }, { INT32_C( 75), INT32_C( 158), INT32_C( 0), INT32_C( 0) } }, { { INT64_C( 9), INT64_C( 95) }, { INT32_C( 9), INT32_C( 95), INT32_C( 0), INT32_C( 0) } }, { { INT64_C( 192), INT64_C( 148) }, { INT32_C( 192), INT32_C( 148), INT32_C( 0), INT32_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(i32gather_buffer) / sizeof(i32gather_buffer[0])) ; i++) { i32gather_buffer[i] = HEDLEY_STATIC_CAST(int32_t, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i vindex = simde_x_mm_loadu_epi64(test_vec[i].vindex); simde__m128i r = simde_mm_i64gather_epi32(i32gather_buffer, vindex, 4); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm_mask_i64gather_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t src[4]; const int64_t vindex[2]; const int32_t mask[4]; const int32_t r[4]; } test_vec[] = { { { INT32_C( 1478898212), INT32_C( 916774907), -INT32_C( 1556893248), INT32_C( 1777183058) }, { INT64_C( 141), INT64_C( 139) }, { INT32_MIN, INT32_MIN, INT32_C( 0), INT32_C( 0) }, { INT32_C( 141), INT32_C( 139), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 632300097), -INT32_C( 462958966), INT32_C( 1851006215), INT32_C( 721091466) }, { INT64_C( 157), INT64_C( 177) }, { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }, { INT32_C( 157), 
INT32_C( 177), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 1526041333), -INT32_C( 1124607967), -INT32_C( 1106894900), INT32_C( 879726651) }, { INT64_C( 0), INT64_C( 233) }, { INT32_MIN, INT32_MIN, INT32_C( 0), INT32_MIN }, { INT32_C( 0), INT32_C( 233), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 1009155372), INT32_C( 2126747810), INT32_C( 1779523445), -INT32_C( 1420614464) }, { INT64_C( 44), INT64_C( 205) }, { INT32_MIN, INT32_C( 0), INT32_MIN, INT32_C( 0) }, { INT32_C( 44), INT32_C( 2126747810), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 2036541516), -INT32_C( 1464708264), INT32_C( 1817736563), INT32_C( 289001730) }, { INT64_C( 210), INT64_C( 6) }, { INT32_C( 0), INT32_MIN, INT32_C( 0), INT32_C( 0) }, { -INT32_C( 2036541516), INT32_C( 6), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 529894144), -INT32_C( 1242496641), -INT32_C( 1991166154), -INT32_C( 661684580) }, { INT64_C( 15), INT64_C( 39) }, { INT32_MIN, INT32_C( 0), INT32_MIN, INT32_MIN }, { INT32_C( 15), -INT32_C( 1242496641), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 2000072659), INT32_C( 932691705), -INT32_C( 673489744), INT32_C( 16648425) }, { INT64_C( 234), INT64_C( 70) }, { INT32_C( 0), INT32_MIN, INT32_C( 0), INT32_MIN }, { -INT32_C( 2000072659), INT32_C( 70), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 913030322), -INT32_C( 1531700955), INT32_C( 960408096), -INT32_C( 1367393148) }, { INT64_C( 118), INT64_C( 32) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_MIN }, { -INT32_C( 913030322), -INT32_C( 1531700955), INT32_C( 0), INT32_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(i32gather_buffer) / sizeof(i32gather_buffer[0])) ; i++) { i32gather_buffer[i] = HEDLEY_STATIC_CAST(int32_t, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi32(test_vec[i].src); simde__m128i vindex = simde_x_mm_loadu_epi64(test_vec[i].vindex); simde__m128i mask = simde_x_mm_loadu_epi32(test_vec[i].mask); simde__m128i r = simde_mm_mask_i64gather_epi32(src, 
i32gather_buffer, vindex, mask, 4); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm256_i64gather_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t vindex[4]; const int32_t r[4]; } test_vec[] = { { { INT64_C( 164), INT64_C( 255), INT64_C( 34), INT64_C( 127) }, { INT32_C( 164), INT32_C( 255), INT32_C( 34), INT32_C( 127) } }, { { INT64_C( 52), INT64_C( 61), INT64_C( 40), INT64_C( 26) }, { INT32_C( 52), INT32_C( 61), INT32_C( 40), INT32_C( 26) } }, { { INT64_C( 166), INT64_C( 126), INT64_C( 130), INT64_C( 24) }, { INT32_C( 166), INT32_C( 126), INT32_C( 130), INT32_C( 24) } }, { { INT64_C( 246), INT64_C( 51), INT64_C( 78), INT64_C( 212) }, { INT32_C( 246), INT32_C( 51), INT32_C( 78), INT32_C( 212) } }, { { INT64_C( 104), INT64_C( 184), INT64_C( 15), INT64_C( 222) }, { INT32_C( 104), INT32_C( 184), INT32_C( 15), INT32_C( 222) } }, { { INT64_C( 136), INT64_C( 54), INT64_C( 141), INT64_C( 30) }, { INT32_C( 136), INT32_C( 54), INT32_C( 141), INT32_C( 30) } }, { { INT64_C( 103), INT64_C( 148), INT64_C( 191), INT64_C( 239) }, { INT32_C( 103), INT32_C( 148), INT32_C( 191), INT32_C( 239) } }, { { INT64_C( 123), INT64_C( 179), INT64_C( 92), INT64_C( 156) }, { INT32_C( 123), INT32_C( 179), INT32_C( 92), INT32_C( 156) } } }; for (size_t i = 0 ; i < (sizeof(i32gather_buffer) / sizeof(i32gather_buffer[0])) ; i++) { i32gather_buffer[i] = HEDLEY_STATIC_CAST(int32_t, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i vindex = simde_x_mm256_loadu_epi64(test_vec[i].vindex); simde__m128i r = simde_mm256_i64gather_epi32(i32gather_buffer, vindex, 4); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm256_mask_i64gather_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t src[4]; const int64_t vindex[4]; const int32_t mask[4]; const int32_t r[4]; } test_vec[] = { { { -INT32_C( 
1914974857), -INT32_C( 268305992), INT32_C( 1600826892), INT32_C( 91051765) }, { INT64_C( 21), INT64_C( 250), INT64_C( 89), INT64_C( 48) }, { INT32_C( 0), INT32_C( 0), INT32_MIN, INT32_C( 0) }, { -INT32_C( 1914974857), -INT32_C( 268305992), INT32_C( 89), INT32_C( 91051765) } }, { { -INT32_C( 807128191), -INT32_C( 215463748), INT32_C( 975167766), INT32_C( 627724550) }, { INT64_C( 89), INT64_C( 70), INT64_C( 162), INT64_C( 179) }, { INT32_C( 0), INT32_C( 0), INT32_MIN, INT32_MIN }, { -INT32_C( 807128191), -INT32_C( 215463748), INT32_C( 162), INT32_C( 179) } }, { { -INT32_C( 569972142), INT32_C( 1199611944), INT32_C( 1668045913), -INT32_C( 770263134) }, { INT64_C( 96), INT64_C( 18), INT64_C( 116), INT64_C( 3) }, { INT32_MIN, INT32_C( 0), INT32_C( 0), INT32_C( 0) }, { INT32_C( 96), INT32_C( 1199611944), INT32_C( 1668045913), -INT32_C( 770263134) } }, { { INT32_C( 209437937), INT32_C( 25204532), INT32_C( 1584355103), -INT32_C( 1738428347) }, { INT64_C( 249), INT64_C( 196), INT64_C( 215), INT64_C( 197) }, { INT32_C( 0), INT32_C( 0), INT32_MIN, INT32_MIN }, { INT32_C( 209437937), INT32_C( 25204532), INT32_C( 215), INT32_C( 197) } }, { { INT32_C( 1431392925), INT32_C( 1278007459), INT32_C( 1966760398), INT32_C( 926662903) }, { INT64_C( 228), INT64_C( 100), INT64_C( 202), INT64_C( 39) }, { INT32_C( 0), INT32_MIN, INT32_MIN, INT32_MIN }, { INT32_C( 1431392925), INT32_C( 100), INT32_C( 202), INT32_C( 39) } }, { { INT32_C( 286484245), -INT32_C( 1831110836), INT32_C( 1942952725), INT32_C( 2140816278) }, { INT64_C( 25), INT64_C( 234), INT64_C( 181), INT64_C( 145) }, { INT32_MIN, INT32_C( 0), INT32_MIN, INT32_C( 0) }, { INT32_C( 25), -INT32_C( 1831110836), INT32_C( 181), INT32_C( 2140816278) } }, { { INT32_C( 478193020), INT32_C( 1842534011), INT32_C( 1693907963), -INT32_C( 520749634) }, { INT64_C( 40), INT64_C( 39), INT64_C( 239), INT64_C( 122) }, { INT32_C( 0), INT32_MIN, INT32_MIN, INT32_MIN }, { INT32_C( 478193020), INT32_C( 39), INT32_C( 239), INT32_C( 122) } }, { { 
-INT32_C( 683278108), INT32_C( 667313686), -INT32_C( 1862854276), -INT32_C( 552950175) }, { INT64_C( 248), INT64_C( 188), INT64_C( 99), INT64_C( 19) }, { INT32_C( 0), INT32_MIN, INT32_C( 0), INT32_MIN }, { -INT32_C( 683278108), INT32_C( 188), -INT32_C( 1862854276), INT32_C( 19) } } }; for (size_t i = 0 ; i < (sizeof(i32gather_buffer) / sizeof(i32gather_buffer[0])) ; i++) { i32gather_buffer[i] = HEDLEY_STATIC_CAST(int32_t, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi32(test_vec[i].src); simde__m256i vindex = simde_x_mm256_loadu_epi64(test_vec[i].vindex); simde__m128i mask = simde_x_mm_loadu_epi32(test_vec[i].mask); simde__m128i r = simde_mm256_mask_i64gather_epi32(src, i32gather_buffer, vindex, mask, 4); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int64_t i64gather_buffer[4096]; static int test_simde_mm_i32gather_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t vindex[4]; const int64_t r[2]; } test_vec[] = { { { INT32_C( 213), INT32_C( 182), INT32_C( 137), INT32_C( 61) }, { INT64_C( 213), INT64_C( 182) } }, { { INT32_C( 194), INT32_C( 187), INT32_C( 51), INT32_C( 119) }, { INT64_C( 194), INT64_C( 187) } }, { { INT32_C( 228), INT32_C( 167), INT32_C( 116), INT32_C( 164) }, { INT64_C( 228), INT64_C( 167) } }, { { INT32_C( 165), INT32_C( 240), INT32_C( 155), INT32_C( 11) }, { INT64_C( 165), INT64_C( 240) } }, { { INT32_C( 24), INT32_C( 15), INT32_C( 38), INT32_C( 204) }, { INT64_C( 24), INT64_C( 15) } }, { { INT32_C( 213), INT32_C( 248), INT32_C( 99), INT32_C( 197) }, { INT64_C( 213), INT64_C( 248) } }, { { INT32_C( 246), INT32_C( 206), INT32_C( 198), INT32_C( 189) }, { INT64_C( 246), INT64_C( 206) } }, { { INT32_C( 177), INT32_C( 184), INT32_C( 118), INT32_C( 235) }, { INT64_C( 177), INT64_C( 184) } } }; for (size_t i = 0 ; i < (sizeof(i64gather_buffer) / sizeof(i64gather_buffer[0])) ; i++) { i64gather_buffer[i] = 
HEDLEY_STATIC_CAST(int64_t, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i vindex = simde_x_mm_loadu_epi32(test_vec[i].vindex); simde__m128i r = simde_mm_i32gather_epi64(i64gather_buffer, vindex, 8); simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm_mask_i32gather_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t src[2]; const int32_t vindex[4]; const int64_t mask[2]; const int64_t r[2]; } test_vec[] = { { { -INT64_C( 8019067203857977457), -INT64_C( 8061722764401183075) }, { INT32_C( 183), INT32_C( 168), INT32_C( 220), INT32_C( 145) }, { INT64_C( 0), INT64_C( 0) }, { -INT64_C( 8019067203857977457), -INT64_C( 8061722764401183075) } }, { { -INT64_C( 919569984677581009), -INT64_C( 1648047741553663091) }, { INT32_C( 235), INT32_C( 124), INT32_C( 216), INT32_C( 241) }, { INT64_C( 0), INT64_MIN }, { -INT64_C( 919569984677581009), INT64_C( 124) } }, { { -INT64_C( 8634359355635040034), -INT64_C( 3764535099121455859) }, { INT32_C( 178), INT32_C( 242), INT32_C( 53), INT32_C( 4) }, { INT64_C( 0), INT64_C( 0) }, { -INT64_C( 8634359355635040034), -INT64_C( 3764535099121455859) } }, { { INT64_C( 2549016659115993441), -INT64_C( 7458823204181079982) }, { INT32_C( 77), INT32_C( 31), INT32_C( 104), INT32_C( 168) }, { INT64_C( 0), INT64_C( 0) }, { INT64_C( 2549016659115993441), -INT64_C( 7458823204181079982) } }, { { INT64_C( 5881278022537621357), -INT64_C( 7234300930576215892) }, { INT32_C( 151), INT32_C( 32), INT32_C( 36), INT32_C( 71) }, { INT64_MIN, INT64_C( 0) }, { INT64_C( 151), -INT64_C( 7234300930576215892) } }, { { INT64_C( 349012936767990930), INT64_C( 1707613122462270608) }, { INT32_C( 126), INT32_C( 80), INT32_C( 233), INT32_C( 146) }, { INT64_C( 0), INT64_MIN }, { INT64_C( 349012936767990930), INT64_C( 80) } }, { { INT64_C( 4375785989412903820), -INT64_C( 4856981328699777274) }, { INT32_C( 103), INT32_C( 227), INT32_C( 125), INT32_C( 99) }, { 
INT64_MIN, INT64_C( 0) }, { INT64_C( 103), -INT64_C( 4856981328699777274) } }, { { -INT64_C( 7386706167218158950), -INT64_C( 7989056672551773465) }, { INT32_C( 107), INT32_C( 21), INT32_C( 236), INT32_C( 252) }, { INT64_C( 0), INT64_C( 0) }, { -INT64_C( 7386706167218158950), -INT64_C( 7989056672551773465) } } }; for (size_t i = 0 ; i < (sizeof(i64gather_buffer) / sizeof(i64gather_buffer[0])) ; i++) { i64gather_buffer[i] = HEDLEY_STATIC_CAST(int64_t, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi64(test_vec[i].src); simde__m128i vindex = simde_x_mm_loadu_epi32(test_vec[i].vindex); simde__m128i mask = simde_x_mm_loadu_epi64(test_vec[i].mask); simde__m128i r = simde_mm_mask_i32gather_epi64(src, i64gather_buffer, vindex, mask, 8); simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm256_i32gather_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t vindex[4]; const int64_t r[4]; } test_vec[] = { { { INT32_C( 222), INT32_C( 35), INT32_C( 153), INT32_C( 134) }, { INT64_C( 222), INT64_C( 35), INT64_C( 153), INT64_C( 134) } }, { { INT32_C( 57), INT32_C( 242), INT32_C( 165), INT32_C( 182) }, { INT64_C( 57), INT64_C( 242), INT64_C( 165), INT64_C( 182) } }, { { INT32_C( 137), INT32_C( 208), INT32_C( 222), INT32_C( 21) }, { INT64_C( 137), INT64_C( 208), INT64_C( 222), INT64_C( 21) } }, { { INT32_C( 45), INT32_C( 10), INT32_C( 128), INT32_C( 27) }, { INT64_C( 45), INT64_C( 10), INT64_C( 128), INT64_C( 27) } }, { { INT32_C( 23), INT32_C( 3), INT32_C( 105), INT32_C( 216) }, { INT64_C( 23), INT64_C( 3), INT64_C( 105), INT64_C( 216) } }, { { INT32_C( 28), INT32_C( 117), INT32_C( 171), INT32_C( 117) }, { INT64_C( 28), INT64_C( 117), INT64_C( 171), INT64_C( 117) } }, { { INT32_C( 238), INT32_C( 69), INT32_C( 187), INT32_C( 167) }, { INT64_C( 238), INT64_C( 69), INT64_C( 187), INT64_C( 167) } }, { { INT32_C( 243), INT32_C( 37), INT32_C( 
145), INT32_C( 129) }, { INT64_C( 243), INT64_C( 37), INT64_C( 145), INT64_C( 129) } } }; for (size_t i = 0 ; i < (sizeof(i64gather_buffer) / sizeof(i64gather_buffer[0])) ; i++) { i64gather_buffer[i] = HEDLEY_STATIC_CAST(int64_t, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i vindex = simde_x_mm_loadu_epi32(test_vec[i].vindex); simde__m256i r = simde_mm256_i32gather_epi64(i64gather_buffer, vindex, 8); simde_test_x86_assert_equal_i64x4(r, simde_x_mm256_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm256_mask_i32gather_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t src[4]; const int32_t vindex[4]; const int64_t mask[4]; const int64_t r[4]; } test_vec[] = { { { INT64_C( 3369142246547613560), INT64_C( 7744810680774779185), -INT64_C( 7552795409020785613), INT64_C( 1387622825387924421) }, { INT32_C( 64), INT32_C( 70), INT32_C( 80), INT32_C( 245) }, { INT64_C( 0), INT64_MIN, INT64_C( 0), INT64_C( 0) }, { INT64_C( 3369142246547613560), INT64_C( 70), -INT64_C( 7552795409020785613), INT64_C( 1387622825387924421) } }, { { INT64_C( 3103440719973267570), -INT64_C( 1919373598316996544), -INT64_C( 6201068952727107105), -INT64_C( 2614573325330952241) }, { INT32_C( 72), INT32_C( 23), INT32_C( 132), INT32_C( 99) }, { INT64_MIN, INT64_C( 0), INT64_MIN, INT64_MIN }, { INT64_C( 72), -INT64_C( 1919373598316996544), INT64_C( 132), INT64_C( 99) } }, { { -INT64_C( 1752065872900610483), INT64_C( 8405587345429565333), INT64_C( 1653618346573801900), -INT64_C( 7161726922272969511) }, { INT32_C( 116), INT32_C( 71), INT32_C( 153), INT32_C( 134) }, { INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_MIN }, { -INT64_C( 1752065872900610483), INT64_C( 8405587345429565333), INT64_C( 1653618346573801900), INT64_C( 134) } }, { { INT64_C( 1257101528912083106), -INT64_C( 77386101739500697), -INT64_C( 6860348042974974061), INT64_C( 6311937495677122959) }, { INT32_C( 104), INT32_C( 134), INT32_C( 114), INT32_C( 96) }, { 
INT64_C( 0), INT64_C( 0), INT64_MIN, INT64_C( 0) }, { INT64_C( 1257101528912083106), -INT64_C( 77386101739500697), INT64_C( 114), INT64_C( 6311937495677122959) } }, { { -INT64_C( 2861233286667249227), INT64_C( 2994139127905595224), INT64_C( 1902462180205604141), INT64_C( 6568991147572350640) }, { INT32_C( 20), INT32_C( 9), INT32_C( 185), INT32_C( 83) }, { INT64_MIN, INT64_MIN, INT64_C( 0), INT64_C( 0) }, { INT64_C( 20), INT64_C( 9), INT64_C( 1902462180205604141), INT64_C( 6568991147572350640) } }, { { -INT64_C( 4429501574753673542), -INT64_C( 8290183239203434124), INT64_C( 9208322051927348199), -INT64_C( 3990225492664517654) }, { INT32_C( 199), INT32_C( 26), INT32_C( 173), INT32_C( 45) }, { INT64_C( 0), INT64_MIN, INT64_MIN, INT64_MIN }, { -INT64_C( 4429501574753673542), INT64_C( 26), INT64_C( 173), INT64_C( 45) } }, { { INT64_C( 5138321306576926676), INT64_C( 3583659973203966119), -INT64_C( 5991628195680088859), -INT64_C( 7026477670733466586) }, { INT32_C( 168), INT32_C( 117), INT32_C( 131), INT32_C( 38) }, { INT64_MIN, INT64_C( 0), INT64_C( 0), INT64_MIN }, { INT64_C( 168), INT64_C( 3583659973203966119), -INT64_C( 5991628195680088859), INT64_C( 38) } }, { { -INT64_C( 5654470186572994433), -INT64_C( 2076098425164798), INT64_C( 4968725845142138655), -INT64_C( 4249484596989279597) }, { INT32_C( 199), INT32_C( 152), INT32_C( 117), INT32_C( 16) }, { INT64_MIN, INT64_MIN, INT64_C( 0), INT64_C( 0) }, { INT64_C( 199), INT64_C( 152), INT64_C( 4968725845142138655), -INT64_C( 4249484596989279597) } } }; for (size_t i = 0 ; i < (sizeof(i64gather_buffer) / sizeof(i64gather_buffer[0])) ; i++) { i64gather_buffer[i] = HEDLEY_STATIC_CAST(int64_t, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i src = simde_x_mm256_loadu_epi64(test_vec[i].src); simde__m128i vindex = simde_x_mm_loadu_epi32(test_vec[i].vindex); simde__m256i mask = simde_x_mm256_loadu_epi64(test_vec[i].mask); simde__m256i r = simde_mm256_mask_i32gather_epi64(src, 
i64gather_buffer, vindex, mask, 8); simde_test_x86_assert_equal_i64x4(r, simde_x_mm256_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm_i64gather_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t vindex[2]; const int64_t r[2]; } test_vec[] = { { { INT64_C( 0), INT64_C( 234) }, { INT64_C( 0), INT64_C( 234) } }, { { INT64_C( 22), INT64_C( 245) }, { INT64_C( 22), INT64_C( 245) } }, { { INT64_C( 151), INT64_C( 230) }, { INT64_C( 151), INT64_C( 230) } }, { { INT64_C( 184), INT64_C( 194) }, { INT64_C( 184), INT64_C( 194) } }, { { INT64_C( 213), INT64_C( 241) }, { INT64_C( 213), INT64_C( 241) } }, { { INT64_C( 191), INT64_C( 195) }, { INT64_C( 191), INT64_C( 195) } }, { { INT64_C( 174), INT64_C( 128) }, { INT64_C( 174), INT64_C( 128) } }, { { INT64_C( 186), INT64_C( 50) }, { INT64_C( 186), INT64_C( 50) } } }; for (size_t i = 0 ; i < (sizeof(i64gather_buffer) / sizeof(i64gather_buffer[0])) ; i++) { i64gather_buffer[i] = HEDLEY_STATIC_CAST(int64_t, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i vindex = simde_x_mm_loadu_epi64(test_vec[i].vindex); simde__m128i r = simde_mm_i64gather_epi64(i64gather_buffer, vindex, 8); simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm_mask_i64gather_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t src[2]; const int64_t vindex[2]; const int64_t mask[2]; const int64_t r[2]; } test_vec[] = { { { -INT64_C( 2403370840913539277), INT64_C( 5164194594589355633) }, { INT64_C( 73), INT64_C( 179) }, { INT64_C( 0), INT64_C( 0) }, { -INT64_C( 2403370840913539277), INT64_C( 5164194594589355633) } }, { { INT64_C( 935513772061340819), INT64_C( 7894743952153131099) }, { INT64_C( 36), INT64_C( 84) }, { INT64_C( 0), INT64_MIN }, { INT64_C( 935513772061340819), INT64_C( 84) } }, { { INT64_C( 6537362916212700834), -INT64_C( 8981235666207658170) }, { INT64_C( 157), INT64_C( 192) }, { INT64_C( 0), 
INT64_MIN }, { INT64_C( 6537362916212700834), INT64_C( 192) } }, { { -INT64_C( 1290141850782853098), INT64_C( 6360822988240042599) }, { INT64_C( 102), INT64_C( 84) }, { INT64_C( 0), INT64_MIN }, { -INT64_C( 1290141850782853098), INT64_C( 84) } }, { { INT64_C( 6060159807846979595), -INT64_C( 2729284862310875763) }, { INT64_C( 118), INT64_C( 230) }, { INT64_C( 0), INT64_C( 0) }, { INT64_C( 6060159807846979595), -INT64_C( 2729284862310875763) } }, { { -INT64_C( 4478319151860018035), -INT64_C( 5765320646130876869) }, { INT64_C( 255), INT64_C( 33) }, { INT64_MIN, INT64_MIN }, { INT64_C( 255), INT64_C( 33) } }, { { INT64_C( 6993169290329548479), -INT64_C( 5736351597774397777) }, { INT64_C( 95), INT64_C( 101) }, { INT64_MIN, INT64_MIN }, { INT64_C( 95), INT64_C( 101) } }, { { INT64_C( 1384741590647677331), -INT64_C( 5107012464534872811) }, { INT64_C( 42), INT64_C( 182) }, { INT64_C( 0), INT64_MIN }, { INT64_C( 1384741590647677331), INT64_C( 182) } } }; for (size_t i = 0 ; i < (sizeof(i64gather_buffer) / sizeof(i64gather_buffer[0])) ; i++) { i64gather_buffer[i] = HEDLEY_STATIC_CAST(int64_t, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi64(test_vec[i].src); simde__m128i vindex = simde_x_mm_loadu_epi64(test_vec[i].vindex); simde__m128i mask = simde_x_mm_loadu_epi64(test_vec[i].mask); simde__m128i r = simde_mm_mask_i64gather_epi64(src, i64gather_buffer, vindex, mask, 8); simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm256_i64gather_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t vindex[4]; const int64_t r[4]; } test_vec[] = { { { INT64_C( 226), INT64_C( 163), INT64_C( 130), INT64_C( 153) }, { INT64_C( 226), INT64_C( 163), INT64_C( 130), INT64_C( 153) } }, { { INT64_C( 200), INT64_C( 98), INT64_C( 2), INT64_C( 50) }, { INT64_C( 200), INT64_C( 98), INT64_C( 2), INT64_C( 50) } }, { { INT64_C( 16), INT64_C( 32), INT64_C( 
233), INT64_C( 157) }, { INT64_C( 16), INT64_C( 32), INT64_C( 233), INT64_C( 157) } }, { { INT64_C( 136), INT64_C( 125), INT64_C( 114), INT64_C( 31) }, { INT64_C( 136), INT64_C( 125), INT64_C( 114), INT64_C( 31) } }, { { INT64_C( 170), INT64_C( 197), INT64_C( 24), INT64_C( 71) }, { INT64_C( 170), INT64_C( 197), INT64_C( 24), INT64_C( 71) } }, { { INT64_C( 171), INT64_C( 221), INT64_C( 156), INT64_C( 172) }, { INT64_C( 171), INT64_C( 221), INT64_C( 156), INT64_C( 172) } }, { { INT64_C( 165), INT64_C( 159), INT64_C( 250), INT64_C( 110) }, { INT64_C( 165), INT64_C( 159), INT64_C( 250), INT64_C( 110) } }, { { INT64_C( 135), INT64_C( 146), INT64_C( 141), INT64_C( 229) }, { INT64_C( 135), INT64_C( 146), INT64_C( 141), INT64_C( 229) } } }; for (size_t i = 0 ; i < (sizeof(i64gather_buffer) / sizeof(i64gather_buffer[0])) ; i++) { i64gather_buffer[i] = HEDLEY_STATIC_CAST(int64_t, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i vindex = simde_x_mm256_loadu_epi64(test_vec[i].vindex); simde__m256i r = simde_mm256_i64gather_epi64(i64gather_buffer, vindex, 8); simde_test_x86_assert_equal_i64x4(r, simde_x_mm256_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm256_mask_i64gather_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t src[4]; const int64_t vindex[4]; const int64_t mask[4]; const int64_t r[4]; } test_vec[] = { { { INT64_C( 8348925912988061266), INT64_C( 4593288946901735853), INT64_C( 4308902935962320046), INT64_C( 5084668380847658629) }, { INT64_C( 74), INT64_C( 225), INT64_C( 80), INT64_C( 129) }, { INT64_C( 0), INT64_C( 0), INT64_MIN, INT64_MIN }, { INT64_C( 8348925912988061266), INT64_C( 4593288946901735853), INT64_C( 80), INT64_C( 129) } }, { { INT64_C( 1819345364246751166), INT64_C( 6003474620557878227), INT64_C( 4745430889957619642), -INT64_C( 181287204719907732) }, { INT64_C( 153), INT64_C( 47), INT64_C( 169), INT64_C( 18) }, { INT64_MIN, INT64_C( 0), INT64_MIN, INT64_C( 0) }, { 
INT64_C( 153), INT64_C( 6003474620557878227), INT64_C( 169), -INT64_C( 181287204719907732) } }, { { -INT64_C( 4982644635249127578), INT64_C( 1248644929523017696), -INT64_C( 5881855532059445236), -INT64_C( 3838907038599371630) }, { INT64_C( 169), INT64_C( 136), INT64_C( 124), INT64_C( 218) }, { INT64_C( 0), INT64_MIN, INT64_MIN, INT64_MIN }, { -INT64_C( 4982644635249127578), INT64_C( 136), INT64_C( 124), INT64_C( 218) } }, { { INT64_C( 3695928590331946469), -INT64_C( 3354854445674728064), -INT64_C( 7502590046551983353), INT64_C( 2128958219520435651) }, { INT64_C( 145), INT64_C( 0), INT64_C( 131), INT64_C( 115) }, { INT64_C( 0), INT64_MIN, INT64_MIN, INT64_MIN }, { INT64_C( 3695928590331946469), INT64_C( 0), INT64_C( 131), INT64_C( 115) } }, { { -INT64_C( 1365028202850547012), INT64_C( 7416463151597603534), INT64_C( 5974682522185630920), INT64_C( 4041606267062842147) }, { INT64_C( 11), INT64_C( 207), INT64_C( 105), INT64_C( 70) }, { INT64_C( 0), INT64_MIN, INT64_C( 0), INT64_C( 0) }, { -INT64_C( 1365028202850547012), INT64_C( 207), INT64_C( 5974682522185630920), INT64_C( 4041606267062842147) } }, { { -INT64_C( 9156116817736860426), INT64_C( 4444164503705539924), INT64_C( 449387376615079680), -INT64_C( 2018570388200701601) }, { INT64_C( 16), INT64_C( 116), INT64_C( 131), INT64_C( 179) }, { INT64_MIN, INT64_C( 0), INT64_C( 0), INT64_MIN }, { INT64_C( 16), INT64_C( 4444164503705539924), INT64_C( 449387376615079680), INT64_C( 179) } }, { { -INT64_C( 4232490538612136547), -INT64_C( 7797947821915074691), -INT64_C( 42789732815080928), INT64_C( 6176971354028858165) }, { INT64_C( 52), INT64_C( 108), INT64_C( 215), INT64_C( 104) }, { INT64_MIN, INT64_MIN, INT64_MIN, INT64_C( 0) }, { INT64_C( 52), INT64_C( 108), INT64_C( 215), INT64_C( 6176971354028858165) } }, { { INT64_C( 1917495916396991492), -INT64_C( 3737546505400132308), -INT64_C( 3617519179428978074), INT64_C( 5470992047431380744) }, { INT64_C( 131), INT64_C( 67), INT64_C( 187), INT64_C( 54) }, { INT64_C( 0), INT64_C( 
0), INT64_MIN, INT64_MIN }, { INT64_C( 1917495916396991492), -INT64_C( 3737546505400132308), INT64_C( 187), INT64_C( 54) } } }; for (size_t i = 0 ; i < (sizeof(i64gather_buffer) / sizeof(i64gather_buffer[0])) ; i++) { i64gather_buffer[i] = HEDLEY_STATIC_CAST(int64_t, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i src = simde_x_mm256_loadu_epi64(test_vec[i].src); simde__m256i vindex = simde_x_mm256_loadu_epi64(test_vec[i].vindex); simde__m256i mask = simde_x_mm256_loadu_epi64(test_vec[i].mask); simde__m256i r = simde_mm256_mask_i64gather_epi64(src, i64gather_buffer, vindex, mask, 8); simde_test_x86_assert_equal_i64x4(r, simde_x_mm256_loadu_epi64(test_vec[i].r)); } return 0; } static simde_float32 f32gather_buffer[4096]; static int test_simde_mm_i32gather_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t vindex[4]; const simde_float32 r[4]; } test_vec[] = { { { INT32_C( 161), INT32_C( 17), INT32_C( 47), INT32_C( 177) }, { SIMDE_FLOAT32_C( 161.00), SIMDE_FLOAT32_C( 17.00), SIMDE_FLOAT32_C( 47.00), SIMDE_FLOAT32_C( 177.00) } }, { { INT32_C( 192), INT32_C( 196), INT32_C( 32), INT32_C( 56) }, { SIMDE_FLOAT32_C( 192.00), SIMDE_FLOAT32_C( 196.00), SIMDE_FLOAT32_C( 32.00), SIMDE_FLOAT32_C( 56.00) } }, { { INT32_C( 243), INT32_C( 253), INT32_C( 198), INT32_C( 186) }, { SIMDE_FLOAT32_C( 243.00), SIMDE_FLOAT32_C( 253.00), SIMDE_FLOAT32_C( 198.00), SIMDE_FLOAT32_C( 186.00) } }, { { INT32_C( 201), INT32_C( 25), INT32_C( 83), INT32_C( 64) }, { SIMDE_FLOAT32_C( 201.00), SIMDE_FLOAT32_C( 25.00), SIMDE_FLOAT32_C( 83.00), SIMDE_FLOAT32_C( 64.00) } }, { { INT32_C( 173), INT32_C( 203), INT32_C( 45), INT32_C( 53) }, { SIMDE_FLOAT32_C( 173.00), SIMDE_FLOAT32_C( 203.00), SIMDE_FLOAT32_C( 45.00), SIMDE_FLOAT32_C( 53.00) } }, { { INT32_C( 232), INT32_C( 218), INT32_C( 33), INT32_C( 228) }, { SIMDE_FLOAT32_C( 232.00), SIMDE_FLOAT32_C( 218.00), SIMDE_FLOAT32_C( 33.00), SIMDE_FLOAT32_C( 228.00) } }, { { INT32_C( 26), INT32_C( 51), 
INT32_C( 182), INT32_C( 102) }, { SIMDE_FLOAT32_C( 26.00), SIMDE_FLOAT32_C( 51.00), SIMDE_FLOAT32_C( 182.00), SIMDE_FLOAT32_C( 102.00) } }, { { INT32_C( 4), INT32_C( 101), INT32_C( 200), INT32_C( 229) }, { SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 101.00), SIMDE_FLOAT32_C( 200.00), SIMDE_FLOAT32_C( 229.00) } } }; for (size_t i = 0 ; i < (sizeof(f32gather_buffer) / sizeof(f32gather_buffer[0])) ; i++) { f32gather_buffer[i] = HEDLEY_STATIC_CAST(simde_float32, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i vindex = simde_x_mm_loadu_epi32(test_vec[i].vindex); simde__m128 r = simde_mm_i32gather_ps(f32gather_buffer, vindex, 4); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_mask_i32gather_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[4]; const int32_t vindex[4]; const simde_float32 mask[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 349.70), SIMDE_FLOAT32_C( -696.38), SIMDE_FLOAT32_C( 625.38), SIMDE_FLOAT32_C( 53.26) }, { INT32_C( 66), INT32_C( 235), INT32_C( 243), INT32_C( 24) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( 349.70), SIMDE_FLOAT32_C( 235.00), SIMDE_FLOAT32_C( 625.38), SIMDE_FLOAT32_C( 24.00) } }, { { SIMDE_FLOAT32_C( -959.90), SIMDE_FLOAT32_C( -440.10), SIMDE_FLOAT32_C( 558.05), SIMDE_FLOAT32_C( -71.77) }, { INT32_C( 40), INT32_C( 16), INT32_C( 251), INT32_C( 54) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( -959.90), SIMDE_FLOAT32_C( 16.00), SIMDE_FLOAT32_C( 558.05), SIMDE_FLOAT32_C( 54.00) } }, { { SIMDE_FLOAT32_C( 152.90), SIMDE_FLOAT32_C( 196.28), SIMDE_FLOAT32_C( -304.14), SIMDE_FLOAT32_C( 91.85) }, { INT32_C( 37), INT32_C( 26), INT32_C( 139), INT32_C( 218) }, { SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( 152.90), SIMDE_FLOAT32_C( 26.00), SIMDE_FLOAT32_C( -304.14), SIMDE_FLOAT32_C( 218.00) } }, { { SIMDE_FLOAT32_C( 91.05), SIMDE_FLOAT32_C( 444.96), SIMDE_FLOAT32_C( 775.00), SIMDE_FLOAT32_C( -616.08) }, { INT32_C( 74), INT32_C( 136), INT32_C( 157), INT32_C( 68) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( 91.05), SIMDE_FLOAT32_C( 136.00), SIMDE_FLOAT32_C( 775.00), SIMDE_FLOAT32_C( 68.00) } }, { { SIMDE_FLOAT32_C( -111.35), SIMDE_FLOAT32_C( 886.72), SIMDE_FLOAT32_C( 255.61), SIMDE_FLOAT32_C( -399.36) }, { INT32_C( 126), INT32_C( 154), INT32_C( 177), INT32_C( 65) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( -111.35), SIMDE_FLOAT32_C( 154.00), SIMDE_FLOAT32_C( 255.61), SIMDE_FLOAT32_C( 65.00) } }, { { SIMDE_FLOAT32_C( 48.78), SIMDE_FLOAT32_C( 652.36), SIMDE_FLOAT32_C( -139.19), SIMDE_FLOAT32_C( -584.66) }, { INT32_C( 215), INT32_C( 81), INT32_C( 153), INT32_C( 75) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( 48.78), SIMDE_FLOAT32_C( 81.00), SIMDE_FLOAT32_C( -139.19), SIMDE_FLOAT32_C( 75.00) } }, { { SIMDE_FLOAT32_C( 39.45), SIMDE_FLOAT32_C( 995.37), SIMDE_FLOAT32_C( 468.08), SIMDE_FLOAT32_C( -82.06) }, { INT32_C( 220), INT32_C( 183), INT32_C( 125), INT32_C( 44) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( 39.45), SIMDE_FLOAT32_C( 183.00), SIMDE_FLOAT32_C( 468.08), SIMDE_FLOAT32_C( 44.00) } }, { { SIMDE_FLOAT32_C( 883.35), SIMDE_FLOAT32_C( -889.08), SIMDE_FLOAT32_C( 355.42), SIMDE_FLOAT32_C( 899.85) }, { INT32_C( 173), INT32_C( 192), INT32_C( 223), INT32_C( 168) }, { SIMDE_FLOAT32_C( 
0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( 883.35), SIMDE_FLOAT32_C( 192.00), SIMDE_FLOAT32_C( 355.42), SIMDE_FLOAT32_C( 168.00) } } }; for (size_t i = 0 ; i < (sizeof(f32gather_buffer) / sizeof(f32gather_buffer[0])) ; i++) { f32gather_buffer[i] = HEDLEY_STATIC_CAST(simde_float32, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 src = simde_mm_loadu_ps(test_vec[i].src); simde__m128i vindex = simde_x_mm_loadu_epi32(test_vec[i].vindex); simde__m128 mask = simde_mm_loadu_ps(test_vec[i].mask); simde__m128 r = simde_mm_mask_i32gather_ps(src, f32gather_buffer, vindex, mask, 4); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_i32gather_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t vindex[8]; const simde_float32 r[8]; } test_vec[] = { { { INT32_C( 156), INT32_C( 110), INT32_C( 202), INT32_C( 30), INT32_C( 85), INT32_C( 63), INT32_C( 139), INT32_C( 34) }, { SIMDE_FLOAT32_C( 156.00), SIMDE_FLOAT32_C( 110.00), SIMDE_FLOAT32_C( 202.00), SIMDE_FLOAT32_C( 30.00), SIMDE_FLOAT32_C( 85.00), SIMDE_FLOAT32_C( 63.00), SIMDE_FLOAT32_C( 139.00), SIMDE_FLOAT32_C( 34.00) } }, { { INT32_C( 20), INT32_C( 209), INT32_C( 84), INT32_C( 116), INT32_C( 159), INT32_C( 9), INT32_C( 45), INT32_C( 240) }, { SIMDE_FLOAT32_C( 20.00), SIMDE_FLOAT32_C( 209.00), SIMDE_FLOAT32_C( 84.00), SIMDE_FLOAT32_C( 116.00), SIMDE_FLOAT32_C( 159.00), SIMDE_FLOAT32_C( 9.00), SIMDE_FLOAT32_C( 45.00), SIMDE_FLOAT32_C( 240.00) } }, { { INT32_C( 229), INT32_C( 18), INT32_C( 146), INT32_C( 118), INT32_C( 19), INT32_C( 136), INT32_C( 196), INT32_C( 132) }, { SIMDE_FLOAT32_C( 229.00), SIMDE_FLOAT32_C( 18.00), SIMDE_FLOAT32_C( 146.00), SIMDE_FLOAT32_C( 118.00), SIMDE_FLOAT32_C( 19.00), SIMDE_FLOAT32_C( 136.00), SIMDE_FLOAT32_C( 196.00), SIMDE_FLOAT32_C( 132.00) } }, { { INT32_C( 95), INT32_C( 188), INT32_C( 60), INT32_C( 
152), INT32_C( 20), INT32_C( 16), INT32_C( 188), INT32_C( 106) }, { SIMDE_FLOAT32_C( 95.00), SIMDE_FLOAT32_C( 188.00), SIMDE_FLOAT32_C( 60.00), SIMDE_FLOAT32_C( 152.00), SIMDE_FLOAT32_C( 20.00), SIMDE_FLOAT32_C( 16.00), SIMDE_FLOAT32_C( 188.00), SIMDE_FLOAT32_C( 106.00) } }, { { INT32_C( 126), INT32_C( 56), INT32_C( 197), INT32_C( 2), INT32_C( 172), INT32_C( 70), INT32_C( 92), INT32_C( 90) }, { SIMDE_FLOAT32_C( 126.00), SIMDE_FLOAT32_C( 56.00), SIMDE_FLOAT32_C( 197.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 172.00), SIMDE_FLOAT32_C( 70.00), SIMDE_FLOAT32_C( 92.00), SIMDE_FLOAT32_C( 90.00) } }, { { INT32_C( 179), INT32_C( 221), INT32_C( 122), INT32_C( 117), INT32_C( 214), INT32_C( 77), INT32_C( 138), INT32_C( 77) }, { SIMDE_FLOAT32_C( 179.00), SIMDE_FLOAT32_C( 221.00), SIMDE_FLOAT32_C( 122.00), SIMDE_FLOAT32_C( 117.00), SIMDE_FLOAT32_C( 214.00), SIMDE_FLOAT32_C( 77.00), SIMDE_FLOAT32_C( 138.00), SIMDE_FLOAT32_C( 77.00) } }, { { INT32_C( 201), INT32_C( 109), INT32_C( 209), INT32_C( 78), INT32_C( 64), INT32_C( 182), INT32_C( 193), INT32_C( 166) }, { SIMDE_FLOAT32_C( 201.00), SIMDE_FLOAT32_C( 109.00), SIMDE_FLOAT32_C( 209.00), SIMDE_FLOAT32_C( 78.00), SIMDE_FLOAT32_C( 64.00), SIMDE_FLOAT32_C( 182.00), SIMDE_FLOAT32_C( 193.00), SIMDE_FLOAT32_C( 166.00) } }, { { INT32_C( 179), INT32_C( 70), INT32_C( 166), INT32_C( 103), INT32_C( 13), INT32_C( 186), INT32_C( 19), INT32_C( 11) }, { SIMDE_FLOAT32_C( 179.00), SIMDE_FLOAT32_C( 70.00), SIMDE_FLOAT32_C( 166.00), SIMDE_FLOAT32_C( 103.00), SIMDE_FLOAT32_C( 13.00), SIMDE_FLOAT32_C( 186.00), SIMDE_FLOAT32_C( 19.00), SIMDE_FLOAT32_C( 11.00) } } }; for (size_t i = 0 ; i < (sizeof(f32gather_buffer) / sizeof(f32gather_buffer[0])) ; i++) { f32gather_buffer[i] = HEDLEY_STATIC_CAST(simde_float32, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i vindex = simde_x_mm256_loadu_epi32(test_vec[i].vindex); simde__m256 r = simde_mm256_i32gather_ps(f32gather_buffer, vindex, 4); 
simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_mask_i32gather_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[8]; const int32_t vindex[8]; const simde_float32 mask[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 265.75), SIMDE_FLOAT32_C( 153.07), SIMDE_FLOAT32_C( 737.27), SIMDE_FLOAT32_C( 671.90), SIMDE_FLOAT32_C( 596.53), SIMDE_FLOAT32_C( 857.73), SIMDE_FLOAT32_C( 355.81), SIMDE_FLOAT32_C( 117.96) }, { INT32_C( 116), INT32_C( 49), INT32_C( 1), INT32_C( 154), INT32_C( 238), INT32_C( 237), INT32_C( 63), INT32_C( 95) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( 265.75), SIMDE_FLOAT32_C( 49.00), SIMDE_FLOAT32_C( 737.27), SIMDE_FLOAT32_C( 154.00), SIMDE_FLOAT32_C( 596.53), SIMDE_FLOAT32_C( 237.00), SIMDE_FLOAT32_C( 355.81), SIMDE_FLOAT32_C( 95.00) } }, { { SIMDE_FLOAT32_C( 355.76), SIMDE_FLOAT32_C( -146.74), SIMDE_FLOAT32_C( 908.75), SIMDE_FLOAT32_C( 492.00), SIMDE_FLOAT32_C( -494.15), SIMDE_FLOAT32_C( -429.18), SIMDE_FLOAT32_C( 884.15), SIMDE_FLOAT32_C( 245.56) }, { INT32_C( 197), INT32_C( 188), INT32_C( 203), INT32_C( 9), INT32_C( 115), INT32_C( 247), INT32_C( 86), INT32_C( 118) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( 355.76), SIMDE_FLOAT32_C( 188.00), SIMDE_FLOAT32_C( 908.75), SIMDE_FLOAT32_C( 9.00), SIMDE_FLOAT32_C( -494.15), SIMDE_FLOAT32_C( 247.00), SIMDE_FLOAT32_C( 884.15), SIMDE_FLOAT32_C( 118.00) } }, { { SIMDE_FLOAT32_C( -488.70), SIMDE_FLOAT32_C( 468.34), SIMDE_FLOAT32_C( -802.96), SIMDE_FLOAT32_C( 324.69), SIMDE_FLOAT32_C( 
-471.47), SIMDE_FLOAT32_C( -786.76), SIMDE_FLOAT32_C( -838.26), SIMDE_FLOAT32_C( 905.98) }, { INT32_C( 142), INT32_C( 89), INT32_C( 225), INT32_C( 62), INT32_C( 206), INT32_C( 139), INT32_C( 54), INT32_C( 104) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( -488.70), SIMDE_FLOAT32_C( 89.00), SIMDE_FLOAT32_C( -802.96), SIMDE_FLOAT32_C( 62.00), SIMDE_FLOAT32_C( -471.47), SIMDE_FLOAT32_C( 139.00), SIMDE_FLOAT32_C( -838.26), SIMDE_FLOAT32_C( 104.00) } }, { { SIMDE_FLOAT32_C( -743.20), SIMDE_FLOAT32_C( 452.48), SIMDE_FLOAT32_C( 844.69), SIMDE_FLOAT32_C( 377.91), SIMDE_FLOAT32_C( -629.91), SIMDE_FLOAT32_C( -501.68), SIMDE_FLOAT32_C( 485.08), SIMDE_FLOAT32_C( 859.54) }, { INT32_C( 110), INT32_C( 70), INT32_C( 8), INT32_C( 120), INT32_C( 248), INT32_C( 78), INT32_C( 213), INT32_C( 81) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( -743.20), SIMDE_FLOAT32_C( 70.00), SIMDE_FLOAT32_C( 844.69), SIMDE_FLOAT32_C( 120.00), SIMDE_FLOAT32_C( -629.91), SIMDE_FLOAT32_C( 78.00), SIMDE_FLOAT32_C( 485.08), SIMDE_FLOAT32_C( 81.00) } }, { { SIMDE_FLOAT32_C( -292.29), SIMDE_FLOAT32_C( -955.02), SIMDE_FLOAT32_C( -774.96), SIMDE_FLOAT32_C( 472.21), SIMDE_FLOAT32_C( 826.00), SIMDE_FLOAT32_C( -866.19), SIMDE_FLOAT32_C( 101.43), SIMDE_FLOAT32_C( -113.16) }, { INT32_C( 154), INT32_C( 23), INT32_C( 119), INT32_C( 42), INT32_C( 169), INT32_C( 248), INT32_C( 64), INT32_C( 209) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( -292.29), SIMDE_FLOAT32_C( 23.00), SIMDE_FLOAT32_C( -774.96), SIMDE_FLOAT32_C( 42.00), SIMDE_FLOAT32_C( 826.00), SIMDE_FLOAT32_C( 248.00), SIMDE_FLOAT32_C( 101.43), SIMDE_FLOAT32_C( 209.00) } }, { { SIMDE_FLOAT32_C( 24.55), SIMDE_FLOAT32_C( 652.58), SIMDE_FLOAT32_C( 245.18), SIMDE_FLOAT32_C( -784.38), SIMDE_FLOAT32_C( -205.61), SIMDE_FLOAT32_C( -224.89), SIMDE_FLOAT32_C( -769.96), SIMDE_FLOAT32_C( -33.65) }, { INT32_C( 77), INT32_C( 199), INT32_C( 107), INT32_C( 90), INT32_C( 74), INT32_C( 93), INT32_C( 41), INT32_C( 69) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( 24.55), SIMDE_FLOAT32_C( 199.00), SIMDE_FLOAT32_C( 245.18), SIMDE_FLOAT32_C( 90.00), SIMDE_FLOAT32_C( -205.61), SIMDE_FLOAT32_C( 93.00), SIMDE_FLOAT32_C( -769.96), SIMDE_FLOAT32_C( 69.00) } }, { { SIMDE_FLOAT32_C( 709.81), SIMDE_FLOAT32_C( -130.13), SIMDE_FLOAT32_C( -541.91), SIMDE_FLOAT32_C( -394.05), SIMDE_FLOAT32_C( 142.57), SIMDE_FLOAT32_C( -331.04), SIMDE_FLOAT32_C( 771.89), SIMDE_FLOAT32_C( -815.62) }, { INT32_C( 45), INT32_C( 168), INT32_C( 242), INT32_C( 53), INT32_C( 156), INT32_C( 67), INT32_C( 131), INT32_C( 254) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( 709.81), SIMDE_FLOAT32_C( 168.00), SIMDE_FLOAT32_C( -541.91), SIMDE_FLOAT32_C( 53.00), SIMDE_FLOAT32_C( 142.57), SIMDE_FLOAT32_C( 67.00), SIMDE_FLOAT32_C( 771.89), SIMDE_FLOAT32_C( 254.00) } }, { { SIMDE_FLOAT32_C( 913.20), SIMDE_FLOAT32_C( -738.56), SIMDE_FLOAT32_C( 273.37), SIMDE_FLOAT32_C( -405.28), SIMDE_FLOAT32_C( 609.46), SIMDE_FLOAT32_C( -442.74), SIMDE_FLOAT32_C( 
800.15), SIMDE_FLOAT32_C( 259.23) }, { INT32_C( 86), INT32_C( 169), INT32_C( 125), INT32_C( 69), INT32_C( 23), INT32_C( 149), INT32_C( 111), INT32_C( 202) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( 913.20), SIMDE_FLOAT32_C( 169.00), SIMDE_FLOAT32_C( 273.37), SIMDE_FLOAT32_C( 69.00), SIMDE_FLOAT32_C( 609.46), SIMDE_FLOAT32_C( 149.00), SIMDE_FLOAT32_C( 800.15), SIMDE_FLOAT32_C( 202.00) } } }; for (size_t i = 0 ; i < (sizeof(f32gather_buffer) / sizeof(f32gather_buffer[0])) ; i++) { f32gather_buffer[i] = HEDLEY_STATIC_CAST(simde_float32, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 src = simde_mm256_loadu_ps(test_vec[i].src); simde__m256i vindex = simde_x_mm256_loadu_epi32(test_vec[i].vindex); simde__m256 mask = simde_mm256_loadu_ps(test_vec[i].mask); simde__m256 r = simde_mm256_mask_i32gather_ps(src, f32gather_buffer, vindex, mask, 4); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_i64gather_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t vindex[2]; const simde_float32 r[4]; } test_vec[] = { { { INT64_C( 255), INT64_C( 74) }, { SIMDE_FLOAT32_C( 255.00), SIMDE_FLOAT32_C( 74.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { INT64_C( 169), INT64_C( 122) }, { SIMDE_FLOAT32_C( 169.00), SIMDE_FLOAT32_C( 122.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { INT64_C( 224), INT64_C( 199) }, { SIMDE_FLOAT32_C( 224.00), SIMDE_FLOAT32_C( 199.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { INT64_C( 59), INT64_C( 158) }, { SIMDE_FLOAT32_C( 59.00), SIMDE_FLOAT32_C( 158.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { INT64_C( 50), INT64_C( 7) }, { SIMDE_FLOAT32_C( 50.00), SIMDE_FLOAT32_C( 7.00), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { INT64_C( 139), INT64_C( 78) }, { SIMDE_FLOAT32_C( 139.00), SIMDE_FLOAT32_C( 78.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { INT64_C( 100), INT64_C( 188) }, { SIMDE_FLOAT32_C( 100.00), SIMDE_FLOAT32_C( 188.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { INT64_C( 215), INT64_C( 83) }, { SIMDE_FLOAT32_C( 215.00), SIMDE_FLOAT32_C( 83.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(f32gather_buffer) / sizeof(f32gather_buffer[0])) ; i++) { f32gather_buffer[i] = HEDLEY_STATIC_CAST(simde_float32, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i vindex = simde_x_mm_loadu_epi64(test_vec[i].vindex); simde__m128 r = simde_mm_i64gather_ps(f32gather_buffer, vindex, 4); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_mask_i64gather_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[4]; const int64_t vindex[2]; const simde_float32 mask[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -910.45), SIMDE_FLOAT32_C( 471.73), SIMDE_FLOAT32_C( -109.25), SIMDE_FLOAT32_C( -741.72) }, { INT64_C( 149), INT64_C( 105) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( -910.45), SIMDE_FLOAT32_C( 105.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 501.51), SIMDE_FLOAT32_C( -569.16), SIMDE_FLOAT32_C( -498.09), SIMDE_FLOAT32_C( 591.93) }, { INT64_C( 253), INT64_C( 55) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( 501.51), SIMDE_FLOAT32_C( 55.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -500.50), SIMDE_FLOAT32_C( -787.96), SIMDE_FLOAT32_C( -8.71), SIMDE_FLOAT32_C( -327.02) }, { 
INT64_C( 86), INT64_C( 3) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( -500.50), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 268.16), SIMDE_FLOAT32_C( -168.09), SIMDE_FLOAT32_C( 771.83), SIMDE_FLOAT32_C( 974.65) }, { INT64_C( 183), INT64_C( 149) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( 268.16), SIMDE_FLOAT32_C( 149.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -167.43), SIMDE_FLOAT32_C( -25.72), SIMDE_FLOAT32_C( 800.73), SIMDE_FLOAT32_C( 362.38) }, { INT64_C( 37), INT64_C( 58) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( -167.43), SIMDE_FLOAT32_C( 58.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 879.51), SIMDE_FLOAT32_C( -467.85), SIMDE_FLOAT32_C( -818.26), SIMDE_FLOAT32_C( 221.05) }, { INT64_C( 234), INT64_C( 186) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( 879.51), SIMDE_FLOAT32_C( 186.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -462.62), SIMDE_FLOAT32_C( -246.74), SIMDE_FLOAT32_C( -719.57), SIMDE_FLOAT32_C( 388.25) }, { INT64_C( 12), INT64_C( 199) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( -462.62), SIMDE_FLOAT32_C( 199.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -212.03), SIMDE_FLOAT32_C( -784.09), SIMDE_FLOAT32_C( 477.31), SIMDE_FLOAT32_C( 490.83) }, { INT64_C( 10), INT64_C( 106) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { 
SIMDE_FLOAT32_C( -212.03), SIMDE_FLOAT32_C( 106.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(f32gather_buffer) / sizeof(f32gather_buffer[0])) ; i++) { f32gather_buffer[i] = HEDLEY_STATIC_CAST(simde_float32, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 src = simde_mm_loadu_ps(test_vec[i].src); simde__m128i vindex = simde_x_mm_loadu_epi64(test_vec[i].vindex); simde__m128 mask = simde_mm_loadu_ps(test_vec[i].mask); simde__m128 r = simde_mm_mask_i64gather_ps(src, f32gather_buffer, vindex, mask, 4); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_i64gather_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t vindex[4]; const simde_float32 r[4]; } test_vec[] = { { { INT64_C( 102), INT64_C( 248), INT64_C( 114), INT64_C( 223) }, { SIMDE_FLOAT32_C( 102.00), SIMDE_FLOAT32_C( 248.00), SIMDE_FLOAT32_C( 114.00), SIMDE_FLOAT32_C( 223.00) } }, { { INT64_C( 250), INT64_C( 132), INT64_C( 220), INT64_C( 173) }, { SIMDE_FLOAT32_C( 250.00), SIMDE_FLOAT32_C( 132.00), SIMDE_FLOAT32_C( 220.00), SIMDE_FLOAT32_C( 173.00) } }, { { INT64_C( 19), INT64_C( 230), INT64_C( 86), INT64_C( 7) }, { SIMDE_FLOAT32_C( 19.00), SIMDE_FLOAT32_C( 230.00), SIMDE_FLOAT32_C( 86.00), SIMDE_FLOAT32_C( 7.00) } }, { { INT64_C( 137), INT64_C( 241), INT64_C( 195), INT64_C( 198) }, { SIMDE_FLOAT32_C( 137.00), SIMDE_FLOAT32_C( 241.00), SIMDE_FLOAT32_C( 195.00), SIMDE_FLOAT32_C( 198.00) } }, { { INT64_C( 252), INT64_C( 215), INT64_C( 67), INT64_C( 180) }, { SIMDE_FLOAT32_C( 252.00), SIMDE_FLOAT32_C( 215.00), SIMDE_FLOAT32_C( 67.00), SIMDE_FLOAT32_C( 180.00) } }, { { INT64_C( 100), INT64_C( 137), INT64_C( 243), INT64_C( 185) }, { SIMDE_FLOAT32_C( 100.00), SIMDE_FLOAT32_C( 137.00), SIMDE_FLOAT32_C( 243.00), SIMDE_FLOAT32_C( 185.00) } }, { { INT64_C( 175), INT64_C( 29), INT64_C( 122), INT64_C( 116) }, { SIMDE_FLOAT32_C( 175.00), SIMDE_FLOAT32_C( 
29.00), SIMDE_FLOAT32_C( 122.00), SIMDE_FLOAT32_C( 116.00) } }, { { INT64_C( 48), INT64_C( 182), INT64_C( 53), INT64_C( 192) }, { SIMDE_FLOAT32_C( 48.00), SIMDE_FLOAT32_C( 182.00), SIMDE_FLOAT32_C( 53.00), SIMDE_FLOAT32_C( 192.00) } } }; for (size_t i = 0 ; i < (sizeof(f32gather_buffer) / sizeof(f32gather_buffer[0])) ; i++) { f32gather_buffer[i] = HEDLEY_STATIC_CAST(simde_float32, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i vindex = simde_x_mm256_loadu_epi64(test_vec[i].vindex); simde__m128 r = simde_mm256_i64gather_ps(f32gather_buffer, vindex, 4); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_mask_i64gather_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[4]; const int64_t vindex[4]; const simde_float32 mask[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -557.85), SIMDE_FLOAT32_C( -785.29), SIMDE_FLOAT32_C( -569.41), SIMDE_FLOAT32_C( -404.80) }, { INT64_C( 204), INT64_C( 120), INT64_C( 37), INT64_C( 130) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( -557.85), SIMDE_FLOAT32_C( 120.00), SIMDE_FLOAT32_C( -569.41), SIMDE_FLOAT32_C( 130.00) } }, { { SIMDE_FLOAT32_C( 831.45), SIMDE_FLOAT32_C( -114.89), SIMDE_FLOAT32_C( -779.68), SIMDE_FLOAT32_C( 6.19) }, { INT64_C( 168), INT64_C( 241), INT64_C( 231), INT64_C( 98) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( 831.45), SIMDE_FLOAT32_C( 241.00), SIMDE_FLOAT32_C( -779.68), SIMDE_FLOAT32_C( 98.00) } }, { { SIMDE_FLOAT32_C( -926.84), SIMDE_FLOAT32_C( -613.11), SIMDE_FLOAT32_C( 474.57), SIMDE_FLOAT32_C( -308.37) }, { INT64_C( 188), INT64_C( 117), INT64_C( 230), INT64_C( 107) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( -926.84), SIMDE_FLOAT32_C( 117.00), SIMDE_FLOAT32_C( 474.57), SIMDE_FLOAT32_C( 107.00) } }, { { SIMDE_FLOAT32_C( -210.98), SIMDE_FLOAT32_C( -337.89), SIMDE_FLOAT32_C( -71.79), SIMDE_FLOAT32_C( -805.12) }, { INT64_C( 188), INT64_C( 100), INT64_C( 159), INT64_C( 80) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( -210.98), SIMDE_FLOAT32_C( 100.00), SIMDE_FLOAT32_C( -71.79), SIMDE_FLOAT32_C( 80.00) } }, { { SIMDE_FLOAT32_C( -900.97), SIMDE_FLOAT32_C( 631.07), SIMDE_FLOAT32_C( 698.37), SIMDE_FLOAT32_C( 195.85) }, { INT64_C( 49), INT64_C( 72), INT64_C( 255), INT64_C( 112) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( -900.97), SIMDE_FLOAT32_C( 72.00), SIMDE_FLOAT32_C( 698.37), SIMDE_FLOAT32_C( 112.00) } }, { { SIMDE_FLOAT32_C( -812.07), SIMDE_FLOAT32_C( 60.40), SIMDE_FLOAT32_C( -955.01), SIMDE_FLOAT32_C( -625.48) }, { INT64_C( 7), INT64_C( 209), INT64_C( 12), INT64_C( 97) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( -812.07), SIMDE_FLOAT32_C( 209.00), SIMDE_FLOAT32_C( -955.01), SIMDE_FLOAT32_C( 97.00) } }, { { SIMDE_FLOAT32_C( -948.25), SIMDE_FLOAT32_C( 53.13), SIMDE_FLOAT32_C( -171.41), SIMDE_FLOAT32_C( 245.75) }, { INT64_C( 170), INT64_C( 104), INT64_C( 82), INT64_C( 123) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( -948.25), SIMDE_FLOAT32_C( 104.00), SIMDE_FLOAT32_C( -171.41), SIMDE_FLOAT32_C( 123.00) } }, { { SIMDE_FLOAT32_C( -63.25), SIMDE_FLOAT32_C( 125.71), SIMDE_FLOAT32_C( 69.11), SIMDE_FLOAT32_C( -25.31) }, { INT64_C( 145), INT64_C( 84), INT64_C( 244), INT64_C( 22) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-2147483648.00) }, { SIMDE_FLOAT32_C( -63.25), SIMDE_FLOAT32_C( 84.00), SIMDE_FLOAT32_C( 69.11), SIMDE_FLOAT32_C( 22.00) } } }; for (size_t i = 0 ; i < (sizeof(f32gather_buffer) / sizeof(f32gather_buffer[0])) ; i++) { f32gather_buffer[i] = HEDLEY_STATIC_CAST(simde_float32, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 src = simde_mm_loadu_ps(test_vec[i].src); simde__m256i vindex = simde_x_mm256_loadu_epi64(test_vec[i].vindex); simde__m128 mask = simde_mm_loadu_ps(test_vec[i].mask); simde__m128 r = simde_mm256_mask_i64gather_ps(src, f32gather_buffer, vindex, mask, 4); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static simde_float64 f64gather_buffer[4096]; static int test_simde_mm_i32gather_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t vindex[4]; const simde_float64 r[2]; } test_vec[] = { { { INT32_C( 151), INT32_C( 228), INT32_C( 22), INT32_C( 128) }, { SIMDE_FLOAT64_C( 151.00), SIMDE_FLOAT64_C( 228.00) } }, { { INT32_C( 162), INT32_C( 84), INT32_C( 9), INT32_C( 70) }, { SIMDE_FLOAT64_C( 162.00), SIMDE_FLOAT64_C( 84.00) } }, { { INT32_C( 218), INT32_C( 181), INT32_C( 119), INT32_C( 133) }, { SIMDE_FLOAT64_C( 218.00), SIMDE_FLOAT64_C( 181.00) } }, { { INT32_C( 22), INT32_C( 114), INT32_C( 193), INT32_C( 112) }, { SIMDE_FLOAT64_C( 22.00), SIMDE_FLOAT64_C( 114.00) } }, { { INT32_C( 79), INT32_C( 111), INT32_C( 117), INT32_C( 77) }, { SIMDE_FLOAT64_C( 79.00), SIMDE_FLOAT64_C( 111.00) } }, { { INT32_C( 123), INT32_C( 187), INT32_C( 81), INT32_C( 26) }, { SIMDE_FLOAT64_C( 123.00), SIMDE_FLOAT64_C( 187.00) } }, { { INT32_C( 171), INT32_C( 12), INT32_C( 85), INT32_C( 56) }, { SIMDE_FLOAT64_C( 171.00), SIMDE_FLOAT64_C( 12.00) } }, { { INT32_C( 64), INT32_C( 180), INT32_C( 87), INT32_C( 104) }, { SIMDE_FLOAT64_C( 64.00), SIMDE_FLOAT64_C( 180.00) } } }; for (size_t i = 0 ; i < (sizeof(f64gather_buffer) / 
sizeof(f64gather_buffer[0])) ; i++) { f64gather_buffer[i] = HEDLEY_STATIC_CAST(simde_float64, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i vindex = simde_x_mm_loadu_epi32(test_vec[i].vindex); simde__m128d r = simde_mm_i32gather_pd(f64gather_buffer, vindex, 8); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_mask_i32gather_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[2]; const int32_t vindex[4]; const simde_float64 mask[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -480.33), SIMDE_FLOAT64_C( -315.66) }, { INT32_C( 197), INT32_C( 5), INT32_C( 11), INT32_C( 103) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( -480.33), SIMDE_FLOAT64_C( 5.00) } }, { { SIMDE_FLOAT64_C( -766.75), SIMDE_FLOAT64_C( -247.86) }, { INT32_C( 60), INT32_C( 245), INT32_C( 66), INT32_C( 155) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( -766.75), SIMDE_FLOAT64_C( 245.00) } }, { { SIMDE_FLOAT64_C( -243.19), SIMDE_FLOAT64_C( 794.07) }, { INT32_C( 29), INT32_C( 87), INT32_C( 99), INT32_C( 99) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( -243.19), SIMDE_FLOAT64_C( 87.00) } }, { { SIMDE_FLOAT64_C( 424.26), SIMDE_FLOAT64_C( -504.72) }, { INT32_C( 71), INT32_C( 70), INT32_C( 235), INT32_C( 194) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( 424.26), SIMDE_FLOAT64_C( 70.00) } }, { { SIMDE_FLOAT64_C( 881.31), SIMDE_FLOAT64_C( -768.55) }, { INT32_C( 65), INT32_C( 188), INT32_C( 232), INT32_C( 73) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( 881.31), SIMDE_FLOAT64_C( 188.00) } }, { { SIMDE_FLOAT64_C( -774.67), SIMDE_FLOAT64_C( 356.86) }, { INT32_C( 174), INT32_C( 126), INT32_C( 251), INT32_C( 40) }, { SIMDE_FLOAT64_C( 
0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( -774.67), SIMDE_FLOAT64_C( 126.00) } }, { { SIMDE_FLOAT64_C( -351.27), SIMDE_FLOAT64_C( -343.71) }, { INT32_C( 190), INT32_C( 201), INT32_C( 87), INT32_C( 59) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( -351.27), SIMDE_FLOAT64_C( 201.00) } }, { { SIMDE_FLOAT64_C( -898.19), SIMDE_FLOAT64_C( -375.56) }, { INT32_C( 234), INT32_C( 148), INT32_C( 170), INT32_C( 24) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( -898.19), SIMDE_FLOAT64_C( 148.00) } } }; for (size_t i = 0 ; i < (sizeof(f64gather_buffer) / sizeof(f64gather_buffer[0])) ; i++) { f64gather_buffer[i] = HEDLEY_STATIC_CAST(simde_float64, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d src = simde_mm_loadu_pd(test_vec[i].src); simde__m128i vindex = simde_x_mm_loadu_epi32(test_vec[i].vindex); simde__m128d mask = simde_mm_loadu_pd(test_vec[i].mask); simde__m128d r = simde_mm_mask_i32gather_pd(src, f64gather_buffer, vindex, mask, 8); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_i32gather_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t vindex[4]; const simde_float64 r[4]; } test_vec[] = { { { INT32_C( 177), INT32_C( 141), INT32_C( 100), INT32_C( 45) }, { SIMDE_FLOAT64_C( 177.00), SIMDE_FLOAT64_C( 141.00), SIMDE_FLOAT64_C( 100.00), SIMDE_FLOAT64_C( 45.00) } }, { { INT32_C( 150), INT32_C( 114), INT32_C( 229), INT32_C( 101) }, { SIMDE_FLOAT64_C( 150.00), SIMDE_FLOAT64_C( 114.00), SIMDE_FLOAT64_C( 229.00), SIMDE_FLOAT64_C( 101.00) } }, { { INT32_C( 226), INT32_C( 173), INT32_C( 89), INT32_C( 9) }, { SIMDE_FLOAT64_C( 226.00), SIMDE_FLOAT64_C( 173.00), SIMDE_FLOAT64_C( 89.00), SIMDE_FLOAT64_C( 9.00) } }, { { INT32_C( 29), INT32_C( 54), INT32_C( 230), INT32_C( 153) }, { SIMDE_FLOAT64_C( 29.00), SIMDE_FLOAT64_C( 54.00), 
SIMDE_FLOAT64_C( 230.00), SIMDE_FLOAT64_C( 153.00) } }, { { INT32_C( 130), INT32_C( 112), INT32_C( 170), INT32_C( 75) }, { SIMDE_FLOAT64_C( 130.00), SIMDE_FLOAT64_C( 112.00), SIMDE_FLOAT64_C( 170.00), SIMDE_FLOAT64_C( 75.00) } }, { { INT32_C( 81), INT32_C( 83), INT32_C( 244), INT32_C( 81) }, { SIMDE_FLOAT64_C( 81.00), SIMDE_FLOAT64_C( 83.00), SIMDE_FLOAT64_C( 244.00), SIMDE_FLOAT64_C( 81.00) } }, { { INT32_C( 29), INT32_C( 15), INT32_C( 115), INT32_C( 79) }, { SIMDE_FLOAT64_C( 29.00), SIMDE_FLOAT64_C( 15.00), SIMDE_FLOAT64_C( 115.00), SIMDE_FLOAT64_C( 79.00) } }, { { INT32_C( 131), INT32_C( 207), INT32_C( 76), INT32_C( 123) }, { SIMDE_FLOAT64_C( 131.00), SIMDE_FLOAT64_C( 207.00), SIMDE_FLOAT64_C( 76.00), SIMDE_FLOAT64_C( 123.00) } } }; for (size_t i = 0 ; i < (sizeof(f64gather_buffer) / sizeof(f64gather_buffer[0])) ; i++) { f64gather_buffer[i] = HEDLEY_STATIC_CAST(simde_float64, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i vindex = simde_x_mm_loadu_epi32(test_vec[i].vindex); simde__m256d r = simde_mm256_i32gather_pd(f64gather_buffer, vindex, 8); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_mask_i32gather_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[4]; const int32_t vindex[4]; const simde_float64 mask[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 267.18), SIMDE_FLOAT64_C( 752.26), SIMDE_FLOAT64_C( 216.84), SIMDE_FLOAT64_C( 987.12) }, { INT32_C( 123), INT32_C( 197), INT32_C( 179), INT32_C( 0) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( 267.18), SIMDE_FLOAT64_C( 197.00), SIMDE_FLOAT64_C( 216.84), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 608.28), SIMDE_FLOAT64_C( 322.28), SIMDE_FLOAT64_C( 399.60), SIMDE_FLOAT64_C( -753.21) }, { INT32_C( 249), INT32_C( 84), INT32_C( 194), 
INT32_C( 44) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( 608.28), SIMDE_FLOAT64_C( 84.00), SIMDE_FLOAT64_C( 399.60), SIMDE_FLOAT64_C( 44.00) } }, { { SIMDE_FLOAT64_C( 765.40), SIMDE_FLOAT64_C( -73.02), SIMDE_FLOAT64_C( 385.76), SIMDE_FLOAT64_C( 103.67) }, { INT32_C( 55), INT32_C( 66), INT32_C( 86), INT32_C( 189) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( 765.40), SIMDE_FLOAT64_C( 66.00), SIMDE_FLOAT64_C( 385.76), SIMDE_FLOAT64_C( 189.00) } }, { { SIMDE_FLOAT64_C( 170.06), SIMDE_FLOAT64_C( 333.86), SIMDE_FLOAT64_C( -731.28), SIMDE_FLOAT64_C( -526.76) }, { INT32_C( 11), INT32_C( 174), INT32_C( 199), INT32_C( 205) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( 170.06), SIMDE_FLOAT64_C( 174.00), SIMDE_FLOAT64_C( -731.28), SIMDE_FLOAT64_C( 205.00) } }, { { SIMDE_FLOAT64_C( -655.08), SIMDE_FLOAT64_C( 43.94), SIMDE_FLOAT64_C( 458.52), SIMDE_FLOAT64_C( 50.01) }, { INT32_C( 17), INT32_C( 204), INT32_C( 61), INT32_C( 41) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( -655.08), SIMDE_FLOAT64_C( 204.00), SIMDE_FLOAT64_C( 458.52), SIMDE_FLOAT64_C( 41.00) } }, { { SIMDE_FLOAT64_C( 91.50), SIMDE_FLOAT64_C( 334.64), SIMDE_FLOAT64_C( -285.42), SIMDE_FLOAT64_C( 872.18) }, { INT32_C( 206), INT32_C( 59), INT32_C( 5), INT32_C( 108) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( 91.50), SIMDE_FLOAT64_C( 59.00), SIMDE_FLOAT64_C( -285.42), SIMDE_FLOAT64_C( 108.00) } }, { { SIMDE_FLOAT64_C( -329.65), SIMDE_FLOAT64_C( 179.37), 
SIMDE_FLOAT64_C( -815.96), SIMDE_FLOAT64_C( 824.89) }, { INT32_C( 228), INT32_C( 234), INT32_C( 46), INT32_C( 94) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( -329.65), SIMDE_FLOAT64_C( 234.00), SIMDE_FLOAT64_C( -815.96), SIMDE_FLOAT64_C( 94.00) } }, { { SIMDE_FLOAT64_C( -490.85), SIMDE_FLOAT64_C( 503.59), SIMDE_FLOAT64_C( -623.45), SIMDE_FLOAT64_C( 519.67) }, { INT32_C( 127), INT32_C( 223), INT32_C( 120), INT32_C( 66) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( -490.85), SIMDE_FLOAT64_C( 223.00), SIMDE_FLOAT64_C( -623.45), SIMDE_FLOAT64_C( 66.00) } } }; for (size_t i = 0 ; i < (sizeof(f64gather_buffer) / sizeof(f64gather_buffer[0])) ; i++) { f64gather_buffer[i] = HEDLEY_STATIC_CAST(simde_float64, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d src = simde_mm256_loadu_pd(test_vec[i].src); simde__m128i vindex = simde_x_mm_loadu_epi32(test_vec[i].vindex); simde__m256d mask = simde_mm256_loadu_pd(test_vec[i].mask); simde__m256d r = simde_mm256_mask_i32gather_pd(src, f64gather_buffer, vindex, mask, 8); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_i64gather_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t vindex[2]; const simde_float64 r[2]; } test_vec[] = { { { INT64_C( 44), INT64_C( 35) }, { SIMDE_FLOAT64_C( 44.00), SIMDE_FLOAT64_C( 35.00) } }, { { INT64_C( 67), INT64_C( 111) }, { SIMDE_FLOAT64_C( 67.00), SIMDE_FLOAT64_C( 111.00) } }, { { INT64_C( 136), INT64_C( 227) }, { SIMDE_FLOAT64_C( 136.00), SIMDE_FLOAT64_C( 227.00) } }, { { INT64_C( 149), INT64_C( 42) }, { SIMDE_FLOAT64_C( 149.00), SIMDE_FLOAT64_C( 42.00) } }, { { INT64_C( 162), INT64_C( 73) }, { SIMDE_FLOAT64_C( 162.00), SIMDE_FLOAT64_C( 73.00) } }, { { 
INT64_C( 34), INT64_C( 86) }, { SIMDE_FLOAT64_C( 34.00), SIMDE_FLOAT64_C( 86.00) } }, { { INT64_C( 0), INT64_C( 81) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 81.00) } }, { { INT64_C( 121), INT64_C( 62) }, { SIMDE_FLOAT64_C( 121.00), SIMDE_FLOAT64_C( 62.00) } } }; for (size_t i = 0 ; i < (sizeof(f64gather_buffer) / sizeof(f64gather_buffer[0])) ; i++) { f64gather_buffer[i] = HEDLEY_STATIC_CAST(simde_float64, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i vindex = simde_x_mm_loadu_epi64(test_vec[i].vindex); simde__m128d r = simde_mm_i64gather_pd(f64gather_buffer, vindex, 8); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_mask_i64gather_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[2]; const int64_t vindex[2]; const simde_float64 mask[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 228.75), SIMDE_FLOAT64_C( -39.16) }, { INT64_C( 151), INT64_C( 61) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( 228.75), SIMDE_FLOAT64_C( 61.00) } }, { { SIMDE_FLOAT64_C( -882.35), SIMDE_FLOAT64_C( 591.91) }, { INT64_C( 204), INT64_C( 199) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( -882.35), SIMDE_FLOAT64_C( 199.00) } }, { { SIMDE_FLOAT64_C( 141.24), SIMDE_FLOAT64_C( 212.29) }, { INT64_C( 117), INT64_C( 141) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( 141.24), SIMDE_FLOAT64_C( 141.00) } }, { { SIMDE_FLOAT64_C( -950.10), SIMDE_FLOAT64_C( 891.20) }, { INT64_C( 83), INT64_C( 248) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( -950.10), SIMDE_FLOAT64_C( 248.00) } }, { { SIMDE_FLOAT64_C( 89.46), SIMDE_FLOAT64_C( -130.83) }, { INT64_C( 70), INT64_C( 253) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( 
89.46), SIMDE_FLOAT64_C( 253.00) } }, { { SIMDE_FLOAT64_C( -460.75), SIMDE_FLOAT64_C( -728.93) }, { INT64_C( 2), INT64_C( 214) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( -460.75), SIMDE_FLOAT64_C( 214.00) } }, { { SIMDE_FLOAT64_C( 878.60), SIMDE_FLOAT64_C( 965.00) }, { INT64_C( 72), INT64_C( 241) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( 878.60), SIMDE_FLOAT64_C( 241.00) } }, { { SIMDE_FLOAT64_C( 564.46), SIMDE_FLOAT64_C( 962.41) }, { INT64_C( 206), INT64_C( 147) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( 564.46), SIMDE_FLOAT64_C( 147.00) } } }; for (size_t i = 0 ; i < (sizeof(f64gather_buffer) / sizeof(f64gather_buffer[0])) ; i++) { f64gather_buffer[i] = HEDLEY_STATIC_CAST(simde_float64, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d src = simde_mm_loadu_pd(test_vec[i].src); simde__m128i vindex = simde_x_mm_loadu_epi64(test_vec[i].vindex); simde__m128d mask = simde_mm_loadu_pd(test_vec[i].mask); simde__m128d r = simde_mm_mask_i64gather_pd(src, f64gather_buffer, vindex, mask, 8); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_i64gather_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t vindex[4]; const simde_float64 r[4]; } test_vec[] = { { { INT64_C( 155), INT64_C( 134), INT64_C( 102), INT64_C( 36) }, { SIMDE_FLOAT64_C( 155.00), SIMDE_FLOAT64_C( 134.00), SIMDE_FLOAT64_C( 102.00), SIMDE_FLOAT64_C( 36.00) } }, { { INT64_C( 188), INT64_C( 82), INT64_C( 119), INT64_C( 247) }, { SIMDE_FLOAT64_C( 188.00), SIMDE_FLOAT64_C( 82.00), SIMDE_FLOAT64_C( 119.00), SIMDE_FLOAT64_C( 247.00) } }, { { INT64_C( 60), INT64_C( 4), INT64_C( 193), INT64_C( 157) }, { SIMDE_FLOAT64_C( 60.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 193.00), SIMDE_FLOAT64_C( 157.00) } }, { { INT64_C( 191), INT64_C( 201), INT64_C( 
171), INT64_C( 123) }, { SIMDE_FLOAT64_C( 191.00), SIMDE_FLOAT64_C( 201.00), SIMDE_FLOAT64_C( 171.00), SIMDE_FLOAT64_C( 123.00) } }, { { INT64_C( 25), INT64_C( 82), INT64_C( 149), INT64_C( 61) }, { SIMDE_FLOAT64_C( 25.00), SIMDE_FLOAT64_C( 82.00), SIMDE_FLOAT64_C( 149.00), SIMDE_FLOAT64_C( 61.00) } }, { { INT64_C( 213), INT64_C( 24), INT64_C( 205), INT64_C( 61) }, { SIMDE_FLOAT64_C( 213.00), SIMDE_FLOAT64_C( 24.00), SIMDE_FLOAT64_C( 205.00), SIMDE_FLOAT64_C( 61.00) } }, { { INT64_C( 212), INT64_C( 90), INT64_C( 134), INT64_C( 96) }, { SIMDE_FLOAT64_C( 212.00), SIMDE_FLOAT64_C( 90.00), SIMDE_FLOAT64_C( 134.00), SIMDE_FLOAT64_C( 96.00) } }, { { INT64_C( 73), INT64_C( 18), INT64_C( 98), INT64_C( 201) }, { SIMDE_FLOAT64_C( 73.00), SIMDE_FLOAT64_C( 18.00), SIMDE_FLOAT64_C( 98.00), SIMDE_FLOAT64_C( 201.00) } } }; for (size_t i = 0 ; i < (sizeof(f64gather_buffer) / sizeof(f64gather_buffer[0])) ; i++) { f64gather_buffer[i] = HEDLEY_STATIC_CAST(simde_float64, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i vindex = simde_x_mm256_loadu_epi64(test_vec[i].vindex); simde__m256d r = simde_mm256_i64gather_pd(f64gather_buffer, vindex, 8); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_mask_i64gather_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[4]; const int64_t vindex[4]; const simde_float64 mask[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( -225.70), SIMDE_FLOAT64_C( -7.36), SIMDE_FLOAT64_C( 407.57), SIMDE_FLOAT64_C( 907.28) }, { INT64_C( 44), INT64_C( 253), INT64_C( 186), INT64_C( 235) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( -225.70), SIMDE_FLOAT64_C( 253.00), SIMDE_FLOAT64_C( 407.57), SIMDE_FLOAT64_C( 235.00) } }, { { SIMDE_FLOAT64_C( -714.19), SIMDE_FLOAT64_C( 993.64), SIMDE_FLOAT64_C( 
-956.79), SIMDE_FLOAT64_C( 142.66) }, { INT64_C( 117), INT64_C( 6), INT64_C( 107), INT64_C( 114) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( -714.19), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( -956.79), SIMDE_FLOAT64_C( 114.00) } }, { { SIMDE_FLOAT64_C( 927.00), SIMDE_FLOAT64_C( 732.99), SIMDE_FLOAT64_C( -422.33), SIMDE_FLOAT64_C( 720.35) }, { INT64_C( 41), INT64_C( 9), INT64_C( 229), INT64_C( 58) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( 927.00), SIMDE_FLOAT64_C( 9.00), SIMDE_FLOAT64_C( -422.33), SIMDE_FLOAT64_C( 58.00) } }, { { SIMDE_FLOAT64_C( -504.21), SIMDE_FLOAT64_C( -460.73), SIMDE_FLOAT64_C( 337.37), SIMDE_FLOAT64_C( -249.52) }, { INT64_C( 86), INT64_C( 35), INT64_C( 77), INT64_C( 8) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( -504.21), SIMDE_FLOAT64_C( 35.00), SIMDE_FLOAT64_C( 337.37), SIMDE_FLOAT64_C( 8.00) } }, { { SIMDE_FLOAT64_C( -359.51), SIMDE_FLOAT64_C( 323.22), SIMDE_FLOAT64_C( -224.22), SIMDE_FLOAT64_C( 888.99) }, { INT64_C( 88), INT64_C( 225), INT64_C( 40), INT64_C( 71) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( -359.51), SIMDE_FLOAT64_C( 225.00), SIMDE_FLOAT64_C( -224.22), SIMDE_FLOAT64_C( 71.00) } }, { { SIMDE_FLOAT64_C( -595.68), SIMDE_FLOAT64_C( 6.34), SIMDE_FLOAT64_C( 51.10), SIMDE_FLOAT64_C( 42.13) }, { INT64_C( 61), INT64_C( 53), INT64_C( 127), INT64_C( 227) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( -595.68), SIMDE_FLOAT64_C( 53.00), SIMDE_FLOAT64_C( 51.10), SIMDE_FLOAT64_C( 
227.00) } }, { { SIMDE_FLOAT64_C( 356.87), SIMDE_FLOAT64_C( 122.53), SIMDE_FLOAT64_C( -844.94), SIMDE_FLOAT64_C( 51.26) }, { INT64_C( 71), INT64_C( 43), INT64_C( 173), INT64_C( 232) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( 356.87), SIMDE_FLOAT64_C( 43.00), SIMDE_FLOAT64_C( -844.94), SIMDE_FLOAT64_C( 232.00) } }, { { SIMDE_FLOAT64_C( -545.64), SIMDE_FLOAT64_C( 253.09), SIMDE_FLOAT64_C( -327.77), SIMDE_FLOAT64_C( 486.99) }, { INT64_C( 97), INT64_C( 231), INT64_C( 140), INT64_C( 243) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-9223372036854775808.00) }, { SIMDE_FLOAT64_C( -545.64), SIMDE_FLOAT64_C( 231.00), SIMDE_FLOAT64_C( -327.77), SIMDE_FLOAT64_C( 243.00) } } }; for (size_t i = 0 ; i < (sizeof(f64gather_buffer) / sizeof(f64gather_buffer[0])) ; i++) { f64gather_buffer[i] = HEDLEY_STATIC_CAST(simde_float64, i); } for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d src = simde_mm256_loadu_pd(test_vec[i].src); simde__m256i vindex = simde_x_mm256_loadu_epi64(test_vec[i].vindex); simde__m256d mask = simde_mm256_loadu_pd(test_vec[i].mask); simde__m256d r = simde_mm256_mask_i64gather_pd(src, f64gather_buffer, vindex, mask, 8); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_inserti128_si256 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[8]; const int32_t b[4]; const int32_t ra[8]; const int32_t rb[8]; } test_vec[] = { { { -INT32_C( 21047385), -INT32_C( 230819693), -INT32_C( 2121983276), INT32_C( 325772553), -INT32_C( 910275464), INT32_C( 1357952964), INT32_C( 2017751920), INT32_C( 9626969) }, { INT32_C( 1358909628), INT32_C( 474102856), INT32_C( 1604241494), INT32_C( 628230317) }, { INT32_C( 1358909628), INT32_C( 474102856), INT32_C( 1604241494), INT32_C( 628230317), 
-INT32_C( 910275464), INT32_C( 1357952964), INT32_C( 2017751920), INT32_C( 9626969) }, { -INT32_C( 21047385), -INT32_C( 230819693), -INT32_C( 2121983276), INT32_C( 325772553), INT32_C( 1358909628), INT32_C( 474102856), INT32_C( 1604241494), INT32_C( 628230317) } }, { { INT32_C( 435106133), INT32_C( 1483333608), -INT32_C( 1412321710), INT32_C( 1353474963), -INT32_C( 73356365), INT32_C( 1041752807), INT32_C( 1486730666), INT32_C( 326963390) }, { INT32_C( 690777153), -INT32_C( 1652451765), -INT32_C( 683060668), INT32_C( 1747449269) }, { INT32_C( 690777153), -INT32_C( 1652451765), -INT32_C( 683060668), INT32_C( 1747449269), -INT32_C( 73356365), INT32_C( 1041752807), INT32_C( 1486730666), INT32_C( 326963390) }, { INT32_C( 435106133), INT32_C( 1483333608), -INT32_C( 1412321710), INT32_C( 1353474963), INT32_C( 690777153), -INT32_C( 1652451765), -INT32_C( 683060668), INT32_C( 1747449269) } }, { { -INT32_C( 2023503968), INT32_C( 1422228394), -INT32_C( 290692304), -INT32_C( 1274992013), -INT32_C( 488821098), INT32_C( 142565060), INT32_C( 1725941937), INT32_C( 1573849021) }, { INT32_C( 2028286670), -INT32_C( 556946771), -INT32_C( 2134083315), INT32_C( 976539299) }, { INT32_C( 2028286670), -INT32_C( 556946771), -INT32_C( 2134083315), INT32_C( 976539299), -INT32_C( 488821098), INT32_C( 142565060), INT32_C( 1725941937), INT32_C( 1573849021) }, { -INT32_C( 2023503968), INT32_C( 1422228394), -INT32_C( 290692304), -INT32_C( 1274992013), INT32_C( 2028286670), -INT32_C( 556946771), -INT32_C( 2134083315), INT32_C( 976539299) } }, { { -INT32_C( 1071902212), INT32_C( 566795120), INT32_C( 562538340), INT32_C( 2105497262), INT32_C( 922051721), INT32_C( 471122446), -INT32_C( 543366852), -INT32_C( 1424371281) }, { INT32_C( 1382757858), INT32_C( 896742353), -INT32_C( 1990788134), -INT32_C( 637086383) }, { INT32_C( 1382757858), INT32_C( 896742353), -INT32_C( 1990788134), -INT32_C( 637086383), INT32_C( 922051721), INT32_C( 471122446), -INT32_C( 543366852), -INT32_C( 1424371281) }, { -INT32_C( 
1071902212), INT32_C( 566795120), INT32_C( 562538340), INT32_C( 2105497262), INT32_C( 1382757858), INT32_C( 896742353), -INT32_C( 1990788134), -INT32_C( 637086383) } }, { { INT32_C( 1209137977), -INT32_C( 94100034), -INT32_C( 1244069882), -INT32_C( 1268714543), -INT32_C( 116995288), -INT32_C( 651265282), -INT32_C( 966621835), -INT32_C( 1801361318) }, { INT32_C( 568111715), -INT32_C( 568639273), INT32_C( 311686464), INT32_C( 281474280) }, { INT32_C( 568111715), -INT32_C( 568639273), INT32_C( 311686464), INT32_C( 281474280), -INT32_C( 116995288), -INT32_C( 651265282), -INT32_C( 966621835), -INT32_C( 1801361318) }, { INT32_C( 1209137977), -INT32_C( 94100034), -INT32_C( 1244069882), -INT32_C( 1268714543), INT32_C( 568111715), -INT32_C( 568639273), INT32_C( 311686464), INT32_C( 281474280) } }, { { -INT32_C( 1106588481), -INT32_C( 1147717562), INT32_C( 411236797), -INT32_C( 995351711), -INT32_C( 1394177835), INT32_C( 143262152), -INT32_C( 568713482), -INT32_C( 772808686) }, { -INT32_C( 208668243), -INT32_C( 273734095), -INT32_C( 2147012321), INT32_C( 692433748) }, { -INT32_C( 208668243), -INT32_C( 273734095), -INT32_C( 2147012321), INT32_C( 692433748), -INT32_C( 1394177835), INT32_C( 143262152), -INT32_C( 568713482), -INT32_C( 772808686) }, { -INT32_C( 1106588481), -INT32_C( 1147717562), INT32_C( 411236797), -INT32_C( 995351711), -INT32_C( 208668243), -INT32_C( 273734095), -INT32_C( 2147012321), INT32_C( 692433748) } }, { { INT32_C( 64301883), INT32_C( 587948076), -INT32_C( 1878972802), -INT32_C( 1285427194), INT32_C( 463991273), INT32_C( 923424279), -INT32_C( 608759417), -INT32_C( 16450364) }, { INT32_C( 1409472807), -INT32_C( 1216934599), INT32_C( 977762355), INT32_C( 1391307113) }, { INT32_C( 1409472807), -INT32_C( 1216934599), INT32_C( 977762355), INT32_C( 1391307113), INT32_C( 463991273), INT32_C( 923424279), -INT32_C( 608759417), -INT32_C( 16450364) }, { INT32_C( 64301883), INT32_C( 587948076), -INT32_C( 1878972802), -INT32_C( 1285427194), INT32_C( 1409472807), 
-INT32_C( 1216934599), INT32_C( 977762355), INT32_C( 1391307113) } }, { { -INT32_C( 1318218598), INT32_C( 1911060458), INT32_C( 1280090248), -INT32_C( 1001697124), INT32_C( 1662537002), -INT32_C( 1910796453), INT32_C( 1892180487), -INT32_C( 1513900533) }, { INT32_C( 894906442), INT32_C( 816201640), INT32_C( 2088629215), INT32_C( 1832962115) }, { INT32_C( 894906442), INT32_C( 816201640), INT32_C( 2088629215), INT32_C( 1832962115), INT32_C( 1662537002), -INT32_C( 1910796453), INT32_C( 1892180487), -INT32_C( 1513900533) }, { -INT32_C( 1318218598), INT32_C( 1911060458), INT32_C( 1280090248), -INT32_C( 1001697124), INT32_C( 894906442), INT32_C( 816201640), INT32_C( 2088629215), INT32_C( 1832962115) } } };
/* Run the insert twice per vector, once with immediate 0 and once with
 * immediate 1, and compare each result with its own expected array. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi32(test_vec[i].b); simde__m256i ra = simde_mm256_inserti128_si256(a, b, 0); simde__m256i rb = simde_mm256_inserti128_si256(a, b, 1); simde_test_x86_assert_equal_i32x8(ra, simde_x_mm256_loadu_epi32(test_vec[i].ra)); simde_test_x86_assert_equal_i32x8(rb, simde_x_mm256_loadu_epi32(test_vec[i].rb)); } return 0; }
/* Test for simde_mm256_madd_epi16.
 *
 * Each vector is three simde__m256i values built with the set_* helpers:
 * 16-bit inputs `a` and `b`, and the expected 32-bit result `r`.
 * In the table every 32-bit lane of `r` is the sum of the two products of
 * the matching adjacent 16-bit pairs, e.g. for the first vector
 * -22074*13087 + 27892*552 = -273486054.
 * NOTE(review): unlike the neighbouring tests this table is not `static`;
 * presumably because the set_* calls are not constant expressions -- confirm.
 * Returns 0 when every vector matches. */
static int test_simde_mm256_madd_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C(-22074), INT16_C( 27892), INT16_C(-27402), INT16_C( -5185), INT16_C(-13617), INT16_C( 6733), INT16_C( 31914), INT16_C( 16627), INT16_C( 14296), INT16_C( 527), INT16_C(-18797), INT16_C( 25549), INT16_C( 30759), INT16_C(-12360), INT16_C( 6414), INT16_C( 21507)), simde_mm256_set_epi16(INT16_C( 13087), INT16_C( 552), INT16_C(-32388), INT16_C( 21398), INT16_C( -2724), INT16_C( 12527), INT16_C( 4881), INT16_C( -1411), INT16_C( 4971), INT16_C( -716), INT16_C(-15438), INT16_C( 22737), INT16_C( 22649), INT16_C( 14157), INT16_C( 16131), INT16_C( 25742)), simde_mm256_set_epi32(INT32_C( -273486054), INT32_C( 776547346), INT32_C(
121436999), INT32_C( 132311537), INT32_C( 70688084), INT32_C( 871095699), INT32_C( 521680071), INT32_C( 657097428)) }, { simde_mm256_set_epi16(INT16_C( 31591), INT16_C( 27463), INT16_C(-11448), INT16_C( 12754), INT16_C( 23360), INT16_C( 24732), INT16_C( 21501), INT16_C( 9477), INT16_C(-29628), INT16_C( 15818), INT16_C( 22405), INT16_C(-17463), INT16_C( -4252), INT16_C( 1744), INT16_C( 29107), INT16_C( -236)), simde_mm256_set_epi16(INT16_C( 17376), INT16_C(-10713), INT16_C( 2486), INT16_C( 1967), INT16_C( 3397), INT16_C( 25143), INT16_C( 11210), INT16_C( -8470), INT16_C( -7431), INT16_C(-24781), INT16_C( 28646), INT16_C(-15461), INT16_C( -7439), INT16_C(-17272), INT16_C(-12699), INT16_C( 15557)), simde_mm256_set_epi32(INT32_C( 254714097), INT32_C( -3372610), INT32_C( 701190596), INT32_C( 160756020), INT32_C( -171820190), INT32_C( 911809073), INT32_C( 1508260), INT32_C( -373301245)) }, { simde_mm256_set_epi16(INT16_C(-23018), INT16_C( 27443), INT16_C( -9704), INT16_C( 6815), INT16_C(-10742), INT16_C(-15199), INT16_C(-25926), INT16_C( 11907), INT16_C(-11173), INT16_C( -1296), INT16_C(-31401), INT16_C(-11984), INT16_C( 29153), INT16_C( 26221), INT16_C( 23204), INT16_C( 24693)), simde_mm256_set_epi16(INT16_C( 22883), INT16_C( -1430), INT16_C(-22444), INT16_C( -4173), INT16_C( -9271), INT16_C(-22344), INT16_C(-19214), INT16_C(-15237), INT16_C( 8913), INT16_C(-31636), INT16_C(-32308), INT16_C( 15943), INT16_C( 4837), INT16_C( -6600), INT16_C(-14527), INT16_C(-29993)), simde_mm256_set_epi32(INT32_C( -565964384), INT32_C( 189357581), INT32_C( 439195538), INT32_C( 316715205), INT32_C( -58584693), INT32_C( 823442596), INT32_C( -32045539), INT32_C(-1077701657)) }, { simde_mm256_set_epi16(INT16_C( -2513), INT16_C( -1446), INT16_C( 20507), INT16_C(-25668), INT16_C( 32595), INT16_C( 1090), INT16_C( 8204), INT16_C( 9120), INT16_C( -8133), INT16_C( 31849), INT16_C(-18457), INT16_C(-12347), INT16_C( 18795), INT16_C( -8246), INT16_C( 23278), INT16_C(-14987)), 
simde_mm256_set_epi16(INT16_C( 12529), INT16_C(-11077), INT16_C( 5410), INT16_C(-23993), INT16_C(-26377), INT16_C( -6112), INT16_C(-21857), INT16_C( 3969), INT16_C( -2477), INT16_C(-13689), INT16_C( 21824), INT16_C( -644), INT16_C( 10631), INT16_C(-12974), INT16_C(-28564), INT16_C( 32352)), simde_mm256_set_epi32(INT32_C( -15468035), INT32_C( 726795194), INT32_C( -866420395), INT32_C( -143117548), INT32_C( -415835520), INT32_C( -394854100), INT32_C( 306793249), INT32_C(-1149772216)) }, { simde_mm256_set_epi16(INT16_C( 7571), INT16_C( 21562), INT16_C( 24839), INT16_C( 27056), INT16_C(-18448), INT16_C( 29209), INT16_C( 1880), INT16_C(-21767), INT16_C( 26198), INT16_C(-31641), INT16_C( 3244), INT16_C(-13098), INT16_C( -4443), INT16_C( -521), INT16_C(-27791), INT16_C(-13063)), simde_mm256_set_epi16(INT16_C(-17648), INT16_C( 8337), INT16_C( -3551), INT16_C(-28013), INT16_C(-17930), INT16_C( 577), INT16_C( 32382), INT16_C( 20122), INT16_C(-14435), INT16_C(-21581), INT16_C(-32759), INT16_C( 2792), INT16_C(-24268), INT16_C( 11663), INT16_C( -1946), INT16_C( 19139)), simde_mm256_set_epi32(INT32_C( 46149386), INT32_C( -846123017), INT32_C( 347626233), INT32_C( -377117414), INT32_C( 304676291), INT32_C( -142839812), INT32_C( 101746301), INT32_C( -195931471)) }, { simde_mm256_set_epi16(INT16_C(-11993), INT16_C( -3203), INT16_C( 15681), INT16_C(-18383), INT16_C( 16847), INT16_C( 2437), INT16_C( -8441), INT16_C( 14338), INT16_C( 7300), INT16_C(-21082), INT16_C(-17580), INT16_C( 1429), INT16_C(-32388), INT16_C(-10418), INT16_C(-19218), INT16_C(-31595)), simde_mm256_set_epi16(INT16_C(-24077), INT16_C(-32143), INT16_C( 18864), INT16_C( 11719), INT16_C( -5555), INT16_C( -6220), INT16_C(-30370), INT16_C( 23594), INT16_C( 29026), INT16_C( 10950), INT16_C( 31566), INT16_C(-28277), INT16_C( 28777), INT16_C( 17653), INT16_C(-21542), INT16_C( 7263)), simde_mm256_set_epi32(INT32_C( 391709490), INT32_C( 80376007), INT32_C( -108743225), INT32_C( 594643942), INT32_C( -18958100), INT32_C( 
-595338113), INT32_C(-1115938430), INT32_C( 184519671)) }, { simde_mm256_set_epi16(INT16_C(-31708), INT16_C( -6736), INT16_C(-12543), INT16_C(-19831), INT16_C( -5660), INT16_C(-20189), INT16_C( 31844), INT16_C( 20882), INT16_C(-22159), INT16_C( -7550), INT16_C( 8622), INT16_C(-20427), INT16_C(-19784), INT16_C( 4739), INT16_C( -9632), INT16_C( 3416)), simde_mm256_set_epi16(INT16_C( 18684), INT16_C( 4347), INT16_C( 18074), INT16_C(-24539), INT16_C( 14613), INT16_C(-28009), INT16_C( 18528), INT16_C(-16867), INT16_C( 1794), INT16_C(-22885), INT16_C( 27546), INT16_C( 25964), INT16_C( -3488), INT16_C(-31053), INT16_C( 6969), INT16_C(-12364)), simde_mm256_set_epi32(INT32_C( -621713664), INT32_C( 259930727), INT32_C( 482764121), INT32_C( 237788938), INT32_C( 133028504), INT32_C( -292865016), INT32_C( -78153575), INT32_C( -109360832)) }, { simde_mm256_set_epi16(INT16_C( 32611), INT16_C(-26810), INT16_C( 32585), INT16_C( 6944), INT16_C( 26033), INT16_C( 2406), INT16_C(-32105), INT16_C(-11091), INT16_C(-32475), INT16_C(-29532), INT16_C( 13591), INT16_C(-15433), INT16_C( 29089), INT16_C(-24035), INT16_C( 15832), INT16_C(-10008)), simde_mm256_set_epi16(INT16_C( 16370), INT16_C( -9733), INT16_C( -7456), INT16_C( -2346), INT16_C( -1285), INT16_C( -5955), INT16_C(-24955), INT16_C( -4486), INT16_C( 4170), INT16_C(-17666), INT16_C( 7986), INT16_C( 19848), INT16_C(-13940), INT16_C( 24511), INT16_C( 18142), INT16_C(-26201)), simde_mm256_set_epi32(INT32_C( 794783800), INT32_C( -259244384), INT32_C( -47780135), INT32_C( 850934501), INT32_C( 386291562), INT32_C( -197776458), INT32_C( -994622545), INT32_C( 549443752)) } };
/* Feed each a/b pair to the multiply-add and require the result to compare
 * `==` with the expected vector when viewed as 32-bit lanes. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_madd_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; }
/* Test for simde_mm256_maddubs_epi16.
 *
 * `a` holds 32 unsigned bytes, `b` holds 32 signed bytes and `r` holds the
 * 16 expected int16 results. In the table r[k] is
 * a[2k]*b[2k] + a[2k+1]*b[2k+1], e.g. 109*94 + 252*(-123) = -20750 for the
 * first lane of the first vector. Where that sum exceeds the int16 range the
 * expected value is INT16_MAX (first vector, lane 9: 157*56 + 228*116 = 35240).
 * Returns 0 when every vector matches. */
static int test_simde_mm256_maddubs_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint8_t a[32]; const int8_t b[32]; const int16_t r[16]; } test_vec[] = {
{ { UINT8_C(109), UINT8_C(252), UINT8_C( 25), UINT8_C(124), UINT8_C(145), UINT8_C(132), UINT8_C( 41), UINT8_C(134), UINT8_C( 24), UINT8_C( 84), UINT8_C(233), UINT8_C(164), UINT8_C(189), UINT8_C( 12), UINT8_C(171), UINT8_C( 39), UINT8_C(233), UINT8_C(115), UINT8_C(157), UINT8_C(228), UINT8_C(143), UINT8_C(175), UINT8_C(252), UINT8_C( 18), UINT8_C(160), UINT8_C( 8), UINT8_C(174), UINT8_C(181), UINT8_C(235), UINT8_C( 97), UINT8_C(108), UINT8_C( 88) }, { INT8_C( 94), -INT8_C( 123), -INT8_C( 44), -INT8_C( 17), INT8_C( 10), -INT8_C( 3), INT8_C( 117), INT8_C( 34), INT8_C( 81), INT8_C( 94), -INT8_C( 58), INT8_C( 15), INT8_C( 107), INT8_C( 113), INT8_C( 54), INT8_C( 84), -INT8_C( 27), -INT8_C( 44), INT8_C( 56), INT8_C( 116), -INT8_C( 125), INT8_C( 52), -INT8_C( 122), INT8_C( 36), INT8_C( 60), INT8_C( 53), -INT8_C( 39), INT8_C( 39), -INT8_C( 106), INT8_C( 70), INT8_MAX, -INT8_C( 12) }, { -INT16_C( 20750), -INT16_C( 3208), INT16_C( 1054), INT16_C( 9353), INT16_C( 9840), -INT16_C( 11054), INT16_C( 21579), INT16_C( 12510), -INT16_C( 11351), INT16_MAX, -INT16_C( 8775), -INT16_C( 30096), INT16_C( 10024), INT16_C( 273), -INT16_C( 18120), INT16_C( 12660) } }, { { UINT8_C(203), UINT8_C( 83), UINT8_C(228), UINT8_C(213), UINT8_C( 80), UINT8_C( 89), UINT8_C(247), UINT8_C(162), UINT8_C(184), UINT8_C(190), UINT8_C(177), UINT8_C( 35), UINT8_C( 47), UINT8_C(231), UINT8_C(119), UINT8_C( 20), UINT8_C(187), UINT8_C(176), UINT8_C(137), UINT8_C( 63), UINT8_C(228), UINT8_C( 15), UINT8_C( 99), UINT8_C( 33), UINT8_C( 68), UINT8_C( 60), UINT8_C( 72), UINT8_C(219), UINT8_C(130), UINT8_C(200), UINT8_C(207), UINT8_C( 78) }, { INT8_C( 27), -INT8_C( 77), INT8_C( 35), INT8_C( 108), INT8_C( 13), INT8_C( 27), INT8_C( 14), -INT8_C( 59), -INT8_C( 39), -INT8_C( 65), -INT8_C( 24), INT8_C( 8), -INT8_C( 90), INT8_C( 95), INT8_C( 29), INT8_C( 98), INT8_C( 15), -INT8_C( 90), -INT8_C( 95), -INT8_C( 12), -INT8_C( 75), INT8_C( 4), INT8_C( 21), -INT8_C( 6), INT8_C( 64), INT8_C( 93), -INT8_C( 43), -INT8_C( 61), INT8_C( 
37), -INT8_C( 92), INT8_C( 17), INT8_C( 65) }, { -INT16_C( 910), INT16_C( 30984), INT16_C( 3443), -INT16_C( 6100), -INT16_C( 19526), -INT16_C( 3968), INT16_C( 17715), INT16_C( 5411), -INT16_C( 13035), -INT16_C( 13771), -INT16_C( 17040), INT16_C( 1881), INT16_C( 9932), -INT16_C( 16455), -INT16_C( 13590), INT16_C( 8589) } }, { { UINT8_C( 88), UINT8_C( 52), UINT8_C(173), UINT8_C(101), UINT8_C( 79), UINT8_C(187), UINT8_C( 42), UINT8_C( 40), UINT8_C(122), UINT8_C( 18), UINT8_C( 49), UINT8_C( 32), UINT8_C(113), UINT8_C( 78), UINT8_C(130), UINT8_C(129), UINT8_C(244), UINT8_C( 35), UINT8_C(117), UINT8_C(169), UINT8_C( 39), UINT8_C(138), UINT8_C(163), UINT8_C(104), UINT8_C(231), UINT8_C(120), UINT8_C( 43), UINT8_C( 13), UINT8_C( 29), UINT8_C( 60), UINT8_C( 78), UINT8_C(117) }, { INT8_C( 112), -INT8_C( 5), -INT8_C( 38), -INT8_C( 64), -INT8_C( 74), INT8_C( 4), -INT8_C( 24), INT8_C( 48), INT8_C( 22), INT8_C( 25), INT8_C( 80), -INT8_C( 121), INT8_C( 103), -INT8_C( 45), INT8_C( 8), INT8_C( 91), -INT8_C( 10), INT8_C( 125), INT8_C( 5), INT8_C( 30), INT8_C( 7), -INT8_C( 88), -INT8_C( 122), -INT8_C( 17), INT8_C( 33), -INT8_C( 79), -INT8_C( 4), INT8_C( 62), -INT8_C( 19), INT8_C( 74), -INT8_C( 77), INT8_C( 93) }, { INT16_C( 9596), -INT16_C( 13038), -INT16_C( 5098), INT16_C( 912), INT16_C( 3134), INT16_C( 48), INT16_C( 8129), INT16_C( 12779), INT16_C( 1935), INT16_C( 5655), -INT16_C( 11871), -INT16_C( 21654), -INT16_C( 1857), INT16_C( 634), INT16_C( 3889), INT16_C( 4875) } }, { { UINT8_C( 69), UINT8_C(141), UINT8_C( 29), UINT8_C(251), UINT8_C(145), UINT8_C( 6), UINT8_C( 43), UINT8_C(167), UINT8_C( 31), UINT8_C(123), UINT8_C( 46), UINT8_C(135), UINT8_C( 78), UINT8_C( 55), UINT8_C(226), UINT8_C( 69), UINT8_C(180), UINT8_C(231), UINT8_C( 99), UINT8_C(188), UINT8_C(144), UINT8_C(233), UINT8_C(171), UINT8_C(177), UINT8_C(154), UINT8_C(167), UINT8_C(239), UINT8_C(135), UINT8_C(241), UINT8_C(162), UINT8_C(228), UINT8_C( 54) }, { INT8_C( 47), INT8_C( 2), INT8_C( 49), -INT8_C( 64), INT8_C( 8), 
INT8_C( 92), INT8_C( 103), INT8_C( 39), -INT8_C( 41), -INT8_C( 107), -INT8_C( 82), INT8_C( 38), -INT8_C( 52), -INT8_C( 111), INT8_C( 107), -INT8_C( 127), INT8_C( 120), -INT8_C( 50), INT8_C( 61), INT8_C( 8), -INT8_C( 73), -INT8_C( 24), -INT8_C( 71), INT8_C( 81), -INT8_C( 113), -INT8_C( 88), -INT8_C( 40), INT8_MIN, INT8_C( 74), -INT8_C( 68), -INT8_C( 74), INT8_C( 121) }, { INT16_C( 3525), -INT16_C( 14643), INT16_C( 1712), INT16_C( 10942), -INT16_C( 14432), INT16_C( 1358), -INT16_C( 10161), INT16_C( 15419), INT16_C( 10050), INT16_C( 7543), -INT16_C( 16104), INT16_C( 2196), -INT16_C( 32098), -INT16_C( 26840), INT16_C( 6818), -INT16_C( 10338) } }, { { UINT8_C(190), UINT8_C(231), UINT8_C( 57), UINT8_C(198), UINT8_C( 67), UINT8_C(160), UINT8_C(238), UINT8_C( 26), UINT8_C( 54), UINT8_C(156), UINT8_C( 64), UINT8_C( 2), UINT8_C( 45), UINT8_C(171), UINT8_C(131), UINT8_C(166), UINT8_C(121), UINT8_C(192), UINT8_C(174), UINT8_C( 48), UINT8_C(168), UINT8_C(104), UINT8_C(129), UINT8_C( 55), UINT8_C( 16), UINT8_C( 89), UINT8_C(183), UINT8_C( 91), UINT8_C( 22), UINT8_C(109), UINT8_C(212), UINT8_C(212) }, { INT8_C( 84), INT8_C( 14), -INT8_C( 101), -INT8_C( 105), -INT8_C( 82), -INT8_C( 119), -INT8_C( 78), -INT8_C( 28), INT8_C( 37), -INT8_C( 14), -INT8_C( 25), INT8_C( 83), -INT8_C( 98), INT8_C( 106), -INT8_C( 7), INT8_C( 23), INT8_C( 43), -INT8_C( 89), INT8_C( 72), -INT8_C( 45), INT8_C( 15), -INT8_C( 55), INT8_C( 11), INT8_C( 32), INT8_C( 35), -INT8_C( 62), INT8_C( 123), INT8_C( 57), INT8_C( 48), INT8_C( 79), INT8_C( 13), -INT8_C( 124) }, { INT16_C( 19194), -INT16_C( 26547), -INT16_C( 24534), -INT16_C( 19292), -INT16_C( 186), -INT16_C( 1434), INT16_C( 13716), INT16_C( 2901), -INT16_C( 11885), INT16_C( 10368), -INT16_C( 3200), INT16_C( 3179), -INT16_C( 4958), INT16_C( 27696), INT16_C( 9667), -INT16_C( 23532) } }, { { UINT8_C( 93), UINT8_C(168), UINT8_C( 28), UINT8_C( 12), UINT8_C( 49), UINT8_C(206), UINT8_C(240), UINT8_C( 87), UINT8_C(192), UINT8_C(215), UINT8_C(170), UINT8_C( 94), 
UINT8_C( 66), UINT8_C(163), UINT8_C(118), UINT8_C(109), UINT8_C( 74), UINT8_C(190), UINT8_C( 64), UINT8_C( 90), UINT8_C(135), UINT8_C( 75), UINT8_C(122), UINT8_C(170), UINT8_C( 14), UINT8_C(245), UINT8_C(227), UINT8_C( 62), UINT8_C( 68), UINT8_C(241), UINT8_C(194), UINT8_C(162) }, { -INT8_C( 103), -INT8_C( 34), -INT8_C( 82), -INT8_C( 53), -INT8_C( 84), -INT8_C( 98), INT8_C( 34), INT8_C( 109), INT8_C( 118), -INT8_C( 52), -INT8_C( 53), -INT8_C( 72), INT8_C( 111), INT8_C( 65), INT8_C( 37), -INT8_C( 71), -INT8_C( 1), INT8_C( 101), INT8_C( 19), -INT8_C( 121), -INT8_C( 79), -INT8_C( 115), INT8_C( 49), -INT8_C( 65), -INT8_C( 126), INT8_C( 21), -INT8_C( 3), -INT8_C( 57), INT8_C( 6), -INT8_C( 65), INT8_C( 105), -INT8_C( 97) }, { -INT16_C( 15291), -INT16_C( 2932), -INT16_C( 24304), INT16_C( 17643), INT16_C( 11476), -INT16_C( 15778), INT16_C( 17921), -INT16_C( 3373), INT16_C( 19116), -INT16_C( 9674), -INT16_C( 19290), -INT16_C( 5072), INT16_C( 3381), -INT16_C( 4215), -INT16_C( 15257), INT16_C( 4656) } }, { { UINT8_C(158), UINT8_C( 23), UINT8_C(106), UINT8_C( 74), UINT8_C(181), UINT8_C(140), UINT8_C(183), UINT8_C( 43), UINT8_C( 88), UINT8_C(131), UINT8_C(227), UINT8_C(199), UINT8_C(196), UINT8_C( 8), UINT8_C(129), UINT8_C(196), UINT8_C(110), UINT8_C(148), UINT8_C( 75), UINT8_C( 31), UINT8_C( 34), UINT8_C(124), UINT8_C(222), UINT8_C(164), UINT8_C(145), UINT8_C(219), UINT8_C(107), UINT8_C(151), UINT8_C(154), UINT8_C(212), UINT8_C( 55), UINT8_C( 56) }, { -INT8_C( 21), -INT8_C( 95), -INT8_C( 125), -INT8_C( 95), INT8_C( 46), INT8_C( 58), -INT8_C( 52), -INT8_C( 122), -INT8_C( 67), -INT8_C( 80), INT8_C( 78), -INT8_C( 126), -INT8_C( 72), -INT8_C( 49), INT8_C( 70), INT8_C( 38), INT8_C( 99), -INT8_C( 111), INT8_C( 69), -INT8_C( 123), INT8_C( 13), INT8_C( 35), INT8_C( 42), -INT8_C( 97), -INT8_C( 2), -INT8_C( 107), INT8_C( 54), -INT8_C( 103), INT8_C( 106), INT8_C( 109), -INT8_C( 47), INT8_C( 85) }, { -INT16_C( 5503), -INT16_C( 20280), INT16_C( 16446), -INT16_C( 14762), -INT16_C( 16376), 
-INT16_C( 7368), -INT16_C( 14504), INT16_C( 16478), -INT16_C( 5538), INT16_C( 1362), INT16_C( 4782), -INT16_C( 6584), -INT16_C( 23723), -INT16_C( 9775), INT16_MAX, INT16_C( 2175) } }, { { UINT8_C( 15), UINT8_C( 84), UINT8_C(246), UINT8_C( 61), UINT8_C(143), UINT8_C(195), UINT8_C(195), UINT8_C( 76), UINT8_C(115), UINT8_C( 17), UINT8_C(206), UINT8_C( 43), UINT8_C(224), UINT8_C( 20), UINT8_C( 82), UINT8_C( 68), UINT8_C(165), UINT8_C(151), UINT8_C(201), UINT8_C(179), UINT8_C(187), UINT8_C(243), UINT8_C( 82), UINT8_C(185), UINT8_C(137), UINT8_C(136), UINT8_C( 82), UINT8_C(243), UINT8_C(246), UINT8_C( 36), UINT8_C( 72), UINT8_C( 5) }, { INT8_C( 120), INT8_C( 63), INT8_C( 66), INT8_C( 7), INT8_C( 2), INT8_C( 5), INT8_C( 84), INT8_C( 117), INT8_C( 23), INT8_C( 34), -INT8_C( 96), -INT8_C( 9), INT8_C( 55), -INT8_C( 14), INT8_C( 59), -INT8_C( 36), -INT8_C( 118), INT8_C( 5), -INT8_C( 113), INT8_C( 69), -INT8_C( 8), -INT8_C( 31), -INT8_C( 2), -INT8_C( 127), INT8_C( 106), INT8_C( 81), INT8_C( 116), INT8_C( 96), INT8_C( 117), -INT8_C( 67), INT8_C( 101), -INT8_C( 19) }, { INT16_C( 7092), INT16_C( 16663), INT16_C( 1261), INT16_C( 25272), INT16_C( 3223), -INT16_C( 20163), INT16_C( 12040), INT16_C( 2390), -INT16_C( 18715), -INT16_C( 10362), -INT16_C( 9029), -INT16_C( 23659), INT16_C( 25538), INT16_MAX, INT16_C( 26370), INT16_C( 7177) } } };
/* Load both byte inputs, run the multiply-add and compare the 16 int16 lanes.
 * NOTE(review): `a` and `b` are 8-bit arrays but are read with the epi16
 * loader; presumably that loader is a lane-agnostic 256-bit unaligned load,
 * so the result is unaffected -- confirm, or switch to an epi8 loader if one
 * exists. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_maddubs_epi16(a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; }
/* Test for simde_mm_maskload_epi32.
 *
 * `a` is the int32[4] memory handed to the load, `b` is the mask and `r` the
 * expected result. In the table a lane of `r` equals the matching lane of
 * `a` when the mask lane is negative and is 0 otherwise.
 * Returns 0 when every vector matches. */
static int test_simde_mm_maskload_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { { INT32_C( 534968926), INT32_C( 77452639), -INT32_C( 2020739113), -INT32_C( 154957017) }, { INT32_C( 730249678), -INT32_C( 1555222833), INT32_C(
1430166726), INT32_C( 515979712) }, { INT32_C( 0), INT32_C( 77452639), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 1824611532), INT32_C( 1335417720), -INT32_C( 103406126), INT32_C( 2129697456) }, { INT32_C( 581531219), INT32_C( 1808201381), INT32_C( 1405158547), INT32_C( 1920107070) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 1643794395), INT32_C( 1575853451), INT32_C( 1918289090), -INT32_C( 1309653410) }, { INT32_C( 1658034877), INT32_C( 600676752), -INT32_C( 596144482), INT32_C( 911141136) }, { INT32_C( 0), INT32_C( 0), INT32_C( 1918289090), INT32_C( 0) } }, { { INT32_C( 617894809), -INT32_C( 1283341839), -INT32_C( 483993723), -INT32_C( 611052002) }, { INT32_C( 1094543536), -INT32_C( 1620833791), -INT32_C( 1451500647), INT32_C( 1574947268) }, { INT32_C( 0), -INT32_C( 1283341839), -INT32_C( 483993723), INT32_C( 0) } }, { { INT32_C( 243381021), -INT32_C( 88014219), -INT32_C( 119609382), -INT32_C( 1361874178) }, { -INT32_C( 588312358), -INT32_C( 1283763174), -INT32_C( 195168465), -INT32_C( 581878592) }, { INT32_C( 243381021), -INT32_C( 88014219), -INT32_C( 119609382), -INT32_C( 1361874178) } }, { { INT32_C( 1709954032), -INT32_C( 1352684075), -INT32_C( 1834533484), -INT32_C( 1975420240) }, { -INT32_C( 1520029558), -INT32_C( 1269243260), -INT32_C( 1716996647), -INT32_C( 495453710) }, { INT32_C( 1709954032), -INT32_C( 1352684075), -INT32_C( 1834533484), -INT32_C( 1975420240) } }, { { -INT32_C( 1572379956), -INT32_C( 1538152945), -INT32_C( 1808336412), -INT32_C( 31557773) }, { INT32_C( 748914088), INT32_C( 1088486247), -INT32_C( 1546024783), INT32_C( 1317359746) }, { INT32_C( 0), INT32_C( 0), -INT32_C( 1808336412), INT32_C( 0) } }, { { -INT32_C( 1024406349), INT32_C( 1449542258), -INT32_C( 1360356037), -INT32_C( 1129576172) }, { -INT32_C( 169259122), -INT32_C( 63583925), -INT32_C( 727773614), INT32_C( 304227423) }, { -INT32_C( 1024406349), INT32_C( 1449542258), -INT32_C( 1360356037), INT32_C( 0) } } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
/* Only the mask goes through a vector load; the source array itself is
 * passed as the memory operand of the masked load. */
simde__m128i b = simde_x_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_maskload_epi32(test_vec[i].a, b); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; }
/* Test for simde_mm256_maskload_epi32.
 *
 * `a` is the int32[8] memory handed to the load, `b` is the mask and `r` the
 * expected result. In the table a lane of `r` equals the matching lane of
 * `a` when the mask lane is negative and is 0 otherwise.
 * Returns 0 when every vector matches. */
static int test_simde_mm256_maskload_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[8]; const int32_t b[8]; const int32_t r[8]; } test_vec[] = { { { -INT32_C( 678062514), INT32_C( 351869071), INT32_C( 2136335632), -INT32_C( 212796243), INT32_C( 1333699519), -INT32_C( 1581125468), INT32_C( 304063629), INT32_C( 630727383) }, { INT32_C( 1207709368), INT32_C( 1448932678), -INT32_C( 1999261221), INT32_C( 1836852910), INT32_C( 1841167049), INT32_C( 1997438698), -INT32_C( 108450526), INT32_C( 119415375) }, { INT32_C( 0), INT32_C( 0), INT32_C( 2136335632), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 304063629), INT32_C( 0) } }, { { -INT32_C( 1773266096), -INT32_C( 336811504), INT32_C( 175423836), -INT32_C( 1334316825), -INT32_C( 719506198), -INT32_C( 716428366), -INT32_C( 1479616936), INT32_C( 1219423736) }, { INT32_C( 417266952), INT32_C( 50645671), INT32_C( 1930262667), INT32_C( 1378059624), INT32_C( 1814512057), -INT32_C( 985566100), INT32_C( 1114378058), INT32_C( 76159996) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 716428366), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 1088591848), -INT32_C( 1111285454), INT32_C( 19976345), INT32_C( 257119317), INT32_C( 24869781), INT32_C( 969325807), -INT32_C( 931449909), INT32_C( 1724646734) }, { -INT32_C( 1624905363), -INT32_C( 1554192374), INT32_C( 245665209), INT32_C( 1981675745), INT32_C( 1652004979), INT32_C( 547044948), -INT32_C( 1075308943), -INT32_C( 2010794981) }, { -INT32_C( 1088591848), -INT32_C( 1111285454), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 931449909), INT32_C( 1724646734) } }, { { -INT32_C( 1473820002), -INT32_C( 330529997), -INT32_C( 235212784), INT32_C( 1533483240), INT32_C(
96329648), -INT32_C( 1910155235), -INT32_C( 1991438994), INT32_C( 1594979009) }, { -INT32_C( 284673860), -INT32_C( 858041156), INT32_C( 767481412), -INT32_C( 1635244562), INT32_C( 564348420), INT32_C( 229623966), -INT32_C( 1751712555), INT32_C( 737585262) }, { -INT32_C( 1473820002), -INT32_C( 330529997), INT32_C( 0), INT32_C( 1533483240), INT32_C( 0), INT32_C( 0), -INT32_C( 1991438994), INT32_C( 0) } }, { { -INT32_C( 1675952416), -INT32_C( 1754663342), -INT32_C( 1161549876), INT32_C( 1364741196), INT32_C( 829619346), -INT32_C( 1707203900), -INT32_C( 1926114274), INT32_C( 1572349820) }, { INT32_C( 2029638182), -INT32_C( 1810931000), -INT32_C( 699477111), -INT32_C( 1306024160), INT32_C( 1742969251), -INT32_C( 637460037), INT32_C( 1919365878), -INT32_C( 2133909670) }, { INT32_C( 0), -INT32_C( 1754663342), -INT32_C( 1161549876), INT32_C( 1364741196), INT32_C( 0), -INT32_C( 1707203900), INT32_C( 0), INT32_C( 1572349820) } }, { { -INT32_C( 1158100495), -INT32_C( 1253177301), -INT32_C( 74736165), -INT32_C( 407981500), INT32_C( 122589515), -INT32_C( 1444851533), -INT32_C( 602191742), INT32_C( 1482484583) }, { -INT32_C( 552446540), INT32_C( 949248349), INT32_C( 1110712318), INT32_C( 489284305), INT32_C( 639924083), INT32_C( 1255081415), -INT32_C( 1272517811), -INT32_C( 1978891306) }, { -INT32_C( 1158100495), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 602191742), INT32_C( 1482484583) } }, { { INT32_C( 896081880), INT32_C( 2121137792), -INT32_C( 272588515), -INT32_C( 150148733), INT32_C( 673001568), -INT32_C( 2106397387), -INT32_C( 1372153640), -INT32_C( 214416869) }, { -INT32_C( 517430943), -INT32_C( 1117808993), -INT32_C( 1146347721), INT32_C( 1773320200), INT32_C( 496095208), -INT32_C( 1801518148), -INT32_C( 1220356709), INT32_C( 2041215511) }, { INT32_C( 896081880), INT32_C( 2121137792), -INT32_C( 272588515), INT32_C( 0), INT32_C( 0), -INT32_C( 2106397387), -INT32_C( 1372153640), INT32_C( 0) } }, { { -INT32_C( 1151675620), -INT32_C( 
1602700695), -INT32_C( 497343271), -INT32_C( 1001714212), -INT32_C( 1713251107), INT32_C( 2049867999), INT32_C( 1831956565), INT32_C( 132570347) }, { INT32_C( 415383727), -INT32_C( 726123526), INT32_C( 1018565727), -INT32_C( 33554143), -INT32_C( 1130831139), -INT32_C( 1204369822), INT32_C( 556099638), -INT32_C( 215479484) }, { INT32_C( 0), -INT32_C( 1602700695), INT32_C( 0), -INT32_C( 1001714212), -INT32_C( 1713251107), INT32_C( 2049867999), INT32_C( 0), INT32_C( 132570347) } } };
/* Only the mask goes through a vector load; the source array itself is
 * passed as the memory operand of the masked load. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i b = simde_x_mm256_loadu_epi32(test_vec[i].b); simde__m256i r = simde_mm256_maskload_epi32(test_vec[i].a, b); simde_test_x86_assert_equal_i32x8(r, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; }
/* Test for simde_mm_maskload_epi64.
 *
 * `a` is the int64[2] memory handed to the load, `b` is the mask and `r` the
 * expected result. In the table a lane of `r` equals the matching lane of
 * `a` when the mask lane is negative and is 0 otherwise.
 * Returns 0 when every vector matches. */
static int test_simde_mm_maskload_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { { INT64_C( 7349151601351223182), INT64_C( 2152075836815026901) }, { -INT64_C( 3451509024372858266), INT64_C( 7639172437864636040) }, { INT64_C( 7349151601351223182), INT64_C( 0) } }, { { INT64_C( 5694817644399627057), INT64_C( 3631998945759467448) }, { -INT64_C( 1550766173522912299), INT64_C( 6212325517381229296) }, { INT64_C( 5694817644399627057), INT64_C( 0) } }, { { INT64_C( 1040332979076417448), INT64_C( 5698954012533458456) }, { INT64_C( 1415945725891154011), -INT64_C( 8842793825335295795) }, { INT64_C( 0), INT64_C( 5698954012533458456) } }, { { INT64_C( 6538970759181058396), INT64_C( 3740051304798361630) }, { INT64_C( 2385384236005712100), INT64_C( 7654249160401746390) }, { INT64_C( 0), INT64_C( 0) } }, { { INT64_C( 1601271529454952604), -INT64_C( 5765005774754515213) }, { INT64_C( 6551277872407309686), -INT64_C( 846316808078164535) }, { INT64_C( 0), -INT64_C( 5765005774754515213) } }, { { INT64_C( 3041629830225484840), -INT64_C( 1154342652176112512) }, { INT64_C( 5313864382184796385), INT64_C( 4390553429439728801) }, { INT64_C( 0),
INT64_C( 0) } }, { { INT64_C( 5413206681815138493), -INT64_C( 1594397375012137610) }, { -INT64_C( 2591526659806029760), INT64_C( 5199903345750714841) }, { INT64_C( 5413206681815138493), INT64_C( 0) } }, { { -INT64_C( 8242893437788125820), INT64_C( 172082101606953329) }, { -INT64_C( 7562726032021409293), INT64_C( 3082904871272505846) }, { -INT64_C( 8242893437788125820), INT64_C( 0) } } };
/* When both SIMDE_X86_AVX2_NATIVE and SIMDE_NATIVE_ALIASES_TESTING are
 * defined the source pointer is reinterpreted as `const long long *` before
 * the call; otherwise the int64_t array is passed as-is. Presumably the
 * native intrinsic is declared with `long long` -- confirm. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m128i r; #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_NATIVE_ALIASES_TESTING) r = simde_mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, test_vec[i].a), b); #else r = simde_mm_maskload_epi64(test_vec[i].a, b); #endif simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; }
/* Test for simde_mm256_maskload_epi64.
 *
 * `a` is the int64[4] memory handed to the load, `b` is the mask and `r` the
 * expected result. In the table a lane of `r` equals the matching lane of
 * `a` when the mask lane is negative and is 0 otherwise.
 * Returns 0 when every vector matches. */
static int test_simde_mm256_maskload_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[4]; const int64_t b[4]; const int64_t r[4]; } test_vec[] = { { { -INT64_C( 1070553612364617574), -INT64_C( 114323686149473380), -INT64_C( 3053698850669963787), -INT64_C( 5793007821577797203) }, { -INT64_C( 2874087956492281342), -INT64_C( 8133550842580450581), -INT64_C( 5976205236281087193), -INT64_C( 520589115781817598) }, { -INT64_C( 1070553612364617574), -INT64_C( 114323686149473380), -INT64_C( 3053698850669963787), -INT64_C( 5793007821577797203) } }, { { -INT64_C( 1284580311155027432), -INT64_C( 8201847224291781372), INT64_C( 1425908320736694043), INT64_C( 4873440337809026227) }, { INT64_C( 771455870039475650), INT64_C( 1030716232118617176), -INT64_C( 6788626756700334683), -INT64_C( 6553603994163678305) }, { INT64_C( 0), INT64_C( 0), INT64_C( 1425908320736694043), INT64_C( 4873440337809026227) } }, { { -INT64_C( 4701442009973677136), -INT64_C( 1880874889532052765), -INT64_C( 6814117747969290272), INT64_C( 6260135033578371671) }, { INT64_C( 5566394621280209690), INT64_C( 7448400211646594360), -INT64_C(
5351783383607936745), -INT64_C( 5306699842386385569) }, { INT64_C( 0), INT64_C( 0), -INT64_C( 6814117747969290272), INT64_C( 6260135033578371671) } }, { { INT64_C( 7985576165328250651), INT64_C( 1523138059357910324), INT64_C( 6652636875035990078), INT64_C( 3270100493289996814) }, { INT64_C( 4884017522370844707), INT64_C( 3642606102698241657), -INT64_C( 5416668556751356806), INT64_C( 1486680395769328994) }, { INT64_C( 0), INT64_C( 0), INT64_C( 6652636875035990078), INT64_C( 0) } }, { { INT64_C( 5884855437847515992), INT64_C( 5046377720733805283), INT64_C( 6515463978070497041), -INT64_C( 2175959308119914040) }, { INT64_C( 5975418466285927107), INT64_C( 1995148555708839069), -INT64_C( 3865991726324617720), INT64_C( 6589137445521139719) }, { INT64_C( 0), INT64_C( 0), INT64_C( 6515463978070497041), INT64_C( 0) } }, { { INT64_C( 5353903293904370059), -INT64_C( 5855694748648180329), -INT64_C( 3198110109063606318), -INT64_C( 134311449431170769) }, { -INT64_C( 7888822226924332658), INT64_C( 8074655740636379578), -INT64_C( 1071888862176560257), -INT64_C( 1912662773025177085) }, { INT64_C( 5353903293904370059), INT64_C( 0), -INT64_C( 3198110109063606318), -INT64_C( 134311449431170769) } }, { { INT64_C( 7746936492670348532), INT64_C( 2660740784655676895), INT64_C( 1605972051073446037), -INT64_C( 3437089993967009519) }, { -INT64_C( 970171594833891988), -INT64_C( 4341088310040570377), -INT64_C( 3263873051487686261), -INT64_C( 6047621540034756507) }, { INT64_C( 7746936492670348532), INT64_C( 2660740784655676895), INT64_C( 1605972051073446037), -INT64_C( 3437089993967009519) } }, { { INT64_C( 935152317656388341), INT64_C( 5736295944050796969), INT64_C( 9164439426345842332), -INT64_C( 1815279467665200671) }, { -INT64_C( 7653891838718273370), INT64_C( 8531516276872147026), -INT64_C( 3001928471202802754), -INT64_C( 7835481678918706838) }, { INT64_C( 935152317656388341), INT64_C( 0), INT64_C( 9164439426345842332), -INT64_C( 1815279467665200671) } } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i b = simde_x_mm256_loadu_epi64(test_vec[i].b); simde__m256i r; #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_NATIVE_ALIASES_TESTING) r = simde_mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, test_vec[i].a), b); #else r = simde_mm256_maskload_epi64(test_vec[i].a, b); #endif simde_test_x86_assert_equal_i64x4(r, simde_x_mm256_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm_maskstore_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t mask[4]; const int32_t a[4]; const int32_t ri[4]; const int32_t ro[4]; } test_vec[] = { { { -INT32_C( 1072310678), -INT32_C( 1753702976), -INT32_C( 1000721038), INT32_C( 1139936175) }, { -INT32_C( 921988596), -INT32_C( 1384169384), INT32_C( 1969759351), INT32_C( 323044521) }, { -INT32_C( 921988596), -INT32_C( 1384169384), INT32_C( 1969759351), INT32_C( 22034) }, { -INT32_C( 921988596), -INT32_C( 1384169384), INT32_C( 1969759351), INT32_C( 22034) } }, { { -INT32_C( 623618278), INT32_C( 1718766836), INT32_C( 824888449), -INT32_C( 462152488) }, { INT32_C( 112033710), INT32_C( 917712319), -INT32_C( 72606895), INT32_C( 2047798624) }, { INT32_C( 112033710), -INT32_C( 1384169384), INT32_C( 1969759351), INT32_C( 2047798624) }, { INT32_C( 112033710), -INT32_C( 1384169384), INT32_C( 1969759351), INT32_C( 2047798624) } }, { { INT32_C( 945152580), -INT32_C( 1331771601), INT32_C( 1809959315), -INT32_C( 1806674203) }, { -INT32_C( 1785004587), INT32_C( 2093698602), -INT32_C( 914917527), -INT32_C( 1455127195) }, { INT32_C( 112033710), INT32_C( 2093698602), INT32_C( 1969759351), -INT32_C( 1455127195) }, { INT32_C( 112033710), INT32_C( 2093698602), INT32_C( 1969759351), -INT32_C( 1455127195) } }, { { -INT32_C( 1746757272), -INT32_C( 213417888), INT32_C( 794765641), INT32_C( 1422110591) }, { -INT32_C( 672571988), INT32_C( 357807531), -INT32_C( 1847670228), -INT32_C( 1220861361) }, { -INT32_C( 672571988), INT32_C( 357807531), INT32_C( 
1969759351), -INT32_C( 1455127195) }, { -INT32_C( 672571988), INT32_C( 357807531), INT32_C( 1969759351), -INT32_C( 1455127195) } }, { { INT32_C( 458104251), -INT32_C( 418408803), INT32_C( 1041657535), -INT32_C( 913123043) }, { -INT32_C( 492798922), INT32_C( 1576530737), INT32_C( 233821629), -INT32_C( 1278989576) }, { -INT32_C( 672571988), INT32_C( 1576530737), INT32_C( 1969759351), -INT32_C( 1278989576) }, { -INT32_C( 672571988), INT32_C( 1576530737), INT32_C( 1969759351), -INT32_C( 1278989576) } }, { { -INT32_C( 456191161), INT32_C( 1758191273), INT32_C( 1772544332), -INT32_C( 248366918) }, { -INT32_C( 439102540), -INT32_C( 2075931962), -INT32_C( 1752092001), -INT32_C( 1555344036) }, { -INT32_C( 439102540), INT32_C( 1576530737), INT32_C( 1969759351), -INT32_C( 1555344036) }, { -INT32_C( 439102540), INT32_C( 1576530737), INT32_C( 1969759351), -INT32_C( 1555344036) } }, { { INT32_C( 294066792), INT32_C( 1148802040), -INT32_C( 273866956), INT32_C( 216063832) }, { INT32_C( 2045948850), INT32_C( 486356349), -INT32_C( 1011577241), INT32_C( 1281818595) }, { -INT32_C( 439102540), INT32_C( 1576530737), -INT32_C( 1011577241), -INT32_C( 1555344036) }, { -INT32_C( 439102540), INT32_C( 1576530737), -INT32_C( 1011577241), -INT32_C( 1555344036) } }, { { INT32_C( 291368217), INT32_C( 1968559936), INT32_C( 1315177206), -INT32_C( 1805957919) }, { INT32_C( 1947028983), -INT32_C( 376436094), INT32_C( 2074887320), INT32_C( 1556550211) }, { -INT32_C( 439102540), INT32_C( 1576530737), -INT32_C( 1011577241), INT32_C( 1556550211) }, { -INT32_C( 439102540), INT32_C( 1576530737), -INT32_C( 1011577241), INT32_C( 1556550211) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i mask = simde_x_mm_loadu_epi32(test_vec[i].mask); simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); int32_t r[4]; simde_memcpy(r, test_vec[i].ri, sizeof(r)); simde_mm_maskstore_epi32(r, mask, a); simde_assert_equal_vi32(sizeof(r) / sizeof(r[0]), r, test_vec[i].ro); } return 
0; } static int test_simde_mm256_maskstore_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t mask[8]; const int32_t a[8]; const int32_t ri[8]; const int32_t ro[8]; } test_vec[] = { { { -INT32_C( 2076816164), INT32_C( 922486962), INT32_C( 556407151), INT32_C( 1733355511), INT32_C( 401058348), -INT32_C( 81912571), -INT32_C( 1174954754), INT32_C( 302965558) }, { -INT32_C( 493403088), -INT32_C( 1139240115), -INT32_C( 1545780564), INT32_C( 1426795817), -INT32_C( 580062504), INT32_C( 232294927), INT32_C( 1573376039), -INT32_C( 479210061) }, { -INT32_C( 493403088), INT32_C( 0), INT32_C( 1022469250), INT32_C( 32547), INT32_C( 1022406520), INT32_C( 232294927), INT32_C( 1573376039), INT32_C( 32547) }, { -INT32_C( 493403088), INT32_C( 0), INT32_C( 1022469250), INT32_C( 32547), INT32_C( 1022406520), INT32_C( 232294927), INT32_C( 1573376039), INT32_C( 32547) } }, { { INT32_C( 1724188185), INT32_C( 1176690073), INT32_C( 1240071968), INT32_C( 94368813), -INT32_C( 152957977), -INT32_C( 1123763306), INT32_C( 1041943435), -INT32_C( 1188918880) }, { INT32_C( 690022288), -INT32_C( 445693243), INT32_C( 1865308482), INT32_C( 880069965), INT32_C( 1865111257), -INT32_C( 1674825967), -INT32_C( 1696905478), INT32_C( 1599339983) }, { -INT32_C( 493403088), INT32_C( 0), INT32_C( 1022469250), INT32_C( 32547), INT32_C( 1865111257), -INT32_C( 1674825967), INT32_C( 1573376039), INT32_C( 1599339983) }, { -INT32_C( 493403088), INT32_C( 0), INT32_C( 1022469250), INT32_C( 32547), INT32_C( 1865111257), -INT32_C( 1674825967), INT32_C( 1573376039), INT32_C( 1599339983) } }, { { -INT32_C( 1450609692), -INT32_C( 124847946), -INT32_C( 1620591279), INT32_C( 1674828682), INT32_C( 1137901105), INT32_C( 668991021), INT32_C( 348240452), -INT32_C( 1670179401) }, { INT32_C( 1044774024), INT32_C( 1177998581), INT32_C( 468032913), -INT32_C( 1434470024), -INT32_C( 437431881), -INT32_C( 1811100592), INT32_C( 1051250311), INT32_C( 1809456355) }, { INT32_C( 1044774024), INT32_C( 1177998581), INT32_C( 
468032913), INT32_C( 32547), INT32_C( 1865111257), -INT32_C( 1674825967), INT32_C( 1573376039), INT32_C( 1809456355) }, { INT32_C( 1044774024), INT32_C( 1177998581), INT32_C( 468032913), INT32_C( 32547), INT32_C( 1865111257), -INT32_C( 1674825967), INT32_C( 1573376039), INT32_C( 1809456355) } }, { { INT32_C( 229253144), -INT32_C( 2058034956), -INT32_C( 157271682), -INT32_C( 1432346638), -INT32_C( 1047556751), -INT32_C( 514483366), INT32_C( 1277165161), INT32_C( 850983450) }, { INT32_C( 239100442), -INT32_C( 1064070078), -INT32_C( 1061735475), -INT32_C( 999663789), INT32_C( 1065744869), -INT32_C( 31401324), -INT32_C( 230015016), INT32_C( 1394934329) }, { INT32_C( 1044774024), -INT32_C( 1064070078), -INT32_C( 1061735475), -INT32_C( 999663789), INT32_C( 1065744869), -INT32_C( 31401324), INT32_C( 1573376039), INT32_C( 1809456355) }, { INT32_C( 1044774024), -INT32_C( 1064070078), -INT32_C( 1061735475), -INT32_C( 999663789), INT32_C( 1065744869), -INT32_C( 31401324), INT32_C( 1573376039), INT32_C( 1809456355) } }, { { -INT32_C( 1486789276), -INT32_C( 966265607), INT32_C( 2072387112), INT32_C( 1530916982), INT32_C( 2124072169), INT32_C( 2004662942), INT32_C( 862570233), INT32_C( 763793097) }, { -INT32_C( 321591053), INT32_C( 78855388), -INT32_C( 796968614), INT32_C( 321633834), INT32_C( 563201410), INT32_C( 2040008063), -INT32_C( 1649671724), -INT32_C( 2083900784) }, { -INT32_C( 321591053), INT32_C( 78855388), -INT32_C( 1061735475), -INT32_C( 999663789), INT32_C( 1065744869), -INT32_C( 31401324), INT32_C( 1573376039), INT32_C( 1809456355) }, { -INT32_C( 321591053), INT32_C( 78855388), -INT32_C( 1061735475), -INT32_C( 999663789), INT32_C( 1065744869), -INT32_C( 31401324), INT32_C( 1573376039), INT32_C( 1809456355) } }, { { -INT32_C( 143614182), INT32_C( 905651163), -INT32_C( 2046395556), -INT32_C( 1130745543), INT32_C( 1994206199), INT32_C( 233796921), INT32_C( 111844214), -INT32_C( 393579314) }, { -INT32_C( 287311341), INT32_C( 2032458525), -INT32_C( 1895814570), INT32_C( 
1380686427), -INT32_C( 20371259), INT32_C( 336312477), INT32_C( 572175700), INT32_C( 1024107562) }, { -INT32_C( 287311341), INT32_C( 78855388), -INT32_C( 1895814570), INT32_C( 1380686427), INT32_C( 1065744869), -INT32_C( 31401324), INT32_C( 1573376039), INT32_C( 1024107562) }, { -INT32_C( 287311341), INT32_C( 78855388), -INT32_C( 1895814570), INT32_C( 1380686427), INT32_C( 1065744869), -INT32_C( 31401324), INT32_C( 1573376039), INT32_C( 1024107562) } }, { { -INT32_C( 1154684258), INT32_C( 456478917), -INT32_C( 710265478), -INT32_C( 1809254705), -INT32_C( 1131155170), -INT32_C( 36659543), INT32_C( 2116020820), INT32_C( 767240847) }, { -INT32_C( 638982380), -INT32_C( 1309401545), INT32_C( 579313235), -INT32_C( 1279873132), INT32_C( 1232030112), INT32_C( 1011302376), -INT32_C( 1195743703), -INT32_C( 1528400496) }, { -INT32_C( 638982380), INT32_C( 78855388), INT32_C( 579313235), -INT32_C( 1279873132), INT32_C( 1232030112), INT32_C( 1011302376), INT32_C( 1573376039), INT32_C( 1024107562) }, { -INT32_C( 638982380), INT32_C( 78855388), INT32_C( 579313235), -INT32_C( 1279873132), INT32_C( 1232030112), INT32_C( 1011302376), INT32_C( 1573376039), INT32_C( 1024107562) } }, { { -INT32_C( 1803694243), INT32_C( 1078358509), -INT32_C( 1537028848), INT32_C( 475469948), INT32_C( 1248183906), INT32_C( 797355013), -INT32_C( 1545125869), INT32_C( 306761141) }, { -INT32_C( 1985493604), INT32_C( 1204415799), INT32_C( 921381818), -INT32_C( 1504558268), INT32_C( 267433737), INT32_C( 2000582244), INT32_C( 1796875702), -INT32_C( 1887542541) }, { -INT32_C( 1985493604), INT32_C( 78855388), INT32_C( 921381818), -INT32_C( 1279873132), INT32_C( 1232030112), INT32_C( 1011302376), INT32_C( 1796875702), INT32_C( 1024107562) }, { -INT32_C( 1985493604), INT32_C( 78855388), INT32_C( 921381818), -INT32_C( 1279873132), INT32_C( 1232030112), INT32_C( 1011302376), INT32_C( 1796875702), INT32_C( 1024107562) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i mask = 
simde_x_mm256_loadu_epi32(test_vec[i].mask); simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); int32_t r[8]; simde_memcpy(r, test_vec[i].ri, sizeof(r)); simde_mm256_maskstore_epi32(r, mask, a); simde_assert_equal_vi32(sizeof(r) / sizeof(r[0]), r, test_vec[i].ro); } return 0; } static int test_simde_mm_maskstore_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t mask[2]; const int64_t a[2]; const int64_t ri[2]; const int64_t ro[2]; } test_vec[] = { { { INT64_C( 3149967905596390459), -INT64_C( 4778628840774069374) }, { INT64_C( 5664027239959376000), INT64_C( 8937778571530162224) }, { INT64_C( 140732988756112), INT64_C( 8937778571530162224) }, { INT64_C( 140732988756112), INT64_C( 8937778571530162224) } }, { { INT64_C( 8062610935831789210), -INT64_C( 8946025712957579848) }, { -INT64_C( 7091179927189929083), -INT64_C( 5251394950330663071) }, { INT64_C( 140732988756112), -INT64_C( 5251394950330663071) }, { INT64_C( 140732988756112), -INT64_C( 5251394950330663071) } }, { { INT64_C( 7684922938565365066), INT64_C( 2879875638071020913) }, { -INT64_C( 2293448770573013726), INT64_C( 332036821611605703) }, { INT64_C( 140732988756112), -INT64_C( 5251394950330663071) }, { INT64_C( 140732988756112), -INT64_C( 5251394950330663071) } }, { { -INT64_C( 3437124613673645265), INT64_C( 6050651184505868846) }, { INT64_C( 3965153871791286441), INT64_C( 6501991883275471484) }, { INT64_C( 3965153871791286441), -INT64_C( 5251394950330663071) }, { INT64_C( 3965153871791286441), -INT64_C( 5251394950330663071) } }, { { -INT64_C( 8650702022112413758), INT64_C( 2008129837236434778) }, { INT64_C( 5359365805870137528), -INT64_C( 767092764058747948) }, { INT64_C( 5359365805870137528), -INT64_C( 5251394950330663071) }, { INT64_C( 5359365805870137528), -INT64_C( 5251394950330663071) } }, { { INT64_C( 8652269335912879668), INT64_C( 8272797757509692224) }, { INT64_C( 3511395233916719521), INT64_C( 3215161084095773558) }, { INT64_C( 5359365805870137528), -INT64_C( 
5251394950330663071) }, { INT64_C( 5359365805870137528), -INT64_C( 5251394950330663071) } }, { { -INT64_C( 20041080821402335), INT64_C( 4567917337930546231) }, { INT64_C( 8151067802542735250), -INT64_C( 303508250809446492) }, { INT64_C( 8151067802542735250), -INT64_C( 5251394950330663071) }, { INT64_C( 8151067802542735250), -INT64_C( 5251394950330663071) } }, { { -INT64_C( 8778935907035413907), -INT64_C( 1185330308615447376) }, { INT64_C( 5136350581142404060), -INT64_C( 8781828949867732662) }, { INT64_C( 5136350581142404060), -INT64_C( 8781828949867732662) }, { INT64_C( 5136350581142404060), -INT64_C( 8781828949867732662) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i mask = simde_x_mm_loadu_epi64(test_vec[i].mask); simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a); int64_t r[2]; simde_memcpy(r, test_vec[i].ri, sizeof(r)); #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_NATIVE_ALIASES_TESTING) simde_mm_maskstore_epi64((long long *)r, mask, a); #else simde_mm_maskstore_epi64(r, mask, a); #endif simde_assert_equal_vi64(sizeof(r) / sizeof(r[0]), r, test_vec[i].ro); } return 0; } static int test_simde_mm256_maskstore_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t mask[4]; const int64_t a[4]; const int64_t ri[4]; const int64_t ro[4]; } test_vec[] = { { { INT64_C( 5242526618242621661), INT64_C( 6166759832868221493), -INT64_C( 3838943471189419310), -INT64_C( 300229214128524470) }, { INT64_C( 2195119886963881727), INT64_C( 4994165925608667027), -INT64_C( 2162954226736872975), INT64_C( 464305096456775511) }, { INT64_C( 2050), INT64_C( 1313770), -INT64_C( 2162954226736872975), INT64_C( 464305096456775511) }, { INT64_C( 2050), INT64_C( 1313770), -INT64_C( 2162954226736872975), INT64_C( 464305096456775511) } }, { { -INT64_C( 8770573412453887679), INT64_C( 5987762628439542624), -INT64_C( 105974608678354161), INT64_C( 663406593554201895) }, { INT64_C( 8191901560846116662), INT64_C( 917429192545808134), 
-INT64_C( 1800249354620813217), -INT64_C( 7905707010328004489) }, { INT64_C( 8191901560846116662), INT64_C( 1313770), -INT64_C( 1800249354620813217), INT64_C( 464305096456775511) }, { INT64_C( 8191901560846116662), INT64_C( 1313770), -INT64_C( 1800249354620813217), INT64_C( 464305096456775511) } }, { { -INT64_C( 129641248100483919), INT64_C( 3792913607644641434), INT64_C( 579345394647586402), INT64_C( 1920382171601182647) }, { INT64_C( 7351017860721565965), -INT64_C( 1112211479437939833), -INT64_C( 4570841319541126409), INT64_C( 8628750009532471612) }, { INT64_C( 7351017860721565965), INT64_C( 1313770), -INT64_C( 1800249354620813217), INT64_C( 464305096456775511) }, { INT64_C( 7351017860721565965), INT64_C( 1313770), -INT64_C( 1800249354620813217), INT64_C( 464305096456775511) } }, { { INT64_C( 3606962921541573595), -INT64_C( 6599710048517223188), -INT64_C( 3120222027706958616), -INT64_C( 2560234815484229993) }, { INT64_C( 7111344849244452307), INT64_C( 4967546651701690014), INT64_C( 8227770697573080513), -INT64_C( 5454725699460683665) }, { INT64_C( 7351017860721565965), INT64_C( 4967546651701690014), INT64_C( 8227770697573080513), -INT64_C( 5454725699460683665) }, { INT64_C( 7351017860721565965), INT64_C( 4967546651701690014), INT64_C( 8227770697573080513), -INT64_C( 5454725699460683665) } }, { { INT64_C( 124660851703730401), -INT64_C( 7079591700845521376), INT64_C( 5739316420021843801), -INT64_C( 1845900778233980467) }, { -INT64_C( 7620099957265165093), INT64_C( 2379821211310001037), -INT64_C( 1082557009605755447), -INT64_C( 6989304304425322474) }, { INT64_C( 7351017860721565965), INT64_C( 2379821211310001037), INT64_C( 8227770697573080513), -INT64_C( 6989304304425322474) }, { INT64_C( 7351017860721565965), INT64_C( 2379821211310001037), INT64_C( 8227770697573080513), -INT64_C( 6989304304425322474) } }, { { -INT64_C( 4350446221603815788), INT64_C( 5661361750980473512), -INT64_C( 413503352329476305), INT64_C( 8362083761278804137) }, { -INT64_C( 
5886734649484449196), -INT64_C( 2548280180651164976), INT64_C( 3707993608283921047), -INT64_C( 746280734714459568) }, { -INT64_C( 5886734649484449196), INT64_C( 2379821211310001037), INT64_C( 3707993608283921047), -INT64_C( 6989304304425322474) }, { -INT64_C( 5886734649484449196), INT64_C( 2379821211310001037), INT64_C( 3707993608283921047), -INT64_C( 6989304304425322474) } }, { { INT64_C( 1174539280240812695), -INT64_C( 5561656252458243512), -INT64_C( 4512708599429136223), INT64_C( 4954969688283086893) }, { INT64_C( 6343651101063070392), INT64_C( 1685505655689801368), INT64_C( 7084341664285954590), -INT64_C( 8028524129605893725) }, { -INT64_C( 5886734649484449196), INT64_C( 1685505655689801368), INT64_C( 7084341664285954590), -INT64_C( 6989304304425322474) }, { -INT64_C( 5886734649484449196), INT64_C( 1685505655689801368), INT64_C( 7084341664285954590), -INT64_C( 6989304304425322474) } }, { { INT64_C( 4296032324518769974), INT64_C( 2922363408830294785), INT64_C( 1764011878569726692), -INT64_C( 3048659187356095416) }, { -INT64_C( 3724554084945282703), INT64_C( 8347090473923591588), -INT64_C( 2580272778841387612), INT64_C( 443794320655161779) }, { -INT64_C( 5886734649484449196), INT64_C( 1685505655689801368), INT64_C( 7084341664285954590), INT64_C( 443794320655161779) }, { -INT64_C( 5886734649484449196), INT64_C( 1685505655689801368), INT64_C( 7084341664285954590), INT64_C( 443794320655161779) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i mask = simde_x_mm256_loadu_epi64(test_vec[i].mask); simde__m256i a = simde_x_mm256_loadu_epi64(test_vec[i].a); int64_t r[4]; simde_memcpy(r, test_vec[i].ri, sizeof(r)); #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_NATIVE_ALIASES_TESTING) simde_mm256_maskstore_epi64((long long *)r, mask, a); #else simde_mm256_maskstore_epi64(r, mask, a); #endif simde_assert_equal_vi64(sizeof(r) / sizeof(r[0]), r, test_vec[i].ro); } return 0; } static int 
test_simde_mm256_max_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( 82), INT8_C( 80), INT8_C( 100), INT8_C(-114), INT8_C(-121), INT8_C(-115), INT8_C( -33), INT8_C( -36), INT8_C(-105), INT8_C( -52), INT8_C( 94), INT8_C( 97), INT8_C( -84), INT8_C( 116), INT8_C( 107), INT8_C( -76), INT8_C( 5), INT8_C( 53), INT8_C( 122), INT8_C(-101), INT8_C( 32), INT8_C(-122), INT8_C( -66), INT8_C( -61), INT8_C( -25), INT8_C( -2), INT8_C( 14), INT8_C( -94), INT8_C( 18), INT8_C( -35), INT8_C(-102), INT8_C( 57)), simde_mm256_set_epi8(INT8_C( 46), INT8_C( -89), INT8_C( 83), INT8_C( 115), INT8_C( 114), INT8_C( 48), INT8_C( 9), INT8_C( 1), INT8_C( 54), INT8_C( -93), INT8_C( 118), INT8_C( -84), INT8_C( 50), INT8_C( -43), INT8_C(-125), INT8_C( -87), INT8_C( 48), INT8_C( 67), INT8_C( 65), INT8_C( 103), INT8_C( 105), INT8_C( -97), INT8_C( -54), INT8_C( 17), INT8_C( -90), INT8_C( -86), INT8_C( 116), INT8_C( 127), INT8_C( 77), INT8_C( -5), INT8_C( 31), INT8_C( 24)), simde_mm256_set_epi8(INT8_C( 82), INT8_C( 80), INT8_C( 100), INT8_C( 115), INT8_C( 114), INT8_C( 48), INT8_C( 9), INT8_C( 1), INT8_C( 54), INT8_C( -52), INT8_C( 118), INT8_C( 97), INT8_C( 50), INT8_C( 116), INT8_C( 107), INT8_C( -76), INT8_C( 48), INT8_C( 67), INT8_C( 122), INT8_C( 103), INT8_C( 105), INT8_C( -97), INT8_C( -54), INT8_C( 17), INT8_C( -25), INT8_C( -2), INT8_C( 116), INT8_C( 127), INT8_C( 77), INT8_C( -5), INT8_C( 31), INT8_C( 57)) }, { simde_mm256_set_epi8(INT8_C( 104), INT8_C( 106), INT8_C(-125), INT8_C( -4), INT8_C( -59), INT8_C( 95), INT8_C( -56), INT8_C( 31), INT8_C( 108), INT8_C( 5), INT8_C( 101), INT8_C( 27), INT8_C( -64), INT8_C( 57), INT8_C(-101), INT8_C( -55), INT8_C( 77), INT8_C( 118), INT8_C( 99), INT8_C( 21), INT8_C( 95), INT8_C( 17), INT8_C( 54), INT8_C( -21), INT8_C( 92), INT8_C( -54), INT8_C( 78), INT8_C( 40), INT8_C( 102), INT8_C(-101), INT8_C( 0), INT8_C( 127)), simde_mm256_set_epi8(INT8_C( -59), INT8_C( 
102), INT8_C( 95), INT8_C( -82), INT8_C( -77), INT8_C( -39), INT8_C( -88), INT8_C( -22), INT8_C( 54), INT8_C( 84), INT8_C( -33), INT8_C( 49), INT8_C( 63), INT8_C( 99), INT8_C( 64), INT8_C(-122), INT8_C( 102), INT8_C( 28), INT8_C( 90), INT8_C( 72), INT8_C( 24), INT8_C( 50), INT8_C( 78), INT8_C( -15), INT8_C( -20), INT8_C( -56), INT8_C( 73), INT8_C( 48), INT8_C( -27), INT8_C( 7), INT8_C( -15), INT8_C(-109)), simde_mm256_set_epi8(INT8_C( 104), INT8_C( 106), INT8_C( 95), INT8_C( -4), INT8_C( -59), INT8_C( 95), INT8_C( -56), INT8_C( 31), INT8_C( 108), INT8_C( 84), INT8_C( 101), INT8_C( 49), INT8_C( 63), INT8_C( 99), INT8_C( 64), INT8_C( -55), INT8_C( 102), INT8_C( 118), INT8_C( 99), INT8_C( 72), INT8_C( 95), INT8_C( 50), INT8_C( 78), INT8_C( -15), INT8_C( 92), INT8_C( -54), INT8_C( 78), INT8_C( 48), INT8_C( 102), INT8_C( 7), INT8_C( 0), INT8_C( 127)) }, { simde_mm256_set_epi8(INT8_C( 65), INT8_C( 23), INT8_C( 64), INT8_C( -32), INT8_C( -17), INT8_C( 98), INT8_C( -25), INT8_C( 100), INT8_C(-100), INT8_C( -55), INT8_C( -13), INT8_C( 105), INT8_C( 8), INT8_C( 62), INT8_C(-102), INT8_C( 30), INT8_C( -9), INT8_C( 71), INT8_C( -37), INT8_C( -11), INT8_C( -97), INT8_C( 54), INT8_C( -23), INT8_C( 103), INT8_C( 11), INT8_C( -76), INT8_C( 47), INT8_C( 45), INT8_C( -29), INT8_C( -83), INT8_C( -52), INT8_C( 82)), simde_mm256_set_epi8(INT8_C( -2), INT8_C( -84), INT8_C( -69), INT8_C( -58), INT8_C(-128), INT8_C( 76), INT8_C( 110), INT8_C( -99), INT8_C( 46), INT8_C( 8), INT8_C( -50), INT8_C( -2), INT8_C( 114), INT8_C( 31), INT8_C( -27), INT8_C( -16), INT8_C( 63), INT8_C( -83), INT8_C(-114), INT8_C( 116), INT8_C( 14), INT8_C( -31), INT8_C( 3), INT8_C(-105), INT8_C( -84), INT8_C( -19), INT8_C( 81), INT8_C( 57), INT8_C( -55), INT8_C( -67), INT8_C( -89), INT8_C( 74)), simde_mm256_set_epi8(INT8_C( 65), INT8_C( 23), INT8_C( 64), INT8_C( -32), INT8_C( -17), INT8_C( 98), INT8_C( 110), INT8_C( 100), INT8_C( 46), INT8_C( 8), INT8_C( -13), INT8_C( 105), INT8_C( 114), INT8_C( 62), INT8_C( -27), 
INT8_C( 30), INT8_C( 63), INT8_C( 71), INT8_C( -37), INT8_C( 116), INT8_C( 14), INT8_C( 54), INT8_C( 3), INT8_C( 103), INT8_C( 11), INT8_C( -19), INT8_C( 81), INT8_C( 57), INT8_C( -29), INT8_C( -67), INT8_C( -52), INT8_C( 82)) }, { simde_mm256_set_epi8(INT8_C( 3), INT8_C( -48), INT8_C( -19), INT8_C( 101), INT8_C( -3), INT8_C( -85), INT8_C(-102), INT8_C( 26), INT8_C( 89), INT8_C( -95), INT8_C( 51), INT8_C( -5), INT8_C( 13), INT8_C( 30), INT8_C( -92), INT8_C( -13), INT8_C( 68), INT8_C( 71), INT8_C(-127), INT8_C( -65), INT8_C(-102), INT8_C( -1), INT8_C( 48), INT8_C( 30), INT8_C( 43), INT8_C( 71), INT8_C( 53), INT8_C( 78), INT8_C( 81), INT8_C( 119), INT8_C( -32), INT8_C( 18)), simde_mm256_set_epi8(INT8_C( -81), INT8_C( -54), INT8_C( -70), INT8_C( 102), INT8_C( 86), INT8_C( 106), INT8_C( 99), INT8_C( -7), INT8_C( -74), INT8_C( -52), INT8_C(-121), INT8_C(-127), INT8_C( 49), INT8_C(-119), INT8_C( 52), INT8_C( 119), INT8_C( 123), INT8_C(-104), INT8_C( 44), INT8_C( -14), INT8_C( 56), INT8_C( -83), INT8_C( 21), INT8_C( 108), INT8_C( -43), INT8_C( 75), INT8_C( 75), INT8_C( 51), INT8_C( 54), INT8_C( -18), INT8_C( 100), INT8_C( 49)), simde_mm256_set_epi8(INT8_C( 3), INT8_C( -48), INT8_C( -19), INT8_C( 102), INT8_C( 86), INT8_C( 106), INT8_C( 99), INT8_C( 26), INT8_C( 89), INT8_C( -52), INT8_C( 51), INT8_C( -5), INT8_C( 49), INT8_C( 30), INT8_C( 52), INT8_C( 119), INT8_C( 123), INT8_C( 71), INT8_C( 44), INT8_C( -14), INT8_C( 56), INT8_C( -1), INT8_C( 48), INT8_C( 108), INT8_C( 43), INT8_C( 75), INT8_C( 75), INT8_C( 78), INT8_C( 81), INT8_C( 119), INT8_C( 100), INT8_C( 49)) }, { simde_mm256_set_epi8(INT8_C( -92), INT8_C( 89), INT8_C( 61), INT8_C( 117), INT8_C( -42), INT8_C( 29), INT8_C( -36), INT8_C( 67), INT8_C(-107), INT8_C( 95), INT8_C(-103), INT8_C( -42), INT8_C( -63), INT8_C( 54), INT8_C( 53), INT8_C( -40), INT8_C( -66), INT8_C( -31), INT8_C( -91), INT8_C( 82), INT8_C( 110), INT8_C( -33), INT8_C( 21), INT8_C( 38), INT8_C( -69), INT8_C( 34), INT8_C( -74), INT8_C( 24), INT8_C( 
-26), INT8_C(-124), INT8_C( -81), INT8_C( 36)), simde_mm256_set_epi8(INT8_C(-105), INT8_C( 55), INT8_C(-117), INT8_C( 72), INT8_C( -82), INT8_C( -42), INT8_C( 100), INT8_C( -21), INT8_C( 19), INT8_C( 104), INT8_C( 11), INT8_C( 52), INT8_C( 30), INT8_C( 8), INT8_C( 70), INT8_C( -51), INT8_C( -61), INT8_C( -91), INT8_C( 126), INT8_C( -28), INT8_C( -34), INT8_C( 102), INT8_C( 69), INT8_C( 99), INT8_C( 81), INT8_C( 11), INT8_C( -51), INT8_C( 2), INT8_C( 60), INT8_C( -70), INT8_C( 92), INT8_C( -37)), simde_mm256_set_epi8(INT8_C( -92), INT8_C( 89), INT8_C( 61), INT8_C( 117), INT8_C( -42), INT8_C( 29), INT8_C( 100), INT8_C( 67), INT8_C( 19), INT8_C( 104), INT8_C( 11), INT8_C( 52), INT8_C( 30), INT8_C( 54), INT8_C( 70), INT8_C( -40), INT8_C( -61), INT8_C( -31), INT8_C( 126), INT8_C( 82), INT8_C( 110), INT8_C( 102), INT8_C( 69), INT8_C( 99), INT8_C( 81), INT8_C( 34), INT8_C( -51), INT8_C( 24), INT8_C( 60), INT8_C( -70), INT8_C( 92), INT8_C( 36)) }, { simde_mm256_set_epi8(INT8_C(-123), INT8_C( 126), INT8_C( 24), INT8_C( 94), INT8_C( -42), INT8_C( 118), INT8_C( 5), INT8_C( -41), INT8_C( -65), INT8_C( -51), INT8_C( -68), INT8_C( 107), INT8_C( -74), INT8_C( 112), INT8_C( 53), INT8_C( 87), INT8_C( 26), INT8_C( -33), INT8_C( 99), INT8_C(-107), INT8_C( 6), INT8_C( 119), INT8_C( -42), INT8_C( 32), INT8_C( 54), INT8_C( -97), INT8_C(-117), INT8_C( -9), INT8_C(-107), INT8_C( 44), INT8_C( -40), INT8_C( 33)), simde_mm256_set_epi8(INT8_C(-115), INT8_C( -12), INT8_C( -81), INT8_C(-120), INT8_C( 82), INT8_C( 114), INT8_C( -55), INT8_C(-106), INT8_C( -54), INT8_C( 126), INT8_C( 70), INT8_C( 83), INT8_C( 64), INT8_C( -11), INT8_C( 10), INT8_C( 72), INT8_C( 19), INT8_C( -35), INT8_C( 123), INT8_C( -50), INT8_C( 95), INT8_C( -2), INT8_C( 77), INT8_C( -12), INT8_C(-127), INT8_C( -73), INT8_C( 117), INT8_C( 14), INT8_C( 40), INT8_C( -92), INT8_C( 74), INT8_C( 114)), simde_mm256_set_epi8(INT8_C(-115), INT8_C( 126), INT8_C( 24), INT8_C( 94), INT8_C( 82), INT8_C( 118), INT8_C( 5), INT8_C( -41), 
INT8_C( -54), INT8_C( 126), INT8_C( 70), INT8_C( 107), INT8_C( 64), INT8_C( 112), INT8_C( 53), INT8_C( 87), INT8_C( 26), INT8_C( -33), INT8_C( 123), INT8_C( -50), INT8_C( 95), INT8_C( 119), INT8_C( 77), INT8_C( 32), INT8_C( 54), INT8_C( -73), INT8_C( 117), INT8_C( 14), INT8_C( 40), INT8_C( 44), INT8_C( 74), INT8_C( 114)) }, { simde_mm256_set_epi8(INT8_C( 29), INT8_C( 41), INT8_C( 107), INT8_C( 24), INT8_C(-113), INT8_C( 113), INT8_C( -53), INT8_C( 6), INT8_C( -82), INT8_C( 34), INT8_C( 0), INT8_C( 35), INT8_C( -43), INT8_C(-128), INT8_C( -61), INT8_C( -69), INT8_C( 2), INT8_C( 64), INT8_C( 89), INT8_C( -88), INT8_C( 111), INT8_C( -95), INT8_C( 42), INT8_C( -15), INT8_C( 21), INT8_C( 73), INT8_C( 70), INT8_C( 127), INT8_C( 32), INT8_C( 116), INT8_C( 36), INT8_C( -14)), simde_mm256_set_epi8(INT8_C( 52), INT8_C( -53), INT8_C( 101), INT8_C( 79), INT8_C( -8), INT8_C( -70), INT8_C( -81), INT8_C(-117), INT8_C( 27), INT8_C( -60), INT8_C( 78), INT8_C( 6), INT8_C( 79), INT8_C( 72), INT8_C( 1), INT8_C( -62), INT8_C( -36), INT8_C( -42), INT8_C( 0), INT8_C( -10), INT8_C( -19), INT8_C( -1), INT8_C( 25), INT8_C( -88), INT8_C( -5), INT8_C( -10), INT8_C( 127), INT8_C( 114), INT8_C( 110), INT8_C( -1), INT8_C( -27), INT8_C( 51)), simde_mm256_set_epi8(INT8_C( 52), INT8_C( 41), INT8_C( 107), INT8_C( 79), INT8_C( -8), INT8_C( 113), INT8_C( -53), INT8_C( 6), INT8_C( 27), INT8_C( 34), INT8_C( 78), INT8_C( 35), INT8_C( 79), INT8_C( 72), INT8_C( 1), INT8_C( -62), INT8_C( 2), INT8_C( 64), INT8_C( 89), INT8_C( -10), INT8_C( 111), INT8_C( -1), INT8_C( 42), INT8_C( -15), INT8_C( 21), INT8_C( 73), INT8_C( 127), INT8_C( 127), INT8_C( 110), INT8_C( 116), INT8_C( 36), INT8_C( 51)) }, { simde_mm256_set_epi8(INT8_C( 94), INT8_C( -83), INT8_C( -81), INT8_C( 109), INT8_C( -8), INT8_C( 100), INT8_C( 40), INT8_C(-125), INT8_C( 114), INT8_C( -99), INT8_C( 42), INT8_C( 35), INT8_C( 59), INT8_C( 67), INT8_C( 26), INT8_C( -39), INT8_C( 79), INT8_C( 116), INT8_C( 3), INT8_C( -47), INT8_C( 73), INT8_C( 13), 
INT8_C( -83), INT8_C( -95), INT8_C( 75), INT8_C( -48), INT8_C( -36), INT8_C( 127), INT8_C( 117), INT8_C( -9), INT8_C( 94), INT8_C( -87)), simde_mm256_set_epi8(INT8_C( -44), INT8_C( -67), INT8_C(-100), INT8_C( 73), INT8_C( 63), INT8_C( 69), INT8_C( -48), INT8_C( -87), INT8_C( -1), INT8_C( -19), INT8_C( -9), INT8_C( 66), INT8_C( 27), INT8_C( 17), INT8_C(-109), INT8_C( 22), INT8_C( 98), INT8_C( 106), INT8_C( -50), INT8_C( -90), INT8_C( 92), INT8_C( 106), INT8_C( -59), INT8_C( 31), INT8_C( -18), INT8_C( -17), INT8_C( -80), INT8_C( -46), INT8_C( 67), INT8_C( -29), INT8_C( 44), INT8_C( 76)), simde_mm256_set_epi8(INT8_C( 94), INT8_C( -67), INT8_C( -81), INT8_C( 109), INT8_C( 63), INT8_C( 100), INT8_C( 40), INT8_C( -87), INT8_C( 114), INT8_C( -19), INT8_C( 42), INT8_C( 66), INT8_C( 59), INT8_C( 67), INT8_C( 26), INT8_C( 22), INT8_C( 98), INT8_C( 116), INT8_C( 3), INT8_C( -47), INT8_C( 92), INT8_C( 106), INT8_C( -59), INT8_C( 31), INT8_C( 75), INT8_C( -17), INT8_C( -36), INT8_C( 127), INT8_C( 117), INT8_C( -9), INT8_C( 94), INT8_C( 76)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_max_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_max_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu8(UINT8_C(180), UINT8_C(215), UINT8_C( 58), UINT8_C(173), UINT8_C(238), UINT8_C(242), UINT8_C( 74), UINT8_C(116), UINT8_C(237), UINT8_C( 59), UINT8_C(170), UINT8_C(217), UINT8_C(180), UINT8_C(149), UINT8_C(157), UINT8_C(250), UINT8_C(224), UINT8_C(201), UINT8_C(109), UINT8_C(247), UINT8_C( 25), UINT8_C(111), UINT8_C( 62), UINT8_C(129), UINT8_C(115), UINT8_C(244), UINT8_C( 23), UINT8_C(181), UINT8_C(205), UINT8_C(130), UINT8_C( 15), UINT8_C( 6)), simde_x_mm256_set_epu8(UINT8_C(129), UINT8_C(102), UINT8_C(249), UINT8_C(232), UINT8_C(171), UINT8_C(250), UINT8_C(155), UINT8_C( 
41), UINT8_C(184), UINT8_C(159), UINT8_C(213), UINT8_C( 99), UINT8_C(220), UINT8_C(155), UINT8_C(220), UINT8_C(163), UINT8_C(219), UINT8_C(184), UINT8_C( 97), UINT8_C(230), UINT8_C( 89), UINT8_C( 94), UINT8_C( 73), UINT8_C(152), UINT8_C( 89), UINT8_C( 86), UINT8_C(115), UINT8_C(206), UINT8_C(132), UINT8_C(230), UINT8_C(207), UINT8_C( 0)), simde_x_mm256_set_epu8(UINT8_C(180), UINT8_C(215), UINT8_C(249), UINT8_C(232), UINT8_C(238), UINT8_C(250), UINT8_C(155), UINT8_C(116), UINT8_C(237), UINT8_C(159), UINT8_C(213), UINT8_C(217), UINT8_C(220), UINT8_C(155), UINT8_C(220), UINT8_C(250), UINT8_C(224), UINT8_C(201), UINT8_C(109), UINT8_C(247), UINT8_C( 89), UINT8_C(111), UINT8_C( 73), UINT8_C(152), UINT8_C(115), UINT8_C(244), UINT8_C(115), UINT8_C(206), UINT8_C(205), UINT8_C(230), UINT8_C(207), UINT8_C( 6)) }, { simde_x_mm256_set_epu8(UINT8_C(218), UINT8_C(157), UINT8_C(237), UINT8_C(159), UINT8_C( 73), UINT8_C(122), UINT8_C( 14), UINT8_C(243), UINT8_C(232), UINT8_C(133), UINT8_C( 42), UINT8_C( 15), UINT8_C( 24), UINT8_C(199), UINT8_C(216), UINT8_C( 25), UINT8_C(242), UINT8_C(254), UINT8_C(156), UINT8_C(113), UINT8_C( 5), UINT8_C( 95), UINT8_C( 73), UINT8_C(123), UINT8_C(224), UINT8_C(233), UINT8_C(137), UINT8_C(219), UINT8_C(122), UINT8_C(103), UINT8_C( 32), UINT8_C( 32)), simde_x_mm256_set_epu8(UINT8_C(255), UINT8_C(178), UINT8_C( 89), UINT8_C( 15), UINT8_C( 21), UINT8_C(211), UINT8_C(214), UINT8_C(175), UINT8_C( 24), UINT8_C(208), UINT8_C(242), UINT8_C( 16), UINT8_C( 86), UINT8_C(207), UINT8_C(159), UINT8_C( 23), UINT8_C( 74), UINT8_C( 72), UINT8_C(123), UINT8_C( 85), UINT8_C( 99), UINT8_C( 6), UINT8_C( 15), UINT8_C(240), UINT8_C(112), UINT8_C( 36), UINT8_C(217), UINT8_C( 82), UINT8_C(251), UINT8_C( 67), UINT8_C(226), UINT8_C(252)), simde_x_mm256_set_epu8(UINT8_C(255), UINT8_C(178), UINT8_C(237), UINT8_C(159), UINT8_C( 73), UINT8_C(211), UINT8_C(214), UINT8_C(243), UINT8_C(232), UINT8_C(208), UINT8_C(242), UINT8_C( 16), UINT8_C( 86), UINT8_C(207), UINT8_C(216), UINT8_C( 
25), UINT8_C(242), UINT8_C(254), UINT8_C(156), UINT8_C(113), UINT8_C( 99), UINT8_C( 95), UINT8_C( 73), UINT8_C(240), UINT8_C(224), UINT8_C(233), UINT8_C(217), UINT8_C(219), UINT8_C(251), UINT8_C(103), UINT8_C(226), UINT8_C(252)) }, { simde_x_mm256_set_epu8(UINT8_C( 99), UINT8_C(186), UINT8_C(163), UINT8_C( 4), UINT8_C( 50), UINT8_C( 29), UINT8_C( 2), UINT8_C( 45), UINT8_C( 57), UINT8_C( 0), UINT8_C(206), UINT8_C( 78), UINT8_C(164), UINT8_C( 87), UINT8_C( 32), UINT8_C(133), UINT8_C(239), UINT8_C(167), UINT8_C( 26), UINT8_C(218), UINT8_C(142), UINT8_C( 23), UINT8_C( 41), UINT8_C( 63), UINT8_C(230), UINT8_C(150), UINT8_C( 27), UINT8_C(237), UINT8_C(105), UINT8_C(166), UINT8_C( 82), UINT8_C( 50)), simde_x_mm256_set_epu8(UINT8_C(129), UINT8_C(111), UINT8_C( 84), UINT8_C(213), UINT8_C( 28), UINT8_C( 33), UINT8_C(141), UINT8_C(175), UINT8_C( 38), UINT8_C(248), UINT8_C( 9), UINT8_C( 4), UINT8_C(199), UINT8_C( 40), UINT8_C(185), UINT8_C(144), UINT8_C( 46), UINT8_C( 14), UINT8_C( 90), UINT8_C(214), UINT8_C( 91), UINT8_C( 56), UINT8_C( 35), UINT8_C(120), UINT8_C(228), UINT8_C( 66), UINT8_C( 89), UINT8_C(196), UINT8_C(150), UINT8_C(233), UINT8_C(114), UINT8_C(153)), simde_x_mm256_set_epu8(UINT8_C(129), UINT8_C(186), UINT8_C(163), UINT8_C(213), UINT8_C( 50), UINT8_C( 33), UINT8_C(141), UINT8_C(175), UINT8_C( 57), UINT8_C(248), UINT8_C(206), UINT8_C( 78), UINT8_C(199), UINT8_C( 87), UINT8_C(185), UINT8_C(144), UINT8_C(239), UINT8_C(167), UINT8_C( 90), UINT8_C(218), UINT8_C(142), UINT8_C( 56), UINT8_C( 41), UINT8_C(120), UINT8_C(230), UINT8_C(150), UINT8_C( 89), UINT8_C(237), UINT8_C(150), UINT8_C(233), UINT8_C(114), UINT8_C(153)) }, { simde_x_mm256_set_epu8(UINT8_C( 80), UINT8_C( 4), UINT8_C(125), UINT8_C( 7), UINT8_C( 48), UINT8_C(207), UINT8_C( 44), UINT8_C(221), UINT8_C(215), UINT8_C( 45), UINT8_C(223), UINT8_C(194), UINT8_C(186), UINT8_C( 12), UINT8_C(145), UINT8_C(171), UINT8_C(238), UINT8_C(191), UINT8_C( 72), UINT8_C(118), UINT8_C( 66), UINT8_C(123), UINT8_C(140), 
UINT8_C(190), UINT8_C( 2), UINT8_C(202), UINT8_C( 43), UINT8_C( 29), UINT8_C( 54), UINT8_C( 64), UINT8_C(115), UINT8_C( 16)), simde_x_mm256_set_epu8(UINT8_C( 80), UINT8_C(158), UINT8_C(246), UINT8_C( 30), UINT8_C(178), UINT8_C(129), UINT8_C(199), UINT8_C(213), UINT8_C( 85), UINT8_C(149), UINT8_C(108), UINT8_C( 59), UINT8_C(205), UINT8_C(149), UINT8_C(119), UINT8_C(194), UINT8_C(246), UINT8_C( 94), UINT8_C(221), UINT8_C(162), UINT8_C( 94), UINT8_C(125), UINT8_C(138), UINT8_C(231), UINT8_C(191), UINT8_C( 11), UINT8_C( 15), UINT8_C( 66), UINT8_C(133), UINT8_C(100), UINT8_C( 76), UINT8_C( 29)), simde_x_mm256_set_epu8(UINT8_C( 80), UINT8_C(158), UINT8_C(246), UINT8_C( 30), UINT8_C(178), UINT8_C(207), UINT8_C(199), UINT8_C(221), UINT8_C(215), UINT8_C(149), UINT8_C(223), UINT8_C(194), UINT8_C(205), UINT8_C(149), UINT8_C(145), UINT8_C(194), UINT8_C(246), UINT8_C(191), UINT8_C(221), UINT8_C(162), UINT8_C( 94), UINT8_C(125), UINT8_C(140), UINT8_C(231), UINT8_C(191), UINT8_C(202), UINT8_C( 43), UINT8_C( 66), UINT8_C(133), UINT8_C(100), UINT8_C(115), UINT8_C( 29)) }, { simde_x_mm256_set_epu8(UINT8_C(197), UINT8_C( 85), UINT8_C( 13), UINT8_C(197), UINT8_C( 97), UINT8_C(213), UINT8_C( 19), UINT8_C(106), UINT8_C(135), UINT8_C( 2), UINT8_C(117), UINT8_C(164), UINT8_C(206), UINT8_C(103), UINT8_C( 74), UINT8_C( 88), UINT8_C(183), UINT8_C( 33), UINT8_C(103), UINT8_C(216), UINT8_C(136), UINT8_C(200), UINT8_C( 59), UINT8_C(124), UINT8_C(188), UINT8_C(115), UINT8_C(181), UINT8_C(132), UINT8_C(156), UINT8_C( 81), UINT8_C(255), UINT8_C(214)), simde_x_mm256_set_epu8(UINT8_C( 95), UINT8_C( 13), UINT8_C(157), UINT8_C(137), UINT8_C( 41), UINT8_C(108), UINT8_C( 8), UINT8_C( 29), UINT8_C( 52), UINT8_C(238), UINT8_C( 31), UINT8_C( 23), UINT8_C( 9), UINT8_C( 86), UINT8_C( 2), UINT8_C( 88), UINT8_C( 88), UINT8_C(185), UINT8_C(173), UINT8_C(108), UINT8_C(234), UINT8_C(252), UINT8_C(231), UINT8_C( 15), UINT8_C( 68), UINT8_C(138), UINT8_C( 60), UINT8_C(162), UINT8_C(131), UINT8_C(215), UINT8_C( 87), 
UINT8_C( 44)), simde_x_mm256_set_epu8(UINT8_C(197), UINT8_C( 85), UINT8_C(157), UINT8_C(197), UINT8_C( 97), UINT8_C(213), UINT8_C( 19), UINT8_C(106), UINT8_C(135), UINT8_C(238), UINT8_C(117), UINT8_C(164), UINT8_C(206), UINT8_C(103), UINT8_C( 74), UINT8_C( 88), UINT8_C(183), UINT8_C(185), UINT8_C(173), UINT8_C(216), UINT8_C(234), UINT8_C(252), UINT8_C(231), UINT8_C(124), UINT8_C(188), UINT8_C(138), UINT8_C(181), UINT8_C(162), UINT8_C(156), UINT8_C(215), UINT8_C(255), UINT8_C(214)) }, { simde_x_mm256_set_epu8(UINT8_C(231), UINT8_C(112), UINT8_C(155), UINT8_C( 33), UINT8_C( 64), UINT8_C(148), UINT8_C(180), UINT8_C( 63), UINT8_C( 75), UINT8_C(171), UINT8_C(170), UINT8_C(114), UINT8_C(142), UINT8_C(212), UINT8_C(162), UINT8_C(149), UINT8_C( 47), UINT8_C(201), UINT8_C( 71), UINT8_C( 17), UINT8_C(237), UINT8_C(172), UINT8_C(186), UINT8_C( 26), UINT8_C(227), UINT8_C(106), UINT8_C(215), UINT8_C(216), UINT8_C(247), UINT8_C(225), UINT8_C( 9), UINT8_C( 60)), simde_x_mm256_set_epu8(UINT8_C(136), UINT8_C( 20), UINT8_C(248), UINT8_C(149), UINT8_C( 4), UINT8_C(138), UINT8_C(203), UINT8_C( 63), UINT8_C(168), UINT8_C(148), UINT8_C(215), UINT8_C( 68), UINT8_C(209), UINT8_C(109), UINT8_C(191), UINT8_C(165), UINT8_C( 20), UINT8_C( 98), UINT8_C(113), UINT8_C(151), UINT8_C(142), UINT8_C(111), UINT8_C(162), UINT8_C(190), UINT8_C(230), UINT8_C(122), UINT8_C(213), UINT8_C(213), UINT8_C(118), UINT8_C(189), UINT8_C( 22), UINT8_C(229)), simde_x_mm256_set_epu8(UINT8_C(231), UINT8_C(112), UINT8_C(248), UINT8_C(149), UINT8_C( 64), UINT8_C(148), UINT8_C(203), UINT8_C( 63), UINT8_C(168), UINT8_C(171), UINT8_C(215), UINT8_C(114), UINT8_C(209), UINT8_C(212), UINT8_C(191), UINT8_C(165), UINT8_C( 47), UINT8_C(201), UINT8_C(113), UINT8_C(151), UINT8_C(237), UINT8_C(172), UINT8_C(186), UINT8_C(190), UINT8_C(230), UINT8_C(122), UINT8_C(215), UINT8_C(216), UINT8_C(247), UINT8_C(225), UINT8_C( 22), UINT8_C(229)) }, { simde_x_mm256_set_epu8(UINT8_C(183), UINT8_C( 9), UINT8_C( 46), UINT8_C( 70), UINT8_C( 
48), UINT8_C(117), UINT8_C(202), UINT8_C(154), UINT8_C(250), UINT8_C(204), UINT8_C(191), UINT8_C( 51), UINT8_C( 37), UINT8_C( 5), UINT8_C(178), UINT8_C( 19), UINT8_C(105), UINT8_C( 57), UINT8_C( 19), UINT8_C( 60), UINT8_C( 26), UINT8_C( 52), UINT8_C(197), UINT8_C( 41), UINT8_C(112), UINT8_C(146), UINT8_C(171), UINT8_C( 51), UINT8_C( 94), UINT8_C(195), UINT8_C(226), UINT8_C(203)), simde_x_mm256_set_epu8(UINT8_C( 96), UINT8_C( 68), UINT8_C( 60), UINT8_C( 83), UINT8_C(130), UINT8_C( 56), UINT8_C(227), UINT8_C(106), UINT8_C(254), UINT8_C(175), UINT8_C(176), UINT8_C(122), UINT8_C(126), UINT8_C(122), UINT8_C(171), UINT8_C(205), UINT8_C( 85), UINT8_C(250), UINT8_C( 22), UINT8_C(153), UINT8_C(140), UINT8_C(119), UINT8_C( 56), UINT8_C(225), UINT8_C( 78), UINT8_C(180), UINT8_C(234), UINT8_C(136), UINT8_C( 47), UINT8_C(214), UINT8_C( 40), UINT8_C(206)), simde_x_mm256_set_epu8(UINT8_C(183), UINT8_C( 68), UINT8_C( 60), UINT8_C( 83), UINT8_C(130), UINT8_C(117), UINT8_C(227), UINT8_C(154), UINT8_C(254), UINT8_C(204), UINT8_C(191), UINT8_C(122), UINT8_C(126), UINT8_C(122), UINT8_C(178), UINT8_C(205), UINT8_C(105), UINT8_C(250), UINT8_C( 22), UINT8_C(153), UINT8_C(140), UINT8_C(119), UINT8_C(197), UINT8_C(225), UINT8_C(112), UINT8_C(180), UINT8_C(234), UINT8_C(136), UINT8_C( 94), UINT8_C(214), UINT8_C(226), UINT8_C(206)) }, { simde_x_mm256_set_epu8(UINT8_C( 93), UINT8_C(234), UINT8_C(139), UINT8_C( 2), UINT8_C( 9), UINT8_C(232), UINT8_C( 35), UINT8_C( 78), UINT8_C(197), UINT8_C( 13), UINT8_C(224), UINT8_C( 83), UINT8_C( 37), UINT8_C(182), UINT8_C( 94), UINT8_C( 69), UINT8_C(143), UINT8_C( 54), UINT8_C(219), UINT8_C(171), UINT8_C( 22), UINT8_C(117), UINT8_C( 46), UINT8_C(238), UINT8_C( 4), UINT8_C(125), UINT8_C( 7), UINT8_C(106), UINT8_C(127), UINT8_C( 87), UINT8_C( 56), UINT8_C( 20)), simde_x_mm256_set_epu8(UINT8_C( 73), UINT8_C(103), UINT8_C( 49), UINT8_C(198), UINT8_C(212), UINT8_C(255), UINT8_C(227), UINT8_C( 52), UINT8_C(122), UINT8_C( 22), UINT8_C(213), UINT8_C(204), 
UINT8_C(103), UINT8_C( 93), UINT8_C( 45), UINT8_C( 45), UINT8_C(221), UINT8_C(118), UINT8_C( 73), UINT8_C( 16), UINT8_C(194), UINT8_C( 60), UINT8_C(246), UINT8_C(126), UINT8_C( 84), UINT8_C(155), UINT8_C(128), UINT8_C( 83), UINT8_C( 5), UINT8_C( 37), UINT8_C(157), UINT8_C(132)), simde_x_mm256_set_epu8(UINT8_C( 93), UINT8_C(234), UINT8_C(139), UINT8_C(198), UINT8_C(212), UINT8_C(255), UINT8_C(227), UINT8_C( 78), UINT8_C(197), UINT8_C( 22), UINT8_C(224), UINT8_C(204), UINT8_C(103), UINT8_C(182), UINT8_C( 94), UINT8_C( 69), UINT8_C(221), UINT8_C(118), UINT8_C(219), UINT8_C(171), UINT8_C(194), UINT8_C(117), UINT8_C(246), UINT8_C(238), UINT8_C( 84), UINT8_C(155), UINT8_C(128), UINT8_C(106), UINT8_C(127), UINT8_C( 87), UINT8_C(157), UINT8_C(132)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_max_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_max_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C( 13945), INT16_C(-32397), INT16_C( -8500), INT16_C(-16936), INT16_C( -615), INT16_C( 28879), INT16_C( 19952), INT16_C(-20844), INT16_C(-18762), INT16_C( 23311), INT16_C(-22090), INT16_C( 16355), INT16_C( 18939), INT16_C( -9393), INT16_C( 19655), INT16_C( 25483)), simde_mm256_set_epi16(INT16_C(-30076), INT16_C(-27428), INT16_C(-24061), INT16_C( -9620), INT16_C(-17974), INT16_C( 3150), INT16_C( 6986), INT16_C(-23891), INT16_C( 12942), INT16_C( 12645), INT16_C(-13904), INT16_C( -868), INT16_C(-14591), INT16_C( 24935), INT16_C( 4654), INT16_C( 4591)), simde_mm256_set_epi16(INT16_C( 13945), INT16_C(-27428), INT16_C( -8500), INT16_C( -9620), INT16_C( -615), INT16_C( 28879), INT16_C( 19952), INT16_C(-20844), INT16_C( 12942), INT16_C( 23311), INT16_C(-13904), INT16_C( 16355), INT16_C( 18939), INT16_C( 24935), INT16_C( 19655), INT16_C( 25483)) }, { 
simde_mm256_set_epi16(INT16_C( 15746), INT16_C( 16476), INT16_C(-26892), INT16_C( 15551), INT16_C( 4802), INT16_C( 22020), INT16_C( 7684), INT16_C( 20433), INT16_C(-15213), INT16_C( -9067), INT16_C( 11028), INT16_C( 2768), INT16_C( 19036), INT16_C(-29021), INT16_C( 27796), INT16_C(-20181)), simde_mm256_set_epi16(INT16_C(-15017), INT16_C(-10120), INT16_C(-23789), INT16_C(-21730), INT16_C( 16078), INT16_C( 3607), INT16_C(-18668), INT16_C(-12209), INT16_C( -562), INT16_C( 30247), INT16_C( 20324), INT16_C( -8924), INT16_C( 24524), INT16_C( -5426), INT16_C( 25503), INT16_C( 2215)), simde_mm256_set_epi16(INT16_C( 15746), INT16_C( 16476), INT16_C(-23789), INT16_C( 15551), INT16_C( 16078), INT16_C( 22020), INT16_C( 7684), INT16_C( 20433), INT16_C( -562), INT16_C( 30247), INT16_C( 20324), INT16_C( 2768), INT16_C( 24524), INT16_C( -5426), INT16_C( 27796), INT16_C( 2215)) }, { simde_mm256_set_epi16(INT16_C( 9558), INT16_C( 24602), INT16_C(-11854), INT16_C( 13316), INT16_C( -7111), INT16_C( 5322), INT16_C( 10474), INT16_C( 14309), INT16_C( 10175), INT16_C(-23285), INT16_C( -7105), INT16_C(-18408), INT16_C( -9456), INT16_C(-31797), INT16_C( 6677), INT16_C( 24246)), simde_mm256_set_epi16(INT16_C( 8353), INT16_C(-12225), INT16_C( 10989), INT16_C( 20160), INT16_C(-24928), INT16_C( 11285), INT16_C(-21080), INT16_C(-21637), INT16_C( -557), INT16_C(-15431), INT16_C(-14247), INT16_C( 1813), INT16_C( -7571), INT16_C( 22502), INT16_C( 12550), INT16_C( 3083)), simde_mm256_set_epi16(INT16_C( 9558), INT16_C( 24602), INT16_C( 10989), INT16_C( 20160), INT16_C( -7111), INT16_C( 11285), INT16_C( 10474), INT16_C( 14309), INT16_C( 10175), INT16_C(-15431), INT16_C( -7105), INT16_C( 1813), INT16_C( -7571), INT16_C( 22502), INT16_C( 12550), INT16_C( 24246)) }, { simde_mm256_set_epi16(INT16_C( 25542), INT16_C(-21715), INT16_C(-12723), INT16_C( 3641), INT16_C( 10626), INT16_C( 1975), INT16_C( 32038), INT16_C( 13257), INT16_C( 26030), INT16_C(-29459), INT16_C( 27784), INT16_C(-29348), INT16_C( 24986), 
INT16_C( 19201), INT16_C(-21133), INT16_C( -544)), simde_mm256_set_epi16(INT16_C(-15360), INT16_C( -9091), INT16_C( 9964), INT16_C(-26119), INT16_C(-13193), INT16_C( -4473), INT16_C(-16910), INT16_C( -8355), INT16_C( 29088), INT16_C( 26903), INT16_C( 19970), INT16_C( 20929), INT16_C( 21866), INT16_C( -554), INT16_C(-31252), INT16_C(-27281)), simde_mm256_set_epi16(INT16_C( 25542), INT16_C( -9091), INT16_C( 9964), INT16_C( 3641), INT16_C( 10626), INT16_C( 1975), INT16_C( 32038), INT16_C( 13257), INT16_C( 29088), INT16_C( 26903), INT16_C( 27784), INT16_C( 20929), INT16_C( 24986), INT16_C( 19201), INT16_C(-21133), INT16_C( -544)) }, { simde_mm256_set_epi16(INT16_C(-28957), INT16_C( 7788), INT16_C(-11350), INT16_C( 29385), INT16_C( -7207), INT16_C(-21363), INT16_C(-10963), INT16_C(-23177), INT16_C( 22883), INT16_C(-14765), INT16_C( 17155), INT16_C(-19202), INT16_C(-32343), INT16_C( 8167), INT16_C( 17107), INT16_C(-17194)), simde_mm256_set_epi16(INT16_C(-21587), INT16_C( 21036), INT16_C( 16036), INT16_C(-20844), INT16_C( 2600), INT16_C(-15232), INT16_C( 4125), INT16_C( 22708), INT16_C(-29000), INT16_C(-22285), INT16_C( 8338), INT16_C( 25191), INT16_C( 5401), INT16_C(-28508), INT16_C(-16518), INT16_C(-23781)), simde_mm256_set_epi16(INT16_C(-21587), INT16_C( 21036), INT16_C( 16036), INT16_C( 29385), INT16_C( 2600), INT16_C(-15232), INT16_C( 4125), INT16_C( 22708), INT16_C( 22883), INT16_C(-14765), INT16_C( 17155), INT16_C( 25191), INT16_C( 5401), INT16_C( 8167), INT16_C( 17107), INT16_C(-17194)) }, { simde_mm256_set_epi16(INT16_C( 26823), INT16_C( 30422), INT16_C(-10399), INT16_C( 6469), INT16_C(-22608), INT16_C( 3602), INT16_C( 24356), INT16_C( -5312), INT16_C( 22553), INT16_C( 4057), INT16_C( 16984), INT16_C( 14642), INT16_C( -4725), INT16_C(-13132), INT16_C(-32486), INT16_C( -9795)), simde_mm256_set_epi16(INT16_C(-16670), INT16_C( 11037), INT16_C(-12867), INT16_C( -9733), INT16_C(-19057), INT16_C( 28626), INT16_C( 27349), INT16_C( 21991), INT16_C(-17860), INT16_C( 
-7267), INT16_C( 21708), INT16_C( 27219), INT16_C( 3344), INT16_C(-13000), INT16_C( -5702), INT16_C( 8873)), simde_mm256_set_epi16(INT16_C( 26823), INT16_C( 30422), INT16_C(-10399), INT16_C( 6469), INT16_C(-19057), INT16_C( 28626), INT16_C( 27349), INT16_C( 21991), INT16_C( 22553), INT16_C( 4057), INT16_C( 21708), INT16_C( 27219), INT16_C( 3344), INT16_C(-13000), INT16_C( -5702), INT16_C( 8873)) }, { simde_mm256_set_epi16(INT16_C( 28327), INT16_C( 28059), INT16_C(-12455), INT16_C(-22892), INT16_C( 9516), INT16_C(-23091), INT16_C( 28067), INT16_C( 29028), INT16_C(-24610), INT16_C(-13303), INT16_C( -3871), INT16_C( 25753), INT16_C(-19764), INT16_C(-10115), INT16_C( -9313), INT16_C(-10807)), simde_mm256_set_epi16(INT16_C( 1267), INT16_C( -7929), INT16_C( 29505), INT16_C( 22745), INT16_C( 1801), INT16_C( -829), INT16_C( 2659), INT16_C( 24204), INT16_C( 27266), INT16_C(-10805), INT16_C( 7071), INT16_C( 12404), INT16_C( 27748), INT16_C( 11490), INT16_C(-10130), INT16_C( 7320)), simde_mm256_set_epi16(INT16_C( 28327), INT16_C( 28059), INT16_C( 29505), INT16_C( 22745), INT16_C( 9516), INT16_C( -829), INT16_C( 28067), INT16_C( 29028), INT16_C( 27266), INT16_C(-10805), INT16_C( 7071), INT16_C( 25753), INT16_C( 27748), INT16_C( 11490), INT16_C( -9313), INT16_C( 7320)) }, { simde_mm256_set_epi16(INT16_C(-28396), INT16_C( 3489), INT16_C( 18687), INT16_C( 31879), INT16_C( 15536), INT16_C(-17843), INT16_C( 25087), INT16_C(-27693), INT16_C(-17170), INT16_C( 22369), INT16_C(-27360), INT16_C( -5829), INT16_C(-28550), INT16_C( 650), INT16_C(-11446), INT16_C( 3172)), simde_mm256_set_epi16(INT16_C(-17057), INT16_C(-18950), INT16_C( 15210), INT16_C(-25469), INT16_C(-29475), INT16_C( 24211), INT16_C(-14409), INT16_C( -9847), INT16_C( 14530), INT16_C(-24184), INT16_C(-29404), INT16_C( 9879), INT16_C( 2544), INT16_C(-17943), INT16_C( 7392), INT16_C( 21259)), simde_mm256_set_epi16(INT16_C(-17057), INT16_C( 3489), INT16_C( 18687), INT16_C( 31879), INT16_C( 15536), INT16_C( 24211), INT16_C( 
25087), INT16_C( -9847), INT16_C( 14530), INT16_C( 22369), INT16_C(-27360), INT16_C( 9879), INT16_C( 2544), INT16_C( 650), INT16_C( 7392), INT16_C( 21259)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_max_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_max_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C(-1578701412), INT32_C(-1861943275), INT32_C( 1717826073), INT32_C( -689858277), INT32_C(-2120069619), INT32_C( -269745295), INT32_C( 993893699), INT32_C( 1747535129)), simde_mm256_set_epi32(INT32_C(-1662415513), INT32_C( 528745592), INT32_C( -219858588), INT32_C( 622357704), INT32_C(-2013314779), INT32_C(-1188086430), INT32_C( 1171120200), INT32_C( 259652605)), simde_mm256_set_epi32(INT32_C(-1578701412), INT32_C( 528745592), INT32_C( 1717826073), INT32_C( 622357704), INT32_C(-2013314779), INT32_C( -269745295), INT32_C( 1171120200), INT32_C( 1747535129)) }, { simde_mm256_set_epi32(INT32_C( 1892880717), INT32_C( 489135272), INT32_C( 1334433155), INT32_C( 820950025), INT32_C( 875780372), INT32_C( 1165542940), INT32_C(-1922521413), INT32_C( 1603602866)), simde_mm256_set_epi32(INT32_C( 1774078122), INT32_C( 717307143), INT32_C( 1385706638), INT32_C( 1722931608), INT32_C( 722989282), INT32_C( 346685962), INT32_C( 888351034), INT32_C(-1040558861)), simde_mm256_set_epi32(INT32_C( 1892880717), INT32_C( 717307143), INT32_C( 1385706638), INT32_C( 1722931608), INT32_C( 875780372), INT32_C( 1165542940), INT32_C( 888351034), INT32_C( 1603602866)) }, { simde_mm256_set_epi32(INT32_C( 542527818), INT32_C( 704072326), INT32_C(-1102544845), INT32_C( 1444592706), INT32_C(-1925092178), INT32_C( -204731801), INT32_C( 775742710), INT32_C( 1177505754)), simde_mm256_set_epi32(INT32_C( -551142491), INT32_C( 444795418), INT32_C( -825745617), 
INT32_C(-1787304548), INT32_C(-1241777147), INT32_C( 265612525), INT32_C( 2134936507), INT32_C(-1780599144)), simde_mm256_set_epi32(INT32_C( 542527818), INT32_C( 704072326), INT32_C( -825745617), INT32_C( 1444592706), INT32_C(-1241777147), INT32_C( 265612525), INT32_C( 2134936507), INT32_C( 1177505754)) }, { simde_mm256_set_epi32(INT32_C( 788610578), INT32_C( 1831830497), INT32_C( -755374494), INT32_C( 2130364415), INT32_C( -342185910), INT32_C( 854700402), INT32_C( 1343385181), INT32_C( 891289886)), simde_mm256_set_epi32(INT32_C( 1917738489), INT32_C( 690751883), INT32_C(-1037858966), INT32_C(-1567909551), INT32_C( -26783282), INT32_C( -715472333), INT32_C( -713074037), INT32_C( -17697982)), simde_mm256_set_epi32(INT32_C( 1917738489), INT32_C( 1831830497), INT32_C( -755374494), INT32_C( 2130364415), INT32_C( -26783282), INT32_C( 854700402), INT32_C( 1343385181), INT32_C( 891289886)) }, { simde_mm256_set_epi32(INT32_C( 1796568981), INT32_C(-1846046069), INT32_C(-1495880353), INT32_C(-1105562137), INT32_C(-2000119429), INT32_C( 450352139), INT32_C( -734796291), INT32_C(-1851159287)), simde_mm256_set_epi32(INT32_C( 1076027923), INT32_C( 1999677975), INT32_C( -498539521), INT32_C(-1166856281), INT32_C(-1611419248), INT32_C(-1268341170), INT32_C(-1115547457), INT32_C( 1554907000)), simde_mm256_set_epi32(INT32_C( 1796568981), INT32_C( 1999677975), INT32_C( -498539521), INT32_C(-1105562137), INT32_C(-1611419248), INT32_C( 450352139), INT32_C( -734796291), INT32_C( 1554907000)) }, { simde_mm256_set_epi32(INT32_C( -263364521), INT32_C( 1666932430), INT32_C( 378039954), INT32_C( 1866502452), INT32_C( -756222443), INT32_C( -752660448), INT32_C( 1087715357), INT32_C( 1808069656)), simde_mm256_set_epi32(INT32_C( 10375777), INT32_C( 958545984), INT32_C(-1916055393), INT32_C(-1450264731), INT32_C( -550755823), INT32_C( 2131394316), INT32_C( 243861812), INT32_C( 1031114919)), simde_mm256_set_epi32(INT32_C( 10375777), INT32_C( 1666932430), INT32_C( 378039954), INT32_C( 
1866502452), INT32_C( -550755823), INT32_C( 2131394316), INT32_C( 1087715357), INT32_C( 1808069656)) }, { simde_mm256_set_epi32(INT32_C(-1075159077), INT32_C( -789508054), INT32_C(-2102436600), INT32_C( 177892995), INT32_C(-1910430929), INT32_C( -135620958), INT32_C( 1899951190), INT32_C(-1286116105)), simde_mm256_set_epi32(INT32_C(-1117240644), INT32_C(-1011771686), INT32_C(-1764247251), INT32_C( -953836385), INT32_C(-1633093106), INT32_C( 1815106343), INT32_C( 1418749534), INT32_C( 1718021188)), simde_mm256_set_epi32(INT32_C(-1075159077), INT32_C( -789508054), INT32_C(-1764247251), INT32_C( 177892995), INT32_C(-1633093106), INT32_C( 1815106343), INT32_C( 1899951190), INT32_C( 1718021188)) }, { simde_mm256_set_epi32(INT32_C( -585726505), INT32_C( -735532451), INT32_C( 1572773329), INT32_C(-1610167093), INT32_C( 934479765), INT32_C( 1726304740), INT32_C(-1433078949), INT32_C(-1056217637)), simde_mm256_set_epi32(INT32_C( 201330788), INT32_C( 1825214883), INT32_C( 489777084), INT32_C( -824102072), INT32_C(-1255028012), INT32_C(-1300324544), INT32_C(-1269112569), INT32_C( -124131174)), simde_mm256_set_epi32(INT32_C( 201330788), INT32_C( 1825214883), INT32_C( 1572773329), INT32_C( -824102072), INT32_C( 934479765), INT32_C( 1726304740), INT32_C(-1269112569), INT32_C( -124131174)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_max_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_min_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu8(UINT8_C( 83), UINT8_C( 77), UINT8_C(142), UINT8_C(252), UINT8_C( 19), UINT8_C( 26), UINT8_C(193), UINT8_C( 92), UINT8_C(253), UINT8_C(183), UINT8_C(225), UINT8_C(205), UINT8_C(165), UINT8_C( 19), UINT8_C(154), UINT8_C(158), UINT8_C(126), UINT8_C(221), UINT8_C(206), UINT8_C( 22), UINT8_C(217), UINT8_C(166), UINT8_C(237), 
UINT8_C(161), UINT8_C(207), UINT8_C( 26), UINT8_C( 36), UINT8_C( 78), UINT8_C( 55), UINT8_C(167), UINT8_C( 36), UINT8_C(198)), simde_x_mm256_set_epu8(UINT8_C(203), UINT8_C( 44), UINT8_C( 47), UINT8_C( 51), UINT8_C(185), UINT8_C( 39), UINT8_C(221), UINT8_C( 75), UINT8_C(182), UINT8_C(100), UINT8_C(238), UINT8_C(172), UINT8_C(248), UINT8_C(164), UINT8_C( 91), UINT8_C(138), UINT8_C(128), UINT8_C( 29), UINT8_C(136), UINT8_C(164), UINT8_C( 43), UINT8_C( 85), UINT8_C(168), UINT8_C(105), UINT8_C(240), UINT8_C(205), UINT8_C(221), UINT8_C(217), UINT8_C(244), UINT8_C(220), UINT8_C(102), UINT8_C( 37)), simde_x_mm256_set_epu8(UINT8_C( 83), UINT8_C( 44), UINT8_C( 47), UINT8_C( 51), UINT8_C( 19), UINT8_C( 26), UINT8_C(193), UINT8_C( 75), UINT8_C(182), UINT8_C(100), UINT8_C(225), UINT8_C(172), UINT8_C(165), UINT8_C( 19), UINT8_C( 91), UINT8_C(138), UINT8_C(126), UINT8_C( 29), UINT8_C(136), UINT8_C( 22), UINT8_C( 43), UINT8_C( 85), UINT8_C(168), UINT8_C(105), UINT8_C(207), UINT8_C( 26), UINT8_C( 36), UINT8_C( 78), UINT8_C( 55), UINT8_C(167), UINT8_C( 36), UINT8_C( 37)) }, { simde_x_mm256_set_epu8(UINT8_C(177), UINT8_C(210), UINT8_C( 28), UINT8_C(116), UINT8_C(174), UINT8_C(162), UINT8_C(241), UINT8_C( 21), UINT8_C(126), UINT8_C(202), UINT8_C(250), UINT8_C(169), UINT8_C( 43), UINT8_C(239), UINT8_C(224), UINT8_C(202), UINT8_C(208), UINT8_C( 48), UINT8_C(132), UINT8_C( 78), UINT8_C( 59), UINT8_C( 2), UINT8_C(213), UINT8_C(228), UINT8_C( 42), UINT8_C( 45), UINT8_C(244), UINT8_C( 41), UINT8_C( 49), UINT8_C( 48), UINT8_C(111), UINT8_C(211)), simde_x_mm256_set_epu8(UINT8_C(114), UINT8_C( 33), UINT8_C(210), UINT8_C(232), UINT8_C(128), UINT8_C(122), UINT8_C(216), UINT8_C(228), UINT8_C( 91), UINT8_C( 37), UINT8_C(119), UINT8_C(203), UINT8_C(236), UINT8_C( 24), UINT8_C(170), UINT8_C(120), UINT8_C( 95), UINT8_C(231), UINT8_C(152), UINT8_C(143), UINT8_C( 42), UINT8_C( 38), UINT8_C(240), UINT8_C(125), UINT8_C(124), UINT8_C(251), UINT8_C(118), UINT8_C( 44), UINT8_C( 68), UINT8_C( 42), UINT8_C( 
54), UINT8_C(185)), simde_x_mm256_set_epu8(UINT8_C(114), UINT8_C( 33), UINT8_C( 28), UINT8_C(116), UINT8_C(128), UINT8_C(122), UINT8_C(216), UINT8_C( 21), UINT8_C( 91), UINT8_C( 37), UINT8_C(119), UINT8_C(169), UINT8_C( 43), UINT8_C( 24), UINT8_C(170), UINT8_C(120), UINT8_C( 95), UINT8_C( 48), UINT8_C(132), UINT8_C( 78), UINT8_C( 42), UINT8_C( 2), UINT8_C(213), UINT8_C(125), UINT8_C( 42), UINT8_C( 45), UINT8_C(118), UINT8_C( 41), UINT8_C( 49), UINT8_C( 42), UINT8_C( 54), UINT8_C(185)) }, { simde_x_mm256_set_epu8(UINT8_C( 78), UINT8_C( 16), UINT8_C( 42), UINT8_C(217), UINT8_C( 5), UINT8_C( 96), UINT8_C( 7), UINT8_C( 83), UINT8_C( 16), UINT8_C(208), UINT8_C( 1), UINT8_C(207), UINT8_C( 75), UINT8_C(207), UINT8_C(252), UINT8_C( 92), UINT8_C(122), UINT8_C( 24), UINT8_C( 32), UINT8_C(104), UINT8_C( 88), UINT8_C( 59), UINT8_C(218), UINT8_C( 28), UINT8_C( 51), UINT8_C( 64), UINT8_C( 65), UINT8_C( 92), UINT8_C( 24), UINT8_C( 83), UINT8_C(248), UINT8_C(243)), simde_x_mm256_set_epu8(UINT8_C(167), UINT8_C( 87), UINT8_C( 75), UINT8_C( 10), UINT8_C( 44), UINT8_C(173), UINT8_C(210), UINT8_C(150), UINT8_C(187), UINT8_C(229), UINT8_C(246), UINT8_C( 10), UINT8_C( 80), UINT8_C( 74), UINT8_C(174), UINT8_C(128), UINT8_C( 64), UINT8_C(160), UINT8_C(126), UINT8_C(231), UINT8_C(215), UINT8_C( 80), UINT8_C(115), UINT8_C( 66), UINT8_C(204), UINT8_C( 18), UINT8_C(158), UINT8_C(133), UINT8_C(170), UINT8_C(119), UINT8_C(216), UINT8_C(196)), simde_x_mm256_set_epu8(UINT8_C( 78), UINT8_C( 16), UINT8_C( 42), UINT8_C( 10), UINT8_C( 5), UINT8_C( 96), UINT8_C( 7), UINT8_C( 83), UINT8_C( 16), UINT8_C(208), UINT8_C( 1), UINT8_C( 10), UINT8_C( 75), UINT8_C( 74), UINT8_C(174), UINT8_C( 92), UINT8_C( 64), UINT8_C( 24), UINT8_C( 32), UINT8_C(104), UINT8_C( 88), UINT8_C( 59), UINT8_C(115), UINT8_C( 28), UINT8_C( 51), UINT8_C( 18), UINT8_C( 65), UINT8_C( 92), UINT8_C( 24), UINT8_C( 83), UINT8_C(216), UINT8_C(196)) }, { simde_x_mm256_set_epu8(UINT8_C( 4), UINT8_C(145), UINT8_C( 75), UINT8_C( 42), UINT8_C( 
60), UINT8_C(146), UINT8_C(158), UINT8_C( 3), UINT8_C( 57), UINT8_C(210), UINT8_C( 14), UINT8_C(106), UINT8_C( 96), UINT8_C(136), UINT8_C( 82), UINT8_C( 54), UINT8_C(227), UINT8_C(202), UINT8_C( 35), UINT8_C(184), UINT8_C( 41), UINT8_C(205), UINT8_C(162), UINT8_C(206), UINT8_C(247), UINT8_C( 69), UINT8_C(139), UINT8_C(186), UINT8_C( 13), UINT8_C(119), UINT8_C( 79), UINT8_C( 53)), simde_x_mm256_set_epu8(UINT8_C(239), UINT8_C(239), UINT8_C( 37), UINT8_C(187), UINT8_C(237), UINT8_C(203), UINT8_C( 68), UINT8_C( 35), UINT8_C(147), UINT8_C(109), UINT8_C(244), UINT8_C(194), UINT8_C(131), UINT8_C(189), UINT8_C( 51), UINT8_C( 84), UINT8_C( 48), UINT8_C( 26), UINT8_C(203), UINT8_C( 35), UINT8_C(128), UINT8_C( 76), UINT8_C( 16), UINT8_C( 52), UINT8_C(103), UINT8_C(192), UINT8_C(105), UINT8_C( 65), UINT8_C(117), UINT8_C(100), UINT8_C( 71), UINT8_C(124)), simde_x_mm256_set_epu8(UINT8_C( 4), UINT8_C(145), UINT8_C( 37), UINT8_C( 42), UINT8_C( 60), UINT8_C(146), UINT8_C( 68), UINT8_C( 3), UINT8_C( 57), UINT8_C(109), UINT8_C( 14), UINT8_C(106), UINT8_C( 96), UINT8_C(136), UINT8_C( 51), UINT8_C( 54), UINT8_C( 48), UINT8_C( 26), UINT8_C( 35), UINT8_C( 35), UINT8_C( 41), UINT8_C( 76), UINT8_C( 16), UINT8_C( 52), UINT8_C(103), UINT8_C( 69), UINT8_C(105), UINT8_C( 65), UINT8_C( 13), UINT8_C(100), UINT8_C( 71), UINT8_C( 53)) }, { simde_x_mm256_set_epu8(UINT8_C(144), UINT8_C(140), UINT8_C(121), UINT8_C(161), UINT8_C( 2), UINT8_C( 56), UINT8_C(102), UINT8_C(220), UINT8_C(246), UINT8_C( 67), UINT8_C( 19), UINT8_C( 67), UINT8_C( 18), UINT8_C(117), UINT8_C(155), UINT8_C( 84), UINT8_C( 3), UINT8_C(204), UINT8_C( 72), UINT8_C( 34), UINT8_C(218), UINT8_C(208), UINT8_C( 99), UINT8_C( 27), UINT8_C(224), UINT8_C( 99), UINT8_C(119), UINT8_C(221), UINT8_C( 98), UINT8_C(202), UINT8_C(146), UINT8_C( 18)), simde_x_mm256_set_epu8(UINT8_C(127), UINT8_C(108), UINT8_C( 76), UINT8_C(146), UINT8_C(206), UINT8_C(221), UINT8_C(206), UINT8_C(168), UINT8_C( 11), UINT8_C(166), UINT8_C(228), UINT8_C(131), UINT8_C( 
46), UINT8_C( 42), UINT8_C(192), UINT8_C(117), UINT8_C(166), UINT8_C(151), UINT8_C(189), UINT8_C(138), UINT8_C(121), UINT8_C( 19), UINT8_C( 89), UINT8_C(188), UINT8_C(255), UINT8_C(104), UINT8_C(124), UINT8_C(206), UINT8_C( 89), UINT8_C(160), UINT8_C(162), UINT8_C( 10)), simde_x_mm256_set_epu8(UINT8_C(127), UINT8_C(108), UINT8_C( 76), UINT8_C(146), UINT8_C( 2), UINT8_C( 56), UINT8_C(102), UINT8_C(168), UINT8_C( 11), UINT8_C( 67), UINT8_C( 19), UINT8_C( 67), UINT8_C( 18), UINT8_C( 42), UINT8_C(155), UINT8_C( 84), UINT8_C( 3), UINT8_C(151), UINT8_C( 72), UINT8_C( 34), UINT8_C(121), UINT8_C( 19), UINT8_C( 89), UINT8_C( 27), UINT8_C(224), UINT8_C( 99), UINT8_C(119), UINT8_C(206), UINT8_C( 89), UINT8_C(160), UINT8_C(146), UINT8_C( 10)) }, { simde_x_mm256_set_epu8(UINT8_C( 74), UINT8_C(143), UINT8_C( 19), UINT8_C( 8), UINT8_C( 11), UINT8_C(124), UINT8_C( 76), UINT8_C( 6), UINT8_C(148), UINT8_C( 67), UINT8_C(224), UINT8_C(163), UINT8_C(113), UINT8_C(245), UINT8_C( 59), UINT8_C( 27), UINT8_C(131), UINT8_C(161), UINT8_C(251), UINT8_C(125), UINT8_C(201), UINT8_C(252), UINT8_C(140), UINT8_C( 24), UINT8_C(254), UINT8_C(183), UINT8_C(205), UINT8_C(238), UINT8_C(180), UINT8_C( 21), UINT8_C( 59), UINT8_C(174)), simde_x_mm256_set_epu8(UINT8_C( 98), UINT8_C(152), UINT8_C(212), UINT8_C(148), UINT8_C(183), UINT8_C( 37), UINT8_C(170), UINT8_C( 93), UINT8_C( 52), UINT8_C(182), UINT8_C(181), UINT8_C(242), UINT8_C(229), UINT8_C(182), UINT8_C(143), UINT8_C( 43), UINT8_C(177), UINT8_C(115), UINT8_C(177), UINT8_C(170), UINT8_C( 71), UINT8_C(222), UINT8_C(162), UINT8_C(198), UINT8_C(195), UINT8_C(222), UINT8_C( 10), UINT8_C( 86), UINT8_C( 48), UINT8_C(215), UINT8_C( 16), UINT8_C( 69)), simde_x_mm256_set_epu8(UINT8_C( 74), UINT8_C(143), UINT8_C( 19), UINT8_C( 8), UINT8_C( 11), UINT8_C( 37), UINT8_C( 76), UINT8_C( 6), UINT8_C( 52), UINT8_C( 67), UINT8_C(181), UINT8_C(163), UINT8_C(113), UINT8_C(182), UINT8_C( 59), UINT8_C( 27), UINT8_C(131), UINT8_C(115), UINT8_C(177), UINT8_C(125), UINT8_C( 
71), UINT8_C(222), UINT8_C(140), UINT8_C( 24), UINT8_C(195), UINT8_C(183), UINT8_C( 10), UINT8_C( 86), UINT8_C( 48), UINT8_C( 21), UINT8_C( 16), UINT8_C( 69)) }, { simde_x_mm256_set_epu8(UINT8_C(119), UINT8_C(117), UINT8_C( 13), UINT8_C(207), UINT8_C( 70), UINT8_C(197), UINT8_C(152), UINT8_C( 89), UINT8_C(135), UINT8_C(159), UINT8_C( 50), UINT8_C(184), UINT8_C(155), UINT8_C( 62), UINT8_C(253), UINT8_C(248), UINT8_C(240), UINT8_C(207), UINT8_C( 53), UINT8_C(202), UINT8_C(203), UINT8_C(241), UINT8_C(218), UINT8_C(118), UINT8_C(121), UINT8_C(140), UINT8_C(125), UINT8_C( 65), UINT8_C( 70), UINT8_C(249), UINT8_C(217), UINT8_C(237)), simde_x_mm256_set_epu8(UINT8_C(125), UINT8_C( 67), UINT8_C( 52), UINT8_C( 8), UINT8_C(207), UINT8_C(205), UINT8_C( 56), UINT8_C(196), UINT8_C( 42), UINT8_C(240), UINT8_C(116), UINT8_C( 44), UINT8_C(109), UINT8_C(201), UINT8_C(220), UINT8_C(182), UINT8_C(224), UINT8_C( 99), UINT8_C(151), UINT8_C(222), UINT8_C(220), UINT8_C(252), UINT8_C( 6), UINT8_C(245), UINT8_C(215), UINT8_C(221), UINT8_C(177), UINT8_C(240), UINT8_C(118), UINT8_C(155), UINT8_C(143), UINT8_C(240)), simde_x_mm256_set_epu8(UINT8_C(119), UINT8_C( 67), UINT8_C( 13), UINT8_C( 8), UINT8_C( 70), UINT8_C(197), UINT8_C( 56), UINT8_C( 89), UINT8_C( 42), UINT8_C(159), UINT8_C( 50), UINT8_C( 44), UINT8_C(109), UINT8_C( 62), UINT8_C(220), UINT8_C(182), UINT8_C(224), UINT8_C( 99), UINT8_C( 53), UINT8_C(202), UINT8_C(203), UINT8_C(241), UINT8_C( 6), UINT8_C(118), UINT8_C(121), UINT8_C(140), UINT8_C(125), UINT8_C( 65), UINT8_C( 70), UINT8_C(155), UINT8_C(143), UINT8_C(237)) }, { simde_x_mm256_set_epu8(UINT8_C(201), UINT8_C(240), UINT8_C( 36), UINT8_C( 35), UINT8_C(236), UINT8_C( 73), UINT8_C( 29), UINT8_C(244), UINT8_C(140), UINT8_C( 36), UINT8_C(200), UINT8_C(155), UINT8_C( 8), UINT8_C(245), UINT8_C( 10), UINT8_C( 93), UINT8_C(124), UINT8_C(125), UINT8_C( 25), UINT8_C(192), UINT8_C( 32), UINT8_C(119), UINT8_C(142), UINT8_C(147), UINT8_C( 56), UINT8_C( 66), UINT8_C(172), UINT8_C(214), 
UINT8_C(227), UINT8_C(203), UINT8_C( 62), UINT8_C( 37)), simde_x_mm256_set_epu8(UINT8_C( 5), UINT8_C( 64), UINT8_C(205), UINT8_C(118), UINT8_C( 53), UINT8_C( 5), UINT8_C( 28), UINT8_C( 17), UINT8_C( 93), UINT8_C(223), UINT8_C( 18), UINT8_C( 93), UINT8_C(210), UINT8_C(158), UINT8_C( 37), UINT8_C( 66), UINT8_C(184), UINT8_C(142), UINT8_C(246), UINT8_C( 15), UINT8_C(153), UINT8_C( 9), UINT8_C(121), UINT8_C(211), UINT8_C( 7), UINT8_C( 12), UINT8_C( 41), UINT8_C( 14), UINT8_C(127), UINT8_C(208), UINT8_C( 7), UINT8_C( 93)), simde_x_mm256_set_epu8(UINT8_C( 5), UINT8_C( 64), UINT8_C( 36), UINT8_C( 35), UINT8_C( 53), UINT8_C( 5), UINT8_C( 28), UINT8_C( 17), UINT8_C( 93), UINT8_C( 36), UINT8_C( 18), UINT8_C( 93), UINT8_C( 8), UINT8_C(158), UINT8_C( 10), UINT8_C( 66), UINT8_C(124), UINT8_C(125), UINT8_C( 25), UINT8_C( 15), UINT8_C( 32), UINT8_C( 9), UINT8_C(121), UINT8_C(147), UINT8_C( 7), UINT8_C( 12), UINT8_C( 41), UINT8_C( 14), UINT8_C(127), UINT8_C(203), UINT8_C( 7), UINT8_C( 37)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_min_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_min_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu16(UINT16_C( 21985), UINT16_C( 37408), UINT16_C( 64559), UINT16_C( 50506), UINT16_C( 21348), UINT16_C( 50917), UINT16_C( 43331), UINT16_C( 4092), UINT16_C( 58372), UINT16_C( 9106), UINT16_C( 30380), UINT16_C( 36427), UINT16_C( 37095), UINT16_C( 38478), UINT16_C( 20272), UINT16_C( 48447)), simde_x_mm256_set_epu16(UINT16_C( 1564), UINT16_C( 15505), UINT16_C( 60508), UINT16_C( 59764), UINT16_C( 4697), UINT16_C( 47817), UINT16_C( 4510), UINT16_C( 63199), UINT16_C( 46692), UINT16_C( 14440), UINT16_C( 53716), UINT16_C( 9747), UINT16_C( 59579), UINT16_C( 43113), UINT16_C( 42711), UINT16_C( 11767)), simde_x_mm256_set_epu16(UINT16_C( 
1564), UINT16_C( 15505), UINT16_C( 60508), UINT16_C( 50506), UINT16_C( 4697), UINT16_C( 47817), UINT16_C( 4510), UINT16_C( 4092), UINT16_C( 46692), UINT16_C( 9106), UINT16_C( 30380), UINT16_C( 9747), UINT16_C( 37095), UINT16_C( 38478), UINT16_C( 20272), UINT16_C( 11767)) }, { simde_x_mm256_set_epu16(UINT16_C( 19955), UINT16_C( 258), UINT16_C( 7499), UINT16_C( 10868), UINT16_C( 6000), UINT16_C( 24962), UINT16_C( 19695), UINT16_C( 32604), UINT16_C( 25690), UINT16_C( 5375), UINT16_C( 7487), UINT16_C( 63875), UINT16_C( 58580), UINT16_C( 15823), UINT16_C( 50945), UINT16_C( 25251)), simde_x_mm256_set_epu16(UINT16_C( 40574), UINT16_C( 18810), UINT16_C( 13888), UINT16_C( 49326), UINT16_C( 64223), UINT16_C( 65297), UINT16_C( 61868), UINT16_C( 28086), UINT16_C( 41020), UINT16_C( 35514), UINT16_C( 58890), UINT16_C( 3107), UINT16_C( 7813), UINT16_C( 34771), UINT16_C( 34306), UINT16_C( 41125)), simde_x_mm256_set_epu16(UINT16_C( 19955), UINT16_C( 258), UINT16_C( 7499), UINT16_C( 10868), UINT16_C( 6000), UINT16_C( 24962), UINT16_C( 19695), UINT16_C( 28086), UINT16_C( 25690), UINT16_C( 5375), UINT16_C( 7487), UINT16_C( 3107), UINT16_C( 7813), UINT16_C( 15823), UINT16_C( 34306), UINT16_C( 25251)) }, { simde_x_mm256_set_epu16(UINT16_C( 54635), UINT16_C( 31649), UINT16_C( 37203), UINT16_C( 44269), UINT16_C( 35808), UINT16_C( 48759), UINT16_C( 30288), UINT16_C( 48436), UINT16_C( 40625), UINT16_C( 61846), UINT16_C( 14756), UINT16_C( 44804), UINT16_C( 3949), UINT16_C( 42131), UINT16_C( 39125), UINT16_C( 37028)), simde_x_mm256_set_epu16(UINT16_C( 57949), UINT16_C( 27621), UINT16_C( 31226), UINT16_C( 53362), UINT16_C( 27488), UINT16_C( 47169), UINT16_C( 31030), UINT16_C( 24207), UINT16_C( 4423), UINT16_C( 14910), UINT16_C( 30727), UINT16_C( 21908), UINT16_C( 50230), UINT16_C( 46262), UINT16_C( 25828), UINT16_C( 42233)), simde_x_mm256_set_epu16(UINT16_C( 54635), UINT16_C( 27621), UINT16_C( 31226), UINT16_C( 44269), UINT16_C( 27488), UINT16_C( 47169), UINT16_C( 30288), UINT16_C( 24207), 
UINT16_C( 4423), UINT16_C( 14910), UINT16_C( 14756), UINT16_C( 21908), UINT16_C( 3949), UINT16_C( 42131), UINT16_C( 25828), UINT16_C( 37028)) }, { simde_x_mm256_set_epu16(UINT16_C( 20152), UINT16_C( 27686), UINT16_C( 12466), UINT16_C( 27372), UINT16_C( 44680), UINT16_C( 5041), UINT16_C( 65098), UINT16_C( 61815), UINT16_C( 46798), UINT16_C( 41939), UINT16_C( 44487), UINT16_C( 43543), UINT16_C( 48144), UINT16_C( 56243), UINT16_C( 61060), UINT16_C( 26152)), simde_x_mm256_set_epu16(UINT16_C( 5079), UINT16_C( 44741), UINT16_C( 26080), UINT16_C( 22525), UINT16_C( 28605), UINT16_C( 1946), UINT16_C( 59845), UINT16_C( 20686), UINT16_C( 33395), UINT16_C( 20058), UINT16_C( 31413), UINT16_C( 18280), UINT16_C( 56644), UINT16_C( 26935), UINT16_C( 15289), UINT16_C( 20043)), simde_x_mm256_set_epu16(UINT16_C( 5079), UINT16_C( 27686), UINT16_C( 12466), UINT16_C( 22525), UINT16_C( 28605), UINT16_C( 1946), UINT16_C( 59845), UINT16_C( 20686), UINT16_C( 33395), UINT16_C( 20058), UINT16_C( 31413), UINT16_C( 18280), UINT16_C( 48144), UINT16_C( 26935), UINT16_C( 15289), UINT16_C( 20043)) }, { simde_x_mm256_set_epu16(UINT16_C( 50822), UINT16_C( 22037), UINT16_C( 40380), UINT16_C( 27848), UINT16_C( 22453), UINT16_C( 65299), UINT16_C( 416), UINT16_C( 55389), UINT16_C( 3972), UINT16_C( 20597), UINT16_C( 3476), UINT16_C( 46123), UINT16_C( 29301), UINT16_C( 51108), UINT16_C( 35981), UINT16_C( 8905)), simde_x_mm256_set_epu16(UINT16_C( 25885), UINT16_C( 59271), UINT16_C( 16174), UINT16_C( 33039), UINT16_C( 55355), UINT16_C( 49380), UINT16_C( 36750), UINT16_C( 16476), UINT16_C( 22855), UINT16_C( 12806), UINT16_C( 3167), UINT16_C( 41606), UINT16_C( 57407), UINT16_C( 61681), UINT16_C( 28474), UINT16_C( 18472)), simde_x_mm256_set_epu16(UINT16_C( 25885), UINT16_C( 22037), UINT16_C( 16174), UINT16_C( 27848), UINT16_C( 22453), UINT16_C( 49380), UINT16_C( 416), UINT16_C( 16476), UINT16_C( 3972), UINT16_C( 12806), UINT16_C( 3167), UINT16_C( 41606), UINT16_C( 29301), UINT16_C( 51108), UINT16_C( 28474), 
UINT16_C( 8905)) }, { simde_x_mm256_set_epu16(UINT16_C( 38411), UINT16_C( 38062), UINT16_C( 63184), UINT16_C( 5327), UINT16_C( 2662), UINT16_C( 37879), UINT16_C( 38436), UINT16_C( 35926), UINT16_C( 37505), UINT16_C( 16059), UINT16_C( 30023), UINT16_C( 13435), UINT16_C( 55897), UINT16_C( 41272), UINT16_C( 35066), UINT16_C( 45732)), simde_x_mm256_set_epu16(UINT16_C( 4290), UINT16_C( 8244), UINT16_C( 17610), UINT16_C( 27365), UINT16_C( 41082), UINT16_C( 32808), UINT16_C( 11681), UINT16_C( 25455), UINT16_C( 5969), UINT16_C( 52658), UINT16_C( 58404), UINT16_C( 43100), UINT16_C( 23433), UINT16_C( 56801), UINT16_C( 40070), UINT16_C( 43765)), simde_x_mm256_set_epu16(UINT16_C( 4290), UINT16_C( 8244), UINT16_C( 17610), UINT16_C( 5327), UINT16_C( 2662), UINT16_C( 32808), UINT16_C( 11681), UINT16_C( 25455), UINT16_C( 5969), UINT16_C( 16059), UINT16_C( 30023), UINT16_C( 13435), UINT16_C( 23433), UINT16_C( 41272), UINT16_C( 35066), UINT16_C( 43765)) }, { simde_x_mm256_set_epu16(UINT16_C( 3920), UINT16_C( 2672), UINT16_C( 125), UINT16_C( 16695), UINT16_C( 9026), UINT16_C( 9313), UINT16_C( 36585), UINT16_C( 38441), UINT16_C( 50905), UINT16_C( 11221), UINT16_C( 28122), UINT16_C( 18875), UINT16_C( 30095), UINT16_C( 22178), UINT16_C( 51689), UINT16_C( 19916)), simde_x_mm256_set_epu16(UINT16_C( 58371), UINT16_C( 46494), UINT16_C( 62748), UINT16_C( 27388), UINT16_C( 64543), UINT16_C( 23688), UINT16_C( 42332), UINT16_C( 43841), UINT16_C( 27632), UINT16_C( 34295), UINT16_C( 21862), UINT16_C( 32789), UINT16_C( 42560), UINT16_C( 49564), UINT16_C( 10833), UINT16_C( 33089)), simde_x_mm256_set_epu16(UINT16_C( 3920), UINT16_C( 2672), UINT16_C( 125), UINT16_C( 16695), UINT16_C( 9026), UINT16_C( 9313), UINT16_C( 36585), UINT16_C( 38441), UINT16_C( 27632), UINT16_C( 11221), UINT16_C( 21862), UINT16_C( 18875), UINT16_C( 30095), UINT16_C( 22178), UINT16_C( 10833), UINT16_C( 19916)) }, { simde_x_mm256_set_epu16(UINT16_C( 32155), UINT16_C( 62138), UINT16_C( 45089), UINT16_C( 64592), UINT16_C( 57638), 
UINT16_C( 36363), UINT16_C( 16857), UINT16_C( 65526), UINT16_C( 4908), UINT16_C( 33576), UINT16_C( 61433), UINT16_C( 64967), UINT16_C( 30812), UINT16_C( 35007), UINT16_C( 3114), UINT16_C( 45540)), simde_x_mm256_set_epu16(UINT16_C( 46521), UINT16_C( 5351), UINT16_C( 22645), UINT16_C( 25668), UINT16_C( 14295), UINT16_C( 52574), UINT16_C( 9596), UINT16_C( 37163), UINT16_C( 27912), UINT16_C( 36566), UINT16_C( 34315), UINT16_C( 2157), UINT16_C( 51210), UINT16_C( 14248), UINT16_C( 30537), UINT16_C( 57705)), simde_x_mm256_set_epu16(UINT16_C( 32155), UINT16_C( 5351), UINT16_C( 22645), UINT16_C( 25668), UINT16_C( 14295), UINT16_C( 36363), UINT16_C( 9596), UINT16_C( 37163), UINT16_C( 4908), UINT16_C( 33576), UINT16_C( 34315), UINT16_C( 2157), UINT16_C( 30812), UINT16_C( 14248), UINT16_C( 3114), UINT16_C( 45540)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_min_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_min_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu32(UINT32_C(1967336836), UINT32_C(4229175532), UINT32_C(3383228965), UINT32_C( 979444700), UINT32_C( 138906106), UINT32_C( 146006389), UINT32_C(2494756173), UINT32_C(2105904126)), simde_x_mm256_set_epu32(UINT32_C(2893387611), UINT32_C(2460478173), UINT32_C(3299027518), UINT32_C( 896184310), UINT32_C(1699227452), UINT32_C(1922730015), UINT32_C(1220777705), UINT32_C( 483108197)), simde_x_mm256_set_epu32(UINT32_C(1967336836), UINT32_C(2460478173), UINT32_C(3299027518), UINT32_C( 896184310), UINT32_C( 138906106), UINT32_C( 146006389), UINT32_C(1220777705), UINT32_C( 483108197)) }, { simde_x_mm256_set_epu32(UINT32_C( 596030925), UINT32_C(1623933296), UINT32_C(4116080130), UINT32_C(2140384796), UINT32_C(1701047302), UINT32_C( 386212074), UINT32_C( 328829151), UINT32_C(3670208831)), 
simde_x_mm256_set_epu32(UINT32_C(3740465827), UINT32_C(1627250042), UINT32_C(2355376593), UINT32_C(1538421544), UINT32_C( 533181547), UINT32_C(1474624420), UINT32_C( 464742120), UINT32_C(1469792713)), simde_x_mm256_set_epu32(UINT32_C( 596030925), UINT32_C(1623933296), UINT32_C(2355376593), UINT32_C(1538421544), UINT32_C( 533181547), UINT32_C( 386212074), UINT32_C( 328829151), UINT32_C(1469792713)) }, { simde_x_mm256_set_epu32(UINT32_C(3969449230), UINT32_C(1656808571), UINT32_C(3512723294), UINT32_C(1894827634), UINT32_C( 143242580), UINT32_C(1411474427), UINT32_C(1474375050), UINT32_C(2576321811)), simde_x_mm256_set_epu32(UINT32_C(3817774721), UINT32_C(1995677222), UINT32_C(2801037071), UINT32_C(1160265207), UINT32_C( 536799050), UINT32_C(3040213718), UINT32_C(2960203135), UINT32_C( 387289056)), simde_x_mm256_set_epu32(UINT32_C(3817774721), UINT32_C(1656808571), UINT32_C(2801037071), UINT32_C(1160265207), UINT32_C( 143242580), UINT32_C(1411474427), UINT32_C(1474375050), UINT32_C( 387289056)) }, { simde_x_mm256_set_epu32(UINT32_C(4117731720), UINT32_C(2896203570), UINT32_C(2130869721), UINT32_C( 624464130), UINT32_C(3920434556), UINT32_C(3882005287), UINT32_C(2912704980), UINT32_C(3353740323)), simde_x_mm256_set_epu32(UINT32_C(1011878374), UINT32_C( 876977997), UINT32_C( 633144937), UINT32_C(2591224872), UINT32_C(1689064732), UINT32_C(1345088039), UINT32_C(3928457299), UINT32_C(1317789172)), simde_x_mm256_set_epu32(UINT32_C(1011878374), UINT32_C( 876977997), UINT32_C( 633144937), UINT32_C( 624464130), UINT32_C(1689064732), UINT32_C(1345088039), UINT32_C(2912704980), UINT32_C(1317789172)) }, { simde_x_mm256_set_epu32(UINT32_C(1278890315), UINT32_C(3068059236), UINT32_C( 937423722), UINT32_C( 545836753), UINT32_C(3944086739), UINT32_C(2155957693), UINT32_C( 750306742), UINT32_C( 995938818)), simde_x_mm256_set_epu32(UINT32_C(2300290567), UINT32_C(3884383026), UINT32_C( 682756216), UINT32_C( 434660596), UINT32_C(3895444851), UINT32_C(3229085704), UINT32_C(3028266335), 
UINT32_C(2490258842)), simde_x_mm256_set_epu32(UINT32_C(1278890315), UINT32_C(3068059236), UINT32_C( 682756216), UINT32_C( 434660596), UINT32_C(3895444851), UINT32_C(2155957693), UINT32_C( 750306742), UINT32_C( 995938818)) }, { simde_x_mm256_set_epu32(UINT32_C( 953347239), UINT32_C(3233201384), UINT32_C( 883460426), UINT32_C( 630153716), UINT32_C(2626834474), UINT32_C(4260188706), UINT32_C(4276291548), UINT32_C(1697478493)), simde_x_mm256_set_epu32(UINT32_C(3841797977), UINT32_C( 519016629), UINT32_C( 54139722), UINT32_C(4160085404), UINT32_C(2354740665), UINT32_C( 224670449), UINT32_C(2606748626), UINT32_C(2604287898)), simde_x_mm256_set_epu32(UINT32_C( 953347239), UINT32_C( 519016629), UINT32_C( 54139722), UINT32_C( 630153716), UINT32_C(2354740665), UINT32_C( 224670449), UINT32_C(2606748626), UINT32_C(1697478493)) }, { simde_x_mm256_set_epu32(UINT32_C(4276795094), UINT32_C(3517498069), UINT32_C(3286132221), UINT32_C(1640896057), UINT32_C(1497672480), UINT32_C( 40644986), UINT32_C(1824934232), UINT32_C(1194285849)), simde_x_mm256_set_epu32(UINT32_C(1850977199), UINT32_C(1916865152), UINT32_C(2772610612), UINT32_C(2574813520), UINT32_C(1514631464), UINT32_C(2960447777), UINT32_C(2276426609), UINT32_C(1419491712)), simde_x_mm256_set_epu32(UINT32_C(1850977199), UINT32_C(1916865152), UINT32_C(2772610612), UINT32_C(1640896057), UINT32_C(1497672480), UINT32_C( 40644986), UINT32_C(1824934232), UINT32_C(1194285849)) }, { simde_x_mm256_set_epu32(UINT32_C( 990577222), UINT32_C(1025245358), UINT32_C(2406551265), UINT32_C(4071927667), UINT32_C(3189139328), UINT32_C(1739830541), UINT32_C(1739044254), UINT32_C( 584922997)), simde_x_mm256_set_epu32(UINT32_C( 72076484), UINT32_C(2950382985), UINT32_C( 915753842), UINT32_C( 355749909), UINT32_C(3000181234), UINT32_C(3918623971), UINT32_C(3711879869), UINT32_C(3103913192)), simde_x_mm256_set_epu32(UINT32_C( 72076484), UINT32_C(1025245358), UINT32_C( 915753842), UINT32_C( 355749909), UINT32_C(3000181234), UINT32_C(1739830541), 
UINT32_C(1739044254), UINT32_C( 584922997)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_min_epu32(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_movemask_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; int32_t r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( -96), INT8_C(-118), INT8_C( 98), INT8_C( 62), INT8_C( -51), INT8_C( 97), INT8_C( -33), INT8_C( 125), INT8_C(-127), INT8_C( -72), INT8_C( -2), INT8_C( 75), INT8_C( 7), INT8_C( 73), INT8_C( -28), INT8_C( 60), INT8_C( 8), INT8_C( -37), INT8_C(-119), INT8_C( 83), INT8_C( -63), INT8_C( 105), INT8_C(-120), INT8_C( -81), INT8_C( 66), INT8_C( -93), INT8_C( 75), INT8_C( -69), INT8_C( 47), INT8_C( 11), INT8_C( 51), INT8_C( 35)), -891131056 }, { simde_mm256_set_epi8(INT8_C( 15), INT8_C( 104), INT8_C( -65), INT8_C(-125), INT8_C( 29), INT8_C( 110), INT8_C( -50), INT8_C( 21), INT8_C( -48), INT8_C( 105), INT8_C( 56), INT8_C( 122), INT8_C( -60), INT8_C( 127), INT8_C( 65), INT8_C(-126), INT8_C( -5), INT8_C( -40), INT8_C( -84), INT8_C( -80), INT8_C( 27), INT8_C( 14), INT8_C( 89), INT8_C( 45), INT8_C(-125), INT8_C( -33), INT8_C( 119), INT8_C( -9), INT8_C( 20), INT8_C(-117), INT8_C( -34), INT8_C( -66)), 847900887 }, { simde_mm256_set_epi8(INT8_C( 106), INT8_C( 46), INT8_C( 114), INT8_C( -45), INT8_C( 75), INT8_C( 29), INT8_C( -66), INT8_C(-117), INT8_C( 47), INT8_C( 53), INT8_C( 50), INT8_C( 31), INT8_C(-111), INT8_C( 36), INT8_C( -73), INT8_C( 38), INT8_C( -23), INT8_C( 112), INT8_C( -88), INT8_C( 42), INT8_C( -89), INT8_C( 120), INT8_C( 50), INT8_C( 27), INT8_C( 6), INT8_C( 1), INT8_C( 127), INT8_C( 127), INT8_C( 38), INT8_C( 57), INT8_C( 13), INT8_C( -14)), 319465473 }, { simde_mm256_set_epi8(INT8_C( -21), INT8_C( 113), INT8_C( 127), INT8_C( -53), INT8_C( 111), INT8_C( 121), INT8_C( -27), INT8_C( 17), INT8_C(-104), INT8_C( 11), INT8_C( -41), INT8_C( -39), INT8_C( 51), INT8_C( 
41), INT8_C( 91), INT8_C( -62), INT8_C(-116), INT8_C( 34), INT8_C( 15), INT8_C( -55), INT8_C( -90), INT8_C( -31), INT8_C( -66), INT8_C( -64), INT8_C( 115), INT8_C( -38), INT8_C( -54), INT8_C( 24), INT8_C( -59), INT8_C( -48), INT8_C( 15), INT8_C( 60)), -1833853076 }, { simde_mm256_set_epi8(INT8_C( -84), INT8_C( 66), INT8_C( 126), INT8_C( -52), INT8_C( 88), INT8_C( 79), INT8_C( 71), INT8_C( -11), INT8_C( -43), INT8_C( -40), INT8_C(-120), INT8_C( 75), INT8_C( 12), INT8_C( -40), INT8_C( 86), INT8_C( 111), INT8_C( 107), INT8_C( -40), INT8_C( -47), INT8_C( 90), INT8_C( 21), INT8_C( 126), INT8_C( -72), INT8_C( -52), INT8_C( -36), INT8_C( -82), INT8_C( -69), INT8_C( 97), INT8_C(-122), INT8_C( -39), INT8_C( 59), INT8_C( 25)), -1847303188 }, { simde_mm256_set_epi8(INT8_C( 67), INT8_C( 64), INT8_C( 17), INT8_C( -4), INT8_C( -84), INT8_C( 57), INT8_C( 94), INT8_C( 94), INT8_C(-112), INT8_C( 59), INT8_C( -47), INT8_C( -43), INT8_C( -74), INT8_C( 39), INT8_C( 45), INT8_C( -64), INT8_C( -47), INT8_C( 114), INT8_C( -10), INT8_C( 33), INT8_C( 47), INT8_C( -82), INT8_C( -45), INT8_C( 28), INT8_C( 16), INT8_C( 34), INT8_C( 94), INT8_C( 53), INT8_C( 64), INT8_C(-113), INT8_C( -53), INT8_C( 74)), 414819846 }, { simde_mm256_set_epi8(INT8_C( 27), INT8_C( -3), INT8_C( 33), INT8_C( -42), INT8_C( 113), INT8_C( -79), INT8_C( 119), INT8_C( 38), INT8_C( 96), INT8_C( 109), INT8_C( 125), INT8_C( 82), INT8_C( 8), INT8_C( -29), INT8_C( 10), INT8_C( -22), INT8_C( -49), INT8_C( 123), INT8_C( 109), INT8_C( -49), INT8_C( 6), INT8_C( -16), INT8_C( -14), INT8_C( 102), INT8_C( -5), INT8_C( 88), INT8_C( 66), INT8_C( -63), INT8_C( 82), INT8_C( 34), INT8_C( 44), INT8_C( 56)), 1409652368 }, { simde_mm256_set_epi8(INT8_C( -69), INT8_C( -65), INT8_C( 16), INT8_C( 111), INT8_C( 123), INT8_C( 89), INT8_C( 77), INT8_C( 3), INT8_C( 37), INT8_C( -13), INT8_C( 28), INT8_C( 56), INT8_C( -40), INT8_C( -18), INT8_C( -12), INT8_C( 32), INT8_C( -91), INT8_C( -40), INT8_C( 109), INT8_C( 79), INT8_C( 14), INT8_C( 52), 
INT8_C( 95), INT8_C( 73), INT8_C( 62), INT8_C( -36), INT8_C( -31), INT8_C( 24), INT8_C( 60), INT8_C( -72), INT8_C( 1), INT8_C( -18)), -1068580763 }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int32_t r = simde_mm256_movemask_epi8(test_vec[i].a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; } static int test_simde_mm256_mpsadbw_epu8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint8_t a[32]; const uint8_t b[32]; const uint16_t r[16]; } test_vec[] = { { { UINT8_C(116), UINT8_C(168), UINT8_C(162), UINT8_C(173), UINT8_C(122), UINT8_C(179), UINT8_C(242), UINT8_C( 70), UINT8_C(251), UINT8_C(139), UINT8_C(136), UINT8_C(248), UINT8_C(103), UINT8_C(122), UINT8_C( 83), UINT8_C(207), UINT8_C(164), UINT8_C( 64), UINT8_C(139), UINT8_C(142), UINT8_MAX, UINT8_C( 80), UINT8_C(177), UINT8_C(166), UINT8_C( 81), UINT8_C(193), UINT8_C(178), UINT8_C(140), UINT8_C( 66), UINT8_C(185), UINT8_C( 86), UINT8_C(182) }, { UINT8_C( 98), UINT8_C(248), UINT8_C( 99), UINT8_C(220), UINT8_C(171), UINT8_C( 85), UINT8_C( 34), UINT8_C(166), UINT8_C(225), UINT8_C(170), UINT8_C(158), UINT8_C( 72), UINT8_C( 36), UINT8_C(242), UINT8_C( 23), UINT8_C(201), UINT8_C( 50), UINT8_C(162), UINT8_C( 87), UINT8_C( 49), UINT8_C(242), UINT8_C( 8), UINT8_C(215), UINT8_C( 68), UINT8_C(201), UINT8_C(137), UINT8_C(208), UINT8_C( 12), UINT8_C( 67), UINT8_C( 38), UINT8_C(194), UINT8_C(165) }, { UINT16_C( 499), UINT16_C( 240), UINT16_C( 668), UINT16_C( 224), UINT16_C( 478), UINT16_C( 532), UINT16_C( 265), UINT16_C( 568), UINT16_C( 357), UINT16_C( 298), UINT16_C( 308), UINT16_C( 320), UINT16_C( 494), UINT16_C( 156), UINT16_C( 281), UINT16_C( 432) } }, { { UINT8_C( 30), UINT8_C( 38), UINT8_C(129), UINT8_C(202), UINT8_C(123), UINT8_C(163), UINT8_C(112), UINT8_C( 92), UINT8_C( 77), UINT8_C( 15), UINT8_C(164), UINT8_C(114), UINT8_C( 1), UINT8_C(187), UINT8_C( 59), UINT8_C( 51), UINT8_C( 94), UINT8_C(146), UINT8_C(100), UINT8_C( 80), UINT8_C(155), UINT8_C( 60), UINT8_C(148), 
UINT8_C(100), UINT8_C(197), UINT8_C(100), UINT8_C(112), UINT8_C( 8), UINT8_C(138), UINT8_C( 51), UINT8_C(173), UINT8_C(169) }, { UINT8_C( 89), UINT8_C( 46), UINT8_C(115), UINT8_C(212), UINT8_C(209), UINT8_C(227), UINT8_C( 49), UINT8_C( 31), UINT8_C(242), UINT8_C(213), UINT8_C(145), UINT8_C(243), UINT8_C(145), UINT8_C(204), UINT8_C( 38), UINT8_C(239), UINT8_C( 94), UINT8_C(139), UINT8_C( 63), UINT8_C(249), UINT8_C(199), UINT8_C(212), UINT8_C( 94), UINT8_C(140), UINT8_C( 56), UINT8_C(206), UINT8_C(149), UINT8_C(195), UINT8_C( 1), UINT8_C( 66), UINT8_C(108), UINT8_C( 90) }, { UINT16_C( 284), UINT16_C( 326), UINT16_C( 408), UINT16_C( 278), UINT16_C( 508), UINT16_C( 484), UINT16_C( 198), UINT16_C( 563), UINT16_C( 213), UINT16_C( 202), UINT16_C( 346), UINT16_C( 134), UINT16_C( 374), UINT16_C( 132), UINT16_C( 376), UINT16_C( 238) } }, { { UINT8_C(113), UINT8_C(223), UINT8_C( 47), UINT8_C( 66), UINT8_C(194), UINT8_C( 96), UINT8_C( 97), UINT8_C(181), UINT8_C( 53), UINT8_C(242), UINT8_C(168), UINT8_C(198), UINT8_C(190), UINT8_C(207), UINT8_C(181), UINT8_C( 29), UINT8_C( 90), UINT8_C(245), UINT8_C( 22), UINT8_C( 33), UINT8_C(201), UINT8_C(116), UINT8_C(173), UINT8_C( 1), UINT8_C( 67), UINT8_C( 66), UINT8_C(196), UINT8_C( 68), UINT8_C(133), UINT8_C( 48), UINT8_C(159), UINT8_C(246) }, { UINT8_C( 15), UINT8_C(206), UINT8_C( 56), UINT8_C(210), UINT8_C( 46), UINT8_C(154), UINT8_C(135), UINT8_C( 99), UINT8_C(140), UINT8_C( 47), UINT8_C( 42), UINT8_C( 75), UINT8_C(254), UINT8_C(223), UINT8_C(104), UINT8_C( 88), UINT8_C(212), UINT8_C(126), UINT8_C(121), UINT8_C(157), UINT8_C(243), UINT8_C( 39), UINT8_C(159), UINT8_C( 54), UINT8_C(105), UINT8_C( 99), UINT8_C(122), UINT8_C(238), UINT8_C(148), UINT8_C( 25), UINT8_C(228), UINT8_C(163) }, { UINT16_C( 287), UINT16_C( 396), UINT16_C( 404), UINT16_C( 461), UINT16_C( 394), UINT16_C( 263), UINT16_C( 316), UINT16_C( 285), UINT16_C( 464), UINT16_C( 269), UINT16_C( 404), UINT16_C( 275), UINT16_C( 229), UINT16_C( 353), UINT16_C( 309), UINT16_C( 
364) } }, { { UINT8_C(231), UINT8_C( 29), UINT8_C(117), UINT8_C( 21), UINT8_C(183), UINT8_C(252), UINT8_C(121), UINT8_C( 67), UINT8_C( 44), UINT8_C(163), UINT8_C(142), UINT8_C( 42), UINT8_C(130), UINT8_C(246), UINT8_C(131), UINT8_C( 87), UINT8_C(117), UINT8_C(252), UINT8_C(244), UINT8_C(104), UINT8_C( 35), UINT8_C(147), UINT8_C(158), UINT8_C(141), UINT8_C(247), UINT8_C( 24), UINT8_C(123), UINT8_C(139), UINT8_C( 50), UINT8_C( 96), UINT8_C( 46), UINT8_C( 25) }, { UINT8_C(125), UINT8_C(164), UINT8_C( 47), UINT8_C( 52), UINT8_C(160), UINT8_C(168), UINT8_C(119), UINT8_C(204), UINT8_C( 75), UINT8_C( 6), UINT8_C(247), UINT8_C(205), UINT8_C(252), UINT8_C(122), UINT8_C( 36), UINT8_C(113), UINT8_C(118), UINT8_C( 25), UINT8_C(217), UINT8_C(154), UINT8_C(172), UINT8_C(119), UINT8_C( 39), UINT8_C(163), UINT8_C(144), UINT8_C(162), UINT8_C( 46), UINT8_C(194), UINT8_C( 2), UINT8_C( 93), UINT8_C(219), UINT8_C(127) }, { UINT16_C( 330), UINT16_C( 101), UINT16_C( 244), UINT16_C( 419), UINT16_C( 426), UINT16_C( 132), UINT16_C( 417), UINT16_C( 446), UINT16_C( 305), UINT16_C( 585), UINT16_C( 394), UINT16_C( 98), UINT16_C( 277), UINT16_C( 331), UINT16_C( 316), UINT16_C( 469) } }, { { UINT8_C( 1), UINT8_C( 10), UINT8_C(179), UINT8_C(161), UINT8_C(178), UINT8_C( 43), UINT8_C(110), UINT8_C(253), UINT8_C( 49), UINT8_C(101), UINT8_C(203), UINT8_C( 45), UINT8_C(223), UINT8_C(239), UINT8_C(159), UINT8_C( 85), UINT8_C( 8), UINT8_C(120), UINT8_C(239), UINT8_C(181), UINT8_C(240), UINT8_C( 22), UINT8_C( 88), UINT8_C(128), UINT8_C(185), UINT8_C(135), UINT8_C( 66), UINT8_C(187), UINT8_C(228), UINT8_C( 29), UINT8_C( 59), UINT8_C(229) }, { UINT8_C( 40), UINT8_C(238), UINT8_C(134), UINT8_C(218), UINT8_C( 25), UINT8_C(244), UINT8_C(216), UINT8_C( 74), UINT8_C( 89), UINT8_C(163), UINT8_C(120), UINT8_C( 56), UINT8_C(146), UINT8_C( 23), UINT8_C(142), UINT8_C(155), UINT8_C(143), UINT8_C(125), UINT8_C( 80), UINT8_C(127), UINT8_C(148), UINT8_C(168), UINT8_MAX, UINT8_C( 77), UINT8_C( 47), UINT8_C( 65), UINT8_C( 
8), UINT8_C( 19), UINT8_C( 95), UINT8_C( 67), UINT8_C(248), UINT8_C(135) }, { UINT16_C( 182), UINT16_C( 407), UINT16_C( 413), UINT16_C( 222), UINT16_C( 346), UINT16_C( 390), UINT16_C( 244), UINT16_C( 402), UINT16_C( 353), UINT16_C( 351), UINT16_C( 417), UINT16_C( 250), UINT16_C( 209), UINT16_C( 264), UINT16_C( 171), UINT16_C( 191) } }, { { UINT8_C( 50), UINT8_C(127), UINT8_C( 97), UINT8_C( 75), UINT8_C(115), UINT8_C( 57), UINT8_C(150), UINT8_C(205), UINT8_C(220), UINT8_C( 14), UINT8_C( 5), UINT8_C(111), UINT8_C( 37), UINT8_C(147), UINT8_C( 10), UINT8_C(180), UINT8_C( 17), UINT8_C( 90), UINT8_C( 52), UINT8_C(165), UINT8_C( 2), UINT8_C( 51), UINT8_C(242), UINT8_C( 50), UINT8_C(117), UINT8_C(250), UINT8_C( 69), UINT8_C(212), UINT8_C( 62), UINT8_C( 62), UINT8_C( 91), UINT8_C(112) }, { UINT8_C(189), UINT8_C(188), UINT8_C(187), UINT8_C( 48), UINT8_C(246), UINT8_C( 81), UINT8_C(253), UINT8_C(210), UINT8_C( 95), UINT8_C( 3), UINT8_C( 65), UINT8_C(132), UINT8_C(150), UINT8_C( 75), UINT8_C( 57), UINT8_C(167), UINT8_C(165), UINT8_C(109), UINT8_C( 76), UINT8_C(168), UINT8_C(160), UINT8_C( 62), UINT8_C(218), UINT8_C( 21), UINT8_C( 57), UINT8_C( 31), UINT8_C(233), UINT8_C(119), UINT8_C( 93), UINT8_C( 68), UINT8_C(231), UINT8_C( 26) }, { UINT16_C( 184), UINT16_C( 369), UINT16_C( 446), UINT16_C( 405), UINT16_C( 239), UINT16_C( 390), UINT16_C( 221), UINT16_C( 324), UINT16_C( 194), UINT16_C( 387), UINT16_C( 360), UINT16_C( 206), UINT16_C( 505), UINT16_C( 324), UINT16_C( 259), UINT16_C( 396) } }, { { UINT8_C( 1), UINT8_C(162), UINT8_C( 75), UINT8_C(247), UINT8_C(244), UINT8_C( 72), UINT8_C(201), UINT8_C( 83), UINT8_C( 75), UINT8_C( 11), UINT8_C(216), UINT8_C(226), UINT8_C( 86), UINT8_C( 17), UINT8_C(137), UINT8_C(252), UINT8_C(126), UINT8_C(214), UINT8_C(164), UINT8_C( 30), UINT8_C( 20), UINT8_C(126), UINT8_C( 52), UINT8_C( 77), UINT8_C(157), UINT8_C( 29), UINT8_C(196), UINT8_C(251), UINT8_C( 98), UINT8_C(171), UINT8_C( 21), UINT8_C( 99) }, { UINT8_C( 78), UINT8_C( 96), UINT8_C( 90), 
UINT8_C( 66), UINT8_C(169), UINT8_C( 35), UINT8_C(149), UINT8_C(244), UINT8_C( 46), UINT8_C(109), UINT8_C(214), UINT8_C(133), UINT8_C(126), UINT8_C( 96), UINT8_C(129), UINT8_C(252), UINT8_C( 54), UINT8_C( 37), UINT8_C( 27), UINT8_C( 74), UINT8_C(163), UINT8_C( 79), UINT8_C(152), UINT8_C( 64), UINT8_C(108), UINT8_C( 92), UINT8_C( 59), UINT8_C(206), UINT8_C( 8), UINT8_C( 81), UINT8_C( 49), UINT8_C( 86) }, { UINT16_C( 383), UINT16_C( 382), UINT16_C( 383), UINT16_C( 218), UINT16_C( 249), UINT16_C( 498), UINT16_C( 498), UINT16_C( 337), UINT16_C( 430), UINT16_C( 344), UINT16_C( 176), UINT16_C( 162), UINT16_C( 151), UINT16_C( 220), UINT16_C( 217), UINT16_C( 267) } }, { { UINT8_C(177), UINT8_C(139), UINT8_C(152), UINT8_C( 90), UINT8_C(175), UINT8_C( 45), UINT8_C( 79), UINT8_C(221), UINT8_C(155), UINT8_C( 37), UINT8_C( 98), UINT8_C( 25), UINT8_C(133), UINT8_C(227), UINT8_C( 22), UINT8_C(187), UINT8_C( 8), UINT8_C( 49), UINT8_C( 6), UINT8_C(171), UINT8_C(128), UINT8_C(158), UINT8_C(236), UINT8_C(236), UINT8_C(250), UINT8_C( 39), UINT8_C(187), UINT8_C( 2), UINT8_C(120), UINT8_C(236), UINT8_C( 88), UINT8_C( 42) }, { UINT8_C(120), UINT8_C(240), UINT8_C(132), UINT8_C( 39), UINT8_C( 30), UINT8_C(211), UINT8_C( 4), UINT8_C(185), UINT8_C(249), UINT8_C(103), UINT8_C(210), UINT8_C(126), UINT8_C( 74), UINT8_C(232), UINT8_C( 58), UINT8_C( 83), UINT8_C( 25), UINT8_C( 64), UINT8_C(254), UINT8_C(153), UINT8_C(222), UINT8_C(234), UINT8_C(134), UINT8_C(216), UINT8_C( 18), UINT8_C( 65), UINT8_C(219), UINT8_C(138), UINT8_C( 45), UINT8_C( 51), UINT8_C(180), UINT8_C(165) }, { UINT16_C( 447), UINT16_C( 417), UINT16_C( 159), UINT16_C( 260), UINT16_C( 374), UINT16_C( 254), UINT16_C( 450), UINT16_C( 378), UINT16_C( 298), UINT16_C( 190), UINT16_C( 257), UINT16_C( 389), UINT16_C( 298), UINT16_C( 420), UINT16_C( 501), UINT16_C( 646) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi8(test_vec[i].a); simde__m256i b = 
simde_x_mm256_loadu_epi8(test_vec[i].b); simde__m256i r = simde_mm256_mpsadbw_epu8(a, b, 7); simde_test_x86_assert_equal_u16x16(r, simde_x_mm256_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm256_mul_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[8]; const int32_t b[8]; const int64_t r[4]; } test_vec[] = { { { -INT32_C( 1249839109), INT32_C( 383478711), -INT32_C( 552642143), INT32_C( 566134082), -INT32_C( 1345465419), -INT32_C( 1119937342), INT32_C( 1351952468), INT32_C( 75355400) }, { -INT32_C( 2068185395), INT32_C( 177902953), INT32_C( 820620014), -INT32_C( 464410577), INT32_C( 1033117563), -INT32_C( 1879387333), INT32_C( 81825788), INT32_C( 839408997) }, { INT64_C( 2584898991333613055), -INT64_C( 453509203125650002), -INT64_C( 1390023954778053897), INT64_C( 110624576032644784) } }, { { -INT32_C( 994655653), INT32_C( 1171214679), INT32_C( 712358139), -INT32_C( 602945952), INT32_C( 555328485), INT32_C( 1924142198), INT32_C( 142053539), INT32_C( 1228570606) }, { -INT32_C( 1743851199), INT32_C( 1037950274), -INT32_C( 161000811), -INT32_C( 19761639), -INT32_C( 1876956391), -INT32_C( 1560096513), INT32_C( 1319860576), INT32_C( 983099129) }, { INT64_C( 1734531453076177947), -INT64_C( 114690238101450729), -INT64_C( 1042327349025097635), INT64_C( 187490865807378464) } }, { { INT32_C( 433301207), INT32_C( 408334467), INT32_C( 470727939), INT32_C( 1293606964), -INT32_C( 874693940), INT32_C( 1785651210), INT32_C( 1387862617), -INT32_C( 678604544) }, { INT32_C( 2062573815), INT32_C( 328353296), INT32_C( 925868291), INT32_C( 1300580993), -INT32_C( 1910939004), -INT32_C( 1661434046), -INT32_C( 1578192479), -INT32_C( 109544702) }, { INT64_C( 893715723566094705), INT64_C( 435832072407882249), INT64_C( 1671486766508435760), -INT64_C( 2190314344034657543) } }, { { -INT32_C( 327980837), -INT32_C( 1291909458), INT32_C( 686370727), -INT32_C( 42570119), INT32_C( 327913425), -INT32_C( 1196457194), INT32_C( 928620085), -INT32_C( 181284327) }, { 
-INT32_C( 371088326), INT32_C( 1369170090), -INT32_C( 2005236721), -INT32_C( 997854989), -INT32_C( 1781067649), -INT32_C( 917665900), INT32_C( 1040230181), -INT32_C( 1288490631) }, { INT64_C( 121709859762408862), -INT64_C( 1376335785999866167), -INT64_C( 584035992940287825), INT64_C( 965978639099785385) } }, { { -INT32_C( 2137254698), INT32_C( 80885749), -INT32_C( 1349694276), -INT32_C( 1150086596), -INT32_C( 1219474909), -INT32_C( 159342895), -INT32_C( 1103790011), -INT32_C( 2005833550) }, { INT32_C( 1913130621), INT32_C( 24566341), INT32_C( 1655702310), INT32_C( 958210838), INT32_C( 1072721518), INT32_C( 1362522124), -INT32_C( 1576047632), INT32_C( 1344962771) }, { -INT64_C( 4088847407619907458), -INT64_C( 2234691930566977560), -INT64_C( 1308156975545391862), INT64_C( 1739625633061803952) } }, { { -INT32_C( 725470322), INT32_C( 886389005), INT32_C( 1385596476), INT32_C( 395031721), INT32_C( 777485090), -INT32_C( 595620373), -INT32_C( 880898312), -INT32_C( 1659066098) }, { -INT32_C( 378413348), INT32_C( 1411204631), INT32_C( 1990636748), -INT32_C( 1970458008), -INT32_C( 1715936083), INT32_C( 1769289585), -INT32_C( 751438907), INT32_C( 2020626844) }, { INT64_C( 274527653422658056), INT64_C( 2758219263024900048), -INT64_C( 1334114719925502470), INT64_C( 661941264747424984) } }, { { INT32_C( 1197662511), -INT32_C( 191135960), -INT32_C( 1687535309), INT32_C( 556136563), INT32_C( 1320869340), -INT32_C( 642306284), -INT32_C( 1079120862), INT32_C( 1832328509) }, { INT32_C( 666147327), INT32_C( 1276858137), INT32_C( 82347664), INT32_C( 1512377726), -INT32_C( 5709845), INT32_C( 819552270), -INT32_C( 1964014004), -INT32_C( 1560860765) }, { INT64_C( 797819680350758097), -INT64_C( 138964590613668176), -INT64_C( 7541959196652300), INT64_C( 2119408484976551448) } }, { { -INT32_C( 641094720), -INT32_C( 1977162246), -INT32_C( 376566165), INT32_C( 105100059), -INT32_C( 1610158958), -INT32_C( 1731076276), INT32_C( 136495205), -INT32_C( 1465181721) }, { -INT32_C( 1098812220), 
-INT32_C( 1001805991), -INT32_C( 777136203), INT32_C( 500691339), INT32_C( 700374493), INT32_C( 566333372), INT32_C( 925557839), -INT32_C( 1025518083) }, { INT64_C( 704442712513478400), INT64_C( 292643199646371495), -INT64_C( 1127714263858658294), INT64_C( 126334206973661995) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi32(test_vec[i].b); simde__m256i r = simde_mm256_mul_epi32(a, b); simde_test_x86_assert_equal_i64x4(r, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm256_mul_epu32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint32_t a[8]; const uint32_t b[8]; const uint64_t r[4]; } test_vec[] = { { { UINT32_C(1983947362), UINT32_C(3053479846), UINT32_C(1792953067), UINT32_C(3215038466), UINT32_C( 607004743), UINT32_C(2578666130), UINT32_C(1073743766), UINT32_C(1510921976) }, { UINT32_C( 533745272), UINT32_C(2715144374), UINT32_C( 487371546), UINT32_C(2480713036), UINT32_C(1756826326), UINT32_C(4060244572), UINT32_C(1764885105), UINT32_C(1371816409) }, { UINT64_C( 1058922524364372464), UINT64_C( 873834308169231582), UINT64_C( 1066401912509264218), UINT64_C( 1895034379200005430) } }, { { UINT32_C(1165005967), UINT32_C(2145863013), UINT32_C(1167913977), UINT32_C(1993898144), UINT32_C(3739193218), UINT32_C(1792074233), UINT32_C(3167945699), UINT32_C(3540948804) }, { UINT32_C(2434367020), UINT32_C(3171942596), UINT32_C(2466426355), UINT32_C(2819283493), UINT32_C(1653008745), UINT32_C(2915850186), UINT32_C(2657787738), UINT32_C(1668380727) }, { UINT64_C( 2836052104168008340), UINT64_C( 2880573813245663835), UINT64_C( 6180919088598691410), UINT64_C( 8419727233452038862) } }, { { UINT32_C(3136588534), UINT32_C(2104951946), UINT32_C(3608246705), UINT32_C(3162446675), UINT32_C(3458073860), UINT32_C(3061574236), UINT32_C(3243566474), UINT32_C(1411696221) }, { UINT32_C(3675134032), 
UINT32_C(3461907996), UINT32_C(1403349503), UINT32_C(2282693764), UINT32_C(2253794857), UINT32_C(2721894936), UINT32_C( 358846903), UINT32_C(2808710999) }, { UINT64_C(11527383265684389088), UINT64_C( 5063631220163137615), UINT64_C( 7793789080794138020), UINT64_C( 1163943783869530022) } }, { { UINT32_C(3162666911), UINT32_C(4253735933), UINT32_C(3377475396), UINT32_C(2085707603), UINT32_C(2785192077), UINT32_C( 826818426), UINT32_C( 658943184), UINT32_C(3553537843) }, { UINT32_C( 613372199), UINT32_C(1881217324), UINT32_C(2604233032), UINT32_C(1578601425), UINT32_C(2902727219), UINT32_C( 702434649), UINT32_C( 743450105), UINT32_C(4227800788) }, { UINT64_C( 1939891957904607289), UINT64_C( 8795732991030480672), UINT64_C( 8084652852051043863), UINT64_C( 489891379333834320) } }, { { UINT32_C(2602602095), UINT32_C(4010557863), UINT32_C(2223654323), UINT32_C( 65184464), UINT32_C( 363915196), UINT32_C( 759074612), UINT32_C(2287570612), UINT32_C( 478435756) }, { UINT32_C(2411177191), UINT32_C(2558444517), UINT32_C(3642493193), UINT32_C(1742602155), UINT32_C( 444370406), UINT32_C(3494361628), UINT32_C(4099514440), UINT32_C(3775979001) }, { UINT64_C( 6275334808712815145), UINT64_C( 8099645735112523339), UINT64_C( 161713143396089576), UINT64_C( 9377928756413637280) } }, { { UINT32_C(1718667393), UINT32_C(2499800715), UINT32_C(2725125111), UINT32_C( 609050), UINT32_C(4112156120), UINT32_C(2277859647), UINT32_C(4219215362), UINT32_C(2094828795) }, { UINT32_C(3772992596), UINT32_C( 846520891), UINT32_C( 416670461), UINT32_C( 102293037), UINT32_C(2751148900), UINT32_C(2519449748), UINT32_C(3667044319), UINT32_C(2287431219) }, { UINT64_C( 6484519348775622228), UINT64_C( 1135479136283046171), UINT64_C(11313153786166268000), UINT64_C(15472049723859628478) } }, { { UINT32_C(4134025915), UINT32_C( 422108188), UINT32_C(3962699199), UINT32_C(1089620700), UINT32_C( 300150141), UINT32_C(2376601262), UINT32_C(3915856565), UINT32_C(1668398760) }, { UINT32_C( 341432824), UINT32_C(1949205173), 
UINT32_C(1533108095), UINT32_C( 647713705), UINT32_C(4013457217), UINT32_C(1132257421), UINT32_C(3257721626), UINT32_C(2586221986) }, { UINT64_C( 1411492142647633960), UINT64_C( 6075246220036915905), UINT64_C( 1204639749580017597), UINT64_C(12756770616114574690) } }, { { UINT32_C( 732921718), UINT32_C(2174803201), UINT32_C(3873177916), UINT32_C(2500622420), UINT32_C(2223260919), UINT32_C(1053229092), UINT32_C(2248274916), UINT32_C( 102770576) }, { UINT32_C(2821902246), UINT32_C(3911832236), UINT32_C( 667878867), UINT32_C(1958599549), UINT32_C(1157185824), UINT32_C( 646168642), UINT32_C(1152156852), UINT32_C(1363922091) }, { UINT64_C( 2068233442166378628), UINT64_C( 2586813678227501172), UINT64_C( 2572726018520012256), UINT64_C( 2590365349649124432) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi32(test_vec[i].b); simde__m256i r = simde_mm256_mul_epu32(a, b); simde_test_x86_assert_equal_u64x4(r, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm256_mulhi_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { { -INT16_C( 3387), INT16_C( 9328), -INT16_C( 608), -INT16_C( 8041), -INT16_C( 24120), -INT16_C( 28930), INT16_C( 24168), -INT16_C( 16718), -INT16_C( 28202), -INT16_C( 22224), -INT16_C( 31685), -INT16_C( 30796), -INT16_C( 32360), -INT16_C( 29908), INT16_C( 7198), -INT16_C( 7385) }, { -INT16_C( 26610), -INT16_C( 20985), -INT16_C( 24939), INT16_C( 23950), -INT16_C( 29633), -INT16_C( 22548), -INT16_C( 24853), -INT16_C( 16027), -INT16_C( 27089), INT16_C( 27498), INT16_C( 7962), -INT16_C( 19726), INT16_C( 7840), -INT16_C( 16835), INT16_C( 25914), INT16_C( 18593) }, { INT16_C( 1375), -INT16_C( 2987), INT16_C( 231), -INT16_C( 2939), INT16_C( 10906), INT16_C( 9953), -INT16_C( 9166), INT16_C( 4088), INT16_C( 11657), -INT16_C( 
9325), -INT16_C( 3850), INT16_C( 9269), -INT16_C( 3872), INT16_C( 7682), INT16_C( 2846), -INT16_C( 2096) } }, { { -INT16_C( 22275), -INT16_C( 27913), -INT16_C( 31418), -INT16_C( 31248), -INT16_C( 9198), -INT16_C( 724), -INT16_C( 28038), -INT16_C( 21826), INT16_C( 10536), INT16_C( 16917), INT16_C( 1864), -INT16_C( 5900), INT16_C( 12582), INT16_C( 24743), INT16_C( 18582), -INT16_C( 27735) }, { -INT16_C( 24335), INT16_C( 14118), INT16_C( 5669), INT16_C( 14269), -INT16_C( 5646), INT16_C( 27700), -INT16_C( 3205), -INT16_C( 23786), INT16_C( 11036), INT16_C( 25829), -INT16_C( 9933), INT16_C( 22860), -INT16_C( 3317), -INT16_C( 24135), INT16_C( 25148), INT16_C( 11573) }, { INT16_C( 8271), -INT16_C( 6014), -INT16_C( 2718), -INT16_C( 6804), INT16_C( 792), -INT16_C( 307), INT16_C( 1371), INT16_C( 7921), INT16_C( 1774), INT16_C( 6667), -INT16_C( 283), -INT16_C( 2059), -INT16_C( 637), -INT16_C( 9113), INT16_C( 7130), -INT16_C( 4898) } }, { { INT16_C( 23298), INT16_C( 10340), INT16_C( 8561), INT16_C( 25439), -INT16_C( 27637), -INT16_C( 31025), -INT16_C( 6521), -INT16_C( 23766), INT16_C( 3857), INT16_C( 17415), INT16_C( 21481), -INT16_C( 2915), INT16_C( 22343), -INT16_C( 31851), -INT16_C( 13639), -INT16_C( 17232) }, { INT16_C( 5157), -INT16_C( 26908), INT16_C( 17206), INT16_C( 16889), -INT16_C( 13865), INT16_C( 24263), -INT16_C( 3665), -INT16_C( 16383), INT16_C( 2049), -INT16_C( 5627), -INT16_C( 23972), -INT16_C( 23586), INT16_C( 29689), -INT16_C( 19674), -INT16_C( 10690), INT16_C( 25455) }, { INT16_C( 1833), -INT16_C( 4246), INT16_C( 2247), INT16_C( 6555), INT16_C( 5846), -INT16_C( 11487), INT16_C( 364), INT16_C( 5941), INT16_C( 120), -INT16_C( 1496), -INT16_C( 7858), INT16_C( 1049), INT16_C( 10121), INT16_C( 9561), INT16_C( 2224), -INT16_C( 6694) } }, { { INT16_C( 21482), INT16_C( 8442), -INT16_C( 3178), INT16_C( 28257), INT16_C( 10684), INT16_C( 27596), -INT16_C( 12774), INT16_C( 6956), INT16_C( 12758), INT16_C( 12805), -INT16_C( 7213), -INT16_C( 12843), -INT16_C( 1193), 
-INT16_C( 27264), -INT16_C( 4143), -INT16_C( 17160) }, { -INT16_C( 3518), -INT16_C( 10020), INT16_C( 16102), -INT16_C( 23994), INT16_C( 4967), -INT16_C( 32498), INT16_C( 15073), -INT16_C( 18531), -INT16_C( 23957), INT16_C( 16106), -INT16_C( 16506), -INT16_C( 8949), -INT16_C( 29765), -INT16_C( 29582), INT16_C( 27258), -INT16_C( 17336) }, { -INT16_C( 1154), -INT16_C( 1291), -INT16_C( 781), -INT16_C( 10346), INT16_C( 809), -INT16_C( 13685), -INT16_C( 2938), -INT16_C( 1967), -INT16_C( 4664), INT16_C( 3146), INT16_C( 1816), INT16_C( 1753), INT16_C( 541), INT16_C( 12306), -INT16_C( 1724), INT16_C( 4539) } }, { { INT16_C( 9565), INT16_C( 17301), -INT16_C( 9373), -INT16_C( 13595), -INT16_C( 3090), -INT16_C( 12469), -INT16_C( 6099), -INT16_C( 26489), INT16_C( 29067), INT16_C( 4567), -INT16_C( 7632), -INT16_C( 5138), INT16_C( 24686), -INT16_C( 6024), -INT16_C( 16182), INT16_C( 10149) }, { INT16_C( 15077), INT16_C( 18538), INT16_C( 20501), INT16_C( 1042), INT16_C( 24131), INT16_C( 29139), INT16_C( 23110), -INT16_C( 12023), -INT16_C( 7989), -INT16_C( 798), -INT16_C( 12093), INT16_C( 12775), INT16_C( 24368), -INT16_C( 1255), -INT16_C( 16864), INT16_C( 1314) }, { INT16_C( 2200), INT16_C( 4893), -INT16_C( 2933), -INT16_C( 217), -INT16_C( 1138), -INT16_C( 5545), -INT16_C( 2151), INT16_C( 4859), -INT16_C( 3544), -INT16_C( 56), INT16_C( 1408), -INT16_C( 1002), INT16_C( 9178), INT16_C( 115), INT16_C( 4164), INT16_C( 203) } }, { { -INT16_C( 29192), INT16_C( 3662), INT16_C( 24797), INT16_C( 8210), -INT16_C( 6722), INT16_C( 1425), -INT16_C( 25792), INT16_C( 3030), -INT16_C( 18053), INT16_C( 15879), -INT16_C( 4215), -INT16_C( 17809), -INT16_C( 30386), INT16_C( 28341), -INT16_C( 10425), INT16_C( 16500) }, { -INT16_C( 15772), INT16_C( 16718), INT16_C( 24610), -INT16_C( 7838), -INT16_C( 3259), -INT16_C( 31258), -INT16_C( 17266), INT16_C( 2705), -INT16_C( 26507), -INT16_C( 184), -INT16_C( 18297), -INT16_C( 10567), INT16_C( 28225), -INT16_C( 30652), -INT16_C( 18363), -INT16_C( 21816) }, { 
INT16_C( 7025), INT16_C( 934), INT16_C( 9311), -INT16_C( 982), INT16_C( 334), -INT16_C( 680), INT16_C( 6795), INT16_C( 125), INT16_C( 7301), -INT16_C( 45), INT16_C( 1176), INT16_C( 2871), -INT16_C( 13087), -INT16_C( 13256), INT16_C( 2921), -INT16_C( 5493) } }, { { INT16_C( 5754), -INT16_C( 25109), INT16_C( 19830), -INT16_C( 17282), INT16_C( 25665), -INT16_C( 12479), -INT16_C( 11744), -INT16_C( 26919), INT16_C( 8811), -INT16_C( 3435), INT16_C( 20186), INT16_C( 7112), INT16_C( 3516), INT16_C( 419), INT16_C( 27845), INT16_C( 16555) }, { -INT16_C( 26750), -INT16_C( 1571), INT16_C( 23524), INT16_C( 9653), -INT16_C( 2369), -INT16_C( 8203), -INT16_C( 12599), INT16_C( 13429), INT16_C( 2800), -INT16_C( 13786), -INT16_C( 4264), INT16_C( 5349), -INT16_C( 30212), -INT16_C( 16106), -INT16_C( 15883), INT16_C( 30465) }, { -INT16_C( 2349), INT16_C( 601), INT16_C( 7117), -INT16_C( 2546), -INT16_C( 928), INT16_C( 1561), INT16_C( 2257), -INT16_C( 5516), INT16_C( 376), INT16_C( 722), -INT16_C( 1314), INT16_C( 580), -INT16_C( 1621), -INT16_C( 103), -INT16_C( 6749), INT16_C( 7695) } }, { { -INT16_C( 8616), INT16_C( 15728), INT16_C( 9529), -INT16_C( 1950), INT16_C( 22300), -INT16_C( 6696), INT16_C( 19750), INT16_C( 5657), INT16_C( 16216), -INT16_C( 20255), -INT16_C( 14802), INT16_C( 10949), -INT16_C( 9393), INT16_C( 17644), -INT16_C( 4708), -INT16_C( 2628) }, { INT16_C( 11468), INT16_C( 1330), -INT16_C( 27566), INT16_C( 28414), -INT16_C( 10516), INT16_C( 4691), INT16_C( 27683), INT16_C( 31528), INT16_C( 2475), -INT16_C( 9684), -INT16_C( 3632), INT16_C( 7940), -INT16_C( 3892), INT16_C( 26724), INT16_C( 8414), -INT16_C( 21923) }, { -INT16_C( 1508), INT16_C( 319), -INT16_C( 4009), -INT16_C( 846), -INT16_C( 3579), -INT16_C( 480), INT16_C( 8342), INT16_C( 2721), INT16_C( 612), INT16_C( 2993), INT16_C( 820), INT16_C( 1326), INT16_C( 557), INT16_C( 7194), -INT16_C( 605), INT16_C( 879) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = 
simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_mulhi_epi16(a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm256_mulhi_epu16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint16_t a[16]; const uint16_t b[16]; const uint16_t r[16]; } test_vec[] = { { { UINT16_C(10332), UINT16_C(42272), UINT16_C(44135), UINT16_C( 8711), UINT16_C(23908), UINT16_C(41433), UINT16_C(47685), UINT16_C( 9375), UINT16_C( 1656), UINT16_C(30122), UINT16_C( 5686), UINT16_C(62995), UINT16_C(28193), UINT16_C(48077), UINT16_C(50425), UINT16_C(21926) }, { UINT16_C(50924), UINT16_C(21499), UINT16_C( 626), UINT16_C(54901), UINT16_C(20063), UINT16_C(42103), UINT16_C( 5641), UINT16_C(33224), UINT16_C(29468), UINT16_C(21238), UINT16_C( 2441), UINT16_C(43592), UINT16_C( 5751), UINT16_C(29029), UINT16_C( 3034), UINT16_C(50886) }, { UINT16_C( 8028), UINT16_C(13867), UINT16_C( 421), UINT16_C( 7297), UINT16_C( 7319), UINT16_C(26618), UINT16_C( 4104), UINT16_C( 4752), UINT16_C( 744), UINT16_C( 9761), UINT16_C( 211), UINT16_C(41901), UINT16_C( 2474), UINT16_C(21295), UINT16_C( 2334), UINT16_C(17024) } }, { { UINT16_C(49617), UINT16_C(17177), UINT16_C(36803), UINT16_C( 8729), UINT16_C(37341), UINT16_C(59079), UINT16_C(36775), UINT16_C(50280), UINT16_C(24066), UINT16_C(35606), UINT16_C(24424), UINT16_C(57142), UINT16_C(39797), UINT16_C(20304), UINT16_C( 6055), UINT16_C(30741) }, { UINT16_C(11992), UINT16_C(40124), UINT16_C(54717), UINT16_C(39870), UINT16_C(34150), UINT16_C( 3713), UINT16_C(59669), UINT16_C( 6098), UINT16_C(59464), UINT16_C(45219), UINT16_C(55623), UINT16_C(48271), UINT16_C(57460), UINT16_C( 6923), UINT16_C( 8439), UINT16_C(53140) }, { UINT16_C( 9079), UINT16_C(10516), UINT16_C(30727), UINT16_C( 5310), UINT16_C(19457), UINT16_C( 3347), UINT16_C(33482), UINT16_C( 4678), UINT16_C(21836), UINT16_C(24567), UINT16_C(20729), 
UINT16_C(42088), UINT16_C(34892), UINT16_C( 2144), UINT16_C( 779), UINT16_C(24926) } }, { { UINT16_C(20559), UINT16_C( 3179), UINT16_C(10789), UINT16_C(36007), UINT16_C(10671), UINT16_C(50330), UINT16_C(27666), UINT16_C(23260), UINT16_C(32596), UINT16_C(39946), UINT16_C(39512), UINT16_C(52312), UINT16_C(25722), UINT16_C(29160), UINT16_C(31876), UINT16_C(54080) }, { UINT16_C(44236), UINT16_C(61920), UINT16_C(34774), UINT16_C(34173), UINT16_C( 6064), UINT16_C(49994), UINT16_C( 9859), UINT16_C(55325), UINT16_C(10405), UINT16_C(64884), UINT16_C(52418), UINT16_C(15561), UINT16_C(45360), UINT16_C(46509), UINT16_C(60717), UINT16_C(63880) }, { UINT16_C(13877), UINT16_C( 3003), UINT16_C( 5724), UINT16_C(18775), UINT16_C( 987), UINT16_C(38394), UINT16_C( 4161), UINT16_C(19635), UINT16_C( 5175), UINT16_C(39548), UINT16_C(31603), UINT16_C(12421), UINT16_C(17803), UINT16_C(20694), UINT16_C(29532), UINT16_C(52713) } }, { { UINT16_C(26777), UINT16_C(28651), UINT16_C(26864), UINT16_C(41205), UINT16_C(16256), UINT16_C( 867), UINT16_C(33125), UINT16_C( 2779), UINT16_C(20393), UINT16_C(27399), UINT16_C(53276), UINT16_C(19623), UINT16_C(21634), UINT16_C(44801), UINT16_C(35393), UINT16_C(56233) }, { UINT16_C(38130), UINT16_C(57930), UINT16_C(16380), UINT16_C(31875), UINT16_C(59006), UINT16_C(58240), UINT16_C(23399), UINT16_C( 4333), UINT16_C(62635), UINT16_C(51067), UINT16_C( 8901), UINT16_C(18195), UINT16_C( 5494), UINT16_C(47350), UINT16_C(40863), UINT16_C(37267) }, { UINT16_C(15579), UINT16_C(25325), UINT16_C( 6714), UINT16_C(20041), UINT16_C(14636), UINT16_C( 770), UINT16_C(11826), UINT16_C( 183), UINT16_C(19490), UINT16_C(21349), UINT16_C( 7235), UINT16_C( 5448), UINT16_C( 1813), UINT16_C(32368), UINT16_C(22068), UINT16_C(31976) } }, { { UINT16_C(56627), UINT16_C(12404), UINT16_C(63261), UINT16_C(39852), UINT16_C(11485), UINT16_C(17791), UINT16_C(27784), UINT16_C(13141), UINT16_C(53601), UINT16_C( 9978), UINT16_C( 3571), UINT16_C(27245), UINT16_C(25378), UINT16_C(49442), 
UINT16_C(46339), UINT16_C(13907) }, { UINT16_C(51090), UINT16_C(44902), UINT16_C( 5054), UINT16_C(39755), UINT16_C(51775), UINT16_C(51168), UINT16_C(13878), UINT16_C(38906), UINT16_C(62471), UINT16_C(64189), UINT16_C(10754), UINT16_C( 9316), UINT16_C(34446), UINT16_C(37350), UINT16_C(14651), UINT16_C(52935) }, { UINT16_C(44144), UINT16_C( 8498), UINT16_C( 4878), UINT16_C(24174), UINT16_C( 9073), UINT16_C(13890), UINT16_C( 5883), UINT16_C( 7801), UINT16_C(51094), UINT16_C( 9772), UINT16_C( 585), UINT16_C( 3872), UINT16_C(13338), UINT16_C(28177), UINT16_C(10359), UINT16_C(11233) } }, { { UINT16_C(11776), UINT16_C(48765), UINT16_C(51265), UINT16_C(32857), UINT16_C(14994), UINT16_C(51528), UINT16_C(17008), UINT16_C(30560), UINT16_C( 7735), UINT16_C(14705), UINT16_C(54856), UINT16_C(54877), UINT16_C(17244), UINT16_C(39015), UINT16_C(12156), UINT16_C(31846) }, { UINT16_C(58205), UINT16_C(40506), UINT16_C(38060), UINT16_C(15902), UINT16_C(26318), UINT16_C(15879), UINT16_C(26793), UINT16_C(57525), UINT16_C( 9862), UINT16_C(52761), UINT16_C(30460), UINT16_C(22949), UINT16_C( 3258), UINT16_C(14065), UINT16_C(22331), UINT16_C(39091) }, { UINT16_C(10458), UINT16_C(30140), UINT16_C(29772), UINT16_C( 7972), UINT16_C( 6021), UINT16_C(12484), UINT16_C( 6953), UINT16_C(26824), UINT16_C( 1163), UINT16_C(11838), UINT16_C(25496), UINT16_C(19216), UINT16_C( 857), UINT16_C( 8373), UINT16_C( 4142), UINT16_C(18995) } }, { { UINT16_C(60730), UINT16_C(58934), UINT16_C(21889), UINT16_C(20261), UINT16_C(11451), UINT16_C(25741), UINT16_C(17044), UINT16_C( 6724), UINT16_C(23913), UINT16_C(26089), UINT16_C(36564), UINT16_C(36542), UINT16_C(44954), UINT16_C(54980), UINT16_C(30470), UINT16_C(16750) }, { UINT16_C(42341), UINT16_C(58919), UINT16_C(19706), UINT16_C(46390), UINT16_C(50041), UINT16_C( 3354), UINT16_C(24070), UINT16_C(28456), UINT16_C( 4540), UINT16_C(37076), UINT16_C(37791), UINT16_C(14622), UINT16_C(57922), UINT16_C(18703), UINT16_C(32346), UINT16_C(49034) }, { UINT16_C(39235), 
UINT16_C(52983), UINT16_C( 6581), UINT16_C(14341), UINT16_C( 8743), UINT16_C( 1317), UINT16_C( 6259), UINT16_C( 2919), UINT16_C( 1656), UINT16_C(14759), UINT16_C(21084), UINT16_C( 8153), UINT16_C(39731), UINT16_C(15690), UINT16_C(15038), UINT16_C(12532) } }, { { UINT16_C(45347), UINT16_C( 7589), UINT16_C(56318), UINT16_C(30674), UINT16_C(60575), UINT16_C(42372), UINT16_C(44107), UINT16_C( 1812), UINT16_C(59581), UINT16_C(23703), UINT16_C(46459), UINT16_C(48790), UINT16_C(42391), UINT16_C(61703), UINT16_C(37155), UINT16_C(18096) }, { UINT16_C(22082), UINT16_C(16483), UINT16_C(13873), UINT16_C(53431), UINT16_C(15394), UINT16_C(28021), UINT16_C(35304), UINT16_C(42612), UINT16_C( 2930), UINT16_C(60674), UINT16_C(39104), UINT16_C(22699), UINT16_C(45630), UINT16_C(24905), UINT16_C(64067), UINT16_C(34472) }, { UINT16_C(15279), UINT16_C( 1908), UINT16_C(11921), UINT16_C(25008), UINT16_C(14228), UINT16_C(18116), UINT16_C(23760), UINT16_C( 1178), UINT16_C( 2663), UINT16_C(21944), UINT16_C(27721), UINT16_C(16898), UINT16_C(29515), UINT16_C(23448), UINT16_C(36322), UINT16_C( 9518) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_mulhi_epu16(a, b); simde_test_x86_assert_equal_u16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm256_mulhrs_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { { -INT16_C( 7424), -INT16_C( 12265), INT16_C( 30100), INT16_C( 15580), INT16_C( 26159), -INT16_C( 29591), -INT16_C( 20062), -INT16_C( 16404), INT16_C( 25875), -INT16_C( 16332), INT16_C( 6642), INT16_C( 19995), -INT16_C( 16871), INT16_C( 7464), -INT16_C( 21479), INT16_C( 6866) }, { -INT16_C( 5744), INT16_C( 9450), -INT16_C( 14498), -INT16_C( 29088), -INT16_C( 13779), -INT16_C( 12518), INT16_C( 
1915), -INT16_C( 28786), -INT16_C( 15764), INT16_C( 24143), INT16_C( 27355), -INT16_C( 2900), -INT16_C( 10967), INT16_C( 16914), -INT16_C( 7039), INT16_C( 4444) }, { INT16_C( 1301), -INT16_C( 3537), -INT16_C( 13318), -INT16_C( 13830), -INT16_C( 11000), INT16_C( 11304), -INT16_C( 1172), INT16_C( 14411), -INT16_C( 12448), -INT16_C( 12033), INT16_C( 5545), -INT16_C( 1770), INT16_C( 5646), INT16_C( 3853), INT16_C( 4614), INT16_C( 931) } }, { { INT16_C( 18381), INT16_C( 11062), -INT16_C( 27122), INT16_C( 15289), -INT16_C( 11168), -INT16_C( 9206), -INT16_C( 26405), INT16_C( 18283), -INT16_C( 17829), INT16_C( 13989), INT16_C( 20773), INT16_C( 20011), INT16_C( 15654), -INT16_C( 22384), -INT16_C( 4831), -INT16_C( 4423) }, { -INT16_C( 4300), INT16_C( 16921), -INT16_C( 11386), -INT16_C( 6531), -INT16_C( 30809), -INT16_C( 32062), INT16_C( 11551), INT16_C( 31433), INT16_C( 28392), INT16_C( 3505), -INT16_C( 9025), -INT16_C( 6565), -INT16_C( 5351), INT16_C( 14990), INT16_C( 18392), INT16_C( 3112) }, { -INT16_C( 2412), INT16_C( 5712), INT16_C( 9424), -INT16_C( 3047), INT16_C( 10500), INT16_C( 9008), -INT16_C( 9308), INT16_C( 17538), -INT16_C( 15448), INT16_C( 1496), -INT16_C( 5721), -INT16_C( 4009), -INT16_C( 2556), -INT16_C( 10240), -INT16_C( 2712), -INT16_C( 420) } }, { { INT16_C( 16695), -INT16_C( 17074), -INT16_C( 13548), -INT16_C( 17501), INT16_C( 26194), INT16_C( 29245), INT16_C( 1683), INT16_C( 31724), -INT16_C( 25228), INT16_C( 13448), -INT16_C( 7303), -INT16_C( 28134), -INT16_C( 22321), -INT16_C( 22580), -INT16_C( 2833), INT16_C( 9908) }, { INT16_C( 566), INT16_C( 19171), -INT16_C( 30770), INT16_C( 8198), INT16_C( 17389), -INT16_C( 32622), INT16_C( 32586), -INT16_C( 16644), -INT16_C( 31716), -INT16_C( 26894), INT16_C( 3176), INT16_C( 14120), -INT16_C( 2636), -INT16_C( 23330), -INT16_C( 27927), INT16_C( 8138) }, { INT16_C( 288), -INT16_C( 9989), INT16_C( 12722), -INT16_C( 4378), INT16_C( 13900), -INT16_C( 29115), INT16_C( 1674), -INT16_C( 16114), INT16_C( 24418), -INT16_C( 
11037), -INT16_C( 708), -INT16_C( 12123), INT16_C( 1796), INT16_C( 16076), INT16_C( 2414), INT16_C( 2461) } }, { { -INT16_C( 20843), INT16_C( 25450), INT16_C( 28725), INT16_C( 8835), INT16_C( 5811), -INT16_C( 606), -INT16_C( 24939), -INT16_C( 20036), -INT16_C( 20957), -INT16_C( 29881), INT16_C( 28859), INT16_C( 28610), -INT16_C( 24475), INT16_C( 19987), -INT16_C( 8653), -INT16_C( 14226) }, { -INT16_C( 10100), -INT16_C( 16085), -INT16_C( 20920), -INT16_C( 1053), -INT16_C( 31292), INT16_C( 23033), -INT16_C( 19164), INT16_C( 18187), INT16_C( 21091), INT16_C( 7890), -INT16_C( 27454), INT16_C( 10126), -INT16_C( 24268), INT16_C( 26486), -INT16_C( 7041), INT16_C( 2863) }, { INT16_C( 6424), -INT16_C( 12493), -INT16_C( 18339), -INT16_C( 284), -INT16_C( 5549), -INT16_C( 426), INT16_C( 14585), -INT16_C( 11120), -INT16_C( 13489), -INT16_C( 7195), -INT16_C( 24179), INT16_C( 8841), INT16_C( 18126), INT16_C( 16155), INT16_C( 1859), -INT16_C( 1243) } }, { { INT16_C( 23228), INT16_C( 1228), -INT16_C( 20727), -INT16_C( 12801), -INT16_C( 1995), INT16_C( 22823), INT16_C( 12973), INT16_C( 4512), INT16_C( 29316), INT16_C( 18223), -INT16_C( 17146), INT16_C( 14958), -INT16_C( 7073), -INT16_C( 8542), -INT16_C( 11832), -INT16_C( 31510) }, { -INT16_C( 18900), INT16_C( 13704), -INT16_C( 30618), -INT16_C( 25854), INT16_C( 10624), INT16_C( 12020), -INT16_C( 27557), -INT16_C( 8129), INT16_C( 28166), INT16_C( 3111), -INT16_C( 27348), -INT16_C( 29882), -INT16_C( 6022), INT16_C( 17001), INT16_C( 21434), -INT16_C( 6457) }, { -INT16_C( 13397), INT16_C( 514), INT16_C( 19367), INT16_C( 10100), -INT16_C( 647), INT16_C( 8372), -INT16_C( 10910), -INT16_C( 1119), INT16_C( 25199), INT16_C( 1730), INT16_C( 14310), -INT16_C( 13641), INT16_C( 1300), -INT16_C( 4432), -INT16_C( 7739), INT16_C( 6209) } }, { { INT16_C( 20234), INT16_C( 28699), INT16_C( 7639), INT16_C( 22539), -INT16_C( 185), -INT16_C( 23930), -INT16_C( 14957), -INT16_C( 26238), -INT16_C( 22221), INT16_C( 24485), -INT16_C( 5313), -INT16_C( 17942), 
INT16_C( 21716), -INT16_C( 28933), -INT16_C( 15705), -INT16_C( 20108) }, { -INT16_C( 28910), -INT16_C( 5855), INT16_C( 11436), -INT16_C( 3263), -INT16_C( 14549), -INT16_C( 16746), INT16_C( 6284), -INT16_C( 16297), -INT16_C( 830), INT16_C( 287), INT16_C( 2792), -INT16_C( 17222), -INT16_C( 19106), INT16_C( 1354), -INT16_C( 16776), -INT16_C( 30025) }, { -INT16_C( 17852), -INT16_C( 5128), INT16_C( 2666), -INT16_C( 2244), INT16_C( 82), INT16_C( 12229), -INT16_C( 2868), INT16_C( 13049), INT16_C( 563), INT16_C( 214), -INT16_C( 453), INT16_C( 9430), -INT16_C( 12662), -INT16_C( 1196), INT16_C( 8040), INT16_C( 18425) } }, { { -INT16_C( 10163), -INT16_C( 1677), -INT16_C( 19195), INT16_C( 12525), -INT16_C( 31876), INT16_C( 2543), INT16_C( 18075), INT16_C( 24009), -INT16_C( 6077), INT16_C( 11102), INT16_C( 6386), INT16_C( 20711), INT16_C( 12750), INT16_C( 18006), INT16_C( 3567), INT16_C( 15568) }, { INT16_C( 17381), -INT16_C( 5579), INT16_C( 8952), INT16_C( 29979), INT16_C( 2725), INT16_C( 16766), INT16_C( 18256), -INT16_C( 27746), -INT16_C( 721), INT16_C( 8894), -INT16_C( 23275), -INT16_C( 7310), -INT16_C( 14122), -INT16_C( 15063), -INT16_C( 1579), -INT16_C( 17663) }, { -INT16_C( 5391), INT16_C( 286), -INT16_C( 5244), INT16_C( 11459), -INT16_C( 2651), INT16_C( 1301), INT16_C( 10070), -INT16_C( 20329), INT16_C( 134), INT16_C( 3013), -INT16_C( 4536), -INT16_C( 4620), -INT16_C( 5495), -INT16_C( 8277), -INT16_C( 172), -INT16_C( 8392) } }, { { INT16_C( 14141), INT16_C( 13733), -INT16_C( 16295), -INT16_C( 86), INT16_C( 10442), INT16_C( 6976), -INT16_C( 8593), -INT16_C( 24658), INT16_C( 28123), -INT16_C( 3647), INT16_C( 13074), -INT16_C( 5676), -INT16_C( 260), -INT16_C( 11858), -INT16_C( 20233), INT16_C( 13452) }, { INT16_C( 13031), INT16_C( 16490), INT16_C( 5362), -INT16_C( 17089), INT16_C( 32573), -INT16_C( 21288), -INT16_C( 31138), INT16_C( 14667), INT16_C( 3315), INT16_C( 1578), -INT16_C( 192), INT16_C( 15599), -INT16_C( 25091), -INT16_C( 3059), -INT16_C( 26035), INT16_C( 13353) 
}, { INT16_C( 5624), INT16_C( 6911), -INT16_C( 2666), INT16_C( 45), INT16_C( 10380), -INT16_C( 4532), INT16_C( 8166), -INT16_C( 11037), INT16_C( 2845), -INT16_C( 176), -INT16_C( 77), -INT16_C( 2702), INT16_C( 199), INT16_C( 1107), INT16_C( 16076), INT16_C( 5482) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_mulhrs_epi16(a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm256_mullo_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C( 26958), INT16_C( 5839), INT16_C( 10773), INT16_C(-17217), INT16_C( 20782), INT16_C(-24278), INT16_C( 14053), INT16_C( 4872), INT16_C(-31512), INT16_C( -5844), INT16_C( 1857), INT16_C( 9311), INT16_C( 26459), INT16_C( 31943), INT16_C(-26611), INT16_C( 26444)), simde_mm256_set_epi16(INT16_C( 25062), INT16_C(-26342), INT16_C( 2282), INT16_C( -1929), INT16_C( -4943), INT16_C(-11597), INT16_C(-15401), INT16_C(-15540), INT16_C( -1566), INT16_C( -8249), INT16_C( 3344), INT16_C(-23800), INT16_C( 15220), INT16_C( -3764), INT16_C( 25181), INT16_C( 11266)), simde_mm256_set_epi16(INT16_C( 10772), INT16_C( 2054), INT16_C( 7986), INT16_C(-15159), INT16_C(-30514), INT16_C( 9310), INT16_C(-30381), INT16_C(-16800), INT16_C( -816), INT16_C(-27340), INT16_C(-16112), INT16_C(-24584), INT16_C(-12740), INT16_C( 25108), INT16_C( 14009), INT16_C( -8552)) }, { simde_mm256_set_epi16(INT16_C( -5684), INT16_C( 15383), INT16_C(-12220), INT16_C( 1990), INT16_C( -3866), INT16_C(-10346), INT16_C( 21545), INT16_C( 18065), INT16_C(-21267), INT16_C( -3518), INT16_C( 27216), INT16_C( 24702), INT16_C( -8721), INT16_C(-16371), INT16_C( 21885), INT16_C( 22906)), simde_mm256_set_epi16(INT16_C( -9241), INT16_C( -3030), INT16_C(-27821), 
INT16_C( 13222), INT16_C( -7134), INT16_C( 9766), INT16_C( 1304), INT16_C( 21664), INT16_C( 21613), INT16_C( 29626), INT16_C(-23683), INT16_C( 15587), INT16_C( 14936), INT16_C( 31442), INT16_C( 1986), INT16_C(-23873)), simde_mm256_set_epi16(INT16_C( 31508), INT16_C(-14394), INT16_C(-28148), INT16_C( 31844), INT16_C(-10612), INT16_C( 17476), INT16_C(-20264), INT16_C(-20832), INT16_C( 25833), INT16_C(-22028), INT16_C( -9968), INT16_C( 6074), INT16_C( 28712), INT16_C(-17238), INT16_C( 13242), INT16_C( -2554)) }, { simde_mm256_set_epi16(INT16_C(-22642), INT16_C( 406), INT16_C( -2741), INT16_C( 24854), INT16_C(-32159), INT16_C( 5357), INT16_C( 22365), INT16_C(-19783), INT16_C( 6458), INT16_C( -2382), INT16_C( 27277), INT16_C( 20167), INT16_C( 308), INT16_C( 11773), INT16_C(-18240), INT16_C(-22526)), simde_mm256_set_epi16(INT16_C( 3067), INT16_C( -5016), INT16_C( 13492), INT16_C(-13562), INT16_C(-32027), INT16_C(-13606), INT16_C(-15731), INT16_C( 23689), INT16_C( -1980), INT16_C(-21001), INT16_C( 20300), INT16_C( -4296), INT16_C( -123), INT16_C( 14799), INT16_C(-32140), INT16_C( 31677)), simde_mm256_set_epi16(INT16_C( 25146), INT16_C( -4880), INT16_C(-19268), INT16_C(-18300), INT16_C( -7483), INT16_C(-11310), INT16_C(-26567), INT16_C( 8449), INT16_C( -7320), INT16_C( 20414), INT16_C( 9436), INT16_C( 1160), INT16_C( 27652), INT16_C(-31597), INT16_C( 14080), INT16_C( -134)) }, { simde_mm256_set_epi16(INT16_C(-19186), INT16_C( 1301), INT16_C( 31295), INT16_C(-15933), INT16_C( 15507), INT16_C( -5145), INT16_C( 22638), INT16_C( -9549), INT16_C( 5226), INT16_C( -8321), INT16_C( 11534), INT16_C( -7469), INT16_C( 21265), INT16_C( -8572), INT16_C( 3867), INT16_C( 30789)), simde_mm256_set_epi16(INT16_C(-11613), INT16_C( 15229), INT16_C( -2018), INT16_C( 27597), INT16_C(-11741), INT16_C(-30691), INT16_C( 13581), INT16_C(-20193), INT16_C( 19002), INT16_C(-20438), INT16_C( 23792), INT16_C( 10741), INT16_C( 24708), INT16_C(-12234), INT16_C(-16212), INT16_C(-32518)), 
simde_mm256_set_epi16(INT16_C(-15382), INT16_C( 21057), INT16_C( 23394), INT16_C(-21977), INT16_C( -8679), INT16_C( 28971), INT16_C( 17302), INT16_C( 16045), INT16_C( 17412), INT16_C( -1322), INT16_C( 17696), INT16_C( -8465), INT16_C( 13508), INT16_C( 12248), INT16_C( 26148), INT16_C( -3230)) }, { simde_mm256_set_epi16(INT16_C( 31461), INT16_C( 28893), INT16_C(-27940), INT16_C(-14179), INT16_C( -7147), INT16_C( 8716), INT16_C( -5522), INT16_C( -7988), INT16_C( 11144), INT16_C( 18257), INT16_C( -233), INT16_C( 22445), INT16_C(-19828), INT16_C( 15498), INT16_C( 17919), INT16_C( -2256)), simde_mm256_set_epi16(INT16_C(-10903), INT16_C( 5816), INT16_C( 24259), INT16_C(-27791), INT16_C(-16404), INT16_C( 3280), INT16_C(-17628), INT16_C( -2370), INT16_C( 12909), INT16_C( 25550), INT16_C( 7994), INT16_C( 693), INT16_C( -412), INT16_C( 18409), INT16_C(-18714), INT16_C( -7)), simde_mm256_set_epi16(INT16_C( -3859), INT16_C( 7384), INT16_C(-23148), INT16_C(-19379), INT16_C( -4516), INT16_C( 14784), INT16_C( 20856), INT16_C( -8344), INT16_C( 6376), INT16_C(-18898), INT16_C(-27594), INT16_C( 22353), INT16_C(-22864), INT16_C( 24474), INT16_C( 11546), INT16_C( 15792)) }, { simde_mm256_set_epi16(INT16_C(-23599), INT16_C( -6775), INT16_C(-22042), INT16_C( 23599), INT16_C( 29883), INT16_C(-16427), INT16_C( 18767), INT16_C( 13204), INT16_C(-18900), INT16_C(-23120), INT16_C( 17609), INT16_C(-28983), INT16_C( 10186), INT16_C(-12557), INT16_C( 9866), INT16_C( 22138)), simde_mm256_set_epi16(INT16_C(-21041), INT16_C(-13045), INT16_C( 15958), INT16_C( 31878), INT16_C( 18632), INT16_C( 10843), INT16_C(-29976), INT16_C(-16463), INT16_C(-15244), INT16_C( -3717), INT16_C( 16396), INT16_C(-24330), INT16_C( -6946), INT16_C( 27306), INT16_C( -9977), INT16_C(-30515)), simde_mm256_set_epi16(INT16_C(-19713), INT16_C(-28189), INT16_C(-14524), INT16_C( 1178), INT16_C(-13800), INT16_C( 8887), INT16_C( 1432), INT16_C( 5460), INT16_C( 15344), INT16_C( 19344), INT16_C( 31084), INT16_C(-10970), INT16_C( 
26924), INT16_C( 2910), INT16_C( 1990), INT16_C( 4018)) }, { simde_mm256_set_epi16(INT16_C( 8019), INT16_C(-18318), INT16_C(-27174), INT16_C(-24268), INT16_C(-11542), INT16_C( 3812), INT16_C( 30491), INT16_C( 9957), INT16_C( 24267), INT16_C( 14634), INT16_C( 742), INT16_C( 6819), INT16_C( 18671), INT16_C(-25958), INT16_C( 11320), INT16_C( 22969)), simde_mm256_set_epi16(INT16_C(-30081), INT16_C(-15383), INT16_C(-30862), INT16_C( 19583), INT16_C( 4817), INT16_C( 15541), INT16_C( 22774), INT16_C( -2106), INT16_C(-10048), INT16_C(-27305), INT16_C(-18367), INT16_C(-29706), INT16_C(-10409), INT16_C( 15691), INT16_C( 9193), INT16_C( 19182)), simde_mm256_set_epi16(INT16_C( 18477), INT16_C(-19006), INT16_C(-20204), INT16_C( 26828), INT16_C(-23286), INT16_C( -2252), INT16_C(-17422), INT16_C( 2078), INT16_C( 24640), INT16_C( -8378), INT16_C( 3174), INT16_C( 6562), INT16_C(-32199), INT16_C( -738), INT16_C( -6408), INT16_C( -7170)) }, { simde_mm256_set_epi16(INT16_C( 1498), INT16_C(-31368), INT16_C( -3455), INT16_C(-19849), INT16_C(-16083), INT16_C(-23087), INT16_C( 26835), INT16_C( 10141), INT16_C( 24239), INT16_C(-15471), INT16_C( 117), INT16_C(-26512), INT16_C( -4941), INT16_C( 10703), INT16_C( 12266), INT16_C(-25802)), simde_mm256_set_epi16(INT16_C(-20053), INT16_C(-31092), INT16_C( 14330), INT16_C( 14498), INT16_C( 16232), INT16_C(-21016), INT16_C( 26384), INT16_C(-26667), INT16_C( 26626), INT16_C( -346), INT16_C( 22688), INT16_C(-31544), INT16_C( 24113), INT16_C( 2549), INT16_C( 12831), INT16_C(-19325)), simde_mm256_set_epi16(INT16_C(-23906), INT16_C(-12896), INT16_C(-30470), INT16_C( -2226), INT16_C(-29368), INT16_C(-32152), INT16_C( 29232), INT16_C(-28511), INT16_C(-10914), INT16_C(-20986), INT16_C(-32480), INT16_C(-10368), INT16_C( 2115), INT16_C( 18971), INT16_C(-32426), INT16_C( 25762)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_mullo_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i16(r, ==, 
test_vec[i].r); } return 0; } static int test_simde_mm256_mullo_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C(-1352403780), INT32_C( -343467662), INT32_C( 517643457), INT32_C( -488960691), INT32_C(-1266352672), INT32_C( 1517008609), INT32_C( 990411931), INT32_C(-1870795966)), simde_mm256_set_epi32(INT32_C(-1381411484), INT32_C(-1688230631), INT32_C( 2122460393), INT32_C( 1331099088), INT32_C(-1172838687), INT32_C(-1435873650), INT32_C( 806691655), INT32_C( 1528853489)), simde_mm256_set_epi32(INT32_C( 1332348272), INT32_C(-1964640734), INT32_C( 1072420777), INT32_C( 814972816), INT32_C(-1273339424), INT32_C( 480378574), INT32_C( -786776067), INT32_C( 1970872098)) }, { simde_mm256_set_epi32(INT32_C( 1337827173), INT32_C( -259025503), INT32_C( -584925635), INT32_C( 195250370), INT32_C(-1297361156), INT32_C( -136339105), INT32_C( 279578676), INT32_C(-1541670246)), simde_mm256_set_epi32(INT32_C( 163529464), INT32_C( 1163098936), INT32_C(-2107715246), INT32_C( 687192711), INT32_C( 869285726), INT32_C(-1567378281), INT32_C(-1698583124), INT32_C( 1922904107)), simde_mm256_set_epi32(INT32_C(-1675267112), INT32_C(-2071980488), INT32_C( 114736266), INT32_C(-1214098866), INT32_C( -977210232), INT32_C( 971766537), INT32_C(-1862490384), INT32_C( 337770462)) }, { simde_mm256_set_epi32(INT32_C(-1141523047), INT32_C( 75738515), INT32_C( 2021274638), INT32_C(-1605111533), INT32_C( 702401071), INT32_C( 991513903), INT32_C( 1097525967), INT32_C( 12869194)), simde_mm256_set_epi32(INT32_C( -236040676), INT32_C(-1937249843), INT32_C( -759698458), INT32_C( 1440000042), INT32_C( 500464056), INT32_C( 1039800065), INT32_C( 1696902588), INT32_C( 1988285066)), simde_mm256_set_epi32(INT32_C(-1142728004), INT32_C( 1660012983), INT32_C( 496344724), INT32_C( 1454769438), INT32_C(-1377466168), INT32_C( 334399023), INT32_C( -78753020), INT32_C( 2093451236)) }, { simde_mm256_set_epi32(INT32_C( 
135806382), INT32_C( 1225419686), INT32_C(-1943331695), INT32_C( -184770167), INT32_C( -868496558), INT32_C(-1570632013), INT32_C(-1946534455), INT32_C( 456616503)), simde_mm256_set_epi32(INT32_C( 1379577055), INT32_C( -512074604), INT32_C(-1891332670), INT32_C( 1111532874), INT32_C(-1428646370), INT32_C( 907679144), INT32_C( -10646910), INT32_C( 1881159279)), simde_mm256_set_epi32(INT32_C( 200974994), INT32_C( 735175672), INT32_C(-1930722590), INT32_C( 1499725466), INT32_C( -111943780), INT32_C( -608414600), INT32_C(-1553548782), INT32_C( -733042727)) }, { simde_mm256_set_epi32(INT32_C( 1387302426), INT32_C( -589733281), INT32_C(-1148378464), INT32_C(-1369430370), INT32_C( 64719355), INT32_C( 1048033330), INT32_C( 1019366599), INT32_C(-1908464696)), simde_mm256_set_epi32(INT32_C( -277676972), INT32_C( -370073323), INT32_C( -963247981), INT32_C( 258227968), INT32_C( 1397531888), INT32_C( 91286530), INT32_C( 441718636), INT32_C( 180722050)), simde_mm256_set_epi32(INT32_C( 2070165640), INT32_C( 905072843), INT32_C(-1144331808), INT32_C( 20505088), INT32_C(-1715853488), INT32_C( 1788708964), INT32_C( 2116699380), INT32_C( 1364898704)) }, { simde_mm256_set_epi32(INT32_C( 1766434696), INT32_C( -696400655), INT32_C( -511755431), INT32_C( 254323910), INT32_C(-1407716551), INT32_C( -866109177), INT32_C( 1454483112), INT32_C( 830274169)), simde_mm256_set_epi32(INT32_C( -369468938), INT32_C( 827481876), INT32_C( 733336376), INT32_C( -709198563), INT32_C(-1510456310), INT32_C( 2091700298), INT32_C(-1848267445), INT32_C( -214051693)), simde_mm256_set_epi32(INT32_C( 718619824), INT32_C(-1541746220), INT32_C( 447501944), INT32_C( -640401298), INT32_C( 1908581434), INT32_C( -153474042), INT32_C( 1165187896), INT32_C( 1946486651)) }, { simde_mm256_set_epi32(INT32_C( 551854829), INT32_C(-1524250015), INT32_C( 248754089), INT32_C( 2058702947), INT32_C( 514801021), INT32_C(-2062150747), INT32_C( 1811376814), INT32_C( 1655721768)), simde_mm256_set_epi32(INT32_C( -68197299), INT32_C( 
-108251896), INT32_C( 732768373), INT32_C(-1458146720), INT32_C(-1648648199), INT32_C( 765297486), INT32_C( 1888788167), INT32_C( 1580342871)), simde_mm256_set_epi32(INT32_C(-1364421303), INT32_C( 35678216), INT32_C( -228151235), INT32_C( 88881952), INT32_C( 1123185045), INT32_C( -436704442), INT32_C( 1841516866), INT32_C( 1493276312)) }, { simde_mm256_set_epi32(INT32_C(-1004493292), INT32_C( 86312384), INT32_C(-1589794735), INT32_C( 1105371360), INT32_C( 893767357), INT32_C( -562966901), INT32_C( 980757301), INT32_C( 2070176970)), simde_mm256_set_epi32(INT32_C( 1498513889), INT32_C( 1078743553), INT32_C( 911009242), INT32_C( -31879959), INT32_C( 673790886), INT32_C(-1100901508), INT32_C( 837704078), INT32_C( -540129822)), simde_mm256_set_epi32(INT32_C( 1421216660), INT32_C(-1778612800), INT32_C(-2078422534), INT32_C( -585176096), INT32_C( 1177460110), INT32_C(-2056987564), INT32_C( 2034852966), INT32_C( 240610388)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_mullo_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_x_mm256_mullo_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu32(UINT32_C(2258322750), UINT32_C(2861166599), UINT32_C(3174424968), UINT32_C(2016553993), UINT32_C(2997181236), UINT32_C( 363976099), UINT32_C(1103728177), UINT32_C(2198010875)), simde_x_mm256_set_epu32(UINT32_C(1159500967), UINT32_C(2653222606), UINT32_C(3215542902), UINT32_C(2811870533), UINT32_C(3872912803), UINT32_C(1788506759), UINT32_C(1042504603), UINT32_C(1249290459)), simde_x_mm256_set_epu32(UINT32_C(3692692338), UINT32_C(3668041634), UINT32_C( 617563312), UINT32_C(2691269485), UINT32_C(1346229788), UINT32_C(2750955253), UINT32_C(3376906923), UINT32_C(3164358585)) }, { simde_x_mm256_set_epu32(UINT32_C(2548089923), UINT32_C(1352192202), UINT32_C(3738306426), UINT32_C( 
536787617), UINT32_C( 525476003), UINT32_C(3633025093), UINT32_C(1784195446), UINT32_C(1453616288)), simde_x_mm256_set_epu32(UINT32_C(1186306915), UINT32_C( 779102467), UINT32_C(1127274760), UINT32_C(2603214034), UINT32_C( 521716141), UINT32_C(3878310832), UINT32_C(2866604767), UINT32_C(3646073150)), simde_x_mm256_set_epu32(UINT32_C(3894376169), UINT32_C( 261803102), UINT32_C(1405527504), UINT32_C(2079685650), UINT32_C(3781357863), UINT32_C( 235638384), UINT32_C(2877913546), UINT32_C(4073189056)) }, { simde_x_mm256_set_epu32(UINT32_C(1846808532), UINT32_C(4103184512), UINT32_C(1968463192), UINT32_C(1782167042), UINT32_C(2474531325), UINT32_C( 998377243), UINT32_C( 44320792), UINT32_C( 386122774)), simde_x_mm256_set_epu32(UINT32_C(3246150743), UINT32_C(1171028203), UINT32_C(3269727308), UINT32_C(1281786774), UINT32_C(4190872936), UINT32_C( 118349934), UINT32_C(3463597682), UINT32_C( 47529222)), simde_x_mm256_set_epu32(UINT32_C(2092101900), UINT32_C( 720000384), UINT32_C(1052153376), UINT32_C(3804212012), UINT32_C(3421741000), UINT32_C(1903537562), UINT32_C(2781833904), UINT32_C(3256735364)) }, { simde_x_mm256_set_epu32(UINT32_C(4276815888), UINT32_C(1336815165), UINT32_C( 961795267), UINT32_C( 314541168), UINT32_C(2077700186), UINT32_C(2373614491), UINT32_C(2478361141), UINT32_C(3586675075)), simde_x_mm256_set_epu32(UINT32_C(1354372236), UINT32_C(3218632640), UINT32_C(3308884037), UINT32_C( 276466624), UINT32_C(1857596357), UINT32_C(2760506734), UINT32_C( 503740282), UINT32_C(1124719285)), simde_x_mm256_set_epu32(UINT32_C(2697570496), UINT32_C(3475448000), UINT32_C( 162597007), UINT32_C( 703341568), UINT32_C(1989432130), UINT32_C(1220299674), UINT32_C(1062010946), UINT32_C(3802541983)) }, { simde_x_mm256_set_epu32(UINT32_C(1408536206), UINT32_C( 904931218), UINT32_C(3779421746), UINT32_C( 832565776), UINT32_C( 37616047), UINT32_C(1134646772), UINT32_C(3784150216), UINT32_C(1429477789)), simde_x_mm256_set_epu32(UINT32_C( 961414585), UINT32_C(1502350843), 
UINT32_C(3315915686), UINT32_C(2253735990), UINT32_C(2519475515), UINT32_C( 980432319), UINT32_C( 560915899), UINT32_C(2488230114)), simde_x_mm256_set_epu32(UINT32_C(2143799966), UINT32_C(2052142630), UINT32_C(1668614764), UINT32_C( 857837408), UINT32_C(3983726165), UINT32_C(3203048716), UINT32_C(2806300184), UINT32_C(1829534874)) }, { simde_x_mm256_set_epu32(UINT32_C( 574695235), UINT32_C( 304227009), UINT32_C(1668479769), UINT32_C(3452443080), UINT32_C(3762070562), UINT32_C(2046023294), UINT32_C( 475815618), UINT32_C( 179358113)), simde_x_mm256_set_epu32(UINT32_C( 260558052), UINT32_C(3663947713), UINT32_C(4165399884), UINT32_C(1745062207), UINT32_C(2120455131), UINT32_C(4011446154), UINT32_C(1023802013), UINT32_C( 827938078)), simde_x_mm256_set_epu32(UINT32_C( 245717932), UINT32_C( 844814977), UINT32_C(1900509292), UINT32_C(1197629496), UINT32_C(3678793494), UINT32_C(2083488236), UINT32_C(2902036730), UINT32_C(3718452702)) }, { simde_x_mm256_set_epu32(UINT32_C(1617715092), UINT32_C(2193069624), UINT32_C( 650454244), UINT32_C(2538964293), UINT32_C(1532176753), UINT32_C(3080365125), UINT32_C(2879482590), UINT32_C(3982850403)), simde_x_mm256_set_epu32(UINT32_C(1884881854), UINT32_C(3265025687), UINT32_C(1041888717), UINT32_C(1140822020), UINT32_C(4094158163), UINT32_C( 967388894), UINT32_C(1291557283), UINT32_C(2562715165)), simde_x_mm256_set_epu32(UINT32_C(3489770456), UINT32_C(3235932936), UINT32_C(2322644628), UINT32_C(1191699732), UINT32_C(3592389795), UINT32_C(3492534742), UINT32_C(1884272986), UINT32_C(2486553143)) }, { simde_x_mm256_set_epu32(UINT32_C(3590735797), UINT32_C(1425583171), UINT32_C( 334293210), UINT32_C( 791405491), UINT32_C(1931435573), UINT32_C( 591893589), UINT32_C(1994069612), UINT32_C( 354132544)), simde_x_mm256_set_epu32(UINT32_C(2748385695), UINT32_C( 731899882), UINT32_C(1077607699), UINT32_C(1910108083), UINT32_C(1267460605), UINT32_C(2091709757), UINT32_C(2754232525), UINT32_C(2186524445)), simde_x_mm256_set_epu32(UINT32_C(4029736555), 
UINT32_C(1297803838), UINT32_C(3726162478), UINT32_C(4211036969), UINT32_C( 8738657), UINT32_C(3189748033), UINT32_C(3502503036), UINT32_C(1071456576)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_x_mm256_mullo_epu32(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_or_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C( -801044498564576659), INT64_C(-2909946603020252481), INT64_C( 5958340648204315976), INT64_C( 8713768337389103061)), simde_mm256_set_epi64x(INT64_C( 9176724763357309327), INT64_C( 4054644920102546891), INT64_C( 4782244109117166481), INT64_C( 3831721763102663031)), simde_mm256_set_epi64x(INT64_C( -1584611843413009), INT64_C( -9571266241499137), INT64_C( 5980208482798747609), INT64_C( 9074056875634305015)) }, { simde_mm256_set_epi64x(INT64_C( 1317517346722662736), INT64_C( 8192878697228400830), INT64_C(-3537455209908178968), INT64_C(-2713816682012121382)), simde_mm256_set_epi64x(INT64_C( 7957277620212510994), INT64_C( -737217900053345188), INT64_C( 8353910688937076237), INT64_C( 240232259721200655)), simde_mm256_set_epi64x(INT64_C( 9110201366055221586), INT64_C( -723122418076615426), INT64_C( -4662226728259603), INT64_C(-2641361182872049953)) }, { simde_mm256_set_epi64x(INT64_C(-1030545204507091849), INT64_C(-1542600680052722313), INT64_C(-7648307982573512602), INT64_C( 5973019580240685616)), simde_mm256_set_epi64x(INT64_C(-6290470397500953523), INT64_C( 8109710997204180941), INT64_C( 5917924879433877736), INT64_C( 8502004464391034004)), simde_mm256_set_epi64x(INT64_C( -453800777020900225), INT64_C( -387309568701530113), INT64_C(-2883445622328010514), INT64_C( 8646205037023002292)) }, { simde_mm256_set_epi64x(INT64_C(-2990334454120409171), INT64_C(-3220201474370514905), INT64_C( 8548083516217107397), INT64_C( 1251663319653874101)), 
simde_mm256_set_epi64x(INT64_C(-2621282330722334206), INT64_C( 5235652619773460077), INT64_C(-8007055325654862889), INT64_C( 4775726838041815408)), simde_mm256_set_epi64x(INT64_C(-2333014432620503121), INT64_C(-2598629952962888081), INT64_C( -656965087403232297), INT64_C( 6007467006593006069)) }, { simde_mm256_set_epi64x(INT64_C( 1315645066342648861), INT64_C( 3754004658427516786), INT64_C(-7880307939890805097), INT64_C(-5701204371115270443)), simde_mm256_set_epi64x(INT64_C( 4172903126396830914), INT64_C( -493154668521044871), INT64_C(-2309759438976524777), INT64_C(-1689539225349388212)), simde_mm256_set_epi64x(INT64_C( 4317612287190453471), INT64_C( -198167755223214213), INT64_C(-2309335405310321001), INT64_C( -509524860864094499)) }, { simde_mm256_set_epi64x(INT64_C(-7748112100043814155), INT64_C( 5814291251258484552), INT64_C( 8569511450246080549), INT64_C(-3900190118960098388)), simde_mm256_set_epi64x(INT64_C( 9015646334468450927), INT64_C( 883710405382046595), INT64_C( 2743428167896968049), INT64_C(-6564603084509542605)), simde_mm256_set_epi64x(INT64_C( -180373114503566593), INT64_C( 6697859792108349387), INT64_C( 8574745263641455477), INT64_C(-1297039137678485569)) }, { simde_mm256_set_epi64x(INT64_C( 5980675563351081308), INT64_C( 7108230643859206772), INT64_C(-7185068082285956895), INT64_C(-5748801677096031915)), simde_mm256_set_epi64x(INT64_C(-1209090942768865396), INT64_C(-7402713372895048445), INT64_C( -24471728257632960), INT64_C( 3473093230644658861)), simde_mm256_set_epi64x(INT64_C( -14577395916836), INT64_C( -295610862468677769), INT64_C( -6315904187370015), INT64_C(-5748026933373309955)) }, { simde_mm256_set_epi64x(INT64_C( 3669045510431781214), INT64_C(-8656850301840548621), INT64_C(-5639311717074453893), INT64_C(-8609899897096571068)), simde_mm256_set_epi64x(INT64_C(-5772405160554679118), INT64_C( 8581290868842963452), INT64_C(-7553387725647900846), INT64_C( 1768046205102779153)), simde_mm256_set_epi64x(INT64_C(-4616368323934308866), INT64_C( 
-585750987759698433), INT64_C(-5206728520596308101), INT64_C(-7454726563416572075)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_or_si256(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_packs_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[16]; const int16_t b[16]; const int8_t r[32]; } test_vec[] = { { { INT16_C( 217), INT16_C( 133), INT16_C( 35), INT16_C( 164), INT16_C( 69), INT16_C( 101), INT16_C( 149), INT16_C( 103), INT16_C( 41), INT16_C( 56), INT16_C( 76), INT16_C( 183), INT16_C( 83), INT16_C( 188), INT16_C( 132), INT16_C( 172) }, { INT16_C( 172), INT16_C( 254), INT16_C( 196), INT16_C( 19), INT16_C( 250), INT16_C( 196), INT16_C( 4), INT16_C( 186), INT16_C( 183), INT16_C( 52), INT16_C( 215), INT16_C( 95), INT16_C( 224), INT16_C( 86), INT16_C( 229), INT16_C( 194) }, { INT8_MAX, INT8_MAX, INT8_C( 35), INT8_MAX, INT8_C( 69), INT8_C( 101), INT8_MAX, INT8_C( 103), INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 19), INT8_MAX, INT8_MAX, INT8_C( 4), INT8_MAX, INT8_C( 41), INT8_C( 56), INT8_C( 76), INT8_MAX, INT8_C( 83), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 52), INT8_MAX, INT8_C( 95), INT8_MAX, INT8_C( 86), INT8_MAX, INT8_MAX } }, { { INT16_C( 51), INT16_C( 97), INT16_C( 99), INT16_C( 1), INT16_C( 236), INT16_C( 236), INT16_C( 241), INT16_C( 30), INT16_C( 153), INT16_C( 173), INT16_C( 62), INT16_C( 155), INT16_C( 40), INT16_C( 131), INT16_C( 243), INT16_C( 160) }, { INT16_C( 5), INT16_C( 30), INT16_C( 118), INT16_C( 197), INT16_C( 229), INT16_C( 83), INT16_C( 88), INT16_C( 128), INT16_C( 195), INT16_C( 97), INT16_C( 57), INT16_C( 32), INT16_C( 238), INT16_C( 112), INT16_C( 232), INT16_C( 8) }, { INT8_C( 51), INT8_C( 97), INT8_C( 99), INT8_C( 1), INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 30), INT8_C( 5), INT8_C( 30), INT8_C( 118), INT8_MAX, INT8_MAX, INT8_C( 83), INT8_C( 88), INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 62), 
INT8_MAX, INT8_C( 40), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 97), INT8_C( 57), INT8_C( 32), INT8_MAX, INT8_C( 112), INT8_MAX, INT8_C( 8) } }, { { INT16_C( 17), INT16_C( 86), INT16_C( 70), INT16_C( 234), INT16_C( 205), INT16_C( 2), INT16_C( 174), INT16_C( 22), INT16_C( 175), INT16_C( 115), INT16_C( 116), INT16_C( 75), INT16_C( 54), INT16_C( 69), INT16_C( 203), INT16_C( 11) }, { INT16_C( 116), INT16_C( 100), INT16_C( 125), INT16_C( 229), INT16_C( 139), INT16_C( 111), INT16_C( 106), INT16_C( 170), INT16_C( 252), INT16_C( 3), INT16_C( 176), INT16_C( 212), INT16_C( 9), INT16_C( 4), INT16_C( 102), INT16_C( 176) }, { INT8_C( 17), INT8_C( 86), INT8_C( 70), INT8_MAX, INT8_MAX, INT8_C( 2), INT8_MAX, INT8_C( 22), INT8_C( 116), INT8_C( 100), INT8_C( 125), INT8_MAX, INT8_MAX, INT8_C( 111), INT8_C( 106), INT8_MAX, INT8_MAX, INT8_C( 115), INT8_C( 116), INT8_C( 75), INT8_C( 54), INT8_C( 69), INT8_MAX, INT8_C( 11), INT8_MAX, INT8_C( 3), INT8_MAX, INT8_MAX, INT8_C( 9), INT8_C( 4), INT8_C( 102), INT8_MAX } }, { { INT16_C( 113), INT16_C( 148), INT16_C( 98), INT16_C( 56), INT16_C( 98), INT16_C( 38), INT16_C( 44), INT16_C( 230), INT16_C( 238), INT16_C( 154), INT16_C( 55), INT16_C( 133), INT16_C( 135), INT16_C( 20), INT16_C( 154), INT16_C( 200) }, { INT16_C( 217), INT16_C( 250), INT16_C( 214), INT16_C( 41), INT16_C( 218), INT16_C( 5), INT16_C( 33), INT16_C( 47), INT16_C( 213), INT16_C( 174), INT16_C( 55), INT16_C( 77), INT16_C( 190), INT16_C( 172), INT16_C( 38), INT16_C( 99) }, { INT8_C( 113), INT8_MAX, INT8_C( 98), INT8_C( 56), INT8_C( 98), INT8_C( 38), INT8_C( 44), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 41), INT8_MAX, INT8_C( 5), INT8_C( 33), INT8_C( 47), INT8_MAX, INT8_MAX, INT8_C( 55), INT8_MAX, INT8_MAX, INT8_C( 20), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 55), INT8_C( 77), INT8_MAX, INT8_MAX, INT8_C( 38), INT8_C( 99) } }, { { INT16_C( 208), INT16_C( 60), INT16_C( 144), INT16_C( 223), INT16_C( 181), INT16_C( 112), INT16_C( 208), INT16_C( 230), INT16_C( 
105), INT16_C( 177), INT16_C( 200), INT16_C( 95), INT16_C( 96), INT16_C( 222), INT16_C( 127), INT16_C( 134) }, { INT16_C( 159), INT16_C( 247), INT16_C( 40), INT16_C( 153), INT16_C( 187), INT16_C( 180), INT16_C( 170), INT16_C( 48), INT16_C( 46), INT16_C( 180), INT16_C( 224), INT16_C( 125), INT16_C( 31), INT16_C( 198), INT16_C( 158), INT16_C( 239) }, { INT8_MAX, INT8_C( 60), INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 112), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 40), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 48), INT8_C( 105), INT8_MAX, INT8_MAX, INT8_C( 95), INT8_C( 96), INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 46), INT8_MAX, INT8_MAX, INT8_C( 125), INT8_C( 31), INT8_MAX, INT8_MAX, INT8_MAX } }, { { INT16_C( 15), INT16_C( 109), INT16_C( 188), INT16_C( 21), INT16_C( 16), INT16_C( 2), INT16_C( 99), INT16_C( 206), INT16_C( 20), INT16_C( 136), INT16_C( 150), INT16_C( 52), INT16_C( 98), INT16_C( 85), INT16_C( 71), INT16_C( 61) }, { INT16_C( 42), INT16_C( 142), INT16_C( 177), INT16_C( 94), INT16_C( 108), INT16_C( 124), INT16_C( 147), INT16_C( 97), INT16_C( 204), INT16_C( 155), INT16_C( 239), INT16_C( 25), INT16_C( 201), INT16_C( 81), INT16_C( 178), INT16_C( 104) }, { INT8_C( 15), INT8_C( 109), INT8_MAX, INT8_C( 21), INT8_C( 16), INT8_C( 2), INT8_C( 99), INT8_MAX, INT8_C( 42), INT8_MAX, INT8_MAX, INT8_C( 94), INT8_C( 108), INT8_C( 124), INT8_MAX, INT8_C( 97), INT8_C( 20), INT8_MAX, INT8_MAX, INT8_C( 52), INT8_C( 98), INT8_C( 85), INT8_C( 71), INT8_C( 61), INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 25), INT8_MAX, INT8_C( 81), INT8_MAX, INT8_C( 104) } }, { { INT16_C( 57), INT16_C( 194), INT16_C( 154), INT16_C( 172), INT16_C( 129), INT16_C( 214), INT16_C( 114), INT16_C( 187), INT16_C( 32), INT16_C( 161), INT16_C( 37), INT16_C( 97), INT16_C( 40), INT16_C( 255), INT16_C( 66), INT16_C( 182) }, { INT16_C( 94), INT16_C( 102), INT16_C( 153), INT16_C( 254), INT16_C( 58), INT16_C( 46), INT16_C( 11), INT16_C( 234), INT16_C( 63), INT16_C( 60), INT16_C( 70), INT16_C( 83), INT16_C( 80), 
INT16_C( 73), INT16_C( 186), INT16_C( 14) }, { INT8_C( 57), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 114), INT8_MAX, INT8_C( 94), INT8_C( 102), INT8_MAX, INT8_MAX, INT8_C( 58), INT8_C( 46), INT8_C( 11), INT8_MAX, INT8_C( 32), INT8_MAX, INT8_C( 37), INT8_C( 97), INT8_C( 40), INT8_MAX, INT8_C( 66), INT8_MAX, INT8_C( 63), INT8_C( 60), INT8_C( 70), INT8_C( 83), INT8_C( 80), INT8_C( 73), INT8_MAX, INT8_C( 14) } }, { { INT16_C( 121), INT16_C( 16), INT16_C( 134), INT16_C( 45), INT16_C( 227), INT16_C( 108), INT16_C( 69), INT16_C( 26), INT16_C( 226), INT16_C( 233), INT16_C( 244), INT16_C( 152), INT16_C( 142), INT16_C( 215), INT16_C( 225), INT16_C( 96) }, { INT16_C( 89), INT16_C( 109), INT16_C( 127), INT16_C( 159), INT16_C( 245), INT16_C( 80), INT16_C( 97), INT16_C( 191), INT16_C( 193), INT16_C( 109), INT16_C( 228), INT16_C( 250), INT16_C( 230), INT16_C( 187), INT16_C( 182), INT16_C( 34) }, { INT8_C( 121), INT8_C( 16), INT8_MAX, INT8_C( 45), INT8_MAX, INT8_C( 108), INT8_C( 69), INT8_C( 26), INT8_C( 89), INT8_C( 109), INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 80), INT8_C( 97), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 96), INT8_MAX, INT8_C( 109), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 34) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_packs_epi16(a, b); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm256_packs_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[8]; const int32_t b[8]; const int16_t r[16]; } test_vec[] = { { { INT32_C( 26989), INT32_C( 24063), INT32_C( 64705), INT32_C( 36658), INT32_C( 50142), INT32_C( 39242), INT32_C( 8518), INT32_C( 3656) }, { INT32_C( 34423), INT32_C( 1251), INT32_C( 52992), INT32_C( 41566), INT32_C( 
36965), INT32_C( 1065), INT32_C( 2853), INT32_C( 14105) }, { INT16_C( 26989), INT16_C( 24063), INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 1251), INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 8518), INT16_C( 3656), INT16_MAX, INT16_C( 1065), INT16_C( 2853), INT16_C( 14105) } }, { { INT32_C( 23229), INT32_C( 38238), INT32_C( 64100), INT32_C( 13468), INT32_C( 43716), INT32_C( 10670), INT32_C( 7220), INT32_C( 25683) }, { INT32_C( 59070), INT32_C( 63100), INT32_C( 3056), INT32_C( 41535), INT32_C( 15948), INT32_C( 20583), INT32_C( 3692), INT32_C( 51058) }, { INT16_C( 23229), INT16_MAX, INT16_MAX, INT16_C( 13468), INT16_MAX, INT16_MAX, INT16_C( 3056), INT16_MAX, INT16_MAX, INT16_C( 10670), INT16_C( 7220), INT16_C( 25683), INT16_C( 15948), INT16_C( 20583), INT16_C( 3692), INT16_MAX } }, { { INT32_C( 33198), INT32_C( 51063), INT32_C( 44498), INT32_C( 33103), INT32_C( 2239), INT32_C( 25432), INT32_C( 43889), INT32_C( 21875) }, { INT32_C( 25046), INT32_C( 21800), INT32_C( 43267), INT32_C( 8746), INT32_C( 33578), INT32_C( 54246), INT32_C( 52094), INT32_C( 20512) }, { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 25046), INT16_C( 21800), INT16_MAX, INT16_C( 8746), INT16_C( 2239), INT16_C( 25432), INT16_MAX, INT16_C( 21875), INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 20512) } }, { { INT32_C( 23985), INT32_C( 64179), INT32_C( 57507), INT32_C( 62979), INT32_C( 51066), INT32_C( 63386), INT32_C( 62658), INT32_C( 7493) }, { INT32_C( 8058), INT32_C( 42009), INT32_C( 60548), INT32_C( 16866), INT32_C( 25865), INT32_C( 30044), INT32_C( 51050), INT32_C( 56292) }, { INT16_C( 23985), INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 8058), INT16_MAX, INT16_MAX, INT16_C( 16866), INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 7493), INT16_C( 25865), INT16_C( 30044), INT16_MAX, INT16_MAX } }, { { INT32_C( 30202), INT32_C( 28441), INT32_C( 23387), INT32_C( 55965), INT32_C( 22335), INT32_C( 1741), INT32_C( 48077), INT32_C( 35735) }, { INT32_C( 39681), INT32_C( 30219), INT32_C( 57042), INT32_C( 
16312), INT32_C( 24214), INT32_C( 19812), INT32_C( 32777), INT32_C( 61964) }, { INT16_C( 30202), INT16_C( 28441), INT16_C( 23387), INT16_MAX, INT16_MAX, INT16_C( 30219), INT16_MAX, INT16_C( 16312), INT16_C( 22335), INT16_C( 1741), INT16_MAX, INT16_MAX, INT16_C( 24214), INT16_C( 19812), INT16_MAX, INT16_MAX } }, { { INT32_C( 55181), INT32_C( 57421), INT32_C( 41918), INT32_C( 41954), INT32_C( 514), INT32_C( 30543), INT32_C( 31479), INT32_C( 10860) }, { INT32_C( 14337), INT32_C( 37144), INT32_C( 64565), INT32_C( 48032), INT32_C( 28093), INT32_C( 41188), INT32_C( 23835), INT32_C( 61319) }, { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 14337), INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 514), INT16_C( 30543), INT16_C( 31479), INT16_C( 10860), INT16_C( 28093), INT16_MAX, INT16_C( 23835), INT16_MAX } }, { { INT32_C( 5159), INT32_C( 17829), INT32_C( 25153), INT32_C( 33309), INT32_C( 36079), INT32_C( 19244), INT32_C( 36521), INT32_C( 20605) }, { INT32_C( 36964), INT32_C( 64213), INT32_C( 54876), INT32_C( 31577), INT32_C( 14599), INT32_C( 52101), INT32_C( 19033), INT32_C( 6043) }, { INT16_C( 5159), INT16_C( 17829), INT16_C( 25153), INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 31577), INT16_MAX, INT16_C( 19244), INT16_MAX, INT16_C( 20605), INT16_C( 14599), INT16_MAX, INT16_C( 19033), INT16_C( 6043) } }, { { INT32_C( 24743), INT32_C( 61018), INT32_C( 35780), INT32_C( 33286), INT32_C( 33467), INT32_C( 48461), INT32_C( 52488), INT32_C( 63716) }, { INT32_C( 44120), INT32_C( 39834), INT32_C( 38694), INT32_C( 57881), INT32_C( 32100), INT32_C( 33594), INT32_C( 54096), INT32_C( 35019) }, { INT16_C( 24743), INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 32100), INT16_MAX, INT16_MAX, INT16_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi32(test_vec[i].b); 
simde__m256i r = simde_mm256_packs_epi32(a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm256_packus_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[16]; const int16_t b[16]; const uint8_t r[32]; } test_vec[] = { { { INT16_C( 1278), INT16_C( 16487), INT16_C( 23908), -INT16_C( 22641), -INT16_C( 20243), INT16_C( 19138), INT16_C( 5855), -INT16_C( 25236), INT16_C( 27106), -INT16_C( 20692), -INT16_C( 28351), -INT16_C( 14963), -INT16_C( 25251), -INT16_C( 8084), -INT16_C( 9791), -INT16_C( 16413) }, { INT16_C( 75), INT16_C( 65), INT16_C( 142), INT16_C( 150), INT16_C( 171), INT16_C( 30), INT16_C( 77), INT16_C( 163), INT16_C( 231), INT16_C( 247), INT16_C( 224), INT16_C( 213), INT16_C( 41), INT16_C( 62), INT16_C( 153), INT16_C( 223) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 75), UINT8_C( 65), UINT8_C(142), UINT8_C(150), UINT8_C(171), UINT8_C( 30), UINT8_C( 77), UINT8_C(163), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(231), UINT8_C(247), UINT8_C(224), UINT8_C(213), UINT8_C( 41), UINT8_C( 62), UINT8_C(153), UINT8_C(223) } }, { { -INT16_C( 796), -INT16_C( 29664), INT16_C( 2443), -INT16_C( 14046), INT16_C( 948), INT16_C( 30183), -INT16_C( 23984), INT16_C( 1561), INT16_C( 27529), INT16_C( 765), -INT16_C( 17845), -INT16_C( 13865), -INT16_C( 29213), -INT16_C( 6905), INT16_C( 1318), INT16_C( 2756) }, { INT16_C( 228), INT16_C( 140), INT16_C( 185), INT16_C( 161), INT16_C( 61), INT16_C( 12), INT16_C( 48), INT16_C( 105), INT16_C( 15), INT16_C( 231), INT16_C( 67), INT16_C( 172), INT16_C( 183), INT16_C( 246), INT16_C( 85), INT16_C( 190) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C(228), UINT8_C(140), UINT8_C(185), UINT8_C(161), UINT8_C( 61), UINT8_C( 12), UINT8_C( 48), UINT8_C(105), UINT8_MAX, 
UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 15), UINT8_C(231), UINT8_C( 67), UINT8_C(172), UINT8_C(183), UINT8_C(246), UINT8_C( 85), UINT8_C(190) } }, { { -INT16_C( 27078), INT16_C( 10058), -INT16_C( 24497), INT16_C( 3017), -INT16_C( 7970), -INT16_C( 16873), INT16_C( 10512), -INT16_C( 21721), -INT16_C( 27847), INT16_C( 658), INT16_C( 17110), -INT16_C( 22865), INT16_C( 16634), -INT16_C( 18788), -INT16_C( 25450), -INT16_C( 12172) }, { INT16_C( 191), INT16_C( 130), INT16_C( 192), INT16_C( 61), INT16_C( 165), INT16_C( 176), INT16_C( 35), INT16_C( 7), INT16_C( 238), INT16_C( 140), INT16_C( 185), INT16_C( 43), INT16_C( 206), INT16_C( 143), INT16_C( 86), INT16_C( 156) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C(191), UINT8_C(130), UINT8_C(192), UINT8_C( 61), UINT8_C(165), UINT8_C(176), UINT8_C( 35), UINT8_C( 7), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(238), UINT8_C(140), UINT8_C(185), UINT8_C( 43), UINT8_C(206), UINT8_C(143), UINT8_C( 86), UINT8_C(156) } }, { { INT16_C( 22293), INT16_C( 29726), -INT16_C( 21481), -INT16_C( 18254), -INT16_C( 21167), INT16_C( 8040), -INT16_C( 15152), -INT16_C( 31193), INT16_C( 12723), -INT16_C( 7150), INT16_C( 17642), -INT16_C( 7409), -INT16_C( 4078), INT16_C( 31859), -INT16_C( 11706), INT16_C( 23321) }, { INT16_C( 55), INT16_C( 65), INT16_C( 130), INT16_C( 52), INT16_C( 97), INT16_C( 0), INT16_C( 123), INT16_C( 217), INT16_C( 153), INT16_C( 150), INT16_C( 204), INT16_C( 240), INT16_C( 236), INT16_C( 3), INT16_C( 133), INT16_C( 232) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 55), UINT8_C( 65), UINT8_C(130), UINT8_C( 52), UINT8_C( 97), UINT8_C( 0), UINT8_C(123), UINT8_C(217), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C(153), 
UINT8_C(150), UINT8_C(204), UINT8_C(240), UINT8_C(236), UINT8_C( 3), UINT8_C(133), UINT8_C(232) } }, { { INT16_C( 11965), -INT16_C( 24535), INT16_C( 8880), -INT16_C( 7979), INT16_C( 10628), -INT16_C( 21792), INT16_C( 26276), INT16_C( 20611), INT16_C( 16639), -INT16_C( 8730), INT16_C( 24332), -INT16_C( 14131), INT16_C( 14668), INT16_C( 3019), INT16_C( 10943), INT16_C( 31987) }, { INT16_C( 29), INT16_C( 9), INT16_C( 241), INT16_C( 195), INT16_C( 201), INT16_C( 190), INT16_C( 240), INT16_C( 47), INT16_C( 244), INT16_C( 60), INT16_C( 217), INT16_C( 160), INT16_C( 208), INT16_C( 209), INT16_C( 158), INT16_C( 83) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 29), UINT8_C( 9), UINT8_C(241), UINT8_C(195), UINT8_C(201), UINT8_C(190), UINT8_C(240), UINT8_C( 47), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C(244), UINT8_C( 60), UINT8_C(217), UINT8_C(160), UINT8_C(208), UINT8_C(209), UINT8_C(158), UINT8_C( 83) } }, { { INT16_C( 27323), -INT16_C( 1188), INT16_C( 17755), INT16_C( 30398), INT16_C( 11278), INT16_C( 15668), INT16_C( 17180), INT16_C( 19820), INT16_C( 30775), -INT16_C( 29815), -INT16_C( 29103), INT16_C( 25643), -INT16_C( 10657), INT16_C( 22837), -INT16_C( 31883), INT16_C( 12460) }, { INT16_C( 8), INT16_C( 72), INT16_C( 234), INT16_C( 91), INT16_C( 243), INT16_C( 50), INT16_C( 5), INT16_C( 109), INT16_C( 9), INT16_C( 207), INT16_C( 36), INT16_C( 246), INT16_C( 105), INT16_C( 112), INT16_C( 252), INT16_C( 217) }, { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 8), UINT8_C( 72), UINT8_C(234), UINT8_C( 91), UINT8_C(243), UINT8_C( 50), UINT8_C( 5), UINT8_C(109), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 9), UINT8_C(207), UINT8_C( 36), UINT8_C(246), UINT8_C(105), UINT8_C(112), UINT8_C(252), UINT8_C(217) } }, { { -INT16_C( 13307), INT16_C( 
21025), -INT16_C( 8010), -INT16_C( 13138), INT16_C( 18387), INT16_C( 2558), INT16_C( 32332), -INT16_C( 13706), INT16_C( 28551), INT16_C( 7834), -INT16_C( 12908), -INT16_C( 28907), INT16_C( 25910), INT16_C( 8959), -INT16_C( 24735), INT16_C( 26363) }, { INT16_C( 29), INT16_C( 33), INT16_C( 103), INT16_C( 208), INT16_C( 236), INT16_C( 250), INT16_C( 79), INT16_C( 241), INT16_C( 95), INT16_C( 83), INT16_C( 36), INT16_C( 99), INT16_C( 225), INT16_C( 235), INT16_C( 129), INT16_C( 236) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 29), UINT8_C( 33), UINT8_C(103), UINT8_C(208), UINT8_C(236), UINT8_C(250), UINT8_C( 79), UINT8_C(241), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 95), UINT8_C( 83), UINT8_C( 36), UINT8_C( 99), UINT8_C(225), UINT8_C(235), UINT8_C(129), UINT8_C(236) } }, { { INT16_C( 2718), -INT16_C( 25843), -INT16_C( 1167), INT16_C( 8043), INT16_C( 17639), INT16_C( 20762), -INT16_C( 8301), INT16_C( 21058), INT16_C( 20798), INT16_C( 27301), -INT16_C( 30858), -INT16_C( 51), INT16_C( 21352), -INT16_C( 5654), INT16_C( 15572), INT16_C( 29397) }, { INT16_C( 226), INT16_C( 184), INT16_C( 120), INT16_C( 196), INT16_C( 241), INT16_C( 79), INT16_C( 87), INT16_C( 14), INT16_C( 71), INT16_C( 31), INT16_C( 70), INT16_C( 55), INT16_C( 9), INT16_C( 109), INT16_C( 245), INT16_C( 139) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C(226), UINT8_C(184), UINT8_C(120), UINT8_C(196), UINT8_C(241), UINT8_C( 79), UINT8_C( 87), UINT8_C( 14), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 71), UINT8_C( 31), UINT8_C( 70), UINT8_C( 55), UINT8_C( 9), UINT8_C(109), UINT8_C(245), UINT8_C(139) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = 
simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_packus_epi16(a, b); simde_test_x86_assert_equal_u8x32(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm256_packus_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[8]; const int32_t b[8]; const uint16_t r[16]; } test_vec[] = { { { INT32_C( 56303), INT32_C( 60373), INT32_C( 50246), INT32_C( 4724), INT32_C( 15585), INT32_C( 12721), INT32_C( 31788), INT32_C( 19403) }, { -INT32_C( 1490634197), INT32_C( 1234369705), INT32_C( 638441621), INT32_C( 2100920576), INT32_C( 12196591), INT32_C( 1060203433), INT32_C( 196828782), INT32_C( 1750501180) }, { UINT16_C(56303), UINT16_C(60373), UINT16_C(50246), UINT16_C( 4724), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C(15585), UINT16_C(12721), UINT16_C(31788), UINT16_C(19403), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { INT32_C( 59919), INT32_C( 3636), INT32_C( 31284), INT32_C( 45815), INT32_C( 12722), INT32_C( 35697), INT32_C( 32406), INT32_C( 62950) }, { -INT32_C( 471861910), INT32_C( 301011863), INT32_C( 411772245), INT32_C( 466256531), INT32_C( 1363967284), -INT32_C( 1545814687), -INT32_C( 1658752278), -INT32_C( 913176481) }, { UINT16_C(59919), UINT16_C( 3636), UINT16_C(31284), UINT16_C(45815), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C(12722), UINT16_C(35697), UINT16_C(32406), UINT16_C(62950), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { INT32_C( 38060), INT32_C( 55973), INT32_C( 22258), INT32_C( 59249), INT32_C( 39736), INT32_C( 25918), INT32_C( 58882), INT32_C( 26031) }, { -INT32_C( 1929815033), -INT32_C( 1134059527), -INT32_C( 2112726577), INT32_C( 1349092118), -INT32_C( 1125408447), INT32_C( 1008806325), -INT32_C( 266198135), -INT32_C( 1084894792) }, { UINT16_C(38060), UINT16_C(55973), UINT16_C(22258), UINT16_C(59249), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C(39736), UINT16_C(25918), UINT16_C(58882), UINT16_C(26031), 
UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { INT32_C( 10060), INT32_C( 48611), INT32_C( 9024), INT32_C( 47731), INT32_C( 118), INT32_C( 4412), INT32_C( 29442), INT32_C( 23859) }, { -INT32_C( 1803255898), INT32_C( 1045588018), -INT32_C( 681471394), -INT32_C( 2037197509), -INT32_C( 1132001228), INT32_C( 1540277152), INT32_C( 1389285410), -INT32_C( 827391705) }, { UINT16_C(10060), UINT16_C(48611), UINT16_C( 9024), UINT16_C(47731), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 118), UINT16_C( 4412), UINT16_C(29442), UINT16_C(23859), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { INT32_C( 45666), INT32_C( 64241), INT32_C( 33489), INT32_C( 23304), INT32_C( 3096), INT32_C( 30055), INT32_C( 56775), INT32_C( 47019) }, { INT32_C( 1181355690), INT32_C( 155212738), -INT32_C( 712306003), -INT32_C( 516910219), INT32_C( 1995261987), -INT32_C( 454339538), -INT32_C( 1044204918), -INT32_C( 747082456) }, { UINT16_C(45666), UINT16_C(64241), UINT16_C(33489), UINT16_C(23304), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 3096), UINT16_C(30055), UINT16_C(56775), UINT16_C(47019), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { INT32_C( 15897), INT32_C( 60231), INT32_C( 57792), INT32_C( 35011), INT32_C( 26622), INT32_C( 36684), INT32_C( 50256), INT32_C( 63383) }, { -INT32_C( 399134549), INT32_C( 1993571338), -INT32_C( 1286040754), -INT32_C( 1120134268), -INT32_C( 802932021), -INT32_C( 1084264412), -INT32_C( 92033154), INT32_C( 603003512) }, { UINT16_C(15897), UINT16_C(60231), UINT16_C(57792), UINT16_C(35011), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C(26622), UINT16_C(36684), UINT16_C(50256), UINT16_C(63383), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { INT32_C( 54539), INT32_C( 61516), INT32_C( 63139), INT32_C( 35507), INT32_C( 15963), INT32_C( 51197), INT32_C( 58049), INT32_C( 26117) }, { INT32_C( 2050756824), INT32_C( 1651149039), -INT32_C( 346550996), INT32_C( 125111533), 
INT32_C( 742772964), -INT32_C( 168607093), INT32_C( 1607972035), INT32_C( 1053154406) }, { UINT16_C(54539), UINT16_C(61516), UINT16_C(63139), UINT16_C(35507), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C(15963), UINT16_C(51197), UINT16_C(58049), UINT16_C(26117), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { INT32_C( 56504), INT32_C( 46398), INT32_C( 7584), INT32_C( 34340), INT32_C( 29107), INT32_C( 28518), INT32_C( 49614), INT32_C( 1536) }, { INT32_C( 518174869), INT32_C( 198451419), INT32_C( 1495823542), INT32_C( 1910459530), INT32_C( 1625461429), -INT32_C( 1798354631), -INT32_C( 1605001850), -INT32_C( 962177743) }, { UINT16_C(56504), UINT16_C(46398), UINT16_C( 7584), UINT16_C(34340), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C(29107), UINT16_C(28518), UINT16_C(49614), UINT16_C( 1536), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi32(test_vec[i].b); simde__m256i r = simde_mm256_packus_epi32(a, b); simde_test_x86_assert_equal_u16x16(r, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm256_permute4x64_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C(-4031273950158647677), INT64_C(-7434948453373431243), INT64_C( 966046851086666502), INT64_C(-3558090175042735721)), simde_mm256_set_epi64x(INT64_C(-3558090175042735721), INT64_C(-3558090175042735721), INT64_C(-4031273950158647677), INT64_C( 966046851086666502)) }, { simde_mm256_set_epi64x(INT64_C(-5846597928812893198), INT64_C(-4353963629209663352), INT64_C(-7799994890686903985), INT64_C( 1444957477620918324)), simde_mm256_set_epi64x(INT64_C( 1444957477620918324), INT64_C( 1444957477620918324), INT64_C(-5846597928812893198), INT64_C(-7799994890686903985)) }, { 
simde_mm256_set_epi64x(INT64_C(-1184806487964558659), INT64_C( 7043949117721512702), INT64_C( -92438279376413162), INT64_C( 2263934164871463775)), simde_mm256_set_epi64x(INT64_C( 2263934164871463775), INT64_C( 2263934164871463775), INT64_C(-1184806487964558659), INT64_C( -92438279376413162)) }, { simde_mm256_set_epi64x(INT64_C( 9090919205935740251), INT64_C( 3797255434791406626), INT64_C(-3974983398240952043), INT64_C(-2667637164037811982)), simde_mm256_set_epi64x(INT64_C(-2667637164037811982), INT64_C(-2667637164037811982), INT64_C( 9090919205935740251), INT64_C(-3974983398240952043)) }, { simde_mm256_set_epi64x(INT64_C( 7885365925671452944), INT64_C( 8557735835567037410), INT64_C( 1805700887716213163), INT64_C(-5945530108016559723)), simde_mm256_set_epi64x(INT64_C(-5945530108016559723), INT64_C(-5945530108016559723), INT64_C( 7885365925671452944), INT64_C( 1805700887716213163)) }, { simde_mm256_set_epi64x(INT64_C( -666700084400918528), INT64_C( 2293046882897477780), INT64_C(-4361422993016110212), INT64_C( 5540865589910111090)), simde_mm256_set_epi64x(INT64_C( 5540865589910111090), INT64_C( 5540865589910111090), INT64_C( -666700084400918528), INT64_C(-4361422993016110212)) }, { simde_mm256_set_epi64x(INT64_C(-4089126903474854143), INT64_C(-3405442608942374627), INT64_C(-5965708747641475330), INT64_C( 3779098457061206514)), simde_mm256_set_epi64x(INT64_C( 3779098457061206514), INT64_C( 3779098457061206514), INT64_C(-4089126903474854143), INT64_C(-5965708747641475330)) }, { simde_mm256_set_epi64x(INT64_C(-7195404196599220190), INT64_C( 4846123797420351534), INT64_C( 1973553066803872882), INT64_C(-2603358823346386940)), simde_mm256_set_epi64x(INT64_C(-2603358823346386940), INT64_C(-2603358823346386940), INT64_C(-7195404196599220190), INT64_C( 1973553066803872882)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_permute4x64_epi64(test_vec[i].a, 13); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } 
static int test_simde_mm256_permute4x64_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -641.76), SIMDE_FLOAT64_C( 477.18), SIMDE_FLOAT64_C( 278.49), SIMDE_FLOAT64_C( 569.18)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 569.18), SIMDE_FLOAT64_C( 569.18), SIMDE_FLOAT64_C( -641.76), SIMDE_FLOAT64_C( 278.49)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -489.82), SIMDE_FLOAT64_C( -641.14), SIMDE_FLOAT64_C( -951.91), SIMDE_FLOAT64_C( 935.01)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 935.01), SIMDE_FLOAT64_C( 935.01), SIMDE_FLOAT64_C( -489.82), SIMDE_FLOAT64_C( -951.91)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -602.75), SIMDE_FLOAT64_C( 339.47), SIMDE_FLOAT64_C( -820.66), SIMDE_FLOAT64_C( -740.61)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -740.61), SIMDE_FLOAT64_C( -740.61), SIMDE_FLOAT64_C( -602.75), SIMDE_FLOAT64_C( -820.66)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -303.98), SIMDE_FLOAT64_C( -350.58), SIMDE_FLOAT64_C( -574.59), SIMDE_FLOAT64_C( 850.86)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 850.86), SIMDE_FLOAT64_C( 850.86), SIMDE_FLOAT64_C( -303.98), SIMDE_FLOAT64_C( -574.59)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -623.12), SIMDE_FLOAT64_C( -143.91), SIMDE_FLOAT64_C( -840.93), SIMDE_FLOAT64_C( 411.53)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 411.53), SIMDE_FLOAT64_C( 411.53), SIMDE_FLOAT64_C( -623.12), SIMDE_FLOAT64_C( -840.93)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -167.73), SIMDE_FLOAT64_C( -215.71), SIMDE_FLOAT64_C( -717.06), SIMDE_FLOAT64_C( -432.13)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -432.13), SIMDE_FLOAT64_C( -432.13), SIMDE_FLOAT64_C( -167.73), SIMDE_FLOAT64_C( -717.06)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 572.27), SIMDE_FLOAT64_C( -942.37), SIMDE_FLOAT64_C( 905.44), SIMDE_FLOAT64_C( -810.45)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -810.45), SIMDE_FLOAT64_C( -810.45), SIMDE_FLOAT64_C( 572.27), SIMDE_FLOAT64_C( 905.44)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -340.56), SIMDE_FLOAT64_C( 
537.41), SIMDE_FLOAT64_C( -960.96), SIMDE_FLOAT64_C( -230.88)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -230.88), SIMDE_FLOAT64_C( -230.88), SIMDE_FLOAT64_C( -340.56), SIMDE_FLOAT64_C( -960.96)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_permute4x64_pd(test_vec[i].a, 13); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_permute2x128_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C( 9096692030846176105), INT64_C( 644260392039444522), INT64_C(-4583540275174352405), INT64_C(-6816753880857675259)), simde_mm256_set_epi64x(INT64_C(-7886827988827131690), INT64_C(-2107575233125845054), INT64_C(-8398644678734943287), INT64_C( 7172114359254607016)), simde_mm256_set_epi64x(INT64_C( 9096692030846176105), INT64_C( 644260392039444522), INT64_C(-7886827988827131690), INT64_C(-2107575233125845054)) }, { simde_mm256_set_epi64x(INT64_C(-6314946612387904819), INT64_C(-1883921203594067636), INT64_C(-5030251871897832411), INT64_C( 8348493077761215789)), simde_mm256_set_epi64x(INT64_C( 2582913386835954622), INT64_C( 153655168481379701), INT64_C(-1087064137492042031), INT64_C( 1783808851867973139)), simde_mm256_set_epi64x(INT64_C(-6314946612387904819), INT64_C(-1883921203594067636), INT64_C( 2582913386835954622), INT64_C( 153655168481379701)) }, { simde_mm256_set_epi64x(INT64_C(-3208478614025680333), INT64_C(-3409066786741744502), INT64_C(-6957643043766269700), INT64_C( 3219428559958296960)), simde_mm256_set_epi64x(INT64_C(-1736924574103250292), INT64_C(-2810347538827243748), INT64_C( -297965813118371045), INT64_C(-7618358042182251122)), simde_mm256_set_epi64x(INT64_C(-3208478614025680333), INT64_C(-3409066786741744502), INT64_C(-1736924574103250292), INT64_C(-2810347538827243748)) }, { simde_mm256_set_epi64x(INT64_C( 796366024780064289), INT64_C(-1489690745108457074), 
INT64_C(-7990282097237082056), INT64_C(-7545130296515735090)), simde_mm256_set_epi64x(INT64_C( 951803776889232332), INT64_C(-6640461449591045668), INT64_C( 5271740244822761531), INT64_C( 3149915688837762175)), simde_mm256_set_epi64x(INT64_C( 796366024780064289), INT64_C(-1489690745108457074), INT64_C( 951803776889232332), INT64_C(-6640461449591045668)) }, { simde_mm256_set_epi64x(INT64_C( -95120238103258498), INT64_C(-1762353908339260045), INT64_C( 6992845328844002662), INT64_C(-5939283762406250642)), simde_mm256_set_epi64x(INT64_C(-5885001620821736092), INT64_C(-6745062192544323367), INT64_C( 7803931770148523943), INT64_C(-8993062880293478576)), simde_mm256_set_epi64x(INT64_C( -95120238103258498), INT64_C(-1762353908339260045), INT64_C(-5885001620821736092), INT64_C(-6745062192544323367)) }, { simde_mm256_set_epi64x(INT64_C(-3708437875152674849), INT64_C( 8243162546537572005), INT64_C(-1103721052327437925), INT64_C(-2925489198757650175)), simde_mm256_set_epi64x(INT64_C( 1440085788748654982), INT64_C( 1725906984156202179), INT64_C( 5845599904819452784), INT64_C( 7162548421658470679)), simde_mm256_set_epi64x(INT64_C(-3708437875152674849), INT64_C( 8243162546537572005), INT64_C( 1440085788748654982), INT64_C( 1725906984156202179)) }, { simde_mm256_set_epi64x(INT64_C( 6834943649491098623), INT64_C( 2759372331225584008), INT64_C(-1659900994892419246), INT64_C(-2119655686628377164)), simde_mm256_set_epi64x(INT64_C( 1954610004667753515), INT64_C( 5688482191974230934), INT64_C(-3937849964004809456), INT64_C(-8694088207381845200)), simde_mm256_set_epi64x(INT64_C( 6834943649491098623), INT64_C( 2759372331225584008), INT64_C( 1954610004667753515), INT64_C( 5688482191974230934)) }, { simde_mm256_set_epi64x(INT64_C( 5051547726856501651), INT64_C( 7333908238294102632), INT64_C( 7118133466490521985), INT64_C( 6243950982549416292)), simde_mm256_set_epi64x(INT64_C(-6805527145604381785), INT64_C(-1282569833996306134), INT64_C(-1497859500202369050), INT64_C( 1581543684384159070)), 
simde_mm256_set_epi64x(INT64_C( 5051547726856501651), INT64_C( 7333908238294102632), INT64_C(-6805527145604381785), INT64_C(-1282569833996306134)) }, }; //printf("\n"); //for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { // simde__m256i_private a, b, r; // munit_rand_memory(sizeof(a), (uint8_t*) &a); // munit_rand_memory(sizeof(b), (uint8_t*) &b); // r = simde__m256i_to_private(simde_mm256_permute2x128_si256(simde__m256i_from_private(a), simde__m256i_from_private(b), 23)); // printf(" { simde_mm256_set_epi64x(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n" // " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n", // a.i64[3], a.i64[2], a.i64[1], a.i64[0]); // printf(" simde_mm256_set_epi64x(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n" // " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")),\n", // b.i64[3], b.i64[2], b.i64[1], b.i64[0]); // printf(" simde_mm256_set_epi64x(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 "),\n" // " INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")) },\n", // r.i64[3], r.i64[2], r.i64[1], r.i64[0]); //} //return MUNIT_FAIL; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_permute2x128_si256(test_vec[i].a, test_vec[i].b, 23); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_permutevar8x32_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[8]; const int32_t b[8]; const int32_t r[8]; } test_vec[] = { { { INT32_C( 1408063718), INT32_C( 1053738302), -INT32_C( 1926009139), INT32_C( 971995611), -INT32_C( 79435850), INT32_C( 1766687132), -INT32_C( 1602616261), -INT32_C( 891684124) }, { -INT32_C( 1877031342), INT32_C( 1489955979), INT32_C( 920978011), INT32_C( 829478011), INT32_C( 1462612923), INT32_C( 1874885172), INT32_C( 1645165438), -INT32_C( 2027034571) }, { -INT32_C( 1926009139), INT32_C( 971995611), INT32_C( 971995611), INT32_C( 971995611), INT32_C( 971995611), -INT32_C( 79435850), -INT32_C( 1602616261), 
INT32_C( 1766687132) } }, { { INT32_C( 957828014), -INT32_C( 1835932105), INT32_C( 1674147816), INT32_C( 127220043), INT32_C( 543081196), -INT32_C( 1165025476), -INT32_C( 1893949862), INT32_C( 873875846) }, { -INT32_C( 865260140), -INT32_C( 77660653), -INT32_C( 1050728587), INT32_C( 1288238176), -INT32_C( 227793226), -INT32_C( 1616053435), INT32_C( 523159705), -INT32_C( 1504492526) }, { INT32_C( 543081196), INT32_C( 127220043), -INT32_C( 1165025476), INT32_C( 957828014), -INT32_C( 1893949862), -INT32_C( 1165025476), -INT32_C( 1835932105), INT32_C( 1674147816) } }, { { -INT32_C( 2056077198), INT32_C( 880923071), INT32_C( 1509286136), -INT32_C( 1968849452), INT32_C( 695997156), -INT32_C( 1479989235), INT32_C( 46594032), -INT32_C( 1364649412) }, { -INT32_C( 1724703782), -INT32_C( 439438100), INT32_C( 1748943764), INT32_C( 1710416769), INT32_C( 59666165), -INT32_C( 2018879594), -INT32_C( 1953927089), INT32_C( 1698247306) }, { INT32_C( 1509286136), INT32_C( 695997156), INT32_C( 695997156), INT32_C( 880923071), -INT32_C( 1479989235), INT32_C( 46594032), -INT32_C( 1364649412), INT32_C( 1509286136) } }, { { INT32_C( 989752398), -INT32_C( 1256207327), INT32_C( 287202704), INT32_C( 913707073), INT32_C( 356058239), -INT32_C( 1415781540), -INT32_C( 566876588), -INT32_C( 1505530024) }, { -INT32_C( 52346149), -INT32_C( 1632501746), -INT32_C( 1615867810), INT32_C( 1607804384), -INT32_C( 2039148758), INT32_C( 1177620978), -INT32_C( 1876596937), -INT32_C( 1321834282) }, { INT32_C( 913707073), -INT32_C( 566876588), -INT32_C( 566876588), INT32_C( 989752398), INT32_C( 287202704), INT32_C( 287202704), -INT32_C( 1505530024), -INT32_C( 566876588) } }, { { -INT32_C( 1179773014), INT32_C( 1985437720), INT32_C( 269813552), INT32_C( 1450240556), -INT32_C( 321067527), INT32_C( 775032310), INT32_C( 1253988212), INT32_C( 1794962624) }, { INT32_C( 606317068), INT32_C( 983202570), -INT32_C( 1370837118), -INT32_C( 1828341095), -INT32_C( 1770004065), INT32_C( 1673834991), -INT32_C( 911310327), 
-INT32_C( 2110543242) }, { -INT32_C( 321067527), INT32_C( 269813552), INT32_C( 269813552), INT32_C( 1985437720), INT32_C( 1794962624), INT32_C( 1794962624), INT32_C( 1985437720), INT32_C( 1253988212) } }, { { INT32_C( 1587959636), INT32_C( 1419264210), -INT32_C( 1996299537), INT32_C( 1008469917), -INT32_C( 657286167), INT32_C( 1429968460), -INT32_C( 1893799656), -INT32_C( 418295149) }, { INT32_C( 2068166825), -INT32_C( 389030408), INT32_C( 1567740352), -INT32_C( 1030124071), INT32_C( 1956277288), INT32_C( 466277634), INT32_C( 1386932415), -INT32_C( 482690246) }, { INT32_C( 1419264210), INT32_C( 1587959636), INT32_C( 1587959636), INT32_C( 1419264210), INT32_C( 1587959636), -INT32_C( 1996299537), -INT32_C( 418295149), -INT32_C( 1996299537) } }, { { INT32_C( 1818132339), INT32_C( 492055901), -INT32_C( 663042561), INT32_C( 2056917842), -INT32_C( 2098318209), -INT32_C( 929187831), -INT32_C( 618969183), INT32_C( 1992250626) }, { INT32_C( 836902356), INT32_C( 1246639691), INT32_C( 1294125307), INT32_C( 1539816668), -INT32_C( 102910480), INT32_C( 264403566), -INT32_C( 991240767), INT32_C( 104507698) }, { -INT32_C( 2098318209), INT32_C( 2056917842), INT32_C( 2056917842), -INT32_C( 2098318209), INT32_C( 1818132339), -INT32_C( 618969183), INT32_C( 492055901), -INT32_C( 663042561) } }, { { INT32_C( 305667271), INT32_C( 1297909330), INT32_C( 731545167), INT32_C( 713449786), -INT32_C( 2078055402), -INT32_C( 1600920098), -INT32_C( 194740542), -INT32_C( 285499865) }, { INT32_C( 201339834), INT32_C( 140074169), INT32_C( 338949082), INT32_C( 1782495828), -INT32_C( 51420642), INT32_C( 178029127), INT32_C( 687734784), INT32_C( 1477900702) }, { INT32_C( 731545167), INT32_C( 1297909330), INT32_C( 731545167), -INT32_C( 2078055402), -INT32_C( 194740542), -INT32_C( 285499865), INT32_C( 305667271), -INT32_C( 194740542) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); simde__m256i b = 
simde_x_mm256_loadu_epi32(test_vec[i].b); simde__m256i r = simde_mm256_permutevar8x32_epi32(a, b); simde_test_x86_assert_equal_i32x8(r, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm256_permutevar8x32_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const int32_t b[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 809.43), SIMDE_FLOAT32_C( 302.44), SIMDE_FLOAT32_C( -655.90), SIMDE_FLOAT32_C( 628.58), SIMDE_FLOAT32_C( 242.76), SIMDE_FLOAT32_C( -149.67), SIMDE_FLOAT32_C( 759.40), SIMDE_FLOAT32_C( 143.90) }, { -INT32_C( 716838906), INT32_C( 339742123), -INT32_C( 1286153911), -INT32_C( 443567707), -INT32_C( 1899934705), INT32_C( 2112822676), -INT32_C( 1385550335), INT32_C( 16765946) }, { SIMDE_FLOAT32_C( 759.40), SIMDE_FLOAT32_C( 628.58), SIMDE_FLOAT32_C( 302.44), SIMDE_FLOAT32_C( -149.67), SIMDE_FLOAT32_C( 143.90), SIMDE_FLOAT32_C( 242.76), SIMDE_FLOAT32_C( 302.44), SIMDE_FLOAT32_C( -655.90) } }, { { SIMDE_FLOAT32_C( 110.93), SIMDE_FLOAT32_C( 350.59), SIMDE_FLOAT32_C( -543.10), SIMDE_FLOAT32_C( -725.49), SIMDE_FLOAT32_C( 18.95), SIMDE_FLOAT32_C( -444.18), SIMDE_FLOAT32_C( -148.59), SIMDE_FLOAT32_C( 920.56) }, { -INT32_C( 1806839313), -INT32_C( 1854284414), -INT32_C( 1122026711), INT32_C( 1513754200), INT32_C( 923247932), INT32_C( 859244152), -INT32_C( 1667625654), INT32_C( 322311203) }, { SIMDE_FLOAT32_C( 920.56), SIMDE_FLOAT32_C( -543.10), SIMDE_FLOAT32_C( 350.59), SIMDE_FLOAT32_C( 110.93), SIMDE_FLOAT32_C( 18.95), SIMDE_FLOAT32_C( 110.93), SIMDE_FLOAT32_C( -543.10), SIMDE_FLOAT32_C( -725.49) } }, { { SIMDE_FLOAT32_C( -73.44), SIMDE_FLOAT32_C( -32.92), SIMDE_FLOAT32_C( 765.45), SIMDE_FLOAT32_C( 761.19), SIMDE_FLOAT32_C( 685.64), SIMDE_FLOAT32_C( 282.32), SIMDE_FLOAT32_C( 215.31), SIMDE_FLOAT32_C( -532.59) }, { -INT32_C( 1270343332), INT32_C( 1678672423), -INT32_C( 1617226457), INT32_C( 1725157915), INT32_C( 50490848), INT32_C( 1729509505), INT32_C( 500154044), INT32_C( 1000917215) }, { 
SIMDE_FLOAT32_C( 685.64), SIMDE_FLOAT32_C( -532.59), SIMDE_FLOAT32_C( -532.59), SIMDE_FLOAT32_C( 761.19), SIMDE_FLOAT32_C( -73.44), SIMDE_FLOAT32_C( -32.92), SIMDE_FLOAT32_C( 685.64), SIMDE_FLOAT32_C( -532.59) } }, { { SIMDE_FLOAT32_C( -786.97), SIMDE_FLOAT32_C( -512.61), SIMDE_FLOAT32_C( 516.41), SIMDE_FLOAT32_C( -558.86), SIMDE_FLOAT32_C( -760.16), SIMDE_FLOAT32_C( 955.28), SIMDE_FLOAT32_C( -651.17), SIMDE_FLOAT32_C( 933.60) }, { INT32_C( 792266771), -INT32_C( 1164636966), -INT32_C( 71395463), -INT32_C( 1939680049), INT32_C( 1906913682), -INT32_C( 609463815), -INT32_C( 1260086207), -INT32_C( 1387376230) }, { SIMDE_FLOAT32_C( -558.86), SIMDE_FLOAT32_C( 516.41), SIMDE_FLOAT32_C( -512.61), SIMDE_FLOAT32_C( 933.60), SIMDE_FLOAT32_C( 516.41), SIMDE_FLOAT32_C( -512.61), SIMDE_FLOAT32_C( -512.61), SIMDE_FLOAT32_C( 516.41) } }, { { SIMDE_FLOAT32_C( -59.26), SIMDE_FLOAT32_C( 439.31), SIMDE_FLOAT32_C( 841.23), SIMDE_FLOAT32_C( -266.81), SIMDE_FLOAT32_C( 199.59), SIMDE_FLOAT32_C( -17.60), SIMDE_FLOAT32_C( 866.57), SIMDE_FLOAT32_C( -521.77) }, { -INT32_C( 670520312), INT32_C( 325347968), -INT32_C( 1786507877), -INT32_C( 1603260065), INT32_C( 1733579981), -INT32_C( 32202075), -INT32_C( 1104023254), INT32_C( 1808473954) }, { SIMDE_FLOAT32_C( -59.26), SIMDE_FLOAT32_C( -59.26), SIMDE_FLOAT32_C( -266.81), SIMDE_FLOAT32_C( -521.77), SIMDE_FLOAT32_C( -17.60), SIMDE_FLOAT32_C( -17.60), SIMDE_FLOAT32_C( 841.23), SIMDE_FLOAT32_C( 841.23) } }, { { SIMDE_FLOAT32_C( -400.14), SIMDE_FLOAT32_C( 83.44), SIMDE_FLOAT32_C( -751.08), SIMDE_FLOAT32_C( 794.29), SIMDE_FLOAT32_C( 639.38), SIMDE_FLOAT32_C( -222.75), SIMDE_FLOAT32_C( -375.60), SIMDE_FLOAT32_C( 682.13) }, { INT32_C( 326099892), -INT32_C( 508240108), -INT32_C( 666367949), -INT32_C( 707371605), -INT32_C( 1332541618), -INT32_C( 233087449), INT32_C( 1899912754), -INT32_C( 1169449723) }, { SIMDE_FLOAT32_C( 639.38), SIMDE_FLOAT32_C( 639.38), SIMDE_FLOAT32_C( 794.29), SIMDE_FLOAT32_C( 794.29), SIMDE_FLOAT32_C( -375.60), SIMDE_FLOAT32_C( 
682.13), SIMDE_FLOAT32_C( -751.08), SIMDE_FLOAT32_C( -222.75) } }, { { SIMDE_FLOAT32_C( -742.55), SIMDE_FLOAT32_C( -101.38), SIMDE_FLOAT32_C( -337.44), SIMDE_FLOAT32_C( 380.26), SIMDE_FLOAT32_C( 642.80), SIMDE_FLOAT32_C( 959.92), SIMDE_FLOAT32_C( -937.04), SIMDE_FLOAT32_C( -939.55) }, { INT32_C( 899989386), INT32_C( 1779071516), -INT32_C( 1474585215), INT32_C( 781924091), -INT32_C( 1700800107), -INT32_C( 128652681), INT32_C( 1032659620), INT32_C( 755565731) }, { SIMDE_FLOAT32_C( -337.44), SIMDE_FLOAT32_C( 642.80), SIMDE_FLOAT32_C( -101.38), SIMDE_FLOAT32_C( 380.26), SIMDE_FLOAT32_C( 959.92), SIMDE_FLOAT32_C( -939.55), SIMDE_FLOAT32_C( 642.80), SIMDE_FLOAT32_C( 380.26) } }, { { SIMDE_FLOAT32_C( -5.16), SIMDE_FLOAT32_C( 584.21), SIMDE_FLOAT32_C( 220.31), SIMDE_FLOAT32_C( -944.84), SIMDE_FLOAT32_C( -786.38), SIMDE_FLOAT32_C( -527.06), SIMDE_FLOAT32_C( -779.98), SIMDE_FLOAT32_C( -331.72) }, { INT32_C( 89220617), INT32_C( 825486492), INT32_C( 1020056261), INT32_C( 1614028988), -INT32_C( 425868734), -INT32_C( 1978423610), INT32_C( 2053862995), -INT32_C( 333269278) }, { SIMDE_FLOAT32_C( 584.21), SIMDE_FLOAT32_C( -786.38), SIMDE_FLOAT32_C( -527.06), SIMDE_FLOAT32_C( -786.38), SIMDE_FLOAT32_C( 220.31), SIMDE_FLOAT32_C( -779.98), SIMDE_FLOAT32_C( -944.84), SIMDE_FLOAT32_C( 220.31) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi32(test_vec[i].b); simde__m256 r = simde_mm256_permutevar8x32_ps(a, b); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_sad_epu8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint8_t a[32]; const uint8_t b[32]; const uint64_t r[4]; } test_vec[] = { { { UINT8_C(193), UINT8_C(212), UINT8_C( 9), UINT8_C(139), UINT8_C(179), UINT8_C(138), UINT8_C(126), UINT8_C(149), UINT8_C(188), UINT8_C(191), UINT8_C(174), UINT8_C( 53), UINT8_C(220), UINT8_C(132), 
UINT8_C(137), UINT8_C(189), UINT8_C( 77), UINT8_C( 99), UINT8_C(211), UINT8_C(102), UINT8_C( 2), UINT8_C( 4), UINT8_C(118), UINT8_C(127), UINT8_C(149), UINT8_C( 77), UINT8_C( 65), UINT8_C(195), UINT8_C(184), UINT8_C(249), UINT8_C(233), UINT8_C(122) }, { UINT8_C(205), UINT8_C(242), UINT8_C( 5), UINT8_C(128), UINT8_C(125), UINT8_C(131), UINT8_C( 22), UINT8_C( 57), UINT8_C( 66), UINT8_C(196), UINT8_C(111), UINT8_C( 30), UINT8_C( 72), UINT8_C(248), UINT8_C(220), UINT8_C(149), UINT8_C( 91), UINT8_C(175), UINT8_C(251), UINT8_C( 93), UINT8_C(179), UINT8_C(113), UINT8_C(220), UINT8_C( 72), UINT8_C(190), UINT8_C( 29), UINT8_C( 11), UINT8_C(119), UINT8_C( 23), UINT8_C(244), UINT8_C(241), UINT8_C(228) }, { UINT64_C( 314), UINT64_C( 600), UINT64_C( 582), UINT64_C( 499) } }, { { UINT8_C(231), UINT8_C(246), UINT8_C(101), UINT8_C(100), UINT8_C(122), UINT8_C(123), UINT8_C(157), UINT8_C(188), UINT8_C( 63), UINT8_C( 12), UINT8_C(219), UINT8_C(135), UINT8_C( 4), UINT8_C(183), UINT8_C( 28), UINT8_C( 95), UINT8_C(102), UINT8_C( 24), UINT8_C(189), UINT8_C( 26), UINT8_C(137), UINT8_C(153), UINT8_C( 98), UINT8_C( 72), UINT8_C(183), UINT8_C(110), UINT8_C(191), UINT8_C(206), UINT8_C( 98), UINT8_C(176), UINT8_C(178), UINT8_C( 73) }, { UINT8_C(166), UINT8_C( 23), UINT8_C(173), UINT8_C( 32), UINT8_C(146), UINT8_C( 75), UINT8_C(221), UINT8_C(209), UINT8_C( 87), UINT8_C(184), UINT8_C( 88), UINT8_C( 92), UINT8_C(111), UINT8_C(117), UINT8_C(187), UINT8_C(213), UINT8_C(141), UINT8_C(120), UINT8_C(239), UINT8_C( 22), UINT8_C( 18), UINT8_C( 82), UINT8_C( 94), UINT8_C(201), UINT8_C(192), UINT8_C( 29), UINT8_C(151), UINT8_C( 34), UINT8_C(205), UINT8_C( 73), UINT8_C(108), UINT8_C(116) }, { UINT64_C( 585), UINT64_C( 820), UINT64_C( 512), UINT64_C( 625) } }, { { UINT8_C( 97), UINT8_C( 25), UINT8_C(148), UINT8_C(243), UINT8_C(100), UINT8_C(113), UINT8_C(197), UINT8_C(188), UINT8_C( 41), UINT8_C( 29), UINT8_C( 24), UINT8_C(152), UINT8_C(146), UINT8_C(211), UINT8_C(110), UINT8_C( 31), UINT8_C( 76), UINT8_C( 
93), UINT8_C( 54), UINT8_C( 94), UINT8_C(175), UINT8_C(148), UINT8_C( 39), UINT8_C(111), UINT8_C(178), UINT8_C(190), UINT8_C(146), UINT8_C(127), UINT8_C( 7), UINT8_C(254), UINT8_C(243), UINT8_C(104) }, { UINT8_C( 23), UINT8_C(136), UINT8_C( 92), UINT8_C(124), UINT8_C(249), UINT8_C( 33), UINT8_C( 56), UINT8_C( 35), UINT8_C( 62), UINT8_C( 80), UINT8_C(187), UINT8_C(209), UINT8_C( 35), UINT8_C( 41), UINT8_C(240), UINT8_C(111), UINT8_C(135), UINT8_C( 38), UINT8_C(205), UINT8_C( 54), UINT8_C(187), UINT8_C(244), UINT8_C(166), UINT8_C(109), UINT8_C(178), UINT8_C( 56), UINT8_C(236), UINT8_C(186), UINT8_C( 54), UINT8_C(224), UINT8_C( 34), UINT8_C( 77) }, { UINT64_C( 883), UINT64_C( 783), UINT64_C( 542), UINT64_C( 596) } }, { { UINT8_C(104), UINT8_C(126), UINT8_C(201), UINT8_C( 97), UINT8_C(159), UINT8_C( 1), UINT8_C(132), UINT8_C(222), UINT8_C( 81), UINT8_C( 64), UINT8_C(175), UINT8_C(117), UINT8_C(105), UINT8_C(159), UINT8_C(228), UINT8_C(240), UINT8_C(198), UINT8_C(178), UINT8_C( 39), UINT8_C(129), UINT8_C(166), UINT8_C(205), UINT8_C(238), UINT8_C( 89), UINT8_C( 5), UINT8_C(218), UINT8_C( 19), UINT8_C( 59), UINT8_C(186), UINT8_C( 53), UINT8_C(136), UINT8_C( 34) }, { UINT8_C(180), UINT8_C( 82), UINT8_C(132), UINT8_C( 83), UINT8_C( 83), UINT8_C( 8), UINT8_C( 49), UINT8_C(165), UINT8_C( 72), UINT8_C(224), UINT8_C( 26), UINT8_C(178), UINT8_C(128), UINT8_C(254), UINT8_C(162), UINT8_C( 70), UINT8_C(176), UINT8_C(201), UINT8_C(199), UINT8_C( 87), UINT8_C(150), UINT8_C(181), UINT8_C(176), UINT8_C(155), UINT8_C(143), UINT8_C(195), UINT8_C(214), UINT8_C( 74), UINT8_C(248), UINT8_C( 95), UINT8_C(108), UINT8_C(172) }, { UINT64_C( 426), UINT64_C( 733), UINT64_C( 415), UINT64_C( 641) } }, { { UINT8_C(177), UINT8_C(240), UINT8_C( 0), UINT8_C( 4), UINT8_C(249), UINT8_C( 49), UINT8_C(169), UINT8_C( 65), UINT8_C( 18), UINT8_C(195), UINT8_C(243), UINT8_C(146), UINT8_C(194), UINT8_C(150), UINT8_C(216), UINT8_C(114), UINT8_C( 95), UINT8_C(159), UINT8_C(201), UINT8_C(246), UINT8_C( 84), 
UINT8_C(121), UINT8_C(145), UINT8_C(227), UINT8_C( 60), UINT8_C(104), UINT8_C( 45), UINT8_C( 53), UINT8_C(199), UINT8_C(154), UINT8_C(225), UINT8_C(120) }, { UINT8_C(138), UINT8_C(225), UINT8_C(124), UINT8_C(131), UINT8_C( 19), UINT8_C( 38), UINT8_C(197), UINT8_C( 37), UINT8_C(233), UINT8_C(184), UINT8_C(183), UINT8_C(171), UINT8_C( 78), UINT8_C(143), UINT8_C( 30), UINT8_C(174), UINT8_C( 46), UINT8_C(231), UINT8_C(164), UINT8_C(130), UINT8_C( 97), UINT8_C( 53), UINT8_C(101), UINT8_C(157), UINT8_C(157), UINT8_C(147), UINT8_C(210), UINT8_C(100), UINT8_C( 45), UINT8_C(180), UINT8_C(220), UINT8_C(183) }, { UINT64_C( 602), UINT64_C( 680), UINT64_C( 469), UINT64_C( 600) } }, { { UINT8_C(149), UINT8_C( 89), UINT8_C( 59), UINT8_C(168), UINT8_C(127), UINT8_C( 0), UINT8_C(205), UINT8_C(104), UINT8_C(184), UINT8_C(132), UINT8_C( 20), UINT8_C( 7), UINT8_C( 19), UINT8_C( 50), UINT8_C(181), UINT8_C( 65), UINT8_C( 25), UINT8_C( 89), UINT8_C(195), UINT8_C(122), UINT8_C(142), UINT8_C( 41), UINT8_C( 24), UINT8_C( 44), UINT8_C(188), UINT8_C(234), UINT8_C(144), UINT8_C(233), UINT8_C(158), UINT8_C(109), UINT8_C(160), UINT8_C( 52) }, { UINT8_C(198), UINT8_C(219), UINT8_C(220), UINT8_C( 69), UINT8_C(219), UINT8_C(170), UINT8_C(173), UINT8_C(148), UINT8_C( 46), UINT8_C(193), UINT8_C(155), UINT8_C( 66), UINT8_C(243), UINT8_C( 80), UINT8_C(131), UINT8_C( 13), UINT8_C(169), UINT8_C( 71), UINT8_C(135), UINT8_C( 55), UINT8_C(112), UINT8_C(159), UINT8_C( 99), UINT8_C( 44), UINT8_C(138), UINT8_C(244), UINT8_C( 21), UINT8_C( 40), UINT8_C( 97), UINT8_C(181), UINT8_C( 92), UINT8_C( 39) }, { UINT64_C( 777), UINT64_C( 749), UINT64_C( 512), UINT64_C( 590) } }, { { UINT8_C(145), UINT8_C( 57), UINT8_C(108), UINT8_C(108), UINT8_C(227), UINT8_C( 25), UINT8_C( 0), UINT8_C( 17), UINT8_C(219), UINT8_C(155), UINT8_C( 83), UINT8_C(206), UINT8_C(235), UINT8_C(215), UINT8_C(219), UINT8_C(148), UINT8_C( 30), UINT8_C( 99), UINT8_C(204), UINT8_C(142), UINT8_C( 2), UINT8_C( 47), UINT8_C(186), UINT8_C(140), UINT8_C( 
35), UINT8_C(207), UINT8_C(181), UINT8_C(132), UINT8_C(132), UINT8_C( 17), UINT8_C(171), UINT8_C( 21) }, { UINT8_C( 74), UINT8_C( 23), UINT8_C(130), UINT8_C( 45), UINT8_C( 49), UINT8_C(130), UINT8_C( 63), UINT8_C( 12), UINT8_C( 30), UINT8_C(146), UINT8_C(218), UINT8_C( 9), UINT8_C(105), UINT8_C(182), UINT8_C(158), UINT8_C(135), UINT8_C( 25), UINT8_C(106), UINT8_C( 21), UINT8_C( 27), UINT8_C(153), UINT8_C(207), UINT8_C(168), UINT8_C(189), UINT8_C(158), UINT8_C( 93), UINT8_C( 65), UINT8_C( 35), UINT8_C(110), UINT8_C(237), UINT8_C( 56), UINT8_C(185) }, { UINT64_C( 541), UINT64_C( 767), UINT64_C( 688), UINT64_C( 971) } }, { { UINT8_C( 4), UINT8_C(186), UINT8_C(230), UINT8_C( 53), UINT8_C( 61), UINT8_C( 37), UINT8_C( 65), UINT8_C( 91), UINT8_C(184), UINT8_C( 28), UINT8_C(100), UINT8_C( 33), UINT8_C(210), UINT8_C( 2), UINT8_C(169), UINT8_C(235), UINT8_C(108), UINT8_C(190), UINT8_C( 6), UINT8_C( 6), UINT8_C(142), UINT8_C(174), UINT8_C(195), UINT8_C( 44), UINT8_C( 11), UINT8_C( 4), UINT8_C( 79), UINT8_C(122), UINT8_C(241), UINT8_C(136), UINT8_C( 51), UINT8_C(246) }, { UINT8_C( 66), UINT8_C( 25), UINT8_C( 43), UINT8_C(127), UINT8_C( 63), UINT8_C(109), UINT8_C(218), UINT8_C(247), UINT8_C(137), UINT8_C( 63), UINT8_C( 24), UINT8_C( 91), UINT8_C( 65), UINT8_C(193), UINT8_C( 70), UINT8_C(174), UINT8_C(128), UINT8_C( 76), UINT8_C(180), UINT8_C( 14), UINT8_C(251), UINT8_C(119), UINT8_C( 58), UINT8_C( 6), UINT8_C(123), UINT8_C(138), UINT8_C(128), UINT8_C(109), UINT8_C( 18), UINT8_C(179), UINT8_C( 99), UINT8_C( 84) }, { UINT64_C( 867), UINT64_C( 712), UINT64_C( 655), UINT64_C( 784) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi8(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi8(test_vec[i].b); simde__m256i r = simde_mm256_sad_epu8(a, b); simde_test_x86_assert_equal_u64x4(r, simde_x_mm256_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm256_shuffle_epi8(SIMDE_MUNIT_TEST_ARGS) { const 
struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu8(UINT8_C(132), UINT8_C(100), UINT8_C(115), UINT8_C( 94), UINT8_C( 73), UINT8_C(247), UINT8_C(104), UINT8_C(220), UINT8_C(117), UINT8_C( 74), UINT8_C( 39), UINT8_C(125), UINT8_C( 51), UINT8_C( 55), UINT8_C(148), UINT8_C(183), UINT8_C( 5), UINT8_C(228), UINT8_C( 33), UINT8_C( 72), UINT8_C( 84), UINT8_C( 4), UINT8_C(172), UINT8_C(134), UINT8_C( 26), UINT8_C(247), UINT8_C(250), UINT8_C( 1), UINT8_C(220), UINT8_C( 43), UINT8_C( 83), UINT8_C(118)), simde_x_mm256_set_epu8(UINT8_C( 24), UINT8_C(160), UINT8_C( 38), UINT8_C(201), UINT8_C(108), UINT8_C( 32), UINT8_C( 93), UINT8_C( 16), UINT8_C(104), UINT8_C(146), UINT8_C(128), UINT8_C(127), UINT8_C(240), UINT8_C(187), UINT8_C(163), UINT8_C( 93), UINT8_C(161), UINT8_C(148), UINT8_C(236), UINT8_C(208), UINT8_C(110), UINT8_C( 19), UINT8_C( 3), UINT8_C(142), UINT8_C( 56), UINT8_C( 74), UINT8_C( 2), UINT8_C(168), UINT8_C(100), UINT8_C(209), UINT8_C( 27), UINT8_C( 46)), simde_x_mm256_set_epu8(UINT8_C(220), UINT8_C( 0), UINT8_C( 74), UINT8_C( 0), UINT8_C( 94), UINT8_C(183), UINT8_C(115), UINT8_C(183), UINT8_C(220), UINT8_C( 0), UINT8_C( 0), UINT8_C(132), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(115), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(228), UINT8_C(220), UINT8_C(220), UINT8_C( 0), UINT8_C(134), UINT8_C( 4), UINT8_C( 43), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 84), UINT8_C(228)) }, { simde_x_mm256_set_epu8(UINT8_C( 53), UINT8_C(227), UINT8_C(193), UINT8_C(203), UINT8_C(188), UINT8_C( 31), UINT8_C(134), UINT8_C(151), UINT8_C( 90), UINT8_C(227), UINT8_C(228), UINT8_C( 42), UINT8_C(223), UINT8_C( 71), UINT8_C( 37), UINT8_C(166), UINT8_C(115), UINT8_C( 13), UINT8_C(136), UINT8_C( 67), UINT8_C(142), UINT8_C( 55), UINT8_C(218), UINT8_C(135), UINT8_C(160), UINT8_C(121), UINT8_C(105), UINT8_C(225), UINT8_C( 67), UINT8_C(106), UINT8_C( 80), UINT8_C(187)), simde_x_mm256_set_epu8(UINT8_C(139), UINT8_C(207), 
UINT8_C(188), UINT8_C(170), UINT8_C(232), UINT8_C(172), UINT8_C( 51), UINT8_C( 14), UINT8_C( 50), UINT8_C(186), UINT8_C( 76), UINT8_C(187), UINT8_C( 22), UINT8_C( 90), UINT8_C(150), UINT8_C(148), UINT8_C(123), UINT8_C( 36), UINT8_C(145), UINT8_C( 72), UINT8_C(252), UINT8_C(171), UINT8_C(109), UINT8_C( 77), UINT8_C(145), UINT8_C( 77), UINT8_C(121), UINT8_C(107), UINT8_C(138), UINT8_C(154), UINT8_C(236), UINT8_C( 13)), simde_x_mm256_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(223), UINT8_C(227), UINT8_C( 71), UINT8_C( 0), UINT8_C(203), UINT8_C( 0), UINT8_C(227), UINT8_C( 31), UINT8_C( 0), UINT8_C( 0), UINT8_C(142), UINT8_C(225), UINT8_C( 0), UINT8_C(135), UINT8_C( 0), UINT8_C( 0), UINT8_C(136), UINT8_C(136), UINT8_C( 0), UINT8_C(136), UINT8_C(218), UINT8_C(142), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(136)) }, { simde_x_mm256_set_epu8(UINT8_C(147), UINT8_C(175), UINT8_C(103), UINT8_C(105), UINT8_C(115), UINT8_C( 25), UINT8_C( 88), UINT8_C( 95), UINT8_C(108), UINT8_C(195), UINT8_C( 30), UINT8_C( 32), UINT8_C(183), UINT8_C(198), UINT8_C(177), UINT8_C( 61), UINT8_C(213), UINT8_C( 7), UINT8_C(150), UINT8_C(186), UINT8_C( 52), UINT8_C( 9), UINT8_C( 76), UINT8_C(240), UINT8_C(240), UINT8_C(126), UINT8_C(242), UINT8_C(203), UINT8_C(162), UINT8_C( 72), UINT8_C(186), UINT8_C( 35)), simde_x_mm256_set_epu8(UINT8_C( 26), UINT8_C( 1), UINT8_C(253), UINT8_C(220), UINT8_C(156), UINT8_C(126), UINT8_C(237), UINT8_C(228), UINT8_C(210), UINT8_C( 82), UINT8_C( 8), UINT8_C( 8), UINT8_C(104), UINT8_C( 35), UINT8_C( 98), UINT8_C( 24), UINT8_C(230), UINT8_C(227), UINT8_C( 73), UINT8_C(233), UINT8_C(132), UINT8_C( 49), UINT8_C( 63), UINT8_C(172), UINT8_C(137), UINT8_C( 87), UINT8_C( 85), UINT8_C(128), UINT8_C(136), UINT8_C(108), UINT8_C( 70), UINT8_C(218)), simde_x_mm256_set_epu8(UINT8_C( 25), UINT8_C(177), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(175), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(198), UINT8_C( 95), UINT8_C( 
95), UINT8_C( 95), UINT8_C(183), UINT8_C(198), UINT8_C( 95), UINT8_C( 0), UINT8_C( 0), UINT8_C( 76), UINT8_C( 0), UINT8_C( 0), UINT8_C(186), UINT8_C(213), UINT8_C( 0), UINT8_C( 0), UINT8_C(240), UINT8_C(242), UINT8_C( 0), UINT8_C( 0), UINT8_C(186), UINT8_C(126), UINT8_C( 0)) }, { simde_x_mm256_set_epu8(UINT8_C(132), UINT8_C( 22), UINT8_C(115), UINT8_C(245), UINT8_C(139), UINT8_C(127), UINT8_C( 30), UINT8_C( 96), UINT8_C(123), UINT8_C(228), UINT8_C( 73), UINT8_C(244), UINT8_C( 12), UINT8_C( 46), UINT8_C( 26), UINT8_C( 36), UINT8_C(186), UINT8_C( 18), UINT8_C(169), UINT8_C(233), UINT8_C( 8), UINT8_C(185), UINT8_C(123), UINT8_C( 50), UINT8_C(171), UINT8_C(240), UINT8_C(159), UINT8_C(176), UINT8_C( 29), UINT8_C(146), UINT8_C( 85), UINT8_C( 34)), simde_x_mm256_set_epu8(UINT8_C(197), UINT8_C( 93), UINT8_C( 52), UINT8_C(130), UINT8_C(124), UINT8_C( 87), UINT8_C( 88), UINT8_C( 89), UINT8_C( 56), UINT8_C(209), UINT8_C( 85), UINT8_C(235), UINT8_C( 45), UINT8_C(236), UINT8_C(154), UINT8_C( 72), UINT8_C(164), UINT8_C(253), UINT8_C(196), UINT8_C( 41), UINT8_C( 28), UINT8_C(181), UINT8_C(180), UINT8_C(237), UINT8_C( 65), UINT8_C(122), UINT8_C(110), UINT8_C( 94), UINT8_C(125), UINT8_C( 60), UINT8_C( 80), UINT8_C(209)), simde_x_mm256_set_epu8(UINT8_C( 0), UINT8_C(115), UINT8_C(244), UINT8_C( 0), UINT8_C(245), UINT8_C(123), UINT8_C( 96), UINT8_C( 30), UINT8_C( 96), UINT8_C( 0), UINT8_C( 73), UINT8_C( 0), UINT8_C(115), UINT8_C( 0), UINT8_C( 0), UINT8_C( 96), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(123), UINT8_C(233), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 85), UINT8_C(185), UINT8_C( 18), UINT8_C( 18), UINT8_C(169), UINT8_C(233), UINT8_C( 34), UINT8_C( 0)) }, { simde_x_mm256_set_epu8(UINT8_C(233), UINT8_C(108), UINT8_C(215), UINT8_C(181), UINT8_C(100), UINT8_C( 72), UINT8_C(220), UINT8_C( 52), UINT8_C( 24), UINT8_C( 0), UINT8_C(242), UINT8_C(125), UINT8_C(116), UINT8_C(102), UINT8_C( 94), UINT8_C( 64), UINT8_C(158), UINT8_C(177), UINT8_C( 3), UINT8_C(246), 
UINT8_C(164), UINT8_C(171), UINT8_C(138), UINT8_C( 15), UINT8_C( 30), UINT8_C( 68), UINT8_C(246), UINT8_C(147), UINT8_C(162), UINT8_C( 13), UINT8_C( 93), UINT8_C( 58)), simde_x_mm256_set_epu8(UINT8_C(159), UINT8_C(224), UINT8_C(140), UINT8_C(117), UINT8_C( 95), UINT8_C( 45), UINT8_C(144), UINT8_C( 69), UINT8_C(174), UINT8_C( 34), UINT8_C(237), UINT8_C( 42), UINT8_C( 78), UINT8_C( 7), UINT8_C(200), UINT8_C(173), UINT8_C(137), UINT8_C( 75), UINT8_C(170), UINT8_C(110), UINT8_C(182), UINT8_C(180), UINT8_C(173), UINT8_C(172), UINT8_C(227), UINT8_C(222), UINT8_C(132), UINT8_C( 4), UINT8_C(190), UINT8_C( 7), UINT8_C( 12), UINT8_C( 21)), simde_x_mm256_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(242), UINT8_C(233), UINT8_C(215), UINT8_C( 0), UINT8_C(242), UINT8_C( 0), UINT8_C(102), UINT8_C( 0), UINT8_C( 72), UINT8_C(108), UINT8_C( 24), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(164), UINT8_C( 0), UINT8_C(177), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(147), UINT8_C( 0), UINT8_C( 30), UINT8_C(246), UINT8_C(246)) }, { simde_x_mm256_set_epu8(UINT8_C(239), UINT8_C(190), UINT8_C( 67), UINT8_C(183), UINT8_C(162), UINT8_C( 57), UINT8_C(249), UINT8_C(218), UINT8_C(176), UINT8_C(139), UINT8_C( 3), UINT8_C(248), UINT8_C(214), UINT8_C( 36), UINT8_C(105), UINT8_C( 96), UINT8_C( 80), UINT8_C(108), UINT8_C(142), UINT8_C( 2), UINT8_C(196), UINT8_C(162), UINT8_C( 38), UINT8_C( 4), UINT8_C(175), UINT8_C(178), UINT8_C( 88), UINT8_C(165), UINT8_C(168), UINT8_C( 71), UINT8_C( 76), UINT8_C(232)), simde_x_mm256_set_epu8(UINT8_C(199), UINT8_C( 95), UINT8_C( 43), UINT8_C(216), UINT8_C(128), UINT8_C(137), UINT8_C(103), UINT8_C( 51), UINT8_C(121), UINT8_C( 84), UINT8_C(196), UINT8_C(101), UINT8_C(251), UINT8_C( 4), UINT8_C( 89), UINT8_C( 54), UINT8_C(216), UINT8_C(149), UINT8_C(145), UINT8_C( 73), UINT8_C(162), UINT8_C( 73), UINT8_C(124), UINT8_C(146), UINT8_C( 70), UINT8_C(151), UINT8_C(137), UINT8_C(218), UINT8_C(230), 
UINT8_C(112), UINT8_C( 45), UINT8_C(226)), simde_x_mm256_set_epu8(UINT8_C( 0), UINT8_C(239), UINT8_C(162), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(176), UINT8_C(214), UINT8_C(249), UINT8_C(248), UINT8_C( 0), UINT8_C( 3), UINT8_C( 0), UINT8_C(248), UINT8_C(249), UINT8_C(139), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 38), UINT8_C( 0), UINT8_C( 38), UINT8_C( 2), UINT8_C( 0), UINT8_C(178), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(232), UINT8_C(142), UINT8_C( 0)) }, { simde_x_mm256_set_epu8(UINT8_C( 81), UINT8_C(102), UINT8_C( 49), UINT8_C(133), UINT8_C(250), UINT8_C( 13), UINT8_C(233), UINT8_C( 67), UINT8_C(205), UINT8_C(254), UINT8_C( 41), UINT8_C( 6), UINT8_C(117), UINT8_C(148), UINT8_C(222), UINT8_C(171), UINT8_C( 8), UINT8_C( 99), UINT8_C( 93), UINT8_C( 39), UINT8_C(103), UINT8_C(215), UINT8_C(216), UINT8_C( 75), UINT8_C( 94), UINT8_C(171), UINT8_C( 41), UINT8_C(173), UINT8_C(217), UINT8_C(138), UINT8_C(180), UINT8_C(160)), simde_x_mm256_set_epu8(UINT8_C(200), UINT8_C(107), UINT8_C(119), UINT8_C(216), UINT8_C( 73), UINT8_C(187), UINT8_C(212), UINT8_C( 20), UINT8_C( 22), UINT8_C( 11), UINT8_C(155), UINT8_C(106), UINT8_C( 35), UINT8_C(108), UINT8_C( 43), UINT8_C( 58), UINT8_C(102), UINT8_C(228), UINT8_C( 5), UINT8_C(227), UINT8_C(106), UINT8_C(119), UINT8_C(184), UINT8_C(199), UINT8_C( 57), UINT8_C(120), UINT8_C( 14), UINT8_C( 56), UINT8_C( 82), UINT8_C( 19), UINT8_C( 33), UINT8_C( 82)), simde_x_mm256_set_epu8(UINT8_C( 0), UINT8_C(250), UINT8_C(205), UINT8_C( 0), UINT8_C(233), UINT8_C( 0), UINT8_C( 0), UINT8_C( 6), UINT8_C(254), UINT8_C(250), UINT8_C( 0), UINT8_C( 13), UINT8_C(117), UINT8_C(133), UINT8_C(250), UINT8_C( 13), UINT8_C(171), UINT8_C( 0), UINT8_C( 41), UINT8_C( 0), UINT8_C(215), UINT8_C( 94), UINT8_C( 0), UINT8_C( 0), UINT8_C(216), UINT8_C( 75), UINT8_C( 99), UINT8_C( 75), UINT8_C(138), UINT8_C(217), UINT8_C(180), UINT8_C(138)) }, { simde_x_mm256_set_epu8(UINT8_C(173), UINT8_C(218), UINT8_C( 20), UINT8_C( 45), 
UINT8_C(110), UINT8_C( 90), UINT8_C(183), UINT8_C( 54), UINT8_C(102), UINT8_C(126), UINT8_C(167), UINT8_C(135), UINT8_C( 42), UINT8_C( 74), UINT8_C( 22), UINT8_C(137), UINT8_C( 56), UINT8_C( 5), UINT8_C( 18), UINT8_C(105), UINT8_C( 83), UINT8_C(146), UINT8_C(229), UINT8_C( 54), UINT8_C(169), UINT8_C( 55), UINT8_C(228), UINT8_C(168), UINT8_C( 1), UINT8_C(126), UINT8_C(169), UINT8_C(235)), simde_x_mm256_set_epu8(UINT8_C(254), UINT8_C( 56), UINT8_C( 4), UINT8_C(163), UINT8_C( 35), UINT8_C( 52), UINT8_C(182), UINT8_C( 52), UINT8_C( 1), UINT8_C( 45), UINT8_C(119), UINT8_C(169), UINT8_C(175), UINT8_C(254), UINT8_C( 71), UINT8_C( 63), UINT8_C( 24), UINT8_C(116), UINT8_C(194), UINT8_C(184), UINT8_C(150), UINT8_C(167), UINT8_C(101), UINT8_C(133), UINT8_C( 95), UINT8_C( 84), UINT8_C(199), UINT8_C(134), UINT8_C(251), UINT8_C(101), UINT8_C( 38), UINT8_C( 43)), simde_x_mm256_set_epu8(UINT8_C( 0), UINT8_C( 54), UINT8_C(135), UINT8_C( 0), UINT8_C( 42), UINT8_C(135), UINT8_C( 0), UINT8_C(135), UINT8_C( 22), UINT8_C( 20), UINT8_C(102), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(102), UINT8_C(173), UINT8_C( 54), UINT8_C(168), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(228), UINT8_C( 0), UINT8_C( 56), UINT8_C(168), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(228), UINT8_C( 55), UINT8_C( 83)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_shuffle_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_shuffle_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 7953530), INT32_C( 1626445648), INT32_C( 338474584), INT32_C( 1314132322), INT32_C( -165526664), INT32_C( 1862544223), INT32_C( 1736492928), INT32_C( -812721862)), simde_mm256_set_epi32(INT32_C( 1626445648), INT32_C( 1626445648), INT32_C( 1626445648), INT32_C( 1626445648), INT32_C( 1862544223), INT32_C( 
1862544223), INT32_C( 1862544223), INT32_C( 1862544223)) }, { simde_mm256_set_epi32(INT32_C( 1571455878), INT32_C( 1654912639), INT32_C( 1961566624), INT32_C( 1369731271), INT32_C( -802314883), INT32_C( 1895104772), INT32_C(-1294636783), INT32_C( -2090880)), simde_mm256_set_epi32(INT32_C( 1654912639), INT32_C( 1654912639), INT32_C( 1654912639), INT32_C( 1654912639), INT32_C( 1895104772), INT32_C( 1895104772), INT32_C( 1895104772), INT32_C( 1895104772)) }, { simde_mm256_set_epi32(INT32_C( 716787034), INT32_C( 279681787), INT32_C( -21556639), INT32_C(-1981624037), INT32_C( 330738474), INT32_C(-1021797214), INT32_C( 117555788), INT32_C( 1295745410)), simde_mm256_set_epi32(INT32_C( 279681787), INT32_C( 279681787), INT32_C( 279681787), INT32_C( 279681787), INT32_C(-1021797214), INT32_C(-1021797214), INT32_C(-1021797214), INT32_C(-1021797214)) }, { simde_mm256_set_epi32(INT32_C(-1161252350), INT32_C( 1333069761), INT32_C( -746913676), INT32_C(-1297385199), INT32_C( 1762552906), INT32_C( 1819201471), INT32_C(-1000519846), INT32_C( 2096577459)), simde_mm256_set_epi32(INT32_C( 1333069761), INT32_C( 1333069761), INT32_C( 1333069761), INT32_C( 1333069761), INT32_C( 1819201471), INT32_C( 1819201471), INT32_C( 1819201471), INT32_C( 1819201471)) }, { simde_mm256_set_epi32(INT32_C( 1772174643), INT32_C( 2012282759), INT32_C( 407185018), INT32_C(-1907491648), INT32_C(-1950535235), INT32_C( 59473466), INT32_C( -212502371), INT32_C( -171378108)), simde_mm256_set_epi32(INT32_C( 2012282759), INT32_C( 2012282759), INT32_C( 2012282759), INT32_C( 2012282759), INT32_C( 59473466), INT32_C( 59473466), INT32_C( 59473466), INT32_C( 59473466)) }, { simde_mm256_set_epi32(INT32_C(-1786867928), INT32_C( 183968379), INT32_C(-1710767833), INT32_C( 419209661), INT32_C( 1470446764), INT32_C( -838411183), INT32_C( 1711518303), INT32_C(-1929674962)), simde_mm256_set_epi32(INT32_C( 183968379), INT32_C( 183968379), INT32_C( 183968379), INT32_C( 183968379), INT32_C( -838411183), INT32_C( -838411183), 
INT32_C( -838411183), INT32_C( -838411183)) }, { simde_mm256_set_epi32(INT32_C(-1296827172), INT32_C(-1426782704), INT32_C( 461497475), INT32_C( 857689164), INT32_C( -127267738), INT32_C( -225620875), INT32_C( -476268334), INT32_C( 275470)), simde_mm256_set_epi32(INT32_C(-1426782704), INT32_C(-1426782704), INT32_C(-1426782704), INT32_C(-1426782704), INT32_C( -225620875), INT32_C( -225620875), INT32_C( -225620875), INT32_C( -225620875)) }, { simde_mm256_set_epi32(INT32_C( 492995740), INT32_C(-1095702956), INT32_C( 1746718954), INT32_C( 2048506117), INT32_C( 180431580), INT32_C(-1463759846), INT32_C( 1212242273), INT32_C(-1640731308)), simde_mm256_set_epi32(INT32_C(-1095702956), INT32_C(-1095702956), INT32_C(-1095702956), INT32_C(-1095702956), INT32_C(-1463759846), INT32_C(-1463759846), INT32_C(-1463759846), INT32_C(-1463759846)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_shuffle_epi32(test_vec[i].a, 0xaa); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_shufflehi_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[16]; const int16_t r[16]; } test_vec[] = { { { -INT16_C( 2516), -INT16_C( 21378), INT16_C( 2569), INT16_C( 28926), INT16_C( 23995), INT16_C( 19519), INT16_C( 305), INT16_C( 3639), INT16_C( 27436), INT16_C( 23441), -INT16_C( 10560), INT16_C( 8981), -INT16_C( 30716), -INT16_C( 1213), -INT16_C( 20665), INT16_C( 29550) }, { -INT16_C( 2516), -INT16_C( 21378), INT16_C( 2569), INT16_C( 28926), INT16_C( 19519), INT16_C( 19519), INT16_C( 19519), INT16_C( 305), INT16_C( 27436), INT16_C( 23441), -INT16_C( 10560), INT16_C( 8981), -INT16_C( 1213), -INT16_C( 1213), -INT16_C( 1213), -INT16_C( 20665) } }, { { -INT16_C( 4955), -INT16_C( 20961), INT16_C( 7670), -INT16_C( 19938), INT16_C( 24187), -INT16_C( 21250), INT16_C( 13663), -INT16_C( 29766), INT16_C( 19360), INT16_C( 24806), -INT16_C( 1247), INT16_C( 9603), -INT16_C( 14717), -INT16_C( 13792), 
-INT16_C( 29067), INT16_C( 6973) }, { -INT16_C( 4955), -INT16_C( 20961), INT16_C( 7670), -INT16_C( 19938), -INT16_C( 21250), -INT16_C( 21250), -INT16_C( 21250), INT16_C( 13663), INT16_C( 19360), INT16_C( 24806), -INT16_C( 1247), INT16_C( 9603), -INT16_C( 13792), -INT16_C( 13792), -INT16_C( 13792), -INT16_C( 29067) } }, { { INT16_C( 23675), INT16_C( 29129), -INT16_C( 6022), -INT16_C( 2781), INT16_C( 8518), -INT16_C( 23135), INT16_C( 23638), -INT16_C( 2512), INT16_C( 5799), -INT16_C( 13993), -INT16_C( 9710), -INT16_C( 27154), INT16_C( 4001), INT16_C( 5728), -INT16_C( 25187), INT16_C( 6193) }, { INT16_C( 23675), INT16_C( 29129), -INT16_C( 6022), -INT16_C( 2781), -INT16_C( 23135), -INT16_C( 23135), -INT16_C( 23135), INT16_C( 23638), INT16_C( 5799), -INT16_C( 13993), -INT16_C( 9710), -INT16_C( 27154), INT16_C( 5728), INT16_C( 5728), INT16_C( 5728), -INT16_C( 25187) } }, { { -INT16_C( 1030), INT16_C( 29834), -INT16_C( 21021), INT16_C( 10601), INT16_C( 2767), INT16_C( 9678), -INT16_C( 410), INT16_C( 3612), INT16_C( 29460), INT16_C( 9943), -INT16_C( 15027), -INT16_C( 4420), INT16_C( 7380), INT16_C( 29189), INT16_C( 14009), -INT16_C( 19574) }, { -INT16_C( 1030), INT16_C( 29834), -INT16_C( 21021), INT16_C( 10601), INT16_C( 9678), INT16_C( 9678), INT16_C( 9678), -INT16_C( 410), INT16_C( 29460), INT16_C( 9943), -INT16_C( 15027), -INT16_C( 4420), INT16_C( 29189), INT16_C( 29189), INT16_C( 29189), INT16_C( 14009) } }, { { INT16_C( 5169), INT16_C( 5159), -INT16_C( 28478), -INT16_C( 28355), INT16_C( 2971), INT16_C( 438), -INT16_C( 11767), INT16_C( 7695), -INT16_C( 6587), -INT16_C( 27836), INT16_C( 172), -INT16_C( 32639), -INT16_C( 31204), -INT16_C( 10510), INT16_C( 32189), -INT16_C( 4471) }, { INT16_C( 5169), INT16_C( 5159), -INT16_C( 28478), -INT16_C( 28355), INT16_C( 438), INT16_C( 438), INT16_C( 438), -INT16_C( 11767), -INT16_C( 6587), -INT16_C( 27836), INT16_C( 172), -INT16_C( 32639), -INT16_C( 10510), -INT16_C( 10510), -INT16_C( 10510), INT16_C( 32189) } }, { { -INT16_C( 
20079), INT16_C( 21251), INT16_C( 16449), -INT16_C( 8988), -INT16_C( 25780), INT16_C( 21982), -INT16_C( 4755), -INT16_C( 19597), -INT16_C( 18220), -INT16_C( 32698), -INT16_C( 14408), -INT16_C( 11008), -INT16_C( 3250), INT16_C( 2987), INT16_C( 13424), INT16_C( 505) }, { -INT16_C( 20079), INT16_C( 21251), INT16_C( 16449), -INT16_C( 8988), INT16_C( 21982), INT16_C( 21982), INT16_C( 21982), -INT16_C( 4755), -INT16_C( 18220), -INT16_C( 32698), -INT16_C( 14408), -INT16_C( 11008), INT16_C( 2987), INT16_C( 2987), INT16_C( 2987), INT16_C( 13424) } }, { { -INT16_C( 795), INT16_C( 10069), INT16_C( 14653), -INT16_C( 30461), -INT16_C( 7724), INT16_C( 17118), INT16_C( 21199), -INT16_C( 23563), INT16_C( 15114), -INT16_C( 15837), INT16_C( 8962), INT16_C( 20631), INT16_C( 16918), -INT16_C( 31141), INT16_C( 21879), INT16_C( 23688) }, { -INT16_C( 795), INT16_C( 10069), INT16_C( 14653), -INT16_C( 30461), INT16_C( 17118), INT16_C( 17118), INT16_C( 17118), INT16_C( 21199), INT16_C( 15114), -INT16_C( 15837), INT16_C( 8962), INT16_C( 20631), -INT16_C( 31141), -INT16_C( 31141), -INT16_C( 31141), INT16_C( 21879) } }, { { -INT16_C( 8879), -INT16_C( 29053), -INT16_C( 30954), -INT16_C( 5353), -INT16_C( 2456), INT16_C( 14125), INT16_C( 8776), INT16_C( 21210), -INT16_C( 675), INT16_C( 24340), -INT16_C( 21471), INT16_C( 14256), INT16_C( 3054), INT16_C( 26046), INT16_C( 18016), -INT16_C( 19774) }, { -INT16_C( 8879), -INT16_C( 29053), -INT16_C( 30954), -INT16_C( 5353), INT16_C( 14125), INT16_C( 14125), INT16_C( 14125), INT16_C( 8776), -INT16_C( 675), INT16_C( 24340), -INT16_C( 21471), INT16_C( 14256), INT16_C( 26046), INT16_C( 26046), INT16_C( 26046), INT16_C( 18016) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i r = simde_mm256_shufflehi_epi16(a, 149); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int 
test_simde_mm256_shufflelo_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C( -2179), INT16_C( 30608), INT16_C( 19808), INT16_C(-28307), INT16_C( 25873), INT16_C( -9642), INT16_C( 56), INT16_C( 338), INT16_C(-29256), INT16_C( 9693), INT16_C( 18909), INT16_C( 3915), INT16_C( 2653), INT16_C( 26676), INT16_C(-23808), INT16_C( 25025)), simde_mm256_set_epi16(INT16_C( -2179), INT16_C( 30608), INT16_C( 19808), INT16_C(-28307), INT16_C( -9642), INT16_C( 56), INT16_C( 56), INT16_C( 56), INT16_C(-29256), INT16_C( 9693), INT16_C( 18909), INT16_C( 3915), INT16_C( 26676), INT16_C(-23808), INT16_C(-23808), INT16_C(-23808)) }, { simde_mm256_set_epi16(INT16_C( 18348), INT16_C(-29440), INT16_C( -6978), INT16_C( 22811), INT16_C(-31995), INT16_C( 32157), INT16_C( -1931), INT16_C( 15650), INT16_C( 25681), INT16_C( 27258), INT16_C( -9033), INT16_C( 28640), INT16_C(-16045), INT16_C( 2509), INT16_C(-23756), INT16_C( 4399)), simde_mm256_set_epi16(INT16_C( 18348), INT16_C(-29440), INT16_C( -6978), INT16_C( 22811), INT16_C( 32157), INT16_C( -1931), INT16_C( -1931), INT16_C( -1931), INT16_C( 25681), INT16_C( 27258), INT16_C( -9033), INT16_C( 28640), INT16_C( 2509), INT16_C(-23756), INT16_C(-23756), INT16_C(-23756)) }, { simde_mm256_set_epi16(INT16_C( 32389), INT16_C(-20100), INT16_C( 26342), INT16_C(-12082), INT16_C( 27809), INT16_C( 10468), INT16_C( 29312), INT16_C( -5875), INT16_C( 10801), INT16_C(-11039), INT16_C(-32348), INT16_C(-19420), INT16_C(-32754), INT16_C(-20940), INT16_C( 17235), INT16_C(-29506)), simde_mm256_set_epi16(INT16_C( 32389), INT16_C(-20100), INT16_C( 26342), INT16_C(-12082), INT16_C( 10468), INT16_C( 29312), INT16_C( 29312), INT16_C( 29312), INT16_C( 10801), INT16_C(-11039), INT16_C(-32348), INT16_C(-19420), INT16_C(-20940), INT16_C( 17235), INT16_C( 17235), INT16_C( 17235)) }, { simde_mm256_set_epi16(INT16_C(-30489), INT16_C( 27619), INT16_C(-16535), INT16_C(-14927), INT16_C( 10253), 
INT16_C( 31108), INT16_C( 46), INT16_C( 10088), INT16_C(-25344), INT16_C( -5295), INT16_C( 22387), INT16_C( 27540), INT16_C(-30287), INT16_C( 3057), INT16_C( -2994), INT16_C( 31366)), simde_mm256_set_epi16(INT16_C(-30489), INT16_C( 27619), INT16_C(-16535), INT16_C(-14927), INT16_C( 31108), INT16_C( 46), INT16_C( 46), INT16_C( 46), INT16_C(-25344), INT16_C( -5295), INT16_C( 22387), INT16_C( 27540), INT16_C( 3057), INT16_C( -2994), INT16_C( -2994), INT16_C( -2994)) }, { simde_mm256_set_epi16(INT16_C( -7255), INT16_C(-20910), INT16_C( 13809), INT16_C( 9595), INT16_C( -1612), INT16_C( 23053), INT16_C( 7111), INT16_C(-11581), INT16_C(-12829), INT16_C( 20686), INT16_C(-16865), INT16_C( 5649), INT16_C(-25623), INT16_C( -2444), INT16_C( 11296), INT16_C( 7832)), simde_mm256_set_epi16(INT16_C( -7255), INT16_C(-20910), INT16_C( 13809), INT16_C( 9595), INT16_C( 23053), INT16_C( 7111), INT16_C( 7111), INT16_C( 7111), INT16_C(-12829), INT16_C( 20686), INT16_C(-16865), INT16_C( 5649), INT16_C( -2444), INT16_C( 11296), INT16_C( 11296), INT16_C( 11296)) }, { simde_mm256_set_epi16(INT16_C(-26606), INT16_C( 4305), INT16_C( 13601), INT16_C( 6245), INT16_C( -6095), INT16_C( 5518), INT16_C(-13456), INT16_C(-16529), INT16_C(-18890), INT16_C( 32306), INT16_C( 18285), INT16_C( 9413), INT16_C( -4831), INT16_C( 31394), INT16_C( 7403), INT16_C( 8863)), simde_mm256_set_epi16(INT16_C(-26606), INT16_C( 4305), INT16_C( 13601), INT16_C( 6245), INT16_C( 5518), INT16_C(-13456), INT16_C(-13456), INT16_C(-13456), INT16_C(-18890), INT16_C( 32306), INT16_C( 18285), INT16_C( 9413), INT16_C( 31394), INT16_C( 7403), INT16_C( 7403), INT16_C( 7403)) }, { simde_mm256_set_epi16(INT16_C(-22090), INT16_C( 3292), INT16_C(-24353), INT16_C(-13630), INT16_C( 1106), INT16_C( 20149), INT16_C( 12111), INT16_C( -9481), INT16_C( 8059), INT16_C( 23722), INT16_C(-24846), INT16_C(-19082), INT16_C( 18649), INT16_C( 23374), INT16_C(-15098), INT16_C( 5703)), simde_mm256_set_epi16(INT16_C(-22090), INT16_C( 3292), 
INT16_C(-24353), INT16_C(-13630), INT16_C( 20149), INT16_C( 12111), INT16_C( 12111), INT16_C( 12111), INT16_C( 8059), INT16_C( 23722), INT16_C(-24846), INT16_C(-19082), INT16_C( 23374), INT16_C(-15098), INT16_C(-15098), INT16_C(-15098)) }, { simde_mm256_set_epi16(INT16_C( 31351), INT16_C(-28147), INT16_C(-17219), INT16_C( 13266), INT16_C( 26309), INT16_C( 5438), INT16_C( -9093), INT16_C( -541), INT16_C( -7590), INT16_C(-28792), INT16_C( 9449), INT16_C(-27866), INT16_C( 7986), INT16_C(-24032), INT16_C( 23568), INT16_C( 18541)), simde_mm256_set_epi16(INT16_C( 31351), INT16_C(-28147), INT16_C(-17219), INT16_C( 13266), INT16_C( 5438), INT16_C( -9093), INT16_C( -9093), INT16_C( -9093), INT16_C( -7590), INT16_C(-28792), INT16_C( 9449), INT16_C(-27866), INT16_C(-24032), INT16_C( 23568), INT16_C( 23568), INT16_C( 23568)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_shufflelo_epi16(test_vec[i].a, 149); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_sign_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C(-124), INT8_C( -48), INT8_C( 10), INT8_C( 63), INT8_C( -84), INT8_C( 103), INT8_C( -14), INT8_C( 63), INT8_C( 82), INT8_C( -68), INT8_C( -17), INT8_C( 117), INT8_C( -80), INT8_C(-104), INT8_C( 37), INT8_C( 59), INT8_C( 121), INT8_C( 11), INT8_C( -20), INT8_C( -38), INT8_C( 36), INT8_C( -63), INT8_C( -89), INT8_C( -39), INT8_C( 115), INT8_C( 43), INT8_C(-116), INT8_C( 8), INT8_C( -68), INT8_C( -15), INT8_C( 35), INT8_C( -73)), simde_mm256_set_epi8(INT8_C( 64), INT8_C( -46), INT8_C( 104), INT8_C( 85), INT8_C( -66), INT8_C( 103), INT8_C( -19), INT8_C( -91), INT8_C(-116), INT8_C( 86), INT8_C( 54), INT8_C( 90), INT8_C(-128), INT8_C( 120), INT8_C( 127), INT8_C( -41), INT8_C( 56), INT8_C( 45), INT8_C( -60), INT8_C( 14), INT8_C( -4), INT8_C( -42), INT8_C( -79), INT8_C( 83), INT8_C( 72), 
INT8_C( 65), INT8_C( -99), INT8_C( 111), INT8_C( -84), INT8_C( 103), INT8_C( -29), INT8_C( -65)), simde_mm256_set_epi8(INT8_C(-124), INT8_C( 48), INT8_C( 10), INT8_C( 63), INT8_C( 84), INT8_C( 103), INT8_C( 14), INT8_C( -63), INT8_C( -82), INT8_C( -68), INT8_C( -17), INT8_C( 117), INT8_C( 80), INT8_C(-104), INT8_C( 37), INT8_C( -59), INT8_C( 121), INT8_C( 11), INT8_C( 20), INT8_C( -38), INT8_C( -36), INT8_C( 63), INT8_C( 89), INT8_C( -39), INT8_C( 115), INT8_C( 43), INT8_C( 116), INT8_C( 8), INT8_C( 68), INT8_C( -15), INT8_C( -35), INT8_C( 73)) }, { simde_mm256_set_epi8(INT8_C( 122), INT8_C( 101), INT8_C( 57), INT8_C(-117), INT8_C( -22), INT8_C( -56), INT8_C( -5), INT8_C( 60), INT8_C( 110), INT8_C( -69), INT8_C( -30), INT8_C( -83), INT8_C( 50), INT8_C( 67), INT8_C( -85), INT8_C( 24), INT8_C( 64), INT8_C( 117), INT8_C( 23), INT8_C( -97), INT8_C( 88), INT8_C( 95), INT8_C( 73), INT8_C( 84), INT8_C( 44), INT8_C( 108), INT8_C( -73), INT8_C(-105), INT8_C( 58), INT8_C( 125), INT8_C( 21), INT8_C( -8)), simde_mm256_set_epi8(INT8_C( -54), INT8_C( -52), INT8_C( 108), INT8_C( 17), INT8_C( -61), INT8_C( 77), INT8_C( -68), INT8_C(-127), INT8_C( 55), INT8_C( -25), INT8_C( -44), INT8_C(-111), INT8_C( 12), INT8_C( -35), INT8_C( 116), INT8_C( 64), INT8_C( -32), INT8_C( 127), INT8_C( -91), INT8_C( -72), INT8_C( -92), INT8_C( 13), INT8_C( 97), INT8_C( -10), INT8_C(-120), INT8_C( -40), INT8_C( -37), INT8_C( -49), INT8_C( 123), INT8_C( -15), INT8_C( 100), INT8_C( -27)), simde_mm256_set_epi8(INT8_C(-122), INT8_C(-101), INT8_C( 57), INT8_C(-117), INT8_C( 22), INT8_C( -56), INT8_C( 5), INT8_C( -60), INT8_C( 110), INT8_C( 69), INT8_C( 30), INT8_C( 83), INT8_C( 50), INT8_C( -67), INT8_C( -85), INT8_C( 24), INT8_C( -64), INT8_C( 117), INT8_C( -23), INT8_C( 97), INT8_C( -88), INT8_C( 95), INT8_C( 73), INT8_C( -84), INT8_C( -44), INT8_C(-108), INT8_C( 73), INT8_C( 105), INT8_C( 58), INT8_C(-125), INT8_C( 21), INT8_C( 8)) }, { simde_mm256_set_epi8(INT8_C( 80), INT8_C(-104), INT8_C( 33), INT8_C( 
-96), INT8_C( 105), INT8_C( -57), INT8_C(-123), INT8_C( 89), INT8_C( 62), INT8_C( -47), INT8_C( -92), INT8_C( -33), INT8_C( 113), INT8_C( 44), INT8_C( 125), INT8_C( -43), INT8_C( 28), INT8_C( -43), INT8_C( 122), INT8_C(-115), INT8_C( 45), INT8_C( 72), INT8_C( 66), INT8_C( 86), INT8_C( -11), INT8_C( 17), INT8_C( 64), INT8_C( -36), INT8_C( -86), INT8_C( -6), INT8_C( -64), INT8_C( 31)), simde_mm256_set_epi8(INT8_C( 65), INT8_C( -61), INT8_C( 83), INT8_C( -37), INT8_C(-100), INT8_C( 96), INT8_C( 90), INT8_C( -24), INT8_C( 86), INT8_C( -18), INT8_C( -62), INT8_C(-110), INT8_C(-107), INT8_C( -6), INT8_C( 9), INT8_C( -6), INT8_C( 80), INT8_C( -71), INT8_C( -81), INT8_C( 127), INT8_C( -21), INT8_C( -14), INT8_C( 27), INT8_C( 82), INT8_C(-126), INT8_C( 70), INT8_C( 18), INT8_C( -75), INT8_C( 126), INT8_C(-103), INT8_C(-109), INT8_C( 116)), simde_mm256_set_epi8(INT8_C( 80), INT8_C( 104), INT8_C( 33), INT8_C( 96), INT8_C(-105), INT8_C( -57), INT8_C(-123), INT8_C( -89), INT8_C( 62), INT8_C( 47), INT8_C( 92), INT8_C( 33), INT8_C(-113), INT8_C( -44), INT8_C( 125), INT8_C( 43), INT8_C( 28), INT8_C( 43), INT8_C(-122), INT8_C(-115), INT8_C( -45), INT8_C( -72), INT8_C( 66), INT8_C( 86), INT8_C( 11), INT8_C( 17), INT8_C( 64), INT8_C( 36), INT8_C( -86), INT8_C( 6), INT8_C( 64), INT8_C( 31)) }, { simde_mm256_set_epi8(INT8_C( 88), INT8_C( 57), INT8_C( -36), INT8_C( 10), INT8_C( 93), INT8_C( -67), INT8_C( 35), INT8_C( 4), INT8_C( 82), INT8_C( -81), INT8_C( 18), INT8_C( 74), INT8_C(-119), INT8_C( -48), INT8_C( 50), INT8_C( -37), INT8_C( 85), INT8_C( 99), INT8_C( -94), INT8_C(-127), INT8_C( 126), INT8_C( 36), INT8_C( 87), INT8_C( -95), INT8_C( 111), INT8_C( 86), INT8_C( -49), INT8_C( -3), INT8_C( -39), INT8_C( 19), INT8_C( -3), INT8_C( -93)), simde_mm256_set_epi8(INT8_C( -50), INT8_C( 47), INT8_C( 110), INT8_C( 89), INT8_C( -98), INT8_C( -62), INT8_C( 100), INT8_C( -53), INT8_C( -92), INT8_C( -2), INT8_C( 91), INT8_C( -12), INT8_C( -28), INT8_C( -91), INT8_C( -27), INT8_C( 32), 
INT8_C(-110), INT8_C( -42), INT8_C( 73), INT8_C(-102), INT8_C(-122), INT8_C( 48), INT8_C( -4), INT8_C( -50), INT8_C( 9), INT8_C( -36), INT8_C( 102), INT8_C(-126), INT8_C( 34), INT8_C( 96), INT8_C( -57), INT8_C( 7)), simde_mm256_set_epi8(INT8_C( -88), INT8_C( 57), INT8_C( -36), INT8_C( 10), INT8_C( -93), INT8_C( 67), INT8_C( 35), INT8_C( -4), INT8_C( -82), INT8_C( 81), INT8_C( 18), INT8_C( -74), INT8_C( 119), INT8_C( 48), INT8_C( -50), INT8_C( -37), INT8_C( -85), INT8_C( -99), INT8_C( -94), INT8_C( 127), INT8_C(-126), INT8_C( 36), INT8_C( -87), INT8_C( 95), INT8_C( 111), INT8_C( -86), INT8_C( -49), INT8_C( 3), INT8_C( -39), INT8_C( 19), INT8_C( 3), INT8_C( -93)) }, { simde_mm256_set_epi8(INT8_C( 77), INT8_C( -76), INT8_C( 117), INT8_C( -63), INT8_C( -98), INT8_C( -23), INT8_C( 99), INT8_C( 28), INT8_C( 114), INT8_C( -99), INT8_C( 117), INT8_C( -16), INT8_C( -35), INT8_C( 79), INT8_C( -35), INT8_C( -40), INT8_C( 116), INT8_C( 97), INT8_C( 16), INT8_C( 32), INT8_C(-122), INT8_C( -2), INT8_C( 116), INT8_C( -53), INT8_C(-103), INT8_C( -52), INT8_C( 50), INT8_C( -27), INT8_C(-112), INT8_C( 115), INT8_C( -42), INT8_C( -87)), simde_mm256_set_epi8(INT8_C( 49), INT8_C( 101), INT8_C( -91), INT8_C( -11), INT8_C( 38), INT8_C( 110), INT8_C( 63), INT8_C( 118), INT8_C( 65), INT8_C( -80), INT8_C( 118), INT8_C( -75), INT8_C( 103), INT8_C( -76), INT8_C( 10), INT8_C( -66), INT8_C( 107), INT8_C(-120), INT8_C( 11), INT8_C( -38), INT8_C( -59), INT8_C( -40), INT8_C( -9), INT8_C( 123), INT8_C( -3), INT8_C( -44), INT8_C( -64), INT8_C( 26), INT8_C( -30), INT8_C( 14), INT8_C(-100), INT8_C( 22)), simde_mm256_set_epi8(INT8_C( 77), INT8_C( -76), INT8_C(-117), INT8_C( 63), INT8_C( -98), INT8_C( -23), INT8_C( 99), INT8_C( 28), INT8_C( 114), INT8_C( 99), INT8_C( 117), INT8_C( 16), INT8_C( -35), INT8_C( -79), INT8_C( -35), INT8_C( 40), INT8_C( 116), INT8_C( -97), INT8_C( 16), INT8_C( -32), INT8_C( 122), INT8_C( 2), INT8_C(-116), INT8_C( -53), INT8_C( 103), INT8_C( 52), INT8_C( -50), INT8_C( -27), 
INT8_C( 112), INT8_C( 115), INT8_C( 42), INT8_C( -87)) }, { simde_mm256_set_epi8(INT8_C( -77), INT8_C( 55), INT8_C( 69), INT8_C( 5), INT8_C( -56), INT8_C( 30), INT8_C( 123), INT8_C( 84), INT8_C( 88), INT8_C(-108), INT8_C( 47), INT8_C( -23), INT8_C( -58), INT8_C( -90), INT8_C( 18), INT8_C(-120), INT8_C( -15), INT8_C( 43), INT8_C( 102), INT8_C( 1), INT8_C( -38), INT8_C( 45), INT8_C( 120), INT8_C( 124), INT8_C( 39), INT8_C( 42), INT8_C(-106), INT8_C( 42), INT8_C( 50), INT8_C( 123), INT8_C( 83), INT8_C( -75)), simde_mm256_set_epi8(INT8_C( -99), INT8_C( 1), INT8_C( 103), INT8_C( -24), INT8_C( 77), INT8_C( 77), INT8_C( -55), INT8_C( 64), INT8_C( -51), INT8_C( 124), INT8_C( -74), INT8_C( 85), INT8_C( 26), INT8_C( -10), INT8_C( -19), INT8_C( -23), INT8_C( -11), INT8_C( 18), INT8_C(-116), INT8_C( 62), INT8_C( -42), INT8_C( -20), INT8_C( 104), INT8_C(-123), INT8_C(-113), INT8_C( 1), INT8_C( -68), INT8_C( -90), INT8_C( 1), INT8_C( -32), INT8_C( 1), INT8_C(-108)), simde_mm256_set_epi8(INT8_C( 77), INT8_C( 55), INT8_C( 69), INT8_C( -5), INT8_C( -56), INT8_C( 30), INT8_C(-123), INT8_C( 84), INT8_C( -88), INT8_C(-108), INT8_C( -47), INT8_C( -23), INT8_C( -58), INT8_C( 90), INT8_C( -18), INT8_C( 120), INT8_C( 15), INT8_C( 43), INT8_C(-102), INT8_C( 1), INT8_C( 38), INT8_C( -45), INT8_C( 120), INT8_C(-124), INT8_C( -39), INT8_C( 42), INT8_C( 106), INT8_C( -42), INT8_C( 50), INT8_C(-123), INT8_C( 83), INT8_C( 75)) }, { simde_mm256_set_epi8(INT8_C( 124), INT8_C( 74), INT8_C( -32), INT8_C( 30), INT8_C(-109), INT8_C( 52), INT8_C( 31), INT8_C( 94), INT8_C( -78), INT8_C( -59), INT8_C( -8), INT8_C( 92), INT8_C( 9), INT8_C( -91), INT8_C( -63), INT8_C( -72), INT8_C( -85), INT8_C( 1), INT8_C( 60), INT8_C( 84), INT8_C( 43), INT8_C( -36), INT8_C(-112), INT8_C( 6), INT8_C( -19), INT8_C(-123), INT8_C( 80), INT8_C( -53), INT8_C( -54), INT8_C( 94), INT8_C( 3), INT8_C( 112)), simde_mm256_set_epi8(INT8_C( -31), INT8_C( -40), INT8_C( -74), INT8_C( 49), INT8_C(-107), INT8_C(-106), INT8_C( 102), 
INT8_C( 9), INT8_C( 117), INT8_C( 90), INT8_C( 114), INT8_C( -61), INT8_C( 32), INT8_C( -8), INT8_C( -4), INT8_C( -57), INT8_C( 123), INT8_C(-125), INT8_C( 9), INT8_C(-128), INT8_C(-101), INT8_C( 1), INT8_C( 32), INT8_C( -63), INT8_C( 99), INT8_C( 41), INT8_C( 38), INT8_C(-106), INT8_C( -65), INT8_C( 58), INT8_C( -23), INT8_C( 120)), simde_mm256_set_epi8(INT8_C(-124), INT8_C( -74), INT8_C( 32), INT8_C( 30), INT8_C( 109), INT8_C( -52), INT8_C( 31), INT8_C( 94), INT8_C( -78), INT8_C( -59), INT8_C( -8), INT8_C( -92), INT8_C( 9), INT8_C( 91), INT8_C( 63), INT8_C( 72), INT8_C( -85), INT8_C( -1), INT8_C( 60), INT8_C( -84), INT8_C( -43), INT8_C( -36), INT8_C(-112), INT8_C( -6), INT8_C( -19), INT8_C(-123), INT8_C( 80), INT8_C( 53), INT8_C( 54), INT8_C( 94), INT8_C( -3), INT8_C( 112)) }, { simde_mm256_set_epi8(INT8_C( -49), INT8_C(-106), INT8_C( 23), INT8_C( 37), INT8_C( -87), INT8_C( 105), INT8_C( 66), INT8_C( -47), INT8_C(-100), INT8_C( -77), INT8_C(-123), INT8_C( -73), INT8_C( -4), INT8_C( -48), INT8_C( 97), INT8_C( -78), INT8_C( 21), INT8_C( 125), INT8_C(-128), INT8_C( 30), INT8_C( -87), INT8_C( 46), INT8_C( -21), INT8_C( -69), INT8_C( -4), INT8_C( 15), INT8_C( -17), INT8_C( -95), INT8_C( 7), INT8_C( 32), INT8_C( 93), INT8_C( -95)), simde_mm256_set_epi8(INT8_C( 27), INT8_C( 61), INT8_C( 26), INT8_C( 17), INT8_C( 110), INT8_C( 35), INT8_C( 70), INT8_C( -63), INT8_C( 51), INT8_C( -79), INT8_C( -47), INT8_C( 74), INT8_C( 44), INT8_C( 7), INT8_C( 125), INT8_C( -59), INT8_C( -26), INT8_C( -74), INT8_C( -55), INT8_C( -22), INT8_C( 18), INT8_C( -40), INT8_C( 33), INT8_C( 126), INT8_C(-127), INT8_C( -2), INT8_C(-125), INT8_C( 79), INT8_C( 113), INT8_C( 14), INT8_C( 113), INT8_C( 21)), simde_mm256_set_epi8(INT8_C( -49), INT8_C(-106), INT8_C( 23), INT8_C( 37), INT8_C( -87), INT8_C( 105), INT8_C( 66), INT8_C( 47), INT8_C(-100), INT8_C( 77), INT8_C( 123), INT8_C( -73), INT8_C( -4), INT8_C( -48), INT8_C( 97), INT8_C( 78), INT8_C( -21), INT8_C(-125), INT8_C(-128), INT8_C( -30), 
INT8_C( -87), INT8_C( -46), INT8_C( -21), INT8_C( -69), INT8_C( 4), INT8_C( -15), INT8_C( 17), INT8_C( -95), INT8_C( 7), INT8_C( 32), INT8_C( 93), INT8_C( -95)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_sign_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_sign_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C(-31536), INT16_C( 2623), INT16_C(-21401), INT16_C( -3521), INT16_C( 21180), INT16_C( -4235), INT16_C(-20328), INT16_C( 9531), INT16_C( 30987), INT16_C( -4902), INT16_C( 9409), INT16_C(-22567), INT16_C( 29483), INT16_C(-29688), INT16_C(-17167), INT16_C( 9143)), simde_mm256_set_epi16(INT16_C( 16594), INT16_C( 26709), INT16_C(-16793), INT16_C( -4699), INT16_C(-29610), INT16_C( 13914), INT16_C(-32648), INT16_C( 32727), INT16_C( 14381), INT16_C(-15346), INT16_C( -810), INT16_C(-20141), INT16_C( 18497), INT16_C(-25233), INT16_C(-21401), INT16_C( -7233)), simde_mm256_set_epi16(INT16_C(-31536), INT16_C( 2623), INT16_C( 21401), INT16_C( 3521), INT16_C(-21180), INT16_C( -4235), INT16_C( 20328), INT16_C( 9531), INT16_C( 30987), INT16_C( 4902), INT16_C( -9409), INT16_C( 22567), INT16_C( 29483), INT16_C( 29688), INT16_C( 17167), INT16_C( -9143)) }, { simde_mm256_set_epi16(INT16_C( 31333), INT16_C( 14731), INT16_C( -5432), INT16_C( -1220), INT16_C( 28347), INT16_C( -7507), INT16_C( 12867), INT16_C(-21736), INT16_C( 16501), INT16_C( 6047), INT16_C( 22623), INT16_C( 18772), INT16_C( 11372), INT16_C(-18537), INT16_C( 14973), INT16_C( 5624)), simde_mm256_set_epi16(INT16_C(-13620), INT16_C( 27665), INT16_C(-15539), INT16_C(-17279), INT16_C( 14311), INT16_C(-11119), INT16_C( 3293), INT16_C( 29760), INT16_C( -8065), INT16_C(-23112), INT16_C(-23539), INT16_C( 25078), INT16_C(-30504), INT16_C( -9265), INT16_C( 31729), INT16_C( 25829)), 
simde_mm256_set_epi16(INT16_C(-31333), INT16_C( 14731), INT16_C( 5432), INT16_C( 1220), INT16_C( 28347), INT16_C( 7507), INT16_C( 12867), INT16_C(-21736), INT16_C(-16501), INT16_C( -6047), INT16_C(-22623), INT16_C( 18772), INT16_C(-11372), INT16_C( 18537), INT16_C( 14973), INT16_C( 5624)) }, { simde_mm256_set_epi16(INT16_C( 20632), INT16_C( 8608), INT16_C( 27079), INT16_C(-31399), INT16_C( 16081), INT16_C(-23329), INT16_C( 28972), INT16_C( 32213), INT16_C( 7381), INT16_C( 31373), INT16_C( 11592), INT16_C( 16982), INT16_C( -2799), INT16_C( 16604), INT16_C(-21766), INT16_C(-16353)), simde_mm256_set_epi16(INT16_C( 16835), INT16_C( 21467), INT16_C(-25504), INT16_C( 23272), INT16_C( 22254), INT16_C(-15726), INT16_C(-27142), INT16_C( 2554), INT16_C( 20665), INT16_C(-20609), INT16_C( -5134), INT16_C( 6994), INT16_C(-32186), INT16_C( 4789), INT16_C( 32409), INT16_C(-27788)), simde_mm256_set_epi16(INT16_C( 20632), INT16_C( 8608), INT16_C(-27079), INT16_C(-31399), INT16_C( 16081), INT16_C( 23329), INT16_C(-28972), INT16_C( 32213), INT16_C( 7381), INT16_C(-31373), INT16_C(-11592), INT16_C( 16982), INT16_C( 2799), INT16_C( 16604), INT16_C(-21766), INT16_C( 16353)) }, { simde_mm256_set_epi16(INT16_C( 22585), INT16_C( -9206), INT16_C( 23997), INT16_C( 8964), INT16_C( 21167), INT16_C( 4682), INT16_C(-30256), INT16_C( 13019), INT16_C( 21859), INT16_C(-23935), INT16_C( 32292), INT16_C( 22433), INT16_C( 28502), INT16_C(-12291), INT16_C( -9965), INT16_C( -605)), simde_mm256_set_epi16(INT16_C(-12753), INT16_C( 28249), INT16_C(-24894), INT16_C( 25803), INT16_C(-23298), INT16_C( 23540), INT16_C( -7003), INT16_C( -6880), INT16_C(-27946), INT16_C( 18842), INT16_C(-31184), INT16_C( -818), INT16_C( 2524), INT16_C( 26242), INT16_C( 8800), INT16_C(-14585)), simde_mm256_set_epi16(INT16_C(-22585), INT16_C( -9206), INT16_C(-23997), INT16_C( 8964), INT16_C(-21167), INT16_C( 4682), INT16_C( 30256), INT16_C(-13019), INT16_C(-21859), INT16_C(-23935), INT16_C(-32292), INT16_C(-22433), INT16_C( 
28502), INT16_C(-12291), INT16_C( -9965), INT16_C( 605)) }, { simde_mm256_set_epi16(INT16_C( 19892), INT16_C( 30145), INT16_C(-24855), INT16_C( 25372), INT16_C( 29341), INT16_C( 30192), INT16_C( -8881), INT16_C( -8744), INT16_C( 29793), INT16_C( 4128), INT16_C(-30978), INT16_C( 29899), INT16_C(-26164), INT16_C( 13029), INT16_C(-28557), INT16_C(-10583)), simde_mm256_set_epi16(INT16_C( 12645), INT16_C(-23051), INT16_C( 9838), INT16_C( 16246), INT16_C( 16816), INT16_C( 30389), INT16_C( 26548), INT16_C( 2750), INT16_C( 27528), INT16_C( 3034), INT16_C(-14888), INT16_C( -2181), INT16_C( -556), INT16_C(-16358), INT16_C( -7666), INT16_C(-25578)), simde_mm256_set_epi16(INT16_C( 19892), INT16_C(-30145), INT16_C(-24855), INT16_C( 25372), INT16_C( 29341), INT16_C( 30192), INT16_C( -8881), INT16_C( -8744), INT16_C( 29793), INT16_C( 4128), INT16_C( 30978), INT16_C(-29899), INT16_C( 26164), INT16_C(-13029), INT16_C( 28557), INT16_C( 10583)) }, { simde_mm256_set_epi16(INT16_C(-19657), INT16_C( 17669), INT16_C(-14306), INT16_C( 31572), INT16_C( 22676), INT16_C( 12265), INT16_C(-14682), INT16_C( 4744), INT16_C( -3797), INT16_C( 26113), INT16_C( -9683), INT16_C( 30844), INT16_C( 10026), INT16_C(-27094), INT16_C( 12923), INT16_C( 21429)), simde_mm256_set_epi16(INT16_C(-25343), INT16_C( 26600), INT16_C( 19789), INT16_C(-14016), INT16_C(-12932), INT16_C(-18859), INT16_C( 6902), INT16_C( -4631), INT16_C( -2798), INT16_C(-29634), INT16_C(-10516), INT16_C( 26757), INT16_C(-28927), INT16_C(-17242), INT16_C( 480), INT16_C( 404)), simde_mm256_set_epi16(INT16_C( 19657), INT16_C( 17669), INT16_C(-14306), INT16_C(-31572), INT16_C(-22676), INT16_C(-12265), INT16_C(-14682), INT16_C( -4744), INT16_C( 3797), INT16_C(-26113), INT16_C( 9683), INT16_C( 30844), INT16_C(-10026), INT16_C( 27094), INT16_C( 12923), INT16_C( 21429)) }, { simde_mm256_set_epi16(INT16_C( 31818), INT16_C( -8162), INT16_C(-27852), INT16_C( 8030), INT16_C(-19771), INT16_C( -1956), INT16_C( 2469), INT16_C(-15944), INT16_C(-21759), 
INT16_C( 15444), INT16_C( 11228), INT16_C(-28666), INT16_C( -4731), INT16_C( 20683), INT16_C(-13730), INT16_C( 880)), simde_mm256_set_epi16(INT16_C( -7720), INT16_C(-18895), INT16_C(-27242), INT16_C( 26121), INT16_C( 30042), INT16_C( 29379), INT16_C( 8440), INT16_C( -825), INT16_C( 31619), INT16_C( 2432), INT16_C(-25855), INT16_C( 8385), INT16_C( 25385), INT16_C( 9878), INT16_C(-16582), INT16_C( -5768)), simde_mm256_set_epi16(INT16_C(-31818), INT16_C( 8162), INT16_C( 27852), INT16_C( 8030), INT16_C(-19771), INT16_C( -1956), INT16_C( 2469), INT16_C( 15944), INT16_C(-21759), INT16_C( 15444), INT16_C(-11228), INT16_C(-28666), INT16_C( -4731), INT16_C( 20683), INT16_C( 13730), INT16_C( -880)) }, { simde_mm256_set_epi16(INT16_C(-12394), INT16_C( 5925), INT16_C(-22167), INT16_C( 17105), INT16_C(-25421), INT16_C(-31305), INT16_C( -816), INT16_C( 25010), INT16_C( 5501), INT16_C(-32738), INT16_C(-22226), INT16_C( -5189), INT16_C( -1009), INT16_C( -4191), INT16_C( 1824), INT16_C( 23969)), simde_mm256_set_epi16(INT16_C( 6973), INT16_C( 6673), INT16_C( 28195), INT16_C( 18113), INT16_C( 13233), INT16_C(-11958), INT16_C( 11271), INT16_C( 32197), INT16_C( -6474), INT16_C(-13846), INT16_C( 4824), INT16_C( 8574), INT16_C(-32258), INT16_C(-31921), INT16_C( 28942), INT16_C( 28949)), simde_mm256_set_epi16(INT16_C(-12394), INT16_C( 5925), INT16_C(-22167), INT16_C( 17105), INT16_C(-25421), INT16_C( 31305), INT16_C( -816), INT16_C( 25010), INT16_C( -5501), INT16_C( 32738), INT16_C(-22226), INT16_C( -5189), INT16_C( 1009), INT16_C( 4191), INT16_C( 1824), INT16_C( 23969)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_sign_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_sign_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C(-2066740673), INT32_C(-1402473921), 
INT32_C( 1388113781), INT32_C(-1332206277), INT32_C( 2030824666), INT32_C( 616671193), INT32_C( 1932233736), INT32_C(-1125047369)), simde_mm256_set_epi32(INT32_C( 1087531093), INT32_C(-1100485211), INT32_C(-1940507046), INT32_C(-2139586601), INT32_C( 942523406), INT32_C( -53038765), INT32_C( 1212259695), INT32_C(-1402477633)), simde_mm256_set_epi32(INT32_C(-2066740673), INT32_C( 1402473921), INT32_C(-1388113781), INT32_C( 1332206277), INT32_C( 2030824666), INT32_C( -616671193), INT32_C( 1932233736), INT32_C( 1125047369)) }, { simde_mm256_set_epi32(INT32_C( 2053454219), INT32_C( -355927236), INT32_C( 1857807021), INT32_C( 843295512), INT32_C( 1081415583), INT32_C( 1482639700), INT32_C( 745322391), INT32_C( 981276152)), simde_mm256_set_epi32(INT32_C( -892572655), INT32_C(-1018315647), INT32_C( 937940113), INT32_C( 215839808), INT32_C( -528505416), INT32_C(-1542626826), INT32_C(-1999053873), INT32_C( 2079417573)), simde_mm256_set_epi32(INT32_C(-2053454219), INT32_C( 355927236), INT32_C( 1857807021), INT32_C( 843295512), INT32_C(-1081415583), INT32_C(-1482639700), INT32_C( -745322391), INT32_C( 981276152)) }, { simde_mm256_set_epi32(INT32_C( 1352147360), INT32_C( 1774683481), INT32_C( 1053926623), INT32_C( 1898741205), INT32_C( 483752589), INT32_C( 759710294), INT32_C( -183418660), INT32_C(-1426407393)), simde_mm256_set_epi32(INT32_C( 1103320027), INT32_C(-1671406872), INT32_C( 1458487954), INT32_C(-1778775558), INT32_C( 1354346367), INT32_C( -336454830), INT32_C(-2109336907), INT32_C( 2123993972)), simde_mm256_set_epi32(INT32_C( 1352147360), INT32_C(-1774683481), INT32_C( 1053926623), INT32_C(-1898741205), INT32_C( 483752589), INT32_C( -759710294), INT32_C( 183418660), INT32_C(-1426407393)) }, { simde_mm256_set_epi32(INT32_C( 1480186890), INT32_C( 1572676356), INT32_C( 1387205194), INT32_C(-1982844197), INT32_C( 1432593025), INT32_C( 2116310945), INT32_C( 1867960317), INT32_C( -653001309)), simde_mm256_set_epi32(INT32_C( -835752359), INT32_C(-1631427381), 
INT32_C(-1526834188), INT32_C( -458889952), INT32_C(-1831450214), INT32_C(-2043609906), INT32_C( 165439106), INT32_C( 576767751)), simde_mm256_set_epi32(INT32_C(-1480186890), INT32_C(-1572676356), INT32_C(-1387205194), INT32_C( 1982844197), INT32_C(-1432593025), INT32_C(-2116310945), INT32_C( 1867960317), INT32_C( -653001309)) }, { simde_mm256_set_epi32(INT32_C( 1303672257), INT32_C(-1628871908), INT32_C( 1922921968), INT32_C( -581968424), INT32_C( 1952518176), INT32_C(-2030144309), INT32_C(-1714670875), INT32_C(-1871456599)), simde_mm256_set_epi32(INT32_C( 828745205), INT32_C( 644759414), INT32_C( 1102083765), INT32_C( 1739852478), INT32_C( 1804078042), INT32_C( -975636613), INT32_C( -36388838), INT32_C( -502359018)), simde_mm256_set_epi32(INT32_C( 1303672257), INT32_C(-1628871908), INT32_C( 1922921968), INT32_C( -581968424), INT32_C( 1952518176), INT32_C( 2030144309), INT32_C( 1714670875), INT32_C( 1871456599)) }, { simde_mm256_set_epi32(INT32_C(-1288223483), INT32_C( -937526444), INT32_C( 1486106601), INT32_C( -962194808), INT32_C( -248814079), INT32_C( -634554244), INT32_C( 657102378), INT32_C( 846943157)), simde_mm256_set_epi32(INT32_C(-1660852248), INT32_C( 1296943424), INT32_C( -847464875), INT32_C( 452390377), INT32_C( -183333826), INT32_C( -689149819), INT32_C(-1895711578), INT32_C( 31457684)), simde_mm256_set_epi32(INT32_C( 1288223483), INT32_C( -937526444), INT32_C(-1486106601), INT32_C( -962194808), INT32_C( 248814079), INT32_C( 634554244), INT32_C( -657102378), INT32_C( 846943157)) }, { simde_mm256_set_epi32(INT32_C( 2085281822), INT32_C(-1825300642), INT32_C(-1295648676), INT32_C( 161857976), INT32_C(-1425982380), INT32_C( 735875078), INT32_C( -310030133), INT32_C( -899808400)), simde_mm256_set_epi32(INT32_C( -505891279), INT32_C(-1785305591), INT32_C( 1968861891), INT32_C( 553188551), INT32_C( 2072185216), INT32_C(-1694424895), INT32_C( 1663641238), INT32_C(-1086658184)), simde_mm256_set_epi32(INT32_C(-2085281822), INT32_C( 1825300642), 
INT32_C(-1295648676), INT32_C( 161857976), INT32_C(-1425982380), INT32_C( -735875078), INT32_C( -310030133), INT32_C( 899808400)) }, { simde_mm256_set_epi32(INT32_C( -812247259), INT32_C(-1452719407), INT32_C(-1665956425), INT32_C( -53452366), INT32_C( 360546334), INT32_C(-1456542789), INT32_C( -66064479), INT32_C( 119561633)), simde_mm256_set_epi32(INT32_C( 456989201), INT32_C( 1847805633), INT32_C( 867291466), INT32_C( 738688453), INT32_C( -424228374), INT32_C( 316154238), INT32_C(-2114026673), INT32_C( 1896771861)), simde_mm256_set_epi32(INT32_C( -812247259), INT32_C(-1452719407), INT32_C(-1665956425), INT32_C( -53452366), INT32_C( -360546334), INT32_C(-1456542789), INT32_C( 66064479), INT32_C( 119561633)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_sign_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_sll_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[16]; const int64_t count[2]; const int16_t r[16]; } test_vec[] = { { { -INT16_C( 30967), -INT16_C( 15643), INT16_C( 9802), INT16_C( 10151), INT16_C( 21287), INT16_C( 10197), INT16_C( 23843), -INT16_C( 11577), -INT16_C( 30272), INT16_C( 28806), INT16_C( 2696), INT16_C( 16693), INT16_C( 19713), -INT16_C( 3783), -INT16_C( 24678), -INT16_C( 23756) }, { INT64_C( 6), INT64_C( 8) }, { -INT16_C( 15808), -INT16_C( 18112), -INT16_C( 28032), -INT16_C( 5696), -INT16_C( 13888), -INT16_C( 2752), INT16_C( 18624), -INT16_C( 20032), INT16_C( 28672), INT16_C( 8576), -INT16_C( 24064), INT16_C( 19776), INT16_C( 16448), INT16_C( 20032), -INT16_C( 6528), -INT16_C( 13056) } }, { { -INT16_C( 8994), INT16_C( 26363), INT16_C( 12774), -INT16_C( 6233), -INT16_C( 7810), INT16_C( 6361), INT16_C( 3456), -INT16_C( 22596), INT16_C( 8486), INT16_C( 25880), -INT16_C( 20434), -INT16_C( 28724), INT16_C( 22813), -INT16_C( 6381), INT16_C( 27054), -INT16_C( 29582) }, { INT64_C( 1), INT64_C( 1) 
}, { -INT16_C( 17988), -INT16_C( 12810), INT16_C( 25548), -INT16_C( 12466), -INT16_C( 15620), INT16_C( 12722), INT16_C( 6912), INT16_C( 20344), INT16_C( 16972), -INT16_C( 13776), INT16_C( 24668), INT16_C( 8088), -INT16_C( 19910), -INT16_C( 12762), -INT16_C( 11428), INT16_C( 6372) } }, { { -INT16_C( 17901), INT16_C( 16772), INT16_C( 20586), -INT16_C( 30768), -INT16_C( 7255), INT16_C( 22382), -INT16_C( 7860), -INT16_C( 28189), -INT16_C( 10929), -INT16_C( 4420), -INT16_C( 12433), -INT16_C( 5877), INT16_C( 16570), -INT16_C( 19484), -INT16_C( 31182), INT16_C( 17873) }, { INT64_C( 4), INT64_C( 13) }, { -INT16_C( 24272), INT16_C( 6208), INT16_C( 1696), INT16_C( 32000), INT16_C( 14992), INT16_C( 30432), INT16_C( 5312), INT16_C( 7728), INT16_C( 21744), -INT16_C( 5184), -INT16_C( 2320), -INT16_C( 28496), INT16_C( 2976), INT16_C( 15936), INT16_C( 25376), INT16_C( 23824) } }, { { -INT16_C( 10913), -INT16_C( 12610), -INT16_C( 13916), INT16_C( 24248), -INT16_C( 25591), INT16_C( 15121), -INT16_C( 7389), INT16_C( 25472), INT16_C( 1848), -INT16_C( 8690), INT16_C( 16222), -INT16_C( 26579), -INT16_C( 11297), INT16_C( 24607), INT16_C( 14429), -INT16_C( 17104) }, { INT64_C( 12), INT64_C( 3) }, { -INT16_C( 4096), -INT16_C( 8192), INT16_C( 16384), INT16_MIN, -INT16_C( 28672), INT16_C( 4096), INT16_C( 12288), INT16_C( 0), INT16_MIN, -INT16_C( 8192), -INT16_C( 8192), -INT16_C( 12288), -INT16_C( 4096), -INT16_C( 4096), -INT16_C( 12288), INT16_C( 0) } }, { { INT16_C( 29828), -INT16_C( 7654), INT16_C( 18356), -INT16_C( 27782), -INT16_C( 26085), INT16_C( 30964), INT16_C( 9426), -INT16_C( 8395), -INT16_C( 16109), -INT16_C( 13680), -INT16_C( 24828), -INT16_C( 7029), -INT16_C( 30528), -INT16_C( 15129), INT16_C( 19973), -INT16_C( 30463) }, { INT64_C( 7), INT64_C( 3) }, { INT16_C( 16896), INT16_C( 3328), -INT16_C( 9728), -INT16_C( 17152), INT16_C( 3456), INT16_C( 31232), INT16_C( 26880), -INT16_C( 25984), -INT16_C( 30336), INT16_C( 18432), -INT16_C( 32256), INT16_C( 17792), INT16_C( 24576), 
INT16_C( 29568), INT16_C( 640), -INT16_C( 32640) } }, { { -INT16_C( 16147), -INT16_C( 3840), -INT16_C( 29856), INT16_C( 8406), -INT16_C( 17133), INT16_C( 6373), -INT16_C( 6645), -INT16_C( 12639), INT16_C( 3073), INT16_C( 25668), INT16_C( 20210), INT16_C( 29154), -INT16_C( 9908), INT16_C( 28611), -INT16_C( 3323), -INT16_C( 3420) }, { INT64_C( 4), INT64_C( 0) }, { INT16_C( 3792), INT16_C( 4096), -INT16_C( 18944), INT16_C( 3424), -INT16_C( 11984), -INT16_C( 29104), INT16_C( 24752), -INT16_C( 5616), -INT16_C( 16368), INT16_C( 17472), -INT16_C( 4320), INT16_C( 7712), -INT16_C( 27456), -INT16_C( 976), INT16_C( 12368), INT16_C( 10816) } }, { { -INT16_C( 27382), -INT16_C( 923), INT16_C( 18659), INT16_C( 12397), INT16_C( 12321), INT16_C( 9887), INT16_C( 17188), -INT16_C( 10216), -INT16_C( 1048), INT16_C( 6380), INT16_C( 8373), INT16_C( 11355), -INT16_C( 18630), INT16_C( 14766), -INT16_C( 75), -INT16_C( 16582) }, { INT64_C( 0), INT64_C( 7) }, { -INT16_C( 27382), -INT16_C( 923), INT16_C( 18659), INT16_C( 12397), INT16_C( 12321), INT16_C( 9887), INT16_C( 17188), -INT16_C( 10216), -INT16_C( 1048), INT16_C( 6380), INT16_C( 8373), INT16_C( 11355), -INT16_C( 18630), INT16_C( 14766), -INT16_C( 75), -INT16_C( 16582) } }, { { INT16_C( 16706), -INT16_C( 2166), -INT16_C( 6815), -INT16_C( 25821), -INT16_C( 11619), INT16_C( 21205), INT16_C( 4049), INT16_C( 25873), -INT16_C( 13137), -INT16_C( 26660), -INT16_C( 31500), INT16_C( 19872), -INT16_C( 12342), INT16_C( 21706), INT16_C( 7958), INT16_C( 22982) }, { INT64_C( 13), INT64_C( 15) }, { INT16_C( 16384), INT16_C( 16384), INT16_C( 8192), INT16_C( 24576), -INT16_C( 24576), -INT16_C( 24576), INT16_C( 8192), INT16_C( 8192), -INT16_C( 8192), INT16_MIN, INT16_MIN, INT16_C( 0), INT16_C( 16384), INT16_C( 16384), -INT16_C( 16384), -INT16_C( 16384) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m128i count = simde_x_mm_loadu_epi64(test_vec[i].count); 
simde__m256i r = simde_mm256_sll_epi16(a, count); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm256_sll_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[8]; const int64_t count[2]; const int32_t r[8]; } test_vec[] = { { { -INT32_C( 42405030), INT32_C( 2070762355), -INT32_C( 707236265), INT32_C( 839284725), INT32_C( 514773574), INT32_C( 579134346), INT32_C( 416515106), INT32_C( 1106560486) }, { INT64_C( 2), INT64_C( 7) }, { -INT32_C( 169620120), -INT32_C( 306885172), INT32_C( 1466022236), -INT32_C( 937828396), INT32_C( 2059094296), -INT32_C( 1978429912), INT32_C( 1666060424), INT32_C( 131274648) } }, { { INT32_C( 1331425221), -INT32_C( 177086510), INT32_C( 1242383459), -INT32_C( 1031077367), INT32_C( 787335534), -INT32_C( 1924819852), INT32_C( 194785556), -INT32_C( 381100764) }, { INT64_C( 28), INT64_C( 2) }, { INT32_C( 1342177280), INT32_C( 536870912), INT32_C( 805306368), -INT32_C( 1879048192), -INT32_C( 536870912), INT32_C( 1073741824), INT32_C( 1073741824), INT32_C( 1073741824) } }, { { -INT32_C( 113203067), INT32_C( 1350993212), -INT32_C( 614784330), -INT32_C( 859527937), -INT32_C( 882115256), -INT32_C( 1749876824), INT32_C( 1150280608), INT32_C( 609634975) }, { INT64_C( 26), INT64_C( 24) }, { INT32_C( 335544320), -INT32_C( 268435456), -INT32_C( 671088640), -INT32_C( 67108864), INT32_C( 536870912), -INT32_C( 1610612736), INT32_MIN, INT32_C( 2080374784) } }, { { INT32_C( 378731886), -INT32_C( 1666366212), -INT32_C( 941605592), INT32_C( 2045523591), -INT32_C( 408483380), INT32_C( 1958291117), INT32_C( 2134533377), INT32_C( 1162267862) }, { INT64_C( 8), INT64_C( 8) }, { -INT32_C( 1828884992), -INT32_C( 1387987968), -INT32_C( 532862976), -INT32_C( 331970816), -INT32_C( 1492530176), -INT32_C( 1188647680), INT32_C( 979697920), INT32_C( 1187829248) } }, { { -INT32_C( 1356010239), INT32_C( 354673940), INT32_C( 563371339), -INT32_C( 228140503), -INT32_C( 810040654), INT32_C( 
286464203), -INT32_C( 740417702), INT32_C( 421012503) }, { INT64_C( 14), INT64_C( 11) }, { INT32_C( 994066432), -INT32_C( 112918528), INT32_C( 391299072), -INT32_C( 1232453632), -INT32_C( 257130496), -INT32_C( 969752576), -INT32_C( 2015985664), INT32_C( 151371776) } }, { { -INT32_C( 1976886593), -INT32_C( 1919205838), INT32_C( 1969256798), -INT32_C( 1366394723), -INT32_C( 422291780), -INT32_C( 1990521279), -INT32_C( 2013596963), -INT32_C( 1360830993) }, { INT64_C( 21), INT64_C( 25) }, { INT32_C( 1474297856), -INT32_C( 968884224), INT32_C( 734003200), INT32_C( 329252864), -INT32_C( 679477248), -INT32_C( 937426944), INT32_C( 1537212416), -INT32_C( 1109393408) } }, { { INT32_C( 1186097925), INT32_C( 1473252730), -INT32_C( 287388929), -INT32_C( 1617116888), INT32_C( 474600655), -INT32_C( 205094745), INT32_C( 652011031), -INT32_C( 1695386475) }, { INT64_C( 22), INT64_C( 4) }, { -INT32_C( 1052770304), INT32_C( 1585446912), -INT32_C( 1077936128), INT32_C( 1241513984), INT32_C( 868220928), INT32_C( 700448768), -INT32_C( 2051014656), INT32_C( 624951296) } }, { { -INT32_C( 1203991536), INT32_C( 615187212), INT32_C( 2118813929), -INT32_C( 619168516), -INT32_C( 1841956384), INT32_C( 608429737), -INT32_C( 959978891), INT32_C( 803670558) }, { INT64_C( 2), INT64_C( 5) }, { -INT32_C( 520998848), -INT32_C( 1834218448), -INT32_C( 114678876), INT32_C( 1818293232), INT32_C( 1222109056), -INT32_C( 1861248348), INT32_C( 455051732), -INT32_C( 1080285064) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); simde__m128i count = simde_x_mm_loadu_epi64(test_vec[i].count); simde__m256i r = simde_mm256_sll_epi32(a, count); simde_test_x86_assert_equal_i32x8(r, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm256_sll_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[4]; const int64_t count[2]; const int64_t r[4]; } test_vec[] = { { { INT64_C( 
6353669260759125320), -INT64_C( 3799575433940554765), -INT64_C( 3221953462652208759), -INT64_C( 5994917397228281305) }, { INT64_C( 24), INT64_C( 54) }, { -INT64_C( 8679599983080505344), INT64_C( 5051539236809867264), INT64_C( 7311530745351110656), INT64_C( 7187857302152544256) } }, { { -INT64_C( 2273019595137522117), INT64_C( 3805099269717809854), -INT64_C( 582719818775460585), INT64_C( 7138324329339315939) }, { INT64_C( 47), INT64_C( 36) }, { -INT64_C( 5540131229107486720), INT64_C( 7881017872921657344), -INT64_C( 9039991089527783424), INT64_C( 3706884705790984192) } }, { { INT64_C( 2023522753362425524), INT64_C( 2373503989335581029), -INT64_C( 4790959826946689851), -INT64_C( 4167002754303171549) }, { INT64_C( 41), INT64_C( 6) }, { INT64_C( 3237358056358543360), -INT64_C( 6646527998696620032), INT64_C( 6478861271553802240), INT64_C( 6829785800721301504) } }, { { INT64_C( 3260993203488027417), INT64_C( 1269439804849385668), INT64_C( 5917508330730193206), -INT64_C( 7956805592445070552) }, { INT64_C( 37), INT64_C( 55) }, { -INT64_C( 7224227763165593600), -INT64_C( 486924771674554368), INT64_C( 1355626093914095616), INT64_C( 8001318738651512832) } }, { { INT64_C( 1465206910307710346), INT64_C( 4671115228599013065), -INT64_C( 8577132701579089518), -INT64_C( 639029091708279909) }, { INT64_C( 45), INT64_C( 43) }, { -INT64_C( 2976527509971009536), -INT64_C( 443287903947194368), -INT64_C( 4813714682961461248), -INT64_C( 3714519714534260736) } }, { { INT64_C( 7745356627688155572), INT64_C( 3877828844525065068), INT64_C( 4246041348744727368), INT64_C( 7187132229681079538) }, { INT64_C( 16), INT64_C( 47) }, { INT64_C( 635275905231749120), -INT64_C( 3401948701828317184), -INT64_C( 568520574133338112), -INT64_C( 3265373720462360576) } }, { { INT64_C( 6705542549428466616), -INT64_C( 8947198048595020349), -INT64_C( 1652251508452938248), -INT64_C( 3179399383378874708) }, { INT64_C( 29), INT64_C( 19) }, { INT64_C( 8835620976327655424), -INT64_C( 1275188604754198528), INT64_C( 
4170867338423304192), INT64_C( 7229233794509701120) } }, { { -INT64_C( 5199636620313426708), -INT64_C( 1708347553783123364), INT64_C( 5888461629188206990), INT64_C( 5606339840074555950) }, { INT64_C( 28), INT64_C( 39) }, { -INT64_C( 5849700588702400512), INT64_C( 3998518232565678080), INT64_C( 8364450152747892736), INT64_C( 1737327477719564288) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi64(test_vec[i].a); simde__m128i count = simde_x_mm_loadu_epi64(test_vec[i].count); simde__m256i r = simde_mm256_sll_epi64(a, count); simde_test_x86_assert_equal_i64x4(r, simde_x_mm256_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm256_slli_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[] = { { simde_mm256_set_epi16(INT16_C( -7189), INT16_C( -4038), INT16_C( 8832), INT16_C(-31599), INT16_C( 9071), INT16_C(-26166), INT16_C( 4984), INT16_C(-29916), INT16_C( 26692), INT16_C( 7557), INT16_C(-30970), INT16_C(-31903), INT16_C( 32721), INT16_C( 14732), INT16_C( -9072), INT16_C(-28469)), simde_mm256_set_epi16(INT16_C( -7189), INT16_C( -4038), INT16_C( 8832), INT16_C(-31599), INT16_C( 9071), INT16_C(-26166), INT16_C( 4984), INT16_C(-29916), INT16_C( 26692), INT16_C( 7557), INT16_C(-30970), INT16_C(-31903), INT16_C( 32721), INT16_C( 14732), INT16_C( -9072), INT16_C(-28469)) }, { simde_mm256_set_epi16(INT16_C( 27658), INT16_C(-18253), INT16_C( 19135), INT16_C(-22021), INT16_C(-31750), INT16_C( 26403), INT16_C(-14265), INT16_C( -1890), INT16_C( 16323), INT16_C( -5176), INT16_C(-29730), INT16_C( -7646), INT16_C(-24362), INT16_C( 28979), INT16_C( 13248), INT16_C( 6628)), simde_mm256_set_epi16(INT16_C(-10220), INT16_C( 29030), INT16_C(-27266), INT16_C( 21494), INT16_C( 2036), INT16_C(-12730), INT16_C(-28530), INT16_C( -3780), INT16_C( 32646), INT16_C(-10352), INT16_C( 6076), INT16_C(-15292), INT16_C( 16812), INT16_C( -7578), INT16_C( 26496), INT16_C( 13256)) }, 
{ simde_mm256_set_epi16(INT16_C(-18549), INT16_C( -1433), INT16_C( 15072), INT16_C( 28665), INT16_C(-22672), INT16_C( 30426), INT16_C(-11873), INT16_C(-28559), INT16_C( 26366), INT16_C( 20007), INT16_C( 14676), INT16_C( -4391), INT16_C(-13223), INT16_C(-20349), INT16_C(-14071), INT16_C( 6326)), simde_mm256_set_epi16(INT16_C( -8660), INT16_C( -5732), INT16_C( -5248), INT16_C(-16412), INT16_C(-25152), INT16_C( -9368), INT16_C( 18044), INT16_C( 16836), INT16_C(-25608), INT16_C( 14492), INT16_C( -6832), INT16_C(-17564), INT16_C( 12644), INT16_C(-15860), INT16_C( 9252), INT16_C( 25304)) }, { simde_mm256_set_epi16(INT16_C(-15802), INT16_C( 1115), INT16_C(-15134), INT16_C(-24432), INT16_C( 29893), INT16_C( 28473), INT16_C( 18873), INT16_C( 20173), INT16_C( 191), INT16_C( 32281), INT16_C( 13007), INT16_C( 21879), INT16_C( 31137), INT16_C(-25345), INT16_C( 8960), INT16_C( 9998)), simde_mm256_set_epi16(INT16_C( 4656), INT16_C( 8920), INT16_C( 10000), INT16_C( 1152), INT16_C(-23000), INT16_C( 31176), INT16_C( 19912), INT16_C( 30312), INT16_C( 1528), INT16_C( -3896), INT16_C(-27016), INT16_C(-21576), INT16_C(-13048), INT16_C( -6152), INT16_C( 6144), INT16_C( 14448)) }, { simde_mm256_set_epi16(INT16_C(-11235), INT16_C( 12162), INT16_C(-25050), INT16_C(-16691), INT16_C(-21645), INT16_C(-11525), INT16_C( 24581), INT16_C( 20473), INT16_C( 17987), INT16_C( 13992), INT16_C( 21082), INT16_C( 9896), INT16_C( 20946), INT16_C( -338), INT16_C( -890), INT16_C( 3047)), simde_mm256_set_epi16(INT16_C( 16848), INT16_C( -2016), INT16_C( -7584), INT16_C( -4912), INT16_C(-18640), INT16_C( 12208), INT16_C( 80), INT16_C( -112), INT16_C( 25648), INT16_C( 27264), INT16_C( 9632), INT16_C( 27264), INT16_C( 7456), INT16_C( -5408), INT16_C(-14240), INT16_C(-16784)) }, { simde_mm256_set_epi16(INT16_C( 4232), INT16_C( 9528), INT16_C( 5625), INT16_C( -2056), INT16_C( -5147), INT16_C(-18162), INT16_C(-12026), INT16_C(-23947), INT16_C( 19702), INT16_C( 18533), INT16_C( 17268), INT16_C( -7285), 
INT16_C(-25540), INT16_C( 15541), INT16_C(-16480), INT16_C(-30543)), simde_mm256_set_epi16(INT16_C( 4352), INT16_C(-22784), INT16_C(-16608), INT16_C( -256), INT16_C( 31904), INT16_C( 8640), INT16_C( 8384), INT16_C( 20128), INT16_C(-24896), INT16_C( 3232), INT16_C( 28288), INT16_C( 29024), INT16_C(-30848), INT16_C(-26976), INT16_C( -3072), INT16_C( 5664)) }, { simde_mm256_set_epi16(INT16_C( 21401), INT16_C(-26197), INT16_C(-15084), INT16_C( 9940), INT16_C(-15195), INT16_C( 26408), INT16_C( 31953), INT16_C( 9654), INT16_C(-16569), INT16_C( 29048), INT16_C(-29616), INT16_C( 6990), INT16_C( 2847), INT16_C(-28587), INT16_C( 20626), INT16_C( 4687)), simde_mm256_set_epi16(INT16_C( -6592), INT16_C( 27328), INT16_C( 17664), INT16_C(-19200), INT16_C( 10560), INT16_C(-13824), INT16_C( 13376), INT16_C( 28032), INT16_C(-11840), INT16_C( 24064), INT16_C( 5120), INT16_C(-11392), INT16_C(-14400), INT16_C( 5440), INT16_C( 9344), INT16_C(-27712)) }, { simde_mm256_set_epi16(INT16_C( -7756), INT16_C( -3283), INT16_C(-22803), INT16_C(-16163), INT16_C( 5629), INT16_C( 29150), INT16_C( -9667), INT16_C( 12335), INT16_C(-18839), INT16_C(-10112), INT16_C(-18195), INT16_C(-15969), INT16_C(-29495), INT16_C( 29894), INT16_C(-13459), INT16_C( 16744)), simde_mm256_set_epi16(INT16_C( -9728), INT16_C(-27008), INT16_C( 30336), INT16_C( 28288), INT16_C( -384), INT16_C( -4352), INT16_C( 7808), INT16_C( 6016), INT16_C( 13440), INT16_C( 16384), INT16_C( 30336), INT16_C(-12416), INT16_C( 25728), INT16_C( 25344), INT16_C(-18816), INT16_C(-19456)) }, { simde_mm256_set_epi16(INT16_C(-28557), INT16_C(-10724), INT16_C( 2722), INT16_C(-10967), INT16_C( 5210), INT16_C( 29322), INT16_C( 15248), INT16_C( -8080), INT16_C(-19889), INT16_C( 30125), INT16_C( 14116), INT16_C(-29183), INT16_C( 11578), INT16_C(-15372), INT16_C(-24444), INT16_C( 10332)), simde_mm256_set_epi16(INT16_C( 29440), INT16_C( 7168), INT16_C(-24064), INT16_C( 10496), INT16_C( 23040), INT16_C(-30208), INT16_C(-28672), INT16_C( 28672), INT16_C( 
20224), INT16_C(-21248), INT16_C( 9216), INT16_C( 256), INT16_C( 14848), INT16_C( -3072), INT16_C(-31744), INT16_C( 23552)) }, { simde_mm256_set_epi16(INT16_C(-21274), INT16_C(-31982), INT16_C(-20961), INT16_C( 5088), INT16_C(-17462), INT16_C( -9409), INT16_C( 15757), INT16_C( 1966), INT16_C(-22236), INT16_C(-28953), INT16_C( -1919), INT16_C(-19201), INT16_C( 20002), INT16_C( 13277), INT16_C(-29203), INT16_C(-31968)), simde_mm256_set_epi16(INT16_C(-13312), INT16_C( 9216), INT16_C( 15872), INT16_C(-16384), INT16_C(-27648), INT16_C( 32256), INT16_C( 6656), INT16_C( 23552), INT16_C( 18432), INT16_C(-12800), INT16_C( 512), INT16_C( -512), INT16_C( 17408), INT16_C(-17920), INT16_C( -9728), INT16_C( 16384)) }, { simde_mm256_set_epi16(INT16_C( 18957), INT16_C( -9706), INT16_C( -6614), INT16_C( 10095), INT16_C( 5161), INT16_C( -605), INT16_C( 13764), INT16_C( 27464), INT16_C( -3974), INT16_C( -5977), INT16_C(-10905), INT16_C( 357), INT16_C(-20180), INT16_C( 8910), INT16_C( -3748), INT16_C( 6077)), simde_mm256_set_epi16(INT16_C( 13312), INT16_C( 22528), INT16_C(-22528), INT16_C(-17408), INT16_C(-23552), INT16_C(-29696), INT16_C( 4096), INT16_C( 8192), INT16_C( -6144), INT16_C(-25600), INT16_C(-25600), INT16_C(-27648), INT16_C(-20480), INT16_C( 14336), INT16_C( 28672), INT16_C( -3072)) }, { simde_mm256_set_epi16(INT16_C(-31972), INT16_C( 18963), INT16_C( 30683), INT16_C(-21818), INT16_C(-14936), INT16_C(-28324), INT16_C( -9468), INT16_C(-30588), INT16_C(-23376), INT16_C( 11957), INT16_C(-26836), INT16_C( 1095), INT16_C( 3892), INT16_C( 16953), INT16_C( 18603), INT16_C(-10451)), simde_mm256_set_epi16(INT16_C( -8192), INT16_C(-26624), INT16_C(-10240), INT16_C( 12288), INT16_C( 16384), INT16_C( -8192), INT16_C( 8192), INT16_C( 8192), INT16_C(-32768), INT16_C(-22528), INT16_C( 24576), INT16_C( 14336), INT16_C(-24576), INT16_C(-14336), INT16_C( 22528), INT16_C( 26624)) }, { simde_mm256_set_epi16(INT16_C( -5389), INT16_C(-19407), INT16_C( 25290), INT16_C( -4640), INT16_C( 29672), 
INT16_C( 23128), INT16_C(-13360), INT16_C(-25779), INT16_C(-15993), INT16_C( -8711), INT16_C(-27319), INT16_C( 5516), INT16_C(-14272), INT16_C( 31123), INT16_C(-10448), INT16_C( 22014)), simde_mm256_set_epi16(INT16_C( 12288), INT16_C( 4096), INT16_C(-24576), INT16_C( 0), INT16_C(-32768), INT16_C(-32768), INT16_C( 0), INT16_C(-12288), INT16_C( 28672), INT16_C(-28672), INT16_C(-28672), INT16_C(-16384), INT16_C( 0), INT16_C( 12288), INT16_C( 0), INT16_C( -8192)) }, { simde_mm256_set_epi16(INT16_C( -8792), INT16_C( -1044), INT16_C(-11356), INT16_C( -9495), INT16_C( 4809), INT16_C(-13629), INT16_C( 15424), INT16_C(-18049), INT16_C( 4498), INT16_C(-15799), INT16_C(-26777), INT16_C( 7570), INT16_C( 20105), INT16_C( -5032), INT16_C( -5734), INT16_C(-24145)), simde_mm256_set_epi16(INT16_C( 0), INT16_C(-32768), INT16_C(-32768), INT16_C( 8192), INT16_C( 8192), INT16_C( 24576), INT16_C( 0), INT16_C( -8192), INT16_C( 16384), INT16_C( 8192), INT16_C( -8192), INT16_C( 16384), INT16_C( 8192), INT16_C( 0), INT16_C( 16384), INT16_C( -8192)) }, { simde_mm256_set_epi16(INT16_C(-28948), INT16_C(-14792), INT16_C( 23966), INT16_C( 21707), INT16_C( 7390), INT16_C( 8499), INT16_C( 25220), INT16_C( 26210), INT16_C( 3798), INT16_C( 20760), INT16_C(-20165), INT16_C(-15694), INT16_C( 538), INT16_C( 97), INT16_C( -3558), INT16_C(-31643)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C(-32768), INT16_C(-16384), INT16_C(-32768), INT16_C(-16384), INT16_C( 0), INT16_C(-32768), INT16_C(-32768), INT16_C( 0), INT16_C(-16384), INT16_C(-32768), INT16_C(-32768), INT16_C( 16384), INT16_C(-32768), INT16_C( 16384)) }, { simde_mm256_set_epi16(INT16_C(-17181), INT16_C( 9599), INT16_C(-10139), INT16_C(-21574), INT16_C( -1148), INT16_C(-14339), INT16_C( -9621), INT16_C( 4987), INT16_C(-25046), INT16_C( 11688), INT16_C( -9678), INT16_C( -6619), INT16_C( 24962), INT16_C(-11214), INT16_C( 26930), INT16_C(-12095)), simde_mm256_set_epi16(INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C( 0), 
INT16_C( 0), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-32768), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-32768)) } }; simde__m256i r; r = simde_mm256_slli_epi16(test_vec[0].a, 0); simde_assert_m256i_i16(r, ==, test_vec[0].r); r = simde_mm256_slli_epi16(test_vec[1].a, 1); simde_assert_m256i_i16(r, ==, test_vec[1].r); r = simde_mm256_slli_epi16(test_vec[2].a, 2); simde_assert_m256i_i16(r, ==, test_vec[2].r); r = simde_mm256_slli_epi16(test_vec[3].a, 3); simde_assert_m256i_i16(r, ==, test_vec[3].r); r = simde_mm256_slli_epi16(test_vec[4].a, 4); simde_assert_m256i_i16(r, ==, test_vec[4].r); r = simde_mm256_slli_epi16(test_vec[5].a, 5); simde_assert_m256i_i16(r, ==, test_vec[5].r); r = simde_mm256_slli_epi16(test_vec[6].a, 6); simde_assert_m256i_i16(r, ==, test_vec[6].r); r = simde_mm256_slli_epi16(test_vec[7].a, 7); simde_assert_m256i_i16(r, ==, test_vec[7].r); r = simde_mm256_slli_epi16(test_vec[8].a, 8); simde_assert_m256i_i16(r, ==, test_vec[8].r); r = simde_mm256_slli_epi16(test_vec[9].a, 9); simde_assert_m256i_i16(r, ==, test_vec[9].r); r = simde_mm256_slli_epi16(test_vec[10].a, 10); simde_assert_m256i_i16(r, ==, test_vec[10].r); r = simde_mm256_slli_epi16(test_vec[11].a, 11); simde_assert_m256i_i16(r, ==, test_vec[11].r); r = simde_mm256_slli_epi16(test_vec[12].a, 12); simde_assert_m256i_i16(r, ==, test_vec[12].r); r = simde_mm256_slli_epi16(test_vec[13].a, 13); simde_assert_m256i_i16(r, ==, test_vec[13].r); r = simde_mm256_slli_epi16(test_vec[14].a, 14); simde_assert_m256i_i16(r, ==, test_vec[14].r); r = simde_mm256_slli_epi16(test_vec[15].a, 15); simde_assert_m256i_i16(r, ==, test_vec[15].r); return 0; } static int test_simde_mm256_slli_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 1857969468), INT32_C( 1569141389), INT32_C(-1894985594), INT32_C( 1398609693), INT32_C( 1177229575), INT32_C( 1655079421), 
INT32_C(-1753400065), INT32_C( -607538910)), simde_mm256_set_epi32(INT32_C(-1434991502), INT32_C(-1643531728), INT32_C( 1435031148), INT32_C( -241637048), INT32_C( 325610914), INT32_C( 482634268), INT32_C(-1954026803), INT32_C( -489641480)), simde_mm256_set_epi32(INT32_C( -674519168), INT32_C(-1327083104), INT32_C( -509996864), INT32_C( 1805837216), INT32_C( -983359264), INT32_C( 1422933920), INT32_C( -274227232), INT32_C( 2033591360)) }, { simde_mm256_set_epi32(INT32_C(-1508060177), INT32_C( 2047506344), INT32_C( 1926956558), INT32_C( -567187171), INT32_C( -718834191), INT32_C( 316061425), INT32_C(-1354037966), INT32_C(-2043187872)), simde_mm256_set_epi32(INT32_C( -787812104), INT32_C(-1305007947), INT32_C( 1142922615), INT32_C( 1342194494), INT32_C(-1552966924), INT32_C( 272965007), INT32_C(-2116417663), INT32_C( 1446174254)), simde_mm256_set_epi32(INT32_C(-1013285408), INT32_C( 1095693568), INT32_C( 1533067712), INT32_C( -970120288), INT32_C(-1527857632), INT32_C( 1524031008), INT32_C( -379541952), INT32_C( -957502464)) }, { simde_mm256_set_epi32(INT32_C( 1315357752), INT32_C(-1727529785), INT32_C(-1290894210), INT32_C(-1585668235), INT32_C( 1227224502), INT32_C( -793752248), INT32_C( -629408802), INT32_C( 1394059551)), simde_mm256_set_epi32(INT32_C( 743833146), INT32_C( -347973949), INT32_C( 521095178), INT32_C( -742841452), INT32_C( -734810084), INT32_C(-1542766714), INT32_C( 575957526), INT32_C(-1816399246)), simde_mm256_set_epi32(INT32_C( -858224896), INT32_C( 553621728), INT32_C( 1641058240), INT32_C( 798224032), INT32_C( 616478400), INT32_C( 369731840), INT32_C( 1333754816), INT32_C( 1660232672)) }, { simde_mm256_set_epi32(INT32_C( 150628023), INT32_C(-2069974717), INT32_C( 1177413093), INT32_C(-2053792688), INT32_C(-1561038597), INT32_C( -690144185), INT32_C( -416879322), INT32_C( 1986359122)), simde_mm256_set_epi32(INT32_C( 306184069), INT32_C( 2091432312), INT32_C( 1455569082), INT32_C( 1460019701), INT32_C(-1968965681), INT32_C(-1344732712), INT32_C( 
1352620988), INT32_C( 1100122997)), simde_mm256_set_epi32(INT32_C( 525129440), INT32_C(-1814681504), INT32_C( -977486688), INT32_C(-1296856576), INT32_C( 1586372448), INT32_C( -609777440), INT32_C( -455236416), INT32_C( -861017536)) }, { simde_mm256_set_epi32(INT32_C(-1165370523), INT32_C( 190390907), INT32_C( -318174800), INT32_C(-1752249068), INT32_C( 1366077326), INT32_C( 1019085647), INT32_C( 1171551091), INT32_C(-1805589438)), simde_mm256_set_epi32(INT32_C(-1007170348), INT32_C( 2038045091), INT32_C( 483759032), INT32_C(-1769389731), INT32_C( 1901958985), INT32_C(-1845366512), INT32_C( 234958311), INT32_C( -992749206)), simde_mm256_set_epi32(INT32_C( 1362848928), INT32_C( 1797541728), INT32_C(-1591659008), INT32_C( -237395328), INT32_C( 764801472), INT32_C(-1748997664), INT32_C(-1165070752), INT32_C(-1944287168)) }, { simde_mm256_set_epi32(INT32_C( -719890300), INT32_C(-1894949227), INT32_C( 1354912660), INT32_C( 262146704), INT32_C(-2066437384), INT32_C( -277466785), INT32_C( 1307284092), INT32_C( -145490102)), simde_mm256_set_epi32(INT32_C( 1880856976), INT32_C( -113670921), INT32_C(-1146592891), INT32_C( -833902193), INT32_C( -226567429), INT32_C( -58637801), INT32_C( -547686157), INT32_C( 870297549)), simde_mm256_set_epi32(INT32_C(-1561653120), INT32_C( -508833120), INT32_C( 407532160), INT32_C( -201240064), INT32_C(-1701486848), INT32_C( -289002528), INT32_C(-1116582016), INT32_C( -360715968)) }, { simde_mm256_set_epi32(INT32_C( 1194726573), INT32_C( -714629781), INT32_C( 721069762), INT32_C(-1027424294), INT32_C(-1451303753), INT32_C( -307072244), INT32_C( 760761319), INT32_C(-1826453170)), simde_mm256_set_epi32(INT32_C(-2121454006), INT32_C( 354423919), INT32_C( -610410351), INT32_C(-1822148121), INT32_C( 2107148479), INT32_C(-1935868808), INT32_C( -573099828), INT32_C( 986406309)), simde_mm256_set_epi32(INT32_C( -423455328), INT32_C(-1393316512), INT32_C( 1599395904), INT32_C( 1482160960), INT32_C( 802920160), INT32_C(-1236377216), 
INT32_C(-1425441568), INT32_C( 1683040704)) }, { simde_mm256_set_epi32(INT32_C( -461263801), INT32_C( 1863945688), INT32_C( 508376318), INT32_C(-1101321410), INT32_C( 1682501002), INT32_C(-1564690744), INT32_C( 469304116), INT32_C( 162493937)), simde_mm256_set_epi32(INT32_C( 1027297411), INT32_C( 436621801), INT32_C( 2048624604), INT32_C( 613668083), INT32_C( 2064314140), INT32_C( -717287635), INT32_C( 1286678409), INT32_C(-1591587787)), simde_mm256_set_epi32(INT32_C(-1875539744), INT32_C( -483280128), INT32_C( -911827008), INT32_C( -882546752), INT32_C(-1994542784), INT32_C( 1469503744), INT32_C( 2132829824), INT32_C( 904838688)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_slli_epi32(test_vec[i].a, 5); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_slli_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C(-1180812000753094585), INT64_C(-5675573813228216402), INT64_C(-2208027268370360000), INT64_C( 6465428915389083026)), simde_mm256_set_epi64x(INT64_C( 8452611756027991400), INT64_C(-5020149848375082542), INT64_C( 7925308354439905072), INT64_C( 953522845551677435)), simde_mm256_set_epi64x(INT64_C( -892495876679923488), INT64_C( 2849078713792591296), INT64_C( 3130103706986686464), INT64_C( 3979540481645589056)) }, { simde_mm256_set_epi64x(INT64_C(-4047432104007102232), INT64_C(-7778060476316871326), INT64_C( 2270807563927334286), INT64_C(-5320870512275475361)), simde_mm256_set_epi64x(INT64_C( 5866182958150697676), INT64_C(-5938517638175025937), INT64_C( 6284264256050873550), INT64_C( 1402097755121835442)), simde_mm256_set_epi64x(INT64_C( -390618812260410112), INT64_C(-9090262283915711424), INT64_C(-1121134249163509312), INT64_C(-4247159729429247008)) }, { simde_mm256_set_epi64x(INT64_C(-5299935544215164629), INT64_C(-5358218550815045801), INT64_C(-2743808865442122704), 
INT64_C(-5577727586229156383)), simde_mm256_set_epi64x(INT64_C(-1628942167927235670), INT64_C( 2616318076895434913), INT64_C(-3093772338434873461), INT64_C( 5967250390824240978)), simde_mm256_set_epi64x(INT64_C(-3577240751499303584), INT64_C(-5442296962695501088), INT64_C( 4431836674399831552), INT64_C( 5980157977762511904)) }, { simde_mm256_set_epi64x(INT64_C( 5567340862163337259), INT64_C(-5967855323816927748), INT64_C(-3043785185238641430), INT64_C( 8117316208405658103)), simde_mm256_set_epi64x(INT64_C( 8644099717862406948), INT64_C(-2019559086403734381), INT64_C(-4658546452517300382), INT64_C(-8872814693937714953)), simde_mm256_set_epi64x(INT64_C(-6312533147868723872), INT64_C(-6503929625046171776), INT64_C(-5167405559088767680), INT64_C( 1499701637047336672)) }, { simde_mm256_set_epi64x(INT64_C(-8259641032964555923), INT64_C( 6678157623055419482), INT64_C( -532130775756370858), INT64_C(-6271412084163076656)), simde_mm256_set_epi64x(INT64_C( 6145157523311970224), INT64_C(-5190146230173537442), INT64_C(-7773875053091172024), INT64_C(-4385818839583220370)), simde_mm256_set_epi64x(INT64_C(-6054096022932066912), INT64_C(-7659884946741195968), INT64_C( 1418559249505684160), INT64_C( 2228998117586614784)) }, { simde_mm256_set_epi64x(INT64_C(-3893461849415744807), INT64_C(-9064502035867553709), INT64_C( 3098807305629640641), INT64_C( 6650302822439097584)), simde_mm256_set_epi64x(INT64_C( 2307018380829868937), INT64_C( 8592643653599303501), INT64_C( 8944692494744620746), INT64_C(-7922755342769649115)), simde_mm256_set_epi64x(INT64_C( 4536429334663027488), INT64_C( 5083840031591107168), INT64_C( 6928113411600742432), INT64_C(-8551238566463496704)) }, { simde_mm256_set_epi64x(INT64_C(-1984148929379503272), INT64_C(-5205661836978001324), INT64_C( 6359749525414039769), INT64_C(-1541764205226378294)), simde_mm256_set_epi64x(INT64_C(-1490660722702882908), INT64_C( -633122769786086240), INT64_C(-8686691198507413769), INT64_C(-9123917909533884922)), 
simde_mm256_set_epi64x(INT64_C(-8152533519015449856), INT64_C( -560482119910077824), INT64_C( 597800002444204832), INT64_C( 6003777653884549440)) }, { simde_mm256_set_epi64x(INT64_C(-5232449947985336963), INT64_C( -518473474085028208), INT64_C( 1841137168562663956), INT64_C( 5952445751598523173)), simde_mm256_set_epi64x(INT64_C(-2687140035203018503), INT64_C( -119809082612743153), INT64_C( 6113467954286988131), INT64_C(-1537862337526048352)), simde_mm256_set_epi64x(INT64_C(-1417701672144818272), INT64_C( 1855592902988648960), INT64_C( 3576157172876591744), INT64_C( 6010823314057225376)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_slli_epi64(test_vec[i].a,5); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_slli_si256 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int8_t a[32]; const int8_t r[32]; } test_vec[] = { { { INT8_C( 78), INT8_C( 117), -INT8_C( 82), INT8_C( 44), -INT8_C( 74), -INT8_C( 55), INT8_C( 109), -INT8_C( 43), -INT8_C( 6), -INT8_C( 95), INT8_C( 101), INT8_C( 76), -INT8_C( 57), INT8_C( 91), INT8_C( 63), -INT8_C( 99), INT8_C( 10), -INT8_C( 79), -INT8_C( 4), -INT8_C( 13), -INT8_C( 104), -INT8_C( 113), -INT8_C( 84), INT8_C( 2), INT8_C( 111), INT8_C( 61), INT8_C( 13), INT8_C( 125), -INT8_C( 31), INT8_C( 95), -INT8_C( 60), INT8_C( 47) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 78), INT8_C( 117), -INT8_C( 82), INT8_C( 44), -INT8_C( 74), -INT8_C( 55), INT8_C( 109), -INT8_C( 43), -INT8_C( 6), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 10), -INT8_C( 79), -INT8_C( 4), -INT8_C( 13), -INT8_C( 104), -INT8_C( 113), -INT8_C( 84), INT8_C( 2), INT8_C( 111) } }, { { -INT8_C( 44), INT8_C( 114), INT8_C( 91), -INT8_C( 118), INT8_C( 59), -INT8_C( 56), INT8_C( 95), INT8_C( 53), INT8_C( 106), -INT8_C( 60), -INT8_C( 126), INT8_C( 49), INT8_C( 32), -INT8_C( 63), 
-INT8_C( 50), INT8_C( 42), INT8_C( 114), -INT8_C( 54), INT8_C( 29), INT8_C( 10), INT8_C( 90), -INT8_C( 55), INT8_C( 13), -INT8_C( 55), INT8_C( 6), INT8_C( 26), INT8_C( 70), -INT8_C( 25), INT8_C( 121), INT8_C( 10), INT8_C( 22), INT8_C( 77) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 44), INT8_C( 114), INT8_C( 91), -INT8_C( 118), INT8_C( 59), -INT8_C( 56), INT8_C( 95), INT8_C( 53), INT8_C( 106), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 114), -INT8_C( 54), INT8_C( 29), INT8_C( 10), INT8_C( 90), -INT8_C( 55), INT8_C( 13), -INT8_C( 55), INT8_C( 6) } }, { { INT8_C( 124), INT8_C( 114), -INT8_C( 40), -INT8_C( 73), INT8_C( 58), INT8_C( 55), -INT8_C( 20), -INT8_C( 92), -INT8_C( 4), INT8_C( 110), -INT8_C( 43), INT8_C( 28), INT8_C( 47), -INT8_C( 92), INT8_C( 70), -INT8_C( 95), INT8_C( 110), INT8_C( 99), -INT8_C( 84), -INT8_C( 56), INT8_C( 44), -INT8_C( 71), -INT8_C( 111), INT8_C( 50), -INT8_C( 45), -INT8_C( 41), INT8_C( 25), INT8_C( 76), -INT8_C( 31), INT8_C( 47), -INT8_C( 103), INT8_C( 93) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 124), INT8_C( 114), -INT8_C( 40), -INT8_C( 73), INT8_C( 58), INT8_C( 55), -INT8_C( 20), -INT8_C( 92), -INT8_C( 4), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 110), INT8_C( 99), -INT8_C( 84), -INT8_C( 56), INT8_C( 44), -INT8_C( 71), -INT8_C( 111), INT8_C( 50), -INT8_C( 45) } }, { { -INT8_C( 95), INT8_C( 113), INT8_C( 20), -INT8_C( 36), -INT8_C( 87), INT8_C( 1), INT8_MIN, -INT8_C( 91), INT8_C( 111), INT8_C( 86), -INT8_C( 63), -INT8_C( 97), -INT8_C( 6), INT8_C( 7), INT8_C( 64), INT8_C( 104), INT8_C( 106), -INT8_C( 20), INT8_C( 49), -INT8_C( 106), -INT8_C( 91), -INT8_C( 62), -INT8_C( 56), INT8_C( 120), -INT8_C( 102), -INT8_C( 31), -INT8_C( 60), INT8_C( 123), INT8_C( 16), INT8_C( 94), -INT8_C( 39), -INT8_C( 78) }, { INT8_C( 0), INT8_C( 0), INT8_C( 
0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 95), INT8_C( 113), INT8_C( 20), -INT8_C( 36), -INT8_C( 87), INT8_C( 1), INT8_MIN, -INT8_C( 91), INT8_C( 111), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 106), -INT8_C( 20), INT8_C( 49), -INT8_C( 106), -INT8_C( 91), -INT8_C( 62), -INT8_C( 56), INT8_C( 120), -INT8_C( 102) } }, { { -INT8_C( 49), -INT8_C( 19), -INT8_C( 114), INT8_C( 120), -INT8_C( 18), INT8_C( 14), INT8_C( 29), INT8_C( 94), INT8_C( 100), -INT8_C( 34), -INT8_C( 3), INT8_C( 94), -INT8_C( 27), INT8_C( 61), -INT8_C( 57), INT8_C( 79), INT8_C( 42), -INT8_C( 8), -INT8_C( 27), -INT8_C( 49), -INT8_C( 70), -INT8_C( 83), INT8_C( 72), INT8_C( 84), -INT8_C( 114), INT8_C( 12), -INT8_C( 48), -INT8_C( 97), INT8_C( 106), -INT8_C( 87), INT8_C( 81), INT8_C( 58) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 49), -INT8_C( 19), -INT8_C( 114), INT8_C( 120), -INT8_C( 18), INT8_C( 14), INT8_C( 29), INT8_C( 94), INT8_C( 100), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 42), -INT8_C( 8), -INT8_C( 27), -INT8_C( 49), -INT8_C( 70), -INT8_C( 83), INT8_C( 72), INT8_C( 84), -INT8_C( 114) } }, { { -INT8_C( 106), -INT8_C( 33), -INT8_C( 78), -INT8_C( 123), -INT8_C( 19), -INT8_C( 48), -INT8_C( 29), INT8_C( 82), -INT8_C( 82), -INT8_C( 32), -INT8_C( 80), -INT8_C( 108), INT8_C( 29), INT8_C( 119), -INT8_C( 29), INT8_C( 71), INT8_C( 111), -INT8_C( 55), INT8_C( 23), INT8_C( 42), INT8_C( 118), INT8_C( 95), INT8_C( 126), INT8_C( 5), INT8_C( 107), INT8_C( 78), -INT8_C( 92), -INT8_C( 42), -INT8_C( 9), -INT8_C( 11), INT8_C( 16), -INT8_C( 114) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 106), -INT8_C( 33), -INT8_C( 78), -INT8_C( 123), -INT8_C( 19), -INT8_C( 48), -INT8_C( 29), INT8_C( 82), -INT8_C( 82), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), 
INT8_C( 111), -INT8_C( 55), INT8_C( 23), INT8_C( 42), INT8_C( 118), INT8_C( 95), INT8_C( 126), INT8_C( 5), INT8_C( 107) } }, { { -INT8_C( 44), -INT8_C( 62), INT8_C( 19), -INT8_C( 63), -INT8_C( 110), -INT8_C( 10), INT8_C( 19), INT8_C( 65), -INT8_C( 42), -INT8_C( 60), -INT8_C( 43), -INT8_C( 13), INT8_C( 59), -INT8_C( 72), INT8_C( 59), -INT8_C( 85), -INT8_C( 127), INT8_C( 82), -INT8_C( 43), -INT8_C( 8), -INT8_C( 79), INT8_C( 83), -INT8_C( 3), INT8_C( 28), -INT8_C( 94), -INT8_C( 95), -INT8_C( 14), -INT8_C( 103), -INT8_C( 106), INT8_C( 2), INT8_C( 39), INT8_C( 106) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 44), -INT8_C( 62), INT8_C( 19), -INT8_C( 63), -INT8_C( 110), -INT8_C( 10), INT8_C( 19), INT8_C( 65), -INT8_C( 42), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 127), INT8_C( 82), -INT8_C( 43), -INT8_C( 8), -INT8_C( 79), INT8_C( 83), -INT8_C( 3), INT8_C( 28), -INT8_C( 94) } }, { { -INT8_C( 59), INT8_C( 58), INT8_C( 43), INT8_C( 87), INT8_C( 48), INT8_C( 63), -INT8_C( 104), INT8_C( 6), INT8_C( 3), INT8_C( 109), -INT8_C( 6), INT8_C( 62), INT8_C( 38), INT8_C( 53), -INT8_C( 23), -INT8_C( 89), -INT8_C( 121), -INT8_C( 66), -INT8_C( 97), INT8_C( 56), INT8_C( 18), -INT8_C( 100), INT8_C( 84), -INT8_C( 76), INT8_C( 61), INT8_C( 71), INT8_C( 77), -INT8_C( 45), INT8_C( 73), INT8_C( 117), INT8_C( 61), INT8_C( 14) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 59), INT8_C( 58), INT8_C( 43), INT8_C( 87), INT8_C( 48), INT8_C( 63), -INT8_C( 104), INT8_C( 6), INT8_C( 3), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 121), -INT8_C( 66), -INT8_C( 97), INT8_C( 56), INT8_C( 18), -INT8_C( 100), INT8_C( 84), -INT8_C( 76), INT8_C( 61) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi8(test_vec[i].a); simde__m256i r = 
simde_mm256_slli_si256(a, 7); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm_sllv_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 1355265632), INT32_C( 1126629442), INT32_C( 130687039), INT32_C( -680602946)), simde_mm_set_epi32(INT32_C( 29), INT32_C( 12), INT32_C( 25), INT32_C( 28)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 1879318528), INT32_C( 2113929216), INT32_C( -536870912)) }, { simde_mm_set_epi32(INT32_C( 1737379484), INT32_C( 1883980170), INT32_C( -705882711), INT32_C( 1718253197)), simde_mm_set_epi32(INT32_C( 1), INT32_C( 30), INT32_C( 30), INT32_C(-2112542540)), simde_mm_set_epi32(INT32_C( -820208328), INT32_MIN , INT32_C( 1073741824), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 329097328), INT32_C(-1602739192), INT32_C(-1512309543), INT32_C(-1399203732)), simde_mm_set_epi32(INT32_C( 24), INT32_C( 5), INT32_C( 13), INT32_C( 29)), simde_mm_set_epi32(INT32_C( 1879048192), INT32_C( 251953408), INT32_C( 2140872704), INT32_MIN ) }, { simde_mm_set_epi32(INT32_C( 1241289542), INT32_C( 668450526), INT32_C(-1156837735), INT32_C( 6941002)), simde_mm_set_epi32(INT32_C( 1273396233), INT32_C( 1711746946), INT32_C( 28), INT32_C( 28)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C(-1879048192), INT32_C(-1610612736)) }, { simde_mm_set_epi32(INT32_C(-1594911645), INT32_C( -201864987), INT32_C(-1345215776), INT32_C(-1388228747)), simde_mm_set_epi32(INT32_C( 25), INT32_C( 21), INT32_C( 2), INT32_C( 30)), simde_mm_set_epi32(INT32_C( -973078528), INT32_C( 480247808), INT32_C(-1085895808), INT32_C( 1073741824)) }, { simde_mm_set_epi32(INT32_C( -2803715), INT32_C( -534114403), INT32_C( -330536019), INT32_C( 1118335664)), simde_mm_set_epi32(INT32_C( 10), INT32_C( 5), INT32_C( 21), INT32_C( 30)), simde_mm_set_epi32(INT32_C( 1423963136), INT32_C( 88208288), INT32_C( 1973420032), INT32_C( 0)) }, { 
simde_mm_set_epi32(INT32_C( 1957597354), INT32_C( -483301922), INT32_C(-1795584966), INT32_C( 886123627)), simde_mm_set_epi32(INT32_C( 7), INT32_C( 6), INT32_C( 22), INT32_C( 29)), simde_mm_set_epi32(INT32_C( 1464358144), INT32_C( -866551936), INT32_C( 243269632), INT32_C( 1610612736)) }, { simde_mm_set_epi32(INT32_C(-1580825747), INT32_C(-2024732896), INT32_C(-1162095669), INT32_C( 732464901)), simde_mm_set_epi32(INT32_C( 19), INT32_C( 12), INT32_C( 30), INT32_C( 2014545570)), simde_mm_set_epi32(INT32_C( 459800576), INT32_C( 275906560), INT32_C(-1073741824), INT32_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_sllv_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_sllv_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( -907599535), INT32_C( 1816850761), INT32_C(-1813723808), INT32_C( 1682346964), INT32_C( 297464492), INT32_C( 1204833268), INT32_C(-2121860190), INT32_C( 1275339488)), simde_mm256_set_epi32(INT32_C( 26), INT32_C( 9), INT32_C( 13), INT32_C( 1136705026), INT32_C( 13), INT32_C( 1), INT32_C( 31), INT32_C( 21)), simde_mm256_set_epi32(INT32_C( 1140850688), INT32_C(-1780313600), INT32_C(-1733558272), INT32_C( 0), INT32_C( 1582661632), INT32_C(-1885300760), INT32_C( 0), INT32_C( 1543503872)) }, { simde_mm256_set_epi32(INT32_C( 829211350), INT32_C( -225754766), INT32_C( -55839), INT32_C(-1551786022), INT32_C(-1090517055), INT32_C( 692773880), INT32_C(-1952556964), INT32_C( -931315699)), simde_mm256_set_epi32(INT32_C( 920320477), INT32_C( 1278238897), INT32_C( 5), INT32_C( 17), INT32_C( 14), INT32_C( 27), INT32_C( 10), INT32_C( 5)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( -1786848), INT32_C( 1068761088), INT32_C( 32522240), INT32_C(-1073741824), INT32_C( 2036428800), INT32_C( 262668704)) }, { 
simde_mm256_set_epi32(INT32_C(-1678489123), INT32_C( 600143956), INT32_C( 1432800149), INT32_C(-1881004909), INT32_C( -8633494), INT32_C( 61500000), INT32_C( 1965990355), INT32_C( 563956027)), simde_mm256_set_epi32(INT32_C( 23), INT32_C( 9), INT32_C( 1), INT32_C( 16), INT32_C( 6), INT32_C( 10), INT32_C( 3), INT32_C( 12)), simde_mm256_set_epi32(INT32_C( -293601280), INT32_C(-1963939840), INT32_C(-1429366998), INT32_C( 613613568), INT32_C( -552543616), INT32_C(-1448509440), INT32_C(-1451946344), INT32_C( -728518656)) }, { simde_mm256_set_epi32(INT32_C( 338504320), INT32_C( 883906167), INT32_C( -890950152), INT32_C( -640473906), INT32_C( -675244408), INT32_C( 1260399807), INT32_C( 1576784170), INT32_C( 1082002360)), simde_mm256_set_epi32(INT32_C( 28), INT32_C( 10), INT32_C( 31), INT32_C( -121273429), INT32_C( 14), INT32_C( 14), INT32_C( 26), INT32_C( 17)), simde_mm256_set_epi32(INT32_C( 0), INT32_C(-1118184448), INT32_C( 0), INT32_C( 0), INT32_C( 631373824), INT32_C( 187678720), INT32_C(-1476395008), INT32_C( 393216000)) }, { simde_mm256_set_epi32(INT32_C( 1222178972), INT32_C( 657678434), INT32_C( 1236396836), INT32_C( -851157565), INT32_C( 991812030), INT32_C( -17431900), INT32_C( 1566944363), INT32_C( -95337079)), simde_mm256_set_epi32(INT32_C( 31), INT32_C( 30), INT32_C(-1114512709), INT32_C( 1), INT32_C( 6), INT32_C( 9), INT32_C( 15), INT32_C( 25)), simde_mm256_set_epi32(INT32_C( 0), INT32_MIN , INT32_C( 0), INT32_C(-1702315130), INT32_C( -948539520), INT32_C( -335198208), INT32_C( -701136896), INT32_C( 301989888)) }, { simde_mm256_set_epi32(INT32_C( 1430218987), INT32_C(-1167579987), INT32_C(-1424112663), INT32_C(-2078521852), INT32_C( -642033113), INT32_C( 1296873781), INT32_C( 1828392907), INT32_C( -832748013)), simde_mm256_set_epi32(INT32_C( 1237301658), INT32_C( 10), INT32_C( 25), INT32_C( 2), INT32_C( 1381780746), INT32_C( 7), INT32_C( 0), INT32_C( 9)), simde_mm256_set_epi32(INT32_C( 0), INT32_C(-1600998400), INT32_C( -771751936), INT32_C( 275847184), 
INT32_C( 0), INT32_C(-1503880576), INT32_C( 1828392907), INT32_C(-1165220352)) }, { simde_mm256_set_epi32(INT32_C(-1957098634), INT32_C( 2020272894), INT32_C( 274355604), INT32_C(-2023351499), INT32_C( 1868935568), INT32_C( -188773630), INT32_C( 1217940242), INT32_C( -149131379)), simde_mm256_set_epi32(INT32_C( 31), INT32_C( -296627453), INT32_C( 26), INT32_C( 17), INT32_C( 3), INT32_C( 30), INT32_C( 1792285256), INT32_C( 21)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 1342177280), INT32_C( 912916480), INT32_C( 2066582656), INT32_MIN , INT32_C( 0), INT32_C( -241172480)) }, { simde_mm256_set_epi32(INT32_C( 1147857332), INT32_C( 1591168671), INT32_C( 670003425), INT32_C(-2025325767), INT32_C( 1644550271), INT32_C( -644639924), INT32_C( -405472702), INT32_C(-1412280243)), simde_mm256_set_epi32(INT32_C( 17), INT32_C( -931471145), INT32_C( 903571660), INT32_C( 7), INT32_C( 2), INT32_C( 26), INT32_C( 19), INT32_C( 29)), simde_mm256_set_epi32(INT32_C( -748158976), INT32_C( 0), INT32_C( 0), INT32_C(-1543660416), INT32_C(-2011733508), INT32_C( 805306368), INT32_C( -770703360), INT32_C(-1610612736)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_sllv_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_sllv_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C(7972759472039988968), INT64_C(-4018394183250543798)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 14)), simde_mm_set_epi64x(INT64_C(7972759472039988968), INT64_C(-940699307519868928)) }, { simde_mm_set_epi64x(INT64_C( 758988409686043336), INT64_C(-1495726894487680579)), simde_mm_set_epi64x(INT64_C( 3), INT64_C( 44)), simde_mm_set_epi64x(INT64_C(6071907277488346688), INT64_C( 332087695960440832)) }, { simde_mm_set_epi64x(INT64_C(-2629933388658093644), INT64_C(-289252332092708880)), 
simde_mm_set_epi64x(INT64_C( 10), INT64_C(-3585819117600524220)), simde_mm_set_epi64x(INT64_C( 172844775706644480), INT64_C( 0)) }, { simde_mm_set_epi64x(INT64_C(4426668050653871600), INT64_C(1900489733297038847)), simde_mm_set_epi64x(INT64_C( 12), INT64_C( 1)), simde_mm_set_epi64x(INT64_C(-1517088978231164928), INT64_C(3800979466594077694)) }, { simde_mm_set_epi64x(INT64_C(-6094836834726644397), INT64_C(-5043635791122646072)), simde_mm_set_epi64x(INT64_C( 24), INT64_C( 28)), simde_mm_set_epi64x(INT64_C(3516791194124288000), INT64_C(-8765408141610319872)) }, { simde_mm_set_epi64x(INT64_C(-5021615519821282876), INT64_C(7147251859786373177)), simde_mm_set_epi64x(INT64_C( 19), INT64_C( 38)), simde_mm_set_epi64x(INT64_C(1896775987578798080), INT64_C(-5456658230472081408)) }, { simde_mm_set_epi64x(INT64_C(-5191993007641480010), INT64_C(-4233139431813518757)), simde_mm_set_epi64x(INT64_C( 31), INT64_C( 1)), simde_mm_set_epi64x(INT64_C(1701193368639373312), INT64_C(-8466278863627037514)) }, { simde_mm_set_epi64x(INT64_C( 546100030060415122), INT64_C(-8171136833646680622)), simde_mm_set_epi64x(INT64_C(5414271824693851176), INT64_C(-2835576914550646887)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_sllv_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_sllv_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C( -91881763986021568), INT64_C( 8531978069693998286), INT64_C( 7972759472039988968), INT64_C(-4018394183250543798)), simde_mm256_set_epi64x(INT64_C( 3), INT64_C( 44), INT64_C( 8), INT64_C( 61)), simde_mm256_set_epi64x(INT64_C( -735054111888172544), INT64_C(-8481157707640864768), INT64_C(-6562167339523053568), INT64_C( 4611686018427387904)) }, { simde_mm256_set_epi64x(INT64_C( 766349923467523082), 
INT64_C(-3585819117600524220), INT64_C(-2629933388658093644), INT64_C( -289252332092708880)), simde_mm256_set_epi64x(INT64_C( 12), INT64_C( 1), INT64_C( 48), INT64_C( 1900489733297038847)), simde_mm256_set_epi64x(INT64_C( 3022793992350769152), INT64_C(-7171638235201048440), INT64_C( 5887330612880080896), INT64_C( 0)) }, { simde_mm256_set_epi64x(INT64_C( 3582646241058391448), INT64_C( 5243361490463539228), INT64_C(-6094836834726644397), INT64_C(-5043635791122646072)), simde_mm256_set_epi64x(INT64_C( 19), INT64_C( 38), INT64_C( 4), INT64_C( 57)), simde_mm256_set_epi64x(INT64_C(-1282873453157810176), INT64_C( 193662480558325760), INT64_C(-5283668987078552272), INT64_C(-8070450532247928832)) }, { simde_mm256_set_epi64x(INT64_C(-3133121661023385633), INT64_C( 86614665799225345), INT64_C(-5191993007641480010), INT64_C(-4233139431813518757)), simde_mm256_set_epi64x(INT64_C( 5414271824693851176), INT64_C(-2835576914550646887), INT64_C( 18), INT64_C( 18)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 0), INT64_C( 6302995343711141888), INT64_C( 8680028822435528704)) }, { simde_mm256_set_epi64x(INT64_C(-8035395454128872003), INT64_C( 7344605666715935137), INT64_C(-1365449772765093192), INT64_C( 717291431309989344)), simde_mm256_set_epi64x(INT64_C( 55), INT64_C( 11), INT64_C( 42), INT64_C( 47)), simde_mm256_set_epi64x(INT64_C(-2413929400270585856), INT64_C( 7655985360950593536), INT64_C(-3779962646619488256), INT64_C(-1517713074423857152)) }, { simde_mm256_set_epi64x(INT64_C(-1439191960592432599), INT64_C( 1735866133656124044), INT64_C(-3359799274550972684), INT64_C(-8271287930321791084)), simde_mm256_set_epi64x(INT64_C( 41), INT64_C( 53), INT64_C( 16), INT64_C( 10)), simde_mm256_set_epi64x(INT64_C(-7744975299216932864), INT64_C( 5872693914091126784), INT64_C(-7467993175337730048), INT64_C(-2743310816829878272)) }, { simde_mm256_set_epi64x(INT64_C( 6427900085291119138), INT64_C( 5786748734574497216), INT64_C( 5043272528786026223), INT64_C( 3596044833449463138)), 
simde_mm256_set_epi64x(INT64_C( 63), INT64_C( 27), INT64_C( 24), INT64_C( 24)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C(-8915429143793893376), INT64_C(-6557303802913882112), INT64_C(-5103103612012199936)) }, { simde_mm256_set_epi64x(INT64_C( 4490003779390629369), INT64_C( 8881591764214441368), INT64_C(-2584940796434610056), INT64_C( 1109135920291955027)), simde_mm256_set_epi64x(INT64_C( 14), INT64_C( 34), INT64_C( 5), INT64_C(-3135683893925165551)), simde_mm256_set_epi64x(INT64_C(-1393444417620262912), INT64_C(-8636331371352031232), INT64_C(-8931129191069315328), INT64_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_sllv_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_sra_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[16]; const int64_t count[2]; const int16_t r[16]; } test_vec[] = { { { -INT16_C( 21483), -INT16_C( 24863), INT16_C( 31976), -INT16_C( 522), INT16_C( 20226), INT16_C( 14016), INT16_C( 31056), INT16_C( 18579), -INT16_C( 2251), INT16_C( 9160), -INT16_C( 13514), INT16_C( 32064), INT16_C( 6594), INT16_C( 27048), INT16_C( 5309), -INT16_C( 11561) }, { INT64_C( 3), INT64_C( 1) }, { -INT16_C( 2686), -INT16_C( 3108), INT16_C( 3997), -INT16_C( 66), INT16_C( 2528), INT16_C( 1752), INT16_C( 3882), INT16_C( 2322), -INT16_C( 282), INT16_C( 1145), -INT16_C( 1690), INT16_C( 4008), INT16_C( 824), INT16_C( 3381), INT16_C( 663), -INT16_C( 1446) } }, { { INT16_C( 5879), INT16_C( 11576), INT16_C( 30946), -INT16_C( 23382), INT16_C( 21137), INT16_C( 19982), -INT16_C( 6809), INT16_C( 10017), -INT16_C( 28259), -INT16_C( 11568), INT16_C( 30456), -INT16_C( 21240), INT16_C( 30173), -INT16_C( 17229), INT16_C( 373), INT16_C( 27857) }, { INT64_C( 1), INT64_C( 2) }, { INT16_C( 2939), INT16_C( 5788), INT16_C( 15473), -INT16_C( 11691), INT16_C( 10568), INT16_C( 9991), -INT16_C( 3405), INT16_C( 5008), -INT16_C( 14130), 
-INT16_C( 5784), INT16_C( 15228), -INT16_C( 10620), INT16_C( 15086), -INT16_C( 8615), INT16_C( 186), INT16_C( 13928) } }, { { -INT16_C( 2797), INT16_C( 2816), INT16_C( 2411), INT16_C( 18616), INT16_C( 27518), -INT16_C( 3067), -INT16_C( 10644), -INT16_C( 31648), -INT16_C( 1313), INT16_C( 24701), INT16_C( 6973), -INT16_C( 11406), -INT16_C( 11577), INT16_C( 22736), -INT16_C( 2732), INT16_C( 26503) }, { INT64_C( 0), INT64_C( 5) }, { -INT16_C( 2797), INT16_C( 2816), INT16_C( 2411), INT16_C( 18616), INT16_C( 27518), -INT16_C( 3067), -INT16_C( 10644), -INT16_C( 31648), -INT16_C( 1313), INT16_C( 24701), INT16_C( 6973), -INT16_C( 11406), -INT16_C( 11577), INT16_C( 22736), -INT16_C( 2732), INT16_C( 26503) } }, { { INT16_C( 1117), -INT16_C( 25672), INT16_C( 10783), -INT16_C( 6290), INT16_C( 16380), INT16_C( 20543), -INT16_C( 14796), INT16_C( 7863), INT16_C( 10574), -INT16_C( 8589), INT16_C( 4436), -INT16_C( 5395), -INT16_C( 3916), INT16_C( 11756), INT16_C( 29524), -INT16_C( 20091) }, { INT64_C( 6), INT64_C( 13) }, { INT16_C( 17), -INT16_C( 402), INT16_C( 168), -INT16_C( 99), INT16_C( 255), INT16_C( 320), -INT16_C( 232), INT16_C( 122), INT16_C( 165), -INT16_C( 135), INT16_C( 69), -INT16_C( 85), -INT16_C( 62), INT16_C( 183), INT16_C( 461), -INT16_C( 314) } }, { { -INT16_C( 16491), -INT16_C( 5712), -INT16_C( 25135), -INT16_C( 31277), -INT16_C( 16242), -INT16_C( 7501), INT16_C( 14387), -INT16_C( 21869), -INT16_C( 8074), -INT16_C( 8896), -INT16_C( 16741), -INT16_C( 27327), -INT16_C( 2437), -INT16_C( 317), INT16_C( 3938), -INT16_C( 2096) }, { INT64_C( 10), INT64_C( 8) }, { -INT16_C( 17), -INT16_C( 6), -INT16_C( 25), -INT16_C( 31), -INT16_C( 16), -INT16_C( 8), INT16_C( 14), -INT16_C( 22), -INT16_C( 8), -INT16_C( 9), -INT16_C( 17), -INT16_C( 27), -INT16_C( 3), -INT16_C( 1), INT16_C( 3), -INT16_C( 3) } }, { { -INT16_C( 28159), -INT16_C( 25500), -INT16_C( 23216), -INT16_C( 13519), -INT16_C( 2917), -INT16_C( 567), -INT16_C( 26365), -INT16_C( 11787), -INT16_C( 10727), INT16_C( 14193), 
-INT16_C( 26998), -INT16_C( 30), INT16_C( 28782), INT16_C( 32422), -INT16_C( 1903), -INT16_C( 28155) }, { INT64_C( 10), INT64_C( 2) }, { -INT16_C( 28), -INT16_C( 25), -INT16_C( 23), -INT16_C( 14), -INT16_C( 3), -INT16_C( 1), -INT16_C( 26), -INT16_C( 12), -INT16_C( 11), INT16_C( 13), -INT16_C( 27), -INT16_C( 1), INT16_C( 28), INT16_C( 31), -INT16_C( 2), -INT16_C( 28) } }, { { -INT16_C( 26510), -INT16_C( 680), INT16_C( 15150), -INT16_C( 25348), -INT16_C( 23893), INT16_C( 15387), INT16_C( 8346), INT16_C( 9422), -INT16_C( 887), -INT16_C( 26626), -INT16_C( 23717), -INT16_C( 20927), -INT16_C( 5870), INT16_C( 6660), INT16_C( 11141), -INT16_C( 1989) }, { INT64_C( 7), INT64_C( 5) }, { -INT16_C( 208), -INT16_C( 6), INT16_C( 118), -INT16_C( 199), -INT16_C( 187), INT16_C( 120), INT16_C( 65), INT16_C( 73), -INT16_C( 7), -INT16_C( 209), -INT16_C( 186), -INT16_C( 164), -INT16_C( 46), INT16_C( 52), INT16_C( 87), -INT16_C( 16) } }, { { INT16_C( 20864), -INT16_C( 9238), INT16_C( 11508), INT16_C( 1673), -INT16_C( 29419), -INT16_C( 26080), INT16_C( 23736), INT16_C( 31890), -INT16_C( 30736), -INT16_C( 16529), -INT16_C( 392), INT16_C( 3129), -INT16_C( 4184), INT16_C( 29242), -INT16_C( 29581), -INT16_C( 3131) }, { INT64_C( 15), INT64_C( 4) }, { INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m128i count = simde_x_mm_loadu_epi64(test_vec[i].count); simde__m256i r = simde_mm256_sra_epi16(a, count); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm256_sra_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[8]; const int64_t count[2]; const int32_t r[8]; } test_vec[] = { { { -INT32_C( 576905387), 
INT32_C( 1043904660), INT32_C( 295463941), INT32_C( 566319288), -INT32_C( 368870807), -INT32_C( 483175452), -INT32_C( 12265399), -INT32_C( 1827624387) }, { INT64_C( 6), INT64_C( 15) }, { -INT32_C( 9014147), INT32_C( 16311010), INT32_C( 4616624), INT32_C( 8848738), -INT32_C( 5763607), -INT32_C( 7549617), -INT32_C( 191647), -INT32_C( 28556632) } }, { { -INT32_C( 1645940039), -INT32_C( 1887430843), INT32_C( 764331248), INT32_C( 952147822), -INT32_C( 1281937075), -INT32_C( 283233318), -INT32_C( 37905556), -INT32_C( 1812484902) }, { INT64_C( 14), INT64_C( 14) }, { -INT32_C( 100461), -INT32_C( 115200), INT32_C( 46651), INT32_C( 58114), -INT32_C( 78244), -INT32_C( 17288), -INT32_C( 2314), -INT32_C( 110626) } }, { { -INT32_C( 593448446), INT32_C( 1389150181), INT32_C( 877627738), -INT32_C( 741914827), INT32_C( 381155107), INT32_C( 469379751), -INT32_C( 855766228), -INT32_C( 541451812) }, { INT64_C( 7), INT64_C( 11) }, { -INT32_C( 4636316), INT32_C( 10852735), INT32_C( 6856466), -INT32_C( 5796210), INT32_C( 2977774), INT32_C( 3667029), -INT32_C( 6685674), -INT32_C( 4230093) } }, { { INT32_C( 265474151), INT32_C( 657116411), -INT32_C( 1259132713), INT32_C( 211004737), INT32_C( 566054408), -INT32_C( 426393642), -INT32_C( 1439941103), INT32_C( 359024046) }, { INT64_C( 13), INT64_C( 14) }, { INT32_C( 32406), INT32_C( 80214), -INT32_C( 153703), INT32_C( 25757), INT32_C( 69098), -INT32_C( 52051), -INT32_C( 175775), INT32_C( 43826) } }, { { INT32_C( 1208124018), INT32_C( 1395627842), -INT32_C( 2080482347), -INT32_C( 1248238432), -INT32_C( 1547256419), -INT32_C( 2071921396), INT32_C( 20779304), -INT32_C( 1478426059) }, { INT64_C( 5), INT64_C( 1) }, { INT32_C( 37753875), INT32_C( 43613370), -INT32_C( 65015074), -INT32_C( 39007451), -INT32_C( 48351764), -INT32_C( 64747544), INT32_C( 649353), -INT32_C( 46200815) } }, { { INT32_C( 935171370), -INT32_C( 1145356909), -INT32_C( 2084767666), INT32_C( 2133565178), -INT32_C( 62514303), -INT32_C( 1269997254), -INT32_C( 154263175), -INT32_C( 
1206805106) }, { INT64_C( 5), INT64_C( 5) }, { INT32_C( 29224105), -INT32_C( 35792404), -INT32_C( 65148990), INT32_C( 66673911), -INT32_C( 1953572), -INT32_C( 39687415), -INT32_C( 4820725), -INT32_C( 37712660) } }, { { INT32_C( 1582981924), INT32_C( 1024632772), INT32_C( 1446305992), -INT32_C( 1341241987), INT32_C( 527891987), INT32_C( 1266350248), INT32_C( 317216822), -INT32_C( 2056321951) }, { INT64_C( 3), INT64_C( 15) }, { INT32_C( 197872740), INT32_C( 128079096), INT32_C( 180788249), -INT32_C( 167655249), INT32_C( 65986498), INT32_C( 158293781), INT32_C( 39652102), -INT32_C( 257040244) } }, { { INT32_C( 1125480859), -INT32_C( 1483829391), INT32_C( 1220114151), -INT32_C( 489870981), INT32_C( 1661907186), INT32_C( 2057009061), -INT32_C( 154392044), INT32_C( 1659621319) }, { INT64_C( 7), INT64_C( 3) }, { INT32_C( 8792819), -INT32_C( 11592418), INT32_C( 9532141), -INT32_C( 3827118), INT32_C( 12983649), INT32_C( 16070383), -INT32_C( 1206188), INT32_C( 12965791) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); simde__m128i count = simde_x_mm_loadu_epi64(test_vec[i].count); simde__m256i r = simde_mm256_sra_epi32(a, count); simde_test_x86_assert_equal_i32x8(r, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm256_srai_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r1; simde__m256i r3; simde__m256i r5; simde__m256i r11; simde__m256i r13; simde__m256i r15; simde__m256i r16; simde__m256i r24; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C( 15196), INT16_C(-26519), INT16_C( 2034), INT16_C( 3767), INT16_C( 20039), INT16_C( 24955), INT16_C( -829), INT16_C( 24412), INT16_C( 5644), INT16_C(-14035), INT16_C( 32481), INT16_C(-26971), INT16_C(-13019), INT16_C(-23412), INT16_C(-14153), INT16_C(-22092)), simde_mm256_set_epi16(INT16_C( 7598), INT16_C(-13260), INT16_C( 1017), INT16_C( 1883), INT16_C( 10019), INT16_C( 12477), INT16_C( 
-415), INT16_C( 12206), INT16_C( 2822), INT16_C( -7018), INT16_C( 16240), INT16_C(-13486), INT16_C( -6510), INT16_C(-11706), INT16_C( -7077), INT16_C(-11046)), simde_mm256_set_epi16(INT16_C( 1899), INT16_C( -3315), INT16_C( 254), INT16_C( 470), INT16_C( 2504), INT16_C( 3119), INT16_C( -104), INT16_C( 3051), INT16_C( 705), INT16_C( -1755), INT16_C( 4060), INT16_C( -3372), INT16_C( -1628), INT16_C( -2927), INT16_C( -1770), INT16_C( -2762)), simde_mm256_set_epi16(INT16_C( 474), INT16_C( -829), INT16_C( 63), INT16_C( 117), INT16_C( 626), INT16_C( 779), INT16_C( -26), INT16_C( 762), INT16_C( 176), INT16_C( -439), INT16_C( 1015), INT16_C( -843), INT16_C( -407), INT16_C( -732), INT16_C( -443), INT16_C( -691)), simde_mm256_set_epi16(INT16_C( 7), INT16_C( -13), INT16_C( 0), INT16_C( 1), INT16_C( 9), INT16_C( 12), INT16_C( -1), INT16_C( 11), INT16_C( 2), INT16_C( -7), INT16_C( 15), INT16_C( -14), INT16_C( -7), INT16_C( -12), INT16_C( -7), INT16_C( -11)), simde_mm256_set_epi16(INT16_C( 1), INT16_C( -4), INT16_C( 0), INT16_C( 0), INT16_C( 2), INT16_C( 3), INT16_C( -1), INT16_C( 2), INT16_C( 0), INT16_C( -2), INT16_C( 3), INT16_C( -4), INT16_C( -2), INT16_C( -3), INT16_C( -2), INT16_C( -3)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1)) }, { simde_mm256_set_epi16(INT16_C( 21167), INT16_C( 19648), INT16_C( 2514), INT16_C( 
5742), INT16_C( 10728), INT16_C( -8816), INT16_C( 21963), INT16_C( 23917), INT16_C( 14277), INT16_C(-22184), INT16_C( 6487), INT16_C( -1330), INT16_C( 12982), INT16_C(-12306), INT16_C( -2383), INT16_C( -871)), simde_mm256_set_epi16(INT16_C( 10583), INT16_C( 9824), INT16_C( 1257), INT16_C( 2871), INT16_C( 5364), INT16_C( -4408), INT16_C( 10981), INT16_C( 11958), INT16_C( 7138), INT16_C(-11092), INT16_C( 3243), INT16_C( -665), INT16_C( 6491), INT16_C( -6153), INT16_C( -1192), INT16_C( -436)), simde_mm256_set_epi16(INT16_C( 2645), INT16_C( 2456), INT16_C( 314), INT16_C( 717), INT16_C( 1341), INT16_C( -1102), INT16_C( 2745), INT16_C( 2989), INT16_C( 1784), INT16_C( -2773), INT16_C( 810), INT16_C( -167), INT16_C( 1622), INT16_C( -1539), INT16_C( -298), INT16_C( -109)), simde_mm256_set_epi16(INT16_C( 661), INT16_C( 614), INT16_C( 78), INT16_C( 179), INT16_C( 335), INT16_C( -276), INT16_C( 686), INT16_C( 747), INT16_C( 446), INT16_C( -694), INT16_C( 202), INT16_C( -42), INT16_C( 405), INT16_C( -385), INT16_C( -75), INT16_C( -28)), simde_mm256_set_epi16(INT16_C( 10), INT16_C( 9), INT16_C( 1), INT16_C( 2), INT16_C( 5), INT16_C( -5), INT16_C( 10), INT16_C( 11), INT16_C( 6), INT16_C( -11), INT16_C( 3), INT16_C( -1), INT16_C( 6), INT16_C( -7), INT16_C( -2), INT16_C( -1)), simde_mm256_set_epi16(INT16_C( 2), INT16_C( 2), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( -2), INT16_C( 2), INT16_C( 2), INT16_C( 1), INT16_C( -3), INT16_C( 0), INT16_C( -1), INT16_C( 1), INT16_C( -2), INT16_C( -1), INT16_C( -1)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1)), 
simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1)) }, { simde_mm256_set_epi16(INT16_C( 2621), INT16_C(-27438), INT16_C(-25117), INT16_C(-30861), INT16_C( 13622), INT16_C( 18856), INT16_C( 6824), INT16_C(-32482), INT16_C(-22803), INT16_C( -6837), INT16_C( -6446), INT16_C(-26335), INT16_C(-21911), INT16_C(-30376), INT16_C( -8706), INT16_C( -3742)), simde_mm256_set_epi16(INT16_C( 1310), INT16_C(-13719), INT16_C(-12559), INT16_C(-15431), INT16_C( 6811), INT16_C( 9428), INT16_C( 3412), INT16_C(-16241), INT16_C(-11402), INT16_C( -3419), INT16_C( -3223), INT16_C(-13168), INT16_C(-10956), INT16_C(-15188), INT16_C( -4353), INT16_C( -1871)), simde_mm256_set_epi16(INT16_C( 327), INT16_C( -3430), INT16_C( -3140), INT16_C( -3858), INT16_C( 1702), INT16_C( 2357), INT16_C( 853), INT16_C( -4061), INT16_C( -2851), INT16_C( -855), INT16_C( -806), INT16_C( -3292), INT16_C( -2739), INT16_C( -3797), INT16_C( -1089), INT16_C( -468)), simde_mm256_set_epi16(INT16_C( 81), INT16_C( -858), INT16_C( -785), INT16_C( -965), INT16_C( 425), INT16_C( 589), INT16_C( 213), INT16_C( -1016), INT16_C( -713), INT16_C( -214), INT16_C( -202), INT16_C( -823), INT16_C( -685), INT16_C( -950), INT16_C( -273), INT16_C( -117)), simde_mm256_set_epi16(INT16_C( 1), INT16_C( -14), INT16_C( -13), INT16_C( -16), INT16_C( 6), INT16_C( 9), INT16_C( 3), INT16_C( -16), INT16_C( -12), INT16_C( -4), INT16_C( -4), INT16_C( -13), INT16_C( -11), INT16_C( -15), INT16_C( -5), INT16_C( -2)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( -4), INT16_C( -4), INT16_C( -4), INT16_C( 1), INT16_C( 2), INT16_C( 0), INT16_C( -4), INT16_C( -3), INT16_C( -1), INT16_C( -1), INT16_C( -4), INT16_C( -3), INT16_C( -4), INT16_C( -2), INT16_C( -1)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), 
INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1)) }, { simde_mm256_set_epi16(INT16_C(-20091), INT16_C( -5864), INT16_C( -1105), INT16_C( 9308), INT16_C( 14226), INT16_C( 20343), INT16_C( -8516), INT16_C( -8308), INT16_C( 4189), INT16_C(-13071), INT16_C( -7355), INT16_C(-23705), INT16_C( 28782), INT16_C( 17354), INT16_C(-20377), INT16_C( 15337)), simde_mm256_set_epi16(INT16_C(-10046), INT16_C( -2932), INT16_C( -553), INT16_C( 4654), INT16_C( 7113), INT16_C( 10171), INT16_C( -4258), INT16_C( -4154), INT16_C( 2094), INT16_C( -6536), INT16_C( -3678), INT16_C(-11853), INT16_C( 14391), INT16_C( 8677), INT16_C(-10189), INT16_C( 7668)), simde_mm256_set_epi16(INT16_C( -2512), INT16_C( -733), INT16_C( -139), INT16_C( 1163), INT16_C( 1778), INT16_C( 2542), INT16_C( -1065), INT16_C( -1039), INT16_C( 523), INT16_C( -1634), INT16_C( -920), INT16_C( -2964), INT16_C( 3597), INT16_C( 2169), INT16_C( -2548), INT16_C( 1917)), simde_mm256_set_epi16(INT16_C( -628), INT16_C( -184), INT16_C( -35), INT16_C( 290), INT16_C( 444), INT16_C( 635), INT16_C( -267), INT16_C( -260), INT16_C( 130), INT16_C( -409), INT16_C( -230), INT16_C( -741), INT16_C( 899), INT16_C( 542), INT16_C( -637), INT16_C( 479)), simde_mm256_set_epi16(INT16_C( -10), INT16_C( -3), INT16_C( -1), INT16_C( 4), INT16_C( 6), INT16_C( 9), INT16_C( -5), INT16_C( -5), INT16_C( 2), INT16_C( -7), INT16_C( -4), INT16_C( -12), INT16_C( 14), INT16_C( 8), INT16_C( -10), INT16_C( 7)), 
simde_mm256_set_epi16(INT16_C( -3), INT16_C( -1), INT16_C( -1), INT16_C( 1), INT16_C( 1), INT16_C( 2), INT16_C( -2), INT16_C( -2), INT16_C( 0), INT16_C( -2), INT16_C( -1), INT16_C( -3), INT16_C( 3), INT16_C( 2), INT16_C( -3), INT16_C( 1)), simde_mm256_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0)) }, { simde_mm256_set_epi16(INT16_C(-28903), INT16_C(-30383), INT16_C(-20860), INT16_C( 22246), INT16_C( 20775), INT16_C(-18600), INT16_C(-18675), INT16_C( 2687), INT16_C( 19939), INT16_C(-28692), INT16_C( -6959), INT16_C(-28677), INT16_C( 8943), INT16_C(-16990), INT16_C( 17649), INT16_C(-10824)), simde_mm256_set_epi16(INT16_C(-14452), INT16_C(-15192), INT16_C(-10430), INT16_C( 11123), INT16_C( 10387), INT16_C( -9300), INT16_C( -9338), INT16_C( 1343), INT16_C( 9969), INT16_C(-14346), INT16_C( -3480), INT16_C(-14339), INT16_C( 4471), INT16_C( -8495), INT16_C( 8824), INT16_C( -5412)), simde_mm256_set_epi16(INT16_C( -3613), INT16_C( -3798), INT16_C( -2608), INT16_C( 2780), INT16_C( 2596), INT16_C( -2325), INT16_C( -2335), INT16_C( 335), INT16_C( 2492), INT16_C( -3587), INT16_C( -870), INT16_C( -3585), INT16_C( 1117), INT16_C( -2124), INT16_C( 2206), INT16_C( -1353)), simde_mm256_set_epi16(INT16_C( -904), INT16_C( -950), INT16_C( -652), INT16_C( 695), INT16_C( 649), INT16_C( -582), INT16_C( -584), INT16_C( 83), INT16_C( 623), 
INT16_C( -897), INT16_C( -218), INT16_C( -897), INT16_C( 279), INT16_C( -531), INT16_C( 551), INT16_C( -339)), simde_mm256_set_epi16(INT16_C( -15), INT16_C( -15), INT16_C( -11), INT16_C( 10), INT16_C( 10), INT16_C( -10), INT16_C( -10), INT16_C( 1), INT16_C( 9), INT16_C( -15), INT16_C( -4), INT16_C( -15), INT16_C( 4), INT16_C( -9), INT16_C( 8), INT16_C( -6)), simde_mm256_set_epi16(INT16_C( -4), INT16_C( -4), INT16_C( -3), INT16_C( 2), INT16_C( 2), INT16_C( -3), INT16_C( -3), INT16_C( 0), INT16_C( 2), INT16_C( -4), INT16_C( -1), INT16_C( -4), INT16_C( 1), INT16_C( -3), INT16_C( 2), INT16_C( -2)), simde_mm256_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1)), simde_mm256_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1)), simde_mm256_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1)) }, { simde_mm256_set_epi16(INT16_C( 30610), INT16_C( 28790), INT16_C( 937), INT16_C( 9694), INT16_C(-17401), INT16_C(-32454), INT16_C(-25761), INT16_C( 3568), INT16_C( 18158), INT16_C(-22052), INT16_C( 13442), INT16_C( 1933), INT16_C( 24532), INT16_C(-29992), INT16_C( -2718), INT16_C( 141)), simde_mm256_set_epi16(INT16_C( 15305), INT16_C( 14395), INT16_C( 468), INT16_C( 4847), INT16_C( -8701), INT16_C(-16227), INT16_C(-12881), INT16_C( 1784), INT16_C( 9079), INT16_C(-11026), INT16_C( 6721), INT16_C( 966), INT16_C( 12266), INT16_C(-14996), INT16_C( -1359), INT16_C( 70)), simde_mm256_set_epi16(INT16_C( 3826), INT16_C( 3598), INT16_C( 117), INT16_C( 1211), INT16_C( 
-2176), INT16_C( -4057), INT16_C( -3221), INT16_C( 446), INT16_C( 2269), INT16_C( -2757), INT16_C( 1680), INT16_C( 241), INT16_C( 3066), INT16_C( -3749), INT16_C( -340), INT16_C( 17)), simde_mm256_set_epi16(INT16_C( 956), INT16_C( 899), INT16_C( 29), INT16_C( 302), INT16_C( -544), INT16_C( -1015), INT16_C( -806), INT16_C( 111), INT16_C( 567), INT16_C( -690), INT16_C( 420), INT16_C( 60), INT16_C( 766), INT16_C( -938), INT16_C( -85), INT16_C( 4)), simde_mm256_set_epi16(INT16_C( 14), INT16_C( 14), INT16_C( 0), INT16_C( 4), INT16_C( -9), INT16_C( -16), INT16_C( -13), INT16_C( 1), INT16_C( 8), INT16_C( -11), INT16_C( 6), INT16_C( 0), INT16_C( 11), INT16_C( -15), INT16_C( -2), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( 3), INT16_C( 3), INT16_C( 0), INT16_C( 1), INT16_C( -3), INT16_C( -4), INT16_C( -4), INT16_C( 0), INT16_C( 2), INT16_C( -3), INT16_C( 1), INT16_C( 0), INT16_C( 2), INT16_C( -4), INT16_C( -1), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)) }, { simde_mm256_set_epi16(INT16_C( 15336), INT16_C( -3979), INT16_C(-24197), INT16_C(-21101), INT16_C(-22510), INT16_C( 14686), INT16_C( 22575), INT16_C( 6813), INT16_C( 16551), INT16_C(-31832), INT16_C( 12767), INT16_C(-32161), INT16_C( 11852), INT16_C( -8546), INT16_C(-23397), INT16_C( 31365)), simde_mm256_set_epi16(INT16_C( 7668), INT16_C( -1990), 
INT16_C(-12099), INT16_C(-10551), INT16_C(-11255), INT16_C( 7343), INT16_C( 11287), INT16_C( 3406), INT16_C( 8275), INT16_C(-15916), INT16_C( 6383), INT16_C(-16081), INT16_C( 5926), INT16_C( -4273), INT16_C(-11699), INT16_C( 15682)), simde_mm256_set_epi16(INT16_C( 1917), INT16_C( -498), INT16_C( -3025), INT16_C( -2638), INT16_C( -2814), INT16_C( 1835), INT16_C( 2821), INT16_C( 851), INT16_C( 2068), INT16_C( -3979), INT16_C( 1595), INT16_C( -4021), INT16_C( 1481), INT16_C( -1069), INT16_C( -2925), INT16_C( 3920)), simde_mm256_set_epi16(INT16_C( 479), INT16_C( -125), INT16_C( -757), INT16_C( -660), INT16_C( -704), INT16_C( 458), INT16_C( 705), INT16_C( 212), INT16_C( 517), INT16_C( -995), INT16_C( 398), INT16_C( -1006), INT16_C( 370), INT16_C( -268), INT16_C( -732), INT16_C( 980)), simde_mm256_set_epi16(INT16_C( 7), INT16_C( -2), INT16_C( -12), INT16_C( -11), INT16_C( -11), INT16_C( 7), INT16_C( 11), INT16_C( 3), INT16_C( 8), INT16_C( -16), INT16_C( 6), INT16_C( -16), INT16_C( 5), INT16_C( -5), INT16_C( -12), INT16_C( 15)), simde_mm256_set_epi16(INT16_C( 1), INT16_C( -1), INT16_C( -3), INT16_C( -3), INT16_C( -3), INT16_C( 1), INT16_C( 2), INT16_C( 0), INT16_C( 2), INT16_C( -4), INT16_C( 1), INT16_C( -4), INT16_C( 1), INT16_C( -2), INT16_C( -3), INT16_C( 3)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)) }, { 
simde_mm256_set_epi16(INT16_C( 10638), INT16_C(-17208), INT16_C( -3442), INT16_C( 22577), INT16_C(-18388), INT16_C( 22101), INT16_C( 9949), INT16_C( 9978), INT16_C(-20527), INT16_C( -7864), INT16_C(-12143), INT16_C( -3713), INT16_C( 32473), INT16_C( -9341), INT16_C( -1436), INT16_C( 17837)), simde_mm256_set_epi16(INT16_C( 5319), INT16_C( -8604), INT16_C( -1721), INT16_C( 11288), INT16_C( -9194), INT16_C( 11050), INT16_C( 4974), INT16_C( 4989), INT16_C(-10264), INT16_C( -3932), INT16_C( -6072), INT16_C( -1857), INT16_C( 16236), INT16_C( -4671), INT16_C( -718), INT16_C( 8918)), simde_mm256_set_epi16(INT16_C( 1329), INT16_C( -2151), INT16_C( -431), INT16_C( 2822), INT16_C( -2299), INT16_C( 2762), INT16_C( 1243), INT16_C( 1247), INT16_C( -2566), INT16_C( -983), INT16_C( -1518), INT16_C( -465), INT16_C( 4059), INT16_C( -1168), INT16_C( -180), INT16_C( 2229)), simde_mm256_set_epi16(INT16_C( 332), INT16_C( -538), INT16_C( -108), INT16_C( 705), INT16_C( -575), INT16_C( 690), INT16_C( 310), INT16_C( 311), INT16_C( -642), INT16_C( -246), INT16_C( -380), INT16_C( -117), INT16_C( 1014), INT16_C( -292), INT16_C( -45), INT16_C( 557)), simde_mm256_set_epi16(INT16_C( 5), INT16_C( -9), INT16_C( -2), INT16_C( 11), INT16_C( -9), INT16_C( 10), INT16_C( 4), INT16_C( 4), INT16_C( -11), INT16_C( -4), INT16_C( -6), INT16_C( -2), INT16_C( 15), INT16_C( -5), INT16_C( -1), INT16_C( 8)), simde_mm256_set_epi16(INT16_C( 1), INT16_C( -3), INT16_C( -1), INT16_C( 2), INT16_C( -3), INT16_C( 2), INT16_C( 1), INT16_C( 1), INT16_C( -3), INT16_C( -1), INT16_C( -2), INT16_C( -1), INT16_C( 3), INT16_C( -2), INT16_C( -1), INT16_C( 2)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), 
INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r; r = simde_mm256_srai_epi16(test_vec[i].a, 0); simde_assert_m256i_i16(r, ==, test_vec[i].a); r = simde_mm256_srai_epi16(test_vec[i].a, 1); simde_assert_m256i_i16(r, ==, test_vec[i].r1); r = simde_mm256_srai_epi16(test_vec[i].a, 3); simde_assert_m256i_i16(r, ==, test_vec[i].r3); r = simde_mm256_srai_epi16(test_vec[i].a, 5); simde_assert_m256i_i16(r, ==, test_vec[i].r5); r = simde_mm256_srai_epi16(test_vec[i].a, 11); simde_assert_m256i_i16(r, ==, test_vec[i].r11); r = simde_mm256_srai_epi16(test_vec[i].a, 13); simde_assert_m256i_i16(r, ==, test_vec[i].r13); r = simde_mm256_srai_epi16(test_vec[i].a, 15); simde_assert_m256i_i16(r, ==, test_vec[i].r15); r = simde_mm256_srai_epi16(test_vec[i].a, 16); simde_assert_m256i_i16(r, ==, test_vec[i].r16); r = simde_mm256_srai_epi16(test_vec[i].a, 24); simde_assert_m256i_i16(r, ==, test_vec[i].r24); } return 0; } static int test_simde_mm256_srai_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r1; simde__m256i r3; simde__m256i r5; simde__m256i r11; simde__m256i r23; simde__m256i r31; simde__m256i r32; simde__m256i r55; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 995924073), INT32_C( 133303991), INT32_C( 1313300859), INT32_C( -54304932), INT32_C( 369936685), INT32_C( 2128713381), INT32_C( -853171060), INT32_C( -927487564)), simde_mm256_set_epi32(INT32_C( 497962036), INT32_C( 66651995), INT32_C( 656650429), INT32_C( -27152466), INT32_C( 184968342), INT32_C( 1064356690), INT32_C( -426585530), INT32_C( -463743782)), simde_mm256_set_epi32(INT32_C( 124490509), 
INT32_C( 16662998), INT32_C( 164162607), INT32_C( -6788117), INT32_C( 46242085), INT32_C( 266089172), INT32_C( -106646383), INT32_C( -115935946)), simde_mm256_set_epi32(INT32_C( 31122627), INT32_C( 4165749), INT32_C( 41040651), INT32_C( -1697030), INT32_C( 11560521), INT32_C( 66522293), INT32_C( -26661596), INT32_C( -28983987)), simde_mm256_set_epi32(INT32_C( 486291), INT32_C( 65089), INT32_C( 641260), INT32_C( -26517), INT32_C( 180633), INT32_C( 1039410), INT32_C( -416588), INT32_C( -452875)), simde_mm256_set_epi32(INT32_C( 118), INT32_C( 15), INT32_C( 156), INT32_C( -7), INT32_C( 44), INT32_C( 253), INT32_C( -102), INT32_C( -111)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1)) }, { simde_mm256_set_epi32(INT32_C( 1387220160), INT32_C( 164763246), INT32_C( 703126928), INT32_C( 1439391085), INT32_C( 935700824), INT32_C( 425196238), INT32_C( 850841582), INT32_C( -156107623)), simde_mm256_set_epi32(INT32_C( 693610080), INT32_C( 82381623), INT32_C( 351563464), INT32_C( 719695542), INT32_C( 467850412), INT32_C( 212598119), INT32_C( 425420791), INT32_C( -78053812)), simde_mm256_set_epi32(INT32_C( 173402520), INT32_C( 20595405), INT32_C( 87890866), INT32_C( 179923885), INT32_C( 116962603), INT32_C( 53149529), INT32_C( 106355197), INT32_C( -19513453)), simde_mm256_set_epi32(INT32_C( 43350630), INT32_C( 5148851), INT32_C( 21972716), INT32_C( 44980971), INT32_C( 29240650), INT32_C( 13287382), INT32_C( 26588799), INT32_C( -4878364)), simde_mm256_set_epi32(INT32_C( 677353), INT32_C( 80450), INT32_C( 343323), INT32_C( 702827), INT32_C( 456885), INT32_C( 207615), INT32_C( 415449), INT32_C( -76225)), simde_mm256_set_epi32(INT32_C( 165), 
INT32_C( 19), INT32_C( 83), INT32_C( 171), INT32_C( 111), INT32_C( 50), INT32_C( 101), INT32_C( -19)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1)) }, { simde_mm256_set_epi32(INT32_C( 171807954), INT32_C(-1646033037), INT32_C( 892750248), INT32_C( 447250718), INT32_C(-1494358709), INT32_C( -422405855), INT32_C(-1435924136), INT32_C( -570494622)), simde_mm256_set_epi32(INT32_C( 85903977), INT32_C( -823016519), INT32_C( 446375124), INT32_C( 223625359), INT32_C( -747179355), INT32_C( -211202928), INT32_C( -717962068), INT32_C( -285247311)), simde_mm256_set_epi32(INT32_C( 21475994), INT32_C( -205754130), INT32_C( 111593781), INT32_C( 55906339), INT32_C( -186794839), INT32_C( -52800732), INT32_C( -179490517), INT32_C( -71311828)), simde_mm256_set_epi32(INT32_C( 5368998), INT32_C( -51438533), INT32_C( 27898445), INT32_C( 13976584), INT32_C( -46698710), INT32_C( -13200183), INT32_C( -44872630), INT32_C( -17827957)), simde_mm256_set_epi32(INT32_C( 83890), INT32_C( -803728), INT32_C( 435913), INT32_C( 218384), INT32_C( -729668), INT32_C( -206253), INT32_C( -701135), INT32_C( -278562)), simde_mm256_set_epi32(INT32_C( 20), INT32_C( -197), INT32_C( 106), INT32_C( 53), INT32_C( -179), INT32_C( -51), INT32_C( -172), INT32_C( -69)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1)) }, { 
simde_mm256_set_epi32(INT32_C(-1316624104), INT32_C( -72407972), INT32_C( 932335479), INT32_C( -558047348), INT32_C( 274582769), INT32_C( -481975449), INT32_C( 1886274506), INT32_C(-1335411735)), simde_mm256_set_epi32(INT32_C( -658312052), INT32_C( -36203986), INT32_C( 466167739), INT32_C( -279023674), INT32_C( 137291384), INT32_C( -240987725), INT32_C( 943137253), INT32_C( -667705868)), simde_mm256_set_epi32(INT32_C( -164578013), INT32_C( -9050997), INT32_C( 116541934), INT32_C( -69755919), INT32_C( 34322846), INT32_C( -60246932), INT32_C( 235784313), INT32_C( -166926467)), simde_mm256_set_epi32(INT32_C( -41144504), INT32_C( -2262750), INT32_C( 29135483), INT32_C( -17438980), INT32_C( 8580711), INT32_C( -15061733), INT32_C( 58946078), INT32_C( -41731617)), simde_mm256_set_epi32(INT32_C( -642883), INT32_C( -35356), INT32_C( 455241), INT32_C( -272485), INT32_C( 134073), INT32_C( -235340), INT32_C( 921032), INT32_C( -652057)), simde_mm256_set_epi32(INT32_C( -157), INT32_C( -9), INT32_C( 111), INT32_C( -67), INT32_C( 32), INT32_C( -58), INT32_C( 224), INT32_C( -160)), simde_mm256_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1)), simde_mm256_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1)), simde_mm256_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { simde_mm256_set_epi32(INT32_C(-1894151855), INT32_C(-1367058714), INT32_C( 1361557336), INT32_C(-1223882113), INT32_C( 1306759148), INT32_C( -456028165), INT32_C( 586136994), INT32_C( 1156699576)), simde_mm256_set_epi32(INT32_C( -947075928), INT32_C( -683529357), INT32_C( 680778668), INT32_C( -611941057), INT32_C( 653379574), INT32_C( -228014083), INT32_C( 293068497), INT32_C( 578349788)), simde_mm256_set_epi32(INT32_C( -236768982), INT32_C( -170882340), INT32_C( 170194667), INT32_C( -152985265), INT32_C( 
163344893), INT32_C( -57003521), INT32_C( 73267124), INT32_C( 144587447)), simde_mm256_set_epi32(INT32_C( -59192246), INT32_C( -42720585), INT32_C( 42548666), INT32_C( -38246317), INT32_C( 40836223), INT32_C( -14250881), INT32_C( 18316781), INT32_C( 36146861)), simde_mm256_set_epi32(INT32_C( -924879), INT32_C( -667510), INT32_C( 664822), INT32_C( -597599), INT32_C( 638065), INT32_C( -222671), INT32_C( 286199), INT32_C( 564794)), simde_mm256_set_epi32(INT32_C( -226), INT32_C( -163), INT32_C( 162), INT32_C( -146), INT32_C( 155), INT32_C( -55), INT32_C( 69), INT32_C( 137)), simde_mm256_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0)), simde_mm256_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0)), simde_mm256_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0)) }, { simde_mm256_set_epi32(INT32_C( 2006085750), INT32_C( 61416926), INT32_C(-1140358854), INT32_C(-1688269328), INT32_C( 1190046172), INT32_C( 880936845), INT32_C( 1607764696), INT32_C( -178126707)), simde_mm256_set_epi32(INT32_C( 1003042875), INT32_C( 30708463), INT32_C( -570179427), INT32_C( -844134664), INT32_C( 595023086), INT32_C( 440468422), INT32_C( 803882348), INT32_C( -89063354)), simde_mm256_set_epi32(INT32_C( 250760718), INT32_C( 7677115), INT32_C( -142544857), INT32_C( -211033666), INT32_C( 148755771), INT32_C( 110117105), INT32_C( 200970587), INT32_C( -22265839)), simde_mm256_set_epi32(INT32_C( 62690179), INT32_C( 1919278), INT32_C( -35636215), INT32_C( -52758417), INT32_C( 37188942), INT32_C( 27529276), INT32_C( 50242646), INT32_C( -5566460)), simde_mm256_set_epi32(INT32_C( 979534), INT32_C( 29988), INT32_C( -556816), INT32_C( -824351), INT32_C( 581077), INT32_C( 430144), INT32_C( 785041), INT32_C( -86976)), simde_mm256_set_epi32(INT32_C( 239), INT32_C( 7), INT32_C( -136), INT32_C( -202), INT32_C( 
141), INT32_C( 105), INT32_C( 191), INT32_C( -22)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1)) }, { simde_mm256_set_epi32(INT32_C( 1005121653), INT32_C(-1585730157), INT32_C(-1475200674), INT32_C( 1479482013), INT32_C( 1084720040), INT32_C( 836731487), INT32_C( 776789662), INT32_C(-1533314427)), simde_mm256_set_epi32(INT32_C( 502560826), INT32_C( -792865079), INT32_C( -737600337), INT32_C( 739741006), INT32_C( 542360020), INT32_C( 418365743), INT32_C( 388394831), INT32_C( -766657214)), simde_mm256_set_epi32(INT32_C( 125640206), INT32_C( -198216270), INT32_C( -184400085), INT32_C( 184935251), INT32_C( 135590005), INT32_C( 104591435), INT32_C( 97098707), INT32_C( -191664304)), simde_mm256_set_epi32(INT32_C( 31410051), INT32_C( -49554068), INT32_C( -46100022), INT32_C( 46233812), INT32_C( 33897501), INT32_C( 26147858), INT32_C( 24274676), INT32_C( -47916076)), simde_mm256_set_epi32(INT32_C( 490782), INT32_C( -774283), INT32_C( -720313), INT32_C( 722403), INT32_C( 529648), INT32_C( 408560), INT32_C( 379291), INT32_C( -748689)), simde_mm256_set_epi32(INT32_C( 119), INT32_C( -190), INT32_C( -176), INT32_C( 176), INT32_C( 129), INT32_C( 99), INT32_C( 92), INT32_C( -183)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1)) }, { simde_mm256_set_epi32(INT32_C( 697220296), INT32_C( -225552335), 
INT32_C(-1205053867), INT32_C( 652027642), INT32_C(-1345199800), INT32_C( -795741825), INT32_C( 2128206723), INT32_C( -94091859)), simde_mm256_set_epi32(INT32_C( 348610148), INT32_C( -112776168), INT32_C( -602526934), INT32_C( 326013821), INT32_C( -672599900), INT32_C( -397870913), INT32_C( 1064103361), INT32_C( -47045930)), simde_mm256_set_epi32(INT32_C( 87152537), INT32_C( -28194042), INT32_C( -150631734), INT32_C( 81503455), INT32_C( -168149975), INT32_C( -99467729), INT32_C( 266025840), INT32_C( -11761483)), simde_mm256_set_epi32(INT32_C( 21788134), INT32_C( -7048511), INT32_C( -37657934), INT32_C( 20375863), INT32_C( -42037494), INT32_C( -24866933), INT32_C( 66506460), INT32_C( -2940371)), simde_mm256_set_epi32(INT32_C( 340439), INT32_C( -110133), INT32_C( -588406), INT32_C( 318372), INT32_C( -656836), INT32_C( -388546), INT32_C( 1039163), INT32_C( -45944)), simde_mm256_set_epi32(INT32_C( 83), INT32_C( -27), INT32_C( -144), INT32_C( 77), INT32_C( -161), INT32_C( -95), INT32_C( 253), INT32_C( -12)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r; r = simde_mm256_srai_epi32(test_vec[i].a, 0); simde_assert_m256i_i32(r, ==, test_vec[i].a); r = simde_mm256_srai_epi32(test_vec[i].a, 1); simde_assert_m256i_i32(r, ==, test_vec[i].r1); r = simde_mm256_srai_epi32(test_vec[i].a, 3); simde_assert_m256i_i32(r, ==, test_vec[i].r3); r = simde_mm256_srai_epi32(test_vec[i].a, 5); simde_assert_m256i_i32(r, ==, test_vec[i].r5); r = simde_mm256_srai_epi32(test_vec[i].a, 11); simde_assert_m256i_i32(r, ==, test_vec[i].r11); r = 
simde_mm256_srai_epi32(test_vec[i].a, 23); simde_assert_m256i_i32(r, ==, test_vec[i].r23); r = simde_mm256_srai_epi32(test_vec[i].a, 31); simde_assert_m256i_i32(r, ==, test_vec[i].r31); r = simde_mm256_srai_epi32(test_vec[i].a, 32); simde_assert_m256i_i32(r, ==, test_vec[i].r32); r = simde_mm256_srai_epi32(test_vec[i].a, 55); simde_assert_m256i_i32(r, ==, test_vec[i].r55); } return 0; } static int test_simde_mm_srav_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[4]; const int32_t count[4]; const int32_t r[4]; } test_vec[] = { { { -INT32_C( 690570879), -INT32_C( 1738612706), -INT32_C( 540036542), -INT32_C( 885456606) }, { INT32_C( 0), INT32_C( 4), INT32_C( 14), INT32_C( 2) }, { -INT32_C( 690570879), -INT32_C( 108663295), -INT32_C( 32962), -INT32_C( 221364152) } }, { { INT32_C( 990068252), INT32_C( 1624465950), INT32_C( 910140180), -INT32_C( 1677625180) }, { INT32_C( 7), INT32_C( 10), INT32_C( 13), INT32_C( 8) }, { INT32_C( 7734908), INT32_C( 1586392), INT32_C( 111101), -INT32_C( 6553224) } }, { { INT32_C( 1388512308), INT32_C( 2075301734), -INT32_C( 558763462), INT32_C( 544912234) }, { INT32_C( 10), INT32_C( 8), INT32_C( 13), INT32_C( 0) }, { INT32_C( 1355969), INT32_C( 8106647), -INT32_C( 68209), INT32_C( 544912234) } }, { { -INT32_C( 983752610), -INT32_C( 247459913), INT32_C( 1825566977), -INT32_C( 779335004) }, { INT32_C( 12), INT32_C( 15), INT32_C( 8), INT32_C( 5) }, { -INT32_C( 240174), -INT32_C( 7552), INT32_C( 7131121), -INT32_C( 24354219) } }, { { -INT32_C( 132257471), -INT32_C( 85369352), -INT32_C( 211371698), INT32_C( 1757737475) }, { INT32_C( 10), INT32_C( 10), INT32_C( 0), INT32_C( 11) }, { -INT32_C( 129158), -INT32_C( 83369), -INT32_C( 211371698), INT32_C( 858270) } }, { { -INT32_C( 1665761628), INT32_C( 697737435), INT32_C( 1545403481), INT32_C( 180675054) }, { INT32_C( 2), INT32_C( 6), INT32_C( 1), INT32_C( 5) }, { -INT32_C( 416440407), INT32_C( 10902147), INT32_C( 772701740), INT32_C( 5646095) } }, { { INT32_C( 821596501), 
-INT32_C( 1973842127), INT32_C( 2061923979), INT32_C( 1938074199) }, { INT32_C( 15), INT32_C( 1), INT32_C( 1), INT32_C( 0) }, { INT32_C( 25073), -INT32_C( 986921064), INT32_C( 1030961989), INT32_C( 1938074199) } }, { { -INT32_C( 717985117), -INT32_C( 2057335047), INT32_C( 1543456260), -INT32_C( 1999731728) }, { INT32_C( 3), INT32_C( 2), INT32_C( 6), INT32_C( 3) }, { -INT32_C( 89748140), -INT32_C( 514333762), INT32_C( 24116504), -INT32_C( 249966466) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); simde__m128i count = simde_x_mm_loadu_epi32(test_vec[i].count); simde__m128i r = simde_mm_srav_epi32(a, count); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; }
/* Test for simde_mm256_srav_epi32.
 * Each table row holds eight int32 lanes of input `a`, eight per-lane shift
 * counts `count`, and the eight expected lanes `r`. The loop loads `a` and
 * `count` with simde_x_mm256_loadu_epi32, calls
 * simde_mm256_srav_epi32(a, count) and compares the result with `r` via
 * simde_test_x86_assert_equal_i32x8.
 * The expected values match an arithmetic (sign-preserving) right shift of
 * each lane of `a` by the matching lane of `count`.
 * NOTE(review): every count in this table lies in 0..15, so shift counts of
 * 32 or more are not exercised by this test.
 * Returns 0 after the loop completes. */
static int test_simde_mm256_srav_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[8]; const int32_t count[8]; const int32_t r[8]; } test_vec[] = { { { -INT32_C( 492051327), -INT32_C( 510825512), -INT32_C( 762777342), INT32_C( 1785481283), -INT32_C( 543476496), -INT32_C( 286322968), -INT32_C( 1946042097), -INT32_C( 254994066) }, { INT32_C( 13), INT32_C( 14), INT32_C( 9), INT32_C( 7), INT32_C( 4), INT32_C( 11), INT32_C( 7), INT32_C( 4) }, { -INT32_C( 60065), -INT32_C( 31179), -INT32_C( 1489800), INT32_C( 13949072), -INT32_C( 33967281), -INT32_C( 139807), -INT32_C( 15203454), -INT32_C( 15937130) } }, { { INT32_C( 1813919881), -INT32_C( 397224553), -INT32_C( 1669658347), -INT32_C( 2045478611), -INT32_C( 1178113262), -INT32_C( 1183779151), INT32_C( 1882174246), INT32_C( 1287165379) }, { INT32_C( 6), INT32_C( 12), INT32_C( 4), INT32_C( 5), INT32_C( 8), INT32_C( 10), INT32_C( 15), INT32_C( 13) }, { INT32_C( 28342498), -INT32_C( 96979), -INT32_C( 104353647), -INT32_C( 63921207), -INT32_C( 4602005), -INT32_C( 1156035), INT32_C( 57439), INT32_C( 157124) } }, { { INT32_C( 1463679147), INT32_C( 404261379), -INT32_C( 1822788270), -INT32_C( 974773529), INT32_C( 911536817),
-INT32_C( 1629381521), INT32_C( 529661203), INT32_C( 2113458642) }, { INT32_C( 12), INT32_C( 1), INT32_C( 4), INT32_C( 1), INT32_C( 14), INT32_C( 1), INT32_C( 15), INT32_C( 11) }, { INT32_C( 357343), INT32_C( 202130689), -INT32_C( 113924267), -INT32_C( 487386765), INT32_C( 55635), -INT32_C( 814690761), INT32_C( 16163), INT32_C( 1031962) } }, { { INT32_C( 495143517), -INT32_C( 1758566346), INT32_C( 148951975), -INT32_C( 635769251), INT32_C( 1321624144), -INT32_C( 1285598836), INT32_C( 1521258349), INT32_C( 353378999) }, { INT32_C( 10), INT32_C( 10), INT32_C( 9), INT32_C( 5), INT32_C( 11), INT32_C( 15), INT32_C( 4), INT32_C( 10) }, { INT32_C( 483538), -INT32_C( 1717350), INT32_C( 290921), -INT32_C( 19867790), INT32_C( 645324), -INT32_C( 39234), INT32_C( 95078646), INT32_C( 345096) } }, { { -INT32_C( 347826706), INT32_C( 697270005), -INT32_C( 1598407784), INT32_C( 754525442), -INT32_C( 622747305), INT32_C( 1053444776), -INT32_C( 1785981858), INT32_C( 1563946863) }, { INT32_C( 7), INT32_C( 9), INT32_C( 1), INT32_C( 13), INT32_C( 7), INT32_C( 12), INT32_C( 0), INT32_C( 11) }, { -INT32_C( 2717397), INT32_C( 1361855), -INT32_C( 799203892), INT32_C( 92105), -INT32_C( 4865214), INT32_C( 257188), -INT32_C( 1785981858), INT32_C( 763645) } }, { { INT32_C( 237938708), -INT32_C( 1818177152), -INT32_C( 1331113426), -INT32_C( 880088322), INT32_C( 2051030797), INT32_C( 1363539134), INT32_C( 1666491956), INT32_C( 1377285182) }, { INT32_C( 13), INT32_C( 4), INT32_C( 13), INT32_C( 0), INT32_C( 7), INT32_C( 0), INT32_C( 11), INT32_C( 3) }, { INT32_C( 29045), -INT32_C( 113636072), -INT32_C( 162490), -INT32_C( 880088322), INT32_C( 16023678), INT32_C( 1363539134), INT32_C( 813716), INT32_C( 172160647) } }, { { -INT32_C( 602970939), INT32_C( 924876388), -INT32_C( 1744300650), INT32_C( 1403495573), INT32_C( 1389375617), INT32_C( 1935122175), -INT32_C( 1775845184), INT32_C( 97004096) }, { INT32_C( 15), INT32_C( 14), INT32_C( 10), INT32_C( 5), INT32_C( 5), INT32_C( 2), INT32_C( 0), INT32_C( 
10) }, { -INT32_C( 18402), INT32_C( 56449), -INT32_C( 1703419), INT32_C( 43859236), INT32_C( 43417988), INT32_C( 483780543), -INT32_C( 1775845184), INT32_C( 94730) } }, { { -INT32_C( 1163728540), INT32_C( 61263339), -INT32_C( 693163009), -INT32_C( 1255166120), -INT32_C( 317597057), -INT32_C( 1625562916), -INT32_C( 157163562), -INT32_C( 794449556) }, { INT32_C( 7), INT32_C( 1), INT32_C( 11), INT32_C( 6), INT32_C( 13), INT32_C( 3), INT32_C( 11), INT32_C( 4) }, { -INT32_C( 9091630), INT32_C( 30631669), -INT32_C( 338459), -INT32_C( 19611971), -INT32_C( 38770), -INT32_C( 203195365), -INT32_C( 76741), -INT32_C( 49653098) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); simde__m256i count = simde_x_mm256_loadu_epi32(test_vec[i].count); simde__m256i r = simde_mm256_srav_epi32(a, count); simde_test_x86_assert_equal_i32x8(r, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; }
/* Test for simde_mm256_srl_epi16.
 * Each table row holds sixteen int16 lanes of input `a`, a two-element
 * int64 `count` (loaded into a simde__m128i with simde_x_mm_loadu_epi64),
 * and sixteen expected lanes `r`. The loop calls
 * simde_mm256_srl_epi16(a, count) and compares the result with `r` via
 * simde_test_x86_assert_equal_i16x16.
 * The expected values match a logical (zero-filling) right shift of every
 * 16-bit lane by count[0]; count[1] has no effect on them.
 * NOTE(review): count[0] is at most 14 in this table, so shift counts of 16
 * or more are not exercised by this test.
 * Returns 0 after the loop completes. */
static int test_simde_mm256_srl_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[16]; const int64_t count[2]; const int16_t r[16]; } test_vec[] = { { { -INT16_C( 22997), -INT16_C( 654), INT16_C( 4109), -INT16_C( 4930), INT16_C( 21238), -INT16_C( 24602), -INT16_C( 1996), -INT16_C( 25685), -INT16_C( 13301), INT16_C( 1794), -INT16_C( 3246), INT16_C( 8293), INT16_C( 10874), -INT16_C( 19744), INT16_C( 24589), INT16_C( 14500) }, { INT64_C( 1), INT64_C( 14) }, { INT16_C( 21269), INT16_C( 32441), INT16_C( 2054), INT16_C( 30303), INT16_C( 10619), INT16_C( 20467), INT16_C( 31770), INT16_C( 19925), INT16_C( 26117), INT16_C( 897), INT16_C( 31145), INT16_C( 4146), INT16_C( 5437), INT16_C( 22896), INT16_C( 12294), INT16_C( 7250) } }, { { INT16_C( 6196), -INT16_C( 30991), INT16_C( 22027), -INT16_C( 31322), -INT16_C( 30847), -INT16_C( 29129), -INT16_C( 8985), -INT16_C( 4409), -INT16_C( 525), INT16_C( 6659), INT16_C( 1009), INT16_C( 14136), -INT16_C( 2582), -INT16_C( 13902), -INT16_C( 14242), -INT16_C(
27981) }, { INT64_C( 7), INT64_C( 7) }, { INT16_C( 48), INT16_C( 269), INT16_C( 172), INT16_C( 267), INT16_C( 271), INT16_C( 284), INT16_C( 441), INT16_C( 477), INT16_C( 507), INT16_C( 52), INT16_C( 7), INT16_C( 110), INT16_C( 491), INT16_C( 403), INT16_C( 400), INT16_C( 293) } }, { { INT16_C( 8143), -INT16_C( 16239), -INT16_C( 14045), INT16_C( 3575), -INT16_C( 22081), INT16_C( 7638), -INT16_C( 30351), INT16_C( 20911), -INT16_C( 14290), INT16_C( 10812), -INT16_C( 21368), -INT16_C( 12634), -INT16_C( 20140), -INT16_C( 10244), INT16_C( 6531), INT16_C( 21070) }, { INT64_C( 6), INT64_C( 15) }, { INT16_C( 127), INT16_C( 770), INT16_C( 804), INT16_C( 55), INT16_C( 678), INT16_C( 119), INT16_C( 549), INT16_C( 326), INT16_C( 800), INT16_C( 168), INT16_C( 690), INT16_C( 826), INT16_C( 709), INT16_C( 863), INT16_C( 102), INT16_C( 329) } }, { { -INT16_C( 19460), -INT16_C( 31712), -INT16_C( 14497), -INT16_C( 19629), INT16_C( 20344), -INT16_C( 885), -INT16_C( 9880), -INT16_C( 24242), INT16_C( 25016), INT16_C( 24828), INT16_C( 25963), INT16_C( 7880), INT16_C( 19619), INT16_C( 27459), -INT16_C( 17792), INT16_C( 32098) }, { INT64_C( 12), INT64_C( 9) }, { INT16_C( 11), INT16_C( 8), INT16_C( 12), INT16_C( 11), INT16_C( 4), INT16_C( 15), INT16_C( 13), INT16_C( 10), INT16_C( 6), INT16_C( 6), INT16_C( 6), INT16_C( 1), INT16_C( 4), INT16_C( 6), INT16_C( 11), INT16_C( 7) } }, { { -INT16_C( 21907), -INT16_C( 9988), -INT16_C( 15345), -INT16_C( 19721), INT16_C( 14865), -INT16_C( 28386), -INT16_C( 32524), INT16_C( 24846), INT16_C( 4098), INT16_C( 19502), -INT16_C( 20892), INT16_C( 2062), -INT16_C( 13127), -INT16_C( 25323), -INT16_C( 15656), INT16_C( 17977) }, { INT64_C( 0), INT64_C( 12) }, { -INT16_C( 21907), -INT16_C( 9988), -INT16_C( 15345), -INT16_C( 19721), INT16_C( 14865), -INT16_C( 28386), -INT16_C( 32524), INT16_C( 24846), INT16_C( 4098), INT16_C( 19502), -INT16_C( 20892), INT16_C( 2062), -INT16_C( 13127), -INT16_C( 25323), -INT16_C( 15656), INT16_C( 17977) } }, { { -INT16_C( 11077), 
INT16_C( 7962), INT16_C( 10370), INT16_C( 15144), INT16_C( 15860), -INT16_C( 12840), INT16_C( 4607), INT16_C( 27667), INT16_C( 12614), INT16_C( 16615), INT16_C( 5447), -INT16_C( 26805), -INT16_C( 6047), INT16_C( 11739), -INT16_C( 32365), INT16_C( 20220) }, { INT64_C( 3), INT64_C( 4) }, { INT16_C( 6807), INT16_C( 995), INT16_C( 1296), INT16_C( 1893), INT16_C( 1982), INT16_C( 6587), INT16_C( 575), INT16_C( 3458), INT16_C( 1576), INT16_C( 2076), INT16_C( 680), INT16_C( 4841), INT16_C( 7436), INT16_C( 1467), INT16_C( 4146), INT16_C( 2527) } }, { { INT16_C( 9541), -INT16_C( 29566), -INT16_C( 12741), -INT16_C( 25565), -INT16_C( 330), INT16_C( 18890), -INT16_C( 14720), -INT16_C( 10857), INT16_C( 1244), INT16_C( 7085), -INT16_C( 16486), INT16_C( 27727), INT16_C( 20394), -INT16_C( 23234), INT16_C( 31843), -INT16_C( 22297) }, { INT64_C( 14), INT64_C( 14) }, { INT16_C( 0), INT16_C( 2), INT16_C( 3), INT16_C( 2), INT16_C( 3), INT16_C( 1), INT16_C( 3), INT16_C( 3), INT16_C( 0), INT16_C( 0), INT16_C( 2), INT16_C( 1), INT16_C( 1), INT16_C( 2), INT16_C( 1), INT16_C( 2) } }, { { INT16_C( 22738), INT16_C( 27649), INT16_C( 20503), -INT16_C( 15911), INT16_C( 6048), INT16_C( 871), INT16_C( 20116), INT16_C( 13995), -INT16_C( 8264), -INT16_C( 4077), -INT16_C( 29642), -INT16_C( 29730), INT16_C( 5584), -INT16_C( 9887), INT16_C( 3299), -INT16_C( 18753) }, { INT64_C( 11), INT64_C( 10) }, { INT16_C( 11), INT16_C( 13), INT16_C( 10), INT16_C( 24), INT16_C( 2), INT16_C( 0), INT16_C( 9), INT16_C( 6), INT16_C( 27), INT16_C( 30), INT16_C( 17), INT16_C( 17), INT16_C( 2), INT16_C( 27), INT16_C( 1), INT16_C( 22) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m128i count = simde_x_mm_loadu_epi64(test_vec[i].count); simde__m256i r = simde_mm256_srl_epi16(a, count); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; }
/* Test for simde_mm256_srl_epi32.
 * Each table row holds eight int32 lanes of input `a`, a two-element int64
 * `count` (loaded into a simde__m128i with simde_x_mm_loadu_epi64), and
 * eight expected lanes `r`. The loop calls simde_mm256_srl_epi32(a, count)
 * and compares the result with `r` via simde_test_x86_assert_equal_i32x8.
 * The expected values match a logical (zero-filling) right shift of every
 * 32-bit lane by count[0]; count[1] has no effect on them.
 * NOTE(review): count[0] is at most 27 in this table, so shift counts of 32
 * or more are not exercised by this test.
 * Returns 0 after the loop completes. */
static int test_simde_mm256_srl_epi32
(SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[8]; const int64_t count[2]; const int32_t r[8]; } test_vec[] = { { { INT32_C( 462342934), -INT32_C( 1870121018), INT32_C( 554160944), -INT32_C( 694160714), INT32_C( 612945077), -INT32_C( 1258241143), INT32_C( 228605294), INT32_C( 1633628746) }, { INT64_C( 10), INT64_C( 0) }, { INT32_C( 451506), INT32_C( 2368013), INT32_C( 541172), INT32_C( 3516412), INT32_C( 598579), INT32_C( 2965552), INT32_C( 223247), INT32_C( 1595340) } }, { { INT32_C( 1814555874), INT32_C( 488712111), -INT32_C( 1372929436), -INT32_C( 250639628), -INT32_C( 1649112200), INT32_C( 1743980431), -INT32_C( 1342936734), INT32_C( 1655789695) }, { INT64_C( 12), INT64_C( 7) }, { INT32_C( 443006), INT32_C( 119314), INT32_C( 713388), INT32_C( 987384), INT32_C( 645960), INT32_C( 425776), INT32_C( 720710), INT32_C( 404245) } }, { { INT32_C( 1020938925), -INT32_C( 1264333486), -INT32_C( 1234986953), INT32_C( 890770672), -INT32_C( 332863763), -INT32_C( 2024785450), INT32_C( 959275636), INT32_C( 813089667) }, { INT64_C( 10), INT64_C( 15) }, { INT32_C( 997010), INT32_C( 2959603), INT32_C( 2988262), INT32_C( 869893), INT32_C( 3869241), INT32_C( 2216974), INT32_C( 936792), INT32_C( 794032) } }, { { -INT32_C( 530188790), -INT32_C( 1553418961), -INT32_C( 1696819945), INT32_C( 1724600920), -INT32_C( 1043973981), -INT32_C( 250160568), -INT32_C( 578150063), INT32_C( 1347770437) }, { INT64_C( 16), INT64_C( 30) }, { INT32_C( 57445), INT32_C( 41832), INT32_C( 39644), INT32_C( 26315), INT32_C( 49606), INT32_C( 61718), INT32_C( 56714), INT32_C( 20565) } }, { { INT32_C( 1823781668), -INT32_C( 547501427), INT32_C( 834463724), -INT32_C( 1904144065), INT32_C( 990687948), INT32_C( 2025991498), INT32_C( 2097079083), -INT32_C( 210769201) }, { INT64_C( 27), INT64_C( 20) }, { INT32_C( 13), INT32_C( 27), INT32_C( 6), INT32_C( 17), INT32_C( 7), INT32_C( 15), INT32_C( 15), INT32_C( 30) } }, { { -INT32_C( 1965064641), -INT32_C( 687693397), INT32_C( 1397948548), -INT32_C( 
2008628502), -INT32_C( 709646874), INT32_C( 128957794), INT32_C( 1525398403), -INT32_C( 1996595895) }, { INT64_C( 0), INT64_C( 11) }, { -INT32_C( 1965064641), -INT32_C( 687693397), INT32_C( 1397948548), -INT32_C( 2008628502), -INT32_C( 709646874), INT32_C( 128957794), INT32_C( 1525398403), -INT32_C( 1996595895) } }, { { -INT32_C( 1557251520), INT32_C( 2058018551), -INT32_C( 489384551), -INT32_C( 848571662), INT32_C( 760446383), -INT32_C( 1506823790), -INT32_C( 727350175), INT32_C( 1613638687) }, { INT64_C( 26), INT64_C( 5) }, { INT32_C( 40), INT32_C( 30), INT32_C( 56), INT32_C( 51), INT32_C( 11), INT32_C( 41), INT32_C( 53), INT32_C( 24) } }, { { INT32_C( 828069023), INT32_C( 1742179077), INT32_C( 792427791), INT32_C( 277834153), INT32_C( 7180998), -INT32_C( 2116795073), INT32_C( 1874168817), INT32_C( 1268627116) }, { INT64_C( 18), INT64_C( 28) }, { INT32_C( 3158), INT32_C( 6645), INT32_C( 3022), INT32_C( 1059), INT32_C( 27), INT32_C( 8309), INT32_C( 7149), INT32_C( 4839) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); simde__m128i count = simde_x_mm_loadu_epi64(test_vec[i].count); simde__m256i r = simde_mm256_srl_epi32(a, count); simde_test_x86_assert_equal_i32x8(r, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; }
/* Test for simde_mm256_srl_epi64.
 * Each table row holds four int64 lanes of input `a`, a two-element int64
 * `count` (loaded into a simde__m128i with simde_x_mm_loadu_epi64), and
 * four expected lanes `r`. The loop calls simde_mm256_srl_epi64(a, count)
 * and compares the result with `r` via simde_test_x86_assert_equal_i64x4.
 * The expected values match a logical (zero-filling) right shift of every
 * 64-bit lane by count[0]; count[1] has no effect on them.
 * NOTE(review): count[0] is at most 53 in this table, so shift counts of 64
 * or more are not exercised by this test.
 * Returns 0 after the loop completes. */
static int test_simde_mm256_srl_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[4]; const int64_t count[2]; const int64_t r[4]; } test_vec[] = { { { -INT64_C( 5615524349952248035), -INT64_C( 169181141632837383), -INT64_C( 2156056899313244590), INT64_C( 2210795734852214606) }, { INT64_C( 53), INT64_C( 9) }, { INT64_C( 1424), INT64_C( 2029), INT64_C( 1808), INT64_C( 245) } }, { { -INT64_C( 1487990399803557942), INT64_C( 529040453150517560), -INT64_C( 2488935950564090945), INT64_C( 6943142796168475236) }, { INT64_C( 43), INT64_C( 7) }, { INT64_C( 1927987), INT64_C( 60144), INT64_C( 1814192), INT64_C( 789343) } }, { { -INT64_C(
5357257909153707046), -INT64_C( 8261214601456445852), -INT64_C( 5095646641614933725), INT64_C( 5258072887802146680) }, { INT64_C( 18), INT64_C( 36) }, { INT64_C( 49932427080367), INT64_C( 38854711426746), INT64_C( 50930394867304), INT64_C( 20057956267555) } }, { { INT64_C( 3236806875457131729), INT64_C( 2330891980307266465), -INT64_C( 800261788893942862), -INT64_C( 7202296167703893490) }, { INT64_C( 18), INT64_C( 61) }, { INT64_C( 12347438337162), INT64_C( 8891647263745), INT64_C( 67315987719786), INT64_C( 42894164680502) } }, { { -INT64_C( 2722861607866604259), -INT64_C( 4610170485910138466), -INT64_C( 4356362314793495399), -INT64_C( 5361502814979693218) }, { INT64_C( 14), INT64_C( 62) }, { INT64_C( 959709623159359), INT64_C( 844517430895960), INT64_C( 860008652277591), INT64_C( 798659744795523) } }, { { -INT64_C( 6211371764766002309), -INT64_C( 3676656067685929812), INT64_C( 6836255463078293771), -INT64_C( 6585853469746560704) }, { INT64_C( 17), INT64_C( 46) }, { INT64_C( 93348482581661), INT64_C( 112686828659237), INT64_C( 52156490044237), INT64_C( 90491413909629) } }, { { -INT64_C( 2813834318091037066), -INT64_C( 3202319610422890814), -INT64_C( 2320715310519665176), INT64_C( 5863278639856965364) }, { INT64_C( 48), INT64_C( 49) }, { INT64_C( 55539), INT64_C( 54159), INT64_C( 57291), INT64_C( 20830) } }, { { -INT64_C( 6120366921829610119), INT64_C( 5663747587464683628), INT64_C( 2390317772124553323), -INT64_C( 8174011239875094039) }, { INT64_C( 23), INT64_C( 26) }, { INT64_C( 1469418663010), INT64_C( 675171326096), INT64_C( 284948083415), INT64_C( 1224605182866) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi64(test_vec[i].a); simde__m128i count = simde_x_mm_loadu_epi64(test_vec[i].count); simde__m256i r = simde_mm256_srl_epi64(a, count); simde_test_x86_assert_equal_i64x4(r, simde_x_mm256_loadu_epi64(test_vec[i].r)); } return 0; }
/* Test for simde_mm256_srli_epi16 (right shift by an immediate count).
 * Each of the 8 table rows holds an input vector `a` built with
 * simde_mm256_set_epi16, followed by the expected results r1, r3, r5, r11,
 * r13, r15, r16 and r24 for shifts by 1, 3, 5, 11, 13, 15, 16 and 24.
 * The loop additionally checks that a shift by 0 yields `a` itself. All
 * comparisons use simde_assert_m256i_i16.
 * The r16 and r24 vectors are all zero in every row, i.e. a shift of 16 or
 * more is expected to clear each 16-bit lane.
 * Returns 0 after the loop completes. */
static int
test_simde_mm256_srli_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r1; simde__m256i r3; simde__m256i r5; simde__m256i r11; simde__m256i r13; simde__m256i r15; simde__m256i r16; simde__m256i r24; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C(-13208), INT16_C( 32518), INT16_C(-12083), INT16_C( -4650), INT16_C( 32616), INT16_C(-23415), INT16_C(-12219), INT16_C(-11043), INT16_C( 17138), INT16_C( 18141), INT16_C( 29257), INT16_C(-17957), INT16_C( -2929), INT16_C(-12343), INT16_C( -8291), INT16_C(-11958)), simde_mm256_set_epi16(INT16_C( 26164), INT16_C( 16259), INT16_C( 26726), INT16_C( 30443), INT16_C( 16308), INT16_C( 21060), INT16_C( 26658), INT16_C( 27246), INT16_C( 8569), INT16_C( 9070), INT16_C( 14628), INT16_C( 23789), INT16_C( 31303), INT16_C( 26596), INT16_C( 28622), INT16_C( 26789)), simde_mm256_set_epi16(INT16_C( 6541), INT16_C( 4064), INT16_C( 6681), INT16_C( 7610), INT16_C( 4077), INT16_C( 5265), INT16_C( 6664), INT16_C( 6811), INT16_C( 2142), INT16_C( 2267), INT16_C( 3657), INT16_C( 5947), INT16_C( 7825), INT16_C( 6649), INT16_C( 7155), INT16_C( 6697)), simde_mm256_set_epi16(INT16_C( 1635), INT16_C( 1016), INT16_C( 1670), INT16_C( 1902), INT16_C( 1019), INT16_C( 1316), INT16_C( 1666), INT16_C( 1702), INT16_C( 535), INT16_C( 566), INT16_C( 914), INT16_C( 1486), INT16_C( 1956), INT16_C( 1662), INT16_C( 1788), INT16_C( 1674)), simde_mm256_set_epi16(INT16_C( 25), INT16_C( 15), INT16_C( 26), INT16_C( 29), INT16_C( 15), INT16_C( 20), INT16_C( 26), INT16_C( 26), INT16_C( 8), INT16_C( 8), INT16_C( 14), INT16_C( 23), INT16_C( 30), INT16_C( 25), INT16_C( 27), INT16_C( 26)), simde_mm256_set_epi16(INT16_C( 6), INT16_C( 3), INT16_C( 6), INT16_C( 7), INT16_C( 3), INT16_C( 5), INT16_C( 6), INT16_C( 6), INT16_C( 2), INT16_C( 2), INT16_C( 3), INT16_C( 5), INT16_C( 7), INT16_C( 6), INT16_C( 6), INT16_C( 6)), simde_mm256_set_epi16(INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 
0), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 1)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm256_set_epi16(INT16_C( 9810), INT16_C( 24519), INT16_C(-20641), INT16_C( 29186), INT16_C(-23300), INT16_C( -6682), INT16_C(-18375), INT16_C( 30920), INT16_C( 29283), INT16_C( 14293), INT16_C( -6612), INT16_C( 11040), INT16_C(-31748), INT16_C( -6890), INT16_C( 12929), INT16_C(-16870)), simde_mm256_set_epi16(INT16_C( 4905), INT16_C( 12259), INT16_C( 22447), INT16_C( 14593), INT16_C( 21118), INT16_C( 29427), INT16_C( 23580), INT16_C( 15460), INT16_C( 14641), INT16_C( 7146), INT16_C( 29462), INT16_C( 5520), INT16_C( 16894), INT16_C( 29323), INT16_C( 6464), INT16_C( 24333)), simde_mm256_set_epi16(INT16_C( 1226), INT16_C( 3064), INT16_C( 5611), INT16_C( 3648), INT16_C( 5279), INT16_C( 7356), INT16_C( 5895), INT16_C( 3865), INT16_C( 3660), INT16_C( 1786), INT16_C( 7365), INT16_C( 1380), INT16_C( 4223), INT16_C( 7330), INT16_C( 1616), INT16_C( 6083)), simde_mm256_set_epi16(INT16_C( 306), INT16_C( 766), INT16_C( 1402), INT16_C( 912), INT16_C( 1319), INT16_C( 1839), INT16_C( 1473), INT16_C( 966), INT16_C( 915), INT16_C( 446), INT16_C( 1841), INT16_C( 345), INT16_C( 1055), INT16_C( 1832), INT16_C( 404), INT16_C( 1520)), simde_mm256_set_epi16(INT16_C( 4), INT16_C( 11), INT16_C( 21), INT16_C( 14), INT16_C( 20), INT16_C( 28), INT16_C( 23), INT16_C( 15), INT16_C( 14), INT16_C( 6), INT16_C( 28), INT16_C( 5), INT16_C( 16), INT16_C( 28), INT16_C( 6), INT16_C( 23)), simde_mm256_set_epi16(INT16_C( 1), INT16_C( 2), INT16_C( 
5), INT16_C( 3), INT16_C( 5), INT16_C( 7), INT16_C( 5), INT16_C( 3), INT16_C( 3), INT16_C( 1), INT16_C( 7), INT16_C( 1), INT16_C( 4), INT16_C( 7), INT16_C( 1), INT16_C( 5)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 1)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm256_set_epi16(INT16_C( 4687), INT16_C( -4828), INT16_C( 9674), INT16_C( 8229), INT16_C(-28519), INT16_C( 24429), INT16_C(-25708), INT16_C(-15646), INT16_C( 27606), INT16_C( -993), INT16_C( 27866), INT16_C(-11890), INT16_C( 25757), INT16_C( -1957), INT16_C( 24727), INT16_C(-30230)), simde_mm256_set_epi16(INT16_C( 2343), INT16_C( 30354), INT16_C( 4837), INT16_C( 4114), INT16_C( 18508), INT16_C( 12214), INT16_C( 19914), INT16_C( 24945), INT16_C( 13803), INT16_C( 32271), INT16_C( 13933), INT16_C( 26823), INT16_C( 12878), INT16_C( 31789), INT16_C( 12363), INT16_C( 17653)), simde_mm256_set_epi16(INT16_C( 585), INT16_C( 7588), INT16_C( 1209), INT16_C( 1028), INT16_C( 4627), INT16_C( 3053), INT16_C( 4978), INT16_C( 6236), INT16_C( 3450), INT16_C( 8067), INT16_C( 3483), INT16_C( 6705), INT16_C( 3219), INT16_C( 7947), INT16_C( 3090), INT16_C( 4413)), simde_mm256_set_epi16(INT16_C( 146), INT16_C( 1897), INT16_C( 302), INT16_C( 257), INT16_C( 1156), INT16_C( 763), INT16_C( 1244), INT16_C( 1559), INT16_C( 862), INT16_C( 2016), INT16_C( 870), INT16_C( 1676), INT16_C( 804), INT16_C( 1986), INT16_C( 772), INT16_C( 
1103)), simde_mm256_set_epi16(INT16_C( 2), INT16_C( 29), INT16_C( 4), INT16_C( 4), INT16_C( 18), INT16_C( 11), INT16_C( 19), INT16_C( 24), INT16_C( 13), INT16_C( 31), INT16_C( 13), INT16_C( 26), INT16_C( 12), INT16_C( 31), INT16_C( 12), INT16_C( 17)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 7), INT16_C( 1), INT16_C( 1), INT16_C( 4), INT16_C( 2), INT16_C( 4), INT16_C( 6), INT16_C( 3), INT16_C( 7), INT16_C( 3), INT16_C( 6), INT16_C( 3), INT16_C( 7), INT16_C( 3), INT16_C( 4)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 1)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm256_set_epi16(INT16_C( 16592), INT16_C( -9654), INT16_C( -8076), INT16_C( 10592), INT16_C( 20644), INT16_C( 25911), INT16_C( -1061), INT16_C( 18172), INT16_C( 22556), INT16_C(-19191), INT16_C( 28031), INT16_C( -883), INT16_C( 5347), INT16_C( -3724), INT16_C(-32544), INT16_C(-24989)), simde_mm256_set_epi16(INT16_C( 8296), INT16_C( 27941), INT16_C( 28730), INT16_C( 5296), INT16_C( 10322), INT16_C( 12955), INT16_C( 32237), INT16_C( 9086), INT16_C( 11278), INT16_C( 23172), INT16_C( 14015), INT16_C( 32326), INT16_C( 2673), INT16_C( 30906), INT16_C( 16496), INT16_C( 20273)), simde_mm256_set_epi16(INT16_C( 2074), INT16_C( 6985), INT16_C( 7182), INT16_C( 1324), INT16_C( 2580), INT16_C( 3238), INT16_C( 8059), INT16_C( 2271), INT16_C( 2819), INT16_C( 5793), INT16_C( 3503), INT16_C( 8081), INT16_C( 668), 
INT16_C( 7726), INT16_C( 4124), INT16_C( 5068)), simde_mm256_set_epi16(INT16_C( 518), INT16_C( 1746), INT16_C( 1795), INT16_C( 331), INT16_C( 645), INT16_C( 809), INT16_C( 2014), INT16_C( 567), INT16_C( 704), INT16_C( 1448), INT16_C( 875), INT16_C( 2020), INT16_C( 167), INT16_C( 1931), INT16_C( 1031), INT16_C( 1267)), simde_mm256_set_epi16(INT16_C( 8), INT16_C( 27), INT16_C( 28), INT16_C( 5), INT16_C( 10), INT16_C( 12), INT16_C( 31), INT16_C( 8), INT16_C( 11), INT16_C( 22), INT16_C( 13), INT16_C( 31), INT16_C( 2), INT16_C( 30), INT16_C( 16), INT16_C( 19)), simde_mm256_set_epi16(INT16_C( 2), INT16_C( 6), INT16_C( 7), INT16_C( 1), INT16_C( 2), INT16_C( 3), INT16_C( 7), INT16_C( 2), INT16_C( 2), INT16_C( 5), INT16_C( 3), INT16_C( 7), INT16_C( 0), INT16_C( 7), INT16_C( 4), INT16_C( 4)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 1)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm256_set_epi16(INT16_C( -4839), INT16_C( 3174), INT16_C( 7509), INT16_C( 28795), INT16_C( -1732), INT16_C(-26609), INT16_C(-11656), INT16_C( 3035), INT16_C(-10865), INT16_C( 2405), INT16_C( 29471), INT16_C( 19828), INT16_C( 29576), INT16_C( 23078), INT16_C( 11200), INT16_C( 26322)), simde_mm256_set_epi16(INT16_C( 30348), INT16_C( 1587), INT16_C( 3754), INT16_C( 14397), INT16_C( 31902), INT16_C( 19463), INT16_C( 26940), INT16_C( 1517), INT16_C( 27335), INT16_C( 1202), INT16_C( 14735), INT16_C( 
9914), INT16_C( 14788), INT16_C( 11539), INT16_C( 5600), INT16_C( 13161)), simde_mm256_set_epi16(INT16_C( 7587), INT16_C( 396), INT16_C( 938), INT16_C( 3599), INT16_C( 7975), INT16_C( 4865), INT16_C( 6735), INT16_C( 379), INT16_C( 6833), INT16_C( 300), INT16_C( 3683), INT16_C( 2478), INT16_C( 3697), INT16_C( 2884), INT16_C( 1400), INT16_C( 3290)), simde_mm256_set_epi16(INT16_C( 1896), INT16_C( 99), INT16_C( 234), INT16_C( 899), INT16_C( 1993), INT16_C( 1216), INT16_C( 1683), INT16_C( 94), INT16_C( 1708), INT16_C( 75), INT16_C( 920), INT16_C( 619), INT16_C( 924), INT16_C( 721), INT16_C( 350), INT16_C( 822)), simde_mm256_set_epi16(INT16_C( 29), INT16_C( 1), INT16_C( 3), INT16_C( 14), INT16_C( 31), INT16_C( 19), INT16_C( 26), INT16_C( 1), INT16_C( 26), INT16_C( 1), INT16_C( 14), INT16_C( 9), INT16_C( 14), INT16_C( 11), INT16_C( 5), INT16_C( 12)), simde_mm256_set_epi16(INT16_C( 7), INT16_C( 0), INT16_C( 0), INT16_C( 3), INT16_C( 7), INT16_C( 4), INT16_C( 6), INT16_C( 0), INT16_C( 6), INT16_C( 0), INT16_C( 3), INT16_C( 2), INT16_C( 3), INT16_C( 2), INT16_C( 1), INT16_C( 3)), simde_mm256_set_epi16(INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm256_set_epi16(INT16_C(-25851), INT16_C( 6707), INT16_C(-23633), INT16_C( -4351), INT16_C( -641), INT16_C(-22303), INT16_C( 6727), INT16_C( 9129), INT16_C( 1286), INT16_C(-28152), INT16_C(-22922), INT16_C( 
-950), INT16_C( -1798), INT16_C(-15465), INT16_C( 910), INT16_C(-23243)), simde_mm256_set_epi16(INT16_C( 19842), INT16_C( 3353), INT16_C( 20951), INT16_C( 30592), INT16_C( 32447), INT16_C( 21616), INT16_C( 3363), INT16_C( 4564), INT16_C( 643), INT16_C( 18692), INT16_C( 21307), INT16_C( 32293), INT16_C( 31869), INT16_C( 25035), INT16_C( 455), INT16_C( 21146)), simde_mm256_set_epi16(INT16_C( 4960), INT16_C( 838), INT16_C( 5237), INT16_C( 7648), INT16_C( 8111), INT16_C( 5404), INT16_C( 840), INT16_C( 1141), INT16_C( 160), INT16_C( 4673), INT16_C( 5326), INT16_C( 8073), INT16_C( 7967), INT16_C( 6258), INT16_C( 113), INT16_C( 5286)), simde_mm256_set_epi16(INT16_C( 1240), INT16_C( 209), INT16_C( 1309), INT16_C( 1912), INT16_C( 2027), INT16_C( 1351), INT16_C( 210), INT16_C( 285), INT16_C( 40), INT16_C( 1168), INT16_C( 1331), INT16_C( 2018), INT16_C( 1991), INT16_C( 1564), INT16_C( 28), INT16_C( 1321)), simde_mm256_set_epi16(INT16_C( 19), INT16_C( 3), INT16_C( 20), INT16_C( 29), INT16_C( 31), INT16_C( 21), INT16_C( 3), INT16_C( 4), INT16_C( 0), INT16_C( 18), INT16_C( 20), INT16_C( 31), INT16_C( 31), INT16_C( 24), INT16_C( 0), INT16_C( 20)), simde_mm256_set_epi16(INT16_C( 4), INT16_C( 0), INT16_C( 5), INT16_C( 7), INT16_C( 7), INT16_C( 5), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 4), INT16_C( 5), INT16_C( 7), INT16_C( 7), INT16_C( 6), INT16_C( 0), INT16_C( 5)), simde_mm256_set_epi16(INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 1)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), 
INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm256_set_epi16(INT16_C( 7674), INT16_C( 18879), INT16_C( 27446), INT16_C(-29225), INT16_C( -2725), INT16_C( 23364), INT16_C( 12045), INT16_C(-28927), INT16_C(-14599), INT16_C(-16964), INT16_C( 660), INT16_C( 23234), INT16_C(-21987), INT16_C(-30631), INT16_C( 26152), INT16_C(-28363)), simde_mm256_set_epi16(INT16_C( 3837), INT16_C( 9439), INT16_C( 13723), INT16_C( 18155), INT16_C( 31405), INT16_C( 11682), INT16_C( 6022), INT16_C( 18304), INT16_C( 25468), INT16_C( 24286), INT16_C( 330), INT16_C( 11617), INT16_C( 21774), INT16_C( 17452), INT16_C( 13076), INT16_C( 18586)), simde_mm256_set_epi16(INT16_C( 959), INT16_C( 2359), INT16_C( 3430), INT16_C( 4538), INT16_C( 7851), INT16_C( 2920), INT16_C( 1505), INT16_C( 4576), INT16_C( 6367), INT16_C( 6071), INT16_C( 82), INT16_C( 2904), INT16_C( 5443), INT16_C( 4363), INT16_C( 3269), INT16_C( 4646)), simde_mm256_set_epi16(INT16_C( 239), INT16_C( 589), INT16_C( 857), INT16_C( 1134), INT16_C( 1962), INT16_C( 730), INT16_C( 376), INT16_C( 1144), INT16_C( 1591), INT16_C( 1517), INT16_C( 20), INT16_C( 726), INT16_C( 1360), INT16_C( 1090), INT16_C( 817), INT16_C( 1161)), simde_mm256_set_epi16(INT16_C( 3), INT16_C( 9), INT16_C( 13), INT16_C( 17), INT16_C( 30), INT16_C( 11), INT16_C( 5), INT16_C( 17), INT16_C( 24), INT16_C( 23), INT16_C( 0), INT16_C( 11), INT16_C( 21), INT16_C( 17), INT16_C( 12), INT16_C( 18)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 2), INT16_C( 3), INT16_C( 4), INT16_C( 7), INT16_C( 2), INT16_C( 1), INT16_C( 4), INT16_C( 6), INT16_C( 5), INT16_C( 0), INT16_C( 2), INT16_C( 5), INT16_C( 4), INT16_C( 3), INT16_C( 4)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 1)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 
0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm256_set_epi16(INT16_C(-13197), INT16_C( 24881), INT16_C(-10578), INT16_C(-21298), INT16_C( 16303), INT16_C( -8332), INT16_C( 25558), INT16_C( 12717), INT16_C( 18247), INT16_C(-30759), INT16_C( 9647), INT16_C( 18112), INT16_C( -4632), INT16_C( 7524), INT16_C(-32339), INT16_C( 28325)), simde_mm256_set_epi16(INT16_C( 26169), INT16_C( 12440), INT16_C( 27479), INT16_C( 22119), INT16_C( 8151), INT16_C( 28602), INT16_C( 12779), INT16_C( 6358), INT16_C( 9123), INT16_C( 17388), INT16_C( 4823), INT16_C( 9056), INT16_C( 30452), INT16_C( 3762), INT16_C( 16598), INT16_C( 14162)), simde_mm256_set_epi16(INT16_C( 6542), INT16_C( 3110), INT16_C( 6869), INT16_C( 5529), INT16_C( 2037), INT16_C( 7150), INT16_C( 3194), INT16_C( 1589), INT16_C( 2280), INT16_C( 4347), INT16_C( 1205), INT16_C( 2264), INT16_C( 7613), INT16_C( 940), INT16_C( 4149), INT16_C( 3540)), simde_mm256_set_epi16(INT16_C( 1635), INT16_C( 777), INT16_C( 1717), INT16_C( 1382), INT16_C( 509), INT16_C( 1787), INT16_C( 798), INT16_C( 397), INT16_C( 570), INT16_C( 1086), INT16_C( 301), INT16_C( 566), INT16_C( 1903), INT16_C( 235), INT16_C( 1037), INT16_C( 885)), simde_mm256_set_epi16(INT16_C( 25), INT16_C( 12), INT16_C( 26), INT16_C( 21), INT16_C( 7), INT16_C( 27), INT16_C( 12), INT16_C( 6), INT16_C( 8), INT16_C( 16), INT16_C( 4), INT16_C( 8), INT16_C( 29), INT16_C( 3), INT16_C( 16), INT16_C( 13)), simde_mm256_set_epi16(INT16_C( 6), INT16_C( 3), INT16_C( 6), INT16_C( 5), INT16_C( 1), INT16_C( 6), INT16_C( 3), INT16_C( 1), INT16_C( 2), INT16_C( 4), INT16_C( 1), INT16_C( 2), INT16_C( 7), INT16_C( 0), INT16_C( 4), INT16_C( 3)), 
simde_mm256_set_epi16(INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r; r = simde_mm256_srli_epi16(test_vec[i].a, 0); simde_assert_m256i_i16(r, ==, test_vec[i].a); r = simde_mm256_srli_epi16(test_vec[i].a, 1); simde_assert_m256i_i16(r, ==, test_vec[i].r1); r = simde_mm256_srli_epi16(test_vec[i].a, 3); simde_assert_m256i_i16(r, ==, test_vec[i].r3); r = simde_mm256_srli_epi16(test_vec[i].a, 5); simde_assert_m256i_i16(r, ==, test_vec[i].r5); r = simde_mm256_srli_epi16(test_vec[i].a, 11); simde_assert_m256i_i16(r, ==, test_vec[i].r11); r = simde_mm256_srli_epi16(test_vec[i].a, 13); simde_assert_m256i_i16(r, ==, test_vec[i].r13); r = simde_mm256_srli_epi16(test_vec[i].a, 15); simde_assert_m256i_i16(r, ==, test_vec[i].r15); r = simde_mm256_srli_epi16(test_vec[i].a, 16); simde_assert_m256i_i16(r, ==, test_vec[i].r16); r = simde_mm256_srli_epi16(test_vec[i].a, 24); simde_assert_m256i_i16(r, ==, test_vec[i].r24); } return 0; } static int test_simde_mm256_srli_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 732419944), INT32_C( -77147012), INT32_C( 489295522), INT32_C( -707244875), INT32_C( 1759674836), INT32_C( 934163130), INT32_C( 1804082267), INT32_C(-1601331496)), 
simde_mm256_set_epi32(INT32_C( 849157488), INT32_C( 1818223314), INT32_C( -155475303), INT32_C(-1398665928), INT32_C( 1594244161), INT32_C( -844009252), INT32_C( 1806417213), INT32_C(-2079022073)), simde_mm256_set_epi32(INT32_C( 5722030), INT32_C( 32951720), INT32_C( 3822621), INT32_C( 28029081), INT32_C( 13747459), INT32_C( 7298149), INT32_C( 14094392), INT32_C( 21044029)) }, { simde_mm256_set_epi32(INT32_C(-1876248297), INT32_C(-2075193364), INT32_C( 172214735), INT32_C( 1442116689), INT32_C(-1288626413), INT32_C( 967081158), INT32_C(-1720117739), INT32_C( 578489253)), simde_mm256_set_epi32(INT32_C( 428131799), INT32_C( -748788150), INT32_C(-1439171179), INT32_C(-1390423819), INT32_C(-1892283443), INT32_C( 1334337044), INT32_C(-1388554016), INT32_C( -267755914)), simde_mm256_set_epi32(INT32_C( 18896242), INT32_C( 17341983), INT32_C( 1345427), INT32_C( 11266536), INT32_C( 23487038), INT32_C( 7555321), INT32_C( 20116012), INT32_C( 4519447)) }, { simde_mm256_set_epi32(INT32_C( 2139550852), INT32_C( 1705221620), INT32_C( -794518741), INT32_C( 1217473420), INT32_C( -178348948), INT32_C( -348930603), INT32_C( 1243233554), INT32_C( 409162556)), simde_mm256_set_epi32(INT32_C(-1403015061), INT32_C( 1726506153), INT32_C(-1594809223), INT32_C( -409985534), INT32_C( 1511772803), INT32_C( 1304778026), INT32_C( 245063900), INT32_C(-1297537554)), simde_mm256_set_epi32(INT32_C( 16715241), INT32_C( 13322043), INT32_C( 27347254), INT32_C( 9511511), INT32_C( 32161080), INT32_C( 30828411), INT32_C( 9712762), INT32_C( 3196582)) }, { simde_mm256_set_epi32(INT32_C( -266647535), INT32_C(-1026471549), INT32_C( 2142406190), INT32_C( 988722559), INT32_C( -185570336), INT32_C( 377523091), INT32_C( 41928420), INT32_C( 778993888)), simde_mm256_set_epi32(INT32_C( -589207043), INT32_C( 1553242780), INT32_C(-1285792025), INT32_C( -729017109), INT32_C( 1574494207), INT32_C( 934652742), INT32_C( 140826824), INT32_C(-1967394389)), simde_mm256_set_epi32(INT32_C( 31471248), INT32_C( 25535123), 
INT32_C( 16737548), INT32_C( 7724394), INT32_C( 32104663), INT32_C( 2949399), INT32_C( 327565), INT32_C( 6085889)) }, { simde_mm256_set_epi32(INT32_C(-1791792894), INT32_C( 1191804972), INT32_C( -126480817), INT32_C( 1125707739), INT32_C( -881879475), INT32_C( 143668756), INT32_C( -526899451), INT32_C( 1691821556)), simde_mm256_set_epi32(INT32_C( 1203771433), INT32_C(-1195415594), INT32_C( 39184838), INT32_C( 1288741747), INT32_C(-1347767789), INT32_C( 1099277192), INT32_C( 1193566195), INT32_C( -378044470)), simde_mm256_set_epi32(INT32_C( 19556050), INT32_C( 9310976), INT32_C( 32566300), INT32_C( 8794591), INT32_C( 26664748), INT32_C( 1122412), INT32_C( 29438030), INT32_C( 13217355)) }, { simde_mm256_set_epi32(INT32_C(-2125943644), INT32_C(-1910893035), INT32_C( 262981568), INT32_C( 1406838420), INT32_C( 1188023687), INT32_C( -157711585), INT32_C(-1355034011), INT32_C( 1720639911)), simde_mm256_set_epi32(INT32_C(-2060210004), INT32_C( 1988675682), INT32_C(-1866317501), INT32_C(-1240234337), INT32_C(-1832718526), INT32_C(-1229749178), INT32_C(-1445633372), INT32_C(-1964281870)), simde_mm256_set_epi32(INT32_C( 16945497), INT32_C( 18625580), INT32_C( 2054543), INT32_C( 10990925), INT32_C( 9281435), INT32_C( 32322310), INT32_C( 22968228), INT32_C( 13442499)) }, { simde_mm256_set_epi32(INT32_C(-2026359983), INT32_C( 713896046), INT32_C( 968519053), INT32_C( 1906109584), INT32_C( -45372712), INT32_C( 1799473244), INT32_C( -655893602), INT32_C( 1170033241)), simde_mm256_set_epi32(INT32_C( -638177916), INT32_C( 1362453557), INT32_C( -397570420), INT32_C(-1865678794), INT32_C( -397148457), INT32_C( -942451042), INT32_C( 944509801), INT32_C( -43249903)), simde_mm256_set_epi32(INT32_C( 17723494), INT32_C( 5577312), INT32_C( 7566555), INT32_C( 14891481), INT32_C( 33199957), INT32_C( 14058384), INT32_C( 28430263), INT32_C( 9140884)) }, { simde_mm256_set_epi32(INT32_C(-1081998790), INT32_C( 485579923), INT32_C(-1608658429), INT32_C(-1749625928), INT32_C( -779220060), INT32_C( 
1164169772), INT32_C( 1589226288), INT32_C(-1216437245)), simde_mm256_set_epi32(INT32_C( 320527694), INT32_C( 1480004810), INT32_C( 343325687), INT32_C( 772720606), INT32_C( 1014221898), INT32_C(-1455936050), INT32_C(-1191095377), INT32_C( 1501993218)), simde_mm256_set_epi32(INT32_C( 25101316), INT32_C( 3793593), INT32_C( 20986788), INT32_C( 19885479), INT32_C( 27466775), INT32_C( 9095076), INT32_C( 12415830), INT32_C( 24051016)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_srli_epi32(test_vec[i].a, 7); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_srli_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C(13444540030250453406)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 3078071440592676417)), HEDLEY_STATIC_CAST(int64_t, UINT64_C(12679412335333608791)), HEDLEY_STATIC_CAST(int64_t, UINT64_C(11535715936901372554))), simde_mm256_set_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 1641179202911432)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 375741142650473)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 1547779826090528)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 1408168449328780))) }, { simde_mm256_set_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 225928524470693751)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 2474929979316735680)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 9996007191945919101)), HEDLEY_STATIC_CAST(int64_t, UINT64_C(17512962486882118479))), simde_mm256_set_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 27579165584801)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 302115475990812)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 1220215721673085)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 2137812803574477))) }, { simde_mm256_set_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 982097740378530081)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 970769818047416681)), HEDLEY_STATIC_CAST(int64_t, 
UINT64_C(17255424662243862193)), HEDLEY_STATIC_CAST(int64_t, UINT64_C(11037271049098666112))), simde_mm256_set_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 119884978073551)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 118502175054616)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 2106375080840315)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 1347323126110677))) }, { simde_mm256_set_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C(11593238181509994155)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 7911460325067993277)), HEDLEY_STATIC_CAST(int64_t, UINT64_C(14357185542824356538)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 8595783393340762736))), simde_mm256_set_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 1415190207703856)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 965754434212401)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 1752586125833051)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 1049289965007417))) }, { simde_mm256_set_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 1166612957385363389)), HEDLEY_STATIC_CAST(int64_t, UINT64_C(16938733438792705089)), HEDLEY_STATIC_CAST(int64_t, UINT64_C(15575845187785487375)), HEDLEY_STATIC_CAST(int64_t, UINT64_C(10601739794903951841))), simde_mm256_set_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 142408808274580)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 2067716484227625)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 1901348289524595)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 1294157689807611))) }, { simde_mm256_set_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C(15633886144376419396)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 8542868240583792995)), HEDLEY_STATIC_CAST(int64_t, UINT64_C(15596557227725823268)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 8048867687018942442))), simde_mm256_set_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 1908433367233449)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 1042830595774388)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 1903876614712624)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 982527793825554))) }, { simde_mm256_set_epi64x(HEDLEY_STATIC_CAST(int64_t, 
UINT64_C(15325764881028507458)), HEDLEY_STATIC_CAST(int64_t, UINT64_C(15687264368536599834)), HEDLEY_STATIC_CAST(int64_t, UINT64_C(15848782944379583666)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 6713584830763153714))), simde_mm256_set_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 1870820908328675)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 1914949263737377)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 1934665886765085)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 819529398286517))) }, { simde_mm256_set_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 4987716052637776361)), HEDLEY_STATIC_CAST(int64_t, UINT64_C(12811672244172189341)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 5840602719775879932)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 2103118507556192853))), simde_mm256_set_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 608852057206759)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 1563924834493675)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 712964199191391)), HEDLEY_STATIC_CAST(int64_t, UINT64_C( 256728333441918))) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_srli_epi64(test_vec[i].a, 13); simde_assert_m256i_u64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_srli_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( 96), INT8_C( 17), INT8_C( 11), INT8_C( 103), INT8_C( 75), INT8_C( 47), INT8_C( -18), INT8_C( 1), INT8_C( 93), INT8_C( -43), INT8_C( -55), INT8_C( 100), INT8_C( -48), INT8_C( 21), INT8_C( -29), INT8_C( 10), INT8_C( 66), INT8_C(-116), INT8_C( 19), INT8_C( -92), INT8_C( -95), INT8_C( 82), INT8_C( -36), INT8_C( -42), INT8_C( 126), INT8_C( 88), INT8_C( -28), INT8_C( -8), INT8_C( -38), INT8_C(-106), INT8_C( 21), INT8_C( 13)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 96), INT8_C( 17), INT8_C( 11), INT8_C( 103), INT8_C( 75), INT8_C( 47), INT8_C( -18), INT8_C( 1), INT8_C( 
93), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 66), INT8_C(-116), INT8_C( 19), INT8_C( -92), INT8_C( -95), INT8_C( 82), INT8_C( -36), INT8_C( -42), INT8_C( 126)) }, { simde_mm256_set_epi8(INT8_C(-108), INT8_C( -86), INT8_C( -14), INT8_C( -95), INT8_C( 109), INT8_C( 36), INT8_C( 47), INT8_C(-100), INT8_C( 19), INT8_C( 29), INT8_C( -48), INT8_C(-125), INT8_C( -86), INT8_C( 59), INT8_C(-103), INT8_C( 59), INT8_C( 123), INT8_C( -73), INT8_C(-107), INT8_C( -42), INT8_C( 68), INT8_C( 37), INT8_C( 115), INT8_C( 51), INT8_C( -48), INT8_C( 56), INT8_C( -7), INT8_C( -79), INT8_C( -38), INT8_C(-103), INT8_C( 68), INT8_C( 60)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-108), INT8_C( -86), INT8_C( -14), INT8_C( -95), INT8_C( 109), INT8_C( 36), INT8_C( 47), INT8_C(-100), INT8_C( 19), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 123), INT8_C( -73), INT8_C(-107), INT8_C( -42), INT8_C( 68), INT8_C( 37), INT8_C( 115), INT8_C( 51), INT8_C( -48)) }, { simde_mm256_set_epi8(INT8_C( 76), INT8_C( 16), INT8_C( -72), INT8_C( 118), INT8_C( 21), INT8_C( -73), INT8_C( -17), INT8_C( -69), INT8_C( 119), INT8_C( 79), INT8_C( 76), INT8_C( 29), INT8_C( 50), INT8_C( -64), INT8_C( -33), INT8_C( 123), INT8_C( -96), INT8_C( 33), INT8_C( 83), INT8_C( -25), INT8_C( 49), INT8_C( 0), INT8_C( -75), INT8_C( 118), INT8_C( -51), INT8_C( -18), INT8_C( -17), INT8_C(-114), INT8_C( 65), INT8_C( 26), INT8_C( -78), INT8_C( 60)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 76), INT8_C( 16), INT8_C( -72), INT8_C( 118), INT8_C( 21), INT8_C( -73), INT8_C( -17), INT8_C( -69), INT8_C( 119), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -96), INT8_C( 33), INT8_C( 83), INT8_C( -25), INT8_C( 49), INT8_C( 0), INT8_C( -75), INT8_C( 118), INT8_C( 
-51)) }, { simde_mm256_set_epi8(INT8_C( -24), INT8_C( -43), INT8_C( -68), INT8_C( 95), INT8_C( -74), INT8_C( 73), INT8_C( 100), INT8_C( 109), INT8_C( 96), INT8_C( -49), INT8_C( -75), INT8_C( 117), INT8_C( 13), INT8_C( -72), INT8_C( -68), INT8_C( 14), INT8_C( -76), INT8_C( 76), INT8_C( 21), INT8_C( -85), INT8_C( -57), INT8_C(-103), INT8_C( 22), INT8_C( -25), INT8_C( -64), INT8_C( 0), INT8_C( 92), INT8_C( -92), INT8_C(-101), INT8_C( -65), INT8_C( -20), INT8_C( 63)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -24), INT8_C( -43), INT8_C( -68), INT8_C( 95), INT8_C( -74), INT8_C( 73), INT8_C( 100), INT8_C( 109), INT8_C( 96), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -76), INT8_C( 76), INT8_C( 21), INT8_C( -85), INT8_C( -57), INT8_C(-103), INT8_C( 22), INT8_C( -25), INT8_C( -64)) }, { simde_mm256_set_epi8(INT8_C( -68), INT8_C( -24), INT8_C( 84), INT8_C(-116), INT8_C( -43), INT8_C( 79), INT8_C( -39), INT8_C( -68), INT8_C( 124), INT8_C( -14), INT8_C( 98), INT8_C(-100), INT8_C( 125), INT8_C( 110), INT8_C( 79), INT8_C( -13), INT8_C( -72), INT8_C(-119), INT8_C( -50), INT8_C( -7), INT8_C( 127), INT8_C( 126), INT8_C( -62), INT8_C(-124), INT8_C( -36), INT8_C( -38), INT8_C( -1), INT8_C( 11), INT8_C( -6), INT8_C( 75), INT8_C( 3), INT8_C( 46)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -68), INT8_C( -24), INT8_C( 84), INT8_C(-116), INT8_C( -43), INT8_C( 79), INT8_C( -39), INT8_C( -68), INT8_C( 124), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -72), INT8_C(-119), INT8_C( -50), INT8_C( -7), INT8_C( 127), INT8_C( 126), INT8_C( -62), INT8_C(-124), INT8_C( -36)) }, { simde_mm256_set_epi8(INT8_C(-121), INT8_C( -60), INT8_C( 94), INT8_C( 113), INT8_C( -23), INT8_C( 116), INT8_C( 51), INT8_C( 98), INT8_C( 52), INT8_C( -39), INT8_C( -43), INT8_C( -8), 
INT8_C( 98), INT8_C( 57), INT8_C( -33), INT8_C( -19), INT8_C( 7), INT8_C( -17), INT8_C( 13), INT8_C( 106), INT8_C(-115), INT8_C(-117), INT8_C(-118), INT8_C( -11), INT8_C( -1), INT8_C( 104), INT8_C( -53), INT8_C( 93), INT8_C( -53), INT8_C( 34), INT8_C( -17), INT8_C( -60)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-121), INT8_C( -60), INT8_C( 94), INT8_C( 113), INT8_C( -23), INT8_C( 116), INT8_C( 51), INT8_C( 98), INT8_C( 52), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 7), INT8_C( -17), INT8_C( 13), INT8_C( 106), INT8_C(-115), INT8_C(-117), INT8_C(-118), INT8_C( -11), INT8_C( -1)) }, { simde_mm256_set_epi8(INT8_C( -26), INT8_C( -15), INT8_C( -63), INT8_C( 20), INT8_C( -47), INT8_C( -75), INT8_C( -93), INT8_C( -10), INT8_C( -70), INT8_C( -68), INT8_C( 51), INT8_C( -33), INT8_C( -77), INT8_C( -93), INT8_C(-109), INT8_C( -87), INT8_C(-100), INT8_C( -94), INT8_C( 37), INT8_C( -30), INT8_C( -77), INT8_C( -68), INT8_C( -55), INT8_C(-112), INT8_C( -35), INT8_C( 48), INT8_C( 19), INT8_C( 104), INT8_C( 67), INT8_C( 105), INT8_C(-110), INT8_C( 111)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -26), INT8_C( -15), INT8_C( -63), INT8_C( 20), INT8_C( -47), INT8_C( -75), INT8_C( -93), INT8_C( -10), INT8_C( -70), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-100), INT8_C( -94), INT8_C( 37), INT8_C( -30), INT8_C( -77), INT8_C( -68), INT8_C( -55), INT8_C(-112), INT8_C( -35)) }, { simde_mm256_set_epi8(INT8_C( 45), INT8_C( -53), INT8_C(-119), INT8_C( 36), INT8_C( -11), INT8_C( -24), INT8_C( 109), INT8_C( -22), INT8_C( -79), INT8_C( -98), INT8_C( -10), INT8_C( 35), INT8_C( 46), INT8_C(-121), INT8_C( 7), INT8_C( 38), INT8_C( 124), INT8_C( -47), INT8_C( -7), INT8_C( 41), INT8_C( 33), INT8_C( 95), INT8_C( 12), INT8_C( 3), INT8_C( 105), INT8_C( 16), INT8_C( 
33), INT8_C(-106), INT8_C( 94), INT8_C( 28), INT8_C( 14), INT8_C( 63)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 45), INT8_C( -53), INT8_C(-119), INT8_C( 36), INT8_C( -11), INT8_C( -24), INT8_C( 109), INT8_C( -22), INT8_C( -79), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 124), INT8_C( -47), INT8_C( -7), INT8_C( 41), INT8_C( 33), INT8_C( 95), INT8_C( 12), INT8_C( 3), INT8_C( 105)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_srli_si256(test_vec[i].a, 7); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_srlv_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 750889845), INT32_C( 1061237522), INT32_C( -532194576), INT32_C( 369438034)), simde_mm_set_epi32(INT32_C( 24), INT32_C( 8), INT32_C( 6), INT32_C( 22)), simde_mm_set_epi32(INT32_C( 44), INT32_C( 4145459), INT32_C( 58793323), INT32_C( 88)) }, { simde_mm_set_epi32(INT32_C( 258457925), INT32_C( -26515586), INT32_C( 413932222), INT32_C( 1067196057)), simde_mm_set_epi32(INT32_C( 31), INT32_C( 28), INT32_C( 8), INT32_C(-1408300382)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 15), INT32_C( 1616922), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1382618640), INT32_C(-1421089532), INT32_C( 108769767), INT32_C( -467052333)), simde_mm_set_epi32(INT32_C( 21), INT32_C( 14), INT32_C( 16), INT32_C( 14)), simde_mm_set_epi32(INT32_C( 1388), INT32_C( 175407), INT32_C( 1659), INT32_C( 233637)) }, { simde_mm_set_epi32(INT32_C( -987735049), INT32_C( 1751785542), INT32_C( -871968813), INT32_C( -682701112)), simde_mm_set_epi32(INT32_C( 1275084561), INT32_C( 344565288), INT32_C( 29), INT32_C( 13)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 6), INT32_C( 440950)) }, { simde_mm_set_epi32(INT32_C( -172652498), 
INT32_C(-1982996024), INT32_C( -540050425), INT32_C( -840529876)), simde_mm_set_epi32(INT32_C( 23), INT32_C( 23), INT32_C( 26), INT32_C( 22)), simde_mm_set_epi32(INT32_C( 491), INT32_C( 275), INT32_C( 55), INT32_C( 823)) }, { simde_mm_set_epi32(INT32_C( 1748060455), INT32_C( 1899512955), INT32_C( 350293706), INT32_C(-1638949235)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 10), INT32_C( 17), INT32_C( 6)), simde_mm_set_epi32(INT32_C( 1748060455), INT32_C( 1854993), INT32_C( 2672), INT32_C( 41500282)) }, { simde_mm_set_epi32(INT32_C(-1740858648), INT32_C( 1128670113), INT32_C(-1204328910), INT32_C( -772579618)), simde_mm_set_epi32(INT32_C( 9), INT32_C( 28), INT32_C( 11), INT32_C( 14)), simde_mm_set_epi32(INT32_C( 4988493), INT32_C( 4), INT32_C( 1509100), INT32_C( 214989)) }, { simde_mm_set_epi32(INT32_C( -969095340), INT32_C( 1833136862), INT32_C(-2029269927), INT32_C( 238413160)), simde_mm_set_epi32(INT32_C( 3), INT32_C( 20), INT32_C( 27), INT32_C(-1565782040)), simde_mm_set_epi32(INT32_C( 415733994), INT32_C( 1748), INT32_C( 16), INT32_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_srlv_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_srlv_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 861771480), INT32_C( 1241239464), INT32_C( 563960678), INT32_C(-1557457802), INT32_C( 750889845), INT32_C( 1061237522), INT32_C( -532194576), INT32_C( 369438034)), simde_mm256_set_epi32(INT32_C( 31), INT32_C( 28), INT32_C( 8), INT32_C(-1408300382), INT32_C( 5), INT32_C( 30), INT32_C( 30), INT32_C( 25)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 4), INT32_C( 2202971), INT32_C( 0), INT32_C( 23465307), INT32_C( 0), INT32_C( 3), INT32_C( 11)) }, { simde_mm256_set_epi32(INT32_C( 1385973461), INT32_C( 1951417326), INT32_C( 1227312976), INT32_C( 
1815104686), INT32_C(-1382618640), INT32_C(-1421089532), INT32_C( 108769767), INT32_C( -467052333)), simde_mm256_set_epi32(INT32_C( 1275084561), INT32_C( 344565288), INT32_C( 29), INT32_C( 13), INT32_C( 23), INT32_C( 6), INT32_C( 19), INT32_C( 8)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 2), INT32_C( 221570), INT32_C( 347), INT32_C( 44904340), INT32_C( 207), INT32_C( 14952792)) }, { simde_mm256_set_epi32(INT32_C( 585901719), INT32_C( 528891191), INT32_C( 1862281050), INT32_C( 2077782134), INT32_C( -172652498), INT32_C(-1982996024), INT32_C( -540050425), INT32_C( -840529876)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 10), INT32_C( 17), INT32_C( 6), INT32_C( 7), INT32_C( 27), INT32_C( 10), INT32_C( 13)), simde_mm256_set_epi32(INT32_C( 585901719), INT32_C( 516495), INT32_C( 14208), INT32_C( 32465345), INT32_C( 32205584), INT32_C( 17), INT32_C( 3666911), INT32_C( 421684)) }, { simde_mm256_set_epi32(INT32_C( -594595895), INT32_C( 1620324892), INT32_C( 211976587), INT32_C( 477414606), INT32_C(-1740858648), INT32_C( 1128670113), INT32_C(-1204328910), INT32_C( -772579618)), simde_mm256_set_epi32(INT32_C( 3), INT32_C( 20), INT32_C( 27), INT32_C(-1565782040), INT32_C( 20), INT32_C( 30), INT32_C( 25), INT32_C( 8)), simde_mm256_set_epi32(INT32_C( 462546425), INT32_C( 1545), INT32_C( 1), INT32_C( 0), INT32_C( 2435), INT32_C( 1), INT32_C( 92), INT32_C( 13759326)) }, { simde_mm256_set_epi32(INT32_C( -889280255), INT32_C( 271958618), INT32_C( -251228133), INT32_C( 720482352), INT32_C(-1904765782), INT32_C( 203744379), INT32_C( 1575427578), INT32_C( -282602589)), simde_mm256_set_epi32(INT32_C( 23), INT32_C( 13), INT32_C( 1563325862), INT32_C( 29), INT32_C( 19), INT32_C( 7), INT32_C( 24), INT32_C( 3)), simde_mm256_set_epi32(INT32_C( 405), INT32_C( 33198), INT32_C( 0), INT32_C( 1), INT32_C( 4558), INT32_C( 1591752), INT32_C( 93), INT32_C( 501545588)) }, { simde_mm256_set_epi32(INT32_C( 1315029482), INT32_C(-1940519792), INT32_C( 49328210), INT32_C( -718320499), 
INT32_C(-2013049672), INT32_C( -426567571), INT32_C( 1959340047), INT32_C( 359056326)), simde_mm256_set_epi32(INT32_C( -523938288), INT32_C( 12), INT32_C( 7), INT32_C( 25), INT32_C(-1642052233), INT32_C( 14), INT32_C( 21), INT32_C( 28)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 574816), INT32_C( 385376), INT32_C( 106), INT32_C( 0), INT32_C( 236108), INT32_C( 934), INT32_C( 1)) }, { simde_mm256_set_epi32(INT32_C(-1635822813), INT32_C( 796954372), INT32_C(-1179699383), INT32_C(-1410308028), INT32_C(-1961762130), INT32_C( 185876307), INT32_C(-1502255170), INT32_C( -357150625)), simde_mm256_set_epi32(INT32_C( 2), INT32_C(-2068185893), INT32_C( 15), INT32_C( 10), INT32_C( 21), INT32_C( 20), INT32_C( -337949468), INT32_C( 29)), simde_mm256_set_epi32(INT32_C( 664786120), INT32_C( 0), INT32_C( 95070), INT32_C( 2817050), INT32_C( 1112), INT32_C( 177), INT32_C( 0), INT32_C( 7)) }, { simde_mm256_set_epi32(INT32_C( 1698380028), INT32_C( -28278292), INT32_C( 1781848426), INT32_C( -350034661), INT32_C(-1536163099), INT32_C( -584250792), INT32_C(-1013422776), INT32_C( 880343098)), simde_mm256_set_epi32(INT32_C( 23), INT32_C( 170594558), INT32_C( -142350595), INT32_C( 17), INT32_C( 24), INT32_C( 3), INT32_C( 16), INT32_C( 14)), simde_mm256_set_epi32(INT32_C( 202), INT32_C( 0), INT32_C( 0), INT32_C( 30097), INT32_C( 164), INT32_C( 463839563), INT32_C( 50072), INT32_C( 53731)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_srlv_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_srlv_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C(3225047328234746642), INT64_C(-2285758298659148462)), simde_mm_set_epi64x(INT64_C( 40), INT64_C( 54)), simde_mm_set_epi64x(INT64_C( 2933163), INT64_C( 897)) }, { simde_mm_set_epi64x(INT64_C(1110068339535472510), 
INT64_C(1777825357317807769)), simde_mm_set_epi64x(INT64_C( 28), INT64_C( 34)), simde_mm_set_epi64x(INT64_C( 4135326815), INT64_C( 103483055)) }, { simde_mm_set_epi64x(INT64_C(-5938301838766119676), INT64_C( 467162595886454995)), simde_mm_set_epi64x(INT64_C( 46), INT64_C(5271269095691537582)), simde_mm_set_epi64x(INT64_C( 177755), INT64_C( 0)) }, { simde_mm_set_epi64x(INT64_C(-4242289730816171962), INT64_C(-3745077531354673464)), simde_mm_set_epi64x(INT64_C( 40), INT64_C( 45)), simde_mm_set_epi64x(INT64_C( 12918875), INT64_C( 417846)) }, { simde_mm_set_epi64x(INT64_C(-741536830170734136), INT64_C(-2319498910111463380)), simde_mm_set_epi64x(INT64_C( 55), INT64_C( 54)), simde_mm_set_epi64x(INT64_C( 491), INT64_C( 895)) }, { simde_mm_set_epi64x(INT64_C(7507862487555392635), INT64_C(1504500013920657037)), simde_mm_set_epi64x(INT64_C( 10), INT64_C( 38)), simde_mm_set_epi64x(INT64_C( 7331896960503313), INT64_C( 5473339)) }, { simde_mm_set_epi64x(INT64_C(-7476930958990105695), INT64_C(-5172553278554939682)), simde_mm_set_epi64x(INT64_C( 28), INT64_C( 14)), simde_mm_set_epi64x(INT64_C( 40865738372), INT64_C( 810192309274573)) }, { simde_mm_set_epi64x(INT64_C(-4162232790172863778), INT64_C(-8715647970982894232)), simde_mm_set_epi64x(INT64_C(-7347100370153245868), INT64_C(2854270594309689320)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_srlv_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_srlv_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C( 3701280324466757544), INT64_C( 2422192670977496182), INT64_C( 3225047328234746642), INT64_C(-2285758298659148462)), simde_mm256_set_epi64x(INT64_C( 28), INT64_C( 34), INT64_C( 62), INT64_C( 25)), simde_mm256_set_epi64x(INT64_C( 13788343684), INT64_C( 
140990169), INT64_C( 0), INT64_C( 481634908171)) }, { simde_mm256_set_epi64x(INT64_C( 5952710690070348782), INT64_C( 5271269095691537582), INT64_C(-5938301838766119676), INT64_C( 467162595886454995)), simde_mm256_set_epi64x(INT64_C( 40), INT64_C( 45), INT64_C( 6), INT64_C(-3745077531354673464)), simde_mm256_set_epi64x(INT64_C( 5413958), INT64_C( 149818), INT64_C( 195444409920991124), INT64_C( 0)) }, { simde_mm256_set_epi64x(INT64_C( 2516428722304073015), INT64_C( 7998436207788322934), INT64_C( -741536830170734136), INT64_C(-2319498910111463380)), simde_mm256_set_epi64x(INT64_C( 10), INT64_C( 38), INT64_C( 59), INT64_C( 13)), simde_mm256_set_epi64x(INT64_C( 2457449924125071), INT64_C( 29098141), INT64_C( 30), INT64_C( 1968657856884532)) }, { simde_mm256_set_epi64x(INT64_C(-2553769921740525028), INT64_C( 910432509160113358), INT64_C(-7476930958990105695), INT64_C(-5172553278554939682)), simde_mm256_set_epi64x(INT64_C(-7347100370153245868), INT64_C( 2854270594309689320), INT64_C( 30), INT64_C( 40)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 0), INT64_C( 10216434593), INT64_C( 12072806)) }, { simde_mm256_set_epi64x(INT64_C(-3819429611931581862), INT64_C(-1079016614349656016), INT64_C(-8180906740026121093), INT64_C( 6766409928738853795)), simde_mm256_set_epi64x(INT64_C( 13), INT64_C( 61), INT64_C( 39), INT64_C( 3)), simde_mm256_set_epi64x(INT64_C( 1785560847385006), INT64_C( 7), INT64_C( 18673449), INT64_C( 845801241092356724)) }, { simde_mm256_set_epi64x(INT64_C( 5648008620820268176), INT64_C( 211863052296866957), INT64_C(-8645982502595127187), INT64_C( 8415301423967159238)), simde_mm256_set_epi64x(INT64_C( 12), INT64_C( 25), INT64_C( 14), INT64_C( 28)), simde_mm256_set_epi64x(INT64_C( 1378908354692448), INT64_C( 6314010986), INT64_C( 598191013861964), INT64_C( 31349440753)) }, { simde_mm256_set_epi64x(INT64_C(-7025805483088769276), INT64_C(-5066770266211719100), INT64_C(-8425704190695424173), INT64_C(-6452136821459103649)), simde_mm256_set_epi64x(INT64_C( 27), 
INT64_C( 42), INT64_C( 52), INT64_C( 29)), simde_mm256_set_epi64x(INT64_C( 85092623461), INT64_C( 3042253), INT64_C( 2225), INT64_C( 22341697015)) }, { simde_mm256_set_epi64x(INT64_C( 7294486680706253292), INT64_C( 7652980720044008731), INT64_C(-6597770267816293800), INT64_C(-4352617679061190598)), simde_mm256_set_epi64x(INT64_C( 62), INT64_C( 17), INT64_C( 35), INT64_C(-3878551521611571282)), simde_mm256_set_epi64x(INT64_C( 1), INT64_C( 58387609253265), INT64_C( 344850524), INT64_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_srlv_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_stream_load_si256 (SIMDE_MUNIT_TEST_ARGS) { /* XL C/C++ doesn't seem to honor the alignment requirements if this * is static. I'm actually not sure if it honors them if it's not * static or just happens to generate properly aligned data… */ const struct { SIMDE_ALIGN_LIKE_32(simde__m256i) const int32_t a[8]; const int32_t r[8]; } test_vec[] = { { { -INT32_C( 318278464), -INT32_C( 1120465675), INT32_C( 457266700), INT32_C( 2139701695), INT32_C( 1033159662), -INT32_C( 359675734), INT32_C( 1714257348), INT32_C( 1256709514) }, { -INT32_C( 318278464), -INT32_C( 1120465675), INT32_C( 457266700), INT32_C( 2139701695), INT32_C( 1033159662), -INT32_C( 359675734), INT32_C( 1714257348), INT32_C( 1256709514) } }, { { INT32_C( 1178070609), INT32_C( 117731066), -INT32_C( 2094905916), INT32_C( 1963174791), INT32_C( 431200111), INT32_C( 587416159), INT32_C( 1233793215), INT32_C( 1586786573) }, { INT32_C( 1178070609), INT32_C( 117731066), -INT32_C( 2094905916), INT32_C( 1963174791), INT32_C( 431200111), INT32_C( 587416159), INT32_C( 1233793215), INT32_C( 1586786573) } }, { { INT32_C( 1504037727), -INT32_C( 27219654), INT32_C( 1971487470), -INT32_C( 1645509330), INT32_C( 2092342812), -INT32_C( 1616921888), -INT32_C( 118937110), -INT32_C( 111772262) }, { INT32_C( 
1504037727), -INT32_C( 27219654), INT32_C( 1971487470), -INT32_C( 1645509330), INT32_C( 2092342812), -INT32_C( 1616921888), -INT32_C( 118937110), -INT32_C( 111772262) } }, { { -INT32_C( 2108425400), -INT32_C( 1820216412), INT32_C( 1678246710), -INT32_C( 1543375992), INT32_C( 1897969553), INT32_C( 1544667249), -INT32_C( 2074805527), -INT32_C( 1082283401) }, { -INT32_C( 2108425400), -INT32_C( 1820216412), INT32_C( 1678246710), -INT32_C( 1543375992), INT32_C( 1897969553), INT32_C( 1544667249), -INT32_C( 2074805527), -INT32_C( 1082283401) } }, { { INT32_C( 1245892774), -INT32_C( 1159871612), INT32_C( 1310648006), INT32_C( 1811029977), INT32_C( 1222382550), -INT32_C( 1130041901), INT32_C( 1581316327), INT32_C( 1209974434) }, { INT32_C( 1245892774), -INT32_C( 1159871612), INT32_C( 1310648006), INT32_C( 1811029977), INT32_C( 1222382550), -INT32_C( 1130041901), INT32_C( 1581316327), INT32_C( 1209974434) } }, { { INT32_C( 311648398), -INT32_C( 372477917), INT32_C( 808970838), -INT32_C( 526702327), INT32_C( 254310204), INT32_C( 1288490085), INT32_C( 1722485956), INT32_C( 1504692682) }, { INT32_C( 311648398), -INT32_C( 372477917), INT32_C( 808970838), -INT32_C( 526702327), INT32_C( 254310204), INT32_C( 1288490085), INT32_C( 1722485956), INT32_C( 1504692682) } }, { { INT32_C( 1282097705), INT32_C( 154482866), INT32_C( 741960738), -INT32_C( 770911083), -INT32_C( 1327418293), -INT32_C( 990008064), -INT32_C( 2077579078), -INT32_C( 1696736911) }, { INT32_C( 1282097705), INT32_C( 154482866), INT32_C( 741960738), -INT32_C( 770911083), -INT32_C( 1327418293), -INT32_C( 990008064), -INT32_C( 2077579078), -INT32_C( 1696736911) } }, { { -INT32_C( 823768805), -INT32_C( 1546183807), INT32_C( 483332231), INT32_C( 804183012), INT32_C( 266391567), INT32_C( 936631677), -INT32_C( 155386235), -INT32_C( 208627241) }, { -INT32_C( 823768805), -INT32_C( 1546183807), INT32_C( 483332231), INT32_C( 804183012), INT32_C( 266391567), INT32_C( 936631677), -INT32_C( 155386235), -INT32_C( 208627241) } } }; 
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_mm256_stream_load_si256(SIMDE_ALIGN_CAST(simde__m256i const*, test_vec[i].a)); simde_test_x86_assert_equal_i32x8(a, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm256_sub_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( -37), INT8_C( -5), INT8_C( -23), INT8_C( 84), INT8_C(-108), INT8_C( -92), INT8_C( 86), INT8_C( -77), INT8_C( 77), INT8_C( -93), INT8_C( -37), INT8_C( 101), INT8_C( 79), INT8_C( -45), INT8_C( 56), INT8_C( -51), INT8_C( 103), INT8_C(-116), INT8_C( -58), INT8_C( -2), INT8_C( 39), INT8_C(-101), INT8_C(-113), INT8_C( 21), INT8_C( -77), INT8_C( 26), INT8_C( -37), INT8_C( 126), INT8_C( 102), INT8_C( 27), INT8_C( -92), INT8_C( -33)), simde_mm256_set_epi8(INT8_C(-122), INT8_C( -91), INT8_C( 24), INT8_C(-103), INT8_C( 34), INT8_C( 5), INT8_C( 109), INT8_C(-120), INT8_C( -7), INT8_C( 86), INT8_C( 3), INT8_C( 90), INT8_C( 98), INT8_C(-108), INT8_C(-128), INT8_C( 33), INT8_C( -84), INT8_C( -73), INT8_C( -39), INT8_C( 33), INT8_C( 103), INT8_C( -21), INT8_C(-117), INT8_C( -60), INT8_C( -48), INT8_C( -88), INT8_C( 95), INT8_C( 100), INT8_C( -66), INT8_C( 52), INT8_C( -40), INT8_C( 90)), simde_mm256_set_epi8(INT8_C( 85), INT8_C( 86), INT8_C( -47), INT8_C( -69), INT8_C( 114), INT8_C( -97), INT8_C( -23), INT8_C( 43), INT8_C( 84), INT8_C( 77), INT8_C( -40), INT8_C( 11), INT8_C( -19), INT8_C( 63), INT8_C( -72), INT8_C( -84), INT8_C( -69), INT8_C( -43), INT8_C( -19), INT8_C( -35), INT8_C( -64), INT8_C( -80), INT8_C( 4), INT8_C( 81), INT8_C( -29), INT8_C( 114), INT8_C( 124), INT8_C( 26), INT8_C( -88), INT8_C( -25), INT8_C( -52), INT8_C(-123)) }, { simde_mm256_set_epi8(INT8_C( -74), INT8_C( -43), INT8_C( -65), INT8_C( 71), INT8_C(-122), INT8_C(-128), INT8_C( 4), INT8_C( 54), INT8_C( 12), INT8_C( 60), INT8_C( 80), INT8_C( -43), INT8_C( -85), INT8_C( 
94), INT8_C( -70), INT8_C( 67), INT8_C( 82), INT8_C( 40), INT8_C( -92), INT8_C( -55), INT8_C( 114), INT8_C( 12), INT8_C( -43), INT8_C( -93), INT8_C( -89), INT8_C( -90), INT8_C( 46), INT8_C( -34), INT8_C( -53), INT8_C( 0), INT8_C( 91), INT8_C( -43)), simde_mm256_set_epi8(INT8_C( 34), INT8_C( -42), INT8_C( 32), INT8_C( -31), INT8_C( 117), INT8_C( -44), INT8_C( 116), INT8_C( -46), INT8_C( 114), INT8_C( 78), INT8_C( -3), INT8_C( 123), INT8_C( 103), INT8_C( 4), INT8_C( -99), INT8_C(-112), INT8_C( 67), INT8_C(-113), INT8_C( -1), INT8_C(-117), INT8_C( -58), INT8_C( -63), INT8_C(-110), INT8_C( 65), INT8_C( 53), INT8_C(-113), INT8_C(-123), INT8_C( -24), INT8_C( -84), INT8_C( -96), INT8_C( 116), INT8_C( 23)), simde_mm256_set_epi8(INT8_C(-108), INT8_C( -1), INT8_C( -97), INT8_C( 102), INT8_C( 17), INT8_C( -84), INT8_C(-112), INT8_C( 100), INT8_C(-102), INT8_C( -18), INT8_C( 83), INT8_C( 90), INT8_C( 68), INT8_C( 90), INT8_C( 29), INT8_C( -77), INT8_C( 15), INT8_C(-103), INT8_C( -91), INT8_C( 62), INT8_C( -84), INT8_C( 75), INT8_C( 67), INT8_C( 98), INT8_C( 114), INT8_C( 23), INT8_C( -87), INT8_C( -10), INT8_C( 31), INT8_C( 96), INT8_C( -25), INT8_C( -66)) }, { simde_mm256_set_epi8(INT8_C( -56), INT8_C( -54), INT8_C( -84), INT8_C( -79), INT8_C( 25), INT8_C( -22), INT8_C( -93), INT8_C( -71), INT8_C(-106), INT8_C(-125), INT8_C( -67), INT8_C( 53), INT8_C( 101), INT8_C(-105), INT8_C( -28), INT8_C( 37), INT8_C( -61), INT8_C( -39), INT8_C(-117), INT8_C(-116), INT8_C( -16), INT8_C(-126), INT8_C( 25), INT8_C( 103), INT8_C( -81), INT8_C( 88), INT8_C( 1), INT8_C( 20), INT8_C( 36), INT8_C( 16), INT8_C( -44), INT8_C( 107)), simde_mm256_set_epi8(INT8_C( 73), INT8_C(-125), INT8_C( 72), INT8_C( 17), INT8_C( -36), INT8_C( 20), INT8_C( -81), INT8_C( 10), INT8_C( 119), INT8_C( 41), INT8_C( -90), INT8_C( -54), INT8_C(-116), INT8_C( 15), INT8_C( -64), INT8_C( 60), INT8_C( -43), INT8_C( 5), INT8_C( 93), INT8_C( 54), INT8_C( -46), INT8_C(-106), INT8_C(-117), INT8_C( 127), INT8_C( 81), INT8_C( -75), 
INT8_C(-105), INT8_C( 31), INT8_C( 51), INT8_C(-105), INT8_C( -23), INT8_C( 95)), simde_mm256_set_epi8(INT8_C( 127), INT8_C( 71), INT8_C( 100), INT8_C( -96), INT8_C( 61), INT8_C( -42), INT8_C( -12), INT8_C( -81), INT8_C( 31), INT8_C( 90), INT8_C( 23), INT8_C( 107), INT8_C( -39), INT8_C(-120), INT8_C( 36), INT8_C( -23), INT8_C( -18), INT8_C( -44), INT8_C( 46), INT8_C( 86), INT8_C( 30), INT8_C( -20), INT8_C(-114), INT8_C( -24), INT8_C( 94), INT8_C( -93), INT8_C( 106), INT8_C( -11), INT8_C( -15), INT8_C( 121), INT8_C( -21), INT8_C( 12)) }, { simde_mm256_set_epi8(INT8_C( 114), INT8_C( 34), INT8_C( 26), INT8_C( -19), INT8_C( -79), INT8_C( -66), INT8_C( -16), INT8_C( 14), INT8_C(-119), INT8_C( 36), INT8_C( 48), INT8_C(-124), INT8_C( -3), INT8_C( 65), INT8_C( 48), INT8_C(-117), INT8_C( -76), INT8_C( -16), INT8_C( 83), INT8_C( 8), INT8_C( 76), INT8_C( -37), INT8_C( -88), INT8_C( -95), INT8_C( -96), INT8_C( 84), INT8_C( -4), INT8_C( 56), INT8_C( 13), INT8_C( -84), INT8_C( 107), INT8_C( -97)), simde_mm256_set_epi8(INT8_C(-116), INT8_C( -40), INT8_C( 117), INT8_C( -7), INT8_C( -80), INT8_C( 53), INT8_C( -24), INT8_C(-123), INT8_C(-119), INT8_C( -47), INT8_C( -64), INT8_C( -25), INT8_C( 117), INT8_C( 45), INT8_C( 40), INT8_C( -91), INT8_C( 19), INT8_C( 123), INT8_C( 36), INT8_C( -6), INT8_C( 29), INT8_C( -9), INT8_C( 117), INT8_C( -17), INT8_C( -12), INT8_C( 82), INT8_C( 121), INT8_C( 9), INT8_C( -40), INT8_C(-118), INT8_C( -50), INT8_C( -94)), simde_mm256_set_epi8(INT8_C( -26), INT8_C( 74), INT8_C( -91), INT8_C( -12), INT8_C( 1), INT8_C(-119), INT8_C( 8), INT8_C(-119), INT8_C( 0), INT8_C( 83), INT8_C( 112), INT8_C( -99), INT8_C(-120), INT8_C( 20), INT8_C( 8), INT8_C( -26), INT8_C( -95), INT8_C( 117), INT8_C( 47), INT8_C( 14), INT8_C( 47), INT8_C( -28), INT8_C( 51), INT8_C( -78), INT8_C( -84), INT8_C( 2), INT8_C(-125), INT8_C( 47), INT8_C( 53), INT8_C( 34), INT8_C( -99), INT8_C( -3)) }, { simde_mm256_set_epi8(INT8_C( 95), INT8_C( -54), INT8_C( -5), INT8_C( -28), INT8_C( -63), 
INT8_C( 54), INT8_C(-113), INT8_C( 127), INT8_C( -25), INT8_C( -12), INT8_C(-108), INT8_C(-123), INT8_C( -64), INT8_C( 68), INT8_C( 89), INT8_C( 72), INT8_C( -46), INT8_C( -63), INT8_C( 56), INT8_C( 125), INT8_C( 44), INT8_C( 8), INT8_C( 49), INT8_C( 105), INT8_C( 43), INT8_C( 41), INT8_C( -46), INT8_C(-117), INT8_C( 22), INT8_C( 27), INT8_C( -55), INT8_C( 106)), simde_mm256_set_epi8(INT8_C( 15), INT8_C( -64), INT8_C( 3), INT8_C( -17), INT8_C( 127), INT8_C(-101), INT8_C( 76), INT8_C( 31), INT8_C( 79), INT8_C( 115), INT8_C( -72), INT8_C( -19), INT8_C(-105), INT8_C( 117), INT8_C( -69), INT8_C( 62), INT8_C( 51), INT8_C( -12), INT8_C( -69), INT8_C(-109), INT8_C(-101), INT8_C( -50), INT8_C( 90), INT8_C( 52), INT8_C( 58), INT8_C( -94), INT8_C( -69), INT8_C( 88), INT8_C( -9), INT8_C( 25), INT8_C( 89), INT8_C( 31)), simde_mm256_set_epi8(INT8_C( 80), INT8_C( 10), INT8_C( -8), INT8_C( -11), INT8_C( 66), INT8_C(-101), INT8_C( 67), INT8_C( 96), INT8_C(-104), INT8_C(-127), INT8_C( -36), INT8_C(-104), INT8_C( 41), INT8_C( -49), INT8_C( -98), INT8_C( 10), INT8_C( -97), INT8_C( -51), INT8_C( 125), INT8_C( -22), INT8_C(-111), INT8_C( 58), INT8_C( -41), INT8_C( 53), INT8_C( -15), INT8_C(-121), INT8_C( 23), INT8_C( 51), INT8_C( 31), INT8_C( 2), INT8_C( 112), INT8_C( 75)) }, { simde_mm256_set_epi8(INT8_C( 58), INT8_C( -35), INT8_C( -9), INT8_C( -15), INT8_C( -74), INT8_C( -92), INT8_C( -26), INT8_C( -48), INT8_C( -14), INT8_C( 54), INT8_C( 39), INT8_C( 120), INT8_C( -38), INT8_C( -90), INT8_C( 101), INT8_C( 25), INT8_C( 90), INT8_C( 106), INT8_C( 24), INT8_C( -31), INT8_C( -57), INT8_C( 7), INT8_C( 29), INT8_C( 106), INT8_C( 124), INT8_C( 62), INT8_C( 44), INT8_C( 40), INT8_C( 54), INT8_C( 54), INT8_C( -93), INT8_C( -77)), simde_mm256_set_epi8(INT8_C( 65), INT8_C(-103), INT8_C( 10), INT8_C( 103), INT8_C( 65), INT8_C( 90), INT8_C( -56), INT8_C( 98), INT8_C( 64), INT8_C( 110), INT8_C( -94), INT8_C( 23), INT8_C( -62), INT8_C( 19), INT8_C( 115), INT8_C( 93), INT8_C( 68), INT8_C( 91), 
INT8_C( -1), INT8_C( 5), INT8_C( 19), INT8_C( -54), INT8_C( -79), INT8_C(-124), INT8_C( -14), INT8_C( 106), INT8_C( -71), INT8_C( -34), INT8_C( 39), INT8_C( -20), INT8_C( 117), INT8_C( -23)), simde_mm256_set_epi8(INT8_C( -7), INT8_C( 68), INT8_C( -19), INT8_C(-118), INT8_C( 117), INT8_C( 74), INT8_C( 30), INT8_C( 110), INT8_C( -78), INT8_C( -56), INT8_C(-123), INT8_C( 97), INT8_C( 24), INT8_C(-109), INT8_C( -14), INT8_C( -68), INT8_C( 22), INT8_C( 15), INT8_C( 25), INT8_C( -36), INT8_C( -76), INT8_C( 61), INT8_C( 108), INT8_C( -26), INT8_C(-118), INT8_C( -44), INT8_C( 115), INT8_C( 74), INT8_C( 15), INT8_C( 74), INT8_C( 46), INT8_C( -54)) }, { simde_mm256_set_epi8(INT8_C( -10), INT8_C( 24), INT8_C(-113), INT8_C( -36), INT8_C( 115), INT8_C( 49), INT8_C( 18), INT8_C( 86), INT8_C( 112), INT8_C( 85), INT8_C( -24), INT8_C( -37), INT8_C( -39), INT8_C( 13), INT8_C( 8), INT8_C( 92), INT8_C( -95), INT8_C(-112), INT8_C( 83), INT8_C( 16), INT8_C( 106), INT8_C(-127), INT8_C( 122), INT8_C( -83), INT8_C( -96), INT8_C( -94), INT8_C( 86), INT8_C( -10), INT8_C( -44), INT8_C( -36), INT8_C( 93), INT8_C( 71)), simde_mm256_set_epi8(INT8_C( -12), INT8_C(-115), INT8_C( 101), INT8_C( -47), INT8_C( 85), INT8_C( -78), INT8_C( -44), INT8_C( 16), INT8_C( 71), INT8_C( 4), INT8_C( -58), INT8_C( 7), INT8_C( 30), INT8_C( 54), INT8_C( 60), INT8_C( -1), INT8_C( -73), INT8_C( -28), INT8_C( -92), INT8_C( -36), INT8_C( 82), INT8_C( -11), INT8_C( -69), INT8_C( 2), INT8_C( -76), INT8_C( 100), INT8_C( 99), INT8_C( 0), INT8_C( -87), INT8_C( -70), INT8_C( 29), INT8_C( 121)), simde_mm256_set_epi8(INT8_C( 2), INT8_C(-117), INT8_C( 42), INT8_C( 11), INT8_C( 30), INT8_C( 127), INT8_C( 62), INT8_C( 70), INT8_C( 41), INT8_C( 81), INT8_C( 34), INT8_C( -44), INT8_C( -69), INT8_C( -41), INT8_C( -52), INT8_C( 93), INT8_C( -22), INT8_C( -84), INT8_C( -81), INT8_C( 52), INT8_C( 24), INT8_C(-116), INT8_C( -65), INT8_C( -85), INT8_C( -20), INT8_C( 62), INT8_C( -13), INT8_C( -10), INT8_C( 43), INT8_C( 34), INT8_C( 64), 
INT8_C( -50)) }, { simde_mm256_set_epi8(INT8_C( 2), INT8_C( -33), INT8_C( 21), INT8_C( 39), INT8_C( -5), INT8_C(-110), INT8_C(-113), INT8_C( 98), INT8_C( 6), INT8_C( 47), INT8_C( -26), INT8_C( 73), INT8_C(-121), INT8_C( -53), INT8_C( 102), INT8_C( 110), INT8_C( -38), INT8_C( -97), INT8_C( -70), INT8_C(-105), INT8_C(-128), INT8_C( 48), INT8_C( -78), INT8_C( 108), INT8_C( 118), INT8_C( 9), INT8_C( 40), INT8_C( 110), INT8_C( -32), INT8_C( -25), INT8_C( 39), INT8_C(-123)), simde_mm256_set_epi8(INT8_C( 15), INT8_C( 27), INT8_C( -7), INT8_C(-103), INT8_C( 81), INT8_C( 127), INT8_C( -25), INT8_C( 51), INT8_C( -31), INT8_C( 28), INT8_C(-102), INT8_C( 24), INT8_C( 73), INT8_C( 119), INT8_C( 106), INT8_C( 1), INT8_C( 5), INT8_C(-108), INT8_C( -84), INT8_C( 54), INT8_C( -30), INT8_C(-120), INT8_C( 39), INT8_C( 6), INT8_C(-115), INT8_C(-128), INT8_C( 110), INT8_C( 39), INT8_C(-110), INT8_C(-102), INT8_C( 6), INT8_C( 126)), simde_mm256_set_epi8(INT8_C( -13), INT8_C( -60), INT8_C( 28), INT8_C(-114), INT8_C( -86), INT8_C( 19), INT8_C( -88), INT8_C( 47), INT8_C( 37), INT8_C( 19), INT8_C( 76), INT8_C( 49), INT8_C( 62), INT8_C( 84), INT8_C( -4), INT8_C( 109), INT8_C( -43), INT8_C( 11), INT8_C( 14), INT8_C( 97), INT8_C( -98), INT8_C( -88), INT8_C(-117), INT8_C( 102), INT8_C( -23), INT8_C(-119), INT8_C( -70), INT8_C( 71), INT8_C( 78), INT8_C( 77), INT8_C( 33), INT8_C( 7)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_sub_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_sub_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C(-27953), INT16_C(-25893), INT16_C( 18102), INT16_C( -6593), INT16_C( 7623), INT16_C( 27326), INT16_C(-15847), INT16_C( 25196), INT16_C( 4150), INT16_C( 31494), INT16_C( 14922), INT16_C( 2426), INT16_C( 8906), INT16_C( -8618), 
INT16_C(-26305), INT16_C( 2552)), simde_mm256_set_epi16(INT16_C(-20113), INT16_C( 26325), INT16_C( 23863), INT16_C(-13511), INT16_C( 6653), INT16_C( 22833), INT16_C( 26506), INT16_C( 21325), INT16_C(-18785), INT16_C( -8308), INT16_C( -2522), INT16_C( 805), INT16_C(-27299), INT16_C( 31631), INT16_C( -9722), INT16_C( 3708)), simde_mm256_set_epi16(INT16_C( -7840), INT16_C( 13318), INT16_C( -5761), INT16_C( 6918), INT16_C( 970), INT16_C( 4493), INT16_C( 23183), INT16_C( 3871), INT16_C( 22935), INT16_C(-25734), INT16_C( 17444), INT16_C( 1621), INT16_C(-29331), INT16_C( 25287), INT16_C(-16583), INT16_C( -1156)) }, { simde_mm256_set_epi16(INT16_C(-11131), INT16_C(-17560), INT16_C( 30989), INT16_C( -1961), INT16_C( 25635), INT16_C(-31167), INT16_C( 13893), INT16_C(-24056), INT16_C( 29937), INT16_C( 24074), INT16_C( 3624), INT16_C( 30721), INT16_C( 7318), INT16_C(-21871), INT16_C(-24159), INT16_C( 27811)), simde_mm256_set_epi16(INT16_C(-14801), INT16_C( 18665), INT16_C( 21171), INT16_C( 2083), INT16_C( 9794), INT16_C(-14994), INT16_C( -7071), INT16_C(-14888), INT16_C( 17983), INT16_C(-13129), INT16_C(-28994), INT16_C( 8299), INT16_C( 22982), INT16_C( 2227), INT16_C(-21646), INT16_C(-28910)), simde_mm256_set_epi16(INT16_C( 3670), INT16_C( 29311), INT16_C( 9818), INT16_C( -4044), INT16_C( 15841), INT16_C(-16173), INT16_C( 20964), INT16_C( -9168), INT16_C( 11954), INT16_C(-28333), INT16_C( 32618), INT16_C( 22422), INT16_C(-15664), INT16_C(-24098), INT16_C( -2513), INT16_C( -8815)) }, { simde_mm256_set_epi16(INT16_C(-23995), INT16_C( 25905), INT16_C( -5982), INT16_C( 20058), INT16_C( 2118), INT16_C( -114), INT16_C( 287), INT16_C(-16504), INT16_C( 12216), INT16_C( 31693), INT16_C( 7692), INT16_C(-10279), INT16_C( -9654), INT16_C( 13919), INT16_C( 14030), INT16_C( -6049)), simde_mm256_set_epi16(INT16_C( 18387), INT16_C( 3058), INT16_C( 24810), INT16_C(-31587), INT16_C( 19314), INT16_C(-13527), INT16_C( 20849), INT16_C(-15645), INT16_C( 4064), INT16_C( 5808), INT16_C( 11755), 
INT16_C( 23823), INT16_C(-12210), INT16_C( -514), INT16_C(-10077), INT16_C( 8317)), simde_mm256_set_epi16(INT16_C( 23154), INT16_C( 22847), INT16_C(-30792), INT16_C(-13891), INT16_C(-17196), INT16_C( 13413), INT16_C(-20562), INT16_C( -859), INT16_C( 8152), INT16_C( 25885), INT16_C( -4063), INT16_C( 31434), INT16_C( 2556), INT16_C( 14433), INT16_C( 24107), INT16_C(-14366)) }, { simde_mm256_set_epi16(INT16_C( 16733), INT16_C(-19192), INT16_C(-19177), INT16_C(-13201), INT16_C(-14970), INT16_C(-23573), INT16_C( 23037), INT16_C(-14068), INT16_C( 18230), INT16_C( 11141), INT16_C( 3822), INT16_C( 6182), INT16_C(-23525), INT16_C(-16307), INT16_C( 26522), INT16_C( 21806)), simde_mm256_set_epi16(INT16_C(-30186), INT16_C( 24213), INT16_C( 29509), INT16_C( 2127), INT16_C( -4444), INT16_C( 10965), INT16_C(-26578), INT16_C( -9065), INT16_C( -2827), INT16_C( 31843), INT16_C( 28821), INT16_C( 8551), INT16_C( -1781), INT16_C( 16565), INT16_C(-30183), INT16_C( 9876)), simde_mm256_set_epi16(INT16_C(-18617), INT16_C( 22131), INT16_C( 16850), INT16_C(-15328), INT16_C(-10526), INT16_C( 30998), INT16_C(-15921), INT16_C( -5003), INT16_C( 21057), INT16_C(-20702), INT16_C(-24999), INT16_C( -2369), INT16_C(-21744), INT16_C( 32664), INT16_C( -8831), INT16_C( 11930)) }, { simde_mm256_set_epi16(INT16_C(-10649), INT16_C( 1789), INT16_C( 27012), INT16_C( 5765), INT16_C(-10851), INT16_C( 5376), INT16_C( -9175), INT16_C(-28947), INT16_C( 7733), INT16_C( 7167), INT16_C(-27568), INT16_C( 2115), INT16_C( 10120), INT16_C(-30644), INT16_C(-28345), INT16_C( 20915)), simde_mm256_set_epi16(INT16_C( 23043), INT16_C( 25420), INT16_C( 19475), INT16_C(-31775), INT16_C(-12265), INT16_C( -4342), INT16_C( 11340), INT16_C( 16492), INT16_C( 28872), INT16_C( -2502), INT16_C(-23877), INT16_C(-30231), INT16_C( 1907), INT16_C( 764), INT16_C( -427), INT16_C(-25788)), simde_mm256_set_epi16(INT16_C( 31844), INT16_C(-23631), INT16_C( 7537), INT16_C(-27996), INT16_C( 1414), INT16_C( 9718), INT16_C(-20515), INT16_C( 20097), 
INT16_C(-21139), INT16_C( 9669), INT16_C( -3691), INT16_C( 32346), INT16_C( 8213), INT16_C(-31408), INT16_C(-27918), INT16_C(-18833)) }, { simde_mm256_set_epi16(INT16_C( 10906), INT16_C( -7304), INT16_C( 1755), INT16_C( 4890), INT16_C(-11395), INT16_C( -7564), INT16_C( 10816), INT16_C(-23172), INT16_C(-11608), INT16_C(-11875), INT16_C(-15837), INT16_C(-17763), INT16_C( 21395), INT16_C( 8598), INT16_C(-20063), INT16_C(-10275)), simde_mm256_set_epi16(INT16_C( -9448), INT16_C( 6330), INT16_C(-18331), INT16_C( -6901), INT16_C(-14581), INT16_C( 32348), INT16_C( 9963), INT16_C( 31451), INT16_C( 24178), INT16_C( 12481), INT16_C( 12230), INT16_C(-12978), INT16_C(-27030), INT16_C( -1366), INT16_C(-22019), INT16_C( 19232)), simde_mm256_set_epi16(INT16_C( 20354), INT16_C(-13634), INT16_C( 20086), INT16_C( 11791), INT16_C( 3186), INT16_C( 25624), INT16_C( 853), INT16_C( 10913), INT16_C( 29750), INT16_C(-24356), INT16_C(-28067), INT16_C( -4785), INT16_C(-17111), INT16_C( 9964), INT16_C( 1956), INT16_C(-29507)) }, { simde_mm256_set_epi16(INT16_C( 30843), INT16_C( 21336), INT16_C( 5686), INT16_C( 2828), INT16_C( 19954), INT16_C( 31539), INT16_C(-19646), INT16_C( 671), INT16_C( 25132), INT16_C( 3458), INT16_C(-19712), INT16_C( 4606), INT16_C( 5503), INT16_C(-15517), INT16_C( 355), INT16_C( 25145)), simde_mm256_set_epi16(INT16_C( -6584), INT16_C( 11532), INT16_C( 13185), INT16_C(-20125), INT16_C( 10954), INT16_C( -8384), INT16_C( 2784), INT16_C( 19614), INT16_C( 16337), INT16_C( 21741), INT16_C( 28414), INT16_C( 19897), INT16_C(-27104), INT16_C( 23081), INT16_C( 17729), INT16_C( 27826)), simde_mm256_set_epi16(INT16_C(-28109), INT16_C( 9804), INT16_C( -7499), INT16_C( 22953), INT16_C( 9000), INT16_C(-25613), INT16_C(-22430), INT16_C(-18943), INT16_C( 8795), INT16_C(-18283), INT16_C( 17410), INT16_C(-15291), INT16_C( 32607), INT16_C( 26938), INT16_C(-17374), INT16_C( -2681)) }, { simde_mm256_set_epi16(INT16_C(-16362), INT16_C(-26163), INT16_C( -8945), INT16_C( 5119), INT16_C( 30284), 
INT16_C( 12893), INT16_C(-21079), INT16_C( 124), INT16_C( -1271), INT16_C( 3217), INT16_C( 20702), INT16_C( 647), INT16_C(-30215), INT16_C( 25412), INT16_C( 851), INT16_C( 30446)), simde_mm256_set_epi16(INT16_C( 8605), INT16_C(-24114), INT16_C( -5979), INT16_C( -9101), INT16_C( 6437), INT16_C(-25374), INT16_C( 24146), INT16_C(-14771), INT16_C(-26765), INT16_C( 30576), INT16_C( 25327), INT16_C( 1225), INT16_C(-13633), INT16_C(-15186), INT16_C(-14472), INT16_C( 31809)), simde_mm256_set_epi16(INT16_C(-24967), INT16_C( -2049), INT16_C( -2966), INT16_C( 14220), INT16_C( 23847), INT16_C(-27269), INT16_C( 20311), INT16_C( 14895), INT16_C( 25494), INT16_C(-27359), INT16_C( -4625), INT16_C( -578), INT16_C(-16582), INT16_C(-24938), INT16_C( 15323), INT16_C( -1363)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_sub_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_sub_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 326943318), INT32_C(-1426446841), INT32_C( -705322739), INT32_C( 1382181134), INT32_C( -752013979), INT32_C( 1926838871), INT32_C(-1267474220), INT32_C(-1807600682)), simde_mm256_set_epi32(INT32_C( 251701658), INT32_C( 1304570849), INT32_C(-1313346575), INT32_C( 548342496), INT32_C( 350328631), INT32_C( 492623957), INT32_C( 209048435), INT32_C( 494519967)), simde_mm256_set_epi32(INT32_C( 75241660), INT32_C( 1563949606), INT32_C( 608023836), INT32_C( 833838638), INT32_C(-1102342610), INT32_C( 1434214914), INT32_C(-1476522655), INT32_C( 1992846647)) }, { simde_mm256_set_epi32(INT32_C( -933394757), INT32_C( 2044671684), INT32_C( 359818497), INT32_C( 1070534780), INT32_C( -970998086), INT32_C(-1718071453), INT32_C( 1676992968), INT32_C(-1700643811)), simde_mm256_set_epi32(INT32_C( 1601497259), INT32_C( -783324799), INT32_C( 230836562), 
INT32_C( -793897628), INT32_C( 2109010219), INT32_C( -500991481), INT32_C( 1633364343), INT32_C(-1085575788)), simde_mm256_set_epi32(INT32_C( 1760075280), INT32_C(-1466970813), INT32_C( 128981935), INT32_C( 1864432408), INT32_C( 1214958991), INT32_C(-1217079972), INT32_C( 43628625), INT32_C( -615068023)) }, { simde_mm256_set_epi32(INT32_C( -51216759), INT32_C( -878749548), INT32_C(-1368701232), INT32_C(-2046658418), INT32_C(-1669301170), INT32_C(-2003910541), INT32_C( 1329758066), INT32_C( 1724000039)), simde_mm256_set_epi32(INT32_C(-2019361399), INT32_C( 1441882288), INT32_C(-1015852054), INT32_C(-2140601122), INT32_C( 1393090412), INT32_C( -54787332), INT32_C( -18616908), INT32_C(-2129018166)), simde_mm256_set_epi32(INT32_C( 1968144640), INT32_C( 1974335460), INT32_C( -352849178), INT32_C( 93942704), INT32_C( 1232575714), INT32_C(-1949123209), INT32_C( 1348374974), INT32_C( -441949091)) }, { simde_mm256_set_epi32(INT32_C( -198091267), INT32_C( -797938117), INT32_C( 1365606315), INT32_C( 1369771544), INT32_C( 454471573), INT32_C(-1496770990), INT32_C(-1504062345), INT32_C(-1729521659)), simde_mm256_set_epi32(INT32_C(-1875378957), INT32_C( 1413357347), INT32_C(-2041380610), INT32_C( 1834864782), INT32_C( 265860555), INT32_C( 367864932), INT32_C(-1203083960), INT32_C( 193499515)), simde_mm256_set_epi32(INT32_C( 1677287690), INT32_C( 2083671832), INT32_C( -887980371), INT32_C( -465093238), INT32_C( 188611018), INT32_C(-1864635922), INT32_C( -300978385), INT32_C(-1923021174)) }, { simde_mm256_set_epi32(INT32_C(-1042833230), INT32_C( 1624054732), INT32_C( 234631196), INT32_C( 738820353), INT32_C(-1501237092), INT32_C(-1737761237), INT32_C(-1663334158), INT32_C( -439342149)), simde_mm256_set_epi32(INT32_C( 164170961), INT32_C( 1826341689), INT32_C(-1379584328), INT32_C( -883834236), INT32_C( -145065229), INT32_C( 1276456429), INT32_C( 908937179), INT32_C( 1101978337)), simde_mm256_set_epi32(INT32_C(-1207004191), INT32_C( -202286957), INT32_C( 1614215524), INT32_C( 
1622654589), INT32_C(-1356171863), INT32_C( 1280749630), INT32_C( 1722695959), INT32_C(-1541320486)) }, { simde_mm256_set_epi32(INT32_C( 1638607685), INT32_C(-2009128326), INT32_C( -563580109), INT32_C( 1319284819), INT32_C( -21113241), INT32_C( -656216704), INT32_C( 927278729), INT32_C( 414207795)), simde_mm256_set_epi32(INT32_C(-1371308348), INT32_C( 1761271665), INT32_C( 1358250649), INT32_C( -906482895), INT32_C( 540611685), INT32_C( 776022210), INT32_C( 1935373704), INT32_C( -317327387)), simde_mm256_set_epi32(INT32_C(-1285051263), INT32_C( 524567305), INT32_C(-1921830758), INT32_C(-2069199582), INT32_C( -561724926), INT32_C(-1432238914), INT32_C(-1008094975), INT32_C( 731535182)) }, { simde_mm256_set_epi32(INT32_C(-2100986650), INT32_C( 1414058950), INT32_C(-1053542475), INT32_C( -736674814), INT32_C(-2096553410), INT32_C(-1369794669), INT32_C( 26060713), INT32_C(-1294950753)), simde_mm256_set_epi32(INT32_C( 2012616977), INT32_C( 1553490078), INT32_C( 324319438), INT32_C(-1664423684), INT32_C( 1284704444), INT32_C( 1334884932), INT32_C(-1343260831), INT32_C( -516360618)), simde_mm256_set_epi32(INT32_C( 181363669), INT32_C( -139431128), INT32_C(-1377861913), INT32_C( 927748870), INT32_C( 913709442), INT32_C( 1590287695), INT32_C( 1369321544), INT32_C( -778590135)) }, { simde_mm256_set_epi32(INT32_C(-1942786341), INT32_C(-1691503678), INT32_C(-1765167726), INT32_C( 1858759286), INT32_C( 627375525), INT32_C(-1868274030), INT32_C( 12505540), INT32_C( 986732702)), simde_mm256_set_epi32(INT32_C(-1777720655), INT32_C(-2032047915), INT32_C( -56259521), INT32_C( -952761137), INT32_C( 805966046), INT32_C( 372017673), INT32_C( 20751226), INT32_C( -331388981)), simde_mm256_set_epi32(INT32_C( -165065686), INT32_C( 340544237), INT32_C(-1708908205), INT32_C(-1483446873), INT32_C( -178590521), INT32_C( 2054675593), INT32_C( -8245686), INT32_C( 1318121683)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = 
simde_mm256_sub_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_sub_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C(-2885858146882597019), INT64_C(-3498254024691385676), INT64_C( 7972223233728178657), INT64_C( 6231317733982512179)), simde_mm256_set_epi64x(INT64_C( 3826502112888397679), INT64_C(-8682306207824752089), INT64_C( 8218291866756092637), INT64_C( 977663894185762148)), simde_mm256_set_epi64x(INT64_C(-6712360259770994698), INT64_C( 5184052183133366413), INT64_C( -246068633027913980), INT64_C( 5253653839796750031)) }, { simde_mm256_set_epi64x(INT64_C( 7921380973104735890), INT64_C(-5532460099708114375), INT64_C( 865084101794303087), INT64_C(-7356844417578967355)), simde_mm256_set_epi64x(INT64_C(-3491207381975027904), INT64_C(-1030678116339367991), INT64_C(-8175524911489530105), INT64_C( 323779015099665520)), simde_mm256_set_epi64x(INT64_C(-7034155718629787822), INT64_C(-4501781983368746384), INT64_C( 9040609013283833192), INT64_C(-7680623432678632875)) }, { simde_mm256_set_epi64x(INT64_C( 4341016735029725119), INT64_C(-4017352277952498418), INT64_C( 7582500189708583283), INT64_C(-5892965129862928683)), simde_mm256_set_epi64x(INT64_C(-8237509791976513777), INT64_C( -470554251606780044), INT64_C( 6040143329078659839), INT64_C( -424501583476695872)), simde_mm256_set_epi64x(INT64_C(-5868217546703312720), INT64_C(-3546798026345718374), INT64_C( 1542356860629923444), INT64_C(-5468463546386232811)) }, { simde_mm256_set_epi64x(INT64_C(-7392183468556786070), INT64_C(-6610175517365172355), INT64_C(-3756419626009538740), INT64_C(-9036454621798279072)), simde_mm256_set_epi64x(INT64_C( 6035045015129345008), INT64_C( 8791627257213638823), INT64_C(-8556126845609402343), INT64_C( 1314921607038438268)), simde_mm256_set_epi64x(INT64_C( 5019515590023420538), INT64_C( 3044941299130740438), INT64_C( 
4799707219599863603), INT64_C( 8095367844872834276)) }, { simde_mm256_set_epi64x(INT64_C( 8174138309950782489), INT64_C(-3595444893239169269), INT64_C(-6261543721635705717), INT64_C(-8089986319993999570)), simde_mm256_set_epi64x(INT64_C( 4668020520258365306), INT64_C( 1754255393252662962), INT64_C( 1690801515152582572), INT64_C(-7136424812163547703)), simde_mm256_set_epi64x(INT64_C( 3506117789692417183), INT64_C(-5349700286491832231), INT64_C(-7952345236788288289), INT64_C( -953561507830451867)) }, { simde_mm256_set_epi64x(INT64_C( 6033172137928900790), INT64_C(-5612895834493214657), INT64_C(-8524528714074059580), INT64_C(-3969628664745125304)), simde_mm256_set_epi64x(INT64_C( 7126793331936974199), INT64_C( 5665142255281704397), INT64_C( 1602540828580772090), INT64_C( 8387594021483335882)), simde_mm256_set_epi64x(INT64_C(-1093621194008073409), INT64_C( 7168705983934632562), INT64_C( 8319674531054719946), INT64_C( 6089521387481090430)) }, { simde_mm256_set_epi64x(INT64_C( 3903790755083266798), INT64_C(-4529825138752859475), INT64_C( -488451067611140207), INT64_C( 375847149564403603)), simde_mm256_set_epi64x(INT64_C(-7317636610137191039), INT64_C(-6404029010026237648), INT64_C( 2684127196708781119), INT64_C(-4695910663442221977)), simde_mm256_set_epi64x(INT64_C(-7225316708489093779), INT64_C( 1874203871273378173), INT64_C(-3172578264319921326), INT64_C( 5071757813006625580)) }, { simde_mm256_set_epi64x(INT64_C( -634520004609695604), INT64_C(-4556943099082170660), INT64_C( 407417976594608885), INT64_C(-2432518353344050945)), simde_mm256_set_epi64x(INT64_C( 8351025001768468670), INT64_C(-2893015942465040381), INT64_C(-7482511571570747740), INT64_C( 2790733591573448110)), simde_mm256_set_epi64x(INT64_C(-8985545006378164274), INT64_C(-1663927156617130279), INT64_C( 7889929548165356625), INT64_C(-5223251944917499055)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_sub_epi64(test_vec[i].a, test_vec[i].b); 
simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_x_mm256_sub_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu32(UINT32_C( 107879378), UINT32_C(2256216441), UINT32_C(1547145394), UINT32_C( 23322183), UINT32_C( 24183335), UINT32_C(2032217709), UINT32_C( 971024796), UINT32_C(3326705122)), simde_x_mm256_set_epu32(UINT32_C( 149466069), UINT32_C(3517994125), UINT32_C(1432270320), UINT32_C(2851287457), UINT32_C(1208329282), UINT32_C( 158848810), UINT32_C(3819928804), UINT32_C(3738317239)), simde_x_mm256_set_epu32(UINT32_C(4253380605), UINT32_C(3033189612), UINT32_C( 114875074), UINT32_C(1467002022), UINT32_C(3110821349), UINT32_C(1873368899), UINT32_C(1446063288), UINT32_C(3883355179)) }, { simde_x_mm256_set_epu32(UINT32_C(2351108717), UINT32_C( 184367791), UINT32_C( 651012821), UINT32_C(1858699737), UINT32_C(2120954292), UINT32_C( 432426176), UINT32_C( 314621009), UINT32_C( 683154820)), simde_x_mm256_set_epu32(UINT32_C(2061753916), UINT32_C(3341354567), UINT32_C(2475959533), UINT32_C( 982967416), UINT32_C(2049417650), UINT32_C(1285751824), UINT32_C(3637636007), UINT32_C(1398995246)), simde_x_mm256_set_epu32(UINT32_C( 289354801), UINT32_C(1137980520), UINT32_C(2470020584), UINT32_C( 875732321), UINT32_C( 71536642), UINT32_C(3441641648), UINT32_C( 971952298), UINT32_C(3579126870)) }, { simde_x_mm256_set_epu32(UINT32_C(1897588552), UINT32_C( 447224664), UINT32_C(2771607020), UINT32_C( 619382757), UINT32_C(2584621804), UINT32_C( 967633271), UINT32_C(2311738101), UINT32_C(1795187199)), simde_x_mm256_set_epu32(UINT32_C(1007929400), UINT32_C(2616961748), UINT32_C(1589771424), UINT32_C(3081473365), UINT32_C(3569555574), UINT32_C(3120660078), UINT32_C( 775299967), UINT32_C( 379946438)), simde_x_mm256_set_epu32(UINT32_C( 889659152), UINT32_C(2125230212), UINT32_C(1181835596), UINT32_C(1832876688), UINT32_C(3310033526), UINT32_C(2141940489), UINT32_C(1536438134), 
UINT32_C(1415240761)) }, { simde_x_mm256_set_epu32(UINT32_C(1476738727), UINT32_C(1269851975), UINT32_C(2316737502), UINT32_C( 66699996), UINT32_C( 467087828), UINT32_C(2161831335), UINT32_C(4149911179), UINT32_C( 504793551)), simde_x_mm256_set_epu32(UINT32_C(3396602389), UINT32_C(3482637302), UINT32_C(4215273460), UINT32_C( 993218468), UINT32_C(2631972858), UINT32_C(3366478001), UINT32_C(3680212603), UINT32_C(3739565912)), simde_x_mm256_set_epu32(UINT32_C(2375103634), UINT32_C(2082181969), UINT32_C(2396431338), UINT32_C(3368448824), UINT32_C(2130082266), UINT32_C(3090320630), UINT32_C( 469698576), UINT32_C(1060194935)) }, { simde_x_mm256_set_epu32(UINT32_C( 179882245), UINT32_C(4021916078), UINT32_C(3627300617), UINT32_C(3248980610), UINT32_C( 191409075), UINT32_C(3646970912), UINT32_C(2035553404), UINT32_C(4075300565)), simde_x_mm256_set_epu32(UINT32_C(4018869540), UINT32_C(3247000771), UINT32_C( 419518577), UINT32_C(1741182634), UINT32_C( 293418594), UINT32_C(2336988215), UINT32_C(4218997823), UINT32_C( 205082086)), simde_x_mm256_set_epu32(UINT32_C( 455980001), UINT32_C( 774915307), UINT32_C(3207782040), UINT32_C(1507797976), UINT32_C(4192957777), UINT32_C(1309982697), UINT32_C(2111522877), UINT32_C(3870218479)) }, { simde_x_mm256_set_epu32(UINT32_C( 84590544), UINT32_C(4204381436), UINT32_C(3727353694), UINT32_C(2213501554), UINT32_C(2836071016), UINT32_C(4047724580), UINT32_C(1001388660), UINT32_C(3282415970)), simde_x_mm256_set_epu32(UINT32_C(1074654014), UINT32_C(1012976116), UINT32_C(2090582329), UINT32_C(2908553801), UINT32_C(1272437250), UINT32_C(3454899956), UINT32_C(3722866273), UINT32_C(4069029472)), simde_x_mm256_set_epu32(UINT32_C(3304903826), UINT32_C(3191405320), UINT32_C(1636771365), UINT32_C(3599915049), UINT32_C(1563633766), UINT32_C( 592824624), UINT32_C(1573489683), UINT32_C(3508353794)) }, { simde_x_mm256_set_epu32(UINT32_C(2451663057), UINT32_C( 104156540), UINT32_C( 918040827), UINT32_C( 534540183), UINT32_C(3003822364), 
UINT32_C(4203020917), UINT32_C(1548649531), UINT32_C( 205808915)), simde_x_mm256_set_epu32(UINT32_C(3286644093), UINT32_C(3785715782), UINT32_C( 33333951), UINT32_C( 270641278), UINT32_C( 533308511), UINT32_C(1335157821), UINT32_C(2324891096), UINT32_C( 26430848)), simde_x_mm256_set_epu32(UINT32_C(3459986260), UINT32_C( 613408054), UINT32_C( 884706876), UINT32_C( 263898905), UINT32_C(2470513853), UINT32_C(2867863096), UINT32_C(3518725731), UINT32_C( 179378067)) }, { simde_x_mm256_set_epu32(UINT32_C(2743667516), UINT32_C(3635884938), UINT32_C( 257447579), UINT32_C(2134094887), UINT32_C( 404535734), UINT32_C(2034221371), UINT32_C(3960317580), UINT32_C( 591850571)), simde_x_mm256_set_epu32(UINT32_C( 570641927), UINT32_C(1821664575), UINT32_C( 96915915), UINT32_C( 55727396), UINT32_C(4162444437), UINT32_C(1669109680), UINT32_C(1870162202), UINT32_C(3082396888)), simde_x_mm256_set_epu32(UINT32_C(2173025589), UINT32_C(1814220363), UINT32_C( 160531664), UINT32_C(2078367491), UINT32_C( 537058593), UINT32_C( 365111691), UINT32_C(2090155378), UINT32_C(1804420979)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_x_mm256_sub_epu32(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_subs_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( 53), INT8_C( 22), INT8_C( -99), INT8_C( -90), INT8_C( 42), INT8_C( 84), INT8_C( 24), INT8_C( 60), INT8_C(-122), INT8_C( 44), INT8_C( -82), INT8_C( 91), INT8_C( -59), INT8_C(-107), INT8_C( -72), INT8_C( -97), INT8_C(-114), INT8_C( 65), INT8_C( -70), INT8_C( -44), INT8_C( -34), INT8_C( 83), INT8_C( 30), INT8_C( 3), INT8_C( 17), INT8_C( 66), INT8_C( 92), INT8_C( 52), INT8_C( -56), INT8_C( 94), INT8_C( 119), INT8_C( 94)), simde_mm256_set_epi8(INT8_C( 56), INT8_C( -13), INT8_C( -72), INT8_C( 29), INT8_C( 36), INT8_C( 53), INT8_C( 55), INT8_C( 
96), INT8_C( 22), INT8_C( 124), INT8_C( 55), INT8_C( 67), INT8_C( 105), INT8_C( -7), INT8_C( 41), INT8_C( -17), INT8_C( 28), INT8_C( 17), INT8_C( -15), INT8_C( 70), INT8_C( 15), INT8_C( -38), INT8_C( 93), INT8_C( 109), INT8_C( 77), INT8_C(-109), INT8_C( 9), INT8_C( 58), INT8_C( -56), INT8_C( 8), INT8_C( 92), INT8_C( -88)), simde_mm256_set_epi8(INT8_C( -3), INT8_C( 35), INT8_C( -27), INT8_C(-119), INT8_C( 6), INT8_C( 31), INT8_C( -31), INT8_C( -36), INT8_C(-128), INT8_C( -80), INT8_C(-128), INT8_C( 24), INT8_C(-128), INT8_C(-100), INT8_C(-113), INT8_C( -80), INT8_C(-128), INT8_C( 48), INT8_C( -55), INT8_C(-114), INT8_C( -49), INT8_C( 121), INT8_C( -63), INT8_C(-106), INT8_C( -60), INT8_C( 127), INT8_C( 83), INT8_C( -6), INT8_C( 0), INT8_C( 86), INT8_C( 27), INT8_C( 127)) }, { simde_mm256_set_epi8(INT8_C( -93), INT8_C( -26), INT8_C( 0), INT8_C( 17), INT8_C(-120), INT8_C( 61), INT8_C(-102), INT8_C( 103), INT8_C( 79), INT8_C( -49), INT8_C( -72), INT8_C( 47), INT8_C( 110), INT8_C( -74), INT8_C( -56), INT8_C( 43), INT8_C( 93), INT8_C( 101), INT8_C( 5), INT8_C( -56), INT8_C( -87), INT8_C( 20), INT8_C(-114), INT8_C(-104), INT8_C(-119), INT8_C( 75), INT8_C(-125), INT8_C( -24), INT8_C( -47), INT8_C( -37), INT8_C( -74), INT8_C( -79)), simde_mm256_set_epi8(INT8_C( 29), INT8_C( 85), INT8_C( 95), INT8_C( -5), INT8_C( -3), INT8_C( 113), INT8_C( 45), INT8_C( -94), INT8_C( -94), INT8_C( -99), INT8_C( 85), INT8_C( 102), INT8_C( 95), INT8_C( 76), INT8_C( -35), INT8_C( -63), INT8_C( 20), INT8_C( -12), INT8_C( 14), INT8_C(-108), INT8_C( -3), INT8_C( -68), INT8_C( 90), INT8_C( 90), INT8_C( 39), INT8_C( 15), INT8_C( 34), INT8_C( -85), INT8_C(-115), INT8_C(-113), INT8_C( -50), INT8_C( 41)), simde_mm256_set_epi8(INT8_C(-122), INT8_C(-111), INT8_C( -95), INT8_C( 22), INT8_C(-117), INT8_C( -52), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 50), INT8_C(-128), INT8_C( -55), INT8_C( 15), INT8_C(-128), INT8_C( -21), INT8_C( 106), INT8_C( 73), INT8_C( 113), INT8_C( -9), INT8_C( 52), INT8_C( 
-84), INT8_C( 88), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 60), INT8_C(-128), INT8_C( 61), INT8_C( 68), INT8_C( 76), INT8_C( -24), INT8_C(-120)) }, { simde_mm256_set_epi8(INT8_C(-118), INT8_C( 5), INT8_C(-122), INT8_C( -40), INT8_C( -95), INT8_C( 64), INT8_C( 56), INT8_C(-112), INT8_C( -70), INT8_C( -39), INT8_C( 82), INT8_C( 89), INT8_C( -25), INT8_C( -48), INT8_C( 6), INT8_C( 73), INT8_C( -25), INT8_C( 89), INT8_C( 45), INT8_C( -40), INT8_C( 7), INT8_C( -14), INT8_C( 103), INT8_C( 37), INT8_C( -51), INT8_C( 59), INT8_C( -30), INT8_C( 28), INT8_C( -3), INT8_C( -99), INT8_C( 87), INT8_C( 38)), simde_mm256_set_epi8(INT8_C( -9), INT8_C( 27), INT8_C( -15), INT8_C(-127), INT8_C(-108), INT8_C( -58), INT8_C( -97), INT8_C( -56), INT8_C( 79), INT8_C( -35), INT8_C( -71), INT8_C( 22), INT8_C( -63), INT8_C(-121), INT8_C( -22), INT8_C( 67), INT8_C( 88), INT8_C( -40), INT8_C( -33), INT8_C( 36), INT8_C( -18), INT8_C( 91), INT8_C( 119), INT8_C( 49), INT8_C( 32), INT8_C(-120), INT8_C(-103), INT8_C( -96), INT8_C( 75), INT8_C( 92), INT8_C( 46), INT8_C( -24)), simde_mm256_set_epi8(INT8_C(-109), INT8_C( -22), INT8_C(-107), INT8_C( 87), INT8_C( 13), INT8_C( 122), INT8_C( 127), INT8_C( -56), INT8_C(-128), INT8_C( -4), INT8_C( 127), INT8_C( 67), INT8_C( 38), INT8_C( 73), INT8_C( 28), INT8_C( 6), INT8_C(-113), INT8_C( 127), INT8_C( 78), INT8_C( -76), INT8_C( 25), INT8_C(-105), INT8_C( -16), INT8_C( -12), INT8_C( -83), INT8_C( 127), INT8_C( 73), INT8_C( 124), INT8_C( -78), INT8_C(-128), INT8_C( 41), INT8_C( 62)) }, { simde_mm256_set_epi8(INT8_C( 44), INT8_C( 83), INT8_C( -32), INT8_C( 9), INT8_C( -69), INT8_C( 91), INT8_C( -61), INT8_C( 48), INT8_C( -44), INT8_C( 120), INT8_C( 118), INT8_C( -37), INT8_C(-112), INT8_C( -30), INT8_C( 84), INT8_C( 76), INT8_C( -78), INT8_C( 115), INT8_C( -63), INT8_C( -12), INT8_C( -25), INT8_C( 117), INT8_C( 23), INT8_C( -38), INT8_C( -3), INT8_C( 33), INT8_C( 83), INT8_C( -23), INT8_C( -34), INT8_C( 37), INT8_C( -85), INT8_C( -55)), 
simde_mm256_set_epi8(INT8_C( 88), INT8_C(-105), INT8_C( -90), INT8_C( 16), INT8_C(-116), INT8_C( -66), INT8_C( 122), INT8_C( 120), INT8_C( -39), INT8_C( 67), INT8_C( 124), INT8_C( 106), INT8_C( -41), INT8_C( 52), INT8_C( -14), INT8_C( -22), INT8_C(-117), INT8_C( -91), INT8_C( 105), INT8_C( 6), INT8_C(-100), INT8_C( 20), INT8_C( -55), INT8_C( 105), INT8_C( -43), INT8_C( 29), INT8_C( 33), INT8_C( -82), INT8_C(-126), INT8_C( 41), INT8_C( 41), INT8_C( -12)), simde_mm256_set_epi8(INT8_C( -44), INT8_C( 127), INT8_C( 58), INT8_C( -7), INT8_C( 47), INT8_C( 127), INT8_C(-128), INT8_C( -72), INT8_C( -5), INT8_C( 53), INT8_C( -6), INT8_C(-128), INT8_C( -71), INT8_C( -82), INT8_C( 98), INT8_C( 98), INT8_C( 39), INT8_C( 127), INT8_C(-128), INT8_C( -18), INT8_C( 75), INT8_C( 97), INT8_C( 78), INT8_C(-128), INT8_C( 40), INT8_C( 4), INT8_C( 50), INT8_C( 59), INT8_C( 92), INT8_C( -4), INT8_C(-126), INT8_C( -43)) }, { simde_mm256_set_epi8(INT8_C( 69), INT8_C( -19), INT8_C( -92), INT8_C( -51), INT8_C( 104), INT8_C( 20), INT8_C( 21), INT8_C( -35), INT8_C( -76), INT8_C( -66), INT8_C( 37), INT8_C( 59), INT8_C( 60), INT8_C(-100), INT8_C( 19), INT8_C( 125), INT8_C( 63), INT8_C( 96), INT8_C( -20), INT8_C( 53), INT8_C( 121), INT8_C( 115), INT8_C( -36), INT8_C( 95), INT8_C( 62), INT8_C( 99), INT8_C( -61), INT8_C( -76), INT8_C( 55), INT8_C( 5), INT8_C( -93), INT8_C( -19)), simde_mm256_set_epi8(INT8_C(-107), INT8_C( 27), INT8_C( 110), INT8_C( -66), INT8_C(-108), INT8_C( 103), INT8_C( -63), INT8_C( 96), INT8_C( 40), INT8_C( -37), INT8_C( 4), INT8_C( -24), INT8_C( -32), INT8_C( 47), INT8_C( 83), INT8_C( -56), INT8_C( -5), INT8_C(-125), INT8_C( 8), INT8_C( -20), INT8_C( -92), INT8_C( -17), INT8_C( -76), INT8_C(-105), INT8_C( -38), INT8_C( 10), INT8_C( 27), INT8_C( -81), INT8_C( 92), INT8_C( 92), INT8_C( 46), INT8_C(-100)), simde_mm256_set_epi8(INT8_C( 127), INT8_C( -46), INT8_C(-128), INT8_C( 15), INT8_C( 127), INT8_C( -83), INT8_C( 84), INT8_C(-128), INT8_C(-116), INT8_C( -29), INT8_C( 33), 
INT8_C( 83), INT8_C( 92), INT8_C(-128), INT8_C( -64), INT8_C( 127), INT8_C( 68), INT8_C( 127), INT8_C( -28), INT8_C( 73), INT8_C( 127), INT8_C( 127), INT8_C( 40), INT8_C( 127), INT8_C( 100), INT8_C( 89), INT8_C( -88), INT8_C( 5), INT8_C( -37), INT8_C( -87), INT8_C(-128), INT8_C( 81)) }, { simde_mm256_set_epi8(INT8_C( -50), INT8_C( 75), INT8_C( -14), INT8_C( 54), INT8_C( 43), INT8_C( -80), INT8_C(-125), INT8_C( 103), INT8_C( 80), INT8_C( -1), INT8_C( -59), INT8_C( -13), INT8_C(-111), INT8_C( 18), INT8_C( -92), INT8_C( 116), INT8_C( 12), INT8_C( -97), INT8_C( -84), INT8_C( -83), INT8_C( -61), INT8_C( 127), INT8_C( 102), INT8_C(-110), INT8_C( 1), INT8_C( 91), INT8_C( -75), INT8_C( -20), INT8_C( -58), INT8_C( -80), INT8_C( 101), INT8_C( 97)), simde_mm256_set_epi8(INT8_C( 15), INT8_C( -47), INT8_C( 64), INT8_C( 40), INT8_C( 22), INT8_C( 0), INT8_C( -93), INT8_C( 62), INT8_C( -55), INT8_C( 95), INT8_C( -45), INT8_C( -91), INT8_C( 44), INT8_C( 98), INT8_C( -37), INT8_C( -63), INT8_C( -64), INT8_C( 91), INT8_C(-118), INT8_C( 36), INT8_C(-114), INT8_C( 60), INT8_C( 43), INT8_C( -14), INT8_C( -9), INT8_C( -85), INT8_C( -51), INT8_C( -99), INT8_C(-120), INT8_C( 73), INT8_C( 109), INT8_C( 94)), simde_mm256_set_epi8(INT8_C( -65), INT8_C( 122), INT8_C( -78), INT8_C( 14), INT8_C( 21), INT8_C( -80), INT8_C( -32), INT8_C( 41), INT8_C( 127), INT8_C( -96), INT8_C( -14), INT8_C( 78), INT8_C(-128), INT8_C( -80), INT8_C( -55), INT8_C( 127), INT8_C( 76), INT8_C(-128), INT8_C( 34), INT8_C(-119), INT8_C( 53), INT8_C( 67), INT8_C( 59), INT8_C( -96), INT8_C( 10), INT8_C( 127), INT8_C( -24), INT8_C( 79), INT8_C( 62), INT8_C(-128), INT8_C( -8), INT8_C( 3)) }, { simde_mm256_set_epi8(INT8_C( -90), INT8_C( -41), INT8_C(-125), INT8_C( 28), INT8_C( -59), INT8_C( -49), INT8_C( 55), INT8_C(-108), INT8_C( 114), INT8_C(-122), INT8_C( 4), INT8_C(-102), INT8_C( 92), INT8_C( -39), INT8_C( -84), INT8_C( 119), INT8_C( -87), INT8_C( 8), INT8_C(-125), INT8_C( -67), INT8_C( 61), INT8_C( 70), INT8_C( -88), 
INT8_C( 46), INT8_C( -8), INT8_C( -33), INT8_C( 3), INT8_C( -52), INT8_C( -90), INT8_C( 122), INT8_C( 12), INT8_C( -37)), simde_mm256_set_epi8(INT8_C( 27), INT8_C(-128), INT8_C( -93), INT8_C( -38), INT8_C( 107), INT8_C( 58), INT8_C( 66), INT8_C( -13), INT8_C( 126), INT8_C( 106), INT8_C( 57), INT8_C( -95), INT8_C(-110), INT8_C( 38), INT8_C( -48), INT8_C(-109), INT8_C( 83), INT8_C( -82), INT8_C(-119), INT8_C( 15), INT8_C( 122), INT8_C( -6), INT8_C( -15), INT8_C( 81), INT8_C( -79), INT8_C( -92), INT8_C( 8), INT8_C( 40), INT8_C( 33), INT8_C( 96), INT8_C(-114), INT8_C(-116)), simde_mm256_set_epi8(INT8_C(-117), INT8_C( 87), INT8_C( -32), INT8_C( 66), INT8_C(-128), INT8_C(-107), INT8_C( -11), INT8_C( -95), INT8_C( -12), INT8_C(-128), INT8_C( -53), INT8_C( -7), INT8_C( 127), INT8_C( -77), INT8_C( -36), INT8_C( 127), INT8_C(-128), INT8_C( 90), INT8_C( -6), INT8_C( -82), INT8_C( -61), INT8_C( 76), INT8_C( -73), INT8_C( -35), INT8_C( 71), INT8_C( 59), INT8_C( -5), INT8_C( -92), INT8_C(-123), INT8_C( 26), INT8_C( 126), INT8_C( 79)) }, { simde_mm256_set_epi8(INT8_C( -33), INT8_C( 57), INT8_C( 32), INT8_C( 63), INT8_C( 13), INT8_C( 3), INT8_C(-124), INT8_C(-110), INT8_C(-104), INT8_C( -9), INT8_C( -20), INT8_C( 115), INT8_C(-123), INT8_C(-119), INT8_C( -41), INT8_C(-125), INT8_C( 125), INT8_C( -55), INT8_C( 104), INT8_C( 103), INT8_C( 40), INT8_C( 8), INT8_C(-109), INT8_C( 96), INT8_C( 126), INT8_C( 21), INT8_C( -61), INT8_C( 85), INT8_C( 120), INT8_C( 127), INT8_C( 54), INT8_C( 72)), simde_mm256_set_epi8(INT8_C( 8), INT8_C( -54), INT8_C( -21), INT8_C( 62), INT8_C( -44), INT8_C( -59), INT8_C( 7), INT8_C( 68), INT8_C( 127), INT8_C( -4), INT8_C( 119), INT8_C( 63), INT8_C( -49), INT8_C( 31), INT8_C( 25), INT8_C( -33), INT8_C( 45), INT8_C( 7), INT8_C( 0), INT8_C( 88), INT8_C( 47), INT8_C( -92), INT8_C( -21), INT8_C( 118), INT8_C( -41), INT8_C( 85), INT8_C( 93), INT8_C( 38), INT8_C( 60), INT8_C( 59), INT8_C( -96), INT8_C( -40)), simde_mm256_set_epi8(INT8_C( -41), INT8_C( 111), 
INT8_C( 53), INT8_C( 1), INT8_C( 57), INT8_C( 62), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( -5), INT8_C(-128), INT8_C( 52), INT8_C( -74), INT8_C(-128), INT8_C( -66), INT8_C( -92), INT8_C( 80), INT8_C( -62), INT8_C( 104), INT8_C( 15), INT8_C( -7), INT8_C( 100), INT8_C( -88), INT8_C( -22), INT8_C( 127), INT8_C( -64), INT8_C(-128), INT8_C( 47), INT8_C( 60), INT8_C( 68), INT8_C( 127), INT8_C( 112)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_subs_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_subs_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C( 13590), INT16_C(-25178), INT16_C( 10836), INT16_C( 6204), INT16_C(-31188), INT16_C(-20901), INT16_C(-14955), INT16_C(-18273), INT16_C(-29119), INT16_C(-17708), INT16_C( -8621), INT16_C( 7683), INT16_C( 4418), INT16_C( 23604), INT16_C(-14242), INT16_C( 30558)), simde_mm256_set_epi16(INT16_C( 14579), INT16_C(-18403), INT16_C( 9269), INT16_C( 14176), INT16_C( 5756), INT16_C( 14147), INT16_C( 27129), INT16_C( 10735), INT16_C( 7185), INT16_C( -3770), INT16_C( 4058), INT16_C( 23917), INT16_C( 19859), INT16_C( 2362), INT16_C(-14328), INT16_C( 23720)), simde_mm256_set_epi16(INT16_C( -989), INT16_C( -6775), INT16_C( 1567), INT16_C( -7972), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-29008), INT16_C(-32768), INT16_C(-13938), INT16_C(-12679), INT16_C(-16234), INT16_C(-15441), INT16_C( 21242), INT16_C( 86), INT16_C( 6838)) }, { simde_mm256_set_epi16(INT16_C(-23578), INT16_C( 17), INT16_C(-30659), INT16_C(-26009), INT16_C( 20431), INT16_C(-18385), INT16_C( 28342), INT16_C(-14293), INT16_C( 23909), INT16_C( 1480), INT16_C(-22252), INT16_C(-29032), INT16_C(-30389), INT16_C(-31768), INT16_C(-11813), INT16_C(-18767)), simde_mm256_set_epi16(INT16_C( 7509), INT16_C( 24571), INT16_C( -655), 
INT16_C( 11682), INT16_C(-23907), INT16_C( 21862), INT16_C( 24396), INT16_C( -8767), INT16_C( 5364), INT16_C( 3732), INT16_C( -580), INT16_C( 23130), INT16_C( 9999), INT16_C( 8875), INT16_C(-29297), INT16_C(-12759)), simde_mm256_set_epi16(INT16_C(-31087), INT16_C(-24554), INT16_C(-30004), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C( 3946), INT16_C( -5526), INT16_C( 18545), INT16_C( -2252), INT16_C(-21672), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C( 17484), INT16_C( -6008)) }, { simde_mm256_set_epi16(INT16_C(-30203), INT16_C(-31016), INT16_C(-24256), INT16_C( 14480), INT16_C(-17703), INT16_C( 21081), INT16_C( -6192), INT16_C( 1609), INT16_C( -6311), INT16_C( 11736), INT16_C( 2034), INT16_C( 26405), INT16_C(-12997), INT16_C( -7652), INT16_C( -611), INT16_C( 22310)), simde_mm256_set_epi16(INT16_C( -2277), INT16_C( -3711), INT16_C(-27450), INT16_C(-24632), INT16_C( 20445), INT16_C(-18154), INT16_C(-15993), INT16_C( -5565), INT16_C( 22744), INT16_C( -8412), INT16_C( -4517), INT16_C( 30513), INT16_C( 8328), INT16_C(-26208), INT16_C( 19292), INT16_C( 12008)), simde_mm256_set_epi16(INT16_C(-27926), INT16_C(-27305), INT16_C( 3194), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C( 9801), INT16_C( 7174), INT16_C(-29055), INT16_C( 20148), INT16_C( 6551), INT16_C( -4108), INT16_C(-21325), INT16_C( 18556), INT16_C(-19903), INT16_C( 10302)) }, { simde_mm256_set_epi16(INT16_C( 11347), INT16_C( -8183), INT16_C(-17573), INT16_C(-15568), INT16_C(-11144), INT16_C( 30427), INT16_C(-28446), INT16_C( 21580), INT16_C(-19853), INT16_C(-15884), INT16_C( -6283), INT16_C( 6106), INT16_C( -735), INT16_C( 21481), INT16_C( -8667), INT16_C(-21559)), simde_mm256_set_epi16(INT16_C( 22679), INT16_C(-23024), INT16_C(-29506), INT16_C( 31352), INT16_C( -9917), INT16_C( 31850), INT16_C(-10444), INT16_C( -3350), INT16_C(-29787), INT16_C( 26886), INT16_C(-25580), INT16_C(-13975), INT16_C(-10979), INT16_C( 8622), INT16_C(-32215), INT16_C( 10740)), 
simde_mm256_set_epi16(INT16_C(-11332), INT16_C( 14841), INT16_C( 11933), INT16_C(-32768), INT16_C( -1227), INT16_C( -1423), INT16_C(-18002), INT16_C( 24930), INT16_C( 9934), INT16_C(-32768), INT16_C( 19297), INT16_C( 20081), INT16_C( 10244), INT16_C( 12859), INT16_C( 23548), INT16_C(-32299)) }, { simde_mm256_set_epi16(INT16_C( 17901), INT16_C(-23347), INT16_C( 26644), INT16_C( 5597), INT16_C(-19266), INT16_C( 9531), INT16_C( 15516), INT16_C( 4989), INT16_C( 16224), INT16_C( -5067), INT16_C( 31091), INT16_C( -9121), INT16_C( 15971), INT16_C(-15436), INT16_C( 14085), INT16_C(-23571)), simde_mm256_set_epi16(INT16_C(-27365), INT16_C( 28350), INT16_C(-27545), INT16_C(-16032), INT16_C( 10459), INT16_C( 1256), INT16_C( -8145), INT16_C( 21448), INT16_C( -1149), INT16_C( 2284), INT16_C(-23313), INT16_C(-19305), INT16_C( -9718), INT16_C( 7087), INT16_C( 23644), INT16_C( 11932)), simde_mm256_set_epi16(INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C( 21629), INT16_C(-29725), INT16_C( 8275), INT16_C( 23661), INT16_C(-16459), INT16_C( 17373), INT16_C( -7351), INT16_C( 32767), INT16_C( 10184), INT16_C( 25689), INT16_C(-22523), INT16_C( -9559), INT16_C(-32768)) }, { simde_mm256_set_epi16(INT16_C(-12725), INT16_C( -3530), INT16_C( 11184), INT16_C(-31897), INT16_C( 20735), INT16_C(-14861), INT16_C(-28398), INT16_C(-23436), INT16_C( 3231), INT16_C(-21331), INT16_C(-15489), INT16_C( 26258), INT16_C( 347), INT16_C(-18964), INT16_C(-14672), INT16_C( 25953)), simde_mm256_set_epi16(INT16_C( 4049), INT16_C( 16424), INT16_C( 5632), INT16_C(-23746), INT16_C(-13985), INT16_C(-11355), INT16_C( 11362), INT16_C( -9279), INT16_C(-16293), INT16_C(-30172), INT16_C(-29124), INT16_C( 11250), INT16_C( -2133), INT16_C(-12899), INT16_C(-30647), INT16_C( 27998)), simde_mm256_set_epi16(INT16_C(-16774), INT16_C(-19954), INT16_C( 5552), INT16_C( -8151), INT16_C( 32767), INT16_C( -3506), INT16_C(-32768), INT16_C(-14157), INT16_C( 19524), INT16_C( 8841), INT16_C( 13635), INT16_C( 15008), INT16_C( 
2480), INT16_C( -6065), INT16_C( 15975), INT16_C( -2045)) }, { simde_mm256_set_epi16(INT16_C(-22825), INT16_C(-31972), INT16_C(-14897), INT16_C( 14228), INT16_C( 29318), INT16_C( 1178), INT16_C( 23769), INT16_C(-21385), INT16_C(-22264), INT16_C(-31811), INT16_C( 15686), INT16_C(-22482), INT16_C( -1825), INT16_C( 972), INT16_C(-22918), INT16_C( 3291)), simde_mm256_set_epi16(INT16_C( 7040), INT16_C(-23590), INT16_C( 27450), INT16_C( 17139), INT16_C( 32362), INT16_C( 14753), INT16_C(-28122), INT16_C(-12141), INT16_C( 21422), INT16_C(-30449), INT16_C( 31482), INT16_C( -3759), INT16_C(-20060), INT16_C( 2088), INT16_C( 8544), INT16_C(-29044)), simde_mm256_set_epi16(INT16_C(-29865), INT16_C( -8382), INT16_C(-32768), INT16_C( -2911), INT16_C( -3044), INT16_C(-13575), INT16_C( 32767), INT16_C( -9244), INT16_C(-32768), INT16_C( -1362), INT16_C(-15796), INT16_C(-18723), INT16_C( 18235), INT16_C( -1116), INT16_C(-31462), INT16_C( 32335)) }, { simde_mm256_set_epi16(INT16_C( -8391), INT16_C( 8255), INT16_C( 3331), INT16_C(-31598), INT16_C(-26377), INT16_C( -5005), INT16_C(-31351), INT16_C(-10365), INT16_C( 32201), INT16_C( 26727), INT16_C( 10248), INT16_C(-27808), INT16_C( 32277), INT16_C(-15531), INT16_C( 30847), INT16_C( 13896)), simde_mm256_set_epi16(INT16_C( 2250), INT16_C( -5314), INT16_C(-11067), INT16_C( 1860), INT16_C( 32764), INT16_C( 30527), INT16_C(-12513), INT16_C( 6623), INT16_C( 11527), INT16_C( 88), INT16_C( 12196), INT16_C( -5258), INT16_C(-10411), INT16_C( 23846), INT16_C( 15419), INT16_C(-24360)), simde_mm256_set_epi16(INT16_C(-10641), INT16_C( 13569), INT16_C( 14398), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-18838), INT16_C(-16988), INT16_C( 20674), INT16_C( 26639), INT16_C( -1948), INT16_C(-22550), INT16_C( 32767), INT16_C(-32768), INT16_C( 15428), INT16_C( 32767)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_subs_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i16(r, ==, 
test_vec[i].r); } return 0; } static int test_simde_mm256_subs_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu8(UINT8_C( 53), UINT8_C( 22), UINT8_C(157), UINT8_C(166), UINT8_C( 42), UINT8_C( 84), UINT8_C( 24), UINT8_C( 60), UINT8_C(134), UINT8_C( 44), UINT8_C(174), UINT8_C( 91), UINT8_C(197), UINT8_C(149), UINT8_C(184), UINT8_C(159), UINT8_C(142), UINT8_C( 65), UINT8_C(186), UINT8_C(212), UINT8_C(222), UINT8_C( 83), UINT8_C( 30), UINT8_C( 3), UINT8_C( 17), UINT8_C( 66), UINT8_C( 92), UINT8_C( 52), UINT8_C(200), UINT8_C( 94), UINT8_C(119), UINT8_C( 94)), simde_x_mm256_set_epu8(UINT8_C( 56), UINT8_C(243), UINT8_C(184), UINT8_C( 29), UINT8_C( 36), UINT8_C( 53), UINT8_C( 55), UINT8_C( 96), UINT8_C( 22), UINT8_C(124), UINT8_C( 55), UINT8_C( 67), UINT8_C(105), UINT8_C(249), UINT8_C( 41), UINT8_C(239), UINT8_C( 28), UINT8_C( 17), UINT8_C(241), UINT8_C( 70), UINT8_C( 15), UINT8_C(218), UINT8_C( 93), UINT8_C(109), UINT8_C( 77), UINT8_C(147), UINT8_C( 9), UINT8_C( 58), UINT8_C(200), UINT8_C( 8), UINT8_C( 92), UINT8_C(168)), simde_x_mm256_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(137), UINT8_C( 6), UINT8_C( 31), UINT8_C( 0), UINT8_C( 0), UINT8_C(112), UINT8_C( 0), UINT8_C(119), UINT8_C( 24), UINT8_C( 92), UINT8_C( 0), UINT8_C(143), UINT8_C( 0), UINT8_C(114), UINT8_C( 48), UINT8_C( 0), UINT8_C(142), UINT8_C(207), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 83), UINT8_C( 0), UINT8_C( 0), UINT8_C( 86), UINT8_C( 27), UINT8_C( 0)) }, { simde_x_mm256_set_epu8(UINT8_C(163), UINT8_C(230), UINT8_C( 0), UINT8_C( 17), UINT8_C(136), UINT8_C( 61), UINT8_C(154), UINT8_C(103), UINT8_C( 79), UINT8_C(207), UINT8_C(184), UINT8_C( 47), UINT8_C(110), UINT8_C(182), UINT8_C(200), UINT8_C( 43), UINT8_C( 93), UINT8_C(101), UINT8_C( 5), UINT8_C(200), UINT8_C(169), UINT8_C( 20), UINT8_C(142), UINT8_C(152), UINT8_C(137), UINT8_C( 75), UINT8_C(131), UINT8_C(232), 
UINT8_C(209), UINT8_C(219), UINT8_C(182), UINT8_C(177)), simde_x_mm256_set_epu8(UINT8_C( 29), UINT8_C( 85), UINT8_C( 95), UINT8_C(251), UINT8_C(253), UINT8_C(113), UINT8_C( 45), UINT8_C(162), UINT8_C(162), UINT8_C(157), UINT8_C( 85), UINT8_C(102), UINT8_C( 95), UINT8_C( 76), UINT8_C(221), UINT8_C(193), UINT8_C( 20), UINT8_C(244), UINT8_C( 14), UINT8_C(148), UINT8_C(253), UINT8_C(188), UINT8_C( 90), UINT8_C( 90), UINT8_C( 39), UINT8_C( 15), UINT8_C( 34), UINT8_C(171), UINT8_C(141), UINT8_C(143), UINT8_C(206), UINT8_C( 41)), simde_x_mm256_set_epu8(UINT8_C(134), UINT8_C(145), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(109), UINT8_C( 0), UINT8_C( 0), UINT8_C( 50), UINT8_C( 99), UINT8_C( 0), UINT8_C( 15), UINT8_C(106), UINT8_C( 0), UINT8_C( 0), UINT8_C( 73), UINT8_C( 0), UINT8_C( 0), UINT8_C( 52), UINT8_C( 0), UINT8_C( 0), UINT8_C( 52), UINT8_C( 62), UINT8_C( 98), UINT8_C( 60), UINT8_C( 97), UINT8_C( 61), UINT8_C( 68), UINT8_C( 76), UINT8_C( 0), UINT8_C(136)) }, { simde_x_mm256_set_epu8(UINT8_C(138), UINT8_C( 5), UINT8_C(134), UINT8_C(216), UINT8_C(161), UINT8_C( 64), UINT8_C( 56), UINT8_C(144), UINT8_C(186), UINT8_C(217), UINT8_C( 82), UINT8_C( 89), UINT8_C(231), UINT8_C(208), UINT8_C( 6), UINT8_C( 73), UINT8_C(231), UINT8_C( 89), UINT8_C( 45), UINT8_C(216), UINT8_C( 7), UINT8_C(242), UINT8_C(103), UINT8_C( 37), UINT8_C(205), UINT8_C( 59), UINT8_C(226), UINT8_C( 28), UINT8_C(253), UINT8_C(157), UINT8_C( 87), UINT8_C( 38)), simde_x_mm256_set_epu8(UINT8_C(247), UINT8_C( 27), UINT8_C(241), UINT8_C(129), UINT8_C(148), UINT8_C(198), UINT8_C(159), UINT8_C(200), UINT8_C( 79), UINT8_C(221), UINT8_C(185), UINT8_C( 22), UINT8_C(193), UINT8_C(135), UINT8_C(234), UINT8_C( 67), UINT8_C( 88), UINT8_C(216), UINT8_C(223), UINT8_C( 36), UINT8_C(238), UINT8_C( 91), UINT8_C(119), UINT8_C( 49), UINT8_C( 32), UINT8_C(136), UINT8_C(153), UINT8_C(160), UINT8_C( 75), UINT8_C( 92), UINT8_C( 46), UINT8_C(232)), simde_x_mm256_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), 
UINT8_C( 87), UINT8_C( 13), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(107), UINT8_C( 0), UINT8_C( 0), UINT8_C( 67), UINT8_C( 38), UINT8_C( 73), UINT8_C( 0), UINT8_C( 6), UINT8_C(143), UINT8_C( 0), UINT8_C( 0), UINT8_C(180), UINT8_C( 0), UINT8_C(151), UINT8_C( 0), UINT8_C( 0), UINT8_C(173), UINT8_C( 0), UINT8_C( 73), UINT8_C( 0), UINT8_C(178), UINT8_C( 65), UINT8_C( 41), UINT8_C( 0)) }, { simde_x_mm256_set_epu8(UINT8_C( 44), UINT8_C( 83), UINT8_C(224), UINT8_C( 9), UINT8_C(187), UINT8_C( 91), UINT8_C(195), UINT8_C( 48), UINT8_C(212), UINT8_C(120), UINT8_C(118), UINT8_C(219), UINT8_C(144), UINT8_C(226), UINT8_C( 84), UINT8_C( 76), UINT8_C(178), UINT8_C(115), UINT8_C(193), UINT8_C(244), UINT8_C(231), UINT8_C(117), UINT8_C( 23), UINT8_C(218), UINT8_C(253), UINT8_C( 33), UINT8_C( 83), UINT8_C(233), UINT8_C(222), UINT8_C( 37), UINT8_C(171), UINT8_C(201)), simde_x_mm256_set_epu8(UINT8_C( 88), UINT8_C(151), UINT8_C(166), UINT8_C( 16), UINT8_C(140), UINT8_C(190), UINT8_C(122), UINT8_C(120), UINT8_C(217), UINT8_C( 67), UINT8_C(124), UINT8_C(106), UINT8_C(215), UINT8_C( 52), UINT8_C(242), UINT8_C(234), UINT8_C(139), UINT8_C(165), UINT8_C(105), UINT8_C( 6), UINT8_C(156), UINT8_C( 20), UINT8_C(201), UINT8_C(105), UINT8_C(213), UINT8_C( 29), UINT8_C( 33), UINT8_C(174), UINT8_C(130), UINT8_C( 41), UINT8_C( 41), UINT8_C(244)), simde_x_mm256_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 58), UINT8_C( 0), UINT8_C( 47), UINT8_C( 0), UINT8_C( 73), UINT8_C( 0), UINT8_C( 0), UINT8_C( 53), UINT8_C( 0), UINT8_C(113), UINT8_C( 0), UINT8_C(174), UINT8_C( 0), UINT8_C( 0), UINT8_C( 39), UINT8_C( 0), UINT8_C( 88), UINT8_C(238), UINT8_C( 75), UINT8_C( 97), UINT8_C( 0), UINT8_C(113), UINT8_C( 40), UINT8_C( 4), UINT8_C( 50), UINT8_C( 59), UINT8_C( 92), UINT8_C( 0), UINT8_C(130), UINT8_C( 0)) }, { simde_x_mm256_set_epu8(UINT8_C( 69), UINT8_C(237), UINT8_C(164), UINT8_C(205), UINT8_C(104), UINT8_C( 20), UINT8_C( 21), UINT8_C(221), UINT8_C(180), UINT8_C(190), UINT8_C( 37), UINT8_C( 59), UINT8_C( 
60), UINT8_C(156), UINT8_C( 19), UINT8_C(125), UINT8_C( 63), UINT8_C( 96), UINT8_C(236), UINT8_C( 53), UINT8_C(121), UINT8_C(115), UINT8_C(220), UINT8_C( 95), UINT8_C( 62), UINT8_C( 99), UINT8_C(195), UINT8_C(180), UINT8_C( 55), UINT8_C( 5), UINT8_C(163), UINT8_C(237)), simde_x_mm256_set_epu8(UINT8_C(149), UINT8_C( 27), UINT8_C(110), UINT8_C(190), UINT8_C(148), UINT8_C(103), UINT8_C(193), UINT8_C( 96), UINT8_C( 40), UINT8_C(219), UINT8_C( 4), UINT8_C(232), UINT8_C(224), UINT8_C( 47), UINT8_C( 83), UINT8_C(200), UINT8_C(251), UINT8_C(131), UINT8_C( 8), UINT8_C(236), UINT8_C(164), UINT8_C(239), UINT8_C(180), UINT8_C(151), UINT8_C(218), UINT8_C( 10), UINT8_C( 27), UINT8_C(175), UINT8_C( 92), UINT8_C( 92), UINT8_C( 46), UINT8_C(156)), simde_x_mm256_set_epu8(UINT8_C( 0), UINT8_C(210), UINT8_C( 54), UINT8_C( 15), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(125), UINT8_C(140), UINT8_C( 0), UINT8_C( 33), UINT8_C( 0), UINT8_C( 0), UINT8_C(109), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(228), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 40), UINT8_C( 0), UINT8_C( 0), UINT8_C( 89), UINT8_C(168), UINT8_C( 5), UINT8_C( 0), UINT8_C( 0), UINT8_C(117), UINT8_C( 81)) }, { simde_x_mm256_set_epu8(UINT8_C(206), UINT8_C( 75), UINT8_C(242), UINT8_C( 54), UINT8_C( 43), UINT8_C(176), UINT8_C(131), UINT8_C(103), UINT8_C( 80), UINT8_C(255), UINT8_C(197), UINT8_C(243), UINT8_C(145), UINT8_C( 18), UINT8_C(164), UINT8_C(116), UINT8_C( 12), UINT8_C(159), UINT8_C(172), UINT8_C(173), UINT8_C(195), UINT8_C(127), UINT8_C(102), UINT8_C(146), UINT8_C( 1), UINT8_C( 91), UINT8_C(181), UINT8_C(236), UINT8_C(198), UINT8_C(176), UINT8_C(101), UINT8_C( 97)), simde_x_mm256_set_epu8(UINT8_C( 15), UINT8_C(209), UINT8_C( 64), UINT8_C( 40), UINT8_C( 22), UINT8_C( 0), UINT8_C(163), UINT8_C( 62), UINT8_C(201), UINT8_C( 95), UINT8_C(211), UINT8_C(165), UINT8_C( 44), UINT8_C( 98), UINT8_C(219), UINT8_C(193), UINT8_C(192), UINT8_C( 91), UINT8_C(138), UINT8_C( 36), UINT8_C(142), UINT8_C( 60), 
UINT8_C( 43), UINT8_C(242), UINT8_C(247), UINT8_C(171), UINT8_C(205), UINT8_C(157), UINT8_C(136), UINT8_C( 73), UINT8_C(109), UINT8_C( 94)), simde_x_mm256_set_epu8(UINT8_C(191), UINT8_C( 0), UINT8_C(178), UINT8_C( 14), UINT8_C( 21), UINT8_C(176), UINT8_C( 0), UINT8_C( 41), UINT8_C( 0), UINT8_C(160), UINT8_C( 0), UINT8_C( 78), UINT8_C(101), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 68), UINT8_C( 34), UINT8_C(137), UINT8_C( 53), UINT8_C( 67), UINT8_C( 59), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 79), UINT8_C( 62), UINT8_C(103), UINT8_C( 0), UINT8_C( 3)) }, { simde_x_mm256_set_epu8(UINT8_C(166), UINT8_C(215), UINT8_C(131), UINT8_C( 28), UINT8_C(197), UINT8_C(207), UINT8_C( 55), UINT8_C(148), UINT8_C(114), UINT8_C(134), UINT8_C( 4), UINT8_C(154), UINT8_C( 92), UINT8_C(217), UINT8_C(172), UINT8_C(119), UINT8_C(169), UINT8_C( 8), UINT8_C(131), UINT8_C(189), UINT8_C( 61), UINT8_C( 70), UINT8_C(168), UINT8_C( 46), UINT8_C(248), UINT8_C(223), UINT8_C( 3), UINT8_C(204), UINT8_C(166), UINT8_C(122), UINT8_C( 12), UINT8_C(219)), simde_x_mm256_set_epu8(UINT8_C( 27), UINT8_C(128), UINT8_C(163), UINT8_C(218), UINT8_C(107), UINT8_C( 58), UINT8_C( 66), UINT8_C(243), UINT8_C(126), UINT8_C(106), UINT8_C( 57), UINT8_C(161), UINT8_C(146), UINT8_C( 38), UINT8_C(208), UINT8_C(147), UINT8_C( 83), UINT8_C(174), UINT8_C(137), UINT8_C( 15), UINT8_C(122), UINT8_C(250), UINT8_C(241), UINT8_C( 81), UINT8_C(177), UINT8_C(164), UINT8_C( 8), UINT8_C( 40), UINT8_C( 33), UINT8_C( 96), UINT8_C(142), UINT8_C(140)), simde_x_mm256_set_epu8(UINT8_C(139), UINT8_C( 87), UINT8_C( 0), UINT8_C( 0), UINT8_C( 90), UINT8_C(149), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 28), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(179), UINT8_C( 0), UINT8_C( 0), UINT8_C( 86), UINT8_C( 0), UINT8_C( 0), UINT8_C(174), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 71), UINT8_C( 59), UINT8_C( 0), UINT8_C(164), UINT8_C(133), UINT8_C( 26), UINT8_C( 0), UINT8_C( 79)) }, 
{ simde_x_mm256_set_epu8(UINT8_C(223), UINT8_C( 57), UINT8_C( 32), UINT8_C( 63), UINT8_C( 13), UINT8_C( 3), UINT8_C(132), UINT8_C(146), UINT8_C(152), UINT8_C(247), UINT8_C(236), UINT8_C(115), UINT8_C(133), UINT8_C(137), UINT8_C(215), UINT8_C(131), UINT8_C(125), UINT8_C(201), UINT8_C(104), UINT8_C(103), UINT8_C( 40), UINT8_C( 8), UINT8_C(147), UINT8_C( 96), UINT8_C(126), UINT8_C( 21), UINT8_C(195), UINT8_C( 85), UINT8_C(120), UINT8_C(127), UINT8_C( 54), UINT8_C( 72)), simde_x_mm256_set_epu8(UINT8_C( 8), UINT8_C(202), UINT8_C(235), UINT8_C( 62), UINT8_C(212), UINT8_C(197), UINT8_C( 7), UINT8_C( 68), UINT8_C(127), UINT8_C(252), UINT8_C(119), UINT8_C( 63), UINT8_C(207), UINT8_C( 31), UINT8_C( 25), UINT8_C(223), UINT8_C( 45), UINT8_C( 7), UINT8_C( 0), UINT8_C( 88), UINT8_C( 47), UINT8_C(164), UINT8_C(235), UINT8_C(118), UINT8_C(215), UINT8_C( 85), UINT8_C( 93), UINT8_C( 38), UINT8_C( 60), UINT8_C( 59), UINT8_C(160), UINT8_C(216)), simde_x_mm256_set_epu8(UINT8_C(215), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C(125), UINT8_C( 78), UINT8_C( 25), UINT8_C( 0), UINT8_C(117), UINT8_C( 52), UINT8_C( 0), UINT8_C(106), UINT8_C(190), UINT8_C( 0), UINT8_C( 80), UINT8_C(194), UINT8_C(104), UINT8_C( 15), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(102), UINT8_C( 47), UINT8_C( 60), UINT8_C( 68), UINT8_C( 0), UINT8_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_subs_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_subs_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu16(UINT16_C( 13590), UINT16_C( 40358), UINT16_C( 10836), UINT16_C( 6204), UINT16_C( 34348), UINT16_C( 44635), UINT16_C( 50581), UINT16_C( 47263), UINT16_C( 36417), UINT16_C( 47828), UINT16_C( 56915), UINT16_C( 7683), UINT16_C( 
4418), UINT16_C( 23604), UINT16_C( 51294), UINT16_C( 30558)), simde_x_mm256_set_epu16(UINT16_C( 14579), UINT16_C( 47133), UINT16_C( 9269), UINT16_C( 14176), UINT16_C( 5756), UINT16_C( 14147), UINT16_C( 27129), UINT16_C( 10735), UINT16_C( 7185), UINT16_C( 61766), UINT16_C( 4058), UINT16_C( 23917), UINT16_C( 19859), UINT16_C( 2362), UINT16_C( 51208), UINT16_C( 23720)), simde_x_mm256_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C( 1567), UINT16_C( 0), UINT16_C( 28592), UINT16_C( 30488), UINT16_C( 23452), UINT16_C( 36528), UINT16_C( 29232), UINT16_C( 0), UINT16_C( 52857), UINT16_C( 0), UINT16_C( 0), UINT16_C( 21242), UINT16_C( 86), UINT16_C( 6838)) }, { simde_x_mm256_set_epu16(UINT16_C( 41958), UINT16_C( 17), UINT16_C( 34877), UINT16_C( 39527), UINT16_C( 20431), UINT16_C( 47151), UINT16_C( 28342), UINT16_C( 51243), UINT16_C( 23909), UINT16_C( 1480), UINT16_C( 43284), UINT16_C( 36504), UINT16_C( 35147), UINT16_C( 33768), UINT16_C( 53723), UINT16_C( 46769)), simde_x_mm256_set_epu16(UINT16_C( 7509), UINT16_C( 24571), UINT16_C( 64881), UINT16_C( 11682), UINT16_C( 41629), UINT16_C( 21862), UINT16_C( 24396), UINT16_C( 56769), UINT16_C( 5364), UINT16_C( 3732), UINT16_C( 64956), UINT16_C( 23130), UINT16_C( 9999), UINT16_C( 8875), UINT16_C( 36239), UINT16_C( 52777)), simde_x_mm256_set_epu16(UINT16_C( 34449), UINT16_C( 0), UINT16_C( 0), UINT16_C( 27845), UINT16_C( 0), UINT16_C( 25289), UINT16_C( 3946), UINT16_C( 0), UINT16_C( 18545), UINT16_C( 0), UINT16_C( 0), UINT16_C( 13374), UINT16_C( 25148), UINT16_C( 24893), UINT16_C( 17484), UINT16_C( 0)) }, { simde_x_mm256_set_epu16(UINT16_C( 35333), UINT16_C( 34520), UINT16_C( 41280), UINT16_C( 14480), UINT16_C( 47833), UINT16_C( 21081), UINT16_C( 59344), UINT16_C( 1609), UINT16_C( 59225), UINT16_C( 11736), UINT16_C( 2034), UINT16_C( 26405), UINT16_C( 52539), UINT16_C( 57884), UINT16_C( 64925), UINT16_C( 22310)), simde_x_mm256_set_epu16(UINT16_C( 63259), UINT16_C( 61825), UINT16_C( 38086), UINT16_C( 40904), UINT16_C( 20445), UINT16_C( 
47382), UINT16_C( 49543), UINT16_C( 59971), UINT16_C( 22744), UINT16_C( 57124), UINT16_C( 61019), UINT16_C( 30513), UINT16_C( 8328), UINT16_C( 39328), UINT16_C( 19292), UINT16_C( 12008)), simde_x_mm256_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C( 3194), UINT16_C( 0), UINT16_C( 27388), UINT16_C( 0), UINT16_C( 9801), UINT16_C( 0), UINT16_C( 36481), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 44211), UINT16_C( 18556), UINT16_C( 45633), UINT16_C( 10302)) }, { simde_x_mm256_set_epu16(UINT16_C( 11347), UINT16_C( 57353), UINT16_C( 47963), UINT16_C( 49968), UINT16_C( 54392), UINT16_C( 30427), UINT16_C( 37090), UINT16_C( 21580), UINT16_C( 45683), UINT16_C( 49652), UINT16_C( 59253), UINT16_C( 6106), UINT16_C( 64801), UINT16_C( 21481), UINT16_C( 56869), UINT16_C( 43977)), simde_x_mm256_set_epu16(UINT16_C( 22679), UINT16_C( 42512), UINT16_C( 36030), UINT16_C( 31352), UINT16_C( 55619), UINT16_C( 31850), UINT16_C( 55092), UINT16_C( 62186), UINT16_C( 35749), UINT16_C( 26886), UINT16_C( 39956), UINT16_C( 51561), UINT16_C( 54557), UINT16_C( 8622), UINT16_C( 33321), UINT16_C( 10740)), simde_x_mm256_set_epu16(UINT16_C( 0), UINT16_C( 14841), UINT16_C( 11933), UINT16_C( 18616), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 9934), UINT16_C( 22766), UINT16_C( 19297), UINT16_C( 0), UINT16_C( 10244), UINT16_C( 12859), UINT16_C( 23548), UINT16_C( 33237)) }, { simde_x_mm256_set_epu16(UINT16_C( 17901), UINT16_C( 42189), UINT16_C( 26644), UINT16_C( 5597), UINT16_C( 46270), UINT16_C( 9531), UINT16_C( 15516), UINT16_C( 4989), UINT16_C( 16224), UINT16_C( 60469), UINT16_C( 31091), UINT16_C( 56415), UINT16_C( 15971), UINT16_C( 50100), UINT16_C( 14085), UINT16_C( 41965)), simde_x_mm256_set_epu16(UINT16_C( 38171), UINT16_C( 28350), UINT16_C( 37991), UINT16_C( 49504), UINT16_C( 10459), UINT16_C( 1256), UINT16_C( 57391), UINT16_C( 21448), UINT16_C( 64387), UINT16_C( 2284), UINT16_C( 42223), UINT16_C( 46231), UINT16_C( 55818), UINT16_C( 7087), UINT16_C( 23644), UINT16_C( 
11932)), simde_x_mm256_set_epu16(UINT16_C( 0), UINT16_C( 13839), UINT16_C( 0), UINT16_C( 0), UINT16_C( 35811), UINT16_C( 8275), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 58185), UINT16_C( 0), UINT16_C( 10184), UINT16_C( 0), UINT16_C( 43013), UINT16_C( 0), UINT16_C( 30033)) }, { simde_x_mm256_set_epu16(UINT16_C( 52811), UINT16_C( 62006), UINT16_C( 11184), UINT16_C( 33639), UINT16_C( 20735), UINT16_C( 50675), UINT16_C( 37138), UINT16_C( 42100), UINT16_C( 3231), UINT16_C( 44205), UINT16_C( 50047), UINT16_C( 26258), UINT16_C( 347), UINT16_C( 46572), UINT16_C( 50864), UINT16_C( 25953)), simde_x_mm256_set_epu16(UINT16_C( 4049), UINT16_C( 16424), UINT16_C( 5632), UINT16_C( 41790), UINT16_C( 51551), UINT16_C( 54181), UINT16_C( 11362), UINT16_C( 56257), UINT16_C( 49243), UINT16_C( 35364), UINT16_C( 36412), UINT16_C( 11250), UINT16_C( 63403), UINT16_C( 52637), UINT16_C( 34889), UINT16_C( 27998)), simde_x_mm256_set_epu16(UINT16_C( 48762), UINT16_C( 45582), UINT16_C( 5552), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 25776), UINT16_C( 0), UINT16_C( 0), UINT16_C( 8841), UINT16_C( 13635), UINT16_C( 15008), UINT16_C( 0), UINT16_C( 0), UINT16_C( 15975), UINT16_C( 0)) }, { simde_x_mm256_set_epu16(UINT16_C( 42711), UINT16_C( 33564), UINT16_C( 50639), UINT16_C( 14228), UINT16_C( 29318), UINT16_C( 1178), UINT16_C( 23769), UINT16_C( 44151), UINT16_C( 43272), UINT16_C( 33725), UINT16_C( 15686), UINT16_C( 43054), UINT16_C( 63711), UINT16_C( 972), UINT16_C( 42618), UINT16_C( 3291)), simde_x_mm256_set_epu16(UINT16_C( 7040), UINT16_C( 41946), UINT16_C( 27450), UINT16_C( 17139), UINT16_C( 32362), UINT16_C( 14753), UINT16_C( 37414), UINT16_C( 53395), UINT16_C( 21422), UINT16_C( 35087), UINT16_C( 31482), UINT16_C( 61777), UINT16_C( 45476), UINT16_C( 2088), UINT16_C( 8544), UINT16_C( 36492)), simde_x_mm256_set_epu16(UINT16_C( 35671), UINT16_C( 0), UINT16_C( 23189), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 21850), UINT16_C( 0), UINT16_C( 
0), UINT16_C( 0), UINT16_C( 18235), UINT16_C( 0), UINT16_C( 34074), UINT16_C( 0)) }, { simde_x_mm256_set_epu16(UINT16_C( 57145), UINT16_C( 8255), UINT16_C( 3331), UINT16_C( 33938), UINT16_C( 39159), UINT16_C( 60531), UINT16_C( 34185), UINT16_C( 55171), UINT16_C( 32201), UINT16_C( 26727), UINT16_C( 10248), UINT16_C( 37728), UINT16_C( 32277), UINT16_C( 50005), UINT16_C( 30847), UINT16_C( 13896)), simde_x_mm256_set_epu16(UINT16_C( 2250), UINT16_C( 60222), UINT16_C( 54469), UINT16_C( 1860), UINT16_C( 32764), UINT16_C( 30527), UINT16_C( 53023), UINT16_C( 6623), UINT16_C( 11527), UINT16_C( 88), UINT16_C( 12196), UINT16_C( 60278), UINT16_C( 55125), UINT16_C( 23846), UINT16_C( 15419), UINT16_C( 41176)), simde_x_mm256_set_epu16(UINT16_C( 54895), UINT16_C( 0), UINT16_C( 0), UINT16_C( 32078), UINT16_C( 6395), UINT16_C( 30004), UINT16_C( 0), UINT16_C( 48548), UINT16_C( 20674), UINT16_C( 26639), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 26159), UINT16_C( 15428), UINT16_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_subs_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_unpacklo_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C(-101), INT8_C( 92), INT8_C( -29), INT8_C( 114), INT8_C( -9), INT8_C( 115), INT8_C( -85), INT8_C( 76), INT8_C( 113), INT8_C( 104), INT8_C( 13), INT8_C( 120), INT8_C( -16), INT8_C( 113), INT8_C( -42), INT8_C( -27), INT8_C( 62), INT8_C(-102), INT8_C(-110), INT8_C( 104), INT8_C( -25), INT8_C( -66), INT8_C( 36), INT8_C( -49), INT8_C( 43), INT8_C( 7), INT8_C(-118), INT8_C( 87), INT8_C(-107), INT8_C(-121), INT8_C(-123), INT8_C( -99)), simde_mm256_set_epi8(INT8_C( -70), INT8_C( 56), INT8_C( 114), INT8_C(-120), INT8_C( 3), INT8_C( -84), INT8_C( -87), INT8_C(-109), INT8_C( 89), INT8_C( 30), INT8_C( 125), INT8_C( 67), INT8_C( 
97), INT8_C( -34), INT8_C( -11), INT8_C( -10), INT8_C( 107), INT8_C( 105), INT8_C( 6), INT8_C( 27), INT8_C( -81), INT8_C( 56), INT8_C( 18), INT8_C( -60), INT8_C(-101), INT8_C( -87), INT8_C( 94), INT8_C( -75), INT8_C( 11), INT8_C( 36), INT8_C( -58), INT8_C( 50)), simde_mm256_set_epi8(INT8_C( 89), INT8_C( 113), INT8_C( 30), INT8_C( 104), INT8_C( 125), INT8_C( 13), INT8_C( 67), INT8_C( 120), INT8_C( 97), INT8_C( -16), INT8_C( -34), INT8_C( 113), INT8_C( -11), INT8_C( -42), INT8_C( -10), INT8_C( -27), INT8_C(-101), INT8_C( 43), INT8_C( -87), INT8_C( 7), INT8_C( 94), INT8_C(-118), INT8_C( -75), INT8_C( 87), INT8_C( 11), INT8_C(-107), INT8_C( 36), INT8_C(-121), INT8_C( -58), INT8_C(-123), INT8_C( 50), INT8_C( -99)) }, { simde_mm256_set_epi8(INT8_C( -67), INT8_C( -9), INT8_C( 17), INT8_C( 1), INT8_C( 81), INT8_C( -53), INT8_C( 31), INT8_C( -39), INT8_C( 73), INT8_C( 67), INT8_C( -55), INT8_C( 6), INT8_C( 10), INT8_C( 40), INT8_C(-106), INT8_C( 72), INT8_C( -42), INT8_C( -10), INT8_C( -43), INT8_C( 90), INT8_C( -58), INT8_C(-121), INT8_C( 29), INT8_C( -98), INT8_C(-102), INT8_C( 94), INT8_C( -84), INT8_C( -30), INT8_C( 85), INT8_C( 81), INT8_C( 3), INT8_C( -25)), simde_mm256_set_epi8(INT8_C( -61), INT8_C( -12), INT8_C( -31), INT8_C( 32), INT8_C(-113), INT8_C( 84), INT8_C(-127), INT8_C( -26), INT8_C( 16), INT8_C( -35), INT8_C( 19), INT8_C( 122), INT8_C( 69), INT8_C( -65), INT8_C( 100), INT8_C( 2), INT8_C( 26), INT8_C( -65), INT8_C( 14), INT8_C( 116), INT8_C( 122), INT8_C( 9), INT8_C( 6), INT8_C( -16), INT8_C( 63), INT8_C( 30), INT8_C( 83), INT8_C( 98), INT8_C( 14), INT8_C( 101), INT8_C( -11), INT8_C( -35)), simde_mm256_set_epi8(INT8_C( 16), INT8_C( 73), INT8_C( -35), INT8_C( 67), INT8_C( 19), INT8_C( -55), INT8_C( 122), INT8_C( 6), INT8_C( 69), INT8_C( 10), INT8_C( -65), INT8_C( 40), INT8_C( 100), INT8_C(-106), INT8_C( 2), INT8_C( 72), INT8_C( 63), INT8_C(-102), INT8_C( 30), INT8_C( 94), INT8_C( 83), INT8_C( -84), INT8_C( 98), INT8_C( -30), INT8_C( 14), INT8_C( 85), INT8_C( 
101), INT8_C( 81), INT8_C( -11), INT8_C( 3), INT8_C( -35), INT8_C( -25)) }, { simde_mm256_set_epi8(INT8_C( 79), INT8_C( 15), INT8_C( 49), INT8_C(-100), INT8_C( -26), INT8_C( -68), INT8_C( -90), INT8_C( -20), INT8_C( -86), INT8_C( 47), INT8_C( -77), INT8_C( -34), INT8_C( 8), INT8_C( 109), INT8_C( -5), INT8_C( 7), INT8_C( 27), INT8_C( 55), INT8_C( 9), INT8_C( -91), INT8_C(-121), INT8_C( -45), INT8_C( 90), INT8_C( 41), INT8_C( 35), INT8_C( 41), INT8_C( -27), INT8_C( 76), INT8_C( 18), INT8_C( 110), INT8_C( 20), INT8_C( 91)), simde_mm256_set_epi8(INT8_C(-107), INT8_C( 5), INT8_C( 63), INT8_C( -15), INT8_C( -24), INT8_C( 33), INT8_C( -61), INT8_C( 5), INT8_C( -26), INT8_C( 28), INT8_C( 111), INT8_C( -8), INT8_C( 5), INT8_C( -1), INT8_C( 57), INT8_C( 116), INT8_C( -59), INT8_C( 116), INT8_C( 15), INT8_C(-110), INT8_C( -91), INT8_C( -64), INT8_C( 54), INT8_C( 99), INT8_C( 58), INT8_C( -68), INT8_C( -48), INT8_C( -80), INT8_C( -28), INT8_C( -99), INT8_C( 100), INT8_C( -81)), simde_mm256_set_epi8(INT8_C( -26), INT8_C( -86), INT8_C( 28), INT8_C( 47), INT8_C( 111), INT8_C( -77), INT8_C( -8), INT8_C( -34), INT8_C( 5), INT8_C( 8), INT8_C( -1), INT8_C( 109), INT8_C( 57), INT8_C( -5), INT8_C( 116), INT8_C( 7), INT8_C( 58), INT8_C( 35), INT8_C( -68), INT8_C( 41), INT8_C( -48), INT8_C( -27), INT8_C( -80), INT8_C( 76), INT8_C( -28), INT8_C( 18), INT8_C( -99), INT8_C( 110), INT8_C( 100), INT8_C( 20), INT8_C( -81), INT8_C( 91)) }, { simde_mm256_set_epi8(INT8_C( 111), INT8_C( 64), INT8_C( 83), INT8_C( 89), INT8_C( 53), INT8_C( 69), INT8_C( 18), INT8_C(-112), INT8_C( -39), INT8_C( 62), INT8_C(-127), INT8_C( 93), INT8_C( 40), INT8_C( -88), INT8_C( 34), INT8_C( 56), INT8_C( -85), INT8_C( 98), INT8_C( 123), INT8_C( -14), INT8_C( -70), INT8_C( -22), INT8_C( 32), INT8_C( 109), INT8_C( 94), INT8_C(-121), INT8_C(-110), INT8_C( 114), INT8_C( -62), INT8_C( 22), INT8_C( -32), INT8_C( 70)), simde_mm256_set_epi8(INT8_C( -35), INT8_C( 57), INT8_C( -30), INT8_C( 91), INT8_C( -17), INT8_C(-121), 
INT8_C(-121), INT8_C( -63), INT8_C( 125), INT8_C( 10), INT8_C( -28), INT8_C(-117), INT8_C( 12), INT8_C( 112), INT8_C( 23), INT8_C( 38), INT8_C( -40), INT8_C( 22), INT8_C(-121), INT8_C(-107), INT8_C( 1), INT8_C( 89), INT8_C( 11), INT8_C(-119), INT8_C( -63), INT8_C( 67), INT8_C( 93), INT8_C( -28), INT8_C( 24), INT8_C( 106), INT8_C(-124), INT8_C( 25)), simde_mm256_set_epi8(INT8_C( 125), INT8_C( -39), INT8_C( 10), INT8_C( 62), INT8_C( -28), INT8_C(-127), INT8_C(-117), INT8_C( 93), INT8_C( 12), INT8_C( 40), INT8_C( 112), INT8_C( -88), INT8_C( 23), INT8_C( 34), INT8_C( 38), INT8_C( 56), INT8_C( -63), INT8_C( 94), INT8_C( 67), INT8_C(-121), INT8_C( 93), INT8_C(-110), INT8_C( -28), INT8_C( 114), INT8_C( 24), INT8_C( -62), INT8_C( 106), INT8_C( 22), INT8_C(-124), INT8_C( -32), INT8_C( 25), INT8_C( 70)) }, { simde_mm256_set_epi8(INT8_C(-124), INT8_C( 32), INT8_C( 75), INT8_C( 56), INT8_C( -51), INT8_C( -59), INT8_C( -9), INT8_C( -19), INT8_C( 70), INT8_C(-112), INT8_C( -88), INT8_C( -25), INT8_C( 65), INT8_C( -16), INT8_C( 87), INT8_C( 72), INT8_C( -17), INT8_C( 111), INT8_C(-124), INT8_C( -55), INT8_C( 48), INT8_C( 3), INT8_C( -70), INT8_C( 97), INT8_C( -90), INT8_C(-126), INT8_C( -20), INT8_C( 124), INT8_C( -5), INT8_C( -13), INT8_C( 118), INT8_C( 93)), simde_mm256_set_epi8(INT8_C(-114), INT8_C( -2), INT8_C( 68), INT8_C( -83), INT8_C( 91), INT8_C( 102), INT8_C( -24), INT8_C( 67), INT8_C( -95), INT8_C( 7), INT8_C( 96), INT8_C( 35), INT8_C( -10), INT8_C( -45), INT8_C( -47), INT8_C( -75), INT8_C( 15), INT8_C( 123), INT8_C( -29), INT8_C(-121), INT8_C(-102), INT8_C( 78), INT8_C( 104), INT8_C( 76), INT8_C(-103), INT8_C( 86), INT8_C( 88), INT8_C(-113), INT8_C( -62), INT8_C( 94), INT8_C( -48), INT8_C(-103)), simde_mm256_set_epi8(INT8_C( -95), INT8_C( 70), INT8_C( 7), INT8_C(-112), INT8_C( 96), INT8_C( -88), INT8_C( 35), INT8_C( -25), INT8_C( -10), INT8_C( 65), INT8_C( -45), INT8_C( -16), INT8_C( -47), INT8_C( 87), INT8_C( -75), INT8_C( 72), INT8_C(-103), INT8_C( -90), INT8_C( 86), 
INT8_C(-126), INT8_C( 88), INT8_C( -20), INT8_C(-113), INT8_C( 124), INT8_C( -62), INT8_C( -5), INT8_C( 94), INT8_C( -13), INT8_C( -48), INT8_C( 118), INT8_C(-103), INT8_C( 93)) }, { simde_mm256_set_epi8(INT8_C( -90), INT8_C(-125), INT8_C(-122), INT8_C( -6), INT8_C( 94), INT8_C( 38), INT8_C( -80), INT8_C( 45), INT8_C( 120), INT8_C( 2), INT8_C( 29), INT8_C( -37), INT8_C( -49), INT8_C( 112), INT8_C( -83), INT8_C( 43), INT8_C( 51), INT8_C( 92), INT8_C( -2), INT8_C( 110), INT8_C( -26), INT8_C( -17), INT8_C( -37), INT8_C(-121), INT8_C(-105), INT8_C( -8), INT8_C( 57), INT8_C( -68), INT8_C( 115), INT8_C( -11), INT8_C( -39), INT8_C( -63)), simde_mm256_set_epi8(INT8_C( -64), INT8_C( 39), INT8_C( 25), INT8_C( 3), INT8_C( 122), INT8_C( -60), INT8_C( -66), INT8_C( -73), INT8_C( 0), INT8_C( 41), INT8_C( 40), INT8_C( 56), INT8_C( 75), INT8_C( -32), INT8_C( -94), INT8_C( 17), INT8_C( 121), INT8_C(-101), INT8_C( 114), INT8_C( -11), INT8_C( 87), INT8_C( -91), INT8_C( -73), INT8_C(-128), INT8_C( -38), INT8_C(-107), INT8_C( 46), INT8_C( 109), INT8_C( -12), INT8_C(-126), INT8_C( -31), INT8_C( -11)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 120), INT8_C( 41), INT8_C( 2), INT8_C( 40), INT8_C( 29), INT8_C( 56), INT8_C( -37), INT8_C( 75), INT8_C( -49), INT8_C( -32), INT8_C( 112), INT8_C( -94), INT8_C( -83), INT8_C( 17), INT8_C( 43), INT8_C( -38), INT8_C(-105), INT8_C(-107), INT8_C( -8), INT8_C( 46), INT8_C( 57), INT8_C( 109), INT8_C( -68), INT8_C( -12), INT8_C( 115), INT8_C(-126), INT8_C( -11), INT8_C( -31), INT8_C( -39), INT8_C( -11), INT8_C( -63)) }, { simde_mm256_set_epi8(INT8_C( 63), INT8_C( 108), INT8_C( 117), INT8_C( 111), INT8_C(-116), INT8_C(-118), INT8_C( 82), INT8_C(-121), INT8_C( 68), INT8_C( -92), INT8_C( -35), INT8_C( 7), INT8_C( -96), INT8_C( 9), INT8_C( -94), INT8_C( -59), INT8_C( 32), INT8_C( -9), INT8_C( -80), INT8_C(-123), INT8_C(-100), INT8_C( 52), INT8_C( 39), INT8_C( -97), INT8_C( 73), INT8_C( -67), INT8_C( 65), INT8_C( 4), INT8_C( -56), INT8_C( -71), INT8_C( 7), 
INT8_C( -5)), simde_mm256_set_epi8(INT8_C( 102), INT8_C( 68), INT8_C( -62), INT8_C( 99), INT8_C(-116), INT8_C( -55), INT8_C( 58), INT8_C( -12), INT8_C( 76), INT8_C( 54), INT8_C( 104), INT8_C( 91), INT8_C( 97), INT8_C(-106), INT8_C( 52), INT8_C( 126), INT8_C( -63), INT8_C(-121), INT8_C( 81), INT8_C( -68), INT8_C( 43), INT8_C( 94), INT8_C( -45), INT8_C( -65), INT8_C( 106), INT8_C( 44), INT8_C( 113), INT8_C( -16), INT8_C( 99), INT8_C( 23), INT8_C( -7), INT8_C( 117)), simde_mm256_set_epi8(INT8_C( 76), INT8_C( 68), INT8_C( 54), INT8_C( -92), INT8_C( 104), INT8_C( -35), INT8_C( 91), INT8_C( 7), INT8_C( 97), INT8_C( -96), INT8_C(-106), INT8_C( 9), INT8_C( 52), INT8_C( -94), INT8_C( 126), INT8_C( -59), INT8_C( 106), INT8_C( 73), INT8_C( 44), INT8_C( -67), INT8_C( 113), INT8_C( 65), INT8_C( -16), INT8_C( 4), INT8_C( 99), INT8_C( -56), INT8_C( 23), INT8_C( -71), INT8_C( -7), INT8_C( 7), INT8_C( 117), INT8_C( -5)) }, { simde_mm256_set_epi8(INT8_C( 104), INT8_C( 34), INT8_C( -70), INT8_C( -26), INT8_C( -69), INT8_C( 81), INT8_C( -50), INT8_C( 4), INT8_C( -60), INT8_C( -88), INT8_C( -4), INT8_C( -63), INT8_C( -86), INT8_C(-128), INT8_C( 17), INT8_C( -71), INT8_C( 58), INT8_C( 49), INT8_C( 70), INT8_C( -5), INT8_C( -33), INT8_C( -24), INT8_C( -68), INT8_C( 51), INT8_C( -53), INT8_C( -74), INT8_C( 121), INT8_C( 64), INT8_C( 107), INT8_C( 46), INT8_C( 97), INT8_C( -31)), simde_mm256_set_epi8(INT8_C( -50), INT8_C( 41), INT8_C( -4), INT8_C( 50), INT8_C( 14), INT8_C( 6), INT8_C( 5), INT8_C( -94), INT8_C( 117), INT8_C(-125), INT8_C( -37), INT8_C(-117), INT8_C( 55), INT8_C(-105), INT8_C( 11), INT8_C(-118), INT8_C( -38), INT8_C( 101), INT8_C( 50), INT8_C( 56), INT8_C( -29), INT8_C( -1), INT8_C( 78), INT8_C( 43), INT8_C( 76), INT8_C( -69), INT8_C( 38), INT8_C( -37), INT8_C( 55), INT8_C( -69), INT8_C( 85), INT8_C( 113)), simde_mm256_set_epi8(INT8_C( 117), INT8_C( -60), INT8_C(-125), INT8_C( -88), INT8_C( -37), INT8_C( -4), INT8_C(-117), INT8_C( -63), INT8_C( 55), INT8_C( -86), 
INT8_C(-105), INT8_C(-128), INT8_C( 11), INT8_C( 17), INT8_C(-118), INT8_C( -71), INT8_C( 76), INT8_C( -53), INT8_C( -69), INT8_C( -74), INT8_C( 38), INT8_C( 121), INT8_C( -37), INT8_C( 64), INT8_C( 55), INT8_C( 107), INT8_C( -69), INT8_C( 46), INT8_C( 85), INT8_C( 97), INT8_C( 113), INT8_C( -31)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_unpacklo_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_unpacklo_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C(-30282), INT16_C( 17545), INT16_C( 1397), INT16_C(-23688), INT16_C( 7095), INT16_C(-13226), INT16_C( 16225), INT16_C( 24624), INT16_C(-19105), INT16_C( 16354), INT16_C(-30269), INT16_C(-10683), INT16_C( 8943), INT16_C(-21230), INT16_C(-20477), INT16_C(-15650)), simde_mm256_set_epi16(INT16_C(-25398), INT16_C(-19610), INT16_C(-11289), INT16_C( 28083), INT16_C(-11420), INT16_C( -1651), INT16_C(-26110), INT16_C( 32276), INT16_C( 2653), INT16_C( 16175), INT16_C( 2453), INT16_C(-13949), INT16_C( -3232), INT16_C(-27909), INT16_C( 187), INT16_C( 30423)), simde_mm256_set_epi16(INT16_C(-11420), INT16_C( 7095), INT16_C( -1651), INT16_C(-13226), INT16_C(-26110), INT16_C( 16225), INT16_C( 32276), INT16_C( 24624), INT16_C( -3232), INT16_C( 8943), INT16_C(-27909), INT16_C(-21230), INT16_C( 187), INT16_C(-20477), INT16_C( 30423), INT16_C(-15650)) }, { simde_mm256_set_epi16(INT16_C( 21698), INT16_C(-28557), INT16_C(-10383), INT16_C( 5893), INT16_C( 262), INT16_C(-15994), INT16_C( 10150), INT16_C(-16695), INT16_C(-27992), INT16_C( 19356), INT16_C(-16467), INT16_C( 25602), INT16_C( -8537), INT16_C( 32448), INT16_C( 8648), INT16_C( -2538)), simde_mm256_set_epi16(INT16_C(-10809), INT16_C( 31334), INT16_C(-23341), INT16_C(-23517), INT16_C( 24971), INT16_C( -2967), INT16_C( 85), INT16_C( -6374), INT16_C( 
12369), INT16_C( 2809), INT16_C( 24485), INT16_C( 26961), INT16_C(-13324), INT16_C( 1302), INT16_C( 20638), INT16_C(-30060)), simde_mm256_set_epi16(INT16_C( 24971), INT16_C( 262), INT16_C( -2967), INT16_C(-15994), INT16_C( 85), INT16_C( 10150), INT16_C( -6374), INT16_C(-16695), INT16_C(-13324), INT16_C( -8537), INT16_C( 1302), INT16_C( 32448), INT16_C( 20638), INT16_C( 8648), INT16_C(-30060), INT16_C( -2538)) }, { simde_mm256_set_epi16(INT16_C( 29708), INT16_C( 973), INT16_C(-18396), INT16_C( -4069), INT16_C( -8794), INT16_C( 11568), INT16_C( 791), INT16_C( 2488), INT16_C( -5218), INT16_C(-22271), INT16_C(-28848), INT16_C( 10219), INT16_C( 23398), INT16_C( 32588), INT16_C( 20014), INT16_C( 27330)), simde_mm256_set_epi16(INT16_C( 2064), INT16_C( 8959), INT16_C( 21574), INT16_C(-11590), INT16_C( 29752), INT16_C( -9389), INT16_C(-30933), INT16_C( 2430), INT16_C(-26152), INT16_C( 21194), INT16_C(-30448), INT16_C( -9544), INT16_C(-11845), INT16_C(-20406), INT16_C( 31145), INT16_C( 11486)), simde_mm256_set_epi16(INT16_C( 29752), INT16_C( -8794), INT16_C( -9389), INT16_C( 11568), INT16_C(-30933), INT16_C( 791), INT16_C( 2430), INT16_C( 2488), INT16_C(-11845), INT16_C( 23398), INT16_C(-20406), INT16_C( 32588), INT16_C( 31145), INT16_C( 20014), INT16_C( 11486), INT16_C( 27330)) }, { simde_mm256_set_epi16(INT16_C( 6440), INT16_C( 24154), INT16_C( 3465), INT16_C( -9568), INT16_C( 32223), INT16_C(-15547), INT16_C(-13940), INT16_C(-23416), INT16_C( 1226), INT16_C( 3044), INT16_C(-16083), INT16_C( -2151), INT16_C( 18066), INT16_C( 29407), INT16_C( 22180), INT16_C( 8673)), simde_mm256_set_epi16(INT16_C( 32190), INT16_C( -2276), INT16_C( 14786), INT16_C( 21157), INT16_C(-13808), INT16_C(-14345), INT16_C( -8569), INT16_C( 28230), INT16_C( 4254), INT16_C(-24931), INT16_C( 188), INT16_C(-22513), INT16_C( 28262), INT16_C(-17377), INT16_C(-27018), INT16_C(-20998)), simde_mm256_set_epi16(INT16_C(-13808), INT16_C( 32223), INT16_C(-14345), INT16_C(-15547), INT16_C( -8569), 
INT16_C(-13940), INT16_C( 28230), INT16_C(-23416), INT16_C( 28262), INT16_C( 18066), INT16_C(-17377), INT16_C( 29407), INT16_C(-27018), INT16_C( 22180), INT16_C(-20998), INT16_C( 8673)) }, { simde_mm256_set_epi16(INT16_C( 21433), INT16_C(-32745), INT16_C(-20857), INT16_C( 9689), INT16_C(-19620), INT16_C( 27874), INT16_C(-12579), INT16_C(-11273), INT16_C( -7990), INT16_C( -5188), INT16_C( -2308), INT16_C(-28807), INT16_C( 30803), INT16_C(-31644), INT16_C(-17815), INT16_C( 25631)), simde_mm256_set_epi16(INT16_C( -6103), INT16_C( 30452), INT16_C( 26648), INT16_C(-28159), INT16_C( 7856), INT16_C(-23604), INT16_C(-21584), INT16_C( 8163), INT16_C(-18173), INT16_C( 4449), INT16_C(-19194), INT16_C( 32687), INT16_C( 13328), INT16_C( 5053), INT16_C(-14292), INT16_C( 9840)), simde_mm256_set_epi16(INT16_C( 7856), INT16_C(-19620), INT16_C(-23604), INT16_C( 27874), INT16_C(-21584), INT16_C(-12579), INT16_C( 8163), INT16_C(-11273), INT16_C( 13328), INT16_C( 30803), INT16_C( 5053), INT16_C(-31644), INT16_C(-14292), INT16_C(-17815), INT16_C( 9840), INT16_C( 25631)) }, { simde_mm256_set_epi16(INT16_C( 12319), INT16_C( 2668), INT16_C( 15777), INT16_C(-15674), INT16_C(-27625), INT16_C( -9913), INT16_C(-25103), INT16_C( 18516), INT16_C( -2788), INT16_C(-28181), INT16_C( 690), INT16_C(-21029), INT16_C( -9342), INT16_C( -7022), INT16_C( -3964), INT16_C( 27684)), simde_mm256_set_epi16(INT16_C(-23966), INT16_C( 20005), INT16_C(-26957), INT16_C( 9742), INT16_C(-30820), INT16_C( 24145), INT16_C( 4042), INT16_C( 6816), INT16_C(-17740), INT16_C( 1939), INT16_C( -2041), INT16_C(-32516), INT16_C(-25602), INT16_C( 15914), INT16_C( 5185), INT16_C(-12371)), simde_mm256_set_epi16(INT16_C(-30820), INT16_C(-27625), INT16_C( 24145), INT16_C( -9913), INT16_C( 4042), INT16_C(-25103), INT16_C( 6816), INT16_C( 18516), INT16_C(-25602), INT16_C( -9342), INT16_C( 15914), INT16_C( -7022), INT16_C( 5185), INT16_C( -3964), INT16_C(-12371), INT16_C( 27684)) }, { simde_mm256_set_epi16(INT16_C( -5299), INT16_C( 
-9685), INT16_C( 9755), INT16_C(-16148), INT16_C(-31255), INT16_C( 18778), INT16_C( 18350), INT16_C( 31236), INT16_C(-10664), INT16_C( 12773), INT16_C( 30044), INT16_C(-23199), INT16_C( 22014), INT16_C( 1980), INT16_C( 29718), INT16_C(-21712)), simde_mm256_set_epi16(INT16_C( 26040), INT16_C( 22288), INT16_C( 6550), INT16_C(-26515), INT16_C(-19807), INT16_C(-18397), INT16_C(-22874), INT16_C( 407), INT16_C(-20286), INT16_C( 30014), INT16_C( 13089), INT16_C( 23053), INT16_C(-13726), INT16_C( 7887), INT16_C(-22384), INT16_C(-14795)), simde_mm256_set_epi16(INT16_C(-19807), INT16_C(-31255), INT16_C(-18397), INT16_C( 18778), INT16_C(-22874), INT16_C( 18350), INT16_C( 407), INT16_C( 31236), INT16_C(-13726), INT16_C( 22014), INT16_C( 7887), INT16_C( 1980), INT16_C(-22384), INT16_C( 29718), INT16_C(-14795), INT16_C(-21712)) }, { simde_mm256_set_epi16(INT16_C( 31115), INT16_C( -4108), INT16_C( -8080), INT16_C( 29018), INT16_C(-17821), INT16_C(-27725), INT16_C( 1365), INT16_C( 15677), INT16_C( 9784), INT16_C(-29292), INT16_C( 18208), INT16_C(-29194), INT16_C( 11347), INT16_C( -8693), INT16_C( -8615), INT16_C( 13549)), simde_mm256_set_epi16(INT16_C(-31719), INT16_C( 9678), INT16_C( 3398), INT16_C( 25207), INT16_C(-26413), INT16_C( -2632), INT16_C( 30768), INT16_C( 11317), INT16_C( 21819), INT16_C( 22008), INT16_C( 24268), INT16_C( 4358), INT16_C(-28281), INT16_C( 31671), INT16_C( 9126), INT16_C( -4199)), simde_mm256_set_epi16(INT16_C(-26413), INT16_C(-17821), INT16_C( -2632), INT16_C(-27725), INT16_C( 30768), INT16_C( 1365), INT16_C( 11317), INT16_C( 15677), INT16_C(-28281), INT16_C( 11347), INT16_C( 31671), INT16_C( -8693), INT16_C( 9126), INT16_C( -8615), INT16_C( -4199), INT16_C( 13549)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_unpacklo_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_unpacklo_epi32(SIMDE_MUNIT_TEST_ARGS) { const 
struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 1634006754), INT32_C( 2027673276), INT32_C(-1004966058), INT32_C( 31035471), INT32_C( 582697150), INT32_C( -865564811), INT32_C(-1835008447), INT32_C( 1804896535)), simde_mm256_set_epi32(INT32_C( 250556499), INT32_C(-1998071312), INT32_C(-1032788603), INT32_C(-1564323608), INT32_C(-2109612931), INT32_C(-1991486451), INT32_C(-1624617327), INT32_C( -572875248)), simde_mm256_set_epi32(INT32_C(-1032788603), INT32_C(-1004966058), INT32_C(-1564323608), INT32_C( 31035471), INT32_C(-1624617327), INT32_C(-1835008447), INT32_C( -572875248), INT32_C( 1804896535)) }, { simde_mm256_set_epi32(INT32_C( 949978713), INT32_C(-1147362485), INT32_C( 854099489), INT32_C( -769233801), INT32_C(-2080267368), INT32_C( 2025215151), INT32_C( 1944502882), INT32_C( 1998396566)), simde_mm256_set_epi32(INT32_C( 1131236119), INT32_C(-1871822227), INT32_C(-1378954726), INT32_C( -808581099), INT32_C( 114286940), INT32_C(-2068340600), INT32_C( 1176100740), INT32_C(-1711799535)), simde_mm256_set_epi32(INT32_C(-1378954726), INT32_C( 854099489), INT32_C( -808581099), INT32_C( -769233801), INT32_C( 1176100740), INT32_C( 1944502882), INT32_C(-1711799535), INT32_C( 1998396566)) }, { simde_mm256_set_epi32(INT32_C( -539215325), INT32_C( 1386192905), INT32_C( 2045489389), INT32_C( -233070788), INT32_C(-1110980092), INT32_C(-1773587744), INT32_C(-1877041680), INT32_C( 1118926695)), simde_mm256_set_epi32(INT32_C( 1837258736), INT32_C( 777251927), INT32_C( -528822409), INT32_C( 652807806), INT32_C( -855377965), INT32_C( 2001307829), INT32_C( 222072460), INT32_C( -954219379)), simde_mm256_set_epi32(INT32_C( -528822409), INT32_C( 2045489389), INT32_C( 652807806), INT32_C( -233070788), INT32_C( 222072460), INT32_C(-1877041680), INT32_C( -954219379), INT32_C( 1118926695)) }, { simde_mm256_set_epi32(INT32_C( 1171398216), INT32_C(-1815838821), INT32_C( 612082553), INT32_C( -598227397), INT32_C( 1917370778), 
INT32_C(-1469217853), INT32_C(-1242194963), INT32_C(-1540218008)), simde_mm256_set_epi32(INT32_C( 1716361405), INT32_C( 1201391077), INT32_C( -200251042), INT32_C( 92359436), INT32_C(-1697948463), INT32_C(-1859110046), INT32_C( 1319052809), INT32_C( -520980879)), simde_mm256_set_epi32(INT32_C( -200251042), INT32_C( 612082553), INT32_C( 92359436), INT32_C( -598227397), INT32_C( 1319052809), INT32_C(-1242194963), INT32_C( -520980879), INT32_C(-1540218008)) }, { simde_mm256_set_epi32(INT32_C(-1628276207), INT32_C( -52307206), INT32_C( -204071811), INT32_C( -323316344), INT32_C( -993779627), INT32_C( 1107422469), INT32_C(-2067672829), INT32_C(-1768371428)), simde_mm256_set_epi32(INT32_C(-1719696812), INT32_C(-1577841127), INT32_C(-1009880980), INT32_C( -766826796), INT32_C( 1275907717), INT32_C(-1042662611), INT32_C( -491358494), INT32_C( 421707156)), simde_mm256_set_epi32(INT32_C(-1009880980), INT32_C( -204071811), INT32_C( -766826796), INT32_C( -323316344), INT32_C( -491358494), INT32_C(-2067672829), INT32_C( 421707156), INT32_C(-1768371428)) }, { simde_mm256_set_epi32(INT32_C(-1254036215), INT32_C( 377006130), INT32_C( 1193383293), INT32_C(-1422821080), INT32_C( -739482809), INT32_C( 2014379887), INT32_C(-1242086604), INT32_C(-1197757953)), simde_mm256_set_epi32(INT32_C( 1288082582), INT32_C( 1674663179), INT32_C( -228351353), INT32_C( -941607417), INT32_C( 416281156), INT32_C(-1292618170), INT32_C( 1488480694), INT32_C( 327047972)), simde_mm256_set_epi32(INT32_C( -228351353), INT32_C( 1193383293), INT32_C( -941607417), INT32_C(-1422821080), INT32_C( 1488480694), INT32_C(-1242086604), INT32_C( 327047972), INT32_C(-1197757953)) }, { simde_mm256_set_epi32(INT32_C( 1508573201), INT32_C( 969937333), INT32_C( 14444014), INT32_C( 108302280), INT32_C(-1520907964), INT32_C( -22983897), INT32_C( 1816713030), INT32_C(-1177538461)), simde_mm256_set_epi32(INT32_C(-1104336423), INT32_C( 271712214), INT32_C( -354649539), INT32_C( 1013182006), INT32_C( 1412833129), 
INT32_C(-1419706355), INT32_C( 1282557791), INT32_C(-1472463953)), simde_mm256_set_epi32(INT32_C( -354649539), INT32_C( 14444014), INT32_C( 1013182006), INT32_C( 108302280), INT32_C( 1282557791), INT32_C( 1816713030), INT32_C(-1472463953), INT32_C(-1177538461)) }, { simde_mm256_set_epi32(INT32_C( -66937609), INT32_C( -119592026), INT32_C( 1432494483), INT32_C( -124579914), INT32_C( -238722129), INT32_C( -220704743), INT32_C( 419566334), INT32_C( 422590012)), simde_mm256_set_epi32(INT32_C( -454663334), INT32_C( 1184045083), INT32_C( 1025965918), INT32_C( -854328465), INT32_C( -19322018), INT32_C( 1969810936), INT32_C( -266337614), INT32_C( 51340398)), simde_mm256_set_epi32(INT32_C( 1025965918), INT32_C( 1432494483), INT32_C( -854328465), INT32_C( -124579914), INT32_C( -266337614), INT32_C( 419566334), INT32_C( 51340398), INT32_C( 422590012)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_unpacklo_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_unpacklo_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C( 7018005571900790460), INT64_C(-4316296352669003697), INT64_C( 2502665206151808885), INT64_C(-7881301265943852777)), simde_mm256_set_epi64x(INT64_C( 1076131971302152688), INT64_C(-4435793270835883800), INT64_C(-9060718543560223731), INT64_C(-6977678284257845744)), simde_mm256_set_epi64x(INT64_C(-4435793270835883800), INT64_C(-4316296352669003697), INT64_C(-6977678284257845744), INT64_C(-7881301265943852777)) }, { simde_mm256_set_epi64x(INT64_C( 4080127507378774859), INT64_C( 3668329376311045239), INT64_C(-8934680310470781777), INT64_C( 8351576287166143638)), simde_mm256_set_epi64x(INT64_C( 4858622137582109293), INT64_C(-5922565447348254699), INT64_C( 490858671886540936), INT64_C( 5051314217684566801)), 
simde_mm256_set_epi64x(INT64_C(-5922565447348254699), INT64_C( 3668329376311045239), INT64_C( 5051314217684566801), INT64_C( 8351576287166143638)) }, { simde_mm256_set_epi64x(INT64_C(-2315912184990818295), INT64_C( 8785310034131918652), INT64_C(-4771623159125691680), INT64_C(-8061832627709970585)), simde_mm256_set_epi64x(INT64_C( 7890966186187549783), INT64_C(-2271274951394128258), INT64_C(-3673820383392724811), INT64_C( 953793956383016077)), simde_mm256_set_epi64x(INT64_C(-2271274951394128258), INT64_C( 8785310034131918652), INT64_C( 953793956383016077), INT64_C(-8061832627709970585)) }, { simde_mm256_set_epi64x(INT64_C( 5031117030791872411), INT64_C( 2628874551283926587), INT64_C( 8235044788641825731), INT64_C(-5335186738586180760)), simde_mm256_set_epi64x(INT64_C( 7371716103793001957), INT64_C( -860071676287562996), INT64_C(-7292633116442608798), INT64_C( 5665288680125920881)), simde_mm256_set_epi64x(INT64_C( -860071676287562996), INT64_C( 2628874551283926587), INT64_C( 5665288680125920881), INT64_C(-5335186738586180760)) }, { simde_mm256_set_epi64x(INT64_C(-6993393053677266182), INT64_C( -876481750308842104), INT64_C(-4268250996288656123), INT64_C(-8880587176856204516)), simde_mm256_set_epi64x(INT64_C(-7386041563858334183), INT64_C(-4337405778424289580), INT64_C( 5479981920481327917), INT64_C(-2110368661920105068)), simde_mm256_set_epi64x(INT64_C(-4337405778424289580), INT64_C( -876481750308842104), INT64_C(-2110368661920105068), INT64_C(-8880587176856204516)) }, { simde_mm256_set_epi64x(INT64_C(-5386044531047618510), INT64_C( 5125542217899931944), INT64_C(-3176054478594834577), INT64_C(-5334721339882493441)), simde_mm256_set_epi64x(INT64_C( 5532272565911901451), INT64_C( -980761589778991609), INT64_C( 1787913953963423302), INT64_C( 6392975901784431396)), simde_mm256_set_epi64x(INT64_C( -980761589778991609), INT64_C( 5125542217899931944), INT64_C( 6392975901784431396), INT64_C(-5334721339882493441)) }, { simde_mm256_set_epi64x(INT64_C( 6479272562886971829), 
INT64_C( 62036567861268424), INT64_C(-6532249961333961945), INT64_C( 7802723053184495715)), simde_mm256_set_epi64x(INT64_C(-4743088820294909994), INT64_C(-1523208170533294538), INT64_C( 6068072086635610125), INT64_C( 5508543770397506479)), simde_mm256_set_epi64x(INT64_C(-1523208170533294538), INT64_C( 62036567861268424), INT64_C( 5508543770397506479), INT64_C( 7802723053184495715)) }, { simde_mm256_set_epi64x(INT64_C( -287494837352059994), INT64_C( 6152516960355815350), INT64_C(-1025303732812230631), INT64_C( 1802023683455202876)), simde_mm256_set_epi64x(INT64_C(-1952764149036279781), INT64_C( 4406490068061256559), INT64_C( -82987433432912392), INT64_C(-1143911341773331346)), simde_mm256_set_epi64x(INT64_C( 4406490068061256559), INT64_C( 6152516960355815350), INT64_C(-1143911341773331346), INT64_C( 1802023683455202876)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_unpacklo_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_unpackhi_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( 87), INT8_C( 102), INT8_C( -8), INT8_C( -64), INT8_C( 127), INT8_C( 58), INT8_C( 96), INT8_C( 103), INT8_C( -98), INT8_C(-118), INT8_C( 55), INT8_C( 100), INT8_C( -28), INT8_C( 103), INT8_C( -55), INT8_C( 11), INT8_C( 6), INT8_C(-121), INT8_C( -30), INT8_C( -49), INT8_C( 125), INT8_C( 108), INT8_C( 76), INT8_C( -98), INT8_C( 55), INT8_C( -83), INT8_C( -57), INT8_C( 124), INT8_C( 121), INT8_C( 85), INT8_C( 102), INT8_C( 78)), simde_mm256_set_epi8(INT8_C( 3), INT8_C( 85), INT8_C( 69), INT8_C( -24), INT8_C(-108), INT8_C( 112), INT8_C( 108), INT8_C( -48), INT8_C( 87), INT8_C( 127), INT8_C( -59), INT8_C( -27), INT8_C( -42), INT8_C( 90), INT8_C(-117), INT8_C( -35), INT8_C( 78), INT8_C(-126), INT8_C( 39), INT8_C( 121), INT8_C( 106), INT8_C( -53), INT8_C( -82), INT8_C( 
-49), INT8_C( -76), INT8_C( 83), INT8_C( 68), INT8_C( 126), INT8_C( -36), INT8_C(-105), INT8_C( 91), INT8_C( -27)), simde_mm256_set_epi8(INT8_C( 3), INT8_C( 87), INT8_C( 85), INT8_C( 102), INT8_C( 69), INT8_C( -8), INT8_C( -24), INT8_C( -64), INT8_C(-108), INT8_C( 127), INT8_C( 112), INT8_C( 58), INT8_C( 108), INT8_C( 96), INT8_C( -48), INT8_C( 103), INT8_C( 78), INT8_C( 6), INT8_C(-126), INT8_C(-121), INT8_C( 39), INT8_C( -30), INT8_C( 121), INT8_C( -49), INT8_C( 106), INT8_C( 125), INT8_C( -53), INT8_C( 108), INT8_C( -82), INT8_C( 76), INT8_C( -49), INT8_C( -98)) }, { simde_mm256_set_epi8(INT8_C( 112), INT8_C( 90), INT8_C( 24), INT8_C( 102), INT8_C(-123), INT8_C( 60), INT8_C( -57), INT8_C( 10), INT8_C( -8), INT8_C( 91), INT8_C( 56), INT8_C( 10), INT8_C(-127), INT8_C( -25), INT8_C( 40), INT8_C(-120), INT8_C( 99), INT8_C(-103), INT8_C(-112), INT8_C( -7), INT8_C( 115), INT8_C( -59), INT8_C( 49), INT8_C( -54), INT8_C( -41), INT8_C( -25), INT8_C( -96), INT8_C( 16), INT8_C( -43), INT8_C( -2), INT8_C(-114), INT8_C( 89)), simde_mm256_set_epi8(INT8_C( 20), INT8_C( 32), INT8_C( -27), INT8_C( -28), INT8_C( 86), INT8_C( -18), INT8_C( 1), INT8_C( -10), INT8_C( -40), INT8_C( 83), INT8_C( -44), INT8_C(-102), INT8_C( 46), INT8_C( 76), INT8_C( 10), INT8_C( 66), INT8_C( -76), INT8_C( 18), INT8_C( 56), INT8_C( 13), INT8_C(-103), INT8_C( -2), INT8_C( -25), INT8_C( -5), INT8_C( 74), INT8_C( -5), INT8_C( -3), INT8_C(-123), INT8_C( -35), INT8_C( -25), INT8_C( 43), INT8_C( 19)), simde_mm256_set_epi8(INT8_C( 20), INT8_C( 112), INT8_C( 32), INT8_C( 90), INT8_C( -27), INT8_C( 24), INT8_C( -28), INT8_C( 102), INT8_C( 86), INT8_C(-123), INT8_C( -18), INT8_C( 60), INT8_C( 1), INT8_C( -57), INT8_C( -10), INT8_C( 10), INT8_C( -76), INT8_C( 99), INT8_C( 18), INT8_C(-103), INT8_C( 56), INT8_C(-112), INT8_C( 13), INT8_C( -7), INT8_C(-103), INT8_C( 115), INT8_C( -2), INT8_C( -59), INT8_C( -25), INT8_C( 49), INT8_C( -5), INT8_C( -54)) }, { simde_mm256_set_epi8(INT8_C( 95), INT8_C( 65), INT8_C( 44), 
INT8_C(-120), INT8_C( 77), INT8_C( 80), INT8_C( 72), INT8_C( -2), INT8_C( 107), INT8_C( 115), INT8_C( 120), INT8_C( -13), INT8_C( 12), INT8_C( 67), INT8_C( -69), INT8_C( -53), INT8_C( -97), INT8_C( -94), INT8_C(-120), INT8_C( 83), INT8_C(-103), INT8_C( -32), INT8_C( 93), INT8_C(-100), INT8_C( 58), INT8_C( 69), INT8_C(-123), INT8_C(-102), INT8_C( 123), INT8_C( -15), INT8_C( -31), INT8_C( -25)), simde_mm256_set_epi8(INT8_C( 12), INT8_C( 126), INT8_C( 6), INT8_C( -7), INT8_C(-106), INT8_C( 73), INT8_C( 64), INT8_C( 36), INT8_C( -83), INT8_C(-127), INT8_C( -5), INT8_C( -91), INT8_C( -74), INT8_C( 98), INT8_C( 28), INT8_C(-104), INT8_C( 56), INT8_C(-120), INT8_C( -84), INT8_C( 28), INT8_C( -96), INT8_C( 47), INT8_C( -85), INT8_C(-104), INT8_C( 96), INT8_C( 12), INT8_C( 123), INT8_C(-104), INT8_C( 31), INT8_C( 37), INT8_C( -31), INT8_C( -17)), simde_mm256_set_epi8(INT8_C( 12), INT8_C( 95), INT8_C( 126), INT8_C( 65), INT8_C( 6), INT8_C( 44), INT8_C( -7), INT8_C(-120), INT8_C(-106), INT8_C( 77), INT8_C( 73), INT8_C( 80), INT8_C( 64), INT8_C( 72), INT8_C( 36), INT8_C( -2), INT8_C( 56), INT8_C( -97), INT8_C(-120), INT8_C( -94), INT8_C( -84), INT8_C(-120), INT8_C( 28), INT8_C( 83), INT8_C( -96), INT8_C(-103), INT8_C( 47), INT8_C( -32), INT8_C( -85), INT8_C( 93), INT8_C(-104), INT8_C(-100)) }, { simde_mm256_set_epi8(INT8_C( 105), INT8_C( -44), INT8_C( 72), INT8_C( -66), INT8_C( 125), INT8_C( -29), INT8_C( 78), INT8_C( -12), INT8_C( 124), INT8_C( -33), INT8_C( 94), INT8_C( 61), INT8_C( 86), INT8_C( 37), INT8_C( -88), INT8_C( 37), INT8_C( -27), INT8_C( -80), INT8_C(-110), INT8_C( -81), INT8_C( 40), INT8_C( 113), INT8_C( -26), INT8_C( -57), INT8_C( -6), INT8_C( -99), INT8_C( -38), INT8_C( 24), INT8_C( 86), INT8_C(-109), INT8_C( 116), INT8_C( 103)), simde_mm256_set_epi8(INT8_C( -71), INT8_C( 89), INT8_C( 75), INT8_C( 16), INT8_C( -92), INT8_C( 95), INT8_C( -25), INT8_C( 57), INT8_C( 114), INT8_C( -21), INT8_C( 26), INT8_C( 67), INT8_C( -54), INT8_C( 93), INT8_C( 57), INT8_C( -16), 
INT8_C( -68), INT8_C( -27), INT8_C(-121), INT8_C( -7), INT8_C( -78), INT8_C(-108), INT8_C( 94), INT8_C( 56), INT8_C( 14), INT8_C( -83), INT8_C(-117), INT8_C( -4), INT8_C( 75), INT8_C( -63), INT8_C( 109), INT8_C( 6)), simde_mm256_set_epi8(INT8_C( -71), INT8_C( 105), INT8_C( 89), INT8_C( -44), INT8_C( 75), INT8_C( 72), INT8_C( 16), INT8_C( -66), INT8_C( -92), INT8_C( 125), INT8_C( 95), INT8_C( -29), INT8_C( -25), INT8_C( 78), INT8_C( 57), INT8_C( -12), INT8_C( -68), INT8_C( -27), INT8_C( -27), INT8_C( -80), INT8_C(-121), INT8_C(-110), INT8_C( -7), INT8_C( -81), INT8_C( -78), INT8_C( 40), INT8_C(-108), INT8_C( 113), INT8_C( 94), INT8_C( -26), INT8_C( 56), INT8_C( -57)) }, { simde_mm256_set_epi8(INT8_C( -76), INT8_C( -96), INT8_C( 126), INT8_C( 70), INT8_C(-109), INT8_C( 42), INT8_C( -56), INT8_C( -17), INT8_C( 90), INT8_C( 30), INT8_C( 106), INT8_C( 47), INT8_C( 58), INT8_C( 4), INT8_C( 49), INT8_C( 97), INT8_C( -63), INT8_C( -41), INT8_C( 44), INT8_C( -24), INT8_C(-126), INT8_C( -36), INT8_C( 48), INT8_C( 88), INT8_C(-121), INT8_C(-109), INT8_C( 0), INT8_C( -48), INT8_C( -14), INT8_C( 115), INT8_C(-117), INT8_C( -86)), simde_mm256_set_epi8(INT8_C(-115), INT8_C( 98), INT8_C( 106), INT8_C( -64), INT8_C( 61), INT8_C(-124), INT8_C( -21), INT8_C( 59), INT8_C( 18), INT8_C( -3), INT8_C( 78), INT8_C( 77), INT8_C( -11), INT8_C( 116), INT8_C( -63), INT8_C( -81), INT8_C( -87), INT8_C( 116), INT8_C( 29), INT8_C( 58), INT8_C( 66), INT8_C( 4), INT8_C( -71), INT8_C( 14), INT8_C( -54), INT8_C( -76), INT8_C( -39), INT8_C( 8), INT8_C( 12), INT8_C( -91), INT8_C( -14), INT8_C( -79)), simde_mm256_set_epi8(INT8_C(-115), INT8_C( -76), INT8_C( 98), INT8_C( -96), INT8_C( 106), INT8_C( 126), INT8_C( -64), INT8_C( 70), INT8_C( 61), INT8_C(-109), INT8_C(-124), INT8_C( 42), INT8_C( -21), INT8_C( -56), INT8_C( 59), INT8_C( -17), INT8_C( -87), INT8_C( -63), INT8_C( 116), INT8_C( -41), INT8_C( 29), INT8_C( 44), INT8_C( 58), INT8_C( -24), INT8_C( 66), INT8_C(-126), INT8_C( 4), INT8_C( -36), INT8_C( 
-71), INT8_C( 48), INT8_C( 14), INT8_C( 88)) }, { simde_mm256_set_epi8(INT8_C( -15), INT8_C( 20), INT8_C( -88), INT8_C( 19), INT8_C( 9), INT8_C( 82), INT8_C( -23), INT8_C( 71), INT8_C( 11), INT8_C( 4), INT8_C(-108), INT8_C( 0), INT8_C( 68), INT8_C( 53), INT8_C( 89), INT8_C( -77), INT8_C( -95), INT8_C( -3), INT8_C( 96), INT8_C( -31), INT8_C(-103), INT8_C( -16), INT8_C( 103), INT8_C( 19), INT8_C( -35), INT8_C( -46), INT8_C( 38), INT8_C(-105), INT8_C( 39), INT8_C( 111), INT8_C( 116), INT8_C( -83)), simde_mm256_set_epi8(INT8_C(-107), INT8_C( 48), INT8_C( 54), INT8_C( 1), INT8_C(-111), INT8_C( 44), INT8_C( 1), INT8_C(-106), INT8_C( 15), INT8_C( 70), INT8_C( -56), INT8_C( 61), INT8_C(-114), INT8_C( 127), INT8_C( -2), INT8_C(-115), INT8_C(-128), INT8_C(-102), INT8_C( -98), INT8_C( -84), INT8_C( 41), INT8_C( -64), INT8_C( -50), INT8_C(-121), INT8_C( 127), INT8_C( -51), INT8_C( 114), INT8_C( 76), INT8_C( 32), INT8_C( -1), INT8_C( 123), INT8_C( 126)), simde_mm256_set_epi8(INT8_C(-107), INT8_C( -15), INT8_C( 48), INT8_C( 20), INT8_C( 54), INT8_C( -88), INT8_C( 1), INT8_C( 19), INT8_C(-111), INT8_C( 9), INT8_C( 44), INT8_C( 82), INT8_C( 1), INT8_C( -23), INT8_C(-106), INT8_C( 71), INT8_C(-128), INT8_C( -95), INT8_C(-102), INT8_C( -3), INT8_C( -98), INT8_C( 96), INT8_C( -84), INT8_C( -31), INT8_C( 41), INT8_C(-103), INT8_C( -64), INT8_C( -16), INT8_C( -50), INT8_C( 103), INT8_C(-121), INT8_C( 19)) }, { simde_mm256_set_epi8(INT8_C( 55), INT8_C(-115), INT8_C( -83), INT8_C( -46), INT8_C( -9), INT8_C(-117), INT8_C( -90), INT8_C( 125), INT8_C( 101), INT8_C( -65), INT8_C( -92), INT8_C( -40), INT8_C(-123), INT8_C(-123), INT8_C( -75), INT8_C( -66), INT8_C( -35), INT8_C( 99), INT8_C( 2), INT8_C( -14), INT8_C( 112), INT8_C( 43), INT8_C( 61), INT8_C( 117), INT8_C( -65), INT8_C( -59), INT8_C( 26), INT8_C( 65), INT8_C( 77), INT8_C( -80), INT8_C( -44), INT8_C( -43)), simde_mm256_set_epi8(INT8_C(-100), INT8_C( 88), INT8_C( -93), INT8_C( -17), INT8_C( -79), INT8_C( 101), INT8_C( -79), INT8_C( 
-53), INT8_C( -78), INT8_C( 41), INT8_C( 93), INT8_C( -10), INT8_C( 23), INT8_C( -80), INT8_C( -49), INT8_C(-125), INT8_C( 90), INT8_C( 77), INT8_C( -52), INT8_C(-119), INT8_C( -58), INT8_C( -1), INT8_C( -56), INT8_C( -10), INT8_C(-115), INT8_C(-128), INT8_C( -24), INT8_C( -90), INT8_C( -73), INT8_C( 42), INT8_C( 53), INT8_C( 78)), simde_mm256_set_epi8(INT8_C(-100), INT8_C( 55), INT8_C( 88), INT8_C(-115), INT8_C( -93), INT8_C( -83), INT8_C( -17), INT8_C( -46), INT8_C( -79), INT8_C( -9), INT8_C( 101), INT8_C(-117), INT8_C( -79), INT8_C( -90), INT8_C( -53), INT8_C( 125), INT8_C( 90), INT8_C( -35), INT8_C( 77), INT8_C( 99), INT8_C( -52), INT8_C( 2), INT8_C(-119), INT8_C( -14), INT8_C( -58), INT8_C( 112), INT8_C( -1), INT8_C( 43), INT8_C( -56), INT8_C( 61), INT8_C( -10), INT8_C( 117)) }, { simde_mm256_set_epi8(INT8_C( 57), INT8_C( -60), INT8_C( 66), INT8_C( 39), INT8_C( 35), INT8_C( 15), INT8_C( 40), INT8_C( 1), INT8_C( -19), INT8_C( 90), INT8_C( -43), INT8_C( 35), INT8_C( 49), INT8_C(-109), INT8_C( -79), INT8_C( -32), INT8_C( -69), INT8_C(-121), INT8_C( 84), INT8_C( -95), INT8_C( -83), INT8_C( 120), INT8_C( -35), INT8_C( 86), INT8_C( -37), INT8_C(-127), INT8_C( -39), INT8_C( -49), INT8_C( -98), INT8_C( 115), INT8_C(-116), INT8_C( -93)), simde_mm256_set_epi8(INT8_C( 71), INT8_C( -6), INT8_C( 63), INT8_C( 38), INT8_C( 88), INT8_C( -67), INT8_C( 37), INT8_C( -30), INT8_C( 75), INT8_C( 31), INT8_C( 90), INT8_C( 41), INT8_C(-128), INT8_C( 10), INT8_C( -34), INT8_C( 57), INT8_C( -37), INT8_C( 100), INT8_C( 5), INT8_C( -3), INT8_C( -26), INT8_C( 30), INT8_C( 58), INT8_C( 60), INT8_C( 84), INT8_C( 42), INT8_C(-111), INT8_C(-101), INT8_C( 16), INT8_C( -45), INT8_C( -2), INT8_C( 13)), simde_mm256_set_epi8(INT8_C( 71), INT8_C( 57), INT8_C( -6), INT8_C( -60), INT8_C( 63), INT8_C( 66), INT8_C( 38), INT8_C( 39), INT8_C( 88), INT8_C( 35), INT8_C( -67), INT8_C( 15), INT8_C( 37), INT8_C( 40), INT8_C( -30), INT8_C( 1), INT8_C( -37), INT8_C( -69), INT8_C( 100), INT8_C(-121), INT8_C( 5), 
INT8_C( 84), INT8_C( -3), INT8_C( -95), INT8_C( -26), INT8_C( -83), INT8_C( 30), INT8_C( 120), INT8_C( 58), INT8_C( -35), INT8_C( 60), INT8_C( 86)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_unpackhi_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_unpackhi_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C( 28196), INT16_C(-19354), INT16_C( 9804), INT16_C(-14507), INT16_C(-13536), INT16_C( 20917), INT16_C( 967), INT16_C(-20246), INT16_C(-10186), INT16_C( -9535), INT16_C(-21783), INT16_C( 1947), INT16_C( 2132), INT16_C(-31037), INT16_C( 14740), INT16_C( 1814)), simde_mm256_set_epi16(INT16_C( 5018), INT16_C(-30102), INT16_C( 8885), INT16_C( 7092), INT16_C(-30176), INT16_C( -3016), INT16_C( 25065), INT16_C( 15182), INT16_C( 20584), INT16_C( -3893), INT16_C( 9254), INT16_C( -555), INT16_C( -1863), INT16_C(-29928), INT16_C(-30040), INT16_C( 20077)), simde_mm256_set_epi16(INT16_C( 5018), INT16_C( 28196), INT16_C(-30102), INT16_C(-19354), INT16_C( 8885), INT16_C( 9804), INT16_C( 7092), INT16_C(-14507), INT16_C( 20584), INT16_C(-10186), INT16_C( -3893), INT16_C( -9535), INT16_C( 9254), INT16_C(-21783), INT16_C( -555), INT16_C( 1947)) }, { simde_mm256_set_epi16(INT16_C( 24148), INT16_C(-26844), INT16_C(-25824), INT16_C( 11455), INT16_C( -8659), INT16_C( 13962), INT16_C( 642), INT16_C( 30912), INT16_C( 23855), INT16_C(-21952), INT16_C( -2538), INT16_C( 20285), INT16_C( 13723), INT16_C(-27203), INT16_C( 7085), INT16_C(-26650)), simde_mm256_set_epi16(INT16_C( -2663), INT16_C(-15563), INT16_C(-16845), INT16_C(-25964), INT16_C(-19994), INT16_C( 11205), INT16_C( -9585), INT16_C( 20135), INT16_C(-16938), INT16_C(-17819), INT16_C(-19377), INT16_C(-10418), INT16_C( 5652), INT16_C( 345), INT16_C(-24270), INT16_C( -4677)), simde_mm256_set_epi16(INT16_C( 
-2663), INT16_C( 24148), INT16_C(-15563), INT16_C(-26844), INT16_C(-16845), INT16_C(-25824), INT16_C(-25964), INT16_C( 11455), INT16_C(-16938), INT16_C( 23855), INT16_C(-17819), INT16_C(-21952), INT16_C(-19377), INT16_C( -2538), INT16_C(-10418), INT16_C( 20285)) }, { simde_mm256_set_epi16(INT16_C( -9935), INT16_C( -5807), INT16_C( -4566), INT16_C( 9296), INT16_C( 7157), INT16_C( -7813), INT16_C( 31971), INT16_C( 26020), INT16_C(-21643), INT16_C(-19740), INT16_C( 3735), INT16_C(-24663), INT16_C( -5558), INT16_C( 18004), INT16_C(-25263), INT16_C(-18828)), simde_mm256_set_epi16(INT16_C( -3706), INT16_C(-10360), INT16_C( 12612), INT16_C(-20501), INT16_C( 11831), INT16_C(-21556), INT16_C(-18710), INT16_C( 17184), INT16_C( 12245), INT16_C(-30371), INT16_C( 2840), INT16_C( 8546), INT16_C(-23833), INT16_C( 19149), INT16_C( 688), INT16_C( 6366)), simde_mm256_set_epi16(INT16_C( -3706), INT16_C( -9935), INT16_C(-10360), INT16_C( -5807), INT16_C( 12612), INT16_C( -4566), INT16_C(-20501), INT16_C( 9296), INT16_C( 12245), INT16_C(-21643), INT16_C(-30371), INT16_C(-19740), INT16_C( 2840), INT16_C( 3735), INT16_C( 8546), INT16_C(-24663)) }, { simde_mm256_set_epi16(INT16_C( 14882), INT16_C(-14375), INT16_C( -5870), INT16_C( -5698), INT16_C( 1829), INT16_C( 5020), INT16_C( 8224), INT16_C( -7222), INT16_C(-30958), INT16_C(-10885), INT16_C( 13942), INT16_C( 22894), INT16_C( 26157), INT16_C(-23632), INT16_C( 26556), INT16_C( -3542)), simde_mm256_set_epi16(INT16_C(-10931), INT16_C( 19441), INT16_C( -4465), INT16_C( 4406), INT16_C( 16020), INT16_C( 26201), INT16_C( 25476), INT16_C( 21164), INT16_C(-22646), INT16_C(-29482), INT16_C( 6258), INT16_C(-24419), INT16_C( 18662), INT16_C(-31207), INT16_C( 31969), INT16_C(-18662)), simde_mm256_set_epi16(INT16_C(-10931), INT16_C( 14882), INT16_C( 19441), INT16_C(-14375), INT16_C( -4465), INT16_C( -5870), INT16_C( 4406), INT16_C( -5698), INT16_C(-22646), INT16_C(-30958), INT16_C(-29482), INT16_C(-10885), INT16_C( 6258), INT16_C( 13942), 
INT16_C(-24419), INT16_C( 22894)) }, { simde_mm256_set_epi16(INT16_C( 29539), INT16_C( 676), INT16_C( 123), INT16_C(-11161), INT16_C(-30201), INT16_C( -2387), INT16_C( 24111), INT16_C( 19679), INT16_C( 23083), INT16_C(-21338), INT16_C( -7843), INT16_C( 32608), INT16_C(-24773), INT16_C( 13353), INT16_C( 17164), INT16_C( -2488)), simde_mm256_set_epi16(INT16_C( -5138), INT16_C( 29730), INT16_C( 16993), INT16_C( 30231), INT16_C( -2118), INT16_C( 20012), INT16_C(-32476), INT16_C( 2319), INT16_C(-28250), INT16_C(-23799), INT16_C( 9274), INT16_C( 28645), INT16_C( 5189), INT16_C( -5020), INT16_C( -4150), INT16_C(-15571)), simde_mm256_set_epi16(INT16_C( -5138), INT16_C( 29539), INT16_C( 29730), INT16_C( 676), INT16_C( 16993), INT16_C( 123), INT16_C( 30231), INT16_C(-11161), INT16_C(-28250), INT16_C( 23083), INT16_C(-23799), INT16_C(-21338), INT16_C( 9274), INT16_C( -7843), INT16_C( 28645), INT16_C( 32608)) }, { simde_mm256_set_epi16(INT16_C(-27710), INT16_C(-18125), INT16_C( 10296), INT16_C( 21376), INT16_C( 23378), INT16_C(-22935), INT16_C(-22914), INT16_C(-31313), INT16_C( 22251), INT16_C( 14011), INT16_C( -6668), INT16_C( 16302), INT16_C( 11015), INT16_C( -1790), INT16_C(-19054), INT16_C( 25215)), simde_mm256_set_epi16(INT16_C( 32531), INT16_C( -2485), INT16_C(-25139), INT16_C( 32419), INT16_C( -2942), INT16_C(-32258), INT16_C( -7635), INT16_C( 12558), INT16_C( 15559), INT16_C( -1904), INT16_C( 27475), INT16_C(-12566), INT16_C(-17567), INT16_C(-27853), INT16_C(-24500), INT16_C( 12726)), simde_mm256_set_epi16(INT16_C( 32531), INT16_C(-27710), INT16_C( -2485), INT16_C(-18125), INT16_C(-25139), INT16_C( 10296), INT16_C( 32419), INT16_C( 21376), INT16_C( 15559), INT16_C( 22251), INT16_C( -1904), INT16_C( 14011), INT16_C( 27475), INT16_C( -6668), INT16_C(-12566), INT16_C( 16302)) }, { simde_mm256_set_epi16(INT16_C( 4586), INT16_C(-13896), INT16_C( 23081), INT16_C(-20860), INT16_C( 27330), INT16_C( 28170), INT16_C( 24215), INT16_C( 4688), INT16_C( 19543), INT16_C( 27682), 
INT16_C(-31899), INT16_C( 32041), INT16_C( -8944), INT16_C( 18325), INT16_C(-13926), INT16_C( 24617)), simde_mm256_set_epi16(INT16_C(-21223), INT16_C( -4083), INT16_C( 21224), INT16_C(-26984), INT16_C( 30248), INT16_C( 30434), INT16_C( 22265), INT16_C( 8187), INT16_C( 29805), INT16_C( -7428), INT16_C( 31794), INT16_C( 7354), INT16_C(-23403), INT16_C( 17371), INT16_C(-31138), INT16_C( 16031)), simde_mm256_set_epi16(INT16_C(-21223), INT16_C( 4586), INT16_C( -4083), INT16_C(-13896), INT16_C( 21224), INT16_C( 23081), INT16_C(-26984), INT16_C(-20860), INT16_C( 29805), INT16_C( 19543), INT16_C( -7428), INT16_C( 27682), INT16_C( 31794), INT16_C(-31899), INT16_C( 7354), INT16_C( 32041)) }, { simde_mm256_set_epi16(INT16_C( -9151), INT16_C(-30324), INT16_C( 22631), INT16_C( 344), INT16_C( 4713), INT16_C( 10238), INT16_C(-10751), INT16_C(-14572), INT16_C( 3790), INT16_C( 8629), INT16_C( 9946), INT16_C( 31127), INT16_C( -9819), INT16_C( 4727), INT16_C( 30741), INT16_C(-20627)), simde_mm256_set_epi16(INT16_C(-23421), INT16_C( -2678), INT16_C( 28254), INT16_C( -2905), INT16_C( 6237), INT16_C( 10685), INT16_C( 27961), INT16_C( 15513), INT16_C( 7306), INT16_C(-32072), INT16_C(-26874), INT16_C( 25112), INT16_C( -3381), INT16_C(-22461), INT16_C( -7476), INT16_C(-19391)), simde_mm256_set_epi16(INT16_C(-23421), INT16_C( -9151), INT16_C( -2678), INT16_C(-30324), INT16_C( 28254), INT16_C( 22631), INT16_C( -2905), INT16_C( 344), INT16_C( 7306), INT16_C( 3790), INT16_C(-32072), INT16_C( 8629), INT16_C(-26874), INT16_C( 9946), INT16_C( 25112), INT16_C( 31127)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_unpackhi_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_unpackhi_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 1912850859), INT32_C( 967654585), INT32_C( 
1199101495), INT32_C( 1020867807), INT32_C(-1113017403), INT32_C( 1207205853), INT32_C(-1283015323), INT32_C( -865603422)), simde_mm256_set_epi32(INT32_C( 439671122), INT32_C( -834176430), INT32_C( 1316719462), INT32_C( 794894521), INT32_C( -364012057), INT32_C(-1283491930), INT32_C( -229222523), INT32_C( -489560867)), simde_mm256_set_epi32(INT32_C( 439671122), INT32_C( 1912850859), INT32_C( -834176430), INT32_C( 967654585), INT32_C( -364012057), INT32_C(-1113017403), INT32_C(-1283491930), INT32_C( 1207205853)) }, { simde_mm256_set_epi32(INT32_C(-1043075301), INT32_C(-1205379203), INT32_C( -623218356), INT32_C(-1709643548), INT32_C( -53386540), INT32_C( 1999540953), INT32_C( 2146270924), INT32_C( 1577977486)), simde_mm256_set_epi32(INT32_C( 1416708497), INT32_C( 1977290590), INT32_C( -799631345), INT32_C( -699557932), INT32_C( -779666453), INT32_C(-1556766589), INT32_C( 1459438848), INT32_C( 777616978)), simde_mm256_set_epi32(INT32_C( 1416708497), INT32_C(-1043075301), INT32_C( 1977290590), INT32_C(-1205379203), INT32_C( -779666453), INT32_C( -53386540), INT32_C(-1556766589), INT32_C( 1999540953)) }, { simde_mm256_set_epi32(INT32_C(-1262008529), INT32_C( 995722237), INT32_C( 1921619936), INT32_C(-2112826366), INT32_C( -393450028), INT32_C( 1344594991), INT32_C( 2093675145), INT32_C( 760241232)), simde_mm256_set_epi32(INT32_C( 1015857134), INT32_C( 1981903036), INT32_C( 1927114874), INT32_C( 1164834454), INT32_C( -157704373), INT32_C(-1887449946), INT32_C(-1985956729), INT32_C(-2099334634)), simde_mm256_set_epi32(INT32_C( 1015857134), INT32_C(-1262008529), INT32_C( 1981903036), INT32_C( 995722237), INT32_C( -157704373), INT32_C( -393450028), INT32_C(-1887449946), INT32_C( 1344594991)) }, { simde_mm256_set_epi32(INT32_C( 212170692), INT32_C( 815869922), INT32_C( 20080222), INT32_C( -75417640), INT32_C( -503678651), INT32_C( -834592925), INT32_C( 1707747009), INT32_C( -254760969)), simde_mm256_set_epi32(INT32_C( -574627621), INT32_C( -890978529), INT32_C( 2077265887), 
INT32_C( 842919754), INT32_C( -486905662), INT32_C( 988851226), INT32_C(-1518229684), INT32_C( 1816570667)), simde_mm256_set_epi32(INT32_C( -574627621), INT32_C( 212170692), INT32_C( -890978529), INT32_C( 815869922), INT32_C( -486905662), INT32_C( -503678651), INT32_C( 988851226), INT32_C( -834592925)) }, { simde_mm256_set_epi32(INT32_C( 1459349320), INT32_C( -943790006), INT32_C( 424384832), INT32_C( 113065932), INT32_C( -80059372), INT32_C( 814075306), INT32_C( 1255708904), INT32_C( 894835823)), simde_mm256_set_epi32(INT32_C( 1159658953), INT32_C( 513900351), INT32_C( 1274799760), INT32_C( 661217108), INT32_C(-1116902016), INT32_C( 1264134407), INT32_C( 1996134185), INT32_C( -620124201)), simde_mm256_set_epi32(INT32_C( 1159658953), INT32_C( 1459349320), INT32_C( 513900351), INT32_C( -943790006), INT32_C(-1116902016), INT32_C( -80059372), INT32_C( 1264134407), INT32_C( 814075306)) }, { simde_mm256_set_epi32(INT32_C( 1669821560), INT32_C( -524933447), INT32_C(-1923407638), INT32_C( 1748809176), INT32_C( -67073492), INT32_C(-1589224355), INT32_C(-1890395480), INT32_C( 650020033)), simde_mm256_set_epi32(INT32_C( 1449046752), INT32_C( 11693105), INT32_C( 225889559), INT32_C( 1195957602), INT32_C( -790861669), INT32_C( 894225381), INT32_C( 1788797029), INT32_C( 1410983650)), simde_mm256_set_epi32(INT32_C( 1449046752), INT32_C( 1669821560), INT32_C( 11693105), INT32_C( -524933447), INT32_C( -790861669), INT32_C( -67073492), INT32_C( 894225381), INT32_C(-1589224355)) }, { simde_mm256_set_epi32(INT32_C( 166836505), INT32_C( 35920603), INT32_C( 209405330), INT32_C(-1870274444), INT32_C(-2065187438), INT32_C(-1749203354), INT32_C(-1834771489), INT32_C( 1795755804)), simde_mm256_set_epi32(INT32_C( 863607209), INT32_C( 671761907), INT32_C(-1319646828), INT32_C( 236938511), INT32_C( 277292323), INT32_C(-1226502074), INT32_C( 1319858080), INT32_C( 1103529470)), simde_mm256_set_epi32(INT32_C( 863607209), INT32_C( 166836505), INT32_C( 671761907), INT32_C( 35920603), INT32_C( 
277292323), INT32_C(-2065187438), INT32_C(-1226502074), INT32_C(-1749203354)) }, { simde_mm256_set_epi32(INT32_C( -91786742), INT32_C(-1505313832), INT32_C( 1530067112), INT32_C(-1201437931), INT32_C(-1252606163), INT32_C( 1723954910), INT32_C( 348258249), INT32_C( -864462904)), simde_mm256_set_epi32(INT32_C( 926816633), INT32_C( -705859720), INT32_C( 1728076763), INT32_C(-1714964607), INT32_C( -194394697), INT32_C( -494196608), INT32_C( 1804204829), INT32_C(-1267214668)), simde_mm256_set_epi32(INT32_C( 926816633), INT32_C( -91786742), INT32_C( -705859720), INT32_C(-1505313832), INT32_C( -194394697), INT32_C(-1252606163), INT32_C( -494196608), INT32_C( 1723954910)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_unpackhi_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_unpackhi_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C( 8215631882498161849), INT64_C( 5150101706630575327), INT64_C(-4780373344556646435), INT64_C(-5510508849122512734)), simde_mm256_set_epi64x(INT64_C( 1888373093446416978), INT64_C( 5655267028091609273), INT64_C(-1563419877153212506), INT64_C( -984503235986201379)), simde_mm256_set_epi64x(INT64_C( 1888373093446416978), INT64_C( 8215631882498161849), INT64_C(-1563419877153212506), INT64_C(-4780373344556646435)) }, { simde_mm256_set_epi64x(INT64_C(-4479974301970768003), INT64_C(-2676702454701561628), INT64_C( -229293441347054887), INT64_C( 9218163428513678990)), simde_mm256_set_epi64x(INT64_C( 6084716664557604702), INT64_C(-3434390472036083756), INT64_C(-3348641914685120381), INT64_C( 6268242123449531986)), simde_mm256_set_epi64x(INT64_C( 6084716664557604702), INT64_C(-4479974301970768003), INT64_C(-3348641914685120381), INT64_C( -229293441347054887)) }, { simde_mm256_set_epi64x(INT64_C(-5420285358332345347), INT64_C( 
8253294782643753986), INT64_C(-1689855001525689297), INT64_C( 8992266276983299152)), simde_mm256_set_epi64x(INT64_C( 4363073169920192700), INT64_C( 8276895360629995158), INT64_C( -677335122063668058), INT64_C(-8529619200130502122)), simde_mm256_set_epi64x(INT64_C( 4363073169920192700), INT64_C(-5420285358332345347), INT64_C( -677335122063668058), INT64_C(-1689855001525689297)) }, { simde_mm256_set_epi64x(INT64_C( 911266184125558754), INT64_C( 86243901005969368), INT64_C(-2163283330278023325), INT64_C( 7334717557537023991)), simde_mm256_set_epi64x(INT64_C(-2468006836169294049), INT64_C( 8921789050604351306), INT64_C(-2091243893538378726), INT64_C(-6520746838779843797)), simde_mm256_set_epi64x(INT64_C(-2468006836169294049), INT64_C( 911266184125558754), INT64_C(-2091243893538378726), INT64_C(-2163283330278023325)) }, { simde_mm256_set_epi64x(INT64_C( 6267857606191016010), INT64_C( 1822718974471520204), INT64_C( -343852383664222806), INT64_C( 5393228676870839407)), simde_mm256_set_epi64x(INT64_C( 4980697278162501439), INT64_C( 5475223278809866068), INT64_C(-4797057630292334329), INT64_C( 8573331046677456855)), simde_mm256_set_epi64x(INT64_C( 4980697278162501439), INT64_C( 6267857606191016010), INT64_C(-4797057630292334329), INT64_C( -343852383664222806)) }, { simde_mm256_set_epi64x(INT64_C( 7171828994125735609), INT64_C(-8260972900337797672), INT64_C( -288078451862774691), INT64_C(-8119186762456202047)), simde_mm256_set_epi64x(INT64_C( 6223608410226715697), INT64_C( 970188269608820066), INT64_C(-3396725003120751643), INT64_C( 7682824740147947234)), simde_mm256_set_epi64x(INT64_C( 6223608410226715697), INT64_C( 7171828994125735609), INT64_C(-3396725003120751643), INT64_C( -288078451862774691)) }, { simde_mm256_set_epi64x(INT64_C( 716557332789861083), INT64_C( 899389046382780532), INT64_C(-8869912503774263706), INT64_C(-7880283539092467940)), simde_mm256_set_epi64x(INT64_C( 3709164719916598771), INT64_C(-5667839968293198577), INT64_C( 1190961461785333830), INT64_C( 
5668747290064881150)), simde_mm256_set_epi64x(INT64_C( 3709164719916598771), INT64_C( 716557332789861083), INT64_C( 1190961461785333830), INT64_C(-8869912503774263706)) }, { simde_mm256_set_epi64x(INT64_C( -394221052306736168), INT64_C( 6571588209818698517), INT64_C(-5379902503129090338), INT64_C( 1495757793447729096)), simde_mm256_set_epi64x(INT64_C( 3980647131712941944), INT64_C( 7422033184642545537), INT64_C( -834918862330058624), INT64_C( 7749000738868025012)), simde_mm256_set_epi64x(INT64_C( 3980647131712941944), INT64_C( -394221052306736168), INT64_C( -834918862330058624), INT64_C(-5379902503129090338)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_unpackhi_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_xor_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C( 5259297934522696228), INT64_C( -431388325374833226), INT64_C(-9156654280217339654), INT64_C( 3013799969040676174)), simde_mm256_set_epi64x(INT64_C(-5610681863545377343), INT64_C( 612481038911101319), INT64_C( 6883094940018615339), INT64_C( 5567527613349466758)), simde_mm256_set_epi64x(INT64_C( -369826194426859547), INT64_C( -973728768696970191), INT64_C(-2348436908296831791), INT64_C( 7246573032282466248)) }, { simde_mm256_set_epi64x(INT64_C( 3972970387578047990), INT64_C(-3229803026080205617), INT64_C(-6416566799554918012), INT64_C( 7243493956554503841)), simde_mm256_set_epi64x(INT64_C(-2328069278528400967), INT64_C( -866344990039815627), INT64_C( 5191362241164177707), INT64_C( 6965508020004735797)), simde_mm256_set_epi64x(INT64_C(-1687764814515086257), INT64_C( 2366483896335154938), INT64_C(-1227043938297071441), INT64_C( 300751355152891284)) }, { simde_mm256_set_epi64x(INT64_C(-2206747055975530789), INT64_C( 4273990306427029435), INT64_C(-8066951550635951807), 
INT64_C(-6928425273442710875)), simde_mm256_set_epi64x(INT64_C(-4939687486626430761), INT64_C(-1679871113178117518), INT64_C( -257233978894060261), INT64_C( 3512456176079583747)), simde_mm256_set_epi64x(INT64_C( 6490456006491962892), INT64_C(-3170633551353754167), INT64_C( 7809929228804508762), INT64_C(-5807521855463853914)) }, { simde_mm256_set_epi64x(INT64_C(-8921324304875098515), INT64_C( 7821700200964206339), INT64_C( 2994261353466796408), INT64_C(-3540068018343188204)), simde_mm256_set_epi64x(INT64_C( 4294357400656496399), INT64_C( 3626952931332295508), INT64_C(-7673859083891959013), INT64_C( 1757595472193292730)), simde_mm256_set_epi64x(INT64_C(-4636023147992241822), INT64_C( 6834710091372766295), INT64_C(-4896194530503130525), INT64_C(-2973751098574872402)) }, { simde_mm256_set_epi64x(INT64_C( 6035067220796971838), INT64_C( 776715589999365452), INT64_C( 6384421609504908311), INT64_C(-8816267298986390401)), simde_mm256_set_epi64x(INT64_C(-5739247744839421985), INT64_C( 4653816358777581262), INT64_C( 1021833051660996174), INT64_C(-2812490060666099514)), simde_mm256_set_epi64x(INT64_C(-2046106215006112543), INT64_C( 5355589079452705666), INT64_C( 6247691286445288025), INT64_C( 6727907985691518137)) }, { simde_mm256_set_epi64x(INT64_C( 8351942705843556286), INT64_C(-6873744911563016812), INT64_C(-3359807879676804643), INT64_C( 8110291315370526762)), simde_mm256_set_epi64x(INT64_C( -126725188666547082), INT64_C( 4977977654704902461), INT64_C(-7727661347289141755), INT64_C( 7453179086506187618)), simde_mm256_set_epi64x(INT64_C(-8226449520241153080), INT64_C(-1905346832462535511), INT64_C( 5016522326789730264), INT64_C( 1721087647877354312)) }, { simde_mm256_set_epi64x(INT64_C(-3541650301143541882), INT64_C( 2340510988864802758), INT64_C(-2484352519219712887), INT64_C(-7823366500674586971)), simde_mm256_set_epi64x(INT64_C( 8787496485572343767), INT64_C( 5972980855318355117), INT64_C( 1254817555351506981), INT64_C( 3585972827969393388)), 
simde_mm256_set_epi64x(INT64_C(-5248115586102763439), INT64_C( 8259425971222532971), INT64_C(-3679494355159515988), INT64_C(-6724368097479220151)) }, { simde_mm256_set_epi64x(INT64_C(-8306510049557921760), INT64_C(-3952727854475033325), INT64_C(-2945542500559658351), INT64_C( 937552523927336948)), simde_mm256_set_epi64x(INT64_C( 4952101385331357252), INT64_C( 3142376059888144776), INT64_C(-5526391810306106309), INT64_C( 1965153668529504230)), simde_mm256_set_epi64x(INT64_C(-4035158306098612636), INT64_C(-2108000641444326245), INT64_C( 7228588967820590762), INT64_C( 1605387943167658002)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_xor_si256(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_max_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu16(UINT16_C( 46627), UINT16_C( 59720), UINT16_C( 61906), UINT16_C( 11658), UINT16_C( 20044), UINT16_C( 39774), UINT16_C( 13081), UINT16_C( 26580), UINT16_C( 18209), UINT16_C( 20949), UINT16_C( 29177), UINT16_C( 31372), UINT16_C( 2292), UINT16_C( 1480), UINT16_C( 23053), UINT16_C( 14592)), simde_x_mm256_set_epu16(UINT16_C( 18663), UINT16_C( 17474), UINT16_C( 64035), UINT16_C( 52566), UINT16_C( 14212), UINT16_C( 10671), UINT16_C( 65410), UINT16_C( 60796), UINT16_C( 54296), UINT16_C( 44956), UINT16_C( 2247), UINT16_C( 1960), UINT16_C( 42291), UINT16_C( 26784), UINT16_C( 22137), UINT16_C( 30865)), simde_x_mm256_set_epu16(UINT16_C( 46627), UINT16_C( 59720), UINT16_C( 64035), UINT16_C( 52566), UINT16_C( 20044), UINT16_C( 39774), UINT16_C( 65410), UINT16_C( 60796), UINT16_C( 54296), UINT16_C( 44956), UINT16_C( 29177), UINT16_C( 31372), UINT16_C( 42291), UINT16_C( 26784), UINT16_C( 23053), UINT16_C( 30865)) }, { simde_x_mm256_set_epu16(UINT16_C( 18263), UINT16_C( 58739), UINT16_C( 63639), UINT16_C( 55084), UINT16_C( 
26185), UINT16_C( 41545), UINT16_C( 59964), UINT16_C( 12474), UINT16_C( 53086), UINT16_C( 18094), UINT16_C( 30753), UINT16_C( 29484), UINT16_C( 20722), UINT16_C( 11313), UINT16_C( 13962), UINT16_C( 39972)), simde_x_mm256_set_epu16(UINT16_C( 58325), UINT16_C( 58948), UINT16_C( 43545), UINT16_C( 59756), UINT16_C( 12295), UINT16_C( 57147), UINT16_C( 58632), UINT16_C( 46109), UINT16_C( 49795), UINT16_C( 8909), UINT16_C( 2266), UINT16_C( 36324), UINT16_C( 59196), UINT16_C( 64507), UINT16_C( 7734), UINT16_C( 45358)), simde_x_mm256_set_epu16(UINT16_C( 58325), UINT16_C( 58948), UINT16_C( 63639), UINT16_C( 59756), UINT16_C( 26185), UINT16_C( 57147), UINT16_C( 59964), UINT16_C( 46109), UINT16_C( 53086), UINT16_C( 18094), UINT16_C( 30753), UINT16_C( 36324), UINT16_C( 59196), UINT16_C( 64507), UINT16_C( 13962), UINT16_C( 45358)) }, { simde_x_mm256_set_epu16(UINT16_C( 39984), UINT16_C( 47951), UINT16_C( 65190), UINT16_C( 48692), UINT16_C( 48663), UINT16_C( 53744), UINT16_C( 12623), UINT16_C( 35136), UINT16_C( 1353), UINT16_C( 21406), UINT16_C( 10132), UINT16_C( 27940), UINT16_C( 4906), UINT16_C( 10556), UINT16_C( 20269), UINT16_C( 18864)), simde_x_mm256_set_epu16(UINT16_C( 52708), UINT16_C( 50460), UINT16_C( 39275), UINT16_C( 31341), UINT16_C( 59248), UINT16_C( 23377), UINT16_C( 29345), UINT16_C( 50829), UINT16_C( 4977), UINT16_C( 15093), UINT16_C( 36151), UINT16_C( 58243), UINT16_C( 16819), UINT16_C( 62076), UINT16_C( 48614), UINT16_C( 2944)), simde_x_mm256_set_epu16(UINT16_C( 52708), UINT16_C( 50460), UINT16_C( 65190), UINT16_C( 48692), UINT16_C( 59248), UINT16_C( 53744), UINT16_C( 29345), UINT16_C( 50829), UINT16_C( 4977), UINT16_C( 21406), UINT16_C( 36151), UINT16_C( 58243), UINT16_C( 16819), UINT16_C( 62076), UINT16_C( 48614), UINT16_C( 18864)) }, { simde_x_mm256_set_epu16(UINT16_C( 14954), UINT16_C( 41703), UINT16_C( 49393), UINT16_C( 33328), UINT16_C( 13198), UINT16_C( 8963), UINT16_C( 20974), UINT16_C( 46796), UINT16_C( 4895), UINT16_C( 2875), UINT16_C( 62027), 
UINT16_C( 51154), UINT16_C( 19247), UINT16_C( 34899), UINT16_C( 65019), UINT16_C( 60032)), simde_x_mm256_set_epu16(UINT16_C( 58187), UINT16_C( 7429), UINT16_C( 29396), UINT16_C( 13821), UINT16_C( 33371), UINT16_C( 1928), UINT16_C( 49379), UINT16_C( 722), UINT16_C( 28382), UINT16_C( 55366), UINT16_C( 43323), UINT16_C( 47667), UINT16_C( 5125), UINT16_C( 60684), UINT16_C( 59616), UINT16_C( 50280)), simde_x_mm256_set_epu16(UINT16_C( 58187), UINT16_C( 41703), UINT16_C( 49393), UINT16_C( 33328), UINT16_C( 33371), UINT16_C( 8963), UINT16_C( 49379), UINT16_C( 46796), UINT16_C( 28382), UINT16_C( 55366), UINT16_C( 62027), UINT16_C( 51154), UINT16_C( 19247), UINT16_C( 60684), UINT16_C( 65019), UINT16_C( 60032)) }, { simde_x_mm256_set_epu16(UINT16_C( 27739), UINT16_C( 29065), UINT16_C( 56947), UINT16_C( 55426), UINT16_C( 46492), UINT16_C( 58959), UINT16_C( 17884), UINT16_C( 106), UINT16_C( 40322), UINT16_C( 13650), UINT16_C( 50675), UINT16_C( 30885), UINT16_C( 7879), UINT16_C( 4114), UINT16_C( 54484), UINT16_C( 15816)), simde_x_mm256_set_epu16(UINT16_C( 43597), UINT16_C( 65192), UINT16_C( 43740), UINT16_C( 54700), UINT16_C( 31952), UINT16_C( 20957), UINT16_C( 20261), UINT16_C( 50955), UINT16_C( 37051), UINT16_C( 36093), UINT16_C( 59438), UINT16_C( 28726), UINT16_C( 6886), UINT16_C( 52003), UINT16_C( 19596), UINT16_C( 59577)), simde_x_mm256_set_epu16(UINT16_C( 43597), UINT16_C( 65192), UINT16_C( 56947), UINT16_C( 55426), UINT16_C( 46492), UINT16_C( 58959), UINT16_C( 20261), UINT16_C( 50955), UINT16_C( 40322), UINT16_C( 36093), UINT16_C( 59438), UINT16_C( 30885), UINT16_C( 7879), UINT16_C( 52003), UINT16_C( 54484), UINT16_C( 59577)) }, { simde_x_mm256_set_epu16(UINT16_C( 57657), UINT16_C( 13117), UINT16_C( 60292), UINT16_C( 65452), UINT16_C( 694), UINT16_C( 21975), UINT16_C( 46000), UINT16_C( 22467), UINT16_C( 60775), UINT16_C( 6084), UINT16_C( 52920), UINT16_C( 10893), UINT16_C( 33722), UINT16_C( 16366), UINT16_C( 746), UINT16_C( 51289)), simde_x_mm256_set_epu16(UINT16_C( 
18878), UINT16_C( 35582), UINT16_C( 34313), UINT16_C( 41553), UINT16_C( 15025), UINT16_C( 13655), UINT16_C( 30842), UINT16_C( 55976), UINT16_C( 55306), UINT16_C( 8486), UINT16_C( 20326), UINT16_C( 56205), UINT16_C( 9516), UINT16_C( 5604), UINT16_C( 41707), UINT16_C( 23844)), simde_x_mm256_set_epu16(UINT16_C( 57657), UINT16_C( 35582), UINT16_C( 60292), UINT16_C( 65452), UINT16_C( 15025), UINT16_C( 21975), UINT16_C( 46000), UINT16_C( 55976), UINT16_C( 60775), UINT16_C( 8486), UINT16_C( 52920), UINT16_C( 56205), UINT16_C( 33722), UINT16_C( 16366), UINT16_C( 41707), UINT16_C( 51289)) }, { simde_x_mm256_set_epu16(UINT16_C( 35061), UINT16_C( 31271), UINT16_C( 44458), UINT16_C( 43999), UINT16_C( 52764), UINT16_C( 24092), UINT16_C( 55278), UINT16_C( 9729), UINT16_C( 30350), UINT16_C( 35228), UINT16_C( 24843), UINT16_C( 46927), UINT16_C( 47959), UINT16_C( 8348), UINT16_C( 39137), UINT16_C( 59054)), simde_x_mm256_set_epu16(UINT16_C( 18920), UINT16_C( 22899), UINT16_C( 4131), UINT16_C( 54549), UINT16_C( 33095), UINT16_C( 25760), UINT16_C( 6465), UINT16_C( 12263), UINT16_C( 47803), UINT16_C( 61613), UINT16_C( 51708), UINT16_C( 45746), UINT16_C( 59719), UINT16_C( 2919), UINT16_C( 24086), UINT16_C( 5665)), simde_x_mm256_set_epu16(UINT16_C( 35061), UINT16_C( 31271), UINT16_C( 44458), UINT16_C( 54549), UINT16_C( 52764), UINT16_C( 25760), UINT16_C( 55278), UINT16_C( 12263), UINT16_C( 47803), UINT16_C( 61613), UINT16_C( 51708), UINT16_C( 46927), UINT16_C( 59719), UINT16_C( 8348), UINT16_C( 39137), UINT16_C( 59054)) }, { simde_x_mm256_set_epu16(UINT16_C( 46764), UINT16_C( 51381), UINT16_C( 57954), UINT16_C( 11573), UINT16_C( 40336), UINT16_C( 26367), UINT16_C( 17948), UINT16_C( 58963), UINT16_C( 63320), UINT16_C( 30133), UINT16_C( 13248), UINT16_C( 4541), UINT16_C( 43652), UINT16_C( 3442), UINT16_C( 602), UINT16_C( 59909)), simde_x_mm256_set_epu16(UINT16_C( 34844), UINT16_C( 11763), UINT16_C( 21582), UINT16_C( 37103), UINT16_C( 62839), UINT16_C( 48595), UINT16_C( 34664), UINT16_C( 
65307), UINT16_C( 42742), UINT16_C( 1820), UINT16_C( 54309), UINT16_C( 55027), UINT16_C( 64764), UINT16_C( 27300), UINT16_C( 20131), UINT16_C( 43537)), simde_x_mm256_set_epu16(UINT16_C( 46764), UINT16_C( 51381), UINT16_C( 57954), UINT16_C( 37103), UINT16_C( 62839), UINT16_C( 48595), UINT16_C( 34664), UINT16_C( 65307), UINT16_C( 63320), UINT16_C( 30133), UINT16_C( 54309), UINT16_C( 55027), UINT16_C( 64764), UINT16_C( 27300), UINT16_C( 20131), UINT16_C( 59909)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_max_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_max_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu32(UINT32_C(3051572045), UINT32_C(3545123096), UINT32_C( 539532434), UINT32_C(2726067579), UINT32_C(3419329411), UINT32_C(3056421163), UINT32_C(2937475413), UINT32_C( 891304178)), simde_x_mm256_set_epu32(UINT32_C(3684521838), UINT32_C(3664092042), UINT32_C( 648541541), UINT32_C( 272930365), UINT32_C( 675916127), UINT32_C(1720002587), UINT32_C(1082929595), UINT32_C( 984837149)), simde_x_mm256_set_epu32(UINT32_C(3684521838), UINT32_C(3664092042), UINT32_C( 648541541), UINT32_C(2726067579), UINT32_C(3419329411), UINT32_C(3056421163), UINT32_C(2937475413), UINT32_C( 984837149)) }, { simde_x_mm256_set_epu32(UINT32_C(2114544969), UINT32_C( 863431180), UINT32_C(2929790297), UINT32_C(1508406092), UINT32_C(1766188399), UINT32_C(3527468216), UINT32_C(1207097510), UINT32_C(3902158588)), simde_x_mm256_set_epu32(UINT32_C(4246012734), UINT32_C(1929345650), UINT32_C(3025365238), UINT32_C(3355971563), UINT32_C(3310140776), UINT32_C(2715842442), UINT32_C(2891355465), UINT32_C(4115361740)), simde_x_mm256_set_epu32(UINT32_C(4246012734), UINT32_C(1929345650), UINT32_C(3025365238), UINT32_C(3355971563), UINT32_C(3310140776), UINT32_C(3527468216), 
UINT32_C(2891355465), UINT32_C(4115361740)) }, { simde_x_mm256_set_epu32(UINT32_C(1001663617), UINT32_C(3733133836), UINT32_C(2215256808), UINT32_C(2636746621), UINT32_C(1004506231), UINT32_C(4292057274), UINT32_C(1794485537), UINT32_C(2483395991)), simde_x_mm256_set_epu32(UINT32_C(3187837407), UINT32_C( 282146620), UINT32_C(3384289392), UINT32_C(3287894196), UINT32_C(1516921107), UINT32_C( 874312086), UINT32_C(1365725481), UINT32_C(2010925515)), simde_x_mm256_set_epu32(UINT32_C(3187837407), UINT32_C(3733133836), UINT32_C(3384289392), UINT32_C(3287894196), UINT32_C(1516921107), UINT32_C(4292057274), UINT32_C(1794485537), UINT32_C(2483395991)) }, { simde_x_mm256_set_epu32(UINT32_C(1825864990), UINT32_C(3651632677), UINT32_C(1848482644), UINT32_C(4217806782), UINT32_C(3557654096), UINT32_C( 849332445), UINT32_C( 114697269), UINT32_C( 674786807)), simde_x_mm256_set_epu32(UINT32_C(1779504954), UINT32_C(1699927403), UINT32_C(2934040606), UINT32_C(4177282687), UINT32_C(3450483742), UINT32_C(3231995683), UINT32_C(2093833738), UINT32_C( 336221368)), simde_x_mm256_set_epu32(UINT32_C(1825864990), UINT32_C(3651632677), UINT32_C(2934040606), UINT32_C(4217806782), UINT32_C(3557654096), UINT32_C(3231995683), UINT32_C(2093833738), UINT32_C( 674786807)) }, { simde_x_mm256_set_epu32(UINT32_C( 837260657), UINT32_C(3220613586), UINT32_C(2642631733), UINT32_C(2252900450), UINT32_C(3061609531), UINT32_C(2252923032), UINT32_C(1030296341), UINT32_C(2720361240)), simde_x_mm256_set_epu32(UINT32_C( 273034038), UINT32_C(2374878315), UINT32_C( 248014486), UINT32_C( 363234795), UINT32_C(2126760034), UINT32_C(4029819680), UINT32_C(2144543040), UINT32_C(4227450764)), simde_x_mm256_set_epu32(UINT32_C( 837260657), UINT32_C(3220613586), UINT32_C(2642631733), UINT32_C(2252900450), UINT32_C(3061609531), UINT32_C(4029819680), UINT32_C(2144543040), UINT32_C(4227450764)) }, { simde_x_mm256_set_epu32(UINT32_C(1832345572), UINT32_C(3528531140), UINT32_C(1174695155), UINT32_C( 287919562), UINT32_C( 
792094828), UINT32_C(2183085761), UINT32_C(4262203031), UINT32_C(3585219150)),
      simde_x_mm256_set_epu32(UINT32_C( 890341915), UINT32_C(2131853812), UINT32_C(2693768197), UINT32_C(1107526035), UINT32_C(3439504205), UINT32_C( 751425977), UINT32_C(3946340711), UINT32_C(2999018213)),
      simde_x_mm256_set_epu32(UINT32_C(1832345572), UINT32_C(3528531140), UINT32_C(2693768197), UINT32_C(1107526035), UINT32_C(3439504205), UINT32_C(2183085761), UINT32_C(4262203031), UINT32_C(3585219150)) },
    { simde_x_mm256_set_epu32(UINT32_C(3937957510), UINT32_C(1450021822), UINT32_C(1539281783), UINT32_C( 551303701), UINT32_C(1703578262), UINT32_C(2645712321), UINT32_C(2567475981), UINT32_C(3368049591)),
      simde_x_mm256_set_epu32(UINT32_C(3778291576), UINT32_C(3382324216), UINT32_C(1328766962), UINT32_C(4275285368), UINT32_C( 709773804), UINT32_C(1207236213), UINT32_C(1754387913), UINT32_C(3060418664)),
      simde_x_mm256_set_epu32(UINT32_C(3937957510), UINT32_C(3382324216), UINT32_C(1539281783), UINT32_C(4275285368), UINT32_C(1703578262), UINT32_C(2645712321), UINT32_C(2567475981), UINT32_C(3368049591)) },
    { simde_x_mm256_set_epu32(UINT32_C(1036418017), UINT32_C(1863476858), UINT32_C(1083883567), UINT32_C(3943520867), UINT32_C( 70129580), UINT32_C(1560905984), UINT32_C(2897296070), UINT32_C(2575520666)),
      simde_x_mm256_set_epu32(UINT32_C(2683682856), UINT32_C( 255193154), UINT32_C( 693351345), UINT32_C(1449340919), UINT32_C(2448587623), UINT32_C(2723008994), UINT32_C(2741828276), UINT32_C(3254255853)),
      simde_x_mm256_set_epu32(UINT32_C(2683682856), UINT32_C(1863476858), UINT32_C(1083883567), UINT32_C(3943520867), UINT32_C(2448587623), UINT32_C(2723008994), UINT32_C(2897296070), UINT32_C(3254255853)) }
  };

  /* Feed every (a, b) pair of the table above to simde_mm256_max_epu32 and
   * require the result to equal the stored r when compared as unsigned
   * 32-bit lanes. */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256i r = simde_mm256_max_epu32(test_vec[i].a, test_vec[i].b);
    simde_assert_m256i_u32(r, ==, test_vec[i].r);
  }

  return 0;
}

/* Table-driven test for simde_mm_blend_epi32 with the fixed immediate 13
 * (binary 1101).
 *
 * Each of the 8 entries holds two inputs (a, b) and the expected result r,
 * all built with simde_mm_set_epi32.  In every entry r repeats the first,
 * second and fourth listed arguments of b and the third listed argument
 * of a.
 * NOTE(review): that pattern matches "set mask bit => take the lane from b"
 * provided simde_mm_set_epi32 lists the highest lane first -- confirm
 * against its definition.
 *
 * Returns 0 after all entries have been checked. */
static int
test_simde_mm_blend_epi32(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;
    simde__m128i b;
    simde__m128i r;
  } test_vec[8] = {
    { simde_mm_set_epi32(INT32_C( -776446699), INT32_C( 1736036449), INT32_C( 1358627972), INT32_C(-1414521748)),
      simde_mm_set_epi32(INT32_C( 703500470), INT32_C(-1252293011), INT32_C(-1142834240), INT32_C( 1567373213)),
      simde_mm_set_epi32(INT32_C( 703500470), INT32_C(-1252293011), INT32_C( 1358627972), INT32_C( 1567373213)) },
    { simde_mm_set_epi32(INT32_C( 928555701), INT32_C( -221570804), INT32_C( 1766167688), INT32_C( 1538455167)),
      simde_mm_set_epi32(INT32_C(-2064241406), INT32_C(-1114629780), INT32_C( 844778923), INT32_C( 825239814)),
      simde_mm_set_epi32(INT32_C(-2064241406), INT32_C(-1114629780), INT32_C( 1766167688), INT32_C( 825239814)) },
    { simde_mm_set_epi32(INT32_C( -254776787), INT32_C(-1383921861), INT32_C(-1381573747), INT32_C( 385374117)),
      simde_mm_set_epi32(INT32_C( 783537896), INT32_C( -226088253), INT32_C( 1138968651), INT32_C( 521443914)),
      simde_mm_set_epi32(INT32_C( 783537896), INT32_C( -226088253), INT32_C(-1381573747), INT32_C( 521443914)) },
    { simde_mm_set_epi32(INT32_C( 484740492), INT32_C(-1346417719), INT32_C( 1029792501), INT32_C( 2033188015)),
      simde_mm_set_epi32(INT32_C( 465252472), INT32_C( -775119562), INT32_C( 1616675771), INT32_C(-1846026054)),
      simde_mm_set_epi32(INT32_C( 465252472), INT32_C( -775119562), INT32_C( 1029792501), INT32_C(-1846026054)) },
    { simde_mm_set_epi32(INT32_C( 2097910720), INT32_C( 985021972), INT32_C(-1164844515), INT32_C(-1048926956)),
      simde_mm_set_epi32(INT32_C( 1235766570), INT32_C( 1912005813), INT32_C( -678890313), INT32_C( -799232173)),
      simde_mm_set_epi32(INT32_C( 1235766570), INT32_C( 1912005813), INT32_C(-1164844515), INT32_C( -799232173)) },
    { simde_mm_set_epi32(INT32_C( 107952418), INT32_C(-1186034132), INT32_C(-1490121281), INT32_C(-1988190971)),
      simde_mm_set_epi32(INT32_C( 438665972), INT32_C(-1023182690), INT32_C(-1692406594), INT32_C( 2076129119)),
      simde_mm_set_epi32(INT32_C( 438665972), INT32_C(-1023182690), INT32_C(-1490121281), INT32_C( 2076129119)) },
    { simde_mm_set_epi32(INT32_C( -296997195), INT32_C( 1989906045), INT32_C( 861414748), INT32_C( 802028810)),
      simde_mm_set_epi32(INT32_C( 2010137226), INT32_C(-1001536035), INT32_C( 526065654), INT32_C( 2146580357)),
      simde_mm_set_epi32(INT32_C( 2010137226), INT32_C(-1001536035), INT32_C( 861414748), INT32_C( 2146580357)) },
    { simde_mm_set_epi32(INT32_C( 1671679906), INT32_C( 72341521), INT32_C( 697290912), INT32_C( 1581591761)),
      simde_mm_set_epi32(INT32_C( -765500720), INT32_C( 807300453), INT32_C(-1774452228), INT32_C( -386626305)),
      simde_mm_set_epi32(INT32_C( -765500720), INT32_C( 807300453), INT32_C( 697290912), INT32_C( -386626305)) },
  };

  /* The blend mask is written as the literal 13 at the call site; the
   * result is compared with r as signed 32-bit lanes. */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m128i r = simde_mm_blend_epi32(test_vec[i].a, test_vec[i].b, 13);
    simde_assert_m128i_i32(r, ==, test_vec[i].r);
  }

  return 0;
}

/* Table-driven test whose entries are built with simde_mm256_set_epi8
 * (a, b, expected r); the table continues on the following lines. */
static int
test_simde_mm256_min_epi8(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256i a;
    simde__m256i b;
    simde__m256i r;
  } test_vec[8] = {
    { simde_mm256_set_epi8(INT8_C(-113), INT8_C( 124), INT8_C( 15), INT8_C(-126), INT8_C( -14), INT8_C( -39), INT8_C( -2), INT8_C( -33), INT8_C( 77), INT8_C(-107), INT8_C( -95), INT8_C( -10), INT8_C( 24), INT8_C( -89), INT8_C( 80), INT8_C( 69),
                           INT8_C( -99), INT8_C( 54), INT8_C( 5), INT8_C( 103), INT8_C( 71), INT8_C( 11), INT8_C( -41), INT8_C( 122), INT8_C( 57), INT8_C( -91), INT8_C(-106), INT8_C( 27), INT8_C( 57), INT8_C( -8), INT8_C( -10), INT8_C( 38)),
      simde_mm256_set_epi8(INT8_C( 58), INT8_C(-107), INT8_C( 43), INT8_C(-106), INT8_C(-127), INT8_C( 87), INT8_C( 107), INT8_C( 115), INT8_C( 91), INT8_C( 104), INT8_C( -95), INT8_C( -69), INT8_C( -37), INT8_C( 126), INT8_C( 40), INT8_C( 53),
                           INT8_C(-103), INT8_C( 84), INT8_C( 99), INT8_C( 38), INT8_C( 126), INT8_C( -61), INT8_C( 117), INT8_C( 48), INT8_C( 65), INT8_C( 73), INT8_C( 55), INT8_C( 73), INT8_C( -94), INT8_C( -78), INT8_C( -6), INT8_C( 49)),
      simde_mm256_set_epi8(INT8_C(-113), INT8_C(-107), INT8_C( 15), INT8_C(-126), INT8_C(-127),
INT8_C( -39), INT8_C( -2), INT8_C( -33), INT8_C( 77), INT8_C(-107), INT8_C( -95), INT8_C( -69), INT8_C( -37), INT8_C( -89), INT8_C( 40), INT8_C( 53), INT8_C(-103), INT8_C( 54), INT8_C( 5), INT8_C( 38), INT8_C( 71), INT8_C( -61), INT8_C( -41), INT8_C( 48), INT8_C( 57), INT8_C( -91), INT8_C(-106), INT8_C( 27), INT8_C( -94), INT8_C( -78), INT8_C( -10), INT8_C( 38)) }, { simde_mm256_set_epi8(INT8_C( 53), INT8_C( 21), INT8_C( 98), INT8_C( 120), INT8_C(-113), INT8_C( 39), INT8_C( -37), INT8_C( 99), INT8_C(-124), INT8_C( -15), INT8_C( -78), INT8_C( 107), INT8_C( 24), INT8_C( 124), INT8_C( -54), INT8_C( 81), INT8_C( 115), INT8_C( -24), INT8_C( 66), INT8_C( 27), INT8_C( 15), INT8_C( 61), INT8_C( 57), INT8_C(-118), INT8_C(-113), INT8_C( -12), INT8_C( -8), INT8_C( -69), INT8_C( -18), INT8_C( 21), INT8_C( 83), INT8_C( 113)), simde_mm256_set_epi8(INT8_C(-103), INT8_C(-115), INT8_C( -82), INT8_C( 99), INT8_C( 114), INT8_C( 96), INT8_C( 122), INT8_C(-112), INT8_C( 16), INT8_C( 31), INT8_C( 14), INT8_C( -56), INT8_C( -48), INT8_C( 2), INT8_C( -47), INT8_C( 23), INT8_C( 105), INT8_C( -82), INT8_C( 84), INT8_C( 120), INT8_C( 10), INT8_C( 6), INT8_C( -73), INT8_C( -69), INT8_C( -15), INT8_C( -30), INT8_C( -84), INT8_C( 4), INT8_C( 118), INT8_C( 78), INT8_C(-102), INT8_C(-121)), simde_mm256_set_epi8(INT8_C(-103), INT8_C(-115), INT8_C( -82), INT8_C( 99), INT8_C(-113), INT8_C( 39), INT8_C( -37), INT8_C(-112), INT8_C(-124), INT8_C( -15), INT8_C( -78), INT8_C( -56), INT8_C( -48), INT8_C( 2), INT8_C( -54), INT8_C( 23), INT8_C( 105), INT8_C( -82), INT8_C( 66), INT8_C( 27), INT8_C( 10), INT8_C( 6), INT8_C( -73), INT8_C(-118), INT8_C(-113), INT8_C( -30), INT8_C( -84), INT8_C( -69), INT8_C( -18), INT8_C( 21), INT8_C(-102), INT8_C(-121)) }, { simde_mm256_set_epi8(INT8_C( 99), INT8_C( 36), INT8_C( -33), INT8_C(-101), INT8_C(-112), INT8_C(-121), INT8_C( 47), INT8_C( -29), INT8_C( 104), INT8_C( 110), INT8_C( -84), INT8_C( 72), INT8_C( -54), INT8_C( -40), INT8_C( 19), INT8_C(-103), INT8_C( -69), 
INT8_C( -50), INT8_C( 26), INT8_C( -97), INT8_C( -51), INT8_C( -78), INT8_C( 41), INT8_C( -85), INT8_C( 36), INT8_C( 76), INT8_C( 111), INT8_C( 49), INT8_C( 97), INT8_C( -3), INT8_C( 36), INT8_C( 67)), simde_mm256_set_epi8(INT8_C( 66), INT8_C( -8), INT8_C( 56), INT8_C( 7), INT8_C( 58), INT8_C(-107), INT8_C( 110), INT8_C( -95), INT8_C( 31), INT8_C( -56), INT8_C( 42), INT8_C( 8), INT8_C( -46), INT8_C( -16), INT8_C( 21), INT8_C( 126), INT8_C( -12), INT8_C( -94), INT8_C( 109), INT8_C( 67), INT8_C( 47), INT8_C( 103), INT8_C( 21), INT8_C( 104), INT8_C( 99), INT8_C( 106), INT8_C( -91), INT8_C( 87), INT8_C( -83), INT8_C( 2), INT8_C( 105), INT8_C(-105)), simde_mm256_set_epi8(INT8_C( 66), INT8_C( -8), INT8_C( -33), INT8_C(-101), INT8_C(-112), INT8_C(-121), INT8_C( 47), INT8_C( -95), INT8_C( 31), INT8_C( -56), INT8_C( -84), INT8_C( 8), INT8_C( -54), INT8_C( -40), INT8_C( 19), INT8_C(-103), INT8_C( -69), INT8_C( -94), INT8_C( 26), INT8_C( -97), INT8_C( -51), INT8_C( -78), INT8_C( 21), INT8_C( -85), INT8_C( 36), INT8_C( 76), INT8_C( -91), INT8_C( 49), INT8_C( -83), INT8_C( -3), INT8_C( 36), INT8_C(-105)) }, { simde_mm256_set_epi8(INT8_C( -79), INT8_C( -25), INT8_C( 15), INT8_C( 2), INT8_C( -84), INT8_C( 9), INT8_C( 54), INT8_C( 99), INT8_C( -69), INT8_C( 117), INT8_C( -20), INT8_C( 47), INT8_C( 21), INT8_C( 44), INT8_C( 113), INT8_C( 59), INT8_C( 9), INT8_C( 65), INT8_C( -7), INT8_C( -76), INT8_C(-124), INT8_C( -64), INT8_C( 22), INT8_C( -79), INT8_C( -58), INT8_C( 29), INT8_C( 40), INT8_C( 64), INT8_C( -9), INT8_C(-123), INT8_C( -13), INT8_C( -32)), simde_mm256_set_epi8(INT8_C(-114), INT8_C( -7), INT8_C( 9), INT8_C( -63), INT8_C( 0), INT8_C( -82), INT8_C( -19), INT8_C( 75), INT8_C( 69), INT8_C( 48), INT8_C( 42), INT8_C( -52), INT8_C( 119), INT8_C( -20), INT8_C( 26), INT8_C( -57), INT8_C( 88), INT8_C( 87), INT8_C( 99), INT8_C(-127), INT8_C( 48), INT8_C( 19), INT8_C( 54), INT8_C( -35), INT8_C( 5), INT8_C( 23), INT8_C( -21), INT8_C( 88), INT8_C( 87), INT8_C(-109), INT8_C( -46), 
INT8_C(-127)), simde_mm256_set_epi8(INT8_C(-114), INT8_C( -25), INT8_C( 9), INT8_C( -63), INT8_C( -84), INT8_C( -82), INT8_C( -19), INT8_C( 75), INT8_C( -69), INT8_C( 48), INT8_C( -20), INT8_C( -52), INT8_C( 21), INT8_C( -20), INT8_C( 26), INT8_C( -57), INT8_C( 9), INT8_C( 65), INT8_C( -7), INT8_C(-127), INT8_C(-124), INT8_C( -64), INT8_C( 22), INT8_C( -79), INT8_C( -58), INT8_C( 23), INT8_C( -21), INT8_C( 64), INT8_C( -9), INT8_C(-123), INT8_C( -46), INT8_C(-127)) }, { simde_mm256_set_epi8(INT8_C( 40), INT8_C(-101), INT8_C(-116), INT8_C( -63), INT8_C( -76), INT8_C( 83), INT8_C( -41), INT8_C( -2), INT8_C( 74), INT8_C( 48), INT8_C(-104), INT8_C( -61), INT8_C( 95), INT8_C( 50), INT8_C( 86), INT8_C(-110), INT8_C( -24), INT8_C( 99), INT8_C( -15), INT8_C( 126), INT8_C( -73), INT8_C( -17), INT8_C(-122), INT8_C( -7), INT8_C( 95), INT8_C( -91), INT8_C( 66), INT8_C( -2), INT8_C( 88), INT8_C( -54), INT8_C( 117), INT8_C( -86)), simde_mm256_set_epi8(INT8_C( -45), INT8_C( -41), INT8_C( 87), INT8_C( -75), INT8_C( 104), INT8_C( -1), INT8_C( 11), INT8_C( 125), INT8_C(-123), INT8_C( 86), INT8_C( 26), INT8_C( 46), INT8_C( 6), INT8_C( 54), INT8_C( -77), INT8_C( 63), INT8_C( 79), INT8_C(-127), INT8_C(-118), INT8_C( 107), INT8_C(-122), INT8_C( -36), INT8_C( -2), INT8_C( -64), INT8_C( 65), INT8_C( -89), INT8_C( 86), INT8_C( 99), INT8_C( -49), INT8_C( 59), INT8_C( -15), INT8_C( 103)), simde_mm256_set_epi8(INT8_C( -45), INT8_C(-101), INT8_C(-116), INT8_C( -75), INT8_C( -76), INT8_C( -1), INT8_C( -41), INT8_C( -2), INT8_C(-123), INT8_C( 48), INT8_C(-104), INT8_C( -61), INT8_C( 6), INT8_C( 50), INT8_C( -77), INT8_C(-110), INT8_C( -24), INT8_C(-127), INT8_C(-118), INT8_C( 107), INT8_C(-122), INT8_C( -36), INT8_C(-122), INT8_C( -64), INT8_C( 65), INT8_C( -91), INT8_C( 66), INT8_C( -2), INT8_C( -49), INT8_C( -54), INT8_C( -15), INT8_C( -86)) }, { simde_mm256_set_epi8(INT8_C( -6), INT8_C( 22), INT8_C( -98), INT8_C(-111), INT8_C( -27), INT8_C( -45), INT8_C( -60), INT8_C( -64), INT8_C( -14), 
INT8_C( -15), INT8_C( -20), INT8_C( 52), INT8_C(-110), INT8_C( 52), INT8_C( 120), INT8_C( 119), INT8_C( -35), INT8_C( 3), INT8_C( 42), INT8_C( 109), INT8_C( 16), INT8_C( -18), INT8_C( -49), INT8_C( 102), INT8_C( 87), INT8_C( -69), INT8_C( 10), INT8_C( 0), INT8_C( 1), INT8_C( 21), INT8_C( 6), INT8_C( -56)), simde_mm256_set_epi8(INT8_C( -60), INT8_C( 58), INT8_C( 68), INT8_C(-110), INT8_C( 25), INT8_C( 53), INT8_C(-112), INT8_C( 1), INT8_C( 104), INT8_C( 22), INT8_C( -20), INT8_C(-119), INT8_C( -66), INT8_C( -43), INT8_C( 109), INT8_C( -61), INT8_C( -86), INT8_C( -59), INT8_C( -31), INT8_C( -86), INT8_C( -11), INT8_C( 72), INT8_C( 122), INT8_C(-127), INT8_C( 21), INT8_C( 32), INT8_C( 109), INT8_C( 110), INT8_C( 112), INT8_C( -23), INT8_C( 44), INT8_C( 69)), simde_mm256_set_epi8(INT8_C( -60), INT8_C( 22), INT8_C( -98), INT8_C(-111), INT8_C( -27), INT8_C( -45), INT8_C(-112), INT8_C( -64), INT8_C( -14), INT8_C( -15), INT8_C( -20), INT8_C(-119), INT8_C(-110), INT8_C( -43), INT8_C( 109), INT8_C( -61), INT8_C( -86), INT8_C( -59), INT8_C( -31), INT8_C( -86), INT8_C( -11), INT8_C( -18), INT8_C( -49), INT8_C(-127), INT8_C( 21), INT8_C( -69), INT8_C( 10), INT8_C( 0), INT8_C( 1), INT8_C( -23), INT8_C( 6), INT8_C( -56)) }, { simde_mm256_set_epi8(INT8_C( -81), INT8_C( 40), INT8_C( 34), INT8_C(-101), INT8_C( 85), INT8_C( 102), INT8_C( -21), INT8_C( 6), INT8_C( 23), INT8_C( -23), INT8_C( -36), INT8_C( 44), INT8_C( -12), INT8_C( 110), INT8_C( -68), INT8_C( 47), INT8_C( 45), INT8_C( -59), INT8_C(-112), INT8_C( 86), INT8_C( -95), INT8_C( -9), INT8_C(-101), INT8_C( 83), INT8_C( 37), INT8_C( 85), INT8_C( 31), INT8_C( -4), INT8_C( 98), INT8_C( -87), INT8_C( -65), INT8_C( -71)), simde_mm256_set_epi8(INT8_C( -58), INT8_C(-110), INT8_C( 4), INT8_C( 10), INT8_C( -39), INT8_C( -8), INT8_C( 126), INT8_C( 76), INT8_C( 85), INT8_C( -11), INT8_C(-106), INT8_C( 42), INT8_C( 125), INT8_C( 46), INT8_C( 63), INT8_C( 35), INT8_C( -24), INT8_C( -99), INT8_C( 71), INT8_C(-108), INT8_C( 52), 
INT8_C(-105), INT8_C( -88), INT8_C( -28), INT8_C( 12), INT8_C( -41), INT8_C( 81), INT8_C( 38), INT8_C( -13), INT8_C( 29), INT8_C( -44), INT8_C( 59)), simde_mm256_set_epi8(INT8_C( -81), INT8_C(-110), INT8_C( 4), INT8_C(-101), INT8_C( -39), INT8_C( -8), INT8_C( -21), INT8_C( 6), INT8_C( 23), INT8_C( -23), INT8_C(-106), INT8_C( 42), INT8_C( -12), INT8_C( 46), INT8_C( -68), INT8_C( 35), INT8_C( -24), INT8_C( -99), INT8_C(-112), INT8_C(-108), INT8_C( -95), INT8_C(-105), INT8_C(-101), INT8_C( -28), INT8_C( 12), INT8_C( -41), INT8_C( 31), INT8_C( -4), INT8_C( -13), INT8_C( -87), INT8_C( -65), INT8_C( -71)) }, { simde_mm256_set_epi8(INT8_C( -36), INT8_C( -94), INT8_C( -61), INT8_C( 117), INT8_C( 0), INT8_C( -1), INT8_C( 118), INT8_C( 99), INT8_C( -54), INT8_C(-123), INT8_C( 18), INT8_C( 111), INT8_C( 8), INT8_C( -59), INT8_C( 107), INT8_C( 77), INT8_C( 7), INT8_C( 91), INT8_C( 25), INT8_C( 70), INT8_C( 80), INT8_C( 81), INT8_C( 11), INT8_C( 29), INT8_C( 109), INT8_C( 125), INT8_C( -51), INT8_C( 58), INT8_C( 66), INT8_C( 126), INT8_C(-122), INT8_C( 96)), simde_mm256_set_epi8(INT8_C( -91), INT8_C( -28), INT8_C( 59), INT8_C( 45), INT8_C( 99), INT8_C( 67), INT8_C( -54), INT8_C( 122), INT8_C( -4), INT8_C( -6), INT8_C( -77), INT8_C( 96), INT8_C( 16), INT8_C( 37), INT8_C(-127), INT8_C( 76), INT8_C( 56), INT8_C( -93), INT8_C( 7), INT8_C( -50), INT8_C( 56), INT8_C(-112), INT8_C( 6), INT8_C( -49), INT8_C( 107), INT8_C( -15), INT8_C( -42), INT8_C(-121), INT8_C( -81), INT8_C( 97), INT8_C( -56), INT8_C( 97)), simde_mm256_set_epi8(INT8_C( -91), INT8_C( -94), INT8_C( -61), INT8_C( 45), INT8_C( 0), INT8_C( -1), INT8_C( -54), INT8_C( 99), INT8_C( -54), INT8_C(-123), INT8_C( -77), INT8_C( 96), INT8_C( 8), INT8_C( -59), INT8_C(-127), INT8_C( 76), INT8_C( 7), INT8_C( -93), INT8_C( 7), INT8_C( -50), INT8_C( 56), INT8_C(-112), INT8_C( 6), INT8_C( -49), INT8_C( 107), INT8_C( -15), INT8_C( -51), INT8_C(-121), INT8_C( -81), INT8_C( 97), INT8_C(-122), INT8_C( 96)) } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_min_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_min_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C(-28804), INT16_C( 3970), INT16_C( -3367), INT16_C( -289), INT16_C( 19861), INT16_C(-24074), INT16_C( 6311), INT16_C( 20549), INT16_C(-25290), INT16_C( 1383), INT16_C( 18187), INT16_C(-10374), INT16_C( 14757), INT16_C(-27109), INT16_C( 14840), INT16_C( -2522)), simde_mm256_set_epi16(INT16_C( 14997), INT16_C( 11158), INT16_C(-32425), INT16_C( 27507), INT16_C( 23400), INT16_C(-24133), INT16_C( -9346), INT16_C( 10293), INT16_C(-26284), INT16_C( 25382), INT16_C( 32451), INT16_C( 30000), INT16_C( 16713), INT16_C( 14153), INT16_C(-23886), INT16_C( -1487)), simde_mm256_set_epi16(INT16_C(-28804), INT16_C( 3970), INT16_C(-32425), INT16_C( -289), INT16_C( 19861), INT16_C(-24133), INT16_C( -9346), INT16_C( 10293), INT16_C(-26284), INT16_C( 1383), INT16_C( 18187), INT16_C(-10374), INT16_C( 14757), INT16_C(-27109), INT16_C(-23886), INT16_C( -2522)) }, { simde_mm256_set_epi16(INT16_C( 13589), INT16_C( 25208), INT16_C(-28889), INT16_C( -9373), INT16_C(-31503), INT16_C(-19861), INT16_C( 6268), INT16_C(-13743), INT16_C( 29672), INT16_C( 16923), INT16_C( 3901), INT16_C( 14730), INT16_C(-28684), INT16_C( -1861), INT16_C( -4587), INT16_C( 21361)), simde_mm256_set_epi16(INT16_C(-26227), INT16_C(-20893), INT16_C( 29280), INT16_C( 31376), INT16_C( 4127), INT16_C( 3784), INT16_C(-12286), INT16_C(-12009), INT16_C( 27054), INT16_C( 21624), INT16_C( 2566), INT16_C(-18501), INT16_C( -3614), INT16_C(-21500), INT16_C( 30286), INT16_C(-25977)), simde_mm256_set_epi16(INT16_C(-26227), INT16_C(-20893), INT16_C(-28889), INT16_C( -9373), INT16_C(-31503), INT16_C(-19861), INT16_C(-12286), INT16_C(-13743), INT16_C( 27054), INT16_C( 16923), INT16_C( 2566), 
INT16_C(-18501), INT16_C(-28684), INT16_C(-21500), INT16_C( -4587), INT16_C(-25977)) }, { simde_mm256_set_epi16(INT16_C( 25380), INT16_C( -8293), INT16_C(-28537), INT16_C( 12259), INT16_C( 26734), INT16_C(-21432), INT16_C(-13608), INT16_C( 5017), INT16_C(-17458), INT16_C( 6815), INT16_C(-12878), INT16_C( 10667), INT16_C( 9292), INT16_C( 28465), INT16_C( 25085), INT16_C( 9283)), simde_mm256_set_epi16(INT16_C( 17144), INT16_C( 14343), INT16_C( 14997), INT16_C( 28321), INT16_C( 8136), INT16_C( 10760), INT16_C(-11536), INT16_C( 5502), INT16_C( -2910), INT16_C( 27971), INT16_C( 12135), INT16_C( 5480), INT16_C( 25450), INT16_C(-23209), INT16_C(-21246), INT16_C( 27031)), simde_mm256_set_epi16(INT16_C( 17144), INT16_C( -8293), INT16_C(-28537), INT16_C( 12259), INT16_C( 8136), INT16_C(-21432), INT16_C(-13608), INT16_C( 5017), INT16_C(-17458), INT16_C( 6815), INT16_C(-12878), INT16_C( 5480), INT16_C( 9292), INT16_C(-23209), INT16_C(-21246), INT16_C( 9283)) }, { simde_mm256_set_epi16(INT16_C(-19993), INT16_C( 3842), INT16_C(-21495), INT16_C( 13923), INT16_C(-17547), INT16_C( -5073), INT16_C( 5420), INT16_C( 28987), INT16_C( 2369), INT16_C( -1612), INT16_C(-31552), INT16_C( 5809), INT16_C(-14819), INT16_C( 10304), INT16_C( -2171), INT16_C( -3104)), simde_mm256_set_epi16(INT16_C(-28935), INT16_C( 2497), INT16_C( 174), INT16_C( -4789), INT16_C( 17712), INT16_C( 10956), INT16_C( 30700), INT16_C( 6855), INT16_C( 22615), INT16_C( 25473), INT16_C( 12307), INT16_C( 14045), INT16_C( 1303), INT16_C( -5288), INT16_C( 22419), INT16_C(-11647)), simde_mm256_set_epi16(INT16_C(-28935), INT16_C( 2497), INT16_C(-21495), INT16_C( -4789), INT16_C(-17547), INT16_C( -5073), INT16_C( 5420), INT16_C( 6855), INT16_C( 2369), INT16_C( -1612), INT16_C(-31552), INT16_C( 5809), INT16_C(-14819), INT16_C( -5288), INT16_C( -2171), INT16_C(-11647)) }, { simde_mm256_set_epi16(INT16_C( 10395), INT16_C(-29503), INT16_C(-19373), INT16_C(-10242), INT16_C( 18992), INT16_C(-26429), INT16_C( 24370), INT16_C( 22162), 
INT16_C( -6045), INT16_C( -3714), INT16_C(-18449), INT16_C(-30983), INT16_C( 24485), INT16_C( 17150), INT16_C( 22730), INT16_C( 30122)), simde_mm256_set_epi16(INT16_C(-11305), INT16_C( 22453), INT16_C( 26879), INT16_C( 2941), INT16_C(-31402), INT16_C( 6702), INT16_C( 1590), INT16_C(-19649), INT16_C( 20353), INT16_C(-30101), INT16_C(-31012), INT16_C( -320), INT16_C( 16807), INT16_C( 22115), INT16_C(-12485), INT16_C( -3737)), simde_mm256_set_epi16(INT16_C(-11305), INT16_C(-29503), INT16_C(-19373), INT16_C(-10242), INT16_C(-31402), INT16_C(-26429), INT16_C( 1590), INT16_C(-19649), INT16_C( -6045), INT16_C(-30101), INT16_C(-31012), INT16_C(-30983), INT16_C( 16807), INT16_C( 17150), INT16_C(-12485), INT16_C( -3737)) }, { simde_mm256_set_epi16(INT16_C( -1514), INT16_C(-24943), INT16_C( -6701), INT16_C(-15168), INT16_C( -3343), INT16_C( -5068), INT16_C(-28108), INT16_C( 30839), INT16_C( -8957), INT16_C( 10861), INT16_C( 4334), INT16_C(-12442), INT16_C( 22459), INT16_C( 2560), INT16_C( 277), INT16_C( 1736)), simde_mm256_set_epi16(INT16_C(-15302), INT16_C( 17554), INT16_C( 6453), INT16_C(-28671), INT16_C( 26646), INT16_C( -4983), INT16_C(-16683), INT16_C( 28099), INT16_C(-21819), INT16_C( -7766), INT16_C( -2744), INT16_C( 31361), INT16_C( 5408), INT16_C( 28014), INT16_C( 28905), INT16_C( 11333)), simde_mm256_set_epi16(INT16_C(-15302), INT16_C(-24943), INT16_C( -6701), INT16_C(-28671), INT16_C( -3343), INT16_C( -5068), INT16_C(-28108), INT16_C( 28099), INT16_C(-21819), INT16_C( -7766), INT16_C( -2744), INT16_C(-12442), INT16_C( 5408), INT16_C( 2560), INT16_C( 277), INT16_C( 1736)) }, { simde_mm256_set_epi16(INT16_C(-20696), INT16_C( 8859), INT16_C( 21862), INT16_C( -5370), INT16_C( 6121), INT16_C( -9172), INT16_C( -2962), INT16_C(-17361), INT16_C( 11717), INT16_C(-28586), INT16_C(-24073), INT16_C(-25773), INT16_C( 9557), INT16_C( 8188), INT16_C( 25257), INT16_C(-16455)), simde_mm256_set_epi16(INT16_C(-14702), INT16_C( 1034), INT16_C( -9736), INT16_C( 32332), INT16_C( 22005), 
INT16_C(-27094), INT16_C( 32046), INT16_C( 16163), INT16_C( -5987), INT16_C( 18324), INT16_C( 13463), INT16_C(-22300), INT16_C( 3287), INT16_C( 20774), INT16_C( -3299), INT16_C(-11205)),
      simde_mm256_set_epi16(INT16_C(-20696), INT16_C( 1034), INT16_C( -9736), INT16_C( -5370), INT16_C( 6121), INT16_C(-27094), INT16_C( -2962), INT16_C(-17361), INT16_C( -5987), INT16_C(-28586), INT16_C(-24073), INT16_C(-25773), INT16_C( 3287), INT16_C( 8188), INT16_C( -3299), INT16_C(-16455)) },
    { simde_mm256_set_epi16(INT16_C( -9054), INT16_C(-15499), INT16_C( 255), INT16_C( 30307), INT16_C(-13691), INT16_C( 4719), INT16_C( 2245), INT16_C( 27469), INT16_C( 1883), INT16_C( 6470), INT16_C( 20561), INT16_C( 2845), INT16_C( 28029), INT16_C(-12998), INT16_C( 17022), INT16_C(-31136)),
      simde_mm256_set_epi16(INT16_C(-23068), INT16_C( 15149), INT16_C( 25411), INT16_C(-13702), INT16_C( -774), INT16_C(-19616), INT16_C( 4133), INT16_C(-32436), INT16_C( 14499), INT16_C( 1998), INT16_C( 14480), INT16_C( 1743), INT16_C( 27633), INT16_C(-10617), INT16_C(-20639), INT16_C(-14239)),
      simde_mm256_set_epi16(INT16_C(-23068), INT16_C(-15499), INT16_C( 255), INT16_C(-13702), INT16_C(-13691), INT16_C(-19616), INT16_C( 2245), INT16_C(-32436), INT16_C( 1883), INT16_C( 1998), INT16_C( 14480), INT16_C( 1743), INT16_C( 27633), INT16_C(-12998), INT16_C(-20639), INT16_C(-31136)) }
  };

  /* Feed every (a, b) pair of the table above to simde_mm256_min_epi16 and
   * require the result to equal the stored r when compared as signed
   * 16-bit lanes. */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256i r = simde_mm256_min_epi16(test_vec[i].a, test_vec[i].b);
    simde_assert_m256i_i16(r, ==, test_vec[i].r);
  }

  return 0;
}

/* Table-driven test for simde_mm256_min_epi32.
 *
 * Each of the 8 entries holds two inputs (a, b) and the expected result r,
 * all built with simde_mm256_set_epi32.  In every entry each element of r
 * is the smaller of the corresponding elements of a and b when compared as
 * signed 32-bit integers.
 *
 * Returns 0 after all entries have been checked. */
static int
test_simde_mm256_min_epi32(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256i a;
    simde__m256i b;
    simde__m256i r;
  } test_vec[8] = {
    { simde_mm256_set_epi32(INT32_C(-1887694974), INT32_C( -220594465), INT32_C( 1301651958), INT32_C( 413618245), INT32_C(-1657404057), INT32_C( 1191958394), INT32_C( 967153179), INT32_C( 972617254)),
      simde_mm256_set_epi32(INT32_C( 982854550), INT32_C(-2124977293), INT32_C( 1533583803), INT32_C( -612489163), INT32_C(-1722522842), INT32_C( 2126738736), INT32_C( 1095317321), INT32_C(-1565328847)),
      simde_mm256_set_epi32(INT32_C(-1887694974), INT32_C(-2124977293), INT32_C( 1301651958), INT32_C( -612489163), INT32_C(-1722522842), INT32_C( 1191958394), INT32_C( 967153179), INT32_C(-1565328847)) },
    { simde_mm256_set_epi32(INT32_C( 890593912), INT32_C(-1893213341), INT32_C(-2064534933), INT32_C( 410831441), INT32_C( 1944601115), INT32_C( 255670666), INT32_C(-1879770949), INT32_C( -300592271)),
      simde_mm256_set_epi32(INT32_C(-1718768029), INT32_C( 1918925456), INT32_C( 270470856), INT32_C( -805121769), INT32_C( 1773032568), INT32_C( 168212411), INT32_C( -236803068), INT32_C( 1984862855)),
      simde_mm256_set_epi32(INT32_C(-1718768029), INT32_C(-1893213341), INT32_C(-2064534933), INT32_C( -805121769), INT32_C( 1773032568), INT32_C( 168212411), INT32_C(-1879770949), INT32_C( -300592271)) },
    { simde_mm256_set_epi32(INT32_C( 1663360923), INT32_C(-1870188573), INT32_C( 1752083528), INT32_C( -891808871), INT32_C(-1144120673), INT32_C( -843961941), INT32_C( 608988977), INT32_C( 1643979843)),
      simde_mm256_set_epi32(INT32_C( 1123563527), INT32_C( 982871713), INT32_C( 533211656), INT32_C( -756017794), INT32_C( -190681789), INT32_C( 795284840), INT32_C( 1667933527), INT32_C(-1392350825)),
      simde_mm256_set_epi32(INT32_C( 1123563527), INT32_C(-1870188573), INT32_C( 533211656), INT32_C( -891808871), INT32_C(-1144120673), INT32_C( -843961941), INT32_C( 608988977), INT32_C(-1392350825)) },
    { simde_mm256_set_epi32(INT32_C(-1310257406), INT32_C(-1408682397), INT32_C(-1149899729), INT32_C( 355234107), INT32_C( 155318708), INT32_C(-2067786063), INT32_C( -971167680), INT32_C( -142216224)),
      simde_mm256_set_epi32(INT32_C(-1896281663), INT32_C( 11464011), INT32_C( 1160784588), INT32_C( 2011962055), INT32_C( 1482122113), INT32_C( 806565597), INT32_C( 85453656), INT32_C( 1469305473)),
      simde_mm256_set_epi32(INT32_C(-1896281663), INT32_C(-1408682397), INT32_C(-1149899729), INT32_C( 355234107), INT32_C( 155318708), INT32_C(-2067786063), INT32_C( -971167680), INT32_C( -142216224)) },
    { simde_mm256_set_epi32(INT32_C( 681282753), INT32_C(-1269573634), INT32_C( 1244698819), INT32_C( 1597134482), INT32_C( -396103298), INT32_C(-1209039111), INT32_C( 1604666110), INT32_C( 1489663402)),
      simde_mm256_set_epi32(INT32_C( -740862027), INT32_C( 1761545085), INT32_C(-2057954770), INT32_C( 104248127), INT32_C( 1333889643), INT32_C(-2032337216), INT32_C( 1101485667), INT32_C( -818155161)),
      simde_mm256_set_epi32(INT32_C( -740862027), INT32_C(-1269573634), INT32_C(-2057954770), INT32_C( 104248127), INT32_C( -396103298), INT32_C(-2032337216), INT32_C( 1101485667), INT32_C( -818155161)) },
    { simde_mm256_set_epi32(INT32_C( -99180911), INT32_C( -439106368), INT32_C( -219026380), INT32_C(-1842055049), INT32_C( -586995091), INT32_C( 284086118), INT32_C( 1471875584), INT32_C( 18155208)),
      simde_mm256_set_epi32(INT32_C(-1002814318), INT32_C( 422940673), INT32_C( 1746332809), INT32_C(-1093308989), INT32_C(-1429872214), INT32_C( -179799423), INT32_C( 354446702), INT32_C( 1894329413)),
      simde_mm256_set_epi32(INT32_C(-1002814318), INT32_C( -439106368), INT32_C( -219026380), INT32_C(-1842055049), INT32_C(-1429872214), INT32_C( -179799423), INT32_C( 354446702), INT32_C( 18155208)) },
    { simde_mm256_set_epi32(INT32_C(-1356324197), INT32_C( 1432808198), INT32_C( 401202220), INT32_C( -194069457), INT32_C( 767922262), INT32_C(-1577608365), INT32_C( 626335740), INT32_C( 1655291833)),
      simde_mm256_set_epi32(INT32_C( -963509238), INT32_C( -638026164), INT32_C( 1442158122), INT32_C( 2100182819), INT32_C( -392345708), INT32_C( 882354404), INT32_C( 215437606), INT32_C( -216148933)),
      simde_mm256_set_epi32(INT32_C(-1356324197), INT32_C( -638026164), INT32_C( 401202220), INT32_C( -194069457), INT32_C( -392345708), INT32_C(-1577608365), INT32_C( 215437606), INT32_C( -216148933)) },
    { simde_mm256_set_epi32(INT32_C( -593312907), INT32_C( 16741987), INT32_C( -897248657), INT32_C( 147155789), INT32_C( 123410758), INT32_C( 1347488541), INT32_C( 1836961082), INT32_C( 1115588192)),
      simde_mm256_set_epi32(INT32_C(-1511769299), INT32_C( 1665387130), INT32_C( -50678944), INT32_C( 270893388), INT32_C( 950208462), INT32_C( 948963023), INT32_C( 1811011207), INT32_C(-1352546207)),
      simde_mm256_set_epi32(INT32_C(-1511769299), INT32_C( 16741987), INT32_C( -897248657), INT32_C( 147155789), INT32_C( 123410758), INT32_C( 948963023), INT32_C( 1811011207), INT32_C(-1352546207)) }
  };

  /* Compare the computed vector with the stored expectation as signed
   * 32-bit lanes. */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256i r = simde_mm256_min_epi32(test_vec[i].a, test_vec[i].b);
    simde_assert_m256i_i32(r, ==, test_vec[i].r);
  }

  return 0;
}

/* Start of the test list for this file; the list continues on the following
 * lines.
 * NOTE(review): each entry presumably refers to the matching
 * test_simde_<name> function -- confirm against the
 * SIMDE_TEST_FUNC_LIST_ENTRY macro definition. */
SIMDE_TEST_FUNC_LIST_BEGIN
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_abs_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_abs_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_abs_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_add_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_add_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_add_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_add_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_adds_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_adds_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_adds_epu8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_adds_epu16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_alignr_epi8_case0)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_alignr_epi8_case1)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_alignr_epi8_case2)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_alignr_epi8_case3)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_and_si256)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_andnot_si256)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_avg_epu8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_avg_epu16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_blend_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_blend_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_blend_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_blendv_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_broadcastb_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_broadcastb_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_broadcastw_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_broadcastw_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_broadcastd_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm256_broadcastd_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_broadcastq_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_broadcastq_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_broadcastss_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_broadcastss_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_broadcastsd_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_broadcastsd_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_broadcastsi128_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_bslli_epi128) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_bsrli_epi128) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cmpeq_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cmpeq_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cmpeq_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cmpeq_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cmpgt_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cmpgt_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cmpgt_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cmpgt_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtepi8_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtepi8_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtepi8_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtepi16_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtepi16_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtepi32_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtepu8_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtepu8_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtepu8_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtepu16_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtepu16_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtepu32_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_extract_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_extract_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_extracti128_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_hadd_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_hadd_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_hadds_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_hsub_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_hsub_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_hsubs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_i32gather_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_i32gather_epi32) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm256_i32gather_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_i32gather_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_i64gather_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_i64gather_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_i64gather_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_i64gather_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_i32gather_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_i32gather_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_i32gather_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_i32gather_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_i64gather_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_i64gather_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_i64gather_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_i64gather_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_i32gather_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_i32gather_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_i32gather_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_i32gather_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_i64gather_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_i64gather_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_i64gather_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_i64gather_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_i32gather_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_i32gather_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_i32gather_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_i32gather_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_i64gather_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_i64gather_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_i64gather_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_i64gather_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_inserti128_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_madd_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maddubs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskload_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskload_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskload_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskload_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskstore_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskstore_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskstore_epi64) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskstore_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_max_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_max_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_max_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_max_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_max_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_max_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_min_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_min_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_min_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_min_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_min_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_min_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_movemask_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mpsadbw_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mul_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mul_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mulhi_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mulhi_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mulhrs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mullo_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mullo_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm256_mullo_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_or_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_packs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_packs_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_packus_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_packus_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permute4x64_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permute4x64_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permute2x128_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permutevar8x32_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permutevar8x32_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sad_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_shuffle_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_shuffle_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_shufflehi_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_shufflelo_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sign_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sign_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sign_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sll_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sll_epi32) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sll_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_slli_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_slli_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_slli_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_slli_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sllv_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sllv_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sllv_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sllv_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sra_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sra_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_srai_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_srai_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_srav_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_srav_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_srl_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_srl_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_srl_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_srli_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_srli_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_srli_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_srli_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm_srlv_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_srlv_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_srlv_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_srlv_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_stream_load_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sub_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sub_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sub_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sub_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm256_sub_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_subs_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_subs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_subs_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_subs_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_unpacklo_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_unpacklo_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_unpacklo_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_unpacklo_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_unpackhi_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_unpackhi_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_unpackhi_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_unpackhi_epi64) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm256_xor_si256) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/000077500000000000000000000000001400333146700152545ustar00rootroot00000000000000simde-0.7.2/test/x86/avx512/2intersect.c000066400000000000000000000637561400333146700175230ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #define SIMDE_TEST_X86_AVX512_INSN 2intersect #include #include static int test_simde_mm_2intersect_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[4]; const int32_t b[4]; const simde__mmask8 k1; const simde__mmask8 k2; } test_vec[] = { { { INT32_C( 1933024678), INT32_C( 1654182574), INT32_C( 2112578681), INT32_C( 1940886531) }, { -INT32_C( 1741466723), INT32_C( 1850577758), INT32_C( 1878396937), INT32_C( 1834180551) }, UINT8_C( 0), UINT8_C( 0) }, { { -INT32_C( 1411347460), -INT32_C( 686982819), -INT32_C( 531302179), INT32_C( 1411347460) }, { -INT32_C( 1245608361), INT32_C( 1411347460), -INT32_C( 426632674), INT32_C( 1968432505) }, UINT8_C( 8), UINT8_C( 2) }, { { INT32_C( 1560976195), INT32_C( 1560976195), INT32_C( 1560976195), INT32_C( 1560976195) }, { INT32_C( 1560976195), -INT32_C( 931123542), -INT32_C( 1045556664), INT32_C( 1748370168) }, UINT8_C( 15), UINT8_C( 1) }, { { -INT32_C( 482977994), -INT32_C( 1435640956), -INT32_C( 765732907), -INT32_C( 685295980) }, { INT32_C( 2067083985), -INT32_C( 482977994), -INT32_C( 482977994), INT32_C( 707305460) }, UINT8_C( 1), UINT8_C( 6) }, { { INT32_C( 2064474102), INT32_C( 254114362), -INT32_C( 438140847), -INT32_C( 2000877130) }, { -INT32_C( 1694240197), INT32_C( 2064474102), INT32_C( 2064474102), INT32_C( 1113151564) }, UINT8_C( 1), UINT8_C( 6) }, { { -INT32_C( 11203149), INT32_C( 838984417), INT32_C( 421061475), -INT32_C( 11203149) }, { INT32_C( 1858315719), INT32_C( 652085228), -INT32_C( 11203149), INT32_C( 474132069) }, UINT8_C( 9), UINT8_C( 4) }, { { -INT32_C( 183566572), INT32_C( 1160253410), -INT32_C( 1857369435), -INT32_C( 603586539) }, { -INT32_C( 1857369435), -INT32_C( 1857369435), -INT32_C( 1685844682), -INT32_C( 1857369435) }, UINT8_C( 4), UINT8_C( 11) }, { { -INT32_C( 1480210747), -INT32_C( 940773931), INT32_C( 1118128941), -INT32_C( 266425269) }, { INT32_C( 1199663222), -INT32_C( 266425269), -INT32_C( 266425269), -INT32_C( 1480210747) }, UINT8_C( 
9), UINT8_C( 14) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi32(test_vec[i].b); simde__mmask8 k1, k2; simde_mm_2intersect_epi32(a, b, &k1, &k2); simde_assert_equal_u8(k1, test_vec[i].k1); simde_assert_equal_u8(k2, test_vec[i].k2); } return 0; } static int test_simde_mm_2intersect_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[2]; const int64_t b[2]; const simde__mmask8 k1; const simde__mmask8 k2; } test_vec[] = { { { -INT64_C( 4776335177296002651), INT64_C( 8875469304536200269) }, { INT64_C( 862643719312783235), INT64_C( 2147708466904836265) }, UINT8_C( 0), UINT8_C( 0) }, { { INT64_C( 4045976202395705330), INT64_C( 4045976202395705330) }, { INT64_C( 4045976202395705330), INT64_C( 4045976202395705330) }, UINT8_C( 3), UINT8_C( 3) }, { { -INT64_C( 2397572822580459626), -INT64_C( 6170985749621319162) }, { -INT64_C( 2397572822580459626), -INT64_C( 785892022578403345) }, UINT8_C( 1), UINT8_C( 1) }, { { INT64_C( 3391546047396777466), INT64_C( 3593163477413257996) }, { INT64_C( 3593163477413257996), -INT64_C( 541550164919252183) }, UINT8_C( 2), UINT8_C( 1) }, { { INT64_C( 3561287535980966675), -INT64_C( 5515149198495544545) }, { INT64_C( 1587772868994464785), INT64_C( 1587772868994464785) }, UINT8_C( 0), UINT8_C( 0) }, { { INT64_C( 8128785740146199590), INT64_C( 8128785740146199590) }, { INT64_C( 9088189746117568130), INT64_C( 8128785740146199590) }, UINT8_C( 3), UINT8_C( 2) }, { { INT64_C( 3587253327639487946), INT64_C( 7488792276366498831) }, { INT64_C( 3587253327639487946), -INT64_C( 4033080124360595636) }, UINT8_C( 1), UINT8_C( 1) }, { { -INT64_C( 2532488855789940029), -INT64_C( 3119956717484221082) }, { INT64_C( 7328211024378615371), -INT64_C( 3119956717484221082) }, UINT8_C( 2), UINT8_C( 2) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = 
simde_x_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__mmask8 k1, k2; simde_mm_2intersect_epi64(a, b, &k1, &k2); simde_assert_equal_u8(k1, test_vec[i].k1); simde_assert_equal_u8(k2, test_vec[i].k2); } return 0; } static int test_simde_mm256_2intersect_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[8]; const int32_t b[8]; const simde__mmask8 k1; const simde__mmask8 k2; } test_vec[] = { { { INT32_C( 1217017979), INT32_C( 1747215362), -INT32_C( 1498093887), -INT32_C( 528783467), -INT32_C( 696013891), INT32_C( 624637876), -INT32_C( 728474186), INT32_C( 28691334) }, { INT32_C( 105463556), INT32_C( 1685024163), -INT32_C( 385211564), INT32_C( 1204454794), -INT32_C( 450998992), INT32_C( 990534020), INT32_C( 1007656886), INT32_C( 1849541738) }, UINT8_C( 0), UINT8_C( 0) }, { { -INT32_C( 1502247421), INT32_C( 1208673524), -INT32_C( 1859054585), -INT32_C( 891749478), -INT32_C( 844106167), INT32_C( 101235280), -INT32_C( 1019013287), -INT32_C( 567115557) }, { -INT32_C( 75192569), -INT32_C( 1502247421), INT32_C( 1025733795), -INT32_C( 1502247421), INT32_C( 1116125170), -INT32_C( 901214351), -INT32_C( 2104652890), INT32_C( 325107468) }, UINT8_C( 1), UINT8_C( 10) }, { { -INT32_C( 250682010), INT32_C( 377704819), INT32_C( 894674629), -INT32_C( 1796318302), -INT32_C( 2083097326), -INT32_C( 1437786620), -INT32_C( 1238574422), INT32_C( 13208986) }, { -INT32_C( 437069966), -INT32_C( 894674629), -INT32_C( 1104982244), -INT32_C( 1135472214), -INT32_C( 1975572346), -INT32_C( 264926138), INT32_C( 894674629), INT32_C( 1610706926) }, UINT8_C( 4), UINT8_C( 64) }, { { INT32_C( 1850143558), -INT32_C( 2057551255), INT32_C( 1967141599), INT32_C( 402167184), INT32_C( 60897469), INT32_C( 1967141599), INT32_C( 640457272), INT32_C( 1351036426) }, { -INT32_C( 1967141599), -INT32_C( 1609622769), INT32_C( 719082138), INT32_C( 1967141599), -INT32_C( 777460979), -INT32_C( 235037511), INT32_C( 1075325238), INT32_C( 2039586647) }, UINT8_C( 
36), UINT8_C( 8) }, { { INT32_C( 2063814764), INT32_C( 85660523), INT32_C( 1294989413), -INT32_C( 621710899), INT32_C( 229349716), INT32_C( 1828694069), INT32_C( 2104176048), INT32_C( 581057974) }, { INT32_C( 2104176048), INT32_C( 519944377), INT32_C( 2104176048), -INT32_C( 229155426), INT32_C( 754910198), INT32_C( 2107178667), -INT32_C( 878230507), INT32_C( 267209090) }, UINT8_C( 64), UINT8_C( 5) }, { { -INT32_C( 1475835153), -INT32_C( 205060797), -INT32_C( 797887950), -INT32_C( 1976319669), INT32_C( 2008203979), -INT32_C( 688633152), INT32_C( 279026574), INT32_C( 1976319669) }, { INT32_C( 1551706137), INT32_C( 1615873069), INT32_C( 1976319669), INT32_C( 1435300745), INT32_C( 1976319669), INT32_C( 374063240), INT32_C( 1093135708), -INT32_C( 1793767556) }, UINT8_C(128), UINT8_C( 20) }, { { -INT32_C( 1992980813), INT32_C( 1308377814), -INT32_C( 1992980813), -INT32_C( 706617568), -INT32_C( 1992980813), -INT32_C( 1992980813), -INT32_C( 15998333), -INT32_C( 1992980813) }, { -INT32_C( 1992980813), -INT32_C( 875155000), INT32_C( 2119706974), INT32_C( 626211080), INT32_C( 1439342311), -INT32_C( 1992980813), -INT32_C( 1833555352), -INT32_C( 1992980813) }, UINT8_C(181), UINT8_C(161) }, { { -INT32_C( 1719181359), -INT32_C( 278569583), -INT32_C( 1888567929), INT32_C( 464865930), -INT32_C( 1825407137), INT32_C( 1850364165), -INT32_C( 973078405), INT32_C( 482552395) }, { INT32_C( 464865930), INT32_C( 464865930), INT32_C( 464865930), -INT32_C( 1716817094), -INT32_C( 131211533), -INT32_C( 1419348177), -INT32_C( 1015978376), INT32_C( 467612817) }, UINT8_C( 8), UINT8_C( 7) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi32(test_vec[i].b); simde__mmask8 k1, k2; simde_mm256_2intersect_epi32(a, b, &k1, &k2); simde_assert_equal_u8(k1, test_vec[i].k1); simde_assert_equal_u8(k2, test_vec[i].k2); } return 0; } static int test_simde_mm256_2intersect_epi64 
(SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[4]; const int64_t b[4]; const simde__mmask8 k1; const simde__mmask8 k2; } test_vec[] = { { { -INT64_C( 8766748503608748929), INT64_C( 8902491151602554041), -INT64_C( 6216979381371029026), -INT64_C( 2455504826211227328) }, { INT64_C( 2023622991203803871), INT64_C( 5148066728463264110), -INT64_C( 58813153888836267), -INT64_C( 4219101102839063753) }, UINT8_C( 0), UINT8_C( 0) }, { { INT64_C( 4494427296342767328), INT64_C( 4494427296342767328), INT64_C( 2110521255196416139), -INT64_C( 5162773510200616018) }, { -INT64_C( 1609937810159325542), INT64_C( 4494427296342767328), -INT64_C( 2917026165189634687), -INT64_C( 841956137440168257) }, UINT8_C( 3), UINT8_C( 2) }, { { INT64_C( 7958994853337910131), -INT64_C( 4289554115039553052), INT64_C( 3856682679687434886), INT64_C( 3041368692472213990) }, { INT64_C( 9186261986360726632), -INT64_C( 5419976400773268030), INT64_C( 3856682679687434886), INT64_C( 3366724148763797977) }, UINT8_C( 4), UINT8_C( 4) }, { { INT64_C( 4809367613008675296), INT64_C( 2978891424056842101), INT64_C( 8883542278980876825), -INT64_C( 6247402992387353768) }, { -INT64_C( 5322822373234603982), -INT64_C( 4809367613008675296), INT64_C( 4809367613008675296), -INT64_C( 8605108578678238506) }, UINT8_C( 1), UINT8_C( 4) }, { { INT64_C( 1658014014993689867), INT64_C( 8597494887961549808), INT64_C( 3557126004884492111), INT64_C( 2701845427586504351) }, { INT64_C( 3557126004884492111), INT64_C( 3557126004884492111), INT64_C( 3557126004884492111), INT64_C( 5834683804961742777) }, UINT8_C( 4), UINT8_C( 7) }, { { INT64_C( 6181203837213840955), INT64_C( 6181203837213840955), -INT64_C( 7055680582451281489), INT64_C( 5233017304722279758) }, { -INT64_C( 6244094469284677934), INT64_C( 7314164337829338752), INT64_C( 6181203837213840955), INT64_C( 1181151732418991513) }, UINT8_C( 3), UINT8_C( 4) }, { { -INT64_C( 5164758819808562559), INT64_C( 8867810427645032693), -INT64_C( 5974686722716203094), -INT64_C( 
8301490860214009732) }, { -INT64_C( 7651463346696779295), -INT64_C( 7651463346696779295), INT64_C( 8942278592495508056), -INT64_C( 886080949694504164) }, UINT8_C( 0), UINT8_C( 0) }, { { INT64_C( 8701478350864771979), -INT64_C( 6167911330808003851), INT64_C( 8701478350864771979), -INT64_C( 4028468504552457453) }, { -INT64_C( 731071876734638868), -INT64_C( 6650349780674285640), INT64_C( 8701478350864771979), INT64_C( 5579763545130950761) }, UINT8_C( 5), UINT8_C( 4) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi64(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi64(test_vec[i].b); simde__mmask8 k1, k2; simde_mm256_2intersect_epi64(a, b, &k1, &k2); simde_assert_equal_u8(k1, test_vec[i].k1); simde_assert_equal_u8(k2, test_vec[i].k2); } return 0; } static int test_simde_mm512_2intersect_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[16]; const int32_t b[16]; const simde__mmask16 k1; const simde__mmask16 k2; } test_vec[] = { { { INT32_C( 1907319665), -INT32_C( 781809383), -INT32_C( 1797507720), INT32_C( 1345818988), INT32_C( 1480483164), -INT32_C( 110669168), INT32_C( 346069938), INT32_C( 1499533288), -INT32_C( 1932914574), INT32_C( 291320217), -INT32_C( 760923802), INT32_C( 656661706), -INT32_C( 830512834), INT32_C( 1724376755), INT32_C( 1786406785), -INT32_C( 373040266) }, { -INT32_C( 2055893268), INT32_C( 630706879), -INT32_C( 688374772), INT32_C( 1476205337), INT32_C( 807763068), -INT32_C( 459871134), -INT32_C( 900853676), -INT32_C( 676130325), INT32_C( 1583163551), INT32_C( 126153979), INT32_C( 1239252016), INT32_C( 329309079), -INT32_C( 1169963689), INT32_C( 111073714), -INT32_C( 707728151), -INT32_C( 1649638147) }, UINT16_C( 0), UINT16_C( 0) }, { { -INT32_C( 1476720212), INT32_C( 783187965), -INT32_C( 1837658885), -INT32_C( 1096411033), -INT32_C( 1887901219), -INT32_C( 1399515453), INT32_C( 8480258), -INT32_C( 1768083734), INT32_C( 893294647), INT32_C( 
325315608), -INT32_C( 542713224), -INT32_C( 794931982), -INT32_C( 127986123), INT32_C( 799405357), INT32_C( 1160717915), -INT32_C( 1931752364) }, { INT32_C( 2109806949), INT32_C( 2123375622), -INT32_C( 245483778), -INT32_C( 1195246462), INT32_C( 1068507154), INT32_C( 1886344469), -INT32_C( 793403780), -INT32_C( 799239829), -INT32_C( 1476720212), INT32_C( 1076878657), -INT32_C( 1476720212), -INT32_C( 1476720212), INT32_C( 685506322), -INT32_C( 795325868), INT32_C( 1403080424), -INT32_C( 1994129953) }, UINT16_C( 1), UINT16_C( 3328) }, { { INT32_C( 1547333659), INT32_C( 1654417742), INT32_C( 2147012086), -INT32_C( 770029633), -INT32_C( 1694829242), INT32_C( 678138431), INT32_C( 1547333659), INT32_C( 608804617), INT32_C( 1547333659), -INT32_C( 490791444), -INT32_C( 1436436245), INT32_C( 1165786111), -INT32_C( 1327532431), -INT32_C( 371701239), INT32_C( 1621709910), INT32_C( 42267379) }, { INT32_C( 1547333659), INT32_C( 239279907), INT32_C( 1547333659), -INT32_C( 1877265121), -INT32_C( 1270744149), INT32_C( 1547333659), INT32_C( 1627146093), -INT32_C( 1369210823), -INT32_C( 1458584698), -INT32_C( 1162390814), INT32_C( 361787382), INT32_C( 1353100452), -INT32_C( 335222873), INT32_C( 1854448128), INT32_C( 583959785), -INT32_C( 1949290235) }, UINT16_C( 321), UINT16_C( 37) }, { { -INT32_C( 684400652), INT32_C( 663874353), -INT32_C( 12770982), INT32_C( 1951392717), -INT32_C( 899591222), -INT32_C( 549918218), INT32_C( 1929512814), INT32_C( 771674680), -INT32_C( 486264142), INT32_C( 1997247773), -INT32_C( 2072622922), -INT32_C( 684400652), INT32_C( 264198425), -INT32_C( 1309738941), INT32_C( 925167871), INT32_C( 1969496771) }, { INT32_C( 1901619284), -INT32_C( 684400652), -INT32_C( 700948565), INT32_C( 1036726308), -INT32_C( 850621558), -INT32_C( 684400652), -INT32_C( 289824213), INT32_C( 409149124), -INT32_C( 2071348090), -INT32_C( 684400652), -INT32_C( 173969200), INT32_C( 707947936), INT32_C( 2079817720), -INT32_C( 436374086), -INT32_C( 684400652), INT32_C( 1542666196) }, 
UINT16_C( 2049), UINT16_C(16930) }, { { INT32_C( 333413619), -INT32_C( 1092741138), INT32_C( 666075015), -INT32_C( 464328980), INT32_C( 526338405), -INT32_C( 704357185), -INT32_C( 407709677), INT32_C( 54699280), INT32_C( 269885730), -INT32_C( 1076956104), INT32_C( 1625784948), -INT32_C( 851101336), INT32_C( 1106027650), INT32_C( 337178625), -INT32_C( 637809975), -INT32_C( 1847837073) }, { -INT32_C( 1734216865), INT32_C( 1532457191), INT32_C( 1522286322), -INT32_C( 98107017), -INT32_C( 1506077787), -INT32_C( 860204285), -INT32_C( 1918519779), INT32_C( 1394508532), INT32_C( 1558954101), INT32_C( 582501168), -INT32_C( 109284223), INT32_C( 452174709), -INT32_C( 1178521930), -INT32_C( 1618576510), INT32_C( 623651633), INT32_C( 578374573) }, UINT16_C( 0), UINT16_C( 0) }, { { INT32_C( 998204427), INT32_C( 693974951), INT32_C( 539154859), INT32_C( 842667388), -INT32_C( 974390461), -INT32_C( 1469746825), INT32_C( 1255051677), -INT32_C( 412268580), INT32_C( 1394797739), -INT32_C( 830701789), -INT32_C( 722559400), -INT32_C( 167368525), INT32_C( 1394797739), INT32_C( 4399459), -INT32_C( 1890905678), INT32_C( 74889304) }, { -INT32_C( 950560348), INT32_C( 1394797739), INT32_C( 1394797739), -INT32_C( 820360021), -INT32_C( 1586833858), -INT32_C( 1432178953), INT32_C( 1394797739), INT32_C( 1226616741), INT32_C( 1628468040), -INT32_C( 1210931898), -INT32_C( 740616408), -INT32_C( 1583090333), -INT32_C( 1019015732), INT32_C( 1394797739), INT32_C( 2006165202), -INT32_C( 1631539370) }, UINT16_C( 4352), UINT16_C( 8262) }, { { INT32_C( 1761595426), -INT32_C( 1658859147), INT32_C( 1282472681), -INT32_C( 1142025233), INT32_C( 914111965), -INT32_C( 396891114), INT32_C( 914111965), INT32_C( 914111965), INT32_C( 1680050159), INT32_C( 1090601560), INT32_C( 747532604), -INT32_C( 1494778747), -INT32_C( 1014864211), INT32_C( 914111965), INT32_C( 1590627012), INT32_C( 404313641) }, { INT32_C( 914111965), -INT32_C( 1133019777), INT32_C( 914111965), INT32_C( 914111965), -INT32_C( 1980654538), 
INT32_C( 980393078), -INT32_C( 812106074), INT32_C( 1927787156), INT32_C( 1856529391), -INT32_C( 819322912), -INT32_C( 1505619419), INT32_C( 416636130), INT32_C( 1856095992), INT32_C( 162009187), -INT32_C( 489078450), -INT32_C( 497762061) }, UINT16_C( 8400), UINT16_C( 13) }, { { INT32_C( 72416292), INT32_C( 1104378652), INT32_C( 1877415565), INT32_C( 1787280498), -INT32_C( 422041469), -INT32_C( 2031124424), -INT32_C( 1268135743), -INT32_C( 1382630007), -INT32_C( 692983878), -INT32_C( 283671198), INT32_C( 224394907), INT32_C( 1048045243), INT32_C( 224394907), -INT32_C( 1848765489), INT32_C( 1699035100), INT32_C( 224394907) }, { INT32_C( 629457859), -INT32_C( 485188792), INT32_C( 1441821594), INT32_C( 1771267930), -INT32_C( 2035238986), -INT32_C( 1508409654), -INT32_C( 1442095947), -INT32_C( 78111432), INT32_C( 690019809), INT32_C( 224394907), INT32_C( 40369576), INT32_C( 224394907), INT32_C( 2090933425), INT32_C( 1344452762), INT32_C( 224394907), INT32_C( 224394907) }, UINT16_C(37888), UINT16_C(51712) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__mmask16 k1, k2; simde_mm512_2intersect_epi32(a, b, &k1, &k2); simde_assert_equal_u16(k1, test_vec[i].k1); simde_assert_equal_u16(k2, test_vec[i].k2); } return 0; } static int test_simde_mm512_2intersect_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[8]; const int64_t b[8]; const simde__mmask8 k1; const simde__mmask8 k2; } test_vec[] = { { { INT64_C( 5168553055441283375), -INT64_C( 3539348696962882976), INT64_C( 5817362346138043089), INT64_C( 3968487147570050458), INT64_C( 3185622512111151184), INT64_C( 407243940743446460), -INT64_C( 585419171191263147), -INT64_C( 5466824156440068639) }, { INT64_C( 6219709241949814872), INT64_C( 7606814054839959895), INT64_C( 1016363449530777798), INT64_C( 4642675653713852502), INT64_C( 4614699684024553753), 
-INT64_C( 3138064399478343178), -INT64_C( 7235575661464163237), -INT64_C( 4669453642320409334) }, UINT8_C( 0), UINT8_C( 0) }, { { INT64_C( 4755374921309394901), -INT64_C( 9180378775273354827), -INT64_C( 6049567253957406678), -INT64_C( 319526712694120149), INT64_C( 3887935236943184743), INT64_C( 4755374921309394901), -INT64_C( 617094524076282335), INT64_C( 4982487373490752772) }, { INT64_C( 1734428563722526300), -INT64_C( 4803963547392538067), -INT64_C( 7649943887019971310), INT64_C( 4755374921309394901), INT64_C( 3706815463824562626), INT64_C( 2712489773651406667), INT64_C( 4755374921309394901), INT64_C( 3153380010101671287) }, UINT8_C( 33), UINT8_C( 72) }, { { -INT64_C( 9123460286336666584), INT64_C( 1539118865939218724), -INT64_C( 9123460286336666584), INT64_C( 8873746391598253201), -INT64_C( 4284036367812810027), -INT64_C( 9123460286336666584), INT64_C( 4151980932426210487), INT64_C( 8349489671477378065) }, { INT64_C( 6946251108763207682), INT64_C( 8156009828404257278), -INT64_C( 9046324889687907181), INT64_C( 202034163600500667), -INT64_C( 9123460286336666584), INT64_C( 7968700468414374764), -INT64_C( 5685479010123188442), INT64_C( 1353570027672698822) }, UINT8_C( 37), UINT8_C( 16) }, { { -INT64_C( 2031627594634138333), -INT64_C( 3216355587245227491), -INT64_C( 3216355587245227491), INT64_C( 4922574173478799738), -INT64_C( 3216355587245227491), -INT64_C( 8783732379857039187), INT64_C( 8003268126061066997), INT64_C( 3819050597373298842) }, { -INT64_C( 3216355587245227491), -INT64_C( 5501322054403173328), -INT64_C( 3216355587245227491), INT64_C( 3182461549434432685), -INT64_C( 3216355587245227491), -INT64_C( 2415448909100637430), -INT64_C( 2304457403914170796), -INT64_C( 2166793525636209448) }, UINT8_C( 22), UINT8_C( 21) }, { { INT64_C( 1529152442343085506), INT64_C( 1885134428219124552), -INT64_C( 7225886222500309083), -INT64_C( 4881639813352599776), -INT64_C( 4881639813352599776), INT64_C( 1871862973153615129), -INT64_C( 4881639813352599776), INT64_C( 
3321127711715148600) }, { -INT64_C( 4881639813352599776), INT64_C( 4919248266449914807), INT64_C( 1723319408414111010), INT64_C( 4572830837763827512), INT64_C( 2401416311529263042), -INT64_C( 4881639813352599776), -INT64_C( 3045321076404049978), INT64_C( 5902690307842808383) }, UINT8_C( 88), UINT8_C( 33) }, { { -INT64_C( 7716316538138037920), -INT64_C( 8162633823873976534), -INT64_C( 4820557501022558553), -INT64_C( 4820557501022558553), -INT64_C( 8695084884341802301), -INT64_C( 7716316538138037920), INT64_C( 1657394534526917829), INT64_C( 8962897428284991605) }, { INT64_C( 4520083695532266198), INT64_C( 462040351903261004), INT64_C( 4881702772540018459), INT64_C( 7055136805196132274), INT64_C( 116335706281092147), INT64_C( 5815819702404429167), -INT64_C( 4820557501022558553), INT64_C( 2707313128120914342) }, UINT8_C( 12), UINT8_C( 64) }, { { -INT64_C( 2078091870763473293), INT64_C( 2475652280986350039), -INT64_C( 5653422662837316793), -INT64_C( 6272664560019460197), -INT64_C( 6272664560019460197), INT64_C( 1619342924220434101), -INT64_C( 6272664560019460197), INT64_C( 1231787778849964068) }, { -INT64_C( 5883929854820092705), INT64_C( 9043367803184548763), INT64_C( 9103191405880813417), INT64_C( 7678633565749467912), INT64_C( 6887818870142972144), -INT64_C( 6272664560019460197), INT64_C( 1101744769376845681), INT64_C( 1837218135771262346) }, UINT8_C( 88), UINT8_C( 32) }, { { -INT64_C( 8579190114695865908), INT64_C( 8490188015147203124), INT64_C( 6428511732540512726), INT64_C( 6428511732540512726), -INT64_C( 3880339714752709229), -INT64_C( 5659074764894222339), -INT64_C( 298302716710701158), -INT64_C( 1961642745170929904) }, { INT64_C( 6428511732540512726), -INT64_C( 2873912020668100368), INT64_C( 5892163249051208328), INT64_C( 5888770405444264579), -INT64_C( 3104119355101856970), -INT64_C( 7108265615378266812), INT64_C( 6428511732540512726), -INT64_C( 6513078694919253296) }, UINT8_C( 12), UINT8_C( 65) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__mmask8 k1, k2; simde_mm512_2intersect_epi64(a, b, &k1, &k2); simde_assert_equal_u8(k1, test_vec[i].k1); simde_assert_equal_u8(k2, test_vec[i].k2); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_2intersect_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_2intersect_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_2intersect_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_2intersect_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_2intersect_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_2intersect_epi64) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/abs.c000066400000000000000000007414621400333146700162030ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN abs #include #include #include static int test_simde_mm_mask_abs_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int8_t src[16]; const simde__mmask16 k; const int8_t a[16]; const int8_t r[16]; } test_vec[] = { { { INT8_C( 115), -INT8_C( 14), -INT8_C( 39), -INT8_C( 78), INT8_C( 20), -INT8_C( 119), INT8_C( 90), -INT8_C( 11), INT8_C( 38), INT8_C( 60), INT8_C( 35), INT8_C( 25), -INT8_C( 81), INT8_C( 45), INT8_C( 59), -INT8_C( 89) }, UINT16_C(61702), { -INT8_C( 102), INT8_C( 16), -INT8_C( 119), -INT8_C( 7), INT8_C( 26), INT8_C( 102), INT8_C( 48), -INT8_C( 119), INT8_C( 49), -INT8_C( 12), -INT8_C( 14), INT8_C( 101), -INT8_C( 13), -INT8_C( 109), INT8_C( 16), -INT8_C( 69) }, { INT8_C( 115), INT8_C( 16), INT8_C( 119), -INT8_C( 78), INT8_C( 20), -INT8_C( 119), INT8_C( 90), -INT8_C( 11), INT8_C( 49), INT8_C( 60), INT8_C( 35), INT8_C( 25), INT8_C( 13), INT8_C( 109), INT8_C( 16), INT8_C( 69) } }, { { INT8_C( 105), -INT8_C( 125), INT8_C( 57), -INT8_C( 42), INT8_C( 112), INT8_C( 9), INT8_C( 90), INT8_C( 60), -INT8_C( 44), -INT8_C( 15), INT8_C( 105), -INT8_C( 88), -INT8_C( 21), INT8_C( 36), -INT8_C( 54), -INT8_C( 5) }, UINT16_C(16140), { INT8_C( 85), -INT8_C( 80), INT8_C( 35), -INT8_C( 120), -INT8_C( 2), -INT8_C( 126), INT8_C( 4), INT8_C( 2), INT8_C( 53), -INT8_C( 97), -INT8_C( 98), -INT8_C( 97), -INT8_C( 5), INT8_MAX, INT8_C( 32), INT8_C( 2) }, { INT8_C( 105), -INT8_C( 125), INT8_C( 35), INT8_C( 120), INT8_C( 112), INT8_C( 9), INT8_C( 90), INT8_C( 60), INT8_C( 53), INT8_C( 97), INT8_C( 98), INT8_C( 97), INT8_C( 5), INT8_MAX, -INT8_C( 54), -INT8_C( 5) } }, { { INT8_C( 57), INT8_C( 51), INT8_C( 59), -INT8_C( 24), -INT8_C( 59), -INT8_C( 127), INT8_C( 102), -INT8_C( 27), INT8_C( 72), INT8_C( 126), INT8_C( 55), INT8_C( 1), INT8_C( 102), INT8_C( 38), -INT8_C( 4), INT8_C( 93) }, UINT16_C(22560), { -INT8_C( 37), -INT8_C( 17), -INT8_C( 118), INT8_C( 37), -INT8_C( 112), -INT8_C( 73), 
-INT8_C( 20), -INT8_C( 3), INT8_C( 65), INT8_C( 0), -INT8_C( 19), INT8_C( 33), INT8_C( 99), INT8_C( 38), -INT8_C( 14), -INT8_C( 68) }, { INT8_C( 57), INT8_C( 51), INT8_C( 59), -INT8_C( 24), -INT8_C( 59), INT8_C( 73), INT8_C( 102), -INT8_C( 27), INT8_C( 72), INT8_C( 126), INT8_C( 55), INT8_C( 33), INT8_C( 99), INT8_C( 38), INT8_C( 14), INT8_C( 93) } }, { { -INT8_C( 81), INT8_C( 103), -INT8_C( 37), INT8_C( 36), -INT8_C( 58), -INT8_C( 71), INT8_C( 10), -INT8_C( 8), -INT8_C( 90), -INT8_C( 33), -INT8_C( 34), INT8_C( 31), INT8_C( 116), -INT8_C( 1), -INT8_C( 63), -INT8_C( 2) }, UINT16_C(16195), { INT8_C( 106), INT8_C( 92), -INT8_C( 34), -INT8_C( 79), -INT8_C( 62), INT8_C( 72), -INT8_C( 3), INT8_C( 17), INT8_C( 107), -INT8_C( 70), -INT8_C( 97), -INT8_C( 103), -INT8_C( 45), -INT8_C( 123), -INT8_C( 5), -INT8_C( 87) }, { INT8_C( 106), INT8_C( 92), -INT8_C( 37), INT8_C( 36), -INT8_C( 58), -INT8_C( 71), INT8_C( 3), -INT8_C( 8), INT8_C( 107), INT8_C( 70), INT8_C( 97), INT8_C( 103), INT8_C( 45), INT8_C( 123), -INT8_C( 63), -INT8_C( 2) } }, { { -INT8_C( 117), -INT8_C( 50), -INT8_C( 3), INT8_C( 21), -INT8_C( 14), -INT8_C( 123), INT8_C( 98), INT8_C( 119), -INT8_C( 121), -INT8_C( 35), INT8_C( 12), -INT8_C( 82), -INT8_C( 93), INT8_C( 40), INT8_C( 26), -INT8_C( 79) }, UINT16_C(29568), { -INT8_C( 44), INT8_C( 14), INT8_C( 65), -INT8_C( 112), INT8_C( 49), INT8_C( 81), INT8_C( 38), INT8_C( 71), INT8_C( 11), INT8_C( 26), -INT8_C( 44), INT8_C( 39), INT8_C( 116), INT8_C( 41), -INT8_C( 105), -INT8_C( 71) }, { -INT8_C( 117), -INT8_C( 50), -INT8_C( 3), INT8_C( 21), -INT8_C( 14), -INT8_C( 123), INT8_C( 98), INT8_C( 71), INT8_C( 11), INT8_C( 26), INT8_C( 12), -INT8_C( 82), INT8_C( 116), INT8_C( 41), INT8_C( 105), -INT8_C( 79) } }, { { -INT8_C( 122), -INT8_C( 67), INT8_MIN, INT8_C( 104), INT8_C( 85), INT8_MIN, -INT8_C( 85), -INT8_C( 90), -INT8_C( 123), INT8_C( 72), INT8_C( 109), INT8_C( 61), -INT8_C( 9), INT8_C( 22), INT8_C( 63), INT8_C( 2) }, UINT16_C(24034), { -INT8_C( 77), INT8_C( 93), INT8_C( 
122), INT8_C( 26), INT8_C( 53), -INT8_C( 6), -INT8_C( 88), -INT8_C( 106), -INT8_C( 81), INT8_C( 24), INT8_C( 109), -INT8_C( 113), INT8_C( 69), -INT8_C( 26), -INT8_C( 67), INT8_C( 26) }, { -INT8_C( 122), INT8_C( 93), INT8_MIN, INT8_C( 104), INT8_C( 85), INT8_C( 6), INT8_C( 88), INT8_C( 106), INT8_C( 81), INT8_C( 72), INT8_C( 109), INT8_C( 113), INT8_C( 69), INT8_C( 22), INT8_C( 67), INT8_C( 2) } }, { { INT8_C( 61), -INT8_C( 112), -INT8_C( 20), INT8_C( 115), INT8_C( 91), -INT8_C( 47), -INT8_C( 94), -INT8_C( 48), INT8_C( 13), INT8_C( 17), -INT8_C( 121), -INT8_C( 53), INT8_C( 98), -INT8_C( 9), -INT8_C( 39), -INT8_C( 82) }, UINT16_C(34747), { -INT8_C( 105), -INT8_C( 92), INT8_C( 17), -INT8_C( 55), -INT8_C( 127), -INT8_C( 14), INT8_C( 79), INT8_C( 123), -INT8_C( 78), INT8_C( 70), INT8_C( 37), INT8_C( 87), -INT8_C( 52), INT8_C( 75), -INT8_C( 98), -INT8_C( 26) }, { INT8_C( 105), INT8_C( 92), -INT8_C( 20), INT8_C( 55), INT8_MAX, INT8_C( 14), -INT8_C( 94), INT8_C( 123), INT8_C( 78), INT8_C( 70), INT8_C( 37), -INT8_C( 53), INT8_C( 98), -INT8_C( 9), -INT8_C( 39), INT8_C( 26) } }, { { -INT8_C( 89), -INT8_C( 81), INT8_C( 27), -INT8_C( 66), -INT8_C( 104), INT8_C( 27), INT8_C( 70), INT8_C( 113), INT8_C( 106), -INT8_C( 107), -INT8_C( 123), INT8_C( 49), INT8_C( 96), -INT8_C( 86), INT8_C( 88), -INT8_C( 26) }, UINT16_C(19488), { -INT8_C( 47), INT8_C( 126), INT8_MAX, -INT8_C( 96), INT8_C( 81), -INT8_C( 29), INT8_C( 8), -INT8_C( 35), INT8_C( 14), INT8_C( 19), -INT8_C( 126), INT8_C( 125), -INT8_C( 16), INT8_C( 66), -INT8_C( 59), -INT8_C( 53) }, { -INT8_C( 89), -INT8_C( 81), INT8_C( 27), -INT8_C( 66), -INT8_C( 104), INT8_C( 29), INT8_C( 70), INT8_C( 113), INT8_C( 106), -INT8_C( 107), INT8_C( 126), INT8_C( 125), INT8_C( 96), -INT8_C( 86), INT8_C( 59), -INT8_C( 26) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi8(test_vec[i].src); simde__m128i a = simde_x_mm_loadu_epi8(test_vec[i].a); simde__m128i r = 
simde_mm_mask_abs_epi8(src, test_vec[i].k, a); simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm_maskz_abs_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const int8_t a[16]; const int8_t r[16]; } test_vec[] = { { UINT16_C(57432), { INT8_C( 44), INT8_C( 47), -INT8_C( 120), INT8_C( 111), -INT8_C( 65), INT8_C( 87), INT8_C( 90), INT8_C( 38), INT8_C( 10), INT8_C( 24), -INT8_C( 56), -INT8_C( 43), -INT8_C( 119), INT8_C( 102), -INT8_C( 58), INT8_C( 89) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 111), INT8_C( 65), INT8_C( 0), INT8_C( 90), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 102), INT8_C( 58), INT8_C( 89) } }, { UINT16_C(21485), { INT8_C( 28), INT8_C( 88), INT8_C( 30), -INT8_C( 28), -INT8_C( 93), -INT8_C( 61), INT8_C( 19), INT8_C( 71), -INT8_C( 40), -INT8_C( 4), -INT8_C( 116), -INT8_C( 23), -INT8_C( 3), -INT8_C( 67), INT8_C( 102), INT8_C( 23) }, { INT8_C( 28), INT8_C( 0), INT8_C( 30), INT8_C( 28), INT8_C( 0), INT8_C( 61), INT8_C( 19), INT8_C( 71), INT8_C( 40), INT8_C( 4), INT8_C( 0), INT8_C( 0), INT8_C( 3), INT8_C( 0), INT8_C( 102), INT8_C( 0) } }, { UINT16_C(21564), { INT8_C( 2), -INT8_C( 52), INT8_C( 110), -INT8_C( 13), INT8_C( 53), -INT8_C( 127), INT8_C( 61), -INT8_C( 25), INT8_C( 101), -INT8_C( 7), -INT8_C( 107), INT8_C( 67), INT8_C( 59), INT8_C( 123), INT8_C( 35), -INT8_C( 120) }, { INT8_C( 0), INT8_C( 0), INT8_C( 110), INT8_C( 13), INT8_C( 53), INT8_MAX, INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 107), INT8_C( 0), INT8_C( 59), INT8_C( 0), INT8_C( 35), INT8_C( 0) } }, { UINT16_C(51548), { -INT8_C( 64), INT8_C( 83), INT8_C( 100), -INT8_C( 30), INT8_C( 81), -INT8_C( 69), INT8_C( 123), INT8_C( 41), INT8_C( 60), INT8_C( 42), INT8_C( 78), INT8_C( 33), INT8_C( 84), -INT8_C( 1), INT8_C( 94), INT8_C( 63) }, { INT8_C( 0), INT8_C( 0), INT8_C( 100), INT8_C( 30), INT8_C( 81), INT8_C( 0), INT8_C( 123), INT8_C( 0), INT8_C( 60), 
INT8_C( 0), INT8_C( 0), INT8_C( 33), INT8_C( 0), INT8_C( 0), INT8_C( 94), INT8_C( 63) } }, { UINT16_C(13397), { -INT8_C( 105), INT8_C( 53), -INT8_C( 18), -INT8_C( 116), -INT8_C( 108), INT8_C( 36), INT8_C( 83), INT8_C( 110), -INT8_C( 23), -INT8_C( 15), INT8_C( 70), INT8_C( 2), -INT8_C( 110), -INT8_C( 126), INT8_C( 110), INT8_C( 117) }, { INT8_C( 105), INT8_C( 0), INT8_C( 18), INT8_C( 0), INT8_C( 108), INT8_C( 0), INT8_C( 83), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 70), INT8_C( 0), INT8_C( 110), INT8_C( 126), INT8_C( 0), INT8_C( 0) } }, { UINT16_C(25039), { -INT8_C( 113), INT8_C( 73), -INT8_C( 72), -INT8_C( 54), INT8_C( 79), INT8_C( 50), -INT8_C( 17), -INT8_C( 75), -INT8_C( 121), INT8_C( 107), INT8_C( 94), -INT8_C( 105), -INT8_C( 33), INT8_C( 111), INT8_C( 57), -INT8_C( 24) }, { INT8_C( 113), INT8_C( 73), INT8_C( 72), INT8_C( 54), INT8_C( 0), INT8_C( 0), INT8_C( 17), INT8_C( 75), INT8_C( 121), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 111), INT8_C( 57), INT8_C( 0) } }, { UINT16_C(40507), { -INT8_C( 10), -INT8_C( 56), INT8_C( 2), -INT8_C( 109), INT8_C( 40), -INT8_C( 13), INT8_C( 16), INT8_C( 50), -INT8_C( 69), INT8_C( 97), -INT8_C( 32), INT8_C( 88), -INT8_C( 63), INT8_C( 58), INT8_C( 62), -INT8_C( 97) }, { INT8_C( 10), INT8_C( 56), INT8_C( 0), INT8_C( 109), INT8_C( 40), INT8_C( 13), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 97), INT8_C( 32), INT8_C( 88), INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 97) } }, { UINT16_C(31988), { -INT8_C( 68), INT8_C( 6), INT8_C( 10), INT8_C( 17), INT8_C( 92), INT8_C( 60), -INT8_C( 95), -INT8_C( 56), INT8_C( 106), -INT8_C( 113), INT8_C( 12), -INT8_C( 97), -INT8_C( 56), -INT8_C( 73), INT8_C( 21), -INT8_C( 95) }, { INT8_C( 0), INT8_C( 0), INT8_C( 10), INT8_C( 0), INT8_C( 92), INT8_C( 60), INT8_C( 95), INT8_C( 56), INT8_C( 0), INT8_C( 0), INT8_C( 12), INT8_C( 97), INT8_C( 56), INT8_C( 73), INT8_C( 21), INT8_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = 
simde_x_mm_loadu_epi8(test_vec[i].a); simde__m128i r = simde_mm_maskz_abs_epi8(test_vec[i].k, a); simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm_mask_abs_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t src[8]; const simde__mmask8 k; const int16_t a[8]; const int16_t r[8]; } test_vec[] = { { { INT16_C( 8153), INT16_C( 16161), INT16_C( 6855), -INT16_C( 17824), -INT16_C( 10464), -INT16_C( 18024), -INT16_C( 25775), INT16_C( 32280) }, UINT8_C( 66), { INT16_C( 13385), -INT16_C( 15752), INT16_C( 13235), -INT16_C( 19058), INT16_C( 19462), -INT16_C( 3246), -INT16_C( 30050), -INT16_C( 6596) }, { INT16_C( 8153), INT16_C( 15752), INT16_C( 6855), -INT16_C( 17824), -INT16_C( 10464), -INT16_C( 18024), INT16_C( 30050), INT16_C( 32280) } }, { { INT16_C( 15536), -INT16_C( 28087), -INT16_C( 28835), INT16_C( 17732), INT16_C( 28497), INT16_C( 9121), INT16_C( 28628), -INT16_C( 30221) }, UINT8_C( 56), { INT16_C( 15263), -INT16_C( 25944), -INT16_C( 12189), -INT16_C( 659), INT16_C( 23072), INT16_C( 14027), INT16_C( 26184), INT16_C( 13567) }, { INT16_C( 15536), -INT16_C( 28087), -INT16_C( 28835), INT16_C( 659), INT16_C( 23072), INT16_C( 14027), INT16_C( 28628), -INT16_C( 30221) } }, { { -INT16_C( 3774), -INT16_C( 7245), INT16_C( 3049), INT16_C( 7497), -INT16_C( 12617), INT16_C( 28131), INT16_C( 10527), INT16_C( 6521) }, UINT8_C(182), { INT16_C( 23186), -INT16_C( 32513), -INT16_C( 16007), INT16_C( 28358), -INT16_C( 7161), -INT16_C( 29226), -INT16_C( 31719), INT16_C( 24598) }, { -INT16_C( 3774), INT16_C( 32513), INT16_C( 16007), INT16_C( 7497), INT16_C( 7161), INT16_C( 29226), INT16_C( 10527), INT16_C( 24598) } }, { { INT16_C( 28985), -INT16_C( 21137), INT16_C( 9114), -INT16_C( 587), INT16_C( 10616), -INT16_C( 12703), -INT16_C( 31567), INT16_C( 22068) }, UINT8_C( 76), { INT16_C( 4627), INT16_C( 12178), -INT16_C( 14639), INT16_C( 24171), -INT16_C( 9608), INT16_C( 30857), -INT16_C( 739), -INT16_C( 22827) }, 
{ INT16_C( 28985), -INT16_C( 21137), INT16_C( 14639), INT16_C( 24171), INT16_C( 10616), -INT16_C( 12703), INT16_C( 739), INT16_C( 22068) } }, { { -INT16_C( 1171), INT16_C( 7354), -INT16_C( 1877), INT16_C( 16390), -INT16_C( 6177), -INT16_C( 784), INT16_C( 22452), INT16_C( 15509) }, UINT8_C(230), { INT16_C( 4801), INT16_C( 872), INT16_C( 16760), -INT16_C( 8622), INT16_C( 24650), -INT16_C( 6092), -INT16_C( 25601), -INT16_C( 17682) }, { -INT16_C( 1171), INT16_C( 872), INT16_C( 16760), INT16_C( 16390), -INT16_C( 6177), INT16_C( 6092), INT16_C( 25601), INT16_C( 17682) } }, { { -INT16_C( 20314), INT16_C( 14303), -INT16_C( 10837), -INT16_C( 17361), -INT16_C( 17869), INT16_C( 9635), -INT16_C( 26066), -INT16_C( 16289) }, UINT8_C(245), { INT16_C( 14), INT16_C( 8028), -INT16_C( 21476), INT16_C( 17146), -INT16_C( 11337), INT16_C( 20019), -INT16_C( 17783), -INT16_C( 6419) }, { INT16_C( 14), INT16_C( 14303), INT16_C( 21476), -INT16_C( 17361), INT16_C( 11337), INT16_C( 20019), INT16_C( 17783), INT16_C( 6419) } }, { { INT16_C( 4307), INT16_C( 28142), INT16_C( 27919), INT16_C( 11490), -INT16_C( 8387), -INT16_C( 13172), -INT16_C( 10842), INT16_C( 13655) }, UINT8_C( 62), { INT16_C( 26887), INT16_C( 10704), -INT16_C( 3529), -INT16_C( 7720), INT16_C( 27957), -INT16_C( 9436), -INT16_C( 7956), -INT16_C( 29431) }, { INT16_C( 4307), INT16_C( 10704), INT16_C( 3529), INT16_C( 7720), INT16_C( 27957), INT16_C( 9436), -INT16_C( 10842), INT16_C( 13655) } }, { { INT16_C( 29302), -INT16_C( 19446), -INT16_C( 25972), INT16_C( 16877), -INT16_C( 505), -INT16_C( 4405), INT16_C( 25296), -INT16_C( 23565) }, UINT8_C(201), { INT16_C( 9477), INT16_C( 32174), -INT16_C( 29767), INT16_C( 24616), -INT16_C( 3737), -INT16_C( 24240), -INT16_C( 27071), INT16_C( 29624) }, { INT16_C( 9477), -INT16_C( 19446), -INT16_C( 25972), INT16_C( 24616), -INT16_C( 505), -INT16_C( 4405), INT16_C( 27071), INT16_C( 29624) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = 
simde_x_mm_loadu_epi16(test_vec[i].src); simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a); simde__m128i r = simde_mm_mask_abs_epi16(src, test_vec[i].k, a); simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm_maskz_abs_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int16_t a[8]; const int16_t r[8]; } test_vec[] = { { UINT8_C( 9), { -INT16_C( 18170), INT16_C( 3543), -INT16_C( 6732), INT16_C( 2591), INT16_C( 16089), INT16_C( 25611), -INT16_C( 22208), -INT16_C( 26321) }, { INT16_C( 18170), INT16_C( 0), INT16_C( 0), INT16_C( 2591), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT8_C(246), { INT16_C( 18289), -INT16_C( 24027), -INT16_C( 29991), -INT16_C( 1982), INT16_C( 32714), -INT16_C( 354), -INT16_C( 22665), INT16_C( 9312) }, { INT16_C( 0), INT16_C( 24027), INT16_C( 29991), INT16_C( 0), INT16_C( 32714), INT16_C( 354), INT16_C( 22665), INT16_C( 9312) } }, { UINT8_C(207), { INT16_C( 11566), INT16_C( 22926), INT16_C( 14011), -INT16_C( 7358), -INT16_C( 18332), INT16_C( 15086), INT16_C( 13877), INT16_C( 27416) }, { INT16_C( 11566), INT16_C( 22926), INT16_C( 14011), INT16_C( 7358), INT16_C( 0), INT16_C( 0), INT16_C( 13877), INT16_C( 27416) } }, { UINT8_C( 86), { -INT16_C( 2382), -INT16_C( 5907), INT16_C( 22152), INT16_C( 19099), INT16_C( 30599), INT16_C( 3850), -INT16_C( 4906), INT16_C( 4835) }, { INT16_C( 0), INT16_C( 5907), INT16_C( 22152), INT16_C( 0), INT16_C( 30599), INT16_C( 0), INT16_C( 4906), INT16_C( 0) } }, { UINT8_C(119), { -INT16_C( 32551), -INT16_C( 10402), -INT16_C( 27415), -INT16_C( 17203), -INT16_C( 15129), INT16_C( 10840), -INT16_C( 3485), -INT16_C( 22834) }, { INT16_C( 32551), INT16_C( 10402), INT16_C( 27415), INT16_C( 0), INT16_C( 15129), INT16_C( 10840), INT16_C( 3485), INT16_C( 0) } }, { UINT8_C( 9), { -INT16_C( 16078), -INT16_C( 4277), INT16_C( 6446), -INT16_C( 24958), -INT16_C( 6732), -INT16_C( 8610), -INT16_C( 25530), INT16_C( 24833) 
}, { INT16_C( 16078), INT16_C( 0), INT16_C( 0), INT16_C( 24958), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT8_C(136), { INT16_C( 2458), INT16_C( 11322), -INT16_C( 10557), INT16_C( 10157), -INT16_C( 13767), INT16_C( 17938), INT16_C( 29253), -INT16_C( 10355) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 10157), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 10355) } }, { UINT8_C( 67), { INT16_C( 3514), -INT16_C( 28361), INT16_C( 31771), INT16_C( 5728), -INT16_C( 9840), INT16_C( 11002), -INT16_C( 7475), -INT16_C( 5133) }, { INT16_C( 3514), INT16_C( 28361), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 7475), INT16_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a); simde__m128i r = simde_mm_maskz_abs_epi16(test_vec[i].k, a); simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm_mask_abs_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t src[4]; const simde__mmask8 k; const int32_t a[4]; const int32_t r[4]; } test_vec[] = { { { INT32_C( 688398243), INT32_C( 2117596500), -INT32_C( 750842275), INT32_C( 366535198) }, UINT8_C(131), { -INT32_C( 1004016930), -INT32_C( 1077141926), -INT32_C( 2083644661), INT32_C( 399895044) }, { INT32_C( 1004016930), INT32_C( 1077141926), -INT32_C( 750842275), INT32_C( 366535198) } }, { { INT32_C( 1632121691), INT32_C( 483536164), -INT32_C( 526963188), -INT32_C( 1230342708) }, UINT8_C( 2), { INT32_C( 2145654124), -INT32_C( 1724078204), -INT32_C( 190821781), -INT32_C( 1219539762) }, { INT32_C( 1632121691), INT32_C( 1724078204), -INT32_C( 526963188), -INT32_C( 1230342708) } }, { { -INT32_C( 780236771), INT32_C( 1976716971), INT32_C( 1074971562), INT32_C( 1213854368) }, UINT8_C( 82), { -INT32_C( 767166523), INT32_C( 1085468303), -INT32_C( 295595563), INT32_C( 669742458) }, { -INT32_C( 780236771), INT32_C( 1085468303), INT32_C( 
1074971562), INT32_C( 1213854368) } }, { { -INT32_C( 1066078121), INT32_C( 1916170187), -INT32_C( 1589423098), -INT32_C( 746781550) }, UINT8_C(136), { -INT32_C( 802933306), -INT32_C( 186975219), -INT32_C( 1081305950), INT32_C( 1075243371) }, { -INT32_C( 1066078121), INT32_C( 1916170187), -INT32_C( 1589423098), INT32_C( 1075243371) } }, { { INT32_C( 955441731), -INT32_C( 1927520383), INT32_C( 841960739), -INT32_C( 1971983518) }, UINT8_C( 63), { -INT32_C( 1129031646), -INT32_C( 1553699482), -INT32_C( 1621136138), -INT32_C( 791151103) }, { INT32_C( 1129031646), INT32_C( 1553699482), INT32_C( 1621136138), INT32_C( 791151103) } }, { { INT32_C( 2072269077), INT32_C( 1390338014), -INT32_C( 681233355), -INT32_C( 586259273) }, UINT8_C( 73), { INT32_C( 289225178), -INT32_C( 1951535354), -INT32_C( 1646281947), -INT32_C( 283269702) }, { INT32_C( 289225178), INT32_C( 1390338014), -INT32_C( 681233355), INT32_C( 283269702) } }, { { INT32_C( 352437480), -INT32_C( 669662064), -INT32_C( 1349420366), INT32_C( 1478068007) }, UINT8_C( 52), { -INT32_C( 1411603801), -INT32_C( 1980243425), INT32_C( 161641122), -INT32_C( 1088019476) }, { INT32_C( 352437480), -INT32_C( 669662064), INT32_C( 161641122), INT32_C( 1478068007) } }, { { -INT32_C( 968386477), -INT32_C( 888428856), -INT32_C( 552543373), -INT32_C( 1460967715) }, UINT8_C(167), { -INT32_C( 1057832772), -INT32_C( 1469689236), -INT32_C( 300347505), -INT32_C( 52757827) }, { INT32_C( 1057832772), INT32_C( 1469689236), INT32_C( 300347505), -INT32_C( 1460967715) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi32(test_vec[i].src); simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); simde__m128i r = simde_mm_mask_abs_epi32(src, test_vec[i].k, a); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm_maskz_abs_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int32_t a[4]; 
const int32_t r[4]; } test_vec[] = { { UINT8_C(145), { INT32_C( 29805490), -INT32_C( 2083285805), INT32_C( 753740199), -INT32_C( 1343338556) }, { INT32_C( 29805490), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { UINT8_C( 58), { -INT32_C( 2072344551), -INT32_C( 137560356), -INT32_C( 577438960), -INT32_C( 1224979635) }, { INT32_C( 0), INT32_C( 137560356), INT32_C( 0), INT32_C( 1224979635) } }, { UINT8_C(109), { INT32_C( 815986804), -INT32_C( 520418861), -INT32_C( 1705291520), -INT32_C( 1422986918) }, { INT32_C( 815986804), INT32_C( 0), INT32_C( 1705291520), INT32_C( 1422986918) } }, { UINT8_C(145), { -INT32_C( 1602009068), INT32_C( 676272594), INT32_C( 1754227610), INT32_C( 567182279) }, { INT32_C( 1602009068), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { UINT8_C(195), { INT32_C( 1694336367), INT32_C( 738012218), INT32_C( 87416787), -INT32_C( 2145881269) }, { INT32_C( 1694336367), INT32_C( 738012218), INT32_C( 0), INT32_C( 0) } }, { UINT8_C( 97), { -INT32_C( 444185248), -INT32_C( 216805061), INT32_C( 376077454), INT32_C( 835265240) }, { INT32_C( 444185248), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { UINT8_MAX, { -INT32_C( 1160186485), INT32_C( 1353662651), INT32_C( 2034799586), -INT32_C( 705717215) }, { INT32_C( 1160186485), INT32_C( 1353662651), INT32_C( 2034799586), INT32_C( 705717215) } }, { UINT8_C(168), { INT32_C( 1699267364), INT32_C( 479861968), -INT32_C( 177248900), -INT32_C( 1180950087) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1180950087) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); simde__m128i r = simde_mm_maskz_abs_epi32(test_vec[i].k, a); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm_abs_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[2]; const int64_t r[2]; } test_vec[] = { { { INT64_MIN, INT64_C( 5987331142896800384) }, { INT64_MIN, INT64_C( 
5987331142896800384) } }, { { -INT64_C( 6165271089019809896), -INT64_C( 1488269006246725939) }, { INT64_C( 6165271089019809896), INT64_C( 1488269006246725939) } }, { { -INT64_C( 287912670071654876), INT64_C( 3376558256458965752) }, { INT64_C( 287912670071654876), INT64_C( 3376558256458965752) } }, { { -INT64_C( 1699690728377702014), INT64_C( 2927647255755636771) }, { INT64_C( 1699690728377702014), INT64_C( 2927647255755636771) } }, { { -INT64_C( 8959542323819455163), INT64_C( 3365246129411480893) }, { INT64_C( 8959542323819455163), INT64_C( 3365246129411480893) } }, { { INT64_C( 4227824362795330185), INT64_C( 6194577401110150880) }, { INT64_C( 4227824362795330185), INT64_C( 6194577401110150880) } }, { { INT64_C( 6873617928876373866), INT64_C( 1262142814710839683) }, { INT64_C( 6873617928876373866), INT64_C( 1262142814710839683) } }, { { INT64_C( 722086948698055913), -INT64_C( 4941936896584979953) }, { INT64_C( 722086948698055913), INT64_C( 4941936896584979953) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a); simde__m128i r = simde_mm_abs_epi64(a); simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm_mask_abs_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t src[2]; const simde__mmask8 k; const int64_t a[2]; const int64_t r[2]; } test_vec[] = { { { INT64_C( 4845332346535929233), INT64_C( 1700698435022438078) }, UINT8_C(130), { -INT64_C( 6803045812735068648), -INT64_C( 7443449697644586270) }, { INT64_C( 4845332346535929233), INT64_C( 7443449697644586270) } }, { { -INT64_C( 6421298693609969513), -INT64_C( 6160319844260546176) }, UINT8_C( 59), { -INT64_C( 4351657591194229711), -INT64_C( 8627740096035247728) }, { INT64_C( 4351657591194229711), INT64_C( 8627740096035247728) } }, { { -INT64_C( 8180037481821730213), INT64_C( 7219960493591948494) }, UINT8_C( 49), { -INT64_C( 6441345642108472215), INT64_C( 
4350044603238480648) }, { INT64_C( 6441345642108472215), INT64_C( 7219960493591948494) } }, { { INT64_C( 4684076903763347163), INT64_C( 6497802772857514833) }, UINT8_C(205), { -INT64_C( 4870124432114791231), INT64_C( 4454143856972221582) }, { INT64_C( 4870124432114791231), INT64_C( 6497802772857514833) } }, { { -INT64_C( 838855374297144746), -INT64_C( 2942560270663534524) }, UINT8_C(120), { INT64_C( 5641214509537388547), INT64_C( 4712163805488714118) }, { -INT64_C( 838855374297144746), -INT64_C( 2942560270663534524) } }, { { INT64_C( 7176515612344537603), -INT64_C( 8643734220088015145) }, UINT8_C(168), { -INT64_C( 3490178188729363300), INT64_C( 7993754077794638996) }, { INT64_C( 7176515612344537603), -INT64_C( 8643734220088015145) } }, { { -INT64_C( 1529783215006713101), -INT64_C( 1978515024379923929) }, UINT8_C(204), { -INT64_C( 8261273454123855187), -INT64_C( 408440238321563495) }, { -INT64_C( 1529783215006713101), -INT64_C( 1978515024379923929) } }, { { -INT64_C( 3158447172117950868), -INT64_C( 3303403632531072544) }, UINT8_C( 84), { -INT64_C( 5291873217680795087), INT64_C( 4801197429913235623) }, { -INT64_C( 3158447172117950868), -INT64_C( 3303403632531072544) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi64(test_vec[i].src); simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a); simde__m128i r = simde_mm_mask_abs_epi64(src, test_vec[i].k, a); simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm_maskz_abs_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int64_t a[2]; const int64_t r[2]; } test_vec[] = { { UINT8_C(158), { -INT64_C( 6298226099324920239), -INT64_C( 5000322151057574458) }, { INT64_C( 0), INT64_C( 5000322151057574458) } }, { UINT8_C( 43), { INT64_C( 2636038210509465369), INT64_C( 7020928684628243752) }, { INT64_C( 2636038210509465369), INT64_C( 7020928684628243752) } }, { UINT8_C( 
64), { INT64_C( 7649134006013225985), INT64_C( 2078749890811515096) }, { INT64_C( 0), INT64_C( 0) } }, { UINT8_C(207), { INT64_C( 941032990317475364), -INT64_C( 6013459460053205151) }, { INT64_C( 941032990317475364), INT64_C( 6013459460053205151) } }, { UINT8_C(103), { -INT64_C( 1560295149959329567), -INT64_C( 3971587257135282239) }, { INT64_C( 1560295149959329567), INT64_C( 3971587257135282239) } }, { UINT8_C( 38), { -INT64_C( 5243445501069980794), INT64_C( 4885633393584462144) }, { INT64_C( 0), INT64_C( 4885633393584462144) } }, { UINT8_C( 94), { INT64_C( 7243498660887455097), INT64_C( 8890095449815425622) }, { INT64_C( 0), INT64_C( 8890095449815425622) } }, { UINT8_C( 14), { INT64_C( 7759806299451765498), -INT64_C( 6445959026453494579) }, { INT64_C( 0), INT64_C( 6445959026453494579) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a); simde__m128i r = simde_mm_maskz_abs_epi64(test_vec[i].k, a); simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm256_abs_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C( 2298255581870375211), INT64_C(-3544843370875867424), INT64_C( 3174188203889017774), INT64_C(-2855144460944446932)), simde_mm256_set_epi64x(INT64_C( 2298255581870375211), INT64_C( 3544843370875867424), INT64_C( 3174188203889017774), INT64_C( 2855144460944446932)) }, { simde_mm256_set_epi64x(INT64_C(-2343577668018514218), INT64_C( 6125961421606078258), INT64_C(-3940514899539048661), INT64_C(-1443470135985810906)), simde_mm256_set_epi64x(INT64_C( 2343577668018514218), INT64_C( 6125961421606078258), INT64_C( 3940514899539048661), INT64_C( 1443470135985810906)) }, { simde_mm256_set_epi64x(INT64_C(-5113251846863269416), INT64_C( 4963302814062391174), INT64_C(-8692429813673586920), INT64_C(-1299515304381535234)), simde_mm256_set_epi64x(INT64_C( 
5113251846863269416), INT64_C( 4963302814062391174), INT64_C( 8692429813673586920), INT64_C( 1299515304381535234)) }, { simde_mm256_set_epi64x(INT64_C( 8282900993993562890), INT64_C( -871234380790935570), INT64_C( 1016547295723275308), INT64_C( 2445109086053031177)), simde_mm256_set_epi64x(INT64_C( 8282900993993562890), INT64_C( 871234380790935570), INT64_C( 1016547295723275308), INT64_C( 2445109086053031177)) }, { simde_mm256_set_epi64x(INT64_C( 2885698025168517941), INT64_C( 4164132731831874360), INT64_C( 5579124789695570138), INT64_C(-5071075354474953440)), simde_mm256_set_epi64x(INT64_C( 2885698025168517941), INT64_C( 4164132731831874360), INT64_C( 5579124789695570138), INT64_C( 5071075354474953440)) }, { simde_mm256_set_epi64x(INT64_C(-3829241843042224259), INT64_C(-5265306480458209716), INT64_C( -199503262700073332), INT64_C(-3406476690611433698)), simde_mm256_set_epi64x(INT64_C( 3829241843042224259), INT64_C( 5265306480458209716), INT64_C( 199503262700073332), INT64_C( 3406476690611433698)) }, { simde_mm256_set_epi64x(INT64_C(-8511077884182051912), INT64_C(-2833485123520542356), INT64_C(-8333607306604449051), INT64_C(-3068466298309072119)), simde_mm256_set_epi64x(INT64_C( 8511077884182051912), INT64_C( 2833485123520542356), INT64_C( 8333607306604449051), INT64_C( 3068466298309072119)) }, { simde_mm256_set_epi64x(INT64_C( 2822112346803664079), INT64_C( 298455952410199790), INT64_C( 966686671017309845), INT64_C( 9214147743026689710)), simde_mm256_set_epi64x(INT64_C( 2822112346803664079), INT64_C( 298455952410199790), INT64_C( 966686671017309845), INT64_C( 9214147743026689710)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_abs_epi64(test_vec[i].a); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_mask_abs_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i src; simde__mmask8 k; simde__m256i a; simde__m256i r; } test_vec[8] = { { 
simde_mm256_set_epi64x(INT64_C( 623879162816280883), INT64_C(-3225900025883395735), INT64_C( 411040496809638529), INT64_C(-7584870799288762128)), UINT8_C( 62), simde_mm256_set_epi64x(INT64_C(-4625946001506527479), INT64_C(-2791937557159189467), INT64_C(-5209880226959401821), INT64_C(-2130118807554140301)), simde_mm256_set_epi64x(INT64_C( 4625946001506527479), INT64_C( 2791937557159189467), INT64_C( 5209880226959401821), INT64_C(-7584870799288762128)) }, { simde_mm256_set_epi64x(INT64_C( 8448739575006176562), INT64_C( 3518346377803159044), INT64_C( 844328342996800488), INT64_C( 8434264651311772530)), UINT8_C(156), simde_mm256_set_epi64x(INT64_C(-2671163103984174033), INT64_C( 4562965894666802973), INT64_C(-8366536480676858800), INT64_C( 6120742655549907249)), simde_mm256_set_epi64x(INT64_C( 2671163103984174033), INT64_C( 4562965894666802973), INT64_C( 844328342996800488), INT64_C( 8434264651311772530)) }, { simde_mm256_set_epi64x(INT64_C(-7191173410794127611), INT64_C( 3688037766287492394), INT64_C( 1547230041795852910), INT64_C( 3059339057736759292)), UINT8_C(119), simde_mm256_set_epi64x(INT64_C(-6542580348328468330), INT64_C( 44667239404533068), INT64_C( 2360079993551421998), INT64_C( 219045572964647829)), simde_mm256_set_epi64x(INT64_C(-7191173410794127611), INT64_C( 44667239404533068), INT64_C( 2360079993551421998), INT64_C( 219045572964647829)) }, { simde_mm256_set_epi64x(INT64_C( 4128283011258120213), INT64_C( -108361944871310768), INT64_C(-7759705295173963093), INT64_C(-2624902131704570248)), UINT8_C( 75), simde_mm256_set_epi64x(INT64_C(-5879975501041972673), INT64_C( 4967758226257621489), INT64_C( 7728804239548221103), INT64_C( 8515647311939165123)), simde_mm256_set_epi64x(INT64_C( 5879975501041972673), INT64_C( -108361944871310768), INT64_C( 7728804239548221103), INT64_C( 8515647311939165123)) }, { simde_mm256_set_epi64x(INT64_C(-2790757822212524741), INT64_C( 4593245805939314417), INT64_C( 507611866393274703), INT64_C( 3764810505633876098)), UINT8_C(205), 
simde_mm256_set_epi64x(INT64_C(-8403106197018531632), INT64_C( 4361313410194959167), INT64_C(-3471819223171854464), INT64_C(-1064109494582275885)), simde_mm256_set_epi64x(INT64_C( 8403106197018531632), INT64_C( 4361313410194959167), INT64_C( 507611866393274703), INT64_C( 1064109494582275885)) }, { simde_mm256_set_epi64x(INT64_C(-7284244723237547041), INT64_C(-2704891057065522880), INT64_C( 2088703461327613834), INT64_C(-6691637034812206656)), UINT8_C( 53), simde_mm256_set_epi64x(INT64_C( 7087054034507278743), INT64_C(-1904829140491124246), INT64_C(-8979305972799046958), INT64_C(-9028640504948081950)), simde_mm256_set_epi64x(INT64_C(-7284244723237547041), INT64_C( 1904829140491124246), INT64_C( 2088703461327613834), INT64_C( 9028640504948081950)) }, { simde_mm256_set_epi64x(INT64_C(-6774164690615400180), INT64_C( 169354612478585762), INT64_C(-2560732297798063552), INT64_C(-5440475278226442040)), UINT8_C(226), simde_mm256_set_epi64x(INT64_C( 4140219913643893074), INT64_C( 8233690702404220943), INT64_C(-8119230973072356120), INT64_C( 5725416174942475460)), simde_mm256_set_epi64x(INT64_C(-6774164690615400180), INT64_C( 169354612478585762), INT64_C( 8119230973072356120), INT64_C(-5440475278226442040)) }, { simde_mm256_set_epi64x(INT64_C(-3618167506666580601), INT64_C(-3565111142066299914), INT64_C( 4487949165835396675), INT64_C( 3493476883354981965)), UINT8_C(162), simde_mm256_set_epi64x(INT64_C(-4298605512042857739), INT64_C(-8701289307647237142), INT64_C(-3191212805157153492), INT64_C( 6189308541761658990)), simde_mm256_set_epi64x(INT64_C(-3618167506666580601), INT64_C(-3565111142066299914), INT64_C( 3191212805157153492), INT64_C( 3493476883354981965)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_mask_abs_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_maskz_abs_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { 
simde__mmask8 k; simde__m256i a; simde__m256i r; } test_vec[8] = { { UINT8_C( 51), simde_mm256_set_epi64x(INT64_C(-5558947899438156608), INT64_C(-5328111225624005045), INT64_C(-5266448436194518899), INT64_C(-3023513724998191945)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 0), INT64_C( 5266448436194518899), INT64_C( 3023513724998191945)) }, { UINT8_C(192), simde_mm256_set_epi64x(INT64_C( 1820775813457202726), INT64_C( 8407143534854112894), INT64_C( 1164468631328972115), INT64_C( 3847858140267031773)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C(150), simde_mm256_set_epi64x(INT64_C( 1329935347622458589), INT64_C(-6552239731915331500), INT64_C(-5727672039115289046), INT64_C( 2814104926627850068)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 6552239731915331500), INT64_C( 5727672039115289046), INT64_C( 0)) }, { UINT8_C( 62), simde_mm256_set_epi64x(INT64_C(-5313485292314620515), INT64_C(-8562444952160280220), INT64_C(-6743839490299418176), INT64_C( -90311038632227591)), simde_mm256_set_epi64x(INT64_C( 5313485292314620515), INT64_C( 8562444952160280220), INT64_C( 6743839490299418176), INT64_C( 0)) }, { UINT8_C(146), simde_mm256_set_epi64x(INT64_C( 134169414195672899), INT64_C(-3653740064081149177), INT64_C(-3907455768376978765), INT64_C(-2357591052420787867)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 0), INT64_C( 3907455768376978765), INT64_C( 0)) }, { UINT8_C( 80), simde_mm256_set_epi64x(INT64_C(-4112624575699262364), INT64_C( -503713654380207790), INT64_C(-1026806857675583448), INT64_C( 3708988589081863948)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C( 70), simde_mm256_set_epi64x(INT64_C( 5155483861531614212), INT64_C(-1432515770334784350), INT64_C( 5951616937413531378), INT64_C( 3407818380382978160)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 1432515770334784350), INT64_C( 5951616937413531378), INT64_C( 0)) }, { UINT8_C(215), simde_mm256_set_epi64x(INT64_C( 
1187658108632559622), INT64_C( 3381325771936787939), INT64_C(-4190080085529007037), INT64_C( 1815625056621359018)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 3381325771936787939), INT64_C( 4190080085529007037), INT64_C( 1815625056621359018)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_maskz_abs_epi64(test_vec[i].k, test_vec[i].a); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_abs_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( -97), INT8_C( 22), INT8_C( -8), INT8_C(-101), INT8_C( -18), INT8_C( 124), INT8_C( -73), INT8_C( -35), INT8_C(-107), INT8_C( 125), INT8_C( -49), INT8_C( -14), INT8_C( -55), INT8_C( -2), INT8_C( 3), INT8_C( -86), INT8_C( -70), INT8_C( -16), INT8_C( -3), INT8_C( -98), INT8_C( -20), INT8_C( -18), INT8_C( -58), INT8_C( -57), INT8_C( 119), INT8_C( 17), INT8_C( -79), INT8_C( 80), INT8_C( 82), INT8_C( 3), INT8_C( -18), INT8_C( -99), INT8_C( 25), INT8_C( 25), INT8_C( 83), INT8_C( 88), INT8_C( 117), INT8_C(-128), INT8_C( 16), INT8_C( -42), INT8_C( 114), INT8_C( -1), INT8_C(-110), INT8_C( 53), INT8_C( 127), INT8_C( -61), INT8_C( -68), INT8_C( 74), INT8_C( 103), INT8_C( 92), INT8_C(-115), INT8_C( -60), INT8_C( -23), INT8_C( 82), INT8_C(-123), INT8_C( 21), INT8_C( -37), INT8_C( 119), INT8_C( -39), INT8_C( -31), INT8_C( 25), INT8_C( -69), INT8_C( -57), INT8_C( 30)), simde_mm512_set_epi8(INT8_C( 97), INT8_C( 22), INT8_C( 8), INT8_C( 101), INT8_C( 18), INT8_C( 124), INT8_C( 73), INT8_C( 35), INT8_C( 107), INT8_C( 125), INT8_C( 49), INT8_C( 14), INT8_C( 55), INT8_C( 2), INT8_C( 3), INT8_C( 86), INT8_C( 70), INT8_C( 16), INT8_C( 3), INT8_C( 98), INT8_C( 20), INT8_C( 18), INT8_C( 58), INT8_C( 57), INT8_C( 119), INT8_C( 17), INT8_C( 79), INT8_C( 80), INT8_C( 82), INT8_C( 3), INT8_C( 18), INT8_C( 99), INT8_C( 25), INT8_C( 25), INT8_C( 83), INT8_C( 88), INT8_C( 117), INT8_C(-128), 
INT8_C( 16), INT8_C( 42), INT8_C( 114), INT8_C( 1), INT8_C( 110), INT8_C( 53), INT8_C( 127), INT8_C( 61), INT8_C( 68), INT8_C( 74), INT8_C( 103), INT8_C( 92), INT8_C( 115), INT8_C( 60), INT8_C( 23), INT8_C( 82), INT8_C( 123), INT8_C( 21), INT8_C( 37), INT8_C( 119), INT8_C( 39), INT8_C( 31), INT8_C( 25), INT8_C( 69), INT8_C( 57), INT8_C( 30)) }, { simde_mm512_set_epi8(INT8_C( 122), INT8_C( 62), INT8_C( -43), INT8_C( -88), INT8_C( 92), INT8_C(-116), INT8_C( -6), INT8_C( 36), INT8_C( 10), INT8_C( 2), INT8_C( -66), INT8_C( 108), INT8_C( -38), INT8_C( 112), INT8_C( 123), INT8_C( 87), INT8_C( 99), INT8_C( -46), INT8_C( -53), INT8_C( 41), INT8_C(-105), INT8_C( -98), INT8_C( 18), INT8_C( -12), INT8_C( -82), INT8_C( 126), INT8_C( -77), INT8_C( -19), INT8_C( 18), INT8_C( 16), INT8_C( 35), INT8_C( -10), INT8_C( -58), INT8_C( 48), INT8_C(-120), INT8_C( 38), INT8_C( 62), INT8_C( 17), INT8_C( 33), INT8_C(-120), INT8_C( 106), INT8_C( 25), INT8_C( -91), INT8_C( 15), INT8_C( 101), INT8_C( 114), INT8_C( -46), INT8_C( -58), INT8_C( 113), INT8_C( 4), INT8_C( 50), INT8_C( 42), INT8_C( -1), INT8_C( -29), INT8_C( -27), INT8_C( -23), INT8_C( -69), INT8_C( 92), INT8_C( -67), INT8_C( 89), INT8_C( -10), INT8_C( -42), INT8_C( 79), INT8_C( 112)), simde_mm512_set_epi8(INT8_C( 122), INT8_C( 62), INT8_C( 43), INT8_C( 88), INT8_C( 92), INT8_C( 116), INT8_C( 6), INT8_C( 36), INT8_C( 10), INT8_C( 2), INT8_C( 66), INT8_C( 108), INT8_C( 38), INT8_C( 112), INT8_C( 123), INT8_C( 87), INT8_C( 99), INT8_C( 46), INT8_C( 53), INT8_C( 41), INT8_C( 105), INT8_C( 98), INT8_C( 18), INT8_C( 12), INT8_C( 82), INT8_C( 126), INT8_C( 77), INT8_C( 19), INT8_C( 18), INT8_C( 16), INT8_C( 35), INT8_C( 10), INT8_C( 58), INT8_C( 48), INT8_C( 120), INT8_C( 38), INT8_C( 62), INT8_C( 17), INT8_C( 33), INT8_C( 120), INT8_C( 106), INT8_C( 25), INT8_C( 91), INT8_C( 15), INT8_C( 101), INT8_C( 114), INT8_C( 46), INT8_C( 58), INT8_C( 113), INT8_C( 4), INT8_C( 50), INT8_C( 42), INT8_C( 1), INT8_C( 29), INT8_C( 27), INT8_C( 23), 
INT8_C( 69), INT8_C( 92), INT8_C( 67), INT8_C( 89), INT8_C( 10), INT8_C( 42), INT8_C( 79), INT8_C( 112)) }, { simde_mm512_set_epi8(INT8_C(-115), INT8_C( 121), INT8_C( -28), INT8_C( -32), INT8_C( 39), INT8_C( 97), INT8_C( 104), INT8_C( -44), INT8_C( 120), INT8_C( -11), INT8_C( -74), INT8_C( -63), INT8_C( -24), INT8_C( -35), INT8_C(-108), INT8_C( -9), INT8_C( 30), INT8_C( -94), INT8_C( 96), INT8_C(-119), INT8_C( -14), INT8_C( -94), INT8_C( 34), INT8_C(-111), INT8_C( 86), INT8_C( -6), INT8_C(-116), INT8_C( 56), INT8_C( -2), INT8_C( -8), INT8_C( -66), INT8_C( 73), INT8_C(-111), INT8_C( 20), INT8_C( 114), INT8_C( 16), INT8_C( 71), INT8_C( 17), INT8_C( -13), INT8_C(-101), INT8_C( 32), INT8_C( 52), INT8_C( -6), INT8_C( -16), INT8_C( 78), INT8_C( 58), INT8_C( 14), INT8_C( -85), INT8_C( -58), INT8_C( 120), INT8_C( 102), INT8_C(-125), INT8_C( 73), INT8_C(-121), INT8_C(-118), INT8_C( -77), INT8_C( 84), INT8_C( 62), INT8_C( 100), INT8_C(-122), INT8_C( -17), INT8_C( 81), INT8_C( 105), INT8_C( -71)), simde_mm512_set_epi8(INT8_C( 115), INT8_C( 121), INT8_C( 28), INT8_C( 32), INT8_C( 39), INT8_C( 97), INT8_C( 104), INT8_C( 44), INT8_C( 120), INT8_C( 11), INT8_C( 74), INT8_C( 63), INT8_C( 24), INT8_C( 35), INT8_C( 108), INT8_C( 9), INT8_C( 30), INT8_C( 94), INT8_C( 96), INT8_C( 119), INT8_C( 14), INT8_C( 94), INT8_C( 34), INT8_C( 111), INT8_C( 86), INT8_C( 6), INT8_C( 116), INT8_C( 56), INT8_C( 2), INT8_C( 8), INT8_C( 66), INT8_C( 73), INT8_C( 111), INT8_C( 20), INT8_C( 114), INT8_C( 16), INT8_C( 71), INT8_C( 17), INT8_C( 13), INT8_C( 101), INT8_C( 32), INT8_C( 52), INT8_C( 6), INT8_C( 16), INT8_C( 78), INT8_C( 58), INT8_C( 14), INT8_C( 85), INT8_C( 58), INT8_C( 120), INT8_C( 102), INT8_C( 125), INT8_C( 73), INT8_C( 121), INT8_C( 118), INT8_C( 77), INT8_C( 84), INT8_C( 62), INT8_C( 100), INT8_C( 122), INT8_C( 17), INT8_C( 81), INT8_C( 105), INT8_C( 71)) }, { simde_mm512_set_epi8(INT8_C( 104), INT8_C( 89), INT8_C( 23), INT8_C( -69), INT8_C( -81), INT8_C( -18), INT8_C(-115), INT8_C( 
45), INT8_C( 111), INT8_C( 97), INT8_C( -96), INT8_C( -52), INT8_C( 117), INT8_C( -89), INT8_C( 83), INT8_C( 55), INT8_C( -79), INT8_C( -41), INT8_C( 65), INT8_C( -18), INT8_C( -14), INT8_C( -36), INT8_C( -5), INT8_C(-118), INT8_C( 102), INT8_C( 66), INT8_C( 6), INT8_C( 63), INT8_C( 2), INT8_C( 71), INT8_C( -79), INT8_C( 103), INT8_C( 99), INT8_C( 75), INT8_C( 18), INT8_C(-125), INT8_C( 89), INT8_C( 97), INT8_C( -12), INT8_C( -68), INT8_C( -29), INT8_C( 64), INT8_C( 90), INT8_C( 106), INT8_C( -66), INT8_C( 46), INT8_C( -67), INT8_C(-122), INT8_C( 35), INT8_C( 89), INT8_C(-123), INT8_C( 49), INT8_C( 79), INT8_C(-111), INT8_C( 102), INT8_C( 13), INT8_C( 18), INT8_C( 7), INT8_C( 11), INT8_C( -54), INT8_C( 79), INT8_C( -18), INT8_C( 80), INT8_C( 58)), simde_mm512_set_epi8(INT8_C( 104), INT8_C( 89), INT8_C( 23), INT8_C( 69), INT8_C( 81), INT8_C( 18), INT8_C( 115), INT8_C( 45), INT8_C( 111), INT8_C( 97), INT8_C( 96), INT8_C( 52), INT8_C( 117), INT8_C( 89), INT8_C( 83), INT8_C( 55), INT8_C( 79), INT8_C( 41), INT8_C( 65), INT8_C( 18), INT8_C( 14), INT8_C( 36), INT8_C( 5), INT8_C( 118), INT8_C( 102), INT8_C( 66), INT8_C( 6), INT8_C( 63), INT8_C( 2), INT8_C( 71), INT8_C( 79), INT8_C( 103), INT8_C( 99), INT8_C( 75), INT8_C( 18), INT8_C( 125), INT8_C( 89), INT8_C( 97), INT8_C( 12), INT8_C( 68), INT8_C( 29), INT8_C( 64), INT8_C( 90), INT8_C( 106), INT8_C( 66), INT8_C( 46), INT8_C( 67), INT8_C( 122), INT8_C( 35), INT8_C( 89), INT8_C( 123), INT8_C( 49), INT8_C( 79), INT8_C( 111), INT8_C( 102), INT8_C( 13), INT8_C( 18), INT8_C( 7), INT8_C( 11), INT8_C( 54), INT8_C( 79), INT8_C( 18), INT8_C( 80), INT8_C( 58)) }, { simde_mm512_set_epi8(INT8_C( -69), INT8_C( -18), INT8_C( -24), INT8_C( 31), INT8_C(-118), INT8_C( 28), INT8_C( 111), INT8_C( 9), INT8_C( -62), INT8_C( 2), INT8_C( 24), INT8_C( 57), INT8_C( 60), INT8_C( 85), INT8_C(-124), INT8_C( 4), INT8_C( -47), INT8_C( -2), INT8_C( -42), INT8_C( 4), INT8_C(-111), INT8_C( 1), INT8_C( -7), INT8_C( 49), INT8_C( 87), INT8_C(-117), INT8_C( 
70), INT8_C( -68), INT8_C( 92), INT8_C( 73), INT8_C( 108), INT8_C( 6), INT8_C( 108), INT8_C( -36), INT8_C( 61), INT8_C( 29), INT8_C( 87), INT8_C( 64), INT8_C(-117), INT8_C( 17), INT8_C( -12), INT8_C( 46), INT8_C( -75), INT8_C( 42), INT8_C( 80), INT8_C( -38), INT8_C( 85), INT8_C(-124), INT8_C(-126), INT8_C( -12), INT8_C( 41), INT8_C( 12), INT8_C( -57), INT8_C( -47), INT8_C( 80), INT8_C( -60), INT8_C( 24), INT8_C( 89), INT8_C( -45), INT8_C(-122), INT8_C( -52), INT8_C( 21), INT8_C( 54), INT8_C( 124)), simde_mm512_set_epi8(INT8_C( 69), INT8_C( 18), INT8_C( 24), INT8_C( 31), INT8_C( 118), INT8_C( 28), INT8_C( 111), INT8_C( 9), INT8_C( 62), INT8_C( 2), INT8_C( 24), INT8_C( 57), INT8_C( 60), INT8_C( 85), INT8_C( 124), INT8_C( 4), INT8_C( 47), INT8_C( 2), INT8_C( 42), INT8_C( 4), INT8_C( 111), INT8_C( 1), INT8_C( 7), INT8_C( 49), INT8_C( 87), INT8_C( 117), INT8_C( 70), INT8_C( 68), INT8_C( 92), INT8_C( 73), INT8_C( 108), INT8_C( 6), INT8_C( 108), INT8_C( 36), INT8_C( 61), INT8_C( 29), INT8_C( 87), INT8_C( 64), INT8_C( 117), INT8_C( 17), INT8_C( 12), INT8_C( 46), INT8_C( 75), INT8_C( 42), INT8_C( 80), INT8_C( 38), INT8_C( 85), INT8_C( 124), INT8_C( 126), INT8_C( 12), INT8_C( 41), INT8_C( 12), INT8_C( 57), INT8_C( 47), INT8_C( 80), INT8_C( 60), INT8_C( 24), INT8_C( 89), INT8_C( 45), INT8_C( 122), INT8_C( 52), INT8_C( 21), INT8_C( 54), INT8_C( 124)) }, { simde_mm512_set_epi8(INT8_C( 23), INT8_C( -45), INT8_C( -87), INT8_C(-128), INT8_C( 79), INT8_C( 64), INT8_C( -72), INT8_C( 109), INT8_C( -1), INT8_C( 120), INT8_C( -18), INT8_C(-122), INT8_C( -56), INT8_C( 0), INT8_C( 100), INT8_C( 60), INT8_C( -78), INT8_C( -63), INT8_C( 26), INT8_C( 35), INT8_C( -65), INT8_C( 72), INT8_C( 38), INT8_C( -77), INT8_C(-123), INT8_C( 106), INT8_C( 7), INT8_C( 83), INT8_C( 87), INT8_C( 105), INT8_C( -86), INT8_C( 65), INT8_C( -41), INT8_C( 111), INT8_C( -74), INT8_C( -72), INT8_C( 30), INT8_C( -92), INT8_C( 62), INT8_C( -69), INT8_C( -56), INT8_C( 120), INT8_C( 86), INT8_C( 20), INT8_C( -82), 
INT8_C( 72), INT8_C( 45), INT8_C( 66), INT8_C( -71), INT8_C(-128), INT8_C( -35), INT8_C( 10), INT8_C( -92), INT8_C( -41), INT8_C( 102), INT8_C( -89), INT8_C( 47), INT8_C( 44), INT8_C( 12), INT8_C( 18), INT8_C( -29), INT8_C( 113), INT8_C( -21), INT8_C( 122)), simde_mm512_set_epi8(INT8_C( 23), INT8_C( 45), INT8_C( 87), INT8_C(-128), INT8_C( 79), INT8_C( 64), INT8_C( 72), INT8_C( 109), INT8_C( 1), INT8_C( 120), INT8_C( 18), INT8_C( 122), INT8_C( 56), INT8_C( 0), INT8_C( 100), INT8_C( 60), INT8_C( 78), INT8_C( 63), INT8_C( 26), INT8_C( 35), INT8_C( 65), INT8_C( 72), INT8_C( 38), INT8_C( 77), INT8_C( 123), INT8_C( 106), INT8_C( 7), INT8_C( 83), INT8_C( 87), INT8_C( 105), INT8_C( 86), INT8_C( 65), INT8_C( 41), INT8_C( 111), INT8_C( 74), INT8_C( 72), INT8_C( 30), INT8_C( 92), INT8_C( 62), INT8_C( 69), INT8_C( 56), INT8_C( 120), INT8_C( 86), INT8_C( 20), INT8_C( 82), INT8_C( 72), INT8_C( 45), INT8_C( 66), INT8_C( 71), INT8_C(-128), INT8_C( 35), INT8_C( 10), INT8_C( 92), INT8_C( 41), INT8_C( 102), INT8_C( 89), INT8_C( 47), INT8_C( 44), INT8_C( 12), INT8_C( 18), INT8_C( 29), INT8_C( 113), INT8_C( 21), INT8_C( 122)) }, { simde_mm512_set_epi8(INT8_C( 6), INT8_C( -58), INT8_C( -97), INT8_C( 99), INT8_C( 24), INT8_C( 108), INT8_C( -42), INT8_C( 116), INT8_C( -51), INT8_C( 37), INT8_C( 17), INT8_C( 87), INT8_C( 119), INT8_C( 22), INT8_C( 38), INT8_C( -86), INT8_C( 70), INT8_C( -19), INT8_C( 116), INT8_C( 4), INT8_C( -77), INT8_C( -68), INT8_C( 19), INT8_C( -39), INT8_C( -4), INT8_C(-120), INT8_C( 84), INT8_C( -27), INT8_C( -68), INT8_C( 120), INT8_C(-117), INT8_C( -33), INT8_C( 3), INT8_C( 109), INT8_C( 85), INT8_C( -14), INT8_C( 121), INT8_C( 30), INT8_C( 108), INT8_C( -1), INT8_C( 114), INT8_C( -61), INT8_C( 46), INT8_C( 93), INT8_C( 48), INT8_C( -57), INT8_C( -97), INT8_C(-100), INT8_C( 84), INT8_C( 0), INT8_C( -87), INT8_C( -47), INT8_C( 85), INT8_C( 2), INT8_C( 125), INT8_C( 35), INT8_C( -12), INT8_C( -7), INT8_C( 3), INT8_C( 4), INT8_C( 86), INT8_C( 111), INT8_C( -66), 
INT8_C( 29)), simde_mm512_set_epi8(INT8_C( 6), INT8_C( 58), INT8_C( 97), INT8_C( 99), INT8_C( 24), INT8_C( 108), INT8_C( 42), INT8_C( 116), INT8_C( 51), INT8_C( 37), INT8_C( 17), INT8_C( 87), INT8_C( 119), INT8_C( 22), INT8_C( 38), INT8_C( 86), INT8_C( 70), INT8_C( 19), INT8_C( 116), INT8_C( 4), INT8_C( 77), INT8_C( 68), INT8_C( 19), INT8_C( 39), INT8_C( 4), INT8_C( 120), INT8_C( 84), INT8_C( 27), INT8_C( 68), INT8_C( 120), INT8_C( 117), INT8_C( 33), INT8_C( 3), INT8_C( 109), INT8_C( 85), INT8_C( 14), INT8_C( 121), INT8_C( 30), INT8_C( 108), INT8_C( 1), INT8_C( 114), INT8_C( 61), INT8_C( 46), INT8_C( 93), INT8_C( 48), INT8_C( 57), INT8_C( 97), INT8_C( 100), INT8_C( 84), INT8_C( 0), INT8_C( 87), INT8_C( 47), INT8_C( 85), INT8_C( 2), INT8_C( 125), INT8_C( 35), INT8_C( 12), INT8_C( 7), INT8_C( 3), INT8_C( 4), INT8_C( 86), INT8_C( 111), INT8_C( 66), INT8_C( 29)) }, { simde_mm512_set_epi8(INT8_C( 48), INT8_C( 61), INT8_C( 127), INT8_C( 76), INT8_C( -86), INT8_C( 122), INT8_C( -96), INT8_C(-118), INT8_C( -38), INT8_C( -8), INT8_C( 56), INT8_C(-108), INT8_C( 1), INT8_C( 8), INT8_C( 22), INT8_C(-116), INT8_C( -52), INT8_C( 92), INT8_C( 68), INT8_C( 112), INT8_C( -94), INT8_C( -84), INT8_C( 98), INT8_C( -49), INT8_C( -43), INT8_C( 105), INT8_C( 71), INT8_C( 34), INT8_C(-126), INT8_C( -5), INT8_C( 5), INT8_C( -61), INT8_C(-125), INT8_C( -31), INT8_C(-128), INT8_C( -41), INT8_C( 82), INT8_C( 17), INT8_C( -47), INT8_C(-121), INT8_C( 0), INT8_C( 118), INT8_C( -18), INT8_C( -96), INT8_C( 45), INT8_C( 28), INT8_C( 105), INT8_C(-104), INT8_C( -15), INT8_C( 24), INT8_C( 94), INT8_C( 103), INT8_C( -54), INT8_C(-112), INT8_C( 15), INT8_C( 123), INT8_C( -27), INT8_C( 121), INT8_C(-118), INT8_C(-112), INT8_C( -70), INT8_C( 97), INT8_C( 58), INT8_C( -42)), simde_mm512_set_epi8(INT8_C( 48), INT8_C( 61), INT8_C( 127), INT8_C( 76), INT8_C( 86), INT8_C( 122), INT8_C( 96), INT8_C( 118), INT8_C( 38), INT8_C( 8), INT8_C( 56), INT8_C( 108), INT8_C( 1), INT8_C( 8), INT8_C( 22), INT8_C( 116), 
INT8_C( 52), INT8_C( 92), INT8_C( 68), INT8_C( 112), INT8_C( 94), INT8_C( 84), INT8_C( 98), INT8_C( 49), INT8_C( 43), INT8_C( 105), INT8_C( 71), INT8_C( 34), INT8_C( 126), INT8_C( 5), INT8_C( 5), INT8_C( 61), INT8_C( 125), INT8_C( 31), INT8_C(-128), INT8_C( 41), INT8_C( 82), INT8_C( 17), INT8_C( 47), INT8_C( 121), INT8_C( 0), INT8_C( 118), INT8_C( 18), INT8_C( 96), INT8_C( 45), INT8_C( 28), INT8_C( 105), INT8_C( 104), INT8_C( 15), INT8_C( 24), INT8_C( 94), INT8_C( 103), INT8_C( 54), INT8_C( 112), INT8_C( 15), INT8_C( 123), INT8_C( 27), INT8_C( 121), INT8_C( 118), INT8_C( 112), INT8_C( 70), INT8_C( 97), INT8_C( 58), INT8_C( 42)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_abs_epi8(test_vec[i].a); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_abs_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask64 k; simde__m512i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( 117), INT8_C(-104), INT8_C( -35), INT8_C( -40), INT8_C( -1), INT8_C( 43), INT8_C( 10), INT8_C( -45), INT8_C( -42), INT8_C( 80), INT8_C( -69), INT8_C( -15), INT8_C( -14), INT8_C(-122), INT8_C( 60), INT8_C( 93), INT8_C( 23), INT8_C( 122), INT8_C( 10), INT8_C( 108), INT8_C( -24), INT8_C( -65), INT8_C( -39), INT8_C( -98), INT8_C( -57), INT8_C( -6), INT8_C( 81), INT8_C( -45), INT8_C( -27), INT8_C( 28), INT8_C( -85), INT8_C( 15), INT8_C(-118), INT8_C( 52), INT8_C( 10), INT8_C(-116), INT8_C( 26), INT8_C( -43), INT8_C( -38), INT8_C( -27), INT8_C( 66), INT8_C( -52), INT8_C( 5), INT8_C( -1), INT8_C( -28), INT8_C( 3), INT8_C( 123), INT8_C(-116), INT8_C( -34), INT8_C( -32), INT8_C( 98), INT8_C( 103), INT8_C( -19), INT8_C(-118), INT8_C( -77), INT8_C( -32), INT8_C( 60), INT8_C( -80), INT8_C( 22), INT8_C( -26), INT8_C( 60), INT8_C( -12), INT8_C( -65), INT8_C( 88)), UINT64_C( 2117573942), simde_mm512_set_epi8(INT8_C( 32), INT8_C( 22), INT8_C( 88), INT8_C( -34), INT8_C( 12), 
INT8_C( 90), INT8_C(-101), INT8_C( -4), INT8_C( -14), INT8_C( 42), INT8_C( -87), INT8_C( 105), INT8_C( 22), INT8_C( 34), INT8_C( 113), INT8_C( -72), INT8_C( -40), INT8_C( -70), INT8_C( -24), INT8_C( -97), INT8_C( -68), INT8_C( -6), INT8_C( 98), INT8_C(-124), INT8_C( -35), INT8_C( 11), INT8_C(-118), INT8_C( -49), INT8_C( -42), INT8_C( 24), INT8_C( -34), INT8_C( 73), INT8_C( -3), INT8_C( -72), INT8_C(-103), INT8_C( 26), INT8_C( -36), INT8_C(-109), INT8_C( 37), INT8_C( 50), INT8_C( 26), INT8_C( 78), INT8_C( 33), INT8_C( 67), INT8_C( -8), INT8_C( -66), INT8_C( 29), INT8_C( 31), INT8_C( 34), INT8_C( 40), INT8_C( -67), INT8_C( 86), INT8_C( 38), INT8_C(-128), INT8_C(-106), INT8_C( -15), INT8_C( 100), INT8_C( 53), INT8_C( 42), INT8_C( 55), INT8_C( 87), INT8_C( -15), INT8_C( -5), INT8_C( -85)), simde_mm512_set_epi8(INT8_C( 117), INT8_C(-104), INT8_C( -35), INT8_C( -40), INT8_C( -1), INT8_C( 43), INT8_C( 10), INT8_C( -45), INT8_C( -42), INT8_C( 80), INT8_C( -69), INT8_C( -15), INT8_C( -14), INT8_C(-122), INT8_C( 60), INT8_C( 93), INT8_C( 23), INT8_C( 122), INT8_C( 10), INT8_C( 108), INT8_C( -24), INT8_C( -65), INT8_C( -39), INT8_C( -98), INT8_C( -57), INT8_C( -6), INT8_C( 81), INT8_C( -45), INT8_C( -27), INT8_C( 28), INT8_C( -85), INT8_C( 15), INT8_C(-118), INT8_C( 72), INT8_C( 103), INT8_C( 26), INT8_C( 36), INT8_C( 109), INT8_C( 37), INT8_C( -27), INT8_C( 66), INT8_C( -52), INT8_C( 33), INT8_C( 67), INT8_C( -28), INT8_C( 66), INT8_C( 29), INT8_C( 31), INT8_C( 34), INT8_C( -32), INT8_C( 98), INT8_C( 86), INT8_C( 38), INT8_C(-128), INT8_C( -77), INT8_C( 15), INT8_C( 60), INT8_C( -80), INT8_C( 42), INT8_C( 55), INT8_C( 60), INT8_C( 15), INT8_C( 5), INT8_C( 88)) }, { simde_mm512_set_epi8(INT8_C( -27), INT8_C(-108), INT8_C(-117), INT8_C( -88), INT8_C(-107), INT8_C( 53), INT8_C( -16), INT8_C( -1), INT8_C( -92), INT8_C(-119), INT8_C( 17), INT8_C(-122), INT8_C( 22), INT8_C( -13), INT8_C( 7), INT8_C(-126), INT8_C( -24), INT8_C( -51), INT8_C( -29), INT8_C(-114), INT8_C( 100), 
INT8_C( -53), INT8_C( 0), INT8_C(-112), INT8_C( -80), INT8_C( 89), INT8_C( 91), INT8_C( 1), INT8_C( 102), INT8_C( -2), INT8_C( -67), INT8_C( -88), INT8_C( -5), INT8_C( -85), INT8_C( 24), INT8_C( 13), INT8_C( 67), INT8_C( 49), INT8_C( 20), INT8_C( -71), INT8_C( -24), INT8_C( 19), INT8_C( -18), INT8_C( 58), INT8_C( 109), INT8_C(-116), INT8_C( 95), INT8_C( 71), INT8_C( 47), INT8_C( 118), INT8_C( -15), INT8_C( -31), INT8_C( -70), INT8_C( -81), INT8_C( 45), INT8_C( 88), INT8_C( -92), INT8_C( 95), INT8_C( -3), INT8_C( -29), INT8_C( 20), INT8_C( -86), INT8_C( -5), INT8_C( 57)), UINT64_C( 3796566764), simde_mm512_set_epi8(INT8_C( -74), INT8_C( 1), INT8_C( -40), INT8_C( 93), INT8_C( 28), INT8_C( 66), INT8_C( 14), INT8_C( 119), INT8_C( -8), INT8_C(-103), INT8_C( 124), INT8_C( -64), INT8_C( -5), INT8_C( 73), INT8_C( 83), INT8_C(-107), INT8_C( -64), INT8_C( -31), INT8_C( 11), INT8_C( 45), INT8_C( -14), INT8_C(-110), INT8_C( 100), INT8_C( -6), INT8_C( -50), INT8_C(-123), INT8_C( -94), INT8_C( 12), INT8_C( -29), INT8_C(-100), INT8_C( 97), INT8_C(-115), INT8_C( 103), INT8_C( -79), INT8_C( 102), INT8_C( -6), INT8_C( -20), INT8_C(-105), INT8_C( -6), INT8_C( 69), INT8_C( 19), INT8_C( 102), INT8_C(-126), INT8_C( -17), INT8_C( 26), INT8_C(-105), INT8_C( 91), INT8_C( -38), INT8_C( 106), INT8_C( 8), INT8_C( 85), INT8_C( -66), INT8_C( 40), INT8_C( -49), INT8_C( 10), INT8_C( 15), INT8_C( 30), INT8_C( 97), INT8_C( -48), INT8_C( 26), INT8_C( 77), INT8_C( 104), INT8_C(-118), INT8_C( 49)), simde_mm512_set_epi8(INT8_C( -27), INT8_C(-108), INT8_C(-117), INT8_C( -88), INT8_C(-107), INT8_C( 53), INT8_C( -16), INT8_C( -1), INT8_C( -92), INT8_C(-119), INT8_C( 17), INT8_C(-122), INT8_C( 22), INT8_C( -13), INT8_C( 7), INT8_C(-126), INT8_C( -24), INT8_C( -51), INT8_C( -29), INT8_C(-114), INT8_C( 100), INT8_C( -53), INT8_C( 0), INT8_C(-112), INT8_C( -80), INT8_C( 89), INT8_C( 91), INT8_C( 1), INT8_C( 102), INT8_C( -2), INT8_C( -67), INT8_C( -88), INT8_C( 103), INT8_C( 79), INT8_C( 102), INT8_C( 13), 
INT8_C( 67), INT8_C( 49), INT8_C( 6), INT8_C( -71), INT8_C( -24), INT8_C( 102), INT8_C( -18), INT8_C( 58), INT8_C( 26), INT8_C(-116), INT8_C( 91), INT8_C( 38), INT8_C( 47), INT8_C( 118), INT8_C( -15), INT8_C( -31), INT8_C( -70), INT8_C( -81), INT8_C( 10), INT8_C( 88), INT8_C( 30), INT8_C( 97), INT8_C( 48), INT8_C( -29), INT8_C( 77), INT8_C( 104), INT8_C( -5), INT8_C( 57)) }, { simde_mm512_set_epi8(INT8_C( 64), INT8_C( 45), INT8_C( -70), INT8_C( 94), INT8_C( 127), INT8_C( -70), INT8_C( 127), INT8_C( -78), INT8_C( -58), INT8_C( 92), INT8_C( -25), INT8_C( -8), INT8_C( 21), INT8_C( 89), INT8_C( 8), INT8_C( 1), INT8_C( 85), INT8_C( 5), INT8_C( 111), INT8_C( 109), INT8_C( 6), INT8_C( -27), INT8_C( 18), INT8_C( 62), INT8_C( -7), INT8_C( 126), INT8_C( -22), INT8_C( -36), INT8_C( -10), INT8_C( -1), INT8_C( 1), INT8_C( 115), INT8_C( 87), INT8_C( 93), INT8_C( -71), INT8_C(-100), INT8_C( -92), INT8_C( 103), INT8_C( -19), INT8_C( -4), INT8_C( 126), INT8_C( 112), INT8_C( -72), INT8_C( 45), INT8_C( 61), INT8_C( -10), INT8_C( 68), INT8_C( -93), INT8_C( 5), INT8_C( 127), INT8_C( 109), INT8_C( -62), INT8_C( -89), INT8_C(-117), INT8_C(-126), INT8_C( 52), INT8_C( -8), INT8_C( -92), INT8_C( -23), INT8_C( -48), INT8_C( 104), INT8_C(-120), INT8_C( -2), INT8_C(-108)), UINT64_C( 2131497860), simde_mm512_set_epi8(INT8_C( 85), INT8_C( 118), INT8_C( 120), INT8_C( -48), INT8_C( 112), INT8_C( 80), INT8_C( -83), INT8_C( 55), INT8_C( 10), INT8_C(-104), INT8_C( -7), INT8_C(-106), INT8_C( -6), INT8_C( 9), INT8_C( -88), INT8_C( 52), INT8_C( 69), INT8_C( 91), INT8_C(-122), INT8_C( 83), INT8_C( 54), INT8_C( -42), INT8_C( 9), INT8_C( 100), INT8_C( 84), INT8_C( 66), INT8_C( 99), INT8_C( -57), INT8_C( 20), INT8_C( -56), INT8_C( -41), INT8_C( 34), INT8_C( 96), INT8_C( 125), INT8_C( 40), INT8_C( -10), INT8_C( 37), INT8_C( -54), INT8_C( -41), INT8_C( 111), INT8_C( -17), INT8_C( 73), INT8_C( 10), INT8_C( 78), INT8_C( -64), INT8_C( 57), INT8_C( 95), INT8_C( 52), INT8_C(-123), INT8_C( 102), INT8_C( -91), 
INT8_C( -25), INT8_C( -74), INT8_C( 23), INT8_C(-127), INT8_C( -43), INT8_C( 123), INT8_C( -21), INT8_C( -69), INT8_C( 72), INT8_C( -86), INT8_C( 39), INT8_C( -52), INT8_C( 88)), simde_mm512_set_epi8(INT8_C( 64), INT8_C( 45), INT8_C( -70), INT8_C( 94), INT8_C( 127), INT8_C( -70), INT8_C( 127), INT8_C( -78), INT8_C( -58), INT8_C( 92), INT8_C( -25), INT8_C( -8), INT8_C( 21), INT8_C( 89), INT8_C( 8), INT8_C( 1), INT8_C( 85), INT8_C( 5), INT8_C( 111), INT8_C( 109), INT8_C( 6), INT8_C( -27), INT8_C( 18), INT8_C( 62), INT8_C( -7), INT8_C( 126), INT8_C( -22), INT8_C( -36), INT8_C( -10), INT8_C( -1), INT8_C( 1), INT8_C( 115), INT8_C( 87), INT8_C( 125), INT8_C( 40), INT8_C( 10), INT8_C( 37), INT8_C( 54), INT8_C( 41), INT8_C( 111), INT8_C( 126), INT8_C( 112), INT8_C( -72), INT8_C( 45), INT8_C( 64), INT8_C( 57), INT8_C( 68), INT8_C( -93), INT8_C( 5), INT8_C( 127), INT8_C( 109), INT8_C( 25), INT8_C( -89), INT8_C(-117), INT8_C( 127), INT8_C( 43), INT8_C( 123), INT8_C( -92), INT8_C( -23), INT8_C( -48), INT8_C( 104), INT8_C( 39), INT8_C( -2), INT8_C(-108)) }, { simde_mm512_set_epi8(INT8_C( -39), INT8_C(-117), INT8_C( -99), INT8_C( -55), INT8_C( 3), INT8_C( -15), INT8_C( 113), INT8_C( -3), INT8_C( -35), INT8_C( 100), INT8_C( -74), INT8_C(-107), INT8_C( 44), INT8_C( -58), INT8_C( 20), INT8_C( 23), INT8_C( 105), INT8_C( -68), INT8_C( 118), INT8_C( -13), INT8_C( -81), INT8_C( 41), INT8_C( -73), INT8_C(-115), INT8_C(-111), INT8_C( 21), INT8_C( 99), INT8_C( 117), INT8_C( -14), INT8_C(-112), INT8_C( 71), INT8_C( 21), INT8_C(-114), INT8_C( -75), INT8_C( 66), INT8_C(-119), INT8_C( -62), INT8_C( -30), INT8_C( 86), INT8_C(-128), INT8_C( 109), INT8_C( 15), INT8_C( -69), INT8_C( 22), INT8_C( -13), INT8_C( 38), INT8_C( -93), INT8_C( -41), INT8_C( 96), INT8_C( 79), INT8_C( -24), INT8_C( -40), INT8_C( 90), INT8_C( 31), INT8_C( -35), INT8_C( 22), INT8_C(-112), INT8_C( -37), INT8_C( 29), INT8_C( 29), INT8_C( 7), INT8_C( 8), INT8_C( 106), INT8_C( -46)), UINT64_C( 127712386), 
simde_mm512_set_epi8(INT8_C( 68), INT8_C( 120), INT8_C( -69), INT8_C( -50), INT8_C( 102), INT8_C(-123), INT8_C( 95), INT8_C( 110), INT8_C( 90), INT8_C( -66), INT8_C( -52), INT8_C( 44), INT8_C(-111), INT8_C( 10), INT8_C(-111), INT8_C( 20), INT8_C( -11), INT8_C(-128), INT8_C( -17), INT8_C( -40), INT8_C( -41), INT8_C( 0), INT8_C( -15), INT8_C( 105), INT8_C( 81), INT8_C( 3), INT8_C( 23), INT8_C( 107), INT8_C( -18), INT8_C( 80), INT8_C(-106), INT8_C( 52), INT8_C( 80), INT8_C( 120), INT8_C( 83), INT8_C(-117), INT8_C( 84), INT8_C( -78), INT8_C( 47), INT8_C( -33), INT8_C( 103), INT8_C( 66), INT8_C( 79), INT8_C( 53), INT8_C( -45), INT8_C( 20), INT8_C( 111), INT8_C( -59), INT8_C( -18), INT8_C( 30), INT8_C( 70), INT8_C( -25), INT8_C( -57), INT8_C( 18), INT8_C( -4), INT8_C( 101), INT8_C( 75), INT8_C( 12), INT8_C( 85), INT8_C( 93), INT8_C( -79), INT8_C( -13), INT8_C( 43), INT8_C( 45)), simde_mm512_set_epi8(INT8_C( -39), INT8_C(-117), INT8_C( -99), INT8_C( -55), INT8_C( 3), INT8_C( -15), INT8_C( 113), INT8_C( -3), INT8_C( -35), INT8_C( 100), INT8_C( -74), INT8_C(-107), INT8_C( 44), INT8_C( -58), INT8_C( 20), INT8_C( 23), INT8_C( 105), INT8_C( -68), INT8_C( 118), INT8_C( -13), INT8_C( -81), INT8_C( 41), INT8_C( -73), INT8_C(-115), INT8_C(-111), INT8_C( 21), INT8_C( 99), INT8_C( 117), INT8_C( -14), INT8_C(-112), INT8_C( 71), INT8_C( 21), INT8_C(-114), INT8_C( -75), INT8_C( 66), INT8_C(-119), INT8_C( -62), INT8_C( 78), INT8_C( 47), INT8_C( 33), INT8_C( 103), INT8_C( 15), INT8_C( -69), INT8_C( 53), INT8_C( 45), INT8_C( 20), INT8_C( -93), INT8_C( -41), INT8_C( 18), INT8_C( 79), INT8_C( 70), INT8_C( 25), INT8_C( 57), INT8_C( 18), INT8_C( -35), INT8_C( 22), INT8_C( 75), INT8_C( -37), INT8_C( 29), INT8_C( 29), INT8_C( 7), INT8_C( 8), INT8_C( 43), INT8_C( -46)) }, { simde_mm512_set_epi8(INT8_C( -81), INT8_C( 98), INT8_C( 23), INT8_C(-108), INT8_C(-126), INT8_C( 95), INT8_C( -44), INT8_C( -56), INT8_C( 42), INT8_C( 32), INT8_C( -91), INT8_C(-126), INT8_C( 119), INT8_C( 88), INT8_C( 110), 
INT8_C( 93), INT8_C( 75), INT8_C( -49), INT8_C( -63), INT8_C( -42), INT8_C( 54), INT8_C( -71), INT8_C( 87), INT8_C( -1), INT8_C( -25), INT8_C( -60), INT8_C( 102), INT8_C( -98), INT8_C( -95), INT8_C( -34), INT8_C( -46), INT8_C( 94), INT8_C( 118), INT8_C( 127), INT8_C( -62), INT8_C( -70), INT8_C( 80), INT8_C( 125), INT8_C( -12), INT8_C( 33), INT8_C( 110), INT8_C( -9), INT8_C( -29), INT8_C(-115), INT8_C(-117), INT8_C( 52), INT8_C(-126), INT8_C( -15), INT8_C(-118), INT8_C(-123), INT8_C( -16), INT8_C( 72), INT8_C( 84), INT8_C( 54), INT8_C( 76), INT8_C( -48), INT8_C( -79), INT8_C( 100), INT8_C( -58), INT8_C( 30), INT8_C( 35), INT8_C( 68), INT8_C( -40), INT8_C( 8)), UINT64_C( 522030218), simde_mm512_set_epi8(INT8_C( -1), INT8_C( -56), INT8_C( -80), INT8_C( 17), INT8_C( 127), INT8_C( 83), INT8_C( -9), INT8_C( 0), INT8_C( -1), INT8_C( 117), INT8_C( -15), INT8_C( 26), INT8_C( 30), INT8_C( -32), INT8_C( 47), INT8_C( 99), INT8_C( -59), INT8_C( -81), INT8_C( -58), INT8_C( 71), INT8_C(-119), INT8_C( -65), INT8_C( -78), INT8_C(-101), INT8_C( -14), INT8_C( 4), INT8_C( -24), INT8_C( -95), INT8_C( 106), INT8_C( 31), INT8_C( 104), INT8_C( 20), INT8_C( 65), INT8_C( -8), INT8_C( -75), INT8_C(-128), INT8_C( -81), INT8_C( 68), INT8_C( -86), INT8_C( 98), INT8_C( -55), INT8_C( 10), INT8_C( 75), INT8_C( 51), INT8_C( -57), INT8_C(-111), INT8_C( 87), INT8_C( 47), INT8_C( -21), INT8_C( 105), INT8_C( 17), INT8_C( 107), INT8_C(-119), INT8_C( -18), INT8_C(-123), INT8_C( 81), INT8_C( 54), INT8_C(-122), INT8_C( -83), INT8_C( 81), INT8_C( 21), INT8_C( 13), INT8_C( 6), INT8_C( -56)), simde_mm512_set_epi8(INT8_C( -81), INT8_C( 98), INT8_C( 23), INT8_C(-108), INT8_C(-126), INT8_C( 95), INT8_C( -44), INT8_C( -56), INT8_C( 42), INT8_C( 32), INT8_C( -91), INT8_C(-126), INT8_C( 119), INT8_C( 88), INT8_C( 110), INT8_C( 93), INT8_C( 75), INT8_C( -49), INT8_C( -63), INT8_C( -42), INT8_C( 54), INT8_C( -71), INT8_C( 87), INT8_C( -1), INT8_C( -25), INT8_C( -60), INT8_C( 102), INT8_C( -98), INT8_C( -95), INT8_C( 
-34), INT8_C( -46), INT8_C( 94), INT8_C( 118), INT8_C( 127), INT8_C( -62), INT8_C(-128), INT8_C( 81), INT8_C( 68), INT8_C( 86), INT8_C( 98), INT8_C( 110), INT8_C( -9), INT8_C( -29), INT8_C( 51), INT8_C( 57), INT8_C( 111), INT8_C(-126), INT8_C( 47), INT8_C( 21), INT8_C(-123), INT8_C( -16), INT8_C( 72), INT8_C( 119), INT8_C( 18), INT8_C( 76), INT8_C( -48), INT8_C( 54), INT8_C( 100), INT8_C( -58), INT8_C( 30), INT8_C( 21), INT8_C( 68), INT8_C( 6), INT8_C( 8)) }, { simde_mm512_set_epi8(INT8_C(-112), INT8_C( -53), INT8_C(-107), INT8_C( 41), INT8_C( -50), INT8_C( -58), INT8_C( 56), INT8_C( 54), INT8_C(-101), INT8_C(-123), INT8_C( 64), INT8_C( -70), INT8_C( -46), INT8_C( -1), INT8_C( 70), INT8_C( -46), INT8_C( 96), INT8_C( 45), INT8_C( 57), INT8_C( -8), INT8_C( 23), INT8_C( 34), INT8_C( -16), INT8_C( -48), INT8_C( 74), INT8_C( 85), INT8_C(-106), INT8_C( 98), INT8_C( 81), INT8_C(-107), INT8_C( -43), INT8_C( 64), INT8_C(-110), INT8_C( 124), INT8_C(-122), INT8_C(-123), INT8_C( 20), INT8_C( 122), INT8_C( 57), INT8_C( -15), INT8_C( 58), INT8_C( 90), INT8_C(-103), INT8_C( 57), INT8_C( 51), INT8_C(-118), INT8_C( 37), INT8_C( -79), INT8_C( 13), INT8_C( 116), INT8_C( -79), INT8_C( -18), INT8_C( -87), INT8_C( -79), INT8_C( -83), INT8_C( -25), INT8_C( -30), INT8_C( -40), INT8_C( 126), INT8_C( 80), INT8_C( -74), INT8_C( 71), INT8_C( -68), INT8_C( 53)), UINT64_C( 2821348422), simde_mm512_set_epi8(INT8_C(-126), INT8_C( -8), INT8_C( 35), INT8_C( 112), INT8_C( -78), INT8_C( 75), INT8_C( -25), INT8_C( 1), INT8_C( -27), INT8_C( -67), INT8_C( 49), INT8_C( 75), INT8_C( -39), INT8_C( -68), INT8_C( -51), INT8_C( 42), INT8_C( -30), INT8_C( 1), INT8_C( -18), INT8_C( -4), INT8_C( 39), INT8_C( 85), INT8_C( 69), INT8_C( 68), INT8_C(-113), INT8_C( -38), INT8_C( 28), INT8_C( 83), INT8_C( -31), INT8_C( 61), INT8_C( 37), INT8_C( 67), INT8_C( 46), INT8_C( -43), INT8_C( 32), INT8_C( -73), INT8_C( -26), INT8_C( 2), INT8_C( -6), INT8_C( 122), INT8_C( -51), INT8_C( 118), INT8_C( 3), INT8_C( 17), INT8_C( 
32), INT8_C( 82), INT8_C( 40), INT8_C( 0), INT8_C( 28), INT8_C( 37), INT8_C( -3), INT8_C( -85), INT8_C( -92), INT8_C( 45), INT8_C( -23), INT8_C( -58), INT8_C(-108), INT8_C( 44), INT8_C( 28), INT8_C( 77), INT8_C( 12), INT8_C( 81), INT8_C(-103), INT8_C( 7)), simde_mm512_set_epi8(INT8_C(-112), INT8_C( -53), INT8_C(-107), INT8_C( 41), INT8_C( -50), INT8_C( -58), INT8_C( 56), INT8_C( 54), INT8_C(-101), INT8_C(-123), INT8_C( 64), INT8_C( -70), INT8_C( -46), INT8_C( -1), INT8_C( 70), INT8_C( -46), INT8_C( 96), INT8_C( 45), INT8_C( 57), INT8_C( -8), INT8_C( 23), INT8_C( 34), INT8_C( -16), INT8_C( -48), INT8_C( 74), INT8_C( 85), INT8_C(-106), INT8_C( 98), INT8_C( 81), INT8_C(-107), INT8_C( -43), INT8_C( 64), INT8_C( 46), INT8_C( 124), INT8_C( 32), INT8_C(-123), INT8_C( 26), INT8_C( 122), INT8_C( 57), INT8_C( -15), INT8_C( 58), INT8_C( 90), INT8_C( 3), INT8_C( 57), INT8_C( 32), INT8_C(-118), INT8_C( 40), INT8_C( -79), INT8_C( 13), INT8_C( 37), INT8_C( -79), INT8_C( 85), INT8_C( 92), INT8_C( 45), INT8_C( -83), INT8_C( -25), INT8_C( -30), INT8_C( 44), INT8_C( 126), INT8_C( 80), INT8_C( -74), INT8_C( 81), INT8_C( 103), INT8_C( 53)) }, { simde_mm512_set_epi8(INT8_C( 115), INT8_C( -13), INT8_C( 104), INT8_C( 83), INT8_C( 80), INT8_C(-118), INT8_C( 34), INT8_C( 48), INT8_C( 50), INT8_C( -65), INT8_C( 88), INT8_C( 76), INT8_C( -17), INT8_C( -86), INT8_C( -68), INT8_C( 75), INT8_C( 121), INT8_C( 9), INT8_C( -63), INT8_C( 106), INT8_C( 93), INT8_C( 44), INT8_C( 0), INT8_C( -33), INT8_C( -53), INT8_C( 101), INT8_C( 76), INT8_C( 37), INT8_C( 94), INT8_C( -32), INT8_C(-104), INT8_C( -20), INT8_C( -48), INT8_C( 45), INT8_C( 88), INT8_C( -93), INT8_C( 104), INT8_C( 42), INT8_C( -99), INT8_C( 59), INT8_C( 90), INT8_C( -69), INT8_C( 107), INT8_C( 16), INT8_C(-118), INT8_C(-119), INT8_C( -60), INT8_C( 51), INT8_C( 126), INT8_C( -78), INT8_C( 114), INT8_C( -75), INT8_C( -75), INT8_C( 19), INT8_C( 113), INT8_C( 84), INT8_C( 47), INT8_C( -83), INT8_C( -26), INT8_C( -38), INT8_C( 64), 
INT8_C(-106), INT8_C( 107), INT8_C( 56)), UINT64_C( 1977462364), simde_mm512_set_epi8(INT8_C(-106), INT8_C( -34), INT8_C( 105), INT8_C( -49), INT8_C( -33), INT8_C( 121), INT8_C( 0), INT8_C( 127), INT8_C( -65), INT8_C( -90), INT8_C(-123), INT8_C( 112), INT8_C( -57), INT8_C( 77), INT8_C( 42), INT8_C( 34), INT8_C( -12), INT8_C( -47), INT8_C( 117), INT8_C( 40), INT8_C( 42), INT8_C( 16), INT8_C( -26), INT8_C( 122), INT8_C( 122), INT8_C( -37), INT8_C( -98), INT8_C( -20), INT8_C( 86), INT8_C( -87), INT8_C( -90), INT8_C(-112), INT8_C(-115), INT8_C( 79), INT8_C( 123), INT8_C( 33), INT8_C( -55), INT8_C(-125), INT8_C( 102), INT8_C( 59), INT8_C( -57), INT8_C( 19), INT8_C( -4), INT8_C( -55), INT8_C( -86), INT8_C( 88), INT8_C( -47), INT8_C( 29), INT8_C(-116), INT8_C( -58), INT8_C( 115), INT8_C( -63), INT8_C( -15), INT8_C( -54), INT8_C( 84), INT8_C( -1), INT8_C( 5), INT8_C( -33), INT8_C( -96), INT8_C( 93), INT8_C( 97), INT8_C( 124), INT8_C( 26), INT8_C( -34)), simde_mm512_set_epi8(INT8_C( 115), INT8_C( -13), INT8_C( 104), INT8_C( 83), INT8_C( 80), INT8_C(-118), INT8_C( 34), INT8_C( 48), INT8_C( 50), INT8_C( -65), INT8_C( 88), INT8_C( 76), INT8_C( -17), INT8_C( -86), INT8_C( -68), INT8_C( 75), INT8_C( 121), INT8_C( 9), INT8_C( -63), INT8_C( 106), INT8_C( 93), INT8_C( 44), INT8_C( 0), INT8_C( -33), INT8_C( -53), INT8_C( 101), INT8_C( 76), INT8_C( 37), INT8_C( 94), INT8_C( -32), INT8_C(-104), INT8_C( -20), INT8_C( -48), INT8_C( 79), INT8_C( 123), INT8_C( 33), INT8_C( 104), INT8_C( 125), INT8_C( -99), INT8_C( 59), INT8_C( 57), INT8_C( 19), INT8_C( 107), INT8_C( 55), INT8_C( 86), INT8_C( 88), INT8_C( -60), INT8_C( 29), INT8_C( 116), INT8_C( -78), INT8_C( 115), INT8_C( -75), INT8_C( 15), INT8_C( 54), INT8_C( 84), INT8_C( 84), INT8_C( 47), INT8_C( 33), INT8_C( -26), INT8_C( 93), INT8_C( 97), INT8_C( 124), INT8_C( 107), INT8_C( 56)) }, { simde_mm512_set_epi8(INT8_C( 2), INT8_C( -4), INT8_C( 108), INT8_C( 27), INT8_C( -49), INT8_C( 69), INT8_C( -84), INT8_C( 82), INT8_C( 9), INT8_C( 0), 
INT8_C( 42), INT8_C( 118), INT8_C( -3), INT8_C( -67), INT8_C( 6), INT8_C( 30), INT8_C( -88), INT8_C( -69), INT8_C( 118), INT8_C( 36), INT8_C( 110), INT8_C( 81), INT8_C( -37), INT8_C( 36), INT8_C( -74), INT8_C(-109), INT8_C( 47), INT8_C( 12), INT8_C( -29), INT8_C( -81), INT8_C( 76), INT8_C( -22), INT8_C( 91), INT8_C( 125), INT8_C( 98), INT8_C( 17), INT8_C( 115), INT8_C( 58), INT8_C(-107), INT8_C( 90), INT8_C( 115), INT8_C( -24), INT8_C( 83), INT8_C( 17), INT8_C( -11), INT8_C( 20), INT8_C( 81), INT8_C( 54), INT8_C( -59), INT8_C( 112), INT8_C(-102), INT8_C( 13), INT8_C( 8), INT8_C(-105), INT8_C( -27), INT8_C(-127), INT8_C(-112), INT8_C( 125), INT8_C( 21), INT8_C( 55), INT8_C( 24), INT8_C( 58), INT8_C( 7), INT8_C( 127)), UINT64_C( 751965274), simde_mm512_set_epi8(INT8_C( 90), INT8_C( 75), INT8_C( -70), INT8_C( 89), INT8_C( 25), INT8_C( -86), INT8_C( -40), INT8_C( -9), INT8_C(-119), INT8_C( -19), INT8_C( 110), INT8_C( -26), INT8_C(-126), INT8_C( 124), INT8_C( 6), INT8_C( -11), INT8_C( -92), INT8_C( 66), INT8_C( -68), INT8_C( 20), INT8_C( 35), INT8_C( 35), INT8_C( 58), INT8_C( 98), INT8_C( 84), INT8_C( -34), INT8_C( 36), INT8_C(-124), INT8_C( 32), INT8_C( -74), INT8_C( 73), INT8_C( -74), INT8_C( 77), INT8_C( 116), INT8_C( 50), INT8_C( 82), INT8_C( 68), INT8_C( 72), INT8_C( 23), INT8_C( 32), INT8_C( -54), INT8_C( 82), INT8_C( 53), INT8_C( 71), INT8_C( 22), INT8_C( 92), INT8_C( 42), INT8_C(-123), INT8_C( -41), INT8_C( 34), INT8_C( 75), INT8_C( 63), INT8_C(-117), INT8_C( 23), INT8_C(-115), INT8_C( 66), INT8_C( -90), INT8_C( 99), INT8_C( -73), INT8_C( -19), INT8_C( -43), INT8_C( -64), INT8_C( -21), INT8_C( 20)), simde_mm512_set_epi8(INT8_C( 2), INT8_C( -4), INT8_C( 108), INT8_C( 27), INT8_C( -49), INT8_C( 69), INT8_C( -84), INT8_C( 82), INT8_C( 9), INT8_C( 0), INT8_C( 42), INT8_C( 118), INT8_C( -3), INT8_C( -67), INT8_C( 6), INT8_C( 30), INT8_C( -88), INT8_C( -69), INT8_C( 118), INT8_C( 36), INT8_C( 110), INT8_C( 81), INT8_C( -37), INT8_C( 36), INT8_C( -74), INT8_C(-109), 
INT8_C( 47), INT8_C( 12), INT8_C( -29), INT8_C( -81), INT8_C( 76), INT8_C( -22), INT8_C( 91), INT8_C( 125), INT8_C( 50), INT8_C( 17), INT8_C( 68), INT8_C( 72), INT8_C(-107), INT8_C( 90), INT8_C( 54), INT8_C( 82), INT8_C( 83), INT8_C( 71), INT8_C( -11), INT8_C( 20), INT8_C( 42), INT8_C( 54), INT8_C( -59), INT8_C( 112), INT8_C(-102), INT8_C( 63), INT8_C( 8), INT8_C( 23), INT8_C( -27), INT8_C(-127), INT8_C(-112), INT8_C( 99), INT8_C( 21), INT8_C( 19), INT8_C( 43), INT8_C( 58), INT8_C( 21), INT8_C( 127)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_abs_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_abs_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask64 k; simde__m512i a; simde__m512i r; } test_vec[8] = { { UINT64_C( 1713497089), simde_mm512_set_epi8(INT8_C(-105), INT8_C( 80), INT8_C( -16), INT8_C(-124), INT8_C( -48), INT8_C( 76), INT8_C( -91), INT8_C(-128), INT8_C( 54), INT8_C( 63), INT8_C( 114), INT8_C( -73), INT8_C( -26), INT8_C( -48), INT8_C( -24), INT8_C( -13), INT8_C( 5), INT8_C( 123), INT8_C( -45), INT8_C( -57), INT8_C(-107), INT8_C( 47), INT8_C( 90), INT8_C( -54), INT8_C( 1), INT8_C( 118), INT8_C( 37), INT8_C( -7), INT8_C( 83), INT8_C( 31), INT8_C( -23), INT8_C( -20), INT8_C(-104), INT8_C( 114), INT8_C( 63), INT8_C( 25), INT8_C( -80), INT8_C( 17), INT8_C( 37), INT8_C( -44), INT8_C(-112), INT8_C( 41), INT8_C( -18), INT8_C( 86), INT8_C( 114), INT8_C( -23), INT8_C( -86), INT8_C( -99), INT8_C( 114), INT8_C( 25), INT8_C( 94), INT8_C( 34), INT8_C( -48), INT8_C( -4), INT8_C(-123), INT8_C( -44), INT8_C( -68), INT8_C( 19), INT8_C( 47), INT8_C(-122), INT8_C( 117), INT8_C( 69), INT8_C(-121), INT8_C( 66)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 
0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 114), INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 17), INT8_C( 37), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 18), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 99), INT8_C( 114), INT8_C( 25), INT8_C( 94), INT8_C( 0), INT8_C( 0), INT8_C( 4), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 66)) }, { UINT64_C( 549841533), simde_mm512_set_epi8(INT8_C( -84), INT8_C( 24), INT8_C( 17), INT8_C( -28), INT8_C( -3), INT8_C( 88), INT8_C( 98), INT8_C( -52), INT8_C( -76), INT8_C( -19), INT8_C( 100), INT8_C( 59), INT8_C( -64), INT8_C( -60), INT8_C( -53), INT8_C( 16), INT8_C( 0), INT8_C( -89), INT8_C( 13), INT8_C( 17), INT8_C( 116), INT8_C( 41), INT8_C( 54), INT8_C( -8), INT8_C(-112), INT8_C( 109), INT8_C( 94), INT8_C( 19), INT8_C( 46), INT8_C( -55), INT8_C( 103), INT8_C( 7), INT8_C( -15), INT8_C( -12), INT8_C( -22), INT8_C( 127), INT8_C( -48), INT8_C( -83), INT8_C( -9), INT8_C( -85), INT8_C( -79), INT8_C( -12), INT8_C( 76), INT8_C( -65), INT8_C( -90), INT8_C( 19), INT8_C( 33), INT8_C( -50), INT8_C( 89), INT8_C( -40), INT8_C(-117), INT8_C( 111), INT8_C( 48), INT8_C( 119), INT8_C( -55), INT8_C( 66), INT8_C( 113), INT8_C( -2), INT8_C( -49), INT8_C(-110), INT8_C( -55), INT8_C( 44), INT8_C( 125), INT8_C( -61)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 22), INT8_C( 0), INT8_C( 0), INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( 79), INT8_C( 12), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 19), INT8_C( 0), INT8_C( 50), INT8_C( 89), INT8_C( 40), INT8_C( 117), INT8_C( 0), INT8_C( 48), INT8_C( 0), INT8_C( 55), INT8_C( 0), INT8_C( 0), INT8_C( 2), INT8_C( 49), INT8_C( 110), INT8_C( 55), INT8_C( 44), INT8_C( 0), INT8_C( 61)) }, { UINT64_C( 2304862624), simde_mm512_set_epi8(INT8_C( 71), INT8_C( -17), INT8_C( 0), INT8_C( -82), INT8_C( -27), INT8_C( 124), INT8_C( 45), INT8_C( 57), INT8_C( 107), INT8_C( -93), INT8_C( -77), INT8_C( 53), INT8_C( 126), INT8_C( 10), INT8_C( 123), INT8_C(-113), INT8_C( -41), INT8_C(-108), INT8_C( -59), INT8_C( -36), INT8_C( -24), INT8_C( -51), INT8_C( -68), INT8_C( -38), INT8_C( 19), INT8_C( 120), INT8_C(-118), INT8_C( 63), INT8_C( 24), INT8_C( 72), INT8_C( 39), INT8_C( 31), INT8_C( -92), INT8_C( 52), INT8_C( 81), INT8_C( 39), INT8_C( -70), INT8_C( 73), INT8_C( 76), INT8_C( 114), INT8_C( -7), INT8_C( 4), INT8_C( -55), INT8_C( -68), INT8_C( 120), INT8_C( 98), INT8_C(-115), INT8_C( -56), INT8_C( 93), INT8_C( -2), INT8_C( 78), INT8_C( 16), INT8_C( 88), INT8_C( 71), INT8_C(-112), INT8_C(-118), INT8_C( 4), INT8_C( -88), INT8_C( 76), INT8_C( 88), INT8_C( -97), INT8_C( 107), INT8_C( -28), INT8_C( -59)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 92), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 70), INT8_C( 0), INT8_C( 0), INT8_C( 114), INT8_C( 0), INT8_C( 4), INT8_C( 55), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 56), INT8_C( 0), INT8_C( 2), INT8_C( 78), INT8_C( 0), INT8_C( 88), INT8_C( 0), INT8_C( 0), INT8_C( 118), INT8_C( 4), INT8_C( 0), INT8_C( 76), INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { UINT64_C( 2156618221), simde_mm512_set_epi8(INT8_C( -88), INT8_C( -28), INT8_C( -75), INT8_C( 34), INT8_C( -30), INT8_C( -1), INT8_C( 52), INT8_C( -92), INT8_C( -85), INT8_C( 43), INT8_C( 9), INT8_C( 24), INT8_C( -64), INT8_C( 107), INT8_C( -57), INT8_C( 38), INT8_C( 95), INT8_C( -18), INT8_C( 11), INT8_C( 96), INT8_C( -4), INT8_C( -94), INT8_C( 116), INT8_C( -31), INT8_C( 52), INT8_C( -2), INT8_C( 98), INT8_C( 10), INT8_C( 5), INT8_C( 19), INT8_C( -65), INT8_C( 10), INT8_C(-109), INT8_C( 52), INT8_C( -85), INT8_C( -32), INT8_C( 38), INT8_C( 92), INT8_C( 6), INT8_C( -71), INT8_C( -79), INT8_C( 79), INT8_C( -94), INT8_C( 113), INT8_C(-117), INT8_C( 20), INT8_C( -82), INT8_C( 82), INT8_C(-120), INT8_C( 114), INT8_C( -52), INT8_C( -68), INT8_C( -20), INT8_C( -47), INT8_C( -90), INT8_C( -87), INT8_C( 79), INT8_C( -37), INT8_C( 63), INT8_C( -89), INT8_C( -40), INT8_C( -67), INT8_C( -69), INT8_C(-117)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 109), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 79), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 117), INT8_C( 0), INT8_C( 82), INT8_C( 82), INT8_C( 0), INT8_C( 114), INT8_C( 52), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 87), INT8_C( 79), INT8_C( 37), INT8_C( 63), INT8_C( 0), INT8_C( 40), INT8_C( 67), INT8_C( 0), INT8_C( 117)) }, { UINT64_C( 2985927056), simde_mm512_set_epi8(INT8_C(-128), INT8_C( 11), INT8_C( -31), INT8_C( 116), INT8_C( -77), INT8_C( 97), INT8_C( 87), INT8_C( 53), INT8_C( -33), INT8_C( 37), INT8_C( 28), INT8_C( 24), INT8_C(-103), 
INT8_C( 99), INT8_C( -75), INT8_C( 41), INT8_C( 83), INT8_C( 39), INT8_C( 120), INT8_C( 115), INT8_C( -51), INT8_C( -28), INT8_C( 102), INT8_C( -98), INT8_C( -77), INT8_C( 121), INT8_C( 42), INT8_C( 114), INT8_C( -1), INT8_C( 112), INT8_C( 17), INT8_C( -31), INT8_C( 108), INT8_C( -27), INT8_C( 66), INT8_C( 23), INT8_C( 69), INT8_C( -90), INT8_C( -46), INT8_C( -91), INT8_C( -81), INT8_C( -87), INT8_C( 1), INT8_C( -11), INT8_C( 84), INT8_C(-117), INT8_C( 79), INT8_C(-110), INT8_C( -44), INT8_C( -30), INT8_C( 33), INT8_C( 53), INT8_C( 64), INT8_C( -16), INT8_C(-111), INT8_C( -41), INT8_C(-102), INT8_C( 13), INT8_C( 97), INT8_C( -55), INT8_C( 19), INT8_C( -16), INT8_C( -68), INT8_C( -83)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 108), INT8_C( 0), INT8_C( 66), INT8_C( 23), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 91), INT8_C( 81), INT8_C( 87), INT8_C( 1), INT8_C( 11), INT8_C( 84), INT8_C( 0), INT8_C( 0), INT8_C( 110), INT8_C( 44), INT8_C( 0), INT8_C( 33), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 41), INT8_C( 102), INT8_C( 0), INT8_C( 0), INT8_C( 55), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { UINT64_C( 390836854), simde_mm512_set_epi8(INT8_C( -6), INT8_C( 127), INT8_C(-110), INT8_C( -8), INT8_C( 106), INT8_C( 95), INT8_C(-126), INT8_C(-127), INT8_C(-103), INT8_C( -21), INT8_C( -20), INT8_C( -71), INT8_C( 106), INT8_C( 23), INT8_C( -51), INT8_C( -47), INT8_C(-107), INT8_C( 61), INT8_C( -93), INT8_C( 10), INT8_C( 4), INT8_C( 110), INT8_C( -43), INT8_C( 40), INT8_C( 60), INT8_C( -40), INT8_C( 36), INT8_C( -39), INT8_C( -80), INT8_C(-110), INT8_C( 14), INT8_C( -61), INT8_C( -39), 
INT8_C( -70), INT8_C(-116), INT8_C( -99), INT8_C( -82), INT8_C(-113), INT8_C(-120), INT8_C(-116), INT8_C( -58), INT8_C( 18), INT8_C( 72), INT8_C( 23), INT8_C(-117), INT8_C(-105), INT8_C( 83), INT8_C( 3), INT8_C(-104), INT8_C( 34), INT8_C( 72), INT8_C( -33), INT8_C( 84), INT8_C( -90), INT8_C(-116), INT8_C( -46), INT8_C( -18), INT8_C( 96), INT8_C( -46), INT8_C(-109), INT8_C(-103), INT8_C( -18), INT8_C( -39), INT8_C( 67)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 99), INT8_C( 0), INT8_C( 113), INT8_C( 120), INT8_C( 116), INT8_C( 0), INT8_C( 18), INT8_C( 0), INT8_C( 0), INT8_C( 117), INT8_C( 0), INT8_C( 83), INT8_C( 3), INT8_C( 104), INT8_C( 0), INT8_C( 72), INT8_C( 33), INT8_C( 0), INT8_C( 0), INT8_C( 116), INT8_C( 0), INT8_C( 0), INT8_C( 96), INT8_C( 46), INT8_C( 109), INT8_C( 0), INT8_C( 18), INT8_C( 39), INT8_C( 0)) }, { UINT64_C( 189869641), simde_mm512_set_epi8(INT8_C( 28), INT8_C(-101), INT8_C(-104), INT8_C(-117), INT8_C( 24), INT8_C( -55), INT8_C( 82), INT8_C(-100), INT8_C( -42), INT8_C( 62), INT8_C(-113), INT8_C( 110), INT8_C( -92), INT8_C( 127), INT8_C( -92), INT8_C( 20), INT8_C( -35), INT8_C( 35), INT8_C( 30), INT8_C( -86), INT8_C( 120), INT8_C( 91), INT8_C( -69), INT8_C( -49), INT8_C( 19), INT8_C( -87), INT8_C( 42), INT8_C(-110), INT8_C( 68), INT8_C( 97), INT8_C(-125), INT8_C( 75), INT8_C( 30), INT8_C( -54), INT8_C( -38), INT8_C( -20), INT8_C( -96), INT8_C( 84), INT8_C( 108), INT8_C( 24), INT8_C( -54), INT8_C( -26), INT8_C(-125), INT8_C( -53), INT8_C( 48), INT8_C( -78), INT8_C( -96), INT8_C( 82), INT8_C( -16), INT8_C( -68), INT8_C( -65), INT8_C( 28), 
INT8_C( -82), INT8_C(-116), INT8_C( 119), INT8_C(-113), INT8_C( 102), INT8_C( 90), INT8_C( 86), INT8_C( -14), INT8_C( -49), INT8_C( 71), INT8_C( 2), INT8_C( 28)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 96), INT8_C( 0), INT8_C( 108), INT8_C( 24), INT8_C( 0), INT8_C( 26), INT8_C( 0), INT8_C( 53), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 82), INT8_C( 0), INT8_C( 0), INT8_C( 65), INT8_C( 0), INT8_C( 82), INT8_C( 116), INT8_C( 119), INT8_C( 0), INT8_C( 0), INT8_C( 90), INT8_C( 0), INT8_C( 0), INT8_C( 49), INT8_C( 0), INT8_C( 0), INT8_C( 28)) }, { UINT64_C( 2755545546), simde_mm512_set_epi8(INT8_C( -71), INT8_C( 48), INT8_C( -1), INT8_C( -17), INT8_C( -90), INT8_C( 3), INT8_C( -34), INT8_C( 36), INT8_C( -17), INT8_C( -38), INT8_C( 100), INT8_C( -30), INT8_C( 118), INT8_C( 42), INT8_C( -25), INT8_C( -45), INT8_C( 4), INT8_C( 8), INT8_C( 53), INT8_C( 84), INT8_C(-120), INT8_C( 61), INT8_C( 90), INT8_C( -19), INT8_C( 31), INT8_C(-108), INT8_C( -76), INT8_C( 95), INT8_C( 101), INT8_C( -99), INT8_C( -14), INT8_C( 26), INT8_C( -35), INT8_C( -61), INT8_C( 15), INT8_C( 71), INT8_C( 113), INT8_C( 109), INT8_C( 91), INT8_C(-117), INT8_C( 0), INT8_C( 121), INT8_C( 48), INT8_C( 109), INT8_C( 55), INT8_C( 125), INT8_C(-112), INT8_C( 80), INT8_C( 48), INT8_C( 40), INT8_C( 32), INT8_C( -98), INT8_C( 64), INT8_C( -31), INT8_C( -10), INT8_C( -6), INT8_C( -40), INT8_C( 37), INT8_C( 76), INT8_C( -51), INT8_C( 27), INT8_C( -2), INT8_C(-101), INT8_C( -10)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 35), INT8_C( 0), INT8_C( 15), INT8_C( 0), INT8_C( 0), INT8_C( 109), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 48), INT8_C( 109), INT8_C( 55), INT8_C( 125), INT8_C( 112), INT8_C( 0), INT8_C( 0), INT8_C( 40), INT8_C( 0), INT8_C( 0), INT8_C( 64), INT8_C( 0), INT8_C( 0), INT8_C( 6), INT8_C( 40), INT8_C( 37), INT8_C( 0), INT8_C( 0), INT8_C( 27), INT8_C( 0), INT8_C( 101), INT8_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_abs_epi8(test_vec[i].k, test_vec[i].a); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_abs_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi16(INT16_C(-24810), INT16_C( -1893), INT16_C( -4484), INT16_C(-18467), INT16_C(-27267), INT16_C(-12302), INT16_C(-13826), INT16_C( 938), INT16_C(-17680), INT16_C( -610), INT16_C( -4882), INT16_C(-14649), INT16_C( 30481), INT16_C(-20144), INT16_C( 20995), INT16_C( -4451), INT16_C( 6425), INT16_C( 21336), INT16_C( 30080), INT16_C( 4310), INT16_C( 29439), INT16_C(-28107), INT16_C( 32707), INT16_C(-17334), INT16_C( 26460), INT16_C(-29244), INT16_C( -5806), INT16_C(-31467), INT16_C( -9353), INT16_C( -9759), INT16_C( 6587), INT16_C(-14562)), simde_mm512_set_epi16(INT16_C( 24810), INT16_C( 1893), INT16_C( 4484), INT16_C( 18467), INT16_C( 27267), INT16_C( 12302), INT16_C( 13826), INT16_C( 938), INT16_C( 17680), INT16_C( 610), INT16_C( 4882), INT16_C( 14649), INT16_C( 30481), INT16_C( 20144), INT16_C( 20995), INT16_C( 4451), INT16_C( 6425), INT16_C( 21336), INT16_C( 30080), INT16_C( 4310), INT16_C( 29439), INT16_C( 28107), INT16_C( 32707), 
INT16_C( 17334), INT16_C( 26460), INT16_C( 29244), INT16_C( 5806), INT16_C( 31467), INT16_C( 9353), INT16_C( 9759), INT16_C( 6587), INT16_C( 14562)) }, { simde_mm512_set_epi16(INT16_C( 31294), INT16_C(-10840), INT16_C( 23692), INT16_C( -1500), INT16_C( 2562), INT16_C(-16788), INT16_C( -9616), INT16_C( 31575), INT16_C( 25554), INT16_C(-13527), INT16_C(-26722), INT16_C( 4852), INT16_C(-20866), INT16_C(-19475), INT16_C( 4624), INT16_C( 9206), INT16_C(-14800), INT16_C(-30682), INT16_C( 15889), INT16_C( 8584), INT16_C( 27161), INT16_C(-23281), INT16_C( 25970), INT16_C(-11578), INT16_C( 28932), INT16_C( 12842), INT16_C( -29), INT16_C( -6679), INT16_C(-17572), INT16_C(-17063), INT16_C( -2346), INT16_C( 20336)), simde_mm512_set_epi16(INT16_C( 31294), INT16_C( 10840), INT16_C( 23692), INT16_C( 1500), INT16_C( 2562), INT16_C( 16788), INT16_C( 9616), INT16_C( 31575), INT16_C( 25554), INT16_C( 13527), INT16_C( 26722), INT16_C( 4852), INT16_C( 20866), INT16_C( 19475), INT16_C( 4624), INT16_C( 9206), INT16_C( 14800), INT16_C( 30682), INT16_C( 15889), INT16_C( 8584), INT16_C( 27161), INT16_C( 23281), INT16_C( 25970), INT16_C( 11578), INT16_C( 28932), INT16_C( 12842), INT16_C( 29), INT16_C( 6679), INT16_C( 17572), INT16_C( 17063), INT16_C( 2346), INT16_C( 20336)) }, { simde_mm512_set_epi16(INT16_C(-29319), INT16_C( -6944), INT16_C( 10081), INT16_C( 26836), INT16_C( 30965), INT16_C(-18751), INT16_C( -5923), INT16_C(-27401), INT16_C( 7842), INT16_C( 24713), INT16_C( -3422), INT16_C( 8849), INT16_C( 22266), INT16_C(-29640), INT16_C( -264), INT16_C(-16823), INT16_C(-28396), INT16_C( 29200), INT16_C( 18193), INT16_C( -3173), INT16_C( 8244), INT16_C( -1296), INT16_C( 20026), INT16_C( 3755), INT16_C(-14728), INT16_C( 26243), INT16_C( 18823), INT16_C(-30029), INT16_C( 21566), INT16_C( 25734), INT16_C( -4271), INT16_C( 27065)), simde_mm512_set_epi16(INT16_C( 29319), INT16_C( 6944), INT16_C( 10081), INT16_C( 26836), INT16_C( 30965), INT16_C( 18751), INT16_C( 5923), INT16_C( 27401), INT16_C( 
7842), INT16_C( 24713), INT16_C( 3422), INT16_C( 8849), INT16_C( 22266), INT16_C( 29640), INT16_C( 264), INT16_C( 16823), INT16_C( 28396), INT16_C( 29200), INT16_C( 18193), INT16_C( 3173), INT16_C( 8244), INT16_C( 1296), INT16_C( 20026), INT16_C( 3755), INT16_C( 14728), INT16_C( 26243), INT16_C( 18823), INT16_C( 30029), INT16_C( 21566), INT16_C( 25734), INT16_C( 4271), INT16_C( 27065)) }, { simde_mm512_set_epi16(INT16_C( 26713), INT16_C( 6075), INT16_C(-20498), INT16_C(-29395), INT16_C( 28513), INT16_C(-24372), INT16_C( 30119), INT16_C( 21303), INT16_C(-20009), INT16_C( 16878), INT16_C( -3364), INT16_C( -1142), INT16_C( 26178), INT16_C( 1599), INT16_C( 583), INT16_C(-20121), INT16_C( 25419), INT16_C( 4739), INT16_C( 22881), INT16_C( -2884), INT16_C( -7360), INT16_C( 23146), INT16_C(-16850), INT16_C(-17018), INT16_C( 9049), INT16_C(-31439), INT16_C( 20369), INT16_C( 26125), INT16_C( 4615), INT16_C( 3018), INT16_C( 20462), INT16_C( 20538)), simde_mm512_set_epi16(INT16_C( 26713), INT16_C( 6075), INT16_C( 20498), INT16_C( 29395), INT16_C( 28513), INT16_C( 24372), INT16_C( 30119), INT16_C( 21303), INT16_C( 20009), INT16_C( 16878), INT16_C( 3364), INT16_C( 1142), INT16_C( 26178), INT16_C( 1599), INT16_C( 583), INT16_C( 20121), INT16_C( 25419), INT16_C( 4739), INT16_C( 22881), INT16_C( 2884), INT16_C( 7360), INT16_C( 23146), INT16_C( 16850), INT16_C( 17018), INT16_C( 9049), INT16_C( 31439), INT16_C( 20369), INT16_C( 26125), INT16_C( 4615), INT16_C( 3018), INT16_C( 20462), INT16_C( 20538)) }, { simde_mm512_set_epi16(INT16_C(-17426), INT16_C( -6113), INT16_C(-30180), INT16_C( 28425), INT16_C(-15870), INT16_C( 6201), INT16_C( 15445), INT16_C(-31740), INT16_C(-11778), INT16_C(-10748), INT16_C(-28415), INT16_C( -1743), INT16_C( 22411), INT16_C( 18108), INT16_C( 23625), INT16_C( 27654), INT16_C( 27868), INT16_C( 15645), INT16_C( 22336), INT16_C(-29935), INT16_C( -3026), INT16_C(-19158), INT16_C( 20698), INT16_C( 21892), INT16_C(-32012), INT16_C( 10508), INT16_C(-14383), 
INT16_C( 20676), INT16_C( 6233), INT16_C(-11386), INT16_C(-13291), INT16_C( 13948)), simde_mm512_set_epi16(INT16_C( 17426), INT16_C( 6113), INT16_C( 30180), INT16_C( 28425), INT16_C( 15870), INT16_C( 6201), INT16_C( 15445), INT16_C( 31740), INT16_C( 11778), INT16_C( 10748), INT16_C( 28415), INT16_C( 1743), INT16_C( 22411), INT16_C( 18108), INT16_C( 23625), INT16_C( 27654), INT16_C( 27868), INT16_C( 15645), INT16_C( 22336), INT16_C( 29935), INT16_C( 3026), INT16_C( 19158), INT16_C( 20698), INT16_C( 21892), INT16_C( 32012), INT16_C( 10508), INT16_C( 14383), INT16_C( 20676), INT16_C( 6233), INT16_C( 11386), INT16_C( 13291), INT16_C( 13948)) }, { simde_mm512_set_epi16(INT16_C( 6099), INT16_C(-22144), INT16_C( 20288), INT16_C(-18323), INT16_C( -136), INT16_C( -4474), INT16_C(-14336), INT16_C( 25660), INT16_C(-19775), INT16_C( 6691), INT16_C(-16568), INT16_C( 9907), INT16_C(-31382), INT16_C( 1875), INT16_C( 22377), INT16_C(-21951), INT16_C(-10385), INT16_C(-18760), INT16_C( 7844), INT16_C( 16059), INT16_C(-14216), INT16_C( 22036), INT16_C(-20920), INT16_C( 11586), INT16_C(-18048), INT16_C( -8950), INT16_C(-23337), INT16_C( 26279), INT16_C( 12076), INT16_C( 3090), INT16_C( -7311), INT16_C( -5254)), simde_mm512_set_epi16(INT16_C( 6099), INT16_C( 22144), INT16_C( 20288), INT16_C( 18323), INT16_C( 136), INT16_C( 4474), INT16_C( 14336), INT16_C( 25660), INT16_C( 19775), INT16_C( 6691), INT16_C( 16568), INT16_C( 9907), INT16_C( 31382), INT16_C( 1875), INT16_C( 22377), INT16_C( 21951), INT16_C( 10385), INT16_C( 18760), INT16_C( 7844), INT16_C( 16059), INT16_C( 14216), INT16_C( 22036), INT16_C( 20920), INT16_C( 11586), INT16_C( 18048), INT16_C( 8950), INT16_C( 23337), INT16_C( 26279), INT16_C( 12076), INT16_C( 3090), INT16_C( 7311), INT16_C( 5254)) }, { simde_mm512_set_epi16(INT16_C( 1734), INT16_C(-24733), INT16_C( 6252), INT16_C(-10636), INT16_C(-13019), INT16_C( 4439), INT16_C( 30486), INT16_C( 9898), INT16_C( 18157), INT16_C( 29700), INT16_C(-19524), INT16_C( 5081), INT16_C( 
-888), INT16_C( 21733), INT16_C(-17288), INT16_C(-29729), INT16_C( 877), INT16_C( 22002), INT16_C( 31006), INT16_C( 27903), INT16_C( 29379), INT16_C( 11869), INT16_C( 12487), INT16_C(-24676), INT16_C( 21504), INT16_C(-22063), INT16_C( 21762), INT16_C( 32035), INT16_C( -2823), INT16_C( 772), INT16_C( 22127), INT16_C(-16867)), simde_mm512_set_epi16(INT16_C( 1734), INT16_C( 24733), INT16_C( 6252), INT16_C( 10636), INT16_C( 13019), INT16_C( 4439), INT16_C( 30486), INT16_C( 9898), INT16_C( 18157), INT16_C( 29700), INT16_C( 19524), INT16_C( 5081), INT16_C( 888), INT16_C( 21733), INT16_C( 17288), INT16_C( 29729), INT16_C( 877), INT16_C( 22002), INT16_C( 31006), INT16_C( 27903), INT16_C( 29379), INT16_C( 11869), INT16_C( 12487), INT16_C( 24676), INT16_C( 21504), INT16_C( 22063), INT16_C( 21762), INT16_C( 32035), INT16_C( 2823), INT16_C( 772), INT16_C( 22127), INT16_C( 16867)) }, { simde_mm512_set_epi16(INT16_C( 12349), INT16_C( 32588), INT16_C(-21894), INT16_C(-24438), INT16_C( -9480), INT16_C( 14484), INT16_C( 264), INT16_C( 5772), INT16_C(-13220), INT16_C( 17520), INT16_C(-23892), INT16_C( 25295), INT16_C(-10903), INT16_C( 18210), INT16_C(-32005), INT16_C( 1475), INT16_C(-31775), INT16_C(-32553), INT16_C( 21009), INT16_C(-11897), INT16_C( 118), INT16_C( -4448), INT16_C( 11548), INT16_C( 27032), INT16_C( -3816), INT16_C( 24167), INT16_C(-13680), INT16_C( 3963), INT16_C( -6791), INT16_C(-30064), INT16_C(-17823), INT16_C( 15062)), simde_mm512_set_epi16(INT16_C( 12349), INT16_C( 32588), INT16_C( 21894), INT16_C( 24438), INT16_C( 9480), INT16_C( 14484), INT16_C( 264), INT16_C( 5772), INT16_C( 13220), INT16_C( 17520), INT16_C( 23892), INT16_C( 25295), INT16_C( 10903), INT16_C( 18210), INT16_C( 32005), INT16_C( 1475), INT16_C( 31775), INT16_C( 32553), INT16_C( 21009), INT16_C( 11897), INT16_C( 118), INT16_C( 4448), INT16_C( 11548), INT16_C( 27032), INT16_C( 3816), INT16_C( 24167), INT16_C( 13680), INT16_C( 3963), INT16_C( 6791), INT16_C( 30064), INT16_C( 17823), INT16_C( 
15062)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_abs_epi16(test_vec[i].a); simde_assert_m512i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_abs_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t src[32]; const simde__mmask32 k; const int16_t a[32]; const int16_t r[32]; } test_vec[] = { { { -INT16_C( 5316), -INT16_C( 21434), INT16_C( 23082), -INT16_C( 11070), -INT16_C( 21411), -INT16_C( 1395), INT16_C( 31798), -INT16_C( 18159), -INT16_C( 19036), -INT16_C( 29365), INT16_C( 11378), INT16_C( 16016), -INT16_C( 11452), -INT16_C( 20127), INT16_C( 12785), INT16_C( 11729), INT16_C( 5916), INT16_C( 18393), -INT16_C( 25742), -INT16_C( 12517), -INT16_C( 22456), INT16_C( 32458), -INT16_C( 9436), -INT16_C( 14025), -INT16_C( 32112), INT16_C( 598), -INT16_C( 6225), -INT16_C( 3264), -INT16_C( 24134), -INT16_C( 21596), INT16_C( 30418), -INT16_C( 4136) }, UINT32_C(4281774477), { INT16_C( 20812), -INT16_C( 27441), -INT16_C( 26119), INT16_C( 7442), INT16_C( 18804), INT16_C( 1254), INT16_C( 15820), INT16_C( 31494), INT16_C( 17956), -INT16_C( 8594), INT16_C( 4839), -INT16_C( 18039), INT16_C( 24968), INT16_C( 5800), -INT16_C( 8686), INT16_C( 24085), -INT16_C( 7121), INT16_C( 10483), INT16_C( 1405), -INT16_C( 3770), INT16_C( 11343), INT16_C( 7157), -INT16_C( 1175), -INT16_C( 29290), INT16_C( 1089), INT16_C( 10347), -INT16_C( 3050), -INT16_C( 24606), -INT16_C( 30123), INT16_C( 26549), -INT16_C( 13719), -INT16_C( 26426) }, { INT16_C( 20812), -INT16_C( 21434), INT16_C( 26119), INT16_C( 7442), -INT16_C( 21411), -INT16_C( 1395), INT16_C( 31798), INT16_C( 31494), INT16_C( 17956), -INT16_C( 29365), INT16_C( 11378), INT16_C( 16016), INT16_C( 24968), INT16_C( 5800), INT16_C( 12785), INT16_C( 24085), INT16_C( 5916), INT16_C( 10483), INT16_C( 1405), -INT16_C( 12517), INT16_C( 11343), INT16_C( 7157), -INT16_C( 9436), -INT16_C( 14025), INT16_C( 1089), INT16_C( 10347), INT16_C( 3050), INT16_C( 
24606), INT16_C( 30123), INT16_C( 26549), INT16_C( 13719), INT16_C( 26426) } }, { { -INT16_C( 18001), INT16_C( 11457), INT16_C( 1982), INT16_C( 3358), INT16_C( 4915), -INT16_C( 25304), -INT16_C( 16881), INT16_C( 20522), -INT16_C( 26942), -INT16_C( 9863), INT16_C( 23434), -INT16_C( 8072), INT16_C( 11749), INT16_C( 20039), INT16_C( 3575), -INT16_C( 22809), -INT16_C( 22330), -INT16_C( 31277), -INT16_C( 3665), -INT16_C( 7534), -INT16_C( 17660), INT16_C( 4991), -INT16_C( 21895), INT16_C( 15460), -INT16_C( 8896), -INT16_C( 13803), -INT16_C( 29384), INT16_C( 7594), -INT16_C( 3398), -INT16_C( 20116), INT16_C( 21503), -INT16_C( 14760) }, UINT32_C(2857053179), { -INT16_C( 8932), INT16_C( 8332), INT16_C( 3224), INT16_C( 4660), -INT16_C( 26442), -INT16_C( 2482), INT16_C( 25461), -INT16_C( 21056), INT16_C( 27632), -INT16_C( 21814), INT16_C( 13917), INT16_C( 23643), -INT16_C( 19575), -INT16_C( 31710), INT16_C( 28126), -INT16_C( 1490), -INT16_C( 17589), -INT16_C( 7397), INT16_C( 20423), INT16_C( 32245), INT16_C( 17383), INT16_C( 23667), INT16_C( 13222), -INT16_C( 27127), -INT16_C( 11362), -INT16_C( 1216), -INT16_C( 25590), -INT16_C( 27816), INT16_C( 31311), INT16_C( 11800), INT16_C( 18152), INT16_C( 13096) }, { INT16_C( 8932), INT16_C( 8332), INT16_C( 1982), INT16_C( 4660), INT16_C( 26442), INT16_C( 2482), INT16_C( 25461), INT16_C( 21056), INT16_C( 27632), INT16_C( 21814), INT16_C( 23434), INT16_C( 23643), INT16_C( 11749), INT16_C( 31710), INT16_C( 3575), -INT16_C( 22809), INT16_C( 17589), INT16_C( 7397), -INT16_C( 3665), INT16_C( 32245), -INT16_C( 17660), INT16_C( 4991), INT16_C( 13222), INT16_C( 15460), -INT16_C( 8896), INT16_C( 1216), -INT16_C( 29384), INT16_C( 27816), -INT16_C( 3398), INT16_C( 11800), INT16_C( 21503), INT16_C( 13096) } }, { { INT16_C( 17153), -INT16_C( 14314), INT16_C( 3218), INT16_C( 31045), -INT16_C( 18353), -INT16_C( 2347), -INT16_C( 8468), -INT16_C( 30068), -INT16_C( 12878), -INT16_C( 17274), -INT16_C( 8599), -INT16_C( 18353), INT16_C( 26456), INT16_C( 
16614), INT16_C( 4014), -INT16_C( 20621), -INT16_C( 30126), -INT16_C( 6792), -INT16_C( 17002), -INT16_C( 6818), INT16_C( 13430), INT16_C( 25307), INT16_C( 26642), -INT16_C( 15124), INT16_C( 29237), -INT16_C( 24960), -INT16_C( 12208), -INT16_C( 22186), INT16_C( 15671), -INT16_C( 6679), INT16_C( 23884), -INT16_C( 24939) }, UINT32_C(2105740775), { -INT16_C( 7478), INT16_C( 16482), INT16_C( 15894), INT16_C( 10402), -INT16_C( 28762), -INT16_C( 9235), INT16_C( 27905), INT16_C( 21113), -INT16_C( 12483), INT16_C( 30203), -INT16_C( 7156), INT16_C( 22618), -INT16_C( 4287), INT16_C( 10487), INT16_C( 31484), -INT16_C( 14427), INT16_C( 2140), INT16_C( 29191), -INT16_C( 21946), -INT16_C( 4965), -INT16_C( 30663), INT16_C( 15047), INT16_C( 16629), INT16_C( 13196), -INT16_C( 30961), INT16_C( 7336), INT16_C( 620), -INT16_C( 21132), INT16_C( 27634), -INT16_C( 4394), INT16_C( 31718), INT16_C( 17077) }, { INT16_C( 7478), INT16_C( 16482), INT16_C( 15894), INT16_C( 31045), -INT16_C( 18353), INT16_C( 9235), INT16_C( 27905), INT16_C( 21113), INT16_C( 12483), -INT16_C( 17274), INT16_C( 7156), INT16_C( 22618), INT16_C( 26456), INT16_C( 16614), INT16_C( 4014), -INT16_C( 20621), INT16_C( 2140), INT16_C( 29191), -INT16_C( 17002), -INT16_C( 6818), INT16_C( 13430), INT16_C( 25307), INT16_C( 26642), INT16_C( 13196), INT16_C( 30961), -INT16_C( 24960), INT16_C( 620), INT16_C( 21132), INT16_C( 27634), INT16_C( 4394), INT16_C( 31718), -INT16_C( 24939) } }, { { -INT16_C( 17021), -INT16_C( 13899), INT16_C( 20583), -INT16_C( 24395), INT16_C( 31960), -INT16_C( 12838), INT16_C( 26556), -INT16_C( 13312), -INT16_C( 22290), INT16_C( 23272), INT16_C( 23723), -INT16_C( 25336), -INT16_C( 8504), -INT16_C( 20853), INT16_C( 16729), -INT16_C( 8720), -INT16_C( 23042), INT16_C( 26022), INT16_C( 23797), -INT16_C( 13051), -INT16_C( 8232), -INT16_C( 27237), -INT16_C( 25786), INT16_C( 13665), INT16_C( 18756), -INT16_C( 4209), -INT16_C( 26715), INT16_C( 28044), INT16_C( 6005), -INT16_C( 12517), INT16_C( 3160), INT16_C( 
22188) }, UINT32_C(2814071473), { -INT16_C( 16210), -INT16_C( 30860), INT16_C( 4000), -INT16_C( 6628), INT16_C( 32171), -INT16_C( 4325), -INT16_C( 21562), INT16_C( 27614), INT16_C( 27202), -INT16_C( 18215), -INT16_C( 2943), -INT16_C( 9593), INT16_C( 13056), -INT16_C( 19920), -INT16_C( 4987), INT16_C( 13401), -INT16_C( 12884), INT16_C( 19643), -INT16_C( 10275), -INT16_C( 30669), INT16_C( 20052), INT16_C( 6775), INT16_C( 22009), INT16_C( 15493), INT16_C( 24255), INT16_C( 16628), INT16_C( 31571), INT16_C( 21274), INT16_C( 19374), INT16_C( 13061), INT16_C( 24119), -INT16_C( 7321) }, { INT16_C( 16210), -INT16_C( 13899), INT16_C( 20583), -INT16_C( 24395), INT16_C( 32171), INT16_C( 4325), INT16_C( 26556), INT16_C( 27614), -INT16_C( 22290), INT16_C( 18215), INT16_C( 23723), -INT16_C( 25336), INT16_C( 13056), -INT16_C( 20853), INT16_C( 4987), -INT16_C( 8720), INT16_C( 12884), INT16_C( 19643), INT16_C( 23797), INT16_C( 30669), INT16_C( 20052), INT16_C( 6775), -INT16_C( 25786), INT16_C( 15493), INT16_C( 24255), INT16_C( 16628), INT16_C( 31571), INT16_C( 28044), INT16_C( 6005), INT16_C( 13061), INT16_C( 3160), INT16_C( 7321) } }, { { INT16_C( 8748), INT16_C( 2352), INT16_C( 25593), INT16_C( 19857), INT16_C( 2225), -INT16_C( 21657), -INT16_C( 4771), INT16_C( 7399), -INT16_C( 9397), -INT16_C( 24996), INT16_C( 30550), INT16_C( 1266), -INT16_C( 2110), -INT16_C( 1737), -INT16_C( 24746), -INT16_C( 32036), INT16_C( 3265), -INT16_C( 17525), INT16_C( 7279), INT16_C( 8456), INT16_C( 28708), -INT16_C( 32308), -INT16_C( 19619), -INT16_C( 22371), -INT16_C( 1650), -INT16_C( 7097), INT16_C( 14704), INT16_C( 13032), INT16_C( 7984), -INT16_C( 31189), INT16_C( 2238), -INT16_C( 32760) }, UINT32_C(2218496788), { INT16_C( 17327), -INT16_C( 11355), INT16_C( 29107), INT16_C( 4180), -INT16_C( 3804), -INT16_C( 19783), INT16_C( 235), INT16_C( 23446), INT16_C( 32313), INT16_C( 27022), -INT16_C( 18019), INT16_C( 23792), -INT16_C( 1855), -INT16_C( 10532), INT16_C( 6028), INT16_C( 15194), -INT16_C( 166), 
INT16_C( 3599), INT16_C( 25456), -INT16_C( 27618), -INT16_C( 10411), INT16_C( 16454), -INT16_C( 9001), INT16_C( 4251), INT16_C( 10586), -INT16_C( 2182), INT16_C( 27363), -INT16_C( 23469), INT16_C( 12130), -INT16_C( 4486), -INT16_C( 11194), -INT16_C( 24278) }, { INT16_C( 8748), INT16_C( 2352), INT16_C( 29107), INT16_C( 19857), INT16_C( 3804), -INT16_C( 21657), -INT16_C( 4771), INT16_C( 7399), INT16_C( 32313), INT16_C( 27022), INT16_C( 30550), INT16_C( 1266), INT16_C( 1855), -INT16_C( 1737), -INT16_C( 24746), INT16_C( 15194), INT16_C( 166), INT16_C( 3599), INT16_C( 7279), INT16_C( 27618), INT16_C( 10411), INT16_C( 16454), -INT16_C( 19619), -INT16_C( 22371), -INT16_C( 1650), -INT16_C( 7097), INT16_C( 27363), INT16_C( 13032), INT16_C( 7984), -INT16_C( 31189), INT16_C( 2238), INT16_C( 24278) } }, { { INT16_C( 14803), INT16_C( 17327), -INT16_C( 12900), -INT16_C( 3625), INT16_C( 7589), INT16_C( 31793), -INT16_C( 12807), INT16_C( 21389), INT16_C( 2038), -INT16_C( 9909), -INT16_C( 24975), -INT16_C( 11394), -INT16_C( 1842), INT16_C( 5314), -INT16_C( 4915), -INT16_C( 24395), INT16_C( 25637), -INT16_C( 15900), -INT16_C( 17614), -INT16_C( 10317), -INT16_C( 6951), -INT16_C( 11693), -INT16_C( 8015), -INT16_C( 22490), INT16_C( 29159), INT16_C( 22657), -INT16_C( 241), -INT16_C( 8916), -INT16_C( 4360), -INT16_C( 14862), -INT16_C( 22566), -INT16_C( 155) }, UINT32_C(1052789004), { INT16_C( 29445), -INT16_C( 8683), INT16_C( 26712), INT16_C( 2480), -INT16_C( 10679), INT16_C( 12465), INT16_C( 13127), INT16_C( 22409), -INT16_C( 19150), INT16_C( 10804), INT16_C( 9891), INT16_C( 32239), INT16_C( 21966), -INT16_C( 9604), INT16_C( 15518), -INT16_C( 23784), INT16_C( 11696), INT16_C( 2177), INT16_C( 12949), -INT16_C( 8687), -INT16_C( 15608), INT16_C( 20495), -INT16_C( 26378), INT16_C( 10407), -INT16_C( 9395), -INT16_C( 4013), INT16_C( 16898), -INT16_C( 12179), -INT16_C( 5737), INT16_C( 13994), -INT16_C( 15835), -INT16_C( 10791) }, { INT16_C( 14803), INT16_C( 17327), INT16_C( 26712), INT16_C( 
2480), INT16_C( 7589), INT16_C( 31793), -INT16_C( 12807), INT16_C( 21389), INT16_C( 19150), -INT16_C( 9909), -INT16_C( 24975), INT16_C( 32239), -INT16_C( 1842), INT16_C( 5314), INT16_C( 15518), -INT16_C( 24395), INT16_C( 25637), -INT16_C( 15900), -INT16_C( 17614), -INT16_C( 10317), -INT16_C( 6951), -INT16_C( 11693), INT16_C( 26378), INT16_C( 10407), INT16_C( 29159), INT16_C( 4013), INT16_C( 16898), INT16_C( 12179), INT16_C( 5737), INT16_C( 13994), -INT16_C( 22566), -INT16_C( 155) } }, { { INT16_C( 23535), -INT16_C( 31523), -INT16_C( 4211), -INT16_C( 27293), INT16_C( 29362), -INT16_C( 22299), -INT16_C( 29686), INT16_C( 22480), INT16_C( 9064), INT16_C( 27207), -INT16_C( 19354), -INT16_C( 710), -INT16_C( 7011), -INT16_C( 15821), INT16_C( 3494), -INT16_C( 27240), INT16_C( 30056), -INT16_C( 2791), INT16_C( 31844), INT16_C( 5770), INT16_C( 28910), -INT16_C( 1858), -INT16_C( 28676), INT16_C( 25679), -INT16_C( 26958), INT16_C( 6350), INT16_C( 2122), -INT16_C( 6378), INT16_C( 18924), -INT16_C( 27990), INT16_C( 16982), -INT16_C( 16857) }, UINT32_C( 481509815), { INT16_C( 16061), -INT16_C( 21454), -INT16_C( 3666), -INT16_C( 21852), -INT16_C( 2944), INT16_C( 12815), -INT16_C( 8822), -INT16_C( 10933), INT16_C( 25062), -INT16_C( 11588), INT16_C( 26282), INT16_C( 357), -INT16_C( 29528), INT16_C( 24767), INT16_C( 29645), -INT16_C( 29828), -INT16_C( 20815), INT16_C( 24375), -INT16_C( 9313), INT16_C( 7945), INT16_C( 6351), INT16_C( 23122), -INT16_C( 25098), -INT16_C( 9169), -INT16_C( 5122), -INT16_C( 22354), INT16_C( 4946), -INT16_C( 1367), INT16_C( 27040), INT16_C( 27994), -INT16_C( 10532), -INT16_C( 29192) }, { INT16_C( 16061), INT16_C( 21454), INT16_C( 3666), -INT16_C( 27293), INT16_C( 2944), INT16_C( 12815), -INT16_C( 29686), INT16_C( 10933), INT16_C( 25062), INT16_C( 27207), -INT16_C( 19354), -INT16_C( 710), -INT16_C( 7011), -INT16_C( 15821), INT16_C( 29645), -INT16_C( 27240), INT16_C( 20815), INT16_C( 24375), INT16_C( 31844), INT16_C( 5770), INT16_C( 6351), INT16_C( 23122), 
-INT16_C( 28676), INT16_C( 9169), -INT16_C( 26958), INT16_C( 6350), INT16_C( 4946), INT16_C( 1367), INT16_C( 27040), -INT16_C( 27990), INT16_C( 16982), -INT16_C( 16857) } }, { { INT16_C( 12165), INT16_C( 9452), -INT16_C( 2805), -INT16_C( 9660), -INT16_C( 27122), INT16_C( 1076), INT16_C( 25395), INT16_C( 12768), -INT16_C( 29105), -INT16_C( 24103), -INT16_C( 31838), INT16_C( 17051), -INT16_C( 2324), -INT16_C( 14161), -INT16_C( 22324), INT16_C( 20821), INT16_C( 16855), -INT16_C( 7562), -INT16_C( 17866), INT16_C( 17597), -INT16_C( 3760), -INT16_C( 31928), INT16_C( 10325), -INT16_C( 23372), -INT16_C( 29257), INT16_C( 22853), -INT16_C( 8176), -INT16_C( 869), INT16_C( 19158), -INT16_C( 23612), INT16_C( 6642), -INT16_C( 13580) }, UINT32_C(2443995738), { INT16_C( 26916), INT16_C( 29909), INT16_C( 7771), -INT16_C( 20233), -INT16_C( 21690), -INT16_C( 684), -INT16_C( 26311), INT16_C( 18774), -INT16_C( 3719), INT16_C( 20550), INT16_C( 2620), INT16_C( 12019), -INT16_C( 6364), INT16_C( 32504), -INT16_C( 23214), INT16_C( 30223), -INT16_C( 6898), INT16_C( 27115), -INT16_C( 7677), INT16_C( 18713), INT16_C( 28046), -INT16_C( 14521), -INT16_C( 25338), -INT16_C( 32752), INT16_C( 22159), -INT16_C( 13360), -INT16_C( 15519), -INT16_C( 31239), -INT16_C( 3414), -INT16_C( 1021), INT16_C( 5015), -INT16_C( 23181) }, { INT16_C( 12165), INT16_C( 29909), -INT16_C( 2805), INT16_C( 20233), INT16_C( 21690), INT16_C( 1076), INT16_C( 26311), INT16_C( 12768), -INT16_C( 29105), INT16_C( 20550), -INT16_C( 31838), INT16_C( 12019), -INT16_C( 2324), INT16_C( 32504), INT16_C( 23214), INT16_C( 20821), INT16_C( 16855), -INT16_C( 7562), INT16_C( 7677), INT16_C( 18713), -INT16_C( 3760), INT16_C( 14521), INT16_C( 10325), INT16_C( 32752), INT16_C( 22159), INT16_C( 22853), -INT16_C( 8176), -INT16_C( 869), INT16_C( 3414), -INT16_C( 23612), INT16_C( 6642), INT16_C( 23181) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi16(test_vec[i].src); 
simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i r = simde_mm512_mask_abs_epi16(src, test_vec[i].k, a); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_abs_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask32 k; const int16_t a[32]; const int16_t r[32]; } test_vec[] = { { UINT32_C( 426916685), { INT16_C( 30267), INT16_C( 27257), -INT16_C( 27872), -INT16_C( 31845), -INT16_C( 31872), INT16_C( 14983), -INT16_C( 30143), INT16_C( 666), INT16_C( 12540), INT16_C( 6967), -INT16_C( 16243), INT16_C( 692), -INT16_C( 5341), INT16_C( 28883), INT16_C( 17702), INT16_C( 24969), INT16_C( 955), -INT16_C( 9013), INT16_C( 26518), INT16_C( 5727), -INT16_C( 6422), INT16_C( 11089), -INT16_C( 5264), INT16_C( 27693), INT16_C( 25627), -INT16_C( 22392), INT16_C( 15396), INT16_C( 18346), INT16_C( 32295), INT16_C( 19896), INT16_C( 16835), INT16_C( 32686) }, { INT16_C( 30267), INT16_C( 0), INT16_C( 27872), INT16_C( 31845), INT16_C( 0), INT16_C( 0), INT16_C( 30143), INT16_C( 0), INT16_C( 12540), INT16_C( 6967), INT16_C( 0), INT16_C( 692), INT16_C( 5341), INT16_C( 28883), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 9013), INT16_C( 0), INT16_C( 0), INT16_C( 6422), INT16_C( 11089), INT16_C( 5264), INT16_C( 0), INT16_C( 25627), INT16_C( 0), INT16_C( 0), INT16_C( 18346), INT16_C( 32295), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT32_C(3680205380), { -INT16_C( 17695), -INT16_C( 13327), INT16_C( 17057), INT16_C( 4599), INT16_C( 9261), INT16_C( 18558), INT16_C( 1673), -INT16_C( 21008), -INT16_C( 25790), INT16_C( 27125), -INT16_C( 21223), -INT16_C( 9034), INT16_C( 25838), INT16_C( 13147), -INT16_C( 18722), -INT16_C( 16626), -INT16_C( 143), INT16_C( 4747), -INT16_C( 32190), INT16_C( 28451), -INT16_C( 24154), INT16_C( 12216), -INT16_C( 22361), -INT16_C( 5667), -INT16_C( 11709), INT16_C( 23634), INT16_C( 2175), INT16_C( 27961), -INT16_C( 27539), INT16_C( 19360), -INT16_C( 
20917), -INT16_C( 17397) }, { INT16_C( 0), INT16_C( 0), INT16_C( 17057), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1673), INT16_C( 0), INT16_C( 0), INT16_C( 27125), INT16_C( 0), INT16_C( 9034), INT16_C( 25838), INT16_C( 13147), INT16_C( 18722), INT16_C( 0), INT16_C( 143), INT16_C( 4747), INT16_C( 0), INT16_C( 28451), INT16_C( 24154), INT16_C( 0), INT16_C( 22361), INT16_C( 0), INT16_C( 11709), INT16_C( 23634), INT16_C( 0), INT16_C( 27961), INT16_C( 27539), INT16_C( 0), INT16_C( 20917), INT16_C( 17397) } }, { UINT32_C(4040070830), { -INT16_C( 3816), -INT16_C( 16801), INT16_C( 6035), INT16_C( 15086), -INT16_C( 13376), INT16_C( 804), INT16_C( 30365), INT16_C( 7264), -INT16_C( 26241), -INT16_C( 4983), INT16_C( 10797), INT16_C( 30775), INT16_C( 17112), -INT16_C( 31180), INT16_C( 728), -INT16_C( 3978), -INT16_C( 10508), -INT16_C( 30801), -INT16_C( 25107), -INT16_C( 21055), -INT16_C( 6808), INT16_C( 1457), INT16_C( 4444), -INT16_C( 9439), -INT16_C( 21846), -INT16_C( 10297), -INT16_C( 300), -INT16_C( 21168), -INT16_C( 31679), INT16_C( 6451), -INT16_C( 21881), INT16_C( 31498) }, { INT16_C( 0), INT16_C( 16801), INT16_C( 6035), INT16_C( 15086), INT16_C( 0), INT16_C( 804), INT16_C( 0), INT16_C( 7264), INT16_C( 0), INT16_C( 4983), INT16_C( 10797), INT16_C( 0), INT16_C( 17112), INT16_C( 0), INT16_C( 0), INT16_C( 3978), INT16_C( 0), INT16_C( 30801), INT16_C( 25107), INT16_C( 21055), INT16_C( 0), INT16_C( 0), INT16_C( 4444), INT16_C( 9439), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 31679), INT16_C( 6451), INT16_C( 21881), INT16_C( 31498) } }, { UINT32_C(1828895104), { -INT16_C( 15530), -INT16_C( 16869), -INT16_C( 13143), INT16_C( 1475), -INT16_C( 6947), -INT16_C( 30752), -INT16_C( 22642), INT16_C( 25438), -INT16_C( 20827), -INT16_C( 6640), INT16_C( 17203), -INT16_C( 17920), INT16_C( 2797), INT16_C( 27957), INT16_C( 14275), INT16_C( 6619), -INT16_C( 2310), -INT16_C( 23593), -INT16_C( 25918), -INT16_C( 24664), -INT16_C( 30594), INT16_C( 3110), -INT16_C( 
31697), -INT16_C( 10897), INT16_C( 32563), INT16_C( 26299), -INT16_C( 17469), -INT16_C( 20448), INT16_C( 21957), -INT16_C( 30690), -INT16_C( 1652), -INT16_C( 31071) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 25438), INT16_C( 20827), INT16_C( 0), INT16_C( 0), INT16_C( 17920), INT16_C( 2797), INT16_C( 27957), INT16_C( 0), INT16_C( 6619), INT16_C( 0), INT16_C( 23593), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 32563), INT16_C( 0), INT16_C( 17469), INT16_C( 20448), INT16_C( 0), INT16_C( 30690), INT16_C( 1652), INT16_C( 0) } }, { UINT32_C(2972350703), { -INT16_C( 11758), -INT16_C( 28592), INT16_C( 30299), -INT16_C( 30051), INT16_C( 3322), INT16_C( 11615), INT16_C( 7052), INT16_C( 20371), -INT16_C( 19498), -INT16_C( 25345), INT16_C( 7432), -INT16_C( 27612), -INT16_C( 14826), INT16_C( 1307), INT16_C( 17726), INT16_C( 20918), INT16_C( 1559), INT16_C( 29409), INT16_C( 32380), INT16_C( 30717), INT16_C( 23691), INT16_C( 6052), INT16_C( 14455), INT16_C( 20070), INT16_C( 26091), -INT16_C( 2838), INT16_C( 3715), -INT16_C( 26232), -INT16_C( 23596), INT16_C( 5023), INT16_C( 21992), INT16_C( 100) }, { INT16_C( 11758), INT16_C( 28592), INT16_C( 30299), INT16_C( 30051), INT16_C( 0), INT16_C( 11615), INT16_C( 7052), INT16_C( 20371), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 27612), INT16_C( 14826), INT16_C( 1307), INT16_C( 17726), INT16_C( 0), INT16_C( 0), INT16_C( 29409), INT16_C( 0), INT16_C( 30717), INT16_C( 0), INT16_C( 6052), INT16_C( 0), INT16_C( 0), INT16_C( 26091), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 23596), INT16_C( 5023), INT16_C( 0), INT16_C( 100) } }, { UINT32_C(3631367516), { INT16_C( 28612), INT16_C( 20303), -INT16_C( 2868), INT16_C( 17254), -INT16_C( 13268), INT16_C( 6033), INT16_C( 31537), -INT16_C( 19445), -INT16_C( 27510), INT16_C( 24142), -INT16_C( 4809), INT16_C( 8305), -INT16_C( 10942), -INT16_C( 25056), -INT16_C( 28133), -INT16_C( 8329), 
-INT16_C( 14846), -INT16_C( 12754), -INT16_C( 27462), -INT16_C( 6639), -INT16_C( 23712), -INT16_C( 28162), INT16_C( 2334), -INT16_C( 22458), -INT16_C( 27491), -INT16_C( 11001), INT16_C( 30849), -INT16_C( 15371), INT16_C( 5454), INT16_C( 26978), -INT16_C( 9817), -INT16_C( 22200) }, { INT16_C( 0), INT16_C( 0), INT16_C( 2868), INT16_C( 17254), INT16_C( 13268), INT16_C( 0), INT16_C( 31537), INT16_C( 0), INT16_C( 27510), INT16_C( 0), INT16_C( 4809), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 28133), INT16_C( 0), INT16_C( 0), INT16_C( 12754), INT16_C( 0), INT16_C( 0), INT16_C( 23712), INT16_C( 28162), INT16_C( 2334), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 15371), INT16_C( 5454), INT16_C( 0), INT16_C( 9817), INT16_C( 22200) } }, { UINT32_C(1517778591), { -INT16_C( 30454), INT16_C( 27200), INT16_C( 15916), INT16_C( 19195), INT16_C( 16712), -INT16_C( 6669), -INT16_C( 1323), INT16_C( 22202), -INT16_C( 20622), -INT16_C( 16358), INT16_C( 31940), INT16_C( 27689), INT16_C( 29013), -INT16_C( 3051), -INT16_C( 29209), -INT16_C( 3762), -INT16_C( 28906), INT16_C( 16987), INT16_C( 22477), INT16_C( 5516), INT16_C( 32664), INT16_C( 28411), -INT16_C( 19079), -INT16_C( 4924), -INT16_C( 8603), INT16_C( 10668), -INT16_C( 10662), -INT16_C( 20587), -INT16_C( 21689), INT16_C( 12196), -INT16_C( 3528), INT16_C( 20000) }, { INT16_C( 30454), INT16_C( 27200), INT16_C( 15916), INT16_C( 19195), INT16_C( 16712), INT16_C( 0), INT16_C( 0), INT16_C( 22202), INT16_C( 0), INT16_C( 16358), INT16_C( 31940), INT16_C( 0), INT16_C( 29013), INT16_C( 3051), INT16_C( 29209), INT16_C( 0), INT16_C( 28906), INT16_C( 16987), INT16_C( 22477), INT16_C( 0), INT16_C( 32664), INT16_C( 28411), INT16_C( 19079), INT16_C( 0), INT16_C( 0), INT16_C( 10668), INT16_C( 0), INT16_C( 20587), INT16_C( 21689), INT16_C( 0), INT16_C( 3528), INT16_C( 0) } }, { UINT32_C(1334869121), { INT16_C( 7379), INT16_C( 27492), INT16_C( 24476), INT16_C( 5593), -INT16_C( 25067), INT16_C( 31233), -INT16_C( 20868), -INT16_C( 
10333), INT16_C( 14724), -INT16_C( 13434), INT16_C( 10980), INT16_C( 7418), INT16_C( 6941), -INT16_C( 24982), -INT16_C( 1385), INT16_C( 27373), INT16_C( 21014), -INT16_C( 19755), -INT16_C( 20559), -INT16_C( 14648), -INT16_C( 14003), -INT16_C( 14016), -INT16_C( 7049), -INT16_C( 1120), INT16_C( 10013), INT16_C( 455), -INT16_C( 16047), INT16_C( 28189), -INT16_C( 30756), INT16_C( 29453), -INT16_C( 1407), -INT16_C( 26659) }, { INT16_C( 7379), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 10333), INT16_C( 0), INT16_C( 0), INT16_C( 10980), INT16_C( 7418), INT16_C( 6941), INT16_C( 24982), INT16_C( 1385), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 14003), INT16_C( 0), INT16_C( 0), INT16_C( 1120), INT16_C( 10013), INT16_C( 455), INT16_C( 16047), INT16_C( 28189), INT16_C( 0), INT16_C( 0), INT16_C( 1407), INT16_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i r = simde_mm512_maskz_abs_epi16(test_vec[i].k, a); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_abs_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C(-1095158286), INT32_C( -133595553), INT32_C( -941949577), INT32_C(-1117722052), INT32_C(-1053667317), INT32_C( -662420643), INT32_C( 2095193825), INT32_C( -799061081), INT32_C( 347912513), INT32_C( -439299809), INT32_C( 2053030698), INT32_C( -277514113), INT32_C( 1476262970), INT32_C( 1955038119), INT32_C( -77085072), INT32_C( 1014069144)), simde_mm512_set_epi32(INT32_C( 1095158286), INT32_C( 133595553), INT32_C( 941949577), INT32_C( 1117722052), INT32_C( 1053667317), INT32_C( 662420643), INT32_C( 2095193825), INT32_C( 799061081), INT32_C( 347912513), INT32_C( 439299809), INT32_C( 2053030698), INT32_C( 277514113), INT32_C( 1476262970), INT32_C( 
1955038119), INT32_C( 77085072), INT32_C( 1014069144)) }, { simde_mm512_set_epi32(INT32_C( 1865049709), INT32_C( -408997463), INT32_C( 1771073477), INT32_C( 1463780468), INT32_C( 268019741), INT32_C(-1725054429), INT32_C( 1408597864), INT32_C( 1576985133), INT32_C( 170783936), INT32_C( 836522882), INT32_C( 1364040350), INT32_C( 563663058), INT32_C(-1491438903), INT32_C( -873504608), INT32_C( 1431273511), INT32_C( -164765086)), simde_mm512_set_epi32(INT32_C( 1865049709), INT32_C( 408997463), INT32_C( 1771073477), INT32_C( 1463780468), INT32_C( 268019741), INT32_C( 1725054429), INT32_C( 1408597864), INT32_C( 1576985133), INT32_C( 170783936), INT32_C( 836522882), INT32_C( 1364040350), INT32_C( 563663058), INT32_C( 1491438903), INT32_C( 873504608), INT32_C( 1431273511), INT32_C( 164765086)) }, { simde_mm512_set_epi32(INT32_C( 1505063340), INT32_C( -79208486), INT32_C( -115790145), INT32_C( 1137793635), INT32_C( -719063760), INT32_C( -465633360), INT32_C( 1417132608), INT32_C( 1715322300), INT32_C( 1194443989), INT32_C( 1598244723), INT32_C( -360509626), INT32_C( -844528776), INT32_C( -291907566), INT32_C( -980752736), INT32_C( 701363552), INT32_C( 1148036152)), simde_mm512_set_epi32(INT32_C( 1505063340), INT32_C( 79208486), INT32_C( 115790145), INT32_C( 1137793635), INT32_C( 719063760), INT32_C( 465633360), INT32_C( 1417132608), INT32_C( 1715322300), INT32_C( 1194443989), INT32_C( 1598244723), INT32_C( 360509626), INT32_C( 844528776), INT32_C( 291907566), INT32_C( 980752736), INT32_C( 701363552), INT32_C( 1148036152)) }, { simde_mm512_set_epi32(INT32_C(-1538804784), INT32_C( -43683957), INT32_C( -70380459), INT32_C( 259050545), INT32_C(-1140217223), INT32_C( -24242506), INT32_C(-1281378925), INT32_C( -426768587), INT32_C(-1825251144), INT32_C( -975195895), INT32_C( 758020113), INT32_C( -3401471), INT32_C( 154668063), INT32_C( -827616009), INT32_C( 793625070), INT32_C( -735990247)), simde_mm512_set_epi32(INT32_C( 1538804784), INT32_C( 43683957), INT32_C( 70380459), 
INT32_C( 259050545), INT32_C( 1140217223), INT32_C( 24242506), INT32_C( 1281378925), INT32_C( 426768587), INT32_C( 1825251144), INT32_C( 975195895), INT32_C( 758020113), INT32_C( 3401471), INT32_C( 154668063), INT32_C( 827616009), INT32_C( 793625070), INT32_C( 735990247)) }, { simde_mm512_set_epi32(INT32_C( -919197120), INT32_C( 1902742720), INT32_C( 576001152), INT32_C( 772608991), INT32_C( 1373611304), INT32_C( 156079462), INT32_C( 392030686), INT32_C( 1159450969), INT32_C( 1376625025), INT32_C( -701917672), INT32_C( 1911493359), INT32_C( -115817480), INT32_C( -875216623), INT32_C( 1333681477), INT32_C(-1067533891), INT32_C( 1671330781)), simde_mm512_set_epi32(INT32_C( 919197120), INT32_C( 1902742720), INT32_C( 576001152), INT32_C( 772608991), INT32_C( 1373611304), INT32_C( 156079462), INT32_C( 392030686), INT32_C( 1159450969), INT32_C( 1376625025), INT32_C( 701917672), INT32_C( 1911493359), INT32_C( 115817480), INT32_C( 875216623), INT32_C( 1333681477), INT32_C( 1067533891), INT32_C( 1671330781)) }, { simde_mm512_set_epi32(INT32_C(-1168385947), INT32_C(-1671882855), INT32_C(-1182456995), INT32_C(-1803534861), INT32_C( 443878759), INT32_C( 702169153), INT32_C(-1879742181), INT32_C( 1627978919), INT32_C( 583873330), INT32_C( -857098109), INT32_C( 710347808), INT32_C( 1707849385), INT32_C( 1863512780), INT32_C( -371421167), INT32_C( 1902179408), INT32_C(-1189025654)), simde_mm512_set_epi32(INT32_C( 1168385947), INT32_C( 1671882855), INT32_C( 1182456995), INT32_C( 1803534861), INT32_C( 443878759), INT32_C( 702169153), INT32_C( 1879742181), INT32_C( 1627978919), INT32_C( 583873330), INT32_C( 857098109), INT32_C( 710347808), INT32_C( 1707849385), INT32_C( 1863512780), INT32_C( 371421167), INT32_C( 1902179408), INT32_C( 1189025654)) }, { simde_mm512_set_epi32(INT32_C( 7990856), INT32_C(-1991291137), INT32_C( 1404443548), INT32_C(-1023849862), INT32_C( 2054941409), INT32_C(-1604088325), INT32_C( 721271909), INT32_C(-1622295089), INT32_C( 1869222605), 
INT32_C(-1583998423), INT32_C( -801626928), INT32_C( -940395766), INT32_C( 1108931720), INT32_C( -471669445), INT32_C( 1204289475), INT32_C( -752679106)), simde_mm512_set_epi32(INT32_C( 7990856), INT32_C( 1991291137), INT32_C( 1404443548), INT32_C( 1023849862), INT32_C( 2054941409), INT32_C( 1604088325), INT32_C( 721271909), INT32_C( 1622295089), INT32_C( 1869222605), INT32_C( 1583998423), INT32_C( 801626928), INT32_C( 940395766), INT32_C( 1108931720), INT32_C( 471669445), INT32_C( 1204289475), INT32_C( 752679106)) }, { simde_mm512_set_epi32(INT32_C( 1399806844), INT32_C( 1131841699), INT32_C( -346937782), INT32_C( 567816154), INT32_C(-1589012616), INT32_C(-2005496894), INT32_C( 1401681986), INT32_C( 423760716), INT32_C( 431684101), INT32_C( 852583616), INT32_C(-1369299290), INT32_C( -663899319), INT32_C( 1580470265), INT32_C( 298083241), INT32_C( -630373638), INT32_C(-1937828661)), simde_mm512_set_epi32(INT32_C( 1399806844), INT32_C( 1131841699), INT32_C( 346937782), INT32_C( 567816154), INT32_C( 1589012616), INT32_C( 2005496894), INT32_C( 1401681986), INT32_C( 423760716), INT32_C( 431684101), INT32_C( 852583616), INT32_C( 1369299290), INT32_C( 663899319), INT32_C( 1580470265), INT32_C( 298083241), INT32_C( 630373638), INT32_C( 1937828661)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_abs_epi32(test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_abs_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask16 k; simde__m512i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 114710097), INT32_C( 1837246098), INT32_C(-1399577225), INT32_C(-1388127606), INT32_C( 1116027725), INT32_C( -871797325), INT32_C(-1979326643), INT32_C( 1477004857), INT32_C( 1670723749), INT32_C(-1006052339), INT32_C( 1863789116), INT32_C( -690396684), INT32_C( -629773535), INT32_C( 667046523), INT32_C( 1317445565), INT32_C( -732937024)), 
UINT16_C(28888), simde_mm512_set_epi32(INT32_C(-1877548571), INT32_C( -399920351), INT32_C( 15525797), INT32_C( 18165921), INT32_C( 2085930596), INT32_C( 1662282658), INT32_C(-1842752263), INT32_C( 499820912), INT32_C( 1419797765), INT32_C( -44818966), INT32_C( 1761152620), INT32_C(-1288657930), INT32_C( 894078020), INT32_C(-1369431563), INT32_C( -103362440), INT32_C(-1559726025)), simde_mm512_set_epi32(INT32_C( 114710097), INT32_C( 399920351), INT32_C( 15525797), INT32_C( 18165921), INT32_C( 1116027725), INT32_C( -871797325), INT32_C(-1979326643), INT32_C( 1477004857), INT32_C( 1419797765), INT32_C( 44818966), INT32_C( 1863789116), INT32_C( 1288657930), INT32_C( 894078020), INT32_C( 667046523), INT32_C( 1317445565), INT32_C( -732937024)) }, { simde_mm512_set_epi32(INT32_C( 1505436737), INT32_C( 342253548), INT32_C( 1435179252), INT32_C( 1326766533), INT32_C( 156769011), INT32_C( 343760696), INT32_C( 611303965), INT32_C( 1457341409), INT32_C( 165452421), INT32_C(-1824090116), INT32_C( -184738383), INT32_C( -191086464), INT32_C( -117301127), INT32_C(-1526262537), INT32_C( -208138847), INT32_C( 807348777)), UINT16_C(61134), simde_mm512_set_epi32(INT32_C( 1427056174), INT32_C( 2097896620), INT32_C( 1813263538), INT32_C( 1909821993), INT32_C( 1439822042), INT32_C(-1049213292), INT32_C( 1557133349), INT32_C( 1168931268), INT32_C( -810546774), INT32_C(-1283013132), INT32_C( 654302587), INT32_C( 314275905), INT32_C(-1091094079), INT32_C( -114174508), INT32_C( 407580338), INT32_C( 1906809805)), simde_mm512_set_epi32(INT32_C( 1427056174), INT32_C( 2097896620), INT32_C( 1813263538), INT32_C( 1326766533), INT32_C( 1439822042), INT32_C( 1049213292), INT32_C( 1557133349), INT32_C( 1457341409), INT32_C( 810546774), INT32_C( 1283013132), INT32_C( -184738383), INT32_C( -191086464), INT32_C( 1091094079), INT32_C( 114174508), INT32_C( 407580338), INT32_C( 807348777)) }, { simde_mm512_set_epi32(INT32_C( 905172649), INT32_C(-1044778809), INT32_C(-1938215986), INT32_C(-1138753169), 
INT32_C(-1689961651), INT32_C( 890456168), INT32_C( 1382435241), INT32_C( -803845344), INT32_C( 430838507), INT32_C( 1075259040), INT32_C(-1956785379), INT32_C(-1586468297), INT32_C( 622055688), INT32_C(-1127740382), INT32_C( 466514910), INT32_C(-1745879628)), UINT16_C(30570), simde_mm512_set_epi32(INT32_C( -310045086), INT32_C( 560822999), INT32_C( -680371476), INT32_C( 1838395052), INT32_C(-1152635838), INT32_C( -481448106), INT32_C( 871399876), INT32_C( -939960538), INT32_C( -898000986), INT32_C( -641497176), INT32_C( 657638908), INT32_C(-1796735419), INT32_C(-1032150818), INT32_C( 151713087), INT32_C( 1554707006), INT32_C( -318690470)), simde_mm512_set_epi32(INT32_C( 905172649), INT32_C( 560822999), INT32_C( 680371476), INT32_C( 1838395052), INT32_C(-1689961651), INT32_C( 481448106), INT32_C( 871399876), INT32_C( 939960538), INT32_C( 430838507), INT32_C( 641497176), INT32_C( 657638908), INT32_C(-1586468297), INT32_C( 1032150818), INT32_C(-1127740382), INT32_C( 1554707006), INT32_C(-1745879628)) }, { simde_mm512_set_epi32(INT32_C(-1675700291), INT32_C( -85412591), INT32_C(-1865493216), INT32_C(-1122257925), INT32_C( 955620837), INT32_C( -725693586), INT32_C( 1056307491), INT32_C( 1924019839), INT32_C(-2012466116), INT32_C(-1808881746), INT32_C( -887453452), INT32_C( 160221724), INT32_C( -886018282), INT32_C( 1222780200), INT32_C( 1877396684), INT32_C( 283360472)), UINT16_C(28339), simde_mm512_set_epi32(INT32_C(-1238615237), INT32_C( 583893938), INT32_C( -594441984), INT32_C( 1561597956), INT32_C( 174377227), INT32_C( 319460903), INT32_C(-1295208114), INT32_C( 659707887), INT32_C( 1117898731), INT32_C( -209622907), INT32_C(-1431480123), INT32_C(-2058827609), INT32_C(-1519596795), INT32_C( 24332922), INT32_C( -338106630), INT32_C(-1565374776)), simde_mm512_set_epi32(INT32_C(-1675700291), INT32_C( 583893938), INT32_C( 594441984), INT32_C(-1122257925), INT32_C( 174377227), INT32_C( 319460903), INT32_C( 1295208114), INT32_C( 1924019839), INT32_C( 1117898731), 
INT32_C(-1808881746), INT32_C( 1431480123), INT32_C( 2058827609), INT32_C( -886018282), INT32_C( 1222780200), INT32_C( 338106630), INT32_C( 1565374776)) }, { simde_mm512_set_epi32(INT32_C( 178377352), INT32_C( -324510384), INT32_C( 446946466), INT32_C(-1323398690), INT32_C( -720979875), INT32_C( -512216094), INT32_C( 1145272930), INT32_C( -706074883), INT32_C(-1863795060), INT32_C( -525595897), INT32_C( 1357119557), INT32_C( 837734387), INT32_C( -607392699), INT32_C( -498581669), INT32_C(-2108693629), INT32_C( -476969927)), UINT16_C(42507), simde_mm512_set_epi32(INT32_C( -5472621), INT32_C( -263868960), INT32_C(-1867831731), INT32_C( 955254216), INT32_C( 1990179011), INT32_C(-1729740457), INT32_C( 1711933869), INT32_C(-1566075058), INT32_C( -550106516), INT32_C(-1087591249), INT32_C( 919917002), INT32_C(-1410389997), INT32_C( -188117230), INT32_C( 1025569327), INT32_C(-1456210246), INT32_C( -254945819)), simde_mm512_set_epi32(INT32_C( 5472621), INT32_C( -324510384), INT32_C( 1867831731), INT32_C(-1323398690), INT32_C( -720979875), INT32_C( 1729740457), INT32_C( 1711933869), INT32_C( -706074883), INT32_C(-1863795060), INT32_C( -525595897), INT32_C( 1357119557), INT32_C( 837734387), INT32_C( 188117230), INT32_C( -498581669), INT32_C( 1456210246), INT32_C( 254945819)) }, { simde_mm512_set_epi32(INT32_C(-1007934437), INT32_C( 201253136), INT32_C( 2123754123), INT32_C( 1034305262), INT32_C( 2139323878), INT32_C( -545410429), INT32_C(-1549231865), INT32_C( 1779895500), INT32_C( 1932853973), INT32_C( 2135732954), INT32_C( 1232725518), INT32_C( 339564914), INT32_C( -113030707), INT32_C(-1715459937), INT32_C( -492435091), INT32_C(-1720946495)), UINT16_C(49758), simde_mm512_set_epi32(INT32_C( 348473993), INT32_C(-1624874318), INT32_C( 361690252), INT32_C( 165927413), INT32_C(-1864332117), INT32_C( -524477604), INT32_C( 481484649), INT32_C(-1499715490), INT32_C(-1683117466), INT32_C(-2055457330), INT32_C( -850617531), INT32_C(-2081246973), INT32_C( 1276057415), INT32_C( 
1619064589), INT32_C(-1536816688), INT32_C( 2060578085)), simde_mm512_set_epi32(INT32_C( 348473993), INT32_C( 1624874318), INT32_C( 2123754123), INT32_C( 1034305262), INT32_C( 2139323878), INT32_C( -545410429), INT32_C( 481484649), INT32_C( 1779895500), INT32_C( 1932853973), INT32_C( 2055457330), INT32_C( 1232725518), INT32_C( 2081246973), INT32_C( 1276057415), INT32_C( 1619064589), INT32_C( 1536816688), INT32_C(-1720946495)) }, { simde_mm512_set_epi32(INT32_C( 860828042), INT32_C( 1459856596), INT32_C(-1901530659), INT32_C( 1296141157), INT32_C( 778663095), INT32_C(-1872048536), INT32_C(-1115787645), INT32_C(-1142406643), INT32_C( 1518955242), INT32_C( -174688543), INT32_C( 1537062129), INT32_C( -974095643), INT32_C( 125816377), INT32_C(-1032428044), INT32_C( -374455538), INT32_C( -648832583)), UINT16_C(41340), simde_mm512_set_epi32(INT32_C( 1553986008), INT32_C( -808715903), INT32_C(-2114331727), INT32_C( 878797396), INT32_C( 1547560130), INT32_C( -931453209), INT32_C( 639671594), INT32_C( 734358771), INT32_C(-1802430748), INT32_C( 38083245), INT32_C( 636500349), INT32_C( 2020438947), INT32_C( 89083218), INT32_C( 2041918986), INT32_C(-2068453500), INT32_C( 1772569863)), simde_mm512_set_epi32(INT32_C( 1553986008), INT32_C( 1459856596), INT32_C( 2114331727), INT32_C( 1296141157), INT32_C( 778663095), INT32_C(-1872048536), INT32_C(-1115787645), INT32_C( 734358771), INT32_C( 1518955242), INT32_C( 38083245), INT32_C( 636500349), INT32_C( 2020438947), INT32_C( 89083218), INT32_C( 2041918986), INT32_C( -374455538), INT32_C( -648832583)) }, { simde_mm512_set_epi32(INT32_C(-1208548961), INT32_C( 1705109710), INT32_C( -159097588), INT32_C( -879037423), INT32_C( 2121552533), INT32_C( 595529007), INT32_C( -405863552), INT32_C( 1431630584), INT32_C( -616000216), INT32_C( 444327364), INT32_C( 613413664), INT32_C(-2128463203), INT32_C( 939927077), INT32_C(-1255659348), INT32_C(-1631544337), INT32_C(-1727626838)), UINT16_C(49163), simde_mm512_set_epi32(INT32_C( 895846723), 
INT32_C( 449272422), INT32_C( 1127330699), INT32_C(-1084895433), INT32_C( -399265722), INT32_C( 697840482), INT32_C( -598276089), INT32_C( -50403840), INT32_C( 1970006978), INT32_C( 1602141812), INT32_C(-1773480652), INT32_C( 740913018), INT32_C( 1668822994), INT32_C( 698152405), INT32_C( 1772335922), INT32_C( 847772835)), simde_mm512_set_epi32(INT32_C( 895846723), INT32_C( 449272422), INT32_C( -159097588), INT32_C( -879037423), INT32_C( 2121552533), INT32_C( 595529007), INT32_C( -405863552), INT32_C( 1431630584), INT32_C( -616000216), INT32_C( 444327364), INT32_C( 613413664), INT32_C(-2128463203), INT32_C( 1668822994), INT32_C(-1255659348), INT32_C( 1772335922), INT32_C( 847772835)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_abs_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_abs_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512i a; simde__m512i r; } test_vec[8] = { { UINT16_C(17600), simde_mm512_set_epi32(INT32_C( 393115914), INT32_C( -9604904), INT32_C( 114710097), INT32_C( 1837246098), INT32_C(-1399577225), INT32_C(-1388127606), INT32_C( 1116027725), INT32_C( -871797325), INT32_C(-1979326643), INT32_C( 1477004857), INT32_C( 1670723749), INT32_C(-1006052339), INT32_C( 1863789116), INT32_C( -690396684), INT32_C( -629773535), INT32_C( 667046523)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 9604904), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1388127606), INT32_C( 0), INT32_C( 0), INT32_C( 1979326643), INT32_C( 1477004857), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(30775), simde_mm512_set_epi32(INT32_C( -208138847), INT32_C( 807348777), INT32_C(-1877548571), INT32_C( -399920351), INT32_C( 15525797), INT32_C( 18165921), INT32_C( 2085930596), INT32_C( 1662282658), INT32_C(-1842752263), INT32_C( 499820912), INT32_C( 
1419797765), INT32_C( -44818966), INT32_C( 1761152620), INT32_C(-1288657930), INT32_C( 894078020), INT32_C(-1369431563)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 807348777), INT32_C( 1877548571), INT32_C( 399920351), INT32_C( 15525797), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1419797765), INT32_C( 44818966), INT32_C( 0), INT32_C( 1288657930), INT32_C( 894078020), INT32_C( 1369431563)) }, { UINT16_C( 5367), simde_mm512_set_epi32(INT32_C( 407580338), INT32_C( 1906809805), INT32_C( -849801752), INT32_C(-1965822258), INT32_C( 1505436737), INT32_C( 342253548), INT32_C( 1435179252), INT32_C( 1326766533), INT32_C( 156769011), INT32_C( 343760696), INT32_C( 611303965), INT32_C( 1457341409), INT32_C( 165452421), INT32_C(-1824090116), INT32_C( -184738383), INT32_C( -191086464)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1965822258), INT32_C( 0), INT32_C( 342253548), INT32_C( 0), INT32_C( 0), INT32_C( 156769011), INT32_C( 343760696), INT32_C( 611303965), INT32_C( 1457341409), INT32_C( 0), INT32_C( 1824090116), INT32_C( 184738383), INT32_C( 191086464)) }, { UINT16_C(54740), simde_mm512_set_epi32(INT32_C( 622055688), INT32_C(-1127740382), INT32_C( 466514910), INT32_C(-1745879628), INT32_C( 1427056174), INT32_C( 2097896620), INT32_C( 1813263538), INT32_C( 1909821993), INT32_C( 1439822042), INT32_C(-1049213292), INT32_C( 1557133349), INT32_C( 1168931268), INT32_C( -810546774), INT32_C(-1283013132), INT32_C( 654302587), INT32_C( 314275905)), simde_mm512_set_epi32(INT32_C( 622055688), INT32_C( 1127740382), INT32_C( 0), INT32_C( 1745879628), INT32_C( 0), INT32_C( 2097896620), INT32_C( 0), INT32_C( 1909821993), INT32_C( 1439822042), INT32_C( 1049213292), INT32_C( 0), INT32_C( 1168931268), INT32_C( 0), INT32_C( 1283013132), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(27191), simde_mm512_set_epi32(INT32_C(-1032150818), INT32_C( 151713087), INT32_C( 1554707006), INT32_C( -318690470), INT32_C( 788893537), INT32_C( -230394006), 
INT32_C( 905172649), INT32_C(-1044778809), INT32_C(-1938215986), INT32_C(-1138753169), INT32_C(-1689961651), INT32_C( 890456168), INT32_C( 1382435241), INT32_C( -803845344), INT32_C( 430838507), INT32_C( 1075259040)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 151713087), INT32_C( 1554707006), INT32_C( 0), INT32_C( 788893537), INT32_C( 0), INT32_C( 905172649), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1689961651), INT32_C( 890456168), INT32_C( 0), INT32_C( 803845344), INT32_C( 430838507), INT32_C( 1075259040)) }, { UINT16_C(65093), simde_mm512_set_epi32(INT32_C( -887453452), INT32_C( 160221724), INT32_C( -886018282), INT32_C( 1222780200), INT32_C( 1877396684), INT32_C( 283360472), INT32_C( -310045086), INT32_C( 560822999), INT32_C( -680371476), INT32_C( 1838395052), INT32_C(-1152635838), INT32_C( -481448106), INT32_C( 871399876), INT32_C( -939960538), INT32_C( -898000986), INT32_C( -641497176)), simde_mm512_set_epi32(INT32_C( 887453452), INT32_C( 160221724), INT32_C( 886018282), INT32_C( 1222780200), INT32_C( 1877396684), INT32_C( 283360472), INT32_C( 310045086), INT32_C( 0), INT32_C( 0), INT32_C( 1838395052), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 939960538), INT32_C( 0), INT32_C( 641497176)) }, { UINT16_C(42926), simde_mm512_set_epi32(INT32_C(-1431480123), INT32_C(-2058827609), INT32_C(-1519596795), INT32_C( 24332922), INT32_C( -338106630), INT32_C(-1565374776), INT32_C(-1426452996), INT32_C( -680300877), INT32_C(-1675700291), INT32_C( -85412591), INT32_C(-1865493216), INT32_C(-1122257925), INT32_C( 955620837), INT32_C( -725693586), INT32_C( 1056307491), INT32_C( 1924019839)), simde_mm512_set_epi32(INT32_C( 1431480123), INT32_C( 0), INT32_C( 1519596795), INT32_C( 0), INT32_C( 0), INT32_C( 1565374776), INT32_C( 1426452996), INT32_C( 680300877), INT32_C( 1675700291), INT32_C( 0), INT32_C( 1865493216), INT32_C( 0), INT32_C( 955620837), INT32_C( 725693586), INT32_C( 1056307491), INT32_C( 0)) }, { UINT16_C(26757), 
simde_mm512_set_epi32(INT32_C(-1863795060), INT32_C( -525595897), INT32_C( 1357119557), INT32_C( 837734387), INT32_C( -607392699), INT32_C( -498581669), INT32_C(-2108693629), INT32_C( -476969927), INT32_C(-1238615237), INT32_C( 583893938), INT32_C( -594441984), INT32_C( 1561597956), INT32_C( 174377227), INT32_C( 319460903), INT32_C(-1295208114), INT32_C( 659707887)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 525595897), INT32_C( 1357119557), INT32_C( 0), INT32_C( 607392699), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1238615237), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 319460903), INT32_C( 0), INT32_C( 659707887)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_abs_epi32(test_vec[i].k, test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_abs_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C(-4703669018152042913), INT64_C(-4045642624518788548), INT64_C(-4525466663746518179), INT64_C( 8998788960652053415), INT64_C( 1494272869059842335), INT64_C( 8817699709611505791), INT64_C( 6340501178400867239), INT64_C( -331077862235736168)), simde_mm512_set_epi64(INT64_C( 4703669018152042913), INT64_C( 4045642624518788548), INT64_C( 4525466663746518179), INT64_C( 8998788960652053415), INT64_C( 1494272869059842335), INT64_C( 8817699709611505791), INT64_C( 6340501178400867239), INT64_C( 331077862235736168)) }, { simde_mm512_set_epi64(INT64_C( 8010327509455286697), INT64_C( 7606702663991788660), INT64_C( 1151136024847303203), INT64_C( 6049881760672440877), INT64_C( 733511420638679938), INT64_C( 5858508694238056658), INT64_C(-6405681308945653600), INT64_C( 6147272925506298466)), simde_mm512_set_epi64(INT64_C( 8010327509455286697), INT64_C( 7606702663991788660), INT64_C( 1151136024847303203), INT64_C( 6049881760672440877), INT64_C( 733511420638679938), INT64_C( 
5858508694238056658), INT64_C( 6405681308945653600), INT64_C( 6147272925506298466)) }, { simde_mm512_set_epi64(INT64_C( 6464197827924287450), INT64_C( -497314884836304285), INT64_C(-3088355329109459024), INT64_C( 6086538207170510268), INT64_C( 5130097871257028467), INT64_C(-1548377050112752776), INT64_C(-1253733446110746976), INT64_C( 3012333519594431544)), simde_mm512_set_epi64(INT64_C( 6464197827924287450), INT64_C( 497314884836304285), INT64_C( 3088355329109459024), INT64_C( 6086538207170510268), INT64_C( 5130097871257028467), INT64_C( 1548377050112752776), INT64_C( 1253733446110746976), INT64_C( 3012333519594431544)) }, { simde_mm512_set_epi64(INT64_C(-6609116217957060725), INT64_C( -302281769423418319), INT64_C(-4897195678850214218), INT64_C(-5503480572790438091), INT64_C(-7839393967146815223), INT64_C( 3255671599336790273), INT64_C( 664294275788018935), INT64_C( 3408593724494687769)), simde_mm512_set_epi64(INT64_C( 6609116217957060725), INT64_C( 302281769423418319), INT64_C( 4897195678850214218), INT64_C( 5503480572790438091), INT64_C( 7839393967146815223), INT64_C( 3255671599336790273), INT64_C( 664294275788018935), INT64_C( 3408593724494687769)) }, { simde_mm512_set_epi64(INT64_C(-3947921567074644800), INT64_C( 2473906111070933983), INT64_C( 5899615628251993446), INT64_C( 1683758976557896025), INT64_C( 5912559464823232024), INT64_C( 8209801467605337080), INT64_C(-3759026771366879931), INT64_C(-4585023147545297955)), simde_mm512_set_epi64(INT64_C( 3947921567074644800), INT64_C( 2473906111070933983), INT64_C( 5899615628251993446), INT64_C( 1683758976557896025), INT64_C( 5912559464823232024), INT64_C( 8209801467605337080), INT64_C( 3759026771366879931), INT64_C( 4585023147545297955)) }, { simde_mm512_set_epi64(INT64_C(-5018179428847904871), INT64_C(-5078614119960003085), INT64_C( 1906444753996234817), INT64_C(-8073431190678733657), INT64_C( 2507716860794484867), INT64_C( 3050920605853136553), INT64_C( 8003726449701589009), INT64_C( 8169798351590582410)), 
simde_mm512_set_epi64(INT64_C( 5018179428847904871), INT64_C( 5078614119960003085), INT64_C( 1906444753996234817), INT64_C( 8073431190678733657), INT64_C( 2507716860794484867), INT64_C( 3050920605853136553), INT64_C( 8003726449701589009), INT64_C( 8169798351590582410)) }, { simde_mm512_set_epi64(INT64_C( 34320467490721535), INT64_C( 6032039111009323642), INT64_C( 8825906149542039035), INT64_C( 3097839263351160271), INT64_C( 8028249960129894953), INT64_C(-3442961435998375158), INT64_C( 4762825474720326971), INT64_C( 5172383913584297790)), simde_mm512_set_epi64(INT64_C( 34320467490721535), INT64_C( 6032039111009323642), INT64_C( 8825906149542039035), INT64_C( 3097839263351160271), INT64_C( 8028249960129894953), INT64_C( 3442961435998375158), INT64_C( 4762825474720326971), INT64_C( 5172383913584297790)) }, { simde_mm512_set_epi64(INT64_C( 6012124616828815523), INT64_C(-1490086426868961318), INT64_C(-6824757216361935934), INT64_C( 6020178289686090572), INT64_C( 1854069096850744512), INT64_C(-5881095665354951863), INT64_C( 6788068100773536681), INT64_C(-2707434157113404213)), simde_mm512_set_epi64(INT64_C( 6012124616828815523), INT64_C( 1490086426868961318), INT64_C( 6824757216361935934), INT64_C( 6020178289686090572), INT64_C( 1854069096850744512), INT64_C( 5881095665354951863), INT64_C( 6788068100773536681), INT64_C( 2707434157113404213)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_abs_epi64(test_vec[i].a); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_abs_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask8 k; simde__m512i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( 492676116973233810), INT64_C(-6011138406694593910), INT64_C( 4793302583727451571), INT64_C(-8501143198309462471), INT64_C( 7175703865894427661), INT64_C( 8004913303465320948), INT64_C(-2704856736044264837), INT64_C( 5658385619497272512)), 
UINT8_C(216), simde_mm512_set_epi64(INT64_C(-1717644828534315099), INT64_C( 78022038682650212), INT64_C( 7139449655270167801), INT64_C( 2146714472316691717), INT64_C( -192495991449383316), INT64_C(-5534743664186979260), INT64_C(-5881663773003558792), INT64_C(-6698972267701962486)), simde_mm512_set_epi64(INT64_C( 1717644828534315099), INT64_C( 78022038682650212), INT64_C( 4793302583727451571), INT64_C( 2146714472316691717), INT64_C( 192495991449383316), INT64_C( 8004913303465320948), INT64_C(-2704856736044264837), INT64_C( 5658385619497272512)) }, { simde_mm512_set_epi64(INT64_C( 1469967797035145460), INT64_C( 5698418868819073779), INT64_C( 1476440947581501981), INT64_C( 6259233690927012485), INT64_C(-7834407389066617423), INT64_C( -820710109410615175), INT64_C(-6555247677438161503), INT64_C( 3467536596098015717)), UINT8_C( 65), simde_mm512_set_epi64(INT64_C( 7787907596649075241), INT64_C( 6183988585695692436), INT64_C( 6687836810634885572), INT64_C(-3481271883196348940), INT64_C( 2810208213167470657), INT64_C(-4686213381983447596), INT64_C( 1750544224109435853), INT64_C(-3649870730594357554)), simde_mm512_set_epi64(INT64_C( 1469967797035145460), INT64_C( 6183988585695692436), INT64_C( 1476440947581501981), INT64_C( 6259233690927012485), INT64_C(-7834407389066617423), INT64_C( -820710109410615175), INT64_C(-6555247677438161503), INT64_C( 3649870730594357554)) }, { simde_mm512_set_epi64(INT64_C(-8324574269298179729), INT64_C(-7258330021648709528), INT64_C( 5937514152424000288), INT64_C( 1850437298497726112), INT64_C(-8404329205387466185), INT64_C( 2671708839418006562), INT64_C( 2003666284095471028), INT64_C( 6129159598982782124)), UINT8_C(199), simde_mm512_set_epi64(INT64_C( 7895846628610550850), INT64_C(-2067803869119741500), INT64_C(-4037099766843598938), INT64_C(-2755209390738717188), INT64_C(-7716919860907040546), INT64_C( 651602748594909758), INT64_C(-1368765145407975583), INT64_C( -989534720059255127)), simde_mm512_set_epi64(INT64_C( 7895846628610550850), 
INT64_C( 2067803869119741500), INT64_C( 5937514152424000288), INT64_C( 1850437298497726112), INT64_C(-8404329205387466185), INT64_C( 651602748594909758), INT64_C( 1368765145407975583), INT64_C( 989534720059255127)) }, { simde_mm512_set_epi64(INT64_C(-4820061084596199963), INT64_C(-3116830217730655965), INT64_C( 8263602287642686524), INT64_C(-7769087937993864972), INT64_C( 688147068097687318), INT64_C( 5251800971073735884), INT64_C( 1217023964204045922), INT64_C( 2408716443164236524)), UINT8_C( 32), simde_mm512_set_epi64(INT64_C( 748944487451629095), INT64_C(-5562876490484131857), INT64_C( 4801338493970245765), INT64_C(-6148160310922917721), INT64_C(-6526618537607083398), INT64_C(-1452156915681179960), INT64_C(-6126568963486552397), INT64_C(-7197077943533128431)), simde_mm512_set_epi64(INT64_C(-4820061084596199963), INT64_C(-3116830217730655965), INT64_C( 4801338493970245765), INT64_C(-7769087937993864972), INT64_C( 688147068097687318), INT64_C( 5251800971073735884), INT64_C( 1217023964204045922), INT64_C( 2408716443164236524)) }, { simde_mm512_set_epi64(INT64_C(-3096584980416416798), INT64_C( 4918909782932989693), INT64_C(-8004938825376986361), INT64_C( 5828784114914742259), INT64_C(-2608731774237786277), INT64_C(-9056770170020559815), INT64_C(-5319811934658395214), INT64_C(-2553108879087757308)), UINT8_C(222), simde_mm512_set_epi64(INT64_C(-7429178691671160403), INT64_C(-6726241153446442388), INT64_C(-4671168844950875702), INT64_C(-6057578907613688046), INT64_C( 4404786722084486842), INT64_C(-1094983952222664046), INT64_C( 5287971478839612040), INT64_C(-1393761486045455198)), simde_mm512_set_epi64(INT64_C( 7429178691671160403), INT64_C( 6726241153446442388), INT64_C(-8004938825376986361), INT64_C( 6057578907613688046), INT64_C( 4404786722084486842), INT64_C( 1094983952222664046), INT64_C( 5287971478839612040), INT64_C(-2553108879087757308)) }, { simde_mm512_set_epi64(INT64_C(-2342519952706594553), INT64_C( 7644592964730421973), INT64_C( 9172903191652197902), 
INT64_C( 1458420204680989133), INT64_C(-7367844323210688147), INT64_C(-7391408909901332845), INT64_C(-1133308551202396595), INT64_C( 4102785619076298947)), UINT8_C(230), simde_mm512_set_epi64(INT64_C( 2067960823776290910), INT64_C(-7228934469556881970), INT64_C(-3653374474835545853), INT64_C( 5480624866862364429), INT64_C(-6600577412846457563), INT64_C( 2529415530022027870), INT64_C(-4329045443225919216), INT64_C( 9121454504064466670)), simde_mm512_set_epi64(INT64_C( 2067960823776290910), INT64_C( 7228934469556881970), INT64_C( 3653374474835545853), INT64_C( 1458420204680989133), INT64_C(-7367844323210688147), INT64_C( 2529415530022027870), INT64_C( 4329045443225919216), INT64_C( 4102785619076298947)) }, { simde_mm512_set_epi64(INT64_C(-4792271441403297267), INT64_C( 6523863092598044385), INT64_C( 6601631579296004837), INT64_C( 540377227778745844), INT64_C(-1608274285869950535), INT64_C( 1496684406111625906), INT64_C( 1553447803787926005), INT64_C(-8007245467626955940)), UINT8_C(104), simde_mm512_set_epi64(INT64_C( 3154046907468289764), INT64_C( 163566292437055869), INT64_C( 8677719201018760530), INT64_C( 8769975268177995652), INT64_C( 7613129594859420923), INT64_C(-1998576254813523574), INT64_C( 6270036339063321053), INT64_C( 5566883881093264567)), simde_mm512_set_epi64(INT64_C(-4792271441403297267), INT64_C( 163566292437055869), INT64_C( 8677719201018760530), INT64_C( 540377227778745844), INT64_C( 7613129594859420923), INT64_C( 1496684406111625906), INT64_C( 1553447803787926005), INT64_C(-8007245467626955940)) }, { simde_mm512_set_epi64(INT64_C( 6148806541912347944), INT64_C( 1908371497711301408), INT64_C(-9141679846684482011), INT64_C(-5393015831913260049), INT64_C(-7420100767347904040), INT64_C(-3473408352959472719), INT64_C( 3774406077177521346), INT64_C(-4000561069769581270)), UINT8_C(128), simde_mm512_set_epi64(INT64_C( 8461115545003933300), INT64_C(-7617041399687843974), INT64_C( 7167540182740956629), INT64_C( 7612124823363779747), 
INT64_C(-1891905030773424117), INT64_C(-5190678261404669746), INT64_C( -683318933916552175), INT64_C( 9111998746576489775)), simde_mm512_set_epi64(INT64_C( 8461115545003933300), INT64_C( 1908371497711301408), INT64_C(-9141679846684482011), INT64_C(-5393015831913260049), INT64_C(-7420100767347904040), INT64_C(-3473408352959472719), INT64_C( 3774406077177521346), INT64_C(-4000561069769581270)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_abs_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_abs_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; simde__m512i r; } test_vec[8] = { { UINT8_C(192), simde_mm512_set_epi64(INT64_C( -41252748446509487), INT64_C( 7890911908509001079), INT64_C(-5961962669328745651), INT64_C(-3744340997299642547), INT64_C( 6343687558518880421), INT64_C(-4320961892205516228), INT64_C(-2965231175381652703), INT64_C( 2864943002512957373)), simde_mm512_set_epi64(INT64_C( 41252748446509487), INT64_C( 7890911908509001079), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C( 10), simde_mm512_set_epi64(INT64_C(-8064009705201487071), INT64_C( 66682790377500833), INT64_C( 8959003693208071074), INT64_C(-7914560703715169936), INT64_C( 6097984971859041770), INT64_C( 7564092909171024886), INT64_C( 3840035858897969653), INT64_C( -443938296699520969)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 6097984971859041770), INT64_C( 0), INT64_C( 3840035858897969653), INT64_C( 0)) }, { UINT8_C( 41), simde_mm512_set_epi64(INT64_C(-8443142306353437631), INT64_C( 1469967797035145460), INT64_C( 5698418868819073779), INT64_C( 1476440947581501981), INT64_C( 6259233690927012485), INT64_C(-7834407389066617423), INT64_C( -820710109410615175), INT64_C(-6555247677438161503)), simde_mm512_set_epi64(INT64_C( 0), 
INT64_C( 0), INT64_C( 5698418868819073779), INT64_C( 0), INT64_C( 6259233690927012485), INT64_C( 0), INT64_C( 0), INT64_C( 6555247677438161503)) }, { UINT8_C(232), simde_mm512_set_epi64(INT64_C( 6129159598982782124), INT64_C( 7787907596649075241), INT64_C( 6183988585695692436), INT64_C( 6687836810634885572), INT64_C(-3481271883196348940), INT64_C( 2810208213167470657), INT64_C(-4686213381983447596), INT64_C( 1750544224109435853)), simde_mm512_set_epi64(INT64_C( 6129159598982782124), INT64_C( 7787907596649075241), INT64_C( 6183988585695692436), INT64_C( 0), INT64_C( 3481271883196348940), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C(180), simde_mm512_set_epi64(INT64_C( -989534720059255127), INT64_C(-4487290813852079154), INT64_C(-4890907616466355379), INT64_C( 3824480121463916969), INT64_C(-3452489463091031317), INT64_C( 4618202413866537757), INT64_C(-6813829451133759224), INT64_C(-4843608058602032162)), simde_mm512_set_epi64(INT64_C( 989534720059255127), INT64_C( 0), INT64_C( 4890907616466355379), INT64_C( 3824480121463916969), INT64_C( 0), INT64_C( 4618202413866537757), INT64_C( 0), INT64_C( 0)) }, { UINT8_C( 97), simde_mm512_set_epi64(INT64_C(-1331633504094684457), INT64_C(-2922173236712853844), INT64_C(-4950533224594034858), INT64_C( 3742633972513462054), INT64_C(-3856884862992283736), INT64_C( 2824537604935384645), INT64_C(-4433054007697935041), INT64_C( 6677415749608352602)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 2922173236712853844), INT64_C( 4950533224594034858), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 6677415749608352602)) }, { UINT8_C(216), simde_mm512_set_epi64(INT64_C(-2921870015535851587), INT64_C( -366844282582149856), INT64_C(-4820061084596199963), INT64_C(-3116830217730655965), INT64_C( 8263602287642686524), INT64_C(-7769087937993864972), INT64_C( 688147068097687318), INT64_C( 5251800971073735884)), simde_mm512_set_epi64(INT64_C( 2921870015535851587), INT64_C( 366844282582149856), INT64_C( 0), INT64_C( 
3116830217730655965), INT64_C( 8263602287642686524), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C(252), simde_mm512_set_epi64(INT64_C(-5319811934658395214), INT64_C(-2553108879087757308), INT64_C( 748944487451629095), INT64_C(-5562876490484131857), INT64_C( 4801338493970245765), INT64_C(-6148160310922917721), INT64_C(-6526618537607083398), INT64_C(-1452156915681179960)), simde_mm512_set_epi64(INT64_C( 5319811934658395214), INT64_C( 2553108879087757308), INT64_C( 748944487451629095), INT64_C( 5562876490484131857), INT64_C( 4801338493970245765), INT64_C( 6148160310922917721), INT64_C( 0), INT64_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_abs_epi64(test_vec[i].k, test_vec[i].a); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_abs_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( 747.74), SIMDE_FLOAT32_C( -874.37), SIMDE_FLOAT32_C( 751.90), SIMDE_FLOAT32_C( -592.77), SIMDE_FLOAT32_C( -708.81), SIMDE_FLOAT32_C( 252.42), SIMDE_FLOAT32_C( -787.46), SIMDE_FLOAT32_C( -882.47), SIMDE_FLOAT32_C( -140.56), SIMDE_FLOAT32_C( -558.99), SIMDE_FLOAT32_C( 240.08), SIMDE_FLOAT32_C( -481.72), SIMDE_FLOAT32_C( 489.35), SIMDE_FLOAT32_C( 686.76), SIMDE_FLOAT32_C( -206.54), SIMDE_FLOAT32_C( 728.61)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 747.74), SIMDE_FLOAT32_C( 874.37), SIMDE_FLOAT32_C( 751.90), SIMDE_FLOAT32_C( 592.77), SIMDE_FLOAT32_C( 708.81), SIMDE_FLOAT32_C( 252.42), SIMDE_FLOAT32_C( 787.46), SIMDE_FLOAT32_C( 882.47), SIMDE_FLOAT32_C( 140.56), SIMDE_FLOAT32_C( 558.99), SIMDE_FLOAT32_C( 240.08), SIMDE_FLOAT32_C( 481.72), SIMDE_FLOAT32_C( 489.35), SIMDE_FLOAT32_C( 686.76), SIMDE_FLOAT32_C( 206.54), SIMDE_FLOAT32_C( 728.61)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 76.28), SIMDE_FLOAT32_C( -319.23), SIMDE_FLOAT32_C( 655.09), SIMDE_FLOAT32_C( 773.21), SIMDE_FLOAT32_C( -928.32), 
SIMDE_FLOAT32_C( -25.13), SIMDE_FLOAT32_C( -847.53), SIMDE_FLOAT32_C( 859.40), SIMDE_FLOAT32_C( 388.54), SIMDE_FLOAT32_C( -184.67), SIMDE_FLOAT32_C( 102.38), SIMDE_FLOAT32_C( 833.56), SIMDE_FLOAT32_C( -722.29), SIMDE_FLOAT32_C( -441.84), SIMDE_FLOAT32_C( -821.42), SIMDE_FLOAT32_C( -761.98)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 76.28), SIMDE_FLOAT32_C( 319.23), SIMDE_FLOAT32_C( 655.09), SIMDE_FLOAT32_C( 773.21), SIMDE_FLOAT32_C( 928.32), SIMDE_FLOAT32_C( 25.13), SIMDE_FLOAT32_C( 847.53), SIMDE_FLOAT32_C( 859.40), SIMDE_FLOAT32_C( 388.54), SIMDE_FLOAT32_C( 184.67), SIMDE_FLOAT32_C( 102.38), SIMDE_FLOAT32_C( 833.56), SIMDE_FLOAT32_C( 722.29), SIMDE_FLOAT32_C( 441.84), SIMDE_FLOAT32_C( 821.42), SIMDE_FLOAT32_C( 761.98)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -645.61), SIMDE_FLOAT32_C( 594.77), SIMDE_FLOAT32_C( -171.69), SIMDE_FLOAT32_C( 108.08), SIMDE_FLOAT32_C( -7.24), SIMDE_FLOAT32_C( 885.82), SIMDE_FLOAT32_C( 296.84), SIMDE_FLOAT32_C( -408.70), SIMDE_FLOAT32_C( -40.31), SIMDE_FLOAT32_C( 866.84), SIMDE_FLOAT32_C( -660.11), SIMDE_FLOAT32_C( 121.17), SIMDE_FLOAT32_C( 988.31), SIMDE_FLOAT32_C( -622.26), SIMDE_FLOAT32_C( 206.00), SIMDE_FLOAT32_C( 520.48)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 645.61), SIMDE_FLOAT32_C( 594.77), SIMDE_FLOAT32_C( 171.69), SIMDE_FLOAT32_C( 108.08), SIMDE_FLOAT32_C( 7.24), SIMDE_FLOAT32_C( 885.82), SIMDE_FLOAT32_C( 296.84), SIMDE_FLOAT32_C( 408.70), SIMDE_FLOAT32_C( 40.31), SIMDE_FLOAT32_C( 866.84), SIMDE_FLOAT32_C( 660.11), SIMDE_FLOAT32_C( 121.17), SIMDE_FLOAT32_C( 988.31), SIMDE_FLOAT32_C( 622.26), SIMDE_FLOAT32_C( 206.00), SIMDE_FLOAT32_C( 520.48)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 530.77), SIMDE_FLOAT32_C( 966.97), SIMDE_FLOAT32_C( -63.51), SIMDE_FLOAT32_C( 360.07), SIMDE_FLOAT32_C( -846.61), SIMDE_FLOAT32_C( -749.79), SIMDE_FLOAT32_C( 510.77), SIMDE_FLOAT32_C( -104.12), SIMDE_FLOAT32_C( -838.06), SIMDE_FLOAT32_C( -901.25), SIMDE_FLOAT32_C( -89.58), SIMDE_FLOAT32_C( 539.88), SIMDE_FLOAT32_C( 88.35), SIMDE_FLOAT32_C( 773.77), 
SIMDE_FLOAT32_C( -729.20), SIMDE_FLOAT32_C( -254.72)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 530.77), SIMDE_FLOAT32_C( 966.97), SIMDE_FLOAT32_C( 63.51), SIMDE_FLOAT32_C( 360.07), SIMDE_FLOAT32_C( 846.61), SIMDE_FLOAT32_C( 749.79), SIMDE_FLOAT32_C( 510.77), SIMDE_FLOAT32_C( 104.12), SIMDE_FLOAT32_C( 838.06), SIMDE_FLOAT32_C( 901.25), SIMDE_FLOAT32_C( 89.58), SIMDE_FLOAT32_C( 539.88), SIMDE_FLOAT32_C( 88.35), SIMDE_FLOAT32_C( 773.77), SIMDE_FLOAT32_C( 729.20), SIMDE_FLOAT32_C( 254.72)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -814.45), SIMDE_FLOAT32_C( -377.80), SIMDE_FLOAT32_C( 640.68), SIMDE_FLOAT32_C( 778.00), SIMDE_FLOAT32_C( 377.67), SIMDE_FLOAT32_C( -489.06), SIMDE_FLOAT32_C( 933.74), SIMDE_FLOAT32_C( -749.41), SIMDE_FLOAT32_C( 193.12), SIMDE_FLOAT32_C( -423.37), SIMDE_FLOAT32_C( -194.06), SIMDE_FLOAT32_C( -118.88), SIMDE_FLOAT32_C( -77.74), SIMDE_FLOAT32_C( -506.16), SIMDE_FLOAT32_C( -617.33), SIMDE_FLOAT32_C( -947.60)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 814.45), SIMDE_FLOAT32_C( 377.80), SIMDE_FLOAT32_C( 640.68), SIMDE_FLOAT32_C( 778.00), SIMDE_FLOAT32_C( 377.67), SIMDE_FLOAT32_C( 489.06), SIMDE_FLOAT32_C( 933.74), SIMDE_FLOAT32_C( 749.41), SIMDE_FLOAT32_C( 193.12), SIMDE_FLOAT32_C( 423.37), SIMDE_FLOAT32_C( 194.06), SIMDE_FLOAT32_C( 118.88), SIMDE_FLOAT32_C( 77.74), SIMDE_FLOAT32_C( 506.16), SIMDE_FLOAT32_C( 617.33), SIMDE_FLOAT32_C( 947.60)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 525.11), SIMDE_FLOAT32_C( 299.00), SIMDE_FLOAT32_C( 814.48), SIMDE_FLOAT32_C( 676.51), SIMDE_FLOAT32_C( -481.76), SIMDE_FLOAT32_C( 528.75), SIMDE_FLOAT32_C( -375.20), SIMDE_FLOAT32_C( 146.55), SIMDE_FLOAT32_C( 199.14), SIMDE_FLOAT32_C( -505.05), SIMDE_FLOAT32_C( 833.96), SIMDE_FLOAT32_C( -388.48), SIMDE_FLOAT32_C( -212.57), SIMDE_FLOAT32_C( 943.89), SIMDE_FLOAT32_C( 651.63), SIMDE_FLOAT32_C( 695.54)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 525.11), SIMDE_FLOAT32_C( 299.00), SIMDE_FLOAT32_C( 814.48), SIMDE_FLOAT32_C( 676.51), SIMDE_FLOAT32_C( 481.76), SIMDE_FLOAT32_C( 528.75), 
SIMDE_FLOAT32_C( 375.20), SIMDE_FLOAT32_C( 146.55), SIMDE_FLOAT32_C( 199.14), SIMDE_FLOAT32_C( 505.05), SIMDE_FLOAT32_C( 833.96), SIMDE_FLOAT32_C( 388.48), SIMDE_FLOAT32_C( 212.57), SIMDE_FLOAT32_C( 943.89), SIMDE_FLOAT32_C( 651.63), SIMDE_FLOAT32_C( 695.54)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -13.73), SIMDE_FLOAT32_C( -546.29), SIMDE_FLOAT32_C( -787.44), SIMDE_FLOAT32_C( -104.88), SIMDE_FLOAT32_C( 979.47), SIMDE_FLOAT32_C( -744.23), SIMDE_FLOAT32_C( 836.15), SIMDE_FLOAT32_C( 495.73), SIMDE_FLOAT32_C( -301.39), SIMDE_FLOAT32_C( 262.00), SIMDE_FLOAT32_C( 466.22), SIMDE_FLOAT32_C( 536.10), SIMDE_FLOAT32_C( -613.16), SIMDE_FLOAT32_C( -393.36), SIMDE_FLOAT32_C( -56.94), SIMDE_FLOAT32_C( 670.22)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 13.73), SIMDE_FLOAT32_C( 546.29), SIMDE_FLOAT32_C( 787.44), SIMDE_FLOAT32_C( 104.88), SIMDE_FLOAT32_C( 979.47), SIMDE_FLOAT32_C( 744.23), SIMDE_FLOAT32_C( 836.15), SIMDE_FLOAT32_C( 495.73), SIMDE_FLOAT32_C( 301.39), SIMDE_FLOAT32_C( 262.00), SIMDE_FLOAT32_C( 466.22), SIMDE_FLOAT32_C( 536.10), SIMDE_FLOAT32_C( 613.16), SIMDE_FLOAT32_C( 393.36), SIMDE_FLOAT32_C( 56.94), SIMDE_FLOAT32_C( 670.22)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 493.79), SIMDE_FLOAT32_C( -29.44), SIMDE_FLOAT32_C( -941.83), SIMDE_FLOAT32_C( -567.95), SIMDE_FLOAT32_C( 535.05), SIMDE_FLOAT32_C( 43.85), SIMDE_FLOAT32_C( -963.94), SIMDE_FLOAT32_C( 235.87), SIMDE_FLOAT32_C( 143.93), SIMDE_FLOAT32_C( -236.80), SIMDE_FLOAT32_C( 550.36), SIMDE_FLOAT32_C( -8.58), SIMDE_FLOAT32_C( 374.16), SIMDE_FLOAT32_C( 714.91), SIMDE_FLOAT32_C( -355.51), SIMDE_FLOAT32_C( -520.52)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 493.79), SIMDE_FLOAT32_C( 29.44), SIMDE_FLOAT32_C( 941.83), SIMDE_FLOAT32_C( 567.95), SIMDE_FLOAT32_C( 535.05), SIMDE_FLOAT32_C( 43.85), SIMDE_FLOAT32_C( 963.94), SIMDE_FLOAT32_C( 235.87), SIMDE_FLOAT32_C( 143.93), SIMDE_FLOAT32_C( 236.80), SIMDE_FLOAT32_C( 550.36), SIMDE_FLOAT32_C( 8.58), SIMDE_FLOAT32_C( 374.16), SIMDE_FLOAT32_C( 714.91), SIMDE_FLOAT32_C( 355.51), 
SIMDE_FLOAT32_C( 520.52)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_abs_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_abs_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -319.23), SIMDE_FLOAT32_C( 773.21), SIMDE_FLOAT32_C( -25.13), SIMDE_FLOAT32_C( 859.40), SIMDE_FLOAT32_C( -184.67), SIMDE_FLOAT32_C( 833.56), SIMDE_FLOAT32_C( -441.84), SIMDE_FLOAT32_C( -761.98), SIMDE_FLOAT32_C( -874.37), SIMDE_FLOAT32_C( -592.77), SIMDE_FLOAT32_C( 252.42), SIMDE_FLOAT32_C( -882.47), SIMDE_FLOAT32_C( -558.99), SIMDE_FLOAT32_C( -481.72), SIMDE_FLOAT32_C( 686.76), SIMDE_FLOAT32_C( 728.61)), UINT16_C(15540), simde_mm512_set_ps(SIMDE_FLOAT32_C( 76.28), SIMDE_FLOAT32_C( 655.09), SIMDE_FLOAT32_C( -928.32), SIMDE_FLOAT32_C( -847.53), SIMDE_FLOAT32_C( 388.54), SIMDE_FLOAT32_C( 102.38), SIMDE_FLOAT32_C( -722.29), SIMDE_FLOAT32_C( -821.42), SIMDE_FLOAT32_C( 747.74), SIMDE_FLOAT32_C( 751.90), SIMDE_FLOAT32_C( -708.81), SIMDE_FLOAT32_C( -787.46), SIMDE_FLOAT32_C( -140.56), SIMDE_FLOAT32_C( 240.08), SIMDE_FLOAT32_C( 489.35), SIMDE_FLOAT32_C( -206.54)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -319.23), SIMDE_FLOAT32_C( 773.21), SIMDE_FLOAT32_C( 928.32), SIMDE_FLOAT32_C( 847.53), SIMDE_FLOAT32_C( 388.54), SIMDE_FLOAT32_C( 102.38), SIMDE_FLOAT32_C( -441.84), SIMDE_FLOAT32_C( -761.98), SIMDE_FLOAT32_C( 747.74), SIMDE_FLOAT32_C( -592.77), SIMDE_FLOAT32_C( 708.81), SIMDE_FLOAT32_C( 787.46), SIMDE_FLOAT32_C( -558.99), SIMDE_FLOAT32_C( 240.08), SIMDE_FLOAT32_C( 686.76), SIMDE_FLOAT32_C( 728.61)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -947.60), SIMDE_FLOAT32_C( 966.97), SIMDE_FLOAT32_C( 360.07), SIMDE_FLOAT32_C( -749.79), SIMDE_FLOAT32_C( -104.12), SIMDE_FLOAT32_C( -901.25), SIMDE_FLOAT32_C( 539.88), SIMDE_FLOAT32_C( 773.77), SIMDE_FLOAT32_C( -254.72), 
SIMDE_FLOAT32_C( 594.77), SIMDE_FLOAT32_C( 108.08), SIMDE_FLOAT32_C( 885.82), SIMDE_FLOAT32_C( -408.70), SIMDE_FLOAT32_C( 866.84), SIMDE_FLOAT32_C( 121.17), SIMDE_FLOAT32_C( -622.26)), UINT16_C( 6415), simde_mm512_set_ps(SIMDE_FLOAT32_C( -617.33), SIMDE_FLOAT32_C( 530.77), SIMDE_FLOAT32_C( -63.51), SIMDE_FLOAT32_C( -846.61), SIMDE_FLOAT32_C( 510.77), SIMDE_FLOAT32_C( -838.06), SIMDE_FLOAT32_C( -89.58), SIMDE_FLOAT32_C( 88.35), SIMDE_FLOAT32_C( -729.20), SIMDE_FLOAT32_C( -645.61), SIMDE_FLOAT32_C( -171.69), SIMDE_FLOAT32_C( -7.24), SIMDE_FLOAT32_C( 296.84), SIMDE_FLOAT32_C( -40.31), SIMDE_FLOAT32_C( -660.11), SIMDE_FLOAT32_C( 988.31)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -947.60), SIMDE_FLOAT32_C( 966.97), SIMDE_FLOAT32_C( 360.07), SIMDE_FLOAT32_C( 846.61), SIMDE_FLOAT32_C( 510.77), SIMDE_FLOAT32_C( -901.25), SIMDE_FLOAT32_C( 539.88), SIMDE_FLOAT32_C( 88.35), SIMDE_FLOAT32_C( -254.72), SIMDE_FLOAT32_C( 594.77), SIMDE_FLOAT32_C( 108.08), SIMDE_FLOAT32_C( 885.82), SIMDE_FLOAT32_C( 296.84), SIMDE_FLOAT32_C( 40.31), SIMDE_FLOAT32_C( 660.11), SIMDE_FLOAT32_C( 988.31)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -393.36), SIMDE_FLOAT32_C( 670.22), SIMDE_FLOAT32_C( 299.00), SIMDE_FLOAT32_C( 676.51), SIMDE_FLOAT32_C( 528.75), SIMDE_FLOAT32_C( 146.55), SIMDE_FLOAT32_C( -505.05), SIMDE_FLOAT32_C( -388.48), SIMDE_FLOAT32_C( 943.89), SIMDE_FLOAT32_C( 695.54), SIMDE_FLOAT32_C( -377.80), SIMDE_FLOAT32_C( 778.00), SIMDE_FLOAT32_C( -489.06), SIMDE_FLOAT32_C( -749.41), SIMDE_FLOAT32_C( -423.37), SIMDE_FLOAT32_C( -118.88)), UINT16_C( 1525), simde_mm512_set_ps(SIMDE_FLOAT32_C( -613.16), SIMDE_FLOAT32_C( -56.94), SIMDE_FLOAT32_C( 525.11), SIMDE_FLOAT32_C( 814.48), SIMDE_FLOAT32_C( -481.76), SIMDE_FLOAT32_C( -375.20), SIMDE_FLOAT32_C( 199.14), SIMDE_FLOAT32_C( 833.96), SIMDE_FLOAT32_C( -212.57), SIMDE_FLOAT32_C( 651.63), SIMDE_FLOAT32_C( -814.45), SIMDE_FLOAT32_C( 640.68), SIMDE_FLOAT32_C( 377.67), SIMDE_FLOAT32_C( 933.74), SIMDE_FLOAT32_C( 193.12), SIMDE_FLOAT32_C( -194.06)), 
simde_mm512_set_ps(SIMDE_FLOAT32_C( -393.36), SIMDE_FLOAT32_C( 670.22), SIMDE_FLOAT32_C( 299.00), SIMDE_FLOAT32_C( 676.51), SIMDE_FLOAT32_C( 528.75), SIMDE_FLOAT32_C( 375.20), SIMDE_FLOAT32_C( -505.05), SIMDE_FLOAT32_C( 833.96), SIMDE_FLOAT32_C( 212.57), SIMDE_FLOAT32_C( 651.63), SIMDE_FLOAT32_C( 814.45), SIMDE_FLOAT32_C( 640.68), SIMDE_FLOAT32_C( -489.06), SIMDE_FLOAT32_C( 933.74), SIMDE_FLOAT32_C( -423.37), SIMDE_FLOAT32_C( 194.06)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 61.51), SIMDE_FLOAT32_C( -643.69), SIMDE_FLOAT32_C( -16.59), SIMDE_FLOAT32_C( -29.44), SIMDE_FLOAT32_C( -567.95), SIMDE_FLOAT32_C( 43.85), SIMDE_FLOAT32_C( 235.87), SIMDE_FLOAT32_C( -236.80), SIMDE_FLOAT32_C( -8.58), SIMDE_FLOAT32_C( 714.91), SIMDE_FLOAT32_C( -520.52), SIMDE_FLOAT32_C( -546.29), SIMDE_FLOAT32_C( -104.88), SIMDE_FLOAT32_C( -744.23), SIMDE_FLOAT32_C( 495.73), SIMDE_FLOAT32_C( 262.00)), UINT16_C(29879), simde_mm512_set_ps(SIMDE_FLOAT32_C( 515.30), SIMDE_FLOAT32_C( 896.28), SIMDE_FLOAT32_C( 660.35), SIMDE_FLOAT32_C( 493.79), SIMDE_FLOAT32_C( -941.83), SIMDE_FLOAT32_C( 535.05), SIMDE_FLOAT32_C( -963.94), SIMDE_FLOAT32_C( 143.93), SIMDE_FLOAT32_C( 550.36), SIMDE_FLOAT32_C( 374.16), SIMDE_FLOAT32_C( -355.51), SIMDE_FLOAT32_C( -13.73), SIMDE_FLOAT32_C( -787.44), SIMDE_FLOAT32_C( 979.47), SIMDE_FLOAT32_C( 836.15), SIMDE_FLOAT32_C( -301.39)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 61.51), SIMDE_FLOAT32_C( 896.28), SIMDE_FLOAT32_C( 660.35), SIMDE_FLOAT32_C( 493.79), SIMDE_FLOAT32_C( -567.95), SIMDE_FLOAT32_C( 535.05), SIMDE_FLOAT32_C( 235.87), SIMDE_FLOAT32_C( -236.80), SIMDE_FLOAT32_C( 550.36), SIMDE_FLOAT32_C( 714.91), SIMDE_FLOAT32_C( 355.51), SIMDE_FLOAT32_C( 13.73), SIMDE_FLOAT32_C( -104.88), SIMDE_FLOAT32_C( 979.47), SIMDE_FLOAT32_C( 836.15), SIMDE_FLOAT32_C( 301.39)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 148.99), SIMDE_FLOAT32_C( -963.65), SIMDE_FLOAT32_C( 149.45), SIMDE_FLOAT32_C( -850.34), SIMDE_FLOAT32_C( -524.37), SIMDE_FLOAT32_C( -513.69), SIMDE_FLOAT32_C( 22.08), 
SIMDE_FLOAT32_C( 488.53), SIMDE_FLOAT32_C( 770.65), SIMDE_FLOAT32_C( 491.66), SIMDE_FLOAT32_C( 89.59), SIMDE_FLOAT32_C( 924.64), SIMDE_FLOAT32_C( -763.40), SIMDE_FLOAT32_C( -404.62), SIMDE_FLOAT32_C( -957.75), SIMDE_FLOAT32_C( 281.78)), UINT16_C(44157), simde_mm512_set_ps(SIMDE_FLOAT32_C( -979.51), SIMDE_FLOAT32_C( -129.70), SIMDE_FLOAT32_C( -587.42), SIMDE_FLOAT32_C( 94.97), SIMDE_FLOAT32_C( -887.16), SIMDE_FLOAT32_C( -189.75), SIMDE_FLOAT32_C( 881.78), SIMDE_FLOAT32_C( -152.81), SIMDE_FLOAT32_C( 943.19), SIMDE_FLOAT32_C( -229.02), SIMDE_FLOAT32_C( -577.41), SIMDE_FLOAT32_C( -719.96), SIMDE_FLOAT32_C( 770.58), SIMDE_FLOAT32_C( -153.52), SIMDE_FLOAT32_C( -991.64), SIMDE_FLOAT32_C( -53.48)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 979.51), SIMDE_FLOAT32_C( -963.65), SIMDE_FLOAT32_C( 587.42), SIMDE_FLOAT32_C( -850.34), SIMDE_FLOAT32_C( 887.16), SIMDE_FLOAT32_C( 189.75), SIMDE_FLOAT32_C( 22.08), SIMDE_FLOAT32_C( 488.53), SIMDE_FLOAT32_C( 770.65), SIMDE_FLOAT32_C( 229.02), SIMDE_FLOAT32_C( 577.41), SIMDE_FLOAT32_C( 719.96), SIMDE_FLOAT32_C( 770.58), SIMDE_FLOAT32_C( 153.52), SIMDE_FLOAT32_C( -957.75), SIMDE_FLOAT32_C( 53.48)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 631.94), SIMDE_FLOAT32_C( -409.79), SIMDE_FLOAT32_C( 668.07), SIMDE_FLOAT32_C( 542.88), SIMDE_FLOAT32_C( -896.06), SIMDE_FLOAT32_C( 248.80), SIMDE_FLOAT32_C( 200.01), SIMDE_FLOAT32_C( 669.33), SIMDE_FLOAT32_C( -642.07), SIMDE_FLOAT32_C( -212.55), SIMDE_FLOAT32_C( -356.51), SIMDE_FLOAT32_C( -440.95), SIMDE_FLOAT32_C( -982.52), SIMDE_FLOAT32_C( -842.67), SIMDE_FLOAT32_C( -420.59), SIMDE_FLOAT32_C( -949.02)), UINT16_C(15240), simde_mm512_set_ps(SIMDE_FLOAT32_C( 966.19), SIMDE_FLOAT32_C( 529.24), SIMDE_FLOAT32_C( -544.06), SIMDE_FLOAT32_C( -881.83), SIMDE_FLOAT32_C( -242.38), SIMDE_FLOAT32_C( -380.44), SIMDE_FLOAT32_C( -752.70), SIMDE_FLOAT32_C( -160.45), SIMDE_FLOAT32_C( 773.41), SIMDE_FLOAT32_C( -474.98), SIMDE_FLOAT32_C( 573.78), SIMDE_FLOAT32_C( -190.69), SIMDE_FLOAT32_C( -743.99), SIMDE_FLOAT32_C( -698.61), 
SIMDE_FLOAT32_C( -633.81), SIMDE_FLOAT32_C( 938.50)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 631.94), SIMDE_FLOAT32_C( -409.79), SIMDE_FLOAT32_C( 544.06), SIMDE_FLOAT32_C( 881.83), SIMDE_FLOAT32_C( 242.38), SIMDE_FLOAT32_C( 248.80), SIMDE_FLOAT32_C( 752.70), SIMDE_FLOAT32_C( 160.45), SIMDE_FLOAT32_C( 773.41), SIMDE_FLOAT32_C( -212.55), SIMDE_FLOAT32_C( -356.51), SIMDE_FLOAT32_C( -440.95), SIMDE_FLOAT32_C( 743.99), SIMDE_FLOAT32_C( -842.67), SIMDE_FLOAT32_C( -420.59), SIMDE_FLOAT32_C( -949.02)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 156.92), SIMDE_FLOAT32_C( -736.34), SIMDE_FLOAT32_C( 166.92), SIMDE_FLOAT32_C( 300.41), SIMDE_FLOAT32_C( -295.98), SIMDE_FLOAT32_C( -702.22), SIMDE_FLOAT32_C( -740.49), SIMDE_FLOAT32_C( -80.99), SIMDE_FLOAT32_C( -785.06), SIMDE_FLOAT32_C( 87.65), SIMDE_FLOAT32_C( -482.52), SIMDE_FLOAT32_C( -681.02), SIMDE_FLOAT32_C( 764.25), SIMDE_FLOAT32_C( 305.46), SIMDE_FLOAT32_C( 526.44), SIMDE_FLOAT32_C( 369.20)), UINT16_C(49024), simde_mm512_set_ps(SIMDE_FLOAT32_C( 809.64), SIMDE_FLOAT32_C( -790.72), SIMDE_FLOAT32_C( 295.53), SIMDE_FLOAT32_C( -856.33), SIMDE_FLOAT32_C( 237.04), SIMDE_FLOAT32_C( -607.75), SIMDE_FLOAT32_C( -732.96), SIMDE_FLOAT32_C( -497.56), SIMDE_FLOAT32_C( -918.03), SIMDE_FLOAT32_C( 488.66), SIMDE_FLOAT32_C( -523.80), SIMDE_FLOAT32_C( -224.58), SIMDE_FLOAT32_C( 298.04), SIMDE_FLOAT32_C( 606.61), SIMDE_FLOAT32_C( -852.36), SIMDE_FLOAT32_C( -314.42)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 809.64), SIMDE_FLOAT32_C( -736.34), SIMDE_FLOAT32_C( 295.53), SIMDE_FLOAT32_C( 856.33), SIMDE_FLOAT32_C( 237.04), SIMDE_FLOAT32_C( 607.75), SIMDE_FLOAT32_C( 732.96), SIMDE_FLOAT32_C( 497.56), SIMDE_FLOAT32_C( 918.03), SIMDE_FLOAT32_C( 87.65), SIMDE_FLOAT32_C( -482.52), SIMDE_FLOAT32_C( -681.02), SIMDE_FLOAT32_C( 764.25), SIMDE_FLOAT32_C( 305.46), SIMDE_FLOAT32_C( 526.44), SIMDE_FLOAT32_C( 369.20)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -897.44), SIMDE_FLOAT32_C( 113.95), SIMDE_FLOAT32_C( 654.01), SIMDE_FLOAT32_C( 620.23), SIMDE_FLOAT32_C( 623.09), 
SIMDE_FLOAT32_C( -407.46), SIMDE_FLOAT32_C( -763.16), SIMDE_FLOAT32_C( -768.89), SIMDE_FLOAT32_C( 966.30), SIMDE_FLOAT32_C( 863.50), SIMDE_FLOAT32_C( 709.25), SIMDE_FLOAT32_C( 348.50), SIMDE_FLOAT32_C( -816.66), SIMDE_FLOAT32_C( -662.92), SIMDE_FLOAT32_C( 913.50), SIMDE_FLOAT32_C( 301.72)), UINT16_C(64661), simde_mm512_set_ps(SIMDE_FLOAT32_C( -54.30), SIMDE_FLOAT32_C( -771.33), SIMDE_FLOAT32_C( -34.80), SIMDE_FLOAT32_C( -55.97), SIMDE_FLOAT32_C( -654.29), SIMDE_FLOAT32_C( 768.64), SIMDE_FLOAT32_C( -409.48), SIMDE_FLOAT32_C( 859.32), SIMDE_FLOAT32_C( -160.39), SIMDE_FLOAT32_C( -988.34), SIMDE_FLOAT32_C( -518.87), SIMDE_FLOAT32_C( -778.28), SIMDE_FLOAT32_C( 357.12), SIMDE_FLOAT32_C( 449.29), SIMDE_FLOAT32_C( -46.50), SIMDE_FLOAT32_C( 93.99)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 54.30), SIMDE_FLOAT32_C( 771.33), SIMDE_FLOAT32_C( 34.80), SIMDE_FLOAT32_C( 55.97), SIMDE_FLOAT32_C( 654.29), SIMDE_FLOAT32_C( 768.64), SIMDE_FLOAT32_C( -763.16), SIMDE_FLOAT32_C( -768.89), SIMDE_FLOAT32_C( 160.39), SIMDE_FLOAT32_C( 863.50), SIMDE_FLOAT32_C( 709.25), SIMDE_FLOAT32_C( 778.28), SIMDE_FLOAT32_C( -816.66), SIMDE_FLOAT32_C( 449.29), SIMDE_FLOAT32_C( 913.50), SIMDE_FLOAT32_C( 93.99)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_abs_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_abs_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -140.56), SIMDE_FLOAT64_C( -558.99), SIMDE_FLOAT64_C( 240.08), SIMDE_FLOAT64_C( -481.72), SIMDE_FLOAT64_C( 489.35), SIMDE_FLOAT64_C( 686.76), SIMDE_FLOAT64_C( -206.54), SIMDE_FLOAT64_C( 728.61)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 140.56), SIMDE_FLOAT64_C( 558.99), SIMDE_FLOAT64_C( 240.08), SIMDE_FLOAT64_C( 481.72), SIMDE_FLOAT64_C( 489.35), SIMDE_FLOAT64_C( 686.76), SIMDE_FLOAT64_C( 206.54), SIMDE_FLOAT64_C( 728.61)) }, 
{ simde_mm512_set_pd(SIMDE_FLOAT64_C( 747.74), SIMDE_FLOAT64_C( -874.37), SIMDE_FLOAT64_C( 751.90), SIMDE_FLOAT64_C( -592.77), SIMDE_FLOAT64_C( -708.81), SIMDE_FLOAT64_C( 252.42), SIMDE_FLOAT64_C( -787.46), SIMDE_FLOAT64_C( -882.47)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 747.74), SIMDE_FLOAT64_C( 874.37), SIMDE_FLOAT64_C( 751.90), SIMDE_FLOAT64_C( 592.77), SIMDE_FLOAT64_C( 708.81), SIMDE_FLOAT64_C( 252.42), SIMDE_FLOAT64_C( 787.46), SIMDE_FLOAT64_C( 882.47)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 388.54), SIMDE_FLOAT64_C( -184.67), SIMDE_FLOAT64_C( 102.38), SIMDE_FLOAT64_C( 833.56), SIMDE_FLOAT64_C( -722.29), SIMDE_FLOAT64_C( -441.84), SIMDE_FLOAT64_C( -821.42), SIMDE_FLOAT64_C( -761.98)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 388.54), SIMDE_FLOAT64_C( 184.67), SIMDE_FLOAT64_C( 102.38), SIMDE_FLOAT64_C( 833.56), SIMDE_FLOAT64_C( 722.29), SIMDE_FLOAT64_C( 441.84), SIMDE_FLOAT64_C( 821.42), SIMDE_FLOAT64_C( 761.98)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 76.28), SIMDE_FLOAT64_C( -319.23), SIMDE_FLOAT64_C( 655.09), SIMDE_FLOAT64_C( 773.21), SIMDE_FLOAT64_C( -928.32), SIMDE_FLOAT64_C( -25.13), SIMDE_FLOAT64_C( -847.53), SIMDE_FLOAT64_C( 859.40)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 76.28), SIMDE_FLOAT64_C( 319.23), SIMDE_FLOAT64_C( 655.09), SIMDE_FLOAT64_C( 773.21), SIMDE_FLOAT64_C( 928.32), SIMDE_FLOAT64_C( 25.13), SIMDE_FLOAT64_C( 847.53), SIMDE_FLOAT64_C( 859.40)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -40.31), SIMDE_FLOAT64_C( 866.84), SIMDE_FLOAT64_C( -660.11), SIMDE_FLOAT64_C( 121.17), SIMDE_FLOAT64_C( 988.31), SIMDE_FLOAT64_C( -622.26), SIMDE_FLOAT64_C( 206.00), SIMDE_FLOAT64_C( 520.48)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 40.31), SIMDE_FLOAT64_C( 866.84), SIMDE_FLOAT64_C( 660.11), SIMDE_FLOAT64_C( 121.17), SIMDE_FLOAT64_C( 988.31), SIMDE_FLOAT64_C( 622.26), SIMDE_FLOAT64_C( 206.00), SIMDE_FLOAT64_C( 520.48)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -645.61), SIMDE_FLOAT64_C( 594.77), SIMDE_FLOAT64_C( -171.69), SIMDE_FLOAT64_C( 108.08), SIMDE_FLOAT64_C( 
-7.24), SIMDE_FLOAT64_C( 885.82), SIMDE_FLOAT64_C( 296.84), SIMDE_FLOAT64_C( -408.70)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 645.61), SIMDE_FLOAT64_C( 594.77), SIMDE_FLOAT64_C( 171.69), SIMDE_FLOAT64_C( 108.08), SIMDE_FLOAT64_C( 7.24), SIMDE_FLOAT64_C( 885.82), SIMDE_FLOAT64_C( 296.84), SIMDE_FLOAT64_C( 408.70)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -838.06), SIMDE_FLOAT64_C( -901.25), SIMDE_FLOAT64_C( -89.58), SIMDE_FLOAT64_C( 539.88), SIMDE_FLOAT64_C( 88.35), SIMDE_FLOAT64_C( 773.77), SIMDE_FLOAT64_C( -729.20), SIMDE_FLOAT64_C( -254.72)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 838.06), SIMDE_FLOAT64_C( 901.25), SIMDE_FLOAT64_C( 89.58), SIMDE_FLOAT64_C( 539.88), SIMDE_FLOAT64_C( 88.35), SIMDE_FLOAT64_C( 773.77), SIMDE_FLOAT64_C( 729.20), SIMDE_FLOAT64_C( 254.72)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 530.77), SIMDE_FLOAT64_C( 966.97), SIMDE_FLOAT64_C( -63.51), SIMDE_FLOAT64_C( 360.07), SIMDE_FLOAT64_C( -846.61), SIMDE_FLOAT64_C( -749.79), SIMDE_FLOAT64_C( 510.77), SIMDE_FLOAT64_C( -104.12)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 530.77), SIMDE_FLOAT64_C( 966.97), SIMDE_FLOAT64_C( 63.51), SIMDE_FLOAT64_C( 360.07), SIMDE_FLOAT64_C( 846.61), SIMDE_FLOAT64_C( 749.79), SIMDE_FLOAT64_C( 510.77), SIMDE_FLOAT64_C( 104.12)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_abs_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_abs_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -874.37), SIMDE_FLOAT64_C( -592.77), SIMDE_FLOAT64_C( 252.42), SIMDE_FLOAT64_C( -882.47), SIMDE_FLOAT64_C( -558.99), SIMDE_FLOAT64_C( -481.72), SIMDE_FLOAT64_C( 686.76), SIMDE_FLOAT64_C( 728.61)), UINT8_C( 67), simde_mm512_set_pd(SIMDE_FLOAT64_C( 747.74), SIMDE_FLOAT64_C( 751.90), SIMDE_FLOAT64_C( -708.81), SIMDE_FLOAT64_C( -787.46), SIMDE_FLOAT64_C( -140.56), 
SIMDE_FLOAT64_C( 240.08), SIMDE_FLOAT64_C( 489.35), SIMDE_FLOAT64_C( -206.54)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -874.37), SIMDE_FLOAT64_C( 751.90), SIMDE_FLOAT64_C( 252.42), SIMDE_FLOAT64_C( -882.47), SIMDE_FLOAT64_C( -558.99), SIMDE_FLOAT64_C( -481.72), SIMDE_FLOAT64_C( 489.35), SIMDE_FLOAT64_C( 206.54)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 76.28), SIMDE_FLOAT64_C( 655.09), SIMDE_FLOAT64_C( -928.32), SIMDE_FLOAT64_C( -847.53), SIMDE_FLOAT64_C( 388.54), SIMDE_FLOAT64_C( 102.38), SIMDE_FLOAT64_C( -722.29), SIMDE_FLOAT64_C( -821.42)), UINT8_C(153), simde_mm512_set_pd(SIMDE_FLOAT64_C( 520.48), SIMDE_FLOAT64_C( -319.23), SIMDE_FLOAT64_C( 773.21), SIMDE_FLOAT64_C( -25.13), SIMDE_FLOAT64_C( 859.40), SIMDE_FLOAT64_C( -184.67), SIMDE_FLOAT64_C( 833.56), SIMDE_FLOAT64_C( -441.84)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 520.48), SIMDE_FLOAT64_C( 655.09), SIMDE_FLOAT64_C( -928.32), SIMDE_FLOAT64_C( 25.13), SIMDE_FLOAT64_C( 859.40), SIMDE_FLOAT64_C( 102.38), SIMDE_FLOAT64_C( -722.29), SIMDE_FLOAT64_C( 441.84)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -254.72), SIMDE_FLOAT64_C( 594.77), SIMDE_FLOAT64_C( 108.08), SIMDE_FLOAT64_C( 885.82), SIMDE_FLOAT64_C( -408.70), SIMDE_FLOAT64_C( 866.84), SIMDE_FLOAT64_C( 121.17), SIMDE_FLOAT64_C( -622.26)), UINT8_C( 41), simde_mm512_set_pd(SIMDE_FLOAT64_C( -729.20), SIMDE_FLOAT64_C( -645.61), SIMDE_FLOAT64_C( -171.69), SIMDE_FLOAT64_C( -7.24), SIMDE_FLOAT64_C( 296.84), SIMDE_FLOAT64_C( -40.31), SIMDE_FLOAT64_C( -660.11), SIMDE_FLOAT64_C( 988.31)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -254.72), SIMDE_FLOAT64_C( 594.77), SIMDE_FLOAT64_C( 171.69), SIMDE_FLOAT64_C( 885.82), SIMDE_FLOAT64_C( 296.84), SIMDE_FLOAT64_C( 866.84), SIMDE_FLOAT64_C( 121.17), SIMDE_FLOAT64_C( 988.31)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -617.33), SIMDE_FLOAT64_C( 530.77), SIMDE_FLOAT64_C( -63.51), SIMDE_FLOAT64_C( -846.61), SIMDE_FLOAT64_C( 510.77), SIMDE_FLOAT64_C( -838.06), SIMDE_FLOAT64_C( -89.58), SIMDE_FLOAT64_C( 88.35)), UINT8_C(208), 
simde_mm512_set_pd(SIMDE_FLOAT64_C( -506.16), SIMDE_FLOAT64_C( -947.60), SIMDE_FLOAT64_C( 966.97), SIMDE_FLOAT64_C( 360.07), SIMDE_FLOAT64_C( -749.79), SIMDE_FLOAT64_C( -104.12), SIMDE_FLOAT64_C( -901.25), SIMDE_FLOAT64_C( 539.88)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 506.16), SIMDE_FLOAT64_C( 947.60), SIMDE_FLOAT64_C( -63.51), SIMDE_FLOAT64_C( 360.07), SIMDE_FLOAT64_C( 510.77), SIMDE_FLOAT64_C( -838.06), SIMDE_FLOAT64_C( -89.58), SIMDE_FLOAT64_C( 88.35)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 943.89), SIMDE_FLOAT64_C( 695.54), SIMDE_FLOAT64_C( -377.80), SIMDE_FLOAT64_C( 778.00), SIMDE_FLOAT64_C( -489.06), SIMDE_FLOAT64_C( -749.41), SIMDE_FLOAT64_C( -423.37), SIMDE_FLOAT64_C( -118.88)), UINT8_C( 52), simde_mm512_set_pd(SIMDE_FLOAT64_C( -212.57), SIMDE_FLOAT64_C( 651.63), SIMDE_FLOAT64_C( -814.45), SIMDE_FLOAT64_C( 640.68), SIMDE_FLOAT64_C( 377.67), SIMDE_FLOAT64_C( 933.74), SIMDE_FLOAT64_C( 193.12), SIMDE_FLOAT64_C( -194.06)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 943.89), SIMDE_FLOAT64_C( 695.54), SIMDE_FLOAT64_C( 814.45), SIMDE_FLOAT64_C( 640.68), SIMDE_FLOAT64_C( -489.06), SIMDE_FLOAT64_C( 933.74), SIMDE_FLOAT64_C( -423.37), SIMDE_FLOAT64_C( -118.88)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -613.16), SIMDE_FLOAT64_C( -56.94), SIMDE_FLOAT64_C( 525.11), SIMDE_FLOAT64_C( 814.48), SIMDE_FLOAT64_C( -481.76), SIMDE_FLOAT64_C( -375.20), SIMDE_FLOAT64_C( 199.14), SIMDE_FLOAT64_C( 833.96)), UINT8_C(108), simde_mm512_set_pd(SIMDE_FLOAT64_C( 536.10), SIMDE_FLOAT64_C( -393.36), SIMDE_FLOAT64_C( 670.22), SIMDE_FLOAT64_C( 299.00), SIMDE_FLOAT64_C( 676.51), SIMDE_FLOAT64_C( 528.75), SIMDE_FLOAT64_C( 146.55), SIMDE_FLOAT64_C( -505.05)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -613.16), SIMDE_FLOAT64_C( 393.36), SIMDE_FLOAT64_C( 670.22), SIMDE_FLOAT64_C( 814.48), SIMDE_FLOAT64_C( 676.51), SIMDE_FLOAT64_C( 528.75), SIMDE_FLOAT64_C( 199.14), SIMDE_FLOAT64_C( 833.96)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -8.58), SIMDE_FLOAT64_C( 714.91), SIMDE_FLOAT64_C( -520.52), SIMDE_FLOAT64_C( 
-546.29), SIMDE_FLOAT64_C( -104.88), SIMDE_FLOAT64_C( -744.23), SIMDE_FLOAT64_C( 495.73), SIMDE_FLOAT64_C( 262.00)), UINT8_C(147), simde_mm512_set_pd(SIMDE_FLOAT64_C( 550.36), SIMDE_FLOAT64_C( 374.16), SIMDE_FLOAT64_C( -355.51), SIMDE_FLOAT64_C( -13.73), SIMDE_FLOAT64_C( -787.44), SIMDE_FLOAT64_C( 979.47), SIMDE_FLOAT64_C( 836.15), SIMDE_FLOAT64_C( -301.39)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 550.36), SIMDE_FLOAT64_C( 714.91), SIMDE_FLOAT64_C( -520.52), SIMDE_FLOAT64_C( 13.73), SIMDE_FLOAT64_C( -104.88), SIMDE_FLOAT64_C( -744.23), SIMDE_FLOAT64_C( 836.15), SIMDE_FLOAT64_C( 301.39)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 515.30), SIMDE_FLOAT64_C( 896.28), SIMDE_FLOAT64_C( 660.35), SIMDE_FLOAT64_C( 493.79), SIMDE_FLOAT64_C( -941.83), SIMDE_FLOAT64_C( 535.05), SIMDE_FLOAT64_C( -963.94), SIMDE_FLOAT64_C( 143.93)), UINT8_C( 75), simde_mm512_set_pd(SIMDE_FLOAT64_C( 520.16), SIMDE_FLOAT64_C( 61.51), SIMDE_FLOAT64_C( -643.69), SIMDE_FLOAT64_C( -16.59), SIMDE_FLOAT64_C( -29.44), SIMDE_FLOAT64_C( -567.95), SIMDE_FLOAT64_C( 43.85), SIMDE_FLOAT64_C( 235.87)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 515.30), SIMDE_FLOAT64_C( 61.51), SIMDE_FLOAT64_C( 660.35), SIMDE_FLOAT64_C( 493.79), SIMDE_FLOAT64_C( 29.44), SIMDE_FLOAT64_C( 535.05), SIMDE_FLOAT64_C( 43.85), SIMDE_FLOAT64_C( 235.87)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_abs_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_abs_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_abs_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_abs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_abs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_abs_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_abs_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_abs_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_abs_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_abs_epi64) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm256_abs_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_abs_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_abs_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_abs_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_abs_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_abs_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_abs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_abs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_abs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_abs_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_abs_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_abs_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_abs_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_abs_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_abs_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_abs_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_abs_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_abs_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_abs_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/add.c000066400000000000000000012537751400333146700161740ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN add #include #include #include static int test_simde_mm_mask_add_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int8_t src[16]; const simde__mmask16 k; const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { { { -INT8_C( 27), -INT8_C( 117), -INT8_C( 6), INT8_C( 88), -INT8_C( 46), INT8_C( 97), -INT8_C( 33), INT8_C( 41), -INT8_C( 39), -INT8_C( 6), -INT8_C( 94), INT8_C( 57), -INT8_C( 33), INT8_C( 58), INT8_C( 124), -INT8_C( 31) }, UINT16_C( 5189), { INT8_C( 116), INT8_C( 54), -INT8_C( 13), INT8_C( 7), -INT8_C( 49), -INT8_C( 33), -INT8_C( 119), INT8_C( 78), INT8_C( 14), -INT8_C( 94), INT8_C( 14), -INT8_C( 68), -INT8_C( 88), INT8_C( 20), INT8_C( 70), -INT8_C( 115) }, { INT8_C( 9), -INT8_C( 104), -INT8_C( 11), -INT8_C( 116), -INT8_C( 58), -INT8_C( 28), INT8_C( 17), -INT8_C( 115), -INT8_C( 25), -INT8_C( 123), -INT8_C( 27), INT8_C( 40), INT8_C( 24), -INT8_C( 71), -INT8_C( 96), -INT8_C( 45) }, { INT8_C( 125), -INT8_C( 117), -INT8_C( 24), INT8_C( 88), -INT8_C( 46), INT8_C( 97), -INT8_C( 102), INT8_C( 41), -INT8_C( 39), -INT8_C( 6), -INT8_C( 13), INT8_C( 57), -INT8_C( 64), INT8_C( 58), INT8_C( 124), -INT8_C( 31) } }, { { -INT8_C( 98), -INT8_C( 92), -INT8_C( 38), -INT8_C( 68), -INT8_C( 41), INT8_C( 122), -INT8_C( 22), -INT8_C( 38), INT8_C( 27), -INT8_C( 33), INT8_C( 109), -INT8_C( 84), -INT8_C( 65), -INT8_C( 97), -INT8_C( 87), -INT8_C( 26) }, UINT16_C(48315), { INT8_C( 33), -INT8_C( 21), INT8_C( 102), INT8_C( 107), -INT8_C( 24), INT8_C( 118), -INT8_C( 19), -INT8_C( 100), INT8_C( 41), INT8_C( 78), -INT8_C( 81), INT8_C( 11), INT8_C( 38), -INT8_C( 101), INT8_C( 39), -INT8_C( 103) }, { INT8_C( 
68), -INT8_C( 83), INT8_MIN, INT8_C( 118), -INT8_C( 117), INT8_C( 112), -INT8_C( 32), -INT8_C( 114), INT8_C( 16), -INT8_C( 45), INT8_C( 93), INT8_C( 38), -INT8_C( 71), -INT8_C( 60), INT8_C( 99), -INT8_C( 64) }, { INT8_C( 101), -INT8_C( 104), -INT8_C( 38), -INT8_C( 31), INT8_C( 115), -INT8_C( 26), -INT8_C( 22), INT8_C( 42), INT8_C( 27), -INT8_C( 33), INT8_C( 12), INT8_C( 49), -INT8_C( 33), INT8_C( 95), -INT8_C( 87), INT8_C( 89) } }, { { -INT8_C( 32), -INT8_C( 57), INT8_C( 78), -INT8_C( 38), -INT8_C( 31), -INT8_C( 49), -INT8_C( 2), -INT8_C( 91), INT8_C( 122), -INT8_C( 124), INT8_C( 43), INT8_C( 111), INT8_C( 13), -INT8_C( 53), -INT8_C( 99), -INT8_C( 7) }, UINT16_C(55519), { -INT8_C( 112), INT8_C( 59), INT8_C( 27), -INT8_C( 75), -INT8_C( 30), INT8_C( 46), INT8_C( 88), -INT8_C( 37), INT8_C( 68), -INT8_C( 86), -INT8_C( 84), -INT8_C( 81), INT8_C( 73), -INT8_C( 81), INT8_C( 111), INT8_C( 71) }, { INT8_C( 75), INT8_C( 105), INT8_C( 40), -INT8_C( 50), -INT8_C( 66), INT8_C( 107), -INT8_C( 47), INT8_C( 108), -INT8_C( 44), -INT8_C( 70), -INT8_C( 126), INT8_C( 70), INT8_C( 82), -INT8_C( 48), -INT8_C( 16), -INT8_C( 11) }, { -INT8_C( 37), -INT8_C( 92), INT8_C( 67), -INT8_C( 125), -INT8_C( 96), -INT8_C( 49), INT8_C( 41), INT8_C( 71), INT8_C( 122), -INT8_C( 124), INT8_C( 43), -INT8_C( 11), -INT8_C( 101), -INT8_C( 53), INT8_C( 95), INT8_C( 60) } }, { { -INT8_C( 10), INT8_C( 74), INT8_C( 22), INT8_MAX, -INT8_C( 86), -INT8_C( 38), -INT8_C( 55), -INT8_C( 34), INT8_C( 37), -INT8_C( 93), INT8_C( 113), INT8_C( 90), -INT8_C( 22), INT8_C( 95), -INT8_C( 30), INT8_C( 36) }, UINT16_C(49528), { -INT8_C( 86), -INT8_C( 18), INT8_C( 8), -INT8_C( 80), INT8_C( 64), -INT8_C( 123), -INT8_C( 65), INT8_C( 70), INT8_C( 76), -INT8_C( 38), INT8_C( 79), INT8_C( 7), -INT8_C( 112), -INT8_C( 51), INT8_C( 59), INT8_C( 55) }, { -INT8_C( 115), -INT8_C( 53), -INT8_C( 116), -INT8_C( 104), INT8_C( 80), INT8_C( 21), INT8_C( 61), INT8_C( 7), -INT8_C( 99), -INT8_C( 92), -INT8_C( 35), INT8_C( 32), -INT8_C( 2), INT8_C( 
80), INT8_C( 120), -INT8_C( 12) }, { -INT8_C( 10), INT8_C( 74), INT8_C( 22), INT8_C( 72), -INT8_C( 112), -INT8_C( 102), -INT8_C( 4), -INT8_C( 34), -INT8_C( 23), -INT8_C( 93), INT8_C( 113), INT8_C( 90), -INT8_C( 22), INT8_C( 95), -INT8_C( 77), INT8_C( 43) } }, { { INT8_C( 4), INT8_MIN, -INT8_C( 71), -INT8_C( 17), -INT8_C( 48), -INT8_C( 5), INT8_MAX, INT8_C( 111), INT8_C( 107), -INT8_C( 122), -INT8_C( 126), -INT8_C( 76), -INT8_C( 121), -INT8_C( 65), -INT8_C( 115), -INT8_C( 46) }, UINT16_C(28165), { INT8_C( 27), -INT8_C( 48), -INT8_C( 86), INT8_C( 54), -INT8_C( 32), INT8_C( 59), INT8_C( 122), -INT8_C( 12), -INT8_C( 6), INT8_C( 28), INT8_C( 89), INT8_C( 69), INT8_C( 57), -INT8_C( 19), -INT8_C( 99), -INT8_C( 37) }, { INT8_C( 97), -INT8_C( 39), INT8_C( 120), INT8_C( 103), -INT8_C( 90), INT8_C( 20), INT8_C( 101), INT8_C( 30), INT8_C( 57), INT8_C( 57), -INT8_C( 13), INT8_C( 10), INT8_C( 41), -INT8_C( 13), -INT8_C( 53), INT8_C( 21) }, { INT8_C( 124), INT8_MIN, INT8_C( 34), -INT8_C( 17), -INT8_C( 48), -INT8_C( 5), INT8_MAX, INT8_C( 111), INT8_C( 107), INT8_C( 85), INT8_C( 76), INT8_C( 79), -INT8_C( 121), -INT8_C( 32), INT8_C( 104), -INT8_C( 46) } }, { { -INT8_C( 125), INT8_C( 109), INT8_C( 50), INT8_C( 49), INT8_C( 76), INT8_C( 38), INT8_C( 43), INT8_C( 37), -INT8_C( 125), -INT8_C( 52), INT8_C( 115), INT8_C( 100), INT8_C( 45), INT8_C( 74), -INT8_C( 5), -INT8_C( 110) }, UINT16_C(47574), { -INT8_C( 76), INT8_C( 26), -INT8_C( 112), -INT8_C( 90), -INT8_C( 73), -INT8_C( 123), INT8_C( 15), -INT8_C( 19), -INT8_C( 57), -INT8_C( 58), -INT8_C( 79), INT8_C( 7), -INT8_C( 26), INT8_C( 47), -INT8_C( 11), INT8_MAX }, { -INT8_C( 44), INT8_C( 76), -INT8_C( 8), INT8_C( 91), INT8_C( 64), INT8_C( 42), -INT8_C( 48), INT8_C( 22), -INT8_C( 10), INT8_C( 4), INT8_C( 93), -INT8_C( 57), -INT8_C( 42), -INT8_C( 31), INT8_C( 109), INT8_C( 88) }, { -INT8_C( 125), INT8_C( 102), -INT8_C( 120), INT8_C( 49), -INT8_C( 9), INT8_C( 38), -INT8_C( 33), INT8_C( 3), -INT8_C( 67), -INT8_C( 52), INT8_C( 115), -INT8_C( 
50), -INT8_C( 68), INT8_C( 16), -INT8_C( 5), -INT8_C( 41) } }, { { INT8_C( 20), INT8_C( 50), INT8_C( 9), INT8_C( 94), -INT8_C( 69), INT8_C( 31), INT8_C( 50), INT8_C( 65), -INT8_C( 79), INT8_C( 123), -INT8_C( 96), INT8_C( 34), -INT8_C( 70), -INT8_C( 37), -INT8_C( 3), INT8_C( 22) }, UINT16_C(58436), { INT8_C( 126), -INT8_C( 47), -INT8_C( 126), -INT8_C( 42), INT8_C( 30), INT8_C( 111), -INT8_C( 34), INT8_C( 96), -INT8_C( 65), INT8_C( 94), -INT8_C( 39), -INT8_C( 47), -INT8_C( 73), INT8_C( 120), INT8_C( 10), INT8_C( 18) }, { INT8_C( 72), INT8_C( 25), INT8_C( 71), INT8_C( 82), INT8_C( 115), -INT8_C( 50), INT8_C( 16), INT8_C( 21), -INT8_C( 37), -INT8_C( 92), INT8_C( 58), INT8_C( 57), INT8_C( 41), -INT8_C( 77), -INT8_C( 116), INT8_C( 92) }, { INT8_C( 20), INT8_C( 50), -INT8_C( 55), INT8_C( 94), -INT8_C( 69), INT8_C( 31), -INT8_C( 18), INT8_C( 65), -INT8_C( 79), INT8_C( 123), INT8_C( 19), INT8_C( 34), -INT8_C( 70), INT8_C( 43), -INT8_C( 106), INT8_C( 110) } }, { { INT8_C( 29), -INT8_C( 38), -INT8_C( 54), -INT8_C( 111), -INT8_C( 23), -INT8_C( 86), -INT8_C( 8), INT8_C( 21), INT8_C( 98), INT8_C( 54), INT8_C( 102), -INT8_C( 84), INT8_C( 14), INT8_C( 23), INT8_C( 88), INT8_C( 42) }, UINT16_C(45295), { INT8_C( 4), INT8_C( 61), INT8_C( 20), -INT8_C( 59), INT8_C( 94), INT8_C( 39), -INT8_C( 42), INT8_C( 68), -INT8_C( 116), INT8_C( 24), -INT8_C( 43), -INT8_C( 18), INT8_C( 50), INT8_C( 74), -INT8_C( 67), INT8_C( 123) }, { -INT8_C( 6), INT8_C( 9), INT8_C( 16), -INT8_C( 80), -INT8_C( 95), INT8_C( 115), -INT8_C( 62), INT8_C( 63), -INT8_C( 62), -INT8_C( 105), INT8_C( 107), -INT8_C( 107), -INT8_C( 91), -INT8_C( 56), -INT8_C( 62), -INT8_C( 15) }, { -INT8_C( 2), INT8_C( 70), INT8_C( 36), INT8_C( 117), -INT8_C( 23), -INT8_C( 102), -INT8_C( 104), -INT8_C( 125), INT8_C( 98), INT8_C( 54), INT8_C( 102), -INT8_C( 84), -INT8_C( 41), INT8_C( 18), INT8_C( 88), INT8_C( 108) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = 
simde_x_mm_loadu_epi8(test_vec[i].src); simde__m128i a = simde_x_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_mask_add_epi8(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm_maskz_add_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { { UINT16_C(19804), { INT8_C( 27), INT8_C( 33), -INT8_C( 11), INT8_C( 13), -INT8_C( 52), INT8_C( 94), INT8_C( 66), INT8_C( 86), INT8_C( 52), INT8_C( 23), INT8_C( 93), -INT8_C( 33), INT8_C( 78), -INT8_C( 48), -INT8_C( 30), INT8_C( 75) }, { -INT8_C( 37), -INT8_C( 52), -INT8_C( 84), INT8_C( 110), -INT8_C( 15), -INT8_C( 31), -INT8_C( 23), -INT8_C( 34), -INT8_C( 103), -INT8_C( 38), INT8_C( 65), INT8_C( 33), -INT8_C( 16), -INT8_C( 52), INT8_C( 35), INT8_C( 109) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 95), INT8_C( 123), -INT8_C( 67), INT8_C( 0), INT8_C( 43), INT8_C( 0), -INT8_C( 51), INT8_C( 0), -INT8_C( 98), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 5), INT8_C( 0) } }, { UINT16_C(50023), { -INT8_C( 40), INT8_C( 67), -INT8_C( 16), INT8_C( 53), INT8_C( 73), -INT8_C( 64), -INT8_C( 41), INT8_C( 124), INT8_C( 97), -INT8_C( 126), INT8_C( 77), -INT8_C( 78), -INT8_C( 92), -INT8_C( 116), INT8_C( 16), -INT8_C( 126) }, { INT8_C( 76), -INT8_C( 111), INT8_MIN, -INT8_C( 99), -INT8_C( 91), INT8_C( 102), INT8_C( 51), -INT8_C( 30), INT8_C( 45), -INT8_C( 47), INT8_C( 25), INT8_C( 16), INT8_C( 104), -INT8_C( 95), -INT8_C( 111), INT8_C( 93) }, { INT8_C( 36), -INT8_C( 44), INT8_C( 112), INT8_C( 0), INT8_C( 0), INT8_C( 38), INT8_C( 10), INT8_C( 0), -INT8_C( 114), INT8_C( 83), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 95), -INT8_C( 33) } }, { UINT16_C( 6986), { INT8_C( 119), -INT8_C( 75), -INT8_C( 115), INT8_C( 108), -INT8_C( 41), -INT8_C( 83), INT8_C( 24), INT8_C( 118), INT8_C( 117), INT8_C( 
98), -INT8_C( 80), INT8_C( 105), -INT8_C( 62), -INT8_C( 104), -INT8_C( 75), INT8_C( 22) }, { INT8_MAX, INT8_C( 109), -INT8_C( 49), INT8_C( 103), -INT8_C( 97), -INT8_C( 46), -INT8_C( 64), INT8_C( 44), -INT8_C( 126), -INT8_C( 107), -INT8_C( 14), INT8_C( 2), -INT8_C( 58), INT8_C( 69), -INT8_C( 19), -INT8_C( 91) }, { INT8_C( 0), INT8_C( 34), INT8_C( 0), -INT8_C( 45), INT8_C( 0), INT8_C( 0), -INT8_C( 40), INT8_C( 0), -INT8_C( 9), -INT8_C( 9), INT8_C( 0), INT8_C( 107), -INT8_C( 120), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { UINT16_C(36448), { -INT8_C( 7), INT8_C( 119), INT8_C( 107), INT8_C( 82), INT8_C( 101), INT8_C( 83), -INT8_C( 83), INT8_C( 116), INT8_C( 14), INT8_C( 72), INT8_C( 114), INT8_C( 3), -INT8_C( 28), -INT8_C( 7), INT8_C( 124), -INT8_C( 77) }, { -INT8_C( 10), -INT8_C( 106), INT8_C( 24), INT8_C( 124), -INT8_C( 82), -INT8_C( 56), -INT8_C( 50), INT8_C( 105), INT8_C( 61), -INT8_C( 116), INT8_C( 93), INT8_C( 9), INT8_C( 50), INT8_C( 96), INT8_C( 92), INT8_C( 3) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 27), INT8_C( 123), INT8_C( 0), INT8_C( 0), -INT8_C( 44), -INT8_C( 49), INT8_C( 12), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 74) } }, { UINT16_C(37475), { INT8_C( 88), INT8_C( 7), -INT8_C( 30), INT8_C( 23), INT8_C( 11), -INT8_C( 12), INT8_C( 22), -INT8_C( 112), INT8_C( 7), INT8_C( 103), -INT8_C( 62), INT8_C( 79), INT8_C( 13), -INT8_C( 6), INT8_C( 3), INT8_C( 110) }, { -INT8_C( 109), INT8_C( 83), -INT8_C( 125), -INT8_C( 26), -INT8_C( 64), INT8_C( 118), -INT8_C( 108), INT8_C( 120), INT8_C( 65), INT8_C( 56), INT8_C( 108), -INT8_C( 19), -INT8_C( 92), -INT8_C( 43), INT8_C( 59), -INT8_C( 82) }, { -INT8_C( 21), INT8_C( 90), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 106), -INT8_C( 86), INT8_C( 0), INT8_C( 0), -INT8_C( 97), INT8_C( 0), INT8_C( 0), -INT8_C( 79), INT8_C( 0), INT8_C( 0), INT8_C( 28) } }, { UINT16_C(60868), { -INT8_C( 85), INT8_C( 98), -INT8_C( 119), INT8_C( 28), -INT8_C( 52), INT8_C( 36), -INT8_C( 38), INT8_C( 97), 
-INT8_C( 23), INT8_C( 33), INT8_C( 101), INT8_C( 30), -INT8_C( 56), INT8_C( 67), -INT8_C( 69), -INT8_C( 85) }, { -INT8_C( 92), INT8_C( 56), INT8_C( 8), INT8_C( 111), INT8_C( 68), -INT8_C( 19), -INT8_C( 66), INT8_C( 42), INT8_C( 1), INT8_C( 32), INT8_C( 112), INT8_C( 10), INT8_C( 11), INT8_C( 114), INT8_C( 126), -INT8_C( 127) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 111), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 104), -INT8_C( 117), -INT8_C( 22), INT8_C( 0), -INT8_C( 43), INT8_C( 40), INT8_C( 0), -INT8_C( 75), INT8_C( 57), INT8_C( 44) } }, { UINT16_C(25877), { INT8_C( 40), -INT8_C( 65), INT8_C( 46), -INT8_C( 42), -INT8_C( 23), INT8_C( 101), INT8_C( 37), INT8_C( 87), INT8_C( 109), -INT8_C( 69), INT8_C( 80), -INT8_C( 44), -INT8_C( 53), INT8_C( 53), INT8_C( 52), INT8_C( 114) }, { -INT8_C( 100), INT8_C( 3), -INT8_C( 25), -INT8_C( 36), -INT8_C( 90), -INT8_C( 81), -INT8_C( 115), -INT8_C( 101), INT8_C( 18), INT8_C( 125), -INT8_C( 12), INT8_C( 115), -INT8_C( 22), -INT8_C( 9), INT8_C( 117), INT8_C( 27) }, { -INT8_C( 60), INT8_C( 0), INT8_C( 21), INT8_C( 0), -INT8_C( 113), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_MAX, INT8_C( 0), INT8_C( 68), INT8_C( 0), INT8_C( 0), INT8_C( 44), -INT8_C( 87), INT8_C( 0) } }, { UINT16_C( 8536), { -INT8_C( 44), -INT8_C( 95), -INT8_C( 28), -INT8_C( 36), -INT8_C( 103), -INT8_C( 7), INT8_C( 72), INT8_C( 25), -INT8_C( 117), -INT8_C( 114), -INT8_C( 95), INT8_C( 17), INT8_C( 7), INT8_C( 1), -INT8_C( 44), -INT8_C( 70) }, { -INT8_C( 68), -INT8_C( 8), INT8_C( 12), INT8_C( 65), INT8_C( 102), INT8_C( 62), -INT8_C( 99), INT8_C( 118), INT8_C( 8), -INT8_C( 26), INT8_C( 94), INT8_C( 54), INT8_C( 0), -INT8_C( 68), INT8_MAX, -INT8_C( 126) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 29), -INT8_C( 1), INT8_C( 0), -INT8_C( 27), INT8_C( 0), -INT8_C( 109), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 67), INT8_C( 0), INT8_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = 
simde_x_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_maskz_add_epi8(test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm_mask_add_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t src[8]; const simde__mmask8 k; const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { { INT16_C( 17817), -INT16_C( 2243), -INT16_C( 28883), -INT16_C( 7874), INT16_C( 338), -INT16_C( 12748), -INT16_C( 9412), INT16_C( 228) }, UINT8_C( 66), { -INT16_C( 13625), -INT16_C( 511), -INT16_C( 9176), -INT16_C( 32360), -INT16_C( 18674), -INT16_C( 29571), -INT16_C( 32208), INT16_C( 4749) }, { -INT16_C( 26006), -INT16_C( 12496), INT16_C( 20326), -INT16_C( 234), -INT16_C( 21928), -INT16_C( 3464), -INT16_C( 18163), -INT16_C( 1715) }, { INT16_C( 17817), -INT16_C( 13007), -INT16_C( 28883), -INT16_C( 7874), INT16_C( 338), -INT16_C( 12748), INT16_C( 15165), INT16_C( 228) } }, { { -INT16_C( 31056), -INT16_C( 22914), INT16_C( 26105), -INT16_C( 10738), INT16_C( 397), INT16_C( 25565), -INT16_C( 22215), -INT16_C( 22166) }, UINT8_C( 95), { -INT16_C( 29933), -INT16_C( 7224), INT16_C( 20985), INT16_C( 6509), -INT16_C( 25426), -INT16_C( 14506), -INT16_C( 26474), -INT16_C( 16323) }, { -INT16_C( 17906), INT16_C( 4756), -INT16_C( 20982), INT16_C( 9519), -INT16_C( 15356), -INT16_C( 11051), INT16_C( 26863), -INT16_C( 25274) }, { INT16_C( 17697), -INT16_C( 2468), INT16_C( 3), INT16_C( 16028), INT16_C( 24754), INT16_C( 25565), INT16_C( 389), -INT16_C( 22166) } }, { { -INT16_C( 4880), INT16_C( 8735), INT16_C( 8130), INT16_C( 26938), -INT16_C( 8020), -INT16_C( 31347), INT16_C( 8055), INT16_C( 9966) }, UINT8_C(190), { INT16_C( 1808), INT16_C( 20344), INT16_C( 2923), -INT16_C( 23307), -INT16_C( 27990), -INT16_C( 8377), -INT16_C( 32088), -INT16_C( 20615) }, { INT16_C( 25028), -INT16_C( 30164), INT16_C( 13304), INT16_C( 19564), -INT16_C( 
24947), INT16_C( 1894), -INT16_C( 6448), -INT16_C( 24485) }, { -INT16_C( 4880), -INT16_C( 9820), INT16_C( 16227), -INT16_C( 3743), INT16_C( 12599), -INT16_C( 6483), INT16_C( 8055), INT16_C( 20436) } }, { { INT16_C( 13221), -INT16_C( 19991), INT16_C( 21448), INT16_C( 9585), INT16_C( 18335), INT16_C( 18834), INT16_C( 31996), -INT16_C( 1809) }, UINT8_C(131), { INT16_C( 1798), INT16_C( 26660), -INT16_C( 21916), -INT16_C( 14369), INT16_C( 18610), INT16_C( 19226), -INT16_C( 32140), INT16_C( 6089) }, { -INT16_C( 10658), INT16_C( 26610), INT16_C( 5435), INT16_C( 22284), INT16_C( 1294), INT16_C( 25864), INT16_C( 31360), INT16_C( 29206) }, { -INT16_C( 8860), -INT16_C( 12266), INT16_C( 21448), INT16_C( 9585), INT16_C( 18335), INT16_C( 18834), INT16_C( 31996), -INT16_C( 30241) } }, { { INT16_C( 3819), INT16_C( 7886), -INT16_C( 2308), -INT16_C( 23812), -INT16_C( 53), INT16_C( 7661), -INT16_C( 6086), -INT16_C( 20132) }, UINT8_C(126), { INT16_C( 1025), -INT16_C( 1747), INT16_C( 13483), INT16_C( 15025), INT16_C( 17328), -INT16_C( 14874), INT16_C( 6565), INT16_C( 14071) }, { -INT16_C( 12931), -INT16_C( 25527), -INT16_C( 11161), INT16_C( 18125), -INT16_C( 1194), -INT16_C( 29538), -INT16_C( 3219), -INT16_C( 14390) }, { INT16_C( 3819), -INT16_C( 27274), INT16_C( 2322), -INT16_C( 32386), INT16_C( 16134), INT16_C( 21124), INT16_C( 3346), -INT16_C( 20132) } }, { { INT16_C( 22971), -INT16_C( 32214), -INT16_C( 27903), INT16_C( 14508), INT16_C( 16600), -INT16_C( 22707), -INT16_C( 5461), -INT16_C( 11019) }, UINT8_C( 35), { INT16_C( 16262), -INT16_C( 24435), INT16_C( 14751), INT16_C( 27015), -INT16_C( 24225), -INT16_C( 541), -INT16_C( 9613), INT16_C( 4216) }, { -INT16_C( 1057), INT16_C( 24640), INT16_C( 27675), INT16_C( 25767), INT16_C( 8263), INT16_C( 28018), -INT16_C( 30242), INT16_C( 6202) }, { INT16_C( 15205), INT16_C( 205), -INT16_C( 27903), INT16_C( 14508), INT16_C( 16600), INT16_C( 27477), -INT16_C( 5461), -INT16_C( 11019) } }, { { INT16_C( 9473), -INT16_C( 12490), INT16_C( 26373), 
-INT16_C( 31879), -INT16_C( 12795), INT16_C( 27285), INT16_C( 20597), -INT16_C( 2226) }, UINT8_C( 62), { -INT16_C( 1230), -INT16_C( 11593), INT16_C( 18697), -INT16_C( 8764), -INT16_C( 7130), INT16_C( 25580), -INT16_C( 4090), -INT16_C( 25505) }, { INT16_C( 19431), -INT16_C( 14954), -INT16_C( 27621), INT16_C( 15919), -INT16_C( 16076), -INT16_C( 32362), -INT16_C( 25322), INT16_C( 25848) }, { INT16_C( 9473), -INT16_C( 26547), -INT16_C( 8924), INT16_C( 7155), -INT16_C( 23206), -INT16_C( 6782), INT16_C( 20597), -INT16_C( 2226) } }, { { -INT16_C( 16798), -INT16_C( 16061), INT16_C( 28632), INT16_C( 12716), INT16_C( 10145), INT16_C( 23704), INT16_C( 13844), -INT16_C( 1453) }, UINT8_C( 10), { INT16_C( 4456), INT16_C( 1455), -INT16_C( 11882), -INT16_C( 21010), -INT16_C( 14517), INT16_C( 9453), INT16_C( 19422), INT16_C( 22787) }, { INT16_C( 9815), -INT16_C( 4177), -INT16_C( 17752), INT16_C( 8643), -INT16_C( 14132), -INT16_C( 28981), INT16_C( 3734), -INT16_C( 2203) }, { -INT16_C( 16798), -INT16_C( 2722), INT16_C( 28632), -INT16_C( 12367), INT16_C( 10145), INT16_C( 23704), INT16_C( 13844), -INT16_C( 1453) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi16(test_vec[i].src); simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_mask_add_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm_maskz_add_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { UINT8_C(134), { INT16_C( 12554), -INT16_C( 17057), -INT16_C( 28311), -INT16_C( 28428), -INT16_C( 15318), -INT16_C( 10100), -INT16_C( 18852), -INT16_C( 16229) }, { -INT16_C( 31990), INT16_C( 29908), INT16_C( 15111), -INT16_C( 4862), INT16_C( 23252), -INT16_C( 12060), -INT16_C( 25385), 
-INT16_C( 12982) }, { INT16_C( 0), INT16_C( 12851), -INT16_C( 13200), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 29211) } }, { UINT8_C(157), { INT16_C( 1408), INT16_C( 17385), -INT16_C( 25707), -INT16_C( 3037), INT16_C( 2190), -INT16_C( 16188), INT16_C( 9347), -INT16_C( 7492) }, { -INT16_C( 31565), INT16_C( 13950), -INT16_C( 23827), INT16_C( 2179), INT16_C( 5735), INT16_C( 2100), -INT16_C( 26139), -INT16_C( 8740) }, { -INT16_C( 30157), INT16_C( 0), INT16_C( 16002), -INT16_C( 858), INT16_C( 7925), INT16_C( 0), INT16_C( 0), -INT16_C( 16232) } }, { UINT8_C( 24), { -INT16_C( 18498), -INT16_C( 16851), -INT16_C( 8942), INT16_C( 29587), -INT16_C( 30777), -INT16_C( 18973), -INT16_C( 17141), INT16_C( 22434) }, { -INT16_C( 7638), INT16_C( 677), -INT16_C( 15883), INT16_C( 16066), INT16_C( 29995), -INT16_C( 27669), INT16_C( 7858), -INT16_C( 16674) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 19883), -INT16_C( 782), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT8_C(157), { -INT16_C( 30517), -INT16_C( 8546), INT16_C( 22395), INT16_C( 25788), -INT16_C( 9860), INT16_C( 13172), INT16_C( 13481), INT16_C( 7362) }, { INT16_C( 20929), -INT16_C( 87), -INT16_C( 17569), -INT16_C( 31496), INT16_C( 15928), -INT16_C( 7297), INT16_C( 24667), INT16_C( 21390) }, { -INT16_C( 9588), INT16_C( 0), INT16_C( 4826), -INT16_C( 5708), INT16_C( 6068), INT16_C( 0), INT16_C( 0), INT16_C( 28752) } }, { UINT8_C(204), { -INT16_C( 7807), -INT16_C( 18593), INT16_C( 5789), INT16_C( 10188), -INT16_C( 320), -INT16_C( 16671), -INT16_C( 20439), -INT16_C( 18566) }, { -INT16_C( 19073), -INT16_C( 12901), INT16_C( 26791), INT16_C( 16857), -INT16_C( 21045), -INT16_C( 9971), INT16_C( 2843), INT16_C( 25436) }, { INT16_C( 0), INT16_C( 0), INT16_C( 32580), INT16_C( 27045), INT16_C( 0), INT16_C( 0), -INT16_C( 17596), INT16_C( 6870) } }, { UINT8_C(237), { -INT16_C( 16461), -INT16_C( 6930), INT16_C( 16169), -INT16_C( 32323), INT16_C( 2552), INT16_C( 1679), -INT16_C( 26974), INT16_C( 14036) }, { 
INT16_C( 2376), -INT16_C( 18722), INT16_C( 3134), INT16_C( 10788), INT16_C( 9045), INT16_C( 14703), INT16_C( 12911), INT16_C( 27034) }, { -INT16_C( 14085), INT16_C( 0), INT16_C( 19303), -INT16_C( 21535), INT16_C( 0), INT16_C( 16382), -INT16_C( 14063), -INT16_C( 24466) } }, { UINT8_C( 42), { INT16_C( 26325), INT16_C( 6988), INT16_C( 23545), -INT16_C( 22647), INT16_C( 31625), -INT16_C( 3414), INT16_C( 10045), -INT16_C( 17949) }, { INT16_C( 1997), -INT16_C( 13645), -INT16_C( 8378), INT16_C( 19517), INT16_C( 12495), -INT16_C( 25023), INT16_C( 24234), -INT16_C( 25731) }, { INT16_C( 0), -INT16_C( 6657), INT16_C( 0), -INT16_C( 3130), INT16_C( 0), -INT16_C( 28437), INT16_C( 0), INT16_C( 0) } }, { UINT8_C(211), { INT16_C( 12889), INT16_C( 28312), INT16_C( 22010), INT16_C( 4663), -INT16_C( 15800), -INT16_C( 32484), -INT16_C( 11389), INT16_C( 13519) }, { INT16_C( 15116), -INT16_C( 28247), INT16_C( 679), INT16_C( 25752), -INT16_C( 15591), INT16_C( 22603), -INT16_C( 32111), -INT16_C( 14859) }, { INT16_C( 28005), INT16_C( 65), INT16_C( 0), INT16_C( 0), -INT16_C( 31391), INT16_C( 0), INT16_C( 22036), -INT16_C( 1340) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_maskz_add_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm_mask_add_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t src[4]; const simde__mmask8 k; const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { { INT32_C( 1958227227), INT32_C( 1842333829), -INT32_C( 2080453323), -INT32_C( 249177196) }, UINT8_C(155), { INT32_C( 553106004), -INT32_C( 1372587746), -INT32_C( 1712075801), INT32_C( 993452127) }, { INT32_C( 777515607), -INT32_C( 712684129), INT32_C( 223497663), -INT32_C( 489447286) }, { INT32_C( 1330621611), 
-INT32_C( 2085271875), -INT32_C( 2080453323), INT32_C( 504004841) } }, { { INT32_C( 697251714), -INT32_C( 951749739), INT32_C( 836834350), -INT32_C( 346279314) }, UINT8_C(111), { -INT32_C( 885142141), -INT32_C( 194598295), -INT32_C( 1261452693), INT32_C( 375114831) }, { INT32_C( 1479180836), -INT32_C( 1604991616), -INT32_C( 1780092987), INT32_C( 634429248) }, { INT32_C( 594038695), -INT32_C( 1799589911), INT32_C( 1253421616), INT32_C( 1009544079) } }, { { INT32_C( 1249061656), INT32_C( 406248213), INT32_C( 1546568796), INT32_C( 345790387) }, UINT8_C(173), { INT32_C( 1116067984), -INT32_C( 306617666), INT32_C( 1471337118), INT32_C( 1742516687) }, { INT32_C( 1421198449), INT32_C( 1861843318), -INT32_C( 2139880994), INT32_C( 418138440) }, { -INT32_C( 1757700863), INT32_C( 406248213), -INT32_C( 668543876), -INT32_C( 2134312169) } }, { { -INT32_C( 455875920), -INT32_C( 1805739296), -INT32_C( 834127167), -INT32_C( 193385963) }, UINT8_C(243), { INT32_C( 397937177), INT32_C( 447724867), -INT32_C( 1604479719), -INT32_C( 1556316088) }, { INT32_C( 764118341), INT32_C( 33168795), -INT32_C( 931609255), INT32_C( 2093992876) }, { INT32_C( 1162055518), INT32_C( 480893662), -INT32_C( 834127167), -INT32_C( 193385963) } }, { { INT32_C( 227843937), -INT32_C( 1816959923), INT32_C( 110120824), -INT32_C( 1826017770) }, UINT8_C(242), { INT32_C( 598721326), -INT32_C( 1962044123), -INT32_C( 1919813583), -INT32_C( 1281349718) }, { -INT32_C( 1464369420), INT32_C( 1889351967), -INT32_C( 174840084), -INT32_C( 1849263339) }, { INT32_C( 227843937), -INT32_C( 72692156), INT32_C( 110120824), -INT32_C( 1826017770) } }, { { INT32_C( 585486166), -INT32_C( 1881648464), -INT32_C( 1741597697), INT32_C( 1501172127) }, UINT8_C(177), { -INT32_C( 2072152845), INT32_C( 1678312837), INT32_C( 175231240), INT32_C( 639313595) }, { INT32_C( 1844718395), INT32_C( 1747844119), -INT32_C( 1642309052), -INT32_C( 1463847021) }, { -INT32_C( 227434450), -INT32_C( 1881648464), -INT32_C( 1741597697), INT32_C( 1501172127) } 
}, { { INT32_C( 1282734968), -INT32_C( 1805890056), -INT32_C( 170454139), INT32_C( 939566096) }, UINT8_C( 1), { INT32_C( 373441333), INT32_C( 1967739279), INT32_C( 363886263), -INT32_C( 1478106109) }, { -INT32_C( 1988739640), INT32_C( 299055662), INT32_C( 830616967), INT32_C( 503576578) }, { -INT32_C( 1615298307), -INT32_C( 1805890056), -INT32_C( 170454139), INT32_C( 939566096) } }, { { INT32_C( 1100203671), -INT32_C( 234656697), -INT32_C( 2035991414), INT32_C( 1938166869) }, UINT8_C( 44), { -INT32_C( 77918946), -INT32_C( 927432354), -INT32_C( 2008458249), INT32_C( 1379220591) }, { -INT32_C( 261431271), -INT32_C( 1794574077), INT32_C( 1874265007), INT32_C( 695196668) }, { INT32_C( 1100203671), -INT32_C( 234656697), -INT32_C( 134193242), INT32_C( 2074417259) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi32(test_vec[i].src); simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_mask_add_epi32(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm_maskz_add_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { UINT8_C( 26), { INT32_C( 1100397714), INT32_C( 1720147146), INT32_C( 782659498), INT32_C( 164614002) }, { INT32_C( 1362429759), -INT32_C( 135924898), INT32_C( 1277782591), INT32_C( 1455600660) }, { INT32_C( 0), INT32_C( 1584222248), INT32_C( 0), INT32_C( 1620214662) } }, { UINT8_C(104), { -INT32_C( 511491329), INT32_C( 2145361873), -INT32_C( 681927889), -INT32_C( 1760116045) }, { -INT32_C( 1818488330), -INT32_C( 1227468567), -INT32_C( 914908373), -INT32_C( 761443622) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1773407629) } }, { UINT8_C( 38), { -INT32_C( 1756543294), -INT32_C( 1924529315), INT32_C( 753287628), 
INT32_C( 821606796) }, { INT32_C( 1995981071), INT32_C( 1941095443), -INT32_C( 997134313), INT32_C( 1414962996) }, { INT32_C( 0), INT32_C( 16566128), -INT32_C( 243846685), INT32_C( 0) } }, { UINT8_C( 11), { INT32_C( 1936642854), INT32_C( 1476527496), -INT32_C( 1916837668), INT32_C( 1565927957) }, { INT32_C( 819891483), INT32_C( 24098982), INT32_C( 1427042923), INT32_C( 967231402) }, { -INT32_C( 1538432959), INT32_C( 1500626478), INT32_C( 0), -INT32_C( 1761807937) } }, { UINT8_C(136), { INT32_C( 2025931821), -INT32_C( 1602308853), -INT32_C( 1584066603), INT32_C( 2144498786) }, { -INT32_C( 32030623), -INT32_C( 522392968), INT32_C( 2136840774), INT32_C( 707776301) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1442692209) } }, { UINT8_C( 16), { INT32_C( 615979771), INT32_C( 91372444), -INT32_C( 1715689431), INT32_C( 1732560282) }, { INT32_C( 1582515072), -INT32_C( 566478811), -INT32_C( 1915644371), -INT32_C( 936530095) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { UINT8_C(152), { -INT32_C( 1815019322), INT32_C( 2115824139), -INT32_C( 541094950), -INT32_C( 308634405) }, { -INT32_C( 923734690), INT32_C( 926178071), INT32_C( 1920079652), -INT32_C( 1332173880) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1640808285) } }, { UINT8_C( 27), { INT32_C( 1575440202), INT32_C( 1941889206), INT32_C( 41623433), -INT32_C( 28396641) }, { INT32_C( 7451246), -INT32_C( 1374405146), INT32_C( 1597383244), -INT32_C( 1478059980) }, { INT32_C( 1582891448), INT32_C( 567484060), INT32_C( 0), -INT32_C( 1506456621) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_maskz_add_epi32(test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm_mask_add_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t 
src[2]; const simde__mmask8 k; const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { { INT64_C( 8201918233897001447), INT64_C( 363962166077692400) }, UINT8_C(116), { -INT64_C( 2929372668825588728), -INT64_C( 3656510824753645306) }, { INT64_C( 2121463196383157106), -INT64_C( 2712448646012699723) }, { INT64_C( 8201918233897001447), INT64_C( 363962166077692400) } }, { { -INT64_C( 7660056023431247569), -INT64_C( 8800573589908441802) }, UINT8_C( 26), { -INT64_C( 5155283151088348406), -INT64_C( 6777794566593036429) }, { INT64_C( 8217194832092526134), -INT64_C( 6988787196078811595) }, { -INT64_C( 7660056023431247569), INT64_C( 4680162311037703592) } }, { { -INT64_C( 5177610656632927949), -INT64_C( 7835688890176823491) }, UINT8_C( 38), { INT64_C( 8694329990162064366), -INT64_C( 7553465337537219517) }, { INT64_C( 5861342603890327684), INT64_C( 8969888432361565647) }, { -INT64_C( 5177610656632927949), INT64_C( 1416423094824346130) } }, { { INT64_C( 1563207507706856527), -INT64_C( 371485882345723171) }, UINT8_C(246), { -INT64_C( 8155582183645986764), -INT64_C( 8042754456252808652) }, { -INT64_C( 996858152082936078), INT64_C( 999238294551887019) }, { INT64_C( 1563207507706856527), -INT64_C( 7043516161700921633) } }, { { INT64_C( 2269651972621910057), INT64_C( 8122205111827084555) }, UINT8_C(109), { INT64_C( 5898016879431101179), INT64_C( 8196109586946188276) }, { -INT64_C( 3927996688380496977), -INT64_C( 8700540345223695011) }, { INT64_C( 1970020191050604202), INT64_C( 8122205111827084555) } }, { { INT64_C( 8953512102049709771), INT64_C( 4073568780934150804) }, UINT8_C( 55), { -INT64_C( 8698567697690688449), INT64_C( 2011128588034496860) }, { INT64_C( 4050759086289972052), -INT64_C( 4209687100771601707) }, { -INT64_C( 4647808611400716397), -INT64_C( 2198558512737104847) } }, { { -INT64_C( 2003310644913083277), INT64_C( 3614518035412058723) }, UINT8_C(247), { INT64_C( 1202266463040515144), INT64_C( 2203879785493297747) }, { -INT64_C( 
6903979043742968285), INT64_C( 286701945599558971) }, { -INT64_C( 5701712580702453141), INT64_C( 2490581731092856718) } }, { { INT64_C( 9179970759417586743), -INT64_C( 419587667919506800) }, UINT8_C( 88), { -INT64_C( 5871315755711329534), -INT64_C( 2058360122490679194) }, { INT64_C( 1804735659384354964), INT64_C( 3266572330366650128) }, { INT64_C( 9179970759417586743), -INT64_C( 419587667919506800) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi64(test_vec[i].src); simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_mask_add_epi64(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm_maskz_add_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { UINT8_C(150), { -INT64_C( 993872781162934343), INT64_C( 9134756112190127784) }, { -INT64_C( 2891573308549781280), -INT64_C( 9143568260559188007) }, { INT64_C( 0), -INT64_C( 8812148369060223) } }, { UINT8_C(196), { INT64_C( 6550556420904536135), INT64_C( 8490360445651406694) }, { -INT64_C( 1739704569367854626), INT64_C( 4245131661435093091) }, { INT64_C( 0), INT64_C( 0) } }, { UINT8_C( 52), { -INT64_C( 2760535867334763843), INT64_C( 7115030050339329677) }, { INT64_C( 7188684116250616331), -INT64_C( 2471133335336396754) }, { INT64_C( 0), INT64_C( 0) } }, { UINT8_C( 5), { INT64_C( 3814106451400715087), INT64_C( 6178810664702908734) }, { INT64_C( 1820139726703884269), -INT64_C( 2876452185216044984) }, { INT64_C( 5634246178104599356), INT64_C( 0) } }, { UINT8_C( 77), { -INT64_C( 7349641843913850521), INT64_C( 3200744105211371253) }, { INT64_C( 228621185812703474), INT64_C( 6391631982896984822) }, { -INT64_C( 7121020658101147047), INT64_C( 0) } }, { UINT8_C(254), { INT64_C( 
5885620942751936373), INT64_C( 3334511588433406542) }, { -INT64_C( 254487071634799123), INT64_C( 3127732574282601076) }, { INT64_C( 0), INT64_C( 6462244162716007618) } }, { UINT8_C(174), { -INT64_C( 2725183470148505474), INT64_C( 8524564968923083055) }, { INT64_C( 3417677596325229905), -INT64_C( 1448789787674024211) }, { INT64_C( 0), INT64_C( 7075775181249058844) } }, { UINT8_C( 60), { INT64_C( 5475088381666832271), INT64_C( 2253690732183149705) }, { INT64_C( 9160715340915633356), -INT64_C( 3092667301170657521) }, { INT64_C( 0), INT64_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_maskz_add_epi64(test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm_mask_add_ss (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 src[4]; const simde__mmask8 k; const simde_float32 a[4]; const simde_float32 b[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -362.02), SIMDE_FLOAT32_C( -753.03), SIMDE_FLOAT32_C( 336.60), SIMDE_FLOAT32_C( 433.83) }, UINT8_C(125), { SIMDE_FLOAT32_C( 392.81), SIMDE_FLOAT32_C( -464.44), SIMDE_FLOAT32_C( -422.67), SIMDE_FLOAT32_C( -899.86) }, { SIMDE_FLOAT32_C( -403.90), SIMDE_FLOAT32_C( 536.42), SIMDE_FLOAT32_C( -805.91), SIMDE_FLOAT32_C( 663.43) }, { SIMDE_FLOAT32_C( -11.09), SIMDE_FLOAT32_C( -464.44), SIMDE_FLOAT32_C( -422.67), SIMDE_FLOAT32_C( -899.86) } }, { { SIMDE_FLOAT32_C( -478.45), SIMDE_FLOAT32_C( -735.39), SIMDE_FLOAT32_C( -353.05), SIMDE_FLOAT32_C( 917.77) }, UINT8_C( 83), { SIMDE_FLOAT32_C( -443.57), SIMDE_FLOAT32_C( 820.62), SIMDE_FLOAT32_C( 677.29), SIMDE_FLOAT32_C( 81.36) }, { SIMDE_FLOAT32_C( 246.07), SIMDE_FLOAT32_C( 669.97), SIMDE_FLOAT32_C( 862.97), SIMDE_FLOAT32_C( 545.49) }, { SIMDE_FLOAT32_C( -197.50), SIMDE_FLOAT32_C( 820.62), 
SIMDE_FLOAT32_C( 677.29), SIMDE_FLOAT32_C( 81.36) } }, { { SIMDE_FLOAT32_C( 289.66), SIMDE_FLOAT32_C( 153.93), SIMDE_FLOAT32_C( -971.51), SIMDE_FLOAT32_C( 876.28) }, UINT8_C(104), { SIMDE_FLOAT32_C( -333.53), SIMDE_FLOAT32_C( -876.75), SIMDE_FLOAT32_C( -699.81), SIMDE_FLOAT32_C( -899.70) }, { SIMDE_FLOAT32_C( -343.74), SIMDE_FLOAT32_C( 692.99), SIMDE_FLOAT32_C( -364.15), SIMDE_FLOAT32_C( 233.59) }, { SIMDE_FLOAT32_C( 289.66), SIMDE_FLOAT32_C( -876.75), SIMDE_FLOAT32_C( -699.81), SIMDE_FLOAT32_C( -899.70) } }, { { SIMDE_FLOAT32_C( 793.13), SIMDE_FLOAT32_C( 231.95), SIMDE_FLOAT32_C( -229.99), SIMDE_FLOAT32_C( 987.23) }, UINT8_C(242), { SIMDE_FLOAT32_C( 291.56), SIMDE_FLOAT32_C( -748.16), SIMDE_FLOAT32_C( 542.34), SIMDE_FLOAT32_C( 209.32) }, { SIMDE_FLOAT32_C( 326.59), SIMDE_FLOAT32_C( -901.23), SIMDE_FLOAT32_C( 29.95), SIMDE_FLOAT32_C( 3.89) }, { SIMDE_FLOAT32_C( 793.13), SIMDE_FLOAT32_C( -748.16), SIMDE_FLOAT32_C( 542.34), SIMDE_FLOAT32_C( 209.32) } }, { { SIMDE_FLOAT32_C( 180.14), SIMDE_FLOAT32_C( -723.98), SIMDE_FLOAT32_C( -326.15), SIMDE_FLOAT32_C( 43.10) }, UINT8_C(222), { SIMDE_FLOAT32_C( 963.51), SIMDE_FLOAT32_C( -802.96), SIMDE_FLOAT32_C( 850.00), SIMDE_FLOAT32_C( 839.79) }, { SIMDE_FLOAT32_C( 160.62), SIMDE_FLOAT32_C( -483.52), SIMDE_FLOAT32_C( 963.04), SIMDE_FLOAT32_C( 460.80) }, { SIMDE_FLOAT32_C( 180.14), SIMDE_FLOAT32_C( -802.96), SIMDE_FLOAT32_C( 850.00), SIMDE_FLOAT32_C( 839.79) } }, { { SIMDE_FLOAT32_C( -383.22), SIMDE_FLOAT32_C( -380.70), SIMDE_FLOAT32_C( 153.80), SIMDE_FLOAT32_C( 252.63) }, UINT8_C( 59), { SIMDE_FLOAT32_C( -53.07), SIMDE_FLOAT32_C( -515.42), SIMDE_FLOAT32_C( -377.10), SIMDE_FLOAT32_C( -65.84) }, { SIMDE_FLOAT32_C( 379.97), SIMDE_FLOAT32_C( 914.45), SIMDE_FLOAT32_C( 186.00), SIMDE_FLOAT32_C( -77.69) }, { SIMDE_FLOAT32_C( 326.90), SIMDE_FLOAT32_C( -515.42), SIMDE_FLOAT32_C( -377.10), SIMDE_FLOAT32_C( -65.84) } }, { { SIMDE_FLOAT32_C( 123.78), SIMDE_FLOAT32_C( -487.41), SIMDE_FLOAT32_C( 21.08), SIMDE_FLOAT32_C( -846.28) }, 
UINT8_C(218), { SIMDE_FLOAT32_C( -798.78), SIMDE_FLOAT32_C( -570.26), SIMDE_FLOAT32_C( -809.67), SIMDE_FLOAT32_C( 244.32) }, { SIMDE_FLOAT32_C( -748.74), SIMDE_FLOAT32_C( -846.16), SIMDE_FLOAT32_C( 441.35), SIMDE_FLOAT32_C( -898.74) }, { SIMDE_FLOAT32_C( 123.78), SIMDE_FLOAT32_C( -570.26), SIMDE_FLOAT32_C( -809.67), SIMDE_FLOAT32_C( 244.32) } }, { { SIMDE_FLOAT32_C( 993.63), SIMDE_FLOAT32_C( -398.03), SIMDE_FLOAT32_C( -382.26), SIMDE_FLOAT32_C( 956.67) }, UINT8_C( 17), { SIMDE_FLOAT32_C( 234.51), SIMDE_FLOAT32_C( -424.03), SIMDE_FLOAT32_C( 216.57), SIMDE_FLOAT32_C( -512.86) }, { SIMDE_FLOAT32_C( -571.15), SIMDE_FLOAT32_C( -836.50), SIMDE_FLOAT32_C( -28.28), SIMDE_FLOAT32_C( 51.75) }, { SIMDE_FLOAT32_C( -336.64), SIMDE_FLOAT32_C( -424.03), SIMDE_FLOAT32_C( 216.57), SIMDE_FLOAT32_C( -512.86) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 src = simde_mm_loadu_ps(test_vec[i].src); simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_mm_mask_add_ss(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128 src = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128 a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m128 b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m128 r = simde_mm_mask_add_ss(src, k, a, b); simde_test_x86_write_f32x4(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int 
test_simde_mm_maskz_add_ss (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask8 k; const simde_float32 a[4]; const simde_float32 b[4]; const simde_float32 r[4]; } test_vec[] = { { UINT8_C(229), { SIMDE_FLOAT32_C( 695.67), SIMDE_FLOAT32_C( -467.38), SIMDE_FLOAT32_C( 303.41), SIMDE_FLOAT32_C( 444.32) }, { SIMDE_FLOAT32_C( -971.51), SIMDE_FLOAT32_C( 843.53), SIMDE_FLOAT32_C( -243.67), SIMDE_FLOAT32_C( 463.81) }, { SIMDE_FLOAT32_C( -275.84), SIMDE_FLOAT32_C( -467.38), SIMDE_FLOAT32_C( 303.41), SIMDE_FLOAT32_C( 444.32) } }, { UINT8_C(226), { SIMDE_FLOAT32_C( 197.10), SIMDE_FLOAT32_C( 132.05), SIMDE_FLOAT32_C( -305.37), SIMDE_FLOAT32_C( -575.56) }, { SIMDE_FLOAT32_C( 496.81), SIMDE_FLOAT32_C( -398.18), SIMDE_FLOAT32_C( 186.52), SIMDE_FLOAT32_C( 10.74) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 132.05), SIMDE_FLOAT32_C( -305.37), SIMDE_FLOAT32_C( -575.56) } }, { UINT8_C(101), { SIMDE_FLOAT32_C( -659.66), SIMDE_FLOAT32_C( -842.89), SIMDE_FLOAT32_C( -218.71), SIMDE_FLOAT32_C( 619.24) }, { SIMDE_FLOAT32_C( -897.13), SIMDE_FLOAT32_C( -873.47), SIMDE_FLOAT32_C( 228.22), SIMDE_FLOAT32_C( 5.28) }, { SIMDE_FLOAT32_C( -1556.79), SIMDE_FLOAT32_C( -842.89), SIMDE_FLOAT32_C( -218.71), SIMDE_FLOAT32_C( 619.24) } }, { UINT8_C(252), { SIMDE_FLOAT32_C( 87.18), SIMDE_FLOAT32_C( 911.77), SIMDE_FLOAT32_C( -825.67), SIMDE_FLOAT32_C( 690.54) }, { SIMDE_FLOAT32_C( 607.43), SIMDE_FLOAT32_C( -293.05), SIMDE_FLOAT32_C( -6.04), SIMDE_FLOAT32_C( 51.75) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 911.77), SIMDE_FLOAT32_C( -825.67), SIMDE_FLOAT32_C( 690.54) } }, { UINT8_C( 58), { SIMDE_FLOAT32_C( -162.51), SIMDE_FLOAT32_C( 808.09), SIMDE_FLOAT32_C( -800.75), SIMDE_FLOAT32_C( 733.18) }, { SIMDE_FLOAT32_C( 5.19), SIMDE_FLOAT32_C( 331.30), SIMDE_FLOAT32_C( -572.20), SIMDE_FLOAT32_C( 429.63) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 808.09), SIMDE_FLOAT32_C( -800.75), SIMDE_FLOAT32_C( 733.18) } }, { UINT8_C(165), { SIMDE_FLOAT32_C( 29.62), SIMDE_FLOAT32_C( -383.85), 
SIMDE_FLOAT32_C( 838.85), SIMDE_FLOAT32_C( -85.88) }, { SIMDE_FLOAT32_C( -43.51), SIMDE_FLOAT32_C( 995.96), SIMDE_FLOAT32_C( 695.41), SIMDE_FLOAT32_C( -424.27) }, { SIMDE_FLOAT32_C( -13.89), SIMDE_FLOAT32_C( -383.85), SIMDE_FLOAT32_C( 838.85), SIMDE_FLOAT32_C( -85.88) } }, { UINT8_C(151), { SIMDE_FLOAT32_C( 821.95), SIMDE_FLOAT32_C( 803.96), SIMDE_FLOAT32_C( 104.12), SIMDE_FLOAT32_C( 482.38) }, { SIMDE_FLOAT32_C( -108.86), SIMDE_FLOAT32_C( 15.89), SIMDE_FLOAT32_C( 656.71), SIMDE_FLOAT32_C( -418.32) }, { SIMDE_FLOAT32_C( 713.09), SIMDE_FLOAT32_C( 803.96), SIMDE_FLOAT32_C( 104.12), SIMDE_FLOAT32_C( 482.38) } }, { UINT8_C(197), { SIMDE_FLOAT32_C( -636.34), SIMDE_FLOAT32_C( 575.64), SIMDE_FLOAT32_C( 675.07), SIMDE_FLOAT32_C( 99.10) }, { SIMDE_FLOAT32_C( -586.87), SIMDE_FLOAT32_C( 483.16), SIMDE_FLOAT32_C( 298.35), SIMDE_FLOAT32_C( -853.69) }, { SIMDE_FLOAT32_C( -1223.21), SIMDE_FLOAT32_C( 575.64), SIMDE_FLOAT32_C( 675.07), SIMDE_FLOAT32_C( 99.10) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_mm_maskz_add_ss(test_vec[i].k, a, b); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128 a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m128 b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m128 r = simde_mm_maskz_add_ss(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask_add_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const 
int16_t src[16]; const simde__mmask16 k; const int16_t a[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { { -INT16_C( 18088), INT16_C( 1606), -INT16_C( 7503), INT16_C( 5992), INT16_C( 28165), INT16_C( 22274), -INT16_C( 26583), -INT16_C( 13076), -INT16_C( 12908), INT16_C( 10372), INT16_C( 29897), INT16_C( 832), INT16_C( 19766), INT16_C( 21637), -INT16_C( 21677), INT16_C( 16763) }, UINT16_C(60084), { INT16_C( 31188), INT16_C( 10550), -INT16_C( 29740), INT16_C( 24731), -INT16_C( 25844), INT16_C( 20568), -INT16_C( 21197), INT16_C( 13543), -INT16_C( 13074), -INT16_C( 5446), INT16_C( 8264), -INT16_C( 22453), -INT16_C( 30162), -INT16_C( 8771), INT16_C( 29554), -INT16_C( 6204) }, { INT16_C( 13652), -INT16_C( 1326), -INT16_C( 8634), -INT16_C( 16805), -INT16_C( 7376), -INT16_C( 1060), -INT16_C( 17004), INT16_C( 3029), -INT16_C( 23256), -INT16_C( 24573), -INT16_C( 5392), INT16_C( 9842), INT16_C( 11581), -INT16_C( 25918), -INT16_C( 1929), INT16_C( 6096) }, { -INT16_C( 18088), INT16_C( 1606), INT16_C( 27162), INT16_C( 5992), INT16_C( 32316), INT16_C( 19508), -INT16_C( 26583), INT16_C( 16572), -INT16_C( 12908), -INT16_C( 30019), INT16_C( 29897), -INT16_C( 12611), INT16_C( 19766), INT16_C( 30847), INT16_C( 27625), -INT16_C( 108) } }, { { -INT16_C( 28260), INT16_C( 3355), INT16_C( 30149), INT16_C( 18841), INT16_C( 897), -INT16_C( 18335), -INT16_C( 27856), INT16_C( 31843), INT16_C( 24626), -INT16_C( 30048), -INT16_C( 8679), INT16_C( 1167), INT16_C( 9529), INT16_C( 15283), -INT16_C( 18329), INT16_C( 1611) }, UINT16_C(36795), { -INT16_C( 20367), INT16_C( 17977), -INT16_C( 10340), INT16_C( 23796), INT16_C( 4308), -INT16_C( 8548), -INT16_C( 31482), -INT16_C( 21319), INT16_C( 17), -INT16_C( 10987), INT16_C( 12850), INT16_C( 9166), INT16_C( 5118), -INT16_C( 13916), INT16_C( 27754), -INT16_C( 17049) }, { -INT16_C( 29649), INT16_C( 2250), INT16_C( 31457), INT16_C( 19428), -INT16_C( 17826), INT16_C( 8517), INT16_C( 3008), -INT16_C( 15739), -INT16_C( 3791), -INT16_C( 20340), 
INT16_C( 28400), INT16_C( 31847), -INT16_C( 20111), INT16_C( 8560), -INT16_C( 3090), -INT16_C( 17728) }, { INT16_C( 15520), INT16_C( 20227), INT16_C( 30149), -INT16_C( 22312), -INT16_C( 13518), -INT16_C( 31), -INT16_C( 27856), INT16_C( 28478), -INT16_C( 3774), -INT16_C( 31327), -INT16_C( 24286), -INT16_C( 24523), INT16_C( 9529), INT16_C( 15283), -INT16_C( 18329), INT16_C( 30759) } }, { { INT16_C( 5027), INT16_C( 31603), -INT16_C( 3038), INT16_C( 25247), INT16_C( 12894), -INT16_C( 2609), INT16_C( 30080), INT16_C( 22396), -INT16_C( 4776), -INT16_C( 7593), -INT16_C( 5922), -INT16_C( 20302), INT16_C( 26389), -INT16_C( 2181), INT16_C( 14645), INT16_C( 6951) }, UINT16_C(62936), { -INT16_C( 18607), INT16_C( 18295), -INT16_C( 2304), INT16_C( 28875), -INT16_C( 8411), INT16_C( 2635), INT16_C( 16083), INT16_C( 5198), -INT16_C( 31435), -INT16_C( 4074), INT16_C( 13887), -INT16_C( 4755), INT16_C( 30885), -INT16_C( 1538), -INT16_C( 32035), -INT16_C( 4113) }, { INT16_C( 2642), INT16_C( 4797), -INT16_C( 3564), -INT16_C( 27490), INT16_C( 16589), INT16_C( 10413), INT16_C( 30700), -INT16_C( 8724), INT16_C( 25964), INT16_C( 5287), -INT16_C( 19421), INT16_C( 28120), -INT16_C( 30880), -INT16_C( 19119), -INT16_C( 19912), -INT16_C( 27324) }, { INT16_C( 5027), INT16_C( 31603), -INT16_C( 3038), INT16_C( 1385), INT16_C( 8178), -INT16_C( 2609), -INT16_C( 18753), -INT16_C( 3526), -INT16_C( 5471), -INT16_C( 7593), -INT16_C( 5534), -INT16_C( 20302), INT16_C( 5), -INT16_C( 20657), INT16_C( 13589), -INT16_C( 31437) } }, { { INT16_C( 25041), INT16_C( 25663), INT16_C( 28102), INT16_C( 28245), INT16_C( 5599), INT16_C( 24300), -INT16_C( 10886), -INT16_C( 9183), INT16_C( 3807), INT16_C( 25125), -INT16_C( 25229), -INT16_C( 4659), -INT16_C( 15898), INT16_C( 11743), -INT16_C( 28195), INT16_C( 25686) }, UINT16_C(55587), { INT16_C( 2381), -INT16_C( 16078), -INT16_C( 27768), INT16_C( 27601), INT16_C( 16912), INT16_C( 8673), -INT16_C( 29342), -INT16_C( 7031), INT16_C( 30710), -INT16_C( 7797), -INT16_C( 30252), 
INT16_C( 23527), -INT16_C( 2137), INT16_C( 31719), INT16_C( 32572), INT16_C( 32012) }, { -INT16_C( 11761), INT16_C( 10830), -INT16_C( 22217), -INT16_C( 26449), -INT16_C( 13353), -INT16_C( 6701), -INT16_C( 21906), INT16_C( 26387), INT16_C( 28827), -INT16_C( 13374), -INT16_C( 1039), -INT16_C( 19312), -INT16_C( 2641), INT16_C( 17473), -INT16_C( 21172), -INT16_C( 20086) }, { -INT16_C( 9380), -INT16_C( 5248), INT16_C( 28102), INT16_C( 28245), INT16_C( 5599), INT16_C( 1972), -INT16_C( 10886), -INT16_C( 9183), -INT16_C( 5999), INT16_C( 25125), -INT16_C( 25229), INT16_C( 4215), -INT16_C( 4778), INT16_C( 11743), INT16_C( 11400), INT16_C( 11926) } }, { { INT16_C( 12057), INT16_C( 29150), INT16_C( 23614), INT16_C( 25130), INT16_C( 10655), -INT16_C( 27568), INT16_C( 28503), -INT16_C( 25443), -INT16_C( 10717), -INT16_C( 20322), -INT16_C( 29779), -INT16_C( 24431), INT16_C( 27510), -INT16_C( 9356), INT16_C( 4476), INT16_C( 16174) }, UINT16_C(36512), { INT16_C( 19088), INT16_C( 10128), INT16_C( 13140), INT16_C( 29098), -INT16_C( 3415), INT16_C( 2524), -INT16_C( 20169), -INT16_C( 11551), -INT16_C( 26829), INT16_C( 3366), INT16_C( 12131), INT16_C( 2463), INT16_C( 30355), INT16_C( 12022), INT16_C( 19040), INT16_C( 20815) }, { -INT16_C( 24937), INT16_C( 16359), -INT16_C( 602), INT16_C( 32707), -INT16_C( 4799), INT16_C( 16001), INT16_C( 10569), INT16_C( 2669), INT16_C( 8145), -INT16_C( 957), INT16_C( 11518), -INT16_C( 18084), -INT16_C( 29708), INT16_C( 9779), INT16_C( 904), -INT16_C( 28159) }, { INT16_C( 12057), INT16_C( 29150), INT16_C( 23614), INT16_C( 25130), INT16_C( 10655), INT16_C( 18525), INT16_C( 28503), -INT16_C( 8882), -INT16_C( 10717), INT16_C( 2409), INT16_C( 23649), -INT16_C( 15621), INT16_C( 27510), -INT16_C( 9356), INT16_C( 4476), -INT16_C( 7344) } }, { { INT16_C( 13479), INT16_C( 32082), -INT16_C( 1052), -INT16_C( 28178), -INT16_C( 5151), INT16_C( 15355), -INT16_C( 21898), -INT16_C( 6248), INT16_C( 26798), INT16_C( 24344), -INT16_C( 25169), INT16_C( 15648), -INT16_C( 
31017), INT16_C( 1114), -INT16_C( 19793), INT16_C( 27930) }, UINT16_C( 8052), { -INT16_C( 26007), INT16_C( 21812), -INT16_C( 28453), -INT16_C( 22252), -INT16_C( 2350), INT16_C( 20554), -INT16_C( 112), INT16_C( 28501), INT16_C( 20387), INT16_C( 15898), -INT16_C( 4455), INT16_C( 8743), -INT16_C( 3007), INT16_C( 1364), INT16_C( 26716), -INT16_C( 32024) }, { -INT16_C( 26027), -INT16_C( 22945), -INT16_C( 11918), -INT16_C( 14268), -INT16_C( 11610), -INT16_C( 22539), INT16_C( 6421), -INT16_C( 16498), INT16_C( 26152), -INT16_C( 22496), -INT16_C( 1902), -INT16_C( 10031), INT16_C( 9340), -INT16_C( 16924), -INT16_C( 19351), -INT16_C( 25237) }, { INT16_C( 13479), INT16_C( 32082), INT16_C( 25165), -INT16_C( 28178), -INT16_C( 13960), -INT16_C( 1985), INT16_C( 6309), -INT16_C( 6248), -INT16_C( 18997), -INT16_C( 6598), -INT16_C( 6357), -INT16_C( 1288), INT16_C( 6333), INT16_C( 1114), -INT16_C( 19793), INT16_C( 27930) } }, { { -INT16_C( 9402), -INT16_C( 4652), INT16_C( 27455), -INT16_C( 17628), INT16_C( 12568), -INT16_C( 6414), -INT16_C( 29207), -INT16_C( 1798), -INT16_C( 7113), -INT16_C( 19430), INT16_C( 19790), INT16_C( 17330), -INT16_C( 30097), INT16_C( 28960), INT16_C( 28059), -INT16_C( 12652) }, UINT16_C(55494), { INT16_C( 4668), INT16_C( 21727), INT16_C( 23842), INT16_C( 859), INT16_C( 24202), -INT16_C( 4733), -INT16_C( 18553), -INT16_C( 32032), -INT16_C( 22813), INT16_C( 981), -INT16_C( 1521), -INT16_C( 13085), -INT16_C( 20947), -INT16_C( 1346), -INT16_C( 1102), -INT16_C( 30046) }, { INT16_C( 29305), -INT16_C( 30721), INT16_C( 31389), -INT16_C( 1704), INT16_C( 2370), -INT16_C( 31381), INT16_C( 22976), INT16_C( 6971), INT16_C( 2368), INT16_C( 11549), -INT16_C( 7631), -INT16_C( 5582), -INT16_C( 17938), -INT16_C( 4477), INT16_C( 28539), -INT16_C( 8234) }, { -INT16_C( 9402), -INT16_C( 8994), -INT16_C( 10305), -INT16_C( 17628), INT16_C( 12568), -INT16_C( 6414), INT16_C( 4423), -INT16_C( 25061), -INT16_C( 7113), -INT16_C( 19430), INT16_C( 19790), -INT16_C( 18667), INT16_C( 26651), 
INT16_C( 28960), INT16_C( 27437), INT16_C( 27256) } }, { { INT16_C( 17152), -INT16_C( 16650), -INT16_C( 14707), INT16_C( 11632), INT16_C( 19908), -INT16_C( 20810), INT16_C( 377), INT16_C( 15379), -INT16_C( 10283), -INT16_C( 26722), -INT16_C( 177), -INT16_C( 17760), INT16_C( 8591), INT16_C( 3646), -INT16_C( 16462), INT16_C( 17616) }, UINT16_C( 3229), { INT16_C( 32634), INT16_C( 32728), INT16_C( 30084), -INT16_C( 23413), -INT16_C( 22910), INT16_C( 20672), INT16_C( 22735), -INT16_C( 29137), -INT16_C( 1705), INT16_C( 5378), INT16_C( 17391), INT16_C( 15446), -INT16_C( 2220), -INT16_C( 3224), -INT16_C( 13014), -INT16_C( 19705) }, { INT16_C( 14637), INT16_C( 15248), -INT16_C( 4954), -INT16_C( 14311), -INT16_C( 296), -INT16_C( 30474), -INT16_C( 5162), -INT16_C( 32073), INT16_C( 26895), INT16_C( 18041), -INT16_C( 18825), INT16_C( 9328), INT16_C( 19262), INT16_C( 3861), -INT16_C( 29853), INT16_C( 17210) }, { -INT16_C( 18265), -INT16_C( 16650), INT16_C( 25130), INT16_C( 27812), -INT16_C( 23206), -INT16_C( 20810), INT16_C( 377), INT16_C( 4326), -INT16_C( 10283), -INT16_C( 26722), -INT16_C( 1434), INT16_C( 24774), INT16_C( 8591), INT16_C( 3646), -INT16_C( 16462), INT16_C( 17616) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i src = simde_x_mm256_loadu_epi16(test_vec[i].src); simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_mask_add_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm256_maskz_add_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const int16_t a[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { UINT16_C( 4262), { INT16_C( 17131), INT16_C( 28575), INT16_C( 26454), INT16_C( 26097), INT16_C( 13425), INT16_C( 2916), INT16_C( 18413), -INT16_C( 10307), -INT16_C( 22620), INT16_C( 
19835), INT16_C( 24739), INT16_C( 18854), -INT16_C( 6815), -INT16_C( 4094), INT16_C( 31637), INT16_C( 24951) }, { -INT16_C( 29452), INT16_C( 29154), -INT16_C( 24158), INT16_C( 20005), -INT16_C( 21148), INT16_C( 28846), INT16_C( 22284), -INT16_C( 15662), -INT16_C( 9581), -INT16_C( 17953), -INT16_C( 18145), -INT16_C( 27296), -INT16_C( 29945), -INT16_C( 14925), -INT16_C( 22585), INT16_C( 11545) }, { INT16_C( 0), -INT16_C( 7807), INT16_C( 2296), INT16_C( 0), INT16_C( 0), INT16_C( 31762), INT16_C( 0), -INT16_C( 25969), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 28776), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT16_C(12070), { -INT16_C( 11840), -INT16_C( 15372), INT16_C( 32629), INT16_C( 21795), -INT16_C( 20660), -INT16_C( 29830), -INT16_C( 24024), -INT16_C( 29778), -INT16_C( 10477), -INT16_C( 11462), INT16_C( 1337), INT16_C( 5552), INT16_C( 5961), INT16_C( 23833), -INT16_C( 10057), -INT16_C( 19595) }, { -INT16_C( 22405), -INT16_C( 25821), -INT16_C( 24191), -INT16_C( 3861), -INT16_C( 24777), INT16_C( 24020), INT16_C( 13500), -INT16_C( 20346), -INT16_C( 128), INT16_C( 5177), INT16_C( 12366), -INT16_C( 8346), -INT16_C( 7771), -INT16_C( 8032), -INT16_C( 11287), -INT16_C( 24811) }, { INT16_C( 0), INT16_C( 24343), INT16_C( 8438), INT16_C( 0), INT16_C( 0), -INT16_C( 5810), INT16_C( 0), INT16_C( 0), -INT16_C( 10605), -INT16_C( 6285), INT16_C( 13703), -INT16_C( 2794), INT16_C( 0), INT16_C( 15801), INT16_C( 0), INT16_C( 0) } }, { UINT16_C(58512), { -INT16_C( 21753), -INT16_C( 12602), -INT16_C( 7088), -INT16_C( 15287), INT16_C( 2851), -INT16_C( 3784), -INT16_C( 19644), -INT16_C( 3081), INT16_C( 27159), INT16_C( 23080), -INT16_C( 12718), -INT16_C( 24689), -INT16_C( 17761), INT16_C( 9682), -INT16_C( 30808), -INT16_C( 21236) }, { -INT16_C( 30812), INT16_C( 6629), INT16_C( 21297), -INT16_C( 17253), INT16_C( 32442), INT16_C( 2260), -INT16_C( 2679), -INT16_C( 7556), -INT16_C( 10834), -INT16_C( 26189), INT16_C( 2827), -INT16_C( 12197), -INT16_C( 13163), INT16_C( 
26769), INT16_C( 21450), INT16_C( 4191) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 30243), INT16_C( 0), INT16_C( 0), -INT16_C( 10637), INT16_C( 0), INT16_C( 0), -INT16_C( 9891), INT16_C( 0), INT16_C( 0), -INT16_C( 29085), -INT16_C( 9358), -INT16_C( 17045) } }, { UINT16_C(26409), { INT16_C( 23989), -INT16_C( 3893), -INT16_C( 10213), -INT16_C( 24561), INT16_C( 24975), -INT16_C( 14143), -INT16_C( 26678), -INT16_C( 32272), -INT16_C( 25660), INT16_C( 31804), -INT16_C( 10324), INT16_C( 19769), -INT16_C( 22018), -INT16_C( 31803), -INT16_C( 22436), INT16_C( 31880) }, { -INT16_C( 13937), INT16_C( 31205), -INT16_C( 12695), INT16_C( 9893), INT16_C( 19693), -INT16_C( 32334), -INT16_C( 25972), INT16_C( 30977), INT16_C( 26854), -INT16_C( 10518), -INT16_C( 16582), -INT16_C( 26001), -INT16_C( 23041), INT16_C( 5435), INT16_C( 20892), -INT16_C( 8904) }, { INT16_C( 10052), INT16_C( 0), INT16_C( 0), -INT16_C( 14668), INT16_C( 0), INT16_C( 19059), INT16_C( 0), INT16_C( 0), INT16_C( 1194), INT16_C( 21286), -INT16_C( 26906), INT16_C( 0), INT16_C( 0), -INT16_C( 26368), -INT16_C( 1544), INT16_C( 0) } }, { UINT16_C(33734), { -INT16_C( 22559), -INT16_C( 8699), -INT16_C( 3315), INT16_C( 30631), INT16_C( 26186), INT16_C( 646), INT16_C( 32221), -INT16_C( 21902), INT16_C( 8809), -INT16_C( 9404), -INT16_C( 30818), INT16_C( 9706), -INT16_C( 24086), INT16_C( 10730), -INT16_C( 14587), -INT16_C( 12657) }, { -INT16_C( 32121), -INT16_C( 14411), -INT16_C( 12619), INT16_C( 32766), -INT16_C( 5886), -INT16_C( 28837), -INT16_C( 21424), INT16_C( 1152), INT16_C( 18809), -INT16_C( 4130), INT16_C( 7614), -INT16_C( 25916), -INT16_C( 21360), INT16_C( 31629), -INT16_C( 21160), -INT16_C( 31719) }, { INT16_C( 0), -INT16_C( 23110), -INT16_C( 15934), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 10797), -INT16_C( 20750), INT16_C( 27618), -INT16_C( 13534), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 21160) } }, { UINT16_C(12974), { -INT16_C( 9110), -INT16_C( 
27434), -INT16_C( 6575), -INT16_C( 17103), INT16_C( 10872), -INT16_C( 28881), -INT16_C( 1144), INT16_C( 11533), -INT16_C( 28247), INT16_C( 19770), INT16_C( 10309), -INT16_C( 18656), -INT16_C( 7754), INT16_C( 3438), INT16_C( 758), -INT16_C( 19035) }, { -INT16_C( 10315), -INT16_C( 614), INT16_C( 9805), INT16_C( 29426), -INT16_C( 17671), INT16_C( 32637), INT16_C( 32714), INT16_C( 16551), -INT16_C( 22924), -INT16_C( 21545), -INT16_C( 9595), -INT16_C( 15050), INT16_C( 9710), INT16_C( 18424), INT16_C( 172), -INT16_C( 1119) }, { INT16_C( 0), -INT16_C( 28048), INT16_C( 3230), INT16_C( 12323), INT16_C( 0), INT16_C( 3756), INT16_C( 0), INT16_C( 28084), INT16_C( 0), -INT16_C( 1775), INT16_C( 0), INT16_C( 0), INT16_C( 1956), INT16_C( 21862), INT16_C( 0), INT16_C( 0) } }, { UINT16_C(10927), { INT16_C( 8380), INT16_C( 25660), INT16_C( 3043), INT16_C( 26369), -INT16_C( 8173), INT16_C( 10605), INT16_C( 14118), -INT16_C( 4505), -INT16_C( 11066), INT16_C( 28798), -INT16_C( 24766), -INT16_C( 11889), -INT16_C( 14630), INT16_C( 31282), -INT16_C( 8140), -INT16_C( 9178) }, { INT16_C( 15994), -INT16_C( 30317), -INT16_C( 15077), INT16_C( 17333), -INT16_C( 7916), -INT16_C( 24502), INT16_C( 5769), -INT16_C( 29765), -INT16_C( 24127), -INT16_C( 29183), -INT16_C( 28892), INT16_C( 13466), INT16_C( 13695), -INT16_C( 4328), INT16_C( 11614), -INT16_C( 16283) }, { INT16_C( 24374), -INT16_C( 4657), -INT16_C( 12034), -INT16_C( 21834), INT16_C( 0), -INT16_C( 13897), INT16_C( 0), INT16_C( 31266), INT16_C( 0), -INT16_C( 385), INT16_C( 0), INT16_C( 1577), INT16_C( 0), INT16_C( 26954), INT16_C( 0), INT16_C( 0) } }, { UINT16_C(24898), { INT16_C( 31303), -INT16_C( 28888), INT16_C( 32475), -INT16_C( 9009), -INT16_C( 21039), INT16_C( 2463), -INT16_C( 235), -INT16_C( 27830), INT16_C( 12119), -INT16_C( 21538), -INT16_C( 26278), -INT16_C( 21134), -INT16_C( 12642), -INT16_C( 7297), -INT16_C( 26495), INT16_C( 29429) }, { INT16_C( 31766), INT16_C( 20949), INT16_C( 1225), INT16_C( 8709), INT16_C( 22985), -INT16_C( 
8523), INT16_C( 31728), -INT16_C( 31368), -INT16_C( 17643), -INT16_C( 2286), -INT16_C( 26761), INT16_C( 1099), INT16_C( 26190), -INT16_C( 28221), INT16_C( 25127), -INT16_C( 9460) }, { INT16_C( 0), -INT16_C( 7939), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 31493), INT16_C( 0), -INT16_C( 5524), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 30018), -INT16_C( 1368), INT16_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_maskz_add_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm256_mask_add_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t src[8]; const simde__mmask8 k; const int32_t a[8]; const int32_t b[8]; const int32_t r[8]; } test_vec[] = { { { INT32_C( 426361587), -INT32_C( 443903963), INT32_C( 1108332220), INT32_C( 1919687830), INT32_C( 1848164895), -INT32_C( 1877796598), INT32_C( 483830142), -INT32_C( 2033377471) }, UINT8_C(237), { INT32_C( 1091358368), INT32_C( 454097987), INT32_C( 794389166), INT32_C( 249974803), INT32_C( 152267150), INT32_C( 907080727), -INT32_C( 1733770061), INT32_C( 220923521) }, { INT32_C( 1746020713), INT32_C( 1017500003), -INT32_C( 803273524), INT32_C( 1210396863), INT32_C( 579828512), -INT32_C( 646644006), INT32_C( 1430804319), INT32_C( 926985775) }, { -INT32_C( 1457588215), -INT32_C( 443903963), -INT32_C( 8884358), INT32_C( 1460371666), INT32_C( 1848164895), INT32_C( 260436721), -INT32_C( 302965742), INT32_C( 1147909296) } }, { { INT32_C( 1684912389), INT32_C( 1461764767), INT32_C( 531585245), -INT32_C( 1449055219), INT32_C( 1704880266), INT32_C( 70446890), INT32_C( 2071901944), -INT32_C( 833618578) }, UINT8_C(215), { -INT32_C( 1260280170), INT32_C( 60784922), INT32_C( 1789074221), INT32_C( 506242865), 
-INT32_C( 265290717), INT32_C( 773465711), -INT32_C( 249802610), INT32_C( 1884100310) }, { -INT32_C( 1123572585), INT32_C( 950443465), INT32_C( 1139858930), -INT32_C( 1387438223), -INT32_C( 995912866), -INT32_C( 783462299), INT32_C( 568247403), -INT32_C( 175356258) }, { INT32_C( 1911114541), INT32_C( 1011228387), -INT32_C( 1366034145), -INT32_C( 1449055219), -INT32_C( 1261203583), INT32_C( 70446890), INT32_C( 318444793), INT32_C( 1708744052) } }, { { INT32_C( 683801390), -INT32_C( 1524037838), INT32_C( 971777976), -INT32_C( 179262957), -INT32_C( 662180444), INT32_C( 1746109353), -INT32_C( 120687010), -INT32_C( 1478304295) }, UINT8_C(142), { -INT32_C( 1699645306), INT32_C( 1703095819), -INT32_C( 1247556922), -INT32_C( 676606248), -INT32_C( 2057411672), -INT32_C( 2133453849), INT32_C( 1277225381), INT32_C( 895840639) }, { -INT32_C( 241606211), -INT32_C( 714501803), -INT32_C( 395132250), -INT32_C( 1428661824), INT32_C( 1298870179), INT32_C( 1006598770), -INT32_C( 1667424466), INT32_C( 1226468081) }, { INT32_C( 683801390), INT32_C( 988594016), -INT32_C( 1642689172), -INT32_C( 2105268072), -INT32_C( 662180444), INT32_C( 1746109353), -INT32_C( 120687010), INT32_C( 2122308720) } }, { { -INT32_C( 1854580385), INT32_C( 1115024973), -INT32_C( 902732038), INT32_C( 1105570825), -INT32_C( 2019220757), INT32_C( 1567591273), -INT32_C( 1045989337), INT32_C( 483693948) }, UINT8_C( 16), { -INT32_C( 1622664288), -INT32_C( 1596764141), INT32_C( 1531617719), INT32_C( 2136124051), -INT32_C( 833907649), INT32_C( 442458548), -INT32_C( 1966743074), INT32_C( 1510510672) }, { INT32_C( 1995792121), INT32_C( 1901624212), -INT32_C( 724913828), INT32_C( 574753287), -INT32_C( 939006740), INT32_C( 1662860686), -INT32_C( 798097367), -INT32_C( 819579665) }, { -INT32_C( 1854580385), INT32_C( 1115024973), -INT32_C( 902732038), INT32_C( 1105570825), -INT32_C( 1772914389), INT32_C( 1567591273), -INT32_C( 1045989337), INT32_C( 483693948) } }, { { -INT32_C( 2128119628), INT32_C( 286644021), -INT32_C( 
144034294), -INT32_C( 755119821), INT32_C( 737304527), -INT32_C( 795896062), -INT32_C( 1306877446), INT32_C( 2017207584) }, UINT8_C( 39), { -INT32_C( 889153325), INT32_C( 1946641334), -INT32_C( 394827187), -INT32_C( 1838628604), -INT32_C( 1114607536), -INT32_C( 988331075), INT32_C( 505604917), -INT32_C( 158258489) }, { -INT32_C( 19176166), -INT32_C( 739831540), INT32_C( 812816619), INT32_C( 1345615940), INT32_C( 485266830), -INT32_C( 1880526201), -INT32_C( 347473235), -INT32_C( 631923023) }, { -INT32_C( 908329491), INT32_C( 1206809794), INT32_C( 417989432), -INT32_C( 755119821), INT32_C( 737304527), INT32_C( 1426110020), -INT32_C( 1306877446), INT32_C( 2017207584) } }, { { -INT32_C( 35579584), -INT32_C( 1740792689), INT32_C( 1827803830), -INT32_C( 1820729369), -INT32_C( 2037363614), INT32_C( 1301089229), INT32_C( 2117227785), INT32_C( 1313930535) }, UINT8_C( 29), { INT32_C( 773440826), -INT32_C( 38988069), -INT32_C( 324201942), INT32_C( 299194520), -INT32_C( 582542291), INT32_C( 984927229), INT32_C( 579334463), INT32_C( 166931855) }, { -INT32_C( 1529113807), INT32_C( 521813891), -INT32_C( 1447212676), INT32_C( 1970271572), -INT32_C( 1806503392), INT32_C( 1956442032), -INT32_C( 878298291), INT32_C( 934511007) }, { -INT32_C( 755672981), -INT32_C( 1740792689), -INT32_C( 1771414618), -INT32_C( 2025501204), INT32_C( 1905921613), INT32_C( 1301089229), INT32_C( 2117227785), INT32_C( 1313930535) } }, { { INT32_C( 1988271902), -INT32_C( 928422277), INT32_C( 751124781), -INT32_C( 1615080274), -INT32_C( 1457437893), INT32_C( 1967921693), -INT32_C( 719093602), INT32_C( 1590290629) }, UINT8_C(184), { -INT32_C( 200700131), -INT32_C( 1501991200), INT32_C( 1863812719), -INT32_C( 2110506296), -INT32_C( 281811570), -INT32_C( 1957556859), -INT32_C( 274670855), INT32_C( 588775262) }, { INT32_C( 48549005), -INT32_C( 1189837525), -INT32_C( 246645075), INT32_C( 710580162), INT32_C( 1547365021), -INT32_C( 1190799946), -INT32_C( 758766312), -INT32_C( 1139192475) }, { INT32_C( 1988271902), 
-INT32_C( 928422277), INT32_C( 751124781), -INT32_C( 1399926134), INT32_C( 1265553451), INT32_C( 1146610491), -INT32_C( 719093602), -INT32_C( 550417213) } }, { { -INT32_C( 1607524027), -INT32_C( 1435945627), -INT32_C( 1834704498), -INT32_C( 2007313332), -INT32_C( 1883122106), -INT32_C( 1075124600), INT32_C( 1649535565), -INT32_C( 868112955) }, UINT8_C( 77), { INT32_C( 441881310), INT32_C( 1966218471), INT32_C( 1844649392), -INT32_C( 321863841), INT32_C( 26145081), INT32_C( 2066060518), -INT32_C( 1310174374), -INT32_C( 1468908845) }, { INT32_C( 900269734), INT32_C( 1516547149), INT32_C( 1563014998), INT32_C( 47500850), -INT32_C( 2090628180), INT32_C( 430978017), -INT32_C( 2091637067), INT32_C( 1915750227) }, { INT32_C( 1342151044), -INT32_C( 1435945627), -INT32_C( 887302906), -INT32_C( 274362991), -INT32_C( 1883122106), -INT32_C( 1075124600), INT32_C( 893155855), -INT32_C( 868112955) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i src = simde_x_mm256_loadu_epi32(test_vec[i].src); simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi32(test_vec[i].b); simde__m256i r = simde_mm256_mask_add_epi32(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x8(r, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm256_maskz_add_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int32_t a[8]; const int32_t b[8]; const int32_t r[8]; } test_vec[] = { { UINT8_C(101), { -INT32_C( 628298337), INT32_C( 792924887), -INT32_C( 1672768632), -INT32_C( 1074053347), -INT32_C( 1064007895), INT32_C( 1455536926), -INT32_C( 1219047864), INT32_C( 1560140371) }, { INT32_C( 1960233098), INT32_C( 1892059781), -INT32_C( 1373718700), -INT32_C( 352286831), INT32_C( 1327327802), INT32_C( 454025751), INT32_C( 2093702863), -INT32_C( 1771120467) }, { INT32_C( 1331934761), INT32_C( 0), INT32_C( 1248479964), INT32_C( 0), INT32_C( 0), INT32_C( 
1909562677), INT32_C( 874654999), INT32_C( 0) } }, { UINT8_C( 88), { -INT32_C( 643482639), INT32_C( 1022430836), INT32_C( 1708754555), -INT32_C( 1303850341), INT32_C( 1960699394), INT32_C( 694659735), INT32_C( 142013690), INT32_C( 821320707) }, { -INT32_C( 242563209), INT32_C( 1657605631), -INT32_C( 1229937754), INT32_C( 1696216758), -INT32_C( 1213277610), -INT32_C( 353040792), INT32_C( 392990364), INT32_C( 1905216013) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 392366417), INT32_C( 747421784), INT32_C( 0), INT32_C( 535004054), INT32_C( 0) } }, { UINT8_C(212), { -INT32_C( 1571159102), INT32_C( 2016749220), -INT32_C( 1981095890), -INT32_C( 1870763484), -INT32_C( 836082282), -INT32_C( 660363674), -INT32_C( 1962323400), -INT32_C( 143449086) }, { -INT32_C( 765186109), INT32_C( 1029534834), INT32_C( 6988289), INT32_C( 1621597364), -INT32_C( 789981577), INT32_C( 1804329931), INT32_C( 1186001858), INT32_C( 883248384) }, { INT32_C( 0), INT32_C( 0), -INT32_C( 1974107601), INT32_C( 0), -INT32_C( 1626063859), INT32_C( 0), -INT32_C( 776321542), INT32_C( 739799298) } }, { UINT8_C(109), { -INT32_C( 1770917884), -INT32_C( 530889520), -INT32_C( 504237856), -INT32_C( 1597380183), -INT32_C( 1049867779), -INT32_C( 468326631), INT32_C( 559503757), INT32_C( 564004527) }, { INT32_C( 1698045997), -INT32_C( 820697417), -INT32_C( 38633790), INT32_C( 881187224), INT32_C( 1921955153), INT32_C( 941315295), INT32_C( 759745706), -INT32_C( 2126521994) }, { -INT32_C( 72871887), INT32_C( 0), -INT32_C( 542871646), -INT32_C( 716192959), INT32_C( 0), INT32_C( 472988664), INT32_C( 1319249463), INT32_C( 0) } }, { UINT8_C(137), { -INT32_C( 1858814058), -INT32_C( 1042377463), -INT32_C( 887474565), -INT32_C( 2122945850), INT32_C( 1667461733), -INT32_C( 205579704), -INT32_C( 975714158), INT32_C( 1635367554) }, { -INT32_C( 1057794330), INT32_C( 1052274960), -INT32_C( 41436220), INT32_C( 143840279), -INT32_C( 1906448242), -INT32_C( 1788921821), INT32_C( 10365593), INT32_C( 35226418) }, { INT32_C( 
1378358908), INT32_C( 0), INT32_C( 0), -INT32_C( 1979105571), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1670593972) } }, { UINT8_C( 14), { INT32_C( 955121944), -INT32_C( 1858149961), INT32_C( 989283609), -INT32_C( 1140579072), -INT32_C( 447254547), -INT32_C( 109214975), -INT32_C( 782653276), -INT32_C( 848887013) }, { -INT32_C( 1458708646), -INT32_C( 83883126), INT32_C( 1994371146), INT32_C( 1884018050), -INT32_C( 1630751337), -INT32_C( 235406036), INT32_C( 301613089), INT32_C( 1337287860) }, { INT32_C( 0), -INT32_C( 1942033087), -INT32_C( 1311312541), INT32_C( 743438978), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { UINT8_C( 68), { -INT32_C( 17343115), -INT32_C( 1057582083), INT32_C( 1925676355), INT32_C( 513737506), INT32_C( 1449532125), INT32_C( 651379898), INT32_C( 265490234), INT32_C( 1273345831) }, { INT32_C( 1163851905), -INT32_C( 1572780827), INT32_C( 1722687548), INT32_C( 1425094090), INT32_C( 16744716), INT32_C( 1308498419), -INT32_C( 505877045), -INT32_C( 219979393) }, { INT32_C( 0), INT32_C( 0), -INT32_C( 646603393), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 240386811), INT32_C( 0) } }, { UINT8_C(143), { -INT32_C( 1104597870), -INT32_C( 724590379), INT32_C( 1429138415), INT32_C( 1193975544), INT32_C( 1467334920), -INT32_C( 1899396965), INT32_C( 2086966300), INT32_C( 710186623) }, { -INT32_C( 1112373334), INT32_C( 406415980), -INT32_C( 1125556542), -INT32_C( 990567746), -INT32_C( 543146280), -INT32_C( 1976533491), -INT32_C( 337560786), INT32_C( 640868710) }, { INT32_C( 2077996092), -INT32_C( 318174399), INT32_C( 303581873), INT32_C( 203407798), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1351055333) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi32(test_vec[i].b); simde__m256i r = simde_mm256_maskz_add_epi32(test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x8(r, 
simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm256_mask_add_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t src[4]; const simde__mmask8 k; const int64_t a[4]; const int64_t b[4]; const int64_t r[4]; } test_vec[] = { { { INT64_C( 360909485362196701), INT64_C( 7148018188301179842), INT64_C( 1649121149472017725), INT64_C( 7018633285360450459) }, UINT8_C(126), { -INT64_C( 2069953424375465822), INT64_C( 2649419973831635918), INT64_C( 6631995759656128229), INT64_C( 9043749468674069294) }, { INT64_C( 9127572592675566832), INT64_C( 3139630957469384257), -INT64_C( 5603674882213165495), INT64_C( 5601563532454726799) }, { INT64_C( 360909485362196701), INT64_C( 5789050931301020175), INT64_C( 1028320877442962734), -INT64_C( 3801431072580755523) } }, { { INT64_C( 756376000083734984), INT64_C( 9070926205802174906), INT64_C( 8919263102054599581), INT64_C( 7783409138876853393) }, UINT8_C(148), { INT64_C( 382287398418300828), INT64_C( 1115930529724925645), -INT64_C( 2730316823403657925), INT64_C( 770900165413777792) }, { INT64_C( 6713096438988588943), INT64_C( 6664457870741226758), INT64_C( 234662618570907527), -INT64_C( 313102228808514227) }, { INT64_C( 756376000083734984), INT64_C( 9070926205802174906), -INT64_C( 2495654204832750398), INT64_C( 7783409138876853393) } }, { { INT64_C( 1483073417317823758), INT64_C( 8795153962326963887), -INT64_C( 4149002502104727210), INT64_C( 8377900925383693364) }, UINT8_C( 87), { INT64_C( 9038467157306396791), -INT64_C( 9115228638979228790), INT64_C( 3197974720961571240), -INT64_C( 4399071158149707528) }, { INT64_C( 4732531473964703663), -INT64_C( 5052728689924432136), -INT64_C( 7671229005429790575), INT64_C( 8865457101011322896) }, { -INT64_C( 4675745442438451162), INT64_C( 4278786744805890690), -INT64_C( 4473254284468219335), INT64_C( 8377900925383693364) } }, { { -INT64_C( 1491568665462998214), -INT64_C( 3104012194786369954), -INT64_C( 7320413738587291346), INT64_C( 2384471346820870965) }, 
UINT8_C(225), { INT64_C( 8948791811858071094), INT64_C( 5692550040163038202), INT64_C( 830789416759788308), INT64_C( 8983297292243767262) }, { INT64_C( 1554136023168485625), INT64_C( 4643374375046125828), INT64_C( 1207394444856260247), INT64_C( 1262475476216602754) }, { -INT64_C( 7943816238682994897), -INT64_C( 3104012194786369954), -INT64_C( 7320413738587291346), INT64_C( 2384471346820870965) } }, { { -INT64_C( 2613596307693393920), INT64_C( 3708150816535274390), INT64_C( 8656585681334612292), -INT64_C( 4071969454335091598) }, UINT8_C(205), { INT64_C( 4385428620712312621), -INT64_C( 6056401822535469397), -INT64_C( 5800804598671026971), -INT64_C( 3092870165874030482) }, { -INT64_C( 2397873348439446309), INT64_C( 5910269085379428366), -INT64_C( 1513047638103079921), INT64_C( 34238311786967204) }, { INT64_C( 1987555272272866312), INT64_C( 3708150816535274390), -INT64_C( 7313852236774106892), -INT64_C( 3058631854087063278) } }, { { INT64_C( 8817443827781467208), -INT64_C( 6180758275292564870), -INT64_C( 4102290530891378202), INT64_C( 1720054592936513257) }, UINT8_C(101), { INT64_C( 8678261917080766892), INT64_C( 6939458579392878936), -INT64_C( 1907623884063940351), -INT64_C( 1780765326956861806) }, { INT64_C( 3690861860797904287), INT64_C( 7163125489808023225), -INT64_C( 7713699132847182549), -INT64_C( 5838526841458117177) }, { -INT64_C( 6077620295830880437), -INT64_C( 6180758275292564870), INT64_C( 8825421056798428716), INT64_C( 1720054592936513257) } }, { { -INT64_C( 4226189128237357280), -INT64_C( 8943401384532428378), INT64_C( 205521322776642791), INT64_C( 8979119138226217421) }, UINT8_C(100), { INT64_C( 74791247571826988), INT64_C( 6811658127022208365), -INT64_C( 2788904720803475790), INT64_C( 2092431171636941532) }, { -INT64_C( 4721001194988252981), -INT64_C( 5047893754993698744), -INT64_C( 9019989623988394261), INT64_C( 6590525094843788764) }, { -INT64_C( 4226189128237357280), -INT64_C( 8943401384532428378), INT64_C( 6637849728917681565), INT64_C( 
8979119138226217421) } }, { { -INT64_C( 5055154983797961474), INT64_C( 708989412318452), INT64_C( 3248858564412742665), -INT64_C( 1488234339305196330) }, UINT8_C( 97), { -INT64_C( 1196281769919788973), -INT64_C( 5928234014316609444), -INT64_C( 3324535828431189494), -INT64_C( 5570270810707974551) }, { -INT64_C( 1528923573651634989), -INT64_C( 8239253630832908272), INT64_C( 446928026184942104), -INT64_C( 7452034225800619308) }, { -INT64_C( 2725205343571423962), INT64_C( 708989412318452), INT64_C( 3248858564412742665), -INT64_C( 1488234339305196330) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i src = simde_x_mm256_loadu_epi64(test_vec[i].src); simde__m256i a = simde_x_mm256_loadu_epi64(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi64(test_vec[i].b); simde__m256i r = simde_mm256_mask_add_epi64(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x4(r, simde_x_mm256_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm256_maskz_add_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int64_t a[4]; const int64_t b[4]; const int64_t r[4]; } test_vec[] = { { UINT8_C(211), { INT64_C( 5396742621358824656), -INT64_C( 5784837964580948014), INT64_C( 745905599035155894), INT64_C( 4432459820370420263) }, { -INT64_C( 2375627515263830277), INT64_C( 7298802839973142414), -INT64_C( 4614916525088736096), -INT64_C( 9037591478603085962) }, { INT64_C( 3021115106094994379), INT64_C( 1513964875392194400), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(176), { -INT64_C( 3141262892297976873), -INT64_C( 2562411322036282317), INT64_C( 3833520914396613866), INT64_C( 2194941788560940840) }, { INT64_C( 901766347898667771), INT64_C( 7724355933804345671), -INT64_C( 7464625842775737952), -INT64_C( 2080691623985024546) }, { INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(195), { -INT64_C( 6193321100419901660), INT64_C( 8380227380500028405), INT64_C( 6069563355255056514), INT64_C( 
7567844964561032724) }, { -INT64_C( 2028429546419720554), INT64_C( 5900094282335619633), -INT64_C( 3089803978563711891), INT64_C( 365280098963395815) }, { -INT64_C( 8221750646839622214), -INT64_C( 4166422410873903578), INT64_C( 0), INT64_C( 0) } }, { UINT8_C( 97), { -INT64_C( 6385959693145891606), INT64_C( 5329109541483815658), -INT64_C( 2688882311189433873), INT64_C( 9217817113255199635) }, { INT64_C( 1459731060611346300), -INT64_C( 3886780218327649651), INT64_C( 7977890084846895501), INT64_C( 4735556178160298813) }, { -INT64_C( 4926228632534545306), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(134), { INT64_C( 7860706032630943107), INT64_C( 2484009629061100715), INT64_C( 7509120629608935666), -INT64_C( 2386733518501817427) }, { INT64_C( 8447004062159754569), -INT64_C( 1567962289685365340), -INT64_C( 8386187512053637187), -INT64_C( 1215046594740082176) }, { INT64_C( 0), INT64_C( 916047339375735375), -INT64_C( 877066882444701521), INT64_C( 0) } }, { UINT8_C(199), { -INT64_C( 2180470979029174228), INT64_C( 6443804020022187526), -INT64_C( 6418035538474219843), -INT64_C( 5507798642899854187) }, { INT64_C( 2466999869382999871), INT64_C( 996995031899642676), -INT64_C( 73336982042601123), INT64_C( 6989615507504111215) }, { INT64_C( 286528890353825643), INT64_C( 7440799051921830202), -INT64_C( 6491372520516820966), INT64_C( 0) } }, { UINT8_C( 0), { -INT64_C( 8767112038748657730), -INT64_C( 7052635034042790208), -INT64_C( 6603890804231331725), -INT64_C( 3131137631410272858) }, { -INT64_C( 1591887663143957093), INT64_C( 8922603379407992151), -INT64_C( 206750837837817474), INT64_C( 1015076583748941132) }, { INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(231), { INT64_C( 6842434063541616318), -INT64_C( 6075517079359538540), INT64_C( 1270586480222899316), INT64_C( 7380126505793994543) }, { INT64_C( 8872434914943821287), -INT64_C( 6662427551341978491), -INT64_C( 3648454390316234397), INT64_C( 6544313852933680065) }, { -INT64_C( 2731875095224114011), 
INT64_C( 5708799443008034585), -INT64_C( 2377867910093335081), INT64_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi64(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi64(test_vec[i].b); simde__m256i r = simde_mm256_maskz_add_epi64(test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x4(r, simde_x_mm256_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_add_epi8 (SIMDE_MUNIT_TEST_ARGS) { struct { const int8_t a[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { { { INT8_C( 16), -INT8_C( 23), -INT8_C( 42), -INT8_C( 110), -INT8_C( 66), -INT8_C( 42), INT8_C( 81), -INT8_C( 107), -INT8_C( 64), -INT8_C( 89), INT8_C( 109), INT8_C( 69), INT8_C( 32), -INT8_C( 26), -INT8_C( 52), -INT8_C( 121), -INT8_C( 91), -INT8_C( 119), INT8_C( 20), -INT8_C( 127), INT8_C( 41), -INT8_C( 19), INT8_C( 18), INT8_C( 3), INT8_C( 103), INT8_C( 122), INT8_C( 63), -INT8_C( 75), INT8_C( 40), -INT8_C( 38), -INT8_C( 44), INT8_C( 56), -INT8_C( 61), -INT8_C( 86), -INT8_C( 54), -INT8_C( 127), INT8_MIN, INT8_C( 27), INT8_C( 22), INT8_C( 64), -INT8_C( 61), -INT8_C( 125), -INT8_C( 122), -INT8_C( 29), INT8_C( 105), INT8_C( 82), INT8_C( 106), INT8_C( 14), -INT8_C( 37), INT8_C( 126), -INT8_C( 113), INT8_C( 5), INT8_C( 107), -INT8_C( 95), INT8_C( 8), -INT8_C( 46), INT8_C( 27), INT8_C( 71), -INT8_C( 120), INT8_C( 67), INT8_C( 33), INT8_C( 92), INT8_C( 124), -INT8_C( 28) }, { INT8_C( 6), INT8_C( 70), INT8_C( 101), -INT8_C( 122), INT8_C( 98), INT8_C( 123), -INT8_C( 58), INT8_C( 37), -INT8_C( 1), INT8_C( 76), INT8_C( 8), INT8_C( 104), -INT8_C( 98), INT8_C( 114), INT8_C( 119), INT8_C( 122), -INT8_C( 16), INT8_C( 6), INT8_MAX, INT8_C( 91), -INT8_C( 88), -INT8_C( 121), INT8_C( 45), -INT8_C( 61), -INT8_C( 50), -INT8_C( 75), INT8_C( 7), -INT8_C( 17), INT8_C( 17), -INT8_C( 125), -INT8_C( 45), INT8_C( 23), -INT8_C( 55), INT8_C( 56), -INT8_C( 99), INT8_C( 43), -INT8_C( 77), INT8_C( 100), INT8_C( 80), 
-INT8_C( 78), -INT8_C( 80), INT8_C( 88), INT8_C( 27), INT8_C( 79), -INT8_C( 54), -INT8_C( 110), -INT8_C( 55), -INT8_C( 70), -INT8_C( 104), INT8_C( 72), INT8_C( 21), INT8_C( 64), -INT8_C( 49), INT8_C( 67), INT8_C( 4), -INT8_C( 99), -INT8_C( 8), INT8_C( 11), -INT8_C( 116), INT8_C( 10), -INT8_C( 114), INT8_C( 95), INT8_C( 33), INT8_C( 87) }, { INT8_C( 22), INT8_C( 47), INT8_C( 59), INT8_C( 24), INT8_C( 32), INT8_C( 81), INT8_C( 23), -INT8_C( 70), -INT8_C( 65), -INT8_C( 13), INT8_C( 117), -INT8_C( 83), -INT8_C( 66), INT8_C( 88), INT8_C( 67), INT8_C( 1), -INT8_C( 107), -INT8_C( 113), -INT8_C( 109), -INT8_C( 36), -INT8_C( 47), INT8_C( 116), INT8_C( 63), -INT8_C( 58), INT8_C( 53), INT8_C( 47), INT8_C( 70), -INT8_C( 92), INT8_C( 57), INT8_C( 93), -INT8_C( 89), INT8_C( 79), -INT8_C( 116), -INT8_C( 30), INT8_C( 103), -INT8_C( 84), INT8_C( 51), INT8_MAX, INT8_C( 102), -INT8_C( 14), INT8_C( 115), -INT8_C( 37), -INT8_C( 95), INT8_C( 50), INT8_C( 51), -INT8_C( 28), INT8_C( 51), -INT8_C( 56), INT8_C( 115), -INT8_C( 58), -INT8_C( 92), INT8_C( 69), INT8_C( 58), -INT8_C( 28), INT8_C( 12), INT8_C( 111), INT8_C( 19), INT8_C( 82), INT8_C( 20), INT8_C( 77), -INT8_C( 81), -INT8_C( 69), -INT8_C( 99), INT8_C( 59) } }, { { -INT8_C( 105), -INT8_C( 65), -INT8_C( 125), INT8_C( 74), INT8_C( 35), -INT8_C( 45), -INT8_C( 3), -INT8_C( 45), INT8_C( 44), INT8_C( 24), INT8_C( 34), -INT8_C( 10), -INT8_C( 86), -INT8_C( 21), -INT8_C( 79), INT8_C( 66), INT8_C( 51), -INT8_C( 58), -INT8_C( 125), INT8_C( 2), INT8_C( 9), -INT8_C( 121), -INT8_C( 97), INT8_C( 2), -INT8_C( 110), INT8_C( 43), INT8_C( 12), INT8_C( 32), -INT8_C( 118), INT8_C( 45), INT8_C( 119), INT8_C( 33), -INT8_C( 20), -INT8_C( 6), INT8_C( 108), INT8_C( 15), -INT8_C( 50), INT8_C( 105), -INT8_C( 29), -INT8_C( 6), -INT8_C( 127), INT8_C( 5), -INT8_C( 16), INT8_C( 43), -INT8_C( 15), -INT8_C( 95), INT8_C( 109), INT8_C( 36), INT8_C( 104), -INT8_C( 16), INT8_C( 39), INT8_C( 113), INT8_C( 119), -INT8_C( 58), INT8_C( 115), INT8_C( 9), -INT8_C( 14), 
INT8_MAX, INT8_C( 41), INT8_C( 124), -INT8_C( 83), -INT8_C( 95), -INT8_C( 98), -INT8_C( 103) }, { -INT8_C( 101), INT8_C( 10), -INT8_C( 87), INT8_C( 105), INT8_C( 115), -INT8_C( 116), INT8_C( 99), -INT8_C( 12), -INT8_C( 111), INT8_C( 84), INT8_C( 31), -INT8_C( 126), -INT8_C( 11), -INT8_C( 116), -INT8_C( 89), INT8_C( 93), INT8_C( 125), -INT8_C( 50), -INT8_C( 49), -INT8_C( 12), -INT8_C( 108), INT8_C( 66), -INT8_C( 2), -INT8_C( 122), -INT8_C( 62), INT8_C( 39), INT8_C( 3), INT8_C( 111), -INT8_C( 56), -INT8_C( 95), INT8_C( 8), INT8_C( 100), -INT8_C( 85), -INT8_C( 79), -INT8_C( 51), INT8_C( 30), INT8_C( 61), INT8_C( 49), INT8_C( 18), -INT8_C( 49), -INT8_C( 123), INT8_C( 49), INT8_C( 81), INT8_C( 122), -INT8_C( 67), -INT8_C( 8), -INT8_C( 40), INT8_C( 58), -INT8_C( 58), -INT8_C( 89), INT8_C( 47), INT8_C( 91), -INT8_C( 23), INT8_C( 45), -INT8_C( 31), -INT8_C( 85), INT8_C( 84), -INT8_C( 28), INT8_C( 26), INT8_C( 29), -INT8_C( 123), INT8_C( 35), -INT8_C( 127), INT8_C( 48) }, { INT8_C( 50), -INT8_C( 55), INT8_C( 44), -INT8_C( 77), -INT8_C( 106), INT8_C( 95), INT8_C( 96), -INT8_C( 57), -INT8_C( 67), INT8_C( 108), INT8_C( 65), INT8_C( 120), -INT8_C( 97), INT8_C( 119), INT8_C( 88), -INT8_C( 97), -INT8_C( 80), -INT8_C( 108), INT8_C( 82), -INT8_C( 10), -INT8_C( 99), -INT8_C( 55), -INT8_C( 99), -INT8_C( 120), INT8_C( 84), INT8_C( 82), INT8_C( 15), -INT8_C( 113), INT8_C( 82), -INT8_C( 50), INT8_MAX, -INT8_C( 123), -INT8_C( 105), -INT8_C( 85), INT8_C( 57), INT8_C( 45), INT8_C( 11), -INT8_C( 102), -INT8_C( 11), -INT8_C( 55), INT8_C( 6), INT8_C( 54), INT8_C( 65), -INT8_C( 91), -INT8_C( 82), -INT8_C( 103), INT8_C( 69), INT8_C( 94), INT8_C( 46), -INT8_C( 105), INT8_C( 86), -INT8_C( 52), INT8_C( 96), -INT8_C( 13), INT8_C( 84), -INT8_C( 76), INT8_C( 70), INT8_C( 99), INT8_C( 67), -INT8_C( 103), INT8_C( 50), -INT8_C( 60), INT8_C( 31), -INT8_C( 55) } }, { { -INT8_C( 44), INT8_C( 78), INT8_C( 78), INT8_C( 18), INT8_MAX, INT8_C( 96), -INT8_C( 31), INT8_C( 4), -INT8_C( 111), INT8_C( 50), 
INT8_MAX, INT8_C( 79), INT8_C( 43), INT8_C( 87), -INT8_C( 119), -INT8_C( 15), -INT8_C( 2), -INT8_C( 72), INT8_C( 76), -INT8_C( 25), -INT8_C( 27), INT8_C( 46), -INT8_C( 109), INT8_C( 58), INT8_C( 18), -INT8_C( 83), INT8_C( 87), -INT8_C( 104), -INT8_C( 48), -INT8_C( 40), -INT8_C( 56), -INT8_C( 91), INT8_C( 38), INT8_C( 23), -INT8_C( 73), -INT8_C( 90), INT8_C( 119), -INT8_C( 104), -INT8_C( 86), INT8_C( 9), -INT8_C( 54), INT8_C( 41), INT8_C( 88), -INT8_C( 11), INT8_MIN, -INT8_C( 31), -INT8_C( 25), INT8_C( 126), -INT8_C( 102), INT8_C( 51), INT8_C( 102), INT8_MAX, INT8_C( 97), -INT8_C( 7), -INT8_C( 71), INT8_C( 116), -INT8_C( 90), INT8_C( 16), INT8_C( 12), INT8_C( 119), -INT8_C( 24), -INT8_C( 44), INT8_C( 28), INT8_C( 15) }, { -INT8_C( 21), -INT8_C( 45), -INT8_C( 75), INT8_C( 99), INT8_C( 107), INT8_C( 95), INT8_C( 108), INT8_C( 53), -INT8_C( 119), -INT8_C( 60), INT8_C( 43), INT8_C( 9), -INT8_C( 91), INT8_C( 18), -INT8_C( 120), INT8_C( 63), INT8_C( 69), -INT8_C( 18), -INT8_C( 65), -INT8_C( 89), -INT8_C( 25), INT8_C( 120), INT8_C( 27), -INT8_C( 115), -INT8_C( 119), INT8_C( 39), INT8_C( 4), INT8_C( 113), -INT8_C( 5), INT8_C( 32), INT8_MIN, -INT8_C( 25), -INT8_C( 13), INT8_C( 53), INT8_C( 74), INT8_C( 94), -INT8_C( 107), -INT8_C( 74), -INT8_C( 108), INT8_C( 30), INT8_C( 122), -INT8_C( 65), INT8_C( 39), INT8_C( 31), -INT8_C( 47), -INT8_C( 81), INT8_C( 95), INT8_C( 22), -INT8_C( 99), INT8_C( 30), -INT8_C( 67), -INT8_C( 124), -INT8_C( 106), -INT8_C( 40), INT8_C( 18), INT8_C( 31), -INT8_C( 1), INT8_C( 22), -INT8_C( 111), -INT8_C( 5), INT8_C( 55), INT8_C( 17), -INT8_C( 30), INT8_C( 42) }, { -INT8_C( 65), INT8_C( 33), INT8_C( 3), INT8_C( 117), -INT8_C( 22), -INT8_C( 65), INT8_C( 77), INT8_C( 57), INT8_C( 26), -INT8_C( 10), -INT8_C( 86), INT8_C( 88), -INT8_C( 48), INT8_C( 105), INT8_C( 17), INT8_C( 48), INT8_C( 67), -INT8_C( 90), INT8_C( 11), -INT8_C( 114), -INT8_C( 52), -INT8_C( 90), -INT8_C( 82), -INT8_C( 57), -INT8_C( 101), -INT8_C( 44), INT8_C( 91), INT8_C( 9), -INT8_C( 53), 
-INT8_C( 8), INT8_C( 72), -INT8_C( 116), INT8_C( 25), INT8_C( 76), INT8_C( 1), INT8_C( 4), INT8_C( 12), INT8_C( 78), INT8_C( 62), INT8_C( 39), INT8_C( 68), -INT8_C( 24), INT8_MAX, INT8_C( 20), INT8_C( 81), -INT8_C( 112), INT8_C( 70), -INT8_C( 108), INT8_C( 55), INT8_C( 81), INT8_C( 35), INT8_C( 3), -INT8_C( 9), -INT8_C( 47), -INT8_C( 53), -INT8_C( 109), -INT8_C( 91), INT8_C( 38), -INT8_C( 99), INT8_C( 114), INT8_C( 31), -INT8_C( 27), -INT8_C( 2), INT8_C( 57) } }, { { INT8_C( 71), INT8_C( 44), -INT8_C( 119), -INT8_C( 36), -INT8_C( 30), INT8_C( 29), -INT8_C( 6), INT8_C( 92), -INT8_C( 36), INT8_C( 33), INT8_C( 123), -INT8_C( 83), -INT8_C( 47), -INT8_C( 38), -INT8_C( 61), INT8_C( 110), -INT8_C( 8), -INT8_C( 127), -INT8_C( 13), -INT8_C( 113), INT8_C( 89), INT8_C( 5), -INT8_C( 82), INT8_C( 89), INT8_C( 27), INT8_C( 63), INT8_C( 84), INT8_C( 82), INT8_C( 81), INT8_C( 54), INT8_C( 125), -INT8_C( 104), INT8_C( 98), INT8_C( 6), INT8_C( 116), INT8_C( 68), INT8_C( 35), INT8_C( 110), -INT8_C( 96), -INT8_C( 1), -INT8_C( 113), INT8_C( 27), -INT8_C( 84), INT8_C( 96), -INT8_C( 10), INT8_C( 111), -INT8_C( 49), -INT8_C( 18), -INT8_C( 16), -INT8_C( 62), INT8_C( 125), INT8_C( 74), -INT8_C( 57), INT8_C( 44), -INT8_C( 93), -INT8_C( 30), INT8_C( 107), -INT8_C( 9), INT8_C( 53), -INT8_C( 68), INT8_C( 45), -INT8_C( 78), INT8_C( 84), -INT8_C( 113) }, { -INT8_C( 72), -INT8_C( 56), -INT8_C( 45), -INT8_C( 37), INT8_C( 54), INT8_C( 115), -INT8_C( 38), -INT8_C( 58), -INT8_C( 114), -INT8_C( 122), INT8_C( 38), -INT8_C( 124), -INT8_C( 11), -INT8_C( 11), INT8_C( 115), -INT8_C( 26), -INT8_C( 73), -INT8_C( 16), INT8_C( 48), INT8_C( 126), INT8_C( 28), -INT8_C( 45), INT8_C( 97), -INT8_C( 120), -INT8_C( 54), -INT8_C( 106), INT8_C( 68), -INT8_C( 9), INT8_C( 72), -INT8_C( 103), -INT8_C( 122), INT8_C( 0), INT8_C( 97), INT8_C( 89), -INT8_C( 37), -INT8_C( 104), -INT8_C( 52), -INT8_C( 75), INT8_C( 94), INT8_C( 90), INT8_C( 59), -INT8_C( 124), -INT8_C( 33), INT8_C( 48), INT8_C( 122), INT8_C( 82), INT8_C( 22), 
INT8_C( 49), INT8_C( 66), INT8_C( 70), -INT8_C( 80), INT8_C( 95), INT8_C( 25), INT8_C( 17), -INT8_C( 25), -INT8_C( 29), -INT8_C( 89), INT8_C( 43), -INT8_C( 38), -INT8_C( 17), -INT8_C( 60), INT8_C( 96), -INT8_C( 17), INT8_C( 38) }, { -INT8_C( 1), -INT8_C( 12), INT8_C( 92), -INT8_C( 73), INT8_C( 24), -INT8_C( 112), -INT8_C( 44), INT8_C( 34), INT8_C( 106), -INT8_C( 89), -INT8_C( 95), INT8_C( 49), -INT8_C( 58), -INT8_C( 49), INT8_C( 54), INT8_C( 84), -INT8_C( 81), INT8_C( 113), INT8_C( 35), INT8_C( 13), INT8_C( 117), -INT8_C( 40), INT8_C( 15), -INT8_C( 31), -INT8_C( 27), -INT8_C( 43), -INT8_C( 104), INT8_C( 73), -INT8_C( 103), -INT8_C( 49), INT8_C( 3), -INT8_C( 104), -INT8_C( 61), INT8_C( 95), INT8_C( 79), -INT8_C( 36), -INT8_C( 17), INT8_C( 35), -INT8_C( 2), INT8_C( 89), -INT8_C( 54), -INT8_C( 97), -INT8_C( 117), -INT8_C( 112), INT8_C( 112), -INT8_C( 63), -INT8_C( 27), INT8_C( 31), INT8_C( 50), INT8_C( 8), INT8_C( 45), -INT8_C( 87), -INT8_C( 32), INT8_C( 61), -INT8_C( 118), -INT8_C( 59), INT8_C( 18), INT8_C( 34), INT8_C( 15), -INT8_C( 85), -INT8_C( 15), INT8_C( 18), INT8_C( 67), -INT8_C( 75) } }, { { -INT8_C( 71), -INT8_C( 54), -INT8_C( 66), -INT8_C( 123), INT8_MAX, INT8_C( 28), -INT8_C( 32), -INT8_C( 70), -INT8_C( 96), -INT8_C( 65), -INT8_C( 22), INT8_C( 26), INT8_C( 17), INT8_C( 1), INT8_C( 76), INT8_C( 83), INT8_C( 71), -INT8_C( 4), -INT8_C( 78), INT8_C( 97), INT8_C( 13), -INT8_C( 103), INT8_C( 68), -INT8_C( 76), -INT8_C( 59), INT8_C( 31), -INT8_C( 93), -INT8_C( 119), INT8_MAX, -INT8_C( 110), -INT8_C( 81), INT8_C( 57), INT8_C( 92), INT8_C( 109), -INT8_C( 66), -INT8_C( 37), -INT8_C( 119), -INT8_C( 98), -INT8_C( 107), INT8_C( 42), INT8_C( 93), INT8_MAX, INT8_C( 68), INT8_C( 110), INT8_MIN, -INT8_C( 112), -INT8_C( 62), -INT8_C( 56), -INT8_C( 116), INT8_C( 116), INT8_C( 41), -INT8_C( 103), INT8_C( 14), INT8_C( 109), INT8_C( 77), -INT8_C( 45), -INT8_C( 116), -INT8_C( 16), INT8_C( 92), INT8_C( 12), -INT8_C( 126), INT8_C( 12), INT8_C( 69), -INT8_C( 34) }, { INT8_C( 121), 
INT8_C( 3), -INT8_C( 71), INT8_C( 3), -INT8_C( 94), INT8_C( 78), INT8_C( 45), -INT8_C( 1), -INT8_C( 50), INT8_C( 113), INT8_C( 110), INT8_C( 78), INT8_C( 2), INT8_C( 48), INT8_C( 22), -INT8_C( 114), -INT8_C( 92), INT8_C( 63), INT8_C( 40), -INT8_C( 78), -INT8_C( 83), INT8_C( 117), -INT8_C( 123), INT8_C( 57), INT8_C( 102), -INT8_C( 30), INT8_C( 69), -INT8_C( 24), -INT8_C( 18), -INT8_C( 118), -INT8_C( 57), INT8_C( 103), -INT8_C( 114), INT8_MIN, INT8_C( 106), INT8_C( 48), -INT8_C( 49), -INT8_C( 105), INT8_C( 47), -INT8_C( 99), INT8_C( 9), -INT8_C( 99), -INT8_C( 21), INT8_C( 11), -INT8_C( 51), INT8_C( 2), -INT8_C( 103), INT8_C( 114), INT8_C( 65), -INT8_C( 63), INT8_C( 36), -INT8_C( 18), INT8_C( 55), -INT8_C( 86), INT8_C( 40), -INT8_C( 99), -INT8_C( 116), INT8_C( 109), -INT8_C( 123), INT8_C( 122), -INT8_C( 8), INT8_C( 76), -INT8_C( 31), -INT8_C( 122) }, { INT8_C( 50), -INT8_C( 51), INT8_C( 119), -INT8_C( 120), INT8_C( 33), INT8_C( 106), INT8_C( 13), -INT8_C( 71), INT8_C( 110), INT8_C( 48), INT8_C( 88), INT8_C( 104), INT8_C( 19), INT8_C( 49), INT8_C( 98), -INT8_C( 31), -INT8_C( 21), INT8_C( 59), -INT8_C( 38), INT8_C( 19), -INT8_C( 70), INT8_C( 14), -INT8_C( 55), -INT8_C( 19), INT8_C( 43), INT8_C( 1), -INT8_C( 24), INT8_C( 113), INT8_C( 109), INT8_C( 28), INT8_C( 118), -INT8_C( 96), -INT8_C( 22), -INT8_C( 19), INT8_C( 40), INT8_C( 11), INT8_C( 88), INT8_C( 53), -INT8_C( 60), -INT8_C( 57), INT8_C( 102), INT8_C( 28), INT8_C( 47), INT8_C( 121), INT8_C( 77), -INT8_C( 110), INT8_C( 91), INT8_C( 58), -INT8_C( 51), INT8_C( 53), INT8_C( 77), -INT8_C( 121), INT8_C( 69), INT8_C( 23), INT8_C( 117), INT8_C( 112), INT8_C( 24), INT8_C( 93), -INT8_C( 31), -INT8_C( 122), INT8_C( 122), INT8_C( 88), INT8_C( 38), INT8_C( 100) } }, { { -INT8_C( 51), INT8_C( 76), -INT8_C( 74), -INT8_C( 100), -INT8_C( 29), -INT8_C( 27), INT8_C( 57), -INT8_C( 20), -INT8_C( 125), INT8_C( 36), -INT8_C( 9), INT8_C( 80), INT8_C( 38), -INT8_C( 111), -INT8_C( 62), INT8_C( 104), INT8_C( 82), -INT8_C( 25), INT8_C( 86), 
-INT8_C( 119), -INT8_C( 111), INT8_C( 126), INT8_C( 38), INT8_C( 29), -INT8_C( 20), -INT8_C( 84), -INT8_C( 105), -INT8_C( 28), -INT8_C( 8), INT8_C( 120), INT8_C( 106), -INT8_C( 59), -INT8_C( 60), INT8_C( 32), INT8_C( 97), -INT8_C( 88), INT8_C( 5), -INT8_C( 102), -INT8_C( 108), -INT8_C( 120), -INT8_C( 65), -INT8_C( 116), -INT8_C( 39), -INT8_C( 27), INT8_C( 29), -INT8_C( 101), INT8_C( 77), INT8_C( 111), -INT8_C( 126), -INT8_C( 92), -INT8_C( 7), INT8_C( 19), INT8_C( 34), INT8_C( 31), INT8_C( 48), INT8_C( 14), -INT8_C( 53), -INT8_C( 57), -INT8_C( 14), -INT8_C( 60), INT8_C( 64), INT8_C( 92), -INT8_C( 119), INT8_C( 4) }, { INT8_C( 124), -INT8_C( 21), -INT8_C( 84), -INT8_C( 126), -INT8_C( 123), INT8_C( 65), INT8_C( 10), INT8_C( 68), -INT8_C( 51), -INT8_C( 29), INT8_C( 42), -INT8_C( 22), INT8_MAX, INT8_C( 119), INT8_C( 89), INT8_C( 1), INT8_C( 27), INT8_C( 82), INT8_C( 21), INT8_C( 62), INT8_C( 114), INT8_C( 69), INT8_C( 76), INT8_C( 61), INT8_C( 13), INT8_C( 63), INT8_C( 1), INT8_C( 77), -INT8_C( 101), -INT8_C( 117), INT8_C( 81), INT8_C( 24), INT8_C( 118), -INT8_C( 2), -INT8_C( 102), -INT8_C( 5), INT8_C( 63), -INT8_C( 92), INT8_C( 64), INT8_C( 12), -INT8_C( 120), INT8_C( 106), -INT8_C( 10), INT8_C( 7), -INT8_C( 31), INT8_C( 79), INT8_C( 8), -INT8_C( 3), -INT8_C( 94), INT8_C( 29), INT8_C( 59), INT8_C( 20), INT8_C( 99), -INT8_C( 121), INT8_C( 81), INT8_C( 112), -INT8_C( 58), INT8_C( 83), -INT8_C( 67), INT8_C( 98), -INT8_C( 34), INT8_C( 14), INT8_C( 122), INT8_C( 84) }, { INT8_C( 73), INT8_C( 55), INT8_C( 98), INT8_C( 30), INT8_C( 104), INT8_C( 38), INT8_C( 67), INT8_C( 48), INT8_C( 80), INT8_C( 7), INT8_C( 33), INT8_C( 58), -INT8_C( 91), INT8_C( 8), INT8_C( 27), INT8_C( 105), INT8_C( 109), INT8_C( 57), INT8_C( 107), -INT8_C( 57), INT8_C( 3), -INT8_C( 61), INT8_C( 114), INT8_C( 90), -INT8_C( 7), -INT8_C( 21), -INT8_C( 104), INT8_C( 49), -INT8_C( 109), INT8_C( 3), -INT8_C( 69), -INT8_C( 35), INT8_C( 58), INT8_C( 30), -INT8_C( 5), -INT8_C( 93), INT8_C( 68), INT8_C( 62), 
-INT8_C( 44), -INT8_C( 108), INT8_C( 71), -INT8_C( 10), -INT8_C( 49), -INT8_C( 20), -INT8_C( 2), -INT8_C( 22), INT8_C( 85), INT8_C( 108), INT8_C( 36), -INT8_C( 63), INT8_C( 52), INT8_C( 39), -INT8_C( 123), -INT8_C( 90), -INT8_C( 127), INT8_C( 126), -INT8_C( 111), INT8_C( 26), -INT8_C( 81), INT8_C( 38), INT8_C( 30), INT8_C( 106), INT8_C( 3), INT8_C( 88) } }, { { INT8_C( 12), INT8_C( 20), INT8_C( 79), INT8_C( 75), -INT8_C( 72), -INT8_C( 113), INT8_C( 87), INT8_C( 64), -INT8_C( 7), INT8_C( 77), INT8_C( 71), -INT8_C( 37), -INT8_C( 99), INT8_C( 80), -INT8_C( 40), INT8_C( 63), INT8_C( 109), INT8_C( 19), INT8_C( 83), -INT8_C( 48), -INT8_C( 102), -INT8_C( 92), INT8_C( 64), INT8_C( 97), -INT8_C( 9), -INT8_C( 3), -INT8_C( 61), -INT8_C( 43), INT8_C( 12), INT8_C( 61), INT8_C( 41), INT8_C( 24), INT8_C( 81), INT8_C( 121), INT8_C( 100), INT8_C( 9), INT8_C( 8), -INT8_C( 69), INT8_C( 74), INT8_C( 2), INT8_C( 9), -INT8_C( 111), -INT8_C( 35), -INT8_C( 90), -INT8_C( 31), -INT8_C( 75), -INT8_C( 27), INT8_C( 79), -INT8_C( 56), INT8_C( 56), INT8_C( 31), INT8_C( 98), -INT8_C( 36), INT8_C( 96), -INT8_C( 61), -INT8_C( 44), INT8_C( 93), -INT8_C( 122), -INT8_C( 87), INT8_C( 105), -INT8_C( 61), -INT8_C( 45), -INT8_C( 126), INT8_C( 20) }, { INT8_C( 76), -INT8_C( 26), INT8_C( 30), INT8_C( 84), -INT8_C( 95), INT8_C( 104), INT8_C( 86), -INT8_C( 86), -INT8_C( 7), INT8_C( 51), INT8_C( 80), -INT8_C( 37), -INT8_C( 24), INT8_C( 53), INT8_C( 42), -INT8_C( 80), INT8_C( 109), INT8_C( 73), INT8_C( 19), INT8_C( 74), -INT8_C( 87), -INT8_C( 42), INT8_C( 30), INT8_C( 7), INT8_C( 93), -INT8_C( 57), INT8_C( 112), INT8_C( 32), -INT8_C( 102), -INT8_C( 14), INT8_C( 53), -INT8_C( 26), -INT8_C( 40), INT8_C( 83), INT8_C( 59), INT8_C( 122), -INT8_C( 69), -INT8_C( 111), INT8_C( 36), -INT8_C( 76), -INT8_C( 59), INT8_C( 117), -INT8_C( 113), -INT8_C( 83), -INT8_C( 86), -INT8_C( 71), INT8_C( 94), INT8_C( 24), INT8_C( 3), INT8_C( 113), INT8_C( 98), -INT8_C( 84), INT8_C( 71), INT8_MIN, -INT8_C( 77), -INT8_C( 92), INT8_C( 71), 
INT8_C( 36), -INT8_C( 59), -INT8_C( 30), INT8_C( 22), -INT8_C( 6), -INT8_C( 56), -INT8_C( 17) }, { INT8_C( 88), -INT8_C( 6), INT8_C( 109), -INT8_C( 97), INT8_C( 89), -INT8_C( 9), -INT8_C( 83), -INT8_C( 22), -INT8_C( 14), INT8_MIN, -INT8_C( 105), -INT8_C( 74), -INT8_C( 123), -INT8_C( 123), INT8_C( 2), -INT8_C( 17), -INT8_C( 38), INT8_C( 92), INT8_C( 102), INT8_C( 26), INT8_C( 67), INT8_C( 122), INT8_C( 94), INT8_C( 104), INT8_C( 84), -INT8_C( 60), INT8_C( 51), -INT8_C( 11), -INT8_C( 90), INT8_C( 47), INT8_C( 94), -INT8_C( 2), INT8_C( 41), -INT8_C( 52), -INT8_C( 97), -INT8_C( 125), -INT8_C( 61), INT8_C( 76), INT8_C( 110), -INT8_C( 74), -INT8_C( 50), INT8_C( 6), INT8_C( 108), INT8_C( 83), -INT8_C( 117), INT8_C( 110), INT8_C( 67), INT8_C( 103), -INT8_C( 53), -INT8_C( 87), -INT8_C( 127), INT8_C( 14), INT8_C( 35), -INT8_C( 32), INT8_C( 118), INT8_C( 120), -INT8_C( 92), -INT8_C( 86), INT8_C( 110), INT8_C( 75), -INT8_C( 39), -INT8_C( 51), INT8_C( 74), INT8_C( 3) } }, { { INT8_C( 77), INT8_C( 3), INT8_C( 105), INT8_C( 8), -INT8_C( 107), -INT8_C( 115), -INT8_C( 68), INT8_C( 90), INT8_C( 2), INT8_C( 76), INT8_C( 7), -INT8_C( 83), INT8_C( 5), INT8_C( 101), -INT8_C( 59), INT8_C( 8), -INT8_C( 42), INT8_C( 39), -INT8_C( 75), INT8_C( 30), -INT8_C( 89), INT8_C( 104), -INT8_C( 62), -INT8_C( 18), -INT8_C( 116), -INT8_C( 121), -INT8_C( 48), -INT8_C( 93), -INT8_C( 127), -INT8_C( 103), -INT8_C( 110), -INT8_C( 50), -INT8_C( 100), -INT8_C( 5), -INT8_C( 42), INT8_C( 49), -INT8_C( 120), -INT8_C( 109), -INT8_C( 117), -INT8_C( 117), -INT8_C( 33), -INT8_C( 109), INT8_C( 56), -INT8_C( 28), -INT8_C( 8), -INT8_C( 3), -INT8_C( 19), -INT8_C( 49), INT8_C( 36), -INT8_C( 94), -INT8_C( 19), -INT8_C( 53), INT8_C( 10), -INT8_C( 81), -INT8_C( 71), -INT8_C( 105), INT8_C( 55), -INT8_C( 118), INT8_C( 58), -INT8_C( 72), INT8_C( 35), -INT8_C( 52), -INT8_C( 121), -INT8_C( 65) }, { -INT8_C( 57), INT8_C( 93), -INT8_C( 15), INT8_C( 79), -INT8_C( 16), INT8_C( 124), -INT8_C( 38), -INT8_C( 49), INT8_C( 15), INT8_C( 
18), -INT8_C( 76), INT8_C( 8), INT8_C( 15), -INT8_C( 95), -INT8_C( 41), INT8_C( 51), INT8_C( 67), -INT8_C( 60), -INT8_C( 2), INT8_C( 77), INT8_C( 115), -INT8_C( 72), -INT8_C( 28), -INT8_C( 86), INT8_C( 66), INT8_C( 30), INT8_C( 99), INT8_C( 101), -INT8_C( 22), -INT8_C( 22), INT8_C( 36), -INT8_C( 79), INT8_C( 71), INT8_C( 21), INT8_C( 1), INT8_C( 56), -INT8_C( 110), -INT8_C( 37), INT8_C( 7), -INT8_C( 95), -INT8_C( 18), -INT8_C( 69), -INT8_C( 87), -INT8_C( 3), INT8_C( 92), INT8_MIN, INT8_C( 49), -INT8_C( 97), INT8_C( 68), INT8_C( 47), -INT8_C( 19), -INT8_C( 72), -INT8_C( 25), -INT8_C( 47), INT8_C( 98), INT8_C( 41), -INT8_C( 16), -INT8_C( 59), -INT8_C( 114), -INT8_C( 38), -INT8_C( 81), -INT8_C( 77), -INT8_C( 116), -INT8_C( 9) }, { INT8_C( 20), INT8_C( 96), INT8_C( 90), INT8_C( 87), -INT8_C( 123), INT8_C( 9), -INT8_C( 106), INT8_C( 41), INT8_C( 17), INT8_C( 94), -INT8_C( 69), -INT8_C( 75), INT8_C( 20), INT8_C( 6), -INT8_C( 100), INT8_C( 59), INT8_C( 25), -INT8_C( 21), -INT8_C( 77), INT8_C( 107), INT8_C( 26), INT8_C( 32), -INT8_C( 90), -INT8_C( 104), -INT8_C( 50), -INT8_C( 91), INT8_C( 51), INT8_C( 8), INT8_C( 107), -INT8_C( 125), -INT8_C( 74), INT8_MAX, -INT8_C( 29), INT8_C( 16), -INT8_C( 41), INT8_C( 105), INT8_C( 26), INT8_C( 110), -INT8_C( 110), INT8_C( 44), -INT8_C( 51), INT8_C( 78), -INT8_C( 31), -INT8_C( 31), INT8_C( 84), INT8_C( 125), INT8_C( 30), INT8_C( 110), INT8_C( 104), -INT8_C( 47), -INT8_C( 38), -INT8_C( 125), -INT8_C( 15), INT8_MIN, INT8_C( 27), -INT8_C( 64), INT8_C( 39), INT8_C( 79), -INT8_C( 56), -INT8_C( 110), -INT8_C( 46), INT8_MAX, INT8_C( 19), -INT8_C( 74) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_add_epi8(a, b); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_add_epi8 (SIMDE_MUNIT_TEST_ARGS) { 
static const struct { const int8_t src[64]; const simde__mmask64 k; const int8_t a[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { { { INT8_C( 20), -INT8_C( 19), INT8_C( 38), INT8_C( 45), -INT8_C( 89), -INT8_C( 95), INT8_C( 34), -INT8_C( 3), -INT8_C( 21), INT8_C( 99), -INT8_C( 35), INT8_C( 73), INT8_C( 3), -INT8_C( 119), INT8_C( 1), -INT8_C( 7), -INT8_C( 123), -INT8_C( 90), INT8_C( 32), INT8_C( 73), -INT8_C( 12), -INT8_C( 115), -INT8_C( 93), INT8_C( 56), INT8_C( 116), INT8_C( 96), -INT8_C( 89), -INT8_C( 108), INT8_C( 46), -INT8_C( 17), INT8_C( 92), INT8_C( 67), -INT8_C( 35), -INT8_C( 125), INT8_C( 112), -INT8_C( 124), INT8_C( 36), -INT8_C( 110), -INT8_C( 127), INT8_C( 15), -INT8_C( 11), INT8_C( 95), INT8_C( 88), -INT8_C( 8), -INT8_C( 24), INT8_C( 89), -INT8_C( 14), INT8_C( 109), -INT8_C( 1), INT8_C( 18), -INT8_C( 73), -INT8_C( 13), -INT8_C( 97), INT8_C( 90), INT8_C( 43), INT8_C( 19), -INT8_C( 69), -INT8_C( 45), -INT8_C( 88), -INT8_C( 23), -INT8_C( 62), INT8_C( 4), INT8_C( 44), -INT8_C( 97) }, UINT64_C( 2646890825868614791), { INT8_C( 4), INT8_C( 19), INT8_C( 28), -INT8_C( 20), INT8_C( 109), INT8_C( 14), INT8_C( 90), INT8_C( 108), INT8_C( 32), INT8_C( 17), INT8_C( 96), -INT8_C( 64), INT8_C( 107), -INT8_C( 117), -INT8_C( 45), INT8_C( 38), INT8_C( 94), INT8_C( 123), INT8_C( 16), INT8_C( 33), INT8_MIN, INT8_C( 60), -INT8_C( 64), INT8_C( 7), -INT8_C( 39), -INT8_C( 28), -INT8_C( 77), INT8_C( 7), -INT8_C( 119), INT8_C( 110), INT8_C( 43), -INT8_C( 115), -INT8_C( 127), INT8_C( 72), INT8_C( 121), -INT8_C( 18), INT8_C( 86), -INT8_C( 45), INT8_C( 91), INT8_C( 119), -INT8_C( 28), -INT8_C( 69), INT8_C( 55), INT8_C( 80), INT8_C( 70), INT8_C( 10), INT8_C( 118), -INT8_C( 91), -INT8_C( 122), -INT8_C( 122), -INT8_C( 58), INT8_C( 6), -INT8_C( 61), -INT8_C( 122), INT8_C( 13), -INT8_C( 100), INT8_C( 106), -INT8_C( 64), -INT8_C( 93), -INT8_C( 13), INT8_C( 46), -INT8_C( 49), INT8_MIN, -INT8_C( 80) }, { INT8_C( 23), -INT8_C( 6), -INT8_C( 98), INT8_C( 109), -INT8_C( 51), 
-INT8_C( 7), -INT8_C( 28), -INT8_C( 78), -INT8_C( 76), INT8_C( 27), INT8_C( 2), -INT8_C( 5), INT8_C( 38), INT8_C( 120), -INT8_C( 96), -INT8_C( 84), -INT8_C( 1), INT8_C( 102), -INT8_C( 78), -INT8_C( 62), -INT8_C( 20), -INT8_C( 65), INT8_C( 94), INT8_C( 87), INT8_MIN, INT8_C( 1), INT8_C( 74), -INT8_C( 82), -INT8_C( 48), -INT8_C( 53), INT8_C( 94), -INT8_C( 25), -INT8_C( 59), -INT8_C( 3), INT8_C( 85), -INT8_C( 110), -INT8_C( 10), INT8_C( 57), INT8_C( 68), -INT8_C( 85), INT8_C( 85), INT8_C( 70), -INT8_C( 90), INT8_C( 123), -INT8_C( 65), INT8_C( 70), INT8_C( 39), -INT8_C( 66), -INT8_C( 84), -INT8_C( 39), INT8_MIN, -INT8_C( 104), -INT8_C( 104), -INT8_C( 34), -INT8_C( 17), INT8_C( 24), -INT8_C( 33), INT8_C( 58), -INT8_C( 57), -INT8_C( 80), INT8_C( 5), INT8_C( 37), -INT8_C( 105), -INT8_C( 54) }, { INT8_C( 27), INT8_C( 13), -INT8_C( 70), INT8_C( 45), -INT8_C( 89), -INT8_C( 95), INT8_C( 34), INT8_C( 30), -INT8_C( 21), INT8_C( 99), INT8_C( 98), -INT8_C( 69), -INT8_C( 111), -INT8_C( 119), INT8_C( 1), -INT8_C( 46), INT8_C( 93), -INT8_C( 31), INT8_C( 32), INT8_C( 73), -INT8_C( 12), -INT8_C( 5), -INT8_C( 93), INT8_C( 56), INT8_C( 89), -INT8_C( 27), -INT8_C( 89), -INT8_C( 75), INT8_C( 46), INT8_C( 57), INT8_C( 92), INT8_C( 116), -INT8_C( 35), INT8_C( 69), -INT8_C( 50), INT8_MIN, INT8_C( 36), INT8_C( 12), -INT8_C( 127), INT8_C( 15), INT8_C( 57), INT8_C( 95), -INT8_C( 35), -INT8_C( 8), -INT8_C( 24), INT8_C( 80), -INT8_C( 14), INT8_C( 99), INT8_C( 50), INT8_C( 95), -INT8_C( 73), -INT8_C( 98), INT8_C( 91), INT8_C( 100), INT8_C( 43), -INT8_C( 76), -INT8_C( 69), -INT8_C( 45), INT8_C( 106), -INT8_C( 23), -INT8_C( 62), -INT8_C( 12), INT8_C( 44), -INT8_C( 97) } }, { { INT8_C( 34), -INT8_C( 20), INT8_C( 92), INT8_C( 25), INT8_C( 38), -INT8_C( 95), -INT8_C( 60), INT8_C( 123), -INT8_C( 25), INT8_C( 106), -INT8_C( 10), -INT8_C( 90), -INT8_C( 80), INT8_C( 29), INT8_C( 100), INT8_C( 92), -INT8_C( 10), -INT8_C( 28), -INT8_C( 12), -INT8_C( 114), -INT8_C( 62), -INT8_C( 28), -INT8_C( 89), -INT8_C( 
94), INT8_C( 30), INT8_C( 110), INT8_C( 82), INT8_C( 35), -INT8_C( 109), -INT8_C( 23), -INT8_C( 19), -INT8_C( 74), -INT8_C( 42), INT8_C( 73), -INT8_C( 49), -INT8_C( 4), -INT8_C( 22), -INT8_C( 109), INT8_C( 119), -INT8_C( 46), -INT8_C( 3), INT8_C( 109), INT8_C( 120), -INT8_C( 83), -INT8_C( 118), -INT8_C( 35), INT8_C( 9), INT8_MIN, -INT8_C( 63), -INT8_C( 3), INT8_C( 14), -INT8_C( 124), -INT8_C( 31), -INT8_C( 75), INT8_C( 38), -INT8_C( 1), INT8_C( 35), INT8_C( 120), INT8_C( 34), -INT8_C( 73), INT8_C( 97), INT8_C( 15), INT8_C( 109), INT8_C( 55) }, UINT64_C(14705847965410606169), { INT8_C( 23), -INT8_C( 114), INT8_C( 121), -INT8_C( 95), INT8_C( 107), -INT8_C( 126), INT8_C( 33), INT8_C( 44), INT8_MAX, INT8_C( 48), -INT8_C( 80), INT8_C( 97), -INT8_C( 27), -INT8_C( 42), INT8_C( 96), INT8_C( 9), INT8_C( 78), -INT8_C( 125), -INT8_C( 64), -INT8_C( 80), -INT8_C( 110), INT8_C( 45), -INT8_C( 25), -INT8_C( 21), INT8_C( 105), INT8_C( 27), INT8_C( 47), INT8_C( 56), -INT8_C( 59), INT8_C( 68), INT8_C( 4), -INT8_C( 35), -INT8_C( 46), INT8_C( 125), INT8_C( 126), INT8_C( 61), -INT8_C( 1), -INT8_C( 96), INT8_C( 106), INT8_C( 126), -INT8_C( 48), INT8_C( 26), -INT8_C( 33), -INT8_C( 75), -INT8_C( 15), INT8_C( 64), -INT8_C( 66), INT8_C( 63), -INT8_C( 61), INT8_C( 126), -INT8_C( 17), INT8_C( 85), -INT8_C( 85), -INT8_C( 41), INT8_C( 65), INT8_C( 20), -INT8_C( 14), INT8_C( 112), INT8_C( 76), -INT8_C( 73), -INT8_C( 76), INT8_C( 80), -INT8_C( 108), -INT8_C( 121) }, { -INT8_C( 51), INT8_C( 19), -INT8_C( 60), -INT8_C( 52), -INT8_C( 77), INT8_C( 46), INT8_C( 75), -INT8_C( 125), INT8_C( 73), INT8_C( 42), INT8_C( 56), INT8_C( 58), INT8_C( 106), -INT8_C( 9), INT8_C( 121), INT8_C( 45), INT8_C( 117), INT8_C( 105), -INT8_C( 125), INT8_C( 33), INT8_C( 64), -INT8_C( 60), INT8_C( 53), INT8_C( 50), INT8_C( 52), -INT8_C( 126), -INT8_C( 23), -INT8_C( 24), -INT8_C( 46), INT8_C( 126), INT8_C( 111), -INT8_C( 96), -INT8_C( 111), INT8_C( 52), INT8_C( 108), INT8_C( 68), INT8_C( 98), -INT8_C( 73), -INT8_C( 57), 
-INT8_C( 85), -INT8_C( 30), -INT8_C( 1), -INT8_C( 27), INT8_C( 76), -INT8_C( 10), INT8_C( 95), INT8_C( 122), INT8_C( 108), -INT8_C( 56), -INT8_C( 3), -INT8_C( 115), INT8_C( 8), -INT8_C( 63), -INT8_C( 62), INT8_C( 58), -INT8_C( 11), INT8_C( 68), INT8_C( 35), -INT8_C( 35), INT8_C( 23), -INT8_C( 95), INT8_C( 77), -INT8_C( 73), INT8_C( 50) }, { -INT8_C( 28), -INT8_C( 20), INT8_C( 92), INT8_C( 109), INT8_C( 30), -INT8_C( 95), INT8_C( 108), INT8_C( 123), -INT8_C( 25), INT8_C( 106), -INT8_C( 24), -INT8_C( 101), INT8_C( 79), -INT8_C( 51), INT8_C( 100), INT8_C( 92), -INT8_C( 61), -INT8_C( 20), -INT8_C( 12), -INT8_C( 114), -INT8_C( 46), -INT8_C( 15), -INT8_C( 89), -INT8_C( 94), -INT8_C( 99), -INT8_C( 99), INT8_C( 82), INT8_C( 35), -INT8_C( 109), -INT8_C( 23), INT8_C( 115), -INT8_C( 74), INT8_C( 99), -INT8_C( 79), -INT8_C( 22), -INT8_C( 127), -INT8_C( 22), -INT8_C( 109), INT8_C( 49), INT8_C( 41), -INT8_C( 3), INT8_C( 25), INT8_C( 120), INT8_C( 1), -INT8_C( 118), -INT8_C( 97), INT8_C( 9), -INT8_C( 85), -INT8_C( 117), -INT8_C( 3), INT8_C( 124), -INT8_C( 124), INT8_C( 108), -INT8_C( 75), INT8_C( 38), -INT8_C( 1), INT8_C( 35), INT8_C( 120), INT8_C( 41), -INT8_C( 50), INT8_C( 97), INT8_C( 15), INT8_C( 75), -INT8_C( 71) } }, { { -INT8_C( 127), INT8_C( 35), INT8_C( 118), -INT8_C( 29), -INT8_C( 37), INT8_C( 61), -INT8_C( 113), -INT8_C( 67), INT8_C( 61), INT8_C( 116), INT8_C( 9), INT8_C( 51), -INT8_C( 45), -INT8_C( 125), -INT8_C( 97), -INT8_C( 101), INT8_MIN, INT8_C( 44), -INT8_C( 93), INT8_C( 65), -INT8_C( 17), -INT8_C( 35), INT8_C( 54), INT8_C( 51), INT8_C( 1), INT8_C( 20), INT8_C( 74), -INT8_C( 94), INT8_C( 97), INT8_C( 1), -INT8_C( 43), -INT8_C( 30), INT8_C( 37), INT8_C( 75), -INT8_C( 59), INT8_C( 0), -INT8_C( 119), INT8_C( 84), -INT8_C( 67), -INT8_C( 58), -INT8_C( 55), -INT8_C( 58), -INT8_C( 7), -INT8_C( 100), INT8_C( 74), -INT8_C( 103), INT8_C( 56), -INT8_C( 54), -INT8_C( 59), -INT8_C( 37), INT8_C( 12), -INT8_C( 76), -INT8_C( 71), INT8_C( 66), -INT8_C( 24), -INT8_C( 70), INT8_C( 
86), INT8_C( 50), INT8_C( 92), -INT8_C( 73), INT8_C( 52), INT8_C( 49), -INT8_C( 103), INT8_C( 89) }, UINT64_C( 8992587514113515389), { -INT8_C( 36), -INT8_C( 59), INT8_C( 25), INT8_C( 38), INT8_C( 94), INT8_C( 81), -INT8_C( 15), INT8_C( 36), INT8_C( 44), -INT8_C( 3), -INT8_C( 40), -INT8_C( 27), INT8_C( 63), -INT8_C( 64), -INT8_C( 97), -INT8_C( 106), -INT8_C( 13), -INT8_C( 4), INT8_C( 77), INT8_C( 39), INT8_C( 45), -INT8_C( 25), INT8_MIN, -INT8_C( 86), INT8_C( 70), -INT8_C( 39), -INT8_C( 80), -INT8_C( 7), -INT8_C( 17), INT8_C( 124), INT8_C( 118), -INT8_C( 53), INT8_C( 66), -INT8_C( 113), -INT8_C( 14), -INT8_C( 96), -INT8_C( 32), -INT8_C( 29), -INT8_C( 60), INT8_C( 12), -INT8_C( 32), -INT8_C( 99), -INT8_C( 14), INT8_C( 31), INT8_C( 93), -INT8_C( 111), -INT8_C( 75), INT8_C( 80), -INT8_C( 115), INT8_C( 3), INT8_C( 119), -INT8_C( 69), -INT8_C( 22), -INT8_C( 9), INT8_C( 101), INT8_C( 48), -INT8_C( 48), INT8_C( 22), INT8_C( 41), -INT8_C( 65), -INT8_C( 110), -INT8_C( 97), -INT8_C( 117), -INT8_C( 44) }, { INT8_C( 46), INT8_C( 125), INT8_C( 117), INT8_C( 14), INT8_C( 96), INT8_C( 57), INT8_C( 27), INT8_C( 64), -INT8_C( 42), INT8_C( 13), INT8_C( 95), INT8_C( 52), -INT8_C( 98), INT8_C( 21), -INT8_C( 124), INT8_C( 44), INT8_C( 24), -INT8_C( 4), -INT8_C( 25), INT8_C( 2), -INT8_C( 13), INT8_C( 76), INT8_C( 50), -INT8_C( 60), INT8_C( 98), INT8_C( 91), -INT8_C( 125), -INT8_C( 11), -INT8_C( 5), INT8_C( 14), -INT8_C( 55), INT8_C( 41), -INT8_C( 117), INT8_C( 62), INT8_C( 56), -INT8_C( 21), INT8_C( 120), INT8_C( 83), INT8_C( 43), INT8_C( 78), INT8_C( 96), -INT8_C( 117), -INT8_C( 126), -INT8_C( 2), -INT8_C( 96), INT8_C( 7), INT8_C( 42), -INT8_C( 72), INT8_C( 3), INT8_C( 17), -INT8_C( 70), -INT8_C( 10), INT8_C( 94), -INT8_C( 20), -INT8_C( 70), -INT8_C( 64), INT8_C( 71), INT8_C( 62), -INT8_C( 75), INT8_C( 66), INT8_C( 76), INT8_MAX, INT8_C( 108), -INT8_C( 40) }, { INT8_C( 10), INT8_C( 35), -INT8_C( 114), INT8_C( 52), -INT8_C( 66), -INT8_C( 118), INT8_C( 12), -INT8_C( 67), INT8_C( 2), 
INT8_C( 10), INT8_C( 55), INT8_C( 25), -INT8_C( 35), -INT8_C( 125), INT8_C( 35), -INT8_C( 101), INT8_C( 11), INT8_C( 44), -INT8_C( 93), INT8_C( 41), INT8_C( 32), -INT8_C( 35), -INT8_C( 78), INT8_C( 51), INT8_C( 1), INT8_C( 52), INT8_C( 51), -INT8_C( 94), INT8_C( 97), INT8_C( 1), -INT8_C( 43), -INT8_C( 30), -INT8_C( 51), -INT8_C( 51), -INT8_C( 59), INT8_C( 0), INT8_C( 88), INT8_C( 54), -INT8_C( 67), INT8_C( 90), -INT8_C( 55), INT8_C( 40), INT8_C( 116), -INT8_C( 100), -INT8_C( 3), -INT8_C( 103), INT8_C( 56), -INT8_C( 54), -INT8_C( 59), -INT8_C( 37), INT8_C( 49), -INT8_C( 79), -INT8_C( 71), INT8_C( 66), INT8_C( 31), -INT8_C( 16), INT8_C( 86), INT8_C( 50), -INT8_C( 34), INT8_C( 1), -INT8_C( 34), INT8_C( 30), -INT8_C( 9), INT8_C( 89) } }, { { -INT8_C( 67), -INT8_C( 92), -INT8_C( 61), INT8_C( 53), -INT8_C( 9), -INT8_C( 17), -INT8_C( 124), INT8_C( 87), INT8_C( 122), INT8_C( 6), INT8_C( 85), INT8_C( 26), INT8_C( 13), INT8_MIN, -INT8_C( 46), INT8_C( 16), -INT8_C( 111), -INT8_C( 116), INT8_C( 7), -INT8_C( 17), INT8_C( 120), -INT8_C( 63), -INT8_C( 80), -INT8_C( 65), -INT8_C( 1), INT8_C( 101), INT8_C( 2), INT8_C( 76), -INT8_C( 28), INT8_C( 110), INT8_C( 36), -INT8_C( 94), INT8_C( 18), -INT8_C( 25), -INT8_C( 41), INT8_C( 9), -INT8_C( 42), INT8_C( 91), INT8_C( 96), INT8_C( 80), INT8_C( 98), -INT8_C( 75), INT8_C( 106), INT8_C( 111), INT8_C( 53), INT8_C( 60), INT8_MIN, -INT8_C( 57), -INT8_C( 56), -INT8_C( 121), -INT8_C( 74), INT8_C( 64), INT8_C( 72), INT8_C( 102), INT8_C( 0), INT8_C( 72), -INT8_C( 52), INT8_C( 2), -INT8_C( 108), -INT8_C( 80), INT8_C( 112), -INT8_C( 72), INT8_C( 82), -INT8_C( 126) }, UINT64_C(16701295226602072735), { -INT8_C( 96), INT8_C( 49), INT8_C( 87), -INT8_C( 42), INT8_C( 109), -INT8_C( 41), -INT8_C( 99), INT8_C( 54), INT8_C( 94), INT8_C( 83), INT8_C( 118), -INT8_C( 90), -INT8_C( 70), INT8_C( 118), -INT8_C( 18), -INT8_C( 122), INT8_C( 120), -INT8_C( 126), INT8_C( 54), -INT8_C( 24), INT8_C( 58), -INT8_C( 119), INT8_C( 106), -INT8_C( 38), -INT8_C( 77), -INT8_C( 
11), INT8_C( 80), INT8_C( 56), -INT8_C( 32), INT8_C( 22), INT8_C( 32), -INT8_C( 127), INT8_C( 71), INT8_C( 119), INT8_C( 87), -INT8_C( 75), INT8_C( 78), -INT8_C( 12), -INT8_C( 21), -INT8_C( 84), INT8_C( 71), INT8_C( 97), INT8_C( 82), INT8_C( 1), -INT8_C( 40), INT8_C( 65), -INT8_C( 121), INT8_C( 80), -INT8_C( 61), -INT8_C( 66), INT8_C( 57), -INT8_C( 2), INT8_C( 71), -INT8_C( 93), -INT8_C( 40), -INT8_C( 6), -INT8_C( 103), INT8_C( 40), INT8_C( 50), INT8_C( 121), INT8_C( 62), INT8_C( 82), -INT8_C( 6), -INT8_C( 122) }, { -INT8_C( 55), INT8_C( 81), INT8_C( 59), INT8_C( 23), INT8_C( 69), INT8_C( 38), -INT8_C( 61), -INT8_C( 115), -INT8_C( 121), INT8_C( 22), -INT8_C( 114), INT8_C( 95), INT8_C( 87), INT8_C( 22), -INT8_C( 80), INT8_C( 26), -INT8_C( 44), -INT8_C( 23), INT8_C( 24), INT8_C( 27), -INT8_C( 116), -INT8_C( 16), INT8_C( 21), INT8_C( 37), INT8_C( 24), INT8_C( 71), -INT8_C( 97), INT8_C( 87), -INT8_C( 102), -INT8_C( 103), -INT8_C( 35), INT8_C( 99), -INT8_C( 21), INT8_C( 24), INT8_C( 123), INT8_C( 48), INT8_C( 62), INT8_C( 62), -INT8_C( 67), -INT8_C( 59), INT8_C( 84), INT8_C( 76), INT8_C( 37), -INT8_C( 85), INT8_C( 98), -INT8_C( 43), -INT8_C( 58), INT8_C( 54), -INT8_C( 66), -INT8_C( 34), INT8_C( 81), INT8_C( 74), -INT8_C( 49), INT8_C( 102), INT8_C( 112), -INT8_C( 25), -INT8_C( 83), INT8_C( 15), INT8_C( 62), INT8_C( 71), -INT8_C( 88), INT8_C( 27), -INT8_C( 85), -INT8_C( 109) }, { INT8_C( 105), -INT8_C( 126), -INT8_C( 110), -INT8_C( 19), -INT8_C( 78), -INT8_C( 17), -INT8_C( 124), -INT8_C( 61), INT8_C( 122), INT8_C( 105), INT8_C( 85), INT8_C( 5), INT8_C( 13), -INT8_C( 116), -INT8_C( 46), INT8_C( 16), INT8_C( 76), INT8_C( 107), INT8_C( 7), INT8_C( 3), INT8_C( 120), -INT8_C( 63), -INT8_C( 80), -INT8_C( 1), -INT8_C( 1), INT8_C( 60), -INT8_C( 17), INT8_C( 76), INT8_C( 122), -INT8_C( 81), -INT8_C( 3), -INT8_C( 94), INT8_C( 50), -INT8_C( 25), -INT8_C( 46), INT8_C( 9), -INT8_C( 42), INT8_C( 91), INT8_C( 96), INT8_C( 113), -INT8_C( 101), -INT8_C( 83), INT8_C( 106), -INT8_C( 84), 
INT8_C( 53), INT8_C( 22), INT8_C( 77), -INT8_C( 122), -INT8_C( 56), -INT8_C( 100), -INT8_C( 118), INT8_C( 64), INT8_C( 72), INT8_C( 102), INT8_C( 72), -INT8_C( 31), INT8_C( 70), INT8_C( 55), INT8_C( 112), -INT8_C( 80), INT8_C( 112), INT8_C( 109), -INT8_C( 91), INT8_C( 25) } }, { { INT8_C( 51), INT8_C( 38), -INT8_C( 60), INT8_C( 113), INT8_C( 100), -INT8_C( 127), INT8_C( 55), -INT8_C( 71), -INT8_C( 51), INT8_C( 92), INT8_C( 100), INT8_C( 47), INT8_C( 49), INT8_C( 42), INT8_C( 101), -INT8_C( 17), INT8_C( 9), -INT8_C( 74), INT8_C( 57), -INT8_C( 40), INT8_C( 28), -INT8_C( 87), -INT8_C( 65), -INT8_C( 54), -INT8_C( 72), -INT8_C( 2), INT8_C( 17), INT8_C( 97), INT8_C( 25), -INT8_C( 68), -INT8_C( 12), INT8_C( 77), -INT8_C( 30), -INT8_C( 72), -INT8_C( 66), INT8_C( 71), INT8_C( 58), -INT8_C( 11), INT8_C( 0), INT8_C( 7), INT8_C( 81), INT8_C( 100), INT8_C( 55), -INT8_C( 126), -INT8_C( 113), -INT8_C( 100), INT8_C( 113), -INT8_C( 104), INT8_C( 83), -INT8_C( 85), INT8_C( 112), INT8_C( 111), INT8_C( 84), INT8_C( 47), INT8_C( 57), INT8_C( 13), INT8_C( 45), INT8_C( 75), INT8_C( 110), INT8_C( 71), INT8_C( 7), INT8_C( 98), -INT8_C( 108), -INT8_C( 22) }, UINT64_C(11050761772397056539), { -INT8_C( 107), -INT8_C( 109), INT8_C( 28), INT8_C( 36), INT8_C( 48), -INT8_C( 115), -INT8_C( 68), -INT8_C( 125), INT8_C( 56), INT8_C( 44), -INT8_C( 14), -INT8_C( 115), INT8_C( 92), INT8_C( 44), -INT8_C( 102), -INT8_C( 119), INT8_C( 119), INT8_C( 8), -INT8_C( 48), INT8_C( 126), INT8_C( 106), INT8_C( 100), INT8_C( 104), -INT8_C( 123), -INT8_C( 73), -INT8_C( 103), -INT8_C( 38), -INT8_C( 1), -INT8_C( 54), INT8_C( 55), -INT8_C( 104), INT8_C( 96), -INT8_C( 54), -INT8_C( 76), -INT8_C( 124), -INT8_C( 6), INT8_C( 66), INT8_C( 65), INT8_C( 125), INT8_C( 122), INT8_C( 109), INT8_C( 112), INT8_C( 7), -INT8_C( 55), -INT8_C( 100), -INT8_C( 95), INT8_C( 83), INT8_C( 19), -INT8_C( 87), INT8_C( 35), -INT8_C( 111), INT8_C( 20), -INT8_C( 120), -INT8_C( 6), -INT8_C( 103), INT8_C( 63), -INT8_C( 109), INT8_C( 116), INT8_C( 
62), INT8_C( 94), -INT8_C( 85), -INT8_C( 42), -INT8_C( 66), INT8_C( 117) }, { -INT8_C( 117), INT8_C( 66), INT8_C( 112), -INT8_C( 51), -INT8_C( 125), -INT8_C( 19), INT8_C( 71), -INT8_C( 15), INT8_C( 93), INT8_C( 79), -INT8_C( 70), -INT8_C( 7), -INT8_C( 16), INT8_C( 13), INT8_C( 12), -INT8_C( 102), INT8_C( 49), -INT8_C( 98), -INT8_C( 82), -INT8_C( 71), -INT8_C( 104), INT8_C( 71), -INT8_C( 8), INT8_C( 43), -INT8_C( 69), INT8_C( 54), -INT8_C( 119), INT8_C( 102), INT8_C( 12), INT8_C( 71), -INT8_C( 36), -INT8_C( 105), -INT8_C( 118), INT8_C( 76), INT8_C( 100), INT8_C( 13), INT8_C( 57), -INT8_C( 84), -INT8_C( 2), -INT8_C( 105), -INT8_C( 5), -INT8_C( 71), -INT8_C( 112), -INT8_C( 21), -INT8_C( 58), -INT8_C( 99), -INT8_C( 123), -INT8_C( 9), INT8_C( 59), INT8_C( 51), -INT8_C( 80), -INT8_C( 45), INT8_C( 123), -INT8_C( 88), -INT8_C( 2), INT8_C( 54), -INT8_C( 34), -INT8_C( 120), -INT8_C( 99), -INT8_C( 21), -INT8_C( 49), INT8_C( 121), -INT8_C( 126), INT8_C( 89) }, { INT8_C( 32), -INT8_C( 43), -INT8_C( 60), -INT8_C( 15), -INT8_C( 77), -INT8_C( 127), INT8_C( 55), -INT8_C( 71), -INT8_C( 51), INT8_C( 123), INT8_C( 100), INT8_C( 47), INT8_C( 76), INT8_C( 42), -INT8_C( 90), -INT8_C( 17), -INT8_C( 88), -INT8_C( 74), INT8_C( 57), -INT8_C( 40), INT8_C( 2), -INT8_C( 85), -INT8_C( 65), -INT8_C( 54), INT8_C( 114), -INT8_C( 2), INT8_C( 99), INT8_C( 97), -INT8_C( 42), -INT8_C( 68), INT8_C( 116), INT8_C( 77), -INT8_C( 30), -INT8_C( 72), -INT8_C( 66), INT8_C( 7), INT8_C( 58), -INT8_C( 11), INT8_C( 123), INT8_C( 7), INT8_C( 104), INT8_C( 100), INT8_C( 55), -INT8_C( 126), INT8_C( 98), INT8_C( 62), INT8_C( 113), -INT8_C( 104), INT8_C( 83), -INT8_C( 85), INT8_C( 65), -INT8_C( 25), INT8_C( 3), INT8_C( 47), -INT8_C( 105), INT8_C( 13), INT8_C( 113), INT8_C( 75), INT8_C( 110), INT8_C( 73), INT8_C( 122), INT8_C( 98), -INT8_C( 108), -INT8_C( 50) } }, { { -INT8_C( 59), -INT8_C( 25), INT8_C( 103), -INT8_C( 2), -INT8_C( 109), INT8_C( 101), -INT8_C( 107), -INT8_C( 114), INT8_C( 30), INT8_C( 38), INT8_C( 121), 
-INT8_C( 27), -INT8_C( 61), -INT8_C( 1), -INT8_C( 36), -INT8_C( 2), INT8_C( 50), -INT8_C( 115), -INT8_C( 47), -INT8_C( 83), INT8_C( 53), -INT8_C( 49), -INT8_C( 28), INT8_C( 20), INT8_C( 87), -INT8_C( 127), -INT8_C( 1), INT8_C( 39), -INT8_C( 6), -INT8_C( 127), INT8_MIN, -INT8_C( 65), INT8_C( 104), -INT8_C( 25), -INT8_C( 67), -INT8_C( 5), INT8_C( 77), INT8_C( 83), -INT8_C( 119), INT8_C( 107), INT8_C( 121), INT8_C( 3), INT8_C( 80), INT8_C( 60), INT8_C( 2), INT8_C( 45), INT8_C( 58), INT8_C( 52), -INT8_C( 70), INT8_C( 11), -INT8_C( 30), -INT8_C( 17), -INT8_C( 38), -INT8_C( 58), INT8_C( 3), INT8_C( 50), INT8_C( 71), INT8_C( 2), INT8_C( 89), INT8_C( 65), -INT8_C( 124), -INT8_C( 39), INT8_C( 0), -INT8_C( 20) }, UINT64_C( 9906073169161665985), { INT8_C( 116), -INT8_C( 54), -INT8_C( 59), INT8_C( 118), -INT8_C( 9), -INT8_C( 1), -INT8_C( 85), -INT8_C( 79), INT8_C( 10), -INT8_C( 115), -INT8_C( 96), -INT8_C( 27), INT8_C( 83), -INT8_C( 92), INT8_C( 23), -INT8_C( 102), -INT8_C( 90), INT8_C( 112), -INT8_C( 37), INT8_C( 42), INT8_C( 73), -INT8_C( 37), INT8_C( 23), INT8_C( 10), -INT8_C( 104), -INT8_C( 1), INT8_C( 24), -INT8_C( 87), INT8_C( 112), -INT8_C( 110), INT8_C( 50), -INT8_C( 27), INT8_C( 92), -INT8_C( 8), INT8_C( 91), INT8_C( 83), -INT8_C( 9), INT8_C( 6), INT8_C( 4), INT8_C( 2), -INT8_C( 109), -INT8_C( 92), -INT8_C( 25), -INT8_C( 26), INT8_C( 72), -INT8_C( 2), INT8_MIN, -INT8_C( 17), INT8_C( 110), INT8_C( 91), INT8_C( 25), -INT8_C( 73), INT8_C( 54), INT8_C( 48), -INT8_C( 62), -INT8_C( 49), INT8_C( 47), -INT8_C( 38), INT8_C( 120), -INT8_C( 96), INT8_C( 108), -INT8_C( 86), -INT8_C( 123), -INT8_C( 56) }, { -INT8_C( 94), -INT8_C( 32), INT8_C( 27), -INT8_C( 102), -INT8_C( 25), INT8_C( 31), -INT8_C( 100), INT8_C( 122), -INT8_C( 60), -INT8_C( 125), INT8_C( 97), INT8_C( 12), -INT8_C( 127), -INT8_C( 31), -INT8_C( 5), -INT8_C( 17), INT8_C( 61), INT8_C( 21), -INT8_C( 90), INT8_C( 115), INT8_C( 69), INT8_C( 104), INT8_C( 66), INT8_C( 117), INT8_C( 67), -INT8_C( 70), INT8_C( 21), -INT8_C( 
81), INT8_C( 101), -INT8_C( 102), INT8_C( 120), INT8_C( 7), INT8_C( 122), -INT8_C( 109), -INT8_C( 95), INT8_C( 97), -INT8_C( 77), INT8_C( 61), -INT8_C( 36), INT8_C( 119), -INT8_C( 64), INT8_C( 61), -INT8_C( 125), INT8_C( 65), INT8_C( 30), INT8_MAX, INT8_C( 48), INT8_C( 91), -INT8_C( 108), -INT8_C( 41), -INT8_C( 49), -INT8_C( 39), INT8_C( 63), INT8_C( 17), INT8_C( 78), -INT8_C( 126), -INT8_C( 52), INT8_C( 99), INT8_C( 50), INT8_C( 49), -INT8_C( 3), -INT8_C( 86), INT8_C( 56), INT8_C( 120) }, { INT8_C( 22), -INT8_C( 25), INT8_C( 103), -INT8_C( 2), -INT8_C( 109), INT8_C( 101), INT8_C( 71), INT8_C( 43), -INT8_C( 50), INT8_C( 38), INT8_C( 1), -INT8_C( 15), -INT8_C( 44), -INT8_C( 123), -INT8_C( 36), -INT8_C( 119), INT8_C( 50), -INT8_C( 115), -INT8_C( 47), -INT8_C( 99), INT8_C( 53), INT8_C( 67), INT8_C( 89), INT8_MAX, INT8_C( 87), -INT8_C( 71), INT8_C( 45), INT8_C( 88), -INT8_C( 6), -INT8_C( 127), INT8_MIN, -INT8_C( 65), INT8_C( 104), -INT8_C( 25), -INT8_C( 67), -INT8_C( 5), -INT8_C( 86), INT8_C( 83), -INT8_C( 119), INT8_C( 107), INT8_C( 83), INT8_C( 3), INT8_C( 80), INT8_C( 60), INT8_C( 102), INT8_C( 125), -INT8_C( 80), INT8_C( 52), INT8_C( 2), INT8_C( 11), -INT8_C( 30), -INT8_C( 112), INT8_C( 117), INT8_C( 65), INT8_C( 16), INT8_C( 50), -INT8_C( 5), INT8_C( 2), INT8_C( 89), -INT8_C( 47), -INT8_C( 124), -INT8_C( 39), INT8_C( 0), INT8_C( 64) } }, { { INT8_C( 61), -INT8_C( 38), -INT8_C( 39), -INT8_C( 16), INT8_C( 23), -INT8_C( 75), INT8_C( 103), -INT8_C( 40), -INT8_C( 14), -INT8_C( 21), INT8_C( 25), INT8_C( 17), INT8_C( 106), INT8_C( 74), INT8_C( 108), -INT8_C( 2), INT8_C( 33), INT8_C( 59), -INT8_C( 41), INT8_C( 96), INT8_C( 77), INT8_C( 38), -INT8_C( 29), INT8_C( 25), -INT8_C( 119), INT8_C( 21), INT8_C( 74), -INT8_C( 121), -INT8_C( 65), -INT8_C( 126), -INT8_C( 1), -INT8_C( 4), INT8_C( 92), -INT8_C( 40), -INT8_C( 19), INT8_C( 116), -INT8_C( 114), INT8_C( 84), INT8_C( 76), INT8_MIN, INT8_C( 63), INT8_C( 101), -INT8_C( 111), -INT8_C( 87), -INT8_C( 81), -INT8_C( 2), -INT8_C( 
89), -INT8_C( 48), INT8_C( 57), INT8_MAX, INT8_C( 49), -INT8_C( 122), -INT8_C( 91), INT8_C( 20), -INT8_C( 97), INT8_C( 46), INT8_C( 41), -INT8_C( 23), -INT8_C( 75), -INT8_C( 24), INT8_C( 108), -INT8_C( 76), -INT8_C( 28), -INT8_C( 56) }, UINT64_C( 7321595316467978637), { -INT8_C( 18), INT8_C( 45), INT8_C( 15), -INT8_C( 99), INT8_C( 43), -INT8_C( 74), INT8_C( 110), INT8_C( 100), INT8_C( 53), -INT8_C( 97), -INT8_C( 21), -INT8_C( 38), -INT8_C( 77), -INT8_C( 118), INT8_C( 9), -INT8_C( 36), INT8_C( 116), -INT8_C( 66), -INT8_C( 60), -INT8_C( 32), INT8_C( 115), -INT8_C( 88), -INT8_C( 88), INT8_C( 0), INT8_C( 122), -INT8_C( 27), INT8_C( 27), -INT8_C( 96), INT8_C( 109), -INT8_C( 74), INT8_C( 5), INT8_C( 91), -INT8_C( 29), INT8_C( 20), -INT8_C( 7), INT8_C( 14), -INT8_C( 53), INT8_C( 103), INT8_C( 115), INT8_C( 0), INT8_C( 6), INT8_C( 94), -INT8_C( 37), -INT8_C( 71), -INT8_C( 24), -INT8_C( 28), -INT8_C( 107), INT8_C( 92), -INT8_C( 94), INT8_C( 89), INT8_C( 60), INT8_C( 21), INT8_C( 1), -INT8_C( 27), INT8_C( 21), INT8_C( 123), -INT8_C( 54), INT8_C( 48), INT8_C( 27), INT8_C( 55), -INT8_C( 25), INT8_C( 33), -INT8_C( 109), -INT8_C( 54) }, { INT8_C( 53), -INT8_C( 116), -INT8_C( 39), INT8_C( 0), -INT8_C( 13), INT8_C( 76), INT8_C( 1), -INT8_C( 7), -INT8_C( 86), -INT8_C( 36), -INT8_C( 78), -INT8_C( 110), -INT8_C( 64), INT8_C( 71), -INT8_C( 17), INT8_C( 98), -INT8_C( 96), INT8_C( 43), INT8_C( 120), -INT8_C( 95), INT8_C( 16), -INT8_C( 115), INT8_C( 29), -INT8_C( 38), -INT8_C( 66), INT8_C( 56), INT8_C( 18), -INT8_C( 91), INT8_C( 89), -INT8_C( 91), INT8_C( 111), -INT8_C( 113), INT8_C( 49), INT8_C( 72), -INT8_C( 113), INT8_C( 36), -INT8_C( 108), -INT8_C( 112), INT8_C( 29), INT8_C( 62), INT8_C( 108), -INT8_C( 49), -INT8_C( 47), INT8_C( 44), INT8_C( 22), -INT8_C( 64), -INT8_C( 113), -INT8_C( 74), -INT8_C( 21), INT8_C( 7), INT8_C( 87), -INT8_C( 4), -INT8_C( 108), INT8_C( 116), -INT8_C( 42), INT8_C( 82), -INT8_C( 83), -INT8_C( 24), -INT8_C( 9), INT8_C( 6), -INT8_C( 115), INT8_C( 103), -INT8_C( 
107), -INT8_C( 66) }, { INT8_C( 35), -INT8_C( 38), -INT8_C( 24), -INT8_C( 99), INT8_C( 23), -INT8_C( 75), INT8_C( 103), INT8_C( 93), -INT8_C( 33), -INT8_C( 21), INT8_C( 25), INT8_C( 17), INT8_C( 115), INT8_C( 74), -INT8_C( 8), INT8_C( 62), INT8_C( 33), INT8_C( 59), INT8_C( 60), -INT8_C( 127), -INT8_C( 125), INT8_C( 53), -INT8_C( 29), INT8_C( 25), INT8_C( 56), INT8_C( 29), INT8_C( 74), INT8_C( 69), -INT8_C( 58), -INT8_C( 126), -INT8_C( 1), -INT8_C( 4), INT8_C( 92), INT8_C( 92), -INT8_C( 120), INT8_C( 116), -INT8_C( 114), -INT8_C( 9), INT8_C( 76), INT8_MIN, INT8_C( 63), INT8_C( 101), -INT8_C( 111), -INT8_C( 27), -INT8_C( 81), -INT8_C( 2), -INT8_C( 89), INT8_C( 18), -INT8_C( 115), INT8_C( 96), INT8_C( 49), INT8_C( 17), -INT8_C( 107), INT8_C( 20), -INT8_C( 97), -INT8_C( 51), INT8_C( 119), -INT8_C( 23), INT8_C( 18), -INT8_C( 24), INT8_C( 108), -INT8_C( 120), INT8_C( 40), -INT8_C( 56) } }, { { -INT8_C( 81), INT8_C( 37), -INT8_C( 30), INT8_C( 68), -INT8_C( 75), -INT8_C( 1), -INT8_C( 126), INT8_C( 34), -INT8_C( 50), INT8_C( 83), INT8_C( 78), -INT8_C( 28), INT8_C( 19), -INT8_C( 35), -INT8_C( 102), -INT8_C( 1), -INT8_C( 28), -INT8_C( 14), -INT8_C( 5), INT8_C( 121), INT8_C( 102), -INT8_C( 47), -INT8_C( 53), INT8_C( 19), -INT8_C( 70), -INT8_C( 61), INT8_C( 26), INT8_C( 71), INT8_C( 42), -INT8_C( 81), INT8_C( 6), -INT8_C( 39), -INT8_C( 44), -INT8_C( 24), INT8_C( 29), -INT8_C( 118), -INT8_C( 24), -INT8_C( 96), -INT8_C( 84), -INT8_C( 74), -INT8_C( 13), -INT8_C( 6), -INT8_C( 101), INT8_C( 7), -INT8_C( 40), INT8_C( 53), INT8_C( 6), -INT8_C( 68), INT8_C( 39), INT8_C( 1), INT8_C( 53), -INT8_C( 114), -INT8_C( 46), INT8_C( 1), -INT8_C( 95), -INT8_C( 116), -INT8_C( 60), -INT8_C( 69), -INT8_C( 44), -INT8_C( 18), INT8_C( 107), -INT8_C( 38), -INT8_C( 57), INT8_C( 63) }, UINT64_C( 8674343574248744386), { INT8_C( 112), -INT8_C( 4), INT8_MAX, INT8_C( 72), INT8_C( 49), -INT8_C( 123), INT8_C( 4), INT8_C( 89), -INT8_C( 122), INT8_C( 58), -INT8_C( 25), INT8_C( 89), INT8_C( 59), -INT8_C( 120), 
-INT8_C( 27), -INT8_C( 1), INT8_C( 68), -INT8_C( 71), -INT8_C( 19), -INT8_C( 81), -INT8_C( 109), -INT8_C( 76), -INT8_C( 18), INT8_C( 86), -INT8_C( 103), -INT8_C( 72), INT8_C( 0), INT8_C( 30), INT8_C( 45), INT8_C( 97), -INT8_C( 105), -INT8_C( 99), INT8_C( 93), INT8_C( 22), -INT8_C( 27), -INT8_C( 113), -INT8_C( 100), -INT8_C( 22), -INT8_C( 24), INT8_C( 34), INT8_C( 36), -INT8_C( 49), INT8_C( 123), INT8_C( 95), INT8_C( 87), INT8_C( 97), INT8_C( 94), -INT8_C( 101), INT8_C( 26), INT8_C( 75), INT8_C( 74), -INT8_C( 82), -INT8_C( 1), INT8_C( 57), INT8_C( 4), -INT8_C( 103), -INT8_C( 15), INT8_C( 4), -INT8_C( 73), INT8_C( 30), INT8_C( 102), INT8_C( 78), -INT8_C( 68), -INT8_C( 61) }, { INT8_C( 101), -INT8_C( 95), INT8_C( 82), INT8_C( 1), -INT8_C( 117), INT8_C( 58), INT8_C( 35), -INT8_C( 81), INT8_C( 9), -INT8_C( 97), INT8_C( 14), INT8_C( 97), INT8_C( 0), INT8_C( 108), -INT8_C( 4), INT8_C( 26), -INT8_C( 73), INT8_C( 71), -INT8_C( 56), -INT8_C( 73), INT8_MIN, -INT8_C( 52), INT8_C( 80), INT8_C( 113), -INT8_C( 47), INT8_C( 7), -INT8_C( 113), INT8_C( 55), INT8_C( 86), INT8_C( 75), -INT8_C( 6), -INT8_C( 69), -INT8_C( 19), INT8_C( 77), -INT8_C( 68), INT8_C( 120), -INT8_C( 121), -INT8_C( 33), INT8_C( 40), -INT8_C( 111), INT8_C( 126), INT8_C( 54), -INT8_C( 14), INT8_C( 126), -INT8_C( 93), -INT8_C( 18), -INT8_C( 103), INT8_C( 90), INT8_C( 53), INT8_C( 97), INT8_C( 17), -INT8_C( 75), INT8_C( 46), INT8_C( 97), INT8_C( 38), -INT8_C( 1), INT8_C( 105), -INT8_C( 74), INT8_C( 54), -INT8_C( 65), INT8_C( 1), INT8_C( 48), INT8_C( 122), -INT8_C( 18) }, { -INT8_C( 81), -INT8_C( 99), -INT8_C( 30), INT8_C( 68), -INT8_C( 75), -INT8_C( 1), INT8_C( 39), INT8_C( 8), -INT8_C( 113), INT8_C( 83), -INT8_C( 11), -INT8_C( 28), INT8_C( 19), -INT8_C( 12), -INT8_C( 31), INT8_C( 25), -INT8_C( 5), -INT8_C( 14), -INT8_C( 5), INT8_C( 102), INT8_C( 102), -INT8_C( 47), INT8_C( 62), -INT8_C( 57), -INT8_C( 70), -INT8_C( 65), INT8_C( 26), INT8_C( 85), INT8_C( 42), -INT8_C( 84), INT8_C( 6), INT8_C( 88), INT8_C( 74), 
-INT8_C( 24), -INT8_C( 95), -INT8_C( 118), -INT8_C( 24), -INT8_C( 96), -INT8_C( 84), -INT8_C( 77), -INT8_C( 94), -INT8_C( 6), INT8_C( 109), INT8_C( 7), -INT8_C( 6), INT8_C( 79), -INT8_C( 9), -INT8_C( 68), INT8_C( 79), INT8_C( 1), INT8_C( 53), -INT8_C( 114), -INT8_C( 46), -INT8_C( 102), INT8_C( 42), -INT8_C( 116), -INT8_C( 60), -INT8_C( 69), -INT8_C( 44), -INT8_C( 35), INT8_C( 103), INT8_C( 126), INT8_C( 54), INT8_C( 63) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi8(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_mask_add_epi8(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_add_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask64 k; const int8_t a[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { { UINT64_C( 9122680976650596708), { INT8_C( 68), -INT8_C( 57), INT8_C( 11), -INT8_C( 119), -INT8_C( 113), INT8_C( 124), INT8_C( 91), INT8_C( 1), INT8_C( 112), -INT8_C( 126), INT8_C( 44), -INT8_C( 65), INT8_C( 76), -INT8_C( 60), -INT8_C( 30), INT8_C( 88), -INT8_C( 8), INT8_C( 72), -INT8_C( 34), -INT8_C( 108), INT8_C( 2), INT8_C( 65), -INT8_C( 12), INT8_C( 102), INT8_C( 50), INT8_C( 109), INT8_C( 10), INT8_C( 54), -INT8_C( 77), -INT8_C( 91), -INT8_C( 76), -INT8_C( 9), INT8_C( 108), -INT8_C( 65), INT8_MIN, -INT8_C( 5), INT8_C( 60), -INT8_C( 36), -INT8_C( 4), -INT8_C( 84), INT8_C( 94), INT8_C( 40), INT8_C( 108), -INT8_C( 86), -INT8_C( 20), INT8_C( 78), INT8_C( 2), -INT8_C( 27), -INT8_C( 106), -INT8_C( 32), INT8_C( 121), -INT8_C( 104), INT8_C( 34), INT8_C( 110), -INT8_C( 2), INT8_C( 84), -INT8_C( 37), INT8_C( 8), -INT8_C( 118), -INT8_C( 114), -INT8_C( 83), INT8_C( 63), -INT8_C( 122), INT8_C( 25) }, { -INT8_C( 2), INT8_C( 6), INT8_C( 20), INT8_C( 58), -INT8_C( 
30), INT8_C( 16), -INT8_C( 25), INT8_C( 65), INT8_C( 56), INT8_C( 83), -INT8_C( 21), INT8_C( 37), -INT8_C( 95), -INT8_C( 18), INT8_C( 10), INT8_C( 55), -INT8_C( 50), -INT8_C( 125), -INT8_C( 49), -INT8_C( 16), -INT8_C( 15), -INT8_C( 51), INT8_C( 69), -INT8_C( 52), -INT8_C( 43), -INT8_C( 49), INT8_C( 91), -INT8_C( 125), INT8_C( 14), -INT8_C( 31), -INT8_C( 100), INT8_C( 13), -INT8_C( 25), -INT8_C( 79), INT8_C( 71), -INT8_C( 54), -INT8_C( 63), INT8_C( 46), INT8_C( 11), -INT8_C( 6), -INT8_C( 127), -INT8_C( 10), INT8_C( 31), INT8_C( 34), -INT8_C( 28), INT8_C( 41), INT8_C( 89), -INT8_C( 77), -INT8_C( 84), INT8_C( 40), -INT8_C( 93), -INT8_C( 98), -INT8_C( 11), -INT8_C( 24), INT8_C( 106), -INT8_C( 53), -INT8_C( 72), -INT8_C( 59), INT8_C( 78), -INT8_C( 58), -INT8_C( 90), -INT8_C( 22), -INT8_C( 45), -INT8_C( 114) }, { INT8_C( 0), INT8_C( 0), INT8_C( 31), INT8_C( 0), INT8_C( 0), -INT8_C( 116), INT8_C( 66), INT8_C( 0), -INT8_C( 88), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 19), -INT8_C( 78), -INT8_C( 20), -INT8_C( 113), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 124), -INT8_C( 13), INT8_C( 14), INT8_C( 57), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 101), INT8_C( 0), INT8_C( 0), -INT8_C( 122), INT8_C( 0), INT8_C( 4), INT8_C( 83), INT8_C( 112), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 30), -INT8_C( 117), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 91), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( 0), INT8_C( 54), INT8_C( 23), INT8_C( 0), INT8_C( 0), INT8_C( 31), INT8_C( 0), -INT8_C( 51), -INT8_C( 40), INT8_C( 84), INT8_C( 83), INT8_C( 41), INT8_C( 89), INT8_C( 0) } }, { UINT64_C(14652289079423015835), { INT8_C( 89), INT8_C( 118), -INT8_C( 19), INT8_C( 62), -INT8_C( 97), INT8_C( 71), -INT8_C( 15), INT8_C( 75), INT8_C( 111), -INT8_C( 108), -INT8_C( 23), INT8_C( 101), INT8_C( 125), INT8_C( 84), INT8_C( 48), INT8_C( 53), INT8_C( 25), INT8_C( 126), -INT8_C( 5), -INT8_C( 64), INT8_C( 104), -INT8_C( 49), INT8_C( 78), INT8_C( 4), 
-INT8_C( 22), -INT8_C( 90), INT8_C( 97), INT8_C( 51), INT8_C( 9), -INT8_C( 72), -INT8_C( 2), INT8_C( 98), INT8_C( 46), -INT8_C( 20), -INT8_C( 96), -INT8_C( 51), INT8_C( 51), -INT8_C( 111), INT8_C( 24), -INT8_C( 94), INT8_C( 38), INT8_C( 2), INT8_C( 7), -INT8_C( 93), INT8_C( 86), INT8_C( 55), -INT8_C( 40), INT8_C( 111), -INT8_C( 75), -INT8_C( 45), INT8_C( 47), INT8_C( 30), -INT8_C( 94), INT8_C( 125), INT8_C( 34), -INT8_C( 116), INT8_C( 35), -INT8_C( 125), -INT8_C( 64), INT8_C( 44), INT8_C( 59), -INT8_C( 66), -INT8_C( 113), INT8_C( 105) }, { -INT8_C( 86), INT8_C( 47), INT8_C( 54), -INT8_C( 35), -INT8_C( 63), INT8_C( 78), INT8_MIN, -INT8_C( 25), INT8_C( 80), -INT8_C( 121), -INT8_C( 118), -INT8_C( 90), -INT8_C( 65), INT8_C( 98), INT8_C( 22), INT8_C( 116), INT8_C( 53), INT8_C( 69), -INT8_C( 110), -INT8_C( 40), -INT8_C( 61), -INT8_C( 76), INT8_C( 100), -INT8_C( 26), INT8_C( 55), INT8_C( 36), INT8_C( 19), INT8_C( 114), -INT8_C( 29), -INT8_C( 94), -INT8_C( 37), -INT8_C( 115), -INT8_C( 47), INT8_C( 17), INT8_C( 107), -INT8_C( 110), INT8_C( 96), -INT8_C( 21), INT8_C( 121), -INT8_C( 80), INT8_C( 114), INT8_C( 3), INT8_C( 87), INT8_C( 49), INT8_C( 101), INT8_C( 109), -INT8_C( 90), -INT8_C( 101), -INT8_C( 78), INT8_C( 56), INT8_C( 115), INT8_C( 117), -INT8_C( 19), -INT8_C( 41), INT8_C( 92), INT8_C( 36), -INT8_C( 4), INT8_C( 111), -INT8_C( 105), -INT8_C( 33), INT8_C( 17), INT8_C( 114), INT8_C( 108), -INT8_C( 30) }, { INT8_C( 3), -INT8_C( 91), INT8_C( 0), INT8_C( 27), INT8_C( 96), INT8_C( 0), INT8_C( 0), INT8_C( 50), -INT8_C( 65), INT8_C( 27), INT8_C( 0), INT8_C( 11), INT8_C( 60), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 104), INT8_C( 43), INT8_C( 0), -INT8_C( 78), INT8_C( 0), INT8_C( 33), INT8_C( 0), INT8_C( 116), -INT8_C( 91), -INT8_C( 20), INT8_C( 0), -INT8_C( 39), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 95), INT8_C( 0), INT8_C( 0), -INT8_C( 111), INT8_C( 0), -INT8_C( 104), INT8_C( 5), INT8_C( 0), INT8_C( 0), INT8_C( 0), 
-INT8_C( 92), INT8_C( 126), INT8_C( 0), INT8_C( 103), INT8_C( 11), -INT8_C( 94), INT8_C( 0), -INT8_C( 113), INT8_C( 0), INT8_C( 126), INT8_C( 0), INT8_C( 31), -INT8_C( 14), INT8_C( 0), INT8_C( 11), INT8_C( 0), INT8_C( 0), -INT8_C( 5), INT8_C( 75) } }, { UINT64_C( 3860973301387351940), { -INT8_C( 14), -INT8_C( 21), INT8_C( 102), INT8_C( 87), INT8_C( 88), INT8_C( 12), -INT8_C( 14), INT8_C( 11), INT8_C( 69), INT8_C( 101), INT8_MIN, INT8_C( 50), INT8_C( 61), -INT8_C( 36), INT8_C( 86), INT8_C( 57), INT8_C( 75), -INT8_C( 19), INT8_C( 24), INT8_C( 92), INT8_C( 96), -INT8_C( 124), INT8_C( 63), -INT8_C( 28), INT8_C( 92), -INT8_C( 76), -INT8_C( 56), INT8_C( 30), -INT8_C( 94), INT8_C( 92), INT8_C( 83), -INT8_C( 108), INT8_C( 72), -INT8_C( 70), -INT8_C( 20), -INT8_C( 96), -INT8_C( 58), -INT8_C( 34), -INT8_C( 85), INT8_C( 11), INT8_C( 68), INT8_C( 44), INT8_C( 61), -INT8_C( 127), INT8_C( 8), -INT8_C( 108), -INT8_C( 70), INT8_C( 84), -INT8_C( 127), -INT8_C( 46), -INT8_C( 80), -INT8_C( 31), INT8_C( 86), -INT8_C( 17), -INT8_C( 59), -INT8_C( 78), -INT8_C( 93), -INT8_C( 115), -INT8_C( 47), INT8_C( 70), -INT8_C( 22), INT8_C( 36), -INT8_C( 38), INT8_C( 50) }, { -INT8_C( 34), -INT8_C( 58), -INT8_C( 46), -INT8_C( 91), -INT8_C( 91), INT8_C( 126), -INT8_C( 80), -INT8_C( 23), -INT8_C( 86), -INT8_C( 18), INT8_C( 106), -INT8_C( 78), -INT8_C( 126), INT8_C( 36), INT8_C( 6), INT8_C( 3), -INT8_C( 10), -INT8_C( 73), -INT8_C( 27), INT8_C( 76), -INT8_C( 90), -INT8_C( 86), -INT8_C( 1), INT8_C( 74), INT8_C( 56), -INT8_C( 48), -INT8_C( 112), INT8_C( 34), -INT8_C( 12), INT8_C( 106), INT8_C( 84), -INT8_C( 45), INT8_C( 49), INT8_C( 38), INT8_C( 120), -INT8_C( 42), -INT8_C( 92), INT8_C( 40), -INT8_C( 65), INT8_C( 78), INT8_C( 22), INT8_C( 41), INT8_C( 1), -INT8_C( 104), INT8_C( 77), INT8_C( 7), -INT8_C( 100), INT8_C( 67), -INT8_C( 66), -INT8_C( 127), -INT8_C( 113), INT8_C( 101), INT8_C( 43), -INT8_C( 114), -INT8_C( 81), INT8_C( 99), INT8_C( 94), INT8_C( 63), -INT8_C( 123), INT8_C( 83), -INT8_C( 87), 
-INT8_C( 39), INT8_C( 38), -INT8_C( 38) }, { INT8_C( 0), INT8_C( 0), INT8_C( 56), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 12), -INT8_C( 17), INT8_C( 83), -INT8_C( 22), INT8_C( 0), -INT8_C( 65), INT8_C( 0), INT8_C( 92), INT8_C( 60), INT8_C( 65), INT8_C( 0), -INT8_C( 3), INT8_C( 0), INT8_C( 6), INT8_C( 46), INT8_C( 62), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 88), INT8_C( 0), INT8_C( 0), -INT8_C( 58), -INT8_C( 89), INT8_C( 103), INT8_C( 0), -INT8_C( 32), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 106), INT8_C( 89), INT8_C( 0), INT8_C( 85), INT8_C( 62), INT8_C( 25), INT8_C( 0), -INT8_C( 101), INT8_C( 86), -INT8_C( 105), INT8_C( 0), INT8_C( 0), INT8_C( 63), INT8_C( 0), -INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 21), INT8_C( 1), INT8_C( 0), INT8_C( 86), INT8_C( 0), -INT8_C( 109), -INT8_C( 3), INT8_C( 0), INT8_C( 0) } }, { UINT64_C(15993249600680009216), { -INT8_C( 104), -INT8_C( 12), INT8_C( 117), -INT8_C( 27), -INT8_C( 5), INT8_C( 17), INT8_C( 40), -INT8_C( 70), -INT8_C( 110), -INT8_C( 72), INT8_C( 31), -INT8_C( 66), INT8_C( 70), -INT8_C( 50), INT8_C( 33), -INT8_C( 91), INT8_C( 13), -INT8_C( 89), -INT8_C( 8), -INT8_C( 74), INT8_MIN, INT8_C( 30), -INT8_C( 111), INT8_MIN, -INT8_C( 68), INT8_C( 65), INT8_C( 37), -INT8_C( 126), -INT8_C( 79), INT8_C( 24), INT8_C( 95), INT8_C( 73), INT8_C( 12), -INT8_C( 43), INT8_C( 47), INT8_C( 7), -INT8_C( 26), INT8_C( 87), -INT8_C( 63), INT8_C( 121), INT8_C( 15), -INT8_C( 32), INT8_C( 55), INT8_C( 86), -INT8_C( 82), INT8_C( 88), -INT8_C( 5), -INT8_C( 69), -INT8_C( 1), -INT8_C( 13), INT8_C( 114), INT8_MIN, INT8_C( 17), INT8_C( 3), INT8_C( 0), -INT8_C( 51), INT8_C( 68), INT8_C( 37), INT8_C( 79), -INT8_C( 11), INT8_C( 61), -INT8_C( 81), INT8_C( 63), INT8_C( 73) }, { -INT8_C( 124), INT8_C( 110), INT8_C( 81), INT8_C( 106), -INT8_C( 59), INT8_C( 18), -INT8_C( 29), -INT8_C( 43), -INT8_C( 13), INT8_C( 26), INT8_C( 43), -INT8_C( 95), INT8_C( 115), INT8_C( 38), INT8_C( 93), INT8_C( 114), INT8_C( 25), -INT8_C( 
49), -INT8_C( 14), INT8_C( 42), -INT8_C( 46), -INT8_C( 13), -INT8_C( 9), INT8_C( 22), INT8_C( 24), INT8_C( 70), INT8_C( 12), INT8_C( 86), -INT8_C( 11), INT8_C( 75), -INT8_C( 97), INT8_C( 121), -INT8_C( 71), -INT8_C( 16), -INT8_C( 28), INT8_C( 126), INT8_C( 3), -INT8_C( 57), INT8_C( 83), -INT8_C( 10), -INT8_C( 30), INT8_C( 126), -INT8_C( 105), INT8_C( 85), -INT8_C( 92), -INT8_C( 12), -INT8_C( 57), -INT8_C( 67), -INT8_C( 61), -INT8_C( 70), -INT8_C( 25), -INT8_C( 107), -INT8_C( 83), -INT8_C( 34), -INT8_C( 84), -INT8_C( 59), INT8_C( 37), -INT8_C( 72), INT8_C( 27), INT8_C( 26), INT8_C( 3), -INT8_C( 69), -INT8_C( 108), -INT8_C( 68) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 46), INT8_C( 74), INT8_C( 95), -INT8_C( 71), INT8_C( 0), INT8_C( 0), INT8_C( 23), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 82), INT8_C( 17), INT8_C( 0), -INT8_C( 106), INT8_C( 0), INT8_C( 0), INT8_C( 49), INT8_C( 0), INT8_C( 0), INT8_C( 99), INT8_C( 0), -INT8_C( 62), INT8_C( 0), -INT8_C( 59), INT8_C( 19), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 20), INT8_C( 111), -INT8_C( 15), INT8_C( 94), -INT8_C( 50), -INT8_C( 85), INT8_C( 0), INT8_C( 76), -INT8_C( 62), INT8_C( 0), -INT8_C( 62), -INT8_C( 83), INT8_C( 0), INT8_C( 0), -INT8_C( 66), -INT8_C( 31), -INT8_C( 84), -INT8_C( 110), INT8_C( 105), INT8_C( 0), INT8_C( 106), INT8_C( 15), INT8_C( 64), INT8_C( 0), -INT8_C( 45), INT8_C( 5) } }, { UINT64_C( 2424218903589320875), { INT8_C( 12), INT8_C( 60), INT8_C( 118), -INT8_C( 79), INT8_C( 48), INT8_C( 62), INT8_C( 110), -INT8_C( 12), -INT8_C( 8), INT8_C( 86), -INT8_C( 119), -INT8_C( 91), INT8_C( 52), INT8_C( 53), INT8_C( 106), INT8_C( 89), -INT8_C( 19), -INT8_C( 122), INT8_C( 116), -INT8_C( 16), INT8_C( 65), INT8_C( 8), -INT8_C( 84), -INT8_C( 20), INT8_MIN, -INT8_C( 25), -INT8_C( 101), -INT8_C( 65), INT8_C( 117), INT8_C( 63), -INT8_C( 31), -INT8_C( 127), INT8_C( 123), INT8_C( 87), INT8_C( 50), -INT8_C( 84), -INT8_C( 
107), -INT8_C( 95), -INT8_C( 96), -INT8_C( 115), -INT8_C( 9), INT8_C( 41), INT8_C( 50), INT8_C( 43), INT8_C( 95), -INT8_C( 99), -INT8_C( 123), INT8_C( 76), INT8_C( 35), -INT8_C( 7), INT8_C( 61), INT8_C( 100), INT8_C( 1), -INT8_C( 23), INT8_C( 80), -INT8_C( 127), -INT8_C( 48), -INT8_C( 21), INT8_C( 64), INT8_C( 69), INT8_C( 43), INT8_C( 33), -INT8_C( 57), -INT8_C( 90) }, { INT8_C( 121), -INT8_C( 7), INT8_C( 82), INT8_C( 14), -INT8_C( 102), -INT8_C( 14), -INT8_C( 100), -INT8_C( 111), INT8_C( 28), -INT8_C( 50), -INT8_C( 67), INT8_C( 123), INT8_C( 107), INT8_C( 66), -INT8_C( 57), -INT8_C( 114), INT8_C( 59), INT8_C( 4), -INT8_C( 14), INT8_C( 60), -INT8_C( 18), INT8_C( 67), -INT8_C( 67), -INT8_C( 66), INT8_C( 46), -INT8_C( 3), INT8_C( 4), INT8_C( 89), INT8_C( 31), -INT8_C( 53), INT8_C( 0), -INT8_C( 104), -INT8_C( 60), INT8_C( 82), -INT8_C( 90), INT8_C( 95), INT8_C( 69), INT8_C( 66), -INT8_C( 16), INT8_C( 97), INT8_C( 17), -INT8_C( 83), -INT8_C( 36), INT8_C( 124), -INT8_C( 17), -INT8_C( 93), INT8_C( 11), INT8_C( 42), -INT8_C( 88), -INT8_C( 3), INT8_C( 102), -INT8_C( 106), INT8_C( 64), INT8_C( 35), INT8_C( 84), INT8_C( 111), INT8_C( 33), INT8_C( 88), -INT8_C( 56), INT8_C( 64), INT8_C( 35), -INT8_C( 56), -INT8_C( 40), -INT8_C( 24) }, { -INT8_C( 123), INT8_C( 53), INT8_C( 0), -INT8_C( 65), INT8_C( 0), INT8_C( 48), INT8_C( 0), -INT8_C( 123), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 32), -INT8_C( 97), INT8_C( 119), INT8_C( 49), INT8_C( 0), INT8_C( 0), -INT8_C( 118), INT8_C( 0), INT8_C( 44), INT8_C( 47), INT8_C( 75), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 28), -INT8_C( 97), INT8_C( 24), INT8_C( 0), INT8_C( 10), INT8_C( 0), INT8_C( 25), INT8_C( 63), -INT8_C( 87), -INT8_C( 40), INT8_C( 11), -INT8_C( 38), -INT8_C( 29), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 42), INT8_C( 14), -INT8_C( 89), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 118), INT8_C( 0), INT8_C( 0), -INT8_C( 93), INT8_C( 0), INT8_C( 0), INT8_C( 12), INT8_C( 0), -INT8_C( 16), -INT8_C( 15), INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 23), INT8_C( 0), INT8_C( 0) } }, { UINT64_C(15186480722153537051), { -INT8_C( 27), -INT8_C( 99), INT8_C( 78), -INT8_C( 44), INT8_C( 64), INT8_C( 89), -INT8_C( 1), -INT8_C( 24), INT8_C( 87), INT8_C( 101), INT8_C( 126), -INT8_C( 105), -INT8_C( 119), -INT8_C( 45), INT8_C( 6), -INT8_C( 86), INT8_C( 43), -INT8_C( 49), -INT8_C( 22), INT8_C( 79), -INT8_C( 105), -INT8_C( 62), INT8_C( 55), -INT8_C( 78), INT8_C( 64), INT8_C( 126), INT8_C( 18), INT8_C( 1), -INT8_C( 75), -INT8_C( 45), -INT8_C( 45), -INT8_C( 102), INT8_C( 112), INT8_C( 34), INT8_C( 111), -INT8_C( 79), INT8_C( 123), INT8_C( 110), -INT8_C( 103), -INT8_C( 46), -INT8_C( 45), INT8_C( 24), INT8_C( 106), INT8_C( 92), -INT8_C( 21), INT8_C( 112), INT8_C( 6), INT8_C( 22), INT8_C( 63), -INT8_C( 16), INT8_C( 101), -INT8_C( 41), -INT8_C( 78), -INT8_C( 100), -INT8_C( 119), -INT8_C( 13), INT8_C( 26), -INT8_C( 100), -INT8_C( 12), -INT8_C( 48), INT8_C( 111), -INT8_C( 56), INT8_C( 106), -INT8_C( 32) }, { -INT8_C( 22), -INT8_C( 39), -INT8_C( 111), INT8_C( 101), INT8_C( 71), INT8_C( 42), INT8_C( 56), INT8_C( 27), INT8_C( 66), -INT8_C( 94), INT8_C( 119), INT8_C( 45), INT8_C( 18), INT8_C( 126), INT8_C( 68), INT8_C( 82), INT8_C( 110), -INT8_C( 87), INT8_C( 41), INT8_C( 33), INT8_C( 70), -INT8_C( 78), INT8_C( 20), INT8_C( 96), INT8_C( 78), INT8_C( 8), INT8_C( 48), -INT8_C( 66), -INT8_C( 48), -INT8_C( 101), -INT8_C( 98), -INT8_C( 70), INT8_C( 116), INT8_C( 47), INT8_C( 32), -INT8_C( 68), INT8_C( 89), INT8_C( 88), -INT8_C( 41), -INT8_C( 100), -INT8_C( 6), INT8_C( 78), -INT8_C( 55), INT8_C( 12), -INT8_C( 52), INT8_C( 13), INT8_C( 94), INT8_C( 59), -INT8_C( 73), -INT8_C( 121), INT8_C( 92), -INT8_C( 3), INT8_C( 58), INT8_C( 112), INT8_C( 93), -INT8_C( 120), INT8_C( 120), -INT8_C( 114), INT8_C( 70), INT8_C( 73), INT8_C( 41), -INT8_C( 28), INT8_C( 3), -INT8_C( 99) }, { -INT8_C( 49), INT8_C( 118), INT8_C( 0), INT8_C( 57), -INT8_C( 121), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 7), 
-INT8_C( 11), -INT8_C( 60), -INT8_C( 101), INT8_C( 81), INT8_C( 74), INT8_C( 0), -INT8_C( 103), INT8_C( 120), INT8_C( 19), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 75), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 110), INT8_C( 113), INT8_C( 0), -INT8_C( 28), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 112), INT8_C( 110), -INT8_C( 51), INT8_C( 102), INT8_C( 51), INT8_C( 0), -INT8_C( 73), INT8_C( 125), INT8_C( 0), INT8_C( 0), -INT8_C( 10), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 26), INT8_C( 123), INT8_C( 0), INT8_C( 42), INT8_C( 0), INT8_C( 0), -INT8_C( 104), INT8_C( 0), INT8_C( 109), INT8_C( 125) } }, { UINT64_C( 8433325083901633299), { INT8_MAX, -INT8_C( 46), -INT8_C( 126), INT8_C( 75), -INT8_C( 32), -INT8_C( 32), -INT8_C( 122), -INT8_C( 105), INT8_C( 104), -INT8_C( 30), -INT8_C( 108), -INT8_C( 94), INT8_C( 82), -INT8_C( 15), INT8_C( 42), -INT8_C( 53), INT8_MAX, INT8_C( 113), INT8_C( 20), -INT8_C( 88), INT8_C( 85), INT8_C( 23), INT8_C( 70), INT8_C( 105), INT8_C( 59), -INT8_C( 97), -INT8_C( 42), -INT8_C( 74), -INT8_C( 48), -INT8_C( 33), INT8_C( 44), INT8_C( 79), -INT8_C( 79), -INT8_C( 82), -INT8_C( 102), -INT8_C( 111), -INT8_C( 114), INT8_C( 33), INT8_C( 40), -INT8_C( 10), INT8_C( 3), -INT8_C( 68), -INT8_C( 104), INT8_C( 86), -INT8_C( 82), -INT8_C( 61), INT8_C( 33), INT8_C( 45), INT8_C( 52), INT8_C( 53), -INT8_C( 42), -INT8_C( 119), INT8_C( 76), INT8_C( 28), -INT8_C( 14), -INT8_C( 121), -INT8_C( 69), -INT8_C( 56), INT8_C( 62), -INT8_C( 117), -INT8_C( 89), INT8_C( 106), -INT8_C( 38), INT8_C( 89) }, { INT8_C( 24), INT8_C( 117), -INT8_C( 22), -INT8_C( 90), -INT8_C( 106), INT8_C( 19), -INT8_C( 99), -INT8_C( 103), -INT8_C( 49), INT8_C( 53), -INT8_C( 17), INT8_C( 125), -INT8_C( 8), INT8_C( 16), -INT8_C( 85), INT8_C( 44), INT8_C( 69), -INT8_C( 127), -INT8_C( 74), -INT8_C( 110), -INT8_C( 99), -INT8_C( 88), INT8_C( 25), INT8_C( 88), INT8_C( 113), INT8_C( 87), -INT8_C( 28), INT8_C( 
24), -INT8_C( 63), -INT8_C( 66), INT8_C( 113), -INT8_C( 39), INT8_C( 51), INT8_C( 92), INT8_MIN, -INT8_C( 55), INT8_C( 111), INT8_C( 29), INT8_C( 99), INT8_C( 62), INT8_C( 82), INT8_C( 82), -INT8_C( 68), INT8_C( 75), INT8_C( 99), INT8_C( 103), INT8_C( 119), -INT8_C( 88), -INT8_C( 24), INT8_C( 45), INT8_C( 58), -INT8_C( 123), -INT8_C( 42), INT8_C( 84), -INT8_C( 35), INT8_C( 71), -INT8_C( 85), -INT8_C( 63), INT8_C( 95), INT8_C( 109), INT8_MIN, -INT8_C( 47), INT8_C( 70), -INT8_C( 77) }, { -INT8_C( 105), INT8_C( 71), INT8_C( 0), INT8_C( 0), INT8_C( 118), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 55), INT8_C( 23), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 60), INT8_C( 0), INT8_C( 0), INT8_C( 58), -INT8_C( 14), INT8_C( 0), INT8_C( 95), INT8_C( 0), -INT8_C( 84), INT8_C( 0), -INT8_C( 70), -INT8_C( 50), INT8_C( 0), -INT8_C( 99), -INT8_C( 99), INT8_C( 0), -INT8_C( 28), INT8_C( 10), INT8_C( 0), INT8_C( 90), -INT8_C( 3), INT8_C( 62), -INT8_C( 117), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 17), INT8_C( 42), INT8_C( 0), INT8_C( 0), INT8_C( 28), INT8_C( 0), INT8_C( 0), INT8_C( 14), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 102), INT8_C( 0), -INT8_C( 99), INT8_C( 0), INT8_C( 39), INT8_C( 59), INT8_C( 32), INT8_C( 0) } }, { UINT64_C( 3952718891158717997), { INT8_C( 50), -INT8_C( 106), -INT8_C( 127), -INT8_C( 107), -INT8_C( 3), -INT8_C( 8), INT8_C( 62), -INT8_C( 27), INT8_C( 38), INT8_C( 120), INT8_C( 106), -INT8_C( 4), -INT8_C( 52), INT8_C( 72), INT8_C( 67), INT8_C( 120), INT8_C( 9), -INT8_C( 94), -INT8_C( 27), -INT8_C( 119), INT8_C( 115), INT8_C( 43), INT8_C( 61), -INT8_C( 96), -INT8_C( 14), -INT8_C( 70), INT8_C( 60), -INT8_C( 43), -INT8_C( 102), INT8_C( 23), INT8_C( 11), -INT8_C( 52), -INT8_C( 83), -INT8_C( 116), INT8_C( 98), -INT8_C( 85), -INT8_C( 123), -INT8_C( 96), -INT8_C( 112), -INT8_C( 85), INT8_C( 24), -INT8_C( 5), -INT8_C( 89), -INT8_C( 27), INT8_C( 67), -INT8_C( 22), INT8_C( 93), INT8_C( 
76), -INT8_C( 116), INT8_C( 66), -INT8_C( 42), INT8_C( 0), INT8_C( 109), INT8_C( 19), -INT8_C( 96), INT8_C( 95), -INT8_C( 51), -INT8_C( 35), INT8_C( 53), INT8_C( 103), -INT8_C( 12), INT8_C( 64), INT8_C( 51), -INT8_C( 95) }, { -INT8_C( 51), -INT8_C( 107), INT8_C( 76), INT8_C( 82), INT8_C( 53), -INT8_C( 35), -INT8_C( 3), INT8_C( 78), -INT8_C( 40), -INT8_C( 92), INT8_C( 51), INT8_C( 27), -INT8_C( 114), -INT8_C( 112), INT8_C( 103), INT8_C( 26), -INT8_C( 46), INT8_C( 61), INT8_C( 26), INT8_C( 63), INT8_C( 80), -INT8_C( 69), -INT8_C( 97), INT8_C( 29), -INT8_C( 104), -INT8_C( 44), -INT8_C( 124), -INT8_C( 116), INT8_C( 20), -INT8_C( 72), INT8_C( 45), -INT8_C( 31), INT8_C( 77), INT8_C( 122), INT8_C( 51), -INT8_C( 125), INT8_C( 87), INT8_C( 48), -INT8_C( 47), INT8_C( 47), -INT8_C( 44), INT8_C( 4), INT8_C( 74), INT8_C( 98), -INT8_C( 108), -INT8_C( 79), INT8_C( 125), INT8_C( 102), -INT8_C( 17), -INT8_C( 105), -INT8_C( 91), INT8_C( 63), INT8_C( 82), INT8_C( 68), INT8_C( 93), -INT8_C( 22), INT8_C( 24), -INT8_C( 31), INT8_C( 118), INT8_C( 45), -INT8_C( 103), -INT8_C( 92), INT8_C( 14), -INT8_C( 25) }, { -INT8_C( 1), INT8_C( 0), -INT8_C( 51), -INT8_C( 25), INT8_C( 0), -INT8_C( 43), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 28), -INT8_C( 99), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 86), -INT8_C( 110), -INT8_C( 37), INT8_C( 0), -INT8_C( 1), -INT8_C( 56), -INT8_C( 61), -INT8_C( 26), -INT8_C( 36), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 64), INT8_C( 97), -INT8_C( 82), INT8_C( 0), INT8_C( 0), -INT8_C( 83), -INT8_C( 6), INT8_C( 6), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 48), INT8_C( 97), -INT8_C( 38), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 101), -INT8_C( 38), -INT8_C( 78), INT8_C( 0), -INT8_C( 39), INT8_C( 0), INT8_C( 63), -INT8_C( 65), INT8_C( 0), -INT8_C( 3), INT8_C( 73), INT8_C( 0), -INT8_C( 66), -INT8_C( 85), INT8_C( 0), -INT8_C( 115), -INT8_C( 28), INT8_C( 0), INT8_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_maskz_add_epi8(test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm512_add_epi16 (SIMDE_MUNIT_TEST_ARGS) { struct { const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { INT16_C( 11452), INT16_C( 18562), INT16_C( 2675), -INT16_C( 17089), INT16_C( 30550), -INT16_C( 14832), -INT16_C( 11694), -INT16_C( 20057), INT16_C( 31125), INT16_C( 7807), -INT16_C( 11481), INT16_C( 12519), -INT16_C( 410), -INT16_C( 11992), INT16_C( 27537), INT16_C( 19885), INT16_C( 12440), INT16_C( 2965), -INT16_C( 11206), -INT16_C( 28472), -INT16_C( 10164), -INT16_C( 25001), -INT16_C( 342), INT16_C( 16463), -INT16_C( 12424), -INT16_C( 24738), INT16_C( 17826), INT16_C( 2255), -INT16_C( 1981), -INT16_C( 11047), -INT16_C( 30877), -INT16_C( 1246) }, { -INT16_C( 18505), -INT16_C( 3833), -INT16_C( 12404), -INT16_C( 10111), -INT16_C( 10072), INT16_C( 21110), -INT16_C( 14633), INT16_C( 20370), -INT16_C( 3947), INT16_C( 14318), -INT16_C( 17098), INT16_C( 31040), INT16_C( 6581), INT16_C( 6478), INT16_C( 28832), INT16_C( 22292), INT16_C( 6951), -INT16_C( 19640), -INT16_C( 13589), -INT16_C( 27765), INT16_C( 674), INT16_C( 31205), INT16_C( 30920), INT16_C( 24008), -INT16_C( 18840), -INT16_C( 24940), -INT16_C( 11148), INT16_C( 10520), INT16_C( 26350), -INT16_C( 29118), INT16_C( 22486), -INT16_C( 538) }, { -INT16_C( 7053), INT16_C( 14729), -INT16_C( 9729), -INT16_C( 27200), INT16_C( 20478), INT16_C( 6278), -INT16_C( 26327), INT16_C( 313), INT16_C( 27178), INT16_C( 22125), -INT16_C( 28579), -INT16_C( 21977), INT16_C( 6171), -INT16_C( 5514), -INT16_C( 9167), -INT16_C( 23359), INT16_C( 19391), -INT16_C( 16675), -INT16_C( 24795), INT16_C( 9299), -INT16_C( 9490), INT16_C( 6204), INT16_C( 30578), -INT16_C( 25065), -INT16_C( 31264), 
INT16_C( 15858), INT16_C( 6678), INT16_C( 12775), INT16_C( 24369), INT16_C( 25371), -INT16_C( 8391), -INT16_C( 1784) } }, { { INT16_C( 11890), INT16_C( 23985), INT16_C( 15608), -INT16_C( 25616), -INT16_C( 10690), INT16_C( 1556), -INT16_C( 8882), -INT16_C( 18845), -INT16_C( 1901), INT16_C( 1877), INT16_C( 28108), -INT16_C( 17871), INT16_C( 29651), -INT16_C( 22199), INT16_C( 12234), INT16_C( 15782), INT16_C( 22365), INT16_C( 22170), -INT16_C( 29804), -INT16_C( 11535), INT16_C( 1377), -INT16_C( 20519), INT16_C( 15586), INT16_C( 30309), -INT16_C( 17868), INT16_C( 381), -INT16_C( 20953), -INT16_C( 1349), INT16_C( 1058), -INT16_C( 4957), INT16_C( 18995), -INT16_C( 28375) }, { -INT16_C( 15199), INT16_C( 13799), -INT16_C( 10161), -INT16_C( 20472), -INT16_C( 7715), -INT16_C( 16289), -INT16_C( 15331), INT16_C( 21046), -INT16_C( 19585), -INT16_C( 22957), INT16_C( 3682), -INT16_C( 31583), INT16_C( 17427), INT16_C( 18032), -INT16_C( 25970), INT16_C( 12503), -INT16_C( 16802), -INT16_C( 21147), INT16_C( 28054), INT16_C( 29789), -INT16_C( 17330), INT16_C( 27700), INT16_C( 27264), -INT16_C( 66), INT16_C( 4381), INT16_C( 32678), INT16_C( 18207), INT16_C( 12803), INT16_C( 29835), INT16_C( 6777), INT16_C( 20494), INT16_C( 27722) }, { -INT16_C( 3309), -INT16_C( 27752), INT16_C( 5447), INT16_C( 19448), -INT16_C( 18405), -INT16_C( 14733), -INT16_C( 24213), INT16_C( 2201), -INT16_C( 21486), -INT16_C( 21080), INT16_C( 31790), INT16_C( 16082), -INT16_C( 18458), -INT16_C( 4167), -INT16_C( 13736), INT16_C( 28285), INT16_C( 5563), INT16_C( 1023), -INT16_C( 1750), INT16_C( 18254), -INT16_C( 15953), INT16_C( 7181), -INT16_C( 22686), INT16_C( 30243), -INT16_C( 13487), -INT16_C( 32477), -INT16_C( 2746), INT16_C( 11454), INT16_C( 30893), INT16_C( 1820), -INT16_C( 26047), -INT16_C( 653) } }, { { -INT16_C( 20721), -INT16_C( 23271), INT16_C( 30237), INT16_C( 27417), INT16_C( 19762), -INT16_C( 19753), -INT16_C( 27209), -INT16_C( 10830), INT16_C( 22694), -INT16_C( 14764), INT16_C( 22687), INT16_C( 
11000), INT16_C( 29132), -INT16_C( 9660), -INT16_C( 28990), -INT16_C( 11962), INT16_C( 24382), INT16_C( 23414), -INT16_C( 28459), INT16_C( 1990), -INT16_C( 24867), -INT16_C( 27207), INT16_C( 27443), -INT16_C( 9622), -INT16_C( 16701), INT16_C( 25248), -INT16_C( 26602), -INT16_C( 7539), -INT16_C( 12022), -INT16_C( 13124), INT16_C( 608), -INT16_C( 24931) }, { INT16_C( 4961), INT16_C( 14073), -INT16_C( 16477), -INT16_C( 32451), -INT16_C( 2211), -INT16_C( 28394), -INT16_C( 32670), INT16_C( 9835), INT16_C( 2878), INT16_C( 21896), INT16_C( 5539), -INT16_C( 21193), -INT16_C( 2841), INT16_C( 18297), INT16_C( 5878), INT16_C( 22757), -INT16_C( 8662), -INT16_C( 12914), -INT16_C( 13155), -INT16_C( 1202), INT16_C( 25795), INT16_C( 9612), -INT16_C( 2076), INT16_C( 9035), -INT16_C( 11262), -INT16_C( 23176), -INT16_C( 20503), -INT16_C( 12205), -INT16_C( 13149), -INT16_C( 26089), -INT16_C( 797), INT16_C( 3570) }, { -INT16_C( 15760), -INT16_C( 9198), INT16_C( 13760), -INT16_C( 5034), INT16_C( 17551), INT16_C( 17389), INT16_C( 5657), -INT16_C( 995), INT16_C( 25572), INT16_C( 7132), INT16_C( 28226), -INT16_C( 10193), INT16_C( 26291), INT16_C( 8637), -INT16_C( 23112), INT16_C( 10795), INT16_C( 15720), INT16_C( 10500), INT16_C( 23922), INT16_C( 788), INT16_C( 928), -INT16_C( 17595), INT16_C( 25367), -INT16_C( 587), -INT16_C( 27963), INT16_C( 2072), INT16_C( 18431), -INT16_C( 19744), -INT16_C( 25171), INT16_C( 26323), -INT16_C( 189), -INT16_C( 21361) } }, { { -INT16_C( 32550), INT16_C( 30938), INT16_C( 10572), INT16_C( 3955), -INT16_C( 115), INT16_C( 29237), -INT16_C( 32522), -INT16_C( 1899), INT16_C( 3412), INT16_C( 16029), -INT16_C( 3908), INT16_C( 24590), INT16_C( 9917), -INT16_C( 24326), -INT16_C( 5086), -INT16_C( 595), -INT16_C( 30868), -INT16_C( 18059), -INT16_C( 5968), INT16_C( 16072), -INT16_C( 537), -INT16_C( 8784), INT16_C( 17790), -INT16_C( 11563), INT16_C( 29266), INT16_C( 3600), INT16_C( 8035), INT16_C( 8302), INT16_C( 26693), INT16_C( 26560), INT16_C( 27988), -INT16_C( 
16028) }, { -INT16_C( 9740), -INT16_C( 23174), INT16_C( 17089), -INT16_C( 22301), -INT16_C( 27840), -INT16_C( 16763), INT16_C( 23256), INT16_C( 10896), -INT16_C( 24115), INT16_C( 12344), -INT16_C( 22592), INT16_C( 1360), INT16_C( 4111), INT16_C( 25708), -INT16_C( 11907), INT16_C( 28965), -INT16_C( 24662), INT16_C( 27670), -INT16_C( 1567), INT16_C( 8468), -INT16_C( 25972), INT16_C( 25823), INT16_C( 28916), -INT16_C( 15986), -INT16_C( 14575), -INT16_C( 11791), INT16_C( 16750), INT16_C( 32214), INT16_C( 16977), -INT16_C( 12575), INT16_C( 1555), -INT16_C( 16832) }, { INT16_C( 23246), INT16_C( 7764), INT16_C( 27661), -INT16_C( 18346), -INT16_C( 27955), INT16_C( 12474), -INT16_C( 9266), INT16_C( 8997), -INT16_C( 20703), INT16_C( 28373), -INT16_C( 26500), INT16_C( 25950), INT16_C( 14028), INT16_C( 1382), -INT16_C( 16993), INT16_C( 28370), INT16_C( 10006), INT16_C( 9611), -INT16_C( 7535), INT16_C( 24540), -INT16_C( 26509), INT16_C( 17039), -INT16_C( 18830), -INT16_C( 27549), INT16_C( 14691), -INT16_C( 8191), INT16_C( 24785), -INT16_C( 25020), -INT16_C( 21866), INT16_C( 13985), INT16_C( 29543), INT16_C( 32676) } }, { { INT16_C( 22181), -INT16_C( 30934), INT16_C( 15952), -INT16_C( 9048), -INT16_C( 30504), -INT16_C( 12991), -INT16_C( 12296), INT16_C( 2446), -INT16_C( 32618), INT16_C( 1242), -INT16_C( 20287), INT16_C( 4994), INT16_C( 25586), INT16_C( 1761), INT16_C( 8554), INT16_C( 4036), -INT16_C( 4488), -INT16_C( 14186), INT16_C( 16172), INT16_C( 1444), -INT16_C( 6713), -INT16_C( 16430), INT16_C( 24757), INT16_C( 19400), -INT16_C( 23840), -INT16_C( 23984), -INT16_C( 11694), INT16_C( 17589), -INT16_C( 27083), -INT16_C( 24758), INT16_C( 3768), INT16_C( 12463) }, { INT16_C( 17916), INT16_C( 10744), -INT16_C( 25468), INT16_C( 19246), INT16_C( 130), INT16_C( 14090), -INT16_C( 11680), INT16_C( 16770), -INT16_C( 11660), -INT16_C( 14621), -INT16_C( 26460), -INT16_C( 9717), INT16_C( 21806), -INT16_C( 6535), INT16_C( 10340), INT16_C( 24598), INT16_C( 3694), -INT16_C( 3447), -INT16_C( 
18517), INT16_C( 11582), INT16_C( 18615), INT16_C( 6244), -INT16_C( 6629), -INT16_C( 28839), INT16_C( 15545), INT16_C( 23894), INT16_C( 25044), INT16_C( 567), -INT16_C( 20042), INT16_C( 6889), -INT16_C( 39), INT16_C( 18299) }, { -INT16_C( 25439), -INT16_C( 20190), -INT16_C( 9516), INT16_C( 10198), -INT16_C( 30374), INT16_C( 1099), -INT16_C( 23976), INT16_C( 19216), INT16_C( 21258), -INT16_C( 13379), INT16_C( 18789), -INT16_C( 4723), -INT16_C( 18144), -INT16_C( 4774), INT16_C( 18894), INT16_C( 28634), -INT16_C( 794), -INT16_C( 17633), -INT16_C( 2345), INT16_C( 13026), INT16_C( 11902), -INT16_C( 10186), INT16_C( 18128), -INT16_C( 9439), -INT16_C( 8295), -INT16_C( 90), INT16_C( 13350), INT16_C( 18156), INT16_C( 18411), -INT16_C( 17869), INT16_C( 3729), INT16_C( 30762) } }, { { INT16_C( 1038), -INT16_C( 18118), INT16_C( 30908), INT16_C( 29670), INT16_C( 19136), -INT16_C( 9333), -INT16_C( 7120), -INT16_C( 5781), -INT16_C( 16096), -INT16_C( 3001), INT16_C( 32290), -INT16_C( 9993), -INT16_C( 8145), INT16_C( 2547), INT16_C( 28383), -INT16_C( 4784), -INT16_C( 30094), INT16_C( 11942), -INT16_C( 29694), -INT16_C( 15454), INT16_C( 11734), INT16_C( 1950), INT16_C( 2322), INT16_C( 13040), INT16_C( 14282), -INT16_C( 5081), INT16_C( 7862), -INT16_C( 6715), -INT16_C( 18178), -INT16_C( 8722), INT16_C( 16166), -INT16_C( 26421) }, { INT16_C( 29129), -INT16_C( 13113), INT16_C( 27134), -INT16_C( 11121), INT16_C( 11670), -INT16_C( 22309), -INT16_C( 13257), INT16_C( 475), INT16_C( 515), -INT16_C( 17938), -INT16_C( 19680), INT16_C( 7839), -INT16_C( 29333), -INT16_C( 28165), -INT16_C( 14644), -INT16_C( 27095), -INT16_C( 4040), INT16_C( 13922), -INT16_C( 3751), -INT16_C( 4086), -INT16_C( 6626), INT16_C( 21912), INT16_C( 29618), -INT16_C( 19113), INT16_C( 17781), -INT16_C( 27281), INT16_C( 3832), INT16_C( 25523), -INT16_C( 20581), INT16_C( 26868), INT16_C( 7541), -INT16_C( 20994) }, { INT16_C( 30167), -INT16_C( 31231), -INT16_C( 7494), INT16_C( 18549), INT16_C( 30806), -INT16_C( 31642), 
-INT16_C( 20377), -INT16_C( 5306), -INT16_C( 15581), -INT16_C( 20939), INT16_C( 12610), -INT16_C( 2154), INT16_C( 28058), -INT16_C( 25618), INT16_C( 13739), -INT16_C( 31879), INT16_C( 31402), INT16_C( 25864), INT16_C( 32091), -INT16_C( 19540), INT16_C( 5108), INT16_C( 23862), INT16_C( 31940), -INT16_C( 6073), INT16_C( 32063), -INT16_C( 32362), INT16_C( 11694), INT16_C( 18808), INT16_C( 26777), INT16_C( 18146), INT16_C( 23707), INT16_C( 18121) } }, { { INT16_C( 24590), INT16_C( 26595), -INT16_C( 4527), INT16_C( 28503), -INT16_C( 3884), -INT16_C( 31035), INT16_C( 7267), -INT16_C( 9925), -INT16_C( 21919), INT16_C( 22894), INT16_C( 8888), INT16_C( 21692), -INT16_C( 20271), INT16_C( 18108), -INT16_C( 17715), -INT16_C( 9228), -INT16_C( 10470), INT16_C( 27459), -INT16_C( 25915), -INT16_C( 26150), -INT16_C( 24694), -INT16_C( 4577), INT16_C( 23483), INT16_C( 7367), INT16_C( 13573), -INT16_C( 16779), INT16_C( 12631), INT16_C( 10258), -INT16_C( 12575), -INT16_C( 20625), INT16_C( 25480), -INT16_C( 23926) }, { -INT16_C( 12998), INT16_C( 13), -INT16_C( 6296), -INT16_C( 3431), -INT16_C( 18041), INT16_C( 17120), -INT16_C( 22764), INT16_C( 6495), -INT16_C( 11043), INT16_C( 13527), -INT16_C( 5882), -INT16_C( 6307), -INT16_C( 13129), INT16_C( 16278), INT16_C( 8495), INT16_C( 27105), -INT16_C( 4370), INT16_C( 22121), INT16_C( 982), INT16_C( 23881), INT16_C( 10684), -INT16_C( 12129), -INT16_C( 303), -INT16_C( 20759), -INT16_C( 15917), -INT16_C( 9758), INT16_C( 16298), INT16_C( 25280), INT16_C( 22283), INT16_C( 15009), -INT16_C( 31880), INT16_C( 26276) }, { INT16_C( 11592), INT16_C( 26608), -INT16_C( 10823), INT16_C( 25072), -INT16_C( 21925), -INT16_C( 13915), -INT16_C( 15497), -INT16_C( 3430), INT16_C( 32574), -INT16_C( 29115), INT16_C( 3006), INT16_C( 15385), INT16_C( 32136), -INT16_C( 31150), -INT16_C( 9220), INT16_C( 17877), -INT16_C( 14840), -INT16_C( 15956), -INT16_C( 24933), -INT16_C( 2269), -INT16_C( 14010), -INT16_C( 16706), INT16_C( 23180), -INT16_C( 13392), -INT16_C( 2344), 
-INT16_C( 26537), INT16_C( 28929), -INT16_C( 29998), INT16_C( 9708), -INT16_C( 5616), -INT16_C( 6400), INT16_C( 2350) } }, { { INT16_C( 3441), INT16_C( 18365), INT16_C( 1552), -INT16_C( 13148), INT16_C( 17455), INT16_C( 156), -INT16_C( 31166), INT16_C( 5550), -INT16_C( 28345), -INT16_C( 3602), -INT16_C( 20528), -INT16_C( 9133), -INT16_C( 2810), INT16_C( 32278), -INT16_C( 17800), -INT16_C( 5660), -INT16_C( 24120), -INT16_C( 10191), -INT16_C( 10841), -INT16_C( 10331), INT16_C( 16665), INT16_C( 23767), -INT16_C( 31033), INT16_C( 3697), INT16_C( 24599), -INT16_C( 6400), INT16_C( 21263), INT16_C( 5571), -INT16_C( 9656), -INT16_C( 16237), INT16_C( 30612), INT16_C( 23722) }, { -INT16_C( 9447), -INT16_C( 16331), -INT16_C( 9552), -INT16_C( 13673), INT16_C( 28443), -INT16_C( 7386), -INT16_C( 26635), INT16_C( 3313), -INT16_C( 3593), INT16_C( 1779), -INT16_C( 18619), -INT16_C( 29413), -INT16_C( 20847), INT16_C( 9550), -INT16_C( 2010), INT16_C( 16258), -INT16_C( 18477), -INT16_C( 31745), -INT16_C( 26735), -INT16_C( 21427), INT16_C( 29446), -INT16_C( 1137), -INT16_C( 32501), INT16_C( 519), -INT16_C( 1422), -INT16_C( 18679), INT16_C( 9393), INT16_C( 16965), -INT16_C( 27693), -INT16_C( 1688), -INT16_C( 5493), INT16_C( 24120) }, { -INT16_C( 6006), INT16_C( 2034), -INT16_C( 8000), -INT16_C( 26821), -INT16_C( 19638), -INT16_C( 7230), INT16_C( 7735), INT16_C( 8863), -INT16_C( 31938), -INT16_C( 1823), INT16_C( 26389), INT16_C( 26990), -INT16_C( 23657), -INT16_C( 23708), -INT16_C( 19810), INT16_C( 10598), INT16_C( 22939), INT16_C( 23600), INT16_C( 27960), -INT16_C( 31758), -INT16_C( 19425), INT16_C( 22630), INT16_C( 2002), INT16_C( 4216), INT16_C( 23177), -INT16_C( 25079), INT16_C( 30656), INT16_C( 22536), INT16_C( 28187), -INT16_C( 17925), INT16_C( 25119), -INT16_C( 17694) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = 
simde_mm512_add_epi16(a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_add_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t src[32]; const simde__mmask32 k; const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { INT16_C( 19989), INT16_C( 17201), -INT16_C( 484), -INT16_C( 4807), -INT16_C( 18583), INT16_C( 7804), INT16_C( 26728), INT16_C( 16294), -INT16_C( 31636), INT16_C( 28135), -INT16_C( 32158), INT16_C( 20064), -INT16_C( 16695), INT16_C( 3157), INT16_C( 32300), INT16_C( 16873), INT16_C( 6860), -INT16_C( 5755), -INT16_C( 16872), -INT16_C( 32042), INT16_C( 21110), -INT16_C( 8544), INT16_C( 18106), INT16_C( 9757), INT16_C( 1482), INT16_C( 11668), -INT16_C( 2937), INT16_C( 20859), -INT16_C( 12110), -INT16_C( 8611), INT16_C( 17999), INT16_C( 6944) }, UINT32_C(2030347617), { -INT16_C( 25811), -INT16_C( 6217), -INT16_C( 10782), -INT16_C( 21490), -INT16_C( 23846), INT16_C( 25049), INT16_C( 21910), INT16_C( 18610), INT16_C( 4133), INT16_C( 29734), INT16_C( 18006), -INT16_C( 18544), -INT16_C( 27413), INT16_C( 20273), INT16_C( 11375), -INT16_C( 25560), -INT16_C( 7992), -INT16_C( 21885), -INT16_C( 28235), -INT16_C( 28842), INT16_C( 12339), -INT16_C( 13840), -INT16_C( 23675), -INT16_C( 21999), INT16_C( 14515), INT16_C( 2335), -INT16_C( 20610), INT16_C( 27329), -INT16_C( 3517), -INT16_C( 19783), -INT16_C( 7906), -INT16_C( 6578) }, { -INT16_C( 11583), INT16_C( 30352), -INT16_C( 6301), -INT16_C( 26875), -INT16_C( 2537), -INT16_C( 25504), INT16_C( 29337), INT16_C( 19526), INT16_C( 26026), INT16_C( 10325), INT16_C( 5652), INT16_C( 22674), INT16_C( 19208), INT16_C( 9994), INT16_C( 22829), -INT16_C( 4595), -INT16_C( 25045), -INT16_C( 29083), INT16_C( 27269), -INT16_C( 25563), -INT16_C( 31136), -INT16_C( 1736), INT16_C( 32504), -INT16_C( 23995), -INT16_C( 25628), -INT16_C( 1846), INT16_C( 23985), -INT16_C( 17840), INT16_C( 23464), -INT16_C( 10783), 
-INT16_C( 4428), -INT16_C( 8252) }, { INT16_C( 28142), INT16_C( 17201), -INT16_C( 484), -INT16_C( 4807), -INT16_C( 18583), -INT16_C( 455), -INT16_C( 14289), INT16_C( 16294), INT16_C( 30159), INT16_C( 28135), INT16_C( 23658), INT16_C( 20064), -INT16_C( 16695), INT16_C( 30267), INT16_C( 32300), -INT16_C( 30155), INT16_C( 6860), -INT16_C( 5755), -INT16_C( 966), -INT16_C( 32042), INT16_C( 21110), -INT16_C( 8544), INT16_C( 18106), INT16_C( 9757), -INT16_C( 11113), INT16_C( 11668), -INT16_C( 2937), INT16_C( 9489), INT16_C( 19947), -INT16_C( 30566), -INT16_C( 12334), INT16_C( 6944) } }, { { INT16_C( 10636), INT16_C( 4461), -INT16_C( 27757), -INT16_C( 2899), -INT16_C( 6887), INT16_C( 4589), INT16_C( 13156), INT16_C( 18611), INT16_C( 32206), INT16_C( 32576), -INT16_C( 28198), -INT16_C( 31943), INT16_C( 6892), -INT16_C( 24488), INT16_C( 7177), -INT16_C( 27265), -INT16_C( 5051), -INT16_C( 9817), INT16_C( 21631), -INT16_C( 26419), -INT16_C( 17862), -INT16_C( 24919), INT16_C( 23789), -INT16_C( 17434), INT16_C( 9946), -INT16_C( 19397), INT16_C( 29879), -INT16_C( 23753), -INT16_C( 28529), -INT16_C( 26557), -INT16_C( 15700), -INT16_C( 3539) }, UINT32_C( 785110191), { INT16_C( 28754), INT16_C( 16385), -INT16_C( 6195), -INT16_C( 22533), INT16_C( 13837), -INT16_C( 15013), -INT16_C( 27733), INT16_C( 14952), -INT16_C( 21469), -INT16_C( 12334), -INT16_C( 146), INT16_C( 7617), -INT16_C( 29484), -INT16_C( 692), INT16_C( 4900), INT16_C( 30560), INT16_C( 24963), INT16_C( 20663), -INT16_C( 19896), INT16_C( 22007), INT16_C( 21481), -INT16_C( 27622), -INT16_C( 31770), INT16_C( 2510), -INT16_C( 24529), -INT16_C( 25128), -INT16_C( 25953), INT16_C( 29627), INT16_C( 1830), INT16_C( 19312), -INT16_C( 12262), -INT16_C( 25150) }, { INT16_C( 31025), INT16_C( 31214), -INT16_C( 6869), INT16_C( 5327), -INT16_C( 5832), INT16_C( 7848), INT16_C( 30316), -INT16_C( 25817), INT16_C( 22), -INT16_C( 18887), -INT16_C( 2918), -INT16_C( 16343), -INT16_C( 25861), INT16_C( 5387), -INT16_C( 12950), -INT16_C( 25422), 
-INT16_C( 24506), INT16_C( 29205), -INT16_C( 7034), -INT16_C( 16762), INT16_C( 12238), INT16_C( 15069), INT16_C( 1189), -INT16_C( 17194), INT16_C( 3844), -INT16_C( 24974), -INT16_C( 25853), -INT16_C( 417), INT16_C( 27189), -INT16_C( 24557), -INT16_C( 15048), INT16_C( 32316) }, { -INT16_C( 5757), -INT16_C( 17937), -INT16_C( 13064), -INT16_C( 17206), -INT16_C( 6887), -INT16_C( 7165), INT16_C( 13156), -INT16_C( 10865), INT16_C( 32206), INT16_C( 32576), -INT16_C( 3064), -INT16_C( 31943), INT16_C( 10191), -INT16_C( 24488), -INT16_C( 8050), INT16_C( 5138), INT16_C( 457), -INT16_C( 15668), INT16_C( 21631), INT16_C( 5245), -INT16_C( 17862), -INT16_C( 24919), -INT16_C( 30581), -INT16_C( 14684), INT16_C( 9946), INT16_C( 15434), INT16_C( 13730), INT16_C( 29210), -INT16_C( 28529), -INT16_C( 5245), -INT16_C( 15700), -INT16_C( 3539) } }, { { INT16_C( 20838), -INT16_C( 4880), INT16_C( 30518), INT16_C( 1194), -INT16_C( 30810), INT16_C( 19262), INT16_C( 5260), -INT16_C( 28665), INT16_C( 31011), INT16_C( 9775), -INT16_C( 29163), INT16_C( 18980), INT16_C( 14328), INT16_C( 12522), INT16_C( 9981), INT16_C( 25519), -INT16_C( 24712), -INT16_C( 20913), -INT16_C( 1770), -INT16_C( 17230), -INT16_C( 3967), INT16_C( 3336), INT16_C( 3845), INT16_C( 10397), -INT16_C( 13175), -INT16_C( 25009), INT16_C( 29530), INT16_C( 21480), -INT16_C( 11349), -INT16_C( 22397), INT16_C( 13049), INT16_C( 28939) }, UINT32_C(3894368978), { -INT16_C( 21054), -INT16_C( 14367), INT16_C( 32700), INT16_C( 17903), INT16_C( 15947), -INT16_C( 22813), -INT16_C( 13134), INT16_C( 24057), INT16_C( 31903), -INT16_C( 26619), INT16_C( 4271), -INT16_C( 32502), INT16_C( 10602), -INT16_C( 17047), INT16_C( 3835), -INT16_C( 17006), INT16_C( 29627), INT16_C( 30852), INT16_C( 29682), INT16_C( 16061), -INT16_C( 24142), INT16_C( 25828), -INT16_C( 8851), INT16_C( 3265), -INT16_C( 14759), INT16_C( 2212), -INT16_C( 20778), INT16_C( 16521), -INT16_C( 3112), -INT16_C( 11267), -INT16_C( 28927), -INT16_C( 17008) }, { INT16_C( 5123), -INT16_C( 
2763), -INT16_C( 3449), INT16_C( 14643), INT16_C( 6035), INT16_C( 157), INT16_C( 24308), INT16_C( 19980), -INT16_C( 20188), -INT16_C( 1450), -INT16_C( 8097), INT16_C( 14138), INT16_C( 14547), -INT16_C( 11254), -INT16_C( 25913), -INT16_C( 13679), -INT16_C( 14674), INT16_C( 14016), -INT16_C( 3143), INT16_C( 19567), INT16_C( 3339), -INT16_C( 179), INT16_C( 22891), -INT16_C( 28595), -INT16_C( 23542), INT16_C( 27274), -INT16_C( 14972), INT16_C( 22433), -INT16_C( 21251), -INT16_C( 15317), -INT16_C( 17082), -INT16_C( 2673) }, { INT16_C( 20838), -INT16_C( 17130), INT16_C( 30518), INT16_C( 1194), INT16_C( 21982), INT16_C( 19262), INT16_C( 11174), -INT16_C( 21499), INT16_C( 31011), -INT16_C( 28069), -INT16_C( 29163), -INT16_C( 18364), INT16_C( 25149), INT16_C( 12522), -INT16_C( 22078), INT16_C( 25519), INT16_C( 14953), -INT16_C( 20668), INT16_C( 26539), -INT16_C( 29908), -INT16_C( 20803), INT16_C( 3336), INT16_C( 3845), INT16_C( 10397), -INT16_C( 13175), -INT16_C( 25009), INT16_C( 29530), -INT16_C( 26582), -INT16_C( 11349), -INT16_C( 26584), INT16_C( 19527), -INT16_C( 19681) } }, { { INT16_C( 20355), INT16_C( 15403), -INT16_C( 26046), INT16_C( 19849), -INT16_C( 10585), INT16_C( 4941), -INT16_C( 26065), INT16_C( 15011), INT16_C( 11582), -INT16_C( 15708), INT16_C( 17906), -INT16_C( 4327), INT16_C( 17905), INT16_C( 14516), INT16_C( 17154), -INT16_C( 31443), INT16_C( 22674), -INT16_C( 11070), INT16_C( 19442), -INT16_C( 26078), INT16_C( 28449), INT16_C( 20653), INT16_C( 20489), INT16_C( 18570), INT16_C( 11901), INT16_C( 28682), INT16_C( 9332), INT16_C( 25951), INT16_C( 4969), INT16_C( 27549), -INT16_C( 13738), -INT16_C( 5904) }, UINT32_C( 364753442), { INT16_C( 23630), INT16_C( 22383), -INT16_C( 1620), INT16_C( 10655), -INT16_C( 21976), -INT16_C( 25447), -INT16_C( 1586), INT16_C( 14081), -INT16_C( 24820), INT16_C( 25506), -INT16_C( 28055), -INT16_C( 29621), INT16_C( 2117), INT16_C( 17057), INT16_C( 20711), INT16_C( 13665), -INT16_C( 12116), INT16_C( 22669), INT16_C( 11465), 
-INT16_C( 3711), INT16_C( 7126), -INT16_C( 23411), -INT16_C( 28908), INT16_C( 8411), INT16_C( 32046), -INT16_C( 26749), -INT16_C( 12528), INT16_C( 21795), -INT16_C( 15145), -INT16_C( 16489), -INT16_C( 2028), -INT16_C( 16140) }, { -INT16_C( 32312), -INT16_C( 28136), -INT16_C( 25938), -INT16_C( 31613), INT16_C( 4533), -INT16_C( 14039), INT16_C( 1184), -INT16_C( 12567), INT16_C( 28034), -INT16_C( 28059), -INT16_C( 30404), INT16_C( 5095), INT16_C( 32333), INT16_C( 25298), -INT16_C( 14473), INT16_C( 16162), INT16_C( 15176), -INT16_C( 2351), INT16_C( 21973), -INT16_C( 30085), -INT16_C( 23450), INT16_C( 1619), INT16_C( 15528), INT16_C( 10964), INT16_C( 14761), -INT16_C( 6724), -INT16_C( 23614), INT16_C( 4345), -INT16_C( 13534), -INT16_C( 26254), -INT16_C( 27502), -INT16_C( 9256) }, { INT16_C( 20355), -INT16_C( 5753), -INT16_C( 26046), INT16_C( 19849), -INT16_C( 10585), INT16_C( 26050), -INT16_C( 26065), INT16_C( 15011), INT16_C( 11582), -INT16_C( 2553), INT16_C( 17906), -INT16_C( 4327), -INT16_C( 31086), -INT16_C( 23181), INT16_C( 17154), INT16_C( 29827), INT16_C( 3060), -INT16_C( 11070), -INT16_C( 32098), INT16_C( 31740), -INT16_C( 16324), -INT16_C( 21792), INT16_C( 20489), INT16_C( 19375), -INT16_C( 18729), INT16_C( 28682), INT16_C( 29394), INT16_C( 25951), -INT16_C( 28679), INT16_C( 27549), -INT16_C( 13738), -INT16_C( 5904) } }, { { -INT16_C( 21809), -INT16_C( 23343), INT16_C( 19711), INT16_C( 25902), -INT16_C( 32272), -INT16_C( 26261), INT16_C( 16318), INT16_C( 26563), -INT16_C( 32648), INT16_C( 15181), INT16_C( 17955), INT16_C( 17739), -INT16_C( 17135), -INT16_C( 23330), -INT16_C( 18607), INT16_C( 8575), INT16_C( 20577), INT16_C( 24773), -INT16_C( 2915), -INT16_C( 29243), INT16_C( 12405), INT16_C( 13094), -INT16_C( 5521), -INT16_C( 6245), -INT16_C( 6038), -INT16_C( 29406), INT16_C( 27950), INT16_C( 16339), -INT16_C( 20182), INT16_C( 31971), INT16_C( 25192), -INT16_C( 13923) }, UINT32_C(1344889523), { INT16_C( 1054), -INT16_C( 29185), -INT16_C( 25874), INT16_C( 
22645), -INT16_C( 26750), -INT16_C( 20251), -INT16_C( 18427), INT16_C( 12272), -INT16_C( 11414), -INT16_C( 11605), INT16_C( 18486), -INT16_C( 5732), -INT16_C( 14933), INT16_C( 313), INT16_C( 5812), -INT16_C( 11571), -INT16_C( 13030), INT16_C( 2144), -INT16_C( 10905), -INT16_C( 5536), INT16_C( 18028), INT16_C( 29082), -INT16_C( 29954), INT16_C( 26785), INT16_C( 19550), -INT16_C( 27589), -INT16_C( 10347), INT16_C( 16509), -INT16_C( 18788), INT16_C( 20545), INT16_C( 4044), -INT16_C( 6365) }, { -INT16_C( 31780), INT16_C( 17391), INT16_C( 20568), -INT16_C( 15315), -INT16_C( 14186), -INT16_C( 27594), -INT16_C( 10414), -INT16_C( 20227), INT16_C( 14371), -INT16_C( 18364), -INT16_C( 16113), -INT16_C( 21512), INT16_C( 14967), INT16_C( 17660), INT16_C( 8009), INT16_C( 9515), INT16_C( 6818), -INT16_C( 1432), -INT16_C( 27030), INT16_C( 190), -INT16_C( 2978), -INT16_C( 20331), -INT16_C( 27957), -INT16_C( 4255), -INT16_C( 23094), -INT16_C( 9817), -INT16_C( 24473), -INT16_C( 8572), -INT16_C( 32550), INT16_C( 8994), INT16_C( 19871), INT16_C( 16712) }, { -INT16_C( 30726), -INT16_C( 11794), INT16_C( 19711), INT16_C( 25902), INT16_C( 24600), INT16_C( 17691), INT16_C( 16318), -INT16_C( 7955), -INT16_C( 32648), -INT16_C( 29969), INT16_C( 17955), INT16_C( 17739), -INT16_C( 17135), INT16_C( 17973), INT16_C( 13821), INT16_C( 8575), -INT16_C( 6212), INT16_C( 24773), -INT16_C( 2915), -INT16_C( 5346), INT16_C( 12405), INT16_C( 8751), -INT16_C( 5521), -INT16_C( 6245), -INT16_C( 6038), -INT16_C( 29406), INT16_C( 27950), INT16_C( 16339), INT16_C( 14198), INT16_C( 31971), INT16_C( 23915), -INT16_C( 13923) } }, { { -INT16_C( 20376), -INT16_C( 11717), -INT16_C( 1466), -INT16_C( 23341), INT16_C( 26862), -INT16_C( 17835), -INT16_C( 18694), -INT16_C( 15191), INT16_C( 20571), -INT16_C( 15715), INT16_C( 8688), -INT16_C( 13663), -INT16_C( 15454), INT16_C( 16877), INT16_C( 13585), INT16_C( 31107), -INT16_C( 16666), INT16_C( 11339), INT16_C( 7864), -INT16_C( 22575), INT16_C( 9862), -INT16_C( 32671), 
INT16_C( 2780), INT16_C( 14148), -INT16_C( 7846), INT16_C( 19450), -INT16_C( 25853), -INT16_C( 23275), INT16_C( 862), INT16_C( 28646), INT16_C( 26936), INT16_C( 7912) }, UINT32_C(3763024936), { -INT16_C( 6078), INT16_C( 7769), -INT16_C( 24846), INT16_C( 19797), INT16_C( 20351), -INT16_C( 32104), -INT16_C( 21014), INT16_C( 18727), INT16_C( 3760), -INT16_C( 5704), -INT16_C( 24201), -INT16_C( 24825), INT16_C( 21205), INT16_C( 10112), INT16_C( 1902), -INT16_C( 20480), INT16_C( 23280), -INT16_C( 7474), INT16_C( 9464), INT16_C( 30511), -INT16_C( 14477), INT16_C( 24314), INT16_C( 8565), INT16_C( 9639), INT16_C( 24367), -INT16_C( 22770), INT16_C( 5632), -INT16_C( 10938), -INT16_C( 14744), -INT16_C( 10243), -INT16_C( 562), -INT16_C( 16761) }, { INT16_C( 22103), INT16_C( 20384), -INT16_C( 12166), -INT16_C( 4665), -INT16_C( 15977), INT16_C( 3147), -INT16_C( 3358), INT16_C( 4658), INT16_C( 16466), INT16_C( 21177), -INT16_C( 170), -INT16_C( 16600), INT16_C( 9670), -INT16_C( 27498), INT16_C( 7458), INT16_C( 31314), -INT16_C( 3469), -INT16_C( 4663), -INT16_C( 28478), INT16_C( 23259), INT16_C( 9809), INT16_C( 13414), -INT16_C( 26599), INT16_C( 27462), -INT16_C( 39), INT16_C( 12221), -INT16_C( 6658), -INT16_C( 15122), -INT16_C( 31734), INT16_C( 11608), -INT16_C( 21854), INT16_C( 5543) }, { -INT16_C( 20376), -INT16_C( 11717), -INT16_C( 1466), INT16_C( 15132), INT16_C( 26862), -INT16_C( 28957), -INT16_C( 18694), -INT16_C( 15191), INT16_C( 20571), -INT16_C( 15715), -INT16_C( 24371), -INT16_C( 13663), INT16_C( 30875), -INT16_C( 17386), INT16_C( 13585), INT16_C( 31107), INT16_C( 19811), -INT16_C( 12137), INT16_C( 7864), -INT16_C( 11766), INT16_C( 9862), -INT16_C( 32671), -INT16_C( 18034), INT16_C( 14148), -INT16_C( 7846), INT16_C( 19450), -INT16_C( 25853), -INT16_C( 23275), INT16_C( 862), INT16_C( 1365), -INT16_C( 22416), -INT16_C( 11218) } }, { { INT16_C( 28829), INT16_C( 24323), -INT16_C( 8703), INT16_C( 21177), INT16_C( 8196), INT16_C( 7558), -INT16_C( 13128), -INT16_C( 28280), 
INT16_C( 18123), -INT16_C( 13631), -INT16_C( 20693), INT16_C( 13966), -INT16_C( 6348), -INT16_C( 10653), INT16_C( 2705), INT16_C( 12011), -INT16_C( 4486), INT16_C( 31630), INT16_C( 18380), -INT16_C( 11826), INT16_C( 21607), INT16_C( 8430), INT16_C( 30497), -INT16_C( 4943), INT16_C( 29373), -INT16_C( 5962), INT16_C( 17698), INT16_C( 22046), -INT16_C( 32468), -INT16_C( 17108), INT16_C( 6027), INT16_C( 1772) }, UINT32_C(3531700742), { -INT16_C( 27996), -INT16_C( 15031), -INT16_C( 1527), -INT16_C( 14671), INT16_C( 26733), -INT16_C( 28754), -INT16_C( 12883), -INT16_C( 9755), INT16_C( 4430), -INT16_C( 9578), -INT16_C( 32216), INT16_C( 12000), INT16_C( 25084), -INT16_C( 16895), -INT16_C( 23375), INT16_C( 21991), INT16_C( 12342), INT16_C( 16154), -INT16_C( 13526), -INT16_C( 26875), -INT16_C( 19405), -INT16_C( 8154), INT16_C( 2945), -INT16_C( 12359), INT16_C( 20508), INT16_C( 17833), -INT16_C( 30254), -INT16_C( 12429), INT16_C( 29931), -INT16_C( 25459), INT16_C( 29721), INT16_C( 20465) }, { INT16_C( 2980), -INT16_C( 12657), -INT16_C( 27434), INT16_C( 2662), -INT16_C( 29624), -INT16_C( 13846), -INT16_C( 23400), -INT16_C( 19303), INT16_C( 17140), -INT16_C( 14599), INT16_C( 28108), -INT16_C( 18539), INT16_C( 8929), -INT16_C( 1453), INT16_C( 17558), INT16_C( 14922), -INT16_C( 9905), INT16_C( 9481), INT16_C( 28525), -INT16_C( 18897), INT16_C( 6907), -INT16_C( 27777), INT16_C( 6334), -INT16_C( 19896), INT16_C( 16731), INT16_C( 10104), INT16_C( 3758), -INT16_C( 28450), INT16_C( 12592), -INT16_C( 14454), -INT16_C( 11147), -INT16_C( 15359) }, { INT16_C( 28829), -INT16_C( 27688), -INT16_C( 28961), INT16_C( 21177), INT16_C( 8196), INT16_C( 7558), -INT16_C( 13128), -INT16_C( 28280), INT16_C( 18123), -INT16_C( 24177), -INT16_C( 20693), -INT16_C( 6539), -INT16_C( 31523), -INT16_C( 18348), -INT16_C( 5817), INT16_C( 12011), INT16_C( 2437), INT16_C( 31630), INT16_C( 18380), -INT16_C( 11826), INT16_C( 21607), INT16_C( 8430), INT16_C( 30497), -INT16_C( 32255), INT16_C( 29373), INT16_C( 
27937), INT16_C( 17698), INT16_C( 22046), -INT16_C( 23013), -INT16_C( 17108), INT16_C( 18574), INT16_C( 5106) } }, { { INT16_C( 2733), INT16_C( 7145), INT16_C( 6521), INT16_C( 30161), INT16_C( 20531), -INT16_C( 3832), INT16_C( 20585), -INT16_C( 15197), INT16_C( 7058), INT16_C( 16619), -INT16_C( 14039), INT16_C( 23248), INT16_C( 23546), INT16_C( 28449), INT16_C( 8751), -INT16_C( 8909), INT16_C( 7213), -INT16_C( 22792), -INT16_C( 14027), INT16_C( 26651), INT16_C( 9241), -INT16_C( 32167), -INT16_C( 908), INT16_C( 1606), INT16_C( 12568), INT16_C( 16711), INT16_C( 6138), -INT16_C( 2917), -INT16_C( 17294), -INT16_C( 23965), -INT16_C( 26913), INT16_C( 3199) }, UINT32_C(3904010163), { -INT16_C( 21774), INT16_C( 26332), INT16_C( 8871), -INT16_C( 16531), -INT16_C( 19372), INT16_C( 19968), -INT16_C( 25397), INT16_C( 15939), -INT16_C( 22952), INT16_C( 14304), INT16_C( 24381), -INT16_C( 4029), -INT16_C( 2346), INT16_C( 5848), INT16_C( 10692), -INT16_C( 18833), INT16_C( 19412), INT16_C( 31516), -INT16_C( 30354), -INT16_C( 15814), INT16_C( 14909), INT16_C( 2320), INT16_C( 21462), INT16_C( 12103), INT16_C( 10234), INT16_C( 14182), -INT16_C( 21882), INT16_C( 23591), -INT16_C( 96), INT16_C( 25714), -INT16_C( 7895), -INT16_C( 742) }, { INT16_C( 13869), -INT16_C( 25736), -INT16_C( 19776), -INT16_C( 675), INT16_C( 28140), -INT16_C( 15610), INT16_C( 19905), -INT16_C( 17422), INT16_C( 22644), -INT16_C( 1294), INT16_C( 6402), -INT16_C( 23978), -INT16_C( 14312), INT16_C( 16646), INT16_C( 8362), -INT16_C( 10434), -INT16_C( 18857), INT16_C( 6002), -INT16_C( 12440), INT16_C( 21780), INT16_C( 6972), -INT16_C( 744), INT16_C( 2664), -INT16_C( 8776), -INT16_C( 21918), INT16_C( 26071), INT16_C( 11971), -INT16_C( 9209), INT16_C( 3830), -INT16_C( 24547), INT16_C( 23598), -INT16_C( 31369) }, { -INT16_C( 7905), INT16_C( 596), INT16_C( 6521), INT16_C( 30161), INT16_C( 8768), INT16_C( 4358), INT16_C( 20585), -INT16_C( 1483), -INT16_C( 308), INT16_C( 13010), INT16_C( 30783), INT16_C( 23248), -INT16_C( 
16658), INT16_C( 22494), INT16_C( 19054), -INT16_C( 8909), INT16_C( 7213), -INT16_C( 28018), -INT16_C( 14027), INT16_C( 26651), INT16_C( 21881), INT16_C( 1576), -INT16_C( 908), INT16_C( 3327), INT16_C( 12568), INT16_C( 16711), INT16_C( 6138), INT16_C( 14382), -INT16_C( 17294), INT16_C( 1167), INT16_C( 15703), -INT16_C( 32111) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi16(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_mask_add_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_add_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask32 k; const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { UINT32_C(1970569674), { -INT16_C( 4224), -INT16_C( 11503), -INT16_C( 18041), INT16_C( 6436), -INT16_C( 9270), -INT16_C( 15814), -INT16_C( 27569), INT16_C( 13042), -INT16_C( 23737), -INT16_C( 13044), INT16_C( 7220), -INT16_C( 316), INT16_C( 14494), -INT16_C( 908), INT16_C( 27220), -INT16_C( 11118), -INT16_C( 23719), -INT16_C( 8025), -INT16_C( 13476), INT16_C( 9977), INT16_C( 13479), -INT16_C( 2328), -INT16_C( 9528), INT16_C( 3881), INT16_C( 13693), -INT16_C( 19747), -INT16_C( 24239), -INT16_C( 4176), INT16_C( 9433), INT16_C( 11756), INT16_C( 32398), -INT16_C( 6143) }, { -INT16_C( 22239), INT16_C( 32200), -INT16_C( 15756), INT16_C( 7075), -INT16_C( 29450), -INT16_C( 16878), INT16_C( 15206), -INT16_C( 6962), -INT16_C( 21648), -INT16_C( 15978), INT16_C( 17996), INT16_C( 9649), -INT16_C( 25237), -INT16_C( 1709), INT16_C( 21531), INT16_C( 15585), -INT16_C( 21763), INT16_C( 29369), INT16_C( 23660), INT16_C( 25229), -INT16_C( 24600), INT16_C( 20256), -INT16_C( 4390), INT16_C( 18995), -INT16_C( 13927), -INT16_C( 6900), -INT16_C( 17137), INT16_C( 
31243), INT16_C( 24154), INT16_C( 30068), INT16_C( 21938), -INT16_C( 20303) }, { INT16_C( 0), INT16_C( 20697), INT16_C( 0), INT16_C( 13511), INT16_C( 0), INT16_C( 0), -INT16_C( 12363), INT16_C( 6080), INT16_C( 20151), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 4467), INT16_C( 0), INT16_C( 0), INT16_C( 10184), INT16_C( 0), -INT16_C( 11121), INT16_C( 17928), -INT16_C( 13918), INT16_C( 0), -INT16_C( 234), INT16_C( 0), INT16_C( 24160), INT16_C( 0), -INT16_C( 31949), -INT16_C( 23712), -INT16_C( 11200), INT16_C( 0) } }, { UINT32_C(1797417727), { -INT16_C( 4529), INT16_C( 10750), INT16_C( 12764), INT16_C( 30324), -INT16_C( 32518), INT16_C( 2395), INT16_C( 26173), -INT16_C( 26748), -INT16_C( 1852), INT16_C( 30476), -INT16_C( 17075), INT16_C( 19751), INT16_C( 18727), -INT16_C( 4680), -INT16_C( 30984), INT16_C( 18332), -INT16_C( 25996), INT16_C( 20593), -INT16_C( 6709), -INT16_C( 14906), INT16_C( 8805), -INT16_C( 23857), INT16_C( 21384), INT16_C( 19769), INT16_C( 17739), -INT16_C( 26428), -INT16_C( 5374), INT16_C( 10725), -INT16_C( 25036), INT16_C( 11286), -INT16_C( 19676), -INT16_C( 26508) }, { -INT16_C( 6835), INT16_C( 6632), -INT16_C( 20534), INT16_C( 12254), -INT16_C( 21039), INT16_C( 22993), INT16_C( 2560), INT16_C( 19366), INT16_C( 27215), INT16_C( 20964), -INT16_C( 13995), -INT16_C( 30342), -INT16_C( 28569), -INT16_C( 29770), INT16_C( 10819), -INT16_C( 28381), INT16_C( 3087), -INT16_C( 9814), -INT16_C( 30533), -INT16_C( 29688), -INT16_C( 9930), INT16_C( 14053), -INT16_C( 29469), INT16_C( 12930), INT16_C( 26358), INT16_C( 19587), -INT16_C( 721), -INT16_C( 26667), -INT16_C( 29811), -INT16_C( 11998), INT16_C( 18101), -INT16_C( 15262) }, { -INT16_C( 11364), INT16_C( 17382), -INT16_C( 7770), -INT16_C( 22958), INT16_C( 11979), INT16_C( 25388), INT16_C( 28733), -INT16_C( 7382), INT16_C( 0), -INT16_C( 14096), INT16_C( 0), -INT16_C( 10591), INT16_C( 0), INT16_C( 31086), -INT16_C( 20165), INT16_C( 0), INT16_C( 0), INT16_C( 10779), 
INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 9804), INT16_C( 0), INT16_C( 0), -INT16_C( 21439), -INT16_C( 6841), INT16_C( 0), -INT16_C( 15942), INT16_C( 0), -INT16_C( 712), -INT16_C( 1575), INT16_C( 0) } }, { UINT32_C( 228396114), { INT16_C( 32382), INT16_C( 24833), -INT16_C( 31990), INT16_C( 403), INT16_C( 5865), INT16_C( 6221), INT16_C( 8723), -INT16_C( 24145), -INT16_C( 11602), INT16_C( 25458), -INT16_C( 11240), INT16_C( 27176), -INT16_C( 14880), INT16_C( 29815), INT16_C( 4203), -INT16_C( 5825), INT16_C( 16526), -INT16_C( 26293), -INT16_C( 8509), -INT16_C( 21350), -INT16_C( 6155), INT16_C( 2244), INT16_C( 29705), -INT16_C( 18519), INT16_C( 6982), INT16_C( 24091), INT16_C( 17391), -INT16_C( 12344), INT16_C( 16136), INT16_C( 29508), -INT16_C( 31921), -INT16_C( 8867) }, { -INT16_C( 22333), -INT16_C( 31114), INT16_C( 4230), INT16_C( 31538), -INT16_C( 2313), INT16_C( 388), INT16_C( 11626), -INT16_C( 20296), -INT16_C( 11447), INT16_C( 14350), -INT16_C( 10730), INT16_C( 7944), INT16_C( 19477), INT16_C( 25746), -INT16_C( 4145), -INT16_C( 28094), -INT16_C( 18281), INT16_C( 7704), INT16_C( 19145), -INT16_C( 16231), INT16_C( 7488), -INT16_C( 21567), INT16_C( 31307), -INT16_C( 27557), INT16_C( 27213), INT16_C( 25804), -INT16_C( 11200), INT16_C( 22147), INT16_C( 5408), -INT16_C( 4166), -INT16_C( 1019), -INT16_C( 25471) }, { INT16_C( 0), -INT16_C( 6281), INT16_C( 0), INT16_C( 0), INT16_C( 3552), INT16_C( 0), INT16_C( 20349), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 21970), -INT16_C( 30416), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1755), INT16_C( 0), INT16_C( 10636), INT16_C( 27955), INT16_C( 1333), INT16_C( 0), INT16_C( 0), INT16_C( 19460), -INT16_C( 31341), INT16_C( 0), INT16_C( 6191), INT16_C( 9803), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT32_C(2126158261), { INT16_C( 113), -INT16_C( 17201), INT16_C( 10874), -INT16_C( 14512), INT16_C( 7572), -INT16_C( 10965), -INT16_C( 20751), INT16_C( 4651), -INT16_C( 6716), -INT16_C( 
14079), -INT16_C( 31774), -INT16_C( 26779), INT16_C( 8220), INT16_C( 21), INT16_C( 21364), -INT16_C( 6876), -INT16_C( 3245), -INT16_C( 12894), -INT16_C( 3555), -INT16_C( 19819), -INT16_C( 16369), INT16_C( 391), -INT16_C( 19857), INT16_C( 13075), INT16_C( 5271), INT16_C( 31228), INT16_C( 24983), -INT16_C( 19440), INT16_C( 9601), -INT16_C( 2636), -INT16_C( 10119), -INT16_C( 13093) }, { INT16_C( 32203), -INT16_C( 5990), INT16_C( 12143), INT16_C( 32666), INT16_C( 8687), INT16_C( 24192), -INT16_C( 27693), INT16_C( 27537), -INT16_C( 29273), INT16_C( 16356), -INT16_C( 2577), INT16_C( 28915), -INT16_C( 22758), -INT16_C( 27802), INT16_C( 16767), INT16_C( 19040), -INT16_C( 1346), INT16_C( 11570), -INT16_C( 13015), INT16_C( 6316), INT16_C( 11502), -INT16_C( 15753), INT16_C( 2239), INT16_C( 26413), INT16_C( 4502), -INT16_C( 31322), -INT16_C( 26362), INT16_C( 8693), INT16_C( 23360), -INT16_C( 16460), INT16_C( 5276), INT16_C( 23049) }, { INT16_C( 32316), INT16_C( 0), INT16_C( 23017), INT16_C( 0), INT16_C( 16259), INT16_C( 13227), INT16_C( 0), INT16_C( 32188), INT16_C( 29547), INT16_C( 0), INT16_C( 0), INT16_C( 2136), -INT16_C( 14538), INT16_C( 0), INT16_C( 0), INT16_C( 12164), INT16_C( 0), -INT16_C( 1324), INT16_C( 0), -INT16_C( 13503), -INT16_C( 4867), -INT16_C( 15362), INT16_C( 0), -INT16_C( 26048), INT16_C( 0), -INT16_C( 94), -INT16_C( 1379), -INT16_C( 10747), -INT16_C( 32575), -INT16_C( 19096), -INT16_C( 4843), INT16_C( 0) } }, { UINT32_C( 931674894), { -INT16_C( 14495), INT16_C( 8377), -INT16_C( 6449), INT16_C( 25991), INT16_C( 11767), -INT16_C( 278), -INT16_C( 7994), INT16_C( 1567), -INT16_C( 11461), -INT16_C( 10043), -INT16_C( 12568), -INT16_C( 2510), -INT16_C( 17910), INT16_C( 4654), INT16_C( 32495), INT16_C( 20489), -INT16_C( 15803), INT16_C( 5232), -INT16_C( 1880), -INT16_C( 24454), INT16_C( 25637), -INT16_C( 4962), -INT16_C( 17084), -INT16_C( 32526), -INT16_C( 18288), INT16_C( 30808), -INT16_C( 30074), -INT16_C( 28561), -INT16_C( 25275), INT16_C( 13475), -INT16_C( 
21477), INT16_C( 24708) }, { -INT16_C( 2961), INT16_C( 6004), -INT16_C( 4372), INT16_C( 4791), INT16_C( 21843), -INT16_C( 26626), -INT16_C( 4078), -INT16_C( 23785), INT16_C( 28584), INT16_C( 12059), -INT16_C( 29958), INT16_C( 16319), INT16_C( 25127), INT16_C( 17011), -INT16_C( 2289), INT16_C( 32418), INT16_C( 6123), -INT16_C( 10091), INT16_C( 19717), INT16_C( 22762), -INT16_C( 5982), -INT16_C( 18960), INT16_C( 2008), -INT16_C( 32424), INT16_C( 29559), INT16_C( 29104), INT16_C( 28670), INT16_C( 9648), INT16_C( 9170), -INT16_C( 7832), INT16_C( 2586), INT16_C( 1375) }, { INT16_C( 0), INT16_C( 14381), -INT16_C( 10821), INT16_C( 30782), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 17123), INT16_C( 2016), INT16_C( 0), INT16_C( 13809), INT16_C( 7217), INT16_C( 21665), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1692), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 586), INT16_C( 11271), -INT16_C( 5624), -INT16_C( 1404), INT16_C( 0), -INT16_C( 16105), INT16_C( 5643), INT16_C( 0), INT16_C( 0) } }, { UINT32_C( 668857377), { INT16_C( 28591), -INT16_C( 30567), -INT16_C( 3721), -INT16_C( 4599), -INT16_C( 18076), INT16_C( 25183), INT16_C( 3880), -INT16_C( 1400), -INT16_C( 4046), INT16_C( 19675), INT16_C( 15098), INT16_C( 7249), INT16_C( 12079), INT16_C( 28739), -INT16_C( 15626), -INT16_C( 22956), -INT16_C( 4814), -INT16_C( 22226), INT16_C( 14302), INT16_C( 17303), -INT16_C( 2320), INT16_C( 6309), INT16_C( 11525), INT16_C( 14099), -INT16_C( 4579), INT16_C( 6275), -INT16_C( 11223), INT16_C( 22580), INT16_C( 30467), -INT16_C( 1336), INT16_C( 7481), INT16_C( 27552) }, { -INT16_C( 12790), -INT16_C( 5868), -INT16_C( 21755), -INT16_C( 2772), -INT16_C( 11871), -INT16_C( 23027), INT16_C( 8447), INT16_C( 7389), INT16_C( 24591), INT16_C( 14388), INT16_C( 26677), INT16_C( 14480), INT16_C( 22751), INT16_C( 6450), -INT16_C( 11659), -INT16_C( 32636), -INT16_C( 26208), -INT16_C( 23191), -INT16_C( 27324), -INT16_C( 6502), -INT16_C( 22426), INT16_C( 
25996), INT16_C( 27336), -INT16_C( 10366), -INT16_C( 18742), -INT16_C( 241), -INT16_C( 24801), -INT16_C( 456), INT16_C( 27384), INT16_C( 27927), -INT16_C( 25539), -INT16_C( 8723) }, { INT16_C( 15801), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 2156), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 23761), INT16_C( 0), -INT16_C( 30706), -INT16_C( 30347), -INT16_C( 27285), INT16_C( 9944), -INT16_C( 31022), INT16_C( 0), -INT16_C( 13022), INT16_C( 10801), -INT16_C( 24746), INT16_C( 0), -INT16_C( 26675), INT16_C( 3733), -INT16_C( 23321), INT16_C( 6034), INT16_C( 29512), INT16_C( 0), INT16_C( 0), INT16_C( 26591), INT16_C( 0), INT16_C( 0) } }, { UINT32_C(2038650421), { -INT16_C( 4923), -INT16_C( 29001), INT16_C( 14678), INT16_C( 8293), INT16_C( 30192), INT16_C( 3872), INT16_C( 22548), INT16_C( 3085), INT16_C( 9666), -INT16_C( 134), INT16_C( 26561), -INT16_C( 2339), INT16_C( 24766), -INT16_C( 22161), -INT16_C( 12419), INT16_C( 17403), -INT16_C( 19525), INT16_C( 4561), INT16_C( 14060), -INT16_C( 9167), INT16_C( 20907), -INT16_C( 16149), -INT16_C( 1623), INT16_C( 27852), INT16_C( 17950), -INT16_C( 8341), INT16_C( 18606), INT16_C( 27861), INT16_C( 17576), INT16_C( 9749), INT16_C( 4371), -INT16_C( 12695) }, { INT16_C( 15044), -INT16_C( 20257), INT16_C( 4464), INT16_C( 7309), INT16_C( 30818), INT16_C( 3292), -INT16_C( 22415), -INT16_C( 28808), -INT16_C( 7185), -INT16_C( 25234), INT16_C( 17196), -INT16_C( 11255), INT16_C( 7816), -INT16_C( 25606), INT16_C( 25391), -INT16_C( 3222), INT16_C( 18845), INT16_C( 3748), INT16_C( 12634), -INT16_C( 17110), INT16_C( 1705), INT16_C( 7113), INT16_C( 16814), -INT16_C( 25174), INT16_C( 6436), INT16_C( 20538), INT16_C( 17244), -INT16_C( 7131), INT16_C( 8034), -INT16_C( 28288), -INT16_C( 5501), INT16_C( 8325) }, { INT16_C( 10121), INT16_C( 0), INT16_C( 19142), INT16_C( 0), -INT16_C( 4526), INT16_C( 7164), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 25368), -INT16_C( 21779), INT16_C( 0), INT16_C( 32582), 
INT16_C( 0), INT16_C( 12972), INT16_C( 0), -INT16_C( 680), INT16_C( 8309), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 2678), INT16_C( 24386), INT16_C( 0), INT16_C( 0), INT16_C( 20730), INT16_C( 25610), -INT16_C( 18539), -INT16_C( 1130), INT16_C( 0) } }, { UINT32_C(2385389875), { INT16_C( 5214), INT16_C( 3358), -INT16_C( 13995), INT16_C( 31146), -INT16_C( 6686), INT16_C( 16074), -INT16_C( 4312), -INT16_C( 30173), -INT16_C( 23794), -INT16_C( 28388), -INT16_C( 24179), -INT16_C( 16206), -INT16_C( 7990), INT16_C( 9294), -INT16_C( 26311), -INT16_C( 26841), INT16_C( 18093), INT16_C( 676), INT16_C( 20239), -INT16_C( 3716), INT16_C( 17972), INT16_C( 23599), INT16_C( 21045), INT16_C( 17383), INT16_C( 1013), -INT16_C( 32043), -INT16_C( 30812), INT16_C( 28227), -INT16_C( 28313), -INT16_C( 24430), -INT16_C( 18133), -INT16_C( 10184) }, { -INT16_C( 8961), INT16_C( 3803), INT16_C( 22315), INT16_C( 24575), INT16_C( 12189), -INT16_C( 11588), -INT16_C( 23679), INT16_C( 30485), -INT16_C( 5466), INT16_C( 19193), INT16_C( 15473), -INT16_C( 9800), INT16_C( 19150), -INT16_C( 1671), -INT16_C( 20221), INT16_C( 977), -INT16_C( 21362), -INT16_C( 18159), INT16_C( 4355), -INT16_C( 24551), -INT16_C( 10944), -INT16_C( 16014), -INT16_C( 30600), INT16_C( 7736), INT16_C( 12914), -INT16_C( 7064), INT16_C( 8302), INT16_C( 15549), INT16_C( 13930), INT16_C( 27957), INT16_C( 2024), INT16_C( 30320) }, { -INT16_C( 3747), INT16_C( 7161), INT16_C( 0), INT16_C( 0), INT16_C( 5503), INT16_C( 4486), INT16_C( 0), INT16_C( 0), -INT16_C( 29260), INT16_C( 0), INT16_C( 0), -INT16_C( 26006), INT16_C( 0), INT16_C( 7623), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 17483), INT16_C( 24594), -INT16_C( 28267), INT16_C( 0), INT16_C( 7585), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 26429), -INT16_C( 22510), -INT16_C( 21760), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 20136) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = 
simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_maskz_add_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_add_epi32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[16]; int32_t b[16]; int32_t r[16]; } test_vec[] = { { { -INT32_C( 2023322181), -INT32_C( 153083711), INT32_C( 1496228679), -INT32_C( 1879098037), INT32_C( 556222349), -INT32_C( 1406744359), INT32_C( 328840924), INT32_C( 558351974), INT32_C( 363441491), INT32_C( 353077710), INT32_C( 2003712300), INT32_C( 50752886), -INT32_C( 1926943052), -INT32_C( 767996938), INT32_C( 1759891970), -INT32_C( 326488680) }, { -INT32_C( 1476316198), -INT32_C( 390198084), -INT32_C( 1151325115), INT32_C( 1321166490), -INT32_C( 1965235052), INT32_C( 828118319), -INT32_C( 2137373976), INT32_C( 1231823983), INT32_C( 334654807), -INT32_C( 1090801543), INT32_C( 1987730396), INT32_C( 1455765954), INT32_C( 1289855261), -INT32_C( 1635893834), -INT32_C( 299952001), -INT32_C( 1825010884) }, { INT32_C( 795328917), -INT32_C( 543281795), INT32_C( 344903564), -INT32_C( 557931547), -INT32_C( 1409012703), -INT32_C( 578626040), -INT32_C( 1808533052), INT32_C( 1790175957), INT32_C( 698096298), -INT32_C( 737723833), -INT32_C( 303524600), INT32_C( 1506518840), -INT32_C( 637087791), INT32_C( 1891076524), INT32_C( 1459939969), INT32_C( 2143467732) } }, { { INT32_C( 1923492601), -INT32_C( 1238261286), -INT32_C( 1087525891), INT32_C( 18215652), INT32_C( 1229846163), -INT32_C( 1276589260), INT32_C( 530712547), -INT32_C( 1951212910), -INT32_C( 553756668), -INT32_C( 141283334), -INT32_C( 1095319078), INT32_C( 1186974643), -INT32_C( 158331710), -INT32_C( 1146521384), INT32_C( 299584383), INT32_C( 698191141) }, { -INT32_C( 536372250), -INT32_C( 1529373494), INT32_C( 291671389), INT32_C( 441917784), INT32_C( 84993837), -INT32_C( 557729185), INT32_C( 737188869), INT32_C( 257199401), 
-INT32_C( 219194328), INT32_C( 1435944696), -INT32_C( 1402537901), INT32_C( 1187429913), INT32_C( 88922021), -INT32_C( 1763504751), -INT32_C( 759049303), -INT32_C( 1998449056) }, { INT32_C( 1387120351), INT32_C( 1527332516), -INT32_C( 795854502), INT32_C( 460133436), INT32_C( 1314840000), -INT32_C( 1834318445), INT32_C( 1267901416), -INT32_C( 1694013509), -INT32_C( 772950996), INT32_C( 1294661362), INT32_C( 1797110317), -INT32_C( 1920562740), -INT32_C( 69409689), INT32_C( 1384941161), -INT32_C( 459464920), -INT32_C( 1300257915) } }, { { INT32_C( 1786433906), -INT32_C( 339799912), INT32_C( 563553800), -INT32_C( 1989648668), -INT32_C( 963726283), INT32_C( 1784443585), -INT32_C( 1506009531), -INT32_C( 1506927052), -INT32_C( 2012173840), -INT32_C( 1032597575), -INT32_C( 639431691), -INT32_C( 1637659799), -INT32_C( 1067126273), -INT32_C( 1456816029), INT32_C( 307193822), INT32_C( 1975025029) }, { -INT32_C( 520239066), -INT32_C( 1918733928), -INT32_C( 446200452), -INT32_C( 796669231), INT32_C( 529655739), -INT32_C( 2033665113), -INT32_C( 1466427614), -INT32_C( 1155706476), -INT32_C( 1315235047), INT32_C( 138362252), -INT32_C( 1813141822), INT32_C( 728002672), -INT32_C( 28641961), -INT32_C( 746319184), -INT32_C( 1099227863), -INT32_C( 2022074258) }, { INT32_C( 1266194840), INT32_C( 2036433456), INT32_C( 117353348), INT32_C( 1508649397), -INT32_C( 434070544), -INT32_C( 249221528), INT32_C( 1322530151), INT32_C( 1632333768), INT32_C( 967558409), -INT32_C( 894235323), INT32_C( 1842393783), -INT32_C( 909657127), -INT32_C( 1095768234), INT32_C( 2091832083), -INT32_C( 792034041), -INT32_C( 47049229) } }, { { INT32_C( 1060705459), INT32_C( 323450961), -INT32_C( 1901644770), -INT32_C( 71758940), -INT32_C( 1325792256), INT32_C( 1082359318), INT32_C( 167706267), INT32_C( 1251047319), -INT32_C( 594883957), INT32_C( 1626329410), -INT32_C( 1427204602), -INT32_C( 1582913631), -INT32_C( 1034772309), -INT32_C( 1174219490), INT32_C( 1807941844), INT32_C( 45438071) }, { INT32_C( 
1625177886), INT32_C( 398511377), INT32_C( 96579172), INT32_C( 27748182), INT32_C( 650377479), -INT32_C( 1562327602), INT32_C( 1007526853), INT32_C( 373212152), INT32_C( 326573058), INT32_C( 1311389674), INT32_C( 1012133094), INT32_C( 1530788435), -INT32_C( 1031732749), -INT32_C( 1939578426), -INT32_C( 53972476), INT32_C( 923993909) }, { -INT32_C( 1609083951), INT32_C( 721962338), -INT32_C( 1805065598), -INT32_C( 44010758), -INT32_C( 675414777), -INT32_C( 479968284), INT32_C( 1175233120), INT32_C( 1624259471), -INT32_C( 268310899), -INT32_C( 1357248212), -INT32_C( 415071508), -INT32_C( 52125196), -INT32_C( 2066505058), INT32_C( 1181169380), INT32_C( 1753969368), INT32_C( 969431980) } }, { { INT32_C( 223054371), -INT32_C( 1487178303), -INT32_C( 1243369631), -INT32_C( 1659887191), -INT32_C( 396390110), -INT32_C( 160119822), INT32_C( 1794325813), INT32_C( 1738671684), INT32_C( 1366683024), -INT32_C( 990261150), INT32_C( 695852159), INT32_C( 533105149), INT32_C( 201860378), INT32_C( 503479528), -INT32_C( 41355847), -INT32_C( 1956304133) }, { INT32_C( 2061359639), INT32_C( 708761258), -INT32_C( 1336690766), INT32_C( 1523521856), INT32_C( 644273982), INT32_C( 222586964), INT32_C( 1493945694), INT32_C( 266694903), -INT32_C( 192298422), INT32_C( 1243531160), -INT32_C( 1090883202), -INT32_C( 937899382), -INT32_C( 168853855), INT32_C( 1141060582), -INT32_C( 123859456), -INT32_C( 939031682) }, { -INT32_C( 2010553286), -INT32_C( 778417045), INT32_C( 1714906899), -INT32_C( 136365335), INT32_C( 247883872), INT32_C( 62467142), -INT32_C( 1006695789), INT32_C( 2005366587), INT32_C( 1174384602), INT32_C( 253270010), -INT32_C( 395031043), -INT32_C( 404794233), INT32_C( 33006523), INT32_C( 1644540110), -INT32_C( 165215303), INT32_C( 1399631481) } }, { { -INT32_C( 574844859), -INT32_C( 718808233), -INT32_C( 678223284), -INT32_C( 1918915604), INT32_C( 260279849), -INT32_C( 1034647870), INT32_C( 314241684), -INT32_C( 1160068747), -INT32_C( 1466460591), -INT32_C( 1099055503), -INT32_C( 
862646048), -INT32_C( 463850309), -INT32_C( 2047550013), -INT32_C( 146323357), -INT32_C( 1358364102), INT32_C( 359261123) }, { -INT32_C( 339935111), -INT32_C( 1616299074), INT32_C( 124468811), INT32_C( 904643954), INT32_C( 96133026), INT32_C( 1643905575), -INT32_C( 955251452), INT32_C( 1658616296), INT32_C( 944609913), INT32_C( 551024341), -INT32_C( 1507376588), -INT32_C( 1428417784), INT32_C( 447780594), -INT32_C( 1669616488), -INT32_C( 1704686414), INT32_C( 2147237893) }, { -INT32_C( 914779970), INT32_C( 1959859989), -INT32_C( 553754473), -INT32_C( 1014271650), INT32_C( 356412875), INT32_C( 609257705), -INT32_C( 641009768), INT32_C( 498547549), -INT32_C( 521850678), -INT32_C( 548031162), INT32_C( 1924944660), -INT32_C( 1892268093), -INT32_C( 1599769419), -INT32_C( 1815939845), INT32_C( 1231916780), -INT32_C( 1788468280) } }, { { -INT32_C( 1346942502), INT32_C( 1943047743), -INT32_C( 669321264), -INT32_C( 41683446), INT32_C( 622277516), -INT32_C( 1849584929), INT32_C( 606872862), INT32_C( 1084434534), -INT32_C( 1309648270), -INT32_C( 1205485336), -INT32_C( 1030668361), -INT32_C( 1044442059), INT32_C( 652662343), -INT32_C( 2017941400), INT32_C( 866903245), INT32_C( 2121551372) }, { -INT32_C( 1875876696), -INT32_C( 616016604), -INT32_C( 912402028), INT32_C( 881482989), -INT32_C( 1688506062), -INT32_C( 433974503), INT32_C( 52088311), -INT32_C( 1014854117), INT32_C( 374584050), -INT32_C( 1678664953), INT32_C( 1650757493), INT32_C( 513273579), INT32_C( 2025452127), -INT32_C( 60826875), -INT32_C( 1006667352), -INT32_C( 108625657) }, { INT32_C( 1072148098), INT32_C( 1327031139), -INT32_C( 1581723292), INT32_C( 839799543), -INT32_C( 1066228546), INT32_C( 2011407864), INT32_C( 658961173), INT32_C( 69580417), -INT32_C( 935064220), INT32_C( 1410817007), INT32_C( 620089132), -INT32_C( 531168480), -INT32_C( 1616852826), -INT32_C( 2078768275), -INT32_C( 139764107), INT32_C( 2012925715) } }, { { INT32_C( 974117171), -INT32_C( 371916684), INT32_C( 2068593039), -INT32_C( 
2019957976), -INT32_C( 637513003), -INT32_C( 707371219), -INT32_C( 543631912), -INT32_C( 1965547945), INT32_C( 1808132087), INT32_C( 2002098919), -INT32_C( 51207724), INT32_C( 1501793156), INT32_C( 171148253), -INT32_C( 1159788062), INT32_C( 899250142), -INT32_C( 1933545067) }, { INT32_C( 1089963352), -INT32_C( 206091233), INT32_C( 1911532013), INT32_C( 298480436), -INT32_C( 652476938), -INT32_C( 443287034), INT32_C( 102378865), -INT32_C( 141370722), INT32_C( 2134346079), -INT32_C( 1015877930), -INT32_C( 885693801), -INT32_C( 874709035), INT32_C( 61143037), INT32_C( 1659386097), INT32_C( 57148261), INT32_C( 1039858397) }, { INT32_C( 2064080523), -INT32_C( 578007917), -INT32_C( 314842244), -INT32_C( 1721477540), -INT32_C( 1289989941), -INT32_C( 1150658253), -INT32_C( 441253047), -INT32_C( 2106918667), -INT32_C( 352489130), INT32_C( 986220989), -INT32_C( 936901525), INT32_C( 627084121), INT32_C( 232291290), INT32_C( 499598035), INT32_C( 956398403), -INT32_C( 893686670) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_add_epi32(a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_add_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t src[16]; const simde__mmask16 k; const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { { -INT32_C( 1958532398), -INT32_C( 733777631), -INT32_C( 61793464), INT32_C( 1915617450), INT32_C( 759754662), INT32_C( 196190852), -INT32_C( 77082310), -INT32_C( 259049954), -INT32_C( 2105748895), -INT32_C( 1940471997), -INT32_C( 2071418662), INT32_C( 1324791464), INT32_C( 695943077), INT32_C( 456404449), INT32_C( 471309310), INT32_C( 856467154) }, UINT16_C(34936), { INT32_C( 1554501556), INT32_C( 715824517), -INT32_C( 1118624036), INT32_C( 1373210451), 
-INT32_C( 177344477), INT32_C( 237533590), INT32_C( 1254743298), -INT32_C( 1653861911), INT32_C( 1727599584), -INT32_C( 1919900495), INT32_C( 491513034), -INT32_C( 1905384341), -INT32_C( 1434199276), INT32_C( 1454943060), INT32_C( 1923121545), INT32_C( 1930431890) }, { -INT32_C( 774305504), INT32_C( 2002741677), -INT32_C( 1181439411), -INT32_C( 498662706), INT32_C( 848088029), -INT32_C( 7846794), INT32_C( 1483876805), INT32_C( 1456177718), INT32_C( 958964875), INT32_C( 1538295565), -INT32_C( 15448783), INT32_C( 635525959), -INT32_C( 1655214810), INT32_C( 1989992369), INT32_C( 1053691400), INT32_C( 479566224) }, { -INT32_C( 1958532398), -INT32_C( 733777631), -INT32_C( 61793464), INT32_C( 874547745), INT32_C( 670743552), INT32_C( 229686796), -INT32_C( 1556347193), -INT32_C( 259049954), -INT32_C( 2105748895), -INT32_C( 1940471997), -INT32_C( 2071418662), -INT32_C( 1269858382), INT32_C( 695943077), INT32_C( 456404449), INT32_C( 471309310), -INT32_C( 1884969182) } }, { { INT32_C( 1263910205), INT32_C( 1973814596), -INT32_C( 1837843894), INT32_C( 1018647829), INT32_C( 1943604930), -INT32_C( 152472083), INT32_C( 338999428), -INT32_C( 1892628143), -INT32_C( 891648634), -INT32_C( 717258613), INT32_C( 1332196154), -INT32_C( 896852472), INT32_C( 440230956), INT32_C( 1578117082), INT32_C( 812795103), -INT32_C( 1799380210) }, UINT16_C(39208), { INT32_C( 1520693330), INT32_C( 992292367), INT32_C( 1834312339), INT32_C( 1758160265), -INT32_C( 1197916758), INT32_C( 155998432), INT32_C( 196914162), -INT32_C( 1671477942), INT32_C( 1173750326), -INT32_C( 1015014608), INT32_C( 120641150), -INT32_C( 445580485), INT32_C( 429721913), INT32_C( 1394797153), -INT32_C( 547364971), INT32_C( 1518059044) }, { -INT32_C( 257985856), INT32_C( 196354189), INT32_C( 823387382), INT32_C( 420971488), -INT32_C( 315444084), INT32_C( 876696990), INT32_C( 1477681204), INT32_C( 515084126), -INT32_C( 1911664127), INT32_C( 1754972786), -INT32_C( 2019906137), -INT32_C( 1130319568), INT32_C( 78238309), INT32_C( 
1530456615), -INT32_C( 390837366), -INT32_C( 620337190) }, { INT32_C( 1263910205), INT32_C( 1973814596), -INT32_C( 1837843894), -INT32_C( 2115835543), INT32_C( 1943604930), INT32_C( 1032695422), INT32_C( 338999428), -INT32_C( 1892628143), -INT32_C( 737913801), -INT32_C( 717258613), INT32_C( 1332196154), -INT32_C( 1575900053), INT32_C( 507960222), INT32_C( 1578117082), INT32_C( 812795103), INT32_C( 897721854) } }, { { INT32_C( 745149881), INT32_C( 2123629783), -INT32_C( 519754063), INT32_C( 1167959519), -INT32_C( 1622587784), -INT32_C( 1141145295), -INT32_C( 1482379316), -INT32_C( 813520362), -INT32_C( 1745097537), -INT32_C( 1592422160), -INT32_C( 1635640386), INT32_C( 954408896), -INT32_C( 1747440538), INT32_C( 2035471277), -INT32_C( 1742670206), INT32_C( 1617404833) }, UINT16_C(25487), { -INT32_C( 397433816), INT32_C( 690041539), INT32_C( 197196126), INT32_C( 1317344204), -INT32_C( 1427725047), -INT32_C( 670347960), -INT32_C( 1554513232), INT32_C( 928348431), -INT32_C( 517954531), INT32_C( 889864663), INT32_C( 104975162), -INT32_C( 413874466), -INT32_C( 1265485205), INT32_C( 948739463), -INT32_C( 1344543585), INT32_C( 2078683229) }, { -INT32_C( 1403255083), -INT32_C( 2115934649), INT32_C( 260514353), INT32_C( 1425529832), -INT32_C( 1660385003), -INT32_C( 975858650), -INT32_C( 713772936), -INT32_C( 1236247583), -INT32_C( 1453151135), INT32_C( 1143620371), INT32_C( 1314173542), -INT32_C( 1549644915), -INT32_C( 96425260), -INT32_C( 1228991170), -INT32_C( 1500760891), -INT32_C( 262349681) }, { -INT32_C( 1800688899), -INT32_C( 1425893110), INT32_C( 457710479), -INT32_C( 1552093260), -INT32_C( 1622587784), -INT32_C( 1141145295), -INT32_C( 1482379316), -INT32_C( 307899152), -INT32_C( 1971105666), INT32_C( 2033485034), -INT32_C( 1635640386), INT32_C( 954408896), -INT32_C( 1747440538), -INT32_C( 280251707), INT32_C( 1449662820), INT32_C( 1617404833) } }, { { -INT32_C( 1667645815), INT32_C( 1759560706), INT32_C( 62272630), INT32_C( 1403410815), INT32_C( 1112401411), 
-INT32_C( 1040708101), -INT32_C( 798522303), -INT32_C( 356465567), -INT32_C( 2071569790), -INT32_C( 1796446690), INT32_C( 446145435), -INT32_C( 9552132), INT32_C( 541178660), INT32_C( 165755592), INT32_C( 534333630), -INT32_C( 1895196148) }, UINT16_C(36852), { -INT32_C( 1616167517), -INT32_C( 1600251525), -INT32_C( 1648303915), -INT32_C( 660102886), -INT32_C( 151486231), INT32_C( 243597594), -INT32_C( 2027906927), INT32_C( 991479448), INT32_C( 2145043204), -INT32_C( 1306560035), INT32_C( 1934614361), INT32_C( 1783363200), -INT32_C( 1855962249), -INT32_C( 694098619), INT32_C( 375242877), -INT32_C( 1957595769) }, { INT32_C( 352988216), -INT32_C( 20501851), -INT32_C( 1972300023), -INT32_C( 2064335859), INT32_C( 1159091200), -INT32_C( 1239697863), -INT32_C( 36931466), INT32_C( 629677805), -INT32_C( 281308342), -INT32_C( 957545795), INT32_C( 659578393), INT32_C( 447431706), -INT32_C( 782253672), -INT32_C( 293045641), -INT32_C( 538225422), -INT32_C( 1140493198) }, { -INT32_C( 1667645815), INT32_C( 1759560706), INT32_C( 674363358), INT32_C( 1403410815), INT32_C( 1007604969), -INT32_C( 996100269), -INT32_C( 2064838393), INT32_C( 1621157253), INT32_C( 1863734862), INT32_C( 2030861466), -INT32_C( 1700774542), -INT32_C( 2064172390), INT32_C( 541178660), INT32_C( 165755592), INT32_C( 534333630), INT32_C( 1196878329) } }, { { -INT32_C( 995409913), INT32_C( 1552586818), INT32_C( 293854198), -INT32_C( 1205129697), INT32_C( 1737067504), -INT32_C( 128642811), -INT32_C( 656981658), -INT32_C( 1131029323), INT32_C( 1602240540), -INT32_C( 809825575), INT32_C( 98582245), INT32_C( 1555893356), -INT32_C( 1664858473), -INT32_C( 1097590440), INT32_C( 261516378), INT32_C( 1707813704) }, UINT16_C(19308), { INT32_C( 692123069), -INT32_C( 1735983871), -INT32_C( 1674294716), -INT32_C( 1101346461), INT32_C( 2110648373), -INT32_C( 1998415588), INT32_C( 986556132), -INT32_C( 495525595), -INT32_C( 687032618), -INT32_C( 126905676), INT32_C( 1066706140), -INT32_C( 1560416659), -INT32_C( 98579490), 
INT32_C( 1216479844), -INT32_C( 830255192), INT32_C( 129038641) }, { INT32_C( 1675607215), INT32_C( 710626894), INT32_C( 1600843762), -INT32_C( 1140758563), -INT32_C( 1766448846), -INT32_C( 874563293), -INT32_C( 1181130104), INT32_C( 180439643), INT32_C( 1433313286), -INT32_C( 511718930), -INT32_C( 1774130759), -INT32_C( 2091761071), -INT32_C( 2045114013), -INT32_C( 900597438), -INT32_C( 1232802981), INT32_C( 1002456373) }, { -INT32_C( 995409913), INT32_C( 1552586818), -INT32_C( 73450954), INT32_C( 2052862272), INT32_C( 1737067504), INT32_C( 1421988415), -INT32_C( 194573972), -INT32_C( 1131029323), INT32_C( 746280668), -INT32_C( 638624606), INT32_C( 98582245), INT32_C( 642789566), -INT32_C( 1664858473), -INT32_C( 1097590440), -INT32_C( 2063058173), INT32_C( 1707813704) } }, { { -INT32_C( 745525531), -INT32_C( 1313599240), INT32_C( 1246230009), -INT32_C( 1697736137), -INT32_C( 450828125), INT32_C( 1018130913), -INT32_C( 1846398116), INT32_C( 1573761656), -INT32_C( 651076127), INT32_C( 1737155949), INT32_C( 296866266), INT32_C( 246120299), INT32_C( 1223936871), -INT32_C( 1719360707), INT32_C( 1328248534), INT32_C( 179107881) }, UINT16_C(56661), { -INT32_C( 1431315650), -INT32_C( 1028105637), INT32_C( 1661709350), INT32_C( 637308751), INT32_C( 796141318), INT32_C( 1966678303), -INT32_C( 1053287170), -INT32_C( 950050167), -INT32_C( 1737421251), -INT32_C( 1906627992), INT32_C( 636577494), -INT32_C( 78975243), INT32_C( 891993877), -INT32_C( 559258656), INT32_C( 144761471), -INT32_C( 2117009596) }, { INT32_C( 1964654861), INT32_C( 1090811243), -INT32_C( 798558757), -INT32_C( 104025629), INT32_C( 1345255024), -INT32_C( 651241382), -INT32_C( 18690374), -INT32_C( 629165363), INT32_C( 1599117811), -INT32_C( 375368690), INT32_C( 767166281), INT32_C( 673613496), -INT32_C( 696757124), -INT32_C( 424630740), INT32_C( 1122275957), INT32_C( 924672836) }, { INT32_C( 533339211), -INT32_C( 1313599240), INT32_C( 863150593), -INT32_C( 1697736137), INT32_C( 2141396342), INT32_C( 
1018130913), -INT32_C( 1071977544), INT32_C( 1573761656), -INT32_C( 138303440), INT32_C( 1737155949), INT32_C( 1403743775), INT32_C( 594638253), INT32_C( 195236753), -INT32_C( 1719360707), INT32_C( 1267037428), -INT32_C( 1192336760) } }, { { INT32_C( 194407933), INT32_C( 183842753), -INT32_C( 164122818), -INT32_C( 1323410123), -INT32_C( 578251087), -INT32_C( 1312606148), INT32_C( 250914762), INT32_C( 138744075), INT32_C( 1058266238), INT32_C( 1363740691), -INT32_C( 330858057), -INT32_C( 1868667426), INT32_C( 929900283), INT32_C( 686371166), -INT32_C( 482943528), INT32_C( 1827372014) }, UINT16_C(65367), { INT32_C( 1420493429), INT32_C( 1659128167), -INT32_C( 845524625), INT32_C( 1542816642), INT32_C( 1312697184), -INT32_C( 21353817), INT32_C( 812213545), -INT32_C( 806411175), -INT32_C( 1910269145), INT32_C( 1425082340), -INT32_C( 618558632), INT32_C( 1849038606), -INT32_C( 373525438), -INT32_C( 941066594), INT32_C( 888689115), -INT32_C( 1677465739) }, { INT32_C( 1730881154), -INT32_C( 2034557907), -INT32_C( 1251877721), INT32_C( 908302323), -INT32_C( 1440751861), INT32_C( 812713813), -INT32_C( 832280232), -INT32_C( 748001199), -INT32_C( 1137011314), INT32_C( 1480783281), -INT32_C( 988961838), INT32_C( 1174089786), INT32_C( 1693391631), INT32_C( 2073321762), INT32_C( 457832906), -INT32_C( 269503647) }, { -INT32_C( 1143592713), -INT32_C( 375429740), -INT32_C( 2097402346), -INT32_C( 1323410123), -INT32_C( 128054677), -INT32_C( 1312606148), -INT32_C( 20066687), INT32_C( 138744075), INT32_C( 1247686837), -INT32_C( 1389101675), -INT32_C( 1607520470), -INT32_C( 1271838904), INT32_C( 1319866193), INT32_C( 1132255168), INT32_C( 1346522021), -INT32_C( 1946969386) } }, { { -INT32_C( 89446071), -INT32_C( 246158049), -INT32_C( 894017392), -INT32_C( 1609518447), -INT32_C( 284819507), INT32_C( 728406368), -INT32_C( 213470318), -INT32_C( 1327286937), INT32_C( 2125106783), INT32_C( 208665980), -INT32_C( 271112866), -INT32_C( 1534072873), INT32_C( 1200919782), -INT32_C( 1066205650), 
INT32_C( 431274162), INT32_C( 1305057262) }, UINT16_C(29477), { INT32_C( 935232863), INT32_C( 1390103916), INT32_C( 278491106), INT32_C( 550505326), -INT32_C( 1304853308), INT32_C( 1107231259), -INT32_C( 421344651), INT32_C( 1672843268), -INT32_C( 2120584427), -INT32_C( 1546357055), INT32_C( 1404268005), INT32_C( 1030980473), INT32_C( 602909704), INT32_C( 610594478), -INT32_C( 1140176968), -INT32_C( 316686121) }, { -INT32_C( 194069965), INT32_C( 362234416), INT32_C( 694766256), -INT32_C( 697901874), INT32_C( 939087241), -INT32_C( 77898173), INT32_C( 2092394149), INT32_C( 1500108326), -INT32_C( 1068574576), -INT32_C( 891886310), -INT32_C( 17613008), -INT32_C( 1529587429), -INT32_C( 237187666), -INT32_C( 789825749), -INT32_C( 1018322019), INT32_C( 169719418) }, { INT32_C( 741162898), -INT32_C( 246158049), INT32_C( 973257362), -INT32_C( 1609518447), -INT32_C( 284819507), INT32_C( 1029333086), -INT32_C( 213470318), -INT32_C( 1327286937), INT32_C( 1105808293), INT32_C( 1856723931), -INT32_C( 271112866), -INT32_C( 1534072873), INT32_C( 365722038), -INT32_C( 179231271), INT32_C( 2136468309), INT32_C( 1305057262) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi32(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_mask_add_epi32(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_add_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { UINT16_C(52979), { INT32_C( 1318030952), -INT32_C( 938502652), -INT32_C( 1205717630), INT32_C( 1648076236), INT32_C( 1874746093), INT32_C( 507402795), -INT32_C( 271937240), -INT32_C( 581761675), -INT32_C( 1758731373), -INT32_C( 77579399), INT32_C( 
1018397296), INT32_C( 345959975), INT32_C( 1954766153), -INT32_C( 527253065), -INT32_C( 925934509), -INT32_C( 190504095) }, { INT32_C( 2139869190), INT32_C( 2071653131), -INT32_C( 1799934611), INT32_C( 1688819227), INT32_C( 1792552115), INT32_C( 1095396078), INT32_C( 654908102), INT32_C( 1125887549), -INT32_C( 1966954626), INT32_C( 343186), -INT32_C( 2070626967), -INT32_C( 957793005), INT32_C( 1479590250), -INT32_C( 224822484), -INT32_C( 770006379), -INT32_C( 837470896) }, { -INT32_C( 837067154), INT32_C( 1133150479), INT32_C( 0), INT32_C( 0), -INT32_C( 627669088), INT32_C( 1602798873), INT32_C( 382970862), INT32_C( 544125874), INT32_C( 0), -INT32_C( 77236213), -INT32_C( 1052229671), -INT32_C( 611833030), INT32_C( 0), INT32_C( 0), -INT32_C( 1695940888), -INT32_C( 1027974991) } }, { UINT16_C(55260), { INT32_C( 771752731), -INT32_C( 1410012863), -INT32_C( 687659861), INT32_C( 885628063), -INT32_C( 1912151234), -INT32_C( 178513127), INT32_C( 90486258), INT32_C( 780260115), INT32_C( 408715991), INT32_C( 381898859), INT32_C( 351127156), -INT32_C( 1605847198), -INT32_C( 1288810598), INT32_C( 1571392106), INT32_C( 1382157631), -INT32_C( 1199512351) }, { -INT32_C( 774841242), -INT32_C( 1578593492), -INT32_C( 1145711271), INT32_C( 660340108), -INT32_C( 1210414772), INT32_C( 1393853203), INT32_C( 1923446417), -INT32_C( 1070979494), INT32_C( 798161410), -INT32_C( 422544755), -INT32_C( 593394353), -INT32_C( 821822334), -INT32_C( 1735991931), -INT32_C( 219440543), INT32_C( 1801752848), -INT32_C( 1188327753) }, { INT32_C( 0), INT32_C( 0), -INT32_C( 1833371132), INT32_C( 1545968171), INT32_C( 1172401290), INT32_C( 0), INT32_C( 2013932675), -INT32_C( 290719379), INT32_C( 1206877401), -INT32_C( 40645896), -INT32_C( 242267197), INT32_C( 0), INT32_C( 1270164767), INT32_C( 0), -INT32_C( 1111056817), INT32_C( 1907127192) } }, { UINT16_C(48520), { -INT32_C( 1067213763), INT32_C( 495937176), -INT32_C( 1531636413), -INT32_C( 1080647249), -INT32_C( 383059406), INT32_C( 279074440), INT32_C( 
1260751635), -INT32_C( 2116935613), INT32_C( 1413559740), -INT32_C( 562966373), -INT32_C( 1803343899), -INT32_C( 95217208), -INT32_C( 1662812652), -INT32_C( 408058412), INT32_C( 1412616720), -INT32_C( 1344994061) }, { -INT32_C( 737929671), -INT32_C( 877431322), INT32_C( 1683961500), INT32_C( 1667150415), INT32_C( 67125552), -INT32_C( 672354873), INT32_C( 1915428479), INT32_C( 1545732131), -INT32_C( 63887850), INT32_C( 952624283), INT32_C( 1771841050), INT32_C( 164494297), INT32_C( 51301692), -INT32_C( 103024006), INT32_C( 996935192), INT32_C( 496537095) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 586503166), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 571203482), INT32_C( 1349671890), INT32_C( 0), -INT32_C( 31502849), INT32_C( 69277089), -INT32_C( 1611510960), -INT32_C( 511082418), INT32_C( 0), -INT32_C( 848456966) } }, { UINT16_C(51636), { -INT32_C( 516938744), INT32_C( 1542126879), INT32_C( 1147140298), INT32_C( 188627698), INT32_C( 1195813440), -INT32_C( 328868296), INT32_C( 1413185447), INT32_C( 1746649952), INT32_C( 105467111), INT32_C( 341914697), INT32_C( 525910060), INT32_C( 992646906), INT32_C( 2021814336), -INT32_C( 161159345), -INT32_C( 951345050), INT32_C( 1244620387) }, { -INT32_C( 61834830), -INT32_C( 653217363), INT32_C( 1828218994), INT32_C( 1067918079), -INT32_C( 491246957), INT32_C( 2027428881), INT32_C( 524231612), INT32_C( 1013542538), -INT32_C( 1808221721), -INT32_C( 579975061), -INT32_C( 1337366863), INT32_C( 485486985), INT32_C( 754886427), -INT32_C( 2136680764), -INT32_C( 2069830662), INT32_C( 968886610) }, { INT32_C( 0), INT32_C( 0), -INT32_C( 1319608004), INT32_C( 0), INT32_C( 704566483), INT32_C( 1698560585), INT32_C( 0), -INT32_C( 1534774806), -INT32_C( 1702754610), INT32_C( 0), INT32_C( 0), INT32_C( 1478133891), INT32_C( 0), INT32_C( 0), INT32_C( 1273791584), -INT32_C( 2081460299) } }, { UINT16_C(63939), { INT32_C( 732058786), INT32_C( 1615303237), -INT32_C( 41073351), INT32_C( 377368860), INT32_C( 1738153493), -INT32_C( 
358589913), -INT32_C( 1793561005), INT32_C( 1300702122), -INT32_C( 1116198280), -INT32_C( 182533956), INT32_C( 569617157), -INT32_C( 248024612), -INT32_C( 1235693169), INT32_C( 2141321516), INT32_C( 303348071), INT32_C( 1432329437) }, { -INT32_C( 2112694330), -INT32_C( 1653133161), -INT32_C( 1195480357), INT32_C( 1789523675), -INT32_C( 215940409), INT32_C( 1651753723), INT32_C( 1484031867), -INT32_C( 374484189), INT32_C( 1114357931), -INT32_C( 857742352), INT32_C( 696557133), INT32_C( 1536372116), INT32_C( 709866543), -INT32_C( 225590666), INT32_C( 1833566537), -INT32_C( 2141783851) }, { -INT32_C( 1380635544), -INT32_C( 37829924), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 309529138), INT32_C( 926217933), -INT32_C( 1840349), INT32_C( 0), INT32_C( 0), INT32_C( 1288347504), -INT32_C( 525826626), INT32_C( 1915730850), INT32_C( 2136914608), -INT32_C( 709454414) } }, { UINT16_C(49848), { -INT32_C( 736364480), INT32_C( 1429188390), -INT32_C( 629113245), -INT32_C( 1966338752), -INT32_C( 470346226), -INT32_C( 966570738), -INT32_C( 1267784177), INT32_C( 145220552), INT32_C( 48022236), -INT32_C( 715715727), -INT32_C( 894445686), -INT32_C( 212567068), -INT32_C( 1596568687), -INT32_C( 1469695335), INT32_C( 677238112), -INT32_C( 1792015175) }, { INT32_C( 949423302), -INT32_C( 1592922601), -INT32_C( 1435714362), -INT32_C( 929185737), -INT32_C( 1519881204), INT32_C( 239980462), INT32_C( 1563863716), INT32_C( 1978820270), -INT32_C( 1985115790), INT32_C( 1043053176), -INT32_C( 1377265802), INT32_C( 1668646487), -INT32_C( 1475813638), INT32_C( 1370904237), -INT32_C( 1347425280), -INT32_C( 1004232366) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1399442807), -INT32_C( 1990227430), -INT32_C( 726590276), INT32_C( 0), INT32_C( 2124040822), INT32_C( 0), INT32_C( 327337449), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 670187168), INT32_C( 1498719755) } }, { UINT16_C(53547), { INT32_C( 1739507983), INT32_C( 1237984079), -INT32_C( 1359883519), 
INT32_C( 687908649), -INT32_C( 438784109), INT32_C( 2074737744), INT32_C( 1478424525), INT32_C( 2136604527), INT32_C( 417728457), INT32_C( 744665131), -INT32_C( 1394912381), -INT32_C( 1898521605), -INT32_C( 629887350), INT32_C( 2018909611), -INT32_C( 2066648044), INT32_C( 1023617652) }, { INT32_C( 1565911346), INT32_C( 495564697), INT32_C( 113861643), -INT32_C( 913006785), -INT32_C( 106690482), INT32_C( 980548134), -INT32_C( 490847634), -INT32_C( 1625308819), INT32_C( 1157395882), INT32_C( 929137964), INT32_C( 691874538), INT32_C( 418632394), INT32_C( 1152986), -INT32_C( 12877167), INT32_C( 853735877), INT32_C( 1708196283) }, { -INT32_C( 989547967), INT32_C( 1733548776), INT32_C( 0), -INT32_C( 225098136), INT32_C( 0), -INT32_C( 1239681418), INT32_C( 0), INT32_C( 0), INT32_C( 1575124339), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 628734364), INT32_C( 0), -INT32_C( 1212912167), -INT32_C( 1563153361) } }, { UINT16_C(52598), { -INT32_C( 10086347), -INT32_C( 1005102614), -INT32_C( 2117785360), INT32_C( 1870659754), -INT32_C( 1264491783), -INT32_C( 635800988), -INT32_C( 1837251777), INT32_C( 63854798), INT32_C( 1510093936), INT32_C( 2099124621), -INT32_C( 335617215), -INT32_C( 581206045), INT32_C( 1167195361), -INT32_C( 1373590673), INT32_C( 1027644783), INT32_C( 1698697205) }, { -INT32_C( 775994813), -INT32_C( 1672552869), -INT32_C( 1517859391), -INT32_C( 1383931188), INT32_C( 1324553183), INT32_C( 788272063), -INT32_C( 1502921296), -INT32_C( 1895060660), INT32_C( 1214303213), INT32_C( 1793372073), -INT32_C( 938513412), INT32_C( 762679630), INT32_C( 1685809317), INT32_C( 747796347), INT32_C( 13827508), -INT32_C( 1785668184) }, { INT32_C( 0), INT32_C( 1617311813), INT32_C( 659322545), INT32_C( 0), INT32_C( 60061400), INT32_C( 152471075), INT32_C( 954794223), INT32_C( 0), -INT32_C( 1570570147), INT32_C( 0), -INT32_C( 1274130627), INT32_C( 181473585), INT32_C( 0), INT32_C( 0), INT32_C( 1041472291), -INT32_C( 86970979) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) 
/ sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_maskz_add_epi32(test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_add_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { { INT64_C( 5896694048212443049), INT64_C( 9000266092577364175), INT64_C( 7152103947590144860), INT64_C( 2989496802120002433), INT64_C( 7710631622698424498), -INT64_C( 3633641352504339518), INT64_C( 4274662313843579209), INT64_C( 6826149357438294289) }, { -INT64_C( 164363539517042510), -INT64_C( 8691483022440823005), INT64_C( 9220214710722807807), -INT64_C( 2401321110146592095), -INT64_C( 1183069234730910884), INT64_C( 5562197665816815723), INT64_C( 1124204932795639468), -INT64_C( 3326459772972193332) }, { INT64_C( 5732330508695400539), INT64_C( 308783070136541170), -INT64_C( 2074425415396598949), INT64_C( 588175691973410338), INT64_C( 6527562387967513614), INT64_C( 1928556313312476205), INT64_C( 5398867246639218677), INT64_C( 3499689584466100957) } }, { { INT64_C( 5873253456280027845), -INT64_C( 7547985786885765724), INT64_C( 958785414761629392), INT64_C( 8879370812030102515), INT64_C( 4762149369024389598), INT64_C( 798391932315570322), INT64_C( 147097132267652539), -INT64_C( 3786220049007964093) }, { -INT64_C( 6906651495143342010), -INT64_C( 8287694440800363594), INT64_C( 2452371479376447222), INT64_C( 6726846324779217826), INT64_C( 5025569198072523023), INT64_C( 225235015271111619), -INT64_C( 2487938372584494983), INT64_C( 3745242421369017476) }, { -INT64_C( 1033398038863314165), INT64_C( 2611063846023422298), INT64_C( 3411156894138076614), -INT64_C( 2840526936900231275), -INT64_C( 8659025506612638995), INT64_C( 1023626947586681941), -INT64_C( 2340841240316842444), -INT64_C( 40977627638946617) } }, { 
{ INT64_C( 7387045378804169392), INT64_C( 7348215347083393770), -INT64_C( 7432287296260602942), -INT64_C( 4746462990122596405), INT64_C( 7669772552352133735), INT64_C( 5083821277037292091), INT64_C( 5407731889132030559), -INT64_C( 3084302269135830938) }, { -INT64_C( 1461330460425637939), -INT64_C( 3061426891990558023), INT64_C( 5599758734307477482), INT64_C( 1862788523933954198), -INT64_C( 4557890179386853341), INT64_C( 6666876053459161657), -INT64_C( 4937214972124475832), INT64_C( 6111833508638834029) }, { INT64_C( 5925714918378531453), INT64_C( 4286788455092835747), -INT64_C( 1832528561953125460), -INT64_C( 2883674466188642207), INT64_C( 3111882372965280394), -INT64_C( 6696046743213097868), INT64_C( 470516917007554727), INT64_C( 3027531239503003091) } }, { { -INT64_C( 7991663547628636080), -INT64_C( 2555292973839346502), INT64_C( 4212139769629200532), INT64_C( 1966319092590916547), INT64_C( 1506042142180667901), -INT64_C( 9075093079022557283), INT64_C( 7143746535270586651), -INT64_C( 2897889499141433630) }, { INT64_C( 6953298079720946194), -INT64_C( 6437157297342791622), INT64_C( 8555627167819425208), -INT64_C( 4217080419303877945), -INT64_C( 916288211658955227), INT64_C( 3576356706803505520), -INT64_C( 6218269451284303702), -INT64_C( 1742958193093650601) }, { -INT64_C( 1038365467907689886), -INT64_C( 8992450271182138124), -INT64_C( 5678977136260925876), -INT64_C( 2250761326712961398), INT64_C( 589753930521712674), -INT64_C( 5498736372219051763), INT64_C( 925477083986282949), -INT64_C( 4640847692235084231) } }, { { INT64_C( 2312342974665588586), -INT64_C( 6729576343545367823), -INT64_C( 4578026214523853331), -INT64_C( 1074221180203122067), -INT64_C( 1195656230424156519), INT64_C( 3385005156404397150), -INT64_C( 2575086539621213671), INT64_C( 4660983342689947190) }, { INT64_C( 5690001192450114569), INT64_C( 6765706558176579445), -INT64_C( 8375529455621185160), -INT64_C( 280638300551000014), INT64_C( 791220201005032380), -INT64_C( 5337991249511014582), INT64_C( 
2478776332018633862), INT64_C( 7142732816633802545) }, { INT64_C( 8002344167115703155), INT64_C( 36130214631211622), INT64_C( 5493188403564513125), -INT64_C( 1354859480754122081), -INT64_C( 404436029419124139), -INT64_C( 1952986093106617432), -INT64_C( 96310207602579809), -INT64_C( 6643027914385801881) } }, { { INT64_C( 7590546826509362360), INT64_C( 4799960603843565481), -INT64_C( 3764863488869189202), -INT64_C( 8485326154395304909), INT64_C( 125025846558150196), INT64_C( 4919203572335817541), INT64_C( 1811753159855661758), INT64_C( 6393760326532469855) }, { INT64_C( 421764692607537793), INT64_C( 501672283606598428), -INT64_C( 2545232539499374162), INT64_C( 6202803407104615064), INT64_C( 4980991260009414746), INT64_C( 2385761506151573452), -INT64_C( 7628987825040033081), INT64_C( 5850290225876708869) }, { INT64_C( 8012311519116900153), INT64_C( 5301632887450163909), -INT64_C( 6310096028368563364), -INT64_C( 2282522747290689845), INT64_C( 5106017106567564942), INT64_C( 7304965078487390993), -INT64_C( 5817234665184371323), -INT64_C( 6202693521300372892) } }, { { INT64_C( 3861145535682141991), INT64_C( 4704120286579625139), INT64_C( 7310649930581147103), INT64_C( 6132617560052451027), -INT64_C( 4220933801323952434), -INT64_C( 467755223424977465), -INT64_C( 9153765608270723279), INT64_C( 8400169494660134417) }, { INT64_C( 4306311459952605676), INT64_C( 1432426031515283149), INT64_C( 1311843823099622919), -INT64_C( 3392084749394608174), INT64_C( 8992722739203577885), INT64_C( 5779599678188505408), INT64_C( 5119810430763850234), INT64_C( 6804001435340987831) }, { INT64_C( 8167456995634747667), INT64_C( 6136546318094908288), INT64_C( 8622493753680770022), INT64_C( 2740532810657842853), INT64_C( 4771788937879625451), INT64_C( 5311844454763527943), -INT64_C( 4033955177506873045), -INT64_C( 3242573143708429368) } }, { { INT64_C( 896142439321910083), INT64_C( 1197503498379252485), -INT64_C( 7856220743107108291), -INT64_C( 6406762567310591882), -INT64_C( 4058014011976186410), 
INT64_C( 9080299469053222364), -INT64_C( 7078487466013880490), INT64_C( 7199966683762914017) }, { INT64_C( 7780449457883481456), INT64_C( 1824347912971095698), -INT64_C( 8415522727832944271), INT64_C( 7418198203865008897), INT64_C( 4556395623730444353), INT64_C( 7889010207409543840), -INT64_C( 3090529460147599642), -INT64_C( 1719435354305139514) }, { INT64_C( 8676591897205391539), INT64_C( 3021851411350348183), INT64_C( 2175000602769499054), INT64_C( 1011435636554417015), INT64_C( 498381611754257943), -INT64_C( 1477434397246785412), INT64_C( 8277727147548071484), INT64_C( 5480531329457774503) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_add_epi64(a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_add_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t src[16]; const simde__mmask8 k; const int64_t a[16]; const int64_t b[16]; const int64_t r[16]; } test_vec[] = { { { INT64_C( 1637095571448452370), -INT64_C( 5010656489973054228), INT64_C( 8096577095910365922), INT64_C( 3926524403043278656), INT64_C( 1692383872749537703), -INT64_C( 7546382928320257262), -INT64_C( 6602394267959061769), INT64_C( 2598636899144412341) }, UINT8_C( 63), { INT64_C( 3064278465490095078), -INT64_C( 1676615221711466009), -INT64_C( 5333405411065419087), -INT64_C( 3810230114928306775), -INT64_C( 1075017760364328478), INT64_C( 2095704811519734998), -INT64_C( 299103093840977638), INT64_C( 5108483185182444596) }, { -INT64_C( 4648200900296301693), -INT64_C( 4629136759825157063), -INT64_C( 5066623773317061022), INT64_C( 536973459407932105), INT64_C( 2486008889004565721), -INT64_C( 3554876755438703545), INT64_C( 1436140625484484016), INT64_C( 5713013447801749692) }, { -INT64_C( 1583922434806206615), -INT64_C( 6305751981536623072), 
INT64_C( 8046714889327071507), -INT64_C( 3273256655520374670), INT64_C( 1410991128640237243), -INT64_C( 1459171943918968547), -INT64_C( 6602394267959061769), INT64_C( 2598636899144412341) } }, { { INT64_C( 3991194155833482583), INT64_C( 7365863369617845245), -INT64_C( 6217348007288128678), INT64_C( 148675600489051978), -INT64_C( 5748943111581392624), -INT64_C( 9084794923389396527), INT64_C( 6675703621262608398), -INT64_C( 3278006165881122860) }, UINT8_C( 79), { INT64_C( 7078729567351001797), INT64_C( 3976127268296180429), INT64_C( 6764870419675162927), -INT64_C( 6394845513855835965), -INT64_C( 4382478565492243517), -INT64_C( 1011318967947184367), -INT64_C( 6311831277423214532), INT64_C( 4236157129718335039) }, { INT64_C( 4311691048566315805), INT64_C( 693258357862808300), -INT64_C( 5061911316372677582), INT64_C( 5814609134873172224), -INT64_C( 2890120277031405697), INT64_C( 273070111211249652), INT64_C( 6337650268323962303), -INT64_C( 956874791454847436) }, { -INT64_C( 7056323457792234014), INT64_C( 4669385626158988729), INT64_C( 1702959103302485345), -INT64_C( 580236378982663741), -INT64_C( 5748943111581392624), -INT64_C( 9084794923389396527), INT64_C( 25818990900747771), -INT64_C( 3278006165881122860) } }, { { INT64_C( 5402490335443754038), INT64_C( 7004459312563912287), INT64_C( 6873494867043635124), -INT64_C( 1746693303777676963), INT64_C( 4412405986682822043), -INT64_C( 3277034903515019135), INT64_C( 971442364987875570), INT64_C( 6704628126445290098) }, UINT8_C(252), { -INT64_C( 3199500174101950700), INT64_C( 7068896874256776325), -INT64_C( 840732006067128670), -INT64_C( 7451465598208935429), INT64_C( 990872770473652578), INT64_C( 1777037797882114565), -INT64_C( 3158904769779877244), INT64_C( 6189642379913322441) }, { -INT64_C( 2014230672746244489), INT64_C( 1316129223197016245), INT64_C( 6358081634684124815), INT64_C( 2925524125942721361), -INT64_C( 7645647755206468574), INT64_C( 8364597264550793588), INT64_C( 2245635740289228099), INT64_C( 
9124008468664275140) }, { INT64_C( 5402490335443754038), INT64_C( 7004459312563912287), INT64_C( 5517349628616996145), -INT64_C( 4525941472266214068), -INT64_C( 6654774984732815996), -INT64_C( 8305109011276643463), -INT64_C( 913269029490649145), -INT64_C( 3133093225131954035) } }, { { INT64_C( 7876626396527707865), INT64_C( 6327703798935457910), -INT64_C( 8444156093278868254), INT64_C( 792525990600389412), INT64_C( 6542343655737491300), -INT64_C( 6733297332257473758), INT64_C( 3495113324412254258), -INT64_C( 8894133035806391978) }, UINT8_C( 48), { -INT64_C( 1618640895730195884), INT64_C( 566130083197796387), -INT64_C( 3091365637900741985), -INT64_C( 8802714067975954187), INT64_C( 8931894081495034460), -INT64_C( 8463108217014804938), -INT64_C( 2811541516088205358), -INT64_C( 4054272745087766267) }, { INT64_C( 6129898402509662270), INT64_C( 565315231888848484), -INT64_C( 8016080185148496634), -INT64_C( 3365171251436437734), INT64_C( 5232753838442094123), -INT64_C( 1806946338783921745), INT64_C( 6678716485601335700), INT64_C( 2537267084449117649) }, { INT64_C( 7876626396527707865), INT64_C( 6327703798935457910), -INT64_C( 8444156093278868254), INT64_C( 792525990600389412), -INT64_C( 4282096153772423033), INT64_C( 8176689517910824933), INT64_C( 3495113324412254258), -INT64_C( 8894133035806391978) } }, { { -INT64_C( 4217327386109371060), INT64_C( 1462146507223994500), INT64_C( 9029403535350110895), -INT64_C( 6164557771088777128), INT64_C( 7967243682726010805), -INT64_C( 9152970505335981211), INT64_C( 7521223655988276535), INT64_C( 1078941248321503985) }, UINT8_C( 10), { -INT64_C( 6444823229810484523), -INT64_C( 7166643799492954826), INT64_C( 1160825679683284586), INT64_C( 4107978185158323148), -INT64_C( 8042316938503522478), INT64_C( 4355947116441623144), INT64_C( 124837676903243996), -INT64_C( 1113239454258551314) }, { INT64_C( 5394206117329760241), INT64_C( 790827237554372843), -INT64_C( 3320718750563147595), -INT64_C( 3521057494574767212), -INT64_C( 
3689301451095683169), INT64_C( 4102642388072787639), -INT64_C( 6298270799792855837), INT64_C( 908597294068841711) }, { -INT64_C( 4217327386109371060), -INT64_C( 6375816561938581983), INT64_C( 9029403535350110895), INT64_C( 586920690583555936), INT64_C( 7967243682726010805), -INT64_C( 9152970505335981211), INT64_C( 7521223655988276535), INT64_C( 1078941248321503985) } }, { { INT64_C( 7311693701301843659), INT64_C( 7494898546895421768), INT64_C( 2349409172957636062), INT64_C( 4322479761028576388), INT64_C( 3265778120923777598), -INT64_C( 5310310381393437343), -INT64_C( 4003064257566966751), INT64_C( 2693634056535957430) }, UINT8_C( 63), { INT64_C( 74681461099467337), -INT64_C( 5086377914583683253), -INT64_C( 8273458662043960522), -INT64_C( 39800438883330947), INT64_C( 3679636505814865579), -INT64_C( 5866531736128853600), -INT64_C( 3073049960134569313), INT64_C( 1395686423709339305) }, { INT64_C( 6281452445510075920), INT64_C( 3045217899379926812), INT64_C( 7460303757460924507), INT64_C( 1845390670211485473), INT64_C( 1096976101920587563), -INT64_C( 7954793774127551260), INT64_C( 392601397348307534), -INT64_C( 8539621634010629797) }, { INT64_C( 6356133906609543257), -INT64_C( 2041160015203756441), -INT64_C( 813154904583036015), INT64_C( 1805590231328154526), INT64_C( 4776612607735453142), INT64_C( 4625418563453146756), -INT64_C( 4003064257566966751), INT64_C( 2693634056535957430) } }, { { -INT64_C( 5625659159720783894), -INT64_C( 4262733505137438704), INT64_C( 4771074415986154316), -INT64_C( 2710563408861215365), -INT64_C( 9137340262048543309), INT64_C( 6372485775011303733), -INT64_C( 224123893461729351), INT64_C( 7083941961317845637) }, UINT8_C( 4), { INT64_C( 7269643312887620103), INT64_C( 4329870181778099646), INT64_C( 2564722579906344530), INT64_C( 7190335853134220430), -INT64_C( 968852038973637098), INT64_C( 1853343154121473663), -INT64_C( 4838903194234096357), -INT64_C( 824357888695620912) }, { INT64_C( 1274425862000582536), INT64_C( 9189953907530268329), 
INT64_C( 5306942928662607291), -INT64_C( 321439533223302985), -INT64_C( 340471119033620572), -INT64_C( 3077940849910492058), INT64_C( 4642198055108443306), -INT64_C( 8432040435859988082) }, { -INT64_C( 5625659159720783894), -INT64_C( 4262733505137438704), INT64_C( 7871665508568951821), -INT64_C( 2710563408861215365), -INT64_C( 9137340262048543309), INT64_C( 6372485775011303733), -INT64_C( 224123893461729351), INT64_C( 7083941961317845637) } }, { { -INT64_C( 9127382355256823033), INT64_C( 6974267907656827098), INT64_C( 9068262761557100815), INT64_C( 1459580854064754385), -INT64_C( 2177275983803055828), -INT64_C( 5361079444635839613), -INT64_C( 2408539542357402585), -INT64_C( 5262782123028966956) }, UINT8_C(216), { -INT64_C( 2761901989156618652), -INT64_C( 7396259151174703979), INT64_C( 1620878075755917699), -INT64_C( 4915584061870677991), -INT64_C( 219395007845324972), INT64_C( 3208968296463365233), -INT64_C( 3812486535545803012), INT64_C( 7117239981973485491) }, { -INT64_C( 6444525492333861076), INT64_C( 5168757207706484966), -INT64_C( 7509645842022035381), INT64_C( 3857445270331687960), INT64_C( 6839094782695310862), -INT64_C( 1825179838618698216), INT64_C( 7833075129166066744), -INT64_C( 3860117335376243408) }, { -INT64_C( 9127382355256823033), INT64_C( 6974267907656827098), INT64_C( 9068262761557100815), -INT64_C( 1058138791538990031), INT64_C( 6619699774849985890), -INT64_C( 5361079444635839613), INT64_C( 4020588593620263732), INT64_C( 3257122646597242083) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi64(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_mask_add_epi64(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_add_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { 
const simde__mmask8 k; const int64_t a[16]; const int64_t b[16]; const int64_t r[16]; } test_vec[] = { { UINT8_C(165), { INT64_C( 6299320458837796671), -INT64_C( 3196421240547742572), -INT64_C( 9151855083952004989), -INT64_C( 2652966953870515301), -INT64_C( 3361856595458879637), -INT64_C( 8765515588673012554), INT64_C( 4218943347121949634), INT64_C( 8056360307695763285) }, { -INT64_C( 6084423613766652800), -INT64_C( 986696027690857020), INT64_C( 716507424025936408), INT64_C( 2755580261000000714), INT64_C( 4185659851829194101), -INT64_C( 1359153785955268607), INT64_C( 8651579458846990930), INT64_C( 5266260289850313545) }, { INT64_C( 214896845071143871), INT64_C( 0), -INT64_C( 8435347659926068581), INT64_C( 0), INT64_C( 0), INT64_C( 8322074699081270455), INT64_C( 0), -INT64_C( 5124123476163474786) } }, { UINT8_C( 48), { INT64_C( 1327964625155044601), -INT64_C( 1979941431104987422), -INT64_C( 6388061835839239302), INT64_C( 6198577468949612625), INT64_C( 7878256497849969529), -INT64_C( 8253649976125538866), INT64_C( 7274427282076993456), -INT64_C( 5985215513423679939) }, { -INT64_C( 5628578266044451862), INT64_C( 6350840359232373634), -INT64_C( 993721339898183746), -INT64_C( 7573227544723558906), -INT64_C( 7078269819051780816), INT64_C( 8967324078724744818), -INT64_C( 5423879114017925356), -INT64_C( 850101963731351568) }, { INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 799986678798188713), INT64_C( 713674102599205952), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(169), { -INT64_C( 3269505804180125889), INT64_C( 4898802782180457107), -INT64_C( 4122299440839867048), -INT64_C( 2805040416254433388), -INT64_C( 3881074597838727547), INT64_C( 1644288571922952801), -INT64_C( 91554778652228748), INT64_C( 5302276918373401890) }, { -INT64_C( 6606572555650556850), INT64_C( 2829390529692828527), INT64_C( 1381995888231790022), INT64_C( 6568329687495316506), -INT64_C( 1303457298250678015), -INT64_C( 2013848872050549965), INT64_C( 1771957535492024468), -INT64_C( 
3657734556536641579) }, { INT64_C( 8570665713878868877), INT64_C( 0), INT64_C( 0), INT64_C( 3763289271240883118), INT64_C( 0), -INT64_C( 369560300127597164), INT64_C( 0), INT64_C( 1644542361836760311) } }, { UINT8_C(222), { INT64_C( 1693716282863260189), -INT64_C( 7181241992025315484), -INT64_C( 2718570591168046034), -INT64_C( 9033248451413530712), -INT64_C( 5937343786860347514), INT64_C( 1858518704354021561), -INT64_C( 4687457667859782492), -INT64_C( 7792311420757763850) }, { -INT64_C( 5252692508087571419), -INT64_C( 611453451093374081), -INT64_C( 3394024332202210286), -INT64_C( 5460606234653922919), INT64_C( 9036821187608596148), INT64_C( 1013709022150741447), -INT64_C( 5106768477839482762), INT64_C( 561708961651182727) }, { INT64_C( 0), -INT64_C( 7792695443118689565), -INT64_C( 6112594923370256320), INT64_C( 3952889387642097985), INT64_C( 3099477400748248634), INT64_C( 0), INT64_C( 8652517928010286362), -INT64_C( 7230602459106581123) } }, { UINT8_C(229), { INT64_C( 8138391701483141613), INT64_C( 4406625028354607943), -INT64_C( 1993379839983388751), INT64_C( 2662541310383647862), INT64_C( 9046393778122708729), INT64_C( 2568271637353789258), -INT64_C( 8121881179064237364), -INT64_C( 5039088444989734475) }, { -INT64_C( 1722519523622035611), INT64_C( 7561249774353008216), -INT64_C( 2405460785354645258), INT64_C( 3464354200514345880), -INT64_C( 6718838163239081926), -INT64_C( 275183546372714198), -INT64_C( 6250246341167154373), INT64_C( 2512751206208769253) }, { INT64_C( 6415872177861106002), INT64_C( 0), -INT64_C( 4398840625338034009), INT64_C( 0), INT64_C( 0), INT64_C( 2293088090981075060), INT64_C( 4074616553478159879), -INT64_C( 2526337238780965222) } }, { UINT8_C(254), { -INT64_C( 3622607429175870549), -INT64_C( 6274694410419404970), -INT64_C( 7031979866514108454), -INT64_C( 6667111988167567258), INT64_C( 12644015949398435), -INT64_C( 6358628794173882517), INT64_C( 4907679902253939692), -INT64_C( 1348765095626235475) }, { -INT64_C( 3412187665191306502), 
-INT64_C( 2311003071927724424), INT64_C( 6505582346217724197), INT64_C( 4346333461565343769), -INT64_C( 8000778778988929343), -INT64_C( 5787191995171151651), -INT64_C( 7069248972678558756), -INT64_C( 1332976243435314173) }, { INT64_C( 0), -INT64_C( 8585697482347129394), -INT64_C( 526397520296384257), -INT64_C( 2320778526602223489), -INT64_C( 7988134763039530908), INT64_C( 6300923284364517448), -INT64_C( 2161569070424619064), -INT64_C( 2681741339061549648) } }, { UINT8_C(239), { -INT64_C( 1385535232953346975), INT64_C( 5696251178006254957), INT64_C( 6906112230749870041), -INT64_C( 166219096561869968), -INT64_C( 4862855913802450804), -INT64_C( 444736920620238273), -INT64_C( 8760446760531417455), -INT64_C( 334961341082568769) }, { INT64_C( 2935809197118471858), INT64_C( 3902790899556199184), INT64_C( 6467643616834876965), -INT64_C( 5327742948472452442), -INT64_C( 4008634985254182324), -INT64_C( 5837191191359649246), -INT64_C( 7634820792522817257), INT64_C( 1904947663936929972) }, { INT64_C( 1550273964165124883), -INT64_C( 8847701996147097475), -INT64_C( 5072988226124804610), -INT64_C( 5493962045034322410), INT64_C( 0), -INT64_C( 6281928111979887519), INT64_C( 2051476520655316904), INT64_C( 1569986322854361203) } }, { UINT8_C( 94), { -INT64_C( 5133576159156088793), -INT64_C( 3958400705177220649), -INT64_C( 8271053347050896680), -INT64_C( 8784986448452653061), -INT64_C( 2149372564095095867), INT64_C( 3728957796702186606), -INT64_C( 4321223872130680659), -INT64_C( 7079217880864431396) }, { INT64_C( 4154637502148371899), INT64_C( 2033637388041814953), -INT64_C( 5191631281194602905), -INT64_C( 5010619628260266496), INT64_C( 7826456547109668761), INT64_C( 2465062992106081707), INT64_C( 7649721765552376983), INT64_C( 7524593379129367732) }, { INT64_C( 0), -INT64_C( 1924763317135405696), INT64_C( 4984059445464052031), INT64_C( 4651137996996632059), INT64_C( 5677083983014572894), INT64_C( 0), INT64_C( 3328497893421696324), INT64_C( 0) } } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_maskz_add_epi64(test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_add_ps (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[16]; simde_float32 b[16]; simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 667.91), SIMDE_FLOAT32_C( 656.58), SIMDE_FLOAT32_C( -596.78), SIMDE_FLOAT32_C( -487.31), SIMDE_FLOAT32_C( -822.62), SIMDE_FLOAT32_C( 812.31), SIMDE_FLOAT32_C( 89.92), SIMDE_FLOAT32_C( -844.98), SIMDE_FLOAT32_C( -326.84), SIMDE_FLOAT32_C( -869.81), SIMDE_FLOAT32_C( -327.10), SIMDE_FLOAT32_C( -520.41), SIMDE_FLOAT32_C( 590.39), SIMDE_FLOAT32_C( 190.09), SIMDE_FLOAT32_C( -999.49), SIMDE_FLOAT32_C( 279.05) }, { SIMDE_FLOAT32_C( 510.55), SIMDE_FLOAT32_C( 821.50), SIMDE_FLOAT32_C( -282.52), SIMDE_FLOAT32_C( 624.18), SIMDE_FLOAT32_C( 410.62), SIMDE_FLOAT32_C( -938.89), SIMDE_FLOAT32_C( 71.79), SIMDE_FLOAT32_C( 376.91), SIMDE_FLOAT32_C( 674.13), SIMDE_FLOAT32_C( 85.78), SIMDE_FLOAT32_C( -18.27), SIMDE_FLOAT32_C( 115.82), SIMDE_FLOAT32_C( -281.68), SIMDE_FLOAT32_C( -193.16), SIMDE_FLOAT32_C( -673.77), SIMDE_FLOAT32_C( -613.77) }, { SIMDE_FLOAT32_C( 1178.46), SIMDE_FLOAT32_C( 1478.08), SIMDE_FLOAT32_C( -879.31), SIMDE_FLOAT32_C( 136.87), SIMDE_FLOAT32_C( -412.00), SIMDE_FLOAT32_C( -126.58), SIMDE_FLOAT32_C( 161.71), SIMDE_FLOAT32_C( -468.07), SIMDE_FLOAT32_C( 347.28), SIMDE_FLOAT32_C( -784.02), SIMDE_FLOAT32_C( -345.37), SIMDE_FLOAT32_C( -404.59), SIMDE_FLOAT32_C( 308.71), SIMDE_FLOAT32_C( -3.07), SIMDE_FLOAT32_C( -1673.26), SIMDE_FLOAT32_C( -334.72) } }, { { SIMDE_FLOAT32_C( -536.58), SIMDE_FLOAT32_C( -270.56), SIMDE_FLOAT32_C( -101.08), SIMDE_FLOAT32_C( -359.21), SIMDE_FLOAT32_C( -458.24), SIMDE_FLOAT32_C( 988.84), SIMDE_FLOAT32_C( -204.19), SIMDE_FLOAT32_C( 214.91), SIMDE_FLOAT32_C( 
-880.97), SIMDE_FLOAT32_C( 468.71), SIMDE_FLOAT32_C( 694.51), SIMDE_FLOAT32_C( 709.42), SIMDE_FLOAT32_C( -341.20), SIMDE_FLOAT32_C( 695.02), SIMDE_FLOAT32_C( -11.52), SIMDE_FLOAT32_C( -830.65) }, { SIMDE_FLOAT32_C( 516.52), SIMDE_FLOAT32_C( 705.95), SIMDE_FLOAT32_C( 793.53), SIMDE_FLOAT32_C( -72.87), SIMDE_FLOAT32_C( 767.06), SIMDE_FLOAT32_C( -134.68), SIMDE_FLOAT32_C( -695.95), SIMDE_FLOAT32_C( 441.19), SIMDE_FLOAT32_C( 951.11), SIMDE_FLOAT32_C( 285.78), SIMDE_FLOAT32_C( -442.99), SIMDE_FLOAT32_C( -330.57), SIMDE_FLOAT32_C( -907.38), SIMDE_FLOAT32_C( -116.76), SIMDE_FLOAT32_C( 55.65), SIMDE_FLOAT32_C( -443.96) }, { SIMDE_FLOAT32_C( -20.07), SIMDE_FLOAT32_C( 435.40), SIMDE_FLOAT32_C( 692.45), SIMDE_FLOAT32_C( -432.07), SIMDE_FLOAT32_C( 308.82), SIMDE_FLOAT32_C( 854.16), SIMDE_FLOAT32_C( -900.14), SIMDE_FLOAT32_C( 656.10), SIMDE_FLOAT32_C( 70.14), SIMDE_FLOAT32_C( 754.49), SIMDE_FLOAT32_C( 251.51), SIMDE_FLOAT32_C( 378.85), SIMDE_FLOAT32_C( -1248.58), SIMDE_FLOAT32_C( 578.25), SIMDE_FLOAT32_C( 44.13), SIMDE_FLOAT32_C( -1274.61) } }, { { SIMDE_FLOAT32_C( 612.68), SIMDE_FLOAT32_C( 954.57), SIMDE_FLOAT32_C( 196.83), SIMDE_FLOAT32_C( -845.56), SIMDE_FLOAT32_C( 943.41), SIMDE_FLOAT32_C( 992.64), SIMDE_FLOAT32_C( 369.35), SIMDE_FLOAT32_C( -937.56), SIMDE_FLOAT32_C( 461.35), SIMDE_FLOAT32_C( 63.86), SIMDE_FLOAT32_C( 771.86), SIMDE_FLOAT32_C( -879.85), SIMDE_FLOAT32_C( -241.12), SIMDE_FLOAT32_C( -239.67), SIMDE_FLOAT32_C( -710.49), SIMDE_FLOAT32_C( -724.61) }, { SIMDE_FLOAT32_C( -533.71), SIMDE_FLOAT32_C( -916.96), SIMDE_FLOAT32_C( 202.53), SIMDE_FLOAT32_C( -766.65), SIMDE_FLOAT32_C( -51.64), SIMDE_FLOAT32_C( 506.57), SIMDE_FLOAT32_C( 674.54), SIMDE_FLOAT32_C( -100.53), SIMDE_FLOAT32_C( -207.65), SIMDE_FLOAT32_C( -768.46), SIMDE_FLOAT32_C( 568.90), SIMDE_FLOAT32_C( -115.03), SIMDE_FLOAT32_C( 114.78), SIMDE_FLOAT32_C( -375.45), SIMDE_FLOAT32_C( 441.01), SIMDE_FLOAT32_C( -272.54) }, { SIMDE_FLOAT32_C( 78.97), SIMDE_FLOAT32_C( 37.61), SIMDE_FLOAT32_C( 399.35), SIMDE_FLOAT32_C( 
-1612.21), SIMDE_FLOAT32_C( 891.77), SIMDE_FLOAT32_C( 1499.21), SIMDE_FLOAT32_C( 1043.89), SIMDE_FLOAT32_C( -1038.09), SIMDE_FLOAT32_C( 253.70), SIMDE_FLOAT32_C( -704.60), SIMDE_FLOAT32_C( 1340.75), SIMDE_FLOAT32_C( -994.87), SIMDE_FLOAT32_C( -126.35), SIMDE_FLOAT32_C( -615.12), SIMDE_FLOAT32_C( -269.48), SIMDE_FLOAT32_C( -997.15) } }, { { SIMDE_FLOAT32_C( -420.88), SIMDE_FLOAT32_C( -362.16), SIMDE_FLOAT32_C( -118.10), SIMDE_FLOAT32_C( -477.47), SIMDE_FLOAT32_C( -369.52), SIMDE_FLOAT32_C( -748.75), SIMDE_FLOAT32_C( -415.03), SIMDE_FLOAT32_C( -908.17), SIMDE_FLOAT32_C( 315.11), SIMDE_FLOAT32_C( -643.17), SIMDE_FLOAT32_C( -788.02), SIMDE_FLOAT32_C( -926.02), SIMDE_FLOAT32_C( 117.16), SIMDE_FLOAT32_C( -498.52), SIMDE_FLOAT32_C( -650.63), SIMDE_FLOAT32_C( 583.45) }, { SIMDE_FLOAT32_C( -415.48), SIMDE_FLOAT32_C( 551.90), SIMDE_FLOAT32_C( 816.80), SIMDE_FLOAT32_C( 532.88), SIMDE_FLOAT32_C( 58.47), SIMDE_FLOAT32_C( 491.34), SIMDE_FLOAT32_C( -567.65), SIMDE_FLOAT32_C( 850.83), SIMDE_FLOAT32_C( 722.88), SIMDE_FLOAT32_C( -998.75), SIMDE_FLOAT32_C( -264.20), SIMDE_FLOAT32_C( -162.34), SIMDE_FLOAT32_C( -374.20), SIMDE_FLOAT32_C( -823.19), SIMDE_FLOAT32_C( 565.12), SIMDE_FLOAT32_C( 204.92) }, { SIMDE_FLOAT32_C( -836.35), SIMDE_FLOAT32_C( 189.74), SIMDE_FLOAT32_C( 698.70), SIMDE_FLOAT32_C( 55.42), SIMDE_FLOAT32_C( -311.05), SIMDE_FLOAT32_C( -257.41), SIMDE_FLOAT32_C( -982.68), SIMDE_FLOAT32_C( -57.35), SIMDE_FLOAT32_C( 1037.99), SIMDE_FLOAT32_C( -1641.92), SIMDE_FLOAT32_C( -1052.22), SIMDE_FLOAT32_C( -1088.36), SIMDE_FLOAT32_C( -257.04), SIMDE_FLOAT32_C( -1321.70), SIMDE_FLOAT32_C( -85.51), SIMDE_FLOAT32_C( 788.38) } }, { { SIMDE_FLOAT32_C( -185.35), SIMDE_FLOAT32_C( -552.99), SIMDE_FLOAT32_C( 727.46), SIMDE_FLOAT32_C( 445.13), SIMDE_FLOAT32_C( -301.74), SIMDE_FLOAT32_C( -687.57), SIMDE_FLOAT32_C( 536.96), SIMDE_FLOAT32_C( -986.63), SIMDE_FLOAT32_C( -330.75), SIMDE_FLOAT32_C( 748.93), SIMDE_FLOAT32_C( -912.65), SIMDE_FLOAT32_C( 786.42), SIMDE_FLOAT32_C( -749.58), 
SIMDE_FLOAT32_C( -563.28), SIMDE_FLOAT32_C( 369.87), SIMDE_FLOAT32_C( -165.06) }, { SIMDE_FLOAT32_C( 988.62), SIMDE_FLOAT32_C( 186.67), SIMDE_FLOAT32_C( -632.17), SIMDE_FLOAT32_C( 47.10), SIMDE_FLOAT32_C( -321.99), SIMDE_FLOAT32_C( -199.82), SIMDE_FLOAT32_C( -102.08), SIMDE_FLOAT32_C( -599.11), SIMDE_FLOAT32_C( -198.57), SIMDE_FLOAT32_C( 633.73), SIMDE_FLOAT32_C( 238.55), SIMDE_FLOAT32_C( 427.23), SIMDE_FLOAT32_C( 810.54), SIMDE_FLOAT32_C( -196.33), SIMDE_FLOAT32_C( -367.85), SIMDE_FLOAT32_C( -374.81) }, { SIMDE_FLOAT32_C( 803.28), SIMDE_FLOAT32_C( -366.32), SIMDE_FLOAT32_C( 95.28), SIMDE_FLOAT32_C( 492.23), SIMDE_FLOAT32_C( -623.73), SIMDE_FLOAT32_C( -887.39), SIMDE_FLOAT32_C( 434.88), SIMDE_FLOAT32_C( -1585.74), SIMDE_FLOAT32_C( -529.32), SIMDE_FLOAT32_C( 1382.66), SIMDE_FLOAT32_C( -674.10), SIMDE_FLOAT32_C( 1213.65), SIMDE_FLOAT32_C( 60.96), SIMDE_FLOAT32_C( -759.61), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( -539.87) } }, { { SIMDE_FLOAT32_C( 250.68), SIMDE_FLOAT32_C( -640.39), SIMDE_FLOAT32_C( -929.68), SIMDE_FLOAT32_C( 948.94), SIMDE_FLOAT32_C( -327.96), SIMDE_FLOAT32_C( 607.27), SIMDE_FLOAT32_C( 962.31), SIMDE_FLOAT32_C( 341.29), SIMDE_FLOAT32_C( 356.21), SIMDE_FLOAT32_C( -950.34), SIMDE_FLOAT32_C( 127.71), SIMDE_FLOAT32_C( 606.63), SIMDE_FLOAT32_C( -513.62), SIMDE_FLOAT32_C( -502.42), SIMDE_FLOAT32_C( -558.43), SIMDE_FLOAT32_C( -524.99) }, { SIMDE_FLOAT32_C( 684.24), SIMDE_FLOAT32_C( -190.61), SIMDE_FLOAT32_C( 522.10), SIMDE_FLOAT32_C( -637.75), SIMDE_FLOAT32_C( 609.57), SIMDE_FLOAT32_C( -579.97), SIMDE_FLOAT32_C( -236.86), SIMDE_FLOAT32_C( -589.00), SIMDE_FLOAT32_C( -946.25), SIMDE_FLOAT32_C( -998.31), SIMDE_FLOAT32_C( 838.23), SIMDE_FLOAT32_C( 864.29), SIMDE_FLOAT32_C( -194.64), SIMDE_FLOAT32_C( -529.61), SIMDE_FLOAT32_C( -510.52), SIMDE_FLOAT32_C( -943.96) }, { SIMDE_FLOAT32_C( 934.92), SIMDE_FLOAT32_C( -831.00), SIMDE_FLOAT32_C( -407.58), SIMDE_FLOAT32_C( 311.19), SIMDE_FLOAT32_C( 281.61), SIMDE_FLOAT32_C( 27.30), SIMDE_FLOAT32_C( 725.45), 
SIMDE_FLOAT32_C( -247.71), SIMDE_FLOAT32_C( -590.04), SIMDE_FLOAT32_C( -1948.65), SIMDE_FLOAT32_C( 965.94), SIMDE_FLOAT32_C( 1470.92), SIMDE_FLOAT32_C( -708.26), SIMDE_FLOAT32_C( -1032.04), SIMDE_FLOAT32_C( -1068.95), SIMDE_FLOAT32_C( -1468.95) } }, { { SIMDE_FLOAT32_C( -170.00), SIMDE_FLOAT32_C( -440.20), SIMDE_FLOAT32_C( -995.02), SIMDE_FLOAT32_C( 502.03), SIMDE_FLOAT32_C( -832.92), SIMDE_FLOAT32_C( 967.29), SIMDE_FLOAT32_C( -156.68), SIMDE_FLOAT32_C( 523.28), SIMDE_FLOAT32_C( -983.05), SIMDE_FLOAT32_C( 971.03), SIMDE_FLOAT32_C( 129.91), SIMDE_FLOAT32_C( -496.67), SIMDE_FLOAT32_C( -531.39), SIMDE_FLOAT32_C( 571.48), SIMDE_FLOAT32_C( -21.66), SIMDE_FLOAT32_C( -847.15) }, { SIMDE_FLOAT32_C( -619.13), SIMDE_FLOAT32_C( -499.55), SIMDE_FLOAT32_C( -484.90), SIMDE_FLOAT32_C( 990.45), SIMDE_FLOAT32_C( -79.53), SIMDE_FLOAT32_C( 278.24), SIMDE_FLOAT32_C( -598.55), SIMDE_FLOAT32_C( -25.77), SIMDE_FLOAT32_C( 279.93), SIMDE_FLOAT32_C( -760.32), SIMDE_FLOAT32_C( -161.48), SIMDE_FLOAT32_C( -914.71), SIMDE_FLOAT32_C( -289.93), SIMDE_FLOAT32_C( 328.00), SIMDE_FLOAT32_C( -858.67), SIMDE_FLOAT32_C( 540.06) }, { SIMDE_FLOAT32_C( -789.13), SIMDE_FLOAT32_C( -939.75), SIMDE_FLOAT32_C( -1479.92), SIMDE_FLOAT32_C( 1492.48), SIMDE_FLOAT32_C( -912.45), SIMDE_FLOAT32_C( 1245.53), SIMDE_FLOAT32_C( -755.23), SIMDE_FLOAT32_C( 497.51), SIMDE_FLOAT32_C( -703.12), SIMDE_FLOAT32_C( 210.71), SIMDE_FLOAT32_C( -31.57), SIMDE_FLOAT32_C( -1411.38), SIMDE_FLOAT32_C( -821.33), SIMDE_FLOAT32_C( 899.48), SIMDE_FLOAT32_C( -880.33), SIMDE_FLOAT32_C( -307.09) } }, { { SIMDE_FLOAT32_C( 887.80), SIMDE_FLOAT32_C( -853.69), SIMDE_FLOAT32_C( 42.10), SIMDE_FLOAT32_C( -945.12), SIMDE_FLOAT32_C( -886.40), SIMDE_FLOAT32_C( 885.42), SIMDE_FLOAT32_C( 578.16), SIMDE_FLOAT32_C( -869.46), SIMDE_FLOAT32_C( 856.45), SIMDE_FLOAT32_C( -291.93), SIMDE_FLOAT32_C( -366.12), SIMDE_FLOAT32_C( -674.94), SIMDE_FLOAT32_C( -720.45), SIMDE_FLOAT32_C( 612.22), SIMDE_FLOAT32_C( -522.09), SIMDE_FLOAT32_C( -339.57) }, { SIMDE_FLOAT32_C( 
-887.34), SIMDE_FLOAT32_C( -6.99), SIMDE_FLOAT32_C( -349.13), SIMDE_FLOAT32_C( 33.14), SIMDE_FLOAT32_C( -728.74), SIMDE_FLOAT32_C( 52.32), SIMDE_FLOAT32_C( -992.63), SIMDE_FLOAT32_C( 551.19), SIMDE_FLOAT32_C( 292.00), SIMDE_FLOAT32_C( -154.11), SIMDE_FLOAT32_C( 636.48), SIMDE_FLOAT32_C( -997.93), SIMDE_FLOAT32_C( -826.11), SIMDE_FLOAT32_C( 777.81), SIMDE_FLOAT32_C( 542.14), SIMDE_FLOAT32_C( -938.31) }, { SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -860.68), SIMDE_FLOAT32_C( -307.03), SIMDE_FLOAT32_C( -911.99), SIMDE_FLOAT32_C( -1615.15), SIMDE_FLOAT32_C( 937.74), SIMDE_FLOAT32_C( -414.47), SIMDE_FLOAT32_C( -318.27), SIMDE_FLOAT32_C( 1148.46), SIMDE_FLOAT32_C( -446.04), SIMDE_FLOAT32_C( 270.35), SIMDE_FLOAT32_C( -1672.87), SIMDE_FLOAT32_C( -1546.56), SIMDE_FLOAT32_C( 1390.02), SIMDE_FLOAT32_C( 20.05), SIMDE_FLOAT32_C( -1277.88) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_mm512_add_ps(a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_add_ps (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 src[16]; simde__mmask16 k; simde_float32 a[16]; simde_float32 b[16]; simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 460.46), SIMDE_FLOAT32_C( -331.63), SIMDE_FLOAT32_C( 681.04), SIMDE_FLOAT32_C( -156.99), SIMDE_FLOAT32_C( -543.60), SIMDE_FLOAT32_C( 94.06), SIMDE_FLOAT32_C( 923.51), SIMDE_FLOAT32_C( -881.03), SIMDE_FLOAT32_C( -751.28), SIMDE_FLOAT32_C( -565.86), SIMDE_FLOAT32_C( -825.81), SIMDE_FLOAT32_C( 899.74), SIMDE_FLOAT32_C( 180.53), SIMDE_FLOAT32_C( -308.94), SIMDE_FLOAT32_C( 105.08), SIMDE_FLOAT32_C( -812.17) }, UINT16_C(46254), { SIMDE_FLOAT32_C( 159.98), SIMDE_FLOAT32_C( 930.16), SIMDE_FLOAT32_C( -647.50), SIMDE_FLOAT32_C( -273.39), SIMDE_FLOAT32_C( -580.12), SIMDE_FLOAT32_C( -662.69), SIMDE_FLOAT32_C( -636.46), 
SIMDE_FLOAT32_C( 880.33), SIMDE_FLOAT32_C( 5.69), SIMDE_FLOAT32_C( -955.42), SIMDE_FLOAT32_C( -276.66), SIMDE_FLOAT32_C( 462.09), SIMDE_FLOAT32_C( 138.64), SIMDE_FLOAT32_C( -353.15), SIMDE_FLOAT32_C( 581.06), SIMDE_FLOAT32_C( 387.36) }, { SIMDE_FLOAT32_C( 80.99), SIMDE_FLOAT32_C( 755.25), SIMDE_FLOAT32_C( 287.10), SIMDE_FLOAT32_C( -738.48), SIMDE_FLOAT32_C( -553.70), SIMDE_FLOAT32_C( -607.83), SIMDE_FLOAT32_C( -550.66), SIMDE_FLOAT32_C( 141.56), SIMDE_FLOAT32_C( -486.72), SIMDE_FLOAT32_C( 158.46), SIMDE_FLOAT32_C( 11.82), SIMDE_FLOAT32_C( -371.24), SIMDE_FLOAT32_C( 774.24), SIMDE_FLOAT32_C( -643.60), SIMDE_FLOAT32_C( 412.35), SIMDE_FLOAT32_C( -65.78) }, { SIMDE_FLOAT32_C( 460.46), SIMDE_FLOAT32_C( 1685.41), SIMDE_FLOAT32_C( -360.40), SIMDE_FLOAT32_C( -1011.87), SIMDE_FLOAT32_C( -543.60), SIMDE_FLOAT32_C( -1270.51), SIMDE_FLOAT32_C( 923.51), SIMDE_FLOAT32_C( 1021.89), SIMDE_FLOAT32_C( -751.28), SIMDE_FLOAT32_C( -565.86), SIMDE_FLOAT32_C( -264.84), SIMDE_FLOAT32_C( 899.74), SIMDE_FLOAT32_C( 912.88), SIMDE_FLOAT32_C( -996.76), SIMDE_FLOAT32_C( 105.08), SIMDE_FLOAT32_C( 321.58) } }, { { SIMDE_FLOAT32_C( -713.44), SIMDE_FLOAT32_C( 764.86), SIMDE_FLOAT32_C( 660.83), SIMDE_FLOAT32_C( -293.56), SIMDE_FLOAT32_C( -897.83), SIMDE_FLOAT32_C( -975.63), SIMDE_FLOAT32_C( -413.23), SIMDE_FLOAT32_C( 107.86), SIMDE_FLOAT32_C( -931.06), SIMDE_FLOAT32_C( 310.12), SIMDE_FLOAT32_C( -430.05), SIMDE_FLOAT32_C( 207.58), SIMDE_FLOAT32_C( 956.96), SIMDE_FLOAT32_C( -848.99), SIMDE_FLOAT32_C( -405.06), SIMDE_FLOAT32_C( 37.95) }, UINT16_C(56892), { SIMDE_FLOAT32_C( -92.73), SIMDE_FLOAT32_C( -494.07), SIMDE_FLOAT32_C( 416.25), SIMDE_FLOAT32_C( -318.49), SIMDE_FLOAT32_C( -137.67), SIMDE_FLOAT32_C( -171.40), SIMDE_FLOAT32_C( 615.73), SIMDE_FLOAT32_C( 148.89), SIMDE_FLOAT32_C( -406.54), SIMDE_FLOAT32_C( 276.56), SIMDE_FLOAT32_C( 855.33), SIMDE_FLOAT32_C( -304.37), SIMDE_FLOAT32_C( 300.92), SIMDE_FLOAT32_C( -557.90), SIMDE_FLOAT32_C( 803.48), SIMDE_FLOAT32_C( 369.86) }, { SIMDE_FLOAT32_C( 752.21), 
SIMDE_FLOAT32_C( -626.57), SIMDE_FLOAT32_C( -422.55), SIMDE_FLOAT32_C( 709.18), SIMDE_FLOAT32_C( -475.56), SIMDE_FLOAT32_C( 172.39), SIMDE_FLOAT32_C( -252.87), SIMDE_FLOAT32_C( -569.31), SIMDE_FLOAT32_C( 54.43), SIMDE_FLOAT32_C( -953.40), SIMDE_FLOAT32_C( -216.76), SIMDE_FLOAT32_C( 328.64), SIMDE_FLOAT32_C( 795.42), SIMDE_FLOAT32_C( -722.64), SIMDE_FLOAT32_C( -883.86), SIMDE_FLOAT32_C( -297.31) }, { SIMDE_FLOAT32_C( -713.44), SIMDE_FLOAT32_C( 764.86), SIMDE_FLOAT32_C( -6.30), SIMDE_FLOAT32_C( 390.69), SIMDE_FLOAT32_C( -613.23), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -413.23), SIMDE_FLOAT32_C( 107.86), SIMDE_FLOAT32_C( -931.06), SIMDE_FLOAT32_C( -676.84), SIMDE_FLOAT32_C( 638.57), SIMDE_FLOAT32_C( 24.27), SIMDE_FLOAT32_C( 1096.34), SIMDE_FLOAT32_C( -848.99), SIMDE_FLOAT32_C( -80.38), SIMDE_FLOAT32_C( 72.56) } }, { { SIMDE_FLOAT32_C( -216.71), SIMDE_FLOAT32_C( 532.39), SIMDE_FLOAT32_C( 384.21), SIMDE_FLOAT32_C( 645.62), SIMDE_FLOAT32_C( -639.01), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( -205.49), SIMDE_FLOAT32_C( -45.55), SIMDE_FLOAT32_C( -723.51), SIMDE_FLOAT32_C( -350.17), SIMDE_FLOAT32_C( 650.08), SIMDE_FLOAT32_C( 577.41), SIMDE_FLOAT32_C( 91.93), SIMDE_FLOAT32_C( 453.57), SIMDE_FLOAT32_C( -52.73), SIMDE_FLOAT32_C( -155.86) }, UINT16_C( 2131), { SIMDE_FLOAT32_C( 347.05), SIMDE_FLOAT32_C( 565.37), SIMDE_FLOAT32_C( 80.17), SIMDE_FLOAT32_C( 142.47), SIMDE_FLOAT32_C( 842.73), SIMDE_FLOAT32_C( 196.31), SIMDE_FLOAT32_C( 845.17), SIMDE_FLOAT32_C( -373.98), SIMDE_FLOAT32_C( -271.30), SIMDE_FLOAT32_C( 229.37), SIMDE_FLOAT32_C( -728.36), SIMDE_FLOAT32_C( 89.69), SIMDE_FLOAT32_C( -770.69), SIMDE_FLOAT32_C( 66.14), SIMDE_FLOAT32_C( -955.85), SIMDE_FLOAT32_C( -494.20) }, { SIMDE_FLOAT32_C( 715.97), SIMDE_FLOAT32_C( 694.23), SIMDE_FLOAT32_C( -916.79), SIMDE_FLOAT32_C( -192.10), SIMDE_FLOAT32_C( 147.79), SIMDE_FLOAT32_C( 30.48), SIMDE_FLOAT32_C( 652.05), SIMDE_FLOAT32_C( -25.21), SIMDE_FLOAT32_C( -444.80), SIMDE_FLOAT32_C( -794.64), SIMDE_FLOAT32_C( 326.23), SIMDE_FLOAT32_C( 
252.31), SIMDE_FLOAT32_C( 505.81), SIMDE_FLOAT32_C( -891.64), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( -147.13) }, { SIMDE_FLOAT32_C( 1063.03), SIMDE_FLOAT32_C( 1259.60), SIMDE_FLOAT32_C( 384.21), SIMDE_FLOAT32_C( 645.62), SIMDE_FLOAT32_C( 990.52), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 1497.21), SIMDE_FLOAT32_C( -45.55), SIMDE_FLOAT32_C( -723.51), SIMDE_FLOAT32_C( -350.17), SIMDE_FLOAT32_C( 650.08), SIMDE_FLOAT32_C( 342.00), SIMDE_FLOAT32_C( 91.93), SIMDE_FLOAT32_C( 453.57), SIMDE_FLOAT32_C( -52.73), SIMDE_FLOAT32_C( -155.86) } }, { { SIMDE_FLOAT32_C( 673.73), SIMDE_FLOAT32_C( -915.98), SIMDE_FLOAT32_C( 995.34), SIMDE_FLOAT32_C( 516.46), SIMDE_FLOAT32_C( 280.33), SIMDE_FLOAT32_C( 840.51), SIMDE_FLOAT32_C( -857.52), SIMDE_FLOAT32_C( -990.97), SIMDE_FLOAT32_C( 69.88), SIMDE_FLOAT32_C( -585.88), SIMDE_FLOAT32_C( 98.72), SIMDE_FLOAT32_C( 299.19), SIMDE_FLOAT32_C( 480.27), SIMDE_FLOAT32_C( 142.87), SIMDE_FLOAT32_C( 804.99), SIMDE_FLOAT32_C( 196.24) }, UINT16_C(47953), { SIMDE_FLOAT32_C( 861.56), SIMDE_FLOAT32_C( -714.09), SIMDE_FLOAT32_C( -273.80), SIMDE_FLOAT32_C( 367.37), SIMDE_FLOAT32_C( -605.73), SIMDE_FLOAT32_C( 730.04), SIMDE_FLOAT32_C( -779.76), SIMDE_FLOAT32_C( -932.00), SIMDE_FLOAT32_C( 814.06), SIMDE_FLOAT32_C( -784.42), SIMDE_FLOAT32_C( 584.46), SIMDE_FLOAT32_C( 94.39), SIMDE_FLOAT32_C( -943.91), SIMDE_FLOAT32_C( 726.95), SIMDE_FLOAT32_C( 103.42), SIMDE_FLOAT32_C( 125.97) }, { SIMDE_FLOAT32_C( -858.93), SIMDE_FLOAT32_C( -797.86), SIMDE_FLOAT32_C( -574.84), SIMDE_FLOAT32_C( 621.33), SIMDE_FLOAT32_C( 345.01), SIMDE_FLOAT32_C( -769.85), SIMDE_FLOAT32_C( -182.42), SIMDE_FLOAT32_C( -817.89), SIMDE_FLOAT32_C( -881.65), SIMDE_FLOAT32_C( -178.28), SIMDE_FLOAT32_C( -833.00), SIMDE_FLOAT32_C( 37.03), SIMDE_FLOAT32_C( -522.09), SIMDE_FLOAT32_C( 126.68), SIMDE_FLOAT32_C( -489.08), SIMDE_FLOAT32_C( -660.53) }, { SIMDE_FLOAT32_C( 2.62), SIMDE_FLOAT32_C( -915.98), SIMDE_FLOAT32_C( 995.34), SIMDE_FLOAT32_C( 516.46), SIMDE_FLOAT32_C( -260.72), SIMDE_FLOAT32_C( 840.51), 
SIMDE_FLOAT32_C( -962.19), SIMDE_FLOAT32_C( -990.97), SIMDE_FLOAT32_C( -67.59), SIMDE_FLOAT32_C( -962.70), SIMDE_FLOAT32_C( 98.72), SIMDE_FLOAT32_C( 131.42), SIMDE_FLOAT32_C( -1466.00), SIMDE_FLOAT32_C( 853.63), SIMDE_FLOAT32_C( 804.99), SIMDE_FLOAT32_C( -534.56) } }, { { SIMDE_FLOAT32_C( 412.59), SIMDE_FLOAT32_C( 237.12), SIMDE_FLOAT32_C( 706.84), SIMDE_FLOAT32_C( 806.86), SIMDE_FLOAT32_C( -32.84), SIMDE_FLOAT32_C( 927.07), SIMDE_FLOAT32_C( 874.86), SIMDE_FLOAT32_C( -218.78), SIMDE_FLOAT32_C( -857.35), SIMDE_FLOAT32_C( 459.32), SIMDE_FLOAT32_C( 875.61), SIMDE_FLOAT32_C( -801.26), SIMDE_FLOAT32_C( 186.27), SIMDE_FLOAT32_C( -20.97), SIMDE_FLOAT32_C( 324.71), SIMDE_FLOAT32_C( 327.34) }, UINT16_C(16785), { SIMDE_FLOAT32_C( -412.03), SIMDE_FLOAT32_C( -124.71), SIMDE_FLOAT32_C( 135.41), SIMDE_FLOAT32_C( 65.88), SIMDE_FLOAT32_C( -998.03), SIMDE_FLOAT32_C( 646.33), SIMDE_FLOAT32_C( 405.35), SIMDE_FLOAT32_C( 414.56), SIMDE_FLOAT32_C( -116.55), SIMDE_FLOAT32_C( 112.18), SIMDE_FLOAT32_C( 221.42), SIMDE_FLOAT32_C( 850.61), SIMDE_FLOAT32_C( 39.26), SIMDE_FLOAT32_C( 96.28), SIMDE_FLOAT32_C( -368.17), SIMDE_FLOAT32_C( 181.91) }, { SIMDE_FLOAT32_C( -444.40), SIMDE_FLOAT32_C( -492.56), SIMDE_FLOAT32_C( 380.65), SIMDE_FLOAT32_C( 741.87), SIMDE_FLOAT32_C( 486.46), SIMDE_FLOAT32_C( -294.64), SIMDE_FLOAT32_C( 69.20), SIMDE_FLOAT32_C( -332.37), SIMDE_FLOAT32_C( -544.77), SIMDE_FLOAT32_C( -982.12), SIMDE_FLOAT32_C( 193.82), SIMDE_FLOAT32_C( -564.75), SIMDE_FLOAT32_C( 784.12), SIMDE_FLOAT32_C( 902.11), SIMDE_FLOAT32_C( -466.37), SIMDE_FLOAT32_C( -627.91) }, { SIMDE_FLOAT32_C( -856.43), SIMDE_FLOAT32_C( 237.12), SIMDE_FLOAT32_C( 706.84), SIMDE_FLOAT32_C( 806.86), SIMDE_FLOAT32_C( -511.57), SIMDE_FLOAT32_C( 927.07), SIMDE_FLOAT32_C( 874.86), SIMDE_FLOAT32_C( 82.19), SIMDE_FLOAT32_C( -661.32), SIMDE_FLOAT32_C( 459.32), SIMDE_FLOAT32_C( 875.61), SIMDE_FLOAT32_C( -801.26), SIMDE_FLOAT32_C( 186.27), SIMDE_FLOAT32_C( -20.97), SIMDE_FLOAT32_C( -834.55), SIMDE_FLOAT32_C( 327.34) } }, { { 
SIMDE_FLOAT32_C( -222.60), SIMDE_FLOAT32_C( 669.04), SIMDE_FLOAT32_C( 437.97), SIMDE_FLOAT32_C( -220.63), SIMDE_FLOAT32_C( 315.37), SIMDE_FLOAT32_C( -156.68), SIMDE_FLOAT32_C( -806.07), SIMDE_FLOAT32_C( -801.18), SIMDE_FLOAT32_C( 955.50), SIMDE_FLOAT32_C( 415.35), SIMDE_FLOAT32_C( -950.57), SIMDE_FLOAT32_C( -5.24), SIMDE_FLOAT32_C( -488.38), SIMDE_FLOAT32_C( -318.75), SIMDE_FLOAT32_C( -823.33), SIMDE_FLOAT32_C( 67.22) }, UINT16_C(17154), { SIMDE_FLOAT32_C( 896.17), SIMDE_FLOAT32_C( -463.40), SIMDE_FLOAT32_C( 153.15), SIMDE_FLOAT32_C( 680.29), SIMDE_FLOAT32_C( -561.29), SIMDE_FLOAT32_C( 686.78), SIMDE_FLOAT32_C( -947.62), SIMDE_FLOAT32_C( 216.11), SIMDE_FLOAT32_C( 355.82), SIMDE_FLOAT32_C( 490.35), SIMDE_FLOAT32_C( 995.48), SIMDE_FLOAT32_C( -328.82), SIMDE_FLOAT32_C( -666.33), SIMDE_FLOAT32_C( -810.59), SIMDE_FLOAT32_C( -130.00), SIMDE_FLOAT32_C( -710.83) }, { SIMDE_FLOAT32_C( 604.75), SIMDE_FLOAT32_C( -80.58), SIMDE_FLOAT32_C( 283.92), SIMDE_FLOAT32_C( -883.63), SIMDE_FLOAT32_C( 600.68), SIMDE_FLOAT32_C( 460.59), SIMDE_FLOAT32_C( 183.59), SIMDE_FLOAT32_C( -210.63), SIMDE_FLOAT32_C( 17.91), SIMDE_FLOAT32_C( 992.68), SIMDE_FLOAT32_C( 464.52), SIMDE_FLOAT32_C( 280.58), SIMDE_FLOAT32_C( 870.97), SIMDE_FLOAT32_C( -192.70), SIMDE_FLOAT32_C( 998.48), SIMDE_FLOAT32_C( 767.14) }, { SIMDE_FLOAT32_C( -222.60), SIMDE_FLOAT32_C( -543.97), SIMDE_FLOAT32_C( 437.97), SIMDE_FLOAT32_C( -220.63), SIMDE_FLOAT32_C( 315.37), SIMDE_FLOAT32_C( -156.68), SIMDE_FLOAT32_C( -806.07), SIMDE_FLOAT32_C( -801.18), SIMDE_FLOAT32_C( 373.72), SIMDE_FLOAT32_C( 1483.03), SIMDE_FLOAT32_C( -950.57), SIMDE_FLOAT32_C( -5.24), SIMDE_FLOAT32_C( -488.38), SIMDE_FLOAT32_C( -318.75), SIMDE_FLOAT32_C( 868.48), SIMDE_FLOAT32_C( 67.22) } }, { { SIMDE_FLOAT32_C( 343.91), SIMDE_FLOAT32_C( 151.64), SIMDE_FLOAT32_C( 447.43), SIMDE_FLOAT32_C( 782.62), SIMDE_FLOAT32_C( -161.58), SIMDE_FLOAT32_C( 499.81), SIMDE_FLOAT32_C( -1.27), SIMDE_FLOAT32_C( -805.77), SIMDE_FLOAT32_C( -9.84), SIMDE_FLOAT32_C( -5.79), 
SIMDE_FLOAT32_C( -134.58), SIMDE_FLOAT32_C( 323.82), SIMDE_FLOAT32_C( 183.61), SIMDE_FLOAT32_C( 735.41), SIMDE_FLOAT32_C( 612.99), SIMDE_FLOAT32_C( -211.63) }, UINT16_C(55098), { SIMDE_FLOAT32_C( -918.99), SIMDE_FLOAT32_C( -490.60), SIMDE_FLOAT32_C( -344.01), SIMDE_FLOAT32_C( 951.99), SIMDE_FLOAT32_C( 316.70), SIMDE_FLOAT32_C( -345.53), SIMDE_FLOAT32_C( 719.12), SIMDE_FLOAT32_C( -339.39), SIMDE_FLOAT32_C( 806.11), SIMDE_FLOAT32_C( 166.55), SIMDE_FLOAT32_C( -556.77), SIMDE_FLOAT32_C( -355.47), SIMDE_FLOAT32_C( -333.64), SIMDE_FLOAT32_C( 441.96), SIMDE_FLOAT32_C( -161.24), SIMDE_FLOAT32_C( 656.52) }, { SIMDE_FLOAT32_C( -563.83), SIMDE_FLOAT32_C( 704.18), SIMDE_FLOAT32_C( -19.66), SIMDE_FLOAT32_C( 619.78), SIMDE_FLOAT32_C( 439.59), SIMDE_FLOAT32_C( -406.67), SIMDE_FLOAT32_C( -591.85), SIMDE_FLOAT32_C( -905.57), SIMDE_FLOAT32_C( 490.24), SIMDE_FLOAT32_C( 312.88), SIMDE_FLOAT32_C( -650.06), SIMDE_FLOAT32_C( 847.74), SIMDE_FLOAT32_C( 401.22), SIMDE_FLOAT32_C( 394.82), SIMDE_FLOAT32_C( 223.15), SIMDE_FLOAT32_C( 482.23) }, { SIMDE_FLOAT32_C( 343.91), SIMDE_FLOAT32_C( 213.57), SIMDE_FLOAT32_C( 447.43), SIMDE_FLOAT32_C( 1571.77), SIMDE_FLOAT32_C( 756.29), SIMDE_FLOAT32_C( -752.20), SIMDE_FLOAT32_C( -1.27), SIMDE_FLOAT32_C( -805.77), SIMDE_FLOAT32_C( 1296.35), SIMDE_FLOAT32_C( 479.44), SIMDE_FLOAT32_C( -1206.83), SIMDE_FLOAT32_C( 323.82), SIMDE_FLOAT32_C( 67.58), SIMDE_FLOAT32_C( 735.41), SIMDE_FLOAT32_C( 61.91), SIMDE_FLOAT32_C( 1138.75) } }, { { SIMDE_FLOAT32_C( 904.21), SIMDE_FLOAT32_C( 879.14), SIMDE_FLOAT32_C( 434.21), SIMDE_FLOAT32_C( 220.91), SIMDE_FLOAT32_C( -466.39), SIMDE_FLOAT32_C( 153.34), SIMDE_FLOAT32_C( 881.52), SIMDE_FLOAT32_C( -660.28), SIMDE_FLOAT32_C( -680.11), SIMDE_FLOAT32_C( -675.25), SIMDE_FLOAT32_C( -15.75), SIMDE_FLOAT32_C( -13.75), SIMDE_FLOAT32_C( 766.71), SIMDE_FLOAT32_C( 823.02), SIMDE_FLOAT32_C( -357.23), SIMDE_FLOAT32_C( -797.13) }, UINT16_C(62059), { SIMDE_FLOAT32_C( 543.68), SIMDE_FLOAT32_C( 411.16), SIMDE_FLOAT32_C( 554.42), SIMDE_FLOAT32_C( 
-55.10), SIMDE_FLOAT32_C( -194.03), SIMDE_FLOAT32_C( -222.43), SIMDE_FLOAT32_C( -572.87), SIMDE_FLOAT32_C( -289.81), SIMDE_FLOAT32_C( -343.29), SIMDE_FLOAT32_C( 861.34), SIMDE_FLOAT32_C( 931.10), SIMDE_FLOAT32_C( 190.32), SIMDE_FLOAT32_C( 14.68), SIMDE_FLOAT32_C( 812.62), SIMDE_FLOAT32_C( 530.05), SIMDE_FLOAT32_C( 334.57) }, { SIMDE_FLOAT32_C( -862.62), SIMDE_FLOAT32_C( -485.70), SIMDE_FLOAT32_C( -679.18), SIMDE_FLOAT32_C( 904.08), SIMDE_FLOAT32_C( -662.68), SIMDE_FLOAT32_C( -36.41), SIMDE_FLOAT32_C( -893.04), SIMDE_FLOAT32_C( 864.51), SIMDE_FLOAT32_C( -413.30), SIMDE_FLOAT32_C( 929.61), SIMDE_FLOAT32_C( -168.70), SIMDE_FLOAT32_C( -196.86), SIMDE_FLOAT32_C( -839.59), SIMDE_FLOAT32_C( 892.52), SIMDE_FLOAT32_C( -490.18), SIMDE_FLOAT32_C( 704.10) }, { SIMDE_FLOAT32_C( -318.94), SIMDE_FLOAT32_C( -74.54), SIMDE_FLOAT32_C( 434.21), SIMDE_FLOAT32_C( 848.98), SIMDE_FLOAT32_C( -466.39), SIMDE_FLOAT32_C( -258.84), SIMDE_FLOAT32_C( -1465.91), SIMDE_FLOAT32_C( -660.28), SIMDE_FLOAT32_C( -680.11), SIMDE_FLOAT32_C( 1790.95), SIMDE_FLOAT32_C( -15.75), SIMDE_FLOAT32_C( -13.75), SIMDE_FLOAT32_C( -824.91), SIMDE_FLOAT32_C( 1705.14), SIMDE_FLOAT32_C( 39.87), SIMDE_FLOAT32_C( 1038.67) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_mm512_mask_add_ps(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_maskz_add_ps (SIMDE_MUNIT_TEST_ARGS) { struct { simde__mmask16 k; simde_float32 a[16]; simde_float32 b[16]; simde_float32 r[16]; } test_vec[] = { { UINT16_C( 7629), { SIMDE_FLOAT32_C( 914.45), SIMDE_FLOAT32_C( 855.01), SIMDE_FLOAT32_C( 38.80), SIMDE_FLOAT32_C( 492.72), SIMDE_FLOAT32_C( 597.40), SIMDE_FLOAT32_C( 253.75), SIMDE_FLOAT32_C( 616.90), SIMDE_FLOAT32_C( 108.68), 
SIMDE_FLOAT32_C( 217.16), SIMDE_FLOAT32_C( 439.38), SIMDE_FLOAT32_C( 724.30), SIMDE_FLOAT32_C( 474.66), SIMDE_FLOAT32_C( 870.80), SIMDE_FLOAT32_C( -46.25), SIMDE_FLOAT32_C( -743.93), SIMDE_FLOAT32_C( 176.79) }, { SIMDE_FLOAT32_C( -872.85), SIMDE_FLOAT32_C( 805.82), SIMDE_FLOAT32_C( 350.81), SIMDE_FLOAT32_C( -515.94), SIMDE_FLOAT32_C( -720.47), SIMDE_FLOAT32_C( 570.49), SIMDE_FLOAT32_C( 295.95), SIMDE_FLOAT32_C( 265.48), SIMDE_FLOAT32_C( 175.46), SIMDE_FLOAT32_C( -217.20), SIMDE_FLOAT32_C( -845.54), SIMDE_FLOAT32_C( 857.16), SIMDE_FLOAT32_C( 138.12), SIMDE_FLOAT32_C( -599.93), SIMDE_FLOAT32_C( 503.35), SIMDE_FLOAT32_C( 52.57) }, { SIMDE_FLOAT32_C( 41.60), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 389.61), SIMDE_FLOAT32_C( -23.22), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 912.85), SIMDE_FLOAT32_C( 374.16), SIMDE_FLOAT32_C( 392.62), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -121.24), SIMDE_FLOAT32_C( 1331.82), SIMDE_FLOAT32_C( 1008.92), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(26184), { SIMDE_FLOAT32_C( 601.56), SIMDE_FLOAT32_C( -314.52), SIMDE_FLOAT32_C( -512.28), SIMDE_FLOAT32_C( 472.36), SIMDE_FLOAT32_C( 639.23), SIMDE_FLOAT32_C( -256.21), SIMDE_FLOAT32_C( -350.85), SIMDE_FLOAT32_C( 766.38), SIMDE_FLOAT32_C( -450.39), SIMDE_FLOAT32_C( 999.96), SIMDE_FLOAT32_C( -749.56), SIMDE_FLOAT32_C( -170.85), SIMDE_FLOAT32_C( 570.45), SIMDE_FLOAT32_C( 546.39), SIMDE_FLOAT32_C( -905.38), SIMDE_FLOAT32_C( -254.09) }, { SIMDE_FLOAT32_C( -670.81), SIMDE_FLOAT32_C( -750.92), SIMDE_FLOAT32_C( -396.93), SIMDE_FLOAT32_C( 467.31), SIMDE_FLOAT32_C( -350.85), SIMDE_FLOAT32_C( -893.58), SIMDE_FLOAT32_C( -480.12), SIMDE_FLOAT32_C( -95.76), SIMDE_FLOAT32_C( -351.43), SIMDE_FLOAT32_C( 65.16), SIMDE_FLOAT32_C( -243.28), SIMDE_FLOAT32_C( -555.53), SIMDE_FLOAT32_C( 227.35), SIMDE_FLOAT32_C( 717.89), SIMDE_FLOAT32_C( 457.53), SIMDE_FLOAT32_C( -171.09) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( 939.67), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -830.97), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1065.13), SIMDE_FLOAT32_C( -992.84), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1264.28), SIMDE_FLOAT32_C( -447.85), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(38268), { SIMDE_FLOAT32_C( -49.61), SIMDE_FLOAT32_C( -940.59), SIMDE_FLOAT32_C( -932.20), SIMDE_FLOAT32_C( -479.16), SIMDE_FLOAT32_C( 605.80), SIMDE_FLOAT32_C( -837.58), SIMDE_FLOAT32_C( 266.75), SIMDE_FLOAT32_C( 934.99), SIMDE_FLOAT32_C( -588.49), SIMDE_FLOAT32_C( 869.82), SIMDE_FLOAT32_C( 402.30), SIMDE_FLOAT32_C( 60.66), SIMDE_FLOAT32_C( 976.24), SIMDE_FLOAT32_C( 922.17), SIMDE_FLOAT32_C( 964.89), SIMDE_FLOAT32_C( -375.20) }, { SIMDE_FLOAT32_C( -12.67), SIMDE_FLOAT32_C( -278.39), SIMDE_FLOAT32_C( 69.27), SIMDE_FLOAT32_C( -785.32), SIMDE_FLOAT32_C( -560.49), SIMDE_FLOAT32_C( -473.20), SIMDE_FLOAT32_C( 43.59), SIMDE_FLOAT32_C( -157.12), SIMDE_FLOAT32_C( -527.94), SIMDE_FLOAT32_C( 344.87), SIMDE_FLOAT32_C( -114.53), SIMDE_FLOAT32_C( 161.10), SIMDE_FLOAT32_C( -704.71), SIMDE_FLOAT32_C( -305.55), SIMDE_FLOAT32_C( -600.24), SIMDE_FLOAT32_C( 245.68) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -862.92), SIMDE_FLOAT32_C( -1264.48), SIMDE_FLOAT32_C( 45.31), SIMDE_FLOAT32_C( -1310.77), SIMDE_FLOAT32_C( 310.34), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -1116.44), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 287.77), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 271.52), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -129.52) } }, { UINT16_C(37369), { SIMDE_FLOAT32_C( -96.91), SIMDE_FLOAT32_C( 696.94), SIMDE_FLOAT32_C( -897.85), SIMDE_FLOAT32_C( -120.68), SIMDE_FLOAT32_C( 619.12), SIMDE_FLOAT32_C( -932.96), SIMDE_FLOAT32_C( 504.13), SIMDE_FLOAT32_C( -393.55), SIMDE_FLOAT32_C( -211.35), SIMDE_FLOAT32_C( -426.60), SIMDE_FLOAT32_C( -178.87), SIMDE_FLOAT32_C( 228.16), SIMDE_FLOAT32_C( 100.20), 
SIMDE_FLOAT32_C( 864.72), SIMDE_FLOAT32_C( -928.97), SIMDE_FLOAT32_C( 572.26) }, { SIMDE_FLOAT32_C( 209.59), SIMDE_FLOAT32_C( -43.49), SIMDE_FLOAT32_C( -266.64), SIMDE_FLOAT32_C( 504.88), SIMDE_FLOAT32_C( 650.96), SIMDE_FLOAT32_C( 133.12), SIMDE_FLOAT32_C( -249.44), SIMDE_FLOAT32_C( -595.18), SIMDE_FLOAT32_C( 600.68), SIMDE_FLOAT32_C( -482.93), SIMDE_FLOAT32_C( -235.52), SIMDE_FLOAT32_C( -769.33), SIMDE_FLOAT32_C( 550.34), SIMDE_FLOAT32_C( 59.13), SIMDE_FLOAT32_C( 272.16), SIMDE_FLOAT32_C( -546.58) }, { SIMDE_FLOAT32_C( 112.67), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 384.20), SIMDE_FLOAT32_C( 1270.07), SIMDE_FLOAT32_C( -799.84), SIMDE_FLOAT32_C( 254.68), SIMDE_FLOAT32_C( -988.73), SIMDE_FLOAT32_C( 389.33), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 650.54), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 25.68) } }, { UINT16_C(17887), { SIMDE_FLOAT32_C( 410.27), SIMDE_FLOAT32_C( 802.77), SIMDE_FLOAT32_C( 458.15), SIMDE_FLOAT32_C( -489.53), SIMDE_FLOAT32_C( 667.49), SIMDE_FLOAT32_C( 529.19), SIMDE_FLOAT32_C( -917.27), SIMDE_FLOAT32_C( -122.92), SIMDE_FLOAT32_C( -514.30), SIMDE_FLOAT32_C( -183.91), SIMDE_FLOAT32_C( -618.04), SIMDE_FLOAT32_C( -863.35), SIMDE_FLOAT32_C( 949.21), SIMDE_FLOAT32_C( 132.51), SIMDE_FLOAT32_C( -458.53), SIMDE_FLOAT32_C( 549.89) }, { SIMDE_FLOAT32_C( 649.59), SIMDE_FLOAT32_C( 305.95), SIMDE_FLOAT32_C( 780.56), SIMDE_FLOAT32_C( 199.92), SIMDE_FLOAT32_C( -634.93), SIMDE_FLOAT32_C( 52.72), SIMDE_FLOAT32_C( 653.35), SIMDE_FLOAT32_C( 121.14), SIMDE_FLOAT32_C( -572.98), SIMDE_FLOAT32_C( -13.91), SIMDE_FLOAT32_C( 496.32), SIMDE_FLOAT32_C( 868.36), SIMDE_FLOAT32_C( 822.96), SIMDE_FLOAT32_C( -522.04), SIMDE_FLOAT32_C( -901.64), SIMDE_FLOAT32_C( 233.23) }, { SIMDE_FLOAT32_C( 1059.85), SIMDE_FLOAT32_C( 1108.71), SIMDE_FLOAT32_C( 1238.71), SIMDE_FLOAT32_C( -289.60), SIMDE_FLOAT32_C( 32.56), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -263.92), SIMDE_FLOAT32_C( -1.78), 
SIMDE_FLOAT32_C( -1087.28), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -121.72), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -1360.17), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(52085), { SIMDE_FLOAT32_C( -357.48), SIMDE_FLOAT32_C( 207.25), SIMDE_FLOAT32_C( 708.05), SIMDE_FLOAT32_C( -408.26), SIMDE_FLOAT32_C( -660.23), SIMDE_FLOAT32_C( -750.48), SIMDE_FLOAT32_C( -858.37), SIMDE_FLOAT32_C( 989.35), SIMDE_FLOAT32_C( 555.47), SIMDE_FLOAT32_C( 922.19), SIMDE_FLOAT32_C( 189.28), SIMDE_FLOAT32_C( 920.54), SIMDE_FLOAT32_C( -25.09), SIMDE_FLOAT32_C( -157.38), SIMDE_FLOAT32_C( 41.68), SIMDE_FLOAT32_C( 401.93) }, { SIMDE_FLOAT32_C( 828.72), SIMDE_FLOAT32_C( -462.00), SIMDE_FLOAT32_C( 270.29), SIMDE_FLOAT32_C( 651.68), SIMDE_FLOAT32_C( 15.96), SIMDE_FLOAT32_C( 368.65), SIMDE_FLOAT32_C( -115.09), SIMDE_FLOAT32_C( 296.68), SIMDE_FLOAT32_C( -74.83), SIMDE_FLOAT32_C( -371.39), SIMDE_FLOAT32_C( 244.89), SIMDE_FLOAT32_C( -989.13), SIMDE_FLOAT32_C( -544.95), SIMDE_FLOAT32_C( -929.81), SIMDE_FLOAT32_C( 582.27), SIMDE_FLOAT32_C( 97.57) }, { SIMDE_FLOAT32_C( 471.24), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 978.34), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -644.28), SIMDE_FLOAT32_C( -381.83), SIMDE_FLOAT32_C( -973.46), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 480.63), SIMDE_FLOAT32_C( 550.80), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -68.59), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 623.95), SIMDE_FLOAT32_C( 499.50) } }, { UINT16_C(30320), { SIMDE_FLOAT32_C( 753.13), SIMDE_FLOAT32_C( -204.17), SIMDE_FLOAT32_C( 15.84), SIMDE_FLOAT32_C( -271.97), SIMDE_FLOAT32_C( 638.45), SIMDE_FLOAT32_C( -942.48), SIMDE_FLOAT32_C( -870.04), SIMDE_FLOAT32_C( 467.17), SIMDE_FLOAT32_C( -404.47), SIMDE_FLOAT32_C( 400.26), SIMDE_FLOAT32_C( 118.85), SIMDE_FLOAT32_C( 611.49), SIMDE_FLOAT32_C( -231.09), SIMDE_FLOAT32_C( -996.24), SIMDE_FLOAT32_C( -91.83), SIMDE_FLOAT32_C( 694.08) }, { SIMDE_FLOAT32_C( -367.63), SIMDE_FLOAT32_C( -846.94), 
SIMDE_FLOAT32_C( 704.95), SIMDE_FLOAT32_C( 87.42), SIMDE_FLOAT32_C( -776.75), SIMDE_FLOAT32_C( 287.22), SIMDE_FLOAT32_C( -815.01), SIMDE_FLOAT32_C( 500.69), SIMDE_FLOAT32_C( -422.46), SIMDE_FLOAT32_C( 874.30), SIMDE_FLOAT32_C( 117.89), SIMDE_FLOAT32_C( -882.62), SIMDE_FLOAT32_C( 705.23), SIMDE_FLOAT32_C( -275.56), SIMDE_FLOAT32_C( 212.68), SIMDE_FLOAT32_C( 458.36) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -138.30), SIMDE_FLOAT32_C( -655.26), SIMDE_FLOAT32_C( -1685.05), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1274.55), SIMDE_FLOAT32_C( 236.74), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 474.14), SIMDE_FLOAT32_C( -1271.80), SIMDE_FLOAT32_C( 120.85), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C( 6201), { SIMDE_FLOAT32_C( 716.62), SIMDE_FLOAT32_C( 744.75), SIMDE_FLOAT32_C( -506.94), SIMDE_FLOAT32_C( -514.47), SIMDE_FLOAT32_C( 748.50), SIMDE_FLOAT32_C( 401.23), SIMDE_FLOAT32_C( -820.39), SIMDE_FLOAT32_C( -619.12), SIMDE_FLOAT32_C( 554.30), SIMDE_FLOAT32_C( 884.56), SIMDE_FLOAT32_C( 468.30), SIMDE_FLOAT32_C( 777.54), SIMDE_FLOAT32_C( 171.78), SIMDE_FLOAT32_C( 653.28), SIMDE_FLOAT32_C( 278.23), SIMDE_FLOAT32_C( 749.31) }, { SIMDE_FLOAT32_C( 527.58), SIMDE_FLOAT32_C( -603.88), SIMDE_FLOAT32_C( 866.69), SIMDE_FLOAT32_C( 232.81), SIMDE_FLOAT32_C( 120.56), SIMDE_FLOAT32_C( 79.37), SIMDE_FLOAT32_C( -308.83), SIMDE_FLOAT32_C( -359.16), SIMDE_FLOAT32_C( 307.90), SIMDE_FLOAT32_C( -122.44), SIMDE_FLOAT32_C( 799.56), SIMDE_FLOAT32_C( 593.95), SIMDE_FLOAT32_C( 193.92), SIMDE_FLOAT32_C( -574.54), SIMDE_FLOAT32_C( -524.47), SIMDE_FLOAT32_C( -89.46) }, { SIMDE_FLOAT32_C( 1244.20), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -281.66), SIMDE_FLOAT32_C( 869.07), SIMDE_FLOAT32_C( 480.61), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1371.50), SIMDE_FLOAT32_C( 365.70), SIMDE_FLOAT32_C( 
0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_mm512_maskz_add_ps(test_vec[i].k, a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_add_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -775.47), SIMDE_FLOAT64_C( 0.19), SIMDE_FLOAT64_C( -757.09), SIMDE_FLOAT64_C( -410.73), SIMDE_FLOAT64_C( -780.15), SIMDE_FLOAT64_C( -708.07), SIMDE_FLOAT64_C( 367.35), SIMDE_FLOAT64_C( -968.32) }, { SIMDE_FLOAT64_C( 820.33), SIMDE_FLOAT64_C( 850.82), SIMDE_FLOAT64_C( 596.13), SIMDE_FLOAT64_C( -878.99), SIMDE_FLOAT64_C( -603.61), SIMDE_FLOAT64_C( 533.64), SIMDE_FLOAT64_C( -994.37), SIMDE_FLOAT64_C( 846.35) }, { SIMDE_FLOAT64_C( 44.87), SIMDE_FLOAT64_C( 851.01), SIMDE_FLOAT64_C( -160.96), SIMDE_FLOAT64_C( -1289.72), SIMDE_FLOAT64_C( -1383.75), SIMDE_FLOAT64_C( -174.43), SIMDE_FLOAT64_C( -627.02), SIMDE_FLOAT64_C( -121.96) } }, { { SIMDE_FLOAT64_C( -503.42), SIMDE_FLOAT64_C( -250.77), SIMDE_FLOAT64_C( -532.42), SIMDE_FLOAT64_C( 815.06), SIMDE_FLOAT64_C( -419.11), SIMDE_FLOAT64_C( 224.41), SIMDE_FLOAT64_C( -34.26), SIMDE_FLOAT64_C( -803.36) }, { SIMDE_FLOAT64_C( -331.10), SIMDE_FLOAT64_C( -474.33), SIMDE_FLOAT64_C( 866.30), SIMDE_FLOAT64_C( 560.33), SIMDE_FLOAT64_C( 467.15), SIMDE_FLOAT64_C( 279.38), SIMDE_FLOAT64_C( -475.96), SIMDE_FLOAT64_C( 691.69) }, { SIMDE_FLOAT64_C( -834.52), SIMDE_FLOAT64_C( -725.11), SIMDE_FLOAT64_C( 333.88), SIMDE_FLOAT64_C( 1375.40), SIMDE_FLOAT64_C( 48.04), SIMDE_FLOAT64_C( 503.79), SIMDE_FLOAT64_C( -510.22), SIMDE_FLOAT64_C( -111.67) } }, { { SIMDE_FLOAT64_C( -720.44), SIMDE_FLOAT64_C( -233.05), SIMDE_FLOAT64_C( -719.04), SIMDE_FLOAT64_C( -500.58), 
SIMDE_FLOAT64_C( 58.88), SIMDE_FLOAT64_C( 648.31), SIMDE_FLOAT64_C( -468.90), SIMDE_FLOAT64_C( -120.79) }, { SIMDE_FLOAT64_C( 499.13), SIMDE_FLOAT64_C( -872.76), SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( 895.52), SIMDE_FLOAT64_C( 660.88), SIMDE_FLOAT64_C( 5.85), SIMDE_FLOAT64_C( 741.88), SIMDE_FLOAT64_C( -842.54) }, { SIMDE_FLOAT64_C( -221.31), SIMDE_FLOAT64_C( -1105.81), SIMDE_FLOAT64_C( -718.83), SIMDE_FLOAT64_C( 394.94), SIMDE_FLOAT64_C( 719.76), SIMDE_FLOAT64_C( 654.16), SIMDE_FLOAT64_C( 272.98), SIMDE_FLOAT64_C( -963.33) } }, { { SIMDE_FLOAT64_C( 755.08), SIMDE_FLOAT64_C( -790.54), SIMDE_FLOAT64_C( 972.53), SIMDE_FLOAT64_C( -664.03), SIMDE_FLOAT64_C( 433.87), SIMDE_FLOAT64_C( -61.74), SIMDE_FLOAT64_C( -467.39), SIMDE_FLOAT64_C( -897.23) }, { SIMDE_FLOAT64_C( 463.93), SIMDE_FLOAT64_C( -601.09), SIMDE_FLOAT64_C( 663.10), SIMDE_FLOAT64_C( -68.92), SIMDE_FLOAT64_C( 678.29), SIMDE_FLOAT64_C( -812.86), SIMDE_FLOAT64_C( -377.23), SIMDE_FLOAT64_C( 957.85) }, { SIMDE_FLOAT64_C( 1219.01), SIMDE_FLOAT64_C( -1391.63), SIMDE_FLOAT64_C( 1635.63), SIMDE_FLOAT64_C( -732.95), SIMDE_FLOAT64_C( 1112.16), SIMDE_FLOAT64_C( -874.59), SIMDE_FLOAT64_C( -844.62), SIMDE_FLOAT64_C( 60.62) } }, { { SIMDE_FLOAT64_C( -45.90), SIMDE_FLOAT64_C( -96.28), SIMDE_FLOAT64_C( -542.73), SIMDE_FLOAT64_C( -987.02), SIMDE_FLOAT64_C( -447.97), SIMDE_FLOAT64_C( -11.63), SIMDE_FLOAT64_C( -107.82), SIMDE_FLOAT64_C( -948.84) }, { SIMDE_FLOAT64_C( 115.60), SIMDE_FLOAT64_C( 892.40), SIMDE_FLOAT64_C( 946.68), SIMDE_FLOAT64_C( -223.52), SIMDE_FLOAT64_C( -101.75), SIMDE_FLOAT64_C( 688.56), SIMDE_FLOAT64_C( -66.05), SIMDE_FLOAT64_C( -346.67) }, { SIMDE_FLOAT64_C( 69.70), SIMDE_FLOAT64_C( 796.12), SIMDE_FLOAT64_C( 403.95), SIMDE_FLOAT64_C( -1210.54), SIMDE_FLOAT64_C( -549.72), SIMDE_FLOAT64_C( 676.92), SIMDE_FLOAT64_C( -173.87), SIMDE_FLOAT64_C( -1295.52) } }, { { SIMDE_FLOAT64_C( 898.01), SIMDE_FLOAT64_C( -93.53), SIMDE_FLOAT64_C( -10.70), SIMDE_FLOAT64_C( 331.89), SIMDE_FLOAT64_C( 844.74), SIMDE_FLOAT64_C( 
521.91), SIMDE_FLOAT64_C( 434.66), SIMDE_FLOAT64_C( 308.66) }, { SIMDE_FLOAT64_C( 920.82), SIMDE_FLOAT64_C( 97.76), SIMDE_FLOAT64_C( -760.25), SIMDE_FLOAT64_C( 599.10), SIMDE_FLOAT64_C( 284.91), SIMDE_FLOAT64_C( -137.49), SIMDE_FLOAT64_C( 556.96), SIMDE_FLOAT64_C( -761.00) }, { SIMDE_FLOAT64_C( 1818.83), SIMDE_FLOAT64_C( 4.23), SIMDE_FLOAT64_C( -770.96), SIMDE_FLOAT64_C( 930.99), SIMDE_FLOAT64_C( 1129.64), SIMDE_FLOAT64_C( 384.42), SIMDE_FLOAT64_C( 991.61), SIMDE_FLOAT64_C( -452.33) } }, { { SIMDE_FLOAT64_C( 766.23), SIMDE_FLOAT64_C( -985.78), SIMDE_FLOAT64_C( -748.02), SIMDE_FLOAT64_C( -681.74), SIMDE_FLOAT64_C( 2.59), SIMDE_FLOAT64_C( 144.16), SIMDE_FLOAT64_C( -630.58), SIMDE_FLOAT64_C( -881.80) }, { SIMDE_FLOAT64_C( 36.57), SIMDE_FLOAT64_C( -683.90), SIMDE_FLOAT64_C( -105.32), SIMDE_FLOAT64_C( 934.82), SIMDE_FLOAT64_C( -995.35), SIMDE_FLOAT64_C( 828.63), SIMDE_FLOAT64_C( -411.86), SIMDE_FLOAT64_C( 902.67) }, { SIMDE_FLOAT64_C( 802.80), SIMDE_FLOAT64_C( -1669.68), SIMDE_FLOAT64_C( -853.34), SIMDE_FLOAT64_C( 253.08), SIMDE_FLOAT64_C( -992.76), SIMDE_FLOAT64_C( 972.79), SIMDE_FLOAT64_C( -1042.44), SIMDE_FLOAT64_C( 20.86) } }, { { SIMDE_FLOAT64_C( -264.90), SIMDE_FLOAT64_C( 577.44), SIMDE_FLOAT64_C( 234.56), SIMDE_FLOAT64_C( -420.17), SIMDE_FLOAT64_C( 99.35), SIMDE_FLOAT64_C( -330.78), SIMDE_FLOAT64_C( 888.50), SIMDE_FLOAT64_C( 20.17) }, { SIMDE_FLOAT64_C( 766.98), SIMDE_FLOAT64_C( -871.76), SIMDE_FLOAT64_C( -380.73), SIMDE_FLOAT64_C( 51.88), SIMDE_FLOAT64_C( -9.24), SIMDE_FLOAT64_C( -823.77), SIMDE_FLOAT64_C( 290.89), SIMDE_FLOAT64_C( -243.01) }, { SIMDE_FLOAT64_C( 502.08), SIMDE_FLOAT64_C( -294.31), SIMDE_FLOAT64_C( -146.17), SIMDE_FLOAT64_C( -368.28), SIMDE_FLOAT64_C( 90.11), SIMDE_FLOAT64_C( -1154.55), SIMDE_FLOAT64_C( 1179.39), SIMDE_FLOAT64_C( -222.84) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = 
simde_mm512_add_pd(a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_add_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -241.95), SIMDE_FLOAT64_C( 348.31), SIMDE_FLOAT64_C( -125.04), SIMDE_FLOAT64_C( -245.69), SIMDE_FLOAT64_C( -588.93), SIMDE_FLOAT64_C( -276.58), SIMDE_FLOAT64_C( -867.91), SIMDE_FLOAT64_C( -10.44) }, UINT8_C( 17), { SIMDE_FLOAT64_C( 296.41), SIMDE_FLOAT64_C( -576.00), SIMDE_FLOAT64_C( 367.45), SIMDE_FLOAT64_C( -294.17), SIMDE_FLOAT64_C( -916.48), SIMDE_FLOAT64_C( 266.60), SIMDE_FLOAT64_C( 288.43), SIMDE_FLOAT64_C( 471.22) }, { SIMDE_FLOAT64_C( -995.36), SIMDE_FLOAT64_C( 468.56), SIMDE_FLOAT64_C( 50.02), SIMDE_FLOAT64_C( -51.51), SIMDE_FLOAT64_C( 997.69), SIMDE_FLOAT64_C( 806.19), SIMDE_FLOAT64_C( -145.36), SIMDE_FLOAT64_C( 877.33) }, { SIMDE_FLOAT64_C( -698.96), SIMDE_FLOAT64_C( 348.31), SIMDE_FLOAT64_C( -125.04), SIMDE_FLOAT64_C( -245.69), SIMDE_FLOAT64_C( 81.21), SIMDE_FLOAT64_C( -276.58), SIMDE_FLOAT64_C( -867.91), SIMDE_FLOAT64_C( -10.44) } }, { { SIMDE_FLOAT64_C( -303.10), SIMDE_FLOAT64_C( -675.79), SIMDE_FLOAT64_C( 770.76), SIMDE_FLOAT64_C( 600.76), SIMDE_FLOAT64_C( -105.79), SIMDE_FLOAT64_C( -257.88), SIMDE_FLOAT64_C( -641.18), SIMDE_FLOAT64_C( -757.48) }, UINT8_C(183), { SIMDE_FLOAT64_C( 113.13), SIMDE_FLOAT64_C( -346.41), SIMDE_FLOAT64_C( -659.51), SIMDE_FLOAT64_C( 245.22), SIMDE_FLOAT64_C( 643.14), SIMDE_FLOAT64_C( 43.25), SIMDE_FLOAT64_C( -458.37), SIMDE_FLOAT64_C( -932.86) }, { SIMDE_FLOAT64_C( -589.30), SIMDE_FLOAT64_C( 247.46), SIMDE_FLOAT64_C( -849.33), SIMDE_FLOAT64_C( 677.31), SIMDE_FLOAT64_C( -464.11), SIMDE_FLOAT64_C( 621.89), SIMDE_FLOAT64_C( 681.94), SIMDE_FLOAT64_C( -995.54) }, { SIMDE_FLOAT64_C( -476.17), SIMDE_FLOAT64_C( -98.95), SIMDE_FLOAT64_C( -1508.84), SIMDE_FLOAT64_C( 
600.76), SIMDE_FLOAT64_C( 179.04), SIMDE_FLOAT64_C( 665.14), SIMDE_FLOAT64_C( -641.18), SIMDE_FLOAT64_C( -1928.40) } }, { { SIMDE_FLOAT64_C( -328.10), SIMDE_FLOAT64_C( -369.57), SIMDE_FLOAT64_C( -997.86), SIMDE_FLOAT64_C( -521.91), SIMDE_FLOAT64_C( 485.07), SIMDE_FLOAT64_C( 879.48), SIMDE_FLOAT64_C( 175.00), SIMDE_FLOAT64_C( 809.28) }, UINT8_C( 91), { SIMDE_FLOAT64_C( -224.24), SIMDE_FLOAT64_C( -296.51), SIMDE_FLOAT64_C( -607.64), SIMDE_FLOAT64_C( 134.57), SIMDE_FLOAT64_C( -53.99), SIMDE_FLOAT64_C( -990.57), SIMDE_FLOAT64_C( -752.30), SIMDE_FLOAT64_C( 599.60) }, { SIMDE_FLOAT64_C( -650.08), SIMDE_FLOAT64_C( 492.93), SIMDE_FLOAT64_C( 242.74), SIMDE_FLOAT64_C( 393.17), SIMDE_FLOAT64_C( -965.44), SIMDE_FLOAT64_C( 309.89), SIMDE_FLOAT64_C( 803.88), SIMDE_FLOAT64_C( 282.02) }, { SIMDE_FLOAT64_C( -874.32), SIMDE_FLOAT64_C( 196.42), SIMDE_FLOAT64_C( -997.86), SIMDE_FLOAT64_C( 527.75), SIMDE_FLOAT64_C( -1019.43), SIMDE_FLOAT64_C( 879.48), SIMDE_FLOAT64_C( 51.58), SIMDE_FLOAT64_C( 809.28) } }, { { SIMDE_FLOAT64_C( 460.56), SIMDE_FLOAT64_C( 481.18), SIMDE_FLOAT64_C( 817.91), SIMDE_FLOAT64_C( 82.44), SIMDE_FLOAT64_C( 163.12), SIMDE_FLOAT64_C( 822.36), SIMDE_FLOAT64_C( 754.35), SIMDE_FLOAT64_C( 793.56) }, UINT8_C( 35), { SIMDE_FLOAT64_C( -767.56), SIMDE_FLOAT64_C( 278.63), SIMDE_FLOAT64_C( 703.98), SIMDE_FLOAT64_C( 407.44), SIMDE_FLOAT64_C( 87.91), SIMDE_FLOAT64_C( 354.22), SIMDE_FLOAT64_C( -816.81), SIMDE_FLOAT64_C( 791.41) }, { SIMDE_FLOAT64_C( 746.58), SIMDE_FLOAT64_C( 317.77), SIMDE_FLOAT64_C( -262.58), SIMDE_FLOAT64_C( 756.01), SIMDE_FLOAT64_C( 565.47), SIMDE_FLOAT64_C( -662.99), SIMDE_FLOAT64_C( -894.07), SIMDE_FLOAT64_C( 58.40) }, { SIMDE_FLOAT64_C( -20.98), SIMDE_FLOAT64_C( 596.40), SIMDE_FLOAT64_C( 817.91), SIMDE_FLOAT64_C( 82.44), SIMDE_FLOAT64_C( 163.12), SIMDE_FLOAT64_C( -308.76), SIMDE_FLOAT64_C( 754.35), SIMDE_FLOAT64_C( 793.56) } }, { { SIMDE_FLOAT64_C( 579.76), SIMDE_FLOAT64_C( 499.11), SIMDE_FLOAT64_C( 92.96), SIMDE_FLOAT64_C( -110.35), SIMDE_FLOAT64_C( 
302.99), SIMDE_FLOAT64_C( -625.02), SIMDE_FLOAT64_C( -649.80), SIMDE_FLOAT64_C( -215.83) }, UINT8_C( 3), { SIMDE_FLOAT64_C( 432.65), SIMDE_FLOAT64_C( 947.29), SIMDE_FLOAT64_C( -984.75), SIMDE_FLOAT64_C( 186.99), SIMDE_FLOAT64_C( 740.85), SIMDE_FLOAT64_C( 839.76), SIMDE_FLOAT64_C( 419.43), SIMDE_FLOAT64_C( 19.48) }, { SIMDE_FLOAT64_C( 543.74), SIMDE_FLOAT64_C( -173.13), SIMDE_FLOAT64_C( -892.61), SIMDE_FLOAT64_C( -102.04), SIMDE_FLOAT64_C( 10.06), SIMDE_FLOAT64_C( 898.80), SIMDE_FLOAT64_C( -355.45), SIMDE_FLOAT64_C( -672.17) }, { SIMDE_FLOAT64_C( 976.39), SIMDE_FLOAT64_C( 774.16), SIMDE_FLOAT64_C( 92.96), SIMDE_FLOAT64_C( -110.35), SIMDE_FLOAT64_C( 302.99), SIMDE_FLOAT64_C( -625.02), SIMDE_FLOAT64_C( -649.80), SIMDE_FLOAT64_C( -215.83) } }, { { SIMDE_FLOAT64_C( -363.79), SIMDE_FLOAT64_C( -599.44), SIMDE_FLOAT64_C( 893.30), SIMDE_FLOAT64_C( -26.77), SIMDE_FLOAT64_C( -493.51), SIMDE_FLOAT64_C( -48.30), SIMDE_FLOAT64_C( -447.01), SIMDE_FLOAT64_C( -994.40) }, UINT8_C( 89), { SIMDE_FLOAT64_C( 442.63), SIMDE_FLOAT64_C( 308.59), SIMDE_FLOAT64_C( -580.36), SIMDE_FLOAT64_C( 792.84), SIMDE_FLOAT64_C( -907.24), SIMDE_FLOAT64_C( -387.48), SIMDE_FLOAT64_C( 225.48), SIMDE_FLOAT64_C( -959.95) }, { SIMDE_FLOAT64_C( -372.23), SIMDE_FLOAT64_C( -587.52), SIMDE_FLOAT64_C( 780.90), SIMDE_FLOAT64_C( -532.47), SIMDE_FLOAT64_C( 831.91), SIMDE_FLOAT64_C( -199.62), SIMDE_FLOAT64_C( -988.73), SIMDE_FLOAT64_C( -341.22) }, { SIMDE_FLOAT64_C( 70.40), SIMDE_FLOAT64_C( -599.44), SIMDE_FLOAT64_C( 893.30), SIMDE_FLOAT64_C( 260.36), SIMDE_FLOAT64_C( -75.33), SIMDE_FLOAT64_C( -48.30), SIMDE_FLOAT64_C( -763.25), SIMDE_FLOAT64_C( -994.40) } }, { { SIMDE_FLOAT64_C( -92.23), SIMDE_FLOAT64_C( -90.77), SIMDE_FLOAT64_C( 668.84), SIMDE_FLOAT64_C( -193.43), SIMDE_FLOAT64_C( 553.78), SIMDE_FLOAT64_C( 996.67), SIMDE_FLOAT64_C( 442.78), SIMDE_FLOAT64_C( 954.34) }, UINT8_C(200), { SIMDE_FLOAT64_C( -583.99), SIMDE_FLOAT64_C( -539.17), SIMDE_FLOAT64_C( -158.32), SIMDE_FLOAT64_C( -31.00), SIMDE_FLOAT64_C( -533.56), 
SIMDE_FLOAT64_C( -113.65), SIMDE_FLOAT64_C( -588.37), SIMDE_FLOAT64_C( 775.02) }, { SIMDE_FLOAT64_C( 305.99), SIMDE_FLOAT64_C( -795.53), SIMDE_FLOAT64_C( 867.78), SIMDE_FLOAT64_C( 918.51), SIMDE_FLOAT64_C( 429.95), SIMDE_FLOAT64_C( 907.83), SIMDE_FLOAT64_C( -453.72), SIMDE_FLOAT64_C( 842.43) }, { SIMDE_FLOAT64_C( -92.23), SIMDE_FLOAT64_C( -90.77), SIMDE_FLOAT64_C( 668.84), SIMDE_FLOAT64_C( 887.51), SIMDE_FLOAT64_C( 553.78), SIMDE_FLOAT64_C( 996.67), SIMDE_FLOAT64_C( -1042.09), SIMDE_FLOAT64_C( 1617.45) } }, { { SIMDE_FLOAT64_C( 688.73), SIMDE_FLOAT64_C( 13.81), SIMDE_FLOAT64_C( 674.34), SIMDE_FLOAT64_C( -510.89), SIMDE_FLOAT64_C( 25.08), SIMDE_FLOAT64_C( -666.88), SIMDE_FLOAT64_C( 396.88), SIMDE_FLOAT64_C( 934.31) }, UINT8_C(155), { SIMDE_FLOAT64_C( -796.55), SIMDE_FLOAT64_C( 488.09), SIMDE_FLOAT64_C( 998.63), SIMDE_FLOAT64_C( 646.24), SIMDE_FLOAT64_C( 442.43), SIMDE_FLOAT64_C( 888.61), SIMDE_FLOAT64_C( -937.75), SIMDE_FLOAT64_C( 903.26) }, { SIMDE_FLOAT64_C( -269.71), SIMDE_FLOAT64_C( 31.25), SIMDE_FLOAT64_C( -630.30), SIMDE_FLOAT64_C( 616.64), SIMDE_FLOAT64_C( 442.88), SIMDE_FLOAT64_C( -855.28), SIMDE_FLOAT64_C( -77.38), SIMDE_FLOAT64_C( 647.35) }, { SIMDE_FLOAT64_C( -1066.26), SIMDE_FLOAT64_C( 519.34), SIMDE_FLOAT64_C( 674.34), SIMDE_FLOAT64_C( 1262.87), SIMDE_FLOAT64_C( 885.31), SIMDE_FLOAT64_C( -666.88), SIMDE_FLOAT64_C( 396.88), SIMDE_FLOAT64_C( 1550.61) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_mm512_mask_add_pd(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_maskz_add_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { { 
UINT8_C(141), { SIMDE_FLOAT64_C( 539.39), SIMDE_FLOAT64_C( 127.65), SIMDE_FLOAT64_C( -419.83), SIMDE_FLOAT64_C( -509.25), SIMDE_FLOAT64_C( 614.81), SIMDE_FLOAT64_C( -356.87), SIMDE_FLOAT64_C( -437.81), SIMDE_FLOAT64_C( 217.95) }, { SIMDE_FLOAT64_C( -60.15), SIMDE_FLOAT64_C( -699.30), SIMDE_FLOAT64_C( 963.74), SIMDE_FLOAT64_C( 851.36), SIMDE_FLOAT64_C( 773.07), SIMDE_FLOAT64_C( -457.96), SIMDE_FLOAT64_C( -310.92), SIMDE_FLOAT64_C( 852.62) }, { SIMDE_FLOAT64_C( 479.24), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 543.91), SIMDE_FLOAT64_C( 342.11), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1070.58) } }, { UINT8_C(123), { SIMDE_FLOAT64_C( 902.15), SIMDE_FLOAT64_C( 661.09), SIMDE_FLOAT64_C( -493.90), SIMDE_FLOAT64_C( 433.62), SIMDE_FLOAT64_C( -884.72), SIMDE_FLOAT64_C( -690.47), SIMDE_FLOAT64_C( -391.44), SIMDE_FLOAT64_C( -97.69) }, { SIMDE_FLOAT64_C( -732.29), SIMDE_FLOAT64_C( 446.84), SIMDE_FLOAT64_C( -990.19), SIMDE_FLOAT64_C( 216.62), SIMDE_FLOAT64_C( -720.09), SIMDE_FLOAT64_C( 35.61), SIMDE_FLOAT64_C( -243.99), SIMDE_FLOAT64_C( 407.56) }, { SIMDE_FLOAT64_C( 169.86), SIMDE_FLOAT64_C( 1107.93), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 650.24), SIMDE_FLOAT64_C( -1604.81), SIMDE_FLOAT64_C( -654.86), SIMDE_FLOAT64_C( -635.42), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(175), { SIMDE_FLOAT64_C( 246.76), SIMDE_FLOAT64_C( 22.37), SIMDE_FLOAT64_C( -741.09), SIMDE_FLOAT64_C( 808.94), SIMDE_FLOAT64_C( -759.68), SIMDE_FLOAT64_C( 198.75), SIMDE_FLOAT64_C( -890.36), SIMDE_FLOAT64_C( -795.93) }, { SIMDE_FLOAT64_C( 50.12), SIMDE_FLOAT64_C( 882.71), SIMDE_FLOAT64_C( -253.90), SIMDE_FLOAT64_C( 739.19), SIMDE_FLOAT64_C( 735.33), SIMDE_FLOAT64_C( 572.27), SIMDE_FLOAT64_C( 641.34), SIMDE_FLOAT64_C( 396.42) }, { SIMDE_FLOAT64_C( 296.87), SIMDE_FLOAT64_C( 905.08), SIMDE_FLOAT64_C( -994.99), SIMDE_FLOAT64_C( 1548.14), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 771.02), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -399.51) } }, { UINT8_C( 10), { 
SIMDE_FLOAT64_C( 74.96), SIMDE_FLOAT64_C( 511.70), SIMDE_FLOAT64_C( -612.10), SIMDE_FLOAT64_C( 683.53), SIMDE_FLOAT64_C( -585.99), SIMDE_FLOAT64_C( -344.39), SIMDE_FLOAT64_C( 130.37), SIMDE_FLOAT64_C( -576.18) }, { SIMDE_FLOAT64_C( 872.23), SIMDE_FLOAT64_C( 410.28), SIMDE_FLOAT64_C( 459.43), SIMDE_FLOAT64_C( -371.75), SIMDE_FLOAT64_C( -182.16), SIMDE_FLOAT64_C( 75.20), SIMDE_FLOAT64_C( 875.00), SIMDE_FLOAT64_C( 840.21) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 921.98), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 311.77), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C( 10), { SIMDE_FLOAT64_C( 683.95), SIMDE_FLOAT64_C( -919.47), SIMDE_FLOAT64_C( -467.14), SIMDE_FLOAT64_C( 793.59), SIMDE_FLOAT64_C( -715.40), SIMDE_FLOAT64_C( 582.98), SIMDE_FLOAT64_C( 676.29), SIMDE_FLOAT64_C( 30.70) }, { SIMDE_FLOAT64_C( 322.17), SIMDE_FLOAT64_C( 411.62), SIMDE_FLOAT64_C( -397.03), SIMDE_FLOAT64_C( -36.48), SIMDE_FLOAT64_C( -191.96), SIMDE_FLOAT64_C( -318.66), SIMDE_FLOAT64_C( -961.52), SIMDE_FLOAT64_C( -680.25) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -507.84), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 757.10), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(199), { SIMDE_FLOAT64_C( 722.01), SIMDE_FLOAT64_C( -266.24), SIMDE_FLOAT64_C( 724.85), SIMDE_FLOAT64_C( -147.62), SIMDE_FLOAT64_C( 157.58), SIMDE_FLOAT64_C( 597.08), SIMDE_FLOAT64_C( -737.35), SIMDE_FLOAT64_C( -383.00) }, { SIMDE_FLOAT64_C( -774.68), SIMDE_FLOAT64_C( 80.49), SIMDE_FLOAT64_C( 692.21), SIMDE_FLOAT64_C( -899.67), SIMDE_FLOAT64_C( -79.30), SIMDE_FLOAT64_C( 26.32), SIMDE_FLOAT64_C( 784.27), SIMDE_FLOAT64_C( 1.24) }, { SIMDE_FLOAT64_C( -52.67), SIMDE_FLOAT64_C( -185.75), SIMDE_FLOAT64_C( 1417.06), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 46.93), SIMDE_FLOAT64_C( -381.76) } }, { UINT8_C(108), { SIMDE_FLOAT64_C( 577.86), SIMDE_FLOAT64_C( 285.84), 
SIMDE_FLOAT64_C( 142.16), SIMDE_FLOAT64_C( 254.16), SIMDE_FLOAT64_C( -683.46), SIMDE_FLOAT64_C( -535.67), SIMDE_FLOAT64_C( -334.22), SIMDE_FLOAT64_C( -80.49) }, { SIMDE_FLOAT64_C( 427.85), SIMDE_FLOAT64_C( 473.82), SIMDE_FLOAT64_C( 600.85), SIMDE_FLOAT64_C( 466.33), SIMDE_FLOAT64_C( 793.57), SIMDE_FLOAT64_C( -329.91), SIMDE_FLOAT64_C( 188.34), SIMDE_FLOAT64_C( -472.67) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 743.02), SIMDE_FLOAT64_C( 720.48), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -865.57), SIMDE_FLOAT64_C( -145.89), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C( 48), { SIMDE_FLOAT64_C( -959.29), SIMDE_FLOAT64_C( 684.90), SIMDE_FLOAT64_C( 992.02), SIMDE_FLOAT64_C( -696.63), SIMDE_FLOAT64_C( -698.09), SIMDE_FLOAT64_C( -782.66), SIMDE_FLOAT64_C( 383.86), SIMDE_FLOAT64_C( 994.11) }, { SIMDE_FLOAT64_C( -682.33), SIMDE_FLOAT64_C( -695.44), SIMDE_FLOAT64_C( 20.43), SIMDE_FLOAT64_C( -898.06), SIMDE_FLOAT64_C( 305.80), SIMDE_FLOAT64_C( -420.39), SIMDE_FLOAT64_C( 679.80), SIMDE_FLOAT64_C( -408.37) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -392.30), SIMDE_FLOAT64_C( -1203.04), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_mm512_maskz_add_pd(test_vec[i].k, a, b); #if defined(__EMSCRIPTEN__) (void) r; #else simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); #endif } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_add_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_add_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_add_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_add_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_add_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_add_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_add_epi64) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_add_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_add_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_add_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_add_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_add_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_add_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_add_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_add_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_add_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_add_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_add_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_add_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_add_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_add_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_add_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_add_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_add_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_add_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_add_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_add_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_add_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_add_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_add_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_add_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_add_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_add_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_add_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/adds.c000066400000000000000000014405371400333146700163510ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or 
substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #define SIMDE_TEST_X86_AVX512_INSN adds #include #include #include static int test_simde_mm_mask_adds_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int8_t src[16]; const simde__mmask16 k; const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { { { -INT8_C( 31), -INT8_C( 5), INT8_C( 46), INT8_C( 1), INT8_C( 19), -INT8_C( 53), INT8_C( 28), INT8_C( 66), -INT8_C( 30), INT8_C( 100), INT8_C( 35), -INT8_C( 33), INT8_C( 39), -INT8_C( 73), INT8_C( 19), INT8_C( 92) }, UINT16_C( 2426), { -INT8_C( 95), INT8_C( 26), -INT8_C( 72), INT8_C( 19), INT8_C( 14), INT8_C( 102), INT8_C( 82), -INT8_C( 2), -INT8_C( 59), -INT8_C( 10), INT8_C( 113), INT8_C( 15), INT8_C( 27), INT8_C( 69), -INT8_C( 110), INT8_C( 115) }, { INT8_C( 15), INT8_C( 75), INT8_C( 123), -INT8_C( 127), -INT8_C( 110), INT8_C( 47), INT8_C( 62), -INT8_C( 126), INT8_C( 41), INT8_C( 26), INT8_C( 55), INT8_C( 3), -INT8_C( 110), -INT8_C( 34), -INT8_C( 49), -INT8_C( 55) }, { -INT8_C( 31), INT8_C( 101), INT8_C( 46), -INT8_C( 108), -INT8_C( 96), INT8_MAX, INT8_MAX, INT8_C( 66), -INT8_C( 18), INT8_C( 100), INT8_C( 35), INT8_C( 18), INT8_C( 39), -INT8_C( 73), INT8_C( 19), INT8_C( 92) } }, { { INT8_C( 118), INT8_C( 85), INT8_C( 76), -INT8_C( 47), -INT8_C( 19), INT8_C( 81), INT8_C( 94), INT8_C( 77), INT8_C( 47), -INT8_C( 123), -INT8_C( 116), -INT8_C( 110), -INT8_C( 57), INT8_C( 55), -INT8_C( 111), -INT8_C( 4) }, UINT16_C(32116), { 
-INT8_C( 69), INT8_C( 126), INT8_C( 82), -INT8_C( 14), -INT8_C( 58), INT8_C( 10), INT8_C( 64), INT8_MAX, INT8_C( 23), -INT8_C( 109), INT8_C( 72), -INT8_C( 20), INT8_C( 64), INT8_C( 73), INT8_C( 75), -INT8_C( 38) }, { -INT8_C( 24), INT8_C( 58), -INT8_C( 98), INT8_C( 121), INT8_C( 74), INT8_C( 61), -INT8_C( 26), -INT8_C( 116), INT8_C( 100), INT8_C( 59), -INT8_C( 113), -INT8_C( 115), -INT8_C( 23), INT8_C( 33), INT8_C( 58), -INT8_C( 8) }, { INT8_C( 118), INT8_C( 85), -INT8_C( 16), -INT8_C( 47), INT8_C( 16), INT8_C( 71), INT8_C( 38), INT8_C( 77), INT8_C( 123), -INT8_C( 123), -INT8_C( 41), INT8_MIN, INT8_C( 41), INT8_C( 106), INT8_MAX, -INT8_C( 4) } }, { { -INT8_C( 47), -INT8_C( 23), INT8_C( 39), INT8_C( 51), -INT8_C( 61), -INT8_C( 44), -INT8_C( 113), INT8_C( 35), -INT8_C( 52), -INT8_C( 41), INT8_C( 98), INT8_C( 85), -INT8_C( 104), INT8_C( 102), -INT8_C( 36), -INT8_C( 31) }, UINT16_C(20717), { -INT8_C( 104), -INT8_C( 53), -INT8_C( 112), -INT8_C( 37), INT8_C( 87), INT8_C( 78), -INT8_C( 51), -INT8_C( 79), INT8_C( 5), INT8_C( 119), INT8_C( 69), -INT8_C( 74), -INT8_C( 10), INT8_C( 26), INT8_C( 95), -INT8_C( 100) }, { -INT8_C( 17), -INT8_C( 107), -INT8_C( 7), -INT8_C( 58), INT8_C( 10), INT8_C( 88), INT8_C( 60), -INT8_C( 46), INT8_C( 2), INT8_C( 90), INT8_C( 57), INT8_C( 85), -INT8_C( 77), INT8_C( 49), -INT8_C( 36), -INT8_C( 31) }, { -INT8_C( 121), -INT8_C( 23), -INT8_C( 119), -INT8_C( 95), -INT8_C( 61), INT8_MAX, INT8_C( 9), -INT8_C( 125), -INT8_C( 52), -INT8_C( 41), INT8_C( 98), INT8_C( 85), -INT8_C( 87), INT8_C( 102), INT8_C( 59), -INT8_C( 31) } }, { { INT8_C( 85), INT8_C( 118), -INT8_C( 103), INT8_MIN, INT8_C( 39), -INT8_C( 43), -INT8_C( 67), -INT8_C( 57), -INT8_C( 62), INT8_C( 83), -INT8_C( 118), -INT8_C( 116), -INT8_C( 79), INT8_C( 25), INT8_C( 118), INT8_C( 95) }, UINT16_C(62809), { INT8_C( 82), -INT8_C( 124), INT8_C( 65), INT8_C( 56), -INT8_C( 94), -INT8_C( 9), INT8_C( 63), -INT8_C( 116), -INT8_C( 68), INT8_C( 40), INT8_C( 6), -INT8_C( 104), INT8_C( 1), -INT8_C( 120), 
-INT8_C( 34), -INT8_C( 27) }, { INT8_C( 86), -INT8_C( 105), INT8_C( 63), -INT8_C( 53), -INT8_C( 76), -INT8_C( 100), INT8_C( 3), INT8_C( 113), -INT8_C( 76), INT8_C( 30), -INT8_C( 104), -INT8_C( 53), INT8_C( 4), -INT8_C( 45), -INT8_C( 26), -INT8_C( 6) }, { INT8_MAX, INT8_C( 118), -INT8_C( 103), INT8_C( 3), INT8_MIN, -INT8_C( 43), INT8_C( 66), -INT8_C( 57), INT8_MIN, INT8_C( 83), -INT8_C( 98), -INT8_C( 116), INT8_C( 5), INT8_MIN, -INT8_C( 60), -INT8_C( 33) } }, { { -INT8_C( 90), -INT8_C( 117), -INT8_C( 26), -INT8_C( 9), -INT8_C( 54), INT8_C( 123), INT8_C( 17), -INT8_C( 40), -INT8_C( 24), INT8_C( 12), -INT8_C( 5), INT8_C( 120), INT8_C( 71), INT8_C( 4), INT8_C( 10), INT8_C( 1) }, UINT16_C(44733), { -INT8_C( 54), INT8_C( 98), -INT8_C( 59), INT8_C( 107), -INT8_C( 95), INT8_C( 79), -INT8_C( 23), INT8_C( 35), -INT8_C( 87), INT8_C( 3), INT8_C( 87), -INT8_C( 117), -INT8_C( 101), INT8_C( 66), -INT8_C( 100), -INT8_C( 98) }, { INT8_C( 3), -INT8_C( 14), -INT8_C( 39), INT8_C( 50), INT8_C( 93), -INT8_C( 35), -INT8_C( 58), INT8_C( 81), INT8_C( 108), INT8_C( 97), INT8_C( 119), -INT8_C( 4), -INT8_C( 32), -INT8_C( 56), INT8_C( 12), INT8_C( 20) }, { -INT8_C( 51), -INT8_C( 117), -INT8_C( 98), INT8_MAX, -INT8_C( 2), INT8_C( 44), INT8_C( 17), INT8_C( 116), -INT8_C( 24), INT8_C( 100), INT8_MAX, -INT8_C( 121), INT8_C( 71), INT8_C( 10), INT8_C( 10), -INT8_C( 78) } }, { { INT8_C( 19), -INT8_C( 64), -INT8_C( 43), -INT8_C( 1), -INT8_C( 111), INT8_C( 62), -INT8_C( 109), INT8_C( 118), -INT8_C( 60), -INT8_C( 20), INT8_C( 126), -INT8_C( 114), -INT8_C( 121), -INT8_C( 42), -INT8_C( 28), -INT8_C( 20) }, UINT16_C(55639), { -INT8_C( 71), INT8_C( 65), INT8_C( 2), -INT8_C( 81), -INT8_C( 74), INT8_C( 31), INT8_C( 108), -INT8_C( 43), INT8_C( 116), INT8_C( 15), INT8_C( 97), -INT8_C( 97), -INT8_C( 27), INT8_C( 102), -INT8_C( 108), INT8_C( 86) }, { -INT8_C( 89), -INT8_C( 92), INT8_C( 119), -INT8_C( 97), -INT8_C( 64), -INT8_C( 44), INT8_C( 57), INT8_C( 45), -INT8_C( 53), INT8_C( 21), INT8_C( 88), -INT8_C( 119), 
-INT8_C( 42), INT8_C( 100), -INT8_C( 41), INT8_C( 82) }, { INT8_MIN, -INT8_C( 27), INT8_C( 121), -INT8_C( 1), INT8_MIN, INT8_C( 62), INT8_MAX, INT8_C( 118), INT8_C( 63), -INT8_C( 20), INT8_C( 126), INT8_MIN, -INT8_C( 69), -INT8_C( 42), INT8_MIN, INT8_MAX } }, { { INT8_C( 0), -INT8_C( 56), -INT8_C( 74), -INT8_C( 70), INT8_C( 8), INT8_C( 43), INT8_C( 101), -INT8_C( 121), INT8_C( 0), INT8_C( 92), -INT8_C( 58), INT8_C( 42), INT8_C( 93), INT8_C( 109), INT8_C( 64), INT8_C( 125) }, UINT16_C(21225), { INT8_C( 113), INT8_C( 49), -INT8_C( 88), INT8_C( 112), -INT8_C( 39), -INT8_C( 82), INT8_C( 13), -INT8_C( 74), INT8_C( 91), -INT8_C( 122), -INT8_C( 99), INT8_C( 112), -INT8_C( 37), INT8_C( 19), -INT8_C( 46), -INT8_C( 30) }, { -INT8_C( 88), INT8_C( 60), INT8_C( 21), -INT8_C( 52), INT8_C( 33), -INT8_C( 37), -INT8_C( 92), INT8_C( 25), INT8_C( 63), INT8_C( 50), -INT8_C( 57), INT8_C( 91), INT8_C( 73), INT8_C( 120), INT8_C( 62), INT8_C( 79) }, { INT8_C( 25), -INT8_C( 56), -INT8_C( 74), INT8_C( 60), INT8_C( 8), -INT8_C( 119), -INT8_C( 79), -INT8_C( 49), INT8_C( 0), -INT8_C( 72), -INT8_C( 58), INT8_C( 42), INT8_C( 36), INT8_C( 109), INT8_C( 16), INT8_C( 125) } }, { { -INT8_C( 10), -INT8_C( 50), -INT8_C( 47), INT8_C( 74), INT8_C( 57), -INT8_C( 26), -INT8_C( 66), INT8_C( 94), INT8_C( 84), -INT8_C( 107), -INT8_C( 57), -INT8_C( 48), -INT8_C( 69), -INT8_C( 65), INT8_C( 38), INT8_C( 80) }, UINT16_C(52423), { INT8_C( 55), -INT8_C( 9), INT8_C( 51), INT8_C( 94), INT8_C( 84), -INT8_C( 116), INT8_C( 78), -INT8_C( 33), -INT8_C( 36), -INT8_C( 21), -INT8_C( 69), -INT8_C( 98), INT8_C( 52), INT8_C( 73), INT8_C( 52), -INT8_C( 72) }, { -INT8_C( 69), -INT8_C( 10), INT8_C( 2), INT8_C( 68), -INT8_C( 124), -INT8_C( 68), -INT8_C( 24), -INT8_C( 4), INT8_C( 40), -INT8_C( 98), INT8_C( 58), INT8_C( 89), -INT8_C( 59), -INT8_C( 66), INT8_C( 90), -INT8_C( 84) }, { -INT8_C( 14), -INT8_C( 19), INT8_C( 53), INT8_C( 74), INT8_C( 57), -INT8_C( 26), INT8_C( 54), -INT8_C( 37), INT8_C( 84), -INT8_C( 107), -INT8_C( 11), 
-INT8_C( 9), -INT8_C( 69), -INT8_C( 65), INT8_MAX, INT8_MIN } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi8(test_vec[i].src); simde__m128i a = simde_x_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_mask_adds_epi8(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm_maskz_adds_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { { UINT16_C(12102), { INT8_C( 1), INT8_C( 83), -INT8_C( 45), INT8_C( 8), -INT8_C( 110), -INT8_C( 61), INT8_MIN, -INT8_C( 118), -INT8_C( 32), -INT8_C( 8), -INT8_C( 28), INT8_C( 82), -INT8_C( 13), -INT8_C( 72), -INT8_C( 60), INT8_C( 13) }, { -INT8_C( 121), -INT8_C( 88), -INT8_C( 66), -INT8_C( 50), INT8_C( 67), INT8_C( 126), INT8_MIN, INT8_C( 29), -INT8_C( 107), INT8_C( 42), -INT8_C( 47), -INT8_C( 51), INT8_C( 119), INT8_C( 13), INT8_C( 63), -INT8_C( 24) }, { INT8_C( 0), -INT8_C( 5), -INT8_C( 111), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_MIN, INT8_C( 34), -INT8_C( 75), INT8_C( 31), INT8_C( 0), -INT8_C( 59), INT8_C( 0), INT8_C( 0) } }, { UINT16_C(65315), { INT8_C( 98), -INT8_C( 110), INT8_C( 37), INT8_C( 119), -INT8_C( 45), -INT8_C( 116), INT8_C( 33), INT8_C( 110), INT8_C( 102), INT8_C( 43), -INT8_C( 95), -INT8_C( 73), -INT8_C( 10), INT8_C( 70), INT8_C( 39), INT8_C( 96) }, { INT8_C( 77), -INT8_C( 80), INT8_C( 43), -INT8_C( 64), INT8_C( 51), -INT8_C( 18), INT8_C( 91), INT8_C( 16), -INT8_C( 40), INT8_C( 55), -INT8_C( 39), -INT8_C( 38), -INT8_C( 69), -INT8_C( 67), INT8_C( 53), -INT8_C( 103) }, { INT8_MAX, INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 62), INT8_C( 98), INT8_MIN, -INT8_C( 111), -INT8_C( 79), INT8_C( 3), INT8_C( 92), -INT8_C( 7) } }, { 
UINT16_C(29490), { -INT8_C( 35), -INT8_C( 98), INT8_C( 6), -INT8_C( 75), INT8_C( 113), -INT8_C( 22), INT8_C( 62), INT8_C( 7), -INT8_C( 56), -INT8_C( 16), INT8_C( 122), -INT8_C( 115), -INT8_C( 47), INT8_C( 54), -INT8_C( 125), -INT8_C( 94) }, { -INT8_C( 82), INT8_C( 75), -INT8_C( 88), INT8_C( 26), INT8_C( 97), -INT8_C( 72), INT8_C( 34), -INT8_C( 46), -INT8_C( 23), INT8_C( 62), INT8_C( 30), INT8_C( 110), -INT8_C( 34), -INT8_C( 121), -INT8_C( 34), INT8_C( 14) }, { INT8_C( 0), -INT8_C( 23), INT8_C( 0), INT8_C( 0), INT8_MAX, -INT8_C( 94), INT8_C( 0), INT8_C( 0), -INT8_C( 79), INT8_C( 46), INT8_C( 0), INT8_C( 0), -INT8_C( 81), -INT8_C( 67), INT8_MIN, INT8_C( 0) } }, { UINT16_C(14341), { -INT8_C( 19), -INT8_C( 46), -INT8_C( 73), INT8_C( 51), -INT8_C( 101), INT8_C( 37), -INT8_C( 80), INT8_C( 41), INT8_C( 38), INT8_C( 20), -INT8_C( 33), -INT8_C( 54), INT8_C( 67), INT8_C( 20), INT8_C( 21), INT8_C( 117) }, { INT8_C( 33), -INT8_C( 99), INT8_C( 79), -INT8_C( 113), -INT8_C( 63), INT8_C( 7), -INT8_C( 124), -INT8_C( 105), INT8_C( 64), -INT8_C( 85), -INT8_C( 62), -INT8_C( 17), -INT8_C( 96), -INT8_C( 25), -INT8_C( 93), INT8_C( 124) }, { INT8_C( 14), INT8_C( 0), INT8_C( 6), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 71), -INT8_C( 29), -INT8_C( 5), INT8_C( 0), INT8_C( 0) } }, { UINT16_C( 3703), { INT8_MIN, -INT8_C( 125), INT8_C( 45), -INT8_C( 105), INT8_C( 125), -INT8_C( 41), INT8_C( 59), -INT8_C( 89), INT8_C( 14), -INT8_C( 92), -INT8_C( 90), -INT8_C( 114), INT8_C( 96), -INT8_C( 78), -INT8_C( 109), INT8_C( 39) }, { -INT8_C( 91), INT8_C( 23), -INT8_C( 57), INT8_C( 59), INT8_C( 66), INT8_C( 66), INT8_C( 12), -INT8_C( 94), -INT8_C( 108), -INT8_C( 63), INT8_C( 99), INT8_C( 23), INT8_C( 86), INT8_C( 80), INT8_C( 24), INT8_C( 42) }, { INT8_MIN, -INT8_C( 102), -INT8_C( 12), INT8_C( 0), INT8_MAX, INT8_C( 25), INT8_C( 71), INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_C( 9), -INT8_C( 91), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, 
{ UINT16_C(23183), { -INT8_C( 119), -INT8_C( 26), -INT8_C( 57), INT8_C( 13), -INT8_C( 45), INT8_C( 56), INT8_C( 92), INT8_C( 18), INT8_C( 77), -INT8_C( 82), INT8_C( 104), INT8_C( 71), -INT8_C( 99), -INT8_C( 127), -INT8_C( 39), -INT8_C( 14) }, { INT8_C( 71), INT8_C( 93), INT8_C( 46), -INT8_C( 20), INT8_C( 5), -INT8_C( 14), -INT8_C( 92), -INT8_C( 125), -INT8_C( 73), -INT8_C( 53), -INT8_C( 18), INT8_C( 0), INT8_C( 48), -INT8_C( 84), -INT8_C( 86), -INT8_C( 53) }, { -INT8_C( 48), INT8_C( 67), -INT8_C( 11), -INT8_C( 7), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 107), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 71), -INT8_C( 51), INT8_C( 0), -INT8_C( 125), INT8_C( 0) } }, { UINT16_C( 4280), { INT8_C( 10), INT8_C( 78), INT8_MAX, -INT8_C( 8), INT8_C( 108), -INT8_C( 11), -INT8_C( 97), -INT8_C( 111), -INT8_C( 46), INT8_C( 13), -INT8_C( 47), -INT8_C( 45), -INT8_C( 105), -INT8_C( 25), INT8_C( 0), -INT8_C( 7) }, { -INT8_C( 16), -INT8_C( 22), INT8_C( 88), -INT8_C( 71), INT8_C( 8), -INT8_C( 70), -INT8_C( 80), INT8_C( 103), INT8_C( 123), INT8_C( 93), -INT8_C( 35), INT8_C( 85), INT8_C( 10), INT8_C( 77), INT8_C( 0), INT8_C( 47) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 79), INT8_C( 116), -INT8_C( 81), INT8_C( 0), -INT8_C( 8), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 95), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { UINT16_C( 4325), { INT8_C( 65), INT8_C( 62), INT8_C( 42), -INT8_C( 68), INT8_C( 90), INT8_C( 73), INT8_C( 19), -INT8_C( 56), -INT8_C( 4), INT8_C( 31), INT8_C( 4), INT8_C( 3), INT8_C( 44), INT8_C( 90), INT8_C( 99), -INT8_C( 25) }, { INT8_C( 89), INT8_C( 52), INT8_C( 104), -INT8_C( 86), INT8_C( 121), -INT8_C( 27), INT8_C( 75), -INT8_C( 9), -INT8_C( 48), INT8_C( 121), INT8_C( 40), -INT8_C( 3), -INT8_C( 53), -INT8_C( 114), -INT8_C( 100), INT8_C( 81) }, { INT8_MAX, INT8_C( 0), INT8_MAX, INT8_C( 0), INT8_C( 0), INT8_C( 46), INT8_C( 94), -INT8_C( 65), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 9), INT8_C( 0), INT8_C( 0), INT8_C( 0) } } }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_maskz_adds_epi8(test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm_mask_adds_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t src[8]; const simde__mmask8 k; const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { { INT16_C( 19830), -INT16_C( 20263), INT16_C( 6365), INT16_C( 9514), INT16_C( 8029), -INT16_C( 31012), -INT16_C( 27351), INT16_C( 19606) }, UINT8_C(166), { -INT16_C( 21201), -INT16_C( 16983), -INT16_C( 28913), INT16_C( 479), -INT16_C( 27699), -INT16_C( 698), -INT16_C( 21664), INT16_C( 12978) }, { -INT16_C( 31630), -INT16_C( 27895), -INT16_C( 31919), -INT16_C( 25946), INT16_C( 5384), INT16_C( 31469), INT16_C( 29499), -INT16_C( 8608) }, { INT16_C( 19830), INT16_MIN, INT16_MIN, INT16_C( 9514), INT16_C( 8029), INT16_C( 30771), -INT16_C( 27351), INT16_C( 4370) } }, { { INT16_C( 14956), -INT16_C( 24206), INT16_C( 10279), INT16_C( 662), -INT16_C( 16911), -INT16_C( 17106), -INT16_C( 4862), -INT16_C( 4374) }, UINT8_C(253), { INT16_C( 25492), -INT16_C( 2767), -INT16_C( 5158), -INT16_C( 5013), INT16_C( 23080), INT16_C( 24538), INT16_C( 24678), -INT16_C( 27578) }, { -INT16_C( 5284), INT16_C( 1255), INT16_C( 12121), INT16_C( 12017), INT16_C( 13281), INT16_C( 1685), INT16_C( 18360), -INT16_C( 24670) }, { INT16_C( 20208), -INT16_C( 24206), INT16_C( 6963), INT16_C( 7004), INT16_MAX, INT16_C( 26223), INT16_MAX, INT16_MIN } }, { { -INT16_C( 14832), -INT16_C( 19736), INT16_C( 5222), -INT16_C( 1427), INT16_C( 24020), INT16_C( 13226), -INT16_C( 22015), INT16_C( 8143) }, UINT8_C(219), { -INT16_C( 20552), -INT16_C( 5853), INT16_C( 22872), -INT16_C( 6013), -INT16_C( 16137), -INT16_C( 27569), INT16_C( 19276), -INT16_C( 26420) }, { INT16_C( 1430), 
INT16_C( 15804), INT16_C( 13643), INT16_C( 13486), INT16_C( 28580), INT16_C( 26652), INT16_C( 15895), -INT16_C( 25828) }, { -INT16_C( 19122), INT16_C( 9951), INT16_C( 5222), INT16_C( 7473), INT16_C( 12443), INT16_C( 13226), INT16_MAX, INT16_MIN } }, { { INT16_C( 18056), -INT16_C( 6311), -INT16_C( 10823), INT16_C( 8297), INT16_C( 24132), INT16_C( 31019), -INT16_C( 20821), INT16_C( 1073) }, UINT8_C(155), { INT16_C( 25312), -INT16_C( 18725), -INT16_C( 31168), -INT16_C( 13304), -INT16_C( 19949), INT16_C( 28347), INT16_C( 3674), INT16_C( 11284) }, { -INT16_C( 32624), INT16_C( 14806), INT16_C( 29854), -INT16_C( 13394), INT16_C( 19939), INT16_C( 14150), -INT16_C( 32469), -INT16_C( 16490) }, { -INT16_C( 7312), -INT16_C( 3919), -INT16_C( 10823), -INT16_C( 26698), -INT16_C( 10), INT16_C( 31019), -INT16_C( 20821), -INT16_C( 5206) } }, { { INT16_C( 16411), INT16_C( 26770), -INT16_C( 15770), INT16_C( 11567), -INT16_C( 25386), -INT16_C( 21370), -INT16_C( 18951), INT16_C( 5013) }, UINT8_C(175), { INT16_C( 22074), -INT16_C( 6096), INT16_C( 12562), -INT16_C( 2681), -INT16_C( 8474), INT16_C( 7981), INT16_C( 29859), -INT16_C( 5915) }, { -INT16_C( 26649), -INT16_C( 14667), -INT16_C( 21700), INT16_C( 23924), INT16_C( 23041), -INT16_C( 12329), -INT16_C( 26844), INT16_C( 12057) }, { -INT16_C( 4575), -INT16_C( 20763), -INT16_C( 9138), INT16_C( 21243), -INT16_C( 25386), -INT16_C( 4348), -INT16_C( 18951), INT16_C( 6142) } }, { { INT16_C( 31823), -INT16_C( 16416), INT16_C( 10547), -INT16_C( 20936), -INT16_C( 21717), INT16_C( 2429), -INT16_C( 14089), INT16_C( 20415) }, UINT8_C(116), { -INT16_C( 20670), -INT16_C( 18929), -INT16_C( 9110), -INT16_C( 27658), INT16_C( 5659), -INT16_C( 22755), INT16_C( 30401), INT16_C( 12241) }, { -INT16_C( 2428), -INT16_C( 3085), INT16_C( 20629), -INT16_C( 29739), -INT16_C( 19289), INT16_C( 29198), INT16_C( 13379), -INT16_C( 26784) }, { INT16_C( 31823), -INT16_C( 16416), INT16_C( 11519), -INT16_C( 20936), -INT16_C( 13630), INT16_C( 6443), INT16_MAX, INT16_C( 
20415) } }, { { INT16_C( 10682), INT16_C( 30723), -INT16_C( 25631), INT16_C( 12870), -INT16_C( 7733), -INT16_C( 3023), INT16_C( 8882), INT16_C( 24910) }, UINT8_C( 45), { -INT16_C( 15846), -INT16_C( 19943), -INT16_C( 32694), -INT16_C( 6090), INT16_C( 14036), -INT16_C( 30104), -INT16_C( 22309), INT16_C( 32750) }, { -INT16_C( 16035), INT16_C( 6633), INT16_C( 31017), -INT16_C( 29512), INT16_C( 20156), INT16_C( 28149), INT16_C( 9311), -INT16_C( 22877) }, { -INT16_C( 31881), INT16_C( 30723), -INT16_C( 1677), INT16_MIN, -INT16_C( 7733), -INT16_C( 1955), INT16_C( 8882), INT16_C( 24910) } }, { { INT16_C( 11801), INT16_C( 30522), -INT16_C( 28771), INT16_C( 18595), -INT16_C( 26062), INT16_C( 7549), INT16_C( 19180), -INT16_C( 19581) }, UINT8_C( 42), { INT16_C( 21562), -INT16_C( 15180), INT16_C( 17197), INT16_C( 26341), INT16_C( 15144), -INT16_C( 11790), INT16_C( 26114), -INT16_C( 26150) }, { -INT16_C( 16280), -INT16_C( 27280), INT16_C( 27387), -INT16_C( 12348), INT16_C( 28781), INT16_C( 23348), INT16_C( 22336), INT16_C( 2436) }, { INT16_C( 11801), INT16_MIN, -INT16_C( 28771), INT16_C( 13993), -INT16_C( 26062), INT16_C( 11558), INT16_C( 19180), -INT16_C( 19581) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi16(test_vec[i].src); simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_mask_adds_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm_maskz_adds_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { UINT8_C(126), { INT16_C( 18530), -INT16_C( 15866), -INT16_C( 15400), -INT16_C( 26342), -INT16_C( 14737), -INT16_C( 15030), INT16_C( 21680), INT16_C( 29882) }, { INT16_C( 27962), INT16_C( 13123), -INT16_C( 10479), INT16_C( 
3537), -INT16_C( 7715), INT16_C( 27275), -INT16_C( 4308), INT16_C( 2341) }, { INT16_C( 0), -INT16_C( 2743), -INT16_C( 25879), -INT16_C( 22805), -INT16_C( 22452), INT16_C( 12245), INT16_C( 17372), INT16_C( 0) } }, { UINT8_C(175), { -INT16_C( 18089), -INT16_C( 11763), -INT16_C( 7340), -INT16_C( 26300), INT16_C( 14485), INT16_C( 27175), INT16_C( 8158), -INT16_C( 20335) }, { -INT16_C( 8103), INT16_C( 16065), -INT16_C( 4453), INT16_C( 5089), INT16_C( 29515), -INT16_C( 2790), -INT16_C( 21763), INT16_C( 22019) }, { -INT16_C( 26192), INT16_C( 4302), -INT16_C( 11793), -INT16_C( 21211), INT16_C( 0), INT16_C( 24385), INT16_C( 0), INT16_C( 1684) } }, { UINT8_C( 14), { -INT16_C( 20827), -INT16_C( 18732), -INT16_C( 10152), -INT16_C( 2933), INT16_C( 31842), -INT16_C( 8748), -INT16_C( 11815), INT16_C( 21410) }, { -INT16_C( 1528), -INT16_C( 13596), -INT16_C( 25943), -INT16_C( 28335), INT16_C( 25848), INT16_C( 9269), -INT16_C( 15402), INT16_C( 26007) }, { INT16_C( 0), -INT16_C( 32328), INT16_MIN, -INT16_C( 31268), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT8_C( 15), { -INT16_C( 27507), -INT16_C( 9398), INT16_C( 28915), -INT16_C( 2956), -INT16_C( 14608), -INT16_C( 10466), INT16_C( 28978), -INT16_C( 7517) }, { INT16_C( 20451), -INT16_C( 22940), INT16_C( 26080), INT16_C( 12379), -INT16_C( 5954), -INT16_C( 27171), -INT16_C( 18512), -INT16_C( 15809) }, { -INT16_C( 7056), -INT16_C( 32338), INT16_MAX, INT16_C( 9423), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT8_C( 30), { INT16_C( 17121), -INT16_C( 27683), INT16_C( 32718), INT16_C( 24958), -INT16_C( 20150), -INT16_C( 1717), -INT16_C( 8726), -INT16_C( 15593) }, { INT16_C( 21952), -INT16_C( 7015), -INT16_C( 16049), -INT16_C( 27828), INT16_C( 16846), INT16_C( 20356), INT16_C( 31961), -INT16_C( 30752) }, { INT16_C( 0), INT16_MIN, INT16_C( 16669), -INT16_C( 2870), -INT16_C( 3304), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT8_C(176), { -INT16_C( 2095), INT16_C( 22806), INT16_C( 10643), INT16_C( 17957), 
-INT16_C( 4712), -INT16_C( 17466), -INT16_C( 31523), INT16_C( 23464) }, { -INT16_C( 9670), INT16_C( 4356), -INT16_C( 25926), INT16_C( 21354), -INT16_C( 3587), INT16_C( 16434), -INT16_C( 8316), -INT16_C( 28686) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 8299), -INT16_C( 1032), INT16_C( 0), -INT16_C( 5222) } }, { UINT8_C(196), { INT16_C( 23417), -INT16_C( 1452), -INT16_C( 25974), -INT16_C( 1716), INT16_C( 22247), -INT16_C( 10292), INT16_C( 7502), -INT16_C( 839) }, { -INT16_C( 1858), -INT16_C( 2942), -INT16_C( 11236), INT16_C( 15194), -INT16_C( 6079), INT16_C( 6575), -INT16_C( 15784), -INT16_C( 25633) }, { INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 8282), -INT16_C( 26472) } }, { UINT8_C( 6), { -INT16_C( 1086), INT16_C( 31230), -INT16_C( 15014), -INT16_C( 7078), -INT16_C( 11452), INT16_C( 13045), INT16_C( 23661), INT16_C( 14428) }, { -INT16_C( 29896), -INT16_C( 10249), -INT16_C( 5921), -INT16_C( 11132), -INT16_C( 3726), -INT16_C( 12393), INT16_C( 5350), INT16_C( 20495) }, { INT16_C( 0), INT16_C( 20981), -INT16_C( 20935), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_maskz_adds_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm256_mask_adds_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int8_t src[32]; const simde__mmask32 k; const int8_t a[32]; const int8_t b[32]; const int8_t r[32]; } test_vec[] = { { { INT8_C( 63), INT8_C( 79), INT8_C( 62), -INT8_C( 24), INT8_C( 99), INT8_C( 34), INT8_C( 33), INT8_C( 55), INT8_C( 34), INT8_C( 107), INT8_C( 116), INT8_C( 40), -INT8_C( 20), INT8_C( 26), INT8_C( 82), -INT8_C( 107), -INT8_C( 52), INT8_MIN, -INT8_C( 36), INT8_C( 51), 
-INT8_C( 21), -INT8_C( 124), INT8_C( 112), INT8_C( 122), -INT8_C( 57), -INT8_C( 30), -INT8_C( 11), -INT8_C( 90), -INT8_C( 59), INT8_C( 91), INT8_C( 106), -INT8_C( 116) }, UINT32_C( 17939), { -INT8_C( 123), -INT8_C( 71), INT8_C( 3), -INT8_C( 27), INT8_C( 90), -INT8_C( 35), INT8_C( 53), -INT8_C( 70), INT8_C( 37), -INT8_C( 93), INT8_C( 39), -INT8_C( 99), -INT8_C( 73), INT8_C( 99), -INT8_C( 90), -INT8_C( 99), INT8_C( 34), INT8_C( 74), INT8_C( 66), -INT8_C( 87), -INT8_C( 35), INT8_C( 109), -INT8_C( 31), INT8_C( 57), INT8_C( 29), -INT8_C( 1), -INT8_C( 109), -INT8_C( 29), -INT8_C( 102), -INT8_C( 122), -INT8_C( 40), -INT8_C( 64) }, { INT8_C( 1), -INT8_C( 57), -INT8_C( 42), -INT8_C( 31), INT8_C( 49), -INT8_C( 49), INT8_C( 95), -INT8_C( 124), -INT8_C( 108), INT8_C( 82), -INT8_C( 127), -INT8_C( 41), INT8_C( 123), INT8_C( 60), INT8_C( 18), INT8_C( 4), -INT8_C( 92), -INT8_C( 115), INT8_C( 22), -INT8_C( 82), INT8_C( 16), -INT8_C( 95), INT8_C( 51), -INT8_C( 3), INT8_C( 91), INT8_C( 71), -INT8_C( 33), -INT8_C( 101), -INT8_C( 115), -INT8_C( 4), INT8_C( 91), -INT8_C( 104) }, { -INT8_C( 122), INT8_MIN, INT8_C( 62), -INT8_C( 24), INT8_MAX, INT8_C( 34), INT8_C( 33), INT8_C( 55), INT8_C( 34), -INT8_C( 11), -INT8_C( 88), INT8_C( 40), -INT8_C( 20), INT8_C( 26), -INT8_C( 72), -INT8_C( 107), -INT8_C( 52), INT8_MIN, -INT8_C( 36), INT8_C( 51), -INT8_C( 21), -INT8_C( 124), INT8_C( 112), INT8_C( 122), -INT8_C( 57), -INT8_C( 30), -INT8_C( 11), -INT8_C( 90), -INT8_C( 59), INT8_C( 91), INT8_C( 106), -INT8_C( 116) } }, { { -INT8_C( 123), INT8_C( 123), INT8_C( 97), INT8_C( 38), -INT8_C( 67), -INT8_C( 49), INT8_C( 30), -INT8_C( 76), -INT8_C( 84), INT8_C( 84), INT8_C( 23), INT8_C( 86), INT8_C( 8), -INT8_C( 26), -INT8_C( 64), -INT8_C( 51), INT8_C( 112), INT8_C( 87), INT8_C( 119), INT8_C( 1), INT8_C( 43), -INT8_C( 21), INT8_C( 90), INT8_C( 91), INT8_C( 125), -INT8_C( 98), INT8_C( 20), INT8_C( 24), INT8_C( 126), -INT8_C( 27), -INT8_C( 117), -INT8_C( 87) }, UINT32_C( 56101), { -INT8_C( 29), INT8_C( 83), 
INT8_C( 102), -INT8_C( 68), -INT8_C( 94), INT8_C( 98), -INT8_C( 113), -INT8_C( 98), -INT8_C( 91), -INT8_C( 30), INT8_C( 20), INT8_C( 105), INT8_C( 46), -INT8_C( 21), -INT8_C( 97), -INT8_C( 66), -INT8_C( 109), -INT8_C( 73), INT8_C( 45), INT8_C( 30), -INT8_C( 72), -INT8_C( 108), -INT8_C( 85), -INT8_C( 45), INT8_C( 78), INT8_C( 69), INT8_C( 14), INT8_C( 75), -INT8_C( 71), -INT8_C( 49), -INT8_C( 55), INT8_C( 42) }, { INT8_C( 52), -INT8_C( 69), -INT8_C( 103), -INT8_C( 82), INT8_C( 90), INT8_C( 51), -INT8_C( 42), INT8_MIN, INT8_C( 102), INT8_C( 100), INT8_C( 6), INT8_C( 50), -INT8_C( 114), INT8_C( 106), INT8_C( 119), -INT8_C( 17), -INT8_C( 46), -INT8_C( 52), INT8_C( 25), -INT8_C( 71), INT8_C( 100), INT8_C( 75), INT8_C( 70), INT8_C( 2), -INT8_C( 20), INT8_C( 69), -INT8_C( 9), -INT8_C( 68), INT8_C( 119), -INT8_C( 98), -INT8_C( 25), -INT8_C( 53) }, { INT8_C( 23), INT8_C( 123), -INT8_C( 1), INT8_C( 38), -INT8_C( 67), INT8_MAX, INT8_C( 30), -INT8_C( 76), INT8_C( 11), INT8_C( 70), INT8_C( 23), INT8_MAX, -INT8_C( 68), -INT8_C( 26), INT8_C( 22), -INT8_C( 83), INT8_C( 112), INT8_C( 87), INT8_C( 119), INT8_C( 1), INT8_C( 43), -INT8_C( 21), INT8_C( 90), INT8_C( 91), INT8_C( 125), -INT8_C( 98), INT8_C( 20), INT8_C( 24), INT8_C( 126), -INT8_C( 27), -INT8_C( 117), -INT8_C( 87) } }, { { -INT8_C( 26), INT8_C( 33), -INT8_C( 40), INT8_C( 31), -INT8_C( 46), INT8_C( 71), INT8_C( 81), -INT8_C( 30), -INT8_C( 45), -INT8_C( 8), -INT8_C( 2), INT8_MAX, -INT8_C( 99), INT8_C( 87), INT8_C( 64), -INT8_C( 20), -INT8_C( 83), -INT8_C( 40), -INT8_C( 19), -INT8_C( 114), INT8_C( 34), INT8_C( 105), INT8_C( 90), INT8_C( 35), INT8_C( 75), INT8_C( 112), -INT8_C( 89), -INT8_C( 127), INT8_C( 95), INT8_C( 79), INT8_C( 40), INT8_C( 8) }, UINT32_C( 23780), { -INT8_C( 46), -INT8_C( 84), -INT8_C( 66), -INT8_C( 62), INT8_C( 68), -INT8_C( 119), -INT8_C( 126), -INT8_C( 81), -INT8_C( 113), INT8_C( 88), -INT8_C( 93), -INT8_C( 126), INT8_C( 25), INT8_C( 108), -INT8_C( 109), INT8_C( 86), -INT8_C( 76), INT8_C( 45), INT8_C( 
25), INT8_C( 69), INT8_C( 27), -INT8_C( 54), INT8_C( 66), -INT8_C( 126), INT8_C( 100), -INT8_C( 10), -INT8_C( 44), -INT8_C( 28), INT8_C( 113), -INT8_C( 119), -INT8_C( 10), -INT8_C( 93) }, { INT8_C( 109), -INT8_C( 64), -INT8_C( 31), -INT8_C( 47), INT8_C( 104), -INT8_C( 13), INT8_C( 71), INT8_C( 10), -INT8_C( 90), INT8_C( 62), INT8_C( 102), -INT8_C( 45), INT8_C( 60), -INT8_C( 39), -INT8_C( 121), -INT8_C( 60), INT8_C( 117), -INT8_C( 59), INT8_C( 37), -INT8_C( 71), -INT8_C( 20), INT8_C( 65), -INT8_C( 54), -INT8_C( 58), -INT8_C( 96), -INT8_C( 70), -INT8_C( 87), INT8_C( 79), -INT8_C( 17), INT8_C( 50), -INT8_C( 51), INT8_C( 52) }, { -INT8_C( 26), INT8_C( 33), -INT8_C( 97), INT8_C( 31), -INT8_C( 46), INT8_MIN, -INT8_C( 55), -INT8_C( 71), -INT8_C( 45), -INT8_C( 8), INT8_C( 9), INT8_MIN, INT8_C( 85), INT8_C( 87), INT8_MIN, -INT8_C( 20), -INT8_C( 83), -INT8_C( 40), -INT8_C( 19), -INT8_C( 114), INT8_C( 34), INT8_C( 105), INT8_C( 90), INT8_C( 35), INT8_C( 75), INT8_C( 112), -INT8_C( 89), -INT8_C( 127), INT8_C( 95), INT8_C( 79), INT8_C( 40), INT8_C( 8) } }, { { -INT8_C( 127), -INT8_C( 111), -INT8_C( 31), INT8_C( 126), INT8_C( 108), INT8_C( 93), -INT8_C( 18), INT8_C( 111), -INT8_C( 105), INT8_C( 57), INT8_C( 30), -INT8_C( 40), INT8_C( 11), -INT8_C( 124), -INT8_C( 72), INT8_C( 21), INT8_C( 105), INT8_C( 64), -INT8_C( 9), INT8_C( 116), -INT8_C( 97), -INT8_C( 95), INT8_C( 6), -INT8_C( 94), INT8_C( 64), -INT8_C( 46), INT8_C( 55), INT8_C( 98), INT8_C( 21), -INT8_C( 72), -INT8_C( 125), INT8_C( 46) }, UINT32_C( 49881), { -INT8_C( 114), -INT8_C( 58), INT8_C( 3), INT8_C( 38), -INT8_C( 42), -INT8_C( 116), INT8_C( 75), INT8_C( 22), -INT8_C( 60), INT8_C( 47), INT8_C( 34), INT8_C( 47), -INT8_C( 105), INT8_C( 78), -INT8_C( 59), -INT8_C( 90), -INT8_C( 37), INT8_C( 116), -INT8_C( 87), -INT8_C( 26), INT8_C( 120), INT8_C( 99), -INT8_C( 89), INT8_C( 40), -INT8_C( 5), -INT8_C( 91), -INT8_C( 78), INT8_C( 22), INT8_C( 10), -INT8_C( 39), -INT8_C( 35), INT8_C( 56) }, { INT8_C( 4), INT8_C( 92), INT8_C( 
58), -INT8_C( 42), INT8_C( 109), INT8_C( 85), -INT8_C( 76), INT8_C( 113), INT8_C( 58), -INT8_C( 80), INT8_C( 16), INT8_C( 52), INT8_C( 8), INT8_C( 35), -INT8_C( 21), INT8_C( 108), INT8_C( 126), -INT8_C( 36), -INT8_C( 43), -INT8_C( 64), INT8_C( 95), -INT8_C( 115), -INT8_C( 91), INT8_C( 38), INT8_C( 96), INT8_C( 13), INT8_C( 93), INT8_C( 14), INT8_C( 107), -INT8_C( 90), INT8_C( 99), INT8_C( 71) }, { -INT8_C( 110), -INT8_C( 111), -INT8_C( 31), -INT8_C( 4), INT8_C( 67), INT8_C( 93), -INT8_C( 1), INT8_MAX, -INT8_C( 105), -INT8_C( 33), INT8_C( 30), -INT8_C( 40), INT8_C( 11), -INT8_C( 124), -INT8_C( 80), INT8_C( 18), INT8_C( 105), INT8_C( 64), -INT8_C( 9), INT8_C( 116), -INT8_C( 97), -INT8_C( 95), INT8_C( 6), -INT8_C( 94), INT8_C( 64), -INT8_C( 46), INT8_C( 55), INT8_C( 98), INT8_C( 21), -INT8_C( 72), -INT8_C( 125), INT8_C( 46) } }, { { -INT8_C( 92), -INT8_C( 56), -INT8_C( 56), -INT8_C( 3), INT8_C( 106), -INT8_C( 125), -INT8_C( 24), -INT8_C( 120), -INT8_C( 42), -INT8_C( 1), INT8_C( 74), INT8_C( 47), INT8_C( 71), -INT8_C( 15), -INT8_C( 8), INT8_C( 57), INT8_C( 109), INT8_C( 68), -INT8_C( 69), INT8_C( 98), -INT8_C( 52), INT8_C( 55), INT8_C( 55), INT8_C( 101), -INT8_C( 58), INT8_C( 121), -INT8_C( 76), -INT8_C( 112), INT8_C( 50), -INT8_C( 36), -INT8_C( 36), INT8_C( 96) }, UINT32_C( 33993), { -INT8_C( 4), -INT8_C( 8), INT8_C( 72), INT8_C( 121), INT8_C( 22), -INT8_C( 61), -INT8_C( 95), INT8_C( 91), INT8_C( 13), -INT8_C( 124), INT8_C( 34), -INT8_C( 59), INT8_C( 96), INT8_C( 14), INT8_C( 20), INT8_C( 69), INT8_C( 18), INT8_C( 37), INT8_C( 87), -INT8_C( 61), -INT8_C( 47), -INT8_C( 17), INT8_C( 37), -INT8_C( 73), -INT8_C( 66), INT8_C( 43), -INT8_C( 63), INT8_C( 101), -INT8_C( 107), -INT8_C( 56), -INT8_C( 72), -INT8_C( 87) }, { INT8_C( 82), INT8_C( 121), -INT8_C( 76), INT8_C( 57), -INT8_C( 81), INT8_C( 56), -INT8_C( 72), INT8_C( 67), INT8_C( 58), INT8_C( 91), INT8_C( 66), -INT8_C( 59), -INT8_C( 79), -INT8_C( 29), -INT8_C( 96), INT8_MIN, INT8_C( 89), -INT8_C( 52), -INT8_C( 48), 
-INT8_C( 102), -INT8_C( 77), -INT8_C( 68), INT8_C( 9), INT8_C( 120), INT8_C( 28), -INT8_C( 42), INT8_C( 38), -INT8_C( 89), -INT8_C( 78), INT8_C( 109), INT8_C( 34), INT8_C( 43) }, { INT8_C( 78), -INT8_C( 56), -INT8_C( 56), INT8_MAX, INT8_C( 106), -INT8_C( 125), INT8_MIN, INT8_MAX, -INT8_C( 42), -INT8_C( 1), INT8_C( 100), INT8_C( 47), INT8_C( 71), -INT8_C( 15), -INT8_C( 8), -INT8_C( 59), INT8_C( 109), INT8_C( 68), -INT8_C( 69), INT8_C( 98), -INT8_C( 52), INT8_C( 55), INT8_C( 55), INT8_C( 101), -INT8_C( 58), INT8_C( 121), -INT8_C( 76), -INT8_C( 112), INT8_C( 50), -INT8_C( 36), -INT8_C( 36), INT8_C( 96) } }, { { -INT8_C( 119), INT8_C( 0), INT8_C( 65), -INT8_C( 53), -INT8_C( 18), INT8_C( 41), INT8_C( 12), INT8_C( 72), -INT8_C( 86), INT8_C( 39), INT8_C( 80), -INT8_C( 86), -INT8_C( 98), INT8_C( 71), -INT8_C( 10), INT8_C( 26), INT8_C( 2), INT8_C( 2), INT8_C( 81), -INT8_C( 15), -INT8_C( 127), INT8_C( 89), INT8_C( 120), -INT8_C( 108), INT8_C( 37), INT8_C( 34), INT8_C( 62), INT8_C( 1), INT8_C( 36), INT8_C( 86), INT8_C( 109), INT8_C( 112) }, UINT32_C( 9379), { INT8_C( 104), INT8_C( 79), INT8_C( 45), INT8_C( 74), -INT8_C( 6), -INT8_C( 53), INT8_C( 39), -INT8_C( 66), INT8_C( 44), -INT8_C( 68), INT8_C( 67), INT8_C( 50), -INT8_C( 91), -INT8_C( 88), INT8_C( 82), -INT8_C( 87), INT8_C( 16), INT8_C( 37), INT8_C( 87), -INT8_C( 66), -INT8_C( 77), INT8_C( 46), INT8_C( 25), INT8_C( 15), INT8_C( 45), -INT8_C( 13), -INT8_C( 39), -INT8_C( 113), INT8_C( 16), INT8_C( 13), -INT8_C( 73), -INT8_C( 102) }, { INT8_C( 96), INT8_C( 32), INT8_C( 87), -INT8_C( 68), INT8_C( 115), -INT8_C( 15), -INT8_C( 38), -INT8_C( 33), -INT8_C( 61), -INT8_C( 113), -INT8_C( 118), INT8_C( 12), INT8_C( 78), -INT8_C( 29), -INT8_C( 43), INT8_C( 48), -INT8_C( 55), INT8_C( 71), -INT8_C( 43), INT8_C( 21), -INT8_C( 98), -INT8_C( 50), INT8_C( 121), INT8_C( 116), INT8_C( 65), INT8_C( 79), INT8_MAX, INT8_C( 62), INT8_C( 2), INT8_C( 115), INT8_C( 10), INT8_C( 17) }, { INT8_MAX, INT8_C( 111), INT8_C( 65), -INT8_C( 53), -INT8_C( 
18), -INT8_C( 68), INT8_C( 12), -INT8_C( 99), -INT8_C( 86), INT8_C( 39), -INT8_C( 51), -INT8_C( 86), -INT8_C( 98), -INT8_C( 117), -INT8_C( 10), INT8_C( 26), INT8_C( 2), INT8_C( 2), INT8_C( 81), -INT8_C( 15), -INT8_C( 127), INT8_C( 89), INT8_C( 120), -INT8_C( 108), INT8_C( 37), INT8_C( 34), INT8_C( 62), INT8_C( 1), INT8_C( 36), INT8_C( 86), INT8_C( 109), INT8_C( 112) } }, { { INT8_C( 106), -INT8_C( 38), INT8_C( 113), -INT8_C( 114), -INT8_C( 121), INT8_C( 92), -INT8_C( 30), INT8_C( 110), INT8_C( 56), -INT8_C( 93), -INT8_C( 61), INT8_C( 32), INT8_C( 8), INT8_C( 89), INT8_C( 67), -INT8_C( 108), -INT8_C( 52), INT8_C( 104), -INT8_C( 67), -INT8_C( 2), INT8_C( 105), INT8_C( 96), -INT8_C( 60), INT8_C( 83), -INT8_C( 123), -INT8_C( 16), INT8_C( 90), INT8_C( 53), INT8_C( 8), -INT8_C( 83), -INT8_C( 2), -INT8_C( 72) }, UINT32_C( 60777), { INT8_C( 87), INT8_C( 109), -INT8_C( 14), -INT8_C( 56), -INT8_C( 36), INT8_C( 50), INT8_C( 112), -INT8_C( 106), INT8_C( 115), INT8_C( 42), INT8_C( 109), INT8_C( 2), -INT8_C( 46), -INT8_C( 67), INT8_C( 26), INT8_C( 102), -INT8_C( 104), -INT8_C( 104), INT8_C( 5), -INT8_C( 69), -INT8_C( 12), -INT8_C( 38), -INT8_C( 100), -INT8_C( 59), INT8_C( 92), -INT8_C( 63), INT8_C( 99), INT8_C( 33), INT8_C( 56), -INT8_C( 66), -INT8_C( 59), -INT8_C( 33) }, { -INT8_C( 65), INT8_C( 98), INT8_C( 34), INT8_C( 105), INT8_C( 11), INT8_C( 40), -INT8_C( 94), INT8_C( 55), INT8_C( 93), INT8_C( 32), INT8_C( 16), -INT8_C( 123), -INT8_C( 53), INT8_C( 118), INT8_C( 76), -INT8_C( 7), -INT8_C( 126), -INT8_C( 96), INT8_C( 111), INT8_MIN, -INT8_C( 88), -INT8_C( 35), INT8_C( 107), INT8_C( 19), INT8_C( 54), -INT8_C( 31), INT8_C( 97), INT8_C( 15), -INT8_C( 66), INT8_C( 116), INT8_C( 96), -INT8_C( 1) }, { INT8_C( 22), -INT8_C( 38), INT8_C( 113), INT8_C( 49), -INT8_C( 121), INT8_C( 90), INT8_C( 18), INT8_C( 110), INT8_MAX, -INT8_C( 93), INT8_C( 125), -INT8_C( 121), INT8_C( 8), INT8_C( 51), INT8_C( 102), INT8_C( 95), -INT8_C( 52), INT8_C( 104), -INT8_C( 67), -INT8_C( 2), INT8_C( 105), 
INT8_C( 96), -INT8_C( 60), INT8_C( 83), -INT8_C( 123), -INT8_C( 16), INT8_C( 90), INT8_C( 53), INT8_C( 8), -INT8_C( 83), -INT8_C( 2), -INT8_C( 72) } }, { { INT8_C( 100), -INT8_C( 55), INT8_MIN, INT8_C( 72), INT8_C( 87), INT8_C( 65), INT8_C( 86), -INT8_C( 76), INT8_C( 80), INT8_C( 106), -INT8_C( 121), INT8_MAX, -INT8_C( 112), -INT8_C( 35), -INT8_C( 2), -INT8_C( 44), INT8_C( 49), -INT8_C( 82), -INT8_C( 22), -INT8_C( 13), -INT8_C( 17), -INT8_C( 8), -INT8_C( 14), INT8_C( 108), INT8_C( 126), INT8_C( 95), -INT8_C( 50), -INT8_C( 76), INT8_C( 116), -INT8_C( 6), INT8_C( 17), -INT8_C( 97) }, UINT32_C( 17935), { -INT8_C( 36), INT8_C( 107), INT8_C( 99), -INT8_C( 88), -INT8_C( 108), -INT8_C( 54), -INT8_C( 107), -INT8_C( 60), INT8_C( 33), INT8_C( 111), INT8_C( 96), INT8_C( 117), INT8_C( 43), -INT8_C( 46), -INT8_C( 19), -INT8_C( 103), -INT8_C( 107), -INT8_C( 51), INT8_C( 122), INT8_C( 90), INT8_C( 34), -INT8_C( 40), -INT8_C( 107), INT8_C( 117), INT8_C( 95), INT8_C( 30), -INT8_C( 30), INT8_C( 112), INT8_C( 86), -INT8_C( 46), -INT8_C( 2), INT8_C( 109) }, { INT8_C( 69), INT8_C( 50), INT8_C( 112), INT8_C( 36), -INT8_C( 68), INT8_C( 41), INT8_C( 61), INT8_C( 95), -INT8_C( 71), INT8_C( 65), -INT8_C( 90), -INT8_C( 46), INT8_C( 10), INT8_C( 89), -INT8_C( 35), -INT8_C( 68), -INT8_C( 54), INT8_C( 48), INT8_MIN, -INT8_C( 108), -INT8_C( 48), -INT8_C( 34), -INT8_C( 69), -INT8_C( 69), INT8_C( 0), INT8_C( 32), INT8_C( 8), INT8_C( 21), INT8_C( 10), INT8_C( 48), -INT8_C( 60), INT8_C( 55) }, { INT8_C( 33), INT8_MAX, INT8_MAX, -INT8_C( 52), INT8_C( 87), INT8_C( 65), INT8_C( 86), -INT8_C( 76), INT8_C( 80), INT8_MAX, INT8_C( 6), INT8_MAX, -INT8_C( 112), -INT8_C( 35), -INT8_C( 54), -INT8_C( 44), INT8_C( 49), -INT8_C( 82), -INT8_C( 22), -INT8_C( 13), -INT8_C( 17), -INT8_C( 8), -INT8_C( 14), INT8_C( 108), INT8_C( 126), INT8_C( 95), -INT8_C( 50), -INT8_C( 76), INT8_C( 116), -INT8_C( 6), INT8_C( 17), -INT8_C( 97) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i 
src = simde_x_mm256_loadu_epi8(test_vec[i].src); simde__m256i a = simde_x_mm256_loadu_epi8(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi8(test_vec[i].b); simde__m256i r = simde_mm256_mask_adds_epi8(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm256_maskz_adds_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask32 k; const int8_t a[32]; const int8_t b[32]; const int8_t r[32]; } test_vec[] = { { UINT32_C( 14766), { INT8_C( 55), INT8_C( 62), INT8_C( 61), -INT8_C( 18), INT8_C( 70), -INT8_C( 29), INT8_C( 26), -INT8_C( 17), INT8_C( 21), INT8_C( 30), -INT8_C( 94), -INT8_C( 9), INT8_C( 2), INT8_C( 115), -INT8_C( 89), -INT8_C( 32), INT8_C( 70), -INT8_C( 105), INT8_C( 70), INT8_C( 70), INT8_C( 61), INT8_C( 10), INT8_C( 9), INT8_C( 43), INT8_C( 68), -INT8_C( 98), -INT8_C( 100), -INT8_C( 127), -INT8_C( 29), INT8_C( 26), -INT8_C( 67), -INT8_C( 57) }, { INT8_C( 66), INT8_C( 16), INT8_C( 100), -INT8_C( 70), INT8_C( 89), -INT8_C( 69), -INT8_C( 68), -INT8_C( 110), INT8_C( 64), -INT8_C( 113), -INT8_C( 40), INT8_C( 48), INT8_C( 79), -INT8_C( 27), INT8_C( 25), INT8_C( 42), -INT8_C( 19), INT8_C( 67), INT8_C( 63), -INT8_C( 22), -INT8_C( 125), INT8_C( 59), -INT8_C( 74), INT8_C( 64), -INT8_C( 96), INT8_C( 66), INT8_C( 71), -INT8_C( 80), INT8_C( 23), -INT8_C( 102), INT8_C( 65), INT8_C( 126) }, { INT8_C( 0), INT8_C( 78), INT8_MAX, -INT8_C( 88), INT8_C( 0), -INT8_C( 98), INT8_C( 0), -INT8_C( 127), INT8_C( 85), INT8_C( 0), INT8_C( 0), INT8_C( 39), INT8_C( 81), INT8_C( 88), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { UINT32_C( 48711), { -INT8_C( 23), -INT8_C( 106), INT8_C( 46), -INT8_C( 92), -INT8_C( 53), -INT8_C( 100), -INT8_C( 19), -INT8_C( 86), -INT8_C( 3), INT8_C( 102), INT8_C( 27), INT8_C( 
95), INT8_C( 80), INT8_C( 93), INT8_C( 104), -INT8_C( 65), -INT8_C( 117), -INT8_C( 64), INT8_C( 111), -INT8_C( 87), -INT8_C( 92), INT8_C( 32), INT8_C( 69), -INT8_C( 39), INT8_C( 50), -INT8_C( 78), INT8_C( 69), INT8_C( 71), -INT8_C( 10), -INT8_C( 95), INT8_C( 19), INT8_C( 78) }, { -INT8_C( 13), INT8_C( 64), INT8_C( 95), -INT8_C( 107), INT8_C( 32), INT8_C( 21), INT8_C( 116), -INT8_C( 18), -INT8_C( 61), INT8_C( 88), INT8_C( 111), -INT8_C( 60), -INT8_C( 5), INT8_C( 0), -INT8_C( 87), -INT8_C( 89), -INT8_C( 75), -INT8_C( 119), INT8_C( 100), INT8_C( 56), INT8_C( 112), -INT8_C( 120), -INT8_C( 115), -INT8_C( 17), -INT8_C( 19), INT8_C( 30), -INT8_C( 93), INT8_C( 18), INT8_C( 40), -INT8_C( 94), -INT8_C( 36), -INT8_C( 32) }, { -INT8_C( 36), -INT8_C( 42), INT8_MAX, INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 97), INT8_C( 0), INT8_C( 0), INT8_MAX, INT8_MAX, INT8_C( 35), INT8_C( 75), INT8_C( 93), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { UINT32_C( 22629), { INT8_C( 0), INT8_C( 69), -INT8_C( 39), -INT8_C( 106), -INT8_C( 84), INT8_C( 19), INT8_C( 31), -INT8_C( 16), -INT8_C( 113), -INT8_C( 23), INT8_C( 40), INT8_C( 69), -INT8_C( 7), -INT8_C( 85), -INT8_C( 89), -INT8_C( 117), INT8_C( 70), INT8_C( 44), INT8_C( 115), INT8_C( 20), INT8_C( 16), INT8_C( 45), -INT8_C( 30), INT8_C( 36), INT8_C( 38), -INT8_C( 49), INT8_C( 88), INT8_C( 80), INT8_C( 78), INT8_C( 52), INT8_C( 70), INT8_C( 47) }, { -INT8_C( 37), -INT8_C( 51), -INT8_C( 78), -INT8_C( 33), INT8_C( 76), INT8_C( 24), INT8_C( 2), INT8_C( 44), INT8_C( 68), -INT8_C( 42), INT8_C( 1), -INT8_C( 82), -INT8_C( 7), -INT8_C( 101), -INT8_C( 121), -INT8_C( 5), INT8_C( 42), INT8_C( 31), -INT8_C( 59), -INT8_C( 111), INT8_MIN, -INT8_C( 74), -INT8_C( 80), -INT8_C( 43), -INT8_C( 92), -INT8_C( 5), INT8_C( 56), -INT8_C( 57), -INT8_C( 14), INT8_C( 35), -INT8_C( 94), -INT8_C( 10) 
}, { -INT8_C( 37), INT8_C( 0), -INT8_C( 117), INT8_C( 0), INT8_C( 0), INT8_C( 43), INT8_C( 33), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 13), -INT8_C( 14), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { UINT32_C( 34647), { INT8_C( 22), -INT8_C( 101), -INT8_C( 1), INT8_C( 44), INT8_C( 87), INT8_C( 24), -INT8_C( 84), -INT8_C( 58), -INT8_C( 20), INT8_C( 81), -INT8_C( 60), -INT8_C( 24), INT8_C( 6), INT8_C( 70), -INT8_C( 48), INT8_C( 111), -INT8_C( 65), INT8_C( 60), -INT8_C( 59), INT8_C( 20), INT8_C( 6), -INT8_C( 65), -INT8_C( 88), INT8_C( 103), INT8_C( 43), INT8_C( 22), INT8_C( 95), INT8_C( 54), INT8_C( 91), INT8_C( 116), INT8_C( 118), INT8_C( 1) }, { -INT8_C( 64), -INT8_C( 72), -INT8_C( 36), -INT8_C( 45), -INT8_C( 62), -INT8_C( 121), INT8_C( 20), INT8_C( 98), INT8_C( 9), -INT8_C( 3), -INT8_C( 33), -INT8_C( 57), -INT8_C( 77), INT8_C( 54), INT8_C( 14), -INT8_C( 101), INT8_C( 57), -INT8_C( 123), -INT8_C( 71), INT8_C( 16), INT8_C( 102), INT8_C( 5), -INT8_C( 127), INT8_C( 46), INT8_C( 112), -INT8_C( 64), INT8_C( 3), INT8_C( 32), -INT8_C( 70), -INT8_C( 90), -INT8_C( 38), -INT8_C( 60) }, { -INT8_C( 42), INT8_MIN, -INT8_C( 37), INT8_C( 0), INT8_C( 25), INT8_C( 0), -INT8_C( 64), INT8_C( 0), -INT8_C( 11), INT8_C( 78), -INT8_C( 93), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 10), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { UINT32_C( 33178), { -INT8_C( 112), -INT8_C( 114), -INT8_C( 27), INT8_C( 55), INT8_C( 41), INT8_C( 10), INT8_C( 103), -INT8_C( 121), INT8_C( 66), -INT8_C( 119), INT8_C( 82), -INT8_C( 125), -INT8_C( 120), INT8_C( 5), INT8_C( 41), -INT8_C( 55), -INT8_C( 114), INT8_C( 124), -INT8_C( 115), INT8_C( 57), 
INT8_C( 20), -INT8_C( 84), -INT8_C( 7), INT8_C( 102), -INT8_C( 77), -INT8_C( 5), -INT8_C( 59), -INT8_C( 62), INT8_C( 124), INT8_C( 14), INT8_C( 57), INT8_C( 94) }, { -INT8_C( 126), INT8_C( 83), INT8_C( 7), INT8_C( 67), INT8_C( 33), INT8_C( 82), -INT8_C( 24), INT8_C( 96), -INT8_C( 112), -INT8_C( 43), INT8_C( 36), INT8_C( 36), -INT8_C( 99), -INT8_C( 23), INT8_C( 74), INT8_C( 27), -INT8_C( 72), INT8_C( 23), -INT8_C( 17), -INT8_C( 4), -INT8_C( 25), -INT8_C( 84), -INT8_C( 55), -INT8_C( 115), -INT8_C( 36), INT8_C( 16), INT8_C( 33), INT8_C( 13), INT8_C( 3), INT8_C( 29), -INT8_C( 53), INT8_MAX }, { INT8_C( 0), -INT8_C( 31), INT8_C( 0), INT8_C( 122), INT8_C( 74), INT8_C( 0), INT8_C( 0), -INT8_C( 25), -INT8_C( 46), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 28), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { UINT32_C( 23935), { INT8_C( 37), INT8_C( 27), INT8_C( 77), -INT8_C( 113), INT8_C( 4), -INT8_C( 36), -INT8_C( 1), INT8_C( 117), -INT8_C( 77), INT8_C( 42), INT8_C( 104), INT8_C( 109), -INT8_C( 9), INT8_C( 94), INT8_C( 23), INT8_C( 25), -INT8_C( 69), -INT8_C( 80), -INT8_C( 76), INT8_C( 22), -INT8_C( 44), -INT8_C( 48), INT8_C( 100), -INT8_C( 87), INT8_C( 73), INT8_C( 64), INT8_C( 74), INT8_C( 77), -INT8_C( 104), INT8_C( 109), INT8_C( 73), -INT8_C( 100) }, { INT8_C( 34), INT8_C( 89), INT8_C( 99), INT8_C( 37), INT8_C( 2), INT8_C( 58), INT8_C( 49), INT8_C( 10), INT8_C( 72), -INT8_C( 35), INT8_C( 120), INT8_C( 43), INT8_C( 84), -INT8_C( 59), -INT8_C( 72), INT8_C( 106), INT8_C( 33), INT8_C( 86), INT8_C( 69), -INT8_C( 42), -INT8_C( 14), INT8_C( 32), INT8_C( 113), -INT8_C( 101), -INT8_C( 108), -INT8_C( 77), INT8_C( 126), -INT8_C( 15), INT8_C( 63), INT8_C( 113), INT8_C( 108), -INT8_C( 117) }, { INT8_C( 71), INT8_C( 116), INT8_MAX, -INT8_C( 76), INT8_C( 6), INT8_C( 22), INT8_C( 48), INT8_C( 0), 
-INT8_C( 5), INT8_C( 0), INT8_MAX, INT8_MAX, INT8_C( 75), INT8_C( 0), -INT8_C( 49), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { UINT32_C( 30325), { -INT8_C( 38), INT8_C( 61), -INT8_C( 91), INT8_C( 83), -INT8_C( 95), -INT8_C( 17), INT8_C( 39), -INT8_C( 69), -INT8_C( 65), -INT8_C( 37), -INT8_C( 112), -INT8_C( 103), INT8_C( 54), -INT8_C( 77), INT8_C( 39), -INT8_C( 98), -INT8_C( 45), -INT8_C( 124), -INT8_C( 82), -INT8_C( 119), -INT8_C( 44), -INT8_C( 17), -INT8_C( 83), -INT8_C( 33), INT8_C( 89), -INT8_C( 29), INT8_C( 105), INT8_C( 39), -INT8_C( 32), -INT8_C( 38), INT8_C( 98), -INT8_C( 47) }, { -INT8_C( 109), INT8_C( 105), -INT8_C( 21), INT8_C( 27), INT8_C( 2), -INT8_C( 112), -INT8_C( 93), INT8_C( 15), -INT8_C( 75), -INT8_C( 2), INT8_C( 65), -INT8_C( 112), -INT8_C( 6), INT8_C( 59), -INT8_C( 94), INT8_C( 123), INT8_C( 35), -INT8_C( 22), -INT8_C( 85), INT8_C( 118), INT8_C( 60), INT8_C( 28), INT8_C( 72), INT8_C( 106), -INT8_C( 104), -INT8_C( 83), -INT8_C( 41), -INT8_C( 31), INT8_C( 80), INT8_C( 85), -INT8_C( 69), INT8_C( 37) }, { INT8_MIN, INT8_C( 0), -INT8_C( 112), INT8_C( 0), -INT8_C( 93), INT8_MIN, -INT8_C( 54), INT8_C( 0), INT8_C( 0), -INT8_C( 39), -INT8_C( 47), INT8_C( 0), INT8_C( 48), -INT8_C( 18), -INT8_C( 55), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { UINT32_C( 38151), { INT8_C( 24), INT8_C( 22), INT8_C( 106), INT8_C( 19), -INT8_C( 11), INT8_C( 73), -INT8_C( 76), -INT8_C( 100), -INT8_C( 66), -INT8_C( 18), INT8_C( 0), INT8_C( 116), -INT8_C( 99), INT8_C( 9), -INT8_C( 107), -INT8_C( 41), INT8_C( 2), -INT8_C( 51), -INT8_C( 119), -INT8_C( 50), INT8_C( 52), -INT8_C( 79), -INT8_C( 112), -INT8_C( 105), INT8_C( 121), -INT8_C( 1), -INT8_C( 33), 
-INT8_C( 59), -INT8_C( 40), INT8_C( 113), -INT8_C( 108), -INT8_C( 42) }, { -INT8_C( 111), -INT8_C( 9), -INT8_C( 21), INT8_MIN, INT8_C( 68), -INT8_C( 45), -INT8_C( 26), INT8_C( 55), INT8_C( 54), -INT8_C( 115), -INT8_C( 52), -INT8_C( 115), INT8_C( 11), -INT8_C( 27), INT8_C( 14), INT8_C( 69), INT8_C( 105), -INT8_C( 79), -INT8_C( 62), -INT8_C( 2), INT8_C( 99), INT8_C( 81), -INT8_C( 127), INT8_C( 0), -INT8_C( 25), INT8_C( 66), INT8_C( 49), -INT8_C( 21), INT8_C( 3), INT8_C( 110), -INT8_C( 88), -INT8_C( 74) }, { -INT8_C( 87), INT8_C( 13), INT8_C( 85), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 12), INT8_C( 0), -INT8_C( 52), INT8_C( 0), -INT8_C( 88), INT8_C( 0), INT8_C( 0), INT8_C( 28), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi8(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi8(test_vec[i].b); simde__m256i r = simde_mm256_maskz_adds_epi8(test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm256_mask_adds_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t src[16]; const simde__mmask16 k; const int16_t a[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { { -INT16_C( 13954), -INT16_C( 13566), INT16_C( 13346), -INT16_C( 19324), INT16_C( 20767), -INT16_C( 25591), -INT16_C( 15670), INT16_C( 22319), -INT16_C( 15735), INT16_C( 15143), INT16_C( 21746), -INT16_C( 32557), INT16_C( 16862), INT16_C( 27014), -INT16_C( 1471), -INT16_C( 16462) }, UINT16_C(46276), { -INT16_C( 6518), INT16_C( 3817), INT16_C( 2202), -INT16_C( 23713), INT16_C( 10660), -INT16_C( 11419), -INT16_C( 4480), -INT16_C( 22379), -INT16_C( 30679), -INT16_C( 516), -INT16_C( 9720), -INT16_C( 28866), 
INT16_C( 32579), -INT16_C( 2679), INT16_C( 19774), -INT16_C( 13910) }, { -INT16_C( 27853), -INT16_C( 12841), INT16_C( 14235), INT16_C( 16240), -INT16_C( 10912), -INT16_C( 7917), -INT16_C( 22332), -INT16_C( 4727), -INT16_C( 31440), INT16_C( 14826), INT16_C( 10591), -INT16_C( 23864), INT16_C( 20904), -INT16_C( 6249), INT16_C( 16799), -INT16_C( 11600) }, { -INT16_C( 13954), -INT16_C( 13566), INT16_C( 16437), -INT16_C( 19324), INT16_C( 20767), -INT16_C( 25591), -INT16_C( 26812), -INT16_C( 27106), -INT16_C( 15735), INT16_C( 15143), INT16_C( 871), -INT16_C( 32557), INT16_MAX, -INT16_C( 8928), -INT16_C( 1471), -INT16_C( 25510) } }, { { -INT16_C( 30764), INT16_C( 28576), INT16_C( 4286), INT16_C( 8111), -INT16_C( 15642), -INT16_C( 22016), -INT16_C( 30358), -INT16_C( 25705), -INT16_C( 32242), INT16_C( 28116), -INT16_C( 25429), INT16_C( 21263), -INT16_C( 22803), -INT16_C( 29638), -INT16_C( 5400), -INT16_C( 17313) }, UINT16_C(65394), { INT16_C( 12332), -INT16_C( 9457), -INT16_C( 2737), INT16_C( 20381), INT16_C( 1951), INT16_C( 14296), -INT16_C( 6494), INT16_C( 30393), INT16_C( 25683), INT16_C( 25106), INT16_C( 183), -INT16_C( 3575), -INT16_C( 3700), -INT16_C( 5156), INT16_C( 20141), -INT16_C( 9750) }, { -INT16_C( 1409), -INT16_C( 12620), INT16_C( 20975), -INT16_C( 28898), -INT16_C( 2471), -INT16_C( 1082), INT16_C( 32733), INT16_C( 12402), -INT16_C( 31517), -INT16_C( 25965), -INT16_C( 25468), INT16_C( 4492), INT16_C( 27021), INT16_C( 15100), -INT16_C( 6217), INT16_C( 13844) }, { -INT16_C( 30764), -INT16_C( 22077), INT16_C( 4286), INT16_C( 8111), -INT16_C( 520), INT16_C( 13214), INT16_C( 26239), -INT16_C( 25705), -INT16_C( 5834), -INT16_C( 859), -INT16_C( 25285), INT16_C( 917), INT16_C( 23321), INT16_C( 9944), INT16_C( 13924), INT16_C( 4094) } }, { { -INT16_C( 14111), -INT16_C( 12283), INT16_C( 8986), INT16_C( 29535), INT16_C( 9497), -INT16_C( 2450), -INT16_C( 8028), -INT16_C( 30937), -INT16_C( 17819), -INT16_C( 5854), -INT16_C( 20906), -INT16_C( 7174), -INT16_C( 2281), 
-INT16_C( 12515), INT16_C( 12766), -INT16_C( 16635) }, UINT16_C( 2810), { INT16_C( 5263), -INT16_C( 4307), INT16_C( 18311), -INT16_C( 2796), -INT16_C( 18115), INT16_C( 25814), INT16_C( 15168), INT16_C( 25118), INT16_C( 29732), INT16_C( 7953), INT16_C( 10327), INT16_C( 29974), -INT16_C( 2825), -INT16_C( 602), -INT16_C( 24397), INT16_C( 16903) }, { INT16_C( 13748), INT16_C( 15153), INT16_C( 18044), -INT16_C( 18127), INT16_C( 2047), INT16_C( 16158), INT16_C( 15426), INT16_C( 26274), -INT16_C( 19535), INT16_C( 2181), -INT16_C( 25637), -INT16_C( 11395), INT16_C( 9359), INT16_C( 17104), -INT16_C( 10300), INT16_C( 31109) }, { -INT16_C( 14111), INT16_C( 10846), INT16_C( 8986), -INT16_C( 20923), -INT16_C( 16068), INT16_MAX, INT16_C( 30594), INT16_MAX, -INT16_C( 17819), INT16_C( 10134), -INT16_C( 20906), INT16_C( 18579), -INT16_C( 2281), -INT16_C( 12515), INT16_C( 12766), -INT16_C( 16635) } }, { { -INT16_C( 18932), -INT16_C( 30540), -INT16_C( 6660), -INT16_C( 1214), INT16_C( 24812), INT16_C( 11835), -INT16_C( 8804), INT16_C( 19861), INT16_C( 6800), INT16_C( 27478), -INT16_C( 11338), INT16_C( 17726), INT16_C( 3831), -INT16_C( 17272), INT16_C( 3558), -INT16_C( 3531) }, UINT16_C(59843), { -INT16_C( 16261), -INT16_C( 16945), -INT16_C( 17477), -INT16_C( 2531), -INT16_C( 17942), INT16_C( 32723), INT16_C( 25351), INT16_C( 23961), INT16_C( 20431), INT16_C( 3376), INT16_C( 10389), INT16_C( 7452), INT16_C( 740), INT16_C( 6442), -INT16_C( 4620), INT16_C( 28418) }, { -INT16_C( 11859), INT16_C( 26924), INT16_C( 18829), INT16_C( 30559), INT16_C( 13059), INT16_C( 2806), -INT16_C( 28778), INT16_C( 25959), -INT16_C( 26657), INT16_C( 29811), -INT16_C( 28737), -INT16_C( 23663), -INT16_C( 17519), -INT16_C( 31300), -INT16_C( 16472), INT16_C( 22261) }, { -INT16_C( 28120), INT16_C( 9979), -INT16_C( 6660), -INT16_C( 1214), INT16_C( 24812), INT16_C( 11835), -INT16_C( 3427), INT16_MAX, -INT16_C( 6226), INT16_C( 27478), -INT16_C( 11338), -INT16_C( 16211), INT16_C( 3831), -INT16_C( 24858), -INT16_C( 
21092), INT16_MAX } }, { { INT16_C( 8592), INT16_C( 7615), INT16_C( 7787), INT16_C( 28308), -INT16_C( 30127), -INT16_C( 6024), -INT16_C( 8422), -INT16_C( 1715), -INT16_C( 16266), INT16_C( 13933), -INT16_C( 433), -INT16_C( 7975), -INT16_C( 26951), INT16_C( 24934), INT16_C( 23381), -INT16_C( 6729) }, UINT16_C(30332), { -INT16_C( 6397), -INT16_C( 26731), -INT16_C( 6571), -INT16_C( 13022), INT16_C( 15566), INT16_C( 7340), INT16_C( 9013), -INT16_C( 23844), INT16_C( 11353), INT16_C( 12960), INT16_C( 22796), INT16_C( 29384), INT16_C( 7610), INT16_C( 29389), INT16_C( 18947), INT16_C( 1768) }, { INT16_C( 32049), -INT16_C( 30819), -INT16_C( 16540), INT16_C( 12884), INT16_C( 507), INT16_C( 12366), INT16_C( 11044), INT16_C( 32210), INT16_C( 29271), INT16_C( 25519), INT16_C( 30923), -INT16_C( 31018), -INT16_C( 23659), -INT16_C( 26376), -INT16_C( 7955), INT16_C( 8094) }, { INT16_C( 8592), INT16_C( 7615), -INT16_C( 23111), -INT16_C( 138), INT16_C( 16073), INT16_C( 19706), INT16_C( 20057), -INT16_C( 1715), -INT16_C( 16266), INT16_MAX, INT16_MAX, -INT16_C( 7975), -INT16_C( 16049), INT16_C( 3013), INT16_C( 10992), -INT16_C( 6729) } }, { { INT16_C( 15454), -INT16_C( 15706), -INT16_C( 1285), -INT16_C( 2060), INT16_C( 17403), INT16_C( 7975), -INT16_C( 1426), -INT16_C( 14948), INT16_C( 19564), INT16_C( 14376), -INT16_C( 316), INT16_C( 22974), -INT16_C( 18782), -INT16_C( 28686), -INT16_C( 28522), -INT16_C( 2898) }, UINT16_C(21708), { -INT16_C( 14154), -INT16_C( 21681), INT16_C( 19135), -INT16_C( 6418), INT16_C( 23658), INT16_C( 1760), INT16_C( 19745), INT16_C( 18770), INT16_C( 5765), INT16_C( 17224), -INT16_C( 5520), INT16_C( 25337), -INT16_C( 28807), INT16_C( 10482), -INT16_C( 16508), INT16_C( 14972) }, { -INT16_C( 13433), INT16_C( 18149), -INT16_C( 11498), -INT16_C( 32724), INT16_C( 3375), INT16_C( 20614), -INT16_C( 9894), -INT16_C( 8294), -INT16_C( 7441), INT16_C( 24354), INT16_C( 7116), INT16_C( 17857), -INT16_C( 19286), INT16_C( 11885), -INT16_C( 5517), -INT16_C( 1431) }, { INT16_C( 
15454), -INT16_C( 15706), INT16_C( 7637), INT16_MIN, INT16_C( 17403), INT16_C( 7975), INT16_C( 9851), INT16_C( 10476), INT16_C( 19564), INT16_C( 14376), INT16_C( 1596), INT16_C( 22974), INT16_MIN, -INT16_C( 28686), -INT16_C( 22025), -INT16_C( 2898) } }, { { INT16_C( 20149), -INT16_C( 13504), INT16_C( 27682), INT16_C( 20811), -INT16_C( 11655), -INT16_C( 11358), INT16_C( 15531), -INT16_C( 25934), -INT16_C( 11234), -INT16_C( 5382), -INT16_C( 17425), -INT16_C( 26065), -INT16_C( 25233), -INT16_C( 7480), INT16_C( 12679), INT16_C( 15580) }, UINT16_C( 7296), { -INT16_C( 24056), INT16_C( 21385), INT16_C( 755), -INT16_C( 27355), -INT16_C( 12074), -INT16_C( 30511), -INT16_C( 4245), INT16_C( 25949), INT16_C( 19673), INT16_C( 2336), -INT16_C( 28442), -INT16_C( 20570), INT16_C( 11634), INT16_C( 20448), INT16_C( 24681), INT16_C( 29035) }, { -INT16_C( 3070), -INT16_C( 2363), -INT16_C( 5385), -INT16_C( 12917), INT16_C( 23995), INT16_C( 9813), -INT16_C( 19892), INT16_C( 9867), -INT16_C( 21505), -INT16_C( 6865), -INT16_C( 10949), -INT16_C( 20844), INT16_C( 29954), INT16_C( 27645), INT16_C( 26837), -INT16_C( 10019) }, { INT16_C( 20149), -INT16_C( 13504), INT16_C( 27682), INT16_C( 20811), -INT16_C( 11655), -INT16_C( 11358), INT16_C( 15531), INT16_MAX, -INT16_C( 11234), -INT16_C( 5382), INT16_MIN, INT16_MIN, INT16_MAX, -INT16_C( 7480), INT16_C( 12679), INT16_C( 15580) } }, { { -INT16_C( 23971), INT16_C( 21710), INT16_C( 22924), INT16_C( 18209), INT16_C( 30390), INT16_C( 877), -INT16_C( 2007), INT16_C( 10281), INT16_C( 22692), -INT16_C( 8435), -INT16_C( 24019), INT16_C( 12173), -INT16_C( 30185), -INT16_C( 4966), INT16_C( 30707), INT16_C( 20676) }, UINT16_C(37401), { -INT16_C( 22876), -INT16_C( 14868), -INT16_C( 23827), INT16_C( 23355), INT16_C( 25765), -INT16_C( 12717), -INT16_C( 2164), -INT16_C( 26074), INT16_C( 21463), INT16_C( 25660), INT16_C( 21378), INT16_C( 7663), -INT16_C( 7617), INT16_C( 1172), -INT16_C( 20942), -INT16_C( 10602) }, { -INT16_C( 32172), INT16_C( 16795), -INT16_C( 
10715), -INT16_C( 13668), -INT16_C( 4037), -INT16_C( 14439), -INT16_C( 16409), -INT16_C( 16799), -INT16_C( 25325), -INT16_C( 27357), INT16_C( 4848), INT16_C( 12466), INT16_C( 18420), INT16_C( 9780), -INT16_C( 13579), INT16_C( 18940) }, { INT16_MIN, INT16_C( 21710), INT16_C( 22924), INT16_C( 9687), INT16_C( 21728), INT16_C( 877), -INT16_C( 2007), INT16_C( 10281), INT16_C( 22692), -INT16_C( 1697), -INT16_C( 24019), INT16_C( 12173), INT16_C( 10803), -INT16_C( 4966), INT16_C( 30707), INT16_C( 8338) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i src = simde_x_mm256_loadu_epi16(test_vec[i].src); simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_mask_adds_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm256_maskz_adds_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const int16_t a[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { UINT16_C(31891), { -INT16_C( 23134), -INT16_C( 31842), INT16_C( 15981), INT16_C( 19589), -INT16_C( 21450), -INT16_C( 21374), -INT16_C( 7737), -INT16_C( 739), -INT16_C( 31345), INT16_C( 27262), INT16_C( 24141), -INT16_C( 681), INT16_C( 10726), -INT16_C( 29885), -INT16_C( 10531), INT16_C( 32519) }, { -INT16_C( 23173), -INT16_C( 5886), -INT16_C( 30749), INT16_C( 6453), -INT16_C( 18636), -INT16_C( 1082), -INT16_C( 7272), INT16_C( 10232), INT16_C( 30312), -INT16_C( 19055), -INT16_C( 5932), -INT16_C( 17486), -INT16_C( 2799), -INT16_C( 4538), INT16_C( 19915), INT16_C( 18286) }, { INT16_MIN, INT16_MIN, INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_C( 0), INT16_C( 0), INT16_C( 9493), INT16_C( 0), INT16_C( 0), INT16_C( 18209), -INT16_C( 18167), INT16_C( 7927), INT16_MIN, INT16_C( 9384), INT16_C( 0) } }, { UINT16_C(28914), { -INT16_C( 10960), INT16_C( 26104), INT16_C( 
11503), -INT16_C( 19172), -INT16_C( 19417), INT16_C( 8344), INT16_C( 475), INT16_C( 27798), INT16_C( 27574), INT16_C( 26964), INT16_C( 25894), INT16_C( 27742), INT16_C( 10836), -INT16_C( 15687), -INT16_C( 21647), -INT16_C( 24270) }, { INT16_C( 10880), INT16_C( 28422), INT16_C( 8790), INT16_C( 32292), -INT16_C( 16938), -INT16_C( 20066), INT16_C( 13502), INT16_C( 29725), INT16_C( 29087), -INT16_C( 14883), INT16_C( 15574), INT16_C( 10801), -INT16_C( 5530), -INT16_C( 10260), INT16_C( 8085), INT16_C( 5752) }, { INT16_C( 0), INT16_MAX, INT16_C( 0), INT16_C( 0), INT16_MIN, -INT16_C( 11722), INT16_C( 13977), INT16_MAX, INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 5306), -INT16_C( 25947), -INT16_C( 13562), INT16_C( 0) } }, { UINT16_C(32329), { -INT16_C( 24443), -INT16_C( 21856), INT16_C( 30238), -INT16_C( 17305), INT16_C( 9511), INT16_C( 17648), -INT16_C( 28519), INT16_C( 30645), -INT16_C( 29867), -INT16_C( 30797), INT16_C( 6582), -INT16_C( 23951), INT16_C( 2032), INT16_C( 26817), INT16_C( 2845), -INT16_C( 23834) }, { -INT16_C( 31061), -INT16_C( 14004), -INT16_C( 19460), INT16_C( 9093), INT16_C( 30168), INT16_C( 29287), INT16_C( 7173), INT16_C( 23529), -INT16_C( 25433), INT16_C( 24034), INT16_C( 21429), -INT16_C( 23296), -INT16_C( 16038), INT16_C( 30477), -INT16_C( 3124), INT16_C( 30490) }, { INT16_MIN, INT16_C( 0), INT16_C( 0), -INT16_C( 8212), INT16_C( 0), INT16_C( 0), -INT16_C( 21346), INT16_C( 0), INT16_C( 0), -INT16_C( 6763), INT16_C( 28011), INT16_MIN, -INT16_C( 14006), INT16_MAX, -INT16_C( 279), INT16_C( 0) } }, { UINT16_C(26233), { INT16_C( 30016), -INT16_C( 15078), -INT16_C( 3432), -INT16_C( 197), INT16_C( 16484), INT16_C( 19739), -INT16_C( 15717), INT16_C( 32233), -INT16_C( 25056), INT16_C( 8401), INT16_C( 11075), INT16_C( 20705), -INT16_C( 20829), -INT16_C( 17085), -INT16_C( 17371), INT16_C( 26147) }, { INT16_C( 15665), -INT16_C( 14037), INT16_C( 26160), -INT16_C( 27448), -INT16_C( 7257), INT16_C( 17122), -INT16_C( 13402), -INT16_C( 14656), 
-INT16_C( 28310), -INT16_C( 21018), -INT16_C( 14404), INT16_C( 24574), INT16_C( 16757), -INT16_C( 25828), INT16_C( 16638), INT16_C( 12033) }, { INT16_MAX, INT16_C( 0), INT16_C( 0), -INT16_C( 27645), INT16_C( 9227), INT16_MAX, -INT16_C( 29119), INT16_C( 0), INT16_C( 0), -INT16_C( 12617), -INT16_C( 3329), INT16_C( 0), INT16_C( 0), INT16_MIN, -INT16_C( 733), INT16_C( 0) } }, { UINT16_C(11389), { -INT16_C( 20999), -INT16_C( 15981), INT16_C( 14914), INT16_C( 9381), INT16_C( 19324), INT16_C( 15599), INT16_C( 22801), -INT16_C( 2099), -INT16_C( 30201), INT16_C( 1470), INT16_C( 13545), INT16_C( 1606), INT16_C( 17615), -INT16_C( 12218), -INT16_C( 15500), INT16_C( 28156) }, { -INT16_C( 28815), -INT16_C( 19666), -INT16_C( 11319), INT16_C( 18135), -INT16_C( 14818), INT16_C( 12162), INT16_C( 20512), INT16_C( 10022), -INT16_C( 6694), -INT16_C( 15572), INT16_C( 29209), -INT16_C( 5943), INT16_C( 4023), INT16_C( 11192), -INT16_C( 19245), INT16_C( 17560) }, { INT16_MIN, INT16_C( 0), INT16_C( 3595), INT16_C( 27516), INT16_C( 4506), INT16_C( 27761), INT16_MAX, INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_MAX, -INT16_C( 4337), INT16_C( 0), -INT16_C( 1026), INT16_C( 0), INT16_C( 0) } }, { UINT16_C(50756), { INT16_C( 3575), -INT16_C( 12646), -INT16_C( 18349), -INT16_C( 10604), -INT16_C( 19224), INT16_C( 3622), INT16_C( 219), INT16_C( 2035), INT16_C( 3267), -INT16_C( 29318), INT16_C( 12788), -INT16_C( 21348), INT16_C( 28508), -INT16_C( 2975), -INT16_C( 23117), -INT16_C( 21830) }, { INT16_C( 21682), INT16_C( 1656), INT16_C( 3341), -INT16_C( 2596), INT16_C( 705), -INT16_C( 25341), -INT16_C( 2302), -INT16_C( 14940), INT16_C( 7683), -INT16_C( 1966), -INT16_C( 4273), -INT16_C( 21596), INT16_C( 1374), INT16_C( 4767), INT16_C( 23210), INT16_C( 23996) }, { INT16_C( 0), INT16_C( 0), -INT16_C( 15008), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 2083), INT16_C( 0), INT16_C( 0), -INT16_C( 31284), INT16_C( 8515), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 93), INT16_C( 2166) } }, { 
UINT16_C(13742), { -INT16_C( 17565), INT16_C( 16194), INT16_C( 944), -INT16_C( 19391), INT16_C( 17312), INT16_C( 17835), -INT16_C( 20984), INT16_C( 23395), -INT16_C( 19546), INT16_C( 19274), -INT16_C( 22434), -INT16_C( 432), -INT16_C( 1094), INT16_C( 30552), INT16_C( 1624), -INT16_C( 17492) }, { -INT16_C( 4414), INT16_C( 29434), INT16_C( 15345), -INT16_C( 28122), -INT16_C( 11906), -INT16_C( 31017), INT16_C( 14976), INT16_C( 9953), INT16_C( 11245), INT16_C( 19569), -INT16_C( 15660), -INT16_C( 29110), -INT16_C( 23875), INT16_C( 5381), -INT16_C( 20056), INT16_C( 27344) }, { INT16_C( 0), INT16_MAX, INT16_C( 16289), INT16_MIN, INT16_C( 0), -INT16_C( 13182), INT16_C( 0), INT16_MAX, -INT16_C( 8301), INT16_C( 0), INT16_MIN, INT16_C( 0), -INT16_C( 24969), INT16_MAX, INT16_C( 0), INT16_C( 0) } }, { UINT16_C(51871), { -INT16_C( 28195), INT16_C( 773), -INT16_C( 31965), -INT16_C( 1323), INT16_C( 21769), -INT16_C( 5324), INT16_C( 8827), -INT16_C( 4842), -INT16_C( 5522), -INT16_C( 18257), INT16_C( 27769), INT16_C( 32346), INT16_C( 641), INT16_C( 20784), -INT16_C( 12435), INT16_C( 18971) }, { INT16_C( 8288), -INT16_C( 31923), INT16_C( 8867), -INT16_C( 21379), -INT16_C( 19849), -INT16_C( 3177), -INT16_C( 20780), INT16_C( 17120), -INT16_C( 28776), INT16_C( 4602), INT16_C( 21755), INT16_C( 31888), -INT16_C( 16298), -INT16_C( 15411), -INT16_C( 6001), -INT16_C( 4083) }, { -INT16_C( 19907), -INT16_C( 31150), -INT16_C( 23098), -INT16_C( 22702), INT16_C( 1920), INT16_C( 0), INT16_C( 0), INT16_C( 12278), INT16_C( 0), -INT16_C( 13655), INT16_C( 0), INT16_MAX, INT16_C( 0), INT16_C( 0), -INT16_C( 18436), INT16_C( 14888) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_maskz_adds_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int 
test_simde_mm512_adds_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( -30), INT8_C( -16), INT8_C( -64), INT8_C( 113), INT8_C( 51), INT8_C(-115), INT8_C( 19), INT8_C( -80), INT8_C(-110), INT8_C( -62), INT8_C( -91), INT8_C( 8), INT8_C( 11), INT8_C( -49), INT8_C( -43), INT8_C(-101), INT8_C( 16), INT8_C( 61), INT8_C( -2), INT8_C(-100), INT8_C( 1), INT8_C( -80), INT8_C( 127), INT8_C(-105), INT8_C( -74), INT8_C(-126), INT8_C( 122), INT8_C( 30), INT8_C( -73), INT8_C( 48), INT8_C( -33), INT8_C( 2), INT8_C( -82), INT8_C( -10), INT8_C( 33), INT8_C( 20), INT8_C(-120), INT8_C(-120), INT8_C( -46), INT8_C( -54), INT8_C( -43), INT8_C( 63), INT8_C( 81), INT8_C( 103), INT8_C( 118), INT8_C( -84), INT8_C( 76), INT8_C( 47), INT8_C( 101), INT8_C( -83), INT8_C( 15), INT8_C( 23), INT8_C( 36), INT8_C(-116), INT8_C( -88), INT8_C( -45), INT8_C( -6), INT8_C( 86), INT8_C( 20), INT8_C( 39), INT8_C( 117), INT8_C( 80), INT8_C( 97), INT8_C( 41)), simde_mm512_set_epi8(INT8_C( 122), INT8_C( 43), INT8_C( 38), INT8_C( 97), INT8_C( 38), INT8_C( 68), INT8_C( 88), INT8_C( 56), INT8_C( -39), INT8_C( 61), INT8_C( -57), INT8_C( -66), INT8_C(-121), INT8_C( 29), INT8_C( 110), INT8_C( 127), INT8_C(-115), INT8_C( 16), INT8_C( -46), INT8_C( 65), INT8_C( -36), INT8_C( 26), INT8_C( 12), INT8_C( -79), INT8_C( -66), INT8_C( -28), INT8_C( -67), INT8_C( -79), INT8_C( -60), INT8_C( -13), INT8_C( 74), INT8_C( 25), INT8_C( 46), INT8_C( 8), INT8_C( 70), INT8_C( 106), INT8_C(-119), INT8_C( 0), INT8_C( -96), INT8_C( -51), INT8_C( -50), INT8_C( 93), INT8_C( 92), INT8_C( 65), INT8_C( -96), INT8_C(-126), INT8_C( 4), INT8_C( 9), INT8_C( 26), INT8_C(-119), INT8_C( 93), INT8_C( 15), INT8_C(-115), INT8_C( 77), INT8_C( 125), INT8_C( -62), INT8_C( 77), INT8_C( -22), INT8_C( 101), INT8_C( 83), INT8_C( 45), INT8_C( -68), INT8_C( 97), INT8_C( -19)), simde_mm512_set_epi8(INT8_C( 92), INT8_C( 27), INT8_C( -26), INT8_C( 127), INT8_C( 89), 
INT8_C( -47), INT8_C( 107), INT8_C( -24), INT8_C(-128), INT8_C( -1), INT8_C(-128), INT8_C( -58), INT8_C(-110), INT8_C( -20), INT8_C( 67), INT8_C( 26), INT8_C( -99), INT8_C( 77), INT8_C( -48), INT8_C( -35), INT8_C( -35), INT8_C( -54), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 55), INT8_C( -49), INT8_C(-128), INT8_C( 35), INT8_C( 41), INT8_C( 27), INT8_C( -36), INT8_C( -2), INT8_C( 103), INT8_C( 126), INT8_C(-128), INT8_C(-120), INT8_C(-128), INT8_C(-105), INT8_C( -93), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 22), INT8_C(-128), INT8_C( 80), INT8_C( 56), INT8_C( 127), INT8_C(-128), INT8_C( 108), INT8_C( 38), INT8_C( -79), INT8_C( -39), INT8_C( 37), INT8_C(-107), INT8_C( 71), INT8_C( 64), INT8_C( 121), INT8_C( 122), INT8_C( 127), INT8_C( 12), INT8_C( 127), INT8_C( 22)) }, { simde_mm512_set_epi8(INT8_C( 60), INT8_C( 96), INT8_C(-106), INT8_C(-115), INT8_C( 31), INT8_C(-108), INT8_C( 15), INT8_C( 63), INT8_C( -77), INT8_C( 125), INT8_C( 41), INT8_C( -80), INT8_C( 36), INT8_C(-118), INT8_C(-116), INT8_C( -63), INT8_C( -44), INT8_C( 70), INT8_C( 99), INT8_C( 48), INT8_C( 67), INT8_C( 43), INT8_C( 83), INT8_C( 53), INT8_C( 60), INT8_C( 57), INT8_C(-118), INT8_C( 55), INT8_C( -62), INT8_C( -42), INT8_C( 19), INT8_C( 52), INT8_C( -37), INT8_C( -32), INT8_C( 33), INT8_C(-114), INT8_C( -96), INT8_C( 53), INT8_C( 18), INT8_C( -37), INT8_C( 6), INT8_C( -87), INT8_C( -11), INT8_C( -10), INT8_C( -4), INT8_C( 84), INT8_C( 104), INT8_C( 55), INT8_C( 92), INT8_C( 117), INT8_C( 16), INT8_C( 115), INT8_C( 83), INT8_C( 80), INT8_C(-125), INT8_C( 64), INT8_C( 74), INT8_C( -9), INT8_C( -62), INT8_C( 67), INT8_C( 102), INT8_C( 66), INT8_C( -86), INT8_C(-112)), simde_mm512_set_epi8(INT8_C( -97), INT8_C( 52), INT8_C( -91), INT8_C( 7), INT8_C( -1), INT8_C( -19), INT8_C( 38), INT8_C( 15), INT8_C( 76), INT8_C( 63), INT8_C( 10), INT8_C( 18), INT8_C( 54), INT8_C( 73), INT8_C( 15), INT8_C( -54), INT8_C(-114), INT8_C( 63), INT8_C( 44), INT8_C( 4), INT8_C( 41), INT8_C( 
90), INT8_C( -78), INT8_C( -67), INT8_C( 95), INT8_C( -14), INT8_C( -10), INT8_C( -44), INT8_C( 53), INT8_C( 102), INT8_C( 127), INT8_C( 120), INT8_C( -89), INT8_C( 10), INT8_C( -36), INT8_C( -50), INT8_C( 118), INT8_C(-106), INT8_C( -52), INT8_C( 76), INT8_C( 66), INT8_C( -36), INT8_C( -79), INT8_C( 91), INT8_C( 14), INT8_C( 103), INT8_C( -73), INT8_C(-123), INT8_C( 68), INT8_C( -54), INT8_C(-103), INT8_C( 38), INT8_C( 58), INT8_C( 0), INT8_C( -62), INT8_C( -76), INT8_C( 93), INT8_C( 119), INT8_C( 14), INT8_C( 84), INT8_C( -83), INT8_C( 76), INT8_C( -47), INT8_C( -53)), simde_mm512_set_epi8(INT8_C( -37), INT8_C( 127), INT8_C(-128), INT8_C(-108), INT8_C( 30), INT8_C(-127), INT8_C( 53), INT8_C( 78), INT8_C( -1), INT8_C( 127), INT8_C( 51), INT8_C( -62), INT8_C( 90), INT8_C( -45), INT8_C(-101), INT8_C(-117), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 52), INT8_C( 108), INT8_C( 127), INT8_C( 5), INT8_C( -14), INT8_C( 127), INT8_C( 43), INT8_C(-128), INT8_C( 11), INT8_C( -9), INT8_C( 60), INT8_C( 127), INT8_C( 127), INT8_C(-126), INT8_C( -22), INT8_C( -3), INT8_C(-128), INT8_C( 22), INT8_C( -53), INT8_C( -34), INT8_C( 39), INT8_C( 72), INT8_C(-123), INT8_C( -90), INT8_C( 81), INT8_C( 10), INT8_C( 127), INT8_C( 31), INT8_C( -68), INT8_C( 127), INT8_C( 63), INT8_C( -87), INT8_C( 127), INT8_C( 127), INT8_C( 80), INT8_C(-128), INT8_C( -12), INT8_C( 127), INT8_C( 110), INT8_C( -48), INT8_C( 127), INT8_C( 19), INT8_C( 127), INT8_C(-128), INT8_C(-128)) }, { simde_mm512_set_epi8(INT8_C( -80), INT8_C( 122), INT8_C(-100), INT8_C( 123), INT8_C( 82), INT8_C(-107), INT8_C( 95), INT8_C( -80), INT8_C(-115), INT8_C(-109), INT8_C( 60), INT8_C( 72), INT8_C( 57), INT8_C( 99), INT8_C( -30), INT8_C(-115), INT8_C( 69), INT8_C( 18), INT8_C( -5), INT8_C( -6), INT8_C( -90), INT8_C( -20), INT8_C( 17), INT8_C( 121), INT8_C(-100), INT8_C( -55), INT8_C( 90), INT8_C( -42), INT8_C(-113), INT8_C( 119), INT8_C( 44), INT8_C(-123), INT8_C( -39), INT8_C( 114), INT8_C( 37), INT8_C( -84), INT8_C( 
-88), INT8_C( 15), INT8_C( 23), INT8_C( 114), INT8_C( 63), INT8_C( -61), INT8_C( 22), INT8_C( 8), INT8_C( 54), INT8_C( 19), INT8_C( -34), INT8_C( 18), INT8_C( -36), INT8_C( 104), INT8_C( 1), INT8_C( 5), INT8_C(-116), INT8_C( 79), INT8_C( -20), INT8_C( -4), INT8_C( 95), INT8_C( 103), INT8_C( 3), INT8_C( 64), INT8_C( 34), INT8_C( -49), INT8_C( -27), INT8_C( 41)), simde_mm512_set_epi8(INT8_C( 97), INT8_C(-116), INT8_C( -44), INT8_C( -44), INT8_C( 67), INT8_C( -66), INT8_C( 77), INT8_C( 60), INT8_C( 74), INT8_C( -19), INT8_C(-123), INT8_C( -81), INT8_C( -74), INT8_C( -21), INT8_C( -32), INT8_C(-122), INT8_C(-110), INT8_C( 116), INT8_C( 115), INT8_C( 99), INT8_C( 29), INT8_C( 50), INT8_C(-119), INT8_C(-126), INT8_C( -53), INT8_C( 68), INT8_C( -37), INT8_C( 84), INT8_C( 23), INT8_C( -10), INT8_C( 15), INT8_C( 37), INT8_C( -93), INT8_C( 57), INT8_C( 28), INT8_C(-116), INT8_C( 56), INT8_C(-124), INT8_C( -93), INT8_C( -67), INT8_C( 27), INT8_C( -1), INT8_C( -70), INT8_C( -30), INT8_C( -39), INT8_C( 59), INT8_C( 76), INT8_C( 108), INT8_C( 13), INT8_C( -29), INT8_C( 60), INT8_C( 106), INT8_C( 106), INT8_C( -77), INT8_C(-102), INT8_C( 3), INT8_C( -18), INT8_C( 23), INT8_C( 113), INT8_C( -97), INT8_C( 109), INT8_C( 81), INT8_C( 42), INT8_C( 93)), simde_mm512_set_epi8(INT8_C( 17), INT8_C( 6), INT8_C(-128), INT8_C( 79), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( -20), INT8_C( -41), INT8_C(-128), INT8_C( -63), INT8_C( -9), INT8_C( -17), INT8_C( 78), INT8_C( -62), INT8_C(-128), INT8_C( -41), INT8_C( 127), INT8_C( 110), INT8_C( 93), INT8_C( -61), INT8_C( 30), INT8_C(-102), INT8_C( -5), INT8_C(-128), INT8_C( 13), INT8_C( 53), INT8_C( 42), INT8_C( -90), INT8_C( 109), INT8_C( 59), INT8_C( -86), INT8_C(-128), INT8_C( 127), INT8_C( 65), INT8_C(-128), INT8_C( -32), INT8_C(-109), INT8_C( -70), INT8_C( 47), INT8_C( 90), INT8_C( -62), INT8_C( -48), INT8_C( -22), INT8_C( 15), INT8_C( 78), INT8_C( 42), INT8_C( 126), INT8_C( -23), INT8_C( 75), INT8_C( 61), INT8_C( 111), INT8_C( -10), 
INT8_C( 2), INT8_C(-122), INT8_C( -1), INT8_C( 77), INT8_C( 126), INT8_C( 116), INT8_C( -33), INT8_C( 127), INT8_C( 32), INT8_C( 15), INT8_C( 127)) }, { simde_mm512_set_epi8(INT8_C( -94), INT8_C( 96), INT8_C( -77), INT8_C( -55), INT8_C( 18), INT8_C( 12), INT8_C( 78), INT8_C( -17), INT8_C( -94), INT8_C( 83), INT8_C( -20), INT8_C(-101), INT8_C( -62), INT8_C( -62), INT8_C( -77), INT8_C( 21), INT8_C( 21), INT8_C( -49), INT8_C(-127), INT8_C( -47), INT8_C( -26), INT8_C( 62), INT8_C( -51), INT8_C( -34), INT8_C( 9), INT8_C( -56), INT8_C( 63), INT8_C( 4), INT8_C( 64), INT8_C( 46), INT8_C( 68), INT8_C( -18), INT8_C(-110), INT8_C( 103), INT8_C(-101), INT8_C( 45), INT8_C( -81), INT8_C( 125), INT8_C( 8), INT8_C( 72), INT8_C( -48), INT8_C( -28), INT8_C( 102), INT8_C( -90), INT8_C(-101), INT8_C( -40), INT8_C( -94), INT8_C(-127), INT8_C( -36), INT8_C( -32), INT8_C( 34), INT8_C( -39), INT8_C( -64), INT8_C( 111), INT8_C( 36), INT8_C( 91), INT8_C( 90), INT8_C( -10), INT8_C( 88), INT8_C(-117), INT8_C( 117), INT8_C(-111), INT8_C(-128), INT8_C( -35)), simde_mm512_set_epi8(INT8_C( 65), INT8_C( -2), INT8_C( 4), INT8_C( -77), INT8_C( 33), INT8_C(-119), INT8_C( -36), INT8_C( 95), INT8_C( -54), INT8_C( 57), INT8_C( -15), INT8_C( 20), INT8_C( 119), INT8_C( -56), INT8_C( -57), INT8_C( 22), INT8_C( -23), INT8_C( -56), INT8_C( -77), INT8_C( 114), INT8_C( -25), INT8_C( 85), INT8_C( 65), INT8_C( 124), INT8_C( 80), INT8_C( 94), INT8_C( -6), INT8_C(-128), INT8_C( 25), INT8_C( -80), INT8_C( 31), INT8_C( -23), INT8_C( 123), INT8_C(-123), INT8_C( -56), INT8_C( 34), INT8_C( 72), INT8_C( -39), INT8_C( -17), INT8_C( 39), INT8_C( 29), INT8_C( -88), INT8_C( 65), INT8_C( 81), INT8_C(-120), INT8_C( 80), INT8_C( -76), INT8_C( -59), INT8_C( 48), INT8_C( -90), INT8_C( -85), INT8_C( 82), INT8_C( 115), INT8_C(-114), INT8_C( 84), INT8_C( -40), INT8_C( -69), INT8_C( -11), INT8_C(-118), INT8_C( 39), INT8_C( -17), INT8_C( -23), INT8_C( 4), INT8_C( -37)), simde_mm512_set_epi8(INT8_C( -29), INT8_C( 94), INT8_C( -73), 
INT8_C(-128), INT8_C( 51), INT8_C(-107), INT8_C( 42), INT8_C( 78), INT8_C(-128), INT8_C( 127), INT8_C( -35), INT8_C( -81), INT8_C( 57), INT8_C(-118), INT8_C(-128), INT8_C( 43), INT8_C( -2), INT8_C(-105), INT8_C(-128), INT8_C( 67), INT8_C( -51), INT8_C( 127), INT8_C( 14), INT8_C( 90), INT8_C( 89), INT8_C( 38), INT8_C( 57), INT8_C(-124), INT8_C( 89), INT8_C( -34), INT8_C( 99), INT8_C( -41), INT8_C( 13), INT8_C( -20), INT8_C(-128), INT8_C( 79), INT8_C( -9), INT8_C( 86), INT8_C( -9), INT8_C( 111), INT8_C( -19), INT8_C(-116), INT8_C( 127), INT8_C( -9), INT8_C(-128), INT8_C( 40), INT8_C(-128), INT8_C(-128), INT8_C( 12), INT8_C(-122), INT8_C( -51), INT8_C( 43), INT8_C( 51), INT8_C( -3), INT8_C( 120), INT8_C( 51), INT8_C( 21), INT8_C( -21), INT8_C( -30), INT8_C( -78), INT8_C( 100), INT8_C(-128), INT8_C(-124), INT8_C( -72)) }, { simde_mm512_set_epi8(INT8_C( 31), INT8_C( -81), INT8_C( 115), INT8_C( 18), INT8_C( 20), INT8_C( -33), INT8_C(-125), INT8_C( -45), INT8_C( 54), INT8_C( 82), INT8_C( 61), INT8_C( 81), INT8_C( 123), INT8_C( 18), INT8_C( 89), INT8_C( 34), INT8_C( 10), INT8_C( -54), INT8_C( -24), INT8_C( 54), INT8_C( 49), INT8_C( -70), INT8_C( 7), INT8_C( -12), INT8_C( 110), INT8_C( -16), INT8_C( -10), INT8_C( 4), INT8_C( 3), INT8_C( -17), INT8_C( -46), INT8_C( -29), INT8_C( 26), INT8_C( -38), INT8_C(-107), INT8_C( -25), INT8_C( 61), INT8_C( -4), INT8_C( -21), INT8_C( -11), INT8_C( -29), INT8_C( 62), INT8_C( 120), INT8_C( 84), INT8_C( -48), INT8_C( -42), INT8_C( -15), INT8_C( -64), INT8_C( -90), INT8_C( 111), INT8_C(-123), INT8_C( 61), INT8_C( 22), INT8_C( -48), INT8_C(-107), INT8_C( 122), INT8_C( 27), INT8_C( 119), INT8_C( -63), INT8_C( -61), INT8_C( -69), INT8_C( 46), INT8_C( 34), INT8_C(-101)), simde_mm512_set_epi8(INT8_C( 44), INT8_C( 101), INT8_C( -48), INT8_C( -49), INT8_C( 63), INT8_C( 93), INT8_C( -48), INT8_C(-122), INT8_C( -67), INT8_C( 27), INT8_C( 1), INT8_C( -55), INT8_C( -79), INT8_C( -44), INT8_C(-115), INT8_C(-126), INT8_C( -42), INT8_C( 29), INT8_C( 66), 
INT8_C( -83), INT8_C( 108), INT8_C( -1), INT8_C( -47), INT8_C(-104), INT8_C( 62), INT8_C( 29), INT8_C( 4), INT8_C( -47), INT8_C( 98), INT8_C(-109), INT8_C( -23), INT8_C( 69), INT8_C(-110), INT8_C( -47), INT8_C( 48), INT8_C(-101), INT8_C( -80), INT8_C(-110), INT8_C( -55), INT8_C( 25), INT8_C( 39), INT8_C( 58), INT8_C( 4), INT8_C(-108), INT8_C( -23), INT8_C( -79), INT8_C( 124), INT8_C( 36), INT8_C( 113), INT8_C( 110), INT8_C( 105), INT8_C( 113), INT8_C( -54), INT8_C( -63), INT8_C( 50), INT8_C( -99), INT8_C( -8), INT8_C( -76), INT8_C( 102), INT8_C( 51), INT8_C( 58), INT8_C( 16), INT8_C( -94), INT8_C( -93)), simde_mm512_set_epi8(INT8_C( 75), INT8_C( 20), INT8_C( 67), INT8_C( -31), INT8_C( 83), INT8_C( 60), INT8_C(-128), INT8_C(-128), INT8_C( -13), INT8_C( 109), INT8_C( 62), INT8_C( 26), INT8_C( 44), INT8_C( -26), INT8_C( -26), INT8_C( -92), INT8_C( -32), INT8_C( -25), INT8_C( 42), INT8_C( -29), INT8_C( 127), INT8_C( -71), INT8_C( -40), INT8_C(-116), INT8_C( 127), INT8_C( 13), INT8_C( -6), INT8_C( -43), INT8_C( 101), INT8_C(-126), INT8_C( -69), INT8_C( 40), INT8_C( -84), INT8_C( -85), INT8_C( -59), INT8_C(-126), INT8_C( -19), INT8_C(-114), INT8_C( -76), INT8_C( 14), INT8_C( 10), INT8_C( 120), INT8_C( 124), INT8_C( -24), INT8_C( -71), INT8_C(-121), INT8_C( 109), INT8_C( -28), INT8_C( 23), INT8_C( 127), INT8_C( -18), INT8_C( 127), INT8_C( -32), INT8_C(-111), INT8_C( -57), INT8_C( 23), INT8_C( 19), INT8_C( 43), INT8_C( 39), INT8_C( -10), INT8_C( -11), INT8_C( 62), INT8_C( -60), INT8_C(-128)) }, { simde_mm512_set_epi8(INT8_C( 64), INT8_C( 55), INT8_C( 97), INT8_C( -61), INT8_C( 89), INT8_C( 120), INT8_C( 49), INT8_C( 124), INT8_C( 16), INT8_C( 113), INT8_C( 48), INT8_C( 70), INT8_C( 60), INT8_C( -11), INT8_C( 98), INT8_C( 70), INT8_C( 111), INT8_C( 55), INT8_C( -50), INT8_C( -21), INT8_C(-102), INT8_C( 3), INT8_C( 75), INT8_C( -9), INT8_C( 34), INT8_C( 96), INT8_C(-104), INT8_C( 37), INT8_C( 74), INT8_C( 22), INT8_C( 13), INT8_C( 82), INT8_C(-108), INT8_C(-112), INT8_C( 
49), INT8_C( -58), INT8_C( 116), INT8_C( 58), INT8_C( -29), INT8_C( 60), INT8_C( 125), INT8_C( 105), INT8_C( -20), INT8_C( -4), INT8_C(-106), INT8_C(-111), INT8_C( -29), INT8_C( -32), INT8_C( -43), INT8_C(-103), INT8_C( 84), INT8_C( 40), INT8_C( 63), INT8_C( 88), INT8_C( -74), INT8_C( -49), INT8_C( 10), INT8_C( 50), INT8_C( 38), INT8_C( 108), INT8_C( 42), INT8_C( -40), INT8_C( -81), INT8_C( 31)), simde_mm512_set_epi8(INT8_C(-107), INT8_C( 105), INT8_C( -15), INT8_C( -49), INT8_C( 74), INT8_C( -31), INT8_C( -49), INT8_C( 112), INT8_C(-103), INT8_C( 69), INT8_C( -55), INT8_C(-115), INT8_C( -48), INT8_C( 96), INT8_C( -8), INT8_C( -35), INT8_C( -24), INT8_C( 101), INT8_C( 70), INT8_C( -69), INT8_C(-105), INT8_C( -28), INT8_C( -38), INT8_C( -85), INT8_C( -94), INT8_C(-122), INT8_C( -80), INT8_C( 95), INT8_C( -1), INT8_C( 87), INT8_C( 21), INT8_C( 6), INT8_C(-103), INT8_C( 38), INT8_C( 15), INT8_C( -98), INT8_C( -27), INT8_C( 57), INT8_C( -76), INT8_C(-112), INT8_C( 77), INT8_C( -3), INT8_C( 57), INT8_C( 3), INT8_C( 123), INT8_C( -18), INT8_C( -90), INT8_C(-119), INT8_C( 112), INT8_C( 61), INT8_C( -48), INT8_C( -16), INT8_C( -36), INT8_C(-125), INT8_C( -10), INT8_C( 77), INT8_C( 106), INT8_C( 72), INT8_C( 62), INT8_C( -24), INT8_C( -65), INT8_C( 102), INT8_C(-122), INT8_C( 19)), simde_mm512_set_epi8(INT8_C( -43), INT8_C( 127), INT8_C( 82), INT8_C(-110), INT8_C( 127), INT8_C( 89), INT8_C( 0), INT8_C( 127), INT8_C( -87), INT8_C( 127), INT8_C( -7), INT8_C( -45), INT8_C( 12), INT8_C( 85), INT8_C( 90), INT8_C( 35), INT8_C( 87), INT8_C( 127), INT8_C( 20), INT8_C( -90), INT8_C(-128), INT8_C( -25), INT8_C( 37), INT8_C( -94), INT8_C( -60), INT8_C( -26), INT8_C(-128), INT8_C( 127), INT8_C( 73), INT8_C( 109), INT8_C( 34), INT8_C( 88), INT8_C(-128), INT8_C( -74), INT8_C( 64), INT8_C(-128), INT8_C( 89), INT8_C( 115), INT8_C(-105), INT8_C( -52), INT8_C( 127), INT8_C( 102), INT8_C( 37), INT8_C( -1), INT8_C( 17), INT8_C(-128), INT8_C(-119), INT8_C(-128), INT8_C( 69), INT8_C( -42), 
INT8_C( 36), INT8_C( 24), INT8_C( 27), INT8_C( -37), INT8_C( -84), INT8_C( 28), INT8_C( 116), INT8_C( 122), INT8_C( 100), INT8_C( 84), INT8_C( -23), INT8_C( 62), INT8_C(-128), INT8_C( 50)) }, { simde_mm512_set_epi8(INT8_C( 90), INT8_C(-125), INT8_C( -5), INT8_C( -69), INT8_C( 103), INT8_C( -40), INT8_C( 35), INT8_C( -38), INT8_C( 107), INT8_C(-117), INT8_C( -80), INT8_C( 123), INT8_C( 57), INT8_C( -70), INT8_C( -34), INT8_C( 38), INT8_C( 114), INT8_C(-126), INT8_C( 20), INT8_C( -93), INT8_C( 17), INT8_C(-128), INT8_C( 2), INT8_C( 15), INT8_C(-122), INT8_C( -10), INT8_C( -2), INT8_C( 84), INT8_C( 93), INT8_C( 89), INT8_C( 57), INT8_C( -50), INT8_C( 4), INT8_C( -63), INT8_C( 118), INT8_C( 19), INT8_C( -68), INT8_C( -29), INT8_C( -16), INT8_C( -30), INT8_C( -76), INT8_C(-110), INT8_C( 121), INT8_C( 87), INT8_C(-117), INT8_C(-113), INT8_C( -94), INT8_C( 61), INT8_C( -29), INT8_C( 43), INT8_C( -69), INT8_C( 123), INT8_C( 79), INT8_C(-101), INT8_C( 40), INT8_C( 64), INT8_C( 34), INT8_C(-102), INT8_C( 77), INT8_C( 114), INT8_C( -49), INT8_C( -31), INT8_C( 103), INT8_C( 81)), simde_mm512_set_epi8(INT8_C( 34), INT8_C(-125), INT8_C( -96), INT8_C(-111), INT8_C( 23), INT8_C( 58), INT8_C( -5), INT8_C( -22), INT8_C( -80), INT8_C( -90), INT8_C( -33), INT8_C( -65), INT8_C( -34), INT8_C( 100), INT8_C( -10), INT8_C( -57), INT8_C( 37), INT8_C( -9), INT8_C( -29), INT8_C(-114), INT8_C(-102), INT8_C( 53), INT8_C( 107), INT8_C( 124), INT8_C( -12), INT8_C( -57), INT8_C( -57), INT8_C( -37), INT8_C( 50), INT8_C( 22), INT8_C( -71), INT8_C( -2), INT8_C( -31), INT8_C( 60), INT8_C( 62), INT8_C( 19), INT8_C( 117), INT8_C( 1), INT8_C( -26), INT8_C(-100), INT8_C( 29), INT8_C( -6), INT8_C( 7), INT8_C(-100), INT8_C(-120), INT8_C( -33), INT8_C( 17), INT8_C(-103), INT8_C( 10), INT8_C( -45), INT8_C( -76), INT8_C( 10), INT8_C( -10), INT8_C( 15), INT8_C( 36), INT8_C( 2), INT8_C( 74), INT8_C( 44), INT8_C( -16), INT8_C( 122), INT8_C( -4), INT8_C( 15), INT8_C( -27), INT8_C( -15)), 
simde_mm512_set_epi8(INT8_C( 124), INT8_C(-128), INT8_C(-101), INT8_C(-128), INT8_C( 126), INT8_C( 18), INT8_C( 30), INT8_C( -60), INT8_C( 27), INT8_C(-128), INT8_C(-113), INT8_C( 58), INT8_C( 23), INT8_C( 30), INT8_C( -44), INT8_C( -19), INT8_C( 127), INT8_C(-128), INT8_C( -9), INT8_C(-128), INT8_C( -85), INT8_C( -75), INT8_C( 109), INT8_C( 127), INT8_C(-128), INT8_C( -67), INT8_C( -59), INT8_C( 47), INT8_C( 127), INT8_C( 111), INT8_C( -14), INT8_C( -52), INT8_C( -27), INT8_C( -3), INT8_C( 127), INT8_C( 38), INT8_C( 49), INT8_C( -28), INT8_C( -42), INT8_C(-128), INT8_C( -47), INT8_C(-116), INT8_C( 127), INT8_C( -13), INT8_C(-128), INT8_C(-128), INT8_C( -77), INT8_C( -42), INT8_C( -19), INT8_C( -2), INT8_C(-128), INT8_C( 127), INT8_C( 69), INT8_C( -86), INT8_C( 76), INT8_C( 66), INT8_C( 108), INT8_C( -58), INT8_C( 61), INT8_C( 127), INT8_C( -53), INT8_C( -16), INT8_C( 76), INT8_C( 66)) }, { simde_mm512_set_epi8(INT8_C( -52), INT8_C(-111), INT8_C( 67), INT8_C( -14), INT8_C( 11), INT8_C( -63), INT8_C( -35), INT8_C( -86), INT8_C(-114), INT8_C( -97), INT8_C( 124), INT8_C( -30), INT8_C( -1), INT8_C( 55), INT8_C( 84), INT8_C( 12), INT8_C( 86), INT8_C( -43), INT8_C( 81), INT8_C( -30), INT8_C( -25), INT8_C( 74), INT8_C(-105), INT8_C( 11), INT8_C( -97), INT8_C( 0), INT8_C( 98), INT8_C( 106), INT8_C( -83), INT8_C( -89), INT8_C( 16), INT8_C( -42), INT8_C( 81), INT8_C( 35), INT8_C(-127), INT8_C(-127), INT8_C( -38), INT8_C( 51), INT8_C( -75), INT8_C( 24), INT8_C( 86), INT8_C( -37), INT8_C( 97), INT8_C( -82), INT8_C( 109), INT8_C( 21), INT8_C( 127), INT8_C( 16), INT8_C( 101), INT8_C( -69), INT8_C( 95), INT8_C(-121), INT8_C( -66), INT8_C( 39), INT8_C( 104), INT8_C(-109), INT8_C( -1), INT8_C( 35), INT8_C( 68), INT8_C( 93), INT8_C( -25), INT8_C( 120), INT8_C( -19), INT8_C( 84)), simde_mm512_set_epi8(INT8_C(-111), INT8_C(-108), INT8_C( 102), INT8_C( -40), INT8_C(-124), INT8_C( -61), INT8_C( 80), INT8_C( 102), INT8_C( -35), INT8_C( 3), INT8_C( -76), INT8_C( 98), INT8_C( 9), INT8_C( 
-39), INT8_C( -24), INT8_C( -92), INT8_C( -73), INT8_C( -19), INT8_C(-103), INT8_C( -41), INT8_C( -32), INT8_C( -8), INT8_C( 126), INT8_C( 75), INT8_C( -64), INT8_C( -26), INT8_C( -15), INT8_C( -35), INT8_C( -20), INT8_C( -6), INT8_C( 86), INT8_C( -56), INT8_C( -56), INT8_C( 67), INT8_C( 59), INT8_C( -73), INT8_C( 35), INT8_C( -67), INT8_C( 92), INT8_C( 34), INT8_C( 16), INT8_C( -12), INT8_C( 44), INT8_C( -59), INT8_C( -82), INT8_C(-102), INT8_C( 93), INT8_C( -43), INT8_C( 79), INT8_C( -19), INT8_C( -45), INT8_C( -87), INT8_C( -50), INT8_C( -23), INT8_C( -17), INT8_C( 110), INT8_C( 102), INT8_C( -59), INT8_C(-117), INT8_C( 84), INT8_C( -75), INT8_C( 87), INT8_C( 69), INT8_C( -6)), simde_mm512_set_epi8(INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( -54), INT8_C(-113), INT8_C(-124), INT8_C( 45), INT8_C( 16), INT8_C(-128), INT8_C( -94), INT8_C( 48), INT8_C( 68), INT8_C( 8), INT8_C( 16), INT8_C( 60), INT8_C( -80), INT8_C( 13), INT8_C( -62), INT8_C( -22), INT8_C( -71), INT8_C( -57), INT8_C( 66), INT8_C( 21), INT8_C( 86), INT8_C(-128), INT8_C( -26), INT8_C( 83), INT8_C( 71), INT8_C(-103), INT8_C( -95), INT8_C( 102), INT8_C( -98), INT8_C( 25), INT8_C( 102), INT8_C( -68), INT8_C(-128), INT8_C( -3), INT8_C( -16), INT8_C( 17), INT8_C( 58), INT8_C( 102), INT8_C( -49), INT8_C( 127), INT8_C(-128), INT8_C( 27), INT8_C( -81), INT8_C( 127), INT8_C( -27), INT8_C( 127), INT8_C( -88), INT8_C( 50), INT8_C(-128), INT8_C(-116), INT8_C( 16), INT8_C( 87), INT8_C( 1), INT8_C( 101), INT8_C( -24), INT8_C( -49), INT8_C( 127), INT8_C(-100), INT8_C( 127), INT8_C( 50), INT8_C( 78)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_adds_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_adds_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask64 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { 
simde_mm512_set_epi8(INT8_C( 92), INT8_C( 116), INT8_C( -78), INT8_C( -19), INT8_C( -73), INT8_C( 22), INT8_C( -66), INT8_C( -29), INT8_C( 55), INT8_C( 78), INT8_C( -45), INT8_C(-119), INT8_C( 30), INT8_C( -29), INT8_C( -23), INT8_C( 89), INT8_C( 15), INT8_C( 119), INT8_C( -88), INT8_C( 109), INT8_C( -16), INT8_C( -10), INT8_C( 0), INT8_C( -18), INT8_C( 1), INT8_C( 116), INT8_C( 62), INT8_C( -73), INT8_C(-122), INT8_C( 0), INT8_C( 111), INT8_C( 95), INT8_C( -20), INT8_C( 3), INT8_C( 122), INT8_C(-121), INT8_C( 13), INT8_C( 25), INT8_C( -35), INT8_C(-127), INT8_C( 82), INT8_C( 90), INT8_C( 53), INT8_C( 123), INT8_C( 73), INT8_C( 108), INT8_C( -18), INT8_C( 15), INT8_C( 81), INT8_C( -27), INT8_C( 102), INT8_C( 118), INT8_C( 39), INT8_C( -77), INT8_C( 45), INT8_C( 81), INT8_C( -17), INT8_C( -28), INT8_C( 67), INT8_C(-118), INT8_C( 79), INT8_C(-113), INT8_C(-122), INT8_C( 124)), UINT64_C(10224647434006242820), simde_mm512_set_epi8(INT8_C( 68), INT8_C(-108), INT8_C(-121), INT8_C(-115), INT8_C( -59), INT8_C( -78), INT8_C(-111), INT8_C( 56), INT8_C( 9), INT8_C(-121), INT8_C( 120), INT8_C( -36), INT8_C( 36), INT8_C(-103), INT8_C(-104), INT8_C( 25), INT8_C(-111), INT8_C( -63), INT8_C( -35), INT8_C(-120), INT8_C( 28), INT8_C( -44), INT8_C( -26), INT8_C( -86), INT8_C( -13), INT8_C( 53), INT8_C( -88), INT8_C(-107), INT8_C( 68), INT8_C( 42), INT8_C(-118), INT8_C( 111), INT8_C( 54), INT8_C( -58), INT8_C( -13), INT8_C( 27), INT8_C( 23), INT8_C( 41), INT8_C(-119), INT8_C( 44), INT8_C( 7), INT8_C(-120), INT8_C( 32), INT8_C( -43), INT8_C( 114), INT8_C( -72), INT8_C( 73), INT8_C( -96), INT8_C( 96), INT8_C( 110), INT8_C( -81), INT8_C( -76), INT8_C( 103), INT8_C(-100), INT8_C( -22), INT8_C( 18), INT8_C( 115), INT8_C( 54), INT8_C( -40), INT8_C( 125), INT8_C( 110), INT8_C( 31), INT8_C( 51), INT8_C(-104)), simde_mm512_set_epi8(INT8_C( -14), INT8_C( 85), INT8_C( -80), INT8_C( 80), INT8_C( 48), INT8_C( 93), INT8_C( 79), INT8_C( 127), INT8_C( 16), INT8_C( 41), INT8_C( 54), INT8_C(-116), 
INT8_C( 17), INT8_C( 42), INT8_C( -86), INT8_C( 38), INT8_C(-118), INT8_C( -56), INT8_C( 60), INT8_C( 19), INT8_C(-100), INT8_C(-107), INT8_C( 105), INT8_C( -76), INT8_C( 10), INT8_C( -9), INT8_C( -12), INT8_C( -56), INT8_C( -71), INT8_C( 96), INT8_C( 6), INT8_C( 24), INT8_C( 69), INT8_C( 73), INT8_C( -27), INT8_C( 3), INT8_C( -95), INT8_C( 50), INT8_C( -67), INT8_C( -39), INT8_C( 97), INT8_C( 62), INT8_C( 101), INT8_C( 56), INT8_C( -79), INT8_C( 83), INT8_C( -99), INT8_C( -56), INT8_C( -17), INT8_C( -56), INT8_C( -8), INT8_C( -16), INT8_C( 84), INT8_C( -95), INT8_C( -73), INT8_C( 54), INT8_C( 125), INT8_C( 85), INT8_C( 78), INT8_C( -65), INT8_C( 4), INT8_C( 113), INT8_C( -16), INT8_C( -53)), simde_mm512_set_epi8(INT8_C( 54), INT8_C( 116), INT8_C( -78), INT8_C( -19), INT8_C( -11), INT8_C( 15), INT8_C( -66), INT8_C( 127), INT8_C( 25), INT8_C( -80), INT8_C( 127), INT8_C(-119), INT8_C( 30), INT8_C( -61), INT8_C( -23), INT8_C( 63), INT8_C( 15), INT8_C( 119), INT8_C( 25), INT8_C(-101), INT8_C( -72), INT8_C(-128), INT8_C( 79), INT8_C( -18), INT8_C( -3), INT8_C( 116), INT8_C(-100), INT8_C( -73), INT8_C( -3), INT8_C( 0), INT8_C(-112), INT8_C( 127), INT8_C( -20), INT8_C( 3), INT8_C( -40), INT8_C( 30), INT8_C( 13), INT8_C( 25), INT8_C( -35), INT8_C( 5), INT8_C( 104), INT8_C( 90), INT8_C( 53), INT8_C( 123), INT8_C( 35), INT8_C( 11), INT8_C( -18), INT8_C(-128), INT8_C( 81), INT8_C( -27), INT8_C( -89), INT8_C( 118), INT8_C( 39), INT8_C(-128), INT8_C( -95), INT8_C( 81), INT8_C( -17), INT8_C( -28), INT8_C( 67), INT8_C(-118), INT8_C( 79), INT8_C( 127), INT8_C(-122), INT8_C( 124)) }, { simde_mm512_set_epi8(INT8_C(-123), INT8_C(-100), INT8_C( -39), INT8_C( -44), INT8_C( 22), INT8_C( 112), INT8_C( 16), INT8_C( 15), INT8_C(-104), INT8_C(-102), INT8_C( 102), INT8_C( -95), INT8_C( 69), INT8_C( 13), INT8_C( -21), INT8_C( 45), INT8_C( 62), INT8_C(-120), INT8_C(-111), INT8_C( 32), INT8_C(-107), INT8_C( -30), INT8_C( 99), INT8_C( -64), INT8_C( 8), INT8_C( -42), INT8_C( 81), INT8_C( -34), 
INT8_C( -46), INT8_C( 26), INT8_C( 31), INT8_C( -2), INT8_C( 68), INT8_C( -7), INT8_C( -71), INT8_C( 46), INT8_C( -21), INT8_C( -73), INT8_C( 21), INT8_C( 83), INT8_C(-108), INT8_C( -97), INT8_C( -69), INT8_C( 73), INT8_C( 57), INT8_C( -37), INT8_C( 21), INT8_C( 82), INT8_C(-119), INT8_C(-126), INT8_C( 126), INT8_C( 91), INT8_C( 115), INT8_C( 31), INT8_C( -79), INT8_C( 28), INT8_C(-106), INT8_C( -18), INT8_C( 65), INT8_C(-104), INT8_C( 81), INT8_C( 38), INT8_C( -84), INT8_C( -2)), UINT64_C( 4597426592773770833), simde_mm512_set_epi8(INT8_C( 74), INT8_C( 72), INT8_C( -4), INT8_C( 28), INT8_C(-115), INT8_C( 93), INT8_C( 102), INT8_C( 44), INT8_C(-103), INT8_C( -29), INT8_C( -50), INT8_C( 48), INT8_C( -96), INT8_C( -50), INT8_C( 46), INT8_C( -65), INT8_C( 4), INT8_C( 43), INT8_C( -75), INT8_C( 97), INT8_C( -26), INT8_C(-103), INT8_C( 71), INT8_C(-107), INT8_C( 91), INT8_C( 45), INT8_C( -11), INT8_C( 47), INT8_C( 29), INT8_C( 25), INT8_C( 26), INT8_C( -9), INT8_C( 10), INT8_C( 36), INT8_C( -79), INT8_C( -53), INT8_C( 41), INT8_C( 1), INT8_C( -23), INT8_C( -63), INT8_C(-127), INT8_C( 68), INT8_C( 48), INT8_C( 36), INT8_C( 89), INT8_C(-112), INT8_C( -31), INT8_C( 120), INT8_C( 35), INT8_C( 62), INT8_C( -21), INT8_C(-114), INT8_C(-104), INT8_C( 57), INT8_C( 42), INT8_C(-111), INT8_C( 94), INT8_C( -63), INT8_C( -9), INT8_C( 64), INT8_C( -65), INT8_C( -2), INT8_C( 110), INT8_C( -8)), simde_mm512_set_epi8(INT8_C( 44), INT8_C( -92), INT8_C( -31), INT8_C( 26), INT8_C( -99), INT8_C( -53), INT8_C( 117), INT8_C( 18), INT8_C( -63), INT8_C( 45), INT8_C( 12), INT8_C( 24), INT8_C(-108), INT8_C( 18), INT8_C( -60), INT8_C( 28), INT8_C( 50), INT8_C( -11), INT8_C( -68), INT8_C( -31), INT8_C( 105), INT8_C(-106), INT8_C( 98), INT8_C( 51), INT8_C( 58), INT8_C( 103), INT8_C( 111), INT8_C(-127), INT8_C( 68), INT8_C( -56), INT8_C( 124), INT8_C(-119), INT8_C( 74), INT8_C( -62), INT8_C(-116), INT8_C( 37), INT8_C( -12), INT8_C( 114), INT8_C( 0), INT8_C( 61), INT8_C( 103), INT8_C( -4), 
INT8_C(-105), INT8_C( -68), INT8_C( 39), INT8_C(-100), INT8_C( -93), INT8_C( 11), INT8_C( -80), INT8_C( -19), INT8_C( -22), INT8_C( -39), INT8_C( 127), INT8_C( -38), INT8_C(-125), INT8_C(-111), INT8_C( 84), INT8_C( -96), INT8_C( 87), INT8_C( -22), INT8_C( -5), INT8_C( -3), INT8_C(-127), INT8_C( 41)), simde_mm512_set_epi8(INT8_C(-123), INT8_C(-100), INT8_C( -35), INT8_C( 54), INT8_C(-128), INT8_C( 40), INT8_C( 127), INT8_C( 62), INT8_C(-128), INT8_C( 16), INT8_C( 102), INT8_C( -95), INT8_C(-128), INT8_C( -32), INT8_C( -21), INT8_C( -37), INT8_C( 62), INT8_C( 32), INT8_C(-111), INT8_C( 66), INT8_C(-107), INT8_C(-128), INT8_C( 127), INT8_C( -56), INT8_C( 8), INT8_C( -42), INT8_C( 100), INT8_C( -34), INT8_C( -46), INT8_C( 26), INT8_C( 31), INT8_C( -2), INT8_C( 84), INT8_C( -7), INT8_C(-128), INT8_C( -16), INT8_C( 29), INT8_C( 115), INT8_C( -23), INT8_C( -2), INT8_C(-108), INT8_C( -97), INT8_C( -57), INT8_C( -32), INT8_C( 57), INT8_C(-128), INT8_C(-124), INT8_C( 127), INT8_C(-119), INT8_C(-126), INT8_C( 126), INT8_C( 91), INT8_C( 23), INT8_C( 19), INT8_C( -83), INT8_C( 28), INT8_C(-106), INT8_C(-128), INT8_C( 65), INT8_C( 42), INT8_C( 81), INT8_C( 38), INT8_C( -84), INT8_C( 33)) }, { simde_mm512_set_epi8(INT8_C( -30), INT8_C( -37), INT8_C( 51), INT8_C( -36), INT8_C( 8), INT8_C( 52), INT8_C( 97), INT8_C( 123), INT8_C( -49), INT8_C(-124), INT8_C( 95), INT8_C( -83), INT8_C( 70), INT8_C( -50), INT8_C( -61), INT8_C( 25), INT8_C( -97), INT8_C( 28), INT8_C( -58), INT8_C( 11), INT8_C( -14), INT8_C( 126), INT8_C( 81), INT8_C( 45), INT8_C( -23), INT8_C( 120), INT8_C( -83), INT8_C( -16), INT8_C( 7), INT8_C( 51), INT8_C( -57), INT8_C( -50), INT8_C( -21), INT8_C( 98), INT8_C( 88), INT8_C( 0), INT8_C( -66), INT8_C( 3), INT8_C( 124), INT8_C(-113), INT8_C( 50), INT8_C( 88), INT8_C( -85), INT8_C( -93), INT8_C( -44), INT8_C( -13), INT8_C( -94), INT8_C( 17), INT8_C( -2), INT8_C( 79), INT8_C(-116), INT8_C( 43), INT8_C( -77), INT8_C(-125), INT8_C( -23), INT8_C(-120), INT8_C( 96), INT8_C( 
-64), INT8_C( -23), INT8_C( -46), INT8_C( -29), INT8_C( -71), INT8_C( 71), INT8_C( 90)), UINT64_C(12627002542648829104), simde_mm512_set_epi8(INT8_C( 56), INT8_C( -45), INT8_C(-108), INT8_C( -19), INT8_C(-124), INT8_C( -27), INT8_C( 22), INT8_C( 126), INT8_C(-106), INT8_C( -68), INT8_C( -60), INT8_C( 8), INT8_C( 60), INT8_C( 93), INT8_C( -33), INT8_C( -27), INT8_C( -7), INT8_C( 27), INT8_C(-122), INT8_C( -38), INT8_C( 23), INT8_C( 6), INT8_C( 45), INT8_C( -21), INT8_C( -23), INT8_C(-101), INT8_C( 116), INT8_C( 127), INT8_C( 96), INT8_C( 40), INT8_C( -97), INT8_C( 40), INT8_C( 86), INT8_C( -44), INT8_C( 70), INT8_C( -71), INT8_C( 62), INT8_C( -21), INT8_C( 66), INT8_C( 68), INT8_C( -87), INT8_C( -61), INT8_C( 48), INT8_C( -70), INT8_C( 18), INT8_C( -78), INT8_C( -98), INT8_C( 117), INT8_C( 74), INT8_C( 32), INT8_C( 93), INT8_C( 125), INT8_C( -47), INT8_C( -60), INT8_C( -86), INT8_C( 117), INT8_C( 122), INT8_C( -54), INT8_C( 50), INT8_C( 123), INT8_C( -31), INT8_C( -74), INT8_C( -64), INT8_C( 54)), simde_mm512_set_epi8(INT8_C( 115), INT8_C( 51), INT8_C( -91), INT8_C( 56), INT8_C( 64), INT8_C( -39), INT8_C(-119), INT8_C( -28), INT8_C( -54), INT8_C( 28), INT8_C( 54), INT8_C( -8), INT8_C( -54), INT8_C(-128), INT8_C( -28), INT8_C( -71), INT8_C( 107), INT8_C( -66), INT8_C(-114), INT8_C( -88), INT8_C( 34), INT8_C( -83), INT8_C( -21), INT8_C( -64), INT8_C( 121), INT8_C( -20), INT8_C( -89), INT8_C( -94), INT8_C( 112), INT8_C( -27), INT8_C( 81), INT8_C( -54), INT8_C( -64), INT8_C(-114), INT8_C( 48), INT8_C( -89), INT8_C( -61), INT8_C( 26), INT8_C( 43), INT8_C( 29), INT8_C( 0), INT8_C( 64), INT8_C( 123), INT8_C( -67), INT8_C( 15), INT8_C( 120), INT8_C( 36), INT8_C( 40), INT8_C( 106), INT8_C(-118), INT8_C(-108), INT8_C( -58), INT8_C( 26), INT8_C(-111), INT8_C( 63), INT8_C( -98), INT8_C( -13), INT8_C( -12), INT8_C(-124), INT8_C( 96), INT8_C( -13), INT8_C( -98), INT8_C( 99), INT8_C( -13)), simde_mm512_set_epi8(INT8_C( 127), INT8_C( -37), INT8_C(-128), INT8_C( -36), INT8_C( -60), 
INT8_C( -66), INT8_C( -97), INT8_C( 98), INT8_C( -49), INT8_C(-124), INT8_C( -6), INT8_C( 0), INT8_C( 6), INT8_C( -35), INT8_C( -61), INT8_C( 25), INT8_C( -97), INT8_C( 28), INT8_C( -58), INT8_C(-126), INT8_C( 57), INT8_C( -77), INT8_C( 24), INT8_C( -85), INT8_C( 98), INT8_C(-121), INT8_C( 27), INT8_C( -16), INT8_C( 127), INT8_C( 51), INT8_C( -57), INT8_C( -14), INT8_C( -21), INT8_C(-128), INT8_C( 118), INT8_C( 0), INT8_C( 1), INT8_C( 5), INT8_C( 124), INT8_C(-113), INT8_C( -87), INT8_C( 88), INT8_C( -85), INT8_C( -93), INT8_C( 33), INT8_C( -13), INT8_C( -94), INT8_C( 127), INT8_C( 127), INT8_C( 79), INT8_C( -15), INT8_C( 43), INT8_C( -77), INT8_C(-128), INT8_C( -23), INT8_C(-120), INT8_C( 109), INT8_C( -64), INT8_C( -74), INT8_C( 127), INT8_C( -29), INT8_C( -71), INT8_C( 71), INT8_C( 90)) }, { simde_mm512_set_epi8(INT8_C( 72), INT8_C( 45), INT8_C( 120), INT8_C( -5), INT8_C(-109), INT8_C( 62), INT8_C( 17), INT8_C( 31), INT8_C( -30), INT8_C( -58), INT8_C( 56), INT8_C( 21), INT8_C( 72), INT8_C( -74), INT8_C( -40), INT8_C( 120), INT8_C( 95), INT8_C( 108), INT8_C( 32), INT8_C( 64), INT8_C(-128), INT8_C( 102), INT8_C( -21), INT8_C( 28), INT8_C( 105), INT8_C( 52), INT8_C( 85), INT8_C(-104), INT8_C( 57), INT8_C( -31), INT8_C( -38), INT8_C(-124), INT8_C(-107), INT8_C( -2), INT8_C( 55), INT8_C( 46), INT8_C( -71), INT8_C( 77), INT8_C( 18), INT8_C( 70), INT8_C( 89), INT8_C( 125), INT8_C( -42), INT8_C(-125), INT8_C( 121), INT8_C( -11), INT8_C( -69), INT8_C( -59), INT8_C( -53), INT8_C( 34), INT8_C( 9), INT8_C( 64), INT8_C( -61), INT8_C( -25), INT8_C(-115), INT8_C( 100), INT8_C( 65), INT8_C( 8), INT8_C( 69), INT8_C( -8), INT8_C( -15), INT8_C( -51), INT8_C( 1), INT8_C( 90)), UINT64_C(14515151237088493607), simde_mm512_set_epi8(INT8_C( -26), INT8_C( -9), INT8_C( 66), INT8_C( 1), INT8_C( -13), INT8_C( 60), INT8_C(-119), INT8_C( -83), INT8_C(-122), INT8_C( -64), INT8_C( -83), INT8_C( -74), INT8_C( 119), INT8_C( -8), INT8_C( 12), INT8_C( 113), INT8_C( -12), INT8_C( -84), INT8_C( 6), 
INT8_C( 69), INT8_C( 2), INT8_C( -75), INT8_C( -34), INT8_C(-126), INT8_C( 3), INT8_C(-128), INT8_C( -9), INT8_C( 24), INT8_C( 11), INT8_C( -94), INT8_C( -32), INT8_C( 110), INT8_C( 33), INT8_C( -24), INT8_C( 125), INT8_C( 35), INT8_C(-103), INT8_C( -48), INT8_C( -22), INT8_C( 38), INT8_C( -81), INT8_C( 9), INT8_C( -11), INT8_C(-124), INT8_C( 71), INT8_C( 31), INT8_C( -42), INT8_C( 93), INT8_C( 67), INT8_C( 45), INT8_C( 51), INT8_C( -92), INT8_C( 126), INT8_C( 108), INT8_C(-123), INT8_C( -71), INT8_C( 113), INT8_C( 32), INT8_C( 71), INT8_C( 55), INT8_C( -26), INT8_C( 82), INT8_C( -81), INT8_C( -20)), simde_mm512_set_epi8(INT8_C(-125), INT8_C( 121), INT8_C(-128), INT8_C( 103), INT8_C( 0), INT8_C( 101), INT8_C( -41), INT8_C( 89), INT8_C( -83), INT8_C( -65), INT8_C( 9), INT8_C( -7), INT8_C( -63), INT8_C( 13), INT8_C( 105), INT8_C( 92), INT8_C( -18), INT8_C( -21), INT8_C(-102), INT8_C(-114), INT8_C( 74), INT8_C( 121), INT8_C( -45), INT8_C( 52), INT8_C( -63), INT8_C( -93), INT8_C( 98), INT8_C( 106), INT8_C(-109), INT8_C( -47), INT8_C( 37), INT8_C( 70), INT8_C( 100), INT8_C( 121), INT8_C( 18), INT8_C( 28), INT8_C(-117), INT8_C( 107), INT8_C( 3), INT8_C( -62), INT8_C( 42), INT8_C( 72), INT8_C( 91), INT8_C( 86), INT8_C( -72), INT8_C( 9), INT8_C( -80), INT8_C( 118), INT8_C( 122), INT8_C(-108), INT8_C( -70), INT8_C( -63), INT8_C( 56), INT8_C( 71), INT8_C( -14), INT8_C( 49), INT8_C( -73), INT8_C( 53), INT8_C( -29), INT8_C( 3), INT8_C( -73), INT8_C( 43), INT8_C( -22), INT8_C( 85)), simde_mm512_set_epi8(INT8_C(-128), INT8_C( 112), INT8_C( 120), INT8_C( -5), INT8_C( -13), INT8_C( 62), INT8_C( 17), INT8_C( 6), INT8_C( -30), INT8_C(-128), INT8_C( -74), INT8_C( -81), INT8_C( 72), INT8_C( -74), INT8_C( -40), INT8_C( 120), INT8_C( 95), INT8_C( 108), INT8_C( -96), INT8_C( 64), INT8_C( 76), INT8_C( 46), INT8_C( -21), INT8_C( -74), INT8_C( 105), INT8_C( 52), INT8_C( 89), INT8_C(-104), INT8_C( 57), INT8_C(-128), INT8_C( -38), INT8_C( 127), INT8_C(-107), INT8_C( 97), INT8_C( 55), INT8_C( 
46), INT8_C( -71), INT8_C( 77), INT8_C( -19), INT8_C( -24), INT8_C( -39), INT8_C( 125), INT8_C( 80), INT8_C( -38), INT8_C( -1), INT8_C( -11), INT8_C(-122), INT8_C( 127), INT8_C( -53), INT8_C( -63), INT8_C( 9), INT8_C( 64), INT8_C( -61), INT8_C( -25), INT8_C(-115), INT8_C( 100), INT8_C( 65), INT8_C( 8), INT8_C( 42), INT8_C( -8), INT8_C( -15), INT8_C( 125), INT8_C(-103), INT8_C( 65)) }, { simde_mm512_set_epi8(INT8_C( -47), INT8_C( 84), INT8_C(-126), INT8_C( -64), INT8_C( 14), INT8_C( 11), INT8_C( 37), INT8_C( -23), INT8_C( 67), INT8_C( 124), INT8_C( 58), INT8_C( -94), INT8_C( 30), INT8_C( -33), INT8_C( 70), INT8_C( -24), INT8_C( 38), INT8_C( -97), INT8_C( -56), INT8_C( -60), INT8_C( -59), INT8_C( 65), INT8_C( -74), INT8_C( 45), INT8_C( -11), INT8_C( 55), INT8_C( -82), INT8_C( 12), INT8_C( 106), INT8_C( 22), INT8_C(-124), INT8_C( -4), INT8_C( 2), INT8_C( -81), INT8_C( 14), INT8_C( 90), INT8_C(-100), INT8_C(-122), INT8_C( -35), INT8_C( 81), INT8_C( -14), INT8_C( -42), INT8_C( 125), INT8_C(-125), INT8_C( -57), INT8_C( 90), INT8_C( -9), INT8_C( 63), INT8_C( 53), INT8_C( 77), INT8_C( 63), INT8_C( -84), INT8_C( 27), INT8_C( 22), INT8_C( 3), INT8_C( -37), INT8_C( 65), INT8_C( 118), INT8_C(-126), INT8_C( 97), INT8_C( 109), INT8_C( 7), INT8_C(-114), INT8_C( -75)), UINT64_C( 8707623543556880126), simde_mm512_set_epi8(INT8_C( 84), INT8_C( -71), INT8_C( 8), INT8_C( 12), INT8_C( -11), INT8_C( -76), INT8_C( 62), INT8_C( 93), INT8_C( -75), INT8_C( -77), INT8_C( -84), INT8_C(-108), INT8_C( -35), INT8_C( 14), INT8_C( -60), INT8_C( 18), INT8_C( 23), INT8_C( -60), INT8_C( -63), INT8_C(-114), INT8_C( -55), INT8_C( 75), INT8_C( -99), INT8_C( -55), INT8_C( 58), INT8_C( 76), INT8_C(-102), INT8_C(-118), INT8_C( -30), INT8_C( 39), INT8_C( 119), INT8_C( 85), INT8_C( -8), INT8_C( -72), INT8_C( -60), INT8_C( -94), INT8_C(-112), INT8_C( 119), INT8_C( 124), INT8_C( 76), INT8_C( -42), INT8_C(-124), INT8_C( 54), INT8_C( 74), INT8_C( -92), INT8_C( 99), INT8_C( 79), INT8_C( -3), INT8_C( 61), INT8_C( 
-89), INT8_C( 84), INT8_C( -94), INT8_C( 31), INT8_C(-116), INT8_C( -67), INT8_C(-102), INT8_C( -72), INT8_C( -91), INT8_C(-105), INT8_C(-108), INT8_C( -44), INT8_C( 74), INT8_C( -28), INT8_C( 124)), simde_mm512_set_epi8(INT8_C( 1), INT8_C( 75), INT8_C( 21), INT8_C( -36), INT8_C(-126), INT8_C( 122), INT8_C( 71), INT8_C( 76), INT8_C( 28), INT8_C( -56), INT8_C( 32), INT8_C( 101), INT8_C(-107), INT8_C(-111), INT8_C( -88), INT8_C( -19), INT8_C( -77), INT8_C( 19), INT8_C( -21), INT8_C(-111), INT8_C( -68), INT8_C( 82), INT8_C(-118), INT8_C( -76), INT8_C( 47), INT8_C( 127), INT8_C( 62), INT8_C( -16), INT8_C( 10), INT8_C( -14), INT8_C(-100), INT8_C( 86), INT8_C( 29), INT8_C( 107), INT8_C( 56), INT8_C( 21), INT8_C( 24), INT8_C( 68), INT8_C( -96), INT8_C( 64), INT8_C( 48), INT8_C( 13), INT8_C( -83), INT8_C( 4), INT8_C( -3), INT8_C( -64), INT8_C( 17), INT8_C(-115), INT8_C( 21), INT8_C( 108), INT8_C( 125), INT8_C( -60), INT8_C( -72), INT8_C( 74), INT8_C( -5), INT8_C( -58), INT8_C( -41), INT8_C( 22), INT8_C(-115), INT8_C( 102), INT8_C( 59), INT8_C( -80), INT8_C( -15), INT8_C( -63)), simde_mm512_set_epi8(INT8_C( -47), INT8_C( 4), INT8_C( 29), INT8_C( -24), INT8_C(-128), INT8_C( 11), INT8_C( 37), INT8_C( -23), INT8_C( -47), INT8_C(-128), INT8_C( 58), INT8_C( -7), INT8_C( 30), INT8_C( -97), INT8_C(-128), INT8_C( -1), INT8_C( -54), INT8_C( -97), INT8_C( -84), INT8_C(-128), INT8_C( -59), INT8_C( 65), INT8_C( -74), INT8_C(-128), INT8_C( -11), INT8_C( 127), INT8_C( -40), INT8_C(-128), INT8_C( -20), INT8_C( 22), INT8_C( 19), INT8_C( -4), INT8_C( 2), INT8_C( 35), INT8_C( 14), INT8_C( -73), INT8_C(-100), INT8_C( 127), INT8_C( 28), INT8_C( 127), INT8_C( 6), INT8_C( -42), INT8_C( 125), INT8_C(-125), INT8_C( -95), INT8_C( 90), INT8_C( -9), INT8_C(-118), INT8_C( 53), INT8_C( 77), INT8_C( 127), INT8_C(-128), INT8_C( 27), INT8_C( -42), INT8_C( -72), INT8_C( -37), INT8_C(-113), INT8_C( -69), INT8_C(-128), INT8_C( -6), INT8_C( 15), INT8_C( -6), INT8_C( -43), INT8_C( -75)) }, { 
simde_mm512_set_epi8(INT8_C( -64), INT8_C( -53), INT8_C( -42), INT8_C( 126), INT8_C( 67), INT8_C( 50), INT8_C( -18), INT8_C( 76), INT8_C( -19), INT8_C( 123), INT8_C( -87), INT8_C( 106), INT8_C( -74), INT8_C( 44), INT8_C( 117), INT8_C( 103), INT8_C( 81), INT8_C( 122), INT8_C( 56), INT8_C( -10), INT8_C( 67), INT8_C( 79), INT8_C( 83), INT8_C( -38), INT8_C( -13), INT8_C( 43), INT8_C( 27), INT8_C( -97), INT8_C( 102), INT8_C( 126), INT8_C( 38), INT8_C( -62), INT8_C( -24), INT8_C( 117), INT8_C( -38), INT8_C( -93), INT8_C( -58), INT8_C(-124), INT8_C( -75), INT8_C( 10), INT8_C( 18), INT8_C( -74), INT8_C( 14), INT8_C( 36), INT8_C( -7), INT8_C( 113), INT8_C( 40), INT8_C( 48), INT8_C(-107), INT8_C( -34), INT8_C( -75), INT8_C( 85), INT8_C( -35), INT8_C(-116), INT8_C( 65), INT8_C( -21), INT8_C( 15), INT8_C( 3), INT8_C( 45), INT8_C( 21), INT8_C( 72), INT8_C( 93), INT8_C( 108), INT8_C( 125)), UINT64_C(12576710173448868104), simde_mm512_set_epi8(INT8_C( 90), INT8_C( -38), INT8_C( -98), INT8_C( -70), INT8_C(-108), INT8_C( 20), INT8_C( 43), INT8_C(-128), INT8_C( 77), INT8_C( 108), INT8_C( 53), INT8_C( 82), INT8_C( -50), INT8_C( 52), INT8_C( 56), INT8_C( 58), INT8_C(-120), INT8_C( -43), INT8_C( 114), INT8_C( 93), INT8_C( -44), INT8_C( -15), INT8_C( 38), INT8_C( -17), INT8_C(-110), INT8_C(-123), INT8_C( -39), INT8_C( 114), INT8_C( 51), INT8_C(-115), INT8_C( -74), INT8_C( 43), INT8_C( 41), INT8_C( -36), INT8_C( 19), INT8_C( 69), INT8_C( 60), INT8_C( -53), INT8_C( 112), INT8_C( 108), INT8_C( 8), INT8_C( 46), INT8_C( -35), INT8_C( 26), INT8_C( 11), INT8_C( 42), INT8_C( 47), INT8_C( 59), INT8_C( -57), INT8_C( 94), INT8_C(-125), INT8_C(-124), INT8_C( 36), INT8_C( 57), INT8_C( 68), INT8_C( -52), INT8_C( 39), INT8_C( 50), INT8_C( -48), INT8_C( 94), INT8_C( 53), INT8_C( 11), INT8_C( 29), INT8_C( 65)), simde_mm512_set_epi8(INT8_C(-107), INT8_C( 120), INT8_C( -58), INT8_C( 107), INT8_C( -32), INT8_C( -32), INT8_C( 88), INT8_C( -43), INT8_C( 31), INT8_C( -32), INT8_C( -64), INT8_C( 27), INT8_C( 
82), INT8_C( -90), INT8_C( -54), INT8_C( -84), INT8_C( -30), INT8_C( 63), INT8_C( 24), INT8_C( 81), INT8_C( -8), INT8_C( 9), INT8_C( -35), INT8_C(-101), INT8_C( 83), INT8_C( 107), INT8_C( -47), INT8_C( -56), INT8_C( 57), INT8_C( -88), INT8_C(-115), INT8_C( -20), INT8_C( 58), INT8_C( 75), INT8_C( 56), INT8_C( 93), INT8_C( 49), INT8_C( 43), INT8_C( 108), INT8_C( 118), INT8_C( -79), INT8_C( 112), INT8_C( 44), INT8_C(-112), INT8_C( -52), INT8_C( 10), INT8_C( 28), INT8_C( -86), INT8_C( 65), INT8_C( 62), INT8_C( 86), INT8_C(-107), INT8_C( 24), INT8_C( -55), INT8_C( 54), INT8_C(-110), INT8_C( -33), INT8_C( 110), INT8_C(-116), INT8_C( -39), INT8_C( 39), INT8_C(-112), INT8_C( 64), INT8_C( 43)), simde_mm512_set_epi8(INT8_C( -17), INT8_C( -53), INT8_C(-128), INT8_C( 126), INT8_C(-128), INT8_C( -12), INT8_C( 127), INT8_C( 76), INT8_C( 108), INT8_C( 123), INT8_C( -87), INT8_C( 106), INT8_C( 32), INT8_C( 44), INT8_C( 117), INT8_C( -26), INT8_C( 81), INT8_C( 20), INT8_C( 127), INT8_C( 127), INT8_C( 67), INT8_C( 79), INT8_C( 3), INT8_C(-118), INT8_C( -13), INT8_C( -16), INT8_C( 27), INT8_C( -97), INT8_C( 102), INT8_C(-128), INT8_C( 38), INT8_C( -62), INT8_C( 99), INT8_C( 117), INT8_C( 75), INT8_C( 127), INT8_C( 109), INT8_C(-124), INT8_C( 127), INT8_C( 127), INT8_C( 18), INT8_C( 127), INT8_C( 9), INT8_C( -86), INT8_C( -7), INT8_C( 52), INT8_C( 75), INT8_C( 48), INT8_C(-107), INT8_C( -34), INT8_C( -75), INT8_C(-128), INT8_C( -35), INT8_C(-116), INT8_C( 65), INT8_C(-128), INT8_C( 15), INT8_C( 3), INT8_C( 45), INT8_C( 21), INT8_C( 92), INT8_C( 93), INT8_C( 108), INT8_C( 125)) }, { simde_mm512_set_epi8(INT8_C(-103), INT8_C(-124), INT8_C( 13), INT8_C( -80), INT8_C( -32), INT8_C( 123), INT8_C( 115), INT8_C( -49), INT8_C( 113), INT8_C( -51), INT8_C( 119), INT8_C( -67), INT8_C( -21), INT8_C( 19), INT8_C( -12), INT8_C( 29), INT8_C( 91), INT8_C( -64), INT8_C( 88), INT8_C(-105), INT8_C( 24), INT8_C( 31), INT8_C( 77), INT8_C( 92), INT8_C( -85), INT8_C( 48), INT8_C(-113), INT8_C(-114), INT8_C( 
16), INT8_C( 18), INT8_C( -44), INT8_C( -66), INT8_C( 25), INT8_C(-105), INT8_C(-100), INT8_C( -91), INT8_C( 26), INT8_C(-123), INT8_C( -33), INT8_C( -37), INT8_C( 63), INT8_C( 13), INT8_C(-124), INT8_C( 41), INT8_C(-104), INT8_C( -59), INT8_C( -2), INT8_C( -54), INT8_C( 73), INT8_C( 3), INT8_C( -18), INT8_C( 10), INT8_C( 19), INT8_C( 58), INT8_C( -12), INT8_C( -75), INT8_C( -88), INT8_C( 59), INT8_C( 104), INT8_C( -40), INT8_C( -82), INT8_C( 42), INT8_C( -73), INT8_C( -94)), UINT64_C( 2633789449456316803), simde_mm512_set_epi8(INT8_C( 118), INT8_C( 44), INT8_C( -14), INT8_C( 84), INT8_C( -69), INT8_C( -48), INT8_C( 40), INT8_C( 86), INT8_C( -1), INT8_C( 121), INT8_C( -40), INT8_C( 44), INT8_C( -1), INT8_C( 38), INT8_C( -44), INT8_C( 38), INT8_C( 93), INT8_C(-107), INT8_C( -4), INT8_C( -61), INT8_C( -52), INT8_C( 22), INT8_C( -54), INT8_C(-125), INT8_C( -29), INT8_C( 25), INT8_C( -77), INT8_C( -68), INT8_C( 9), INT8_C( -35), INT8_C( -11), INT8_C( 53), INT8_C( 28), INT8_C( 61), INT8_C( -35), INT8_C(-106), INT8_C( -46), INT8_C( 121), INT8_C(-102), INT8_C( 121), INT8_C( -54), INT8_C( -60), INT8_C( 7), INT8_C( 2), INT8_C(-119), INT8_C( 111), INT8_C( -20), INT8_C( -58), INT8_C( -98), INT8_C( 83), INT8_C( 32), INT8_C( -49), INT8_C( -11), INT8_C( 48), INT8_C( 92), INT8_C( 47), INT8_C( -38), INT8_C( 11), INT8_C( 26), INT8_C( 90), INT8_C( -5), INT8_C( 73), INT8_C( 45), INT8_C( -2)), simde_mm512_set_epi8(INT8_C( -31), INT8_C( 126), INT8_C(-118), INT8_C( -57), INT8_C(-126), INT8_C( 29), INT8_C( 101), INT8_C( -15), INT8_C( -61), INT8_C( 118), INT8_C( 102), INT8_C( 12), INT8_C( -59), INT8_C( -41), INT8_C( -60), INT8_C( 46), INT8_C( 63), INT8_C( 78), INT8_C( 87), INT8_C( 18), INT8_C( 18), INT8_C( 32), INT8_C( -26), INT8_C( -6), INT8_C( -20), INT8_C(-123), INT8_C( 99), INT8_C( 65), INT8_C( 13), INT8_C( 25), INT8_C( 108), INT8_C(-121), INT8_C( 42), INT8_C( 0), INT8_C( 104), INT8_C( -17), INT8_C( 111), INT8_C(-110), INT8_C( -39), INT8_C( 125), INT8_C( -50), INT8_C( -47), INT8_C( 
30), INT8_C( 92), INT8_C( 19), INT8_C( -8), INT8_C(-120), INT8_C( 127), INT8_C( -49), INT8_C( 23), INT8_C( 16), INT8_C( -64), INT8_C( -79), INT8_C( 116), INT8_C( -5), INT8_C( -50), INT8_C( 32), INT8_C( 22), INT8_C( -42), INT8_C( -3), INT8_C( 30), INT8_C( 64), INT8_C( 96), INT8_C( -66)), simde_mm512_set_epi8(INT8_C(-103), INT8_C(-124), INT8_C(-128), INT8_C( -80), INT8_C( -32), INT8_C( -19), INT8_C( 115), INT8_C( -49), INT8_C( -62), INT8_C( -51), INT8_C( 119), INT8_C( -67), INT8_C( -60), INT8_C( -3), INT8_C( -12), INT8_C( 84), INT8_C( 91), INT8_C( -64), INT8_C( 88), INT8_C( -43), INT8_C( -34), INT8_C( 31), INT8_C( 77), INT8_C(-128), INT8_C( -49), INT8_C( 48), INT8_C(-113), INT8_C(-114), INT8_C( 22), INT8_C( -10), INT8_C( -44), INT8_C( -66), INT8_C( 70), INT8_C( 61), INT8_C(-100), INT8_C( -91), INT8_C( 26), INT8_C( 11), INT8_C( -33), INT8_C( -37), INT8_C( 63), INT8_C( 13), INT8_C(-124), INT8_C( 41), INT8_C(-104), INT8_C( -59), INT8_C(-128), INT8_C( 69), INT8_C(-128), INT8_C( 106), INT8_C( 48), INT8_C(-113), INT8_C( 19), INT8_C( 58), INT8_C( -12), INT8_C( -3), INT8_C( -6), INT8_C( 59), INT8_C( 104), INT8_C( -40), INT8_C( -82), INT8_C( 42), INT8_C( 127), INT8_C( -68)) }, { simde_mm512_set_epi8(INT8_C( 117), INT8_C( 69), INT8_C( 121), INT8_C( -45), INT8_C( 30), INT8_C( -73), INT8_C( 2), INT8_C( -40), INT8_C( 95), INT8_C( -3), INT8_C( 16), INT8_C( -78), INT8_C(-128), INT8_C( -41), INT8_C( -66), INT8_C( 66), INT8_C( 93), INT8_C( -32), INT8_C( -50), INT8_C( -2), INT8_C( -61), INT8_C( 29), INT8_C( -88), INT8_C(-118), INT8_C( -27), INT8_C( 42), INT8_C( 78), INT8_C( -46), INT8_C( -79), INT8_C( 38), INT8_C( -75), INT8_C( 14), INT8_C(-118), INT8_C(-114), INT8_C(-120), INT8_C( 42), INT8_C( 4), INT8_C( 79), INT8_C( -84), INT8_C(-110), INT8_C( 0), INT8_C( 85), INT8_C( -20), INT8_C( 61), INT8_C( 40), INT8_C( -75), INT8_C( 69), INT8_C( -99), INT8_C( 73), INT8_C( -34), INT8_C( -14), INT8_C( 72), INT8_C( 101), INT8_C( 96), INT8_C(-114), INT8_C(-124), INT8_C( -87), INT8_C( 43), 
INT8_C(-118), INT8_C( 114), INT8_C( -88), INT8_C( 8), INT8_C( -52), INT8_C( 75)), UINT64_C(14890918166471265655), simde_mm512_set_epi8(INT8_C( -56), INT8_C( -70), INT8_C( 66), INT8_C(-111), INT8_C( -69), INT8_C( 15), INT8_C( -82), INT8_C( -12), INT8_C( -89), INT8_C( 37), INT8_C( 80), INT8_C( 120), INT8_C( -83), INT8_C(-120), INT8_C( 95), INT8_C( 21), INT8_C( 91), INT8_C( -97), INT8_C( -72), INT8_C( 42), INT8_C( 22), INT8_C( -70), INT8_C( 71), INT8_C( -78), INT8_C( -5), INT8_C( 52), INT8_C( -22), INT8_C( -34), INT8_C( 16), INT8_C( 92), INT8_C( 91), INT8_C( -72), INT8_C( 3), INT8_C( -31), INT8_C( -95), INT8_C( -56), INT8_C( -50), INT8_C( 68), INT8_C( -24), INT8_C( -50), INT8_C( 94), INT8_C( 67), INT8_C( 108), INT8_C(-118), INT8_C( -65), INT8_C( 31), INT8_C( 70), INT8_C( 108), INT8_C( -66), INT8_C( 107), INT8_C( -85), INT8_C( 38), INT8_C( 2), INT8_C( 32), INT8_C( 56), INT8_C( 66), INT8_C( -2), INT8_C( -74), INT8_C( 112), INT8_C( 10), INT8_C( 64), INT8_C( 100), INT8_C( -55), INT8_C( 83)), simde_mm512_set_epi8(INT8_C( -69), INT8_C( -45), INT8_C( 23), INT8_C( 70), INT8_C( 22), INT8_C( 113), INT8_C( 75), INT8_C(-117), INT8_C( 69), INT8_C( -74), INT8_C( 40), INT8_C( -39), INT8_C( -51), INT8_C( -91), INT8_C( 25), INT8_C( -4), INT8_C( -12), INT8_C( -13), INT8_C( -77), INT8_C( 75), INT8_C(-116), INT8_C( -36), INT8_C( 38), INT8_C( -52), INT8_C( 13), INT8_C( -52), INT8_C(-100), INT8_C( -46), INT8_C( 13), INT8_C( 83), INT8_C( -94), INT8_C( 102), INT8_C( 116), INT8_C( -60), INT8_C( 44), INT8_C( -7), INT8_C( 104), INT8_C( -50), INT8_C( -30), INT8_C( 17), INT8_C( 13), INT8_C(-107), INT8_C( 111), INT8_C( -13), INT8_C( 17), INT8_C( -45), INT8_C( -31), INT8_C( 76), INT8_C(-102), INT8_C( -71), INT8_C( 116), INT8_C( 72), INT8_C( -11), INT8_C( -97), INT8_C( 93), INT8_C( 48), INT8_C( -75), INT8_C( -90), INT8_C( -10), INT8_C( -78), INT8_C( -8), INT8_C( -10), INT8_C( 48), INT8_C( -11)), simde_mm512_set_epi8(INT8_C(-125), INT8_C(-115), INT8_C( 121), INT8_C( -45), INT8_C( -47), INT8_C( 127), 
INT8_C( -7), INT8_C( -40), INT8_C( -20), INT8_C( -3), INT8_C( 120), INT8_C( -78), INT8_C(-128), INT8_C(-128), INT8_C( 120), INT8_C( 17), INT8_C( 93), INT8_C( -32), INT8_C(-128), INT8_C( -2), INT8_C( -94), INT8_C( 29), INT8_C( 109), INT8_C(-128), INT8_C( -27), INT8_C( 42), INT8_C(-122), INT8_C( -46), INT8_C( 29), INT8_C( 127), INT8_C( -75), INT8_C( 30), INT8_C(-118), INT8_C( -91), INT8_C(-120), INT8_C( 42), INT8_C( 54), INT8_C( 79), INT8_C( -54), INT8_C( -33), INT8_C( 107), INT8_C( -40), INT8_C( 127), INT8_C( 61), INT8_C( -48), INT8_C( -14), INT8_C( 39), INT8_C( -99), INT8_C( 73), INT8_C( 36), INT8_C( -14), INT8_C( 72), INT8_C( -9), INT8_C( -65), INT8_C(-114), INT8_C( 114), INT8_C( -87), INT8_C(-128), INT8_C( 102), INT8_C( -68), INT8_C( -88), INT8_C( 90), INT8_C( -7), INT8_C( 72)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_adds_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_adds_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask64 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT64_C(17286015531074160252), simde_mm512_set_epi8(INT8_C(-115), INT8_C( -27), INT8_C( 62), INT8_C( -85), INT8_C( 49), INT8_C(-115), INT8_C( 38), INT8_C( 4), INT8_C( 92), INT8_C( 116), INT8_C( -78), INT8_C( -19), INT8_C( -73), INT8_C( 22), INT8_C( -66), INT8_C( -29), INT8_C( 55), INT8_C( 78), INT8_C( -45), INT8_C(-119), INT8_C( 30), INT8_C( -29), INT8_C( -23), INT8_C( 89), INT8_C( 15), INT8_C( 119), INT8_C( -88), INT8_C( 109), INT8_C( -16), INT8_C( -10), INT8_C( 0), INT8_C( -18), INT8_C( 1), INT8_C( 116), INT8_C( 62), INT8_C( -73), INT8_C(-122), INT8_C( 0), INT8_C( 111), INT8_C( 95), INT8_C( -20), INT8_C( 3), INT8_C( 122), INT8_C(-121), INT8_C( 13), INT8_C( 25), INT8_C( -35), INT8_C(-127), INT8_C( 82), INT8_C( 90), INT8_C( 53), INT8_C( 123), INT8_C( 73), INT8_C( 108), INT8_C( 
-18), INT8_C( 15), INT8_C( 81), INT8_C( -27), INT8_C( 102), INT8_C( 118), INT8_C( 39), INT8_C( -77), INT8_C( 45), INT8_C( 81)), simde_mm512_set_epi8(INT8_C( 68), INT8_C(-108), INT8_C(-121), INT8_C(-115), INT8_C( -59), INT8_C( -78), INT8_C(-111), INT8_C( 56), INT8_C( 9), INT8_C(-121), INT8_C( 120), INT8_C( -36), INT8_C( 36), INT8_C(-103), INT8_C(-104), INT8_C( 25), INT8_C(-111), INT8_C( -63), INT8_C( -35), INT8_C(-120), INT8_C( 28), INT8_C( -44), INT8_C( -26), INT8_C( -86), INT8_C( -13), INT8_C( 53), INT8_C( -88), INT8_C(-107), INT8_C( 68), INT8_C( 42), INT8_C(-118), INT8_C( 111), INT8_C( 54), INT8_C( -58), INT8_C( -13), INT8_C( 27), INT8_C( 23), INT8_C( 41), INT8_C(-119), INT8_C( 44), INT8_C( 7), INT8_C(-120), INT8_C( 32), INT8_C( -43), INT8_C( 114), INT8_C( -72), INT8_C( 73), INT8_C( -96), INT8_C( 96), INT8_C( 110), INT8_C( -81), INT8_C( -76), INT8_C( 103), INT8_C(-100), INT8_C( -22), INT8_C( 18), INT8_C( 115), INT8_C( 54), INT8_C( -40), INT8_C( 125), INT8_C( 110), INT8_C( 31), INT8_C( 51), INT8_C(-104)), simde_mm512_set_epi8(INT8_C( -47), INT8_C(-128), INT8_C( -59), INT8_C( 0), INT8_C( -10), INT8_C(-128), INT8_C( -73), INT8_C( 60), INT8_C( 101), INT8_C( -5), INT8_C( 42), INT8_C( 0), INT8_C( 0), INT8_C( -81), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 15), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -49), INT8_C( 3), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 52), INT8_C( 0), INT8_C(-118), INT8_C( 0), INT8_C( 0), INT8_C( 58), INT8_C( 0), INT8_C( 0), INT8_C( -99), INT8_C( 41), INT8_C( -8), INT8_C( 127), INT8_C( -13), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( -47), INT8_C( 38), INT8_C(-128), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( -40), INT8_C( 0), INT8_C( 0), INT8_C( 27), INT8_C( 62), INT8_C( 127), INT8_C( 127), INT8_C( -46), INT8_C( 0), INT8_C( 0)) }, { UINT64_C( 9031211210081693899), simde_mm512_set_epi8(INT8_C(-106), INT8_C( -18), INT8_C( 65), INT8_C(-104), INT8_C( 81), INT8_C( 
38), INT8_C( -84), INT8_C( -2), INT8_C( -14), INT8_C( 85), INT8_C( -80), INT8_C( 80), INT8_C( 48), INT8_C( 93), INT8_C( 79), INT8_C( 127), INT8_C( 16), INT8_C( 41), INT8_C( 54), INT8_C(-116), INT8_C( 17), INT8_C( 42), INT8_C( -86), INT8_C( 38), INT8_C(-118), INT8_C( -56), INT8_C( 60), INT8_C( 19), INT8_C(-100), INT8_C(-107), INT8_C( 105), INT8_C( -76), INT8_C( 10), INT8_C( -9), INT8_C( -12), INT8_C( -56), INT8_C( -71), INT8_C( 96), INT8_C( 6), INT8_C( 24), INT8_C( 69), INT8_C( 73), INT8_C( -27), INT8_C( 3), INT8_C( -95), INT8_C( 50), INT8_C( -67), INT8_C( -39), INT8_C( 97), INT8_C( 62), INT8_C( 101), INT8_C( 56), INT8_C( -79), INT8_C( 83), INT8_C( -99), INT8_C( -56), INT8_C( -17), INT8_C( -56), INT8_C( -8), INT8_C( -16), INT8_C( 84), INT8_C( -95), INT8_C( -73), INT8_C( 54)), simde_mm512_set_epi8(INT8_C( 63), INT8_C( -51), INT8_C( 87), INT8_C( 32), INT8_C( -65), INT8_C( 55), INT8_C( 14), INT8_C( 81), INT8_C(-123), INT8_C(-100), INT8_C( -39), INT8_C( -44), INT8_C( 22), INT8_C( 112), INT8_C( 16), INT8_C( 15), INT8_C(-104), INT8_C(-102), INT8_C( 102), INT8_C( -95), INT8_C( 69), INT8_C( 13), INT8_C( -21), INT8_C( 45), INT8_C( 62), INT8_C(-120), INT8_C(-111), INT8_C( 32), INT8_C(-107), INT8_C( -30), INT8_C( 99), INT8_C( -64), INT8_C( 8), INT8_C( -42), INT8_C( 81), INT8_C( -34), INT8_C( -46), INT8_C( 26), INT8_C( 31), INT8_C( -2), INT8_C( 68), INT8_C( -7), INT8_C( -71), INT8_C( 46), INT8_C( -21), INT8_C( -73), INT8_C( 21), INT8_C( 83), INT8_C(-108), INT8_C( -97), INT8_C( -69), INT8_C( 73), INT8_C( 57), INT8_C( -37), INT8_C( 21), INT8_C( 82), INT8_C(-119), INT8_C(-126), INT8_C( 126), INT8_C( 91), INT8_C( 115), INT8_C( 31), INT8_C( -79), INT8_C( 28)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( -69), INT8_C( 127), INT8_C( -72), INT8_C( 16), INT8_C( 93), INT8_C( 0), INT8_C( 79), INT8_C( 0), INT8_C( -15), INT8_C( 0), INT8_C( 36), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( -61), INT8_C( 0), INT8_C( 0), INT8_C( 86), INT8_C( 55), INT8_C(-107), INT8_C( 
0), INT8_C( -56), INT8_C( 0), INT8_C( -51), INT8_C( 51), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 122), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 66), INT8_C( -98), INT8_C( 49), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 44), INT8_C( -11), INT8_C( -35), INT8_C( 32), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C(-128), INT8_C( 82)) }, { UINT64_C( 6828010367541473016), simde_mm512_set_epi8(INT8_C( 84), INT8_C( -96), INT8_C( 87), INT8_C( -22), INT8_C( -5), INT8_C( -3), INT8_C(-127), INT8_C( 41), INT8_C( 74), INT8_C( 72), INT8_C( -4), INT8_C( 28), INT8_C(-115), INT8_C( 93), INT8_C( 102), INT8_C( 44), INT8_C(-103), INT8_C( -29), INT8_C( -50), INT8_C( 48), INT8_C( -96), INT8_C( -50), INT8_C( 46), INT8_C( -65), INT8_C( 4), INT8_C( 43), INT8_C( -75), INT8_C( 97), INT8_C( -26), INT8_C(-103), INT8_C( 71), INT8_C(-107), INT8_C( 91), INT8_C( 45), INT8_C( -11), INT8_C( 47), INT8_C( 29), INT8_C( 25), INT8_C( 26), INT8_C( -9), INT8_C( 10), INT8_C( 36), INT8_C( -79), INT8_C( -53), INT8_C( 41), INT8_C( 1), INT8_C( -23), INT8_C( -63), INT8_C(-127), INT8_C( 68), INT8_C( 48), INT8_C( 36), INT8_C( 89), INT8_C(-112), INT8_C( -31), INT8_C( 120), INT8_C( 35), INT8_C( 62), INT8_C( -21), INT8_C(-114), INT8_C(-104), INT8_C( 57), INT8_C( 42), INT8_C(-111)), simde_mm512_set_epi8(INT8_C( 96), INT8_C( -64), INT8_C( -23), INT8_C( -46), INT8_C( -29), INT8_C( -71), INT8_C( 71), INT8_C( 90), INT8_C( 44), INT8_C( -92), INT8_C( -31), INT8_C( 26), INT8_C( -99), INT8_C( -53), INT8_C( 117), INT8_C( 18), INT8_C( -63), INT8_C( 45), INT8_C( 12), INT8_C( 24), INT8_C(-108), INT8_C( 18), INT8_C( -60), INT8_C( 28), INT8_C( 50), INT8_C( -11), INT8_C( -68), INT8_C( -31), INT8_C( 105), INT8_C(-106), INT8_C( 98), INT8_C( 51), INT8_C( 58), INT8_C( 103), INT8_C( 111), INT8_C(-127), INT8_C( 68), INT8_C( -56), INT8_C( 124), 
INT8_C(-119), INT8_C( 74), INT8_C( -62), INT8_C(-116), INT8_C( 37), INT8_C( -12), INT8_C( 114), INT8_C( 0), INT8_C( 61), INT8_C( 103), INT8_C( -4), INT8_C(-105), INT8_C( -68), INT8_C( 39), INT8_C(-100), INT8_C( -93), INT8_C( 11), INT8_C( -80), INT8_C( -19), INT8_C( -22), INT8_C( -39), INT8_C( 127), INT8_C( -38), INT8_C(-125), INT8_C(-111)), simde_mm512_set_epi8(INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( -68), INT8_C( -34), INT8_C( -74), INT8_C( -56), INT8_C( 0), INT8_C( 118), INT8_C( -20), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 62), INT8_C(-128), INT8_C( 16), INT8_C( -38), INT8_C( 72), INT8_C( 0), INT8_C( -32), INT8_C( -14), INT8_C( -37), INT8_C( 0), INT8_C( 32), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 100), INT8_C( -80), INT8_C( 97), INT8_C( -31), INT8_C( 127), INT8_C(-128), INT8_C( 84), INT8_C( -26), INT8_C(-128), INT8_C( -16), INT8_C( 29), INT8_C( 115), INT8_C( -23), INT8_C( 0), INT8_C( 0), INT8_C( 64), INT8_C( -57), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C(-124), INT8_C( 0), INT8_C( -45), INT8_C( 43), INT8_C( -43), INT8_C(-128), INT8_C( 23), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { UINT64_C(18325019528117086600), simde_mm512_set_epi8(INT8_C( 122), INT8_C( -54), INT8_C( 50), INT8_C( 123), INT8_C( -31), INT8_C( -74), INT8_C( -64), INT8_C( 54), INT8_C( -81), INT8_C( 60), INT8_C( 31), INT8_C( -23), INT8_C( 108), INT8_C(-119), INT8_C( -92), INT8_C( -80), INT8_C( -30), INT8_C( -37), INT8_C( 51), INT8_C( -36), INT8_C( 8), INT8_C( 52), INT8_C( 97), INT8_C( 123), INT8_C( -49), INT8_C(-124), INT8_C( 95), INT8_C( -83), INT8_C( 70), INT8_C( -50), INT8_C( -61), INT8_C( 25), INT8_C( -97), INT8_C( 28), INT8_C( -58), INT8_C( 11), INT8_C( -14), INT8_C( 126), INT8_C( 81), INT8_C( 45), INT8_C( -23), INT8_C( 120), INT8_C( -83), INT8_C( -16), INT8_C( 7), INT8_C( 51), INT8_C( -57), INT8_C( -50), INT8_C( -21), INT8_C( 98), INT8_C( 88), INT8_C( 0), INT8_C( -66), INT8_C( 3), INT8_C( 
124), INT8_C(-113), INT8_C( 50), INT8_C( 88), INT8_C( -85), INT8_C( -93), INT8_C( -44), INT8_C( -13), INT8_C( -94), INT8_C( 17)), simde_mm512_set_epi8(INT8_C( -13), INT8_C( -12), INT8_C(-124), INT8_C( 96), INT8_C( -13), INT8_C( -98), INT8_C( 99), INT8_C( -13), INT8_C( 56), INT8_C( -45), INT8_C(-108), INT8_C( -19), INT8_C(-124), INT8_C( -27), INT8_C( 22), INT8_C( 126), INT8_C(-106), INT8_C( -68), INT8_C( -60), INT8_C( 8), INT8_C( 60), INT8_C( 93), INT8_C( -33), INT8_C( -27), INT8_C( -7), INT8_C( 27), INT8_C(-122), INT8_C( -38), INT8_C( 23), INT8_C( 6), INT8_C( 45), INT8_C( -21), INT8_C( -23), INT8_C(-101), INT8_C( 116), INT8_C( 127), INT8_C( 96), INT8_C( 40), INT8_C( -97), INT8_C( 40), INT8_C( 86), INT8_C( -44), INT8_C( 70), INT8_C( -71), INT8_C( 62), INT8_C( -21), INT8_C( 66), INT8_C( 68), INT8_C( -87), INT8_C( -61), INT8_C( 48), INT8_C( -70), INT8_C( 18), INT8_C( -78), INT8_C( -98), INT8_C( 117), INT8_C( 74), INT8_C( 32), INT8_C( 93), INT8_C( 125), INT8_C( -47), INT8_C( -60), INT8_C( -86), INT8_C( 117)), simde_mm512_set_epi8(INT8_C( 109), INT8_C( -66), INT8_C( -74), INT8_C( 127), INT8_C( -44), INT8_C(-128), INT8_C( 35), INT8_C( 0), INT8_C( 0), INT8_C( 15), INT8_C( 0), INT8_C( 0), INT8_C( -16), INT8_C(-128), INT8_C( -70), INT8_C( 46), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 68), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -27), INT8_C( 0), INT8_C( 93), INT8_C( 0), INT8_C( -16), INT8_C( 4), INT8_C(-120), INT8_C( 0), INT8_C( 58), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( -16), INT8_C( 85), INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 9), INT8_C( 18), INT8_C(-108), INT8_C( 37), INT8_C( 127), INT8_C( 0), INT8_C( -48), INT8_C( 0), INT8_C( 0), INT8_C( 4), INT8_C( 124), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -91), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { UINT64_C( 7677112093376593822), simde_mm512_set_epi8(INT8_C( -53), INT8_C( 34), INT8_C( 9), INT8_C( 64), INT8_C( -61), INT8_C( -25), 
INT8_C(-115), INT8_C( 100), INT8_C( 65), INT8_C( 8), INT8_C( 69), INT8_C( -8), INT8_C( -15), INT8_C( -51), INT8_C( 1), INT8_C( 90), INT8_C( 115), INT8_C( 51), INT8_C( -91), INT8_C( 56), INT8_C( 64), INT8_C( -39), INT8_C(-119), INT8_C( -28), INT8_C( -54), INT8_C( 28), INT8_C( 54), INT8_C( -8), INT8_C( -54), INT8_C(-128), INT8_C( -28), INT8_C( -71), INT8_C( 107), INT8_C( -66), INT8_C(-114), INT8_C( -88), INT8_C( 34), INT8_C( -83), INT8_C( -21), INT8_C( -64), INT8_C( 121), INT8_C( -20), INT8_C( -89), INT8_C( -94), INT8_C( 112), INT8_C( -27), INT8_C( 81), INT8_C( -54), INT8_C( -64), INT8_C(-114), INT8_C( 48), INT8_C( -89), INT8_C( -61), INT8_C( 26), INT8_C( 43), INT8_C( 29), INT8_C( 0), INT8_C( 64), INT8_C( 123), INT8_C( -67), INT8_C( 15), INT8_C( 120), INT8_C( 36), INT8_C( 40)), simde_mm512_set_epi8(INT8_C( 113), INT8_C( 32), INT8_C( 71), INT8_C( 55), INT8_C( -26), INT8_C( 82), INT8_C( -81), INT8_C( -20), INT8_C( -55), INT8_C( 112), INT8_C( 45), INT8_C( 37), INT8_C( 67), INT8_C( -69), INT8_C( 64), INT8_C( 39), INT8_C( 72), INT8_C( 45), INT8_C( 120), INT8_C( -5), INT8_C(-109), INT8_C( 62), INT8_C( 17), INT8_C( 31), INT8_C( -30), INT8_C( -58), INT8_C( 56), INT8_C( 21), INT8_C( 72), INT8_C( -74), INT8_C( -40), INT8_C( 120), INT8_C( 95), INT8_C( 108), INT8_C( 32), INT8_C( 64), INT8_C(-128), INT8_C( 102), INT8_C( -21), INT8_C( 28), INT8_C( 105), INT8_C( 52), INT8_C( 85), INT8_C(-104), INT8_C( 57), INT8_C( -31), INT8_C( -38), INT8_C(-124), INT8_C(-107), INT8_C( -2), INT8_C( 55), INT8_C( 46), INT8_C( -71), INT8_C( 77), INT8_C( 18), INT8_C( 70), INT8_C( 89), INT8_C( 125), INT8_C( -42), INT8_C(-125), INT8_C( 121), INT8_C( -11), INT8_C( -69), INT8_C( -59)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 66), INT8_C( 80), INT8_C( 0), INT8_C( -87), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 10), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 52), INT8_C( 0), INT8_C( 65), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 51), INT8_C( 0), INT8_C( 23), INT8_C( 0), INT8_C( 0), 
INT8_C( -84), INT8_C( -30), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( -68), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -24), INT8_C( -94), INT8_C( 0), INT8_C( -42), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 103), INT8_C( -43), INT8_C(-128), INT8_C( 103), INT8_C( 61), INT8_C( 99), INT8_C( 89), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C( 109), INT8_C( -33), INT8_C( 0)) }, { UINT64_C( 4840581956081845689), simde_mm512_set_epi8(INT8_C( 122), INT8_C(-108), INT8_C( -70), INT8_C( -63), INT8_C( 56), INT8_C( 71), INT8_C( -14), INT8_C( 49), INT8_C( -73), INT8_C( 53), INT8_C( -29), INT8_C( 3), INT8_C( -73), INT8_C( 43), INT8_C( -22), INT8_C( 85), INT8_C( -26), INT8_C( -9), INT8_C( 66), INT8_C( 1), INT8_C( -13), INT8_C( 60), INT8_C(-119), INT8_C( -83), INT8_C(-122), INT8_C( -64), INT8_C( -83), INT8_C( -74), INT8_C( 119), INT8_C( -8), INT8_C( 12), INT8_C( 113), INT8_C( -12), INT8_C( -84), INT8_C( 6), INT8_C( 69), INT8_C( 2), INT8_C( -75), INT8_C( -34), INT8_C(-126), INT8_C( 3), INT8_C(-128), INT8_C( -9), INT8_C( 24), INT8_C( 11), INT8_C( -94), INT8_C( -32), INT8_C( 110), INT8_C( 33), INT8_C( -24), INT8_C( 125), INT8_C( 35), INT8_C(-103), INT8_C( -48), INT8_C( -22), INT8_C( 38), INT8_C( -81), INT8_C( 9), INT8_C( -11), INT8_C(-124), INT8_C( 71), INT8_C( 31), INT8_C( -42), INT8_C( 93)), simde_mm512_set_epi8(INT8_C( 53), INT8_C( 77), INT8_C( 63), INT8_C( -84), INT8_C( 27), INT8_C( 22), INT8_C( 3), INT8_C( -37), INT8_C( 65), INT8_C( 118), INT8_C(-126), INT8_C( 97), INT8_C( 109), INT8_C( 7), INT8_C(-114), INT8_C( -75), INT8_C(-125), INT8_C( 121), INT8_C(-128), INT8_C( 103), INT8_C( 0), INT8_C( 101), INT8_C( -41), INT8_C( 89), INT8_C( -83), INT8_C( -65), INT8_C( 9), INT8_C( -7), INT8_C( -63), INT8_C( 13), INT8_C( 105), INT8_C( 92), INT8_C( -18), INT8_C( -21), INT8_C(-102), INT8_C(-114), INT8_C( 74), INT8_C( 121), INT8_C( -45), INT8_C( 
52), INT8_C( -63), INT8_C( -93), INT8_C( 98), INT8_C( 106), INT8_C(-109), INT8_C( -47), INT8_C( 37), INT8_C( 70), INT8_C( 100), INT8_C( 121), INT8_C( 18), INT8_C( 28), INT8_C(-117), INT8_C( 107), INT8_C( 3), INT8_C( -62), INT8_C( 42), INT8_C( 72), INT8_C( 91), INT8_C( 86), INT8_C( -72), INT8_C( 9), INT8_C( -80), INT8_C( 118)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( -31), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -11), INT8_C( 12), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 36), INT8_C( 50), INT8_C( 0), INT8_C( 10), INT8_C( 0), INT8_C( 0), INT8_C( -62), INT8_C( 104), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 6), INT8_C(-128), INT8_C( 0), INT8_C( -74), INT8_C( 0), INT8_C( 0), INT8_C( 5), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-105), INT8_C( -96), INT8_C( -45), INT8_C( 76), INT8_C( 46), INT8_C( -79), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 89), INT8_C( 0), INT8_C( -98), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 59), INT8_C( 0), INT8_C( -24), INT8_C( -39), INT8_C( 0), INT8_C( 80), INT8_C( -38), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 127)) }, { UINT64_C(17498311407133456191), simde_mm512_set_epi8(INT8_C( 61), INT8_C( -89), INT8_C( 84), INT8_C( -94), INT8_C( 31), INT8_C(-116), INT8_C( -67), INT8_C(-102), INT8_C( -72), INT8_C( -91), INT8_C(-105), INT8_C(-108), INT8_C( -44), INT8_C( 74), INT8_C( -28), INT8_C( 124), INT8_C( 120), INT8_C( -41), INT8_C( -79), INT8_C( 122), INT8_C( 87), INT8_C(-119), INT8_C( 54), INT8_C( -2), INT8_C( -47), INT8_C( 84), INT8_C(-126), INT8_C( -64), INT8_C( 14), INT8_C( 11), INT8_C( 37), INT8_C( -23), INT8_C( 67), INT8_C( 124), INT8_C( 58), INT8_C( -94), INT8_C( 30), INT8_C( -33), INT8_C( 70), INT8_C( -24), INT8_C( 38), INT8_C( -97), INT8_C( -56), INT8_C( -60), INT8_C( -59), INT8_C( 65), INT8_C( -74), INT8_C( 45), INT8_C( -11), INT8_C( 55), INT8_C( -82), INT8_C( 12), INT8_C( 106), INT8_C( 22), INT8_C(-124), INT8_C( -4), INT8_C( 2), 
INT8_C( -81), INT8_C( 14), INT8_C( 90), INT8_C(-100), INT8_C(-122), INT8_C( -35), INT8_C( 81)), simde_mm512_set_epi8(INT8_C( 21), INT8_C( 108), INT8_C( 125), INT8_C( -60), INT8_C( -72), INT8_C( 74), INT8_C( -5), INT8_C( -58), INT8_C( -41), INT8_C( 22), INT8_C(-115), INT8_C( 102), INT8_C( 59), INT8_C( -80), INT8_C( -15), INT8_C( -63), INT8_C( 84), INT8_C( -71), INT8_C( 8), INT8_C( 12), INT8_C( -11), INT8_C( -76), INT8_C( 62), INT8_C( 93), INT8_C( -75), INT8_C( -77), INT8_C( -84), INT8_C(-108), INT8_C( -35), INT8_C( 14), INT8_C( -60), INT8_C( 18), INT8_C( 23), INT8_C( -60), INT8_C( -63), INT8_C(-114), INT8_C( -55), INT8_C( 75), INT8_C( -99), INT8_C( -55), INT8_C( 58), INT8_C( 76), INT8_C(-102), INT8_C(-118), INT8_C( -30), INT8_C( 39), INT8_C( 119), INT8_C( 85), INT8_C( -8), INT8_C( -72), INT8_C( -60), INT8_C( -94), INT8_C(-112), INT8_C( 119), INT8_C( 124), INT8_C( 76), INT8_C( -42), INT8_C(-124), INT8_C( 54), INT8_C( 74), INT8_C( -92), INT8_C( 99), INT8_C( 79), INT8_C( -3)), simde_mm512_set_epi8(INT8_C( 82), INT8_C( 19), INT8_C( 127), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( -72), INT8_C( 0), INT8_C(-113), INT8_C( -69), INT8_C( 0), INT8_C( -6), INT8_C( 0), INT8_C( -6), INT8_C( -43), INT8_C( 0), INT8_C( 0), INT8_C(-112), INT8_C( -71), INT8_C( 127), INT8_C( 76), INT8_C(-128), INT8_C( 0), INT8_C( 91), INT8_C(-122), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -23), INT8_C( -5), INT8_C( 90), INT8_C( 64), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 42), INT8_C( -29), INT8_C( -79), INT8_C( 0), INT8_C( -21), INT8_C( 0), INT8_C(-128), INT8_C( -89), INT8_C( 0), INT8_C( 45), INT8_C( 0), INT8_C( -19), INT8_C( -17), INT8_C(-128), INT8_C( -82), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 72), INT8_C( 0), INT8_C( 0), INT8_C( 68), INT8_C( 127), INT8_C(-128), INT8_C( -23), INT8_C( 44), INT8_C( 78)) }, { UINT64_C( 3462613925466476941), simde_mm512_set_epi8(INT8_C( 18), INT8_C( -74), INT8_C( 14), INT8_C( 36), INT8_C( -7), INT8_C( 113), INT8_C( 40), 
INT8_C( 48), INT8_C(-107), INT8_C( -34), INT8_C( -75), INT8_C( 85), INT8_C( -35), INT8_C(-116), INT8_C( 65), INT8_C( -21), INT8_C( 15), INT8_C( 3), INT8_C( 45), INT8_C( 21), INT8_C( 72), INT8_C( 93), INT8_C( 108), INT8_C( 125), INT8_C( 1), INT8_C( 75), INT8_C( 21), INT8_C( -36), INT8_C(-126), INT8_C( 122), INT8_C( 71), INT8_C( 76), INT8_C( 28), INT8_C( -56), INT8_C( 32), INT8_C( 101), INT8_C(-107), INT8_C(-111), INT8_C( -88), INT8_C( -19), INT8_C( -77), INT8_C( 19), INT8_C( -21), INT8_C(-111), INT8_C( -68), INT8_C( 82), INT8_C(-118), INT8_C( -76), INT8_C( 47), INT8_C( 127), INT8_C( 62), INT8_C( -16), INT8_C( 10), INT8_C( -14), INT8_C(-100), INT8_C( 86), INT8_C( 29), INT8_C( 107), INT8_C( 56), INT8_C( 21), INT8_C( 24), INT8_C( 68), INT8_C( -96), INT8_C( 64)), simde_mm512_set_epi8(INT8_C( -57), INT8_C( 94), INT8_C(-125), INT8_C(-124), INT8_C( 36), INT8_C( 57), INT8_C( 68), INT8_C( -52), INT8_C( 39), INT8_C( 50), INT8_C( -48), INT8_C( 94), INT8_C( 53), INT8_C( 11), INT8_C( 29), INT8_C( 65), INT8_C( -82), INT8_C(-119), INT8_C( 115), INT8_C( 68), INT8_C( -69), INT8_C( 118), INT8_C( 17), INT8_C( 8), INT8_C( -64), INT8_C( -53), INT8_C( -42), INT8_C( 126), INT8_C( 67), INT8_C( 50), INT8_C( -18), INT8_C( 76), INT8_C( -19), INT8_C( 123), INT8_C( -87), INT8_C( 106), INT8_C( -74), INT8_C( 44), INT8_C( 117), INT8_C( 103), INT8_C( 81), INT8_C( 122), INT8_C( 56), INT8_C( -10), INT8_C( 67), INT8_C( 79), INT8_C( 83), INT8_C( -38), INT8_C( -13), INT8_C( 43), INT8_C( 27), INT8_C( -97), INT8_C( 102), INT8_C( 126), INT8_C( 38), INT8_C( -62), INT8_C( -24), INT8_C( 117), INT8_C( -38), INT8_C( -93), INT8_C( -58), INT8_C(-124), INT8_C( -75), INT8_C( 10)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C(-111), INT8_C( -88), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 18), INT8_C(-105), INT8_C( 0), INT8_C( 44), INT8_C( -67), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 3), INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 9), INT8_C( 67), INT8_C( -55), INT8_C( 127), INT8_C(-128), INT8_C( -67), INT8_C( 0), INT8_C( 84), INT8_C( 4), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-113), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 24), INT8_C( 5), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -34), INT8_C( -56), INT8_C( 0), INT8_C( 74)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_adds_epi8(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_adds_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi16(INT16_C( 5952), INT16_C(-21138), INT16_C( 23605), INT16_C( -3799), INT16_C( 1987), INT16_C( 29254), INT16_C( -4184), INT16_C( 13346), INT16_C( 26203), INT16_C(-24373), INT16_C( 15487), INT16_C( 15569), INT16_C( 13821), INT16_C( -3315), INT16_C( 30667), INT16_C( 9811), INT16_C( 22769), INT16_C( 21964), INT16_C( -315), INT16_C(-15723), INT16_C( -9658), INT16_C(-27088), INT16_C( 18289), INT16_C( 13213), INT16_C(-12712), INT16_C( 23293), INT16_C( 12497), INT16_C( 16576), INT16_C(-19358), INT16_C(-11263), INT16_C( -3745), INT16_C( 7925)), simde_mm512_set_epi16(INT16_C(-22600), INT16_C( 9825), INT16_C( 27347), INT16_C( -4649), INT16_C( 4812), INT16_C( 27489), INT16_C( 14729), INT16_C(-20872), INT16_C(-24487), INT16_C( 23053), INT16_C( 28966), INT16_C(-23890), INT16_C( -9737), INT16_C(-24021), INT16_C( 12009), INT16_C(-16892), INT16_C(-11018), INT16_C( -7664), INT16_C( 22598), INT16_C( 9112), INT16_C( -468), INT16_C(-16246), INT16_C( 11633), INT16_C( 3402), INT16_C( 12940), INT16_C(-18453), INT16_C(-10463), INT16_C(-24372), INT16_C( 9722), INT16_C( 4912), INT16_C( 14086), INT16_C( -2976)), 
simde_mm512_set_epi16(INT16_C(-16648), INT16_C(-11313), INT16_C( 32767), INT16_C( -8448), INT16_C( 6799), INT16_C( 32767), INT16_C( 10545), INT16_C( -7526), INT16_C( 1716), INT16_C( -1320), INT16_C( 32767), INT16_C( -8321), INT16_C( 4084), INT16_C(-27336), INT16_C( 32767), INT16_C( -7081), INT16_C( 11751), INT16_C( 14300), INT16_C( 22283), INT16_C( -6611), INT16_C(-10126), INT16_C(-32768), INT16_C( 29922), INT16_C( 16615), INT16_C( 228), INT16_C( 4840), INT16_C( 2034), INT16_C( -7796), INT16_C( -9636), INT16_C( -6351), INT16_C( 10341), INT16_C( 4949)) }, { simde_mm512_set_epi16(INT16_C(-21870), INT16_C( -1966), INT16_C(-26101), INT16_C( 8905), INT16_C(-13069), INT16_C( 13555), INT16_C(-23554), INT16_C(-12947), INT16_C( 31779), INT16_C( 21013), INT16_C( 32688), INT16_C( 12512), INT16_C(-26721), INT16_C( 7598), INT16_C( 21566), INT16_C(-11203), INT16_C( 27021), INT16_C( -8539), INT16_C( -532), INT16_C(-17200), INT16_C(-18609), INT16_C(-30604), INT16_C( 6829), INT16_C(-13538), INT16_C( 21529), INT16_C( 29340), INT16_C( 23176), INT16_C( 10860), INT16_C( 31408), INT16_C( 3656), INT16_C(-30572), INT16_C( -4393)), simde_mm512_set_epi16(INT16_C( 29137), INT16_C( 29112), INT16_C(-21893), INT16_C( 14830), INT16_C( 28350), INT16_C( 21222), INT16_C( 26922), INT16_C( 26197), INT16_C(-25881), INT16_C( 28163), INT16_C(-19268), INT16_C( 10158), INT16_C(-29245), INT16_C( 16103), INT16_C(-23323), INT16_C( 12478), INT16_C( -6515), INT16_C( 24007), INT16_C(-28366), INT16_C(-32677), INT16_C(-27350), INT16_C(-26916), INT16_C(-11979), INT16_C( 13820), INT16_C(-22393), INT16_C(-20903), INT16_C( -9892), INT16_C(-22447), INT16_C(-14286), INT16_C( 3532), INT16_C( 19664), INT16_C(-26842)), simde_mm512_set_epi16(INT16_C( 7267), INT16_C( 27146), INT16_C(-32768), INT16_C( 23735), INT16_C( 15281), INT16_C( 32767), INT16_C( 3368), INT16_C( 13250), INT16_C( 5898), INT16_C( 32767), INT16_C( 13420), INT16_C( 22670), INT16_C(-32768), INT16_C( 23701), INT16_C( -1757), INT16_C( 1275), INT16_C( 20506), 
INT16_C( 15468), INT16_C(-28898), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C( -5150), INT16_C( 282), INT16_C( -864), INT16_C( 8437), INT16_C( 13284), INT16_C(-11587), INT16_C( 17122), INT16_C( 7188), INT16_C(-10908), INT16_C(-31235)) }, { simde_mm512_set_epi16(INT16_C(-22689), INT16_C( 25207), INT16_C( -9300), INT16_C( 28977), INT16_C( 2270), INT16_C(-32394), INT16_C( 26182), INT16_C( 23367), INT16_C( 31661), INT16_C( 9636), INT16_C(-18580), INT16_C(-28875), INT16_C(-25943), INT16_C( 14067), INT16_C( 10599), INT16_C( 30831), INT16_C(-23557), INT16_C( 11377), INT16_C( -4608), INT16_C( 14955), INT16_C( 11781), INT16_C(-12602), INT16_C( -3223), INT16_C( 11440), INT16_C( -2896), INT16_C(-24304), INT16_C( 29156), INT16_C( -4347), INT16_C( -2421), INT16_C( 31033), INT16_C( -3132), INT16_C(-19324)), simde_mm512_set_epi16(INT16_C( -2724), INT16_C( 4704), INT16_C( 23772), INT16_C(-20368), INT16_C(-30374), INT16_C( 15838), INT16_C( 19402), INT16_C( 16976), INT16_C(-28476), INT16_C( -2715), INT16_C(-27679), INT16_C( -8608), INT16_C(-20120), INT16_C( 29206), INT16_C( -1643), INT16_C(-29856), INT16_C( 18656), INT16_C( -9660), INT16_C( -4660), INT16_C(-30272), INT16_C( 610), INT16_C( -3787), INT16_C( 22038), INT16_C(-29446), INT16_C(-26504), INT16_C(-28984), INT16_C(-21830), INT16_C(-27507), INT16_C( 24148), INT16_C(-12945), INT16_C( 24529), INT16_C( 7913)), simde_mm512_set_epi16(INT16_C(-25413), INT16_C( 29911), INT16_C( 14472), INT16_C( 8609), INT16_C(-28104), INT16_C(-16556), INT16_C( 32767), INT16_C( 32767), INT16_C( 3185), INT16_C( 6921), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 8956), INT16_C( 975), INT16_C( -4901), INT16_C( 1717), INT16_C( -9268), INT16_C(-15317), INT16_C( 12391), INT16_C(-16389), INT16_C( 18815), INT16_C(-18006), INT16_C(-29400), INT16_C(-32768), INT16_C( 7326), INT16_C(-31854), INT16_C( 21727), INT16_C( 18088), INT16_C( 21397), INT16_C(-11411)) }, { simde_mm512_set_epi16(INT16_C(-18069), INT16_C( 
21685), INT16_C( -5808), INT16_C( 24672), INT16_C( -8190), INT16_C( -2830), INT16_C(-23214), INT16_C( 1083), INT16_C( 6699), INT16_C( 871), INT16_C(-14072), INT16_C( -2337), INT16_C( 13356), INT16_C( 7260), INT16_C(-23788), INT16_C(-19723), INT16_C( 13480), INT16_C(-15429), INT16_C(-20153), INT16_C( 18680), INT16_C(-26180), INT16_C( 27923), INT16_C(-14099), INT16_C( 25829), INT16_C(-27481), INT16_C( 21639), INT16_C( 16654), INT16_C( 19972), INT16_C( 15776), INT16_C( 23126), INT16_C( 30799), INT16_C(-14043)), simde_mm512_set_epi16(INT16_C( 16111), INT16_C(-26103), INT16_C( -8800), INT16_C(-24747), INT16_C( 27941), INT16_C( 603), INT16_C( -8112), INT16_C(-28325), INT16_C( -6110), INT16_C(-15530), INT16_C( -938), INT16_C(-26515), INT16_C( -2492), INT16_C( 16124), INT16_C(-27181), INT16_C( -3573), INT16_C( 11652), INT16_C( 11646), INT16_C( 6964), INT16_C( 27675), INT16_C( 13103), INT16_C( -2742), INT16_C( 27906), INT16_C( -1077), INT16_C(-30636), INT16_C(-23050), INT16_C( 8363), INT16_C( 31350), INT16_C( -4750), INT16_C( -220), INT16_C( -8954), INT16_C( 15617)), simde_mm512_set_epi16(INT16_C( -1958), INT16_C( -4418), INT16_C(-14608), INT16_C( -75), INT16_C( 19751), INT16_C( -2227), INT16_C(-31326), INT16_C(-27242), INT16_C( 589), INT16_C(-14659), INT16_C(-15010), INT16_C(-28852), INT16_C( 10864), INT16_C( 23384), INT16_C(-32768), INT16_C(-23296), INT16_C( 25132), INT16_C( -3783), INT16_C(-13189), INT16_C( 32767), INT16_C(-13077), INT16_C( 25181), INT16_C( 13807), INT16_C( 24752), INT16_C(-32768), INT16_C( -1411), INT16_C( 25017), INT16_C( 32767), INT16_C( 11026), INT16_C( 22906), INT16_C( 21845), INT16_C( 1574)) }, { simde_mm512_set_epi16(INT16_C( 25180), INT16_C(-32332), INT16_C(-18604), INT16_C(-25763), INT16_C( 28237), INT16_C(-22027), INT16_C( 18301), INT16_C( 27599), INT16_C( 11815), INT16_C(-14839), INT16_C(-13552), INT16_C( -6300), INT16_C(-30211), INT16_C(-25552), INT16_C( 7375), INT16_C( 5267), INT16_C(-14761), INT16_C(-28375), INT16_C(-32756), INT16_C( 
13128), INT16_C( 17683), INT16_C( -4430), INT16_C( -1805), INT16_C(-17763), INT16_C( 206), INT16_C( 15851), INT16_C( 1078), INT16_C( 27402), INT16_C(-15177), INT16_C( 5068), INT16_C(-15079), INT16_C(-13584)), simde_mm512_set_epi16(INT16_C( -3533), INT16_C( -3885), INT16_C( 789), INT16_C( -3755), INT16_C( 3668), INT16_C( 19955), INT16_C(-22356), INT16_C( 8403), INT16_C(-16920), INT16_C( 15195), INT16_C( -1853), INT16_C(-12121), INT16_C( 5095), INT16_C( -9909), INT16_C( 574), INT16_C( 31911), INT16_C( -1319), INT16_C( -2880), INT16_C(-31000), INT16_C(-30829), INT16_C(-12917), INT16_C( 21541), INT16_C( 27413), INT16_C(-28040), INT16_C(-19236), INT16_C(-24178), INT16_C(-29369), INT16_C( -3607), INT16_C( -2666), INT16_C(-13470), INT16_C( 4416), INT16_C( 23850)), simde_mm512_set_epi16(INT16_C( 21647), INT16_C(-32768), INT16_C(-17815), INT16_C(-29518), INT16_C( 31905), INT16_C( -2072), INT16_C( -4055), INT16_C( 32767), INT16_C( -5105), INT16_C( 356), INT16_C(-15405), INT16_C(-18421), INT16_C(-25116), INT16_C(-32768), INT16_C( 7949), INT16_C( 32767), INT16_C(-16080), INT16_C(-31255), INT16_C(-32768), INT16_C(-17701), INT16_C( 4766), INT16_C( 17111), INT16_C( 25608), INT16_C(-32768), INT16_C(-19030), INT16_C( -8327), INT16_C(-28291), INT16_C( 23795), INT16_C(-17843), INT16_C( -8402), INT16_C(-10663), INT16_C( 10266)) }, { simde_mm512_set_epi16(INT16_C( 11424), INT16_C( 29699), INT16_C( 29618), INT16_C(-29898), INT16_C(-23079), INT16_C( -6822), INT16_C( -7340), INT16_C(-12296), INT16_C( 17833), INT16_C( -2043), INT16_C(-29766), INT16_C( 30776), INT16_C(-18161), INT16_C(-32768), INT16_C(-31882), INT16_C( 3163), INT16_C( 20808), INT16_C(-24577), INT16_C( 7685), INT16_C( 6279), INT16_C(-27880), INT16_C( 27718), INT16_C(-18275), INT16_C( 24470), INT16_C( 8834), INT16_C( 20594), INT16_C( 23052), INT16_C(-18860), INT16_C( 7784), INT16_C(-16668), INT16_C(-30440), INT16_C( 6387)), simde_mm512_set_epi16(INT16_C( 15054), INT16_C(-23448), INT16_C( 866), INT16_C(-23509), INT16_C( -2), 
INT16_C( 6267), INT16_C(-15891), INT16_C(-24685), INT16_C( 17511), INT16_C( 7218), INT16_C(-13659), INT16_C(-27162), INT16_C( 10378), INT16_C(-16488), INT16_C(-11578), INT16_C(-27832), INT16_C(-17566), INT16_C(-29377), INT16_C( -2221), INT16_C( 18656), INT16_C( 7516), INT16_C(-30776), INT16_C(-19010), INT16_C( 26269), INT16_C(-18451), INT16_C( 31467), INT16_C(-22066), INT16_C( 30155), INT16_C( -3394), INT16_C( -6131), INT16_C( 13533), INT16_C( 21770)), simde_mm512_set_epi16(INT16_C( 26478), INT16_C( 6251), INT16_C( 30484), INT16_C(-32768), INT16_C(-23081), INT16_C( -555), INT16_C(-23231), INT16_C(-32768), INT16_C( 32767), INT16_C( 5175), INT16_C(-32768), INT16_C( 3614), INT16_C( -7783), INT16_C(-32768), INT16_C(-32768), INT16_C(-24669), INT16_C( 3242), INT16_C(-32768), INT16_C( 5464), INT16_C( 24935), INT16_C(-20364), INT16_C( -3058), INT16_C(-32768), INT16_C( 32767), INT16_C( -9617), INT16_C( 32767), INT16_C( 986), INT16_C( 11295), INT16_C( 4390), INT16_C(-22799), INT16_C(-16907), INT16_C( 28157)) }, { simde_mm512_set_epi16(INT16_C( 11108), INT16_C( 5360), INT16_C( 8246), INT16_C( 2034), INT16_C( 1485), INT16_C( 28421), INT16_C( 17428), INT16_C(-13900), INT16_C( 4889), INT16_C( 17148), INT16_C( 13806), INT16_C(-26968), INT16_C( -7783), INT16_C( 11345), INT16_C( 17472), INT16_C( 26483), INT16_C(-10340), INT16_C( -7816), INT16_C( 22407), INT16_C( 32028), INT16_C( 18301), INT16_C(-30148), INT16_C( -4736), INT16_C(-10574), INT16_C( 10686), INT16_C( -5463), INT16_C( 19858), INT16_C( -4320), INT16_C(-28154), INT16_C( 8984), INT16_C( 32741), INT16_C( 29052)), simde_mm512_set_epi16(INT16_C(-22772), INT16_C(-14596), INT16_C( -9458), INT16_C( 2054), INT16_C(-24090), INT16_C( 1808), INT16_C( 9897), INT16_C( 20003), INT16_C( 21444), INT16_C( 8548), INT16_C( -1773), INT16_C( 8905), INT16_C( 14811), INT16_C( 5752), INT16_C(-17876), INT16_C( -7032), INT16_C(-19669), INT16_C(-30631), INT16_C(-19220), INT16_C( 18906), INT16_C( 15935), INT16_C( 2767), INT16_C(-30937), INT16_C( 
13780), INT16_C( 28270), INT16_C( 29263), INT16_C( 11106), INT16_C( 19546), INT16_C( 16947), INT16_C(-29470), INT16_C( -1882), INT16_C(-19990)), simde_mm512_set_epi16(INT16_C(-11664), INT16_C( -9236), INT16_C( -1212), INT16_C( 4088), INT16_C(-22605), INT16_C( 30229), INT16_C( 27325), INT16_C( 6103), INT16_C( 26333), INT16_C( 25696), INT16_C( 12033), INT16_C(-18063), INT16_C( 7028), INT16_C( 17097), INT16_C( -404), INT16_C( 19451), INT16_C(-30009), INT16_C(-32768), INT16_C( 3187), INT16_C( 32767), INT16_C( 32767), INT16_C(-27381), INT16_C(-32768), INT16_C( 3206), INT16_C( 32767), INT16_C( 23800), INT16_C( 30964), INT16_C( 15226), INT16_C(-11207), INT16_C(-20486), INT16_C( 30859), INT16_C( 9062)) }, { simde_mm512_set_epi16(INT16_C(-30561), INT16_C( -5745), INT16_C( 32288), INT16_C( 11324), INT16_C(-18220), INT16_C(-24874), INT16_C(-10154), INT16_C( 15837), INT16_C( 22748), INT16_C( 6197), INT16_C( 23429), INT16_C(-23495), INT16_C( -9809), INT16_C(-19367), INT16_C(-10631), INT16_C( -3845), INT16_C( -5637), INT16_C(-22436), INT16_C( 6126), INT16_C( 16975), INT16_C( 5830), INT16_C( 26963), INT16_C(-12238), INT16_C( 18631), INT16_C(-23641), INT16_C(-17151), INT16_C(-13103), INT16_C( 5981), INT16_C( 25710), INT16_C( 32081), INT16_C(-26911), INT16_C(-20494)), simde_mm512_set_epi16(INT16_C( 30869), INT16_C(-18069), INT16_C( 6380), INT16_C(-23760), INT16_C( -6614), INT16_C( 32014), INT16_C( -2152), INT16_C( 25253), INT16_C( -8172), INT16_C(-22416), INT16_C( 2949), INT16_C( 5712), INT16_C( 30074), INT16_C( 20521), INT16_C( 28471), INT16_C( -3867), INT16_C( -3892), INT16_C( 21304), INT16_C( 2611), INT16_C( 5827), INT16_C(-30009), INT16_C( 31479), INT16_C(-19390), INT16_C( 1047), INT16_C(-19967), INT16_C( 11404), INT16_C( 25909), INT16_C( 6741), INT16_C( 1933), INT16_C(-26599), INT16_C(-25469), INT16_C(-30351)), simde_mm512_set_epi16(INT16_C( 308), INT16_C(-23814), INT16_C( 32767), INT16_C(-12436), INT16_C(-24834), INT16_C( 7140), INT16_C(-12306), INT16_C( 32767), INT16_C( 
14576), INT16_C(-16219), INT16_C( 26378), INT16_C(-17783), INT16_C( 20265), INT16_C( 1154), INT16_C( 17840), INT16_C( -7712), INT16_C( -9529), INT16_C( -1132), INT16_C( 8737), INT16_C( 22802), INT16_C(-24179), INT16_C( 32767), INT16_C(-31628), INT16_C( 19678), INT16_C(-32768), INT16_C( -5747), INT16_C( 12806), INT16_C( 12722), INT16_C( 27643), INT16_C( 5482), INT16_C(-32768), INT16_C(-32768)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_adds_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_adds_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t src[32]; const simde__mmask32 k; const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { INT16_C( 72), -INT16_C( 5287), -INT16_C( 4542), INT16_C( 2288), -INT16_C( 8745), INT16_C( 27056), -INT16_C( 11265), -INT16_C( 27298), -INT16_C( 32448), -INT16_C( 11055), -INT16_C( 28733), INT16_C( 7301), INT16_C( 28274), -INT16_C( 27095), -INT16_C( 1294), INT16_C( 15311), INT16_C( 10490), INT16_C( 15398), INT16_C( 5654), -INT16_C( 4795), -INT16_C( 2572), -INT16_C( 3241), -INT16_C( 19000), INT16_C( 2185), INT16_C( 23095), -INT16_C( 1315), INT16_C( 25321), INT16_C( 23574), INT16_C( 16336), -INT16_C( 15374), -INT16_C( 16071), INT16_C( 13566) }, UINT32_C( 7349481), { -INT16_C( 19142), INT16_C( 12013), INT16_C( 17579), INT16_C( 29474), -INT16_C( 21510), INT16_C( 12668), INT16_C( 22789), -INT16_C( 4565), INT16_C( 17083), -INT16_C( 29878), INT16_C( 15489), -INT16_C( 17586), INT16_C( 19709), -INT16_C( 6161), INT16_C( 24432), -INT16_C( 21529), -INT16_C( 11243), -INT16_C( 16167), -INT16_C( 1255), INT16_C( 4915), -INT16_C( 20570), -INT16_C( 21692), INT16_C( 28424), -INT16_C( 15462), -INT16_C( 6991), INT16_C( 13135), -INT16_C( 25311), INT16_C( 7918), -INT16_C( 8726), INT16_C( 23045), -INT16_C( 5060), INT16_C( 20741) }, { -INT16_C( 8255), -INT16_C( 9711), INT16_C( 
17882), -INT16_C( 32275), INT16_C( 12788), -INT16_C( 724), -INT16_C( 14688), INT16_C( 21184), INT16_C( 4011), -INT16_C( 13179), INT16_C( 29613), -INT16_C( 26646), -INT16_C( 4016), -INT16_C( 29455), -INT16_C( 2084), -INT16_C( 25122), -INT16_C( 4138), -INT16_C( 20361), INT16_C( 25652), INT16_C( 10545), INT16_C( 24213), INT16_C( 13862), -INT16_C( 6620), -INT16_C( 12408), INT16_C( 3574), -INT16_C( 23653), -INT16_C( 31104), -INT16_C( 12230), INT16_C( 11126), INT16_C( 21084), INT16_C( 14882), -INT16_C( 1808) }, { -INT16_C( 27397), -INT16_C( 5287), -INT16_C( 4542), -INT16_C( 2801), -INT16_C( 8745), INT16_C( 11944), INT16_C( 8101), INT16_C( 16619), -INT16_C( 32448), -INT16_C( 11055), INT16_MAX, INT16_C( 7301), INT16_C( 28274), INT16_MIN, -INT16_C( 1294), INT16_C( 15311), INT16_C( 10490), INT16_C( 15398), INT16_C( 5654), -INT16_C( 4795), INT16_C( 3643), -INT16_C( 7830), INT16_C( 21804), INT16_C( 2185), INT16_C( 23095), -INT16_C( 1315), INT16_C( 25321), INT16_C( 23574), INT16_C( 16336), -INT16_C( 15374), -INT16_C( 16071), INT16_C( 13566) } }, { { INT16_C( 26410), INT16_C( 24233), -INT16_C( 9524), INT16_C( 24967), -INT16_C( 21192), INT16_C( 23959), INT16_C( 8084), -INT16_C( 30164), -INT16_C( 14292), -INT16_C( 21459), INT16_C( 26446), -INT16_C( 15236), -INT16_C( 9838), -INT16_C( 19178), INT16_C( 1555), INT16_C( 15789), INT16_C( 22126), INT16_C( 15004), INT16_C( 9009), INT16_C( 27035), INT16_C( 13265), INT16_C( 26054), -INT16_C( 3246), INT16_C( 32751), INT16_C( 7355), INT16_C( 2347), -INT16_C( 22397), INT16_C( 5581), -INT16_C( 7295), -INT16_C( 27446), INT16_C( 30954), INT16_C( 22738) }, UINT32_C(4287786702), { INT16_C( 11665), INT16_C( 25193), INT16_C( 12128), -INT16_C( 19513), -INT16_C( 18910), -INT16_C( 8910), INT16_C( 24018), INT16_C( 21990), -INT16_C( 19707), -INT16_C( 31125), INT16_C( 13719), -INT16_C( 32485), -INT16_C( 4691), INT16_C( 31961), INT16_C( 27483), -INT16_C( 4997), -INT16_C( 7016), -INT16_C( 1713), INT16_C( 5652), INT16_C( 13996), -INT16_C( 8499), -INT16_C( 
24812), -INT16_C( 1477), INT16_C( 16885), INT16_C( 24750), INT16_C( 17863), -INT16_C( 7531), INT16_C( 17350), -INT16_C( 24625), INT16_C( 10943), INT16_C( 14858), -INT16_C( 24041) }, { INT16_C( 26143), INT16_C( 13211), INT16_C( 18300), INT16_C( 18793), INT16_C( 32037), INT16_C( 25065), -INT16_C( 8584), INT16_C( 9890), INT16_C( 26942), -INT16_C( 11413), INT16_C( 12620), INT16_C( 6934), -INT16_C( 10800), -INT16_C( 9658), INT16_C( 23824), INT16_C( 12156), INT16_C( 6339), INT16_C( 16226), -INT16_C( 13473), -INT16_C( 31351), INT16_C( 29257), -INT16_C( 15898), -INT16_C( 30640), -INT16_C( 28953), INT16_C( 21233), INT16_C( 15713), INT16_C( 30851), INT16_C( 21337), -INT16_C( 24755), INT16_C( 23853), -INT16_C( 22020), -INT16_C( 16500) }, { INT16_C( 26410), INT16_MAX, INT16_C( 30428), -INT16_C( 720), -INT16_C( 21192), INT16_C( 23959), INT16_C( 15434), INT16_C( 31880), -INT16_C( 14292), INT16_MIN, INT16_C( 26339), -INT16_C( 25551), -INT16_C( 9838), INT16_C( 22303), INT16_MAX, INT16_C( 15789), INT16_C( 22126), INT16_C( 14513), INT16_C( 9009), INT16_C( 27035), INT16_C( 20758), INT16_C( 26054), -INT16_C( 3246), -INT16_C( 12068), INT16_MAX, INT16_MAX, INT16_C( 23320), INT16_MAX, INT16_MIN, INT16_MAX, -INT16_C( 7162), INT16_MIN } }, { { -INT16_C( 4415), INT16_C( 8702), -INT16_C( 30790), INT16_C( 934), -INT16_C( 29447), INT16_C( 18884), -INT16_C( 21740), INT16_C( 1495), INT16_C( 14845), -INT16_C( 32701), -INT16_C( 25423), -INT16_C( 301), INT16_C( 59), INT16_C( 14172), -INT16_C( 5975), INT16_C( 27638), -INT16_C( 2857), -INT16_C( 28276), INT16_C( 12924), INT16_C( 30100), INT16_C( 22718), -INT16_C( 11585), -INT16_C( 27133), INT16_C( 215), INT16_C( 6863), -INT16_C( 32640), INT16_C( 21430), -INT16_C( 3713), -INT16_C( 9389), -INT16_C( 984), INT16_C( 7875), -INT16_C( 26009) }, UINT32_C(2402022163), { -INT16_C( 16603), -INT16_C( 7420), -INT16_C( 15593), INT16_C( 6837), -INT16_C( 29350), INT16_C( 10522), -INT16_C( 25945), INT16_C( 24234), INT16_C( 10733), INT16_C( 16463), INT16_C( 30724), 
-INT16_C( 14531), -INT16_C( 23402), -INT16_C( 22174), -INT16_C( 29288), -INT16_C( 17096), INT16_C( 15693), INT16_C( 25761), INT16_C( 22016), INT16_C( 23167), -INT16_C( 26141), -INT16_C( 29820), INT16_C( 11828), INT16_C( 8681), INT16_C( 14423), INT16_C( 23394), -INT16_C( 24656), INT16_C( 18210), -INT16_C( 31677), -INT16_C( 9232), INT16_C( 10514), INT16_C( 24473) }, { INT16_C( 14950), INT16_C( 26307), INT16_C( 17040), INT16_C( 29889), INT16_C( 17884), INT16_C( 4351), -INT16_C( 6029), -INT16_C( 13775), -INT16_C( 27872), -INT16_C( 11995), INT16_C( 18226), INT16_C( 30232), INT16_C( 2252), -INT16_C( 8623), -INT16_C( 5583), -INT16_C( 26819), INT16_C( 36), -INT16_C( 18946), -INT16_C( 16573), INT16_C( 7977), INT16_C( 10244), INT16_C( 30511), INT16_C( 24592), INT16_C( 12353), INT16_C( 26356), INT16_C( 9729), INT16_C( 6573), INT16_C( 31132), -INT16_C( 4574), INT16_C( 21335), -INT16_C( 27432), -INT16_C( 533) }, { -INT16_C( 1653), INT16_C( 18887), -INT16_C( 30790), INT16_C( 934), -INT16_C( 11466), INT16_C( 18884), -INT16_C( 21740), INT16_C( 1495), -INT16_C( 17139), INT16_C( 4468), -INT16_C( 25423), -INT16_C( 301), -INT16_C( 21150), -INT16_C( 30797), INT16_MIN, INT16_MIN, INT16_C( 15729), INT16_C( 6815), INT16_C( 12924), INT16_C( 31144), INT16_C( 22718), INT16_C( 691), -INT16_C( 27133), INT16_C( 215), INT16_MAX, INT16_MAX, -INT16_C( 18083), INT16_MAX, -INT16_C( 9389), -INT16_C( 984), INT16_C( 7875), INT16_C( 23940) } }, { { -INT16_C( 5739), -INT16_C( 10062), -INT16_C( 9304), -INT16_C( 21257), INT16_C( 9731), INT16_C( 4899), INT16_C( 25734), INT16_C( 31299), INT16_C( 17866), INT16_C( 30625), INT16_C( 15710), -INT16_C( 32527), INT16_C( 18475), INT16_C( 1236), -INT16_C( 16419), INT16_C( 29185), -INT16_C( 19544), INT16_C( 20554), INT16_C( 16782), -INT16_C( 28164), INT16_C( 8039), -INT16_C( 4700), -INT16_C( 6269), INT16_C( 19816), INT16_C( 2348), -INT16_C( 29756), -INT16_C( 19130), INT16_C( 29195), -INT16_C( 8194), -INT16_C( 9354), INT16_C( 30622), INT16_C( 17997) }, 
UINT32_C(3096876842), { -INT16_C( 27944), INT16_C( 16201), -INT16_C( 4687), INT16_C( 13356), -INT16_C( 27436), INT16_C( 385), INT16_C( 18077), -INT16_C( 7028), -INT16_C( 26629), -INT16_C( 1706), -INT16_C( 13193), INT16_C( 5588), INT16_C( 8515), INT16_C( 27996), -INT16_C( 3400), -INT16_C( 28635), INT16_C( 28293), INT16_C( 14031), -INT16_C( 933), INT16_C( 12139), -INT16_C( 4976), INT16_C( 11824), -INT16_C( 17358), INT16_C( 11794), INT16_C( 26708), -INT16_C( 13529), -INT16_C( 972), INT16_C( 30688), INT16_C( 15389), -INT16_C( 10524), INT16_C( 2351), -INT16_C( 19354) }, { INT16_C( 13943), -INT16_C( 11542), INT16_C( 21810), -INT16_C( 15871), INT16_C( 12866), INT16_C( 29936), INT16_C( 750), INT16_C( 17058), -INT16_C( 13718), -INT16_C( 25075), -INT16_C( 4410), -INT16_C( 7403), -INT16_C( 1750), INT16_C( 22969), INT16_C( 8194), INT16_C( 30989), -INT16_C( 1962), -INT16_C( 30645), INT16_C( 19789), -INT16_C( 28854), INT16_C( 15231), INT16_C( 27908), -INT16_C( 22979), -INT16_C( 22352), -INT16_C( 17040), INT16_C( 13894), INT16_C( 23723), -INT16_C( 10726), -INT16_C( 11435), INT16_C( 22575), INT16_C( 15859), INT16_C( 18897) }, { -INT16_C( 5739), INT16_C( 4659), -INT16_C( 9304), -INT16_C( 2515), INT16_C( 9731), INT16_C( 30321), INT16_C( 25734), INT16_C( 31299), INT16_MIN, -INT16_C( 26781), -INT16_C( 17603), -INT16_C( 32527), INT16_C( 6765), INT16_C( 1236), -INT16_C( 16419), INT16_C( 2354), -INT16_C( 19544), -INT16_C( 16614), INT16_C( 18856), -INT16_C( 28164), INT16_C( 10255), -INT16_C( 4700), -INT16_C( 6269), -INT16_C( 10558), INT16_C( 2348), -INT16_C( 29756), -INT16_C( 19130), INT16_C( 19962), INT16_C( 3954), INT16_C( 12051), INT16_C( 30622), -INT16_C( 457) } }, { { INT16_C( 7477), -INT16_C( 32047), INT16_C( 7274), -INT16_C( 5870), INT16_C( 5719), -INT16_C( 27562), INT16_C( 1724), INT16_C( 11580), -INT16_C( 31804), INT16_C( 28515), INT16_C( 32223), INT16_C( 13381), INT16_C( 30033), INT16_C( 17548), INT16_C( 24242), -INT16_C( 6258), INT16_C( 24443), -INT16_C( 6807), INT16_C( 31611), 
-INT16_C( 11570), INT16_C( 9361), INT16_C( 20071), -INT16_C( 23765), -INT16_C( 4229), -INT16_C( 8666), INT16_C( 1374), -INT16_C( 23460), -INT16_C( 21190), -INT16_C( 14823), -INT16_C( 13327), INT16_C( 32548), -INT16_C( 24654) }, UINT32_C(1518607327), { INT16_C( 21143), INT16_C( 10285), -INT16_C( 27529), -INT16_C( 23946), -INT16_C( 3785), INT16_C( 24209), -INT16_C( 4144), INT16_C( 11363), -INT16_C( 25197), -INT16_C( 21287), -INT16_C( 13724), -INT16_C( 30601), INT16_C( 10570), INT16_C( 10536), -INT16_C( 21435), -INT16_C( 9085), -INT16_C( 20225), INT16_C( 30212), INT16_C( 31556), INT16_C( 31768), -INT16_C( 22164), INT16_C( 15578), INT16_C( 15768), INT16_C( 11368), INT16_C( 16859), INT16_C( 16344), INT16_C( 20492), INT16_C( 22215), -INT16_C( 4231), -INT16_C( 16769), INT16_C( 668), -INT16_C( 25702) }, { -INT16_C( 24653), -INT16_C( 2287), INT16_C( 10522), -INT16_C( 31117), INT16_C( 19922), INT16_C( 27331), INT16_C( 11147), INT16_C( 26262), INT16_C( 28525), INT16_C( 31141), INT16_C( 27839), INT16_C( 14543), INT16_C( 20060), -INT16_C( 1801), -INT16_C( 28336), INT16_C( 915), -INT16_C( 23504), INT16_C( 19195), INT16_C( 28365), -INT16_C( 24623), -INT16_C( 27460), INT16_C( 18185), -INT16_C( 24385), INT16_C( 11437), INT16_C( 21007), -INT16_C( 12635), INT16_C( 29886), INT16_C( 6662), -INT16_C( 574), INT16_C( 4882), -INT16_C( 23153), -INT16_C( 16618) }, { -INT16_C( 3510), INT16_C( 7998), -INT16_C( 17007), INT16_MIN, INT16_C( 16137), -INT16_C( 27562), INT16_C( 7003), INT16_MAX, INT16_C( 3328), INT16_C( 9854), INT16_C( 32223), -INT16_C( 16058), INT16_C( 30630), INT16_C( 17548), INT16_C( 24242), -INT16_C( 6258), INT16_C( 24443), -INT16_C( 6807), INT16_MAX, -INT16_C( 11570), INT16_C( 9361), INT16_C( 20071), -INT16_C( 23765), INT16_C( 22805), -INT16_C( 8666), INT16_C( 3709), -INT16_C( 23460), INT16_C( 28877), -INT16_C( 4805), -INT16_C( 13327), -INT16_C( 22485), -INT16_C( 24654) } }, { { INT16_C( 4425), INT16_C( 5642), -INT16_C( 9344), INT16_C( 15541), -INT16_C( 16529), INT16_C( 11907), 
INT16_C( 12383), INT16_C( 28251), INT16_C( 130), INT16_C( 16444), INT16_C( 17013), INT16_C( 14171), INT16_C( 27968), -INT16_C( 12470), INT16_C( 24851), INT16_C( 23694), -INT16_C( 26510), -INT16_C( 3469), INT16_C( 10355), -INT16_C( 7634), -INT16_C( 19993), INT16_C( 17937), INT16_C( 27873), INT16_C( 25524), -INT16_C( 3988), -INT16_C( 7772), -INT16_C( 205), INT16_C( 29465), INT16_C( 25452), INT16_C( 32578), -INT16_C( 12092), INT16_C( 14300) }, UINT32_C(3693694825), { INT16_C( 22647), INT16_C( 24511), -INT16_C( 12279), -INT16_C( 5211), INT16_C( 23100), -INT16_C( 22450), -INT16_C( 3510), INT16_C( 32138), -INT16_C( 23567), INT16_C( 24304), INT16_C( 12806), -INT16_C( 13347), -INT16_C( 18173), INT16_C( 27650), INT16_C( 11016), -INT16_C( 32696), INT16_C( 1923), -INT16_C( 29217), -INT16_C( 31529), INT16_C( 4984), -INT16_C( 14626), INT16_C( 10684), INT16_C( 18105), -INT16_C( 21850), -INT16_C( 26647), -INT16_C( 4344), -INT16_C( 6455), -INT16_C( 13126), -INT16_C( 17249), -INT16_C( 22472), -INT16_C( 32280), INT16_C( 27432) }, { INT16_C( 1928), INT16_C( 24824), INT16_C( 28811), INT16_C( 27251), INT16_C( 12087), -INT16_C( 3949), INT16_C( 14709), INT16_C( 24218), -INT16_C( 23600), -INT16_C( 26034), INT16_C( 2185), INT16_C( 10342), -INT16_C( 24635), -INT16_C( 21040), -INT16_C( 2016), -INT16_C( 22504), INT16_C( 4607), -INT16_C( 29944), INT16_C( 31873), -INT16_C( 18187), -INT16_C( 30549), INT16_C( 8616), INT16_C( 17345), -INT16_C( 28033), -INT16_C( 12826), INT16_C( 28460), -INT16_C( 27946), -INT16_C( 25705), INT16_C( 26673), INT16_C( 20808), INT16_C( 24672), INT16_C( 24826) }, { INT16_C( 24575), INT16_C( 5642), -INT16_C( 9344), INT16_C( 22040), -INT16_C( 16529), -INT16_C( 26399), INT16_C( 11199), INT16_C( 28251), INT16_MIN, -INT16_C( 1730), INT16_C( 14991), -INT16_C( 3005), INT16_C( 27968), -INT16_C( 12470), INT16_C( 9000), INT16_C( 23694), INT16_C( 6530), -INT16_C( 3469), INT16_C( 10355), -INT16_C( 13203), -INT16_C( 19993), INT16_C( 19300), INT16_C( 27873), INT16_C( 25524), -INT16_C( 
3988), -INT16_C( 7772), INT16_MIN, INT16_MIN, INT16_C( 9424), INT16_C( 32578), -INT16_C( 7608), INT16_MAX } }, { { INT16_C( 625), -INT16_C( 3093), -INT16_C( 8066), INT16_C( 10923), INT16_C( 21608), INT16_C( 10571), -INT16_C( 13673), INT16_C( 32187), -INT16_C( 6248), INT16_C( 28396), -INT16_C( 31878), -INT16_C( 21751), INT16_C( 20971), INT16_C( 19709), -INT16_C( 2127), INT16_C( 9132), -INT16_C( 26631), INT16_C( 30742), -INT16_C( 16009), -INT16_C( 8286), -INT16_C( 4843), -INT16_C( 21496), -INT16_C( 15177), INT16_C( 20265), INT16_C( 5547), INT16_C( 9661), -INT16_C( 14695), -INT16_C( 31535), -INT16_C( 12777), -INT16_C( 13872), INT16_C( 31941), -INT16_C( 16660) }, UINT32_C(2318795283), { -INT16_C( 10045), -INT16_C( 9879), INT16_C( 29381), INT16_C( 32133), -INT16_C( 20682), -INT16_C( 7732), -INT16_C( 30012), INT16_C( 23815), -INT16_C( 10160), INT16_C( 26850), -INT16_C( 19802), INT16_C( 27441), INT16_C( 7471), INT16_C( 16937), INT16_C( 24607), -INT16_C( 7475), INT16_C( 13880), -INT16_C( 325), INT16_C( 16808), -INT16_C( 8581), INT16_C( 18416), -INT16_C( 19264), -INT16_C( 14383), INT16_C( 8722), -INT16_C( 2913), INT16_C( 17802), -INT16_C( 17498), -INT16_C( 10832), -INT16_C( 9768), -INT16_C( 2280), -INT16_C( 6855), INT16_C( 29401) }, { -INT16_C( 27365), -INT16_C( 15248), -INT16_C( 5162), -INT16_C( 14686), INT16_C( 25138), INT16_C( 1146), -INT16_C( 29655), -INT16_C( 14298), -INT16_C( 20352), INT16_C( 9997), -INT16_C( 17045), INT16_C( 17404), INT16_C( 5271), -INT16_C( 12230), INT16_C( 5113), INT16_C( 5442), -INT16_C( 19800), INT16_C( 32473), INT16_C( 31645), -INT16_C( 12220), -INT16_C( 16418), INT16_C( 2004), -INT16_C( 1461), -INT16_C( 13104), -INT16_C( 8790), INT16_C( 5619), -INT16_C( 4197), INT16_C( 12888), -INT16_C( 28156), -INT16_C( 766), INT16_C( 17829), INT16_C( 19986) }, { INT16_MIN, -INT16_C( 25127), -INT16_C( 8066), INT16_C( 10923), INT16_C( 4456), INT16_C( 10571), -INT16_C( 13673), INT16_C( 32187), -INT16_C( 6248), INT16_MAX, -INT16_C( 31878), -INT16_C( 21751), 
INT16_C( 20971), INT16_C( 19709), -INT16_C( 2127), INT16_C( 9132), -INT16_C( 26631), INT16_C( 32148), INT16_MAX, -INT16_C( 8286), INT16_C( 1998), -INT16_C( 17260), -INT16_C( 15177), INT16_C( 20265), INT16_C( 5547), INT16_C( 23421), -INT16_C( 14695), INT16_C( 2056), -INT16_C( 12777), -INT16_C( 13872), INT16_C( 31941), INT16_MAX } }, { { -INT16_C( 5129), -INT16_C( 27188), INT16_C( 4455), INT16_C( 17765), INT16_C( 14800), INT16_C( 6988), INT16_C( 7219), -INT16_C( 8729), -INT16_C( 9478), -INT16_C( 27150), INT16_C( 19146), -INT16_C( 12601), -INT16_C( 13860), -INT16_C( 32309), -INT16_C( 8690), INT16_C( 1743), -INT16_C( 25399), INT16_C( 12443), INT16_C( 173), INT16_C( 32117), -INT16_C( 15815), INT16_C( 27800), -INT16_C( 32546), -INT16_C( 10167), INT16_C( 15194), INT16_C( 9325), INT16_C( 13445), INT16_C( 25074), -INT16_C( 16642), INT16_C( 3298), -INT16_C( 19812), INT16_C( 25874) }, UINT32_C(4220955982), { INT16_C( 2989), -INT16_C( 6536), INT16_C( 4301), -INT16_C( 21422), -INT16_C( 25712), -INT16_C( 5244), -INT16_C( 3370), INT16_C( 23311), INT16_C( 550), INT16_C( 9404), -INT16_C( 24640), INT16_C( 23601), INT16_C( 17233), -INT16_C( 24639), INT16_C( 22513), -INT16_C( 24934), INT16_C( 4707), INT16_C( 12421), -INT16_C( 10462), -INT16_C( 19492), INT16_C( 24947), INT16_C( 18846), -INT16_C( 21165), INT16_C( 31141), INT16_C( 25007), INT16_C( 28574), -INT16_C( 12544), INT16_C( 20939), -INT16_C( 29422), INT16_C( 1008), -INT16_C( 29980), INT16_C( 18338) }, { INT16_C( 10140), -INT16_C( 16520), INT16_C( 21758), INT16_C( 29042), INT16_C( 4277), INT16_C( 2235), INT16_C( 24765), INT16_C( 28034), INT16_C( 8385), -INT16_C( 15652), -INT16_C( 22289), INT16_C( 275), INT16_C( 1077), INT16_C( 6405), -INT16_C( 22642), INT16_C( 11105), -INT16_C( 9778), -INT16_C( 13078), INT16_C( 23597), -INT16_C( 7362), -INT16_C( 1684), INT16_C( 10731), INT16_C( 27993), INT16_C( 6806), INT16_C( 29581), INT16_C( 31964), -INT16_C( 4069), INT16_C( 20606), -INT16_C( 31756), -INT16_C( 32151), -INT16_C( 13782), -INT16_C( 
1875) }, { -INT16_C( 5129), -INT16_C( 23056), INT16_C( 26059), INT16_C( 7620), INT16_C( 14800), INT16_C( 6988), INT16_C( 21395), -INT16_C( 8729), INT16_C( 8935), -INT16_C( 27150), INT16_MIN, INT16_C( 23876), -INT16_C( 13860), -INT16_C( 18234), -INT16_C( 8690), -INT16_C( 13829), -INT16_C( 25399), -INT16_C( 657), INT16_C( 13135), INT16_C( 32117), INT16_C( 23263), INT16_C( 27800), -INT16_C( 32546), INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 13445), INT16_MAX, INT16_MIN, -INT16_C( 31143), INT16_MIN, INT16_C( 16463) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi16(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_mask_adds_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_adds_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask32 k; const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { UINT32_C(1382488990), { INT16_C( 13382), -INT16_C( 1131), -INT16_C( 8449), INT16_C( 8490), INT16_C( 27735), INT16_C( 28052), -INT16_C( 21201), INT16_C( 29768), INT16_C( 19795), -INT16_C( 30934), -INT16_C( 32642), INT16_C( 22005), INT16_C( 9346), INT16_C( 8543), -INT16_C( 14529), -INT16_C( 31373), INT16_C( 2299), -INT16_C( 1408), -INT16_C( 21530), INT16_C( 15643), -INT16_C( 20713), INT16_C( 18091), -INT16_C( 3236), -INT16_C( 20549), -INT16_C( 6847), -INT16_C( 16586), INT16_C( 11109), -INT16_C( 6124), INT16_C( 29776), -INT16_C( 28919), INT16_C( 31803), INT16_C( 13845) }, { -INT16_C( 27260), INT16_C( 27184), INT16_C( 19264), INT16_C( 22695), INT16_C( 21242), INT16_C( 22174), INT16_C( 22854), -INT16_C( 30971), INT16_C( 15423), -INT16_C( 23482), INT16_C( 23399), -INT16_C( 18548), -INT16_C( 27185), INT16_C( 2631), INT16_C( 23569), -INT16_C( 27328), INT16_C( 
28913), INT16_C( 13055), -INT16_C( 22597), -INT16_C( 19062), INT16_C( 10489), INT16_C( 16139), INT16_C( 4226), -INT16_C( 15930), INT16_C( 3404), -INT16_C( 19355), -INT16_C( 3480), INT16_C( 14187), -INT16_C( 19833), -INT16_C( 26303), -INT16_C( 32498), INT16_C( 46) }, { INT16_C( 0), INT16_C( 26053), INT16_C( 10815), INT16_C( 31185), INT16_MAX, INT16_C( 0), INT16_C( 0), -INT16_C( 1203), INT16_MAX, INT16_MIN, INT16_C( 0), INT16_C( 3457), -INT16_C( 17839), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 31212), INT16_C( 11647), INT16_MIN, INT16_C( 0), INT16_C( 0), INT16_MAX, INT16_C( 990), INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_C( 0), INT16_C( 0), INT16_C( 9943), INT16_C( 0), -INT16_C( 695), INT16_C( 0) } }, { UINT32_C(2888969969), { -INT16_C( 17195), -INT16_C( 12703), INT16_C( 27876), INT16_C( 26126), -INT16_C( 11140), -INT16_C( 14041), -INT16_C( 29215), INT16_C( 18813), -INT16_C( 6017), INT16_C( 1664), -INT16_C( 15973), -INT16_C( 22113), -INT16_C( 12734), INT16_C( 13225), -INT16_C( 9220), -INT16_C( 11809), INT16_C( 16535), INT16_C( 31903), -INT16_C( 21076), INT16_C( 10722), INT16_C( 2690), INT16_C( 25586), INT16_C( 28567), INT16_C( 5805), INT16_C( 11607), -INT16_C( 3556), -INT16_C( 17169), INT16_C( 12700), INT16_C( 17802), -INT16_C( 31131), INT16_C( 17441), -INT16_C( 18345) }, { -INT16_C( 2427), INT16_C( 12596), INT16_C( 6052), INT16_C( 9818), INT16_C( 19489), -INT16_C( 18295), INT16_C( 14011), INT16_C( 5070), -INT16_C( 5532), INT16_C( 21253), -INT16_C( 24154), INT16_C( 12420), -INT16_C( 5657), INT16_C( 2230), INT16_C( 3374), -INT16_C( 19520), -INT16_C( 2812), -INT16_C( 22300), INT16_C( 16140), INT16_C( 11726), INT16_C( 22411), INT16_C( 18405), -INT16_C( 19570), -INT16_C( 3494), INT16_C( 24477), INT16_C( 17477), -INT16_C( 14079), -INT16_C( 6028), INT16_C( 11187), -INT16_C( 7696), -INT16_C( 20424), INT16_C( 15508) }, { -INT16_C( 19622), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 8349), -INT16_C( 32336), -INT16_C( 15204), INT16_C( 23883), INT16_C( 0), INT16_C( 
22917), INT16_MIN, -INT16_C( 9693), INT16_C( 0), INT16_C( 15455), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 9603), INT16_C( 0), INT16_C( 0), INT16_C( 25101), INT16_MAX, INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 31248), INT16_C( 6672), INT16_C( 0), INT16_MIN, INT16_C( 0), -INT16_C( 2837) } }, { UINT32_C(2984540325), { -INT16_C( 19785), INT16_C( 17374), -INT16_C( 15606), -INT16_C( 26486), -INT16_C( 7050), INT16_C( 5258), -INT16_C( 12477), INT16_C( 17496), -INT16_C( 13160), INT16_C( 19244), INT16_C( 7415), INT16_C( 12332), -INT16_C( 16179), INT16_C( 29292), INT16_C( 20793), -INT16_C( 4060), INT16_C( 515), INT16_C( 3379), -INT16_C( 16954), INT16_C( 15525), INT16_C( 12193), -INT16_C( 6832), -INT16_C( 22274), -INT16_C( 26839), INT16_C( 22133), INT16_C( 27874), INT16_C( 3954), INT16_C( 16284), INT16_C( 2511), INT16_C( 2226), -INT16_C( 10662), INT16_C( 24057) }, { INT16_C( 11480), -INT16_C( 24981), INT16_C( 4330), -INT16_C( 29733), INT16_C( 11072), INT16_C( 15984), -INT16_C( 25900), INT16_C( 18901), -INT16_C( 18192), INT16_C( 25269), INT16_C( 21191), -INT16_C( 26974), INT16_C( 21595), -INT16_C( 19041), -INT16_C( 26582), INT16_C( 530), INT16_C( 32196), -INT16_C( 20831), INT16_C( 31886), -INT16_C( 12742), -INT16_C( 21849), INT16_C( 31500), -INT16_C( 7612), INT16_C( 13508), INT16_C( 31386), INT16_C( 24983), INT16_C( 14796), INT16_C( 10231), -INT16_C( 26995), -INT16_C( 18468), -INT16_C( 4562), -INT16_C( 3143) }, { -INT16_C( 8305), INT16_C( 0), -INT16_C( 11276), INT16_C( 0), INT16_C( 0), INT16_C( 21242), INT16_C( 0), INT16_MAX, INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 14642), INT16_C( 5416), INT16_C( 10251), -INT16_C( 5789), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 14932), INT16_C( 0), INT16_C( 0), INT16_C( 24668), -INT16_C( 29886), -INT16_C( 13331), INT16_MAX, INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 24484), -INT16_C( 16242), INT16_C( 0), INT16_C( 20914) } }, { UINT32_C(4204878444), { -INT16_C( 9258), INT16_C( 32456), -INT16_C( 
11130), -INT16_C( 13575), -INT16_C( 16714), INT16_C( 20735), -INT16_C( 27080), INT16_C( 1201), -INT16_C( 22065), INT16_C( 23595), INT16_C( 1855), INT16_C( 28179), -INT16_C( 13067), INT16_C( 24929), INT16_C( 551), -INT16_C( 677), INT16_C( 9182), INT16_C( 25723), INT16_C( 30200), -INT16_C( 20946), INT16_C( 11571), INT16_C( 27647), -INT16_C( 20285), -INT16_C( 28049), -INT16_C( 26023), -INT16_C( 26130), INT16_C( 417), -INT16_C( 27129), INT16_C( 26830), -INT16_C( 2568), INT16_C( 21354), INT16_C( 18674) }, { INT16_C( 28279), INT16_C( 28588), -INT16_C( 9245), INT16_C( 5661), INT16_C( 7176), -INT16_C( 13183), -INT16_C( 3891), INT16_C( 9822), INT16_C( 19850), INT16_C( 11199), -INT16_C( 14770), INT16_C( 7361), -INT16_C( 18130), -INT16_C( 26351), INT16_C( 1037), -INT16_C( 31519), -INT16_C( 29070), INT16_C( 22003), INT16_C( 4201), INT16_C( 29035), -INT16_C( 5075), -INT16_C( 1475), -INT16_C( 25380), INT16_C( 26144), -INT16_C( 7959), INT16_C( 14225), INT16_C( 21158), -INT16_C( 10924), INT16_C( 25868), INT16_C( 6510), INT16_C( 20329), -INT16_C( 9315) }, { INT16_C( 0), INT16_C( 0), -INT16_C( 20375), -INT16_C( 7914), INT16_C( 0), INT16_C( 7552), -INT16_C( 30971), INT16_C( 0), INT16_C( 0), INT16_MAX, INT16_C( 0), INT16_MAX, -INT16_C( 31197), INT16_C( 0), INT16_C( 1588), INT16_C( 0), -INT16_C( 19888), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 26172), INT16_C( 0), -INT16_C( 1905), INT16_C( 0), -INT16_C( 11905), INT16_C( 0), INT16_MIN, INT16_MAX, INT16_C( 3942), INT16_MAX, INT16_C( 9359) } }, { UINT32_C(1177587933), { -INT16_C( 25696), -INT16_C( 12872), -INT16_C( 2681), INT16_C( 25543), -INT16_C( 5999), INT16_C( 31433), INT16_C( 23240), INT16_C( 28338), INT16_C( 1709), -INT16_C( 18109), -INT16_C( 20117), -INT16_C( 10798), INT16_C( 28417), -INT16_C( 8528), -INT16_C( 7681), -INT16_C( 24795), -INT16_C( 8836), INT16_C( 1133), INT16_C( 13522), INT16_C( 25703), INT16_C( 12572), -INT16_C( 6946), -INT16_C( 28533), INT16_C( 14419), -INT16_C( 26986), INT16_C( 753), -INT16_C( 
15544), INT16_C( 18903), -INT16_C( 30926), INT16_C( 12583), INT16_C( 19560), -INT16_C( 6703) }, { INT16_C( 15913), -INT16_C( 791), INT16_C( 20594), -INT16_C( 28832), INT16_C( 16001), INT16_C( 3443), -INT16_C( 14641), INT16_C( 25925), INT16_C( 14173), -INT16_C( 23193), INT16_C( 16122), INT16_C( 11758), INT16_C( 5574), INT16_C( 11870), INT16_C( 12130), -INT16_C( 29933), -INT16_C( 915), -INT16_C( 8057), -INT16_C( 6323), -INT16_C( 12689), -INT16_C( 7642), -INT16_C( 2597), INT16_C( 8617), INT16_C( 1626), -INT16_C( 15784), INT16_C( 21163), -INT16_C( 26368), -INT16_C( 14721), -INT16_C( 8530), INT16_C( 4341), INT16_C( 2061), INT16_C( 31644) }, { -INT16_C( 9783), INT16_C( 0), INT16_C( 17913), -INT16_C( 3289), INT16_C( 10002), INT16_C( 0), INT16_C( 8599), INT16_MAX, INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_MAX, INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 4930), -INT16_C( 9543), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 21916), INT16_MIN, INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 21621), INT16_C( 0) } }, { UINT32_C(1381704453), { -INT16_C( 13813), INT16_C( 12576), -INT16_C( 852), INT16_C( 21798), -INT16_C( 32739), INT16_C( 30043), INT16_C( 1602), INT16_C( 17351), INT16_C( 18335), INT16_C( 19977), -INT16_C( 475), INT16_C( 12894), -INT16_C( 1529), INT16_C( 3245), INT16_C( 2078), INT16_C( 10590), INT16_C( 32466), INT16_C( 32602), -INT16_C( 32646), -INT16_C( 26668), INT16_C( 12288), INT16_C( 17164), -INT16_C( 11210), -INT16_C( 10618), -INT16_C( 28901), INT16_C( 16420), -INT16_C( 32114), -INT16_C( 27278), INT16_C( 8317), -INT16_C( 25695), -INT16_C( 216), -INT16_C( 1084) }, { INT16_C( 7805), -INT16_C( 1926), INT16_C( 20126), -INT16_C( 24945), -INT16_C( 25474), -INT16_C( 18975), INT16_C( 26480), -INT16_C( 29813), -INT16_C( 20489), -INT16_C( 31285), INT16_C( 15665), -INT16_C( 20966), -INT16_C( 17571), -INT16_C( 31159), INT16_C( 3514), INT16_C( 14209), -INT16_C( 1237), -INT16_C( 14033), -INT16_C( 
16567), -INT16_C( 14232), INT16_C( 18779), -INT16_C( 13443), INT16_C( 2225), -INT16_C( 22442), INT16_C( 8631), -INT16_C( 6099), INT16_C( 18270), -INT16_C( 17257), -INT16_C( 8190), -INT16_C( 17342), -INT16_C( 15378), INT16_C( 6643) }, { -INT16_C( 6008), INT16_C( 0), INT16_C( 19274), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 2154), -INT16_C( 11308), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 27914), INT16_C( 0), INT16_C( 0), INT16_C( 31229), INT16_C( 18569), INT16_C( 0), INT16_MIN, INT16_C( 31067), INT16_C( 0), -INT16_C( 8985), INT16_C( 0), INT16_C( 0), INT16_C( 10321), INT16_C( 0), INT16_C( 0), INT16_C( 127), INT16_C( 0), -INT16_C( 15594), INT16_C( 0) } }, { UINT32_C( 132326334), { INT16_C( 19426), INT16_C( 15823), INT16_C( 19604), INT16_C( 17672), INT16_C( 24148), INT16_C( 3053), INT16_C( 6783), -INT16_C( 8716), -INT16_C( 29855), INT16_C( 25497), -INT16_C( 9365), INT16_C( 22815), INT16_C( 5022), INT16_C( 23667), INT16_C( 22070), INT16_C( 6244), INT16_C( 13217), INT16_C( 13653), INT16_C( 23936), -INT16_C( 11141), INT16_C( 26811), INT16_C( 15072), -INT16_C( 11133), -INT16_C( 7145), -INT16_C( 20129), -INT16_C( 13752), INT16_C( 26508), INT16_C( 11044), -INT16_C( 26758), -INT16_C( 20345), -INT16_C( 5139), -INT16_C( 28984) }, { INT16_C( 7455), -INT16_C( 24637), INT16_C( 15994), INT16_C( 13683), INT16_C( 21415), INT16_C( 10863), -INT16_C( 30937), -INT16_C( 31218), INT16_C( 22072), -INT16_C( 15279), INT16_C( 30142), INT16_C( 14575), INT16_C( 30476), -INT16_C( 1559), -INT16_C( 20126), -INT16_C( 32377), INT16_C( 19151), INT16_C( 18720), -INT16_C( 27511), INT16_C( 12415), -INT16_C( 4377), INT16_C( 3930), INT16_C( 26741), -INT16_C( 21099), -INT16_C( 6465), INT16_C( 32114), INT16_C( 24923), INT16_C( 26549), -INT16_C( 24872), INT16_C( 15200), -INT16_C( 6320), INT16_C( 8124) }, { INT16_C( 0), -INT16_C( 8814), INT16_MAX, INT16_C( 31355), INT16_MAX, INT16_C( 13916), INT16_C( 0), INT16_MIN, -INT16_C( 7783), INT16_C( 10218), INT16_C( 0), 
INT16_C( 0), INT16_C( 0), INT16_C( 22108), INT16_C( 0), INT16_C( 0), INT16_C( 32368), INT16_C( 32373), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 19002), INT16_C( 15608), -INT16_C( 28244), -INT16_C( 26594), INT16_C( 18362), INT16_MAX, INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT32_C(3144211762), { -INT16_C( 6287), INT16_C( 22763), INT16_C( 17878), INT16_C( 19303), -INT16_C( 595), INT16_C( 27897), INT16_C( 27619), INT16_C( 16361), -INT16_C( 24628), -INT16_C( 23130), INT16_C( 1853), -INT16_C( 29216), -INT16_C( 25362), INT16_C( 8364), INT16_C( 5497), -INT16_C( 5413), -INT16_C( 14596), -INT16_C( 11709), -INT16_C( 22005), -INT16_C( 18146), INT16_C( 6055), -INT16_C( 29915), INT16_C( 3970), INT16_C( 20170), INT16_C( 28846), -INT16_C( 5133), -INT16_C( 11401), INT16_C( 26233), INT16_C( 9584), -INT16_C( 5754), INT16_C( 25146), INT16_C( 14292) }, { INT16_C( 5928), INT16_C( 13321), INT16_C( 10177), INT16_C( 27117), INT16_C( 4670), -INT16_C( 16140), -INT16_C( 16863), -INT16_C( 12529), INT16_C( 558), -INT16_C( 22853), INT16_C( 13526), INT16_C( 17932), -INT16_C( 28071), -INT16_C( 27601), INT16_C( 1012), INT16_C( 7627), -INT16_C( 11238), -INT16_C( 9135), INT16_C( 16124), INT16_C( 14917), INT16_C( 14672), INT16_C( 29435), INT16_C( 2807), INT16_C( 9537), -INT16_C( 1012), -INT16_C( 7477), -INT16_C( 10448), -INT16_C( 30168), INT16_C( 22634), INT16_C( 24094), -INT16_C( 5797), INT16_C( 30331) }, { INT16_C( 0), INT16_MAX, INT16_C( 0), INT16_C( 0), INT16_C( 4075), INT16_C( 11757), INT16_C( 0), INT16_C( 0), -INT16_C( 24070), INT16_C( 0), INT16_C( 15379), -INT16_C( 11284), INT16_MIN, INT16_C( 0), INT16_C( 6509), INT16_C( 2214), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 3229), INT16_C( 0), -INT16_C( 480), INT16_C( 6777), INT16_C( 0), INT16_C( 27834), -INT16_C( 12610), INT16_C( 0), -INT16_C( 3935), INT16_C( 32218), INT16_C( 18340), INT16_C( 0), INT16_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = 
simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_maskz_adds_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_adds_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu8(UINT8_C( 52), UINT8_C( 29), UINT8_C( 31), UINT8_C(206), UINT8_C( 40), UINT8_C(160), UINT8_C(244), UINT8_C( 85), UINT8_C( 47), UINT8_C(153), UINT8_C(218), UINT8_C(226), UINT8_C( 99), UINT8_C(129), UINT8_C( 68), UINT8_C(218), UINT8_C(122), UINT8_C( 56), UINT8_C(240), UINT8_C( 10), UINT8_C(228), UINT8_C( 40), UINT8_C( 70), UINT8_C( 14), UINT8_C( 67), UINT8_C( 37), UINT8_C( 87), UINT8_C(131), UINT8_C( 29), UINT8_C(175), UINT8_C(167), UINT8_C(223), UINT8_C(170), UINT8_C( 76), UINT8_C( 39), UINT8_C(254), UINT8_C(222), UINT8_C(105), UINT8_C(193), UINT8_C(144), UINT8_C(182), UINT8_C(106), UINT8_C(199), UINT8_C(161), UINT8_C(173), UINT8_C( 65), UINT8_C( 54), UINT8_C(148), UINT8_C(222), UINT8_C( 9), UINT8_C(237), UINT8_C( 76), UINT8_C(149), UINT8_C(196), UINT8_C(194), UINT8_C(114), UINT8_C(167), UINT8_C( 47), UINT8_C(174), UINT8_C(135), UINT8_C(170), UINT8_C( 1), UINT8_C( 40), UINT8_C( 68)), simde_x_mm512_set_epu8(UINT8_C(183), UINT8_C(129), UINT8_C(214), UINT8_C( 34), UINT8_C(187), UINT8_C(142), UINT8_C(238), UINT8_C(248), UINT8_C(175), UINT8_C(127), UINT8_C(231), UINT8_C(164), UINT8_C( 16), UINT8_C(128), UINT8_C( 32), UINT8_C( 28), UINT8_C(111), UINT8_C( 40), UINT8_C( 8), UINT8_C(160), UINT8_C( 37), UINT8_C(193), UINT8_C(195), UINT8_C(135), UINT8_C(102), UINT8_C( 87), UINT8_C(146), UINT8_C(206), UINT8_C( 94), UINT8_C(109), UINT8_C(112), UINT8_C( 83), UINT8_C( 63), UINT8_C(215), UINT8_C( 46), UINT8_C( 55), UINT8_C( 78), UINT8_C(211), UINT8_C(175), UINT8_C(210), UINT8_C( 60), UINT8_C(205), UINT8_C(118), UINT8_C(142), UINT8_C(109), UINT8_C( 48), UINT8_C( 
54), UINT8_C( 54), UINT8_C( 99), UINT8_C(248), UINT8_C(120), UINT8_C(165), UINT8_C(189), UINT8_C(132), UINT8_C(130), UINT8_C( 16), UINT8_C( 6), UINT8_C(104), UINT8_C(176), UINT8_C(111), UINT8_C(254), UINT8_C(193), UINT8_C(106), UINT8_C( 5)), simde_x_mm512_set_epu8(UINT8_C(235), UINT8_C(158), UINT8_C(245), UINT8_C(240), UINT8_C(227), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(222), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(115), UINT8_C(255), UINT8_C(100), UINT8_C(246), UINT8_C(233), UINT8_C( 96), UINT8_C(248), UINT8_C(170), UINT8_C(255), UINT8_C(233), UINT8_C(255), UINT8_C(149), UINT8_C(169), UINT8_C(124), UINT8_C(233), UINT8_C(255), UINT8_C(123), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(233), UINT8_C(255), UINT8_C( 85), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(242), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(113), UINT8_C(108), UINT8_C(202), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(241), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(130), UINT8_C(173), UINT8_C(151), UINT8_C(255), UINT8_C(246), UINT8_C(255), UINT8_C(194), UINT8_C(146), UINT8_C( 73)) }, { simde_x_mm512_set_epu8(UINT8_C( 38), UINT8_C(254), UINT8_C(204), UINT8_C( 72), UINT8_C(177), UINT8_C( 63), UINT8_C( 67), UINT8_C( 50), UINT8_C( 71), UINT8_C(226), UINT8_C(146), UINT8_C( 4), UINT8_C(134), UINT8_C( 91), UINT8_C(195), UINT8_C(148), UINT8_C( 28), UINT8_C(158), UINT8_C(223), UINT8_C(181), UINT8_C(218), UINT8_C(114), UINT8_C(200), UINT8_C(243), UINT8_C(255), UINT8_C( 48), UINT8_C(233), UINT8_C( 82), UINT8_C( 22), UINT8_C( 26), UINT8_C(128), UINT8_C(166), UINT8_C( 79), UINT8_C(206), UINT8_C(109), UINT8_C(113), UINT8_C( 86), UINT8_C(112), UINT8_C(215), UINT8_C(187), UINT8_C(174), UINT8_C(250), UINT8_C(225), UINT8_C(206), UINT8_C(242), UINT8_C(141), UINT8_C(197), UINT8_C(179), UINT8_C( 88), UINT8_C(237), UINT8_C(191), UINT8_C(149), UINT8_C(130), UINT8_C( 63), UINT8_C( 54), UINT8_C(129), UINT8_C(126), UINT8_C( 18), 
UINT8_C( 43), UINT8_C(251), UINT8_C(214), UINT8_C(155), UINT8_C( 61), UINT8_C(138)), simde_x_mm512_set_epu8(UINT8_C(142), UINT8_C( 33), UINT8_C( 51), UINT8_C(244), UINT8_C( 33), UINT8_C(106), UINT8_C( 75), UINT8_C(210), UINT8_C(121), UINT8_C( 52), UINT8_C(233), UINT8_C(109), UINT8_C(205), UINT8_C(175), UINT8_C(230), UINT8_C( 70), UINT8_C( 28), UINT8_C(216), UINT8_C( 76), UINT8_C(137), UINT8_C(129), UINT8_C( 5), UINT8_C(149), UINT8_C(132), UINT8_C(159), UINT8_C( 55), UINT8_C( 47), UINT8_C(212), UINT8_C(202), UINT8_C( 26), UINT8_C(247), UINT8_C( 19), UINT8_C(109), UINT8_C( 59), UINT8_C( 49), UINT8_C(175), UINT8_C( 15), UINT8_C(240), UINT8_C(155), UINT8_C( 20), UINT8_C( 85), UINT8_C( 57), UINT8_C( 91), UINT8_C(177), UINT8_C(179), UINT8_C(200), UINT8_C( 26), UINT8_C( 99), UINT8_C( 9), UINT8_C(177), UINT8_C(151), UINT8_C(246), UINT8_C(116), UINT8_C(162), UINT8_C(144), UINT8_C( 44), UINT8_C(240), UINT8_C(226), UINT8_C(215), UINT8_C(206), UINT8_C(114), UINT8_C(250), UINT8_C(170), UINT8_C(220)), simde_x_mm512_set_epu8(UINT8_C(180), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(210), UINT8_C(169), UINT8_C(142), UINT8_C(255), UINT8_C(192), UINT8_C(255), UINT8_C(255), UINT8_C(113), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(218), UINT8_C( 56), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(119), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(103), UINT8_C(255), UINT8_C(255), UINT8_C(224), UINT8_C( 52), UINT8_C(255), UINT8_C(185), UINT8_C(188), UINT8_C(255), UINT8_C(158), UINT8_C(255), UINT8_C(101), UINT8_C(255), UINT8_C(255), UINT8_C(207), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(223), UINT8_C(255), UINT8_C( 97), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(246), UINT8_C(225), UINT8_C(198), UINT8_C(173), UINT8_C(255), UINT8_C(244), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(231), UINT8_C(255)) }, { simde_x_mm512_set_epu8(UINT8_C(103), UINT8_C(253), UINT8_C( 31), 
UINT8_C(185), UINT8_C(101), UINT8_C(201), UINT8_C( 20), UINT8_C( 77), UINT8_C(110), UINT8_C(116), UINT8_C(226), UINT8_C( 37), UINT8_C(155), UINT8_C( 44), UINT8_C(198), UINT8_C(182), UINT8_C(132), UINT8_C( 61), UINT8_C(104), UINT8_C(181), UINT8_C( 77), UINT8_C(161), UINT8_C(154), UINT8_C(195), UINT8_C(148), UINT8_C(239), UINT8_C(178), UINT8_C(165), UINT8_C( 6), UINT8_C(251), UINT8_C( 97), UINT8_C(247), UINT8_C( 8), UINT8_C( 52), UINT8_C( 96), UINT8_C( 34), UINT8_C(233), UINT8_C( 1), UINT8_C( 7), UINT8_C(254), UINT8_C( 3), UINT8_C( 8), UINT8_C(168), UINT8_C(221), UINT8_C(238), UINT8_C(144), UINT8_C( 18), UINT8_C(245), UINT8_C(216), UINT8_C(239), UINT8_C(128), UINT8_C(174), UINT8_C(108), UINT8_C(236), UINT8_C(204), UINT8_C(151), UINT8_C( 83), UINT8_C(224), UINT8_C(253), UINT8_C( 94), UINT8_C( 40), UINT8_C( 10), UINT8_C(195), UINT8_C( 72)), simde_x_mm512_set_epu8(UINT8_C(196), UINT8_C(220), UINT8_C(209), UINT8_C(146), UINT8_C( 66), UINT8_C(240), UINT8_C(160), UINT8_C( 58), UINT8_C(206), UINT8_C(157), UINT8_C(225), UINT8_C(238), UINT8_C(235), UINT8_C( 53), UINT8_C( 91), UINT8_C( 97), UINT8_C(175), UINT8_C( 22), UINT8_C( 33), UINT8_C(101), UINT8_C(189), UINT8_C(131), UINT8_C(219), UINT8_C( 30), UINT8_C( 36), UINT8_C( 11), UINT8_C(180), UINT8_C( 46), UINT8_C(126), UINT8_C(239), UINT8_C(135), UINT8_C( 44), UINT8_C(198), UINT8_C(149), UINT8_C(235), UINT8_C( 72), UINT8_C( 18), UINT8_C(214), UINT8_C( 92), UINT8_C(199), UINT8_C( 79), UINT8_C( 28), UINT8_C(156), UINT8_C( 37), UINT8_C(212), UINT8_C(167), UINT8_C( 62), UINT8_C(201), UINT8_C(139), UINT8_C(215), UINT8_C( 68), UINT8_C(117), UINT8_C( 83), UINT8_C(105), UINT8_C(232), UINT8_C(251), UINT8_C( 6), UINT8_C(165), UINT8_C(199), UINT8_C(183), UINT8_C(254), UINT8_C( 55), UINT8_C(167), UINT8_C(238)), simde_x_mm512_set_epu8(UINT8_C(255), UINT8_C(255), UINT8_C(240), UINT8_C(255), UINT8_C(167), UINT8_C(255), UINT8_C(180), UINT8_C(135), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C( 97), 
UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C( 83), UINT8_C(137), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(225), UINT8_C(184), UINT8_C(250), UINT8_C(255), UINT8_C(211), UINT8_C(132), UINT8_C(255), UINT8_C(232), UINT8_C(255), UINT8_C(206), UINT8_C(201), UINT8_C(255), UINT8_C(106), UINT8_C(251), UINT8_C(215), UINT8_C( 99), UINT8_C(255), UINT8_C( 82), UINT8_C( 36), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C( 80), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(196), UINT8_C(255), UINT8_C(191), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C( 89), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C( 65), UINT8_C(255), UINT8_C(255)) }, { simde_x_mm512_set_epu8(UINT8_C( 7), UINT8_C( 99), UINT8_C(210), UINT8_C(110), UINT8_C(202), UINT8_C( 10), UINT8_C(247), UINT8_C( 68), UINT8_C(177), UINT8_C(186), UINT8_C(247), UINT8_C( 36), UINT8_C(212), UINT8_C(146), UINT8_C(251), UINT8_C( 66), UINT8_C( 0), UINT8_C( 8), UINT8_C(116), UINT8_C(102), UINT8_C(175), UINT8_C(223), UINT8_C( 15), UINT8_C(232), UINT8_C(127), UINT8_C( 63), UINT8_C( 84), UINT8_C(165), UINT8_C( 18), UINT8_C(250), UINT8_C( 71), UINT8_C( 18), UINT8_C(132), UINT8_C(155), UINT8_C(102), UINT8_C( 18), UINT8_C(177), UINT8_C(146), UINT8_C(169), UINT8_C(139), UINT8_C( 21), UINT8_C( 79), UINT8_C( 4), UINT8_C(129), UINT8_C(168), UINT8_C( 42), UINT8_C( 9), UINT8_C(184), UINT8_C( 46), UINT8_C( 53), UINT8_C(243), UINT8_C(140), UINT8_C(158), UINT8_C(112), UINT8_C( 35), UINT8_C( 49), UINT8_C(253), UINT8_C(233), UINT8_C( 33), UINT8_C(178), UINT8_C(117), UINT8_C(161), UINT8_C(168), UINT8_C( 39)), simde_x_mm512_set_epu8(UINT8_C( 75), UINT8_C(218), UINT8_C(162), UINT8_C( 30), UINT8_C(119), UINT8_C(231), UINT8_C(193), UINT8_C(230), UINT8_C(151), UINT8_C( 49), UINT8_C(234), UINT8_C( 78), UINT8_C( 76), UINT8_C( 37), UINT8_C(174), UINT8_C(105), UINT8_C(224), UINT8_C(210), UINT8_C(188), UINT8_C(142), UINT8_C(116), UINT8_C(129), UINT8_C( 93), UINT8_C(124), UINT8_C(248), 
UINT8_C(145), UINT8_C(230), UINT8_C( 41), UINT8_C( 66), UINT8_C(115), UINT8_C(180), UINT8_C( 47), UINT8_C( 55), UINT8_C(104), UINT8_C(123), UINT8_C(150), UINT8_C(165), UINT8_C( 22), UINT8_C( 0), UINT8_C(101), UINT8_C( 55), UINT8_C( 23), UINT8_C( 90), UINT8_C( 64), UINT8_C( 52), UINT8_C(254), UINT8_C(116), UINT8_C(243), UINT8_C( 4), UINT8_C( 84), UINT8_C( 82), UINT8_C(103), UINT8_C( 43), UINT8_C(237), UINT8_C( 7), UINT8_C( 76), UINT8_C( 88), UINT8_C(212), UINT8_C( 63), UINT8_C(146), UINT8_C(224), UINT8_C(223), UINT8_C(207), UINT8_C(220)), simde_x_mm512_set_epu8(UINT8_C( 82), UINT8_C(255), UINT8_C(255), UINT8_C(140), UINT8_C(255), UINT8_C(241), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(235), UINT8_C(255), UINT8_C(114), UINT8_C(255), UINT8_C(183), UINT8_C(255), UINT8_C(171), UINT8_C(224), UINT8_C(218), UINT8_C(255), UINT8_C(244), UINT8_C(255), UINT8_C(255), UINT8_C(108), UINT8_C(255), UINT8_C(255), UINT8_C(208), UINT8_C(255), UINT8_C(206), UINT8_C( 84), UINT8_C(255), UINT8_C(251), UINT8_C( 65), UINT8_C(187), UINT8_C(255), UINT8_C(225), UINT8_C(168), UINT8_C(255), UINT8_C(168), UINT8_C(169), UINT8_C(240), UINT8_C( 76), UINT8_C(102), UINT8_C( 94), UINT8_C(193), UINT8_C(220), UINT8_C(255), UINT8_C(125), UINT8_C(255), UINT8_C( 50), UINT8_C(137), UINT8_C(255), UINT8_C(243), UINT8_C(201), UINT8_C(255), UINT8_C( 42), UINT8_C(125), UINT8_C(255), UINT8_C(255), UINT8_C( 96), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255)) }, { simde_x_mm512_set_epu8(UINT8_C(233), UINT8_C(112), UINT8_C( 62), UINT8_C( 65), UINT8_C( 37), UINT8_C(247), UINT8_C(105), UINT8_C( 54), UINT8_C(214), UINT8_C(255), UINT8_C(145), UINT8_C( 18), UINT8_C(160), UINT8_C( 92), UINT8_C( 29), UINT8_C(253), UINT8_C(230), UINT8_C( 83), UINT8_C(176), UINT8_C( 43), UINT8_C( 78), UINT8_C( 31), UINT8_C(113), UINT8_C(239), UINT8_C( 38), UINT8_C(192), UINT8_C(127), UINT8_C( 44), UINT8_C(218), UINT8_C(134), UINT8_C(224), UINT8_C( 35), UINT8_C(155), UINT8_C(171), UINT8_C( 44), UINT8_C( 53), 
UINT8_C(101), UINT8_C(234), UINT8_C(121), UINT8_C( 95), UINT8_C(232), UINT8_C(104), UINT8_C( 20), UINT8_C( 52), UINT8_C( 59), UINT8_C( 62), UINT8_C( 55), UINT8_C( 43), UINT8_C(246), UINT8_C( 36), UINT8_C( 25), UINT8_C( 98), UINT8_C(155), UINT8_C(231), UINT8_C( 43), UINT8_C(203), UINT8_C( 77), UINT8_C( 43), UINT8_C( 80), UINT8_C( 85), UINT8_C(227), UINT8_C( 34), UINT8_C( 77), UINT8_C(179)), simde_x_mm512_set_epu8(UINT8_C( 68), UINT8_C(183), UINT8_C(190), UINT8_C(248), UINT8_C( 54), UINT8_C(194), UINT8_C( 0), UINT8_C(190), UINT8_C(102), UINT8_C(185), UINT8_C( 5), UINT8_C(238), UINT8_C(188), UINT8_C(104), UINT8_C(139), UINT8_C(163), UINT8_C(242), UINT8_C( 94), UINT8_C(181), UINT8_C(214), UINT8_C(170), UINT8_C(182), UINT8_C(139), UINT8_C( 96), UINT8_C( 80), UINT8_C( 38), UINT8_C(151), UINT8_C(132), UINT8_C(243), UINT8_C( 37), UINT8_C(120), UINT8_C( 30), UINT8_C(115), UINT8_C( 34), UINT8_C(222), UINT8_C(121), UINT8_C(112), UINT8_C(205), UINT8_C(161), UINT8_C( 52), UINT8_C(142), UINT8_C(118), UINT8_C(195), UINT8_C(200), UINT8_C( 59), UINT8_C( 6), UINT8_C(115), UINT8_C(109), UINT8_C( 93), UINT8_C(174), UINT8_C(200), UINT8_C(190), UINT8_C(151), UINT8_C(126), UINT8_C(210), UINT8_C(200), UINT8_C( 79), UINT8_C( 43), UINT8_C( 20), UINT8_C(239), UINT8_C(149), UINT8_C(119), UINT8_C( 96), UINT8_C(120)), simde_x_mm512_set_epu8(UINT8_C(255), UINT8_C(255), UINT8_C(252), UINT8_C(255), UINT8_C( 91), UINT8_C(255), UINT8_C(105), UINT8_C(244), UINT8_C(255), UINT8_C(255), UINT8_C(150), UINT8_C(255), UINT8_C(255), UINT8_C(196), UINT8_C(168), UINT8_C(255), UINT8_C(255), UINT8_C(177), UINT8_C(255), UINT8_C(255), UINT8_C(248), UINT8_C(213), UINT8_C(252), UINT8_C(255), UINT8_C(118), UINT8_C(230), UINT8_C(255), UINT8_C(176), UINT8_C(255), UINT8_C(171), UINT8_C(255), UINT8_C( 65), UINT8_C(255), UINT8_C(205), UINT8_C(255), UINT8_C(174), UINT8_C(213), UINT8_C(255), UINT8_C(255), UINT8_C(147), UINT8_C(255), UINT8_C(222), UINT8_C(215), UINT8_C(252), UINT8_C(118), UINT8_C( 68), UINT8_C(170), 
UINT8_C(152), UINT8_C(255), UINT8_C(210), UINT8_C(225), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(253), UINT8_C(255), UINT8_C(156), UINT8_C( 86), UINT8_C(100), UINT8_C(255), UINT8_C(255), UINT8_C(153), UINT8_C(173), UINT8_C(255)) }, { simde_x_mm512_set_epu8(UINT8_C(150), UINT8_C(150), UINT8_C(146), UINT8_C(157), UINT8_C(103), UINT8_C( 31), UINT8_C(236), UINT8_C(192), UINT8_C(155), UINT8_C( 46), UINT8_C(170), UINT8_C(176), UINT8_C(189), UINT8_C( 74), UINT8_C(246), UINT8_C( 0), UINT8_C( 85), UINT8_C( 30), UINT8_C(190), UINT8_C( 12), UINT8_C(117), UINT8_C( 74), UINT8_C(194), UINT8_C( 97), UINT8_C(132), UINT8_C( 63), UINT8_C(227), UINT8_C( 21), UINT8_C(244), UINT8_C(229), UINT8_C(176), UINT8_C(122), UINT8_C(159), UINT8_C( 35), UINT8_C(115), UINT8_C(240), UINT8_C( 88), UINT8_C(151), UINT8_C(101), UINT8_C(194), UINT8_C( 49), UINT8_C(128), UINT8_C( 87), UINT8_C(152), UINT8_C(173), UINT8_C(242), UINT8_C(150), UINT8_C( 26), UINT8_C(183), UINT8_C( 56), UINT8_C(181), UINT8_C(193), UINT8_C(231), UINT8_C(135), UINT8_C(190), UINT8_C( 42), UINT8_C( 33), UINT8_C(174), UINT8_C( 57), UINT8_C(232), UINT8_C(176), UINT8_C(125), UINT8_C( 87), UINT8_C( 79)), simde_x_mm512_set_epu8(UINT8_C( 68), UINT8_C( 1), UINT8_C(208), UINT8_C( 74), UINT8_C(102), UINT8_C(168), UINT8_C( 41), UINT8_C( 41), UINT8_C( 83), UINT8_C( 95), UINT8_C(152), UINT8_C(131), UINT8_C(230), UINT8_C( 27), UINT8_C(126), UINT8_C(230), UINT8_C(128), UINT8_C(133), UINT8_C(107), UINT8_C(197), UINT8_C( 23), UINT8_C( 2), UINT8_C(108), UINT8_C(207), UINT8_C( 82), UINT8_C(175), UINT8_C(247), UINT8_C(180), UINT8_C( 57), UINT8_C( 31), UINT8_C(124), UINT8_C(203), UINT8_C(246), UINT8_C(195), UINT8_C(173), UINT8_C(161), UINT8_C(132), UINT8_C(162), UINT8_C( 13), UINT8_C(205), UINT8_C(225), UINT8_C( 25), UINT8_C( 39), UINT8_C(181), UINT8_C(149), UINT8_C(149), UINT8_C(138), UINT8_C(158), UINT8_C(215), UINT8_C(147), UINT8_C( 0), UINT8_C( 87), UINT8_C(244), UINT8_C( 10), UINT8_C( 1), UINT8_C( 19), UINT8_C(173), UINT8_C(221), 
UINT8_C(118), UINT8_C( 51), UINT8_C( 98), UINT8_C(183), UINT8_C(125), UINT8_C( 92)), simde_x_mm512_set_epu8(UINT8_C(218), UINT8_C(151), UINT8_C(255), UINT8_C(231), UINT8_C(205), UINT8_C(199), UINT8_C(255), UINT8_C(233), UINT8_C(238), UINT8_C(141), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(101), UINT8_C(255), UINT8_C(230), UINT8_C(213), UINT8_C(163), UINT8_C(255), UINT8_C(209), UINT8_C(140), UINT8_C( 76), UINT8_C(255), UINT8_C(255), UINT8_C(214), UINT8_C(238), UINT8_C(255), UINT8_C(201), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(230), UINT8_C(255), UINT8_C(255), UINT8_C(220), UINT8_C(255), UINT8_C(114), UINT8_C(255), UINT8_C(255), UINT8_C(153), UINT8_C(126), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(184), UINT8_C(255), UINT8_C(203), UINT8_C(181), UINT8_C(255), UINT8_C(255), UINT8_C(145), UINT8_C(191), UINT8_C( 61), UINT8_C(206), UINT8_C(255), UINT8_C(175), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(212), UINT8_C(171)) }, { simde_x_mm512_set_epu8(UINT8_C(166), UINT8_C(112), UINT8_C( 16), UINT8_C( 20), UINT8_C(189), UINT8_C( 23), UINT8_C( 28), UINT8_C(226), UINT8_C( 92), UINT8_C(105), UINT8_C(249), UINT8_C( 71), UINT8_C( 22), UINT8_C(153), UINT8_C( 38), UINT8_C( 58), UINT8_C( 86), UINT8_C(191), UINT8_C( 97), UINT8_C(186), UINT8_C(117), UINT8_C( 93), UINT8_C( 63), UINT8_C(142), UINT8_C( 82), UINT8_C(123), UINT8_C(210), UINT8_C(159), UINT8_C(175), UINT8_C(187), UINT8_C( 33), UINT8_C(134), UINT8_C(156), UINT8_C(243), UINT8_C(191), UINT8_C(139), UINT8_C(173), UINT8_C( 70), UINT8_C(113), UINT8_C( 30), UINT8_C(184), UINT8_C( 82), UINT8_C(234), UINT8_C(177), UINT8_C(234), UINT8_C(131), UINT8_C(180), UINT8_C( 12), UINT8_C( 47), UINT8_C(213), UINT8_C( 54), UINT8_C(222), UINT8_C(114), UINT8_C(251), UINT8_C(236), UINT8_C( 57), UINT8_C(205), UINT8_C(187), UINT8_C( 48), UINT8_C(112), UINT8_C( 34), UINT8_C( 49), UINT8_C( 95), UINT8_C(198)), simde_x_mm512_set_epu8(UINT8_C(116), UINT8_C(101), UINT8_C(212), 
UINT8_C(199), UINT8_C( 88), UINT8_C(165), UINT8_C(253), UINT8_C( 88), UINT8_C(138), UINT8_C( 27), UINT8_C(153), UINT8_C(155), UINT8_C(248), UINT8_C(175), UINT8_C(152), UINT8_C(246), UINT8_C( 17), UINT8_C(224), UINT8_C( 49), UINT8_C(127), UINT8_C(200), UINT8_C( 46), UINT8_C( 58), UINT8_C(159), UINT8_C(177), UINT8_C( 24), UINT8_C(139), UINT8_C(240), UINT8_C( 45), UINT8_C(115), UINT8_C(179), UINT8_C(230), UINT8_C(249), UINT8_C(217), UINT8_C(202), UINT8_C( 21), UINT8_C(134), UINT8_C(111), UINT8_C( 79), UINT8_C(153), UINT8_C(130), UINT8_C( 83), UINT8_C(196), UINT8_C(101), UINT8_C(151), UINT8_C(117), UINT8_C(152), UINT8_C(155), UINT8_C(116), UINT8_C(201), UINT8_C(241), UINT8_C( 71), UINT8_C( 24), UINT8_C(105), UINT8_C(212), UINT8_C(191), UINT8_C( 1), UINT8_C(252), UINT8_C( 15), UINT8_C( 49), UINT8_C(105), UINT8_C(159), UINT8_C(109), UINT8_C( 31)), simde_x_mm512_set_epu8(UINT8_C(255), UINT8_C(213), UINT8_C(228), UINT8_C(219), UINT8_C(255), UINT8_C(188), UINT8_C(255), UINT8_C(255), UINT8_C(230), UINT8_C(132), UINT8_C(255), UINT8_C(226), UINT8_C(255), UINT8_C(255), UINT8_C(190), UINT8_C(255), UINT8_C(103), UINT8_C(255), UINT8_C(146), UINT8_C(255), UINT8_C(255), UINT8_C(139), UINT8_C(121), UINT8_C(255), UINT8_C(255), UINT8_C(147), UINT8_C(255), UINT8_C(255), UINT8_C(220), UINT8_C(255), UINT8_C(212), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(160), UINT8_C(255), UINT8_C(181), UINT8_C(192), UINT8_C(183), UINT8_C(255), UINT8_C(165), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(248), UINT8_C(255), UINT8_C(167), UINT8_C(163), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(138), UINT8_C(255), UINT8_C(255), UINT8_C(248), UINT8_C(206), UINT8_C(255), UINT8_C( 63), UINT8_C(161), UINT8_C(139), UINT8_C(208), UINT8_C(204), UINT8_C(229)) }, { simde_x_mm512_set_epu8(UINT8_C(239), UINT8_C(174), UINT8_C( 81), UINT8_C( 89), UINT8_C( 63), UINT8_C(160), UINT8_C(219), UINT8_C(204), UINT8_C(161), UINT8_C( 4), UINT8_C(134), UINT8_C(137), UINT8_C( 48), UINT8_C( 33), 
UINT8_C(178), UINT8_C(122), UINT8_C( 99), UINT8_C(167), UINT8_C( 95), UINT8_C(109), UINT8_C( 38), UINT8_C(191), UINT8_C( 3), UINT8_C( 48), UINT8_C(143), UINT8_C( 51), UINT8_C( 81), UINT8_C( 76), UINT8_C(251), UINT8_C(204), UINT8_C(198), UINT8_C( 1), UINT8_C(175), UINT8_C(123), UINT8_C( 24), UINT8_C(237), UINT8_C(190), UINT8_C( 62), UINT8_C( 87), UINT8_C(255), UINT8_C(119), UINT8_C(202), UINT8_C( 4), UINT8_C( 40), UINT8_C(152), UINT8_C(128), UINT8_C(116), UINT8_C( 72), UINT8_C(245), UINT8_C(148), UINT8_C( 39), UINT8_C(106), UINT8_C(195), UINT8_C(186), UINT8_C(241), UINT8_C(178), UINT8_C( 68), UINT8_C(210), UINT8_C( 61), UINT8_C(218), UINT8_C(116), UINT8_C( 98), UINT8_C( 41), UINT8_C( 22)), simde_x_mm512_set_epu8(UINT8_C( 3), UINT8_C(245), UINT8_C( 8), UINT8_C(179), UINT8_C(242), UINT8_C( 19), UINT8_C(107), UINT8_C(170), UINT8_C(178), UINT8_C(175), UINT8_C(174), UINT8_C(170), UINT8_C( 72), UINT8_C(195), UINT8_C(199), UINT8_C( 66), UINT8_C(182), UINT8_C(183), UINT8_C( 58), UINT8_C(245), UINT8_C( 62), UINT8_C( 31), UINT8_C( 93), UINT8_C( 74), UINT8_C(114), UINT8_C( 92), UINT8_C(199), UINT8_C( 53), UINT8_C( 94), UINT8_C(185), UINT8_C(107), UINT8_C(124), UINT8_C(160), UINT8_C(193), UINT8_C( 17), UINT8_C( 65), UINT8_C(242), UINT8_C( 55), UINT8_C(163), UINT8_C(102), UINT8_C( 59), UINT8_C(173), UINT8_C(244), UINT8_C(232), UINT8_C(182), UINT8_C(139), UINT8_C(254), UINT8_C( 81), UINT8_C(228), UINT8_C(175), UINT8_C(104), UINT8_C(167), UINT8_C( 38), UINT8_C(114), UINT8_C(184), UINT8_C(118), UINT8_C(167), UINT8_C( 58), UINT8_C(153), UINT8_C(103), UINT8_C(204), UINT8_C(126), UINT8_C(111), UINT8_C( 63)), simde_x_mm512_set_epu8(UINT8_C(242), UINT8_C(255), UINT8_C( 89), UINT8_C(255), UINT8_C(255), UINT8_C(179), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(179), UINT8_C(255), UINT8_C(255), UINT8_C(120), UINT8_C(228), UINT8_C(255), UINT8_C(188), UINT8_C(255), UINT8_C(255), UINT8_C(153), UINT8_C(255), UINT8_C(100), UINT8_C(222), UINT8_C( 96), UINT8_C(122), UINT8_C(255), 
UINT8_C(143), UINT8_C(255), UINT8_C(129), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(125), UINT8_C(255), UINT8_C(255), UINT8_C( 41), UINT8_C(255), UINT8_C(255), UINT8_C(117), UINT8_C(250), UINT8_C(255), UINT8_C(178), UINT8_C(255), UINT8_C(248), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(153), UINT8_C(255), UINT8_C(255), UINT8_C(143), UINT8_C(255), UINT8_C(233), UINT8_C(255), UINT8_C(255), UINT8_C(255), UINT8_C(235), UINT8_C(255), UINT8_C(214), UINT8_C(255), UINT8_C(255), UINT8_C(224), UINT8_C(152), UINT8_C( 85)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_adds_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m512i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_adds_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask64 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu8(UINT8_C( 92), UINT8_C( 116), UINT8_C( 178), UINT8_C( 237), UINT8_C( 183), UINT8_C( 22), UINT8_C( 190), UINT8_C( 227), UINT8_C( 55), UINT8_C( 78), UINT8_C( 211), UINT8_C( 137), UINT8_C( 30), UINT8_C( 227), UINT8_C( 233), UINT8_C( 89), UINT8_C( 15), UINT8_C( 119), UINT8_C( 168), UINT8_C( 109), UINT8_C( 240), UINT8_C( 246), UINT8_C( 0), UINT8_C( 238), UINT8_C( 1), UINT8_C( 116), UINT8_C( 62), UINT8_C( 183), UINT8_C( 134), UINT8_C( 0), UINT8_C( 111), UINT8_C( 95), UINT8_C( 236), UINT8_C( 3), UINT8_C( 122), UINT8_C( 135), UINT8_C( 13), UINT8_C( 25), UINT8_C( 221), UINT8_C( 129), UINT8_C( 82), UINT8_C( 90), UINT8_C( 53), UINT8_C( 123), UINT8_C( 73), UINT8_C( 108), UINT8_C( 238), UINT8_C( 15), UINT8_C( 81), UINT8_C( 229), UINT8_C( 102), UINT8_C( 118), UINT8_C( 39), UINT8_C( 179), UINT8_C( 45), UINT8_C( 81), UINT8_C( 239), UINT8_C( 228), UINT8_C( 67), UINT8_C( 138), UINT8_C( 79), UINT8_C( 143), UINT8_C( 134), UINT8_C( 124)), UINT64_C(10224647434006242820), simde_x_mm512_set_epu8(UINT8_C( 68), UINT8_C( 148), UINT8_C( 135), 
UINT8_C( 141), UINT8_C( 197), UINT8_C( 178), UINT8_C( 145), UINT8_C( 56), UINT8_C( 9), UINT8_C( 135), UINT8_C( 120), UINT8_C( 220), UINT8_C( 36), UINT8_C( 153), UINT8_C( 152), UINT8_C( 25), UINT8_C( 145), UINT8_C( 193), UINT8_C( 221), UINT8_C( 136), UINT8_C( 28), UINT8_C( 212), UINT8_C( 230), UINT8_C( 170), UINT8_C( 243), UINT8_C( 53), UINT8_C( 168), UINT8_C( 149), UINT8_C( 68), UINT8_C( 42), UINT8_C( 138), UINT8_C( 111), UINT8_C( 54), UINT8_C( 198), UINT8_C( 243), UINT8_C( 27), UINT8_C( 23), UINT8_C( 41), UINT8_C( 137), UINT8_C( 44), UINT8_C( 7), UINT8_C( 136), UINT8_C( 32), UINT8_C( 213), UINT8_C( 114), UINT8_C( 184), UINT8_C( 73), UINT8_C( 160), UINT8_C( 96), UINT8_C( 110), UINT8_C( 175), UINT8_C( 180), UINT8_C( 103), UINT8_C( 156), UINT8_C( 234), UINT8_C( 18), UINT8_C( 115), UINT8_C( 54), UINT8_C( 216), UINT8_C( 125), UINT8_C( 110), UINT8_C( 31), UINT8_C( 51), UINT8_C( 152)), simde_x_mm512_set_epu8(UINT8_C( 242), UINT8_C( 85), UINT8_C( 176), UINT8_C( 80), UINT8_C( 48), UINT8_C( 93), UINT8_C( 79), UINT8_C( 127), UINT8_C( 16), UINT8_C( 41), UINT8_C( 54), UINT8_C( 140), UINT8_C( 17), UINT8_C( 42), UINT8_C( 170), UINT8_C( 38), UINT8_C( 138), UINT8_C( 200), UINT8_C( 60), UINT8_C( 19), UINT8_C( 156), UINT8_C( 149), UINT8_C( 105), UINT8_C( 180), UINT8_C( 10), UINT8_C( 247), UINT8_C( 244), UINT8_C( 200), UINT8_C( 185), UINT8_C( 96), UINT8_C( 6), UINT8_C( 24), UINT8_C( 69), UINT8_C( 73), UINT8_C( 229), UINT8_C( 3), UINT8_C( 161), UINT8_C( 50), UINT8_C( 189), UINT8_C( 217), UINT8_C( 97), UINT8_C( 62), UINT8_C( 101), UINT8_C( 56), UINT8_C( 177), UINT8_C( 83), UINT8_C( 157), UINT8_C( 200), UINT8_C( 239), UINT8_C( 200), UINT8_C( 248), UINT8_C( 240), UINT8_C( 84), UINT8_C( 161), UINT8_C( 183), UINT8_C( 54), UINT8_C( 125), UINT8_C( 85), UINT8_C( 78), UINT8_C( 191), UINT8_C( 4), UINT8_C( 113), UINT8_C( 240), UINT8_C( 203)), simde_x_mm512_set_epu8(UINT8_C( 255), UINT8_C( 116), UINT8_C( 178), UINT8_C( 237), UINT8_C( 245), UINT8_C( 255), UINT8_C( 190), UINT8_C( 183), UINT8_C( 
25), UINT8_C( 176), UINT8_C( 174), UINT8_C( 137), UINT8_C( 30), UINT8_C( 195), UINT8_C( 233), UINT8_C( 63), UINT8_C( 15), UINT8_C( 119), UINT8_C( 255), UINT8_C( 155), UINT8_C( 184), UINT8_C( 255), UINT8_C( 255), UINT8_C( 238), UINT8_C( 253), UINT8_C( 116), UINT8_C( 255), UINT8_C( 183), UINT8_C( 253), UINT8_C( 0), UINT8_C( 144), UINT8_C( 135), UINT8_C( 236), UINT8_C( 3), UINT8_C( 255), UINT8_C( 30), UINT8_C( 13), UINT8_C( 25), UINT8_C( 221), UINT8_C( 255), UINT8_C( 104), UINT8_C( 90), UINT8_C( 53), UINT8_C( 123), UINT8_C( 255), UINT8_C( 255), UINT8_C( 238), UINT8_C( 255), UINT8_C( 81), UINT8_C( 229), UINT8_C( 255), UINT8_C( 118), UINT8_C( 39), UINT8_C( 255), UINT8_C( 255), UINT8_C( 81), UINT8_C( 239), UINT8_C( 228), UINT8_C( 67), UINT8_C( 138), UINT8_C( 79), UINT8_C( 144), UINT8_C( 134), UINT8_C( 124)) }, { simde_x_mm512_set_epu8(UINT8_C( 133), UINT8_C( 156), UINT8_C( 217), UINT8_C( 212), UINT8_C( 22), UINT8_C( 112), UINT8_C( 16), UINT8_C( 15), UINT8_C( 152), UINT8_C( 154), UINT8_C( 102), UINT8_C( 161), UINT8_C( 69), UINT8_C( 13), UINT8_C( 235), UINT8_C( 45), UINT8_C( 62), UINT8_C( 136), UINT8_C( 145), UINT8_C( 32), UINT8_C( 149), UINT8_C( 226), UINT8_C( 99), UINT8_C( 192), UINT8_C( 8), UINT8_C( 214), UINT8_C( 81), UINT8_C( 222), UINT8_C( 210), UINT8_C( 26), UINT8_C( 31), UINT8_C( 254), UINT8_C( 68), UINT8_C( 249), UINT8_C( 185), UINT8_C( 46), UINT8_C( 235), UINT8_C( 183), UINT8_C( 21), UINT8_C( 83), UINT8_C( 148), UINT8_C( 159), UINT8_C( 187), UINT8_C( 73), UINT8_C( 57), UINT8_C( 219), UINT8_C( 21), UINT8_C( 82), UINT8_C( 137), UINT8_C( 130), UINT8_C( 126), UINT8_C( 91), UINT8_C( 115), UINT8_C( 31), UINT8_C( 177), UINT8_C( 28), UINT8_C( 150), UINT8_C( 238), UINT8_C( 65), UINT8_C( 152), UINT8_C( 81), UINT8_C( 38), UINT8_C( 172), UINT8_C( 254)), UINT64_C( 4597426592773770833), simde_x_mm512_set_epu8(UINT8_C( 74), UINT8_C( 72), UINT8_C( 252), UINT8_C( 28), UINT8_C( 141), UINT8_C( 93), UINT8_C( 102), UINT8_C( 44), UINT8_C( 153), UINT8_C( 227), UINT8_C( 206), UINT8_C( 
48), UINT8_C( 160), UINT8_C( 206), UINT8_C( 46), UINT8_C( 191), UINT8_C( 4), UINT8_C( 43), UINT8_C( 181), UINT8_C( 97), UINT8_C( 230), UINT8_C( 153), UINT8_C( 71), UINT8_C( 149), UINT8_C( 91), UINT8_C( 45), UINT8_C( 245), UINT8_C( 47), UINT8_C( 29), UINT8_C( 25), UINT8_C( 26), UINT8_C( 247), UINT8_C( 10), UINT8_C( 36), UINT8_C( 177), UINT8_C( 203), UINT8_C( 41), UINT8_C( 1), UINT8_C( 233), UINT8_C( 193), UINT8_C( 129), UINT8_C( 68), UINT8_C( 48), UINT8_C( 36), UINT8_C( 89), UINT8_C( 144), UINT8_C( 225), UINT8_C( 120), UINT8_C( 35), UINT8_C( 62), UINT8_C( 235), UINT8_C( 142), UINT8_C( 152), UINT8_C( 57), UINT8_C( 42), UINT8_C( 145), UINT8_C( 94), UINT8_C( 193), UINT8_C( 247), UINT8_C( 64), UINT8_C( 191), UINT8_C( 254), UINT8_C( 110), UINT8_C( 248)), simde_x_mm512_set_epu8(UINT8_C( 44), UINT8_C( 164), UINT8_C( 225), UINT8_C( 26), UINT8_C( 157), UINT8_C( 203), UINT8_C( 117), UINT8_C( 18), UINT8_C( 193), UINT8_C( 45), UINT8_C( 12), UINT8_C( 24), UINT8_C( 148), UINT8_C( 18), UINT8_C( 196), UINT8_C( 28), UINT8_C( 50), UINT8_C( 245), UINT8_C( 188), UINT8_C( 225), UINT8_C( 105), UINT8_C( 150), UINT8_C( 98), UINT8_C( 51), UINT8_C( 58), UINT8_C( 103), UINT8_C( 111), UINT8_C( 129), UINT8_C( 68), UINT8_C( 200), UINT8_C( 124), UINT8_C( 137), UINT8_C( 74), UINT8_C( 194), UINT8_C( 140), UINT8_C( 37), UINT8_C( 244), UINT8_C( 114), UINT8_C( 0), UINT8_C( 61), UINT8_C( 103), UINT8_C( 252), UINT8_C( 151), UINT8_C( 188), UINT8_C( 39), UINT8_C( 156), UINT8_C( 163), UINT8_C( 11), UINT8_C( 176), UINT8_C( 237), UINT8_C( 234), UINT8_C( 217), UINT8_C( 127), UINT8_C( 218), UINT8_C( 131), UINT8_C( 145), UINT8_C( 84), UINT8_C( 160), UINT8_C( 87), UINT8_C( 234), UINT8_C( 251), UINT8_C( 253), UINT8_C( 129), UINT8_C( 41)), simde_x_mm512_set_epu8(UINT8_C( 133), UINT8_C( 156), UINT8_C( 255), UINT8_C( 54), UINT8_C( 255), UINT8_C( 255), UINT8_C( 219), UINT8_C( 62), UINT8_C( 255), UINT8_C( 255), UINT8_C( 102), UINT8_C( 161), UINT8_C( 255), UINT8_C( 224), UINT8_C( 235), UINT8_C( 219), UINT8_C( 62), 
UINT8_C( 255), UINT8_C( 145), UINT8_C( 255), UINT8_C( 149), UINT8_C( 255), UINT8_C( 169), UINT8_C( 200), UINT8_C( 8), UINT8_C( 214), UINT8_C( 255), UINT8_C( 222), UINT8_C( 210), UINT8_C( 26), UINT8_C( 31), UINT8_C( 254), UINT8_C( 84), UINT8_C( 249), UINT8_C( 255), UINT8_C( 240), UINT8_C( 255), UINT8_C( 115), UINT8_C( 233), UINT8_C( 254), UINT8_C( 148), UINT8_C( 159), UINT8_C( 199), UINT8_C( 224), UINT8_C( 57), UINT8_C( 255), UINT8_C( 255), UINT8_C( 131), UINT8_C( 137), UINT8_C( 130), UINT8_C( 126), UINT8_C( 91), UINT8_C( 255), UINT8_C( 255), UINT8_C( 173), UINT8_C( 28), UINT8_C( 150), UINT8_C( 255), UINT8_C( 65), UINT8_C( 255), UINT8_C( 81), UINT8_C( 38), UINT8_C( 172), UINT8_C( 255)) }, { simde_x_mm512_set_epu8(UINT8_C( 226), UINT8_C( 219), UINT8_C( 51), UINT8_C( 220), UINT8_C( 8), UINT8_C( 52), UINT8_C( 97), UINT8_C( 123), UINT8_C( 207), UINT8_C( 132), UINT8_C( 95), UINT8_C( 173), UINT8_C( 70), UINT8_C( 206), UINT8_C( 195), UINT8_C( 25), UINT8_C( 159), UINT8_C( 28), UINT8_C( 198), UINT8_C( 11), UINT8_C( 242), UINT8_C( 126), UINT8_C( 81), UINT8_C( 45), UINT8_C( 233), UINT8_C( 120), UINT8_C( 173), UINT8_C( 240), UINT8_C( 7), UINT8_C( 51), UINT8_C( 199), UINT8_C( 206), UINT8_C( 235), UINT8_C( 98), UINT8_C( 88), UINT8_C( 0), UINT8_C( 190), UINT8_C( 3), UINT8_C( 124), UINT8_C( 143), UINT8_C( 50), UINT8_C( 88), UINT8_C( 171), UINT8_C( 163), UINT8_C( 212), UINT8_C( 243), UINT8_C( 162), UINT8_C( 17), UINT8_C( 254), UINT8_C( 79), UINT8_C( 140), UINT8_C( 43), UINT8_C( 179), UINT8_C( 131), UINT8_C( 233), UINT8_C( 136), UINT8_C( 96), UINT8_C( 192), UINT8_C( 233), UINT8_C( 210), UINT8_C( 227), UINT8_C( 185), UINT8_C( 71), UINT8_C( 90)), UINT64_C(12627002542648829104), simde_x_mm512_set_epu8(UINT8_C( 56), UINT8_C( 211), UINT8_C( 148), UINT8_C( 237), UINT8_C( 132), UINT8_C( 229), UINT8_C( 22), UINT8_C( 126), UINT8_C( 150), UINT8_C( 188), UINT8_C( 196), UINT8_C( 8), UINT8_C( 60), UINT8_C( 93), UINT8_C( 223), UINT8_C( 229), UINT8_C( 249), UINT8_C( 27), UINT8_C( 134), UINT8_C( 
218), UINT8_C( 23), UINT8_C( 6), UINT8_C( 45), UINT8_C( 235), UINT8_C( 233), UINT8_C( 155), UINT8_C( 116), UINT8_C( 127), UINT8_C( 96), UINT8_C( 40), UINT8_C( 159), UINT8_C( 40), UINT8_C( 86), UINT8_C( 212), UINT8_C( 70), UINT8_C( 185), UINT8_C( 62), UINT8_C( 235), UINT8_C( 66), UINT8_C( 68), UINT8_C( 169), UINT8_C( 195), UINT8_C( 48), UINT8_C( 186), UINT8_C( 18), UINT8_C( 178), UINT8_C( 158), UINT8_C( 117), UINT8_C( 74), UINT8_C( 32), UINT8_C( 93), UINT8_C( 125), UINT8_C( 209), UINT8_C( 196), UINT8_C( 170), UINT8_C( 117), UINT8_C( 122), UINT8_C( 202), UINT8_C( 50), UINT8_C( 123), UINT8_C( 225), UINT8_C( 182), UINT8_C( 192), UINT8_C( 54)), simde_x_mm512_set_epu8(UINT8_C( 115), UINT8_C( 51), UINT8_C( 165), UINT8_C( 56), UINT8_C( 64), UINT8_C( 217), UINT8_C( 137), UINT8_C( 228), UINT8_C( 202), UINT8_C( 28), UINT8_C( 54), UINT8_C( 248), UINT8_C( 202), UINT8_C( 128), UINT8_C( 228), UINT8_C( 185), UINT8_C( 107), UINT8_C( 190), UINT8_C( 142), UINT8_C( 168), UINT8_C( 34), UINT8_C( 173), UINT8_C( 235), UINT8_C( 192), UINT8_C( 121), UINT8_C( 236), UINT8_C( 167), UINT8_C( 162), UINT8_C( 112), UINT8_C( 229), UINT8_C( 81), UINT8_C( 202), UINT8_C( 192), UINT8_C( 142), UINT8_C( 48), UINT8_C( 167), UINT8_C( 195), UINT8_C( 26), UINT8_C( 43), UINT8_C( 29), UINT8_C( 0), UINT8_C( 64), UINT8_C( 123), UINT8_C( 189), UINT8_C( 15), UINT8_C( 120), UINT8_C( 36), UINT8_C( 40), UINT8_C( 106), UINT8_C( 138), UINT8_C( 148), UINT8_C( 198), UINT8_C( 26), UINT8_C( 145), UINT8_C( 63), UINT8_C( 158), UINT8_C( 243), UINT8_C( 244), UINT8_C( 132), UINT8_C( 96), UINT8_C( 243), UINT8_C( 158), UINT8_C( 99), UINT8_C( 243)), simde_x_mm512_set_epu8(UINT8_C( 171), UINT8_C( 219), UINT8_C( 255), UINT8_C( 220), UINT8_C( 196), UINT8_C( 255), UINT8_C( 159), UINT8_C( 255), UINT8_C( 207), UINT8_C( 132), UINT8_C( 250), UINT8_C( 255), UINT8_C( 255), UINT8_C( 221), UINT8_C( 195), UINT8_C( 25), UINT8_C( 159), UINT8_C( 28), UINT8_C( 198), UINT8_C( 255), UINT8_C( 57), UINT8_C( 179), UINT8_C( 255), UINT8_C( 255), UINT8_C( 
255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 240), UINT8_C( 208), UINT8_C( 51), UINT8_C( 199), UINT8_C( 242), UINT8_C( 235), UINT8_C( 255), UINT8_C( 118), UINT8_C( 0), UINT8_C( 255), UINT8_C( 255), UINT8_C( 124), UINT8_C( 143), UINT8_C( 169), UINT8_C( 88), UINT8_C( 171), UINT8_C( 163), UINT8_C( 33), UINT8_C( 243), UINT8_C( 162), UINT8_C( 157), UINT8_C( 180), UINT8_C( 79), UINT8_C( 241), UINT8_C( 43), UINT8_C( 179), UINT8_C( 255), UINT8_C( 233), UINT8_C( 136), UINT8_C( 255), UINT8_C( 192), UINT8_C( 182), UINT8_C( 219), UINT8_C( 227), UINT8_C( 185), UINT8_C( 71), UINT8_C( 90)) }, { simde_x_mm512_set_epu8(UINT8_C( 72), UINT8_C( 45), UINT8_C( 120), UINT8_C( 251), UINT8_C( 147), UINT8_C( 62), UINT8_C( 17), UINT8_C( 31), UINT8_C( 226), UINT8_C( 198), UINT8_C( 56), UINT8_C( 21), UINT8_C( 72), UINT8_C( 182), UINT8_C( 216), UINT8_C( 120), UINT8_C( 95), UINT8_C( 108), UINT8_C( 32), UINT8_C( 64), UINT8_C( 128), UINT8_C( 102), UINT8_C( 235), UINT8_C( 28), UINT8_C( 105), UINT8_C( 52), UINT8_C( 85), UINT8_C( 152), UINT8_C( 57), UINT8_C( 225), UINT8_C( 218), UINT8_C( 132), UINT8_C( 149), UINT8_C( 254), UINT8_C( 55), UINT8_C( 46), UINT8_C( 185), UINT8_C( 77), UINT8_C( 18), UINT8_C( 70), UINT8_C( 89), UINT8_C( 125), UINT8_C( 214), UINT8_C( 131), UINT8_C( 121), UINT8_C( 245), UINT8_C( 187), UINT8_C( 197), UINT8_C( 203), UINT8_C( 34), UINT8_C( 9), UINT8_C( 64), UINT8_C( 195), UINT8_C( 231), UINT8_C( 141), UINT8_C( 100), UINT8_C( 65), UINT8_C( 8), UINT8_C( 69), UINT8_C( 248), UINT8_C( 241), UINT8_C( 205), UINT8_C( 1), UINT8_C( 90)), UINT64_C(14515151237088493607), simde_x_mm512_set_epu8(UINT8_C( 230), UINT8_C( 247), UINT8_C( 66), UINT8_C( 1), UINT8_C( 243), UINT8_C( 60), UINT8_C( 137), UINT8_C( 173), UINT8_C( 134), UINT8_C( 192), UINT8_C( 173), UINT8_C( 182), UINT8_C( 119), UINT8_C( 248), UINT8_C( 12), UINT8_C( 113), UINT8_C( 244), UINT8_C( 172), UINT8_C( 6), UINT8_C( 69), UINT8_C( 2), UINT8_C( 181), UINT8_C( 222), UINT8_C( 130), UINT8_C( 3), UINT8_C( 128), UINT8_C( 247), UINT8_C( 
24), UINT8_C( 11), UINT8_C( 162), UINT8_C( 224), UINT8_C( 110), UINT8_C( 33), UINT8_C( 232), UINT8_C( 125), UINT8_C( 35), UINT8_C( 153), UINT8_C( 208), UINT8_C( 234), UINT8_C( 38), UINT8_C( 175), UINT8_C( 9), UINT8_C( 245), UINT8_C( 132), UINT8_C( 71), UINT8_C( 31), UINT8_C( 214), UINT8_C( 93), UINT8_C( 67), UINT8_C( 45), UINT8_C( 51), UINT8_C( 164), UINT8_C( 126), UINT8_C( 108), UINT8_C( 133), UINT8_C( 185), UINT8_C( 113), UINT8_C( 32), UINT8_C( 71), UINT8_C( 55), UINT8_C( 230), UINT8_C( 82), UINT8_C( 175), UINT8_C( 236)), simde_x_mm512_set_epu8(UINT8_C( 131), UINT8_C( 121), UINT8_C( 128), UINT8_C( 103), UINT8_C( 0), UINT8_C( 101), UINT8_C( 215), UINT8_C( 89), UINT8_C( 173), UINT8_C( 191), UINT8_C( 9), UINT8_C( 249), UINT8_C( 193), UINT8_C( 13), UINT8_C( 105), UINT8_C( 92), UINT8_C( 238), UINT8_C( 235), UINT8_C( 154), UINT8_C( 142), UINT8_C( 74), UINT8_C( 121), UINT8_C( 211), UINT8_C( 52), UINT8_C( 193), UINT8_C( 163), UINT8_C( 98), UINT8_C( 106), UINT8_C( 147), UINT8_C( 209), UINT8_C( 37), UINT8_C( 70), UINT8_C( 100), UINT8_C( 121), UINT8_C( 18), UINT8_C( 28), UINT8_C( 139), UINT8_C( 107), UINT8_C( 3), UINT8_C( 194), UINT8_C( 42), UINT8_C( 72), UINT8_C( 91), UINT8_C( 86), UINT8_C( 184), UINT8_C( 9), UINT8_C( 176), UINT8_C( 118), UINT8_C( 122), UINT8_C( 148), UINT8_C( 186), UINT8_C( 193), UINT8_C( 56), UINT8_C( 71), UINT8_C( 242), UINT8_C( 49), UINT8_C( 183), UINT8_C( 53), UINT8_C( 227), UINT8_C( 3), UINT8_C( 183), UINT8_C( 43), UINT8_C( 234), UINT8_C( 85)), simde_x_mm512_set_epu8(UINT8_C( 255), UINT8_C( 255), UINT8_C( 120), UINT8_C( 251), UINT8_C( 243), UINT8_C( 62), UINT8_C( 17), UINT8_C( 255), UINT8_C( 226), UINT8_C( 255), UINT8_C( 182), UINT8_C( 255), UINT8_C( 72), UINT8_C( 182), UINT8_C( 216), UINT8_C( 120), UINT8_C( 95), UINT8_C( 108), UINT8_C( 160), UINT8_C( 64), UINT8_C( 76), UINT8_C( 255), UINT8_C( 235), UINT8_C( 182), UINT8_C( 105), UINT8_C( 52), UINT8_C( 255), UINT8_C( 152), UINT8_C( 57), UINT8_C( 255), UINT8_C( 218), UINT8_C( 180), UINT8_C( 149), 
UINT8_C( 255), UINT8_C( 55), UINT8_C( 46), UINT8_C( 185), UINT8_C( 77), UINT8_C( 237), UINT8_C( 232), UINT8_C( 217), UINT8_C( 125), UINT8_C( 255), UINT8_C( 218), UINT8_C( 255), UINT8_C( 245), UINT8_C( 255), UINT8_C( 211), UINT8_C( 203), UINT8_C( 193), UINT8_C( 9), UINT8_C( 64), UINT8_C( 195), UINT8_C( 231), UINT8_C( 141), UINT8_C( 100), UINT8_C( 65), UINT8_C( 8), UINT8_C( 255), UINT8_C( 248), UINT8_C( 241), UINT8_C( 125), UINT8_C( 255), UINT8_C( 255)) }, { simde_x_mm512_set_epu8(UINT8_C( 209), UINT8_C( 84), UINT8_C( 130), UINT8_C( 192), UINT8_C( 14), UINT8_C( 11), UINT8_C( 37), UINT8_C( 233), UINT8_C( 67), UINT8_C( 124), UINT8_C( 58), UINT8_C( 162), UINT8_C( 30), UINT8_C( 223), UINT8_C( 70), UINT8_C( 232), UINT8_C( 38), UINT8_C( 159), UINT8_C( 200), UINT8_C( 196), UINT8_C( 197), UINT8_C( 65), UINT8_C( 182), UINT8_C( 45), UINT8_C( 245), UINT8_C( 55), UINT8_C( 174), UINT8_C( 12), UINT8_C( 106), UINT8_C( 22), UINT8_C( 132), UINT8_C( 252), UINT8_C( 2), UINT8_C( 175), UINT8_C( 14), UINT8_C( 90), UINT8_C( 156), UINT8_C( 134), UINT8_C( 221), UINT8_C( 81), UINT8_C( 242), UINT8_C( 214), UINT8_C( 125), UINT8_C( 131), UINT8_C( 199), UINT8_C( 90), UINT8_C( 247), UINT8_C( 63), UINT8_C( 53), UINT8_C( 77), UINT8_C( 63), UINT8_C( 172), UINT8_C( 27), UINT8_C( 22), UINT8_C( 3), UINT8_C( 219), UINT8_C( 65), UINT8_C( 118), UINT8_C( 130), UINT8_C( 97), UINT8_C( 109), UINT8_C( 7), UINT8_C( 142), UINT8_C( 181)), UINT64_C( 8707623543556880126), simde_x_mm512_set_epu8(UINT8_C( 84), UINT8_C( 185), UINT8_C( 8), UINT8_C( 12), UINT8_C( 245), UINT8_C( 180), UINT8_C( 62), UINT8_C( 93), UINT8_C( 181), UINT8_C( 179), UINT8_C( 172), UINT8_C( 148), UINT8_C( 221), UINT8_C( 14), UINT8_C( 196), UINT8_C( 18), UINT8_C( 23), UINT8_C( 196), UINT8_C( 193), UINT8_C( 142), UINT8_C( 201), UINT8_C( 75), UINT8_C( 157), UINT8_C( 201), UINT8_C( 58), UINT8_C( 76), UINT8_C( 154), UINT8_C( 138), UINT8_C( 226), UINT8_C( 39), UINT8_C( 119), UINT8_C( 85), UINT8_C( 248), UINT8_C( 184), UINT8_C( 196), UINT8_C( 162), 
UINT8_C( 144), UINT8_C( 119), UINT8_C( 124), UINT8_C( 76), UINT8_C( 214), UINT8_C( 132), UINT8_C( 54), UINT8_C( 74), UINT8_C( 164), UINT8_C( 99), UINT8_C( 79), UINT8_C( 253), UINT8_C( 61), UINT8_C( 167), UINT8_C( 84), UINT8_C( 162), UINT8_C( 31), UINT8_C( 140), UINT8_C( 189), UINT8_C( 154), UINT8_C( 184), UINT8_C( 165), UINT8_C( 151), UINT8_C( 148), UINT8_C( 212), UINT8_C( 74), UINT8_C( 228), UINT8_C( 124)), simde_x_mm512_set_epu8(UINT8_C( 1), UINT8_C( 75), UINT8_C( 21), UINT8_C( 220), UINT8_C( 130), UINT8_C( 122), UINT8_C( 71), UINT8_C( 76), UINT8_C( 28), UINT8_C( 200), UINT8_C( 32), UINT8_C( 101), UINT8_C( 149), UINT8_C( 145), UINT8_C( 168), UINT8_C( 237), UINT8_C( 179), UINT8_C( 19), UINT8_C( 235), UINT8_C( 145), UINT8_C( 188), UINT8_C( 82), UINT8_C( 138), UINT8_C( 180), UINT8_C( 47), UINT8_C( 127), UINT8_C( 62), UINT8_C( 240), UINT8_C( 10), UINT8_C( 242), UINT8_C( 156), UINT8_C( 86), UINT8_C( 29), UINT8_C( 107), UINT8_C( 56), UINT8_C( 21), UINT8_C( 24), UINT8_C( 68), UINT8_C( 160), UINT8_C( 64), UINT8_C( 48), UINT8_C( 13), UINT8_C( 173), UINT8_C( 4), UINT8_C( 253), UINT8_C( 192), UINT8_C( 17), UINT8_C( 141), UINT8_C( 21), UINT8_C( 108), UINT8_C( 125), UINT8_C( 196), UINT8_C( 184), UINT8_C( 74), UINT8_C( 251), UINT8_C( 198), UINT8_C( 215), UINT8_C( 22), UINT8_C( 141), UINT8_C( 102), UINT8_C( 59), UINT8_C( 176), UINT8_C( 241), UINT8_C( 193)), simde_x_mm512_set_epu8(UINT8_C( 209), UINT8_C( 255), UINT8_C( 29), UINT8_C( 232), UINT8_C( 255), UINT8_C( 11), UINT8_C( 37), UINT8_C( 233), UINT8_C( 209), UINT8_C( 255), UINT8_C( 58), UINT8_C( 249), UINT8_C( 30), UINT8_C( 159), UINT8_C( 255), UINT8_C( 255), UINT8_C( 202), UINT8_C( 159), UINT8_C( 255), UINT8_C( 255), UINT8_C( 197), UINT8_C( 65), UINT8_C( 182), UINT8_C( 255), UINT8_C( 245), UINT8_C( 203), UINT8_C( 216), UINT8_C( 255), UINT8_C( 236), UINT8_C( 22), UINT8_C( 255), UINT8_C( 252), UINT8_C( 2), UINT8_C( 255), UINT8_C( 14), UINT8_C( 183), UINT8_C( 156), UINT8_C( 187), UINT8_C( 255), UINT8_C( 140), UINT8_C( 255), 
UINT8_C( 214), UINT8_C( 125), UINT8_C( 131), UINT8_C( 255), UINT8_C( 90), UINT8_C( 247), UINT8_C( 255), UINT8_C( 53), UINT8_C( 77), UINT8_C( 209), UINT8_C( 255), UINT8_C( 27), UINT8_C( 214), UINT8_C( 255), UINT8_C( 219), UINT8_C( 255), UINT8_C( 187), UINT8_C( 255), UINT8_C( 250), UINT8_C( 255), UINT8_C( 250), UINT8_C( 255), UINT8_C( 181)) }, { simde_x_mm512_set_epu8(UINT8_C( 192), UINT8_C( 203), UINT8_C( 214), UINT8_C( 126), UINT8_C( 67), UINT8_C( 50), UINT8_C( 238), UINT8_C( 76), UINT8_C( 237), UINT8_C( 123), UINT8_C( 169), UINT8_C( 106), UINT8_C( 182), UINT8_C( 44), UINT8_C( 117), UINT8_C( 103), UINT8_C( 81), UINT8_C( 122), UINT8_C( 56), UINT8_C( 246), UINT8_C( 67), UINT8_C( 79), UINT8_C( 83), UINT8_C( 218), UINT8_C( 243), UINT8_C( 43), UINT8_C( 27), UINT8_C( 159), UINT8_C( 102), UINT8_C( 126), UINT8_C( 38), UINT8_C( 194), UINT8_C( 232), UINT8_C( 117), UINT8_C( 218), UINT8_C( 163), UINT8_C( 198), UINT8_C( 132), UINT8_C( 181), UINT8_C( 10), UINT8_C( 18), UINT8_C( 182), UINT8_C( 14), UINT8_C( 36), UINT8_C( 249), UINT8_C( 113), UINT8_C( 40), UINT8_C( 48), UINT8_C( 149), UINT8_C( 222), UINT8_C( 181), UINT8_C( 85), UINT8_C( 221), UINT8_C( 140), UINT8_C( 65), UINT8_C( 235), UINT8_C( 15), UINT8_C( 3), UINT8_C( 45), UINT8_C( 21), UINT8_C( 72), UINT8_C( 93), UINT8_C( 108), UINT8_C( 125)), UINT64_C(12576710173448868104), simde_x_mm512_set_epu8(UINT8_C( 90), UINT8_C( 218), UINT8_C( 158), UINT8_C( 186), UINT8_C( 148), UINT8_C( 20), UINT8_C( 43), UINT8_C( 128), UINT8_C( 77), UINT8_C( 108), UINT8_C( 53), UINT8_C( 82), UINT8_C( 206), UINT8_C( 52), UINT8_C( 56), UINT8_C( 58), UINT8_C( 136), UINT8_C( 213), UINT8_C( 114), UINT8_C( 93), UINT8_C( 212), UINT8_C( 241), UINT8_C( 38), UINT8_C( 239), UINT8_C( 146), UINT8_C( 133), UINT8_C( 217), UINT8_C( 114), UINT8_C( 51), UINT8_C( 141), UINT8_C( 182), UINT8_C( 43), UINT8_C( 41), UINT8_C( 220), UINT8_C( 19), UINT8_C( 69), UINT8_C( 60), UINT8_C( 203), UINT8_C( 112), UINT8_C( 108), UINT8_C( 8), UINT8_C( 46), UINT8_C( 221), UINT8_C( 26), 
UINT8_C( 11), UINT8_C( 42), UINT8_C( 47), UINT8_C( 59), UINT8_C( 199), UINT8_C( 94), UINT8_C( 131), UINT8_C( 132), UINT8_C( 36), UINT8_C( 57), UINT8_C( 68), UINT8_C( 204), UINT8_C( 39), UINT8_C( 50), UINT8_C( 208), UINT8_C( 94), UINT8_C( 53), UINT8_C( 11), UINT8_C( 29), UINT8_C( 65)), simde_x_mm512_set_epu8(UINT8_C( 149), UINT8_C( 120), UINT8_C( 198), UINT8_C( 107), UINT8_C( 224), UINT8_C( 224), UINT8_C( 88), UINT8_C( 213), UINT8_C( 31), UINT8_C( 224), UINT8_C( 192), UINT8_C( 27), UINT8_C( 82), UINT8_C( 166), UINT8_C( 202), UINT8_C( 172), UINT8_C( 226), UINT8_C( 63), UINT8_C( 24), UINT8_C( 81), UINT8_C( 248), UINT8_C( 9), UINT8_C( 221), UINT8_C( 155), UINT8_C( 83), UINT8_C( 107), UINT8_C( 209), UINT8_C( 200), UINT8_C( 57), UINT8_C( 168), UINT8_C( 141), UINT8_C( 236), UINT8_C( 58), UINT8_C( 75), UINT8_C( 56), UINT8_C( 93), UINT8_C( 49), UINT8_C( 43), UINT8_C( 108), UINT8_C( 118), UINT8_C( 177), UINT8_C( 112), UINT8_C( 44), UINT8_C( 144), UINT8_C( 204), UINT8_C( 10), UINT8_C( 28), UINT8_C( 170), UINT8_C( 65), UINT8_C( 62), UINT8_C( 86), UINT8_C( 149), UINT8_C( 24), UINT8_C( 201), UINT8_C( 54), UINT8_C( 146), UINT8_C( 223), UINT8_C( 110), UINT8_C( 140), UINT8_C( 217), UINT8_C( 39), UINT8_C( 144), UINT8_C( 64), UINT8_C( 43)), simde_x_mm512_set_epu8(UINT8_C( 239), UINT8_C( 203), UINT8_C( 255), UINT8_C( 126), UINT8_C( 255), UINT8_C( 244), UINT8_C( 131), UINT8_C( 76), UINT8_C( 108), UINT8_C( 123), UINT8_C( 169), UINT8_C( 106), UINT8_C( 255), UINT8_C( 44), UINT8_C( 117), UINT8_C( 230), UINT8_C( 81), UINT8_C( 255), UINT8_C( 138), UINT8_C( 174), UINT8_C( 67), UINT8_C( 79), UINT8_C( 255), UINT8_C( 255), UINT8_C( 243), UINT8_C( 240), UINT8_C( 27), UINT8_C( 159), UINT8_C( 102), UINT8_C( 255), UINT8_C( 38), UINT8_C( 194), UINT8_C( 99), UINT8_C( 117), UINT8_C( 75), UINT8_C( 162), UINT8_C( 109), UINT8_C( 132), UINT8_C( 220), UINT8_C( 226), UINT8_C( 18), UINT8_C( 158), UINT8_C( 255), UINT8_C( 170), UINT8_C( 249), UINT8_C( 52), UINT8_C( 75), UINT8_C( 48), UINT8_C( 149), UINT8_C( 
222), UINT8_C( 181), UINT8_C( 255), UINT8_C( 221), UINT8_C( 140), UINT8_C( 65), UINT8_C( 255), UINT8_C( 15), UINT8_C( 3), UINT8_C( 45), UINT8_C( 21), UINT8_C( 92), UINT8_C( 93), UINT8_C( 108), UINT8_C( 125)) }, { simde_x_mm512_set_epu8(UINT8_C( 153), UINT8_C( 132), UINT8_C( 13), UINT8_C( 176), UINT8_C( 224), UINT8_C( 123), UINT8_C( 115), UINT8_C( 207), UINT8_C( 113), UINT8_C( 205), UINT8_C( 119), UINT8_C( 189), UINT8_C( 235), UINT8_C( 19), UINT8_C( 244), UINT8_C( 29), UINT8_C( 91), UINT8_C( 192), UINT8_C( 88), UINT8_C( 151), UINT8_C( 24), UINT8_C( 31), UINT8_C( 77), UINT8_C( 92), UINT8_C( 171), UINT8_C( 48), UINT8_C( 143), UINT8_C( 142), UINT8_C( 16), UINT8_C( 18), UINT8_C( 212), UINT8_C( 190), UINT8_C( 25), UINT8_C( 151), UINT8_C( 156), UINT8_C( 165), UINT8_C( 26), UINT8_C( 133), UINT8_C( 223), UINT8_C( 219), UINT8_C( 63), UINT8_C( 13), UINT8_C( 132), UINT8_C( 41), UINT8_C( 152), UINT8_C( 197), UINT8_C( 254), UINT8_C( 202), UINT8_C( 73), UINT8_C( 3), UINT8_C( 238), UINT8_C( 10), UINT8_C( 19), UINT8_C( 58), UINT8_C( 244), UINT8_C( 181), UINT8_C( 168), UINT8_C( 59), UINT8_C( 104), UINT8_C( 216), UINT8_C( 174), UINT8_C( 42), UINT8_C( 183), UINT8_C( 162)), UINT64_C( 2633789449456316803), simde_x_mm512_set_epu8(UINT8_C( 118), UINT8_C( 44), UINT8_C( 242), UINT8_C( 84), UINT8_C( 187), UINT8_C( 208), UINT8_C( 40), UINT8_C( 86), UINT8_C( 255), UINT8_C( 121), UINT8_C( 216), UINT8_C( 44), UINT8_C( 255), UINT8_C( 38), UINT8_C( 212), UINT8_C( 38), UINT8_C( 93), UINT8_C( 149), UINT8_C( 252), UINT8_C( 195), UINT8_C( 204), UINT8_C( 22), UINT8_C( 202), UINT8_C( 131), UINT8_C( 227), UINT8_C( 25), UINT8_C( 179), UINT8_C( 188), UINT8_C( 9), UINT8_C( 221), UINT8_C( 245), UINT8_C( 53), UINT8_C( 28), UINT8_C( 61), UINT8_C( 221), UINT8_C( 150), UINT8_C( 210), UINT8_C( 121), UINT8_C( 154), UINT8_C( 121), UINT8_C( 202), UINT8_C( 196), UINT8_C( 7), UINT8_C( 2), UINT8_C( 137), UINT8_C( 111), UINT8_C( 236), UINT8_C( 198), UINT8_C( 158), UINT8_C( 83), UINT8_C( 32), UINT8_C( 207), UINT8_C( 
245), UINT8_C( 48), UINT8_C( 92), UINT8_C( 47), UINT8_C( 218), UINT8_C( 11), UINT8_C( 26), UINT8_C( 90), UINT8_C( 251), UINT8_C( 73), UINT8_C( 45), UINT8_C( 254)), simde_x_mm512_set_epu8(UINT8_C( 225), UINT8_C( 126), UINT8_C( 138), UINT8_C( 199), UINT8_C( 130), UINT8_C( 29), UINT8_C( 101), UINT8_C( 241), UINT8_C( 195), UINT8_C( 118), UINT8_C( 102), UINT8_C( 12), UINT8_C( 197), UINT8_C( 215), UINT8_C( 196), UINT8_C( 46), UINT8_C( 63), UINT8_C( 78), UINT8_C( 87), UINT8_C( 18), UINT8_C( 18), UINT8_C( 32), UINT8_C( 230), UINT8_C( 250), UINT8_C( 236), UINT8_C( 133), UINT8_C( 99), UINT8_C( 65), UINT8_C( 13), UINT8_C( 25), UINT8_C( 108), UINT8_C( 135), UINT8_C( 42), UINT8_C( 0), UINT8_C( 104), UINT8_C( 239), UINT8_C( 111), UINT8_C( 146), UINT8_C( 217), UINT8_C( 125), UINT8_C( 206), UINT8_C( 209), UINT8_C( 30), UINT8_C( 92), UINT8_C( 19), UINT8_C( 248), UINT8_C( 136), UINT8_C( 127), UINT8_C( 207), UINT8_C( 23), UINT8_C( 16), UINT8_C( 192), UINT8_C( 177), UINT8_C( 116), UINT8_C( 251), UINT8_C( 206), UINT8_C( 32), UINT8_C( 22), UINT8_C( 214), UINT8_C( 253), UINT8_C( 30), UINT8_C( 64), UINT8_C( 96), UINT8_C( 190)), simde_x_mm512_set_epu8(UINT8_C( 153), UINT8_C( 132), UINT8_C( 255), UINT8_C( 176), UINT8_C( 224), UINT8_C( 237), UINT8_C( 115), UINT8_C( 207), UINT8_C( 255), UINT8_C( 205), UINT8_C( 119), UINT8_C( 189), UINT8_C( 255), UINT8_C( 253), UINT8_C( 244), UINT8_C( 84), UINT8_C( 91), UINT8_C( 192), UINT8_C( 88), UINT8_C( 213), UINT8_C( 222), UINT8_C( 31), UINT8_C( 77), UINT8_C( 255), UINT8_C( 255), UINT8_C( 48), UINT8_C( 143), UINT8_C( 142), UINT8_C( 22), UINT8_C( 246), UINT8_C( 212), UINT8_C( 190), UINT8_C( 70), UINT8_C( 61), UINT8_C( 156), UINT8_C( 165), UINT8_C( 26), UINT8_C( 255), UINT8_C( 223), UINT8_C( 219), UINT8_C( 63), UINT8_C( 13), UINT8_C( 132), UINT8_C( 41), UINT8_C( 152), UINT8_C( 197), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 106), UINT8_C( 48), UINT8_C( 255), UINT8_C( 19), UINT8_C( 58), UINT8_C( 244), UINT8_C( 253), UINT8_C( 250), UINT8_C( 59), 
UINT8_C( 104), UINT8_C( 216), UINT8_C( 174), UINT8_C( 42), UINT8_C( 141), UINT8_C( 255)) }, { simde_x_mm512_set_epu8(UINT8_C( 117), UINT8_C( 69), UINT8_C( 121), UINT8_C( 211), UINT8_C( 30), UINT8_C( 183), UINT8_C( 2), UINT8_C( 216), UINT8_C( 95), UINT8_C( 253), UINT8_C( 16), UINT8_C( 178), UINT8_C( 128), UINT8_C( 215), UINT8_C( 190), UINT8_C( 66), UINT8_C( 93), UINT8_C( 224), UINT8_C( 206), UINT8_C( 254), UINT8_C( 195), UINT8_C( 29), UINT8_C( 168), UINT8_C( 138), UINT8_C( 229), UINT8_C( 42), UINT8_C( 78), UINT8_C( 210), UINT8_C( 177), UINT8_C( 38), UINT8_C( 181), UINT8_C( 14), UINT8_C( 138), UINT8_C( 142), UINT8_C( 136), UINT8_C( 42), UINT8_C( 4), UINT8_C( 79), UINT8_C( 172), UINT8_C( 146), UINT8_C( 0), UINT8_C( 85), UINT8_C( 236), UINT8_C( 61), UINT8_C( 40), UINT8_C( 181), UINT8_C( 69), UINT8_C( 157), UINT8_C( 73), UINT8_C( 222), UINT8_C( 242), UINT8_C( 72), UINT8_C( 101), UINT8_C( 96), UINT8_C( 142), UINT8_C( 132), UINT8_C( 169), UINT8_C( 43), UINT8_C( 138), UINT8_C( 114), UINT8_C( 168), UINT8_C( 8), UINT8_C( 204), UINT8_C( 75)), UINT64_C(14890918166471265655), simde_x_mm512_set_epu8(UINT8_C( 200), UINT8_C( 186), UINT8_C( 66), UINT8_C( 145), UINT8_C( 187), UINT8_C( 15), UINT8_C( 174), UINT8_C( 244), UINT8_C( 167), UINT8_C( 37), UINT8_C( 80), UINT8_C( 120), UINT8_C( 173), UINT8_C( 136), UINT8_C( 95), UINT8_C( 21), UINT8_C( 91), UINT8_C( 159), UINT8_C( 184), UINT8_C( 42), UINT8_C( 22), UINT8_C( 186), UINT8_C( 71), UINT8_C( 178), UINT8_C( 251), UINT8_C( 52), UINT8_C( 234), UINT8_C( 222), UINT8_C( 16), UINT8_C( 92), UINT8_C( 91), UINT8_C( 184), UINT8_C( 3), UINT8_C( 225), UINT8_C( 161), UINT8_C( 200), UINT8_C( 206), UINT8_C( 68), UINT8_C( 232), UINT8_C( 206), UINT8_C( 94), UINT8_C( 67), UINT8_C( 108), UINT8_C( 138), UINT8_C( 191), UINT8_C( 31), UINT8_C( 70), UINT8_C( 108), UINT8_C( 190), UINT8_C( 107), UINT8_C( 171), UINT8_C( 38), UINT8_C( 2), UINT8_C( 32), UINT8_C( 56), UINT8_C( 66), UINT8_C( 254), UINT8_C( 182), UINT8_C( 112), UINT8_C( 10), UINT8_C( 64), UINT8_C( 
100), UINT8_C( 201), UINT8_C( 83)), simde_x_mm512_set_epu8(UINT8_C( 187), UINT8_C( 211), UINT8_C( 23), UINT8_C( 70), UINT8_C( 22), UINT8_C( 113), UINT8_C( 75), UINT8_C( 139), UINT8_C( 69), UINT8_C( 182), UINT8_C( 40), UINT8_C( 217), UINT8_C( 205), UINT8_C( 165), UINT8_C( 25), UINT8_C( 252), UINT8_C( 244), UINT8_C( 243), UINT8_C( 179), UINT8_C( 75), UINT8_C( 140), UINT8_C( 220), UINT8_C( 38), UINT8_C( 204), UINT8_C( 13), UINT8_C( 204), UINT8_C( 156), UINT8_C( 210), UINT8_C( 13), UINT8_C( 83), UINT8_C( 162), UINT8_C( 102), UINT8_C( 116), UINT8_C( 196), UINT8_C( 44), UINT8_C( 249), UINT8_C( 104), UINT8_C( 206), UINT8_C( 226), UINT8_C( 17), UINT8_C( 13), UINT8_C( 149), UINT8_C( 111), UINT8_C( 243), UINT8_C( 17), UINT8_C( 211), UINT8_C( 225), UINT8_C( 76), UINT8_C( 154), UINT8_C( 185), UINT8_C( 116), UINT8_C( 72), UINT8_C( 245), UINT8_C( 159), UINT8_C( 93), UINT8_C( 48), UINT8_C( 181), UINT8_C( 166), UINT8_C( 246), UINT8_C( 178), UINT8_C( 248), UINT8_C( 246), UINT8_C( 48), UINT8_C( 245)), simde_x_mm512_set_epu8(UINT8_C( 255), UINT8_C( 255), UINT8_C( 121), UINT8_C( 211), UINT8_C( 209), UINT8_C( 128), UINT8_C( 249), UINT8_C( 216), UINT8_C( 236), UINT8_C( 253), UINT8_C( 120), UINT8_C( 178), UINT8_C( 128), UINT8_C( 255), UINT8_C( 120), UINT8_C( 255), UINT8_C( 93), UINT8_C( 224), UINT8_C( 255), UINT8_C( 254), UINT8_C( 162), UINT8_C( 29), UINT8_C( 109), UINT8_C( 255), UINT8_C( 229), UINT8_C( 42), UINT8_C( 255), UINT8_C( 210), UINT8_C( 29), UINT8_C( 175), UINT8_C( 181), UINT8_C( 255), UINT8_C( 138), UINT8_C( 255), UINT8_C( 136), UINT8_C( 42), UINT8_C( 255), UINT8_C( 79), UINT8_C( 255), UINT8_C( 223), UINT8_C( 107), UINT8_C( 216), UINT8_C( 219), UINT8_C( 61), UINT8_C( 208), UINT8_C( 242), UINT8_C( 255), UINT8_C( 157), UINT8_C( 73), UINT8_C( 255), UINT8_C( 242), UINT8_C( 72), UINT8_C( 247), UINT8_C( 191), UINT8_C( 142), UINT8_C( 114), UINT8_C( 169), UINT8_C( 255), UINT8_C( 255), UINT8_C( 188), UINT8_C( 168), UINT8_C( 255), UINT8_C( 249), UINT8_C( 255)) }, }; for (size_t i = 0 ; 
i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_adds_epu8(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_adds_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask64 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT64_C(17286015531074160252), simde_x_mm512_set_epu8(UINT8_C( 141), UINT8_C( 229), UINT8_C( 62), UINT8_C( 171), UINT8_C( 49), UINT8_C( 141), UINT8_C( 38), UINT8_C( 4), UINT8_C( 92), UINT8_C( 116), UINT8_C( 178), UINT8_C( 237), UINT8_C( 183), UINT8_C( 22), UINT8_C( 190), UINT8_C( 227), UINT8_C( 55), UINT8_C( 78), UINT8_C( 211), UINT8_C( 137), UINT8_C( 30), UINT8_C( 227), UINT8_C( 233), UINT8_C( 89), UINT8_C( 15), UINT8_C( 119), UINT8_C( 168), UINT8_C( 109), UINT8_C( 240), UINT8_C( 246), UINT8_C( 0), UINT8_C( 238), UINT8_C( 1), UINT8_C( 116), UINT8_C( 62), UINT8_C( 183), UINT8_C( 134), UINT8_C( 0), UINT8_C( 111), UINT8_C( 95), UINT8_C( 236), UINT8_C( 3), UINT8_C( 122), UINT8_C( 135), UINT8_C( 13), UINT8_C( 25), UINT8_C( 221), UINT8_C( 129), UINT8_C( 82), UINT8_C( 90), UINT8_C( 53), UINT8_C( 123), UINT8_C( 73), UINT8_C( 108), UINT8_C( 238), UINT8_C( 15), UINT8_C( 81), UINT8_C( 229), UINT8_C( 102), UINT8_C( 118), UINT8_C( 39), UINT8_C( 179), UINT8_C( 45), UINT8_C( 81)), simde_x_mm512_set_epu8(UINT8_C( 68), UINT8_C( 148), UINT8_C( 135), UINT8_C( 141), UINT8_C( 197), UINT8_C( 178), UINT8_C( 145), UINT8_C( 56), UINT8_C( 9), UINT8_C( 135), UINT8_C( 120), UINT8_C( 220), UINT8_C( 36), UINT8_C( 153), UINT8_C( 152), UINT8_C( 25), UINT8_C( 145), UINT8_C( 193), UINT8_C( 221), UINT8_C( 136), UINT8_C( 28), UINT8_C( 212), UINT8_C( 230), UINT8_C( 170), UINT8_C( 243), UINT8_C( 53), UINT8_C( 168), UINT8_C( 149), UINT8_C( 68), UINT8_C( 42), UINT8_C( 138), UINT8_C( 111), UINT8_C( 54), UINT8_C( 198), UINT8_C( 243), UINT8_C( 27), UINT8_C( 23), UINT8_C( 41), UINT8_C( 137), UINT8_C( 44), UINT8_C( 7), 
UINT8_C( 136), UINT8_C( 32), UINT8_C( 213), UINT8_C( 114), UINT8_C( 184), UINT8_C( 73), UINT8_C( 160), UINT8_C( 96), UINT8_C( 110), UINT8_C( 175), UINT8_C( 180), UINT8_C( 103), UINT8_C( 156), UINT8_C( 234), UINT8_C( 18), UINT8_C( 115), UINT8_C( 54), UINT8_C( 216), UINT8_C( 125), UINT8_C( 110), UINT8_C( 31), UINT8_C( 51), UINT8_C( 152)), simde_x_mm512_set_epu8(UINT8_C( 209), UINT8_C( 255), UINT8_C( 197), UINT8_C( 0), UINT8_C( 246), UINT8_C( 255), UINT8_C( 183), UINT8_C( 60), UINT8_C( 101), UINT8_C( 251), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 175), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 138), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 157), UINT8_C( 41), UINT8_C( 248), UINT8_C( 139), UINT8_C( 243), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 127), UINT8_C( 209), UINT8_C( 255), UINT8_C( 255), UINT8_C( 178), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 255), UINT8_C( 243), UINT8_C( 149), UINT8_C( 210), UINT8_C( 0), UINT8_C( 0)) }, { UINT64_C( 9031211210081693899), simde_x_mm512_set_epu8(UINT8_C( 150), UINT8_C( 238), UINT8_C( 65), UINT8_C( 152), UINT8_C( 81), UINT8_C( 38), UINT8_C( 172), UINT8_C( 254), UINT8_C( 242), UINT8_C( 85), UINT8_C( 176), UINT8_C( 80), UINT8_C( 48), UINT8_C( 93), UINT8_C( 79), UINT8_C( 127), UINT8_C( 16), UINT8_C( 41), UINT8_C( 54), UINT8_C( 140), UINT8_C( 17), UINT8_C( 42), UINT8_C( 170), UINT8_C( 38), UINT8_C( 138), UINT8_C( 200), UINT8_C( 60), UINT8_C( 19), UINT8_C( 156), UINT8_C( 149), UINT8_C( 105), UINT8_C( 180), UINT8_C( 10), UINT8_C( 247), UINT8_C( 244), UINT8_C( 200), UINT8_C( 185), UINT8_C( 96), UINT8_C( 6), UINT8_C( 24), UINT8_C( 69), UINT8_C( 73), UINT8_C( 229), UINT8_C( 3), UINT8_C( 161), UINT8_C( 50), UINT8_C( 
189), UINT8_C( 217), UINT8_C( 97), UINT8_C( 62), UINT8_C( 101), UINT8_C( 56), UINT8_C( 177), UINT8_C( 83), UINT8_C( 157), UINT8_C( 200), UINT8_C( 239), UINT8_C( 200), UINT8_C( 248), UINT8_C( 240), UINT8_C( 84), UINT8_C( 161), UINT8_C( 183), UINT8_C( 54)), simde_x_mm512_set_epu8(UINT8_C( 63), UINT8_C( 205), UINT8_C( 87), UINT8_C( 32), UINT8_C( 191), UINT8_C( 55), UINT8_C( 14), UINT8_C( 81), UINT8_C( 133), UINT8_C( 156), UINT8_C( 217), UINT8_C( 212), UINT8_C( 22), UINT8_C( 112), UINT8_C( 16), UINT8_C( 15), UINT8_C( 152), UINT8_C( 154), UINT8_C( 102), UINT8_C( 161), UINT8_C( 69), UINT8_C( 13), UINT8_C( 235), UINT8_C( 45), UINT8_C( 62), UINT8_C( 136), UINT8_C( 145), UINT8_C( 32), UINT8_C( 149), UINT8_C( 226), UINT8_C( 99), UINT8_C( 192), UINT8_C( 8), UINT8_C( 214), UINT8_C( 81), UINT8_C( 222), UINT8_C( 210), UINT8_C( 26), UINT8_C( 31), UINT8_C( 254), UINT8_C( 68), UINT8_C( 249), UINT8_C( 185), UINT8_C( 46), UINT8_C( 235), UINT8_C( 183), UINT8_C( 21), UINT8_C( 83), UINT8_C( 148), UINT8_C( 159), UINT8_C( 187), UINT8_C( 73), UINT8_C( 57), UINT8_C( 219), UINT8_C( 21), UINT8_C( 82), UINT8_C( 137), UINT8_C( 130), UINT8_C( 126), UINT8_C( 91), UINT8_C( 115), UINT8_C( 31), UINT8_C( 177), UINT8_C( 28)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 255), UINT8_C( 152), UINT8_C( 184), UINT8_C( 255), UINT8_C( 93), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 241), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 205), UINT8_C( 0), UINT8_C( 142), UINT8_C( 0), UINT8_C( 195), UINT8_C( 0), UINT8_C( 0), UINT8_C( 86), UINT8_C( 55), UINT8_C( 255), UINT8_C( 0), UINT8_C( 200), UINT8_C( 0), UINT8_C( 205), UINT8_C( 51), UINT8_C( 255), UINT8_C( 255), UINT8_C( 204), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 122), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 255), UINT8_C( 49), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 245), UINT8_C( 221), UINT8_C( 255), UINT8_C( 129), UINT8_C( 0), UINT8_C( 0), UINT8_C( 
0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 199), UINT8_C( 0), UINT8_C( 255), UINT8_C( 82)) }, { UINT64_C( 6828010367541473016), simde_x_mm512_set_epu8(UINT8_C( 84), UINT8_C( 160), UINT8_C( 87), UINT8_C( 234), UINT8_C( 251), UINT8_C( 253), UINT8_C( 129), UINT8_C( 41), UINT8_C( 74), UINT8_C( 72), UINT8_C( 252), UINT8_C( 28), UINT8_C( 141), UINT8_C( 93), UINT8_C( 102), UINT8_C( 44), UINT8_C( 153), UINT8_C( 227), UINT8_C( 206), UINT8_C( 48), UINT8_C( 160), UINT8_C( 206), UINT8_C( 46), UINT8_C( 191), UINT8_C( 4), UINT8_C( 43), UINT8_C( 181), UINT8_C( 97), UINT8_C( 230), UINT8_C( 153), UINT8_C( 71), UINT8_C( 149), UINT8_C( 91), UINT8_C( 45), UINT8_C( 245), UINT8_C( 47), UINT8_C( 29), UINT8_C( 25), UINT8_C( 26), UINT8_C( 247), UINT8_C( 10), UINT8_C( 36), UINT8_C( 177), UINT8_C( 203), UINT8_C( 41), UINT8_C( 1), UINT8_C( 233), UINT8_C( 193), UINT8_C( 129), UINT8_C( 68), UINT8_C( 48), UINT8_C( 36), UINT8_C( 89), UINT8_C( 144), UINT8_C( 225), UINT8_C( 120), UINT8_C( 35), UINT8_C( 62), UINT8_C( 235), UINT8_C( 142), UINT8_C( 152), UINT8_C( 57), UINT8_C( 42), UINT8_C( 145)), simde_x_mm512_set_epu8(UINT8_C( 96), UINT8_C( 192), UINT8_C( 233), UINT8_C( 210), UINT8_C( 227), UINT8_C( 185), UINT8_C( 71), UINT8_C( 90), UINT8_C( 44), UINT8_C( 164), UINT8_C( 225), UINT8_C( 26), UINT8_C( 157), UINT8_C( 203), UINT8_C( 117), UINT8_C( 18), UINT8_C( 193), UINT8_C( 45), UINT8_C( 12), UINT8_C( 24), UINT8_C( 148), UINT8_C( 18), UINT8_C( 196), UINT8_C( 28), UINT8_C( 50), UINT8_C( 245), UINT8_C( 188), UINT8_C( 225), UINT8_C( 105), UINT8_C( 150), UINT8_C( 98), UINT8_C( 51), UINT8_C( 58), UINT8_C( 103), UINT8_C( 111), UINT8_C( 129), UINT8_C( 68), UINT8_C( 200), UINT8_C( 124), UINT8_C( 137), UINT8_C( 74), UINT8_C( 194), UINT8_C( 140), UINT8_C( 37), UINT8_C( 244), UINT8_C( 114), UINT8_C( 0), UINT8_C( 61), UINT8_C( 103), UINT8_C( 252), UINT8_C( 151), UINT8_C( 188), UINT8_C( 39), UINT8_C( 156), UINT8_C( 163), UINT8_C( 11), UINT8_C( 176), UINT8_C( 237), UINT8_C( 
234), UINT8_C( 217), UINT8_C( 127), UINT8_C( 218), UINT8_C( 131), UINT8_C( 145)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 200), UINT8_C( 0), UINT8_C( 118), UINT8_C( 236), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 62), UINT8_C( 255), UINT8_C( 255), UINT8_C( 218), UINT8_C( 72), UINT8_C( 0), UINT8_C( 224), UINT8_C( 242), UINT8_C( 219), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 149), UINT8_C( 0), UINT8_C( 255), UINT8_C( 176), UINT8_C( 97), UINT8_C( 225), UINT8_C( 150), UINT8_C( 255), UINT8_C( 84), UINT8_C( 230), UINT8_C( 255), UINT8_C( 240), UINT8_C( 255), UINT8_C( 115), UINT8_C( 233), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 199), UINT8_C( 0), UINT8_C( 128), UINT8_C( 255), UINT8_C( 255), UINT8_C( 0), UINT8_C( 211), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0)) }, { UINT64_C(18325019528117086600), simde_x_mm512_set_epu8(UINT8_C( 122), UINT8_C( 202), UINT8_C( 50), UINT8_C( 123), UINT8_C( 225), UINT8_C( 182), UINT8_C( 192), UINT8_C( 54), UINT8_C( 175), UINT8_C( 60), UINT8_C( 31), UINT8_C( 233), UINT8_C( 108), UINT8_C( 137), UINT8_C( 164), UINT8_C( 176), UINT8_C( 226), UINT8_C( 219), UINT8_C( 51), UINT8_C( 220), UINT8_C( 8), UINT8_C( 52), UINT8_C( 97), UINT8_C( 123), UINT8_C( 207), UINT8_C( 132), UINT8_C( 95), UINT8_C( 173), UINT8_C( 70), UINT8_C( 206), UINT8_C( 195), UINT8_C( 25), UINT8_C( 159), UINT8_C( 28), UINT8_C( 198), UINT8_C( 11), UINT8_C( 242), UINT8_C( 126), UINT8_C( 81), UINT8_C( 45), UINT8_C( 233), UINT8_C( 120), UINT8_C( 173), UINT8_C( 240), UINT8_C( 7), UINT8_C( 51), UINT8_C( 199), UINT8_C( 206), UINT8_C( 235), UINT8_C( 98), UINT8_C( 88), UINT8_C( 0), UINT8_C( 190), UINT8_C( 3), UINT8_C( 124), UINT8_C( 143), UINT8_C( 50), UINT8_C( 88), UINT8_C( 171), UINT8_C( 163), UINT8_C( 212), UINT8_C( 243), UINT8_C( 162), UINT8_C( 
17)), simde_x_mm512_set_epu8(UINT8_C( 243), UINT8_C( 244), UINT8_C( 132), UINT8_C( 96), UINT8_C( 243), UINT8_C( 158), UINT8_C( 99), UINT8_C( 243), UINT8_C( 56), UINT8_C( 211), UINT8_C( 148), UINT8_C( 237), UINT8_C( 132), UINT8_C( 229), UINT8_C( 22), UINT8_C( 126), UINT8_C( 150), UINT8_C( 188), UINT8_C( 196), UINT8_C( 8), UINT8_C( 60), UINT8_C( 93), UINT8_C( 223), UINT8_C( 229), UINT8_C( 249), UINT8_C( 27), UINT8_C( 134), UINT8_C( 218), UINT8_C( 23), UINT8_C( 6), UINT8_C( 45), UINT8_C( 235), UINT8_C( 233), UINT8_C( 155), UINT8_C( 116), UINT8_C( 127), UINT8_C( 96), UINT8_C( 40), UINT8_C( 159), UINT8_C( 40), UINT8_C( 86), UINT8_C( 212), UINT8_C( 70), UINT8_C( 185), UINT8_C( 62), UINT8_C( 235), UINT8_C( 66), UINT8_C( 68), UINT8_C( 169), UINT8_C( 195), UINT8_C( 48), UINT8_C( 186), UINT8_C( 18), UINT8_C( 178), UINT8_C( 158), UINT8_C( 117), UINT8_C( 74), UINT8_C( 32), UINT8_C( 93), UINT8_C( 125), UINT8_C( 209), UINT8_C( 196), UINT8_C( 170), UINT8_C( 117)), simde_x_mm512_set_epu8(UINT8_C( 255), UINT8_C( 255), UINT8_C( 182), UINT8_C( 219), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 240), UINT8_C( 255), UINT8_C( 186), UINT8_C( 255), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 68), UINT8_C( 145), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 229), UINT8_C( 0), UINT8_C( 93), UINT8_C( 0), UINT8_C( 240), UINT8_C( 255), UINT8_C( 255), UINT8_C( 0), UINT8_C( 255), UINT8_C( 138), UINT8_C( 0), UINT8_C( 0), UINT8_C( 240), UINT8_C( 85), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 136), UINT8_C( 0), UINT8_C( 208), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 124), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0)) }, { UINT64_C( 7677112093376593822), simde_x_mm512_set_epu8(UINT8_C( 203), UINT8_C( 34), UINT8_C( 9), UINT8_C( 64), 
UINT8_C( 195), UINT8_C( 231), UINT8_C( 141), UINT8_C( 100), UINT8_C( 65), UINT8_C( 8), UINT8_C( 69), UINT8_C( 248), UINT8_C( 241), UINT8_C( 205), UINT8_C( 1), UINT8_C( 90), UINT8_C( 115), UINT8_C( 51), UINT8_C( 165), UINT8_C( 56), UINT8_C( 64), UINT8_C( 217), UINT8_C( 137), UINT8_C( 228), UINT8_C( 202), UINT8_C( 28), UINT8_C( 54), UINT8_C( 248), UINT8_C( 202), UINT8_C( 128), UINT8_C( 228), UINT8_C( 185), UINT8_C( 107), UINT8_C( 190), UINT8_C( 142), UINT8_C( 168), UINT8_C( 34), UINT8_C( 173), UINT8_C( 235), UINT8_C( 192), UINT8_C( 121), UINT8_C( 236), UINT8_C( 167), UINT8_C( 162), UINT8_C( 112), UINT8_C( 229), UINT8_C( 81), UINT8_C( 202), UINT8_C( 192), UINT8_C( 142), UINT8_C( 48), UINT8_C( 167), UINT8_C( 195), UINT8_C( 26), UINT8_C( 43), UINT8_C( 29), UINT8_C( 0), UINT8_C( 64), UINT8_C( 123), UINT8_C( 189), UINT8_C( 15), UINT8_C( 120), UINT8_C( 36), UINT8_C( 40)), simde_x_mm512_set_epu8(UINT8_C( 113), UINT8_C( 32), UINT8_C( 71), UINT8_C( 55), UINT8_C( 230), UINT8_C( 82), UINT8_C( 175), UINT8_C( 236), UINT8_C( 201), UINT8_C( 112), UINT8_C( 45), UINT8_C( 37), UINT8_C( 67), UINT8_C( 187), UINT8_C( 64), UINT8_C( 39), UINT8_C( 72), UINT8_C( 45), UINT8_C( 120), UINT8_C( 251), UINT8_C( 147), UINT8_C( 62), UINT8_C( 17), UINT8_C( 31), UINT8_C( 226), UINT8_C( 198), UINT8_C( 56), UINT8_C( 21), UINT8_C( 72), UINT8_C( 182), UINT8_C( 216), UINT8_C( 120), UINT8_C( 95), UINT8_C( 108), UINT8_C( 32), UINT8_C( 64), UINT8_C( 128), UINT8_C( 102), UINT8_C( 235), UINT8_C( 28), UINT8_C( 105), UINT8_C( 52), UINT8_C( 85), UINT8_C( 152), UINT8_C( 57), UINT8_C( 225), UINT8_C( 218), UINT8_C( 132), UINT8_C( 149), UINT8_C( 254), UINT8_C( 55), UINT8_C( 46), UINT8_C( 185), UINT8_C( 77), UINT8_C( 18), UINT8_C( 70), UINT8_C( 89), UINT8_C( 125), UINT8_C( 214), UINT8_C( 131), UINT8_C( 121), UINT8_C( 245), UINT8_C( 187), UINT8_C( 197)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 66), UINT8_C( 80), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), 
UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 65), UINT8_C( 0), UINT8_C( 187), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 226), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 232), UINT8_C( 162), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 226), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 103), UINT8_C( 213), UINT8_C( 255), UINT8_C( 103), UINT8_C( 61), UINT8_C( 99), UINT8_C( 89), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 136), UINT8_C( 255), UINT8_C( 223), UINT8_C( 0)) }, { UINT64_C( 4840581956081845689), simde_x_mm512_set_epu8(UINT8_C( 122), UINT8_C( 148), UINT8_C( 186), UINT8_C( 193), UINT8_C( 56), UINT8_C( 71), UINT8_C( 242), UINT8_C( 49), UINT8_C( 183), UINT8_C( 53), UINT8_C( 227), UINT8_C( 3), UINT8_C( 183), UINT8_C( 43), UINT8_C( 234), UINT8_C( 85), UINT8_C( 230), UINT8_C( 247), UINT8_C( 66), UINT8_C( 1), UINT8_C( 243), UINT8_C( 60), UINT8_C( 137), UINT8_C( 173), UINT8_C( 134), UINT8_C( 192), UINT8_C( 173), UINT8_C( 182), UINT8_C( 119), UINT8_C( 248), UINT8_C( 12), UINT8_C( 113), UINT8_C( 244), UINT8_C( 172), UINT8_C( 6), UINT8_C( 69), UINT8_C( 2), UINT8_C( 181), UINT8_C( 222), UINT8_C( 130), UINT8_C( 3), UINT8_C( 128), UINT8_C( 247), UINT8_C( 24), UINT8_C( 11), UINT8_C( 162), UINT8_C( 224), UINT8_C( 110), UINT8_C( 33), UINT8_C( 232), UINT8_C( 125), UINT8_C( 35), UINT8_C( 153), UINT8_C( 208), UINT8_C( 234), UINT8_C( 38), UINT8_C( 175), UINT8_C( 9), UINT8_C( 245), UINT8_C( 132), UINT8_C( 71), UINT8_C( 31), UINT8_C( 214), UINT8_C( 93)), simde_x_mm512_set_epu8(UINT8_C( 53), UINT8_C( 77), UINT8_C( 63), UINT8_C( 172), UINT8_C( 27), UINT8_C( 22), UINT8_C( 3), UINT8_C( 219), UINT8_C( 65), UINT8_C( 118), UINT8_C( 130), UINT8_C( 97), UINT8_C( 109), UINT8_C( 7), UINT8_C( 142), UINT8_C( 181), 
UINT8_C( 131), UINT8_C( 121), UINT8_C( 128), UINT8_C( 103), UINT8_C( 0), UINT8_C( 101), UINT8_C( 215), UINT8_C( 89), UINT8_C( 173), UINT8_C( 191), UINT8_C( 9), UINT8_C( 249), UINT8_C( 193), UINT8_C( 13), UINT8_C( 105), UINT8_C( 92), UINT8_C( 238), UINT8_C( 235), UINT8_C( 154), UINT8_C( 142), UINT8_C( 74), UINT8_C( 121), UINT8_C( 211), UINT8_C( 52), UINT8_C( 193), UINT8_C( 163), UINT8_C( 98), UINT8_C( 106), UINT8_C( 147), UINT8_C( 209), UINT8_C( 37), UINT8_C( 70), UINT8_C( 100), UINT8_C( 121), UINT8_C( 18), UINT8_C( 28), UINT8_C( 139), UINT8_C( 107), UINT8_C( 3), UINT8_C( 194), UINT8_C( 42), UINT8_C( 72), UINT8_C( 91), UINT8_C( 86), UINT8_C( 184), UINT8_C( 9), UINT8_C( 176), UINT8_C( 118)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 225), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 245), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 255), UINT8_C( 50), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 194), UINT8_C( 104), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 0), UINT8_C( 182), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 160), UINT8_C( 211), UINT8_C( 76), UINT8_C( 255), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 255), UINT8_C( 0), UINT8_C( 158), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 133), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 232), UINT8_C( 217), UINT8_C( 0), UINT8_C( 255), UINT8_C( 218), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 211)) }, { UINT64_C(17498311407133456191), simde_x_mm512_set_epu8(UINT8_C( 61), UINT8_C( 167), UINT8_C( 84), UINT8_C( 162), UINT8_C( 31), UINT8_C( 140), UINT8_C( 189), UINT8_C( 154), UINT8_C( 184), UINT8_C( 165), UINT8_C( 151), UINT8_C( 148), UINT8_C( 212), UINT8_C( 74), UINT8_C( 228), UINT8_C( 124), UINT8_C( 120), UINT8_C( 215), UINT8_C( 177), UINT8_C( 122), UINT8_C( 87), UINT8_C( 
137), UINT8_C( 54), UINT8_C( 254), UINT8_C( 209), UINT8_C( 84), UINT8_C( 130), UINT8_C( 192), UINT8_C( 14), UINT8_C( 11), UINT8_C( 37), UINT8_C( 233), UINT8_C( 67), UINT8_C( 124), UINT8_C( 58), UINT8_C( 162), UINT8_C( 30), UINT8_C( 223), UINT8_C( 70), UINT8_C( 232), UINT8_C( 38), UINT8_C( 159), UINT8_C( 200), UINT8_C( 196), UINT8_C( 197), UINT8_C( 65), UINT8_C( 182), UINT8_C( 45), UINT8_C( 245), UINT8_C( 55), UINT8_C( 174), UINT8_C( 12), UINT8_C( 106), UINT8_C( 22), UINT8_C( 132), UINT8_C( 252), UINT8_C( 2), UINT8_C( 175), UINT8_C( 14), UINT8_C( 90), UINT8_C( 156), UINT8_C( 134), UINT8_C( 221), UINT8_C( 81)), simde_x_mm512_set_epu8(UINT8_C( 21), UINT8_C( 108), UINT8_C( 125), UINT8_C( 196), UINT8_C( 184), UINT8_C( 74), UINT8_C( 251), UINT8_C( 198), UINT8_C( 215), UINT8_C( 22), UINT8_C( 141), UINT8_C( 102), UINT8_C( 59), UINT8_C( 176), UINT8_C( 241), UINT8_C( 193), UINT8_C( 84), UINT8_C( 185), UINT8_C( 8), UINT8_C( 12), UINT8_C( 245), UINT8_C( 180), UINT8_C( 62), UINT8_C( 93), UINT8_C( 181), UINT8_C( 179), UINT8_C( 172), UINT8_C( 148), UINT8_C( 221), UINT8_C( 14), UINT8_C( 196), UINT8_C( 18), UINT8_C( 23), UINT8_C( 196), UINT8_C( 193), UINT8_C( 142), UINT8_C( 201), UINT8_C( 75), UINT8_C( 157), UINT8_C( 201), UINT8_C( 58), UINT8_C( 76), UINT8_C( 154), UINT8_C( 138), UINT8_C( 226), UINT8_C( 39), UINT8_C( 119), UINT8_C( 85), UINT8_C( 248), UINT8_C( 184), UINT8_C( 196), UINT8_C( 162), UINT8_C( 144), UINT8_C( 119), UINT8_C( 124), UINT8_C( 76), UINT8_C( 214), UINT8_C( 132), UINT8_C( 54), UINT8_C( 74), UINT8_C( 164), UINT8_C( 99), UINT8_C( 79), UINT8_C( 253)), simde_x_mm512_set_epu8(UINT8_C( 82), UINT8_C( 255), UINT8_C( 209), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 255), UINT8_C( 187), UINT8_C( 0), UINT8_C( 250), UINT8_C( 0), UINT8_C( 250), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 185), UINT8_C( 134), UINT8_C( 255), UINT8_C( 255), UINT8_C( 0), UINT8_C( 255), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), 
UINT8_C( 0), UINT8_C( 0), UINT8_C( 233), UINT8_C( 251), UINT8_C( 90), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 227), UINT8_C( 255), UINT8_C( 0), UINT8_C( 235), UINT8_C( 0), UINT8_C( 255), UINT8_C( 255), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 255), UINT8_C( 239), UINT8_C( 255), UINT8_C( 174), UINT8_C( 0), UINT8_C( 141), UINT8_C( 255), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 68), UINT8_C( 164), UINT8_C( 255), UINT8_C( 233), UINT8_C( 255), UINT8_C( 255)) }, { UINT64_C( 3462613925466476941), simde_x_mm512_set_epu8(UINT8_C( 18), UINT8_C( 182), UINT8_C( 14), UINT8_C( 36), UINT8_C( 249), UINT8_C( 113), UINT8_C( 40), UINT8_C( 48), UINT8_C( 149), UINT8_C( 222), UINT8_C( 181), UINT8_C( 85), UINT8_C( 221), UINT8_C( 140), UINT8_C( 65), UINT8_C( 235), UINT8_C( 15), UINT8_C( 3), UINT8_C( 45), UINT8_C( 21), UINT8_C( 72), UINT8_C( 93), UINT8_C( 108), UINT8_C( 125), UINT8_C( 1), UINT8_C( 75), UINT8_C( 21), UINT8_C( 220), UINT8_C( 130), UINT8_C( 122), UINT8_C( 71), UINT8_C( 76), UINT8_C( 28), UINT8_C( 200), UINT8_C( 32), UINT8_C( 101), UINT8_C( 149), UINT8_C( 145), UINT8_C( 168), UINT8_C( 237), UINT8_C( 179), UINT8_C( 19), UINT8_C( 235), UINT8_C( 145), UINT8_C( 188), UINT8_C( 82), UINT8_C( 138), UINT8_C( 180), UINT8_C( 47), UINT8_C( 127), UINT8_C( 62), UINT8_C( 240), UINT8_C( 10), UINT8_C( 242), UINT8_C( 156), UINT8_C( 86), UINT8_C( 29), UINT8_C( 107), UINT8_C( 56), UINT8_C( 21), UINT8_C( 24), UINT8_C( 68), UINT8_C( 160), UINT8_C( 64)), simde_x_mm512_set_epu8(UINT8_C( 199), UINT8_C( 94), UINT8_C( 131), UINT8_C( 132), UINT8_C( 36), UINT8_C( 57), UINT8_C( 68), UINT8_C( 204), UINT8_C( 39), UINT8_C( 50), UINT8_C( 208), UINT8_C( 94), UINT8_C( 53), UINT8_C( 11), UINT8_C( 29), UINT8_C( 65), UINT8_C( 174), UINT8_C( 137), UINT8_C( 115), UINT8_C( 68), UINT8_C( 187), UINT8_C( 118), UINT8_C( 17), UINT8_C( 8), UINT8_C( 192), UINT8_C( 203), UINT8_C( 214), UINT8_C( 126), UINT8_C( 67), UINT8_C( 50), UINT8_C( 238), UINT8_C( 76), UINT8_C( 
237), UINT8_C( 123), UINT8_C( 169), UINT8_C( 106), UINT8_C( 182), UINT8_C( 44), UINT8_C( 117), UINT8_C( 103), UINT8_C( 81), UINT8_C( 122), UINT8_C( 56), UINT8_C( 246), UINT8_C( 67), UINT8_C( 79), UINT8_C( 83), UINT8_C( 218), UINT8_C( 243), UINT8_C( 43), UINT8_C( 27), UINT8_C( 159), UINT8_C( 102), UINT8_C( 126), UINT8_C( 38), UINT8_C( 194), UINT8_C( 232), UINT8_C( 117), UINT8_C( 218), UINT8_C( 163), UINT8_C( 198), UINT8_C( 132), UINT8_C( 181), UINT8_C( 10)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 145), UINT8_C( 168), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 151), UINT8_C( 0), UINT8_C( 255), UINT8_C( 189), UINT8_C( 0), UINT8_C( 160), UINT8_C( 0), UINT8_C( 255), UINT8_C( 211), UINT8_C( 0), UINT8_C( 133), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 172), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 255), UINT8_C( 201), UINT8_C( 207), UINT8_C( 255), UINT8_C( 189), UINT8_C( 0), UINT8_C( 255), UINT8_C( 255), UINT8_C( 141), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 255), UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 222), UINT8_C( 200), UINT8_C( 0), UINT8_C( 74)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_adds_epu8(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_adds_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu16(UINT16_C( 57245), UINT16_C( 31803), UINT16_C( 9053), UINT16_C( 21282), UINT16_C( 45515), UINT16_C( 57894), UINT16_C( 50445), UINT16_C( 50583), UINT16_C( 54723), UINT16_C( 52144), UINT16_C( 13347), UINT16_C( 57624), UINT16_C( 
20975), UINT16_C( 65317), UINT16_C( 16493), UINT16_C( 47193), UINT16_C( 8805), UINT16_C( 17012), UINT16_C( 15383), UINT16_C( 5484), UINT16_C( 64480), UINT16_C( 10898), UINT16_C( 37488), UINT16_C( 8763), UINT16_C( 45480), UINT16_C( 3307), UINT16_C( 9193), UINT16_C( 26229), UINT16_C( 20008), UINT16_C( 59713), UINT16_C( 53564), UINT16_C( 62675)), simde_x_mm512_set_epu16(UINT16_C( 56619), UINT16_C( 61703), UINT16_C( 3086), UINT16_C( 41224), UINT16_C( 8421), UINT16_C( 52766), UINT16_C( 7424), UINT16_C( 48822), UINT16_C( 41975), UINT16_C( 22918), UINT16_C( 59915), UINT16_C( 26727), UINT16_C( 53323), UINT16_C( 34707), UINT16_C( 14572), UINT16_C( 716), UINT16_C( 56087), UINT16_C( 9094), UINT16_C( 20814), UINT16_C( 56962), UINT16_C( 14110), UINT16_C( 52082), UINT16_C( 8468), UINT16_C( 11854), UINT16_C( 1576), UINT16_C( 12690), UINT16_C( 32737), UINT16_C( 51663), UINT16_C( 37115), UINT16_C( 18115), UINT16_C( 50682), UINT16_C( 1653)), simde_x_mm512_set_epu16(UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 12139), UINT16_C( 62506), UINT16_C( 53936), UINT16_C( 65535), UINT16_C( 57869), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 31065), UINT16_C( 47909), UINT16_C( 64892), UINT16_C( 26106), UINT16_C( 36197), UINT16_C( 62446), UINT16_C( 65535), UINT16_C( 62980), UINT16_C( 45956), UINT16_C( 20617), UINT16_C( 47056), UINT16_C( 15997), UINT16_C( 41930), UINT16_C( 65535), UINT16_C( 57123), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 64328)) }, { simde_x_mm512_set_epu16(UINT16_C( 1128), UINT16_C( 14143), UINT16_C( 19763), UINT16_C( 47819), UINT16_C( 62972), UINT16_C( 13428), UINT16_C( 18537), UINT16_C( 38297), UINT16_C( 21721), UINT16_C( 58861), UINT16_C( 42577), UINT16_C( 39379), UINT16_C( 55643), UINT16_C( 20439), UINT16_C( 34514), UINT16_C( 49721), UINT16_C( 18526), UINT16_C( 46886), UINT16_C( 20377), UINT16_C( 31658), UINT16_C( 32537), UINT16_C( 50044), UINT16_C( 28503), UINT16_C( 5895), 
UINT16_C( 1666), UINT16_C( 50137), UINT16_C( 30976), UINT16_C( 4047), UINT16_C( 24589), UINT16_C( 29183), UINT16_C( 13128), UINT16_C( 3541)), simde_x_mm512_set_epu16(UINT16_C( 38521), UINT16_C( 64382), UINT16_C( 35873), UINT16_C( 42497), UINT16_C( 46178), UINT16_C( 38904), UINT16_C( 15657), UINT16_C( 6453), UINT16_C( 15377), UINT16_C( 45418), UINT16_C( 38208), UINT16_C( 44948), UINT16_C( 17985), UINT16_C( 28923), UINT16_C( 11094), UINT16_C( 61254), UINT16_C( 41028), UINT16_C( 4939), UINT16_C( 61438), UINT16_C( 34821), UINT16_C( 9578), UINT16_C( 46329), UINT16_C( 53213), UINT16_C( 42297), UINT16_C( 42846), UINT16_C( 12973), UINT16_C( 32628), UINT16_C( 10992), UINT16_C( 56871), UINT16_C( 37695), UINT16_C( 16317), UINT16_C( 4356)), simde_x_mm512_set_epu16(UINT16_C( 39649), UINT16_C( 65535), UINT16_C( 55636), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 52332), UINT16_C( 34194), UINT16_C( 44750), UINT16_C( 37098), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 49362), UINT16_C( 45608), UINT16_C( 65535), UINT16_C( 59554), UINT16_C( 51825), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 42115), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 48192), UINT16_C( 44512), UINT16_C( 63110), UINT16_C( 63604), UINT16_C( 15039), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 29445), UINT16_C( 7897)) }, { simde_x_mm512_set_epu16(UINT16_C( 14747), UINT16_C( 30570), UINT16_C( 56275), UINT16_C( 58793), UINT16_C( 48636), UINT16_C( 11396), UINT16_C( 12142), UINT16_C( 25883), UINT16_C( 48137), UINT16_C( 31445), UINT16_C( 15524), UINT16_C( 49322), UINT16_C( 6761), UINT16_C( 42681), UINT16_C( 8586), UINT16_C( 53309), UINT16_C( 48898), UINT16_C( 23142), UINT16_C( 17912), UINT16_C( 20578), UINT16_C( 42157), UINT16_C( 25705), UINT16_C( 39154), UINT16_C( 32521), UINT16_C( 7302), UINT16_C( 65263), UINT16_C( 7623), UINT16_C( 47267), UINT16_C( 41612), UINT16_C( 7723), UINT16_C( 51815), UINT16_C( 23273)), simde_x_mm512_set_epu16(UINT16_C( 29783), UINT16_C( 35133), 
UINT16_C( 18722), UINT16_C( 24788), UINT16_C( 4507), UINT16_C( 64908), UINT16_C( 18120), UINT16_C( 53792), UINT16_C( 40447), UINT16_C( 2013), UINT16_C( 19329), UINT16_C( 18474), UINT16_C( 45067), UINT16_C( 37105), UINT16_C( 40003), UINT16_C( 11523), UINT16_C( 28865), UINT16_C( 9988), UINT16_C( 16394), UINT16_C( 60605), UINT16_C( 29530), UINT16_C( 612), UINT16_C( 21547), UINT16_C( 12463), UINT16_C( 46586), UINT16_C( 6619), UINT16_C( 39845), UINT16_C( 4328), UINT16_C( 31818), UINT16_C( 16334), UINT16_C( 57400), UINT16_C( 58095)), simde_x_mm512_set_epu16(UINT16_C( 44530), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 53143), UINT16_C( 65535), UINT16_C( 30262), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 33458), UINT16_C( 34853), UINT16_C( 65535), UINT16_C( 51828), UINT16_C( 65535), UINT16_C( 48589), UINT16_C( 64832), UINT16_C( 65535), UINT16_C( 33130), UINT16_C( 34306), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 26317), UINT16_C( 60701), UINT16_C( 44984), UINT16_C( 53888), UINT16_C( 65535), UINT16_C( 47468), UINT16_C( 51595), UINT16_C( 65535), UINT16_C( 24057), UINT16_C( 65535), UINT16_C( 65535)) }, { simde_x_mm512_set_epu16(UINT16_C( 30682), UINT16_C( 30851), UINT16_C( 8660), UINT16_C( 19219), UINT16_C( 10963), UINT16_C( 474), UINT16_C( 23728), UINT16_C( 46528), UINT16_C( 63120), UINT16_C( 45881), UINT16_C( 63086), UINT16_C( 25197), UINT16_C( 2704), UINT16_C( 4513), UINT16_C( 59294), UINT16_C( 60752), UINT16_C( 50954), UINT16_C( 23266), UINT16_C( 27902), UINT16_C( 8814), UINT16_C( 4863), UINT16_C( 1260), UINT16_C( 11681), UINT16_C( 46962), UINT16_C( 62057), UINT16_C( 1167), UINT16_C( 44133), UINT16_C( 29723), UINT16_C( 20503), UINT16_C( 47951), UINT16_C( 6645), UINT16_C( 27511)), simde_x_mm512_set_epu16(UINT16_C( 15286), UINT16_C( 5686), UINT16_C( 62529), UINT16_C( 52414), UINT16_C( 64942), UINT16_C( 55157), UINT16_C( 1921), UINT16_C( 414), UINT16_C( 57075), UINT16_C( 38949), UINT16_C( 49837), UINT16_C( 16574), UINT16_C( 63519), UINT16_C( 
21815), UINT16_C( 16534), UINT16_C( 5264), UINT16_C( 40611), UINT16_C( 7582), UINT16_C( 27508), UINT16_C( 40060), UINT16_C( 17332), UINT16_C( 48486), UINT16_C( 46457), UINT16_C( 3074), UINT16_C( 49520), UINT16_C( 35694), UINT16_C( 14820), UINT16_C( 26739), UINT16_C( 52931), UINT16_C( 30426), UINT16_C( 34639), UINT16_C( 60126)), simde_x_mm512_set_epu16(UINT16_C( 45968), UINT16_C( 36537), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 55631), UINT16_C( 25649), UINT16_C( 46942), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 41771), UINT16_C( 65535), UINT16_C( 26328), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 30848), UINT16_C( 55410), UINT16_C( 48874), UINT16_C( 22195), UINT16_C( 49746), UINT16_C( 58138), UINT16_C( 50036), UINT16_C( 65535), UINT16_C( 36861), UINT16_C( 58953), UINT16_C( 56462), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 41284), UINT16_C( 65535)) }, { simde_x_mm512_set_epu16(UINT16_C( 20277), UINT16_C( 2901), UINT16_C( 9119), UINT16_C( 39166), UINT16_C( 42714), UINT16_C( 26596), UINT16_C( 65466), UINT16_C( 41724), UINT16_C( 43509), UINT16_C( 36161), UINT16_C( 14559), UINT16_C( 39814), UINT16_C( 39598), UINT16_C( 35810), UINT16_C( 32670), UINT16_C( 2206), UINT16_C( 28889), UINT16_C( 41636), UINT16_C( 48990), UINT16_C( 37294), UINT16_C( 39242), UINT16_C( 39778), UINT16_C( 6979), UINT16_C( 54232), UINT16_C( 47738), UINT16_C( 57923), UINT16_C( 37900), UINT16_C( 45798), UINT16_C( 44781), UINT16_C( 27960), UINT16_C( 1998), UINT16_C( 61149)), simde_x_mm512_set_epu16(UINT16_C( 20730), UINT16_C( 18080), UINT16_C( 51364), UINT16_C( 27968), UINT16_C( 47873), UINT16_C( 51993), UINT16_C( 18829), UINT16_C( 15054), UINT16_C( 50771), UINT16_C( 37023), UINT16_C( 39205), UINT16_C( 18252), UINT16_C( 53613), UINT16_C( 36736), UINT16_C( 53958), UINT16_C( 13137), UINT16_C( 43763), UINT16_C( 19789), UINT16_C( 2934), UINT16_C( 22986), UINT16_C( 46964), UINT16_C( 64986), UINT16_C( 13395), UINT16_C( 61287), UINT16_C( 
61677), UINT16_C( 22301), UINT16_C( 41322), UINT16_C( 35063), UINT16_C( 22663), UINT16_C( 28062), UINT16_C( 45351), UINT16_C( 21097)), simde_x_mm512_set_epu16(UINT16_C( 41007), UINT16_C( 20981), UINT16_C( 60483), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 56778), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 53764), UINT16_C( 58066), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 15343), UINT16_C( 65535), UINT16_C( 61425), UINT16_C( 51924), UINT16_C( 60280), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 20374), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 56022), UINT16_C( 47349), UINT16_C( 65535)) }, { simde_x_mm512_set_epu16(UINT16_C( 29192), UINT16_C( 8654), UINT16_C( 22850), UINT16_C( 62764), UINT16_C( 24193), UINT16_C( 41054), UINT16_C( 47201), UINT16_C( 29473), UINT16_C( 7514), UINT16_C( 21604), UINT16_C( 55170), UINT16_C( 17089), UINT16_C( 58910), UINT16_C( 18576), UINT16_C( 50942), UINT16_C( 19375), UINT16_C( 32394), UINT16_C( 6670), UINT16_C( 34291), UINT16_C( 10597), UINT16_C( 63848), UINT16_C( 24622), UINT16_C( 55141), UINT16_C( 33399), UINT16_C( 17676), UINT16_C( 53111), UINT16_C( 57678), UINT16_C( 53869), UINT16_C( 1069), UINT16_C( 35377), UINT16_C( 20392), UINT16_C( 6033)), simde_x_mm512_set_epu16(UINT16_C( 30110), UINT16_C( 5765), UINT16_C( 21533), UINT16_C( 32154), UINT16_C( 47788), UINT16_C( 4708), UINT16_C( 49450), UINT16_C( 15569), UINT16_C( 11433), UINT16_C( 48528), UINT16_C( 18868), UINT16_C( 49914), UINT16_C( 29591), UINT16_C( 10234), UINT16_C( 18676), UINT16_C( 18833), UINT16_C( 53298), UINT16_C( 44056), UINT16_C( 163), UINT16_C( 14043), UINT16_C( 52308), UINT16_C( 55462), UINT16_C( 1379), UINT16_C( 56593), UINT16_C( 40649), UINT16_C( 10209), UINT16_C( 62181), UINT16_C( 6865), UINT16_C( 33659), UINT16_C( 12643), UINT16_C( 58853), UINT16_C( 27590)), simde_x_mm512_set_epu16(UINT16_C( 59302), UINT16_C( 14419), 
UINT16_C( 44383), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 45762), UINT16_C( 65535), UINT16_C( 45042), UINT16_C( 18947), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 28810), UINT16_C( 65535), UINT16_C( 38208), UINT16_C( 65535), UINT16_C( 50726), UINT16_C( 34454), UINT16_C( 24640), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 56520), UINT16_C( 65535), UINT16_C( 58325), UINT16_C( 63320), UINT16_C( 65535), UINT16_C( 60734), UINT16_C( 34728), UINT16_C( 48020), UINT16_C( 65535), UINT16_C( 33623)) }, { simde_x_mm512_set_epu16(UINT16_C( 54408), UINT16_C( 44486), UINT16_C( 24517), UINT16_C( 51207), UINT16_C( 2791), UINT16_C( 45592), UINT16_C( 48586), UINT16_C( 36784), UINT16_C( 53369), UINT16_C( 43811), UINT16_C( 65046), UINT16_C( 44235), UINT16_C( 48681), UINT16_C( 21401), UINT16_C( 55417), UINT16_C( 5581), UINT16_C( 56410), UINT16_C( 42092), UINT16_C( 49678), UINT16_C( 22777), UINT16_C( 15839), UINT16_C( 59069), UINT16_C( 3836), UINT16_C( 55877), UINT16_C( 35316), UINT16_C( 40611), UINT16_C( 30925), UINT16_C( 38925), UINT16_C( 44290), UINT16_C( 48844), UINT16_C( 41388), UINT16_C( 24135)), simde_x_mm512_set_epu16(UINT16_C( 7663), UINT16_C( 4305), UINT16_C( 48670), UINT16_C( 39779), UINT16_C( 2052), UINT16_C( 40026), UINT16_C( 47264), UINT16_C( 7297), UINT16_C( 11022), UINT16_C( 64822), UINT16_C( 63292), UINT16_C( 56921), UINT16_C( 8160), UINT16_C( 61766), UINT16_C( 22014), UINT16_C( 41597), UINT16_C( 60919), UINT16_C( 45399), UINT16_C( 45462), UINT16_C( 35135), UINT16_C( 13486), UINT16_C( 42127), UINT16_C( 6466), UINT16_C( 5931), UINT16_C( 30448), UINT16_C( 27493), UINT16_C( 24921), UINT16_C( 23842), UINT16_C( 21170), UINT16_C( 62954), UINT16_C( 7914), UINT16_C( 35527)), simde_x_mm512_set_epu16(UINT16_C( 62071), UINT16_C( 48791), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 4843), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 44081), UINT16_C( 64391), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 56841), 
UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 47178), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 57912), UINT16_C( 29325), UINT16_C( 65535), UINT16_C( 10302), UINT16_C( 61808), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 55846), UINT16_C( 62767), UINT16_C( 65460), UINT16_C( 65535), UINT16_C( 49302), UINT16_C( 59662)) }, { simde_x_mm512_set_epu16(UINT16_C( 61865), UINT16_C( 23426), UINT16_C( 1768), UINT16_C( 58636), UINT16_C( 57533), UINT16_C( 19108), UINT16_C( 59802), UINT16_C( 30792), UINT16_C( 61240), UINT16_C( 6209), UINT16_C( 32476), UINT16_C( 14300), UINT16_C( 7420), UINT16_C( 1985), UINT16_C( 25770), UINT16_C( 58800), UINT16_C( 51482), UINT16_C( 20894), UINT16_C( 680), UINT16_C( 29487), UINT16_C( 45202), UINT16_C( 20181), UINT16_C( 14712), UINT16_C( 17612), UINT16_C( 16683), UINT16_C( 1453), UINT16_C( 15990), UINT16_C( 36509), UINT16_C( 51737), UINT16_C( 56324), UINT16_C( 34493), UINT16_C( 17761)), simde_x_mm512_set_epu16(UINT16_C( 37550), UINT16_C( 51250), UINT16_C( 42874), UINT16_C( 8384), UINT16_C( 13698), UINT16_C( 33689), UINT16_C( 64372), UINT16_C( 20393), UINT16_C( 17692), UINT16_C( 65383), UINT16_C( 545), UINT16_C( 27220), UINT16_C( 18321), UINT16_C( 26532), UINT16_C( 40835), UINT16_C( 14036), UINT16_C( 3666), UINT16_C( 2040), UINT16_C( 61839), UINT16_C( 38748), UINT16_C( 62699), UINT16_C( 55831), UINT16_C( 23293), UINT16_C( 13208), UINT16_C( 60975), UINT16_C( 12632), UINT16_C( 13392), UINT16_C( 63703), UINT16_C( 41286), UINT16_C( 6117), UINT16_C( 32645), UINT16_C( 56152)), simde_x_mm512_set_epu16(UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 44642), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 52797), UINT16_C( 65535), UINT16_C( 51185), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 33021), UINT16_C( 41520), UINT16_C( 25741), UINT16_C( 28517), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 55148), UINT16_C( 22934), UINT16_C( 62519), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 38005), UINT16_C( 30820), 
UINT16_C( 65535), UINT16_C( 14085), UINT16_C( 29382), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 62441), UINT16_C( 65535), UINT16_C( 65535)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_adds_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m512i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_adds_epu16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t src[32]; const simde__mmask32 k; const uint16_t a[32]; const uint16_t b[32]; const uint16_t r[32]; } test_vec[] = { { { -INT16_C( 25480), INT16_C( 16389), INT16_C( 16810), INT16_C( 20465), -INT16_C( 11763), INT16_C( 1143), INT16_C( 11613), INT16_C( 5547), -INT16_C( 217), INT16_C( 4795), INT16_C( 5277), -INT16_C( 31526), -INT16_C( 1895), INT16_C( 7976), -INT16_C( 7252), INT16_C( 9466), -INT16_C( 129), INT16_C( 10853), INT16_C( 22080), INT16_C( 19833), -INT16_C( 4056), -INT16_C( 31151), -INT16_C( 994), INT16_C( 17819), INT16_C( 22267), -INT16_C( 26281), INT16_C( 12651), INT16_C( 1053), INT16_C( 17962), -INT16_C( 10716), INT16_C( 7721), -INT16_C( 22278) }, UINT32_C(1590845213), { UINT16_C(19382), UINT16_C(57003), UINT16_C(64828), UINT16_C(23140), UINT16_C(65529), UINT16_C(62879), UINT16_C(63062), UINT16_C(49550), UINT16_C(43815), UINT16_C(20933), UINT16_C(59889), UINT16_C( 6695), UINT16_C( 8711), UINT16_C( 9667), UINT16_C(38273), UINT16_C(14211), UINT16_C(12001), UINT16_C( 7446), UINT16_C(31275), UINT16_C( 9591), UINT16_C( 5754), UINT16_C(53274), UINT16_C(43020), UINT16_C(13201), UINT16_C(22099), UINT16_C(17797), UINT16_C(44096), UINT16_C(18271), UINT16_C( 8910), UINT16_C(20588), UINT16_C(61368), UINT16_C(39303) }, { UINT16_C(40222), UINT16_C(18870), UINT16_C(11544), UINT16_C(37486), UINT16_C(34883), UINT16_C(20322), UINT16_C(62256), UINT16_C(33922), UINT16_C( 1865), UINT16_C(35273), UINT16_C(10420), UINT16_C(33489), UINT16_C(15691), UINT16_C( 978), UINT16_C(23085), UINT16_C(19356), UINT16_C(21239), UINT16_C( 3988), 
UINT16_C( 895), UINT16_C(49825), UINT16_C( 907), UINT16_C(48145), UINT16_C(37878), UINT16_C(16448), UINT16_C( 2459), UINT16_C(20425), UINT16_C(39473), UINT16_C(31953), UINT16_C(42200), UINT16_C( 1407), UINT16_C( 7166), UINT16_C(62800) }, { UINT16_C(59604), UINT16_C(16389), UINT16_MAX, UINT16_C(60626), UINT16_MAX, UINT16_C( 1143), UINT16_C(11613), UINT16_C( 5547), UINT16_C(45680), UINT16_C(56206), UINT16_MAX, UINT16_C(40184), UINT16_C(24402), UINT16_C( 7976), UINT16_C(61358), UINT16_C( 9466), UINT16_C(65407), UINT16_C(11434), UINT16_C(22080), UINT16_C(19833), UINT16_C( 6661), UINT16_C(34385), UINT16_MAX, UINT16_C(29649), UINT16_C(22267), UINT16_C(38222), UINT16_MAX, UINT16_C(50224), UINT16_C(51110), UINT16_C(54820), UINT16_MAX, UINT16_C(43258) } }, { { -INT16_C( 7059), -INT16_C( 5115), -INT16_C( 22809), INT16_C( 29614), -INT16_C( 16470), -INT16_C( 24529), INT16_C( 28499), -INT16_C( 4384), -INT16_C( 21896), -INT16_C( 22211), INT16_C( 3652), INT16_C( 7206), -INT16_C( 23118), -INT16_C( 20447), INT16_C( 29121), INT16_C( 11942), -INT16_C( 21674), INT16_C( 15643), -INT16_C( 13999), -INT16_C( 1104), -INT16_C( 8311), -INT16_C( 9060), INT16_C( 31822), -INT16_C( 14646), INT16_C( 1830), INT16_C( 27504), -INT16_C( 27115), -INT16_C( 14201), -INT16_C( 22213), -INT16_C( 904), INT16_C( 7706), INT16_C( 28715) }, UINT32_C( 464406217), { UINT16_C(24079), UINT16_C(38934), UINT16_C(45630), UINT16_C(35956), UINT16_C(15919), UINT16_C(21843), UINT16_C(49989), UINT16_C(23488), UINT16_C(18521), UINT16_C(37923), UINT16_C(39921), UINT16_C( 2961), UINT16_C(48314), UINT16_C(33660), UINT16_C(10754), UINT16_C( 4510), UINT16_C(46472), UINT16_C(50858), UINT16_C( 7783), UINT16_C(38483), UINT16_C(42589), UINT16_C(41708), UINT16_C(44137), UINT16_C(49917), UINT16_C( 8436), UINT16_C(58710), UINT16_C(59324), UINT16_C(30449), UINT16_C(28067), UINT16_C(42489), UINT16_C(39063), UINT16_C( 8119) }, { UINT16_C(24909), UINT16_C(46310), UINT16_C(14719), UINT16_C(56395), UINT16_C(14303), UINT16_C(18559), 
UINT16_C(31971), UINT16_C(55306), UINT16_C(24733), UINT16_C(22973), UINT16_C(44616), UINT16_C(60367), UINT16_C(51227), UINT16_C(45713), UINT16_C(18528), UINT16_C(44498), UINT16_C(47273), UINT16_C(10338), UINT16_C(44529), UINT16_C(53253), UINT16_C(34020), UINT16_C(50968), UINT16_C( 8704), UINT16_C(40351), UINT16_C(23938), UINT16_C(51958), UINT16_C(50443), UINT16_C(10166), UINT16_C(18318), UINT16_C(61145), UINT16_C(43919), UINT16_C(14492) }, { UINT16_C(48988), UINT16_C(60421), UINT16_C(42727), UINT16_MAX, UINT16_C(49066), UINT16_C(41007), UINT16_MAX, UINT16_MAX, UINT16_C(43640), UINT16_C(60896), UINT16_MAX, UINT16_C( 7206), UINT16_C(42418), UINT16_C(45089), UINT16_C(29282), UINT16_C(11942), UINT16_C(43862), UINT16_C(61196), UINT16_C(52312), UINT16_MAX, UINT16_C(57225), UINT16_MAX, UINT16_C(31822), UINT16_MAX, UINT16_C(32374), UINT16_MAX, UINT16_C(38421), UINT16_C(40615), UINT16_C(46385), UINT16_C(64632), UINT16_C( 7706), UINT16_C(28715) } }, { { -INT16_C( 413), INT16_C( 21600), INT16_C( 26027), -INT16_C( 28892), INT16_C( 15593), -INT16_C( 5546), -INT16_C( 2466), -INT16_C( 7801), INT16_C( 32339), INT16_C( 24235), INT16_C( 24899), -INT16_C( 11899), INT16_C( 24488), INT16_C( 14272), INT16_C( 23562), INT16_C( 28271), -INT16_C( 12198), INT16_C( 1474), -INT16_C( 6347), INT16_C( 8084), -INT16_C( 5597), -INT16_C( 32247), -INT16_C( 28448), INT16_C( 13155), INT16_C( 3598), INT16_C( 21138), INT16_C( 6000), INT16_C( 6179), -INT16_C( 7306), -INT16_C( 32432), -INT16_C( 16577), -INT16_C( 26129) }, UINT32_C(3315511695), { UINT16_C(12952), UINT16_C(48356), UINT16_C(60701), UINT16_C(64830), UINT16_C(41341), UINT16_C(35889), UINT16_C(50095), UINT16_C( 8158), UINT16_C( 474), UINT16_C(20792), UINT16_C(35045), UINT16_C( 9426), UINT16_C(49479), UINT16_C(55230), UINT16_C(23666), UINT16_C( 2972), UINT16_C(32911), UINT16_C(44231), UINT16_C( 1389), UINT16_C(60073), UINT16_C(55974), UINT16_C(21878), UINT16_C(21661), UINT16_C(30837), UINT16_C(44374), UINT16_C(15305), UINT16_C(39733), 
UINT16_C(31839), UINT16_C( 7516), UINT16_C(52819), UINT16_C(61306), UINT16_C( 2521) }, { UINT16_C(41071), UINT16_C(56501), UINT16_C(24229), UINT16_C(19399), UINT16_C(15673), UINT16_C(54945), UINT16_C( 5778), UINT16_C(59470), UINT16_C( 6083), UINT16_C(63523), UINT16_C(33458), UINT16_C( 3700), UINT16_C(51360), UINT16_C( 6877), UINT16_C(46775), UINT16_C(10019), UINT16_C(55383), UINT16_C(64515), UINT16_C(51766), UINT16_C(28488), UINT16_C(59656), UINT16_C(39494), UINT16_C(38143), UINT16_C(49794), UINT16_C(42412), UINT16_C(24250), UINT16_C(11815), UINT16_C(51053), UINT16_C(19190), UINT16_C(44769), UINT16_C( 1024), UINT16_C(22485) }, { UINT16_C(54023), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C(15593), UINT16_C(59990), UINT16_C(63070), UINT16_MAX, UINT16_C( 6557), UINT16_C(24235), UINT16_C(24899), UINT16_C(53637), UINT16_MAX, UINT16_C(62107), UINT16_C(23562), UINT16_C(12991), UINT16_C(53338), UINT16_MAX, UINT16_C(53155), UINT16_MAX, UINT16_MAX, UINT16_C(33289), UINT16_C(37088), UINT16_MAX, UINT16_MAX, UINT16_C(21138), UINT16_C(51548), UINT16_C( 6179), UINT16_C(58230), UINT16_C(33104), UINT16_C(62330), UINT16_C(25006) } }, { { -INT16_C( 10020), INT16_C( 4948), -INT16_C( 25437), -INT16_C( 21630), -INT16_C( 14203), -INT16_C( 31675), -INT16_C( 14499), INT16_C( 2374), INT16_C( 108), -INT16_C( 27801), -INT16_C( 11218), INT16_C( 9563), INT16_C( 15390), INT16_C( 8147), -INT16_C( 22463), INT16_C( 7542), -INT16_C( 13696), INT16_C( 9008), -INT16_C( 19610), -INT16_C( 5170), INT16_C( 4987), -INT16_C( 10129), -INT16_C( 18982), INT16_C( 18145), INT16_C( 18869), -INT16_C( 6950), INT16_C( 13597), INT16_C( 15369), -INT16_C( 9103), -INT16_C( 19877), -INT16_C( 11900), INT16_C( 1232) }, UINT32_C( 36176028), { UINT16_C(63155), UINT16_C(12270), UINT16_C(23818), UINT16_C(58375), UINT16_C(59667), UINT16_C(51243), UINT16_C( 1330), UINT16_C(20396), UINT16_C(46394), UINT16_C(43915), UINT16_C(59025), UINT16_C( 5470), UINT16_C(11960), UINT16_C(21530), UINT16_C(16942), UINT16_C(57942), 
UINT16_C(17464), UINT16_C(16913), UINT16_C( 6306), UINT16_C(46375), UINT16_C(20993), UINT16_C(13181), UINT16_C(10839), UINT16_C(37251), UINT16_C( 3807), UINT16_C(28988), UINT16_C(39669), UINT16_C(44422), UINT16_C(41160), UINT16_C(63233), UINT16_C(22498), UINT16_C( 7129) }, { UINT16_C(60060), UINT16_C(15965), UINT16_C(33794), UINT16_C( 1267), UINT16_C(28886), UINT16_C(11575), UINT16_C(47770), UINT16_C(31422), UINT16_C(64457), UINT16_C(48875), UINT16_C(29077), UINT16_C(24171), UINT16_C(27666), UINT16_C(62549), UINT16_C(11971), UINT16_C(24335), UINT16_C(27928), UINT16_C( 6813), UINT16_C(37105), UINT16_C(51230), UINT16_C(22017), UINT16_C(39925), UINT16_C(46096), UINT16_C(55573), UINT16_C( 175), UINT16_C(17559), UINT16_C( 626), UINT16_C(33954), UINT16_C(63342), UINT16_C(12920), UINT16_C(34853), UINT16_C(15761) }, { UINT16_C(55516), UINT16_C( 4948), UINT16_C(57612), UINT16_C(59642), UINT16_MAX, UINT16_C(33861), UINT16_C(51037), UINT16_C(51818), UINT16_C( 108), UINT16_C(37735), UINT16_C(54318), UINT16_C( 9563), UINT16_C(15390), UINT16_C( 8147), UINT16_C(43073), UINT16_C( 7542), UINT16_C(51840), UINT16_C( 9008), UINT16_C(45926), UINT16_MAX, UINT16_C( 4987), UINT16_C(53106), UINT16_C(46554), UINT16_C(18145), UINT16_C(18869), UINT16_C(46547), UINT16_C(13597), UINT16_C(15369), UINT16_C(56433), UINT16_C(45659), UINT16_C(53636), UINT16_C( 1232) } }, { { INT16_C( 12277), -INT16_C( 6568), INT16_C( 30399), -INT16_C( 16210), -INT16_C( 23348), -INT16_C( 8868), INT16_C( 29016), INT16_C( 1974), INT16_C( 20082), -INT16_C( 7093), -INT16_C( 4528), -INT16_C( 16536), -INT16_C( 7963), INT16_C( 3057), -INT16_C( 32152), INT16_C( 23880), -INT16_C( 24399), INT16_C( 28996), -INT16_C( 3561), -INT16_C( 7375), -INT16_C( 29290), -INT16_C( 4416), INT16_C( 30719), INT16_C( 29173), INT16_C( 16837), INT16_C( 5461), -INT16_C( 17105), INT16_C( 5332), -INT16_C( 14947), INT16_C( 1567), INT16_C( 26696), -INT16_C( 1693) }, UINT32_C( 527083272), { UINT16_C(40090), UINT16_C(12291), UINT16_C(49961), 
UINT16_C(10271), UINT16_C( 5178), UINT16_C(65433), UINT16_C(61013), UINT16_C(33813), UINT16_C(59819), UINT16_C(18841), UINT16_C(47279), UINT16_C(63311), UINT16_C(45600), UINT16_C(10736), UINT16_C(23386), UINT16_C(62536), UINT16_C(19447), UINT16_C( 8228), UINT16_C(17167), UINT16_C(18761), UINT16_C(57944), UINT16_C(44361), UINT16_C(24273), UINT16_C(31794), UINT16_C(52039), UINT16_C(63173), UINT16_C( 5251), UINT16_C(42221), UINT16_C(57031), UINT16_C( 8653), UINT16_C( 5433), UINT16_C(12309) }, { UINT16_C(14689), UINT16_C(28752), UINT16_C(39293), UINT16_C(54713), UINT16_C( 636), UINT16_C(19842), UINT16_C(46176), UINT16_C(43209), UINT16_C(36735), UINT16_C( 926), UINT16_C(36003), UINT16_C(27303), UINT16_C(29802), UINT16_C(41867), UINT16_C(41097), UINT16_C(60115), UINT16_C( 9178), UINT16_C(22362), UINT16_C( 5309), UINT16_C(14636), UINT16_C(44566), UINT16_C(30598), UINT16_C(20323), UINT16_C(57887), UINT16_C(48606), UINT16_C(33509), UINT16_C(35913), UINT16_C(46060), UINT16_C(30720), UINT16_C(35414), UINT16_C(10520), UINT16_C(62068) }, { UINT16_C(12277), UINT16_C(58968), UINT16_C(30399), UINT16_C(64984), UINT16_C(42188), UINT16_C(56668), UINT16_C(29016), UINT16_C( 1974), UINT16_MAX, UINT16_C(19767), UINT16_MAX, UINT16_C(49000), UINT16_C(57573), UINT16_C(52603), UINT16_C(33384), UINT16_MAX, UINT16_C(41137), UINT16_C(30590), UINT16_C(61975), UINT16_C(33397), UINT16_C(36246), UINT16_MAX, UINT16_C(44596), UINT16_C(29173), UINT16_MAX, UINT16_MAX, UINT16_C(41164), UINT16_MAX, UINT16_MAX, UINT16_C( 1567), UINT16_C(26696), UINT16_C(63843) } }, { { -INT16_C( 12467), INT16_C( 2633), INT16_C( 30179), -INT16_C( 1725), -INT16_C( 14044), -INT16_C( 30864), -INT16_C( 28904), -INT16_C( 2199), INT16_C( 20301), -INT16_C( 27015), INT16_C( 26075), -INT16_C( 9142), -INT16_C( 24355), -INT16_C( 2458), -INT16_C( 9526), INT16_C( 6120), INT16_C( 12969), -INT16_C( 29663), INT16_C( 25767), -INT16_C( 13434), -INT16_C( 2515), INT16_C( 17746), -INT16_C( 17274), -INT16_C( 11460), -INT16_C( 19189), -INT16_C( 
6551), -INT16_C( 19685), -INT16_C( 1854), INT16_C( 10324), INT16_C( 7918), -INT16_C( 10493), -INT16_C( 21451) }, UINT32_C(2956547593), { UINT16_C(49082), UINT16_C(59260), UINT16_C(52917), UINT16_C(15148), UINT16_C(27018), UINT16_C(38158), UINT16_C(30750), UINT16_C(14716), UINT16_C(15915), UINT16_C(32562), UINT16_C( 8295), UINT16_C(27293), UINT16_C(54007), UINT16_C( 22), UINT16_C(20264), UINT16_C(58033), UINT16_C(11534), UINT16_C(50377), UINT16_C(63227), UINT16_C(34559), UINT16_C( 3679), UINT16_C(32027), UINT16_C(38790), UINT16_C(45495), UINT16_C(59862), UINT16_C(15665), UINT16_C(52745), UINT16_C( 423), UINT16_C(48545), UINT16_C(51457), UINT16_C(45581), UINT16_C( 7084) }, { UINT16_C(30175), UINT16_C(56287), UINT16_C(57195), UINT16_C(51809), UINT16_C(31981), UINT16_C(29512), UINT16_C(65300), UINT16_C(59940), UINT16_C(21992), UINT16_C(61735), UINT16_C(52772), UINT16_C(50674), UINT16_C(62603), UINT16_C(39054), UINT16_C(15014), UINT16_C(34484), UINT16_C(37808), UINT16_C( 7009), UINT16_C(49778), UINT16_C(24550), UINT16_C(11838), UINT16_C(21202), UINT16_C(63277), UINT16_C( 5436), UINT16_C(25420), UINT16_C(28678), UINT16_C(63793), UINT16_C(48437), UINT16_C(50413), UINT16_C(37717), UINT16_C( 2558), UINT16_C(44569) }, { UINT16_MAX, UINT16_C( 2633), UINT16_C(30179), UINT16_MAX, UINT16_C(51492), UINT16_C(34672), UINT16_C(36632), UINT16_C(63337), UINT16_C(20301), UINT16_MAX, UINT16_C(61067), UINT16_C(56394), UINT16_MAX, UINT16_C(63078), UINT16_C(35278), UINT16_C( 6120), UINT16_C(49342), UINT16_C(35873), UINT16_C(25767), UINT16_C(59109), UINT16_C(15517), UINT16_C(53229), UINT16_C(48262), UINT16_C(54076), UINT16_C(46347), UINT16_C(58985), UINT16_C(45851), UINT16_C(63682), UINT16_MAX, UINT16_MAX, UINT16_C(55043), UINT16_C(51653) } }, { { INT16_C( 31389), INT16_C( 4042), -INT16_C( 20420), INT16_C( 31599), INT16_C( 16862), INT16_C( 3021), INT16_C( 2616), -INT16_C( 31456), INT16_C( 9837), -INT16_C( 24587), INT16_C( 11039), INT16_C( 3164), -INT16_C( 19985), -INT16_C( 4704), -INT16_C( 
17989), INT16_C( 22684), INT16_C( 26164), INT16_C( 28775), -INT16_C( 10730), -INT16_C( 2837), -INT16_C( 18152), INT16_C( 20735), INT16_C( 8131), INT16_C( 12501), -INT16_C( 13499), INT16_C( 26063), INT16_C( 11254), -INT16_C( 6799), INT16_C( 4573), -INT16_C( 26414), INT16_C( 28363), -INT16_C( 16) }, UINT32_C(3933165524), { UINT16_C(23342), UINT16_C(18142), UINT16_C(56596), UINT16_C(55190), UINT16_C(27900), UINT16_C(16903), UINT16_C(55095), UINT16_C(11687), UINT16_C( 6146), UINT16_C(57106), UINT16_C(58410), UINT16_C(62839), UINT16_C(26451), UINT16_C(10228), UINT16_C(25535), UINT16_C(60690), UINT16_C(61630), UINT16_C(53811), UINT16_C(51662), UINT16_C(51881), UINT16_C(45365), UINT16_C(27660), UINT16_C(45960), UINT16_C(35481), UINT16_C(43980), UINT16_C(63082), UINT16_C(57744), UINT16_C(58347), UINT16_C(57161), UINT16_C( 2058), UINT16_C( 7234), UINT16_C( 501) }, { UINT16_C(10253), UINT16_C(56275), UINT16_C(32241), UINT16_C(10149), UINT16_C(45614), UINT16_C(46739), UINT16_C(11621), UINT16_C(12608), UINT16_C(43736), UINT16_C(26663), UINT16_C( 4748), UINT16_C(54603), UINT16_C(22257), UINT16_C(13533), UINT16_C(53874), UINT16_C(32565), UINT16_C( 2298), UINT16_C(60250), UINT16_C( 133), UINT16_C(45842), UINT16_C(42674), UINT16_C( 5993), UINT16_C(43731), UINT16_C(43849), UINT16_C(28756), UINT16_C(57364), UINT16_C(24451), UINT16_C(29877), UINT16_C(37557), UINT16_C(10408), UINT16_C(56676), UINT16_C(24231) }, { UINT16_C(31389), UINT16_C( 4042), UINT16_MAX, UINT16_C(31599), UINT16_MAX, UINT16_C( 3021), UINT16_MAX, UINT16_C(24295), UINT16_C(49882), UINT16_MAX, UINT16_C(63158), UINT16_C( 3164), UINT16_C(48708), UINT16_C(60832), UINT16_MAX, UINT16_C(22684), UINT16_C(63928), UINT16_MAX, UINT16_C(51795), UINT16_MAX, UINT16_C(47384), UINT16_C(33653), UINT16_MAX, UINT16_C(12501), UINT16_C(52037), UINT16_MAX, UINT16_C(11254), UINT16_MAX, UINT16_C( 4573), UINT16_C(12466), UINT16_C(63910), UINT16_C(24732) } }, { { INT16_C( 742), INT16_C( 27466), INT16_C( 23554), -INT16_C( 19425), -INT16_C( 
30718), -INT16_C( 10805), INT16_C( 5170), -INT16_C( 30847), -INT16_C( 27259), INT16_C( 2151), INT16_C( 7668), -INT16_C( 21892), INT16_C( 9647), INT16_C( 5330), INT16_C( 30978), -INT16_C( 6030), -INT16_C( 17285), INT16_C( 32084), INT16_C( 29465), INT16_C( 6961), -INT16_C( 517), INT16_C( 12017), INT16_C( 29201), -INT16_C( 26955), INT16_C( 7175), -INT16_C( 1122), INT16_C( 6969), -INT16_C( 5723), INT16_C( 30528), INT16_C( 17149), INT16_C( 28657), INT16_C( 27691) }, UINT32_C(1172995884), { UINT16_C( 7154), UINT16_C(60768), UINT16_C(20760), UINT16_C(10779), UINT16_C(53443), UINT16_C(51904), UINT16_C(24557), UINT16_C( 9926), UINT16_C(27514), UINT16_C(47631), UINT16_C( 3299), UINT16_C(54524), UINT16_C(10108), UINT16_C(43072), UINT16_C(10918), UINT16_C(39149), UINT16_C(19782), UINT16_C(24198), UINT16_C(41375), UINT16_C(25224), UINT16_C(18802), UINT16_C(24365), UINT16_C(62376), UINT16_C( 8837), UINT16_C(38238), UINT16_C(16860), UINT16_C(55457), UINT16_C( 7445), UINT16_C(22016), UINT16_C(42693), UINT16_C(45696), UINT16_C(50751) }, { UINT16_C(50432), UINT16_C(40741), UINT16_C(44390), UINT16_C(55297), UINT16_C(12022), UINT16_C(40503), UINT16_C(48417), UINT16_C(32960), UINT16_C(40018), UINT16_C(62401), UINT16_C(55157), UINT16_C(29969), UINT16_C(54829), UINT16_C(44315), UINT16_C(23177), UINT16_C(35188), UINT16_C(39199), UINT16_C(34344), UINT16_C(10566), UINT16_C(15710), UINT16_C(38488), UINT16_C(31195), UINT16_C(40019), UINT16_C(42489), UINT16_C(47928), UINT16_C(44440), UINT16_C(43410), UINT16_C(48930), UINT16_C(16000), UINT16_C( 2412), UINT16_C(57496), UINT16_C(47250) }, { UINT16_C( 742), UINT16_C(27466), UINT16_C(65150), UINT16_MAX, UINT16_C(34818), UINT16_MAX, UINT16_C( 5170), UINT16_C(34689), UINT16_MAX, UINT16_MAX, UINT16_C(58456), UINT16_MAX, UINT16_C(64937), UINT16_MAX, UINT16_C(34095), UINT16_C(59506), UINT16_C(48251), UINT16_C(58542), UINT16_C(29465), UINT16_C(40934), UINT16_C(65019), UINT16_C(55560), UINT16_MAX, UINT16_C(51326), UINT16_MAX, UINT16_C(64414), UINT16_MAX, 
UINT16_C(59813), UINT16_C(30528), UINT16_C(17149), UINT16_MAX, UINT16_C(27691) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi16(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_mask_adds_epu16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_u16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_adds_epu16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask32 k; const uint16_t a[32]; const uint16_t b[32]; const uint16_t r[32]; } test_vec[] = { { UINT32_C( 298122215), { UINT16_C(43877), UINT16_C(21426), UINT16_C(12985), UINT16_C(37214), UINT16_C(44901), UINT16_C(48588), UINT16_C(18525), UINT16_C(41048), UINT16_C(22359), UINT16_C(46043), UINT16_C(52738), UINT16_C(43622), UINT16_C(52530), UINT16_C( 6625), UINT16_C(42696), UINT16_C(11818), UINT16_C(56401), UINT16_C( 2689), UINT16_C(57359), UINT16_C(29852), UINT16_C(26767), UINT16_C(60465), UINT16_C(35504), UINT16_C( 2188), UINT16_C(26593), UINT16_C(58555), UINT16_C( 8501), UINT16_C(26510), UINT16_C(28654), UINT16_C(46976), UINT16_C(43541), UINT16_C(26597) }, { UINT16_C(26246), UINT16_C(38257), UINT16_C( 3398), UINT16_C(54538), UINT16_C(15221), UINT16_C( 9921), UINT16_C(19909), UINT16_C(42798), UINT16_C(59828), UINT16_C(59787), UINT16_C( 6411), UINT16_C(63824), UINT16_C(53384), UINT16_C(40624), UINT16_C(38266), UINT16_C( 261), UINT16_C(30460), UINT16_C(17046), UINT16_C(41092), UINT16_C(63768), UINT16_C(55772), UINT16_C(41247), UINT16_C(19751), UINT16_C(56136), UINT16_C(54071), UINT16_C(17093), UINT16_C( 5612), UINT16_C(30011), UINT16_C(60646), UINT16_C(24595), UINT16_C( 6273), UINT16_C(32097) }, { UINT16_MAX, UINT16_C(59683), UINT16_C(16383), UINT16_C( 0), UINT16_C( 0), UINT16_C(58509), UINT16_C(38434), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, 
UINT16_MAX, UINT16_C(47249), UINT16_MAX, UINT16_C(12079), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(55255), UINT16_C(58324), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { UINT32_C( 314636430), { UINT16_C(55448), UINT16_C(29708), UINT16_C(11185), UINT16_C(55318), UINT16_C(24185), UINT16_C(45236), UINT16_C(31026), UINT16_C( 7922), UINT16_C(11662), UINT16_C(29843), UINT16_C(42521), UINT16_C(39893), UINT16_C(14014), UINT16_C(19736), UINT16_C(55342), UINT16_C(51039), UINT16_C(27568), UINT16_C(25147), UINT16_C(20887), UINT16_C( 4154), UINT16_C(61104), UINT16_C(58048), UINT16_C(45671), UINT16_C(62976), UINT16_C(38111), UINT16_C(63850), UINT16_C(16186), UINT16_C(63892), UINT16_C(44150), UINT16_C(42054), UINT16_C(42373), UINT16_C(13675) }, { UINT16_C(42769), UINT16_C(43159), UINT16_C(54008), UINT16_C(43192), UINT16_C(30912), UINT16_C(10378), UINT16_C(35626), UINT16_C( 2334), UINT16_C(34847), UINT16_C(22786), UINT16_C(38600), UINT16_C(15954), UINT16_C(38979), UINT16_C(51426), UINT16_C(20030), UINT16_C(20477), UINT16_C(38389), UINT16_C(60919), UINT16_C(44903), UINT16_C(10134), UINT16_C( 8231), UINT16_C(20815), UINT16_C(28075), UINT16_C(51802), UINT16_C(24054), UINT16_C(48676), UINT16_C(30451), UINT16_C(14076), UINT16_C(56847), UINT16_C(19966), UINT16_C(64556), UINT16_C( 8604) }, { UINT16_C( 0), UINT16_MAX, UINT16_C(65193), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(10256), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(55847), UINT16_C(52993), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { UINT32_C(4161770385), { UINT16_C(42306), UINT16_C(26911), UINT16_C(28613), UINT16_C(29114), UINT16_C( 5340), UINT16_C(53819), UINT16_C(24433), 
UINT16_C(26000), UINT16_C(36054), UINT16_C(58779), UINT16_C(39531), UINT16_C(38706), UINT16_C(52886), UINT16_C(10169), UINT16_C(51297), UINT16_C(41759), UINT16_C(15981), UINT16_C(12812), UINT16_C(50861), UINT16_C(35491), UINT16_C(57306), UINT16_C(19548), UINT16_C(60734), UINT16_C( 5297), UINT16_C(19577), UINT16_C(58617), UINT16_C(11238), UINT16_C(31868), UINT16_C(13817), UINT16_C(23203), UINT16_C(49917), UINT16_C(27389) }, { UINT16_C( 2305), UINT16_C(44700), UINT16_C(16591), UINT16_C(43576), UINT16_C(38175), UINT16_C(24054), UINT16_C(42882), UINT16_C(64370), UINT16_C(27635), UINT16_C(56032), UINT16_C(23703), UINT16_C(36950), UINT16_C(64145), UINT16_C(36587), UINT16_C(59580), UINT16_C(48632), UINT16_C(38130), UINT16_C(49516), UINT16_C(42196), UINT16_C(62315), UINT16_C(24889), UINT16_C(47953), UINT16_C(49928), UINT16_C(64695), UINT16_C(38702), UINT16_C(50646), UINT16_C(11507), UINT16_C(33878), UINT16_C(16678), UINT16_C(58130), UINT16_C( 2601), UINT16_C( 7072) }, { UINT16_C(44611), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(43515), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C(63689), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C(54111), UINT16_C(62328), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C(30495), UINT16_MAX, UINT16_C(52518), UINT16_C(34461) } }, { UINT32_C(1943866526), { UINT16_C(18609), UINT16_C(60006), UINT16_C(47018), UINT16_C(45734), UINT16_C(23930), UINT16_C(43438), UINT16_C(34036), UINT16_C(59246), UINT16_C(50353), UINT16_C(55147), UINT16_C(32005), UINT16_C(12218), UINT16_C(23431), UINT16_C( 9546), UINT16_C(10087), UINT16_C( 6296), UINT16_C(65392), UINT16_C( 6659), UINT16_C(43446), UINT16_C(12748), UINT16_C(31494), UINT16_C(64218), UINT16_C(18687), UINT16_C(45281), UINT16_C(19469), UINT16_C( 4744), UINT16_C(17097), UINT16_C(20545), UINT16_C(35997), UINT16_C( 1397), UINT16_C( 3763), UINT16_C( 
8989) }, { UINT16_C( 8205), UINT16_C(49981), UINT16_C( 2761), UINT16_C(53236), UINT16_C(52869), UINT16_C(33993), UINT16_C(43543), UINT16_C( 9269), UINT16_C(48630), UINT16_C(48950), UINT16_C(30975), UINT16_C(40207), UINT16_C(34052), UINT16_C(47010), UINT16_C(49043), UINT16_C(41179), UINT16_C( 6368), UINT16_C(43363), UINT16_C(22562), UINT16_C(42873), UINT16_C(16934), UINT16_C(15660), UINT16_C(25069), UINT16_C(58209), UINT16_C(38942), UINT16_C( 7587), UINT16_C(45584), UINT16_C( 5306), UINT16_C(23607), UINT16_C(51915), UINT16_C(42524), UINT16_C(64618) }, { UINT16_C( 0), UINT16_MAX, UINT16_C(49779), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C(62980), UINT16_C(52425), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C(55621), UINT16_C(48428), UINT16_C( 0), UINT16_C(43756), UINT16_MAX, UINT16_C(58411), UINT16_C(12331), UINT16_C( 0), UINT16_C( 0), UINT16_C(59604), UINT16_C(53312), UINT16_C(46287), UINT16_C( 0) } }, { UINT32_C(3785739967), { UINT16_C( 7718), UINT16_C(19593), UINT16_C(46433), UINT16_C(20106), UINT16_C(60182), UINT16_C(13361), UINT16_C(54403), UINT16_C(37713), UINT16_C( 3207), UINT16_C(48807), UINT16_C(29544), UINT16_C(33929), UINT16_C(62233), UINT16_C(55424), UINT16_C( 9921), UINT16_C(59322), UINT16_C(17220), UINT16_C(42292), UINT16_C(48888), UINT16_C( 3827), UINT16_C( 9641), UINT16_C(11586), UINT16_C(37881), UINT16_C(32960), UINT16_C(26783), UINT16_C( 2111), UINT16_C(51419), UINT16_C(62604), UINT16_C( 3515), UINT16_C(32205), UINT16_C(34611), UINT16_C(30564) }, { UINT16_C(39114), UINT16_C(49693), UINT16_C( 4182), UINT16_C( 208), UINT16_C( 4661), UINT16_C(12077), UINT16_C(60837), UINT16_C(17839), UINT16_C(61013), UINT16_C(12365), UINT16_C(55734), UINT16_C(29221), UINT16_C(62182), UINT16_C( 6639), UINT16_C(21369), UINT16_C(17297), UINT16_C(44780), UINT16_C(16901), UINT16_C(54718), UINT16_C(62530), UINT16_C(28647), UINT16_C(35875), UINT16_C(53853), 
UINT16_C(45777), UINT16_C( 7873), UINT16_C(30691), UINT16_C( 2296), UINT16_C(57065), UINT16_C(55546), UINT16_C(29688), UINT16_C(35116), UINT16_C( 6326) }, { UINT16_C(46832), UINT16_MAX, UINT16_C(50615), UINT16_C(20314), UINT16_C(64843), UINT16_C(25438), UINT16_C( 0), UINT16_C(55552), UINT16_C( 0), UINT16_C(61172), UINT16_MAX, UINT16_C(63150), UINT16_C( 0), UINT16_C( 0), UINT16_C(31290), UINT16_MAX, UINT16_C(62000), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C(47461), UINT16_C( 0), UINT16_MAX, UINT16_C(34656), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(61893), UINT16_MAX, UINT16_C(36890) } }, { UINT32_C(4116364087), { UINT16_C(40336), UINT16_C(30697), UINT16_C( 3084), UINT16_C(26883), UINT16_C(54751), UINT16_C(40988), UINT16_C(65523), UINT16_C(60183), UINT16_C( 263), UINT16_C( 458), UINT16_C(49881), UINT16_C( 1396), UINT16_C(10827), UINT16_C(33309), UINT16_C(30949), UINT16_C(30071), UINT16_C(24853), UINT16_C( 8684), UINT16_C(61293), UINT16_C(19595), UINT16_C(42948), UINT16_C(47340), UINT16_C( 1190), UINT16_C(44451), UINT16_C(27909), UINT16_C(57006), UINT16_C( 8751), UINT16_C(31460), UINT16_C( 332), UINT16_C(12796), UINT16_C(29817), UINT16_C(36518) }, { UINT16_C(37589), UINT16_C(17072), UINT16_C(15233), UINT16_C(18063), UINT16_C(31714), UINT16_C(35070), UINT16_C(41343), UINT16_C(33845), UINT16_C(58127), UINT16_C(15971), UINT16_C(18181), UINT16_C(20921), UINT16_C(46408), UINT16_C(49794), UINT16_C(10281), UINT16_C(65104), UINT16_C( 186), UINT16_C(15169), UINT16_C(53307), UINT16_C( 7553), UINT16_C(32587), UINT16_C(52133), UINT16_C(55841), UINT16_C(12367), UINT16_C(45757), UINT16_C(49774), UINT16_C(10233), UINT16_C(16915), UINT16_C(38365), UINT16_C( 1540), UINT16_C(21693), UINT16_C(30469) }, { UINT16_MAX, UINT16_C(47769), UINT16_C(18317), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C(58390), UINT16_C(16429), UINT16_C( 0), UINT16_C(22317), UINT16_C(57235), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 
0), UINT16_C(23853), UINT16_C( 0), UINT16_C(27148), UINT16_MAX, UINT16_C( 0), UINT16_C(57031), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C(18984), UINT16_C( 0), UINT16_C(38697), UINT16_C(14336), UINT16_C(51510), UINT16_MAX } }, { UINT32_C(2427668053), { UINT16_C(13334), UINT16_C(25006), UINT16_C(21428), UINT16_C(54572), UINT16_C(31790), UINT16_C(60165), UINT16_C(29486), UINT16_C(10414), UINT16_C(49563), UINT16_C(30826), UINT16_C(28247), UINT16_C( 5246), UINT16_C(33730), UINT16_C( 6028), UINT16_C(16329), UINT16_C(57256), UINT16_C(22131), UINT16_C(10049), UINT16_C(28073), UINT16_C(55292), UINT16_C( 489), UINT16_C( 6339), UINT16_C(29045), UINT16_C( 4160), UINT16_C(43570), UINT16_C(35208), UINT16_C( 1560), UINT16_C(55966), UINT16_C(10890), UINT16_C(21490), UINT16_C(39529), UINT16_C(56371) }, { UINT16_C(29936), UINT16_C(39172), UINT16_C( 225), UINT16_C(52081), UINT16_C(13314), UINT16_C(30691), UINT16_C( 9125), UINT16_C(55175), UINT16_C( 4045), UINT16_C(58721), UINT16_C(65301), UINT16_C(40895), UINT16_C(45353), UINT16_C(37619), UINT16_C( 9803), UINT16_C(15214), UINT16_C(29338), UINT16_C(31701), UINT16_C(18035), UINT16_C(30022), UINT16_C(10618), UINT16_C( 8172), UINT16_C(29516), UINT16_C( 6646), UINT16_C(22402), UINT16_C(38910), UINT16_C(48726), UINT16_C(32567), UINT16_C(10863), UINT16_C(47889), UINT16_C(32848), UINT16_C(60150) }, { UINT16_C(43270), UINT16_C( 0), UINT16_C(21653), UINT16_C( 0), UINT16_C(45104), UINT16_C( 0), UINT16_C(38611), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(26132), UINT16_C( 0), UINT16_C(51469), UINT16_C(41750), UINT16_C( 0), UINT16_C( 0), UINT16_C(11107), UINT16_C(14511), UINT16_C( 0), UINT16_C(10806), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(21753), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { UINT32_C(1701170162), { UINT16_C(44049), UINT16_C(35802), UINT16_C(50901), UINT16_C( 8874), UINT16_C(41273), UINT16_C(47931), UINT16_C(15096), UINT16_C(20307), 
UINT16_C(35576), UINT16_C(26574), UINT16_C(57524), UINT16_C( 1058), UINT16_C( 6496), UINT16_C(21230), UINT16_C(21476), UINT16_C(63160), UINT16_C(37631), UINT16_C(54657), UINT16_C(11353), UINT16_C(37623), UINT16_C(13005), UINT16_C(50510), UINT16_C(41324), UINT16_C(25620), UINT16_C(58155), UINT16_C(57292), UINT16_C(61123), UINT16_C( 9187), UINT16_C(53511), UINT16_C(60533), UINT16_C(11556), UINT16_C( 9442) }, { UINT16_C(25536), UINT16_C( 6649), UINT16_C(61583), UINT16_C(23723), UINT16_C(63778), UINT16_C(36642), UINT16_C(13978), UINT16_C(50675), UINT16_C(48921), UINT16_C(56484), UINT16_C(34734), UINT16_C(46591), UINT16_C(30040), UINT16_C(32161), UINT16_C(33698), UINT16_C(25249), UINT16_C(39655), UINT16_C(30331), UINT16_C(10122), UINT16_C(44243), UINT16_C(62752), UINT16_C(47931), UINT16_C(12075), UINT16_C(17792), UINT16_C( 9710), UINT16_C(39969), UINT16_C( 8620), UINT16_C( 1362), UINT16_C(62358), UINT16_C(14466), UINT16_C( 9079), UINT16_C(24219) }, { UINT16_C( 0), UINT16_C(42451), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C(29074), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C(47649), UINT16_C( 0), UINT16_C( 0), UINT16_C(55174), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C(21475), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C(53399), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C(20635), UINT16_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_maskz_adds_epu16(test_vec[i].k, a, b); simde_test_x86_assert_equal_u16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_adds_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_adds_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_adds_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_adds_epi16) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_adds_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_adds_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_adds_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_adds_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_adds_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_adds_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_adds_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_adds_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_adds_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_adds_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_adds_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_adds_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_adds_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_adds_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_adds_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_adds_epu16) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/and.c000066400000000000000000004601541400333146700161740ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN and #include #include #include static int test_simde_mm512_and_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 b; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( 260.00), SIMDE_FLOAT32_C( 472.07), SIMDE_FLOAT32_C( 343.37), SIMDE_FLOAT32_C( 668.63), SIMDE_FLOAT32_C( 74.64), SIMDE_FLOAT32_C( -166.33), SIMDE_FLOAT32_C( 962.01), SIMDE_FLOAT32_C( 120.25), SIMDE_FLOAT32_C( -633.54), SIMDE_FLOAT32_C( -160.44), SIMDE_FLOAT32_C( -754.35), SIMDE_FLOAT32_C( 920.06), SIMDE_FLOAT32_C( -752.65), SIMDE_FLOAT32_C( -15.27), SIMDE_FLOAT32_C( 736.97), SIMDE_FLOAT32_C( 591.25)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 968.14), SIMDE_FLOAT32_C( 728.35), SIMDE_FLOAT32_C( -574.47), SIMDE_FLOAT32_C( 770.03), SIMDE_FLOAT32_C( -456.43), SIMDE_FLOAT32_C( 727.04), SIMDE_FLOAT32_C( -89.84), SIMDE_FLOAT32_C( 288.08), SIMDE_FLOAT32_C( -720.94), SIMDE_FLOAT32_C( -964.02), SIMDE_FLOAT32_C( 974.54), SIMDE_FLOAT32_C( -246.99), SIMDE_FLOAT32_C( -603.24), SIMDE_FLOAT32_C( -592.85), SIMDE_FLOAT32_C( -351.71), SIMDE_FLOAT32_C( 472.50)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( 2.56), SIMDE_FLOAT32_C( 2.18), SIMDE_FLOAT32_C( 512.00), SIMDE_FLOAT32_C( 66.01), SIMDE_FLOAT32_C( 2.57), SIMDE_FLOAT32_C( 2.76), SIMDE_FLOAT32_C( 72.00), SIMDE_FLOAT32_C( -592.50), SIMDE_FLOAT32_C( -2.50), SIMDE_FLOAT32_C( 706.03), SIMDE_FLOAT32_C( 3.59), SIMDE_FLOAT32_C( -592.14), SIMDE_FLOAT32_C( -2.31), SIMDE_FLOAT32_C( 2.63), SIMDE_FLOAT32_C( 2.00)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -440.60), SIMDE_FLOAT32_C( 215.95), SIMDE_FLOAT32_C( -449.65), SIMDE_FLOAT32_C( 426.70), 
SIMDE_FLOAT32_C( 107.08), SIMDE_FLOAT32_C( -345.64), SIMDE_FLOAT32_C( 226.40), SIMDE_FLOAT32_C( 712.58), SIMDE_FLOAT32_C( -396.23), SIMDE_FLOAT32_C( -256.01), SIMDE_FLOAT32_C( 622.69), SIMDE_FLOAT32_C( -188.83), SIMDE_FLOAT32_C( 358.20), SIMDE_FLOAT32_C( -542.16), SIMDE_FLOAT32_C( 982.13), SIMDE_FLOAT32_C( 702.83)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 347.10), SIMDE_FLOAT32_C( -175.94), SIMDE_FLOAT32_C( 817.30), SIMDE_FLOAT32_C( -721.72), SIMDE_FLOAT32_C( 775.39), SIMDE_FLOAT32_C( -218.71), SIMDE_FLOAT32_C( 919.20), SIMDE_FLOAT32_C( -300.97), SIMDE_FLOAT32_C( 919.48), SIMDE_FLOAT32_C( -61.84), SIMDE_FLOAT32_C( 121.47), SIMDE_FLOAT32_C( 499.98), SIMDE_FLOAT32_C( 538.40), SIMDE_FLOAT32_C( -622.49), SIMDE_FLOAT32_C( -852.24), SIMDE_FLOAT32_C( 445.35)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 280.10), SIMDE_FLOAT32_C( 135.94), SIMDE_FLOAT32_C( 3.01), SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( -136.57), SIMDE_FLOAT32_C( 3.50), SIMDE_FLOAT32_C( 2.28), SIMDE_FLOAT32_C( 3.06), SIMDE_FLOAT32_C( -32.00), SIMDE_FLOAT32_C( 2.29), SIMDE_FLOAT32_C( 184.83), SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( -526.16), SIMDE_FLOAT32_C( 852.13), SIMDE_FLOAT32_C( 2.23)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -590.91), SIMDE_FLOAT32_C( -663.88), SIMDE_FLOAT32_C( -394.32), SIMDE_FLOAT32_C( -991.87), SIMDE_FLOAT32_C( 385.94), SIMDE_FLOAT32_C( -349.46), SIMDE_FLOAT32_C( -786.25), SIMDE_FLOAT32_C( 192.19), SIMDE_FLOAT32_C( -594.16), SIMDE_FLOAT32_C( -602.03), SIMDE_FLOAT32_C( 176.16), SIMDE_FLOAT32_C( -458.14), SIMDE_FLOAT32_C( 335.26), SIMDE_FLOAT32_C( -272.70), SIMDE_FLOAT32_C( 585.90), SIMDE_FLOAT32_C( -571.61)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 530.04), SIMDE_FLOAT32_C( -606.85), SIMDE_FLOAT32_C( 112.20), SIMDE_FLOAT32_C( -437.59), SIMDE_FLOAT32_C( -396.36), SIMDE_FLOAT32_C( -280.58), SIMDE_FLOAT32_C( 819.31), SIMDE_FLOAT32_C( -726.73), SIMDE_FLOAT32_C( -263.24), SIMDE_FLOAT32_C( -511.40), SIMDE_FLOAT32_C( -175.25), SIMDE_FLOAT32_C( 728.37), SIMDE_FLOAT32_C( 881.16), 
SIMDE_FLOAT32_C( -49.97), SIMDE_FLOAT32_C( 618.76), SIMDE_FLOAT32_C( -518.70)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 514.03), SIMDE_FLOAT32_C( -534.75), SIMDE_FLOAT32_C( 96.06), SIMDE_FLOAT32_C( -3.29), SIMDE_FLOAT32_C( 384.31), SIMDE_FLOAT32_C( -280.08), SIMDE_FLOAT32_C( 786.25), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -2.01), SIMDE_FLOAT32_C( -2.35), SIMDE_FLOAT32_C( 160.00), SIMDE_FLOAT32_C( 2.56), SIMDE_FLOAT32_C( 2.06), SIMDE_FLOAT32_C( -32.06), SIMDE_FLOAT32_C( 584.76), SIMDE_FLOAT32_C( -514.56)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 515.17), SIMDE_FLOAT32_C( 324.54), SIMDE_FLOAT32_C( 91.18), SIMDE_FLOAT32_C( -165.19), SIMDE_FLOAT32_C( -882.22), SIMDE_FLOAT32_C( 833.89), SIMDE_FLOAT32_C( 476.02), SIMDE_FLOAT32_C( 887.60), SIMDE_FLOAT32_C( 229.74), SIMDE_FLOAT32_C( 342.64), SIMDE_FLOAT32_C( 541.23), SIMDE_FLOAT32_C( -642.89), SIMDE_FLOAT32_C( 701.90), SIMDE_FLOAT32_C( 393.90), SIMDE_FLOAT32_C( -103.65), SIMDE_FLOAT32_C( 243.25)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 21.91), SIMDE_FLOAT32_C( -134.28), SIMDE_FLOAT32_C( 125.14), SIMDE_FLOAT32_C( -667.85), SIMDE_FLOAT32_C( -778.80), SIMDE_FLOAT32_C( -220.75), SIMDE_FLOAT32_C( 348.36), SIMDE_FLOAT32_C( 29.88), SIMDE_FLOAT32_C( -634.89), SIMDE_FLOAT32_C( -148.88), SIMDE_FLOAT32_C( 827.50), SIMDE_FLOAT32_C( -532.87), SIMDE_FLOAT32_C( -762.33), SIMDE_FLOAT32_C( 247.69), SIMDE_FLOAT32_C( -238.64), SIMDE_FLOAT32_C( 244.40)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 130.27), SIMDE_FLOAT32_C( 89.13), SIMDE_FLOAT32_C( -2.57), SIMDE_FLOAT32_C( -770.03), SIMDE_FLOAT32_C( 3.25), SIMDE_FLOAT32_C( 348.02), SIMDE_FLOAT32_C( 3.20), SIMDE_FLOAT32_C( 2.07), SIMDE_FLOAT32_C( 128.26), SIMDE_FLOAT32_C( 537.00), SIMDE_FLOAT32_C( -512.76), SIMDE_FLOAT32_C( 696.27), SIMDE_FLOAT32_C( 196.69), SIMDE_FLOAT32_C( -51.50), SIMDE_FLOAT32_C( 240.25)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 139.73), SIMDE_FLOAT32_C( 808.22), SIMDE_FLOAT32_C( -888.67), SIMDE_FLOAT32_C( -90.81), SIMDE_FLOAT32_C( 58.51), SIMDE_FLOAT32_C( 
-297.55), SIMDE_FLOAT32_C( -246.77), SIMDE_FLOAT32_C( -391.18), SIMDE_FLOAT32_C( 887.15), SIMDE_FLOAT32_C( 997.52), SIMDE_FLOAT32_C( 873.12), SIMDE_FLOAT32_C( -969.73), SIMDE_FLOAT32_C( 721.30), SIMDE_FLOAT32_C( -128.28), SIMDE_FLOAT32_C( -264.35), SIMDE_FLOAT32_C( -432.42)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -80.65), SIMDE_FLOAT32_C( -15.71), SIMDE_FLOAT32_C( 54.64), SIMDE_FLOAT32_C( -420.79), SIMDE_FLOAT32_C( -573.45), SIMDE_FLOAT32_C( 578.20), SIMDE_FLOAT32_C( -393.34), SIMDE_FLOAT32_C( -79.47), SIMDE_FLOAT32_C( -837.77), SIMDE_FLOAT32_C( 169.23), SIMDE_FLOAT32_C( 110.87), SIMDE_FLOAT32_C( 428.31), SIMDE_FLOAT32_C( 944.93), SIMDE_FLOAT32_C( 222.75), SIMDE_FLOAT32_C( -792.23), SIMDE_FLOAT32_C( -269.27)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 32.26), SIMDE_FLOAT32_C( 3.16), SIMDE_FLOAT32_C( 3.41), SIMDE_FLOAT32_C( -72.01), SIMDE_FLOAT32_C( 2.16), SIMDE_FLOAT32_C( 2.26), SIMDE_FLOAT32_C( -196.50), SIMDE_FLOAT32_C( -65.28), SIMDE_FLOAT32_C( 837.02), SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( 3.38), SIMDE_FLOAT32_C( 3.28), SIMDE_FLOAT32_C( 656.30), SIMDE_FLOAT32_C( 128.25), SIMDE_FLOAT32_C( -2.06), SIMDE_FLOAT32_C( -256.25)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -807.89), SIMDE_FLOAT32_C( -195.99), SIMDE_FLOAT32_C( -460.22), SIMDE_FLOAT32_C( -214.31), SIMDE_FLOAT32_C( -242.49), SIMDE_FLOAT32_C( 293.67), SIMDE_FLOAT32_C( 209.36), SIMDE_FLOAT32_C( -28.16), SIMDE_FLOAT32_C( 861.78), SIMDE_FLOAT32_C( -349.18), SIMDE_FLOAT32_C( -840.98), SIMDE_FLOAT32_C( 667.88), SIMDE_FLOAT32_C( -431.60), SIMDE_FLOAT32_C( -312.68), SIMDE_FLOAT32_C( 469.25), SIMDE_FLOAT32_C( 584.01)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 881.42), SIMDE_FLOAT32_C( 703.51), SIMDE_FLOAT32_C( 112.91), SIMDE_FLOAT32_C( 92.78), SIMDE_FLOAT32_C( 506.44), SIMDE_FLOAT32_C( 923.94), SIMDE_FLOAT32_C( -577.40), SIMDE_FLOAT32_C( -437.14), SIMDE_FLOAT32_C( -379.29), SIMDE_FLOAT32_C( 791.05), SIMDE_FLOAT32_C( 859.09), SIMDE_FLOAT32_C( 612.11), SIMDE_FLOAT32_C( 687.78), SIMDE_FLOAT32_C( 712.98), SIMDE_FLOAT32_C( 
-143.15), SIMDE_FLOAT32_C( -972.86)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 801.39), SIMDE_FLOAT32_C( 2.06), SIMDE_FLOAT32_C( 112.03), SIMDE_FLOAT32_C( 36.01), SIMDE_FLOAT32_C( 240.22), SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( 2.26), SIMDE_FLOAT32_C( -24.00), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( 2.07), SIMDE_FLOAT32_C( 840.07), SIMDE_FLOAT32_C( 512.00), SIMDE_FLOAT32_C( 2.06), SIMDE_FLOAT32_C( 2.25), SIMDE_FLOAT32_C( 138.12), SIMDE_FLOAT32_C( 584.00)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 278.01), SIMDE_FLOAT32_C( -815.62), SIMDE_FLOAT32_C( 752.91), SIMDE_FLOAT32_C( 710.22), SIMDE_FLOAT32_C( -124.40), SIMDE_FLOAT32_C( -338.82), SIMDE_FLOAT32_C( -853.49), SIMDE_FLOAT32_C( 731.62), SIMDE_FLOAT32_C( 168.07), SIMDE_FLOAT32_C( -402.61), SIMDE_FLOAT32_C( -908.62), SIMDE_FLOAT32_C( 912.24), SIMDE_FLOAT32_C( 241.90), SIMDE_FLOAT32_C( 493.82), SIMDE_FLOAT32_C( -948.44), SIMDE_FLOAT32_C( 522.79)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -401.28), SIMDE_FLOAT32_C( 383.51), SIMDE_FLOAT32_C( -854.57), SIMDE_FLOAT32_C( -237.48), SIMDE_FLOAT32_C( -426.55), SIMDE_FLOAT32_C( -605.26), SIMDE_FLOAT32_C( 140.00), SIMDE_FLOAT32_C( -626.79), SIMDE_FLOAT32_C( 473.63), SIMDE_FLOAT32_C( 968.53), SIMDE_FLOAT32_C( -767.62), SIMDE_FLOAT32_C( -339.51), SIMDE_FLOAT32_C( 144.17), SIMDE_FLOAT32_C( -47.64), SIMDE_FLOAT32_C( -130.89), SIMDE_FLOAT32_C( -19.38)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 272.01), SIMDE_FLOAT32_C( 2.18), SIMDE_FLOAT32_C( 592.50), SIMDE_FLOAT32_C( 2.52), SIMDE_FLOAT32_C( -104.13), SIMDE_FLOAT32_C( -2.02), SIMDE_FLOAT32_C( 2.06), SIMDE_FLOAT32_C( 594.54), SIMDE_FLOAT32_C( 168.07), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( -652.62), SIMDE_FLOAT32_C( 2.50), SIMDE_FLOAT32_C( 144.13), SIMDE_FLOAT32_C( 45.63), SIMDE_FLOAT32_C( -2.00), SIMDE_FLOAT32_C( 2.03)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 491.53), SIMDE_FLOAT32_C( 985.75), SIMDE_FLOAT32_C( -390.64), SIMDE_FLOAT32_C( 517.90), SIMDE_FLOAT32_C( -725.16), SIMDE_FLOAT32_C( 9.87), SIMDE_FLOAT32_C( 943.82), SIMDE_FLOAT32_C( 
279.49), SIMDE_FLOAT32_C( -942.01), SIMDE_FLOAT32_C( 63.94), SIMDE_FLOAT32_C( 920.28), SIMDE_FLOAT32_C( 132.72), SIMDE_FLOAT32_C( 502.41), SIMDE_FLOAT32_C( 855.02), SIMDE_FLOAT32_C( 610.59), SIMDE_FLOAT32_C( 860.61)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -675.66), SIMDE_FLOAT32_C( 128.22), SIMDE_FLOAT32_C( -915.29), SIMDE_FLOAT32_C( -679.65), SIMDE_FLOAT32_C( 537.51), SIMDE_FLOAT32_C( -484.11), SIMDE_FLOAT32_C( 502.40), SIMDE_FLOAT32_C( -785.39), SIMDE_FLOAT32_C( -128.17), SIMDE_FLOAT32_C( 101.31), SIMDE_FLOAT32_C( -990.73), SIMDE_FLOAT32_C( -514.82), SIMDE_FLOAT32_C( 231.21), SIMDE_FLOAT32_C( 964.21), SIMDE_FLOAT32_C( -258.81), SIMDE_FLOAT32_C( 355.88)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.51), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -3.00), SIMDE_FLOAT32_C( 517.65), SIMDE_FLOAT32_C( 529.00), SIMDE_FLOAT32_C( 9.00), SIMDE_FLOAT32_C( 3.67), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -2.00), SIMDE_FLOAT32_C( 50.63), SIMDE_FLOAT32_C( 920.01), SIMDE_FLOAT32_C( 2.01), SIMDE_FLOAT32_C( 227.20), SIMDE_FLOAT32_C( 836.02), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.27)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_and_ps(test_vec[i].a, test_vec[i].b); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_and_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d b; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 266.26), SIMDE_FLOAT64_C( 537.32), SIMDE_FLOAT64_C( -326.88), SIMDE_FLOAT64_C( -882.50), SIMDE_FLOAT64_C( -89.28), SIMDE_FLOAT64_C( -631.60), SIMDE_FLOAT64_C( -243.67), SIMDE_FLOAT64_C( 78.08)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -676.48), SIMDE_FLOAT64_C( -545.20), SIMDE_FLOAT64_C( 963.41), SIMDE_FLOAT64_C( 343.81), SIMDE_FLOAT64_C( -406.87), SIMDE_FLOAT64_C( -689.93), SIMDE_FLOAT64_C( -169.12), SIMDE_FLOAT64_C( -796.89)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.02), SIMDE_FLOAT64_C( 513.07), SIMDE_FLOAT64_C( 2.50), 
SIMDE_FLOAT64_C( 2.13), SIMDE_FLOAT64_C( -65.03), SIMDE_FLOAT64_C( -561.53), SIMDE_FLOAT64_C( -161.04), SIMDE_FLOAT64_C( 2.06)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -354.59), SIMDE_FLOAT64_C( -334.01), SIMDE_FLOAT64_C( -406.82), SIMDE_FLOAT64_C( -535.93), SIMDE_FLOAT64_C( 534.72), SIMDE_FLOAT64_C( 276.86), SIMDE_FLOAT64_C( 401.00), SIMDE_FLOAT64_C( 921.82)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -20.44), SIMDE_FLOAT64_C( -778.21), SIMDE_FLOAT64_C( -61.28), SIMDE_FLOAT64_C( 788.42), SIMDE_FLOAT64_C( 286.07), SIMDE_FLOAT64_C( 772.65), SIMDE_FLOAT64_C( -788.54), SIMDE_FLOAT64_C( 755.47)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -20.13), SIMDE_FLOAT64_C( -2.03), SIMDE_FLOAT64_C( -48.26), SIMDE_FLOAT64_C( 532.41), SIMDE_FLOAT64_C( 2.08), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 657.31)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 197.60), SIMDE_FLOAT64_C( -669.73), SIMDE_FLOAT64_C( 859.82), SIMDE_FLOAT64_C( -638.20), SIMDE_FLOAT64_C( -808.24), SIMDE_FLOAT64_C( 961.25), SIMDE_FLOAT64_C( 916.37), SIMDE_FLOAT64_C( -473.47)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.21), SIMDE_FLOAT64_C( 10.30), SIMDE_FLOAT64_C( 402.59), SIMDE_FLOAT64_C( -919.31), SIMDE_FLOAT64_C( 484.80), SIMDE_FLOAT64_C( 567.35), SIMDE_FLOAT64_C( -979.89), SIMDE_FLOAT64_C( 784.39)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.08), SIMDE_FLOAT64_C( 2.57), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( -534.01), SIMDE_FLOAT64_C( 3.03), SIMDE_FLOAT64_C( 513.25), SIMDE_FLOAT64_C( 912.26), SIMDE_FLOAT64_C( 3.06)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 188.83), SIMDE_FLOAT64_C( -479.33), SIMDE_FLOAT64_C( 811.81), SIMDE_FLOAT64_C( -322.50), SIMDE_FLOAT64_C( 884.11), SIMDE_FLOAT64_C( 808.53), SIMDE_FLOAT64_C( -174.95), SIMDE_FLOAT64_C( -68.05)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -882.86), SIMDE_FLOAT64_C( -939.93), SIMDE_FLOAT64_C( -855.90), SIMDE_FLOAT64_C( 170.22), SIMDE_FLOAT64_C( 115.99), SIMDE_FLOAT64_C( 297.62), SIMDE_FLOAT64_C( -527.76), SIMDE_FLOAT64_C( 219.88)), 
simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.45), SIMDE_FLOAT64_C( -3.67), SIMDE_FLOAT64_C( 771.77), SIMDE_FLOAT64_C( 160.00), SIMDE_FLOAT64_C( 3.08), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( -2.05), SIMDE_FLOAT64_C( 34.00)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -659.02), SIMDE_FLOAT64_C( 607.91), SIMDE_FLOAT64_C( -268.25), SIMDE_FLOAT64_C( 240.07), SIMDE_FLOAT64_C( 471.39), SIMDE_FLOAT64_C( -501.59), SIMDE_FLOAT64_C( 984.94), SIMDE_FLOAT64_C( -801.62)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -165.03), SIMDE_FLOAT64_C( 382.49), SIMDE_FLOAT64_C( -663.11), SIMDE_FLOAT64_C( 675.92), SIMDE_FLOAT64_C( -427.89), SIMDE_FLOAT64_C( -312.23), SIMDE_FLOAT64_C( 47.19), SIMDE_FLOAT64_C( -273.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -2.56), SIMDE_FLOAT64_C( 2.36), SIMDE_FLOAT64_C( -2.06), SIMDE_FLOAT64_C( 2.50), SIMDE_FLOAT64_C( 387.39), SIMDE_FLOAT64_C( -304.07), SIMDE_FLOAT64_C( 2.81), SIMDE_FLOAT64_C( -2.13)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -449.51), SIMDE_FLOAT64_C( -396.24), SIMDE_FLOAT64_C( -106.23), SIMDE_FLOAT64_C( -648.77), SIMDE_FLOAT64_C( 178.69), SIMDE_FLOAT64_C( -996.05), SIMDE_FLOAT64_C( 315.07), SIMDE_FLOAT64_C( -247.28)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 515.02), SIMDE_FLOAT64_C( 228.66), SIMDE_FLOAT64_C( 419.85), SIMDE_FLOAT64_C( -810.27), SIMDE_FLOAT64_C( 162.64), SIMDE_FLOAT64_C( 495.48), SIMDE_FLOAT64_C( -567.27), SIMDE_FLOAT64_C( 755.82)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.01), SIMDE_FLOAT64_C( 196.03), SIMDE_FLOAT64_C( 104.20), SIMDE_FLOAT64_C( -520.27), SIMDE_FLOAT64_C( 162.63), SIMDE_FLOAT64_C( 3.77), SIMDE_FLOAT64_C( 2.21), SIMDE_FLOAT64_C( 2.82)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -506.18), SIMDE_FLOAT64_C( 153.12), SIMDE_FLOAT64_C( -217.93), SIMDE_FLOAT64_C( 6.73), SIMDE_FLOAT64_C( 358.11), SIMDE_FLOAT64_C( -136.37), SIMDE_FLOAT64_C( 141.08), SIMDE_FLOAT64_C( -860.28)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -303.86), SIMDE_FLOAT64_C( -938.78), SIMDE_FLOAT64_C( 386.83), SIMDE_FLOAT64_C( -590.09), SIMDE_FLOAT64_C( -517.39), SIMDE_FLOAT64_C( 
-324.41), SIMDE_FLOAT64_C( 515.48), SIMDE_FLOAT64_C( 674.62)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -298.05), SIMDE_FLOAT64_C( 2.13), SIMDE_FLOAT64_C( 193.41), SIMDE_FLOAT64_C( 2.30), SIMDE_FLOAT64_C( 2.02), SIMDE_FLOAT64_C( -128.08), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 512.03)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -571.57), SIMDE_FLOAT64_C( -865.77), SIMDE_FLOAT64_C( -691.63), SIMDE_FLOAT64_C( -182.56), SIMDE_FLOAT64_C( -67.70), SIMDE_FLOAT64_C( -166.11), SIMDE_FLOAT64_C( -833.08), SIMDE_FLOAT64_C( -401.07)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 341.96), SIMDE_FLOAT64_C( 615.56), SIMDE_FLOAT64_C( 144.45), SIMDE_FLOAT64_C( 211.78), SIMDE_FLOAT64_C( -86.51), SIMDE_FLOAT64_C( 594.64), SIMDE_FLOAT64_C( 523.21), SIMDE_FLOAT64_C( -747.41)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.17), SIMDE_FLOAT64_C( 609.52), SIMDE_FLOAT64_C( 2.01), SIMDE_FLOAT64_C( 146.53), SIMDE_FLOAT64_C( -66.51), SIMDE_FLOAT64_C( 2.06), SIMDE_FLOAT64_C( 513.08), SIMDE_FLOAT64_C( -2.13)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_and_pd(test_vec[i].a, test_vec[i].b); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_and_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 b; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -343.60), SIMDE_FLOAT32_C( -192.26), SIMDE_FLOAT32_C( -375.10), SIMDE_FLOAT32_C( 810.28), SIMDE_FLOAT32_C( -388.15), SIMDE_FLOAT32_C( 15.81), SIMDE_FLOAT32_C( 547.95), SIMDE_FLOAT32_C( 151.06), SIMDE_FLOAT32_C( -920.74), SIMDE_FLOAT32_C( -676.14), SIMDE_FLOAT32_C( -545.26), SIMDE_FLOAT32_C( -14.56), SIMDE_FLOAT32_C( -393.14), SIMDE_FLOAT32_C( 768.60), SIMDE_FLOAT32_C( -177.89), SIMDE_FLOAT32_C( -467.51)), UINT16_C(45944), simde_mm512_set_ps(SIMDE_FLOAT32_C( -651.37), SIMDE_FLOAT32_C( 37.95), SIMDE_FLOAT32_C( -182.79), SIMDE_FLOAT32_C( 255.51), SIMDE_FLOAT32_C( 476.70), SIMDE_FLOAT32_C( 371.61), 
SIMDE_FLOAT32_C( -494.45), SIMDE_FLOAT32_C( 72.18), SIMDE_FLOAT32_C( -723.25), SIMDE_FLOAT32_C( 604.60), SIMDE_FLOAT32_C( 545.32), SIMDE_FLOAT32_C( -399.73), SIMDE_FLOAT32_C( -975.39), SIMDE_FLOAT32_C( 419.30), SIMDE_FLOAT32_C( -736.37), SIMDE_FLOAT32_C( 655.70)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -330.93), SIMDE_FLOAT32_C( 963.24), SIMDE_FLOAT32_C( -62.45), SIMDE_FLOAT32_C( 625.74), SIMDE_FLOAT32_C( -826.45), SIMDE_FLOAT32_C( -884.51), SIMDE_FLOAT32_C( 544.59), SIMDE_FLOAT32_C( -22.39), SIMDE_FLOAT32_C( 750.16), SIMDE_FLOAT32_C( -751.51), SIMDE_FLOAT32_C( -211.00), SIMDE_FLOAT32_C( 886.29), SIMDE_FLOAT32_C( 666.91), SIMDE_FLOAT32_C( 8.70), SIMDE_FLOAT32_C( -362.66), SIMDE_FLOAT32_C( -451.03)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -2.51), SIMDE_FLOAT32_C( -192.26), SIMDE_FLOAT32_C( -44.20), SIMDE_FLOAT32_C( 2.44), SIMDE_FLOAT32_C( -388.15), SIMDE_FLOAT32_C( 15.81), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 4.50), SIMDE_FLOAT32_C( -920.74), SIMDE_FLOAT32_C( 588.50), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 3.09), SIMDE_FLOAT32_C( 650.38), SIMDE_FLOAT32_C( 768.60), SIMDE_FLOAT32_C( -177.89), SIMDE_FLOAT32_C( -467.51)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -702.08), SIMDE_FLOAT32_C( 457.26), SIMDE_FLOAT32_C( 193.10), SIMDE_FLOAT32_C( 655.72), SIMDE_FLOAT32_C( 205.91), SIMDE_FLOAT32_C( 807.77), SIMDE_FLOAT32_C( -545.40), SIMDE_FLOAT32_C( -364.12), SIMDE_FLOAT32_C( -42.22), SIMDE_FLOAT32_C( -523.42), SIMDE_FLOAT32_C( -308.90), SIMDE_FLOAT32_C( 22.20), SIMDE_FLOAT32_C( -114.47), SIMDE_FLOAT32_C( -738.11), SIMDE_FLOAT32_C( 189.09), SIMDE_FLOAT32_C( -448.58)), UINT16_C(10313), simde_mm512_set_ps(SIMDE_FLOAT32_C( -177.43), SIMDE_FLOAT32_C( -28.38), SIMDE_FLOAT32_C( -846.37), SIMDE_FLOAT32_C( 912.26), SIMDE_FLOAT32_C( -370.39), SIMDE_FLOAT32_C( 988.78), SIMDE_FLOAT32_C( -359.74), SIMDE_FLOAT32_C( -281.72), SIMDE_FLOAT32_C( 166.18), SIMDE_FLOAT32_C( -100.50), SIMDE_FLOAT32_C( -909.51), SIMDE_FLOAT32_C( -85.95), SIMDE_FLOAT32_C( -710.91), SIMDE_FLOAT32_C( -813.11), 
SIMDE_FLOAT32_C( -799.86), SIMDE_FLOAT32_C( -823.45)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 329.92), SIMDE_FLOAT32_C( 113.21), SIMDE_FLOAT32_C( 300.37), SIMDE_FLOAT32_C( -777.20), SIMDE_FLOAT32_C( 193.77), SIMDE_FLOAT32_C( -864.32), SIMDE_FLOAT32_C( 579.99), SIMDE_FLOAT32_C( 488.59), SIMDE_FLOAT32_C( -684.28), SIMDE_FLOAT32_C( -65.28), SIMDE_FLOAT32_C( 876.26), SIMDE_FLOAT32_C( 378.65), SIMDE_FLOAT32_C( -964.10), SIMDE_FLOAT32_C( 626.06), SIMDE_FLOAT32_C( 97.19), SIMDE_FLOAT32_C( 612.33)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -702.08), SIMDE_FLOAT32_C( 457.26), SIMDE_FLOAT32_C( 2.28), SIMDE_FLOAT32_C( 655.72), SIMDE_FLOAT32_C( 129.00), SIMDE_FLOAT32_C( 807.77), SIMDE_FLOAT32_C( -545.40), SIMDE_FLOAT32_C( -364.12), SIMDE_FLOAT32_C( -42.22), SIMDE_FLOAT32_C( -64.00), SIMDE_FLOAT32_C( -308.90), SIMDE_FLOAT32_C( 22.20), SIMDE_FLOAT32_C( -708.03), SIMDE_FLOAT32_C( -738.11), SIMDE_FLOAT32_C( 189.09), SIMDE_FLOAT32_C( 548.31)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 153.13), SIMDE_FLOAT32_C( 47.64), SIMDE_FLOAT32_C( -104.37), SIMDE_FLOAT32_C( -453.36), SIMDE_FLOAT32_C( -57.32), SIMDE_FLOAT32_C( -673.06), SIMDE_FLOAT32_C( -857.97), SIMDE_FLOAT32_C( -158.69), SIMDE_FLOAT32_C( 504.22), SIMDE_FLOAT32_C( 774.61), SIMDE_FLOAT32_C( -50.26), SIMDE_FLOAT32_C( -594.62), SIMDE_FLOAT32_C( 628.86), SIMDE_FLOAT32_C( 362.00), SIMDE_FLOAT32_C( 770.65), SIMDE_FLOAT32_C( -621.70)), UINT16_C( 5674), simde_mm512_set_ps(SIMDE_FLOAT32_C( -181.31), SIMDE_FLOAT32_C( -271.84), SIMDE_FLOAT32_C( 138.26), SIMDE_FLOAT32_C( 59.10), SIMDE_FLOAT32_C( 703.12), SIMDE_FLOAT32_C( 374.71), SIMDE_FLOAT32_C( -674.86), SIMDE_FLOAT32_C( -198.23), SIMDE_FLOAT32_C( 769.31), SIMDE_FLOAT32_C( -859.16), SIMDE_FLOAT32_C( 111.69), SIMDE_FLOAT32_C( -420.38), SIMDE_FLOAT32_C( 345.23), SIMDE_FLOAT32_C( -263.27), SIMDE_FLOAT32_C( 122.33), SIMDE_FLOAT32_C( -11.31)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 13.03), SIMDE_FLOAT32_C( 510.73), SIMDE_FLOAT32_C( -6.19), SIMDE_FLOAT32_C( -107.87), SIMDE_FLOAT32_C( -441.23), 
SIMDE_FLOAT32_C( 120.22), SIMDE_FLOAT32_C( 331.67), SIMDE_FLOAT32_C( -661.48), SIMDE_FLOAT32_C( 626.32), SIMDE_FLOAT32_C( 505.21), SIMDE_FLOAT32_C( -161.83), SIMDE_FLOAT32_C( -671.34), SIMDE_FLOAT32_C( 514.06), SIMDE_FLOAT32_C( -807.61), SIMDE_FLOAT32_C( -556.61), SIMDE_FLOAT32_C( -451.72)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 153.13), SIMDE_FLOAT32_C( 47.64), SIMDE_FLOAT32_C( -104.37), SIMDE_FLOAT32_C( 49.04), SIMDE_FLOAT32_C( -57.32), SIMDE_FLOAT32_C( 88.16), SIMDE_FLOAT32_C( 2.51), SIMDE_FLOAT32_C( -158.69), SIMDE_FLOAT32_C( 504.22), SIMDE_FLOAT32_C( 774.61), SIMDE_FLOAT32_C( 32.31), SIMDE_FLOAT32_C( -594.62), SIMDE_FLOAT32_C( 2.01), SIMDE_FLOAT32_C( 362.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -621.70)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -175.84), SIMDE_FLOAT32_C( 968.44), SIMDE_FLOAT32_C( 85.61), SIMDE_FLOAT32_C( -394.33), SIMDE_FLOAT32_C( 358.35), SIMDE_FLOAT32_C( 605.54), SIMDE_FLOAT32_C( -698.35), SIMDE_FLOAT32_C( -764.09), SIMDE_FLOAT32_C( 164.55), SIMDE_FLOAT32_C( -893.53), SIMDE_FLOAT32_C( 171.50), SIMDE_FLOAT32_C( 629.19), SIMDE_FLOAT32_C( 42.86), SIMDE_FLOAT32_C( 22.57), SIMDE_FLOAT32_C( 198.87), SIMDE_FLOAT32_C( -209.78)), UINT16_C(35386), simde_mm512_set_ps(SIMDE_FLOAT32_C( -72.35), SIMDE_FLOAT32_C( -549.59), SIMDE_FLOAT32_C( 102.63), SIMDE_FLOAT32_C( 834.67), SIMDE_FLOAT32_C( 4.81), SIMDE_FLOAT32_C( 910.94), SIMDE_FLOAT32_C( 192.67), SIMDE_FLOAT32_C( 180.42), SIMDE_FLOAT32_C( 349.29), SIMDE_FLOAT32_C( 183.58), SIMDE_FLOAT32_C( 366.06), SIMDE_FLOAT32_C( -157.87), SIMDE_FLOAT32_C( -312.42), SIMDE_FLOAT32_C( 182.79), SIMDE_FLOAT32_C( -978.11), SIMDE_FLOAT32_C( 90.48)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 175.13), SIMDE_FLOAT32_C( -712.55), SIMDE_FLOAT32_C( -809.33), SIMDE_FLOAT32_C( 698.74), SIMDE_FLOAT32_C( 142.25), SIMDE_FLOAT32_C( -727.89), SIMDE_FLOAT32_C( -520.56), SIMDE_FLOAT32_C( 353.74), SIMDE_FLOAT32_C( -705.41), SIMDE_FLOAT32_C( -196.42), SIMDE_FLOAT32_C( 407.84), SIMDE_FLOAT32_C( -285.59), SIMDE_FLOAT32_C( 496.15), 
SIMDE_FLOAT32_C( 800.83), SIMDE_FLOAT32_C( -740.01), SIMDE_FLOAT32_C( 769.91)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 32.03), SIMDE_FLOAT32_C( 968.44), SIMDE_FLOAT32_C( 85.61), SIMDE_FLOAT32_C( -394.33), SIMDE_FLOAT32_C( 2.13), SIMDE_FLOAT32_C( 605.54), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -764.09), SIMDE_FLOAT32_C( 164.55), SIMDE_FLOAT32_C( -893.53), SIMDE_FLOAT32_C( 262.03), SIMDE_FLOAT32_C( -140.79), SIMDE_FLOAT32_C( 304.13), SIMDE_FLOAT32_C( 22.57), SIMDE_FLOAT32_C( -704.00), SIMDE_FLOAT32_C( -209.78)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -273.10), SIMDE_FLOAT32_C( -193.08), SIMDE_FLOAT32_C( 823.95), SIMDE_FLOAT32_C( 970.90), SIMDE_FLOAT32_C( -50.31), SIMDE_FLOAT32_C( 755.59), SIMDE_FLOAT32_C( -119.92), SIMDE_FLOAT32_C( -895.51), SIMDE_FLOAT32_C( 692.21), SIMDE_FLOAT32_C( 544.09), SIMDE_FLOAT32_C( 740.64), SIMDE_FLOAT32_C( 817.79), SIMDE_FLOAT32_C( 131.04), SIMDE_FLOAT32_C( 190.96), SIMDE_FLOAT32_C( 289.64), SIMDE_FLOAT32_C( -908.35)), UINT16_C( 1662), simde_mm512_set_ps(SIMDE_FLOAT32_C( 563.69), SIMDE_FLOAT32_C( 374.34), SIMDE_FLOAT32_C( -459.61), SIMDE_FLOAT32_C( 786.82), SIMDE_FLOAT32_C( 257.72), SIMDE_FLOAT32_C( -220.73), SIMDE_FLOAT32_C( -903.10), SIMDE_FLOAT32_C( 520.58), SIMDE_FLOAT32_C( -858.27), SIMDE_FLOAT32_C( 784.57), SIMDE_FLOAT32_C( 832.81), SIMDE_FLOAT32_C( -909.15), SIMDE_FLOAT32_C( 909.58), SIMDE_FLOAT32_C( -162.79), SIMDE_FLOAT32_C( 177.63), SIMDE_FLOAT32_C( 25.46)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 749.97), SIMDE_FLOAT32_C( -58.76), SIMDE_FLOAT32_C( 952.36), SIMDE_FLOAT32_C( 549.26), SIMDE_FLOAT32_C( 390.25), SIMDE_FLOAT32_C( -490.70), SIMDE_FLOAT32_C( 974.89), SIMDE_FLOAT32_C( 114.95), SIMDE_FLOAT32_C( 932.36), SIMDE_FLOAT32_C( -895.93), SIMDE_FLOAT32_C( -880.84), SIMDE_FLOAT32_C( -351.20), SIMDE_FLOAT32_C( -500.77), SIMDE_FLOAT32_C( 42.49), SIMDE_FLOAT32_C( 588.62), SIMDE_FLOAT32_C( 67.54)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -273.10), SIMDE_FLOAT32_C( -193.08), SIMDE_FLOAT32_C( 823.95), SIMDE_FLOAT32_C( 970.90), SIMDE_FLOAT32_C( 
-50.31), SIMDE_FLOAT32_C( -212.10), SIMDE_FLOAT32_C( 902.01), SIMDE_FLOAT32_C( -895.51), SIMDE_FLOAT32_C( 692.21), SIMDE_FLOAT32_C( 784.50), SIMDE_FLOAT32_C( 832.78), SIMDE_FLOAT32_C( -2.55), SIMDE_FLOAT32_C( 3.54), SIMDE_FLOAT32_C( 40.19), SIMDE_FLOAT32_C( 2.27), SIMDE_FLOAT32_C( -908.35)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 882.37), SIMDE_FLOAT32_C( -29.48), SIMDE_FLOAT32_C( 208.93), SIMDE_FLOAT32_C( -103.96), SIMDE_FLOAT32_C( -740.71), SIMDE_FLOAT32_C( -48.33), SIMDE_FLOAT32_C( -73.48), SIMDE_FLOAT32_C( 839.05), SIMDE_FLOAT32_C( -578.39), SIMDE_FLOAT32_C( -527.30), SIMDE_FLOAT32_C( 808.78), SIMDE_FLOAT32_C( 273.31), SIMDE_FLOAT32_C( -212.18), SIMDE_FLOAT32_C( 358.44), SIMDE_FLOAT32_C( -429.58), SIMDE_FLOAT32_C( 641.01)), UINT16_C(51954), simde_mm512_set_ps(SIMDE_FLOAT32_C( 159.89), SIMDE_FLOAT32_C( -431.59), SIMDE_FLOAT32_C( 692.24), SIMDE_FLOAT32_C( -189.31), SIMDE_FLOAT32_C( 84.37), SIMDE_FLOAT32_C( -971.33), SIMDE_FLOAT32_C( 50.60), SIMDE_FLOAT32_C( -980.81), SIMDE_FLOAT32_C( 362.99), SIMDE_FLOAT32_C( 722.54), SIMDE_FLOAT32_C( 564.98), SIMDE_FLOAT32_C( 242.21), SIMDE_FLOAT32_C( -393.24), SIMDE_FLOAT32_C( 738.28), SIMDE_FLOAT32_C( 192.78), SIMDE_FLOAT32_C( -360.32)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -421.96), SIMDE_FLOAT32_C( -741.98), SIMDE_FLOAT32_C( -791.19), SIMDE_FLOAT32_C( 363.28), SIMDE_FLOAT32_C( 168.15), SIMDE_FLOAT32_C( -247.26), SIMDE_FLOAT32_C( 113.19), SIMDE_FLOAT32_C( 128.76), SIMDE_FLOAT32_C( -773.73), SIMDE_FLOAT32_C( 125.25), SIMDE_FLOAT32_C( 337.69), SIMDE_FLOAT32_C( -644.22), SIMDE_FLOAT32_C( 869.52), SIMDE_FLOAT32_C( 681.99), SIMDE_FLOAT32_C( 444.36), SIMDE_FLOAT32_C( 361.44)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 146.89), SIMDE_FLOAT32_C( -2.27), SIMDE_FLOAT32_C( 208.93), SIMDE_FLOAT32_C( -103.96), SIMDE_FLOAT32_C( 42.04), SIMDE_FLOAT32_C( -48.33), SIMDE_FLOAT32_C( 48.59), SIMDE_FLOAT32_C( 839.05), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( 2.76), SIMDE_FLOAT32_C( 2.13), SIMDE_FLOAT32_C( 2.50), SIMDE_FLOAT32_C( -212.18), 
SIMDE_FLOAT32_C( 358.44), SIMDE_FLOAT32_C( 192.02), SIMDE_FLOAT32_C( 641.01)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -829.72), SIMDE_FLOAT32_C( 349.95), SIMDE_FLOAT32_C( 480.03), SIMDE_FLOAT32_C( -584.69), SIMDE_FLOAT32_C( 943.11), SIMDE_FLOAT32_C( -148.79), SIMDE_FLOAT32_C( -861.78), SIMDE_FLOAT32_C( -270.87), SIMDE_FLOAT32_C( -593.74), SIMDE_FLOAT32_C( -232.02), SIMDE_FLOAT32_C( -553.31), SIMDE_FLOAT32_C( 693.33), SIMDE_FLOAT32_C( -533.82), SIMDE_FLOAT32_C( -527.51), SIMDE_FLOAT32_C( -140.16), SIMDE_FLOAT32_C( 631.76)), UINT16_C(50263), simde_mm512_set_ps(SIMDE_FLOAT32_C( 173.33), SIMDE_FLOAT32_C( -281.34), SIMDE_FLOAT32_C( -45.38), SIMDE_FLOAT32_C( -230.23), SIMDE_FLOAT32_C( -937.39), SIMDE_FLOAT32_C( 53.86), SIMDE_FLOAT32_C( -719.43), SIMDE_FLOAT32_C( 465.60), SIMDE_FLOAT32_C( 111.60), SIMDE_FLOAT32_C( 156.01), SIMDE_FLOAT32_C( -703.23), SIMDE_FLOAT32_C( 763.33), SIMDE_FLOAT32_C( 119.12), SIMDE_FLOAT32_C( -295.56), SIMDE_FLOAT32_C( 313.51), SIMDE_FLOAT32_C( -193.21)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 396.81), SIMDE_FLOAT32_C( -330.26), SIMDE_FLOAT32_C( -500.01), SIMDE_FLOAT32_C( -117.27), SIMDE_FLOAT32_C( 805.35), SIMDE_FLOAT32_C( 722.55), SIMDE_FLOAT32_C( 274.82), SIMDE_FLOAT32_C( 32.73), SIMDE_FLOAT32_C( -564.66), SIMDE_FLOAT32_C( 180.25), SIMDE_FLOAT32_C( -307.87), SIMDE_FLOAT32_C( 888.96), SIMDE_FLOAT32_C( 806.77), SIMDE_FLOAT32_C( -526.35), SIMDE_FLOAT32_C( 889.50), SIMDE_FLOAT32_C( 196.92)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 132.27), SIMDE_FLOAT32_C( -264.26), SIMDE_FLOAT32_C( 480.03), SIMDE_FLOAT32_C( -584.69), SIMDE_FLOAT32_C( 943.11), SIMDE_FLOAT32_C( 2.31), SIMDE_FLOAT32_C( -861.78), SIMDE_FLOAT32_C( -270.87), SIMDE_FLOAT32_C( -593.74), SIMDE_FLOAT32_C( 148.00), SIMDE_FLOAT32_C( -553.31), SIMDE_FLOAT32_C( 632.33), SIMDE_FLOAT32_C( -533.82), SIMDE_FLOAT32_C( -2.06), SIMDE_FLOAT32_C( 2.44), SIMDE_FLOAT32_C( 192.13)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 425.40), SIMDE_FLOAT32_C( -281.85), SIMDE_FLOAT32_C( 596.53), SIMDE_FLOAT32_C( 231.55), 
SIMDE_FLOAT32_C( -189.24), SIMDE_FLOAT32_C( 962.54), SIMDE_FLOAT32_C( 598.72), SIMDE_FLOAT32_C( -728.82), SIMDE_FLOAT32_C( -31.34), SIMDE_FLOAT32_C( -498.28), SIMDE_FLOAT32_C( -106.48), SIMDE_FLOAT32_C( -850.40), SIMDE_FLOAT32_C( -763.83), SIMDE_FLOAT32_C( 176.55), SIMDE_FLOAT32_C( 356.84), SIMDE_FLOAT32_C( 827.17)), UINT16_C(54643), simde_mm512_set_ps(SIMDE_FLOAT32_C( 761.43), SIMDE_FLOAT32_C( 95.69), SIMDE_FLOAT32_C( 888.39), SIMDE_FLOAT32_C( -555.84), SIMDE_FLOAT32_C( 40.33), SIMDE_FLOAT32_C( 358.74), SIMDE_FLOAT32_C( -948.08), SIMDE_FLOAT32_C( 313.44), SIMDE_FLOAT32_C( -166.07), SIMDE_FLOAT32_C( -218.95), SIMDE_FLOAT32_C( 360.34), SIMDE_FLOAT32_C( 989.68), SIMDE_FLOAT32_C( 653.42), SIMDE_FLOAT32_C( 345.37), SIMDE_FLOAT32_C( 978.06), SIMDE_FLOAT32_C( 493.94)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 109.54), SIMDE_FLOAT32_C( 122.35), SIMDE_FLOAT32_C( 770.11), SIMDE_FLOAT32_C( 306.89), SIMDE_FLOAT32_C( -347.63), SIMDE_FLOAT32_C( 772.43), SIMDE_FLOAT32_C( 958.72), SIMDE_FLOAT32_C( -435.18), SIMDE_FLOAT32_C( -680.27), SIMDE_FLOAT32_C( -653.21), SIMDE_FLOAT32_C( 453.00), SIMDE_FLOAT32_C( 299.53), SIMDE_FLOAT32_C( -837.12), SIMDE_FLOAT32_C( -8.00), SIMDE_FLOAT32_C( 561.63), SIMDE_FLOAT32_C( -594.20)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.41), SIMDE_FLOAT32_C( 90.06), SIMDE_FLOAT32_C( 596.53), SIMDE_FLOAT32_C( 2.13), SIMDE_FLOAT32_C( -189.24), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( 598.72), SIMDE_FLOAT32_C( 305.13), SIMDE_FLOAT32_C( -31.34), SIMDE_FLOAT32_C( -2.04), SIMDE_FLOAT32_C( 320.00), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( -763.83), SIMDE_FLOAT32_C( 176.55), SIMDE_FLOAT32_C( 528.01), SIMDE_FLOAT32_C( 2.32)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_and_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_and_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; 
simde__m512d a; simde__m512d b; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -128.09), SIMDE_FLOAT64_C( -302.68), SIMDE_FLOAT64_C( 129.66), SIMDE_FLOAT64_C( -400.28), SIMDE_FLOAT64_C( -687.60), SIMDE_FLOAT64_C( -568.06), SIMDE_FLOAT64_C( -974.67), SIMDE_FLOAT64_C( 814.47)), UINT8_C( 92), simde_mm512_set_pd(SIMDE_FLOAT64_C( -854.57), SIMDE_FLOAT64_C( 353.06), SIMDE_FLOAT64_C( 903.81), SIMDE_FLOAT64_C( -723.16), SIMDE_FLOAT64_C( -194.97), SIMDE_FLOAT64_C( 114.89), SIMDE_FLOAT64_C( 497.66), SIMDE_FLOAT64_C( -446.09)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -876.89), SIMDE_FLOAT64_C( -283.08), SIMDE_FLOAT64_C( 642.58), SIMDE_FLOAT64_C( -973.49), SIMDE_FLOAT64_C( 853.14), SIMDE_FLOAT64_C( 647.44), SIMDE_FLOAT64_C( 237.52), SIMDE_FLOAT64_C( -333.12)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -128.09), SIMDE_FLOAT64_C( 257.02), SIMDE_FLOAT64_C( 129.66), SIMDE_FLOAT64_C( -705.16), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 2.53), SIMDE_FLOAT64_C( -974.67), SIMDE_FLOAT64_C( 814.47)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 61.63), SIMDE_FLOAT64_C( 987.28), SIMDE_FLOAT64_C( -845.84), SIMDE_FLOAT64_C( -822.08), SIMDE_FLOAT64_C( -946.95), SIMDE_FLOAT64_C( -157.17), SIMDE_FLOAT64_C( 808.43), SIMDE_FLOAT64_C( 716.34)), UINT8_C(128), simde_mm512_set_pd(SIMDE_FLOAT64_C( 876.49), SIMDE_FLOAT64_C( 503.33), SIMDE_FLOAT64_C( 842.44), SIMDE_FLOAT64_C( -417.76), SIMDE_FLOAT64_C( -171.61), SIMDE_FLOAT64_C( -96.79), SIMDE_FLOAT64_C( 45.73), SIMDE_FLOAT64_C( 312.19)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 54.70), SIMDE_FLOAT64_C( -228.57), SIMDE_FLOAT64_C( -133.57), SIMDE_FLOAT64_C( -803.47), SIMDE_FLOAT64_C( 821.61), SIMDE_FLOAT64_C( 198.21), SIMDE_FLOAT64_C( 476.20), SIMDE_FLOAT64_C( 925.71)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.41), SIMDE_FLOAT64_C( 987.28), SIMDE_FLOAT64_C( -845.84), SIMDE_FLOAT64_C( -822.08), SIMDE_FLOAT64_C( -946.95), SIMDE_FLOAT64_C( -157.17), SIMDE_FLOAT64_C( 808.43), SIMDE_FLOAT64_C( 716.34)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -24.63), 
SIMDE_FLOAT64_C( -758.53), SIMDE_FLOAT64_C( 216.18), SIMDE_FLOAT64_C( -869.86), SIMDE_FLOAT64_C( -556.61), SIMDE_FLOAT64_C( -869.93), SIMDE_FLOAT64_C( 935.72), SIMDE_FLOAT64_C( 467.65)), UINT8_C(132), simde_mm512_set_pd(SIMDE_FLOAT64_C( -373.41), SIMDE_FLOAT64_C( 558.94), SIMDE_FLOAT64_C( -966.64), SIMDE_FLOAT64_C( -741.87), SIMDE_FLOAT64_C( -915.12), SIMDE_FLOAT64_C( -226.56), SIMDE_FLOAT64_C( 374.42), SIMDE_FLOAT64_C( 490.85)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 143.45), SIMDE_FLOAT64_C( 16.49), SIMDE_FLOAT64_C( 323.05), SIMDE_FLOAT64_C( -564.38), SIMDE_FLOAT64_C( -932.37), SIMDE_FLOAT64_C( -126.95), SIMDE_FLOAT64_C( 46.50), SIMDE_FLOAT64_C( 812.07)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 138.19), SIMDE_FLOAT64_C( -758.53), SIMDE_FLOAT64_C( 216.18), SIMDE_FLOAT64_C( -869.86), SIMDE_FLOAT64_C( -556.61), SIMDE_FLOAT64_C( -56.13), SIMDE_FLOAT64_C( 935.72), SIMDE_FLOAT64_C( 467.65)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -587.51), SIMDE_FLOAT64_C( -331.04), SIMDE_FLOAT64_C( 711.75), SIMDE_FLOAT64_C( -149.95), SIMDE_FLOAT64_C( -625.31), SIMDE_FLOAT64_C( 387.07), SIMDE_FLOAT64_C( 510.51), SIMDE_FLOAT64_C( -791.87)), UINT8_C(197), simde_mm512_set_pd(SIMDE_FLOAT64_C( -995.18), SIMDE_FLOAT64_C( 720.96), SIMDE_FLOAT64_C( 859.59), SIMDE_FLOAT64_C( 20.65), SIMDE_FLOAT64_C( -207.40), SIMDE_FLOAT64_C( -632.30), SIMDE_FLOAT64_C( -783.67), SIMDE_FLOAT64_C( 389.24)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -619.09), SIMDE_FLOAT64_C( 681.55), SIMDE_FLOAT64_C( 914.89), SIMDE_FLOAT64_C( 240.13), SIMDE_FLOAT64_C( 14.06), SIMDE_FLOAT64_C( -669.70), SIMDE_FLOAT64_C( 554.04), SIMDE_FLOAT64_C( -602.80)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -611.02), SIMDE_FLOAT64_C( 640.52), SIMDE_FLOAT64_C( 711.75), SIMDE_FLOAT64_C( -149.95), SIMDE_FLOAT64_C( -625.31), SIMDE_FLOAT64_C( -536.00), SIMDE_FLOAT64_C( 510.51), SIMDE_FLOAT64_C( 2.04)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 28.17), SIMDE_FLOAT64_C( -545.33), SIMDE_FLOAT64_C( -993.85), SIMDE_FLOAT64_C( -636.74), SIMDE_FLOAT64_C( 315.22), 
SIMDE_FLOAT64_C( -560.48), SIMDE_FLOAT64_C( -264.88), SIMDE_FLOAT64_C( 866.66)), UINT8_C(152), simde_mm512_set_pd(SIMDE_FLOAT64_C( -378.42), SIMDE_FLOAT64_C( -112.43), SIMDE_FLOAT64_C( -147.85), SIMDE_FLOAT64_C( 481.16), SIMDE_FLOAT64_C( 980.68), SIMDE_FLOAT64_C( 999.62), SIMDE_FLOAT64_C( -784.92), SIMDE_FLOAT64_C( -245.05)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 838.26), SIMDE_FLOAT64_C( -863.14), SIMDE_FLOAT64_C( 336.07), SIMDE_FLOAT64_C( 237.32), SIMDE_FLOAT64_C( -803.75), SIMDE_FLOAT64_C( 816.96), SIMDE_FLOAT64_C( 217.54), SIMDE_FLOAT64_C( -660.63)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.27), SIMDE_FLOAT64_C( -545.33), SIMDE_FLOAT64_C( -993.85), SIMDE_FLOAT64_C( 224.06), SIMDE_FLOAT64_C( 768.50), SIMDE_FLOAT64_C( -560.48), SIMDE_FLOAT64_C( -264.88), SIMDE_FLOAT64_C( 866.66)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 106.10), SIMDE_FLOAT64_C( 605.57), SIMDE_FLOAT64_C( 481.85), SIMDE_FLOAT64_C( 491.86), SIMDE_FLOAT64_C( -77.86), SIMDE_FLOAT64_C( -839.61), SIMDE_FLOAT64_C( 936.76), SIMDE_FLOAT64_C( -659.60)), UINT8_C( 7), simde_mm512_set_pd(SIMDE_FLOAT64_C( 505.82), SIMDE_FLOAT64_C( -629.98), SIMDE_FLOAT64_C( -555.91), SIMDE_FLOAT64_C( -911.21), SIMDE_FLOAT64_C( 603.24), SIMDE_FLOAT64_C( -95.72), SIMDE_FLOAT64_C( 864.74), SIMDE_FLOAT64_C( 280.80)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 570.36), SIMDE_FLOAT64_C( 765.47), SIMDE_FLOAT64_C( 327.71), SIMDE_FLOAT64_C( -605.34), SIMDE_FLOAT64_C( 509.13), SIMDE_FLOAT64_C( -583.43), SIMDE_FLOAT64_C( -208.99), SIMDE_FLOAT64_C( 835.11)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 106.10), SIMDE_FLOAT64_C( 605.57), SIMDE_FLOAT64_C( 481.85), SIMDE_FLOAT64_C( 491.86), SIMDE_FLOAT64_C( -77.86), SIMDE_FLOAT64_C( -2.27), SIMDE_FLOAT64_C( 3.25), SIMDE_FLOAT64_C( 2.00)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -925.64), SIMDE_FLOAT64_C( 122.27), SIMDE_FLOAT64_C( -971.29), SIMDE_FLOAT64_C( -200.64), SIMDE_FLOAT64_C( 268.43), SIMDE_FLOAT64_C( 995.23), SIMDE_FLOAT64_C( 958.62), SIMDE_FLOAT64_C( -530.89)), UINT8_C(252), 
simde_mm512_set_pd(SIMDE_FLOAT64_C( -311.17), SIMDE_FLOAT64_C( -787.17), SIMDE_FLOAT64_C( -427.34), SIMDE_FLOAT64_C( 839.17), SIMDE_FLOAT64_C( -404.83), SIMDE_FLOAT64_C( 559.72), SIMDE_FLOAT64_C( 982.82), SIMDE_FLOAT64_C( -251.36)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 643.61), SIMDE_FLOAT64_C( 953.53), SIMDE_FLOAT64_C( -469.49), SIMDE_FLOAT64_C( -8.31), SIMDE_FLOAT64_C( 325.63), SIMDE_FLOAT64_C( -753.50), SIMDE_FLOAT64_C( -462.28), SIMDE_FLOAT64_C( -779.29)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.01), SIMDE_FLOAT64_C( 785.01), SIMDE_FLOAT64_C( -385.33), SIMDE_FLOAT64_C( 2.01), SIMDE_FLOAT64_C( 260.50), SIMDE_FLOAT64_C( 545.50), SIMDE_FLOAT64_C( 958.62), SIMDE_FLOAT64_C( -530.89)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -62.82), SIMDE_FLOAT64_C( -95.66), SIMDE_FLOAT64_C( 484.39), SIMDE_FLOAT64_C( -736.85), SIMDE_FLOAT64_C( 893.63), SIMDE_FLOAT64_C( -173.06), SIMDE_FLOAT64_C( 113.69), SIMDE_FLOAT64_C( 198.15)), UINT8_C(239), simde_mm512_set_pd(SIMDE_FLOAT64_C( 440.07), SIMDE_FLOAT64_C( 639.74), SIMDE_FLOAT64_C( 566.84), SIMDE_FLOAT64_C( 207.87), SIMDE_FLOAT64_C( -578.31), SIMDE_FLOAT64_C( -772.29), SIMDE_FLOAT64_C( 70.78), SIMDE_FLOAT64_C( 181.63)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -750.95), SIMDE_FLOAT64_C( 172.27), SIMDE_FLOAT64_C( -538.71), SIMDE_FLOAT64_C( -512.10), SIMDE_FLOAT64_C( -406.87), SIMDE_FLOAT64_C( -470.10), SIMDE_FLOAT64_C( -652.40), SIMDE_FLOAT64_C( -121.85)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.38), SIMDE_FLOAT64_C( 2.19), SIMDE_FLOAT64_C( 530.58), SIMDE_FLOAT64_C( -736.85), SIMDE_FLOAT64_C( -2.00), SIMDE_FLOAT64_C( -3.02), SIMDE_FLOAT64_C( 2.02), SIMDE_FLOAT64_C( 44.41)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_and_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_maskz_and_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512 a; simde__m512 b; 
simde__m512 r; } test_vec[8] = { { UINT16_C(57131), simde_mm512_set_ps(SIMDE_FLOAT32_C( 399.48), SIMDE_FLOAT32_C( -238.06), SIMDE_FLOAT32_C( -893.32), SIMDE_FLOAT32_C( -435.26), SIMDE_FLOAT32_C( 522.86), SIMDE_FLOAT32_C( -612.44), SIMDE_FLOAT32_C( 652.00), SIMDE_FLOAT32_C( 895.17), SIMDE_FLOAT32_C( -820.93), SIMDE_FLOAT32_C( 533.04), SIMDE_FLOAT32_C( 403.71), SIMDE_FLOAT32_C( 282.24), SIMDE_FLOAT32_C( 883.67), SIMDE_FLOAT32_C( 22.67), SIMDE_FLOAT32_C( 804.53), SIMDE_FLOAT32_C( 307.97)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 932.69), SIMDE_FLOAT32_C( 912.86), SIMDE_FLOAT32_C( 409.21), SIMDE_FLOAT32_C( 585.68), SIMDE_FLOAT32_C( -59.99), SIMDE_FLOAT32_C( -146.01), SIMDE_FLOAT32_C( 160.06), SIMDE_FLOAT32_C( -248.23), SIMDE_FLOAT32_C( 780.27), SIMDE_FLOAT32_C( -642.04), SIMDE_FLOAT32_C( -94.76), SIMDE_FLOAT32_C( 563.52), SIMDE_FLOAT32_C( -953.85), SIMDE_FLOAT32_C( -735.06), SIMDE_FLOAT32_C( 312.07), SIMDE_FLOAT32_C( -630.77)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.02), SIMDE_FLOAT32_C( 3.56), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.25), SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( -2.25), SIMDE_FLOAT32_C( 2.50), SIMDE_FLOAT32_C( 3.38), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 68.75), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 817.54), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.13), SIMDE_FLOAT32_C( 2.40)) }, { UINT16_C(37107), simde_mm512_set_ps(SIMDE_FLOAT32_C( 145.79), SIMDE_FLOAT32_C( -588.79), SIMDE_FLOAT32_C( 895.99), SIMDE_FLOAT32_C( -454.35), SIMDE_FLOAT32_C( 444.71), SIMDE_FLOAT32_C( 343.63), SIMDE_FLOAT32_C( -33.93), SIMDE_FLOAT32_C( -461.47), SIMDE_FLOAT32_C( -87.51), SIMDE_FLOAT32_C( -587.34), SIMDE_FLOAT32_C( -54.40), SIMDE_FLOAT32_C( -339.84), SIMDE_FLOAT32_C( -976.14), SIMDE_FLOAT32_C( 850.15), SIMDE_FLOAT32_C( -700.02), SIMDE_FLOAT32_C( -579.46)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 720.42), SIMDE_FLOAT32_C( -585.67), SIMDE_FLOAT32_C( -388.81), SIMDE_FLOAT32_C( 165.49), SIMDE_FLOAT32_C( 525.65), SIMDE_FLOAT32_C( 441.42), 
SIMDE_FLOAT32_C( 424.69), SIMDE_FLOAT32_C( 567.94), SIMDE_FLOAT32_C( -243.26), SIMDE_FLOAT32_C( 977.37), SIMDE_FLOAT32_C( -705.87), SIMDE_FLOAT32_C( 365.97), SIMDE_FLOAT32_C( -511.37), SIMDE_FLOAT32_C( 335.33), SIMDE_FLOAT32_C( -871.52), SIMDE_FLOAT32_C( -805.60)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.25), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 161.17), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -40.75), SIMDE_FLOAT32_C( 577.34), SIMDE_FLOAT32_C( -2.25), SIMDE_FLOAT32_C( 321.81), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -548.02), SIMDE_FLOAT32_C( -513.07)) }, { UINT16_C(56908), simde_mm512_set_ps(SIMDE_FLOAT32_C( 813.26), SIMDE_FLOAT32_C( -716.74), SIMDE_FLOAT32_C( -548.55), SIMDE_FLOAT32_C( -83.12), SIMDE_FLOAT32_C( 301.84), SIMDE_FLOAT32_C( -843.69), SIMDE_FLOAT32_C( -236.76), SIMDE_FLOAT32_C( -34.42), SIMDE_FLOAT32_C( -591.83), SIMDE_FLOAT32_C( 11.80), SIMDE_FLOAT32_C( 521.39), SIMDE_FLOAT32_C( -937.14), SIMDE_FLOAT32_C( -662.16), SIMDE_FLOAT32_C( -974.03), SIMDE_FLOAT32_C( 576.46), SIMDE_FLOAT32_C( 704.69)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 914.44), SIMDE_FLOAT32_C( -904.34), SIMDE_FLOAT32_C( -4.84), SIMDE_FLOAT32_C( -59.72), SIMDE_FLOAT32_C( -523.01), SIMDE_FLOAT32_C( 236.78), SIMDE_FLOAT32_C( 88.72), SIMDE_FLOAT32_C( -251.99), SIMDE_FLOAT32_C( -782.65), SIMDE_FLOAT32_C( -38.86), SIMDE_FLOAT32_C( 670.53), SIMDE_FLOAT32_C( 706.52), SIMDE_FLOAT32_C( 990.40), SIMDE_FLOAT32_C( -812.48), SIMDE_FLOAT32_C( -152.33), SIMDE_FLOAT32_C( 172.86)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 768.25), SIMDE_FLOAT32_C( -648.08), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -41.53), SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( 3.01), SIMDE_FLOAT32_C( 40.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.38), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 662.13), SIMDE_FLOAT32_C( -780.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 
0.00)) }, { UINT16_C(13045), simde_mm512_set_ps(SIMDE_FLOAT32_C( -696.33), SIMDE_FLOAT32_C( -640.98), SIMDE_FLOAT32_C( -234.04), SIMDE_FLOAT32_C( 691.30), SIMDE_FLOAT32_C( 422.16), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( 150.98), SIMDE_FLOAT32_C( -727.93), SIMDE_FLOAT32_C( -292.95), SIMDE_FLOAT32_C( -168.48), SIMDE_FLOAT32_C( 430.75), SIMDE_FLOAT32_C( 298.75), SIMDE_FLOAT32_C( -938.39), SIMDE_FLOAT32_C( 166.50), SIMDE_FLOAT32_C( 295.10), SIMDE_FLOAT32_C( -66.94)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 725.67), SIMDE_FLOAT32_C( 691.57), SIMDE_FLOAT32_C( 408.92), SIMDE_FLOAT32_C( -190.91), SIMDE_FLOAT32_C( 682.56), SIMDE_FLOAT32_C( 311.99), SIMDE_FLOAT32_C( -213.61), SIMDE_FLOAT32_C( -160.20), SIMDE_FLOAT32_C( -421.91), SIMDE_FLOAT32_C( 600.12), SIMDE_FLOAT32_C( 657.47), SIMDE_FLOAT32_C( 816.91), SIMDE_FLOAT32_C( 267.68), SIMDE_FLOAT32_C( 898.52), SIMDE_FLOAT32_C( -80.12), SIMDE_FLOAT32_C( -724.23)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 200.00), SIMDE_FLOAT32_C( 2.70), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 148.59), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -292.88), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.07), SIMDE_FLOAT32_C( 2.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.51), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -2.08)) }, { UINT16_C(11913), simde_mm512_set_ps(SIMDE_FLOAT32_C( 315.17), SIMDE_FLOAT32_C( -863.78), SIMDE_FLOAT32_C( 344.73), SIMDE_FLOAT32_C( -570.00), SIMDE_FLOAT32_C( -265.79), SIMDE_FLOAT32_C( 403.67), SIMDE_FLOAT32_C( -62.80), SIMDE_FLOAT32_C( 251.47), SIMDE_FLOAT32_C( 143.15), SIMDE_FLOAT32_C( 960.55), SIMDE_FLOAT32_C( -156.81), SIMDE_FLOAT32_C( 258.89), SIMDE_FLOAT32_C( 14.13), SIMDE_FLOAT32_C( 117.08), SIMDE_FLOAT32_C( -266.20), SIMDE_FLOAT32_C( 383.43)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -568.28), SIMDE_FLOAT32_C( -745.49), SIMDE_FLOAT32_C( -964.75), SIMDE_FLOAT32_C( 259.38), SIMDE_FLOAT32_C( 750.99), SIMDE_FLOAT32_C( -521.20), SIMDE_FLOAT32_C( 513.21), 
SIMDE_FLOAT32_C( 787.79), SIMDE_FLOAT32_C( 316.72), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( -845.60), SIMDE_FLOAT32_C( 815.31), SIMDE_FLOAT32_C( -301.01), SIMDE_FLOAT32_C( 479.36), SIMDE_FLOAT32_C( -159.67), SIMDE_FLOAT32_C( -155.94)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.50), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.01), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 142.02), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 8.13), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 155.69)) }, { UINT16_C(38742), simde_mm512_set_ps(SIMDE_FLOAT32_C( -500.21), SIMDE_FLOAT32_C( -899.42), SIMDE_FLOAT32_C( 770.51), SIMDE_FLOAT32_C( 777.58), SIMDE_FLOAT32_C( 547.07), SIMDE_FLOAT32_C( 747.18), SIMDE_FLOAT32_C( 16.17), SIMDE_FLOAT32_C( 859.01), SIMDE_FLOAT32_C( 78.72), SIMDE_FLOAT32_C( -378.16), SIMDE_FLOAT32_C( -980.04), SIMDE_FLOAT32_C( 143.56), SIMDE_FLOAT32_C( -706.63), SIMDE_FLOAT32_C( -986.84), SIMDE_FLOAT32_C( -673.32), SIMDE_FLOAT32_C( -774.96)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -321.38), SIMDE_FLOAT32_C( -244.51), SIMDE_FLOAT32_C( 579.94), SIMDE_FLOAT32_C( 895.47), SIMDE_FLOAT32_C( -321.30), SIMDE_FLOAT32_C( 92.97), SIMDE_FLOAT32_C( -270.40), SIMDE_FLOAT32_C( -439.43), SIMDE_FLOAT32_C( 971.85), SIMDE_FLOAT32_C( 799.33), SIMDE_FLOAT32_C( -17.61), SIMDE_FLOAT32_C( -762.15), SIMDE_FLOAT32_C( -813.48), SIMDE_FLOAT32_C( 494.42), SIMDE_FLOAT32_C( 374.64), SIMDE_FLOAT32_C( -744.47)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -320.13), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 777.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.89), SIMDE_FLOAT32_C( 16.13), SIMDE_FLOAT32_C( 3.29), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.08), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.23), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 3.85), SIMDE_FLOAT32_C( 2.63), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(53846), 
simde_mm512_set_ps(SIMDE_FLOAT32_C( 400.20), SIMDE_FLOAT32_C( 275.50), SIMDE_FLOAT32_C( 916.86), SIMDE_FLOAT32_C( -531.67), SIMDE_FLOAT32_C( -909.37), SIMDE_FLOAT32_C( 993.65), SIMDE_FLOAT32_C( 633.64), SIMDE_FLOAT32_C( -178.42), SIMDE_FLOAT32_C( 412.35), SIMDE_FLOAT32_C( -571.03), SIMDE_FLOAT32_C( 345.26), SIMDE_FLOAT32_C( 493.12), SIMDE_FLOAT32_C( -719.68), SIMDE_FLOAT32_C( 769.35), SIMDE_FLOAT32_C( -373.84), SIMDE_FLOAT32_C( -540.22)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -282.19), SIMDE_FLOAT32_C( -584.18), SIMDE_FLOAT32_C( 433.06), SIMDE_FLOAT32_C( 752.23), SIMDE_FLOAT32_C( -792.10), SIMDE_FLOAT32_C( 940.65), SIMDE_FLOAT32_C( -237.54), SIMDE_FLOAT32_C( -796.45), SIMDE_FLOAT32_C( 821.11), SIMDE_FLOAT32_C( -769.48), SIMDE_FLOAT32_C( 951.19), SIMDE_FLOAT32_C( 526.89), SIMDE_FLOAT32_C( 481.01), SIMDE_FLOAT32_C( -678.70), SIMDE_FLOAT32_C( 690.79), SIMDE_FLOAT32_C( -617.07)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 272.19), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 528.17), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.19), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -513.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 512.07), SIMDE_FLOAT32_C( 2.63), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(45516), simde_mm512_set_ps(SIMDE_FLOAT32_C( -637.18), SIMDE_FLOAT32_C( 934.54), SIMDE_FLOAT32_C( 112.29), SIMDE_FLOAT32_C( 139.60), SIMDE_FLOAT32_C( -371.31), SIMDE_FLOAT32_C( -676.65), SIMDE_FLOAT32_C( -607.44), SIMDE_FLOAT32_C( -108.80), SIMDE_FLOAT32_C( -631.32), SIMDE_FLOAT32_C( 553.47), SIMDE_FLOAT32_C( -653.07), SIMDE_FLOAT32_C( -272.71), SIMDE_FLOAT32_C( -438.05), SIMDE_FLOAT32_C( -69.28), SIMDE_FLOAT32_C( 220.30), SIMDE_FLOAT32_C( -879.60)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 695.29), SIMDE_FLOAT32_C( -288.10), SIMDE_FLOAT32_C( 8.22), SIMDE_FLOAT32_C( 267.50), SIMDE_FLOAT32_C( -160.08), SIMDE_FLOAT32_C( 251.69), SIMDE_FLOAT32_C( 416.95), SIMDE_FLOAT32_C( 429.19), 
SIMDE_FLOAT32_C( -938.09), SIMDE_FLOAT32_C( -996.83), SIMDE_FLOAT32_C( 772.01), SIMDE_FLOAT32_C( -88.73), SIMDE_FLOAT32_C( -661.22), SIMDE_FLOAT32_C( -945.44), SIMDE_FLOAT32_C( 528.59), SIMDE_FLOAT32_C( 677.63)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 565.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.01), SIMDE_FLOAT32_C( 129.50), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 104.30), SIMDE_FLOAT32_C( -546.07), SIMDE_FLOAT32_C( 544.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -2.02), SIMDE_FLOAT32_C( -2.13), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_maskz_and_ps(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; }
/* Test case for simde_mm512_maskz_and_pd.
 *
 * test_vec holds 8 entries, each with a simde__mmask8 mask k, two simde__m512d
 * inputs a and b, and the expected result r. The loop calls
 * simde_mm512_maskz_and_pd(k, a, b) for every entry and checks the result
 * against r with simde_assert_m512d_close(r, expected, 1).
 * NOTE(review): the trailing 1 is presumably a precision/tolerance argument,
 * which would make this an approximate comparison -- confirm against the
 * macro's definition.
 *
 * In the first entry (k = 62) the expected value is 0.00 in exactly the
 * lanes whose bit in k is clear.
 *
 * Returns 0 after the loop over test_vec has completed. */
static int test_simde_mm512_maskz_and_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512d a; simde__m512d b; simde__m512d r; } test_vec[8] = { { UINT8_C( 62), simde_mm512_set_pd(SIMDE_FLOAT64_C( 337.23), SIMDE_FLOAT64_C( -706.51), SIMDE_FLOAT64_C( -51.03), SIMDE_FLOAT64_C( -11.12), SIMDE_FLOAT64_C( 780.39), SIMDE_FLOAT64_C( 482.32), SIMDE_FLOAT64_C( -313.20), SIMDE_FLOAT64_C( 986.27)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -459.33), SIMDE_FLOAT64_C( 566.75), SIMDE_FLOAT64_C( 454.16), SIMDE_FLOAT64_C( -566.29), SIMDE_FLOAT64_C( 217.01), SIMDE_FLOAT64_C( -444.68), SIMDE_FLOAT64_C( 725.53), SIMDE_FLOAT64_C( -673.17)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 48.02), SIMDE_FLOAT64_C( -2.02), SIMDE_FLOAT64_C( 3.02), SIMDE_FLOAT64_C( 416.00), SIMDE_FLOAT64_C( 2.31), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(178), simde_mm512_set_pd(SIMDE_FLOAT64_C( 988.70), SIMDE_FLOAT64_C( 952.56), SIMDE_FLOAT64_C( -917.57), SIMDE_FLOAT64_C( -161.93), SIMDE_FLOAT64_C( 553.05), SIMDE_FLOAT64_C( 358.83), SIMDE_FLOAT64_C( -335.21), SIMDE_FLOAT64_C( 243.33)), simde_mm512_set_pd(SIMDE_FLOAT64_C(
368.67), SIMDE_FLOAT64_C( -326.68), SIMDE_FLOAT64_C( -767.44), SIMDE_FLOAT64_C( -965.45), SIMDE_FLOAT64_C( 160.34), SIMDE_FLOAT64_C( -153.49), SIMDE_FLOAT64_C( 842.87), SIMDE_FLOAT64_C( -959.77)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.75), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -661.06), SIMDE_FLOAT64_C( -2.52), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.04), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(233), simde_mm512_set_pd(SIMDE_FLOAT64_C( 789.99), SIMDE_FLOAT64_C( -0.83), SIMDE_FLOAT64_C( -595.87), SIMDE_FLOAT64_C( -556.04), SIMDE_FLOAT64_C( -673.58), SIMDE_FLOAT64_C( 820.52), SIMDE_FLOAT64_C( 763.24), SIMDE_FLOAT64_C( 747.54)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -863.51), SIMDE_FLOAT64_C( 297.93), SIMDE_FLOAT64_C( 664.70), SIMDE_FLOAT64_C( 43.00), SIMDE_FLOAT64_C( 283.69), SIMDE_FLOAT64_C( -882.73), SIMDE_FLOAT64_C( 56.70), SIMDE_FLOAT64_C( -683.31)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 789.50), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 528.57), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.13), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 683.04)) }, { UINT8_C( 29), simde_mm512_set_pd(SIMDE_FLOAT64_C( -942.62), SIMDE_FLOAT64_C( 810.42), SIMDE_FLOAT64_C( -781.08), SIMDE_FLOAT64_C( 565.31), SIMDE_FLOAT64_C( -528.23), SIMDE_FLOAT64_C( -642.03), SIMDE_FLOAT64_C( -124.04), SIMDE_FLOAT64_C( -13.65)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 20.85), SIMDE_FLOAT64_C( -517.24), SIMDE_FLOAT64_C( -21.32), SIMDE_FLOAT64_C( 729.98), SIMDE_FLOAT64_C( -763.15), SIMDE_FLOAT64_C( 885.38), SIMDE_FLOAT64_C( 783.63), SIMDE_FLOAT64_C( 470.19)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 529.29), SIMDE_FLOAT64_C( -528.13), SIMDE_FLOAT64_C( 512.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 12.63)) }, { UINT8_C(102), simde_mm512_set_pd(SIMDE_FLOAT64_C( -36.60), SIMDE_FLOAT64_C( -71.02), SIMDE_FLOAT64_C( 654.55), SIMDE_FLOAT64_C( -335.18), SIMDE_FLOAT64_C( -889.86), SIMDE_FLOAT64_C( 
-624.64), SIMDE_FLOAT64_C( 369.01), SIMDE_FLOAT64_C( 798.90)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -217.14), SIMDE_FLOAT64_C( -334.27), SIMDE_FLOAT64_C( 522.28), SIMDE_FLOAT64_C( 754.78), SIMDE_FLOAT64_C( -987.63), SIMDE_FLOAT64_C( 746.58), SIMDE_FLOAT64_C( 358.61), SIMDE_FLOAT64_C( -154.14)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -67.00), SIMDE_FLOAT64_C( 522.02), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 608.50), SIMDE_FLOAT64_C( 352.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(126), simde_mm512_set_pd(SIMDE_FLOAT64_C( -16.31), SIMDE_FLOAT64_C( -95.18), SIMDE_FLOAT64_C( 860.06), SIMDE_FLOAT64_C( 464.41), SIMDE_FLOAT64_C( 822.39), SIMDE_FLOAT64_C( 185.79), SIMDE_FLOAT64_C( 959.83), SIMDE_FLOAT64_C( -98.41)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -560.42), SIMDE_FLOAT64_C( -521.57), SIMDE_FLOAT64_C( -947.45), SIMDE_FLOAT64_C( 99.55), SIMDE_FLOAT64_C( 108.53), SIMDE_FLOAT64_C( 194.26), SIMDE_FLOAT64_C( 449.89), SIMDE_FLOAT64_C( 718.27)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -2.04), SIMDE_FLOAT64_C( 784.01), SIMDE_FLOAT64_C( 96.03), SIMDE_FLOAT64_C( 3.14), SIMDE_FLOAT64_C( 128.26), SIMDE_FLOAT64_C( 3.51), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(231), simde_mm512_set_pd(SIMDE_FLOAT64_C( -343.46), SIMDE_FLOAT64_C( 643.05), SIMDE_FLOAT64_C( 758.23), SIMDE_FLOAT64_C( 243.41), SIMDE_FLOAT64_C( -569.27), SIMDE_FLOAT64_C( 62.99), SIMDE_FLOAT64_C( 403.36), SIMDE_FLOAT64_C( -111.26)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 594.69), SIMDE_FLOAT64_C( 416.92), SIMDE_FLOAT64_C( 294.94), SIMDE_FLOAT64_C( -386.69), SIMDE_FLOAT64_C( 444.27), SIMDE_FLOAT64_C( 112.48), SIMDE_FLOAT64_C( 775.25), SIMDE_FLOAT64_C( 973.66)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.01), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.27), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 56.24), SIMDE_FLOAT64_C( 3.02), SIMDE_FLOAT64_C( 3.28)) }, { UINT8_C(248), simde_mm512_set_pd(SIMDE_FLOAT64_C( 559.94), SIMDE_FLOAT64_C( -177.36), 
SIMDE_FLOAT64_C( 459.52), SIMDE_FLOAT64_C( 151.00), SIMDE_FLOAT64_C( -261.20), SIMDE_FLOAT64_C( 619.75), SIMDE_FLOAT64_C( -541.43), SIMDE_FLOAT64_C( -420.37)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 279.51), SIMDE_FLOAT64_C( 143.05), SIMDE_FLOAT64_C( 835.37), SIMDE_FLOAT64_C( -486.11), SIMDE_FLOAT64_C( 461.53), SIMDE_FLOAT64_C( 410.57), SIMDE_FLOAT64_C( -362.30), SIMDE_FLOAT64_C( -345.54)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.18), SIMDE_FLOAT64_C( 129.05), SIMDE_FLOAT64_C( 3.01), SIMDE_FLOAT64_C( 147.00), SIMDE_FLOAT64_C( 261.01), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_maskz_and_pd(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; }
/* Test case for simde_mm512_and_epi32.
 *
 * test_vec holds 8 entries, each with two simde__m512i inputs a and b and the
 * expected result r, all built with simde_mm512_set_epi32 from sixteen 32-bit
 * integer constants. The loop calls simde_mm512_and_epi32(a, b) for every
 * entry and checks the result against r with
 * simde_assert_m512i_i32(r, ==, expected).
 *
 * Returns 0 after the loop over test_vec has completed. */
static int test_simde_mm512_and_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 2103907232), INT32_C(-1995421302), INT32_C( 1328084931), INT32_C( -379562245), INT32_C( 1144599747), INT32_C(-1418414219), INT32_C( 1379143176), INT32_C(-2075387410), INT32_C(-1152868472), INT32_C( 1425101887), INT32_C(-1626225579), INT32_C( 2014677400), INT32_C( 1108777022), INT32_C( 877429697), INT32_C( 1558575022), INT32_C( 651318578)), simde_mm512_set_epi32(INT32_C( -803993576), INT32_C( 163813867), INT32_C(-1017884831), INT32_C(-1258992440), INT32_C( 1182354404), INT32_C(-1425047283), INT32_C( 1708628167), INT32_C(-2051115159), INT32_C( 143410800), INT32_C(-1041180848), INT32_C(-1023702255), INT32_C( -240255997), INT32_C( 978107452), INT32_C(-1343783755), INT32_C( -880122145), INT32_C( 1183808039)), simde_mm512_set_epi32(INT32_C( 1342439424), INT32_C( 150997386), INT32_C( 1124091713), INT32_C(-1604303672), INT32_C( 1144586432), INT32_C(-1425768187), INT32_C( 1075054592), INT32_C(-2079583896), INT32_C( 134743552), INT32_C( 1089491984), INT32_C(-2112782319), INT32_C(
1879410688), INT32_C( 33855548), INT32_C( 608436353), INT32_C( 1216374414), INT32_C( 109203490)) }, { simde_mm512_set_epi32(INT32_C( 1231278072), INT32_C( 832865002), INT32_C(-1694490420), INT32_C( -466764866), INT32_C( 1702721236), INT32_C( 2092439418), INT32_C( 611933067), INT32_C(-1184445720), INT32_C(-1689922195), INT32_C(-1140532352), INT32_C( 158346875), INT32_C( 1089717474), INT32_C( 1230253618), INT32_C( 1504968720), INT32_C( 653725328), INT32_C( 1822881045)), simde_mm512_set_epi32(INT32_C(-1605590998), INT32_C( 1810066171), INT32_C( -10167007), INT32_C( 1359569321), INT32_C(-1430119757), INT32_C( 256064867), INT32_C( 788618356), INT32_C( -392789188), INT32_C( -613873031), INT32_C( 773158597), INT32_C( -194106681), INT32_C( 165465349), INT32_C(-1346434037), INT32_C(-1412186885), INT32_C( -632750822), INT32_C( 1966832804)), simde_mm512_set_epi32(INT32_C( 4227112), INT32_C( 564134634), INT32_C(-1694498816), INT32_C( 1074340264), INT32_C( 541069968), INT32_C( 201332066), INT32_C( 604067840), INT32_C(-1475968472), INT32_C(-1690238871), INT32_C( 738480768), INT32_C( 6302275), INT32_C( 13681664), INT32_C( 152308738), INT32_C( 160432144), INT32_C( 37751824), INT32_C( 1679979524)) }, { simde_mm512_set_epi32(INT32_C(-1888865381), INT32_C( 15541452), INT32_C( -670396349), INT32_C(-1090081489), INT32_C( 116025329), INT32_C( 130963716), INT32_C( 230354364), INT32_C( 1174065929), INT32_C( 1971493681), INT32_C(-1343257591), INT32_C(-1419733408), INT32_C( 659096905), INT32_C( 183711411), INT32_C( -298263182), INT32_C(-1286938208), INT32_C( 537660993)), simde_mm512_set_epi32(INT32_C( 1616282233), INT32_C(-2132390836), INT32_C( -153917613), INT32_C( 736513734), INT32_C(-1311057544), INT32_C( 505476811), INT32_C( 1767695145), INT32_C( 157469724), INT32_C( -371725260), INT32_C( 1996701751), INT32_C(-1377678442), INT32_C( -132601652), INT32_C( -903948497), INT32_C( -794660034), INT32_C( 173952757), INT32_C( 1507611872)), simde_mm512_set_epi32(INT32_C( 4325913), INT32_C( 
14943308), INT32_C( -805175229), INT32_C( 721816582), INT32_C( 13255024), INT32_C( 100683776), INT32_C( 152625448), INT32_C( 23248904), INT32_C( 1635944496), INT32_C( 654508033), INT32_C(-1453325824), INT32_C( 537395272), INT32_C( 168957475), INT32_C(-1071623886), INT32_C( 38420640), INT32_C( 787520)) }, { simde_mm512_set_epi32(INT32_C( -748350470), INT32_C( 1755197901), INT32_C( 1090059253), INT32_C( 1329426651), INT32_C(-1604442789), INT32_C( 878047098), INT32_C(-1682276633), INT32_C( -78811559), INT32_C( -973139496), INT32_C( 91517188), INT32_C( 37440120), INT32_C( 829401648), INT32_C( 860279707), INT32_C( 218373799), INT32_C( -362113249), INT32_C( -694086277)), simde_mm512_set_epi32(INT32_C( 892006932), INT32_C(-1158010747), INT32_C( -700127305), INT32_C(-1058033333), INT32_C( 2135825335), INT32_C( -510043422), INT32_C( -139544800), INT32_C( 95748631), INT32_C( 1671238992), INT32_C( 1305915968), INT32_C( -582790199), INT32_C( 1758236157), INT32_C( 986405639), INT32_C( 1121234438), INT32_C( -731393712), INT32_C( -784753228)), simde_mm512_set_epi32(INT32_C( 287314448), INT32_C( 681189509), INT32_C( 1077994421), INT32_C( 1076703307), INT32_C( 541982995), INT32_C( 538004066), INT32_C(-1817561568), INT32_C( 17105425), INT32_C( 1100812624), INT32_C( 89403392), INT32_C( 213576), INT32_C( 541886512), INT32_C( 843207427), INT32_C( 265734), INT32_C(-1067282160), INT32_C( -803138256)) }, { simde_mm512_set_epi32(INT32_C( 1347900829), INT32_C(-1792354715), INT32_C( -371177698), INT32_C( 255088013), INT32_C( 1961231505), INT32_C( -659343095), INT32_C( 1620234692), INT32_C( 843561067), INT32_C( 1265300992), INT32_C(-1675104490), INT32_C( -873664156), INT32_C(-2045109653), INT32_C( 2057630636), INT32_C( 335188274), INT32_C( 1272591061), INT32_C( -327494197)), simde_mm512_set_epi32(INT32_C(-2095740678), INT32_C(-1857753563), INT32_C(-1236342636), INT32_C( 1439297909), INT32_C( -576201057), INT32_C( 1488873085), INT32_C(-1369304746), INT32_C( 567848046), INT32_C( 1335236564), 
INT32_C( -942680632), INT32_C(-1512916560), INT32_C( -697747292), INT32_C(-1779695782), INT32_C( 491327584), INT32_C( -563681080), INT32_C( 1527319596)), simde_mm512_set_epi32(INT32_C( 1376408), INT32_C(-1862220251), INT32_C(-1606402028), INT32_C( 83906821), INT32_C( 1420166289), INT32_C( 1488068617), INT32_C( 537006404), INT32_C( 541108330), INT32_C( 1258427904), INT32_C(-2079865600), INT32_C(-2118078176), INT32_C(-2046289888), INT32_C( 278983432), INT32_C( 289935392), INT32_C( 1245847744), INT32_C( 1208484872)) }, { simde_mm512_set_epi32(INT32_C( 131205926), INT32_C( 2061955170), INT32_C( 37003574), INT32_C( 1649229141), INT32_C( 612060260), INT32_C(-1402263233), INT32_C( -513572270), INT32_C( 701923816), INT32_C( 511549547), INT32_C( 969083331), INT32_C( 1364542630), INT32_C( -822209230), INT32_C(-1549704264), INT32_C(-1157339218), INT32_C(-2025137124), INT32_C(-1631723043)), simde_mm512_set_epi32(INT32_C( 424825857), INT32_C( 434716327), INT32_C( 1663095683), INT32_C( 1730428966), INT32_C( -252943126), INT32_C( 373990324), INT32_C( 2100741912), INT32_C(-1005385937), INT32_C(-1470990839), INT32_C(-1200692008), INT32_C(-2098508971), INT32_C( 488824783), INT32_C( 1647236603), INT32_C(-1221154150), INT32_C(-1012750616), INT32_C( 1849794587)), simde_mm512_set_epi32(INT32_C( 22151168), INT32_C( 417345570), INT32_C( 35684610), INT32_C( 1644429316), INT32_C( 543704160), INT32_C( 71967028), INT32_C( 1629651472), INT32_C( 1179944), INT32_C( 139468809), INT32_C( 943849664), INT32_C( 4276228), INT32_C( 203560194), INT32_C( 572539320), INT32_C(-1291574646), INT32_C(-2096987128), INT32_C( 234987545)) }, { simde_mm512_set_epi32(INT32_C(-2080136983), INT32_C( -623547588), INT32_C( 1015056564), INT32_C(-1333355305), INT32_C( 1925062912), INT32_C( 330138155), INT32_C( -444195598), INT32_C( 874806560), INT32_C( 319126943), INT32_C( 475403370), INT32_C( -873396634), INT32_C(-1835948135), INT32_C(-1570208244), INT32_C( -244400530), INT32_C( 1126824505), INT32_C( 1036340167)), 
simde_mm512_set_epi32(INT32_C(-1989715102), INT32_C( 1785805415), INT32_C( 1739507553), INT32_C(-1932540399), INT32_C( -641363523), INT32_C( 72499486), INT32_C(-2005610208), INT32_C( -497730582), INT32_C( 312611747), INT32_C( 793357988), INT32_C( 607421194), INT32_C(-1375787856), INT32_C( 1799684145), INT32_C( -551549754), INT32_C( 1249951615), INT32_C( 1447553529)), simde_mm512_set_epi32(INT32_C(-2147286944), INT32_C( 1246833188), INT32_C( 612401184), INT32_C(-2138668015), INT32_C( 1350828288), INT32_C( 10), INT32_C(-2147217376), INT32_C( 537147680), INT32_C( 301994371), INT32_C( 205521952), INT32_C( 3146242), INT32_C(-2137971568), INT32_C( 574619648), INT32_C( -786430906), INT32_C( 1107343929), INT32_C( 340085185)) }, { simde_mm512_set_epi32(INT32_C( 423295425), INT32_C( -460615607), INT32_C( 1208771148), INT32_C(-2128303155), INT32_C( -738338972), INT32_C( 2110676823), INT32_C(-1405320678), INT32_C(-2007459833), INT32_C( 1043638626), INT32_C( -542891463), INT32_C( 629803756), INT32_C(-1216921331), INT32_C( -301860714), INT32_C( 317296385), INT32_C( 1833800187), INT32_C( -645353377)), simde_mm512_set_epi32(INT32_C( -257851255), INT32_C( -479522767), INT32_C( 745275629), INT32_C(-1783480446), INT32_C(-1431666964), INT32_C( -422291816), INT32_C( -309252994), INT32_C(-2112946871), INT32_C(-2123521230), INT32_C( -883174176), INT32_C( -332498316), INT32_C( 142889340), INT32_C( 1231002435), INT32_C( 1681334055), INT32_C( 138372594), INT32_C( -878588971)), simde_mm512_set_epi32(INT32_C( 270565505), INT32_C( -536145919), INT32_C( 135004236), INT32_C(-2128598144), INT32_C(-2102897052), INT32_C( 1690588688), INT32_C(-1408237542), INT32_C(-2146920447), INT32_C( 2400546), INT32_C( -889192416), INT32_C( 604637796), INT32_C( 278796), INT32_C( 1208064002), INT32_C( 2166529), INT32_C( 135070194), INT32_C( -914325419)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_and_epi32(test_vec[i].a, test_vec[i].b); 
simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; }
/* Test case for simde_mm512_mask_and_epi32.
 *
 * test_vec holds 8 entries, each with a simde__m512i src, a simde__mmask16
 * mask k, two simde__m512i inputs a and b, and the expected result r. The
 * loop calls simde_mm512_mask_and_epi32(src, k, a, b) for every entry and
 * checks the result against r with simde_assert_m512i_i32(r, ==, expected).
 *
 * In the first entry (k = 31271) the expected value equals the corresponding
 * src lane in exactly the lanes whose bit in k is clear.
 *
 * Returns 0 after the loop over test_vec has completed. */
static int test_simde_mm512_mask_and_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask16 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 2103907232), INT32_C(-1995421302), INT32_C( 1328084931), INT32_C( -379562245), INT32_C( 1144599747), INT32_C(-1418414219), INT32_C( 1379143176), INT32_C(-2075387410), INT32_C(-1152868472), INT32_C( 1425101887), INT32_C(-1626225579), INT32_C( 2014677400), INT32_C( 1108777022), INT32_C( 877429697), INT32_C( 1558575022), INT32_C( 651318578)), UINT16_C(31271), simde_mm512_set_epi32(INT32_C( 1822881045), INT32_C( -803993576), INT32_C( 163813867), INT32_C(-1017884831), INT32_C(-1258992440), INT32_C( 1182354404), INT32_C(-1425047283), INT32_C( 1708628167), INT32_C(-2051115159), INT32_C( 143410800), INT32_C(-1041180848), INT32_C(-1023702255), INT32_C( -240255997), INT32_C( 978107452), INT32_C(-1343783755), INT32_C( -880122145)), simde_mm512_set_epi32(INT32_C( 1966832804), INT32_C( 1231278072), INT32_C( 832865002), INT32_C(-1694490420), INT32_C( -466764866), INT32_C( 1702721236), INT32_C( 2092439418), INT32_C( 611933067), INT32_C(-1184445720), INT32_C(-1689922195), INT32_C(-1140532352), INT32_C( 158346875), INT32_C( 1089717474), INT32_C( 1230253618), INT32_C( 1504968720), INT32_C( 653725328)), simde_mm512_set_epi32(INT32_C( 2103907232), INT32_C( 1073743896), INT32_C( 25198826), INT32_C(-2097151936), INT32_C(-1541076856), INT32_C(-1418414219), INT32_C( 671613192), INT32_C(-2075387410), INT32_C(-1152868472), INT32_C( 1425101887), INT32_C(-2147429632), INT32_C( 2014677400), INT32_C( 1108777022), INT32_C( 138685488), INT32_C( 161742864), INT32_C( 42075792)) }, { simde_mm512_set_epi32(INT32_C( 537660993), INT32_C(-1605590998), INT32_C( 1810066171), INT32_C( -10167007), INT32_C( 1359569321), INT32_C(-1430119757), INT32_C( 256064867), INT32_C( 788618356), INT32_C( -392789188), INT32_C( -613873031), INT32_C(
773158597), INT32_C( -194106681), INT32_C( 165465349), INT32_C(-1346434037), INT32_C(-1412186885), INT32_C( -632750822)), UINT16_C(57760), simde_mm512_set_epi32(INT32_C( 173952757), INT32_C( 1507611872), INT32_C(-1888865381), INT32_C( 15541452), INT32_C( -670396349), INT32_C(-1090081489), INT32_C( 116025329), INT32_C( 130963716), INT32_C( 230354364), INT32_C( 1174065929), INT32_C( 1971493681), INT32_C(-1343257591), INT32_C(-1419733408), INT32_C( 659096905), INT32_C( 183711411), INT32_C( -298263182)), simde_mm512_set_epi32(INT32_C( -362113249), INT32_C( -694086277), INT32_C( 1616282233), INT32_C(-2132390836), INT32_C( -153917613), INT32_C( 736513734), INT32_C(-1311057544), INT32_C( 505476811), INT32_C( 1767695145), INT32_C( 157469724), INT32_C( -371725260), INT32_C( 1996701751), INT32_C(-1377678442), INT32_C( -132601652), INT32_C( -903948497), INT32_C( -794660034)), simde_mm512_set_epi32(INT32_C( 172623381), INT32_C( 1350571104), INT32_C( 4325913), INT32_C( -10167007), INT32_C( 1359569321), INT32_C(-1430119757), INT32_C( 256064867), INT32_C( 100683776), INT32_C( 152625448), INT32_C( -613873031), INT32_C( 1635944496), INT32_C( -194106681), INT32_C( 165465349), INT32_C(-1346434037), INT32_C(-1412186885), INT32_C( -632750822)) }, { simde_mm512_set_epi32(INT32_C( -731393712), INT32_C( -784753228), INT32_C( -748350470), INT32_C( 1755197901), INT32_C( 1090059253), INT32_C( 1329426651), INT32_C(-1604442789), INT32_C( 878047098), INT32_C(-1682276633), INT32_C( -78811559), INT32_C( -973139496), INT32_C( 91517188), INT32_C( 37440120), INT32_C( 829401648), INT32_C( 860279707), INT32_C( 218373799)), UINT16_C(44550), simde_mm512_set_epi32(INT32_C( 335188274), INT32_C( 1272591061), INT32_C( -327494197), INT32_C( 892006932), INT32_C(-1158010747), INT32_C( -700127305), INT32_C(-1058033333), INT32_C( 2135825335), INT32_C( -510043422), INT32_C( -139544800), INT32_C( 95748631), INT32_C( 1671238992), INT32_C( 1305915968), INT32_C( -582790199), INT32_C( 1758236157), INT32_C( 
986405639)), simde_mm512_set_epi32(INT32_C( 491327584), INT32_C( -563681080), INT32_C( 1527319596), INT32_C( 1347900829), INT32_C(-1792354715), INT32_C( -371177698), INT32_C( 255088013), INT32_C( 1961231505), INT32_C( -659343095), INT32_C( 1620234692), INT32_C( 843561067), INT32_C( 1265300992), INT32_C(-1675104490), INT32_C( -873664156), INT32_C(-2045109653), INT32_C( 2057630636)), simde_mm512_set_epi32(INT32_C( 289935392), INT32_C( -784753228), INT32_C( 1208484872), INT32_C( 1755197901), INT32_C(-1876295675), INT32_C(-1069529322), INT32_C( 2363657), INT32_C( 878047098), INT32_C(-1682276633), INT32_C( -78811559), INT32_C( -973139496), INT32_C( 91517188), INT32_C( 37440120), INT32_C( -918531776), INT32_C( 526441), INT32_C( 218373799)) }, { simde_mm512_set_epi32(INT32_C(-1157339218), INT32_C(-2025137124), INT32_C(-1631723043), INT32_C(-2095740678), INT32_C(-1857753563), INT32_C(-1236342636), INT32_C( 1439297909), INT32_C( -576201057), INT32_C( 1488873085), INT32_C(-1369304746), INT32_C( 567848046), INT32_C( 1335236564), INT32_C( -942680632), INT32_C(-1512916560), INT32_C( -697747292), INT32_C(-1779695782)), UINT16_C(25528), simde_mm512_set_epi32(INT32_C( 1647236603), INT32_C(-1221154150), INT32_C(-1012750616), INT32_C( 1849794587), INT32_C( 131205926), INT32_C( 2061955170), INT32_C( 37003574), INT32_C( 1649229141), INT32_C( 612060260), INT32_C(-1402263233), INT32_C( -513572270), INT32_C( 701923816), INT32_C( 511549547), INT32_C( 969083331), INT32_C( 1364542630), INT32_C( -822209230)), simde_mm512_set_epi32(INT32_C(-1570208244), INT32_C( -244400530), INT32_C( 1126824505), INT32_C( 1036340167), INT32_C( 424825857), INT32_C( 434716327), INT32_C( 1663095683), INT32_C( 1730428966), INT32_C( -252943126), INT32_C( 373990324), INT32_C( 2100741912), INT32_C(-1005385937), INT32_C(-1470990839), INT32_C(-1200692008), INT32_C(-2098508971), INT32_C( 488824783)), simde_mm512_set_epi32(INT32_C(-1157339218), INT32_C(-1322866166), INT32_C( 1126212136), INT32_C(-2095740678), 
INT32_C(-1857753563), INT32_C(-1236342636), INT32_C( 35684610), INT32_C( 1644429316), INT32_C( 543704160), INT32_C(-1369304746), INT32_C( 1629651472), INT32_C( 1179944), INT32_C( 139468809), INT32_C(-1512916560), INT32_C( -697747292), INT32_C(-1779695782)) }, { simde_mm512_set_epi32(INT32_C( 1799684145), INT32_C( -551549754), INT32_C( 1249951615), INT32_C( 1447553529), INT32_C(-2080136983), INT32_C( -623547588), INT32_C( 1015056564), INT32_C(-1333355305), INT32_C( 1925062912), INT32_C( 330138155), INT32_C( -444195598), INT32_C( 874806560), INT32_C( 319126943), INT32_C( 475403370), INT32_C( -873396634), INT32_C(-1835948135)), UINT16_C( 9392), simde_mm512_set_epi32(INT32_C(-1216921331), INT32_C( -301860714), INT32_C( 317296385), INT32_C( 1833800187), INT32_C( -645353377), INT32_C(-1989715102), INT32_C( 1785805415), INT32_C( 1739507553), INT32_C(-1932540399), INT32_C( -641363523), INT32_C( 72499486), INT32_C(-2005610208), INT32_C( -497730582), INT32_C( 312611747), INT32_C( 793357988), INT32_C( 607421194)), simde_mm512_set_epi32(INT32_C( 142889340), INT32_C( 1231002435), INT32_C( 1681334055), INT32_C( 138372594), INT32_C( -878588971), INT32_C( 423295425), INT32_C( -460615607), INT32_C( 1208771148), INT32_C(-2128303155), INT32_C( -738338972), INT32_C( 2110676823), INT32_C(-1405320678), INT32_C(-2007459833), INT32_C( 1043638626), INT32_C( -542891463), INT32_C( 629803756)), simde_mm512_set_epi32(INT32_C( 1799684145), INT32_C( -551549754), INT32_C( 2166529), INT32_C( 1447553529), INT32_C(-2080136983), INT32_C( 153246016), INT32_C( 1015056564), INT32_C(-1333355305), INT32_C(-2147188223), INT32_C( 330138155), INT32_C( 71450902), INT32_C(-2009825280), INT32_C( 319126943), INT32_C( 475403370), INT32_C( -873396634), INT32_C(-1835948135)) }, { simde_mm512_set_epi32(INT32_C( 861635987), INT32_C( 1823839521), INT32_C( 1391000031), INT32_C( 73229946), INT32_C( -53693878), INT32_C( -257851255), INT32_C( -479522767), INT32_C( 745275629), INT32_C(-1783480446), INT32_C(-1431666964), 
INT32_C( -422291816), INT32_C( -309252994), INT32_C(-2112946871), INT32_C(-2123521230), INT32_C( -883174176), INT32_C( -332498316)), UINT16_C(31381), simde_mm512_set_epi32(INT32_C( -410707923), INT32_C( -804790801), INT32_C( -675940069), INT32_C( 717543141), INT32_C( 1610339352), INT32_C( 785451213), INT32_C( -67248356), INT32_C(-1147482606), INT32_C( 877778312), INT32_C( 1833609670), INT32_C( 1105011960), INT32_C(-1909564752), INT32_C(-2137129603), INT32_C(-1991115340), INT32_C( -941377596), INT32_C(-1151664921)), simde_mm512_set_epi32(INT32_C( -605598510), INT32_C( 1332169075), INT32_C( 829771204), INT32_C( 806631323), INT32_C( -62111889), INT32_C( 1452741835), INT32_C( 921236435), INT32_C(-1348081811), INT32_C( -883327193), INT32_C(-1324808596), INT32_C(-2119312832), INT32_C(-1371509978), INT32_C(-1887676953), INT32_C( 42335263), INT32_C( 818544934), INT32_C(-1864687690)), simde_mm512_set_epi32(INT32_C( 861635987), INT32_C( 1074219363), INT32_C( 288704768), INT32_C( 537138817), INT32_C( 1548227592), INT32_C( -257851255), INT32_C( 854119184), INT32_C( 745275629), INT32_C( 5330176), INT32_C(-1431666964), INT32_C( -422291816), INT32_C(-1912583648), INT32_C(-2112946871), INT32_C( 130068), INT32_C( -883174176), INT32_C(-1873142618)) }, { simde_mm512_set_epi32(INT32_C( 1456151906), INT32_C( -346366427), INT32_C( 534496658), INT32_C( 1981510934), INT32_C( -935678271), INT32_C( 1523008579), INT32_C( -6105095), INT32_C( 2115600842), INT32_C( -420343454), INT32_C( 652783640), INT32_C( -871055383), INT32_C( 142253075), INT32_C( 557825344), INT32_C( 707825888), INT32_C( 944883191), INT32_C( 1704858885)), UINT16_C(19039), simde_mm512_set_epi32(INT32_C( 1893303454), INT32_C( 1567616976), INT32_C( 1190892677), INT32_C( 1594451864), INT32_C(-1033342432), INT32_C( -738674203), INT32_C(-1847547828), INT32_C( 1893640833), INT32_C( 26320713), INT32_C( 1830669951), INT32_C( 1304924639), INT32_C( -277717409), INT32_C(-1566722863), INT32_C( 1534951086), INT32_C( -925669609), INT32_C( 
359322092)), simde_mm512_set_epi32(INT32_C( 1160904262), INT32_C( 732990033), INT32_C( 138388028), INT32_C( 1168180194), INT32_C( 1057944486), INT32_C( 1875512725), INT32_C( -910818137), INT32_C( -743685110), INT32_C( -947775444), INT32_C( 1458691146), INT32_C( 1273454073), INT32_C( 927814838), INT32_C(-1840329583), INT32_C( -789758267), INT32_C( -870216121), INT32_C( 449327093)), simde_mm512_set_epi32(INT32_C( 1456151906), INT32_C( 153127504), INT32_C( 534496658), INT32_C( 1981510934), INT32_C( 34107936), INT32_C( 1523008579), INT32_C(-2120220668), INT32_C( 2115600842), INT32_C( -420343454), INT32_C( 1142017098), INT32_C( -871055383), INT32_C( 658527254), INT32_C(-2113108847), INT32_C( 1349336708), INT32_C( -939457017), INT32_C( 273154532)) }, { simde_mm512_set_epi32(INT32_C(-1055194531), INT32_C( 1846727705), INT32_C( 335680535), INT32_C( -610713755), INT32_C( 944256620), INT32_C( 697979892), INT32_C( 49552843), INT32_C( -460412596), INT32_C(-2060335241), INT32_C( 135497979), INT32_C( -331098630), INT32_C( -140680021), INT32_C(-1676162464), INT32_C( 626483741), INT32_C( 170885439), INT32_C( 230851400)), UINT16_C(10528), simde_mm512_set_epi32(INT32_C(-1447071985), INT32_C( -310464227), INT32_C( -679161042), INT32_C( -527096592), INT32_C( 1833269922), INT32_C(-1164990327), INT32_C( 1955493691), INT32_C( 1263046717), INT32_C( 1122048689), INT32_C( 2074234443), INT32_C( 723669938), INT32_C( 284884896), INT32_C( -458264538), INT32_C( 514387150), INT32_C(-1369468153), INT32_C( 1859652102)), simde_mm512_set_epi32(INT32_C(-1315612420), INT32_C( -330188185), INT32_C( 890374), INT32_C( -958458643), INT32_C( 2023887571), INT32_C( 226435011), INT32_C( 1929100), INT32_C(-1975437469), INT32_C( 871389437), INT32_C(-2041059805), INT32_C( 636566673), INT32_C( -485819928), INT32_C( 904524629), INT32_C(-1963695561), INT32_C( -297278672), INT32_C(-1666020030)), simde_mm512_set_epi32(INT32_C(-1055194531), INT32_C( 1846727705), INT32_C( 299526), INT32_C( -610713755), INT32_C( 
1744835202), INT32_C( 697979892), INT32_C( 49552843), INT32_C( 171970593), INT32_C(-2060335241), INT32_C( 135497979), INT32_C( 555749520), INT32_C( -140680021), INT32_C(-1676162464), INT32_C( 626483741), INT32_C( 170885439), INT32_C( 230851400)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_and_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; }
/* Test for simde_mm512_maskz_and_epi32.
 *
 * Each of the 8 cases holds a 16-bit mask k, two 512-bit inputs a and b
 * (each built from sixteen 32-bit values with simde_mm512_set_epi32) and
 * the expected result r.  In the vectors below r is 0 in every lane whose
 * bit in k is clear and holds the AND of a and b in the other lanes; bit 0
 * of k lines up with the LAST argument of simde_mm512_set_epi32.
 *
 * Returns 0 once every case has been run.  What happens on a mismatch is
 * decided by simde_assert_m512i_i32, which is defined outside this chunk. */
static int test_simde_mm512_maskz_and_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT16_C(21810), simde_mm512_set_epi32(INT32_C( 1183808039), INT32_C( 2103907232), INT32_C(-1995421302), INT32_C( 1328084931), INT32_C( -379562245), INT32_C( 1144599747), INT32_C(-1418414219), INT32_C( 1379143176), INT32_C(-2075387410), INT32_C(-1152868472), INT32_C( 1425101887), INT32_C(-1626225579), INT32_C( 2014677400), INT32_C( 1108777022), INT32_C( 877429697), INT32_C( 1558575022)), simde_mm512_set_epi32(INT32_C( 1822881045), INT32_C( -803993576), INT32_C( 163813867), INT32_C(-1017884831), INT32_C(-1258992440), INT32_C( 1182354404), INT32_C(-1425047283), INT32_C( 1708628167), INT32_C(-2051115159), INT32_C( 143410800), INT32_C(-1041180848), INT32_C(-1023702255), INT32_C( -240255997), INT32_C( 978107452), INT32_C(-1343783755), INT32_C( -880122145)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 1342439424), INT32_C( 0), INT32_C( 1124091713), INT32_C( 0), INT32_C( 1144586432), INT32_C( 0), INT32_C( 1075054592), INT32_C( 0), INT32_C( 0), INT32_C( 1089491984), INT32_C(-2112782319), INT32_C( 0), INT32_C( 0), INT32_C( 608436353), INT32_C( 0)) }, { UINT16_C( 3728), simde_mm512_set_epi32(INT32_C( -632750822), INT32_C( 1966832804), INT32_C( 1231278072), INT32_C( 832865002), INT32_C(-1694490420), INT32_C( -466764866), INT32_C( 1702721236), INT32_C( 2092439418), INT32_C( 611933067), INT32_C(-1184445720), INT32_C(-1689922195), 
INT32_C(-1140532352), INT32_C( 158346875), INT32_C( 1089717474), INT32_C( 1230253618), INT32_C( 1504968720)), simde_mm512_set_epi32(INT32_C(-1286938208), INT32_C( 537660993), INT32_C(-1605590998), INT32_C( 1810066171), INT32_C( -10167007), INT32_C( 1359569321), INT32_C(-1430119757), INT32_C( 256064867), INT32_C( 788618356), INT32_C( -392789188), INT32_C( -613873031), INT32_C( 773158597), INT32_C( -194106681), INT32_C( 165465349), INT32_C(-1346434037), INT32_C(-1412186885)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1694498816), INT32_C( 1074340264), INT32_C( 541069968), INT32_C( 0), INT32_C( 604067840), INT32_C( 0), INT32_C( 0), INT32_C( 738480768), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(56690), simde_mm512_set_epi32(INT32_C( -794660034), INT32_C( 173952757), INT32_C( 1507611872), INT32_C(-1888865381), INT32_C( 15541452), INT32_C( -670396349), INT32_C(-1090081489), INT32_C( 116025329), INT32_C( 130963716), INT32_C( 230354364), INT32_C( 1174065929), INT32_C( 1971493681), INT32_C(-1343257591), INT32_C(-1419733408), INT32_C( 659096905), INT32_C( 183711411)), simde_mm512_set_epi32(INT32_C( 218373799), INT32_C( -362113249), INT32_C( -694086277), INT32_C( 1616282233), INT32_C(-2132390836), INT32_C( -153917613), INT32_C( 736513734), INT32_C(-1311057544), INT32_C( 505476811), INT32_C( 1767695145), INT32_C( 157469724), INT32_C( -371725260), INT32_C( 1996701751), INT32_C(-1377678442), INT32_C( -132601652), INT32_C( -903948497)), simde_mm512_set_epi32(INT32_C( 4646), INT32_C( 172623381), INT32_C( 0), INT32_C( 4325913), INT32_C( 14943308), INT32_C( -805175229), INT32_C( 0), INT32_C( 13255024), INT32_C( 0), INT32_C( 152625448), INT32_C( 23248904), INT32_C( 1635944496), INT32_C( 0), INT32_C( 0), INT32_C( 537395272), INT32_C( 0)) }, { UINT16_C(54171), simde_mm512_set_epi32(INT32_C( 986405639), INT32_C( 1121234438), INT32_C( -731393712), INT32_C( -784753228), INT32_C( -748350470), INT32_C( 1755197901), INT32_C( 
1090059253), INT32_C( 1329426651), INT32_C(-1604442789), INT32_C( 878047098), INT32_C(-1682276633), INT32_C( -78811559), INT32_C( -973139496), INT32_C( 91517188), INT32_C( 37440120), INT32_C( 829401648)), simde_mm512_set_epi32(INT32_C( 2057630636), INT32_C( 335188274), INT32_C( 1272591061), INT32_C( -327494197), INT32_C( 892006932), INT32_C(-1158010747), INT32_C( -700127305), INT32_C(-1058033333), INT32_C( 2135825335), INT32_C( -510043422), INT32_C( -139544800), INT32_C( 95748631), INT32_C( 1671238992), INT32_C( 1305915968), INT32_C( -582790199), INT32_C( 1758236157)), simde_mm512_set_epi32(INT32_C( 981488388), INT32_C( 47218690), INT32_C( 0), INT32_C(-1070033536), INT32_C( 0), INT32_C( 0), INT32_C( 1077994421), INT32_C( 1076703307), INT32_C( 541982995), INT32_C( 0), INT32_C( 0), INT32_C( 17105425), INT32_C( 1100812624), INT32_C( 0), INT32_C( 213576), INT32_C( 541886512)) }, { UINT16_C( 6763), simde_mm512_set_epi32(INT32_C( -697747292), INT32_C(-1779695782), INT32_C( 491327584), INT32_C( -563681080), INT32_C( 1527319596), INT32_C( 1347900829), INT32_C(-1792354715), INT32_C( -371177698), INT32_C( 255088013), INT32_C( 1961231505), INT32_C( -659343095), INT32_C( 1620234692), INT32_C( 843561067), INT32_C( 1265300992), INT32_C(-1675104490), INT32_C( -873664156)), simde_mm512_set_epi32(INT32_C( -822209230), INT32_C(-1549704264), INT32_C(-1157339218), INT32_C(-2025137124), INT32_C(-1631723043), INT32_C(-2095740678), INT32_C(-1857753563), INT32_C(-1236342636), INT32_C( 1439297909), INT32_C( -576201057), INT32_C( 1488873085), INT32_C(-1369304746), INT32_C( 567848046), INT32_C( 1335236564), INT32_C( -942680632), INT32_C(-1512916560)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-2042443768), INT32_C( 436797452), INT32_C( 0), INT32_C(-1862220251), INT32_C( 0), INT32_C( 0), INT32_C( 1420166289), INT32_C( 1488068617), INT32_C( 0), INT32_C( 541108330), INT32_C( 0), INT32_C(-2079865600), INT32_C(-2118078176)) }, { UINT16_C(17574), 
simde_mm512_set_epi32(INT32_C(-2098508971), INT32_C( 488824783), INT32_C( 1647236603), INT32_C(-1221154150), INT32_C(-1012750616), INT32_C( 1849794587), INT32_C( 131205926), INT32_C( 2061955170), INT32_C( 37003574), INT32_C( 1649229141), INT32_C( 612060260), INT32_C(-1402263233), INT32_C( -513572270), INT32_C( 701923816), INT32_C( 511549547), INT32_C( 969083331)), simde_mm512_set_epi32(INT32_C( -873396634), INT32_C(-1835948135), INT32_C(-1570208244), INT32_C( -244400530), INT32_C( 1126824505), INT32_C( 1036340167), INT32_C( 424825857), INT32_C( 434716327), INT32_C( 1663095683), INT32_C( 1730428966), INT32_C( -252943126), INT32_C( 373990324), INT32_C( 2100741912), INT32_C(-1005385937), INT32_C(-1470990839), INT32_C(-1200692008)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 268470153), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 742457347), INT32_C( 0), INT32_C( 0), INT32_C( 35684610), INT32_C( 0), INT32_C( 543704160), INT32_C( 0), INT32_C( 0), INT32_C( 1179944), INT32_C( 139468809), INT32_C( 0)) }, { UINT16_C( 5226), simde_mm512_set_epi32(INT32_C( 793357988), INT32_C( 607421194), INT32_C(-1375787856), INT32_C( 1799684145), INT32_C( -551549754), INT32_C( 1249951615), INT32_C( 1447553529), INT32_C(-2080136983), INT32_C( -623547588), INT32_C( 1015056564), INT32_C(-1333355305), INT32_C( 1925062912), INT32_C( 330138155), INT32_C( -444195598), INT32_C( 874806560), INT32_C( 319126943)), simde_mm512_set_epi32(INT32_C( -542891463), INT32_C( 629803756), INT32_C(-1216921331), INT32_C( -301860714), INT32_C( 317296385), INT32_C( 1833800187), INT32_C( -645353377), INT32_C(-1989715102), INT32_C( 1785805415), INT32_C( 1739507553), INT32_C(-1932540399), INT32_C( -641363523), INT32_C( 72499486), INT32_C(-2005610208), INT32_C( -497730582), INT32_C( 312611747)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1778450448), INT32_C( 0), INT32_C( 1207996795), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 612401184), INT32_C(-2138668015), INT32_C( 0), INT32_C( 
10), INT32_C( 0), INT32_C( 537147680), INT32_C( 0)) }, { UINT16_C(43362), simde_mm512_set_epi32(INT32_C(-2123521230), INT32_C( -883174176), INT32_C( -332498316), INT32_C( 142889340), INT32_C( 1231002435), INT32_C( 1681334055), INT32_C( 138372594), INT32_C( -878588971), INT32_C( 423295425), INT32_C( -460615607), INT32_C( 1208771148), INT32_C(-2128303155), INT32_C( -738338972), INT32_C( 2110676823), INT32_C(-1405320678), INT32_C(-2007459833)), simde_mm512_set_epi32(INT32_C( -941377596), INT32_C(-1151664921), INT32_C( -656770411), INT32_C( 861635987), INT32_C( 1823839521), INT32_C( 1391000031), INT32_C( 73229946), INT32_C( -53693878), INT32_C( -257851255), INT32_C( -479522767), INT32_C( 745275629), INT32_C(-1783480446), INT32_C(-1431666964), INT32_C( -422291816), INT32_C( -309252994), INT32_C(-2112946871)), simde_mm512_set_epi32(INT32_C(-2124307712), INT32_C( 0), INT32_C( -938837484), INT32_C( 0), INT32_C( 1209372929), INT32_C( 0), INT32_C( 0), INT32_C( -931102144), INT32_C( 0), INT32_C( -536145919), INT32_C( 135004236), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1408237542), INT32_C( 0)) } };
/* Run every case through the function under test and compare the result with the expected vector. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_and_epi32(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; }
/* Test for simde_mm512_and_epi64.
 *
 * Each of the 8 cases holds two 512-bit inputs a and b (each built from
 * eight 64-bit values with simde_mm512_set_epi64) and the expected result
 * r of simde_mm512_and_epi64(a, b).
 *
 * Returns 0 once every case has been run.  What happens on a mismatch is
 * decided by simde_assert_m512i_i64, which is defined outside this chunk. */
static int test_simde_mm512_and_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( 9036212757557430666), INT64_C( 5704081348870821627), INT64_C( 4916018483251427189), INT64_C( 5923374839641151982), INT64_C(-4951532382404389825), INT64_C(-6984585675708986984), INT64_C( 4762161048923702209), INT64_C( 6694028748503799090)), simde_mm512_set_epi64(INT64_C(-3453126114950276629), INT64_C(-4371782057203512120), INT64_C( 5078173500331491597), INT64_C( 7338502100533278569), INT64_C( 615944699146983248), INT64_C(-4396767702011741181), INT64_C( 4200939521265073333), 
INT64_C(-3780095828076561881)), simde_mm512_set_epi64(INT64_C( 5765733423092074890), INT64_C( 4827937147730281672), INT64_C( 4915961295754526981), INT64_C( 4617324316270006632), INT64_C( 578719150276367376), INT64_C(-9074330961792628736), INT64_C( 145408472056594561), INT64_C( 5224288327930368034)) }, { simde_mm512_set_epi64(INT64_C( 5288299052354798314), INT64_C(-7277780933457101890), INT64_C( 7313132024917137274), INT64_C( 2628232513216498408), INT64_C(-7258160557155099776), INT64_C( 680094650638517474), INT64_C( 5283899056600645648), INT64_C( 2807728906149754133)), simde_mm512_set_epi64(INT64_C(-6895960825351935237), INT64_C( -43666961203633751), INT64_C(-6142317585422402205), INT64_C( 3387090051947463484), INT64_C(-2636564591268235579), INT64_C( -833681846664639227), INT64_C(-5782890152253473541), INT64_C(-2717644085040284508)), simde_mm512_set_epi64(INT64_C( 18155308360663786), INT64_C(-7277816996756381272), INT64_C( 2323877817609098594), INT64_C( 2594451620184359464), INT64_C(-7259520672634482048), INT64_C( 27068065029080064), INT64_C( 654161048765464592), INT64_C( 162142851124327428)) }, { simde_mm512_set_epi64(INT64_C(-8112615037926038324), INT64_C(-2879330391107916497), INT64_C( 498324993693604100), INT64_C( 989364461044945673), INT64_C( 8467500887117366281), INT64_C(-6097708555739527863), INT64_C( 789034506143718770), INT64_C(-5527357514795184575)), simde_mm512_set_epi64(INT64_C( 6941879334003428428), INT64_C( -661071113376870714), INT64_C(-5630949274148604213), INT64_C( 7592192837230447644), INT64_C(-1596547832800395209), INT64_C(-5917083848631867188), INT64_C(-3882429228383046850), INT64_C( 747121403871646944)), simde_mm512_set_epi64(INT64_C( 18579654875284556), INT64_C(-3458201275382494202), INT64_C( 56929894688378880), INT64_C( 655521307720597512), INT64_C( 7026328109045710849), INT64_C(-6241986883974856632), INT64_C( 725666832763081010), INT64_C( 165015392292176960)) }, { simde_mm512_set_epi64(INT64_C(-3214140792841031219), INT64_C( 
4681768843666616539), INT64_C(-6891029306179981446), INT64_C(-7225323117343838631), INT64_C(-4179602309674405628), INT64_C( 160804091787717168), INT64_C( 3694873207195836071), INT64_C(-1555264558302423685)), simde_mm512_set_epi64(INT64_C( 3831140603882252421), INT64_C(-3007023874774683317), INT64_C( 9173299967578168034), INT64_C( -599340352231112169), INT64_C( 7177916815745921600), INT64_C(-2503064843376095747), INT64_C( 4236579961216216582), INT64_C(-3141312070029828684)), simde_mm512_set_epi64(INT64_C( 1234006158509482117), INT64_C( 4629950784542158923), INT64_C( 2327799239051135586), INT64_C(-7806367493009374703), INT64_C( 4727954219193348096), INT64_C( 917302477097008), INT64_C( 3621548322709573126), INT64_C(-4583941969312410320)) }, { simde_mm512_set_epi64(INT64_C( 5789189981308900965), INT64_C(-1594196073659476595), INT64_C( 8423425177495484681), INT64_C( 6958855014828193899), INT64_C( 5434426382856220438), INT64_C(-3752358975457584533), INT64_C( 8837456289202868530), INT64_C( 5465736992144414155)), simde_mm512_set_epi64(INT64_C(-9001137670469652955), INT64_C(-5310051186831134347), INT64_C(-2474764694246758787), INT64_C(-5881119101759738770), INT64_C( 5734797378155697608), INT64_C(-6497927143179601756), INT64_C(-7643735180027817888), INT64_C(-2420991802446640084)), simde_mm512_set_epi64(INT64_C( 5911629778699813), INT64_C(-6899444174404169467), INT64_C( 6099567767624753161), INT64_C( 2306424943463671914), INT64_C( 5404906694268929280), INT64_C(-9097076494042654688), INT64_C( 1198224716855775264), INT64_C( 5350875317483865096)) }, { simde_mm512_set_epi64(INT64_C( 563525163273351266), INT64_C( 158929141814345045), INT64_C( 2628778802773961023), INT64_C(-2205776103080558104), INT64_C( 2197088575617698243), INT64_C( 5860665973320586546), INT64_C(-6655929129214122066), INT64_C(-8697897714832252451)), simde_mm512_set_epi64(INT64_C( 1824613162744888999), INT64_C( 7142941570334212134), INT64_C(-1086382453544016972), INT64_C( 9022617812666091311), 
INT64_C(-6317857543126326056), INT64_C(-9013027400318787633), INT64_C( 7074827341732948634), INT64_C(-4349730772874059749)), simde_mm512_set_epi64(INT64_C( 95138542545547298), INT64_C( 153264234564943876), INT64_C( 2335191585971118388), INT64_C( 6999299776119439656), INT64_C( 599013974410920128), INT64_C( 18366259613799682), INT64_C( 2459037658077471370), INT64_C(-9006491134657978343)) }, { simde_mm512_set_epi64(INT64_C(-8934120309513688260), INT64_C( 4359634748931742935), INT64_C( 8268082250112664107), INT64_C(-1907805565562356448), INT64_C( 1370639783932859498), INT64_C(-3751209977007462503), INT64_C(-6743993051839021458), INT64_C( 4839674398342728647)), simde_mm512_set_epi64(INT64_C(-8545761289661498777), INT64_C( 7471128053642413585), INT64_C(-2754635356059844322), INT64_C(-8614030248086520854), INT64_C( 1342657230503784100), INT64_C( 2608854166046450864), INT64_C( 7729584549648139462), INT64_C( 5368501309454936569)), simde_mm512_set_epi64(INT64_C(-9222527198360950236), INT64_C( 2630243059467977745), INT64_C( 5801763319471669258), INT64_C(-9222228406785787616), INT64_C( 1297055947226612768), INT64_C( 13513008652297360), INT64_C( 2467972599307568198), INT64_C( 4756005960819231169)) }, { simde_mm512_set_epi64(INT64_C( 1818040010755772489), INT64_C( 5191632551175039949), INT64_C(-3171141735991582889), INT64_C(-6035806350115039225), INT64_C( 4482393771264451129), INT64_C( 2704986537996009741), INT64_C(-1296481894259912959), INT64_C( 7876111834213298271)), simde_mm512_set_epi64(INT64_C(-1107462703642111951), INT64_C( 3200934455572316034), INT64_C(-6148962785270933864), INT64_C(-1328231493238063799), INT64_C(-9120454231799900960), INT64_C(-1428069393052184196), INT64_C( 5287115201302699815), INT64_C( 594305769309064149)), simde_mm512_set_epi64(INT64_C( 1162069999159545857), INT64_C( 579838780607835008), INT64_C(-9031874063504222704), INT64_C(-6048334185741379583), INT64_C( 10310269968318496), INT64_C( 2596899559745798412), INT64_C( 5188595380067045121), INT64_C( 
580122069275017301)) } };
/* Run every case through the function under test and compare the result with the expected vector. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_and_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; }
/* Test for simde_mm512_mask_and_epi64.
 *
 * Each of the 8 cases holds a pass-through vector src, an 8-bit mask k,
 * two 512-bit inputs a and b (each built from eight 64-bit values with
 * simde_mm512_set_epi64) and the expected result r.  In the vectors below
 * r repeats the src lane wherever the matching bit of k is clear and holds
 * the AND of a and b in the other lanes; bit 0 of k lines up with the LAST
 * argument of simde_mm512_set_epi64.
 *
 * Returns 0 once every case has been run.  What happens on a mismatch is
 * decided by simde_assert_m512i_i64, which is defined outside this chunk. */
static int test_simde_mm512_mask_and_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask8 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( 9036212757557430666), INT64_C( 5704081348870821627), INT64_C( 4916018483251427189), INT64_C( 5923374839641151982), INT64_C(-4951532382404389825), INT64_C(-6984585675708986984), INT64_C( 4762161048923702209), INT64_C( 6694028748503799090)), UINT8_C( 39), simde_mm512_set_epi64(INT64_C( 7829214476264278040), INT64_C( 703575204673376097), INT64_C(-5407331354528887836), INT64_C(-6120531474030028601), INT64_C(-8809472528091429264), INT64_C(-4471837688110281967), INT64_C(-1031891648804766660), INT64_C(-5771507277206231329)), simde_mm512_set_epi64(INT64_C( 8447482571111256056), INT64_C( 3577127948173451468), INT64_C(-2004739832689101100), INT64_C( 8986958869783206795), INT64_C(-5087155628682128019), INT64_C(-4898549151711613317), INT64_C( 4680300913939983922), INT64_C( 6463791434556706448)), simde_mm512_set_epi64(INT64_C( 9036212757557430666), INT64_C( 5704081348870821627), INT64_C(-6618874695993703740), INT64_C( 5923374839641151982), INT64_C(-4951532382404389825), INT64_C(-9223140039893972463), INT64_C( 4657219002685467696), INT64_C( 694680311283451536)) }, { simde_mm512_set_epi64(INT64_C( 2309236383959261226), INT64_C( 7774175012325743905), INT64_C( 5839305773204773555), INT64_C( 1099790230208207988), INT64_C(-1687016713001301383), INT64_C( 3320690892837104327), INT64_C( 710668265524759563), INT64_C(-6065296483252896486)), UINT8_C(160), simde_mm512_set_epi64(INT64_C( 747121403871646944), INT64_C(-8112615037926038324), INT64_C(-2879330391107916497), INT64_C( 498324993693604100), INT64_C( 989364461044945673), INT64_C( 
8467500887117366281), INT64_C(-6097708555739527863), INT64_C( 789034506143718770)), simde_mm512_set_epi64(INT64_C(-1555264558302423685), INT64_C( 6941879334003428428), INT64_C( -661071113376870714), INT64_C(-5630949274148604213), INT64_C( 7592192837230447644), INT64_C(-1596547832800395209), INT64_C(-5917083848631867188), INT64_C(-3882429228383046850)), simde_mm512_set_epi64(INT64_C( 741411777270518880), INT64_C( 7774175012325743905), INT64_C(-3458201275382494202), INT64_C( 1099790230208207988), INT64_C(-1687016713001301383), INT64_C( 3320690892837104327), INT64_C( 710668265524759563), INT64_C(-6065296483252896486)) }, { simde_mm512_set_epi64(INT64_C(-3141312070029828684), INT64_C(-3214140792841031219), INT64_C( 4681768843666616539), INT64_C(-6891029306179981446), INT64_C(-7225323117343838631), INT64_C(-4179602309674405628), INT64_C( 160804091787717168), INT64_C( 3694873207195836071)), UINT8_C( 6), simde_mm512_set_epi64(INT64_C( 1439622676105278165), INT64_C(-1406576864852774380), INT64_C(-4973618283186690121), INT64_C(-4544218561177052233), INT64_C(-2190619812874504416), INT64_C( 411237240453010768), INT64_C( 5608866377596359625), INT64_C( 7551566793946127111)), simde_mm512_set_epi64(INT64_C( 2110235908633979080), INT64_C( 6559787716707833245), INT64_C(-7698104879832611042), INT64_C( 1095594675397854353), INT64_C(-2831857028248186428), INT64_C( 3623067196209165824), INT64_C(-7194518998511455900), INT64_C(-8783679074311277652)), simde_mm512_set_epi64(INT64_C(-3141312070029828684), INT64_C(-3214140792841031219), INT64_C( 4681768843666616539), INT64_C(-6891029306179981446), INT64_C(-7225323117343838631), INT64_C( 1407388893058048), INT64_C( 866558102575534400), INT64_C( 3694873207195836071)) }, { simde_mm512_set_epi64(INT64_C(-4970734089418384356), INT64_C(-7008197103615375110), INT64_C(-7978990794053850988), INT64_C( 6181737452074950303), INT64_C( 6394661210895290710), INT64_C( 2438888788002740180), INT64_C(-4048782482230560336), INT64_C(-2996801797497290918)), 
UINT8_C(184), simde_mm512_set_epi64(INT64_C( 7074827341732948634), INT64_C(-4349730772874059749), INT64_C( 563525163273351266), INT64_C( 158929141814345045), INT64_C( 2628778802773961023), INT64_C(-2205776103080558104), INT64_C( 2197088575617698243), INT64_C( 5860665973320586546)), simde_mm512_set_epi64(INT64_C(-6743993051839021458), INT64_C( 4839674398342728647), INT64_C( 1824613162744888999), INT64_C( 7142941570334212134), INT64_C(-1086382453544016972), INT64_C( 9022617812666091311), INT64_C(-6317857543126326056), INT64_C(-9013027400318787633)), simde_mm512_set_epi64(INT64_C( 2461362369224681994), INT64_C(-7008197103615375110), INT64_C( 95138542545547298), INT64_C( 153264234564943876), INT64_C( 2335191585971118388), INT64_C( 2438888788002740180), INT64_C(-4048782482230560336), INT64_C(-2996801797497290918)) }, { simde_mm512_set_epi64(INT64_C( 7729584549648139462), INT64_C( 5368501309454936569), INT64_C(-8934120309513688260), INT64_C( 4359634748931742935), INT64_C( 8268082250112664107), INT64_C(-1907805565562356448), INT64_C( 1370639783932859498), INT64_C(-3751209977007462503)), UINT8_C(176), simde_mm512_set_epi64(INT64_C(-5226637314456684394), INT64_C( 1362777598547825147), INT64_C(-2771771646272906398), INT64_C( 7669975856184215393), INT64_C(-8300197808250187331), INT64_C( 311382923636166944), INT64_C(-2137736571596434525), INT64_C( 3407446613087781642)), simde_mm512_set_epi64(INT64_C( 613705043478027075), INT64_C( 7221274780014437874), INT64_C(-3773510896647996991), INT64_C(-1978328966883417524), INT64_C(-9140992443141990556), INT64_C( 9065287930099827226), INT64_C(-8621974329724982942), INT64_C(-2331701078232790292)), simde_mm512_set_epi64(INT64_C( 1197420910319618), INT64_C( 5368501309454936569), INT64_C(-3926997772353251008), INT64_C( 6917828375051256384), INT64_C( 8268082250112664107), INT64_C(-1907805565562356448), INT64_C( 1370639783932859498), INT64_C(-3751209977007462503)) }, { simde_mm512_set_epi64(INT64_C( 3700698387045520673), INT64_C( 
5974299641953216122), INT64_C( -230613445968297847), INT64_C(-2059534601207152403), INT64_C(-7659990185762193684), INT64_C(-1813729535102735234), INT64_C(-9075037706959084750), INT64_C(-3793204198629279116)), UINT8_C(149), simde_mm512_set_epi64(INT64_C(-1763977094002909713), INT64_C(-2903140489693440283), INT64_C( 6916354853087283405), INT64_C( -288829486582280686), INT64_C( 3770029145011694022), INT64_C( 4745990232274262704), INT64_C(-9178901749894611532), INT64_C(-4043185984863798041)), simde_mm512_set_epi64(INT64_C(-2601025793624159885), INT64_C( 3563840185149175707), INT64_C( -266768530495040309), INT64_C( 3956680363155515245), INT64_C(-3793861402632321428), INT64_C(-9102379300509684954), INT64_C(-8107510778505593825), INT64_C( 3515623724266758070)), simde_mm512_set_epi64(INT64_C(-4358068267243845277), INT64_C( 5974299641953216122), INT64_C( -230613445968297847), INT64_C( 3668413965043549696), INT64_C(-7659990185762193684), INT64_C( 111745843014421024), INT64_C(-9075037706959084750), INT64_C( 54607264595188902)) }, { simde_mm512_set_epi64(INT64_C( 6254124818226667045), INT64_C( 2295645667912807702), INT64_C(-4018707571999816637), INT64_C( -26221181248372278), INT64_C(-1805361387364896744), INT64_C(-3741154382847501293), INT64_C( 2395841610067775712), INT64_C( 4058242405589980421)), UINT8_C( 95), simde_mm512_set_epi64(INT64_C( 8131676417901457360), INT64_C( 5114845102355343256), INT64_C(-4438171947452810779), INT64_C(-7935157497162192255), INT64_C( 113046603373071999), INT64_C( 5604608652266856031), INT64_C(-6729023456945537362), INT64_C(-3975720697196785172)), simde_mm512_set_epi64(INT64_C( 4986045839810005585), INT64_C( 594372055586112482), INT64_C( 4543836970229042581), INT64_C(-3911934107467365366), INT64_C(-4070664534473188278), INT64_C( 5469443597420811446), INT64_C(-7904155369341108539), INT64_C(-3737549779697651723)), simde_mm512_set_epi64(INT64_C( 6254124818226667045), INT64_C( 16747779451847552), INT64_C(-4018707571999816637), 
INT64_C(-9106278428011921408), INT64_C( 108112814837713994), INT64_C( 5316237509498262550), INT64_C(-9075733389403931004), INT64_C(-4034937163739561500)) }, { simde_mm512_set_epi64(INT64_C(-4532025999716330471), INT64_C( 1441736923413036901), INT64_C( 4055551302629479412), INT64_C( 212827843943377228), INT64_C(-8849072478755780357), INT64_C(-1422057783446117205), INT64_C(-7199062965036293603), INT64_C( 733947372098454344)), UINT8_C( 32), simde_mm512_set_epi64(INT64_C(-6215126846548299491), INT64_C(-2916974460339411728), INT64_C( 7873834362860447881), INT64_C( 8398781451642376253), INT64_C( 4819162425848909387), INT64_C( 3108138717093232544), INT64_C(-1968231203112162098), INT64_C(-5881820928188872186)), simde_mm512_set_epi64(INT64_C(-5650512314146637209), INT64_C( 3824130547717357), INT64_C( 8692530928452313027), INT64_C( 8285423730243427), INT64_C( 3742589136248759843), INT64_C( 2734033046067673576), INT64_C( 3884903702312804919), INT64_C(-1276802171409363646)), simde_mm512_set_epi64(INT64_C(-4532025999716330471), INT64_C( 1441736923413036901), INT64_C( 7494010129634756737), INT64_C( 212827843943377228), INT64_C(-8849072478755780357), INT64_C(-1422057783446117205), INT64_C(-7199062965036293603), INT64_C( 733947372098454344)) } };
/* Run every case through the function under test and compare the result with the expected vector. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_and_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; }
/* Test for simde_mm512_maskz_and_epi64.
 *
 * Each of the 8 cases holds an 8-bit mask k, two 512-bit inputs a and b
 * (each built from eight 64-bit values with simde_mm512_set_epi64) and the
 * expected result r.  In the vectors below r is 0 in every lane whose bit
 * in k is clear and holds the AND of a and b in the other lanes; bit 0 of
 * k lines up with the LAST argument of simde_mm512_set_epi64.
 *
 * Returns 0 once every case has been run.  What happens on a mismatch is
 * decided by simde_assert_m512i_i64, which is defined outside this chunk. */
static int test_simde_mm512_maskz_and_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT8_C( 50), simde_mm512_set_epi64(INT64_C( 5084416814350799776), INT64_C(-8570269232503654461), INT64_C(-1630207427926739773), INT64_C(-6092042681407238648), INT64_C(-8913721049338044536), INT64_C( 6120766000801629269), INT64_C( 8652973546099087422), INT64_C( 3768531854712764334)), simde_mm512_set_epi64(INT64_C( 7829214476264278040), 
INT64_C( 703575204673376097), INT64_C(-5407331354528887836), INT64_C(-6120531474030028601), INT64_C(-8809472528091429264), INT64_C(-4471837688110281967), INT64_C(-1031891648804766660), INT64_C(-5771507277206231329)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C(-6890431802948124480), INT64_C(-6123627733767157760), INT64_C( 0), INT64_C( 0), INT64_C( 8072007440746715196), INT64_C( 0)) }, { UINT8_C(144), simde_mm512_set_epi64(INT64_C(-2717644085040284508), INT64_C( 5288299052354798314), INT64_C(-7277780933457101890), INT64_C( 7313132024917137274), INT64_C( 2628232513216498408), INT64_C(-7258160557155099776), INT64_C( 680094650638517474), INT64_C( 5283899056600645648)), simde_mm512_set_epi64(INT64_C(-5527357514795184575), INT64_C(-6895960825351935237), INT64_C( -43666961203633751), INT64_C(-6142317585422402205), INT64_C( 3387090051947463484), INT64_C(-2636564591268235579), INT64_C( -833681846664639227), INT64_C(-5782890152253473541)), simde_mm512_set_epi64(INT64_C(-7905821755195257856), INT64_C( 0), INT64_C( 0), INT64_C( 2323877817609098594), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C(114), simde_mm512_set_epi64(INT64_C(-3413038857294295307), INT64_C( 6475143687707440027), INT64_C( 66750031696924739), INT64_C(-4681864345113958415), INT64_C( 562484877412986300), INT64_C( 5042574770374351665), INT64_C(-5769247420573510048), INT64_C( 2830799652053530291)), simde_mm512_set_epi64(INT64_C( 937908328941131551), INT64_C(-2981077858701114759), INT64_C(-9158548898769049773), INT64_C( 3163302403568753016), INT64_C( 2171006373899068201), INT64_C( 676327318613388340), INT64_C( 8575768723328224150), INT64_C( -569519755344554193)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 5800658722606940697), INT64_C( 64181022643847235), INT64_C( 3100178613413757296), INT64_C( 0), INT64_C( 0), INT64_C( 2811090599545930240), INT64_C( 0)) }, { UINT8_C(155), simde_mm512_set_epi64(INT64_C( 4236579961216216582), INT64_C(-3141312070029828684), 
INT64_C(-3214140792841031219), INT64_C( 4681768843666616539), INT64_C(-6891029306179981446), INT64_C(-7225323117343838631), INT64_C(-4179602309674405628), INT64_C( 160804091787717168)), simde_mm512_set_epi64(INT64_C( 8837456289202868530), INT64_C( 5465736992144414155), INT64_C( 3831140603882252421), INT64_C(-3007023874774683317), INT64_C( 9173299967578168034), INT64_C( -599340352231112169), INT64_C( 7177916815745921600), INT64_C(-2503064843376095747)), simde_mm512_set_epi64(INT64_C( 4215460527910977538), INT64_C( 0), INT64_C( 0), INT64_C( 4629950784542158923), INT64_C( 2327799239051135586), INT64_C( 0), INT64_C( 4727954219193348096), INT64_C( 917302477097008)) }, { UINT8_C(107), simde_mm512_set_epi64(INT64_C(-2996801797497290918), INT64_C( 2110235908633979080), INT64_C( 6559787716707833245), INT64_C(-7698104879832611042), INT64_C( 1095594675397854353), INT64_C(-2831857028248186428), INT64_C( 3623067196209165824), INT64_C(-7194518998511455900)), simde_mm512_set_epi64(INT64_C(-3531361750574079048), INT64_C(-4970734089418384356), INT64_C(-7008197103615375110), INT64_C(-7978990794053850988), INT64_C( 6181737452074950303), INT64_C( 6394661210895290710), INT64_C( 2438888788002740180), INT64_C(-4048782482230560336)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 1801439990639675400), INT64_C( 1876030771317506200), INT64_C( 0), INT64_C( 360377053526492305), INT64_C( 0), INT64_C( 2324042582201603584), INT64_C(-8932954729898528480)) }, { UINT8_C(166), simde_mm512_set_epi64(INT64_C(-9013027400318787633), INT64_C( 7074827341732948634), INT64_C(-4349730772874059749), INT64_C( 563525163273351266), INT64_C( 158929141814345045), INT64_C( 2628778802773961023), INT64_C(-2205776103080558104), INT64_C( 2197088575617698243)), simde_mm512_set_epi64(INT64_C(-3751209977007462503), INT64_C(-6743993051839021458), INT64_C( 4839674398342728647), INT64_C( 1824613162744888999), INT64_C( 7142941570334212134), INT64_C(-1086382453544016972), INT64_C( 9022617812666091311), 
INT64_C(-6317857543126326056)), simde_mm512_set_epi64(INT64_C(-9015922487669520503), INT64_C( 0), INT64_C( 4837044293220761603), INT64_C( 0), INT64_C( 0), INT64_C( 2335191585971118388), INT64_C( 6999299776119439656), INT64_C( 0)) }, { UINT8_C(106), simde_mm512_set_epi64(INT64_C( 3407446613087781642), INT64_C(-5908963845954273231), INT64_C(-2368888154296893569), INT64_C( 6217195068479217897), INT64_C(-2678116496944625484), INT64_C(-5726717426998042368), INT64_C( 1417932582737550578), INT64_C( 3757265565845388703)), simde_mm512_set_epi64(INT64_C(-2331701078232790292), INT64_C(-5226637314456684394), INT64_C( 1362777598547825147), INT64_C(-2771771646272906398), INT64_C( 7669975856184215393), INT64_C(-8300197808250187331), INT64_C( 311382923636166944), INT64_C(-2137736571596434525)), simde_mm512_set_epi64(INT64_C( 0), INT64_C(-6523745533444423664), INT64_C( 1306048291191951739), INT64_C( 0), INT64_C( 5355107766639820832), INT64_C( 0), INT64_C( 45097422880), INT64_C( 0)) }, { UINT8_C( 98), simde_mm512_set_epi64(INT64_C(-9120454231799900960), INT64_C(-1428069393052184196), INT64_C( 5287115201302699815), INT64_C( 594305769309064149), INT64_C( 1818040010755772489), INT64_C( 5191632551175039949), INT64_C(-3171141735991582889), INT64_C(-6035806350115039225)), simde_mm512_set_epi64(INT64_C(-4043185984863798041), INT64_C(-2820807435363842669), INT64_C( 7833331097238305247), INT64_C( 314520227399119434), INT64_C(-1107462703642111951), INT64_C( 3200934455572316034), INT64_C(-6148962785270933864), INT64_C(-1328231493238063799)), simde_mm512_set_epi64(INT64_C( 0), INT64_C(-4032276290038922992), INT64_C( 5194217179798575367), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C(-9031874063504222704), INT64_C( 0)) } };
/* Run every case through the function under test and compare the result with the expected vector. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_and_epi64(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int 
test_simde_mm512_and_si512(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 1762603276), INT32_C(-1316946536), INT32_C( -409636803), INT32_C(-1096492450), INT32_C( 1487241173), INT32_C(-1940071138), INT32_C( 1116126146), INT32_C( -916337722), INT32_C( 52488417), INT32_C( 1044081507), INT32_C(-1035184013), INT32_C(-1384518181), INT32_C(-1687535487), INT32_C(-1003450758), INT32_C( 131212491), INT32_C(-1389162000)), simde_mm512_set_epi32(INT32_C(-1226890085), INT32_C( 294090078), INT32_C( 1993383303), INT32_C( 2117895966), INT32_C( -579088274), INT32_C( 1336927221), INT32_C( 43647960), INT32_C( 1458493074), INT32_C( -558283956), INT32_C( 428402022), INT32_C( 974860288), INT32_C( -624769004), INT32_C(-1594949458), INT32_C( -55819000), INT32_C( 423209449), INT32_C( -449605661)), simde_mm512_set_epi32(INT32_C( 537862152), INT32_C( 293631256), INT32_C( 1720717317), INT32_C( 1042579998), INT32_C( 1478578244), INT32_C( 202162452), INT32_C( 42075072), INT32_C( 1080083074), INT32_C( 35668032), INT32_C( 403202402), INT32_C( 34082816), INT32_C(-2009020400), INT32_C(-2140536704), INT32_C(-1004534776), INT32_C( 17834185), INT32_C(-1523383840)) }, { simde_mm512_set_epi32(INT32_C(-1226448376), INT32_C( 1927095485), INT32_C( 1303264402), INT32_C( 926300607), INT32_C( 2116494628), INT32_C(-1100758331), INT32_C( 853434891), INT32_C( 1856581548), INT32_C( 1730450085), INT32_C(-1233336711), INT32_C( 410802607), INT32_C(-1458345357), INT32_C(-1414257532), INT32_C(-1908909941), INT32_C( 216004947), INT32_C(-1153616267)), simde_mm512_set_epi32(INT32_C(-1170743204), INT32_C( 1817377482), INT32_C( 934231558), INT32_C( -128119768), INT32_C( 674694491), INT32_C( 1902405145), INT32_C(-1438313883), INT32_C( -508947384), INT32_C( -334819615), INT32_C(-1499616800), INT32_C( -300643115), INT32_C(-1687918613), INT32_C( 204696129), INT32_C( -790552335), INT32_C( -837803722), INT32_C(-1569322126)), 
simde_mm512_set_epi32(INT32_C(-1306140664), INT32_C( 1615863944), INT32_C( 95304706), INT32_C( 806619176), INT32_C( 673645824), INT32_C( 811614721), INT32_C( 574882305), INT32_C( 1621623816), INT32_C( 1677722273), INT32_C(-1508081568), INT32_C( 135530629), INT32_C(-1996463517), INT32_C( 137371648), INT32_C(-2145386367), INT32_C( 202383634), INT32_C(-1573649808)) }, { simde_mm512_set_epi32(INT32_C( -967914791), INT32_C( 1028004547), INT32_C( 1106145634), INT32_C(-2126623640), INT32_C( -6485699), INT32_C( -256904631), INT32_C(-1220204919), INT32_C( 917835787), INT32_C( 1623427491), INT32_C( -222464855), INT32_C( -472879958), INT32_C( -762982604), INT32_C(-1085405824), INT32_C( 1812599478), INT32_C( -613988136), INT32_C( 621554720)), simde_mm512_set_epi32(INT32_C( 1377438428), INT32_C( 1201451322), INT32_C( 619734582), INT32_C(-1992526637), INT32_C( -868585296), INT32_C( 1439924174), INT32_C( 1656419868), INT32_C(-1390170089), INT32_C( 1058692615), INT32_C( -836785520), INT32_C(-1830636694), INT32_C( 554982917), INT32_C(-1514534388), INT32_C( -747673601), INT32_C( -692811287), INT32_C(-1799571639)), simde_mm512_set_epi32(INT32_C( 1107954392), INT32_C( 84152322), INT32_C( 14704674), INT32_C(-2126756800), INT32_C( -870842320), INT32_C( 1350791752), INT32_C( 570435080), INT32_C( 606146563), INT32_C( 537004035), INT32_C(-1038278528), INT32_C(-2101338070), INT32_C( 282116), INT32_C(-1526071296), INT32_C( 1074397878), INT32_C( -769390392), INT32_C( 67904512)) }, { simde_mm512_set_epi32(INT32_C(-1436950998), INT32_C(-1742059387), INT32_C( 1377677769), INT32_C(-2097193192), INT32_C( 1556973207), INT32_C( 58040738), INT32_C(-1875805492), INT32_C( -452882923), INT32_C(-2070651162), INT32_C(-1417594324), INT32_C( -990171302), INT32_C( 444234765), INT32_C( -651701039), INT32_C( -296257488), INT32_C( 1302666953), INT32_C( 1243668562)), simde_mm512_set_epi32(INT32_C( -228023402), INT32_C( 1737651280), INT32_C( 890037909), INT32_C( 822465192), INT32_C( 1525557148), INT32_C( 
1672658803), INT32_C( 1808682106), INT32_C( 1316739447), INT32_C( 903813947), INT32_C( 221590740), INT32_C( 1668581990), INT32_C(-1092503304), INT32_C( 1369460064), INT32_C( 1353181098), INT32_C( 652356799), INT32_C( -684439573)), simde_mm512_set_epi32(INT32_C(-1572306430), INT32_C( 147456), INT32_C( 269262977), INT32_C( 348680), INT32_C( 1489766036), INT32_C( 53518626), INT32_C( 18504), INT32_C( 1140951061), INT32_C( 76814882), INT32_C( 151072772), INT32_C( 1081084482), INT32_C( 442513416), INT32_C( 1361069120), INT32_C( 1074225184), INT32_C( 77597833), INT32_C( 1109409858)) }, { simde_mm512_set_epi32(INT32_C(-1043054173), INT32_C( -396216896), INT32_C(-1145802326), INT32_C( -804000246), INT32_C( -145399860), INT32_C( -890427310), INT32_C( -401401997), INT32_C( 802016776), INT32_C( 1929893502), INT32_C( 73827769), INT32_C(-1971097644), INT32_C(-1831682098), INT32_C( 546355465), INT32_C( -199725455), INT32_C( 931867413), INT32_C(-1496909535)), simde_mm512_set_epi32(INT32_C(-1796636811), INT32_C(-1576316556), INT32_C( 1080356179), INT32_C(-1830141457), INT32_C(-1444813077), INT32_C( 1282909316), INT32_C( 814589845), INT32_C( 563073613), INT32_C( -161574330), INT32_C( 1115054069), INT32_C(-1922096352), INT32_C( 1283172543), INT32_C( 1028016376), INT32_C( 1652445236), INT32_C( 1602581177), INT32_C(-1986713581)), simde_mm512_set_epi32(INT32_C(-2134884063), INT32_C(-1610477248), INT32_C( 2384130), INT32_C(-1878900726), INT32_C(-1589557560), INT32_C( 1214586880), INT32_C( 536938257), INT32_C( 562938376), INT32_C( 1913033286), INT32_C( 6686129), INT32_C(-2013060352), INT32_C( 5417102), INT32_C( 536872968), INT32_C( 1612202032), INT32_C( 394338833), INT32_C(-2138822655)) }, { simde_mm512_set_epi32(INT32_C( 213329535), INT32_C( -522060385), INT32_C( -710729699), INT32_C( 911515198), INT32_C(-1475915599), INT32_C(-1846311235), INT32_C(-1624654725), INT32_C( -496488954), INT32_C(-2105881976), INT32_C( -863113580), INT32_C( -870973395), INT32_C(-2135017149), INT32_C( 
1179500895), INT32_C( 102238134), INT32_C( 1890546920), INT32_C( 1651955955)), simde_mm512_set_epi32(INT32_C(-1460720620), INT32_C(-1283988079), INT32_C( 2139823103), INT32_C(-2058406982), INT32_C( -677653135), INT32_C( 526832430), INT32_C( 918576849), INT32_C(-1987609349), INT32_C( -819905099), INT32_C( 2043707434), INT32_C( 1005516756), INT32_C( 646673888), INT32_C( -792085599), INT32_C( 923333390), INT32_C( 549762390), INT32_C( 1063027034)), simde_mm512_set_epi32(INT32_C( 145171476), INT32_C(-1604196975), INT32_C( 1434648605), INT32_C( 71569978), INT32_C(-2147266511), INT32_C( 291668524), INT32_C( 369106001), INT32_C(-2147472382), INT32_C(-2111829888), INT32_C( 1216380928), INT32_C( 134610948), INT32_C( 9056576), INT32_C( 1078558977), INT32_C( 101188358), INT32_C( 545535040), INT32_C( 575963218)) }, { simde_mm512_set_epi32(INT32_C(-1614227898), INT32_C(-1072924213), INT32_C(-2048516742), INT32_C(-1735505047), INT32_C( 409846045), INT32_C( -501166301), INT32_C( 385735082), INT32_C(-1379445210), INT32_C( 1301699864), INT32_C( -237316746), INT32_C( -173549926), INT32_C(-1638681430), INT32_C( 1204990643), INT32_C( -623938106), INT32_C( 621663116), INT32_C(-2139715294)), simde_mm512_set_epi32(INT32_C( 1168648208), INT32_C( 679514223), INT32_C(-1255159953), INT32_C(-2016174737), INT32_C( -817087094), INT32_C( 1605116212), INT32_C( 684814447), INT32_C( 1274003485), INT32_C( 1881744290), INT32_C( 579021373), INT32_C( -658206082), INT32_C( 1152351107), INT32_C( -539739024), INT32_C( 1438387923), INT32_C( -569943597), INT32_C( -79238784)), simde_mm512_set_epi32(INT32_C( 92798976), INT32_C( 4171), INT32_C(-2061105814), INT32_C(-2138961559), INT32_C( 139198728), INT32_C( 1109393696), INT32_C( 13717546), INT32_C( 164052996), INT32_C( 1073742080), INT32_C( 545390644), INT32_C( -796618214), INT32_C( 67317890), INT32_C( 1204826160), INT32_C( 1351353538), INT32_C( 67457408), INT32_C(-2142861056)) }, { simde_mm512_set_epi32(INT32_C( -593800358), INT32_C( -124181915), INT32_C( 
2110561848), INT32_C( 1255401496), INT32_C( -282522813), INT32_C( -286538666), INT32_C(-2011412362), INT32_C(-1839527164), INT32_C(-1330408299), INT32_C( 1769934774), INT32_C( -358481155), INT32_C( -123958768), INT32_C( 1676106379), INT32_C(-1305862521), INT32_C( 1797940107), INT32_C( 653525737)), simde_mm512_set_epi32(INT32_C(-1432835313), INT32_C( 1661538833), INT32_C( 1372337273), INT32_C(-1604084834), INT32_C( -921184393), INT32_C(-1395990480), INT32_C( 1258870002), INT32_C( -947895097), INT32_C(-1351881935), INT32_C( 99634026), INT32_C( 2033361976), INT32_C( 1231716550), INT32_C( -228173591), INT32_C(-1552770129), INT32_C( -338049103), INT32_C(-1393391283)), simde_mm512_set_epi32(INT32_C(-2003303670), INT32_C( 1611203073), INT32_C( 1372329016), INT32_C( 4425752), INT32_C( -922680509), INT32_C(-1395998704), INT32_C( 134758514), INT32_C(-2113929212), INT32_C(-1608286191), INT32_C( 24120098), INT32_C( 1747058744), INT32_C( 1208516608), INT32_C( 1650874505), INT32_C(-1574823289), INT32_C( 1795703681), INT32_C( 619708489)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_and_si512(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_and_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_and_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_and_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_and_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_and_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_and_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_and_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_and_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_and_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_and_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_and_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_and_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_and_si512) SIMDE_TEST_FUNC_LIST_END #include 
simde-0.7.2/test/x86/avx512/andnot.c000066400000000000000000004531131400333146700167120ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #define SIMDE_TEST_X86_AVX512_INSN andnot #include #include #include static int test_simde_mm512_andnot_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 515723887), INT32_C( 1640697809), INT32_C(-1815268655), INT32_C( -855842079), INT32_C( -876731021), INT32_C( -422224087), INT32_C( 1402147089), INT32_C( 791567468), INT32_C( -405953943), INT32_C( 280958773), INT32_C( 359942894), INT32_C( -574064836), INT32_C( 1041426019), INT32_C( 457085316), INT32_C( 1591682265), INT32_C( 1681068921)), simde_mm512_set_epi32(INT32_C( 219659736), INT32_C( 983393088), INT32_C( 175097612), INT32_C(-1948389752), INT32_C(-1760046217), INT32_C( 996280401), INT32_C( -511102649), INT32_C(-1367446405), INT32_C(-1955017682), INT32_C(-1962774388), INT32_C( -112156455), INT32_C( 1625681413), INT32_C( -214953654), INT32_C(-1594240596), INT32_C( -837160883), INT32_C( 91929810)), simde_mm512_set_epi32(INT32_C( 16952720), INT32_C( 437542912), INT32_C( 136496396), INT32_C( 50400264), INT32_C( 335663620), INT32_C( 421658704), INT32_C(-1610080186), INT32_C(-2142232557), INT32_C( 137382918), INT32_C(-1962909560), INT32_C( -402612207), INT32_C( 539330561), INT32_C(-1054863096), INT32_C(-1597947864), INT32_C(-2147430396), INT32_C( 21539970)) }, { simde_mm512_set_epi32(INT32_C( -691442479), INT32_C(-1656332537), INT32_C( -736641091), INT32_C( 1498293216), INT32_C( -507651370), INT32_C( 1481766884), INT32_C(-1911092113), INT32_C( -872210414), INT32_C( 291047220), INT32_C(-1241987411), INT32_C( 1619041328), INT32_C( 1464413104), INT32_C(-1017310468), INT32_C( 1540491270), INT32_C( 2102275128), INT32_C(-1414382909)), simde_mm512_set_epi32(INT32_C( 242206574), INT32_C( 555720064), INT32_C( -659215600), INT32_C( 1975929957), INT32_C( 1131537123), INT32_C( 2072355897), INT32_C( 1377537047), INT32_C( 1623632095), INT32_C( 536506999), INT32_C(-1382727392), 
INT32_C( 37097013), INT32_C( 2004578493), INT32_C( 1803364246), INT32_C( 1342516983), INT32_C( 514234840), INT32_C(-1522858319)), simde_mm512_set_epi32(INT32_C( 136741678), INT32_C( 538542208), INT32_C( 144703488), INT32_C( 612387845), INT32_C( 37756961), INT32_C( 595853337), INT32_C( 1342734352), INT32_C( 549750989), INT32_C( 245526595), INT32_C( 134545664), INT32_C( 37093893), INT32_C( 540168205), INT32_C( 673195266), INT32_C( 337649), INT32_C( 44077504), INT32_C( 67699760)) }, { simde_mm512_set_epi32(INT32_C( 835536002), INT32_C( -63027427), INT32_C( 2017135186), INT32_C(-1844829768), INT32_C( 936597093), INT32_C( -389163916), INT32_C(-1786076372), INT32_C( 62625566), INT32_C(-1459727459), INT32_C( 1125674521), INT32_C(-1286537639), INT32_C( 550088134), INT32_C( -31520277), INT32_C(-1814664190), INT32_C( 1588224923), INT32_C( 1901241906)), simde_mm512_set_epi32(INT32_C( -684209907), INT32_C( 1116413094), INT32_C( -29612798), INT32_C(-1906935505), INT32_C( 1879010472), INT32_C( -984076172), INT32_C( -987963932), INT32_C(-1705792694), INT32_C( 1395521155), INT32_C( 2062716504), INT32_C( 1645135174), INT32_C( 273600119), INT32_C( -650211201), INT32_C( 1039685180), INT32_C(-1680417560), INT32_C( -160711201)), simde_mm512_set_epi32(INT32_C( -969766643), INT32_C( 42014882), INT32_C(-2046548736), INT32_C( 206854151), INT32_C( 1210853512), INT32_C( 84942848), INT32_C( 1075076288), INT32_C(-1740625856), INT32_C( 1392616450), INT32_C( 954368064), INT32_C( 1074659590), INT32_C( 268845617), INT32_C( 18911252), INT32_C( 740823612), INT32_C(-2125161376), INT32_C(-2044126771)) }, { simde_mm512_set_epi32(INT32_C( -911319633), INT32_C(-1035947605), INT32_C( -14347010), INT32_C( 135240154), INT32_C( 1039097026), INT32_C(-1325726567), INT32_C( 1814577462), INT32_C( -309546152), INT32_C( 2107794809), INT32_C( -690752206), INT32_C(-1567183976), INT32_C( 1570875131), INT32_C( -359037430), INT32_C( 1064726494), INT32_C( -305221103), INT32_C( 2039553475)), 
simde_mm512_set_epi32(INT32_C( -358609490), INT32_C( 1748558231), INT32_C( 769947846), INT32_C(-2114787166), INT32_C( 1221765938), INT32_C(-1846472677), INT32_C( 893676657), INT32_C(-2056337544), INT32_C(-1125244927), INT32_C(-2123257127), INT32_C( 1395407144), INT32_C(-2100494303), INT32_C( 123622128), INT32_C( -234312093), INT32_C( 1320504606), INT32_C( -696459867)), simde_mm512_set_epi32(INT32_C( 570425344), INT32_C( 674775060), INT32_C( 12609536), INT32_C(-2114953184), INT32_C( 1074832176), INT32_C( 16781314), INT32_C( 289680449), INT32_C( 6439968), INT32_C(-2142502912), INT32_C( 18877129), INT32_C( 1361580064), INT32_C(-2108948480), INT32_C( 88494320), INT32_C(-1073184735), INT32_C( 36785422), INT32_C(-2039693276)) }, { simde_mm512_set_epi32(INT32_C( 1741169869), INT32_C(-1806166644), INT32_C( 1030404360), INT32_C( 1645919232), INT32_C( -724495967), INT32_C( 1251263729), INT32_C( -769398486), INT32_C(-1951408118), INT32_C( 1006137744), INT32_C( -650052668), INT32_C( 1803988670), INT32_C( -565766270), INT32_C(-2075332822), INT32_C( -77783473), INT32_C( 1442895719), INT32_C( -423885068)), simde_mm512_set_epi32(INT32_C(-1467349800), INT32_C(-1486916034), INT32_C( 580711779), INT32_C( 1504148541), INT32_C( 661197291), INT32_C( 2016703871), INT32_C( 459937445), INT32_C( 1081922115), INT32_C( 93168137), INT32_C( -744509287), INT32_C( -84767472), INT32_C( 1535078904), INT32_C( 1804568444), INT32_C(-1641570308), INT32_C( 1307677448), INT32_C( -156993467)), simde_mm512_set_epi32(INT32_C(-2013134832), INT32_C( 587686450), INT32_C( 43271267), INT32_C( 430260285), INT32_C( 589824074), INT32_C( 807416078), INT32_C( 155719301), INT32_C( 1078989377), INT32_C( 67469833), INT32_C( 43974681), INT32_C(-1871707904), INT32_C( 20471928), INT32_C( 1803747412), INT32_C( 69370288), INT32_C( 166789128), INT32_C( 268466177)) }, { simde_mm512_set_epi32(INT32_C( -789590264), INT32_C( 1747530260), INT32_C( 250254813), INT32_C( -46824160), INT32_C( 1521185343), INT32_C( 1710396447), 
INT32_C( -401960034), INT32_C( 376331638), INT32_C( -481899788), INT32_C( 951540577), INT32_C(-1886694025), INT32_C( -615462627), INT32_C(-1246126101), INT32_C( 1628361415), INT32_C( 1197988194), INT32_C(-1740462923)), simde_mm512_set_epi32(INT32_C( 1235841465), INT32_C(-1524332124), INT32_C( 1158299501), INT32_C(-2030663913), INT32_C( -368124005), INT32_C( -216689066), INT32_C( 2049678955), INT32_C(-1811053975), INT32_C( -579248849), INT32_C( 817648154), INT32_C( 1351147076), INT32_C( -248769414), INT32_C( 1542937557), INT32_C(-1429188342), INT32_C( 753897242), INT32_C( -407543559)), simde_mm512_set_epi32(INT32_C( 151007409), INT32_C(-2063300192), INT32_C( 1090529824), INT32_C( 46268951), INT32_C(-1610314368), INT32_C(-1845165504), INT32_C( 304155745), INT32_C(-2147384823), INT32_C( 473502987), INT32_C( 524314), INT32_C( 1342212608), INT32_C( 539759714), INT32_C( 1246117908), INT32_C(-1966079736), INT32_C( 680003096), INT32_C( 1739931720)) }, { simde_mm512_set_epi32(INT32_C( 1871269268), INT32_C( 408476277), INT32_C( 620349445), INT32_C( 85656022), INT32_C( 530242315), INT32_C( 1600939321), INT32_C( 1166499662), INT32_C( 550456559), INT32_C( 1205553840), INT32_C( -507718293), INT32_C( -629410605), INT32_C(-1400491933), INT32_C(-1740280079), INT32_C( 470828561), INT32_C( 710611826), INT32_C( 1460766627)), simde_mm512_set_epi32(INT32_C( 132567711), INT32_C( -504432561), INT32_C( 1784336368), INT32_C(-1195419261), INT32_C(-1432068840), INT32_C( -756951336), INT32_C( 519218456), INT32_C( 2068445443), INT32_C( -769032976), INT32_C(-1464370595), INT32_C( -636201129), INT32_C( 798298919), INT32_C( 605141360), INT32_C( 1690763202), INT32_C( 743563485), INT32_C( 1615889032)), simde_mm512_set_epi32(INT32_C( 6722059), INT32_C( -509206518), INT32_C( 1241647088), INT32_C(-1197189119), INT32_C(-1608252400), INT32_C(-2138991936), INT32_C( 443588624), INT32_C( 1526771968), INT32_C(-1876937664), INT32_C( 134425108), INT32_C( 262916), INT32_C( 51446532), INT32_C( 605065472), 
INT32_C( 1623636418), INT32_C( 67166349), INT32_C( 541065224)) }, { simde_mm512_set_epi32(INT32_C( 1287269628), INT32_C( 1003736038), INT32_C( 977850641), INT32_C(-1038923525), INT32_C( -628842024), INT32_C( 1597060388), INT32_C( -643406365), INT32_C(-1390651863), INT32_C( 1433162166), INT32_C( -27649596), INT32_C( -695421854), INT32_C( 1977918902), INT32_C(-1118619506), INT32_C( 218268934), INT32_C( 602753386), INT32_C( -663684258)), simde_mm512_set_epi32(INT32_C( 238738926), INT32_C( 1501256933), INT32_C( -668514921), INT32_C( 178997567), INT32_C( 618897994), INT32_C(-1305584804), INT32_C( 287401445), INT32_C( -682321436), INT32_C(-1248279406), INT32_C(-1232466621), INT32_C( 1932263578), INT32_C( 1672045836), INT32_C( -31634555), INT32_C( -429030840), INT32_C( 1478948841), INT32_C( -636575791)), simde_mm512_set_epi32(INT32_C( 33607938), INT32_C( 1076368385), INT32_C(-1071184762), INT32_C( 145228036), INT32_C( 610468354), INT32_C(-1609676200), INT32_C( 65540), INT32_C( 1379961284), INT32_C(-1601141760), INT32_C( 8389635), INT32_C( 555745432), INT32_C( 34160648), INT32_C( 1108099841), INT32_C( -496172472), INT32_C( 1476572801), INT32_C( 34473089)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_castps_si512(simde_mm512_andnot_ps(simde_mm512_castsi512_ps(test_vec[i].a), simde_mm512_castsi512_ps(test_vec[i].b))); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_andnot_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C(-5692392796256408556), INT64_C( 6556277497990144923), INT64_C(-8451768093244871108), INT64_C( 2502789693644361692), INT64_C( 1621880469938104082), INT64_C(-7297255235572331483), INT64_C(-2352677665930074905), INT64_C( 4911158619134204312)), simde_mm512_set_epi64(INT64_C(-2565389980846007780), INT64_C( 3404730604833389160), INT64_C( 1179026943527716274), 
INT64_C(-2906973067026822223), INT64_C( 7260818647608791158), INT64_C( 5579870493016706466), INT64_C( 3863525595432901356), INT64_C(-6057345860275027490)), simde_mm512_set_epi64(INT64_C( 5504912800215142920), INT64_C( 2666140128227367008), INT64_C( 1173388089487589762), INT64_C(-3098395176816541663), INT64_C( 6935978858541748324), INT64_C( 4991411844055941506), INT64_C( 2343560720521648648), INT64_C(-6068587301518688186)) }, { simde_mm512_set_epi64(INT64_C(-2759528052506956708), INT64_C(-5575162278173961328), INT64_C(-8694367187788105741), INT64_C(-8990346118631710119), INT64_C( 4592063463963295950), INT64_C( 8368804684692221223), INT64_C(-6476206435262682842), INT64_C(-8876450871330607726)), simde_mm512_set_epi64(INT64_C(-6737293364691021911), INT64_C(-4204449908398867590), INT64_C( 160841008688998741), INT64_C(-6365408201349652038), INT64_C( 2001896472947132682), INT64_C( -99471897253479533), INT64_C(-1326129419939093389), INT64_C(-2924053864540399741)), simde_mm512_set_epi64(INT64_C( 2450046296219059105), INT64_C( 4973873968629295210), INT64_C( 11267795499616260), INT64_C( 2630102325595865506), INT64_C( 18051886326876416), INT64_C(-8458876851257732464), INT64_C( 5296233507537044561), INT64_C( 5992932110406914561)) }, { simde_mm512_set_epi64(INT64_C(-7457051575750248602), INT64_C( 6070335147558558873), INT64_C(-7490477224728001543), INT64_C( 1279317055657379478), INT64_C( 8751242136386772213), INT64_C( 637231009559692595), INT64_C(-7114327187130069406), INT64_C( 1391778837665435621)), simde_mm512_set_epi64(INT64_C( 6308740259462318802), INT64_C(-9185391234602091403), INT64_C( 7626881538428569222), INT64_C(-3068069010762250319), INT64_C(-4951576282750300305), INT64_C( 128509774881067912), INT64_C( 7892251767542575626), INT64_C(-8930995426443097321)), simde_mm512_set_epi64(INT64_C( 5119472124868954256), INT64_C(-9187125192365432732), INT64_C( 7048133801267954694), INT64_C(-4311066996174143199), INT64_C(-9076908839077919990), INT64_C( 74319038222733960), INT64_C( 
6954164901648707592), INT64_C(-8931156479129911278)) }, { simde_mm512_set_epi64(INT64_C( 6964163421595280406), INT64_C( 8751199602933822917), INT64_C( 3889132740347846858), INT64_C(-7126879543636862431), INT64_C(-7513660375211080284), INT64_C( 6453129860776144209), INT64_C(-5544576080495062479), INT64_C( 5873331717169095384)), simde_mm512_set_epi64(INT64_C( 8830709936142460331), INT64_C(-9018958152160609695), INT64_C( 8077330800987365186), INT64_C(-8496831583043834543), INT64_C(-8535629658492460138), INT64_C(-7680526102244966263), INT64_C( -372932882462668779), INT64_C( 6329513109562115746)), simde_mm512_set_epi64(INT64_C( 1875839696979362217), INT64_C(-9042040235334956512), INT64_C( 4611686019604422400), INT64_C( 145522704699106640), INT64_C( 576814847044362770), INT64_C(-8907768133275565944), INT64_C( 5247259266967930884), INT64_C( 456185876494468130)) }, { simde_mm512_set_epi64(INT64_C( -351666990455047830), INT64_C(-7399285389685964954), INT64_C(-5908952440536913792), INT64_C( -611732173843171755), INT64_C( 7999973001790565510), INT64_C(-8075898444541975424), INT64_C( 5770350522878101247), INT64_C( 1116848091668783433)), simde_mm512_set_epi64(INT64_C( 2217552425319516429), INT64_C(-8721047939211270856), INT64_C(-2880324325532209431), INT64_C( -187231364083775137), INT64_C( 6742854000402878536), INT64_C( 168773737674717), INT64_C( 3418235066721438872), INT64_C( 4202828047673997422)), simde_mm512_set_epi64(INT64_C( 342365941483028485), INT64_C( 479783009225555992), INT64_C( 5764607798056863337), INT64_C( 604680821040318730), INT64_C( 1194123438370115656), INT64_C( 26768921351005), INT64_C( 3413729087268849664), INT64_C( 3458800331700312102)) }, { simde_mm512_set_epi64(INT64_C(-2132909336669479608), INT64_C(-1158827795013308041), INT64_C(-7670914575902882420), INT64_C( -69696623451151043), INT64_C(-4047902191338288971), INT64_C( 7092767718101885012), INT64_C( 5934909912424448575), INT64_C( 5411709750270769968)), simde_mm512_set_epi64(INT64_C(-7875865474019974757), 
INT64_C(-3285041077981983127), INT64_C( 8063284926890959108), INT64_C(-3700459330126222884), INT64_C(-7671356082612531796), INT64_C( 1792383659764879933), INT64_C( 2583453571264272321), INT64_C( 8675197907294370872)), simde_mm512_set_epi64(INT64_C( 1193748656575226003), INT64_C( 1152974871187164680), INT64_C( 7666393384072290304), INT64_C( 46461582121208000), INT64_C( 1155463611999061256), INT64_C( 1770267941057597993), INT64_C( 2414567396337178048), INT64_C( 3486980733385704456)) }, { simde_mm512_set_epi64(INT64_C(-3578776133799908286), INT64_C(-1505161927362377530), INT64_C( 1984257760933558326), INT64_C( -235993280127523291), INT64_C(-5471198518359697501), INT64_C(-3736915368061275681), INT64_C(-2239211533422890096), INT64_C(-3284418263843820488)), simde_mm512_set_epi64(INT64_C(-2978941464173404520), INT64_C( 4582889970668771380), INT64_C( -557407531320217043), INT64_C( 5386308122944286215), INT64_C(-2433611387892931894), INT64_C( 7741810302662188301), INT64_C( 18824623009495704), INT64_C( 3999273364541981338)), simde_mm512_set_epi64(INT64_C( 1200223062730085016), INT64_C( 1477497354446704688), INT64_C(-2287124905805331959), INT64_C( 162132485929502722), INT64_C( 5343540817909325896), INT64_C( 2544569189669112832), INT64_C( 633336187848712), INT64_C( 2702160884594393730)) }, { simde_mm512_set_epi64(INT64_C( 352684271852599798), INT64_C( 4911474499221167587), INT64_C( 1508056965830938497), INT64_C( 3074813921141815339), INT64_C( 7701628738251481990), INT64_C( -466066103765916190), INT64_C( 8562974168142071295), INT64_C( -919355185316238533)), simde_mm512_set_epi64(INT64_C( 4326901039471149930), INT64_C(-7137503476184318358), INT64_C(-1078077923693263341), INT64_C(-1859594942180658021), INT64_C(-4053912759805256064), INT64_C(-4136267192341554803), INT64_C( 7711878059533707111), INT64_C(-6181901304080395815)), simde_mm512_set_epi64(INT64_C( 4037478715378254344), INT64_C(-7434758645930761720), INT64_C(-2233710626472820718), INT64_C(-4318952040280619888), 
INT64_C(-8855183721016321024), INT64_C( 437135751054956557), INT64_C( 649099009830593024), INT64_C( 576514078768369856)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_castpd_si512(simde_mm512_andnot_pd(simde_mm512_castsi512_pd(test_vec[i].a), simde_mm512_castsi512_pd(test_vec[i].b))); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_andnot_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask16 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 483765022), INT32_C(-1234873154), INT32_C(-1289658932), INT32_C( 1557667178), INT32_C( -573006378), INT32_C( -844585804), INT32_C( 908677468), INT32_C( 120945929), INT32_C(-1595338087), INT32_C(-1433288415), INT32_C( 1272415402), INT32_C( 2052605464), INT32_C(-1185243420), INT32_C( 696776161), INT32_C( 617510437), INT32_C( 1274220393)), UINT16_C(33954), simde_mm512_set_epi32(INT32_C( 1145617415), INT32_C( -605546679), INT32_C( 2927077), INT32_C( -19849762), INT32_C(-1597262180), INT32_C( -867043590), INT32_C( -107439489), INT32_C( 1287210357), INT32_C( 1092392250), INT32_C( 1062404217), INT32_C( -979680700), INT32_C( 1129202470), INT32_C( 1479969823), INT32_C( -738882529), INT32_C( 1249939660), INT32_C( -548556138)), simde_mm512_set_epi32(INT32_C( 283318882), INT32_C( 1846644474), INT32_C( 2020741558), INT32_C( 2084726692), INT32_C(-1625067961), INT32_C( 1808817126), INT32_C( 188488265), INT32_C( -493292109), INT32_C(-1012406283), INT32_C( 2120995640), INT32_C(-1978262848), INT32_C( -210107724), INT32_C( 789544495), INT32_C( -10089859), INT32_C( -531570606), INT32_C( 1286299547)), simde_mm512_set_epi32(INT32_C( 279118432), INT32_C(-1234873154), INT32_C(-1289658932), INT32_C( 1557667178), INT32_C( -573006378), INT32_C( 595593476), INT32_C( 908677468), INT32_C( 120945929), INT32_C(-2103221563), INT32_C(-1433288415), INT32_C( 168040576), INT32_C( 
2052605464), INT32_C(-1185243420), INT32_C( 696776161), INT32_C(-1605345262), INT32_C( 1274220393)) }, { simde_mm512_set_epi32(INT32_C( -281178768), INT32_C( -360418194), INT32_C( 1198549209), INT32_C( -896335694), INT32_C( 1796051299), INT32_C( -602464105), INT32_C( 1096879395), INT32_C( 2101844446), INT32_C( 1483513958), INT32_C( 55530807), INT32_C(-1589480307), INT32_C( -48228318), INT32_C( 889897511), INT32_C( 1575441246), INT32_C(-1726327647), INT32_C( 657269965)), UINT16_C(34646), simde_mm512_set_epi32(INT32_C( 1170712187), INT32_C( 1365513540), INT32_C( -159824212), INT32_C( 976500494), INT32_C( -2253502), INT32_C( 1424815879), INT32_C( 1340211205), INT32_C( 478098159), INT32_C( 545970493), INT32_C(-1738506699), INT32_C( 725751947), INT32_C( 876157308), INT32_C( 729412496), INT32_C( 1316518940), INT32_C( 479437804), INT32_C( 1749631626)), simde_mm512_set_epi32(INT32_C( -544439732), INT32_C( 514265282), INT32_C(-1783487008), INT32_C(-1073881913), INT32_C( -917759499), INT32_C( 721599990), INT32_C( 1403076580), INT32_C( -867638009), INT32_C(-2070564498), INT32_C( 1494227565), INT32_C( 156325221), INT32_C( -117354474), INT32_C(-2029550992), INT32_C( 1184377155), INT32_C( -325424585), INT32_C( -563054056)), simde_mm512_set_epi32(INT32_C(-1710751740), INT32_C( -360418194), INT32_C( 1198549209), INT32_C( -896335694), INT32_C( 1796051299), INT32_C( 721553648), INT32_C( 268517344), INT32_C(-1073691392), INT32_C( 1483513958), INT32_C( 1091567688), INT32_C(-1589480307), INT32_C( -922730494), INT32_C( 889897511), INT32_C( 8388931), INT32_C( -536328173), INT32_C( 657269965)) }, { simde_mm512_set_epi32(INT32_C(-1745677982), INT32_C( 235931267), INT32_C( -555741923), INT32_C( 150463911), INT32_C( 354747494), INT32_C( 2006985747), INT32_C( 1517200768), INT32_C( -149674742), INT32_C(-1301892689), INT32_C( 1164273534), INT32_C( -519614566), INT32_C( 1518672842), INT32_C(-1430542782), INT32_C( -567985198), INT32_C( 1793594874), INT32_C( 1766364533)), UINT16_C(51458), 
simde_mm512_set_epi32(INT32_C(-1697411653), INT32_C( 213103619), INT32_C( 1166379858), INT32_C( 530625194), INT32_C( 1706895557), INT32_C(-1311465088), INT32_C( 793729023), INT32_C(-1062948513), INT32_C( -58027177), INT32_C( -215831346), INT32_C(-1081872765), INT32_C( 617218322), INT32_C( 1703489303), INT32_C( 1228468220), INT32_C( 705631662), INT32_C( 1003062693)), simde_mm512_set_epi32(INT32_C(-1197760733), INT32_C(-1777870117), INT32_C( 1151957666), INT32_C( -467243461), INT32_C( 1044840108), INT32_C( 1467862627), INT32_C( 340861518), INT32_C( -683495543), INT32_C( -171219649), INT32_C(-1277374003), INT32_C(-2049184175), INT32_C( -804992531), INT32_C( 1254613706), INT32_C( -484210109), INT32_C( -976973176), INT32_C( 768220545)), simde_mm512_set_epi32(INT32_C( 537399808), INT32_C(-1845212456), INT32_C( -555741923), INT32_C( 150463911), INT32_C( 440587816), INT32_C( 2006985747), INT32_C( 1517200768), INT32_C( 390205056), INT32_C(-1301892689), INT32_C( 1164273534), INT32_C( -519614566), INT32_C( 1518672842), INT32_C(-1430542782), INT32_C( -567985198), INT32_C( -977239552), INT32_C( 1766364533)) }, { simde_mm512_set_epi32(INT32_C( 1636500168), INT32_C( 444177967), INT32_C(-1663266514), INT32_C( 191092965), INT32_C( 488118829), INT32_C(-1542228246), INT32_C(-1543977108), INT32_C(-1747326233), INT32_C( 472323781), INT32_C( 181690416), INT32_C( -8111931), INT32_C(-1512462189), INT32_C(-1412708648), INT32_C( -857864914), INT32_C(-1610668993), INT32_C( 2003858110)), UINT16_C( 5589), simde_mm512_set_epi32(INT32_C( -283174658), INT32_C( 170838247), INT32_C( -393103783), INT32_C( 2067132417), INT32_C( -418400070), INT32_C(-1518152549), INT32_C( 1910825371), INT32_C(-1243038545), INT32_C( 116520479), INT32_C( -366505216), INT32_C( 1914112492), INT32_C( 1911296968), INT32_C( 2113218059), INT32_C( -692180631), INT32_C(-1020362892), INT32_C( -633211439)), simde_mm512_set_epi32(INT32_C( 1184440056), INT32_C( 166652038), INT32_C(-1574005475), INT32_C( 2085250974), 
INT32_C(-1914483545), INT32_C( -801496013), INT32_C( 1887253581), INT32_C(-1389414117), INT32_C(-1991582465), INT32_C( 878735212), INT32_C(-1594175370), INT32_C( 2077658842), INT32_C(-1116765072), INT32_C( 1279728229), INT32_C( 1087544376), INT32_C( 2038214643)), simde_mm512_set_epi32(INT32_C( 1636500168), INT32_C( 444177967), INT32_C(-1663266514), INT32_C( 71960478), INT32_C( 488118829), INT32_C( 1345855520), INT32_C(-1543977108), INT32_C( 134676752), INT32_C(-1995831072), INT32_C( 339766380), INT32_C( -8111931), INT32_C( 168987666), INT32_C(-1412708648), INT32_C( 138482180), INT32_C(-1610668993), INT32_C( 557582882)) }, { simde_mm512_set_epi32(INT32_C( 551147024), INT32_C( -687338198), INT32_C( 60918053), INT32_C( 1437206085), INT32_C( 434041201), INT32_C( 1422808900), INT32_C( 419480808), INT32_C(-1939817409), INT32_C(-1683817642), INT32_C( -409888460), INT32_C( 1718430638), INT32_C( 1457046604), INT32_C( 734344028), INT32_C( 175091099), INT32_C( 770584551), INT32_C( -95488435)), UINT16_C(29324), simde_mm512_set_epi32(INT32_C( 1939419432), INT32_C( -691029505), INT32_C( -442395497), INT32_C( -427009027), INT32_C( 817522174), INT32_C( 8776211), INT32_C( 1606933870), INT32_C( -913009701), INT32_C(-1219423042), INT32_C( 450853660), INT32_C( 761339041), INT32_C( 889962544), INT32_C(-1736069360), INT32_C(-1763810886), INT32_C(-1763494181), INT32_C( 1322133292)), simde_mm512_set_epi32(INT32_C( 114683937), INT32_C( 1592723028), INT32_C( 623286176), INT32_C(-1573004789), INT32_C( 386412089), INT32_C( 1236627295), INT32_C( -815669616), INT32_C( 2140872084), INT32_C(-1844875837), INT32_C( 266739419), INT32_C(-1210833034), INT32_C( 1948981056), INT32_C( -293676893), INT32_C( 1361522457), INT32_C( 417503278), INT32_C( 633831284)), simde_mm512_set_epi32(INT32_C( 551147024), INT32_C( 136316416), INT32_C( 394016), INT32_C( 3244034), INT32_C( 434041201), INT32_C( 1422808900), INT32_C(-2145385840), INT32_C(-1939817409), INT32_C( 551233), INT32_C( -409888460), INT32_C( 
1718430638), INT32_C( 1457046604), INT32_C( 1719292067), INT32_C( 1092685313), INT32_C( 770584551), INT32_C( -95488435)) }, { simde_mm512_set_epi32(INT32_C(-1371022440), INT32_C( 1457704499), INT32_C( -431597639), INT32_C(-1022830061), INT32_C( 36727871), INT32_C( 132345530), INT32_C(-1160653220), INT32_C( 1075044178), INT32_C( 1947162433), INT32_C( 484643153), INT32_C(-1413771472), INT32_C( -151443305), INT32_C( -82344071), INT32_C(-1396164880), INT32_C( 775295095), INT32_C( 1585972112)), UINT16_C(54244), simde_mm512_set_epi32(INT32_C( 1350970412), INT32_C(-1442308200), INT32_C( 1774467796), INT32_C( -258916798), INT32_C(-1518028161), INT32_C( 1215654276), INT32_C(-1158758506), INT32_C(-1884048450), INT32_C( -996858784), INT32_C( 1572275854), INT32_C( -61363356), INT32_C( 71635930), INT32_C( 890553866), INT32_C(-1657029576), INT32_C( 875900884), INT32_C( 232674574)), simde_mm512_set_epi32(INT32_C( -559322868), INT32_C( 26562494), INT32_C( 1556236736), INT32_C( 144590511), INT32_C( 2137277580), INT32_C(-1485572616), INT32_C( 664308651), INT32_C( 525825403), INT32_C( 1235000793), INT32_C( 818058128), INT32_C( 1639942075), INT32_C( 1363996226), INT32_C(-1688385601), INT32_C( 521315224), INT32_C( -495140458), INT32_C( 2110266874)), simde_mm512_set_epi32(INT32_C(-1909899008), INT32_C( 26558502), INT32_C( -431597639), INT32_C( 135151789), INT32_C( 36727871), INT32_C( 132345530), INT32_C( 84934697), INT32_C( 272909377), INT32_C( 151558553), INT32_C( 541102352), INT32_C( 27788443), INT32_C( -151443305), INT32_C( -82344071), INT32_C( 33555328), INT32_C( 775295095), INT32_C( 1585972112)) }, { simde_mm512_set_epi32(INT32_C(-1445633201), INT32_C(-1516803416), INT32_C( 2047415330), INT32_C( 756009385), INT32_C( 795635255), INT32_C( 735619934), INT32_C(-1886661005), INT32_C( 1006199392), INT32_C( -253641367), INT32_C( 505896362), INT32_C( 377279653), INT32_C( 782384760), INT32_C(-2053863520), INT32_C( 173648830), INT32_C(-1212193602), INT32_C( 646275887)), UINT16_C( 3833), 
simde_mm512_set_epi32(INT32_C(-1717413045), INT32_C( 37772527), INT32_C( 997132272), INT32_C( 1212574322), INT32_C( -50264086), INT32_C( 1583086284), INT32_C(-1387426254), INT32_C( 542967980), INT32_C( 321849276), INT32_C( 2124033808), INT32_C( 1752461294), INT32_C(-1726583281), INT32_C( -438403938), INT32_C(-1226147069), INT32_C( 1033013441), INT32_C(-1845989576)), simde_mm512_set_epi32(INT32_C( -928885408), INT32_C( 1847851352), INT32_C(-1563646145), INT32_C(-1610113698), INT32_C( -632488883), INT32_C( -579742459), INT32_C( 505595497), INT32_C( 1976491564), INT32_C( 1357643236), INT32_C( -210153251), INT32_C(-1628647323), INT32_C(-1816082231), INT32_C( 1251469965), INT32_C(-2146681250), INT32_C( 1797992596), INT32_C(-1790080236)), simde_mm512_set_epi32(INT32_C(-1445633201), INT32_C(-1516803416), INT32_C( 2047415330), INT32_C( 756009385), INT32_C( 38598661), INT32_C(-2128607999), INT32_C( 304234569), INT32_C( 1006199392), INT32_C( 1086388288), INT32_C(-2124328755), INT32_C(-1769435135), INT32_C( 46170304), INT32_C( 167870977), INT32_C( 173648830), INT32_C(-1212193602), INT32_C( 67469316)) }, { simde_mm512_set_epi32(INT32_C( -995130208), INT32_C(-1764606453), INT32_C( -537517512), INT32_C( 1451556674), INT32_C(-2097109774), INT32_C( 404626699), INT32_C( 1345130097), INT32_C( 1798816735), INT32_C( 621374452), INT32_C( 359481722), INT32_C( -121162344), INT32_C(-1051201334), INT32_C( 1869160778), INT32_C( -582139350), INT32_C( 314118274), INT32_C(-1141503487)), UINT16_C(47272), simde_mm512_set_epi32(INT32_C( 1226690931), INT32_C( 775179034), INT32_C(-2065746086), INT32_C( 399353184), INT32_C( 328691430), INT32_C(-1594470117), INT32_C(-1552077762), INT32_C( 88628502), INT32_C( 772052572), INT32_C( 1376748436), INT32_C(-1273427356), INT32_C( 738624056), INT32_C( 647794952), INT32_C( 804576006), INT32_C( 1968895876), INT32_C( 505069248)), simde_mm512_set_epi32(INT32_C(-1066067632), INT32_C( -638799863), INT32_C(-1513539525), INT32_C(-1037105416), INT32_C( 605705140), 
INT32_C(-2097483540), INT32_C( 62474077), INT32_C( 2107466991), INT32_C( 1856531921), INT32_C( 781853938), INT32_C( 1472528720), INT32_C( -275942665), INT32_C( 990137373), INT32_C( 1633665081), INT32_C( 480667256), INT32_C( -831347442)), simde_mm512_set_epi32(INT32_C(-2141190144), INT32_C(-1764606453), INT32_C( 553650209), INT32_C(-1071512936), INT32_C( 604508432), INT32_C( 404626699), INT32_C( 1345130097), INT32_C( 1798816735), INT32_C( 1084778881), INT32_C( 359481722), INT32_C( 1136918800), INT32_C(-1051201334), INT32_C( 419449877), INT32_C( -582139350), INT32_C( 314118274), INT32_C(-1141503487)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_castps_si512(simde_mm512_mask_andnot_ps(simde_mm512_castsi512_ps(test_vec[i].src), test_vec[i].k, simde_mm512_castsi512_ps(test_vec[i].a), simde_mm512_castsi512_ps(test_vec[i].b))); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_andnot_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT16_C(41898), simde_mm512_set_epi32(INT32_C(-1595502197), INT32_C(-1527248547), INT32_C( 1075363080), INT32_C(-1963744626), INT32_C( -841874568), INT32_C( 1348974030), INT32_C( 932258327), INT32_C(-1638556215), INT32_C( -69119366), INT32_C(-1406064931), INT32_C( -198162021), INT32_C( -674249080), INT32_C( -972410055), INT32_C(-1112978451), INT32_C( -141156932), INT32_C(-1950860528)), simde_mm512_set_epi32(INT32_C( -211589013), INT32_C( 652089670), INT32_C( 1378847800), INT32_C( 904957231), INT32_C(-1966320781), INT32_C(-1079187730), INT32_C( 1733727399), INT32_C(-1452272768), INT32_C(-1073785858), INT32_C( -63492051), INT32_C( 1043637479), INT32_C(-1013855000), INT32_C( 942467481), INT32_C(-1080366077), INT32_C( 642537593), INT32_C( 818463971)), simde_mm512_set_epi32(INT32_C( 1392601184), INT32_C( 0), INT32_C( 304545840), INT32_C( 0), INT32_C( 0), INT32_C( 
0), INT32_C( 1078367392), INT32_C( 555745792), INT32_C( 69075332), INT32_C( 0), INT32_C( 168076388), INT32_C( 0), INT32_C( 941934720), INT32_C( 0), INT32_C( 4735041), INT32_C( 0)) }, { UINT16_C(54776), simde_mm512_set_epi32(INT32_C(-1423327830), INT32_C( 463002536), INT32_C( 1170361638), INT32_C( 1439896493), INT32_C( -881601279), INT32_C( 439454207), INT32_C( 642703998), INT32_C( 1761947183), INT32_C( 1210383154), INT32_C( -138151523), INT32_C( 263888472), INT32_C( 2142193967), INT32_C( -741822666), INT32_C( 755920794), INT32_C(-1972313252), INT32_C(-1912811499)), simde_mm512_set_epi32(INT32_C(-1115388021), INT32_C( 769964125), INT32_C( 418227269), INT32_C(-1388492980), INT32_C( 480660510), INT32_C( 1802844866), INT32_C( -429993967), INT32_C( 538553865), INT32_C( 2013392956), INT32_C( 197176151), INT32_C( 2006567868), INT32_C( 1705115765), INT32_C( 1202543157), INT32_C(-1263572444), INT32_C( 1425580745), INT32_C( 1097283836)), simde_mm512_set_epi32(INT32_C( 344195585), INT32_C( 610541653), INT32_C( 0), INT32_C(-1473493952), INT32_C( 0), INT32_C( 1631859200), INT32_C( 0), INT32_C( 1605632), INT32_C( 805371916), INT32_C( 134218306), INT32_C( 1879130532), INT32_C( 80), INT32_C( 69554177), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(25126), simde_mm512_set_epi32(INT32_C( 633431361), INT32_C( 1663592688), INT32_C( 1399097521), INT32_C( -248917369), INT32_C(-1131808104), INT32_C( 737246109), INT32_C( -548380687), INT32_C(-1607587862), INT32_C( 223712677), INT32_C( -234850179), INT32_C( 1225779292), INT32_C(-1983080521), INT32_C( 1083031306), INT32_C( 479812120), INT32_C( 1659393180), INT32_C( 1062780085)), simde_mm512_set_epi32(INT32_C( 112954855), INT32_C( 1790377254), INT32_C( 1893295646), INT32_C( -674583179), INT32_C( 15401677), INT32_C( -641918434), INT32_C( -635981818), INT32_C( -342921360), INT32_C( -977229164), INT32_C( -339160274), INT32_C(-1866080556), INT32_C(-1369988401), INT32_C( 1985260264), INT32_C( 1810318993), INT32_C( -324233777), 
INT32_C(-1229418212)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 144086278), INT32_C( 546924046), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 499718), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1866459520), INT32_C( 0), INT32_C( 0), INT32_C( 1667630209), INT32_C(-1945861821), INT32_C( 0)) }, { UINT16_C(40095), simde_mm512_set_epi32(INT32_C( -925173403), INT32_C(-1800856604), INT32_C(-1569556909), INT32_C(-2075468293), INT32_C(-1622295859), INT32_C( 800838040), INT32_C(-1261125664), INT32_C( 225560714), INT32_C( -249168174), INT32_C( -785821894), INT32_C(-1322298905), INT32_C( 1919393940), INT32_C( 722048893), INT32_C( 667050909), INT32_C( -741637209), INT32_C(-1063733140)), simde_mm512_set_epi32(INT32_C( 283702321), INT32_C( 1760938946), INT32_C( 1862161708), INT32_C( 218779454), INT32_C( 1870003832), INT32_C( -776472743), INT32_C( 235320856), INT32_C(-1927493256), INT32_C( 2120699773), INT32_C( 1743164034), INT32_C( 92504126), INT32_C( -822461737), INT32_C( 80989491), INT32_C( -825823244), INT32_C( 23436927), INT32_C(-1677273698)), simde_mm512_set_epi32(INT32_C( 270533648), INT32_C( 0), INT32_C( 0), INT32_C( 150995460), INT32_C( 1613775920), INT32_C( -805034431), INT32_C( 0), INT32_C( 0), INT32_C( 239206701), INT32_C( 0), INT32_C( 0), INT32_C(-1936188861), INT32_C( 80889858), INT32_C( -939226016), INT32_C( 2366552), INT32_C( 470173074)) }, { UINT16_C(25708), simde_mm512_set_epi32(INT32_C( -419506034), INT32_C(-1634084803), INT32_C(-1791352038), INT32_C( 1397909248), INT32_C( -128853850), INT32_C(-1917410935), INT32_C( 1700830870), INT32_C( 1339604709), INT32_C(-1798365850), INT32_C( -59209020), INT32_C( 731125713), INT32_C( 630650100), INT32_C(-1338681832), INT32_C( 44002851), INT32_C( -812125291), INT32_C( 1028997312)), simde_mm512_set_epi32(INT32_C( -213890367), INT32_C( 2021869397), INT32_C( 1653460709), INT32_C(-1583015005), INT32_C(-2111228672), INT32_C( 278487831), INT32_C(-1988085048), INT32_C(-1603254022), INT32_C( 1778423041), INT32_C( 
1070290908), INT32_C( 1862134929), INT32_C( 1387107310), INT32_C(-1741926346), INT32_C( 476437588), INT32_C( -64629687), INT32_C( 821283219)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 1610745664), INT32_C( 1652935909), INT32_C( 0), INT32_C( 0), INT32_C( 269042198), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 58938648), INT32_C( 1147791360), INT32_C( 0), INT32_C( 134742054), INT32_C( 476090452), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(26454), simde_mm512_set_epi32(INT32_C(-1766483567), INT32_C( -97069133), INT32_C( -984184350), INT32_C( -103594411), INT32_C( 1542851117), INT32_C( 476137043), INT32_C( -197399951), INT32_C(-1770261666), INT32_C( -47794230), INT32_C( -491438206), INT32_C( -344435807), INT32_C( 255371302), INT32_C( -725452804), INT32_C( 159027945), INT32_C(-1412516432), INT32_C( -472096495)), simde_mm512_set_epi32(INT32_C( 939183992), INT32_C( 45898803), INT32_C( -707307552), INT32_C( -411975944), INT32_C( 630779143), INT32_C( 1898376282), INT32_C( 2124829976), INT32_C( -114883081), INT32_C( 2093795280), INT32_C(-1982561427), INT32_C( 598306044), INT32_C( 1635474930), INT32_C(-1398853653), INT32_C( 1652658661), INT32_C(-1858170883), INT32_C( 995216280)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 8914944), INT32_C( 276911616), INT32_C( 0), INT32_C( 0), INT32_C( 1627826184), INT32_C( 176428296), INT32_C( 1761871009), INT32_C( 0), INT32_C( 155222125), INT32_C( 0), INT32_C( 1615025616), INT32_C( 0), INT32_C( 1652621572), INT32_C( 271583821), INT32_C( 0)) }, { UINT16_C(31670), simde_mm512_set_epi32(INT32_C(-1612308895), INT32_C( -722700317), INT32_C( 1003499766), INT32_C( 814072246), INT32_C( 2008726943), INT32_C( 1223905210), INT32_C( -618135276), INT32_C(-2049729375), INT32_C( 595839117), INT32_C( -226508565), INT32_C( 1598449683), INT32_C( -514630984), INT32_C( 658541354), INT32_C( 567151600), INT32_C( -71044409), INT32_C(-1688131700)), simde_mm512_set_epi32(INT32_C( 1317588071), INT32_C(-1153324271), INT32_C( 2046542506), INT32_C( 623240678), 
INT32_C( -39480028), INT32_C( -33815034), INT32_C( 2056788636), INT32_C( 2095887515), INT32_C( -281654456), INT32_C(-1621887341), INT32_C( 1362159003), INT32_C( 1103094461), INT32_C( 1716020502), INT32_C( 102069928), INT32_C( 474901863), INT32_C( 367619581)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 721520656), INT32_C( 1076609544), INT32_C( 85992512), INT32_C(-2012938208), INT32_C( 0), INT32_C( 546314888), INT32_C( 2016157722), INT32_C( -869006528), INT32_C( 0), INT32_C( 3179912), INT32_C( 11313157), INT32_C( 0), INT32_C( 101807112), INT32_C( 67898656), INT32_C( 0)) }, { UINT16_C(49857), simde_mm512_set_epi32(INT32_C(-2067220018), INT32_C( 1805947847), INT32_C( 2110487322), INT32_C( 1074104919), INT32_C(-1112398120), INT32_C( 225474260), INT32_C( -545045472), INT32_C( -824857753), INT32_C( -338758362), INT32_C(-1789466141), INT32_C( 1713747474), INT32_C( 808725130), INT32_C( 1298412949), INT32_C( 260904797), INT32_C( 457183382), INT32_C( 2009286767)), simde_mm512_set_epi32(INT32_C(-2042099265), INT32_C( 1225391956), INT32_C( -841393362), INT32_C( -744679138), INT32_C( -30361081), INT32_C( 1490708305), INT32_C( 1603942577), INT32_C(-1226711411), INT32_C( -720257963), INT32_C( 876066124), INT32_C( 1546499669), INT32_C( 1636147146), INT32_C( 3608382), INT32_C( -404260643), INT32_C( 1874947312), INT32_C(-2040485747)), simde_mm512_set_epi32(INT32_C( 33554481), INT32_C( 617488), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1587345), INT32_C( 0), INT32_C( 336660561), INT32_C( 539038732), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-2145345408)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_castps_si512(simde_mm512_maskz_andnot_ps(test_vec[i].k, simde_mm512_castsi512_ps(test_vec[i].a), simde_mm512_castsi512_ps(test_vec[i].b))); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_andnot_pd(SIMDE_MUNIT_TEST_ARGS) { const 
struct { simde__m512i src; simde__mmask8 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( -343739447634695407), INT64_C( 6094193684923690615), INT64_C(-7040634603669948000), INT64_C( 8872386007247991164), INT64_C(-8252638392294099885), INT64_C( 6601029892750146432), INT64_C( 7279352193089347864), INT64_C( 8662714795122682384)), UINT8_C( 55), simde_mm512_set_epi64(INT64_C( 599279934641474098), INT64_C( 7845412443385721442), INT64_C(-2777121475968104316), INT64_C( 7314283605033033979), INT64_C(-8886012248836695508), INT64_C( 3313585428802692877), INT64_C(-1598467827595787694), INT64_C(-5223130400950906727)), simde_mm512_set_epi64(INT64_C( 4132377007718714638), INT64_C( 6722749807664954665), INT64_C(-6834862606673078980), INT64_C(-8159527519597393212), INT64_C( 1169770534052573165), INT64_C(-8496887971116687127), INT64_C( 4795256091623648748), INT64_C( -534912108587925882)), simde_mm512_set_epi64(INT64_C( -343739447634695407), INT64_C( 6094193684923690615), INT64_C( 2305860618582136120), INT64_C(-8484217854339937788), INT64_C(-8252638392294099885), INT64_C(-9079040208459267872), INT64_C( 147532470349213100), INT64_C( 5192659463408496134)) }, { simde_mm512_set_epi64(INT64_C( 1137601381159569274), INT64_C( 3083515373590209262), INT64_C( 7172644931946125494), INT64_C( 7709434742472783251), INT64_C(-5570954806909339658), INT64_C( -271406020759376737), INT64_C( 4799674771715911578), INT64_C(-1218830816677094379)), UINT8_C( 50), simde_mm512_set_epi64(INT64_C(-2731162171219972563), INT64_C( 4361435470291369786), INT64_C( 4372980053959095777), INT64_C(-4964365409406827474), INT64_C(-4887932848327267276), INT64_C( 5394909549222414797), INT64_C( 8601793944421926823), INT64_C( 1320541430862898557)), simde_mm512_set_epi64(INT64_C( 3987127999885683210), INT64_C( 232842063033182789), INT64_C(-8565159867474411189), INT64_C( 6112914526494565862), INT64_C( 3462295888398647957), INT64_C( 2362909626677485241), INT64_C( 
1401365959932181466), INT64_C( 2959935140000245037)), simde_mm512_set_epi64(INT64_C( 1137601381159569274), INT64_C( 3083515373590209262), INT64_C(-9151314120547500022), INT64_C( 4955199880194171328), INT64_C(-5570954806909339658), INT64_C( -271406020759376737), INT64_C( 9008205654194264), INT64_C(-1218830816677094379)) }, { simde_mm512_set_epi64(INT64_C( 4893068556614144973), INT64_C( 8066183844976877919), INT64_C( 8546857359160133238), INT64_C(-8267045803572214233), INT64_C( 8915887943252268838), INT64_C(-4953676046754636494), INT64_C( 1510704893512358974), INT64_C( 9024635443342747538)), UINT8_C(216), simde_mm512_set_epi64(INT64_C(-8634103598278842542), INT64_C(-6155398791521040805), INT64_C( 7775580441978642644), INT64_C(-5899929856226471257), INT64_C( 7028189811487947825), INT64_C(-6189665615261290781), INT64_C( 907536080618458470), INT64_C( 3906704638875451620)), simde_mm512_set_epi64(INT64_C(-4094490793791238990), INT64_C(-2091977621380611033), INT64_C( 2043918654743067438), INT64_C(-4515408626818342672), INT64_C( 1196379185714011362), INT64_C( 1607300510948935937), INT64_C( 1394814499359692419), INT64_C(-5198396047694847294)), simde_mm512_set_epi64(INT64_C( 5116214864772401312), INT64_C( 4639917106950488100), INT64_C( 8546857359160133238), INT64_C( 4701761038926749776), INT64_C( 1158058982756417730), INT64_C(-4953676046754636494), INT64_C( 1510704893512358974), INT64_C( 9024635443342747538)) }, { simde_mm512_set_epi64(INT64_C( -233515152413640809), INT64_C(-7711023580854835359), INT64_C( 685057037117132470), INT64_C(-1053400672876430250), INT64_C(-6008870355673260365), INT64_C( 6732010747677860150), INT64_C( 7912723632945414242), INT64_C( 6629652157771519554)), UINT8_C( 7), simde_mm512_set_epi64(INT64_C( 418428539766329360), INT64_C( 1870466273027415797), INT64_C( 7044646027925455043), INT64_C(-7541966937157619960), INT64_C(-4455685474515493219), INT64_C(-3587901153898980536), INT64_C( 5978767859636931605), INT64_C( 1520054098233920669)), 
simde_mm512_set_epi64(INT64_C( 3839280895408034825), INT64_C(-8206971788365754506), INT64_C(-4439851259277562681), INT64_C(-6789849238744039634), INT64_C(-1659448540825770878), INT64_C( 2745935889893417490), INT64_C( 3715019098340555278), INT64_C(-7036562755259908130)), simde_mm512_set_epi64(INT64_C( -233515152413640809), INT64_C(-7711023580854835359), INT64_C( 685057037117132470), INT64_C(-1053400672876430250), INT64_C(-6008870355673260365), INT64_C( 2308798513734279186), INT64_C( 2379631298345005578), INT64_C(-8484488882121788606)) }, { simde_mm512_set_epi64(INT64_C(-2379770324367148032), INT64_C( 269951545548960285), INT64_C(-5915450755405613469), INT64_C( 4377769456724035257), INT64_C( 4963028952577306253), INT64_C( 5031417887689077714), INT64_C( 5062535597864084892), INT64_C(-8442033713738522560)), UINT8_C(129), simde_mm512_set_epi64(INT64_C(-6217210315706132893), INT64_C(-5326659911006667991), INT64_C( 1028086835571864351), INT64_C(-9190513903150593462), INT64_C(-5132407930629667991), INT64_C( 3081908066365846241), INT64_C( 1991874275422300444), INT64_C(-8267800556778760378)), simde_mm512_set_epi64(INT64_C(-7195920316169423191), INT64_C( 8855103613986981069), INT64_C(-1079557804828513091), INT64_C(-7716984285220335090), INT64_C( 760039564915644558), INT64_C( 5629267284662877438), INT64_C(-8887844833591355405), INT64_C( 3227154889713027186)), simde_mm512_set_epi64(INT64_C( 1441983734462308488), INT64_C( 269951545548960285), INT64_C(-5915450755405613469), INT64_C( 4377769456724035257), INT64_C( 4963028952577306253), INT64_C( 5031417887689077714), INT64_C( 5062535597864084892), INT64_C( 2344440558065420336)) }, { simde_mm512_set_epi64(INT64_C(-1632349344831082760), INT64_C(-7746252227037734078), INT64_C( 8307071850644138234), INT64_C(-8586546786041619015), INT64_C( 404139822791089559), INT64_C(-1877631053848650154), INT64_C( 7455727023947545561), INT64_C( 9065509561364139853)), UINT8_C(251), simde_mm512_set_epi64(INT64_C( 6876828378130175291), INT64_C( 
4443252594681514716), INT64_C(-6385840203869031352), INT64_C( 6938523062457490065), INT64_C( -791901096126868688), INT64_C( 5787489911096576116), INT64_C(-7854643813663956328), INT64_C( 5967336075130617342)), simde_mm512_set_epi64(INT64_C( 4146719804671055125), INT64_C( 2252037785239205430), INT64_C( 8454374735321895014), INT64_C(-1381892347656312574), INT64_C( 706165223560180728), INT64_C( -219143018686364756), INT64_C(-1570739878098539061), INT64_C(-1561542974628641964)), simde_mm512_set_epi64(INT64_C( 2341899294765252612), INT64_C( 162200290615931938), INT64_C( 5769965461718695974), INT64_C(-8318139676533907198), INT64_C( 633958070103335112), INT64_C(-1877631053848650154), INT64_C( 7494284887317302595), INT64_C(-6339871525209882112)) }, { simde_mm512_set_epi64(INT64_C( 9026638934924851598), INT64_C( 230236376028734533), INT64_C( 7791847925691209473), INT64_C( 5636683834883992106), INT64_C( 4666417032316259140), INT64_C(-9020764089960395704), INT64_C( 8213766780006614493), INT64_C(-6694788910086219877)), UINT8_C( 70), simde_mm512_set_epi64(INT64_C(-6498066308492480472), INT64_C( 5728364479291594350), INT64_C(-5884149762497402782), INT64_C( 6387650260207408060), INT64_C(-5128486331429717841), INT64_C( -868619985199698421), INT64_C(-4214853307896141180), INT64_C( -465765039913276151)), simde_mm512_set_epi64(INT64_C(-5046884246860802318), INT64_C( 1004972136752522438), INT64_C(-2378507232856704687), INT64_C( 5436650347587017589), INT64_C(-2667790265994842517), INT64_C(-2085203105823883971), INT64_C(-5490659216814537620), INT64_C(-8036188446954416194)), simde_mm512_set_epi64(INT64_C( 9026638934924851598), INT64_C( 36100064221372544), INT64_C( 7791847925691209473), INT64_C( 5636683834883992106), INT64_C( 4666417032316259140), INT64_C( 3892555578216756), INT64_C( 3624274549258028136), INT64_C(-6694788910086219877)) }, { simde_mm512_set_epi64(INT64_C(-5230007765170990668), INT64_C( 846507549899810342), INT64_C(-7111962349683310649), INT64_C( -772191960312616388), 
INT64_C( 3123285095915363891), INT64_C( 1623466873559833442), INT64_C( -366019171342533610), INT64_C(-2494634274663155684)), UINT8_C( 35), simde_mm512_set_epi64(INT64_C( -975714944549474590), INT64_C( 1049564164032844619), INT64_C( 7303689756219555946), INT64_C(-6372981973137131801), INT64_C(-5489514128660043293), INT64_C(-7367882453491102610), INT64_C( 6699088752588717529), INT64_C( 1411143637466671223)), simde_mm512_set_epi64(INT64_C(-2226860933844685600), INT64_C( 1920850149099678208), INT64_C( 1690361552489070319), INT64_C( 6660992074283035646), INT64_C(-5836455416301421815), INT64_C(-2339252903384749197), INT64_C(-3541767763730411989), INT64_C( 1379523068058767230)), simde_mm512_set_epi64(INT64_C(-5230007765170990668), INT64_C( 846507549899810342), INT64_C( 1307192606745607813), INT64_C( -772191960312616388), INT64_C( 3123285095915363891), INT64_C( 1623466873559833442), INT64_C(-9076991572345486814), INT64_C( 9020679430670088)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_castpd_si512(simde_mm512_mask_andnot_pd(simde_mm512_castsi512_pd(test_vec[i].src), test_vec[i].k, simde_mm512_castsi512_pd(test_vec[i].a), simde_mm512_castsi512_pd(test_vec[i].b))); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_andnot_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT8_C(227), simde_mm512_set_epi64(INT64_C(-4590720219282553470), INT64_C( 7052994564826635717), INT64_C( 102182550423351600), INT64_C( 6550609573293042333), INT64_C(-6537325874213497913), INT64_C( 8955563540957921573), INT64_C( 8228815951810735558), INT64_C(-3823364876013971085)), simde_mm512_set_epi64(INT64_C( -740720849127296556), INT64_C( -933890699409471481), INT64_C( 5755588500836856312), INT64_C(-7609758858126984395), INT64_C( 5441557991346977587), INT64_C( -960797962792509213), INT64_C( 199203171802884405), INT64_C( 
1812346297232380541)), simde_mm512_set_epi64(INT64_C( 3868594310811889748), INT64_C(-7923482887668088830), INT64_C( 5662415626724180168), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 54349100887310385), INT64_C( 1226687842802237964)) }, { UINT8_C(150), simde_mm512_set_epi64(INT64_C( 8029427937579490996), INT64_C(-1016228199940301895), INT64_C( 3581869483076202853), INT64_C( 4960784598491720813), INT64_C(-7670184712449022296), INT64_C( 1368687340866524346), INT64_C( 36158962521961508), INT64_C( 1367446093605161437)), simde_mm512_set_epi64(INT64_C( 210141022168102607), INT64_C(-5660044126052691316), INT64_C( 8952190750537587177), INT64_C(-7520755716476597588), INT64_C( 5025036600597137846), INT64_C( 1371349703128320142), INT64_C( 1157825117202956749), INT64_C( 3947754344252009580)), simde_mm512_set_epi64(INT64_C( 36187148471246923), INT64_C( 0), INT64_C( 0), INT64_C(-7845058891591449984), INT64_C( 0), INT64_C( 72059827504875524), INT64_C( 1157717081520079305), INT64_C( 0)) }, { UINT8_C(206), simde_mm512_set_epi64(INT64_C(-5447319738796629324), INT64_C( 7573553786407309883), INT64_C( 3210166478679154113), INT64_C( -632818268169935629), INT64_C( 2091039522714659767), INT64_C(-7890721085940980150), INT64_C(-4051485337429119412), INT64_C(-3044005681324007212)), simde_mm512_set_epi64(INT64_C(-3107571465629414339), INT64_C(-5609659848016607327), INT64_C( 3170884903864138535), INT64_C( 3780264979688453657), INT64_C(-3200960942660399317), INT64_C( 5382084213528122877), INT64_C(-4409193503472949179), INT64_C( 4723837911396640821)), simde_mm512_set_epi64(INT64_C( 4654611160413260809), INT64_C(-7916109799461190272), INT64_C( 0), INT64_C( 0), INT64_C(-4426155929590676472), INT64_C( 5224458146534150581), INT64_C( 2608396285542401), INT64_C( 0)) }, { UINT8_C(125), simde_mm512_set_epi64(INT64_C( 4207278183660861960), INT64_C(-8995945069443043606), INT64_C( 8554253801191868756), INT64_C( 3354059043086044373), INT64_C( 1657475957423553689), INT64_C(-2556137084454595182), INT64_C( 
2422681642730518465), INT64_C(-8655840866694392843)), simde_mm512_set_epi64(INT64_C(-6233706215614972452), INT64_C( 2778576059313358974), INT64_C( 521154595483651590), INT64_C(-2197561166428241391), INT64_C( 751433836641726755), INT64_C( 5984411989878292578), INT64_C(-2128282437357703049), INT64_C( 6129378286910417126)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 2632358518757523476), INT64_C( 74594534185240066), INT64_C(-4539433801636773376), INT64_C( 607176734235951394), INT64_C( 218459912932966496), INT64_C( 0), INT64_C( 5769015465569462274)) }, { UINT8_C( 71), simde_mm512_set_epi64(INT64_C(-8436437744293223076), INT64_C( -780741249760151942), INT64_C( 4822350614887775462), INT64_C( 2188408541520193917), INT64_C(-3082935350304813722), INT64_C(-5875221946234265673), INT64_C(-5758090656392293952), INT64_C(-3302974504787286903)), simde_mm512_set_epi64(INT64_C(-7235195547697304884), INT64_C(-2099342694411362386), INT64_C( 6587794971423114743), INT64_C(-8750716550526717441), INT64_C(-6164466580259336301), INT64_C(-5605431759460432480), INT64_C( 8610981953023941155), INT64_C(-5677351707943910012)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 204210100791970180), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 1153063355977236480), INT64_C( 5152215214393002531), INT64_C( 2383675803176042756)) }, { UINT8_C( 16), simde_mm512_set_epi64(INT64_C(-7454619922298182462), INT64_C(-4477515570225004692), INT64_C( 3259262052820328758), INT64_C(-2323942451066306663), INT64_C(-7533087570752357418), INT64_C(-2748624972946479401), INT64_C(-7594508336042449203), INT64_C(-2829162199669149138)), simde_mm512_set_epi64(INT64_C( 231920182013128330), INT64_C( 2342360813276731434), INT64_C(-3887471131024015317), INT64_C(-6063668553337722025), INT64_C( 6394528685493045899), INT64_C( 7433558736916574563), INT64_C(-8597186079760918784), INT64_C( 9218943275377121788)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 2323862976450265158), INT64_C( 0), INT64_C( 0), INT64_C( 
0), INT64_C( 0)) }, { UINT8_C( 0), simde_mm512_set_epi64(INT64_C(-1874339372436527846), INT64_C(-4874669033093832828), INT64_C(-5258762659707925604), INT64_C( 1933045326528420333), INT64_C( 8704229925049171123), INT64_C(-4249956245353677661), INT64_C( 3155017878537816163), INT64_C( 8377752223970655488)), simde_mm512_set_epi64(INT64_C( 6639157720065498333), INT64_C( 7954402008564552716), INT64_C( 220412799958481097), INT64_C( 3341210828844349470), INT64_C( 930495958757986079), INT64_C(-5593607526362331219), INT64_C( 1220298896193992740), INT64_C( 1285034736351616528)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C( 29), simde_mm512_set_epi64(INT64_C( 6066305475956667844), INT64_C(-4992917222673652861), INT64_C(-8395584014417236584), INT64_C( -142719058224734896), INT64_C( 509377192188320240), INT64_C( 4417811606371822828), INT64_C( 5101966917722654224), INT64_C(-9124380135803090931)), simde_mm512_set_epi64(INT64_C( 7887221249293377488), INT64_C( -182605916723991232), INT64_C( 366071292133853300), INT64_C(-7235772882062384424), INT64_C( 2002854046423029286), INT64_C(-3793561946903283248), INT64_C(-8278200760223787155), INT64_C(-6807146722179486859)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 112882606806164104), INT64_C( 1786257950099998214), INT64_C(-4462918246344945392), INT64_C( 0), INT64_C( 2341880616049730416)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_castpd_si512(simde_mm512_maskz_andnot_pd(test_vec[i].k, simde_mm512_castsi512_pd(test_vec[i].a), simde_mm512_castsi512_pd(test_vec[i].b))); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_andnot_si512(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( -335330897), INT32_C( 1860840666), INT32_C( 
-837102383), INT32_C( 1544121603), INT32_C( -31451516), INT32_C( 294501250), INT32_C( 1844141610), INT32_C( 711066163), INT32_C( 1032767823), INT32_C( 466876164), INT32_C( 1432923079), INT32_C( -137339965), INT32_C(-1216294439), INT32_C( 1110388055), INT32_C( 754234048), INT32_C( -712839683)), simde_mm512_set_epi32(INT32_C( 645969883), INT32_C( 45407696), INT32_C(-1431929981), INT32_C(-1744968675), INT32_C( 1491740298), INT32_C( -669732847), INT32_C(-1678703719), INT32_C(-1110558488), INT32_C(-1993251114), INT32_C( 1910816421), INT32_C( 2013403088), INT32_C( 882585036), INT32_C( 1733706468), INT32_C( 1763057771), INT32_C( 303070795), INT32_C( -805966849)), simde_mm512_set_epi32(INT32_C( 41989712), INT32_C( 1364224), INT32_C( 547629826), INT32_C(-2081128420), INT32_C( 13246474), INT32_C( -938475503), INT32_C(-1844444783), INT32_C(-1785972536), INT32_C(-2144262512), INT32_C( 1612974241), INT32_C( 671224848), INT32_C( 730124), INT32_C( 1079378468), INT32_C( 688914472), INT32_C( 302009355), INT32_C( 175440386)) }, { simde_mm512_set_epi32(INT32_C( 1267266514), INT32_C(-1810114077), INT32_C(-1631053656), INT32_C(-1291259659), INT32_C(-1797405973), INT32_C(-2052491824), INT32_C( 218690610), INT32_C( 434694077), INT32_C( 322569513), INT32_C( -492306370), INT32_C( 1714124310), INT32_C( 757183592), INT32_C( 1904845371), INT32_C( 1921390915), INT32_C( 1219016836), INT32_C( -491589854)), simde_mm512_set_epi32(INT32_C( -843887215), INT32_C(-1144045392), INT32_C( 1523671305), INT32_C( -687015924), INT32_C( -651771268), INT32_C(-1812069901), INT32_C( 132880464), INT32_C( 1912329512), INT32_C( -208209918), INT32_C(-1079631083), INT32_C( -134611197), INT32_C(-1062410635), INT32_C( -896925558), INT32_C( -559765979), INT32_C( 1912148196), INT32_C( -437846049)), simde_mm512_set_epi32(INT32_C(-2077029375), INT32_C( 734265360), INT32_C( 1074880769), INT32_C( 1141114888), INT32_C( 1226965012), INT32_C( 307626019), INT32_C( 48433216), INT32_C( 1611862016), INT32_C( -528156670), INT32_C( 
486941441), INT32_C(-1848604415), INT32_C(-1064549867), INT32_C(-1979580288), INT32_C(-1944017372), INT32_C( 827392096), INT32_C( 88346845)) }, { simde_mm512_set_epi32(INT32_C( 451034606), INT32_C( 160382101), INT32_C(-1268862602), INT32_C( 782115678), INT32_C(-1160318793), INT32_C( -575355195), INT32_C( 1432838242), INT32_C(-2114154695), INT32_C(-1020410376), INT32_C( -714076046), INT32_C(-1407849113), INT32_C( 996241684), INT32_C( 481606881), INT32_C(-1834956523), INT32_C( 493396975), INT32_C(-1084672800)), simde_mm512_set_epi32(INT32_C( 1458493934), INT32_C( 1051105030), INT32_C( -836083742), INT32_C( 1407748874), INT32_C(-1387312486), INT32_C( 776481471), INT32_C( 275093143), INT32_C( -137438390), INT32_C( 1860284960), INT32_C( 540502552), INT32_C( 1411461258), INT32_C( 1517918194), INT32_C( -266161178), INT32_C( 1269265702), INT32_C( 809771495), INT32_C(-1968711037)), simde_mm512_set_epi32(INT32_C( 1141686272), INT32_C( 908100354), INT32_C( 1243631232), INT32_C( 1365280768), INT32_C( 84478472), INT32_C( 575154234), INT32_C( 38549), INT32_C( 1979863106), INT32_C( 750793216), INT32_C( 537356808), INT32_C( 1344278664), INT32_C( 1075349218), INT32_C( -536739066), INT32_C( 1225208866), INT32_C( 537133056), INT32_C( 10930691)) }, { simde_mm512_set_epi32(INT32_C(-1562592645), INT32_C( -32255724), INT32_C( -923416118), INT32_C(-2134713284), INT32_C(-1313323965), INT32_C(-1729518909), INT32_C( 1286411285), INT32_C( -376910154), INT32_C(-1786193108), INT32_C(-2035089818), INT32_C( 1552020826), INT32_C( 726998554), INT32_C( 1864619074), INT32_C( 1828024315), INT32_C( -824341738), INT32_C(-1420030579)), simde_mm512_set_epi32(INT32_C( 1087836695), INT32_C(-2094233976), INT32_C( 1148487684), INT32_C(-1514127182), INT32_C( -524459384), INT32_C( 725104708), INT32_C( 1787286694), INT32_C(-1533684832), INT32_C( 46575098), INT32_C( 2086853653), INT32_C( 815292575), INT32_C(-1270435744), INT32_C( 2014177347), INT32_C( 1099600134), INT32_C( -622983952), INT32_C( 822011154)), 
simde_mm512_set_epi32(INT32_C( 1073938436), INT32_C( 19662472), INT32_C( 67108868), INT32_C( 620757122), INT32_C( 1074078344), INT32_C( 588257284), INT32_C( 570605730), INT32_C( 68489472), INT32_C( 38151378), INT32_C( 2017460241), INT32_C( 538443909), INT32_C(-1811767200), INT32_C( 269232129), INT32_C( 17469444), INT32_C( 268567776), INT32_C( 279109650)) }, { simde_mm512_set_epi32(INT32_C(-1657115762), INT32_C( 1585840022), INT32_C(-1070898703), INT32_C( 1022031619), INT32_C(-1380717315), INT32_C( 1086658406), INT32_C( -124039065), INT32_C(-1974944947), INT32_C( 2044249149), INT32_C( 1638783653), INT32_C( 1466240446), INT32_C(-1803146403), INT32_C( 1060682707), INT32_C(-1592428518), INT32_C( 156586666), INT32_C( -266957088)), simde_mm512_set_epi32(INT32_C( -703454581), INT32_C( 797686885), INT32_C( 1723425278), INT32_C( -158454369), INT32_C(-1043830066), INT32_C( 709622512), INT32_C(-2136296570), INT32_C( -863350926), INT32_C( 1844461284), INT32_C( -21472306), INT32_C(-1932483198), INT32_C(-1320584016), INT32_C( -370591173), INT32_C( -330170023), INT32_C( -975385097), INT32_C( -654562432)), simde_mm512_set_epi32(INT32_C( 1107296257), INT32_C( 554303585), INT32_C( 646971406), INT32_C(-1039923044), INT32_C( 1078460930), INT32_C( 705357968), INT32_C( 2139008), INT32_C( 1149387826), INT32_C( 69221056), INT32_C(-1643118262), INT32_C(-2003787776), INT32_C( 558453920), INT32_C(-1061093336), INT32_C( 1279394113), INT32_C( -997683883), INT32_C( 149430528)) }, { simde_mm512_set_epi32(INT32_C( 962558787), INT32_C(-1212292378), INT32_C(-1698562444), INT32_C(-1456708578), INT32_C( 1605522258), INT32_C(-1389853810), INT32_C( 605095260), INT32_C( 449573803), INT32_C(-1932095036), INT32_C( 1214045264), INT32_C(-1966228541), INT32_C( 484352026), INT32_C(-1251622562), INT32_C( 97048183), INT32_C( 1801957969), INT32_C( 39148591)), simde_mm512_set_epi32(INT32_C( 1144673524), INT32_C(-1837539909), INT32_C(-1995926176), INT32_C( -775830454), INT32_C( 1197039500), INT32_C( 605086417), 
INT32_C(-1681915928), INT32_C(-1694227594), INT32_C( 250277648), INT32_C( 1517650405), INT32_C( -529860796), INT32_C( 319331129), INT32_C( 1337610221), INT32_C( -515158609), INT32_C(-1958759875), INT32_C( 480005412)), simde_mm512_set_epi32(INT32_C( 1142949044), INT32_C( 4201753), INT32_C( 17301760), INT32_C( 1354858560), INT32_C( 4787340), INT32_C( 1073233), INT32_C(-1683031392), INT32_C(-2130444204), INT32_C( 36204048), INT32_C( 304152997), INT32_C( 1612858372), INT32_C( 50338593), INT32_C( 1251610273), INT32_C( -536671864), INT32_C(-2145910740), INT32_C( 478675200)) }, { simde_mm512_set_epi32(INT32_C( 477799556), INT32_C( 718106947), INT32_C( -702434720), INT32_C( 911156446), INT32_C( 692922531), INT32_C( -634559193), INT32_C( -541024501), INT32_C( 6957260), INT32_C( 891904501), INT32_C( 1674261328), INT32_C( 463285837), INT32_C( 465636281), INT32_C( -567453998), INT32_C( -675807734), INT32_C( 1242869264), INT32_C(-2003535835)), simde_mm512_set_epi32(INT32_C( -440269466), INT32_C( 1069561863), INT32_C( -850138274), INT32_C( 1324108467), INT32_C( 996083706), INT32_C(-1741332408), INT32_C(-1720688024), INT32_C( -195389802), INT32_C( -122163269), INT32_C(-1678986062), INT32_C( -261742027), INT32_C( 147621305), INT32_C( 1928957095), INT32_C( 647911914), INT32_C(-1231783784), INT32_C(-1597793099)), simde_mm512_set_epi32(INT32_C( -511704734), INT32_C( 352323588), INT32_C( 156387614), INT32_C( 1218464289), INT32_C( 303171416), INT32_C( 1048648), INT32_C( 3170400), INT32_C( -200239598), INT32_C( -929657334), INT32_C(-1742437214), INT32_C( -530448336), INT32_C( 820224), INT32_C( 550537253), INT32_C( 537395680), INT32_C(-1266659192), INT32_C( 541295760)) }, { simde_mm512_set_epi32(INT32_C(-1322452749), INT32_C(-1191485380), INT32_C( 61071601), INT32_C( -255981709), INT32_C( 1745472557), INT32_C( 1521357726), INT32_C(-1111842070), INT32_C( 1783291089), INT32_C( 718609371), INT32_C( -553071779), INT32_C(-1373014967), INT32_C( 751334079), INT32_C( -828271800), 
INT32_C(-1578484948), INT32_C(-1597074675), INT32_C( 393018558)), simde_mm512_set_epi32(INT32_C(-1722624236), INT32_C( -955857282), INT32_C( 1790216473), INT32_C( -762838785), INT32_C( -108799681), INT32_C( -975838651), INT32_C( 1961237228), INT32_C( 52752901), INT32_C(-1440122977), INT32_C(-1167835972), INT32_C( 1345250484), INT32_C( 2101674065), INT32_C( -149671798), INT32_C( 738167968), INT32_C( -764040824), INT32_C( -514982245)), simde_mm512_set_epi32(INT32_C( 139593476), INT32_C( 1191478850), INT32_C( 1746143496), INT32_C( 33554572), INT32_C(-1853746926), INT32_C(-2058231743), INT32_C( 1078204420), INT32_C( 19145220), INT32_C(-2144836604), INT32_C( 543437984), INT32_C( 1342603444), INT32_C( 1359282240), INT32_C( 823402626), INT32_C( 169182336), INT32_C( 1378953344), INT32_C( -536804863)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_andnot_si512(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_andnot_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 1786505147), INT32_C( 366806262), INT32_C(-1595474360), INT32_C( -741125130), INT32_C( 623580589), INT32_C( 1819639708), INT32_C(-1998267151), INT32_C( 54696203), INT32_C( 1230356730), INT32_C( -528215990), INT32_C(-1085976265), INT32_C( -88891472), INT32_C( 263402075), INT32_C( 2072408294), INT32_C( 1041613232), INT32_C(-1299496488)), simde_mm512_set_epi32(INT32_C( 634085978), INT32_C( 1880231468), INT32_C( 1457044755), INT32_C( -852481645), INT32_C( 1274177455), INT32_C( -223885439), INT32_C( 582318482), INT32_C( 1515067636), INT32_C(-1348943060), INT32_C( 88850487), INT32_C(-1067534176), INT32_C( 1770437803), INT32_C(-1608407464), INT32_C(-1335624696), INT32_C( 1637100454), INT32_C(-1783030263)), simde_mm512_set_epi32(INT32_C( 92274752), INT32_C( 1610746376), INT32_C( 1444413715), INT32_C( 
203433985), INT32_C( 1255170562), INT32_C(-1836941311), INT32_C( 571547906), INT32_C( 1477181684), INT32_C(-1501035772), INT32_C( 88850485), INT32_C( 1075488896), INT32_C( 17047563), INT32_C(-1610578944), INT32_C(-2141090808), INT32_C( 1098918406), INT32_C( 87039489)) }, { simde_mm512_set_epi32(INT32_C( 923576423), INT32_C(-1078925154), INT32_C( -430557576), INT32_C(-1684773193), INT32_C(-1179101215), INT32_C(-1985153431), INT32_C( 584718967), INT32_C( -112765469), INT32_C( 1515864234), INT32_C(-1118210252), INT32_C( 931438007), INT32_C( -352031421), INT32_C( 1134370188), INT32_C( 1556623900), INT32_C( 57329867), INT32_C( 254759017)), simde_mm512_set_epi32(INT32_C( -300442460), INT32_C( -893266841), INT32_C(-1015236925), INT32_C( 262163323), INT32_C( 2095940386), INT32_C( 1009617335), INT32_C( 458760718), INT32_C(-1732442867), INT32_C( 1273945161), INT32_C( 946706622), INT32_C( 1469023509), INT32_C(-2064451876), INT32_C( -256697390), INT32_C(-1934774398), INT32_C( 433298181), INT32_C( -530351918)), simde_mm512_set_epi32(INT32_C( -938273664), INT32_C( 1078006881), INT32_C( 19431555), INT32_C( 69208392), INT32_C( 1145405442), INT32_C( 872483222), INT32_C( 419438600), INT32_C( 12058636), INT32_C( 27565121), INT32_C( 2394250), INT32_C( 1074462720), INT32_C( 83006108), INT32_C(-1339948462), INT32_C(-2145022590), INT32_C( 412160772), INT32_C( -532666222)) }, { simde_mm512_set_epi32(INT32_C( 835311518), INT32_C( 593132209), INT32_C(-1205845883), INT32_C(-2103435972), INT32_C( 331121937), INT32_C(-1122763027), INT32_C( -11044623), INT32_C( 1217358106), INT32_C( 899389553), INT32_C( 61750829), INT32_C(-1644418892), INT32_C( 1179256254), INT32_C( -236468269), INT32_C( -666751062), INT32_C( -733547571), INT32_C( 2125570021)), simde_mm512_set_epi32(INT32_C(-1443754597), INT32_C( 1972174992), INT32_C(-2074962423), INT32_C( -531291976), INT32_C( 1382830722), INT32_C( -282269267), INT32_C( 1453780297), INT32_C( 363272438), INT32_C( 1819778130), INT32_C(-1488646809), INT32_C( 
1000774887), INT32_C( 2075973242), INT32_C( 251762527), INT32_C( 254090322), INT32_C( -106442053), INT32_C(-1147166459)), simde_mm512_set_epi32(INT32_C(-2009987071), INT32_C( 1418002432), INT32_C( 72521224), INT32_C( 1616191616), INT32_C( 1077957250), INT32_C( 1110180096), INT32_C( 10519816), INT32_C( 354423012), INT32_C( 1214514178), INT32_C(-1539243710), INT32_C( 570594371), INT32_C( 968151616), INT32_C( 234885132), INT32_C( 119869520), INT32_C( 698417202), INT32_C(-2129780736)) }, { simde_mm512_set_epi32(INT32_C( 1259282838), INT32_C( -167567006), INT32_C( 1470440257), INT32_C(-1702928569), INT32_C(-1493129242), INT32_C( -361616020), INT32_C( 1148861436), INT32_C(-2140586026), INT32_C(-1901343726), INT32_C( 1258604211), INT32_C( 1382183555), INT32_C( 464481172), INT32_C( 87817013), INT32_C( -25672201), INT32_C(-1647580547), INT32_C( -833959607)), simde_mm512_set_epi32(INT32_C( -711482206), INT32_C(-1110405208), INT32_C( -55795162), INT32_C(-1789106875), INT32_C(-1077987504), INT32_C( 2002242576), INT32_C( 879044440), INT32_C( 728498187), INT32_C( -580810324), INT32_C(-1054241155), INT32_C( 416673383), INT32_C( 1924176623), INT32_C( 1323235160), INT32_C( 659292758), INT32_C(-2101310960), INT32_C( 1303315999)), simde_mm512_set_epi32(INT32_C(-1802468320), INT32_C( 164662920), INT32_C(-1475837914), INT32_C( 83894272), INT32_C( 415174672), INT32_C( 352698384), INT32_C( 805635072), INT32_C( 721682441), INT32_C( 1363149228), INT32_C(-2144794548), INT32_C( 144018532), INT32_C( 1611698283), INT32_C( 1254360136), INT32_C( 17039872), INT32_C( 33554432), INT32_C( 27592214)) }, { simde_mm512_set_epi32(INT32_C( 1317706320), INT32_C( 1095937634), INT32_C(-2042379654), INT32_C( -425062813), INT32_C(-1422676870), INT32_C(-1972727484), INT32_C( 1448617643), INT32_C( 1446030445), INT32_C(-1203372071), INT32_C( 1257548767), INT32_C( 95515950), INT32_C( 288075556), INT32_C( -562902724), INT32_C( 1866018725), INT32_C( -140491543), INT32_C( -853598261)), 
simde_mm512_set_epi32(INT32_C(-1862602245), INT32_C( 1299263323), INT32_C(-1100697239), INT32_C(-1165132701), INT32_C(-1312528679), INT32_C(-2057483334), INT32_C(-2116201571), INT32_C(-1004874347), INT32_C( -792865239), INT32_C( 167838662), INT32_C(-1158285246), INT32_C( 788705850), INT32_C(-1470598876), INT32_C( -300747724), INT32_C( -732019428), INT32_C(-1060860437)), simde_mm512_set_epi32(INT32_C(-1871683157), INT32_C( 203489561), INT32_C( 941895937), INT32_C( 403009536), INT32_C( 281297537), INT32_C( 85281466), INT32_C(-2121969388), INT32_C(-2146807408), INT32_C( 1085800480), INT32_C( 65536), INT32_C(-1169913792), INT32_C( 771756058), INT32_C( 537407488), INT32_C(-2147299312), INT32_C( 6160660), INT32_C( 12615712)) }, { simde_mm512_set_epi32(INT32_C( 782435122), INT32_C( 1862046610), INT32_C( 2063073020), INT32_C(-2039040635), INT32_C( 1210624813), INT32_C( 1482889596), INT32_C(-1693737823), INT32_C( -742414353), INT32_C( 769657412), INT32_C(-1049696640), INT32_C( 237587070), INT32_C( 1546361918), INT32_C( -364413489), INT32_C(-1858108224), INT32_C(-1524047519), INT32_C( -892082969)), simde_mm512_set_epi32(INT32_C( 1276319466), INT32_C( -348382036), INT32_C( -54124638), INT32_C(-1613416797), INT32_C( -277896350), INT32_C(-1555914365), INT32_C( 1602672291), INT32_C( 612591504), INT32_C(-1670560036), INT32_C( 2118020891), INT32_C(-1204159467), INT32_C( 299945581), INT32_C( 1470077526), INT32_C(-1901456818), INT32_C( 1982811443), INT32_C( 366998615)), simde_mm512_set_epi32(INT32_C( 1074795720), INT32_C(-2130703316), INT32_C(-2080374526), INT32_C( 427885090), INT32_C(-1488519102), INT32_C(-1560239997), INT32_C( 1149518338), INT32_C( 603996176), INT32_C(-1878178664), INT32_C( 1041238299), INT32_C(-1341082623), INT32_C( 29377089), INT32_C( 362316304), INT32_C( 243274254), INT32_C( 1376193554), INT32_C( 353112080)) }, { simde_mm512_set_epi32(INT32_C( -664438730), INT32_C( 1158162569), INT32_C(-1048438639), INT32_C( 819552403), INT32_C( 486427093), 
INT32_C(-1267830843), INT32_C( 1178270581), INT32_C(-1348447676), INT32_C( -981472284), INT32_C( 1962298807), INT32_C( -393093452), INT32_C(-1754911100), INT32_C(-1506604227), INT32_C( -220324223), INT32_C( 856278899), INT32_C( 15706156)), simde_mm512_set_epi32(INT32_C( -689282393), INT32_C( -261985647), INT32_C(-1390325708), INT32_C(-1552766747), INT32_C(-1576064212), INT32_C( -185898645), INT32_C(-1798232738), INT32_C( -401409831), INT32_C( 1975803231), INT32_C( 1826250001), INT32_C(-1038398890), INT32_C( -306355124), INT32_C(-1154269982), INT32_C( -209110535), INT32_C(-2033491342), INT32_C( -971905248)), simde_mm512_set_epi32(INT32_C( 109707905), INT32_C(-1335737840), INT32_C( 740376612), INT32_C(-2094888860), INT32_C(-1576984024), INT32_C( 1082196010), INT32_C(-1866398710), INT32_C( 1074964633), INT32_C( 813700123), INT32_C( 134746112), INT32_C( 34209858), INT32_C( 1754873928), INT32_C( 419443906), INT32_C( 16851320), INT32_C(-2067652608), INT32_C( -972011776)) }, { simde_mm512_set_epi32(INT32_C(-1519344071), INT32_C( 1556822852), INT32_C(-1382496853), INT32_C( -624683333), INT32_C( 1477411394), INT32_C( -704833096), INT32_C(-1957423151), INT32_C( -471773069), INT32_C( 1263493389), INT32_C( 2117955521), INT32_C(-1143959230), INT32_C( -832581030), INT32_C(-1273834890), INT32_C( -392148704), INT32_C( 1764655366), INT32_C( -721713055)), simde_mm512_set_epi32(INT32_C(-1396008954), INT32_C( -651865449), INT32_C( 452267102), INT32_C( -741136221), INT32_C( 1539744858), INT32_C(-2014766256), INT32_C(-1095604449), INT32_C(-1527666044), INT32_C( -826073132), INT32_C( -8340331), INT32_C( 1447376741), INT32_C( 1608478316), INT32_C( 1253487795), INT32_C( 2056029052), INT32_C( -880457902), INT32_C( -691872315)), simde_mm512_set_epi32(INT32_C( 143267846), INT32_C(-2128330605), INT32_C( 308611156), INT32_C( 18032640), INT32_C( 62923800), INT32_C( 33555008), INT32_C( 883056654), INT32_C( 68201092), INT32_C(-2071978288), INT32_C(-2122280940), INT32_C( 1141188133), INT32_C( 
293612580), INT32_C( 1252271233), INT32_C( 302809692), INT32_C(-2105457072), INT32_C( 33576324)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_andnot_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_andnot_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask16 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C(-1056724565), INT32_C( 1525326722), INT32_C( -860629095), INT32_C( 1674345138), INT32_C( -780517906), INT32_C(-1953060088), INT32_C(-1307294727), INT32_C(-1463687440), INT32_C( -675695615), INT32_C( 1308561010), INT32_C( 639253006), INT32_C( -651243687), INT32_C( -612977662), INT32_C( 549809264), INT32_C( 644920842), INT32_C( 1882016760)), UINT16_C(49595), simde_mm512_set_epi32(INT32_C( 1189938329), INT32_C( 1797835672), INT32_C( 297527233), INT32_C( 1889709809), INT32_C( -409509393), INT32_C( 994472936), INT32_C( -666015338), INT32_C( -260985600), INT32_C( 750956055), INT32_C( 1716384261), INT32_C(-1960820967), INT32_C( 548004165), INT32_C( 1158678479), INT32_C( 1692264694), INT32_C( 789910754), INT32_C(-1468927401)), simde_mm512_set_epi32(INT32_C( -428021677), INT32_C( 2072345032), INT32_C(-1760567882), INT32_C( -446864592), INT32_C( 1299362117), INT32_C( 1402678741), INT32_C( -667918085), INT32_C( 1296019907), INT32_C(-1260791809), INT32_C( 1231406525), INT32_C( -414651973), INT32_C( 1577314987), INT32_C( 1453608195), INT32_C( 1199168765), INT32_C( 1764761558), INT32_C(-1053144882)), simde_mm512_set_epi32(INT32_C(-1609504702), INT32_C( 277164096), INT32_C( -860629095), INT32_C( 1674345138), INT32_C( -780517906), INT32_C(-1953060088), INT32_C(-1307294727), INT32_C( 219025603), INT32_C(-1877388824), INT32_C( 1308561010), INT32_C( 1682489506), INT32_C( 1577192106), INT32_C( 312756224), INT32_C( 549809264), INT32_C( 1075841812), INT32_C( 
1091176584)) }, { simde_mm512_set_epi32(INT32_C( -922226792), INT32_C( 1177794317), INT32_C(-1987384202), INT32_C( 817250921), INT32_C( 1296662639), INT32_C( 64131133), INT32_C(-1048693918), INT32_C( 1748498328), INT32_C( -392119279), INT32_C(-1074948281), INT32_C( 1219088991), INT32_C( 346956559), INT32_C( -778487174), INT32_C( 2030262893), INT32_C( -325938509), INT32_C( 2088865417)), UINT16_C(43842), simde_mm512_set_epi32(INT32_C( -911190750), INT32_C( -150954698), INT32_C(-2108244068), INT32_C( -219871492), INT32_C( 954142226), INT32_C( -657696450), INT32_C( -40171606), INT32_C( 523799369), INT32_C(-1984820679), INT32_C( -352318109), INT32_C( 1527484465), INT32_C( 1078897849), INT32_C( -979432773), INT32_C( -222789591), INT32_C( -127333602), INT32_C( 1547833861)), simde_mm512_set_epi32(INT32_C( 1706771302), INT32_C(-1876132949), INT32_C( -300867745), INT32_C(-1574226708), INT32_C( 909541228), INT32_C(-1473521559), INT32_C(-2035272090), INT32_C( -843632177), INT32_C(-1617888467), INT32_C( -960934829), INT32_C( -805571508), INT32_C( -811280081), INT32_C(-1033748670), INT32_C(-1374688928), INT32_C( -924697051), INT32_C( -396703151)), simde_mm512_set_epi32(INT32_C( 604701252), INT32_C( 1177794317), INT32_C( 1812004931), INT32_C( 817250921), INT32_C( 102789484), INT32_C( 64131133), INT32_C( 35663940), INT32_C(-1064882042), INT32_C( -392119279), INT32_C( 79249424), INT32_C( 1219088991), INT32_C( 346956559), INT32_C( -778487174), INT32_C( 2030262893), INT32_C( 8533025), INT32_C( 2088865417)) }, { simde_mm512_set_epi32(INT32_C(-1406718947), INT32_C( 276558393), INT32_C( 154803470), INT32_C( 1010355861), INT32_C( -906943422), INT32_C(-1458735792), INT32_C( -135902673), INT32_C( 2125322250), INT32_C( 668612521), INT32_C( 2134097324), INT32_C( 1431164540), INT32_C(-1097880462), INT32_C( 1895279922), INT32_C( -455917584), INT32_C(-1635623774), INT32_C( 1646110584)), UINT16_C(61721), simde_mm512_set_epi32(INT32_C(-1147100012), INT32_C( -529153170), INT32_C(-1710107397), 
INT32_C( 1085126684), INT32_C( -365628842), INT32_C( 1126939173), INT32_C(-1962930746), INT32_C(-2032518388), INT32_C( -893793955), INT32_C(-1793978656), INT32_C( 353794556), INT32_C( 484459160), INT32_C( 1795576890), INT32_C(-1800969495), INT32_C( 570832120), INT32_C( -805110645)), simde_mm512_set_epi32(INT32_C(-1152323073), INT32_C(-1880366011), INT32_C( 1623795528), INT32_C( 779718762), INT32_C( -950308445), INT32_C( 601329882), INT32_C( 1983067756), INT32_C( 1014514692), INT32_C( 192697146), INT32_C( 1393627685), INT32_C( -618845734), INT32_C(-1526656596), INT32_C( -668243521), INT32_C( 858775967), INT32_C( -874197170), INT32_C(-1013451033)), simde_mm512_set_epi32(INT32_C( 5263723), INT32_C( 260711425), INT32_C( 1623728896), INT32_C( 776994914), INT32_C( -906943422), INT32_C(-1458735792), INT32_C( -135902673), INT32_C( 941638656), INT32_C( 668612521), INT32_C( 2134097324), INT32_C( 1431164540), INT32_C(-1593765596), INT32_C(-1876350587), INT32_C( -455917584), INT32_C(-1635623774), INT32_C( 60097124)) }, { simde_mm512_set_epi32(INT32_C( 73765979), INT32_C( 1196192749), INT32_C( -212227718), INT32_C(-1980699203), INT32_C( -37222007), INT32_C(-1986328859), INT32_C( 1483201456), INT32_C( 129080387), INT32_C( -259597220), INT32_C(-1814466623), INT32_C( 1536667113), INT32_C( 1702406736), INT32_C( 1032855403), INT32_C( -907220805), INT32_C( -744099936), INT32_C( -484286001)), UINT16_C(60398), simde_mm512_set_epi32(INT32_C( 2131878120), INT32_C( -709717494), INT32_C( 677603870), INT32_C( 1110837767), INT32_C( 137332416), INT32_C( 1049147481), INT32_C( -429123521), INT32_C( 562109282), INT32_C( -475857832), INT32_C(-1750530864), INT32_C(-1098694184), INT32_C(-1278646805), INT32_C( 274075622), INT32_C( 310096866), INT32_C( 1944249360), INT32_C(-1457965117)), simde_mm512_set_epi32(INT32_C(-1770120574), INT32_C(-1267999916), INT32_C( 920660290), INT32_C( 1218524275), INT32_C( -813719782), INT32_C( 17574100), INT32_C( 1228269274), INT32_C( -540460196), INT32_C( -544630186), 
INT32_C( -973323962), INT32_C( -900762472), INT32_C( 1800691074), INT32_C( -934840396), INT32_C(-2024059127), INT32_C( 2050139755), INT32_C(-1648520849)), simde_mm512_set_epi32(INT32_C(-2140268030), INT32_C( 541673812), INT32_C( 377487680), INT32_C(-1980699203), INT32_C( -951052006), INT32_C(-1986328859), INT32_C( 152168128), INT32_C( -565698532), INT32_C( 470352390), INT32_C( 1079263494), INT32_C( 1078735872), INT32_C( 1702406736), INT32_C( -939429872), INT32_C(-2063578103), INT32_C( 135266923), INT32_C( -484286001)) }, { simde_mm512_set_epi32(INT32_C( 359551557), INT32_C( 851518101), INT32_C( 1700885885), INT32_C( 1144006274), INT32_C( 718077661), INT32_C( 1054313754), INT32_C( 65647391), INT32_C(-1867262731), INT32_C( 208941224), INT32_C( 989467762), INT32_C(-1763663368), INT32_C( 732190820), INT32_C( -780985117), INT32_C(-1786203682), INT32_C( -893464048), INT32_C(-1930046056)), UINT16_C( 5280), simde_mm512_set_epi32(INT32_C( 2082802710), INT32_C( 398405458), INT32_C( -610997258), INT32_C( 830342728), INT32_C( -327286830), INT32_C( 1285368273), INT32_C(-1636339073), INT32_C( 1467021210), INT32_C( -637556884), INT32_C( 1464578281), INT32_C( -78771124), INT32_C(-1194071193), INT32_C(-1454776494), INT32_C( 224158188), INT32_C( 1578376173), INT32_C( 2022699384)), simde_mm512_set_epi32(INT32_C(-1580866758), INT32_C( 1705729088), INT32_C(-1204463345), INT32_C( 806420788), INT32_C(-1410408996), INT32_C( 863225653), INT32_C(-2071560363), INT32_C( 1819484417), INT32_C( -246595685), INT32_C( 243263522), INT32_C( 2052176477), INT32_C( 253176681), INT32_C( 1676258794), INT32_C(-1129907739), INT32_C( 395133900), INT32_C( -86934818)), simde_mm512_set_epi32(INT32_C( 359551557), INT32_C( 851518101), INT32_C( 1700885885), INT32_C( 65844), INT32_C( 718077661), INT32_C( 862111268), INT32_C( 65647391), INT32_C(-1867262731), INT32_C( 536877203), INT32_C( 989467762), INT32_C( 1159697), INT32_C( 732190820), INT32_C( -780985117), INT32_C(-1786203682), INT32_C( -893464048), 
INT32_C(-1930046056)) }, { simde_mm512_set_epi32(INT32_C( -763717484), INT32_C(-1454287993), INT32_C( -815713015), INT32_C( -381645662), INT32_C( 1143121149), INT32_C(-2120634980), INT32_C( -259357121), INT32_C( -593579957), INT32_C(-1529041977), INT32_C(-2065541499), INT32_C( 1009471119), INT32_C( 674532491), INT32_C( -605291509), INT32_C( -802607554), INT32_C( -850350011), INT32_C( 732847081)), UINT16_C(41568), simde_mm512_set_epi32(INT32_C( 1295870302), INT32_C( 336570348), INT32_C(-1662536141), INT32_C(-1054381248), INT32_C( 1593114303), INT32_C(-1017054773), INT32_C(-1409414000), INT32_C( 227338784), INT32_C( 1117509139), INT32_C( 1937140770), INT32_C( 1843080524), INT32_C( 775622876), INT32_C( 903821795), INT32_C(-1108923393), INT32_C( -348808591), INT32_C( 691553406)), simde_mm512_set_epi32(INT32_C( -957741997), INT32_C( -389978329), INT32_C(-1992364300), INT32_C(-1194120095), INT32_C( 1460280679), INT32_C( -461012902), INT32_C( 191451119), INT32_C( 395863574), INT32_C( 2007897293), INT32_C( 647995187), INT32_C( 1812181798), INT32_C(-1288356108), INT32_C(-1946740515), INT32_C(-1688294491), INT32_C( -146679692), INT32_C( -960173252)), simde_mm512_set_epi32(INT32_C(-2101214207), INT32_C(-1454287993), INT32_C( 18368708), INT32_C( -381645662), INT32_C( 1143121149), INT32_C(-2120634980), INT32_C( 82799), INT32_C( -593579957), INT32_C(-1529041977), INT32_C( 76124945), INT32_C( 37410), INT32_C( 674532491), INT32_C( -605291509), INT32_C( -802607554), INT32_C( -850350011), INT32_C( 732847081)) }, { simde_mm512_set_epi32(INT32_C(-1543080560), INT32_C( 326946931), INT32_C( 691349892), INT32_C( 1226829378), INT32_C( 1127061143), INT32_C( 1548237043), INT32_C(-1885371906), INT32_C( 673215002), INT32_C( -2545554), INT32_C(-1367277302), INT32_C( -227991301), INT32_C( 746457208), INT32_C(-1737407854), INT32_C( 1988034150), INT32_C( -605858038), INT32_C( -752579769)), UINT16_C(24718), simde_mm512_set_epi32(INT32_C( 1517976828), INT32_C( 453076709), INT32_C( 1155311084), 
INT32_C(-1730593997), INT32_C( 2009897302), INT32_C( -813354987), INT32_C( 1160389453), INT32_C(-1543844644), INT32_C( -908777016), INT32_C( 107061968), INT32_C(-1889800585), INT32_C(-1309816398), INT32_C( 1760607631), INT32_C(-1373730647), INT32_C( 1475928392), INT32_C(-1415204909)), simde_mm512_set_epi32(INT32_C( 901302066), INT32_C( 236605933), INT32_C( 1144123725), INT32_C( 765559000), INT32_C( -272466037), INT32_C( 489940181), INT32_C( 1285546635), INT32_C( 894611583), INT32_C(-1280504231), INT32_C( -511809158), INT32_C( 517714821), INT32_C( -458114298), INT32_C(-1583011646), INT32_C( 2050708057), INT32_C(-1873361568), INT32_C( 1295393304)), simde_mm512_set_epi32(INT32_C(-1543080560), INT32_C( 68817160), INT32_C( 2183169), INT32_C( 1226829378), INT32_C( 1127061143), INT32_C( 1548237043), INT32_C(-1885371906), INT32_C( 673215002), INT32_C( 841483793), INT32_C(-1367277302), INT32_C( -227991301), INT32_C( 746457208), INT32_C(-2130378688), INT32_C( 1344361040), INT32_C(-2147089376), INT32_C( -752579769)) }, { simde_mm512_set_epi32(INT32_C( -203532895), INT32_C(-1671983312), INT32_C( -485765980), INT32_C(-1920770849), INT32_C( -87193791), INT32_C( 1659979037), INT32_C(-1337410362), INT32_C( 1209029675), INT32_C( 587197109), INT32_C( -530755740), INT32_C( 281664792), INT32_C( -47077792), INT32_C( -945013045), INT32_C( -166692659), INT32_C( 1790118115), INT32_C( 689330771)), UINT16_C( 7519), simde_mm512_set_epi32(INT32_C( -384323470), INT32_C( 473195364), INT32_C( 206146438), INT32_C(-1217279332), INT32_C(-1088463893), INT32_C( 970520784), INT32_C( -929499045), INT32_C(-1086034653), INT32_C(-1051759609), INT32_C(-1753508816), INT32_C( 1464082608), INT32_C( 492133710), INT32_C( 1610388137), INT32_C(-2026322187), INT32_C(-1721391979), INT32_C( 466414066)), simde_mm512_set_epi32(INT32_C( 1039275088), INT32_C( -195464931), INT32_C(-1467895249), INT32_C( 1829711637), INT32_C( 2006708634), INT32_C( 837542220), INT32_C( -759309790), INT32_C( -498075629), INT32_C( 
922280800), INT32_C( 925077084), INT32_C( 1941328295), INT32_C( 27280850), INT32_C( -499921640), INT32_C( 738410205), INT32_C( 972641353), INT32_C( 1011602801)), simde_mm512_set_epi32(INT32_C( -203532895), INT32_C(-1671983312), INT32_C( -485765980), INT32_C( 1208886529), INT32_C( 1082171408), INT32_C( 2294028), INT32_C(-1337410362), INT32_C( 1074499600), INT32_C( 587197109), INT32_C( 536873548), INT32_C( 281664792), INT32_C( 10485904), INT32_C(-1610396400), INT32_C( 671299592), INT32_C( 546852936), INT32_C( 604181505)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_andnot_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_andnot_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT16_C(56303), simde_mm512_set_epi32(INT32_C( 684353163), INT32_C( -624296854), INT32_C(-1626870831), INT32_C( 1693659819), INT32_C( 1814966119), INT32_C( 1428960968), INT32_C( 1709146671), INT32_C(-1269736679), INT32_C( -399245267), INT32_C( 128121852), INT32_C( 623395494), INT32_C( 1817163956), INT32_C( 1991308671), INT32_C( -978886098), INT32_C( 1436967950), INT32_C( 227176170)), simde_mm512_set_epi32(INT32_C( -155316348), INT32_C( 1821995326), INT32_C(-1956349521), INT32_C( 2078645861), INT32_C(-2002962850), INT32_C( 1961273418), INT32_C( 1026886280), INT32_C( 1852456749), INT32_C( 1549356853), INT32_C( 905982506), INT32_C( -562722910), INT32_C( 1231420121), INT32_C( 786944005), INT32_C(-1682464667), INT32_C( 12357782), INT32_C( 913777965)), simde_mm512_set_epi32(INT32_C( -701232892), INT32_C( 605028628), INT32_C( 0), INT32_C( 453282884), INT32_C(-2137976808), INT32_C( 0), INT32_C( 404752512), INT32_C( 1244275748), INT32_C( 340348688), INT32_C( 805306370), INT32_C( -631929600), INT32_C( 0), INT32_C( 138870784), INT32_C( 437289025), 
INT32_C( 1609872), INT32_C( 846528773)) }, { UINT16_C(56200), simde_mm512_set_epi32(INT32_C( -452164103), INT32_C( 1890508390), INT32_C( 1258638805), INT32_C( -750109723), INT32_C( -513503890), INT32_C( -379667747), INT32_C(-1651966538), INT32_C( 418163645), INT32_C(-1484633406), INT32_C( 128570401), INT32_C(-1432905388), INT32_C(-1460529893), INT32_C( -808466332), INT32_C(-1300168003), INT32_C( 153276923), INT32_C( -912847520)), simde_mm512_set_epi32(INT32_C( 1849401350), INT32_C(-2046167065), INT32_C(-1772087293), INT32_C( 763578781), INT32_C( -59556630), INT32_C( -574235850), INT32_C(-1931079616), INT32_C( 856557360), INT32_C( 1798494574), INT32_C( -255236934), INT32_C( -498039931), INT32_C( 1916101155), INT32_C( 1291737736), INT32_C(-1818740725), INT32_C( 1042711156), INT32_C( 770521823)), simde_mm512_set_epi32(INT32_C( 171122694), INT32_C(-2046746239), INT32_C( 0), INT32_C( 746668056), INT32_C( 471019648), INT32_C( 0), INT32_C( 6684736), INT32_C( 587334656), INT32_C( 1211142444), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 3150472), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(29534), simde_mm512_set_epi32(INT32_C(-1569526022), INT32_C( -566961257), INT32_C( -454262297), INT32_C(-2011970966), INT32_C( 1729229439), INT32_C( 515441803), INT32_C( 1629075756), INT32_C( -633945234), INT32_C(-1517000454), INT32_C(-2129179491), INT32_C(-1082415130), INT32_C( -643068488), INT32_C(-1177678851), INT32_C( 811665360), INT32_C(-1120986687), INT32_C( 1945770944)), simde_mm512_set_epi32(INT32_C( 1206445472), INT32_C( 1685117563), INT32_C( -105634979), INT32_C( 300875900), INT32_C( 1292473590), INT32_C( -154568093), INT32_C( -725481309), INT32_C( 1537059805), INT32_C(-1299234249), INT32_C( 1342055246), INT32_C( 1121196977), INT32_C( -936323200), INT32_C( 284920534), INT32_C( -501374627), INT32_C( 523356394), INT32_C( 2082914622)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 541065320), INT32_C( 420487704), INT32_C( 300679188), INT32_C( 0), INT32_C( 0), 
INT32_C(-1799225213), INT32_C( 25766033), INT32_C( 0), INT32_C( 1323836226), INT32_C( 0), INT32_C( 1069568), INT32_C( 3244034), INT32_C(-1038311411), INT32_C( 34652202), INT32_C( 0)) }, { UINT16_C(18467), simde_mm512_set_epi32(INT32_C(-1126901666), INT32_C(-1540993522), INT32_C( -310394649), INT32_C( 1569425965), INT32_C( 1860055197), INT32_C( 1022884520), INT32_C( 886587779), INT32_C( -7751100), INT32_C( 725782952), INT32_C( 1524528742), INT32_C(-1901622691), INT32_C( -205155472), INT32_C( 1297212229), INT32_C(-1562315637), INT32_C(-1561800150), INT32_C( 1969817622)), simde_mm512_set_epi32(INT32_C( 1691822441), INT32_C( -747576101), INT32_C( 526461787), INT32_C(-1551035253), INT32_C( -494445545), INT32_C( 601243904), INT32_C( 1621282220), INT32_C( 87983768), INT32_C( 1749180883), INT32_C( 653596692), INT32_C( 1933605299), INT32_C( 2110990238), INT32_C( 1287872496), INT32_C( -947101027), INT32_C(-1469323630), INT32_C( -103698146)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 1397792977), INT32_C( 0), INT32_C( 0), INT32_C(-2147398654), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1900048802), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 134423696), INT32_C(-2003783416)) }, { UINT16_C(10499), simde_mm512_set_epi32(INT32_C(-1800578563), INT32_C( 1189147870), INT32_C( -918534753), INT32_C(-2046784432), INT32_C( 2146267513), INT32_C( 1185116678), INT32_C( 743422455), INT32_C( -958735431), INT32_C(-1272492795), INT32_C(-1993475811), INT32_C( -901911405), INT32_C( -444376352), INT32_C( 1645484254), INT32_C( 1890851846), INT32_C( 632187417), INT32_C( 2142729898)), simde_mm512_set_epi32(INT32_C( -752859034), INT32_C( -661272677), INT32_C( 1736074301), INT32_C( 1246429845), INT32_C(-1327059157), INT32_C(-1760626525), INT32_C( 693999571), INT32_C( 179503183), INT32_C(-1261277577), INT32_C( 2014601419), INT32_C( 45385261), INT32_C( 1333239387), INT32_C( 1950214560), INT32_C( 2050540474), INT32_C( -73887902), INT32_C(-1586317941)), 
simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 641343520), INT32_C( 0), INT32_C(-2147316222), INT32_C( 0), INT32_C( 0), INT32_C( 136380486), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -636451486), INT32_C(-2143256319)) }, { UINT16_C(17481), simde_mm512_set_epi32(INT32_C( -441498123), INT32_C( 324738064), INT32_C( -27713047), INT32_C( 322022433), INT32_C( -623687192), INT32_C( 441486000), INT32_C(-1091397610), INT32_C( 486920838), INT32_C( 727930899), INT32_C( 134578624), INT32_C( -229821250), INT32_C(-1459771681), INT32_C( 786852212), INT32_C(-1562273484), INT32_C( 592450244), INT32_C( -391708168)), simde_mm512_set_epi32(INT32_C( 792156312), INT32_C( 407601311), INT32_C(-1255558455), INT32_C( 1648353396), INT32_C(-1874603621), INT32_C(-1962724996), INT32_C(-1379808132), INT32_C(-1917277067), INT32_C( -327375348), INT32_C( -266290190), INT32_C( -446684576), INT32_C( -218289365), INT32_C( 1659849163), INT32_C( 313080914), INT32_C( 914897986), INT32_C( -690088867)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 134250639), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-2130497204), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -266323406), INT32_C( 0), INT32_C( 0), INT32_C( 1074331787), INT32_C( 0), INT32_C( 0), INT32_C( 374871045)) }, { UINT16_C(25655), simde_mm512_set_epi32(INT32_C( 7734189), INT32_C(-1107618186), INT32_C( 1291997837), INT32_C( -657618671), INT32_C( -523204184), INT32_C( 197247571), INT32_C(-1924672781), INT32_C( 1367953812), INT32_C( 1671605226), INT32_C( -667696065), INT32_C( 734579404), INT32_C( -25998720), INT32_C( -791898275), INT32_C(-1848361166), INT32_C( 302446873), INT32_C(-1290034089)), simde_mm512_set_epi32(INT32_C(-2140777278), INT32_C( 1356458144), INT32_C( 990615850), INT32_C( 122581591), INT32_C( 1842174798), INT32_C( 1633161914), INT32_C( 1487544794), INT32_C( 1680890315), INT32_C(-1051319145), INT32_C( 1671869354), INT32_C( -657093416), INT32_C( 76483879), INT32_C( 897241075), 
INT32_C(-1385812547), INT32_C( 518745683), INT32_C( 1278998383)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 1073801344), INT32_C( 839485730), INT32_C( 0), INT32_C( 0), INT32_C( 1612189864), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -803929072), INT32_C( 9176359), INT32_C( 0), INT32_C( 740425869), INT32_C( 216598082), INT32_C( 1277186856)) }, { UINT16_C( 9319), simde_mm512_set_epi32(INT32_C( 359510622), INT32_C( 1667719225), INT32_C( 630674948), INT32_C( 610105763), INT32_C( 20744378), INT32_C(-1334671422), INT32_C( 1934181344), INT32_C( -207473635), INT32_C( -12247390), INT32_C( 935971775), INT32_C( -814870615), INT32_C( 272416728), INT32_C(-2094904434), INT32_C( 118285194), INT32_C( 1770668331), INT32_C(-1463910375)), simde_mm512_set_epi32(INT32_C( 399098366), INT32_C(-1713281213), INT32_C( 2124618772), INT32_C(-1052563089), INT32_C( 1851869047), INT32_C( 2020277970), INT32_C(-1035589842), INT32_C(-1789987668), INT32_C( 733487930), INT32_C( -497440680), INT32_C(-1951336884), INT32_C(-1752937795), INT32_C(-1263292061), INT32_C( 242422), INT32_C( 1531342059), INT32_C( -447099781)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 1512048656), INT32_C( 0), INT32_C( 0), INT32_C( 1208514576), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1072684992), INT32_C( 9498692), INT32_C( 0), INT32_C( 0), INT32_C( 200820), INT32_C( 306457792), INT32_C( 1161907298)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_andnot_epi32(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_andnot_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( 207721957124820559), INT64_C( 7800065217939756514), INT64_C(-3924116943760495845), INT64_C(-4670511705337769443), INT64_C( 8681164262815197674), INT64_C(-1748050366477277388), 
INT64_C( 6521714148432251392), INT64_C( -441034964778727222)), simde_mm512_set_epi64(INT64_C( 1906622887772594248), INT64_C(-6188571553716009650), INT64_C( 264280323541139152), INT64_C( 1584607577489802492), INT64_C( 1109502791419861047), INT64_C( 7178327557348084990), INT64_C( 1395553581907359924), INT64_C(-6533533183118051768)), simde_mm512_set_epi64(INT64_C( 1735017709269196800), INT64_C(-9079113909020786676), INT64_C( 153194412888495296), INT64_C( 58731659148920032), INT64_C( 505544490090532885), INT64_C( 565705191721162), INT64_C( 98516792007995572), INT64_C( 293888628881438208)) }, { simde_mm512_set_epi64(INT64_C(-6724768156717290819), INT64_C(-5250906792133082841), INT64_C( 7101763469273509464), INT64_C(-6606445878350250265), INT64_C( -992513133092468415), INT64_C( 6991549638647222936), INT64_C(-6702609966967541799), INT64_C( 6463119549714578187)), simde_mm512_set_epi64(INT64_C( -887590357697526439), INT64_C( 7877951253899372304), INT64_C(-2913340636735054567), INT64_C(-9221955371178601401), INT64_C(-8332586207462320569), INT64_C(-6065729331557277752), INT64_C(-5495188752553836498), INT64_C(-4545091831756868823)), simde_mm512_set_epi64(INT64_C( 5837268749247317312), INT64_C( 5211811456626140688), INT64_C(-7705320311288082175), INT64_C( 1134704830971904), INT64_C( 883853889565267462), INT64_C(-8444247080808460992), INT64_C( 1226140460016631846), INT64_C(-9201976626802327520)) }, { simde_mm512_set_epi64(INT64_C(-8081018174907924542), INT64_C( 6936249846129023242), INT64_C(-1059210610078769383), INT64_C( 1593162574725548027), INT64_C( 2194029932784271057), INT64_C( 2297742112014824027), INT64_C( 6872936620014531062), INT64_C(-4458741002964204726)), simde_mm512_set_epi64(INT64_C( 7389599045220123111), INT64_C(-4734617337151831127), INT64_C(-3688698012661984630), INT64_C( 2942411497108224949), INT64_C( 3088165388972230068), INT64_C(-8598989874996476457), INT64_C(-2925060949778391940), INT64_C( 1600057734617632940)), simde_mm512_set_epi64(INT64_C( 
6918937665425915941), INT64_C(-7059040014775614815), INT64_C( 901582149085413506), INT64_C( 2936772469119858692), INT64_C( 2344435893274706212), INT64_C(-9221049868269222524), INT64_C(-9221110889849910264), INT64_C( 1450299817904312484)) }, { simde_mm512_set_epi64(INT64_C(-2851531746227363368), INT64_C( 2067892326136395565), INT64_C( 5955544350840259834), INT64_C(-9215158447496033102), INT64_C(-6496129397571023850), INT64_C( 6580537045822776099), INT64_C(-1881492268188536860), INT64_C( 6477581622128112348)), simde_mm512_set_epi64(INT64_C( 4736931688263401886), INT64_C( -422510099501192510), INT64_C( 3904035851984069712), INT64_C(-1269778779692298262), INT64_C( 7103388094266435672), INT64_C( 8538164081108009860), INT64_C( 7657481289221491954), INT64_C(-6346831563088898420)), simde_mm512_set_epi64(INT64_C( 112770318310899718), INT64_C(-2161639582911543102), INT64_C( 2596786860877701120), INT64_C( 7953586499903062856), INT64_C( 4756998866794012744), INT64_C( 2606779805598826628), INT64_C( 721778983603339282), INT64_C(-6482086895067069440)) }, { simde_mm512_set_epi64(INT64_C( -821005629772787069), INT64_C(-4647973389902912809), INT64_C( 6459900742609080709), INT64_C( -1266809698382208), INT64_C( 701020828809534395), INT64_C(-8547290149729742964), INT64_C( -440779604644636577), INT64_C(-3509307452635316669)), simde_mm512_set_epi64(INT64_C( 8999318376500703433), INT64_C( 1719097867730734351), INT64_C( 360091487853740826), INT64_C(-6254537314592943558), INT64_C( -632347399973673450), INT64_C( 2614451855333869078), INT64_C( 6887846494654494209), INT64_C( 6275950466702179569)), simde_mm512_set_epi64(INT64_C( 604608525006544968), INT64_C( 36136703980768520), INT64_C( 313352018360009242), INT64_C( 137573240890), INT64_C( -720169941136284668), INT64_C( 2596327487390613522), INT64_C( 438118704866436608), INT64_C( 1157566394459521200)) }, { simde_mm512_set_epi64(INT64_C(-5483950330033170066), INT64_C(-4153699507396814554), INT64_C( 1686943364333831141), 
INT64_C(-6155572369391990976), INT64_C(-2338197867102969548), INT64_C( 4970317907692585902), INT64_C( -659027381808082615), INT64_C(-8301976371410819309)), simde_mm512_set_epi64(INT64_C(-5922203424268985599), INT64_C( 1802271341012641429), INT64_C(-7199161640250473305), INT64_C( 4184910176757162424), INT64_C(-5885970898589897236), INT64_C( 5320604596895707800), INT64_C(-7049806138053003152), INT64_C( 7856069210784274088)), simde_mm512_set_epi64(INT64_C( 869198318683570689), INT64_C( 1801690747234690705), INT64_C(-8640647776843037694), INT64_C( 1153141544681808056), INT64_C( 2328590264702274760), INT64_C( 649785191505621008), INT64_C( 585473076492838960), INT64_C( 6991285376398659752)) }, { simde_mm512_set_epi64(INT64_C( 772369500911491951), INT64_C(-3487181344595680581), INT64_C(-6776954808191866646), INT64_C( 1437133779275187040), INT64_C(-3742444221385296201), INT64_C( 3619551202282748987), INT64_C(-5676058734881350704), INT64_C( 3034639668798379519)), simde_mm512_set_epi64(INT64_C( 7799576852730631653), INT64_C(-4611614721990756478), INT64_C( 4179897201710999091), INT64_C(-6554042946408561565), INT64_C( 7858455943023474684), INT64_C(-4868663260305658784), INT64_C(-6563387696243649675), INT64_C( -252761203575600938)), simde_mm512_set_epi64(INT64_C( 7207351508714783872), INT64_C( 71283551638784), INT64_C( 1873656161226589713), INT64_C(-6626408997484215293), INT64_C( 2382069952524845384), INT64_C(-8339540561327800256), INT64_C( 342318686555209765), INT64_C(-3142946274104309760)) }, { simde_mm512_set_epi64(INT64_C(-6272776462503295319), INT64_C(-8894851852280934479), INT64_C( 6828037840473322695), INT64_C( -784763491569829334), INT64_C(-6956613286547242208), INT64_C(-7641604144835014945), INT64_C( 4137535773895137731), INT64_C( 3122415965305276610)), simde_mm512_set_epi64(INT64_C( 5967240469174938071), INT64_C( 2271146860082105533), INT64_C( 2488999494207974941), INT64_C(-7245269557183082373), INT64_C(-6094983942162054282), INT64_C( 5272800144124782830), 
INT64_C(-1112016268759137335), INT64_C( 3873297534982922048)), simde_mm512_set_epi64(INT64_C( 5912488079989451094), INT64_C( 1945726568221376524), INT64_C( 2308274862648494616), INT64_C( 747597780979417169), INT64_C( 2308728562190385238), INT64_C( 5191575370143047712), INT64_C(-4571152522202316280), INT64_C( 1477377559112455936)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_andnot_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_andnot_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask8 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C(-8706055201876274534), INT64_C(-2974526497282267924), INT64_C(-5064099105424399850), INT64_C( 4173762680971677425), INT64_C( 5058953897646810163), INT64_C( 3129329827313761969), INT64_C(-7680990319456213473), INT64_C( 3095613893972693568)), UINT8_C(148), simde_mm512_set_epi64(INT64_C( -438459145642420823), INT64_C( 2788318060387771818), INT64_C(-6405634033298828022), INT64_C( 5697280571633296693), INT64_C(-4038706177987584167), INT64_C( 7050984609072161968), INT64_C( 2749018709708772273), INT64_C(-2265592192997989021)), simde_mm512_set_epi64(INT64_C(-1049270424665539045), INT64_C(-5614406584732574076), INT64_C( 631202638299991092), INT64_C(-8590255914187036925), INT64_C(-6307315262773811693), INT64_C(-1209843912248425712), INT64_C(-3872834841544228683), INT64_C(-3085083838104197908)), simde_mm512_set_epi64(INT64_C( 4565176708278802), INT64_C(-2974526497282267924), INT64_C(-5064099105424399850), INT64_C(-9166793916060532222), INT64_C( 5058953897646810163), INT64_C(-8203935250787499264), INT64_C(-7680990319456213473), INT64_C( 3095613893972693568)) }, { simde_mm512_set_epi64(INT64_C(-4842938149095873389), INT64_C( -846085209911123390), INT64_C( 902030110892207375), INT64_C(-8179884512098486778), INT64_C( 
7136180633023633249), INT64_C(-7202514001649392691), INT64_C(-4512985345247872566), INT64_C( 6280820093975482096)), UINT8_C( 88), simde_mm512_set_epi64(INT64_C(-5899542268894168412), INT64_C( 5687678929880926481), INT64_C( 754471637334648472), INT64_C( 1530269878614188173), INT64_C(-3481843836368626596), INT64_C( 7214537798473258692), INT64_C( 3186147264512503626), INT64_C(-2220217993706522327)), simde_mm512_set_epi64(INT64_C( 4741426381855247639), INT64_C(-6093431436741802321), INT64_C(-7277776184535270866), INT64_C(-5890238516652006119), INT64_C(-8031043717190201593), INT64_C( 7604814614465185239), INT64_C( 6712821644684838579), INT64_C(-1700024539209227072)), simde_mm512_set_epi64(INT64_C(-4842938149095873389), INT64_C(-6845110636993050962), INT64_C( 902030110892207375), INT64_C(-6178618704551475952), INT64_C( 1152921504676217603), INT64_C(-7202514001649392691), INT64_C(-4512985345247872566), INT64_C( 6280820093975482096)) }, { simde_mm512_set_epi64(INT64_C( -647905387169688868), INT64_C(-8461625299591442725), INT64_C(-4959110866452894415), INT64_C(-6046186632754619075), INT64_C(-1792277330244185216), INT64_C( 7899374623587606112), INT64_C(-2530906147097710338), INT64_C(-3452464982464189359)), UINT8_C(234), simde_mm512_set_epi64(INT64_C( 1092825191169264761), INT64_C( 518154175979275913), INT64_C(-2540128939765803497), INT64_C( 7206989642204137224), INT64_C( 5053971549089664110), INT64_C( 275130895293265200), INT64_C( 5870095287105445532), INT64_C( 3766077764635497461)), simde_mm512_set_epi64(INT64_C( 4726923138274336458), INT64_C( 3036293318033390010), INT64_C( 3265833753663381966), INT64_C(-5548402770380826836), INT64_C(-1910939043053590920), INT64_C(-2803972634053834044), INT64_C( 8571307896088376800), INT64_C(-2906367800591944553)), simde_mm512_set_epi64(INT64_C( 4652501007819903618), INT64_C( 2883153157893175602), INT64_C( 2395932937578488264), INT64_C(-6046186632754619075), INT64_C(-6820513618777071088), INT64_C( 7899374623587606112), INT64_C( 
2774537390188929376), INT64_C(-3452464982464189359)) }, { simde_mm512_set_epi64(INT64_C( 1235103765186305905), INT64_C( 8251648155281492223), INT64_C( 6607793927948629202), INT64_C(-4956133557414585628), INT64_C( -962568210701922461), INT64_C( 7520783669412628517), INT64_C( 4493695514722238610), INT64_C( 6191552237626999876)), UINT8_C(175), simde_mm512_set_epi64(INT64_C(-1999731829913464848), INT64_C( 7072204574593617968), INT64_C( -329416891633690006), INT64_C( 4219653511875682573), INT64_C(-5631405021388401918), INT64_C( -157450572284011331), INT64_C(-6448890677231800514), INT64_C(-7780641104162742337)), simde_mm512_set_epi64(INT64_C( 261057906798578959), INT64_C(-4964336716206621793), INT64_C(-2469501117696455323), INT64_C( 2339328587648411167), INT64_C( 8220620103791574591), INT64_C( 273538927111600315), INT64_C(-3298288074488883789), INT64_C(-8357787233131660724)), simde_mm512_set_epi64(INT64_C( 252325274594050063), INT64_C( 8251648155281492223), INT64_C( 329344140649481477), INT64_C(-4956133557414585628), INT64_C( 4757067868831771709), INT64_C( 147282005282398210), INT64_C( 5780933484690985089), INT64_C( 577059746971148352)) }, { simde_mm512_set_epi64(INT64_C(-4285851555602414983), INT64_C(-8492982904341423564), INT64_C(-2837093742585682248), INT64_C( 267283033869441308), INT64_C( 4311088349833897908), INT64_C( -647706517356585524), INT64_C(-3770716194274572842), INT64_C(-8566807519504738391)), UINT8_C( 75), simde_mm512_set_epi64(INT64_C(-6282230583383062251), INT64_C(-7841791912404359359), INT64_C(-7579575622870303941), INT64_C(-2922061146712111361), INT64_C( 4606944383693507801), INT64_C(-6882069134795290712), INT64_C(-4540648442557822523), INT64_C( 8626282944079879495)), simde_mm512_set_epi64(INT64_C(-1823698107073259294), INT64_C( 8029233569224881686), INT64_C( 46900467487790247), INT64_C( 8663098726891022114), INT64_C( 2596646339415618602), INT64_C( 7059567741718714192), INT64_C( 7446336952031093968), INT64_C( 16931348739669095)), 
simde_mm512_set_epi64(INT64_C(-4285851555602414983), INT64_C( 7800656914580246550), INT64_C(-2837093742585682248), INT64_C( 267283033869441308), INT64_C( 88250757154), INT64_C( -647706517356585524), INT64_C( 2810971851134903312), INT64_C( 2252181026775072)) }, { simde_mm512_set_epi64(INT64_C( 2037127205197222183), INT64_C( 3451898891201360501), INT64_C( 1455211247092394628), INT64_C( 2206658725580708086), INT64_C( 5349364315141837270), INT64_C( 7849256443344717184), INT64_C( 4856719246957022704), INT64_C(-4923001172558722698)), UINT8_C(149), simde_mm512_set_epi64(INT64_C( 6411014556179012579), INT64_C(-8290562023531042118), INT64_C( 3513406971994598159), INT64_C( 170515694744852127), INT64_C( 7762613428125762288), INT64_C( 4486051683696872920), INT64_C(-3347799382542858009), INT64_C( 7877354972766519961)), simde_mm512_set_epi64(INT64_C( 2384233607786009160), INT64_C( 7136321197786935066), INT64_C(-2775012291419678803), INT64_C( 1447324989515017380), INT64_C(-5436087904826886612), INT64_C( 7888585058472078205), INT64_C(-7864278168616859201), INT64_C( 8559884086409161720)), simde_mm512_set_epi64(INT64_C( 2379589521848270856), INT64_C( 3451898891201360501), INT64_C( 1455211247092394628), INT64_C( 1441191875528796192), INT64_C( 5349364315141837270), INT64_C( 4699579053875929637), INT64_C( 4856719246957022704), INT64_C( 1335881482333858144)) }, { simde_mm512_set_epi64(INT64_C( -626073311570320561), INT64_C( 4678237318537021585), INT64_C( 7326175960335696621), INT64_C( 2614088339478761539), INT64_C(-3404519381245739218), INT64_C( 8481274767690754747), INT64_C(-4945537623263429760), INT64_C( 5945167030889147721)), UINT8_C(209), simde_mm512_set_epi64(INT64_C( 1396956538408270925), INT64_C( 433531675836732237), INT64_C(-2740776246441943234), INT64_C( 627773489989817177), INT64_C( 2334235533617502306), INT64_C( 5200994462656867787), INT64_C( 6058971438237170661), INT64_C(-1718043134590880356)), simde_mm512_set_epi64(INT64_C( 6582702301060698834), 
INT64_C(-6620728110496909408), INT64_C(-2674893574601157335), INT64_C(-3191892667818640289), INT64_C(-1755995440120031315), INT64_C(-9164966479234216120), INT64_C( -811539623059483440), INT64_C(-8790398035654865383)), simde_mm512_set_epi64(INT64_C( 5194902496598033042), INT64_C(-6910140186789469024), INT64_C( 7326175960335696621), INT64_C(-3242571914706752506), INT64_C(-3404519381245739218), INT64_C( 8481274767690754747), INT64_C(-4945537623263429760), INT64_C( 432964590381304321)) }, { simde_mm512_set_epi64(INT64_C(-6743158443935274483), INT64_C( -109319504177728220), INT64_C(-4028288193005214442), INT64_C( 132288430860812468), INT64_C( 917336920958928215), INT64_C(-8592087087533075804), INT64_C( -911564553413882344), INT64_C(-5778334739542351628)), UINT8_C(132), simde_mm512_set_epi64(INT64_C(-8373098054511418162), INT64_C( 7896680406183363835), INT64_C( 4931162839211744539), INT64_C(-7345169465412510410), INT64_C(-7349547769362151281), INT64_C( 1089692206936889), INT64_C( 6524506004040415129), INT64_C( 6226593529101379713)), simde_mm512_set_epi64(INT64_C( 3458147115787789114), INT64_C( 7210094384770191006), INT64_C( 7088560670460655534), INT64_C( -803268445524244375), INT64_C( 4723424603414443741), INT64_C( 1370109689785890561), INT64_C(-4376650697011830162), INT64_C( -620804834547376669)), simde_mm512_set_epi64(INT64_C( 2607874799996928816), INT64_C( -109319504177728220), INT64_C(-4028288193005214442), INT64_C( 132288430860812468), INT64_C( 917336920958928215), INT64_C( 1369094837600650240), INT64_C( -911564553413882344), INT64_C(-5778334739542351628)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_andnot_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_andnot_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { 
{ UINT8_C( 7), simde_mm512_set_epi64(INT64_C(-2016264017930850215), INT64_C( 6207900603916400351), INT64_C( 7392720324711365837), INT64_C( 8770333430120422633), INT64_C( 490532205378570002), INT64_C(-6106476949393880649), INT64_C(-1854090463849988422), INT64_C( 2161894352221900559)), simde_mm512_set_epi64(INT64_C( 2471053143203888378), INT64_C( 4307108638624930374), INT64_C( 8813537095665060151), INT64_C( -722272124812023485), INT64_C( -967288076808354317), INT64_C(-6013850093851417513), INT64_C( 3331958923341291108), INT64_C( -281534168919433716)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 327214808695383104), INT64_C( 592518518720905284), INT64_C(-2299289876066988032)) }, { UINT8_C( 76), simde_mm512_set_epi64(INT64_C(-7188491746248886702), INT64_C( 3795103503776882624), INT64_C( 8025930014425820340), INT64_C(-7929605366413196523), INT64_C( 5924420044782879602), INT64_C(-3302350069387149227), INT64_C(-1821341009738891830), INT64_C(-6812922588519498817)), simde_mm512_set_epi64(INT64_C(-1266328346505933550), INT64_C( 1669938728598205410), INT64_C(-7350359895777029108), INT64_C( 9139543262716722238), INT64_C(-9200593584210926828), INT64_C(-3449434666635797941), INT64_C( 4314658246940308870), INT64_C( -478133805478226079)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 217316721059520546), INT64_C( 0), INT64_C( 0), INT64_C(-9205313376526131196), INT64_C( 5910164412938), INT64_C( 0), INT64_C( 0)) }, { UINT8_C(148), simde_mm512_set_epi64(INT64_C( 2173045647004856331), INT64_C(-9109531323294262314), INT64_C(-2493109132018654878), INT64_C( 6270825741977490200), INT64_C( 8719769943602297687), INT64_C(-4201021528893071940), INT64_C( 9011627797455533120), INT64_C( 6620301637478416060)), simde_mm512_set_epi64(INT64_C(-4851330938418837166), INT64_C( 8567660546009495156), INT64_C(-2946935282469126440), INT64_C(-3944680176869437518), INT64_C(-3189291857021003507), INT64_C( 852944387991302704), INT64_C( 5948575888921546761), 
INT64_C( 4930911444432807162)), simde_mm512_set_epi64(INT64_C(-6880224560885528240), INT64_C( 0), INT64_C( 0), INT64_C(-8628461452576158558), INT64_C( 0), INT64_C( 739720922782507520), INT64_C( 0), INT64_C( 0)) }, { UINT8_C( 97), simde_mm512_set_epi64(INT64_C( -924406031683798297), INT64_C( 1082742291630099615), INT64_C( 3950666752159487194), INT64_C( 8443851551588188807), INT64_C( 5838662214875022266), INT64_C(-6073322957639126750), INT64_C( 1174103819847041898), INT64_C( 693926700598930845)), simde_mm512_set_epi64(INT64_C( 917406711858321823), INT64_C(-2954398701286057389), INT64_C( 580508427727522845), INT64_C(-4656281121400174897), INT64_C(-3028496641912979897), INT64_C( 6357018899588818011), INT64_C( 5102737467710367164), INT64_C( 1099306012957445482)), simde_mm512_set_epi64(INT64_C( 0), INT64_C(-3388587049163943360), INT64_C( 579945122294155269), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 450505469795237986)) }, { UINT8_C(210), simde_mm512_set_epi64(INT64_C( 4586733821042914141), INT64_C(-6562128612845469564), INT64_C(-5088971089241108691), INT64_C( 4584509013736167571), INT64_C( 7541158438725419821), INT64_C(-6577447853347647248), INT64_C( 8000393737083977627), INT64_C(-3838210298295657456)), simde_mm512_set_epi64(INT64_C(-3810154219907114893), INT64_C( 1390546034528663938), INT64_C(-7278948997228835946), INT64_C(-6400015342302035742), INT64_C(-5025729231272531675), INT64_C( -727304839347940122), INT64_C( 5841837551579279726), INT64_C(-6256756974903097514)), simde_mm512_set_epi64(INT64_C(-4604784503990056926), INT64_C( 1369116277674653954), INT64_C( 0), INT64_C(-9214222637876019104), INT64_C( 0), INT64_C( 0), INT64_C( 1157530966609723492), INT64_C( 0)) }, { UINT8_C(171), simde_mm512_set_epi64(INT64_C( 2614947921582018787), INT64_C(-4851561713766233132), INT64_C(-9121795968209612126), INT64_C( 784084589312935430), INT64_C( 3206750945776122646), INT64_C( 2956179786298753960), INT64_C( 5449808455866424595), INT64_C( 314020808054955060)), 
simde_mm512_set_epi64(INT64_C( 420924716680581769), INT64_C( 634178498505834615), INT64_C(-2861544115657502554), INT64_C(-7045300656768620560), INT64_C( 3724569018417139461), INT64_C( 7684038547017787602), INT64_C( 4661447160348399809), INT64_C( 8780209518656646828)), simde_mm512_set_epi64(INT64_C( 114072716522619400), INT64_C( 0), INT64_C( 6341349786890797060), INT64_C( 0), INT64_C( 1382694151414203393), INT64_C( 0), INT64_C( 4574054841401536), INT64_C( 8755017506026431112)) }, { UINT8_C(225), simde_mm512_set_epi64(INT64_C(-3697729744057786539), INT64_C(-2459882991819182775), INT64_C( 6065837030945349572), INT64_C( 8437722782224197038), INT64_C( 1700648554253726454), INT64_C(-4293199790864835662), INT64_C( 6581402203822969825), INT64_C(-6231169800047978744)), simde_mm512_set_epi64(INT64_C( 8326587265612039337), INT64_C( 6780517041864519531), INT64_C(-7817226648374121699), INT64_C(-3500732471169369834), INT64_C( 1796671772602068213), INT64_C( 1885612779837593615), INT64_C(-6040660189943903948), INT64_C( 675381603587673544)), simde_mm512_set_epi64(INT64_C( 3675211075874242728), INT64_C( 144396663190979106), INT64_C(-8970746854625959911), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 25068767419582656)) }, { UINT8_C(182), simde_mm512_set_epi64(INT64_C(-3172486234888138881), INT64_C( 3556874573334620913), INT64_C(-7174772828994546158), INT64_C( -768272060832782008), INT64_C(-7948383401788128664), INT64_C(-3962825949835743119), INT64_C(-4177466042331622142), INT64_C(-4344904134560657490)), simde_mm512_set_epi64(INT64_C( 3131865100191000199), INT64_C( 3277342092864256055), INT64_C( 2638156770812089616), INT64_C(-5499406567603861656), INT64_C( 5836973950118592576), INT64_C(-4232123399129603430), INT64_C( 8656431254350139121), INT64_C(-8853511068983619849)), simde_mm512_set_epi64(INT64_C( 2884148896870883456), INT64_C( 0), INT64_C( 2346526704673489152), INT64_C( 191440763308049952), INT64_C( 0), INT64_C( 307450048046744202), INT64_C( 4044604204229069553), 
INT64_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_andnot_epi64(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_andnot_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_andnot_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_andnot_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_andnot_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_andnot_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_andnot_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_andnot_si512) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_andnot_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_andnot_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_andnot_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_andnot_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_andnot_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_andnot_epi64) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/avg.c000066400000000000000000007223101400333146700162020ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN avg #include #include #include static int test_simde_mm_mask_avg_epu8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint8_t src[16]; const simde__mmask16 k; const uint8_t a[16]; const uint8_t b[16]; const uint8_t r[16]; } test_vec[] = { { { UINT8_C(223), UINT8_C( 94), UINT8_C(238), UINT8_C(159), UINT8_C(163), UINT8_C( 41), UINT8_C(132), UINT8_C(235), UINT8_C(179), UINT8_C(199), UINT8_C(121), UINT8_C(192), UINT8_C(144), UINT8_C(137), UINT8_C(199), UINT8_C(242) }, UINT16_C( 880), { UINT8_C( 38), UINT8_C(204), UINT8_C( 95), UINT8_C( 86), UINT8_C( 95), UINT8_C(207), UINT8_C(238), UINT8_C(187), UINT8_C(158), UINT8_C(238), UINT8_C(129), UINT8_C( 60), UINT8_C( 59), UINT8_C(113), UINT8_C(182), UINT8_C( 35) }, { UINT8_C(126), UINT8_C(124), UINT8_C(159), UINT8_C(184), UINT8_C( 44), UINT8_C(132), UINT8_C( 32), UINT8_C(176), UINT8_C(198), UINT8_C(151), UINT8_C( 42), UINT8_C( 37), UINT8_C(208), UINT8_C(244), UINT8_C( 40), UINT8_C(127) }, { UINT8_C(223), UINT8_C( 94), UINT8_C(238), UINT8_C(159), UINT8_C( 70), UINT8_C(170), UINT8_C(135), UINT8_C(235), UINT8_C(178), UINT8_C(195), UINT8_C(121), UINT8_C(192), UINT8_C(144), UINT8_C(137), UINT8_C(199), UINT8_C(242) } }, { { UINT8_C( 1), UINT8_C( 3), UINT8_C(187), UINT8_C( 89), UINT8_C(251), UINT8_C(189), UINT8_C( 42), UINT8_C( 80), UINT8_C( 90), UINT8_C(205), UINT8_C(190), UINT8_C(157), UINT8_C(162), UINT8_C( 75), UINT8_C(201), UINT8_C(144) }, UINT16_C( 3057), { UINT8_C( 39), UINT8_C( 73), UINT8_C(228), UINT8_C(182), UINT8_C(168), UINT8_C(194), UINT8_C( 73), UINT8_C( 32), UINT8_C(224), UINT8_C(227), UINT8_C( 70), UINT8_C(226), UINT8_C(117), UINT8_MAX, UINT8_C( 44), 
UINT8_C(192) }, { UINT8_C( 64), UINT8_C(180), UINT8_C(205), UINT8_C( 57), UINT8_C(184), UINT8_C( 62), UINT8_C(173), UINT8_C(184), UINT8_C(102), UINT8_C(195), UINT8_C( 31), UINT8_C(254), UINT8_C(204), UINT8_C(228), UINT8_C( 49), UINT8_C( 40) }, { UINT8_C( 52), UINT8_C( 3), UINT8_C(187), UINT8_C( 89), UINT8_C(176), UINT8_C(128), UINT8_C(123), UINT8_C(108), UINT8_C(163), UINT8_C(211), UINT8_C(190), UINT8_C(240), UINT8_C(162), UINT8_C( 75), UINT8_C(201), UINT8_C(144) } }, { { UINT8_C( 15), UINT8_C( 24), UINT8_C(132), UINT8_C(130), UINT8_C(210), UINT8_C( 62), UINT8_C(197), UINT8_C(109), UINT8_C( 97), UINT8_C( 2), UINT8_C( 46), UINT8_C(162), UINT8_C( 42), UINT8_C( 88), UINT8_C(245), UINT8_C(123) }, UINT16_C( 7289), { UINT8_C( 0), UINT8_C(136), UINT8_C( 83), UINT8_C(112), UINT8_C(183), UINT8_C( 64), UINT8_C( 47), UINT8_C( 79), UINT8_C(103), UINT8_C(247), UINT8_C( 96), UINT8_C(189), UINT8_C( 23), UINT8_C(119), UINT8_C(180), UINT8_C(161) }, { UINT8_C(197), UINT8_C( 69), UINT8_C(160), UINT8_C(138), UINT8_C(105), UINT8_C( 52), UINT8_C( 1), UINT8_C( 2), UINT8_C(248), UINT8_C( 44), UINT8_C(225), UINT8_C(162), UINT8_C(121), UINT8_C( 29), UINT8_C( 82), UINT8_C(180) }, { UINT8_C( 99), UINT8_C( 24), UINT8_C(132), UINT8_C(125), UINT8_C(144), UINT8_C( 58), UINT8_C( 24), UINT8_C(109), UINT8_C( 97), UINT8_C( 2), UINT8_C(161), UINT8_C(176), UINT8_C( 72), UINT8_C( 88), UINT8_C(245), UINT8_C(123) } }, { { UINT8_C(228), UINT8_C( 83), UINT8_C(134), UINT8_C( 72), UINT8_C( 93), UINT8_C(252), UINT8_C(135), UINT8_C( 44), UINT8_C(207), UINT8_C(254), UINT8_C( 26), UINT8_C( 36), UINT8_C(251), UINT8_C(191), UINT8_C(198), UINT8_C(162) }, UINT16_C(10439), { UINT8_C(177), UINT8_C( 33), UINT8_C(146), UINT8_C(155), UINT8_C(248), UINT8_C(254), UINT8_C(125), UINT8_C( 3), UINT8_C( 59), UINT8_C(138), UINT8_C( 1), UINT8_C(174), UINT8_C(242), UINT8_C(100), UINT8_C(200), UINT8_C( 21) }, { UINT8_C(174), UINT8_C(241), UINT8_C(168), UINT8_C(118), UINT8_C( 64), UINT8_C(174), UINT8_C(230), UINT8_C(175), 
UINT8_C(180), UINT8_C( 53), UINT8_C(234), UINT8_C(150), UINT8_C( 37), UINT8_C(168), UINT8_C( 83), UINT8_C( 3) }, { UINT8_C(176), UINT8_C(137), UINT8_C(157), UINT8_C( 72), UINT8_C( 93), UINT8_C(252), UINT8_C(178), UINT8_C( 89), UINT8_C(207), UINT8_C(254), UINT8_C( 26), UINT8_C(162), UINT8_C(251), UINT8_C(134), UINT8_C(198), UINT8_C(162) } }, { { UINT8_C(122), UINT8_C(250), UINT8_C(189), UINT8_C(160), UINT8_C(115), UINT8_C(149), UINT8_C(245), UINT8_C( 33), UINT8_C(124), UINT8_C( 29), UINT8_C(124), UINT8_C(108), UINT8_C(224), UINT8_C( 32), UINT8_C(195), UINT8_C(231) }, UINT16_C(12761), { UINT8_C(247), UINT8_C(133), UINT8_C(200), UINT8_C(215), UINT8_C(115), UINT8_C(235), UINT8_C( 55), UINT8_C(169), UINT8_C( 16), UINT8_C( 0), UINT8_C(228), UINT8_C( 54), UINT8_C(237), UINT8_C(236), UINT8_C(223), UINT8_C(171) }, { UINT8_C( 16), UINT8_C(145), UINT8_C(101), UINT8_C(177), UINT8_C( 72), UINT8_C(123), UINT8_C( 46), UINT8_C(141), UINT8_C(192), UINT8_C( 90), UINT8_C(253), UINT8_C( 21), UINT8_C( 45), UINT8_C(185), UINT8_C( 42), UINT8_C( 3) }, { UINT8_C(132), UINT8_C(250), UINT8_C(189), UINT8_C(196), UINT8_C( 94), UINT8_C(149), UINT8_C( 51), UINT8_C(155), UINT8_C(104), UINT8_C( 29), UINT8_C(124), UINT8_C(108), UINT8_C(141), UINT8_C(211), UINT8_C(195), UINT8_C(231) } }, { { UINT8_C(175), UINT8_C(113), UINT8_C( 53), UINT8_C(251), UINT8_C( 62), UINT8_C(180), UINT8_C( 14), UINT8_C(218), UINT8_C( 6), UINT8_C(177), UINT8_C(124), UINT8_C(219), UINT8_C(130), UINT8_C(183), UINT8_C(159), UINT8_C( 60) }, UINT16_C(31348), { UINT8_C(147), UINT8_C( 33), UINT8_C(100), UINT8_C( 13), UINT8_C(120), UINT8_C(107), UINT8_C(111), UINT8_C(243), UINT8_C(207), UINT8_C(130), UINT8_C( 1), UINT8_C(224), UINT8_C( 43), UINT8_C(186), UINT8_C( 31), UINT8_C( 65) }, { UINT8_C( 99), UINT8_C( 7), UINT8_C(250), UINT8_C( 91), UINT8_C( 51), UINT8_C( 29), UINT8_C(220), UINT8_C(147), UINT8_C(201), UINT8_C( 14), UINT8_C( 39), UINT8_C(168), UINT8_C(126), UINT8_C( 67), UINT8_C(188), UINT8_C( 5) }, { UINT8_C(175), 
UINT8_C(113), UINT8_C(175), UINT8_C(251), UINT8_C( 86), UINT8_C( 68), UINT8_C(166), UINT8_C(218), UINT8_C( 6), UINT8_C( 72), UINT8_C(124), UINT8_C(196), UINT8_C( 85), UINT8_C(127), UINT8_C(110), UINT8_C( 60) } }, { { UINT8_C(242), UINT8_C( 69), UINT8_C(162), UINT8_C(119), UINT8_C(254), UINT8_C(223), UINT8_C( 4), UINT8_C(103), UINT8_C( 71), UINT8_C(203), UINT8_C(102), UINT8_C(161), UINT8_C( 74), UINT8_C(192), UINT8_C( 71), UINT8_C( 18) }, UINT16_C(37728), { UINT8_C(164), UINT8_C(184), UINT8_C(107), UINT8_C(170), UINT8_C(176), UINT8_C( 87), UINT8_C( 21), UINT8_C(189), UINT8_C(121), UINT8_C( 65), UINT8_C(220), UINT8_C(247), UINT8_C(123), UINT8_C(116), UINT8_C(175), UINT8_C( 66) }, { UINT8_C( 59), UINT8_C( 56), UINT8_C(140), UINT8_C(215), UINT8_C( 57), UINT8_C( 79), UINT8_C( 6), UINT8_C(198), UINT8_C(158), UINT8_C( 36), UINT8_C( 45), UINT8_C(148), UINT8_C(219), UINT8_C(148), UINT8_C(243), UINT8_C(124) }, { UINT8_C(242), UINT8_C( 69), UINT8_C(162), UINT8_C(119), UINT8_C(254), UINT8_C( 83), UINT8_C( 14), UINT8_C(103), UINT8_C(140), UINT8_C( 51), UINT8_C(102), UINT8_C(161), UINT8_C(171), UINT8_C(192), UINT8_C( 71), UINT8_C( 95) } }, { { UINT8_C(235), UINT8_C(169), UINT8_C(130), UINT8_C( 40), UINT8_C(218), UINT8_C( 0), UINT8_C(170), UINT8_C(139), UINT8_C(222), UINT8_C(105), UINT8_C( 13), UINT8_C(153), UINT8_C( 80), UINT8_C(242), UINT8_C(224), UINT8_C(166) }, UINT16_C( 700), { UINT8_C( 54), UINT8_C(208), UINT8_C( 42), UINT8_C( 57), UINT8_C( 30), UINT8_C( 7), UINT8_C( 16), UINT8_C( 87), UINT8_C( 69), UINT8_C(135), UINT8_C( 64), UINT8_C(151), UINT8_C(101), UINT8_C( 6), UINT8_C( 98), UINT8_C( 55) }, { UINT8_C(207), UINT8_C(241), UINT8_C(248), UINT8_C(218), UINT8_C( 34), UINT8_C(178), UINT8_C(118), UINT8_C(254), UINT8_C(126), UINT8_C(179), UINT8_C( 49), UINT8_C(198), UINT8_C(179), UINT8_C( 64), UINT8_C( 60), UINT8_C( 3) }, { UINT8_C(235), UINT8_C(169), UINT8_C(145), UINT8_C(138), UINT8_C( 32), UINT8_C( 93), UINT8_C(170), UINT8_C(171), UINT8_C(222), UINT8_C(157), UINT8_C( 13), 
UINT8_C(153), UINT8_C( 80), UINT8_C(242), UINT8_C(224), UINT8_C(166) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_mm_loadu_epi8(test_vec[i].src); simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_mask_avg_epu8(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_u8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm_maskz_avg_epu8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const uint8_t a[16]; const uint8_t b[16]; const uint8_t r[16]; } test_vec[] = { { UINT16_C( 4520), { UINT8_C(177), UINT8_C( 79), UINT8_C(221), UINT8_C(139), UINT8_C(188), UINT8_C(149), UINT8_C( 47), UINT8_C(212), UINT8_C( 46), UINT8_C( 99), UINT8_C( 6), UINT8_C( 44), UINT8_C(211), UINT8_C( 14), UINT8_C(103), UINT8_C(124) }, { UINT8_C( 38), UINT8_C(167), UINT8_C(225), UINT8_C( 40), UINT8_C( 61), UINT8_C( 80), UINT8_C( 59), UINT8_C( 97), UINT8_C(246), UINT8_C(222), UINT8_C(189), UINT8_C( 77), UINT8_C(225), UINT8_C( 45), UINT8_C( 52), UINT8_C(179) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 90), UINT8_C( 0), UINT8_C(115), UINT8_C( 0), UINT8_C(155), UINT8_C(146), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(218), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { UINT16_C(56465), { UINT8_C(144), UINT8_C( 79), UINT8_C( 55), UINT8_C(116), UINT8_C( 8), UINT8_C( 74), UINT8_C(160), UINT8_C( 23), UINT8_C( 28), UINT8_C( 86), UINT8_C( 52), UINT8_C(116), UINT8_C( 57), UINT8_C(188), UINT8_C(114), UINT8_C(224) }, { UINT8_C( 95), UINT8_C( 57), UINT8_C( 28), UINT8_C( 75), UINT8_C(205), UINT8_C(213), UINT8_C(194), UINT8_C( 20), UINT8_C(242), UINT8_C(212), UINT8_C( 50), UINT8_C(130), UINT8_C(194), UINT8_C( 27), UINT8_C(233), UINT8_C(192) }, { UINT8_C(120), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(107), UINT8_C( 0), UINT8_C( 0), UINT8_C( 22), UINT8_C( 0), UINT8_C( 0), UINT8_C( 51), UINT8_C(123), 
UINT8_C(126), UINT8_C( 0), UINT8_C(174), UINT8_C(208) } }, { UINT16_C(58442), { UINT8_C(248), UINT8_C(114), UINT8_C( 53), UINT8_C( 75), UINT8_C( 60), UINT8_C(224), UINT8_C( 66), UINT8_C(126), UINT8_C(183), UINT8_C( 52), UINT8_C( 73), UINT8_C( 36), UINT8_C( 12), UINT8_C( 14), UINT8_C(211), UINT8_C(237) }, { UINT8_C(230), UINT8_C(156), UINT8_C( 31), UINT8_C(235), UINT8_C( 13), UINT8_C( 84), UINT8_C(108), UINT8_C(118), UINT8_C(154), UINT8_C(237), UINT8_C( 31), UINT8_C( 53), UINT8_C( 28), UINT8_C(216), UINT8_C(122), UINT8_C( 0) }, { UINT8_C( 0), UINT8_C(135), UINT8_C( 0), UINT8_C(155), UINT8_C( 0), UINT8_C( 0), UINT8_C( 87), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 52), UINT8_C( 0), UINT8_C( 0), UINT8_C(115), UINT8_C(167), UINT8_C(119) } }, { UINT16_C(31261), { UINT8_C(128), UINT8_C(105), UINT8_C(105), UINT8_C(133), UINT8_C( 37), UINT8_C( 61), UINT8_C(137), UINT8_C(126), UINT8_C( 96), UINT8_C(176), UINT8_C(161), UINT8_C( 59), UINT8_C(188), UINT8_C(173), UINT8_C(182), UINT8_C( 71) }, { UINT8_C( 30), UINT8_C(228), UINT8_C( 8), UINT8_C(148), UINT8_C( 57), UINT8_C(106), UINT8_C( 88), UINT8_C( 65), UINT8_C( 78), UINT8_C(104), UINT8_C( 89), UINT8_C(221), UINT8_C( 7), UINT8_C(168), UINT8_C(224), UINT8_C( 69) }, { UINT8_C( 79), UINT8_C( 0), UINT8_C( 57), UINT8_C(141), UINT8_C( 47), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(140), UINT8_C( 0), UINT8_C(140), UINT8_C( 98), UINT8_C(171), UINT8_C(203), UINT8_C( 0) } }, { UINT16_C(19393), { UINT8_C( 8), UINT8_C( 63), UINT8_C( 71), UINT8_C(118), UINT8_C( 15), UINT8_C(126), UINT8_C( 0), UINT8_C(177), UINT8_C(101), UINT8_C( 31), UINT8_C(100), UINT8_C(116), UINT8_C(121), UINT8_C( 15), UINT8_C( 83), UINT8_C(183) }, { UINT8_C(240), UINT8_C(132), UINT8_C(225), UINT8_C(129), UINT8_C(219), UINT8_C( 81), UINT8_C(204), UINT8_C( 84), UINT8_C( 93), UINT8_C(119), UINT8_C( 30), UINT8_C( 24), UINT8_C( 44), UINT8_C(166), UINT8_C(183), UINT8_C( 86) }, { UINT8_C(124), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 
0), UINT8_C(102), UINT8_C(131), UINT8_C( 97), UINT8_C( 75), UINT8_C( 0), UINT8_C( 70), UINT8_C( 0), UINT8_C( 0), UINT8_C(133), UINT8_C( 0) } }, { UINT16_C(41252), { UINT8_C(208), UINT8_C(182), UINT8_C( 60), UINT8_C( 7), UINT8_C( 93), UINT8_C(241), UINT8_C( 49), UINT8_C(121), UINT8_C(175), UINT8_C(110), UINT8_C(183), UINT8_C(116), UINT8_C(110), UINT8_C( 30), UINT8_C( 98), UINT8_C(228) }, { UINT8_C(213), UINT8_C( 34), UINT8_C( 23), UINT8_C(211), UINT8_C( 37), UINT8_C(180), UINT8_C(142), UINT8_C( 24), UINT8_C(100), UINT8_C(164), UINT8_C( 50), UINT8_C(110), UINT8_C(254), UINT8_C(135), UINT8_C(148), UINT8_C(147) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 42), UINT8_C( 0), UINT8_C( 0), UINT8_C(211), UINT8_C( 0), UINT8_C( 0), UINT8_C(138), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 83), UINT8_C( 0), UINT8_C(188) } }, { UINT16_C(59646), { UINT8_C( 1), UINT8_C(149), UINT8_C( 22), UINT8_C( 22), UINT8_C(243), UINT8_C( 85), UINT8_C(239), UINT8_C(217), UINT8_C(154), UINT8_C(103), UINT8_C(109), UINT8_C( 37), UINT8_C( 53), UINT8_C(251), UINT8_C( 11), UINT8_C( 72) }, { UINT8_C( 24), UINT8_C(222), UINT8_C(191), UINT8_C(119), UINT8_C(253), UINT8_C(184), UINT8_C( 59), UINT8_C( 25), UINT8_C( 93), UINT8_C( 80), UINT8_C(194), UINT8_C( 70), UINT8_C(163), UINT8_C(111), UINT8_C( 27), UINT8_C(249) }, { UINT8_C( 0), UINT8_C(186), UINT8_C(107), UINT8_C( 71), UINT8_C(248), UINT8_C(135), UINT8_C(149), UINT8_C(121), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 54), UINT8_C( 0), UINT8_C(181), UINT8_C( 19), UINT8_C(161) } }, { UINT16_C(50361), { UINT8_C( 29), UINT8_C(184), UINT8_C(187), UINT8_C(163), UINT8_C(100), UINT8_C(202), UINT8_C(129), UINT8_C(220), UINT8_C(112), UINT8_C( 12), UINT8_C( 67), UINT8_C(252), UINT8_C(226), UINT8_C(146), UINT8_C( 40), UINT8_C(117) }, { UINT8_C( 65), UINT8_C(228), UINT8_C( 24), UINT8_C(137), UINT8_C(136), UINT8_C(209), UINT8_C(197), UINT8_C(194), UINT8_C( 37), UINT8_C(103), UINT8_C(218), UINT8_C(232), UINT8_C(118), UINT8_C( 96), UINT8_C( 24), 
UINT8_C(106) }, { UINT8_C( 47), UINT8_C( 0), UINT8_C( 0), UINT8_C(150), UINT8_C(118), UINT8_C(206), UINT8_C( 0), UINT8_C(207), UINT8_C( 0), UINT8_C( 0), UINT8_C(143), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 32), UINT8_C(112) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_maskz_avg_epu8(test_vec[i].k, a, b); simde_test_x86_assert_equal_u8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm_mask_avg_epu16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint16_t src[8]; const simde__mmask8 k; const uint16_t a[8]; const uint16_t b[8]; const uint16_t r[8]; } test_vec[] = { { { UINT16_C(13001), UINT16_C(49556), UINT16_C(11089), UINT16_C(65127), UINT16_C(35056), UINT16_C( 1312), UINT16_C( 9004), UINT16_C(13332) }, UINT8_C(175), { UINT16_C(62617), UINT16_C(24352), UINT16_C(22336), UINT16_C(47694), UINT16_C(54709), UINT16_C(59240), UINT16_C( 1665), UINT16_C(29730) }, { UINT16_C(15758), UINT16_C(28426), UINT16_C(26767), UINT16_C(26014), UINT16_C(41205), UINT16_C(51151), UINT16_C(60304), UINT16_C(21251) }, { UINT16_C(39188), UINT16_C(26389), UINT16_C(24552), UINT16_C(36854), UINT16_C(35056), UINT16_C(55196), UINT16_C( 9004), UINT16_C(25491) } }, { { UINT16_C(61031), UINT16_C( 6143), UINT16_C(45008), UINT16_C(57419), UINT16_C( 9108), UINT16_C(11394), UINT16_C( 2778), UINT16_C(28206) }, UINT8_C( 24), { UINT16_C(23679), UINT16_C(48897), UINT16_C(37527), UINT16_C(18836), UINT16_C(12479), UINT16_C(63802), UINT16_C( 7655), UINT16_C(37835) }, { UINT16_C(11313), UINT16_C( 3876), UINT16_C( 7048), UINT16_C(40312), UINT16_C(40891), UINT16_C(43622), UINT16_C(15457), UINT16_C(51128) }, { UINT16_C(61031), UINT16_C( 6143), UINT16_C(45008), UINT16_C(29574), UINT16_C(26685), UINT16_C(11394), UINT16_C( 2778), UINT16_C(28206) } }, { { UINT16_C(36702), UINT16_C( 8916), UINT16_C(13843), 
UINT16_C( 647), UINT16_C(51785), UINT16_C( 6217), UINT16_C(27008), UINT16_C(20100) }, UINT8_C(193), { UINT16_C( 3770), UINT16_C(45967), UINT16_C( 3106), UINT16_C( 6053), UINT16_C(60586), UINT16_C( 4509), UINT16_C(64909), UINT16_C( 6528) }, { UINT16_C(61670), UINT16_C(48673), UINT16_C(32774), UINT16_C(51222), UINT16_C(41333), UINT16_C(28038), UINT16_C(19729), UINT16_C(45013) }, { UINT16_C(32720), UINT16_C( 8916), UINT16_C(13843), UINT16_C( 647), UINT16_C(51785), UINT16_C( 6217), UINT16_C(42319), UINT16_C(25771) } }, { { UINT16_C(22996), UINT16_C(64305), UINT16_C(47039), UINT16_C( 9690), UINT16_C(48265), UINT16_C( 8077), UINT16_C(32576), UINT16_C( 4691) }, UINT8_C( 10), { UINT16_C(40983), UINT16_C(13302), UINT16_C(51251), UINT16_C(17264), UINT16_C(27536), UINT16_C(63937), UINT16_C(31547), UINT16_C(53750) }, { UINT16_C(58466), UINT16_C(31079), UINT16_C( 3675), UINT16_C(25139), UINT16_C(33471), UINT16_C(50261), UINT16_C( 9774), UINT16_C(52132) }, { UINT16_C(22996), UINT16_C(22191), UINT16_C(47039), UINT16_C(21202), UINT16_C(48265), UINT16_C( 8077), UINT16_C(32576), UINT16_C( 4691) } }, { { UINT16_C(47876), UINT16_C(58639), UINT16_C( 7248), UINT16_C(41832), UINT16_C(59043), UINT16_C(26562), UINT16_C(56901), UINT16_C(36863) }, UINT8_C(221), { UINT16_C(50607), UINT16_C( 5102), UINT16_C( 8349), UINT16_C(60550), UINT16_C(11356), UINT16_C( 8417), UINT16_C(45837), UINT16_C(24033) }, { UINT16_C(35428), UINT16_C(56038), UINT16_C(56232), UINT16_C(61113), UINT16_C(49578), UINT16_C(26195), UINT16_C( 5136), UINT16_C(23463) }, { UINT16_C(43018), UINT16_C(58639), UINT16_C(32291), UINT16_C(60832), UINT16_C(30467), UINT16_C(26562), UINT16_C(25487), UINT16_C(23748) } }, { { UINT16_C(21351), UINT16_C(38854), UINT16_C(32515), UINT16_C(49947), UINT16_C(59713), UINT16_C(23314), UINT16_C(43905), UINT16_C(60752) }, UINT8_C(183), { UINT16_C(28446), UINT16_C(18064), UINT16_C(45813), UINT16_C(36071), UINT16_C(31067), UINT16_C( 5165), UINT16_C(56876), UINT16_C(22693) }, { UINT16_C(46420), 
UINT16_C(17039), UINT16_C(42271), UINT16_C(13345), UINT16_C( 2232), UINT16_C(37090), UINT16_C(43366), UINT16_C( 9416) }, { UINT16_C(37433), UINT16_C(17552), UINT16_C(44042), UINT16_C(49947), UINT16_C(16650), UINT16_C(21128), UINT16_C(43905), UINT16_C(16055) } }, { { UINT16_C(16717), UINT16_C(20503), UINT16_C(52611), UINT16_C(27592), UINT16_C(49225), UINT16_C( 3278), UINT16_C(52617), UINT16_C(58528) }, UINT8_C(176), { UINT16_C(44561), UINT16_C(34177), UINT16_C( 2545), UINT16_C(54671), UINT16_C(36436), UINT16_C(40339), UINT16_C(55759), UINT16_C(60619) }, { UINT16_C(40663), UINT16_C(41299), UINT16_C(63917), UINT16_C(18867), UINT16_C( 7798), UINT16_C(53641), UINT16_C(26059), UINT16_C(20753) }, { UINT16_C(16717), UINT16_C(20503), UINT16_C(52611), UINT16_C(27592), UINT16_C(22117), UINT16_C(46990), UINT16_C(52617), UINT16_C(40686) } }, { { UINT16_C(27456), UINT16_C(41153), UINT16_C(60032), UINT16_C(47330), UINT16_C( 2999), UINT16_C( 6670), UINT16_C(47672), UINT16_C(39638) }, UINT8_C( 18), { UINT16_C(55338), UINT16_C(41088), UINT16_C(17079), UINT16_C(42423), UINT16_C(17315), UINT16_C( 3317), UINT16_C(53014), UINT16_C( 248) }, { UINT16_C(45727), UINT16_C(38187), UINT16_C(29156), UINT16_C(50804), UINT16_C(38165), UINT16_C(60221), UINT16_C(33811), UINT16_C(59539) }, { UINT16_C(27456), UINT16_C(39638), UINT16_C(60032), UINT16_C(47330), UINT16_C(27740), UINT16_C( 6670), UINT16_C(47672), UINT16_C(39638) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_mm_loadu_epi16(test_vec[i].src); simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_mask_avg_epu16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_u16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm_maskz_avg_epu16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const uint16_t a[8]; const uint16_t b[8]; const uint16_t r[8]; } 
test_vec[] = { { UINT8_C(163), { UINT16_C(48032), UINT16_C(55188), UINT16_C(12951), UINT16_C(45863), UINT16_C( 7585), UINT16_C( 6666), UINT16_C(10347), UINT16_C(21784) }, { UINT16_C(55826), UINT16_C(23098), UINT16_C(54925), UINT16_C(54432), UINT16_C(28629), UINT16_C( 3981), UINT16_C(45286), UINT16_C(45278) }, { UINT16_C(51929), UINT16_C(39143), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 5324), UINT16_C( 0), UINT16_C(33531) } }, { UINT8_C( 32), { UINT16_C( 7784), UINT16_C(51464), UINT16_C(65205), UINT16_C(30372), UINT16_C(30102), UINT16_C(45454), UINT16_C(17167), UINT16_C(12381) }, { UINT16_C(35010), UINT16_C(19204), UINT16_C(18245), UINT16_C(19767), UINT16_C(24345), UINT16_C(35860), UINT16_C( 4547), UINT16_C(38580) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(40657), UINT16_C( 0), UINT16_C( 0) } }, { UINT8_C(150), { UINT16_C(65141), UINT16_C(20853), UINT16_C(29926), UINT16_C(21344), UINT16_C(51024), UINT16_C(44582), UINT16_C(52777), UINT16_C(58439) }, { UINT16_C(15050), UINT16_C(49285), UINT16_C(22104), UINT16_C(30833), UINT16_C( 7402), UINT16_C(45888), UINT16_C( 6809), UINT16_C(18470) }, { UINT16_C( 0), UINT16_C(35069), UINT16_C(26015), UINT16_C( 0), UINT16_C(29213), UINT16_C( 0), UINT16_C( 0), UINT16_C(38455) } }, { UINT8_C(207), { UINT16_C(49291), UINT16_C(31616), UINT16_C(45010), UINT16_C(26247), UINT16_C(28118), UINT16_C(33579), UINT16_C(29123), UINT16_C(49910) }, { UINT16_C(19996), UINT16_C(51909), UINT16_C( 8994), UINT16_C(39237), UINT16_C( 5756), UINT16_C(50924), UINT16_C(22322), UINT16_C(21862) }, { UINT16_C(34644), UINT16_C(41763), UINT16_C(27002), UINT16_C(32742), UINT16_C( 0), UINT16_C( 0), UINT16_C(25723), UINT16_C(35886) } }, { UINT8_C(217), { UINT16_C(34614), UINT16_C(47250), UINT16_C( 7698), UINT16_C(16686), UINT16_C(53181), UINT16_C(59815), UINT16_C(39047), UINT16_C(26958) }, { UINT16_C(60660), UINT16_C(17223), UINT16_C(53149), UINT16_C(21460), UINT16_C(41601), UINT16_C(27295), UINT16_C(22351), 
UINT16_C( 2906) }, { UINT16_C(47637), UINT16_C( 0), UINT16_C( 0), UINT16_C(19073), UINT16_C(47391), UINT16_C( 0), UINT16_C(30699), UINT16_C(14932) } }, { UINT8_C( 4), { UINT16_C(20478), UINT16_C(10512), UINT16_C(20701), UINT16_C(45563), UINT16_C(21448), UINT16_C( 1776), UINT16_C(12739), UINT16_C(18480) }, { UINT16_C( 8358), UINT16_C(19044), UINT16_C(27967), UINT16_C(59654), UINT16_C( 6723), UINT16_C(19513), UINT16_C( 3306), UINT16_C(57872) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C(24334), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { UINT8_C( 17), { UINT16_C(22178), UINT16_C(37794), UINT16_C(40419), UINT16_C(55432), UINT16_C(49929), UINT16_C(16963), UINT16_C(37167), UINT16_C(13772) }, { UINT16_C(25210), UINT16_C(39450), UINT16_C(28240), UINT16_C(19007), UINT16_C(62966), UINT16_C(38882), UINT16_C(59033), UINT16_C(16682) }, { UINT16_C(23694), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(56448), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { UINT8_C( 86), { UINT16_C( 8663), UINT16_C(52000), UINT16_C(12269), UINT16_C(20492), UINT16_C(56656), UINT16_C(28723), UINT16_C(38241), UINT16_C(26297) }, { UINT16_C(20566), UINT16_C(52418), UINT16_C( 1010), UINT16_C(36269), UINT16_C(50768), UINT16_C(38701), UINT16_C( 4452), UINT16_C(46909) }, { UINT16_C( 0), UINT16_C(52209), UINT16_C( 6640), UINT16_C( 0), UINT16_C(53712), UINT16_C( 0), UINT16_C(21347), UINT16_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_maskz_avg_epu16(test_vec[i].k, a, b); simde_test_x86_assert_equal_u16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm256_mask_avg_epu8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint8_t src[32]; const simde__mmask32 k; const uint8_t a[32]; const uint8_t b[32]; const uint8_t r[32]; } test_vec[] = { { { UINT8_C( 43), UINT8_C(217), 
UINT8_C(123), UINT8_C(234), UINT8_C( 47), UINT8_C(229), UINT8_C(173), UINT8_C(208), UINT8_C(101), UINT8_C( 58), UINT8_C( 95), UINT8_C( 33), UINT8_C(132), UINT8_C( 71), UINT8_C(220), UINT8_C(101), UINT8_C( 39), UINT8_C(213), UINT8_C( 74), UINT8_C(240), UINT8_C(189), UINT8_C(241), UINT8_C(247), UINT8_C(106), UINT8_C(168), UINT8_C(218), UINT8_C(210), UINT8_C(243), UINT8_C(145), UINT8_C(135), UINT8_C( 24), UINT8_C(188) }, UINT32_C(2426835809), { UINT8_C(120), UINT8_C( 84), UINT8_C( 96), UINT8_C(222), UINT8_C(142), UINT8_C(191), UINT8_MAX, UINT8_C( 18), UINT8_C( 6), UINT8_C(220), UINT8_C(119), UINT8_C( 46), UINT8_C(177), UINT8_C(194), UINT8_C( 30), UINT8_C(111), UINT8_C(179), UINT8_C( 21), UINT8_C(217), UINT8_C( 91), UINT8_C(239), UINT8_C(171), UINT8_C( 79), UINT8_C(129), UINT8_C( 51), UINT8_C(103), UINT8_C( 61), UINT8_C(148), UINT8_C(251), UINT8_C(228), UINT8_C( 36), UINT8_C(115) }, { UINT8_C( 56), UINT8_C(132), UINT8_C( 81), UINT8_C(198), UINT8_C( 67), UINT8_C( 81), UINT8_C(216), UINT8_C( 73), UINT8_C( 45), UINT8_C( 79), UINT8_C(119), UINT8_C(222), UINT8_C( 17), UINT8_C(150), UINT8_C( 77), UINT8_C(197), UINT8_C(171), UINT8_C( 38), UINT8_C( 32), UINT8_C(155), UINT8_C(210), UINT8_C(111), UINT8_C( 28), UINT8_C( 5), UINT8_C(215), UINT8_C( 89), UINT8_C(153), UINT8_C(210), UINT8_C( 61), UINT8_C(189), UINT8_C( 69), UINT8_C(117) }, { UINT8_C( 88), UINT8_C(217), UINT8_C(123), UINT8_C(234), UINT8_C( 47), UINT8_C(136), UINT8_C(236), UINT8_C(208), UINT8_C( 26), UINT8_C(150), UINT8_C( 95), UINT8_C( 33), UINT8_C( 97), UINT8_C( 71), UINT8_C(220), UINT8_C(154), UINT8_C( 39), UINT8_C( 30), UINT8_C(125), UINT8_C(240), UINT8_C(189), UINT8_C(141), UINT8_C(247), UINT8_C( 67), UINT8_C(168), UINT8_C(218), UINT8_C(210), UINT8_C(243), UINT8_C(156), UINT8_C(135), UINT8_C( 24), UINT8_C(116) } }, { { UINT8_C( 65), UINT8_C(151), UINT8_C( 59), UINT8_C(132), UINT8_C(232), UINT8_C( 19), UINT8_C(205), UINT8_C( 21), UINT8_C( 99), UINT8_C( 69), UINT8_C(243), UINT8_C(116), UINT8_C(219), UINT8_C( 65), 
UINT8_C( 57), UINT8_C(134), UINT8_C(103), UINT8_C( 90), UINT8_C( 33), UINT8_C( 57), UINT8_C(201), UINT8_C( 61), UINT8_C( 62), UINT8_C(160), UINT8_C(151), UINT8_C(215), UINT8_C(114), UINT8_C(212), UINT8_C(148), UINT8_C(184), UINT8_C( 74), UINT8_C(213) }, UINT32_C( 928613711), { UINT8_C(153), UINT8_C( 39), UINT8_C( 76), UINT8_C(252), UINT8_C(108), UINT8_C( 63), UINT8_C(112), UINT8_C( 71), UINT8_C(128), UINT8_C(170), UINT8_C(205), UINT8_C(232), UINT8_C( 4), UINT8_C(239), UINT8_C( 33), UINT8_C(205), UINT8_C( 44), UINT8_C( 96), UINT8_C(110), UINT8_C(195), UINT8_C( 55), UINT8_C(224), UINT8_C(152), UINT8_C(204), UINT8_C(152), UINT8_C(226), UINT8_C(161), UINT8_C(231), UINT8_C(103), UINT8_C(251), UINT8_C( 30), UINT8_C( 0) }, { UINT8_C( 34), UINT8_C(106), UINT8_C(252), UINT8_C(142), UINT8_C(170), UINT8_C(109), UINT8_C(213), UINT8_C( 42), UINT8_C( 23), UINT8_C(162), UINT8_C( 18), UINT8_C( 27), UINT8_C(145), UINT8_C( 52), UINT8_C(232), UINT8_C(190), UINT8_C(148), UINT8_C( 86), UINT8_C(129), UINT8_C(203), UINT8_C( 55), UINT8_C( 25), UINT8_C(151), UINT8_C(207), UINT8_C(251), UINT8_C( 57), UINT8_C(183), UINT8_C( 99), UINT8_C( 52), UINT8_C(213), UINT8_C( 99), UINT8_C( 86) }, { UINT8_C( 94), UINT8_C( 73), UINT8_C(164), UINT8_C(197), UINT8_C(232), UINT8_C( 19), UINT8_C(163), UINT8_C( 21), UINT8_C( 76), UINT8_C( 69), UINT8_C(112), UINT8_C(116), UINT8_C(219), UINT8_C( 65), UINT8_C( 57), UINT8_C(198), UINT8_C( 96), UINT8_C( 90), UINT8_C( 33), UINT8_C(199), UINT8_C( 55), UINT8_C( 61), UINT8_C(152), UINT8_C(160), UINT8_C(202), UINT8_C(142), UINT8_C(172), UINT8_C(212), UINT8_C( 78), UINT8_C(232), UINT8_C( 74), UINT8_C(213) } }, { { UINT8_C( 64), UINT8_C( 96), UINT8_C(228), UINT8_C(234), UINT8_C(205), UINT8_C(185), UINT8_C( 20), UINT8_C(228), UINT8_C( 91), UINT8_C( 39), UINT8_MAX, UINT8_C(237), UINT8_C( 91), UINT8_C(231), UINT8_C(171), UINT8_C(239), UINT8_C( 62), UINT8_C( 44), UINT8_C(186), UINT8_C(117), UINT8_C( 70), UINT8_C( 82), UINT8_C( 68), UINT8_C( 65), UINT8_C(139), UINT8_C(251), 
UINT8_C(164), UINT8_C(191), UINT8_C(209), UINT8_C( 8), UINT8_C( 21), UINT8_C( 17) }, UINT32_C( 905705832), { UINT8_C(178), UINT8_C( 15), UINT8_C( 25), UINT8_C( 13), UINT8_C( 54), UINT8_C( 24), UINT8_C(250), UINT8_C(145), UINT8_MAX, UINT8_C(165), UINT8_C(128), UINT8_C( 61), UINT8_C(210), UINT8_C( 59), UINT8_C(178), UINT8_C( 24), UINT8_C(141), UINT8_C(247), UINT8_C( 89), UINT8_C( 24), UINT8_C(242), UINT8_C(254), UINT8_C(215), UINT8_C(195), UINT8_C( 6), UINT8_C(236), UINT8_C(212), UINT8_C(110), UINT8_C(229), UINT8_C(207), UINT8_C(163), UINT8_C(151) }, { UINT8_C(223), UINT8_C(188), UINT8_C(164), UINT8_C( 21), UINT8_C(212), UINT8_C(159), UINT8_C(167), UINT8_C(211), UINT8_C( 68), UINT8_C( 39), UINT8_C( 17), UINT8_C( 22), UINT8_C( 98), UINT8_C(195), UINT8_C( 46), UINT8_C(239), UINT8_C(186), UINT8_C(136), UINT8_C( 7), UINT8_C(173), UINT8_C(134), UINT8_C(222), UINT8_C(112), UINT8_C(140), UINT8_C(202), UINT8_C( 69), UINT8_C(250), UINT8_C(175), UINT8_C( 20), UINT8_C(157), UINT8_C( 70), UINT8_C(243) }, { UINT8_C( 64), UINT8_C( 96), UINT8_C(228), UINT8_C( 17), UINT8_C(205), UINT8_C( 92), UINT8_C(209), UINT8_C(228), UINT8_C(162), UINT8_C( 39), UINT8_MAX, UINT8_C( 42), UINT8_C(154), UINT8_C(127), UINT8_C(112), UINT8_C(132), UINT8_C(164), UINT8_C(192), UINT8_C(186), UINT8_C( 99), UINT8_C(188), UINT8_C(238), UINT8_C(164), UINT8_C(168), UINT8_C(104), UINT8_C(251), UINT8_C(231), UINT8_C(191), UINT8_C(125), UINT8_C(182), UINT8_C( 21), UINT8_C( 17) } }, { { UINT8_C( 89), UINT8_C(235), UINT8_C( 9), UINT8_C( 45), UINT8_C(138), UINT8_C(176), UINT8_C( 0), UINT8_C(206), UINT8_C(215), UINT8_C( 17), UINT8_C(229), UINT8_C( 58), UINT8_C(213), UINT8_C( 19), UINT8_C( 41), UINT8_C(143), UINT8_C(155), UINT8_C( 49), UINT8_C( 60), UINT8_C( 33), UINT8_C( 15), UINT8_C(173), UINT8_C(173), UINT8_C(218), UINT8_C(242), UINT8_C(167), UINT8_C(137), UINT8_C( 6), UINT8_C( 68), UINT8_C(208), UINT8_C(250), UINT8_C(157) }, UINT32_C(1170867131), { UINT8_C(179), UINT8_C(203), UINT8_C( 19), UINT8_C(138), 
UINT8_C(220), UINT8_C(248), UINT8_C(196), UINT8_C(177), UINT8_C( 12), UINT8_C(238), UINT8_C( 65), UINT8_C(167), UINT8_C( 31), UINT8_C(125), UINT8_C(201), UINT8_C( 46), UINT8_C( 42), UINT8_C(118), UINT8_C( 8), UINT8_C( 28), UINT8_C( 30), UINT8_C(146), UINT8_C( 35), UINT8_C( 98), UINT8_C( 98), UINT8_C( 29), UINT8_C( 0), UINT8_C( 29), UINT8_C( 32), UINT8_C(202), UINT8_C( 98), UINT8_C(211) }, { UINT8_C(149), UINT8_C(117), UINT8_C( 93), UINT8_C(114), UINT8_C(110), UINT8_C( 34), UINT8_C( 35), UINT8_C(122), UINT8_C( 16), UINT8_C(100), UINT8_C( 33), UINT8_C( 47), UINT8_C(226), UINT8_C(234), UINT8_C( 93), UINT8_C( 12), UINT8_C( 97), UINT8_C(102), UINT8_C( 41), UINT8_C(127), UINT8_C(248), UINT8_C( 76), UINT8_C(225), UINT8_C( 90), UINT8_C(105), UINT8_C(225), UINT8_C(119), UINT8_C(137), UINT8_C(172), UINT8_C(217), UINT8_C( 92), UINT8_C( 65) }, { UINT8_C(164), UINT8_C(160), UINT8_C( 9), UINT8_C(126), UINT8_C(165), UINT8_C(141), UINT8_C( 0), UINT8_C(150), UINT8_C( 14), UINT8_C(169), UINT8_C(229), UINT8_C( 58), UINT8_C(213), UINT8_C( 19), UINT8_C( 41), UINT8_C(143), UINT8_C(155), UINT8_C(110), UINT8_C( 60), UINT8_C( 78), UINT8_C( 15), UINT8_C(173), UINT8_C(130), UINT8_C( 94), UINT8_C(102), UINT8_C(167), UINT8_C( 60), UINT8_C( 6), UINT8_C( 68), UINT8_C(208), UINT8_C( 95), UINT8_C(157) } }, { { UINT8_C( 78), UINT8_C(185), UINT8_C(179), UINT8_C(188), UINT8_C(219), UINT8_C(215), UINT8_C( 54), UINT8_C(235), UINT8_C( 59), UINT8_C( 88), UINT8_C( 26), UINT8_C( 29), UINT8_C( 66), UINT8_C(120), UINT8_C( 42), UINT8_C(163), UINT8_C(222), UINT8_C( 83), UINT8_C( 34), UINT8_C(214), UINT8_C(159), UINT8_C( 4), UINT8_C( 48), UINT8_C( 8), UINT8_C(229), UINT8_C(167), UINT8_C(145), UINT8_C(145), UINT8_C(128), UINT8_C(237), UINT8_C(211), UINT8_C(206) }, UINT32_C(2190182054), { UINT8_C( 93), UINT8_C(193), UINT8_C(109), UINT8_C(153), UINT8_C( 25), UINT8_C(136), UINT8_C(182), UINT8_C( 92), UINT8_C( 0), UINT8_C(224), UINT8_MAX, UINT8_C(222), UINT8_C( 51), UINT8_C( 34), UINT8_C(180), UINT8_C(210), UINT8_C( 
38), UINT8_C(228), UINT8_C(218), UINT8_C( 11), UINT8_C(139), UINT8_C(107), UINT8_C(157), UINT8_C( 11), UINT8_C( 88), UINT8_C(112), UINT8_C(217), UINT8_MAX, UINT8_C(246), UINT8_C(100), UINT8_C(129), UINT8_C( 84) }, { UINT8_C( 38), UINT8_C(238), UINT8_C(237), UINT8_C( 63), UINT8_C(118), UINT8_C(163), UINT8_C(155), UINT8_C(118), UINT8_C(132), UINT8_C(155), UINT8_C( 84), UINT8_C(183), UINT8_C(189), UINT8_C( 8), UINT8_C(138), UINT8_C(227), UINT8_C(236), UINT8_C(100), UINT8_C(238), UINT8_C(119), UINT8_C(208), UINT8_C(139), UINT8_C(130), UINT8_C( 40), UINT8_C(251), UINT8_C( 92), UINT8_C( 39), UINT8_C(242), UINT8_C(192), UINT8_C(168), UINT8_C( 70), UINT8_C(230) }, { UINT8_C( 78), UINT8_C(216), UINT8_C(173), UINT8_C(188), UINT8_C(219), UINT8_C(150), UINT8_C( 54), UINT8_C(105), UINT8_C( 59), UINT8_C(190), UINT8_C(170), UINT8_C( 29), UINT8_C( 66), UINT8_C(120), UINT8_C( 42), UINT8_C(219), UINT8_C(137), UINT8_C(164), UINT8_C( 34), UINT8_C( 65), UINT8_C(159), UINT8_C( 4), UINT8_C( 48), UINT8_C( 26), UINT8_C(229), UINT8_C(102), UINT8_C(145), UINT8_C(145), UINT8_C(128), UINT8_C(237), UINT8_C(211), UINT8_C(157) } }, { { UINT8_C(151), UINT8_C( 51), UINT8_C( 38), UINT8_C( 13), UINT8_C(214), UINT8_C(193), UINT8_C(132), UINT8_C( 90), UINT8_C( 92), UINT8_C(216), UINT8_C( 18), UINT8_C( 25), UINT8_C(225), UINT8_C(156), UINT8_C(252), UINT8_C(205), UINT8_C( 0), UINT8_C(235), UINT8_C( 69), UINT8_C(208), UINT8_C(118), UINT8_C(199), UINT8_C(249), UINT8_C(114), UINT8_C( 35), UINT8_C( 32), UINT8_C(100), UINT8_C(228), UINT8_C(201), UINT8_C(170), UINT8_C(202), UINT8_C( 96) }, UINT32_C(3010326749), { UINT8_C(178), UINT8_C(241), UINT8_C( 14), UINT8_C( 14), UINT8_C(202), UINT8_C( 32), UINT8_C( 40), UINT8_C(171), UINT8_C(188), UINT8_C( 36), UINT8_C(120), UINT8_C(188), UINT8_C( 15), UINT8_C(189), UINT8_C(141), UINT8_C(134), UINT8_C(133), UINT8_C(134), UINT8_C(248), UINT8_C(168), UINT8_C(166), UINT8_C( 92), UINT8_C(140), UINT8_C(111), UINT8_C( 6), UINT8_C( 87), UINT8_C(207), UINT8_C(227), UINT8_C( 71), 
UINT8_C( 61), UINT8_C(150), UINT8_C(249) }, { UINT8_C( 46), UINT8_C(164), UINT8_C( 8), UINT8_C(248), UINT8_C(196), UINT8_C( 48), UINT8_C(163), UINT8_C(128), UINT8_C( 84), UINT8_C( 28), UINT8_C( 61), UINT8_C(100), UINT8_C(217), UINT8_C(202), UINT8_C(234), UINT8_C( 94), UINT8_C( 80), UINT8_C(226), UINT8_C( 7), UINT8_C(246), UINT8_C( 62), UINT8_C(147), UINT8_C(102), UINT8_C( 68), UINT8_C(234), UINT8_C( 53), UINT8_C( 39), UINT8_C( 50), UINT8_C(114), UINT8_C(189), UINT8_C( 43), UINT8_C(161) }, { UINT8_C(112), UINT8_C( 51), UINT8_C( 11), UINT8_C(131), UINT8_C(199), UINT8_C(193), UINT8_C(102), UINT8_C(150), UINT8_C( 92), UINT8_C(216), UINT8_C( 18), UINT8_C( 25), UINT8_C(116), UINT8_C(196), UINT8_C(188), UINT8_C(114), UINT8_C(107), UINT8_C(235), UINT8_C(128), UINT8_C(207), UINT8_C(118), UINT8_C(120), UINT8_C(121), UINT8_C(114), UINT8_C(120), UINT8_C( 70), UINT8_C(100), UINT8_C(228), UINT8_C( 93), UINT8_C(125), UINT8_C(202), UINT8_C(205) } }, { { UINT8_C( 98), UINT8_C( 51), UINT8_C(153), UINT8_C( 38), UINT8_C( 99), UINT8_C( 61), UINT8_C(167), UINT8_C(184), UINT8_C( 89), UINT8_C(228), UINT8_C( 28), UINT8_C( 50), UINT8_C(174), UINT8_C( 6), UINT8_C(145), UINT8_C(254), UINT8_C(232), UINT8_C(152), UINT8_C(244), UINT8_C( 38), UINT8_C( 43), UINT8_C( 90), UINT8_C(106), UINT8_C( 22), UINT8_C(144), UINT8_C(145), UINT8_C( 72), UINT8_C( 2), UINT8_C( 78), UINT8_C(115), UINT8_C(163), UINT8_C(176) }, UINT32_C( 181878183), { UINT8_C(122), UINT8_C(126), UINT8_C(194), UINT8_C(211), UINT8_C( 98), UINT8_C(222), UINT8_C( 5), UINT8_C( 16), UINT8_C(228), UINT8_C(150), UINT8_C( 14), UINT8_C(204), UINT8_C( 46), UINT8_C( 2), UINT8_C(242), UINT8_C( 90), UINT8_C( 93), UINT8_C( 92), UINT8_C(112), UINT8_C(237), UINT8_C(237), UINT8_C(184), UINT8_C(239), UINT8_C( 60), UINT8_C( 43), UINT8_C(147), UINT8_C(236), UINT8_C(210), UINT8_C(208), UINT8_C(195), UINT8_C(221), UINT8_C( 74) }, { UINT8_C( 65), UINT8_C(159), UINT8_C( 29), UINT8_C(163), UINT8_C(126), UINT8_C( 34), UINT8_C(179), UINT8_C( 98), UINT8_C(185), 
UINT8_C(193), UINT8_C( 47), UINT8_C(231), UINT8_C(196), UINT8_C( 33), UINT8_C( 65), UINT8_C( 33), UINT8_C(126), UINT8_C(177), UINT8_C( 14), UINT8_C(107), UINT8_C(105), UINT8_C(253), UINT8_C(167), UINT8_C(149), UINT8_C(144), UINT8_C(148), UINT8_C(103), UINT8_C( 96), UINT8_C( 87), UINT8_C( 68), UINT8_C(170), UINT8_C(153) }, { UINT8_C( 94), UINT8_C(143), UINT8_C(112), UINT8_C( 38), UINT8_C( 99), UINT8_C(128), UINT8_C(167), UINT8_C( 57), UINT8_C(207), UINT8_C(228), UINT8_C( 31), UINT8_C(218), UINT8_C(121), UINT8_C( 18), UINT8_C(145), UINT8_C(254), UINT8_C(110), UINT8_C(135), UINT8_C( 63), UINT8_C( 38), UINT8_C(171), UINT8_C( 90), UINT8_C(203), UINT8_C(105), UINT8_C(144), UINT8_C(148), UINT8_C( 72), UINT8_C(153), UINT8_C( 78), UINT8_C(115), UINT8_C(163), UINT8_C(176) } }, { { UINT8_C(228), UINT8_C(199), UINT8_C( 60), UINT8_C( 98), UINT8_C(234), UINT8_C(240), UINT8_C(196), UINT8_C(163), UINT8_C(177), UINT8_C(243), UINT8_C(138), UINT8_C(117), UINT8_C( 21), UINT8_C(204), UINT8_C(150), UINT8_C(147), UINT8_C(125), UINT8_C(164), UINT8_C(254), UINT8_C(231), UINT8_C(162), UINT8_C(166), UINT8_C(124), UINT8_C( 50), UINT8_C( 58), UINT8_C(227), UINT8_C(147), UINT8_C(145), UINT8_C( 40), UINT8_C( 61), UINT8_C( 42), UINT8_C( 12) }, UINT32_C(4016989957), { UINT8_C( 87), UINT8_C( 50), UINT8_C(146), UINT8_C( 8), UINT8_C( 38), UINT8_C( 28), UINT8_C(126), UINT8_C( 59), UINT8_C(232), UINT8_C( 20), UINT8_C(206), UINT8_C(102), UINT8_C(185), UINT8_C(204), UINT8_C( 77), UINT8_C( 91), UINT8_C(114), UINT8_C(201), UINT8_C(141), UINT8_C(172), UINT8_C(172), UINT8_C( 32), UINT8_C( 62), UINT8_C(212), UINT8_C( 94), UINT8_C(104), UINT8_C(224), UINT8_C( 99), UINT8_C(207), UINT8_C( 78), UINT8_C( 82), UINT8_C( 38) }, { UINT8_C(129), UINT8_C(228), UINT8_C( 47), UINT8_C(167), UINT8_C( 0), UINT8_C(173), UINT8_C(226), UINT8_C(233), UINT8_C(193), UINT8_C(176), UINT8_C( 79), UINT8_C(122), UINT8_C(124), UINT8_C(156), UINT8_C(213), UINT8_C(239), UINT8_C(101), UINT8_C( 99), UINT8_C(155), UINT8_C( 17), UINT8_C(131), 
UINT8_C(217), UINT8_C(230), UINT8_C(225), UINT8_C( 66), UINT8_C(198), UINT8_C( 68), UINT8_C( 17), UINT8_C( 21), UINT8_C(150), UINT8_C( 56), UINT8_C(150) }, { UINT8_C(108), UINT8_C(199), UINT8_C( 97), UINT8_C( 98), UINT8_C(234), UINT8_C(240), UINT8_C(196), UINT8_C(163), UINT8_C(213), UINT8_C( 98), UINT8_C(143), UINT8_C(117), UINT8_C( 21), UINT8_C(180), UINT8_C(145), UINT8_C(147), UINT8_C(125), UINT8_C(150), UINT8_C(148), UINT8_C( 95), UINT8_C(162), UINT8_C(125), UINT8_C(146), UINT8_C( 50), UINT8_C( 80), UINT8_C(151), UINT8_C(146), UINT8_C( 58), UINT8_C( 40), UINT8_C(114), UINT8_C( 69), UINT8_C( 94) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i src = simde_x_mm256_loadu_epi8(test_vec[i].src); simde__m256i a = simde_x_mm256_loadu_epi8(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi8(test_vec[i].b); simde__m256i r = simde_mm256_mask_avg_epu8(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm256_maskz_avg_epu8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask32 k; const uint8_t a[32]; const uint8_t b[32]; const uint8_t r[32]; } test_vec[] = { { UINT32_C(3998241187), { UINT8_C(128), UINT8_C(226), UINT8_C(235), UINT8_C(237), UINT8_C(106), UINT8_C(224), UINT8_C(254), UINT8_C(106), UINT8_C(178), UINT8_C(196), UINT8_C(101), UINT8_C(242), UINT8_C( 83), UINT8_C(193), UINT8_C(154), UINT8_C(225), UINT8_C(118), UINT8_C( 15), UINT8_C( 70), UINT8_C(198), UINT8_C(232), UINT8_C( 76), UINT8_C( 2), UINT8_C(132), UINT8_C( 51), UINT8_C(110), UINT8_C( 22), UINT8_C(214), UINT8_C(191), UINT8_C(102), UINT8_C(196), UINT8_C( 64) }, { UINT8_C( 72), UINT8_C(176), UINT8_C( 45), UINT8_C(178), UINT8_C(144), UINT8_C( 43), UINT8_C( 28), UINT8_C( 66), UINT8_C(239), UINT8_C(130), UINT8_C( 52), UINT8_C( 66), UINT8_C( 67), UINT8_C(207), UINT8_C( 35), UINT8_C(186), UINT8_C(222), UINT8_C(105), UINT8_C(128), UINT8_C(198), UINT8_C(181), 
UINT8_C(131), UINT8_C( 74), UINT8_C(232), UINT8_C(241), UINT8_C( 96), UINT8_C(190), UINT8_C(177), UINT8_C(198), UINT8_C(131), UINT8_C(241), UINT8_C( 14) }, { UINT8_C(100), UINT8_C(201), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(134), UINT8_C( 0), UINT8_C( 86), UINT8_C(209), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 75), UINT8_C( 0), UINT8_C( 95), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(207), UINT8_C( 0), UINT8_C( 38), UINT8_C( 0), UINT8_C( 0), UINT8_C(103), UINT8_C(106), UINT8_C(196), UINT8_C( 0), UINT8_C(117), UINT8_C(219), UINT8_C( 39) } }, { UINT32_C(3284147763), { UINT8_C( 73), UINT8_C(221), UINT8_C( 6), UINT8_C( 56), UINT8_C( 95), UINT8_C( 58), UINT8_C(123), UINT8_C(162), UINT8_C( 9), UINT8_C(158), UINT8_C( 92), UINT8_C(231), UINT8_C( 8), UINT8_C(221), UINT8_C(174), UINT8_C(189), UINT8_C( 96), UINT8_C(248), UINT8_C(166), UINT8_C( 81), UINT8_C( 89), UINT8_C(100), UINT8_C( 2), UINT8_C( 31), UINT8_C(231), UINT8_C(243), UINT8_C( 46), UINT8_C( 26), UINT8_C( 17), UINT8_C(238), UINT8_C(222), UINT8_C( 90) }, { UINT8_C(203), UINT8_C(228), UINT8_C(147), UINT8_C( 42), UINT8_C( 30), UINT8_C( 14), UINT8_C(205), UINT8_C( 40), UINT8_C(172), UINT8_C( 41), UINT8_C( 15), UINT8_C(180), UINT8_C( 6), UINT8_C(189), UINT8_C(114), UINT8_C(102), UINT8_C(182), UINT8_C( 24), UINT8_C(184), UINT8_C( 15), UINT8_C(124), UINT8_C(186), UINT8_C( 46), UINT8_C(100), UINT8_C(174), UINT8_C( 92), UINT8_C(126), UINT8_C(191), UINT8_C( 75), UINT8_C( 92), UINT8_C( 26), UINT8_C( 22) }, { UINT8_C(138), UINT8_C(225), UINT8_C( 0), UINT8_C( 0), UINT8_C( 63), UINT8_C( 36), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(100), UINT8_C( 54), UINT8_C(206), UINT8_C( 7), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 24), UINT8_C( 66), UINT8_C(203), UINT8_C(168), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(124), UINT8_C( 56) } }, { UINT32_C(1598139712), { UINT8_C(187), UINT8_C( 
14), UINT8_C(135), UINT8_C(103), UINT8_C( 55), UINT8_C(150), UINT8_C( 28), UINT8_C( 62), UINT8_C( 84), UINT8_C(142), UINT8_C(164), UINT8_C( 10), UINT8_C(166), UINT8_C( 92), UINT8_C( 25), UINT8_C( 34), UINT8_C( 23), UINT8_C( 71), UINT8_C(134), UINT8_C(197), UINT8_C(164), UINT8_C( 5), UINT8_C(132), UINT8_C(239), UINT8_C( 97), UINT8_C(158), UINT8_C( 5), UINT8_C(162), UINT8_C( 75), UINT8_C( 70), UINT8_C( 1), UINT8_C( 6) }, { UINT8_C( 84), UINT8_C(136), UINT8_C(110), UINT8_C(140), UINT8_C( 30), UINT8_C(138), UINT8_C(202), UINT8_C(114), UINT8_C( 24), UINT8_C(110), UINT8_C(124), UINT8_C(190), UINT8_C(203), UINT8_C(149), UINT8_C(224), UINT8_C(226), UINT8_C(221), UINT8_C(103), UINT8_C(167), UINT8_C(129), UINT8_C(108), UINT8_C( 43), UINT8_C(112), UINT8_C(205), UINT8_C(202), UINT8_C(117), UINT8_C(111), UINT8_C( 21), UINT8_C(188), UINT8_C(112), UINT8_C( 28), UINT8_C( 16) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(115), UINT8_C( 0), UINT8_C( 54), UINT8_C( 0), UINT8_C(144), UINT8_C(100), UINT8_C( 0), UINT8_C(121), UINT8_C( 0), UINT8_C(130), UINT8_C(122), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(122), UINT8_C( 0), UINT8_C(150), UINT8_C(138), UINT8_C( 58), UINT8_C( 92), UINT8_C(132), UINT8_C( 0), UINT8_C( 15), UINT8_C( 0) } }, { UINT32_C( 396135160), { UINT8_C( 20), UINT8_C(102), UINT8_C(137), UINT8_C( 44), UINT8_C(213), UINT8_C( 6), UINT8_C(234), UINT8_C(160), UINT8_C(155), UINT8_C(202), UINT8_C(130), UINT8_C(120), UINT8_C( 49), UINT8_C( 41), UINT8_C(249), UINT8_C(157), UINT8_C( 84), UINT8_C(105), UINT8_C(107), UINT8_C( 30), UINT8_C(223), UINT8_C(218), UINT8_C( 52), UINT8_C(155), UINT8_C( 75), UINT8_C( 80), UINT8_C(171), UINT8_C( 67), UINT8_C(218), UINT8_C( 72), UINT8_C( 90), UINT8_C(238) }, { UINT8_C(174), UINT8_C(228), UINT8_C( 26), UINT8_C(131), UINT8_C(234), UINT8_C( 4), UINT8_C( 35), UINT8_C(133), UINT8_C(206), UINT8_C(165), UINT8_C(254), UINT8_C( 0), UINT8_C(206), UINT8_C(247), UINT8_C(157), 
UINT8_C( 35), UINT8_C( 97), UINT8_C( 8), UINT8_C( 65), UINT8_C( 64), UINT8_C(227), UINT8_C(117), UINT8_C(219), UINT8_C( 46), UINT8_C(197), UINT8_C(134), UINT8_C(113), UINT8_C(159), UINT8_C(206), UINT8_C(204), UINT8_C(141), UINT8_C(125) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 88), UINT8_C(224), UINT8_C( 5), UINT8_C(135), UINT8_C(147), UINT8_C( 0), UINT8_C(184), UINT8_C( 0), UINT8_C( 60), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 96), UINT8_C( 0), UINT8_C( 0), UINT8_C( 86), UINT8_C( 47), UINT8_C(225), UINT8_C( 0), UINT8_C( 0), UINT8_C(101), UINT8_C(136), UINT8_C(107), UINT8_C(142), UINT8_C( 0), UINT8_C(212), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { UINT32_C(2583734192), { UINT8_C(171), UINT8_C( 36), UINT8_C( 31), UINT8_C(122), UINT8_C(201), UINT8_C( 29), UINT8_C(122), UINT8_C(152), UINT8_C( 21), UINT8_C( 23), UINT8_C(187), UINT8_C(118), UINT8_C( 32), UINT8_C(252), UINT8_C(182), UINT8_C( 3), UINT8_C(114), UINT8_C(145), UINT8_C( 49), UINT8_C( 55), UINT8_C( 23), UINT8_C(162), UINT8_C(215), UINT8_C(230), UINT8_C(110), UINT8_C(100), UINT8_C( 99), UINT8_C( 30), UINT8_C( 12), UINT8_C( 99), UINT8_C(184), UINT8_C(183) }, { UINT8_C(135), UINT8_C(216), UINT8_C( 49), UINT8_C( 81), UINT8_C(245), UINT8_C(171), UINT8_C(233), UINT8_C( 10), UINT8_C(195), UINT8_C(164), UINT8_C(128), UINT8_C(227), UINT8_C(160), UINT8_C( 54), UINT8_C(230), UINT8_C( 18), UINT8_C(199), UINT8_C( 23), UINT8_C( 74), UINT8_C(223), UINT8_C(185), UINT8_C( 33), UINT8_C(197), UINT8_C( 40), UINT8_C(133), UINT8_C( 40), UINT8_C( 70), UINT8_C(145), UINT8_C(139), UINT8_MAX, UINT8_C( 73), UINT8_C( 19) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(223), UINT8_C(100), UINT8_C( 0), UINT8_C( 81), UINT8_C(108), UINT8_C( 94), UINT8_C(158), UINT8_C( 0), UINT8_C( 0), UINT8_C(153), UINT8_C( 0), UINT8_C( 11), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 70), UINT8_C( 0), UINT8_C( 88), UINT8_C( 76), UINT8_C( 
0), UINT8_C( 0), UINT8_C(101) } }, { UINT32_C(3429137111), { UINT8_C( 38), UINT8_C( 77), UINT8_C(215), UINT8_C(233), UINT8_C(241), UINT8_C( 87), UINT8_C(204), UINT8_C(145), UINT8_C(142), UINT8_C(178), UINT8_C(164), UINT8_C( 85), UINT8_C(201), UINT8_C(238), UINT8_C( 52), UINT8_C(130), UINT8_C( 15), UINT8_C(249), UINT8_C(170), UINT8_C(148), UINT8_C( 33), UINT8_C(241), UINT8_C( 38), UINT8_C(173), UINT8_C(240), UINT8_C(111), UINT8_C(192), UINT8_C(199), UINT8_C(233), UINT8_C( 36), UINT8_C(147), UINT8_C( 15) }, { UINT8_C(113), UINT8_C(106), UINT8_C(248), UINT8_C( 98), UINT8_C(194), UINT8_C(196), UINT8_C(243), UINT8_C( 80), UINT8_C(118), UINT8_C(151), UINT8_C(165), UINT8_C( 63), UINT8_C(133), UINT8_C(218), UINT8_C(194), UINT8_C(148), UINT8_C(211), UINT8_C(108), UINT8_C( 41), UINT8_C(245), UINT8_C( 93), UINT8_C( 79), UINT8_C(162), UINT8_C( 77), UINT8_C(190), UINT8_C( 98), UINT8_C( 20), UINT8_C(167), UINT8_C(134), UINT8_C(168), UINT8_C(183), UINT8_C(247) }, { UINT8_C( 76), UINT8_C( 92), UINT8_C(232), UINT8_C( 0), UINT8_C(218), UINT8_C( 0), UINT8_C(224), UINT8_C(113), UINT8_C( 0), UINT8_C(165), UINT8_C( 0), UINT8_C( 74), UINT8_C(167), UINT8_C(228), UINT8_C(123), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(106), UINT8_C( 0), UINT8_C( 0), UINT8_C(160), UINT8_C(100), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(106), UINT8_C(183), UINT8_C( 0), UINT8_C( 0), UINT8_C(165), UINT8_C(131) } }, { UINT32_C(3562647314), { UINT8_C(116), UINT8_C( 76), UINT8_C( 36), UINT8_C(234), UINT8_C(228), UINT8_C(202), UINT8_C( 42), UINT8_C(105), UINT8_C(164), UINT8_C(236), UINT8_C(254), UINT8_C(119), UINT8_C( 88), UINT8_C( 39), UINT8_C(108), UINT8_C(182), UINT8_C(118), UINT8_C( 14), UINT8_C( 3), UINT8_C( 52), UINT8_C(112), UINT8_C( 24), UINT8_C(219), UINT8_C(246), UINT8_C(192), UINT8_C(146), UINT8_C(237), UINT8_C(210), UINT8_C( 66), UINT8_C( 70), UINT8_C(167), UINT8_C(182) }, { UINT8_C(147), UINT8_C(203), UINT8_C(160), UINT8_C(119), UINT8_C(149), UINT8_C(202), UINT8_C(224), UINT8_C( 57), 
UINT8_C(182), UINT8_C(222), UINT8_C(177), UINT8_C( 15), UINT8_C( 5), UINT8_C( 29), UINT8_C(197), UINT8_C(123), UINT8_C( 44), UINT8_C(200), UINT8_C(175), UINT8_C(156), UINT8_C(224), UINT8_C(139), UINT8_C(147), UINT8_C(160), UINT8_C( 29), UINT8_C(128), UINT8_C(115), UINT8_C( 95), UINT8_C(199), UINT8_C( 26), UINT8_C( 21), UINT8_C( 90) }, { UINT8_C( 0), UINT8_C(140), UINT8_C( 0), UINT8_C( 0), UINT8_C(189), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(173), UINT8_C(229), UINT8_C(216), UINT8_C( 67), UINT8_C( 0), UINT8_C( 34), UINT8_C( 0), UINT8_C(153), UINT8_C( 81), UINT8_C( 0), UINT8_C( 0), UINT8_C(104), UINT8_C(168), UINT8_C( 0), UINT8_C(183), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(176), UINT8_C( 0), UINT8_C(133), UINT8_C( 0), UINT8_C( 94), UINT8_C(136) } }, { UINT32_C(2077341413), { UINT8_C(128), UINT8_C(177), UINT8_C(180), UINT8_C( 55), UINT8_C(144), UINT8_C(101), UINT8_C( 70), UINT8_C(149), UINT8_C(131), UINT8_C( 11), UINT8_C( 17), UINT8_C(175), UINT8_C(211), UINT8_C(192), UINT8_C( 75), UINT8_C(180), UINT8_C( 75), UINT8_C(222), UINT8_C( 84), UINT8_C(105), UINT8_C( 95), UINT8_C(199), UINT8_C(200), UINT8_C( 38), UINT8_C(225), UINT8_C(222), UINT8_C(128), UINT8_C(199), UINT8_C(148), UINT8_C( 81), UINT8_C( 66), UINT8_C( 20) }, { UINT8_C( 2), UINT8_C(246), UINT8_C( 75), UINT8_C(146), UINT8_C( 92), UINT8_C(145), UINT8_C( 40), UINT8_C(223), UINT8_C(156), UINT8_C( 57), UINT8_C(142), UINT8_C(112), UINT8_C(249), UINT8_C(217), UINT8_C( 36), UINT8_C( 69), UINT8_C(184), UINT8_C(120), UINT8_C(174), UINT8_C( 23), UINT8_C( 64), UINT8_C(118), UINT8_C( 61), UINT8_C( 33), UINT8_C( 84), UINT8_C(189), UINT8_C(232), UINT8_C(232), UINT8_C( 14), UINT8_C( 42), UINT8_C(253), UINT8_C( 16) }, { UINT8_C( 65), UINT8_C( 0), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C(123), UINT8_C( 55), UINT8_C(186), UINT8_C( 0), UINT8_C( 34), UINT8_C( 80), UINT8_C( 0), UINT8_C(230), UINT8_C(205), UINT8_C( 0), UINT8_C(125), UINT8_C(130), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 80), UINT8_C( 
0), UINT8_C(131), UINT8_C( 36), UINT8_C(155), UINT8_C(206), UINT8_C( 0), UINT8_C(216), UINT8_C( 81), UINT8_C( 62), UINT8_C(160), UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi8(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi8(test_vec[i].b); simde__m256i r = simde_mm256_maskz_avg_epu8(test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm256_mask_avg_epu16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint16_t src[16]; const simde__mmask16 k; const uint16_t a[16]; const uint16_t b[16]; const uint16_t r[16]; } test_vec[] = { { { UINT16_C(61285), UINT16_C( 8905), UINT16_C(29130), UINT16_C(28554), UINT16_C(37319), UINT16_C(54261), UINT16_C(53731), UINT16_C(48551), UINT16_C(52128), UINT16_C(51552), UINT16_C(39817), UINT16_C(26740), UINT16_C(55116), UINT16_C(46532), UINT16_C(63557), UINT16_C(43569) }, UINT16_C(64231), { UINT16_C(45517), UINT16_C(22379), UINT16_C(12833), UINT16_C( 5864), UINT16_C(52229), UINT16_C(44263), UINT16_C(34953), UINT16_C(59768), UINT16_C( 337), UINT16_C(50564), UINT16_C(53354), UINT16_C(11933), UINT16_C(57990), UINT16_C(46886), UINT16_C( 3469), UINT16_C(23217) }, { UINT16_C( 7358), UINT16_C(57265), UINT16_C(39502), UINT16_C(21494), UINT16_C(56678), UINT16_C(61439), UINT16_C(30565), UINT16_C(46808), UINT16_C(23673), UINT16_C(58236), UINT16_C( 6445), UINT16_C(45841), UINT16_C(14331), UINT16_C(34922), UINT16_C( 6980), UINT16_C( 738) }, { UINT16_C(26438), UINT16_C(39822), UINT16_C(26168), UINT16_C(28554), UINT16_C(37319), UINT16_C(52851), UINT16_C(32759), UINT16_C(53288), UINT16_C(52128), UINT16_C(54400), UINT16_C(39817), UINT16_C(28887), UINT16_C(36161), UINT16_C(40904), UINT16_C( 5225), UINT16_C(11978) } }, { { UINT16_C(37943), UINT16_C(34274), UINT16_C(55342), UINT16_C(38104), UINT16_C(55221), UINT16_C( 7043), UINT16_C(23375), UINT16_C(51409), UINT16_C(19895), 
UINT16_C(58539), UINT16_C(48230), UINT16_C(25239), UINT16_C( 499), UINT16_C(14314), UINT16_C(52508), UINT16_C(21305) }, UINT16_C( 7009), { UINT16_C(36824), UINT16_C(45299), UINT16_C(43299), UINT16_C(42632), UINT16_C(55236), UINT16_C(38145), UINT16_C(47263), UINT16_C(19171), UINT16_C(18845), UINT16_C(13318), UINT16_C(63915), UINT16_C(38454), UINT16_C(21040), UINT16_C(26979), UINT16_C(50342), UINT16_C(32389) }, { UINT16_C(30803), UINT16_C(30255), UINT16_C(46881), UINT16_C(58652), UINT16_C( 7566), UINT16_C(11643), UINT16_C(24277), UINT16_C(29303), UINT16_C(32167), UINT16_C(21415), UINT16_C(56694), UINT16_C(42729), UINT16_C(19503), UINT16_C(54543), UINT16_C(37904), UINT16_C(25428) }, { UINT16_C(33814), UINT16_C(34274), UINT16_C(55342), UINT16_C(38104), UINT16_C(55221), UINT16_C(24894), UINT16_C(35770), UINT16_C(51409), UINT16_C(25506), UINT16_C(17367), UINT16_C(48230), UINT16_C(40592), UINT16_C(20272), UINT16_C(14314), UINT16_C(52508), UINT16_C(21305) } }, { { UINT16_C(33549), UINT16_C(11993), UINT16_C(62778), UINT16_C(51220), UINT16_C(36626), UINT16_C(59381), UINT16_C(27885), UINT16_C(37978), UINT16_C( 489), UINT16_C(24551), UINT16_C(53470), UINT16_C( 3333), UINT16_C( 5148), UINT16_C(11491), UINT16_C(14249), UINT16_C(46735) }, UINT16_C(26810), { UINT16_C(62692), UINT16_C(63581), UINT16_C(28604), UINT16_C(45447), UINT16_C(29783), UINT16_C(45341), UINT16_C( 1545), UINT16_C(61618), UINT16_C(36965), UINT16_C(27329), UINT16_C(56733), UINT16_C(32894), UINT16_C( 9994), UINT16_C(39351), UINT16_C(29149), UINT16_C(49666) }, { UINT16_C(24421), UINT16_C( 8634), UINT16_C(17103), UINT16_C( 9938), UINT16_C(61366), UINT16_C(49111), UINT16_C(35317), UINT16_C(23216), UINT16_C(28953), UINT16_C(46788), UINT16_C(17230), UINT16_C(22583), UINT16_C(61034), UINT16_C(18674), UINT16_C(62560), UINT16_C(50442) }, { UINT16_C(33549), UINT16_C(36108), UINT16_C(62778), UINT16_C(27693), UINT16_C(45575), UINT16_C(47226), UINT16_C(27885), UINT16_C(42417), UINT16_C( 489), UINT16_C(24551), 
UINT16_C(53470), UINT16_C(27739), UINT16_C( 5148), UINT16_C(29013), UINT16_C(45855), UINT16_C(46735) } }, { { UINT16_C(50259), UINT16_C( 8935), UINT16_C(47366), UINT16_C(48456), UINT16_C( 8105), UINT16_C(40572), UINT16_C(11432), UINT16_C(49657), UINT16_C(48541), UINT16_C(60536), UINT16_C(44800), UINT16_C(27460), UINT16_C(13981), UINT16_C(64947), UINT16_C(48426), UINT16_C(32451) }, UINT16_C(43649), { UINT16_C(34976), UINT16_C(59747), UINT16_C( 3141), UINT16_C(49416), UINT16_C(45483), UINT16_C(42222), UINT16_C(35698), UINT16_C(60001), UINT16_C(25207), UINT16_C(48281), UINT16_C(14285), UINT16_C(33010), UINT16_C( 7476), UINT16_C(63293), UINT16_C(48795), UINT16_C(15265) }, { UINT16_C( 1350), UINT16_C(35620), UINT16_C(11537), UINT16_C(48205), UINT16_C(15326), UINT16_C(20576), UINT16_C(49862), UINT16_C(15931), UINT16_C(54308), UINT16_C(61946), UINT16_C(60427), UINT16_C(16497), UINT16_C(44553), UINT16_C(42039), UINT16_C(55660), UINT16_C(46048) }, { UINT16_C(18163), UINT16_C( 8935), UINT16_C(47366), UINT16_C(48456), UINT16_C( 8105), UINT16_C(40572), UINT16_C(11432), UINT16_C(37966), UINT16_C(48541), UINT16_C(55114), UINT16_C(44800), UINT16_C(24754), UINT16_C(13981), UINT16_C(52666), UINT16_C(48426), UINT16_C(30657) } }, { { UINT16_C( 1246), UINT16_C(61246), UINT16_C(35633), UINT16_C( 4012), UINT16_C( 3270), UINT16_C(36192), UINT16_C(39886), UINT16_C(62155), UINT16_C(50543), UINT16_C(31715), UINT16_C(21681), UINT16_C(48059), UINT16_C(61954), UINT16_C(28511), UINT16_C(16331), UINT16_C(43298) }, UINT16_C(24644), { UINT16_C(30105), UINT16_C(17900), UINT16_C(45701), UINT16_C(58705), UINT16_C( 8255), UINT16_C( 2688), UINT16_C(61202), UINT16_C(63183), UINT16_C(33130), UINT16_C( 9546), UINT16_C(19772), UINT16_C(39704), UINT16_C(58300), UINT16_C(57051), UINT16_C( 8077), UINT16_C( 9790) }, { UINT16_C(10900), UINT16_C( 6507), UINT16_C(48349), UINT16_C( 7422), UINT16_C(32476), UINT16_C(61223), UINT16_C(63086), UINT16_C(55525), UINT16_C(12151), UINT16_C(46078), UINT16_C( 5756), 
UINT16_C(14415), UINT16_C(11001), UINT16_C(34326), UINT16_C(21833), UINT16_C(56748) }, { UINT16_C( 1246), UINT16_C(61246), UINT16_C(47025), UINT16_C( 4012), UINT16_C( 3270), UINT16_C(36192), UINT16_C(62144), UINT16_C(62155), UINT16_C(50543), UINT16_C(31715), UINT16_C(21681), UINT16_C(48059), UINT16_C(61954), UINT16_C(45689), UINT16_C(14955), UINT16_C(43298) } }, { { UINT16_C( 6015), UINT16_C(23799), UINT16_C(62932), UINT16_C(45177), UINT16_C(41076), UINT16_C(58015), UINT16_C(33942), UINT16_C( 3770), UINT16_C(47284), UINT16_C(12481), UINT16_C( 4302), UINT16_C(51305), UINT16_C(32570), UINT16_C(33614), UINT16_C(64468), UINT16_C(21601) }, UINT16_C(22546), { UINT16_C(59056), UINT16_C(10573), UINT16_C(49559), UINT16_C(14025), UINT16_C(24739), UINT16_C(24251), UINT16_C(28526), UINT16_C(12054), UINT16_C(58783), UINT16_C( 2112), UINT16_C(31405), UINT16_C(64392), UINT16_C(23806), UINT16_C(24566), UINT16_C( 2480), UINT16_C(25015) }, { UINT16_C( 1263), UINT16_C(34442), UINT16_C(21702), UINT16_C(27069), UINT16_C(30900), UINT16_C( 8903), UINT16_C(57063), UINT16_C(34385), UINT16_C(37315), UINT16_C(28815), UINT16_C( 5900), UINT16_C( 2667), UINT16_C(25203), UINT16_C( 9321), UINT16_C( 8299), UINT16_C(23173) }, { UINT16_C( 6015), UINT16_C(22508), UINT16_C(62932), UINT16_C(45177), UINT16_C(27820), UINT16_C(58015), UINT16_C(33942), UINT16_C( 3770), UINT16_C(47284), UINT16_C(12481), UINT16_C( 4302), UINT16_C(33530), UINT16_C(24505), UINT16_C(33614), UINT16_C( 5390), UINT16_C(21601) } }, { { UINT16_C( 3876), UINT16_C(60129), UINT16_C(40547), UINT16_C( 5972), UINT16_C( 6934), UINT16_C(64825), UINT16_C(35833), UINT16_C(48259), UINT16_C( 4636), UINT16_C(10284), UINT16_C(38953), UINT16_C(40242), UINT16_C(39930), UINT16_C(26049), UINT16_C(18107), UINT16_C(57535) }, UINT16_C(41045), { UINT16_C(47562), UINT16_C( 7742), UINT16_C(21712), UINT16_C( 2618), UINT16_C(13137), UINT16_C(54677), UINT16_C(45552), UINT16_C( 7399), UINT16_C( 4570), UINT16_C( 3252), UINT16_C(44718), UINT16_C(28584), 
UINT16_C(25363), UINT16_C(54197), UINT16_C( 2627), UINT16_C( 3699) }, { UINT16_C(45763), UINT16_C(37932), UINT16_C(26118), UINT16_C(22686), UINT16_C(13210), UINT16_C(35373), UINT16_C( 5348), UINT16_C(48806), UINT16_C(23333), UINT16_C(54219), UINT16_C(29449), UINT16_C( 7490), UINT16_C(63446), UINT16_C( 6896), UINT16_C(25346), UINT16_C(50472) }, { UINT16_C(46663), UINT16_C(60129), UINT16_C(23915), UINT16_C( 5972), UINT16_C(13174), UINT16_C(64825), UINT16_C(25450), UINT16_C(48259), UINT16_C( 4636), UINT16_C(10284), UINT16_C(38953), UINT16_C(40242), UINT16_C(39930), UINT16_C(30547), UINT16_C(18107), UINT16_C(27086) } }, { { UINT16_C(21525), UINT16_C( 7257), UINT16_C(63419), UINT16_C(21876), UINT16_C(41258), UINT16_C( 4063), UINT16_C(34229), UINT16_C(56269), UINT16_C(39136), UINT16_C(60078), UINT16_C(61707), UINT16_C(57863), UINT16_C(63464), UINT16_C(60156), UINT16_C( 9306), UINT16_C(28848) }, UINT16_C( 2424), { UINT16_C(13196), UINT16_C( 1), UINT16_C(11144), UINT16_C(26529), UINT16_C(22074), UINT16_C( 2285), UINT16_C(52529), UINT16_C(57504), UINT16_C(44215), UINT16_C(48849), UINT16_C(47502), UINT16_C(35509), UINT16_C( 4260), UINT16_C(21678), UINT16_C( 9856), UINT16_C( 3165) }, { UINT16_C(24154), UINT16_C(57868), UINT16_C(44426), UINT16_C(50250), UINT16_C(14083), UINT16_C(13772), UINT16_C(27908), UINT16_C(48149), UINT16_C(58905), UINT16_C(42874), UINT16_C(12447), UINT16_C(17201), UINT16_C(57152), UINT16_C(49303), UINT16_C(62725), UINT16_C(24524) }, { UINT16_C(21525), UINT16_C( 7257), UINT16_C(63419), UINT16_C(38390), UINT16_C(18079), UINT16_C( 8029), UINT16_C(40219), UINT16_C(56269), UINT16_C(51560), UINT16_C(60078), UINT16_C(61707), UINT16_C(26355), UINT16_C(63464), UINT16_C(60156), UINT16_C( 9306), UINT16_C(28848) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i src = simde_x_mm256_loadu_epi16(test_vec[i].src); simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = 
simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_mask_avg_epu16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_u16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm256_maskz_avg_epu16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const uint16_t a[16]; const uint16_t b[16]; const uint16_t r[16]; } test_vec[] = { { UINT16_C(57756), { UINT16_C(48284), UINT16_C( 210), UINT16_C(57470), UINT16_C(23667), UINT16_C(34039), UINT16_C(45577), UINT16_C(60747), UINT16_C(36233), UINT16_C(61458), UINT16_C(64798), UINT16_C( 3514), UINT16_C(56831), UINT16_C(32324), UINT16_C( 4150), UINT16_C(54047), UINT16_C(48113) }, { UINT16_C(50319), UINT16_C( 3515), UINT16_C(12196), UINT16_C(39785), UINT16_C(29363), UINT16_C(65102), UINT16_C(55136), UINT16_C(29324), UINT16_C(43719), UINT16_C(33391), UINT16_C(28343), UINT16_C(64351), UINT16_C(38381), UINT16_C( 3083), UINT16_C(64872), UINT16_C(63432) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C(34833), UINT16_C(31726), UINT16_C(31701), UINT16_C( 0), UINT16_C( 0), UINT16_C(32779), UINT16_C(52589), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 3617), UINT16_C(59460), UINT16_C(55773) } }, { UINT16_C(33729), { UINT16_C(25861), UINT16_C(28338), UINT16_C(25857), UINT16_C(20449), UINT16_C(16740), UINT16_C(61478), UINT16_C(61107), UINT16_C( 8858), UINT16_C(20848), UINT16_C(53137), UINT16_C(32332), UINT16_C(22372), UINT16_C(52618), UINT16_C(21076), UINT16_C( 5572), UINT16_C(51670) }, { UINT16_C(34939), UINT16_C(31800), UINT16_C( 6638), UINT16_C(21195), UINT16_C(61786), UINT16_C( 3394), UINT16_C(56543), UINT16_C(20271), UINT16_C(49197), UINT16_C(31006), UINT16_C(33598), UINT16_C(51664), UINT16_C( 9552), UINT16_C( 5147), UINT16_C(61754), UINT16_C(46558) }, { UINT16_C(30400), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(58825), UINT16_C(14565), UINT16_C(35023), UINT16_C(42072), UINT16_C( 0), UINT16_C( 0), UINT16_C( 
0), UINT16_C( 0), UINT16_C( 0), UINT16_C(49114) } }, { UINT16_C( 5754), { UINT16_C(26673), UINT16_C(64559), UINT16_C(35258), UINT16_C(64750), UINT16_C(52630), UINT16_C(50648), UINT16_C( 1309), UINT16_C(15238), UINT16_C(50302), UINT16_C(20158), UINT16_C( 3725), UINT16_C(43379), UINT16_C(44579), UINT16_C( 410), UINT16_C( 5219), UINT16_C(38167) }, { UINT16_C(18044), UINT16_C(13969), UINT16_C(32719), UINT16_C(25906), UINT16_C( 2637), UINT16_C(27178), UINT16_C(45071), UINT16_C(36261), UINT16_C(25717), UINT16_C( 732), UINT16_C(20338), UINT16_C(38315), UINT16_C(18173), UINT16_C(24982), UINT16_C(44378), UINT16_C(55286) }, { UINT16_C( 0), UINT16_C(39264), UINT16_C( 0), UINT16_C(45328), UINT16_C(27634), UINT16_C(38913), UINT16_C(23190), UINT16_C( 0), UINT16_C( 0), UINT16_C(10445), UINT16_C(12032), UINT16_C( 0), UINT16_C(31376), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { UINT16_C(34803), { UINT16_C(49677), UINT16_C(16391), UINT16_C(21543), UINT16_C(21066), UINT16_C(23230), UINT16_C(25346), UINT16_C(30695), UINT16_C(50119), UINT16_C(14970), UINT16_C( 9491), UINT16_C( 4303), UINT16_C(26219), UINT16_C(50801), UINT16_C(26387), UINT16_C( 1949), UINT16_C(43759) }, { UINT16_C(63177), UINT16_C(61930), UINT16_C(13642), UINT16_C( 2115), UINT16_C(17807), UINT16_C(30315), UINT16_C(13245), UINT16_C(14138), UINT16_C(19821), UINT16_C(15452), UINT16_C(51293), UINT16_C(53154), UINT16_C(46734), UINT16_C(11062), UINT16_C( 9661), UINT16_C(34517) }, { UINT16_C(56427), UINT16_C(39161), UINT16_C( 0), UINT16_C( 0), UINT16_C(20519), UINT16_C(27831), UINT16_C(21970), UINT16_C(32129), UINT16_C(17396), UINT16_C(12472), UINT16_C(27798), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(39138) } }, { UINT16_C(49179), { UINT16_C(25975), UINT16_C(47861), UINT16_C(33901), UINT16_C(55552), UINT16_C(48634), UINT16_C(13324), UINT16_C(31220), UINT16_C(20609), UINT16_C(57269), UINT16_C(22552), UINT16_C(42670), UINT16_C(58382), UINT16_C(52177), UINT16_C(42762), UINT16_C( 9553), UINT16_C(51559) 
}, { UINT16_C(23691), UINT16_C(63619), UINT16_C(33760), UINT16_C(56017), UINT16_C(56640), UINT16_C(13327), UINT16_C(36950), UINT16_C( 3205), UINT16_C(40303), UINT16_C( 7524), UINT16_C(29252), UINT16_C( 5378), UINT16_C( 3133), UINT16_C(36540), UINT16_C( 9009), UINT16_C(48215) }, { UINT16_C(24833), UINT16_C(55740), UINT16_C( 0), UINT16_C(55785), UINT16_C(52637), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 9281), UINT16_C(49887) } }, { UINT16_C(56191), { UINT16_C(24501), UINT16_C(34398), UINT16_C(40762), UINT16_C(18788), UINT16_C(47827), UINT16_C(22745), UINT16_C(18886), UINT16_C(10998), UINT16_C(14950), UINT16_C(26780), UINT16_C(55631), UINT16_C( 3188), UINT16_C(42600), UINT16_C(48943), UINT16_C(44898), UINT16_C( 6042) }, { UINT16_C(63758), UINT16_C(18590), UINT16_C( 664), UINT16_C(27537), UINT16_C(27580), UINT16_C(33732), UINT16_C(47796), UINT16_C( 6829), UINT16_C(19188), UINT16_C(17283), UINT16_C(63267), UINT16_C(35663), UINT16_C(32669), UINT16_C( 75), UINT16_C(58670), UINT16_C(15383) }, { UINT16_C(44130), UINT16_C(26494), UINT16_C(20713), UINT16_C(23163), UINT16_C(37704), UINT16_C(28239), UINT16_C(33341), UINT16_C( 0), UINT16_C(17069), UINT16_C(22032), UINT16_C( 0), UINT16_C(19426), UINT16_C(37635), UINT16_C( 0), UINT16_C(51784), UINT16_C(10713) } }, { UINT16_C(46558), { UINT16_C(30341), UINT16_C( 5815), UINT16_C(29922), UINT16_C(42625), UINT16_C(13815), UINT16_C(42080), UINT16_C(21584), UINT16_C(54254), UINT16_C( 4759), UINT16_C(59338), UINT16_C(26781), UINT16_C(59494), UINT16_C(37992), UINT16_C(32718), UINT16_C(44240), UINT16_C(21813) }, { UINT16_C(60451), UINT16_C( 1388), UINT16_C(60768), UINT16_C(22443), UINT16_C( 2851), UINT16_C(29692), UINT16_C(59999), UINT16_C(63046), UINT16_C( 4348), UINT16_C(39645), UINT16_C(17272), UINT16_C(57474), UINT16_C(20695), UINT16_C(43104), UINT16_C(38397), UINT16_C( 8445) }, { UINT16_C( 0), UINT16_C( 3602), UINT16_C(45345), 
UINT16_C(32534), UINT16_C( 8333), UINT16_C( 0), UINT16_C(40792), UINT16_C(58650), UINT16_C( 4554), UINT16_C( 0), UINT16_C(22027), UINT16_C( 0), UINT16_C(29344), UINT16_C(37911), UINT16_C( 0), UINT16_C(15129) } }, { UINT16_C(27009), { UINT16_C(57893), UINT16_C(53335), UINT16_C(31289), UINT16_C(13787), UINT16_C(15085), UINT16_C(13088), UINT16_C( 7216), UINT16_C( 3651), UINT16_C(48310), UINT16_C(14673), UINT16_C(10652), UINT16_C(64649), UINT16_C(34513), UINT16_C(52881), UINT16_C( 5030), UINT16_C(52024) }, { UINT16_C(36853), UINT16_C(11931), UINT16_C(30217), UINT16_C(63076), UINT16_C(33968), UINT16_C(57641), UINT16_C(27808), UINT16_C(22511), UINT16_C(16424), UINT16_C(50576), UINT16_C( 6505), UINT16_C(15041), UINT16_C(21408), UINT16_C(17929), UINT16_C(16742), UINT16_C(23314) }, { UINT16_C(47373), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(13081), UINT16_C(32367), UINT16_C( 0), UINT16_C( 0), UINT16_C(39845), UINT16_C( 0), UINT16_C(35405), UINT16_C(10886), UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_maskz_avg_epu16(test_vec[i].k, a, b); simde_test_x86_assert_equal_u16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_avg_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu8(UINT8_C( 54), UINT8_C( 98), UINT8_C(144), UINT8_C( 33), UINT8_C(227), UINT8_C( 68), UINT8_C( 44), UINT8_C(252), UINT8_C(188), UINT8_C(131), UINT8_C( 22), UINT8_C(137), UINT8_C(110), UINT8_C(230), UINT8_C(116), UINT8_C( 30), UINT8_C(237), UINT8_C(136), UINT8_C(151), UINT8_C(122), UINT8_C( 83), UINT8_C(138), UINT8_C(146), UINT8_C(187), UINT8_C( 83), UINT8_C(179), UINT8_C(167), UINT8_C( 88), UINT8_C(248), UINT8_C( 88), UINT8_C(101), 
UINT8_C(132), UINT8_C(203), UINT8_C( 36), UINT8_C( 53), UINT8_C(174), UINT8_C( 78), UINT8_C( 97), UINT8_C(124), UINT8_C(201), UINT8_C( 12), UINT8_C(233), UINT8_C(196), UINT8_C(121), UINT8_C( 77), UINT8_C( 2), UINT8_C(189), UINT8_C(251), UINT8_C(252), UINT8_C(129), UINT8_C( 38), UINT8_C(107), UINT8_C(130), UINT8_C(227), UINT8_C(158), UINT8_C(158), UINT8_C(195), UINT8_C( 10), UINT8_C(101), UINT8_C( 55), UINT8_C( 64), UINT8_C(106), UINT8_C(118), UINT8_C( 58)), simde_x_mm512_set_epu8(UINT8_C(139), UINT8_C( 91), UINT8_C( 12), UINT8_C(185), UINT8_C( 48), UINT8_C(130), UINT8_C(230), UINT8_C(176), UINT8_C(212), UINT8_C( 20), UINT8_C(110), UINT8_C(217), UINT8_C(186), UINT8_C(186), UINT8_C(132), UINT8_C(114), UINT8_C(188), UINT8_C(251), UINT8_C(225), UINT8_C(230), UINT8_C(165), UINT8_C(108), UINT8_C( 17), UINT8_C( 9), UINT8_C(245), UINT8_C(227), UINT8_C(125), UINT8_C(240), UINT8_C( 47), UINT8_C( 91), UINT8_C( 3), UINT8_C(135), UINT8_C(243), UINT8_C(103), UINT8_C(152), UINT8_C(185), UINT8_C( 49), UINT8_C( 8), UINT8_C( 66), UINT8_C(147), UINT8_C( 50), UINT8_C(167), UINT8_C( 57), UINT8_C(102), UINT8_C(142), UINT8_C( 47), UINT8_C(173), UINT8_C( 47), UINT8_C(172), UINT8_C(237), UINT8_C( 57), UINT8_C(255), UINT8_C(101), UINT8_C(227), UINT8_C(198), UINT8_C(199), UINT8_C( 25), UINT8_C(175), UINT8_C(229), UINT8_C(172), UINT8_C(210), UINT8_C(198), UINT8_C(215), UINT8_C(150)), simde_x_mm512_set_epu8(UINT8_C( 97), UINT8_C( 95), UINT8_C( 78), UINT8_C(109), UINT8_C(138), UINT8_C( 99), UINT8_C(137), UINT8_C(214), UINT8_C(200), UINT8_C( 76), UINT8_C( 66), UINT8_C(177), UINT8_C(148), UINT8_C(208), UINT8_C(124), UINT8_C( 72), UINT8_C(213), UINT8_C(194), UINT8_C(188), UINT8_C(176), UINT8_C(124), UINT8_C(123), UINT8_C( 82), UINT8_C( 98), UINT8_C(164), UINT8_C(203), UINT8_C(146), UINT8_C(164), UINT8_C(148), UINT8_C( 90), UINT8_C( 52), UINT8_C(134), UINT8_C(223), UINT8_C( 70), UINT8_C(103), UINT8_C(180), UINT8_C( 64), UINT8_C( 53), UINT8_C( 95), UINT8_C(174), UINT8_C( 31), UINT8_C(200), 
UINT8_C(127), UINT8_C(112), UINT8_C(110), UINT8_C( 25), UINT8_C(181), UINT8_C(149), UINT8_C(212), UINT8_C(183), UINT8_C( 48), UINT8_C(181), UINT8_C(116), UINT8_C(227), UINT8_C(178), UINT8_C(179), UINT8_C(110), UINT8_C( 93), UINT8_C(165), UINT8_C(114), UINT8_C(137), UINT8_C(152), UINT8_C(167), UINT8_C(104)) }, { simde_x_mm512_set_epu8(UINT8_C(208), UINT8_C(219), UINT8_C( 57), UINT8_C(220), UINT8_C( 6), UINT8_C( 65), UINT8_C( 21), UINT8_C( 70), UINT8_C( 42), UINT8_C( 69), UINT8_C(205), UINT8_C( 75), UINT8_C( 45), UINT8_C( 93), UINT8_C(126), UINT8_C( 6), UINT8_C(174), UINT8_C( 50), UINT8_C( 17), UINT8_C(221), UINT8_C(217), UINT8_C(127), UINT8_C(111), UINT8_C(120), UINT8_C(131), UINT8_C(148), UINT8_C(141), UINT8_C(152), UINT8_C(144), UINT8_C(136), UINT8_C( 66), UINT8_C( 61), UINT8_C(132), UINT8_C(112), UINT8_C( 28), UINT8_C(152), UINT8_C(127), UINT8_C(231), UINT8_C(109), UINT8_C(161), UINT8_C(158), UINT8_C( 6), UINT8_C(213), UINT8_C(173), UINT8_C(118), UINT8_C(179), UINT8_C( 79), UINT8_C(178), UINT8_C( 46), UINT8_C(236), UINT8_C( 49), UINT8_C( 56), UINT8_C(157), UINT8_C(129), UINT8_C( 46), UINT8_C(200), UINT8_C( 89), UINT8_C(130), UINT8_C(187), UINT8_C(247), UINT8_C(213), UINT8_C( 39), UINT8_C(146), UINT8_C(142)), simde_x_mm512_set_epu8(UINT8_C( 76), UINT8_C(227), UINT8_C(192), UINT8_C(129), UINT8_C(228), UINT8_C( 26), UINT8_C(165), UINT8_C(150), UINT8_C(111), UINT8_C( 32), UINT8_C( 11), UINT8_C( 66), UINT8_C( 65), UINT8_C(106), UINT8_C(209), UINT8_C(140), UINT8_C( 66), UINT8_C( 2), UINT8_C(157), UINT8_C(126), UINT8_C( 65), UINT8_C( 8), UINT8_C( 57), UINT8_C( 55), UINT8_C(150), UINT8_C( 40), UINT8_C(164), UINT8_C( 54), UINT8_C( 94), UINT8_C(153), UINT8_C(234), UINT8_C(140), UINT8_C( 45), UINT8_C( 64), UINT8_C(136), UINT8_C(209), UINT8_C(101), UINT8_C(213), UINT8_C(147), UINT8_C(253), UINT8_C( 57), UINT8_C(116), UINT8_C( 96), UINT8_C( 87), UINT8_C(172), UINT8_C( 78), UINT8_C(156), UINT8_C(126), UINT8_C( 3), UINT8_C(150), UINT8_C(133), UINT8_C( 13), UINT8_C(226), 
UINT8_C( 18), UINT8_C(106), UINT8_C(202), UINT8_C( 95), UINT8_C( 92), UINT8_C( 59), UINT8_C(210), UINT8_C(140), UINT8_C( 87), UINT8_C(213), UINT8_C( 52)), simde_x_mm512_set_epu8(UINT8_C(142), UINT8_C(223), UINT8_C(125), UINT8_C(175), UINT8_C(117), UINT8_C( 46), UINT8_C( 93), UINT8_C(110), UINT8_C( 77), UINT8_C( 51), UINT8_C(108), UINT8_C( 71), UINT8_C( 55), UINT8_C(100), UINT8_C(168), UINT8_C( 73), UINT8_C(120), UINT8_C( 26), UINT8_C( 87), UINT8_C(174), UINT8_C(141), UINT8_C( 68), UINT8_C( 84), UINT8_C( 88), UINT8_C(141), UINT8_C( 94), UINT8_C(153), UINT8_C(103), UINT8_C(119), UINT8_C(145), UINT8_C(150), UINT8_C(101), UINT8_C( 89), UINT8_C( 88), UINT8_C( 82), UINT8_C(181), UINT8_C(114), UINT8_C(222), UINT8_C(128), UINT8_C(207), UINT8_C(108), UINT8_C( 61), UINT8_C(155), UINT8_C(130), UINT8_C(145), UINT8_C(129), UINT8_C(118), UINT8_C(152), UINT8_C( 25), UINT8_C(193), UINT8_C( 91), UINT8_C( 35), UINT8_C(192), UINT8_C( 74), UINT8_C( 76), UINT8_C(201), UINT8_C( 92), UINT8_C(111), UINT8_C(123), UINT8_C(229), UINT8_C(177), UINT8_C( 63), UINT8_C(180), UINT8_C( 97)) }, { simde_x_mm512_set_epu8(UINT8_C( 11), UINT8_C(110), UINT8_C(145), UINT8_C( 98), UINT8_C(192), UINT8_C(110), UINT8_C(161), UINT8_C( 50), UINT8_C(252), UINT8_C(104), UINT8_C(187), UINT8_C( 15), UINT8_C(183), UINT8_C(198), UINT8_C( 79), UINT8_C( 9), UINT8_C( 68), UINT8_C(123), UINT8_C(137), UINT8_C(144), UINT8_C(105), UINT8_C( 90), UINT8_C( 86), UINT8_C( 94), UINT8_C( 93), UINT8_C( 77), UINT8_C(192), UINT8_C( 8), UINT8_C( 44), UINT8_C( 25), UINT8_C( 87), UINT8_C(143), UINT8_C(226), UINT8_C( 88), UINT8_C(154), UINT8_C(252), UINT8_C( 55), UINT8_C(166), UINT8_C(142), UINT8_C(195), UINT8_C(169), UINT8_C(178), UINT8_C(205), UINT8_C( 91), UINT8_C( 65), UINT8_C(198), UINT8_C(127), UINT8_C( 53), UINT8_C(245), UINT8_C( 24), UINT8_C(177), UINT8_C(240), UINT8_C(199), UINT8_C( 24), UINT8_C(143), UINT8_C(108), UINT8_C( 39), UINT8_C(170), UINT8_C(163), UINT8_C(222), UINT8_C( 56), UINT8_C(160), UINT8_C(200), UINT8_C(122)), 
simde_x_mm512_set_epu8(UINT8_C( 57), UINT8_C( 48), UINT8_C(248), UINT8_C( 92), UINT8_C( 57), UINT8_C(154), UINT8_C( 83), UINT8_C(125), UINT8_C( 10), UINT8_C(154), UINT8_C( 72), UINT8_C(111), UINT8_C( 67), UINT8_C( 46), UINT8_C(125), UINT8_C(202), UINT8_C( 45), UINT8_C( 61), UINT8_C( 52), UINT8_C(168), UINT8_C(212), UINT8_C( 16), UINT8_C( 37), UINT8_C(131), UINT8_C( 48), UINT8_C( 90), UINT8_C(210), UINT8_C(233), UINT8_C(148), UINT8_C( 2), UINT8_C( 10), UINT8_C( 62), UINT8_C( 89), UINT8_C(180), UINT8_C( 94), UINT8_C( 22), UINT8_C(254), UINT8_C( 77), UINT8_C( 0), UINT8_C(151), UINT8_C(212), UINT8_C( 38), UINT8_C(106), UINT8_C(205), UINT8_C( 72), UINT8_C(159), UINT8_C(226), UINT8_C( 51), UINT8_C( 53), UINT8_C(162), UINT8_C(189), UINT8_C( 88), UINT8_C( 42), UINT8_C(136), UINT8_C(104), UINT8_C(208), UINT8_C(154), UINT8_C(241), UINT8_C(187), UINT8_C(143), UINT8_C(157), UINT8_C( 95), UINT8_C( 29), UINT8_C( 97)), simde_x_mm512_set_epu8(UINT8_C( 34), UINT8_C( 79), UINT8_C(197), UINT8_C( 95), UINT8_C(125), UINT8_C(132), UINT8_C(122), UINT8_C( 88), UINT8_C(131), UINT8_C(129), UINT8_C(130), UINT8_C( 63), UINT8_C(125), UINT8_C(122), UINT8_C(102), UINT8_C(106), UINT8_C( 57), UINT8_C( 92), UINT8_C( 95), UINT8_C(156), UINT8_C(159), UINT8_C( 53), UINT8_C( 62), UINT8_C(113), UINT8_C( 71), UINT8_C( 84), UINT8_C(201), UINT8_C(121), UINT8_C( 96), UINT8_C( 14), UINT8_C( 49), UINT8_C(103), UINT8_C(158), UINT8_C(134), UINT8_C(124), UINT8_C(137), UINT8_C(155), UINT8_C(122), UINT8_C( 71), UINT8_C(173), UINT8_C(191), UINT8_C(108), UINT8_C(156), UINT8_C(148), UINT8_C( 69), UINT8_C(179), UINT8_C(177), UINT8_C( 52), UINT8_C(149), UINT8_C( 93), UINT8_C(183), UINT8_C(164), UINT8_C(121), UINT8_C( 80), UINT8_C(124), UINT8_C(158), UINT8_C( 97), UINT8_C(206), UINT8_C(175), UINT8_C(183), UINT8_C(107), UINT8_C(128), UINT8_C(115), UINT8_C(110)) }, { simde_x_mm512_set_epu8(UINT8_C(130), UINT8_C( 79), UINT8_C( 82), UINT8_C( 5), UINT8_C(236), UINT8_C(119), UINT8_C( 25), UINT8_C( 77), UINT8_C(139), 
UINT8_C(103), UINT8_C(204), UINT8_C( 53), UINT8_C( 70), UINT8_C( 81), UINT8_C( 35), UINT8_C(154), UINT8_C(229), UINT8_C( 64), UINT8_C( 94), UINT8_C(131), UINT8_C(217), UINT8_C( 4), UINT8_C(103), UINT8_C(204), UINT8_C( 53), UINT8_C( 46), UINT8_C(123), UINT8_C(123), UINT8_C(251), UINT8_C(227), UINT8_C(153), UINT8_C( 16), UINT8_C( 79), UINT8_C(199), UINT8_C( 15), UINT8_C(177), UINT8_C( 83), UINT8_C( 15), UINT8_C( 37), UINT8_C(215), UINT8_C(178), UINT8_C(145), UINT8_C(240), UINT8_C( 53), UINT8_C(116), UINT8_C(195), UINT8_C(101), UINT8_C(243), UINT8_C(134), UINT8_C( 93), UINT8_C( 64), UINT8_C(103), UINT8_C( 73), UINT8_C( 82), UINT8_C(189), UINT8_C( 37), UINT8_C(202), UINT8_C( 62), UINT8_C(157), UINT8_C( 97), UINT8_C(142), UINT8_C( 6), UINT8_C( 38), UINT8_C( 24)), simde_x_mm512_set_epu8(UINT8_C( 57), UINT8_C( 93), UINT8_C(144), UINT8_C( 30), UINT8_C( 11), UINT8_C(223), UINT8_C( 55), UINT8_C(131), UINT8_C( 29), UINT8_C(176), UINT8_C( 52), UINT8_C( 84), UINT8_C( 79), UINT8_C( 47), UINT8_C(128), UINT8_C( 28), UINT8_C( 73), UINT8_C(131), UINT8_C(138), UINT8_C( 1), UINT8_C( 98), UINT8_C(110), UINT8_C(210), UINT8_C(168), UINT8_C(208), UINT8_C(103), UINT8_C(151), UINT8_C( 58), UINT8_C(149), UINT8_C( 16), UINT8_C(113), UINT8_C( 41), UINT8_C(201), UINT8_C( 13), UINT8_C(211), UINT8_C(230), UINT8_C(208), UINT8_C(154), UINT8_C( 49), UINT8_C(127), UINT8_C( 93), UINT8_C( 7), UINT8_C(136), UINT8_C( 53), UINT8_C( 19), UINT8_C( 95), UINT8_C(187), UINT8_C(149), UINT8_C( 0), UINT8_C(161), UINT8_C(228), UINT8_C( 84), UINT8_C( 84), UINT8_C(105), UINT8_C( 90), UINT8_C( 17), UINT8_C( 3), UINT8_C(183), UINT8_C(131), UINT8_C( 24), UINT8_C(201), UINT8_C( 5), UINT8_C(128), UINT8_C( 80)), simde_x_mm512_set_epu8(UINT8_C( 94), UINT8_C( 86), UINT8_C(113), UINT8_C( 18), UINT8_C(124), UINT8_C(171), UINT8_C( 40), UINT8_C(104), UINT8_C( 84), UINT8_C(140), UINT8_C(128), UINT8_C( 69), UINT8_C( 75), UINT8_C( 64), UINT8_C( 82), UINT8_C( 91), UINT8_C(151), UINT8_C( 98), UINT8_C(116), UINT8_C( 66), 
UINT8_C(158), UINT8_C( 57), UINT8_C(157), UINT8_C(186), UINT8_C(131), UINT8_C( 75), UINT8_C(137), UINT8_C( 91), UINT8_C(200), UINT8_C(122), UINT8_C(133), UINT8_C( 29), UINT8_C(140), UINT8_C(106), UINT8_C(113), UINT8_C(204), UINT8_C(146), UINT8_C( 85), UINT8_C( 43), UINT8_C(171), UINT8_C(136), UINT8_C( 76), UINT8_C(188), UINT8_C( 53), UINT8_C( 68), UINT8_C(145), UINT8_C(144), UINT8_C(196), UINT8_C( 67), UINT8_C(127), UINT8_C(146), UINT8_C( 94), UINT8_C( 79), UINT8_C( 94), UINT8_C(140), UINT8_C( 27), UINT8_C(103), UINT8_C(123), UINT8_C(144), UINT8_C( 61), UINT8_C(172), UINT8_C( 6), UINT8_C( 83), UINT8_C( 52)) }, { simde_x_mm512_set_epu8(UINT8_C( 64), UINT8_C( 98), UINT8_C(116), UINT8_C(169), UINT8_C(168), UINT8_C(153), UINT8_C(192), UINT8_C(140), UINT8_C( 79), UINT8_C( 29), UINT8_C(180), UINT8_C(232), UINT8_C( 89), UINT8_C( 84), UINT8_C(140), UINT8_C(130), UINT8_C(108), UINT8_C(184), UINT8_C(116), UINT8_C( 83), UINT8_C(161), UINT8_C(105), UINT8_C( 17), UINT8_C(119), UINT8_C( 0), UINT8_C(101), UINT8_C(104), UINT8_C(103), UINT8_C(226), UINT8_C( 60), UINT8_C( 23), UINT8_C(129), UINT8_C(141), UINT8_C(179), UINT8_C( 87), UINT8_C(113), UINT8_C(251), UINT8_C(219), UINT8_C(107), UINT8_C(119), UINT8_C( 70), UINT8_C(169), UINT8_C( 11), UINT8_C( 91), UINT8_C( 23), UINT8_C(156), UINT8_C( 99), UINT8_C( 36), UINT8_C(154), UINT8_C( 26), UINT8_C( 23), UINT8_C(168), UINT8_C( 12), UINT8_C(101), UINT8_C(189), UINT8_C(124), UINT8_C(154), UINT8_C( 52), UINT8_C(134), UINT8_C(128), UINT8_C(213), UINT8_C( 85), UINT8_C(189), UINT8_C(209)), simde_x_mm512_set_epu8(UINT8_C( 21), UINT8_C( 27), UINT8_C( 57), UINT8_C(175), UINT8_C(191), UINT8_C( 35), UINT8_C( 75), UINT8_C( 5), UINT8_C(168), UINT8_C( 42), UINT8_C(198), UINT8_C(223), UINT8_C(239), UINT8_C(225), UINT8_C(227), UINT8_C( 5), UINT8_C( 85), UINT8_C(196), UINT8_C(184), UINT8_C(155), UINT8_C( 73), UINT8_C(109), UINT8_C(131), UINT8_C(103), UINT8_C(131), UINT8_C(155), UINT8_C(105), UINT8_C( 70), UINT8_C(164), UINT8_C(114), UINT8_C(114), 
UINT8_C(212), UINT8_C(157), UINT8_C( 34), UINT8_C(220), UINT8_C(250), UINT8_C(254), UINT8_C(227), UINT8_C(240), UINT8_C( 41), UINT8_C(208), UINT8_C( 26), UINT8_C(221), UINT8_C(132), UINT8_C(121), UINT8_C(178), UINT8_C(219), UINT8_C( 55), UINT8_C( 45), UINT8_C( 78), UINT8_C(233), UINT8_C( 53), UINT8_C( 87), UINT8_C(172), UINT8_C(122), UINT8_C(147), UINT8_C(187), UINT8_C(115), UINT8_C( 22), UINT8_C(254), UINT8_C( 9), UINT8_C(218), UINT8_C(224), UINT8_C(214)), simde_x_mm512_set_epu8(UINT8_C( 43), UINT8_C( 63), UINT8_C( 87), UINT8_C(172), UINT8_C(180), UINT8_C( 94), UINT8_C(134), UINT8_C( 73), UINT8_C(124), UINT8_C( 36), UINT8_C(189), UINT8_C(228), UINT8_C(164), UINT8_C(155), UINT8_C(184), UINT8_C( 68), UINT8_C( 97), UINT8_C(190), UINT8_C(150), UINT8_C(119), UINT8_C(117), UINT8_C(107), UINT8_C( 74), UINT8_C(111), UINT8_C( 66), UINT8_C(128), UINT8_C(105), UINT8_C( 87), UINT8_C(195), UINT8_C( 87), UINT8_C( 69), UINT8_C(171), UINT8_C(149), UINT8_C(107), UINT8_C(154), UINT8_C(182), UINT8_C(253), UINT8_C(223), UINT8_C(174), UINT8_C( 80), UINT8_C(139), UINT8_C( 98), UINT8_C(116), UINT8_C(112), UINT8_C( 72), UINT8_C(167), UINT8_C(159), UINT8_C( 46), UINT8_C(100), UINT8_C( 52), UINT8_C(128), UINT8_C(111), UINT8_C( 50), UINT8_C(137), UINT8_C(156), UINT8_C(136), UINT8_C(171), UINT8_C( 84), UINT8_C( 78), UINT8_C(191), UINT8_C(111), UINT8_C(152), UINT8_C(207), UINT8_C(212)) }, { simde_x_mm512_set_epu8(UINT8_C(136), UINT8_C( 77), UINT8_C( 92), UINT8_C(127), UINT8_C(250), UINT8_C( 34), UINT8_C(180), UINT8_C(171), UINT8_C( 31), UINT8_C(112), UINT8_C(162), UINT8_C(182), UINT8_C(240), UINT8_C(136), UINT8_C( 96), UINT8_C( 85), UINT8_C( 41), UINT8_C( 52), UINT8_C( 59), UINT8_C(203), UINT8_C(213), UINT8_C(103), UINT8_C(221), UINT8_C(176), UINT8_C( 99), UINT8_C( 2), UINT8_C( 21), UINT8_C(182), UINT8_C( 7), UINT8_C(176), UINT8_C(125), UINT8_C(109), UINT8_C(233), UINT8_C(242), UINT8_C( 5), UINT8_C(109), UINT8_C( 73), UINT8_C(240), UINT8_C(175), UINT8_C( 24), UINT8_C( 44), UINT8_C(103), 
UINT8_C(204), UINT8_C( 40), UINT8_C( 78), UINT8_C(117), UINT8_C(221), UINT8_C(168), UINT8_C(194), UINT8_C( 10), UINT8_C( 15), UINT8_C(227), UINT8_C( 13), UINT8_C(241), UINT8_C( 93), UINT8_C( 23), UINT8_C( 31), UINT8_C( 84), UINT8_C(219), UINT8_C( 76), UINT8_C( 52), UINT8_C( 31), UINT8_C( 22), UINT8_C(183)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 70), UINT8_C(193), UINT8_C(116), UINT8_C( 9), UINT8_C( 85), UINT8_C(190), UINT8_C(154), UINT8_C( 37), UINT8_C(184), UINT8_C(193), UINT8_C(107), UINT8_C( 79), UINT8_C(235), UINT8_C( 33), UINT8_C(125), UINT8_C(111), UINT8_C(108), UINT8_C( 99), UINT8_C(162), UINT8_C(175), UINT8_C(211), UINT8_C( 79), UINT8_C(167), UINT8_C(166), UINT8_C( 89), UINT8_C( 21), UINT8_C(138), UINT8_C( 15), UINT8_C(105), UINT8_C(191), UINT8_C(205), UINT8_C( 34), UINT8_C(122), UINT8_C(209), UINT8_C(143), UINT8_C(216), UINT8_C(162), UINT8_C(239), UINT8_C(190), UINT8_C(212), UINT8_C(246), UINT8_C(251), UINT8_C(206), UINT8_C( 61), UINT8_C( 80), UINT8_C(151), UINT8_C( 93), UINT8_C( 56), UINT8_C(135), UINT8_C(169), UINT8_C( 8), UINT8_C(191), UINT8_C( 66), UINT8_C( 60), UINT8_C( 24), UINT8_C( 93), UINT8_C(156), UINT8_C( 32), UINT8_C( 27), UINT8_C( 48), UINT8_C(210), UINT8_C(231), UINT8_C( 10)), simde_x_mm512_set_epu8(UINT8_C( 68), UINT8_C( 74), UINT8_C(143), UINT8_C(122), UINT8_C(130), UINT8_C( 60), UINT8_C(185), UINT8_C(163), UINT8_C( 34), UINT8_C(148), UINT8_C(178), UINT8_C(145), UINT8_C(160), UINT8_C(186), UINT8_C( 65), UINT8_C(105), UINT8_C( 76), UINT8_C( 80), UINT8_C( 79), UINT8_C(183), UINT8_C(194), UINT8_C(157), UINT8_C(150), UINT8_C(172), UINT8_C(133), UINT8_C( 46), UINT8_C( 21), UINT8_C(160), UINT8_C( 11), UINT8_C(141), UINT8_C(158), UINT8_C(157), UINT8_C(134), UINT8_C(182), UINT8_C(107), UINT8_C(126), UINT8_C(145), UINT8_C(201), UINT8_C(207), UINT8_C(107), UINT8_C(128), UINT8_C(175), UINT8_C(228), UINT8_C(123), UINT8_C( 70), UINT8_C( 99), UINT8_C(186), UINT8_C(131), UINT8_C(125), UINT8_C( 73), UINT8_C( 92), UINT8_C(118), UINT8_C(102), 
UINT8_C(154), UINT8_C( 77), UINT8_C( 24), UINT8_C( 62), UINT8_C(120), UINT8_C(126), UINT8_C( 52), UINT8_C( 50), UINT8_C(121), UINT8_C(127), UINT8_C( 97)) }, { simde_x_mm512_set_epu8(UINT8_C(234), UINT8_C(192), UINT8_C(234), UINT8_C(237), UINT8_C( 5), UINT8_C(121), UINT8_C(180), UINT8_C( 23), UINT8_C( 48), UINT8_C(184), UINT8_C(208), UINT8_C(106), UINT8_C( 94), UINT8_C(238), UINT8_C(124), UINT8_C( 15), UINT8_C(123), UINT8_C(203), UINT8_C( 16), UINT8_C( 14), UINT8_C(131), UINT8_C(218), UINT8_C( 99), UINT8_C(178), UINT8_C(217), UINT8_C(189), UINT8_C(211), UINT8_C(222), UINT8_C( 27), UINT8_C( 25), UINT8_C( 55), UINT8_C(187), UINT8_C( 24), UINT8_C(250), UINT8_C( 44), UINT8_C(207), UINT8_C(143), UINT8_C(198), UINT8_C(115), UINT8_C( 4), UINT8_C( 18), UINT8_C( 78), UINT8_C( 97), UINT8_C( 62), UINT8_C( 38), UINT8_C(184), UINT8_C(216), UINT8_C(124), UINT8_C(155), UINT8_C( 45), UINT8_C( 66), UINT8_C( 36), UINT8_C( 7), UINT8_C( 22), UINT8_C(106), UINT8_C( 14), UINT8_C( 55), UINT8_C( 9), UINT8_C(104), UINT8_C(136), UINT8_C(167), UINT8_C(129), UINT8_C( 7), UINT8_C( 98)), simde_x_mm512_set_epu8(UINT8_C( 4), UINT8_C( 28), UINT8_C(178), UINT8_C(230), UINT8_C( 8), UINT8_C( 80), UINT8_C(138), UINT8_C(121), UINT8_C(141), UINT8_C(178), UINT8_C(150), UINT8_C( 72), UINT8_C(121), UINT8_C( 20), UINT8_C( 28), UINT8_C(243), UINT8_C(255), UINT8_C(153), UINT8_C( 78), UINT8_C(119), UINT8_C( 23), UINT8_C(182), UINT8_C(133), UINT8_C(168), UINT8_C(148), UINT8_C(129), UINT8_C(203), UINT8_C(206), UINT8_C( 25), UINT8_C(232), UINT8_C( 71), UINT8_C( 92), UINT8_C( 90), UINT8_C(159), UINT8_C(196), UINT8_C( 86), UINT8_C(163), UINT8_C(105), UINT8_C(169), UINT8_C(248), UINT8_C(169), UINT8_C( 85), UINT8_C(177), UINT8_C(146), UINT8_C( 77), UINT8_C( 44), UINT8_C(150), UINT8_C( 64), UINT8_C(145), UINT8_C( 33), UINT8_C(163), UINT8_C(255), UINT8_C( 33), UINT8_C( 86), UINT8_C( 3), UINT8_C(162), UINT8_C(241), UINT8_C(203), UINT8_C( 13), UINT8_C( 95), UINT8_C(247), UINT8_C( 54), UINT8_C( 73), UINT8_C(154)), 
simde_x_mm512_set_epu8(UINT8_C(119), UINT8_C(110), UINT8_C(206), UINT8_C(234), UINT8_C( 7), UINT8_C(101), UINT8_C(159), UINT8_C( 72), UINT8_C( 95), UINT8_C(181), UINT8_C(179), UINT8_C( 89), UINT8_C(108), UINT8_C(129), UINT8_C( 76), UINT8_C(129), UINT8_C(189), UINT8_C(178), UINT8_C( 47), UINT8_C( 67), UINT8_C( 77), UINT8_C(200), UINT8_C(116), UINT8_C(173), UINT8_C(183), UINT8_C(159), UINT8_C(207), UINT8_C(214), UINT8_C( 26), UINT8_C(129), UINT8_C( 63), UINT8_C(140), UINT8_C( 57), UINT8_C(205), UINT8_C(120), UINT8_C(147), UINT8_C(153), UINT8_C(152), UINT8_C(142), UINT8_C(126), UINT8_C( 94), UINT8_C( 82), UINT8_C(137), UINT8_C(104), UINT8_C( 58), UINT8_C(114), UINT8_C(183), UINT8_C( 94), UINT8_C(150), UINT8_C( 39), UINT8_C(115), UINT8_C(146), UINT8_C( 20), UINT8_C( 54), UINT8_C( 55), UINT8_C( 88), UINT8_C(148), UINT8_C(106), UINT8_C( 59), UINT8_C(116), UINT8_C(207), UINT8_C( 92), UINT8_C( 40), UINT8_C(126)) }, { simde_x_mm512_set_epu8(UINT8_C( 83), UINT8_C(142), UINT8_C( 42), UINT8_C(220), UINT8_C( 37), UINT8_C( 62), UINT8_C( 98), UINT8_C(238), UINT8_C(153), UINT8_C( 28), UINT8_C(206), UINT8_C(122), UINT8_C( 85), UINT8_C(118), UINT8_C(175), UINT8_C(149), UINT8_C(220), UINT8_C( 77), UINT8_C( 29), UINT8_C( 65), UINT8_C(199), UINT8_C(150), UINT8_C( 85), UINT8_C(205), UINT8_C(179), UINT8_C(171), UINT8_C( 47), UINT8_C( 79), UINT8_C(186), UINT8_C(189), UINT8_C( 5), UINT8_C( 88), UINT8_C(186), UINT8_C(241), UINT8_C(203), UINT8_C(126), UINT8_C(116), UINT8_C( 53), UINT8_C( 43), UINT8_C(208), UINT8_C(141), UINT8_C(174), UINT8_C(168), UINT8_C( 33), UINT8_C(144), UINT8_C(236), UINT8_C( 92), UINT8_C(185), UINT8_C(194), UINT8_C( 20), UINT8_C( 6), UINT8_C( 82), UINT8_C( 17), UINT8_C(156), UINT8_C(193), UINT8_C( 6), UINT8_C( 65), UINT8_C( 12), UINT8_C( 82), UINT8_C(186), UINT8_C( 9), UINT8_C( 38), UINT8_C(161), UINT8_C(250)), simde_x_mm512_set_epu8(UINT8_C( 7), UINT8_C(237), UINT8_C(219), UINT8_C(171), UINT8_C(236), UINT8_C(104), UINT8_C( 84), UINT8_C( 1), UINT8_C(226), UINT8_C( 80), 
UINT8_C( 28), UINT8_C(225), UINT8_C(190), UINT8_C(201), UINT8_C(130), UINT8_C(143), UINT8_C( 85), UINT8_C(214), UINT8_C(162), UINT8_C(248), UINT8_C(236), UINT8_C(124), UINT8_C(162), UINT8_C(253), UINT8_C(116), UINT8_C(105), UINT8_C(252), UINT8_C(254), UINT8_C( 40), UINT8_C( 36), UINT8_C( 40), UINT8_C(179), UINT8_C(113), UINT8_C(185), UINT8_C( 37), UINT8_C(245), UINT8_C( 41), UINT8_C( 6), UINT8_C(210), UINT8_C( 50), UINT8_C( 82), UINT8_C(223), UINT8_C(202), UINT8_C( 34), UINT8_C(124), UINT8_C( 12), UINT8_C(156), UINT8_C(107), UINT8_C( 75), UINT8_C(221), UINT8_C(108), UINT8_C(193), UINT8_C( 4), UINT8_C(112), UINT8_C(113), UINT8_C(115), UINT8_C(250), UINT8_C( 35), UINT8_C(161), UINT8_C( 10), UINT8_C(151), UINT8_C(213), UINT8_C(128), UINT8_C( 1)), simde_x_mm512_set_epu8(UINT8_C( 45), UINT8_C(190), UINT8_C(131), UINT8_C(196), UINT8_C(137), UINT8_C( 83), UINT8_C( 91), UINT8_C(120), UINT8_C(190), UINT8_C( 54), UINT8_C(117), UINT8_C(174), UINT8_C(138), UINT8_C(160), UINT8_C(153), UINT8_C(146), UINT8_C(153), UINT8_C(146), UINT8_C( 96), UINT8_C(157), UINT8_C(218), UINT8_C(137), UINT8_C(124), UINT8_C(229), UINT8_C(148), UINT8_C(138), UINT8_C(150), UINT8_C(167), UINT8_C(113), UINT8_C(113), UINT8_C( 23), UINT8_C(134), UINT8_C(150), UINT8_C(213), UINT8_C(120), UINT8_C(186), UINT8_C( 79), UINT8_C( 30), UINT8_C(127), UINT8_C(129), UINT8_C(112), UINT8_C(199), UINT8_C(185), UINT8_C( 34), UINT8_C(134), UINT8_C(124), UINT8_C(124), UINT8_C(146), UINT8_C(135), UINT8_C(121), UINT8_C( 57), UINT8_C(138), UINT8_C( 11), UINT8_C(134), UINT8_C(153), UINT8_C( 61), UINT8_C(158), UINT8_C( 24), UINT8_C(122), UINT8_C( 98), UINT8_C( 80), UINT8_C(126), UINT8_C(145), UINT8_C(126)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_avg_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m512i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_avg_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask64 k; 
simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu8(UINT8_C(117), UINT8_C(152), UINT8_C(221), UINT8_C(216), UINT8_C(255), UINT8_C( 43), UINT8_C( 10), UINT8_C(211), UINT8_C(214), UINT8_C( 80), UINT8_C(187), UINT8_C(241), UINT8_C(242), UINT8_C(134), UINT8_C( 60), UINT8_C( 93), UINT8_C( 23), UINT8_C(122), UINT8_C( 10), UINT8_C(108), UINT8_C(232), UINT8_C(191), UINT8_C(217), UINT8_C(158), UINT8_C(199), UINT8_C(250), UINT8_C( 81), UINT8_C(211), UINT8_C(229), UINT8_C( 28), UINT8_C(171), UINT8_C( 15), UINT8_C(138), UINT8_C( 52), UINT8_C( 10), UINT8_C(140), UINT8_C( 26), UINT8_C(213), UINT8_C(218), UINT8_C(229), UINT8_C( 66), UINT8_C(204), UINT8_C( 5), UINT8_C(255), UINT8_C(228), UINT8_C( 3), UINT8_C(123), UINT8_C(140), UINT8_C(222), UINT8_C(224), UINT8_C( 98), UINT8_C(103), UINT8_C(237), UINT8_C(138), UINT8_C(179), UINT8_C(224), UINT8_C( 60), UINT8_C(176), UINT8_C( 22), UINT8_C(230), UINT8_C( 60), UINT8_C(244), UINT8_C(191), UINT8_C( 88)), UINT64_C( 2117573942), simde_x_mm512_set_epu8(UINT8_C( 32), UINT8_C( 22), UINT8_C( 88), UINT8_C(222), UINT8_C( 12), UINT8_C( 90), UINT8_C(155), UINT8_C(252), UINT8_C(242), UINT8_C( 42), UINT8_C(169), UINT8_C(105), UINT8_C( 22), UINT8_C( 34), UINT8_C(113), UINT8_C(184), UINT8_C(216), UINT8_C(186), UINT8_C(232), UINT8_C(159), UINT8_C(188), UINT8_C(250), UINT8_C( 98), UINT8_C(132), UINT8_C(221), UINT8_C( 11), UINT8_C(138), UINT8_C(207), UINT8_C(214), UINT8_C( 24), UINT8_C(222), UINT8_C( 73), UINT8_C(253), UINT8_C(184), UINT8_C(153), UINT8_C( 26), UINT8_C(220), UINT8_C(147), UINT8_C( 37), UINT8_C( 50), UINT8_C( 26), UINT8_C( 78), UINT8_C( 33), UINT8_C( 67), UINT8_C(248), UINT8_C(190), UINT8_C( 29), UINT8_C( 31), UINT8_C( 34), UINT8_C( 40), UINT8_C(189), UINT8_C( 86), UINT8_C( 38), UINT8_C(128), UINT8_C(150), UINT8_C(241), UINT8_C(100), UINT8_C( 53), UINT8_C( 42), UINT8_C( 55), UINT8_C( 87), UINT8_C(241), UINT8_C(251), UINT8_C(171)), simde_x_mm512_set_epu8(UINT8_C(229), UINT8_C(148), UINT8_C(139), 
UINT8_C(168), UINT8_C(149), UINT8_C( 53), UINT8_C(240), UINT8_C(255), UINT8_C(164), UINT8_C(137), UINT8_C( 17), UINT8_C(134), UINT8_C( 22), UINT8_C(243), UINT8_C( 7), UINT8_C(130), UINT8_C(232), UINT8_C(205), UINT8_C(227), UINT8_C(142), UINT8_C(100), UINT8_C(203), UINT8_C( 0), UINT8_C(144), UINT8_C(176), UINT8_C( 89), UINT8_C( 91), UINT8_C( 1), UINT8_C(102), UINT8_C(254), UINT8_C(189), UINT8_C(168), UINT8_C(251), UINT8_C(171), UINT8_C( 24), UINT8_C( 13), UINT8_C( 67), UINT8_C( 49), UINT8_C( 20), UINT8_C(185), UINT8_C(232), UINT8_C( 19), UINT8_C(238), UINT8_C( 58), UINT8_C(109), UINT8_C(140), UINT8_C( 95), UINT8_C( 71), UINT8_C( 47), UINT8_C(118), UINT8_C(241), UINT8_C(225), UINT8_C(186), UINT8_C(175), UINT8_C( 45), UINT8_C( 88), UINT8_C(164), UINT8_C( 95), UINT8_C(253), UINT8_C(227), UINT8_C( 20), UINT8_C(170), UINT8_C(251), UINT8_C( 57)), simde_x_mm512_set_epu8(UINT8_C(117), UINT8_C(152), UINT8_C(221), UINT8_C(216), UINT8_C(255), UINT8_C( 43), UINT8_C( 10), UINT8_C(211), UINT8_C(214), UINT8_C( 80), UINT8_C(187), UINT8_C(241), UINT8_C(242), UINT8_C(134), UINT8_C( 60), UINT8_C( 93), UINT8_C( 23), UINT8_C(122), UINT8_C( 10), UINT8_C(108), UINT8_C(232), UINT8_C(191), UINT8_C(217), UINT8_C(158), UINT8_C(199), UINT8_C(250), UINT8_C( 81), UINT8_C(211), UINT8_C(229), UINT8_C( 28), UINT8_C(171), UINT8_C( 15), UINT8_C(138), UINT8_C(178), UINT8_C( 89), UINT8_C( 20), UINT8_C(144), UINT8_C( 98), UINT8_C( 29), UINT8_C(229), UINT8_C( 66), UINT8_C(204), UINT8_C(136), UINT8_C( 63), UINT8_C(228), UINT8_C(165), UINT8_C( 62), UINT8_C( 51), UINT8_C( 41), UINT8_C(224), UINT8_C( 98), UINT8_C(156), UINT8_C(112), UINT8_C(152), UINT8_C(179), UINT8_C(165), UINT8_C( 60), UINT8_C(176), UINT8_C(148), UINT8_C(141), UINT8_C( 60), UINT8_C(206), UINT8_C(251), UINT8_C( 88)) }, { simde_x_mm512_set_epu8(UINT8_C(248), UINT8_C(153), UINT8_C(124), UINT8_C(192), UINT8_C(251), UINT8_C( 73), UINT8_C( 83), UINT8_C(149), UINT8_C(192), UINT8_C(225), UINT8_C( 11), UINT8_C( 45), UINT8_C(242), UINT8_C(146), 
UINT8_C(100), UINT8_C(250), UINT8_C(206), UINT8_C(133), UINT8_C(162), UINT8_C( 12), UINT8_C(227), UINT8_C(156), UINT8_C( 97), UINT8_C(141), UINT8_C(103), UINT8_C(177), UINT8_C(102), UINT8_C(250), UINT8_C(236), UINT8_C(151), UINT8_C(250), UINT8_C( 69), UINT8_C( 19), UINT8_C(102), UINT8_C(130), UINT8_C(239), UINT8_C( 26), UINT8_C(151), UINT8_C( 91), UINT8_C(218), UINT8_C(106), UINT8_C( 8), UINT8_C( 85), UINT8_C(190), UINT8_C( 40), UINT8_C(207), UINT8_C( 10), UINT8_C( 15), UINT8_C( 30), UINT8_C( 97), UINT8_C(208), UINT8_C( 26), UINT8_C( 77), UINT8_C(104), UINT8_C(138), UINT8_C( 49), UINT8_C(222), UINT8_C(133), UINT8_C( 22), UINT8_C(114), UINT8_C(226), UINT8_C( 75), UINT8_C( 2), UINT8_C(236)), UINT64_C( 474091127), simde_x_mm512_set_epu8(UINT8_C( 64), UINT8_C( 45), UINT8_C(186), UINT8_C( 94), UINT8_C(127), UINT8_C(186), UINT8_C(127), UINT8_C(178), UINT8_C(198), UINT8_C( 92), UINT8_C(231), UINT8_C(248), UINT8_C( 21), UINT8_C( 89), UINT8_C( 8), UINT8_C( 1), UINT8_C( 85), UINT8_C( 5), UINT8_C(111), UINT8_C(109), UINT8_C( 6), UINT8_C(229), UINT8_C( 18), UINT8_C( 62), UINT8_C(249), UINT8_C(126), UINT8_C(234), UINT8_C(220), UINT8_C(246), UINT8_C(255), UINT8_C( 1), UINT8_C(115), UINT8_C( 87), UINT8_C( 93), UINT8_C(185), UINT8_C(156), UINT8_C(164), UINT8_C(103), UINT8_C(237), UINT8_C(252), UINT8_C(126), UINT8_C(112), UINT8_C(184), UINT8_C( 45), UINT8_C( 61), UINT8_C(246), UINT8_C( 68), UINT8_C(163), UINT8_C( 5), UINT8_C(127), UINT8_C(109), UINT8_C(194), UINT8_C(167), UINT8_C(139), UINT8_C(130), UINT8_C( 52), UINT8_C(248), UINT8_C(164), UINT8_C(233), UINT8_C(208), UINT8_C(104), UINT8_C(136), UINT8_C(254), UINT8_C(148)), simde_x_mm512_set_epu8(UINT8_C( 10), UINT8_C(152), UINT8_C(249), UINT8_C(150), UINT8_C(250), UINT8_C( 9), UINT8_C(168), UINT8_C( 52), UINT8_C( 69), UINT8_C( 91), UINT8_C(134), UINT8_C( 83), UINT8_C( 54), UINT8_C(214), UINT8_C( 9), UINT8_C(100), UINT8_C( 84), UINT8_C( 66), UINT8_C( 99), UINT8_C(199), UINT8_C( 20), UINT8_C(200), UINT8_C(215), UINT8_C( 34), 
UINT8_C( 96), UINT8_C(125), UINT8_C( 40), UINT8_C(246), UINT8_C( 37), UINT8_C(202), UINT8_C(215), UINT8_C(111), UINT8_C(239), UINT8_C( 73), UINT8_C( 10), UINT8_C( 78), UINT8_C(192), UINT8_C( 57), UINT8_C( 95), UINT8_C( 52), UINT8_C(133), UINT8_C(102), UINT8_C(165), UINT8_C(231), UINT8_C(182), UINT8_C( 23), UINT8_C(129), UINT8_C(213), UINT8_C(123), UINT8_C(235), UINT8_C(187), UINT8_C( 72), UINT8_C(170), UINT8_C( 39), UINT8_C(204), UINT8_C( 88), UINT8_C( 48), UINT8_C( 73), UINT8_C(134), UINT8_C( 45), UINT8_C(127), UINT8_C( 12), UINT8_C( 19), UINT8_C(132)), simde_x_mm512_set_epu8(UINT8_C(248), UINT8_C(153), UINT8_C(124), UINT8_C(192), UINT8_C(251), UINT8_C( 73), UINT8_C( 83), UINT8_C(149), UINT8_C(192), UINT8_C(225), UINT8_C( 11), UINT8_C( 45), UINT8_C(242), UINT8_C(146), UINT8_C(100), UINT8_C(250), UINT8_C(206), UINT8_C(133), UINT8_C(162), UINT8_C( 12), UINT8_C(227), UINT8_C(156), UINT8_C( 97), UINT8_C(141), UINT8_C(103), UINT8_C(177), UINT8_C(102), UINT8_C(250), UINT8_C(236), UINT8_C(151), UINT8_C(250), UINT8_C( 69), UINT8_C( 19), UINT8_C(102), UINT8_C(130), UINT8_C(117), UINT8_C(178), UINT8_C( 80), UINT8_C( 91), UINT8_C(218), UINT8_C(106), UINT8_C(107), UINT8_C( 85), UINT8_C(190), UINT8_C( 40), UINT8_C(207), UINT8_C( 99), UINT8_C( 15), UINT8_C( 30), UINT8_C( 97), UINT8_C(208), UINT8_C( 26), UINT8_C(169), UINT8_C( 89), UINT8_C(167), UINT8_C( 49), UINT8_C(222), UINT8_C(119), UINT8_C(184), UINT8_C(127), UINT8_C(226), UINT8_C( 74), UINT8_C(137), UINT8_C(140)) }, { simde_x_mm512_set_epu8(UINT8_C(221), UINT8_C(100), UINT8_C(182), UINT8_C(149), UINT8_C( 44), UINT8_C(198), UINT8_C( 20), UINT8_C( 23), UINT8_C(105), UINT8_C(188), UINT8_C(118), UINT8_C(243), UINT8_C(175), UINT8_C( 41), UINT8_C(183), UINT8_C(141), UINT8_C(145), UINT8_C( 21), UINT8_C( 99), UINT8_C(117), UINT8_C(242), UINT8_C(144), UINT8_C( 71), UINT8_C( 21), UINT8_C(142), UINT8_C(181), UINT8_C( 66), UINT8_C(137), UINT8_C(194), UINT8_C(226), UINT8_C( 86), UINT8_C(128), UINT8_C(109), UINT8_C( 15), UINT8_C(187), 
UINT8_C( 22), UINT8_C(243), UINT8_C( 38), UINT8_C(163), UINT8_C(215), UINT8_C( 96), UINT8_C( 79), UINT8_C(232), UINT8_C(216), UINT8_C( 90), UINT8_C( 31), UINT8_C(221), UINT8_C( 22), UINT8_C(144), UINT8_C(219), UINT8_C( 29), UINT8_C( 29), UINT8_C( 7), UINT8_C( 8), UINT8_C(106), UINT8_C(210), UINT8_C( 85), UINT8_C(118), UINT8_C(120), UINT8_C(208), UINT8_C(112), UINT8_C( 80), UINT8_C(173), UINT8_C( 55)), UINT64_C( 66155005), simde_x_mm512_set_epu8(UINT8_C( 90), UINT8_C(190), UINT8_C(204), UINT8_C( 44), UINT8_C(145), UINT8_C( 10), UINT8_C(145), UINT8_C( 20), UINT8_C(245), UINT8_C(128), UINT8_C(239), UINT8_C(216), UINT8_C(215), UINT8_C( 0), UINT8_C(241), UINT8_C(105), UINT8_C( 81), UINT8_C( 3), UINT8_C( 23), UINT8_C(107), UINT8_C(238), UINT8_C( 80), UINT8_C(150), UINT8_C( 52), UINT8_C( 80), UINT8_C(120), UINT8_C( 83), UINT8_C(139), UINT8_C( 84), UINT8_C(178), UINT8_C( 47), UINT8_C(223), UINT8_C(103), UINT8_C( 66), UINT8_C( 79), UINT8_C( 53), UINT8_C(211), UINT8_C( 20), UINT8_C(111), UINT8_C(197), UINT8_C(238), UINT8_C( 30), UINT8_C( 70), UINT8_C(231), UINT8_C(199), UINT8_C( 18), UINT8_C(252), UINT8_C(101), UINT8_C( 75), UINT8_C( 12), UINT8_C( 85), UINT8_C( 93), UINT8_C(177), UINT8_C(243), UINT8_C( 43), UINT8_C( 45), UINT8_C( 40), UINT8_C(175), UINT8_C( 99), UINT8_C(172), UINT8_C( 7), UINT8_C(156), UINT8_C(188), UINT8_C(130)), simde_x_mm512_set_epu8(UINT8_C( 42), UINT8_C( 32), UINT8_C(165), UINT8_C(130), UINT8_C(119), UINT8_C( 88), UINT8_C(110), UINT8_C( 93), UINT8_C( 75), UINT8_C(207), UINT8_C(193), UINT8_C(214), UINT8_C( 54), UINT8_C(185), UINT8_C( 87), UINT8_C(255), UINT8_C(231), UINT8_C(196), UINT8_C(102), UINT8_C(158), UINT8_C(161), UINT8_C(222), UINT8_C(210), UINT8_C( 94), UINT8_C(118), UINT8_C(127), UINT8_C(194), UINT8_C(186), UINT8_C( 80), UINT8_C(125), UINT8_C(244), UINT8_C( 33), UINT8_C(110), UINT8_C(247), UINT8_C(227), UINT8_C(141), UINT8_C(139), UINT8_C( 52), UINT8_C(130), UINT8_C(241), UINT8_C(138), UINT8_C(133), UINT8_C(240), UINT8_C( 72), UINT8_C( 84), 
UINT8_C( 54), UINT8_C( 76), UINT8_C(208), UINT8_C(177), UINT8_C(100), UINT8_C(198), UINT8_C( 30), UINT8_C( 35), UINT8_C( 68), UINT8_C(216), UINT8_C( 8), UINT8_C( 68), UINT8_C(120), UINT8_C(187), UINT8_C(206), UINT8_C(102), UINT8_C(133), UINT8_C( 95), UINT8_C(110)), simde_x_mm512_set_epu8(UINT8_C(221), UINT8_C(100), UINT8_C(182), UINT8_C(149), UINT8_C( 44), UINT8_C(198), UINT8_C( 20), UINT8_C( 23), UINT8_C(105), UINT8_C(188), UINT8_C(118), UINT8_C(243), UINT8_C(175), UINT8_C( 41), UINT8_C(183), UINT8_C(141), UINT8_C(145), UINT8_C( 21), UINT8_C( 99), UINT8_C(117), UINT8_C(242), UINT8_C(144), UINT8_C( 71), UINT8_C( 21), UINT8_C(142), UINT8_C(181), UINT8_C( 66), UINT8_C(137), UINT8_C(194), UINT8_C(226), UINT8_C( 86), UINT8_C(128), UINT8_C(109), UINT8_C( 15), UINT8_C(187), UINT8_C( 22), UINT8_C(243), UINT8_C( 38), UINT8_C(121), UINT8_C(219), UINT8_C(188), UINT8_C( 82), UINT8_C(155), UINT8_C(152), UINT8_C( 90), UINT8_C( 31), UINT8_C(221), UINT8_C(155), UINT8_C(144), UINT8_C( 56), UINT8_C(142), UINT8_C( 62), UINT8_C( 7), UINT8_C( 8), UINT8_C(106), UINT8_C( 27), UINT8_C( 54), UINT8_C(148), UINT8_C(143), UINT8_C(189), UINT8_C( 55), UINT8_C(145), UINT8_C(173), UINT8_C(120)) }, { simde_x_mm512_set_epu8(UINT8_C(197), UINT8_C(175), UINT8_C(198), UINT8_C( 71), UINT8_C(137), UINT8_C(191), UINT8_C(178), UINT8_C(155), UINT8_C(242), UINT8_C( 4), UINT8_C(232), UINT8_C(161), UINT8_C(106), UINT8_C( 31), UINT8_C(104), UINT8_C( 20), UINT8_C( 65), UINT8_C(248), UINT8_C(181), UINT8_C(128), UINT8_C(175), UINT8_C( 68), UINT8_C(170), UINT8_C( 98), UINT8_C(201), UINT8_C( 10), UINT8_C( 75), UINT8_C( 51), UINT8_C(199), UINT8_C(145), UINT8_C( 87), UINT8_C( 47), UINT8_C(235), UINT8_C(105), UINT8_C( 17), UINT8_C(107), UINT8_C(137), UINT8_C(238), UINT8_C(133), UINT8_C( 81), UINT8_C( 54), UINT8_C(134), UINT8_C(173), UINT8_C( 81), UINT8_C( 21), UINT8_C( 13), UINT8_C( 6), UINT8_C(200), UINT8_C(140), UINT8_C(177), UINT8_C(141), UINT8_C( 81), UINT8_C( 31), UINT8_C( 29), UINT8_C(140), UINT8_C(138), 
UINT8_C(175), UINT8_C( 98), UINT8_C( 23), UINT8_C(148), UINT8_C(130), UINT8_C( 95), UINT8_C(212), UINT8_C(200)), UINT64_C( 518008675), simde_x_mm512_set_epu8(UINT8_C(155), UINT8_C(133), UINT8_C( 64), UINT8_C(186), UINT8_C(210), UINT8_C(255), UINT8_C( 70), UINT8_C(210), UINT8_C( 96), UINT8_C( 45), UINT8_C( 57), UINT8_C(248), UINT8_C( 23), UINT8_C( 34), UINT8_C(240), UINT8_C(208), UINT8_C( 74), UINT8_C( 85), UINT8_C(150), UINT8_C( 98), UINT8_C( 81), UINT8_C(149), UINT8_C(213), UINT8_C( 64), UINT8_C(146), UINT8_C(124), UINT8_C(134), UINT8_C(133), UINT8_C( 20), UINT8_C(122), UINT8_C( 57), UINT8_C(241), UINT8_C( 58), UINT8_C( 90), UINT8_C(153), UINT8_C( 57), UINT8_C( 51), UINT8_C(138), UINT8_C( 37), UINT8_C(177), UINT8_C( 13), UINT8_C(116), UINT8_C(177), UINT8_C(238), UINT8_C(169), UINT8_C(177), UINT8_C(173), UINT8_C(231), UINT8_C(226), UINT8_C(216), UINT8_C(126), UINT8_C( 80), UINT8_C(182), UINT8_C( 71), UINT8_C(188), UINT8_C( 53), UINT8_C(255), UINT8_C(200), UINT8_C(176), UINT8_C( 17), UINT8_C(127), UINT8_C( 83), UINT8_C(247), UINT8_C( 0)), simde_x_mm512_set_epu8(UINT8_C(226), UINT8_C( 1), UINT8_C(238), UINT8_C(252), UINT8_C( 39), UINT8_C( 85), UINT8_C( 69), UINT8_C( 68), UINT8_C(143), UINT8_C(218), UINT8_C( 28), UINT8_C( 83), UINT8_C(225), UINT8_C( 61), UINT8_C( 37), UINT8_C( 67), UINT8_C( 46), UINT8_C(213), UINT8_C( 32), UINT8_C(183), UINT8_C(230), UINT8_C( 2), UINT8_C(250), UINT8_C(122), UINT8_C(205), UINT8_C(118), UINT8_C( 3), UINT8_C( 17), UINT8_C( 32), UINT8_C( 82), UINT8_C( 40), UINT8_C( 0), UINT8_C( 28), UINT8_C( 37), UINT8_C(253), UINT8_C(171), UINT8_C(164), UINT8_C( 45), UINT8_C(233), UINT8_C(198), UINT8_C(148), UINT8_C( 44), UINT8_C( 28), UINT8_C( 77), UINT8_C( 12), UINT8_C( 81), UINT8_C(153), UINT8_C( 7), UINT8_C(234), UINT8_C( 32), UINT8_C(228), UINT8_C(142), UINT8_C(168), UINT8_C( 42), UINT8_C( 92), UINT8_C( 70), UINT8_C(144), UINT8_C(203), UINT8_C(149), UINT8_C( 41), UINT8_C(206), UINT8_C(198), UINT8_C( 56), UINT8_C( 54)), 
simde_x_mm512_set_epu8(UINT8_C(197), UINT8_C(175), UINT8_C(198), UINT8_C( 71), UINT8_C(137), UINT8_C(191), UINT8_C(178), UINT8_C(155), UINT8_C(242), UINT8_C( 4), UINT8_C(232), UINT8_C(161), UINT8_C(106), UINT8_C( 31), UINT8_C(104), UINT8_C( 20), UINT8_C( 65), UINT8_C(248), UINT8_C(181), UINT8_C(128), UINT8_C(175), UINT8_C( 68), UINT8_C(170), UINT8_C( 98), UINT8_C(201), UINT8_C( 10), UINT8_C( 75), UINT8_C( 51), UINT8_C(199), UINT8_C(145), UINT8_C( 87), UINT8_C( 47), UINT8_C(235), UINT8_C(105), UINT8_C( 17), UINT8_C(114), UINT8_C(108), UINT8_C( 92), UINT8_C(135), UINT8_C( 81), UINT8_C( 81), UINT8_C( 80), UINT8_C(103), UINT8_C( 81), UINT8_C( 21), UINT8_C( 13), UINT8_C( 6), UINT8_C(200), UINT8_C(140), UINT8_C(177), UINT8_C(177), UINT8_C( 81), UINT8_C(175), UINT8_C( 57), UINT8_C(140), UINT8_C( 62), UINT8_C(175), UINT8_C(202), UINT8_C(163), UINT8_C(148), UINT8_C(130), UINT8_C( 95), UINT8_C(152), UINT8_C( 27)) }, { simde_x_mm512_set_epu8(UINT8_C(121), UINT8_C( 9), UINT8_C(193), UINT8_C(106), UINT8_C( 93), UINT8_C( 44), UINT8_C( 0), UINT8_C(223), UINT8_C(203), UINT8_C(101), UINT8_C( 76), UINT8_C( 37), UINT8_C( 94), UINT8_C(224), UINT8_C(152), UINT8_C(236), UINT8_C(208), UINT8_C( 45), UINT8_C( 88), UINT8_C(163), UINT8_C(104), UINT8_C( 42), UINT8_C(157), UINT8_C( 59), UINT8_C( 90), UINT8_C(187), UINT8_C(107), UINT8_C( 16), UINT8_C(138), UINT8_C(137), UINT8_C(196), UINT8_C( 51), UINT8_C(126), UINT8_C(178), UINT8_C(114), UINT8_C(181), UINT8_C(181), UINT8_C( 19), UINT8_C(113), UINT8_C( 84), UINT8_C( 47), UINT8_C(173), UINT8_C(230), UINT8_C(218), UINT8_C( 64), UINT8_C(150), UINT8_C(107), UINT8_C( 56), UINT8_C(130), UINT8_C(248), UINT8_C( 35), UINT8_C(112), UINT8_C(178), UINT8_C( 75), UINT8_C(231), UINT8_C( 1), UINT8_C(229), UINT8_C(189), UINT8_C( 49), UINT8_C( 75), UINT8_C(217), UINT8_C(188), UINT8_C(205), UINT8_C( 42)), UINT64_C( 4020943947), simde_x_mm512_set_epu8(UINT8_C(244), UINT8_C(209), UINT8_C(117), UINT8_C( 40), UINT8_C( 42), UINT8_C( 16), UINT8_C(230), UINT8_C(122), 
UINT8_C(122), UINT8_C(219), UINT8_C(158), UINT8_C(236), UINT8_C( 86), UINT8_C(169), UINT8_C(166), UINT8_C(144), UINT8_C(141), UINT8_C( 79), UINT8_C(123), UINT8_C( 33), UINT8_C(201), UINT8_C(131), UINT8_C(102), UINT8_C( 59), UINT8_C(199), UINT8_C( 19), UINT8_C(252), UINT8_C(201), UINT8_C(170), UINT8_C( 88), UINT8_C(209), UINT8_C( 29), UINT8_C(140), UINT8_C(198), UINT8_C(115), UINT8_C(193), UINT8_C(241), UINT8_C(202), UINT8_C( 84), UINT8_C(255), UINT8_C( 5), UINT8_C(223), UINT8_C(160), UINT8_C( 93), UINT8_C( 97), UINT8_C(124), UINT8_C( 26), UINT8_C(222), UINT8_C(175), UINT8_C(168), UINT8_C( 26), UINT8_C(155), UINT8_C(117), UINT8_C(221), UINT8_C(174), UINT8_C( 92), UINT8_C(115), UINT8_C(243), UINT8_C(104), UINT8_C( 83), UINT8_C( 80), UINT8_C(138), UINT8_C( 34), UINT8_C( 48)), simde_x_mm512_set_epu8(UINT8_C(168), UINT8_C(187), UINT8_C(118), UINT8_C( 36), UINT8_C(110), UINT8_C( 81), UINT8_C(219), UINT8_C( 36), UINT8_C(182), UINT8_C(147), UINT8_C( 47), UINT8_C( 12), UINT8_C(227), UINT8_C(175), UINT8_C( 76), UINT8_C(234), UINT8_C( 91), UINT8_C(125), UINT8_C( 98), UINT8_C( 17), UINT8_C(115), UINT8_C( 58), UINT8_C(149), UINT8_C( 90), UINT8_C(115), UINT8_C(232), UINT8_C( 83), UINT8_C( 17), UINT8_C(245), UINT8_C( 20), UINT8_C( 81), UINT8_C( 54), UINT8_C(197), UINT8_C(112), UINT8_C(154), UINT8_C( 13), UINT8_C( 8), UINT8_C(151), UINT8_C(229), UINT8_C(129), UINT8_C(144), UINT8_C(125), UINT8_C( 21), UINT8_C( 55), UINT8_C( 24), UINT8_C( 58), UINT8_C( 7), UINT8_C(127), UINT8_C(150), UINT8_C(222), UINT8_C(105), UINT8_C(207), UINT8_C(223), UINT8_C(121), UINT8_C( 0), UINT8_C(127), UINT8_C(191), UINT8_C(166), UINT8_C(133), UINT8_C(112), UINT8_C(199), UINT8_C( 77), UINT8_C( 42), UINT8_C( 34)), simde_x_mm512_set_epu8(UINT8_C(121), UINT8_C( 9), UINT8_C(193), UINT8_C(106), UINT8_C( 93), UINT8_C( 44), UINT8_C( 0), UINT8_C(223), UINT8_C(203), UINT8_C(101), UINT8_C( 76), UINT8_C( 37), UINT8_C( 94), UINT8_C(224), UINT8_C(152), UINT8_C(236), UINT8_C(208), UINT8_C( 45), UINT8_C( 88), 
UINT8_C(163), UINT8_C(104), UINT8_C( 42), UINT8_C(157), UINT8_C( 59), UINT8_C( 90), UINT8_C(187), UINT8_C(107), UINT8_C( 16), UINT8_C(138), UINT8_C(137), UINT8_C(196), UINT8_C( 51), UINT8_C(169), UINT8_C(155), UINT8_C(135), UINT8_C(181), UINT8_C(125), UINT8_C(177), UINT8_C(157), UINT8_C(192), UINT8_C( 75), UINT8_C(173), UINT8_C( 91), UINT8_C(218), UINT8_C( 61), UINT8_C(150), UINT8_C( 17), UINT8_C( 56), UINT8_C(163), UINT8_C(248), UINT8_C( 66), UINT8_C(181), UINT8_C(170), UINT8_C(171), UINT8_C(231), UINT8_C( 1), UINT8_C(229), UINT8_C(205), UINT8_C( 49), UINT8_C( 75), UINT8_C(140), UINT8_C(188), UINT8_C( 38), UINT8_C( 41)) }, { simde_x_mm512_set_epu8(UINT8_C( 84), UINT8_C(222), UINT8_C( 36), UINT8_C(132), UINT8_C( 32), UINT8_C(182), UINT8_C( 73), UINT8_C(182), UINT8_C( 77), UINT8_C(116), UINT8_C( 50), UINT8_C( 82), UINT8_C( 68), UINT8_C( 72), UINT8_C( 23), UINT8_C( 32), UINT8_C(202), UINT8_C( 82), UINT8_C( 53), UINT8_C( 71), UINT8_C( 22), UINT8_C( 92), UINT8_C( 42), UINT8_C(133), UINT8_C(215), UINT8_C( 34), UINT8_C( 75), UINT8_C( 63), UINT8_C(139), UINT8_C( 23), UINT8_C(141), UINT8_C( 66), UINT8_C(166), UINT8_C( 99), UINT8_C(183), UINT8_C(237), UINT8_C(213), UINT8_C(192), UINT8_C(235), UINT8_C( 20), UINT8_C(108), UINT8_C(253), UINT8_C(147), UINT8_C(214), UINT8_C( 44), UINT8_C(210), UINT8_C( 20), UINT8_C( 90), UINT8_C( 2), UINT8_C(252), UINT8_C(108), UINT8_C( 27), UINT8_C(207), UINT8_C( 69), UINT8_C(172), UINT8_C( 82), UINT8_C( 9), UINT8_C( 0), UINT8_C( 42), UINT8_C(118), UINT8_C(253), UINT8_C(189), UINT8_C( 6), UINT8_C( 30)), UINT64_C( 589511266), simde_x_mm512_set_epu8(UINT8_C(159), UINT8_C(199), UINT8_C( 92), UINT8_C(122), UINT8_C( 12), UINT8_C( 6), UINT8_C( 73), UINT8_C(222), UINT8_C( 93), UINT8_C(236), UINT8_C(196), UINT8_C(151), UINT8_C( 37), UINT8_C(206), UINT8_C(135), UINT8_C(234), UINT8_C(134), UINT8_C( 99), UINT8_C(198), UINT8_C( 81), UINT8_C( 12), UINT8_C(102), UINT8_C(195), UINT8_C(222), UINT8_C( 31), UINT8_C( 64), UINT8_C( 80), UINT8_C( 65), UINT8_C(110), 
UINT8_C(185), UINT8_C( 8), UINT8_C( 98), UINT8_C(113), UINT8_C( 81), UINT8_C(193), UINT8_C(220), UINT8_C(195), UINT8_C(204), UINT8_C( 7), UINT8_C(184), UINT8_C(203), UINT8_C( 3), UINT8_C(246), UINT8_C(115), UINT8_C(106), UINT8_C(122), UINT8_C( 85), UINT8_C(105), UINT8_C( 90), UINT8_C( 75), UINT8_C(186), UINT8_C( 89), UINT8_C( 25), UINT8_C(170), UINT8_C(216), UINT8_C(247), UINT8_C(137), UINT8_C(237), UINT8_C(110), UINT8_C(230), UINT8_C(130), UINT8_C(124), UINT8_C( 6), UINT8_C(245)), simde_x_mm512_set_epu8(UINT8_C( 79), UINT8_C( 23), UINT8_C(207), UINT8_C( 80), UINT8_C(207), UINT8_C( 81), UINT8_C( 7), UINT8_C(130), UINT8_C(181), UINT8_C(246), UINT8_C(189), UINT8_C(163), UINT8_C(120), UINT8_C( 35), UINT8_C(200), UINT8_C( 3), UINT8_C( 24), UINT8_C(158), UINT8_C(243), UINT8_C( 3), UINT8_C(150), UINT8_C(228), UINT8_C(184), UINT8_C(101), UINT8_C(219), UINT8_C(121), UINT8_C( 65), UINT8_C( 30), UINT8_C(249), UINT8_C( 33), UINT8_C(115), UINT8_C(122), UINT8_C(244), UINT8_C(143), UINT8_C(149), UINT8_C( 29), UINT8_C(178), UINT8_C(131), UINT8_C(101), UINT8_C(120), UINT8_C( 82), UINT8_C(219), UINT8_C(186), UINT8_C(193), UINT8_C(234), UINT8_C(139), UINT8_C( 28), UINT8_C( 91), UINT8_C(105), UINT8_C(250), UINT8_C(223), UINT8_C(189), UINT8_C(224), UINT8_C(245), UINT8_C(188), UINT8_C( 51), UINT8_C( 24), UINT8_C( 89), UINT8_C( 22), UINT8_C( 52), UINT8_C(174), UINT8_C(224), UINT8_C( 67), UINT8_C(156)), simde_x_mm512_set_epu8(UINT8_C( 84), UINT8_C(222), UINT8_C( 36), UINT8_C(132), UINT8_C( 32), UINT8_C(182), UINT8_C( 73), UINT8_C(182), UINT8_C( 77), UINT8_C(116), UINT8_C( 50), UINT8_C( 82), UINT8_C( 68), UINT8_C( 72), UINT8_C( 23), UINT8_C( 32), UINT8_C(202), UINT8_C( 82), UINT8_C( 53), UINT8_C( 71), UINT8_C( 22), UINT8_C( 92), UINT8_C( 42), UINT8_C(133), UINT8_C(215), UINT8_C( 34), UINT8_C( 75), UINT8_C( 63), UINT8_C(139), UINT8_C( 23), UINT8_C(141), UINT8_C( 66), UINT8_C(166), UINT8_C( 99), UINT8_C(171), UINT8_C(237), UINT8_C(213), UINT8_C(192), UINT8_C( 54), UINT8_C(152), 
UINT8_C(108), UINT8_C(253), UINT8_C(216), UINT8_C(214), UINT8_C( 44), UINT8_C(210), UINT8_C( 57), UINT8_C( 98), UINT8_C( 2), UINT8_C(252), UINT8_C(205), UINT8_C(139), UINT8_C(125), UINT8_C( 69), UINT8_C(202), UINT8_C( 82), UINT8_C( 9), UINT8_C(163), UINT8_C( 66), UINT8_C(118), UINT8_C(253), UINT8_C(189), UINT8_C( 37), UINT8_C( 30)) }, { simde_x_mm512_set_epu8(UINT8_C(122), UINT8_C( 31), UINT8_C(165), UINT8_C(180), UINT8_C(160), UINT8_C(238), UINT8_C( 21), UINT8_C( 93), UINT8_C(118), UINT8_C( 87), UINT8_C(251), UINT8_C( 43), UINT8_C(165), UINT8_C(232), UINT8_C(151), UINT8_C(210), UINT8_C(191), UINT8_C( 60), UINT8_C(141), UINT8_C(172), UINT8_C(143), UINT8_C(152), UINT8_C( 64), UINT8_C(105), UINT8_C( 15), UINT8_C(129), UINT8_C(227), UINT8_C( 58), UINT8_C( 92), UINT8_C(145), UINT8_C( 98), UINT8_C(228), UINT8_C(166), UINT8_C( 72), UINT8_C(150), UINT8_C( 59), UINT8_C( 16), UINT8_C( 1), UINT8_C(194), UINT8_C( 73), UINT8_C(230), UINT8_C(150), UINT8_C(242), UINT8_C( 38), UINT8_C(140), UINT8_C(234), UINT8_C( 77), UINT8_C( 54), UINT8_C( 3), UINT8_C(126), UINT8_C(200), UINT8_C(132), UINT8_C(171), UINT8_C(245), UINT8_C(169), UINT8_C(148), UINT8_C(166), UINT8_C( 7), UINT8_C( 6), UINT8_C(227), UINT8_C( 91), UINT8_C(103), UINT8_C( 93), UINT8_C(244)), UINT64_C( 1742408254), simde_x_mm512_set_epu8(UINT8_C(195), UINT8_C(222), UINT8_C( 29), UINT8_C(255), UINT8_C(195), UINT8_C(117), UINT8_C(247), UINT8_C(187), UINT8_C( 69), UINT8_C(188), UINT8_C(174), UINT8_C(241), UINT8_C(175), UINT8_C(167), UINT8_C(226), UINT8_C( 66), UINT8_C(177), UINT8_C(104), UINT8_C(172), UINT8_C(245), UINT8_C( 53), UINT8_C( 17), UINT8_C( 64), UINT8_C( 70), UINT8_C(209), UINT8_C(113), UINT8_C( 86), UINT8_C(118), UINT8_C( 56), UINT8_C( 92), UINT8_C(177), UINT8_C(185), UINT8_C(205), UINT8_C(241), UINT8_C(149), UINT8_C(204), UINT8_C(155), UINT8_C( 35), UINT8_C(114), UINT8_C( 12), UINT8_C(212), UINT8_C( 63), UINT8_C( 66), UINT8_C( 74), UINT8_C( 97), UINT8_C(253), UINT8_C(218), UINT8_C( 22), UINT8_C( 38), UINT8_C( 
68), UINT8_C(247), UINT8_C(250), UINT8_C(215), UINT8_C( 66), UINT8_C( 67), UINT8_C( 91), UINT8_C(177), UINT8_C(159), UINT8_C(198), UINT8_C(145), UINT8_C(183), UINT8_C( 26), UINT8_C( 66), UINT8_C(165)), simde_x_mm512_set_epu8(UINT8_C(104), UINT8_C(148), UINT8_C(187), UINT8_C(240), UINT8_C(224), UINT8_C(131), UINT8_C( 1), UINT8_C(106), UINT8_C( 77), UINT8_C(151), UINT8_C(127), UINT8_C(197), UINT8_C(118), UINT8_C( 35), UINT8_C( 55), UINT8_C(144), UINT8_C( 54), UINT8_C(201), UINT8_C( 41), UINT8_C(221), UINT8_C(189), UINT8_C( 99), UINT8_C(112), UINT8_C(181), UINT8_C( 52), UINT8_C(200), UINT8_C(153), UINT8_C(231), UINT8_C(146), UINT8_C(105), UINT8_C(102), UINT8_C(227), UINT8_C(214), UINT8_C(182), UINT8_C( 31), UINT8_C( 41), UINT8_C( 93), UINT8_C(234), UINT8_C( 82), UINT8_C( 71), UINT8_C( 52), UINT8_C(241), UINT8_C(224), UINT8_C( 69), UINT8_C( 73), UINT8_C( 93), UINT8_C(195), UINT8_C( 84), UINT8_C(170), UINT8_C(173), UINT8_C(170), UINT8_C( 88), UINT8_C( 60), UINT8_C(109), UINT8_C( 86), UINT8_C(119), UINT8_C(141), UINT8_C(206), UINT8_C( 51), UINT8_C( 6), UINT8_C( 71), UINT8_C(253), UINT8_C( 22), UINT8_C( 92)), simde_x_mm512_set_epu8(UINT8_C(122), UINT8_C( 31), UINT8_C(165), UINT8_C(180), UINT8_C(160), UINT8_C(238), UINT8_C( 21), UINT8_C( 93), UINT8_C(118), UINT8_C( 87), UINT8_C(251), UINT8_C( 43), UINT8_C(165), UINT8_C(232), UINT8_C(151), UINT8_C(210), UINT8_C(191), UINT8_C( 60), UINT8_C(141), UINT8_C(172), UINT8_C(143), UINT8_C(152), UINT8_C( 64), UINT8_C(105), UINT8_C( 15), UINT8_C(129), UINT8_C(227), UINT8_C( 58), UINT8_C( 92), UINT8_C(145), UINT8_C( 98), UINT8_C(228), UINT8_C(166), UINT8_C(212), UINT8_C( 90), UINT8_C( 59), UINT8_C( 16), UINT8_C(135), UINT8_C( 98), UINT8_C( 42), UINT8_C(132), UINT8_C(152), UINT8_C(242), UINT8_C( 72), UINT8_C( 85), UINT8_C(234), UINT8_C(207), UINT8_C( 53), UINT8_C( 3), UINT8_C(126), UINT8_C(200), UINT8_C(132), UINT8_C(138), UINT8_C(245), UINT8_C( 77), UINT8_C(148), UINT8_C(166), UINT8_C( 7), UINT8_C(125), UINT8_C( 76), UINT8_C(127), 
UINT8_C(140), UINT8_C( 44), UINT8_C(244)) }, { simde_x_mm512_set_epu8(UINT8_C(204), UINT8_C( 72), UINT8_C(160), UINT8_C( 51), UINT8_C( 17), UINT8_C( 91), UINT8_C( 99), UINT8_C(232), UINT8_C(230), UINT8_C( 75), UINT8_C( 98), UINT8_C(249), UINT8_C(240), UINT8_C(193), UINT8_C( 40), UINT8_C(144), UINT8_C( 13), UINT8_C(232), UINT8_C(120), UINT8_C( 62), UINT8_C(136), UINT8_C( 7), UINT8_C(252), UINT8_C( 3), UINT8_C(225), UINT8_C( 43), UINT8_C(176), UINT8_C(138), UINT8_C(205), UINT8_C( 22), UINT8_C(172), UINT8_C( 52), UINT8_C(224), UINT8_C(163), UINT8_C( 92), UINT8_C(198), UINT8_C(193), UINT8_C( 77), UINT8_C( 2), UINT8_C( 58), UINT8_C(117), UINT8_C(166), UINT8_C( 25), UINT8_C( 65), UINT8_C( 70), UINT8_C( 62), UINT8_C(155), UINT8_C(179), UINT8_C(194), UINT8_C(252), UINT8_C(155), UINT8_C(113), UINT8_C( 69), UINT8_C( 48), UINT8_C( 10), UINT8_C( 99), UINT8_C( 95), UINT8_C(207), UINT8_C( 28), UINT8_C(202), UINT8_C(125), UINT8_C(229), UINT8_C(112), UINT8_C( 77)), UINT64_C( 3364149510), simde_x_mm512_set_epu8(UINT8_C( 11), UINT8_C( 82), UINT8_C( 84), UINT8_C(224), UINT8_C(162), UINT8_C(242), UINT8_C(204), UINT8_C( 68), UINT8_C(182), UINT8_C( 59), UINT8_C( 47), UINT8_C(221), UINT8_C( 34), UINT8_C(151), UINT8_C(204), UINT8_C( 95), UINT8_C(116), UINT8_C( 40), UINT8_C(133), UINT8_C(233), UINT8_C(222), UINT8_C( 50), UINT8_C( 44), UINT8_C( 33), UINT8_C( 29), UINT8_C(204), UINT8_C(121), UINT8_C( 27), UINT8_C(138), UINT8_C(185), UINT8_C( 5), UINT8_C(107), UINT8_C( 3), UINT8_C( 36), UINT8_C( 56), UINT8_C(124), UINT8_C(129), UINT8_C( 58), UINT8_C( 37), UINT8_C(111), UINT8_C(254), UINT8_C(180), UINT8_C( 54), UINT8_C(250), UINT8_C( 42), UINT8_C( 75), UINT8_C(154), UINT8_C(231), UINT8_C(115), UINT8_C(171), UINT8_C(247), UINT8_C(142), UINT8_C( 9), UINT8_C(239), UINT8_C(101), UINT8_C( 47), UINT8_C( 79), UINT8_C(209), UINT8_C(133), UINT8_C(218), UINT8_C(253), UINT8_C( 22), UINT8_C( 28), UINT8_C(216)), simde_x_mm512_set_epu8(UINT8_C( 60), UINT8_C( 92), UINT8_C(115), UINT8_C( 5), UINT8_C(100), 
UINT8_C( 27), UINT8_C(227), UINT8_C(100), UINT8_C( 89), UINT8_C(205), UINT8_C( 67), UINT8_C( 82), UINT8_C(155), UINT8_C(204), UINT8_C(152), UINT8_C(151), UINT8_C(209), UINT8_C(229), UINT8_C(210), UINT8_C( 14), UINT8_C(231), UINT8_C( 89), UINT8_C(236), UINT8_C(229), UINT8_C(214), UINT8_C( 93), UINT8_C( 51), UINT8_C(119), UINT8_C( 13), UINT8_C( 60), UINT8_C(212), UINT8_C(222), UINT8_C( 67), UINT8_C( 38), UINT8_C( 63), UINT8_C(141), UINT8_C( 12), UINT8_C(159), UINT8_C( 36), UINT8_C(134), UINT8_C( 6), UINT8_C(139), UINT8_C( 90), UINT8_C(132), UINT8_C(140), UINT8_C(228), UINT8_C(152), UINT8_C(216), UINT8_C(137), UINT8_C(105), UINT8_C(237), UINT8_C(202), UINT8_C( 29), UINT8_C(230), UINT8_C( 44), UINT8_C( 5), UINT8_C( 39), UINT8_C( 73), UINT8_C(177), UINT8_C(242), UINT8_C(209), UINT8_C( 96), UINT8_C( 58), UINT8_C(209)), simde_x_mm512_set_epu8(UINT8_C(204), UINT8_C( 72), UINT8_C(160), UINT8_C( 51), UINT8_C( 17), UINT8_C( 91), UINT8_C( 99), UINT8_C(232), UINT8_C(230), UINT8_C( 75), UINT8_C( 98), UINT8_C(249), UINT8_C(240), UINT8_C(193), UINT8_C( 40), UINT8_C(144), UINT8_C( 13), UINT8_C(232), UINT8_C(120), UINT8_C( 62), UINT8_C(136), UINT8_C( 7), UINT8_C(252), UINT8_C( 3), UINT8_C(225), UINT8_C( 43), UINT8_C(176), UINT8_C(138), UINT8_C(205), UINT8_C( 22), UINT8_C(172), UINT8_C( 52), UINT8_C( 35), UINT8_C( 37), UINT8_C( 92), UINT8_C(198), UINT8_C( 71), UINT8_C( 77), UINT8_C( 2), UINT8_C( 58), UINT8_C(130), UINT8_C(166), UINT8_C( 25), UINT8_C( 65), UINT8_C( 70), UINT8_C(152), UINT8_C(155), UINT8_C(179), UINT8_C(126), UINT8_C(138), UINT8_C(155), UINT8_C(172), UINT8_C( 19), UINT8_C( 48), UINT8_C( 10), UINT8_C( 26), UINT8_C( 95), UINT8_C(207), UINT8_C( 28), UINT8_C(202), UINT8_C(125), UINT8_C( 59), UINT8_C( 43), UINT8_C( 77)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_avg_epu8(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_u32(r, ==, test_vec[i].r); } return 0; } static int 
test_simde_mm512_maskz_avg_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask64 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT64_C( 1713497089), simde_x_mm512_set_epu8(UINT8_C(151), UINT8_C( 80), UINT8_C(240), UINT8_C(132), UINT8_C(208), UINT8_C( 76), UINT8_C(165), UINT8_C(128), UINT8_C( 54), UINT8_C( 63), UINT8_C(114), UINT8_C(183), UINT8_C(230), UINT8_C(208), UINT8_C(232), UINT8_C(243), UINT8_C( 5), UINT8_C(123), UINT8_C(211), UINT8_C(199), UINT8_C(149), UINT8_C( 47), UINT8_C( 90), UINT8_C(202), UINT8_C( 1), UINT8_C(118), UINT8_C( 37), UINT8_C(249), UINT8_C( 83), UINT8_C( 31), UINT8_C(233), UINT8_C(236), UINT8_C(152), UINT8_C(114), UINT8_C( 63), UINT8_C( 25), UINT8_C(176), UINT8_C( 17), UINT8_C( 37), UINT8_C(212), UINT8_C(144), UINT8_C( 41), UINT8_C(238), UINT8_C( 86), UINT8_C(114), UINT8_C(233), UINT8_C(170), UINT8_C(157), UINT8_C(114), UINT8_C( 25), UINT8_C( 94), UINT8_C( 34), UINT8_C(208), UINT8_C(252), UINT8_C(133), UINT8_C(212), UINT8_C(188), UINT8_C( 19), UINT8_C( 47), UINT8_C(134), UINT8_C(117), UINT8_C( 69), UINT8_C(135), UINT8_C( 66)), simde_x_mm512_set_epu8(UINT8_C(180), UINT8_C(237), UINT8_C(100), UINT8_C( 59), UINT8_C(192), UINT8_C(196), UINT8_C(203), UINT8_C( 16), UINT8_C( 0), UINT8_C(167), UINT8_C( 13), UINT8_C( 17), UINT8_C(116), UINT8_C( 41), UINT8_C( 54), UINT8_C(248), UINT8_C(144), UINT8_C(109), UINT8_C( 94), UINT8_C( 19), UINT8_C( 46), UINT8_C(201), UINT8_C(103), UINT8_C( 7), UINT8_C(241), UINT8_C(244), UINT8_C(234), UINT8_C(127), UINT8_C(208), UINT8_C(173), UINT8_C(247), UINT8_C(171), UINT8_C(177), UINT8_C(244), UINT8_C( 76), UINT8_C(191), UINT8_C(166), UINT8_C( 19), UINT8_C( 33), UINT8_C(206), UINT8_C( 89), UINT8_C(216), UINT8_C(139), UINT8_C(111), UINT8_C( 48), UINT8_C(119), UINT8_C(201), UINT8_C( 66), UINT8_C(113), UINT8_C(254), UINT8_C(207), UINT8_C(146), UINT8_C(201), UINT8_C( 44), UINT8_C(125), UINT8_C(195), UINT8_C(221), UINT8_C(218), UINT8_C(229), UINT8_C(236), UINT8_C( 32), UINT8_C(197), 
UINT8_C(234), UINT8_C(125)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(179), UINT8_C( 70), UINT8_C( 0), UINT8_C( 0), UINT8_C( 18), UINT8_C( 35), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(189), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(112), UINT8_C(114), UINT8_C(140), UINT8_C(151), UINT8_C( 0), UINT8_C( 0), UINT8_C(148), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 96)) }, { UINT64_C( 4250428108), simde_x_mm512_set_epu8(UINT8_C(107), UINT8_C(163), UINT8_C(179), UINT8_C( 53), UINT8_C(126), UINT8_C( 10), UINT8_C(123), UINT8_C(143), UINT8_C(215), UINT8_C(148), UINT8_C(197), UINT8_C(220), UINT8_C(232), UINT8_C(205), UINT8_C(188), UINT8_C(218), UINT8_C( 19), UINT8_C(120), UINT8_C(138), UINT8_C( 63), UINT8_C( 24), UINT8_C( 72), UINT8_C( 39), UINT8_C( 31), UINT8_C(164), UINT8_C( 52), UINT8_C( 81), UINT8_C( 39), UINT8_C(186), UINT8_C( 73), UINT8_C( 76), UINT8_C(114), UINT8_C(249), UINT8_C( 4), UINT8_C(201), UINT8_C(188), UINT8_C(120), UINT8_C( 98), UINT8_C(141), UINT8_C(200), UINT8_C( 93), UINT8_C(254), UINT8_C( 78), UINT8_C( 16), UINT8_C( 88), UINT8_C( 71), UINT8_C(144), UINT8_C(138), UINT8_C( 4), UINT8_C(168), UINT8_C( 76), UINT8_C( 88), UINT8_C(159), UINT8_C(107), UINT8_C(228), UINT8_C(197), UINT8_C(209), UINT8_C(111), UINT8_C( 61), UINT8_C(147), UINT8_C(137), UINT8_C( 97), UINT8_C(105), UINT8_C(160)), simde_x_mm512_set_epu8(UINT8_C( 95), UINT8_C(238), UINT8_C( 11), UINT8_C( 96), UINT8_C(252), UINT8_C(162), UINT8_C(116), UINT8_C(225), UINT8_C( 52), 
UINT8_C(254), UINT8_C( 98), UINT8_C( 10), UINT8_C( 5), UINT8_C( 19), UINT8_C(191), UINT8_C( 10), UINT8_C(147), UINT8_C( 52), UINT8_C(171), UINT8_C(224), UINT8_C( 38), UINT8_C( 92), UINT8_C( 6), UINT8_C(185), UINT8_C(177), UINT8_C( 79), UINT8_C(162), UINT8_C(113), UINT8_C(139), UINT8_C( 20), UINT8_C(174), UINT8_C( 82), UINT8_C(136), UINT8_C(114), UINT8_C(204), UINT8_C(188), UINT8_C(236), UINT8_C(209), UINT8_C(166), UINT8_C(169), UINT8_C( 79), UINT8_C(219), UINT8_C( 63), UINT8_C(167), UINT8_C(216), UINT8_C(189), UINT8_C(187), UINT8_C(139), UINT8_C(254), UINT8_C(206), UINT8_C(156), UINT8_C(171), UINT8_C(128), UINT8_C(139), UINT8_C( 97), UINT8_C(237), UINT8_C( 71), UINT8_C(239), UINT8_C( 0), UINT8_C(174), UINT8_C(229), UINT8_C(124), UINT8_C( 45), UINT8_C( 57)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(193), UINT8_C( 59), UINT8_C(203), UINT8_C(188), UINT8_C(178), UINT8_C(154), UINT8_C( 0), UINT8_C(185), UINT8_C( 0), UINT8_C(237), UINT8_C( 0), UINT8_C( 92), UINT8_C(152), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(187), UINT8_C(116), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(163), UINT8_C( 0), UINT8_C(140), UINT8_C(175), UINT8_C( 0), UINT8_C( 0), UINT8_C(183), UINT8_C(111), UINT8_C( 0), UINT8_C( 0)) }, { UINT64_C( 3228288806), simde_x_mm512_set_epu8(UINT8_C( 83), UINT8_C( 39), UINT8_C(120), UINT8_C(115), UINT8_C(205), UINT8_C(228), UINT8_C(102), UINT8_C(158), UINT8_C(179), UINT8_C(121), UINT8_C( 42), UINT8_C(114), UINT8_C(255), UINT8_C(112), UINT8_C( 17), UINT8_C(225), UINT8_C(108), UINT8_C(229), UINT8_C( 66), UINT8_C( 23), UINT8_C( 69), UINT8_C(166), 
UINT8_C(210), UINT8_C(165), UINT8_C(175), UINT8_C(169), UINT8_C( 1), UINT8_C(245), UINT8_C( 84), UINT8_C(139), UINT8_C( 79), UINT8_C(146), UINT8_C(212), UINT8_C(226), UINT8_C( 33), UINT8_C( 53), UINT8_C( 64), UINT8_C(240), UINT8_C(145), UINT8_C(215), UINT8_C(154), UINT8_C( 13), UINT8_C( 97), UINT8_C(201), UINT8_C( 19), UINT8_C(240), UINT8_C(188), UINT8_C(173), UINT8_C(228), UINT8_C( 88), UINT8_C( 41), UINT8_C( 18), UINT8_C(177), UINT8_C(249), UINT8_C(161), UINT8_C(144), UINT8_C(168), UINT8_C(228), UINT8_C(181), UINT8_C( 34), UINT8_C(226), UINT8_C(255), UINT8_C( 52), UINT8_C(164)), simde_x_mm512_set_epu8(UINT8_C( 60), UINT8_C(216), UINT8_C( 36), UINT8_C(217), UINT8_C(176), UINT8_C(146), UINT8_C( 14), UINT8_C(195), UINT8_C(217), UINT8_C(186), UINT8_C(140), UINT8_C(157), UINT8_C(174), UINT8_C(143), UINT8_C(136), UINT8_C(140), UINT8_C(198), UINT8_C( 18), UINT8_C( 72), UINT8_C( 23), UINT8_C(139), UINT8_C(151), UINT8_C( 83), UINT8_C( 3), UINT8_C(152), UINT8_C( 34), UINT8_C( 72), UINT8_C(223), UINT8_C( 84), UINT8_C(166), UINT8_C(140), UINT8_C(210), UINT8_C(238), UINT8_C( 96), UINT8_C(210), UINT8_C(147), UINT8_C(153), UINT8_C(238), UINT8_C(217), UINT8_C( 67), UINT8_C(118), UINT8_C( 12), UINT8_C(237), UINT8_C(171), UINT8_C( 23), UINT8_C( 75), UINT8_C(178), UINT8_C(118), UINT8_C(128), UINT8_C( 11), UINT8_C(225), UINT8_C(116), UINT8_C(179), UINT8_C( 97), UINT8_C( 87), UINT8_C( 53), UINT8_C(223), UINT8_C( 37), UINT8_C( 28), UINT8_C( 24), UINT8_C(153), UINT8_C( 99), UINT8_C(181), UINT8_C( 41)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(225), UINT8_C(161), UINT8_C( 0), UINT8_C( 0), 
UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 13), UINT8_C(167), UINT8_C( 0), UINT8_C( 21), UINT8_C( 0), UINT8_C(183), UINT8_C(146), UINT8_C(178), UINT8_C( 50), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(173), UINT8_C(124), UINT8_C( 99), UINT8_C( 0), UINT8_C( 0), UINT8_C(105), UINT8_C( 0), UINT8_C( 0), UINT8_C(177), UINT8_C(117), UINT8_C( 0)) }, { UINT64_C( 74372392), simde_x_mm512_set_epu8(UINT8_C( 19), UINT8_C(169), UINT8_C( 42), UINT8_C(146), UINT8_C( 68), UINT8_C( 97), UINT8_C(131), UINT8_C( 75), UINT8_C( 30), UINT8_C(202), UINT8_C(218), UINT8_C(236), UINT8_C(160), UINT8_C( 84), UINT8_C(108), UINT8_C( 24), UINT8_C(202), UINT8_C(230), UINT8_C(131), UINT8_C(203), UINT8_C( 48), UINT8_C(178), UINT8_C(160), UINT8_C( 82), UINT8_C(240), UINT8_C(188), UINT8_C(191), UINT8_C( 28), UINT8_C(174), UINT8_C(140), UINT8_C(119), UINT8_C(143), UINT8_C(102), UINT8_C( 90), UINT8_C( 86), UINT8_C(242), UINT8_C(207), UINT8_C( 71), UINT8_C( 2), UINT8_C( 28), UINT8_C(194), UINT8_C( 53), UINT8_C(214), UINT8_C(158), UINT8_C( 11), UINT8_C( 81), UINT8_C( 46), UINT8_C( 73), UINT8_C(250), UINT8_C(127), UINT8_C(146), UINT8_C(248), UINT8_C(106), UINT8_C( 95), UINT8_C(130), UINT8_C(129), UINT8_C(153), UINT8_C(235), UINT8_C(236), UINT8_C(185), UINT8_C(106), UINT8_C( 23), UINT8_C(205), UINT8_C(209)), simde_x_mm512_set_epu8(UINT8_C(221), UINT8_C(195), UINT8_C( 15), UINT8_C( 71), UINT8_C(113), UINT8_C(109), UINT8_C( 91), UINT8_C(139), UINT8_C( 0), UINT8_C(121), UINT8_C( 48), UINT8_C(109), UINT8_C( 55), UINT8_C(125), UINT8_C(144), UINT8_C( 80), UINT8_C( 48), UINT8_C( 40), UINT8_C( 32), UINT8_C(158), UINT8_C( 64), UINT8_C(225), UINT8_C(246), UINT8_C(250), UINT8_C(216), UINT8_C( 37), UINT8_C( 76), UINT8_C(205), UINT8_C( 27), UINT8_C(254), UINT8_C(155), UINT8_C(246), UINT8_C(218), UINT8_C( 69), UINT8_C( 1), UINT8_C(216), UINT8_C(164), UINT8_C( 62), UINT8_C( 73), UINT8_C(202), UINT8_C( 28), UINT8_C(155), UINT8_C(152), UINT8_C(139), UINT8_C( 24), UINT8_C(201), UINT8_C( 
82), UINT8_C(156), UINT8_C(214), UINT8_C( 62), UINT8_C(143), UINT8_C(110), UINT8_C(164), UINT8_C(127), UINT8_C(164), UINT8_C( 20), UINT8_C(221), UINT8_C( 35), UINT8_C( 30), UINT8_C(170), UINT8_C(120), UINT8_C( 91), UINT8_C(187), UINT8_C(207)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 67), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(104), UINT8_C(183), UINT8_C( 0), UINT8_C( 18), UINT8_C(141), UINT8_C( 64), UINT8_C( 0), UINT8_C(232), UINT8_C( 95), UINT8_C( 0), UINT8_C(179), UINT8_C( 0), UINT8_C(111), UINT8_C( 0), UINT8_C( 75), UINT8_C( 0), UINT8_C( 0), UINT8_C(133), UINT8_C( 0), UINT8_C(113), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0)) }, { UINT64_C( 1704849946), simde_x_mm512_set_epu8(UINT8_C( 75), UINT8_C(130), UINT8_C(174), UINT8_C(185), UINT8_C(240), UINT8_C(251), UINT8_C(233), UINT8_C(143), UINT8_C(196), UINT8_C( 48), UINT8_C(196), UINT8_C( 72), UINT8_C(201), UINT8_C(123), UINT8_C( 70), UINT8_C(237), UINT8_C( 30), UINT8_C(225), UINT8_C(201), UINT8_C(186), UINT8_C( 9), UINT8_C(202), UINT8_C( 73), UINT8_C(192), UINT8_C( 82), UINT8_C(124), UINT8_C( 29), UINT8_C( 8), UINT8_C( 64), UINT8_C( 66), UINT8_C(254), UINT8_C(144), UINT8_C(122), UINT8_C(124), UINT8_C( 90), UINT8_C( 22), UINT8_C(107), UINT8_C(236), UINT8_C( 37), UINT8_C(140), UINT8_C(185), UINT8_C( 48), UINT8_C(255), UINT8_C(239), UINT8_C(166), UINT8_C( 3), UINT8_C(222), UINT8_C( 36), UINT8_C(239), UINT8_C(218), UINT8_C(100), UINT8_C(226), UINT8_C(118), UINT8_C( 42), UINT8_C(231), UINT8_C(211), UINT8_C( 4), UINT8_C( 8), UINT8_C( 53), UINT8_C( 84), 
UINT8_C(136), UINT8_C( 61), UINT8_C( 90), UINT8_C(237)), simde_x_mm512_set_epu8(UINT8_C( 81), UINT8_C( 81), UINT8_C( 43), UINT8_C( 91), UINT8_C(149), UINT8_C(240), UINT8_C( 71), UINT8_C(161), UINT8_C(204), UINT8_C( 80), UINT8_C( 62), UINT8_C(128), UINT8_C(178), UINT8_C( 60), UINT8_C( 92), UINT8_C( 64), UINT8_C( 44), UINT8_C(188), UINT8_C(121), UINT8_C(108), UINT8_C( 90), UINT8_C( 48), UINT8_C( 8), UINT8_C(192), UINT8_C(189), UINT8_C(101), UINT8_C( 72), UINT8_C(114), UINT8_C(232), UINT8_C(233), UINT8_C(205), UINT8_C( 5), UINT8_C(117), UINT8_C( 79), UINT8_C( 29), UINT8_C(232), UINT8_C( 51), UINT8_C(176), UINT8_C(226), UINT8_C(160), UINT8_C(230), UINT8_C(138), UINT8_C( 64), UINT8_C(111), UINT8_C(190), UINT8_C(200), UINT8_C(126), UINT8_C(168), UINT8_C(222), UINT8_C( 5), UINT8_C(142), UINT8_C( 0), UINT8_C( 60), UINT8_C( 23), UINT8_C(145), UINT8_C(197), UINT8_C( 92), UINT8_C( 21), UINT8_C(182), UINT8_C( 99), UINT8_C(234), UINT8_C(209), UINT8_C(134), UINT8_C( 50)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(102), UINT8_C( 60), UINT8_C( 0), UINT8_C( 0), UINT8_C(206), UINT8_C( 0), UINT8_C(150), UINT8_C(208), UINT8_C( 0), UINT8_C( 0), UINT8_C(175), UINT8_C(178), UINT8_C(102), UINT8_C( 0), UINT8_C(102), UINT8_C(231), UINT8_C(112), UINT8_C(121), UINT8_C(113), UINT8_C( 0), UINT8_C( 0), UINT8_C(188), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 92), UINT8_C(185), UINT8_C( 0), UINT8_C(112), UINT8_C( 0)) }, { UINT64_C( 616766838), simde_x_mm512_set_epu8(UINT8_C(180), UINT8_C(137), UINT8_C( 51), UINT8_C( 92), UINT8_C(187), UINT8_C( 82), UINT8_C( 53), 
UINT8_C( 28), UINT8_C(223), UINT8_C(152), UINT8_C(200), UINT8_C(193), UINT8_C(225), UINT8_C(154), UINT8_C(231), UINT8_C( 61), UINT8_C(162), UINT8_C( 56), UINT8_C(127), UINT8_C(187), UINT8_C(196), UINT8_C(180), UINT8_C(126), UINT8_C( 42), UINT8_C(219), UINT8_C( 5), UINT8_C(113), UINT8_C(138), UINT8_C(204), UINT8_C(253), UINT8_C(215), UINT8_C(247), UINT8_C(235), UINT8_C( 43), UINT8_C(234), UINT8_C( 33), UINT8_C(112), UINT8_C(108), UINT8_C(220), UINT8_C( 90), UINT8_C(144), UINT8_C(181), UINT8_C( 55), UINT8_C( 53), UINT8_C( 64), UINT8_C(203), UINT8_C(190), UINT8_C(189), UINT8_C(102), UINT8_C(244), UINT8_C( 66), UINT8_C(205), UINT8_C( 39), UINT8_C(196), UINT8_C(165), UINT8_C(215), UINT8_C(172), UINT8_C( 65), UINT8_C( 68), UINT8_C(204), UINT8_C( 53), UINT8_C( 34), UINT8_C( 78), UINT8_C(127)), simde_x_mm512_set_epu8(UINT8_C(141), UINT8_C(142), UINT8_C(201), UINT8_C(183), UINT8_C( 28), UINT8_C(210), UINT8_C( 48), UINT8_C(119), UINT8_C(143), UINT8_C( 53), UINT8_C(224), UINT8_C( 96), UINT8_C(218), UINT8_C(201), UINT8_C(253), UINT8_C(241), UINT8_C( 77), UINT8_C(249), UINT8_C( 74), UINT8_C( 43), UINT8_C(181), UINT8_C(113), UINT8_C(189), UINT8_C(106), UINT8_C( 54), UINT8_C( 32), UINT8_C(202), UINT8_C( 67), UINT8_C(214), UINT8_C( 23), UINT8_C(167), UINT8_C( 40), UINT8_C(139), UINT8_C( 52), UINT8_C( 60), UINT8_C( 38), UINT8_C( 95), UINT8_C( 19), UINT8_C(138), UINT8_C(217), UINT8_C(209), UINT8_C( 61), UINT8_C( 63), UINT8_C(128), UINT8_C(163), UINT8_C( 56), UINT8_C(251), UINT8_C(135), UINT8_C( 69), UINT8_C(144), UINT8_C(129), UINT8_C(103), UINT8_C(244), UINT8_C( 97), UINT8_C(191), UINT8_C( 58), UINT8_C(119), UINT8_C( 19), UINT8_C(105), UINT8_C(174), UINT8_C(180), UINT8_C( 41), UINT8_C(251), UINT8_C(155)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 
0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(147), UINT8_C( 0), UINT8_C( 0), UINT8_C( 64), UINT8_C( 0), UINT8_C( 0), UINT8_C(177), UINT8_C(121), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(221), UINT8_C(162), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(154), UINT8_C(142), UINT8_C(147), UINT8_C( 0), UINT8_C(137), UINT8_C( 0), UINT8_C( 42), UINT8_C( 87), UINT8_C(189), UINT8_C( 0), UINT8_C( 38), UINT8_C(165), UINT8_C( 0)) }, { UINT64_C( 771922964), simde_x_mm512_set_epu8(UINT8_C( 84), UINT8_C(193), UINT8_C(196), UINT8_C(188), UINT8_C( 28), UINT8_C( 14), UINT8_C(183), UINT8_C( 63), UINT8_C(195), UINT8_C( 17), UINT8_C( 60), UINT8_C(196), UINT8_C(140), UINT8_C(189), UINT8_C(109), UINT8_C( 63), UINT8_C( 42), UINT8_C(192), UINT8_C(240), UINT8_C(166), UINT8_C( 14), UINT8_C(224), UINT8_C(252), UINT8_C( 17), UINT8_C( 46), UINT8_C(184), UINT8_C(253), UINT8_C( 54), UINT8_C( 94), UINT8_C( 75), UINT8_C(123), UINT8_C(145), UINT8_C(160), UINT8_C(164), UINT8_C(123), UINT8_C(216), UINT8_C(170), UINT8_C(112), UINT8_C(140), UINT8_C( 56), UINT8_C( 30), UINT8_C(182), UINT8_C(108), UINT8_C( 63), UINT8_C( 54), UINT8_C( 68), UINT8_C( 21), UINT8_C( 50), UINT8_C( 82), UINT8_C( 10), UINT8_C(167), UINT8_C(177), UINT8_C(210), UINT8_C( 2), UINT8_C( 48), UINT8_C( 73), UINT8_C(153), UINT8_C( 42), UINT8_C( 43), UINT8_C( 19), UINT8_C( 82), UINT8_C(112), UINT8_C(146), UINT8_C( 77)), simde_x_mm512_set_epu8(UINT8_C(117), UINT8_C( 61), UINT8_C(219), UINT8_C(166), UINT8_C(101), UINT8_C(146), UINT8_C(252), UINT8_C(220), UINT8_C(111), UINT8_C( 7), UINT8_C(233), UINT8_C( 32), UINT8_C(239), UINT8_C(243), UINT8_C(204), UINT8_C(249), UINT8_C(179), UINT8_C(241), UINT8_C(226), UINT8_C(120), UINT8_C(244), UINT8_C(118), UINT8_C(248), UINT8_C(141), UINT8_C(179), UINT8_C(252), UINT8_C(219), UINT8_C( 58), UINT8_C( 62), UINT8_C(244), UINT8_C(181), 
UINT8_C( 77), UINT8_C( 48), UINT8_C(136), UINT8_C(150), UINT8_C( 74), UINT8_C( 87), UINT8_C(176), UINT8_C(194), UINT8_C( 45), UINT8_C(190), UINT8_C(213), UINT8_C( 70), UINT8_C( 70), UINT8_C(177), UINT8_C(254), UINT8_C(160), UINT8_C( 78), UINT8_C(253), UINT8_C(131), UINT8_C(202), UINT8_C(108), UINT8_C(174), UINT8_C(186), UINT8_C(172), UINT8_C(193), UINT8_C(162), UINT8_C(235), UINT8_C(165), UINT8_C(158), UINT8_C(140), UINT8_C(172), UINT8_C(251), UINT8_C(116)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(137), UINT8_C( 0), UINT8_C(129), UINT8_C(144), UINT8_C(167), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 91), UINT8_C( 0), UINT8_C(168), UINT8_C( 0), UINT8_C( 0), UINT8_C(143), UINT8_C(192), UINT8_C( 94), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 89), UINT8_C( 0), UINT8_C(142), UINT8_C( 0), UINT8_C( 0)) }, { UINT64_C( 97696573), simde_x_mm512_set_epu8(UINT8_C( 56), UINT8_C(233), UINT8_C(161), UINT8_C( 70), UINT8_C( 29), UINT8_C(119), UINT8_C(231), UINT8_C(252), UINT8_C(132), UINT8_C(198), UINT8_C( 18), UINT8_C(195), UINT8_C( 88), UINT8_C(114), UINT8_C(173), UINT8_C( 54), UINT8_C( 75), UINT8_C( 3), UINT8_C(214), UINT8_C( 2), UINT8_C(139), UINT8_C(186), UINT8_C( 99), UINT8_C(154), UINT8_C(177), UINT8_C(145), UINT8_C(140), UINT8_C(170), UINT8_C(124), UINT8_C( 32), UINT8_C(208), UINT8_C(227), UINT8_C( 14), UINT8_C( 57), UINT8_C( 96), UINT8_C(190), UINT8_C(123), UINT8_C(184), UINT8_C( 51), UINT8_C( 34), UINT8_C(239), UINT8_C( 21), UINT8_C(170), UINT8_C(146), 
UINT8_C(232), UINT8_C( 0), UINT8_C(180), UINT8_C( 80), UINT8_C( 72), UINT8_C(231), UINT8_C(149), UINT8_C(142), UINT8_C(142), UINT8_C( 86), UINT8_C( 39), UINT8_C(243), UINT8_C( 33), UINT8_C(235), UINT8_C( 93), UINT8_C(129), UINT8_C( 46), UINT8_C(131), UINT8_C(184), UINT8_C(105)), simde_x_mm512_set_epu8(UINT8_C(188), UINT8_C(104), UINT8_C(150), UINT8_C( 91), UINT8_C(223), UINT8_C( 10), UINT8_C(154), UINT8_C(110), UINT8_C(234), UINT8_C(193), UINT8_C( 58), UINT8_C(132), UINT8_C( 18), UINT8_C(225), UINT8_C( 95), UINT8_C( 7), UINT8_C(253), UINT8_C( 41), UINT8_C(230), UINT8_C(186), UINT8_C( 9), UINT8_C( 18), UINT8_C(163), UINT8_C( 56), UINT8_C( 96), UINT8_C(249), UINT8_C( 80), UINT8_C( 62), UINT8_C(194), UINT8_C( 89), UINT8_C(163), UINT8_C(133), UINT8_C( 35), UINT8_C(148), UINT8_C(177), UINT8_C( 29), UINT8_C(195), UINT8_C(169), UINT8_C( 29), UINT8_C( 99), UINT8_C(232), UINT8_C( 69), UINT8_C( 58), UINT8_C(168), UINT8_C(144), UINT8_C(242), UINT8_C(220), UINT8_C( 1), UINT8_C( 48), UINT8_C(123), UINT8_C( 36), UINT8_C(240), UINT8_C( 56), UINT8_C(103), UINT8_C(183), UINT8_C( 7), UINT8_C(134), UINT8_C(138), UINT8_C(158), UINT8_C(199), UINT8_C( 65), UINT8_C( 92), UINT8_C(182), UINT8_C(118)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(177), UINT8_C( 0), UINT8_C( 67), UINT8_C(236), UINT8_C( 45), UINT8_C( 0), UINT8_C(157), UINT8_C( 0), UINT8_C( 0), UINT8_C(200), UINT8_C( 0), UINT8_C( 60), UINT8_C( 0), UINT8_C( 93), UINT8_C(191), UINT8_C( 99), UINT8_C( 0), UINT8_C(111), UINT8_C(125), UINT8_C( 0), UINT8_C( 0), UINT8_C(126), 
UINT8_C(164), UINT8_C( 56), UINT8_C(112), UINT8_C( 0), UINT8_C(112)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_avg_epu8(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_avg_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu16(UINT16_C( 13922), UINT16_C( 36897), UINT16_C( 58180), UINT16_C( 11516), UINT16_C( 48259), UINT16_C( 5769), UINT16_C( 28390), UINT16_C( 29726), UINT16_C( 60808), UINT16_C( 38778), UINT16_C( 21386), UINT16_C( 37563), UINT16_C( 21427), UINT16_C( 42840), UINT16_C( 63576), UINT16_C( 25988), UINT16_C( 52004), UINT16_C( 13742), UINT16_C( 20065), UINT16_C( 31945), UINT16_C( 3305), UINT16_C( 50297), UINT16_C( 19714), UINT16_C( 48635), UINT16_C( 64641), UINT16_C( 9835), UINT16_C( 33507), UINT16_C( 40606), UINT16_C( 49930), UINT16_C( 25911), UINT16_C( 16490), UINT16_C( 30266)), simde_x_mm512_set_epu16(UINT16_C( 35675), UINT16_C( 3257), UINT16_C( 12418), UINT16_C( 59056), UINT16_C( 54292), UINT16_C( 28377), UINT16_C( 47802), UINT16_C( 33906), UINT16_C( 48379), UINT16_C( 57830), UINT16_C( 42348), UINT16_C( 4361), UINT16_C( 62947), UINT16_C( 32240), UINT16_C( 12123), UINT16_C( 903), UINT16_C( 62311), UINT16_C( 39097), UINT16_C( 12552), UINT16_C( 17043), UINT16_C( 12967), UINT16_C( 14694), UINT16_C( 36399), UINT16_C( 44335), UINT16_C( 44269), UINT16_C( 14847), UINT16_C( 26083), UINT16_C( 50887), UINT16_C( 6575), UINT16_C( 58796), UINT16_C( 53958), UINT16_C( 55190)), simde_x_mm512_set_epu16(UINT16_C( 24799), UINT16_C( 20077), UINT16_C( 35299), UINT16_C( 35286), UINT16_C( 51276), UINT16_C( 17073), UINT16_C( 38096), UINT16_C( 31816), UINT16_C( 54594), UINT16_C( 48304), UINT16_C( 31867), UINT16_C( 20962), UINT16_C( 42187), UINT16_C( 37540), UINT16_C( 37850), UINT16_C( 13446), UINT16_C( 57158), UINT16_C( 26420), UINT16_C( 
16309), UINT16_C( 24494), UINT16_C( 8136), UINT16_C( 32496), UINT16_C( 28057), UINT16_C( 46485), UINT16_C( 54455), UINT16_C( 12341), UINT16_C( 29795), UINT16_C( 45747), UINT16_C( 28253), UINT16_C( 42354), UINT16_C( 35224), UINT16_C( 42728)) }, { simde_x_mm512_set_epu16(UINT16_C( 53467), UINT16_C( 14812), UINT16_C( 1601), UINT16_C( 5446), UINT16_C( 10821), UINT16_C( 52555), UINT16_C( 11613), UINT16_C( 32262), UINT16_C( 44594), UINT16_C( 4573), UINT16_C( 55679), UINT16_C( 28536), UINT16_C( 33684), UINT16_C( 36248), UINT16_C( 37000), UINT16_C( 16957), UINT16_C( 33904), UINT16_C( 7320), UINT16_C( 32743), UINT16_C( 28065), UINT16_C( 40454), UINT16_C( 54701), UINT16_C( 30387), UINT16_C( 20402), UINT16_C( 12012), UINT16_C( 12600), UINT16_C( 40321), UINT16_C( 11976), UINT16_C( 22914), UINT16_C( 48119), UINT16_C( 54567), UINT16_C( 37518)), simde_x_mm512_set_epu16(UINT16_C( 19683), UINT16_C( 49281), UINT16_C( 58394), UINT16_C( 42390), UINT16_C( 28448), UINT16_C( 2882), UINT16_C( 16746), UINT16_C( 53644), UINT16_C( 16898), UINT16_C( 40318), UINT16_C( 16648), UINT16_C( 14647), UINT16_C( 38440), UINT16_C( 42038), UINT16_C( 24217), UINT16_C( 60044), UINT16_C( 11584), UINT16_C( 35025), UINT16_C( 26069), UINT16_C( 37885), UINT16_C( 14708), UINT16_C( 24663), UINT16_C( 44110), UINT16_C( 40062), UINT16_C( 918), UINT16_C( 34061), UINT16_C( 57874), UINT16_C( 27338), UINT16_C( 24412), UINT16_C( 15314), UINT16_C( 35927), UINT16_C( 54580)), simde_x_mm512_set_epu16(UINT16_C( 36575), UINT16_C( 32047), UINT16_C( 29998), UINT16_C( 23918), UINT16_C( 19635), UINT16_C( 27719), UINT16_C( 14180), UINT16_C( 42953), UINT16_C( 30746), UINT16_C( 22446), UINT16_C( 36164), UINT16_C( 21592), UINT16_C( 36062), UINT16_C( 39143), UINT16_C( 30609), UINT16_C( 38501), UINT16_C( 22744), UINT16_C( 21173), UINT16_C( 29406), UINT16_C( 32975), UINT16_C( 27581), UINT16_C( 39682), UINT16_C( 37249), UINT16_C( 30232), UINT16_C( 6465), UINT16_C( 23331), UINT16_C( 49098), UINT16_C( 19657), UINT16_C( 23663), UINT16_C( 
31717), UINT16_C( 45247), UINT16_C( 46049)) }, { simde_x_mm512_set_epu16(UINT16_C( 2926), UINT16_C( 37218), UINT16_C( 49262), UINT16_C( 41266), UINT16_C( 64616), UINT16_C( 47887), UINT16_C( 47046), UINT16_C( 20233), UINT16_C( 17531), UINT16_C( 35216), UINT16_C( 26970), UINT16_C( 22110), UINT16_C( 23885), UINT16_C( 49160), UINT16_C( 11289), UINT16_C( 22415), UINT16_C( 57944), UINT16_C( 39676), UINT16_C( 14246), UINT16_C( 36547), UINT16_C( 43442), UINT16_C( 52571), UINT16_C( 16838), UINT16_C( 32565), UINT16_C( 62744), UINT16_C( 45552), UINT16_C( 50968), UINT16_C( 36716), UINT16_C( 10154), UINT16_C( 41950), UINT16_C( 14496), UINT16_C( 51322)), simde_x_mm512_set_epu16(UINT16_C( 14640), UINT16_C( 63580), UINT16_C( 14746), UINT16_C( 21373), UINT16_C( 2714), UINT16_C( 18543), UINT16_C( 17198), UINT16_C( 32202), UINT16_C( 11581), UINT16_C( 13480), UINT16_C( 54288), UINT16_C( 9603), UINT16_C( 12378), UINT16_C( 53993), UINT16_C( 37890), UINT16_C( 2622), UINT16_C( 22964), UINT16_C( 24086), UINT16_C( 65101), UINT16_C( 151), UINT16_C( 54310), UINT16_C( 27341), UINT16_C( 18591), UINT16_C( 57907), UINT16_C( 13730), UINT16_C( 48472), UINT16_C( 10888), UINT16_C( 26832), UINT16_C( 39665), UINT16_C( 48015), UINT16_C( 40287), UINT16_C( 7521)), simde_x_mm512_set_epu16(UINT16_C( 8783), UINT16_C( 50399), UINT16_C( 32004), UINT16_C( 31320), UINT16_C( 33665), UINT16_C( 33215), UINT16_C( 32122), UINT16_C( 26218), UINT16_C( 14556), UINT16_C( 24348), UINT16_C( 40629), UINT16_C( 15857), UINT16_C( 18132), UINT16_C( 51577), UINT16_C( 24590), UINT16_C( 12519), UINT16_C( 40454), UINT16_C( 31881), UINT16_C( 39674), UINT16_C( 18349), UINT16_C( 48876), UINT16_C( 39956), UINT16_C( 17715), UINT16_C( 45236), UINT16_C( 38237), UINT16_C( 47012), UINT16_C( 30928), UINT16_C( 31774), UINT16_C( 24910), UINT16_C( 44983), UINT16_C( 27392), UINT16_C( 29422)) }, { simde_x_mm512_set_epu16(UINT16_C( 33359), UINT16_C( 20997), UINT16_C( 60535), UINT16_C( 6477), UINT16_C( 35687), UINT16_C( 52277), UINT16_C( 18001), 
UINT16_C( 9114), UINT16_C( 58688), UINT16_C( 24195), UINT16_C( 55556), UINT16_C( 26572), UINT16_C( 13614), UINT16_C( 31611), UINT16_C( 64483), UINT16_C( 39184), UINT16_C( 20423), UINT16_C( 4017), UINT16_C( 21263), UINT16_C( 9687), UINT16_C( 45713), UINT16_C( 61493), UINT16_C( 29891), UINT16_C( 26099), UINT16_C( 34397), UINT16_C( 16487), UINT16_C( 18770), UINT16_C( 48421), UINT16_C( 51774), UINT16_C( 40289), UINT16_C( 36358), UINT16_C( 9752)), simde_x_mm512_set_epu16(UINT16_C( 14685), UINT16_C( 36894), UINT16_C( 3039), UINT16_C( 14211), UINT16_C( 7600), UINT16_C( 13396), UINT16_C( 20271), UINT16_C( 32796), UINT16_C( 18819), UINT16_C( 35329), UINT16_C( 25198), UINT16_C( 53928), UINT16_C( 53351), UINT16_C( 38714), UINT16_C( 38160), UINT16_C( 28969), UINT16_C( 51469), UINT16_C( 54246), UINT16_C( 53402), UINT16_C( 12671), UINT16_C( 23815), UINT16_C( 34869), UINT16_C( 4959), UINT16_C( 48021), UINT16_C( 161), UINT16_C( 58452), UINT16_C( 21609), UINT16_C( 23057), UINT16_C( 951), UINT16_C( 33560), UINT16_C( 51461), UINT16_C( 32848)), simde_x_mm512_set_epu16(UINT16_C( 24022), UINT16_C( 28946), UINT16_C( 31787), UINT16_C( 10344), UINT16_C( 21644), UINT16_C( 32837), UINT16_C( 19136), UINT16_C( 20955), UINT16_C( 38754), UINT16_C( 29762), UINT16_C( 40377), UINT16_C( 40250), UINT16_C( 33483), UINT16_C( 35163), UINT16_C( 51322), UINT16_C( 34077), UINT16_C( 35946), UINT16_C( 29132), UINT16_C( 37333), UINT16_C( 11179), UINT16_C( 34764), UINT16_C( 48181), UINT16_C( 17425), UINT16_C( 37060), UINT16_C( 17279), UINT16_C( 37470), UINT16_C( 20190), UINT16_C( 35739), UINT16_C( 26363), UINT16_C( 36925), UINT16_C( 43910), UINT16_C( 21300)) }, { simde_x_mm512_set_epu16(UINT16_C( 16482), UINT16_C( 29865), UINT16_C( 43161), UINT16_C( 49292), UINT16_C( 20253), UINT16_C( 46312), UINT16_C( 22868), UINT16_C( 35970), UINT16_C( 27832), UINT16_C( 29779), UINT16_C( 41321), UINT16_C( 4471), UINT16_C( 101), UINT16_C( 26727), UINT16_C( 57916), UINT16_C( 6017), UINT16_C( 36275), UINT16_C( 22385), UINT16_C( 
64475), UINT16_C( 27511), UINT16_C( 18089), UINT16_C( 2907), UINT16_C( 6044), UINT16_C( 25380), UINT16_C( 39450), UINT16_C( 6056), UINT16_C( 3173), UINT16_C( 48508), UINT16_C( 39476), UINT16_C( 34432), UINT16_C( 54613), UINT16_C( 48593)), simde_x_mm512_set_epu16(UINT16_C( 5403), UINT16_C( 14767), UINT16_C( 48931), UINT16_C( 19205), UINT16_C( 43050), UINT16_C( 50911), UINT16_C( 61409), UINT16_C( 58117), UINT16_C( 21956), UINT16_C( 47259), UINT16_C( 18797), UINT16_C( 33639), UINT16_C( 33691), UINT16_C( 26950), UINT16_C( 42098), UINT16_C( 29396), UINT16_C( 40226), UINT16_C( 56570), UINT16_C( 65251), UINT16_C( 61481), UINT16_C( 53274), UINT16_C( 56708), UINT16_C( 31154), UINT16_C( 56119), UINT16_C( 11598), UINT16_C( 59701), UINT16_C( 22444), UINT16_C( 31379), UINT16_C( 47987), UINT16_C( 5886), UINT16_C( 2522), UINT16_C( 57558)), simde_x_mm512_set_epu16(UINT16_C( 10943), UINT16_C( 22316), UINT16_C( 46046), UINT16_C( 34249), UINT16_C( 31652), UINT16_C( 48612), UINT16_C( 42139), UINT16_C( 47044), UINT16_C( 24894), UINT16_C( 38519), UINT16_C( 30059), UINT16_C( 19055), UINT16_C( 16896), UINT16_C( 26839), UINT16_C( 50007), UINT16_C( 17707), UINT16_C( 38251), UINT16_C( 39478), UINT16_C( 64863), UINT16_C( 44496), UINT16_C( 35682), UINT16_C( 29808), UINT16_C( 18599), UINT16_C( 40750), UINT16_C( 25524), UINT16_C( 32879), UINT16_C( 12809), UINT16_C( 39944), UINT16_C( 43732), UINT16_C( 20159), UINT16_C( 28568), UINT16_C( 53076)) }, { simde_x_mm512_set_epu16(UINT16_C( 34893), UINT16_C( 23679), UINT16_C( 64034), UINT16_C( 46251), UINT16_C( 8048), UINT16_C( 41654), UINT16_C( 61576), UINT16_C( 24661), UINT16_C( 10548), UINT16_C( 15307), UINT16_C( 54631), UINT16_C( 56752), UINT16_C( 25346), UINT16_C( 5558), UINT16_C( 1968), UINT16_C( 32109), UINT16_C( 59890), UINT16_C( 1389), UINT16_C( 18928), UINT16_C( 44824), UINT16_C( 11367), UINT16_C( 52264), UINT16_C( 20085), UINT16_C( 56744), UINT16_C( 49674), UINT16_C( 4067), UINT16_C( 3569), UINT16_C( 23831), UINT16_C( 8020), UINT16_C( 56140), 
UINT16_C( 13343), UINT16_C( 5815)), simde_x_mm512_set_epu16(UINT16_C( 70), UINT16_C( 49524), UINT16_C( 2389), UINT16_C( 48794), UINT16_C( 9656), UINT16_C( 49515), UINT16_C( 20459), UINT16_C( 8573), UINT16_C( 28524), UINT16_C( 25506), UINT16_C( 45011), UINT16_C( 20391), UINT16_C( 42585), UINT16_C( 5514), UINT16_C( 3945), UINT16_C( 49101), UINT16_C( 8826), UINT16_C( 53647), UINT16_C( 55458), UINT16_C( 61374), UINT16_C( 54518), UINT16_C( 64462), UINT16_C( 15696), UINT16_C( 38749), UINT16_C( 14471), UINT16_C( 43272), UINT16_C( 48962), UINT16_C( 15384), UINT16_C( 23964), UINT16_C( 8219), UINT16_C( 12498), UINT16_C( 59146)), simde_x_mm512_set_epu16(UINT16_C( 17482), UINT16_C( 36602), UINT16_C( 33212), UINT16_C( 47523), UINT16_C( 8852), UINT16_C( 45585), UINT16_C( 41018), UINT16_C( 16617), UINT16_C( 19536), UINT16_C( 20407), UINT16_C( 49821), UINT16_C( 38572), UINT16_C( 33966), UINT16_C( 5536), UINT16_C( 2957), UINT16_C( 40605), UINT16_C( 34358), UINT16_C( 27518), UINT16_C( 37193), UINT16_C( 53099), UINT16_C( 32943), UINT16_C( 58363), UINT16_C( 17891), UINT16_C( 47747), UINT16_C( 32073), UINT16_C( 23670), UINT16_C( 26266), UINT16_C( 19608), UINT16_C( 15992), UINT16_C( 32180), UINT16_C( 12921), UINT16_C( 32481)) }, { simde_x_mm512_set_epu16(UINT16_C( 60096), UINT16_C( 60141), UINT16_C( 1401), UINT16_C( 46103), UINT16_C( 12472), UINT16_C( 53354), UINT16_C( 24302), UINT16_C( 31759), UINT16_C( 31691), UINT16_C( 4110), UINT16_C( 33754), UINT16_C( 25522), UINT16_C( 55741), UINT16_C( 54238), UINT16_C( 6937), UINT16_C( 14267), UINT16_C( 6394), UINT16_C( 11471), UINT16_C( 36806), UINT16_C( 29444), UINT16_C( 4686), UINT16_C( 24894), UINT16_C( 9912), UINT16_C( 55420), UINT16_C( 39725), UINT16_C( 16932), UINT16_C( 1814), UINT16_C( 27150), UINT16_C( 14089), UINT16_C( 26760), UINT16_C( 42881), UINT16_C( 1890)), simde_x_mm512_set_epu16(UINT16_C( 1052), UINT16_C( 45798), UINT16_C( 2128), UINT16_C( 35449), UINT16_C( 36274), UINT16_C( 38472), UINT16_C( 30996), UINT16_C( 7411), UINT16_C( 
65433), UINT16_C( 20087), UINT16_C( 6070), UINT16_C( 34216), UINT16_C( 38017), UINT16_C( 52174), UINT16_C( 6632), UINT16_C( 18268), UINT16_C( 23199), UINT16_C( 50262), UINT16_C( 41833), UINT16_C( 43512), UINT16_C( 43349), UINT16_C( 45458), UINT16_C( 19756), UINT16_C( 38464), UINT16_C( 37153), UINT16_C( 41983), UINT16_C( 8534), UINT16_C( 930), UINT16_C( 61899), UINT16_C( 3423), UINT16_C( 63286), UINT16_C( 18842)), simde_x_mm512_set_epu16(UINT16_C( 30574), UINT16_C( 52970), UINT16_C( 1765), UINT16_C( 40776), UINT16_C( 24373), UINT16_C( 45913), UINT16_C( 27649), UINT16_C( 19585), UINT16_C( 48562), UINT16_C( 12099), UINT16_C( 19912), UINT16_C( 29869), UINT16_C( 46879), UINT16_C( 53206), UINT16_C( 6785), UINT16_C( 16268), UINT16_C( 14797), UINT16_C( 30867), UINT16_C( 39320), UINT16_C( 36478), UINT16_C( 24018), UINT16_C( 35176), UINT16_C( 14834), UINT16_C( 46942), UINT16_C( 38439), UINT16_C( 29458), UINT16_C( 5174), UINT16_C( 14040), UINT16_C( 37994), UINT16_C( 15092), UINT16_C( 53084), UINT16_C( 10366)) }, { simde_x_mm512_set_epu16(UINT16_C( 21390), UINT16_C( 10972), UINT16_C( 9534), UINT16_C( 25326), UINT16_C( 39196), UINT16_C( 52858), UINT16_C( 21878), UINT16_C( 44949), UINT16_C( 56397), UINT16_C( 7489), UINT16_C( 51094), UINT16_C( 21965), UINT16_C( 45995), UINT16_C( 12111), UINT16_C( 47805), UINT16_C( 1368), UINT16_C( 47857), UINT16_C( 52094), UINT16_C( 29749), UINT16_C( 11216), UINT16_C( 36270), UINT16_C( 43041), UINT16_C( 37100), UINT16_C( 23737), UINT16_C( 49684), UINT16_C( 1618), UINT16_C( 4508), UINT16_C( 49414), UINT16_C( 16652), UINT16_C( 21178), UINT16_C( 2342), UINT16_C( 41466)), simde_x_mm512_set_epu16(UINT16_C( 2029), UINT16_C( 56235), UINT16_C( 60520), UINT16_C( 21505), UINT16_C( 57936), UINT16_C( 7393), UINT16_C( 48841), UINT16_C( 33423), UINT16_C( 21974), UINT16_C( 41720), UINT16_C( 60540), UINT16_C( 41725), UINT16_C( 29801), UINT16_C( 64766), UINT16_C( 10276), UINT16_C( 10419), UINT16_C( 29113), UINT16_C( 9717), UINT16_C( 10502), UINT16_C( 53810), 
UINT16_C( 21215), UINT16_C( 51746), UINT16_C( 31756), UINT16_C( 40043), UINT16_C( 19421), UINT16_C( 27841), UINT16_C( 1136), UINT16_C( 29043), UINT16_C( 64035), UINT16_C( 41226), UINT16_C( 38869), UINT16_C( 32769)), simde_x_mm512_set_epu16(UINT16_C( 11710), UINT16_C( 33604), UINT16_C( 35027), UINT16_C( 23416), UINT16_C( 48566), UINT16_C( 30126), UINT16_C( 35360), UINT16_C( 39186), UINT16_C( 39186), UINT16_C( 24605), UINT16_C( 55817), UINT16_C( 31845), UINT16_C( 37898), UINT16_C( 38439), UINT16_C( 29041), UINT16_C( 5894), UINT16_C( 38485), UINT16_C( 30906), UINT16_C( 20126), UINT16_C( 32513), UINT16_C( 28743), UINT16_C( 47394), UINT16_C( 34428), UINT16_C( 31890), UINT16_C( 34553), UINT16_C( 14730), UINT16_C( 2822), UINT16_C( 39229), UINT16_C( 40344), UINT16_C( 31202), UINT16_C( 20606), UINT16_C( 37118)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_avg_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m512i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_avg_epu16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint16_t src[32]; const simde__mmask32 k; const uint16_t a[32]; const uint16_t b[32]; const uint16_t r[32]; } test_vec[] = { { { UINT16_C(19100), UINT16_C(47010), UINT16_C(60087), UINT16_C(28088), UINT16_C(36628), UINT16_C( 9660), UINT16_C(16286), UINT16_C( 8161), UINT16_C(63657), UINT16_C(63180), UINT16_C( 6828), UINT16_C(46048), UINT16_C(34775), UINT16_C(18101), UINT16_C(58268), UINT16_C(14486), UINT16_C(14382), UINT16_C(58864), UINT16_C(43042), UINT16_C(14162), UINT16_C( 3895), UINT16_C(54876), UINT16_C(15950), UINT16_C(63733), UINT16_C(49462), UINT16_C(58350), UINT16_C(53212), UINT16_C(45974), UINT16_C(19286), UINT16_C(62458), UINT16_C(36911), UINT16_C(23851) }, UINT32_C(3946978248), { UINT16_C(38340), UINT16_C(64290), UINT16_C(32420), UINT16_C(62161), UINT16_C(51132), UINT16_C(62442), UINT16_C(55688), UINT16_C(25814), UINT16_C(27816), UINT16_C(65048), UINT16_C( 
4791), UINT16_C(59121), UINT16_C( 7586), UINT16_C(27203), UINT16_C(34360), UINT16_C(64597), UINT16_C(30491), UINT16_C(49144), UINT16_C(51702), UINT16_C(45745), UINT16_C(40080), UINT16_C( 6565), UINT16_C(31605), UINT16_C( 7549), UINT16_C(38375), UINT16_C(40731), UINT16_C( 3495), UINT16_C(18821), UINT16_C(51498), UINT16_C(25268), UINT16_C( 2383), UINT16_C(27231) }, { UINT16_C(22401), UINT16_C(30505), UINT16_C(55840), UINT16_C(45353), UINT16_C(53110), UINT16_C(60362), UINT16_C(18250), UINT16_C(12808), UINT16_C( 9437), UINT16_C(34001), UINT16_C(22065), UINT16_C(23502), UINT16_C(33311), UINT16_C(28349), UINT16_C( 7307), UINT16_C( 3288), UINT16_C( 371), UINT16_C(38019), UINT16_C(44508), UINT16_C(21061), UINT16_C( 3964), UINT16_C(50750), UINT16_C(18006), UINT16_C(13304), UINT16_C(51562), UINT16_C(39864), UINT16_C(34336), UINT16_C(16374), UINT16_C(46088), UINT16_C(37806), UINT16_C(34512), UINT16_C(17568) }, { UINT16_C(19100), UINT16_C(47010), UINT16_C(60087), UINT16_C(53757), UINT16_C(36628), UINT16_C( 9660), UINT16_C(36969), UINT16_C(19311), UINT16_C(18627), UINT16_C(49525), UINT16_C( 6828), UINT16_C(41312), UINT16_C(20449), UINT16_C(18101), UINT16_C(58268), UINT16_C(14486), UINT16_C(14382), UINT16_C(43582), UINT16_C(43042), UINT16_C(14162), UINT16_C( 3895), UINT16_C(54876), UINT16_C(24806), UINT16_C(63733), UINT16_C(44969), UINT16_C(40298), UINT16_C(53212), UINT16_C(17598), UINT16_C(19286), UINT16_C(31537), UINT16_C(18448), UINT16_C(22400) } }, { { UINT16_C( 9096), UINT16_C(25816), UINT16_C( 7632), UINT16_C(19638), UINT16_C(62508), UINT16_C(33299), UINT16_C( 2875), UINT16_C(42422), UINT16_C(28373), UINT16_C(62785), UINT16_C(14324), UINT16_C(64564), UINT16_C(58091), UINT16_C(48271), UINT16_C(12137), UINT16_C(61696), UINT16_C(55379), UINT16_C( 9045), UINT16_C( 3061), UINT16_C( 8560), UINT16_C(33536), UINT16_C(15267), UINT16_C(22926), UINT16_C(25568), UINT16_C( 8647), UINT16_C(47960), UINT16_C(36185), UINT16_C(17591), UINT16_C(18287), UINT16_C(55296), UINT16_C( 118), 
UINT16_C(51657) }, UINT32_C(3454869208), { UINT16_C(23850), UINT16_C(10990), UINT16_C(37600), UINT16_C(28261), UINT16_C(17899), UINT16_C(46034), UINT16_C(10855), UINT16_C(49262), UINT16_C( 9911), UINT16_C( 9988), UINT16_C( 1389), UINT16_C(58367), UINT16_C(51461), UINT16_C(57005), UINT16_C(39655), UINT16_C( 4523), UINT16_C(39671), UINT16_C(55099), UINT16_C(41004), UINT16_C( 5957), UINT16_C( 6118), UINT16_C(19914), UINT16_C(14658), UINT16_C(63757), UINT16_C( 4447), UINT16_C(52256), UINT16_C( 8214), UINT16_C( 7343), UINT16_C(23785), UINT16_C(53498), UINT16_C(42486), UINT16_C(60898) }, { UINT16_C( 7487), UINT16_C(27588), UINT16_C( 2750), UINT16_C(42115), UINT16_C(19745), UINT16_C(25585), UINT16_C(65158), UINT16_C(58717), UINT16_C(32015), UINT16_C( 9905), UINT16_C(24989), UINT16_C(34370), UINT16_C(15549), UINT16_C(46167), UINT16_C(14817), UINT16_C( 8609), UINT16_C(26198), UINT16_C( 5260), UINT16_C( 3952), UINT16_C(37304), UINT16_C(43357), UINT16_C(58357), UINT16_C(21159), UINT16_C(47049), UINT16_C(31439), UINT16_C(28125), UINT16_C( 8155), UINT16_C(39411), UINT16_C(19035), UINT16_C(15437), UINT16_C(61059), UINT16_C(55901) }, { UINT16_C( 9096), UINT16_C(25816), UINT16_C( 7632), UINT16_C(35188), UINT16_C(18822), UINT16_C(33299), UINT16_C(38007), UINT16_C(53990), UINT16_C(28373), UINT16_C( 9947), UINT16_C(13189), UINT16_C(46369), UINT16_C(33505), UINT16_C(48271), UINT16_C(12137), UINT16_C(61696), UINT16_C(32935), UINT16_C( 9045), UINT16_C(22478), UINT16_C(21631), UINT16_C(33536), UINT16_C(39136), UINT16_C(17909), UINT16_C(55403), UINT16_C(17943), UINT16_C(47960), UINT16_C( 8185), UINT16_C(23377), UINT16_C(18287), UINT16_C(55296), UINT16_C(51773), UINT16_C(58400) } }, { { UINT16_C(59988), UINT16_C(50414), UINT16_C(43001), UINT16_C(22102), UINT16_C(19280), UINT16_C(63546), UINT16_C( 925), UINT16_C(27823), UINT16_C(35965), UINT16_C(23001), UINT16_C(52651), UINT16_C( 1778), UINT16_C(16151), UINT16_C(39746), UINT16_C(41005), UINT16_C(33397), UINT16_C(25482), UINT16_C(33606), 
UINT16_C(39946), UINT16_C(23514), UINT16_C( 5351), UINT16_C(33875), UINT16_C( 535), UINT16_C(38129), UINT16_C(51854), UINT16_C(14829), UINT16_C(57239), UINT16_C(44863), UINT16_C(33054), UINT16_C(19530), UINT16_C(48929), UINT16_C(43982) }, UINT32_C( 758060066), { UINT16_C( 2481), UINT16_C(39048), UINT16_C(56093), UINT16_C(13341), UINT16_C( 3805), UINT16_C(27592), UINT16_C(46808), UINT16_C(28836), UINT16_C(58261), UINT16_C(46111), UINT16_C(26980), UINT16_C(34304), UINT16_C(52776), UINT16_C(18993), UINT16_C(24802), UINT16_C(37751), UINT16_C(65385), UINT16_C(34348), UINT16_C(18906), UINT16_C(47034), UINT16_C(33623), UINT16_C(12066), UINT16_C(50745), UINT16_C(52895), UINT16_C(48809), UINT16_C( 3714), UINT16_C(33319), UINT16_C(20372), UINT16_C(50512), UINT16_C(13210), UINT16_C( 4390), UINT16_C(36806) }, { UINT16_C(61969), UINT16_C(60182), UINT16_C(53307), UINT16_C(37539), UINT16_C(50515), UINT16_C(36034), UINT16_C(24972), UINT16_C(13659), UINT16_C(56608), UINT16_C(18243), UINT16_C(55136), UINT16_C(45207), UINT16_C(12701), UINT16_C(50147), UINT16_C(43586), UINT16_C(21330), UINT16_C(26780), UINT16_C(55359), UINT16_C(57913), UINT16_C(35946), UINT16_C(11431), UINT16_C(13081), UINT16_C(29838), UINT16_C(44649), UINT16_C(44113), UINT16_C(45557), UINT16_C(35972), UINT16_C( 8546), UINT16_C(17853), UINT16_C( 228), UINT16_C(14063), UINT16_C(35923) }, { UINT16_C(59988), UINT16_C(49615), UINT16_C(43001), UINT16_C(22102), UINT16_C(19280), UINT16_C(31813), UINT16_C( 925), UINT16_C(27823), UINT16_C(35965), UINT16_C(23001), UINT16_C(41058), UINT16_C( 1778), UINT16_C(32739), UINT16_C(39746), UINT16_C(41005), UINT16_C(33397), UINT16_C(46083), UINT16_C(44854), UINT16_C(38410), UINT16_C(41490), UINT16_C( 5351), UINT16_C(12574), UINT16_C( 535), UINT16_C(38129), UINT16_C(46461), UINT16_C(14829), UINT16_C(34646), UINT16_C(14459), UINT16_C(33054), UINT16_C( 6719), UINT16_C(48929), UINT16_C(43982) } }, { { UINT16_C(37535), UINT16_C(55396), UINT16_C(52852), UINT16_C( 7268), UINT16_C(32251), 
UINT16_C(35151), UINT16_C(47345), UINT16_C(17207), UINT16_C(11365), UINT16_C(59892), UINT16_C(22201), UINT16_C(30218), UINT16_C(61084), UINT16_C(35702), UINT16_C(51748), UINT16_C(49943), UINT16_C(31580), UINT16_C(53659), UINT16_C( 74), UINT16_C(17901), UINT16_C(15485), UINT16_C(28622), UINT16_C( 1525), UINT16_C(23218), UINT16_C(42545), UINT16_C(59971), UINT16_C(19965), UINT16_C(39265), UINT16_C(55099), UINT16_C(24356), UINT16_C(15521), UINT16_C(65059) }, UINT32_C( 30391991), { UINT16_C(48318), UINT16_C(15430), UINT16_C( 5368), UINT16_C(60843), UINT16_C(23833), UINT16_C(19271), UINT16_C(35331), UINT16_C( 53), UINT16_C(38615), UINT16_C( 4761), UINT16_C(48750), UINT16_C( 3954), UINT16_C(38394), UINT16_C(45325), UINT16_C(56403), UINT16_C( 4787), UINT16_C(63896), UINT16_C(37198), UINT16_C(63758), UINT16_C(10110), UINT16_C(50774), UINT16_C(22898), UINT16_C(43088), UINT16_C(10330), UINT16_C(62270), UINT16_C(44090), UINT16_C(44209), UINT16_C(43964), UINT16_C(51521), UINT16_C(38237), UINT16_C( 4262), UINT16_C(16039) }, { UINT16_C(62729), UINT16_C( 6095), UINT16_C(20206), UINT16_C(17471), UINT16_C(45332), UINT16_C(25757), UINT16_C(63321), UINT16_C(39052), UINT16_C(51179), UINT16_C(40004), UINT16_C( 115), UINT16_C(46408), UINT16_C(42442), UINT16_C(28746), UINT16_C(61877), UINT16_C(48814), UINT16_C(32486), UINT16_C(54486), UINT16_C( 5580), UINT16_C(57368), UINT16_C(46534), UINT16_C( 8260), UINT16_C(53677), UINT16_C(39096), UINT16_C(64664), UINT16_C( 2868), UINT16_C(31997), UINT16_C(51136), UINT16_C( 2593), UINT16_C(54839), UINT16_C(58875), UINT16_C(57749) }, { UINT16_C(55524), UINT16_C(10763), UINT16_C(12787), UINT16_C( 7268), UINT16_C(34583), UINT16_C(22514), UINT16_C(47345), UINT16_C(19553), UINT16_C(11365), UINT16_C(22383), UINT16_C(24433), UINT16_C(25181), UINT16_C(40418), UINT16_C(37036), UINT16_C(51748), UINT16_C(26801), UINT16_C(48191), UINT16_C(45842), UINT16_C(34669), UINT16_C(33739), UINT16_C(15485), UINT16_C(28622), UINT16_C(48383), UINT16_C(24713), UINT16_C(63467), 
UINT16_C(59971), UINT16_C(19965), UINT16_C(39265), UINT16_C(55099), UINT16_C(24356), UINT16_C(15521), UINT16_C(65059) } }, { { UINT16_C(27491), UINT16_C(12213), UINT16_C(52608), UINT16_C(17935), UINT16_C(21635), UINT16_C(12390), UINT16_C( 7717), UINT16_C(48584), UINT16_C(64539), UINT16_C( 6344), UINT16_C(35193), UINT16_C(39647), UINT16_C( 5779), UINT16_C(36721), UINT16_C( 1787), UINT16_C(24432), UINT16_C( 9841), UINT16_C(61838), UINT16_C(40691), UINT16_C(30263), UINT16_C(40690), UINT16_C( 6054), UINT16_C(28348), UINT16_C(55252), UINT16_C(40043), UINT16_C(58607), UINT16_C(52773), UINT16_C(47486), UINT16_C(61412), UINT16_C(57416), UINT16_C(47349), UINT16_C(26175) }, UINT32_C(3528969694), { UINT16_C(36715), UINT16_C(23880), UINT16_C(61229), UINT16_C(59764), UINT16_C(18525), UINT16_C(51393), UINT16_C(45285), UINT16_C( 2732), UINT16_C(11135), UINT16_C(25539), UINT16_C( 2842), UINT16_C( 4163), UINT16_C(33476), UINT16_C(41590), UINT16_C(52816), UINT16_C(47988), UINT16_C(48477), UINT16_C(35353), UINT16_C(36268), UINT16_C( 2419), UINT16_C(13526), UINT16_C(48082), UINT16_C(32485), UINT16_C(25797), UINT16_C(35241), UINT16_C(50375), UINT16_C( 2964), UINT16_C(22740), UINT16_C(19085), UINT16_C(56827), UINT16_C(28440), UINT16_C(30105) }, { UINT16_C(45612), UINT16_C(55551), UINT16_C(29503), UINT16_C( 5602), UINT16_C(46247), UINT16_C(36048), UINT16_C(38450), UINT16_C(56560), UINT16_C(47135), UINT16_C(45984), UINT16_C(29891), UINT16_C(20492), UINT16_C( 1982), UINT16_C(55086), UINT16_C(51062), UINT16_C(41804), UINT16_C(19577), UINT16_C(47227), UINT16_C(23999), UINT16_C(26318), UINT16_C(40465), UINT16_C(17651), UINT16_C(58164), UINT16_C(21280), UINT16_C(49307), UINT16_C(24071), UINT16_C( 4916), UINT16_C(62127), UINT16_C(56602), UINT16_C(37065), UINT16_C( 5796), UINT16_C( 7475) }, { UINT16_C(27491), UINT16_C(39716), UINT16_C(45366), UINT16_C(32683), UINT16_C(32386), UINT16_C(12390), UINT16_C(41868), UINT16_C(29646), UINT16_C(29135), UINT16_C( 6344), UINT16_C(16367), UINT16_C(12328), 
UINT16_C( 5779), UINT16_C(36721), UINT16_C(51939), UINT16_C(44896), UINT16_C(34027), UINT16_C(41290), UINT16_C(30134), UINT16_C(30263), UINT16_C(26996), UINT16_C( 6054), UINT16_C(45325), UINT16_C(55252), UINT16_C(40043), UINT16_C(37223), UINT16_C(52773), UINT16_C(47486), UINT16_C(37844), UINT16_C(57416), UINT16_C(17118), UINT16_C(18790) } }, { { UINT16_C(44898), UINT16_C( 8661), UINT16_C(41740), UINT16_C( 7815), UINT16_C(31298), UINT16_C(30306), UINT16_C(33374), UINT16_C(63946), UINT16_C(53570), UINT16_C(30296), UINT16_C( 2020), UINT16_C(65128), UINT16_C(13028), UINT16_C(34958), UINT16_C(49736), UINT16_C(43685), UINT16_C(31345), UINT16_C(32203), UINT16_C(21022), UINT16_C(24731), UINT16_C(64973), UINT16_C(11222), UINT16_C(41087), UINT16_C(49444), UINT16_C(31857), UINT16_C(21815), UINT16_C(41091), UINT16_C(26451), UINT16_C(58066), UINT16_C( 6895), UINT16_C(38052), UINT16_C( 5572) }, UINT32_C( 764579599), { UINT16_C(12001), UINT16_C(44685), UINT16_C(25387), UINT16_C(43993), UINT16_C(65028), UINT16_C(30060), UINT16_C(42106), UINT16_C(65227), UINT16_C( 7748), UINT16_C( 5733), UINT16_C(21760), UINT16_C(42032), UINT16_C(62697), UINT16_C(63673), UINT16_C(19587), UINT16_C(25637), UINT16_C(45690), UINT16_C(42259), UINT16_C(60438), UINT16_C( 6736), UINT16_C(48618), UINT16_C(25999), UINT16_C(23137), UINT16_C(42339), UINT16_C(51321), UINT16_C(31163), UINT16_C(60189), UINT16_C( 1822), UINT16_C(55263), UINT16_C(25343), UINT16_C( 9507), UINT16_C(40390) }, { UINT16_C(55767), UINT16_C(60739), UINT16_C(37830), UINT16_C(45063), UINT16_C(38736), UINT16_C(45333), UINT16_C(30961), UINT16_C(27222), UINT16_C( 4417), UINT16_C(24292), UINT16_C( 764), UINT16_C(56165), UINT16_C(26073), UINT16_C(64829), UINT16_C( 1162), UINT16_C(24986), UINT16_C(56797), UINT16_C(41807), UINT16_C(22129), UINT16_C(49492), UINT16_C(27117), UINT16_C(57203), UINT16_C(51682), UINT16_C( 9033), UINT16_C(11739), UINT16_C(55169), UINT16_C(59183), UINT16_C( 2483), UINT16_C(61516), UINT16_C(54790), UINT16_C(41204), 
UINT16_C(53815) }, { UINT16_C(33884), UINT16_C(52712), UINT16_C(31609), UINT16_C(44528), UINT16_C(31298), UINT16_C(30306), UINT16_C(33374), UINT16_C(63946), UINT16_C( 6083), UINT16_C(15013), UINT16_C(11262), UINT16_C(49099), UINT16_C(13028), UINT16_C(34958), UINT16_C(49736), UINT16_C(25312), UINT16_C(31345), UINT16_C(42033), UINT16_C(21022), UINT16_C(24731), UINT16_C(37868), UINT16_C(11222), UINT16_C(41087), UINT16_C(25686), UINT16_C(31530), UINT16_C(21815), UINT16_C(59686), UINT16_C( 2153), UINT16_C(58066), UINT16_C(40067), UINT16_C(38052), UINT16_C( 5572) } }, { { UINT16_C(34430), UINT16_C(61301), UINT16_C(51677), UINT16_C(51888), UINT16_C( 9011), UINT16_C( 5545), UINT16_C(62445), UINT16_C(51256), UINT16_C(47392), UINT16_C(20639), UINT16_C(21152), UINT16_C(60505), UINT16_C(24387), UINT16_C(14274), UINT16_C(64255), UINT16_C(32009), UINT16_C(32640), UINT16_C(23916), UINT16_C( 7496), UINT16_C(31528), UINT16_C(53568), UINT16_C(11664), UINT16_C(51396), UINT16_C(58869), UINT16_C(38274), UINT16_C( 8757), UINT16_C(36583), UINT16_C(10767), UINT16_C(53741), UINT16_C(60514), UINT16_C(27595), UINT16_C(19562) }, UINT32_C( 866768618), { UINT16_C(53747), UINT16_C(13486), UINT16_C(16291), UINT16_C(26465), UINT16_C(22279), UINT16_C(35148), UINT16_C(33260), UINT16_C(54188), UINT16_C(47887), UINT16_C(64766), UINT16_C(24716), UINT16_C(22761), UINT16_C(21451), UINT16_C(46756), UINT16_C(19753), UINT16_C( 7657), UINT16_C(38687), UINT16_C(49745), UINT16_C(45782), UINT16_C(56873), UINT16_C(30217), UINT16_C(62823), UINT16_C( 5111), UINT16_C( 1993), UINT16_C(51150), UINT16_C(23299), UINT16_C(60455), UINT16_C(62131), UINT16_C(22335), UINT16_C(27048), UINT16_C(37284), UINT16_C(50054) }, { UINT16_C(55081), UINT16_C(65413), UINT16_C(44937), UINT16_C(37853), UINT16_C(17701), UINT16_C( 7304), UINT16_C(20824), UINT16_C(10019), UINT16_C(10008), UINT16_C(16258), UINT16_C(13587), UINT16_C(21298), UINT16_C(55948), UINT16_C(12476), UINT16_C(17004), UINT16_C(38388), UINT16_C(31001), UINT16_C(41620), 
UINT16_C(29224), UINT16_C(19765), UINT16_C(48823), UINT16_C( 3946), UINT16_C(36111), UINT16_C(10294), UINT16_C(47284), UINT16_C(51303), UINT16_C(39405), UINT16_C(31003), UINT16_C(55156), UINT16_C(57514), UINT16_C(40473), UINT16_C(12917) }, { UINT16_C(34430), UINT16_C(39450), UINT16_C(51677), UINT16_C(32159), UINT16_C( 9011), UINT16_C(21226), UINT16_C(27042), UINT16_C(32104), UINT16_C(47392), UINT16_C(40512), UINT16_C(19152), UINT16_C(60505), UINT16_C(38700), UINT16_C(14274), UINT16_C(18379), UINT16_C(23023), UINT16_C(34844), UINT16_C(23916), UINT16_C( 7496), UINT16_C(38319), UINT16_C(53568), UINT16_C(33385), UINT16_C(51396), UINT16_C( 6144), UINT16_C(49217), UINT16_C(37301), UINT16_C(36583), UINT16_C(10767), UINT16_C(38746), UINT16_C(42281), UINT16_C(27595), UINT16_C(19562) } }, { { UINT16_C( 2327), UINT16_C(16596), UINT16_C( 2683), UINT16_C(12941), UINT16_C(63432), UINT16_C(55106), UINT16_C(30853), UINT16_C(14847), UINT16_C(26417), UINT16_C( 7681), UINT16_C( 7168), UINT16_C(29848), UINT16_C(17139), UINT16_C( 3156), UINT16_C(51680), UINT16_C(63294), UINT16_C( 5075), UINT16_C(20023), UINT16_C(50461), UINT16_C(58753), UINT16_C(50108), UINT16_C(16828), UINT16_C(48187), UINT16_C(27771), UINT16_C(31779), UINT16_C( 9099), UINT16_C( 9113), UINT16_C(35992), UINT16_C(60517), UINT16_C(17817), UINT16_C(55222), UINT16_C(35132) }, UINT32_C( 131560682), { UINT16_C(22585), UINT16_C(62956), UINT16_C(43291), UINT16_C(22327), UINT16_C(45669), UINT16_C(35011), UINT16_C(20014), UINT16_C(51115), UINT16_C(17265), UINT16_C(54868), UINT16_C(60720), UINT16_C(58907), UINT16_C(22724), UINT16_C(44911), UINT16_C(18124), UINT16_C( 1462), UINT16_C(41887), UINT16_C(47866), UINT16_C(12620), UINT16_C(45329), UINT16_C(54755), UINT16_C( 4665), UINT16_C(58403), UINT16_C(38361), UINT16_C(11560), UINT16_C(22635), UINT16_C(34586), UINT16_C(57150), UINT16_C(44511), UINT16_C(43918), UINT16_C(17651), UINT16_C(37552) }, { UINT16_C(43751), UINT16_C(13133), UINT16_C(24284), UINT16_C(49124), UINT16_C( 7475), 
UINT16_C(22481), UINT16_C(43778), UINT16_C(10988), UINT16_C(22488), UINT16_C(62338), UINT16_C(49374), UINT16_C(48594), UINT16_C(24685), UINT16_C(24680), UINT16_C( 6308), UINT16_C(36083), UINT16_C(16579), UINT16_C(40895), UINT16_C(42142), UINT16_C(53854), UINT16_C(12481), UINT16_C(49961), UINT16_C( 5595), UINT16_C(46061), UINT16_C(28524), UINT16_C(19366), UINT16_C(30767), UINT16_C(39944), UINT16_C(29144), UINT16_C(32253), UINT16_C(61577), UINT16_C(19465) }, { UINT16_C( 2327), UINT16_C(38045), UINT16_C( 2683), UINT16_C(35726), UINT16_C(63432), UINT16_C(28746), UINT16_C(31896), UINT16_C(31052), UINT16_C(26417), UINT16_C( 7681), UINT16_C(55047), UINT16_C(29848), UINT16_C(23705), UINT16_C(34796), UINT16_C(12216), UINT16_C(63294), UINT16_C(29233), UINT16_C(44381), UINT16_C(27381), UINT16_C(58753), UINT16_C(33618), UINT16_C(16828), UINT16_C(31999), UINT16_C(42211), UINT16_C(20042), UINT16_C(21001), UINT16_C(32677), UINT16_C(35992), UINT16_C(60517), UINT16_C(17817), UINT16_C(55222), UINT16_C(35132) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi16(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_mask_avg_epu16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_u16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_avg_epu16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask32 k; const uint16_t a[32]; const uint16_t b[32]; const uint16_t r[32]; } test_vec[] = { { UINT32_C(2786950776), { UINT16_C(13284), UINT16_C(64018), UINT16_C(52116), UINT16_C(52299), UINT16_C(41321), UINT16_C(22662), UINT16_C(38227), UINT16_C(57256), UINT16_C( 6590), UINT16_C(53296), UINT16_C( 8772), UINT16_C(11782), UINT16_C(51609), UINT16_C( 4485), UINT16_C(41543), UINT16_C(11191), UINT16_C(51669), UINT16_C(26917), UINT16_C(28820), UINT16_C(64822), 
UINT16_C(48145), UINT16_C(25686), UINT16_C(65105), UINT16_C( 3907), UINT16_C(29463), UINT16_C(23519), UINT16_C(58773), UINT16_C(12169), UINT16_C( 3758), UINT16_C(62784), UINT16_C(63665), UINT16_C(34336) }, { UINT16_C(17857), UINT16_C(22256), UINT16_C( 9909), UINT16_C(50771), UINT16_C(43490), UINT16_C(13099), UINT16_C(28327), UINT16_C(48962), UINT16_C( 8674), UINT16_C(30490), UINT16_C(41991), UINT16_C(46502), UINT16_C(59314), UINT16_C(25515), UINT16_C(52191), UINT16_C(41194), UINT16_C(55825), UINT16_C(50934), UINT16_C(18944), UINT16_C(57997), UINT16_C(47347), UINT16_C(39701), UINT16_C(22310), UINT16_C( 2138), UINT16_C(29816), UINT16_C(32640), UINT16_C( 9752), UINT16_C(52021), UINT16_C(57357), UINT16_C(60462), UINT16_C( 6315), UINT16_C(48269) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(51535), UINT16_C(42406), UINT16_C(17881), UINT16_C(33277), UINT16_C( 0), UINT16_C( 0), UINT16_C(41893), UINT16_C(25382), UINT16_C(29142), UINT16_C(55462), UINT16_C(15000), UINT16_C(46867), UINT16_C( 0), UINT16_C(53747), UINT16_C( 0), UINT16_C(23882), UINT16_C(61410), UINT16_C(47746), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(28080), UINT16_C(34263), UINT16_C( 0), UINT16_C( 0), UINT16_C(61623), UINT16_C( 0), UINT16_C(41303) } }, { UINT32_C(4068705266), { UINT16_C( 4301), UINT16_C(49620), UINT16_C(59848), UINT16_C(61020), UINT16_C(46656), UINT16_C(47607), UINT16_C(30506), UINT16_C(17208), UINT16_C(28061), UINT16_C(43790), UINT16_C(15437), UINT16_C(63895), UINT16_C( 9301), UINT16_C(18357), UINT16_C(14504), UINT16_C(30010), UINT16_C( 3656), UINT16_C( 4150), UINT16_C(37624), UINT16_C(14591), UINT16_C(63048), UINT16_C(29681), UINT16_C(10861), UINT16_C( 2742), UINT16_C(50327), UINT16_C(58805), UINT16_C(19712), UINT16_C(21982), UINT16_C(37745), UINT16_C( 6557), UINT16_C(55244), UINT16_C( 5263) }, { UINT16_C(50661), UINT16_C(56613), UINT16_C( 9304), UINT16_C(40982), UINT16_C( 1818), UINT16_C(34579), UINT16_C(51505), UINT16_C(51601), UINT16_C(18317), 
UINT16_C(36526), UINT16_C(35988), UINT16_C( 1507), UINT16_C(32799), UINT16_C(60191), UINT16_C(44631), UINT16_C(15616), UINT16_C( 9587), UINT16_C(51994), UINT16_C(12361), UINT16_C(25452), UINT16_C(32568), UINT16_C(27114), UINT16_C(31561), UINT16_C(54834), UINT16_C(57538), UINT16_C(22116), UINT16_C(18540), UINT16_C(35932), UINT16_C(31688), UINT16_C( 8311), UINT16_C(30505), UINT16_C(40029) }, { UINT16_C( 0), UINT16_C(53117), UINT16_C( 0), UINT16_C( 0), UINT16_C(24237), UINT16_C(41093), UINT16_C(41006), UINT16_C(34405), UINT16_C(23189), UINT16_C(40158), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(22813), UINT16_C( 6622), UINT16_C(28072), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(28788), UINT16_C( 0), UINT16_C(40461), UINT16_C( 0), UINT16_C( 0), UINT16_C(34717), UINT16_C( 7434), UINT16_C(42875), UINT16_C(22646) } }, { UINT32_C(3848828828), { UINT16_C(54440), UINT16_C(57416), UINT16_C(12883), UINT16_C(40009), UINT16_C(31918), UINT16_C(28787), UINT16_C(55132), UINT16_C(51655), UINT16_C( 8991), UINT16_C(59477), UINT16_C(52382), UINT16_C(50952), UINT16_C(25924), UINT16_C(57443), UINT16_C(52188), UINT16_C(33990), UINT16_C( 3743), UINT16_C(62308), UINT16_C(44609), UINT16_C(61327), UINT16_C( 554), UINT16_C(34399), UINT16_C( 9946), UINT16_C(63823), UINT16_C(42057), UINT16_C(59361), UINT16_C(59761), UINT16_C(46510), UINT16_C( 4686), UINT16_C(11157), UINT16_C(23517), UINT16_C(32175) }, { UINT16_C( 5226), UINT16_C(43888), UINT16_C(65474), UINT16_C(60570), UINT16_C(63746), UINT16_C(56434), UINT16_C(49696), UINT16_C(27093), UINT16_C(46950), UINT16_C(55121), UINT16_C(65440), UINT16_C(61324), UINT16_C( 8721), UINT16_C(61210), UINT16_C(51581), UINT16_C(59244), UINT16_C(56541), UINT16_C(40850), UINT16_C(11483), UINT16_C(56715), UINT16_C(65062), UINT16_C(18105), UINT16_C(36800), UINT16_C( 9903), UINT16_C( 70), UINT16_C(59134), UINT16_C(35328), UINT16_C( 4565), UINT16_C(61356), UINT16_C(10752), UINT16_C(27833), 
UINT16_C(38417) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C(39179), UINT16_C(50290), UINT16_C(47832), UINT16_C( 0), UINT16_C( 0), UINT16_C(39374), UINT16_C(27971), UINT16_C(57299), UINT16_C(58911), UINT16_C( 0), UINT16_C(17323), UINT16_C(59327), UINT16_C(51885), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(59021), UINT16_C( 0), UINT16_C(26252), UINT16_C(23373), UINT16_C( 0), UINT16_C(21064), UINT16_C( 0), UINT16_C(47545), UINT16_C( 0), UINT16_C( 0), UINT16_C(10955), UINT16_C(25675), UINT16_C(35296) } }, { UINT32_C( 607560776), { UINT16_C(49616), UINT16_C(62977), UINT16_C(48063), UINT16_C(32572), UINT16_C(60490), UINT16_C(37030), UINT16_C(42220), UINT16_C(60534), UINT16_C(19502), UINT16_C(56318), UINT16_C(65083), UINT16_C(62469), UINT16_C( 5739), UINT16_C(45963), UINT16_C(49594), UINT16_C(35799), UINT16_C(55682), UINT16_C(17025), UINT16_C(48788), UINT16_C(57025), UINT16_C(26538), UINT16_C(38510), UINT16_C(58379), UINT16_C(14979), UINT16_C(33072), UINT16_C(27669), UINT16_C( 6783), UINT16_C(60000), UINT16_C(60208), UINT16_C(60318), UINT16_C(30124), UINT16_C(12150) }, { UINT16_C(63310), UINT16_C(57969), UINT16_C(12981), UINT16_C(24512), UINT16_C(11930), UINT16_C(42486), UINT16_C(30995), UINT16_C(17375), UINT16_C(62714), UINT16_C(31151), UINT16_C( 4110), UINT16_C(16228), UINT16_C( 763), UINT16_C(43050), UINT16_C(41079), UINT16_C(50903), UINT16_C(18583), UINT16_C(19880), UINT16_C(27002), UINT16_C( 5292), UINT16_C(41623), UINT16_C(43706), UINT16_C(39195), UINT16_C( 5614), UINT16_C(40334), UINT16_C(40079), UINT16_C(62381), UINT16_C(43483), UINT16_C( 1525), UINT16_C(27729), UINT16_C(10405), UINT16_C(15666) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(28542), UINT16_C( 0), UINT16_C( 0), UINT16_C(36608), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(34597), UINT16_C( 0), UINT16_C( 0), UINT16_C(44507), UINT16_C( 0), UINT16_C(43351), UINT16_C( 0), UINT16_C(18453), UINT16_C(37895), UINT16_C( 0), UINT16_C(34081), UINT16_C(41108), UINT16_C( 0), 
UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(34582), UINT16_C( 0), UINT16_C( 0), UINT16_C(44024), UINT16_C( 0), UINT16_C( 0) } }, { UINT32_C(3934968688), { UINT16_C(13892), UINT16_C(56319), UINT16_C(47577), UINT16_C(62598), UINT16_C(29778), UINT16_C(57354), UINT16_C(39185), UINT16_C(49021), UINT16_C(22668), UINT16_C(33128), UINT16_C(47454), UINT16_C( 1005), UINT16_C( 8417), UINT16_C(20800), UINT16_C(51963), UINT16_C(16187), UINT16_C(14849), UINT16_C(55834), UINT16_C(41203), UINT16_C(18126), UINT16_C(55316), UINT16_C( 9766), UINT16_C(41841), UINT16_C(64997), UINT16_C(19964), UINT16_C(23166), UINT16_C(27654), UINT16_C(59229), UINT16_C(40588), UINT16_C(34616), UINT16_C(29544), UINT16_C(27078) }, { UINT16_C(57518), UINT16_C(41283), UINT16_C( 4737), UINT16_C(38375), UINT16_C( 3818), UINT16_C(23739), UINT16_C(41137), UINT16_C(44377), UINT16_C(55533), UINT16_C(62215), UINT16_C(25924), UINT16_C(53466), UINT16_C( 4611), UINT16_C(27479), UINT16_C( 7558), UINT16_C(13525), UINT16_C( 6397), UINT16_C(32469), UINT16_C(48426), UINT16_C( 5396), UINT16_C(53195), UINT16_C(31857), UINT16_C(51824), UINT16_C(23850), UINT16_C(12706), UINT16_C(58961), UINT16_C(11158), UINT16_C(39350), UINT16_C( 3390), UINT16_C(50181), UINT16_C(55850), UINT16_C(10488) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(16798), UINT16_C(40547), UINT16_C(40161), UINT16_C( 0), UINT16_C(39101), UINT16_C(47672), UINT16_C( 0), UINT16_C(27236), UINT16_C( 6514), UINT16_C( 0), UINT16_C(29761), UINT16_C(14856), UINT16_C( 0), UINT16_C(44152), UINT16_C( 0), UINT16_C(11761), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(44424), UINT16_C( 0), UINT16_C(41064), UINT16_C( 0), UINT16_C(49290), UINT16_C( 0), UINT16_C(42399), UINT16_C(42697), UINT16_C(18783) } }, { UINT32_C( 497470962), { UINT16_C(47754), UINT16_C(21810), UINT16_C(41866), UINT16_C(64210), UINT16_C(64621), UINT16_C( 4183), UINT16_C(43053), UINT16_C(50422), UINT16_C(44500), UINT16_C( 4701), UINT16_C(25274), UINT16_C(58838), 
UINT16_C(52796), UINT16_C(12045), UINT16_C(45979), UINT16_C( 9804), UINT16_C(32366), UINT16_C(63611), UINT16_C(19745), UINT16_C(36594), UINT16_C(18761), UINT16_C(30622), UINT16_C(38386), UINT16_C(50747), UINT16_C(38978), UINT16_C(64728), UINT16_C(44795), UINT16_C(14305), UINT16_C(61052), UINT16_C( 5990), UINT16_C(45730), UINT16_C( 4157) }, { UINT16_C(47408), UINT16_C(20744), UINT16_C(64006), UINT16_C(20704), UINT16_C(32323), UINT16_C(13767), UINT16_C( 531), UINT16_C(22011), UINT16_C(54170), UINT16_C(38226), UINT16_C(13185), UINT16_C(64973), UINT16_C(13090), UINT16_C(50197), UINT16_C(21222), UINT16_C( 5844), UINT16_C(56331), UINT16_C( 4712), UINT16_C(18646), UINT16_C( 6498), UINT16_C(10694), UINT16_C(55887), UINT16_C(18987), UINT16_C(50479), UINT16_C(33054), UINT16_C(40795), UINT16_C(10421), UINT16_C(55197), UINT16_C(45659), UINT16_C(16795), UINT16_C(28420), UINT16_C( 4184) }, { UINT16_C( 0), UINT16_C(21277), UINT16_C( 0), UINT16_C( 0), UINT16_C(48472), UINT16_C( 8975), UINT16_C(21792), UINT16_C(36217), UINT16_C(49335), UINT16_C( 0), UINT16_C(19230), UINT16_C(61906), UINT16_C( 0), UINT16_C( 0), UINT16_C(33601), UINT16_C( 7824), UINT16_C( 0), UINT16_C(34162), UINT16_C(19196), UINT16_C( 0), UINT16_C( 0), UINT16_C(43255), UINT16_C( 0), UINT16_C(50613), UINT16_C(36016), UINT16_C( 0), UINT16_C(27608), UINT16_C(34751), UINT16_C(53356), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { UINT32_C( 555925579), { UINT16_C(33800), UINT16_C(52794), UINT16_C(35245), UINT16_C(55464), UINT16_C(55508), UINT16_C(62109), UINT16_C(63577), UINT16_C( 3729), UINT16_C(11808), UINT16_C(31973), UINT16_C(32992), UINT16_C(58813), UINT16_C( 5615), UINT16_C(15093), UINT16_C( 6101), UINT16_C(56667), UINT16_C(38555), UINT16_C(18604), UINT16_C(21535), UINT16_C(62240), UINT16_C(48428), UINT16_C(34533), UINT16_C(30646), UINT16_C(54932), UINT16_C(31397), UINT16_C(34386), UINT16_C( 4346), UINT16_C(60011), UINT16_C(24613), UINT16_C(64292), UINT16_C(32887), UINT16_C( 4824) }, { UINT16_C(33814), 
UINT16_C(13658), UINT16_C(31449), UINT16_C( 1321), UINT16_C( 3639), UINT16_C(60811), UINT16_C( 8325), UINT16_C(11204), UINT16_C( 5786), UINT16_C(38065), UINT16_C( 7206), UINT16_C(19582), UINT16_C(41852), UINT16_C(62279), UINT16_C( 7971), UINT16_C(14597), UINT16_C(24484), UINT16_C(32110), UINT16_C(38873), UINT16_C( 4226), UINT16_C( 3750), UINT16_C(11262), UINT16_C(49710), UINT16_C(51286), UINT16_C( 2008), UINT16_C(65372), UINT16_C(56099), UINT16_C(40779), UINT16_C(37502), UINT16_C(41362), UINT16_C(38833), UINT16_C(21978) }, { UINT16_C(33807), UINT16_C(33226), UINT16_C( 0), UINT16_C(28393), UINT16_C( 0), UINT16_C( 0), UINT16_C(35951), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 7036), UINT16_C(35632), UINT16_C( 0), UINT16_C(25357), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(22898), UINT16_C( 0), UINT16_C( 0), UINT16_C(16703), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(52827), UINT16_C( 0), UINT16_C( 0) } }, { UINT32_C(3486664950), { UINT16_C(21984), UINT16_C(34528), UINT16_C(56931), UINT16_C(37297), UINT16_C( 2208), UINT16_C(30809), UINT16_C(46351), UINT16_C(13175), UINT16_C(49808), UINT16_C( 3794), UINT16_C(25940), UINT16_C( 1711), UINT16_C(35324), UINT16_C(62299), UINT16_C(11986), UINT16_C(45762), UINT16_C(41603), UINT16_C(58936), UINT16_C(59776), UINT16_C( 8311), UINT16_C(53489), UINT16_C( 409), UINT16_C( 4229), UINT16_C( 5684), UINT16_C( 1747), UINT16_C(10020), UINT16_C(54379), UINT16_C(26669), UINT16_C(35165), UINT16_C(12123), UINT16_C( 7607), UINT16_C(15073) }, { UINT16_C( 6592), UINT16_C(16416), UINT16_C(38659), UINT16_C(62561), UINT16_C(64103), UINT16_C(60661), UINT16_C(10506), UINT16_C(56578), UINT16_C(10032), UINT16_C(39685), UINT16_C(13051), UINT16_C(22531), UINT16_C(24251), UINT16_C(29320), UINT16_C(27004), UINT16_C(15532), UINT16_C(52355), UINT16_C(34428), UINT16_C(56675), UINT16_C(51834), UINT16_C(28887), UINT16_C(58039), UINT16_C(47513), UINT16_C(51647), 
UINT16_C(50400), UINT16_C(56165), UINT16_C(26871), UINT16_C(45620), UINT16_C(48327), UINT16_C(17189), UINT16_C(53541), UINT16_C(43135) }, { UINT16_C( 0), UINT16_C(25472), UINT16_C(47795), UINT16_C( 0), UINT16_C(33156), UINT16_C(45735), UINT16_C(28429), UINT16_C(34877), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(12121), UINT16_C( 0), UINT16_C( 0), UINT16_C(19495), UINT16_C( 0), UINT16_C( 0), UINT16_C(46682), UINT16_C( 0), UINT16_C( 0), UINT16_C(41188), UINT16_C( 0), UINT16_C(25871), UINT16_C(28666), UINT16_C(26074), UINT16_C(33093), UINT16_C(40625), UINT16_C(36145), UINT16_C( 0), UINT16_C( 0), UINT16_C(30574), UINT16_C(29104) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_maskz_avg_epu16(test_vec[i].k, a, b); simde_test_x86_assert_equal_u16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_avg_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_avg_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_avg_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_avg_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_avg_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_avg_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_avg_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_avg_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_avg_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_avg_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_avg_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_avg_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_avg_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_avg_epu16) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/blend.c000066400000000000000000004672111400333146700165170ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * 
files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN blend #include #include #include static int test_simde_mm_mask_blend_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask16 k; const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { { UINT16_C(41706), { INT8_C( 58), INT8_C( 123), INT8_C( 55), -INT8_C( 105), INT8_C( 66), -INT8_C( 8), INT8_C( 94), INT8_C( 11), -INT8_C( 11), INT8_C( 101), -INT8_C( 122), -INT8_C( 31), INT8_C( 88), INT8_C( 88), -INT8_C( 62), -INT8_C( 113) }, { INT8_C( 103), INT8_C( 85), INT8_C( 7), INT8_C( 69), -INT8_C( 53), INT8_C( 39), INT8_C( 12), INT8_C( 74), INT8_C( 112), INT8_C( 47), -INT8_C( 127), -INT8_C( 29), -INT8_C( 72), INT8_C( 107), -INT8_C( 123), -INT8_C( 13) }, { INT8_C( 58), INT8_C( 85), INT8_C( 55), INT8_C( 69), INT8_C( 66), INT8_C( 39), INT8_C( 12), INT8_C( 74), -INT8_C( 11), INT8_C( 47), -INT8_C( 122), -INT8_C( 31), INT8_C( 88), INT8_C( 107), -INT8_C( 62), -INT8_C( 13) } }, { UINT16_C(48358), { -INT8_C( 118), INT8_C( 41), -INT8_C( 
75), -INT8_C( 24), INT8_C( 52), -INT8_C( 86), INT8_C( 78), -INT8_C( 70), -INT8_C( 117), -INT8_C( 90), INT8_C( 19), INT8_C( 77), INT8_C( 53), INT8_C( 122), -INT8_C( 94), INT8_C( 60) }, { -INT8_C( 64), INT8_C( 109), INT8_C( 99), -INT8_C( 52), -INT8_C( 73), -INT8_C( 45), -INT8_C( 5), INT8_C( 57), -INT8_C( 74), -INT8_C( 77), -INT8_C( 92), INT8_C( 59), -INT8_C( 90), -INT8_C( 117), -INT8_C( 8), INT8_C( 49) }, { -INT8_C( 118), INT8_C( 109), INT8_C( 99), -INT8_C( 24), INT8_C( 52), -INT8_C( 45), -INT8_C( 5), INT8_C( 57), -INT8_C( 117), -INT8_C( 90), -INT8_C( 92), INT8_C( 59), -INT8_C( 90), -INT8_C( 117), -INT8_C( 94), INT8_C( 49) } }, { UINT16_C(44468), { INT8_C( 25), -INT8_C( 24), INT8_C( 87), INT8_C( 103), -INT8_C( 93), -INT8_C( 30), INT8_C( 13), -INT8_C( 74), INT8_C( 47), INT8_C( 66), INT8_C( 48), -INT8_C( 46), INT8_MAX, -INT8_C( 16), INT8_C( 63), -INT8_C( 30) }, { -INT8_C( 68), -INT8_C( 9), -INT8_C( 74), -INT8_C( 73), INT8_C( 48), INT8_C( 108), INT8_C( 107), -INT8_C( 44), -INT8_C( 88), INT8_C( 17), INT8_C( 95), -INT8_C( 96), INT8_C( 66), INT8_C( 19), INT8_C( 77), INT8_C( 92) }, { INT8_C( 25), -INT8_C( 24), -INT8_C( 74), INT8_C( 103), INT8_C( 48), INT8_C( 108), INT8_C( 13), -INT8_C( 44), -INT8_C( 88), INT8_C( 66), INT8_C( 95), -INT8_C( 96), INT8_MAX, INT8_C( 19), INT8_C( 63), INT8_C( 92) } }, { UINT16_C(42236), { -INT8_C( 61), -INT8_C( 97), -INT8_C( 122), -INT8_C( 47), INT8_C( 85), -INT8_C( 75), INT8_C( 19), -INT8_C( 123), -INT8_C( 121), -INT8_C( 110), INT8_C( 118), -INT8_C( 57), INT8_C( 117), INT8_C( 50), -INT8_C( 66), INT8_C( 43) }, { -INT8_C( 22), -INT8_C( 18), -INT8_C( 105), INT8_C( 85), -INT8_C( 62), INT8_C( 63), INT8_C( 102), INT8_C( 34), -INT8_C( 33), -INT8_C( 87), INT8_C( 53), INT8_C( 44), INT8_C( 5), INT8_C( 49), -INT8_C( 48), -INT8_C( 56) }, { -INT8_C( 61), -INT8_C( 97), -INT8_C( 105), INT8_C( 85), -INT8_C( 62), INT8_C( 63), INT8_C( 102), INT8_C( 34), -INT8_C( 121), -INT8_C( 110), INT8_C( 53), -INT8_C( 57), INT8_C( 117), INT8_C( 49), -INT8_C( 66), -INT8_C( 56) 
} }, { UINT16_C(22224), { -INT8_C( 103), INT8_C( 37), INT8_C( 12), -INT8_C( 83), -INT8_C( 85), -INT8_C( 109), INT8_C( 63), INT8_C( 33), INT8_C( 90), -INT8_C( 76), INT8_C( 83), INT8_C( 24), -INT8_C( 33), INT8_C( 61), INT8_C( 6), INT8_C( 119) }, { -INT8_C( 110), -INT8_C( 55), -INT8_C( 74), -INT8_C( 7), -INT8_C( 21), -INT8_C( 106), -INT8_C( 94), INT8_C( 32), -INT8_C( 62), -INT8_C( 89), INT8_C( 82), -INT8_C( 109), INT8_C( 111), INT8_C( 34), -INT8_C( 23), INT8_C( 9) }, { -INT8_C( 103), INT8_C( 37), INT8_C( 12), -INT8_C( 83), -INT8_C( 21), -INT8_C( 109), -INT8_C( 94), INT8_C( 32), INT8_C( 90), -INT8_C( 89), INT8_C( 82), INT8_C( 24), INT8_C( 111), INT8_C( 61), -INT8_C( 23), INT8_C( 119) } }, { UINT16_C(62792), { -INT8_C( 74), -INT8_C( 13), -INT8_C( 119), -INT8_C( 11), INT8_C( 20), -INT8_C( 29), -INT8_C( 86), INT8_C( 103), -INT8_C( 4), -INT8_C( 119), -INT8_C( 91), INT8_C( 2), INT8_C( 0), INT8_C( 55), -INT8_C( 53), -INT8_C( 73) }, { INT8_C( 48), -INT8_C( 74), INT8_C( 77), -INT8_C( 46), -INT8_C( 41), INT8_C( 15), INT8_C( 121), INT8_C( 41), -INT8_C( 94), -INT8_C( 23), INT8_C( 75), -INT8_C( 116), -INT8_C( 14), -INT8_C( 109), -INT8_C( 127), -INT8_C( 88) }, { -INT8_C( 74), -INT8_C( 13), -INT8_C( 119), -INT8_C( 46), INT8_C( 20), -INT8_C( 29), INT8_C( 121), INT8_C( 103), -INT8_C( 94), -INT8_C( 119), INT8_C( 75), INT8_C( 2), -INT8_C( 14), -INT8_C( 109), -INT8_C( 127), -INT8_C( 88) } }, { UINT16_C( 2694), { -INT8_C( 99), -INT8_C( 102), -INT8_C( 18), INT8_C( 71), INT8_C( 2), -INT8_C( 22), -INT8_C( 47), -INT8_C( 89), -INT8_C( 20), -INT8_C( 47), -INT8_C( 34), -INT8_C( 72), -INT8_C( 120), INT8_C( 15), INT8_C( 110), -INT8_C( 43) }, { -INT8_C( 31), INT8_C( 69), -INT8_C( 27), INT8_C( 91), INT8_C( 110), -INT8_C( 121), INT8_C( 68), -INT8_C( 70), INT8_C( 19), INT8_C( 54), INT8_C( 77), -INT8_C( 107), -INT8_C( 34), -INT8_C( 44), -INT8_C( 97), INT8_C( 123) }, { -INT8_C( 99), INT8_C( 69), -INT8_C( 27), INT8_C( 71), INT8_C( 2), -INT8_C( 22), -INT8_C( 47), -INT8_C( 70), -INT8_C( 20), INT8_C( 54), 
-INT8_C( 34), -INT8_C( 107), -INT8_C( 120), INT8_C( 15), INT8_C( 110), -INT8_C( 43) } }, { UINT16_C(36206), { -INT8_C( 61), INT8_C( 112), INT8_C( 119), -INT8_C( 108), INT8_C( 23), INT8_C( 100), INT8_C( 101), -INT8_C( 10), INT8_C( 28), -INT8_C( 18), INT8_C( 5), -INT8_C( 118), -INT8_C( 61), -INT8_C( 26), -INT8_C( 48), -INT8_C( 88) }, { INT8_C( 65), INT8_C( 62), INT8_C( 48), -INT8_C( 123), -INT8_C( 8), INT8_C( 67), -INT8_C( 69), INT8_C( 70), -INT8_C( 40), -INT8_C( 103), INT8_C( 26), INT8_C( 120), INT8_C( 21), -INT8_C( 120), INT8_C( 5), -INT8_C( 40) }, { -INT8_C( 61), INT8_C( 62), INT8_C( 48), -INT8_C( 123), INT8_C( 23), INT8_C( 67), -INT8_C( 69), -INT8_C( 10), -INT8_C( 40), -INT8_C( 18), INT8_C( 26), INT8_C( 120), -INT8_C( 61), -INT8_C( 26), -INT8_C( 48), -INT8_C( 40) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_mask_blend_epi8(test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i b = simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_mask_blend_epi8(k, a, b); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_mask_blend_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask8 k; const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { UINT8_C(249), { INT16_C( 27773), -INT16_C( 7920), INT16_C( 1745), -INT16_C( 16387), -INT16_C( 30965), -INT16_C( 3453), INT16_C( 11095), -INT16_C( 27085) }, { 
-INT16_C( 18085), -INT16_C( 24690), -INT16_C( 11148), INT16_C( 3703), -INT16_C( 4114), INT16_C( 30499), -INT16_C( 1035), INT16_C( 29296) }, { -INT16_C( 18085), -INT16_C( 7920), INT16_C( 1745), INT16_C( 3703), -INT16_C( 4114), INT16_C( 30499), -INT16_C( 1035), INT16_C( 29296) } }, { UINT8_C(103), { INT16_C( 21376), -INT16_C( 30920), -INT16_C( 1968), -INT16_C( 10350), -INT16_C( 31621), -INT16_C( 22993), -INT16_C( 14920), INT16_C( 28930) }, { -INT16_C( 24237), INT16_C( 10469), -INT16_C( 3304), INT16_C( 2070), -INT16_C( 29418), INT16_C( 4605), INT16_C( 28669), INT16_C( 32376) }, { -INT16_C( 24237), INT16_C( 10469), -INT16_C( 3304), -INT16_C( 10350), -INT16_C( 31621), INT16_C( 4605), INT16_C( 28669), INT16_C( 28930) } }, { UINT8_C(194), { INT16_C( 1457), -INT16_C( 22254), -INT16_C( 5737), INT16_C( 7204), -INT16_C( 13800), -INT16_C( 8748), INT16_C( 17868), INT16_C( 27953) }, { INT16_C( 22826), INT16_C( 7814), -INT16_C( 29073), -INT16_C( 716), INT16_C( 18059), -INT16_C( 1286), INT16_C( 30910), INT16_C( 28604) }, { INT16_C( 1457), INT16_C( 7814), -INT16_C( 5737), INT16_C( 7204), -INT16_C( 13800), -INT16_C( 8748), INT16_C( 30910), INT16_C( 28604) } }, { UINT8_C(125), { INT16_C( 6350), -INT16_C( 18667), INT16_C( 12604), INT16_C( 2000), -INT16_C( 21243), INT16_C( 19155), INT16_C( 16862), INT16_C( 14196) }, { -INT16_C( 27961), INT16_C( 21927), -INT16_C( 23353), INT16_C( 3552), -INT16_C( 9570), INT16_C( 6091), INT16_C( 15254), INT16_C( 25748) }, { -INT16_C( 27961), -INT16_C( 18667), -INT16_C( 23353), INT16_C( 3552), -INT16_C( 9570), INT16_C( 6091), INT16_C( 15254), INT16_C( 14196) } }, { UINT8_C( 83), { INT16_C( 7081), -INT16_C( 9584), -INT16_C( 26645), -INT16_C( 26145), INT16_C( 10602), -INT16_C( 21641), -INT16_C( 20578), INT16_C( 12402) }, { -INT16_C( 14506), -INT16_C( 1289), INT16_C( 1191), -INT16_C( 32360), -INT16_C( 20528), INT16_C( 2839), INT16_C( 31556), -INT16_C( 4770) }, { -INT16_C( 14506), -INT16_C( 1289), -INT16_C( 26645), -INT16_C( 26145), -INT16_C( 20528), 
-INT16_C( 21641), INT16_C( 31556), INT16_C( 12402) } }, { UINT8_C(151), { -INT16_C( 14098), -INT16_C( 31358), INT16_C( 7079), -INT16_C( 11792), -INT16_C( 25709), INT16_C( 17007), -INT16_C( 24818), -INT16_C( 10856) }, { -INT16_C( 28009), -INT16_C( 25731), -INT16_C( 470), -INT16_C( 9621), INT16_C( 30230), -INT16_C( 28386), INT16_C( 3029), -INT16_C( 15576) }, { -INT16_C( 28009), -INT16_C( 25731), -INT16_C( 470), -INT16_C( 11792), INT16_C( 30230), INT16_C( 17007), -INT16_C( 24818), -INT16_C( 15576) } }, { UINT8_C(211), { INT16_C( 18859), -INT16_C( 14725), INT16_C( 19513), -INT16_C( 11175), -INT16_C( 25669), INT16_C( 23266), -INT16_C( 18381), -INT16_C( 14863) }, { -INT16_C( 29387), INT16_C( 13296), -INT16_C( 13576), INT16_C( 28489), -INT16_C( 9240), -INT16_C( 3260), INT16_C( 1795), -INT16_C( 20793) }, { -INT16_C( 29387), INT16_C( 13296), INT16_C( 19513), -INT16_C( 11175), -INT16_C( 9240), INT16_C( 23266), INT16_C( 1795), -INT16_C( 20793) } }, { UINT8_C( 80), { INT16_C( 30018), -INT16_C( 29047), INT16_C( 24270), INT16_C( 27209), -INT16_C( 23744), -INT16_C( 1891), INT16_C( 25493), INT16_C( 8749) }, { INT16_C( 24915), INT16_C( 7450), -INT16_C( 30294), -INT16_C( 31483), -INT16_C( 1843), -INT16_C( 10871), INT16_C( 14271), INT16_C( 293) }, { INT16_C( 30018), -INT16_C( 29047), INT16_C( 24270), INT16_C( 27209), -INT16_C( 1843), -INT16_C( 1891), INT16_C( 14271), INT16_C( 8749) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_mask_blend_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_mask_blend_epi16(k, a, b); 
simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_mask_blend_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask8 k; const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { UINT8_C( 50), { -INT32_C( 1370076836), -INT32_C( 960805400), INT32_C( 2002808766), INT32_C( 17942020) }, { INT32_C( 955999198), INT32_C( 934509133), INT32_C( 143229169), -INT32_C( 1606769852) }, { -INT32_C( 1370076836), INT32_C( 934509133), INT32_C( 2002808766), INT32_C( 17942020) } }, { UINT8_C(236), { -INT32_C( 690729328), INT32_C( 2023004681), -INT32_C( 1048835077), INT32_C( 2141158684) }, { -INT32_C( 221456520), INT32_C( 182649994), INT32_C( 810544013), -INT32_C( 1239617754) }, { -INT32_C( 690729328), INT32_C( 2023004681), INT32_C( 810544013), -INT32_C( 1239617754) } }, { UINT8_C( 62), { -INT32_C( 1958245135), INT32_C( 730251296), -INT32_C( 1169733828), -INT32_C( 1120745498) }, { -INT32_C( 1756945005), -INT32_C( 198880760), -INT32_C( 1877322335), INT32_C( 1674498162) }, { -INT32_C( 1958245135), -INT32_C( 198880760), -INT32_C( 1877322335), INT32_C( 1674498162) } }, { UINT8_C( 93), { -INT32_C( 696390122), -INT32_C( 1139627659), -INT32_C( 1214001936), INT32_C( 608919807) }, { -INT32_C( 97721688), INT32_C( 1570447367), INT32_C( 198126394), INT32_C( 275264250) }, { -INT32_C( 97721688), -INT32_C( 1139627659), INT32_C( 198126394), INT32_C( 275264250) } }, { UINT8_C( 32), { -INT32_C( 1902778651), -INT32_C( 981511432), INT32_C( 1438922485), INT32_C( 1694361729) }, { INT32_C( 896268052), -INT32_C( 1099970414), -INT32_C( 910656873), -INT32_C( 924137246) }, { -INT32_C( 1902778651), -INT32_C( 981511432), INT32_C( 1438922485), INT32_C( 1694361729) } }, { UINT8_C(174), { -INT32_C( 777628033), INT32_C( 214330325), INT32_C( 
395123759), INT32_C( 288157977) }, { INT32_C( 631464285), INT32_C( 1270702544), -INT32_C( 517110247), -INT32_C( 259000976) }, { -INT32_C( 777628033), INT32_C( 1270702544), -INT32_C( 517110247), -INT32_C( 259000976) } }, { UINT8_C( 76), { -INT32_C( 1591623370), -INT32_C( 1529795192), -INT32_C( 1396840261), INT32_C( 1963576852) }, { -INT32_C( 750375054), INT32_C( 1928172012), -INT32_C( 1260138818), -INT32_C( 1811885218) }, { -INT32_C( 1591623370), -INT32_C( 1529795192), -INT32_C( 1260138818), -INT32_C( 1811885218) } }, { UINT8_C(148), { INT32_C( 1327248673), -INT32_C( 284508154), INT32_C( 1275311742), -INT32_C( 272729664) }, { INT32_C( 1356567231), INT32_C( 1292782975), INT32_C( 61588016), -INT32_C( 459784254) }, { INT32_C( 1327248673), -INT32_C( 284508154), INT32_C( 61588016), -INT32_C( 272729664) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_mask_blend_epi32(test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_mask_blend_epi32(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_mask_blend_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask8 k; const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { UINT8_C(117), { -INT64_C( 868233490955816012), -INT64_C( 8023546470421282962) }, { -INT64_C( 4036531465351072131), INT64_C( 
5457357344973913864) }, { -INT64_C( 4036531465351072131), -INT64_C( 8023546470421282962) } }, { UINT8_C(160), { -INT64_C( 932014553403899848), -INT64_C( 7807973341973579900) }, { INT64_C( 7706889779473313680), INT64_C( 3528091987569884392) }, { -INT64_C( 932014553403899848), -INT64_C( 7807973341973579900) } }, { UINT8_C( 13), { -INT64_C( 2654392715038370860), INT64_C( 7256720621612585124) }, { -INT64_C( 3248439389745864028), INT64_C( 906625645223297338) }, { -INT64_C( 3248439389745864028), INT64_C( 7256720621612585124) } }, { UINT8_C(206), { INT64_C( 3147046207213449556), INT64_C( 3005454041982489093) }, { -INT64_C( 1889098808231413676), INT64_C( 3974191252607160157) }, { INT64_C( 3147046207213449556), INT64_C( 3974191252607160157) } }, { UINT8_C(218), { -INT64_C( 4725154603078434855), -INT64_C( 1452288612773823232) }, { INT64_C( 9016053822695368239), INT64_C( 8761356837411911826) }, { -INT64_C( 4725154603078434855), INT64_C( 8761356837411911826) } }, { UINT8_C(215), { -INT64_C( 3950922088230690997), -INT64_C( 1268634622804315962) }, { -INT64_C( 4792224427882568073), INT64_C( 2047121126703512313) }, { -INT64_C( 4792224427882568073), INT64_C( 2047121126703512313) } }, { UINT8_C(181), { INT64_C( 2554325437316614187), INT64_C( 3613182180302039237) }, { -INT64_C( 3942280737969467209), INT64_C( 7934135569143749521) }, { -INT64_C( 3942280737969467209), INT64_C( 3613182180302039237) } }, { UINT8_C(254), { INT64_C( 3795291543367195164), -INT64_C( 8317177106714765247) }, { INT64_C( 626250131139388395), INT64_C( 5634769709070751613) }, { INT64_C( 3795291543367195164), INT64_C( 5634769709070751613) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_mask_blend_epi64(test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i 
= 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i b = simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_mask_blend_epi64(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_mask_blend_ps (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask8 k; const simde_float32 a[8]; const simde_float32 b[8]; const simde_float32 r[8]; } test_vec[] = { { UINT8_C( 39), { SIMDE_FLOAT32_C( -639.84), SIMDE_FLOAT32_C( 741.44), SIMDE_FLOAT32_C( 755.52), SIMDE_FLOAT32_C( -503.25) }, { SIMDE_FLOAT32_C( 89.71), SIMDE_FLOAT32_C( 206.49), SIMDE_FLOAT32_C( -233.67), SIMDE_FLOAT32_C( -362.60) }, { SIMDE_FLOAT32_C( 89.71), SIMDE_FLOAT32_C( 206.49), SIMDE_FLOAT32_C( -233.67), SIMDE_FLOAT32_C( -503.25) } }, { UINT8_C(205), { SIMDE_FLOAT32_C( 180.01), SIMDE_FLOAT32_C( 348.51), SIMDE_FLOAT32_C( 413.37), SIMDE_FLOAT32_C( 694.90) }, { SIMDE_FLOAT32_C( 865.39), SIMDE_FLOAT32_C( 331.86), SIMDE_FLOAT32_C( -871.02), SIMDE_FLOAT32_C( -715.35) }, { SIMDE_FLOAT32_C( 865.39), SIMDE_FLOAT32_C( 348.51), SIMDE_FLOAT32_C( -871.02), SIMDE_FLOAT32_C( -715.35) } }, { UINT8_C(136), { SIMDE_FLOAT32_C( 381.28), SIMDE_FLOAT32_C( -125.11), SIMDE_FLOAT32_C( -174.63), SIMDE_FLOAT32_C( 503.75) }, { SIMDE_FLOAT32_C( 922.00), SIMDE_FLOAT32_C( -613.33), SIMDE_FLOAT32_C( -404.48), SIMDE_FLOAT32_C( 984.72) }, { SIMDE_FLOAT32_C( 381.28), SIMDE_FLOAT32_C( -125.11), SIMDE_FLOAT32_C( -174.63), SIMDE_FLOAT32_C( 984.72) } }, { UINT8_C( 37), { SIMDE_FLOAT32_C( 562.47), SIMDE_FLOAT32_C( -146.22), SIMDE_FLOAT32_C( -88.35), SIMDE_FLOAT32_C( -849.23) }, { SIMDE_FLOAT32_C( 213.94), SIMDE_FLOAT32_C( -346.91), SIMDE_FLOAT32_C( 906.29), SIMDE_FLOAT32_C( 710.69) }, { SIMDE_FLOAT32_C( 213.94), 
SIMDE_FLOAT32_C( -146.22), SIMDE_FLOAT32_C( 906.29), SIMDE_FLOAT32_C( -849.23) } }, { UINT8_C( 24), { SIMDE_FLOAT32_C( 112.78), SIMDE_FLOAT32_C( -522.98), SIMDE_FLOAT32_C( -619.80), SIMDE_FLOAT32_C( -869.27) }, { SIMDE_FLOAT32_C( 657.03), SIMDE_FLOAT32_C( 728.71), SIMDE_FLOAT32_C( 544.09), SIMDE_FLOAT32_C( 351.94) }, { SIMDE_FLOAT32_C( 112.78), SIMDE_FLOAT32_C( -522.98), SIMDE_FLOAT32_C( -619.80), SIMDE_FLOAT32_C( 351.94) } }, { UINT8_C(213), { SIMDE_FLOAT32_C( -124.04), SIMDE_FLOAT32_C( 480.92), SIMDE_FLOAT32_C( 878.75), SIMDE_FLOAT32_C( 551.80) }, { SIMDE_FLOAT32_C( -137.80), SIMDE_FLOAT32_C( -246.36), SIMDE_FLOAT32_C( -622.83), SIMDE_FLOAT32_C( -634.06) }, { SIMDE_FLOAT32_C( -137.80), SIMDE_FLOAT32_C( 480.92), SIMDE_FLOAT32_C( -622.83), SIMDE_FLOAT32_C( 551.80) } }, { UINT8_C(143), { SIMDE_FLOAT32_C( -236.16), SIMDE_FLOAT32_C( -38.53), SIMDE_FLOAT32_C( -339.64), SIMDE_FLOAT32_C( -945.28) }, { SIMDE_FLOAT32_C( -476.06), SIMDE_FLOAT32_C( 514.14), SIMDE_FLOAT32_C( -33.63), SIMDE_FLOAT32_C( -325.30) }, { SIMDE_FLOAT32_C( -476.06), SIMDE_FLOAT32_C( 514.14), SIMDE_FLOAT32_C( -33.63), SIMDE_FLOAT32_C( -325.30) } }, { UINT8_C(118), { SIMDE_FLOAT32_C( 619.46), SIMDE_FLOAT32_C( -419.01), SIMDE_FLOAT32_C( -561.23), SIMDE_FLOAT32_C( 362.26) }, { SIMDE_FLOAT32_C( 693.78), SIMDE_FLOAT32_C( -84.21), SIMDE_FLOAT32_C( 742.46), SIMDE_FLOAT32_C( 824.50) }, { SIMDE_FLOAT32_C( 619.46), SIMDE_FLOAT32_C( -84.21), SIMDE_FLOAT32_C( 742.46), SIMDE_FLOAT32_C( 362.26) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_mm_mask_blend_ps(test_vec[i].k, a, b); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128 a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), 
SIMDE_FLOAT32_C(1000.0)); simde__m128 b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m128 r = simde_mm_mask_blend_ps(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_mask_blend_pd (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask8 k; const simde_float64 a[4]; const simde_float64 b[4]; const simde_float64 r[4]; } test_vec[] = { { UINT8_C( 47), { SIMDE_FLOAT64_C( 471.17), SIMDE_FLOAT64_C( 368.60) }, { SIMDE_FLOAT64_C( 924.77), SIMDE_FLOAT64_C( 65.27) }, { SIMDE_FLOAT64_C( 924.77), SIMDE_FLOAT64_C( 65.27) } }, { UINT8_C(112), { SIMDE_FLOAT64_C( 405.68), SIMDE_FLOAT64_C( -55.98) }, { SIMDE_FLOAT64_C( 796.35), SIMDE_FLOAT64_C( -732.12) }, { SIMDE_FLOAT64_C( 405.68), SIMDE_FLOAT64_C( -55.98) } }, { UINT8_C( 18), { SIMDE_FLOAT64_C( -826.48), SIMDE_FLOAT64_C( -366.18) }, { SIMDE_FLOAT64_C( -626.69), SIMDE_FLOAT64_C( -62.63) }, { SIMDE_FLOAT64_C( -826.48), SIMDE_FLOAT64_C( -62.63) } }, { UINT8_C( 11), { SIMDE_FLOAT64_C( 33.67), SIMDE_FLOAT64_C( -7.91) }, { SIMDE_FLOAT64_C( -880.77), SIMDE_FLOAT64_C( -452.19) }, { SIMDE_FLOAT64_C( -880.77), SIMDE_FLOAT64_C( -452.19) } }, { UINT8_C( 2), { SIMDE_FLOAT64_C( -206.07), SIMDE_FLOAT64_C( 275.89) }, { SIMDE_FLOAT64_C( 577.92), SIMDE_FLOAT64_C( 374.93) }, { SIMDE_FLOAT64_C( -206.07), SIMDE_FLOAT64_C( 374.93) } }, { UINT8_C(106), { SIMDE_FLOAT64_C( -59.82), SIMDE_FLOAT64_C( 68.70) }, { SIMDE_FLOAT64_C( -369.54), SIMDE_FLOAT64_C( -317.37) }, { SIMDE_FLOAT64_C( -59.82), SIMDE_FLOAT64_C( -317.37) } }, { UINT8_C( 48), { SIMDE_FLOAT64_C( 203.29), SIMDE_FLOAT64_C( -846.20) }, { SIMDE_FLOAT64_C( -738.20), SIMDE_FLOAT64_C( 128.05) }, { SIMDE_FLOAT64_C( 203.29), SIMDE_FLOAT64_C( -846.20) } }, { UINT8_C( 75), { SIMDE_FLOAT64_C( 
-493.65), SIMDE_FLOAT64_C( -466.26) }, { SIMDE_FLOAT64_C( -836.91), SIMDE_FLOAT64_C( -697.29) }, { SIMDE_FLOAT64_C( -836.91), SIMDE_FLOAT64_C( -697.29) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d b = simde_mm_loadu_pd(test_vec[i].b); simde__m128d r = simde_mm_mask_blend_pd(test_vec[i].k, a, b); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128d a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128d b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128d r = simde_mm_mask_blend_pd(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask_blend_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask32 k; const int8_t a[32]; const int8_t b[32]; const int8_t r[32]; } test_vec[] = { { UINT16_C(13581), { -INT8_C( 103), INT8_C( 106), -INT8_C( 31), -INT8_C( 9), INT8_C( 61), -INT8_C( 60), INT8_C( 24), INT8_C( 51), INT8_C( 122), INT8_C( 46), INT8_C( 31), -INT8_C( 47), INT8_C( 18), INT8_C( 56), INT8_C( 37), -INT8_C( 4), INT8_C( 97), INT8_C( 104), INT8_C( 26), INT8_C( 25), -INT8_C( 127), -INT8_C( 20), INT8_C( 67), INT8_C( 22), INT8_C( 74), -INT8_C( 12), -INT8_C( 58), -INT8_C( 7), INT8_C( 16), -INT8_C( 45), INT8_C( 46), -INT8_C( 87) }, { INT8_C( 61), INT8_C( 16), -INT8_C( 95), INT8_C( 122), -INT8_C( 44), -INT8_C( 71), -INT8_C( 83), INT8_C( 78), -INT8_C( 25), -INT8_C( 52), INT8_C( 32), -INT8_C( 6), INT8_C( 4), INT8_C( 69), -INT8_C( 10), INT8_C( 102), -INT8_C( 83), INT8_C( 17), 
INT8_MAX, INT8_C( 46), -INT8_C( 3), -INT8_C( 62), INT8_C( 68), INT8_C( 71), -INT8_C( 73), INT8_C( 10), INT8_C( 65), -INT8_C( 57), -INT8_C( 35), INT8_C( 111), INT8_C( 113), INT8_C( 26) }, { INT8_C( 61), INT8_C( 106), -INT8_C( 95), INT8_C( 122), INT8_C( 61), -INT8_C( 60), INT8_C( 24), INT8_C( 51), -INT8_C( 25), INT8_C( 46), INT8_C( 32), -INT8_C( 47), INT8_C( 4), INT8_C( 69), INT8_C( 37), -INT8_C( 4), INT8_C( 97), INT8_C( 104), INT8_C( 26), INT8_C( 25), -INT8_C( 127), -INT8_C( 20), INT8_C( 67), INT8_C( 22), INT8_C( 74), -INT8_C( 12), -INT8_C( 58), -INT8_C( 7), INT8_C( 16), -INT8_C( 45), INT8_C( 46), -INT8_C( 87) } }, { UINT16_C( 4735), { -INT8_C( 107), INT8_C( 83), -INT8_C( 53), INT8_C( 66), -INT8_C( 94), -INT8_C( 77), INT8_C( 15), -INT8_C( 62), -INT8_C( 83), INT8_C( 19), INT8_C( 7), -INT8_C( 93), INT8_C( 121), -INT8_C( 76), -INT8_C( 76), -INT8_C( 8), -INT8_C( 30), -INT8_C( 79), -INT8_C( 69), INT8_C( 38), -INT8_C( 7), INT8_C( 114), INT8_C( 48), INT8_C( 58), INT8_C( 57), INT8_C( 13), -INT8_C( 87), -INT8_C( 86), INT8_C( 39), INT8_C( 41), -INT8_C( 68), -INT8_C( 68) }, { INT8_C( 124), -INT8_C( 120), -INT8_C( 1), INT8_C( 30), INT8_C( 59), INT8_C( 14), -INT8_C( 32), -INT8_C( 24), INT8_C( 33), -INT8_C( 25), -INT8_C( 117), -INT8_C( 101), -INT8_C( 101), INT8_C( 64), -INT8_C( 109), INT8_C( 125), -INT8_C( 15), INT8_C( 78), -INT8_C( 93), -INT8_C( 22), -INT8_C( 64), -INT8_C( 45), INT8_C( 36), -INT8_C( 6), -INT8_C( 32), -INT8_C( 50), -INT8_C( 92), INT8_C( 8), -INT8_C( 9), INT8_C( 97), -INT8_C( 60), INT8_C( 115) }, { INT8_C( 124), -INT8_C( 120), -INT8_C( 1), INT8_C( 30), INT8_C( 59), INT8_C( 14), -INT8_C( 32), -INT8_C( 62), -INT8_C( 83), -INT8_C( 25), INT8_C( 7), -INT8_C( 93), -INT8_C( 101), -INT8_C( 76), -INT8_C( 76), -INT8_C( 8), -INT8_C( 30), -INT8_C( 79), -INT8_C( 69), INT8_C( 38), -INT8_C( 7), INT8_C( 114), INT8_C( 48), INT8_C( 58), INT8_C( 57), INT8_C( 13), -INT8_C( 87), -INT8_C( 86), INT8_C( 39), INT8_C( 41), -INT8_C( 68), -INT8_C( 68) } }, { UINT16_C(50153), { -INT8_C( 110), 
INT8_C( 36), -INT8_C( 47), INT8_C( 114), INT8_C( 12), -INT8_C( 13), INT8_C( 90), -INT8_C( 105), -INT8_C( 114), -INT8_C( 11), -INT8_C( 41), INT8_C( 33), INT8_C( 115), -INT8_C( 55), INT8_C( 112), INT8_C( 22), -INT8_C( 77), INT8_C( 48), -INT8_C( 22), -INT8_C( 40), INT8_C( 42), -INT8_C( 54), -INT8_C( 90), -INT8_C( 49), -INT8_C( 46), -INT8_C( 99), INT8_C( 48), -INT8_C( 105), INT8_C( 16), INT8_C( 25), INT8_C( 90), -INT8_C( 94) }, { INT8_C( 61), INT8_C( 44), INT8_C( 21), INT8_C( 73), INT8_C( 31), INT8_C( 111), -INT8_C( 32), -INT8_C( 83), INT8_C( 100), -INT8_C( 72), -INT8_C( 50), -INT8_C( 41), -INT8_C( 127), INT8_C( 62), -INT8_C( 18), INT8_C( 52), INT8_C( 111), -INT8_C( 40), INT8_C( 12), -INT8_C( 103), -INT8_C( 94), -INT8_C( 78), INT8_C( 104), INT8_C( 117), INT8_C( 79), -INT8_C( 104), INT8_C( 12), INT8_C( 96), -INT8_C( 79), INT8_C( 102), INT8_C( 2), -INT8_C( 18) }, { INT8_C( 61), INT8_C( 36), -INT8_C( 47), INT8_C( 73), INT8_C( 12), INT8_C( 111), -INT8_C( 32), -INT8_C( 83), INT8_C( 100), -INT8_C( 72), -INT8_C( 41), INT8_C( 33), INT8_C( 115), -INT8_C( 55), -INT8_C( 18), INT8_C( 52), -INT8_C( 77), INT8_C( 48), -INT8_C( 22), -INT8_C( 40), INT8_C( 42), -INT8_C( 54), -INT8_C( 90), -INT8_C( 49), -INT8_C( 46), -INT8_C( 99), INT8_C( 48), -INT8_C( 105), INT8_C( 16), INT8_C( 25), INT8_C( 90), -INT8_C( 94) } }, { UINT16_C( 6034), { INT8_C( 55), -INT8_C( 79), -INT8_C( 122), INT8_C( 24), INT8_C( 94), -INT8_C( 21), -INT8_C( 48), INT8_C( 45), -INT8_C( 62), INT8_C( 81), INT8_C( 107), -INT8_C( 80), -INT8_C( 123), -INT8_C( 38), -INT8_C( 120), -INT8_C( 110), INT8_C( 116), INT8_C( 43), INT8_C( 68), -INT8_C( 36), -INT8_C( 96), -INT8_C( 108), INT8_C( 117), -INT8_C( 84), -INT8_C( 12), INT8_C( 38), INT8_C( 18), -INT8_C( 10), INT8_C( 21), -INT8_C( 91), INT8_C( 14), INT8_C( 76) }, { INT8_C( 86), -INT8_C( 108), INT8_C( 100), -INT8_C( 75), INT8_MAX, INT8_C( 52), -INT8_C( 30), INT8_C( 66), -INT8_C( 123), INT8_C( 77), -INT8_C( 14), INT8_C( 11), INT8_C( 40), INT8_C( 123), -INT8_C( 99), -INT8_C( 100), 
-INT8_C( 90), -INT8_C( 31), INT8_C( 120), INT8_C( 70), INT8_C( 117), -INT8_C( 19), -INT8_C( 14), INT8_C( 105), INT8_C( 20), INT8_C( 4), INT8_C( 96), INT8_C( 41), -INT8_C( 87), INT8_C( 110), INT8_C( 117), INT8_C( 0) }, { INT8_C( 55), -INT8_C( 108), -INT8_C( 122), INT8_C( 24), INT8_MAX, -INT8_C( 21), -INT8_C( 48), INT8_C( 66), -INT8_C( 123), INT8_C( 77), -INT8_C( 14), -INT8_C( 80), INT8_C( 40), -INT8_C( 38), -INT8_C( 120), -INT8_C( 110), INT8_C( 116), INT8_C( 43), INT8_C( 68), -INT8_C( 36), -INT8_C( 96), -INT8_C( 108), INT8_C( 117), -INT8_C( 84), -INT8_C( 12), INT8_C( 38), INT8_C( 18), -INT8_C( 10), INT8_C( 21), -INT8_C( 91), INT8_C( 14), INT8_C( 76) } }, { UINT16_C(55810), { -INT8_C( 75), -INT8_C( 126), INT8_C( 14), -INT8_C( 105), -INT8_C( 60), -INT8_C( 108), -INT8_C( 28), -INT8_C( 74), -INT8_C( 97), INT8_C( 12), INT8_C( 49), INT8_C( 60), -INT8_C( 88), -INT8_C( 41), INT8_C( 29), INT8_C( 33), INT8_C( 29), -INT8_C( 109), INT8_C( 14), INT8_C( 15), -INT8_C( 4), INT8_C( 34), INT8_C( 20), INT8_C( 92), INT8_C( 75), -INT8_C( 67), -INT8_C( 54), -INT8_C( 63), -INT8_C( 67), -INT8_C( 51), -INT8_C( 101), INT8_C( 114) }, { INT8_C( 79), -INT8_C( 87), INT8_C( 9), INT8_C( 19), INT8_C( 61), -INT8_C( 18), -INT8_C( 55), -INT8_C( 36), -INT8_C( 6), -INT8_C( 5), INT8_C( 24), -INT8_C( 93), -INT8_C( 46), INT8_C( 54), -INT8_C( 60), -INT8_C( 16), -INT8_C( 55), -INT8_C( 46), -INT8_C( 1), -INT8_C( 59), -INT8_C( 11), INT8_C( 19), INT8_C( 34), INT8_C( 64), -INT8_C( 47), -INT8_C( 20), INT8_C( 1), -INT8_C( 114), -INT8_C( 71), -INT8_C( 100), INT8_C( 1), INT8_C( 8) }, { -INT8_C( 75), -INT8_C( 87), INT8_C( 14), -INT8_C( 105), -INT8_C( 60), -INT8_C( 108), -INT8_C( 28), -INT8_C( 74), -INT8_C( 97), -INT8_C( 5), INT8_C( 49), -INT8_C( 93), -INT8_C( 46), -INT8_C( 41), -INT8_C( 60), -INT8_C( 16), INT8_C( 29), -INT8_C( 109), INT8_C( 14), INT8_C( 15), -INT8_C( 4), INT8_C( 34), INT8_C( 20), INT8_C( 92), INT8_C( 75), -INT8_C( 67), -INT8_C( 54), -INT8_C( 63), -INT8_C( 67), -INT8_C( 51), -INT8_C( 101), INT8_C( 
114) } }, { UINT16_C( 2630), { INT8_C( 27), -INT8_C( 125), -INT8_C( 8), -INT8_C( 27), INT8_C( 96), -INT8_C( 13), -INT8_C( 32), INT8_C( 120), -INT8_C( 106), -INT8_C( 78), -INT8_C( 82), INT8_C( 90), -INT8_C( 94), INT8_C( 119), INT8_C( 44), -INT8_C( 94), INT8_C( 61), INT8_C( 33), -INT8_C( 75), INT8_C( 95), INT8_C( 98), -INT8_C( 122), INT8_C( 75), INT8_C( 99), INT8_C( 21), INT8_C( 5), INT8_C( 0), INT8_C( 22), INT8_C( 13), INT8_C( 70), INT8_C( 32), INT8_C( 41) }, { -INT8_C( 55), INT8_C( 25), INT8_C( 14), INT8_C( 41), INT8_C( 12), -INT8_C( 18), -INT8_C( 94), -INT8_C( 94), -INT8_C( 96), INT8_C( 80), -INT8_C( 4), INT8_C( 67), -INT8_C( 56), INT8_C( 40), -INT8_C( 27), INT8_C( 5), INT8_C( 74), -INT8_C( 102), INT8_C( 100), -INT8_C( 84), INT8_C( 33), -INT8_C( 81), INT8_C( 15), INT8_C( 54), -INT8_C( 76), INT8_C( 15), INT8_C( 76), -INT8_C( 62), INT8_C( 85), INT8_C( 108), -INT8_C( 21), INT8_C( 31) }, { INT8_C( 27), INT8_C( 25), INT8_C( 14), -INT8_C( 27), INT8_C( 96), -INT8_C( 13), -INT8_C( 94), INT8_C( 120), -INT8_C( 106), INT8_C( 80), -INT8_C( 82), INT8_C( 67), -INT8_C( 94), INT8_C( 119), INT8_C( 44), -INT8_C( 94), INT8_C( 61), INT8_C( 33), -INT8_C( 75), INT8_C( 95), INT8_C( 98), -INT8_C( 122), INT8_C( 75), INT8_C( 99), INT8_C( 21), INT8_C( 5), INT8_C( 0), INT8_C( 22), INT8_C( 13), INT8_C( 70), INT8_C( 32), INT8_C( 41) } }, { UINT16_C(63877), { INT8_C( 72), -INT8_C( 111), -INT8_C( 25), -INT8_C( 22), INT8_C( 51), -INT8_C( 121), INT8_C( 59), INT8_C( 47), -INT8_C( 54), INT8_C( 3), INT8_C( 88), -INT8_C( 81), INT8_C( 8), -INT8_C( 94), INT8_C( 74), INT8_C( 108), INT8_C( 78), INT8_C( 107), INT8_C( 27), INT8_C( 93), -INT8_C( 95), -INT8_C( 48), INT8_C( 109), -INT8_C( 19), -INT8_C( 110), -INT8_C( 62), INT8_C( 89), INT8_C( 125), -INT8_C( 31), -INT8_C( 33), INT8_C( 118), INT8_C( 42) }, { INT8_C( 112), INT8_C( 93), INT8_C( 20), -INT8_C( 92), -INT8_C( 28), INT8_C( 79), -INT8_C( 45), -INT8_C( 81), INT8_C( 82), INT8_C( 43), INT8_C( 94), INT8_C( 90), -INT8_C( 51), -INT8_C( 88), -INT8_C( 58), 
INT8_C( 27), INT8_C( 19), -INT8_C( 30), INT8_C( 121), -INT8_C( 76), -INT8_C( 78), -INT8_C( 26), -INT8_C( 95), INT8_C( 68), -INT8_C( 88), -INT8_C( 5), -INT8_C( 63), -INT8_C( 118), -INT8_C( 38), INT8_C( 55), -INT8_C( 76), INT8_C( 74) }, { INT8_C( 112), -INT8_C( 111), INT8_C( 20), -INT8_C( 22), INT8_C( 51), -INT8_C( 121), INT8_C( 59), -INT8_C( 81), INT8_C( 82), INT8_C( 3), INT8_C( 88), INT8_C( 90), -INT8_C( 51), -INT8_C( 88), -INT8_C( 58), INT8_C( 27), INT8_C( 78), INT8_C( 107), INT8_C( 27), INT8_C( 93), -INT8_C( 95), -INT8_C( 48), INT8_C( 109), -INT8_C( 19), -INT8_C( 110), -INT8_C( 62), INT8_C( 89), INT8_C( 125), -INT8_C( 31), -INT8_C( 33), INT8_C( 118), INT8_C( 42) } }, { UINT16_C(51348), { -INT8_C( 18), INT8_C( 120), INT8_C( 24), -INT8_C( 62), INT8_C( 39), INT8_C( 106), -INT8_C( 19), -INT8_C( 122), -INT8_C( 59), -INT8_C( 69), INT8_C( 46), -INT8_C( 117), -INT8_C( 42), INT8_C( 66), INT8_C( 109), INT8_C( 79), -INT8_C( 10), INT8_C( 31), INT8_C( 53), -INT8_C( 104), INT8_C( 99), -INT8_C( 34), -INT8_C( 109), INT8_C( 36), INT8_C( 104), INT8_C( 109), INT8_C( 91), INT8_C( 28), -INT8_C( 73), -INT8_C( 17), -INT8_C( 28), -INT8_C( 90) }, { INT8_C( 104), -INT8_C( 4), INT8_C( 104), -INT8_C( 113), INT8_C( 103), INT8_C( 85), INT8_C( 21), INT8_C( 44), INT8_C( 16), INT8_C( 68), -INT8_C( 73), -INT8_C( 25), -INT8_C( 122), INT8_C( 37), INT8_C( 54), INT8_C( 124), INT8_C( 68), INT8_C( 108), INT8_C( 20), -INT8_C( 88), INT8_C( 74), -INT8_C( 89), -INT8_C( 52), -INT8_C( 78), INT8_C( 20), INT8_C( 40), -INT8_C( 50), -INT8_C( 52), INT8_C( 23), -INT8_C( 78), INT8_C( 114), INT8_MAX }, { -INT8_C( 18), INT8_C( 120), INT8_C( 104), -INT8_C( 62), INT8_C( 103), INT8_C( 106), -INT8_C( 19), INT8_C( 44), -INT8_C( 59), -INT8_C( 69), INT8_C( 46), -INT8_C( 25), -INT8_C( 42), INT8_C( 66), INT8_C( 54), INT8_C( 124), -INT8_C( 10), INT8_C( 31), INT8_C( 53), -INT8_C( 104), INT8_C( 99), -INT8_C( 34), -INT8_C( 109), INT8_C( 36), INT8_C( 104), INT8_C( 109), INT8_C( 91), INT8_C( 28), -INT8_C( 73), -INT8_C( 17), 
-INT8_C( 28), -INT8_C( 90) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi8(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi8(test_vec[i].b); simde__m256i r = simde_mm256_mask_blend_epi8(test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m256i a = simde_test_x86_random_i8x32(); simde__m256i b = simde_test_x86_random_i8x32(); simde__m256i r = simde_mm256_mask_blend_epi8(k, a, b); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask_blend_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask16 k; const int16_t a[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { UINT16_C(24266), { -INT16_C( 14738), -INT16_C( 25037), INT16_C( 7320), INT16_C( 5650), INT16_C( 31105), INT16_C( 30204), INT16_C( 6032), INT16_C( 1329), INT16_C( 15938), INT16_C( 20922), -INT16_C( 27944), -INT16_C( 14965), INT16_C( 4067), INT16_C( 8408), -INT16_C( 23954), -INT16_C( 9090) }, { -INT16_C( 19864), INT16_C( 122), -INT16_C( 29234), INT16_C( 20247), INT16_C( 4870), -INT16_C( 26939), -INT16_C( 2518), INT16_C( 27803), INT16_C( 21812), INT16_C( 3261), INT16_C( 18920), -INT16_C( 13359), -INT16_C( 21928), -INT16_C( 14613), INT16_C( 26956), -INT16_C( 19038) }, { -INT16_C( 14738), INT16_C( 122), INT16_C( 7320), INT16_C( 20247), INT16_C( 31105), INT16_C( 30204), -INT16_C( 2518), INT16_C( 27803), INT16_C( 15938), INT16_C( 3261), INT16_C( 18920), -INT16_C( 13359), -INT16_C( 21928), INT16_C( 8408), INT16_C( 26956), -INT16_C( 9090) } }, { UINT16_C( 7451), { -INT16_C( 5707), 
-INT16_C( 13142), -INT16_C( 20423), -INT16_C( 289), INT16_C( 2630), -INT16_C( 7692), INT16_C( 10614), INT16_C( 13367), INT16_C( 7989), INT16_C( 1917), -INT16_C( 10774), -INT16_C( 10831), -INT16_C( 613), INT16_C( 15678), INT16_C( 23218), INT16_C( 26714) }, { INT16_C( 1091), INT16_C( 31796), INT16_C( 5300), -INT16_C( 1414), INT16_C( 28446), -INT16_C( 27428), INT16_C( 5016), -INT16_C( 12856), INT16_C( 17714), INT16_C( 7380), -INT16_C( 31462), -INT16_C( 18959), INT16_C( 12163), INT16_C( 13811), INT16_C( 19849), -INT16_C( 12899) }, { INT16_C( 1091), INT16_C( 31796), -INT16_C( 20423), -INT16_C( 1414), INT16_C( 28446), -INT16_C( 7692), INT16_C( 10614), INT16_C( 13367), INT16_C( 17714), INT16_C( 1917), -INT16_C( 31462), -INT16_C( 18959), INT16_C( 12163), INT16_C( 15678), INT16_C( 23218), INT16_C( 26714) } }, { UINT16_C(53842), { INT16_C( 1609), -INT16_C( 15130), INT16_C( 1025), -INT16_C( 8909), -INT16_C( 13416), INT16_C( 25072), INT16_C( 8856), INT16_C( 28070), -INT16_C( 16066), INT16_C( 12274), INT16_C( 30070), INT16_C( 26974), -INT16_C( 5973), INT16_C( 18615), INT16_C( 2485), -INT16_C( 486) }, { INT16_C( 15), INT16_C( 4290), -INT16_C( 2812), -INT16_C( 25107), -INT16_C( 8768), INT16_C( 23038), -INT16_C( 23297), INT16_C( 15814), -INT16_C( 18331), -INT16_C( 9108), -INT16_C( 13522), -INT16_C( 9915), -INT16_C( 845), INT16_C( 26657), INT16_C( 15365), INT16_C( 5478) }, { INT16_C( 1609), INT16_C( 4290), INT16_C( 1025), -INT16_C( 8909), -INT16_C( 8768), INT16_C( 25072), -INT16_C( 23297), INT16_C( 28070), -INT16_C( 16066), -INT16_C( 9108), INT16_C( 30070), INT16_C( 26974), -INT16_C( 845), INT16_C( 18615), INT16_C( 15365), INT16_C( 5478) } }, { UINT16_C(10556), { INT16_C( 16677), INT16_C( 4894), -INT16_C( 8226), -INT16_C( 8976), -INT16_C( 4040), -INT16_C( 384), -INT16_C( 6611), -INT16_C( 25930), -INT16_C( 6974), INT16_C( 1893), INT16_C( 6333), -INT16_C( 8444), INT16_C( 2432), -INT16_C( 6629), INT16_C( 22302), INT16_C( 17423) }, { INT16_C( 11928), INT16_C( 30295), INT16_C( 18189), 
INT16_C( 17746), -INT16_C( 11465), INT16_C( 25923), -INT16_C( 1607), INT16_C( 31743), INT16_C( 25822), -INT16_C( 25726), -INT16_C( 31108), -INT16_C( 902), -INT16_C( 27248), -INT16_C( 20766), -INT16_C( 3347), -INT16_C( 31246) }, { INT16_C( 16677), INT16_C( 4894), INT16_C( 18189), INT16_C( 17746), -INT16_C( 11465), INT16_C( 25923), -INT16_C( 6611), -INT16_C( 25930), INT16_C( 25822), INT16_C( 1893), INT16_C( 6333), -INT16_C( 902), INT16_C( 2432), -INT16_C( 20766), INT16_C( 22302), INT16_C( 17423) } }, { UINT16_C(18720), { INT16_C( 11772), INT16_C( 20113), -INT16_C( 14222), -INT16_C( 19167), -INT16_C( 9683), INT16_C( 11438), -INT16_C( 29611), -INT16_C( 10096), INT16_C( 3112), -INT16_C( 23970), -INT16_C( 4600), -INT16_C( 5320), INT16_C( 9629), -INT16_C( 28707), -INT16_C( 598), -INT16_C( 22823) }, { INT16_C( 27178), -INT16_C( 25355), INT16_C( 5682), INT16_C( 24657), -INT16_C( 15), INT16_C( 18060), INT16_C( 7564), -INT16_C( 19426), INT16_C( 32041), INT16_C( 12886), -INT16_C( 29077), INT16_C( 2077), -INT16_C( 1357), INT16_C( 24216), INT16_C( 29175), INT16_C( 8452) }, { INT16_C( 11772), INT16_C( 20113), -INT16_C( 14222), -INT16_C( 19167), -INT16_C( 9683), INT16_C( 18060), -INT16_C( 29611), -INT16_C( 10096), INT16_C( 32041), -INT16_C( 23970), -INT16_C( 4600), INT16_C( 2077), INT16_C( 9629), -INT16_C( 28707), INT16_C( 29175), -INT16_C( 22823) } }, { UINT16_C(63963), { INT16_C( 3517), INT16_C( 3600), INT16_C( 365), -INT16_C( 1523), -INT16_C( 26297), INT16_C( 26135), INT16_C( 16461), -INT16_C( 23325), INT16_C( 20082), -INT16_C( 28878), -INT16_C( 6569), -INT16_C( 4215), -INT16_C( 32700), INT16_C( 18528), INT16_C( 15265), INT16_C( 24130) }, { INT16_C( 21064), -INT16_C( 18836), INT16_C( 31315), -INT16_C( 25936), -INT16_C( 14573), INT16_C( 24832), -INT16_C( 7417), INT16_C( 31237), INT16_C( 14130), -INT16_C( 30455), -INT16_C( 27875), INT16_C( 24952), -INT16_C( 10221), -INT16_C( 19030), -INT16_C( 5101), INT16_C( 23315) }, { INT16_C( 21064), -INT16_C( 18836), INT16_C( 365), -INT16_C( 
25936), -INT16_C( 14573), INT16_C( 26135), -INT16_C( 7417), INT16_C( 31237), INT16_C( 14130), -INT16_C( 28878), -INT16_C( 6569), INT16_C( 24952), -INT16_C( 10221), -INT16_C( 19030), -INT16_C( 5101), INT16_C( 23315) } }, { UINT16_C(32830), { -INT16_C( 28399), -INT16_C( 15878), INT16_C( 3371), INT16_C( 11400), -INT16_C( 28562), INT16_C( 29455), INT16_C( 16650), INT16_C( 5035), -INT16_C( 14134), INT16_C( 17062), -INT16_C( 17878), -INT16_C( 11238), INT16_C( 11631), -INT16_C( 32064), -INT16_C( 375), -INT16_C( 26110) }, { -INT16_C( 881), -INT16_C( 17828), -INT16_C( 7158), INT16_C( 30950), -INT16_C( 2444), INT16_C( 32492), -INT16_C( 26825), INT16_C( 658), INT16_C( 14431), -INT16_C( 30396), INT16_C( 24562), INT16_C( 24925), INT16_C( 7564), INT16_C( 5604), -INT16_C( 6629), -INT16_C( 21840) }, { -INT16_C( 28399), -INT16_C( 17828), -INT16_C( 7158), INT16_C( 30950), -INT16_C( 2444), INT16_C( 32492), INT16_C( 16650), INT16_C( 5035), -INT16_C( 14134), INT16_C( 17062), -INT16_C( 17878), -INT16_C( 11238), INT16_C( 11631), -INT16_C( 32064), -INT16_C( 375), -INT16_C( 21840) } }, { UINT16_C( 3299), { -INT16_C( 4763), INT16_C( 19440), INT16_C( 25957), INT16_C( 20801), INT16_C( 31203), INT16_C( 30184), INT16_C( 18555), -INT16_C( 16466), -INT16_C( 24367), INT16_C( 12062), -INT16_C( 21758), -INT16_C( 6580), INT16_C( 26816), INT16_C( 28876), -INT16_C( 20718), INT16_C( 30588) }, { INT16_C( 28060), INT16_C( 707), INT16_C( 1234), -INT16_C( 19117), INT16_C( 15485), -INT16_C( 2005), -INT16_C( 9852), INT16_C( 21944), -INT16_C( 10631), INT16_C( 31620), -INT16_C( 11903), INT16_C( 16993), INT16_C( 11833), INT16_C( 19378), INT16_C( 12253), INT16_C( 31427) }, { INT16_C( 28060), INT16_C( 707), INT16_C( 25957), INT16_C( 20801), INT16_C( 31203), -INT16_C( 2005), -INT16_C( 9852), INT16_C( 21944), -INT16_C( 24367), INT16_C( 12062), -INT16_C( 11903), INT16_C( 16993), INT16_C( 26816), INT16_C( 28876), -INT16_C( 20718), INT16_C( 30588) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) 
; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_mask_blend_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m256i a = simde_test_x86_random_i16x16(); simde__m256i b = simde_test_x86_random_i16x16(); simde__m256i r = simde_mm256_mask_blend_epi16(k, a, b); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask_blend_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask8 k; const int32_t a[8]; const int32_t b[8]; const int32_t r[8]; } test_vec[] = { { UINT8_C(163), { INT32_C( 2132484462), -INT32_C( 747366511), INT32_C( 375182036), INT32_C( 479607640), -INT32_C( 491675589), INT32_C( 215550418), -INT32_C( 1895874057), -INT32_C( 1674407122) }, { INT32_C( 1176194484), INT32_C( 957976677), -INT32_C( 1169132190), -INT32_C( 321395023), INT32_C( 1506707591), -INT32_C( 2006538351), INT32_C( 387343849), -INT32_C( 1380759304) }, { INT32_C( 1176194484), INT32_C( 957976677), INT32_C( 375182036), INT32_C( 479607640), -INT32_C( 491675589), -INT32_C( 2006538351), -INT32_C( 1895874057), -INT32_C( 1380759304) } }, { UINT8_C(150), { INT32_C( 1593570254), -INT32_C( 2101267444), INT32_C( 1798536068), -INT32_C( 621666478), -INT32_C( 1804907539), INT32_C( 394130353), INT32_C( 1376752650), INT32_C( 384351303) }, { -INT32_C( 1133190225), -INT32_C( 1673644776), INT32_C( 50819505), INT32_C( 2111764880), -INT32_C( 166639291), INT32_C( 1192070973), INT32_C( 1805196579), -INT32_C( 1988001063) }, { INT32_C( 1593570254), -INT32_C( 
1673644776), INT32_C( 50819505), -INT32_C( 621666478), -INT32_C( 166639291), INT32_C( 394130353), INT32_C( 1376752650), -INT32_C( 1988001063) } }, { UINT8_C(101), { INT32_C( 729630197), -INT32_C( 186901885), INT32_C( 461692705), INT32_C( 123732413), -INT32_C( 1589357038), -INT32_C( 2117760156), -INT32_C( 1504038876), -INT32_C( 1509104719) }, { -INT32_C( 1412331224), -INT32_C( 979391069), INT32_C( 1256203149), INT32_C( 928071716), -INT32_C( 86469226), INT32_C( 1165729056), INT32_C( 2129385165), -INT32_C( 500828231) }, { -INT32_C( 1412331224), -INT32_C( 186901885), INT32_C( 1256203149), INT32_C( 123732413), -INT32_C( 1589357038), INT32_C( 1165729056), INT32_C( 2129385165), -INT32_C( 1509104719) } }, { UINT8_C(129), { -INT32_C( 1541108234), INT32_C( 1345448237), INT32_C( 158694345), INT32_C( 1654697165), INT32_C( 579050116), -INT32_C( 319829994), -INT32_C( 1415221581), -INT32_C( 1993570413) }, { INT32_C( 1110265877), INT32_C( 59924026), -INT32_C( 1492318246), INT32_C( 940158387), INT32_C( 1566215239), INT32_C( 139020628), INT32_C( 1253306295), -INT32_C( 1948983434) }, { INT32_C( 1110265877), INT32_C( 1345448237), INT32_C( 158694345), INT32_C( 1654697165), INT32_C( 579050116), -INT32_C( 319829994), -INT32_C( 1415221581), -INT32_C( 1948983434) } }, { UINT8_C( 47), { INT32_C( 1617546497), INT32_C( 1731882336), INT32_C( 656138618), INT32_C( 2003719146), -INT32_C( 154415955), INT32_C( 78566165), INT32_C( 1702623366), -INT32_C( 829094196) }, { INT32_C( 875495124), -INT32_C( 442800021), INT32_C( 856471113), -INT32_C( 1230341367), INT32_C( 1554871879), -INT32_C( 798926007), INT32_C( 540400723), -INT32_C( 1225864478) }, { INT32_C( 875495124), -INT32_C( 442800021), INT32_C( 856471113), -INT32_C( 1230341367), -INT32_C( 154415955), -INT32_C( 798926007), INT32_C( 1702623366), -INT32_C( 829094196) } }, { UINT8_C(201), { -INT32_C( 2076906980), INT32_C( 1020074630), -INT32_C( 1589247962), INT32_C( 568917163), INT32_C( 74073513), -INT32_C( 2108147034), INT32_C( 979662960), -INT32_C( 
2113725594) }, { -INT32_C( 1962526715), INT32_C( 2026427218), INT32_C( 2115636947), -INT32_C( 1264647670), -INT32_C( 306705849), -INT32_C( 1267789756), -INT32_C( 286272376), -INT32_C( 193924369) }, { -INT32_C( 1962526715), INT32_C( 1020074630), -INT32_C( 1589247962), -INT32_C( 1264647670), INT32_C( 74073513), -INT32_C( 2108147034), -INT32_C( 286272376), -INT32_C( 193924369) } }, { UINT8_C( 42), { INT32_C( 1232896118), INT32_C( 1444803912), INT32_C( 291543823), INT32_C( 1163465787), -INT32_C( 578206003), -INT32_C( 1973010762), INT32_C( 528045101), INT32_C( 994733509) }, { INT32_C( 897959661), -INT32_C( 896818501), INT32_C( 2027744317), -INT32_C( 843238144), INT32_C( 833308539), -INT32_C( 1296363131), INT32_C( 718353509), -INT32_C( 1889133662) }, { INT32_C( 1232896118), -INT32_C( 896818501), INT32_C( 291543823), -INT32_C( 843238144), -INT32_C( 578206003), -INT32_C( 1296363131), INT32_C( 528045101), INT32_C( 994733509) } }, { UINT8_C(226), { -INT32_C( 1919040021), INT32_C( 1019897936), INT32_C( 2034058052), INT32_C( 1207175680), -INT32_C( 959699275), INT32_C( 372014817), INT32_C( 1807242832), -INT32_C( 1488107588) }, { INT32_C( 1563749132), -INT32_C( 1751515309), INT32_C( 1108399682), -INT32_C( 1769339679), INT32_C( 207443498), INT32_C( 623020501), -INT32_C( 1668228385), INT32_C( 776199713) }, { -INT32_C( 1919040021), -INT32_C( 1751515309), INT32_C( 2034058052), INT32_C( 1207175680), -INT32_C( 959699275), INT32_C( 623020501), -INT32_C( 1668228385), INT32_C( 776199713) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi32(test_vec[i].b); simde__m256i r = simde_mm256_mask_blend_epi32(test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x8(r, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i a = 
simde_test_x86_random_i32x8(); simde__m256i b = simde_test_x86_random_i32x8(); simde__m256i r = simde_mm256_mask_blend_epi32(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask_blend_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask8 k; const int64_t a[4]; const int64_t b[4]; const int64_t r[4]; } test_vec[] = { { UINT8_C(201), { -INT64_C( 308018130299614344), -INT64_C( 2525830522092061757), -INT64_C( 416347531116413233), -INT64_C( 8138768587312868250) }, { -INT64_C( 6920695771569968684), INT64_C( 1305016133113860283), INT64_C( 752561435714440535), -INT64_C( 6188688236356924298) }, { -INT64_C( 6920695771569968684), -INT64_C( 2525830522092061757), -INT64_C( 416347531116413233), -INT64_C( 6188688236356924298) } }, { UINT8_C(133), { INT64_C( 7466406984093442852), -INT64_C( 3956204483172966552), -INT64_C( 8268760645077528794), INT64_C( 1127604060231552453) }, { INT64_C( 8264264789584382129), INT64_C( 4338176000236214896), -INT64_C( 2564442544032865828), -INT64_C( 8427179542245946350) }, { INT64_C( 8264264789584382129), -INT64_C( 3956204483172966552), -INT64_C( 2564442544032865828), INT64_C( 1127604060231552453) } }, { UINT8_C(172), { -INT64_C( 2521970146282122241), INT64_C( 4721233250972035933), -INT64_C( 7454242660181056971), -INT64_C( 7600952917491545550) }, { -INT64_C( 4669909134846051888), -INT64_C( 8470445899461876335), INT64_C( 3653705726872111952), -INT64_C( 916046964237720036) }, { -INT64_C( 2521970146282122241), INT64_C( 4721233250972035933), INT64_C( 3653705726872111952), -INT64_C( 916046964237720036) } }, { UINT8_C(197), { -INT64_C( 7050412182245718161), -INT64_C( 7597715734445991856), -INT64_C( 8849071482113447038), -INT64_C( 7123678462616214319) }, { -INT64_C( 
8936731338221597725), -INT64_C( 1363656740049850958), INT64_C( 4891528711540746749), INT64_C( 784686428748614167) }, { -INT64_C( 8936731338221597725), -INT64_C( 7597715734445991856), INT64_C( 4891528711540746749), -INT64_C( 7123678462616214319) } }, { UINT8_C( 15), { INT64_C( 5630278406940747286), INT64_C( 6761494806443696262), -INT64_C( 6993225919858003543), INT64_C( 3640823970439379379) }, { INT64_C( 1433486689512376373), -INT64_C( 1263892717404184988), INT64_C( 766664538745095409), INT64_C( 7729311888806340598) }, { INT64_C( 1433486689512376373), -INT64_C( 1263892717404184988), INT64_C( 766664538745095409), INT64_C( 7729311888806340598) } }, { UINT8_C(208), { -INT64_C( 9002854097877204506), -INT64_C( 2302264527912687782), INT64_C( 1960263980206723456), INT64_C( 8258120175318026178) }, { -INT64_C( 4528883555939518531), INT64_C( 2797271543470354260), INT64_C( 849095971951610104), -INT64_C( 7791947714046584353) }, { -INT64_C( 9002854097877204506), -INT64_C( 2302264527912687782), INT64_C( 1960263980206723456), INT64_C( 8258120175318026178) } }, { UINT8_C(103), { INT64_C( 5510874724287002362), INT64_C( 4920771090712718186), -INT64_C( 2813737395725404293), INT64_C( 3838828557303176814) }, { INT64_C( 1018330989910037624), -INT64_C( 6430671693439560895), INT64_C( 6141130776168255137), -INT64_C( 6738094050257869044) }, { INT64_C( 1018330989910037624), -INT64_C( 6430671693439560895), INT64_C( 6141130776168255137), INT64_C( 3838828557303176814) } }, { UINT8_C(143), { -INT64_C( 5984930301473490983), INT64_C( 9146102965131964065), INT64_C( 7994095526614486326), -INT64_C( 8216191123138856977) }, { INT64_C( 1150157806066544049), INT64_C( 9172503448356834460), INT64_C( 6595894848746926229), INT64_C( 850546298980737419) }, { INT64_C( 1150157806066544049), INT64_C( 9172503448356834460), INT64_C( 6595894848746926229), INT64_C( 850546298980737419) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = 
simde_x_mm256_loadu_epi64(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi64(test_vec[i].b); simde__m256i r = simde_mm256_mask_blend_epi64(test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x4(r, simde_x_mm256_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i a = simde_test_x86_random_i64x4(); simde__m256i b = simde_test_x86_random_i64x4(); simde__m256i r = simde_mm256_mask_blend_epi64(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask_blend_ps (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask8 k; const simde_float32 a[8]; const simde_float32 b[8]; const simde_float32 r[8]; } test_vec[] = { { UINT8_C(145), { SIMDE_FLOAT32_C( -666.04), SIMDE_FLOAT32_C( -910.73), SIMDE_FLOAT32_C( 105.35), SIMDE_FLOAT32_C( -622.43), SIMDE_FLOAT32_C( -805.58), SIMDE_FLOAT32_C( -324.90), SIMDE_FLOAT32_C( 657.71), SIMDE_FLOAT32_C( -101.07) }, { SIMDE_FLOAT32_C( 407.96), SIMDE_FLOAT32_C( 118.11), SIMDE_FLOAT32_C( 128.03), SIMDE_FLOAT32_C( -679.02), SIMDE_FLOAT32_C( 825.56), SIMDE_FLOAT32_C( -692.56), SIMDE_FLOAT32_C( -84.89), SIMDE_FLOAT32_C( 217.59) }, { SIMDE_FLOAT32_C( 407.96), SIMDE_FLOAT32_C( -910.73), SIMDE_FLOAT32_C( 105.35), SIMDE_FLOAT32_C( -622.43), SIMDE_FLOAT32_C( 825.56), SIMDE_FLOAT32_C( -324.90), SIMDE_FLOAT32_C( 657.71), SIMDE_FLOAT32_C( 217.59) } }, { UINT8_C( 44), { SIMDE_FLOAT32_C( -959.89), SIMDE_FLOAT32_C( 319.48), SIMDE_FLOAT32_C( 787.79), SIMDE_FLOAT32_C( -711.80), SIMDE_FLOAT32_C( 905.92), SIMDE_FLOAT32_C( 145.58), SIMDE_FLOAT32_C( 75.27), SIMDE_FLOAT32_C( -317.71) }, { SIMDE_FLOAT32_C( 332.64), SIMDE_FLOAT32_C( -857.18), SIMDE_FLOAT32_C( -296.21), SIMDE_FLOAT32_C( 
-979.65), SIMDE_FLOAT32_C( 338.40), SIMDE_FLOAT32_C( 320.07), SIMDE_FLOAT32_C( -645.69), SIMDE_FLOAT32_C( 427.67) }, { SIMDE_FLOAT32_C( -959.89), SIMDE_FLOAT32_C( 319.48), SIMDE_FLOAT32_C( -296.21), SIMDE_FLOAT32_C( -979.65), SIMDE_FLOAT32_C( 905.92), SIMDE_FLOAT32_C( 320.07), SIMDE_FLOAT32_C( 75.27), SIMDE_FLOAT32_C( -317.71) } }, { UINT8_C(171), { SIMDE_FLOAT32_C( -268.12), SIMDE_FLOAT32_C( 622.08), SIMDE_FLOAT32_C( 100.51), SIMDE_FLOAT32_C( -610.41), SIMDE_FLOAT32_C( -478.98), SIMDE_FLOAT32_C( -491.52), SIMDE_FLOAT32_C( 507.70), SIMDE_FLOAT32_C( 649.05) }, { SIMDE_FLOAT32_C( -170.54), SIMDE_FLOAT32_C( 333.25), SIMDE_FLOAT32_C( 956.49), SIMDE_FLOAT32_C( 744.56), SIMDE_FLOAT32_C( -449.15), SIMDE_FLOAT32_C( 446.24), SIMDE_FLOAT32_C( 784.67), SIMDE_FLOAT32_C( 870.32) }, { SIMDE_FLOAT32_C( -170.54), SIMDE_FLOAT32_C( 333.25), SIMDE_FLOAT32_C( 100.51), SIMDE_FLOAT32_C( 744.56), SIMDE_FLOAT32_C( -478.98), SIMDE_FLOAT32_C( 446.24), SIMDE_FLOAT32_C( 507.70), SIMDE_FLOAT32_C( 870.32) } }, { UINT8_C(177), { SIMDE_FLOAT32_C( -927.13), SIMDE_FLOAT32_C( 776.24), SIMDE_FLOAT32_C( -620.39), SIMDE_FLOAT32_C( 148.14), SIMDE_FLOAT32_C( -541.47), SIMDE_FLOAT32_C( 712.26), SIMDE_FLOAT32_C( 290.97), SIMDE_FLOAT32_C( 162.32) }, { SIMDE_FLOAT32_C( 732.61), SIMDE_FLOAT32_C( -370.63), SIMDE_FLOAT32_C( -517.61), SIMDE_FLOAT32_C( -913.08), SIMDE_FLOAT32_C( -942.96), SIMDE_FLOAT32_C( -92.20), SIMDE_FLOAT32_C( -181.20), SIMDE_FLOAT32_C( 679.12) }, { SIMDE_FLOAT32_C( 732.61), SIMDE_FLOAT32_C( 776.24), SIMDE_FLOAT32_C( -620.39), SIMDE_FLOAT32_C( 148.14), SIMDE_FLOAT32_C( -942.96), SIMDE_FLOAT32_C( -92.20), SIMDE_FLOAT32_C( 290.97), SIMDE_FLOAT32_C( 679.12) } }, { UINT8_C( 48), { SIMDE_FLOAT32_C( 208.39), SIMDE_FLOAT32_C( -799.86), SIMDE_FLOAT32_C( -483.20), SIMDE_FLOAT32_C( -283.92), SIMDE_FLOAT32_C( 849.19), SIMDE_FLOAT32_C( 346.25), SIMDE_FLOAT32_C( -950.66), SIMDE_FLOAT32_C( 805.68) }, { SIMDE_FLOAT32_C( 90.82), SIMDE_FLOAT32_C( -399.82), SIMDE_FLOAT32_C( 251.91), SIMDE_FLOAT32_C( -124.51), 
SIMDE_FLOAT32_C( -529.50), SIMDE_FLOAT32_C( -514.06), SIMDE_FLOAT32_C( -51.64), SIMDE_FLOAT32_C( -753.25) }, { SIMDE_FLOAT32_C( 208.39), SIMDE_FLOAT32_C( -799.86), SIMDE_FLOAT32_C( -483.20), SIMDE_FLOAT32_C( -283.92), SIMDE_FLOAT32_C( -529.50), SIMDE_FLOAT32_C( -514.06), SIMDE_FLOAT32_C( -950.66), SIMDE_FLOAT32_C( 805.68) } }, { UINT8_C(184), { SIMDE_FLOAT32_C( -903.50), SIMDE_FLOAT32_C( -294.72), SIMDE_FLOAT32_C( -422.18), SIMDE_FLOAT32_C( 387.47), SIMDE_FLOAT32_C( 867.60), SIMDE_FLOAT32_C( -689.57), SIMDE_FLOAT32_C( -983.16), SIMDE_FLOAT32_C( -650.02) }, { SIMDE_FLOAT32_C( -602.65), SIMDE_FLOAT32_C( -926.12), SIMDE_FLOAT32_C( 257.79), SIMDE_FLOAT32_C( 216.14), SIMDE_FLOAT32_C( 753.00), SIMDE_FLOAT32_C( 266.10), SIMDE_FLOAT32_C( -575.47), SIMDE_FLOAT32_C( 953.14) }, { SIMDE_FLOAT32_C( -903.50), SIMDE_FLOAT32_C( -294.72), SIMDE_FLOAT32_C( -422.18), SIMDE_FLOAT32_C( 216.14), SIMDE_FLOAT32_C( 753.00), SIMDE_FLOAT32_C( 266.10), SIMDE_FLOAT32_C( -983.16), SIMDE_FLOAT32_C( 953.14) } }, { UINT8_C( 73), { SIMDE_FLOAT32_C( 140.61), SIMDE_FLOAT32_C( 802.33), SIMDE_FLOAT32_C( 129.15), SIMDE_FLOAT32_C( 189.95), SIMDE_FLOAT32_C( 608.01), SIMDE_FLOAT32_C( -780.03), SIMDE_FLOAT32_C( 790.13), SIMDE_FLOAT32_C( -140.08) }, { SIMDE_FLOAT32_C( 95.46), SIMDE_FLOAT32_C( -739.36), SIMDE_FLOAT32_C( 345.87), SIMDE_FLOAT32_C( -956.18), SIMDE_FLOAT32_C( -492.61), SIMDE_FLOAT32_C( -788.58), SIMDE_FLOAT32_C( -859.67), SIMDE_FLOAT32_C( 212.67) }, { SIMDE_FLOAT32_C( 95.46), SIMDE_FLOAT32_C( 802.33), SIMDE_FLOAT32_C( 129.15), SIMDE_FLOAT32_C( -956.18), SIMDE_FLOAT32_C( 608.01), SIMDE_FLOAT32_C( -780.03), SIMDE_FLOAT32_C( -859.67), SIMDE_FLOAT32_C( -140.08) } }, { UINT8_C(212), { SIMDE_FLOAT32_C( 527.80), SIMDE_FLOAT32_C( 80.27), SIMDE_FLOAT32_C( 99.67), SIMDE_FLOAT32_C( 544.64), SIMDE_FLOAT32_C( 430.25), SIMDE_FLOAT32_C( 497.01), SIMDE_FLOAT32_C( 618.52), SIMDE_FLOAT32_C( -311.96) }, { SIMDE_FLOAT32_C( -286.84), SIMDE_FLOAT32_C( 371.52), SIMDE_FLOAT32_C( 954.14), SIMDE_FLOAT32_C( 137.69), 
SIMDE_FLOAT32_C( 324.66), SIMDE_FLOAT32_C( 737.04), SIMDE_FLOAT32_C( -721.70), SIMDE_FLOAT32_C( 127.00) }, { SIMDE_FLOAT32_C( 527.80), SIMDE_FLOAT32_C( 80.27), SIMDE_FLOAT32_C( 954.14), SIMDE_FLOAT32_C( 544.64), SIMDE_FLOAT32_C( 324.66), SIMDE_FLOAT32_C( 497.01), SIMDE_FLOAT32_C( -721.70), SIMDE_FLOAT32_C( 127.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 b = simde_mm256_loadu_ps(test_vec[i].b); simde__m256 r = simde_mm256_mask_blend_ps(test_vec[i].k, a, b); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256 a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m256 b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m256 r = simde_mm256_mask_blend_ps(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask_blend_pd (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask8 k; const simde_float64 a[4]; const simde_float64 b[4]; const simde_float64 r[4]; } test_vec[] = { { UINT8_C(157), { SIMDE_FLOAT64_C( 468.25), SIMDE_FLOAT64_C( -265.00), SIMDE_FLOAT64_C( 86.16), SIMDE_FLOAT64_C( 258.38) }, { SIMDE_FLOAT64_C( 594.93), SIMDE_FLOAT64_C( -818.38), SIMDE_FLOAT64_C( 519.02), SIMDE_FLOAT64_C( -59.21) }, { SIMDE_FLOAT64_C( 594.93), SIMDE_FLOAT64_C( -265.00), SIMDE_FLOAT64_C( 519.02), SIMDE_FLOAT64_C( -59.21) } }, { UINT8_C(212), { SIMDE_FLOAT64_C( -973.59), SIMDE_FLOAT64_C( 152.22), SIMDE_FLOAT64_C( -634.23), SIMDE_FLOAT64_C( 239.07) }, { SIMDE_FLOAT64_C( 941.46), 
SIMDE_FLOAT64_C( 893.57), SIMDE_FLOAT64_C( -680.66), SIMDE_FLOAT64_C( 41.12) }, { SIMDE_FLOAT64_C( -973.59), SIMDE_FLOAT64_C( 152.22), SIMDE_FLOAT64_C( -680.66), SIMDE_FLOAT64_C( 239.07) } }, { UINT8_C(209), { SIMDE_FLOAT64_C( 749.59), SIMDE_FLOAT64_C( -461.86), SIMDE_FLOAT64_C( 56.73), SIMDE_FLOAT64_C( -562.37) }, { SIMDE_FLOAT64_C( 251.29), SIMDE_FLOAT64_C( -571.74), SIMDE_FLOAT64_C( -608.23), SIMDE_FLOAT64_C( -611.02) }, { SIMDE_FLOAT64_C( 251.29), SIMDE_FLOAT64_C( -461.86), SIMDE_FLOAT64_C( 56.73), SIMDE_FLOAT64_C( -562.37) } }, { UINT8_C(219), { SIMDE_FLOAT64_C( -871.20), SIMDE_FLOAT64_C( -332.71), SIMDE_FLOAT64_C( -120.08), SIMDE_FLOAT64_C( -5.01) }, { SIMDE_FLOAT64_C( -864.46), SIMDE_FLOAT64_C( 614.92), SIMDE_FLOAT64_C( -918.85), SIMDE_FLOAT64_C( 393.92) }, { SIMDE_FLOAT64_C( -864.46), SIMDE_FLOAT64_C( 614.92), SIMDE_FLOAT64_C( -120.08), SIMDE_FLOAT64_C( 393.92) } }, { UINT8_C( 99), { SIMDE_FLOAT64_C( -737.23), SIMDE_FLOAT64_C( -87.06), SIMDE_FLOAT64_C( -849.36), SIMDE_FLOAT64_C( -511.78) }, { SIMDE_FLOAT64_C( -60.65), SIMDE_FLOAT64_C( 302.86), SIMDE_FLOAT64_C( -146.01), SIMDE_FLOAT64_C( -821.58) }, { SIMDE_FLOAT64_C( -60.65), SIMDE_FLOAT64_C( 302.86), SIMDE_FLOAT64_C( -849.36), SIMDE_FLOAT64_C( -511.78) } }, { UINT8_C( 72), { SIMDE_FLOAT64_C( -252.44), SIMDE_FLOAT64_C( -502.24), SIMDE_FLOAT64_C( -714.56), SIMDE_FLOAT64_C( -814.23) }, { SIMDE_FLOAT64_C( -752.65), SIMDE_FLOAT64_C( -176.43), SIMDE_FLOAT64_C( 242.51), SIMDE_FLOAT64_C( -315.02) }, { SIMDE_FLOAT64_C( -252.44), SIMDE_FLOAT64_C( -502.24), SIMDE_FLOAT64_C( -714.56), SIMDE_FLOAT64_C( -315.02) } }, { UINT8_C( 85), { SIMDE_FLOAT64_C( 670.77), SIMDE_FLOAT64_C( 76.74), SIMDE_FLOAT64_C( -536.15), SIMDE_FLOAT64_C( 423.69) }, { SIMDE_FLOAT64_C( 205.55), SIMDE_FLOAT64_C( 131.14), SIMDE_FLOAT64_C( -696.40), SIMDE_FLOAT64_C( -799.46) }, { SIMDE_FLOAT64_C( 205.55), SIMDE_FLOAT64_C( 76.74), SIMDE_FLOAT64_C( -696.40), SIMDE_FLOAT64_C( 423.69) } }, { UINT8_C(104), { SIMDE_FLOAT64_C( 918.52), SIMDE_FLOAT64_C( 
-718.30), SIMDE_FLOAT64_C( -339.40), SIMDE_FLOAT64_C( 128.37) }, { SIMDE_FLOAT64_C( -455.53), SIMDE_FLOAT64_C( 573.54), SIMDE_FLOAT64_C( 279.01), SIMDE_FLOAT64_C( 32.69) }, { SIMDE_FLOAT64_C( 918.52), SIMDE_FLOAT64_C( -718.30), SIMDE_FLOAT64_C( -339.40), SIMDE_FLOAT64_C( 32.69) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d b = simde_mm256_loadu_pd(test_vec[i].b); simde__m256d r = simde_mm256_mask_blend_pd(test_vec[i].k, a, b); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256d a = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256d b = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256d r = simde_mm256_mask_blend_pd(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_blend_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask64 k; const int8_t a[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { { UINT64_C( 426401881765016573), { INT8_C( 93), -INT8_C( 1), INT8_C( 35), -INT8_C( 77), -INT8_C( 39), INT8_C( 19), INT8_C( 126), INT8_C( 59), INT8_C( 6), -INT8_C( 107), INT8_C( 0), -INT8_C( 1), INT8_C( 9), -INT8_C( 116), -INT8_C( 27), -INT8_C( 68), -INT8_C( 96), -INT8_C( 78), INT8_C( 98), -INT8_C( 106), INT8_C( 39), -INT8_C( 47), INT8_C( 120), INT8_C( 37), INT8_C( 69), -INT8_C( 94), -INT8_C( 69), -INT8_C( 123), -INT8_C( 124), -INT8_C( 91), -INT8_C( 118), -INT8_C( 31), -INT8_C( 92), -INT8_C( 83), -INT8_C( 107), INT8_C( 126), -INT8_C( 63), INT8_C( 19), 
-INT8_C( 71), -INT8_C( 57), -INT8_C( 88), -INT8_C( 71), -INT8_C( 58), -INT8_C( 79), INT8_C( 69), -INT8_C( 85), INT8_C( 109), -INT8_C( 27), INT8_C( 93), -INT8_C( 48), INT8_C( 123), -INT8_C( 123), -INT8_C( 95), -INT8_C( 12), -INT8_C( 86), -INT8_C( 26), -INT8_C( 106), INT8_C( 101), INT8_C( 108), INT8_C( 26), INT8_C( 11), -INT8_C( 10), -INT8_C( 4), -INT8_C( 81) }, { -INT8_C( 92), -INT8_C( 111), INT8_C( 45), INT8_C( 101), -INT8_C( 92), -INT8_C( 25), INT8_C( 44), INT8_C( 76), -INT8_C( 96), -INT8_C( 14), -INT8_C( 2), -INT8_C( 26), -INT8_C( 99), INT8_C( 107), -INT8_C( 53), -INT8_C( 6), INT8_C( 59), INT8_C( 71), INT8_MAX, -INT8_C( 35), INT8_C( 59), INT8_C( 41), -INT8_C( 61), -INT8_C( 47), -INT8_C( 113), INT8_C( 47), -INT8_C( 21), -INT8_C( 102), INT8_C( 38), -INT8_C( 25), INT8_C( 73), -INT8_C( 54), INT8_C( 120), INT8_C( 119), INT8_C( 47), INT8_C( 28), INT8_C( 94), INT8_C( 91), INT8_C( 105), -INT8_C( 2), INT8_C( 77), INT8_C( 103), -INT8_C( 28), -INT8_C( 22), -INT8_C( 46), -INT8_C( 80), -INT8_C( 28), INT8_C( 14), -INT8_C( 9), INT8_C( 100), -INT8_C( 21), INT8_C( 50), -INT8_C( 115), -INT8_C( 82), INT8_C( 3), INT8_C( 28), -INT8_C( 34), -INT8_C( 18), -INT8_C( 74), INT8_C( 4), -INT8_C( 42), INT8_C( 0), -INT8_C( 50), INT8_C( 78) }, { -INT8_C( 92), -INT8_C( 1), INT8_C( 45), INT8_C( 101), -INT8_C( 92), -INT8_C( 25), INT8_C( 44), INT8_C( 76), -INT8_C( 96), -INT8_C( 14), INT8_C( 0), -INT8_C( 1), -INT8_C( 99), INT8_C( 107), -INT8_C( 53), -INT8_C( 68), INT8_C( 59), -INT8_C( 78), INT8_C( 98), -INT8_C( 35), INT8_C( 39), INT8_C( 41), INT8_C( 120), INT8_C( 37), INT8_C( 69), INT8_C( 47), -INT8_C( 21), -INT8_C( 123), INT8_C( 38), -INT8_C( 91), -INT8_C( 118), -INT8_C( 54), -INT8_C( 92), -INT8_C( 83), -INT8_C( 107), INT8_C( 126), -INT8_C( 63), INT8_C( 19), INT8_C( 105), -INT8_C( 57), -INT8_C( 88), INT8_C( 103), -INT8_C( 58), -INT8_C( 79), INT8_C( 69), -INT8_C( 80), -INT8_C( 28), INT8_C( 14), INT8_C( 93), INT8_C( 100), INT8_C( 123), INT8_C( 50), -INT8_C( 95), -INT8_C( 82), INT8_C( 3), INT8_C( 28), 
-INT8_C( 34), INT8_C( 101), -INT8_C( 74), INT8_C( 26), INT8_C( 11), -INT8_C( 10), -INT8_C( 4), -INT8_C( 81) } }, { UINT64_C(11949127714346892663), { INT8_C( 59), -INT8_C( 72), -INT8_C( 113), INT8_C( 13), INT8_C( 104), INT8_C( 115), INT8_C( 27), INT8_C( 95), -INT8_C( 41), INT8_C( 6), -INT8_C( 111), INT8_C( 101), -INT8_C( 75), -INT8_C( 108), -INT8_C( 127), -INT8_C( 109), -INT8_C( 126), INT8_C( 56), -INT8_C( 105), INT8_C( 88), INT8_C( 56), INT8_C( 101), -INT8_C( 89), -INT8_C( 81), INT8_C( 98), INT8_C( 18), -INT8_C( 124), -INT8_C( 70), -INT8_C( 26), INT8_C( 87), INT8_C( 95), INT8_C( 33), INT8_C( 15), -INT8_C( 18), INT8_C( 46), INT8_C( 119), INT8_C( 97), INT8_C( 74), -INT8_C( 42), INT8_C( 57), INT8_C( 80), INT8_C( 103), -INT8_C( 98), INT8_C( 5), -INT8_C( 5), INT8_C( 31), -INT8_C( 104), INT8_C( 126), INT8_C( 87), INT8_C( 47), -INT8_C( 42), -INT8_C( 113), -INT8_C( 108), INT8_C( 125), INT8_C( 62), -INT8_C( 10), -INT8_C( 113), -INT8_C( 62), -INT8_C( 80), INT8_C( 117), INT8_C( 26), INT8_C( 15), -INT8_C( 106), INT8_C( 41) }, { -INT8_C( 3), -INT8_C( 59), -INT8_C( 95), INT8_C( 95), INT8_C( 15), INT8_C( 119), -INT8_C( 104), INT8_C( 95), -INT8_C( 33), INT8_C( 54), INT8_C( 101), -INT8_C( 38), INT8_C( 85), -INT8_C( 3), INT8_C( 88), -INT8_C( 83), INT8_C( 45), INT8_C( 47), INT8_C( 60), -INT8_C( 63), -INT8_C( 84), INT8_C( 123), -INT8_C( 72), INT8_C( 60), INT8_C( 61), INT8_C( 104), -INT8_C( 79), INT8_C( 87), INT8_C( 120), INT8_C( 72), -INT8_C( 127), INT8_C( 117), INT8_C( 13), INT8_C( 34), -INT8_C( 44), INT8_C( 28), -INT8_C( 103), INT8_C( 108), INT8_C( 123), INT8_C( 120), -INT8_C( 94), -INT8_C( 32), INT8_C( 83), -INT8_C( 8), -INT8_C( 34), -INT8_C( 85), -INT8_C( 91), INT8_C( 11), -INT8_C( 38), -INT8_C( 31), -INT8_C( 52), -INT8_C( 121), INT8_C( 92), -INT8_C( 124), -INT8_C( 61), -INT8_C( 102), -INT8_C( 19), INT8_C( 116), -INT8_C( 15), INT8_C( 101), -INT8_C( 68), INT8_C( 114), -INT8_C( 38), -INT8_C( 55) }, { -INT8_C( 3), -INT8_C( 59), -INT8_C( 95), INT8_C( 13), INT8_C( 15), INT8_C( 119), 
-INT8_C( 104), INT8_C( 95), -INT8_C( 33), INT8_C( 6), INT8_C( 101), -INT8_C( 38), INT8_C( 85), -INT8_C( 3), INT8_C( 88), -INT8_C( 83), INT8_C( 45), INT8_C( 47), -INT8_C( 105), -INT8_C( 63), INT8_C( 56), INT8_C( 123), -INT8_C( 72), -INT8_C( 81), INT8_C( 61), INT8_C( 18), -INT8_C( 79), -INT8_C( 70), INT8_C( 120), INT8_C( 87), -INT8_C( 127), INT8_C( 117), INT8_C( 15), -INT8_C( 18), INT8_C( 46), INT8_C( 28), -INT8_C( 103), INT8_C( 74), INT8_C( 123), INT8_C( 57), INT8_C( 80), INT8_C( 103), INT8_C( 83), INT8_C( 5), -INT8_C( 34), INT8_C( 31), -INT8_C( 91), INT8_C( 11), -INT8_C( 38), -INT8_C( 31), -INT8_C( 42), -INT8_C( 113), INT8_C( 92), INT8_C( 125), -INT8_C( 61), -INT8_C( 102), -INT8_C( 19), -INT8_C( 62), -INT8_C( 15), INT8_C( 117), INT8_C( 26), INT8_C( 114), -INT8_C( 106), -INT8_C( 55) } }, { UINT64_C(13737774482719092628), { INT8_C( 65), -INT8_C( 7), -INT8_C( 74), INT8_C( 31), -INT8_C( 91), INT8_C( 91), INT8_C( 42), INT8_MAX, INT8_C( 60), -INT8_C( 9), INT8_C( 6), -INT8_C( 103), INT8_C( 123), -INT8_C( 55), INT8_C( 51), INT8_C( 104), INT8_C( 62), INT8_C( 36), -INT8_C( 51), -INT8_C( 6), -INT8_C( 105), -INT8_C( 88), -INT8_C( 60), INT8_C( 43), INT8_C( 87), -INT8_C( 87), INT8_C( 89), INT8_C( 114), INT8_C( 10), INT8_C( 0), INT8_C( 48), INT8_C( 76), -INT8_C( 7), -INT8_C( 26), INT8_C( 107), -INT8_C( 98), INT8_C( 65), -INT8_C( 106), INT8_C( 30), INT8_C( 126), -INT8_C( 115), INT8_C( 36), INT8_C( 23), INT8_C( 8), -INT8_C( 18), INT8_C( 74), INT8_C( 113), INT8_C( 44), INT8_C( 110), INT8_C( 62), INT8_C( 38), INT8_C( 5), -INT8_C( 26), -INT8_C( 22), INT8_C( 49), INT8_C( 61), -INT8_C( 108), -INT8_C( 118), -INT8_C( 80), -INT8_C( 98), -INT8_C( 118), -INT8_C( 32), -INT8_C( 22), -INT8_C( 124) }, { -INT8_C( 57), INT8_C( 86), INT8_C( 34), INT8_C( 8), -INT8_C( 20), INT8_C( 64), -INT8_C( 122), INT8_C( 121), INT8_C( 101), -INT8_C( 99), -INT8_C( 127), INT8_C( 83), -INT8_C( 25), -INT8_C( 14), INT8_MAX, INT8_C( 86), INT8_C( 49), -INT8_C( 91), INT8_C( 91), INT8_C( 23), -INT8_C( 112), -INT8_C( 116), 
INT8_C( 85), INT8_C( 36), INT8_C( 23), INT8_C( 5), -INT8_C( 62), -INT8_C( 95), -INT8_C( 27), -INT8_C( 83), INT8_C( 37), -INT8_C( 84), INT8_C( 3), INT8_C( 72), -INT8_C( 75), -INT8_C( 17), -INT8_C( 120), INT8_C( 59), INT8_C( 104), -INT8_C( 19), -INT8_C( 39), -INT8_C( 23), INT8_C( 64), -INT8_C( 64), -INT8_C( 36), -INT8_C( 65), INT8_C( 22), INT8_C( 13), INT8_C( 101), INT8_C( 114), INT8_C( 36), -INT8_C( 11), -INT8_C( 2), INT8_C( 121), INT8_C( 25), INT8_C( 21), INT8_C( 126), -INT8_C( 37), -INT8_C( 73), INT8_C( 100), -INT8_C( 120), -INT8_C( 36), INT8_C( 16), -INT8_C( 117) }, { INT8_C( 65), -INT8_C( 7), INT8_C( 34), INT8_C( 31), -INT8_C( 20), INT8_C( 91), INT8_C( 42), INT8_C( 121), INT8_C( 101), -INT8_C( 99), -INT8_C( 127), INT8_C( 83), INT8_C( 123), -INT8_C( 14), INT8_C( 51), INT8_C( 86), INT8_C( 49), INT8_C( 36), INT8_C( 91), -INT8_C( 6), -INT8_C( 105), -INT8_C( 116), INT8_C( 85), INT8_C( 36), INT8_C( 87), INT8_C( 5), -INT8_C( 62), -INT8_C( 95), INT8_C( 10), -INT8_C( 83), INT8_C( 48), INT8_C( 76), INT8_C( 3), INT8_C( 72), INT8_C( 107), -INT8_C( 17), -INT8_C( 120), -INT8_C( 106), INT8_C( 30), INT8_C( 126), -INT8_C( 39), INT8_C( 36), INT8_C( 23), INT8_C( 8), -INT8_C( 18), -INT8_C( 65), INT8_C( 22), INT8_C( 44), INT8_C( 110), INT8_C( 114), INT8_C( 36), INT8_C( 5), -INT8_C( 26), INT8_C( 121), INT8_C( 49), INT8_C( 21), -INT8_C( 108), -INT8_C( 37), -INT8_C( 73), INT8_C( 100), -INT8_C( 120), -INT8_C( 36), -INT8_C( 22), -INT8_C( 117) } }, { UINT64_C(15752151143515079972), { -INT8_C( 52), -INT8_C( 37), -INT8_C( 102), -INT8_C( 88), -INT8_C( 102), -INT8_C( 79), -INT8_C( 75), -INT8_C( 1), INT8_C( 35), -INT8_C( 39), -INT8_C( 12), INT8_C( 33), INT8_C( 83), INT8_C( 13), INT8_C( 55), -INT8_C( 47), -INT8_C( 23), -INT8_C( 18), INT8_C( 53), INT8_C( 113), -INT8_C( 54), INT8_C( 70), -INT8_C( 3), -INT8_C( 17), INT8_C( 11), INT8_C( 119), -INT8_C( 100), INT8_C( 12), INT8_C( 90), INT8_C( 54), -INT8_C( 26), INT8_C( 38), INT8_C( 17), -INT8_C( 127), -INT8_C( 50), -INT8_C( 84), INT8_C( 50), -INT8_C( 
125), -INT8_C( 85), INT8_C( 85), INT8_C( 92), -INT8_C( 96), INT8_C( 118), -INT8_C( 81), -INT8_C( 83), -INT8_C( 83), -INT8_C( 127), -INT8_C( 106), -INT8_C( 101), -INT8_C( 74), INT8_C( 8), INT8_C( 102), -INT8_C( 4), INT8_C( 5), INT8_C( 85), INT8_C( 8), INT8_C( 124), -INT8_C( 15), INT8_C( 20), -INT8_C( 42), INT8_C( 39), -INT8_C( 5), -INT8_C( 4), INT8_C( 57) }, { INT8_C( 124), -INT8_C( 54), -INT8_C( 27), -INT8_C( 82), INT8_C( 77), -INT8_C( 112), INT8_C( 3), -INT8_C( 86), INT8_C( 48), INT8_C( 121), INT8_C( 89), -INT8_C( 34), INT8_C( 39), -INT8_C( 38), INT8_C( 116), -INT8_C( 62), -INT8_C( 111), INT8_C( 124), INT8_C( 40), -INT8_C( 115), -INT8_C( 127), INT8_C( 125), -INT8_C( 107), -INT8_C( 2), INT8_C( 110), -INT8_C( 86), -INT8_C( 44), -INT8_C( 106), -INT8_C( 91), -INT8_C( 47), -INT8_C( 49), INT8_C( 33), -INT8_C( 101), -INT8_C( 76), -INT8_C( 49), -INT8_C( 23), INT8_C( 68), -INT8_C( 46), -INT8_C( 109), INT8_C( 117), INT8_C( 75), -INT8_C( 20), INT8_C( 83), INT8_C( 114), -INT8_C( 57), -INT8_C( 57), INT8_C( 53), INT8_C( 88), INT8_C( 68), INT8_C( 93), -INT8_C( 27), -INT8_C( 59), -INT8_C( 37), INT8_C( 123), -INT8_C( 61), INT8_C( 73), INT8_C( 37), -INT8_C( 104), -INT8_C( 33), -INT8_C( 54), INT8_C( 105), -INT8_C( 82), -INT8_C( 21), INT8_C( 4) }, { -INT8_C( 52), -INT8_C( 37), -INT8_C( 27), -INT8_C( 88), -INT8_C( 102), -INT8_C( 112), -INT8_C( 75), -INT8_C( 1), INT8_C( 48), -INT8_C( 39), INT8_C( 89), INT8_C( 33), INT8_C( 83), INT8_C( 13), INT8_C( 116), -INT8_C( 62), -INT8_C( 23), INT8_C( 124), INT8_C( 53), -INT8_C( 115), -INT8_C( 127), INT8_C( 125), -INT8_C( 107), -INT8_C( 17), INT8_C( 110), INT8_C( 119), -INT8_C( 44), -INT8_C( 106), INT8_C( 90), -INT8_C( 47), -INT8_C( 26), INT8_C( 33), -INT8_C( 101), -INT8_C( 127), -INT8_C( 50), -INT8_C( 84), INT8_C( 50), -INT8_C( 125), -INT8_C( 85), INT8_C( 85), INT8_C( 92), -INT8_C( 20), INT8_C( 118), -INT8_C( 81), -INT8_C( 83), -INT8_C( 57), INT8_C( 53), INT8_C( 88), -INT8_C( 101), INT8_C( 93), INT8_C( 8), -INT8_C( 59), -INT8_C( 37), INT8_C( 5), 
INT8_C( 85), INT8_C( 73), INT8_C( 124), -INT8_C( 104), INT8_C( 20), -INT8_C( 54), INT8_C( 105), -INT8_C( 5), -INT8_C( 21), INT8_C( 4) } }, { UINT64_C(15500405359103556194), { INT8_C( 109), INT8_C( 111), INT8_C( 74), INT8_C( 52), INT8_C( 54), INT8_MAX, -INT8_C( 116), INT8_C( 122), -INT8_C( 36), INT8_C( 113), INT8_C( 64), -INT8_C( 73), -INT8_C( 20), INT8_C( 3), INT8_C( 1), INT8_C( 17), -INT8_C( 101), -INT8_C( 32), -INT8_C( 37), INT8_C( 4), -INT8_C( 113), -INT8_C( 58), INT8_C( 9), -INT8_C( 15), INT8_MIN, -INT8_C( 10), -INT8_C( 104), INT8_C( 12), INT8_C( 119), -INT8_C( 76), -INT8_C( 28), -INT8_C( 28), INT8_C( 35), INT8_C( 46), INT8_C( 24), INT8_C( 90), -INT8_C( 83), -INT8_C( 92), -INT8_C( 44), -INT8_C( 119), INT8_C( 21), INT8_C( 20), INT8_C( 65), INT8_C( 2), INT8_C( 24), INT8_C( 66), INT8_C( 19), -INT8_C( 77), INT8_C( 34), -INT8_C( 17), -INT8_C( 72), -INT8_C( 79), -INT8_C( 75), -INT8_C( 63), -INT8_C( 93), INT8_C( 54), -INT8_C( 73), INT8_C( 59), INT8_C( 66), INT8_C( 46), -INT8_C( 16), INT8_C( 38), INT8_C( 18), INT8_C( 19) }, { INT8_C( 84), INT8_C( 42), INT8_C( 109), INT8_C( 1), -INT8_C( 50), INT8_C( 66), -INT8_C( 117), -INT8_C( 28), INT8_C( 86), -INT8_C( 52), -INT8_C( 26), INT8_C( 110), INT8_C( 14), -INT8_C( 7), INT8_C( 34), INT8_C( 48), -INT8_C( 24), -INT8_C( 38), -INT8_C( 30), -INT8_C( 98), -INT8_C( 101), -INT8_C( 123), -INT8_C( 44), INT8_C( 82), -INT8_C( 64), INT8_C( 22), -INT8_C( 127), -INT8_C( 80), INT8_C( 61), -INT8_C( 109), -INT8_C( 60), -INT8_C( 111), -INT8_C( 66), INT8_C( 49), -INT8_C( 109), -INT8_C( 116), INT8_C( 115), INT8_C( 30), INT8_C( 112), -INT8_C( 54), -INT8_C( 22), INT8_C( 86), INT8_C( 56), -INT8_C( 8), INT8_C( 80), INT8_C( 90), INT8_C( 40), INT8_C( 56), INT8_C( 52), INT8_C( 10), -INT8_C( 42), -INT8_C( 49), -INT8_C( 113), -INT8_C( 86), INT8_C( 34), INT8_C( 80), -INT8_C( 63), -INT8_C( 93), INT8_C( 0), -INT8_C( 2), INT8_C( 54), -INT8_C( 60), -INT8_C( 113), -INT8_C( 12) }, { INT8_C( 109), INT8_C( 42), INT8_C( 74), INT8_C( 52), INT8_C( 54), INT8_C( 66), 
-INT8_C( 117), INT8_C( 122), -INT8_C( 36), -INT8_C( 52), INT8_C( 64), INT8_C( 110), INT8_C( 14), -INT8_C( 7), INT8_C( 1), INT8_C( 48), -INT8_C( 24), -INT8_C( 32), -INT8_C( 30), -INT8_C( 98), -INT8_C( 113), -INT8_C( 123), -INT8_C( 44), INT8_C( 82), -INT8_C( 64), INT8_C( 22), -INT8_C( 127), INT8_C( 12), INT8_C( 119), -INT8_C( 109), -INT8_C( 28), -INT8_C( 111), INT8_C( 35), INT8_C( 46), -INT8_C( 109), -INT8_C( 116), -INT8_C( 83), -INT8_C( 92), -INT8_C( 44), -INT8_C( 54), INT8_C( 21), INT8_C( 20), INT8_C( 65), INT8_C( 2), INT8_C( 24), INT8_C( 66), INT8_C( 19), INT8_C( 56), INT8_C( 34), -INT8_C( 17), -INT8_C( 42), -INT8_C( 49), -INT8_C( 113), -INT8_C( 63), -INT8_C( 93), INT8_C( 54), -INT8_C( 63), -INT8_C( 93), INT8_C( 0), INT8_C( 46), INT8_C( 54), INT8_C( 38), -INT8_C( 113), -INT8_C( 12) } }, { UINT64_C( 3041039432355488502), { INT8_C( 72), INT8_C( 108), INT8_C( 34), -INT8_C( 104), -INT8_C( 58), INT8_C( 75), -INT8_C( 48), -INT8_C( 5), INT8_C( 85), -INT8_C( 89), -INT8_C( 54), -INT8_C( 27), INT8_C( 81), -INT8_C( 20), INT8_C( 53), INT8_C( 18), -INT8_C( 113), INT8_C( 53), INT8_C( 16), -INT8_C( 58), -INT8_C( 6), -INT8_C( 96), -INT8_C( 70), -INT8_C( 16), -INT8_C( 62), INT8_C( 59), INT8_C( 89), INT8_C( 3), INT8_C( 45), -INT8_C( 115), INT8_C( 45), INT8_C( 117), -INT8_C( 7), INT8_C( 80), INT8_C( 13), -INT8_C( 65), -INT8_C( 101), -INT8_C( 35), -INT8_C( 70), -INT8_C( 16), -INT8_C( 124), -INT8_C( 123), -INT8_C( 43), -INT8_C( 42), INT8_C( 113), INT8_C( 10), -INT8_C( 24), INT8_C( 1), INT8_C( 64), -INT8_C( 7), -INT8_C( 57), INT8_C( 58), -INT8_C( 103), -INT8_C( 127), INT8_C( 42), INT8_C( 91), -INT8_C( 67), -INT8_C( 125), INT8_C( 94), -INT8_C( 22), INT8_C( 16), -INT8_C( 116), INT8_C( 95), INT8_C( 9) }, { -INT8_C( 36), INT8_C( 108), -INT8_C( 55), INT8_C( 119), INT8_C( 73), -INT8_C( 125), INT8_C( 103), -INT8_C( 50), INT8_C( 8), INT8_C( 61), -INT8_C( 92), INT8_C( 122), INT8_C( 71), -INT8_C( 116), INT8_C( 123), -INT8_C( 121), -INT8_C( 123), INT8_C( 66), -INT8_C( 63), INT8_C( 30), -INT8_C( 
61), -INT8_C( 21), INT8_C( 122), INT8_MIN, INT8_C( 111), -INT8_C( 40), INT8_C( 106), INT8_MAX, INT8_C( 100), -INT8_C( 55), -INT8_C( 119), INT8_C( 64), INT8_C( 53), INT8_C( 82), -INT8_C( 73), INT8_MAX, -INT8_C( 43), INT8_C( 31), INT8_C( 77), -INT8_C( 34), INT8_C( 92), -INT8_C( 15), INT8_C( 88), -INT8_C( 93), INT8_C( 125), -INT8_C( 45), INT8_C( 43), INT8_C( 3), INT8_C( 21), -INT8_C( 20), INT8_C( 33), -INT8_C( 40), -INT8_C( 40), -INT8_C( 101), INT8_C( 89), INT8_C( 71), INT8_C( 116), -INT8_C( 61), -INT8_C( 58), -INT8_C( 40), -INT8_C( 115), INT8_C( 79), INT8_C( 25), -INT8_C( 62) }, { INT8_C( 72), INT8_C( 108), -INT8_C( 55), -INT8_C( 104), INT8_C( 73), -INT8_C( 125), INT8_C( 103), -INT8_C( 50), INT8_C( 85), INT8_C( 61), -INT8_C( 54), -INT8_C( 27), INT8_C( 81), -INT8_C( 116), INT8_C( 53), INT8_C( 18), -INT8_C( 123), INT8_C( 53), INT8_C( 16), -INT8_C( 58), -INT8_C( 6), -INT8_C( 96), -INT8_C( 70), INT8_MIN, INT8_C( 111), INT8_C( 59), INT8_C( 89), INT8_MAX, INT8_C( 45), -INT8_C( 55), -INT8_C( 119), INT8_C( 117), -INT8_C( 7), INT8_C( 80), INT8_C( 13), -INT8_C( 65), -INT8_C( 101), -INT8_C( 35), INT8_C( 77), -INT8_C( 16), INT8_C( 92), -INT8_C( 123), -INT8_C( 43), -INT8_C( 42), INT8_C( 125), -INT8_C( 45), INT8_C( 43), INT8_C( 3), INT8_C( 21), -INT8_C( 20), -INT8_C( 57), INT8_C( 58), -INT8_C( 40), -INT8_C( 101), INT8_C( 42), INT8_C( 91), -INT8_C( 67), -INT8_C( 61), INT8_C( 94), -INT8_C( 40), INT8_C( 16), INT8_C( 79), INT8_C( 95), INT8_C( 9) } }, { UINT64_C( 5428402085014130849), { INT8_MAX, -INT8_C( 83), -INT8_C( 17), -INT8_C( 3), INT8_MIN, INT8_C( 26), INT8_C( 0), -INT8_C( 107), INT8_C( 6), INT8_C( 33), INT8_C( 109), -INT8_C( 34), -INT8_C( 67), -INT8_C( 58), INT8_C( 37), INT8_C( 49), -INT8_C( 118), -INT8_C( 20), INT8_C( 9), INT8_C( 23), INT8_C( 59), INT8_C( 34), -INT8_C( 39), -INT8_C( 35), -INT8_C( 13), INT8_C( 27), INT8_C( 84), -INT8_C( 30), -INT8_C( 87), -INT8_C( 87), INT8_C( 46), INT8_C( 41), INT8_C( 86), INT8_C( 29), INT8_C( 38), -INT8_C( 42), INT8_C( 55), INT8_C( 38), 
INT8_C( 107), INT8_C( 61), INT8_C( 71), -INT8_C( 40), INT8_C( 28), INT8_C( 4), -INT8_C( 97), INT8_C( 65), INT8_C( 53), INT8_C( 41), INT8_C( 45), INT8_C( 63), INT8_C( 64), INT8_C( 105), INT8_C( 97), INT8_C( 25), INT8_C( 70), INT8_C( 84), INT8_C( 52), -INT8_C( 102), INT8_C( 55), -INT8_C( 34), INT8_C( 67), INT8_C( 101), INT8_C( 7), -INT8_C( 103) }, { -INT8_C( 126), INT8_C( 45), INT8_C( 111), -INT8_C( 71), INT8_C( 83), -INT8_C( 38), -INT8_C( 10), -INT8_C( 102), -INT8_C( 78), INT8_C( 18), -INT8_C( 97), INT8_C( 81), INT8_C( 84), -INT8_C( 44), INT8_C( 122), -INT8_C( 127), INT8_C( 19), -INT8_C( 70), -INT8_C( 22), INT8_C( 117), -INT8_C( 44), INT8_C( 48), -INT8_C( 55), INT8_C( 8), -INT8_C( 54), INT8_C( 0), -INT8_C( 26), INT8_C( 13), INT8_C( 101), -INT8_C( 19), -INT8_C( 90), -INT8_C( 25), INT8_C( 26), INT8_C( 21), -INT8_C( 96), INT8_C( 109), -INT8_C( 17), -INT8_C( 105), INT8_C( 8), -INT8_C( 94), -INT8_C( 87), -INT8_C( 89), -INT8_C( 13), -INT8_C( 3), INT8_C( 123), INT8_C( 110), INT8_MAX, -INT8_C( 113), INT8_C( 40), INT8_C( 105), INT8_C( 4), -INT8_C( 4), -INT8_C( 102), -INT8_C( 51), INT8_C( 5), INT8_C( 100), -INT8_C( 50), -INT8_C( 21), INT8_C( 114), INT8_C( 51), -INT8_C( 39), INT8_C( 24), INT8_C( 27), -INT8_C( 13) }, { -INT8_C( 126), -INT8_C( 83), -INT8_C( 17), -INT8_C( 3), INT8_MIN, -INT8_C( 38), INT8_C( 0), -INT8_C( 102), INT8_C( 6), INT8_C( 33), INT8_C( 109), -INT8_C( 34), INT8_C( 84), -INT8_C( 58), INT8_C( 122), -INT8_C( 127), INT8_C( 19), -INT8_C( 20), INT8_C( 9), INT8_C( 23), INT8_C( 59), INT8_C( 34), -INT8_C( 55), -INT8_C( 35), -INT8_C( 54), INT8_C( 0), -INT8_C( 26), -INT8_C( 30), INT8_C( 101), -INT8_C( 19), -INT8_C( 90), INT8_C( 41), INT8_C( 26), INT8_C( 21), -INT8_C( 96), INT8_C( 109), INT8_C( 55), -INT8_C( 105), INT8_C( 8), -INT8_C( 94), INT8_C( 71), -INT8_C( 89), -INT8_C( 13), -INT8_C( 3), -INT8_C( 97), INT8_C( 65), INT8_C( 53), -INT8_C( 113), INT8_C( 40), INT8_C( 63), INT8_C( 4), INT8_C( 105), -INT8_C( 102), INT8_C( 25), INT8_C( 5), INT8_C( 84), -INT8_C( 50), 
-INT8_C( 21), INT8_C( 55), INT8_C( 51), INT8_C( 67), INT8_C( 101), INT8_C( 27), -INT8_C( 103) } }, { UINT64_C(18212391219510754094), { INT8_C( 16), -INT8_C( 77), -INT8_C( 7), -INT8_C( 117), INT8_C( 33), INT8_C( 120), INT8_C( 26), INT8_C( 73), -INT8_C( 30), INT8_C( 30), INT8_C( 70), INT8_C( 124), -INT8_C( 20), INT8_C( 75), -INT8_C( 32), -INT8_C( 70), INT8_C( 54), INT8_C( 82), -INT8_C( 19), INT8_C( 15), INT8_C( 107), INT8_C( 8), INT8_C( 3), -INT8_C( 103), -INT8_C( 60), INT8_C( 100), -INT8_C( 74), INT8_C( 22), -INT8_C( 51), INT8_C( 118), INT8_C( 18), -INT8_C( 35), INT8_C( 41), INT8_C( 12), INT8_C( 104), INT8_C( 74), -INT8_C( 124), -INT8_C( 125), -INT8_C( 109), INT8_C( 102), -INT8_C( 95), -INT8_C( 39), -INT8_C( 30), -INT8_C( 115), INT8_C( 36), -INT8_C( 61), INT8_C( 71), INT8_C( 91), INT8_C( 21), INT8_C( 53), INT8_C( 106), INT8_MIN, INT8_C( 61), INT8_C( 109), INT8_C( 25), INT8_C( 1), -INT8_C( 47), -INT8_C( 48), INT8_C( 24), -INT8_C( 98), INT8_C( 70), INT8_C( 42), INT8_C( 123), INT8_C( 111) }, { INT8_C( 54), -INT8_C( 28), -INT8_C( 71), -INT8_C( 69), INT8_C( 103), INT8_C( 76), INT8_C( 33), INT8_C( 8), INT8_C( 38), INT8_C( 4), -INT8_C( 106), INT8_C( 74), -INT8_C( 57), -INT8_C( 35), -INT8_C( 91), -INT8_C( 36), INT8_C( 18), INT8_C( 16), INT8_C( 93), INT8_C( 80), INT8_C( 125), INT8_C( 118), INT8_C( 81), INT8_C( 79), INT8_C( 70), INT8_C( 105), -INT8_C( 19), -INT8_C( 116), -INT8_C( 108), INT8_C( 105), -INT8_C( 5), -INT8_C( 54), INT8_C( 77), -INT8_C( 76), -INT8_C( 123), -INT8_C( 76), INT8_C( 1), -INT8_C( 89), -INT8_C( 68), INT8_C( 39), -INT8_C( 85), INT8_C( 82), INT8_C( 113), INT8_C( 114), INT8_C( 48), INT8_C( 23), INT8_C( 78), INT8_C( 66), INT8_C( 39), -INT8_C( 85), -INT8_C( 110), -INT8_C( 92), INT8_C( 34), -INT8_C( 28), -INT8_C( 13), INT8_C( 104), INT8_C( 77), -INT8_C( 31), -INT8_C( 11), -INT8_C( 31), INT8_C( 74), -INT8_C( 16), -INT8_C( 84), -INT8_C( 105) }, { INT8_C( 16), -INT8_C( 28), -INT8_C( 71), -INT8_C( 69), INT8_C( 33), INT8_C( 76), INT8_C( 26), INT8_C( 73), INT8_C( 
38), INT8_C( 4), INT8_C( 70), INT8_C( 74), -INT8_C( 57), -INT8_C( 35), -INT8_C( 32), -INT8_C( 36), INT8_C( 18), INT8_C( 82), -INT8_C( 19), INT8_C( 15), INT8_C( 107), INT8_C( 118), INT8_C( 81), -INT8_C( 103), INT8_C( 70), INT8_C( 100), -INT8_C( 19), -INT8_C( 116), -INT8_C( 108), INT8_C( 118), INT8_C( 18), -INT8_C( 35), INT8_C( 41), -INT8_C( 76), INT8_C( 104), INT8_C( 74), INT8_C( 1), -INT8_C( 125), -INT8_C( 68), INT8_C( 102), -INT8_C( 85), -INT8_C( 39), -INT8_C( 30), INT8_C( 114), INT8_C( 36), INT8_C( 23), INT8_C( 78), INT8_C( 91), INT8_C( 39), -INT8_C( 85), -INT8_C( 110), -INT8_C( 92), INT8_C( 34), -INT8_C( 28), INT8_C( 25), INT8_C( 104), -INT8_C( 47), -INT8_C( 48), -INT8_C( 11), -INT8_C( 31), INT8_C( 74), -INT8_C( 16), -INT8_C( 84), -INT8_C( 105) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_mask_blend_epi8(test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask64 k = simde_test_x86_random_mmask64(); simde__m512i a = simde_test_x86_random_i8x64(); simde__m512i b = simde_test_x86_random_i8x64(); simde__m512i r = simde_mm512_mask_blend_epi8(k, a, b); simde_test_x86_write_mmask64(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x64(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_blend_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask32 k; const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { UINT32_C(3621238078), { -INT16_C( 16557), -INT16_C( 3372), INT16_C( 11384), INT16_C( 26031), -INT16_C( 28252), -INT16_C( 29324), -INT16_C( 23149), INT16_C( 
23418), -INT16_C( 18991), -INT16_C( 196), INT16_C( 29968), INT16_C( 16008), INT16_C( 22588), INT16_C( 31351), INT16_C( 19981), INT16_C( 24914), INT16_C( 9742), -INT16_C( 31149), INT16_C( 850), -INT16_C( 2069), INT16_C( 24724), INT16_C( 10116), -INT16_C( 507), -INT16_C( 10622), -INT16_C( 16461), -INT16_C( 15403), INT16_C( 23860), INT16_C( 28674), INT16_C( 31157), -INT16_C( 15381), INT16_C( 15816), -INT16_C( 10716) }, { INT16_C( 30563), -INT16_C( 18852), INT16_C( 18554), INT16_C( 4013), INT16_C( 12712), -INT16_C( 21194), -INT16_C( 18128), -INT16_C( 7293), INT16_C( 22648), -INT16_C( 21337), -INT16_C( 22091), INT16_C( 27165), INT16_C( 2082), -INT16_C( 5587), INT16_C( 20805), -INT16_C( 22336), INT16_C( 7625), INT16_C( 17246), INT16_C( 2917), INT16_C( 3410), -INT16_C( 30403), INT16_C( 28090), INT16_C( 15682), -INT16_C( 17840), -INT16_C( 2155), INT16_C( 19046), -INT16_C( 31840), -INT16_C( 15436), -INT16_C( 7541), -INT16_C( 12115), INT16_C( 28211), -INT16_C( 903) }, { -INT16_C( 16557), -INT16_C( 18852), INT16_C( 18554), INT16_C( 4013), INT16_C( 12712), -INT16_C( 21194), -INT16_C( 23149), INT16_C( 23418), INT16_C( 22648), -INT16_C( 196), -INT16_C( 22091), INT16_C( 16008), INT16_C( 2082), -INT16_C( 5587), INT16_C( 19981), -INT16_C( 22336), INT16_C( 7625), INT16_C( 17246), INT16_C( 2917), -INT16_C( 2069), -INT16_C( 30403), INT16_C( 10116), INT16_C( 15682), -INT16_C( 17840), -INT16_C( 2155), INT16_C( 19046), -INT16_C( 31840), INT16_C( 28674), -INT16_C( 7541), -INT16_C( 15381), INT16_C( 28211), -INT16_C( 903) } }, { UINT32_C(4030781323), { -INT16_C( 27933), INT16_C( 8445), -INT16_C( 18661), INT16_C( 23949), -INT16_C( 8716), -INT16_C( 30441), INT16_C( 32469), INT16_C( 30163), -INT16_C( 30975), -INT16_C( 29384), -INT16_C( 6551), -INT16_C( 25251), -INT16_C( 10668), -INT16_C( 8295), -INT16_C( 9810), -INT16_C( 28209), -INT16_C( 13204), -INT16_C( 30799), INT16_C( 16003), INT16_C( 30693), -INT16_C( 997), -INT16_C( 4096), -INT16_C( 11398), INT16_C( 31846), -INT16_C( 24998), -INT16_C( 
15351), INT16_C( 26244), -INT16_C( 10143), -INT16_C( 1475), -INT16_C( 5193), -INT16_C( 31020), INT16_C( 16508) }, { INT16_C( 11602), -INT16_C( 10809), -INT16_C( 21397), -INT16_C( 31156), INT16_C( 19625), INT16_C( 9079), -INT16_C( 8929), INT16_C( 31391), -INT16_C( 22405), INT16_C( 62), -INT16_C( 24817), INT16_C( 19672), -INT16_C( 28519), INT16_C( 27959), -INT16_C( 19690), INT16_C( 27053), INT16_C( 30176), INT16_C( 19262), -INT16_C( 29919), -INT16_C( 13615), INT16_C( 18647), -INT16_C( 2066), -INT16_C( 29403), -INT16_C( 24207), -INT16_C( 20682), INT16_C( 17825), INT16_C( 31054), -INT16_C( 6255), -INT16_C( 14327), INT16_C( 8277), INT16_C( 635), INT16_C( 23433) }, { INT16_C( 11602), -INT16_C( 10809), -INT16_C( 18661), -INT16_C( 31156), -INT16_C( 8716), -INT16_C( 30441), INT16_C( 32469), INT16_C( 31391), -INT16_C( 22405), INT16_C( 62), -INT16_C( 24817), -INT16_C( 25251), -INT16_C( 28519), -INT16_C( 8295), -INT16_C( 19690), INT16_C( 27053), -INT16_C( 13204), -INT16_C( 30799), INT16_C( 16003), INT16_C( 30693), -INT16_C( 997), -INT16_C( 4096), -INT16_C( 29403), INT16_C( 31846), -INT16_C( 24998), -INT16_C( 15351), INT16_C( 26244), -INT16_C( 10143), -INT16_C( 14327), INT16_C( 8277), INT16_C( 635), INT16_C( 23433) } }, { UINT32_C(2577844087), { INT16_C( 30546), INT16_C( 10851), INT16_C( 20928), -INT16_C( 6879), -INT16_C( 27937), INT16_C( 5510), INT16_C( 10049), -INT16_C( 28838), -INT16_C( 5215), -INT16_C( 21898), -INT16_C( 13389), INT16_C( 11978), INT16_C( 21454), INT16_C( 17801), INT16_C( 12059), INT16_C( 28126), INT16_C( 17062), INT16_C( 26263), -INT16_C( 18285), INT16_C( 29260), -INT16_C( 11702), -INT16_C( 29817), -INT16_C( 7686), -INT16_C( 25830), -INT16_C( 28212), INT16_C( 32581), INT16_C( 4188), INT16_C( 10925), INT16_C( 13923), INT16_C( 32368), INT16_C( 20069), INT16_C( 3308) }, { -INT16_C( 31856), INT16_C( 9330), -INT16_C( 16836), -INT16_C( 31082), INT16_C( 7825), -INT16_C( 29934), INT16_C( 11519), -INT16_C( 13274), INT16_C( 27581), INT16_C( 6731), -INT16_C( 1669), 
-INT16_C( 8380), -INT16_C( 19409), -INT16_C( 27299), INT16_C( 18691), -INT16_C( 27743), INT16_C( 5069), INT16_C( 2487), INT16_C( 20178), INT16_C( 25487), -INT16_C( 24212), INT16_C( 27630), INT16_C( 5326), -INT16_C( 29897), -INT16_C( 31873), -INT16_C( 1115), -INT16_C( 5508), -INT16_C( 21542), INT16_C( 14238), -INT16_C( 24256), -INT16_C( 7807), INT16_C( 20021) }, { -INT16_C( 31856), INT16_C( 9330), -INT16_C( 16836), -INT16_C( 6879), INT16_C( 7825), -INT16_C( 29934), INT16_C( 11519), -INT16_C( 28838), INT16_C( 27581), INT16_C( 6731), -INT16_C( 1669), INT16_C( 11978), INT16_C( 21454), INT16_C( 17801), INT16_C( 18691), -INT16_C( 27743), INT16_C( 17062), INT16_C( 2487), INT16_C( 20178), INT16_C( 29260), -INT16_C( 11702), INT16_C( 27630), -INT16_C( 7686), -INT16_C( 29897), -INT16_C( 31873), INT16_C( 32581), INT16_C( 4188), -INT16_C( 21542), INT16_C( 14238), INT16_C( 32368), INT16_C( 20069), INT16_C( 20021) } }, { UINT32_C(3344428277), { -INT16_C( 6598), -INT16_C( 22998), INT16_C( 6280), INT16_C( 22034), INT16_C( 18732), -INT16_C( 21535), -INT16_C( 30772), INT16_C( 18598), -INT16_C( 32655), INT16_C( 4084), INT16_C( 13496), INT16_C( 14769), -INT16_C( 6634), INT16_C( 2951), -INT16_C( 8494), INT16_C( 3538), -INT16_C( 828), INT16_C( 19635), -INT16_C( 15084), INT16_C( 16546), -INT16_C( 31729), -INT16_C( 9237), -INT16_C( 28149), INT16_C( 31780), INT16_C( 6162), -INT16_C( 13685), INT16_C( 15436), INT16_C( 25091), -INT16_C( 30174), -INT16_C( 2707), INT16_C( 16232), INT16_C( 11522) }, { -INT16_C( 19141), INT16_C( 20345), INT16_C( 7291), -INT16_C( 30065), INT16_C( 31648), -INT16_C( 21659), -INT16_C( 30451), INT16_C( 7975), -INT16_C( 19807), -INT16_C( 4374), -INT16_C( 4625), INT16_C( 4432), -INT16_C( 16776), -INT16_C( 8186), INT16_C( 2301), INT16_C( 14605), -INT16_C( 30786), INT16_C( 14728), INT16_C( 6307), INT16_C( 17347), INT16_C( 10387), -INT16_C( 24338), INT16_C( 5554), INT16_C( 21439), -INT16_C( 22073), -INT16_C( 18879), -INT16_C( 28009), INT16_C( 4040), -INT16_C( 12720), 
INT16_C( 19951), -INT16_C( 553), -INT16_C( 27258) }, { -INT16_C( 19141), -INT16_C( 22998), INT16_C( 7291), INT16_C( 22034), INT16_C( 31648), -INT16_C( 21659), -INT16_C( 30451), INT16_C( 7975), -INT16_C( 32655), INT16_C( 4084), -INT16_C( 4625), INT16_C( 4432), -INT16_C( 6634), -INT16_C( 8186), INT16_C( 2301), INT16_C( 14605), -INT16_C( 30786), INT16_C( 14728), INT16_C( 6307), INT16_C( 16546), INT16_C( 10387), -INT16_C( 9237), INT16_C( 5554), INT16_C( 31780), -INT16_C( 22073), -INT16_C( 18879), -INT16_C( 28009), INT16_C( 25091), -INT16_C( 30174), -INT16_C( 2707), -INT16_C( 553), -INT16_C( 27258) } }, { UINT32_C( 667815812), { -INT16_C( 28377), -INT16_C( 17814), INT16_C( 22713), INT16_C( 27482), INT16_C( 6509), INT16_C( 13503), INT16_C( 195), INT16_C( 23275), -INT16_C( 19566), -INT16_C( 7575), INT16_C( 22657), INT16_C( 22576), -INT16_C( 18859), -INT16_C( 9747), -INT16_C( 17467), -INT16_C( 5120), INT16_C( 27212), INT16_C( 1702), INT16_C( 194), INT16_C( 12145), INT16_C( 12314), -INT16_C( 8860), INT16_C( 20273), -INT16_C( 15561), -INT16_C( 24574), -INT16_C( 31834), -INT16_C( 10504), INT16_C( 20188), -INT16_C( 13940), INT16_C( 21031), INT16_C( 10373), -INT16_C( 11970) }, { -INT16_C( 6766), INT16_C( 21975), INT16_C( 18917), -INT16_C( 124), -INT16_C( 6023), -INT16_C( 21796), INT16_C( 4919), INT16_C( 14702), INT16_C( 5299), -INT16_C( 21315), -INT16_C( 26134), INT16_C( 30458), INT16_C( 8546), -INT16_C( 6200), INT16_C( 1865), -INT16_C( 9031), -INT16_C( 28436), -INT16_C( 11983), -INT16_C( 18983), INT16_C( 21457), -INT16_C( 21090), -INT16_C( 10755), INT16_C( 27585), INT16_C( 29711), -INT16_C( 13185), INT16_C( 26912), INT16_C( 6757), -INT16_C( 14368), -INT16_C( 22468), -INT16_C( 31313), INT16_C( 26799), -INT16_C( 25759) }, { -INT16_C( 28377), -INT16_C( 17814), INT16_C( 18917), INT16_C( 27482), INT16_C( 6509), INT16_C( 13503), INT16_C( 195), INT16_C( 14702), INT16_C( 5299), -INT16_C( 21315), -INT16_C( 26134), INT16_C( 30458), -INT16_C( 18859), -INT16_C( 9747), -INT16_C( 17467), 
-INT16_C( 5120), INT16_C( 27212), -INT16_C( 11983), -INT16_C( 18983), INT16_C( 21457), INT16_C( 12314), -INT16_C( 8860), INT16_C( 27585), INT16_C( 29711), -INT16_C( 13185), INT16_C( 26912), INT16_C( 6757), INT16_C( 20188), -INT16_C( 13940), -INT16_C( 31313), INT16_C( 10373), -INT16_C( 11970) } }, { UINT32_C(3530396408), { INT16_C( 15944), -INT16_C( 6619), INT16_C( 8939), -INT16_C( 21317), -INT16_C( 13682), INT16_C( 3361), INT16_C( 16790), -INT16_C( 1161), INT16_C( 22364), -INT16_C( 26429), INT16_C( 29439), -INT16_C( 20707), INT16_C( 32730), -INT16_C( 11702), -INT16_C( 18671), INT16_C( 22948), -INT16_C( 13835), -INT16_C( 7873), -INT16_C( 1044), INT16_C( 31373), -INT16_C( 20795), INT16_C( 23687), -INT16_C( 272), INT16_C( 19543), INT16_C( 6741), INT16_C( 21988), INT16_C( 396), INT16_C( 26116), INT16_C( 20096), -INT16_C( 28103), -INT16_C( 8954), -INT16_C( 1045) }, { INT16_C( 11175), -INT16_C( 27684), INT16_C( 27174), -INT16_C( 5363), -INT16_C( 27624), INT16_C( 2119), -INT16_C( 24685), -INT16_C( 6060), INT16_C( 14521), INT16_C( 17981), INT16_C( 16698), -INT16_C( 17748), -INT16_C( 6768), -INT16_C( 27060), INT16_C( 14531), INT16_C( 27281), INT16_C( 28259), -INT16_C( 30211), INT16_C( 2776), -INT16_C( 3980), -INT16_C( 17250), INT16_C( 12793), INT16_C( 19803), INT16_C( 5146), INT16_C( 22406), -INT16_C( 16294), INT16_C( 1945), INT16_C( 10618), -INT16_C( 14356), -INT16_C( 20545), INT16_C( 20735), INT16_C( 25113) }, { INT16_C( 15944), -INT16_C( 6619), INT16_C( 8939), -INT16_C( 5363), -INT16_C( 27624), INT16_C( 2119), -INT16_C( 24685), -INT16_C( 6060), INT16_C( 22364), INT16_C( 17981), INT16_C( 29439), -INT16_C( 20707), -INT16_C( 6768), -INT16_C( 11702), -INT16_C( 18671), INT16_C( 27281), INT16_C( 28259), -INT16_C( 7873), INT16_C( 2776), -INT16_C( 3980), -INT16_C( 20795), INT16_C( 12793), INT16_C( 19803), INT16_C( 19543), INT16_C( 6741), -INT16_C( 16294), INT16_C( 396), INT16_C( 26116), -INT16_C( 14356), -INT16_C( 28103), INT16_C( 20735), INT16_C( 25113) } }, { 
UINT32_C(2531989182), { INT16_C( 24352), -INT16_C( 16505), -INT16_C( 32741), INT16_C( 30448), INT16_C( 2765), INT16_C( 21387), -INT16_C( 6814), -INT16_C( 1261), -INT16_C( 28948), -INT16_C( 9948), -INT16_C( 7339), INT16_C( 21640), -INT16_C( 24013), -INT16_C( 3402), -INT16_C( 24136), -INT16_C( 9848), INT16_C( 3840), INT16_C( 7320), -INT16_C( 30577), INT16_C( 23954), INT16_C( 7571), -INT16_C( 2640), -INT16_C( 15357), -INT16_C( 4112), INT16_C( 5202), -INT16_C( 22584), INT16_C( 20983), INT16_C( 11003), -INT16_C( 19981), -INT16_C( 21732), -INT16_C( 23214), INT16_C( 21124) }, { INT16_C( 7348), INT16_C( 17518), INT16_C( 421), INT16_C( 14497), INT16_C( 20766), INT16_C( 8493), INT16_C( 7445), INT16_C( 26385), -INT16_C( 9935), INT16_C( 10254), INT16_C( 2346), INT16_C( 7506), INT16_C( 28602), INT16_C( 3273), INT16_C( 19732), -INT16_C( 14241), -INT16_C( 12950), INT16_C( 3852), -INT16_C( 21042), -INT16_C( 4793), INT16_C( 29951), INT16_C( 5134), INT16_C( 8081), -INT16_C( 15748), -INT16_C( 29959), INT16_C( 9194), INT16_C( 15508), INT16_C( 20033), INT16_C( 2731), -INT16_C( 16549), -INT16_C( 17833), -INT16_C( 15992) }, { INT16_C( 24352), INT16_C( 17518), INT16_C( 421), INT16_C( 14497), INT16_C( 20766), INT16_C( 8493), -INT16_C( 6814), INT16_C( 26385), -INT16_C( 28948), INT16_C( 10254), INT16_C( 2346), INT16_C( 21640), INT16_C( 28602), -INT16_C( 3402), -INT16_C( 24136), -INT16_C( 9848), -INT16_C( 12950), INT16_C( 3852), -INT16_C( 30577), -INT16_C( 4793), INT16_C( 7571), INT16_C( 5134), INT16_C( 8081), -INT16_C( 15748), INT16_C( 5202), INT16_C( 9194), INT16_C( 15508), INT16_C( 11003), INT16_C( 2731), -INT16_C( 21732), -INT16_C( 23214), -INT16_C( 15992) } }, { UINT32_C(1456510087), { INT16_C( 5954), INT16_C( 16707), INT16_C( 20875), INT16_C( 7253), -INT16_C( 11919), INT16_C( 27358), -INT16_C( 14244), -INT16_C( 3955), -INT16_C( 12795), -INT16_C( 20418), -INT16_C( 26152), INT16_C( 12400), -INT16_C( 1965), -INT16_C( 9231), -INT16_C( 15732), -INT16_C( 12751), INT16_C( 29913), INT16_C( 
25871), INT16_C( 26053), INT16_C( 13953), INT16_C( 24630), -INT16_C( 28000), INT16_C( 11816), INT16_C( 11650), -INT16_C( 15876), -INT16_C( 10786), INT16_C( 20058), -INT16_C( 20987), -INT16_C( 2490), -INT16_C( 11639), -INT16_C( 17736), -INT16_C( 27999) }, { -INT16_C( 20434), -INT16_C( 3081), INT16_C( 30741), INT16_C( 19498), -INT16_C( 13608), INT16_C( 478), INT16_C( 25080), -INT16_C( 2770), INT16_C( 3106), INT16_C( 31946), -INT16_C( 12454), -INT16_C( 24534), -INT16_C( 19515), INT16_C( 32371), INT16_C( 5229), -INT16_C( 25840), INT16_C( 1988), -INT16_C( 9585), -INT16_C( 18049), INT16_C( 22566), INT16_C( 1155), INT16_C( 31833), -INT16_C( 30875), -INT16_C( 30863), INT16_C( 15252), -INT16_C( 4604), INT16_C( 11786), -INT16_C( 12401), INT16_C( 738), INT16_C( 20301), INT16_C( 23830), -INT16_C( 9493) }, { -INT16_C( 20434), -INT16_C( 3081), INT16_C( 30741), INT16_C( 7253), -INT16_C( 11919), INT16_C( 27358), -INT16_C( 14244), -INT16_C( 2770), -INT16_C( 12795), -INT16_C( 20418), -INT16_C( 12454), INT16_C( 12400), -INT16_C( 19515), -INT16_C( 9231), -INT16_C( 15732), -INT16_C( 25840), INT16_C( 29913), INT16_C( 25871), INT16_C( 26053), INT16_C( 13953), INT16_C( 1155), -INT16_C( 28000), -INT16_C( 30875), -INT16_C( 30863), -INT16_C( 15876), -INT16_C( 4604), INT16_C( 11786), -INT16_C( 20987), INT16_C( 738), -INT16_C( 11639), INT16_C( 23830), -INT16_C( 27999) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_mask_blend_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask32 k = simde_test_x86_random_mmask32(); simde__m512i a = simde_test_x86_random_i16x32(); simde__m512i b = simde_test_x86_random_i16x32(); simde__m512i r = simde_mm512_mask_blend_epi16(k, a, b); 
simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_blend_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT16_C(12684), simde_mm512_set_epi32(INT32_C( 2139597246), INT32_C(-2035467821), INT32_C(-1381016544), INT32_C( -293624181), INT32_C( 1610331725), INT32_C( 134146865), INT32_C( 837546022), INT32_C(-1561535917), INT32_C( 522767958), INT32_C( 350987310), INT32_C( 1200416302), INT32_C( 1035712103), INT32_C( -697441354), INT32_C(-1209277994), INT32_C( 1905768221), INT32_C( 941422574)), simde_mm512_set_epi32(INT32_C( -854245863), INT32_C( 77819890), INT32_C( -597706880), INT32_C( -28687979), INT32_C( 1384494246), INT32_C(-1885694903), INT32_C( 479641666), INT32_C( 436747778), INT32_C( 1142933685), INT32_C( -36150185), INT32_C( 234764144), INT32_C( -925516387), INT32_C( 1528722995), INT32_C( 1957265461), INT32_C( -558613563), INT32_C( 535737103)), simde_mm512_set_epi32(INT32_C( 2139597246), INT32_C(-2035467821), INT32_C( -597706880), INT32_C( -28687979), INT32_C( 1610331725), INT32_C( 134146865), INT32_C( 837546022), INT32_C( 436747778), INT32_C( 1142933685), INT32_C( 350987310), INT32_C( 1200416302), INT32_C( 1035712103), INT32_C( 1528722995), INT32_C( 1957265461), INT32_C( 1905768221), INT32_C( 941422574)) }, { UINT16_C(12889), simde_mm512_set_epi32(INT32_C(-1009854213), INT32_C( 19207470), INT32_C( 2053019824), INT32_C( 1679893080), INT32_C( -761309092), INT32_C(-1797634461), INT32_C( 1499461014), INT32_C( -349931656), INT32_C( 1308377490), INT32_C(-1862232386), INT32_C( -706282442), INT32_C( 1752887042), INT32_C( 1045610342), INT32_C( 88096217), INT32_C(-1144289151), INT32_C( 1410502196)), simde_mm512_set_epi32(INT32_C( 1437842356), 
INT32_C(-1817562257), INT32_C( 808924311), INT32_C( 1765692072), INT32_C(-1346910557), INT32_C( -92284700), INT32_C( 1533217965), INT32_C( 1732689820), INT32_C(-1401128233), INT32_C( -762168473), INT32_C( 97276971), INT32_C( 2145432631), INT32_C(-1561525899), INT32_C(-2005427238), INT32_C( -455460474), INT32_C( -933959435)), simde_mm512_set_epi32(INT32_C(-1009854213), INT32_C( 19207470), INT32_C( 808924311), INT32_C( 1765692072), INT32_C( -761309092), INT32_C(-1797634461), INT32_C( 1533217965), INT32_C( -349931656), INT32_C( 1308377490), INT32_C( -762168473), INT32_C( -706282442), INT32_C( 2145432631), INT32_C(-1561525899), INT32_C( 88096217), INT32_C(-1144289151), INT32_C( -933959435)) }, { UINT16_C(18390), simde_mm512_set_epi32(INT32_C( 191788932), INT32_C( 410937469), INT32_C( 218604234), INT32_C( -632545043), INT32_C( 246082482), INT32_C( 2029197195), INT32_C( 1188965621), INT32_C( -844747875), INT32_C( 989502056), INT32_C( 2119540790), INT32_C(-1766179858), INT32_C(-1109416221), INT32_C(-1963025204), INT32_C( -932958949), INT32_C( 47867627), INT32_C( -567270366)), simde_mm512_set_epi32(INT32_C( 558500028), INT32_C( 1400907983), INT32_C(-1581715774), INT32_C( -29022872), INT32_C( 1773849857), INT32_C( -311975417), INT32_C( 1183760637), INT32_C(-1160252785), INT32_C( 2107838031), INT32_C( 1909470743), INT32_C(-2018375211), INT32_C( 267812095), INT32_C( -225335539), INT32_C( -871226308), INT32_C( -872412082), INT32_C( 1435481672)), simde_mm512_set_epi32(INT32_C( 191788932), INT32_C( 1400907983), INT32_C( 218604234), INT32_C( -632545043), INT32_C( 246082482), INT32_C( -311975417), INT32_C( 1183760637), INT32_C(-1160252785), INT32_C( 2107838031), INT32_C( 1909470743), INT32_C(-1766179858), INT32_C( 267812095), INT32_C(-1963025204), INT32_C( -871226308), INT32_C( -872412082), INT32_C( -567270366)) }, { UINT16_C(46409), simde_mm512_set_epi32(INT32_C( -239336968), INT32_C( 1154172094), INT32_C( 1382102779), INT32_C(-1946237060), INT32_C( -995869857), INT32_C( 
1050338310), INT32_C(-2047829467), INT32_C( -881484106), INT32_C( -67227529), INT32_C( -510303256), INT32_C( -387177060), INT32_C( 2007995362), INT32_C( 1596393504), INT32_C( 1716443052), INT32_C( 1056333857), INT32_C( -879795312)), simde_mm512_set_epi32(INT32_C(-1321156942), INT32_C(-1763902924), INT32_C( -145319736), INT32_C( 356975558), INT32_C( 109934631), INT32_C( 1326272066), INT32_C( 723198088), INT32_C(-1812908400), INT32_C( 1665386649), INT32_C( 1770577849), INT32_C(-2015166919), INT32_C(-1565649496), INT32_C( 1045296779), INT32_C( 1401153164), INT32_C( -294475079), INT32_C( 378377774)), simde_mm512_set_epi32(INT32_C(-1321156942), INT32_C( 1154172094), INT32_C( -145319736), INT32_C( 356975558), INT32_C( -995869857), INT32_C( 1326272066), INT32_C(-2047829467), INT32_C(-1812908400), INT32_C( -67227529), INT32_C( 1770577849), INT32_C( -387177060), INT32_C( 2007995362), INT32_C( 1045296779), INT32_C( 1716443052), INT32_C( 1056333857), INT32_C( 378377774)) }, { UINT16_C(35033), simde_mm512_set_epi32(INT32_C( 576121858), INT32_C( -83274089), INT32_C( 1081604364), INT32_C( 1853977291), INT32_C(-1408149319), INT32_C(-1793071292), INT32_C( -580417531), INT32_C( 1708989591), INT32_C(-1803428364), INT32_C(-1884594628), INT32_C(-1049896819), INT32_C( 1351777033), INT32_C( -543435799), INT32_C( 45073785), INT32_C( 310971883), INT32_C( 586295496)), simde_mm512_set_epi32(INT32_C( 274706498), INT32_C( 1339140311), INT32_C( 773365916), INT32_C( -407443831), INT32_C( -44487881), INT32_C( -363465063), INT32_C( -778555208), INT32_C( -640038352), INT32_C( -428291654), INT32_C(-1080717955), INT32_C( 906661653), INT32_C( 1353353955), INT32_C(-1311936279), INT32_C( 1168846380), INT32_C( -71806717), INT32_C( 617275327)), simde_mm512_set_epi32(INT32_C( 274706498), INT32_C( -83274089), INT32_C( 1081604364), INT32_C( 1853977291), INT32_C( -44487881), INT32_C(-1793071292), INT32_C( -580417531), INT32_C( 1708989591), INT32_C( -428291654), INT32_C(-1080717955), INT32_C(-1049896819), 
INT32_C( 1353353955), INT32_C(-1311936279), INT32_C( 45073785), INT32_C( 310971883), INT32_C( 617275327)) }, { UINT16_C(62826), simde_mm512_set_epi32(INT32_C( -943712419), INT32_C( -981833223), INT32_C( 2020022414), INT32_C( 630972788), INT32_C( 1615502534), INT32_C( 991949979), INT32_C( 601817641), INT32_C(-2063962607), INT32_C(-1545145030), INT32_C( 1626575612), INT32_C(-1511315708), INT32_C( 1422623346), INT32_C( 1496301111), INT32_C(-1751918881), INT32_C( 333195983), INT32_C( 1655699275)), simde_mm512_set_epi32(INT32_C(-1770653828), INT32_C( -674401292), INT32_C(-2023667251), INT32_C( 1038799540), INT32_C(-1877506849), INT32_C( 791301479), INT32_C(-2115975814), INT32_C( 1430860109), INT32_C(-2123570597), INT32_C(-2010985064), INT32_C( 1367050649), INT32_C( -268988786), INT32_C( 1975120887), INT32_C( 83320183), INT32_C( 2120549505), INT32_C( 163195572)), simde_mm512_set_epi32(INT32_C(-1770653828), INT32_C( -674401292), INT32_C(-2023667251), INT32_C( 1038799540), INT32_C( 1615502534), INT32_C( 791301479), INT32_C( 601817641), INT32_C( 1430860109), INT32_C(-1545145030), INT32_C(-2010985064), INT32_C( 1367050649), INT32_C( 1422623346), INT32_C( 1975120887), INT32_C(-1751918881), INT32_C( 2120549505), INT32_C( 1655699275)) }, { UINT16_C( 2185), simde_mm512_set_epi32(INT32_C( 1990735603), INT32_C( -226564964), INT32_C( 2030923034), INT32_C( 1100474296), INT32_C( 1370205128), INT32_C( 1609607637), INT32_C( -586788969), INT32_C( 1072075481), INT32_C( -723316478), INT32_C(-1331909036), INT32_C(-1524508879), INT32_C( -832646284), INT32_C(-1636241911), INT32_C( 215718784), INT32_C( 478814832), INT32_C( 1231303010)), simde_mm512_set_epi32(INT32_C( 96871414), INT32_C( 245175863), INT32_C( 1584772139), INT32_C( 1604795180), INT32_C(-1972440261), INT32_C(-1471456371), INT32_C( 1108051273), INT32_C( -4141330), INT32_C(-1064630270), INT32_C( -911670021), INT32_C(-1808660435), INT32_C(-2035294308), INT32_C(-2014140232), INT32_C(-1194657062), INT32_C(-1732550793), 
INT32_C(-1643523135)), simde_mm512_set_epi32(INT32_C( 1990735603), INT32_C( -226564964), INT32_C( 2030923034), INT32_C( 1100474296), INT32_C(-1972440261), INT32_C( 1609607637), INT32_C( -586788969), INT32_C( 1072075481), INT32_C(-1064630270), INT32_C(-1331909036), INT32_C(-1524508879), INT32_C( -832646284), INT32_C(-2014140232), INT32_C( 215718784), INT32_C( 478814832), INT32_C(-1643523135)) }, { UINT16_C(11244), simde_mm512_set_epi32(INT32_C( 1605800253), INT32_C( 1825375434), INT32_C( -280209407), INT32_C( 1616462798), INT32_C( 1380939760), INT32_C( 1670822362), INT32_C(-1872387325), INT32_C( 451486273), INT32_C( -978012107), INT32_C( -718128180), INT32_C(-1625787118), INT32_C( -879101117), INT32_C( 173600397), INT32_C( 1426384314), INT32_C( -517748272), INT32_C( 76760759)), simde_mm512_set_epi32(INT32_C( 805228357), INT32_C( 980137697), INT32_C( -555168446), INT32_C(-2016549382), INT32_C( 927860791), INT32_C(-1515288559), INT32_C( -918296563), INT32_C(-1858382028), INT32_C( 1365882699), INT32_C( 616589376), INT32_C(-1573056329), INT32_C( 1014781400), INT32_C( -260360112), INT32_C( -953114112), INT32_C( 1191757764), INT32_C( -706360509)), simde_mm512_set_epi32(INT32_C( 1605800253), INT32_C( 1825375434), INT32_C( -555168446), INT32_C( 1616462798), INT32_C( 927860791), INT32_C( 1670822362), INT32_C( -918296563), INT32_C(-1858382028), INT32_C( 1365882699), INT32_C( 616589376), INT32_C(-1573056329), INT32_C( -879101117), INT32_C( -260360112), INT32_C( -953114112), INT32_C( -517748272), INT32_C( 76760759)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_blend_epi32(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_blend_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT8_C(140), simde_mm512_set_epi64(INT64_C(-8742267720341431264), 
INT64_C(-1261106253099452851), INT64_C( 576156398873473062), INT64_C(-6706745694521602474), INT64_C( 1507479018961430062), INT64_C( 4448349614053909430), INT64_C(-5193809434096716003), INT64_C( 4043379170809950035)), simde_mm512_set_epi64(INT64_C( 334233886225577856), INT64_C( -123213930208840538), INT64_C(-8098997938139250622), INT64_C( 1875817424253601973), INT64_C( -155263862084585616), INT64_C(-3975062612548356557), INT64_C( 8406391148321717189), INT64_C( 2300973338778380734)), simde_mm512_set_epi64(INT64_C( 334233886225577856), INT64_C(-1261106253099452851), INT64_C( 576156398873473062), INT64_C(-6706745694521602474), INT64_C( -155263862084585616), INT64_C(-3975062612548356557), INT64_C(-5193809434096716003), INT64_C( 4043379170809950035)) }, { UINT8_C( 25), simde_mm512_set_epi64(INT64_C( 8817653003799568984), INT64_C(-3269797649790122397), INT64_C( 6440136020702033784), INT64_C( 5619438532805301950), INT64_C(-3033459988376129790), INT64_C( 4490862223337471449), INT64_C(-4914684479302103500), INT64_C( 357175739365339737)), simde_mm512_set_epi64(INT64_C( 3474303462450025128), INT64_C(-5784936788749461276), INT64_C( 6585121019047362460), INT64_C(-6017799934704469145), INT64_C( 417801411244373047), INT64_C(-6706702665772459046), INT64_C(-1956187837089650443), INT64_C(-4337290818543610578)), simde_mm512_set_epi64(INT64_C( 8817653003799568984), INT64_C(-3269797649790122397), INT64_C( 6440136020702033784), INT64_C(-6017799934704469145), INT64_C( 417801411244373047), INT64_C( 4490862223337471449), INT64_C(-4914684479302103500), INT64_C(-4337290818543610578)) }, { UINT8_C(111), simde_mm512_set_epi64(INT64_C(-2716760272685831246), INT64_C( 8715335590848900341), INT64_C(-3628164495500993944), INT64_C( 9103358378116791278), INT64_C(-4764906384514966324), INT64_C(-4007028174417664277), INT64_C(-2436407666547579589), INT64_C( 1334270375494925236)), simde_mm512_set_epi64(INT64_C( -124652284302144255), INT64_C(-1339924211987201795), INT64_C(-4983247764560081329), INT64_C( 
8201114396130413013), INT64_C( 1150244193567876877), INT64_C(-3741888496852267954), INT64_C( 6165346835439187844), INT64_C( 1764962990274618058)), simde_mm512_set_epi64(INT64_C(-2716760272685831246), INT64_C(-1339924211987201795), INT64_C(-4983247764560081329), INT64_C( 9103358378116791278), INT64_C( 1150244193567876877), INT64_C(-3741888496852267954), INT64_C( 6165346835439187844), INT64_C( 1764962990274618058)) }, { UINT8_C(194), simde_mm512_set_epi64(INT64_C(-4277228465836858362), INT64_C(-8795360585136628042), INT64_C( -288740034661227544), INT64_C(-1662912808453434398), INT64_C( 6856457892943288236), INT64_C( 4536919372887712656), INT64_C(-1781208167188155063), INT64_C( 2398739356475992271)), simde_mm512_set_epi64(INT64_C( 472165646169099842), INT64_C( 3106112138971788944), INT64_C( 7152781194420608953), INT64_C(-8655076010356763224), INT64_C( 4489515481820292748), INT64_C(-1264760833413638610), INT64_C(-1027944449129626434), INT64_C( 5936086237864445820)), simde_mm512_set_epi64(INT64_C( 472165646169099842), INT64_C( 3106112138971788944), INT64_C( -288740034661227544), INT64_C(-1662912808453434398), INT64_C( 6856457892943288236), INT64_C( 4536919372887712656), INT64_C(-1027944449129626434), INT64_C( 2398739356475992271)) }, { UINT8_C(198), simde_mm512_set_epi64(INT64_C(-7701182554821916667), INT64_C( 7340054405040954868), INT64_C(-8094272290232215411), INT64_C( 5805838151970444265), INT64_C( 193590432792907243), INT64_C( 2518119983696523684), INT64_C(-7781788212556415310), INT64_C(-7575905367749125944)), simde_mm512_set_epi64(INT64_C(-1561070555307167560), INT64_C(-2748943786159060550), INT64_C(-4641648272018338027), INT64_C( 5812610979620286697), INT64_C( 5020156980371149059), INT64_C( 2651177342668827650), INT64_C( -357659487777588980), INT64_C( 7962771835258493113)), simde_mm512_set_epi64(INT64_C(-1561070555307167560), INT64_C(-2748943786159060550), INT64_C(-8094272290232215411), INT64_C( 5805838151970444265), INT64_C( 193590432792907243), INT64_C( 
2651177342668827650), INT64_C( -357659487777588980), INT64_C(-7575905367749125944)) }, { UINT8_C( 55), simde_mm512_set_epi64(INT64_C( 2584787088481873425), INT64_C(-6636347369800363268), INT64_C(-6491051538368462222), INT64_C( 6426564339256514271), INT64_C( 1431065851799271243), INT64_C(-5052971989011532438), INT64_C( 1179855426247829719), INT64_C( 3321581320948606601)), simde_mm512_set_epi64(INT64_C(-9088046918826118835), INT64_C(-9120666262578213480), INT64_C( 5871437833456553614), INT64_C( 8483079615394831735), INT64_C( 9107690773687184052), INT64_C(-4053213973120914951), INT64_C( 8675930205947945332), INT64_C( 6938530551127078043)), simde_mm512_set_epi64(INT64_C( 2584787088481873425), INT64_C(-6636347369800363268), INT64_C( 5871437833456553614), INT64_C( 8483079615394831735), INT64_C( 1431065851799271243), INT64_C(-4053213973120914951), INT64_C( 8675930205947945332), INT64_C( 6938530551127078043)) }, { UINT8_C(103), simde_mm512_set_epi64(INT64_C( 4604529133310120194), INT64_C(-5720505748096428239), INT64_C(-3576188556257202679), INT64_C( 926505122891702896), INT64_C( 5288406162053320871), INT64_C(-5908713324082235524), INT64_C(-2896531491248846387), INT64_C( 4461610053817304287)), simde_mm512_set_epi64(INT64_C( -17786873681606654), INT64_C(-3915592922452326355), INT64_C(-8741522488314124104), INT64_C(-5131013008663027849), INT64_C(-7058878113053657357), INT64_C( -973089108768494310), INT64_C( 4726501112778828744), INT64_C( 6913212164015017879)), simde_mm512_set_epi64(INT64_C( 4604529133310120194), INT64_C(-3915592922452326355), INT64_C(-8741522488314124104), INT64_C( 926505122891702896), INT64_C( 5288406162053320871), INT64_C( -973089108768494310), INT64_C( 4726501112778828744), INT64_C( 6913212164015017879)) }, { UINT8_C( 73), simde_mm512_set_epi64(INT64_C(-4200530011080213556), INT64_C(-6982702498652226749), INT64_C( 745608029114000826), INT64_C(-2223711895723751753), INT64_C( 4918324162995104748), INT64_C( 416059555292452407), INT64_C( 6806544510221761324), 
INT64_C(-8471566411485193331)), simde_mm512_set_epi64(INT64_C( 5866421522993801280), INT64_C(-6756225486806034984), INT64_C(-1118238162881043968), INT64_C( 5118560624722692931), INT64_C( 6896859572368901322), INT64_C(-1203490237480090674), INT64_C( 5931091108616911322), INT64_C(-8041842325868436927)), simde_mm512_set_epi64(INT64_C(-4200530011080213556), INT64_C(-6756225486806034984), INT64_C( 745608029114000826), INT64_C(-2223711895723751753), INT64_C( 6896859572368901322), INT64_C( 416059555292452407), INT64_C( 6806544510221761324), INT64_C(-8041842325868436927)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_blend_epi64(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_blend_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512 a; simde__m512 b; simde__m512 r; } test_vec[8] = { { UINT16_C(28658), simde_mm512_set_ps(SIMDE_FLOAT32_C( 986.64), SIMDE_FLOAT32_C( 121.90), SIMDE_FLOAT32_C( -796.62), SIMDE_FLOAT32_C( 983.17), SIMDE_FLOAT32_C( 569.02), SIMDE_FLOAT32_C( -88.58), SIMDE_FLOAT32_C( -750.53), SIMDE_FLOAT32_C( 52.16), SIMDE_FLOAT32_C( 863.27), SIMDE_FLOAT32_C( -937.53), SIMDE_FLOAT32_C( 272.85), SIMDE_FLOAT32_C( -836.56), SIMDE_FLOAT32_C( -517.71), SIMDE_FLOAT32_C( 436.89), SIMDE_FLOAT32_C( -561.62), SIMDE_FLOAT32_C( -796.29)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 721.67), SIMDE_FLOAT32_C( -355.29), SIMDE_FLOAT32_C( -776.65), SIMDE_FLOAT32_C( -467.78), SIMDE_FLOAT32_C( -890.68), SIMDE_FLOAT32_C( -288.13), SIMDE_FLOAT32_C( 739.88), SIMDE_FLOAT32_C( -3.67), SIMDE_FLOAT32_C( 356.91), SIMDE_FLOAT32_C( -250.13), SIMDE_FLOAT32_C( -609.99), SIMDE_FLOAT32_C( -756.57), SIMDE_FLOAT32_C( -441.01), SIMDE_FLOAT32_C( 675.23), SIMDE_FLOAT32_C( -112.56), SIMDE_FLOAT32_C( 752.66)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 986.64), SIMDE_FLOAT32_C( -355.29), SIMDE_FLOAT32_C( -776.65), SIMDE_FLOAT32_C( 983.17), 
SIMDE_FLOAT32_C( -890.68), SIMDE_FLOAT32_C( -288.13), SIMDE_FLOAT32_C( 739.88), SIMDE_FLOAT32_C( -3.67), SIMDE_FLOAT32_C( 356.91), SIMDE_FLOAT32_C( -250.13), SIMDE_FLOAT32_C( -609.99), SIMDE_FLOAT32_C( -756.57), SIMDE_FLOAT32_C( -517.71), SIMDE_FLOAT32_C( 436.89), SIMDE_FLOAT32_C( -112.56), SIMDE_FLOAT32_C( -796.29)) }, { UINT16_C(13167), simde_mm512_set_ps(SIMDE_FLOAT32_C( -177.79), SIMDE_FLOAT32_C( 957.03), SIMDE_FLOAT32_C( -193.15), SIMDE_FLOAT32_C( 645.09), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( 66.15), SIMDE_FLOAT32_C( 565.09), SIMDE_FLOAT32_C( -991.06), SIMDE_FLOAT32_C( -217.74), SIMDE_FLOAT32_C( 162.91), SIMDE_FLOAT32_C( 837.05), SIMDE_FLOAT32_C( 132.83), SIMDE_FLOAT32_C( -183.75), SIMDE_FLOAT32_C( -958.98), SIMDE_FLOAT32_C( -343.18), SIMDE_FLOAT32_C( -412.04)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -623.32), SIMDE_FLOAT32_C( 372.80), SIMDE_FLOAT32_C( -286.04), SIMDE_FLOAT32_C( 347.55), SIMDE_FLOAT32_C( -954.70), SIMDE_FLOAT32_C( 272.86), SIMDE_FLOAT32_C( 787.91), SIMDE_FLOAT32_C( 529.75), SIMDE_FLOAT32_C( -43.99), SIMDE_FLOAT32_C( 645.49), SIMDE_FLOAT32_C( -301.76), SIMDE_FLOAT32_C( -390.74), SIMDE_FLOAT32_C( 671.11), SIMDE_FLOAT32_C( -513.10), SIMDE_FLOAT32_C( 467.15), SIMDE_FLOAT32_C( -961.27)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -177.79), SIMDE_FLOAT32_C( 957.03), SIMDE_FLOAT32_C( -286.04), SIMDE_FLOAT32_C( 347.55), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( 66.15), SIMDE_FLOAT32_C( 787.91), SIMDE_FLOAT32_C( 529.75), SIMDE_FLOAT32_C( -217.74), SIMDE_FLOAT32_C( 645.49), SIMDE_FLOAT32_C( -301.76), SIMDE_FLOAT32_C( 132.83), SIMDE_FLOAT32_C( 671.11), SIMDE_FLOAT32_C( -513.10), SIMDE_FLOAT32_C( 467.15), SIMDE_FLOAT32_C( -961.27)) }, { UINT16_C(10447), simde_mm512_set_ps(SIMDE_FLOAT32_C( 986.49), SIMDE_FLOAT32_C( 854.73), SIMDE_FLOAT32_C( 459.72), SIMDE_FLOAT32_C( -110.83), SIMDE_FLOAT32_C( -875.29), SIMDE_FLOAT32_C( 594.30), SIMDE_FLOAT32_C( -331.55), SIMDE_FLOAT32_C( -808.64), SIMDE_FLOAT32_C( 705.45), SIMDE_FLOAT32_C( -55.08), SIMDE_FLOAT32_C( 606.63), 
SIMDE_FLOAT32_C( -13.01), SIMDE_FLOAT32_C( 483.39), SIMDE_FLOAT32_C( 565.56), SIMDE_FLOAT32_C( 735.84), SIMDE_FLOAT32_C( -855.34)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 263.46), SIMDE_FLOAT32_C( -173.99), SIMDE_FLOAT32_C( -448.77), SIMDE_FLOAT32_C( -18.46), SIMDE_FLOAT32_C( 60.12), SIMDE_FLOAT32_C( 895.07), SIMDE_FLOAT32_C( 593.75), SIMDE_FLOAT32_C( -910.69), SIMDE_FLOAT32_C( -898.20), SIMDE_FLOAT32_C( -885.41), SIMDE_FLOAT32_C( -446.34), SIMDE_FLOAT32_C( -539.23), SIMDE_FLOAT32_C( 177.56), SIMDE_FLOAT32_C( 85.90), SIMDE_FLOAT32_C( -977.71), SIMDE_FLOAT32_C( 589.01)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 986.49), SIMDE_FLOAT32_C( 854.73), SIMDE_FLOAT32_C( -448.77), SIMDE_FLOAT32_C( -110.83), SIMDE_FLOAT32_C( 60.12), SIMDE_FLOAT32_C( 594.30), SIMDE_FLOAT32_C( -331.55), SIMDE_FLOAT32_C( -808.64), SIMDE_FLOAT32_C( -898.20), SIMDE_FLOAT32_C( -885.41), SIMDE_FLOAT32_C( 606.63), SIMDE_FLOAT32_C( -13.01), SIMDE_FLOAT32_C( 177.56), SIMDE_FLOAT32_C( 85.90), SIMDE_FLOAT32_C( -977.71), SIMDE_FLOAT32_C( 589.01)) }, { UINT16_C(64052), simde_mm512_set_ps(SIMDE_FLOAT32_C( -833.77), SIMDE_FLOAT32_C( -382.41), SIMDE_FLOAT32_C( 155.80), SIMDE_FLOAT32_C( -175.51), SIMDE_FLOAT32_C( 270.94), SIMDE_FLOAT32_C( -347.54), SIMDE_FLOAT32_C( -823.80), SIMDE_FLOAT32_C( -462.55), SIMDE_FLOAT32_C( 93.71), SIMDE_FLOAT32_C( -510.90), SIMDE_FLOAT32_C( 589.53), SIMDE_FLOAT32_C( 762.37), SIMDE_FLOAT32_C( -64.95), SIMDE_FLOAT32_C( -200.72), SIMDE_FLOAT32_C( 590.31), SIMDE_FLOAT32_C( 904.10)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 932.33), SIMDE_FLOAT32_C( -948.81), SIMDE_FLOAT32_C( -663.23), SIMDE_FLOAT32_C( -224.49), SIMDE_FLOAT32_C( 61.61), SIMDE_FLOAT32_C( -513.25), SIMDE_FLOAT32_C( 862.87), SIMDE_FLOAT32_C( 888.55), SIMDE_FLOAT32_C( -356.41), SIMDE_FLOAT32_C( 536.26), SIMDE_FLOAT32_C( 46.41), SIMDE_FLOAT32_C( 968.69), SIMDE_FLOAT32_C( 819.71), SIMDE_FLOAT32_C( -256.62), SIMDE_FLOAT32_C( -508.11), SIMDE_FLOAT32_C( 806.88)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 932.33), SIMDE_FLOAT32_C( -948.81), 
SIMDE_FLOAT32_C( -663.23), SIMDE_FLOAT32_C( -224.49), SIMDE_FLOAT32_C( 61.61), SIMDE_FLOAT32_C( -347.54), SIMDE_FLOAT32_C( 862.87), SIMDE_FLOAT32_C( -462.55), SIMDE_FLOAT32_C( 93.71), SIMDE_FLOAT32_C( -510.90), SIMDE_FLOAT32_C( 46.41), SIMDE_FLOAT32_C( 968.69), SIMDE_FLOAT32_C( -64.95), SIMDE_FLOAT32_C( -256.62), SIMDE_FLOAT32_C( 590.31), SIMDE_FLOAT32_C( 904.10)) }, { UINT16_C(43223), simde_mm512_set_ps(SIMDE_FLOAT32_C( 810.27), SIMDE_FLOAT32_C( 830.75), SIMDE_FLOAT32_C( 701.96), SIMDE_FLOAT32_C( 496.75), SIMDE_FLOAT32_C( -369.80), SIMDE_FLOAT32_C( -455.71), SIMDE_FLOAT32_C( -712.56), SIMDE_FLOAT32_C( 961.22), SIMDE_FLOAT32_C( -136.67), SIMDE_FLOAT32_C( 165.04), SIMDE_FLOAT32_C( -204.19), SIMDE_FLOAT32_C( 122.42), SIMDE_FLOAT32_C( -370.53), SIMDE_FLOAT32_C( -979.01), SIMDE_FLOAT32_C( -726.98), SIMDE_FLOAT32_C( 156.30)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -639.87), SIMDE_FLOAT32_C( 979.28), SIMDE_FLOAT32_C( 637.46), SIMDE_FLOAT32_C( 800.56), SIMDE_FLOAT32_C( -577.80), SIMDE_FLOAT32_C( 389.08), SIMDE_FLOAT32_C( 966.56), SIMDE_FLOAT32_C( -731.72), SIMDE_FLOAT32_C( -496.34), SIMDE_FLOAT32_C( 344.28), SIMDE_FLOAT32_C( 729.72), SIMDE_FLOAT32_C( 160.21), SIMDE_FLOAT32_C( 511.10), SIMDE_FLOAT32_C( 746.94), SIMDE_FLOAT32_C( -855.19), SIMDE_FLOAT32_C( 203.47)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -639.87), SIMDE_FLOAT32_C( 830.75), SIMDE_FLOAT32_C( 637.46), SIMDE_FLOAT32_C( 496.75), SIMDE_FLOAT32_C( -577.80), SIMDE_FLOAT32_C( -455.71), SIMDE_FLOAT32_C( -712.56), SIMDE_FLOAT32_C( 961.22), SIMDE_FLOAT32_C( -496.34), SIMDE_FLOAT32_C( 344.28), SIMDE_FLOAT32_C( -204.19), SIMDE_FLOAT32_C( 160.21), SIMDE_FLOAT32_C( -370.53), SIMDE_FLOAT32_C( 746.94), SIMDE_FLOAT32_C( -855.19), SIMDE_FLOAT32_C( 203.47)) }, { UINT16_C(29684), simde_mm512_set_ps(SIMDE_FLOAT32_C( -516.27), SIMDE_FLOAT32_C( -631.52), SIMDE_FLOAT32_C( -333.70), SIMDE_FLOAT32_C( 63.56), SIMDE_FLOAT32_C( 874.74), SIMDE_FLOAT32_C( -961.20), SIMDE_FLOAT32_C( -924.01), SIMDE_FLOAT32_C( 542.80), SIMDE_FLOAT32_C( -706.18), 
SIMDE_FLOAT32_C( -538.09), SIMDE_FLOAT32_C( 38.89), SIMDE_FLOAT32_C( -242.57), SIMDE_FLOAT32_C( -337.54), SIMDE_FLOAT32_C( 184.20), SIMDE_FLOAT32_C( -229.00), SIMDE_FLOAT32_C( -133.06)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 57.66), SIMDE_FLOAT32_C( 125.72), SIMDE_FLOAT32_C( 14.67), SIMDE_FLOAT32_C( 11.14), SIMDE_FLOAT32_C( -363.42), SIMDE_FLOAT32_C( -80.26), SIMDE_FLOAT32_C( -12.54), SIMDE_FLOAT32_C( 560.55), SIMDE_FLOAT32_C( -59.35), SIMDE_FLOAT32_C( -247.72), SIMDE_FLOAT32_C( -719.76), SIMDE_FLOAT32_C( 280.49), SIMDE_FLOAT32_C( 296.24), SIMDE_FLOAT32_C( -303.23), SIMDE_FLOAT32_C( -844.84), SIMDE_FLOAT32_C( 452.16)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -516.27), SIMDE_FLOAT32_C( 125.72), SIMDE_FLOAT32_C( 14.67), SIMDE_FLOAT32_C( 11.14), SIMDE_FLOAT32_C( 874.74), SIMDE_FLOAT32_C( -961.20), SIMDE_FLOAT32_C( -12.54), SIMDE_FLOAT32_C( 560.55), SIMDE_FLOAT32_C( -59.35), SIMDE_FLOAT32_C( -247.72), SIMDE_FLOAT32_C( -719.76), SIMDE_FLOAT32_C( 280.49), SIMDE_FLOAT32_C( -337.54), SIMDE_FLOAT32_C( -303.23), SIMDE_FLOAT32_C( -229.00), SIMDE_FLOAT32_C( -133.06)) }, { UINT16_C( 5687), simde_mm512_set_ps(SIMDE_FLOAT32_C( -252.71), SIMDE_FLOAT32_C( 314.80), SIMDE_FLOAT32_C( 998.07), SIMDE_FLOAT32_C( 575.47), SIMDE_FLOAT32_C( 52.24), SIMDE_FLOAT32_C( 443.69), SIMDE_FLOAT32_C( 234.67), SIMDE_FLOAT32_C( 894.50), SIMDE_FLOAT32_C( -487.55), SIMDE_FLOAT32_C( -250.47), SIMDE_FLOAT32_C( -500.78), SIMDE_FLOAT32_C( 379.78), SIMDE_FLOAT32_C( 612.27), SIMDE_FLOAT32_C( -899.55), SIMDE_FLOAT32_C( -426.63), SIMDE_FLOAT32_C( 359.38)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -262.03), SIMDE_FLOAT32_C( 81.51), SIMDE_FLOAT32_C( -484.02), SIMDE_FLOAT32_C( 504.24), SIMDE_FLOAT32_C( 157.78), SIMDE_FLOAT32_C( 62.09), SIMDE_FLOAT32_C( 193.22), SIMDE_FLOAT32_C( -72.99), SIMDE_FLOAT32_C( -54.28), SIMDE_FLOAT32_C( -361.95), SIMDE_FLOAT32_C( 726.76), SIMDE_FLOAT32_C( 663.18), SIMDE_FLOAT32_C( 290.10), SIMDE_FLOAT32_C( 238.07), SIMDE_FLOAT32_C( -777.03), SIMDE_FLOAT32_C( 227.93)), 
simde_mm512_set_ps(SIMDE_FLOAT32_C( -252.71), SIMDE_FLOAT32_C( 314.80), SIMDE_FLOAT32_C( 998.07), SIMDE_FLOAT32_C( 504.24), SIMDE_FLOAT32_C( 52.24), SIMDE_FLOAT32_C( 62.09), SIMDE_FLOAT32_C( 193.22), SIMDE_FLOAT32_C( 894.50), SIMDE_FLOAT32_C( -487.55), SIMDE_FLOAT32_C( -250.47), SIMDE_FLOAT32_C( 726.76), SIMDE_FLOAT32_C( 663.18), SIMDE_FLOAT32_C( 612.27), SIMDE_FLOAT32_C( 238.07), SIMDE_FLOAT32_C( -777.03), SIMDE_FLOAT32_C( 227.93)) }, { UINT16_C(46817), simde_mm512_set_ps(SIMDE_FLOAT32_C( 60.97), SIMDE_FLOAT32_C( 294.39), SIMDE_FLOAT32_C( 134.62), SIMDE_FLOAT32_C( -712.88), SIMDE_FLOAT32_C( -527.46), SIMDE_FLOAT32_C( 556.17), SIMDE_FLOAT32_C( 671.08), SIMDE_FLOAT32_C( -149.99), SIMDE_FLOAT32_C( -247.28), SIMDE_FLOAT32_C( -221.96), SIMDE_FLOAT32_C( -789.76), SIMDE_FLOAT32_C( 665.60), SIMDE_FLOAT32_C( 590.64), SIMDE_FLOAT32_C( -335.79), SIMDE_FLOAT32_C( -964.26), SIMDE_FLOAT32_C( 137.06)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 741.48), SIMDE_FLOAT32_C( -567.93), SIMDE_FLOAT32_C( 572.38), SIMDE_FLOAT32_C( -363.96), SIMDE_FLOAT32_C( 267.49), SIMDE_FLOAT32_C( 878.76), SIMDE_FLOAT32_C( -445.04), SIMDE_FLOAT32_C( -252.24), SIMDE_FLOAT32_C( 869.52), SIMDE_FLOAT32_C( -356.95), SIMDE_FLOAT32_C( 128.10), SIMDE_FLOAT32_C( 544.58), SIMDE_FLOAT32_C( 242.93), SIMDE_FLOAT32_C( -919.16), SIMDE_FLOAT32_C( 758.90), SIMDE_FLOAT32_C( -466.75)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 741.48), SIMDE_FLOAT32_C( 294.39), SIMDE_FLOAT32_C( 572.38), SIMDE_FLOAT32_C( -363.96), SIMDE_FLOAT32_C( -527.46), SIMDE_FLOAT32_C( 878.76), SIMDE_FLOAT32_C( -445.04), SIMDE_FLOAT32_C( -149.99), SIMDE_FLOAT32_C( 869.52), SIMDE_FLOAT32_C( -356.95), SIMDE_FLOAT32_C( 128.10), SIMDE_FLOAT32_C( 665.60), SIMDE_FLOAT32_C( 590.64), SIMDE_FLOAT32_C( -335.79), SIMDE_FLOAT32_C( -964.26), SIMDE_FLOAT32_C( -466.75)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_blend_ps(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512_close(r, test_vec[i].r, 1); 
} return 0; } static int test_simde_mm512_mask_blend_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512d a; simde__m512d b; simde__m512d r; } test_vec[8] = { { UINT8_C(211), simde_mm512_set_pd(SIMDE_FLOAT64_C( 863.27), SIMDE_FLOAT64_C( -937.53), SIMDE_FLOAT64_C( 272.85), SIMDE_FLOAT64_C( -836.56), SIMDE_FLOAT64_C( -517.71), SIMDE_FLOAT64_C( 436.89), SIMDE_FLOAT64_C( -561.62), SIMDE_FLOAT64_C( -796.29)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 356.91), SIMDE_FLOAT64_C( -250.13), SIMDE_FLOAT64_C( -609.99), SIMDE_FLOAT64_C( -756.57), SIMDE_FLOAT64_C( -441.01), SIMDE_FLOAT64_C( 675.23), SIMDE_FLOAT64_C( -112.56), SIMDE_FLOAT64_C( 752.66)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 356.91), SIMDE_FLOAT64_C( -250.13), SIMDE_FLOAT64_C( 272.85), SIMDE_FLOAT64_C( -756.57), SIMDE_FLOAT64_C( -517.71), SIMDE_FLOAT64_C( 436.89), SIMDE_FLOAT64_C( -112.56), SIMDE_FLOAT64_C( 752.66)) }, { UINT8_C( 25), simde_mm512_set_pd(SIMDE_FLOAT64_C( 721.67), SIMDE_FLOAT64_C( -355.29), SIMDE_FLOAT64_C( -776.65), SIMDE_FLOAT64_C( -467.78), SIMDE_FLOAT64_C( -890.68), SIMDE_FLOAT64_C( -288.13), SIMDE_FLOAT64_C( 739.88), SIMDE_FLOAT64_C( -3.67)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -963.76), SIMDE_FLOAT64_C( 986.64), SIMDE_FLOAT64_C( 121.90), SIMDE_FLOAT64_C( -796.62), SIMDE_FLOAT64_C( 983.17), SIMDE_FLOAT64_C( 569.02), SIMDE_FLOAT64_C( -88.58), SIMDE_FLOAT64_C( -750.53)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 721.67), SIMDE_FLOAT64_C( -355.29), SIMDE_FLOAT64_C( -776.65), SIMDE_FLOAT64_C( -796.62), SIMDE_FLOAT64_C( 983.17), SIMDE_FLOAT64_C( -288.13), SIMDE_FLOAT64_C( 739.88), SIMDE_FLOAT64_C( -750.53)) }, { UINT8_C( 46), simde_mm512_set_pd(SIMDE_FLOAT64_C( -217.74), SIMDE_FLOAT64_C( 162.91), SIMDE_FLOAT64_C( 837.05), SIMDE_FLOAT64_C( 132.83), SIMDE_FLOAT64_C( -183.75), SIMDE_FLOAT64_C( -958.98), SIMDE_FLOAT64_C( -343.18), SIMDE_FLOAT64_C( -412.04)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -43.99), SIMDE_FLOAT64_C( 645.49), SIMDE_FLOAT64_C( -301.76), SIMDE_FLOAT64_C( -390.74), SIMDE_FLOAT64_C( 
671.11), SIMDE_FLOAT64_C( -513.10), SIMDE_FLOAT64_C( 467.15), SIMDE_FLOAT64_C( -961.27)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -217.74), SIMDE_FLOAT64_C( 162.91), SIMDE_FLOAT64_C( -301.76), SIMDE_FLOAT64_C( 132.83), SIMDE_FLOAT64_C( 671.11), SIMDE_FLOAT64_C( -513.10), SIMDE_FLOAT64_C( 467.15), SIMDE_FLOAT64_C( -412.04)) }, { UINT8_C(180), simde_mm512_set_pd(SIMDE_FLOAT64_C( -623.32), SIMDE_FLOAT64_C( 372.80), SIMDE_FLOAT64_C( -286.04), SIMDE_FLOAT64_C( 347.55), SIMDE_FLOAT64_C( -954.70), SIMDE_FLOAT64_C( 272.86), SIMDE_FLOAT64_C( 787.91), SIMDE_FLOAT64_C( 529.75)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 153.63), SIMDE_FLOAT64_C( -177.79), SIMDE_FLOAT64_C( 957.03), SIMDE_FLOAT64_C( -193.15), SIMDE_FLOAT64_C( 645.09), SIMDE_FLOAT64_C( -0.96), SIMDE_FLOAT64_C( 66.15), SIMDE_FLOAT64_C( 565.09)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 153.63), SIMDE_FLOAT64_C( 372.80), SIMDE_FLOAT64_C( 957.03), SIMDE_FLOAT64_C( -193.15), SIMDE_FLOAT64_C( -954.70), SIMDE_FLOAT64_C( -0.96), SIMDE_FLOAT64_C( 787.91), SIMDE_FLOAT64_C( 529.75)) }, { UINT8_C(125), simde_mm512_set_pd(SIMDE_FLOAT64_C( 705.45), SIMDE_FLOAT64_C( -55.08), SIMDE_FLOAT64_C( 606.63), SIMDE_FLOAT64_C( -13.01), SIMDE_FLOAT64_C( 483.39), SIMDE_FLOAT64_C( 565.56), SIMDE_FLOAT64_C( 735.84), SIMDE_FLOAT64_C( -855.34)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -898.20), SIMDE_FLOAT64_C( -885.41), SIMDE_FLOAT64_C( -446.34), SIMDE_FLOAT64_C( -539.23), SIMDE_FLOAT64_C( 177.56), SIMDE_FLOAT64_C( 85.90), SIMDE_FLOAT64_C( -977.71), SIMDE_FLOAT64_C( 589.01)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 705.45), SIMDE_FLOAT64_C( -885.41), SIMDE_FLOAT64_C( -446.34), SIMDE_FLOAT64_C( -539.23), SIMDE_FLOAT64_C( 177.56), SIMDE_FLOAT64_C( 85.90), SIMDE_FLOAT64_C( 735.84), SIMDE_FLOAT64_C( 589.01)) }, { UINT8_C(188), simde_mm512_set_pd(SIMDE_FLOAT64_C( 263.46), SIMDE_FLOAT64_C( -173.99), SIMDE_FLOAT64_C( -448.77), SIMDE_FLOAT64_C( -18.46), SIMDE_FLOAT64_C( 60.12), SIMDE_FLOAT64_C( 895.07), SIMDE_FLOAT64_C( 593.75), SIMDE_FLOAT64_C( -910.69)), 
simde_mm512_set_pd(SIMDE_FLOAT64_C( -347.65), SIMDE_FLOAT64_C( 986.49), SIMDE_FLOAT64_C( 854.73), SIMDE_FLOAT64_C( 459.72), SIMDE_FLOAT64_C( -110.83), SIMDE_FLOAT64_C( -875.29), SIMDE_FLOAT64_C( 594.30), SIMDE_FLOAT64_C( -331.55)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -347.65), SIMDE_FLOAT64_C( -173.99), SIMDE_FLOAT64_C( 854.73), SIMDE_FLOAT64_C( 459.72), SIMDE_FLOAT64_C( -110.83), SIMDE_FLOAT64_C( -875.29), SIMDE_FLOAT64_C( 593.75), SIMDE_FLOAT64_C( -910.69)) }, { UINT8_C(190), simde_mm512_set_pd(SIMDE_FLOAT64_C( 93.71), SIMDE_FLOAT64_C( -510.90), SIMDE_FLOAT64_C( 589.53), SIMDE_FLOAT64_C( 762.37), SIMDE_FLOAT64_C( -64.95), SIMDE_FLOAT64_C( -200.72), SIMDE_FLOAT64_C( 590.31), SIMDE_FLOAT64_C( 904.10)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -356.41), SIMDE_FLOAT64_C( 536.26), SIMDE_FLOAT64_C( 46.41), SIMDE_FLOAT64_C( 968.69), SIMDE_FLOAT64_C( 819.71), SIMDE_FLOAT64_C( -256.62), SIMDE_FLOAT64_C( -508.11), SIMDE_FLOAT64_C( 806.88)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -356.41), SIMDE_FLOAT64_C( -510.90), SIMDE_FLOAT64_C( 46.41), SIMDE_FLOAT64_C( 968.69), SIMDE_FLOAT64_C( 819.71), SIMDE_FLOAT64_C( -256.62), SIMDE_FLOAT64_C( -508.11), SIMDE_FLOAT64_C( 904.10)) }, { UINT8_C(178), simde_mm512_set_pd(SIMDE_FLOAT64_C( 932.33), SIMDE_FLOAT64_C( -948.81), SIMDE_FLOAT64_C( -663.23), SIMDE_FLOAT64_C( -224.49), SIMDE_FLOAT64_C( 61.61), SIMDE_FLOAT64_C( -513.25), SIMDE_FLOAT64_C( 862.87), SIMDE_FLOAT64_C( 888.55)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.62), SIMDE_FLOAT64_C( -833.77), SIMDE_FLOAT64_C( -382.41), SIMDE_FLOAT64_C( 155.80), SIMDE_FLOAT64_C( -175.51), SIMDE_FLOAT64_C( 270.94), SIMDE_FLOAT64_C( -347.54), SIMDE_FLOAT64_C( -823.80)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.62), SIMDE_FLOAT64_C( -948.81), SIMDE_FLOAT64_C( -382.41), SIMDE_FLOAT64_C( 155.80), SIMDE_FLOAT64_C( 61.61), SIMDE_FLOAT64_C( -513.25), SIMDE_FLOAT64_C( -347.54), SIMDE_FLOAT64_C( 888.55)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = 
simde_mm512_mask_blend_pd(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_blend_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_blend_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_blend_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_blend_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_blend_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_blend_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_blend_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_blend_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_blend_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_blend_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_blend_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_blend_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_blend_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_blend_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_blend_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_blend_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_blend_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_blend_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/broadcast.c000066400000000000000000011042531400333146700173700ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan * 2020 Christopher Moore */ #define SIMDE_TEST_X86_AVX512_INSN broadcast #include #include static int test_simde_mm256_broadcast_f32x2 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( -751.49), SIMDE_FLOAT32_C( -275.35), SIMDE_FLOAT32_C( 260.73), SIMDE_FLOAT32_C( 40.02) }, { SIMDE_FLOAT32_C( -751.49), SIMDE_FLOAT32_C( -275.35), SIMDE_FLOAT32_C( -751.49), SIMDE_FLOAT32_C( -275.35), SIMDE_FLOAT32_C( -751.49), SIMDE_FLOAT32_C( -275.35), SIMDE_FLOAT32_C( -751.49), SIMDE_FLOAT32_C( -275.35) } }, { { SIMDE_FLOAT32_C( 629.63), SIMDE_FLOAT32_C( 163.39), SIMDE_FLOAT32_C( 167.23), SIMDE_FLOAT32_C( 652.38) }, { SIMDE_FLOAT32_C( 629.63), SIMDE_FLOAT32_C( 163.39), SIMDE_FLOAT32_C( 629.63), SIMDE_FLOAT32_C( 163.39), SIMDE_FLOAT32_C( 629.63), SIMDE_FLOAT32_C( 163.39), SIMDE_FLOAT32_C( 629.63), SIMDE_FLOAT32_C( 163.39) } }, { { SIMDE_FLOAT32_C( 574.73), SIMDE_FLOAT32_C( -529.99), SIMDE_FLOAT32_C( 389.79), SIMDE_FLOAT32_C( -875.04) }, { SIMDE_FLOAT32_C( 574.73), SIMDE_FLOAT32_C( -529.99), SIMDE_FLOAT32_C( 574.73), SIMDE_FLOAT32_C( -529.99), SIMDE_FLOAT32_C( 574.73), SIMDE_FLOAT32_C( -529.99), SIMDE_FLOAT32_C( 574.73), SIMDE_FLOAT32_C( -529.99) } }, { { SIMDE_FLOAT32_C( -790.15), SIMDE_FLOAT32_C( 7.90), SIMDE_FLOAT32_C( 834.33), SIMDE_FLOAT32_C( 549.92) }, { SIMDE_FLOAT32_C( -790.15), SIMDE_FLOAT32_C( 7.90), SIMDE_FLOAT32_C( -790.15), SIMDE_FLOAT32_C( 7.90), 
SIMDE_FLOAT32_C( -790.15), SIMDE_FLOAT32_C( 7.90), SIMDE_FLOAT32_C( -790.15), SIMDE_FLOAT32_C( 7.90) } }, { { SIMDE_FLOAT32_C( 494.62), SIMDE_FLOAT32_C( -875.96), SIMDE_FLOAT32_C( -221.96), SIMDE_FLOAT32_C( -519.70) }, { SIMDE_FLOAT32_C( 494.62), SIMDE_FLOAT32_C( -875.96), SIMDE_FLOAT32_C( 494.62), SIMDE_FLOAT32_C( -875.96), SIMDE_FLOAT32_C( 494.62), SIMDE_FLOAT32_C( -875.96), SIMDE_FLOAT32_C( 494.62), SIMDE_FLOAT32_C( -875.96) } }, { { SIMDE_FLOAT32_C( -583.03), SIMDE_FLOAT32_C( -938.00), SIMDE_FLOAT32_C( 973.38), SIMDE_FLOAT32_C( -468.70) }, { SIMDE_FLOAT32_C( -583.03), SIMDE_FLOAT32_C( -938.00), SIMDE_FLOAT32_C( -583.03), SIMDE_FLOAT32_C( -938.00), SIMDE_FLOAT32_C( -583.03), SIMDE_FLOAT32_C( -938.00), SIMDE_FLOAT32_C( -583.03), SIMDE_FLOAT32_C( -938.00) } }, { { SIMDE_FLOAT32_C( 521.04), SIMDE_FLOAT32_C( -960.21), SIMDE_FLOAT32_C( -215.76), SIMDE_FLOAT32_C( -218.82) }, { SIMDE_FLOAT32_C( 521.04), SIMDE_FLOAT32_C( -960.21), SIMDE_FLOAT32_C( 521.04), SIMDE_FLOAT32_C( -960.21), SIMDE_FLOAT32_C( 521.04), SIMDE_FLOAT32_C( -960.21), SIMDE_FLOAT32_C( 521.04), SIMDE_FLOAT32_C( -960.21) } }, { { SIMDE_FLOAT32_C( 315.04), SIMDE_FLOAT32_C( 872.51), SIMDE_FLOAT32_C( 318.60), SIMDE_FLOAT32_C( 720.27) }, { SIMDE_FLOAT32_C( 315.04), SIMDE_FLOAT32_C( 872.51), SIMDE_FLOAT32_C( 315.04), SIMDE_FLOAT32_C( 872.51), SIMDE_FLOAT32_C( 315.04), SIMDE_FLOAT32_C( 872.51), SIMDE_FLOAT32_C( 315.04), SIMDE_FLOAT32_C( 872.51) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_broadcast_f32x2(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_mask_broadcast_f32x2 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[8]; const simde__mmask8 k; const simde_float32 a[4]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( -155.94), SIMDE_FLOAT32_C( -965.17), SIMDE_FLOAT32_C( 378.08), 
SIMDE_FLOAT32_C( 365.29), SIMDE_FLOAT32_C( -495.97), SIMDE_FLOAT32_C( 311.10), SIMDE_FLOAT32_C( 575.79), SIMDE_FLOAT32_C( -655.57) }, UINT8_C( 85), { SIMDE_FLOAT32_C( 963.37), SIMDE_FLOAT32_C( -596.05), SIMDE_FLOAT32_C( 183.95), SIMDE_FLOAT32_C( -410.87) }, { SIMDE_FLOAT32_C( 963.37), SIMDE_FLOAT32_C( -965.17), SIMDE_FLOAT32_C( 963.37), SIMDE_FLOAT32_C( 365.29), SIMDE_FLOAT32_C( 963.37), SIMDE_FLOAT32_C( 311.10), SIMDE_FLOAT32_C( 963.37), SIMDE_FLOAT32_C( -655.57) } }, { { SIMDE_FLOAT32_C( 431.64), SIMDE_FLOAT32_C( 613.27), SIMDE_FLOAT32_C( -834.97), SIMDE_FLOAT32_C( 711.68), SIMDE_FLOAT32_C( -862.98), SIMDE_FLOAT32_C( -74.52), SIMDE_FLOAT32_C( -451.05), SIMDE_FLOAT32_C( -751.41) }, UINT8_C(193), { SIMDE_FLOAT32_C( -39.01), SIMDE_FLOAT32_C( 325.90), SIMDE_FLOAT32_C( -543.82), SIMDE_FLOAT32_C( 50.30) }, { SIMDE_FLOAT32_C( -39.01), SIMDE_FLOAT32_C( 613.27), SIMDE_FLOAT32_C( -834.97), SIMDE_FLOAT32_C( 711.68), SIMDE_FLOAT32_C( -862.98), SIMDE_FLOAT32_C( -74.52), SIMDE_FLOAT32_C( -39.01), SIMDE_FLOAT32_C( 325.90) } }, { { SIMDE_FLOAT32_C( -570.27), SIMDE_FLOAT32_C( -600.03), SIMDE_FLOAT32_C( -713.28), SIMDE_FLOAT32_C( -16.45), SIMDE_FLOAT32_C( -512.72), SIMDE_FLOAT32_C( 640.13), SIMDE_FLOAT32_C( 632.82), SIMDE_FLOAT32_C( -156.53) }, UINT8_C(110), { SIMDE_FLOAT32_C( 351.05), SIMDE_FLOAT32_C( 39.68), SIMDE_FLOAT32_C( 822.74), SIMDE_FLOAT32_C( -140.05) }, { SIMDE_FLOAT32_C( -570.27), SIMDE_FLOAT32_C( 39.68), SIMDE_FLOAT32_C( 351.05), SIMDE_FLOAT32_C( 39.68), SIMDE_FLOAT32_C( -512.72), SIMDE_FLOAT32_C( 39.68), SIMDE_FLOAT32_C( 351.05), SIMDE_FLOAT32_C( -156.53) } }, { { SIMDE_FLOAT32_C( 219.95), SIMDE_FLOAT32_C( 765.90), SIMDE_FLOAT32_C( 464.19), SIMDE_FLOAT32_C( -363.72), SIMDE_FLOAT32_C( 978.16), SIMDE_FLOAT32_C( -55.83), SIMDE_FLOAT32_C( -268.61), SIMDE_FLOAT32_C( -471.94) }, UINT8_C(194), { SIMDE_FLOAT32_C( 300.83), SIMDE_FLOAT32_C( 122.56), SIMDE_FLOAT32_C( -137.37), SIMDE_FLOAT32_C( -830.55) }, { SIMDE_FLOAT32_C( 219.95), SIMDE_FLOAT32_C( 122.56), SIMDE_FLOAT32_C( 
464.19), SIMDE_FLOAT32_C( -363.72), SIMDE_FLOAT32_C( 978.16), SIMDE_FLOAT32_C( -55.83), SIMDE_FLOAT32_C( 300.83), SIMDE_FLOAT32_C( 122.56) } }, { { SIMDE_FLOAT32_C( -993.95), SIMDE_FLOAT32_C( 735.37), SIMDE_FLOAT32_C( -715.04), SIMDE_FLOAT32_C( 363.48), SIMDE_FLOAT32_C( 997.38), SIMDE_FLOAT32_C( 957.48), SIMDE_FLOAT32_C( 411.04), SIMDE_FLOAT32_C( 318.40) }, UINT8_C( 0), { SIMDE_FLOAT32_C( 944.29), SIMDE_FLOAT32_C( 688.98), SIMDE_FLOAT32_C( -319.61), SIMDE_FLOAT32_C( 391.33) }, { SIMDE_FLOAT32_C( -993.95), SIMDE_FLOAT32_C( 735.37), SIMDE_FLOAT32_C( -715.04), SIMDE_FLOAT32_C( 363.48), SIMDE_FLOAT32_C( 997.38), SIMDE_FLOAT32_C( 957.48), SIMDE_FLOAT32_C( 411.04), SIMDE_FLOAT32_C( 318.40) } }, { { SIMDE_FLOAT32_C( -917.62), SIMDE_FLOAT32_C( -406.65), SIMDE_FLOAT32_C( -532.97), SIMDE_FLOAT32_C( 298.17), SIMDE_FLOAT32_C( -598.91), SIMDE_FLOAT32_C( 107.47), SIMDE_FLOAT32_C( 214.95), SIMDE_FLOAT32_C( 587.62) }, UINT8_C(159), { SIMDE_FLOAT32_C( -173.39), SIMDE_FLOAT32_C( -170.67), SIMDE_FLOAT32_C( -483.21), SIMDE_FLOAT32_C( 718.07) }, { SIMDE_FLOAT32_C( -173.39), SIMDE_FLOAT32_C( -170.67), SIMDE_FLOAT32_C( -173.39), SIMDE_FLOAT32_C( -170.67), SIMDE_FLOAT32_C( -173.39), SIMDE_FLOAT32_C( 107.47), SIMDE_FLOAT32_C( 214.95), SIMDE_FLOAT32_C( -170.67) } }, { { SIMDE_FLOAT32_C( 526.28), SIMDE_FLOAT32_C( -786.80), SIMDE_FLOAT32_C( 286.87), SIMDE_FLOAT32_C( -560.33), SIMDE_FLOAT32_C( 596.72), SIMDE_FLOAT32_C( 991.58), SIMDE_FLOAT32_C( -572.23), SIMDE_FLOAT32_C( 587.29) }, UINT8_C( 79), { SIMDE_FLOAT32_C( 221.82), SIMDE_FLOAT32_C( 117.18), SIMDE_FLOAT32_C( -624.10), SIMDE_FLOAT32_C( 727.41) }, { SIMDE_FLOAT32_C( 221.82), SIMDE_FLOAT32_C( 117.18), SIMDE_FLOAT32_C( 221.82), SIMDE_FLOAT32_C( 117.18), SIMDE_FLOAT32_C( 596.72), SIMDE_FLOAT32_C( 991.58), SIMDE_FLOAT32_C( 221.82), SIMDE_FLOAT32_C( 587.29) } }, { { SIMDE_FLOAT32_C( -473.57), SIMDE_FLOAT32_C( 647.70), SIMDE_FLOAT32_C( -174.14), SIMDE_FLOAT32_C( -701.99), SIMDE_FLOAT32_C( -317.30), SIMDE_FLOAT32_C( -833.25), SIMDE_FLOAT32_C( 
-470.85), SIMDE_FLOAT32_C( 426.74) }, UINT8_C(169), { SIMDE_FLOAT32_C( -800.29), SIMDE_FLOAT32_C( -506.53), SIMDE_FLOAT32_C( 682.63), SIMDE_FLOAT32_C( 942.35) }, { SIMDE_FLOAT32_C( -800.29), SIMDE_FLOAT32_C( 647.70), SIMDE_FLOAT32_C( -174.14), SIMDE_FLOAT32_C( -506.53), SIMDE_FLOAT32_C( -317.30), SIMDE_FLOAT32_C( -506.53), SIMDE_FLOAT32_C( -470.85), SIMDE_FLOAT32_C( -506.53) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 src = simde_mm256_loadu_ps(test_vec[i].src); simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_mask_broadcast_f32x2(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_maskz_broadcast_f32x2 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const simde_float32 a[4]; const simde_float32 r[8]; } test_vec[] = { { UINT8_C(167), { SIMDE_FLOAT32_C( -73.48), SIMDE_FLOAT32_C( -950.66), SIMDE_FLOAT32_C( 265.90), SIMDE_FLOAT32_C( -988.50) }, { SIMDE_FLOAT32_C( -73.48), SIMDE_FLOAT32_C( -950.66), SIMDE_FLOAT32_C( -73.48), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -950.66), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -950.66) } }, { UINT8_C(122), { SIMDE_FLOAT32_C( 490.14), SIMDE_FLOAT32_C( -286.45), SIMDE_FLOAT32_C( -424.27), SIMDE_FLOAT32_C( -754.18) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -286.45), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -286.45), SIMDE_FLOAT32_C( 490.14), SIMDE_FLOAT32_C( -286.45), SIMDE_FLOAT32_C( 490.14), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C( 66), { SIMDE_FLOAT32_C( -622.52), SIMDE_FLOAT32_C( -691.02), SIMDE_FLOAT32_C( 48.53), SIMDE_FLOAT32_C( -368.74) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -691.02), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -622.52), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C(140), { SIMDE_FLOAT32_C( 336.37), SIMDE_FLOAT32_C( -709.34), 
SIMDE_FLOAT32_C( 65.79), SIMDE_FLOAT32_C( -200.10) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 336.37), SIMDE_FLOAT32_C( -709.34), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -709.34) } }, { UINT8_C(151), { SIMDE_FLOAT32_C( 450.42), SIMDE_FLOAT32_C( 257.72), SIMDE_FLOAT32_C( -507.45), SIMDE_FLOAT32_C( -644.25) }, { SIMDE_FLOAT32_C( 450.42), SIMDE_FLOAT32_C( 257.72), SIMDE_FLOAT32_C( 450.42), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 450.42), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 257.72) } }, { UINT8_C( 11), { SIMDE_FLOAT32_C( -161.31), SIMDE_FLOAT32_C( 845.16), SIMDE_FLOAT32_C( 584.32), SIMDE_FLOAT32_C( 641.28) }, { SIMDE_FLOAT32_C( -161.31), SIMDE_FLOAT32_C( 845.16), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 845.16), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C( 82), { SIMDE_FLOAT32_C( 565.26), SIMDE_FLOAT32_C( 325.20), SIMDE_FLOAT32_C( -344.79), SIMDE_FLOAT32_C( -940.47) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 325.20), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 565.26), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 565.26), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C(152), { SIMDE_FLOAT32_C( 715.85), SIMDE_FLOAT32_C( -726.67), SIMDE_FLOAT32_C( 812.36), SIMDE_FLOAT32_C( -643.19) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -726.67), SIMDE_FLOAT32_C( 715.85), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -726.67) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_maskz_broadcast_f32x2(test_vec[i].k, a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_broadcast_f32x2 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const 
simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -125.63), SIMDE_FLOAT32_C( 601.06), SIMDE_FLOAT32_C( 20.21), SIMDE_FLOAT32_C( -317.28) }, { SIMDE_FLOAT32_C( -125.63), SIMDE_FLOAT32_C( 601.06), SIMDE_FLOAT32_C( -125.63), SIMDE_FLOAT32_C( 601.06), SIMDE_FLOAT32_C( -125.63), SIMDE_FLOAT32_C( 601.06), SIMDE_FLOAT32_C( -125.63), SIMDE_FLOAT32_C( 601.06), SIMDE_FLOAT32_C( -125.63), SIMDE_FLOAT32_C( 601.06), SIMDE_FLOAT32_C( -125.63), SIMDE_FLOAT32_C( 601.06), SIMDE_FLOAT32_C( -125.63), SIMDE_FLOAT32_C( 601.06), SIMDE_FLOAT32_C( -125.63), SIMDE_FLOAT32_C( 601.06) } }, { { SIMDE_FLOAT32_C( -590.78), SIMDE_FLOAT32_C( 832.90), SIMDE_FLOAT32_C( 590.84), SIMDE_FLOAT32_C( 180.72) }, { SIMDE_FLOAT32_C( -590.78), SIMDE_FLOAT32_C( 832.90), SIMDE_FLOAT32_C( -590.78), SIMDE_FLOAT32_C( 832.90), SIMDE_FLOAT32_C( -590.78), SIMDE_FLOAT32_C( 832.90), SIMDE_FLOAT32_C( -590.78), SIMDE_FLOAT32_C( 832.90), SIMDE_FLOAT32_C( -590.78), SIMDE_FLOAT32_C( 832.90), SIMDE_FLOAT32_C( -590.78), SIMDE_FLOAT32_C( 832.90), SIMDE_FLOAT32_C( -590.78), SIMDE_FLOAT32_C( 832.90), SIMDE_FLOAT32_C( -590.78), SIMDE_FLOAT32_C( 832.90) } }, { { SIMDE_FLOAT32_C( -605.74), SIMDE_FLOAT32_C( -713.02), SIMDE_FLOAT32_C( 218.93), SIMDE_FLOAT32_C( -470.99) }, { SIMDE_FLOAT32_C( -605.74), SIMDE_FLOAT32_C( -713.02), SIMDE_FLOAT32_C( -605.74), SIMDE_FLOAT32_C( -713.02), SIMDE_FLOAT32_C( -605.74), SIMDE_FLOAT32_C( -713.02), SIMDE_FLOAT32_C( -605.74), SIMDE_FLOAT32_C( -713.02), SIMDE_FLOAT32_C( -605.74), SIMDE_FLOAT32_C( -713.02), SIMDE_FLOAT32_C( -605.74), SIMDE_FLOAT32_C( -713.02), SIMDE_FLOAT32_C( -605.74), SIMDE_FLOAT32_C( -713.02), SIMDE_FLOAT32_C( -605.74), SIMDE_FLOAT32_C( -713.02) } }, { { SIMDE_FLOAT32_C( 61.13), SIMDE_FLOAT32_C( -592.59), SIMDE_FLOAT32_C( 423.81), SIMDE_FLOAT32_C( 987.29) }, { SIMDE_FLOAT32_C( 61.13), SIMDE_FLOAT32_C( -592.59), SIMDE_FLOAT32_C( 61.13), SIMDE_FLOAT32_C( -592.59), SIMDE_FLOAT32_C( 61.13), SIMDE_FLOAT32_C( -592.59), SIMDE_FLOAT32_C( 61.13), SIMDE_FLOAT32_C( -592.59), 
SIMDE_FLOAT32_C( 61.13), SIMDE_FLOAT32_C( -592.59), SIMDE_FLOAT32_C( 61.13), SIMDE_FLOAT32_C( -592.59), SIMDE_FLOAT32_C( 61.13), SIMDE_FLOAT32_C( -592.59), SIMDE_FLOAT32_C( 61.13), SIMDE_FLOAT32_C( -592.59) } }, { { SIMDE_FLOAT32_C( -608.09), SIMDE_FLOAT32_C( -99.23), SIMDE_FLOAT32_C( 300.10), SIMDE_FLOAT32_C( -254.94) }, { SIMDE_FLOAT32_C( -608.09), SIMDE_FLOAT32_C( -99.23), SIMDE_FLOAT32_C( -608.09), SIMDE_FLOAT32_C( -99.23), SIMDE_FLOAT32_C( -608.09), SIMDE_FLOAT32_C( -99.23), SIMDE_FLOAT32_C( -608.09), SIMDE_FLOAT32_C( -99.23), SIMDE_FLOAT32_C( -608.09), SIMDE_FLOAT32_C( -99.23), SIMDE_FLOAT32_C( -608.09), SIMDE_FLOAT32_C( -99.23), SIMDE_FLOAT32_C( -608.09), SIMDE_FLOAT32_C( -99.23), SIMDE_FLOAT32_C( -608.09), SIMDE_FLOAT32_C( -99.23) } }, { { SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 285.14), SIMDE_FLOAT32_C( 318.61), SIMDE_FLOAT32_C( 956.19) }, { SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 285.14), SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 285.14), SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 285.14), SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 285.14), SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 285.14), SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 285.14), SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 285.14), SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 285.14) } }, { { SIMDE_FLOAT32_C( 704.27), SIMDE_FLOAT32_C( 738.40), SIMDE_FLOAT32_C( 301.28), SIMDE_FLOAT32_C( -459.90) }, { SIMDE_FLOAT32_C( 704.27), SIMDE_FLOAT32_C( 738.40), SIMDE_FLOAT32_C( 704.27), SIMDE_FLOAT32_C( 738.40), SIMDE_FLOAT32_C( 704.27), SIMDE_FLOAT32_C( 738.40), SIMDE_FLOAT32_C( 704.27), SIMDE_FLOAT32_C( 738.40), SIMDE_FLOAT32_C( 704.27), SIMDE_FLOAT32_C( 738.40), SIMDE_FLOAT32_C( 704.27), SIMDE_FLOAT32_C( 738.40), SIMDE_FLOAT32_C( 704.27), SIMDE_FLOAT32_C( 738.40), SIMDE_FLOAT32_C( 704.27), SIMDE_FLOAT32_C( 738.40) } }, { { SIMDE_FLOAT32_C( 379.79), SIMDE_FLOAT32_C( -819.17), SIMDE_FLOAT32_C( 172.39), SIMDE_FLOAT32_C( -722.17) }, { SIMDE_FLOAT32_C( 379.79), SIMDE_FLOAT32_C( -819.17), 
SIMDE_FLOAT32_C( 379.79), SIMDE_FLOAT32_C( -819.17), SIMDE_FLOAT32_C( 379.79), SIMDE_FLOAT32_C( -819.17), SIMDE_FLOAT32_C( 379.79), SIMDE_FLOAT32_C( -819.17), SIMDE_FLOAT32_C( 379.79), SIMDE_FLOAT32_C( -819.17), SIMDE_FLOAT32_C( 379.79), SIMDE_FLOAT32_C( -819.17), SIMDE_FLOAT32_C( 379.79), SIMDE_FLOAT32_C( -819.17), SIMDE_FLOAT32_C( 379.79), SIMDE_FLOAT32_C( -819.17) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_broadcast_f32x2(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_broadcast_f32x2 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask16 k; const simde_float32 a[4]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 16.97), SIMDE_FLOAT32_C( -724.36), SIMDE_FLOAT32_C( -251.03), SIMDE_FLOAT32_C( 955.86), SIMDE_FLOAT32_C( -884.86), SIMDE_FLOAT32_C( 79.30), SIMDE_FLOAT32_C( 805.27), SIMDE_FLOAT32_C( 217.58), SIMDE_FLOAT32_C( 919.33), SIMDE_FLOAT32_C( -770.42), SIMDE_FLOAT32_C( -363.93), SIMDE_FLOAT32_C( -528.80), SIMDE_FLOAT32_C( 387.46), SIMDE_FLOAT32_C( 8.94), SIMDE_FLOAT32_C( 238.55), SIMDE_FLOAT32_C( -769.11) }, UINT16_C(26495), { SIMDE_FLOAT32_C( -701.54), SIMDE_FLOAT32_C( -832.82), SIMDE_FLOAT32_C( 858.15), SIMDE_FLOAT32_C( 988.45) }, { SIMDE_FLOAT32_C( -701.54), SIMDE_FLOAT32_C( -832.82), SIMDE_FLOAT32_C( -701.54), SIMDE_FLOAT32_C( -832.82), SIMDE_FLOAT32_C( -701.54), SIMDE_FLOAT32_C( -832.82), SIMDE_FLOAT32_C( -701.54), SIMDE_FLOAT32_C( 217.58), SIMDE_FLOAT32_C( -701.54), SIMDE_FLOAT32_C( -832.82), SIMDE_FLOAT32_C( -701.54), SIMDE_FLOAT32_C( -528.80), SIMDE_FLOAT32_C( 387.46), SIMDE_FLOAT32_C( -832.82), SIMDE_FLOAT32_C( -701.54), SIMDE_FLOAT32_C( -769.11) } }, { { SIMDE_FLOAT32_C( 886.30), SIMDE_FLOAT32_C( 115.75), SIMDE_FLOAT32_C( -627.06), SIMDE_FLOAT32_C( -987.33), SIMDE_FLOAT32_C( -126.79), 
SIMDE_FLOAT32_C( 964.00), SIMDE_FLOAT32_C( -128.64), SIMDE_FLOAT32_C( -75.15), SIMDE_FLOAT32_C( 949.72), SIMDE_FLOAT32_C( -114.82), SIMDE_FLOAT32_C( 286.01), SIMDE_FLOAT32_C( -995.38), SIMDE_FLOAT32_C( 721.81), SIMDE_FLOAT32_C( -531.94), SIMDE_FLOAT32_C( -379.35), SIMDE_FLOAT32_C( 301.40) }, UINT16_C(55066), { SIMDE_FLOAT32_C( -112.99), SIMDE_FLOAT32_C( 933.42), SIMDE_FLOAT32_C( -66.18), SIMDE_FLOAT32_C( -307.32) }, { SIMDE_FLOAT32_C( 886.30), SIMDE_FLOAT32_C( 933.42), SIMDE_FLOAT32_C( -627.06), SIMDE_FLOAT32_C( 933.42), SIMDE_FLOAT32_C( -112.99), SIMDE_FLOAT32_C( 964.00), SIMDE_FLOAT32_C( -128.64), SIMDE_FLOAT32_C( -75.15), SIMDE_FLOAT32_C( -112.99), SIMDE_FLOAT32_C( 933.42), SIMDE_FLOAT32_C( -112.99), SIMDE_FLOAT32_C( -995.38), SIMDE_FLOAT32_C( -112.99), SIMDE_FLOAT32_C( -531.94), SIMDE_FLOAT32_C( -112.99), SIMDE_FLOAT32_C( 933.42) } }, { { SIMDE_FLOAT32_C( 858.06), SIMDE_FLOAT32_C( -630.09), SIMDE_FLOAT32_C( 82.49), SIMDE_FLOAT32_C( 401.49), SIMDE_FLOAT32_C( -226.24), SIMDE_FLOAT32_C( -448.63), SIMDE_FLOAT32_C( -200.28), SIMDE_FLOAT32_C( -144.91), SIMDE_FLOAT32_C( 574.72), SIMDE_FLOAT32_C( -647.66), SIMDE_FLOAT32_C( 850.68), SIMDE_FLOAT32_C( -645.45), SIMDE_FLOAT32_C( -136.23), SIMDE_FLOAT32_C( 385.26), SIMDE_FLOAT32_C( -998.08), SIMDE_FLOAT32_C( -718.84) }, UINT16_C(39639), { SIMDE_FLOAT32_C( -394.96), SIMDE_FLOAT32_C( -89.93), SIMDE_FLOAT32_C( 511.24), SIMDE_FLOAT32_C( 328.98) }, { SIMDE_FLOAT32_C( -394.96), SIMDE_FLOAT32_C( -89.93), SIMDE_FLOAT32_C( -394.96), SIMDE_FLOAT32_C( 401.49), SIMDE_FLOAT32_C( -394.96), SIMDE_FLOAT32_C( -448.63), SIMDE_FLOAT32_C( -394.96), SIMDE_FLOAT32_C( -89.93), SIMDE_FLOAT32_C( 574.72), SIMDE_FLOAT32_C( -89.93), SIMDE_FLOAT32_C( 850.68), SIMDE_FLOAT32_C( -89.93), SIMDE_FLOAT32_C( -394.96), SIMDE_FLOAT32_C( 385.26), SIMDE_FLOAT32_C( -998.08), SIMDE_FLOAT32_C( -89.93) } }, { { SIMDE_FLOAT32_C( -783.73), SIMDE_FLOAT32_C( -210.92), SIMDE_FLOAT32_C( -991.67), SIMDE_FLOAT32_C( 979.95), SIMDE_FLOAT32_C( 49.71), SIMDE_FLOAT32_C( -489.71), 
SIMDE_FLOAT32_C( -591.16), SIMDE_FLOAT32_C( 388.37), SIMDE_FLOAT32_C( -622.36), SIMDE_FLOAT32_C( 45.42), SIMDE_FLOAT32_C( -553.07), SIMDE_FLOAT32_C( 498.54), SIMDE_FLOAT32_C( 904.46), SIMDE_FLOAT32_C( -795.68), SIMDE_FLOAT32_C( -943.60), SIMDE_FLOAT32_C( 933.59) }, UINT16_C(44422), { SIMDE_FLOAT32_C( 213.33), SIMDE_FLOAT32_C( -541.90), SIMDE_FLOAT32_C( 310.55), SIMDE_FLOAT32_C( -596.77) }, { SIMDE_FLOAT32_C( -783.73), SIMDE_FLOAT32_C( -541.90), SIMDE_FLOAT32_C( 213.33), SIMDE_FLOAT32_C( 979.95), SIMDE_FLOAT32_C( 49.71), SIMDE_FLOAT32_C( -489.71), SIMDE_FLOAT32_C( -591.16), SIMDE_FLOAT32_C( -541.90), SIMDE_FLOAT32_C( 213.33), SIMDE_FLOAT32_C( 45.42), SIMDE_FLOAT32_C( 213.33), SIMDE_FLOAT32_C( -541.90), SIMDE_FLOAT32_C( 904.46), SIMDE_FLOAT32_C( -541.90), SIMDE_FLOAT32_C( -943.60), SIMDE_FLOAT32_C( -541.90) } }, { { SIMDE_FLOAT32_C( 4.43), SIMDE_FLOAT32_C( 378.61), SIMDE_FLOAT32_C( -660.44), SIMDE_FLOAT32_C( -60.44), SIMDE_FLOAT32_C( 265.90), SIMDE_FLOAT32_C( 922.57), SIMDE_FLOAT32_C( -447.45), SIMDE_FLOAT32_C( -208.75), SIMDE_FLOAT32_C( -386.55), SIMDE_FLOAT32_C( -791.16), SIMDE_FLOAT32_C( 993.63), SIMDE_FLOAT32_C( -107.89), SIMDE_FLOAT32_C( 758.84), SIMDE_FLOAT32_C( -215.37), SIMDE_FLOAT32_C( 198.46), SIMDE_FLOAT32_C( -486.35) }, UINT16_C(19819), { SIMDE_FLOAT32_C( 413.19), SIMDE_FLOAT32_C( 527.77), SIMDE_FLOAT32_C( 286.90), SIMDE_FLOAT32_C( -50.52) }, { SIMDE_FLOAT32_C( 413.19), SIMDE_FLOAT32_C( 527.77), SIMDE_FLOAT32_C( -660.44), SIMDE_FLOAT32_C( 527.77), SIMDE_FLOAT32_C( 265.90), SIMDE_FLOAT32_C( 527.77), SIMDE_FLOAT32_C( 413.19), SIMDE_FLOAT32_C( -208.75), SIMDE_FLOAT32_C( 413.19), SIMDE_FLOAT32_C( -791.16), SIMDE_FLOAT32_C( 413.19), SIMDE_FLOAT32_C( 527.77), SIMDE_FLOAT32_C( 758.84), SIMDE_FLOAT32_C( -215.37), SIMDE_FLOAT32_C( 413.19), SIMDE_FLOAT32_C( -486.35) } }, { { SIMDE_FLOAT32_C( 968.23), SIMDE_FLOAT32_C( -877.74), SIMDE_FLOAT32_C( -102.63), SIMDE_FLOAT32_C( -954.86), SIMDE_FLOAT32_C( -411.69), SIMDE_FLOAT32_C( 708.12), SIMDE_FLOAT32_C( -635.17), 
SIMDE_FLOAT32_C( 743.77), SIMDE_FLOAT32_C( 622.65), SIMDE_FLOAT32_C( 851.75), SIMDE_FLOAT32_C( -569.83), SIMDE_FLOAT32_C( 908.51), SIMDE_FLOAT32_C( -674.71), SIMDE_FLOAT32_C( 173.61), SIMDE_FLOAT32_C( -162.66), SIMDE_FLOAT32_C( 200.03) }, UINT16_C(57825), { SIMDE_FLOAT32_C( -696.94), SIMDE_FLOAT32_C( 529.84), SIMDE_FLOAT32_C( -942.89), SIMDE_FLOAT32_C( 880.87) }, { SIMDE_FLOAT32_C( -696.94), SIMDE_FLOAT32_C( -877.74), SIMDE_FLOAT32_C( -102.63), SIMDE_FLOAT32_C( -954.86), SIMDE_FLOAT32_C( -411.69), SIMDE_FLOAT32_C( 529.84), SIMDE_FLOAT32_C( -696.94), SIMDE_FLOAT32_C( 529.84), SIMDE_FLOAT32_C( -696.94), SIMDE_FLOAT32_C( 851.75), SIMDE_FLOAT32_C( -569.83), SIMDE_FLOAT32_C( 908.51), SIMDE_FLOAT32_C( -674.71), SIMDE_FLOAT32_C( 529.84), SIMDE_FLOAT32_C( -696.94), SIMDE_FLOAT32_C( 529.84) } }, { { SIMDE_FLOAT32_C( 733.15), SIMDE_FLOAT32_C( 63.36), SIMDE_FLOAT32_C( 903.02), SIMDE_FLOAT32_C( -977.76), SIMDE_FLOAT32_C( 704.77), SIMDE_FLOAT32_C( 985.75), SIMDE_FLOAT32_C( -492.96), SIMDE_FLOAT32_C( 872.57), SIMDE_FLOAT32_C( -697.69), SIMDE_FLOAT32_C( -32.06), SIMDE_FLOAT32_C( -826.65), SIMDE_FLOAT32_C( 423.95), SIMDE_FLOAT32_C( -668.70), SIMDE_FLOAT32_C( -777.46), SIMDE_FLOAT32_C( -794.02), SIMDE_FLOAT32_C( 931.91) }, UINT16_C(22885), { SIMDE_FLOAT32_C( 241.78), SIMDE_FLOAT32_C( -340.95), SIMDE_FLOAT32_C( -411.67), SIMDE_FLOAT32_C( -904.01) }, { SIMDE_FLOAT32_C( 241.78), SIMDE_FLOAT32_C( 63.36), SIMDE_FLOAT32_C( 241.78), SIMDE_FLOAT32_C( -977.76), SIMDE_FLOAT32_C( 704.77), SIMDE_FLOAT32_C( -340.95), SIMDE_FLOAT32_C( 241.78), SIMDE_FLOAT32_C( 872.57), SIMDE_FLOAT32_C( 241.78), SIMDE_FLOAT32_C( -32.06), SIMDE_FLOAT32_C( -826.65), SIMDE_FLOAT32_C( -340.95), SIMDE_FLOAT32_C( 241.78), SIMDE_FLOAT32_C( -777.46), SIMDE_FLOAT32_C( 241.78), SIMDE_FLOAT32_C( 931.91) } }, { { SIMDE_FLOAT32_C( 377.61), SIMDE_FLOAT32_C( 543.54), SIMDE_FLOAT32_C( -676.81), SIMDE_FLOAT32_C( 796.04), SIMDE_FLOAT32_C( -952.55), SIMDE_FLOAT32_C( 439.69), SIMDE_FLOAT32_C( -139.34), SIMDE_FLOAT32_C( 103.48), 
SIMDE_FLOAT32_C( -782.74), SIMDE_FLOAT32_C( 562.99), SIMDE_FLOAT32_C( 161.99), SIMDE_FLOAT32_C( 620.38), SIMDE_FLOAT32_C( 696.86), SIMDE_FLOAT32_C( 88.47), SIMDE_FLOAT32_C( 998.69), SIMDE_FLOAT32_C( -955.66) }, UINT16_C(13591), { SIMDE_FLOAT32_C( -395.69), SIMDE_FLOAT32_C( -372.87), SIMDE_FLOAT32_C( -839.61), SIMDE_FLOAT32_C( 668.17) }, { SIMDE_FLOAT32_C( -395.69), SIMDE_FLOAT32_C( -372.87), SIMDE_FLOAT32_C( -395.69), SIMDE_FLOAT32_C( 796.04), SIMDE_FLOAT32_C( -395.69), SIMDE_FLOAT32_C( 439.69), SIMDE_FLOAT32_C( -139.34), SIMDE_FLOAT32_C( 103.48), SIMDE_FLOAT32_C( -395.69), SIMDE_FLOAT32_C( 562.99), SIMDE_FLOAT32_C( -395.69), SIMDE_FLOAT32_C( 620.38), SIMDE_FLOAT32_C( -395.69), SIMDE_FLOAT32_C( -372.87), SIMDE_FLOAT32_C( 998.69), SIMDE_FLOAT32_C( -955.66) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_broadcast_f32x2(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_maskz_broadcast_f32x2 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const simde_float32 a[4]; const simde_float32 r[16]; } test_vec[] = { { UINT16_C(18884), { SIMDE_FLOAT32_C( 545.10), SIMDE_FLOAT32_C( -550.17), SIMDE_FLOAT32_C( -710.41), SIMDE_FLOAT32_C( 204.85) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 545.10), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 545.10), SIMDE_FLOAT32_C( -550.17), SIMDE_FLOAT32_C( 545.10), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -550.17), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 545.10), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(16968), { SIMDE_FLOAT32_C( 51.85), SIMDE_FLOAT32_C( -493.14), SIMDE_FLOAT32_C( -214.52), SIMDE_FLOAT32_C( 484.86) }, { SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -493.14), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 51.85), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -493.14), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 51.85), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(55493), { SIMDE_FLOAT32_C( -547.31), SIMDE_FLOAT32_C( -681.83), SIMDE_FLOAT32_C( 567.76), SIMDE_FLOAT32_C( 376.14) }, { SIMDE_FLOAT32_C( -547.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -547.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -547.31), SIMDE_FLOAT32_C( -681.83), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -681.83), SIMDE_FLOAT32_C( -547.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -547.31), SIMDE_FLOAT32_C( -681.83) } }, { UINT16_C( 1280), { SIMDE_FLOAT32_C( 358.99), SIMDE_FLOAT32_C( -507.35), SIMDE_FLOAT32_C( -959.80), SIMDE_FLOAT32_C( 688.48) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 358.99), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 358.99), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(16569), { SIMDE_FLOAT32_C( -988.71), SIMDE_FLOAT32_C( 789.03), SIMDE_FLOAT32_C( -740.57), SIMDE_FLOAT32_C( -739.46) }, { SIMDE_FLOAT32_C( -988.71), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 789.03), SIMDE_FLOAT32_C( -988.71), SIMDE_FLOAT32_C( 789.03), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 789.03), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -988.71), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(26242), { 
SIMDE_FLOAT32_C( -555.34), SIMDE_FLOAT32_C( 402.79), SIMDE_FLOAT32_C( -274.64), SIMDE_FLOAT32_C( 159.53) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 402.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 402.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 402.79), SIMDE_FLOAT32_C( -555.34), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 402.79), SIMDE_FLOAT32_C( -555.34), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(39055), { SIMDE_FLOAT32_C( -25.84), SIMDE_FLOAT32_C( -228.90), SIMDE_FLOAT32_C( 813.40), SIMDE_FLOAT32_C( 762.90) }, { SIMDE_FLOAT32_C( -25.84), SIMDE_FLOAT32_C( -228.90), SIMDE_FLOAT32_C( -25.84), SIMDE_FLOAT32_C( -228.90), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -228.90), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -228.90), SIMDE_FLOAT32_C( -25.84), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -228.90) } }, { UINT16_C(53187), { SIMDE_FLOAT32_C( -400.08), SIMDE_FLOAT32_C( -173.64), SIMDE_FLOAT32_C( -349.66), SIMDE_FLOAT32_C( -663.64) }, { SIMDE_FLOAT32_C( -400.08), SIMDE_FLOAT32_C( -173.64), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -400.08), SIMDE_FLOAT32_C( -173.64), SIMDE_FLOAT32_C( -400.08), SIMDE_FLOAT32_C( -173.64), SIMDE_FLOAT32_C( -400.08), SIMDE_FLOAT32_C( -173.64), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -400.08), SIMDE_FLOAT32_C( -173.64) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_maskz_broadcast_f32x2(test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_broadcast_f32x8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const 
simde_float32 a[8]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -416.45), SIMDE_FLOAT32_C( 711.04), SIMDE_FLOAT32_C( 494.08), SIMDE_FLOAT32_C( 55.06), SIMDE_FLOAT32_C( -527.80), SIMDE_FLOAT32_C( -810.11), SIMDE_FLOAT32_C( 486.30), SIMDE_FLOAT32_C( -695.23) }, { SIMDE_FLOAT32_C( -416.45), SIMDE_FLOAT32_C( 711.04), SIMDE_FLOAT32_C( 494.08), SIMDE_FLOAT32_C( 55.06), SIMDE_FLOAT32_C( -527.80), SIMDE_FLOAT32_C( -810.11), SIMDE_FLOAT32_C( 486.30), SIMDE_FLOAT32_C( -695.23), SIMDE_FLOAT32_C( -416.45), SIMDE_FLOAT32_C( 711.04), SIMDE_FLOAT32_C( 494.08), SIMDE_FLOAT32_C( 55.06), SIMDE_FLOAT32_C( -527.80), SIMDE_FLOAT32_C( -810.11), SIMDE_FLOAT32_C( 486.30), SIMDE_FLOAT32_C( -695.23) } }, { { SIMDE_FLOAT32_C( -800.88), SIMDE_FLOAT32_C( -452.72), SIMDE_FLOAT32_C( -904.66), SIMDE_FLOAT32_C( -614.99), SIMDE_FLOAT32_C( -172.17), SIMDE_FLOAT32_C( 311.84), SIMDE_FLOAT32_C( -833.25), SIMDE_FLOAT32_C( -503.53) }, { SIMDE_FLOAT32_C( -800.88), SIMDE_FLOAT32_C( -452.72), SIMDE_FLOAT32_C( -904.66), SIMDE_FLOAT32_C( -614.99), SIMDE_FLOAT32_C( -172.17), SIMDE_FLOAT32_C( 311.84), SIMDE_FLOAT32_C( -833.25), SIMDE_FLOAT32_C( -503.53), SIMDE_FLOAT32_C( -800.88), SIMDE_FLOAT32_C( -452.72), SIMDE_FLOAT32_C( -904.66), SIMDE_FLOAT32_C( -614.99), SIMDE_FLOAT32_C( -172.17), SIMDE_FLOAT32_C( 311.84), SIMDE_FLOAT32_C( -833.25), SIMDE_FLOAT32_C( -503.53) } }, { { SIMDE_FLOAT32_C( -875.06), SIMDE_FLOAT32_C( 874.51), SIMDE_FLOAT32_C( -123.24), SIMDE_FLOAT32_C( 657.48), SIMDE_FLOAT32_C( 309.07), SIMDE_FLOAT32_C( 484.03), SIMDE_FLOAT32_C( -839.17), SIMDE_FLOAT32_C( 10.32) }, { SIMDE_FLOAT32_C( -875.06), SIMDE_FLOAT32_C( 874.51), SIMDE_FLOAT32_C( -123.24), SIMDE_FLOAT32_C( 657.48), SIMDE_FLOAT32_C( 309.07), SIMDE_FLOAT32_C( 484.03), SIMDE_FLOAT32_C( -839.17), SIMDE_FLOAT32_C( 10.32), SIMDE_FLOAT32_C( -875.06), SIMDE_FLOAT32_C( 874.51), SIMDE_FLOAT32_C( -123.24), SIMDE_FLOAT32_C( 657.48), SIMDE_FLOAT32_C( 309.07), SIMDE_FLOAT32_C( 484.03), SIMDE_FLOAT32_C( -839.17), SIMDE_FLOAT32_C( 
10.32) } }, { { SIMDE_FLOAT32_C( -515.09), SIMDE_FLOAT32_C( 924.58), SIMDE_FLOAT32_C( -659.21), SIMDE_FLOAT32_C( 676.36), SIMDE_FLOAT32_C( -421.41), SIMDE_FLOAT32_C( -682.12), SIMDE_FLOAT32_C( -306.00), SIMDE_FLOAT32_C( -939.89) }, { SIMDE_FLOAT32_C( -515.09), SIMDE_FLOAT32_C( 924.58), SIMDE_FLOAT32_C( -659.21), SIMDE_FLOAT32_C( 676.36), SIMDE_FLOAT32_C( -421.41), SIMDE_FLOAT32_C( -682.12), SIMDE_FLOAT32_C( -306.00), SIMDE_FLOAT32_C( -939.89), SIMDE_FLOAT32_C( -515.09), SIMDE_FLOAT32_C( 924.58), SIMDE_FLOAT32_C( -659.21), SIMDE_FLOAT32_C( 676.36), SIMDE_FLOAT32_C( -421.41), SIMDE_FLOAT32_C( -682.12), SIMDE_FLOAT32_C( -306.00), SIMDE_FLOAT32_C( -939.89) } }, { { SIMDE_FLOAT32_C( -812.70), SIMDE_FLOAT32_C( 906.23), SIMDE_FLOAT32_C( -979.37), SIMDE_FLOAT32_C( -275.20), SIMDE_FLOAT32_C( 664.08), SIMDE_FLOAT32_C( -809.85), SIMDE_FLOAT32_C( 934.39), SIMDE_FLOAT32_C( 280.51) }, { SIMDE_FLOAT32_C( -812.70), SIMDE_FLOAT32_C( 906.23), SIMDE_FLOAT32_C( -979.37), SIMDE_FLOAT32_C( -275.20), SIMDE_FLOAT32_C( 664.08), SIMDE_FLOAT32_C( -809.85), SIMDE_FLOAT32_C( 934.39), SIMDE_FLOAT32_C( 280.51), SIMDE_FLOAT32_C( -812.70), SIMDE_FLOAT32_C( 906.23), SIMDE_FLOAT32_C( -979.37), SIMDE_FLOAT32_C( -275.20), SIMDE_FLOAT32_C( 664.08), SIMDE_FLOAT32_C( -809.85), SIMDE_FLOAT32_C( 934.39), SIMDE_FLOAT32_C( 280.51) } }, { { SIMDE_FLOAT32_C( 461.56), SIMDE_FLOAT32_C( -484.84), SIMDE_FLOAT32_C( -776.35), SIMDE_FLOAT32_C( -37.28), SIMDE_FLOAT32_C( -552.72), SIMDE_FLOAT32_C( 358.22), SIMDE_FLOAT32_C( 561.82), SIMDE_FLOAT32_C( 465.10) }, { SIMDE_FLOAT32_C( 461.56), SIMDE_FLOAT32_C( -484.84), SIMDE_FLOAT32_C( -776.35), SIMDE_FLOAT32_C( -37.28), SIMDE_FLOAT32_C( -552.72), SIMDE_FLOAT32_C( 358.22), SIMDE_FLOAT32_C( 561.82), SIMDE_FLOAT32_C( 465.10), SIMDE_FLOAT32_C( 461.56), SIMDE_FLOAT32_C( -484.84), SIMDE_FLOAT32_C( -776.35), SIMDE_FLOAT32_C( -37.28), SIMDE_FLOAT32_C( -552.72), SIMDE_FLOAT32_C( 358.22), SIMDE_FLOAT32_C( 561.82), SIMDE_FLOAT32_C( 465.10) } }, { { SIMDE_FLOAT32_C( 996.67), 
SIMDE_FLOAT32_C( -908.09), SIMDE_FLOAT32_C( -292.64), SIMDE_FLOAT32_C( -421.79), SIMDE_FLOAT32_C( -984.50), SIMDE_FLOAT32_C( -529.88), SIMDE_FLOAT32_C( 228.67), SIMDE_FLOAT32_C( -756.34) }, { SIMDE_FLOAT32_C( 996.67), SIMDE_FLOAT32_C( -908.09), SIMDE_FLOAT32_C( -292.64), SIMDE_FLOAT32_C( -421.79), SIMDE_FLOAT32_C( -984.50), SIMDE_FLOAT32_C( -529.88), SIMDE_FLOAT32_C( 228.67), SIMDE_FLOAT32_C( -756.34), SIMDE_FLOAT32_C( 996.67), SIMDE_FLOAT32_C( -908.09), SIMDE_FLOAT32_C( -292.64), SIMDE_FLOAT32_C( -421.79), SIMDE_FLOAT32_C( -984.50), SIMDE_FLOAT32_C( -529.88), SIMDE_FLOAT32_C( 228.67), SIMDE_FLOAT32_C( -756.34) } }, { { SIMDE_FLOAT32_C( 236.36), SIMDE_FLOAT32_C( 442.90), SIMDE_FLOAT32_C( -175.57), SIMDE_FLOAT32_C( -799.66), SIMDE_FLOAT32_C( 97.65), SIMDE_FLOAT32_C( -822.08), SIMDE_FLOAT32_C( -738.45), SIMDE_FLOAT32_C( 923.13) }, { SIMDE_FLOAT32_C( 236.36), SIMDE_FLOAT32_C( 442.90), SIMDE_FLOAT32_C( -175.57), SIMDE_FLOAT32_C( -799.66), SIMDE_FLOAT32_C( 97.65), SIMDE_FLOAT32_C( -822.08), SIMDE_FLOAT32_C( -738.45), SIMDE_FLOAT32_C( 923.13), SIMDE_FLOAT32_C( 236.36), SIMDE_FLOAT32_C( 442.90), SIMDE_FLOAT32_C( -175.57), SIMDE_FLOAT32_C( -799.66), SIMDE_FLOAT32_C( 97.65), SIMDE_FLOAT32_C( -822.08), SIMDE_FLOAT32_C( -738.45), SIMDE_FLOAT32_C( 923.13) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_broadcast_f32x8(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_broadcast_f32x8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask16 k; const simde_float32 a[8]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 280.29), SIMDE_FLOAT32_C( 838.38), SIMDE_FLOAT32_C( 622.29), SIMDE_FLOAT32_C( 762.17), SIMDE_FLOAT32_C( -281.25), SIMDE_FLOAT32_C( 985.78), SIMDE_FLOAT32_C( 78.74), SIMDE_FLOAT32_C( -555.08), SIMDE_FLOAT32_C( 759.89), 
SIMDE_FLOAT32_C( -557.22), SIMDE_FLOAT32_C( 754.50), SIMDE_FLOAT32_C( 954.59), SIMDE_FLOAT32_C( -153.57), SIMDE_FLOAT32_C( 932.38), SIMDE_FLOAT32_C( 449.83), SIMDE_FLOAT32_C( 378.57) }, UINT16_C(36924), { SIMDE_FLOAT32_C( 598.39), SIMDE_FLOAT32_C( -917.42), SIMDE_FLOAT32_C( 853.85), SIMDE_FLOAT32_C( 635.72), SIMDE_FLOAT32_C( 497.82), SIMDE_FLOAT32_C( 880.65), SIMDE_FLOAT32_C( -930.36), SIMDE_FLOAT32_C( -512.19) }, { SIMDE_FLOAT32_C( 280.29), SIMDE_FLOAT32_C( 838.38), SIMDE_FLOAT32_C( 853.85), SIMDE_FLOAT32_C( 635.72), SIMDE_FLOAT32_C( 497.82), SIMDE_FLOAT32_C( 880.65), SIMDE_FLOAT32_C( 78.74), SIMDE_FLOAT32_C( -555.08), SIMDE_FLOAT32_C( 759.89), SIMDE_FLOAT32_C( -557.22), SIMDE_FLOAT32_C( 754.50), SIMDE_FLOAT32_C( 954.59), SIMDE_FLOAT32_C( 497.82), SIMDE_FLOAT32_C( 932.38), SIMDE_FLOAT32_C( 449.83), SIMDE_FLOAT32_C( -512.19) } }, { { SIMDE_FLOAT32_C( -437.09), SIMDE_FLOAT32_C( -187.22), SIMDE_FLOAT32_C( -573.53), SIMDE_FLOAT32_C( 628.55), SIMDE_FLOAT32_C( 16.28), SIMDE_FLOAT32_C( -343.67), SIMDE_FLOAT32_C( 13.33), SIMDE_FLOAT32_C( 92.74), SIMDE_FLOAT32_C( 617.88), SIMDE_FLOAT32_C( 659.02), SIMDE_FLOAT32_C( 114.72), SIMDE_FLOAT32_C( 86.74), SIMDE_FLOAT32_C( -78.46), SIMDE_FLOAT32_C( -669.19), SIMDE_FLOAT32_C( 913.81), SIMDE_FLOAT32_C( 480.88) }, UINT16_C(25166), { SIMDE_FLOAT32_C( -761.34), SIMDE_FLOAT32_C( 162.88), SIMDE_FLOAT32_C( -410.95), SIMDE_FLOAT32_C( -918.77), SIMDE_FLOAT32_C( 294.07), SIMDE_FLOAT32_C( 489.11), SIMDE_FLOAT32_C( 466.01), SIMDE_FLOAT32_C( 281.28) }, { SIMDE_FLOAT32_C( -437.09), SIMDE_FLOAT32_C( 162.88), SIMDE_FLOAT32_C( -410.95), SIMDE_FLOAT32_C( -918.77), SIMDE_FLOAT32_C( 16.28), SIMDE_FLOAT32_C( -343.67), SIMDE_FLOAT32_C( 466.01), SIMDE_FLOAT32_C( 92.74), SIMDE_FLOAT32_C( 617.88), SIMDE_FLOAT32_C( 162.88), SIMDE_FLOAT32_C( 114.72), SIMDE_FLOAT32_C( 86.74), SIMDE_FLOAT32_C( -78.46), SIMDE_FLOAT32_C( 489.11), SIMDE_FLOAT32_C( 466.01), SIMDE_FLOAT32_C( 480.88) } }, { { SIMDE_FLOAT32_C( -606.28), SIMDE_FLOAT32_C( 188.60), SIMDE_FLOAT32_C( 
-142.85), SIMDE_FLOAT32_C( -814.99), SIMDE_FLOAT32_C( 440.56), SIMDE_FLOAT32_C( 576.44), SIMDE_FLOAT32_C( 238.85), SIMDE_FLOAT32_C( 303.69), SIMDE_FLOAT32_C( 150.34), SIMDE_FLOAT32_C( 808.69), SIMDE_FLOAT32_C( -362.83), SIMDE_FLOAT32_C( -158.08), SIMDE_FLOAT32_C( -803.96), SIMDE_FLOAT32_C( -196.75), SIMDE_FLOAT32_C( -727.89), SIMDE_FLOAT32_C( 308.53) }, UINT16_C(23787), { SIMDE_FLOAT32_C( 944.26), SIMDE_FLOAT32_C( 110.45), SIMDE_FLOAT32_C( 407.09), SIMDE_FLOAT32_C( 45.91), SIMDE_FLOAT32_C( -335.37), SIMDE_FLOAT32_C( -560.84), SIMDE_FLOAT32_C( 3.97), SIMDE_FLOAT32_C( 760.14) }, { SIMDE_FLOAT32_C( 944.26), SIMDE_FLOAT32_C( 110.45), SIMDE_FLOAT32_C( -142.85), SIMDE_FLOAT32_C( 45.91), SIMDE_FLOAT32_C( 440.56), SIMDE_FLOAT32_C( -560.84), SIMDE_FLOAT32_C( 3.97), SIMDE_FLOAT32_C( 760.14), SIMDE_FLOAT32_C( 150.34), SIMDE_FLOAT32_C( 808.69), SIMDE_FLOAT32_C( 407.09), SIMDE_FLOAT32_C( 45.91), SIMDE_FLOAT32_C( -335.37), SIMDE_FLOAT32_C( -196.75), SIMDE_FLOAT32_C( 3.97), SIMDE_FLOAT32_C( 308.53) } }, { { SIMDE_FLOAT32_C( -278.78), SIMDE_FLOAT32_C( 517.15), SIMDE_FLOAT32_C( -283.92), SIMDE_FLOAT32_C( 114.05), SIMDE_FLOAT32_C( 798.05), SIMDE_FLOAT32_C( 868.23), SIMDE_FLOAT32_C( 258.92), SIMDE_FLOAT32_C( -367.27), SIMDE_FLOAT32_C( -720.23), SIMDE_FLOAT32_C( -836.19), SIMDE_FLOAT32_C( 163.28), SIMDE_FLOAT32_C( 201.97), SIMDE_FLOAT32_C( 461.48), SIMDE_FLOAT32_C( 33.48), SIMDE_FLOAT32_C( 752.68), SIMDE_FLOAT32_C( 274.33) }, UINT16_C( 9614), { SIMDE_FLOAT32_C( -353.42), SIMDE_FLOAT32_C( 72.45), SIMDE_FLOAT32_C( -313.79), SIMDE_FLOAT32_C( 54.95), SIMDE_FLOAT32_C( -482.32), SIMDE_FLOAT32_C( -268.09), SIMDE_FLOAT32_C( 146.77), SIMDE_FLOAT32_C( 772.72) }, { SIMDE_FLOAT32_C( -278.78), SIMDE_FLOAT32_C( 72.45), SIMDE_FLOAT32_C( -313.79), SIMDE_FLOAT32_C( 54.95), SIMDE_FLOAT32_C( 798.05), SIMDE_FLOAT32_C( 868.23), SIMDE_FLOAT32_C( 258.92), SIMDE_FLOAT32_C( 772.72), SIMDE_FLOAT32_C( -353.42), SIMDE_FLOAT32_C( -836.19), SIMDE_FLOAT32_C( -313.79), SIMDE_FLOAT32_C( 201.97), SIMDE_FLOAT32_C( 
461.48), SIMDE_FLOAT32_C( -268.09), SIMDE_FLOAT32_C( 752.68), SIMDE_FLOAT32_C( 274.33) } }, { { SIMDE_FLOAT32_C( -894.15), SIMDE_FLOAT32_C( -6.16), SIMDE_FLOAT32_C( 455.15), SIMDE_FLOAT32_C( -216.19), SIMDE_FLOAT32_C( 419.21), SIMDE_FLOAT32_C( -283.83), SIMDE_FLOAT32_C( -341.07), SIMDE_FLOAT32_C( -431.79), SIMDE_FLOAT32_C( 825.19), SIMDE_FLOAT32_C( -956.94), SIMDE_FLOAT32_C( 688.79), SIMDE_FLOAT32_C( 509.40), SIMDE_FLOAT32_C( -511.22), SIMDE_FLOAT32_C( -14.80), SIMDE_FLOAT32_C( -763.30), SIMDE_FLOAT32_C( -769.02) }, UINT16_C(57357), { SIMDE_FLOAT32_C( -152.14), SIMDE_FLOAT32_C( -951.21), SIMDE_FLOAT32_C( 936.35), SIMDE_FLOAT32_C( -713.46), SIMDE_FLOAT32_C( 933.97), SIMDE_FLOAT32_C( -738.03), SIMDE_FLOAT32_C( 3.91), SIMDE_FLOAT32_C( -225.68) }, { SIMDE_FLOAT32_C( -152.14), SIMDE_FLOAT32_C( -6.16), SIMDE_FLOAT32_C( 936.35), SIMDE_FLOAT32_C( -713.46), SIMDE_FLOAT32_C( 419.21), SIMDE_FLOAT32_C( -283.83), SIMDE_FLOAT32_C( -341.07), SIMDE_FLOAT32_C( -431.79), SIMDE_FLOAT32_C( 825.19), SIMDE_FLOAT32_C( -956.94), SIMDE_FLOAT32_C( 688.79), SIMDE_FLOAT32_C( 509.40), SIMDE_FLOAT32_C( -511.22), SIMDE_FLOAT32_C( -738.03), SIMDE_FLOAT32_C( 3.91), SIMDE_FLOAT32_C( -225.68) } }, { { SIMDE_FLOAT32_C( 958.35), SIMDE_FLOAT32_C( 959.55), SIMDE_FLOAT32_C( -771.84), SIMDE_FLOAT32_C( -312.71), SIMDE_FLOAT32_C( 261.02), SIMDE_FLOAT32_C( -965.72), SIMDE_FLOAT32_C( -898.55), SIMDE_FLOAT32_C( 98.86), SIMDE_FLOAT32_C( -506.78), SIMDE_FLOAT32_C( 475.13), SIMDE_FLOAT32_C( -561.78), SIMDE_FLOAT32_C( 145.04), SIMDE_FLOAT32_C( -310.71), SIMDE_FLOAT32_C( -100.99), SIMDE_FLOAT32_C( 656.93), SIMDE_FLOAT32_C( 955.62) }, UINT16_C(55637), { SIMDE_FLOAT32_C( 64.66), SIMDE_FLOAT32_C( 704.14), SIMDE_FLOAT32_C( 421.81), SIMDE_FLOAT32_C( -620.94), SIMDE_FLOAT32_C( -124.06), SIMDE_FLOAT32_C( 858.04), SIMDE_FLOAT32_C( -855.91), SIMDE_FLOAT32_C( 691.15) }, { SIMDE_FLOAT32_C( 64.66), SIMDE_FLOAT32_C( 959.55), SIMDE_FLOAT32_C( 421.81), SIMDE_FLOAT32_C( -312.71), SIMDE_FLOAT32_C( -124.06), SIMDE_FLOAT32_C( 
-965.72), SIMDE_FLOAT32_C( -855.91), SIMDE_FLOAT32_C( 98.86), SIMDE_FLOAT32_C( 64.66), SIMDE_FLOAT32_C( 475.13), SIMDE_FLOAT32_C( -561.78), SIMDE_FLOAT32_C( -620.94), SIMDE_FLOAT32_C( -124.06), SIMDE_FLOAT32_C( -100.99), SIMDE_FLOAT32_C( -855.91), SIMDE_FLOAT32_C( 691.15) } }, { { SIMDE_FLOAT32_C( 165.52), SIMDE_FLOAT32_C( -117.15), SIMDE_FLOAT32_C( -914.50), SIMDE_FLOAT32_C( -48.64), SIMDE_FLOAT32_C( 429.74), SIMDE_FLOAT32_C( 612.18), SIMDE_FLOAT32_C( 933.85), SIMDE_FLOAT32_C( -778.14), SIMDE_FLOAT32_C( -214.40), SIMDE_FLOAT32_C( 623.77), SIMDE_FLOAT32_C( -288.84), SIMDE_FLOAT32_C( -541.76), SIMDE_FLOAT32_C( 699.14), SIMDE_FLOAT32_C( 473.09), SIMDE_FLOAT32_C( -762.45), SIMDE_FLOAT32_C( -518.42) }, UINT16_C(63181), { SIMDE_FLOAT32_C( 188.68), SIMDE_FLOAT32_C( -923.58), SIMDE_FLOAT32_C( -542.98), SIMDE_FLOAT32_C( 193.71), SIMDE_FLOAT32_C( -319.51), SIMDE_FLOAT32_C( 46.76), SIMDE_FLOAT32_C( -44.67), SIMDE_FLOAT32_C( -768.90) }, { SIMDE_FLOAT32_C( 188.68), SIMDE_FLOAT32_C( -117.15), SIMDE_FLOAT32_C( -542.98), SIMDE_FLOAT32_C( 193.71), SIMDE_FLOAT32_C( 429.74), SIMDE_FLOAT32_C( 612.18), SIMDE_FLOAT32_C( -44.67), SIMDE_FLOAT32_C( -768.90), SIMDE_FLOAT32_C( -214.40), SIMDE_FLOAT32_C( -923.58), SIMDE_FLOAT32_C( -542.98), SIMDE_FLOAT32_C( -541.76), SIMDE_FLOAT32_C( -319.51), SIMDE_FLOAT32_C( 46.76), SIMDE_FLOAT32_C( -44.67), SIMDE_FLOAT32_C( -768.90) } }, { { SIMDE_FLOAT32_C( -857.07), SIMDE_FLOAT32_C( -775.77), SIMDE_FLOAT32_C( -351.82), SIMDE_FLOAT32_C( 984.69), SIMDE_FLOAT32_C( -320.14), SIMDE_FLOAT32_C( -636.62), SIMDE_FLOAT32_C( 297.63), SIMDE_FLOAT32_C( 186.04), SIMDE_FLOAT32_C( 780.35), SIMDE_FLOAT32_C( -693.20), SIMDE_FLOAT32_C( -589.12), SIMDE_FLOAT32_C( 731.33), SIMDE_FLOAT32_C( -601.90), SIMDE_FLOAT32_C( -195.41), SIMDE_FLOAT32_C( -239.98), SIMDE_FLOAT32_C( 675.16) }, UINT16_C(63687), { SIMDE_FLOAT32_C( 751.41), SIMDE_FLOAT32_C( 926.41), SIMDE_FLOAT32_C( 149.18), SIMDE_FLOAT32_C( -662.14), SIMDE_FLOAT32_C( -649.07), SIMDE_FLOAT32_C( -858.90), SIMDE_FLOAT32_C( 
465.33), SIMDE_FLOAT32_C( 831.66) }, { SIMDE_FLOAT32_C( 751.41), SIMDE_FLOAT32_C( 926.41), SIMDE_FLOAT32_C( 149.18), SIMDE_FLOAT32_C( 984.69), SIMDE_FLOAT32_C( -320.14), SIMDE_FLOAT32_C( -636.62), SIMDE_FLOAT32_C( 465.33), SIMDE_FLOAT32_C( 831.66), SIMDE_FLOAT32_C( 780.35), SIMDE_FLOAT32_C( -693.20), SIMDE_FLOAT32_C( -589.12), SIMDE_FLOAT32_C( -662.14), SIMDE_FLOAT32_C( -649.07), SIMDE_FLOAT32_C( -858.90), SIMDE_FLOAT32_C( 465.33), SIMDE_FLOAT32_C( 831.66) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_broadcast_f32x8(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_maskz_broadcast_f32x8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const simde_float32 a[8]; const simde_float32 r[16]; } test_vec[] = { { UINT16_C(49062), { SIMDE_FLOAT32_C( -67.12), SIMDE_FLOAT32_C( -144.98), SIMDE_FLOAT32_C( -693.09), SIMDE_FLOAT32_C( -717.03), SIMDE_FLOAT32_C( 833.33), SIMDE_FLOAT32_C( -297.62), SIMDE_FLOAT32_C( -166.55), SIMDE_FLOAT32_C( 748.74) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -144.98), SIMDE_FLOAT32_C( -693.09), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -297.62), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 748.74), SIMDE_FLOAT32_C( -67.12), SIMDE_FLOAT32_C( -144.98), SIMDE_FLOAT32_C( -693.09), SIMDE_FLOAT32_C( -717.03), SIMDE_FLOAT32_C( 833.33), SIMDE_FLOAT32_C( -297.62), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 748.74) } }, { UINT16_C( 6586), { SIMDE_FLOAT32_C( -140.91), SIMDE_FLOAT32_C( -189.72), SIMDE_FLOAT32_C( -663.50), SIMDE_FLOAT32_C( 613.12), SIMDE_FLOAT32_C( 772.89), SIMDE_FLOAT32_C( -76.35), SIMDE_FLOAT32_C( 859.08), SIMDE_FLOAT32_C( 595.36) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -189.72), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 
613.12), SIMDE_FLOAT32_C( 772.89), SIMDE_FLOAT32_C( -76.35), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 595.36), SIMDE_FLOAT32_C( -140.91), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 613.12), SIMDE_FLOAT32_C( 772.89), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(41380), { SIMDE_FLOAT32_C( -218.39), SIMDE_FLOAT32_C( -397.45), SIMDE_FLOAT32_C( 20.87), SIMDE_FLOAT32_C( 703.15), SIMDE_FLOAT32_C( -126.69), SIMDE_FLOAT32_C( 776.77), SIMDE_FLOAT32_C( -820.00), SIMDE_FLOAT32_C( 252.00) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 20.87), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 776.77), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 252.00), SIMDE_FLOAT32_C( -218.39), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 776.77), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 252.00) } }, { UINT16_C(14746), { SIMDE_FLOAT32_C( 488.59), SIMDE_FLOAT32_C( -333.19), SIMDE_FLOAT32_C( 82.99), SIMDE_FLOAT32_C( 818.76), SIMDE_FLOAT32_C( 927.98), SIMDE_FLOAT32_C( 586.60), SIMDE_FLOAT32_C( 933.90), SIMDE_FLOAT32_C( 84.47) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -333.19), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 818.76), SIMDE_FLOAT32_C( 927.98), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 84.47), SIMDE_FLOAT32_C( 488.59), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 818.76), SIMDE_FLOAT32_C( 927.98), SIMDE_FLOAT32_C( 586.60), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(22430), { SIMDE_FLOAT32_C( -788.60), SIMDE_FLOAT32_C( -2.38), SIMDE_FLOAT32_C( -57.26), SIMDE_FLOAT32_C( -363.40), SIMDE_FLOAT32_C( 348.91), SIMDE_FLOAT32_C( 172.83), SIMDE_FLOAT32_C( 816.49), SIMDE_FLOAT32_C( 677.29) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -2.38), SIMDE_FLOAT32_C( -57.26), SIMDE_FLOAT32_C( -363.40), SIMDE_FLOAT32_C( 348.91), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( 677.29), SIMDE_FLOAT32_C( -788.60), SIMDE_FLOAT32_C( -2.38), SIMDE_FLOAT32_C( -57.26), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 348.91), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 816.49), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(53747), { SIMDE_FLOAT32_C( -74.10), SIMDE_FLOAT32_C( 628.20), SIMDE_FLOAT32_C( 176.40), SIMDE_FLOAT32_C( 789.58), SIMDE_FLOAT32_C( 434.02), SIMDE_FLOAT32_C( 537.30), SIMDE_FLOAT32_C( 360.66), SIMDE_FLOAT32_C( -306.64) }, { SIMDE_FLOAT32_C( -74.10), SIMDE_FLOAT32_C( 628.20), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 434.02), SIMDE_FLOAT32_C( 537.30), SIMDE_FLOAT32_C( 360.66), SIMDE_FLOAT32_C( -306.64), SIMDE_FLOAT32_C( -74.10), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 434.02), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 360.66), SIMDE_FLOAT32_C( -306.64) } }, { UINT16_C(57660), { SIMDE_FLOAT32_C( 529.43), SIMDE_FLOAT32_C( 185.72), SIMDE_FLOAT32_C( -666.37), SIMDE_FLOAT32_C( 372.37), SIMDE_FLOAT32_C( 420.53), SIMDE_FLOAT32_C( -76.09), SIMDE_FLOAT32_C( -764.18), SIMDE_FLOAT32_C( 472.62) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -666.37), SIMDE_FLOAT32_C( 372.37), SIMDE_FLOAT32_C( 420.53), SIMDE_FLOAT32_C( -76.09), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 529.43), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -76.09), SIMDE_FLOAT32_C( -764.18), SIMDE_FLOAT32_C( 472.62) } }, { UINT16_C(60506), { SIMDE_FLOAT32_C( -796.21), SIMDE_FLOAT32_C( 148.32), SIMDE_FLOAT32_C( 781.59), SIMDE_FLOAT32_C( 218.77), SIMDE_FLOAT32_C( 802.35), SIMDE_FLOAT32_C( -915.03), SIMDE_FLOAT32_C( -953.21), SIMDE_FLOAT32_C( -530.25) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 148.32), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 218.77), SIMDE_FLOAT32_C( 802.35), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -953.21), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 
0.00), SIMDE_FLOAT32_C( 781.59), SIMDE_FLOAT32_C( 218.77), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -915.03), SIMDE_FLOAT32_C( -953.21), SIMDE_FLOAT32_C( -530.25) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_maskz_broadcast_f32x8(test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_broadcast_f64x2 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 715.32), SIMDE_FLOAT64_C( 317.58) }, { SIMDE_FLOAT64_C( 715.32), SIMDE_FLOAT64_C( 317.58), SIMDE_FLOAT64_C( 715.32), SIMDE_FLOAT64_C( 317.58), SIMDE_FLOAT64_C( 715.32), SIMDE_FLOAT64_C( 317.58), SIMDE_FLOAT64_C( 715.32), SIMDE_FLOAT64_C( 317.58) } }, { { SIMDE_FLOAT64_C( -404.76), SIMDE_FLOAT64_C( -835.80) }, { SIMDE_FLOAT64_C( -404.76), SIMDE_FLOAT64_C( -835.80), SIMDE_FLOAT64_C( -404.76), SIMDE_FLOAT64_C( -835.80), SIMDE_FLOAT64_C( -404.76), SIMDE_FLOAT64_C( -835.80), SIMDE_FLOAT64_C( -404.76), SIMDE_FLOAT64_C( -835.80) } }, { { SIMDE_FLOAT64_C( 653.97), SIMDE_FLOAT64_C( -774.97) }, { SIMDE_FLOAT64_C( 653.97), SIMDE_FLOAT64_C( -774.97), SIMDE_FLOAT64_C( 653.97), SIMDE_FLOAT64_C( -774.97), SIMDE_FLOAT64_C( 653.97), SIMDE_FLOAT64_C( -774.97), SIMDE_FLOAT64_C( 653.97), SIMDE_FLOAT64_C( -774.97) } }, { { SIMDE_FLOAT64_C( -843.04), SIMDE_FLOAT64_C( -900.71) }, { SIMDE_FLOAT64_C( -843.04), SIMDE_FLOAT64_C( -900.71), SIMDE_FLOAT64_C( -843.04), SIMDE_FLOAT64_C( -900.71), SIMDE_FLOAT64_C( -843.04), SIMDE_FLOAT64_C( -900.71), SIMDE_FLOAT64_C( -843.04), SIMDE_FLOAT64_C( -900.71) } }, { { SIMDE_FLOAT64_C( -197.71), SIMDE_FLOAT64_C( -989.91) }, { SIMDE_FLOAT64_C( -197.71), SIMDE_FLOAT64_C( -989.91), SIMDE_FLOAT64_C( -197.71), SIMDE_FLOAT64_C( -989.91), SIMDE_FLOAT64_C( -197.71), SIMDE_FLOAT64_C( -989.91), SIMDE_FLOAT64_C( -197.71), 
SIMDE_FLOAT64_C( -989.91) } }, { { SIMDE_FLOAT64_C( 515.43), SIMDE_FLOAT64_C( 879.19) }, { SIMDE_FLOAT64_C( 515.43), SIMDE_FLOAT64_C( 879.19), SIMDE_FLOAT64_C( 515.43), SIMDE_FLOAT64_C( 879.19), SIMDE_FLOAT64_C( 515.43), SIMDE_FLOAT64_C( 879.19), SIMDE_FLOAT64_C( 515.43), SIMDE_FLOAT64_C( 879.19) } }, { { SIMDE_FLOAT64_C( 610.61), SIMDE_FLOAT64_C( 540.00) }, { SIMDE_FLOAT64_C( 610.61), SIMDE_FLOAT64_C( 540.00), SIMDE_FLOAT64_C( 610.61), SIMDE_FLOAT64_C( 540.00), SIMDE_FLOAT64_C( 610.61), SIMDE_FLOAT64_C( 540.00), SIMDE_FLOAT64_C( 610.61), SIMDE_FLOAT64_C( 540.00) } }, { { SIMDE_FLOAT64_C( -234.86), SIMDE_FLOAT64_C( 751.29) }, { SIMDE_FLOAT64_C( -234.86), SIMDE_FLOAT64_C( 751.29), SIMDE_FLOAT64_C( -234.86), SIMDE_FLOAT64_C( 751.29), SIMDE_FLOAT64_C( -234.86), SIMDE_FLOAT64_C( 751.29), SIMDE_FLOAT64_C( -234.86), SIMDE_FLOAT64_C( 751.29) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_broadcast_f64x2(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_broadcast_f64x2 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[2]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 749.13), SIMDE_FLOAT64_C( 675.39), SIMDE_FLOAT64_C( -739.63), SIMDE_FLOAT64_C( -916.53), SIMDE_FLOAT64_C( -70.94), SIMDE_FLOAT64_C( -224.46), SIMDE_FLOAT64_C( -485.72), SIMDE_FLOAT64_C( 433.96) }, UINT8_C(250), { SIMDE_FLOAT64_C( 424.19), SIMDE_FLOAT64_C( -720.98) }, { SIMDE_FLOAT64_C( 749.13), SIMDE_FLOAT64_C( -720.98), SIMDE_FLOAT64_C( -739.63), SIMDE_FLOAT64_C( -720.98), SIMDE_FLOAT64_C( 424.19), SIMDE_FLOAT64_C( -720.98), SIMDE_FLOAT64_C( 424.19), SIMDE_FLOAT64_C( -720.98) } }, { { SIMDE_FLOAT64_C( 461.33), SIMDE_FLOAT64_C( -402.24), SIMDE_FLOAT64_C( -437.75), SIMDE_FLOAT64_C( 785.96), SIMDE_FLOAT64_C( -372.46), SIMDE_FLOAT64_C( 
110.74), SIMDE_FLOAT64_C( -831.39), SIMDE_FLOAT64_C( 846.99) }, UINT8_C( 78), { SIMDE_FLOAT64_C( -572.48), SIMDE_FLOAT64_C( 394.61) }, { SIMDE_FLOAT64_C( 461.33), SIMDE_FLOAT64_C( 394.61), SIMDE_FLOAT64_C( -572.48), SIMDE_FLOAT64_C( 394.61), SIMDE_FLOAT64_C( -372.46), SIMDE_FLOAT64_C( 110.74), SIMDE_FLOAT64_C( -572.48), SIMDE_FLOAT64_C( 846.99) } }, { { SIMDE_FLOAT64_C( 215.35), SIMDE_FLOAT64_C( -616.54), SIMDE_FLOAT64_C( -262.30), SIMDE_FLOAT64_C( -426.39), SIMDE_FLOAT64_C( -336.22), SIMDE_FLOAT64_C( -839.02), SIMDE_FLOAT64_C( 672.49), SIMDE_FLOAT64_C( 589.70) }, UINT8_C(163), { SIMDE_FLOAT64_C( -982.23), SIMDE_FLOAT64_C( -416.77) }, { SIMDE_FLOAT64_C( -982.23), SIMDE_FLOAT64_C( -416.77), SIMDE_FLOAT64_C( -262.30), SIMDE_FLOAT64_C( -426.39), SIMDE_FLOAT64_C( -336.22), SIMDE_FLOAT64_C( -416.77), SIMDE_FLOAT64_C( 672.49), SIMDE_FLOAT64_C( -416.77) } }, { { SIMDE_FLOAT64_C( -578.35), SIMDE_FLOAT64_C( -267.73), SIMDE_FLOAT64_C( 242.90), SIMDE_FLOAT64_C( 449.74), SIMDE_FLOAT64_C( 714.62), SIMDE_FLOAT64_C( 671.90), SIMDE_FLOAT64_C( 577.25), SIMDE_FLOAT64_C( -88.86) }, UINT8_C(222), { SIMDE_FLOAT64_C( 379.16), SIMDE_FLOAT64_C( 573.95) }, { SIMDE_FLOAT64_C( -578.35), SIMDE_FLOAT64_C( 573.95), SIMDE_FLOAT64_C( 379.16), SIMDE_FLOAT64_C( 573.95), SIMDE_FLOAT64_C( 379.16), SIMDE_FLOAT64_C( 671.90), SIMDE_FLOAT64_C( 379.16), SIMDE_FLOAT64_C( 573.95) } }, { { SIMDE_FLOAT64_C( 428.10), SIMDE_FLOAT64_C( -969.60), SIMDE_FLOAT64_C( -117.58), SIMDE_FLOAT64_C( -121.88), SIMDE_FLOAT64_C( -513.12), SIMDE_FLOAT64_C( -67.52), SIMDE_FLOAT64_C( -880.81), SIMDE_FLOAT64_C( 257.25) }, UINT8_C( 35), { SIMDE_FLOAT64_C( -71.92), SIMDE_FLOAT64_C( -682.64) }, { SIMDE_FLOAT64_C( -71.92), SIMDE_FLOAT64_C( -682.64), SIMDE_FLOAT64_C( -117.58), SIMDE_FLOAT64_C( -121.88), SIMDE_FLOAT64_C( -513.12), SIMDE_FLOAT64_C( -682.64), SIMDE_FLOAT64_C( -880.81), SIMDE_FLOAT64_C( 257.25) } }, { { SIMDE_FLOAT64_C( 858.06), SIMDE_FLOAT64_C( -576.56), SIMDE_FLOAT64_C( -199.04), SIMDE_FLOAT64_C( 741.89), 
SIMDE_FLOAT64_C( 940.66), SIMDE_FLOAT64_C( -320.73), SIMDE_FLOAT64_C( -519.45), SIMDE_FLOAT64_C( -359.73) }, UINT8_C( 14), { SIMDE_FLOAT64_C( -260.24), SIMDE_FLOAT64_C( 150.09) }, { SIMDE_FLOAT64_C( 858.06), SIMDE_FLOAT64_C( 150.09), SIMDE_FLOAT64_C( -260.24), SIMDE_FLOAT64_C( 150.09), SIMDE_FLOAT64_C( 940.66), SIMDE_FLOAT64_C( -320.73), SIMDE_FLOAT64_C( -519.45), SIMDE_FLOAT64_C( -359.73) } }, { { SIMDE_FLOAT64_C( 508.76), SIMDE_FLOAT64_C( 671.76), SIMDE_FLOAT64_C( 188.22), SIMDE_FLOAT64_C( -524.84), SIMDE_FLOAT64_C( 958.74), SIMDE_FLOAT64_C( -408.21), SIMDE_FLOAT64_C( -756.34), SIMDE_FLOAT64_C( 260.63) }, UINT8_C( 48), { SIMDE_FLOAT64_C( -287.86), SIMDE_FLOAT64_C( -66.95) }, { SIMDE_FLOAT64_C( 508.76), SIMDE_FLOAT64_C( 671.76), SIMDE_FLOAT64_C( 188.22), SIMDE_FLOAT64_C( -524.84), SIMDE_FLOAT64_C( -287.86), SIMDE_FLOAT64_C( -66.95), SIMDE_FLOAT64_C( -756.34), SIMDE_FLOAT64_C( 260.63) } }, { { SIMDE_FLOAT64_C( 741.62), SIMDE_FLOAT64_C( 389.31), SIMDE_FLOAT64_C( -806.05), SIMDE_FLOAT64_C( 761.48), SIMDE_FLOAT64_C( 242.55), SIMDE_FLOAT64_C( 550.14), SIMDE_FLOAT64_C( 214.54), SIMDE_FLOAT64_C( -176.03) }, UINT8_C( 79), { SIMDE_FLOAT64_C( 639.90), SIMDE_FLOAT64_C( 881.52) }, { SIMDE_FLOAT64_C( 639.90), SIMDE_FLOAT64_C( 881.52), SIMDE_FLOAT64_C( 639.90), SIMDE_FLOAT64_C( 881.52), SIMDE_FLOAT64_C( 242.55), SIMDE_FLOAT64_C( 550.14), SIMDE_FLOAT64_C( 639.90), SIMDE_FLOAT64_C( -176.03) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_broadcast_f64x2(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_maskz_broadcast_f64x2 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const simde_float64 a[2]; const simde_float64 r[8]; } test_vec[] = { { UINT8_C( 32), { SIMDE_FLOAT64_C( 95.43), 
SIMDE_FLOAT64_C( -111.80) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -111.80), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(212), { SIMDE_FLOAT64_C( 159.26), SIMDE_FLOAT64_C( 721.63) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 159.26), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 159.26), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 159.26), SIMDE_FLOAT64_C( 721.63) } }, { UINT8_C(232), { SIMDE_FLOAT64_C( -41.02), SIMDE_FLOAT64_C( 592.81) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 592.81), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 592.81), SIMDE_FLOAT64_C( -41.02), SIMDE_FLOAT64_C( 592.81) } }, { UINT8_C(112), { SIMDE_FLOAT64_C( 80.26), SIMDE_FLOAT64_C( 969.51) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 80.26), SIMDE_FLOAT64_C( 969.51), SIMDE_FLOAT64_C( 80.26), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(215), { SIMDE_FLOAT64_C( 905.16), SIMDE_FLOAT64_C( -968.55) }, { SIMDE_FLOAT64_C( 905.16), SIMDE_FLOAT64_C( -968.55), SIMDE_FLOAT64_C( 905.16), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 905.16), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 905.16), SIMDE_FLOAT64_C( -968.55) } }, { UINT8_C(135), { SIMDE_FLOAT64_C( 140.43), SIMDE_FLOAT64_C( 267.82) }, { SIMDE_FLOAT64_C( 140.43), SIMDE_FLOAT64_C( 267.82), SIMDE_FLOAT64_C( 140.43), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 267.82) } }, { UINT8_C(192), { SIMDE_FLOAT64_C( -853.88), SIMDE_FLOAT64_C( 811.68) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -853.88), SIMDE_FLOAT64_C( 811.68) } }, { UINT8_C( 17), { SIMDE_FLOAT64_C( -661.24), SIMDE_FLOAT64_C( 561.84) }, { SIMDE_FLOAT64_C( -661.24), 
SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -661.24), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_maskz_broadcast_f64x2(test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_broadcast_f32x4 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( -558.39), SIMDE_FLOAT32_C( -943.50), SIMDE_FLOAT32_C( 652.52), SIMDE_FLOAT32_C( 945.52) }, { SIMDE_FLOAT32_C( -558.39), SIMDE_FLOAT32_C( -943.50), SIMDE_FLOAT32_C( 652.52), SIMDE_FLOAT32_C( 945.52), SIMDE_FLOAT32_C( -558.39), SIMDE_FLOAT32_C( -943.50), SIMDE_FLOAT32_C( 652.52), SIMDE_FLOAT32_C( 945.52) } }, { { SIMDE_FLOAT32_C( -577.06), SIMDE_FLOAT32_C( -623.59), SIMDE_FLOAT32_C( -742.48), SIMDE_FLOAT32_C( -807.52) }, { SIMDE_FLOAT32_C( -577.06), SIMDE_FLOAT32_C( -623.59), SIMDE_FLOAT32_C( -742.48), SIMDE_FLOAT32_C( -807.52), SIMDE_FLOAT32_C( -577.06), SIMDE_FLOAT32_C( -623.59), SIMDE_FLOAT32_C( -742.48), SIMDE_FLOAT32_C( -807.52) } }, { { SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -222.35), SIMDE_FLOAT32_C( 965.41), SIMDE_FLOAT32_C( -320.94) }, { SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -222.35), SIMDE_FLOAT32_C( 965.41), SIMDE_FLOAT32_C( -320.94), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -222.35), SIMDE_FLOAT32_C( 965.41), SIMDE_FLOAT32_C( -320.94) } }, { { SIMDE_FLOAT32_C( 34.85), SIMDE_FLOAT32_C( -238.64), SIMDE_FLOAT32_C( -834.61), SIMDE_FLOAT32_C( 763.48) }, { SIMDE_FLOAT32_C( 34.85), SIMDE_FLOAT32_C( -238.64), SIMDE_FLOAT32_C( -834.61), SIMDE_FLOAT32_C( 763.48), SIMDE_FLOAT32_C( 34.85), SIMDE_FLOAT32_C( -238.64), SIMDE_FLOAT32_C( -834.61), SIMDE_FLOAT32_C( 763.48) } }, { { SIMDE_FLOAT32_C( -215.99), SIMDE_FLOAT32_C( -214.29), 
SIMDE_FLOAT32_C( 432.66), SIMDE_FLOAT32_C( -222.94) }, { SIMDE_FLOAT32_C( -215.99), SIMDE_FLOAT32_C( -214.29), SIMDE_FLOAT32_C( 432.66), SIMDE_FLOAT32_C( -222.94), SIMDE_FLOAT32_C( -215.99), SIMDE_FLOAT32_C( -214.29), SIMDE_FLOAT32_C( 432.66), SIMDE_FLOAT32_C( -222.94) } }, { { SIMDE_FLOAT32_C( -994.85), SIMDE_FLOAT32_C( -413.17), SIMDE_FLOAT32_C( -100.86), SIMDE_FLOAT32_C( 836.37) }, { SIMDE_FLOAT32_C( -994.85), SIMDE_FLOAT32_C( -413.17), SIMDE_FLOAT32_C( -100.86), SIMDE_FLOAT32_C( 836.37), SIMDE_FLOAT32_C( -994.85), SIMDE_FLOAT32_C( -413.17), SIMDE_FLOAT32_C( -100.86), SIMDE_FLOAT32_C( 836.37) } }, { { SIMDE_FLOAT32_C( 809.63), SIMDE_FLOAT32_C( -520.84), SIMDE_FLOAT32_C( 265.00), SIMDE_FLOAT32_C( -111.67) }, { SIMDE_FLOAT32_C( 809.63), SIMDE_FLOAT32_C( -520.84), SIMDE_FLOAT32_C( 265.00), SIMDE_FLOAT32_C( -111.67), SIMDE_FLOAT32_C( 809.63), SIMDE_FLOAT32_C( -520.84), SIMDE_FLOAT32_C( 265.00), SIMDE_FLOAT32_C( -111.67) } }, { { SIMDE_FLOAT32_C( -855.41), SIMDE_FLOAT32_C( -875.73), SIMDE_FLOAT32_C( -447.77), SIMDE_FLOAT32_C( 263.25) }, { SIMDE_FLOAT32_C( -855.41), SIMDE_FLOAT32_C( -875.73), SIMDE_FLOAT32_C( -447.77), SIMDE_FLOAT32_C( 263.25), SIMDE_FLOAT32_C( -855.41), SIMDE_FLOAT32_C( -875.73), SIMDE_FLOAT32_C( -447.77), SIMDE_FLOAT32_C( 263.25) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_broadcast_f32x4(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_mask_broadcast_f32x4 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[8]; const simde__mmask8 k; const simde_float32 a[4]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 113.43), SIMDE_FLOAT32_C( 410.75), SIMDE_FLOAT32_C( -451.88), SIMDE_FLOAT32_C( -725.60), SIMDE_FLOAT32_C( 921.94), SIMDE_FLOAT32_C( -987.53), SIMDE_FLOAT32_C( 590.45), SIMDE_FLOAT32_C( -298.17) }, UINT8_C( 50), { 
SIMDE_FLOAT32_C( 44.31), SIMDE_FLOAT32_C( 797.52), SIMDE_FLOAT32_C( -107.60), SIMDE_FLOAT32_C( -484.17) }, { SIMDE_FLOAT32_C( 113.43), SIMDE_FLOAT32_C( 797.52), SIMDE_FLOAT32_C( -451.88), SIMDE_FLOAT32_C( -725.60), SIMDE_FLOAT32_C( 44.31), SIMDE_FLOAT32_C( 797.52), SIMDE_FLOAT32_C( 590.45), SIMDE_FLOAT32_C( -298.17) } }, { { SIMDE_FLOAT32_C( 556.86), SIMDE_FLOAT32_C( -797.02), SIMDE_FLOAT32_C( 402.24), SIMDE_FLOAT32_C( 441.25), SIMDE_FLOAT32_C( 142.97), SIMDE_FLOAT32_C( 883.64), SIMDE_FLOAT32_C( -635.48), SIMDE_FLOAT32_C( -488.89) }, UINT8_C(165), { SIMDE_FLOAT32_C( -333.03), SIMDE_FLOAT32_C( 703.87), SIMDE_FLOAT32_C( -69.82), SIMDE_FLOAT32_C( 527.07) }, { SIMDE_FLOAT32_C( -333.03), SIMDE_FLOAT32_C( -797.02), SIMDE_FLOAT32_C( -69.82), SIMDE_FLOAT32_C( 441.25), SIMDE_FLOAT32_C( 142.97), SIMDE_FLOAT32_C( 703.87), SIMDE_FLOAT32_C( -635.48), SIMDE_FLOAT32_C( 527.07) } }, { { SIMDE_FLOAT32_C( 425.48), SIMDE_FLOAT32_C( 960.83), SIMDE_FLOAT32_C( 698.87), SIMDE_FLOAT32_C( -175.48), SIMDE_FLOAT32_C( 789.83), SIMDE_FLOAT32_C( 633.19), SIMDE_FLOAT32_C( 85.22), SIMDE_FLOAT32_C( 351.45) }, UINT8_C(206), { SIMDE_FLOAT32_C( 362.09), SIMDE_FLOAT32_C( -387.94), SIMDE_FLOAT32_C( -58.09), SIMDE_FLOAT32_C( -381.37) }, { SIMDE_FLOAT32_C( 425.48), SIMDE_FLOAT32_C( -387.94), SIMDE_FLOAT32_C( -58.09), SIMDE_FLOAT32_C( -381.37), SIMDE_FLOAT32_C( 789.83), SIMDE_FLOAT32_C( 633.19), SIMDE_FLOAT32_C( -58.09), SIMDE_FLOAT32_C( -381.37) } }, { { SIMDE_FLOAT32_C( 385.81), SIMDE_FLOAT32_C( 368.14), SIMDE_FLOAT32_C( -607.80), SIMDE_FLOAT32_C( 623.02), SIMDE_FLOAT32_C( -955.44), SIMDE_FLOAT32_C( -138.05), SIMDE_FLOAT32_C( -245.78), SIMDE_FLOAT32_C( -750.22) }, UINT8_C(110), { SIMDE_FLOAT32_C( 548.54), SIMDE_FLOAT32_C( -618.32), SIMDE_FLOAT32_C( -113.43), SIMDE_FLOAT32_C( -437.94) }, { SIMDE_FLOAT32_C( 385.81), SIMDE_FLOAT32_C( -618.32), SIMDE_FLOAT32_C( -113.43), SIMDE_FLOAT32_C( -437.94), SIMDE_FLOAT32_C( -955.44), SIMDE_FLOAT32_C( -618.32), SIMDE_FLOAT32_C( -113.43), SIMDE_FLOAT32_C( -750.22) } }, 
{ { SIMDE_FLOAT32_C( -510.40), SIMDE_FLOAT32_C( -247.29), SIMDE_FLOAT32_C( -272.50), SIMDE_FLOAT32_C( 154.15), SIMDE_FLOAT32_C( 745.34), SIMDE_FLOAT32_C( 865.17), SIMDE_FLOAT32_C( 893.80), SIMDE_FLOAT32_C( 79.97) }, UINT8_C(108), { SIMDE_FLOAT32_C( -178.61), SIMDE_FLOAT32_C( 31.69), SIMDE_FLOAT32_C( 669.52), SIMDE_FLOAT32_C( 693.51) }, { SIMDE_FLOAT32_C( -510.40), SIMDE_FLOAT32_C( -247.29), SIMDE_FLOAT32_C( 669.52), SIMDE_FLOAT32_C( 693.51), SIMDE_FLOAT32_C( 745.34), SIMDE_FLOAT32_C( 31.69), SIMDE_FLOAT32_C( 669.52), SIMDE_FLOAT32_C( 79.97) } }, { { SIMDE_FLOAT32_C( -127.96), SIMDE_FLOAT32_C( -619.72), SIMDE_FLOAT32_C( 284.07), SIMDE_FLOAT32_C( 372.86), SIMDE_FLOAT32_C( 649.51), SIMDE_FLOAT32_C( 278.96), SIMDE_FLOAT32_C( 407.00), SIMDE_FLOAT32_C( 484.63) }, UINT8_C( 35), { SIMDE_FLOAT32_C( -266.56), SIMDE_FLOAT32_C( -110.85), SIMDE_FLOAT32_C( -976.05), SIMDE_FLOAT32_C( -446.86) }, { SIMDE_FLOAT32_C( -266.56), SIMDE_FLOAT32_C( -110.85), SIMDE_FLOAT32_C( 284.07), SIMDE_FLOAT32_C( 372.86), SIMDE_FLOAT32_C( 649.51), SIMDE_FLOAT32_C( -110.85), SIMDE_FLOAT32_C( 407.00), SIMDE_FLOAT32_C( 484.63) } }, { { SIMDE_FLOAT32_C( -413.34), SIMDE_FLOAT32_C( 993.71), SIMDE_FLOAT32_C( -725.95), SIMDE_FLOAT32_C( 912.24), SIMDE_FLOAT32_C( 38.79), SIMDE_FLOAT32_C( -113.15), SIMDE_FLOAT32_C( 355.83), SIMDE_FLOAT32_C( 489.44) }, UINT8_C(174), { SIMDE_FLOAT32_C( 271.71), SIMDE_FLOAT32_C( 611.34), SIMDE_FLOAT32_C( 750.31), SIMDE_FLOAT32_C( 445.31) }, { SIMDE_FLOAT32_C( -413.34), SIMDE_FLOAT32_C( 611.34), SIMDE_FLOAT32_C( 750.31), SIMDE_FLOAT32_C( 445.31), SIMDE_FLOAT32_C( 38.79), SIMDE_FLOAT32_C( 611.34), SIMDE_FLOAT32_C( 355.83), SIMDE_FLOAT32_C( 445.31) } }, { { SIMDE_FLOAT32_C( 394.72), SIMDE_FLOAT32_C( -2.71), SIMDE_FLOAT32_C( 433.21), SIMDE_FLOAT32_C( 979.88), SIMDE_FLOAT32_C( 870.25), SIMDE_FLOAT32_C( 239.46), SIMDE_FLOAT32_C( 664.36), SIMDE_FLOAT32_C( -21.11) }, UINT8_C(236), { SIMDE_FLOAT32_C( 20.21), SIMDE_FLOAT32_C( -364.92), SIMDE_FLOAT32_C( 870.25), SIMDE_FLOAT32_C( 218.91) }, { 
SIMDE_FLOAT32_C( 394.72), SIMDE_FLOAT32_C( -2.71), SIMDE_FLOAT32_C( 870.25), SIMDE_FLOAT32_C( 218.91), SIMDE_FLOAT32_C( 870.25), SIMDE_FLOAT32_C( -364.92), SIMDE_FLOAT32_C( 870.25), SIMDE_FLOAT32_C( 218.91) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 src = simde_mm256_loadu_ps(test_vec[i].src); simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_mask_broadcast_f32x4(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_maskz_broadcast_f32x4 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const simde_float32 a[4]; const simde_float32 r[8]; } test_vec[] = { { UINT8_C(233), { SIMDE_FLOAT32_C( 749.31), SIMDE_FLOAT32_C( -425.85), SIMDE_FLOAT32_C( 752.50), SIMDE_FLOAT32_C( -794.87) }, { SIMDE_FLOAT32_C( 749.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -794.87), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -425.85), SIMDE_FLOAT32_C( 752.50), SIMDE_FLOAT32_C( -794.87) } }, { UINT8_C(237), { SIMDE_FLOAT32_C( 236.00), SIMDE_FLOAT32_C( 493.54), SIMDE_FLOAT32_C( -992.91), SIMDE_FLOAT32_C( 213.78) }, { SIMDE_FLOAT32_C( 236.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -992.91), SIMDE_FLOAT32_C( 213.78), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 493.54), SIMDE_FLOAT32_C( -992.91), SIMDE_FLOAT32_C( 213.78) } }, { UINT8_C(229), { SIMDE_FLOAT32_C( 572.59), SIMDE_FLOAT32_C( -505.20), SIMDE_FLOAT32_C( -888.69), SIMDE_FLOAT32_C( -168.99) }, { SIMDE_FLOAT32_C( 572.59), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -888.69), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -505.20), SIMDE_FLOAT32_C( -888.69), SIMDE_FLOAT32_C( -168.99) } }, { UINT8_C(115), { SIMDE_FLOAT32_C( 961.78), SIMDE_FLOAT32_C( 587.15), SIMDE_FLOAT32_C( 162.08), SIMDE_FLOAT32_C( 131.99) }, { SIMDE_FLOAT32_C( 961.78), SIMDE_FLOAT32_C( 587.15), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( 961.78), SIMDE_FLOAT32_C( 587.15), SIMDE_FLOAT32_C( 162.08), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C(107), { SIMDE_FLOAT32_C( 722.82), SIMDE_FLOAT32_C( 519.77), SIMDE_FLOAT32_C( -160.36), SIMDE_FLOAT32_C( 908.34) }, { SIMDE_FLOAT32_C( 722.82), SIMDE_FLOAT32_C( 519.77), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 908.34), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 519.77), SIMDE_FLOAT32_C( -160.36), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C(148), { SIMDE_FLOAT32_C( 251.18), SIMDE_FLOAT32_C( -347.86), SIMDE_FLOAT32_C( -514.92), SIMDE_FLOAT32_C( -206.57) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -514.92), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 251.18), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -206.57) } }, { UINT8_C(156), { SIMDE_FLOAT32_C( 874.47), SIMDE_FLOAT32_C( -711.75), SIMDE_FLOAT32_C( -458.03), SIMDE_FLOAT32_C( -188.74) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -458.03), SIMDE_FLOAT32_C( -188.74), SIMDE_FLOAT32_C( 874.47), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -188.74) } }, { UINT8_C( 78), { SIMDE_FLOAT32_C( -804.36), SIMDE_FLOAT32_C( -844.65), SIMDE_FLOAT32_C( -82.05), SIMDE_FLOAT32_C( -986.67) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -844.65), SIMDE_FLOAT32_C( -82.05), SIMDE_FLOAT32_C( -986.67), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -82.05), SIMDE_FLOAT32_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_maskz_broadcast_f32x4(test_vec[i].k, a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_broadcast_f64x2 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( -818.06), SIMDE_FLOAT64_C( 862.04) }, { SIMDE_FLOAT64_C( -818.06), SIMDE_FLOAT64_C( 
862.04), SIMDE_FLOAT64_C( -818.06), SIMDE_FLOAT64_C( 862.04) } }, { { SIMDE_FLOAT64_C( 251.28), SIMDE_FLOAT64_C( -807.49) }, { SIMDE_FLOAT64_C( 251.28), SIMDE_FLOAT64_C( -807.49), SIMDE_FLOAT64_C( 251.28), SIMDE_FLOAT64_C( -807.49) } }, { { SIMDE_FLOAT64_C( 489.47), SIMDE_FLOAT64_C( 521.73) }, { SIMDE_FLOAT64_C( 489.47), SIMDE_FLOAT64_C( 521.73), SIMDE_FLOAT64_C( 489.47), SIMDE_FLOAT64_C( 521.73) } }, { { SIMDE_FLOAT64_C( 697.15), SIMDE_FLOAT64_C( -943.39) }, { SIMDE_FLOAT64_C( 697.15), SIMDE_FLOAT64_C( -943.39), SIMDE_FLOAT64_C( 697.15), SIMDE_FLOAT64_C( -943.39) } }, { { SIMDE_FLOAT64_C( 397.38), SIMDE_FLOAT64_C( 769.24) }, { SIMDE_FLOAT64_C( 397.38), SIMDE_FLOAT64_C( 769.24), SIMDE_FLOAT64_C( 397.38), SIMDE_FLOAT64_C( 769.24) } }, { { SIMDE_FLOAT64_C( 607.10), SIMDE_FLOAT64_C( -411.28) }, { SIMDE_FLOAT64_C( 607.10), SIMDE_FLOAT64_C( -411.28), SIMDE_FLOAT64_C( 607.10), SIMDE_FLOAT64_C( -411.28) } }, { { SIMDE_FLOAT64_C( -417.96), SIMDE_FLOAT64_C( -732.77) }, { SIMDE_FLOAT64_C( -417.96), SIMDE_FLOAT64_C( -732.77), SIMDE_FLOAT64_C( -417.96), SIMDE_FLOAT64_C( -732.77) } }, { { SIMDE_FLOAT64_C( 409.47), SIMDE_FLOAT64_C( -49.18) }, { SIMDE_FLOAT64_C( 409.47), SIMDE_FLOAT64_C( -49.18), SIMDE_FLOAT64_C( 409.47), SIMDE_FLOAT64_C( -49.18) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_broadcast_f64x2(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_mask_broadcast_f64x2 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[4]; const simde__mmask8 k; const simde_float64 a[2]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( -338.03), SIMDE_FLOAT64_C( 731.04), SIMDE_FLOAT64_C( 652.28), SIMDE_FLOAT64_C( 868.31) }, UINT8_C(176), { SIMDE_FLOAT64_C( -240.09), SIMDE_FLOAT64_C( 738.97) }, { SIMDE_FLOAT64_C( -338.03), SIMDE_FLOAT64_C( 731.04), SIMDE_FLOAT64_C( 
652.28), SIMDE_FLOAT64_C( 868.31) } }, { { SIMDE_FLOAT64_C( -161.99), SIMDE_FLOAT64_C( -539.33), SIMDE_FLOAT64_C( -491.52), SIMDE_FLOAT64_C( 960.24) }, UINT8_C( 23), { SIMDE_FLOAT64_C( 782.07), SIMDE_FLOAT64_C( 293.17) }, { SIMDE_FLOAT64_C( 782.07), SIMDE_FLOAT64_C( 293.17), SIMDE_FLOAT64_C( 782.07), SIMDE_FLOAT64_C( 960.24) } }, { { SIMDE_FLOAT64_C( -948.97), SIMDE_FLOAT64_C( 718.70), SIMDE_FLOAT64_C( -833.55), SIMDE_FLOAT64_C( 519.24) }, UINT8_C(166), { SIMDE_FLOAT64_C( 879.34), SIMDE_FLOAT64_C( -863.77) }, { SIMDE_FLOAT64_C( -948.97), SIMDE_FLOAT64_C( -863.77), SIMDE_FLOAT64_C( 879.34), SIMDE_FLOAT64_C( 519.24) } }, { { SIMDE_FLOAT64_C( 136.25), SIMDE_FLOAT64_C( -99.23), SIMDE_FLOAT64_C( 178.08), SIMDE_FLOAT64_C( -929.05) }, UINT8_C( 20), { SIMDE_FLOAT64_C( -614.75), SIMDE_FLOAT64_C( -70.42) }, { SIMDE_FLOAT64_C( 136.25), SIMDE_FLOAT64_C( -99.23), SIMDE_FLOAT64_C( -614.75), SIMDE_FLOAT64_C( -929.05) } }, { { SIMDE_FLOAT64_C( -617.52), SIMDE_FLOAT64_C( -721.29), SIMDE_FLOAT64_C( -762.54), SIMDE_FLOAT64_C( 70.31) }, UINT8_C( 5), { SIMDE_FLOAT64_C( -322.15), SIMDE_FLOAT64_C( -417.60) }, { SIMDE_FLOAT64_C( -322.15), SIMDE_FLOAT64_C( -721.29), SIMDE_FLOAT64_C( -322.15), SIMDE_FLOAT64_C( 70.31) } }, { { SIMDE_FLOAT64_C( -577.36), SIMDE_FLOAT64_C( 298.63), SIMDE_FLOAT64_C( -985.58), SIMDE_FLOAT64_C( -562.98) }, UINT8_C(167), { SIMDE_FLOAT64_C( -39.73), SIMDE_FLOAT64_C( 262.95) }, { SIMDE_FLOAT64_C( -39.73), SIMDE_FLOAT64_C( 262.95), SIMDE_FLOAT64_C( -39.73), SIMDE_FLOAT64_C( -562.98) } }, { { SIMDE_FLOAT64_C( 943.89), SIMDE_FLOAT64_C( -108.91), SIMDE_FLOAT64_C( -463.93), SIMDE_FLOAT64_C( 675.74) }, UINT8_C(200), { SIMDE_FLOAT64_C( -918.41), SIMDE_FLOAT64_C( 364.14) }, { SIMDE_FLOAT64_C( 943.89), SIMDE_FLOAT64_C( -108.91), SIMDE_FLOAT64_C( -463.93), SIMDE_FLOAT64_C( 364.14) } }, { { SIMDE_FLOAT64_C( -90.94), SIMDE_FLOAT64_C( -345.61), SIMDE_FLOAT64_C( -599.08), SIMDE_FLOAT64_C( -818.15) }, UINT8_C(109), { SIMDE_FLOAT64_C( -714.87), SIMDE_FLOAT64_C( -771.51) }, { 
SIMDE_FLOAT64_C( -714.87), SIMDE_FLOAT64_C( -345.61), SIMDE_FLOAT64_C( -714.87), SIMDE_FLOAT64_C( -771.51) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d src = simde_mm256_loadu_pd(test_vec[i].src); simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_mask_broadcast_f64x2(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_maskz_broadcast_f64x2 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const simde_float64 a[2]; const simde_float64 r[4]; } test_vec[] = { { UINT8_C(197), { SIMDE_FLOAT64_C( -215.62), SIMDE_FLOAT64_C( 35.19) }, { SIMDE_FLOAT64_C( -215.62), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -215.62), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(198), { SIMDE_FLOAT64_C( 716.52), SIMDE_FLOAT64_C( 473.89) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 473.89), SIMDE_FLOAT64_C( 716.52), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C( 85), { SIMDE_FLOAT64_C( 312.77), SIMDE_FLOAT64_C( 715.13) }, { SIMDE_FLOAT64_C( 312.77), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 312.77), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(212), { SIMDE_FLOAT64_C( 527.96), SIMDE_FLOAT64_C( -502.50) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 527.96), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C( 50), { SIMDE_FLOAT64_C( -571.65), SIMDE_FLOAT64_C( 248.58) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 248.58), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(207), { SIMDE_FLOAT64_C( 234.22), SIMDE_FLOAT64_C( 607.13) }, { SIMDE_FLOAT64_C( 234.22), SIMDE_FLOAT64_C( 607.13), SIMDE_FLOAT64_C( 234.22), SIMDE_FLOAT64_C( 607.13) } }, { UINT8_C( 5), { SIMDE_FLOAT64_C( -229.19), SIMDE_FLOAT64_C( -58.91) }, { SIMDE_FLOAT64_C( -229.19), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -229.19), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(104), { SIMDE_FLOAT64_C( -8.77), SIMDE_FLOAT64_C( 682.18) }, { SIMDE_FLOAT64_C( 
0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 682.18) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_maskz_broadcast_f64x2(test_vec[i].k, a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_broadcast_f32x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m512 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 241.63), SIMDE_FLOAT32_C( 962.32), SIMDE_FLOAT32_C( -223.53), SIMDE_FLOAT32_C( -221.69)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 241.63), SIMDE_FLOAT32_C( 962.32), SIMDE_FLOAT32_C( -223.53), SIMDE_FLOAT32_C( -221.69), SIMDE_FLOAT32_C( 241.63), SIMDE_FLOAT32_C( 962.32), SIMDE_FLOAT32_C( -223.53), SIMDE_FLOAT32_C( -221.69), SIMDE_FLOAT32_C( 241.63), SIMDE_FLOAT32_C( 962.32), SIMDE_FLOAT32_C( -223.53), SIMDE_FLOAT32_C( -221.69), SIMDE_FLOAT32_C( 241.63), SIMDE_FLOAT32_C( 962.32), SIMDE_FLOAT32_C( -223.53), SIMDE_FLOAT32_C( -221.69)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 115.71), SIMDE_FLOAT32_C( -206.04), SIMDE_FLOAT32_C( -581.48), SIMDE_FLOAT32_C( 670.36)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 115.71), SIMDE_FLOAT32_C( -206.04), SIMDE_FLOAT32_C( -581.48), SIMDE_FLOAT32_C( 670.36), SIMDE_FLOAT32_C( 115.71), SIMDE_FLOAT32_C( -206.04), SIMDE_FLOAT32_C( -581.48), SIMDE_FLOAT32_C( 670.36), SIMDE_FLOAT32_C( 115.71), SIMDE_FLOAT32_C( -206.04), SIMDE_FLOAT32_C( -581.48), SIMDE_FLOAT32_C( 670.36), SIMDE_FLOAT32_C( 115.71), SIMDE_FLOAT32_C( -206.04), SIMDE_FLOAT32_C( -581.48), SIMDE_FLOAT32_C( 670.36)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 297.45), SIMDE_FLOAT32_C( 193.39), SIMDE_FLOAT32_C( -163.24), SIMDE_FLOAT32_C( -775.87)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 297.45), SIMDE_FLOAT32_C( 193.39), SIMDE_FLOAT32_C( -163.24), SIMDE_FLOAT32_C( -775.87), SIMDE_FLOAT32_C( 297.45), SIMDE_FLOAT32_C( 193.39), SIMDE_FLOAT32_C( -163.24), SIMDE_FLOAT32_C( 
-775.87), SIMDE_FLOAT32_C( 297.45), SIMDE_FLOAT32_C( 193.39), SIMDE_FLOAT32_C( -163.24), SIMDE_FLOAT32_C( -775.87), SIMDE_FLOAT32_C( 297.45), SIMDE_FLOAT32_C( 193.39), SIMDE_FLOAT32_C( -163.24), SIMDE_FLOAT32_C( -775.87)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -675.37), SIMDE_FLOAT32_C( 853.20), SIMDE_FLOAT32_C( -377.67), SIMDE_FLOAT32_C( 233.14)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -675.37), SIMDE_FLOAT32_C( 853.20), SIMDE_FLOAT32_C( -377.67), SIMDE_FLOAT32_C( 233.14), SIMDE_FLOAT32_C( -675.37), SIMDE_FLOAT32_C( 853.20), SIMDE_FLOAT32_C( -377.67), SIMDE_FLOAT32_C( 233.14), SIMDE_FLOAT32_C( -675.37), SIMDE_FLOAT32_C( 853.20), SIMDE_FLOAT32_C( -377.67), SIMDE_FLOAT32_C( 233.14), SIMDE_FLOAT32_C( -675.37), SIMDE_FLOAT32_C( 853.20), SIMDE_FLOAT32_C( -377.67), SIMDE_FLOAT32_C( 233.14)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -156.08), SIMDE_FLOAT32_C( -209.26), SIMDE_FLOAT32_C( 48.51), SIMDE_FLOAT32_C( -627.76)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -156.08), SIMDE_FLOAT32_C( -209.26), SIMDE_FLOAT32_C( 48.51), SIMDE_FLOAT32_C( -627.76), SIMDE_FLOAT32_C( -156.08), SIMDE_FLOAT32_C( -209.26), SIMDE_FLOAT32_C( 48.51), SIMDE_FLOAT32_C( -627.76), SIMDE_FLOAT32_C( -156.08), SIMDE_FLOAT32_C( -209.26), SIMDE_FLOAT32_C( 48.51), SIMDE_FLOAT32_C( -627.76), SIMDE_FLOAT32_C( -156.08), SIMDE_FLOAT32_C( -209.26), SIMDE_FLOAT32_C( 48.51), SIMDE_FLOAT32_C( -627.76)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 108.40), SIMDE_FLOAT32_C( 970.37), SIMDE_FLOAT32_C( 934.72), SIMDE_FLOAT32_C( -932.81)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 108.40), SIMDE_FLOAT32_C( 970.37), SIMDE_FLOAT32_C( 934.72), SIMDE_FLOAT32_C( -932.81), SIMDE_FLOAT32_C( 108.40), SIMDE_FLOAT32_C( 970.37), SIMDE_FLOAT32_C( 934.72), SIMDE_FLOAT32_C( -932.81), SIMDE_FLOAT32_C( 108.40), SIMDE_FLOAT32_C( 970.37), SIMDE_FLOAT32_C( 934.72), SIMDE_FLOAT32_C( -932.81), SIMDE_FLOAT32_C( 108.40), SIMDE_FLOAT32_C( 970.37), SIMDE_FLOAT32_C( 934.72), SIMDE_FLOAT32_C( -932.81)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 690.58), SIMDE_FLOAT32_C( 
836.42), SIMDE_FLOAT32_C( -952.66), SIMDE_FLOAT32_C( 22.35)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 690.58), SIMDE_FLOAT32_C( 836.42), SIMDE_FLOAT32_C( -952.66), SIMDE_FLOAT32_C( 22.35), SIMDE_FLOAT32_C( 690.58), SIMDE_FLOAT32_C( 836.42), SIMDE_FLOAT32_C( -952.66), SIMDE_FLOAT32_C( 22.35), SIMDE_FLOAT32_C( 690.58), SIMDE_FLOAT32_C( 836.42), SIMDE_FLOAT32_C( -952.66), SIMDE_FLOAT32_C( 22.35), SIMDE_FLOAT32_C( 690.58), SIMDE_FLOAT32_C( 836.42), SIMDE_FLOAT32_C( -952.66), SIMDE_FLOAT32_C( 22.35)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 740.10), SIMDE_FLOAT32_C( 159.65), SIMDE_FLOAT32_C( -65.49), SIMDE_FLOAT32_C( 946.83)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 740.10), SIMDE_FLOAT32_C( 159.65), SIMDE_FLOAT32_C( -65.49), SIMDE_FLOAT32_C( 946.83), SIMDE_FLOAT32_C( 740.10), SIMDE_FLOAT32_C( 159.65), SIMDE_FLOAT32_C( -65.49), SIMDE_FLOAT32_C( 946.83), SIMDE_FLOAT32_C( 740.10), SIMDE_FLOAT32_C( 159.65), SIMDE_FLOAT32_C( -65.49), SIMDE_FLOAT32_C( 946.83), SIMDE_FLOAT32_C( 740.10), SIMDE_FLOAT32_C( 159.65), SIMDE_FLOAT32_C( -65.49), SIMDE_FLOAT32_C( 946.83)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_broadcast_f32x4(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_broadcast_f32x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m128 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -476.82), SIMDE_FLOAT32_C( 687.27), SIMDE_FLOAT32_C( 239.12), SIMDE_FLOAT32_C( -622.96), SIMDE_FLOAT32_C( 479.82), SIMDE_FLOAT32_C( -652.18), SIMDE_FLOAT32_C( 585.66), SIMDE_FLOAT32_C( -840.39), SIMDE_FLOAT32_C( -680.47), SIMDE_FLOAT32_C( -211.69), SIMDE_FLOAT32_C( 879.50), SIMDE_FLOAT32_C( 245.88), SIMDE_FLOAT32_C( 689.68), SIMDE_FLOAT32_C( 107.64), SIMDE_FLOAT32_C( -872.56), SIMDE_FLOAT32_C( -586.10)), UINT16_C(63721), simde_mm_set_ps(SIMDE_FLOAT32_C( 337.98), SIMDE_FLOAT32_C( -931.30), SIMDE_FLOAT32_C( -93.71), 
SIMDE_FLOAT32_C( 492.43)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 337.98), SIMDE_FLOAT32_C( -931.30), SIMDE_FLOAT32_C( -93.71), SIMDE_FLOAT32_C( 492.43), SIMDE_FLOAT32_C( 337.98), SIMDE_FLOAT32_C( -652.18), SIMDE_FLOAT32_C( 585.66), SIMDE_FLOAT32_C( -840.39), SIMDE_FLOAT32_C( 337.98), SIMDE_FLOAT32_C( -931.30), SIMDE_FLOAT32_C( -93.71), SIMDE_FLOAT32_C( 245.88), SIMDE_FLOAT32_C( 337.98), SIMDE_FLOAT32_C( 107.64), SIMDE_FLOAT32_C( -872.56), SIMDE_FLOAT32_C( 492.43)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 792.34), SIMDE_FLOAT32_C( -828.98), SIMDE_FLOAT32_C( 152.82), SIMDE_FLOAT32_C( 261.49), SIMDE_FLOAT32_C( -674.96), SIMDE_FLOAT32_C( -626.70), SIMDE_FLOAT32_C( -365.50), SIMDE_FLOAT32_C( 522.39), SIMDE_FLOAT32_C( 659.15), SIMDE_FLOAT32_C( 204.13), SIMDE_FLOAT32_C( 487.20), SIMDE_FLOAT32_C( 790.92), SIMDE_FLOAT32_C( -372.23), SIMDE_FLOAT32_C( -362.18), SIMDE_FLOAT32_C( 725.62), SIMDE_FLOAT32_C( 817.00)), UINT16_C(44067), simde_mm_set_ps(SIMDE_FLOAT32_C( -858.39), SIMDE_FLOAT32_C( 608.18), SIMDE_FLOAT32_C( 129.78), SIMDE_FLOAT32_C( -779.98)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -858.39), SIMDE_FLOAT32_C( -828.98), SIMDE_FLOAT32_C( 129.78), SIMDE_FLOAT32_C( 261.49), SIMDE_FLOAT32_C( -858.39), SIMDE_FLOAT32_C( 608.18), SIMDE_FLOAT32_C( -365.50), SIMDE_FLOAT32_C( 522.39), SIMDE_FLOAT32_C( 659.15), SIMDE_FLOAT32_C( 204.13), SIMDE_FLOAT32_C( 129.78), SIMDE_FLOAT32_C( 790.92), SIMDE_FLOAT32_C( -372.23), SIMDE_FLOAT32_C( -362.18), SIMDE_FLOAT32_C( 129.78), SIMDE_FLOAT32_C( -779.98)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 433.05), SIMDE_FLOAT32_C( 405.44), SIMDE_FLOAT32_C( 652.04), SIMDE_FLOAT32_C( -453.75), SIMDE_FLOAT32_C( 56.24), SIMDE_FLOAT32_C( 506.86), SIMDE_FLOAT32_C( -127.57), SIMDE_FLOAT32_C( -230.83), SIMDE_FLOAT32_C( -815.89), SIMDE_FLOAT32_C( 351.22), SIMDE_FLOAT32_C( -739.81), SIMDE_FLOAT32_C( -104.33), SIMDE_FLOAT32_C( 331.38), SIMDE_FLOAT32_C( 749.42), SIMDE_FLOAT32_C( 151.95), SIMDE_FLOAT32_C( -25.90)), UINT16_C(12331), simde_mm_set_ps(SIMDE_FLOAT32_C( 
-159.95), SIMDE_FLOAT32_C( -519.57), SIMDE_FLOAT32_C( -66.62), SIMDE_FLOAT32_C( -690.93)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 433.05), SIMDE_FLOAT32_C( 405.44), SIMDE_FLOAT32_C( -66.62), SIMDE_FLOAT32_C( -690.93), SIMDE_FLOAT32_C( 56.24), SIMDE_FLOAT32_C( 506.86), SIMDE_FLOAT32_C( -127.57), SIMDE_FLOAT32_C( -230.83), SIMDE_FLOAT32_C( -815.89), SIMDE_FLOAT32_C( 351.22), SIMDE_FLOAT32_C( -66.62), SIMDE_FLOAT32_C( -104.33), SIMDE_FLOAT32_C( -159.95), SIMDE_FLOAT32_C( 749.42), SIMDE_FLOAT32_C( -66.62), SIMDE_FLOAT32_C( -690.93)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 417.50), SIMDE_FLOAT32_C( 245.21), SIMDE_FLOAT32_C( 960.01), SIMDE_FLOAT32_C( -303.61), SIMDE_FLOAT32_C( -550.57), SIMDE_FLOAT32_C( 665.98), SIMDE_FLOAT32_C( -521.00), SIMDE_FLOAT32_C( 239.39), SIMDE_FLOAT32_C( 798.32), SIMDE_FLOAT32_C( 251.37), SIMDE_FLOAT32_C( -596.78), SIMDE_FLOAT32_C( 840.69), SIMDE_FLOAT32_C( -684.92), SIMDE_FLOAT32_C( 87.08), SIMDE_FLOAT32_C( 734.84), SIMDE_FLOAT32_C( -854.89)), UINT16_C(52021), simde_mm_set_ps(SIMDE_FLOAT32_C( -116.62), SIMDE_FLOAT32_C( -17.97), SIMDE_FLOAT32_C( 229.99), SIMDE_FLOAT32_C( -771.72)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -116.62), SIMDE_FLOAT32_C( -17.97), SIMDE_FLOAT32_C( 960.01), SIMDE_FLOAT32_C( -303.61), SIMDE_FLOAT32_C( -116.62), SIMDE_FLOAT32_C( 665.98), SIMDE_FLOAT32_C( 229.99), SIMDE_FLOAT32_C( -771.72), SIMDE_FLOAT32_C( 798.32), SIMDE_FLOAT32_C( 251.37), SIMDE_FLOAT32_C( 229.99), SIMDE_FLOAT32_C( -771.72), SIMDE_FLOAT32_C( -684.92), SIMDE_FLOAT32_C( -17.97), SIMDE_FLOAT32_C( 734.84), SIMDE_FLOAT32_C( -771.72)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -616.45), SIMDE_FLOAT32_C( 914.70), SIMDE_FLOAT32_C( -963.67), SIMDE_FLOAT32_C( -935.61), SIMDE_FLOAT32_C( 106.52), SIMDE_FLOAT32_C( 367.48), SIMDE_FLOAT32_C( -10.30), SIMDE_FLOAT32_C( 543.55), SIMDE_FLOAT32_C( 142.17), SIMDE_FLOAT32_C( -844.51), SIMDE_FLOAT32_C( -959.58), SIMDE_FLOAT32_C( 913.58), SIMDE_FLOAT32_C( -227.61), SIMDE_FLOAT32_C( -979.09), SIMDE_FLOAT32_C( -746.95), SIMDE_FLOAT32_C( 
363.67)), UINT16_C(46395), simde_mm_set_ps(SIMDE_FLOAT32_C( -319.95), SIMDE_FLOAT32_C( -241.48), SIMDE_FLOAT32_C( -416.05), SIMDE_FLOAT32_C( -700.83)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -319.95), SIMDE_FLOAT32_C( 914.70), SIMDE_FLOAT32_C( -416.05), SIMDE_FLOAT32_C( -700.83), SIMDE_FLOAT32_C( 106.52), SIMDE_FLOAT32_C( -241.48), SIMDE_FLOAT32_C( -10.30), SIMDE_FLOAT32_C( -700.83), SIMDE_FLOAT32_C( 142.17), SIMDE_FLOAT32_C( -844.51), SIMDE_FLOAT32_C( -416.05), SIMDE_FLOAT32_C( -700.83), SIMDE_FLOAT32_C( -319.95), SIMDE_FLOAT32_C( -979.09), SIMDE_FLOAT32_C( -416.05), SIMDE_FLOAT32_C( -700.83)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 810.23), SIMDE_FLOAT32_C( -571.66), SIMDE_FLOAT32_C( -313.94), SIMDE_FLOAT32_C( 812.08), SIMDE_FLOAT32_C( 905.89), SIMDE_FLOAT32_C( 95.84), SIMDE_FLOAT32_C( -942.64), SIMDE_FLOAT32_C( 490.95), SIMDE_FLOAT32_C( 432.01), SIMDE_FLOAT32_C( -989.57), SIMDE_FLOAT32_C( -908.07), SIMDE_FLOAT32_C( 843.06), SIMDE_FLOAT32_C( -567.12), SIMDE_FLOAT32_C( 561.55), SIMDE_FLOAT32_C( -316.58), SIMDE_FLOAT32_C( -224.94)), UINT16_C(28510), simde_mm_set_ps(SIMDE_FLOAT32_C( 608.47), SIMDE_FLOAT32_C( 502.71), SIMDE_FLOAT32_C( 524.73), SIMDE_FLOAT32_C( -206.66)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 810.23), SIMDE_FLOAT32_C( 502.71), SIMDE_FLOAT32_C( 524.73), SIMDE_FLOAT32_C( 812.08), SIMDE_FLOAT32_C( 608.47), SIMDE_FLOAT32_C( 502.71), SIMDE_FLOAT32_C( 524.73), SIMDE_FLOAT32_C( -206.66), SIMDE_FLOAT32_C( 432.01), SIMDE_FLOAT32_C( 502.71), SIMDE_FLOAT32_C( -908.07), SIMDE_FLOAT32_C( -206.66), SIMDE_FLOAT32_C( 608.47), SIMDE_FLOAT32_C( 502.71), SIMDE_FLOAT32_C( 524.73), SIMDE_FLOAT32_C( -224.94)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -770.27), SIMDE_FLOAT32_C( -598.61), SIMDE_FLOAT32_C( 672.88), SIMDE_FLOAT32_C( -504.06), SIMDE_FLOAT32_C( 481.78), SIMDE_FLOAT32_C( -154.88), SIMDE_FLOAT32_C( -363.51), SIMDE_FLOAT32_C( -643.93), SIMDE_FLOAT32_C( -973.84), SIMDE_FLOAT32_C( -599.20), SIMDE_FLOAT32_C( 230.44), SIMDE_FLOAT32_C( -713.35), SIMDE_FLOAT32_C( -554.88), 
SIMDE_FLOAT32_C( -858.98), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -441.11)), UINT16_C( 6749), simde_mm_set_ps(SIMDE_FLOAT32_C( 687.60), SIMDE_FLOAT32_C( 681.66), SIMDE_FLOAT32_C( -362.35), SIMDE_FLOAT32_C( -482.20)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -770.27), SIMDE_FLOAT32_C( -598.61), SIMDE_FLOAT32_C( 672.88), SIMDE_FLOAT32_C( -482.20), SIMDE_FLOAT32_C( 687.60), SIMDE_FLOAT32_C( -154.88), SIMDE_FLOAT32_C( -362.35), SIMDE_FLOAT32_C( -643.93), SIMDE_FLOAT32_C( -973.84), SIMDE_FLOAT32_C( 681.66), SIMDE_FLOAT32_C( 230.44), SIMDE_FLOAT32_C( -482.20), SIMDE_FLOAT32_C( 687.60), SIMDE_FLOAT32_C( 681.66), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -482.20)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -454.36), SIMDE_FLOAT32_C( -172.69), SIMDE_FLOAT32_C( 256.23), SIMDE_FLOAT32_C( 682.27), SIMDE_FLOAT32_C( -43.91), SIMDE_FLOAT32_C( -300.48), SIMDE_FLOAT32_C( 916.93), SIMDE_FLOAT32_C( -592.77), SIMDE_FLOAT32_C( 939.83), SIMDE_FLOAT32_C( -553.88), SIMDE_FLOAT32_C( -796.09), SIMDE_FLOAT32_C( -515.91), SIMDE_FLOAT32_C( 623.85), SIMDE_FLOAT32_C( 359.37), SIMDE_FLOAT32_C( -557.79), SIMDE_FLOAT32_C( 595.65)), UINT16_C( 8287), simde_mm_set_ps(SIMDE_FLOAT32_C( -705.53), SIMDE_FLOAT32_C( 238.42), SIMDE_FLOAT32_C( 504.37), SIMDE_FLOAT32_C( 296.48)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -454.36), SIMDE_FLOAT32_C( -172.69), SIMDE_FLOAT32_C( 504.37), SIMDE_FLOAT32_C( 682.27), SIMDE_FLOAT32_C( -43.91), SIMDE_FLOAT32_C( -300.48), SIMDE_FLOAT32_C( 916.93), SIMDE_FLOAT32_C( -592.77), SIMDE_FLOAT32_C( 939.83), SIMDE_FLOAT32_C( 238.42), SIMDE_FLOAT32_C( -796.09), SIMDE_FLOAT32_C( 296.48), SIMDE_FLOAT32_C( -705.53), SIMDE_FLOAT32_C( 238.42), SIMDE_FLOAT32_C( 504.37), SIMDE_FLOAT32_C( 296.48)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_broadcast_f32x4(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm512_maskz_broadcast_f32x4.
 *
 * Each of the eight cases stores a 16-bit mask `k`, a 128-bit float vector
 * `a`, and the expected 512-bit result `r`. In the stored expectations,
 * lane i of `r` holds element (i % 4) of `a` when bit i of `k` is set, and
 * 0.00 when the bit is clear (simde_mm512_set_ps / simde_mm_set_ps list the
 * highest lane first).
 *
 * Returns 0 once every case has been compared.
 * NOTE(review): the third argument (1) of simde_assert_m512_close is
 * presumably a comparison precision -- confirm against the macro definition.
 */
static int
test_simde_mm512_maskz_broadcast_f32x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m128 a; simde__m512 r; } test_vec[8] = { { UINT16_C(12860), simde_mm_set_ps(SIMDE_FLOAT32_C( -93.71), SIMDE_FLOAT32_C( 137.99), SIMDE_FLOAT32_C( 492.43), SIMDE_FLOAT32_C( 420.83)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 492.43), SIMDE_FLOAT32_C( 420.83), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 492.43), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 492.43), SIMDE_FLOAT32_C( 420.83), SIMDE_FLOAT32_C( -93.71), SIMDE_FLOAT32_C( 137.99), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(63770), simde_mm_set_ps(SIMDE_FLOAT32_C( -652.18), SIMDE_FLOAT32_C( -872.56), SIMDE_FLOAT32_C( 585.66), SIMDE_FLOAT32_C( -586.10)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -652.18), SIMDE_FLOAT32_C( -872.56), SIMDE_FLOAT32_C( 585.66), SIMDE_FLOAT32_C( -586.10), SIMDE_FLOAT32_C( -652.18), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -586.10), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -586.10), SIMDE_FLOAT32_C( -652.18), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 585.66), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(26030), simde_mm_set_ps(SIMDE_FLOAT32_C( 700.39), SIMDE_FLOAT32_C( 129.78), SIMDE_FLOAT32_C( 708.98), SIMDE_FLOAT32_C( -779.98)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 129.78), SIMDE_FLOAT32_C( 708.98), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 129.78), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -779.98), SIMDE_FLOAT32_C( 700.39), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 708.98), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 700.39), SIMDE_FLOAT32_C( 129.78), SIMDE_FLOAT32_C( 708.98), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(41122), simde_mm_set_ps(SIMDE_FLOAT32_C( -362.18), SIMDE_FLOAT32_C( -626.70), SIMDE_FLOAT32_C( 725.62), SIMDE_FLOAT32_C( -365.50)),
simde_mm512_set_ps(SIMDE_FLOAT32_C( -362.18), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 725.62), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -362.18), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 725.62), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 725.62), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(49851), simde_mm_set_ps(SIMDE_FLOAT32_C( -519.57), SIMDE_FLOAT32_C( -632.83), SIMDE_FLOAT32_C( -66.62), SIMDE_FLOAT32_C( -181.94)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -519.57), SIMDE_FLOAT32_C( -632.83), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -66.62), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -519.57), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -66.62), SIMDE_FLOAT32_C( -181.94), SIMDE_FLOAT32_C( -519.57), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -66.62), SIMDE_FLOAT32_C( -181.94)) }, { UINT16_C(41826), simde_mm_set_ps(SIMDE_FLOAT32_C( 56.24), SIMDE_FLOAT32_C( 749.42), SIMDE_FLOAT32_C( 506.86), SIMDE_FLOAT32_C( 151.95)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 56.24), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 506.86), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 506.86), SIMDE_FLOAT32_C( 151.95), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 749.42), SIMDE_FLOAT32_C( 506.86), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 506.86), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(19285), simde_mm_set_ps(SIMDE_FLOAT32_C( 57.37), SIMDE_FLOAT32_C( -17.97), SIMDE_FLOAT32_C( 347.13), SIMDE_FLOAT32_C( 229.99)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -17.97), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 57.37), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 347.13), SIMDE_FLOAT32_C( 229.99), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -17.97), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(
229.99), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -17.97), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 229.99)) }, { UINT16_C(48133), simde_mm_set_ps(SIMDE_FLOAT32_C( -684.92), SIMDE_FLOAT32_C( -550.57), SIMDE_FLOAT32_C( 87.08), SIMDE_FLOAT32_C( 665.98)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -684.92), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 87.08), SIMDE_FLOAT32_C( 665.98), SIMDE_FLOAT32_C( -684.92), SIMDE_FLOAT32_C( -550.57), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -550.57), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 665.98)) } };
/* Run each stored case through the function under test and compare the
 * computed vector with the stored expectation. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_maskz_broadcast_f32x4(test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_broadcast_f64x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m512d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 241.63), SIMDE_FLOAT64_C( 962.32), SIMDE_FLOAT64_C( -223.53), SIMDE_FLOAT64_C( -221.69)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 241.63), SIMDE_FLOAT64_C( 962.32), SIMDE_FLOAT64_C( -223.53), SIMDE_FLOAT64_C( -221.69), SIMDE_FLOAT64_C( 241.63), SIMDE_FLOAT64_C( 962.32), SIMDE_FLOAT64_C( -223.53), SIMDE_FLOAT64_C( -221.69)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 115.71), SIMDE_FLOAT64_C( -206.04), SIMDE_FLOAT64_C( -581.48), SIMDE_FLOAT64_C( 670.36)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 115.71), SIMDE_FLOAT64_C( -206.04), SIMDE_FLOAT64_C( -581.48), SIMDE_FLOAT64_C( 670.36), SIMDE_FLOAT64_C( 115.71), SIMDE_FLOAT64_C( -206.04), SIMDE_FLOAT64_C( -581.48), SIMDE_FLOAT64_C( 670.36)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 297.45), SIMDE_FLOAT64_C( 193.39), SIMDE_FLOAT64_C( -163.24), SIMDE_FLOAT64_C( -775.87)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 297.45), SIMDE_FLOAT64_C( 193.39), SIMDE_FLOAT64_C( -163.24), SIMDE_FLOAT64_C(
-775.87), SIMDE_FLOAT64_C( 297.45), SIMDE_FLOAT64_C( 193.39), SIMDE_FLOAT64_C( -163.24), SIMDE_FLOAT64_C( -775.87)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -675.37), SIMDE_FLOAT64_C( 853.20), SIMDE_FLOAT64_C( -377.67), SIMDE_FLOAT64_C( 233.14)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -675.37), SIMDE_FLOAT64_C( 853.20), SIMDE_FLOAT64_C( -377.67), SIMDE_FLOAT64_C( 233.14), SIMDE_FLOAT64_C( -675.37), SIMDE_FLOAT64_C( 853.20), SIMDE_FLOAT64_C( -377.67), SIMDE_FLOAT64_C( 233.14)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -156.08), SIMDE_FLOAT64_C( -209.26), SIMDE_FLOAT64_C( 48.51), SIMDE_FLOAT64_C( -627.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -156.08), SIMDE_FLOAT64_C( -209.26), SIMDE_FLOAT64_C( 48.51), SIMDE_FLOAT64_C( -627.76), SIMDE_FLOAT64_C( -156.08), SIMDE_FLOAT64_C( -209.26), SIMDE_FLOAT64_C( 48.51), SIMDE_FLOAT64_C( -627.76)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 108.40), SIMDE_FLOAT64_C( 970.37), SIMDE_FLOAT64_C( 934.72), SIMDE_FLOAT64_C( -932.81)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 108.40), SIMDE_FLOAT64_C( 970.37), SIMDE_FLOAT64_C( 934.72), SIMDE_FLOAT64_C( -932.81), SIMDE_FLOAT64_C( 108.40), SIMDE_FLOAT64_C( 970.37), SIMDE_FLOAT64_C( 934.72), SIMDE_FLOAT64_C( -932.81)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 690.58), SIMDE_FLOAT64_C( 836.42), SIMDE_FLOAT64_C( -952.66), SIMDE_FLOAT64_C( 22.35)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 690.58), SIMDE_FLOAT64_C( 836.42), SIMDE_FLOAT64_C( -952.66), SIMDE_FLOAT64_C( 22.35), SIMDE_FLOAT64_C( 690.58), SIMDE_FLOAT64_C( 836.42), SIMDE_FLOAT64_C( -952.66), SIMDE_FLOAT64_C( 22.35)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 740.10), SIMDE_FLOAT64_C( 159.65), SIMDE_FLOAT64_C( -65.49), SIMDE_FLOAT64_C( 946.83)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 740.10), SIMDE_FLOAT64_C( 159.65), SIMDE_FLOAT64_C( -65.49), SIMDE_FLOAT64_C( 946.83), SIMDE_FLOAT64_C( 740.10), SIMDE_FLOAT64_C( 159.65), SIMDE_FLOAT64_C( -65.49), SIMDE_FLOAT64_C( 946.83)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { 
simde__m512d r = simde_mm512_broadcast_f64x4(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm512_mask_broadcast_f64x4.
 *
 * Each of the eight cases stores a 512-bit double vector `src`, an 8-bit
 * mask `k`, a 256-bit double vector `a`, and the expected 512-bit result
 * `r`. In the stored expectations, lane i of `r` holds element (i % 4) of
 * `a` when bit i of `k` is set, and lane i of `src` when the bit is clear
 * (simde_mm512_set_pd / simde_mm256_set_pd list the highest lane first).
 *
 * Returns 0 once every case has been compared.
 * NOTE(review): the third argument (1) of simde_assert_m512d_close is
 * presumably a comparison precision -- confirm against the macro definition.
 */
static int test_simde_mm512_mask_broadcast_f64x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m256d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -396.88), SIMDE_FLOAT64_C( 354.04), SIMDE_FLOAT64_C( 268.06), SIMDE_FLOAT64_C( -972.10), SIMDE_FLOAT64_C( -213.85), SIMDE_FLOAT64_C( -574.68), SIMDE_FLOAT64_C( 137.99), SIMDE_FLOAT64_C( 420.83)), UINT8_C( 60), simde_mm256_set_pd(SIMDE_FLOAT64_C( 337.98), SIMDE_FLOAT64_C( -931.30), SIMDE_FLOAT64_C( -93.71), SIMDE_FLOAT64_C( 492.43)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -396.88), SIMDE_FLOAT64_C( 354.04), SIMDE_FLOAT64_C( -93.71), SIMDE_FLOAT64_C( 492.43), SIMDE_FLOAT64_C( 337.98), SIMDE_FLOAT64_C( -931.30), SIMDE_FLOAT64_C( 137.99), SIMDE_FLOAT64_C( 420.83)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -680.47), SIMDE_FLOAT64_C( -211.69), SIMDE_FLOAT64_C( 879.50), SIMDE_FLOAT64_C( 245.88), SIMDE_FLOAT64_C( 689.68), SIMDE_FLOAT64_C( 107.64), SIMDE_FLOAT64_C( -872.56), SIMDE_FLOAT64_C( -586.10)), UINT8_C( 26), simde_mm256_set_pd(SIMDE_FLOAT64_C( -622.96), SIMDE_FLOAT64_C( 479.82), SIMDE_FLOAT64_C( -652.18), SIMDE_FLOAT64_C( 585.66)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -680.47), SIMDE_FLOAT64_C( -211.69), SIMDE_FLOAT64_C( 879.50), SIMDE_FLOAT64_C( 585.66), SIMDE_FLOAT64_C( -622.96), SIMDE_FLOAT64_C( 107.64), SIMDE_FLOAT64_C( -652.18), SIMDE_FLOAT64_C( -586.10)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 408.02), SIMDE_FLOAT64_C( 662.99), SIMDE_FLOAT64_C( -491.44), SIMDE_FLOAT64_C( -586.97), SIMDE_FLOAT64_C( -858.39), SIMDE_FLOAT64_C( 608.18), SIMDE_FLOAT64_C( 129.78), SIMDE_FLOAT64_C( -779.98)), UINT8_C(174), simde_mm256_set_pd(SIMDE_FLOAT64_C( 469.58), SIMDE_FLOAT64_C( -229.18), SIMDE_FLOAT64_C( 700.39), SIMDE_FLOAT64_C( 708.98)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 469.58), SIMDE_FLOAT64_C( 662.99), SIMDE_FLOAT64_C( 700.39), SIMDE_FLOAT64_C(
-586.97), SIMDE_FLOAT64_C( 469.58), SIMDE_FLOAT64_C( -229.18), SIMDE_FLOAT64_C( 700.39), SIMDE_FLOAT64_C( -779.98)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -491.65), SIMDE_FLOAT64_C( 792.34), SIMDE_FLOAT64_C( -828.98), SIMDE_FLOAT64_C( 152.82), SIMDE_FLOAT64_C( 261.49), SIMDE_FLOAT64_C( -674.96), SIMDE_FLOAT64_C( -626.70), SIMDE_FLOAT64_C( -365.50)), UINT8_C(162), simde_mm256_set_pd(SIMDE_FLOAT64_C( 790.92), SIMDE_FLOAT64_C( -372.23), SIMDE_FLOAT64_C( -362.18), SIMDE_FLOAT64_C( 725.62)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 790.92), SIMDE_FLOAT64_C( 792.34), SIMDE_FLOAT64_C( -362.18), SIMDE_FLOAT64_C( 152.82), SIMDE_FLOAT64_C( 261.49), SIMDE_FLOAT64_C( -674.96), SIMDE_FLOAT64_C( -362.18), SIMDE_FLOAT64_C( -365.50)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -230.83), SIMDE_FLOAT64_C( -480.11), SIMDE_FLOAT64_C( 511.94), SIMDE_FLOAT64_C( 614.74), SIMDE_FLOAT64_C( 794.95), SIMDE_FLOAT64_C( -331.37), SIMDE_FLOAT64_C( -632.83), SIMDE_FLOAT64_C( -181.94)), UINT8_C(187), simde_mm256_set_pd(SIMDE_FLOAT64_C( 125.71), SIMDE_FLOAT64_C( -159.95), SIMDE_FLOAT64_C( -519.57), SIMDE_FLOAT64_C( -66.62)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 125.71), SIMDE_FLOAT64_C( -480.11), SIMDE_FLOAT64_C( -519.57), SIMDE_FLOAT64_C( -66.62), SIMDE_FLOAT64_C( 125.71), SIMDE_FLOAT64_C( -331.37), SIMDE_FLOAT64_C( -519.57), SIMDE_FLOAT64_C( -66.62)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 870.57), SIMDE_FLOAT64_C( -815.89), SIMDE_FLOAT64_C( 351.22), SIMDE_FLOAT64_C( -739.81), SIMDE_FLOAT64_C( -104.33), SIMDE_FLOAT64_C( 331.38), SIMDE_FLOAT64_C( 749.42), SIMDE_FLOAT64_C( 151.95)), UINT8_C( 98), simde_mm256_set_pd(SIMDE_FLOAT64_C( 652.04), SIMDE_FLOAT64_C( -453.75), SIMDE_FLOAT64_C( 56.24), SIMDE_FLOAT64_C( 506.86)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 870.57), SIMDE_FLOAT64_C( -453.75), SIMDE_FLOAT64_C( 56.24), SIMDE_FLOAT64_C( -739.81), SIMDE_FLOAT64_C( -104.33), SIMDE_FLOAT64_C( 331.38), SIMDE_FLOAT64_C( 56.24), SIMDE_FLOAT64_C( 151.95)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -854.89),
SIMDE_FLOAT64_C( 376.16), SIMDE_FLOAT64_C( -846.26), SIMDE_FLOAT64_C( 817.65), SIMDE_FLOAT64_C( -403.95), SIMDE_FLOAT64_C( -116.62), SIMDE_FLOAT64_C( -17.97), SIMDE_FLOAT64_C( 229.99)), UINT8_C( 85), simde_mm256_set_pd(SIMDE_FLOAT64_C( -65.83), SIMDE_FLOAT64_C( -494.87), SIMDE_FLOAT64_C( 57.37), SIMDE_FLOAT64_C( 347.13)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -854.89), SIMDE_FLOAT64_C( -494.87), SIMDE_FLOAT64_C( -846.26), SIMDE_FLOAT64_C( 347.13), SIMDE_FLOAT64_C( -403.95), SIMDE_FLOAT64_C( -494.87), SIMDE_FLOAT64_C( -17.97), SIMDE_FLOAT64_C( 347.13)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -700.83), SIMDE_FLOAT64_C( -289.50), SIMDE_FLOAT64_C( 417.50), SIMDE_FLOAT64_C( 245.21), SIMDE_FLOAT64_C( 960.01), SIMDE_FLOAT64_C( -303.61), SIMDE_FLOAT64_C( -550.57), SIMDE_FLOAT64_C( 665.98)), UINT8_C( 5), simde_mm256_set_pd(SIMDE_FLOAT64_C( -596.78), SIMDE_FLOAT64_C( 840.69), SIMDE_FLOAT64_C( -684.92), SIMDE_FLOAT64_C( 87.08)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -700.83), SIMDE_FLOAT64_C( -289.50), SIMDE_FLOAT64_C( 417.50), SIMDE_FLOAT64_C( 245.21), SIMDE_FLOAT64_C( 960.01), SIMDE_FLOAT64_C( 840.69), SIMDE_FLOAT64_C( -550.57), SIMDE_FLOAT64_C( 87.08)) } };
/* Run each stored case through the function under test and compare the
 * computed vector with the stored expectation. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_broadcast_f64x4(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_maskz_broadcast_f64x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m256d a; simde__m512d r; } test_vec[8] = { { UINT8_C( 25), simde_mm256_set_pd(SIMDE_FLOAT64_C( -93.71), SIMDE_FLOAT64_C( 137.99), SIMDE_FLOAT64_C( 492.43), SIMDE_FLOAT64_C( 420.83)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 420.83), SIMDE_FLOAT64_C( -93.71), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 420.83)) }, { UINT8_C(223), simde_mm256_set_pd(SIMDE_FLOAT64_C( 354.04), SIMDE_FLOAT64_C(
-261.67), SIMDE_FLOAT64_C( 268.06), SIMDE_FLOAT64_C( 648.56)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 354.04), SIMDE_FLOAT64_C( -261.67), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 648.56), SIMDE_FLOAT64_C( 354.04), SIMDE_FLOAT64_C( -261.67), SIMDE_FLOAT64_C( 268.06), SIMDE_FLOAT64_C( 648.56)) }, { UINT8_C(191), simde_mm256_set_pd(SIMDE_FLOAT64_C( 107.64), SIMDE_FLOAT64_C( -652.18), SIMDE_FLOAT64_C( -872.56), SIMDE_FLOAT64_C( 585.66)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 107.64), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -872.56), SIMDE_FLOAT64_C( 585.66), SIMDE_FLOAT64_C( 107.64), SIMDE_FLOAT64_C( -652.18), SIMDE_FLOAT64_C( -872.56), SIMDE_FLOAT64_C( 585.66)) }, { UINT8_C( 77), simde_mm256_set_pd(SIMDE_FLOAT64_C( -476.82), SIMDE_FLOAT64_C( -211.69), SIMDE_FLOAT64_C( 687.27), SIMDE_FLOAT64_C( 879.50)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -211.69), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -476.82), SIMDE_FLOAT64_C( -211.69), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 879.50)) }, { UINT8_C(216), simde_mm256_set_pd(SIMDE_FLOAT64_C( -229.18), SIMDE_FLOAT64_C( 608.18), SIMDE_FLOAT64_C( 700.39), SIMDE_FLOAT64_C( 129.78)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -229.18), SIMDE_FLOAT64_C( 608.18), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 129.78), SIMDE_FLOAT64_C( -229.18), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(196), simde_mm256_set_pd(SIMDE_FLOAT64_C( 408.02), SIMDE_FLOAT64_C( -213.85), SIMDE_FLOAT64_C( 662.99), SIMDE_FLOAT64_C( 346.52)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 408.02), SIMDE_FLOAT64_C( -213.85), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -213.85), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(125), simde_mm256_set_pd(SIMDE_FLOAT64_C( 261.49), SIMDE_FLOAT64_C( -372.23), SIMDE_FLOAT64_C( -674.96), SIMDE_FLOAT64_C( -362.18)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -372.23), SIMDE_FLOAT64_C( 
-674.96), SIMDE_FLOAT64_C( -362.18), SIMDE_FLOAT64_C( 261.49), SIMDE_FLOAT64_C( -372.23), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -362.18)) }, { UINT8_C( 95), simde_mm256_set_pd(SIMDE_FLOAT64_C( 475.10), SIMDE_FLOAT64_C( -491.65), SIMDE_FLOAT64_C( 659.15), SIMDE_FLOAT64_C( 792.34)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -491.65), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 792.34), SIMDE_FLOAT64_C( 475.10), SIMDE_FLOAT64_C( -491.65), SIMDE_FLOAT64_C( 659.15), SIMDE_FLOAT64_C( 792.34)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_maskz_broadcast_f64x4(test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm512_broadcast_i32x4.
 *
 * Each of the eight cases stores a 128-bit integer vector `a` (four 32-bit
 * values) and the expected 512-bit result `r`. In the stored expectations,
 * `r` is the four values of `a` repeated four times in the same order.
 *
 * Results are compared with simde_assert_m512i_i32(r, ==, expected).
 * Returns 0 once every case has been compared.
 */
static int test_simde_mm512_broadcast_i32x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m512i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 1322912216), INT32_C( -192131569), INT32_C( 457247766), INT32_C( 1585478853)), simde_mm512_set_epi32(INT32_C( 1322912216), INT32_C( -192131569), INT32_C( 457247766), INT32_C( 1585478853), INT32_C( 1322912216), INT32_C( -192131569), INT32_C( 457247766), INT32_C( 1585478853), INT32_C( 1322912216), INT32_C( -192131569), INT32_C( 457247766), INT32_C( 1585478853), INT32_C( 1322912216), INT32_C( -192131569), INT32_C( 457247766), INT32_C( 1585478853)) }, { simde_mm_set_epi32(INT32_C( 455358584), INT32_C( -549958328), INT32_C( 1779282555), INT32_C(-1938144165)), simde_mm512_set_epi32(INT32_C( 455358584), INT32_C( -549958328), INT32_C( 1779282555), INT32_C(-1938144165), INT32_C( 455358584), INT32_C( -549958328), INT32_C( 1779282555), INT32_C(-1938144165), INT32_C( 455358584), INT32_C( -549958328), INT32_C( 1779282555), INT32_C(-1938144165), INT32_C( 455358584), INT32_C( -549958328), INT32_C( 1779282555), INT32_C(-1938144165)) }, { simde_mm_set_epi32(INT32_C( 35244693), INT32_C( -163894097), INT32_C( -32854349), INT32_C(-1300832792)), simde_mm512_set_epi32(INT32_C( 35244693), INT32_C( -163894097),
INT32_C( -32854349), INT32_C(-1300832792), INT32_C( 35244693), INT32_C( -163894097), INT32_C( -32854349), INT32_C(-1300832792), INT32_C( 35244693), INT32_C( -163894097), INT32_C( -32854349), INT32_C(-1300832792), INT32_C( 35244693), INT32_C( -163894097), INT32_C( -32854349), INT32_C(-1300832792)) }, { simde_mm_set_epi32(INT32_C( 1137728540), INT32_C( 1602744474), INT32_C( -610393021), INT32_C(-1810116300)), simde_mm512_set_epi32(INT32_C( 1137728540), INT32_C( 1602744474), INT32_C( -610393021), INT32_C(-1810116300), INT32_C( 1137728540), INT32_C( 1602744474), INT32_C( -610393021), INT32_C(-1810116300), INT32_C( 1137728540), INT32_C( 1602744474), INT32_C( -610393021), INT32_C(-1810116300), INT32_C( 1137728540), INT32_C( 1602744474), INT32_C( -610393021), INT32_C(-1810116300)) }, { simde_mm_set_epi32(INT32_C(-1023450780), INT32_C( 840494259), INT32_C(-1087383364), INT32_C(-1604779562)), simde_mm512_set_epi32(INT32_C(-1023450780), INT32_C( 840494259), INT32_C(-1087383364), INT32_C(-1604779562), INT32_C(-1023450780), INT32_C( 840494259), INT32_C(-1087383364), INT32_C(-1604779562), INT32_C(-1023450780), INT32_C( 840494259), INT32_C(-1087383364), INT32_C(-1604779562), INT32_C(-1023450780), INT32_C( 840494259), INT32_C(-1087383364), INT32_C(-1604779562)) }, { simde_mm_set_epi32(INT32_C( 1284866833), INT32_C( 27132707), INT32_C(-1597877982), INT32_C(-1252321438)), simde_mm512_set_epi32(INT32_C( 1284866833), INT32_C( 27132707), INT32_C(-1597877982), INT32_C(-1252321438), INT32_C( 1284866833), INT32_C( 27132707), INT32_C(-1597877982), INT32_C(-1252321438), INT32_C( 1284866833), INT32_C( 27132707), INT32_C(-1597877982), INT32_C(-1252321438), INT32_C( 1284866833), INT32_C( 27132707), INT32_C(-1597877982), INT32_C(-1252321438)) }, { simde_mm_set_epi32(INT32_C( -165954025), INT32_C( 878840386), INT32_C( -802596544), INT32_C( 1574139347)), simde_mm512_set_epi32(INT32_C( -165954025), INT32_C( 878840386), INT32_C( -802596544), INT32_C( 1574139347), INT32_C( -165954025), INT32_C(
878840386), INT32_C( -802596544), INT32_C( 1574139347), INT32_C( -165954025), INT32_C( 878840386), INT32_C( -802596544), INT32_C( 1574139347), INT32_C( -165954025), INT32_C( 878840386), INT32_C( -802596544), INT32_C( 1574139347)) }, { simde_mm_set_epi32(INT32_C( -602275056), INT32_C(-1823359312), INT32_C( 1232365699), INT32_C( 345237769)), simde_mm512_set_epi32(INT32_C( -602275056), INT32_C(-1823359312), INT32_C( 1232365699), INT32_C( 345237769), INT32_C( -602275056), INT32_C(-1823359312), INT32_C( 1232365699), INT32_C( 345237769), INT32_C( -602275056), INT32_C(-1823359312), INT32_C( 1232365699), INT32_C( 345237769), INT32_C( -602275056), INT32_C(-1823359312), INT32_C( 1232365699), INT32_C( 345237769)) }, };
/* Run each stored case through the function under test and require the
 * computed vector to equal the stored expectation. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_broadcast_i32x4(test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_broadcast_i32x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask16 k; simde__m128i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 1479802474), INT32_C( 587294539), INT32_C( -174751528), INT32_C( 1465222154), INT32_C( 1625882140), INT32_C(-1283973275), INT32_C( 567394727), INT32_C( 1808136008), INT32_C( 324921956), INT32_C(-1888780980), INT32_C( -262803011), INT32_C( 2131227345), INT32_C( -161180317), INT32_C( -569391310), INT32_C( 471197581), INT32_C( 2029664703)), UINT16_C(12686), simde_mm_set_epi32(INT32_C(-1929654026), INT32_C(-1217014324), INT32_C( 230292224), INT32_C( 1361651453)), simde_mm512_set_epi32(INT32_C( 1479802474), INT32_C( 587294539), INT32_C( 230292224), INT32_C( 1361651453), INT32_C( 1625882140), INT32_C(-1283973275), INT32_C( 567394727), INT32_C( 1361651453), INT32_C(-1929654026), INT32_C(-1888780980), INT32_C( -262803011), INT32_C( 2131227345), INT32_C(-1929654026), INT32_C(-1217014324), INT32_C( 230292224), INT32_C( 2029664703)) }, { simde_mm512_set_epi32(INT32_C(
1958214116), INT32_C( 2124258263), INT32_C(-1603442041), INT32_C(-1137458903), INT32_C( -291704812), INT32_C( -523349105), INT32_C( -769676631), INT32_C( 359038153), INT32_C( -860324016), INT32_C( 142975746), INT32_C( 1871956670), INT32_C(-2122929741), INT32_C( 1007202856), INT32_C(-1693638626), INT32_C(-1497430440), INT32_C( 766142674)), UINT16_C( 3460), simde_mm_set_epi32(INT32_C(-1801778632), INT32_C( 793094568), INT32_C( 739597071), INT32_C( 1855829690)), simde_mm512_set_epi32(INT32_C( 1958214116), INT32_C( 2124258263), INT32_C(-1603442041), INT32_C(-1137458903), INT32_C(-1801778632), INT32_C( 793094568), INT32_C( -769676631), INT32_C( 1855829690), INT32_C(-1801778632), INT32_C( 142975746), INT32_C( 1871956670), INT32_C(-2122929741), INT32_C( 1007202856), INT32_C( 793094568), INT32_C(-1497430440), INT32_C( 766142674)) }, { simde_mm512_set_epi32(INT32_C( -491998875), INT32_C( -465346847), INT32_C( 1096008422), INT32_C( -151618100), INT32_C( -483382033), INT32_C(-1500806456), INT32_C( 175505846), INT32_C( -698441328), INT32_C( -515513970), INT32_C( 1679973349), INT32_C(-1523347194), INT32_C( 91392241), INT32_C( -561919749), INT32_C( -634254878), INT32_C( -625316172), INT32_C( -17019235)), UINT16_C(25030), simde_mm_set_epi32(INT32_C( -839244820), INT32_C(-1678825378), INT32_C( 464598558), INT32_C(-1198702193)), simde_mm512_set_epi32(INT32_C( -491998875), INT32_C(-1678825378), INT32_C( 464598558), INT32_C( -151618100), INT32_C( -483382033), INT32_C(-1500806456), INT32_C( 175505846), INT32_C(-1198702193), INT32_C( -839244820), INT32_C(-1678825378), INT32_C(-1523347194), INT32_C( 91392241), INT32_C( -561919749), INT32_C(-1678825378), INT32_C( 464598558), INT32_C( -17019235)) }, { simde_mm512_set_epi32(INT32_C( 1319681857), INT32_C( 649867282), INT32_C(-1955467744), INT32_C(-1687114005), INT32_C(-1950655074), INT32_C(-2040429697), INT32_C( 1764915437), INT32_C( 813475409), INT32_C(-1622276195), INT32_C( 614665853), INT32_C( -661145222), INT32_C( -43416876), INT32_C( 
954392932), INT32_C(-1003825870), INT32_C( -858676034), INT32_C( 1589986539)), UINT16_C(29308), simde_mm_set_epi32(INT32_C(-1945617369), INT32_C( -313192838), INT32_C( -614227976), INT32_C( -73637500)), simde_mm512_set_epi32(INT32_C( 1319681857), INT32_C( -313192838), INT32_C( -614227976), INT32_C( -73637500), INT32_C(-1950655074), INT32_C(-2040429697), INT32_C( -614227976), INT32_C( 813475409), INT32_C(-1622276195), INT32_C( -313192838), INT32_C( -614227976), INT32_C( -73637500), INT32_C(-1945617369), INT32_C( -313192838), INT32_C( -858676034), INT32_C( 1589986539)) }, { simde_mm512_set_epi32(INT32_C( 482652005), INT32_C( 1083073699), INT32_C( -547163888), INT32_C(-1439583577), INT32_C( -836573741), INT32_C(-2032318592), INT32_C( 1307381638), INT32_C( 2027662416), INT32_C( 2001285861), INT32_C( 1074543972), INT32_C(-2107097596), INT32_C(-2025611729), INT32_C( 962055101), INT32_C( 1886777199), INT32_C( 1689643613), INT32_C(-1874481648)), UINT16_C(45428), simde_mm_set_epi32(INT32_C( 110278011), INT32_C(-1940227644), INT32_C(-1803195700), INT32_C( 1287862649)), simde_mm512_set_epi32(INT32_C( 110278011), INT32_C( 1083073699), INT32_C(-1803195700), INT32_C( 1287862649), INT32_C( -836573741), INT32_C(-2032318592), INT32_C( 1307381638), INT32_C( 1287862649), INT32_C( 2001285861), INT32_C(-1940227644), INT32_C(-1803195700), INT32_C( 1287862649), INT32_C( 962055101), INT32_C(-1940227644), INT32_C( 1689643613), INT32_C(-1874481648)) }, { simde_mm512_set_epi32(INT32_C( 485695865), INT32_C( 1704586743), INT32_C(-1227241134), INT32_C( 279727823), INT32_C( -480355834), INT32_C( 1374909005), INT32_C(-1706379633), INT32_C( 1300025155), INT32_C( 1901096153), INT32_C(-1845297076), INT32_C( 188971064), INT32_C( 1903842318), INT32_C(-1221674473), INT32_C(-1332164211), INT32_C( 23564349), INT32_C(-2098316192)), UINT16_C(21964), simde_mm_set_epi32(INT32_C(-1820692848), INT32_C( -830585945), INT32_C( 1667959054), INT32_C(-1758734041)), simde_mm512_set_epi32(INT32_C( 485695865), INT32_C( 
-830585945), INT32_C(-1227241134), INT32_C(-1758734041), INT32_C( -480355834), INT32_C( -830585945), INT32_C(-1706379633), INT32_C(-1758734041), INT32_C(-1820692848), INT32_C( -830585945), INT32_C( 188971064), INT32_C( 1903842318), INT32_C(-1820692848), INT32_C( -830585945), INT32_C( 23564349), INT32_C(-2098316192)) }, { simde_mm512_set_epi32(INT32_C(-1876069406), INT32_C( 1820341222), INT32_C( 987166931), INT32_C(-1021572249), INT32_C(-1046533173), INT32_C(-1808511518), INT32_C( -283777637), INT32_C( -168486656), INT32_C( 1250903497), INT32_C( 1175614584), INT32_C( 204391673), INT32_C( -667659280), INT32_C( 2035348040), INT32_C( -596829354), INT32_C(-1607289004), INT32_C( -670488239)), UINT16_C(31159), simde_mm_set_epi32(INT32_C(-1492076939), INT32_C( 1502879171), INT32_C( 1497885207), INT32_C(-1325620059)), simde_mm512_set_epi32(INT32_C(-1876069406), INT32_C( 1502879171), INT32_C( 1497885207), INT32_C(-1325620059), INT32_C(-1492076939), INT32_C(-1808511518), INT32_C( -283777637), INT32_C(-1325620059), INT32_C(-1492076939), INT32_C( 1175614584), INT32_C( 1497885207), INT32_C(-1325620059), INT32_C( 2035348040), INT32_C( 1502879171), INT32_C( 1497885207), INT32_C(-1325620059)) }, { simde_mm512_set_epi32(INT32_C(-1346174896), INT32_C( 1223712250), INT32_C( 2029339086), INT32_C( 2108949315), INT32_C(-1822742445), INT32_C( -343433299), INT32_C(-1626119528), INT32_C( 1735301543), INT32_C( 766111295), INT32_C( -80424103), INT32_C( 1232059506), INT32_C(-1681875170), INT32_C( 1819208351), INT32_C( -734074357), INT32_C( 61937468), INT32_C(-1403575087)), UINT16_C(37926), simde_mm_set_epi32(INT32_C( 1656599178), INT32_C( 1293315993), INT32_C( -728433677), INT32_C( -125533424)), simde_mm512_set_epi32(INT32_C( 1656599178), INT32_C( 1223712250), INT32_C( 2029339086), INT32_C( -125533424), INT32_C(-1822742445), INT32_C( 1293315993), INT32_C(-1626119528), INT32_C( 1735301543), INT32_C( 766111295), INT32_C( -80424103), INT32_C( -728433677), INT32_C(-1681875170), INT32_C( 
1819208351), INT32_C( 1293315993), INT32_C( -728433677), INT32_C(-1403575087)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_broadcast_i32x4(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_broadcast_i32x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m128i a; simde__m512i r; } test_vec[8] = { { UINT16_C(57503), simde_mm_set_epi32(INT32_C( 913371223), INT32_C( 1946242675), INT32_C(-1851162974), INT32_C(-1090004303)), simde_mm512_set_epi32(INT32_C( 913371223), INT32_C( 1946242675), INT32_C(-1851162974), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 913371223), INT32_C( 0), INT32_C( 0), INT32_C(-1090004303), INT32_C( 913371223), INT32_C( 1946242675), INT32_C(-1851162974), INT32_C(-1090004303)) }, { UINT16_C( 9830), simde_mm_set_epi32(INT32_C( -754702866), INT32_C( 59910169), INT32_C(-1421684089), INT32_C( 1688249563)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C(-1421684089), INT32_C( 0), INT32_C( 0), INT32_C( 59910169), INT32_C(-1421684089), INT32_C( 0), INT32_C( 0), INT32_C( 59910169), INT32_C(-1421684089), INT32_C( 0), INT32_C( 0), INT32_C( 59910169), INT32_C(-1421684089), INT32_C( 0)) }, { UINT16_C(54973), simde_mm_set_epi32(INT32_C( 1295192258), INT32_C( 2064350366), INT32_C(-1387191485), INT32_C( 1585557386)), simde_mm512_set_epi32(INT32_C( 1295192258), INT32_C( 2064350366), INT32_C( 0), INT32_C( 1585557386), INT32_C( 0), INT32_C( 2064350366), INT32_C(-1387191485), INT32_C( 0), INT32_C( 1295192258), INT32_C( 0), INT32_C(-1387191485), INT32_C( 1585557386), INT32_C( 1295192258), INT32_C( 2064350366), INT32_C( 0), INT32_C( 1585557386)) }, { UINT16_C( 2571), simde_mm_set_epi32(INT32_C( 273665101), INT32_C( -889778981), INT32_C( 888851167), INT32_C( 342766140)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 
273665101), INT32_C( 0), INT32_C( 888851167), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 273665101), INT32_C( 0), INT32_C( 888851167), INT32_C( 342766140)) }, { UINT16_C(34156), simde_mm_set_epi32(INT32_C( 809684493), INT32_C( -666403540), INT32_C(-1117073828), INT32_C(-1916337185)), simde_mm512_set_epi32(INT32_C( 809684493), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -666403540), INT32_C( 0), INT32_C(-1916337185), INT32_C( 0), INT32_C( -666403540), INT32_C(-1117073828), INT32_C( 0), INT32_C( 809684493), INT32_C( -666403540), INT32_C( 0), INT32_C( 0)) }, { UINT16_C( 6544), simde_mm_set_epi32(INT32_C( 1692879261), INT32_C( -671588299), INT32_C( -258764942), INT32_C(-1633977409)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1633977409), INT32_C( 1692879261), INT32_C( 0), INT32_C( 0), INT32_C(-1633977409), INT32_C( 1692879261), INT32_C( 0), INT32_C( 0), INT32_C(-1633977409), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(45909), simde_mm_set_epi32(INT32_C( 472486650), INT32_C( 1238366490), INT32_C(-1084360471), INT32_C( 686181072)), simde_mm512_set_epi32(INT32_C( 472486650), INT32_C( 0), INT32_C(-1084360471), INT32_C( 686181072), INT32_C( 0), INT32_C( 0), INT32_C(-1084360471), INT32_C( 686181072), INT32_C( 0), INT32_C( 1238366490), INT32_C( 0), INT32_C( 686181072), INT32_C( 0), INT32_C( 1238366490), INT32_C( 0), INT32_C( 686181072)) }, { UINT16_C(56653), simde_mm_set_epi32(INT32_C( 1655322598), INT32_C( -841418169), INT32_C( -643403227), INT32_C(-1868778842)), simde_mm512_set_epi32(INT32_C( 1655322598), INT32_C( -841418169), INT32_C( 0), INT32_C(-1868778842), INT32_C( 1655322598), INT32_C( -841418169), INT32_C( 0), INT32_C(-1868778842), INT32_C( 0), INT32_C( -841418169), INT32_C( 0), INT32_C( 0), INT32_C( 1655322598), INT32_C( -841418169), INT32_C( 0), INT32_C(-1868778842)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = 
simde_mm512_maskz_broadcast_i32x4(test_vec[i].k, test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_broadcast_i64x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m512i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C( 2067253863170152603), INT64_C( 7322969156688688496), INT64_C(-3040413397780943697), INT64_C( -347515311309491350)), simde_mm512_set_epi64(INT64_C( 2067253863170152603), INT64_C( 7322969156688688496), INT64_C(-3040413397780943697), INT64_C( -347515311309491350), INT64_C( 2067253863170152603), INT64_C( 7322969156688688496), INT64_C(-3040413397780943697), INT64_C( -347515311309491350)) }, { simde_mm256_set_epi64x(INT64_C(-8775907405261856642), INT64_C( 2994184764454707691), INT64_C( 5740004668815682638), INT64_C(-6479861669953478300)), simde_mm512_set_epi64(INT64_C(-8775907405261856642), INT64_C( 2994184764454707691), INT64_C( 5740004668815682638), INT64_C(-6479861669953478300), INT64_C(-8775907405261856642), INT64_C( 2994184764454707691), INT64_C( 5740004668815682638), INT64_C(-6479861669953478300)) }, { simde_mm256_set_epi64x(INT64_C(-1508734178901937051), INT64_C(-9017252864562564261), INT64_C( -273279204292504060), INT64_C( 619750219118375084)), simde_mm512_set_epi64(INT64_C(-1508734178901937051), INT64_C(-9017252864562564261), INT64_C( -273279204292504060), INT64_C( 619750219118375084), INT64_C(-1508734178901937051), INT64_C(-9017252864562564261), INT64_C( -273279204292504060), INT64_C( 619750219118375084)) }, { simde_mm256_set_epi64x(INT64_C( 5726987144774798582), INT64_C(-5242976599564634972), INT64_C(-2397121704692329659), INT64_C( 8619348224440898856)), simde_mm512_set_epi64(INT64_C( 5726987144774798582), INT64_C(-5242976599564634972), INT64_C(-2397121704692329659), INT64_C( 8619348224440898856), INT64_C( 5726987144774798582), INT64_C(-5242976599564634972), INT64_C(-2397121704692329659), INT64_C( 8619348224440898856)) }, { simde_mm256_set_epi64x(INT64_C( 
3770039990400590046), INT64_C(-4228023324121815234), INT64_C(-2554402032947045809), INT64_C(-5734730006803594733)), simde_mm512_set_epi64(INT64_C( 3770039990400590046), INT64_C(-4228023324121815234), INT64_C(-2554402032947045809), INT64_C(-5734730006803594733), INT64_C( 3770039990400590046), INT64_C(-4228023324121815234), INT64_C(-2554402032947045809), INT64_C(-5734730006803594733)) }, { simde_mm256_set_epi64x(INT64_C(-7969300362390541280), INT64_C( 5131273406597805369), INT64_C( 3164578103377175393), INT64_C( -896289702737256643)), simde_mm512_set_epi64(INT64_C(-7969300362390541280), INT64_C( 5131273406597805369), INT64_C( 3164578103377175393), INT64_C( -896289702737256643), INT64_C(-7969300362390541280), INT64_C( 5131273406597805369), INT64_C( 3164578103377175393), INT64_C( -896289702737256643)) }, { simde_mm256_set_epi64x(INT64_C( 6358202424481672256), INT64_C(-2088789378195753898), INT64_C(-3832720361616382569), INT64_C(-1395499602347228816)), simde_mm512_set_epi64(INT64_C( 6358202424481672256), INT64_C(-2088789378195753898), INT64_C(-3832720361616382569), INT64_C(-1395499602347228816), INT64_C( 6358202424481672256), INT64_C(-2088789378195753898), INT64_C(-3832720361616382569), INT64_C(-1395499602347228816)) }, { simde_mm256_set_epi64x(INT64_C(-7005415045902450329), INT64_C( 454800303112400674), INT64_C( 120562593220559221), INT64_C(-9183341893829321065)), simde_mm512_set_epi64(INT64_C(-7005415045902450329), INT64_C( 454800303112400674), INT64_C( 120562593220559221), INT64_C(-9183341893829321065), INT64_C(-7005415045902450329), INT64_C( 454800303112400674), INT64_C( 120562593220559221), INT64_C(-9183341893829321065)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_broadcast_i64x4(test_vec[i].a); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_broadcast_i64x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask8 k; simde__m256i a; 
simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C(-6314317108894035774), INT64_C( 8866317312363406147), INT64_C( 6809917121524389565), INT64_C(-3241424127607560167), INT64_C(-6106086665810303781), INT64_C( 633642393017577559), INT64_C( 8359048641648361122), INT64_C(-4681532830833057633)), UINT8_C( 60), simde_mm256_set_epi64x(INT64_C( 3477568421141904684), INT64_C(-4797795556098898977), INT64_C( 3208117008747973709), INT64_C(-3821571623174354209)), simde_mm512_set_epi64(INT64_C(-6314317108894035774), INT64_C( 8866317312363406147), INT64_C( 3208117008747973709), INT64_C(-3821571623174354209), INT64_C( 3477568421141904684), INT64_C(-4797795556098898977), INT64_C( 8359048641648361122), INT64_C(-4681532830833057633)) }, { simde_mm512_set_epi64(INT64_C( 1306125493676423142), INT64_C(-3613863514463636955), INT64_C(-8026344006176744115), INT64_C( 2029314710784964890), INT64_C(-4657292759333975344), INT64_C( 4825522705097247133), INT64_C(-2884449776545067150), INT64_C(-7017879531382302320)), UINT8_C( 0), simde_mm256_set_epi64x(INT64_C(-4405193415265233332), INT64_C( 7250935849068321562), INT64_C(-6027293339582699304), INT64_C(-3733599027822978693)), simde_mm512_set_epi64(INT64_C( 1306125493676423142), INT64_C(-3613863514463636955), INT64_C(-8026344006176744115), INT64_C( 2029314710784964890), INT64_C(-4657292759333975344), INT64_C( 4825522705097247133), INT64_C(-2884449776545067150), INT64_C(-7017879531382302320)) }, { simde_mm512_set_epi64(INT64_C( 4688717956956220153), INT64_C(-1915316091557446787), INT64_C( 1577347929723399506), INT64_C(-7813885322626023749), INT64_C(-6811547529988353683), INT64_C( 2997984888778655645), INT64_C( 3443124806434765346), INT64_C( 5852240145563215278)), UINT8_C(246), simde_mm256_set_epi64x(INT64_C(-1891210360757244537), INT64_C( 6167039147883013727), INT64_C( 3386552444698298512), INT64_C( 7545310155849572514)), simde_mm512_set_epi64(INT64_C(-1891210360757244537), INT64_C( 6167039147883013727), INT64_C( 3386552444698298512), 
INT64_C( 7545310155849572514), INT64_C(-6811547529988353683), INT64_C( 6167039147883013727), INT64_C( 3386552444698298512), INT64_C( 5852240145563215278)) }, { simde_mm512_set_epi64(INT64_C( 5038277295705077786), INT64_C(-8704670477732479640), INT64_C(-4548397220420700343), INT64_C( 8046739269734052975), INT64_C( 7094379553694909752), INT64_C( 4795143479989329521), INT64_C(-4501545483124413586), INT64_C(-3553418787378740418)), UINT8_C( 91), simde_mm256_set_epi64x(INT64_C(-1193819960890806229), INT64_C( 1698145641448748604), INT64_C(-5983907472113043464), INT64_C( 2399871967268573321)), simde_mm512_set_epi64(INT64_C( 5038277295705077786), INT64_C( 1698145641448748604), INT64_C(-4548397220420700343), INT64_C( 2399871967268573321), INT64_C(-1193819960890806229), INT64_C( 4795143479989329521), INT64_C(-5983907472113043464), INT64_C( 2399871967268573321)) }, { simde_mm512_set_epi64(INT64_C(-7015430497800685262), INT64_C( 6395476272833483099), INT64_C(-7658177893206805688), INT64_C( 8616202346974378134), INT64_C( 4658965153462790469), INT64_C(-8694270525310808014), INT64_C(-6021620893121233714), INT64_C(-2734912706905093379)), UINT8_C(200), simde_mm256_set_epi64x(INT64_C(-1469383970610000896), INT64_C( 2906056864364420569), INT64_C(-8420208282727167471), INT64_C(-2445653243165948933)), simde_mm512_set_epi64(INT64_C(-1469383970610000896), INT64_C( 2906056864364420569), INT64_C(-7658177893206805688), INT64_C( 8616202346974378134), INT64_C(-1469383970610000896), INT64_C(-8694270525310808014), INT64_C(-6021620893121233714), INT64_C(-2734912706905093379)) }, { simde_mm512_set_epi64(INT64_C( -14573144697473529), INT64_C( 8194534140513027918), INT64_C( 2864848388614962181), INT64_C(-8899252041456864412), INT64_C( 6379752944219310901), INT64_C(-1860193003353627344), INT64_C(-6904865090556452860), INT64_C( 3719036040063860682)), UINT8_C(242), simde_mm256_set_epi64x(INT64_C(-5869124324801971655), INT64_C( 4548184433513821860), INT64_C( -866976878921007676), INT64_C( 
2203520398864570966)), simde_mm512_set_epi64(INT64_C(-5869124324801971655), INT64_C( 4548184433513821860), INT64_C( -866976878921007676), INT64_C( 2203520398864570966), INT64_C( 6379752944219310901), INT64_C(-1860193003353627344), INT64_C( -866976878921007676), INT64_C( 3719036040063860682)) }, { simde_mm512_set_epi64(INT64_C( 6933317985964373307), INT64_C(-7912084547370987750), INT64_C( 1434122569595023374), INT64_C( 372849821895528123), INT64_C( -797096709674116855), INT64_C( 7124042714150240897), INT64_C( 192820077199458500), INT64_C( 2333974304098521090)), UINT8_C(243), simde_mm256_set_epi64x(INT64_C( 9216760499566437432), INT64_C(-3611239802138142732), INT64_C(-4586686018735308980), INT64_C(-4383556822793463465)), simde_mm512_set_epi64(INT64_C( 9216760499566437432), INT64_C(-3611239802138142732), INT64_C(-4586686018735308980), INT64_C(-4383556822793463465), INT64_C( -797096709674116855), INT64_C( 7124042714150240897), INT64_C(-4586686018735308980), INT64_C(-4383556822793463465)) }, { simde_mm512_set_epi64(INT64_C(-1447537183271280169), INT64_C( 3992622506060288146), INT64_C(-4043997837551953925), INT64_C( 6303477149728220498), INT64_C( 7148655265583700891), INT64_C(-2780283900793463061), INT64_C( 3296623181868458839), INT64_C( 3808941703531633947)), UINT8_C(191), simde_mm256_set_epi64x(INT64_C( 4775871390633368548), INT64_C( 1184569154591270183), INT64_C(-1750343127516454914), INT64_C( 3950749388527391085)), simde_mm512_set_epi64(INT64_C( 4775871390633368548), INT64_C( 3992622506060288146), INT64_C(-1750343127516454914), INT64_C( 3950749388527391085), INT64_C( 4775871390633368548), INT64_C( 1184569154591270183), INT64_C(-1750343127516454914), INT64_C( 3950749388527391085)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_broadcast_i64x4(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int 
test_simde_mm512_maskz_broadcast_i64x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m256i a; simde__m512i r; } test_vec[8] = { { UINT8_C( 81), simde_mm256_set_epi64x(INT64_C(-3226888659503117201), INT64_C( 7490209482650655404), INT64_C(-9179276487306987344), INT64_C( 7055682156038845095)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 7490209482650655404), INT64_C( 0), INT64_C( 7055682156038845095), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 7055682156038845095)) }, { UINT8_C(230), simde_mm256_set_epi64x(INT64_C( 6952848743567724070), INT64_C( 6398498157984007660), INT64_C(-7276216502972313781), INT64_C( 4842545408380684085)), simde_mm512_set_epi64(INT64_C( 6952848743567724070), INT64_C( 6398498157984007660), INT64_C(-7276216502972313781), INT64_C( 0), INT64_C( 0), INT64_C( 6398498157984007660), INT64_C(-7276216502972313781), INT64_C( 0)) }, { UINT8_C(115), simde_mm256_set_epi64x(INT64_C( -147426939517817059), INT64_C(-3374766540151601501), INT64_C( 9013437962204473886), INT64_C( 2290211861166994880)), simde_mm512_set_epi64(INT64_C( 0), INT64_C(-3374766540151601501), INT64_C( 9013437962204473886), INT64_C( 2290211861166994880), INT64_C( 0), INT64_C( 0), INT64_C( 9013437962204473886), INT64_C( 2290211861166994880)) }, { UINT8_C(102), simde_mm256_set_epi64x(INT64_C(-8700458333795307779), INT64_C(-9147297996573979024), INT64_C(-3649385965919135635), INT64_C( 1818037113458506686)), simde_mm512_set_epi64(INT64_C( 0), INT64_C(-9147297996573979024), INT64_C(-3649385965919135635), INT64_C( 0), INT64_C( 0), INT64_C(-9147297996573979024), INT64_C(-3649385965919135635), INT64_C( 0)) }, { UINT8_C( 59), simde_mm256_set_epi64x(INT64_C( 8763762661767364639), INT64_C(-7194784414741958081), INT64_C(-1605849263772874289), INT64_C(-2187551180549076287)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C(-1605849263772874289), INT64_C(-2187551180549076287), INT64_C( 8763762661767364639), INT64_C( 0), INT64_C(-1605849263772874289), 
INT64_C(-2187551180549076287)) }, { UINT8_C(119), simde_mm256_set_epi64x(INT64_C( 3282428208913039389), INT64_C(-2887297167729747289), INT64_C( 6938672003976555894), INT64_C(-3765766577293323049)), simde_mm512_set_epi64(INT64_C( 0), INT64_C(-2887297167729747289), INT64_C( 6938672003976555894), INT64_C(-3765766577293323049), INT64_C( 0), INT64_C(-2887297167729747289), INT64_C( 6938672003976555894), INT64_C(-3765766577293323049)) }, { UINT8_C( 25), simde_mm256_set_epi64x(INT64_C(-4802008903577488206), INT64_C(-3983516919532966210), INT64_C(-4702094198572773446), INT64_C( -958715043139892800)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( -958715043139892800), INT64_C(-4802008903577488206), INT64_C( 0), INT64_C( 0), INT64_C( -958715043139892800)) }, { UINT8_C(207), simde_mm256_set_epi64x(INT64_C( 2289318697780797186), INT64_C(-4515948424499803858), INT64_C( 7316310196690749623), INT64_C( 4937967944726422430)), simde_mm512_set_epi64(INT64_C( 2289318697780797186), INT64_C(-4515948424499803858), INT64_C( 0), INT64_C( 0), INT64_C( 2289318697780797186), INT64_C(-4515948424499803858), INT64_C( 7316310196690749623), INT64_C( 4937967944726422430)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_broadcast_i64x4(test_vec[i].k, test_vec[i].a); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_broadcastd_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m512i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C(-1051270324), INT32_C(-1977183446), INT32_C( -548195640), INT32_C(-1363461466)), simde_mm512_set_epi32(INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), INT32_C(-1363461466), 
INT32_C(-1363461466)) }, { simde_mm_set_epi32(INT32_C( 979094891), INT32_C( 416506319), INT32_C( 2123490297), INT32_C( 200388421)), simde_mm512_set_epi32(INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421), INT32_C( 200388421)) }, { simde_mm_set_epi32(INT32_C( 1927260635), INT32_C( 1201458882), INT32_C(-1448742498), INT32_C(-1111904220)), simde_mm512_set_epi32(INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220), INT32_C(-1111904220)) }, { simde_mm_set_epi32(INT32_C( -976455818), INT32_C( 542613123), INT32_C( -15911923), INT32_C( -562895064)), simde_mm512_set_epi32(INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064), INT32_C( -562895064)) }, { simde_mm_set_epi32(INT32_C( 836747087), INT32_C(-1431045412), INT32_C(-1356396683), INT32_C( 1489138473)), simde_mm512_set_epi32(INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473), INT32_C( 1489138473)) }, { simde_mm_set_epi32(INT32_C(-1783426961), 
INT32_C( -263517415), INT32_C(-1697630001), INT32_C( 2025142863)), simde_mm512_set_epi32(INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863), INT32_C( 2025142863)) }, { simde_mm_set_epi32(INT32_C( 300619496), INT32_C( -659754204), INT32_C(-1019736463), INT32_C( 1022872166)), simde_mm512_set_epi32(INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166), INT32_C( 1022872166)) }, { simde_mm_set_epi32(INT32_C( -274893610), INT32_C( 171227717), INT32_C( 1187872667), INT32_C( -590903223)), simde_mm512_set_epi32(INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223), INT32_C( -590903223)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_broadcastd_epi32(test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_broadcastd_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask16 k; simde__m128i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 1638944021), INT32_C( -385149059), INT32_C( 852916680), INT32_C(-1839015366), INT32_C( 1146921463), INT32_C( 765234486), INT32_C( -388218844), INT32_C(-1402803832), INT32_C( 
1245942358), INT32_C( 2001202713), INT32_C( 868062804), INT32_C(-1988191751), INT32_C( 807099340), INT32_C( -38350755), INT32_C( -443928971), INT32_C( -432554813)), UINT16_C(24594), simde_mm_set_epi32(INT32_C( -255909174), INT32_C(-1302917278), INT32_C( 327520540), INT32_C( 176606543)), simde_mm512_set_epi32(INT32_C( 1638944021), INT32_C( 176606543), INT32_C( 176606543), INT32_C(-1839015366), INT32_C( 1146921463), INT32_C( 765234486), INT32_C( -388218844), INT32_C(-1402803832), INT32_C( 1245942358), INT32_C( 2001202713), INT32_C( 868062804), INT32_C( 176606543), INT32_C( 807099340), INT32_C( -38350755), INT32_C( 176606543), INT32_C( -432554813)) }, { simde_mm512_set_epi32(INT32_C( -115460801), INT32_C( 1889676725), INT32_C( 2912775), INT32_C(-1289469215), INT32_C( 1033489041), INT32_C( 147853139), INT32_C( 706073024), INT32_C( -130092746), INT32_C( -799642653), INT32_C(-1439962375), INT32_C(-1798405841), INT32_C( 1190396108), INT32_C(-1013986568), INT32_C( 994541610), INT32_C(-1127995400), INT32_C( 1108325476)), UINT16_C(40849), simde_mm_set_epi32(INT32_C( 250706831), INT32_C( -936079925), INT32_C(-1129184131), INT32_C( 803417186)), simde_mm512_set_epi32(INT32_C( 803417186), INT32_C( 1889676725), INT32_C( 2912775), INT32_C( 803417186), INT32_C( 803417186), INT32_C( 803417186), INT32_C( 803417186), INT32_C( 803417186), INT32_C( 803417186), INT32_C(-1439962375), INT32_C(-1798405841), INT32_C( 803417186), INT32_C(-1013986568), INT32_C( 994541610), INT32_C(-1127995400), INT32_C( 803417186)) }, { simde_mm512_set_epi32(INT32_C( 357625867), INT32_C( -157238200), INT32_C( 909767636), INT32_C( 1422277073), INT32_C( 2123935701), INT32_C(-1040550911), INT32_C( 686758291), INT32_C(-2090356905), INT32_C( -362358815), INT32_C( -482453842), INT32_C( 117787421), INT32_C( 1300554279), INT32_C(-1085613264), INT32_C( -109297466), INT32_C(-1230203271), INT32_C(-1731521429)), UINT16_C(53728), simde_mm_set_epi32(INT32_C( -707786971), INT32_C( 1712040202), INT32_C(-2012675757), 
INT32_C(-1396559749)), simde_mm512_set_epi32(INT32_C(-1396559749), INT32_C(-1396559749), INT32_C( 909767636), INT32_C(-1396559749), INT32_C( 2123935701), INT32_C(-1040550911), INT32_C( 686758291), INT32_C(-1396559749), INT32_C(-1396559749), INT32_C(-1396559749), INT32_C(-1396559749), INT32_C( 1300554279), INT32_C(-1085613264), INT32_C( -109297466), INT32_C(-1230203271), INT32_C(-1731521429)) }, { simde_mm512_set_epi32(INT32_C( 2041534605), INT32_C( 1255681923), INT32_C( 1220121473), INT32_C( 1819952522), INT32_C(-1737362693), INT32_C( 712438877), INT32_C(-1234448370), INT32_C( 217554028), INT32_C(-1878093154), INT32_C( -741869417), INT32_C( 943666007), INT32_C( 622675686), INT32_C( -269910912), INT32_C( 137195559), INT32_C( 469574756), INT32_C( 1490101689)), UINT16_C(50038), simde_mm_set_epi32(INT32_C( -272719467), INT32_C( -594597983), INT32_C( -820913821), INT32_C( 345700481)), simde_mm512_set_epi32(INT32_C( 345700481), INT32_C( 345700481), INT32_C( 1220121473), INT32_C( 1819952522), INT32_C(-1737362693), INT32_C( 712438877), INT32_C( 345700481), INT32_C( 345700481), INT32_C(-1878093154), INT32_C( 345700481), INT32_C( 345700481), INT32_C( 345700481), INT32_C( -269910912), INT32_C( 345700481), INT32_C( 345700481), INT32_C( 1490101689)) }, { simde_mm512_set_epi32(INT32_C( 605201121), INT32_C( 2188130), INT32_C( -956406632), INT32_C(-1144421408), INT32_C(-2008693903), INT32_C( 1823632430), INT32_C( 2043624683), INT32_C( 457225971), INT32_C( 1484257119), INT32_C( 719932227), INT32_C( 1722430058), INT32_C( 916001650), INT32_C( 553469699), INT32_C(-2003831430), INT32_C(-1834906502), INT32_C( 225358926)), UINT16_C(22657), simde_mm_set_epi32(INT32_C( 290541765), INT32_C( -479926223), INT32_C( 2079119915), INT32_C( -331512500)), simde_mm512_set_epi32(INT32_C( 605201121), INT32_C( -331512500), INT32_C( -956406632), INT32_C( -331512500), INT32_C( -331512500), INT32_C( 1823632430), INT32_C( 2043624683), INT32_C( 457225971), INT32_C( -331512500), INT32_C( 719932227), INT32_C( 
1722430058), INT32_C( 916001650), INT32_C( 553469699), INT32_C(-2003831430), INT32_C(-1834906502), INT32_C( -331512500)) }, { simde_mm512_set_epi32(INT32_C( -545987817), INT32_C(-1146550995), INT32_C( 963048631), INT32_C( -701605919), INT32_C( 432096480), INT32_C(-2030393254), INT32_C(-1236899565), INT32_C(-1697034971), INT32_C( -998012960), INT32_C(-1579141793), INT32_C( 1664269708), INT32_C( -667117157), INT32_C( -708117814), INT32_C( 85211107), INT32_C( 909670673), INT32_C( 1616737139)), UINT16_C( 4531), simde_mm_set_epi32(INT32_C( -503580732), INT32_C(-1790221512), INT32_C(-1663970343), INT32_C( 1633501790)), simde_mm512_set_epi32(INT32_C( -545987817), INT32_C(-1146550995), INT32_C( 963048631), INT32_C( 1633501790), INT32_C( 432096480), INT32_C(-2030393254), INT32_C(-1236899565), INT32_C( 1633501790), INT32_C( 1633501790), INT32_C(-1579141793), INT32_C( 1633501790), INT32_C( 1633501790), INT32_C( -708117814), INT32_C( 85211107), INT32_C( 1633501790), INT32_C( 1633501790)) }, { simde_mm512_set_epi32(INT32_C(-1668661089), INT32_C( 1895031925), INT32_C( 2107029353), INT32_C(-1915428586), INT32_C( 963718296), INT32_C( 1878898594), INT32_C( -403168746), INT32_C( 502390291), INT32_C( 1855826407), INT32_C(-1442018177), INT32_C( -244961355), INT32_C( 1777042193), INT32_C( 373997996), INT32_C( -684064874), INT32_C( 930695451), INT32_C(-1073438864)), UINT16_C(53861), simde_mm_set_epi32(INT32_C( 1599859635), INT32_C( 543659234), INT32_C(-1222091200), INT32_C( 817594139)), simde_mm512_set_epi32(INT32_C( 817594139), INT32_C( 817594139), INT32_C( 2107029353), INT32_C( 817594139), INT32_C( 963718296), INT32_C( 1878898594), INT32_C( 817594139), INT32_C( 502390291), INT32_C( 1855826407), INT32_C( 817594139), INT32_C( 817594139), INT32_C( 1777042193), INT32_C( 373997996), INT32_C( 817594139), INT32_C( 930695451), INT32_C( 817594139)) }, { simde_mm512_set_epi32(INT32_C( -831807470), INT32_C( -591553083), INT32_C( -492649784), INT32_C(-1394371521), INT32_C(-1760655625), INT32_C( 
2135736563), INT32_C(-2075134444), INT32_C( -933317766), INT32_C( -731013025), INT32_C(-2091361347), INT32_C( 1562364760), INT32_C( -612070110), INT32_C( 1365385309), INT32_C( -121237183), INT32_C( 1543044931), INT32_C(-1490381593)), UINT16_C(20921), simde_mm_set_epi32(INT32_C(-1466503600), INT32_C( 824864478), INT32_C(-1491396230), INT32_C(-1907140086)), simde_mm512_set_epi32(INT32_C( -831807470), INT32_C(-1907140086), INT32_C( -492649784), INT32_C(-1907140086), INT32_C(-1760655625), INT32_C( 2135736563), INT32_C(-2075134444), INT32_C(-1907140086), INT32_C(-1907140086), INT32_C(-2091361347), INT32_C(-1907140086), INT32_C(-1907140086), INT32_C(-1907140086), INT32_C( -121237183), INT32_C( 1543044931), INT32_C(-1907140086)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_broadcastd_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_broadcastd_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m128i a; simde__m512i r; } test_vec[8] = { { UINT16_C(21274), simde_mm_set_epi32(INT32_C( 1459257075), INT32_C( 587801532), INT32_C( 1631678564), INT32_C( 715337051)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 715337051), INT32_C( 0), INT32_C( 715337051), INT32_C( 0), INT32_C( 0), INT32_C( 715337051), INT32_C( 715337051), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 715337051), INT32_C( 715337051), INT32_C( 0), INT32_C( 715337051), INT32_C( 0)) }, { UINT16_C(59357), simde_mm_set_epi32(INT32_C(-2022546688), INT32_C( 2145084340), INT32_C( 29275255), INT32_C( -827125259)), simde_mm512_set_epi32(INT32_C( -827125259), INT32_C( -827125259), INT32_C( -827125259), INT32_C( 0), INT32_C( 0), INT32_C( -827125259), INT32_C( -827125259), INT32_C( -827125259), INT32_C( -827125259), INT32_C( -827125259), INT32_C( 0), INT32_C( -827125259), INT32_C( -827125259), INT32_C( -827125259), INT32_C( 0), INT32_C( 
-827125259)) }, { UINT16_C(34446), simde_mm_set_epi32(INT32_C( 973425906), INT32_C( -935954345), INT32_C( 1285315081), INT32_C( 2142489532)), simde_mm512_set_epi32(INT32_C( 2142489532), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 2142489532), INT32_C( 2142489532), INT32_C( 0), INT32_C( 2142489532), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 2142489532), INT32_C( 2142489532), INT32_C( 2142489532), INT32_C( 0)) }, { UINT16_C(33955), simde_mm_set_epi32(INT32_C(-1114656122), INT32_C( 1221674060), INT32_C( -740975665), INT32_C( 2132760332)), simde_mm512_set_epi32(INT32_C( 2132760332), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 2132760332), INT32_C( 0), INT32_C( 0), INT32_C( 2132760332), INT32_C( 0), INT32_C( 2132760332), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 2132760332), INT32_C( 2132760332)) }, { UINT16_C(52572), simde_mm_set_epi32(INT32_C( -724774954), INT32_C( -166426332), INT32_C(-1571631693), INT32_C( -124417294)), simde_mm512_set_epi32(INT32_C( -124417294), INT32_C( -124417294), INT32_C( 0), INT32_C( 0), INT32_C( -124417294), INT32_C( -124417294), INT32_C( 0), INT32_C( -124417294), INT32_C( 0), INT32_C( -124417294), INT32_C( 0), INT32_C( -124417294), INT32_C( -124417294), INT32_C( -124417294), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(38931), simde_mm_set_epi32(INT32_C(-1992244525), INT32_C( -292982508), INT32_C( -691380397), INT32_C(-1292068161)), simde_mm512_set_epi32(INT32_C(-1292068161), INT32_C( 0), INT32_C( 0), INT32_C(-1292068161), INT32_C(-1292068161), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1292068161), INT32_C( 0), INT32_C( 0), INT32_C(-1292068161), INT32_C(-1292068161)) }, { UINT16_C(32377), simde_mm_set_epi32(INT32_C( -766689829), INT32_C(-1724046912), INT32_C( 1799018744), INT32_C( 623047724)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 623047724), INT32_C( 623047724), INT32_C( 623047724), INT32_C( 623047724), INT32_C( 623047724), INT32_C( 623047724), 
INT32_C( 0), INT32_C( 0), INT32_C( 623047724), INT32_C( 623047724), INT32_C( 623047724), INT32_C( 623047724), INT32_C( 0), INT32_C( 0), INT32_C( 623047724)) }, { UINT16_C(18782), simde_mm_set_epi32(INT32_C(-2020669200), INT32_C( -170583969), INT32_C( -628885190), INT32_C( 818636447)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 818636447), INT32_C( 0), INT32_C( 0), INT32_C( 818636447), INT32_C( 0), INT32_C( 0), INT32_C( 818636447), INT32_C( 0), INT32_C( 818636447), INT32_C( 0), INT32_C( 818636447), INT32_C( 818636447), INT32_C( 818636447), INT32_C( 818636447), INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_broadcastd_epi32(test_vec[i].k, test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_broadcastq_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m512i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C(-4515171658517540054), INT64_C(-2354482342678283610)), simde_mm512_set_epi64(INT64_C(-2354482342678283610), INT64_C(-2354482342678283610), INT64_C(-2354482342678283610), INT64_C(-2354482342678283610), INT64_C(-2354482342678283610), INT64_C(-2354482342678283610), INT64_C(-2354482342678283610), INT64_C(-2354482342678283610)) }, { simde_mm_set_epi64x(INT64_C( 4205180536942191055), INT64_C( 9120321379188715333)), simde_mm512_set_epi64(INT64_C( 9120321379188715333), INT64_C( 9120321379188715333), INT64_C( 9120321379188715333), INT64_C( 9120321379188715333), INT64_C( 9120321379188715333), INT64_C( 9120321379188715333), INT64_C( 9120321379188715333), INT64_C( 9120321379188715333)) }, { simde_mm_set_epi64x(INT64_C( 8277521399394651842), INT64_C(-6222301646052282332)), simde_mm512_set_epi64(INT64_C(-6222301646052282332), INT64_C(-6222301646052282332), INT64_C(-6222301646052282332), INT64_C(-6222301646052282332), INT64_C(-6222301646052282332), INT64_C(-6222301646052282332), INT64_C(-6222301646052282332), 
INT64_C(-6222301646052282332)) }, { simde_mm_set_epi64x(INT64_C(-4193845803756315005), INT64_C( -68341185169397976)), simde_mm512_set_epi64(INT64_C( -68341185169397976), INT64_C( -68341185169397976), INT64_C( -68341185169397976), INT64_C( -68341185169397976), INT64_C( -68341185169397976), INT64_C( -68341185169397976), INT64_C( -68341185169397976), INT64_C( -68341185169397976)) }, { simde_mm_set_epi64x(INT64_C( 3593801376552188636), INT64_C(-5825679392398740695)), simde_mm512_set_epi64(INT64_C(-5825679392398740695), INT64_C(-5825679392398740695), INT64_C(-5825679392398740695), INT64_C(-5825679392398740695), INT64_C(-5825679392398740695), INT64_C(-5825679392398740695), INT64_C(-5825679392398740695), INT64_C(-5825679392398740695)) }, { simde_mm_set_epi64x(INT64_C(-7659760468268217575), INT64_C(-7291265332978304433)), simde_mm512_set_epi64(INT64_C(-7291265332978304433), INT64_C(-7291265332978304433), INT64_C(-7291265332978304433), INT64_C(-7291265332978304433), INT64_C(-7291265332978304433), INT64_C(-7291265332978304433), INT64_C(-7291265332978304433), INT64_C(-7291265332978304433)) }, { simde_mm_set_epi64x(INT64_C( 1291150907495215908), INT64_C(-4379734758100841882)), simde_mm512_set_epi64(INT64_C(-4379734758100841882), INT64_C(-4379734758100841882), INT64_C(-4379734758100841882), INT64_C(-4379734758100841882), INT64_C(-4379734758100841882), INT64_C(-4379734758100841882), INT64_C(-4379734758100841882), INT64_C(-4379734758100841882)) }, { simde_mm_set_epi64x(INT64_C(-1180659064658150843), INT64_C( 5101874260281362505)), simde_mm512_set_epi64(INT64_C( 5101874260281362505), INT64_C( 5101874260281362505), INT64_C( 5101874260281362505), INT64_C( 5101874260281362505), INT64_C( 5101874260281362505), INT64_C( 5101874260281362505), INT64_C( 5101874260281362505), INT64_C( 5101874260281362505)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_broadcastq_epi64(test_vec[i].a); simde_assert_m512i_i64(r, ==, test_vec[i].r); } 
return 0; } static int test_simde_mm512_mask_broadcastq_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask8 k; simde__m128i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( 7039210974079555453), INT64_C( 3663249249268849210), INT64_C( 4925990175430708534), INT64_C(-1667387235778762360), INT64_C( 5351281682312326681), INT64_C( 3728301356360833529), INT64_C( 3466465274179801181), INT64_C(-1906660408329519933)), UINT8_C( 18), simde_mm_set_epi64x(INT64_C(-5595987098075819748), INT64_C( 758519329013942001)), simde_mm512_set_epi64(INT64_C( 7039210974079555453), INT64_C( 3663249249268849210), INT64_C( 4925990175430708534), INT64_C( 758519329013942001), INT64_C( 5351281682312326681), INT64_C( 3728301356360833529), INT64_C( 758519329013942001), INT64_C(-1906660408329519933)) }, { simde_mm512_set_epi64(INT64_C( 8116099733890298375), INT64_C(-5538228106590303599), INT64_C( 635024397322015168), INT64_C( -558744086021510173), INT64_C(-6184591305598926545), INT64_C( 5112712356426664696), INT64_C( 4271523692628158456), INT64_C( 4760221676782691018)), UINT8_C( 63), simde_mm_set_epi64x(INT64_C(-4849808913003762590), INT64_C( 7901381612815228817)), simde_mm512_set_epi64(INT64_C( 8116099733890298375), INT64_C(-5538228106590303599), INT64_C( 7901381612815228817), INT64_C( 7901381612815228817), INT64_C( 7901381612815228817), INT64_C( 7901381612815228817), INT64_C( 7901381612815228817), INT64_C( 7901381612815228817)) }, { simde_mm512_set_epi64(INT64_C( 3907422245001509329), INT64_C( 9122234377856250881), INT64_C( 2949604402306461527), INT64_C(-1556319256029800786), INT64_C( 505893122375737895), INT64_C(-4662673460798144314), INT64_C(-5283682813813779349), INT64_C( 1076777643387686347)), UINT8_C( 72), simde_mm_set_epi64x(INT64_C(-5998178448496319999), INT64_C( 7927410529462710283)), simde_mm512_set_epi64(INT64_C( 3907422245001509329), INT64_C( 7927410529462710283), INT64_C( 2949604402306461527), INT64_C(-1556319256029800786), INT64_C( 
7927410529462710283), INT64_C(-4662673460798144314), INT64_C(-5283682813813779349), INT64_C( 1076777643387686347)) }, { simde_mm512_set_epi64(INT64_C( 7816636564820325115), INT64_C( 3059901680174485518), INT64_C( 934387437789942430), INT64_C(-3186304882973920425), INT64_C( 2674371711409421440), INT64_C( 589250439531013220), INT64_C( 6399938025556543269), INT64_C( 7353156679309525331)), UINT8_C(129), simde_mm_set_epi64x(INT64_C(-3132262719190613130), INT64_C( 8768324363382960003)), simde_mm512_set_epi64(INT64_C( 8768324363382960003), INT64_C( 3059901680174485518), INT64_C( 934387437789942430), INT64_C(-3186304882973920425), INT64_C( 2674371711409421440), INT64_C( 589250439531013220), INT64_C( 6399938025556543269), INT64_C( 8768324363382960003)) }, { simde_mm512_set_epi64(INT64_C(-8627274619235963858), INT64_C( 8777301179240593139), INT64_C( 6374835785680112451), INT64_C( 7397780769673384818), INT64_C( 2377134258823099770), INT64_C(-7880863417082399666), INT64_C(-1171321188047181919), INT64_C(-3525798013683697535)), UINT8_C(224), simde_mm_set_epi64x(INT64_C(-5077429793204296991), INT64_C( 9397950127957144)), simde_mm512_set_epi64(INT64_C( 9397950127957144), INT64_C( 9397950127957144), INT64_C( 9397950127957144), INT64_C( 7397780769673384818), INT64_C( 2377134258823099770), INT64_C(-7880863417082399666), INT64_C(-1171321188047181919), INT64_C(-3525798013683697535)) }, { simde_mm512_set_epi64(INT64_C(-8720472620890953453), INT64_C(-7288709697316354080), INT64_C(-6782362355017532020), INT64_C(-2865246368328647990), INT64_C( 365978918730627345), INT64_C( 6943833138524147909), INT64_C(-2061267430198683093), INT64_C(-1423835345422209809)), UINT8_C(224), simde_mm_set_epi64x(INT64_C(-2344999814881016531), INT64_C( 4136262378195933153)), simde_mm512_set_epi64(INT64_C( 4136262378195933153), INT64_C( 4136262378195933153), INT64_C( 4136262378195933153), INT64_C(-2865246368328647990), INT64_C( 365978918730627345), INT64_C( 6943833138524147909), INT64_C(-2061267430198683093), 
INT64_C(-1423835345422209809)) }, { simde_mm512_set_epi64(INT64_C(-1731596578336940525), INT64_C( 7970713727971134591), INT64_C(-1052101006731803887), INT64_C( 1606309165200441238), INT64_C( 3997306527802498928), INT64_C(-2162862772330994888), INT64_C(-7146698203065400738), INT64_C(-4784261768320577101)), UINT8_C(162), simde_mm_set_epi64x(INT64_C( 8139100144857954153), INT64_C(-8226703133729805160)), simde_mm512_set_epi64(INT64_C(-8226703133729805160), INT64_C( 7970713727971134591), INT64_C(-8226703133729805160), INT64_C( 1606309165200441238), INT64_C( 3997306527802498928), INT64_C(-2162862772330994888), INT64_C(-8226703133729805160), INT64_C(-4784261768320577101)) }, { simde_mm512_set_epi64(INT64_C(-4008569278181826465), INT64_C(-8982328587921142952), INT64_C(-2628821103943737251), INT64_C( -520709734501122237), INT64_C(-6401140198895522893), INT64_C( 2334998633271287360), INT64_C( 3511540092651127844), INT64_C( 3766647997225123999)), UINT8_C( 20), simde_mm_set_epi64x(INT64_C(-2115914707760868289), INT64_C(-7561958326757703437)), simde_mm512_set_epi64(INT64_C(-4008569278181826465), INT64_C(-8982328587921142952), INT64_C(-2628821103943737251), INT64_C(-7561958326757703437), INT64_C(-6401140198895522893), INT64_C(-7561958326757703437), INT64_C( 3511540092651127844), INT64_C( 3766647997225123999)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_broadcastq_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_broadcastq_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m128i a; simde__m512i r; } test_vec[8] = { { UINT8_C( 26), simde_mm_set_epi64x(INT64_C( 2524588358110376036), INT64_C( 3072349241054123220)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 3072349241054123220), INT64_C( 3072349241054123220), INT64_C( 0), INT64_C( 3072349241054123220), INT64_C( 0)) }, { 
UINT8_C(243), simde_mm_set_epi64x(INT64_C( 125736266274902517), INT64_C( 4529119523676940253)), simde_mm512_set_epi64(INT64_C( 4529119523676940253), INT64_C( 4529119523676940253), INT64_C( 4529119523676940253), INT64_C( 4529119523676940253), INT64_C( 0), INT64_C( 0), INT64_C( 4529119523676940253), INT64_C( 4529119523676940253)) }, { UINT8_C(180), simde_mm_set_epi64x(INT64_C( 9201922475629043961), INT64_C(-5256397243355602176)), simde_mm512_set_epi64(INT64_C(-5256397243355602176), INT64_C( 0), INT64_C(-5256397243355602176), INT64_C(-5256397243355602176), INT64_C( 0), INT64_C(-5256397243355602176), INT64_C( 0), INT64_C( 0)) }, { UINT8_C( 9), simde_mm_set_epi64x(INT64_C( 4382010425855345827), INT64_C( 4180832434708183127)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 4180832434708183127), INT64_C( 0), INT64_C( 0), INT64_C( 4180832434708183127)) }, { UINT8_C( 12), simde_mm_set_epi64x(INT64_C(-1597707644585397626), INT64_C( 5247050137625533391)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 5247050137625533391), INT64_C( 5247050137625533391), INT64_C( 0), INT64_C( 0)) }, { UINT8_C(150), simde_mm_set_epi64x(INT64_C(-3112884720261363420), INT64_C(-6750106718621562126)), simde_mm512_set_epi64(INT64_C(-6750106718621562126), INT64_C( 0), INT64_C( 0), INT64_C(-6750106718621562126), INT64_C( 0), INT64_C(-6750106718621562126), INT64_C(-6750106718621562126), INT64_C( 0)) }, { UINT8_C( 19), simde_mm_set_epi64x(INT64_C(-1258350286556471469), INT64_C(-5549390491787734701)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C(-5549390491787734701), INT64_C( 0), INT64_C( 0), INT64_C(-5549390491787734701), INT64_C(-5549390491787734701)) }, { UINT8_C(211), simde_mm_set_epi64x(INT64_C( 7726726670994043948), INT64_C( 5635717459582615161)), simde_mm512_set_epi64(INT64_C( 5635717459582615161), INT64_C( 5635717459582615161), INT64_C( 0), INT64_C( 5635717459582615161), INT64_C( 0), INT64_C( 0), 
INT64_C( 5635717459582615161), INT64_C( 5635717459582615161)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_broadcastq_epi64(test_vec[i].k, test_vec[i].a); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_broadcastss_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m512 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 104.48), SIMDE_FLOAT32_C( 410.97), SIMDE_FLOAT32_C( 548.32), SIMDE_FLOAT32_C( 631.04)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 668.18), SIMDE_FLOAT32_C( -107.97), SIMDE_FLOAT32_C( -627.99), SIMDE_FLOAT32_C( -347.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( -347.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 236.12), SIMDE_FLOAT32_C( -776.74), SIMDE_FLOAT32_C( 643.82), SIMDE_FLOAT32_C( -941.79)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -941.79), SIMDE_FLOAT32_C( -941.79), SIMDE_FLOAT32_C( -941.79), SIMDE_FLOAT32_C( -941.79), SIMDE_FLOAT32_C( -941.79), SIMDE_FLOAT32_C( -941.79), SIMDE_FLOAT32_C( -941.79), SIMDE_FLOAT32_C( -941.79), SIMDE_FLOAT32_C( -941.79), SIMDE_FLOAT32_C( -941.79), SIMDE_FLOAT32_C( -941.79), 
SIMDE_FLOAT32_C( -941.79), SIMDE_FLOAT32_C( -941.79), SIMDE_FLOAT32_C( -941.79), SIMDE_FLOAT32_C( -941.79), SIMDE_FLOAT32_C( -941.79)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -401.52), SIMDE_FLOAT32_C( 338.53), SIMDE_FLOAT32_C( -725.48), SIMDE_FLOAT32_C( 387.06)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 387.06), SIMDE_FLOAT32_C( 387.06), SIMDE_FLOAT32_C( 387.06), SIMDE_FLOAT32_C( 387.06), SIMDE_FLOAT32_C( 387.06), SIMDE_FLOAT32_C( 387.06), SIMDE_FLOAT32_C( 387.06), SIMDE_FLOAT32_C( 387.06), SIMDE_FLOAT32_C( 387.06), SIMDE_FLOAT32_C( 387.06), SIMDE_FLOAT32_C( 387.06), SIMDE_FLOAT32_C( 387.06), SIMDE_FLOAT32_C( 387.06), SIMDE_FLOAT32_C( 387.06), SIMDE_FLOAT32_C( 387.06), SIMDE_FLOAT32_C( 387.06)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 119.04), SIMDE_FLOAT32_C( 263.81), SIMDE_FLOAT32_C( 717.18), SIMDE_FLOAT32_C( -996.30)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -996.30), SIMDE_FLOAT32_C( -996.30), SIMDE_FLOAT32_C( -996.30), SIMDE_FLOAT32_C( -996.30), SIMDE_FLOAT32_C( -996.30), SIMDE_FLOAT32_C( -996.30), SIMDE_FLOAT32_C( -996.30), SIMDE_FLOAT32_C( -996.30), SIMDE_FLOAT32_C( -996.30), SIMDE_FLOAT32_C( -996.30), SIMDE_FLOAT32_C( -996.30), SIMDE_FLOAT32_C( -996.30), SIMDE_FLOAT32_C( -996.30), SIMDE_FLOAT32_C( -996.30), SIMDE_FLOAT32_C( -996.30), SIMDE_FLOAT32_C( -996.30)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -278.68), SIMDE_FLOAT32_C( 120.15), SIMDE_FLOAT32_C( 751.98), SIMDE_FLOAT32_C( 536.33)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 536.33), SIMDE_FLOAT32_C( 536.33), SIMDE_FLOAT32_C( 536.33), SIMDE_FLOAT32_C( 536.33), SIMDE_FLOAT32_C( 536.33), SIMDE_FLOAT32_C( 536.33), SIMDE_FLOAT32_C( 536.33), SIMDE_FLOAT32_C( 536.33), SIMDE_FLOAT32_C( 536.33), SIMDE_FLOAT32_C( 536.33), SIMDE_FLOAT32_C( 536.33), SIMDE_FLOAT32_C( 536.33), SIMDE_FLOAT32_C( 536.33), SIMDE_FLOAT32_C( 536.33), SIMDE_FLOAT32_C( 536.33), SIMDE_FLOAT32_C( 536.33)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -143.65), SIMDE_FLOAT32_C( 810.77), SIMDE_FLOAT32_C( -448.76), SIMDE_FLOAT32_C( 234.43)), 
simde_mm512_set_ps(SIMDE_FLOAT32_C( 234.43), SIMDE_FLOAT32_C( 234.43), SIMDE_FLOAT32_C( 234.43), SIMDE_FLOAT32_C( 234.43), SIMDE_FLOAT32_C( 234.43), SIMDE_FLOAT32_C( 234.43), SIMDE_FLOAT32_C( 234.43), SIMDE_FLOAT32_C( 234.43), SIMDE_FLOAT32_C( 234.43), SIMDE_FLOAT32_C( 234.43), SIMDE_FLOAT32_C( 234.43), SIMDE_FLOAT32_C( 234.43), SIMDE_FLOAT32_C( 234.43), SIMDE_FLOAT32_C( 234.43), SIMDE_FLOAT32_C( 234.43), SIMDE_FLOAT32_C( 234.43)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -42.20), SIMDE_FLOAT32_C( -923.83), SIMDE_FLOAT32_C( 357.03), SIMDE_FLOAT32_C( -933.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -933.51), SIMDE_FLOAT32_C( -933.51), SIMDE_FLOAT32_C( -933.51), SIMDE_FLOAT32_C( -933.51), SIMDE_FLOAT32_C( -933.51), SIMDE_FLOAT32_C( -933.51), SIMDE_FLOAT32_C( -933.51), SIMDE_FLOAT32_C( -933.51), SIMDE_FLOAT32_C( -933.51), SIMDE_FLOAT32_C( -933.51), SIMDE_FLOAT32_C( -933.51), SIMDE_FLOAT32_C( -933.51), SIMDE_FLOAT32_C( -933.51), SIMDE_FLOAT32_C( -933.51), SIMDE_FLOAT32_C( -933.51), SIMDE_FLOAT32_C( -933.51)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_broadcastss_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_broadcastss_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m128 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -227.30), SIMDE_FLOAT32_C( 999.04), SIMDE_FLOAT32_C( 956.07), SIMDE_FLOAT32_C( -270.40), SIMDE_FLOAT32_C( 132.00), SIMDE_FLOAT32_C( 480.19), SIMDE_FLOAT32_C( -107.97), SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( -927.52), SIMDE_FLOAT32_C( -67.87), SIMDE_FLOAT32_C( 891.86), SIMDE_FLOAT32_C( -870.50), SIMDE_FLOAT32_C( 932.69), SIMDE_FLOAT32_C( 244.86), SIMDE_FLOAT32_C( -621.59), SIMDE_FLOAT32_C( 36.25)), UINT16_C(30253), simde_mm_set_ps(SIMDE_FLOAT32_C( 134.75), SIMDE_FLOAT32_C( 871.12), SIMDE_FLOAT32_C( 104.48), SIMDE_FLOAT32_C( 548.32)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 
-227.30), SIMDE_FLOAT32_C( 548.32), SIMDE_FLOAT32_C( 548.32), SIMDE_FLOAT32_C( 548.32), SIMDE_FLOAT32_C( 132.00), SIMDE_FLOAT32_C( 548.32), SIMDE_FLOAT32_C( 548.32), SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( -927.52), SIMDE_FLOAT32_C( -67.87), SIMDE_FLOAT32_C( 548.32), SIMDE_FLOAT32_C( -870.50), SIMDE_FLOAT32_C( 548.32), SIMDE_FLOAT32_C( 548.32), SIMDE_FLOAT32_C( -621.59), SIMDE_FLOAT32_C( 548.32)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -996.30), SIMDE_FLOAT32_C( 389.31), SIMDE_FLOAT32_C( 670.35), SIMDE_FLOAT32_C( 396.13), SIMDE_FLOAT32_C( -971.67), SIMDE_FLOAT32_C( 528.69), SIMDE_FLOAT32_C( 275.37), SIMDE_FLOAT32_C( 338.53), SIMDE_FLOAT32_C( 387.06), SIMDE_FLOAT32_C( 29.64), SIMDE_FLOAT32_C( 199.34), SIMDE_FLOAT32_C( -686.40), SIMDE_FLOAT32_C( 717.18), SIMDE_FLOAT32_C( 416.06), SIMDE_FLOAT32_C( 645.78), SIMDE_FLOAT32_C( -990.79)), UINT16_C(37933), simde_mm_set_ps(SIMDE_FLOAT32_C( 442.28), SIMDE_FLOAT32_C( 811.14), SIMDE_FLOAT32_C( -767.79), SIMDE_FLOAT32_C( 236.12)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 236.12), SIMDE_FLOAT32_C( 389.31), SIMDE_FLOAT32_C( 670.35), SIMDE_FLOAT32_C( 236.12), SIMDE_FLOAT32_C( -971.67), SIMDE_FLOAT32_C( 236.12), SIMDE_FLOAT32_C( 275.37), SIMDE_FLOAT32_C( 338.53), SIMDE_FLOAT32_C( 387.06), SIMDE_FLOAT32_C( 29.64), SIMDE_FLOAT32_C( 236.12), SIMDE_FLOAT32_C( -686.40), SIMDE_FLOAT32_C( 236.12), SIMDE_FLOAT32_C( 236.12), SIMDE_FLOAT32_C( 645.78), SIMDE_FLOAT32_C( 236.12)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 810.77), SIMDE_FLOAT32_C( 234.43), SIMDE_FLOAT32_C( 101.39), SIMDE_FLOAT32_C( -366.10), SIMDE_FLOAT32_C( 855.96), SIMDE_FLOAT32_C( -55.56), SIMDE_FLOAT32_C( 896.89), SIMDE_FLOAT32_C( 697.60), SIMDE_FLOAT32_C( 120.15), SIMDE_FLOAT32_C( 536.33), SIMDE_FLOAT32_C( -156.71), SIMDE_FLOAT32_C( -331.13), SIMDE_FLOAT32_C( -143.65), SIMDE_FLOAT32_C( -448.76), SIMDE_FLOAT32_C( -628.22), SIMDE_FLOAT32_C( 318.72)), UINT16_C(19701), simde_mm_set_ps(SIMDE_FLOAT32_C( -248.84), SIMDE_FLOAT32_C( 566.99), SIMDE_FLOAT32_C( -650.08), SIMDE_FLOAT32_C( 
-460.40)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 810.77), SIMDE_FLOAT32_C( -460.40), SIMDE_FLOAT32_C( 101.39), SIMDE_FLOAT32_C( -366.10), SIMDE_FLOAT32_C( -460.40), SIMDE_FLOAT32_C( -460.40), SIMDE_FLOAT32_C( 896.89), SIMDE_FLOAT32_C( 697.60), SIMDE_FLOAT32_C( -460.40), SIMDE_FLOAT32_C( -460.40), SIMDE_FLOAT32_C( -460.40), SIMDE_FLOAT32_C( -460.40), SIMDE_FLOAT32_C( -143.65), SIMDE_FLOAT32_C( -460.40), SIMDE_FLOAT32_C( -628.22), SIMDE_FLOAT32_C( -460.40)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 779.71), SIMDE_FLOAT32_C( 196.66), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( -273.06), SIMDE_FLOAT32_C( 429.50), SIMDE_FLOAT32_C( 650.80), SIMDE_FLOAT32_C( 509.10), SIMDE_FLOAT32_C( 709.57), SIMDE_FLOAT32_C( -561.64), SIMDE_FLOAT32_C( -923.83), SIMDE_FLOAT32_C( -933.51), SIMDE_FLOAT32_C( -304.13), SIMDE_FLOAT32_C( 728.72), SIMDE_FLOAT32_C( -511.49), SIMDE_FLOAT32_C( 144.42), SIMDE_FLOAT32_C( 848.91)), UINT16_C(27468), simde_mm_set_ps(SIMDE_FLOAT32_C( 949.19), SIMDE_FLOAT32_C( -102.63), SIMDE_FLOAT32_C( 87.04), SIMDE_FLOAT32_C( 914.16)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 779.71), SIMDE_FLOAT32_C( 914.16), SIMDE_FLOAT32_C( 914.16), SIMDE_FLOAT32_C( -273.06), SIMDE_FLOAT32_C( 914.16), SIMDE_FLOAT32_C( 650.80), SIMDE_FLOAT32_C( 914.16), SIMDE_FLOAT32_C( 914.16), SIMDE_FLOAT32_C( -561.64), SIMDE_FLOAT32_C( 914.16), SIMDE_FLOAT32_C( -933.51), SIMDE_FLOAT32_C( -304.13), SIMDE_FLOAT32_C( 914.16), SIMDE_FLOAT32_C( 914.16), SIMDE_FLOAT32_C( 144.42), SIMDE_FLOAT32_C( 848.91)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -960.76), SIMDE_FLOAT32_C( -613.57), SIMDE_FLOAT32_C( 864.92), SIMDE_FLOAT32_C( 278.02), SIMDE_FLOAT32_C( 573.37), SIMDE_FLOAT32_C( 393.40), SIMDE_FLOAT32_C( -782.91), SIMDE_FLOAT32_C( -933.90), SIMDE_FLOAT32_C( -291.87), SIMDE_FLOAT32_C( 382.75), SIMDE_FLOAT32_C( -62.73), SIMDE_FLOAT32_C( 163.52), SIMDE_FLOAT32_C( 87.09), SIMDE_FLOAT32_C( -486.60), SIMDE_FLOAT32_C( -157.79), SIMDE_FLOAT32_C( -247.69)), UINT16_C(56353), simde_mm_set_ps(SIMDE_FLOAT32_C( -97.06), 
SIMDE_FLOAT32_C( -2.41), SIMDE_FLOAT32_C( 418.81), SIMDE_FLOAT32_C( -141.42)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -141.42), SIMDE_FLOAT32_C( -141.42), SIMDE_FLOAT32_C( 864.92), SIMDE_FLOAT32_C( -141.42), SIMDE_FLOAT32_C( -141.42), SIMDE_FLOAT32_C( -141.42), SIMDE_FLOAT32_C( -782.91), SIMDE_FLOAT32_C( -933.90), SIMDE_FLOAT32_C( -291.87), SIMDE_FLOAT32_C( 382.75), SIMDE_FLOAT32_C( -141.42), SIMDE_FLOAT32_C( 163.52), SIMDE_FLOAT32_C( 87.09), SIMDE_FLOAT32_C( -486.60), SIMDE_FLOAT32_C( -157.79), SIMDE_FLOAT32_C( -141.42)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -490.98), SIMDE_FLOAT32_C( -718.54), SIMDE_FLOAT32_C( -172.08), SIMDE_FLOAT32_C( 476.19), SIMDE_FLOAT32_C( -825.45), SIMDE_FLOAT32_C( -528.02), SIMDE_FLOAT32_C( -604.26), SIMDE_FLOAT32_C( -201.78), SIMDE_FLOAT32_C( -105.47), SIMDE_FLOAT32_C( 619.70), SIMDE_FLOAT32_C( 603.28), SIMDE_FLOAT32_C( -553.28), SIMDE_FLOAT32_C( 787.83), SIMDE_FLOAT32_C( -945.21), SIMDE_FLOAT32_C( -786.09), SIMDE_FLOAT32_C( 628.77)), UINT16_C(51486), simde_mm_set_ps(SIMDE_FLOAT32_C( 54.48), SIMDE_FLOAT32_C( 679.92), SIMDE_FLOAT32_C( -550.45), SIMDE_FLOAT32_C( -482.87)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -482.87), SIMDE_FLOAT32_C( -482.87), SIMDE_FLOAT32_C( -172.08), SIMDE_FLOAT32_C( 476.19), SIMDE_FLOAT32_C( -482.87), SIMDE_FLOAT32_C( -528.02), SIMDE_FLOAT32_C( -604.26), SIMDE_FLOAT32_C( -482.87), SIMDE_FLOAT32_C( -105.47), SIMDE_FLOAT32_C( 619.70), SIMDE_FLOAT32_C( 603.28), SIMDE_FLOAT32_C( -482.87), SIMDE_FLOAT32_C( -482.87), SIMDE_FLOAT32_C( -482.87), SIMDE_FLOAT32_C( -482.87), SIMDE_FLOAT32_C( 628.77)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -124.63), SIMDE_FLOAT32_C( -948.04), SIMDE_FLOAT32_C( -344.11), SIMDE_FLOAT32_C( -424.86), SIMDE_FLOAT32_C( 640.76), SIMDE_FLOAT32_C( -243.42), SIMDE_FLOAT32_C( 962.71), SIMDE_FLOAT32_C( 314.11), SIMDE_FLOAT32_C( 599.88), SIMDE_FLOAT32_C( -844.53), SIMDE_FLOAT32_C( -530.48), SIMDE_FLOAT32_C( 563.54), SIMDE_FLOAT32_C( 165.16), SIMDE_FLOAT32_C( 384.17), SIMDE_FLOAT32_C( 149.22), 
SIMDE_FLOAT32_C( 712.14)), UINT16_C(53759), simde_mm_set_ps(SIMDE_FLOAT32_C( -284.59), SIMDE_FLOAT32_C( -286.48), SIMDE_FLOAT32_C( -340.65), SIMDE_FLOAT32_C( 563.88)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 563.88), SIMDE_FLOAT32_C( 563.88), SIMDE_FLOAT32_C( -344.11), SIMDE_FLOAT32_C( 563.88), SIMDE_FLOAT32_C( 640.76), SIMDE_FLOAT32_C( -243.42), SIMDE_FLOAT32_C( 962.71), SIMDE_FLOAT32_C( 563.88), SIMDE_FLOAT32_C( 563.88), SIMDE_FLOAT32_C( 563.88), SIMDE_FLOAT32_C( 563.88), SIMDE_FLOAT32_C( 563.88), SIMDE_FLOAT32_C( 563.88), SIMDE_FLOAT32_C( 563.88), SIMDE_FLOAT32_C( 563.88), SIMDE_FLOAT32_C( 563.88)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -434.52), SIMDE_FLOAT32_C( 678.42), SIMDE_FLOAT32_C( -65.20), SIMDE_FLOAT32_C( -319.19), SIMDE_FLOAT32_C( 664.97), SIMDE_FLOAT32_C( 9.01), SIMDE_FLOAT32_C( -334.08), SIMDE_FLOAT32_C( -870.44), SIMDE_FLOAT32_C( 269.08), SIMDE_FLOAT32_C( -345.75), SIMDE_FLOAT32_C( -732.77), SIMDE_FLOAT32_C( 374.12), SIMDE_FLOAT32_C( -491.24), SIMDE_FLOAT32_C( 525.54), SIMDE_FLOAT32_C( -178.26), SIMDE_FLOAT32_C( -733.62)), UINT16_C(50870), simde_mm_set_ps(SIMDE_FLOAT32_C( -300.63), SIMDE_FLOAT32_C( -396.75), SIMDE_FLOAT32_C( 745.02), SIMDE_FLOAT32_C( 369.43)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 369.43), SIMDE_FLOAT32_C( 369.43), SIMDE_FLOAT32_C( -65.20), SIMDE_FLOAT32_C( -319.19), SIMDE_FLOAT32_C( 664.97), SIMDE_FLOAT32_C( 369.43), SIMDE_FLOAT32_C( 369.43), SIMDE_FLOAT32_C( -870.44), SIMDE_FLOAT32_C( 369.43), SIMDE_FLOAT32_C( -345.75), SIMDE_FLOAT32_C( 369.43), SIMDE_FLOAT32_C( 369.43), SIMDE_FLOAT32_C( -491.24), SIMDE_FLOAT32_C( 369.43), SIMDE_FLOAT32_C( 369.43), SIMDE_FLOAT32_C( -733.62)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_broadcastss_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_maskz_broadcastss_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m128 a; 
simde__m512 r; } test_vec[8] = { { UINT16_C(25371), simde_mm_set_ps(SIMDE_FLOAT32_C( 104.48), SIMDE_FLOAT32_C( 410.97), SIMDE_FLOAT32_C( 548.32), SIMDE_FLOAT32_C( 631.04)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 631.04), SIMDE_FLOAT32_C( 631.04)) }, { UINT16_C(49342), simde_mm_set_ps(SIMDE_FLOAT32_C( -559.08), SIMDE_FLOAT32_C( 480.19), SIMDE_FLOAT32_C( 668.18), SIMDE_FLOAT32_C( -107.97)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -107.97), SIMDE_FLOAT32_C( -107.97), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -107.97), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -107.97), SIMDE_FLOAT32_C( -107.97), SIMDE_FLOAT32_C( -107.97), SIMDE_FLOAT32_C( -107.97), SIMDE_FLOAT32_C( -107.97), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(24820), simde_mm_set_ps(SIMDE_FLOAT32_C( 811.14), SIMDE_FLOAT32_C( -333.00), SIMDE_FLOAT32_C( -767.79), SIMDE_FLOAT32_C( 825.12)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 825.12), SIMDE_FLOAT32_C( 825.12), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 825.12), SIMDE_FLOAT32_C( 825.12), SIMDE_FLOAT32_C( 825.12), SIMDE_FLOAT32_C( 825.12), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 825.12), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(45881), simde_mm_set_ps(SIMDE_FLOAT32_C( -610.29), SIMDE_FLOAT32_C( -971.67), SIMDE_FLOAT32_C( 997.86), SIMDE_FLOAT32_C( 528.69)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 528.69), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 528.69), SIMDE_FLOAT32_C( 528.69), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 528.69), SIMDE_FLOAT32_C( 528.69), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 528.69), SIMDE_FLOAT32_C( 528.69), SIMDE_FLOAT32_C( 528.69), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 528.69)) }, { UINT16_C(28771), simde_mm_set_ps(SIMDE_FLOAT32_C( -248.84), SIMDE_FLOAT32_C( 102.57), SIMDE_FLOAT32_C( 566.99), SIMDE_FLOAT32_C( 900.54)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 900.54), SIMDE_FLOAT32_C( 900.54), SIMDE_FLOAT32_C( 900.54), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 900.54), SIMDE_FLOAT32_C( 900.54), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 900.54), SIMDE_FLOAT32_C( 900.54)) }, { UINT16_C(61611), simde_mm_set_ps(SIMDE_FLOAT32_C( 318.72), SIMDE_FLOAT32_C( -366.10), SIMDE_FLOAT32_C( 625.17), SIMDE_FLOAT32_C( 855.96)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 855.96), SIMDE_FLOAT32_C( 855.96), SIMDE_FLOAT32_C( 855.96), SIMDE_FLOAT32_C( 855.96), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 855.96), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 855.96), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 855.96), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 855.96), SIMDE_FLOAT32_C( 855.96)) }, { UINT16_C(55548), simde_mm_set_ps(SIMDE_FLOAT32_C( 324.92), SIMDE_FLOAT32_C( -304.13), SIMDE_FLOAT32_C( 949.19), SIMDE_FLOAT32_C( 617.60)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 617.60), SIMDE_FLOAT32_C( 617.60), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 617.60), SIMDE_FLOAT32_C( 617.60), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 617.60), SIMDE_FLOAT32_C( 617.60), SIMDE_FLOAT32_C( 617.60), SIMDE_FLOAT32_C( 617.60), SIMDE_FLOAT32_C( 617.60), SIMDE_FLOAT32_C( 617.60), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { 
UINT16_C(15841), simde_mm_set_ps(SIMDE_FLOAT32_C( 144.42), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( 848.91), SIMDE_FLOAT32_C( -273.06)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -273.06), SIMDE_FLOAT32_C( -273.06), SIMDE_FLOAT32_C( -273.06), SIMDE_FLOAT32_C( -273.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -273.06), SIMDE_FLOAT32_C( -273.06), SIMDE_FLOAT32_C( -273.06), SIMDE_FLOAT32_C( -273.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -273.06)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_maskz_broadcastss_ps(test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_broadcastsd_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m512d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 912.41), SIMDE_FLOAT64_C( 842.49)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 842.49), SIMDE_FLOAT64_C( 842.49), SIMDE_FLOAT64_C( 842.49), SIMDE_FLOAT64_C( 842.49), SIMDE_FLOAT64_C( 842.49), SIMDE_FLOAT64_C( 842.49), SIMDE_FLOAT64_C( 842.49), SIMDE_FLOAT64_C( 842.49)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -93.61), SIMDE_FLOAT64_C( -903.55)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -903.55), SIMDE_FLOAT64_C( -903.55), SIMDE_FLOAT64_C( -903.55), SIMDE_FLOAT64_C( -903.55), SIMDE_FLOAT64_C( -903.55), SIMDE_FLOAT64_C( -903.55), SIMDE_FLOAT64_C( -903.55), SIMDE_FLOAT64_C( -903.55)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -219.93), SIMDE_FLOAT64_C( -754.32)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -754.32), SIMDE_FLOAT64_C( -754.32), SIMDE_FLOAT64_C( -754.32), SIMDE_FLOAT64_C( -754.32), SIMDE_FLOAT64_C( -754.32), SIMDE_FLOAT64_C( -754.32), SIMDE_FLOAT64_C( -754.32), SIMDE_FLOAT64_C( -754.32)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 137.51), SIMDE_FLOAT64_C( 527.47)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 527.47), SIMDE_FLOAT64_C( 527.47), SIMDE_FLOAT64_C( 
527.47), SIMDE_FLOAT64_C( 527.47), SIMDE_FLOAT64_C( 527.47), SIMDE_FLOAT64_C( 527.47), SIMDE_FLOAT64_C( 527.47), SIMDE_FLOAT64_C( 527.47)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 596.82), SIMDE_FLOAT64_C( 365.41)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 365.41), SIMDE_FLOAT64_C( 365.41), SIMDE_FLOAT64_C( 365.41), SIMDE_FLOAT64_C( 365.41), SIMDE_FLOAT64_C( 365.41), SIMDE_FLOAT64_C( 365.41), SIMDE_FLOAT64_C( 365.41), SIMDE_FLOAT64_C( 365.41)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -229.59), SIMDE_FLOAT64_C( -642.88)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -642.88), SIMDE_FLOAT64_C( -642.88), SIMDE_FLOAT64_C( -642.88), SIMDE_FLOAT64_C( -642.88), SIMDE_FLOAT64_C( -642.88), SIMDE_FLOAT64_C( -642.88), SIMDE_FLOAT64_C( -642.88), SIMDE_FLOAT64_C( -642.88)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 308.04), SIMDE_FLOAT64_C( -958.64)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -958.64), SIMDE_FLOAT64_C( -958.64), SIMDE_FLOAT64_C( -958.64), SIMDE_FLOAT64_C( -958.64), SIMDE_FLOAT64_C( -958.64), SIMDE_FLOAT64_C( -958.64), SIMDE_FLOAT64_C( -958.64), SIMDE_FLOAT64_C( -958.64)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 991.16), SIMDE_FLOAT64_C( -172.14)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -172.14), SIMDE_FLOAT64_C( -172.14), SIMDE_FLOAT64_C( -172.14), SIMDE_FLOAT64_C( -172.14), SIMDE_FLOAT64_C( -172.14), SIMDE_FLOAT64_C( -172.14), SIMDE_FLOAT64_C( -172.14), SIMDE_FLOAT64_C( -172.14)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_broadcastsd_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_broadcastsd_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m128d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -746.75), SIMDE_FLOAT64_C( 634.39), SIMDE_FLOAT64_C( -651.68), SIMDE_FLOAT64_C( -903.55), SIMDE_FLOAT64_C( 689.73), SIMDE_FLOAT64_C( 178.89), SIMDE_FLOAT64_C( -342.04), SIMDE_FLOAT64_C( -292.58)), UINT8_C(162), 
simde_mm_set_pd(SIMDE_FLOAT64_C( 63.63), SIMDE_FLOAT64_C( 912.41)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 912.41), SIMDE_FLOAT64_C( 634.39), SIMDE_FLOAT64_C( 912.41), SIMDE_FLOAT64_C( -903.55), SIMDE_FLOAT64_C( 689.73), SIMDE_FLOAT64_C( 178.89), SIMDE_FLOAT64_C( 912.41), SIMDE_FLOAT64_C( -292.58)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -287.03), SIMDE_FLOAT64_C( 851.62), SIMDE_FLOAT64_C( 765.97), SIMDE_FLOAT64_C( 137.51), SIMDE_FLOAT64_C( -457.60), SIMDE_FLOAT64_C( 815.46), SIMDE_FLOAT64_C( 365.41), SIMDE_FLOAT64_C( 250.27)), UINT8_C( 66), simde_mm_set_pd(SIMDE_FLOAT64_C( -149.06), SIMDE_FLOAT64_C( -899.78)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -287.03), SIMDE_FLOAT64_C( -899.78), SIMDE_FLOAT64_C( 765.97), SIMDE_FLOAT64_C( 137.51), SIMDE_FLOAT64_C( -457.60), SIMDE_FLOAT64_C( 815.46), SIMDE_FLOAT64_C( -899.78), SIMDE_FLOAT64_C( 250.27)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -958.64), SIMDE_FLOAT64_C( 523.36), SIMDE_FLOAT64_C( -361.34), SIMDE_FLOAT64_C( -153.87), SIMDE_FLOAT64_C( -642.88), SIMDE_FLOAT64_C( 573.19), SIMDE_FLOAT64_C( 308.04), SIMDE_FLOAT64_C( -38.88)), UINT8_C(115), simde_mm_set_pd(SIMDE_FLOAT64_C( -758.12), SIMDE_FLOAT64_C( 12.83)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -958.64), SIMDE_FLOAT64_C( 12.83), SIMDE_FLOAT64_C( 12.83), SIMDE_FLOAT64_C( 12.83), SIMDE_FLOAT64_C( -642.88), SIMDE_FLOAT64_C( 573.19), SIMDE_FLOAT64_C( 12.83), SIMDE_FLOAT64_C( 12.83)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -221.94), SIMDE_FLOAT64_C( 499.58), SIMDE_FLOAT64_C( 49.04), SIMDE_FLOAT64_C( -205.69), SIMDE_FLOAT64_C( 991.16), SIMDE_FLOAT64_C( -984.94), SIMDE_FLOAT64_C( 224.44), SIMDE_FLOAT64_C( 644.01)), UINT8_C( 4), simde_mm_set_pd(SIMDE_FLOAT64_C( 53.80), SIMDE_FLOAT64_C( -691.82)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -221.94), SIMDE_FLOAT64_C( 499.58), SIMDE_FLOAT64_C( 49.04), SIMDE_FLOAT64_C( -205.69), SIMDE_FLOAT64_C( 991.16), SIMDE_FLOAT64_C( -691.82), SIMDE_FLOAT64_C( 224.44), SIMDE_FLOAT64_C( 644.01)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -225.42), 
SIMDE_FLOAT64_C( -144.08), SIMDE_FLOAT64_C( -549.59), SIMDE_FLOAT64_C( 465.78), SIMDE_FLOAT64_C( -316.69), SIMDE_FLOAT64_C( -133.94), SIMDE_FLOAT64_C( -646.50), SIMDE_FLOAT64_C( 160.17)), UINT8_C(172), simde_mm_set_pd(SIMDE_FLOAT64_C( 841.59), SIMDE_FLOAT64_C( 843.47)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 843.47), SIMDE_FLOAT64_C( -144.08), SIMDE_FLOAT64_C( 843.47), SIMDE_FLOAT64_C( 465.78), SIMDE_FLOAT64_C( 843.47), SIMDE_FLOAT64_C( 843.47), SIMDE_FLOAT64_C( -646.50), SIMDE_FLOAT64_C( 160.17)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -560.31), SIMDE_FLOAT64_C( -882.49), SIMDE_FLOAT64_C( -54.78), SIMDE_FLOAT64_C( -896.38), SIMDE_FLOAT64_C( 607.65), SIMDE_FLOAT64_C( -296.43), SIMDE_FLOAT64_C( 124.51), SIMDE_FLOAT64_C( -913.38)), UINT8_C(201), simde_mm_set_pd(SIMDE_FLOAT64_C( 625.18), SIMDE_FLOAT64_C( 54.43)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 54.43), SIMDE_FLOAT64_C( 54.43), SIMDE_FLOAT64_C( -54.78), SIMDE_FLOAT64_C( -896.38), SIMDE_FLOAT64_C( 54.43), SIMDE_FLOAT64_C( -296.43), SIMDE_FLOAT64_C( 124.51), SIMDE_FLOAT64_C( 54.43)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 349.28), SIMDE_FLOAT64_C( -559.74), SIMDE_FLOAT64_C( -116.49), SIMDE_FLOAT64_C( 342.49), SIMDE_FLOAT64_C( -608.07), SIMDE_FLOAT64_C( 778.83), SIMDE_FLOAT64_C( -284.17), SIMDE_FLOAT64_C( -113.81)), UINT8_C(234), simde_mm_set_pd(SIMDE_FLOAT64_C( 312.83), SIMDE_FLOAT64_C( -27.64)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -27.64), SIMDE_FLOAT64_C( -27.64), SIMDE_FLOAT64_C( -27.64), SIMDE_FLOAT64_C( 342.49), SIMDE_FLOAT64_C( -27.64), SIMDE_FLOAT64_C( 778.83), SIMDE_FLOAT64_C( -27.64), SIMDE_FLOAT64_C( -113.81)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 364.84), SIMDE_FLOAT64_C( 86.19), SIMDE_FLOAT64_C( -699.29), SIMDE_FLOAT64_C( 244.26), SIMDE_FLOAT64_C( -206.27), SIMDE_FLOAT64_C( -921.17), SIMDE_FLOAT64_C( 483.42), SIMDE_FLOAT64_C( -935.00)), UINT8_C( 12), simde_mm_set_pd(SIMDE_FLOAT64_C( -328.98), SIMDE_FLOAT64_C( 803.91)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 364.84), SIMDE_FLOAT64_C( 86.19), 
SIMDE_FLOAT64_C( -699.29), SIMDE_FLOAT64_C( 244.26), SIMDE_FLOAT64_C( 803.91), SIMDE_FLOAT64_C( 803.91), SIMDE_FLOAT64_C( 483.42), SIMDE_FLOAT64_C( -935.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_broadcastsd_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_maskz_broadcastsd_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m128d a; simde__m512d r; } test_vec[8] = { { UINT8_C(128), simde_mm_set_pd(SIMDE_FLOAT64_C( 912.41), SIMDE_FLOAT64_C( 842.49)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 842.49), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C( 2), simde_mm_set_pd(SIMDE_FLOAT64_C( -651.68), SIMDE_FLOAT64_C( -93.61)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -93.61), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(216), simde_mm_set_pd(SIMDE_FLOAT64_C( 881.57), SIMDE_FLOAT64_C( -899.78)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -899.78), SIMDE_FLOAT64_C( -899.78), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -899.78), SIMDE_FLOAT64_C( -899.78), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(183), simde_mm_set_pd(SIMDE_FLOAT64_C( -292.94), SIMDE_FLOAT64_C( 765.97)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 765.97), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 765.97), SIMDE_FLOAT64_C( 765.97), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 765.97), SIMDE_FLOAT64_C( 765.97), SIMDE_FLOAT64_C( 765.97)) }, { UINT8_C(169), simde_mm_set_pd(SIMDE_FLOAT64_C( -758.12), SIMDE_FLOAT64_C( 593.03)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 593.03), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 593.03), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 
593.03), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 593.03)) }, { UINT8_C(243), simde_mm_set_pd(SIMDE_FLOAT64_C( 523.36), SIMDE_FLOAT64_C( 761.91)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 761.91), SIMDE_FLOAT64_C( 761.91), SIMDE_FLOAT64_C( 761.91), SIMDE_FLOAT64_C( 761.91), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 761.91), SIMDE_FLOAT64_C( 761.91)) }, { UINT8_C(109), simde_mm_set_pd(SIMDE_FLOAT64_C( -984.94), SIMDE_FLOAT64_C( 53.80)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 53.80), SIMDE_FLOAT64_C( 53.80), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 53.80), SIMDE_FLOAT64_C( 53.80), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 53.80)) }, { UINT8_C(168), simde_mm_set_pd(SIMDE_FLOAT64_C( 644.01), SIMDE_FLOAT64_C( 499.58)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 499.58), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 499.58), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 499.58), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_maskz_broadcastsd_pd(test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_broadcastb_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m512i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( -17), INT8_C( 88), INT8_C(-122), INT8_C(-119), INT8_C( 111), INT8_C( 87), INT8_C( -76), INT8_C( 27), INT8_C( -93), INT8_C( -8), INT8_C( -17), INT8_C( 24), INT8_C( 69), INT8_C( 116), INT8_C( -78), INT8_C(-124)), simde_mm512_set_epi8(INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), 
INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124), INT8_C(-124)) }, { simde_mm_set_epi8(INT8_C( -5), INT8_C(-114), INT8_C( -86), INT8_C( -2), INT8_C( 33), INT8_C( 90), INT8_C( -50), INT8_C( 28), INT8_C( 15), INT8_C( 12), INT8_C( 32), INT8_C( 54), INT8_C( -15), INT8_C( -57), INT8_C( 36), INT8_C( -50)), simde_mm512_set_epi8(INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50)) }, { simde_mm_set_epi8(INT8_C( -49), INT8_C( -76), INT8_C( -62), INT8_C( 118), INT8_C( -4), INT8_C( -25), INT8_C( -58), INT8_C( 126), INT8_C(-115), INT8_C( 126), INT8_C(-104), INT8_C( 127), INT8_C( 15), INT8_C( 41), INT8_C( 68), INT8_C( 31)), simde_mm512_set_epi8(INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 
31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31), INT8_C( 31)) }, { simde_mm_set_epi8(INT8_C( -30), INT8_C( -23), INT8_C( -42), INT8_C( -27), INT8_C(-102), INT8_C( -5), INT8_C( -87), INT8_C( 98), INT8_C( 33), INT8_C( 73), INT8_C( 125), INT8_C( 120), INT8_C( -70), INT8_C( 59), INT8_C( 124), INT8_C( 46)), simde_mm512_set_epi8(INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46), INT8_C( 46)) }, { simde_mm_set_epi8(INT8_C( -18), INT8_C( 28), INT8_C( -19), INT8_C( -73), INT8_C( -19), INT8_C( 67), INT8_C( 79), INT8_C( -45), 
INT8_C(-124), INT8_C( 80), INT8_C(-101), INT8_C(-122), INT8_C( -54), INT8_C( 30), INT8_C( -16), INT8_C( 55)), simde_mm512_set_epi8(INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55)) }, { simde_mm_set_epi8(INT8_C( 6), INT8_C( -5), INT8_C( 37), INT8_C( -97), INT8_C( 16), INT8_C( -5), INT8_C( -18), INT8_C( 14), INT8_C(-120), INT8_C( -59), INT8_C( -43), INT8_C( -97), INT8_C( -71), INT8_C( -73), INT8_C( -73), INT8_C( -50)), simde_mm512_set_epi8(INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), 
INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -50)) }, { simde_mm_set_epi8(INT8_C( 119), INT8_C( 60), INT8_C( 63), INT8_C( -26), INT8_C( 50), INT8_C( 56), INT8_C( 40), INT8_C( -7), INT8_C( 68), INT8_C( -11), INT8_C( -21), INT8_C( -77), INT8_C( 56), INT8_C(-109), INT8_C(-118), INT8_C(-108)), simde_mm512_set_epi8(INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108), INT8_C(-108)) }, { simde_mm_set_epi8(INT8_C(-112), INT8_C( 65), INT8_C( 26), INT8_C( -90), INT8_C( -77), INT8_C( 72), INT8_C( 2), INT8_C( 4), INT8_C( -52), INT8_C( -82), INT8_C( -18), INT8_C( -66), INT8_C(-118), INT8_C( -10), INT8_C( 52), INT8_C( -40)), simde_mm512_set_epi8(INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( 
-40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40), INT8_C( -40)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_broadcastb_epi8(test_vec[i].a); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_broadcastb_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask64 k; simde__m128i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( 65), INT8_C( -68), INT8_C( 102), INT8_C(-122), INT8_C( 40), INT8_C( 19), INT8_C(-111), INT8_C( 8), INT8_C( -58), INT8_C(-120), INT8_C( 111), INT8_C( 10), INT8_C( -54), INT8_C(-100), INT8_C( 36), INT8_C( 27), INT8_C(-106), INT8_C(-123), INT8_C( 120), INT8_C( 43), INT8_C( -31), INT8_C( 4), INT8_C( 10), INT8_C( 96), INT8_C( -40), INT8_C( 23), INT8_C( 31), INT8_C( 73), INT8_C( -51), INT8_C( 91), INT8_C( 68), INT8_C( -23), INT8_C(-108), INT8_C( 52), INT8_C( 23), INT8_C( 115), INT8_C( -4), INT8_C( 24), INT8_C( 106), INT8_C( -92), INT8_C( 126), INT8_C( -6), INT8_C( 16), INT8_C( 102), INT8_C( -47), INT8_C(-116), INT8_C( -4), INT8_C( 33), INT8_C( -25), INT8_C(-108), INT8_C(-115), INT8_C(-104), INT8_C( -39), INT8_C( 49), INT8_C( 72), INT8_C( 44), INT8_C( -17), INT8_C( -66), INT8_C( -61), INT8_C( -68), INT8_C( 124), INT8_C( 20), INT8_C( 64), INT8_C( -49)), UINT64_C( 2970261907), simde_mm_set_epi8(INT8_C( -78), INT8_C( -60), INT8_C(-122), INT8_C( -66), INT8_C( 8), INT8_C( -42), INT8_C( 44), INT8_C( 45), INT8_C( 37), INT8_C( -9), INT8_C( -14), INT8_C( 38), INT8_C( -85), INT8_C( 35), INT8_C( 8), INT8_C(-127)), simde_mm512_set_epi8(INT8_C( 65), INT8_C( 
-68), INT8_C( 102), INT8_C(-122), INT8_C( 40), INT8_C( 19), INT8_C(-111), INT8_C( 8), INT8_C( -58), INT8_C(-120), INT8_C( 111), INT8_C( 10), INT8_C( -54), INT8_C(-100), INT8_C( 36), INT8_C( 27), INT8_C(-106), INT8_C(-123), INT8_C( 120), INT8_C( 43), INT8_C( -31), INT8_C( 4), INT8_C( 10), INT8_C( 96), INT8_C( -40), INT8_C( 23), INT8_C( 31), INT8_C( 73), INT8_C( -51), INT8_C( 91), INT8_C( 68), INT8_C( -23), INT8_C(-127), INT8_C( 52), INT8_C(-127), INT8_C(-127), INT8_C( -4), INT8_C( 24), INT8_C( 106), INT8_C(-127), INT8_C( 126), INT8_C( -6), INT8_C( 16), INT8_C( 102), INT8_C(-127), INT8_C(-116), INT8_C(-127), INT8_C( 33), INT8_C(-127), INT8_C(-108), INT8_C(-115), INT8_C(-127), INT8_C(-127), INT8_C( 49), INT8_C( 72), INT8_C(-127), INT8_C(-127), INT8_C( -66), INT8_C( -61), INT8_C(-127), INT8_C( 124), INT8_C( 20), INT8_C(-127), INT8_C(-127)) }, { simde_mm512_set_epi8(INT8_C( -76), INT8_C( 58), INT8_C( 70), INT8_C(-106), INT8_C( 120), INT8_C( 72), INT8_C( -4), INT8_C( -60), INT8_C( 104), INT8_C( 101), INT8_C( 53), INT8_C(-121), INT8_C( 102), INT8_C(-115), INT8_C( 90), INT8_C( 31), INT8_C( 11), INT8_C( 68), INT8_C( 48), INT8_C( 4), INT8_C( 55), INT8_C( -83), INT8_C( 75), INT8_C( -60), INT8_C( -54), INT8_C( 57), INT8_C( 70), INT8_C(-114), INT8_C( 51), INT8_C( -72), INT8_C( -71), INT8_C( 17), INT8_C( 48), INT8_C( 40), INT8_C(-108), INT8_C( -12), INT8_C( -11), INT8_C( -71), INT8_C(-114), INT8_C( -36), INT8_C( -92), INT8_C( 101), INT8_C( 30), INT8_C( 10), INT8_C( 43), INT8_C(-116), INT8_C( -45), INT8_C(-104), INT8_C( 99), INT8_C( 108), INT8_C( 90), INT8_C( 7), INT8_C( 112), INT8_C( 86), INT8_C(-125), INT8_C( 88), INT8_C( 27), INT8_C( 40), INT8_C( 10), INT8_C(-105), INT8_C( 76), INT8_C(-101), INT8_C( 87), INT8_C( 112)), UINT64_C( 458960523), simde_mm_set_epi8(INT8_C( 70), INT8_C( -73), INT8_C( -42), INT8_C( -19), INT8_C( 110), INT8_C( -58), INT8_C(-117), INT8_C(-100), INT8_C( 52), INT8_C( -63), INT8_C( -88), INT8_C( -55), INT8_C( 90), INT8_C( -15), INT8_C( -11), INT8_C( -21)), 
simde_mm512_set_epi8(INT8_C( -76), INT8_C( 58), INT8_C( 70), INT8_C(-106), INT8_C( 120), INT8_C( 72), INT8_C( -4), INT8_C( -60), INT8_C( 104), INT8_C( 101), INT8_C( 53), INT8_C(-121), INT8_C( 102), INT8_C(-115), INT8_C( 90), INT8_C( 31), INT8_C( 11), INT8_C( 68), INT8_C( 48), INT8_C( 4), INT8_C( 55), INT8_C( -83), INT8_C( 75), INT8_C( -60), INT8_C( -54), INT8_C( 57), INT8_C( 70), INT8_C(-114), INT8_C( 51), INT8_C( -72), INT8_C( -71), INT8_C( 17), INT8_C( 48), INT8_C( 40), INT8_C(-108), INT8_C( -21), INT8_C( -21), INT8_C( -71), INT8_C( -21), INT8_C( -21), INT8_C( -92), INT8_C( -21), INT8_C( 30), INT8_C( -21), INT8_C( -21), INT8_C(-116), INT8_C( -21), INT8_C( -21), INT8_C( 99), INT8_C( 108), INT8_C( -21), INT8_C( 7), INT8_C( -21), INT8_C( -21), INT8_C( -21), INT8_C( 88), INT8_C( -21), INT8_C( 40), INT8_C( 10), INT8_C(-105), INT8_C( -21), INT8_C(-101), INT8_C( -21), INT8_C( -21)) }, { simde_mm512_set_epi8(INT8_C( -65), INT8_C( -9), INT8_C( -93), INT8_C(-113), INT8_C( -10), INT8_C( 74), INT8_C( 39), INT8_C( 57), INT8_C( 91), INT8_C( -48), INT8_C( 11), INT8_C( -15), INT8_C( 21), INT8_C( -88), INT8_C( 91), INT8_C( 87), INT8_C(-120), INT8_C(-105), INT8_C( -47), INT8_C( 85), INT8_C( -98), INT8_C( 22), INT8_C(-124), INT8_C(-124), INT8_C( 2), INT8_C(-104), INT8_C( 27), INT8_C( 96), INT8_C( -89), INT8_C( 31), INT8_C( 20), INT8_C( 31), INT8_C( -95), INT8_C( 13), INT8_C( 37), INT8_C( 31), INT8_C( -72), INT8_C( 83), INT8_C( 94), INT8_C( 52), INT8_C( 41), INT8_C( 25), INT8_C( -42), INT8_C(-109), INT8_C( 31), INT8_C( 88), INT8_C( -71), INT8_C( -89), INT8_C( 103), INT8_C( -85), INT8_C( -29), INT8_C( 86), INT8_C( 71), INT8_C( 28), INT8_C( -23), INT8_C( 28), INT8_C( -53), INT8_C( -82), INT8_C( 58), INT8_C( -12), INT8_C( 63), INT8_C( 39), INT8_C( -32), INT8_C( -94)), UINT64_C( 1058428392), simde_mm_set_epi8(INT8_C( 85), INT8_C( -11), INT8_C( -21), INT8_C( 66), INT8_C( 72), INT8_C( -7), INT8_C( -18), INT8_C(-121), INT8_C( 56), INT8_C( 51), INT8_C( 101), INT8_C( 91), INT8_C( -85), 
INT8_C( -32), INT8_C( -40), INT8_C( -81)), simde_mm512_set_epi8(INT8_C( -65), INT8_C( -9), INT8_C( -93), INT8_C(-113), INT8_C( -10), INT8_C( 74), INT8_C( 39), INT8_C( 57), INT8_C( 91), INT8_C( -48), INT8_C( 11), INT8_C( -15), INT8_C( 21), INT8_C( -88), INT8_C( 91), INT8_C( 87), INT8_C(-120), INT8_C(-105), INT8_C( -47), INT8_C( 85), INT8_C( -98), INT8_C( 22), INT8_C(-124), INT8_C(-124), INT8_C( 2), INT8_C(-104), INT8_C( 27), INT8_C( 96), INT8_C( -89), INT8_C( 31), INT8_C( 20), INT8_C( 31), INT8_C( -95), INT8_C( 13), INT8_C( -81), INT8_C( -81), INT8_C( -81), INT8_C( -81), INT8_C( -81), INT8_C( -81), INT8_C( 41), INT8_C( 25), INT8_C( -42), INT8_C( -81), INT8_C( 31), INT8_C( -81), INT8_C( -81), INT8_C( -89), INT8_C( 103), INT8_C( -81), INT8_C( -29), INT8_C( -81), INT8_C( 71), INT8_C( -81), INT8_C( -23), INT8_C( -81), INT8_C( -81), INT8_C( -81), INT8_C( -81), INT8_C( -12), INT8_C( -81), INT8_C( 39), INT8_C( -32), INT8_C( -94)) }, { simde_mm512_set_epi8(INT8_C( 85), INT8_C( 18), INT8_C(-117), INT8_C( -50), INT8_C( -8), INT8_C( 126), INT8_C( 103), INT8_C( -42), INT8_C( 107), INT8_C( -60), INT8_C( -85), INT8_C( 123), INT8_C( -11), INT8_C( 41), INT8_C( 98), INT8_C( 115), INT8_C( 14), INT8_C( 34), INT8_C( 89), INT8_C( 101), INT8_C( 39), INT8_C( 26), INT8_C( 121), INT8_C( 70), INT8_C( -20), INT8_C( -34), INT8_C( -11), INT8_C( 72), INT8_C( 8), INT8_C( -24), INT8_C(-104), INT8_C( 61), INT8_C(-108), INT8_C( -43), INT8_C( 102), INT8_C( 100), INT8_C( -29), INT8_C( -21), INT8_C( 70), INT8_C( -28), INT8_C( -21), INT8_C( -82), INT8_C( -18), INT8_C( 9), INT8_C( 94), INT8_C( -32), INT8_C( 97), INT8_C( -86), INT8_C( 87), INT8_C( 62), INT8_C(-118), INT8_C( 17), INT8_C( 18), INT8_C(-126), INT8_C( 74), INT8_C( -83), INT8_C( -46), INT8_C(-103), INT8_C( -21), INT8_C( 108), INT8_C( -58), INT8_C(-126), INT8_C( -28), INT8_C(-112)), UINT64_C( 923153287), simde_mm_set_epi8(INT8_C( 73), INT8_C( -73), INT8_C( -11), INT8_C( 36), INT8_C( -17), INT8_C( 70), INT8_C(-102), INT8_C(-111), INT8_C( 27), 
INT8_C( -97), INT8_C( -6), INT8_C( -7), INT8_C( 28), INT8_C( -52), INT8_C( -54), INT8_C( -50)), simde_mm512_set_epi8(INT8_C( 85), INT8_C( 18), INT8_C(-117), INT8_C( -50), INT8_C( -8), INT8_C( 126), INT8_C( 103), INT8_C( -42), INT8_C( 107), INT8_C( -60), INT8_C( -85), INT8_C( 123), INT8_C( -11), INT8_C( 41), INT8_C( 98), INT8_C( 115), INT8_C( 14), INT8_C( 34), INT8_C( 89), INT8_C( 101), INT8_C( 39), INT8_C( 26), INT8_C( 121), INT8_C( 70), INT8_C( -20), INT8_C( -34), INT8_C( -11), INT8_C( 72), INT8_C( 8), INT8_C( -24), INT8_C(-104), INT8_C( 61), INT8_C(-108), INT8_C( -43), INT8_C( -50), INT8_C( -50), INT8_C( -29), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C( -21), INT8_C( -82), INT8_C( -18), INT8_C( 9), INT8_C( 94), INT8_C( -50), INT8_C( -50), INT8_C( -86), INT8_C( 87), INT8_C( 62), INT8_C( -50), INT8_C( -50), INT8_C( 18), INT8_C(-126), INT8_C( -50), INT8_C( -50), INT8_C( -50), INT8_C(-103), INT8_C( -21), INT8_C( 108), INT8_C( -58), INT8_C( -50), INT8_C( -50), INT8_C( -50)) }, { simde_mm512_set_epi8(INT8_C( 67), INT8_C(-107), INT8_C( 82), INT8_C( 55), INT8_C( 64), INT8_C( 72), INT8_C( -53), INT8_C( 66), INT8_C( -50), INT8_C( 103), INT8_C( -13), INT8_C( 78), INT8_C( 15), INT8_C( 32), INT8_C( 76), INT8_C( 78), INT8_C( 28), INT8_C( -98), INT8_C(-128), INT8_C( 80), INT8_C( 106), INT8_C( -45), INT8_C( 79), INT8_C( 116), INT8_C( 23), INT8_C( 31), INT8_C( 117), INT8_C( -12), INT8_C( -59), INT8_C( -16), INT8_C( 98), INT8_C( -49), INT8_C( 116), INT8_C( -82), INT8_C( 92), INT8_C( 1), INT8_C( 30), INT8_C(-100), INT8_C( 61), INT8_C( -14), INT8_C( 26), INT8_C( -40), INT8_C( -78), INT8_C( -85), INT8_C( -24), INT8_C( -47), INT8_C( -93), INT8_C( -1), INT8_C( 21), INT8_C( 82), INT8_C( 119), INT8_C( 64), INT8_C( 74), INT8_C( -53), INT8_C( 58), INT8_C( 33), INT8_C( 14), INT8_C( 114), INT8_C( 35), INT8_C( 109), INT8_C( -74), INT8_C( -59), INT8_C( -81), INT8_C( 16)), UINT64_C( 594368556), simde_mm_set_epi8(INT8_C( 26), INT8_C( -78), INT8_C( 32), INT8_C( 10), INT8_C(-126), INT8_C( 
64), INT8_C( 35), INT8_C( -54), INT8_C( -42), INT8_C( -70), INT8_C( 114), INT8_C( 111), INT8_C( 111), INT8_C( 11), INT8_C( 104), INT8_C( 39)), simde_mm512_set_epi8(INT8_C( 67), INT8_C(-107), INT8_C( 82), INT8_C( 55), INT8_C( 64), INT8_C( 72), INT8_C( -53), INT8_C( 66), INT8_C( -50), INT8_C( 103), INT8_C( -13), INT8_C( 78), INT8_C( 15), INT8_C( 32), INT8_C( 76), INT8_C( 78), INT8_C( 28), INT8_C( -98), INT8_C(-128), INT8_C( 80), INT8_C( 106), INT8_C( -45), INT8_C( 79), INT8_C( 116), INT8_C( 23), INT8_C( 31), INT8_C( 117), INT8_C( -12), INT8_C( -59), INT8_C( -16), INT8_C( 98), INT8_C( -49), INT8_C( 116), INT8_C( -82), INT8_C( 39), INT8_C( 1), INT8_C( 30), INT8_C(-100), INT8_C( 39), INT8_C( 39), INT8_C( 26), INT8_C( 39), INT8_C( 39), INT8_C( -85), INT8_C( 39), INT8_C( 39), INT8_C( -93), INT8_C( 39), INT8_C( 21), INT8_C( 39), INT8_C( 119), INT8_C( 39), INT8_C( 39), INT8_C( -53), INT8_C( 58), INT8_C( 33), INT8_C( 14), INT8_C( 114), INT8_C( 39), INT8_C( 109), INT8_C( 39), INT8_C( 39), INT8_C( -81), INT8_C( 16)) }, { simde_mm512_set_epi8(INT8_C( 124), INT8_C( 71), INT8_C(-128), INT8_C( 110), INT8_C(-123), INT8_C( -14), INT8_C( 123), INT8_C( -42), INT8_C( 94), INT8_C( 60), INT8_C( 116), INT8_C( -89), INT8_C( 73), INT8_C( -61), INT8_C( -3), INT8_C(-114), INT8_C( -92), INT8_C( -78), INT8_C( 90), INT8_C( 44), INT8_C( -84), INT8_C( -33), INT8_C( 116), INT8_C( -6), INT8_C( -44), INT8_C( 126), INT8_C( -26), INT8_C( 80), INT8_C( -91), INT8_C(-125), INT8_C( 72), INT8_C( -8), INT8_C( -16), INT8_C( 95), INT8_C( -25), INT8_C( -16), INT8_C( -52), INT8_C( 116), INT8_C( -23), INT8_C(-102), INT8_C( 119), INT8_C( -76), INT8_C( 48), INT8_C( 26), INT8_C(-128), INT8_C( 43), INT8_C( 99), INT8_C( -34), INT8_C(-103), INT8_C( -40), INT8_C( 47), INT8_C(-112), INT8_C(-117), INT8_C( 111), INT8_C(-126), INT8_C(-115), INT8_C( 65), INT8_C( -55), INT8_C( 49), INT8_C( 37), INT8_C(-110), INT8_C(-124), INT8_C( 126), INT8_C( -2)), UINT64_C( 1610616610), simde_mm_set_epi8(INT8_C( -95), INT8_C( 29), INT8_C( 
-58), INT8_C( -87), INT8_C( 73), INT8_C( 12), INT8_C( -29), INT8_C( 41), INT8_C( -96), INT8_C( 122), INT8_C( -95), INT8_C( -33), INT8_C(-128), INT8_C( 2), INT8_C( 115), INT8_C( 108)), simde_mm512_set_epi8(INT8_C( 124), INT8_C( 71), INT8_C(-128), INT8_C( 110), INT8_C(-123), INT8_C( -14), INT8_C( 123), INT8_C( -42), INT8_C( 94), INT8_C( 60), INT8_C( 116), INT8_C( -89), INT8_C( 73), INT8_C( -61), INT8_C( -3), INT8_C(-114), INT8_C( -92), INT8_C( -78), INT8_C( 90), INT8_C( 44), INT8_C( -84), INT8_C( -33), INT8_C( 116), INT8_C( -6), INT8_C( -44), INT8_C( 126), INT8_C( -26), INT8_C( 80), INT8_C( -91), INT8_C(-125), INT8_C( 72), INT8_C( -8), INT8_C( -16), INT8_C( 108), INT8_C( 108), INT8_C( -16), INT8_C( -52), INT8_C( 116), INT8_C( -23), INT8_C(-102), INT8_C( 119), INT8_C( -76), INT8_C( 48), INT8_C( 26), INT8_C(-128), INT8_C( 43), INT8_C( 99), INT8_C( -34), INT8_C(-103), INT8_C( -40), INT8_C( 47), INT8_C(-112), INT8_C( 108), INT8_C( 108), INT8_C( 108), INT8_C( 108), INT8_C( 65), INT8_C( -55), INT8_C( 108), INT8_C( 37), INT8_C(-110), INT8_C(-124), INT8_C( 108), INT8_C( -2)) }, { simde_mm512_set_epi8(INT8_C( 73), INT8_C( -95), INT8_C( -44), INT8_C( 123), INT8_C( -34), INT8_C(-122), INT8_C( 105), INT8_C( -63), INT8_C( -13), INT8_C( -78), INT8_C( -7), INT8_C( 88), INT8_C(-101), INT8_C( 60), INT8_C( 29), INT8_C( -15), INT8_C( 87), INT8_C( -77), INT8_C( 65), INT8_C( 71), INT8_C( 113), INT8_C(-124), INT8_C( -41), INT8_C( -18), INT8_C( 37), INT8_C( -20), INT8_C( 112), INT8_C( 70), INT8_C( 36), INT8_C( -80), INT8_C( 122), INT8_C( -28), INT8_C( -45), INT8_C(-113), INT8_C( 68), INT8_C( 23), INT8_C( 84), INT8_C( 56), INT8_C( -44), INT8_C( -61), INT8_C( -78), INT8_C( 6), INT8_C(-108), INT8_C( 73), INT8_C( -22), INT8_C( -71), INT8_C( 1), INT8_C( 7), INT8_C( 47), INT8_C( 18), INT8_C(-127), INT8_C( 127), INT8_C( -16), INT8_C( -48), INT8_C( -39), INT8_C( 106), INT8_C( 27), INT8_C( 40), INT8_C( -58), INT8_C( -56), INT8_C( -27), INT8_C( 17), INT8_C( 29), INT8_C( -46)), UINT64_C( 2168160586), 
simde_mm_set_epi8(INT8_C( 45), INT8_C( 89), INT8_C( -40), INT8_C( 94), INT8_C( -55), INT8_C( -34), INT8_C(-119), INT8_C(-109), INT8_C( 3), INT8_C(-117), INT8_C(-101), INT8_C( 63), INT8_C( 122), INT8_C( -4), INT8_C(-100), INT8_C( -84)), simde_mm512_set_epi8(INT8_C( 73), INT8_C( -95), INT8_C( -44), INT8_C( 123), INT8_C( -34), INT8_C(-122), INT8_C( 105), INT8_C( -63), INT8_C( -13), INT8_C( -78), INT8_C( -7), INT8_C( 88), INT8_C(-101), INT8_C( 60), INT8_C( 29), INT8_C( -15), INT8_C( 87), INT8_C( -77), INT8_C( 65), INT8_C( 71), INT8_C( 113), INT8_C(-124), INT8_C( -41), INT8_C( -18), INT8_C( 37), INT8_C( -20), INT8_C( 112), INT8_C( 70), INT8_C( 36), INT8_C( -80), INT8_C( 122), INT8_C( -28), INT8_C( -84), INT8_C(-113), INT8_C( 68), INT8_C( 23), INT8_C( 84), INT8_C( 56), INT8_C( -44), INT8_C( -84), INT8_C( -78), INT8_C( 6), INT8_C( -84), INT8_C( -84), INT8_C( -84), INT8_C( -71), INT8_C( -84), INT8_C( -84), INT8_C( -84), INT8_C( 18), INT8_C(-127), INT8_C( 127), INT8_C( -16), INT8_C( -48), INT8_C( -39), INT8_C( -84), INT8_C( 27), INT8_C( -84), INT8_C( -58), INT8_C( -56), INT8_C( -84), INT8_C( 17), INT8_C( -84), INT8_C( -46)) }, { simde_mm512_set_epi8(INT8_C( 38), INT8_C( -12), INT8_C( -37), INT8_C( 58), INT8_C( 89), INT8_C(-127), INT8_C( -11), INT8_C( 26), INT8_C( -29), INT8_C(-122), INT8_C( 86), INT8_C( 69), INT8_C( 63), INT8_C( 74), INT8_C( 90), INT8_C( 88), INT8_C( -75), INT8_C( -43), INT8_C( 36), INT8_C( 61), INT8_C( -19), INT8_C( 27), INT8_C(-123), INT8_C( 78), INT8_C( 67), INT8_C( 58), INT8_C( -32), INT8_C( 42), INT8_C( 25), INT8_C( -26), INT8_C( 122), INT8_C(-100), INT8_C(-107), INT8_C( -53), INT8_C(-114), INT8_C( 63), INT8_C(-100), INT8_C( 53), INT8_C( -32), INT8_C( -39), INT8_C( -75), INT8_C(-119), INT8_C( -67), INT8_C( 96), INT8_C( -6), INT8_C( -22), INT8_C( -12), INT8_C( 19), INT8_C( -51), INT8_C( 42), INT8_C( 39), INT8_C(-124), INT8_C( 38), INT8_C( -95), INT8_C(-119), INT8_C( -9), INT8_C( 94), INT8_C( -51), INT8_C( 1), INT8_C( -64), INT8_C( -67), INT8_C(-127), 
INT8_C( -33), INT8_C( 75)), UINT64_C( 3579095368), simde_mm_set_epi8(INT8_C( -71), INT8_C(-112), INT8_C(-122), INT8_C( -13), INT8_C(-109), INT8_C( 21), INT8_C( 27), INT8_C(-109), INT8_C( 55), INT8_C( 9), INT8_C( 117), INT8_C( -28), INT8_C( -58), INT8_C( -1), INT8_C( 3), INT8_C( -34)), simde_mm512_set_epi8(INT8_C( 38), INT8_C( -12), INT8_C( -37), INT8_C( 58), INT8_C( 89), INT8_C(-127), INT8_C( -11), INT8_C( 26), INT8_C( -29), INT8_C(-122), INT8_C( 86), INT8_C( 69), INT8_C( 63), INT8_C( 74), INT8_C( 90), INT8_C( 88), INT8_C( -75), INT8_C( -43), INT8_C( 36), INT8_C( 61), INT8_C( -19), INT8_C( 27), INT8_C(-123), INT8_C( 78), INT8_C( 67), INT8_C( 58), INT8_C( -32), INT8_C( 42), INT8_C( 25), INT8_C( -26), INT8_C( 122), INT8_C(-100), INT8_C( -34), INT8_C( -34), INT8_C(-114), INT8_C( -34), INT8_C(-100), INT8_C( -34), INT8_C( -32), INT8_C( -34), INT8_C( -75), INT8_C( -34), INT8_C( -67), INT8_C( -34), INT8_C( -6), INT8_C( -34), INT8_C( -12), INT8_C( 19), INT8_C( -34), INT8_C( 42), INT8_C( -34), INT8_C(-124), INT8_C( -34), INT8_C( -95), INT8_C(-119), INT8_C( -34), INT8_C( 94), INT8_C( -34), INT8_C( 1), INT8_C( -64), INT8_C( -34), INT8_C(-127), INT8_C( -33), INT8_C( 75)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_broadcastb_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_broadcastb_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask64 k; simde__m128i a; simde__m512i r; } test_vec[8] = { { UINT64_C( 2081702095), simde_mm_set_epi8(INT8_C( 126), INT8_C( -6), INT8_C( 16), INT8_C( 102), INT8_C( -47), INT8_C(-116), INT8_C( -4), INT8_C( 33), INT8_C( -25), INT8_C(-108), INT8_C(-115), INT8_C(-104), INT8_C( -39), INT8_C( 49), INT8_C( 72), INT8_C( 44)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 44), INT8_C( 44), INT8_C( 44), INT8_C( 44), INT8_C( 44), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 44), INT8_C( 0), INT8_C( 44), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 44), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 44), INT8_C( 44), INT8_C( 0), INT8_C( 0), INT8_C( 44), INT8_C( 44), INT8_C( 44), INT8_C( 44)) }, { UINT64_C( 4229458596), simde_mm_set_epi8(INT8_C(-106), INT8_C(-123), INT8_C( 120), INT8_C( 43), INT8_C( -31), INT8_C( 4), INT8_C( 10), INT8_C( 96), INT8_C( -40), INT8_C( 23), INT8_C( 31), INT8_C( 73), INT8_C( -51), INT8_C( 91), INT8_C( 68), INT8_C( -23)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -23), INT8_C( -23), INT8_C( -23), INT8_C( -23), INT8_C( -23), INT8_C( -23), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -23), INT8_C( -23), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -23), INT8_C( -23), INT8_C( 0), INT8_C( -23), INT8_C( 0), INT8_C( -23), INT8_C( 0), INT8_C( -23), INT8_C( 0), INT8_C( -23), INT8_C( 0), INT8_C( 0), INT8_C( -23), INT8_C( 0), INT8_C( 0)) }, { UINT64_C( 3399230491), simde_mm_set_epi8(INT8_C( -40), INT8_C( -29), INT8_C( 78), INT8_C( 94), INT8_C( -79), INT8_C( 10), INT8_C(-103), INT8_C(-109), INT8_C( 65), INT8_C( -68), INT8_C( 102), INT8_C(-122), INT8_C( 40), INT8_C( 19), INT8_C(-111), INT8_C( 8)), 
simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( 8), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( 0), INT8_C( 8), INT8_C( 0), INT8_C( 8), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( 8), INT8_C( 8), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( 8), INT8_C( 0), INT8_C( 8), INT8_C( 8)) }, { UINT64_C( 2871199873), simde_mm_set_epi8(INT8_C( 27), INT8_C( 40), INT8_C( 10), INT8_C(-105), INT8_C( 76), INT8_C(-101), INT8_C( 87), INT8_C( 112), INT8_C( -78), INT8_C( -60), INT8_C(-122), INT8_C( -66), INT8_C( 8), INT8_C( -42), INT8_C( 44), INT8_C( 45)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 45), INT8_C( 0), INT8_C( 45), INT8_C( 0), INT8_C( 45), INT8_C( 0), INT8_C( 45), INT8_C( 45), INT8_C( 0), INT8_C( 0), INT8_C( 45), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 45), INT8_C( 45), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 45), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 45), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 45)) }, { UINT64_C( 1884717912), simde_mm_set_epi8(INT8_C( 48), INT8_C( 40), INT8_C(-108), INT8_C( -12), INT8_C( -11), INT8_C( -71), INT8_C(-114), 
INT8_C( -36), INT8_C( -92), INT8_C( 101), INT8_C( 30), INT8_C( 10), INT8_C( 43), INT8_C(-116), INT8_C( -45), INT8_C(-104)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-104), INT8_C(-104), INT8_C(-104), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-104), INT8_C( 0), INT8_C(-104), INT8_C( 0), INT8_C(-104), INT8_C(-104), INT8_C( 0), INT8_C(-104), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-104), INT8_C(-104), INT8_C( 0), INT8_C(-104), INT8_C( 0), INT8_C(-104), INT8_C(-104), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { UINT64_C( 867744017), simde_mm_set_epi8(INT8_C( 104), INT8_C( 101), INT8_C( 53), INT8_C(-121), INT8_C( 102), INT8_C(-115), INT8_C( 90), INT8_C( 31), INT8_C( 11), INT8_C( 68), INT8_C( 48), INT8_C( 4), INT8_C( 55), INT8_C( -83), INT8_C( 75), INT8_C( -60)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -60), INT8_C( -60), INT8_C( 0), INT8_C( 0), INT8_C( -60), INT8_C( -60), INT8_C( -60), INT8_C( 0), INT8_C( -60), INT8_C( -60), INT8_C( -60), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -60), INT8_C( 0), INT8_C( -60), INT8_C( -60), INT8_C( -60), INT8_C( 0), INT8_C( 0), INT8_C( -60), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -60), INT8_C( 0), INT8_C( 0), INT8_C( 
0), INT8_C( -60)) }, { UINT64_C( 2018049220), simde_mm_set_epi8(INT8_C( 52), INT8_C( -63), INT8_C( -88), INT8_C( -55), INT8_C( 90), INT8_C( -15), INT8_C( -11), INT8_C( -21), INT8_C( 100), INT8_C( -84), INT8_C( -92), INT8_C( -78), INT8_C( 27), INT8_C( 91), INT8_C( 46), INT8_C(-117)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-117), INT8_C( 0), INT8_C( 0), INT8_C(-117), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C(-117), INT8_C( 0), INT8_C( 0), INT8_C(-117), INT8_C(-117), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-117), INT8_C( 0), INT8_C( 0)) }, { UINT64_C( 1858505628), simde_mm_set_epi8(INT8_C( 103), INT8_C( -85), INT8_C( -29), INT8_C( 86), INT8_C( 71), INT8_C( 28), INT8_C( -23), INT8_C( 28), INT8_C( -53), INT8_C( -82), INT8_C( 58), INT8_C( -12), INT8_C( 63), INT8_C( 39), INT8_C( -32), INT8_C( -94)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -94), INT8_C( -94), INT8_C( 0), INT8_C( -94), INT8_C( -94), INT8_C( -94), INT8_C( 0), INT8_C( -94), INT8_C( -94), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -94), INT8_C( -94), INT8_C( 0), INT8_C( -94), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -94), INT8_C( 0), INT8_C( -94), INT8_C( -94), INT8_C( -94), INT8_C( 0), INT8_C( 0), INT8_C( -94), INT8_C( -94), INT8_C( -94), INT8_C( 0), INT8_C( 0)) }
  };

  /* Closing loop of test_simde_mm512_maskz_broadcastb_epi8 (its opening lies
   * above this span): every table entry supplies a mask `k` and a 128-bit
   * input `a`, and the result is compared against the stored `r` with `==`.
   * NOTE(review): simde_assert_m512i_i8 presumably compares the vectors as
   * 8-bit lanes -- confirm against the test harness. */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m512i r = simde_mm512_maskz_broadcastb_epi8(test_vec[i].k, test_vec[i].a);
    simde_assert_m512i_i8(r, ==, test_vec[i].r);
  }

  /* Reached only if no assertion stopped the test; 0 is presumably the
   * harness's success code -- confirm. */
  return 0;
}

/* Test for simde_mm512_broadcastw_epi16.
 *
 * The table holds 8 cases. Each pairs a 128-bit input `a` (eight 16-bit
 * values) with the expected 512-bit result `r`, in which the LAST argument
 * given to simde_mm_set_epi16 is repeated in all 32 positions of
 * simde_mm512_set_epi16.
 *
 * The parameter list comes from the SIMDE_MUNIT_TEST_ARGS macro, which is
 * defined outside this view. */
static int
test_simde_mm512_broadcastw_epi16(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;
    simde__m512i r;
  } test_vec[8] = {
    { simde_mm_set_epi16(INT16_C( -4264), INT16_C(-31095), INT16_C( 28503), INT16_C(-19429), INT16_C(-23560), INT16_C( -4328), INT16_C( 17780), INT16_C(-19836)),
      simde_mm512_set_epi16(INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836),
                            INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836),
                            INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836),
                            INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836), INT16_C(-19836)) },
    { simde_mm_set_epi16(INT16_C( -1138), INT16_C(-21762), INT16_C( 8538), INT16_C(-12772), INT16_C( 3852), INT16_C( 8246), INT16_C( -3641), INT16_C( 9422)),
      simde_mm512_set_epi16(INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422),
                            INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422),
                            INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422),
                            INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422), INT16_C( 9422)) },
    { simde_mm_set_epi16(INT16_C(-12364), INT16_C(-15754), INT16_C( -793), INT16_C(-14722), INT16_C(-29314), INT16_C(-26497), INT16_C( 3881), INT16_C( 17439)),
      simde_mm512_set_epi16(INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439),
                            INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439),
                            INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439),
                            INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439), INT16_C( 17439)) },
    { simde_mm_set_epi16(INT16_C( -7447), INT16_C(-10523), INT16_C(-25861), INT16_C(-22174), INT16_C( 8521), INT16_C( 32120), INT16_C(-17861), INT16_C( 31790)),
      simde_mm512_set_epi16(INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790),
                            INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790),
                            INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790),
                            INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790), INT16_C( 31790)) },
    { simde_mm_set_epi16(INT16_C( -4580), INT16_C( -4681), INT16_C( -4797), INT16_C( 20435), INT16_C(-31664), INT16_C(-25722), INT16_C(-13794), INT16_C( -4041)),
      simde_mm512_set_epi16(INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041),
                            INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041),
                            INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041),
                            INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041), INT16_C( -4041)) },
    { simde_mm_set_epi16(INT16_C( 1787), INT16_C( 9631), INT16_C( 4347), INT16_C( -4594), INT16_C(-30523), INT16_C(-10849), INT16_C(-17993), INT16_C(-18482)),
      simde_mm512_set_epi16(INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482),
                            INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482),
                            INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482),
                            INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482), INT16_C(-18482)) },
    { simde_mm_set_epi16(INT16_C( 30524), INT16_C( 16358), INT16_C( 12856), INT16_C( 10489), INT16_C( 17653), INT16_C( -5197), INT16_C( 14483), INT16_C(-30060)),
      simde_mm512_set_epi16(INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060),
                            INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060),
                            INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060),
                            INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060), INT16_C(-30060)) },
    { simde_mm_set_epi16(INT16_C(-28607), INT16_C( 6822), INT16_C(-19640), INT16_C( 516), INT16_C(-13138), INT16_C( -4418), INT16_C(-29962), INT16_C( 13528)),
      simde_mm512_set_epi16(INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528),
                            INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528),
                            INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528),
                            INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528), INT16_C( 13528)) }
  };

  /* Run every case: only `a` is passed to the function under test, and the
   * result is compared against the stored expectation with `==`.
   * NOTE(review): simde_assert_m512i_i16 presumably compares the vectors as
   * 16-bit lanes -- confirm against the test harness. */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m512i r = simde_mm512_broadcastw_epi16(test_vec[i].a);
    simde_assert_m512i_i16(r, ==, test_vec[i].r);
  }

  /* Reached only if no assertion stopped the test; 0 is presumably the
   * harness's success code -- confirm. */
  return 0;
}

/* Test list for this file: one SIMDE_TEST_FUNC_LIST_ENTRY per test, named
 * like the test function without its `test_simde_` prefix. The list
 * continues past the end of this span.
 * NOTE(review): the macros are defined elsewhere; presumably they build the
 * table the test runner iterates over -- confirm. */
SIMDE_TEST_FUNC_LIST_BEGIN
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_broadcast_f32x2)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_broadcast_f32x2)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_broadcast_f32x2)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm512_broadcast_f32x2)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_broadcast_f32x2)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_broadcast_f32x2)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm512_broadcast_f32x8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_broadcast_f32x8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_broadcast_f32x8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm512_broadcast_f64x2)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_broadcast_f64x2)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_broadcast_f64x2)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_broadcast_f32x4)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_broadcast_f32x4)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_broadcast_f32x4)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_broadcast_f64x2)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_broadcast_f64x2)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_broadcast_f64x2)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm512_broadcast_f32x4)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_broadcast_f32x4)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_broadcast_f32x4)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm512_broadcast_f64x4)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_broadcast_f64x4)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_broadcast_f64x4)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm512_broadcast_i32x4)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_broadcast_i32x4)
SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_broadcast_i32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_broadcast_i64x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_broadcast_i64x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_broadcast_i64x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_broadcastd_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_broadcastd_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_broadcastd_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_broadcastq_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_broadcastq_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_broadcastq_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_broadcastss_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_broadcastss_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_broadcastss_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_broadcastsd_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_broadcastsd_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_broadcastsd_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_broadcastb_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_broadcastb_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_broadcastb_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_broadcastw_epi16) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/cast.c000066400000000000000000003234761400333146700163710ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #define SIMDE_TEST_X86_AVX512_INSN cast #include #include #include #include static int test_simde_mm512_castpd512_pd128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m128d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -503.58), SIMDE_FLOAT64_C( 409.99), SIMDE_FLOAT64_C( 882.79), SIMDE_FLOAT64_C( 967.41), SIMDE_FLOAT64_C( 644.16), SIMDE_FLOAT64_C( 22.93), SIMDE_FLOAT64_C( 629.49), SIMDE_FLOAT64_C( -19.16)), simde_mm_set_pd(SIMDE_FLOAT64_C( 629.49), SIMDE_FLOAT64_C( -19.16)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -371.76), SIMDE_FLOAT64_C( -903.70), SIMDE_FLOAT64_C( -183.89), SIMDE_FLOAT64_C( -1.20), SIMDE_FLOAT64_C( -62.86), SIMDE_FLOAT64_C( 444.19), SIMDE_FLOAT64_C( 672.79), SIMDE_FLOAT64_C( 471.18)), simde_mm_set_pd(SIMDE_FLOAT64_C( 672.79), SIMDE_FLOAT64_C( 471.18)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -771.58), SIMDE_FLOAT64_C( 494.03), SIMDE_FLOAT64_C( 138.84), SIMDE_FLOAT64_C( -195.59), SIMDE_FLOAT64_C( -760.58), SIMDE_FLOAT64_C( -773.64), SIMDE_FLOAT64_C( 870.25), SIMDE_FLOAT64_C( 314.25)), simde_mm_set_pd(SIMDE_FLOAT64_C( 870.25), SIMDE_FLOAT64_C( 314.25)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -2.05), SIMDE_FLOAT64_C( -418.62), SIMDE_FLOAT64_C( -507.96), SIMDE_FLOAT64_C( -261.33), SIMDE_FLOAT64_C( -683.39), SIMDE_FLOAT64_C( -27.96), SIMDE_FLOAT64_C( 534.55), SIMDE_FLOAT64_C( 185.44)), simde_mm_set_pd(SIMDE_FLOAT64_C( 534.55), SIMDE_FLOAT64_C( 185.44)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 596.68), 
SIMDE_FLOAT64_C( -902.04), SIMDE_FLOAT64_C( 847.76), SIMDE_FLOAT64_C( 134.78), SIMDE_FLOAT64_C( 116.62), SIMDE_FLOAT64_C( 7.30), SIMDE_FLOAT64_C( 382.57), SIMDE_FLOAT64_C( 434.75)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( 382.57), SIMDE_FLOAT64_C( 434.75)) },
    { simde_mm512_set_pd(SIMDE_FLOAT64_C( 688.01), SIMDE_FLOAT64_C( 829.27), SIMDE_FLOAT64_C( -970.03), SIMDE_FLOAT64_C( 528.04), SIMDE_FLOAT64_C( -386.08), SIMDE_FLOAT64_C( 603.97), SIMDE_FLOAT64_C( -132.68), SIMDE_FLOAT64_C( 463.66)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -132.68), SIMDE_FLOAT64_C( 463.66)) },
    { simde_mm512_set_pd(SIMDE_FLOAT64_C( 235.78), SIMDE_FLOAT64_C( -244.37), SIMDE_FLOAT64_C( 846.69), SIMDE_FLOAT64_C( 794.12), SIMDE_FLOAT64_C( 672.11), SIMDE_FLOAT64_C( -445.78), SIMDE_FLOAT64_C( 396.01), SIMDE_FLOAT64_C( -284.10)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( 396.01), SIMDE_FLOAT64_C( -284.10)) },
    { simde_mm512_set_pd(SIMDE_FLOAT64_C( 603.46), SIMDE_FLOAT64_C( -988.15), SIMDE_FLOAT64_C( -342.31), SIMDE_FLOAT64_C( -282.11), SIMDE_FLOAT64_C( 379.72), SIMDE_FLOAT64_C( -680.63), SIMDE_FLOAT64_C( -541.51), SIMDE_FLOAT64_C( -40.90)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -541.51), SIMDE_FLOAT64_C( -40.90)) }
  };

  /* Closing loop of test_simde_mm512_castpd512_pd128 (its opening lies above
   * this span): cast each 512-bit input down to 128 bits and compare with the
   * stored expectation.
   * NOTE(review): the trailing `1` passed to simde_assert_m128d_close is
   * presumably a precision/tolerance setting -- confirm against the harness. */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m128d r = simde_mm512_castpd512_pd128(test_vec[i].a);
    simde_assert_m128d_close(r, test_vec[i].r, 1);
  }

  /* Reached only if no assertion stopped the test; 0 is presumably the
   * harness's success code -- confirm. */
  return 0;
}

/* Test for simde_mm512_castpd512_pd256.
 *
 * The table holds 8 cases. Each pairs a 512-bit input `a` (eight doubles)
 * with the expected 256-bit result `r`, whose four simde_mm256_set_pd
 * arguments repeat the LAST four arguments given to simde_mm512_set_pd.
 *
 * The parameter list comes from the SIMDE_MUNIT_TEST_ARGS macro, which is
 * defined outside this view. */
static int
test_simde_mm512_castpd512_pd256(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m512d a;
    simde__m256d r;
  } test_vec[8] = {
    { simde_mm512_set_pd(SIMDE_FLOAT64_C( 414.39), SIMDE_FLOAT64_C( -15.63), SIMDE_FLOAT64_C( 546.05), SIMDE_FLOAT64_C( -960.01), SIMDE_FLOAT64_C( -752.11), SIMDE_FLOAT64_C( -702.83), SIMDE_FLOAT64_C( -642.16), SIMDE_FLOAT64_C( 617.65)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -752.11), SIMDE_FLOAT64_C( -702.83), SIMDE_FLOAT64_C( -642.16), SIMDE_FLOAT64_C( 617.65)) },
    { simde_mm512_set_pd(SIMDE_FLOAT64_C( -964.21), SIMDE_FLOAT64_C( -388.73), SIMDE_FLOAT64_C( 322.52), SIMDE_FLOAT64_C( 699.65), SIMDE_FLOAT64_C( 390.02), SIMDE_FLOAT64_C( -490.64), SIMDE_FLOAT64_C( -297.41), SIMDE_FLOAT64_C( 563.23)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( 390.02), SIMDE_FLOAT64_C( -490.64), SIMDE_FLOAT64_C( -297.41), SIMDE_FLOAT64_C( 563.23)) },
    { simde_mm512_set_pd(SIMDE_FLOAT64_C( 920.55), SIMDE_FLOAT64_C( 39.56), SIMDE_FLOAT64_C( 750.72), SIMDE_FLOAT64_C( 440.72), SIMDE_FLOAT64_C( -246.45), SIMDE_FLOAT64_C( 981.88), SIMDE_FLOAT64_C( -299.81), SIMDE_FLOAT64_C( -98.82)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -246.45), SIMDE_FLOAT64_C( 981.88), SIMDE_FLOAT64_C( -299.81), SIMDE_FLOAT64_C( -98.82)) },
    { simde_mm512_set_pd(SIMDE_FLOAT64_C( -471.70), SIMDE_FLOAT64_C( -359.51), SIMDE_FLOAT64_C( 724.77), SIMDE_FLOAT64_C( -820.10), SIMDE_FLOAT64_C( 777.44), SIMDE_FLOAT64_C( -220.31), SIMDE_FLOAT64_C( 567.27), SIMDE_FLOAT64_C( 977.08)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( 777.44), SIMDE_FLOAT64_C( -220.31), SIMDE_FLOAT64_C( 567.27), SIMDE_FLOAT64_C( 977.08)) },
    { simde_mm512_set_pd(SIMDE_FLOAT64_C( 438.55), SIMDE_FLOAT64_C( -230.11), SIMDE_FLOAT64_C( 759.60), SIMDE_FLOAT64_C( 615.37), SIMDE_FLOAT64_C( -356.18), SIMDE_FLOAT64_C( 777.70), SIMDE_FLOAT64_C( -385.32), SIMDE_FLOAT64_C( -478.17)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( -356.18), SIMDE_FLOAT64_C( 777.70), SIMDE_FLOAT64_C( -385.32), SIMDE_FLOAT64_C( -478.17)) },
    { simde_mm512_set_pd(SIMDE_FLOAT64_C( -337.84), SIMDE_FLOAT64_C( -274.76), SIMDE_FLOAT64_C( 865.32), SIMDE_FLOAT64_C( -257.44), SIMDE_FLOAT64_C( 325.76), SIMDE_FLOAT64_C( -67.76), SIMDE_FLOAT64_C( -238.87), SIMDE_FLOAT64_C( -384.04)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( 325.76), SIMDE_FLOAT64_C( -67.76), SIMDE_FLOAT64_C( -238.87), SIMDE_FLOAT64_C( -384.04)) },
    { simde_mm512_set_pd(SIMDE_FLOAT64_C( 120.04), SIMDE_FLOAT64_C( 416.14), SIMDE_FLOAT64_C( 299.72), SIMDE_FLOAT64_C( 991.84), SIMDE_FLOAT64_C( 325.55), SIMDE_FLOAT64_C( 685.94), SIMDE_FLOAT64_C( -182.11), SIMDE_FLOAT64_C( 943.15)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( 325.55), SIMDE_FLOAT64_C( 685.94), SIMDE_FLOAT64_C( -182.11), SIMDE_FLOAT64_C( 943.15)) },
    { simde_mm512_set_pd(SIMDE_FLOAT64_C( 556.40), SIMDE_FLOAT64_C( -775.19), SIMDE_FLOAT64_C( -814.91), SIMDE_FLOAT64_C( 73.06), SIMDE_FLOAT64_C( 317.95), SIMDE_FLOAT64_C( -552.86), SIMDE_FLOAT64_C( -767.41), SIMDE_FLOAT64_C( 130.66)),
      simde_mm256_set_pd(SIMDE_FLOAT64_C( 317.95), SIMDE_FLOAT64_C( -552.86), SIMDE_FLOAT64_C( -767.41), SIMDE_FLOAT64_C( 130.66)) }
  };

  /* Run every case: cast the 512-bit input down to 256 bits and compare with
   * the stored expectation.
   * NOTE(review): the trailing `1` passed to simde_assert_m256d_close is
   * presumably a precision/tolerance setting -- confirm against the harness. */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256d r = simde_mm512_castpd512_pd256(test_vec[i].a);
    simde_assert_m256d_close(r, test_vec[i].r, 1);
  }

  /* Reached only if no assertion stopped the test; 0 is presumably the
   * harness's success code -- confirm. */
  return 0;
}

/* Test for simde_mm512_castpd128_pd512.
 *
 * The table holds 8 cases. Each pairs a 128-bit input `a` (two doubles) with
 * the expected 512-bit result `r`: the first six simde_mm512_set_pd
 * arguments are 0.00 and the last two repeat the input pair.
 *
 * The parameter list comes from the SIMDE_MUNIT_TEST_ARGS macro, which is
 * defined outside this view. */
static int
test_simde_mm512_castpd128_pd512(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128d a;
    simde__m512d r;
  } test_vec[8] = {
    { simde_mm_set_pd(SIMDE_FLOAT64_C( 605.48), SIMDE_FLOAT64_C( 349.95)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 605.48), SIMDE_FLOAT64_C( 349.95)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( 125.01), SIMDE_FLOAT64_C( -52.81)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 125.01), SIMDE_FLOAT64_C( -52.81)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -137.83), SIMDE_FLOAT64_C( -367.46)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -137.83), SIMDE_FLOAT64_C( -367.46)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -706.70), SIMDE_FLOAT64_C( -54.57)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -706.70), SIMDE_FLOAT64_C( -54.57)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -477.44), SIMDE_FLOAT64_C( -678.80)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -477.44), SIMDE_FLOAT64_C( -678.80)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -750.01), SIMDE_FLOAT64_C( 943.83)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -750.01), SIMDE_FLOAT64_C( 943.83)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( 329.32), SIMDE_FLOAT64_C( 600.02)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 329.32), SIMDE_FLOAT64_C( 600.02)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( 495.92), SIMDE_FLOAT64_C( 855.97)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 495.92), SIMDE_FLOAT64_C( 855.97)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m512d r = simde_mm512_castpd128_pd512(test_vec[i].a);
    /* Filter the widened result through a zero-masking move with mask 3
     * (bits 0 and 1 set) before comparing.
     * NOTE(review): presumably this zeroes the six elements the cast does
     * not define, so they match the 0.00 entries in the expectation --
     * confirm against the intrinsic's documentation. */
    r=simde_mm512_maskz_mov_pd(UINT8_C(3), r);
    /* NOTE(review): the trailing `1` is presumably a precision/tolerance
     * setting -- confirm against the harness. */
    simde_assert_m512d_close(r, test_vec[i].r, 1);
  }

  /* Reached only if no assertion stopped the test; 0 is presumably the
   * harness's success code -- confirm. */
  return 0;
}

/* Test for simde_mm512_castpd256_pd512 (its table and loop continue past the
 * end of this span). In the first case, the expected simde_mm512_set_pd
 * arguments are four 0.00 values followed by the four input values.
 *
 * The parameter list comes from the SIMDE_MUNIT_TEST_ARGS macro, which is
 * defined outside this view. */
static int
test_simde_mm512_castpd256_pd512(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m256d a;
    simde__m512d r;
  } test_vec[8] = {
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( -610.76), SIMDE_FLOAT64_C( -445.99), SIMDE_FLOAT64_C( -292.19), SIMDE_FLOAT64_C( 327.18)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -610.76), SIMDE_FLOAT64_C( -445.99), SIMDE_FLOAT64_C( -292.19), SIMDE_FLOAT64_C( 327.18)) },
    { simde_mm256_set_pd(SIMDE_FLOAT64_C( 498.95), SIMDE_FLOAT64_C( 765.72), SIMDE_FLOAT64_C( -978.89), SIMDE_FLOAT64_C( -427.55)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00),
SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 498.95), SIMDE_FLOAT64_C( 765.72), SIMDE_FLOAT64_C( -978.89), SIMDE_FLOAT64_C( -427.55)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -991.71), SIMDE_FLOAT64_C( -66.46), SIMDE_FLOAT64_C( -87.69), SIMDE_FLOAT64_C( 225.73)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -991.71), SIMDE_FLOAT64_C( -66.46), SIMDE_FLOAT64_C( -87.69), SIMDE_FLOAT64_C( 225.73)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -983.02), SIMDE_FLOAT64_C( 768.15), SIMDE_FLOAT64_C( 423.32), SIMDE_FLOAT64_C( -601.96)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -983.02), SIMDE_FLOAT64_C( 768.15), SIMDE_FLOAT64_C( 423.32), SIMDE_FLOAT64_C( -601.96)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 269.59), SIMDE_FLOAT64_C( -12.98), SIMDE_FLOAT64_C( 833.22), SIMDE_FLOAT64_C( -167.34)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 269.59), SIMDE_FLOAT64_C( -12.98), SIMDE_FLOAT64_C( 833.22), SIMDE_FLOAT64_C( -167.34)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -976.56), SIMDE_FLOAT64_C( -631.12), SIMDE_FLOAT64_C( -613.64), SIMDE_FLOAT64_C( -574.89)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -976.56), SIMDE_FLOAT64_C( -631.12), SIMDE_FLOAT64_C( -613.64), SIMDE_FLOAT64_C( -574.89)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 900.29), SIMDE_FLOAT64_C( 197.75), SIMDE_FLOAT64_C( -976.11), SIMDE_FLOAT64_C( -958.45)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 900.29), SIMDE_FLOAT64_C( 197.75), SIMDE_FLOAT64_C( -976.11), SIMDE_FLOAT64_C( -958.45)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -467.55), 
SIMDE_FLOAT64_C( -302.23), SIMDE_FLOAT64_C( 207.74), SIMDE_FLOAT64_C( -217.89)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -467.55), SIMDE_FLOAT64_C( -302.23), SIMDE_FLOAT64_C( 207.74), SIMDE_FLOAT64_C( -217.89)) }
  };

  /* Closing loop of test_simde_mm512_castpd256_pd512 (its opening lies above
   * this span). */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m512d r = simde_mm512_castpd256_pd512(test_vec[i].a);
    /* Filter the widened result through a zero-masking move with mask 15
     * (bits 0..3 set) before comparing.
     * NOTE(review): presumably this zeroes the four elements the cast does
     * not define, so they match the 0.00 entries in the expectation --
     * confirm against the intrinsic's documentation. */
    r=simde_mm512_maskz_mov_pd(UINT8_C(15), r);
    /* NOTE(review): the trailing `1` is presumably a precision/tolerance
     * setting -- confirm against the harness. */
    simde_assert_m512d_close(r, test_vec[i].r, 1);
  }

  /* Reached only if no assertion stopped the test; 0 is presumably the
   * harness's success code -- confirm. */
  return 0;
}

/* Test for simde_mm512_castps512_ps128.
 *
 * The table holds 8 cases. Each pairs a 512-bit input `a` (sixteen floats)
 * with the expected 128-bit result `r`, whose four simde_mm_set_ps arguments
 * repeat the LAST four arguments given to simde_mm512_set_ps.
 *
 * The parameter list comes from the SIMDE_MUNIT_TEST_ARGS macro, which is
 * defined outside this view. */
static int
test_simde_mm512_castps512_ps128(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m512 a;
    simde__m128 r;
  } test_vec[8] = {
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( 764.32), SIMDE_FLOAT32_C( -951.31), SIMDE_FLOAT32_C( 179.06), SIMDE_FLOAT32_C( -437.66), SIMDE_FLOAT32_C( 402.64), SIMDE_FLOAT32_C( 734.29), SIMDE_FLOAT32_C( 267.34), SIMDE_FLOAT32_C( 208.00),
                         SIMDE_FLOAT32_C( 587.55), SIMDE_FLOAT32_C( -635.96), SIMDE_FLOAT32_C( -958.84), SIMDE_FLOAT32_C( -271.31), SIMDE_FLOAT32_C( 711.90), SIMDE_FLOAT32_C( 959.36), SIMDE_FLOAT32_C( 988.07), SIMDE_FLOAT32_C( -648.23)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 711.90), SIMDE_FLOAT32_C( 959.36), SIMDE_FLOAT32_C( 988.07), SIMDE_FLOAT32_C( -648.23)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( 193.72), SIMDE_FLOAT32_C( -347.47), SIMDE_FLOAT32_C( 437.71), SIMDE_FLOAT32_C( -756.77), SIMDE_FLOAT32_C( -591.14), SIMDE_FLOAT32_C( -431.84), SIMDE_FLOAT32_C( -564.12), SIMDE_FLOAT32_C( 252.26),
                         SIMDE_FLOAT32_C( -784.50), SIMDE_FLOAT32_C( 149.30), SIMDE_FLOAT32_C( -246.02), SIMDE_FLOAT32_C( -60.58), SIMDE_FLOAT32_C( 384.90), SIMDE_FLOAT32_C( 156.04), SIMDE_FLOAT32_C( -595.88), SIMDE_FLOAT32_C( -574.45)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 384.90), SIMDE_FLOAT32_C( 156.04), SIMDE_FLOAT32_C( -595.88), SIMDE_FLOAT32_C( -574.45)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( -425.80), SIMDE_FLOAT32_C( 886.22), SIMDE_FLOAT32_C( -478.46), SIMDE_FLOAT32_C( -993.15), SIMDE_FLOAT32_C( -912.29), SIMDE_FLOAT32_C( -564.52), SIMDE_FLOAT32_C( -25.07), SIMDE_FLOAT32_C( -469.67),
                         SIMDE_FLOAT32_C( 828.84), SIMDE_FLOAT32_C( -525.49), SIMDE_FLOAT32_C( -521.07), SIMDE_FLOAT32_C( 132.36), SIMDE_FLOAT32_C( 368.34), SIMDE_FLOAT32_C( 996.83), SIMDE_FLOAT32_C( 514.03), SIMDE_FLOAT32_C( 350.58)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 368.34), SIMDE_FLOAT32_C( 996.83), SIMDE_FLOAT32_C( 514.03), SIMDE_FLOAT32_C( 350.58)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( -806.26), SIMDE_FLOAT32_C( -958.10), SIMDE_FLOAT32_C( -61.10), SIMDE_FLOAT32_C( 698.78), SIMDE_FLOAT32_C( 893.49), SIMDE_FLOAT32_C( -774.73), SIMDE_FLOAT32_C( -469.54), SIMDE_FLOAT32_C( 88.88),
                         SIMDE_FLOAT32_C( -797.01), SIMDE_FLOAT32_C( -143.15), SIMDE_FLOAT32_C( -201.38), SIMDE_FLOAT32_C( -439.08), SIMDE_FLOAT32_C( 122.21), SIMDE_FLOAT32_C( 251.76), SIMDE_FLOAT32_C( 853.77), SIMDE_FLOAT32_C( -306.48)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 122.21), SIMDE_FLOAT32_C( 251.76), SIMDE_FLOAT32_C( 853.77), SIMDE_FLOAT32_C( -306.48)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( 995.27), SIMDE_FLOAT32_C( 762.38), SIMDE_FLOAT32_C( 849.07), SIMDE_FLOAT32_C( 533.84), SIMDE_FLOAT32_C( 128.75), SIMDE_FLOAT32_C( 581.04), SIMDE_FLOAT32_C( 680.92), SIMDE_FLOAT32_C( 97.54),
                         SIMDE_FLOAT32_C( 183.54), SIMDE_FLOAT32_C( 89.55), SIMDE_FLOAT32_C( -801.20), SIMDE_FLOAT32_C( -458.50), SIMDE_FLOAT32_C( -944.26), SIMDE_FLOAT32_C( -648.24), SIMDE_FLOAT32_C( -216.16), SIMDE_FLOAT32_C( 273.51)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -944.26), SIMDE_FLOAT32_C( -648.24), SIMDE_FLOAT32_C( -216.16), SIMDE_FLOAT32_C( 273.51)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( 563.30), SIMDE_FLOAT32_C( -488.59), SIMDE_FLOAT32_C( -41.61), SIMDE_FLOAT32_C( -745.38), SIMDE_FLOAT32_C( -156.54), SIMDE_FLOAT32_C( -651.09), SIMDE_FLOAT32_C( 393.55), SIMDE_FLOAT32_C( -610.90),
                         SIMDE_FLOAT32_C( 656.44), SIMDE_FLOAT32_C( 254.16), SIMDE_FLOAT32_C( -27.43), SIMDE_FLOAT32_C( -862.48), SIMDE_FLOAT32_C( 639.71), SIMDE_FLOAT32_C( -674.10), SIMDE_FLOAT32_C( -151.15), SIMDE_FLOAT32_C( -736.28)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 639.71), SIMDE_FLOAT32_C( -674.10), SIMDE_FLOAT32_C( -151.15), SIMDE_FLOAT32_C( -736.28)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( -997.30), SIMDE_FLOAT32_C( 958.88), SIMDE_FLOAT32_C( 414.33), SIMDE_FLOAT32_C( -486.71), SIMDE_FLOAT32_C( 347.81), SIMDE_FLOAT32_C( -73.64), SIMDE_FLOAT32_C( 301.45), SIMDE_FLOAT32_C( -935.75),
                         SIMDE_FLOAT32_C( 554.43), SIMDE_FLOAT32_C( 239.85), SIMDE_FLOAT32_C( -172.14), SIMDE_FLOAT32_C( -200.20), SIMDE_FLOAT32_C( 383.84), SIMDE_FLOAT32_C( -4.79), SIMDE_FLOAT32_C( 659.24), SIMDE_FLOAT32_C( 178.95)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 383.84), SIMDE_FLOAT32_C( -4.79), SIMDE_FLOAT32_C( 659.24), SIMDE_FLOAT32_C( 178.95)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( 282.49), SIMDE_FLOAT32_C( -287.66), SIMDE_FLOAT32_C( -300.74), SIMDE_FLOAT32_C( -282.12), SIMDE_FLOAT32_C( 228.49), SIMDE_FLOAT32_C( 194.42), SIMDE_FLOAT32_C( 232.99), SIMDE_FLOAT32_C( 312.60),
                         SIMDE_FLOAT32_C( 59.18), SIMDE_FLOAT32_C( 591.94), SIMDE_FLOAT32_C( 490.41), SIMDE_FLOAT32_C( 768.05), SIMDE_FLOAT32_C( 628.85), SIMDE_FLOAT32_C( 497.59), SIMDE_FLOAT32_C( 700.20), SIMDE_FLOAT32_C( -694.51)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 628.85), SIMDE_FLOAT32_C( 497.59), SIMDE_FLOAT32_C( 700.20), SIMDE_FLOAT32_C( -694.51)) }
  };

  /* Run every case: cast the 512-bit input down to 128 bits and compare with
   * the stored expectation.
   * NOTE(review): the trailing `1` passed to simde_assert_m128_close is
   * presumably a precision/tolerance setting -- confirm against the harness. */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m128 r = simde_mm512_castps512_ps128(test_vec[i].a);
    simde_assert_m128_close(r, test_vec[i].r, 1);
  }

  /* Reached only if no assertion stopped the test; 0 is presumably the
   * harness's success code -- confirm. */
  return 0;
}

/* Test for simde_mm512_castps512_ps256 (its table and loop continue past the
 * end of this span). Each case pairs a 512-bit input `a` with a 256-bit
 * expected result `r`.
 *
 * The parameter list comes from the SIMDE_MUNIT_TEST_ARGS macro, which is
 * defined outside this view. */
static int
test_simde_mm512_castps512_ps256(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m512 a;
    simde__m256 r;
  } test_vec[8] = {
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( 516.61), SIMDE_FLOAT32_C( 494.30), SIMDE_FLOAT32_C( 266.21), SIMDE_FLOAT32_C( 450.63), SIMDE_FLOAT32_C( -862.95), SIMDE_FLOAT32_C( -528.18), SIMDE_FLOAT32_C( 206.23), SIMDE_FLOAT32_C( -212.40),
                         SIMDE_FLOAT32_C( 805.40), SIMDE_FLOAT32_C( -902.72), SIMDE_FLOAT32_C( -631.10), SIMDE_FLOAT32_C( -480.24), SIMDE_FLOAT32_C( 577.49), SIMDE_FLOAT32_C( 490.33), SIMDE_FLOAT32_C( -227.96),
SIMDE_FLOAT32_C( 938.93)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 805.40), SIMDE_FLOAT32_C( -902.72), SIMDE_FLOAT32_C( -631.10), SIMDE_FLOAT32_C( -480.24), SIMDE_FLOAT32_C( 577.49), SIMDE_FLOAT32_C( 490.33), SIMDE_FLOAT32_C( -227.96), SIMDE_FLOAT32_C( 938.93)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 205.80), SIMDE_FLOAT32_C( -896.03), SIMDE_FLOAT32_C( -533.62), SIMDE_FLOAT32_C( -495.13), SIMDE_FLOAT32_C( 100.81), SIMDE_FLOAT32_C( 789.28), SIMDE_FLOAT32_C( 976.66), SIMDE_FLOAT32_C( -300.54), SIMDE_FLOAT32_C( -430.09), SIMDE_FLOAT32_C( 725.00), SIMDE_FLOAT32_C( 80.84), SIMDE_FLOAT32_C( -71.63), SIMDE_FLOAT32_C( 804.31), SIMDE_FLOAT32_C( 798.70), SIMDE_FLOAT32_C( 251.20), SIMDE_FLOAT32_C( -32.38)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -430.09), SIMDE_FLOAT32_C( 725.00), SIMDE_FLOAT32_C( 80.84), SIMDE_FLOAT32_C( -71.63), SIMDE_FLOAT32_C( 804.31), SIMDE_FLOAT32_C( 798.70), SIMDE_FLOAT32_C( 251.20), SIMDE_FLOAT32_C( -32.38)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 635.87), SIMDE_FLOAT32_C( 20.88), SIMDE_FLOAT32_C( 462.09), SIMDE_FLOAT32_C( 342.91), SIMDE_FLOAT32_C( -645.79), SIMDE_FLOAT32_C( 103.74), SIMDE_FLOAT32_C( -376.41), SIMDE_FLOAT32_C( 439.99), SIMDE_FLOAT32_C( 895.02), SIMDE_FLOAT32_C( 204.80), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 909.62), SIMDE_FLOAT32_C( -446.42), SIMDE_FLOAT32_C( 7.95), SIMDE_FLOAT32_C( -614.77), SIMDE_FLOAT32_C( 315.09)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 895.02), SIMDE_FLOAT32_C( 204.80), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 909.62), SIMDE_FLOAT32_C( -446.42), SIMDE_FLOAT32_C( 7.95), SIMDE_FLOAT32_C( -614.77), SIMDE_FLOAT32_C( 315.09)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -372.65), SIMDE_FLOAT32_C( 316.64), SIMDE_FLOAT32_C( -78.40), SIMDE_FLOAT32_C( 677.31), SIMDE_FLOAT32_C( 299.82), SIMDE_FLOAT32_C( 452.58), SIMDE_FLOAT32_C( 876.09), SIMDE_FLOAT32_C( 746.57), SIMDE_FLOAT32_C( 916.69), SIMDE_FLOAT32_C( -811.99), SIMDE_FLOAT32_C( 637.74), SIMDE_FLOAT32_C( -83.23), SIMDE_FLOAT32_C( 289.79), SIMDE_FLOAT32_C( -689.87), 
SIMDE_FLOAT32_C( 493.07), SIMDE_FLOAT32_C( 108.56)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 916.69), SIMDE_FLOAT32_C( -811.99), SIMDE_FLOAT32_C( 637.74), SIMDE_FLOAT32_C( -83.23), SIMDE_FLOAT32_C( 289.79), SIMDE_FLOAT32_C( -689.87), SIMDE_FLOAT32_C( 493.07), SIMDE_FLOAT32_C( 108.56)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -296.96), SIMDE_FLOAT32_C( 756.35), SIMDE_FLOAT32_C( -395.61), SIMDE_FLOAT32_C( -521.80), SIMDE_FLOAT32_C( -721.04), SIMDE_FLOAT32_C( -68.70), SIMDE_FLOAT32_C( -334.12), SIMDE_FLOAT32_C( -56.72), SIMDE_FLOAT32_C( 59.02), SIMDE_FLOAT32_C( -66.01), SIMDE_FLOAT32_C( -767.54), SIMDE_FLOAT32_C( 502.66), SIMDE_FLOAT32_C( -591.60), SIMDE_FLOAT32_C( -826.33), SIMDE_FLOAT32_C( -334.73), SIMDE_FLOAT32_C( -13.18)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 59.02), SIMDE_FLOAT32_C( -66.01), SIMDE_FLOAT32_C( -767.54), SIMDE_FLOAT32_C( 502.66), SIMDE_FLOAT32_C( -591.60), SIMDE_FLOAT32_C( -826.33), SIMDE_FLOAT32_C( -334.73), SIMDE_FLOAT32_C( -13.18)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -2.88), SIMDE_FLOAT32_C( -966.98), SIMDE_FLOAT32_C( 854.50), SIMDE_FLOAT32_C( 668.99), SIMDE_FLOAT32_C( 259.81), SIMDE_FLOAT32_C( -77.24), SIMDE_FLOAT32_C( -462.32), SIMDE_FLOAT32_C( 688.88), SIMDE_FLOAT32_C( 754.80), SIMDE_FLOAT32_C( 986.78), SIMDE_FLOAT32_C( 961.39), SIMDE_FLOAT32_C( 850.00), SIMDE_FLOAT32_C( 721.06), SIMDE_FLOAT32_C( -70.04), SIMDE_FLOAT32_C( 136.98), SIMDE_FLOAT32_C( 949.65)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 754.80), SIMDE_FLOAT32_C( 986.78), SIMDE_FLOAT32_C( 961.39), SIMDE_FLOAT32_C( 850.00), SIMDE_FLOAT32_C( 721.06), SIMDE_FLOAT32_C( -70.04), SIMDE_FLOAT32_C( 136.98), SIMDE_FLOAT32_C( 949.65)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 841.78), SIMDE_FLOAT32_C( -315.64), SIMDE_FLOAT32_C( 747.52), SIMDE_FLOAT32_C( 946.88), SIMDE_FLOAT32_C( 871.78), SIMDE_FLOAT32_C( 775.22), SIMDE_FLOAT32_C( -159.38), SIMDE_FLOAT32_C( 270.48), SIMDE_FLOAT32_C( -276.64), SIMDE_FLOAT32_C( 602.71), SIMDE_FLOAT32_C( -825.17), SIMDE_FLOAT32_C( -882.05), SIMDE_FLOAT32_C( -12.61), 
SIMDE_FLOAT32_C( -809.78), SIMDE_FLOAT32_C( -114.18), SIMDE_FLOAT32_C( 260.44)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -276.64), SIMDE_FLOAT32_C( 602.71), SIMDE_FLOAT32_C( -825.17), SIMDE_FLOAT32_C( -882.05), SIMDE_FLOAT32_C( -12.61), SIMDE_FLOAT32_C( -809.78), SIMDE_FLOAT32_C( -114.18), SIMDE_FLOAT32_C( 260.44)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 258.67), SIMDE_FLOAT32_C( 265.23), SIMDE_FLOAT32_C( 761.29), SIMDE_FLOAT32_C( -932.57), SIMDE_FLOAT32_C( 366.86), SIMDE_FLOAT32_C( -726.22), SIMDE_FLOAT32_C( 179.78), SIMDE_FLOAT32_C( 965.96), SIMDE_FLOAT32_C( -420.83), SIMDE_FLOAT32_C( -371.51), SIMDE_FLOAT32_C( 392.50), SIMDE_FLOAT32_C( 925.30), SIMDE_FLOAT32_C( 990.85), SIMDE_FLOAT32_C( -34.78), SIMDE_FLOAT32_C( -24.58), SIMDE_FLOAT32_C( -150.35)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -420.83), SIMDE_FLOAT32_C( -371.51), SIMDE_FLOAT32_C( 392.50), SIMDE_FLOAT32_C( 925.30), SIMDE_FLOAT32_C( 990.85), SIMDE_FLOAT32_C( -34.78), SIMDE_FLOAT32_C( -24.58), SIMDE_FLOAT32_C( -150.35)) } }; /* Cast every input down to 256 bits and compare it with the expected vector; the trailing 1 is passed straight to simde_assert_m256_close, whose definition (and therefore the exact meaning of that argument) is not visible here. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm512_castps512_ps256(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } /* Round-trip test for simde_mm512_castps_pd: each 512-bit float vector `a` is cast with simde_mm512_castps_pd and straight back with simde_mm512_castpd_ps, then compared with `r` via simde_assert_m512_close. `r` is declared as simde__m512 and is written with the same sixteen values as `a`, so the round trip is expected to leave the values unchanged. Returns 0 once the loop has finished. */ static int test_simde_mm512_castps_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -475.09), SIMDE_FLOAT32_C( 736.84), SIMDE_FLOAT32_C( -702.97), SIMDE_FLOAT32_C( -433.94), SIMDE_FLOAT32_C( 854.93), SIMDE_FLOAT32_C( -157.11), SIMDE_FLOAT32_C( 43.61), SIMDE_FLOAT32_C( -310.79), SIMDE_FLOAT32_C( -893.63), SIMDE_FLOAT32_C( 37.00), SIMDE_FLOAT32_C( 245.96), SIMDE_FLOAT32_C( 381.92), SIMDE_FLOAT32_C( -702.80), SIMDE_FLOAT32_C( 931.47), SIMDE_FLOAT32_C( 429.22), SIMDE_FLOAT32_C( -639.06)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -475.09), SIMDE_FLOAT32_C( 736.84), SIMDE_FLOAT32_C( -702.97), SIMDE_FLOAT32_C( -433.94), SIMDE_FLOAT32_C( 854.93), SIMDE_FLOAT32_C( -157.11), SIMDE_FLOAT32_C( 43.61), SIMDE_FLOAT32_C( -310.79),
SIMDE_FLOAT32_C( -893.63), SIMDE_FLOAT32_C( 37.00), SIMDE_FLOAT32_C( 245.96), SIMDE_FLOAT32_C( 381.92), SIMDE_FLOAT32_C( -702.80), SIMDE_FLOAT32_C( 931.47), SIMDE_FLOAT32_C( 429.22), SIMDE_FLOAT32_C( -639.06)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -147.89), SIMDE_FLOAT32_C( 347.19), SIMDE_FLOAT32_C( 203.54), SIMDE_FLOAT32_C( -988.99), SIMDE_FLOAT32_C( 306.65), SIMDE_FLOAT32_C( 641.36), SIMDE_FLOAT32_C( -965.49), SIMDE_FLOAT32_C( -587.11), SIMDE_FLOAT32_C( 724.40), SIMDE_FLOAT32_C( -636.19), SIMDE_FLOAT32_C( -368.06), SIMDE_FLOAT32_C( 887.68), SIMDE_FLOAT32_C( -970.41), SIMDE_FLOAT32_C( -566.45), SIMDE_FLOAT32_C( 791.35), SIMDE_FLOAT32_C( -284.64)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -147.89), SIMDE_FLOAT32_C( 347.19), SIMDE_FLOAT32_C( 203.54), SIMDE_FLOAT32_C( -988.99), SIMDE_FLOAT32_C( 306.65), SIMDE_FLOAT32_C( 641.36), SIMDE_FLOAT32_C( -965.49), SIMDE_FLOAT32_C( -587.11), SIMDE_FLOAT32_C( 724.40), SIMDE_FLOAT32_C( -636.19), SIMDE_FLOAT32_C( -368.06), SIMDE_FLOAT32_C( 887.68), SIMDE_FLOAT32_C( -970.41), SIMDE_FLOAT32_C( -566.45), SIMDE_FLOAT32_C( 791.35), SIMDE_FLOAT32_C( -284.64)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -73.77), SIMDE_FLOAT32_C( -499.31), SIMDE_FLOAT32_C( 113.82), SIMDE_FLOAT32_C( -847.95), SIMDE_FLOAT32_C( -593.73), SIMDE_FLOAT32_C( 978.06), SIMDE_FLOAT32_C( 354.00), SIMDE_FLOAT32_C( -120.89), SIMDE_FLOAT32_C( -554.86), SIMDE_FLOAT32_C( -105.83), SIMDE_FLOAT32_C( 247.41), SIMDE_FLOAT32_C( 728.79), SIMDE_FLOAT32_C( -359.78), SIMDE_FLOAT32_C( -375.68), SIMDE_FLOAT32_C( -456.84), SIMDE_FLOAT32_C( -859.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -73.77), SIMDE_FLOAT32_C( -499.31), SIMDE_FLOAT32_C( 113.82), SIMDE_FLOAT32_C( -847.95), SIMDE_FLOAT32_C( -593.73), SIMDE_FLOAT32_C( 978.06), SIMDE_FLOAT32_C( 354.00), SIMDE_FLOAT32_C( -120.89), SIMDE_FLOAT32_C( -554.86), SIMDE_FLOAT32_C( -105.83), SIMDE_FLOAT32_C( 247.41), SIMDE_FLOAT32_C( 728.79), SIMDE_FLOAT32_C( -359.78), SIMDE_FLOAT32_C( -375.68), SIMDE_FLOAT32_C( -456.84), SIMDE_FLOAT32_C( -859.51)) 
}, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -892.63), SIMDE_FLOAT32_C( 554.75), SIMDE_FLOAT32_C( 786.31), SIMDE_FLOAT32_C( 695.92), SIMDE_FLOAT32_C( -629.78), SIMDE_FLOAT32_C( 376.00), SIMDE_FLOAT32_C( 490.47), SIMDE_FLOAT32_C( -658.22), SIMDE_FLOAT32_C( -993.58), SIMDE_FLOAT32_C( 931.16), SIMDE_FLOAT32_C( 896.06), SIMDE_FLOAT32_C( -621.22), SIMDE_FLOAT32_C( 936.87), SIMDE_FLOAT32_C( -330.96), SIMDE_FLOAT32_C( 683.22), SIMDE_FLOAT32_C( 273.63)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -892.63), SIMDE_FLOAT32_C( 554.75), SIMDE_FLOAT32_C( 786.31), SIMDE_FLOAT32_C( 695.92), SIMDE_FLOAT32_C( -629.78), SIMDE_FLOAT32_C( 376.00), SIMDE_FLOAT32_C( 490.47), SIMDE_FLOAT32_C( -658.22), SIMDE_FLOAT32_C( -993.58), SIMDE_FLOAT32_C( 931.16), SIMDE_FLOAT32_C( 896.06), SIMDE_FLOAT32_C( -621.22), SIMDE_FLOAT32_C( 936.87), SIMDE_FLOAT32_C( -330.96), SIMDE_FLOAT32_C( 683.22), SIMDE_FLOAT32_C( 273.63)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -464.48), SIMDE_FLOAT32_C( 711.99), SIMDE_FLOAT32_C( 146.80), SIMDE_FLOAT32_C( -790.19), SIMDE_FLOAT32_C( -196.31), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( 390.50), SIMDE_FLOAT32_C( -693.04), SIMDE_FLOAT32_C( 121.41), SIMDE_FLOAT32_C( -677.25), SIMDE_FLOAT32_C( 840.59), SIMDE_FLOAT32_C( 653.82), SIMDE_FLOAT32_C( -341.60), SIMDE_FLOAT32_C( -152.67), SIMDE_FLOAT32_C( -910.13), SIMDE_FLOAT32_C( 648.18)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -464.48), SIMDE_FLOAT32_C( 711.99), SIMDE_FLOAT32_C( 146.80), SIMDE_FLOAT32_C( -790.19), SIMDE_FLOAT32_C( -196.31), SIMDE_FLOAT32_C( -19.08), SIMDE_FLOAT32_C( 390.50), SIMDE_FLOAT32_C( -693.04), SIMDE_FLOAT32_C( 121.41), SIMDE_FLOAT32_C( -677.25), SIMDE_FLOAT32_C( 840.59), SIMDE_FLOAT32_C( 653.82), SIMDE_FLOAT32_C( -341.60), SIMDE_FLOAT32_C( -152.67), SIMDE_FLOAT32_C( -910.13), SIMDE_FLOAT32_C( 648.18)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 610.99), SIMDE_FLOAT32_C( 354.13), SIMDE_FLOAT32_C( -805.98), SIMDE_FLOAT32_C( 470.53), SIMDE_FLOAT32_C( 848.62), SIMDE_FLOAT32_C( -853.14), SIMDE_FLOAT32_C( 898.19), 
SIMDE_FLOAT32_C( -186.52), SIMDE_FLOAT32_C( -954.10), SIMDE_FLOAT32_C( 887.52), SIMDE_FLOAT32_C( 607.34), SIMDE_FLOAT32_C( 42.34), SIMDE_FLOAT32_C( -842.20), SIMDE_FLOAT32_C( 627.26), SIMDE_FLOAT32_C( 417.38), SIMDE_FLOAT32_C( -300.32)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 610.99), SIMDE_FLOAT32_C( 354.13), SIMDE_FLOAT32_C( -805.98), SIMDE_FLOAT32_C( 470.53), SIMDE_FLOAT32_C( 848.62), SIMDE_FLOAT32_C( -853.14), SIMDE_FLOAT32_C( 898.19), SIMDE_FLOAT32_C( -186.52), SIMDE_FLOAT32_C( -954.10), SIMDE_FLOAT32_C( 887.52), SIMDE_FLOAT32_C( 607.34), SIMDE_FLOAT32_C( 42.34), SIMDE_FLOAT32_C( -842.20), SIMDE_FLOAT32_C( 627.26), SIMDE_FLOAT32_C( 417.38), SIMDE_FLOAT32_C( -300.32)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -91.92), SIMDE_FLOAT32_C( -128.53), SIMDE_FLOAT32_C( -157.39), SIMDE_FLOAT32_C( 922.31), SIMDE_FLOAT32_C( 68.36), SIMDE_FLOAT32_C( -842.81), SIMDE_FLOAT32_C( 60.56), SIMDE_FLOAT32_C( 689.02), SIMDE_FLOAT32_C( -70.29), SIMDE_FLOAT32_C( -753.33), SIMDE_FLOAT32_C( -999.31), SIMDE_FLOAT32_C( 72.69), SIMDE_FLOAT32_C( 47.99), SIMDE_FLOAT32_C( 903.18), SIMDE_FLOAT32_C( -5.05), SIMDE_FLOAT32_C( -435.35)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -91.92), SIMDE_FLOAT32_C( -128.53), SIMDE_FLOAT32_C( -157.39), SIMDE_FLOAT32_C( 922.31), SIMDE_FLOAT32_C( 68.36), SIMDE_FLOAT32_C( -842.81), SIMDE_FLOAT32_C( 60.56), SIMDE_FLOAT32_C( 689.02), SIMDE_FLOAT32_C( -70.29), SIMDE_FLOAT32_C( -753.33), SIMDE_FLOAT32_C( -999.31), SIMDE_FLOAT32_C( 72.69), SIMDE_FLOAT32_C( 47.99), SIMDE_FLOAT32_C( 903.18), SIMDE_FLOAT32_C( -5.05), SIMDE_FLOAT32_C( -435.35)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -687.13), SIMDE_FLOAT32_C( -562.12), SIMDE_FLOAT32_C( -564.90), SIMDE_FLOAT32_C( -948.64), SIMDE_FLOAT32_C( 287.72), SIMDE_FLOAT32_C( -13.16), SIMDE_FLOAT32_C( 706.69), SIMDE_FLOAT32_C( 62.48), SIMDE_FLOAT32_C( 15.33), SIMDE_FLOAT32_C( -938.52), SIMDE_FLOAT32_C( -548.66), SIMDE_FLOAT32_C( 893.31), SIMDE_FLOAT32_C( -166.31), SIMDE_FLOAT32_C( 3.21), SIMDE_FLOAT32_C( -121.90), SIMDE_FLOAT32_C( 
-237.87)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -687.13), SIMDE_FLOAT32_C( -562.12), SIMDE_FLOAT32_C( -564.90), SIMDE_FLOAT32_C( -948.64), SIMDE_FLOAT32_C( 287.72), SIMDE_FLOAT32_C( -13.16), SIMDE_FLOAT32_C( 706.69), SIMDE_FLOAT32_C( 62.48), SIMDE_FLOAT32_C( 15.33), SIMDE_FLOAT32_C( -938.52), SIMDE_FLOAT32_C( -548.66), SIMDE_FLOAT32_C( 893.31), SIMDE_FLOAT32_C( -166.31), SIMDE_FLOAT32_C( 3.21), SIMDE_FLOAT32_C( -121.90), SIMDE_FLOAT32_C( -237.87)) } }; /* Round trip ps -> pd -> ps for every case, then compare the result with the expected vector. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_castpd_ps(simde_mm512_castps_pd(test_vec[i].a)); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } /* Round-trip test for simde_mm512_castsi512_ps: each 512-bit integer vector `a` (sixteen 32-bit values) is cast with simde_mm512_castsi512_ps and straight back with simde_mm512_castps_si512; the result must equal `r` under simde_assert_m512i_i32 with ==. `r` is written with the same values as `a`. Returns 0 once the loop has finished. */ static int test_simde_mm512_castsi512_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 1318886849), INT32_C( -963615992), INT32_C( 1168255165), INT32_C(-1910220907), INT32_C( -362848940), INT32_C( 1228894571), INT32_C( -604141281), INT32_C( -310556576), INT32_C( -195291453), INT32_C(-1762187610), INT32_C( 785462248), INT32_C(-1654799886), INT32_C( 443157727), INT32_C( 291457152), INT32_C( -673220379), INT32_C( 1588648097)), simde_mm512_set_epi32(INT32_C( 1318886849), INT32_C( -963615992), INT32_C( 1168255165), INT32_C(-1910220907), INT32_C( -362848940), INT32_C( 1228894571), INT32_C( -604141281), INT32_C( -310556576), INT32_C( -195291453), INT32_C(-1762187610), INT32_C( 785462248), INT32_C(-1654799886), INT32_C( 443157727), INT32_C( 291457152), INT32_C( -673220379), INT32_C( 1588648097)) }, { simde_mm512_set_epi32(INT32_C( 388365668), INT32_C( 1922479678), INT32_C(-1025869072), INT32_C( 1053113602), INT32_C( -948060413), INT32_C( 1300260897), INT32_C( 723694906), INT32_C( 1579982945), INT32_C( 551168292), INT32_C( 2117629385), INT32_C( 1278333888), INT32_C(-1219831740), INT32_C( 1012175805), INT32_C(-2142193359), INT32_C(-1234937614), INT32_C( 296489222)), simde_mm512_set_epi32(INT32_C( 388365668), INT32_C( 1922479678),
INT32_C(-1025869072), INT32_C( 1053113602), INT32_C( -948060413), INT32_C( 1300260897), INT32_C( 723694906), INT32_C( 1579982945), INT32_C( 551168292), INT32_C( 2117629385), INT32_C( 1278333888), INT32_C(-1219831740), INT32_C( 1012175805), INT32_C(-2142193359), INT32_C(-1234937614), INT32_C( 296489222)) }, { simde_mm512_set_epi32(INT32_C( 1558252976), INT32_C( -896391163), INT32_C( -282653026), INT32_C( -261252112), INT32_C(-1886684518), INT32_C( -792082029), INT32_C(-1821569388), INT32_C( 452082945), INT32_C( 1996286767), INT32_C( -816391022), INT32_C( 52318481), INT32_C( -732357064), INT32_C(-1730294171), INT32_C( 1249707888), INT32_C( 1882801706), INT32_C( -348126448)), simde_mm512_set_epi32(INT32_C( 1558252976), INT32_C( -896391163), INT32_C( -282653026), INT32_C( -261252112), INT32_C(-1886684518), INT32_C( -792082029), INT32_C(-1821569388), INT32_C( 452082945), INT32_C( 1996286767), INT32_C( -816391022), INT32_C( 52318481), INT32_C( -732357064), INT32_C(-1730294171), INT32_C( 1249707888), INT32_C( 1882801706), INT32_C( -348126448)) }, { simde_mm512_set_epi32(INT32_C(-1306007403), INT32_C( -14823844), INT32_C( 68140915), INT32_C(-1279691583), INT32_C( -366833324), INT32_C( 1029809788), INT32_C( 398169753), INT32_C( 1644631001), INT32_C(-1495643320), INT32_C( -349662299), INT32_C( 337000079), INT32_C(-1599869755), INT32_C( -326525511), INT32_C( 2145777358), INT32_C( -671246537), INT32_C(-1483766126)), simde_mm512_set_epi32(INT32_C(-1306007403), INT32_C( -14823844), INT32_C( 68140915), INT32_C(-1279691583), INT32_C( -366833324), INT32_C( 1029809788), INT32_C( 398169753), INT32_C( 1644631001), INT32_C(-1495643320), INT32_C( -349662299), INT32_C( 337000079), INT32_C(-1599869755), INT32_C( -326525511), INT32_C( 2145777358), INT32_C( -671246537), INT32_C(-1483766126)) }, { simde_mm512_set_epi32(INT32_C( 1306718049), INT32_C( -481786167), INT32_C( 1036658403), INT32_C(-1275842496), INT32_C(-1384128337), INT32_C( 1110346848), INT32_C( 1633808121), INT32_C( 818679772), 
INT32_C( 528120539), INT32_C( 1454269875), INT32_C(-1309905851), INT32_C( 195834431), INT32_C( 1710057355), INT32_C(-1286641516), INT32_C( -825070396), INT32_C(-1314661204)), simde_mm512_set_epi32(INT32_C( 1306718049), INT32_C( -481786167), INT32_C( 1036658403), INT32_C(-1275842496), INT32_C(-1384128337), INT32_C( 1110346848), INT32_C( 1633808121), INT32_C( 818679772), INT32_C( 528120539), INT32_C( 1454269875), INT32_C(-1309905851), INT32_C( 195834431), INT32_C( 1710057355), INT32_C(-1286641516), INT32_C( -825070396), INT32_C(-1314661204)) }, { simde_mm512_set_epi32(INT32_C( 897666230), INT32_C(-1247200387), INT32_C(-1594960612), INT32_C( 784358708), INT32_C( -639715103), INT32_C( 1722588202), INT32_C( -957042730), INT32_C( 1748493888), INT32_C(-1424605467), INT32_C( 1715503797), INT32_C(-1877902145), INT32_C( 1433291829), INT32_C(-1672795293), INT32_C( -125523250), INT32_C( 817060741), INT32_C(-1940108057)), simde_mm512_set_epi32(INT32_C( 897666230), INT32_C(-1247200387), INT32_C(-1594960612), INT32_C( 784358708), INT32_C( -639715103), INT32_C( 1722588202), INT32_C( -957042730), INT32_C( 1748493888), INT32_C(-1424605467), INT32_C( 1715503797), INT32_C(-1877902145), INT32_C( 1433291829), INT32_C(-1672795293), INT32_C( -125523250), INT32_C( 817060741), INT32_C(-1940108057)) }, { simde_mm512_set_epi32(INT32_C( -771897847), INT32_C( 51088448), INT32_C( 731748986), INT32_C(-2010019631), INT32_C(-1805256328), INT32_C( -982668321), INT32_C(-1986983933), INT32_C( -806537017), INT32_C( 2103732941), INT32_C(-1950119891), INT32_C( -262758582), INT32_C( -704905824), INT32_C( -298698020), INT32_C( -290664422), INT32_C( -169474404), INT32_C( -381923585)), simde_mm512_set_epi32(INT32_C( -771897847), INT32_C( 51088448), INT32_C( 731748986), INT32_C(-2010019631), INT32_C(-1805256328), INT32_C( -982668321), INT32_C(-1986983933), INT32_C( -806537017), INT32_C( 2103732941), INT32_C(-1950119891), INT32_C( -262758582), INT32_C( -704905824), INT32_C( -298698020), INT32_C( -290664422), 
INT32_C( -169474404), INT32_C( -381923585)) }, { simde_mm512_set_epi32(INT32_C(-1424890095), INT32_C( -662922774), INT32_C( 1172732731), INT32_C( 220940559), INT32_C( 736508018), INT32_C(-1201335155), INT32_C( -747976663), INT32_C( 1864271935), INT32_C( 1893933430), INT32_C( 214467364), INT32_C(-1230640603), INT32_C(-1529352277), INT32_C( 301681975), INT32_C( -86355089), INT32_C( 1945634979), INT32_C( 1310017249)), simde_mm512_set_epi32(INT32_C(-1424890095), INT32_C( -662922774), INT32_C( 1172732731), INT32_C( 220940559), INT32_C( 736508018), INT32_C(-1201335155), INT32_C( -747976663), INT32_C( 1864271935), INT32_C( 1893933430), INT32_C( 214467364), INT32_C(-1230640603), INT32_C(-1529352277), INT32_C( 301681975), INT32_C( -86355089), INT32_C( 1945634979), INT32_C( 1310017249)) } }; /* Round trip si512 -> ps -> si512 for every case; the 32-bit lanes must compare equal to the expected vector. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_castps_si512(simde_mm512_castsi512_ps(test_vec[i].a)); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } /* Round-trip test for simde_mm512_castpd_ps. In the active "#if 1" branch each case stores eight doubles in `a`, loads them with simde_mm512_loadu_pd, casts pd -> ps -> pd, and compares the result with `r` (written with the same values as `a`) via simde_test_x86_assert_equal_f64x8; that branch returns 0. The "#else" branch is compiled out: it prints eight random f64x8 vectors, writing each one twice, and returns 1 -- presumably a generator for this table (confirm against the test harness). */ static int test_simde_mm512_castpd_ps (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -499.76), SIMDE_FLOAT64_C( 568.93), SIMDE_FLOAT64_C( 465.37), SIMDE_FLOAT64_C( -672.01), SIMDE_FLOAT64_C( -851.41), SIMDE_FLOAT64_C( -865.69), SIMDE_FLOAT64_C( -361.86), SIMDE_FLOAT64_C( -897.41) }, { SIMDE_FLOAT64_C( -499.76), SIMDE_FLOAT64_C( 568.93), SIMDE_FLOAT64_C( 465.37), SIMDE_FLOAT64_C( -672.01), SIMDE_FLOAT64_C( -851.41), SIMDE_FLOAT64_C( -865.69), SIMDE_FLOAT64_C( -361.86), SIMDE_FLOAT64_C( -897.41) } }, { { SIMDE_FLOAT64_C( 538.51), SIMDE_FLOAT64_C( -244.20), SIMDE_FLOAT64_C( 337.99), SIMDE_FLOAT64_C( -687.18), SIMDE_FLOAT64_C( 145.70), SIMDE_FLOAT64_C( 844.87), SIMDE_FLOAT64_C( 753.87), SIMDE_FLOAT64_C( 452.47) }, { SIMDE_FLOAT64_C( 538.51), SIMDE_FLOAT64_C( -244.20), SIMDE_FLOAT64_C( 337.99), SIMDE_FLOAT64_C( -687.18), SIMDE_FLOAT64_C( 145.70), SIMDE_FLOAT64_C( 844.87),
SIMDE_FLOAT64_C( 753.87), SIMDE_FLOAT64_C( 452.47) } }, { { SIMDE_FLOAT64_C( -653.48), SIMDE_FLOAT64_C( -270.86), SIMDE_FLOAT64_C( -547.24), SIMDE_FLOAT64_C( -931.09), SIMDE_FLOAT64_C( -78.54), SIMDE_FLOAT64_C( 915.72), SIMDE_FLOAT64_C( 958.51), SIMDE_FLOAT64_C( -819.67) }, { SIMDE_FLOAT64_C( -653.48), SIMDE_FLOAT64_C( -270.86), SIMDE_FLOAT64_C( -547.24), SIMDE_FLOAT64_C( -931.09), SIMDE_FLOAT64_C( -78.54), SIMDE_FLOAT64_C( 915.72), SIMDE_FLOAT64_C( 958.51), SIMDE_FLOAT64_C( -819.67) } }, { { SIMDE_FLOAT64_C( 251.01), SIMDE_FLOAT64_C( 551.04), SIMDE_FLOAT64_C( -32.00), SIMDE_FLOAT64_C( -266.08), SIMDE_FLOAT64_C( 853.30), SIMDE_FLOAT64_C( 460.19), SIMDE_FLOAT64_C( 322.00), SIMDE_FLOAT64_C( -646.46) }, { SIMDE_FLOAT64_C( 251.01), SIMDE_FLOAT64_C( 551.04), SIMDE_FLOAT64_C( -32.00), SIMDE_FLOAT64_C( -266.08), SIMDE_FLOAT64_C( 853.30), SIMDE_FLOAT64_C( 460.19), SIMDE_FLOAT64_C( 322.00), SIMDE_FLOAT64_C( -646.46) } }, { { SIMDE_FLOAT64_C( 29.12), SIMDE_FLOAT64_C( -212.62), SIMDE_FLOAT64_C( -318.47), SIMDE_FLOAT64_C( 177.72), SIMDE_FLOAT64_C( -78.31), SIMDE_FLOAT64_C( 319.66), SIMDE_FLOAT64_C( 280.31), SIMDE_FLOAT64_C( -539.80) }, { SIMDE_FLOAT64_C( 29.12), SIMDE_FLOAT64_C( -212.62), SIMDE_FLOAT64_C( -318.47), SIMDE_FLOAT64_C( 177.72), SIMDE_FLOAT64_C( -78.31), SIMDE_FLOAT64_C( 319.66), SIMDE_FLOAT64_C( 280.31), SIMDE_FLOAT64_C( -539.80) } }, { { SIMDE_FLOAT64_C( -924.54), SIMDE_FLOAT64_C( -381.70), SIMDE_FLOAT64_C( -226.98), SIMDE_FLOAT64_C( 221.16), SIMDE_FLOAT64_C( -536.83), SIMDE_FLOAT64_C( -473.11), SIMDE_FLOAT64_C( -326.37), SIMDE_FLOAT64_C( -190.32) }, { SIMDE_FLOAT64_C( -924.54), SIMDE_FLOAT64_C( -381.70), SIMDE_FLOAT64_C( -226.98), SIMDE_FLOAT64_C( 221.16), SIMDE_FLOAT64_C( -536.83), SIMDE_FLOAT64_C( -473.11), SIMDE_FLOAT64_C( -326.37), SIMDE_FLOAT64_C( -190.32) } }, { { SIMDE_FLOAT64_C( 256.03), SIMDE_FLOAT64_C( 126.39), SIMDE_FLOAT64_C( -121.40), SIMDE_FLOAT64_C( -822.51), SIMDE_FLOAT64_C( 42.11), SIMDE_FLOAT64_C( -162.89), SIMDE_FLOAT64_C( -642.18), 
SIMDE_FLOAT64_C( -706.89) }, { SIMDE_FLOAT64_C( 256.03), SIMDE_FLOAT64_C( 126.39), SIMDE_FLOAT64_C( -121.40), SIMDE_FLOAT64_C( -822.51), SIMDE_FLOAT64_C( 42.11), SIMDE_FLOAT64_C( -162.89), SIMDE_FLOAT64_C( -642.18), SIMDE_FLOAT64_C( -706.89) } }, { { SIMDE_FLOAT64_C( -611.85), SIMDE_FLOAT64_C( 325.82), SIMDE_FLOAT64_C( 27.04), SIMDE_FLOAT64_C( -758.55), SIMDE_FLOAT64_C( -213.98), SIMDE_FLOAT64_C( -650.96), SIMDE_FLOAT64_C( -405.01), SIMDE_FLOAT64_C( 815.14) }, { SIMDE_FLOAT64_C( -611.85), SIMDE_FLOAT64_C( 325.82), SIMDE_FLOAT64_C( 27.04), SIMDE_FLOAT64_C( -758.55), SIMDE_FLOAT64_C( -213.98), SIMDE_FLOAT64_C( -650.96), SIMDE_FLOAT64_C( -405.01), SIMDE_FLOAT64_C( 815.14) } } }; /* Load the eight doubles of each case, round trip pd -> ps -> pd, and compare with the expected values loaded the same way. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_castps_pd(simde_mm512_castpd_ps(a)); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; #else /* Compiled-out branch: prints eight random f64x8 vectors drawn from [-1000, 1000], each written once as first and once as last entry, then returns 1. */ fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512d a = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde_test_x86_write_f64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x8(2, a, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Round-trip test for simde_mm512_castpd_si512: each 512-bit integer vector `a` (eight 64-bit values) is cast with simde_mm512_castsi512_pd and straight back with simde_mm512_castpd_si512; the result must equal `r` under simde_assert_m512i_i64 with ==. `r` is written with the same values as `a`. NOTE(review): the loop expression is the same one used by test_simde_mm512_castsi512_pd and the two tables look identical -- confirm the duplication is intended. */ static int test_simde_mm512_castpd_si512(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C(-2932283473482861325), INT64_C(-7551244776617231168), INT64_C(-7807931705287408960), INT64_C(-5709657210396534887), INT64_C(-5733822187822983321), INT64_C(-6449026762131886574), INT64_C(-4469461462291751367), INT64_C(-1966061413581863839)), simde_mm512_set_epi64(INT64_C(-2932283473482861325), INT64_C(-7551244776617231168), INT64_C(-7807931705287408960), INT64_C(-5709657210396534887), INT64_C(-5733822187822983321), INT64_C(-6449026762131886574), INT64_C(-4469461462291751367), INT64_C(-1966061413581863839)) }, { simde_mm512_set_epi64(INT64_C( 1279499818540525091),
INT64_C( 7861921123166401201), INT64_C(-7776868620274070112), INT64_C( 1282257894426558327), INT64_C( 7845161632627040500), INT64_C(-5240804517978550546), INT64_C( 5302531798719229176), INT64_C(-5843726612220411524)), simde_mm512_set_epi64(INT64_C( 1279499818540525091), INT64_C( 7861921123166401201), INT64_C(-7776868620274070112), INT64_C( 1282257894426558327), INT64_C( 7845161632627040500), INT64_C(-5240804517978550546), INT64_C( 5302531798719229176), INT64_C(-5843726612220411524)) }, { simde_mm512_set_epi64(INT64_C( 8264653255264008622), INT64_C( 162552689407469387), INT64_C(-6727945509653055258), INT64_C(-8051656683653588284), INT64_C(-3673194952315675544), INT64_C(-6715795272606770523), INT64_C( 4713570617590173150), INT64_C(-1460297597223426823)), simde_mm512_set_epi64(INT64_C( 8264653255264008622), INT64_C( 162552689407469387), INT64_C(-6727945509653055258), INT64_C(-8051656683653588284), INT64_C(-3673194952315675544), INT64_C(-6715795272606770523), INT64_C( 4713570617590173150), INT64_C(-1460297597223426823)) }, { simde_mm512_set_epi64(INT64_C( 8052472118589501494), INT64_C(-9124867990213523455), INT64_C( 8236047422487698023), INT64_C( 5945811080038499438), INT64_C( 6518955141271267147), INT64_C( 2853045589014014838), INT64_C( 2581043198697401036), INT64_C( 3427873189039658818)), simde_mm512_set_epi64(INT64_C( 8052472118589501494), INT64_C(-9124867990213523455), INT64_C( 8236047422487698023), INT64_C( 5945811080038499438), INT64_C( 6518955141271267147), INT64_C( 2853045589014014838), INT64_C( 2581043198697401036), INT64_C( 3427873189039658818)) }, { simde_mm512_set_epi64(INT64_C(-3824716377022324243), INT64_C( 2673669294054155970), INT64_C(-3539125802254885907), INT64_C(-2325147789690797111), INT64_C(-8959458229056338743), INT64_C( 8174617038106487054), INT64_C( 2599971863369021204), INT64_C(-4237128038265732285)), simde_mm512_set_epi64(INT64_C(-3824716377022324243), INT64_C( 2673669294054155970), INT64_C(-3539125802254885907), INT64_C(-2325147789690797111), 
INT64_C(-8959458229056338743), INT64_C( 8174617038106487054), INT64_C( 2599971863369021204), INT64_C(-4237128038265732285)) }, { simde_mm512_set_epi64(INT64_C( 467193351903391999), INT64_C( 4360504692705357109), INT64_C( 9013482442495432840), INT64_C(-2009950226490426066), INT64_C( 483507342486254018), INT64_C( 183419223247078674), INT64_C( 2287275646029180906), INT64_C(-3986540516598251529)), simde_mm512_set_epi64(INT64_C( 467193351903391999), INT64_C( 4360504692705357109), INT64_C( 9013482442495432840), INT64_C(-2009950226490426066), INT64_C( 483507342486254018), INT64_C( 183419223247078674), INT64_C( 2287275646029180906), INT64_C(-3986540516598251529)) }, { simde_mm512_set_epi64(INT64_C(-3077645007853633163), INT64_C(-1572779564739653348), INT64_C(-9063178373150191806), INT64_C(-7245631614198864513), INT64_C(-8737364776584512923), INT64_C(-2892458838054920588), INT64_C( 471547968378055740), INT64_C( 775209234293426283)), simde_mm512_set_epi64(INT64_C(-3077645007853633163), INT64_C(-1572779564739653348), INT64_C(-9063178373150191806), INT64_C(-7245631614198864513), INT64_C(-8737364776584512923), INT64_C(-2892458838054920588), INT64_C( 471547968378055740), INT64_C( 775209234293426283)) }, { simde_mm512_set_epi64(INT64_C(-6041628496323364468), INT64_C(-9137870096902224131), INT64_C(-1573113398564050163), INT64_C( 8854240990059664398), INT64_C(-2186298020810705372), INT64_C(-2128579590201805820), INT64_C( 2972790510556186969), INT64_C(-3293272700939969964)), simde_mm512_set_epi64(INT64_C(-6041628496323364468), INT64_C(-9137870096902224131), INT64_C(-1573113398564050163), INT64_C( 8854240990059664398), INT64_C(-2186298020810705372), INT64_C(-2128579590201805820), INT64_C( 2972790510556186969), INT64_C(-3293272700939969964)) } }; /* Round trip si512 -> pd -> si512 for every case; the 64-bit lanes must compare equal to the expected vector. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_castpd_si512(simde_mm512_castsi512_pd(test_vec[i].a)); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } /* test_simde_mm512_castsi512_pd -- round-trip test: each 512-bit integer vector `a` (eight 64-bit values) is cast with simde_mm512_castsi512_pd and straight back with simde_mm512_castpd_si512; the result must equal `r` under simde_assert_m512i_i64 with ==. `r` is written with the same values as `a`. Returns 0 once the loop has finished. */ static int
test_simde_mm512_castsi512_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C(-2932283473482861325), INT64_C(-7551244776617231168), INT64_C(-7807931705287408960), INT64_C(-5709657210396534887), INT64_C(-5733822187822983321), INT64_C(-6449026762131886574), INT64_C(-4469461462291751367), INT64_C(-1966061413581863839)), simde_mm512_set_epi64(INT64_C(-2932283473482861325), INT64_C(-7551244776617231168), INT64_C(-7807931705287408960), INT64_C(-5709657210396534887), INT64_C(-5733822187822983321), INT64_C(-6449026762131886574), INT64_C(-4469461462291751367), INT64_C(-1966061413581863839)) }, { simde_mm512_set_epi64(INT64_C( 1279499818540525091), INT64_C( 7861921123166401201), INT64_C(-7776868620274070112), INT64_C( 1282257894426558327), INT64_C( 7845161632627040500), INT64_C(-5240804517978550546), INT64_C( 5302531798719229176), INT64_C(-5843726612220411524)), simde_mm512_set_epi64(INT64_C( 1279499818540525091), INT64_C( 7861921123166401201), INT64_C(-7776868620274070112), INT64_C( 1282257894426558327), INT64_C( 7845161632627040500), INT64_C(-5240804517978550546), INT64_C( 5302531798719229176), INT64_C(-5843726612220411524)) }, { simde_mm512_set_epi64(INT64_C( 8264653255264008622), INT64_C( 162552689407469387), INT64_C(-6727945509653055258), INT64_C(-8051656683653588284), INT64_C(-3673194952315675544), INT64_C(-6715795272606770523), INT64_C( 4713570617590173150), INT64_C(-1460297597223426823)), simde_mm512_set_epi64(INT64_C( 8264653255264008622), INT64_C( 162552689407469387), INT64_C(-6727945509653055258), INT64_C(-8051656683653588284), INT64_C(-3673194952315675544), INT64_C(-6715795272606770523), INT64_C( 4713570617590173150), INT64_C(-1460297597223426823)) }, { simde_mm512_set_epi64(INT64_C( 8052472118589501494), INT64_C(-9124867990213523455), INT64_C( 8236047422487698023), INT64_C( 5945811080038499438), INT64_C( 6518955141271267147), INT64_C( 2853045589014014838), INT64_C( 2581043198697401036), 
INT64_C( 3427873189039658818)), simde_mm512_set_epi64(INT64_C( 8052472118589501494), INT64_C(-9124867990213523455), INT64_C( 8236047422487698023), INT64_C( 5945811080038499438), INT64_C( 6518955141271267147), INT64_C( 2853045589014014838), INT64_C( 2581043198697401036), INT64_C( 3427873189039658818)) }, { simde_mm512_set_epi64(INT64_C(-3824716377022324243), INT64_C( 2673669294054155970), INT64_C(-3539125802254885907), INT64_C(-2325147789690797111), INT64_C(-8959458229056338743), INT64_C( 8174617038106487054), INT64_C( 2599971863369021204), INT64_C(-4237128038265732285)), simde_mm512_set_epi64(INT64_C(-3824716377022324243), INT64_C( 2673669294054155970), INT64_C(-3539125802254885907), INT64_C(-2325147789690797111), INT64_C(-8959458229056338743), INT64_C( 8174617038106487054), INT64_C( 2599971863369021204), INT64_C(-4237128038265732285)) }, { simde_mm512_set_epi64(INT64_C( 467193351903391999), INT64_C( 4360504692705357109), INT64_C( 9013482442495432840), INT64_C(-2009950226490426066), INT64_C( 483507342486254018), INT64_C( 183419223247078674), INT64_C( 2287275646029180906), INT64_C(-3986540516598251529)), simde_mm512_set_epi64(INT64_C( 467193351903391999), INT64_C( 4360504692705357109), INT64_C( 9013482442495432840), INT64_C(-2009950226490426066), INT64_C( 483507342486254018), INT64_C( 183419223247078674), INT64_C( 2287275646029180906), INT64_C(-3986540516598251529)) }, { simde_mm512_set_epi64(INT64_C(-3077645007853633163), INT64_C(-1572779564739653348), INT64_C(-9063178373150191806), INT64_C(-7245631614198864513), INT64_C(-8737364776584512923), INT64_C(-2892458838054920588), INT64_C( 471547968378055740), INT64_C( 775209234293426283)), simde_mm512_set_epi64(INT64_C(-3077645007853633163), INT64_C(-1572779564739653348), INT64_C(-9063178373150191806), INT64_C(-7245631614198864513), INT64_C(-8737364776584512923), INT64_C(-2892458838054920588), INT64_C( 471547968378055740), INT64_C( 775209234293426283)) }, { simde_mm512_set_epi64(INT64_C(-6041628496323364468), 
INT64_C(-9137870096902224131), INT64_C(-1573113398564050163), INT64_C( 8854240990059664398), INT64_C(-2186298020810705372), INT64_C(-2128579590201805820), INT64_C( 2972790510556186969), INT64_C(-3293272700939969964)), simde_mm512_set_epi64(INT64_C(-6041628496323364468), INT64_C(-9137870096902224131), INT64_C(-1573113398564050163), INT64_C( 8854240990059664398), INT64_C(-2186298020810705372), INT64_C(-2128579590201805820), INT64_C( 2972790510556186969), INT64_C(-3293272700939969964)) } }; /* Round trip si512 -> pd -> si512 for every case; the 64-bit lanes must compare equal to the expected vector. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_castpd_si512(simde_mm512_castsi512_pd(test_vec[i].a)); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } /* Test for simde_mm512_castsi128_si512: each 128-bit integer vector `a` (four 32-bit values) is widened to 512 bits, the result is passed through simde_mm512_maskz_mov_epi32 with mask 15, and the 32-bit lanes are compared for equality with `r`. Every expected `r` is written as twelve zeros followed by the four values of `a`. Returns 0 once the loop has finished. */ static int test_simde_mm512_castsi128_si512(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m512i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C(-1668834023), INT32_C(-1352312258), INT32_C( 556637397), INT32_C( -245835434)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1668834023), INT32_C(-1352312258), INT32_C( 556637397), INT32_C( -245835434)) }, { simde_mm_set_epi32(INT32_C( 1624943139), INT32_C( -461116292), INT32_C(-2119913042), INT32_C( -900874486)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1624943139), INT32_C( -461116292), INT32_C(-2119913042), INT32_C( -900874486)) }, { simde_mm_set_epi32(INT32_C( -483029339), INT32_C( 1367342572), INT32_C( 1717714414), INT32_C( -425758846)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -483029339), INT32_C( 1367342572), INT32_C( 1717714414), INT32_C( -425758846)) }, {
simde_mm_set_epi32(INT32_C( -911211301), INT32_C( 327710477), INT32_C( -375455356), INT32_C(-1057936365)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -911211301), INT32_C( 327710477), INT32_C( -375455356), INT32_C(-1057936365)) }, { simde_mm_set_epi32(INT32_C( 275440871), INT32_C( 641107064), INT32_C( 1055988271), INT32_C( 449906430)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 275440871), INT32_C( 641107064), INT32_C( 1055988271), INT32_C( 449906430)) }, { simde_mm_set_epi32(INT32_C( -222973014), INT32_C( -356035719), INT32_C( -568690768), INT32_C( 1550764475)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -222973014), INT32_C( -356035719), INT32_C( -568690768), INT32_C( 1550764475)) }, { simde_mm_set_epi32(INT32_C( 1024653484), INT32_C(-1114765609), INT32_C( 501755487), INT32_C( 1011153625)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1024653484), INT32_C(-1114765609), INT32_C( 501755487), INT32_C( 1011153625)) }, { simde_mm_set_epi32(INT32_C( 1112995048), INT32_C( 1174048979), INT32_C( 979704874), INT32_C( -581442043)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1112995048), INT32_C( 1174048979), INT32_C( 979704874), INT32_C( -581442043)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r =
simde_mm512_castsi128_si512(test_vec[i].a); /* Mask 15 (binary 1111) selects only the four lowest 32-bit lanes, matching the twelve zero lanes in every expected vector; presumably needed because the cast leaves the upper lanes unspecified -- confirm against the intrinsic's documentation. */ r=simde_mm512_maskz_mov_epi32(UINT8_C(15), r); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_castsi256_si512(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m512i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( -627018310), INT32_C( -732773372), INT32_C(-1935004141), INT32_C( 1864732488), INT32_C( 140289699), INT32_C(-1570899663), INT32_C(-1630998993), INT32_C( 818347323)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -627018310), INT32_C( -732773372), INT32_C(-1935004141), INT32_C( 1864732488), INT32_C( 140289699), INT32_C(-1570899663), INT32_C(-1630998993), INT32_C( 818347323)) }, { simde_mm256_set_epi32(INT32_C( -208321892), INT32_C(-1091642607), INT32_C( 600758994), INT32_C( 251226426), INT32_C( -263819406), INT32_C(-1881495622), INT32_C( 1833100173), INT32_C(-1639467184)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -208321892), INT32_C(-1091642607), INT32_C( 600758994), INT32_C( 251226426), INT32_C( -263819406), INT32_C(-1881495622), INT32_C( 1833100173), INT32_C(-1639467184)) }, { simde_mm256_set_epi32(INT32_C(-1754784817), INT32_C( -579924690), INT32_C( 661573432), INT32_C(-1181370591), INT32_C( 60095626), INT32_C( 956226137), INT32_C( -117371256), INT32_C(-1481124874)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1754784817), INT32_C( -579924690), INT32_C( 661573432), INT32_C(-1181370591), INT32_C( 60095626), INT32_C( 956226137), INT32_C( -117371256), INT32_C(-1481124874)) }, { simde_mm256_set_epi32(INT32_C( 2059950650), INT32_C( 100086839), INT32_C( 1040622382), INT32_C( 188824341), INT32_C( 1391404675), INT32_C(-2134538567), INT32_C( 1420659092), INT32_C( 1719736439)),
simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 2059950650), INT32_C( 100086839), INT32_C( 1040622382), INT32_C( 188824341), INT32_C( 1391404675), INT32_C(-2134538567), INT32_C( 1420659092), INT32_C( 1719736439)) }, { simde_mm256_set_epi32(INT32_C( -72277944), INT32_C( -175328), INT32_C( 1214469435), INT32_C( -481210186), INT32_C( 1755907779), INT32_C( 1941862792), INT32_C( -53966731), INT32_C( 177508362)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -72277944), INT32_C( -175328), INT32_C( 1214469435), INT32_C( -481210186), INT32_C( 1755907779), INT32_C( 1941862792), INT32_C( -53966731), INT32_C( 177508362)) }, { simde_mm256_set_epi32(INT32_C( -579195101), INT32_C( 394175316), INT32_C( 2062674518), INT32_C(-2146728292), INT32_C( 67273180), INT32_C(-1275684351), INT32_C( 245156584), INT32_C( 382170563)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -579195101), INT32_C( 394175316), INT32_C( 2062674518), INT32_C(-2146728292), INT32_C( 67273180), INT32_C(-1275684351), INT32_C( 245156584), INT32_C( 382170563)) }, { simde_mm256_set_epi32(INT32_C(-1725634605), INT32_C(-1992977299), INT32_C( -692965094), INT32_C( 1390281908), INT32_C( 1056328531), INT32_C( 448471511), INT32_C( 676963201), INT32_C( -471721601)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1725634605), INT32_C(-1992977299), INT32_C( -692965094), INT32_C( 1390281908), INT32_C( 1056328531), INT32_C( 448471511), INT32_C( 676963201), INT32_C( -471721601)) }, { simde_mm256_set_epi32(INT32_C( 868040614), INT32_C( -210101311), INT32_C( 1711471891), INT32_C( -87087404), INT32_C(-1569971706), INT32_C(-1573190685), INT32_C( 1472943366), INT32_C(-1700287741)), 
simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 868040614), INT32_C( -210101311), INT32_C( 1711471891), INT32_C( -87087404), INT32_C(-1569971706), INT32_C(-1573190685), INT32_C( 1472943366), INT32_C(-1700287741)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_castsi256_si512(test_vec[i].a); r=simde_mm512_maskz_mov_epi32(UINT8_C(255), r); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_castps_si512(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 1318886849), INT32_C( -963615992), INT32_C( 1168255165), INT32_C(-1910220907), INT32_C( -362848940), INT32_C( 1228894571), INT32_C( -604141281), INT32_C( -310556576), INT32_C( -195291453), INT32_C(-1762187610), INT32_C( 785462248), INT32_C(-1654799886), INT32_C( 443157727), INT32_C( 291457152), INT32_C( -673220379), INT32_C( 1588648097)), simde_mm512_set_epi32(INT32_C( 1318886849), INT32_C( -963615992), INT32_C( 1168255165), INT32_C(-1910220907), INT32_C( -362848940), INT32_C( 1228894571), INT32_C( -604141281), INT32_C( -310556576), INT32_C( -195291453), INT32_C(-1762187610), INT32_C( 785462248), INT32_C(-1654799886), INT32_C( 443157727), INT32_C( 291457152), INT32_C( -673220379), INT32_C( 1588648097)) }, { simde_mm512_set_epi32(INT32_C( 388365668), INT32_C( 1922479678), INT32_C(-1025869072), INT32_C( 1053113602), INT32_C( -948060413), INT32_C( 1300260897), INT32_C( 723694906), INT32_C( 1579982945), INT32_C( 551168292), INT32_C( 2117629385), INT32_C( 1278333888), INT32_C(-1219831740), INT32_C( 1012175805), INT32_C(-2142193359), INT32_C(-1234937614), INT32_C( 296489222)), simde_mm512_set_epi32(INT32_C( 388365668), INT32_C( 1922479678), INT32_C(-1025869072), INT32_C( 1053113602), INT32_C( -948060413), INT32_C( 1300260897), INT32_C( 723694906), INT32_C( 1579982945), 
INT32_C( 551168292), INT32_C( 2117629385), INT32_C( 1278333888), INT32_C(-1219831740), INT32_C( 1012175805), INT32_C(-2142193359), INT32_C(-1234937614), INT32_C( 296489222)) }, { simde_mm512_set_epi32(INT32_C( 1558252976), INT32_C( -896391163), INT32_C( -282653026), INT32_C( -261252112), INT32_C(-1886684518), INT32_C( -792082029), INT32_C(-1821569388), INT32_C( 452082945), INT32_C( 1996286767), INT32_C( -816391022), INT32_C( 52318481), INT32_C( -732357064), INT32_C(-1730294171), INT32_C( 1249707888), INT32_C( 1882801706), INT32_C( -348126448)), simde_mm512_set_epi32(INT32_C( 1558252976), INT32_C( -896391163), INT32_C( -282653026), INT32_C( -261252112), INT32_C(-1886684518), INT32_C( -792082029), INT32_C(-1821569388), INT32_C( 452082945), INT32_C( 1996286767), INT32_C( -816391022), INT32_C( 52318481), INT32_C( -732357064), INT32_C(-1730294171), INT32_C( 1249707888), INT32_C( 1882801706), INT32_C( -348126448)) }, { simde_mm512_set_epi32(INT32_C(-1306007403), INT32_C( -14823844), INT32_C( 68140915), INT32_C(-1279691583), INT32_C( -366833324), INT32_C( 1029809788), INT32_C( 398169753), INT32_C( 1644631001), INT32_C(-1495643320), INT32_C( -349662299), INT32_C( 337000079), INT32_C(-1599869755), INT32_C( -326525511), INT32_C( 2145777358), INT32_C( -671246537), INT32_C(-1483766126)), simde_mm512_set_epi32(INT32_C(-1306007403), INT32_C( -14823844), INT32_C( 68140915), INT32_C(-1279691583), INT32_C( -366833324), INT32_C( 1029809788), INT32_C( 398169753), INT32_C( 1644631001), INT32_C(-1495643320), INT32_C( -349662299), INT32_C( 337000079), INT32_C(-1599869755), INT32_C( -326525511), INT32_C( 2145777358), INT32_C( -671246537), INT32_C(-1483766126)) }, { simde_mm512_set_epi32(INT32_C( 1306718049), INT32_C( -481786167), INT32_C( 1036658403), INT32_C(-1275842496), INT32_C(-1384128337), INT32_C( 1110346848), INT32_C( 1633808121), INT32_C( 818679772), INT32_C( 528120539), INT32_C( 1454269875), INT32_C(-1309905851), INT32_C( 195834431), INT32_C( 1710057355), INT32_C(-1286641516), 
INT32_C( -825070396), INT32_C(-1314661204)), simde_mm512_set_epi32(INT32_C( 1306718049), INT32_C( -481786167), INT32_C( 1036658403), INT32_C(-1275842496), INT32_C(-1384128337), INT32_C( 1110346848), INT32_C( 1633808121), INT32_C( 818679772), INT32_C( 528120539), INT32_C( 1454269875), INT32_C(-1309905851), INT32_C( 195834431), INT32_C( 1710057355), INT32_C(-1286641516), INT32_C( -825070396), INT32_C(-1314661204)) }, { simde_mm512_set_epi32(INT32_C( 897666230), INT32_C(-1247200387), INT32_C(-1594960612), INT32_C( 784358708), INT32_C( -639715103), INT32_C( 1722588202), INT32_C( -957042730), INT32_C( 1748493888), INT32_C(-1424605467), INT32_C( 1715503797), INT32_C(-1877902145), INT32_C( 1433291829), INT32_C(-1672795293), INT32_C( -125523250), INT32_C( 817060741), INT32_C(-1940108057)), simde_mm512_set_epi32(INT32_C( 897666230), INT32_C(-1247200387), INT32_C(-1594960612), INT32_C( 784358708), INT32_C( -639715103), INT32_C( 1722588202), INT32_C( -957042730), INT32_C( 1748493888), INT32_C(-1424605467), INT32_C( 1715503797), INT32_C(-1877902145), INT32_C( 1433291829), INT32_C(-1672795293), INT32_C( -125523250), INT32_C( 817060741), INT32_C(-1940108057)) }, { simde_mm512_set_epi32(INT32_C( -771897847), INT32_C( 51088448), INT32_C( 731748986), INT32_C(-2010019631), INT32_C(-1805256328), INT32_C( -982668321), INT32_C(-1986983933), INT32_C( -806537017), INT32_C( 2103732941), INT32_C(-1950119891), INT32_C( -262758582), INT32_C( -704905824), INT32_C( -298698020), INT32_C( -290664422), INT32_C( -169474404), INT32_C( -381923585)), simde_mm512_set_epi32(INT32_C( -771897847), INT32_C( 51088448), INT32_C( 731748986), INT32_C(-2010019631), INT32_C(-1805256328), INT32_C( -982668321), INT32_C(-1986983933), INT32_C( -806537017), INT32_C( 2103732941), INT32_C(-1950119891), INT32_C( -262758582), INT32_C( -704905824), INT32_C( -298698020), INT32_C( -290664422), INT32_C( -169474404), INT32_C( -381923585)) }, { simde_mm512_set_epi32(INT32_C(-1424890095), INT32_C( -662922774), INT32_C( 
1172732731), INT32_C( 220940559), INT32_C( 736508018), INT32_C(-1201335155), INT32_C( -747976663), INT32_C( 1864271935), INT32_C( 1893933430), INT32_C( 214467364), INT32_C(-1230640603), INT32_C(-1529352277), INT32_C( 301681975), INT32_C( -86355089), INT32_C( 1945634979), INT32_C( 1310017249)), simde_mm512_set_epi32(INT32_C(-1424890095), INT32_C( -662922774), INT32_C( 1172732731), INT32_C( 220940559), INT32_C( 736508018), INT32_C(-1201335155), INT32_C( -747976663), INT32_C( 1864271935), INT32_C( 1893933430), INT32_C( 214467364), INT32_C(-1230640603), INT32_C(-1529352277), INT32_C( 301681975), INT32_C( -86355089), INT32_C( 1945634979), INT32_C( 1310017249)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_castps_si512(simde_mm512_castsi512_ps(test_vec[i].a)); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_castps128_ps512(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m512 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -124.41), SIMDE_FLOAT32_C( 994.42), SIMDE_FLOAT32_C( -888.56), SIMDE_FLOAT32_C( -241.67)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -124.41), SIMDE_FLOAT32_C( 994.42), SIMDE_FLOAT32_C( -888.56), SIMDE_FLOAT32_C( -241.67)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -472.63), SIMDE_FLOAT32_C( -923.61), SIMDE_FLOAT32_C( -185.31), SIMDE_FLOAT32_C( 531.85)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( -472.63), SIMDE_FLOAT32_C( -923.61), SIMDE_FLOAT32_C( -185.31), SIMDE_FLOAT32_C( 531.85)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -636.73), SIMDE_FLOAT32_C( -810.98), SIMDE_FLOAT32_C( -952.52), SIMDE_FLOAT32_C( 896.08)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -636.73), SIMDE_FLOAT32_C( -810.98), SIMDE_FLOAT32_C( -952.52), SIMDE_FLOAT32_C( 896.08)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 142.17), SIMDE_FLOAT32_C( -205.03), SIMDE_FLOAT32_C( -269.17), SIMDE_FLOAT32_C( -426.17)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 142.17), SIMDE_FLOAT32_C( -205.03), SIMDE_FLOAT32_C( -269.17), SIMDE_FLOAT32_C( -426.17)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 149.14), SIMDE_FLOAT32_C( -672.83), SIMDE_FLOAT32_C( -162.76), SIMDE_FLOAT32_C( -647.13)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 149.14), SIMDE_FLOAT32_C( -672.83), SIMDE_FLOAT32_C( -162.76), SIMDE_FLOAT32_C( -647.13)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -915.60), SIMDE_FLOAT32_C( 75.79), SIMDE_FLOAT32_C( 831.71), SIMDE_FLOAT32_C( 144.92)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 
0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -915.60), SIMDE_FLOAT32_C( 75.79), SIMDE_FLOAT32_C( 831.71), SIMDE_FLOAT32_C( 144.92)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -748.69), SIMDE_FLOAT32_C( -492.84), SIMDE_FLOAT32_C( -132.48), SIMDE_FLOAT32_C( -765.52)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -748.69), SIMDE_FLOAT32_C( -492.84), SIMDE_FLOAT32_C( -132.48), SIMDE_FLOAT32_C( -765.52)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 346.51), SIMDE_FLOAT32_C( 297.54), SIMDE_FLOAT32_C( 196.93), SIMDE_FLOAT32_C( -729.98)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 346.51), SIMDE_FLOAT32_C( 297.54), SIMDE_FLOAT32_C( 196.93), SIMDE_FLOAT32_C( -729.98)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_castps128_ps512(test_vec[i].a); r=simde_mm512_maskz_mov_ps(UINT8_C(15), r); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_castps256_ps512(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m512 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -144.51), SIMDE_FLOAT32_C( 522.85), SIMDE_FLOAT32_C( 259.94), SIMDE_FLOAT32_C( 889.02), SIMDE_FLOAT32_C( 47.86), SIMDE_FLOAT32_C( 181.02), SIMDE_FLOAT32_C( 389.89), SIMDE_FLOAT32_C( -27.36)), 
simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -144.51), SIMDE_FLOAT32_C( 522.85), SIMDE_FLOAT32_C( 259.94), SIMDE_FLOAT32_C( 889.02), SIMDE_FLOAT32_C( 47.86), SIMDE_FLOAT32_C( 181.02), SIMDE_FLOAT32_C( 389.89), SIMDE_FLOAT32_C( -27.36)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -477.70), SIMDE_FLOAT32_C( -298.40), SIMDE_FLOAT32_C( -730.62), SIMDE_FLOAT32_C( 379.73), SIMDE_FLOAT32_C( 223.22), SIMDE_FLOAT32_C( -917.34), SIMDE_FLOAT32_C( -268.46), SIMDE_FLOAT32_C( -361.61)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -477.70), SIMDE_FLOAT32_C( -298.40), SIMDE_FLOAT32_C( -730.62), SIMDE_FLOAT32_C( 379.73), SIMDE_FLOAT32_C( 223.22), SIMDE_FLOAT32_C( -917.34), SIMDE_FLOAT32_C( -268.46), SIMDE_FLOAT32_C( -361.61)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 995.68), SIMDE_FLOAT32_C( 377.56), SIMDE_FLOAT32_C( -263.10), SIMDE_FLOAT32_C( 880.58), SIMDE_FLOAT32_C( 61.47), SIMDE_FLOAT32_C( 993.95), SIMDE_FLOAT32_C( -45.95), SIMDE_FLOAT32_C( -445.12)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 995.68), SIMDE_FLOAT32_C( 377.56), SIMDE_FLOAT32_C( -263.10), SIMDE_FLOAT32_C( 880.58), SIMDE_FLOAT32_C( 61.47), SIMDE_FLOAT32_C( 993.95), SIMDE_FLOAT32_C( -45.95), SIMDE_FLOAT32_C( -445.12)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 391.67), SIMDE_FLOAT32_C( -695.88), SIMDE_FLOAT32_C( 312.22), SIMDE_FLOAT32_C( 389.64), SIMDE_FLOAT32_C( 81.09), SIMDE_FLOAT32_C( 745.89), SIMDE_FLOAT32_C( -178.17), SIMDE_FLOAT32_C( -269.92)), 
simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 391.67), SIMDE_FLOAT32_C( -695.88), SIMDE_FLOAT32_C( 312.22), SIMDE_FLOAT32_C( 389.64), SIMDE_FLOAT32_C( 81.09), SIMDE_FLOAT32_C( 745.89), SIMDE_FLOAT32_C( -178.17), SIMDE_FLOAT32_C( -269.92)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 498.47), SIMDE_FLOAT32_C( -985.97), SIMDE_FLOAT32_C( 802.71), SIMDE_FLOAT32_C( -28.60), SIMDE_FLOAT32_C( -781.89), SIMDE_FLOAT32_C( -410.06), SIMDE_FLOAT32_C( -602.74), SIMDE_FLOAT32_C( -353.26)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 498.47), SIMDE_FLOAT32_C( -985.97), SIMDE_FLOAT32_C( 802.71), SIMDE_FLOAT32_C( -28.60), SIMDE_FLOAT32_C( -781.89), SIMDE_FLOAT32_C( -410.06), SIMDE_FLOAT32_C( -602.74), SIMDE_FLOAT32_C( -353.26)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 265.34), SIMDE_FLOAT32_C( -587.26), SIMDE_FLOAT32_C( -801.19), SIMDE_FLOAT32_C( -4.56), SIMDE_FLOAT32_C( -900.49), SIMDE_FLOAT32_C( 391.40), SIMDE_FLOAT32_C( -416.15), SIMDE_FLOAT32_C( -286.27)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 265.34), SIMDE_FLOAT32_C( -587.26), SIMDE_FLOAT32_C( -801.19), SIMDE_FLOAT32_C( -4.56), SIMDE_FLOAT32_C( -900.49), SIMDE_FLOAT32_C( 391.40), SIMDE_FLOAT32_C( -416.15), SIMDE_FLOAT32_C( -286.27)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 167.27), SIMDE_FLOAT32_C( -483.42), SIMDE_FLOAT32_C( 759.82), SIMDE_FLOAT32_C( -560.80), SIMDE_FLOAT32_C( -968.24), SIMDE_FLOAT32_C( -534.87), SIMDE_FLOAT32_C( -698.82), SIMDE_FLOAT32_C( -382.24)), 
simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 167.27), SIMDE_FLOAT32_C( -483.42), SIMDE_FLOAT32_C( 759.82), SIMDE_FLOAT32_C( -560.80), SIMDE_FLOAT32_C( -968.24), SIMDE_FLOAT32_C( -534.87), SIMDE_FLOAT32_C( -698.82), SIMDE_FLOAT32_C( -382.24)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -792.23), SIMDE_FLOAT32_C( 994.18), SIMDE_FLOAT32_C( -632.41), SIMDE_FLOAT32_C( 872.87), SIMDE_FLOAT32_C( -406.41), SIMDE_FLOAT32_C( 989.00), SIMDE_FLOAT32_C( 781.89), SIMDE_FLOAT32_C( 609.26)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -792.23), SIMDE_FLOAT32_C( 994.18), SIMDE_FLOAT32_C( -632.41), SIMDE_FLOAT32_C( 872.87), SIMDE_FLOAT32_C( -406.41), SIMDE_FLOAT32_C( 989.00), SIMDE_FLOAT32_C( 781.89), SIMDE_FLOAT32_C( 609.26)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_castps256_ps512(test_vec[i].a); r=simde_mm512_maskz_mov_ps(UINT8_C(255), r); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_castsi512_si128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m128i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 1658575222), INT32_C(-1117261553), INT32_C(-1839997259), INT32_C( -299852262), INT32_C( -856912374), INT32_C( 2142936567), INT32_C( -954684084), INT32_C( 1657017766), INT32_C( -348123015), INT32_C( -101609698), INT32_C( 554693435), INT32_C(-1533582435), INT32_C( -400642696), INT32_C( 1776297700), INT32_C( 1392524409), INT32_C( 398537104)), simde_mm_set_epi32(INT32_C( -400642696), INT32_C( 1776297700), INT32_C( 1392524409), INT32_C( 398537104)) }, { simde_mm512_set_epi32(INT32_C( 776372316), INT32_C( 
831674978), INT32_C( 55416914), INT32_C( 243273534), INT32_C( -792877249), INT32_C( -771344813), INT32_C( -310546031), INT32_C(-2107743643), INT32_C(-2024392408), INT32_C(-1627000847), INT32_C( 2079816508), INT32_C( 371701407), INT32_C(-1936589526), INT32_C(-1276131185), INT32_C(-2091159003), INT32_C( 424652427)), simde_mm_set_epi32(INT32_C(-1936589526), INT32_C(-1276131185), INT32_C(-2091159003), INT32_C( 424652427)) }, { simde_mm512_set_epi32(INT32_C( 1173201274), INT32_C( 2136038971), INT32_C( 1938580455), INT32_C( 1481503254), INT32_C( -900987494), INT32_C( 381103974), INT32_C( -844512348), INT32_C( 585017509), INT32_C( 711603246), INT32_C(-1519792959), INT32_C( 1933920466), INT32_C(-1698769431), INT32_C(-1782976050), INT32_C(-1748575141), INT32_C(-1460360861), INT32_C( -334769088)), simde_mm_set_epi32(INT32_C(-1782976050), INT32_C(-1748575141), INT32_C(-1460360861), INT32_C( -334769088)) }, { simde_mm512_set_epi32(INT32_C( 1804169520), INT32_C(-1929398298), INT32_C( 1527159056), INT32_C( 779300869), INT32_C(-1718101462), INT32_C( -324959902), INT32_C( 356284621), INT32_C( 259973358), INT32_C( -85521098), INT32_C( 1509969945), INT32_C( -146300380), INT32_C(-1326146169), INT32_C( -222527512), INT32_C( 2102498956), INT32_C( 21770579), INT32_C( 799947456)), simde_mm_set_epi32(INT32_C( -222527512), INT32_C( 2102498956), INT32_C( 21770579), INT32_C( 799947456)) }, { simde_mm512_set_epi32(INT32_C( 21446459), INT32_C( -874117397), INT32_C(-1237626592), INT32_C(-1776987073), INT32_C(-1868633440), INT32_C( 1756862535), INT32_C( 992623093), INT32_C( 2049114875), INT32_C(-1780158792), INT32_C( -396995291), INT32_C( 1019180338), INT32_C(-1532238028), INT32_C( 1367863842), INT32_C(-1092466184), INT32_C( 1828179807), INT32_C( -163821277)), simde_mm_set_epi32(INT32_C( 1367863842), INT32_C(-1092466184), INT32_C( 1828179807), INT32_C( -163821277)) }, { simde_mm512_set_epi32(INT32_C( 430786946), INT32_C( -784387301), INT32_C(-1254472927), INT32_C(-1743072668), INT32_C( 
651286019), INT32_C(-1388487554), INT32_C( 757390683), INT32_C( 1947676255), INT32_C( 969518281), INT32_C( -72978072), INT32_C( 760758773), INT32_C( 1573392247), INT32_C( 865448841), INT32_C( 1831180971), INT32_C(-1639739355), INT32_C( 75957511)), simde_mm_set_epi32(INT32_C( 865448841), INT32_C( 1831180971), INT32_C(-1639739355), INT32_C( 75957511)) }, { simde_mm512_set_epi32(INT32_C( 2143921197), INT32_C( -566618658), INT32_C( -232811985), INT32_C( -646802339), INT32_C(-1178364835), INT32_C( 1276977735), INT32_C( 767086016), INT32_C( -313594533), INT32_C(-1508762742), INT32_C( 64124631), INT32_C(-1825241034), INT32_C( 688272909), INT32_C( 217977439), INT32_C( 193086834), INT32_C( -334037547), INT32_C( -96078706)), simde_mm_set_epi32(INT32_C( 217977439), INT32_C( 193086834), INT32_C( -334037547), INT32_C( -96078706)) }, { simde_mm512_set_epi32(INT32_C(-1446801805), INT32_C( 1701962052), INT32_C( 2110825600), INT32_C( 1003088425), INT32_C(-1369466226), INT32_C( -588712121), INT32_C(-1911969632), INT32_C( 1166459977), INT32_C(-1912318103), INT32_C( -539596217), INT32_C(-1472577858), INT32_C( -435893517), INT32_C( 1340330175), INT32_C( 1762005905), INT32_C(-1553204174), INT32_C( -839776889)), simde_mm_set_epi32(INT32_C( 1340330175), INT32_C( 1762005905), INT32_C(-1553204174), INT32_C( -839776889)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm512_castsi512_si128(test_vec[i].a); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_castsi512_si256(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m256i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 1313023361), INT32_C( -284143420), INT32_C( 762022716), INT32_C( -109476439), INT32_C( -377196873), INT32_C( 289021876), INT32_C(-1835156104), INT32_C(-2032178077), INT32_C(-1821500948), INT32_C( 676317044), INT32_C( 1400280404), INT32_C(-1176819357), INT32_C( -771867753), INT32_C(-1499090744), INT32_C( 845761822), 
INT32_C( 1051767340)), simde_mm256_set_epi32(INT32_C(-1821500948), INT32_C( 676317044), INT32_C( 1400280404), INT32_C(-1176819357), INT32_C( -771867753), INT32_C(-1499090744), INT32_C( 845761822), INT32_C( 1051767340)) }, { simde_mm512_set_epi32(INT32_C( 584849517), INT32_C( 1957310042), INT32_C( 1025237717), INT32_C( -771085485), INT32_C( 578687068), INT32_C( 23449370), INT32_C(-1748604352), INT32_C( 1528872918), INT32_C( 331610618), INT32_C( 1589449489), INT32_C(-1723663783), INT32_C( -274890341), INT32_C( 1614271470), INT32_C(-1087921179), INT32_C( 318041946), INT32_C(-1241918784)), simde_mm256_set_epi32(INT32_C( 331610618), INT32_C( 1589449489), INT32_C(-1723663783), INT32_C( -274890341), INT32_C( 1614271470), INT32_C(-1087921179), INT32_C( 318041946), INT32_C(-1241918784)) }, { simde_mm512_set_epi32(INT32_C(-1852610712), INT32_C( 1069895831), INT32_C(-1772412775), INT32_C( 1674180593), INT32_C( 1091612022), INT32_C( 2100501546), INT32_C( 179298125), INT32_C( -696375983), INT32_C( 697782088), INT32_C( 360978520), INT32_C( -569372444), INT32_C(-1688535258), INT32_C( 857372618), INT32_C( 1720533351), INT32_C( 174096645), INT32_C( -27592678)), simde_mm256_set_epi32(INT32_C( 697782088), INT32_C( 360978520), INT32_C( -569372444), INT32_C(-1688535258), INT32_C( 857372618), INT32_C( 1720533351), INT32_C( 174096645), INT32_C( -27592678)) }, { simde_mm512_set_epi32(INT32_C( 1588790835), INT32_C( 1641509494), INT32_C(-1255607211), INT32_C( 325546446), INT32_C( -545478700), INT32_C( 1405876195), INT32_C(-1090548279), INT32_C(-1934286399), INT32_C(-1145918593), INT32_C( -667441949), INT32_C( 804839434), INT32_C( 1326022329), INT32_C( 307496614), INT32_C(-1108181850), INT32_C( 1980353111), INT32_C( 2112031083)), simde_mm256_set_epi32(INT32_C(-1145918593), INT32_C( -667441949), INT32_C( 804839434), INT32_C( 1326022329), INT32_C( 307496614), INT32_C(-1108181850), INT32_C( 1980353111), INT32_C( 2112031083)) }, { simde_mm512_set_epi32(INT32_C(-1042067236), INT32_C(-1304341967), 
INT32_C( 157202478), INT32_C( 335492980), INT32_C( 1721471627), INT32_C( -970374929), INT32_C(-1215103856), INT32_C( 804236438), INT32_C( 763203116), INT32_C( 1650455375), INT32_C( 191870396), INT32_C(-1509651897), INT32_C( 1356692638), INT32_C(-1388072975), INT32_C(-1822839305), INT32_C( 511824893)), simde_mm256_set_epi32(INT32_C( 763203116), INT32_C( 1650455375), INT32_C( 191870396), INT32_C(-1509651897), INT32_C( 1356692638), INT32_C(-1388072975), INT32_C(-1822839305), INT32_C( 511824893)) }, { simde_mm512_set_epi32(INT32_C( -321038022), INT32_C( 1580565478), INT32_C( 1887212628), INT32_C( -321448210), INT32_C( 1646995861), INT32_C( 223122732), INT32_C(-1757872907), INT32_C(-1745139814), INT32_C( 1574697064), INT32_C( 1593370323), INT32_C( 1019611869), INT32_C( 1148772416), INT32_C( 41212650), INT32_C( 1180707726), INT32_C(-1448405590), INT32_C( -480086797)), simde_mm256_set_epi32(INT32_C( 1574697064), INT32_C( 1593370323), INT32_C( 1019611869), INT32_C( 1148772416), INT32_C( 41212650), INT32_C( 1180707726), INT32_C(-1448405590), INT32_C( -480086797)) }, { simde_mm512_set_epi32(INT32_C(-2123925649), INT32_C(-1300783990), INT32_C( 1491522954), INT32_C(-1497225833), INT32_C( 264593112), INT32_C(-1577295575), INT32_C( 1721102182), INT32_C( -406919611), INT32_C( -248074414), INT32_C(-1644442090), INT32_C(-1586232899), INT32_C( -62204050), INT32_C( 356135618), INT32_C(-1239722051), INT32_C( 1765867002), INT32_C(-2007643147)), simde_mm256_set_epi32(INT32_C( -248074414), INT32_C(-1644442090), INT32_C(-1586232899), INT32_C( -62204050), INT32_C( 356135618), INT32_C(-1239722051), INT32_C( 1765867002), INT32_C(-2007643147)) }, { simde_mm512_set_epi32(INT32_C( 1712658433), INT32_C( 1782026188), INT32_C( 1026238577), INT32_C( 999831740), INT32_C( 1890262543), INT32_C( -282267234), INT32_C( -314081065), INT32_C( 1258071145), INT32_C( -96847849), INT32_C( -33628993), INT32_C( -700755553), INT32_C( 1395877145), INT32_C( 1943621426), INT32_C( 1606414493), INT32_C( 234520793), 
INT32_C( 1932887352)), simde_mm256_set_epi32(INT32_C( -96847849), INT32_C( -33628993), INT32_C( -700755553), INT32_C( 1395877145), INT32_C( 1943621426), INT32_C( 1606414493), INT32_C( 234520793), INT32_C( 1932887352)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm512_castsi512_si256(test_vec[i].a); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_castpd512_pd128) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_castpd512_pd256) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_castpd128_pd512) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_castpd256_pd512) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_castpd_si512) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_castps_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_castpd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_castps_si512) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_castps128_ps512) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_castps256_ps512) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_castps512_ps128) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_castps512_ps256) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_castsi512_si128) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_castsi128_si512) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_castsi512_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_castsi256_si512) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_castsi512_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_castsi512_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/cmp.c000066400000000000000000001160751400333146700162110ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission 
notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #define SIMDE_TEST_X86_AVX512_INSN cmp #include #include #include #if !defined(SIMDE_NATIVE_ALIASES_TESTING) static int test_simde_mm512_cmp_ps_mask (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 b[16]; const int imm8; const simde__mmask16 r; } test_vec[] = { { { SIMDE_FLOAT32_C( 662.74), SIMDE_FLOAT32_C( -296.50), SIMDE_FLOAT32_C( 51.89), SIMDE_FLOAT32_C( -877.47), SIMDE_FLOAT32_C( 872.85), SIMDE_FLOAT32_C( -965.85), SIMDE_FLOAT32_C( -688.43), SIMDE_FLOAT32_C( 580.42), SIMDE_FLOAT32_C( 433.23), SIMDE_FLOAT32_C( 579.01), SIMDE_FLOAT32_C( 251.51), SIMDE_FLOAT32_C( -341.82), SIMDE_FLOAT32_C( -712.66), SIMDE_FLOAT32_C( -533.80), SIMDE_FLOAT32_C( 415.61), SIMDE_FLOAT32_C( 350.44) }, { SIMDE_FLOAT32_C( -243.73), SIMDE_FLOAT32_C( -899.82), SIMDE_FLOAT32_C( -876.74), SIMDE_FLOAT32_C( 548.84), SIMDE_FLOAT32_C( 112.62), SIMDE_FLOAT32_C( 228.46), SIMDE_FLOAT32_C( 592.01), SIMDE_FLOAT32_C( -633.78), SIMDE_FLOAT32_C( 405.27), SIMDE_FLOAT32_C( 447.83), SIMDE_FLOAT32_C( -706.03), SIMDE_FLOAT32_C( 628.46), SIMDE_FLOAT32_C( -379.69), SIMDE_FLOAT32_C( 592.13), SIMDE_FLOAT32_C( 228.45), SIMDE_FLOAT32_C( -716.95) }, INT32_C( 0), UINT16_C( 0) }, { { SIMDE_FLOAT32_C( -704.37), SIMDE_FLOAT32_C( -719.66), SIMDE_FLOAT32_C( -594.42), SIMDE_FLOAT32_C( -831.52), SIMDE_FLOAT32_C( -685.51), SIMDE_FLOAT32_C( -282.86), SIMDE_FLOAT32_C( 
748.90), SIMDE_FLOAT32_C( 747.73), SIMDE_FLOAT32_C( -703.85), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( -594.10), SIMDE_FLOAT32_C( -416.51), SIMDE_FLOAT32_C( 466.61), SIMDE_FLOAT32_C( 821.52), SIMDE_FLOAT32_C( 933.94), SIMDE_FLOAT32_C( -777.12) }, { SIMDE_FLOAT32_C( 921.70), SIMDE_FLOAT32_C( -942.80), SIMDE_FLOAT32_C( 771.72), SIMDE_FLOAT32_C( 34.32), SIMDE_FLOAT32_C( 285.66), SIMDE_FLOAT32_C( 363.73), SIMDE_FLOAT32_C( 400.54), SIMDE_FLOAT32_C( -309.07), SIMDE_FLOAT32_C( -188.44), SIMDE_FLOAT32_C( 694.50), SIMDE_FLOAT32_C( -680.61), SIMDE_FLOAT32_C( 431.87), SIMDE_FLOAT32_C( 286.63), SIMDE_FLOAT32_C( 547.84), SIMDE_FLOAT32_C( 714.92), SIMDE_FLOAT32_C( 582.26) }, INT32_C( 1), UINT16_C(35645) }, { { SIMDE_FLOAT32_C( 828.18), SIMDE_FLOAT32_C( -879.50), SIMDE_FLOAT32_C( 750.74), SIMDE_FLOAT32_C( -857.33), SIMDE_FLOAT32_C( -162.36), SIMDE_FLOAT32_C( 499.63), SIMDE_FLOAT32_C( 890.40), SIMDE_FLOAT32_C( 133.79), SIMDE_FLOAT32_C( -499.96), SIMDE_FLOAT32_C( -703.70), SIMDE_FLOAT32_C( 717.28), SIMDE_FLOAT32_C( 966.65), SIMDE_FLOAT32_C( -882.18), SIMDE_FLOAT32_C( 651.22), SIMDE_FLOAT32_C( -810.47), SIMDE_FLOAT32_C( -960.48) }, { SIMDE_FLOAT32_C( 708.42), SIMDE_FLOAT32_C( 961.25), SIMDE_FLOAT32_C( 73.84), SIMDE_FLOAT32_C( -5.91), SIMDE_FLOAT32_C( 324.99), SIMDE_FLOAT32_C( -525.63), SIMDE_FLOAT32_C( 685.02), SIMDE_FLOAT32_C( -863.45), SIMDE_FLOAT32_C( -831.12), SIMDE_FLOAT32_C( -995.59), SIMDE_FLOAT32_C( 568.42), SIMDE_FLOAT32_C( 455.51), SIMDE_FLOAT32_C( 552.25), SIMDE_FLOAT32_C( 283.34), SIMDE_FLOAT32_C( 37.76), SIMDE_FLOAT32_C( 380.43) }, INT32_C( 2), UINT16_C(53274) }, { { SIMDE_FLOAT32_C( 403.83), SIMDE_FLOAT32_C( -211.50), SIMDE_FLOAT32_C( 523.10), SIMDE_FLOAT32_C( -758.53), SIMDE_FLOAT32_C( -711.87), SIMDE_FLOAT32_C( 413.50), SIMDE_FLOAT32_C( 375.26), SIMDE_FLOAT32_C( -211.83), SIMDE_FLOAT32_C( 709.80), SIMDE_FLOAT32_C( 92.55), SIMDE_FLOAT32_C( -245.18), SIMDE_FLOAT32_C( 827.62), SIMDE_FLOAT32_C( -256.23), SIMDE_FLOAT32_C( -55.64), SIMDE_FLOAT32_C( 867.14), 
SIMDE_FLOAT32_C( -547.81) }, { SIMDE_FLOAT32_C( -94.39), SIMDE_FLOAT32_C( -59.02), SIMDE_FLOAT32_C( 446.28), SIMDE_FLOAT32_C( -769.41), SIMDE_FLOAT32_C( 415.35), SIMDE_FLOAT32_C( 131.30), SIMDE_FLOAT32_C( -632.86), SIMDE_FLOAT32_C( 584.23), SIMDE_FLOAT32_C( 135.71), SIMDE_FLOAT32_C( 935.56), SIMDE_FLOAT32_C( 39.74), SIMDE_FLOAT32_C( -312.04), SIMDE_FLOAT32_C( 218.89), SIMDE_FLOAT32_C( -922.50), SIMDE_FLOAT32_C( -931.62), SIMDE_FLOAT32_C( -377.28) }, INT32_C( 3), UINT16_C( 0) }, { { SIMDE_FLOAT32_C( -134.00), SIMDE_FLOAT32_C( 591.48), SIMDE_FLOAT32_C( -135.81), SIMDE_FLOAT32_C( 154.13), SIMDE_FLOAT32_C( 4.98), SIMDE_FLOAT32_C( -760.55), SIMDE_FLOAT32_C( 942.30), SIMDE_FLOAT32_C( -285.22), SIMDE_FLOAT32_C( 332.00), SIMDE_FLOAT32_C( -302.88), SIMDE_FLOAT32_C( -457.60), SIMDE_FLOAT32_C( -924.23), SIMDE_FLOAT32_C( 641.48), SIMDE_FLOAT32_C( -590.45), SIMDE_FLOAT32_C( -472.04), SIMDE_FLOAT32_C( -452.91) }, { SIMDE_FLOAT32_C( 350.53), SIMDE_FLOAT32_C( 974.23), SIMDE_FLOAT32_C( -222.32), SIMDE_FLOAT32_C( -234.12), SIMDE_FLOAT32_C( 105.53), SIMDE_FLOAT32_C( 144.82), SIMDE_FLOAT32_C( -649.88), SIMDE_FLOAT32_C( -758.76), SIMDE_FLOAT32_C( 80.38), SIMDE_FLOAT32_C( 389.85), SIMDE_FLOAT32_C( -70.81), SIMDE_FLOAT32_C( -700.73), SIMDE_FLOAT32_C( 467.35), SIMDE_FLOAT32_C( -2.42), SIMDE_FLOAT32_C( -78.01), SIMDE_FLOAT32_C( -666.65) }, INT32_C( 4), UINT16_MAX }, { { SIMDE_FLOAT32_C( -410.94), SIMDE_FLOAT32_C( 786.18), SIMDE_FLOAT32_C( 487.47), SIMDE_FLOAT32_C( 594.03), SIMDE_FLOAT32_C( -974.37), SIMDE_FLOAT32_C( 429.77), SIMDE_FLOAT32_C( -691.18), SIMDE_FLOAT32_C( 357.63), SIMDE_FLOAT32_C( -873.11), SIMDE_FLOAT32_C( -148.78), SIMDE_FLOAT32_C( 433.40), SIMDE_FLOAT32_C( 768.37), SIMDE_FLOAT32_C( 260.77), SIMDE_FLOAT32_C( 961.36), SIMDE_FLOAT32_C( -684.54), SIMDE_FLOAT32_C( -388.70) }, { SIMDE_FLOAT32_C( 935.59), SIMDE_FLOAT32_C( 93.14), SIMDE_FLOAT32_C( 377.18), SIMDE_FLOAT32_C( 41.12), SIMDE_FLOAT32_C( -762.04), SIMDE_FLOAT32_C( 727.30), SIMDE_FLOAT32_C( 282.36), SIMDE_FLOAT32_C( 
318.33), SIMDE_FLOAT32_C( 117.15), SIMDE_FLOAT32_C( -788.45), SIMDE_FLOAT32_C( 617.60), SIMDE_FLOAT32_C( -415.50), SIMDE_FLOAT32_C( 209.13), SIMDE_FLOAT32_C( -460.41), SIMDE_FLOAT32_C( -82.16), SIMDE_FLOAT32_C( 798.18) }, INT32_C( 5), UINT16_C(14990) }, { { SIMDE_FLOAT32_C( -674.23), SIMDE_FLOAT32_C( -594.68), SIMDE_FLOAT32_C( 392.22), SIMDE_FLOAT32_C( -648.60), SIMDE_FLOAT32_C( 835.09), SIMDE_FLOAT32_C( 701.04), SIMDE_FLOAT32_C( 709.03), SIMDE_FLOAT32_C( 961.98), SIMDE_FLOAT32_C( -447.74), SIMDE_FLOAT32_C( 142.43), SIMDE_FLOAT32_C( 730.35), SIMDE_FLOAT32_C( 813.03), SIMDE_FLOAT32_C( 103.79), SIMDE_FLOAT32_C( -954.19), SIMDE_FLOAT32_C( -575.67), SIMDE_FLOAT32_C( 39.38) }, { SIMDE_FLOAT32_C( 138.95), SIMDE_FLOAT32_C( 801.51), SIMDE_FLOAT32_C( -919.50), SIMDE_FLOAT32_C( 376.91), SIMDE_FLOAT32_C( 528.81), SIMDE_FLOAT32_C( 362.86), SIMDE_FLOAT32_C( -304.76), SIMDE_FLOAT32_C( -354.04), SIMDE_FLOAT32_C( 574.42), SIMDE_FLOAT32_C( -687.16), SIMDE_FLOAT32_C( 230.46), SIMDE_FLOAT32_C( -216.45), SIMDE_FLOAT32_C( -147.57), SIMDE_FLOAT32_C( -851.70), SIMDE_FLOAT32_C( -418.27), SIMDE_FLOAT32_C( 178.19) }, INT32_C( 6), UINT16_C( 7924) }, { { SIMDE_FLOAT32_C( -446.38), SIMDE_FLOAT32_C( 973.95), SIMDE_FLOAT32_C( 529.59), SIMDE_FLOAT32_C( -611.29), SIMDE_FLOAT32_C( 674.99), SIMDE_FLOAT32_C( 238.63), SIMDE_FLOAT32_C( -649.31), SIMDE_FLOAT32_C( -772.76), SIMDE_FLOAT32_C( -618.94), SIMDE_FLOAT32_C( -918.96), SIMDE_FLOAT32_C( -959.73), SIMDE_FLOAT32_C( 484.85), SIMDE_FLOAT32_C( -873.15), SIMDE_FLOAT32_C( -535.40), SIMDE_FLOAT32_C( -475.77), SIMDE_FLOAT32_C( 265.80) }, { SIMDE_FLOAT32_C( -733.89), SIMDE_FLOAT32_C( -395.27), SIMDE_FLOAT32_C( -357.30), SIMDE_FLOAT32_C( 794.92), SIMDE_FLOAT32_C( 967.60), SIMDE_FLOAT32_C( 337.94), SIMDE_FLOAT32_C( -559.13), SIMDE_FLOAT32_C( 542.02), SIMDE_FLOAT32_C( 650.78), SIMDE_FLOAT32_C( 671.33), SIMDE_FLOAT32_C( -674.44), SIMDE_FLOAT32_C( -496.79), SIMDE_FLOAT32_C( 819.63), SIMDE_FLOAT32_C( -92.71), SIMDE_FLOAT32_C( 681.40), SIMDE_FLOAT32_C( -626.75) }, 
INT32_C( 7), UINT16_MAX }, { { SIMDE_FLOAT32_C( -118.76), SIMDE_FLOAT32_C( 210.99), SIMDE_FLOAT32_C( -238.04), SIMDE_FLOAT32_C( -443.77), SIMDE_FLOAT32_C( -550.38), SIMDE_FLOAT32_C( 112.65), SIMDE_FLOAT32_C( -216.52), SIMDE_FLOAT32_C( -169.32), SIMDE_FLOAT32_C( 193.68), SIMDE_FLOAT32_C( -176.25), SIMDE_FLOAT32_C( -684.48), SIMDE_FLOAT32_C( 320.53), SIMDE_FLOAT32_C( 288.35), SIMDE_FLOAT32_C( -160.25), SIMDE_FLOAT32_C( -413.67), SIMDE_FLOAT32_C( 554.45) }, { SIMDE_FLOAT32_C( 444.49), SIMDE_FLOAT32_C( 229.03), SIMDE_FLOAT32_C( 349.37), SIMDE_FLOAT32_C( 412.09), SIMDE_FLOAT32_C( -433.02), SIMDE_FLOAT32_C( 790.25), SIMDE_FLOAT32_C( -45.90), SIMDE_FLOAT32_C( -782.24), SIMDE_FLOAT32_C( 461.58), SIMDE_FLOAT32_C( 279.66), SIMDE_FLOAT32_C( -279.03), SIMDE_FLOAT32_C( 281.21), SIMDE_FLOAT32_C( -813.04), SIMDE_FLOAT32_C( -597.63), SIMDE_FLOAT32_C( 654.46), SIMDE_FLOAT32_C( 68.20) }, INT32_C( 8), UINT16_C( 0) }, { { SIMDE_FLOAT32_C( 613.36), SIMDE_FLOAT32_C( -583.58), SIMDE_FLOAT32_C( 624.43), SIMDE_FLOAT32_C( -937.02), SIMDE_FLOAT32_C( 529.07), SIMDE_FLOAT32_C( -592.09), SIMDE_FLOAT32_C( -106.35), SIMDE_FLOAT32_C( -277.25), SIMDE_FLOAT32_C( 231.66), SIMDE_FLOAT32_C( 209.18), SIMDE_FLOAT32_C( -956.71), SIMDE_FLOAT32_C( -480.00), SIMDE_FLOAT32_C( -951.07), SIMDE_FLOAT32_C( -370.38), SIMDE_FLOAT32_C( -925.54), SIMDE_FLOAT32_C( 493.42) }, { SIMDE_FLOAT32_C( 858.65), SIMDE_FLOAT32_C( 423.83), SIMDE_FLOAT32_C( -94.50), SIMDE_FLOAT32_C( -574.37), SIMDE_FLOAT32_C( 214.07), SIMDE_FLOAT32_C( 859.61), SIMDE_FLOAT32_C( -356.61), SIMDE_FLOAT32_C( -324.35), SIMDE_FLOAT32_C( 139.27), SIMDE_FLOAT32_C( 364.36), SIMDE_FLOAT32_C( 956.86), SIMDE_FLOAT32_C( 326.23), SIMDE_FLOAT32_C( 766.72), SIMDE_FLOAT32_C( 611.33), SIMDE_FLOAT32_C( -605.57), SIMDE_FLOAT32_C( 380.08) }, INT32_C( 9), UINT16_C(32299) }, { { SIMDE_FLOAT32_C( -972.25), SIMDE_FLOAT32_C( -981.14), SIMDE_FLOAT32_C( 443.06), SIMDE_FLOAT32_C( 556.82), SIMDE_FLOAT32_C( -573.23), SIMDE_FLOAT32_C( -663.28), SIMDE_FLOAT32_C( -720.43), 
SIMDE_FLOAT32_C( 658.42), SIMDE_FLOAT32_C( 545.89), SIMDE_FLOAT32_C( -677.14), SIMDE_FLOAT32_C( -821.57), SIMDE_FLOAT32_C( 594.83), SIMDE_FLOAT32_C( -47.52), SIMDE_FLOAT32_C( -747.12), SIMDE_FLOAT32_C( 88.25), SIMDE_FLOAT32_C( -188.87) }, { SIMDE_FLOAT32_C( 676.71), SIMDE_FLOAT32_C( 993.75), SIMDE_FLOAT32_C( 236.76), SIMDE_FLOAT32_C( -109.21), SIMDE_FLOAT32_C( 853.36), SIMDE_FLOAT32_C( 880.15), SIMDE_FLOAT32_C( 566.44), SIMDE_FLOAT32_C( -7.37), SIMDE_FLOAT32_C( 244.51), SIMDE_FLOAT32_C( 523.30), SIMDE_FLOAT32_C( -681.15), SIMDE_FLOAT32_C( 11.24), SIMDE_FLOAT32_C( 134.63), SIMDE_FLOAT32_C( -286.72), SIMDE_FLOAT32_C( -608.68), SIMDE_FLOAT32_C( 162.38) }, INT32_C( 10), UINT16_C(46707) }, { { SIMDE_FLOAT32_C( -267.86), SIMDE_FLOAT32_C( 834.38), SIMDE_FLOAT32_C( -280.80), SIMDE_FLOAT32_C( 158.91), SIMDE_FLOAT32_C( -828.90), SIMDE_FLOAT32_C( -1.23), SIMDE_FLOAT32_C( -182.67), SIMDE_FLOAT32_C( 716.99), SIMDE_FLOAT32_C( 321.63), SIMDE_FLOAT32_C( -4.24), SIMDE_FLOAT32_C( 311.82), SIMDE_FLOAT32_C( -725.89), SIMDE_FLOAT32_C( 248.64), SIMDE_FLOAT32_C( -599.93), SIMDE_FLOAT32_C( 85.24), SIMDE_FLOAT32_C( -74.64) }, { SIMDE_FLOAT32_C( -606.18), SIMDE_FLOAT32_C( -677.99), SIMDE_FLOAT32_C( 816.14), SIMDE_FLOAT32_C( -752.82), SIMDE_FLOAT32_C( -797.84), SIMDE_FLOAT32_C( 382.58), SIMDE_FLOAT32_C( 239.80), SIMDE_FLOAT32_C( 446.68), SIMDE_FLOAT32_C( -94.12), SIMDE_FLOAT32_C( 558.66), SIMDE_FLOAT32_C( -542.09), SIMDE_FLOAT32_C( -959.49), SIMDE_FLOAT32_C( -728.06), SIMDE_FLOAT32_C( -150.77), SIMDE_FLOAT32_C( 202.89), SIMDE_FLOAT32_C( 4.08) }, INT32_C( 11), UINT16_C( 0) }, { { SIMDE_FLOAT32_C( -316.38), SIMDE_FLOAT32_C( 922.09), SIMDE_FLOAT32_C( -837.02), SIMDE_FLOAT32_C( -145.29), SIMDE_FLOAT32_C( -79.14), SIMDE_FLOAT32_C( -19.68), SIMDE_FLOAT32_C( -428.29), SIMDE_FLOAT32_C( -757.51), SIMDE_FLOAT32_C( 976.07), SIMDE_FLOAT32_C( 883.53), SIMDE_FLOAT32_C( -483.40), SIMDE_FLOAT32_C( 224.72), SIMDE_FLOAT32_C( -716.41), SIMDE_FLOAT32_C( 601.84), SIMDE_FLOAT32_C( -849.93), SIMDE_FLOAT32_C( 
-322.59) }, { SIMDE_FLOAT32_C( 923.85), SIMDE_FLOAT32_C( 966.22), SIMDE_FLOAT32_C( -75.41), SIMDE_FLOAT32_C( -873.98), SIMDE_FLOAT32_C( 348.80), SIMDE_FLOAT32_C( -835.61), SIMDE_FLOAT32_C( 572.69), SIMDE_FLOAT32_C( -745.32), SIMDE_FLOAT32_C( 723.05), SIMDE_FLOAT32_C( -969.40), SIMDE_FLOAT32_C( -704.81), SIMDE_FLOAT32_C( 994.98), SIMDE_FLOAT32_C( -120.16), SIMDE_FLOAT32_C( 498.08), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( 563.45) }, INT32_C( 12), UINT16_MAX }, { { SIMDE_FLOAT32_C( 420.16), SIMDE_FLOAT32_C( 162.04), SIMDE_FLOAT32_C( -581.83), SIMDE_FLOAT32_C( -658.98), SIMDE_FLOAT32_C( -857.64), SIMDE_FLOAT32_C( -10.13), SIMDE_FLOAT32_C( -416.49), SIMDE_FLOAT32_C( -881.56), SIMDE_FLOAT32_C( -126.60), SIMDE_FLOAT32_C( 100.10), SIMDE_FLOAT32_C( 343.15), SIMDE_FLOAT32_C( 156.99), SIMDE_FLOAT32_C( -298.05), SIMDE_FLOAT32_C( 493.23), SIMDE_FLOAT32_C( 834.41), SIMDE_FLOAT32_C( -374.20) }, { SIMDE_FLOAT32_C( 459.44), SIMDE_FLOAT32_C( -241.01), SIMDE_FLOAT32_C( -248.18), SIMDE_FLOAT32_C( -191.76), SIMDE_FLOAT32_C( -76.61), SIMDE_FLOAT32_C( -675.49), SIMDE_FLOAT32_C( 62.92), SIMDE_FLOAT32_C( -353.57), SIMDE_FLOAT32_C( -644.89), SIMDE_FLOAT32_C( 358.11), SIMDE_FLOAT32_C( -358.58), SIMDE_FLOAT32_C( 234.95), SIMDE_FLOAT32_C( -143.82), SIMDE_FLOAT32_C( 640.48), SIMDE_FLOAT32_C( -201.60), SIMDE_FLOAT32_C( -723.66) }, INT32_C( 13), UINT16_C(50466) }, { { SIMDE_FLOAT32_C( -197.48), SIMDE_FLOAT32_C( 216.57), SIMDE_FLOAT32_C( -382.64), SIMDE_FLOAT32_C( -55.12), SIMDE_FLOAT32_C( -793.56), SIMDE_FLOAT32_C( 200.87), SIMDE_FLOAT32_C( 63.32), SIMDE_FLOAT32_C( 79.84), SIMDE_FLOAT32_C( -699.03), SIMDE_FLOAT32_C( -593.53), SIMDE_FLOAT32_C( -763.16), SIMDE_FLOAT32_C( 2.92), SIMDE_FLOAT32_C( 899.70), SIMDE_FLOAT32_C( -928.76), SIMDE_FLOAT32_C( 628.72), SIMDE_FLOAT32_C( 359.14) }, { SIMDE_FLOAT32_C( -169.76), SIMDE_FLOAT32_C( -619.46), SIMDE_FLOAT32_C( -832.62), SIMDE_FLOAT32_C( 753.63), SIMDE_FLOAT32_C( -294.96), SIMDE_FLOAT32_C( 230.30), SIMDE_FLOAT32_C( -599.94), SIMDE_FLOAT32_C( 60.15), 
SIMDE_FLOAT32_C( -411.60), SIMDE_FLOAT32_C( 41.48), SIMDE_FLOAT32_C( -704.90), SIMDE_FLOAT32_C( 444.59), SIMDE_FLOAT32_C( -318.04), SIMDE_FLOAT32_C( 93.51), SIMDE_FLOAT32_C( 720.93), SIMDE_FLOAT32_C( 484.48) }, INT32_C( 14), UINT16_C( 4294) }, { { SIMDE_FLOAT32_C( -689.92), SIMDE_FLOAT32_C( -661.71), SIMDE_FLOAT32_C( -570.64), SIMDE_FLOAT32_C( -483.48), SIMDE_FLOAT32_C( 539.16), SIMDE_FLOAT32_C( 492.68), SIMDE_FLOAT32_C( 596.36), SIMDE_FLOAT32_C( 840.13), SIMDE_FLOAT32_C( 899.15), SIMDE_FLOAT32_C( 833.20), SIMDE_FLOAT32_C( -156.95), SIMDE_FLOAT32_C( 798.85), SIMDE_FLOAT32_C( 904.44), SIMDE_FLOAT32_C( -528.23), SIMDE_FLOAT32_C( 157.99), SIMDE_FLOAT32_C( -265.32) }, { SIMDE_FLOAT32_C( -147.69), SIMDE_FLOAT32_C( 325.37), SIMDE_FLOAT32_C( -511.69), SIMDE_FLOAT32_C( 557.35), SIMDE_FLOAT32_C( -444.33), SIMDE_FLOAT32_C( -111.63), SIMDE_FLOAT32_C( -382.50), SIMDE_FLOAT32_C( 144.07), SIMDE_FLOAT32_C( 929.85), SIMDE_FLOAT32_C( -87.39), SIMDE_FLOAT32_C( -411.34), SIMDE_FLOAT32_C( -388.19), SIMDE_FLOAT32_C( -993.88), SIMDE_FLOAT32_C( -690.41), SIMDE_FLOAT32_C( -903.71), SIMDE_FLOAT32_C( -683.81) }, INT32_C( 15), UINT16_MAX }, { { SIMDE_FLOAT32_C( -352.12), SIMDE_FLOAT32_C( -474.35), SIMDE_FLOAT32_C( -167.29), SIMDE_FLOAT32_C( -812.96), SIMDE_FLOAT32_C( -981.67), SIMDE_FLOAT32_C( -570.92), SIMDE_FLOAT32_C( -972.83), SIMDE_FLOAT32_C( 917.49), SIMDE_FLOAT32_C( -737.72), SIMDE_FLOAT32_C( -129.78), SIMDE_FLOAT32_C( 716.34), SIMDE_FLOAT32_C( -833.28), SIMDE_FLOAT32_C( 341.99), SIMDE_FLOAT32_C( -125.67), SIMDE_FLOAT32_C( -98.59), SIMDE_FLOAT32_C( -805.70) }, { SIMDE_FLOAT32_C( -800.30), SIMDE_FLOAT32_C( 389.72), SIMDE_FLOAT32_C( 751.65), SIMDE_FLOAT32_C( -244.63), SIMDE_FLOAT32_C( -721.91), SIMDE_FLOAT32_C( -630.84), SIMDE_FLOAT32_C( 899.44), SIMDE_FLOAT32_C( -792.06), SIMDE_FLOAT32_C( 281.76), SIMDE_FLOAT32_C( -511.90), SIMDE_FLOAT32_C( -180.26), SIMDE_FLOAT32_C( 287.88), SIMDE_FLOAT32_C( -202.31), SIMDE_FLOAT32_C( -83.97), SIMDE_FLOAT32_C( 604.08), SIMDE_FLOAT32_C( 445.56) }, 
INT32_C( 16), UINT16_C( 0) }, { { SIMDE_FLOAT32_C( 441.68), SIMDE_FLOAT32_C( -563.21), SIMDE_FLOAT32_C( 632.60), SIMDE_FLOAT32_C( 460.01), SIMDE_FLOAT32_C( -134.13), SIMDE_FLOAT32_C( 659.78), SIMDE_FLOAT32_C( 377.50), SIMDE_FLOAT32_C( 128.15), SIMDE_FLOAT32_C( -470.00), SIMDE_FLOAT32_C( 93.84), SIMDE_FLOAT32_C( 294.87), SIMDE_FLOAT32_C( 871.99), SIMDE_FLOAT32_C( 968.16), SIMDE_FLOAT32_C( -803.72), SIMDE_FLOAT32_C( -933.71), SIMDE_FLOAT32_C( -832.14) }, { SIMDE_FLOAT32_C( 586.00), SIMDE_FLOAT32_C( 817.94), SIMDE_FLOAT32_C( -76.77), SIMDE_FLOAT32_C( 864.08), SIMDE_FLOAT32_C( -812.90), SIMDE_FLOAT32_C( -177.33), SIMDE_FLOAT32_C( -927.98), SIMDE_FLOAT32_C( 468.86), SIMDE_FLOAT32_C( 310.77), SIMDE_FLOAT32_C( -108.24), SIMDE_FLOAT32_C( -243.26), SIMDE_FLOAT32_C( -891.55), SIMDE_FLOAT32_C( 807.79), SIMDE_FLOAT32_C( -639.18), SIMDE_FLOAT32_C( 554.02), SIMDE_FLOAT32_C( 249.47) }, INT32_C( 17), UINT16_C(57739) }, { { SIMDE_FLOAT32_C( -202.39), SIMDE_FLOAT32_C( 186.62), SIMDE_FLOAT32_C( -290.52), SIMDE_FLOAT32_C( 663.47), SIMDE_FLOAT32_C( -153.60), SIMDE_FLOAT32_C( -913.02), SIMDE_FLOAT32_C( -208.38), SIMDE_FLOAT32_C( 376.40), SIMDE_FLOAT32_C( 180.82), SIMDE_FLOAT32_C( -913.51), SIMDE_FLOAT32_C( 248.39), SIMDE_FLOAT32_C( 148.98), SIMDE_FLOAT32_C( -717.23), SIMDE_FLOAT32_C( 314.68), SIMDE_FLOAT32_C( 316.85), SIMDE_FLOAT32_C( 868.77) }, { SIMDE_FLOAT32_C( 132.62), SIMDE_FLOAT32_C( -759.92), SIMDE_FLOAT32_C( 732.85), SIMDE_FLOAT32_C( 319.72), SIMDE_FLOAT32_C( 62.75), SIMDE_FLOAT32_C( 804.87), SIMDE_FLOAT32_C( -211.42), SIMDE_FLOAT32_C( -626.48), SIMDE_FLOAT32_C( -303.37), SIMDE_FLOAT32_C( 545.32), SIMDE_FLOAT32_C( -518.02), SIMDE_FLOAT32_C( -495.58), SIMDE_FLOAT32_C( 906.14), SIMDE_FLOAT32_C( -964.00), SIMDE_FLOAT32_C( 753.90), SIMDE_FLOAT32_C( -296.25) }, INT32_C( 18), UINT16_C(21045) }, { { SIMDE_FLOAT32_C( 222.62), SIMDE_FLOAT32_C( -536.62), SIMDE_FLOAT32_C( -632.78), SIMDE_FLOAT32_C( -930.98), SIMDE_FLOAT32_C( -449.64), SIMDE_FLOAT32_C( 158.84), SIMDE_FLOAT32_C( 445.42), 
SIMDE_FLOAT32_C( 731.18), SIMDE_FLOAT32_C( 245.34), SIMDE_FLOAT32_C( -306.20), SIMDE_FLOAT32_C( -119.84), SIMDE_FLOAT32_C( 528.11), SIMDE_FLOAT32_C( -991.52), SIMDE_FLOAT32_C( -802.99), SIMDE_FLOAT32_C( 396.88), SIMDE_FLOAT32_C( 141.10) }, { SIMDE_FLOAT32_C( -562.91), SIMDE_FLOAT32_C( 129.73), SIMDE_FLOAT32_C( -539.18), SIMDE_FLOAT32_C( 499.84), SIMDE_FLOAT32_C( -65.40), SIMDE_FLOAT32_C( 249.40), SIMDE_FLOAT32_C( 873.36), SIMDE_FLOAT32_C( 631.23), SIMDE_FLOAT32_C( -205.28), SIMDE_FLOAT32_C( -644.66), SIMDE_FLOAT32_C( -864.35), SIMDE_FLOAT32_C( -299.14), SIMDE_FLOAT32_C( -608.67), SIMDE_FLOAT32_C( 889.55), SIMDE_FLOAT32_C( 404.61), SIMDE_FLOAT32_C( 613.95) }, INT32_C( 19), UINT16_C( 0) }, { { SIMDE_FLOAT32_C( -647.07), SIMDE_FLOAT32_C( 771.83), SIMDE_FLOAT32_C( 682.97), SIMDE_FLOAT32_C( -96.71), SIMDE_FLOAT32_C( -69.32), SIMDE_FLOAT32_C( 128.39), SIMDE_FLOAT32_C( -365.53), SIMDE_FLOAT32_C( -823.99), SIMDE_FLOAT32_C( 822.19), SIMDE_FLOAT32_C( 514.63), SIMDE_FLOAT32_C( 704.12), SIMDE_FLOAT32_C( 830.67), SIMDE_FLOAT32_C( 711.64), SIMDE_FLOAT32_C( 101.00), SIMDE_FLOAT32_C( -28.22), SIMDE_FLOAT32_C( -851.27) }, { SIMDE_FLOAT32_C( -769.27), SIMDE_FLOAT32_C( 432.60), SIMDE_FLOAT32_C( 648.57), SIMDE_FLOAT32_C( 165.32), SIMDE_FLOAT32_C( -318.00), SIMDE_FLOAT32_C( 521.93), SIMDE_FLOAT32_C( -203.45), SIMDE_FLOAT32_C( 476.73), SIMDE_FLOAT32_C( 877.27), SIMDE_FLOAT32_C( -67.79), SIMDE_FLOAT32_C( -822.41), SIMDE_FLOAT32_C( -731.40), SIMDE_FLOAT32_C( -178.24), SIMDE_FLOAT32_C( 582.20), SIMDE_FLOAT32_C( 882.55), SIMDE_FLOAT32_C( 174.68) }, INT32_C( 20), UINT16_MAX }, { { SIMDE_FLOAT32_C( 354.04), SIMDE_FLOAT32_C( 565.52), SIMDE_FLOAT32_C( -922.02), SIMDE_FLOAT32_C( -715.29), SIMDE_FLOAT32_C( -306.09), SIMDE_FLOAT32_C( -287.55), SIMDE_FLOAT32_C( -539.27), SIMDE_FLOAT32_C( -483.90), SIMDE_FLOAT32_C( -772.92), SIMDE_FLOAT32_C( -835.15), SIMDE_FLOAT32_C( -653.23), SIMDE_FLOAT32_C( 938.73), SIMDE_FLOAT32_C( 265.85), SIMDE_FLOAT32_C( 318.55), SIMDE_FLOAT32_C( -912.54), SIMDE_FLOAT32_C( 
496.58) }, { SIMDE_FLOAT32_C( -248.85), SIMDE_FLOAT32_C( 736.03), SIMDE_FLOAT32_C( -338.10), SIMDE_FLOAT32_C( 433.16), SIMDE_FLOAT32_C( 257.97), SIMDE_FLOAT32_C( 458.45), SIMDE_FLOAT32_C( -90.12), SIMDE_FLOAT32_C( 135.24), SIMDE_FLOAT32_C( -609.34), SIMDE_FLOAT32_C( 87.47), SIMDE_FLOAT32_C( 403.84), SIMDE_FLOAT32_C( 212.42), SIMDE_FLOAT32_C( -330.33), SIMDE_FLOAT32_C( 286.39), SIMDE_FLOAT32_C( -612.90), SIMDE_FLOAT32_C( -976.29) }, INT32_C( 21), UINT16_C(47105) }, { { SIMDE_FLOAT32_C( -148.09), SIMDE_FLOAT32_C( -534.92), SIMDE_FLOAT32_C( -691.57), SIMDE_FLOAT32_C( 545.82), SIMDE_FLOAT32_C( 177.53), SIMDE_FLOAT32_C( -230.85), SIMDE_FLOAT32_C( -938.08), SIMDE_FLOAT32_C( 404.61), SIMDE_FLOAT32_C( -65.99), SIMDE_FLOAT32_C( -591.31), SIMDE_FLOAT32_C( 343.34), SIMDE_FLOAT32_C( -800.14), SIMDE_FLOAT32_C( 727.24), SIMDE_FLOAT32_C( 430.80), SIMDE_FLOAT32_C( 696.43), SIMDE_FLOAT32_C( -521.61) }, { SIMDE_FLOAT32_C( 166.83), SIMDE_FLOAT32_C( -641.66), SIMDE_FLOAT32_C( 911.55), SIMDE_FLOAT32_C( -575.20), SIMDE_FLOAT32_C( 816.79), SIMDE_FLOAT32_C( -178.57), SIMDE_FLOAT32_C( 560.03), SIMDE_FLOAT32_C( -792.55), SIMDE_FLOAT32_C( 908.90), SIMDE_FLOAT32_C( -36.13), SIMDE_FLOAT32_C( 419.87), SIMDE_FLOAT32_C( -421.42), SIMDE_FLOAT32_C( -749.74), SIMDE_FLOAT32_C( 806.97), SIMDE_FLOAT32_C( -397.71), SIMDE_FLOAT32_C( 102.17) }, INT32_C( 22), UINT16_C(20618) }, { { SIMDE_FLOAT32_C( -727.95), SIMDE_FLOAT32_C( -89.29), SIMDE_FLOAT32_C( -352.01), SIMDE_FLOAT32_C( 449.58), SIMDE_FLOAT32_C( 679.87), SIMDE_FLOAT32_C( -290.09), SIMDE_FLOAT32_C( -145.82), SIMDE_FLOAT32_C( -386.13), SIMDE_FLOAT32_C( 118.59), SIMDE_FLOAT32_C( -802.48), SIMDE_FLOAT32_C( -186.27), SIMDE_FLOAT32_C( -154.16), SIMDE_FLOAT32_C( 628.32), SIMDE_FLOAT32_C( -489.83), SIMDE_FLOAT32_C( 324.23), SIMDE_FLOAT32_C( -204.85) }, { SIMDE_FLOAT32_C( -131.50), SIMDE_FLOAT32_C( 235.78), SIMDE_FLOAT32_C( 219.94), SIMDE_FLOAT32_C( -314.71), SIMDE_FLOAT32_C( -942.79), SIMDE_FLOAT32_C( -220.02), SIMDE_FLOAT32_C( -107.26), SIMDE_FLOAT32_C( 
966.11), SIMDE_FLOAT32_C( 743.85), SIMDE_FLOAT32_C( -687.39), SIMDE_FLOAT32_C( -455.31), SIMDE_FLOAT32_C( 994.11), SIMDE_FLOAT32_C( -880.42), SIMDE_FLOAT32_C( 146.98), SIMDE_FLOAT32_C( 96.28), SIMDE_FLOAT32_C( -608.37) }, INT32_C( 23), UINT16_MAX }, { { SIMDE_FLOAT32_C( -942.31), SIMDE_FLOAT32_C( 744.27), SIMDE_FLOAT32_C( 841.21), SIMDE_FLOAT32_C( 737.56), SIMDE_FLOAT32_C( -545.82), SIMDE_FLOAT32_C( -304.61), SIMDE_FLOAT32_C( -648.57), SIMDE_FLOAT32_C( 572.77), SIMDE_FLOAT32_C( -107.09), SIMDE_FLOAT32_C( 165.16), SIMDE_FLOAT32_C( -581.39), SIMDE_FLOAT32_C( -478.77), SIMDE_FLOAT32_C( 675.33), SIMDE_FLOAT32_C( 742.84), SIMDE_FLOAT32_C( 316.38), SIMDE_FLOAT32_C( -456.17) }, { SIMDE_FLOAT32_C( -21.39), SIMDE_FLOAT32_C( -463.68), SIMDE_FLOAT32_C( 229.13), SIMDE_FLOAT32_C( 35.82), SIMDE_FLOAT32_C( 316.30), SIMDE_FLOAT32_C( -878.13), SIMDE_FLOAT32_C( 1.93), SIMDE_FLOAT32_C( 60.15), SIMDE_FLOAT32_C( -565.52), SIMDE_FLOAT32_C( 546.62), SIMDE_FLOAT32_C( 54.26), SIMDE_FLOAT32_C( -445.94), SIMDE_FLOAT32_C( -306.41), SIMDE_FLOAT32_C( -849.45), SIMDE_FLOAT32_C( -54.31), SIMDE_FLOAT32_C( -248.72) }, INT32_C( 24), UINT16_C( 0) }, { { SIMDE_FLOAT32_C( 894.82), SIMDE_FLOAT32_C( -213.10), SIMDE_FLOAT32_C( -511.16), SIMDE_FLOAT32_C( -651.01), SIMDE_FLOAT32_C( 482.29), SIMDE_FLOAT32_C( -159.73), SIMDE_FLOAT32_C( 921.76), SIMDE_FLOAT32_C( -624.79), SIMDE_FLOAT32_C( -994.57), SIMDE_FLOAT32_C( -659.63), SIMDE_FLOAT32_C( -103.56), SIMDE_FLOAT32_C( 680.76), SIMDE_FLOAT32_C( -916.80), SIMDE_FLOAT32_C( -787.19), SIMDE_FLOAT32_C( -775.40), SIMDE_FLOAT32_C( 61.82) }, { SIMDE_FLOAT32_C( -250.87), SIMDE_FLOAT32_C( 453.72), SIMDE_FLOAT32_C( -902.36), SIMDE_FLOAT32_C( -934.56), SIMDE_FLOAT32_C( 575.59), SIMDE_FLOAT32_C( 99.57), SIMDE_FLOAT32_C( 125.59), SIMDE_FLOAT32_C( -989.93), SIMDE_FLOAT32_C( -353.81), SIMDE_FLOAT32_C( -820.15), SIMDE_FLOAT32_C( -435.87), SIMDE_FLOAT32_C( 339.78), SIMDE_FLOAT32_C( -669.60), SIMDE_FLOAT32_C( 509.82), SIMDE_FLOAT32_C( -908.94), SIMDE_FLOAT32_C( -774.79) }, 
INT32_C( 25), UINT16_C(12594) }, { { SIMDE_FLOAT32_C( -703.28), SIMDE_FLOAT32_C( -420.10), SIMDE_FLOAT32_C( -425.79), SIMDE_FLOAT32_C( 779.02), SIMDE_FLOAT32_C( 420.16), SIMDE_FLOAT32_C( -504.03), SIMDE_FLOAT32_C( -845.78), SIMDE_FLOAT32_C( 425.59), SIMDE_FLOAT32_C( -163.66), SIMDE_FLOAT32_C( 50.66), SIMDE_FLOAT32_C( 106.36), SIMDE_FLOAT32_C( -80.46), SIMDE_FLOAT32_C( 263.47), SIMDE_FLOAT32_C( 330.95), SIMDE_FLOAT32_C( 981.36), SIMDE_FLOAT32_C( -987.39) }, { SIMDE_FLOAT32_C( -215.33), SIMDE_FLOAT32_C( -921.00), SIMDE_FLOAT32_C( -921.96), SIMDE_FLOAT32_C( -639.74), SIMDE_FLOAT32_C( 178.56), SIMDE_FLOAT32_C( 203.63), SIMDE_FLOAT32_C( -629.67), SIMDE_FLOAT32_C( 824.75), SIMDE_FLOAT32_C( 383.48), SIMDE_FLOAT32_C( -65.54), SIMDE_FLOAT32_C( 164.53), SIMDE_FLOAT32_C( 713.88), SIMDE_FLOAT32_C( -555.72), SIMDE_FLOAT32_C( 255.59), SIMDE_FLOAT32_C( 939.10), SIMDE_FLOAT32_C( -258.99) }, INT32_C( 26), UINT16_C(36321) }, { { SIMDE_FLOAT32_C( 835.48), SIMDE_FLOAT32_C( -486.70), SIMDE_FLOAT32_C( -479.98), SIMDE_FLOAT32_C( 255.65), SIMDE_FLOAT32_C( 9.27), SIMDE_FLOAT32_C( -325.76), SIMDE_FLOAT32_C( -318.76), SIMDE_FLOAT32_C( 845.61), SIMDE_FLOAT32_C( 724.90), SIMDE_FLOAT32_C( 787.60), SIMDE_FLOAT32_C( -234.85), SIMDE_FLOAT32_C( -11.62), SIMDE_FLOAT32_C( 118.55), SIMDE_FLOAT32_C( -253.49), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 903.23) }, { SIMDE_FLOAT32_C( -174.49), SIMDE_FLOAT32_C( 79.03), SIMDE_FLOAT32_C( -736.51), SIMDE_FLOAT32_C( -995.93), SIMDE_FLOAT32_C( -717.34), SIMDE_FLOAT32_C( -366.18), SIMDE_FLOAT32_C( 828.82), SIMDE_FLOAT32_C( 666.15), SIMDE_FLOAT32_C( 568.29), SIMDE_FLOAT32_C( -6.65), SIMDE_FLOAT32_C( 380.03), SIMDE_FLOAT32_C( -987.43), SIMDE_FLOAT32_C( -751.06), SIMDE_FLOAT32_C( 319.13), SIMDE_FLOAT32_C( -246.43), SIMDE_FLOAT32_C( -915.58) }, INT32_C( 27), UINT16_C( 0) }, { { SIMDE_FLOAT32_C( 832.43), SIMDE_FLOAT32_C( 273.60), SIMDE_FLOAT32_C( 340.07), SIMDE_FLOAT32_C( -158.30), SIMDE_FLOAT32_C( 947.84), SIMDE_FLOAT32_C( -978.69), SIMDE_FLOAT32_C( -312.69), 
SIMDE_FLOAT32_C( 672.74), SIMDE_FLOAT32_C( 808.91), SIMDE_FLOAT32_C( 452.46), SIMDE_FLOAT32_C( -338.88), SIMDE_FLOAT32_C( -72.53), SIMDE_FLOAT32_C( -801.03), SIMDE_FLOAT32_C( 662.10), SIMDE_FLOAT32_C( -169.31), SIMDE_FLOAT32_C( 24.48) }, { SIMDE_FLOAT32_C( -258.87), SIMDE_FLOAT32_C( 94.18), SIMDE_FLOAT32_C( 28.55), SIMDE_FLOAT32_C( 23.79), SIMDE_FLOAT32_C( 728.01), SIMDE_FLOAT32_C( -142.63), SIMDE_FLOAT32_C( -310.06), SIMDE_FLOAT32_C( 296.29), SIMDE_FLOAT32_C( 850.72), SIMDE_FLOAT32_C( -930.03), SIMDE_FLOAT32_C( 308.86), SIMDE_FLOAT32_C( -900.34), SIMDE_FLOAT32_C( 389.10), SIMDE_FLOAT32_C( -937.56), SIMDE_FLOAT32_C( -815.92), SIMDE_FLOAT32_C( 221.53) }, INT32_C( 28), UINT16_MAX }, { { SIMDE_FLOAT32_C( 336.03), SIMDE_FLOAT32_C( 524.16), SIMDE_FLOAT32_C( -936.77), SIMDE_FLOAT32_C( 283.87), SIMDE_FLOAT32_C( 545.47), SIMDE_FLOAT32_C( -249.46), SIMDE_FLOAT32_C( -43.38), SIMDE_FLOAT32_C( 354.38), SIMDE_FLOAT32_C( -797.00), SIMDE_FLOAT32_C( 617.74), SIMDE_FLOAT32_C( -718.15), SIMDE_FLOAT32_C( -598.03), SIMDE_FLOAT32_C( 279.84), SIMDE_FLOAT32_C( 112.54), SIMDE_FLOAT32_C( 426.45), SIMDE_FLOAT32_C( -979.03) }, { SIMDE_FLOAT32_C( -793.27), SIMDE_FLOAT32_C( -545.00), SIMDE_FLOAT32_C( 44.77), SIMDE_FLOAT32_C( 934.73), SIMDE_FLOAT32_C( 312.37), SIMDE_FLOAT32_C( 734.71), SIMDE_FLOAT32_C( 231.03), SIMDE_FLOAT32_C( 163.09), SIMDE_FLOAT32_C( 804.68), SIMDE_FLOAT32_C( -460.11), SIMDE_FLOAT32_C( 262.76), SIMDE_FLOAT32_C( 193.77), SIMDE_FLOAT32_C( -397.67), SIMDE_FLOAT32_C( 446.84), SIMDE_FLOAT32_C( -584.70), SIMDE_FLOAT32_C( 938.36) }, INT32_C( 29), UINT16_C(21139) }, { { SIMDE_FLOAT32_C( -29.00), SIMDE_FLOAT32_C( -521.48), SIMDE_FLOAT32_C( 222.23), SIMDE_FLOAT32_C( -483.53), SIMDE_FLOAT32_C( 229.06), SIMDE_FLOAT32_C( -821.16), SIMDE_FLOAT32_C( 870.85), SIMDE_FLOAT32_C( 432.06), SIMDE_FLOAT32_C( 796.58), SIMDE_FLOAT32_C( -847.30), SIMDE_FLOAT32_C( 834.03), SIMDE_FLOAT32_C( 76.42), SIMDE_FLOAT32_C( 265.24), SIMDE_FLOAT32_C( 260.47), SIMDE_FLOAT32_C( 97.39), SIMDE_FLOAT32_C( 471.97) }, 
{ SIMDE_FLOAT32_C( 715.47), SIMDE_FLOAT32_C( -857.84), SIMDE_FLOAT32_C( 406.70), SIMDE_FLOAT32_C( 27.84), SIMDE_FLOAT32_C( 876.87), SIMDE_FLOAT32_C( -362.27), SIMDE_FLOAT32_C( -809.06), SIMDE_FLOAT32_C( 681.54), SIMDE_FLOAT32_C( 177.62), SIMDE_FLOAT32_C( 453.69), SIMDE_FLOAT32_C( -124.69), SIMDE_FLOAT32_C( 779.94), SIMDE_FLOAT32_C( -99.47), SIMDE_FLOAT32_C( 290.61), SIMDE_FLOAT32_C( 718.30), SIMDE_FLOAT32_C( 871.53) }, INT32_C( 30), UINT16_C( 5442) }, { { SIMDE_FLOAT32_C( 769.14), SIMDE_FLOAT32_C( -59.47), SIMDE_FLOAT32_C( -612.01), SIMDE_FLOAT32_C( -1.80), SIMDE_FLOAT32_C( 119.37), SIMDE_FLOAT32_C( -741.16), SIMDE_FLOAT32_C( -569.75), SIMDE_FLOAT32_C( -84.05), SIMDE_FLOAT32_C( -588.46), SIMDE_FLOAT32_C( -735.72), SIMDE_FLOAT32_C( 992.37), SIMDE_FLOAT32_C( 676.78), SIMDE_FLOAT32_C( 524.75), SIMDE_FLOAT32_C( 89.76), SIMDE_FLOAT32_C( 148.75), SIMDE_FLOAT32_C( 240.22) }, { SIMDE_FLOAT32_C( 231.92), SIMDE_FLOAT32_C( -444.55), SIMDE_FLOAT32_C( -731.94), SIMDE_FLOAT32_C( 108.79), SIMDE_FLOAT32_C( 193.18), SIMDE_FLOAT32_C( -541.00), SIMDE_FLOAT32_C( -209.67), SIMDE_FLOAT32_C( -629.20), SIMDE_FLOAT32_C( 912.69), SIMDE_FLOAT32_C( 665.64), SIMDE_FLOAT32_C( -849.26), SIMDE_FLOAT32_C( -186.78), SIMDE_FLOAT32_C( -43.74), SIMDE_FLOAT32_C( 869.04), SIMDE_FLOAT32_C( -315.25), SIMDE_FLOAT32_C( -274.61) }, INT32_C( 31), UINT16_MAX } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__mmask16 r = simde_mm512_cmp_ps_mask(a, b, test_vec[i].imm8); simde_assert_equal_mmask16(r, test_vec[i].r); } return 0; } static int test_simde_mm512_cmp_pd_mask (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 b[8]; const int imm8; const simde__mmask8 r; } test_vec[] = { { { SIMDE_FLOAT64_C( -889.13), SIMDE_FLOAT64_C( 346.35), SIMDE_FLOAT64_C( -842.69), SIMDE_FLOAT64_C( 879.16), SIMDE_FLOAT64_C( 37.28), SIMDE_FLOAT64_C( 
607.79), SIMDE_FLOAT64_C( -858.34), SIMDE_FLOAT64_C( -122.77) }, { SIMDE_FLOAT64_C( 597.22), SIMDE_FLOAT64_C( -446.39), SIMDE_FLOAT64_C( -495.07), SIMDE_FLOAT64_C( -701.44), SIMDE_FLOAT64_C( 913.94), SIMDE_FLOAT64_C( 514.01), SIMDE_FLOAT64_C( -970.90), SIMDE_FLOAT64_C( 91.95) }, INT32_C( 18), UINT8_C(149) }, { { SIMDE_FLOAT64_C( -229.78), SIMDE_FLOAT64_C( 109.42), SIMDE_FLOAT64_C( 986.52), SIMDE_FLOAT64_C( 450.97), SIMDE_FLOAT64_C( -621.15), SIMDE_FLOAT64_C( 366.22), SIMDE_FLOAT64_C( -999.97), SIMDE_FLOAT64_C( -551.44) }, { SIMDE_FLOAT64_C( -43.21), SIMDE_FLOAT64_C( -236.56), SIMDE_FLOAT64_C( 73.66), SIMDE_FLOAT64_C( 21.45), SIMDE_FLOAT64_C( 426.81), SIMDE_FLOAT64_C( -684.87), SIMDE_FLOAT64_C( -547.62), SIMDE_FLOAT64_C( 194.20) }, INT32_C( 30), UINT8_C( 46) }, { { SIMDE_FLOAT64_C( 465.94), SIMDE_FLOAT64_C( -899.85), SIMDE_FLOAT64_C( 236.88), SIMDE_FLOAT64_C( -744.20), SIMDE_FLOAT64_C( 213.84), SIMDE_FLOAT64_C( 84.61), SIMDE_FLOAT64_C( -4.00), SIMDE_FLOAT64_C( 791.14) }, { SIMDE_FLOAT64_C( 691.24), SIMDE_FLOAT64_C( -392.69), SIMDE_FLOAT64_C( 37.26), SIMDE_FLOAT64_C( 209.16), SIMDE_FLOAT64_C( -604.04), SIMDE_FLOAT64_C( -124.25), SIMDE_FLOAT64_C( -288.59), SIMDE_FLOAT64_C( -412.86) }, INT32_C( 21), UINT8_C(244) }, { { SIMDE_FLOAT64_C( -618.86), SIMDE_FLOAT64_C( 797.13), SIMDE_FLOAT64_C( -583.56), SIMDE_FLOAT64_C( 46.88), SIMDE_FLOAT64_C( -89.41), SIMDE_FLOAT64_C( -683.29), SIMDE_FLOAT64_C( 20.57), SIMDE_FLOAT64_C( -213.31) }, { SIMDE_FLOAT64_C( 887.10), SIMDE_FLOAT64_C( -441.79), SIMDE_FLOAT64_C( 836.33), SIMDE_FLOAT64_C( 135.59), SIMDE_FLOAT64_C( 918.70), SIMDE_FLOAT64_C( 512.23), SIMDE_FLOAT64_C( -895.63), SIMDE_FLOAT64_C( -900.96) }, INT32_C( 31), UINT8_MAX }, { { SIMDE_FLOAT64_C( -989.35), SIMDE_FLOAT64_C( -86.98), SIMDE_FLOAT64_C( 193.68), SIMDE_FLOAT64_C( -742.71), SIMDE_FLOAT64_C( -727.59), SIMDE_FLOAT64_C( -646.86), SIMDE_FLOAT64_C( 183.87), SIMDE_FLOAT64_C( 287.33) }, { SIMDE_FLOAT64_C( -774.81), SIMDE_FLOAT64_C( -242.40), SIMDE_FLOAT64_C( 53.99), 
SIMDE_FLOAT64_C( -593.99), SIMDE_FLOAT64_C( 779.72), SIMDE_FLOAT64_C( 806.29), SIMDE_FLOAT64_C( -734.86), SIMDE_FLOAT64_C( -839.78) }, INT32_C( 6), UINT8_C(198) }, { { SIMDE_FLOAT64_C( -467.13), SIMDE_FLOAT64_C( 942.12), SIMDE_FLOAT64_C( 248.01), SIMDE_FLOAT64_C( 325.07), SIMDE_FLOAT64_C( -486.56), SIMDE_FLOAT64_C( 428.42), SIMDE_FLOAT64_C( 503.39), SIMDE_FLOAT64_C( 520.75) }, { SIMDE_FLOAT64_C( 191.14), SIMDE_FLOAT64_C( 441.43), SIMDE_FLOAT64_C( -872.87), SIMDE_FLOAT64_C( -283.89), SIMDE_FLOAT64_C( 651.45), SIMDE_FLOAT64_C( 971.81), SIMDE_FLOAT64_C( -736.72), SIMDE_FLOAT64_C( -71.12) }, INT32_C( 17), UINT8_C( 49) }, { { SIMDE_FLOAT64_C( 768.97), SIMDE_FLOAT64_C( 83.87), SIMDE_FLOAT64_C( -412.86), SIMDE_FLOAT64_C( 997.28), SIMDE_FLOAT64_C( -659.21), SIMDE_FLOAT64_C( 650.14), SIMDE_FLOAT64_C( 927.00), SIMDE_FLOAT64_C( 40.06) }, { SIMDE_FLOAT64_C( -783.94), SIMDE_FLOAT64_C( 289.86), SIMDE_FLOAT64_C( -325.98), SIMDE_FLOAT64_C( -693.23), SIMDE_FLOAT64_C( 823.83), SIMDE_FLOAT64_C( 81.84), SIMDE_FLOAT64_C( -557.12), SIMDE_FLOAT64_C( 458.20) }, INT32_C( 28), UINT8_MAX }, { { SIMDE_FLOAT64_C( 728.01), SIMDE_FLOAT64_C( -387.72), SIMDE_FLOAT64_C( -341.65), SIMDE_FLOAT64_C( -84.21), SIMDE_FLOAT64_C( 640.32), SIMDE_FLOAT64_C( -112.91), SIMDE_FLOAT64_C( 308.09), SIMDE_FLOAT64_C( 20.16) }, { SIMDE_FLOAT64_C( 745.10), SIMDE_FLOAT64_C( 919.13), SIMDE_FLOAT64_C( -195.91), SIMDE_FLOAT64_C( -612.27), SIMDE_FLOAT64_C( -399.85), SIMDE_FLOAT64_C( -354.18), SIMDE_FLOAT64_C( -752.13), SIMDE_FLOAT64_C( 868.12) }, INT32_C( 29), UINT8_C(120) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__mmask8 r = simde_mm512_cmp_pd_mask(a, b, test_vec[i].imm8); simde_assert_equal_mmask8(r, test_vec[i].r); } return 0; } #else /* To avoid a warning about expr < 0 always evaluating to false * (-Wtype-limits) because there are no functions to test. 
*/ static int test_simde_dummy (SIMDE_MUNIT_TEST_ARGS) { return 0; } #endif /* !defined(SIMDE_NATIVE_ALIASES_TESTING */ SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_NATIVE_ALIASES_TESTING) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmp_ps_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmp_pd_mask) #else SIMDE_TEST_FUNC_LIST_ENTRY(dummy) #endif SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/cmpeq.c000066400000000000000000002013231400333146700165260ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #define SIMDE_TEST_X86_AVX512_INSN cmpeq #include #include #include static int test_simde_mm512_cmpeq_epi8_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__mmask64 r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( 73), INT8_C( 68), INT8_C( -71), INT8_C( -32), INT8_C( 100), INT8_C( 125), INT8_C( 89), INT8_C( 95), INT8_C( -23), INT8_C( 76), INT8_C( 84), INT8_C( -43), INT8_C( 86), INT8_C( 29), INT8_C( 64), INT8_C( 30), INT8_C( -17), INT8_C( -61), INT8_C( 115), INT8_C( -53), INT8_C(-100), INT8_C( 104), INT8_C( 111), INT8_C( -59), INT8_C( -31), INT8_C( 20), INT8_C( 31), INT8_C(-121), INT8_C( 90), INT8_C( 18), INT8_C( 6), INT8_C( 39), INT8_C( 15), INT8_C( 62), INT8_C( 39), INT8_C( -25), INT8_C( 60), INT8_C( 110), INT8_C( 45), INT8_C( 113), INT8_C( 81), INT8_C( 78), INT8_C( -53), INT8_C( 11), INT8_C( -27), INT8_C(-113), INT8_C( -3), INT8_C( 14), INT8_C( 109), INT8_C( 43), INT8_C( -54), INT8_C( 111), INT8_C( -91), INT8_C( -21), INT8_C( 102), INT8_C( 8), INT8_C( -41), INT8_C( -47), INT8_C( -90), INT8_C( 3), INT8_C( 18), INT8_C( 32), INT8_C( 89), INT8_C( -62)), simde_mm512_set_epi8(INT8_C( 73), INT8_C( 68), INT8_C( -71), INT8_C( -46), INT8_C( 72), INT8_C( 42), INT8_C( 65), INT8_C( -27), INT8_C( -37), INT8_C( 76), INT8_C( 84), INT8_C( -95), INT8_C( -29), INT8_C( 29), INT8_C( 64), INT8_C( 75), INT8_C( -17), INT8_C( -61), INT8_C( 76), INT8_C( 88), INT8_C(-100), INT8_C( -91), INT8_C( 111), INT8_C( -59), INT8_C( -31), INT8_C( -17), INT8_C( 31), INT8_C( 68), INT8_C( 90), INT8_C( 18), INT8_C(-110), INT8_C( 39), INT8_C( 15), INT8_C( 62), INT8_C( 24), INT8_C( -25), INT8_C( 80), INT8_C( 122), INT8_C( 103), INT8_C(-115), INT8_C( 81), INT8_C( 24), INT8_C( 32), INT8_C( 82), INT8_C( -27), INT8_C( 105), INT8_C( -3), INT8_C( 14), INT8_C( 109), INT8_C( 43), INT8_C( -54), INT8_C(-121), INT8_C( -91), INT8_C(-124), INT8_C( 102), INT8_C( 46), INT8_C( -41), INT8_C( 101), INT8_C( 51), INT8_C( 3), 
INT8_C( 18), INT8_C( 32), INT8_C( -12), INT8_C( -62)), UINT64_C(0xe066cbadd08bea9d) }, { simde_mm512_set_epi8(INT8_C( -84), INT8_C(-108), INT8_C( 13), INT8_C( -97), INT8_C( -34), INT8_C( 27), INT8_C( 124), INT8_C(-120), INT8_C( 3), INT8_C( 26), INT8_C( 43), INT8_C( -96), INT8_C( -63), INT8_C( 49), INT8_C( 127), INT8_C( 0), INT8_C( -20), INT8_C( -31), INT8_C( 125), INT8_C( 14), INT8_C( -53), INT8_C( -87), INT8_C( 115), INT8_C( -20), INT8_C( -93), INT8_C( 70), INT8_C( 29), INT8_C( -90), INT8_C( 105), INT8_C( -54), INT8_C( 96), INT8_C(-106), INT8_C( 94), INT8_C( -52), INT8_C( -50), INT8_C( -79), INT8_C( -54), INT8_C( 78), INT8_C( 17), INT8_C( 81), INT8_C( 35), INT8_C( 120), INT8_C( 47), INT8_C( -25), INT8_C( 110), INT8_C( 55), INT8_C( 40), INT8_C( -31), INT8_C( 93), INT8_C( -23), INT8_C( 4), INT8_C( 45), INT8_C( 59), INT8_C( -33), INT8_C( 124), INT8_C( -52), INT8_C( 42), INT8_C( -14), INT8_C(-119), INT8_C( 88), INT8_C( -38), INT8_C( -31), INT8_C(-113), INT8_C( 33)), simde_mm512_set_epi8(INT8_C( 25), INT8_C(-108), INT8_C( 13), INT8_C( -97), INT8_C( -34), INT8_C( 119), INT8_C( 122), INT8_C( 82), INT8_C( 3), INT8_C(-111), INT8_C( 60), INT8_C( -96), INT8_C( 26), INT8_C( 3), INT8_C( -7), INT8_C( -8), INT8_C( 94), INT8_C( -31), INT8_C( -71), INT8_C( 14), INT8_C(-105), INT8_C( -87), INT8_C( 115), INT8_C( -68), INT8_C( -93), INT8_C( 70), INT8_C( -47), INT8_C(-106), INT8_C( 105), INT8_C( -54), INT8_C( 96), INT8_C( 105), INT8_C( 94), INT8_C( 84), INT8_C( -50), INT8_C( -79), INT8_C( -54), INT8_C( 78), INT8_C( 17), INT8_C( 39), INT8_C( 35), INT8_C( -87), INT8_C( -83), INT8_C( -25), INT8_C( 110), INT8_C( 2), INT8_C( -90), INT8_C( -31), INT8_C( 19), INT8_C( -23), INT8_C( 4), INT8_C( 16), INT8_C( 59), INT8_C( -33), INT8_C( 124), INT8_C( 127), INT8_C( -60), INT8_C( -14), INT8_C(-119), INT8_C( 88), INT8_C( -38), INT8_C( 109), INT8_C(-113), INT8_C( 25)), UINT64_C(0x789056cebe996e7a) }, { simde_mm512_set_epi8(INT8_C( 93), INT8_C( 110), INT8_C( 120), INT8_C( 25), INT8_C( -37), INT8_C( 
-25), INT8_C( -34), INT8_C(-108), INT8_C( -77), INT8_C(-114), INT8_C( 79), INT8_C( -50), INT8_C( -94), INT8_C( 22), INT8_C( -28), INT8_C(-105), INT8_C( 110), INT8_C( 29), INT8_C( -9), INT8_C( -13), INT8_C( -71), INT8_C( 107), INT8_C(-115), INT8_C( 86), INT8_C(-127), INT8_C(-100), INT8_C( 1), INT8_C( 21), INT8_C( -55), INT8_C( -85), INT8_C( -55), INT8_C( -81), INT8_C( -41), INT8_C( 39), INT8_C( 18), INT8_C( -92), INT8_C( 11), INT8_C( -32), INT8_C( -53), INT8_C( 38), INT8_C( -49), INT8_C(-118), INT8_C( 20), INT8_C( 66), INT8_C(-106), INT8_C(-109), INT8_C( 45), INT8_C( -24), INT8_C( -47), INT8_C( 95), INT8_C( 50), INT8_C( 105), INT8_C( 58), INT8_C( 25), INT8_C( -53), INT8_C( -61), INT8_C( -90), INT8_C( 92), INT8_C( 83), INT8_C( 120), INT8_C( 107), INT8_C( -72), INT8_C( 3), INT8_C( -1)), simde_mm512_set_epi8(INT8_C( 93), INT8_C( 110), INT8_C( 120), INT8_C( -75), INT8_C( -37), INT8_C( -25), INT8_C( 14), INT8_C(-108), INT8_C( 5), INT8_C(-114), INT8_C( 79), INT8_C( 57), INT8_C( -94), INT8_C( 22), INT8_C( 4), INT8_C( 3), INT8_C( 66), INT8_C( 29), INT8_C( -9), INT8_C( 20), INT8_C( 93), INT8_C( 107), INT8_C( 6), INT8_C( 86), INT8_C( 84), INT8_C(-100), INT8_C( 81), INT8_C( 21), INT8_C( 117), INT8_C( 22), INT8_C( -55), INT8_C( -81), INT8_C( -41), INT8_C( -17), INT8_C( 19), INT8_C( 106), INT8_C( 114), INT8_C( -32), INT8_C( 98), INT8_C( -16), INT8_C( -49), INT8_C(-118), INT8_C( 20), INT8_C( 66), INT8_C(-106), INT8_C(-109), INT8_C( 97), INT8_C( 29), INT8_C( -47), INT8_C( -47), INT8_C( 50), INT8_C(-111), INT8_C( 58), INT8_C( 115), INT8_C( -53), INT8_C( 93), INT8_C( -90), INT8_C( -27), INT8_C( 41), INT8_C( 120), INT8_C( 119), INT8_C( 86), INT8_C( -36), INT8_C( -1)), UINT64_C(0xed6c655384fcaa91) }, { simde_mm512_set_epi8(INT8_C( 10), INT8_C( 75), INT8_C( 91), INT8_C( -99), INT8_C( -88), INT8_C( 99), INT8_C( -86), INT8_C( 96), INT8_C( 14), INT8_C( -1), INT8_C( 14), INT8_C( 100), INT8_C(-114), INT8_C( 63), INT8_C( 68), INT8_C(-113), INT8_C( -59), INT8_C( -42), INT8_C( -14), 
INT8_C(-111), INT8_C( 6), INT8_C( 68), INT8_C( 11), INT8_C(-108), INT8_C( -62), INT8_C( 87), INT8_C( -72), INT8_C( -23), INT8_C( 78), INT8_C( -18), INT8_C( -36), INT8_C( -6), INT8_C( -68), INT8_C(-115), INT8_C( -24), INT8_C( 127), INT8_C( -36), INT8_C( 21), INT8_C( 38), INT8_C(-106), INT8_C( 33), INT8_C( -66), INT8_C(-121), INT8_C( 36), INT8_C( 24), INT8_C( 61), INT8_C( 66), INT8_C( 20), INT8_C( 63), INT8_C( -18), INT8_C( 11), INT8_C(-103), INT8_C( -19), INT8_C( -42), INT8_C( -69), INT8_C( 53), INT8_C( -40), INT8_C( 112), INT8_C( 8), INT8_C( -69), INT8_C(-102), INT8_C( 62), INT8_C( 85), INT8_C( 62)), simde_mm512_set_epi8(INT8_C( 10), INT8_C( 75), INT8_C( -74), INT8_C( -47), INT8_C( -88), INT8_C( 99), INT8_C( -86), INT8_C(-128), INT8_C( 94), INT8_C( -1), INT8_C( 99), INT8_C( 100), INT8_C( -25), INT8_C( 7), INT8_C( 59), INT8_C(-113), INT8_C( 119), INT8_C( -42), INT8_C( -14), INT8_C( 79), INT8_C( 4), INT8_C(-111), INT8_C( 11), INT8_C( 80), INT8_C( -78), INT8_C( 87), INT8_C( -72), INT8_C(-111), INT8_C( -95), INT8_C( -18), INT8_C( -36), INT8_C( -40), INT8_C( -68), INT8_C(-115), INT8_C( -24), INT8_C( -50), INT8_C( -36), INT8_C( 10), INT8_C( 47), INT8_C( 62), INT8_C( -15), INT8_C( -66), INT8_C(-122), INT8_C( 36), INT8_C( -22), INT8_C( 61), INT8_C( -11), INT8_C( 20), INT8_C( 63), INT8_C( 82), INT8_C(-113), INT8_C(-103), INT8_C( -19), INT8_C( -42), INT8_C( -69), INT8_C( 53), INT8_C( -40), INT8_C( 112), INT8_C( 8), INT8_C( -69), INT8_C(-102), INT8_C( 23), INT8_C( 85), INT8_C( 62)), UINT64_C(0xce516266e8559ffb) }, { simde_mm512_set_epi8(INT8_C( -7), INT8_C( 2), INT8_C(-111), INT8_C( 64), INT8_C(-100), INT8_C( 87), INT8_C( 100), INT8_C( -30), INT8_C( -39), INT8_C( -38), INT8_C( 121), INT8_C( 55), INT8_C( -64), INT8_C( 81), INT8_C( -3), INT8_C( 79), INT8_C( -41), INT8_C( 118), INT8_C( -37), INT8_C( -34), INT8_C( -13), INT8_C( 63), INT8_C( 26), INT8_C( -81), INT8_C( 90), INT8_C( 43), INT8_C( -31), INT8_C( -17), INT8_C(-100), INT8_C( -71), INT8_C(-104), INT8_C( -66), INT8_C( 
-94), INT8_C( -89), INT8_C( 100), INT8_C( 36), INT8_C( 17), INT8_C( 116), INT8_C( -30), INT8_C( 16), INT8_C( 110), INT8_C( 98), INT8_C( 11), INT8_C( -42), INT8_C( -78), INT8_C( -68), INT8_C( -26), INT8_C( -35), INT8_C( 12), INT8_C( -40), INT8_C( -27), INT8_C( -40), INT8_C(-102), INT8_C(-109), INT8_C( 39), INT8_C( 29), INT8_C( 21), INT8_C( 9), INT8_C( 49), INT8_C( -13), INT8_C( -49), INT8_C( 7), INT8_C( 91), INT8_C( 15)), simde_mm512_set_epi8(INT8_C( 78), INT8_C( 2), INT8_C( -91), INT8_C( 64), INT8_C(-100), INT8_C( 41), INT8_C( -34), INT8_C( -46), INT8_C( -39), INT8_C( 31), INT8_C( 13), INT8_C( 55), INT8_C( -42), INT8_C( 33), INT8_C( -3), INT8_C( 79), INT8_C( -41), INT8_C( 118), INT8_C( -37), INT8_C( 90), INT8_C( -13), INT8_C( 63), INT8_C( 51), INT8_C( -81), INT8_C( 90), INT8_C( 43), INT8_C( -31), INT8_C(-112), INT8_C(-100), INT8_C( 41), INT8_C(-104), INT8_C( -66), INT8_C( -94), INT8_C( -89), INT8_C( -85), INT8_C(-109), INT8_C( 113), INT8_C( 116), INT8_C( 100), INT8_C( 16), INT8_C( 5), INT8_C( -50), INT8_C( -51), INT8_C( -42), INT8_C( -95), INT8_C( -68), INT8_C( -26), INT8_C( -35), INT8_C( -73), INT8_C( 71), INT8_C( 65), INT8_C( -40), INT8_C(-102), INT8_C( 7), INT8_C( 94), INT8_C( 29), INT8_C( 65), INT8_C( 9), INT8_C( 49), INT8_C( -13), INT8_C( -33), INT8_C( 7), INT8_C(-101), INT8_C( 15)), UINT64_C(0x5893edebc5171975) }, { simde_mm512_set_epi8(INT8_C( -34), INT8_C( -12), INT8_C( 105), INT8_C(-124), INT8_C( -33), INT8_C( -79), INT8_C( -6), INT8_C( 54), INT8_C( 81), INT8_C( -11), INT8_C( 67), INT8_C( 63), INT8_C( 103), INT8_C( 119), INT8_C( -89), INT8_C( 40), INT8_C( 8), INT8_C( -38), INT8_C( 71), INT8_C( 66), INT8_C(-106), INT8_C( -45), INT8_C( 18), INT8_C( 100), INT8_C( 122), INT8_C( 93), INT8_C( -42), INT8_C( 5), INT8_C( -39), INT8_C( 37), INT8_C( -70), INT8_C( 13), INT8_C( 99), INT8_C( -57), INT8_C( -88), INT8_C( -36), INT8_C(-103), INT8_C( 25), INT8_C( 94), INT8_C(-107), INT8_C( -32), INT8_C( -12), INT8_C( -14), INT8_C( 32), INT8_C( -38), INT8_C( 10), INT8_C( 
89), INT8_C( -69), INT8_C( -8), INT8_C( 69), INT8_C( -20), INT8_C(-122), INT8_C( -75), INT8_C( -71), INT8_C( 3), INT8_C( 102), INT8_C( 119), INT8_C( -58), INT8_C( -49), INT8_C( 80), INT8_C( -15), INT8_C( -97), INT8_C( 45), INT8_C( 96)), simde_mm512_set_epi8(INT8_C( -34), INT8_C( -12), INT8_C( 81), INT8_C(-115), INT8_C( -33), INT8_C( -79), INT8_C(-117), INT8_C( -34), INT8_C( 81), INT8_C( -11), INT8_C( -63), INT8_C( -61), INT8_C( 53), INT8_C( 119), INT8_C( 26), INT8_C( 40), INT8_C( 8), INT8_C( -38), INT8_C( 25), INT8_C( -23), INT8_C( -16), INT8_C( -45), INT8_C( -64), INT8_C( 100), INT8_C( 91), INT8_C( 93), INT8_C( -42), INT8_C( 5), INT8_C( 81), INT8_C( -76), INT8_C( -70), INT8_C( 13), INT8_C( 26), INT8_C( -57), INT8_C( -88), INT8_C( -64), INT8_C( -68), INT8_C( -91), INT8_C(-123), INT8_C( 38), INT8_C( -32), INT8_C( 29), INT8_C( 82), INT8_C( 54), INT8_C(-107), INT8_C( 10), INT8_C( 89), INT8_C( 28), INT8_C( -27), INT8_C( 41), INT8_C( -20), INT8_C(-122), INT8_C( -75), INT8_C( -71), INT8_C( 3), INT8_C( -30), INT8_C( 97), INT8_C( 18), INT8_C( -90), INT8_C( 107), INT8_C( 99), INT8_C( 10), INT8_C( 45), INT8_C( 96)), UINT64_C(0xccc5c57360863e03) }, { simde_mm512_set_epi8(INT8_C( 48), INT8_C( 94), INT8_C( 112), INT8_C(-107), INT8_C( -34), INT8_C( -86), INT8_C( 65), INT8_C( 92), INT8_C( 97), INT8_C( -99), INT8_C( 28), INT8_C( 47), INT8_C(-117), INT8_C( -22), INT8_C(-111), INT8_C( -67), INT8_C( 113), INT8_C(-107), INT8_C( -23), INT8_C( 77), INT8_C( 60), INT8_C( 104), INT8_C(-116), INT8_C( -86), INT8_C(-113), INT8_C( -79), INT8_C( -64), INT8_C( -15), INT8_C(-123), INT8_C( 99), INT8_C( 25), INT8_C( 27), INT8_C( -40), INT8_C( 126), INT8_C( -66), INT8_C( -45), INT8_C( 57), INT8_C( -30), INT8_C( -12), INT8_C( 16), INT8_C( 122), INT8_C( 124), INT8_C( -75), INT8_C( 50), INT8_C( -6), INT8_C( 41), INT8_C( -47), INT8_C( -3), INT8_C( 29), INT8_C( -20), INT8_C( -45), INT8_C( -46), INT8_C( -45), INT8_C( -14), INT8_C( 99), INT8_C( 84), INT8_C( -62), INT8_C( -99), INT8_C(-104), INT8_C( -78), 
INT8_C( 106), INT8_C(-117), INT8_C( -94), INT8_C( 3)), simde_mm512_set_epi8(INT8_C( -88), INT8_C( -74), INT8_C( 2), INT8_C( -83), INT8_C(-112), INT8_C( -91), INT8_C( 65), INT8_C( 92), INT8_C( 97), INT8_C( 52), INT8_C( 28), INT8_C( -65), INT8_C(-117), INT8_C( -90), INT8_C(-111), INT8_C( -67), INT8_C( 113), INT8_C( 16), INT8_C(-124), INT8_C( 77), INT8_C( 60), INT8_C( -64), INT8_C(-116), INT8_C( -89), INT8_C( -7), INT8_C( -79), INT8_C( 46), INT8_C( 114), INT8_C(-107), INT8_C( 99), INT8_C( -79), INT8_C( 80), INT8_C( -40), INT8_C( -81), INT8_C( -66), INT8_C( -45), INT8_C( 57), INT8_C( -30), INT8_C( 66), INT8_C( 71), INT8_C( 122), INT8_C( 95), INT8_C( -43), INT8_C( 50), INT8_C( -6), INT8_C( 41), INT8_C( -47), INT8_C( 95), INT8_C( 5), INT8_C( -20), INT8_C( -45), INT8_C(-118), INT8_C( 70), INT8_C( 81), INT8_C( 0), INT8_C( 84), INT8_C( -62), INT8_C(-101), INT8_C( 13), INT8_C( 33), INT8_C(-104), INT8_C(-117), INT8_C( -11), INT8_C( 20)), UINT64_C(0x3ab9a44bc9e6184) }, { simde_mm512_set_epi8(INT8_C( 67), INT8_C( 34), INT8_C( -33), INT8_C( 31), INT8_C(-128), INT8_C( 55), INT8_C( 93), INT8_C( 58), INT8_C( 57), INT8_C( 104), INT8_C(-110), INT8_C( 59), INT8_C( 55), INT8_C( 33), INT8_C(-122), INT8_C( 69), INT8_C( 57), INT8_C( 30), INT8_C( -13), INT8_C( -65), INT8_C( -22), INT8_C(-100), INT8_C( 18), INT8_C( -65), INT8_C( -60), INT8_C(-105), INT8_C( 27), INT8_C( -71), INT8_C( 52), INT8_C( 12), INT8_C( -4), INT8_C( 64), INT8_C( 20), INT8_C( 51), INT8_C( 87), INT8_C( 43), INT8_C( 26), INT8_C( 6), INT8_C( -66), INT8_C( -40), INT8_C( 87), INT8_C( 1), INT8_C( -26), INT8_C( 92), INT8_C( -33), INT8_C( 8), INT8_C( 42), INT8_C( -93), INT8_C( 44), INT8_C( -55), INT8_C(-113), INT8_C( -43), INT8_C( 32), INT8_C( 105), INT8_C( -27), INT8_C( 96), INT8_C( 72), INT8_C( 48), INT8_C( -46), INT8_C( 24), INT8_C( -10), INT8_C( -98), INT8_C( -56), INT8_C( -41)), simde_mm512_set_epi8(INT8_C( 67), INT8_C( -63), INT8_C( -33), INT8_C( 31), INT8_C(-128), INT8_C( 55), INT8_C( 93), INT8_C( -8), INT8_C( 82), 
INT8_C( 104), INT8_C(-110), INT8_C( 59), INT8_C( 55), INT8_C( 0), INT8_C( -25), INT8_C( 69), INT8_C( 27), INT8_C( 30), INT8_C( -13), INT8_C( -65), INT8_C( -7), INT8_C( -28), INT8_C( 18), INT8_C( -65), INT8_C( 67), INT8_C( -3), INT8_C( 57), INT8_C( -68), INT8_C( 52), INT8_C( 12), INT8_C( -4), INT8_C(-128), INT8_C( 20), INT8_C( 37), INT8_C( 9), INT8_C( 80), INT8_C( 26), INT8_C( 6), INT8_C( -66), INT8_C( 9), INT8_C( -98), INT8_C( 1), INT8_C( -26), INT8_C( 92), INT8_C( -33), INT8_C( 8), INT8_C( -81), INT8_C( -93), INT8_C( 116), INT8_C( -55), INT8_C(-113), INT8_C( -43), INT8_C( 32), INT8_C( 105), INT8_C( -27), INT8_C( 37), INT8_C( 72), INT8_C( -73), INT8_C( -19), INT8_C( 96), INT8_C( 52), INT8_C( -98), INT8_C( -45), INT8_C( -41)), UINT64_C(0xbe79730e8e7d7e85) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask64 r = simde_mm512_cmpeq_epi8_mask(test_vec[i].a, test_vec[i].b); simde_assert_equal_mmask64(r, test_vec[i].r); } return 0; } static int test_simde_mm512_cmpeq_epi32_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__mmask16 r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 1955445938), INT32_C( 1791143901), INT32_C(-1554982337), INT32_C(-1864115653), INT32_C(-1774796435), INT32_C( 1168347531), INT32_C( 660969508), INT32_C( 1153796239), INT32_C( 609464964), INT32_C( 1687040663), INT32_C( -477087011), INT32_C( 309017072), INT32_C(-2144421691), INT32_C( 682838279), INT32_C( 1954361304), INT32_C( 1694661655)), simde_mm512_set_epi32(INT32_C( 1955445938), INT32_C( 1791143901), INT32_C(-1554982337), INT32_C(-1021004514), INT32_C(-1774796435), INT32_C( 1168347531), INT32_C( 1662960443), INT32_C( 1153796239), INT32_C( 609464964), INT32_C( 428287860), INT32_C( 954212709), INT32_C( 309017072), INT32_C(-2144421691), INT32_C( 1263291650), INT32_C( 1954361304), INT32_C( 1694661655)), UINT16_C(60827) }, { simde_mm512_set_epi32(INT32_C(-1966343749), INT32_C( 200215975), INT32_C(-1192030414), INT32_C( 
122433675), INT32_C( 2024442800), INT32_C(-1612503082), INT32_C( -352764842), INT32_C( -964919402), INT32_C( 198779956), INT32_C(-1052322954), INT32_C(-2053476283), INT32_C( 1410443780), INT32_C( -220052733), INT32_C( 1401656692), INT32_C( 344284159), INT32_C( 358488145)), simde_mm512_set_epi32(INT32_C(-1966343749), INT32_C( 200215975), INT32_C( 1606502639), INT32_C( 122433675), INT32_C( 2024442800), INT32_C( 1676122341), INT32_C( 1013297923), INT32_C( 1764819720), INT32_C(-1006160158), INT32_C( 500282446), INT32_C(-2053476283), INT32_C( 1410443780), INT32_C( -891361692), INT32_C( 1401656692), INT32_C(-1666424273), INT32_C( 358488145)), UINT16_C(55349) }, { simde_mm512_set_epi32(INT32_C( 2029514541), INT32_C(-1405121342), INT32_C( -922299669), INT32_C(-1157312309), INT32_C( 348700274), INT32_C( 1888848128), INT32_C( -909115111), INT32_C(-1689020830), INT32_C( -310128293), INT32_C(-2105034529), INT32_C( 1894199289), INT32_C( -520350707), INT32_C( 2077151614), INT32_C( 947639177), INT32_C( 972348424), INT32_C( 688864219)), simde_mm512_set_epi32(INT32_C( 2029514541), INT32_C(-1405121342), INT32_C( -922299669), INT32_C(-1157312309), INT32_C( -582496350), INT32_C( 321618555), INT32_C( -707023911), INT32_C(-1689020830), INT32_C(-1223923200), INT32_C( -293312096), INT32_C( 1894199289), INT32_C( -89856908), INT32_C( 2077151614), INT32_C( 947639177), INT32_C( 352633301), INT32_C( -580477735)), UINT16_C(61740) }, { simde_mm512_set_epi32(INT32_C( 63224893), INT32_C( -945243939), INT32_C( 1472177104), INT32_C(-1518639341), INT32_C( 1244274955), INT32_C(-1053463974), INT32_C( 207788622), INT32_C( -375726536), INT32_C( -219606791), INT32_C( -871332353), INT32_C( 2036105492), INT32_C( 1465626684), INT32_C( 930923741), INT32_C(-1996202276), INT32_C( 336972805), INT32_C(-1729161210)), simde_mm512_set_epi32(INT32_C( 1739683658), INT32_C( -945243939), INT32_C( 1472177104), INT32_C( -959740920), INT32_C( 1244274955), INT32_C( 236525659), INT32_C( 207788622), INT32_C( 1159372556), 
INT32_C( -632841040), INT32_C( -871332353), INT32_C( 2036105492), INT32_C(-1821161401), INT32_C( 626098659), INT32_C(-1099705903), INT32_C( 1892226777), INT32_C( 183269504)), UINT16_C(27232) }, { simde_mm512_set_epi32(INT32_C( 1660264790), INT32_C( -188014963), INT32_C( 1960568786), INT32_C( 630575470), INT32_C(-1560285386), INT32_C(-1080983958), INT32_C( -186614663), INT32_C(-1365084922), INT32_C( 1687374482), INT32_C( 2091712477), INT32_C( 1770300152), INT32_C( 1222615684), INT32_C( 987382002), INT32_C( -869689297), INT32_C( 1381156346), INT32_C( 352829646)), simde_mm512_set_epi32(INT32_C( 1495188549), INT32_C( -188014963), INT32_C( -879412194), INT32_C( 1172150075), INT32_C( 1163780404), INT32_C(-1080983958), INT32_C( -186614663), INT32_C(-1365084922), INT32_C( 1196042729), INT32_C( 2091712477), INT32_C( 1770300152), INT32_C( 1222615684), INT32_C( 987382002), INT32_C( -453542339), INT32_C(-1460537486), INT32_C( 1311735715)), UINT16_C(18296) }, { simde_mm512_set_epi32(INT32_C( -830898164), INT32_C( 2065530031), INT32_C( 1849339474), INT32_C( -161498764), INT32_C( 726295410), INT32_C(-1366062470), INT32_C( 110025501), INT32_C(-2061598845), INT32_C(-1911113344), INT32_C(-2008355607), INT32_C( 1140427951), INT32_C( 1963231912), INT32_C( 1593065931), INT32_C( 1712671682), INT32_C(-2139143015), INT32_C( 330252777)), simde_mm512_set_epi32(INT32_C( -830898164), INT32_C(-2132024757), INT32_C( 1102342058), INT32_C( -161498764), INT32_C(-2106128090), INT32_C( 648329890), INT32_C(-1284054768), INT32_C(-2061598845), INT32_C(-1911113344), INT32_C(-2008355607), INT32_C( 1715485148), INT32_C( 155412419), INT32_C( 1273550758), INT32_C( 1712671682), INT32_C(-1857983881), INT32_C( 1633779150)), UINT16_C(37316) }, { simde_mm512_set_epi32(INT32_C( 1956746364), INT32_C( 1930323834), INT32_C( 923874794), INT32_C( 121318212), INT32_C(-1375858452), INT32_C( -462992597), INT32_C( 1495829546), INT32_C( 697040437), INT32_C( 727111035), INT32_C(-2061427382), INT32_C( -815432287), INT32_C( 
913775211), INT32_C(-1333809472), INT32_C( 114048073), INT32_C( 1312920985), INT32_C(-1819914035)), simde_mm512_set_epi32(INT32_C( 1659376087), INT32_C( 1930323834), INT32_C( 923874794), INT32_C( 121318212), INT32_C(-1375858452), INT32_C(-1480844812), INT32_C(-1803673478), INT32_C( 697040437), INT32_C( 1793922150), INT32_C( 391658500), INT32_C( -815432287), INT32_C( 913775211), INT32_C(-1333809472), INT32_C( -169359358), INT32_C( 140424991), INT32_C(-1819914035)), UINT16_C(31033) }, { simde_mm512_set_epi32(INT32_C( 1334496661), INT32_C(-1765072906), INT32_C(-1980138391), INT32_C(-1150536116), INT32_C( -711226926), INT32_C( 1955166809), INT32_C( 1418224832), INT32_C( 1791996583), INT32_C(-1305868646), INT32_C( -507537618), INT32_C( 272749509), INT32_C(-1826072492), INT32_C( -629068596), INT32_C(-2142583585), INT32_C( 2048200365), INT32_C(-1377550438)), simde_mm512_set_epi32(INT32_C( 1334496661), INT32_C(-1765072906), INT32_C( -890751438), INT32_C(-1150536116), INT32_C( 307879329), INT32_C( 1955166809), INT32_C(-1884386825), INT32_C( 1791996583), INT32_C( 1128431085), INT32_C( -507537618), INT32_C( 272749509), INT32_C( 1579228324), INT32_C( 1577134581), INT32_C(-2142583585), INT32_C( 1998674783), INT32_C(-1377550438)), UINT16_C(54629) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask16 r = simde_mm512_cmpeq_epi32_mask(test_vec[i].a, test_vec[i].b); simde_assert_equal_mmask16(r, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_cmpeq_epi32_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512i a; simde__m512i b; simde__mmask16 r; } test_vec[8] = { { UINT16_C(15798), simde_mm512_set_epi32(INT32_C(-1396783922), INT32_C( 2147469122), INT32_C( 245941047), INT32_C(-1608794680), INT32_C( 1508622706), INT32_C( -820009589), INT32_C(-2056933337), INT32_C( 1399160559), INT32_C( -781782717), INT32_C( -745938688), INT32_C( 1376412239), INT32_C( 358147007), INT32_C(-2141927311), INT32_C( -13921241), 
INT32_C( 993767039), INT32_C( 1399506469)), simde_mm512_set_epi32(INT32_C(-1396783922), INT32_C( 1246457300), INT32_C( 1479047358), INT32_C( -46329110), INT32_C( 1508622706), INT32_C( -820009589), INT32_C( 2080173402), INT32_C( 1937046285), INT32_C( 1642337112), INT32_C( 717149471), INT32_C( 913865239), INT32_C( 358147007), INT32_C( 1535690363), INT32_C( -13921241), INT32_C( 993767039), INT32_C( 520030741)), UINT16_C( 3094) }, { UINT16_C(11237), simde_mm512_set_epi32(INT32_C( -503773363), INT32_C(-1842967576), INT32_C( 214407733), INT32_C( 2136243689), INT32_C( 619563347), INT32_C(-2046647578), INT32_C( -882198373), INT32_C( -832110086), INT32_C( 828288790), INT32_C( 1819046419), INT32_C( 292064966), INT32_C( -747926472), INT32_C( -881463995), INT32_C(-1469434386), INT32_C( -207196114), INT32_C(-1865668915)), simde_mm512_set_epi32(INT32_C( 261150369), INT32_C(-1842967576), INT32_C(-1148601855), INT32_C( 2136243689), INT32_C(-2013121518), INT32_C(-2046647578), INT32_C(-1816537753), INT32_C( -832110086), INT32_C( 828288790), INT32_C( 1801925150), INT32_C( 292064966), INT32_C(-1755078036), INT32_C(-1190065823), INT32_C( 764421376), INT32_C( -207196114), INT32_C(-1865668915)), UINT16_C( 417) }, { UINT16_C( 61), simde_mm512_set_epi32(INT32_C( 219790698), INT32_C( 346516102), INT32_C(-2082495343), INT32_C( 775700745), INT32_C( -630025741), INT32_C( 1914162819), INT32_C( -226095226), INT32_C( -350619769), INT32_C( 1825330894), INT32_C( -79420124), INT32_C( 939662489), INT32_C( 667606641), INT32_C(-1935137328), INT32_C( 247120548), INT32_C( -894861328), INT32_C(-1690188311)), simde_mm512_set_epi32(INT32_C( 400967600), INT32_C( 346516102), INT32_C(-2082495343), INT32_C( 775700745), INT32_C( -630025741), INT32_C( 1914162819), INT32_C(-1771983812), INT32_C( -350619769), INT32_C( 1825330894), INT32_C(-1887033010), INT32_C( 939662489), INT32_C( -440942769), INT32_C(-1935137328), INT32_C( 247120548), INT32_C(-1352163534), INT32_C(-1690188311)), UINT16_C( 45) }, { 
UINT16_C(40799), simde_mm512_set_epi32(INT32_C(-1103261115), INT32_C(-1530092257), INT32_C( -178076517), INT32_C( 1725180044), INT32_C( -619562554), INT32_C(-2028225116), INT32_C( -516465044), INT32_C( -790492311), INT32_C( 770588317), INT32_C( 966229539), INT32_C( 1696981823), INT32_C( 1846986452), INT32_C( 201003704), INT32_C( -88144749), INT32_C( -459260562), INT32_C(-1462493092)), simde_mm512_set_epi32(INT32_C(-1103261115), INT32_C(-1530092257), INT32_C( 1272329003), INT32_C( 1725180044), INT32_C(-1714282865), INT32_C( 1964019033), INT32_C( -6888948), INT32_C( -794446809), INT32_C( 770588317), INT32_C( 966229539), INT32_C(-1587543669), INT32_C( 1846986452), INT32_C( 201003704), INT32_C( -88144749), INT32_C(-1549459108), INT32_C(-1462493092)), UINT16_C(36957) }, { UINT16_C(18708), simde_mm512_set_epi32(INT32_C( 1654168369), INT32_C(-1358646009), INT32_C( 945188582), INT32_C( 1242452940), INT32_C(-2068238117), INT32_C( 613827224), INT32_C( 1766050173), INT32_C( 788865946), INT32_C( -226150288), INT32_C( 20626714), INT32_C(-1790747056), INT32_C(-1510999017), INT32_C(-2059568770), INT32_C( 525242273), INT32_C(-1970979230), INT32_C( -983788353)), simde_mm512_set_epi32(INT32_C(-1802152524), INT32_C(-1358646009), INT32_C( 1400410557), INT32_C( 1242452940), INT32_C(-2068238117), INT32_C(-1745049433), INT32_C(-1272787498), INT32_C( 36641197), INT32_C( -226150288), INT32_C(-2087200149), INT32_C( -530182364), INT32_C(-1510999017), INT32_C(-2082577633), INT32_C( 525242273), INT32_C(-1970979230), INT32_C( -983788353)), UINT16_C(18452) }, { UINT16_C(48938), simde_mm512_set_epi32(INT32_C(-1802886705), INT32_C( 505130099), INT32_C( 1294359394), INT32_C( 564426410), INT32_C( -813242663), INT32_C(-1097324530), INT32_C( 1599346411), INT32_C(-1815738445), INT32_C( 2114996332), INT32_C( 2143192037), INT32_C( 342894910), INT32_C( 1933006347), INT32_C( 215936041), INT32_C( 2138148935), INT32_C(-1975112588), INT32_C(-1313889253)), simde_mm512_set_epi32(INT32_C( 1272515820), 
INT32_C(-1571014987), INT32_C( 1294359394), INT32_C( 564426410), INT32_C( -305474417), INT32_C(-2099686495), INT32_C( 217917259), INT32_C( 1770631752), INT32_C( 2114996332), INT32_C( 2143192037), INT32_C( -26985081), INT32_C( 603877714), INT32_C( 1592556524), INT32_C( 420570241), INT32_C(-1975112588), INT32_C(-1313889253)), UINT16_C(12290) }, { UINT16_C(14127), simde_mm512_set_epi32(INT32_C( 452796731), INT32_C( -256668338), INT32_C(-1710549095), INT32_C( 1982965424), INT32_C( 1184306045), INT32_C( -221254467), INT32_C( 1420239721), INT32_C( 2028887361), INT32_C(-1950932361), INT32_C( 1650853943), INT32_C( 239751123), INT32_C( 1018010808), INT32_C( -248946240), INT32_C( 701510715), INT32_C( 824235240), INT32_C( 1829156606)), simde_mm512_set_epi32(INT32_C( 452796731), INT32_C( 1031814185), INT32_C(-1710549095), INT32_C( 406415467), INT32_C( 1184306045), INT32_C( -221254467), INT32_C( 419739010), INT32_C( 1708161231), INT32_C(-1950932361), INT32_C( 1650853943), INT32_C( 239751123), INT32_C( 1018010808), INT32_C( -248946240), INT32_C( 701510715), INT32_C(-1571248435), INT32_C( 1829156606)), UINT16_C( 9261) }, { UINT16_C(22801), simde_mm512_set_epi32(INT32_C( 1869800572), INT32_C( 184060195), INT32_C( 81710208), INT32_C( -451284065), INT32_C( 397153235), INT32_C( 120564446), INT32_C(-2128920097), INT32_C( 1498011427), INT32_C( -602736654), INT32_C( -931955343), INT32_C( 270436915), INT32_C( -984637478), INT32_C( 2080482721), INT32_C( 1599947836), INT32_C( 374268618), INT32_C( 202341051)), simde_mm512_set_epi32(INT32_C( 1869800572), INT32_C( 350721255), INT32_C( 1725621650), INT32_C( 2020045509), INT32_C( 397153235), INT32_C( 2059505832), INT32_C(-2128920097), INT32_C( 1498011427), INT32_C( 884679844), INT32_C( -931955343), INT32_C(-1565261303), INT32_C( -984637478), INT32_C( 1047792745), INT32_C( 969830078), INT32_C( 374268618), INT32_C( 202341051)), UINT16_C( 2321) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask16 r = 
simde_mm512_mask_cmpeq_epi32_mask(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_equal_mmask16(HEDLEY_STATIC_CAST(uint16_t, r), HEDLEY_STATIC_CAST(uint16_t, test_vec[i].r)); } return 0; } static int test_simde_mm512_cmpeq_epi64_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__mmask8 r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( 1145569124203592220), INT64_C( 8866992319046943109), INT64_C( 1920152028348566704), INT64_C( 5434169962120345100), INT64_C( 2279810443797316081), INT64_C( 8202334326145056493), INT64_C(-3178237508368099649), INT64_C( 691539695110831332)), simde_mm512_set_epi64(INT64_C( 1145569124203592220), INT64_C( 7456258988741005047), INT64_C( 1920152028348566704), INT64_C(-5531344059509930758), INT64_C( 2279810443797316081), INT64_C( 4212838215119313762), INT64_C(-3178237508368099649), INT64_C( 805234941373423218)), UINT8_C(170) }, { simde_mm512_set_epi64(INT64_C(-9153030146845322943), INT64_C(-3269802522838954453), INT64_C( 4057416377680772191), INT64_C(-4770808841142629569), INT64_C(-3341852712217874901), INT64_C( 2807150139607841402), INT64_C(-4019938614639236980), INT64_C(-5612844059017155926)), simde_mm512_set_epi64(INT64_C(-9153030146845322943), INT64_C(-3269802522838954453), INT64_C( 4057416377680772191), INT64_C( -940603917401247915), INT64_C(-3341852712217874901), INT64_C( 2807150139607841402), INT64_C(-4019938614639236980), INT64_C(-5612844059017155926)), UINT8_C(239) }, { simde_mm512_set_epi64(INT64_C(-6535160640888369935), INT64_C( 4320507985166557084), INT64_C( 5472888856009482484), INT64_C(-3128657528300165720), INT64_C( 7430914891859325397), INT64_C( -863913734109164798), INT64_C(-7794735185814972844), INT64_C( 6163895309000776850)), simde_mm512_set_epi64(INT64_C(-4220461743444256802), INT64_C( 2616373937292152846), INT64_C( 5472888856009482484), INT64_C(-3128657528300165720), INT64_C(-4693544589206901195), INT64_C( -863913734109164798), INT64_C(-7794735185814972844), 
INT64_C(-7914074467628930001)), UINT8_C( 54) }, { simde_mm512_set_epi64(INT64_C(-2366835550617683621), INT64_C(-7526131353484035546), INT64_C( 792273323175818479), INT64_C(-3293855504021481788), INT64_C( 5766970185621377329), INT64_C( 8555682923177627503), INT64_C( 6363802824474944219), INT64_C(-2294667848278645794)), simde_mm512_set_epi64(INT64_C(-2869910567061155737), INT64_C(-7526131353484035546), INT64_C( 792273323175818479), INT64_C( 7002007300200137801), INT64_C( 5766970185621377329), INT64_C( 8555682923177627503), INT64_C( 6363802824474944219), INT64_C( 1235305386817146646)), UINT8_C(110) }, { simde_mm512_set_epi64(INT64_C( 1781453983767744725), INT64_C( 8717105342239974860), INT64_C(-6555437070120516632), INT64_C( -945434448901737124), INT64_C( 2467182069214891728), INT64_C( 6380355612148977321), INT64_C(-8948278762783625779), INT64_C( 4377995125356963906)), simde_mm512_set_epi64(INT64_C( 1781453983767744725), INT64_C( 8717105342239974860), INT64_C(-1574265126641933862), INT64_C(-3609136820784425910), INT64_C( 8413977304256335681), INT64_C(-4046135395677773903), INT64_C(-8948278762783625779), INT64_C( 4377995125356963906)), UINT8_C(195) }, { simde_mm512_set_epi64(INT64_C( 3163831140256245302), INT64_C(-7476767975543057611), INT64_C( 4783231484571490676), INT64_C( 4356333140933542181), INT64_C(-5734470250608567849), INT64_C( 5428089484060124354), INT64_C( 6915844972668556704), INT64_C(-4128418267655054219)), simde_mm512_set_epi64(INT64_C( 3163831140256245302), INT64_C(-7945608864951271413), INT64_C(-3728561425301803734), INT64_C( 6344562138072151085), INT64_C(-5734470250608567849), INT64_C( 5868624435741359252), INT64_C( 6915844972668556704), INT64_C(-4128418267655054219)), UINT8_C(139) }, { simde_mm512_set_epi64(INT64_C(-8545152605640787948), INT64_C(-5234822620280611494), INT64_C(-2932089901585751375), INT64_C( 4017618173912988951), INT64_C(-8696436452927061736), INT64_C( 3602642025812661939), INT64_C( 5777136991119584953), INT64_C(-5473038481952171581)), 
simde_mm512_set_epi64(INT64_C(-8545152605640787948), INT64_C(-7808252369899371159), INT64_C(-2932089901585751375), INT64_C( -26139149052321087), INT64_C(-6986660196527912755), INT64_C( 3602642025812661939), INT64_C( 5777136991119584953), INT64_C(-5473038481952171581)), UINT8_C(167) }, { simde_mm512_set_epi64(INT64_C( 7780170108497689334), INT64_C(-8001282944915881932), INT64_C( 382835809361431399), INT64_C(-5014881555296189468), INT64_C(-1844642447215154571), INT64_C(-5452282829002750089), INT64_C(-7793611854809744260), INT64_C( 938166230586687295)), simde_mm512_set_epi64(INT64_C( 7780170108497689334), INT64_C( 1739290942316187796), INT64_C( 7815402837606564081), INT64_C(-5014881555296189468), INT64_C(-1844642447215154571), INT64_C( -253128228754997390), INT64_C(-7793611854809744260), INT64_C( 938166230586687295)), UINT8_C(155) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask8 r = simde_mm512_cmpeq_epi64_mask(test_vec[i].a, test_vec[i].b); simde_assert_equal_mmask8(r, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_cmpeq_epi64_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; simde__m512i b; simde__mmask8 r; } test_vec[8] = { { UINT8_C( 90), simde_mm512_set_epi64(INT64_C( 7722926897436765530), INT64_C( 7338279138551748064), INT64_C( 8433308126101200079), INT64_C(-4390305748733976547), INT64_C(-1482589068035252753), INT64_C(-5002011091694476743), INT64_C( 5809674310022718254), INT64_C( 7620249298233351482)), simde_mm512_set_epi64(INT64_C( 7722926897436765530), INT64_C( 7338279138551748064), INT64_C(-2656726859984743367), INT64_C(-4390305748733976547), INT64_C(-1482589068035252753), INT64_C(-5269390469191050553), INT64_C( 5809674310022718254), INT64_C( 7620249298233351482)), UINT8_C( 90) }, { UINT8_C(178), simde_mm512_set_epi64(INT64_C(-8806453660480970182), INT64_C(-2722914847628644365), INT64_C( 2171146127585219679), INT64_C(-7200523266853707115), INT64_C( 8505301695237968355), 
INT64_C( 6373940775215479358), INT64_C( 8209357864908427195), INT64_C( -995665125730760835)), simde_mm512_set_epi64(INT64_C(-8806453660480970182), INT64_C(-2722914847628644365), INT64_C(-1180134256156200317), INT64_C(-7200523266853707115), INT64_C(-1610604796376715795), INT64_C( 5419019224867820225), INT64_C( 8209357864908427195), INT64_C( -995665125730760835)), UINT8_C(146) }, { UINT8_C(171), simde_mm512_set_epi64(INT64_C(-6245801519083893310), INT64_C(-7866373458730819532), INT64_C(-5627757407772356197), INT64_C(-2425546480980122794), INT64_C(-8451301604567613199), INT64_C( 1369383717682198649), INT64_C( -532343328754521574), INT64_C(-1062878680437210584)), simde_mm512_set_epi64(INT64_C(-9163399881020056955), INT64_C(-2992244142829238392), INT64_C( -213476403626539965), INT64_C(-8591297333400286921), INT64_C(-8451301604567613199), INT64_C(-8139768780594538635), INT64_C(-4714070518405120331), INT64_C(-1062878680437210584)), UINT8_C( 9) }, { UINT8_C( 28), simde_mm512_set_epi64(INT64_C( 7845956693704412298), INT64_C(-5781930833336581955), INT64_C( 2851517750261041799), INT64_C(-5814293521236182366), INT64_C( 2292150971239308783), INT64_C( 2594053186857735013), INT64_C( 7307535341641173075), INT64_C(-4427478291595527940)), simde_mm512_set_epi64(INT64_C( 536264388241191871), INT64_C(-5781930833336581955), INT64_C( 2851517750261041799), INT64_C( 1349842462394812975), INT64_C( 2292150971239308783), INT64_C( 2594053186857735013), INT64_C( 7307535341641173075), INT64_C(-4427478291595527940)), UINT8_C( 12) }, { UINT8_C(248), simde_mm512_set_epi64(INT64_C( 4900816215694077255), INT64_C(-2732029741423656661), INT64_C( 1082977534221618055), INT64_C(-3092044493389993636), INT64_C(-4299277917890019767), INT64_C(-2055775203132417874), INT64_C( -778633101599852237), INT64_C( -563223173848121636)), simde_mm512_set_epi64(INT64_C( 7049304296219110648), INT64_C(-2732029741423656661), INT64_C( 7088083428992159722), INT64_C(-3092044493389993636), INT64_C(-4299277917890019767), INT64_C( 
4225506809727089751), INT64_C( -778633101599852237), INT64_C( -563223173848121636)), UINT8_C( 88) }, { UINT8_C(171), simde_mm512_set_epi64(INT64_C(-1412821155990992029), INT64_C( 4454576651901490962), INT64_C(-7284760734604447652), INT64_C(-7443130466673006479), INT64_C( 320054597637804434), INT64_C(-8860872372305530355), INT64_C(-8428145646879978292), INT64_C(-6547252853189215611)), simde_mm512_set_epi64(INT64_C(-1412821155990992029), INT64_C(-2354123670646573707), INT64_C( 4506838144989822528), INT64_C(-7443130466673006479), INT64_C(-5147543239321546686), INT64_C(-8860872372305530355), INT64_C(-8428145646879978292), INT64_C(-6547252853189215611)), UINT8_C(131) }, { UINT8_C( 29), simde_mm512_set_epi64(INT64_C( 5675137803130124480), INT64_C( 1211541157654985046), INT64_C( 8724633375562564314), INT64_C(-2760658800846254598), INT64_C(-6714474269646576270), INT64_C( 3484180661422871715), INT64_C( 1469796163712815354), INT64_C(-2336393240308600160)), simde_mm512_set_epi64(INT64_C( 5675137803130124480), INT64_C( 1211541157654985046), INT64_C(-8867413355151838495), INT64_C(-8867147959443474315), INT64_C(-6714474269646576270), INT64_C( 3484180661422871715), INT64_C(-7735267815657951749), INT64_C( 413036036281601883)), UINT8_C( 12) }, { UINT8_C(211), simde_mm512_set_epi64(INT64_C(-6713502673628263139), INT64_C( 1559753162601267291), INT64_C( 5045660940436454371), INT64_C( 7013290440433503154), INT64_C(-8475145246816690249), INT64_C(-6834826688677600633), INT64_C(-2109099044497919348), INT64_C( 1351143524438105934)), simde_mm512_set_epi64(INT64_C( 5625319538109918668), INT64_C( 1559753162601267291), INT64_C( 5045660940436454371), INT64_C(-4654386914804892920), INT64_C( 2407237530895996207), INT64_C(-6834826688677600633), INT64_C( 4684210505965066200), INT64_C( 1351143524438105934)), UINT8_C( 65) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask8 r = simde_mm512_mask_cmpeq_epi64_mask(test_vec[i].k, test_vec[i].a, test_vec[i].b); 
simde_assert_equal_mmask8(r, test_vec[i].r); } return 0; } #if !defined(SIMDE_BUG_GCC_96174) static int test_simde_mm512_cmpeq_ps_mask (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 b[16]; const simde__mmask16 r; } test_vec[] = { { { SIMDE_FLOAT32_C( -330.05), SIMDE_FLOAT32_C( 847.28), SIMDE_FLOAT32_C( 61.79), SIMDE_FLOAT32_C( 748.75), SIMDE_FLOAT32_C( -125.94), SIMDE_FLOAT32_C( -287.83), SIMDE_FLOAT32_C( -156.45), SIMDE_FLOAT32_C( -904.26), SIMDE_FLOAT32_C( 393.62), SIMDE_FLOAT32_C( 694.71), SIMDE_FLOAT32_C( 345.37), SIMDE_FLOAT32_C( 245.98), SIMDE_FLOAT32_C( -522.67), SIMDE_FLOAT32_C( 140.34), SIMDE_FLOAT32_C( -555.38), SIMDE_FLOAT32_C( 596.45) }, { SIMDE_FLOAT32_C( -330.05), SIMDE_FLOAT32_C( -812.74), SIMDE_FLOAT32_C( 61.79), SIMDE_FLOAT32_C( -304.55), SIMDE_FLOAT32_C( 95.53), SIMDE_FLOAT32_C( -287.83), SIMDE_FLOAT32_C( -156.45), SIMDE_FLOAT32_C( 699.14), SIMDE_FLOAT32_C( 676.85), SIMDE_FLOAT32_C( 694.71), SIMDE_FLOAT32_C( 345.37), SIMDE_FLOAT32_C( 245.98), SIMDE_FLOAT32_C( -161.51), SIMDE_FLOAT32_C( 140.34), SIMDE_FLOAT32_C( -399.39), SIMDE_FLOAT32_C( 596.45) }, UINT16_C(44645) }, { { SIMDE_FLOAT32_C( -717.84), SIMDE_FLOAT32_C( 512.02), SIMDE_FLOAT32_C( 98.80), SIMDE_FLOAT32_C( -966.72), SIMDE_FLOAT32_C( -60.71), SIMDE_FLOAT32_C( -584.27), SIMDE_FLOAT32_C( 204.10), SIMDE_FLOAT32_C( 295.96), SIMDE_FLOAT32_C( -70.24), SIMDE_FLOAT32_C( 661.77), SIMDE_FLOAT32_C( 894.04), SIMDE_FLOAT32_C( 352.28), SIMDE_FLOAT32_C( 620.44), SIMDE_FLOAT32_C( 936.22), SIMDE_FLOAT32_C( 428.81), SIMDE_FLOAT32_C( 543.55) }, { SIMDE_FLOAT32_C( -717.84), SIMDE_FLOAT32_C( 313.36), SIMDE_FLOAT32_C( -806.61), SIMDE_FLOAT32_C( 690.40), SIMDE_FLOAT32_C( 646.60), SIMDE_FLOAT32_C( -584.27), SIMDE_FLOAT32_C( 204.10), SIMDE_FLOAT32_C( 460.04), SIMDE_FLOAT32_C( 733.57), SIMDE_FLOAT32_C( 661.77), SIMDE_FLOAT32_C( 894.04), SIMDE_FLOAT32_C( -252.47), SIMDE_FLOAT32_C( -340.15), SIMDE_FLOAT32_C( 936.22), SIMDE_FLOAT32_C( 428.81), SIMDE_FLOAT32_C( 543.55) }, 
UINT16_C(58977) }, { { SIMDE_FLOAT32_C( 375.91), SIMDE_FLOAT32_C( -19.72), SIMDE_FLOAT32_C( 336.05), SIMDE_FLOAT32_C( -540.20), SIMDE_FLOAT32_C( -665.47), SIMDE_FLOAT32_C( -492.58), SIMDE_FLOAT32_C( 15.30), SIMDE_FLOAT32_C( 126.92), SIMDE_FLOAT32_C( 767.58), SIMDE_FLOAT32_C( 861.15), SIMDE_FLOAT32_C( -58.47), SIMDE_FLOAT32_C( -387.52), SIMDE_FLOAT32_C( 800.70), SIMDE_FLOAT32_C( -537.31), SIMDE_FLOAT32_C( -644.51), SIMDE_FLOAT32_C( -955.44) }, { SIMDE_FLOAT32_C( 375.91), SIMDE_FLOAT32_C( -356.80), SIMDE_FLOAT32_C( 336.05), SIMDE_FLOAT32_C( -10.02), SIMDE_FLOAT32_C( -64.34), SIMDE_FLOAT32_C( 408.76), SIMDE_FLOAT32_C( -734.89), SIMDE_FLOAT32_C( 126.92), SIMDE_FLOAT32_C( 10.21), SIMDE_FLOAT32_C( 861.15), SIMDE_FLOAT32_C( 153.18), SIMDE_FLOAT32_C( 569.21), SIMDE_FLOAT32_C( 321.66), SIMDE_FLOAT32_C( -537.31), SIMDE_FLOAT32_C( 613.36), SIMDE_FLOAT32_C( -776.54) }, UINT16_C( 8837) }, { { SIMDE_FLOAT32_C( -842.85), SIMDE_FLOAT32_C( -336.15), SIMDE_FLOAT32_C( -966.61), SIMDE_FLOAT32_C( 670.20), SIMDE_FLOAT32_C( 783.55), SIMDE_FLOAT32_C( 477.19), SIMDE_FLOAT32_C( -864.95), SIMDE_FLOAT32_C( -372.20), SIMDE_FLOAT32_C( -94.30), SIMDE_FLOAT32_C( -879.40), SIMDE_FLOAT32_C( -161.82), SIMDE_FLOAT32_C( 100.12), SIMDE_FLOAT32_C( 850.32), SIMDE_FLOAT32_C( 476.49), SIMDE_FLOAT32_C( -174.44), SIMDE_FLOAT32_C( 934.13) }, { SIMDE_FLOAT32_C( 404.40), SIMDE_FLOAT32_C( -570.57), SIMDE_FLOAT32_C( -86.01), SIMDE_FLOAT32_C( 670.20), SIMDE_FLOAT32_C( 312.45), SIMDE_FLOAT32_C( 381.45), SIMDE_FLOAT32_C( -864.95), SIMDE_FLOAT32_C( -372.20), SIMDE_FLOAT32_C( -94.30), SIMDE_FLOAT32_C( -879.40), SIMDE_FLOAT32_C( -161.82), SIMDE_FLOAT32_C( 100.12), SIMDE_FLOAT32_C( 850.32), SIMDE_FLOAT32_C( 386.54), SIMDE_FLOAT32_C( 295.11), SIMDE_FLOAT32_C( -992.52) }, UINT16_C( 8136) }, { { SIMDE_FLOAT32_C( 877.31), SIMDE_FLOAT32_C( 884.47), SIMDE_FLOAT32_C( -380.38), SIMDE_FLOAT32_C( -700.84), SIMDE_FLOAT32_C( 945.89), SIMDE_FLOAT32_C( 280.68), SIMDE_FLOAT32_C( 832.06), SIMDE_FLOAT32_C( 359.22), SIMDE_FLOAT32_C( 
586.70), SIMDE_FLOAT32_C( 448.55), SIMDE_FLOAT32_C( 510.98), SIMDE_FLOAT32_C( -325.00), SIMDE_FLOAT32_C( 847.13), SIMDE_FLOAT32_C( -548.42), SIMDE_FLOAT32_C( -663.23), SIMDE_FLOAT32_C( 110.33) }, { SIMDE_FLOAT32_C( 877.31), SIMDE_FLOAT32_C( 884.47), SIMDE_FLOAT32_C( -380.38), SIMDE_FLOAT32_C( -700.84), SIMDE_FLOAT32_C( 945.89), SIMDE_FLOAT32_C( 280.68), SIMDE_FLOAT32_C( 832.06), SIMDE_FLOAT32_C( 359.22), SIMDE_FLOAT32_C( -806.36), SIMDE_FLOAT32_C( -673.67), SIMDE_FLOAT32_C( 510.98), SIMDE_FLOAT32_C( -346.39), SIMDE_FLOAT32_C( 789.45), SIMDE_FLOAT32_C( -548.42), SIMDE_FLOAT32_C( 989.10), SIMDE_FLOAT32_C( -487.94) }, UINT16_C( 9471) }, { { SIMDE_FLOAT32_C( -787.05), SIMDE_FLOAT32_C( 806.72), SIMDE_FLOAT32_C( 520.29), SIMDE_FLOAT32_C( -321.05), SIMDE_FLOAT32_C( -366.95), SIMDE_FLOAT32_C( -748.89), SIMDE_FLOAT32_C( 687.71), SIMDE_FLOAT32_C( -416.88), SIMDE_FLOAT32_C( -561.92), SIMDE_FLOAT32_C( -926.01), SIMDE_FLOAT32_C( 843.79), SIMDE_FLOAT32_C( 849.56), SIMDE_FLOAT32_C( -51.86), SIMDE_FLOAT32_C( -481.78), SIMDE_FLOAT32_C( 491.33), SIMDE_FLOAT32_C( -936.26) }, { SIMDE_FLOAT32_C( -787.05), SIMDE_FLOAT32_C( 806.72), SIMDE_FLOAT32_C( 299.54), SIMDE_FLOAT32_C( 884.74), SIMDE_FLOAT32_C( -278.71), SIMDE_FLOAT32_C( -748.89), SIMDE_FLOAT32_C( 570.30), SIMDE_FLOAT32_C( -416.88), SIMDE_FLOAT32_C( -561.92), SIMDE_FLOAT32_C( 59.09), SIMDE_FLOAT32_C( 843.79), SIMDE_FLOAT32_C( 849.56), SIMDE_FLOAT32_C( -136.84), SIMDE_FLOAT32_C( -481.78), SIMDE_FLOAT32_C( 491.33), SIMDE_FLOAT32_C( -936.26) }, UINT16_C(60835) }, { { SIMDE_FLOAT32_C( -837.49), SIMDE_FLOAT32_C( -79.02), SIMDE_FLOAT32_C( -844.39), SIMDE_FLOAT32_C( -973.47), SIMDE_FLOAT32_C( -499.80), SIMDE_FLOAT32_C( 961.14), SIMDE_FLOAT32_C( 336.59), SIMDE_FLOAT32_C( -368.95), SIMDE_FLOAT32_C( 727.99), SIMDE_FLOAT32_C( -900.81), SIMDE_FLOAT32_C( 655.07), SIMDE_FLOAT32_C( -624.42), SIMDE_FLOAT32_C( 244.09), SIMDE_FLOAT32_C( 360.96), SIMDE_FLOAT32_C( -837.70), SIMDE_FLOAT32_C( -929.19) }, { SIMDE_FLOAT32_C( -837.49), SIMDE_FLOAT32_C( 
-79.02), SIMDE_FLOAT32_C( -169.54), SIMDE_FLOAT32_C( 100.98), SIMDE_FLOAT32_C( -499.80), SIMDE_FLOAT32_C( 961.14), SIMDE_FLOAT32_C( -254.87), SIMDE_FLOAT32_C( 592.42), SIMDE_FLOAT32_C( 312.40), SIMDE_FLOAT32_C( 958.12), SIMDE_FLOAT32_C( -284.13), SIMDE_FLOAT32_C( -624.42), SIMDE_FLOAT32_C( -196.30), SIMDE_FLOAT32_C( 360.96), SIMDE_FLOAT32_C( -837.70), SIMDE_FLOAT32_C( -975.45) }, UINT16_C(26675) }, { { SIMDE_FLOAT32_C( 928.69), SIMDE_FLOAT32_C( -3.95), SIMDE_FLOAT32_C( -214.33), SIMDE_FLOAT32_C( -971.80), SIMDE_FLOAT32_C( -780.70), SIMDE_FLOAT32_C( 950.39), SIMDE_FLOAT32_C( -857.68), SIMDE_FLOAT32_C( -246.08), SIMDE_FLOAT32_C( 789.62), SIMDE_FLOAT32_C( -840.89), SIMDE_FLOAT32_C( 194.42), SIMDE_FLOAT32_C( -873.48), SIMDE_FLOAT32_C( -365.78), SIMDE_FLOAT32_C( -117.81), SIMDE_FLOAT32_C( 601.86), SIMDE_FLOAT32_C( 913.26) }, { SIMDE_FLOAT32_C( 928.69), SIMDE_FLOAT32_C( -3.95), SIMDE_FLOAT32_C( -214.33), SIMDE_FLOAT32_C( 377.34), SIMDE_FLOAT32_C( -525.21), SIMDE_FLOAT32_C( -436.16), SIMDE_FLOAT32_C( 186.25), SIMDE_FLOAT32_C( -246.08), SIMDE_FLOAT32_C( 623.36), SIMDE_FLOAT32_C( -840.89), SIMDE_FLOAT32_C( 194.42), SIMDE_FLOAT32_C( -873.48), SIMDE_FLOAT32_C( -679.52), SIMDE_FLOAT32_C( 447.41), SIMDE_FLOAT32_C( -608.79), SIMDE_FLOAT32_C( 721.43) }, UINT16_C( 3719) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__mmask16 r = simde_mm512_cmpeq_ps_mask(a, b); simde_assert_equal_mmask16(r, test_vec[i].r); } return 0; } static int test_simde_mm512_cmpeq_pd_mask (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 b[8]; const simde__mmask8 r; } test_vec[] = { { { SIMDE_FLOAT64_C( 271.69), SIMDE_FLOAT64_C( 217.70), SIMDE_FLOAT64_C( 925.72), SIMDE_FLOAT64_C( 520.03), SIMDE_FLOAT64_C( 127.68), SIMDE_FLOAT64_C( -63.68), SIMDE_FLOAT64_C( -338.02), SIMDE_FLOAT64_C( 823.86) }, { SIMDE_FLOAT64_C( 690.45), 
SIMDE_FLOAT64_C( 347.44), SIMDE_FLOAT64_C( -649.53), SIMDE_FLOAT64_C( -641.60), SIMDE_FLOAT64_C( 658.05), SIMDE_FLOAT64_C( -212.84), SIMDE_FLOAT64_C( 796.21), SIMDE_FLOAT64_C( -36.15) }, UINT8_C( 0) }, { { SIMDE_FLOAT64_C( 417.99), SIMDE_FLOAT64_C( -883.29), SIMDE_FLOAT64_C( 571.34), SIMDE_FLOAT64_C( 535.08), SIMDE_FLOAT64_C( -923.74), SIMDE_FLOAT64_C( 773.69), SIMDE_FLOAT64_C( -589.26), SIMDE_FLOAT64_C( 350.94) }, { SIMDE_FLOAT64_C( 179.83), SIMDE_FLOAT64_C( 445.85), SIMDE_FLOAT64_C( -677.60), SIMDE_FLOAT64_C( -480.10), SIMDE_FLOAT64_C( -974.87), SIMDE_FLOAT64_C( -558.93), SIMDE_FLOAT64_C( 47.77), SIMDE_FLOAT64_C( 924.57) }, UINT8_C( 0) }, { { SIMDE_FLOAT64_C( -695.12), SIMDE_FLOAT64_C( -819.01), SIMDE_FLOAT64_C( 861.37), SIMDE_FLOAT64_C( -968.92), SIMDE_FLOAT64_C( -642.09), SIMDE_FLOAT64_C( 475.36), SIMDE_FLOAT64_C( -653.40), SIMDE_FLOAT64_C( 274.91) }, { SIMDE_FLOAT64_C( 408.72), SIMDE_FLOAT64_C( -646.21), SIMDE_FLOAT64_C( -843.45), SIMDE_FLOAT64_C( 107.84), SIMDE_FLOAT64_C( 465.38), SIMDE_FLOAT64_C( -336.34), SIMDE_FLOAT64_C( -820.42), SIMDE_FLOAT64_C( -749.33) }, UINT8_C( 0) }, { { SIMDE_FLOAT64_C( -9.72), SIMDE_FLOAT64_C( 643.44), SIMDE_FLOAT64_C( 336.27), SIMDE_FLOAT64_C( -313.97), SIMDE_FLOAT64_C( -863.83), SIMDE_FLOAT64_C( -448.10), SIMDE_FLOAT64_C( 771.84), SIMDE_FLOAT64_C( 249.27) }, { SIMDE_FLOAT64_C( -506.33), SIMDE_FLOAT64_C( 28.98), SIMDE_FLOAT64_C( -919.42), SIMDE_FLOAT64_C( -710.08), SIMDE_FLOAT64_C( -376.38), SIMDE_FLOAT64_C( 181.22), SIMDE_FLOAT64_C( -315.61), SIMDE_FLOAT64_C( -521.71) }, UINT8_C( 0) }, { { SIMDE_FLOAT64_C( -309.90), SIMDE_FLOAT64_C( -566.85), SIMDE_FLOAT64_C( 953.96), SIMDE_FLOAT64_C( -760.71), SIMDE_FLOAT64_C( 715.80), SIMDE_FLOAT64_C( 511.82), SIMDE_FLOAT64_C( 185.57), SIMDE_FLOAT64_C( 958.96) }, { SIMDE_FLOAT64_C( -823.31), SIMDE_FLOAT64_C( 653.67), SIMDE_FLOAT64_C( 300.89), SIMDE_FLOAT64_C( -999.35), SIMDE_FLOAT64_C( -123.69), SIMDE_FLOAT64_C( -935.82), SIMDE_FLOAT64_C( -283.75), SIMDE_FLOAT64_C( -911.67) }, UINT8_C( 0) }, { 
{ SIMDE_FLOAT64_C( -370.17), SIMDE_FLOAT64_C( 581.24), SIMDE_FLOAT64_C( 903.15), SIMDE_FLOAT64_C( -702.97), SIMDE_FLOAT64_C( -784.81), SIMDE_FLOAT64_C( -282.51), SIMDE_FLOAT64_C( -162.91), SIMDE_FLOAT64_C( -67.74) }, { SIMDE_FLOAT64_C( -458.51), SIMDE_FLOAT64_C( -138.00), SIMDE_FLOAT64_C( 634.22), SIMDE_FLOAT64_C( -641.32), SIMDE_FLOAT64_C( -700.95), SIMDE_FLOAT64_C( -830.62), SIMDE_FLOAT64_C( -270.15), SIMDE_FLOAT64_C( -342.52) }, UINT8_C( 0) }, { { SIMDE_FLOAT64_C( -741.30), SIMDE_FLOAT64_C( -961.63), SIMDE_FLOAT64_C( -159.42), SIMDE_FLOAT64_C( 596.72), SIMDE_FLOAT64_C( -872.26), SIMDE_FLOAT64_C( -77.79), SIMDE_FLOAT64_C( 608.69), SIMDE_FLOAT64_C( 181.91) }, { SIMDE_FLOAT64_C( -693.78), SIMDE_FLOAT64_C( -430.90), SIMDE_FLOAT64_C( -141.87), SIMDE_FLOAT64_C( -384.25), SIMDE_FLOAT64_C( -74.70), SIMDE_FLOAT64_C( 434.70), SIMDE_FLOAT64_C( -4.99), SIMDE_FLOAT64_C( 104.05) }, UINT8_C( 0) }, { { SIMDE_FLOAT64_C( 817.79), SIMDE_FLOAT64_C( 652.33), SIMDE_FLOAT64_C( -345.32), SIMDE_FLOAT64_C( 150.71), SIMDE_FLOAT64_C( 939.32), SIMDE_FLOAT64_C( -867.25), SIMDE_FLOAT64_C( 158.96), SIMDE_FLOAT64_C( -396.12) }, { SIMDE_FLOAT64_C( 363.34), SIMDE_FLOAT64_C( 571.53), SIMDE_FLOAT64_C( -232.25), SIMDE_FLOAT64_C( 496.58), SIMDE_FLOAT64_C( 40.81), SIMDE_FLOAT64_C( -69.57), SIMDE_FLOAT64_C( 792.81), SIMDE_FLOAT64_C( 833.83) }, UINT8_C( 0) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__mmask8 r = simde_mm512_cmpeq_pd_mask(a, b); simde_assert_equal_mmask8(r, test_vec[i].r); } return 0; } #endif /* !defined(SIMDE_BUG_GCC_96174) */ SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmpeq_epi8_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmpeq_epi32_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cmpeq_epi32_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmpeq_epi64_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cmpeq_epi64_mask) #if 
!defined(SIMDE_BUG_GCC_96174) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmpeq_ps_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmpeq_pd_mask) #endif SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/cmpge.c000066400000000000000000001317431400333146700165240ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #define SIMDE_TEST_X86_AVX512_INSN cmpge #include #include #include static int test_simde_mm512_cmpge_epi8_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__mmask64 r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( 92), INT8_C(-121), INT8_C( 120), INT8_C( -19), INT8_C( -73), INT8_C( 22), INT8_C( -66), INT8_C( -29), INT8_C( 55), INT8_C( -63), INT8_C( -45), INT8_C(-119), INT8_C( 30), INT8_C( -29), INT8_C( -26), INT8_C( 89), INT8_C( -13), INT8_C( 119), INT8_C( -88), INT8_C( 109), INT8_C( 68), INT8_C( -10), INT8_C( 0), INT8_C( -18), INT8_C( 1), INT8_C( 116), INT8_C( -13), INT8_C( -73), INT8_C(-122), INT8_C( 0), INT8_C( 111), INT8_C( 95), INT8_C( -20), INT8_C( 3), INT8_C( 122), INT8_C( -43), INT8_C( 13), INT8_C( 25), INT8_C( -35), INT8_C(-127), INT8_C( 82), INT8_C( 90), INT8_C( 53), INT8_C( 123), INT8_C( 73), INT8_C( 108), INT8_C( -18), INT8_C( 15), INT8_C( 115), INT8_C( 54), INT8_C( 102), INT8_C( 118), INT8_C( 39), INT8_C( -77), INT8_C( 45), INT8_C( 81), INT8_C( -17), INT8_C( -28), INT8_C( 67), INT8_C( -85), INT8_C( 79), INT8_C(-113), INT8_C(-122), INT8_C( 124)), simde_mm512_set_epi8(INT8_C( 9), INT8_C(-121), INT8_C( 120), INT8_C( -36), INT8_C( 36), INT8_C(-103), INT8_C(-104), INT8_C( 25), INT8_C(-111), INT8_C( -63), INT8_C( -35), INT8_C(-120), INT8_C( 28), INT8_C( -44), INT8_C( -26), INT8_C( -86), INT8_C( -13), INT8_C( 53), INT8_C( -88), INT8_C(-107), INT8_C( 68), INT8_C( 42), INT8_C(-118), INT8_C( 111), INT8_C( 54), INT8_C( -58), INT8_C( -13), INT8_C( 27), INT8_C( 23), INT8_C( 41), INT8_C(-119), INT8_C( 44), INT8_C( 7), INT8_C(-120), INT8_C( 32), INT8_C( -43), INT8_C( 114), INT8_C( -72), INT8_C( 73), INT8_C( -96), INT8_C( 96), INT8_C( 110), INT8_C( -81), INT8_C( -76), INT8_C( 103), INT8_C(-100), INT8_C( -22), INT8_C( 18), INT8_C( 115), INT8_C( 54), INT8_C( -40), INT8_C( 125), INT8_C( 110), INT8_C( 31), INT8_C( 51), INT8_C(-104), INT8_C(-115), INT8_C( -27), INT8_C( 62), 
INT8_C( -85), INT8_C( 49), INT8_C(-115), INT8_C( 38), INT8_C( 4)), UINT64_C(0xf6dffa637436e1bd) }, { simde_mm512_set_epi8(INT8_C( 16), INT8_C( 41), INT8_C( 102), INT8_C( -95), INT8_C( 17), INT8_C( 42), INT8_C( -86), INT8_C( 38), INT8_C( 62), INT8_C( -56), INT8_C( 60), INT8_C( 19), INT8_C(-100), INT8_C(-107), INT8_C( 105), INT8_C( -76), INT8_C( 10), INT8_C( -9), INT8_C( -12), INT8_C( -56), INT8_C( -71), INT8_C( 96), INT8_C( 31), INT8_C( 24), INT8_C( 68), INT8_C( -7), INT8_C( -27), INT8_C( 3), INT8_C( -21), INT8_C( 50), INT8_C( -67), INT8_C( -39), INT8_C(-108), INT8_C( 62), INT8_C( 101), INT8_C( 56), INT8_C( -79), INT8_C( -37), INT8_C( -99), INT8_C( -56), INT8_C(-119), INT8_C( -56), INT8_C( -8), INT8_C( -16), INT8_C( 115), INT8_C( -95), INT8_C( -73), INT8_C( 54), INT8_C( 125), INT8_C( 85), INT8_C( 78), INT8_C( -65), INT8_C( 4), INT8_C( 38), INT8_C( -84), INT8_C( -53), INT8_C( 68), INT8_C(-108), INT8_C(-121), INT8_C(-115), INT8_C( -59), INT8_C( -78), INT8_C(-111), INT8_C( 56)), simde_mm512_set_epi8(INT8_C(-104), INT8_C(-102), INT8_C( 102), INT8_C( -95), INT8_C( 69), INT8_C( 13), INT8_C( -21), INT8_C( 45), INT8_C( 62), INT8_C(-120), INT8_C(-111), INT8_C( 32), INT8_C(-107), INT8_C( -30), INT8_C( 99), INT8_C( -64), INT8_C( 8), INT8_C( -42), INT8_C( 81), INT8_C( -34), INT8_C( -46), INT8_C( 26), INT8_C( 31), INT8_C( -2), INT8_C( 68), INT8_C( -7), INT8_C( -71), INT8_C( 46), INT8_C( -21), INT8_C( -73), INT8_C( 21), INT8_C( 83), INT8_C(-108), INT8_C( -97), INT8_C( -69), INT8_C( 73), INT8_C( 57), INT8_C( -37), INT8_C( 21), INT8_C( 82), INT8_C(-119), INT8_C(-126), INT8_C( 126), INT8_C( 91), INT8_C( 115), INT8_C( 31), INT8_C( -79), INT8_C( 28), INT8_C(-106), INT8_C( -18), INT8_C( 65), INT8_C(-104), INT8_C( 81), INT8_C( 38), INT8_C( -84), INT8_C( -2), INT8_C( -14), INT8_C( 85), INT8_C( -80), INT8_C( 80), INT8_C( 48), INT8_C( 93), INT8_C( 79), INT8_C( 127)), UINT64_C(0xf4eac7ece4cbf680) }, { simde_mm512_set_epi8(INT8_C( 50), INT8_C( 43), INT8_C( -68), INT8_C( 97), INT8_C( -26), 
INT8_C(-103), INT8_C( 71), INT8_C(-107), INT8_C( 91), INT8_C( 45), INT8_C( -11), INT8_C( 47), INT8_C( 29), INT8_C( -56), INT8_C( 26), INT8_C( -9), INT8_C( 10), INT8_C( 36), INT8_C(-116), INT8_C( -53), INT8_C( 41), INT8_C( 1), INT8_C( -23), INT8_C( 61), INT8_C(-127), INT8_C( -4), INT8_C( 48), INT8_C( -68), INT8_C( 89), INT8_C(-112), INT8_C( -31), INT8_C( 120), INT8_C( 35), INT8_C( 62), INT8_C( -21), INT8_C(-114), INT8_C(-104), INT8_C( 57), INT8_C( 42), INT8_C(-111), INT8_C( 94), INT8_C( -63), INT8_C( 87), INT8_C( 64), INT8_C( -65), INT8_C( -2), INT8_C( 110), INT8_C( -8), INT8_C( 63), INT8_C( -51), INT8_C( -4), INT8_C( 32), INT8_C( -65), INT8_C( 55), INT8_C( 14), INT8_C( 81), INT8_C(-123), INT8_C(-100), INT8_C( -39), INT8_C( -44), INT8_C( 22), INT8_C( 112), INT8_C( 16), INT8_C( 15)), simde_mm512_set_epi8(INT8_C( 50), INT8_C( -11), INT8_C( -68), INT8_C( -31), INT8_C( 105), INT8_C(-106), INT8_C( 98), INT8_C( 51), INT8_C( 58), INT8_C( 103), INT8_C( 111), INT8_C(-127), INT8_C( 68), INT8_C( -56), INT8_C( 124), INT8_C(-119), INT8_C( 74), INT8_C( -62), INT8_C(-116), INT8_C( 37), INT8_C( -12), INT8_C( 114), INT8_C( 0), INT8_C( 61), INT8_C( 103), INT8_C( -4), INT8_C(-105), INT8_C( -68), INT8_C( 39), INT8_C(-100), INT8_C( -93), INT8_C( 11), INT8_C( -80), INT8_C( -19), INT8_C( -22), INT8_C( -39), INT8_C( 127), INT8_C( -38), INT8_C(-125), INT8_C(-111), INT8_C( 84), INT8_C( -96), INT8_C( 87), INT8_C( -22), INT8_C( -5), INT8_C( -3), INT8_C(-127), INT8_C( 41), INT8_C( 74), INT8_C( 72), INT8_C( -4), INT8_C( 28), INT8_C(-115), INT8_C( 93), INT8_C( 102), INT8_C( 44), INT8_C(-103), INT8_C( -29), INT8_C( -50), INT8_C( 48), INT8_C( -96), INT8_C( -50), INT8_C( 46), INT8_C( -65)), UINT64_C(0xf495697be7f6392d) }, { simde_mm512_set_epi8(INT8_C( -97), INT8_C( 28), INT8_C( -58), INT8_C( 11), INT8_C( -14), INT8_C( 126), INT8_C( 81), INT8_C( 45), INT8_C( -23), INT8_C( 120), INT8_C( -83), INT8_C( -16), INT8_C( 7), INT8_C( 51), INT8_C( -57), INT8_C( -50), INT8_C( -21), INT8_C( 98), INT8_C( 88), 
INT8_C( 0), INT8_C( -66), INT8_C( 3), INT8_C( 124), INT8_C(-113), INT8_C( 50), INT8_C( 32), INT8_C( -85), INT8_C( -93), INT8_C( -44), INT8_C( -13), INT8_C( -94), INT8_C( 17), INT8_C( 122), INT8_C( 79), INT8_C(-116), INT8_C( 43), INT8_C( -77), INT8_C(-125), INT8_C( -23), INT8_C(-120), INT8_C( 96), INT8_C( -64), INT8_C( -23), INT8_C( -46), INT8_C( -29), INT8_C( -71), INT8_C( 71), INT8_C( -80), INT8_C( 44), INT8_C( -92), INT8_C( -31), INT8_C( 26), INT8_C( 8), INT8_C( 52), INT8_C( 117), INT8_C( 123), INT8_C( -63), INT8_C( 45), INT8_C( 95), INT8_C( 24), INT8_C(-108), INT8_C( 18), INT8_C( -60), INT8_C( 28)), simde_mm512_set_epi8(INT8_C( -23), INT8_C(-101), INT8_C( 116), INT8_C( 127), INT8_C( 96), INT8_C( 40), INT8_C( -97), INT8_C( 40), INT8_C( 86), INT8_C( -44), INT8_C( 70), INT8_C( -71), INT8_C( 62), INT8_C( -21), INT8_C( 66), INT8_C( 68), INT8_C( -87), INT8_C( -61), INT8_C( 48), INT8_C( -70), INT8_C( 18), INT8_C( -78), INT8_C( -98), INT8_C( 117), INT8_C( 74), INT8_C( 32), INT8_C( 93), INT8_C( 125), INT8_C( -47), INT8_C( -60), INT8_C( -86), INT8_C( 117), INT8_C( 122), INT8_C( -54), INT8_C( 50), INT8_C( 123), INT8_C( -31), INT8_C( -74), INT8_C( -64), INT8_C( 54), INT8_C( -81), INT8_C( 60), INT8_C( 31), INT8_C( -23), INT8_C( 108), INT8_C(-119), INT8_C( -92), INT8_C( -80), INT8_C( -30), INT8_C( -37), INT8_C( 51), INT8_C( -36), INT8_C( 8), INT8_C( 52), INT8_C( 97), INT8_C( 123), INT8_C( -49), INT8_C(-124), INT8_C( 95), INT8_C( -83), INT8_C( 70), INT8_C( -50), INT8_C( -61), INT8_C( 25)), UINT64_C(0x4754f64cc2879f77) }, { simde_mm512_set_epi8(INT8_C( 121), INT8_C( -20), INT8_C( -89), INT8_C( -94), INT8_C( 112), INT8_C( -27), INT8_C( 81), INT8_C( -54), INT8_C( -64), INT8_C(-114), INT8_C( 48), INT8_C( -89), INT8_C( -61), INT8_C( 26), INT8_C( 43), INT8_C( 29), INT8_C( 0), INT8_C( 125), INT8_C( -42), INT8_C( -67), INT8_C( 15), INT8_C( 120), INT8_C( 36), INT8_C( 40), INT8_C( -53), INT8_C( 34), INT8_C(-108), INT8_C( -58), INT8_C( 26), INT8_C(-111), INT8_C( 63), INT8_C( -98), 
INT8_C( 65), INT8_C( 8), INT8_C(-124), INT8_C( 96), INT8_C( -13), INT8_C( -98), INT8_C( 99), INT8_C( 90), INT8_C( 56), INT8_C( -45), INT8_C(-108), INT8_C( -19), INT8_C(-124), INT8_C( -27), INT8_C( 22), INT8_C( 126), INT8_C(-106), INT8_C( -68), INT8_C( -60), INT8_C( 8), INT8_C( -54), INT8_C( 93), INT8_C( -33), INT8_C( -27), INT8_C( -7), INT8_C( 27), INT8_C(-122), INT8_C( -88), INT8_C( 23), INT8_C( 6), INT8_C( 45), INT8_C( -21)), simde_mm512_set_epi8(INT8_C( 105), INT8_C( 52), INT8_C( 85), INT8_C(-104), INT8_C( 57), INT8_C( -31), INT8_C( -38), INT8_C(-124), INT8_C(-107), INT8_C( -2), INT8_C( 55), INT8_C( 46), INT8_C( -71), INT8_C( 77), INT8_C( 18), INT8_C( 70), INT8_C( 89), INT8_C( 125), INT8_C( -42), INT8_C(-125), INT8_C( 121), INT8_C( -11), INT8_C( -69), INT8_C( -59), INT8_C( -53), INT8_C( 34), INT8_C( 9), INT8_C( 64), INT8_C( -61), INT8_C( -25), INT8_C(-115), INT8_C( 100), INT8_C( 65), INT8_C( 8), INT8_C( 69), INT8_C( -8), INT8_C( -15), INT8_C( -51), INT8_C( 1), INT8_C( 90), INT8_C( 115), INT8_C( 51), INT8_C( -91), INT8_C( 56), INT8_C( 64), INT8_C( -39), INT8_C(-119), INT8_C( -28), INT8_C( -54), INT8_C( 28), INT8_C( 54), INT8_C( -8), INT8_C( -54), INT8_C(-128), INT8_C( -28), INT8_C( -71), INT8_C( 107), INT8_C( -66), INT8_C(-114), INT8_C( -88), INT8_C( 34), INT8_C( -83), INT8_C( -21), INT8_C( -64)), UINT64_C(0x9f8a77cadb071d57) }, { simde_mm512_set_epi8(INT8_C( 33), INT8_C( 121), INT8_C( 125), INT8_C( 35), INT8_C(-103), INT8_C( -48), INT8_C( -22), INT8_C( 38), INT8_C( -81), INT8_C( 9), INT8_C( -11), INT8_C(-124), INT8_C( 71), INT8_C( 9), INT8_C( -42), INT8_C( 118), INT8_C( 67), INT8_C( 45), INT8_C( 51), INT8_C( -92), INT8_C( 126), INT8_C( 108), INT8_C(-123), INT8_C( -71), INT8_C( 113), INT8_C( 32), INT8_C( 71), INT8_C( 3), INT8_C( -26), INT8_C( 82), INT8_C( -81), INT8_C( -20), INT8_C( -55), INT8_C( 112), INT8_C( 66), INT8_C( 37), INT8_C( 67), INT8_C( -69), INT8_C( 64), INT8_C( 39), INT8_C( 72), INT8_C( 45), INT8_C( 120), INT8_C( -5), INT8_C(-109), INT8_C( 62), 
INT8_C( 17), INT8_C( 31), INT8_C( -30), INT8_C( -58), INT8_C( 56), INT8_C( 21), INT8_C( 72), INT8_C( -75), INT8_C( -34), INT8_C( 120), INT8_C( 95), INT8_C( 108), INT8_C( 32), INT8_C( 64), INT8_C(-128), INT8_C( 102), INT8_C( -21), INT8_C( 28)), simde_mm512_set_epi8(INT8_C( 100), INT8_C( 121), INT8_C( 18), INT8_C( 28), INT8_C(-117), INT8_C( 107), INT8_C( 3), INT8_C( -62), INT8_C( 42), INT8_C( 72), INT8_C( 91), INT8_C( 86), INT8_C( -72), INT8_C( 9), INT8_C( -80), INT8_C( 118), INT8_C( 122), INT8_C(-108), INT8_C( -70), INT8_C( -63), INT8_C( 56), INT8_C( 71), INT8_C( -14), INT8_C( 49), INT8_C( -73), INT8_C( 53), INT8_C( -29), INT8_C( 3), INT8_C( -73), INT8_C( 43), INT8_C( -22), INT8_C( 85), INT8_C( -26), INT8_C( -9), INT8_C( 66), INT8_C( 1), INT8_C( -13), INT8_C( 60), INT8_C(-119), INT8_C( -83), INT8_C(-122), INT8_C( -64), INT8_C( -83), INT8_C( -74), INT8_C( 119), INT8_C( -8), INT8_C( 12), INT8_C( 113), INT8_C( -12), INT8_C( -84), INT8_C( 6), INT8_C( 69), INT8_C( 2), INT8_C( -75), INT8_C( -34), INT8_C(-126), INT8_C( 3), INT8_C(-128), INT8_C( -9), INT8_C( 24), INT8_C( 11), INT8_C( -94), INT8_C( -32), INT8_C( 110)), UINT64_C(0x790f6cbc7bf66ff6) }, { simde_mm512_set_epi8(INT8_C( 2), INT8_C( -81), INT8_C( 14), INT8_C( 90), INT8_C(-100), INT8_C(-122), INT8_C( -35), INT8_C( 81), INT8_C( -14), INT8_C( -42), INT8_C( 125), INT8_C(-125), INT8_C( -57), INT8_C( 90), INT8_C( -9), INT8_C( 63), INT8_C( 53), INT8_C( 77), INT8_C( 63), INT8_C( -84), INT8_C( 27), INT8_C( 22), INT8_C( -28), INT8_C( -37), INT8_C( 65), INT8_C( 118), INT8_C(-126), INT8_C( 97), INT8_C( 109), INT8_C(-119), INT8_C(-114), INT8_C( -75), INT8_C(-125), INT8_C( 121), INT8_C(-128), INT8_C( 103), INT8_C( 0), INT8_C( 101), INT8_C( -41), INT8_C( 89), INT8_C( 67), INT8_C( -65), INT8_C( 9), INT8_C( -7), INT8_C( -63), INT8_C( 13), INT8_C( 105), INT8_C( 92), INT8_C( -18), INT8_C( -21), INT8_C(-102), INT8_C(-114), INT8_C( 74), INT8_C( 121), INT8_C( -45), INT8_C( 52), INT8_C( -63), INT8_C( -93), INT8_C( 98), INT8_C( 106), 
INT8_C(-109), INT8_C( -47), INT8_C( 37), INT8_C( 70)), simde_mm512_set_epi8(INT8_C( -42), INT8_C(-124), INT8_C( 54), INT8_C( 74), INT8_C( -92), INT8_C( 99), INT8_C( 79), INT8_C( -3), INT8_C( 61), INT8_C( -89), INT8_C( 84), INT8_C( -94), INT8_C( 31), INT8_C(-116), INT8_C( -67), INT8_C(-102), INT8_C( -72), INT8_C( -91), INT8_C(-105), INT8_C(-108), INT8_C( -44), INT8_C( 74), INT8_C( -28), INT8_C( 124), INT8_C( 120), INT8_C( -41), INT8_C( -79), INT8_C( 122), INT8_C( 87), INT8_C(-119), INT8_C( 54), INT8_C( -2), INT8_C( -47), INT8_C( 84), INT8_C(-126), INT8_C( -64), INT8_C( 14), INT8_C( 11), INT8_C( 37), INT8_C( -23), INT8_C( 67), INT8_C( 124), INT8_C( 58), INT8_C( -94), INT8_C( 30), INT8_C( -33), INT8_C( 70), INT8_C( -24), INT8_C( 38), INT8_C( -97), INT8_C( -56), INT8_C( -60), INT8_C( -59), INT8_C( 65), INT8_C( -74), INT8_C( 45), INT8_C( -11), INT8_C( 55), INT8_C( -82), INT8_C( 12), INT8_C( 106), INT8_C( 22), INT8_C(-124), INT8_C( -4)), UINT64_C(0xd167fa4c55974f33) }, { simde_mm512_set_epi8(INT8_C( 18), INT8_C( 13), INT8_C( 14), INT8_C( 4), INT8_C( -3), INT8_C( -64), INT8_C( 17), INT8_C(-115), INT8_C( 21), INT8_C( -34), INT8_C( 125), INT8_C( -60), INT8_C( -72), INT8_C( 74), INT8_C( -5), INT8_C( -21), INT8_C( -41), INT8_C( 22), INT8_C( 45), INT8_C( 102), INT8_C( 59), INT8_C( -80), INT8_C( -15), INT8_C( -63), INT8_C( 84), INT8_C( -71), INT8_C( 8), INT8_C( 12), INT8_C( -11), INT8_C( -76), INT8_C( 62), INT8_C( 93), INT8_C( -75), INT8_C( -77), INT8_C( -84), INT8_C(-108), INT8_C( -35), INT8_C( 14), INT8_C( -60), INT8_C( 18), INT8_C( 23), INT8_C( -60), INT8_C( -63), INT8_C(-114), INT8_C( -55), INT8_C( 75), INT8_C( -99), INT8_C( -55), INT8_C( 58), INT8_C( 76), INT8_C(-102), INT8_C(-118), INT8_C( 10), INT8_C( 39), INT8_C( 119), INT8_C( 85), INT8_C( -8), INT8_C( -72), INT8_C( -60), INT8_C( -94), INT8_C(-112), INT8_C( 119), INT8_C( 124), INT8_C( 76)), simde_mm512_set_epi8(INT8_C( 18), INT8_C( -74), INT8_C( 14), INT8_C( 36), INT8_C( -7), INT8_C( 113), INT8_C( 40), INT8_C( 48), 
INT8_C(-107), INT8_C( -34), INT8_C( -75), INT8_C( 85), INT8_C( -35), INT8_C(-116), INT8_C( 65), INT8_C( -21), INT8_C( 15), INT8_C( 3), INT8_C( 45), INT8_C( 21), INT8_C( 72), INT8_C( 93), INT8_C( 108), INT8_C( 125), INT8_C( 1), INT8_C( 75), INT8_C( 21), INT8_C( -36), INT8_C(-126), INT8_C( 122), INT8_C( 71), INT8_C( 76), INT8_C( 28), INT8_C( -56), INT8_C( 32), INT8_C( 101), INT8_C(-107), INT8_C(-111), INT8_C( -88), INT8_C( -19), INT8_C( -77), INT8_C( 19), INT8_C( -21), INT8_C(-111), INT8_C( -68), INT8_C( 82), INT8_C(-118), INT8_C( -76), INT8_C( 47), INT8_C( 127), INT8_C( 62), INT8_C( -16), INT8_C( 10), INT8_C( -14), INT8_C(-100), INT8_C( 86), INT8_C( 29), INT8_C( 107), INT8_C( 56), INT8_C( 21), INT8_C( 24), INT8_C( 68), INT8_C( -96), INT8_C( 64)), UINT64_C(0xe8e570990f8b8e07) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask64 r = simde_mm512_cmpge_epi8_mask(test_vec[i].a, test_vec[i].b); simde_assert_equal_mmask64(r, test_vec[i].r); } return 0; } static int test_simde_mm512_cmpge_epu8_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__mmask64 r; } test_vec[8] = { { simde_x_mm512_set_epu8(UINT8_C( 92), UINT8_C(135), UINT8_C(120), UINT8_C(237), UINT8_C(183), UINT8_C( 22), UINT8_C(190), UINT8_C(227), UINT8_C( 55), UINT8_C(193), UINT8_C(211), UINT8_C(137), UINT8_C( 30), UINT8_C(227), UINT8_C(230), UINT8_C( 89), UINT8_C(243), UINT8_C(119), UINT8_C(168), UINT8_C(109), UINT8_C( 68), UINT8_C(246), UINT8_C( 0), UINT8_C(238), UINT8_C( 1), UINT8_C(116), UINT8_C(243), UINT8_C(183), UINT8_C(134), UINT8_C( 0), UINT8_C(111), UINT8_C( 95), UINT8_C(236), UINT8_C( 3), UINT8_C(122), UINT8_C(213), UINT8_C( 13), UINT8_C( 25), UINT8_C(221), UINT8_C(129), UINT8_C( 82), UINT8_C( 90), UINT8_C( 53), UINT8_C(123), UINT8_C( 73), UINT8_C(108), UINT8_C(238), UINT8_C( 15), UINT8_C(115), UINT8_C( 54), UINT8_C(102), UINT8_C(118), UINT8_C( 39), UINT8_C(179), UINT8_C( 45), UINT8_C( 81), UINT8_C(239), UINT8_C(228), UINT8_C( 
67), UINT8_C(171), UINT8_C( 79), UINT8_C(143), UINT8_C(134), UINT8_C(124)), simde_x_mm512_set_epu8(UINT8_C( 9), UINT8_C(135), UINT8_C(120), UINT8_C(220), UINT8_C( 36), UINT8_C(153), UINT8_C(152), UINT8_C( 25), UINT8_C(145), UINT8_C(193), UINT8_C(221), UINT8_C(136), UINT8_C( 28), UINT8_C(212), UINT8_C(230), UINT8_C(170), UINT8_C(243), UINT8_C( 53), UINT8_C(168), UINT8_C(149), UINT8_C( 68), UINT8_C( 42), UINT8_C(138), UINT8_C(111), UINT8_C( 54), UINT8_C(198), UINT8_C(243), UINT8_C( 27), UINT8_C( 23), UINT8_C( 41), UINT8_C(137), UINT8_C( 44), UINT8_C( 7), UINT8_C(136), UINT8_C( 32), UINT8_C(213), UINT8_C(114), UINT8_C(184), UINT8_C( 73), UINT8_C(160), UINT8_C( 96), UINT8_C(110), UINT8_C(175), UINT8_C(180), UINT8_C(103), UINT8_C(156), UINT8_C(234), UINT8_C( 18), UINT8_C(115), UINT8_C( 54), UINT8_C(216), UINT8_C(125), UINT8_C(110), UINT8_C( 31), UINT8_C( 51), UINT8_C(152), UINT8_C(141), UINT8_C(229), UINT8_C( 62), UINT8_C(171), UINT8_C( 49), UINT8_C(141), UINT8_C( 38), UINT8_C( 4)), UINT64_C(0xfb5eed39b202c4bf) }, { simde_x_mm512_set_epu8(UINT8_C( 16), UINT8_C( 41), UINT8_C(102), UINT8_C(161), UINT8_C( 17), UINT8_C( 42), UINT8_C(170), UINT8_C( 38), UINT8_C( 62), UINT8_C(200), UINT8_C( 60), UINT8_C( 19), UINT8_C(156), UINT8_C(149), UINT8_C(105), UINT8_C(180), UINT8_C( 10), UINT8_C(247), UINT8_C(244), UINT8_C(200), UINT8_C(185), UINT8_C( 96), UINT8_C( 31), UINT8_C( 24), UINT8_C( 68), UINT8_C(249), UINT8_C(229), UINT8_C( 3), UINT8_C(235), UINT8_C( 50), UINT8_C(189), UINT8_C(217), UINT8_C(148), UINT8_C( 62), UINT8_C(101), UINT8_C( 56), UINT8_C(177), UINT8_C(219), UINT8_C(157), UINT8_C(200), UINT8_C(137), UINT8_C(200), UINT8_C(248), UINT8_C(240), UINT8_C(115), UINT8_C(161), UINT8_C(183), UINT8_C( 54), UINT8_C(125), UINT8_C( 85), UINT8_C( 78), UINT8_C(191), UINT8_C( 4), UINT8_C( 38), UINT8_C(172), UINT8_C(203), UINT8_C( 68), UINT8_C(148), UINT8_C(135), UINT8_C(141), UINT8_C(197), UINT8_C(178), UINT8_C(145), UINT8_C( 56)), simde_x_mm512_set_epu8(UINT8_C(152), UINT8_C(154), 
UINT8_C(102), UINT8_C(161), UINT8_C( 69), UINT8_C( 13), UINT8_C(235), UINT8_C( 45), UINT8_C( 62), UINT8_C(136), UINT8_C(145), UINT8_C( 32), UINT8_C(149), UINT8_C(226), UINT8_C( 99), UINT8_C(192), UINT8_C( 8), UINT8_C(214), UINT8_C( 81), UINT8_C(222), UINT8_C(210), UINT8_C( 26), UINT8_C( 31), UINT8_C(254), UINT8_C( 68), UINT8_C(249), UINT8_C(185), UINT8_C( 46), UINT8_C(235), UINT8_C(183), UINT8_C( 21), UINT8_C( 83), UINT8_C(148), UINT8_C(159), UINT8_C(187), UINT8_C( 73), UINT8_C( 57), UINT8_C(219), UINT8_C( 21), UINT8_C( 82), UINT8_C(137), UINT8_C(130), UINT8_C(126), UINT8_C( 91), UINT8_C(115), UINT8_C( 31), UINT8_C(177), UINT8_C( 28), UINT8_C(150), UINT8_C(238), UINT8_C( 65), UINT8_C(152), UINT8_C( 81), UINT8_C( 38), UINT8_C(172), UINT8_C(254), UINT8_C(242), UINT8_C( 85), UINT8_C(176), UINT8_C( 80), UINT8_C( 48), UINT8_C( 93), UINT8_C( 79), UINT8_C(127)), UINT64_C(0x34cae6eb8fff365e) }, { simde_x_mm512_set_epu8(UINT8_C( 50), UINT8_C( 43), UINT8_C(188), UINT8_C( 97), UINT8_C(230), UINT8_C(153), UINT8_C( 71), UINT8_C(149), UINT8_C( 91), UINT8_C( 45), UINT8_C(245), UINT8_C( 47), UINT8_C( 29), UINT8_C(200), UINT8_C( 26), UINT8_C(247), UINT8_C( 10), UINT8_C( 36), UINT8_C(140), UINT8_C(203), UINT8_C( 41), UINT8_C( 1), UINT8_C(233), UINT8_C( 61), UINT8_C(129), UINT8_C(252), UINT8_C( 48), UINT8_C(188), UINT8_C( 89), UINT8_C(144), UINT8_C(225), UINT8_C(120), UINT8_C( 35), UINT8_C( 62), UINT8_C(235), UINT8_C(142), UINT8_C(152), UINT8_C( 57), UINT8_C( 42), UINT8_C(145), UINT8_C( 94), UINT8_C(193), UINT8_C( 87), UINT8_C( 64), UINT8_C(191), UINT8_C(254), UINT8_C(110), UINT8_C(248), UINT8_C( 63), UINT8_C(205), UINT8_C(252), UINT8_C( 32), UINT8_C(191), UINT8_C( 55), UINT8_C( 14), UINT8_C( 81), UINT8_C(133), UINT8_C(156), UINT8_C(217), UINT8_C(212), UINT8_C( 22), UINT8_C(112), UINT8_C( 16), UINT8_C( 15)), simde_x_mm512_set_epu8(UINT8_C( 50), UINT8_C(245), UINT8_C(188), UINT8_C(225), UINT8_C(105), UINT8_C(150), UINT8_C( 98), UINT8_C( 51), UINT8_C( 58), UINT8_C(103), UINT8_C(111), 
UINT8_C(129), UINT8_C( 68), UINT8_C(200), UINT8_C(124), UINT8_C(137), UINT8_C( 74), UINT8_C(194), UINT8_C(140), UINT8_C( 37), UINT8_C(244), UINT8_C(114), UINT8_C( 0), UINT8_C( 61), UINT8_C(103), UINT8_C(252), UINT8_C(151), UINT8_C(188), UINT8_C( 39), UINT8_C(156), UINT8_C(163), UINT8_C( 11), UINT8_C(176), UINT8_C(237), UINT8_C(234), UINT8_C(217), UINT8_C(127), UINT8_C(218), UINT8_C(131), UINT8_C(145), UINT8_C( 84), UINT8_C(160), UINT8_C( 87), UINT8_C(234), UINT8_C(251), UINT8_C(253), UINT8_C(129), UINT8_C( 41), UINT8_C( 74), UINT8_C( 72), UINT8_C(252), UINT8_C( 28), UINT8_C(141), UINT8_C( 93), UINT8_C(102), UINT8_C( 44), UINT8_C(153), UINT8_C(227), UINT8_C(206), UINT8_C( 48), UINT8_C(160), UINT8_C(206), UINT8_C( 46), UINT8_C(191)), UINT64_C(0xada533db29e57930) }, { simde_x_mm512_set_epu8(UINT8_C(159), UINT8_C( 28), UINT8_C(198), UINT8_C( 11), UINT8_C(242), UINT8_C(126), UINT8_C( 81), UINT8_C( 45), UINT8_C(233), UINT8_C(120), UINT8_C(173), UINT8_C(240), UINT8_C( 7), UINT8_C( 51), UINT8_C(199), UINT8_C(206), UINT8_C(235), UINT8_C( 98), UINT8_C( 88), UINT8_C( 0), UINT8_C(190), UINT8_C( 3), UINT8_C(124), UINT8_C(143), UINT8_C( 50), UINT8_C( 32), UINT8_C(171), UINT8_C(163), UINT8_C(212), UINT8_C(243), UINT8_C(162), UINT8_C( 17), UINT8_C(122), UINT8_C( 79), UINT8_C(140), UINT8_C( 43), UINT8_C(179), UINT8_C(131), UINT8_C(233), UINT8_C(136), UINT8_C( 96), UINT8_C(192), UINT8_C(233), UINT8_C(210), UINT8_C(227), UINT8_C(185), UINT8_C( 71), UINT8_C(176), UINT8_C( 44), UINT8_C(164), UINT8_C(225), UINT8_C( 26), UINT8_C( 8), UINT8_C( 52), UINT8_C(117), UINT8_C(123), UINT8_C(193), UINT8_C( 45), UINT8_C( 95), UINT8_C( 24), UINT8_C(148), UINT8_C( 18), UINT8_C(196), UINT8_C( 28)), simde_x_mm512_set_epu8(UINT8_C(233), UINT8_C(155), UINT8_C(116), UINT8_C(127), UINT8_C( 96), UINT8_C( 40), UINT8_C(159), UINT8_C( 40), UINT8_C( 86), UINT8_C(212), UINT8_C( 70), UINT8_C(185), UINT8_C( 62), UINT8_C(235), UINT8_C( 66), UINT8_C( 68), UINT8_C(169), UINT8_C(195), UINT8_C( 48), UINT8_C(186), 
UINT8_C( 18), UINT8_C(178), UINT8_C(158), UINT8_C(117), UINT8_C( 74), UINT8_C( 32), UINT8_C( 93), UINT8_C(125), UINT8_C(209), UINT8_C(196), UINT8_C(170), UINT8_C(117), UINT8_C(122), UINT8_C(202), UINT8_C( 50), UINT8_C(123), UINT8_C(225), UINT8_C(182), UINT8_C(192), UINT8_C( 54), UINT8_C(175), UINT8_C( 60), UINT8_C( 31), UINT8_C(233), UINT8_C(108), UINT8_C(137), UINT8_C(164), UINT8_C(176), UINT8_C(226), UINT8_C(219), UINT8_C( 51), UINT8_C(220), UINT8_C( 8), UINT8_C( 52), UINT8_C( 97), UINT8_C(123), UINT8_C(207), UINT8_C(132), UINT8_C( 95), UINT8_C(173), UINT8_C( 70), UINT8_C(206), UINT8_C(195), UINT8_C( 25)), UINT64_C(0x2db3a97ca36d2f2b) }, { simde_x_mm512_set_epu8(UINT8_C(121), UINT8_C(236), UINT8_C(167), UINT8_C(162), UINT8_C(112), UINT8_C(229), UINT8_C( 81), UINT8_C(202), UINT8_C(192), UINT8_C(142), UINT8_C( 48), UINT8_C(167), UINT8_C(195), UINT8_C( 26), UINT8_C( 43), UINT8_C( 29), UINT8_C( 0), UINT8_C(125), UINT8_C(214), UINT8_C(189), UINT8_C( 15), UINT8_C(120), UINT8_C( 36), UINT8_C( 40), UINT8_C(203), UINT8_C( 34), UINT8_C(148), UINT8_C(198), UINT8_C( 26), UINT8_C(145), UINT8_C( 63), UINT8_C(158), UINT8_C( 65), UINT8_C( 8), UINT8_C(132), UINT8_C( 96), UINT8_C(243), UINT8_C(158), UINT8_C( 99), UINT8_C( 90), UINT8_C( 56), UINT8_C(211), UINT8_C(148), UINT8_C(237), UINT8_C(132), UINT8_C(229), UINT8_C( 22), UINT8_C(126), UINT8_C(150), UINT8_C(188), UINT8_C(196), UINT8_C( 8), UINT8_C(202), UINT8_C( 93), UINT8_C(223), UINT8_C(229), UINT8_C(249), UINT8_C( 27), UINT8_C(134), UINT8_C(168), UINT8_C( 23), UINT8_C( 6), UINT8_C( 45), UINT8_C(235)), simde_x_mm512_set_epu8(UINT8_C(105), UINT8_C( 52), UINT8_C( 85), UINT8_C(152), UINT8_C( 57), UINT8_C(225), UINT8_C(218), UINT8_C(132), UINT8_C(149), UINT8_C(254), UINT8_C( 55), UINT8_C( 46), UINT8_C(185), UINT8_C( 77), UINT8_C( 18), UINT8_C( 70), UINT8_C( 89), UINT8_C(125), UINT8_C(214), UINT8_C(131), UINT8_C(121), UINT8_C(245), UINT8_C(187), UINT8_C(197), UINT8_C(203), UINT8_C( 34), UINT8_C( 9), UINT8_C( 64), UINT8_C(195), 
UINT8_C(231), UINT8_C(141), UINT8_C(100), UINT8_C( 65), UINT8_C( 8), UINT8_C( 69), UINT8_C(248), UINT8_C(241), UINT8_C(205), UINT8_C( 1), UINT8_C( 90), UINT8_C(115), UINT8_C( 51), UINT8_C(165), UINT8_C( 56), UINT8_C( 64), UINT8_C(217), UINT8_C(137), UINT8_C(228), UINT8_C(202), UINT8_C( 28), UINT8_C( 54), UINT8_C(248), UINT8_C(202), UINT8_C(128), UINT8_C(228), UINT8_C(185), UINT8_C(107), UINT8_C(190), UINT8_C(142), UINT8_C(168), UINT8_C( 34), UINT8_C(173), UINT8_C(235), UINT8_C(192)), UINT64_C(0xfd9a70f1eb5c6991) }, { simde_x_mm512_set_epu8(UINT8_C( 33), UINT8_C(121), UINT8_C(125), UINT8_C( 35), UINT8_C(153), UINT8_C(208), UINT8_C(234), UINT8_C( 38), UINT8_C(175), UINT8_C( 9), UINT8_C(245), UINT8_C(132), UINT8_C( 71), UINT8_C( 9), UINT8_C(214), UINT8_C(118), UINT8_C( 67), UINT8_C( 45), UINT8_C( 51), UINT8_C(164), UINT8_C(126), UINT8_C(108), UINT8_C(133), UINT8_C(185), UINT8_C(113), UINT8_C( 32), UINT8_C( 71), UINT8_C( 3), UINT8_C(230), UINT8_C( 82), UINT8_C(175), UINT8_C(236), UINT8_C(201), UINT8_C(112), UINT8_C( 66), UINT8_C( 37), UINT8_C( 67), UINT8_C(187), UINT8_C( 64), UINT8_C( 39), UINT8_C( 72), UINT8_C( 45), UINT8_C(120), UINT8_C(251), UINT8_C(147), UINT8_C( 62), UINT8_C( 17), UINT8_C( 31), UINT8_C(226), UINT8_C(198), UINT8_C( 56), UINT8_C( 21), UINT8_C( 72), UINT8_C(181), UINT8_C(222), UINT8_C(120), UINT8_C( 95), UINT8_C(108), UINT8_C( 32), UINT8_C( 64), UINT8_C(128), UINT8_C(102), UINT8_C(235), UINT8_C( 28)), simde_x_mm512_set_epu8(UINT8_C(100), UINT8_C(121), UINT8_C( 18), UINT8_C( 28), UINT8_C(139), UINT8_C(107), UINT8_C( 3), UINT8_C(194), UINT8_C( 42), UINT8_C( 72), UINT8_C( 91), UINT8_C( 86), UINT8_C(184), UINT8_C( 9), UINT8_C(176), UINT8_C(118), UINT8_C(122), UINT8_C(148), UINT8_C(186), UINT8_C(193), UINT8_C( 56), UINT8_C( 71), UINT8_C(242), UINT8_C( 49), UINT8_C(183), UINT8_C( 53), UINT8_C(227), UINT8_C( 3), UINT8_C(183), UINT8_C( 43), UINT8_C(234), UINT8_C( 85), UINT8_C(230), UINT8_C(247), UINT8_C( 66), UINT8_C( 1), UINT8_C(243), UINT8_C( 60), 
UINT8_C(137), UINT8_C(173), UINT8_C(134), UINT8_C(192), UINT8_C(173), UINT8_C(182), UINT8_C(119), UINT8_C(248), UINT8_C( 12), UINT8_C(113), UINT8_C(244), UINT8_C(172), UINT8_C( 6), UINT8_C( 69), UINT8_C( 2), UINT8_C(181), UINT8_C(222), UINT8_C(130), UINT8_C( 3), UINT8_C(128), UINT8_C(247), UINT8_C( 24), UINT8_C( 11), UINT8_C(162), UINT8_C(224), UINT8_C(110)), UINT64_C(0x7eb70d1d341a6e9a) }, { simde_x_mm512_set_epu8(UINT8_C( 2), UINT8_C(175), UINT8_C( 14), UINT8_C( 90), UINT8_C(156), UINT8_C(134), UINT8_C(221), UINT8_C( 81), UINT8_C(242), UINT8_C(214), UINT8_C(125), UINT8_C(131), UINT8_C(199), UINT8_C( 90), UINT8_C(247), UINT8_C( 63), UINT8_C( 53), UINT8_C( 77), UINT8_C( 63), UINT8_C(172), UINT8_C( 27), UINT8_C( 22), UINT8_C(228), UINT8_C(219), UINT8_C( 65), UINT8_C(118), UINT8_C(130), UINT8_C( 97), UINT8_C(109), UINT8_C(137), UINT8_C(142), UINT8_C(181), UINT8_C(131), UINT8_C(121), UINT8_C(128), UINT8_C(103), UINT8_C( 0), UINT8_C(101), UINT8_C(215), UINT8_C( 89), UINT8_C( 67), UINT8_C(191), UINT8_C( 9), UINT8_C(249), UINT8_C(193), UINT8_C( 13), UINT8_C(105), UINT8_C( 92), UINT8_C(238), UINT8_C(235), UINT8_C(154), UINT8_C(142), UINT8_C( 74), UINT8_C(121), UINT8_C(211), UINT8_C( 52), UINT8_C(193), UINT8_C(163), UINT8_C( 98), UINT8_C(106), UINT8_C(147), UINT8_C(209), UINT8_C( 37), UINT8_C( 70)), simde_x_mm512_set_epu8(UINT8_C(214), UINT8_C(132), UINT8_C( 54), UINT8_C( 74), UINT8_C(164), UINT8_C( 99), UINT8_C( 79), UINT8_C(253), UINT8_C( 61), UINT8_C(167), UINT8_C( 84), UINT8_C(162), UINT8_C( 31), UINT8_C(140), UINT8_C(189), UINT8_C(154), UINT8_C(184), UINT8_C(165), UINT8_C(151), UINT8_C(148), UINT8_C(212), UINT8_C( 74), UINT8_C(228), UINT8_C(124), UINT8_C(120), UINT8_C(215), UINT8_C(177), UINT8_C(122), UINT8_C( 87), UINT8_C(137), UINT8_C( 54), UINT8_C(254), UINT8_C(209), UINT8_C( 84), UINT8_C(130), UINT8_C(192), UINT8_C( 14), UINT8_C( 11), UINT8_C( 37), UINT8_C(233), UINT8_C( 67), UINT8_C(124), UINT8_C( 58), UINT8_C(162), UINT8_C( 30), UINT8_C(223), UINT8_C( 70), 
UINT8_C(232), UINT8_C( 38), UINT8_C(159), UINT8_C(200), UINT8_C(196), UINT8_C(197), UINT8_C( 65), UINT8_C(182), UINT8_C( 45), UINT8_C(245), UINT8_C( 55), UINT8_C(174), UINT8_C( 12), UINT8_C(106), UINT8_C( 22), UINT8_C(132), UINT8_C(252)), UINT64_C(0x56ea130e46dac75c) }, { simde_x_mm512_set_epu8(UINT8_C( 18), UINT8_C( 13), UINT8_C( 14), UINT8_C( 4), UINT8_C(253), UINT8_C(192), UINT8_C( 17), UINT8_C(141), UINT8_C( 21), UINT8_C(222), UINT8_C(125), UINT8_C(196), UINT8_C(184), UINT8_C( 74), UINT8_C(251), UINT8_C(235), UINT8_C(215), UINT8_C( 22), UINT8_C( 45), UINT8_C(102), UINT8_C( 59), UINT8_C(176), UINT8_C(241), UINT8_C(193), UINT8_C( 84), UINT8_C(185), UINT8_C( 8), UINT8_C( 12), UINT8_C(245), UINT8_C(180), UINT8_C( 62), UINT8_C( 93), UINT8_C(181), UINT8_C(179), UINT8_C(172), UINT8_C(148), UINT8_C(221), UINT8_C( 14), UINT8_C(196), UINT8_C( 18), UINT8_C( 23), UINT8_C(196), UINT8_C(193), UINT8_C(142), UINT8_C(201), UINT8_C( 75), UINT8_C(157), UINT8_C(201), UINT8_C( 58), UINT8_C( 76), UINT8_C(154), UINT8_C(138), UINT8_C( 10), UINT8_C( 39), UINT8_C(119), UINT8_C( 85), UINT8_C(248), UINT8_C(184), UINT8_C(196), UINT8_C(162), UINT8_C(144), UINT8_C(119), UINT8_C(124), UINT8_C( 76)), simde_x_mm512_set_epu8(UINT8_C( 18), UINT8_C(182), UINT8_C( 14), UINT8_C( 36), UINT8_C(249), UINT8_C(113), UINT8_C( 40), UINT8_C( 48), UINT8_C(149), UINT8_C(222), UINT8_C(181), UINT8_C( 85), UINT8_C(221), UINT8_C(140), UINT8_C( 65), UINT8_C(235), UINT8_C( 15), UINT8_C( 3), UINT8_C( 45), UINT8_C( 21), UINT8_C( 72), UINT8_C( 93), UINT8_C(108), UINT8_C(125), UINT8_C( 1), UINT8_C( 75), UINT8_C( 21), UINT8_C(220), UINT8_C(130), UINT8_C(122), UINT8_C( 71), UINT8_C( 76), UINT8_C( 28), UINT8_C(200), UINT8_C( 32), UINT8_C(101), UINT8_C(149), UINT8_C(145), UINT8_C(168), UINT8_C(237), UINT8_C(179), UINT8_C( 19), UINT8_C(235), UINT8_C(145), UINT8_C(188), UINT8_C( 82), UINT8_C(138), UINT8_C(180), UINT8_C( 47), UINT8_C(127), UINT8_C( 62), UINT8_C(240), UINT8_C( 10), UINT8_C(242), UINT8_C(156), UINT8_C( 86), 
UINT8_C( 29), UINT8_C(107), UINT8_C( 56), UINT8_C( 21), UINT8_C( 24), UINT8_C( 68), UINT8_C(160), UINT8_C( 64)), UINT64_C(0xad53f7cdba4ba8fd) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask64 r = simde_mm512_cmpge_epu8_mask(test_vec[i].a, test_vec[i].b); simde_assert_equal_mmask64(r, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmpge_epi8_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmpge_epu8_mask) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/cmpgt.c000066400000000000000000002233511400333146700165400ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #define SIMDE_TEST_X86_AVX512_INSN cmpgt #include #include #include static int test_simde_mm512_cmpgt_epi8_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__mmask64 r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( 92), INT8_C(-121), INT8_C( 120), INT8_C( -19), INT8_C( -73), INT8_C( 22), INT8_C( -66), INT8_C( -29), INT8_C( 55), INT8_C( -63), INT8_C( -45), INT8_C(-119), INT8_C( 30), INT8_C( -29), INT8_C( -26), INT8_C( 89), INT8_C( -13), INT8_C( 119), INT8_C( -88), INT8_C( 109), INT8_C( 68), INT8_C( -10), INT8_C( 0), INT8_C( -18), INT8_C( 1), INT8_C( 116), INT8_C( -13), INT8_C( -73), INT8_C(-122), INT8_C( 0), INT8_C( 111), INT8_C( 95), INT8_C( -20), INT8_C( 3), INT8_C( 122), INT8_C( -43), INT8_C( 13), INT8_C( 25), INT8_C( -35), INT8_C(-127), INT8_C( 82), INT8_C( 90), INT8_C( 53), INT8_C( 123), INT8_C( 73), INT8_C( 108), INT8_C( -18), INT8_C( 15), INT8_C( 115), INT8_C( 54), INT8_C( 102), INT8_C( 118), INT8_C( 39), INT8_C( -77), INT8_C( 45), INT8_C( 81), INT8_C( -17), INT8_C( -28), INT8_C( 67), INT8_C( -85), INT8_C( 79), INT8_C(-113), INT8_C(-122), INT8_C( 124)), simde_mm512_set_epi8(INT8_C( 9), INT8_C(-121), INT8_C( 120), INT8_C( -36), INT8_C( 36), INT8_C(-103), INT8_C(-104), INT8_C( 25), INT8_C(-111), INT8_C( -63), INT8_C( -35), INT8_C(-120), INT8_C( 28), INT8_C( -44), INT8_C( -26), INT8_C( -86), INT8_C( -13), INT8_C( 53), INT8_C( -88), INT8_C(-107), INT8_C( 68), INT8_C( 42), INT8_C(-118), INT8_C( 111), INT8_C( 54), INT8_C( -58), INT8_C( -13), INT8_C( 27), INT8_C( 23), INT8_C( 41), INT8_C(-119), INT8_C( 44), INT8_C( 7), INT8_C(-120), INT8_C( 32), INT8_C( -43), INT8_C( 114), INT8_C( -72), INT8_C( 73), INT8_C( -96), INT8_C( 96), INT8_C( 110), INT8_C( -81), INT8_C( -76), INT8_C( 103), INT8_C(-100), INT8_C( -22), INT8_C( 18), INT8_C( 115), INT8_C( 54), INT8_C( -40), INT8_C( 125), INT8_C( 110), INT8_C( 31), INT8_C( 51), INT8_C(-104), INT8_C(-115), INT8_C( -27), INT8_C( 62), 
INT8_C( -85), INT8_C( 49), INT8_C(-115), INT8_C( 38), INT8_C( 4)), UINT64_C(0x969d5243643621ad) }, { simde_mm512_set_epi8(INT8_C( 16), INT8_C( 41), INT8_C( 102), INT8_C( -95), INT8_C( 17), INT8_C( 42), INT8_C( -86), INT8_C( 38), INT8_C( 62), INT8_C( -56), INT8_C( 60), INT8_C( 19), INT8_C(-100), INT8_C(-107), INT8_C( 105), INT8_C( -76), INT8_C( 10), INT8_C( -9), INT8_C( -12), INT8_C( -56), INT8_C( -71), INT8_C( 96), INT8_C( 31), INT8_C( 24), INT8_C( 68), INT8_C( -7), INT8_C( -27), INT8_C( 3), INT8_C( -21), INT8_C( 50), INT8_C( -67), INT8_C( -39), INT8_C(-108), INT8_C( 62), INT8_C( 101), INT8_C( 56), INT8_C( -79), INT8_C( -37), INT8_C( -99), INT8_C( -56), INT8_C(-119), INT8_C( -56), INT8_C( -8), INT8_C( -16), INT8_C( 115), INT8_C( -95), INT8_C( -73), INT8_C( 54), INT8_C( 125), INT8_C( 85), INT8_C( 78), INT8_C( -65), INT8_C( 4), INT8_C( 38), INT8_C( -84), INT8_C( -53), INT8_C( 68), INT8_C(-108), INT8_C(-121), INT8_C(-115), INT8_C( -59), INT8_C( -78), INT8_C(-111), INT8_C( 56)), simde_mm512_set_epi8(INT8_C(-104), INT8_C(-102), INT8_C( 102), INT8_C( -95), INT8_C( 69), INT8_C( 13), INT8_C( -21), INT8_C( 45), INT8_C( 62), INT8_C(-120), INT8_C(-111), INT8_C( 32), INT8_C(-107), INT8_C( -30), INT8_C( 99), INT8_C( -64), INT8_C( 8), INT8_C( -42), INT8_C( 81), INT8_C( -34), INT8_C( -46), INT8_C( 26), INT8_C( 31), INT8_C( -2), INT8_C( 68), INT8_C( -7), INT8_C( -71), INT8_C( 46), INT8_C( -21), INT8_C( -73), INT8_C( 21), INT8_C( 83), INT8_C(-108), INT8_C( -97), INT8_C( -69), INT8_C( 73), INT8_C( 57), INT8_C( -37), INT8_C( 21), INT8_C( 82), INT8_C(-119), INT8_C(-126), INT8_C( 126), INT8_C( 91), INT8_C( 115), INT8_C( 31), INT8_C( -79), INT8_C( 28), INT8_C(-106), INT8_C( -18), INT8_C( 65), INT8_C(-104), INT8_C( 81), INT8_C( 38), INT8_C( -84), INT8_C( -2), INT8_C( -14), INT8_C( 85), INT8_C( -80), INT8_C( 80), INT8_C( 48), INT8_C( 93), INT8_C( 79), INT8_C( 127)), UINT64_C(0xc46ac5246043f080) }, { simde_mm512_set_epi8(INT8_C( 50), INT8_C( 43), INT8_C( -68), INT8_C( 97), INT8_C( -26), 
INT8_C(-103), INT8_C( 71), INT8_C(-107), INT8_C( 91), INT8_C( 45), INT8_C( -11), INT8_C( 47), INT8_C( 29), INT8_C( -56), INT8_C( 26), INT8_C( -9), INT8_C( 10), INT8_C( 36), INT8_C(-116), INT8_C( -53), INT8_C( 41), INT8_C( 1), INT8_C( -23), INT8_C( 61), INT8_C(-127), INT8_C( -4), INT8_C( 48), INT8_C( -68), INT8_C( 89), INT8_C(-112), INT8_C( -31), INT8_C( 120), INT8_C( 35), INT8_C( 62), INT8_C( -21), INT8_C(-114), INT8_C(-104), INT8_C( 57), INT8_C( 42), INT8_C(-111), INT8_C( 94), INT8_C( -63), INT8_C( 87), INT8_C( 64), INT8_C( -65), INT8_C( -2), INT8_C( 110), INT8_C( -8), INT8_C( 63), INT8_C( -51), INT8_C( -4), INT8_C( 32), INT8_C( -65), INT8_C( 55), INT8_C( 14), INT8_C( 81), INT8_C(-123), INT8_C(-100), INT8_C( -39), INT8_C( -44), INT8_C( 22), INT8_C( 112), INT8_C( 16), INT8_C( 15)), simde_mm512_set_epi8(INT8_C( 50), INT8_C( -11), INT8_C( -68), INT8_C( -31), INT8_C( 105), INT8_C(-106), INT8_C( 98), INT8_C( 51), INT8_C( 58), INT8_C( 103), INT8_C( 111), INT8_C(-127), INT8_C( 68), INT8_C( -56), INT8_C( 124), INT8_C(-119), INT8_C( 74), INT8_C( -62), INT8_C(-116), INT8_C( 37), INT8_C( -12), INT8_C( 114), INT8_C( 0), INT8_C( 61), INT8_C( 103), INT8_C( -4), INT8_C(-105), INT8_C( -68), INT8_C( 39), INT8_C(-100), INT8_C( -93), INT8_C( 11), INT8_C( -80), INT8_C( -19), INT8_C( -22), INT8_C( -39), INT8_C( 127), INT8_C( -38), INT8_C(-125), INT8_C(-111), INT8_C( 84), INT8_C( -96), INT8_C( 87), INT8_C( -22), INT8_C( -5), INT8_C( -3), INT8_C(-127), INT8_C( 41), INT8_C( 74), INT8_C( 72), INT8_C( -4), INT8_C( 28), INT8_C(-115), INT8_C( 93), INT8_C( 102), INT8_C( 44), INT8_C(-103), INT8_C( -29), INT8_C( -50), INT8_C( 48), INT8_C( -96), INT8_C( -50), INT8_C( 46), INT8_C( -65)), UINT64_C(0x5491482be6d6192d) }, { simde_mm512_set_epi8(INT8_C( -97), INT8_C( 28), INT8_C( -58), INT8_C( 11), INT8_C( -14), INT8_C( 126), INT8_C( 81), INT8_C( 45), INT8_C( -23), INT8_C( 120), INT8_C( -83), INT8_C( -16), INT8_C( 7), INT8_C( 51), INT8_C( -57), INT8_C( -50), INT8_C( -21), INT8_C( 98), INT8_C( 88), 
INT8_C( 0), INT8_C( -66), INT8_C( 3), INT8_C( 124), INT8_C(-113), INT8_C( 50), INT8_C( 32), INT8_C( -85), INT8_C( -93), INT8_C( -44), INT8_C( -13), INT8_C( -94), INT8_C( 17), INT8_C( 122), INT8_C( 79), INT8_C(-116), INT8_C( 43), INT8_C( -77), INT8_C(-125), INT8_C( -23), INT8_C(-120), INT8_C( 96), INT8_C( -64), INT8_C( -23), INT8_C( -46), INT8_C( -29), INT8_C( -71), INT8_C( 71), INT8_C( -80), INT8_C( 44), INT8_C( -92), INT8_C( -31), INT8_C( 26), INT8_C( 8), INT8_C( 52), INT8_C( 117), INT8_C( 123), INT8_C( -63), INT8_C( 45), INT8_C( 95), INT8_C( 24), INT8_C(-108), INT8_C( 18), INT8_C( -60), INT8_C( 28)), simde_mm512_set_epi8(INT8_C( -23), INT8_C(-101), INT8_C( 116), INT8_C( 127), INT8_C( 96), INT8_C( 40), INT8_C( -97), INT8_C( 40), INT8_C( 86), INT8_C( -44), INT8_C( 70), INT8_C( -71), INT8_C( 62), INT8_C( -21), INT8_C( 66), INT8_C( 68), INT8_C( -87), INT8_C( -61), INT8_C( 48), INT8_C( -70), INT8_C( 18), INT8_C( -78), INT8_C( -98), INT8_C( 117), INT8_C( 74), INT8_C( 32), INT8_C( 93), INT8_C( 125), INT8_C( -47), INT8_C( -60), INT8_C( -86), INT8_C( 117), INT8_C( 122), INT8_C( -54), INT8_C( 50), INT8_C( 123), INT8_C( -31), INT8_C( -74), INT8_C( -64), INT8_C( 54), INT8_C( -81), INT8_C( 60), INT8_C( 31), INT8_C( -23), INT8_C( 108), INT8_C(-119), INT8_C( -92), INT8_C( -80), INT8_C( -30), INT8_C( -37), INT8_C( 51), INT8_C( -36), INT8_C( 8), INT8_C( 52), INT8_C( 97), INT8_C( 123), INT8_C( -49), INT8_C(-124), INT8_C( 95), INT8_C( -83), INT8_C( 70), INT8_C( -50), INT8_C( -61), INT8_C( 25)), UINT64_C(0x4754f60c42869257) }, { simde_mm512_set_epi8(INT8_C( 121), INT8_C( -20), INT8_C( -89), INT8_C( -94), INT8_C( 112), INT8_C( -27), INT8_C( 81), INT8_C( -54), INT8_C( -64), INT8_C(-114), INT8_C( 48), INT8_C( -89), INT8_C( -61), INT8_C( 26), INT8_C( 43), INT8_C( 29), INT8_C( 0), INT8_C( 125), INT8_C( -42), INT8_C( -67), INT8_C( 15), INT8_C( 120), INT8_C( 36), INT8_C( 40), INT8_C( -53), INT8_C( 34), INT8_C(-108), INT8_C( -58), INT8_C( 26), INT8_C(-111), INT8_C( 63), INT8_C( -98), 
INT8_C( 65), INT8_C( 8), INT8_C(-124), INT8_C( 96), INT8_C( -13), INT8_C( -98), INT8_C( 99), INT8_C( 90), INT8_C( 56), INT8_C( -45), INT8_C(-108), INT8_C( -19), INT8_C(-124), INT8_C( -27), INT8_C( 22), INT8_C( 126), INT8_C(-106), INT8_C( -68), INT8_C( -60), INT8_C( 8), INT8_C( -54), INT8_C( 93), INT8_C( -33), INT8_C( -27), INT8_C( -7), INT8_C( 27), INT8_C(-122), INT8_C( -88), INT8_C( 23), INT8_C( 6), INT8_C( 45), INT8_C( -21)), simde_mm512_set_epi8(INT8_C( 105), INT8_C( 52), INT8_C( 85), INT8_C(-104), INT8_C( 57), INT8_C( -31), INT8_C( -38), INT8_C(-124), INT8_C(-107), INT8_C( -2), INT8_C( 55), INT8_C( 46), INT8_C( -71), INT8_C( 77), INT8_C( 18), INT8_C( 70), INT8_C( 89), INT8_C( 125), INT8_C( -42), INT8_C(-125), INT8_C( 121), INT8_C( -11), INT8_C( -69), INT8_C( -59), INT8_C( -53), INT8_C( 34), INT8_C( 9), INT8_C( 64), INT8_C( -61), INT8_C( -25), INT8_C(-115), INT8_C( 100), INT8_C( 65), INT8_C( 8), INT8_C( 69), INT8_C( -8), INT8_C( -15), INT8_C( -51), INT8_C( 1), INT8_C( 90), INT8_C( 115), INT8_C( 51), INT8_C( -91), INT8_C( 56), INT8_C( 64), INT8_C( -39), INT8_C(-119), INT8_C( -28), INT8_C( -54), INT8_C( 28), INT8_C( 54), INT8_C( -8), INT8_C( -54), INT8_C(-128), INT8_C( -28), INT8_C( -71), INT8_C( 107), INT8_C( -66), INT8_C(-114), INT8_C( -88), INT8_C( 34), INT8_C( -83), INT8_C( -21), INT8_C( -64)), UINT64_C(0x9f8a170a1a071547) }, { simde_mm512_set_epi8(INT8_C( 33), INT8_C( 121), INT8_C( 125), INT8_C( 35), INT8_C(-103), INT8_C( -48), INT8_C( -22), INT8_C( 38), INT8_C( -81), INT8_C( 9), INT8_C( -11), INT8_C(-124), INT8_C( 71), INT8_C( 9), INT8_C( -42), INT8_C( 118), INT8_C( 67), INT8_C( 45), INT8_C( 51), INT8_C( -92), INT8_C( 126), INT8_C( 108), INT8_C(-123), INT8_C( -71), INT8_C( 113), INT8_C( 32), INT8_C( 71), INT8_C( 3), INT8_C( -26), INT8_C( 82), INT8_C( -81), INT8_C( -20), INT8_C( -55), INT8_C( 112), INT8_C( 66), INT8_C( 37), INT8_C( 67), INT8_C( -69), INT8_C( 64), INT8_C( 39), INT8_C( 72), INT8_C( 45), INT8_C( 120), INT8_C( -5), INT8_C(-109), INT8_C( 62), 
INT8_C( 17), INT8_C( 31), INT8_C( -30), INT8_C( -58), INT8_C( 56), INT8_C( 21), INT8_C( 72), INT8_C( -75), INT8_C( -34), INT8_C( 120), INT8_C( 95), INT8_C( 108), INT8_C( 32), INT8_C( 64), INT8_C(-128), INT8_C( 102), INT8_C( -21), INT8_C( 28)), simde_mm512_set_epi8(INT8_C( 100), INT8_C( 121), INT8_C( 18), INT8_C( 28), INT8_C(-117), INT8_C( 107), INT8_C( 3), INT8_C( -62), INT8_C( 42), INT8_C( 72), INT8_C( 91), INT8_C( 86), INT8_C( -72), INT8_C( 9), INT8_C( -80), INT8_C( 118), INT8_C( 122), INT8_C(-108), INT8_C( -70), INT8_C( -63), INT8_C( 56), INT8_C( 71), INT8_C( -14), INT8_C( 49), INT8_C( -73), INT8_C( 53), INT8_C( -29), INT8_C( 3), INT8_C( -73), INT8_C( 43), INT8_C( -22), INT8_C( 85), INT8_C( -26), INT8_C( -9), INT8_C( 66), INT8_C( 1), INT8_C( -13), INT8_C( 60), INT8_C(-119), INT8_C( -83), INT8_C(-122), INT8_C( -64), INT8_C( -83), INT8_C( -74), INT8_C( 119), INT8_C( -8), INT8_C( 12), INT8_C( 113), INT8_C( -12), INT8_C( -84), INT8_C( 6), INT8_C( 69), INT8_C( 2), INT8_C( -75), INT8_C( -34), INT8_C(-126), INT8_C( 3), INT8_C(-128), INT8_C( -9), INT8_C( 24), INT8_C( 11), INT8_C( -94), INT8_C( -32), INT8_C( 110)), UINT64_C(0x390a6cac5bf669f6) }, { simde_mm512_set_epi8(INT8_C( 2), INT8_C( -81), INT8_C( 14), INT8_C( 90), INT8_C(-100), INT8_C(-122), INT8_C( -35), INT8_C( 81), INT8_C( -14), INT8_C( -42), INT8_C( 125), INT8_C(-125), INT8_C( -57), INT8_C( 90), INT8_C( -9), INT8_C( 63), INT8_C( 53), INT8_C( 77), INT8_C( 63), INT8_C( -84), INT8_C( 27), INT8_C( 22), INT8_C( -28), INT8_C( -37), INT8_C( 65), INT8_C( 118), INT8_C(-126), INT8_C( 97), INT8_C( 109), INT8_C(-119), INT8_C(-114), INT8_C( -75), INT8_C(-125), INT8_C( 121), INT8_C(-128), INT8_C( 103), INT8_C( 0), INT8_C( 101), INT8_C( -41), INT8_C( 89), INT8_C( 67), INT8_C( -65), INT8_C( 9), INT8_C( -7), INT8_C( -63), INT8_C( 13), INT8_C( 105), INT8_C( 92), INT8_C( -18), INT8_C( -21), INT8_C(-102), INT8_C(-114), INT8_C( 74), INT8_C( 121), INT8_C( -45), INT8_C( 52), INT8_C( -63), INT8_C( -93), INT8_C( 98), INT8_C( 106), 
INT8_C(-109), INT8_C( -47), INT8_C( 37), INT8_C( 70)), simde_mm512_set_epi8(INT8_C( -42), INT8_C(-124), INT8_C( 54), INT8_C( 74), INT8_C( -92), INT8_C( 99), INT8_C( 79), INT8_C( -3), INT8_C( 61), INT8_C( -89), INT8_C( 84), INT8_C( -94), INT8_C( 31), INT8_C(-116), INT8_C( -67), INT8_C(-102), INT8_C( -72), INT8_C( -91), INT8_C(-105), INT8_C(-108), INT8_C( -44), INT8_C( 74), INT8_C( -28), INT8_C( 124), INT8_C( 120), INT8_C( -41), INT8_C( -79), INT8_C( 122), INT8_C( 87), INT8_C(-119), INT8_C( 54), INT8_C( -2), INT8_C( -47), INT8_C( 84), INT8_C(-126), INT8_C( -64), INT8_C( 14), INT8_C( 11), INT8_C( 37), INT8_C( -23), INT8_C( 67), INT8_C( 124), INT8_C( 58), INT8_C( -94), INT8_C( 30), INT8_C( -33), INT8_C( 70), INT8_C( -24), INT8_C( 38), INT8_C( -97), INT8_C( -56), INT8_C( -60), INT8_C( -59), INT8_C( 65), INT8_C( -74), INT8_C( 45), INT8_C( -11), INT8_C( 55), INT8_C( -82), INT8_C( 12), INT8_C( 106), INT8_C( 22), INT8_C(-124), INT8_C( -4)), UINT64_C(0xd167f84855174f33) }, { simde_mm512_set_epi8(INT8_C( 18), INT8_C( 13), INT8_C( 14), INT8_C( 4), INT8_C( -3), INT8_C( -64), INT8_C( 17), INT8_C(-115), INT8_C( 21), INT8_C( -34), INT8_C( 125), INT8_C( -60), INT8_C( -72), INT8_C( 74), INT8_C( -5), INT8_C( -21), INT8_C( -41), INT8_C( 22), INT8_C( 45), INT8_C( 102), INT8_C( 59), INT8_C( -80), INT8_C( -15), INT8_C( -63), INT8_C( 84), INT8_C( -71), INT8_C( 8), INT8_C( 12), INT8_C( -11), INT8_C( -76), INT8_C( 62), INT8_C( 93), INT8_C( -75), INT8_C( -77), INT8_C( -84), INT8_C(-108), INT8_C( -35), INT8_C( 14), INT8_C( -60), INT8_C( 18), INT8_C( 23), INT8_C( -60), INT8_C( -63), INT8_C(-114), INT8_C( -55), INT8_C( 75), INT8_C( -99), INT8_C( -55), INT8_C( 58), INT8_C( 76), INT8_C(-102), INT8_C(-118), INT8_C( 10), INT8_C( 39), INT8_C( 119), INT8_C( 85), INT8_C( -8), INT8_C( -72), INT8_C( -60), INT8_C( -94), INT8_C(-112), INT8_C( 119), INT8_C( 124), INT8_C( 76)), simde_mm512_set_epi8(INT8_C( 18), INT8_C( -74), INT8_C( 14), INT8_C( 36), INT8_C( -7), INT8_C( 113), INT8_C( 40), INT8_C( 48), 
INT8_C(-107), INT8_C( -34), INT8_C( -75), INT8_C( 85), INT8_C( -35), INT8_C(-116), INT8_C( 65), INT8_C( -21), INT8_C( 15), INT8_C( 3), INT8_C( 45), INT8_C( 21), INT8_C( 72), INT8_C( 93), INT8_C( 108), INT8_C( 125), INT8_C( 1), INT8_C( 75), INT8_C( 21), INT8_C( -36), INT8_C(-126), INT8_C( 122), INT8_C( 71), INT8_C( 76), INT8_C( 28), INT8_C( -56), INT8_C( 32), INT8_C( 101), INT8_C(-107), INT8_C(-111), INT8_C( -88), INT8_C( -19), INT8_C( -77), INT8_C( 19), INT8_C( -21), INT8_C(-111), INT8_C( -68), INT8_C( 82), INT8_C(-118), INT8_C( -76), INT8_C( 47), INT8_C( 127), INT8_C( 62), INT8_C( -16), INT8_C( 10), INT8_C( -14), INT8_C(-100), INT8_C( 86), INT8_C( 29), INT8_C( 107), INT8_C( 56), INT8_C( 21), INT8_C( 24), INT8_C( 68), INT8_C( -96), INT8_C( 64)), UINT64_C(0x48a450990f8b8607) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask64 r = simde_mm512_cmpgt_epi8_mask(test_vec[i].a, test_vec[i].b); simde_assert_equal_mmask64(r, test_vec[i].r); } return 0; } static int test_simde_mm512_cmpgt_epu8_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__mmask64 r; } test_vec[8] = { { simde_x_mm512_set_epu8(UINT8_C( 92), UINT8_C(135), UINT8_C(120), UINT8_C(237), UINT8_C(183), UINT8_C( 22), UINT8_C(190), UINT8_C(227), UINT8_C( 55), UINT8_C(193), UINT8_C(211), UINT8_C(137), UINT8_C( 30), UINT8_C(227), UINT8_C(230), UINT8_C( 89), UINT8_C(243), UINT8_C(119), UINT8_C(168), UINT8_C(109), UINT8_C( 68), UINT8_C(246), UINT8_C( 0), UINT8_C(238), UINT8_C( 1), UINT8_C(116), UINT8_C(243), UINT8_C(183), UINT8_C(134), UINT8_C( 0), UINT8_C(111), UINT8_C( 95), UINT8_C(236), UINT8_C( 3), UINT8_C(122), UINT8_C(213), UINT8_C( 13), UINT8_C( 25), UINT8_C(221), UINT8_C(129), UINT8_C( 82), UINT8_C( 90), UINT8_C( 53), UINT8_C(123), UINT8_C( 73), UINT8_C(108), UINT8_C(238), UINT8_C( 15), UINT8_C(115), UINT8_C( 54), UINT8_C(102), UINT8_C(118), UINT8_C( 39), UINT8_C(179), UINT8_C( 45), UINT8_C( 81), UINT8_C(239), UINT8_C(228), UINT8_C( 
67), UINT8_C(171), UINT8_C( 79), UINT8_C(143), UINT8_C(134), UINT8_C(124)), simde_x_mm512_set_epu8(UINT8_C( 9), UINT8_C(135), UINT8_C(120), UINT8_C(220), UINT8_C( 36), UINT8_C(153), UINT8_C(152), UINT8_C( 25), UINT8_C(145), UINT8_C(193), UINT8_C(221), UINT8_C(136), UINT8_C( 28), UINT8_C(212), UINT8_C(230), UINT8_C(170), UINT8_C(243), UINT8_C( 53), UINT8_C(168), UINT8_C(149), UINT8_C( 68), UINT8_C( 42), UINT8_C(138), UINT8_C(111), UINT8_C( 54), UINT8_C(198), UINT8_C(243), UINT8_C( 27), UINT8_C( 23), UINT8_C( 41), UINT8_C(137), UINT8_C( 44), UINT8_C( 7), UINT8_C(136), UINT8_C( 32), UINT8_C(213), UINT8_C(114), UINT8_C(184), UINT8_C( 73), UINT8_C(160), UINT8_C( 96), UINT8_C(110), UINT8_C(175), UINT8_C(180), UINT8_C(103), UINT8_C(156), UINT8_C(234), UINT8_C( 18), UINT8_C(115), UINT8_C( 54), UINT8_C(216), UINT8_C(125), UINT8_C(110), UINT8_C( 31), UINT8_C( 51), UINT8_C(152), UINT8_C(141), UINT8_C(229), UINT8_C( 62), UINT8_C(171), UINT8_C( 49), UINT8_C(141), UINT8_C( 38), UINT8_C( 4)), UINT64_C(0x9b1c4519a20204af) }, { simde_x_mm512_set_epu8(UINT8_C( 16), UINT8_C( 41), UINT8_C(102), UINT8_C(161), UINT8_C( 17), UINT8_C( 42), UINT8_C(170), UINT8_C( 38), UINT8_C( 62), UINT8_C(200), UINT8_C( 60), UINT8_C( 19), UINT8_C(156), UINT8_C(149), UINT8_C(105), UINT8_C(180), UINT8_C( 10), UINT8_C(247), UINT8_C(244), UINT8_C(200), UINT8_C(185), UINT8_C( 96), UINT8_C( 31), UINT8_C( 24), UINT8_C( 68), UINT8_C(249), UINT8_C(229), UINT8_C( 3), UINT8_C(235), UINT8_C( 50), UINT8_C(189), UINT8_C(217), UINT8_C(148), UINT8_C( 62), UINT8_C(101), UINT8_C( 56), UINT8_C(177), UINT8_C(219), UINT8_C(157), UINT8_C(200), UINT8_C(137), UINT8_C(200), UINT8_C(248), UINT8_C(240), UINT8_C(115), UINT8_C(161), UINT8_C(183), UINT8_C( 54), UINT8_C(125), UINT8_C( 85), UINT8_C( 78), UINT8_C(191), UINT8_C( 4), UINT8_C( 38), UINT8_C(172), UINT8_C(203), UINT8_C( 68), UINT8_C(148), UINT8_C(135), UINT8_C(141), UINT8_C(197), UINT8_C(178), UINT8_C(145), UINT8_C( 56)), simde_x_mm512_set_epu8(UINT8_C(152), UINT8_C(154), 
UINT8_C(102), UINT8_C(161), UINT8_C( 69), UINT8_C( 13), UINT8_C(235), UINT8_C( 45), UINT8_C( 62), UINT8_C(136), UINT8_C(145), UINT8_C( 32), UINT8_C(149), UINT8_C(226), UINT8_C( 99), UINT8_C(192), UINT8_C( 8), UINT8_C(214), UINT8_C( 81), UINT8_C(222), UINT8_C(210), UINT8_C( 26), UINT8_C( 31), UINT8_C(254), UINT8_C( 68), UINT8_C(249), UINT8_C(185), UINT8_C( 46), UINT8_C(235), UINT8_C(183), UINT8_C( 21), UINT8_C( 83), UINT8_C(148), UINT8_C(159), UINT8_C(187), UINT8_C( 73), UINT8_C( 57), UINT8_C(219), UINT8_C( 21), UINT8_C( 82), UINT8_C(137), UINT8_C(130), UINT8_C(126), UINT8_C( 91), UINT8_C(115), UINT8_C( 31), UINT8_C(177), UINT8_C( 28), UINT8_C(150), UINT8_C(238), UINT8_C( 65), UINT8_C(152), UINT8_C( 81), UINT8_C( 38), UINT8_C(172), UINT8_C(254), UINT8_C(242), UINT8_C( 85), UINT8_C(176), UINT8_C( 80), UINT8_C( 48), UINT8_C( 93), UINT8_C( 79), UINT8_C(127)), UINT64_C(0x44ae4230b77305e) }, { simde_x_mm512_set_epu8(UINT8_C( 50), UINT8_C( 43), UINT8_C(188), UINT8_C( 97), UINT8_C(230), UINT8_C(153), UINT8_C( 71), UINT8_C(149), UINT8_C( 91), UINT8_C( 45), UINT8_C(245), UINT8_C( 47), UINT8_C( 29), UINT8_C(200), UINT8_C( 26), UINT8_C(247), UINT8_C( 10), UINT8_C( 36), UINT8_C(140), UINT8_C(203), UINT8_C( 41), UINT8_C( 1), UINT8_C(233), UINT8_C( 61), UINT8_C(129), UINT8_C(252), UINT8_C( 48), UINT8_C(188), UINT8_C( 89), UINT8_C(144), UINT8_C(225), UINT8_C(120), UINT8_C( 35), UINT8_C( 62), UINT8_C(235), UINT8_C(142), UINT8_C(152), UINT8_C( 57), UINT8_C( 42), UINT8_C(145), UINT8_C( 94), UINT8_C(193), UINT8_C( 87), UINT8_C( 64), UINT8_C(191), UINT8_C(254), UINT8_C(110), UINT8_C(248), UINT8_C( 63), UINT8_C(205), UINT8_C(252), UINT8_C( 32), UINT8_C(191), UINT8_C( 55), UINT8_C( 14), UINT8_C( 81), UINT8_C(133), UINT8_C(156), UINT8_C(217), UINT8_C(212), UINT8_C( 22), UINT8_C(112), UINT8_C( 16), UINT8_C( 15)), simde_x_mm512_set_epu8(UINT8_C( 50), UINT8_C(245), UINT8_C(188), UINT8_C(225), UINT8_C(105), UINT8_C(150), UINT8_C( 98), UINT8_C( 51), UINT8_C( 58), UINT8_C(103), UINT8_C(111), 
UINT8_C(129), UINT8_C( 68), UINT8_C(200), UINT8_C(124), UINT8_C(137), UINT8_C( 74), UINT8_C(194), UINT8_C(140), UINT8_C( 37), UINT8_C(244), UINT8_C(114), UINT8_C( 0), UINT8_C( 61), UINT8_C(103), UINT8_C(252), UINT8_C(151), UINT8_C(188), UINT8_C( 39), UINT8_C(156), UINT8_C(163), UINT8_C( 11), UINT8_C(176), UINT8_C(237), UINT8_C(234), UINT8_C(217), UINT8_C(127), UINT8_C(218), UINT8_C(131), UINT8_C(145), UINT8_C( 84), UINT8_C(160), UINT8_C( 87), UINT8_C(234), UINT8_C(251), UINT8_C(253), UINT8_C(129), UINT8_C( 41), UINT8_C( 74), UINT8_C( 72), UINT8_C(252), UINT8_C( 28), UINT8_C(141), UINT8_C( 93), UINT8_C(102), UINT8_C( 44), UINT8_C(153), UINT8_C(227), UINT8_C(206), UINT8_C( 48), UINT8_C(160), UINT8_C(206), UINT8_C( 46), UINT8_C(191)), UINT64_C(0xda1128b28c55930) }, { simde_x_mm512_set_epu8(UINT8_C(159), UINT8_C( 28), UINT8_C(198), UINT8_C( 11), UINT8_C(242), UINT8_C(126), UINT8_C( 81), UINT8_C( 45), UINT8_C(233), UINT8_C(120), UINT8_C(173), UINT8_C(240), UINT8_C( 7), UINT8_C( 51), UINT8_C(199), UINT8_C(206), UINT8_C(235), UINT8_C( 98), UINT8_C( 88), UINT8_C( 0), UINT8_C(190), UINT8_C( 3), UINT8_C(124), UINT8_C(143), UINT8_C( 50), UINT8_C( 32), UINT8_C(171), UINT8_C(163), UINT8_C(212), UINT8_C(243), UINT8_C(162), UINT8_C( 17), UINT8_C(122), UINT8_C( 79), UINT8_C(140), UINT8_C( 43), UINT8_C(179), UINT8_C(131), UINT8_C(233), UINT8_C(136), UINT8_C( 96), UINT8_C(192), UINT8_C(233), UINT8_C(210), UINT8_C(227), UINT8_C(185), UINT8_C( 71), UINT8_C(176), UINT8_C( 44), UINT8_C(164), UINT8_C(225), UINT8_C( 26), UINT8_C( 8), UINT8_C( 52), UINT8_C(117), UINT8_C(123), UINT8_C(193), UINT8_C( 45), UINT8_C( 95), UINT8_C( 24), UINT8_C(148), UINT8_C( 18), UINT8_C(196), UINT8_C( 28)), simde_x_mm512_set_epu8(UINT8_C(233), UINT8_C(155), UINT8_C(116), UINT8_C(127), UINT8_C( 96), UINT8_C( 40), UINT8_C(159), UINT8_C( 40), UINT8_C( 86), UINT8_C(212), UINT8_C( 70), UINT8_C(185), UINT8_C( 62), UINT8_C(235), UINT8_C( 66), UINT8_C( 68), UINT8_C(169), UINT8_C(195), UINT8_C( 48), UINT8_C(186), 
UINT8_C( 18), UINT8_C(178), UINT8_C(158), UINT8_C(117), UINT8_C( 74), UINT8_C( 32), UINT8_C( 93), UINT8_C(125), UINT8_C(209), UINT8_C(196), UINT8_C(170), UINT8_C(117), UINT8_C(122), UINT8_C(202), UINT8_C( 50), UINT8_C(123), UINT8_C(225), UINT8_C(182), UINT8_C(192), UINT8_C( 54), UINT8_C(175), UINT8_C( 60), UINT8_C( 31), UINT8_C(233), UINT8_C(108), UINT8_C(137), UINT8_C(164), UINT8_C(176), UINT8_C(226), UINT8_C(219), UINT8_C( 51), UINT8_C(220), UINT8_C( 8), UINT8_C( 52), UINT8_C( 97), UINT8_C(123), UINT8_C(207), UINT8_C(132), UINT8_C( 95), UINT8_C(173), UINT8_C( 70), UINT8_C(206), UINT8_C(195), UINT8_C( 25)), UINT64_C(0x2db3a93c236c220b) }, { simde_x_mm512_set_epu8(UINT8_C(121), UINT8_C(236), UINT8_C(167), UINT8_C(162), UINT8_C(112), UINT8_C(229), UINT8_C( 81), UINT8_C(202), UINT8_C(192), UINT8_C(142), UINT8_C( 48), UINT8_C(167), UINT8_C(195), UINT8_C( 26), UINT8_C( 43), UINT8_C( 29), UINT8_C( 0), UINT8_C(125), UINT8_C(214), UINT8_C(189), UINT8_C( 15), UINT8_C(120), UINT8_C( 36), UINT8_C( 40), UINT8_C(203), UINT8_C( 34), UINT8_C(148), UINT8_C(198), UINT8_C( 26), UINT8_C(145), UINT8_C( 63), UINT8_C(158), UINT8_C( 65), UINT8_C( 8), UINT8_C(132), UINT8_C( 96), UINT8_C(243), UINT8_C(158), UINT8_C( 99), UINT8_C( 90), UINT8_C( 56), UINT8_C(211), UINT8_C(148), UINT8_C(237), UINT8_C(132), UINT8_C(229), UINT8_C( 22), UINT8_C(126), UINT8_C(150), UINT8_C(188), UINT8_C(196), UINT8_C( 8), UINT8_C(202), UINT8_C( 93), UINT8_C(223), UINT8_C(229), UINT8_C(249), UINT8_C( 27), UINT8_C(134), UINT8_C(168), UINT8_C( 23), UINT8_C( 6), UINT8_C( 45), UINT8_C(235)), simde_x_mm512_set_epu8(UINT8_C(105), UINT8_C( 52), UINT8_C( 85), UINT8_C(152), UINT8_C( 57), UINT8_C(225), UINT8_C(218), UINT8_C(132), UINT8_C(149), UINT8_C(254), UINT8_C( 55), UINT8_C( 46), UINT8_C(185), UINT8_C( 77), UINT8_C( 18), UINT8_C( 70), UINT8_C( 89), UINT8_C(125), UINT8_C(214), UINT8_C(131), UINT8_C(121), UINT8_C(245), UINT8_C(187), UINT8_C(197), UINT8_C(203), UINT8_C( 34), UINT8_C( 9), UINT8_C( 64), UINT8_C(195), 
UINT8_C(231), UINT8_C(141), UINT8_C(100), UINT8_C( 65), UINT8_C( 8), UINT8_C( 69), UINT8_C(248), UINT8_C(241), UINT8_C(205), UINT8_C( 1), UINT8_C( 90), UINT8_C(115), UINT8_C( 51), UINT8_C(165), UINT8_C( 56), UINT8_C( 64), UINT8_C(217), UINT8_C(137), UINT8_C(228), UINT8_C(202), UINT8_C( 28), UINT8_C( 54), UINT8_C(248), UINT8_C(202), UINT8_C(128), UINT8_C(228), UINT8_C(185), UINT8_C(107), UINT8_C(190), UINT8_C(142), UINT8_C(168), UINT8_C( 34), UINT8_C(173), UINT8_C(235), UINT8_C(192)), UINT64_C(0xfd9a10312a5c6181) }, { simde_x_mm512_set_epu8(UINT8_C( 33), UINT8_C(121), UINT8_C(125), UINT8_C( 35), UINT8_C(153), UINT8_C(208), UINT8_C(234), UINT8_C( 38), UINT8_C(175), UINT8_C( 9), UINT8_C(245), UINT8_C(132), UINT8_C( 71), UINT8_C( 9), UINT8_C(214), UINT8_C(118), UINT8_C( 67), UINT8_C( 45), UINT8_C( 51), UINT8_C(164), UINT8_C(126), UINT8_C(108), UINT8_C(133), UINT8_C(185), UINT8_C(113), UINT8_C( 32), UINT8_C( 71), UINT8_C( 3), UINT8_C(230), UINT8_C( 82), UINT8_C(175), UINT8_C(236), UINT8_C(201), UINT8_C(112), UINT8_C( 66), UINT8_C( 37), UINT8_C( 67), UINT8_C(187), UINT8_C( 64), UINT8_C( 39), UINT8_C( 72), UINT8_C( 45), UINT8_C(120), UINT8_C(251), UINT8_C(147), UINT8_C( 62), UINT8_C( 17), UINT8_C( 31), UINT8_C(226), UINT8_C(198), UINT8_C( 56), UINT8_C( 21), UINT8_C( 72), UINT8_C(181), UINT8_C(222), UINT8_C(120), UINT8_C( 95), UINT8_C(108), UINT8_C( 32), UINT8_C( 64), UINT8_C(128), UINT8_C(102), UINT8_C(235), UINT8_C( 28)), simde_x_mm512_set_epu8(UINT8_C(100), UINT8_C(121), UINT8_C( 18), UINT8_C( 28), UINT8_C(139), UINT8_C(107), UINT8_C( 3), UINT8_C(194), UINT8_C( 42), UINT8_C( 72), UINT8_C( 91), UINT8_C( 86), UINT8_C(184), UINT8_C( 9), UINT8_C(176), UINT8_C(118), UINT8_C(122), UINT8_C(148), UINT8_C(186), UINT8_C(193), UINT8_C( 56), UINT8_C( 71), UINT8_C(242), UINT8_C( 49), UINT8_C(183), UINT8_C( 53), UINT8_C(227), UINT8_C( 3), UINT8_C(183), UINT8_C( 43), UINT8_C(234), UINT8_C( 85), UINT8_C(230), UINT8_C(247), UINT8_C( 66), UINT8_C( 1), UINT8_C(243), UINT8_C( 60), 
UINT8_C(137), UINT8_C(173), UINT8_C(134), UINT8_C(192), UINT8_C(173), UINT8_C(182), UINT8_C(119), UINT8_C(248), UINT8_C( 12), UINT8_C(113), UINT8_C(244), UINT8_C(172), UINT8_C( 6), UINT8_C( 69), UINT8_C( 2), UINT8_C(181), UINT8_C(222), UINT8_C(130), UINT8_C( 3), UINT8_C(128), UINT8_C(247), UINT8_C( 24), UINT8_C( 11), UINT8_C(162), UINT8_C(224), UINT8_C(110)), UINT64_C(0x3eb20d0d141a689a) }, { simde_x_mm512_set_epu8(UINT8_C( 2), UINT8_C(175), UINT8_C( 14), UINT8_C( 90), UINT8_C(156), UINT8_C(134), UINT8_C(221), UINT8_C( 81), UINT8_C(242), UINT8_C(214), UINT8_C(125), UINT8_C(131), UINT8_C(199), UINT8_C( 90), UINT8_C(247), UINT8_C( 63), UINT8_C( 53), UINT8_C( 77), UINT8_C( 63), UINT8_C(172), UINT8_C( 27), UINT8_C( 22), UINT8_C(228), UINT8_C(219), UINT8_C( 65), UINT8_C(118), UINT8_C(130), UINT8_C( 97), UINT8_C(109), UINT8_C(137), UINT8_C(142), UINT8_C(181), UINT8_C(131), UINT8_C(121), UINT8_C(128), UINT8_C(103), UINT8_C( 0), UINT8_C(101), UINT8_C(215), UINT8_C( 89), UINT8_C( 67), UINT8_C(191), UINT8_C( 9), UINT8_C(249), UINT8_C(193), UINT8_C( 13), UINT8_C(105), UINT8_C( 92), UINT8_C(238), UINT8_C(235), UINT8_C(154), UINT8_C(142), UINT8_C( 74), UINT8_C(121), UINT8_C(211), UINT8_C( 52), UINT8_C(193), UINT8_C(163), UINT8_C( 98), UINT8_C(106), UINT8_C(147), UINT8_C(209), UINT8_C( 37), UINT8_C( 70)), simde_x_mm512_set_epu8(UINT8_C(214), UINT8_C(132), UINT8_C( 54), UINT8_C( 74), UINT8_C(164), UINT8_C( 99), UINT8_C( 79), UINT8_C(253), UINT8_C( 61), UINT8_C(167), UINT8_C( 84), UINT8_C(162), UINT8_C( 31), UINT8_C(140), UINT8_C(189), UINT8_C(154), UINT8_C(184), UINT8_C(165), UINT8_C(151), UINT8_C(148), UINT8_C(212), UINT8_C( 74), UINT8_C(228), UINT8_C(124), UINT8_C(120), UINT8_C(215), UINT8_C(177), UINT8_C(122), UINT8_C( 87), UINT8_C(137), UINT8_C( 54), UINT8_C(254), UINT8_C(209), UINT8_C( 84), UINT8_C(130), UINT8_C(192), UINT8_C( 14), UINT8_C( 11), UINT8_C( 37), UINT8_C(233), UINT8_C( 67), UINT8_C(124), UINT8_C( 58), UINT8_C(162), UINT8_C( 30), UINT8_C(223), UINT8_C( 70), 
UINT8_C(232), UINT8_C( 38), UINT8_C(159), UINT8_C(200), UINT8_C(196), UINT8_C(197), UINT8_C( 65), UINT8_C(182), UINT8_C( 45), UINT8_C(245), UINT8_C( 55), UINT8_C(174), UINT8_C( 12), UINT8_C(106), UINT8_C( 22), UINT8_C(132), UINT8_C(252)), UINT64_C(0x56ea110a465ac75c) }, { simde_x_mm512_set_epu8(UINT8_C( 18), UINT8_C( 13), UINT8_C( 14), UINT8_C( 4), UINT8_C(253), UINT8_C(192), UINT8_C( 17), UINT8_C(141), UINT8_C( 21), UINT8_C(222), UINT8_C(125), UINT8_C(196), UINT8_C(184), UINT8_C( 74), UINT8_C(251), UINT8_C(235), UINT8_C(215), UINT8_C( 22), UINT8_C( 45), UINT8_C(102), UINT8_C( 59), UINT8_C(176), UINT8_C(241), UINT8_C(193), UINT8_C( 84), UINT8_C(185), UINT8_C( 8), UINT8_C( 12), UINT8_C(245), UINT8_C(180), UINT8_C( 62), UINT8_C( 93), UINT8_C(181), UINT8_C(179), UINT8_C(172), UINT8_C(148), UINT8_C(221), UINT8_C( 14), UINT8_C(196), UINT8_C( 18), UINT8_C( 23), UINT8_C(196), UINT8_C(193), UINT8_C(142), UINT8_C(201), UINT8_C( 75), UINT8_C(157), UINT8_C(201), UINT8_C( 58), UINT8_C( 76), UINT8_C(154), UINT8_C(138), UINT8_C( 10), UINT8_C( 39), UINT8_C(119), UINT8_C( 85), UINT8_C(248), UINT8_C(184), UINT8_C(196), UINT8_C(162), UINT8_C(144), UINT8_C(119), UINT8_C(124), UINT8_C( 76)), simde_x_mm512_set_epu8(UINT8_C( 18), UINT8_C(182), UINT8_C( 14), UINT8_C( 36), UINT8_C(249), UINT8_C(113), UINT8_C( 40), UINT8_C( 48), UINT8_C(149), UINT8_C(222), UINT8_C(181), UINT8_C( 85), UINT8_C(221), UINT8_C(140), UINT8_C( 65), UINT8_C(235), UINT8_C( 15), UINT8_C( 3), UINT8_C( 45), UINT8_C( 21), UINT8_C( 72), UINT8_C( 93), UINT8_C(108), UINT8_C(125), UINT8_C( 1), UINT8_C( 75), UINT8_C( 21), UINT8_C(220), UINT8_C(130), UINT8_C(122), UINT8_C( 71), UINT8_C( 76), UINT8_C( 28), UINT8_C(200), UINT8_C( 32), UINT8_C(101), UINT8_C(149), UINT8_C(145), UINT8_C(168), UINT8_C(237), UINT8_C(179), UINT8_C( 19), UINT8_C(235), UINT8_C(145), UINT8_C(188), UINT8_C( 82), UINT8_C(138), UINT8_C(180), UINT8_C( 47), UINT8_C(127), UINT8_C( 62), UINT8_C(240), UINT8_C( 10), UINT8_C(242), UINT8_C(156), UINT8_C( 86), 
UINT8_C( 29), UINT8_C(107), UINT8_C( 56), UINT8_C( 21), UINT8_C( 24), UINT8_C( 68), UINT8_C(160), UINT8_C( 64)), UINT64_C(0xd12d7cdba4ba0fd) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask64 r = simde_mm512_cmpgt_epu8_mask(test_vec[i].a, test_vec[i].b); simde_assert_equal_mmask64(r, test_vec[i].r); } return 0; } static int test_simde_mm512_cmpgt_epi32_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__mmask16 r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( -126651070), INT32_C( 1757388710), INT32_C( 617530196), INT32_C( 407807901), INT32_C( 1271989524), INT32_C( 1251214807), INT32_C(-1247045111), INT32_C(-1024057759), INT32_C( 50729453), INT32_C( 464444874), INT32_C( 1840702207), INT32_C( 1916050591), INT32_C( 484601458), INT32_C( -782065931), INT32_C(-1485735658), INT32_C(-1326388993)), simde_mm512_set_epi32(INT32_C( 111072774), INT32_C( 1757388710), INT32_C( 1496897687), INT32_C( 407807901), INT32_C( 1271989524), INT32_C( 1496985365), INT32_C(-1247045111), INT32_C(-1225014979), INT32_C( 50729453), INT32_C( 464444874), INT32_C( 924537351), INT32_C( 1916050591), INT32_C( 484601458), INT32_C( -782065931), INT32_C(-1485735658), INT32_C(-1326388993)), UINT16_C( 288) }, { simde_mm512_set_epi32(INT32_C( 2106044062), INT32_C( 1752498924), INT32_C(-1086695378), INT32_C( 627787891), INT32_C(-1783053554), INT32_C(-1485517848), INT32_C( 1105114322), INT32_C(-1862707588), INT32_C( 823946037), INT32_C(-2030244995), INT32_C( -219180660), INT32_C( 810910725), INT32_C( -642105946), INT32_C( 760774613), INT32_C( -62603432), INT32_C(-2064446807)), simde_mm512_set_epi32(INT32_C( 2106044062), INT32_C( 1752498924), INT32_C( -582421212), INT32_C( 1649238471), INT32_C( 1446053889), INT32_C(-1485517848), INT32_C( 1105114322), INT32_C(-1862707588), INT32_C( -846383385), INT32_C(-2030244995), INT32_C( -905258415), INT32_C( 810910725), INT32_C(-1668595380), INT32_C( -760772652), INT32_C( 2145797270), INT32_C( 
57887151)), UINT16_C( 172) }, { simde_mm512_set_epi32(INT32_C( 948728954), INT32_C( 965445469), INT32_C( -298261731), INT32_C( 1889741023), INT32_C( 101476677), INT32_C( -598834633), INT32_C( 1592735604), INT32_C( 428243294), INT32_C(-2001034764), INT32_C( -639043872), INT32_C( 567427880), INT32_C(-1305749494), INT32_C( -204185535), INT32_C( -550643286), INT32_C( -170363385), INT32_C( 1483518213)), simde_mm512_set_epi32(INT32_C( 948728954), INT32_C( 965445469), INT32_C( 364841947), INT32_C(-1221758106), INT32_C( 101476677), INT32_C( -598834633), INT32_C( 1592735604), INT32_C(-1456245493), INT32_C(-2001034764), INT32_C( -639043872), INT32_C( 567427880), INT32_C(-1305749494), INT32_C( -204185535), INT32_C( 830345587), INT32_C( -170363385), INT32_C( -603563929)), UINT16_C( 4353) }, { simde_mm512_set_epi32(INT32_C( -163413000), INT32_C( -831194762), INT32_C( -664019578), INT32_C( 2031024026), INT32_C( 1912388774), INT32_C( 982200166), INT32_C( 596130243), INT32_C( 446035443), INT32_C( 1373006598), INT32_C(-1540837035), INT32_C( 1581631435), INT32_C(-2083299381), INT32_C( 1992847454), INT32_C( 448258110), INT32_C( 875345838), INT32_C( 1612926819)), simde_mm512_set_epi32(INT32_C( 81573630), INT32_C( -831194762), INT32_C( -664019578), INT32_C( 2031024026), INT32_C( 1912388774), INT32_C(-2072470454), INT32_C( 596130243), INT32_C( 1961646011), INT32_C( 1373006598), INT32_C( -197223193), INT32_C( 1581631435), INT32_C(-2083299381), INT32_C( 1457480410), INT32_C( 1181119535), INT32_C( 1263228451), INT32_C(-1998542716)), UINT16_C( 1033) }, { simde_mm512_set_epi32(INT32_C( 1436278246), INT32_C( 99684976), INT32_C( 1345577484), INT32_C( 166701508), INT32_C( -780731111), INT32_C( -840749601), INT32_C( 1523342039), INT32_C( 1058674665), INT32_C( -523908416), INT32_C( 1659465207), INT32_C(-1927062215), INT32_C(-1156760340), INT32_C( 715569317), INT32_C(-1515814414), INT32_C( 1243253180), INT32_C( 2080215882)), simde_mm512_set_epi32(INT32_C( 432908742), INT32_C( 99684976), INT32_C( 
-14330157), INT32_C(-1223154556), INT32_C( -780731111), INT32_C( 696697372), INT32_C( 1523342039), INT32_C( 1058674665), INT32_C( -523908416), INT32_C( 1659465207), INT32_C(-1927062215), INT32_C(-1156760340), INT32_C( -171262349), INT32_C(-1515814414), INT32_C(-1234169573), INT32_C(-1847568101)), UINT16_C(45067) }, { simde_mm512_set_epi32(INT32_C( 1399825551), INT32_C(-1064541474), INT32_C( 2112452992), INT32_C( 575137303), INT32_C( -979898374), INT32_C(-1476679333), INT32_C( 1320423852), INT32_C( 1767893242), INT32_C( -389599783), INT32_C(-1459729991), INT32_C( 995424065), INT32_C( -522129019), INT32_C( -466751981), INT32_C( 1371238810), INT32_C( 1006677155), INT32_C( 1609037982)), simde_mm512_set_epi32(INT32_C( 1399825551), INT32_C(-1064541474), INT32_C( 2112452992), INT32_C( 134645750), INT32_C( 500192289), INT32_C( 1600988950), INT32_C( 1320423852), INT32_C( 1198845893), INT32_C( -389599783), INT32_C( 1504468794), INT32_C( 995424065), INT32_C(-2123865443), INT32_C( -466751981), INT32_C(-1711282630), INT32_C( 1006677155), INT32_C( 1609037982)), UINT16_C( 4372) }, { simde_mm512_set_epi32(INT32_C(-1862774816), INT32_C( 28374488), INT32_C( 250156705), INT32_C( -932694837), INT32_C(-2079251566), INT32_C( -246439183), INT32_C( -875109534), INT32_C( 1740046060), INT32_C( 1735819269), INT32_C( 1371885292), INT32_C( -914870851), INT32_C( -473073032), INT32_C( -580976455), INT32_C( 1688786028), INT32_C( 637430498), INT32_C(-1740972685)), simde_mm512_set_epi32(INT32_C(-1862774816), INT32_C( 28374488), INT32_C( 580744870), INT32_C( -666445473), INT32_C( -129274908), INT32_C( -928751425), INT32_C( -388443661), INT32_C( 1740046060), INT32_C(-1909361652), INT32_C( 1371885292), INT32_C( 857928163), INT32_C( 37075976), INT32_C( -580976455), INT32_C(-1545948444), INT32_C( 637430498), INT32_C(-1740972685)), UINT16_C( 1156) }, { simde_mm512_set_epi32(INT32_C(-1890406982), INT32_C( 2110791016), INT32_C( 1083476771), INT32_C( -620691621), INT32_C( 543588207), INT32_C( -227503647), 
INT32_C( -759273149), INT32_C( 775085710), INT32_C( 1404885802), INT32_C(-1395233065), INT32_C( 832528180), INT32_C( 1065959566), INT32_C(-2083201484), INT32_C( 937916550), INT32_C( -710457746), INT32_C( -246147415)), simde_mm512_set_epi32(INT32_C( 84669207), INT32_C( 470641840), INT32_C( 1083476771), INT32_C( -620691621), INT32_C( 1099959895), INT32_C( -961354454), INT32_C(-1751384146), INT32_C( 775085710), INT32_C( 1075765582), INT32_C( 834655006), INT32_C( 832528180), INT32_C( 1065959566), INT32_C( 954342416), INT32_C( 937916550), INT32_C(-1946395018), INT32_C( 757651617)), UINT16_C(18050) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask16 r = simde_mm512_cmpgt_epi32_mask(test_vec[i].a, test_vec[i].b); simde_assert_equal_mmask16(r, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_cmpgt_epi32_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512i a; simde__m512i b; simde__mmask16 r; } test_vec[8] = { { UINT16_C(12249), simde_mm512_set_epi32(INT32_C(-1151856667), INT32_C( -49918748), INT32_C(-1709830250), INT32_C( 1750293451), INT32_C(-1728641738), INT32_C( 79295022), INT32_C( 308064941), INT32_C( 1216157597), INT32_C( 320231148), INT32_C( -697875804), INT32_C( 990066955), INT32_C(-2091005609), INT32_C( 1037816180), INT32_C( -748290940), INT32_C( 1166526776), INT32_C(-1428331975)), simde_mm512_set_epi32(INT32_C( 1846695950), INT32_C( 884046092), INT32_C( -392734818), INT32_C(-1618937497), INT32_C( 670851975), INT32_C(-1269946840), INT32_C(-1917256160), INT32_C( 228203505), INT32_C( 1263965918), INT32_C(-2053175323), INT32_C(-1206891799), INT32_C( -371464947), INT32_C( -99745007), INT32_C( -847464628), INT32_C( -93605380), INT32_C(-1859612096)), UINT16_C( 1865) }, { UINT16_C(47912), simde_mm512_set_epi32(INT32_C( 238725197), INT32_C( 1521340392), INT32_C(-2077410041), INT32_C( 2110592657), INT32_C( 630925822), INT32_C( 689275449), INT32_C(-1970822997), INT32_C(-1836727953), INT32_C( 
237271984), INT32_C( -578417637), INT32_C( -768235708), INT32_C( 1362514984), INT32_C( 2019276284), INT32_C(-1411239380), INT32_C( 2052618114), INT32_C(-1238213534)), simde_mm512_set_epi32(INT32_C( -669654436), INT32_C( -822780196), INT32_C( 205688995), INT32_C( 1680146061), INT32_C( 393599682), INT32_C(-1451941808), INT32_C( 947305201), INT32_C( -75999449), INT32_C( -484166756), INT32_C( 833555969), INT32_C( -549302423), INT32_C( 1610578173), INT32_C(-2070337581), INT32_C( 664412106), INT32_C( 255732930), INT32_C( 1319359183)), UINT16_C(38920) }, { UINT16_C(35968), simde_mm512_set_epi32(INT32_C( -359339347), INT32_C( -666198998), INT32_C( 830421084), INT32_C(-2087460228), INT32_C(-1475104408), INT32_C( 721289147), INT32_C( 1281161083), INT32_C( 852871551), INT32_C(-1589170839), INT32_C( -746357372), INT32_C( -154169474), INT32_C( -148954645), INT32_C(-1357418925), INT32_C(-2112116028), INT32_C( 174617048), INT32_C( -4103177)), simde_mm512_set_epi32(INT32_C( -871886017), INT32_C(-1688556984), INT32_C( 524690471), INT32_C( -124192434), INT32_C( 1423100629), INT32_C( -161239972), INT32_C( -396308843), INT32_C( 1070701282), INT32_C(-1826016016), INT32_C( 425347357), INT32_C( 366897524), INT32_C( -401692546), INT32_C( -812557761), INT32_C( 1614519786), INT32_C(-1648390428), INT32_C( 1830061179)), UINT16_C(33920) }, { UINT16_C(16809), simde_mm512_set_epi32(INT32_C(-1280324837), INT32_C( -161347329), INT32_C( 735858798), INT32_C( -674499230), INT32_C( -21391979), INT32_C( -381504266), INT32_C( 1528317100), INT32_C( 430345703), INT32_C( -679679907), INT32_C( 515475896), INT32_C( -731085275), INT32_C( 1308429395), INT32_C( 582932299), INT32_C( 489160586), INT32_C( 1760701165), INT32_C( 118948129)), simde_mm512_set_epi32(INT32_C( 1240889055), INT32_C( 888122014), INT32_C(-1469013917), INT32_C( 1209972337), INT32_C( -691361230), INT32_C( 361393556), INT32_C(-1167116782), INT32_C( 1467757879), INT32_C( 2124803699), INT32_C( -154870634), INT32_C(-1603618479), 
INT32_C(-2032203238), INT32_C( 2111773805), INT32_C( -496949166), INT32_C( 1844580247), INT32_C(-2053814402)), UINT16_C( 33) }, { UINT16_C(44464), simde_mm512_set_epi32(INT32_C( 1072149321), INT32_C( 1813169024), INT32_C(-1284365076), INT32_C(-1623700182), INT32_C( 530512850), INT32_C( 116537892), INT32_C( 258206492), INT32_C( 690441736), INT32_C( 1005371642), INT32_C( 1116924342), INT32_C( 1297564984), INT32_C( -835039581), INT32_C( 1286263864), INT32_C(-1749149234), INT32_C( 558298824), INT32_C( 1064688827)), simde_mm512_set_epi32(INT32_C( 1323805616), INT32_C(-1558886902), INT32_C( 1778691088), INT32_C( 108147743), INT32_C( 1106435712), INT32_C( -967535450), INT32_C( 600280311), INT32_C( 109364043), INT32_C( 423389578), INT32_C( 1225761441), INT32_C( -440804681), INT32_C( -707540326), INT32_C(-1898655855), INT32_C(-1268681648), INT32_C(-1360056367), INT32_C( -275254487)), UINT16_C( 1440) }, { UINT16_C(23993), simde_mm512_set_epi32(INT32_C(-2038065128), INT32_C( -446679229), INT32_C( 78082001), INT32_C( 379830516), INT32_C(-1929569644), INT32_C( 1595859976), INT32_C( 320798226), INT32_C( -738570818), INT32_C( -165441023), INT32_C( -172594873), INT32_C( 912601062), INT32_C( -56802863), INT32_C( 503255814), INT32_C( 2046199592), INT32_C( -622599746), INT32_C( 1337235103)), simde_mm512_set_epi32(INT32_C(-1519343201), INT32_C( -448055921), INT32_C(-1909251875), INT32_C( -347447915), INT32_C( 397553753), INT32_C( 713040821), INT32_C(-1458903601), INT32_C( -45886582), INT32_C( 1230465483), INT32_C( -828483015), INT32_C( -699493978), INT32_C(-1811052070), INT32_C( 1577065087), INT32_C( -109599940), INT32_C(-1093577090), INT32_C(-1788879767)), UINT16_C(21553) }, { UINT16_C(10358), simde_mm512_set_epi32(INT32_C( 648390363), INT32_C( -30837841), INT32_C(-1635592815), INT32_C( -694389961), INT32_C( -883952626), INT32_C( -761345991), INT32_C( 346040825), INT32_C(-1780780575), INT32_C( 1510717568), INT32_C(-1185143236), INT32_C( 2143540932), INT32_C( 880567806), 
INT32_C(-1670993371), INT32_C(-1942419167), INT32_C(-1196759463), INT32_C( 1386099146)), simde_mm512_set_epi32(INT32_C(-1614031176), INT32_C( 414071648), INT32_C(-1152911954), INT32_C( 424701353), INT32_C( 1739922394), INT32_C( -506382165), INT32_C( 257126844), INT32_C( 1724223193), INT32_C( 1096709845), INT32_C(-1643231112), INT32_C(-1639890652), INT32_C( -403971200), INT32_C( 1318667734), INT32_C( 206062573), INT32_C( -18472190), INT32_C( -1701112)), UINT16_C( 112) }, { UINT16_C(35023), simde_mm512_set_epi32(INT32_C( 228305355), INT32_C(-1904004735), INT32_C( 118523411), INT32_C( 1661507666), INT32_C(-1400326500), INT32_C( 63010183), INT32_C( 62197704), INT32_C( -635599967), INT32_C( 1677709284), INT32_C(-1294080152), INT32_C( -900737233), INT32_C(-1991940005), INT32_C( -240404149), INT32_C(-1448242105), INT32_C(-1972665039), INT32_C( 1511694245)), simde_mm512_set_epi32(INT32_C(-1506289043), INT32_C( 82234507), INT32_C( -557930538), INT32_C( -911612825), INT32_C( 1352158017), INT32_C( -554125937), INT32_C( -727289650), INT32_C(-1102664191), INT32_C( 1941639559), INT32_C(-2124299952), INT32_C( -385431179), INT32_C( 112242864), INT32_C( -66697069), INT32_C( 1379403470), INT32_C(-1996504296), INT32_C( 658235880)), UINT16_C(32835) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask16 r = simde_mm512_mask_cmpgt_epi32_mask(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_equal_mmask16(r, test_vec[i].r); } return 0; } static int test_simde_mm512_cmpgt_epi64_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__mmask8 r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C(-3344943500899736927), INT64_C( -508674271294480923), INT64_C( 4367550852745697236), INT64_C(-1765523250257788813), INT64_C(-6325172456788566604), INT64_C( 3340966423446181237), INT64_C( 4899669249714194025), INT64_C(-3109134868060088778)), simde_mm512_set_epi64(INT64_C(-3344943500899736927), INT64_C( -115747659216396604), 
INT64_C( 7173930031566073597), INT64_C(-1765523250257788813), INT64_C(-4525526016698522721), INT64_C( 3340966423446181237), INT64_C( 4899669249714194025), INT64_C(-3109134868060088778)), UINT8_C( 0) }, { simde_mm512_set_epi64(INT64_C( 161300839730502102), INT64_C(-4154011080047619988), INT64_C( 3510183776865199207), INT64_C( 5188039415407364902), INT64_C(-8649541106015277925), INT64_C( 2036065042708484429), INT64_C(-7714656548902029220), INT64_C(-9105101012109136347)), simde_mm512_set_epi64(INT64_C( 161300839730502102), INT64_C(-4154011080047619988), INT64_C( 6120426103009778754), INT64_C( -248023738166197182), INT64_C(-8649541106015277925), INT64_C( 2693047687180833180), INT64_C(-7714656548902029220), INT64_C(-9105101012109136347)), UINT8_C( 16) }, { simde_mm512_set_epi64(INT64_C(-2825253727352691686), INT64_C( 4405965118825132522), INT64_C(-6791426899562410985), INT64_C(-4409900925389880930), INT64_C( 1845845856613597884), INT64_C(-4842241423465696621), INT64_C( 163081221433998591), INT64_C( 4482804709675222173)), simde_mm512_set_epi64(INT64_C( 3285810068291760082), INT64_C( 4405965118825132522), INT64_C(-6791426899562410985), INT64_C(-4943963491966669187), INT64_C( 1845845856613597884), INT64_C( 7384036385676540465), INT64_C(-7313503223753260102), INT64_C( 5128036791088991318)), UINT8_C( 18) }, { simde_mm512_set_epi64(INT64_C(-1633105180711142836), INT64_C(-4287337651200520652), INT64_C( 8346707004388378871), INT64_C(-5848595418894109542), INT64_C(-7300386321370732776), INT64_C( -648586863376006844), INT64_C(-3473939784680689044), INT64_C(-1628617817613399979)), simde_mm512_set_epi64(INT64_C( 1934898870952452550), INT64_C(-4287337651200520652), INT64_C( 1557479703737443505), INT64_C(-5848595418894109542), INT64_C(-7179299072208562799), INT64_C( -648586863376006844), INT64_C(-3473939784680689044), INT64_C( 2817575692173645704)), UINT8_C( 32) }, { simde_mm512_set_epi64(INT64_C(-8952123954418726140), INT64_C( 5461301954902244462), INT64_C(-5820184907423972656), 
INT64_C( 420402622060248705), INT64_C(-1664441445637860283), INT64_C(-9088734991256809986), INT64_C( 5606803261787264235), INT64_C( 3392608019150722653)), simde_mm512_set_epi64(INT64_C(-8952123954418726140), INT64_C(-6318099565586317695), INT64_C(-5820184907423972656), INT64_C( 420402622060248705), INT64_C(-1664441445637860283), INT64_C( 6565206217411025613), INT64_C( 8598198622090956400), INT64_C(-7576266643160730964)), UINT8_C( 65) }, { simde_mm512_set_epi64(INT64_C(-3313522622815895345), INT64_C(-6452175545498154090), INT64_C( -937049212555566038), INT64_C(-4143019958444030865), INT64_C(-3410665359562609619), INT64_C( 966786109195223540), INT64_C( 7283097367839393163), INT64_C(-2640534975929709368)), simde_mm512_set_epi64(INT64_C(-3313522622815895345), INT64_C(-6452175545498154090), INT64_C( 7057508826094118763), INT64_C(-2466255848420720587), INT64_C(-3410665359562609619), INT64_C(-7091282311083875172), INT64_C(-5778676633446214654), INT64_C(-2640534975929709368)), UINT8_C( 6) }, { simde_mm512_set_epi64(INT64_C( 7946101066156420330), INT64_C(-1199223599247032864), INT64_C(-1997073553979895023), INT64_C(-2305098272308636911), INT64_C( -630363562210498119), INT64_C( 4426020973322885294), INT64_C( 8782098874831326668), INT64_C(-6058337867533474769)), simde_mm512_set_epi64(INT64_C( 7946101066156420330), INT64_C(-1199223599247032864), INT64_C(-1997073553979895023), INT64_C(-2305098272308636911), INT64_C( -630363562210498119), INT64_C( 8629524505567702841), INT64_C( 8782098874831326668), INT64_C( 2660246489815857132)), UINT8_C( 0) }, { simde_mm512_set_epi64(INT64_C(-2815932903868980343), INT64_C( 791308056982133256), INT64_C( 8277712790583824674), INT64_C(-3943050990178000322), INT64_C(-2127265598488665647), INT64_C( 4379715049649431166), INT64_C(-9154071905230416728), INT64_C(-2123362159730266714)), simde_mm512_set_epi64(INT64_C(-2815932903868980343), INT64_C( 791308056982133256), INT64_C(-6685750631550937327), INT64_C( 1585978438239301211), INT64_C( 
3432556139556266760), INT64_C( 4379715049649431166), INT64_C(-9154071905230416728), INT64_C(-1483875325616410698)), UINT8_C( 32) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask8 r = simde_mm512_cmpgt_epi64_mask(test_vec[i].a, test_vec[i].b); simde_assert_equal_mmask8(r, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_cmpgt_epi64_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; simde__m512i b; simde__mmask8 r; } test_vec[8] = { { UINT8_C( 16), simde_mm512_set_epi64(INT64_C( 2255026789087372129), INT64_C( 6954636019969939696), INT64_C( 8135587588110756767), INT64_C(-6775895683000468083), INT64_C( -900701109459786534), INT64_C(-7915280239032503360), INT64_C(-1216817989725562739), INT64_C(-8432176550710264558)), simde_mm512_set_epi64(INT64_C( 2255026789087372129), INT64_C( 6954636019969939696), INT64_C( 8135587588110756767), INT64_C(-6775895683000468083), INT64_C( -900701109459786534), INT64_C(-3598046066747317833), INT64_C(-1709941778912207388), INT64_C( 3169788859761289772)), UINT8_C( 0) }, { UINT8_C( 6), simde_mm512_set_epi64(INT64_C(-2239217399172416071), INT64_C(-1788368623206610532), INT64_C(-8621353419023287056), INT64_C( 2167646577764527484), INT64_C( 7373916114077140533), INT64_C( 4679914315089846813), INT64_C(-4785189148228967071), INT64_C(-2291884851836211058)), simde_mm512_set_epi64(INT64_C(-4674113889822188979), INT64_C( 6851357122574510655), INT64_C(-8621353419023287056), INT64_C( 2167646577764527484), INT64_C( 7373916114077140533), INT64_C(-2091630176064440564), INT64_C(-4166997073722787613), INT64_C(-2291884851836211058)), UINT8_C( 4) }, { UINT8_C(231), simde_mm512_set_epi64(INT64_C( -437845083503418422), INT64_C( 7030863114044209442), INT64_C( 8238064857893579595), INT64_C( 3062172269146343786), INT64_C( 7457006241836305381), INT64_C(-9078752323516671886), INT64_C(-6382075143273833301), INT64_C( 3840898770164583597)), 
simde_mm512_set_epi64(INT64_C(-4268408126209392137), INT64_C( 7030863114044209442), INT64_C( 663353489862938549), INT64_C( 3062172269146343786), INT64_C( 7457006241836305381), INT64_C( 8174310593560152615), INT64_C(-6382075143273833301), INT64_C(-4495103935185291795)), UINT8_C(161) }, { UINT8_C( 60), simde_mm512_set_epi64(INT64_C( 3543184366849060052), INT64_C( 8101296544771348510), INT64_C( 1359772700119148960), INT64_C(-8357828074665392254), INT64_C(-5672294839872616078), INT64_C(-2918525673450782654), INT64_C(-6303315662009814438), INT64_C( 4773615511108508590)), simde_mm512_set_epi64(INT64_C( 3543184366849060052), INT64_C( 286276641590586651), INT64_C( 1359772700119148960), INT64_C(-3217204137928962858), INT64_C(-5672294839872616078), INT64_C(-2918525673450782654), INT64_C(-2554453706959743566), INT64_C(-6197005744039272430)), UINT8_C( 0) }, { UINT8_C( 97), simde_mm512_set_epi64(INT64_C(-4278296701436995238), INT64_C( 3569507405853529045), INT64_C(-3380367559374400304), INT64_C(-4948363566435325304), INT64_C(-6678298576976263631), INT64_C( 8848650777417470336), INT64_C( 6320411494008491541), INT64_C( 2280208700508329072)), simde_mm512_set_epi64(INT64_C( 326944370261152484), INT64_C( 3569507405853529045), INT64_C( 715678757448860576), INT64_C(-4948363566435325304), INT64_C(-6678298576976263631), INT64_C(-5367013526541491012), INT64_C( 1008601224594483315), INT64_C( 2280208700508329072)), UINT8_C( 0) }, { UINT8_C(153), simde_mm512_set_epi64(INT64_C( 8361426666750729591), INT64_C(-6668359429543518025), INT64_C( 2952092805333509636), INT64_C( 8284871946243647248), INT64_C(-8896262213455925533), INT64_C( 3194469353298560173), INT64_C( 5466230282228711049), INT64_C(-1091365868294702661)), simde_mm512_set_epi64(INT64_C(-8667260419906723988), INT64_C(-6668359429543518025), INT64_C( 2952092805333509636), INT64_C( 8284871946243647248), INT64_C( 3185065043241333471), INT64_C( -134870333477219304), INT64_C( 5466230282228711049), INT64_C(-4571723861926798973)), UINT8_C(129) 
}, { UINT8_C( 60), simde_mm512_set_epi64(INT64_C(-5632979726637184794), INT64_C( 3790754159972080576), INT64_C(-7842038005332057398), INT64_C(-1292705499011984897), INT64_C( 7597886654367336733), INT64_C( 1457057381762531412), INT64_C(-1572264173383359920), INT64_C(-8716209376375056305)), simde_mm512_set_epi64(INT64_C(-5632979726637184794), INT64_C( 3790754159972080576), INT64_C( 1913605115921194336), INT64_C(-6143563121944184390), INT64_C( 7597886654367336733), INT64_C( 1457057381762531412), INT64_C( 7253226870637562008), INT64_C(-6283001429373579825)), UINT8_C( 16) }, { UINT8_C( 88), simde_mm512_set_epi64(INT64_C(-2374777447002601129), INT64_C(-5785141086360428669), INT64_C( 6450311718709789609), INT64_C( 4609381622161693926), INT64_C( -638886780002324864), INT64_C(-5739159461288227194), INT64_C(-4392084870376418631), INT64_C( 2798977638636065147)), simde_mm512_set_epi64(INT64_C( 753500986908300233), INT64_C(-5785141086360428669), INT64_C( 6450311718709789609), INT64_C(-4648819914956469219), INT64_C(-8767820380557260648), INT64_C(-5739159461288227194), INT64_C( 2360822030941279123), INT64_C(-6092063218708168180)), UINT8_C( 24) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask8 r = simde_mm512_mask_cmpgt_epi64_mask(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_equal_mmask8(r, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmpgt_epi8_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmpgt_epu8_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmpgt_epi32_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cmpgt_epi32_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmpgt_epi64_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cmpgt_epi64_mask) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/cmple.c000066400000000000000000001316761400333146700165360ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this 
software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #define SIMDE_TEST_X86_AVX512_INSN cmple #include #include #include static int test_simde_mm512_cmple_epi8_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__mmask64 r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( 52), INT8_C( 1), INT8_C( 13), INT8_C( 75), INT8_C( -85), INT8_C( 72), INT8_C( -19), INT8_C( -45), INT8_C( -9), INT8_C(-112), INT8_C( -83), INT8_C( -75), INT8_C( 108), INT8_C( -56), INT8_C( 1), INT8_C( 67), INT8_C(-121), INT8_C( -80), INT8_C( -61), INT8_C( -21), INT8_C( 32), INT8_C( 90), INT8_C( 18), INT8_C( -76), INT8_C( -55), INT8_C(-120), INT8_C( 21), INT8_C( 67), INT8_C( -90), INT8_C( 96), INT8_C( 54), INT8_C( 110), INT8_C( 64), INT8_C( 8), INT8_C( -22), INT8_C( 94), INT8_C( 52), INT8_C( -99), INT8_C( -4), INT8_C( -84), INT8_C( -6), INT8_C( 125), INT8_C( -1), INT8_C( -60), INT8_C( 92), INT8_C(-113), INT8_C( -19), INT8_C( 84), INT8_C( -51), INT8_C( 77), INT8_C( 43), INT8_C( -20), INT8_C( -65), INT8_C( 126), INT8_C( 26), INT8_C( 
119), INT8_C( -48), INT8_C( -40), INT8_C( 25), INT8_C(-102), INT8_C( -57), INT8_C( -51), INT8_C( 66), INT8_C( 22)), simde_mm512_set_epi8(INT8_C( 86), INT8_C( 9), INT8_C( -71), INT8_C( -90), INT8_C( 18), INT8_C( -62), INT8_C( -33), INT8_C(-112), INT8_C( -98), INT8_C( -84), INT8_C(-115), INT8_C( 15), INT8_C( -33), INT8_C( -76), INT8_C( 61), INT8_C( -95), INT8_C(-105), INT8_C( -60), INT8_C( -6), INT8_C(-111), INT8_C( 16), INT8_C(-119), INT8_C( -8), INT8_C( 102), INT8_C(-121), INT8_C( 65), INT8_C( 74), INT8_C( -15), INT8_C( -75), INT8_C( 113), INT8_C( 12), INT8_C( 72), INT8_C( 11), INT8_C( -99), INT8_C(-102), INT8_C(-120), INT8_C( 85), INT8_C( -27), INT8_C( 50), INT8_C( 44), INT8_C( 99), INT8_C( 97), INT8_C( -64), INT8_C( 17), INT8_C( -71), INT8_C( 126), INT8_C( 120), INT8_C( 30), INT8_C( 35), INT8_C( -80), INT8_C( -38), INT8_C( 107), INT8_C( 58), INT8_C( 8), INT8_C( -10), INT8_C( 0), INT8_C(-107), INT8_C( -88), INT8_C(-112), INT8_C( 12), INT8_C(-116), INT8_C( -27), INT8_C( -98), INT8_C(-111)), UINT64_C(14434847609910106132) }, { simde_mm512_set_epi8(INT8_C( 107), INT8_C(-116), INT8_C( 122), INT8_C( 102), INT8_C( -58), INT8_C( 110), INT8_C( -59), INT8_C( -22), INT8_C( 43), INT8_C( 28), INT8_C( 44), INT8_C( -42), INT8_C(-114), INT8_C( -45), INT8_C( -97), INT8_C( 37), INT8_C(-125), INT8_C( 89), INT8_C(-120), INT8_C( -47), INT8_C( -37), INT8_C( -38), INT8_C( 100), INT8_C( 17), INT8_C( 50), INT8_C( -37), INT8_C( -47), INT8_C( 89), INT8_C( 20), INT8_C( -20), INT8_C( -62), INT8_C(-101), INT8_C( 62), INT8_C( 121), INT8_C(-111), INT8_C( -6), INT8_C( 1), INT8_C( 18), INT8_C( 98), INT8_C( 82), INT8_C( -13), INT8_C( 78), INT8_C( -4), INT8_C( 14), INT8_C( -66), INT8_C( 64), INT8_C( -67), INT8_C( 109), INT8_C( 39), INT8_C( 91), INT8_C( -87), INT8_C( 9), INT8_C( -93), INT8_C(-109), INT8_C( 6), INT8_C( 98), INT8_C( 120), INT8_C( 124), INT8_C( 122), INT8_C( -19), INT8_C( -77), INT8_C( 57), INT8_C( -96), INT8_C( 126)), simde_mm512_set_epi8(INT8_C( -21), INT8_C(-123), INT8_C( -78), 
INT8_C( -79), INT8_C( -86), INT8_C( 25), INT8_C(-109), INT8_C( -7), INT8_C( 99), INT8_C( -40), INT8_C( 26), INT8_C( 97), INT8_C(-125), INT8_C( 116), INT8_C( -95), INT8_C( 35), INT8_C( 2), INT8_C(-112), INT8_C( 36), INT8_C( -92), INT8_C( 3), INT8_C(-116), INT8_C( 114), INT8_C(-106), INT8_C( 44), INT8_C( 56), INT8_C( 61), INT8_C( -40), INT8_C( 0), INT8_C( 125), INT8_C( -9), INT8_C(-122), INT8_C( -60), INT8_C( 40), INT8_C( -45), INT8_C( -33), INT8_C( -44), INT8_C( -46), INT8_C( -92), INT8_C( -73), INT8_C( -80), INT8_C( 116), INT8_C( -97), INT8_C( -44), INT8_C(-128), INT8_C( 96), INT8_C( 31), INT8_C( 65), INT8_C( 78), INT8_C(-127), INT8_C(-104), INT8_C( -24), INT8_C( 50), INT8_C(-106), INT8_C( 74), INT8_C( -36), INT8_C( -78), INT8_C( -25), INT8_C( 74), INT8_C( 73), INT8_C(-109), INT8_C( 66), INT8_C( 65), INT8_C( 122)), UINT64_C(114466196149407254) }, { simde_mm512_set_epi8(INT8_C(-114), INT8_C( -20), INT8_C( 33), INT8_C( 85), INT8_C( 122), INT8_C( -58), INT8_C(-110), INT8_C( 73), INT8_C( -51), INT8_C( 77), INT8_C( 19), INT8_C( 55), INT8_C( 58), INT8_C(-110), INT8_C( 71), INT8_C(-103), INT8_C( 4), INT8_C(-105), INT8_C( 17), INT8_C( -19), INT8_C( -69), INT8_C( -9), INT8_C( 20), INT8_C( -70), INT8_C( -64), INT8_C( 10), INT8_C( 85), INT8_C( 102), INT8_C( -33), INT8_C(-106), INT8_C(-128), INT8_C( 35), INT8_C( -69), INT8_C( -59), INT8_C( 120), INT8_C(-107), INT8_C( 106), INT8_C( -82), INT8_C( -54), INT8_C(-116), INT8_C( 23), INT8_C( 8), INT8_C( -4), INT8_C( -91), INT8_C( -73), INT8_C( -40), INT8_C( 102), INT8_C( -54), INT8_C( -23), INT8_C( -80), INT8_C(-110), INT8_C( -39), INT8_C( 63), INT8_C( 120), INT8_C( 11), INT8_C( -6), INT8_C( 120), INT8_C( -34), INT8_C( -31), INT8_C(-107), INT8_C( 78), INT8_C( 119), INT8_C( -83), INT8_C( 34)), simde_mm512_set_epi8(INT8_C( -52), INT8_C( -70), INT8_C( 68), INT8_C( -91), INT8_C(-105), INT8_C(-106), INT8_C( -66), INT8_C( -74), INT8_C( 10), INT8_C(-101), INT8_C( -82), INT8_C( 74), INT8_C( 61), INT8_C(-127), INT8_C( 33), INT8_C( -25), 
INT8_C( -39), INT8_C( 43), INT8_C( 29), INT8_C( 72), INT8_C( -22), INT8_C( 98), INT8_C( -15), INT8_C( 51), INT8_C(-120), INT8_C( -23), INT8_C( 8), INT8_C( 42), INT8_C(-125), INT8_C( -3), INT8_C( 56), INT8_C( 103), INT8_C(-111), INT8_C( -5), INT8_C( 92), INT8_C( -35), INT8_C( 109), INT8_C( -50), INT8_C( -47), INT8_C(-123), INT8_C( 113), INT8_C( -13), INT8_C( 46), INT8_C( -7), INT8_C( 18), INT8_C( 70), INT8_C( 27), INT8_C( -34), INT8_C( -89), INT8_C( 80), INT8_C(-101), INT8_C( -36), INT8_C( 41), INT8_C( -84), INT8_C( -38), INT8_C( -38), INT8_C( 67), INT8_C( -35), INT8_C(-125), INT8_C( -16), INT8_C( -10), INT8_C(-121), INT8_C( -73), INT8_C( -7)), UINT64_C(11716533376188772370) }, { simde_mm512_set_epi8(INT8_C( -44), INT8_C( 33), INT8_C(-126), INT8_C( -76), INT8_C( 95), INT8_C( 42), INT8_C( 112), INT8_C(-100), INT8_C(-103), INT8_C( 126), INT8_C(-112), INT8_C( -76), INT8_C(-105), INT8_C( -79), INT8_C( -40), INT8_C( -27), INT8_C( -60), INT8_C( -42), INT8_C(-109), INT8_C( -31), INT8_C( -68), INT8_C( -24), INT8_C( -70), INT8_C( 67), INT8_C( -80), INT8_C( 19), INT8_C( -96), INT8_C( 2), INT8_C( -54), INT8_C( 80), INT8_C( 72), INT8_C( -38), INT8_C( 64), INT8_C( -70), INT8_C( -47), INT8_C( -13), INT8_C( 21), INT8_C( 42), INT8_C(-120), INT8_C( -10), INT8_C( 63), INT8_C( -33), INT8_C( -57), INT8_C( -57), INT8_C( 116), INT8_C( 15), INT8_C( -51), INT8_C( -6), INT8_C( 122), INT8_C( 99), INT8_C(-128), INT8_C( -5), INT8_C( -25), INT8_C( 119), INT8_C( -74), INT8_C( 99), INT8_C( -40), INT8_C( -57), INT8_C( 28), INT8_C( -42), INT8_C(-120), INT8_C( 65), INT8_C( -45), INT8_C(-120)), simde_mm512_set_epi8(INT8_C( -9), INT8_C( 84), INT8_C( 59), INT8_C( 71), INT8_C( 13), INT8_C( 39), INT8_C( -79), INT8_C( -26), INT8_C( 82), INT8_C( -47), INT8_C( -45), INT8_C( 29), INT8_C( -91), INT8_C( -34), INT8_C( 79), INT8_C(-114), INT8_C( -91), INT8_C( 13), INT8_C( 12), INT8_C( -60), INT8_C( -44), INT8_C(-121), INT8_C( 70), INT8_C( 85), INT8_C( -49), INT8_C( 2), INT8_C( 65), INT8_C( 20), INT8_C( 84), 
INT8_C( 52), INT8_C( -41), INT8_C( 31), INT8_C( 85), INT8_C( 5), INT8_C(-119), INT8_C( -80), INT8_C( 55), INT8_C( 106), INT8_C( 104), INT8_C(-127), INT8_C( 66), INT8_C( 9), INT8_C( -87), INT8_C( 1), INT8_C( 62), INT8_C( -13), INT8_C( -95), INT8_C( 107), INT8_C( -18), INT8_C( 9), INT8_C( -38), INT8_C( 114), INT8_C( -6), INT8_C( 83), INT8_C( -70), INT8_C( -62), INT8_C( -54), INT8_C( 89), INT8_C( -72), INT8_C( -70), INT8_C( -80), INT8_C( -23), INT8_C( -13), INT8_C( 125)), UINT64_C(17419478854498597451) }, { simde_mm512_set_epi8(INT8_C(-122), INT8_C( 98), INT8_C( -61), INT8_C( -27), INT8_C( -69), INT8_C( -73), INT8_C( 19), INT8_C( -42), INT8_C( -94), INT8_C( -91), INT8_C( 70), INT8_C( 49), INT8_C( -51), INT8_C( -48), INT8_C( -34), INT8_C( -19), INT8_C(-122), INT8_C( 35), INT8_C( 100), INT8_C( -94), INT8_C( -69), INT8_C( 64), INT8_C( 56), INT8_C( -16), INT8_C( 95), INT8_C( -67), INT8_C( 120), INT8_C( -89), INT8_C( 79), INT8_C( 34), INT8_C(-114), INT8_C( -60), INT8_C( 60), INT8_C( -94), INT8_C( -73), INT8_C(-127), INT8_C( 53), INT8_C( -21), INT8_C( -21), INT8_C( 93), INT8_C( -84), INT8_C( -3), INT8_C( -33), INT8_C(-100), INT8_C( -51), INT8_C( 64), INT8_C( 122), INT8_C( 20), INT8_C( 71), INT8_C( 31), INT8_C( 119), INT8_C( -31), INT8_C( -54), INT8_C(-117), INT8_C( 16), INT8_C( -86), INT8_C( -2), INT8_C( -44), INT8_C( 18), INT8_C( 10), INT8_C( 87), INT8_C( 126), INT8_C( 125), INT8_C(-118)), simde_mm512_set_epi8(INT8_C( 73), INT8_C( -43), INT8_C( -22), INT8_C( 17), INT8_C( 58), INT8_C(-109), INT8_C( -46), INT8_C( -57), INT8_C( 64), INT8_C( 101), INT8_C( 126), INT8_C( -55), INT8_C(-126), INT8_C( 125), INT8_C( 63), INT8_C( 11), INT8_C( 35), INT8_C( 47), INT8_C( -77), INT8_C( -22), INT8_C( -98), INT8_C( -82), INT8_C( -84), INT8_C(-104), INT8_C( 55), INT8_C( -26), INT8_C( -90), INT8_C( -11), INT8_C( 51), INT8_C( 115), INT8_C( 98), INT8_C( -34), INT8_C( -10), INT8_C( 39), INT8_C( 107), INT8_C( 53), INT8_C( -20), INT8_C( 127), INT8_C( -9), INT8_C( 9), INT8_C(-101), INT8_C( -21), 
INT8_C( -30), INT8_C( -70), INT8_C(-128), INT8_C(-115), INT8_C(-123), INT8_C( -73), INT8_C( -77), INT8_C( 54), INT8_C( 113), INT8_C( 34), INT8_C( 55), INT8_C( -74), INT8_C( -52), INT8_C( 17), INT8_C( 5), INT8_C(-118), INT8_C( -68), INT8_C( 115), INT8_C( 127), INT8_C( 35), INT8_C( 86), INT8_C( 24)), UINT64_C(13323847096662515097) }, { simde_mm512_set_epi8(INT8_C( 37), INT8_C( -12), INT8_C( -32), INT8_C( 126), INT8_C( -35), INT8_C(-101), INT8_C( 64), INT8_C( -71), INT8_C( 102), INT8_C( 71), INT8_C( -15), INT8_C( -36), INT8_C( -31), INT8_C( -15), INT8_C( 2), INT8_C( -32), INT8_C( 7), INT8_C( -43), INT8_C( 119), INT8_C( -54), INT8_C( -96), INT8_C( -6), INT8_C( 62), INT8_C( 104), INT8_C( 35), INT8_C( 37), INT8_C( 120), INT8_C( -4), INT8_C( 62), INT8_C( 54), INT8_C( -97), INT8_C(-103), INT8_C(-115), INT8_C( -68), INT8_C( -94), INT8_C( -96), INT8_C( -71), INT8_C( 108), INT8_C( 112), INT8_C( 21), INT8_C( -60), INT8_C(-107), INT8_C( 13), INT8_C( 23), INT8_C( 28), INT8_C( 94), INT8_C(-121), INT8_C(-105), INT8_C( 117), INT8_C( 92), INT8_C( -5), INT8_C( 102), INT8_C( 113), INT8_C( -68), INT8_C( -73), INT8_C( -98), INT8_C(-116), INT8_C(-108), INT8_C( 104), INT8_C( -94), INT8_C( 123), INT8_C( -15), INT8_C( 105), INT8_C( 49)), simde_mm512_set_epi8(INT8_C( 5), INT8_C( 84), INT8_C( 11), INT8_C( -98), INT8_C( -61), INT8_C( -19), INT8_C( 52), INT8_C( -5), INT8_C( -79), INT8_C( 45), INT8_C( -79), INT8_C( -17), INT8_C( -45), INT8_C( 68), INT8_C( 30), INT8_C( -51), INT8_C(-115), INT8_C(-100), INT8_C( -59), INT8_C( -64), INT8_C( 94), INT8_C( -29), INT8_C( -94), INT8_C( 91), INT8_C(-128), INT8_C( -59), INT8_C( -67), INT8_C( -23), INT8_C( 61), INT8_C(-122), INT8_C(-120), INT8_C(-108), INT8_C( -31), INT8_C( -12), INT8_C( -69), INT8_C( 83), INT8_C( -41), INT8_C(-119), INT8_C(-114), INT8_C( 34), INT8_C( 83), INT8_C( 109), INT8_C(-105), INT8_C( -73), INT8_C( 94), INT8_C( -53), INT8_C( 17), INT8_C( 104), INT8_C( -66), INT8_C( 76), INT8_C(-125), INT8_C( -11), INT8_C( 91), INT8_C( -85), INT8_C( 
-13), INT8_C( -16), INT8_C( -56), INT8_C( 83), INT8_C( 72), INT8_C( 80), INT8_C( 38), INT8_C( -24), INT8_C( 28), INT8_C( 126)), UINT64_C(7284018247602209745) }, { simde_mm512_set_epi8(INT8_C( -73), INT8_C(-128), INT8_C( 96), INT8_C( -55), INT8_C( -66), INT8_C( -7), INT8_C( -10), INT8_C(-120), INT8_C( 107), INT8_C( -20), INT8_C( -32), INT8_C( 4), INT8_C(-122), INT8_C(-121), INT8_C( 77), INT8_C( 72), INT8_C( 24), INT8_C( -59), INT8_C( 5), INT8_C( -25), INT8_C( -39), INT8_C( 100), INT8_C( -14), INT8_C( 62), INT8_C( -76), INT8_C( -56), INT8_C( -35), INT8_C( 47), INT8_C( 88), INT8_C( 70), INT8_C( 51), INT8_C( 15), INT8_C( 65), INT8_C( 78), INT8_C( -75), INT8_C( 26), INT8_C( -34), INT8_C( -87), INT8_C( -19), INT8_C( -93), INT8_C( -9), INT8_C( -97), INT8_C( 126), INT8_C( -61), INT8_C( 126), INT8_C( 124), INT8_C( 103), INT8_C(-124), INT8_C( 74), INT8_C( -95), INT8_C( 17), INT8_C( -78), INT8_C(-110), INT8_C( -79), INT8_C( 39), INT8_C( 55), INT8_C( 21), INT8_C( 70), INT8_C( -36), INT8_C( -25), INT8_C( -3), INT8_C( -83), INT8_C( -67), INT8_C( -7)), simde_mm512_set_epi8(INT8_C( -94), INT8_C( 1), INT8_C( 32), INT8_C( 2), INT8_C(-119), INT8_C( 91), INT8_C(-121), INT8_C( 50), INT8_C( -99), INT8_C( 3), INT8_C( 73), INT8_C( 40), INT8_C(-124), INT8_C( -23), INT8_C( 95), INT8_C( 102), INT8_C( 41), INT8_C( 56), INT8_C( 70), INT8_C( 126), INT8_C( 48), INT8_C( -42), INT8_C( 60), INT8_C( 88), INT8_C( 38), INT8_C( 76), INT8_C( -4), INT8_C(-121), INT8_C( -33), INT8_C( 119), INT8_C(-103), INT8_C( 4), INT8_C( 100), INT8_C( 93), INT8_C( 25), INT8_C(-105), INT8_C(-113), INT8_C( 48), INT8_C( 102), INT8_C( -75), INT8_C( 35), INT8_C( 24), INT8_C( 12), INT8_C( 10), INT8_C( -8), INT8_C( 18), INT8_C( -50), INT8_C( 14), INT8_C( -72), INT8_C( 90), INT8_C( -35), INT8_C( 103), INT8_C( -97), INT8_C( -75), INT8_C( 46), INT8_C( -43), INT8_C( 101), INT8_C( -57), INT8_C( 37), INT8_C( 23), INT8_C(-127), INT8_C( 86), INT8_C( -24), INT8_C( 104)), UINT64_C(6158667976012816055) }, { 
simde_mm512_set_epi8(INT8_C(-100), INT8_C( -98), INT8_C( -42), INT8_C( 95), INT8_C( 40), INT8_C( 8), INT8_C( 114), INT8_C( 23), INT8_C( -26), INT8_C(-115), INT8_C( 79), INT8_C( 45), INT8_C( 90), INT8_C( 114), INT8_C( 2), INT8_C( 107), INT8_C( -80), INT8_C( -3), INT8_C( -37), INT8_C( -87), INT8_C( -51), INT8_C( -33), INT8_C( -48), INT8_C( -87), INT8_C( -24), INT8_C( -52), INT8_C( 116), INT8_C( 127), INT8_C( 3), INT8_C( 109), INT8_C(-127), INT8_C( -79), INT8_C( -48), INT8_C(-121), INT8_C( 27), INT8_C( 17), INT8_C( 52), INT8_C(-113), INT8_C( -21), INT8_C( 13), INT8_C(-121), INT8_C( -96), INT8_C( 10), INT8_C(-116), INT8_C( 62), INT8_C( 99), INT8_C( 21), INT8_C( 68), INT8_C( -43), INT8_C( 107), INT8_C( 47), INT8_C( -18), INT8_C( -46), INT8_C(-117), INT8_C( 83), INT8_C( -33), INT8_C( 50), INT8_C( 26), INT8_C( 5), INT8_C(-102), INT8_C( -62), INT8_C(-117), INT8_C( 11), INT8_C( -89)), simde_mm512_set_epi8(INT8_C( -7), INT8_C(-107), INT8_C(-110), INT8_C( 3), INT8_C( 123), INT8_C( 42), INT8_C( 35), INT8_C(-112), INT8_C( -33), INT8_C( -27), INT8_C( -71), INT8_C( 60), INT8_C( 28), INT8_C(-128), INT8_C( -61), INT8_C( -94), INT8_C( -53), INT8_C(-120), INT8_C( -58), INT8_C( 86), INT8_C( 54), INT8_C( -41), INT8_C( -24), INT8_C( -24), INT8_C( -79), INT8_C( 97), INT8_C( 11), INT8_C(-106), INT8_C( -2), INT8_C( -80), INT8_C( 54), INT8_C( 5), INT8_C( 37), INT8_C( 35), INT8_C( -99), INT8_C(-117), INT8_C( -23), INT8_C( 93), INT8_C( 84), INT8_C( 64), INT8_C(-111), INT8_C( -21), INT8_C( -43), INT8_C( 49), INT8_C( 7), INT8_C( -32), INT8_C( 68), INT8_C( -29), INT8_C( 119), INT8_C( 93), INT8_C( -20), INT8_C( -53), INT8_C( 28), INT8_C( -11), INT8_C( -1), INT8_C( 93), INT8_C( -68), INT8_C( 100), INT8_C( 31), INT8_C( 98), INT8_C( 78), INT8_C( 52), INT8_C( -6), INT8_C( -1)), UINT64_C(10110751878864342397) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask64 r = simde_mm512_cmple_epi8_mask(test_vec[i].a, test_vec[i].b); simde_assert_equal_mmask64(r, 
test_vec[i].r); } return 0; } static int test_simde_mm512_cmple_epu8_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__mmask64 r; } test_vec[8] = { { simde_x_mm512_set_epu8(UINT8_C(212), UINT8_C( 13), UINT8_C( 31), UINT8_C(214), UINT8_C(180), UINT8_C(244), UINT8_C( 71), UINT8_C( 63), UINT8_C(225), UINT8_C(144), UINT8_C( 44), UINT8_C(106), UINT8_C(204), UINT8_C( 63), UINT8_C(146), UINT8_C(179), UINT8_C( 32), UINT8_C(164), UINT8_C(133), UINT8_C( 40), UINT8_C(191), UINT8_C(127), UINT8_C(223), UINT8_C( 71), UINT8_C(188), UINT8_C( 44), UINT8_C(171), UINT8_C( 22), UINT8_C( 89), UINT8_C(241), UINT8_C( 98), UINT8_C(161), UINT8_C( 5), UINT8_C(111), UINT8_C(103), UINT8_C(163), UINT8_C(148), UINT8_C(182), UINT8_C( 53), UINT8_C(167), UINT8_C(123), UINT8_C( 87), UINT8_C(248), UINT8_C( 30), UINT8_C( 59), UINT8_C( 34), UINT8_C(228), UINT8_C(123), UINT8_C( 19), UINT8_C( 1), UINT8_C(177), UINT8_C(191), UINT8_C(228), UINT8_C(252), UINT8_C( 82), UINT8_C( 41), UINT8_C(190), UINT8_C( 81), UINT8_C(126), UINT8_C(186), UINT8_C(232), UINT8_C(220), UINT8_C(248), UINT8_C( 9)), simde_x_mm512_set_epu8(UINT8_C(156), UINT8_C(167), UINT8_C(182), UINT8_C(174), UINT8_C( 59), UINT8_C( 33), UINT8_C(108), UINT8_C( 56), UINT8_C(235), UINT8_C(112), UINT8_C( 57), UINT8_C(123), UINT8_C(204), UINT8_C(222), UINT8_C(254), UINT8_C(219), UINT8_C( 79), UINT8_C(226), UINT8_C(235), UINT8_C(225), UINT8_C(225), UINT8_C(214), UINT8_C( 93), UINT8_C(110), UINT8_C(130), UINT8_C(140), UINT8_C( 9), UINT8_C(197), UINT8_C(196), UINT8_C(218), UINT8_C( 26), UINT8_C(172), UINT8_C( 79), UINT8_C(211), UINT8_C(167), UINT8_C(175), UINT8_C(117), UINT8_C(109), UINT8_C(123), UINT8_C( 61), UINT8_C(201), UINT8_C(219), UINT8_C( 65), UINT8_C(225), UINT8_C(169), UINT8_C(245), UINT8_C(163), UINT8_C(246), UINT8_C( 72), UINT8_C( 0), UINT8_C(139), UINT8_C( 27), UINT8_C(202), UINT8_C(250), UINT8_C( 9), UINT8_C( 85), UINT8_C( 2), UINT8_C(226), UINT8_C( 89), UINT8_C( 13), UINT8_C(200), UINT8_C(251), UINT8_C( 
41), UINT8_C(231)), UINT64_C(7115684499037192517) }, { simde_x_mm512_set_epu8(UINT8_C( 29), UINT8_C(222), UINT8_C(229), UINT8_C(138), UINT8_C(180), UINT8_C(164), UINT8_C( 76), UINT8_C(245), UINT8_C( 99), UINT8_C(226), UINT8_C(230), UINT8_C( 98), UINT8_C(233), UINT8_C( 51), UINT8_C(144), UINT8_C(148), UINT8_C( 54), UINT8_C( 78), UINT8_C(119), UINT8_C( 10), UINT8_C(178), UINT8_C(149), UINT8_C(114), UINT8_C( 10), UINT8_C(114), UINT8_C(229), UINT8_C(111), UINT8_C(217), UINT8_C(221), UINT8_C(177), UINT8_C(132), UINT8_C(239), UINT8_C( 37), UINT8_C( 97), UINT8_C( 39), UINT8_C(159), UINT8_C(180), UINT8_C(171), UINT8_C(207), UINT8_C( 97), UINT8_C( 34), UINT8_C( 28), UINT8_C(196), UINT8_C( 31), UINT8_C(161), UINT8_C(176), UINT8_C(212), UINT8_C(150), UINT8_C(127), UINT8_C( 96), UINT8_C( 8), UINT8_C(231), UINT8_C(113), UINT8_C(244), UINT8_C(225), UINT8_C( 59), UINT8_C( 8), UINT8_C(189), UINT8_C(180), UINT8_C(181), UINT8_C( 17), UINT8_C(247), UINT8_C(253), UINT8_C(193)), simde_x_mm512_set_epu8(UINT8_C(139), UINT8_C(135), UINT8_C(125), UINT8_C(234), UINT8_C(170), UINT8_C(118), UINT8_C(218), UINT8_C(127), UINT8_C(224), UINT8_C( 87), UINT8_C(101), UINT8_C( 79), UINT8_C(160), UINT8_C( 79), UINT8_C(171), UINT8_C(143), UINT8_C(221), UINT8_C( 27), UINT8_C( 16), UINT8_C(105), UINT8_C(233), UINT8_C(128), UINT8_C( 36), UINT8_C( 18), UINT8_C( 43), UINT8_C(252), UINT8_C(123), UINT8_C(101), UINT8_C( 69), UINT8_C( 20), UINT8_C(168), UINT8_C(248), UINT8_C( 42), UINT8_C(112), UINT8_C( 32), UINT8_C(253), UINT8_C( 41), UINT8_C( 94), UINT8_C(127), UINT8_C(246), UINT8_C(224), UINT8_C( 59), UINT8_C(224), UINT8_C(111), UINT8_C(208), UINT8_C( 14), UINT8_C(109), UINT8_C(170), UINT8_C(202), UINT8_C(101), UINT8_C(232), UINT8_C( 20), UINT8_C( 79), UINT8_C( 96), UINT8_C(249), UINT8_C(176), UINT8_C(110), UINT8_C(169), UINT8_C( 85), UINT8_C( 81), UINT8_C( 78), UINT8_C( 8), UINT8_C(191), UINT8_C( 88)), UINT64_C(10558295030420333448) }, { simde_x_mm512_set_epu8(UINT8_C(179), UINT8_C(213), UINT8_C( 48), 
UINT8_C(187), UINT8_C(164), UINT8_C( 7), UINT8_C( 91), UINT8_C( 4), UINT8_C(102), UINT8_C(109), UINT8_C(177), UINT8_C(255), UINT8_C(192), UINT8_C(222), UINT8_C(234), UINT8_C(134), UINT8_C(145), UINT8_C(124), UINT8_C(251), UINT8_C(236), UINT8_C(216), UINT8_C(105), UINT8_C(153), UINT8_C( 89), UINT8_C( 43), UINT8_C( 64), UINT8_C(112), UINT8_C(212), UINT8_C(140), UINT8_C( 39), UINT8_C( 85), UINT8_C(189), UINT8_C(227), UINT8_C(172), UINT8_C(113), UINT8_C(242), UINT8_C( 62), UINT8_C(140), UINT8_C(107), UINT8_C(240), UINT8_C(230), UINT8_C(141), UINT8_C(154), UINT8_C(244), UINT8_C(228), UINT8_C(222), UINT8_C(241), UINT8_C( 55), UINT8_C( 62), UINT8_C(128), UINT8_C( 59), UINT8_C( 35), UINT8_C(194), UINT8_C(240), UINT8_C(211), UINT8_C(214), UINT8_C( 18), UINT8_C(149), UINT8_C(249), UINT8_C(119), UINT8_C( 83), UINT8_C(223), UINT8_C(207), UINT8_C(164)), simde_x_mm512_set_epu8(UINT8_C( 54), UINT8_C( 3), UINT8_C(127), UINT8_C(243), UINT8_C(233), UINT8_C( 35), UINT8_C(224), UINT8_C( 24), UINT8_C(138), UINT8_C(223), UINT8_C(122), UINT8_C(117), UINT8_C(242), UINT8_C(132), UINT8_C(185), UINT8_C( 60), UINT8_C(130), UINT8_C( 34), UINT8_C(198), UINT8_C( 87), UINT8_C(166), UINT8_C( 62), UINT8_C(236), UINT8_C(111), UINT8_C(236), UINT8_C(148), UINT8_C( 23), UINT8_C(102), UINT8_C(203), UINT8_C( 39), UINT8_C( 44), UINT8_C(229), UINT8_C(165), UINT8_C( 64), UINT8_C(247), UINT8_C(232), UINT8_C(252), UINT8_C( 14), UINT8_C(190), UINT8_C(210), UINT8_C(114), UINT8_C( 77), UINT8_C(213), UINT8_C( 48), UINT8_C(150), UINT8_C(192), UINT8_C( 7), UINT8_C( 60), UINT8_C(243), UINT8_C(183), UINT8_C( 19), UINT8_C( 12), UINT8_C( 82), UINT8_C(159), UINT8_C(244), UINT8_C( 58), UINT8_C(100), UINT8_C(194), UINT8_C(140), UINT8_C(234), UINT8_C( 26), UINT8_C( 79), UINT8_C(203), UINT8_C( 24)), UINT64_C(4595927599441625808) }, { simde_x_mm512_set_epu8(UINT8_C( 20), UINT8_C(104), UINT8_C(252), UINT8_C( 18), UINT8_C( 49), UINT8_C(100), UINT8_C(232), UINT8_C(164), UINT8_C(213), UINT8_C( 4), UINT8_C(177), UINT8_C(161), 
UINT8_C(177), UINT8_C( 3), UINT8_C(134), UINT8_C(175), UINT8_C( 44), UINT8_C(123), UINT8_C(208), UINT8_C( 13), UINT8_C(112), UINT8_C(152), UINT8_C(166), UINT8_C( 36), UINT8_C(194), UINT8_C( 54), UINT8_C(139), UINT8_C(252), UINT8_C(211), UINT8_C( 63), UINT8_C( 1), UINT8_C(193), UINT8_C(192), UINT8_C( 71), UINT8_C( 33), UINT8_C(244), UINT8_C(217), UINT8_C(149), UINT8_C(174), UINT8_C(175), UINT8_C(183), UINT8_C( 89), UINT8_C( 4), UINT8_C(212), UINT8_C(123), UINT8_C( 28), UINT8_C(107), UINT8_C( 62), UINT8_C(181), UINT8_C( 4), UINT8_C(156), UINT8_C( 13), UINT8_C( 99), UINT8_C(210), UINT8_C( 57), UINT8_C( 39), UINT8_C( 59), UINT8_C( 79), UINT8_C( 89), UINT8_C(123), UINT8_C( 82), UINT8_C(235), UINT8_C(217), UINT8_C(152)), simde_x_mm512_set_epu8(UINT8_C(150), UINT8_C(194), UINT8_C(117), UINT8_C( 49), UINT8_C(221), UINT8_C(199), UINT8_C(212), UINT8_C(148), UINT8_C( 85), UINT8_C( 59), UINT8_C(185), UINT8_C( 89), UINT8_C(176), UINT8_C(174), UINT8_C(127), UINT8_C(134), UINT8_C(249), UINT8_C(178), UINT8_C(114), UINT8_C(195), UINT8_C(248), UINT8_C(153), UINT8_C( 86), UINT8_C(137), UINT8_C( 78), UINT8_C(198), UINT8_C(186), UINT8_C(113), UINT8_C(114), UINT8_C(213), UINT8_C(233), UINT8_C(100), UINT8_C(115), UINT8_C( 27), UINT8_C( 6), UINT8_C(115), UINT8_C( 71), UINT8_C(139), UINT8_C(249), UINT8_C(103), UINT8_C(106), UINT8_C(242), UINT8_C( 15), UINT8_C(207), UINT8_C( 82), UINT8_C(215), UINT8_C(153), UINT8_C(201), UINT8_C(103), UINT8_C(101), UINT8_C(182), UINT8_C(195), UINT8_C(233), UINT8_C( 35), UINT8_C(201), UINT8_C(188), UINT8_C(248), UINT8_C(236), UINT8_C( 57), UINT8_C(212), UINT8_C( 14), UINT8_C(200), UINT8_C( 28), UINT8_C(223)), UINT64_C(15881061616211950545) }, { simde_x_mm512_set_epu8(UINT8_C( 75), UINT8_C(247), UINT8_C( 8), UINT8_C(191), UINT8_C(201), UINT8_C(123), UINT8_C( 94), UINT8_C(179), UINT8_C(244), UINT8_C(101), UINT8_C( 84), UINT8_C(235), UINT8_C( 73), UINT8_C(217), UINT8_C(236), UINT8_C( 67), UINT8_C(172), UINT8_C(176), UINT8_C( 63), UINT8_C( 10), UINT8_C( 5), 
UINT8_C(236), UINT8_C(174), UINT8_C(220), UINT8_C(206), UINT8_C(250), UINT8_C(153), UINT8_C(199), UINT8_C(238), UINT8_C(212), UINT8_C(149), UINT8_C(133), UINT8_C( 38), UINT8_C( 78), UINT8_C(150), UINT8_C(121), UINT8_C(152), UINT8_C(118), UINT8_C(104), UINT8_C(114), UINT8_C( 96), UINT8_C(108), UINT8_C( 94), UINT8_C(164), UINT8_C(231), UINT8_C( 95), UINT8_C( 67), UINT8_C(157), UINT8_C(160), UINT8_C( 73), UINT8_C(157), UINT8_C(140), UINT8_C(130), UINT8_C(247), UINT8_C(150), UINT8_C(152), UINT8_C(157), UINT8_C(232), UINT8_C( 29), UINT8_C(121), UINT8_C(206), UINT8_C(156), UINT8_C(126), UINT8_C( 23)), simde_x_mm512_set_epu8(UINT8_C(100), UINT8_C( 83), UINT8_C( 54), UINT8_C( 71), UINT8_C( 37), UINT8_C( 5), UINT8_C(116), UINT8_C( 23), UINT8_C(202), UINT8_C( 63), UINT8_C( 69), UINT8_C(165), UINT8_C( 74), UINT8_C(243), UINT8_C(197), UINT8_C(135), UINT8_C( 54), UINT8_C(163), UINT8_C(231), UINT8_C(198), UINT8_C(236), UINT8_C( 28), UINT8_C(182), UINT8_C(250), UINT8_C(142), UINT8_C(142), UINT8_C(129), UINT8_C(112), UINT8_C( 35), UINT8_C(210), UINT8_C(144), UINT8_C(254), UINT8_C(152), UINT8_C(159), UINT8_C( 93), UINT8_C( 83), UINT8_C(237), UINT8_C(179), UINT8_C(152), UINT8_C( 79), UINT8_C(232), UINT8_C(160), UINT8_C(116), UINT8_C(255), UINT8_C(123), UINT8_C(122), UINT8_C(140), UINT8_C(155), UINT8_C(208), UINT8_C(117), UINT8_C(159), UINT8_C(124), UINT8_C( 66), UINT8_C(106), UINT8_C( 14), UINT8_C(238), UINT8_C( 20), UINT8_C(155), UINT8_C(176), UINT8_C( 29), UINT8_C(190), UINT8_C(210), UINT8_C(118), UINT8_C(126)), UINT64_C(11677054287794856229) }, { simde_x_mm512_set_epu8(UINT8_C(207), UINT8_C( 64), UINT8_C(105), UINT8_C(187), UINT8_C(171), UINT8_C( 70), UINT8_C(149), UINT8_C(106), UINT8_C(166), UINT8_C(121), UINT8_C(188), UINT8_C(223), UINT8_C(179), UINT8_C(107), UINT8_C(221), UINT8_C(150), UINT8_C(139), UINT8_C( 19), UINT8_C(235), UINT8_C( 53), UINT8_C(220), UINT8_C( 58), UINT8_C(119), UINT8_C(117), UINT8_C(168), UINT8_C( 70), UINT8_C(147), UINT8_C(215), UINT8_C(204), 
UINT8_C(240), UINT8_C( 35), UINT8_C(179), UINT8_C(220), UINT8_C(143), UINT8_C(187), UINT8_C( 63), UINT8_C(203), UINT8_C(224), UINT8_C(132), UINT8_C( 76), UINT8_C( 40), UINT8_C( 69), UINT8_C( 2), UINT8_C( 69), UINT8_C( 77), UINT8_C( 42), UINT8_C( 24), UINT8_C(201), UINT8_C( 19), UINT8_C(253), UINT8_C(249), UINT8_C(203), UINT8_C( 23), UINT8_C(169), UINT8_C(207), UINT8_C( 8), UINT8_C(208), UINT8_C(154), UINT8_C( 75), UINT8_C(165), UINT8_C( 0), UINT8_C(121), UINT8_C( 35), UINT8_C( 83)), simde_x_mm512_set_epu8(UINT8_C(148), UINT8_C( 45), UINT8_C( 60), UINT8_C(121), UINT8_C(136), UINT8_C(104), UINT8_C( 13), UINT8_C(217), UINT8_C( 88), UINT8_C(183), UINT8_C(187), UINT8_C(143), UINT8_C(198), UINT8_C(210), UINT8_C(145), UINT8_C(248), UINT8_C( 52), UINT8_C( 10), UINT8_C(182), UINT8_C( 49), UINT8_C( 78), UINT8_C( 50), UINT8_C( 27), UINT8_C( 82), UINT8_C( 46), UINT8_C(154), UINT8_C( 72), UINT8_C( 7), UINT8_C( 99), UINT8_C( 90), UINT8_C(196), UINT8_C(238), UINT8_C(215), UINT8_C(157), UINT8_C(102), UINT8_C( 95), UINT8_C(144), UINT8_C( 12), UINT8_C( 52), UINT8_C( 36), UINT8_C( 87), UINT8_C( 93), UINT8_C( 55), UINT8_C( 68), UINT8_C(113), UINT8_C( 27), UINT8_C(111), UINT8_C(201), UINT8_C(130), UINT8_C( 33), UINT8_C(210), UINT8_C( 36), UINT8_C( 59), UINT8_C(255), UINT8_C( 46), UINT8_C( 49), UINT8_C( 22), UINT8_C(148), UINT8_C(144), UINT8_C(146), UINT8_C(245), UINT8_C( 41), UINT8_C(198), UINT8_C(231)), UINT64_C(381961832516783403) }, { simde_x_mm512_set_epu8(UINT8_C(235), UINT8_C( 63), UINT8_C(229), UINT8_C(216), UINT8_C( 37), UINT8_C(222), UINT8_C( 30), UINT8_C( 44), UINT8_C(158), UINT8_C(214), UINT8_C(245), UINT8_C( 16), UINT8_C( 13), UINT8_C( 12), UINT8_C(141), UINT8_C(114), UINT8_C(211), UINT8_C( 56), UINT8_C(106), UINT8_C( 90), UINT8_C( 25), UINT8_C( 48), UINT8_C(151), UINT8_C( 2), UINT8_C(239), UINT8_C(116), UINT8_C( 99), UINT8_C( 74), UINT8_C(147), UINT8_C( 81), UINT8_C(180), UINT8_C(169), UINT8_C(183), UINT8_C(247), UINT8_C(193), UINT8_C( 12), UINT8_C(147), UINT8_C(164), 
UINT8_C(194), UINT8_C(196), UINT8_C( 62), UINT8_C(138), UINT8_C(220), UINT8_C(182), UINT8_C(122), UINT8_C( 2), UINT8_C(203), UINT8_C( 14), UINT8_C( 82), UINT8_C(208), UINT8_C(210), UINT8_C( 59), UINT8_C( 36), UINT8_C(151), UINT8_C( 18), UINT8_C( 92), UINT8_C(134), UINT8_C(208), UINT8_C( 95), UINT8_C( 59), UINT8_C(250), UINT8_C( 9), UINT8_C(199), UINT8_C(157)), simde_x_mm512_set_epu8(UINT8_C(138), UINT8_C(192), UINT8_C( 19), UINT8_C( 45), UINT8_C( 2), UINT8_C( 30), UINT8_C(159), UINT8_C(195), UINT8_C( 92), UINT8_C(244), UINT8_C(218), UINT8_C(152), UINT8_C(238), UINT8_C(209), UINT8_C( 95), UINT8_C( 38), UINT8_C( 76), UINT8_C( 25), UINT8_C(198), UINT8_C( 22), UINT8_C(237), UINT8_C( 18), UINT8_C(148), UINT8_C(223), UINT8_C( 99), UINT8_C(223), UINT8_C(112), UINT8_C(190), UINT8_C( 45), UINT8_C( 13), UINT8_C( 32), UINT8_C(140), UINT8_C(249), UINT8_C(183), UINT8_C(177), UINT8_C( 27), UINT8_C( 27), UINT8_C(174), UINT8_C(110), UINT8_C( 62), UINT8_C(140), UINT8_C(195), UINT8_C(157), UINT8_C(183), UINT8_C( 81), UINT8_C( 92), UINT8_C( 28), UINT8_C( 99), UINT8_C(115), UINT8_C( 41), UINT8_C(207), UINT8_C(220), UINT8_C(170), UINT8_C( 70), UINT8_C(196), UINT8_C( 53), UINT8_C( 45), UINT8_C(254), UINT8_C(177), UINT8_C(198), UINT8_C( 26), UINT8_C(251), UINT8_C(201), UINT8_C(125)), UINT64_C(4853800061908654710) }, { simde_x_mm512_set_epu8(UINT8_C( 39), UINT8_C(111), UINT8_C( 79), UINT8_C( 64), UINT8_C(165), UINT8_C( 55), UINT8_C( 15), UINT8_C(251), UINT8_C(155), UINT8_C(226), UINT8_C(102), UINT8_C( 27), UINT8_C(173), UINT8_C(169), UINT8_C(205), UINT8_C( 72), UINT8_C(187), UINT8_C(157), UINT8_C(143), UINT8_C( 48), UINT8_C(145), UINT8_C(133), UINT8_C(221), UINT8_C(157), UINT8_C(112), UINT8_C(228), UINT8_C(192), UINT8_C(246), UINT8_C( 19), UINT8_C(154), UINT8_C(151), UINT8_C( 43), UINT8_C( 18), UINT8_C( 33), UINT8_C(196), UINT8_C( 32), UINT8_C(143), UINT8_C(197), UINT8_C( 77), UINT8_C( 35), UINT8_C(108), UINT8_C( 21), UINT8_C(240), UINT8_C( 47), UINT8_C( 44), UINT8_C(206), UINT8_C(103), 
UINT8_C(244), UINT8_C(131), UINT8_C(178), UINT8_C(145), UINT8_C( 4), UINT8_C(241), UINT8_C( 13), UINT8_C(185), UINT8_C( 72), UINT8_C(226), UINT8_C( 93), UINT8_C(116), UINT8_C( 66), UINT8_C(223), UINT8_C(186), UINT8_C(235), UINT8_C( 57)), simde_x_mm512_set_epu8(UINT8_C(137), UINT8_C( 6), UINT8_C( 38), UINT8_C(185), UINT8_C(182), UINT8_C(134), UINT8_C( 54), UINT8_C( 21), UINT8_C(101), UINT8_C( 51), UINT8_C(169), UINT8_C(214), UINT8_C(204), UINT8_C( 43), UINT8_C( 15), UINT8_C(161), UINT8_C(123), UINT8_C(201), UINT8_C(126), UINT8_C( 21), UINT8_C(181), UINT8_C(113), UINT8_C(224), UINT8_C(235), UINT8_C(112), UINT8_C( 81), UINT8_C( 97), UINT8_C(103), UINT8_C( 63), UINT8_C( 88), UINT8_C(251), UINT8_C( 83), UINT8_C( 37), UINT8_C( 52), UINT8_C(172), UINT8_C( 10), UINT8_C(101), UINT8_C(146), UINT8_C(200), UINT8_C(167), UINT8_C( 91), UINT8_C(136), UINT8_C(165), UINT8_C( 10), UINT8_C( 46), UINT8_C(208), UINT8_C(176), UINT8_C(155), UINT8_C(102), UINT8_C(231), UINT8_C( 88), UINT8_C(169), UINT8_C( 36), UINT8_C(196), UINT8_C(172), UINT8_C(187), UINT8_C(198), UINT8_C(153), UINT8_C(113), UINT8_C( 47), UINT8_C(217), UINT8_C(143), UINT8_C(254), UINT8_C( 59)), UINT64_C(11401226995314349379) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask64 r = simde_mm512_cmple_epu8_mask(test_vec[i].a, test_vec[i].b); simde_assert_equal_mmask64(r, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmple_epi8_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmple_epu8_mask) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/cmplt.c000066400000000000000000001635561400333146700165570ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, 
distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #define SIMDE_TEST_X86_AVX512_INSN cmplt #include #include #include static int test_simde_mm512_cmplt_epi8_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__mmask64 r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( -77), INT8_C( -25), INT8_C( -46), INT8_C( 46), INT8_C( 13), INT8_C( 4), INT8_C( -85), INT8_C( -85), INT8_C( -84), INT8_C( -54), INT8_C( 24), INT8_C( 27), INT8_C(-115), INT8_C( 32), INT8_C( -88), INT8_C( -66), INT8_C( 113), INT8_C( -86), INT8_C( 24), INT8_C( 39), INT8_C( -74), INT8_C( 61), INT8_C( 110), INT8_C( 70), INT8_C( 64), INT8_C( 85), INT8_C(-108), INT8_C( -7), INT8_C(-115), INT8_C( -79), INT8_C( -97), INT8_C( -25), INT8_C( 108), INT8_C( -91), INT8_C( 98), INT8_C( 90), INT8_C( -86), INT8_C(-115), INT8_C( 85), INT8_C( -77), INT8_C( -94), INT8_C( -97), INT8_C( 118), INT8_C( -72), INT8_C( -59), INT8_C( 1), INT8_C( 5), INT8_C( 40), INT8_C( -51), INT8_C(-106), INT8_C( -27), INT8_C(-101), INT8_C( 27), INT8_C( -43), INT8_C( -67), INT8_C(-118), INT8_C( 9), INT8_C( -56), INT8_C(-102), INT8_C( -49), INT8_C( -36), INT8_C( 56), INT8_C( 85), INT8_C( 82)), simde_mm512_set_epi8(INT8_C( 15), INT8_C( -44), INT8_C( -4), INT8_C( 
65), INT8_C( 115), INT8_C( 75), INT8_C(-128), INT8_C( -29), INT8_C( -41), INT8_C( -89), INT8_C( -75), INT8_C( -12), INT8_C( 8), INT8_C( -18), INT8_C( 0), INT8_C( 50), INT8_C( -20), INT8_C( 66), INT8_C( 59), INT8_C( 42), INT8_C( 112), INT8_C(-128), INT8_C( 83), INT8_C( 7), INT8_C( 66), INT8_C( -29), INT8_C( -70), INT8_C( 42), INT8_C(-100), INT8_C( 85), INT8_C( -81), INT8_C( -93), INT8_C( 9), INT8_C( -66), INT8_C( -12), INT8_C( 91), INT8_C( 12), INT8_C( 45), INT8_C( 127), INT8_C(-123), INT8_C( -53), INT8_C( 78), INT8_C( 39), INT8_C(-107), INT8_C( 4), INT8_C( -32), INT8_C( -36), INT8_C( -2), INT8_C( -88), INT8_C( 39), INT8_C( -66), INT8_C( -14), INT8_C( -93), INT8_C( -77), INT8_C( -69), INT8_C( 8), INT8_C(-103), INT8_C( 105), INT8_C( 72), INT8_C( -8), INT8_C( 49), INT8_C(-104), INT8_C( 40), INT8_C( 17)), UINT64_C(13658143053960466808) }, { simde_mm512_set_epi8(INT8_C(-103), INT8_C( 71), INT8_C( 97), INT8_C( 13), INT8_C( -23), INT8_C(-103), INT8_C(-115), INT8_C( 49), INT8_C( 20), INT8_C( -2), INT8_C( 49), INT8_C( 20), INT8_C( -48), INT8_C( 19), INT8_C( 36), INT8_C( 5), INT8_C( 110), INT8_C( -17), INT8_C( -78), INT8_C( -81), INT8_C( -87), INT8_C( -42), INT8_C( -90), INT8_C( -74), INT8_C( -81), INT8_C( -59), INT8_C( -73), INT8_C( 101), INT8_C( -93), INT8_C( 16), INT8_C( 54), INT8_C(-126), INT8_C( 73), INT8_C(-113), INT8_C( -63), INT8_C( 0), INT8_C( -4), INT8_C( -98), INT8_C( 69), INT8_C( -82), INT8_C(-103), INT8_C(-100), INT8_C( -26), INT8_C( 6), INT8_C( -99), INT8_C( 22), INT8_C( 1), INT8_C(-119), INT8_C( 62), INT8_C( -91), INT8_C( 126), INT8_C( -42), INT8_C( -88), INT8_C( -10), INT8_C( 66), INT8_C(-108), INT8_C( 4), INT8_C( -81), INT8_C(-101), INT8_C( -87), INT8_C( -36), INT8_C( 114), INT8_C( 105), INT8_C( 70)), simde_mm512_set_epi8(INT8_C(-106), INT8_C( 118), INT8_C(-104), INT8_C( 91), INT8_C( 22), INT8_C( -65), INT8_C( 99), INT8_C( 90), INT8_C( -59), INT8_C( 121), INT8_C( 74), INT8_C( 82), INT8_C( 17), INT8_C( -59), INT8_C( 19), INT8_C(-103), INT8_C( 84), INT8_C( 
62), INT8_C( 105), INT8_C( -37), INT8_C( -29), INT8_C(-118), INT8_C( 43), INT8_C( 43), INT8_C( 33), INT8_C(-102), INT8_C( -5), INT8_C( -51), INT8_C( -22), INT8_C( 3), INT8_C( 47), INT8_C( -50), INT8_C( -25), INT8_C( 25), INT8_C( -33), INT8_C( -68), INT8_C( 52), INT8_C( -60), INT8_C( 19), INT8_C(-103), INT8_C( 71), INT8_C( 88), INT8_C( 82), INT8_C( -50), INT8_C( -90), INT8_C( -3), INT8_C( -25), INT8_C( 35), INT8_C(-100), INT8_C( -67), INT8_C( 46), INT8_C( 77), INT8_C( -70), INT8_C( 1), INT8_C( 82), INT8_C( 114), INT8_C( -55), INT8_C( 11), INT8_C( -95), INT8_C( 111), INT8_C( 48), INT8_C( 71), INT8_C( 90), INT8_C( -66)), UINT64_C(6879384398415355768) }, { simde_mm512_set_epi8(INT8_C( -71), INT8_C( 111), INT8_C( -21), INT8_C( 61), INT8_C( 5), INT8_C( 96), INT8_C( 81), INT8_C( 9), INT8_C(-121), INT8_C( 39), INT8_C( -77), INT8_C( 111), INT8_C( -66), INT8_C( 56), INT8_C( -30), INT8_C( -47), INT8_C( 60), INT8_C( -59), INT8_C( 45), INT8_C( 86), INT8_C( -57), INT8_C( -53), INT8_C( 106), INT8_C( 23), INT8_C( -11), INT8_C( 82), INT8_C( 92), INT8_C( -19), INT8_C( 64), INT8_C( 103), INT8_C( -89), INT8_C( -98), INT8_C( -33), INT8_C( 99), INT8_C( 24), INT8_C( 46), INT8_C( 12), INT8_C( -4), INT8_C( -89), INT8_C( 107), INT8_C( -35), INT8_C( 71), INT8_C( 43), INT8_C( 111), INT8_C( -31), INT8_C( -90), INT8_C( -5), INT8_C( 22), INT8_C( 27), INT8_C( -27), INT8_C( -44), INT8_C( 119), INT8_C( -73), INT8_C( 55), INT8_C(-108), INT8_C( -98), INT8_C( 121), INT8_C(-114), INT8_C( 51), INT8_C( 28), INT8_C( 69), INT8_C( 37), INT8_C( 17), INT8_C( -56)), simde_mm512_set_epi8(INT8_C( -65), INT8_C( 52), INT8_C( -9), INT8_C( 14), INT8_C( 23), INT8_C(-117), INT8_C( 110), INT8_C( 60), INT8_C( 81), INT8_C(-125), INT8_C( 120), INT8_C( 71), INT8_C( 100), INT8_C( -41), INT8_C( 116), INT8_C(-126), INT8_C(-101), INT8_C( 34), INT8_C( -39), INT8_C( 84), INT8_C( -70), INT8_C(-112), INT8_C( -6), INT8_C( -59), INT8_C( 82), INT8_C( 94), INT8_C( 76), INT8_C( 114), INT8_C( 127), INT8_C( -86), INT8_C( -89), INT8_C( 
93), INT8_C( 1), INT8_C( 119), INT8_C( -87), INT8_C( -11), INT8_C( -62), INT8_C( -56), INT8_C( -72), INT8_C( -84), INT8_C( 46), INT8_C( 34), INT8_C( -72), INT8_C(-127), INT8_C( 99), INT8_C( 102), INT8_C( 60), INT8_C( 57), INT8_C( -62), INT8_C( -50), INT8_C( -30), INT8_C( 99), INT8_C( -14), INT8_C( 93), INT8_C( -12), INT8_C(-120), INT8_C( 126), INT8_C( 7), INT8_C( 84), INT8_C( 10), INT8_C( -53), INT8_C( 100), INT8_C( 113), INT8_C( -25)), UINT64_C(12369770630542733031) }, { simde_mm512_set_epi8(INT8_C( 56), INT8_C( -4), INT8_C( -4), INT8_C( -38), INT8_C( 25), INT8_C( 103), INT8_C( -62), INT8_C( 92), INT8_C( 31), INT8_C( -43), INT8_C( -18), INT8_C( 90), INT8_C( -11), INT8_C( 80), INT8_C(-102), INT8_C( -32), INT8_C( -48), INT8_C( -4), INT8_C(-124), INT8_C(-111), INT8_C( 122), INT8_C( 50), INT8_C( 20), INT8_C( 73), INT8_C( 10), INT8_C( -61), INT8_C( 30), INT8_C( 39), INT8_C( 47), INT8_C( -1), INT8_C( -24), INT8_C(-123), INT8_C( 91), INT8_C( 9), INT8_C( -27), INT8_C( 35), INT8_C( 0), INT8_C( -62), INT8_C( -75), INT8_C( 104), INT8_C( 44), INT8_C( 16), INT8_C(-126), INT8_C( 97), INT8_C( -20), INT8_C( -56), INT8_C( -70), INT8_C( -25), INT8_C( 96), INT8_C( 112), INT8_C( 102), INT8_C( 2), INT8_C( -70), INT8_C( -47), INT8_C( 77), INT8_C( -51), INT8_C( -63), INT8_C( 15), INT8_C( -89), INT8_C( -32), INT8_C( 72), INT8_C( 102), INT8_C( -45), INT8_C( 80)), simde_mm512_set_epi8(INT8_C( 46), INT8_C( -69), INT8_C( -3), INT8_C( 7), INT8_C( -53), INT8_C( -75), INT8_C(-113), INT8_C( 66), INT8_C(-117), INT8_C( -4), INT8_C( -75), INT8_C(-100), INT8_C( -10), INT8_C(-123), INT8_C( 108), INT8_C( 99), INT8_C( 34), INT8_C( 28), INT8_C( 20), INT8_C( -3), INT8_C( -41), INT8_C( 37), INT8_C( 126), INT8_C( 37), INT8_C( 86), INT8_C(-126), INT8_C( 57), INT8_C( -79), INT8_C(-102), INT8_C( -22), INT8_C( -65), INT8_C( -86), INT8_C( -13), INT8_C( 63), INT8_C( -35), INT8_C( 12), INT8_C( 52), INT8_C( 115), INT8_C( 54), INT8_C(-115), INT8_C( 85), INT8_C(-123), INT8_C( -3), INT8_C( -67), INT8_C( 75), INT8_C( 
-64), INT8_C( 52), INT8_C( 126), INT8_C( 62), INT8_C( -88), INT8_C( 10), INT8_C( 75), INT8_C( -91), INT8_C( 62), INT8_C( 97), INT8_C( 54), INT8_C( -80), INT8_C( -98), INT8_C( -77), INT8_C( 80), INT8_C( 14), INT8_C( 105), INT8_C( -43), INT8_C( 19)), UINT64_C(3480141911697332022) }, { simde_mm512_set_epi8(INT8_C( 30), INT8_C( 13), INT8_C( 72), INT8_C( 124), INT8_C( 6), INT8_C( -85), INT8_C( -61), INT8_C( -45), INT8_C(-117), INT8_C( 64), INT8_C(-110), INT8_C( -2), INT8_C( 83), INT8_C( 64), INT8_C( 94), INT8_C( 33), INT8_C( 87), INT8_C( -89), INT8_C( -85), INT8_C( -82), INT8_C( 61), INT8_C( -90), INT8_C( 27), INT8_C(-115), INT8_C( -84), INT8_C( -79), INT8_C( -56), INT8_C( 66), INT8_C( 57), INT8_C( 48), INT8_C( 34), INT8_C( 90), INT8_C( 51), INT8_C( 19), INT8_C( 105), INT8_C( -57), INT8_C( -67), INT8_C(-104), INT8_C( 1), INT8_C(-128), INT8_C( 95), INT8_C( 94), INT8_C( -45), INT8_C( -86), INT8_C( 116), INT8_C( 95), INT8_C( 64), INT8_C(-106), INT8_C( 32), INT8_C( -60), INT8_C(-105), INT8_C( 23), INT8_C( 115), INT8_C( -71), INT8_C( -22), INT8_C( -60), INT8_C( -51), INT8_C( 42), INT8_C( -96), INT8_C( -3), INT8_C( 39), INT8_C( -17), INT8_C( 55), INT8_C(-100)), simde_mm512_set_epi8(INT8_C( 33), INT8_C( -73), INT8_C( 56), INT8_C(-105), INT8_C( 103), INT8_C(-109), INT8_C( 18), INT8_C( -30), INT8_C( 97), INT8_C( 18), INT8_C( 119), INT8_C( -24), INT8_C( 104), INT8_C( 64), INT8_C( -85), INT8_C( -21), INT8_C( 18), INT8_C(-115), INT8_C( 98), INT8_C( 20), INT8_C( 51), INT8_C( 30), INT8_C( -90), INT8_C( -24), INT8_C( -99), INT8_C( -91), INT8_C( 11), INT8_C( -84), INT8_C( 56), INT8_C( 98), INT8_C( -91), INT8_C( -93), INT8_C( 0), INT8_C( 119), INT8_C( 113), INT8_C( 107), INT8_C( 103), INT8_C( 96), INT8_C( -68), INT8_C(-127), INT8_C( -84), INT8_C( 51), INT8_C( 103), INT8_C( 56), INT8_C( -12), INT8_C( 53), INT8_C( 32), INT8_C(-117), INT8_C( -90), INT8_C( -10), INT8_C( 29), INT8_C( 115), INT8_C( 127), INT8_C( 10), INT8_C( -23), INT8_C( 108), INT8_C( 92), INT8_C( -2), INT8_C( -94), INT8_C( 
83), INT8_C( -55), INT8_C( 115), INT8_C( -67), INT8_C( -25)), UINT64_C(10063351798194798005) }, { simde_mm512_set_epi8(INT8_C( 30), INT8_C( -62), INT8_C( -2), INT8_C( 110), INT8_C( -99), INT8_C( 0), INT8_C( 114), INT8_C(-101), INT8_C( -98), INT8_C(-101), INT8_C( 110), INT8_C( 127), INT8_C( -57), INT8_C( 112), INT8_C( 1), INT8_C( -68), INT8_C( -53), INT8_C( 40), INT8_C( 60), INT8_C( -7), INT8_C( 119), INT8_C( -84), INT8_C( 59), INT8_C( 41), INT8_C( 94), INT8_C( 56), INT8_C( -73), INT8_C(-113), INT8_C(-101), INT8_C( 70), INT8_C( -5), INT8_C(-102), INT8_C( -24), INT8_C( -88), INT8_C( -82), INT8_C( -98), INT8_C( 103), INT8_C( 114), INT8_C( -24), INT8_C( -1), INT8_C( 33), INT8_C( -48), INT8_C( 56), INT8_C( -37), INT8_C( -82), INT8_C( 126), INT8_C( -6), INT8_C( 117), INT8_C(-112), INT8_C( -39), INT8_C( 59), INT8_C( 25), INT8_C( -19), INT8_C( 35), INT8_C( -12), INT8_C( -23), INT8_C(-128), INT8_C( -16), INT8_C( -33), INT8_C( -91), INT8_C( -40), INT8_C( -21), INT8_C( -38), INT8_C( 79)), simde_mm512_set_epi8(INT8_C( 42), INT8_C( 118), INT8_C( 39), INT8_C( 16), INT8_C( 45), INT8_C( -16), INT8_C( 38), INT8_C( 64), INT8_C( -47), INT8_C( 99), INT8_C( 10), INT8_C( -26), INT8_C( -31), INT8_C( -27), INT8_C( -3), INT8_C( 108), INT8_C( -96), INT8_C( -87), INT8_C( -84), INT8_C( 9), INT8_C( 24), INT8_C( 14), INT8_C( 123), INT8_C( -80), INT8_C( 1), INT8_C( 70), INT8_C( 95), INT8_C( 7), INT8_C( -79), INT8_C( -64), INT8_C( 81), INT8_C( -84), INT8_C(-123), INT8_C( -69), INT8_C( -73), INT8_C( 98), INT8_C( -88), INT8_C( 100), INT8_C( -60), INT8_C( 125), INT8_C( 62), INT8_C( -77), INT8_C( 32), INT8_C( 40), INT8_C( 68), INT8_C( 75), INT8_C( 112), INT8_C(-128), INT8_C( -18), INT8_C( -3), INT8_C( 2), INT8_C( 28), INT8_C( -88), INT8_C( 15), INT8_C( -42), INT8_C( -1), INT8_C( 10), INT8_C( -70), INT8_C( -90), INT8_C( -93), INT8_C( 65), INT8_C( 119), INT8_C( 107), INT8_C( 29)), UINT64_C(16846020600598811022) }, { simde_mm512_set_epi8(INT8_C( 21), INT8_C( 119), INT8_C( 54), INT8_C( -98), 
INT8_C(-127), INT8_C( 24), INT8_C( -58), INT8_C(-124), INT8_C( 58), INT8_C( 6), INT8_C( 90), INT8_C( -82), INT8_C( 81), INT8_C(-114), INT8_C( -76), INT8_C( -79), INT8_C(-107), INT8_C( 107), INT8_C( -44), INT8_C( 36), INT8_C( -3), INT8_C( -89), INT8_C( 118), INT8_C(-104), INT8_C( 90), INT8_C( 122), INT8_C( 4), INT8_C( 68), INT8_C( 34), INT8_C( 55), INT8_C( 65), INT8_C( 86), INT8_C( 74), INT8_C( -50), INT8_C(-117), INT8_C( 7), INT8_C( 11), INT8_C( -4), INT8_C( -3), INT8_C( 109), INT8_C( -44), INT8_C( -96), INT8_C( 98), INT8_C( 87), INT8_C( -59), INT8_C( 95), INT8_C( -16), INT8_C( -64), INT8_C( -50), INT8_C( -5), INT8_C( -97), INT8_C( -47), INT8_C( -88), INT8_C( 77), INT8_C( -27), INT8_C( -13), INT8_C( -76), INT8_C( -43), INT8_C( 104), INT8_C( 53), INT8_C( 4), INT8_C( -45), INT8_C( 81), INT8_C( 115)), simde_mm512_set_epi8(INT8_C( 58), INT8_C( 41), INT8_C( 85), INT8_C( -51), INT8_C( 1), INT8_C( 51), INT8_C( 56), INT8_C(-109), INT8_C( 109), INT8_C( 112), INT8_C( 72), INT8_C( -55), INT8_C( -35), INT8_C( -66), INT8_C( -30), INT8_C( -94), INT8_C( 71), INT8_C( 55), INT8_C( 100), INT8_C( 34), INT8_C( 17), INT8_C( 115), INT8_C( 127), INT8_C( 32), INT8_C( 101), INT8_C( 91), INT8_C( 97), INT8_C( -40), INT8_C( 45), INT8_C( -66), INT8_C( -66), INT8_C( 34), INT8_C( -55), INT8_C( 18), INT8_C( -74), INT8_C( -1), INT8_C( 33), INT8_C( -59), INT8_C( 16), INT8_C( -80), INT8_C( -4), INT8_C( 84), INT8_C( 30), INT8_C( -62), INT8_C(-115), INT8_C( 37), INT8_C( 41), INT8_C( 57), INT8_C( -41), INT8_C( -67), INT8_C( -77), INT8_C( 48), INT8_C( 113), INT8_C( 84), INT8_C( 44), INT8_C( 98), INT8_C( -38), INT8_C( 2), INT8_C(-122), INT8_C( -26), INT8_C( 77), INT8_C( 90), INT8_C( 30), INT8_C( 127)), UINT64_C(13823429244140896205) }, { simde_mm512_set_epi8(INT8_C( -22), INT8_C( 124), INT8_C( 34), INT8_C( -16), INT8_C( 8), INT8_C( 122), INT8_C( 46), INT8_C(-121), INT8_C( 29), INT8_C( 20), INT8_C( -3), INT8_C( -93), INT8_C( 62), INT8_C( 115), INT8_C( 28), INT8_C( -90), INT8_C( 27), INT8_C( 21), INT8_C( 
-60), INT8_C( 15), INT8_C( 72), INT8_C(-113), INT8_C( 5), INT8_C( -77), INT8_C( 42), INT8_C( -54), INT8_C( -42), INT8_C(-118), INT8_C( -22), INT8_C( 63), INT8_C( -17), INT8_C( -10), INT8_C( -67), INT8_C( 58), INT8_C( -23), INT8_C(-108), INT8_C(-119), INT8_C( 102), INT8_C( 88), INT8_C( 112), INT8_C(-126), INT8_C(-115), INT8_C( -3), INT8_C( -3), INT8_C( 115), INT8_C( -28), INT8_C( 73), INT8_C( 86), INT8_C( 27), INT8_C( -49), INT8_C( 40), INT8_C( 0), INT8_C( 84), INT8_C(-105), INT8_C( -76), INT8_C( -85), INT8_C( 88), INT8_C( 123), INT8_C( 42), INT8_C( 35), INT8_C( 67), INT8_C( -90), INT8_C( 86), INT8_C( -23)), simde_mm512_set_epi8(INT8_C(-115), INT8_C( -9), INT8_C( 73), INT8_C( 35), INT8_C(-126), INT8_C(-117), INT8_C( -35), INT8_C( 40), INT8_C(-119), INT8_C( -28), INT8_C( -11), INT8_C( -21), INT8_C( -21), INT8_C( 36), INT8_C( -54), INT8_C( -56), INT8_C( 78), INT8_C( 7), INT8_C( -48), INT8_C( -59), INT8_C( 121), INT8_C( 74), INT8_C( -58), INT8_C( 102), INT8_C( 91), INT8_C( 126), INT8_C( -48), INT8_C( 121), INT8_C( -54), INT8_C( 92), INT8_C( -18), INT8_C( 115), INT8_C( -82), INT8_C(-102), INT8_C( -44), INT8_C( 35), INT8_C( 20), INT8_C(-107), INT8_C( 72), INT8_C( -9), INT8_C( -37), INT8_C( -54), INT8_C( -20), INT8_C( -31), INT8_C( 67), INT8_C( 96), INT8_C( -94), INT8_C( -5), INT8_C( -22), INT8_C( 67), INT8_C( 22), INT8_C( -62), INT8_C( -72), INT8_C( -76), INT8_C( -32), INT8_C( 86), INT8_C( -8), INT8_C( 57), INT8_C( -71), INT8_C( -16), INT8_C(-124), INT8_C( -88), INT8_C( 49), INT8_C( 35)), UINT64_C(3535798313217705733) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask64 r = simde_mm512_cmplt_epi8_mask(test_vec[i].a, test_vec[i].b); simde_assert_equal_mmask64(r, test_vec[i].r); } return 0; } static int test_simde_mm512_cmplt_epu8_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__mmask64 r; } test_vec[8] = { { simde_x_mm512_set_epu8(UINT8_C(200), UINT8_C( 64), UINT8_C(228), UINT8_C(187), UINT8_C( 
53), UINT8_C(115), UINT8_C(212), UINT8_C(224), UINT8_C(234), UINT8_C( 45), UINT8_C(183), UINT8_C(185), UINT8_C(117), UINT8_C(137), UINT8_C(224), UINT8_C( 48), UINT8_C(225), UINT8_C(229), UINT8_C(194), UINT8_C(201), UINT8_C(105), UINT8_C(193), UINT8_C(219), UINT8_C(144), UINT8_C( 87), UINT8_C(205), UINT8_C( 38), UINT8_C(200), UINT8_C( 89), UINT8_C( 3), UINT8_C(155), UINT8_C(245), UINT8_C( 35), UINT8_C(246), UINT8_C( 15), UINT8_C(254), UINT8_C(226), UINT8_C(163), UINT8_C( 22), UINT8_C(135), UINT8_C(105), UINT8_C(246), UINT8_C(210), UINT8_C(152), UINT8_C(131), UINT8_C(226), UINT8_C(111), UINT8_C(207), UINT8_C( 48), UINT8_C(166), UINT8_C( 61), UINT8_C( 72), UINT8_C(115), UINT8_C( 10), UINT8_C(124), UINT8_C( 60), UINT8_C(127), UINT8_C( 50), UINT8_C( 32), UINT8_C( 65), UINT8_C(138), UINT8_C(206), UINT8_C( 67), UINT8_C( 10)), simde_x_mm512_set_epu8(UINT8_C(101), UINT8_C(156), UINT8_C(229), UINT8_C( 20), UINT8_C(123), UINT8_C(204), UINT8_C(216), UINT8_C( 73), UINT8_C(103), UINT8_C(232), UINT8_C(253), UINT8_C(122), UINT8_C( 63), UINT8_C(129), UINT8_C(179), UINT8_C(165), UINT8_C(219), UINT8_C( 23), UINT8_C( 44), UINT8_C(209), UINT8_C( 74), UINT8_C(150), UINT8_C(180), UINT8_C(217), UINT8_C( 86), UINT8_C(119), UINT8_C( 26), UINT8_C( 59), UINT8_C(214), UINT8_C( 93), UINT8_C( 27), UINT8_C( 37), UINT8_C( 61), UINT8_C( 47), UINT8_C(126), UINT8_C(138), UINT8_C(246), UINT8_C( 88), UINT8_C(146), UINT8_C(144), UINT8_C(216), UINT8_C( 36), UINT8_C(167), UINT8_C(247), UINT8_C(118), UINT8_C( 82), UINT8_C( 16), UINT8_C(237), UINT8_C(182), UINT8_C(219), UINT8_C( 49), UINT8_C( 46), UINT8_C(225), UINT8_C( 22), UINT8_C(189), UINT8_C( 65), UINT8_C(101), UINT8_C(226), UINT8_C( 23), UINT8_C(220), UINT8_C( 28), UINT8_C( 93), UINT8_C(158), UINT8_C( 8)), UINT64_C(7953657163028746066) }, { simde_x_mm512_set_epu8(UINT8_C( 72), UINT8_C( 33), UINT8_C(154), UINT8_C(122), UINT8_C( 74), UINT8_C(178), UINT8_C( 50), UINT8_C(213), UINT8_C( 98), UINT8_C(217), UINT8_C( 79), UINT8_C(232), UINT8_C(132), 
UINT8_C(243), UINT8_C(145), UINT8_C(149), UINT8_C( 59), UINT8_C(222), UINT8_C(125), UINT8_C(147), UINT8_C(164), UINT8_C(104), UINT8_C(229), UINT8_C(111), UINT8_C( 98), UINT8_C(138), UINT8_C(193), UINT8_C(124), UINT8_C( 63), UINT8_C(242), UINT8_C( 1), UINT8_C( 40), UINT8_C(135), UINT8_C(187), UINT8_C(113), UINT8_C(230), UINT8_C(208), UINT8_C(113), UINT8_C(106), UINT8_C( 33), UINT8_C(173), UINT8_C( 82), UINT8_C( 93), UINT8_C(149), UINT8_C( 4), UINT8_C(122), UINT8_C( 61), UINT8_C( 54), UINT8_C(106), UINT8_C(212), UINT8_C( 67), UINT8_C(253), UINT8_C(216), UINT8_C(134), UINT8_C(207), UINT8_C( 61), UINT8_C(208), UINT8_C( 81), UINT8_C(150), UINT8_C(127), UINT8_C( 37), UINT8_C(137), UINT8_C(225), UINT8_C( 78)), simde_x_mm512_set_epu8(UINT8_C( 96), UINT8_C(106), UINT8_C(154), UINT8_C( 0), UINT8_C( 1), UINT8_C(122), UINT8_C(193), UINT8_C( 25), UINT8_C(239), UINT8_C(152), UINT8_C( 87), UINT8_C( 80), UINT8_C( 31), UINT8_C(133), UINT8_C(251), UINT8_C( 23), UINT8_C(140), UINT8_C(137), UINT8_C(157), UINT8_C(114), UINT8_C( 93), UINT8_C(199), UINT8_C( 63), UINT8_C( 63), UINT8_C( 7), UINT8_C(151), UINT8_C( 52), UINT8_C( 78), UINT8_C(106), UINT8_C( 19), UINT8_C( 61), UINT8_C( 80), UINT8_C(227), UINT8_C( 61), UINT8_C(244), UINT8_C( 16), UINT8_C(153), UINT8_C(241), UINT8_C(118), UINT8_C(250), UINT8_C(216), UINT8_C( 38), UINT8_C( 9), UINT8_C(176), UINT8_C( 88), UINT8_C(222), UINT8_C( 84), UINT8_C(243), UINT8_C( 6), UINT8_C( 75), UINT8_C( 1), UINT8_C(141), UINT8_C(140), UINT8_C( 75), UINT8_C(187), UINT8_C(128), UINT8_C(101), UINT8_C(169), UINT8_C(202), UINT8_C(205), UINT8_C(101), UINT8_C( 13), UINT8_C(249), UINT8_C(158)), UINT64_C(14024952834426863995) }, { simde_x_mm512_set_epu8(UINT8_C(118), UINT8_C(178), UINT8_C(224), UINT8_C( 82), UINT8_C( 86), UINT8_C(103), UINT8_C( 51), UINT8_C( 73), UINT8_C(131), UINT8_C(150), UINT8_C( 58), UINT8_C(120), UINT8_C( 23), UINT8_C(173), UINT8_C( 81), UINT8_C(210), UINT8_C(130), UINT8_C( 18), UINT8_C(188), UINT8_C( 71), UINT8_C( 31), UINT8_C(136), 
UINT8_C(151), UINT8_C(116), UINT8_C(141), UINT8_C( 84), UINT8_C( 91), UINT8_C( 42), UINT8_C( 78), UINT8_C(105), UINT8_C( 68), UINT8_C(247), UINT8_C(246), UINT8_C( 98), UINT8_C(236), UINT8_C(254), UINT8_C( 34), UINT8_C( 84), UINT8_C(115), UINT8_C(219), UINT8_C( 18), UINT8_C( 7), UINT8_C( 63), UINT8_C(118), UINT8_C( 52), UINT8_C( 47), UINT8_C(109), UINT8_C( 86), UINT8_C( 94), UINT8_C( 32), UINT8_C( 34), UINT8_C(216), UINT8_C(187), UINT8_C(179), UINT8_C( 76), UINT8_C(176), UINT8_C(254), UINT8_C(105), UINT8_C( 86), UINT8_C(220), UINT8_C( 7), UINT8_C( 77), UINT8_C( 8), UINT8_C(213)), simde_x_mm512_set_epu8(UINT8_C(139), UINT8_C( 33), UINT8_C( 28), UINT8_C(170), UINT8_C( 51), UINT8_C(116), UINT8_C(243), UINT8_C( 67), UINT8_C(171), UINT8_C( 22), UINT8_C( 12), UINT8_C( 38), UINT8_C(216), UINT8_C(230), UINT8_C(112), UINT8_C( 69), UINT8_C(229), UINT8_C(115), UINT8_C(110), UINT8_C(236), UINT8_C( 84), UINT8_C(115), UINT8_C(237), UINT8_C(235), UINT8_C( 57), UINT8_C(112), UINT8_C( 59), UINT8_C(169), UINT8_C(188), UINT8_C( 43), UINT8_C( 43), UINT8_C(171), UINT8_C(177), UINT8_C( 79), UINT8_C(216), UINT8_C( 10), UINT8_C(218), UINT8_C(219), UINT8_C(205), UINT8_C( 15), UINT8_C(248), UINT8_C( 98), UINT8_C( 68), UINT8_C( 51), UINT8_C( 2), UINT8_C( 76), UINT8_C(147), UINT8_C(206), UINT8_C(115), UINT8_C(128), UINT8_C(102), UINT8_C(116), UINT8_C( 28), UINT8_C( 5), UINT8_C( 73), UINT8_C(210), UINT8_C(179), UINT8_C(184), UINT8_C( 57), UINT8_C( 24), UINT8_C( 10), UINT8_C( 26), UINT8_C( 0), UINT8_C( 23)), UINT64_C(10848849723635786056) }, { simde_x_mm512_set_epu8(UINT8_C( 59), UINT8_C(122), UINT8_C( 87), UINT8_C(211), UINT8_C(197), UINT8_C(251), UINT8_C( 70), UINT8_C(103), UINT8_C( 31), UINT8_C(245), UINT8_C(135), UINT8_C( 65), UINT8_C(214), UINT8_C(158), UINT8_C(167), UINT8_C( 6), UINT8_C(130), UINT8_C( 46), UINT8_C( 33), UINT8_C( 54), UINT8_C( 11), UINT8_C(245), UINT8_C( 84), UINT8_C(244), UINT8_C(176), UINT8_C( 82), UINT8_C(238), UINT8_C(130), UINT8_C(209), UINT8_C(204), UINT8_C(248), 
UINT8_C( 0), UINT8_C(157), UINT8_C(108), UINT8_C(156), UINT8_C(156), UINT8_C(237), UINT8_C( 39), UINT8_C(113), UINT8_C(164), UINT8_C( 74), UINT8_C( 17), UINT8_C(157), UINT8_C(212), UINT8_C( 94), UINT8_C(225), UINT8_C(154), UINT8_C(241), UINT8_C(234), UINT8_C( 71), UINT8_C( 97), UINT8_C( 49), UINT8_C(235), UINT8_C( 52), UINT8_C(222), UINT8_C( 20), UINT8_C( 5), UINT8_C(231), UINT8_C(123), UINT8_C( 11), UINT8_C( 62), UINT8_C(215), UINT8_C(218), UINT8_C( 46)), simde_x_mm512_set_epu8(UINT8_C(124), UINT8_C(178), UINT8_C(245), UINT8_C(131), UINT8_C( 34), UINT8_C(224), UINT8_C( 87), UINT8_C(197), UINT8_C(182), UINT8_C(209), UINT8_C(166), UINT8_C( 5), UINT8_C(234), UINT8_C(185), UINT8_C(158), UINT8_C(144), UINT8_C( 14), UINT8_C(107), UINT8_C(220), UINT8_C( 70), UINT8_C(119), UINT8_C(160), UINT8_C( 31), UINT8_C(191), UINT8_C(230), UINT8_C(198), UINT8_C(152), UINT8_C( 33), UINT8_C( 22), UINT8_C( 95), UINT8_C(212), UINT8_C(255), UINT8_C(113), UINT8_C(254), UINT8_C( 2), UINT8_C(210), UINT8_C(195), UINT8_C(115), UINT8_C(106), UINT8_C(226), UINT8_C( 64), UINT8_C(138), UINT8_C( 67), UINT8_C( 75), UINT8_C( 11), UINT8_C(130), UINT8_C(117), UINT8_C( 51), UINT8_C(106), UINT8_C(104), UINT8_C( 34), UINT8_C(112), UINT8_C( 78), UINT8_C( 85), UINT8_C(189), UINT8_C( 13), UINT8_C(140), UINT8_C( 10), UINT8_C( 60), UINT8_C( 18), UINT8_C( 8), UINT8_C(179), UINT8_C( 57), UINT8_C(196)), UINT64_C(16405901789334885521) }, { simde_x_mm512_set_epu8(UINT8_C(133), UINT8_C( 51), UINT8_C(153), UINT8_C( 32), UINT8_C( 25), UINT8_C(207), UINT8_C( 4), UINT8_C( 40), UINT8_C( 26), UINT8_C( 76), UINT8_C( 93), UINT8_C( 5), UINT8_C(177), UINT8_C(180), UINT8_C(109), UINT8_C(128), UINT8_C(101), UINT8_C( 26), UINT8_C(223), UINT8_C( 68), UINT8_C( 88), UINT8_C( 3), UINT8_C( 1), UINT8_C(148), UINT8_C( 0), UINT8_C(113), UINT8_C( 62), UINT8_C(107), UINT8_C(163), UINT8_C(164), UINT8_C(165), UINT8_C(185), UINT8_C(198), UINT8_C( 6), UINT8_C(199), UINT8_C(198), UINT8_C(207), UINT8_C(139), UINT8_C( 4), UINT8_C( 52), UINT8_C( 
26), UINT8_C(160), UINT8_C(162), UINT8_C(224), UINT8_C( 24), UINT8_C(137), UINT8_C(101), UINT8_C( 24), UINT8_C(115), UINT8_C(224), UINT8_C(208), UINT8_C( 54), UINT8_C(102), UINT8_C( 97), UINT8_C(207), UINT8_C( 72), UINT8_C( 87), UINT8_C( 19), UINT8_C(168), UINT8_C(205), UINT8_C( 92), UINT8_C( 79), UINT8_C( 86), UINT8_C(144)), simde_x_mm512_set_epu8(UINT8_C( 76), UINT8_C(120), UINT8_C(206), UINT8_C( 35), UINT8_C( 84), UINT8_C(143), UINT8_C(212), UINT8_C( 97), UINT8_C(238), UINT8_C(159), UINT8_C(181), UINT8_C(100), UINT8_C(208), UINT8_C(157), UINT8_C( 32), UINT8_C(247), UINT8_C( 15), UINT8_C(143), UINT8_C( 2), UINT8_C(229), UINT8_C( 17), UINT8_C( 50), UINT8_C( 1), UINT8_C(241), UINT8_C( 90), UINT8_C(132), UINT8_C( 93), UINT8_C( 20), UINT8_C( 46), UINT8_C(183), UINT8_C(154), UINT8_C(255), UINT8_C(118), UINT8_C(135), UINT8_C( 8), UINT8_C(254), UINT8_C(139), UINT8_C(221), UINT8_C(207), UINT8_C(230), UINT8_C(231), UINT8_C( 92), UINT8_C(100), UINT8_C(108), UINT8_C(158), UINT8_C(233), UINT8_C( 8), UINT8_C(234), UINT8_C(189), UINT8_C(236), UINT8_C( 58), UINT8_C(205), UINT8_C(125), UINT8_C(116), UINT8_C(230), UINT8_C(218), UINT8_C(185), UINT8_C(225), UINT8_C( 61), UINT8_C(183), UINT8_C(233), UINT8_C(244), UINT8_C(138), UINT8_C(204)), UINT64_C(8933265779370876879) }, { simde_x_mm512_set_epu8(UINT8_C( 74), UINT8_C( 8), UINT8_C( 71), UINT8_C( 14), UINT8_C(239), UINT8_C(140), UINT8_C( 39), UINT8_C( 68), UINT8_C( 18), UINT8_C(182), UINT8_C(128), UINT8_C(142), UINT8_C( 75), UINT8_C(196), UINT8_C(121), UINT8_C(239), UINT8_C( 67), UINT8_C(139), UINT8_C( 89), UINT8_C( 42), UINT8_C(150), UINT8_C(200), UINT8_C( 22), UINT8_C( 70), UINT8_C( 92), UINT8_C(114), UINT8_C( 0), UINT8_C(232), UINT8_C(121), UINT8_C(124), UINT8_C(100), UINT8_C(100), UINT8_C(142), UINT8_C( 19), UINT8_C(218), UINT8_C(104), UINT8_C(159), UINT8_C(120), UINT8_C(122), UINT8_C( 55), UINT8_C(213), UINT8_C(170), UINT8_C(221), UINT8_C(149), UINT8_C(230), UINT8_C(250), UINT8_C(104), UINT8_C( 36), UINT8_C( 99), UINT8_C( 18), 
UINT8_C(124), UINT8_C(175), UINT8_C(103), UINT8_C(186), UINT8_C(205), UINT8_C( 43), UINT8_C(141), UINT8_C(148), UINT8_C(140), UINT8_C( 44), UINT8_C(237), UINT8_C(120), UINT8_C(114), UINT8_C(100)), simde_x_mm512_set_epu8(UINT8_C(124), UINT8_C(149), UINT8_C( 71), UINT8_C(212), UINT8_C(137), UINT8_C(252), UINT8_C(249), UINT8_C( 42), UINT8_C(167), UINT8_C(191), UINT8_C(236), UINT8_C(252), UINT8_C( 26), UINT8_C( 50), UINT8_C( 98), UINT8_C(162), UINT8_C( 91), UINT8_C(215), UINT8_C( 44), UINT8_C( 48), UINT8_C( 41), UINT8_C(167), UINT8_C( 25), UINT8_C( 39), UINT8_C(183), UINT8_C(181), UINT8_C(250), UINT8_C( 47), UINT8_C( 5), UINT8_C(113), UINT8_C( 48), UINT8_C(195), UINT8_C(111), UINT8_C( 46), UINT8_C( 74), UINT8_C( 84), UINT8_C(145), UINT8_C( 27), UINT8_C(231), UINT8_C(119), UINT8_C( 33), UINT8_C(230), UINT8_C( 22), UINT8_C( 69), UINT8_C( 48), UINT8_C( 7), UINT8_C( 45), UINT8_C(104), UINT8_C( 71), UINT8_C( 82), UINT8_C(107), UINT8_C( 14), UINT8_C( 73), UINT8_C(202), UINT8_C( 78), UINT8_C(132), UINT8_C( 67), UINT8_C( 79), UINT8_C(233), UINT8_C(140), UINT8_C(133), UINT8_C( 99), UINT8_C(202), UINT8_C( 75)), UINT64_C(15488110983464961330) }, { simde_x_mm512_set_epu8(UINT8_C( 78), UINT8_C( 83), UINT8_C(217), UINT8_C( 23), UINT8_C(204), UINT8_C( 27), UINT8_C( 84), UINT8_C( 42), UINT8_C(170), UINT8_C( 43), UINT8_C(212), UINT8_C(144), UINT8_C( 56), UINT8_C(177), UINT8_C(191), UINT8_C(215), UINT8_C( 39), UINT8_C( 30), UINT8_C( 2), UINT8_C(234), UINT8_C( 49), UINT8_C(151), UINT8_C(136), UINT8_C(175), UINT8_C(252), UINT8_C(162), UINT8_C(152), UINT8_C(153), UINT8_C(239), UINT8_C(231), UINT8_C(133), UINT8_C(178), UINT8_C(148), UINT8_C( 35), UINT8_C(158), UINT8_C(129), UINT8_C( 19), UINT8_C(213), UINT8_C( 89), UINT8_C(159), UINT8_C(156), UINT8_C( 31), UINT8_C(228), UINT8_C(142), UINT8_C( 99), UINT8_C( 45), UINT8_C(244), UINT8_C(239), UINT8_C( 20), UINT8_C( 92), UINT8_C(183), UINT8_C( 74), UINT8_C(105), UINT8_C(182), UINT8_C(238), UINT8_C( 27), UINT8_C(161), UINT8_C(150), UINT8_C(240), 
UINT8_C( 67), UINT8_C( 60), UINT8_C(157), UINT8_C( 26), UINT8_C( 30)), simde_x_mm512_set_epu8(UINT8_C(112), UINT8_C(252), UINT8_C(254), UINT8_C(234), UINT8_C(115), UINT8_C(252), UINT8_C(144), UINT8_C(157), UINT8_C(106), UINT8_C(131), UINT8_C(237), UINT8_C( 28), UINT8_C( 29), UINT8_C( 85), UINT8_C( 8), UINT8_C(128), UINT8_C(244), UINT8_C(127), UINT8_C(116), UINT8_C( 60), UINT8_C( 88), UINT8_C(104), UINT8_C(162), UINT8_C(203), UINT8_C(144), UINT8_C( 38), UINT8_C(193), UINT8_C(181), UINT8_C(155), UINT8_C( 59), UINT8_C( 61), UINT8_C( 4), UINT8_C( 63), UINT8_C(240), UINT8_C( 88), UINT8_C( 14), UINT8_C( 73), UINT8_C(125), UINT8_C(224), UINT8_C(117), UINT8_C(118), UINT8_C(109), UINT8_C( 68), UINT8_C( 42), UINT8_C(150), UINT8_C( 79), UINT8_C(167), UINT8_C( 25), UINT8_C( 58), UINT8_C(250), UINT8_C(130), UINT8_C(160), UINT8_C( 75), UINT8_C(145), UINT8_C(152), UINT8_C(149), UINT8_C(134), UINT8_C(252), UINT8_C( 13), UINT8_C(165), UINT8_C(218), UINT8_C( 88), UINT8_C( 59), UINT8_C(228)), UINT64_C(17825505917769929051) }, { simde_x_mm512_set_epu8(UINT8_C( 80), UINT8_C(151), UINT8_C(216), UINT8_C(130), UINT8_C(149), UINT8_C(124), UINT8_C( 37), UINT8_C( 84), UINT8_C(103), UINT8_C( 99), UINT8_C(115), UINT8_C(151), UINT8_C(233), UINT8_C(197), UINT8_C(132), UINT8_C(158), UINT8_C( 23), UINT8_C( 54), UINT8_C(164), UINT8_C(107), UINT8_C(233), UINT8_C(122), UINT8_C( 62), UINT8_C( 22), UINT8_C(179), UINT8_C( 56), UINT8_C(117), UINT8_C(196), UINT8_C(102), UINT8_C( 82), UINT8_C( 6), UINT8_C(242), UINT8_C(100), UINT8_C(238), UINT8_C(103), UINT8_C( 83), UINT8_C(139), UINT8_C(142), UINT8_C(174), UINT8_C(130), UINT8_C(118), UINT8_C( 29), UINT8_C(246), UINT8_C(127), UINT8_C(235), UINT8_C( 33), UINT8_C(253), UINT8_C(147), UINT8_C( 41), UINT8_C( 14), UINT8_C(193), UINT8_C(126), UINT8_C(220), UINT8_C(114), UINT8_C( 22), UINT8_C( 77), UINT8_C( 40), UINT8_C(150), UINT8_C(218), UINT8_C(187), UINT8_C(209), UINT8_C(123), UINT8_C( 46), UINT8_C(156)), simde_x_mm512_set_epu8(UINT8_C(176), UINT8_C( 45), 
UINT8_C(210), UINT8_C(149), UINT8_C(149), UINT8_C(249), UINT8_C( 13), UINT8_C(137), UINT8_C(118), UINT8_C(232), UINT8_C(127), UINT8_C( 30), UINT8_C(175), UINT8_C(210), UINT8_C(248), UINT8_C(191), UINT8_C( 96), UINT8_C( 79), UINT8_C(110), UINT8_C(154), UINT8_C(119), UINT8_C(253), UINT8_C(133), UINT8_C( 16), UINT8_C(243), UINT8_C( 4), UINT8_C( 4), UINT8_C(112), UINT8_C(245), UINT8_C(173), UINT8_C( 10), UINT8_C(196), UINT8_C(208), UINT8_C( 87), UINT8_C( 86), UINT8_C(157), UINT8_C(215), UINT8_C( 65), UINT8_C(145), UINT8_C(212), UINT8_C( 76), UINT8_C(163), UINT8_C( 24), UINT8_C(147), UINT8_C( 61), UINT8_C(161), UINT8_C( 63), UINT8_C( 34), UINT8_C(236), UINT8_C( 70), UINT8_C(243), UINT8_C(236), UINT8_C( 49), UINT8_C( 99), UINT8_C(157), UINT8_C( 87), UINT8_C( 4), UINT8_C( 86), UINT8_C(157), UINT8_C( 37), UINT8_C( 44), UINT8_C( 59), UINT8_C(149), UINT8_C( 14)), UINT64_C(10801838139217605378) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask64 r = simde_mm512_cmplt_epu8_mask(test_vec[i].a, test_vec[i].b); simde_assert_equal_mmask64(r, test_vec[i].r); } return 0; } #if !defined(SIMDE_BUG_GCC_96174) static int test_simde_mm512_cmplt_ps_mask (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 b[16]; const simde__mmask16 r; } test_vec[] = { { { SIMDE_FLOAT32_C( -679.30), SIMDE_FLOAT32_C( 966.54), SIMDE_FLOAT32_C( -8.95), SIMDE_FLOAT32_C( 958.36), SIMDE_FLOAT32_C( -725.44), SIMDE_FLOAT32_C( -760.28), SIMDE_FLOAT32_C( 751.05), SIMDE_FLOAT32_C( 763.86), SIMDE_FLOAT32_C( -137.17), SIMDE_FLOAT32_C( -526.42), SIMDE_FLOAT32_C( -580.26), SIMDE_FLOAT32_C( 72.73), SIMDE_FLOAT32_C( 848.96), SIMDE_FLOAT32_C( -167.99), SIMDE_FLOAT32_C( 95.30), SIMDE_FLOAT32_C( 277.78) }, { SIMDE_FLOAT32_C( 425.87), SIMDE_FLOAT32_C( -693.81), SIMDE_FLOAT32_C( 225.64), SIMDE_FLOAT32_C( -374.81), SIMDE_FLOAT32_C( -490.07), SIMDE_FLOAT32_C( 62.38), SIMDE_FLOAT32_C( 630.88), SIMDE_FLOAT32_C( 308.80), SIMDE_FLOAT32_C( -539.84), 
SIMDE_FLOAT32_C( -683.39), SIMDE_FLOAT32_C( -735.32), SIMDE_FLOAT32_C( -999.26), SIMDE_FLOAT32_C( 848.96), SIMDE_FLOAT32_C( 579.92), SIMDE_FLOAT32_C( 118.33), SIMDE_FLOAT32_C( -830.34) }, UINT16_C(24629) }, { { SIMDE_FLOAT32_C( -833.10), SIMDE_FLOAT32_C( 667.53), SIMDE_FLOAT32_C( -768.58), SIMDE_FLOAT32_C( 27.81), SIMDE_FLOAT32_C( 969.40), SIMDE_FLOAT32_C( -884.80), SIMDE_FLOAT32_C( -758.63), SIMDE_FLOAT32_C( -724.10), SIMDE_FLOAT32_C( -716.35), SIMDE_FLOAT32_C( -476.71), SIMDE_FLOAT32_C( 419.04), SIMDE_FLOAT32_C( 832.05), SIMDE_FLOAT32_C( 151.35), SIMDE_FLOAT32_C( -175.30), SIMDE_FLOAT32_C( 66.61), SIMDE_FLOAT32_C( 351.20) }, { SIMDE_FLOAT32_C( 118.05), SIMDE_FLOAT32_C( -502.75), SIMDE_FLOAT32_C( -814.79), SIMDE_FLOAT32_C( 929.98), SIMDE_FLOAT32_C( 432.78), SIMDE_FLOAT32_C( -886.46), SIMDE_FLOAT32_C( 577.10), SIMDE_FLOAT32_C( -862.12), SIMDE_FLOAT32_C( 136.94), SIMDE_FLOAT32_C( 908.37), SIMDE_FLOAT32_C( -807.53), SIMDE_FLOAT32_C( -626.26), SIMDE_FLOAT32_C( 93.44), SIMDE_FLOAT32_C( 143.01), SIMDE_FLOAT32_C( 933.29), SIMDE_FLOAT32_C( 260.34) }, UINT16_C(25417) }, { { SIMDE_FLOAT32_C( 397.82), SIMDE_FLOAT32_C( 82.73), SIMDE_FLOAT32_C( -728.93), SIMDE_FLOAT32_C( -716.45), SIMDE_FLOAT32_C( 278.34), SIMDE_FLOAT32_C( -422.65), SIMDE_FLOAT32_C( -540.28), SIMDE_FLOAT32_C( 265.15), SIMDE_FLOAT32_C( 279.24), SIMDE_FLOAT32_C( -171.08), SIMDE_FLOAT32_C( -468.61), SIMDE_FLOAT32_C( 443.34), SIMDE_FLOAT32_C( 751.73), SIMDE_FLOAT32_C( -744.43), SIMDE_FLOAT32_C( 566.91), SIMDE_FLOAT32_C( -904.35) }, { SIMDE_FLOAT32_C( -692.80), SIMDE_FLOAT32_C( 82.73), SIMDE_FLOAT32_C( 507.25), SIMDE_FLOAT32_C( -716.45), SIMDE_FLOAT32_C( -871.32), SIMDE_FLOAT32_C( 909.91), SIMDE_FLOAT32_C( -907.02), SIMDE_FLOAT32_C( -102.77), SIMDE_FLOAT32_C( 677.37), SIMDE_FLOAT32_C( -171.08), SIMDE_FLOAT32_C( -468.61), SIMDE_FLOAT32_C( -257.08), SIMDE_FLOAT32_C( 751.73), SIMDE_FLOAT32_C( 841.70), SIMDE_FLOAT32_C( 271.27), SIMDE_FLOAT32_C( 149.55) }, UINT16_C(41252) }, { { SIMDE_FLOAT32_C( -351.59), 
SIMDE_FLOAT32_C( -757.31), SIMDE_FLOAT32_C( -739.49), SIMDE_FLOAT32_C( 354.82), SIMDE_FLOAT32_C( 779.77), SIMDE_FLOAT32_C( 796.84), SIMDE_FLOAT32_C( 253.65), SIMDE_FLOAT32_C( -980.02), SIMDE_FLOAT32_C( -824.56), SIMDE_FLOAT32_C( -806.24), SIMDE_FLOAT32_C( 218.91), SIMDE_FLOAT32_C( 807.03), SIMDE_FLOAT32_C( -499.44), SIMDE_FLOAT32_C( 683.75), SIMDE_FLOAT32_C( 242.90), SIMDE_FLOAT32_C( 681.31) }, { SIMDE_FLOAT32_C( 698.06), SIMDE_FLOAT32_C( 143.17), SIMDE_FLOAT32_C( 645.90), SIMDE_FLOAT32_C( 354.82), SIMDE_FLOAT32_C( 561.25), SIMDE_FLOAT32_C( -928.28), SIMDE_FLOAT32_C( 482.94), SIMDE_FLOAT32_C( 28.55), SIMDE_FLOAT32_C( 701.67), SIMDE_FLOAT32_C( 834.16), SIMDE_FLOAT32_C( 386.75), SIMDE_FLOAT32_C( 807.03), SIMDE_FLOAT32_C( 558.03), SIMDE_FLOAT32_C( -756.03), SIMDE_FLOAT32_C( 930.12), SIMDE_FLOAT32_C( -793.56) }, UINT16_C(22471) }, { { SIMDE_FLOAT32_C( 434.87), SIMDE_FLOAT32_C( -355.05), SIMDE_FLOAT32_C( -653.48), SIMDE_FLOAT32_C( 594.11), SIMDE_FLOAT32_C( 799.49), SIMDE_FLOAT32_C( 264.31), SIMDE_FLOAT32_C( -8.19), SIMDE_FLOAT32_C( -922.96), SIMDE_FLOAT32_C( 308.23), SIMDE_FLOAT32_C( -871.48), SIMDE_FLOAT32_C( 543.66), SIMDE_FLOAT32_C( 721.18), SIMDE_FLOAT32_C( -314.45), SIMDE_FLOAT32_C( 897.43), SIMDE_FLOAT32_C( 646.34), SIMDE_FLOAT32_C( -691.19) }, { SIMDE_FLOAT32_C( -506.84), SIMDE_FLOAT32_C( -355.05), SIMDE_FLOAT32_C( 70.02), SIMDE_FLOAT32_C( -186.22), SIMDE_FLOAT32_C( 745.56), SIMDE_FLOAT32_C( -329.15), SIMDE_FLOAT32_C( -306.53), SIMDE_FLOAT32_C( -665.08), SIMDE_FLOAT32_C( -81.67), SIMDE_FLOAT32_C( 690.25), SIMDE_FLOAT32_C( -343.01), SIMDE_FLOAT32_C( 742.59), SIMDE_FLOAT32_C( -989.44), SIMDE_FLOAT32_C( 198.45), SIMDE_FLOAT32_C( 334.24), SIMDE_FLOAT32_C( 445.42) }, UINT16_C(35460) }, { { SIMDE_FLOAT32_C( 72.70), SIMDE_FLOAT32_C( -926.98), SIMDE_FLOAT32_C( 386.60), SIMDE_FLOAT32_C( -166.44), SIMDE_FLOAT32_C( -372.12), SIMDE_FLOAT32_C( 156.01), SIMDE_FLOAT32_C( -432.45), SIMDE_FLOAT32_C( -171.34), SIMDE_FLOAT32_C( -100.09), SIMDE_FLOAT32_C( 220.75), SIMDE_FLOAT32_C( 
-427.23), SIMDE_FLOAT32_C( -735.37), SIMDE_FLOAT32_C( 440.82), SIMDE_FLOAT32_C( -646.62), SIMDE_FLOAT32_C( 895.12), SIMDE_FLOAT32_C( 585.45) }, { SIMDE_FLOAT32_C( -15.73), SIMDE_FLOAT32_C( 536.94), SIMDE_FLOAT32_C( -374.81), SIMDE_FLOAT32_C( 158.91), SIMDE_FLOAT32_C( 525.00), SIMDE_FLOAT32_C( 478.37), SIMDE_FLOAT32_C( -432.45), SIMDE_FLOAT32_C( -483.69), SIMDE_FLOAT32_C( 887.57), SIMDE_FLOAT32_C( 220.75), SIMDE_FLOAT32_C( 709.30), SIMDE_FLOAT32_C( 187.04), SIMDE_FLOAT32_C( -436.07), SIMDE_FLOAT32_C( 329.70), SIMDE_FLOAT32_C( 57.53), SIMDE_FLOAT32_C( 636.63) }, UINT16_C(44346) }, { { SIMDE_FLOAT32_C( -715.67), SIMDE_FLOAT32_C( -253.10), SIMDE_FLOAT32_C( 805.99), SIMDE_FLOAT32_C( 896.48), SIMDE_FLOAT32_C( -683.44), SIMDE_FLOAT32_C( -642.77), SIMDE_FLOAT32_C( -746.45), SIMDE_FLOAT32_C( 318.24), SIMDE_FLOAT32_C( -949.63), SIMDE_FLOAT32_C( -203.63), SIMDE_FLOAT32_C( -894.66), SIMDE_FLOAT32_C( 648.89), SIMDE_FLOAT32_C( 110.40), SIMDE_FLOAT32_C( 662.12), SIMDE_FLOAT32_C( 821.38), SIMDE_FLOAT32_C( 820.81) }, { SIMDE_FLOAT32_C( 147.48), SIMDE_FLOAT32_C( 715.61), SIMDE_FLOAT32_C( -594.01), SIMDE_FLOAT32_C( 128.99), SIMDE_FLOAT32_C( 847.91), SIMDE_FLOAT32_C( -246.50), SIMDE_FLOAT32_C( -172.62), SIMDE_FLOAT32_C( 927.56), SIMDE_FLOAT32_C( -949.63), SIMDE_FLOAT32_C( -193.40), SIMDE_FLOAT32_C( 284.28), SIMDE_FLOAT32_C( 354.14), SIMDE_FLOAT32_C( -296.72), SIMDE_FLOAT32_C( 320.79), SIMDE_FLOAT32_C( 108.95), SIMDE_FLOAT32_C( -12.38) }, UINT16_C( 1779) }, { { SIMDE_FLOAT32_C( 372.34), SIMDE_FLOAT32_C( 943.17), SIMDE_FLOAT32_C( -546.38), SIMDE_FLOAT32_C( -534.61), SIMDE_FLOAT32_C( -390.69), SIMDE_FLOAT32_C( 249.11), SIMDE_FLOAT32_C( 492.46), SIMDE_FLOAT32_C( 83.28), SIMDE_FLOAT32_C( -13.87), SIMDE_FLOAT32_C( 563.95), SIMDE_FLOAT32_C( 27.19), SIMDE_FLOAT32_C( 69.48), SIMDE_FLOAT32_C( -499.31), SIMDE_FLOAT32_C( 588.53), SIMDE_FLOAT32_C( 881.11), SIMDE_FLOAT32_C( -291.35) }, { SIMDE_FLOAT32_C( 896.28), SIMDE_FLOAT32_C( -328.16), SIMDE_FLOAT32_C( -58.67), SIMDE_FLOAT32_C( -222.16), 
SIMDE_FLOAT32_C( 369.25), SIMDE_FLOAT32_C( 249.11), SIMDE_FLOAT32_C( 39.79), SIMDE_FLOAT32_C( 257.60), SIMDE_FLOAT32_C( -13.87), SIMDE_FLOAT32_C( 385.43), SIMDE_FLOAT32_C( 657.69), SIMDE_FLOAT32_C( 261.33), SIMDE_FLOAT32_C( -197.63), SIMDE_FLOAT32_C( -362.80), SIMDE_FLOAT32_C( -10.34), SIMDE_FLOAT32_C( -825.29) }, UINT16_C( 7325) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__mmask16 r = simde_mm512_cmplt_ps_mask(a, b); simde_assert_equal_mmask16(r, test_vec[i].r); } return 0; } static int test_simde_mm512_cmplt_pd_mask (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 b[8]; const simde__mmask8 r; } test_vec[] = { { { SIMDE_FLOAT64_C( 159.59), SIMDE_FLOAT64_C( 210.36), SIMDE_FLOAT64_C( -469.27), SIMDE_FLOAT64_C( -961.30), SIMDE_FLOAT64_C( -565.87), SIMDE_FLOAT64_C( -556.86), SIMDE_FLOAT64_C( 785.14), SIMDE_FLOAT64_C( -76.60) }, { SIMDE_FLOAT64_C( 549.87), SIMDE_FLOAT64_C( -373.87), SIMDE_FLOAT64_C( 375.69), SIMDE_FLOAT64_C( 255.43), SIMDE_FLOAT64_C( -924.84), SIMDE_FLOAT64_C( 236.64), SIMDE_FLOAT64_C( -838.91), SIMDE_FLOAT64_C( 432.31) }, UINT8_C(173) }, { { SIMDE_FLOAT64_C( -86.33), SIMDE_FLOAT64_C( 998.88), SIMDE_FLOAT64_C( 169.13), SIMDE_FLOAT64_C( 558.70), SIMDE_FLOAT64_C( 146.37), SIMDE_FLOAT64_C( 90.58), SIMDE_FLOAT64_C( 405.41), SIMDE_FLOAT64_C( -319.04) }, { SIMDE_FLOAT64_C( -110.18), SIMDE_FLOAT64_C( 182.04), SIMDE_FLOAT64_C( -496.16), SIMDE_FLOAT64_C( -883.07), SIMDE_FLOAT64_C( 321.39), SIMDE_FLOAT64_C( -344.51), SIMDE_FLOAT64_C( -99.97), SIMDE_FLOAT64_C( -263.70) }, UINT8_C(144) }, { { SIMDE_FLOAT64_C( 29.15), SIMDE_FLOAT64_C( -41.18), SIMDE_FLOAT64_C( -110.04), SIMDE_FLOAT64_C( 548.12), SIMDE_FLOAT64_C( 271.03), SIMDE_FLOAT64_C( -770.85), SIMDE_FLOAT64_C( 346.58), SIMDE_FLOAT64_C( 912.24) }, { SIMDE_FLOAT64_C( 79.16), SIMDE_FLOAT64_C( 358.77), SIMDE_FLOAT64_C( 
-231.91), SIMDE_FLOAT64_C( 206.83), SIMDE_FLOAT64_C( 115.65), SIMDE_FLOAT64_C( -336.81), SIMDE_FLOAT64_C( -732.53), SIMDE_FLOAT64_C( 334.52) }, UINT8_C( 35) }, { { SIMDE_FLOAT64_C( 256.18), SIMDE_FLOAT64_C( -459.33), SIMDE_FLOAT64_C( 101.00), SIMDE_FLOAT64_C( -417.14), SIMDE_FLOAT64_C( -900.86), SIMDE_FLOAT64_C( -806.81), SIMDE_FLOAT64_C( -5.42), SIMDE_FLOAT64_C( 857.96) }, { SIMDE_FLOAT64_C( -232.59), SIMDE_FLOAT64_C( 931.14), SIMDE_FLOAT64_C( -321.87), SIMDE_FLOAT64_C( 407.35), SIMDE_FLOAT64_C( 262.90), SIMDE_FLOAT64_C( 592.56), SIMDE_FLOAT64_C( -812.34), SIMDE_FLOAT64_C( 950.75) }, UINT8_C(186) }, { { SIMDE_FLOAT64_C( -662.53), SIMDE_FLOAT64_C( 872.08), SIMDE_FLOAT64_C( -996.83), SIMDE_FLOAT64_C( 245.09), SIMDE_FLOAT64_C( -755.15), SIMDE_FLOAT64_C( 154.86), SIMDE_FLOAT64_C( 690.61), SIMDE_FLOAT64_C( -850.32) }, { SIMDE_FLOAT64_C( 718.59), SIMDE_FLOAT64_C( -644.78), SIMDE_FLOAT64_C( -744.92), SIMDE_FLOAT64_C( 162.05), SIMDE_FLOAT64_C( -429.20), SIMDE_FLOAT64_C( 382.77), SIMDE_FLOAT64_C( -712.41), SIMDE_FLOAT64_C( 553.41) }, UINT8_C(181) }, { { SIMDE_FLOAT64_C( -767.88), SIMDE_FLOAT64_C( 220.93), SIMDE_FLOAT64_C( -852.88), SIMDE_FLOAT64_C( -422.20), SIMDE_FLOAT64_C( 24.06), SIMDE_FLOAT64_C( 396.29), SIMDE_FLOAT64_C( 393.46), SIMDE_FLOAT64_C( 825.11) }, { SIMDE_FLOAT64_C( -326.63), SIMDE_FLOAT64_C( 260.49), SIMDE_FLOAT64_C( 21.96), SIMDE_FLOAT64_C( -870.80), SIMDE_FLOAT64_C( 390.98), SIMDE_FLOAT64_C( -810.50), SIMDE_FLOAT64_C( -47.31), SIMDE_FLOAT64_C( 928.47) }, UINT8_C(151) }, { { SIMDE_FLOAT64_C( 764.04), SIMDE_FLOAT64_C( -755.85), SIMDE_FLOAT64_C( 350.20), SIMDE_FLOAT64_C( -122.92), SIMDE_FLOAT64_C( 41.32), SIMDE_FLOAT64_C( 468.91), SIMDE_FLOAT64_C( 941.23), SIMDE_FLOAT64_C( -826.92) }, { SIMDE_FLOAT64_C( -79.39), SIMDE_FLOAT64_C( -301.22), SIMDE_FLOAT64_C( -613.48), SIMDE_FLOAT64_C( -831.83), SIMDE_FLOAT64_C( -533.10), SIMDE_FLOAT64_C( 168.63), SIMDE_FLOAT64_C( 232.01), SIMDE_FLOAT64_C( -589.49) }, UINT8_C(130) }, { { SIMDE_FLOAT64_C( 431.35), SIMDE_FLOAT64_C( 
-312.15), SIMDE_FLOAT64_C( -300.41), SIMDE_FLOAT64_C( -919.37), SIMDE_FLOAT64_C( 97.60), SIMDE_FLOAT64_C( 323.36), SIMDE_FLOAT64_C( 650.47), SIMDE_FLOAT64_C( 378.00) }, { SIMDE_FLOAT64_C( -942.80), SIMDE_FLOAT64_C( 278.12), SIMDE_FLOAT64_C( 437.54), SIMDE_FLOAT64_C( -207.26), SIMDE_FLOAT64_C( 628.37), SIMDE_FLOAT64_C( -977.34), SIMDE_FLOAT64_C( -73.78), SIMDE_FLOAT64_C( -44.83) }, UINT8_C( 30) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__mmask8 r = simde_mm512_cmplt_pd_mask(a, b); simde_assert_equal_mmask8(r, test_vec[i].r); } return 0; } #endif /* !defined(SIMDE_BUG_GCC_96174) */ SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmplt_epi8_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmplt_epu8_mask) #if !defined(SIMDE_BUG_GCC_96174) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmplt_ps_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cmplt_pd_mask) #endif SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/copysign.c000066400000000000000000000511671400333146700172650ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #define SIMDE_TEST_X86_AVX512_INSN copysign #include #include static int test_simde_x_mm512_copysign_ps (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 101.85), SIMDE_FLOAT32_C( 638.93), SIMDE_FLOAT32_C( -988.13), SIMDE_FLOAT32_C( 346.44), SIMDE_FLOAT32_C( -984.29), SIMDE_FLOAT32_C( 33.87), SIMDE_FLOAT32_C( 170.56), SIMDE_FLOAT32_C( 295.52), SIMDE_FLOAT32_C( 361.74), SIMDE_FLOAT32_C( -255.41), SIMDE_FLOAT32_C( -306.51), SIMDE_FLOAT32_C( -203.63), SIMDE_FLOAT32_C( -477.65), SIMDE_FLOAT32_C( 156.06), SIMDE_FLOAT32_C( -273.72), SIMDE_FLOAT32_C( 883.97) }, { SIMDE_FLOAT32_C( 199.87), SIMDE_FLOAT32_C( -325.33), SIMDE_FLOAT32_C( -536.96), SIMDE_FLOAT32_C( -823.46), SIMDE_FLOAT32_C( 717.11), SIMDE_FLOAT32_C( 790.99), SIMDE_FLOAT32_C( 305.34), SIMDE_FLOAT32_C( 374.95), SIMDE_FLOAT32_C( -940.88), SIMDE_FLOAT32_C( -518.43), SIMDE_FLOAT32_C( 296.03), SIMDE_FLOAT32_C( 464.18), SIMDE_FLOAT32_C( 842.36), SIMDE_FLOAT32_C( 732.27), SIMDE_FLOAT32_C( -231.90), SIMDE_FLOAT32_C( -55.79) }, { SIMDE_FLOAT32_C( 101.85), SIMDE_FLOAT32_C( -638.93), SIMDE_FLOAT32_C( -988.13), SIMDE_FLOAT32_C( -346.44), SIMDE_FLOAT32_C( 984.29), SIMDE_FLOAT32_C( 33.87), SIMDE_FLOAT32_C( 170.56), SIMDE_FLOAT32_C( 295.52), SIMDE_FLOAT32_C( -361.74), SIMDE_FLOAT32_C( -255.41), SIMDE_FLOAT32_C( 306.51), SIMDE_FLOAT32_C( 203.63), SIMDE_FLOAT32_C( 477.65), SIMDE_FLOAT32_C( 156.06), SIMDE_FLOAT32_C( -273.72), SIMDE_FLOAT32_C( -883.97) } }, { { SIMDE_FLOAT32_C( 371.20), SIMDE_FLOAT32_C( -220.03), SIMDE_FLOAT32_C( -709.35), SIMDE_FLOAT32_C( 386.90), SIMDE_FLOAT32_C( 813.83), SIMDE_FLOAT32_C( 
461.21), SIMDE_FLOAT32_C( -317.58), SIMDE_FLOAT32_C( 175.57), SIMDE_FLOAT32_C( -794.20), SIMDE_FLOAT32_C( 375.91), SIMDE_FLOAT32_C( 971.94), SIMDE_FLOAT32_C( -271.85), SIMDE_FLOAT32_C( -468.03), SIMDE_FLOAT32_C( -301.78), SIMDE_FLOAT32_C( -387.88), SIMDE_FLOAT32_C( 731.84) }, { SIMDE_FLOAT32_C( 372.89), SIMDE_FLOAT32_C( 75.16), SIMDE_FLOAT32_C( 908.38), SIMDE_FLOAT32_C( 90.00), SIMDE_FLOAT32_C( -133.86), SIMDE_FLOAT32_C( 213.73), SIMDE_FLOAT32_C( -535.05), SIMDE_FLOAT32_C( -74.73), SIMDE_FLOAT32_C( 695.30), SIMDE_FLOAT32_C( 760.98), SIMDE_FLOAT32_C( -610.55), SIMDE_FLOAT32_C( 537.65), SIMDE_FLOAT32_C( 493.25), SIMDE_FLOAT32_C( 157.55), SIMDE_FLOAT32_C( -518.14), SIMDE_FLOAT32_C( -135.55) }, { SIMDE_FLOAT32_C( 371.20), SIMDE_FLOAT32_C( 220.03), SIMDE_FLOAT32_C( 709.35), SIMDE_FLOAT32_C( 386.90), SIMDE_FLOAT32_C( -813.83), SIMDE_FLOAT32_C( 461.21), SIMDE_FLOAT32_C( -317.58), SIMDE_FLOAT32_C( -175.57), SIMDE_FLOAT32_C( 794.20), SIMDE_FLOAT32_C( 375.91), SIMDE_FLOAT32_C( -971.94), SIMDE_FLOAT32_C( 271.85), SIMDE_FLOAT32_C( 468.03), SIMDE_FLOAT32_C( 301.78), SIMDE_FLOAT32_C( -387.88), SIMDE_FLOAT32_C( -731.84) } }, { { SIMDE_FLOAT32_C( 937.51), SIMDE_FLOAT32_C( -227.49), SIMDE_FLOAT32_C( -748.65), SIMDE_FLOAT32_C( 751.35), SIMDE_FLOAT32_C( -766.28), SIMDE_FLOAT32_C( -66.23), SIMDE_FLOAT32_C( -73.08), SIMDE_FLOAT32_C( -560.48), SIMDE_FLOAT32_C( -690.32), SIMDE_FLOAT32_C( -101.14), SIMDE_FLOAT32_C( 167.67), SIMDE_FLOAT32_C( -158.35), SIMDE_FLOAT32_C( 597.08), SIMDE_FLOAT32_C( 779.79), SIMDE_FLOAT32_C( -426.51), SIMDE_FLOAT32_C( -30.03) }, { SIMDE_FLOAT32_C( -145.05), SIMDE_FLOAT32_C( -518.13), SIMDE_FLOAT32_C( -940.03), SIMDE_FLOAT32_C( 721.10), SIMDE_FLOAT32_C( 695.60), SIMDE_FLOAT32_C( -475.08), SIMDE_FLOAT32_C( -353.64), SIMDE_FLOAT32_C( 390.89), SIMDE_FLOAT32_C( -714.10), SIMDE_FLOAT32_C( 35.81), SIMDE_FLOAT32_C( -71.46), SIMDE_FLOAT32_C( 779.15), SIMDE_FLOAT32_C( -806.64), SIMDE_FLOAT32_C( 410.40), SIMDE_FLOAT32_C( -356.40), SIMDE_FLOAT32_C( -869.13) }, { 
SIMDE_FLOAT32_C( -937.51), SIMDE_FLOAT32_C( -227.49), SIMDE_FLOAT32_C( -748.65), SIMDE_FLOAT32_C( 751.35), SIMDE_FLOAT32_C( 766.28), SIMDE_FLOAT32_C( -66.23), SIMDE_FLOAT32_C( -73.08), SIMDE_FLOAT32_C( 560.48), SIMDE_FLOAT32_C( -690.32), SIMDE_FLOAT32_C( 101.14), SIMDE_FLOAT32_C( -167.67), SIMDE_FLOAT32_C( 158.35), SIMDE_FLOAT32_C( -597.08), SIMDE_FLOAT32_C( 779.79), SIMDE_FLOAT32_C( -426.51), SIMDE_FLOAT32_C( -30.03) } }, { { SIMDE_FLOAT32_C( -817.09), SIMDE_FLOAT32_C( -105.05), SIMDE_FLOAT32_C( 882.22), SIMDE_FLOAT32_C( -583.37), SIMDE_FLOAT32_C( 828.72), SIMDE_FLOAT32_C( -190.86), SIMDE_FLOAT32_C( -143.85), SIMDE_FLOAT32_C( -861.60), SIMDE_FLOAT32_C( 708.00), SIMDE_FLOAT32_C( -976.17), SIMDE_FLOAT32_C( -19.95), SIMDE_FLOAT32_C( 305.09), SIMDE_FLOAT32_C( 803.62), SIMDE_FLOAT32_C( 553.54), SIMDE_FLOAT32_C( -724.94), SIMDE_FLOAT32_C( -341.43) }, { SIMDE_FLOAT32_C( -964.59), SIMDE_FLOAT32_C( -664.97), SIMDE_FLOAT32_C( -620.33), SIMDE_FLOAT32_C( 731.01), SIMDE_FLOAT32_C( -140.05), SIMDE_FLOAT32_C( 26.03), SIMDE_FLOAT32_C( 121.90), SIMDE_FLOAT32_C( 145.85), SIMDE_FLOAT32_C( -938.16), SIMDE_FLOAT32_C( -949.56), SIMDE_FLOAT32_C( -75.00), SIMDE_FLOAT32_C( -744.80), SIMDE_FLOAT32_C( 460.85), SIMDE_FLOAT32_C( 568.59), SIMDE_FLOAT32_C( -613.93), SIMDE_FLOAT32_C( 643.76) }, { SIMDE_FLOAT32_C( -817.09), SIMDE_FLOAT32_C( -105.05), SIMDE_FLOAT32_C( -882.22), SIMDE_FLOAT32_C( 583.37), SIMDE_FLOAT32_C( -828.72), SIMDE_FLOAT32_C( 190.86), SIMDE_FLOAT32_C( 143.85), SIMDE_FLOAT32_C( 861.60), SIMDE_FLOAT32_C( -708.00), SIMDE_FLOAT32_C( -976.17), SIMDE_FLOAT32_C( -19.95), SIMDE_FLOAT32_C( -305.09), SIMDE_FLOAT32_C( 803.62), SIMDE_FLOAT32_C( 553.54), SIMDE_FLOAT32_C( -724.94), SIMDE_FLOAT32_C( 341.43) } }, { { SIMDE_FLOAT32_C( -536.46), SIMDE_FLOAT32_C( -731.70), SIMDE_FLOAT32_C( -939.61), SIMDE_FLOAT32_C( -707.74), SIMDE_FLOAT32_C( 77.44), SIMDE_FLOAT32_C( -83.46), SIMDE_FLOAT32_C( -569.34), SIMDE_FLOAT32_C( -214.56), SIMDE_FLOAT32_C( -59.63), SIMDE_FLOAT32_C( 410.72), 
SIMDE_FLOAT32_C( -909.47), SIMDE_FLOAT32_C( -256.01), SIMDE_FLOAT32_C( -35.74), SIMDE_FLOAT32_C( -634.41), SIMDE_FLOAT32_C( 402.56), SIMDE_FLOAT32_C( -0.33) }, { SIMDE_FLOAT32_C( -299.38), SIMDE_FLOAT32_C( 782.23), SIMDE_FLOAT32_C( -269.32), SIMDE_FLOAT32_C( 560.57), SIMDE_FLOAT32_C( -191.74), SIMDE_FLOAT32_C( 852.58), SIMDE_FLOAT32_C( -293.59), SIMDE_FLOAT32_C( -129.90), SIMDE_FLOAT32_C( 903.02), SIMDE_FLOAT32_C( 631.41), SIMDE_FLOAT32_C( 125.30), SIMDE_FLOAT32_C( 363.87), SIMDE_FLOAT32_C( 200.00), SIMDE_FLOAT32_C( 511.37), SIMDE_FLOAT32_C( 7.62), SIMDE_FLOAT32_C( 663.54) }, { SIMDE_FLOAT32_C( -536.46), SIMDE_FLOAT32_C( 731.70), SIMDE_FLOAT32_C( -939.61), SIMDE_FLOAT32_C( 707.74), SIMDE_FLOAT32_C( -77.44), SIMDE_FLOAT32_C( 83.46), SIMDE_FLOAT32_C( -569.34), SIMDE_FLOAT32_C( -214.56), SIMDE_FLOAT32_C( 59.63), SIMDE_FLOAT32_C( 410.72), SIMDE_FLOAT32_C( 909.47), SIMDE_FLOAT32_C( 256.01), SIMDE_FLOAT32_C( 35.74), SIMDE_FLOAT32_C( 634.41), SIMDE_FLOAT32_C( 402.56), SIMDE_FLOAT32_C( 0.33) } }, { { SIMDE_FLOAT32_C( 779.67), SIMDE_FLOAT32_C( 68.01), SIMDE_FLOAT32_C( 955.81), SIMDE_FLOAT32_C( -142.89), SIMDE_FLOAT32_C( 984.55), SIMDE_FLOAT32_C( -613.53), SIMDE_FLOAT32_C( 642.55), SIMDE_FLOAT32_C( -75.08), SIMDE_FLOAT32_C( 797.19), SIMDE_FLOAT32_C( 733.08), SIMDE_FLOAT32_C( 668.91), SIMDE_FLOAT32_C( -238.56), SIMDE_FLOAT32_C( -901.34), SIMDE_FLOAT32_C( 71.47), SIMDE_FLOAT32_C( 761.11), SIMDE_FLOAT32_C( -200.72) }, { SIMDE_FLOAT32_C( -146.31), SIMDE_FLOAT32_C( -508.21), SIMDE_FLOAT32_C( -640.16), SIMDE_FLOAT32_C( 661.95), SIMDE_FLOAT32_C( -655.63), SIMDE_FLOAT32_C( 66.26), SIMDE_FLOAT32_C( -467.95), SIMDE_FLOAT32_C( -752.61), SIMDE_FLOAT32_C( -302.33), SIMDE_FLOAT32_C( 657.35), SIMDE_FLOAT32_C( 611.26), SIMDE_FLOAT32_C( 897.67), SIMDE_FLOAT32_C( 168.73), SIMDE_FLOAT32_C( -381.12), SIMDE_FLOAT32_C( 561.22), SIMDE_FLOAT32_C( -51.60) }, { SIMDE_FLOAT32_C( -779.67), SIMDE_FLOAT32_C( -68.01), SIMDE_FLOAT32_C( -955.81), SIMDE_FLOAT32_C( 142.89), SIMDE_FLOAT32_C( -984.55), 
SIMDE_FLOAT32_C( 613.53), SIMDE_FLOAT32_C( -642.55), SIMDE_FLOAT32_C( -75.08), SIMDE_FLOAT32_C( -797.19), SIMDE_FLOAT32_C( 733.08), SIMDE_FLOAT32_C( 668.91), SIMDE_FLOAT32_C( 238.56), SIMDE_FLOAT32_C( 901.34), SIMDE_FLOAT32_C( -71.47), SIMDE_FLOAT32_C( 761.11), SIMDE_FLOAT32_C( -200.72) } }, { { SIMDE_FLOAT32_C( 686.89), SIMDE_FLOAT32_C( 517.02), SIMDE_FLOAT32_C( 805.50), SIMDE_FLOAT32_C( 671.44), SIMDE_FLOAT32_C( 903.49), SIMDE_FLOAT32_C( 448.05), SIMDE_FLOAT32_C( -403.64), SIMDE_FLOAT32_C( 700.67), SIMDE_FLOAT32_C( 181.13), SIMDE_FLOAT32_C( -734.74), SIMDE_FLOAT32_C( -537.88), SIMDE_FLOAT32_C( 279.80), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -776.77), SIMDE_FLOAT32_C( -920.93), SIMDE_FLOAT32_C( -809.58) }, { SIMDE_FLOAT32_C( -284.98), SIMDE_FLOAT32_C( -561.08), SIMDE_FLOAT32_C( 852.37), SIMDE_FLOAT32_C( 59.39), SIMDE_FLOAT32_C( 505.18), SIMDE_FLOAT32_C( -615.57), SIMDE_FLOAT32_C( 306.78), SIMDE_FLOAT32_C( -797.15), SIMDE_FLOAT32_C( -958.22), SIMDE_FLOAT32_C( -81.96), SIMDE_FLOAT32_C( -899.48), SIMDE_FLOAT32_C( 210.50), SIMDE_FLOAT32_C( 536.92), SIMDE_FLOAT32_C( 661.73), SIMDE_FLOAT32_C( -841.10), SIMDE_FLOAT32_C( 223.81) }, { SIMDE_FLOAT32_C( -686.89), SIMDE_FLOAT32_C( -517.02), SIMDE_FLOAT32_C( 805.50), SIMDE_FLOAT32_C( 671.44), SIMDE_FLOAT32_C( 903.49), SIMDE_FLOAT32_C( -448.05), SIMDE_FLOAT32_C( 403.64), SIMDE_FLOAT32_C( -700.67), SIMDE_FLOAT32_C( -181.13), SIMDE_FLOAT32_C( -734.74), SIMDE_FLOAT32_C( -537.88), SIMDE_FLOAT32_C( 279.80), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( 776.77), SIMDE_FLOAT32_C( -920.93), SIMDE_FLOAT32_C( 809.58) } }, { { SIMDE_FLOAT32_C( 178.75), SIMDE_FLOAT32_C( 964.40), SIMDE_FLOAT32_C( -104.75), SIMDE_FLOAT32_C( 82.24), SIMDE_FLOAT32_C( 412.46), SIMDE_FLOAT32_C( 491.60), SIMDE_FLOAT32_C( -217.08), SIMDE_FLOAT32_C( -406.41), SIMDE_FLOAT32_C( 756.86), SIMDE_FLOAT32_C( 245.04), SIMDE_FLOAT32_C( 873.39), SIMDE_FLOAT32_C( 93.59), SIMDE_FLOAT32_C( 468.27), SIMDE_FLOAT32_C( 952.46), SIMDE_FLOAT32_C( 284.01), SIMDE_FLOAT32_C( -816.71) 
}, { SIMDE_FLOAT32_C( -608.62), SIMDE_FLOAT32_C( 136.39), SIMDE_FLOAT32_C( 242.67), SIMDE_FLOAT32_C( 896.56), SIMDE_FLOAT32_C( 520.81), SIMDE_FLOAT32_C( -450.55), SIMDE_FLOAT32_C( -900.60), SIMDE_FLOAT32_C( 562.59), SIMDE_FLOAT32_C( 467.49), SIMDE_FLOAT32_C( -800.08), SIMDE_FLOAT32_C( -226.91), SIMDE_FLOAT32_C( 4.40), SIMDE_FLOAT32_C( 861.66), SIMDE_FLOAT32_C( -68.01), SIMDE_FLOAT32_C( -771.79), SIMDE_FLOAT32_C( 40.41) }, { SIMDE_FLOAT32_C( -178.75), SIMDE_FLOAT32_C( 964.40), SIMDE_FLOAT32_C( 104.75), SIMDE_FLOAT32_C( 82.24), SIMDE_FLOAT32_C( 412.46), SIMDE_FLOAT32_C( -491.60), SIMDE_FLOAT32_C( -217.08), SIMDE_FLOAT32_C( 406.41), SIMDE_FLOAT32_C( 756.86), SIMDE_FLOAT32_C( -245.04), SIMDE_FLOAT32_C( -873.39), SIMDE_FLOAT32_C( 93.59), SIMDE_FLOAT32_C( 468.27), SIMDE_FLOAT32_C( -952.46), SIMDE_FLOAT32_C( -284.01), SIMDE_FLOAT32_C( 816.71) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_x_mm512_copysign_ps(a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512 a = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 b = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 r = simde_x_mm512_copysign_ps(a, b); simde_test_x86_write_f32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_x_mm512_copysign_pd (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -103.60), SIMDE_FLOAT64_C( 123.45), SIMDE_FLOAT64_C( -877.34), SIMDE_FLOAT64_C( -691.14), SIMDE_FLOAT64_C( 
-384.94), SIMDE_FLOAT64_C( -94.43), SIMDE_FLOAT64_C( -97.55), SIMDE_FLOAT64_C( -628.08) }, { SIMDE_FLOAT64_C( -849.39), SIMDE_FLOAT64_C( -224.16), SIMDE_FLOAT64_C( 465.51), SIMDE_FLOAT64_C( 618.88), SIMDE_FLOAT64_C( -271.70), SIMDE_FLOAT64_C( -250.47), SIMDE_FLOAT64_C( 802.16), SIMDE_FLOAT64_C( 119.68) }, { SIMDE_FLOAT64_C( -103.60), SIMDE_FLOAT64_C( -123.45), SIMDE_FLOAT64_C( 877.34), SIMDE_FLOAT64_C( 691.14), SIMDE_FLOAT64_C( -384.94), SIMDE_FLOAT64_C( -94.43), SIMDE_FLOAT64_C( 97.55), SIMDE_FLOAT64_C( 628.08) } }, { { SIMDE_FLOAT64_C( 885.92), SIMDE_FLOAT64_C( 44.83), SIMDE_FLOAT64_C( 16.24), SIMDE_FLOAT64_C( 406.73), SIMDE_FLOAT64_C( 594.28), SIMDE_FLOAT64_C( 115.64), SIMDE_FLOAT64_C( -30.68), SIMDE_FLOAT64_C( 61.77) }, { SIMDE_FLOAT64_C( 315.57), SIMDE_FLOAT64_C( 742.41), SIMDE_FLOAT64_C( -933.83), SIMDE_FLOAT64_C( 177.22), SIMDE_FLOAT64_C( -325.59), SIMDE_FLOAT64_C( -705.62), SIMDE_FLOAT64_C( -782.37), SIMDE_FLOAT64_C( 570.81) }, { SIMDE_FLOAT64_C( 885.92), SIMDE_FLOAT64_C( 44.83), SIMDE_FLOAT64_C( -16.24), SIMDE_FLOAT64_C( 406.73), SIMDE_FLOAT64_C( -594.28), SIMDE_FLOAT64_C( -115.64), SIMDE_FLOAT64_C( -30.68), SIMDE_FLOAT64_C( 61.77) } }, { { SIMDE_FLOAT64_C( 417.83), SIMDE_FLOAT64_C( -659.71), SIMDE_FLOAT64_C( 879.67), SIMDE_FLOAT64_C( -967.11), SIMDE_FLOAT64_C( 245.86), SIMDE_FLOAT64_C( -217.88), SIMDE_FLOAT64_C( -595.19), SIMDE_FLOAT64_C( 396.47) }, { SIMDE_FLOAT64_C( 557.95), SIMDE_FLOAT64_C( 870.32), SIMDE_FLOAT64_C( 15.35), SIMDE_FLOAT64_C( -713.75), SIMDE_FLOAT64_C( -380.15), SIMDE_FLOAT64_C( -182.49), SIMDE_FLOAT64_C( 405.93), SIMDE_FLOAT64_C( -494.24) }, { SIMDE_FLOAT64_C( 417.83), SIMDE_FLOAT64_C( 659.71), SIMDE_FLOAT64_C( 879.67), SIMDE_FLOAT64_C( -967.11), SIMDE_FLOAT64_C( -245.86), SIMDE_FLOAT64_C( -217.88), SIMDE_FLOAT64_C( 595.19), SIMDE_FLOAT64_C( -396.47) } }, { { SIMDE_FLOAT64_C( 862.34), SIMDE_FLOAT64_C( -577.83), SIMDE_FLOAT64_C( 912.49), SIMDE_FLOAT64_C( 456.63), SIMDE_FLOAT64_C( 537.81), SIMDE_FLOAT64_C( -118.19), SIMDE_FLOAT64_C( 
-481.60), SIMDE_FLOAT64_C( -146.63) }, { SIMDE_FLOAT64_C( -375.77), SIMDE_FLOAT64_C( -415.43), SIMDE_FLOAT64_C( -969.41), SIMDE_FLOAT64_C( 298.64), SIMDE_FLOAT64_C( -121.05), SIMDE_FLOAT64_C( -751.77), SIMDE_FLOAT64_C( -130.56), SIMDE_FLOAT64_C( -703.22) }, { SIMDE_FLOAT64_C( -862.34), SIMDE_FLOAT64_C( -577.83), SIMDE_FLOAT64_C( -912.49), SIMDE_FLOAT64_C( 456.63), SIMDE_FLOAT64_C( -537.81), SIMDE_FLOAT64_C( -118.19), SIMDE_FLOAT64_C( -481.60), SIMDE_FLOAT64_C( -146.63) } }, { { SIMDE_FLOAT64_C( -411.48), SIMDE_FLOAT64_C( -250.89), SIMDE_FLOAT64_C( -670.33), SIMDE_FLOAT64_C( 834.38), SIMDE_FLOAT64_C( 531.22), SIMDE_FLOAT64_C( -265.52), SIMDE_FLOAT64_C( 230.85), SIMDE_FLOAT64_C( 89.18) }, { SIMDE_FLOAT64_C( -395.20), SIMDE_FLOAT64_C( -753.80), SIMDE_FLOAT64_C( 375.43), SIMDE_FLOAT64_C( 224.64), SIMDE_FLOAT64_C( 63.71), SIMDE_FLOAT64_C( -218.65), SIMDE_FLOAT64_C( 730.41), SIMDE_FLOAT64_C( -73.95) }, { SIMDE_FLOAT64_C( -411.48), SIMDE_FLOAT64_C( -250.89), SIMDE_FLOAT64_C( 670.33), SIMDE_FLOAT64_C( 834.38), SIMDE_FLOAT64_C( 531.22), SIMDE_FLOAT64_C( -265.52), SIMDE_FLOAT64_C( 230.85), SIMDE_FLOAT64_C( -89.18) } }, { { SIMDE_FLOAT64_C( 203.52), SIMDE_FLOAT64_C( 642.90), SIMDE_FLOAT64_C( -617.32), SIMDE_FLOAT64_C( -258.67), SIMDE_FLOAT64_C( -475.29), SIMDE_FLOAT64_C( -98.92), SIMDE_FLOAT64_C( 594.70), SIMDE_FLOAT64_C( 148.94) }, { SIMDE_FLOAT64_C( 485.65), SIMDE_FLOAT64_C( 625.29), SIMDE_FLOAT64_C( -552.43), SIMDE_FLOAT64_C( -635.40), SIMDE_FLOAT64_C( 873.52), SIMDE_FLOAT64_C( 317.01), SIMDE_FLOAT64_C( -338.62), SIMDE_FLOAT64_C( -537.96) }, { SIMDE_FLOAT64_C( 203.52), SIMDE_FLOAT64_C( 642.90), SIMDE_FLOAT64_C( -617.32), SIMDE_FLOAT64_C( -258.67), SIMDE_FLOAT64_C( 475.29), SIMDE_FLOAT64_C( 98.92), SIMDE_FLOAT64_C( -594.70), SIMDE_FLOAT64_C( -148.94) } }, { { SIMDE_FLOAT64_C( -933.88), SIMDE_FLOAT64_C( -8.95), SIMDE_FLOAT64_C( -703.59), SIMDE_FLOAT64_C( 597.35), SIMDE_FLOAT64_C( 725.52), SIMDE_FLOAT64_C( 527.26), SIMDE_FLOAT64_C( -313.48), SIMDE_FLOAT64_C( -669.68) }, { 
SIMDE_FLOAT64_C( 773.46), SIMDE_FLOAT64_C( -938.05), SIMDE_FLOAT64_C( 554.96), SIMDE_FLOAT64_C( -162.83), SIMDE_FLOAT64_C( -156.70), SIMDE_FLOAT64_C( 285.37), SIMDE_FLOAT64_C( 763.23), SIMDE_FLOAT64_C( -953.18) }, { SIMDE_FLOAT64_C( 933.88), SIMDE_FLOAT64_C( -8.95), SIMDE_FLOAT64_C( 703.59), SIMDE_FLOAT64_C( -597.35), SIMDE_FLOAT64_C( -725.52), SIMDE_FLOAT64_C( 527.26), SIMDE_FLOAT64_C( 313.48), SIMDE_FLOAT64_C( -669.68) } }, { { SIMDE_FLOAT64_C( -71.73), SIMDE_FLOAT64_C( -854.09), SIMDE_FLOAT64_C( -211.85), SIMDE_FLOAT64_C( 452.98), SIMDE_FLOAT64_C( 46.99), SIMDE_FLOAT64_C( -617.15), SIMDE_FLOAT64_C( -398.09), SIMDE_FLOAT64_C( -467.35) }, { SIMDE_FLOAT64_C( -991.86), SIMDE_FLOAT64_C( 49.48), SIMDE_FLOAT64_C( -102.75), SIMDE_FLOAT64_C( 881.66), SIMDE_FLOAT64_C( -633.50), SIMDE_FLOAT64_C( 558.63), SIMDE_FLOAT64_C( -656.30), SIMDE_FLOAT64_C( -567.38) }, { SIMDE_FLOAT64_C( -71.73), SIMDE_FLOAT64_C( 854.09), SIMDE_FLOAT64_C( -211.85), SIMDE_FLOAT64_C( 452.98), SIMDE_FLOAT64_C( -46.99), SIMDE_FLOAT64_C( 617.15), SIMDE_FLOAT64_C( -398.09), SIMDE_FLOAT64_C( -467.35) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_x_mm512_copysign_pd(a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512d a = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m512d b = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m512d r = simde_x_mm512_copysign_pd(a, b); simde_test_x86_write_f64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(x_mm512_copysign_ps) 
SIMDE_TEST_FUNC_LIST_ENTRY(x_mm512_copysign_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/cvt.c000066400000000000000000001533711400333146700162260ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN cvt #include #include #include static int test_simde_mm512_cvtepi16_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m256i r; } test_vec[8] = { { simde_mm512_set_epi16(INT16_C( 14423), INT16_C( 3775), INT16_C( 16156), INT16_C( 17811), INT16_C(-14881), INT16_C(-30283), INT16_C( 27295), INT16_C(-12290), INT16_C( 12394), INT16_C( 32764), INT16_C( 8681), INT16_C( 21255), INT16_C(-21785), INT16_C(-24065), INT16_C(-28005), INT16_C( 15206), INT16_C( 6131), INT16_C(-29323), INT16_C( -9530), INT16_C( -6655), INT16_C( 14785), INT16_C( -9158), INT16_C( 7009), INT16_C( 4834), INT16_C(-15579), INT16_C( 5296), INT16_C( 20054), INT16_C( 12832), INT16_C( 15724), INT16_C( 5918), INT16_C( 25398), INT16_C( 14084)), simde_mm256_set_epi8(INT8_C( 87), INT8_C( -65), INT8_C( 28), INT8_C(-109), INT8_C( -33), INT8_C( -75), INT8_C( -97), INT8_C( -2), INT8_C( 106), INT8_C( -4), INT8_C( -23), INT8_C( 7), INT8_C( -25), INT8_C( -1), INT8_C(-101), INT8_C( 102), INT8_C( -13), INT8_C( 117), INT8_C( -58), INT8_C( 1), INT8_C( -63), INT8_C( 58), INT8_C( 97), INT8_C( -30), INT8_C( 37), INT8_C( -80), INT8_C( 86), INT8_C( 32), INT8_C( 108), INT8_C( 30), INT8_C( 54), INT8_C( 4)) }, { simde_mm512_set_epi16(INT16_C( 8455), INT16_C( 1140), INT16_C(-23383), INT16_C( 22825), INT16_C(-21438), INT16_C( 8713), INT16_C(-25940), INT16_C(-31180), INT16_C(-13214), INT16_C( 10200), INT16_C(-21253), INT16_C( 2612), INT16_C(-27891), INT16_C( 14031), INT16_C( -9014), INT16_C( 10287), INT16_C(-11660), INT16_C( 26858), INT16_C(-19518), INT16_C( 2472), INT16_C( 27637), INT16_C( 14857), INT16_C( 30034), INT16_C(-24153), INT16_C( 31935), INT16_C( -6397), INT16_C( -2502), INT16_C( 31062), INT16_C( 30236), INT16_C( 5156), INT16_C( 18439), INT16_C(-13074)), simde_mm256_set_epi8(INT8_C( 7), INT8_C( 116), INT8_C( -87), INT8_C( 41), INT8_C( 66), INT8_C( 9), INT8_C( -84), INT8_C( 52), INT8_C( 98), 
INT8_C( -40), INT8_C( -5), INT8_C( 52), INT8_C( 13), INT8_C( -49), INT8_C( -54), INT8_C( 47), INT8_C( 116), INT8_C( -22), INT8_C( -62), INT8_C( -88), INT8_C( -11), INT8_C( 9), INT8_C( 82), INT8_C( -89), INT8_C( -65), INT8_C( 3), INT8_C( 58), INT8_C( 86), INT8_C( 28), INT8_C( 36), INT8_C( 7), INT8_C( -18)) }, { simde_mm512_set_epi16(INT16_C( 18175), INT16_C( -3760), INT16_C( 10318), INT16_C(-31849), INT16_C(-32429), INT16_C(-26500), INT16_C( 24084), INT16_C(-23946), INT16_C( 2525), INT16_C( 2478), INT16_C(-15141), INT16_C(-27410), INT16_C( 30961), INT16_C(-31554), INT16_C( -9533), INT16_C(-20012), INT16_C(-21820), INT16_C( 11767), INT16_C(-17849), INT16_C( 24518), INT16_C(-22206), INT16_C(-24996), INT16_C(-19566), INT16_C( 17826), INT16_C( 25765), INT16_C( 29123), INT16_C( 28065), INT16_C( 1432), INT16_C(-24949), INT16_C( 30580), INT16_C( 20499), INT16_C(-29164)), simde_mm256_set_epi8(INT8_C( -1), INT8_C( 80), INT8_C( 78), INT8_C(-105), INT8_C( 83), INT8_C( 124), INT8_C( 20), INT8_C( 118), INT8_C( -35), INT8_C( -82), INT8_C( -37), INT8_C( -18), INT8_C( -15), INT8_C( -66), INT8_C( -61), INT8_C( -44), INT8_C( -60), INT8_C( -9), INT8_C( 71), INT8_C( -58), INT8_C( 66), INT8_C( 92), INT8_C(-110), INT8_C( -94), INT8_C( -91), INT8_C( -61), INT8_C( -95), INT8_C(-104), INT8_C(-117), INT8_C( 116), INT8_C( 19), INT8_C( 20)) }, { simde_mm512_set_epi16(INT16_C( 10816), INT16_C( 16713), INT16_C( 29707), INT16_C( 15186), INT16_C( 31860), INT16_C(-28520), INT16_C( 18947), INT16_C(-27460), INT16_C( 10883), INT16_C( 310), INT16_C( 8277), INT16_C(-28768), INT16_C( -4553), INT16_C( 23273), INT16_C(-27696), INT16_C(-20678), INT16_C( 13089), INT16_C( -6620), INT16_C( 31575), INT16_C(-20169), INT16_C( 14440), INT16_C( -9264), INT16_C(-26919), INT16_C(-25720), INT16_C(-18371), INT16_C( 25765), INT16_C(-13162), INT16_C(-16808), INT16_C( 5695), INT16_C(-25080), INT16_C( 19142), INT16_C( 3825)), simde_mm256_set_epi8(INT8_C( 64), INT8_C( 73), INT8_C( 11), INT8_C( 82), INT8_C( 116), 
INT8_C(-104), INT8_C( 3), INT8_C( -68), INT8_C(-125), INT8_C( 54), INT8_C( 85), INT8_C( -96), INT8_C( 55), INT8_C( -23), INT8_C( -48), INT8_C( 58), INT8_C( 33), INT8_C( 36), INT8_C( 87), INT8_C( 55), INT8_C( 104), INT8_C( -48), INT8_C( -39), INT8_C(-120), INT8_C( 61), INT8_C( -91), INT8_C(-106), INT8_C( 88), INT8_C( 63), INT8_C( 8), INT8_C( -58), INT8_C( -15)) }, { simde_mm512_set_epi16(INT16_C( 5079), INT16_C(-24746), INT16_C( 23487), INT16_C(-22087), INT16_C( -8346), INT16_C( 29848), INT16_C( 14241), INT16_C( 18254), INT16_C( -3124), INT16_C(-16186), INT16_C(-13364), INT16_C( 10652), INT16_C( 31028), INT16_C( 21346), INT16_C( 1443), INT16_C(-20222), INT16_C(-17028), INT16_C(-21899), INT16_C( 18933), INT16_C( 6935), INT16_C( 24619), INT16_C( 1737), INT16_C( 12596), INT16_C( 31606), INT16_C(-32691), INT16_C( 11392), INT16_C( 32126), INT16_C(-32712), INT16_C( 20927), INT16_C(-27859), INT16_C( 22640), INT16_C( 8969)), simde_mm256_set_epi8(INT8_C( -41), INT8_C( 86), INT8_C( -65), INT8_C( -71), INT8_C( 102), INT8_C(-104), INT8_C( -95), INT8_C( 78), INT8_C( -52), INT8_C( -58), INT8_C( -52), INT8_C(-100), INT8_C( 52), INT8_C( 98), INT8_C( -93), INT8_C( 2), INT8_C( 124), INT8_C( 117), INT8_C( -11), INT8_C( 23), INT8_C( 43), INT8_C( -55), INT8_C( 52), INT8_C( 118), INT8_C( 77), INT8_C(-128), INT8_C( 126), INT8_C( 56), INT8_C( -65), INT8_C( 45), INT8_C( 112), INT8_C( 9)) }, { simde_mm512_set_epi16(INT16_C( 6901), INT16_C(-23435), INT16_C(-26040), INT16_C(-11295), INT16_C( 623), INT16_C(-23058), INT16_C( 17549), INT16_C(-23291), INT16_C( 17215), INT16_C( -4892), INT16_C( -849), INT16_C( 21086), INT16_C(-13056), INT16_C( 19549), INT16_C( 16492), INT16_C(-22767), INT16_C(-24079), INT16_C( 6429), INT16_C( 15302), INT16_C( -9175), INT16_C( 17671), INT16_C(-29856), INT16_C(-12718), INT16_C(-22914), INT16_C(-19613), INT16_C( 14088), INT16_C(-10443), INT16_C( 31757), INT16_C( 24994), INT16_C( 24174), INT16_C( -9596), INT16_C(-22481)), simde_mm256_set_epi8(INT8_C( -11), INT8_C( 
117), INT8_C( 72), INT8_C( -31), INT8_C( 111), INT8_C( -18), INT8_C(-115), INT8_C( 5), INT8_C( 63), INT8_C( -28), INT8_C( -81), INT8_C( 94), INT8_C( 0), INT8_C( 93), INT8_C( 108), INT8_C( 17), INT8_C( -15), INT8_C( 29), INT8_C( -58), INT8_C( 41), INT8_C( 7), INT8_C( 96), INT8_C( 82), INT8_C( 126), INT8_C( 99), INT8_C( 8), INT8_C( 53), INT8_C( 13), INT8_C( -94), INT8_C( 110), INT8_C(-124), INT8_C( 47)) }, { simde_mm512_set_epi16(INT16_C( 15520), INT16_C( 15679), INT16_C( 8541), INT16_C(-20376), INT16_C( 8861), INT16_C( 12926), INT16_C( 25712), INT16_C( -8433), INT16_C( -7066), INT16_C(-23691), INT16_C(-20251), INT16_C( 18056), INT16_C( 5498), INT16_C(-18751), INT16_C(-26321), INT16_C( 7918), INT16_C( 1647), INT16_C( 21774), INT16_C( 5430), INT16_C(-19512), INT16_C(-14894), INT16_C( 12466), INT16_C( -9612), INT16_C(-23130), INT16_C( 18357), INT16_C( 32349), INT16_C(-25760), INT16_C( -6559), INT16_C(-24198), INT16_C( 13614), INT16_C( 13473), INT16_C(-25578)), simde_mm256_set_epi8(INT8_C( -96), INT8_C( 63), INT8_C( 93), INT8_C( 104), INT8_C( -99), INT8_C( 126), INT8_C( 112), INT8_C( 15), INT8_C( 102), INT8_C( 117), INT8_C( -27), INT8_C(-120), INT8_C( 122), INT8_C( -63), INT8_C( 47), INT8_C( -18), INT8_C( 111), INT8_C( 14), INT8_C( 54), INT8_C( -56), INT8_C( -46), INT8_C( -78), INT8_C( 116), INT8_C( -90), INT8_C( -75), INT8_C( 93), INT8_C( 96), INT8_C( 97), INT8_C( 122), INT8_C( 46), INT8_C( -95), INT8_C( 22)) }, { simde_mm512_set_epi16(INT16_C(-13944), INT16_C( 30422), INT16_C( 10523), INT16_C( 28986), INT16_C(-23789), INT16_C(-20754), INT16_C( 29282), INT16_C(-10845), INT16_C( 10721), INT16_C( 2777), INT16_C(-18838), INT16_C( 8324), INT16_C( 19192), INT16_C( 114), INT16_C( -9073), INT16_C( 2615), INT16_C( 21008), INT16_C( 12652), INT16_C(-14859), INT16_C( 5734), INT16_C( -5598), INT16_C(-10707), INT16_C( 2170), INT16_C( 23903), INT16_C( 29988), INT16_C( 24405), INT16_C( 5383), INT16_C(-29994), INT16_C( 7143), INT16_C( 22270), INT16_C( -1480), INT16_C( 15491)), 
simde_mm256_set_epi8(INT8_C(-120), INT8_C( -42), INT8_C( 27), INT8_C( 58), INT8_C( 19), INT8_C( -18), INT8_C( 98), INT8_C( -93), INT8_C( -31), INT8_C( -39), INT8_C( 106), INT8_C(-124), INT8_C( -8), INT8_C( 114), INT8_C(-113), INT8_C( 55), INT8_C( 16), INT8_C( 108), INT8_C( -11), INT8_C( 102), INT8_C( 34), INT8_C( 45), INT8_C( 122), INT8_C( 95), INT8_C( 36), INT8_C( 85), INT8_C( 7), INT8_C( -42), INT8_C( -25), INT8_C( -2), INT8_C( 56), INT8_C(-125)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm512_cvtepi16_epi8(test_vec[i].a); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_cvtepi16_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i src; simde__mmask32 k; simde__m512i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C(-112), INT8_C( 50), INT8_C( -90), INT8_C( -47), INT8_C( 24), INT8_C( -14), INT8_C( -76), INT8_C( -4), INT8_C(-104), INT8_C( 115), INT8_C( -75), INT8_C( 98), INT8_C( -58), INT8_C( -14), INT8_C( 98), INT8_C( 29), INT8_C( -34), INT8_C( 91), INT8_C( -9), INT8_C( -32), INT8_C( 105), INT8_C( -54), INT8_C( 11), INT8_C( 76), INT8_C( 83), INT8_C( 3), INT8_C( 48), INT8_C( 2), INT8_C( 92), INT8_C( -54), INT8_C( 99), INT8_C( 95)), UINT32_C( 36055), simde_mm512_set_epi16(INT16_C( 29253), INT16_C(-14914), INT16_C( 8284), INT16_C( 18521), INT16_C( 32034), INT16_C( 27278), INT16_C( -3730), INT16_C( -7695), INT16_C( 8989), INT16_C(-29300), INT16_C(-14890), INT16_C( 11419), INT16_C( -1355), INT16_C( 25284), INT16_C(-28026), INT16_C( 1548), INT16_C( 26140), INT16_C( -8634), INT16_C( 26242), INT16_C( 1035), INT16_C(-29578), INT16_C( -2997), INT16_C( 22546), INT16_C(-28782), INT16_C(-11973), INT16_C( 12912), INT16_C(-22923), INT16_C(-12898), INT16_C( 4984), INT16_C( 989), INT16_C( 2511), INT16_C( 26483)), simde_mm256_set_epi8(INT8_C(-112), INT8_C( 50), INT8_C( -90), INT8_C( -47), INT8_C( 24), INT8_C( -14), INT8_C( -76), INT8_C( -4), INT8_C(-104), 
INT8_C( 115), INT8_C( -75), INT8_C( 98), INT8_C( -58), INT8_C( -14), INT8_C( 98), INT8_C( 29), INT8_C( 28), INT8_C( 91), INT8_C( -9), INT8_C( -32), INT8_C( 118), INT8_C( 75), INT8_C( 11), INT8_C( 76), INT8_C( 59), INT8_C( 112), INT8_C( 48), INT8_C( -98), INT8_C( 92), INT8_C( -35), INT8_C( -49), INT8_C( 115)) }, { simde_mm256_set_epi8(INT8_C( -93), INT8_C( -75), INT8_C( 109), INT8_C( 43), INT8_C( -79), INT8_C( -91), INT8_C( -13), INT8_C( 103), INT8_C( -6), INT8_C( -39), INT8_C( 3), INT8_C(-115), INT8_C( 30), INT8_C( -62), INT8_C( 30), INT8_C( 115), INT8_C( -28), INT8_C( -92), INT8_C( 110), INT8_C( -10), INT8_C( 20), INT8_C( -82), INT8_C( 59), INT8_C( 62), INT8_C( 57), INT8_C( 97), INT8_C( 29), INT8_C( -4), INT8_C( -48), INT8_C( 1), INT8_C( 47), INT8_C( 43)), UINT32_C( 13848), simde_mm512_set_epi16(INT16_C( 19920), INT16_C( 28417), INT16_C(-26944), INT16_C( -1327), INT16_C(-18966), INT16_C(-19374), INT16_C( 9639), INT16_C(-25572), INT16_C(-16315), INT16_C( 16363), INT16_C( -4686), INT16_C(-14474), INT16_C( 26743), INT16_C( 20737), INT16_C(-16355), INT16_C( 24251), INT16_C(-20830), INT16_C( 19809), INT16_C(-32085), INT16_C(-29115), INT16_C(-21999), INT16_C( 14843), INT16_C( 13075), INT16_C(-28846), INT16_C(-12894), INT16_C( 31357), INT16_C( 16553), INT16_C(-16546), INT16_C(-16544), INT16_C( 30528), INT16_C( -9494), INT16_C( 8241)), simde_mm256_set_epi8(INT8_C( -93), INT8_C( -75), INT8_C( 109), INT8_C( 43), INT8_C( -79), INT8_C( -91), INT8_C( -13), INT8_C( 103), INT8_C( -6), INT8_C( -39), INT8_C( 3), INT8_C(-115), INT8_C( 30), INT8_C( -62), INT8_C( 30), INT8_C( 115), INT8_C( -28), INT8_C( -92), INT8_C( -85), INT8_C( 69), INT8_C( 20), INT8_C( -5), INT8_C( 19), INT8_C( 62), INT8_C( 57), INT8_C( 97), INT8_C( 29), INT8_C( 94), INT8_C( 96), INT8_C( 1), INT8_C( 47), INT8_C( 43)) }, { simde_mm256_set_epi8(INT8_C( 57), INT8_C( 119), INT8_C( 6), INT8_C( -62), INT8_C( -27), INT8_C( -22), INT8_C( -69), INT8_C( -61), INT8_C( 8), INT8_C(-101), INT8_C( -24), INT8_C( 69), 
INT8_C(-111), INT8_C( 66), INT8_C( -48), INT8_C(-122), INT8_C( -19), INT8_C( -25), INT8_C( -88), INT8_C( 96), INT8_C( -81), INT8_C( 28), INT8_C( -73), INT8_C(-105), INT8_C( 109), INT8_C( -84), INT8_C( 26), INT8_C( 108), INT8_C( 16), INT8_C( 69), INT8_C( -67), INT8_C(-122)), UINT32_C( 52950), simde_mm512_set_epi16(INT16_C(-28100), INT16_C( 2824), INT16_C(-32113), INT16_C(-30059), INT16_C(-19864), INT16_C(-29923), INT16_C( 19573), INT16_C(-11183), INT16_C(-18980), INT16_C( 26281), INT16_C( -7946), INT16_C( 14491), INT16_C( 28715), INT16_C( 26138), INT16_C( 16023), INT16_C( 24398), INT16_C( 20578), INT16_C( -1642), INT16_C( 24774), INT16_C( 26937), INT16_C(-19881), INT16_C(-20408), INT16_C( 26365), INT16_C( -2980), INT16_C( -4479), INT16_C(-10298), INT16_C( 13784), INT16_C(-25535), INT16_C(-26583), INT16_C(-31618), INT16_C( -202), INT16_C( 28295)), simde_mm256_set_epi8(INT8_C( 57), INT8_C( 119), INT8_C( 6), INT8_C( -62), INT8_C( -27), INT8_C( -22), INT8_C( -69), INT8_C( -61), INT8_C( 8), INT8_C(-101), INT8_C( -24), INT8_C( 69), INT8_C(-111), INT8_C( 66), INT8_C( -48), INT8_C(-122), INT8_C( 98), INT8_C(-106), INT8_C( -88), INT8_C( 96), INT8_C( 87), INT8_C( 72), INT8_C( -3), INT8_C(-105), INT8_C(-127), INT8_C( -58), INT8_C( 26), INT8_C( 65), INT8_C( 16), INT8_C( 126), INT8_C( 54), INT8_C(-122)) }, { simde_mm256_set_epi8(INT8_C( 89), INT8_C( 16), INT8_C( 86), INT8_C( 124), INT8_C(-106), INT8_C( 54), INT8_C( 30), INT8_C( -60), INT8_C( 41), INT8_C( 45), INT8_C(-103), INT8_C( -75), INT8_C( -46), INT8_C( -2), INT8_C( 119), INT8_C( 28), INT8_C( 69), INT8_C( -84), INT8_C( 78), INT8_C( -36), INT8_C( 42), INT8_C( -59), INT8_C( 42), INT8_C( 5), INT8_C( -74), INT8_C( -70), INT8_C( 107), INT8_C( 22), INT8_C( 91), INT8_C( 10), INT8_C( -44), INT8_C( 28)), UINT32_C( 4183), simde_mm512_set_epi16(INT16_C( 8531), INT16_C( 2537), INT16_C( 7090), INT16_C( 32184), INT16_C( 918), INT16_C( -4406), INT16_C( -1230), INT16_C(-20248), INT16_C( 28454), INT16_C( -8033), INT16_C( 29491), INT16_C( 
9038), INT16_C( 31537), INT16_C(-32476), INT16_C( 15213), INT16_C( 2771), INT16_C( 9158), INT16_C( 15700), INT16_C( 24392), INT16_C(-14500), INT16_C( 20701), INT16_C( -9424), INT16_C( -5862), INT16_C( 8150), INT16_C(-14293), INT16_C( 29409), INT16_C(-21051), INT16_C(-16951), INT16_C(-32102), INT16_C(-16442), INT16_C( 4517), INT16_C(-32738)), simde_mm256_set_epi8(INT8_C( 89), INT8_C( 16), INT8_C( 86), INT8_C( 124), INT8_C(-106), INT8_C( 54), INT8_C( 30), INT8_C( -60), INT8_C( 41), INT8_C( 45), INT8_C(-103), INT8_C( -75), INT8_C( -46), INT8_C( -2), INT8_C( 119), INT8_C( 28), INT8_C( 69), INT8_C( -84), INT8_C( 78), INT8_C( 92), INT8_C( 42), INT8_C( -59), INT8_C( 42), INT8_C( 5), INT8_C( -74), INT8_C( -31), INT8_C( 107), INT8_C( -55), INT8_C( 91), INT8_C( -58), INT8_C( -91), INT8_C( 30)) }, { simde_mm256_set_epi8(INT8_C( 66), INT8_C( -53), INT8_C( -22), INT8_C(-109), INT8_C(-122), INT8_C( -34), INT8_C( 49), INT8_C( -51), INT8_C( 45), INT8_C( 96), INT8_C( 21), INT8_C( 9), INT8_C(-107), INT8_C( 88), INT8_C( 41), INT8_C( 63), INT8_C( -15), INT8_C( 66), INT8_C( -60), INT8_C( 80), INT8_C( -27), INT8_C( 9), INT8_C( 30), INT8_C( -73), INT8_C( -55), INT8_C( -22), INT8_C(-122), INT8_C( 86), INT8_C( -35), INT8_C( -54), INT8_C( 95), INT8_C( -17)), UINT32_C( 34749), simde_mm512_set_epi16(INT16_C( 6349), INT16_C( -1940), INT16_C( 12009), INT16_C( 26974), INT16_C( 15374), INT16_C( 6913), INT16_C(-19915), INT16_C(-14530), INT16_C(-31337), INT16_C( 22983), INT16_C( 6281), INT16_C( -506), INT16_C(-24168), INT16_C(-22228), INT16_C(-32449), INT16_C(-30658), INT16_C(-16400), INT16_C( -7823), INT16_C( -6600), INT16_C( -5428), INT16_C( 10840), INT16_C(-16201), INT16_C(-15359), INT16_C(-30650), INT16_C( 6966), INT16_C(-30042), INT16_C( 32539), INT16_C(-32588), INT16_C(-23367), INT16_C(-13235), INT16_C(-19835), INT16_C( 15017)), simde_mm256_set_epi8(INT8_C( 66), INT8_C( -53), INT8_C( -22), INT8_C(-109), INT8_C(-122), INT8_C( -34), INT8_C( 49), INT8_C( -51), INT8_C( 45), INT8_C( 96), INT8_C( 
21), INT8_C( 9), INT8_C(-107), INT8_C( 88), INT8_C( 41), INT8_C( 63), INT8_C( -16), INT8_C( 66), INT8_C( -60), INT8_C( 80), INT8_C( -27), INT8_C( -73), INT8_C( 1), INT8_C( 70), INT8_C( 54), INT8_C( -22), INT8_C( 27), INT8_C( -76), INT8_C( -71), INT8_C( 77), INT8_C( 95), INT8_C( -87)) }, { simde_mm256_set_epi8(INT8_C(-124), INT8_C( 59), INT8_C( -81), INT8_C( 66), INT8_C( -65), INT8_C( -38), INT8_C( -36), INT8_C( 5), INT8_C( 15), INT8_C( 28), INT8_C( -18), INT8_C( -54), INT8_C( 82), INT8_C( 30), INT8_C(-110), INT8_C(-114), INT8_C( 3), INT8_C( 71), INT8_C( 64), INT8_C( 21), INT8_C( 115), INT8_C( 123), INT8_C( -22), INT8_C(-111), INT8_C( -10), INT8_C( 18), INT8_C( 3), INT8_C( -8), INT8_C( -97), INT8_C( 26), INT8_C( 72), INT8_C( -94)), UINT32_C( 31044), simde_mm512_set_epi16(INT16_C(-26750), INT16_C(-23902), INT16_C( 29963), INT16_C( 2819), INT16_C( 9258), INT16_C( 16800), INT16_C(-21230), INT16_C( -2332), INT16_C(-12889), INT16_C( 23107), INT16_C( 17922), INT16_C( 3552), INT16_C( 16956), INT16_C(-21244), INT16_C( -9865), INT16_C( 24672), INT16_C(-32513), INT16_C( -3970), INT16_C( 14993), INT16_C(-21626), INT16_C(-29335), INT16_C( -2219), INT16_C( 4209), INT16_C( 11969), INT16_C( -6560), INT16_C(-26729), INT16_C( 7233), INT16_C( 27170), INT16_C( 5881), INT16_C( -9473), INT16_C(-30967), INT16_C( 3275)), simde_mm256_set_epi8(INT8_C(-124), INT8_C( 59), INT8_C( -81), INT8_C( 66), INT8_C( -65), INT8_C( -38), INT8_C( -36), INT8_C( 5), INT8_C( 15), INT8_C( 28), INT8_C( -18), INT8_C( -54), INT8_C( 82), INT8_C( 30), INT8_C(-110), INT8_C(-114), INT8_C( 3), INT8_C( 126), INT8_C(-111), INT8_C(-122), INT8_C( 105), INT8_C( 123), INT8_C( -22), INT8_C( -63), INT8_C( -10), INT8_C(-105), INT8_C( 3), INT8_C( -8), INT8_C( -97), INT8_C( -1), INT8_C( 72), INT8_C( -94)) }, { simde_mm256_set_epi8(INT8_C( 76), INT8_C( -68), INT8_C( 3), INT8_C( 100), INT8_C( 64), INT8_C( -71), INT8_C( -39), INT8_C( 30), INT8_C( 110), INT8_C( 44), INT8_C( 96), INT8_C( 10), INT8_C( 66), INT8_C( 40), INT8_C( 31), 
INT8_C( -85), INT8_C( 120), INT8_C( 70), INT8_C( -37), INT8_C( -25), INT8_C( 51), INT8_C( -19), INT8_C( 124), INT8_C( -52), INT8_C( 69), INT8_C( 107), INT8_C( 96), INT8_C( 106), INT8_C(-126), INT8_C( 61), INT8_C( -71), INT8_C( 9)), UINT32_C( 63997), simde_mm512_set_epi16(INT16_C( 25271), INT16_C( 20153), INT16_C(-23804), INT16_C(-24091), INT16_C( 6064), INT16_C( 3189), INT16_C( -2682), INT16_C( 5283), INT16_C( 14900), INT16_C( 731), INT16_C(-14623), INT16_C( 14729), INT16_C( -3836), INT16_C( 26379), INT16_C( 13131), INT16_C( 14975), INT16_C( 19045), INT16_C( 14845), INT16_C(-21672), INT16_C( 4155), INT16_C( 9032), INT16_C(-30375), INT16_C( 14167), INT16_C( 25860), INT16_C( -6683), INT16_C(-21473), INT16_C( -6588), INT16_C( 22432), INT16_C( -4408), INT16_C( -2180), INT16_C( 26333), INT16_C( 18369)), simde_mm256_set_epi8(INT8_C( 76), INT8_C( -68), INT8_C( 3), INT8_C( 100), INT8_C( 64), INT8_C( -71), INT8_C( -39), INT8_C( 30), INT8_C( 110), INT8_C( 44), INT8_C( 96), INT8_C( 10), INT8_C( 66), INT8_C( 40), INT8_C( 31), INT8_C( -85), INT8_C( 101), INT8_C( -3), INT8_C( 88), INT8_C( 59), INT8_C( 72), INT8_C( -19), INT8_C( 124), INT8_C( 4), INT8_C( -27), INT8_C( 31), INT8_C( 68), INT8_C( -96), INT8_C( -56), INT8_C( 124), INT8_C( -71), INT8_C( -63)) }, { simde_mm256_set_epi8(INT8_C( 40), INT8_C( -41), INT8_C(-126), INT8_C( 8), INT8_C(-115), INT8_C( 108), INT8_C( 31), INT8_C( 41), INT8_C( -21), INT8_C( -60), INT8_C( 76), INT8_C( 74), INT8_C( 86), INT8_C( 39), INT8_C( 41), INT8_C( -61), INT8_C( 120), INT8_C( -6), INT8_C(-117), INT8_C( 43), INT8_C( 64), INT8_C( -40), INT8_C( -63), INT8_C( 39), INT8_C( 82), INT8_C( -3), INT8_C( -8), INT8_C(-102), INT8_C( 21), INT8_C(-109), INT8_C( -6), INT8_C( 102)), UINT32_C( 16734), simde_mm512_set_epi16(INT16_C(-25905), INT16_C( 19727), INT16_C( 28735), INT16_C( 3852), INT16_C(-23084), INT16_C( -6530), INT16_C( -1505), INT16_C( 9601), INT16_C( -7362), INT16_C( 8505), INT16_C(-26382), INT16_C( 25139), INT16_C( 4198), INT16_C( -1011), INT16_C( 
-5955), INT16_C( 29084), INT16_C( 25996), INT16_C( 30463), INT16_C( -4775), INT16_C( 11032), INT16_C(-28689), INT16_C(-14740), INT16_C( -1416), INT16_C( 8406), INT16_C(-23209), INT16_C( 25079), INT16_C( 23521), INT16_C( 23507), INT16_C( 15383), INT16_C(-27993), INT16_C( 2371), INT16_C(-19992)), simde_mm256_set_epi8(INT8_C( 40), INT8_C( -41), INT8_C(-126), INT8_C( 8), INT8_C(-115), INT8_C( 108), INT8_C( 31), INT8_C( 41), INT8_C( -21), INT8_C( -60), INT8_C( 76), INT8_C( 74), INT8_C( 86), INT8_C( 39), INT8_C( 41), INT8_C( -61), INT8_C( 120), INT8_C( -1), INT8_C(-117), INT8_C( 43), INT8_C( 64), INT8_C( -40), INT8_C( -63), INT8_C( -42), INT8_C( 82), INT8_C( -9), INT8_C( -8), INT8_C( -45), INT8_C( 23), INT8_C( -89), INT8_C( 67), INT8_C( 102)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm512_mask_cvtepi16_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_cvtepi16_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask32 k; simde__m512i a; simde__m256i r; } test_vec[8] = { { UINT32_C( 25439), simde_mm512_set_epi16(INT16_C( 26140), INT16_C( -8634), INT16_C( 26242), INT16_C( 1035), INT16_C(-29578), INT16_C( -2997), INT16_C( 22546), INT16_C(-28782), INT16_C(-11973), INT16_C( 12912), INT16_C(-22923), INT16_C(-12898), INT16_C( 4984), INT16_C( 989), INT16_C( 2511), INT16_C( 26483), INT16_C(-18247), INT16_C( 15612), INT16_C( -5009), INT16_C(-29481), INT16_C(-28622), INT16_C(-22831), INT16_C( 6386), INT16_C(-19204), INT16_C(-26509), INT16_C(-19102), INT16_C(-14606), INT16_C( 25117), INT16_C( -8613), INT16_C( -2080), INT16_C( 27082), INT16_C( 2892)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -4), INT8_C( 111), INT8_C( 0), INT8_C( 0), INT8_C( 
0), INT8_C( -14), INT8_C( -4), INT8_C( 0), INT8_C( 98), INT8_C( 0), INT8_C( 29), INT8_C( 91), INT8_C( -32), INT8_C( -54), INT8_C( 76)) }, { UINT32_C( 1548), simde_mm512_set_epi16(INT16_C( -5255), INT16_C( 10791), INT16_C(-28009), INT16_C( 13848), INT16_C(-23627), INT16_C( 27947), INT16_C(-20059), INT16_C( -3225), INT16_C( -1319), INT16_C( 909), INT16_C( 7874), INT16_C( 7795), INT16_C( -7004), INT16_C( 28406), INT16_C( 5294), INT16_C( 15166), INT16_C( 14689), INT16_C( 7676), INT16_C(-12287), INT16_C( 12075), INT16_C( 29253), INT16_C(-14914), INT16_C( 8284), INT16_C( 18521), INT16_C( 32034), INT16_C( 27278), INT16_C( -3730), INT16_C( -7695), INT16_C( 8989), INT16_C(-29300), INT16_C(-14890), INT16_C( 11419)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -66), INT8_C( 92), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 29), INT8_C(-116), INT8_C( 0), INT8_C( 0)) }, { UINT32_C( 8241), simde_mm512_set_epi16(INT16_C( 28076), INT16_C( 6764), INT16_C( 4165), INT16_C(-17018), INT16_C( 19920), INT16_C( 28417), INT16_C(-26944), INT16_C( -1327), INT16_C(-18966), INT16_C(-19374), INT16_C( 9639), INT16_C(-25572), INT16_C(-16315), INT16_C( 16363), INT16_C( -4686), INT16_C(-14474), INT16_C( 26743), INT16_C( 20737), INT16_C(-16355), INT16_C( 24251), INT16_C(-20830), INT16_C( 19809), INT16_C(-32085), INT16_C(-29115), INT16_C(-21999), INT16_C( 14843), INT16_C( 13075), INT16_C(-28846), INT16_C(-12894), INT16_C( 31357), INT16_C( 16553), INT16_C(-16546)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 29), INT8_C( 0), INT8_C( 0), INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 19), INT8_C( 82), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 94)) }, { UINT32_C( 46999), simde_mm512_set_epi16(INT16_C( 28715), INT16_C( 26138), INT16_C( 16023), INT16_C( 24398), INT16_C( 20578), INT16_C( -1642), INT16_C( 24774), INT16_C( 26937), INT16_C(-19881), INT16_C(-20408), INT16_C( 26365), INT16_C( -2980), INT16_C( -4479), INT16_C(-10298), INT16_C( 13784), INT16_C(-25535), INT16_C(-26583), INT16_C(-31618), INT16_C( -202), INT16_C( 28295), INT16_C(-12554), INT16_C( -5929), INT16_C(-27764), INT16_C(-12586), INT16_C( 14711), INT16_C( 1730), INT16_C( -6678), INT16_C(-17469), INT16_C( 2203), INT16_C( -6075), INT16_C(-28350), INT16_C(-12154)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 41), INT8_C( 0), INT8_C( 54), INT8_C(-121), INT8_C( 0), INT8_C( -41), INT8_C(-116), INT8_C( -42), INT8_C( 119), INT8_C( 0), INT8_C( 0), INT8_C( -61), INT8_C( 0), INT8_C( 69), INT8_C( 66), INT8_C(-122)) }, { UINT32_C( 14491), simde_mm512_set_epi16(INT16_C(-32102), INT16_C(-16442), INT16_C( 4517), INT16_C(-32738), INT16_C( -320), INT16_C( 2839), INT16_C( 18963), INT16_C( 4183), INT16_C( 22800), INT16_C( 22140), INT16_C(-27082), INT16_C( 7876), INT16_C( 10541), INT16_C(-26187), INT16_C(-11522), INT16_C( 30492), INT16_C( 17836), INT16_C( 20188), INT16_C( 10949), INT16_C( 10757), INT16_C(-18758), INT16_C( 27414), INT16_C( 23306), INT16_C(-11236), INT16_C(-28100), INT16_C( 2824), INT16_C(-32113), INT16_C(-30059), INT16_C(-19864), INT16_C(-29923), INT16_C( 19573), INT16_C(-11183)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -59), INT8_C( 5), INT8_C( -70), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 60), INT8_C( 0), INT8_C( 0), INT8_C(-107), INT8_C( 104), INT8_C( 0), INT8_C( 117), INT8_C( 81)) }, { UINT32_C( 48585), simde_mm512_set_epi16(INT16_C( -3774), INT16_C(-15280), INT16_C( -6903), INT16_C( 7863), INT16_C(-13846), INT16_C(-31146), INT16_C( -8758), INT16_C( 24559), INT16_C( 8531), INT16_C( 2537), INT16_C( 7090), INT16_C( 32184), INT16_C( 918), INT16_C( -4406), INT16_C( -1230), INT16_C(-20248), INT16_C( 28454), INT16_C( -8033), INT16_C( 29491), INT16_C( 9038), INT16_C( 31537), INT16_C(-32476), INT16_C( 15213), INT16_C( 2771), INT16_C( 9158), INT16_C( 15700), INT16_C( 24392), INT16_C(-14500), INT16_C( 20701), INT16_C( -9424), INT16_C( -5862), INT16_C( 8150)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 38), INT8_C( 0), INT8_C( 51), INT8_C( 78), INT8_C( 49), INT8_C( 36), INT8_C( 0), INT8_C( -45), INT8_C( -58), INT8_C( 84), INT8_C( 0), INT8_C( 0), INT8_C( -35), INT8_C( 0), INT8_C( 0), INT8_C( -42)) }, { UINT32_C( 10559), simde_mm512_set_epi16(INT16_C(-31337), INT16_C( 22983), INT16_C( 6281), INT16_C( -506), INT16_C(-24168), INT16_C(-22228), INT16_C(-32449), INT16_C(-30658), INT16_C(-16400), INT16_C( -7823), INT16_C( -6600), INT16_C( -5428), INT16_C( 10840), INT16_C(-16201), INT16_C(-15359), INT16_C(-30650), INT16_C( 6966), INT16_C(-30042), INT16_C( 32539), INT16_C(-32588), INT16_C(-23367), INT16_C(-13235), INT16_C(-19835), INT16_C( 15017), INT16_C( -4677), INT16_C(-14834), INT16_C( 9957), INT16_C(-30787), INT16_C( 17099), INT16_C( -5485), INT16_C(-31010), INT16_C( 12749)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 27), INT8_C( 0), INT8_C( 
-71), INT8_C( 0), INT8_C( 0), INT8_C( -87), INT8_C( 0), INT8_C( 0), INT8_C( -27), INT8_C( -67), INT8_C( -53), INT8_C(-109), INT8_C( -34), INT8_C( -51)) }, { UINT32_C( 51006), simde_mm512_set_epi16(INT16_C( -6560), INT16_C(-26729), INT16_C( 7233), INT16_C( 27170), INT16_C( 5881), INT16_C( -9473), INT16_C(-30967), INT16_C( 3275), INT16_C( -2646), INT16_C( 14621), INT16_C( 19871), INT16_C( 31044), INT16_C(-31685), INT16_C(-20670), INT16_C(-16422), INT16_C( -9211), INT16_C( 3868), INT16_C( -4406), INT16_C( 21022), INT16_C(-28018), INT16_C( 839), INT16_C( 16405), INT16_C( 29563), INT16_C( -5487), INT16_C( -2542), INT16_C( 1016), INT16_C(-24806), INT16_C( 18594), INT16_C( 6349), INT16_C( -1940), INT16_C( 12009), INT16_C( 26974)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 28), INT8_C( -54), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 21), INT8_C( 123), INT8_C(-111), INT8_C( 0), INT8_C( 0), INT8_C( 26), INT8_C( -94), INT8_C( -51), INT8_C( 108), INT8_C( -23), INT8_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm512_maskz_cvtepi16_epi8(test_vec[i].k, test_vec[i].a); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_cvtepi8_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m512i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( 7), INT8_C( 68), INT8_C( -86), INT8_C( -36), INT8_C( -19), INT8_C( 73), INT8_C( 92), INT8_C( -27), INT8_C( 55), INT8_C( -65), INT8_C( -50), INT8_C( 19), INT8_C(-111), INT8_C( -79), INT8_C( -16), INT8_C( 70), INT8_C( 27), INT8_C( -28), INT8_C( 116), INT8_C( 42), INT8_C( -4), INT8_C( 78), INT8_C( 31), INT8_C( 51), INT8_C( 92), INT8_C( 39), INT8_C(-125), INT8_C( 94), INT8_C( -78), INT8_C( 67), INT8_C( -43), INT8_C( -71)), simde_mm512_set_epi16(INT16_C( 7), 
INT16_C( 68), INT16_C( -86), INT16_C( -36), INT16_C( -19), INT16_C( 73), INT16_C( 92), INT16_C( -27), INT16_C( 55), INT16_C( -65), INT16_C( -50), INT16_C( 19), INT16_C( -111), INT16_C( -79), INT16_C( -16), INT16_C( 70), INT16_C( 27), INT16_C( -28), INT16_C( 116), INT16_C( 42), INT16_C( -4), INT16_C( 78), INT16_C( 31), INT16_C( 51), INT16_C( 92), INT16_C( 39), INT16_C( -125), INT16_C( 94), INT16_C( -78), INT16_C( 67), INT16_C( -43), INT16_C( -71)) }, { simde_mm256_set_epi8(INT8_C( 29), INT8_C( -37), INT8_C( 27), INT8_C( 10), INT8_C( -22), INT8_C( -9), INT8_C(-125), INT8_C( -3), INT8_C( -53), INT8_C( 92), INT8_C( 103), INT8_C( 92), INT8_C( 123), INT8_C( 74), INT8_C( 36), INT8_C( 59), INT8_C( 46), INT8_C( -29), INT8_C(-103), INT8_C( -4), INT8_C( 109), INT8_C( -54), INT8_C( 41), INT8_C( 79), INT8_C( 15), INT8_C( -92), INT8_C( 102), INT8_C( 116), INT8_C( -42), INT8_C( 52), INT8_C( -61), INT8_C( -99)), simde_mm512_set_epi16(INT16_C( 29), INT16_C( -37), INT16_C( 27), INT16_C( 10), INT16_C( -22), INT16_C( -9), INT16_C( -125), INT16_C( -3), INT16_C( -53), INT16_C( 92), INT16_C( 103), INT16_C( 92), INT16_C( 123), INT16_C( 74), INT16_C( 36), INT16_C( 59), INT16_C( 46), INT16_C( -29), INT16_C( -103), INT16_C( -4), INT16_C( 109), INT16_C( -54), INT16_C( 41), INT16_C( 79), INT16_C( 15), INT16_C( -92), INT16_C( 102), INT16_C( 116), INT16_C( -42), INT16_C( 52), INT16_C( -61), INT16_C( -99)) }, { simde_mm256_set_epi8(INT8_C( -9), INT8_C( -47), INT8_C( 107), INT8_C( -74), INT8_C(-126), INT8_C( 34), INT8_C( 64), INT8_C( 115), INT8_C( -65), INT8_C(-124), INT8_C( 54), INT8_C( 27), INT8_C( 41), INT8_C( 112), INT8_C( 61), INT8_C( 6), INT8_C( 7), INT8_C( 39), INT8_C(-109), INT8_C( -99), INT8_C( 63), INT8_C( -35), INT8_C(-111), INT8_C( -72), INT8_C( 109), INT8_C( -39), INT8_C( -99), INT8_C( 26), INT8_C( 66), INT8_C( -78), INT8_C( 30), INT8_C( 38)), simde_mm512_set_epi16(INT16_C( -9), INT16_C( -47), INT16_C( 107), INT16_C( -74), INT16_C( -126), INT16_C( 34), INT16_C( 64), INT16_C( 115), 
INT16_C( -65), INT16_C( -124), INT16_C( 54), INT16_C( 27), INT16_C( 41), INT16_C( 112), INT16_C( 61), INT16_C( 6), INT16_C( 7), INT16_C( 39), INT16_C( -109), INT16_C( -99), INT16_C( 63), INT16_C( -35), INT16_C( -111), INT16_C( -72), INT16_C( 109), INT16_C( -39), INT16_C( -99), INT16_C( 26), INT16_C( 66), INT16_C( -78), INT16_C( 30), INT16_C( 38)) }, { simde_mm256_set_epi8(INT8_C( -72), INT8_C( -80), INT8_C( 101), INT8_C( 81), INT8_C( 23), INT8_C( -68), INT8_C( -57), INT8_C(-111), INT8_C( -3), INT8_C( 21), INT8_C( 121), INT8_C( -22), INT8_C(-104), INT8_C( -10), INT8_C( -37), INT8_C( 66), INT8_C( -93), INT8_C( -80), INT8_C( 34), INT8_C( 104), INT8_C( -39), INT8_C( -99), INT8_C( 18), INT8_C( 110), INT8_C(-118), INT8_C( 38), INT8_C( 112), INT8_C( -67), INT8_C( 60), INT8_C( 47), INT8_C( 32), INT8_C( 33)), simde_mm512_set_epi16(INT16_C( -72), INT16_C( -80), INT16_C( 101), INT16_C( 81), INT16_C( 23), INT16_C( -68), INT16_C( -57), INT16_C( -111), INT16_C( -3), INT16_C( 21), INT16_C( 121), INT16_C( -22), INT16_C( -104), INT16_C( -10), INT16_C( -37), INT16_C( 66), INT16_C( -93), INT16_C( -80), INT16_C( 34), INT16_C( 104), INT16_C( -39), INT16_C( -99), INT16_C( 18), INT16_C( 110), INT16_C( -118), INT16_C( 38), INT16_C( 112), INT16_C( -67), INT16_C( 60), INT16_C( 47), INT16_C( 32), INT16_C( 33)) }, { simde_mm256_set_epi8(INT8_C( 120), INT8_C( -90), INT8_C(-101), INT8_C(-106), INT8_C( 70), INT8_C( -49), INT8_C( 29), INT8_C( -43), INT8_C( -42), INT8_C( 38), INT8_C( 16), INT8_C( -43), INT8_C( -40), INT8_C( -76), INT8_C( -67), INT8_C( 53), INT8_C( -73), INT8_C( -17), INT8_C( 66), INT8_C( 57), INT8_C( -65), INT8_C( -63), INT8_C( 17), INT8_C( -9), INT8_C( 95), INT8_C( -50), INT8_C(-118), INT8_C( 114), INT8_C( 58), INT8_C( -28), INT8_C( -81), INT8_C( -37)), simde_mm512_set_epi16(INT16_C( 120), INT16_C( -90), INT16_C( -101), INT16_C( -106), INT16_C( 70), INT16_C( -49), INT16_C( 29), INT16_C( -43), INT16_C( -42), INT16_C( 38), INT16_C( 16), INT16_C( -43), INT16_C( -40), INT16_C( -76), 
INT16_C( -67), INT16_C( 53), INT16_C( -73), INT16_C( -17), INT16_C( 66), INT16_C( 57), INT16_C( -65), INT16_C( -63), INT16_C( 17), INT16_C( -9), INT16_C( 95), INT16_C( -50), INT16_C( -118), INT16_C( 114), INT16_C( 58), INT16_C( -28), INT16_C( -81), INT16_C( -37)) }, { simde_mm256_set_epi8(INT8_C( -97), INT8_C( 10), INT8_C( -75), INT8_C(-120), INT8_C( -32), INT8_C(-105), INT8_C( -75), INT8_C(-101), INT8_C( 71), INT8_C(-122), INT8_C(-112), INT8_C( -2), INT8_C( 60), INT8_C( -71), INT8_C( 101), INT8_C( -1), INT8_C( 95), INT8_C( -58), INT8_C( -70), INT8_C( 102), INT8_C( 115), INT8_C( -68), INT8_C(-110), INT8_C( -36), INT8_C( 6), INT8_C( 58), INT8_C( 73), INT8_C( 97), INT8_C( -51), INT8_C( -4), INT8_C( 58), INT8_C( 31)), simde_mm512_set_epi16(INT16_C( -97), INT16_C( 10), INT16_C( -75), INT16_C( -120), INT16_C( -32), INT16_C( -105), INT16_C( -75), INT16_C( -101), INT16_C( 71), INT16_C( -122), INT16_C( -112), INT16_C( -2), INT16_C( 60), INT16_C( -71), INT16_C( 101), INT16_C( -1), INT16_C( 95), INT16_C( -58), INT16_C( -70), INT16_C( 102), INT16_C( 115), INT16_C( -68), INT16_C( -110), INT16_C( -36), INT16_C( 6), INT16_C( 58), INT16_C( 73), INT16_C( 97), INT16_C( -51), INT16_C( -4), INT16_C( 58), INT16_C( 31)) }, { simde_mm256_set_epi8(INT8_C( -73), INT8_C(-123), INT8_C( -11), INT8_C( 62), INT8_C( -96), INT8_C(-103), INT8_C( 85), INT8_C( 88), INT8_C( -19), INT8_C( 28), INT8_C(-107), INT8_C( -81), INT8_C(-125), INT8_C( 88), INT8_C( 84), INT8_C( 115), INT8_C( 105), INT8_C( -47), INT8_C( 68), INT8_C(-124), INT8_C( 32), INT8_C(-100), INT8_C( 10), INT8_C( -69), INT8_C( 124), INT8_C( -51), INT8_C( -89), INT8_C( -72), INT8_C( -92), INT8_C( -5), INT8_C( -46), INT8_C( 115)), simde_mm512_set_epi16(INT16_C( -73), INT16_C( -123), INT16_C( -11), INT16_C( 62), INT16_C( -96), INT16_C( -103), INT16_C( 85), INT16_C( 88), INT16_C( -19), INT16_C( 28), INT16_C( -107), INT16_C( -81), INT16_C( -125), INT16_C( 88), INT16_C( 84), INT16_C( 115), INT16_C( 105), INT16_C( -47), INT16_C( 68), INT16_C( 
-124), INT16_C( 32), INT16_C( -100), INT16_C( 10), INT16_C( -69), INT16_C( 124), INT16_C( -51), INT16_C( -89), INT16_C( -72), INT16_C( -92), INT16_C( -5), INT16_C( -46), INT16_C( 115)) }, { simde_mm256_set_epi8(INT8_C( 104), INT8_C( 66), INT8_C( 51), INT8_C( 81), INT8_C( -69), INT8_C( 104), INT8_C( 126), INT8_C( -43), INT8_C( -40), INT8_C( 23), INT8_C(-124), INT8_C( 98), INT8_C(-125), INT8_C( 95), INT8_C( -36), INT8_C( 46), INT8_C(-115), INT8_C( -93), INT8_C( 2), INT8_C( -77), INT8_C( 80), INT8_C(-116), INT8_C( 61), INT8_C( -89), INT8_C( -37), INT8_C( 9), INT8_C( 84), INT8_C( -64), INT8_C( 94), INT8_C( 67), INT8_C( -53), INT8_C( 111)), simde_mm512_set_epi16(INT16_C( 104), INT16_C( 66), INT16_C( 51), INT16_C( 81), INT16_C( -69), INT16_C( 104), INT16_C( 126), INT16_C( -43), INT16_C( -40), INT16_C( 23), INT16_C( -124), INT16_C( 98), INT16_C( -125), INT16_C( 95), INT16_C( -36), INT16_C( 46), INT16_C( -115), INT16_C( -93), INT16_C( 2), INT16_C( -77), INT16_C( 80), INT16_C( -116), INT16_C( 61), INT16_C( -89), INT16_C( -37), INT16_C( 9), INT16_C( 84), INT16_C( -64), INT16_C( 94), INT16_C( 67), INT16_C( -53), INT16_C( 111)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_cvtepi8_epi16(test_vec[i].a); simde_assert_m512i_i16(r, ==, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cvtepi8_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cvtepi16_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cvtepi16_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_cvtepi16_epi8) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/cvts.c000066400000000000000000005402461400333146700164120ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * 
modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN cvts #include #include #include static int test_simde_mm_cvtsepi16_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( -385), INT16_C(-14682), INT16_C( -6), INT16_C( 418), INT16_C( 0), INT16_C(-24263), INT16_C(-21423), INT16_C( -13)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( -6), INT8_C( 127), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( -13)) }, { simde_mm_set_epi16(INT16_C( 12), INT16_C( -1449), INT16_C( -1), INT16_C( -8), INT16_C( 151), INT16_C( 68), INT16_C( -857), INT16_C( -1)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 12), INT8_C(-128), INT8_C( -1), INT8_C( -8), INT8_C( 127), INT8_C( 68), INT8_C(-128), INT8_C( -1)) }, { simde_mm_set_epi16(INT16_C( 2343), INT16_C(-10678), INT16_C( -7895), INT16_C(-27557), INT16_C( 4), INT16_C( 6853), INT16_C( -1), INT16_C( -4386)), simde_mm_set_epi8(INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 4), INT8_C( 127), INT8_C( -1), INT8_C(-128)) }, { simde_mm_set_epi16(INT16_C( 2603), INT16_C(-10075), INT16_C( 1), INT16_C( -1), INT16_C( -267), INT16_C( 0), INT16_C( 0), INT16_C( -3)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 1), INT8_C( -1), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( -3)) }, { simde_mm_set_epi16(INT16_C( 318), INT16_C( 609), INT16_C( 127), INT16_C( 2), INT16_C( 326), INT16_C( 20), INT16_C( -1), INT16_C( -7)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 2), INT8_C( 127), INT8_C( 20), INT8_C( -1), INT8_C( -7)) }, { simde_mm_set_epi16(INT16_C( -57), INT16_C( 2093), INT16_C( 3059), INT16_C( 12), INT16_C( 10), INT16_C( 274), INT16_C( 50), INT16_C( -7)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -57), INT8_C( 127), INT8_C( 127), INT8_C( 12), INT8_C( 10), INT8_C( 127), INT8_C( 50), INT8_C( -7)) }, { simde_mm_set_epi16(INT16_C( 0), INT16_C( -3570), INT16_C( 1), INT16_C( 0), INT16_C( -36), INT16_C( 0), INT16_C( 54), INT16_C( -5)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 1), INT8_C( 0), INT8_C( -36), INT8_C( 0), INT8_C( 54), INT8_C( -5)) }, { simde_mm_set_epi16(INT16_C( 54), INT16_C( 92), INT16_C( 2), INT16_C( 185), INT16_C( 4), INT16_C( 1983), INT16_C( 2567), INT16_C( 136)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 54), INT8_C( 92), INT8_C( 2), INT8_C( 127), INT8_C( 4), INT8_C( 127), INT8_C( 127), INT8_C( 127)) } }; 
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cvtsepi16_epi8(test_vec[i].a); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cvtsepi16_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m128i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C( 447), INT16_C( -3887), INT16_C( 9), INT16_C( 6277), INT16_C( 2), INT16_C( -314), INT16_C( 1617), INT16_C( 64), INT16_C( 0), INT16_C( 1725), INT16_C( 801), INT16_C( -2), INT16_C( -782), INT16_C( -381), INT16_C( 0), INT16_C( -1)), simde_mm_set_epi8(INT8_C( 127), INT8_C(-128), INT8_C( 9), INT8_C( 127), INT8_C( 2), INT8_C(-128), INT8_C( 127), INT8_C( 64), INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C( -2), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C( -1)) }, { simde_mm256_set_epi16(INT16_C( -1378), INT16_C( 1), INT16_C( 2482), INT16_C( 0), INT16_C( 0), INT16_C( -2), INT16_C( -107), INT16_C( 7074), INT16_C( -117), INT16_C( 3648), INT16_C( -25), INT16_C( -225), INT16_C( 587), INT16_C( 15), INT16_C( 116), INT16_C( -1)), simde_mm_set_epi8(INT8_C(-128), INT8_C( 1), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( -2), INT8_C(-107), INT8_C( 127), INT8_C(-117), INT8_C( 127), INT8_C( -25), INT8_C(-128), INT8_C( 127), INT8_C( 15), INT8_C( 116), INT8_C( -1)) }, { simde_mm256_set_epi16(INT16_C( -602), INT16_C( 22836), INT16_C( -36), INT16_C( -417), INT16_C( 7314), INT16_C( -135), INT16_C( 1), INT16_C( -1693), INT16_C( -121), INT16_C( -342), INT16_C( 7), INT16_C( 3079), INT16_C( 14), INT16_C( 56), INT16_C( -16), INT16_C( -10)), simde_mm_set_epi8(INT8_C(-128), INT8_C( 127), INT8_C( -36), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 1), INT8_C(-128), INT8_C(-121), INT8_C(-128), INT8_C( 7), INT8_C( 127), INT8_C( 14), INT8_C( 56), INT8_C( -16), INT8_C( -10)) }, { simde_mm256_set_epi16(INT16_C( -171), INT16_C( 138), INT16_C( 235), INT16_C( 33), INT16_C( 102), INT16_C( -4), INT16_C( 2), INT16_C( 461), INT16_C( -30), INT16_C( -120), 
INT16_C( 34), INT16_C( 1), INT16_C( 1637), INT16_C( 2), INT16_C( 2), INT16_C( -2)), simde_mm_set_epi8(INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 33), INT8_C( 102), INT8_C( -4), INT8_C( 2), INT8_C( 127), INT8_C( -30), INT8_C(-120), INT8_C( 34), INT8_C( 1), INT8_C( 127), INT8_C( 2), INT8_C( 2), INT8_C( -2)) }, { simde_mm256_set_epi16(INT16_C( -3), INT16_C( 0), INT16_C(-16994), INT16_C( 475), INT16_C( 1), INT16_C( 5629), INT16_C( -14), INT16_C( 3), INT16_C( -1612), INT16_C( 2680), INT16_C( -183), INT16_C( -202), INT16_C( 0), INT16_C( -114), INT16_C( 11175), INT16_C( -3)), simde_mm_set_epi8(INT8_C( -3), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C( 1), INT8_C( 127), INT8_C( -14), INT8_C( 3), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C(-114), INT8_C( 127), INT8_C( -3)) }, { simde_mm256_set_epi16(INT16_C( -62), INT16_C( -29), INT16_C( -13), INT16_C( 0), INT16_C( -4), INT16_C( 6), INT16_C( 21), INT16_C( -2), INT16_C( -55), INT16_C( 5), INT16_C( -7030), INT16_C(-31314), INT16_C( 65), INT16_C( -7656), INT16_C( -53), INT16_C( -12)), simde_mm_set_epi8(INT8_C( -62), INT8_C( -29), INT8_C( -13), INT8_C( 0), INT8_C( -4), INT8_C( 6), INT8_C( 21), INT8_C( -2), INT8_C( -55), INT8_C( 5), INT8_C(-128), INT8_C(-128), INT8_C( 65), INT8_C(-128), INT8_C( -53), INT8_C( -12)) }, { simde_mm256_set_epi16(INT16_C( -52), INT16_C( 250), INT16_C( -4), INT16_C( 163), INT16_C( -1), INT16_C( -72), INT16_C( -689), INT16_C( -98), INT16_C( -1), INT16_C( 27), INT16_C(-29046), INT16_C( 504), INT16_C( 1), INT16_C( -668), INT16_C( 6), INT16_C( -130)), simde_mm_set_epi8(INT8_C( -52), INT8_C( 127), INT8_C( -4), INT8_C( 127), INT8_C( -1), INT8_C( -72), INT8_C(-128), INT8_C( -98), INT8_C( -1), INT8_C( 27), INT8_C(-128), INT8_C( 127), INT8_C( 1), INT8_C(-128), INT8_C( 6), INT8_C(-128)) }, { simde_mm256_set_epi16(INT16_C( 3869), INT16_C( -3), INT16_C( 3307), INT16_C( -5), INT16_C( -61), INT16_C( -5), INT16_C( -43), INT16_C( -7512), INT16_C( 226), INT16_C( 75), INT16_C( 0), 
INT16_C( -1), INT16_C( 1923), INT16_C( -25), INT16_C( 4919), INT16_C( -1)), simde_mm_set_epi8(INT8_C( 127), INT8_C( -3), INT8_C( 127), INT8_C( -5), INT8_C( -61), INT8_C( -5), INT8_C( -43), INT8_C(-128), INT8_C( 127), INT8_C( 75), INT8_C( 0), INT8_C( -1), INT8_C( 127), INT8_C( -25), INT8_C( 127), INT8_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm256_cvtsepi16_epi8(test_vec[i].a); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cvtsepi32_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C(-2145190814), INT32_C( 369095719), INT32_C( 35558368), INT32_C( -760875473)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128)) }, { simde_mm_set_epi32(INT32_C( -891201105), INT32_C( 2065808871), INT32_C( 421929391), INT32_C( 587313056)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127)) }, { simde_mm_set_epi32(INT32_C(-1908802801), INT32_C( -442332083), INT32_C(-1878740578), INT32_C(-1559213492)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128)) }, { simde_mm_set_epi32(INT32_C( 615110155), INT32_C( -809405494), INT32_C( 1459512749), INT32_C( -889064834)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128)) }, { simde_mm_set_epi32(INT32_C( -125398563), 
INT32_C( 1544839586), INT32_C( 657472508), INT32_C( -763651133)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128)) }, { simde_mm_set_epi32(INT32_C( 1244639853), INT32_C(-1576601619), INT32_C( 458581376), INT32_C(-1764676112)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128)) }, { simde_mm_set_epi32(INT32_C(-1516069112), INT32_C( 1985219066), INT32_C( 436268231), INT32_C( -342699987)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128)) }, { simde_mm_set_epi32(INT32_C( -672406129), INT32_C( 1062677939), INT32_C( 54896059), INT32_C( -667800710)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cvtsepi32_epi8(test_vec[i].a); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cvtsepi32_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m128i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 1740492550), INT32_C( 140736582), INT32_C( -304624647), INT32_C( 1856868246), INT32_C(-1035207889), INT32_C( -259975534), INT32_C( 927209588), INT32_C( -783560978)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), 
INT8_C( 127), INT8_C(-128)) }, { simde_mm256_set_epi32(INT32_C( 1448477867), INT32_C(-1308743699), INT32_C(-2012880494), INT32_C(-1194643628), INT32_C( 66901057), INT32_C( 1866223729), INT32_C( 1843270593), INT32_C(-1834364931)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128)) }, { simde_mm256_set_epi32(INT32_C( -227624299), INT32_C( 620224848), INT32_C(-1588616610), INT32_C( -511683185), INT32_C( -58755612), INT32_C( 729520635), INT32_C(-1442538979), INT32_C( -899237550)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128)) }, { simde_mm256_set_epi32(INT32_C(-1758822725), INT32_C( -49856508), INT32_C(-1924985834), INT32_C( 33750707), INT32_C(-1943493798), INT32_C( -295278060), INT32_C( 589269723), INT32_C( 667584093)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127)) }, { simde_mm256_set_epi32(INT32_C( 675031972), INT32_C(-2141215046), INT32_C( 2136698513), INT32_C(-1948084218), INT32_C( -864097131), INT32_C( -578863840), INT32_C( 1120830480), INT32_C(-1905153237)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(-128)) }, { simde_mm256_set_epi32(INT32_C(-1013885891), INT32_C( 2044207481), INT32_C( -247026840), INT32_C( 1877771374), INT32_C( 239002947), INT32_C( 331448889), INT32_C( 1715894971), INT32_C(-1281958023)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128)) }, { simde_mm256_set_epi32(INT32_C( -97799786), INT32_C( -153068806), INT32_C( 1991567237), INT32_C( 909652836), INT32_C( 1381550522), INT32_C( 725653218), INT32_C(-1527531673), INT32_C( 1671453787)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127)) }, { simde_mm256_set_epi32(INT32_C( 1505264140), INT32_C( -659446700), INT32_C( -504294088), INT32_C( -895465708), INT32_C( -790990314), INT32_C(-2012027711), INT32_C( 606981184), INT32_C( -261121366)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(-128)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm256_cvtsepi32_epi8(test_vec[i].a); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cvtsepi32_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 115673074), INT32_C(-1486060937), INT32_C( -104167420), INT32_C( 40162333)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767)) }, { simde_mm_set_epi32(INT32_C(-2128159397), INT32_C(-1560280329), INT32_C( 394598915), INT32_C( 425183512)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767)) }, { simde_mm_set_epi32(INT32_C( -746969032), INT32_C( -718350441), INT32_C( -917354043), INT32_C( -740787295)), simde_mm_set_epi16(INT16_C( 0), 
INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768)) }, { simde_mm_set_epi32(INT32_C( 1489645447), INT32_C( 206085999), INT32_C(-1252698771), INT32_C( 1551612893)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767)) }, { simde_mm_set_epi32(INT32_C( -503631451), INT32_C( -997616848), INT32_C( -691694514), INT32_C( -383740168)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768)) }, { simde_mm_set_epi32(INT32_C(-1905774960), INT32_C( -8801329), INT32_C( 1184144481), INT32_C( 1001484550)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767)) }, { simde_mm_set_epi32(INT32_C( 1461694678), INT32_C( 2088398452), INT32_C(-1522574509), INT32_C(-1848551844)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768)) }, { simde_mm_set_epi32(INT32_C( 344147543), INT32_C( 1208328320), INT32_C( 846887925), INT32_C(-1224326570)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cvtsepi32_epi16(test_vec[i].a); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cvtsepi32_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m128i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 293632982), INT32_C( -353952507), INT32_C( -369979604), INT32_C(-1836849217), INT32_C( -251503260), INT32_C(-1183044723), INT32_C(-1145018690), INT32_C( 1658446911)), simde_mm_set_epi16(INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), 
INT16_C(-32768), INT16_C(-32768), INT16_C( 32767)) }, { simde_mm256_set_epi32(INT32_C( 2004115928), INT32_C(-2131417756), INT32_C( 420074936), INT32_C( 696094435), INT32_C(-1971968852), INT32_C( 1786370502), INT32_C(-1223946451), INT32_C( 1160235100)), simde_mm_set_epi16(INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767)) }, { simde_mm256_set_epi32(INT32_C( 275993791), INT32_C( -287244774), INT32_C(-1437747171), INT32_C( 389173048), INT32_C( 902365137), INT32_C( 1626837983), INT32_C( -529930026), INT32_C( -839513231)), simde_mm_set_epi16(INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768)) }, { simde_mm256_set_epi32(INT32_C( 105733041), INT32_C( 1860469307), INT32_C( -229572975), INT32_C( 1382662419), INT32_C( 1098864098), INT32_C( -335162889), INT32_C( -180211643), INT32_C(-1043436692)), simde_mm_set_epi16(INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768)) }, { simde_mm256_set_epi32(INT32_C( 891400005), INT32_C(-1094002629), INT32_C( -334052966), INT32_C( 1506672550), INT32_C( 1278355339), INT32_C( 30990556), INT32_C(-1937540327), INT32_C( 1090305372)), simde_mm_set_epi16(INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767)) }, { simde_mm256_set_epi32(INT32_C( 756031161), INT32_C( -626600217), INT32_C( -910464378), INT32_C( 1938343335), INT32_C( -214510069), INT32_C( 2030441734), INT32_C( 196869826), INT32_C( 1790672058)), simde_mm_set_epi16(INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767)) }, { simde_mm256_set_epi32(INT32_C( 1995451372), INT32_C(-1306372252), INT32_C(-1445161199), INT32_C(-1208121843), INT32_C( -34821446), INT32_C(-1602733180), 
INT32_C( -116240092), INT32_C( -493988056)), simde_mm_set_epi16(INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768)) }, { simde_mm256_set_epi32(INT32_C( -324372118), INT32_C( 1343179053), INT32_C( -573491227), INT32_C(-1553210763), INT32_C(-1920172306), INT32_C(-1677417189), INT32_C( 982179672), INT32_C(-2112317112)), simde_mm_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm256_cvtsepi32_epi16(test_vec[i].a); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cvtsepi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C(-7549065086020638499), INT64_C(-3321927609022761093)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128)) }, { simde_mm_set_epi64x(INT64_C(-4277312397687066005), INT64_C( 2320456666692343657)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127)) }, { simde_mm_set_epi64x(INT64_C( -103344442256467696), INT64_C( 2955664840703358830)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127)) }, { simde_mm_set_epi64x(INT64_C( 5268893576481852159), INT64_C(-9185651458279974860)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 
0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128)) }, { simde_mm_set_epi64x(INT64_C(-5896269746913404379), INT64_C( 7979713383766604250)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127)) }, { simde_mm_set_epi64x(INT64_C(-2336810237452023823), INT64_C(-3290098590496967741)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128)) }, { simde_mm_set_epi64x(INT64_C(-7739538779128774601), INT64_C( 8105208141968845994)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127)) }, { simde_mm_set_epi64x(INT64_C( -871381348062974225), INT64_C( 3367136188494551863)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cvtsepi64_epi8(test_vec[i].a); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_cvtsepi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m128i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C( 7571764882458171438), INT64_C(-5921591509803744983), INT64_C( 6658090239555345361), INT64_C(-2357401607469764832)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128)) }, 
{ simde_mm256_set_epi64x(INT64_C(-7394691671820149099), INT64_C(-5986584038912522225), INT64_C(-6456224054777082128), INT64_C(-1619710544369908736)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128)) }, { simde_mm256_set_epi64x(INT64_C( 253771227790268661), INT64_C(-8177257137918582254), INT64_C( 9193080332106867266), INT64_C(-7500787245705959716)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128)) }, { simde_mm256_set_epi64x(INT64_C( -378633448466384692), INT64_C( 8377241746990476361), INT64_C(-3577751476639945070), INT64_C( 3071274498813504095)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127)) }, { simde_mm256_set_epi64x(INT64_C( 6831871583847420748), INT64_C( 3270263401510298982), INT64_C( 7119350019975334051), INT64_C( 2198565260202909270)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127)) }, { simde_mm256_set_epi64x(INT64_C( 1387880433219135865), INT64_C( 3250534069269084917), INT64_C(-1277711946882622375), INT64_C(-5810920510833024903)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C(-128)) }, { simde_mm256_set_epi64x(INT64_C(-3285074807695571420), INT64_C(-5576331776659409304), INT64_C( 3478574654500178090), INT64_C( 
7240016417068788730)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127)) }, { simde_mm256_set_epi64x(INT64_C( -444210268504052724), INT64_C(-1278644105244443763), INT64_C(-1392943174058285672), INT64_C( 6768383435631406852)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm256_cvtsepi64_epi8(test_vec[i].a); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_cvtsepi16_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m256i r; } test_vec[8] = { { simde_mm512_set_epi16(INT16_C( 8002), INT16_C( -42), INT16_C( 317), INT16_C( 3), INT16_C( 2), INT16_C( 22), INT16_C( 2), INT16_C( 102), INT16_C( 0), INT16_C( 130), INT16_C( 297), INT16_C( 4068), INT16_C( -202), INT16_C( 4593), INT16_C( 1545), INT16_C( -2), INT16_C( -4392), INT16_C( -3947), INT16_C( 301), INT16_C(-11526), INT16_C( -1), INT16_C( 1), INT16_C( -437), INT16_C( 4), INT16_C( -139), INT16_C( -4859), INT16_C( -1), INT16_C( 12), INT16_C( -591), INT16_C( -40), INT16_C( -269), INT16_C( -24)), simde_mm256_set_epi8(INT8_C( 127), INT8_C( -42), INT8_C( 127), INT8_C( 3), INT8_C( 2), INT8_C( 22), INT8_C( 2), INT8_C( 102), INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( -2), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( -1), INT8_C( 1), INT8_C(-128), INT8_C( 4), INT8_C(-128), INT8_C(-128), INT8_C( -1), INT8_C( 12), INT8_C(-128), INT8_C( -40), INT8_C(-128), INT8_C( -24)) }, { simde_mm512_set_epi16(INT16_C( 2), INT16_C( 49), INT16_C( 2), INT16_C( 131), INT16_C( 1), 
INT16_C(-15743), INT16_C( 1984), INT16_C( 9), INT16_C(-20648), INT16_C( -5458), INT16_C( -1), INT16_C( -1), INT16_C( -3713), INT16_C( -60), INT16_C( -1328), INT16_C( 4), INT16_C( 150), INT16_C( -8), INT16_C( 1864), INT16_C( -14), INT16_C( 20), INT16_C( 194), INT16_C( 0), INT16_C( 5954), INT16_C( -13), INT16_C( -288), INT16_C( 3162), INT16_C( 1), INT16_C( 14), INT16_C( 2628), INT16_C( -1), INT16_C( -8)), simde_mm256_set_epi8(INT8_C( 2), INT8_C( 49), INT8_C( 2), INT8_C( 127), INT8_C( 1), INT8_C(-128), INT8_C( 127), INT8_C( 9), INT8_C(-128), INT8_C(-128), INT8_C( -1), INT8_C( -1), INT8_C(-128), INT8_C( -60), INT8_C(-128), INT8_C( 4), INT8_C( 127), INT8_C( -8), INT8_C( 127), INT8_C( -14), INT8_C( 20), INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C( -13), INT8_C(-128), INT8_C( 127), INT8_C( 1), INT8_C( 14), INT8_C( 127), INT8_C( -1), INT8_C( -8)) }, { simde_mm512_set_epi16(INT16_C( -562), INT16_C( -4), INT16_C( 14830), INT16_C( 23439), INT16_C( -872), INT16_C( 2), INT16_C( -152), INT16_C( 42), INT16_C( 23882), INT16_C( -7), INT16_C(-27326), INT16_C( 5025), INT16_C( 7663), INT16_C(-14858), INT16_C( 0), INT16_C( -1), INT16_C( 27), INT16_C( 3), INT16_C( -6), INT16_C( 54), INT16_C( -16), INT16_C( -120), INT16_C( -1), INT16_C( 0), INT16_C( 50), INT16_C( -3), INT16_C( 1), INT16_C( -128), INT16_C( -6425), INT16_C( 1), INT16_C( -1), INT16_C( -167)), simde_mm256_set_epi8(INT8_C(-128), INT8_C( -4), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 2), INT8_C(-128), INT8_C( 42), INT8_C( 127), INT8_C( -7), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 0), INT8_C( -1), INT8_C( 27), INT8_C( 3), INT8_C( -6), INT8_C( 54), INT8_C( -16), INT8_C(-120), INT8_C( -1), INT8_C( 0), INT8_C( 50), INT8_C( -3), INT8_C( 1), INT8_C(-128), INT8_C(-128), INT8_C( 1), INT8_C( -1), INT8_C(-128)) }, { simde_mm512_set_epi16(INT16_C( 117), INT16_C( 75), INT16_C( -56), INT16_C( -1), INT16_C( 53), INT16_C( -283), INT16_C( 138), INT16_C( 3), INT16_C( -1), INT16_C( 6705), INT16_C( 138), 
INT16_C( -3521), INT16_C( 22), INT16_C( 1779), INT16_C( 2888), INT16_C( -1), INT16_C( -189), INT16_C( 17712), INT16_C( -18), INT16_C( 923), INT16_C( 1), INT16_C( 1), INT16_C( 2605), INT16_C( 353), INT16_C( 201), INT16_C( -120), INT16_C(-28865), INT16_C( -13), INT16_C( -701), INT16_C( 945), INT16_C( -2270), INT16_C( 0)), simde_mm256_set_epi8(INT8_C( 117), INT8_C( 75), INT8_C( -56), INT8_C( -1), INT8_C( 53), INT8_C(-128), INT8_C( 127), INT8_C( 3), INT8_C( -1), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 22), INT8_C( 127), INT8_C( 127), INT8_C( -1), INT8_C(-128), INT8_C( 127), INT8_C( -18), INT8_C( 127), INT8_C( 1), INT8_C( 1), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-120), INT8_C(-128), INT8_C( -13), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 0)) }, { simde_mm512_set_epi16(INT16_C( 127), INT16_C( 406), INT16_C( 74), INT16_C( -2), INT16_C( -12), INT16_C( 196), INT16_C( -4), INT16_C( -4852), INT16_C( 38), INT16_C( -10), INT16_C( -408), INT16_C( 453), INT16_C( -6833), INT16_C( 37), INT16_C( -3464), INT16_C( -2), INT16_C( -488), INT16_C( 0), INT16_C( 643), INT16_C( 6), INT16_C( 51), INT16_C( 0), INT16_C( -4), INT16_C( -1), INT16_C(-14928), INT16_C( -46), INT16_C( 58), INT16_C( -3), INT16_C( 0), INT16_C( -343), INT16_C(-10019), INT16_C( 0)), simde_mm256_set_epi8(INT8_C( 127), INT8_C( 127), INT8_C( 74), INT8_C( -2), INT8_C( -12), INT8_C( 127), INT8_C( -4), INT8_C(-128), INT8_C( 38), INT8_C( -10), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 37), INT8_C(-128), INT8_C( -2), INT8_C(-128), INT8_C( 0), INT8_C( 127), INT8_C( 6), INT8_C( 51), INT8_C( 0), INT8_C( -4), INT8_C( -1), INT8_C(-128), INT8_C( -46), INT8_C( 58), INT8_C( -3), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 0)) }, { simde_mm512_set_epi16(INT16_C( -1), INT16_C( -378), INT16_C( 3797), INT16_C( 1), INT16_C( -61), INT16_C( -6468), INT16_C( 0), INT16_C(-18745), INT16_C( 3), INT16_C( -5), INT16_C( -1), INT16_C( 4066), INT16_C( -1), INT16_C( -11), INT16_C( -2), INT16_C( 881), INT16_C( 
-116), INT16_C( 22), INT16_C( 1), INT16_C( -1127), INT16_C( 15), INT16_C( 2276), INT16_C( -149), INT16_C( 7345), INT16_C( 850), INT16_C( -84), INT16_C( -2), INT16_C( 0), INT16_C( -6), INT16_C( -3696), INT16_C( -2), INT16_C( 2136)), simde_mm256_set_epi8(INT8_C( -1), INT8_C(-128), INT8_C( 127), INT8_C( 1), INT8_C( -61), INT8_C(-128), INT8_C( 0), INT8_C(-128), INT8_C( 3), INT8_C( -5), INT8_C( -1), INT8_C( 127), INT8_C( -1), INT8_C( -11), INT8_C( -2), INT8_C( 127), INT8_C(-116), INT8_C( 22), INT8_C( 1), INT8_C(-128), INT8_C( 15), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( -84), INT8_C( -2), INT8_C( 0), INT8_C( -6), INT8_C(-128), INT8_C( -2), INT8_C( 127)) }, { simde_mm512_set_epi16(INT16_C( -368), INT16_C( -1403), INT16_C( -10), INT16_C( 2787), INT16_C( 6145), INT16_C( -228), INT16_C( 2519), INT16_C( -1), INT16_C( -244), INT16_C( 10), INT16_C( 14), INT16_C( 113), INT16_C( 23), INT16_C( 1), INT16_C( 21613), INT16_C( -2), INT16_C( -2), INT16_C( -29), INT16_C( 9866), INT16_C( 356), INT16_C( -254), INT16_C( -21), INT16_C( 9), INT16_C( 9), INT16_C( 249), INT16_C( 3052), INT16_C( -8880), INT16_C( 7), INT16_C( 0), INT16_C( -4602), INT16_C( -8839), INT16_C( 18288)), simde_mm256_set_epi8(INT8_C(-128), INT8_C(-128), INT8_C( -10), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( -1), INT8_C(-128), INT8_C( 10), INT8_C( 14), INT8_C( 113), INT8_C( 23), INT8_C( 1), INT8_C( 127), INT8_C( -2), INT8_C( -2), INT8_C( -29), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( -21), INT8_C( 9), INT8_C( 9), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 7), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 127)) }, { simde_mm512_set_epi16(INT16_C( 502), INT16_C( 22), INT16_C( -4), INT16_C( 1), INT16_C( 2), INT16_C( -1148), INT16_C( 1), INT16_C( 8), INT16_C( 25), INT16_C( 8616), INT16_C( -1), INT16_C( 0), INT16_C( 1091), INT16_C( -370), INT16_C( 0), INT16_C( -1), INT16_C( -1549), INT16_C( 12), INT16_C( -2), INT16_C( 4), INT16_C( -3952), INT16_C( 1681), 
INT16_C( 3), INT16_C( 35), INT16_C( -6), INT16_C( 0), INT16_C( -2), INT16_C( 51), INT16_C( 0), INT16_C( 3760), INT16_C( -1), INT16_C( -1)), simde_mm256_set_epi8(INT8_C( 127), INT8_C( 22), INT8_C( -4), INT8_C( 1), INT8_C( 2), INT8_C(-128), INT8_C( 1), INT8_C( 8), INT8_C( 25), INT8_C( 127), INT8_C( -1), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 0), INT8_C( -1), INT8_C(-128), INT8_C( 12), INT8_C( -2), INT8_C( 4), INT8_C(-128), INT8_C( 127), INT8_C( 3), INT8_C( 35), INT8_C( -6), INT8_C( 0), INT8_C( -2), INT8_C( 51), INT8_C( 0), INT8_C( 127), INT8_C( -1), INT8_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm512_cvtsepi16_epi8(test_vec[i].a); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_cvtsepi16_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i src; simde__mmask32 k; simde__m512i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( -40), INT8_C( 93), INT8_C( -64), INT8_C( 127), INT8_C(-121), INT8_C( -33), INT8_C( -51), INT8_C( 86), INT8_C(-123), INT8_C(-117), INT8_C(-108), INT8_C( -18), INT8_C( 37), INT8_C( 26), INT8_C( -88), INT8_C( 80), INT8_C( 30), INT8_C( 81), INT8_C(-126), INT8_C( 115), INT8_C(-113), INT8_C( 80), INT8_C( -70), INT8_C( 14), INT8_C( 67), INT8_C(-122), INT8_C( -52), INT8_C( -49), INT8_C( -61), INT8_C( -10), INT8_C( -10), INT8_C( 9)), UINT32_C( 9464246), simde_mm512_set_epi16(INT16_C( 2661), INT16_C( 8241), INT16_C(-31365), INT16_C(-28794), INT16_C( 31420), INT16_C( 5279), INT16_C(-11307), INT16_C( 8621), INT16_C( 25450), INT16_C(-29219), INT16_C( 7142), INT16_C( 6454), INT16_C( 22666), INT16_C( 22972), INT16_C( 22678), INT16_C(-10349), INT16_C( 26453), INT16_C(-25565), INT16_C(-27471), INT16_C(-25692), INT16_C( 24633), INT16_C(-12996), INT16_C( -7464), INT16_C( 6163), INT16_C(-14301), INT16_C( 4709), INT16_C( 31142), INT16_C(-11353), INT16_C(-11078), INT16_C( -7297), INT16_C( -6389), INT16_C(-27388)), 
simde_mm256_set_epi8(INT8_C( -40), INT8_C( 93), INT8_C( -64), INT8_C( 127), INT8_C(-121), INT8_C( -33), INT8_C( -51), INT8_C( 86), INT8_C( 127), INT8_C(-117), INT8_C(-108), INT8_C( 127), INT8_C( 37), INT8_C( 26), INT8_C( -88), INT8_C( 80), INT8_C( 30), INT8_C(-128), INT8_C(-128), INT8_C( 115), INT8_C( 127), INT8_C( 80), INT8_C( -70), INT8_C( 127), INT8_C(-128), INT8_C(-122), INT8_C( 127), INT8_C(-128), INT8_C( -61), INT8_C(-128), INT8_C(-128), INT8_C( 9)) }, { simde_mm256_set_epi8(INT8_C(-127), INT8_C( 79), INT8_C( -17), INT8_C( -57), INT8_C( 84), INT8_C( 25), INT8_C( -42), INT8_C( -98), INT8_C( -48), INT8_C( -88), INT8_C( 64), INT8_C(-126), INT8_C( 55), INT8_C( 85), INT8_C( 23), INT8_C( 101), INT8_C( 98), INT8_C( -22), INT8_C( 51), INT8_C( 32), INT8_C( -91), INT8_C( 43), INT8_C( 124), INT8_C( -87), INT8_C( -45), INT8_C( 117), INT8_C( -35), INT8_C( 88), INT8_C( 36), INT8_C( 62), INT8_C( -51), INT8_C( 54)), UINT32_C( 16139739), simde_mm512_set_epi16(INT16_C( 29247), INT16_C( -8471), INT16_C( -1085), INT16_C( 30870), INT16_C( 25891), INT16_C(-21158), INT16_C( 12488), INT16_C( -6908), INT16_C( -1323), INT16_C(-29158), INT16_C( 19744), INT16_C( -9433), INT16_C( 32012), INT16_C(-30047), INT16_C( 12083), INT16_C( 5703), INT16_C(-23507), INT16_C( 9265), INT16_C( 9307), INT16_C( 19873), INT16_C( -2618), INT16_C( -1149), INT16_C( 18681), INT16_C( 7098), INT16_C(-13814), INT16_C( 19632), INT16_C(-21387), INT16_C(-19281), INT16_C(-23963), INT16_C(-20730), INT16_C(-32754), INT16_C(-15371)), simde_mm256_set_epi8(INT8_C(-127), INT8_C( 79), INT8_C( -17), INT8_C( -57), INT8_C( 84), INT8_C( 25), INT8_C( -42), INT8_C( -98), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 55), INT8_C(-128), INT8_C( 127), INT8_C( 101), INT8_C( 98), INT8_C( 127), INT8_C( 51), INT8_C( 32), INT8_C( -91), INT8_C(-128), INT8_C( 124), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( -35), INT8_C(-128), INT8_C(-128), INT8_C( 62), INT8_C(-128), INT8_C(-128)) }, { 
simde_mm256_set_epi8(INT8_C(-113), INT8_C( -29), INT8_C( -74), INT8_C(-125), INT8_C( 84), INT8_C( 106), INT8_C(-102), INT8_C(-121), INT8_C(-106), INT8_C( 7), INT8_C( 31), INT8_C( -22), INT8_C( 0), INT8_C( -95), INT8_C( -48), INT8_C(-118), INT8_C( 70), INT8_C( -51), INT8_C( 81), INT8_C( -73), INT8_C( 6), INT8_C( -50), INT8_C( -28), INT8_C(-105), INT8_C( 13), INT8_C( -36), INT8_C( 105), INT8_C( -99), INT8_C( -94), INT8_C( -98), INT8_C( -55), INT8_C( 107)), UINT32_C( 885729), simde_mm512_set_epi16(INT16_C( 27294), INT16_C( 16674), INT16_C( -5346), INT16_C(-13180), INT16_C( -6683), INT16_C( 30235), INT16_C(-31280), INT16_C( -5669), INT16_C( 12288), INT16_C(-24546), INT16_C( 9831), INT16_C(-31819), INT16_C( 14268), INT16_C(-17608), INT16_C( 4735), INT16_C( 29092), INT16_C( 3348), INT16_C( 20490), INT16_C( 9048), INT16_C(-14230), INT16_C(-24731), INT16_C(-20916), INT16_C( 15138), INT16_C( 30554), INT16_C( 12356), INT16_C(-12154), INT16_C( -1803), INT16_C( -2954), INT16_C( -6857), INT16_C( -6626), INT16_C(-23034), INT16_C( 23374)), simde_mm256_set_epi8(INT8_C(-113), INT8_C( -29), INT8_C( -74), INT8_C(-125), INT8_C( 84), INT8_C( 106), INT8_C(-102), INT8_C(-121), INT8_C(-106), INT8_C( 7), INT8_C( 31), INT8_C( -22), INT8_C( 127), INT8_C(-128), INT8_C( -48), INT8_C( 127), INT8_C( 127), INT8_C( -51), INT8_C( 81), INT8_C( -73), INT8_C( 6), INT8_C( -50), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( -99), INT8_C( -94), INT8_C( -98), INT8_C( -55), INT8_C( 127)) }, { simde_mm256_set_epi8(INT8_C( 106), INT8_C( 125), INT8_C( -9), INT8_C( -42), INT8_C( 10), INT8_C( -67), INT8_C( -69), INT8_C( -15), INT8_C(-128), INT8_C( -93), INT8_C( -51), INT8_C( 63), INT8_C( -48), INT8_C( -19), INT8_C( -88), INT8_C( 0), INT8_C( 21), INT8_C(-113), INT8_C( 68), INT8_C( 50), INT8_C( 14), INT8_C( -17), INT8_C( -38), INT8_C( 21), INT8_C( 68), INT8_C( 38), INT8_C( -14), INT8_C(-115), INT8_C( 96), INT8_C( -92), INT8_C( -38), INT8_C( 61)), UINT32_C( 14868571), 
simde_mm512_set_epi16(INT16_C( -4902), INT16_C(-30912), INT16_C( -9873), INT16_C( -3317), INT16_C( 28529), INT16_C( 10810), INT16_C( 14756), INT16_C(-11980), INT16_C( 8230), INT16_C( 14663), INT16_C(-28044), INT16_C( 14810), INT16_C( 3087), INT16_C( 22573), INT16_C(-19115), INT16_C( 26647), INT16_C( 3404), INT16_C(-13826), INT16_C( 16604), INT16_C( 11097), INT16_C( 31149), INT16_C(-30463), INT16_C(-25032), INT16_C(-30305), INT16_C( 14499), INT16_C(-27532), INT16_C(-28183), INT16_C( 13580), INT16_C( 32147), INT16_C(-28462), INT16_C( 24823), INT16_C( 22687)), simde_mm256_set_epi8(INT8_C( 106), INT8_C( 125), INT8_C( -9), INT8_C( -42), INT8_C( 10), INT8_C( -67), INT8_C( -69), INT8_C( -15), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 63), INT8_C( -48), INT8_C( -19), INT8_C(-128), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 50), INT8_C( 14), INT8_C( -17), INT8_C( -38), INT8_C( 21), INT8_C( 68), INT8_C(-128), INT8_C( -14), INT8_C( 127), INT8_C( 127), INT8_C( -92), INT8_C( 127), INT8_C( 127)) }, { simde_mm256_set_epi8(INT8_C( 2), INT8_C( -9), INT8_C( -57), INT8_C( 115), INT8_C( 59), INT8_C( 73), INT8_C( 48), INT8_C( -68), INT8_C( 25), INT8_C( 22), INT8_C( 111), INT8_C( -11), INT8_C( 15), INT8_C( 80), INT8_C( -28), INT8_C( 103), INT8_C( 112), INT8_C( 70), INT8_C( -85), INT8_C( 93), INT8_C( 119), INT8_C( 109), INT8_C( -22), INT8_C( 25), INT8_C( -22), INT8_C( 45), INT8_C( -15), INT8_C( -72), INT8_C( 118), INT8_C( -32), INT8_C( -96), INT8_C( 60)), UINT32_C( 8515470), simde_mm512_set_epi16(INT16_C(-15796), INT16_C( 30043), INT16_C( 18714), INT16_C(-25916), INT16_C( 32734), INT16_C( -8707), INT16_C( 8469), INT16_C( -164), INT16_C( 16857), INT16_C( 3751), INT16_C(-17844), INT16_C( 25466), INT16_C( 11414), INT16_C(-10297), INT16_C( 19393), INT16_C(-20590), INT16_C(-15444), INT16_C(-25233), INT16_C( 18470), INT16_C(-31576), INT16_C(-31191), INT16_C(-25805), INT16_C( 18489), INT16_C(-13757), INT16_C( 25106), INT16_C( 2901), INT16_C( 15043), INT16_C( 7932), 
INT16_C( 16442), INT16_C(-31838), INT16_C( 6259), INT16_C( -3601)), simde_mm256_set_epi8(INT8_C( 2), INT8_C( -9), INT8_C( -57), INT8_C( 115), INT8_C( 59), INT8_C( 73), INT8_C( 48), INT8_C( -68), INT8_C( 127), INT8_C( 22), INT8_C( 111), INT8_C( -11), INT8_C( 15), INT8_C( 80), INT8_C( -28), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 93), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 45), INT8_C( -15), INT8_C( -72), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 60)) }, { simde_mm256_set_epi8(INT8_C( -4), INT8_C( -3), INT8_C( 85), INT8_C( -85), INT8_C( -30), INT8_C( -90), INT8_C( 17), INT8_C( 33), INT8_C( 39), INT8_C(-100), INT8_C( -52), INT8_C( 125), INT8_C( -92), INT8_C( -97), INT8_C( 56), INT8_C( -59), INT8_C( -44), INT8_C( -99), INT8_C( 111), INT8_C( -71), INT8_C( -30), INT8_C( 31), INT8_C( -11), INT8_C( 118), INT8_C( 7), INT8_C(-107), INT8_C( 79), INT8_C( 3), INT8_C( 24), INT8_C( -11), INT8_C( 32), INT8_C( -4)), UINT32_C( 6113317), simde_mm512_set_epi16(INT16_C( 11384), INT16_C(-29809), INT16_C( 14601), INT16_C(-32436), INT16_C( 2836), INT16_C( 23731), INT16_C( 16401), INT16_C( 1507), INT16_C( -6852), INT16_C( 24555), INT16_C(-18463), INT16_C( 1023), INT16_C(-29174), INT16_C( 23081), INT16_C( 30328), INT16_C(-13892), INT16_C( -2629), INT16_C( -6761), INT16_C( -2701), INT16_C( 4637), INT16_C( 12641), INT16_C( 27159), INT16_C( -5860), INT16_C( 17953), INT16_C( 4257), INT16_C( 13148), INT16_C(-26937), INT16_C( 3270), INT16_C(-27177), INT16_C( -7723), INT16_C(-15174), INT16_C(-31459)), simde_mm256_set_epi8(INT8_C( -4), INT8_C( -3), INT8_C( 85), INT8_C( -85), INT8_C( -30), INT8_C( -90), INT8_C( 17), INT8_C( 33), INT8_C( 39), INT8_C( 127), INT8_C( -52), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 56), INT8_C(-128), INT8_C( -44), INT8_C(-128), INT8_C( 111), INT8_C( -71), INT8_C( 127), INT8_C( 31), INT8_C( -11), INT8_C( 118), INT8_C( 7), INT8_C(-107), INT8_C(-128), INT8_C( 3), INT8_C( 24), INT8_C(-128), 
INT8_C( 32), INT8_C(-128)) }, { simde_mm256_set_epi8(INT8_C( 81), INT8_C( -88), INT8_C( -80), INT8_C( 3), INT8_C( 13), INT8_C(-121), INT8_C( 22), INT8_C( 32), INT8_C( -40), INT8_C( 127), INT8_C( -26), INT8_C( -3), INT8_C( 106), INT8_C( -33), INT8_C( -31), INT8_C(-105), INT8_C( 59), INT8_C( -77), INT8_C( -5), INT8_C(-117), INT8_C( 83), INT8_C( 56), INT8_C( -44), INT8_C( -77), INT8_C( 117), INT8_C( 36), INT8_C( -73), INT8_C( -39), INT8_C( 3), INT8_C( -92), INT8_C( 31), INT8_C( -95)), UINT32_C( 10953837), simde_mm512_set_epi16(INT16_C(-11142), INT16_C(-22777), INT16_C( -6414), INT16_C( 9569), INT16_C( 17271), INT16_C( -3594), INT16_C( 10850), INT16_C(-11973), INT16_C( 9012), INT16_C( 10724), INT16_C(-16786), INT16_C( -2912), INT16_C( -9979), INT16_C(-18969), INT16_C( 4915), INT16_C( 10628), INT16_C( 8002), INT16_C( -445), INT16_C( 4038), INT16_C( -403), INT16_C( -4668), INT16_C(-24641), INT16_C(-17959), INT16_C( 5928), INT16_C(-23858), INT16_C(-14464), INT16_C( 22098), INT16_C(-14446), INT16_C(-19436), INT16_C(-24649), INT16_C(-32418), INT16_C( 13277)), simde_mm256_set_epi8(INT8_C( 81), INT8_C( -88), INT8_C( -80), INT8_C( 3), INT8_C( 13), INT8_C(-121), INT8_C( 22), INT8_C( 32), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( -3), INT8_C( 106), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 59), INT8_C( -77), INT8_C( 127), INT8_C(-117), INT8_C( 83), INT8_C(-128), INT8_C( -44), INT8_C( -77), INT8_C( 117), INT8_C(-128), INT8_C( 127), INT8_C( -39), INT8_C(-128), INT8_C(-128), INT8_C( 31), INT8_C( 127)) }, { simde_mm256_set_epi8(INT8_C( 83), INT8_C( 121), INT8_C( 87), INT8_C( 97), INT8_C( 69), INT8_C( 33), INT8_C( -12), INT8_C(-117), INT8_C( 99), INT8_C( -61), INT8_C( 40), INT8_C( -77), INT8_C( 19), INT8_C( -55), INT8_C( 3), INT8_C( -96), INT8_C( -32), INT8_C( -61), INT8_C( -5), INT8_C( -43), INT8_C( -47), INT8_C( 118), INT8_C( 58), INT8_C( 64), INT8_C( 116), INT8_C( -63), INT8_C( 114), INT8_C( -16), INT8_C( 31), INT8_C( 76), INT8_C( -13), INT8_C(-112)), UINT32_C( 
4699534), simde_mm512_set_epi16(INT16_C( 27075), INT16_C( -2643), INT16_C(-27919), INT16_C( 22926), INT16_C( -6606), INT16_C( 2828), INT16_C( -5375), INT16_C(-23218), INT16_C( -30), INT16_C( -9639), INT16_C( 28038), INT16_C( 11270), INT16_C(-27543), INT16_C( 22578), INT16_C( 30460), INT16_C( 13726), INT16_C( 28595), INT16_C( -2815), INT16_C(-21539), INT16_C(-25818), INT16_C( 5191), INT16_C( 18511), INT16_C( 23592), INT16_C( 27668), INT16_C(-21627), INT16_C(-28967), INT16_C(-19124), INT16_C( 27342), INT16_C( 10864), INT16_C(-23627), INT16_C( 10714), INT16_C(-29389)), simde_mm256_set_epi8(INT8_C( 83), INT8_C( 121), INT8_C( 87), INT8_C( 97), INT8_C( 69), INT8_C( 33), INT8_C( -12), INT8_C(-117), INT8_C( 99), INT8_C(-128), INT8_C( 40), INT8_C( -77), INT8_C( 19), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( -61), INT8_C(-128), INT8_C(-128), INT8_C( -47), INT8_C( 127), INT8_C( 58), INT8_C( 127), INT8_C(-128), INT8_C( -63), INT8_C( 114), INT8_C( -16), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-112)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm512_mask_cvtsepi16_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_cvtsepi16_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask32 k; simde__m512i a; simde__m256i r; } test_vec[8] = { { UINT32_C( 16184841), simde_mm512_set_epi16(INT16_C( 26453), INT16_C(-25565), INT16_C(-27471), INT16_C(-25692), INT16_C( 24633), INT16_C(-12996), INT16_C( -7464), INT16_C( 6163), INT16_C(-14301), INT16_C( 4709), INT16_C( 31142), INT16_C(-11353), INT16_C(-11078), INT16_C( -7297), INT16_C( -6389), INT16_C(-27388), INT16_C( 18739), INT16_C(-14274), INT16_C(-17264), INT16_C( 27062), INT16_C(-10147), INT16_C(-16257), INT16_C(-30753), INT16_C(-12970), INT16_C(-31349), INT16_C(-27410), INT16_C( 9498), INT16_C(-22448), INT16_C( 7761), INT16_C(-32141), INT16_C(-28848), 
INT16_C(-17906)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C(-128)) }, { UINT32_C( 9885587), simde_mm512_set_epi16(INT16_C(-16097), INT16_C( -410), INT16_C( -7690), INT16_C( 17883), INT16_C(-32433), INT16_C( -4153), INT16_C( 21529), INT16_C(-10594), INT16_C(-12120), INT16_C( 16514), INT16_C( 14165), INT16_C( 5989), INT16_C( 25322), INT16_C( 13088), INT16_C(-23253), INT16_C( 31913), INT16_C(-11403), INT16_C( -8872), INT16_C( 9278), INT16_C(-13002), INT16_C( 2661), INT16_C( 8241), INT16_C(-31365), INT16_C(-28794), INT16_C( 31420), INT16_C( 5279), INT16_C(-11307), INT16_C( 8621), INT16_C( 25450), INT16_C(-29219), INT16_C( 7142), INT16_C( 6454)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 127)) }, { UINT32_C( 967669), simde_mm512_set_epi16(INT16_C( 3548), INT16_C( 27037), INT16_C(-23906), INT16_C(-13973), INT16_C( 29247), INT16_C( -8471), INT16_C( -1085), INT16_C( 30870), INT16_C( 25891), INT16_C(-21158), INT16_C( 12488), INT16_C( -6908), INT16_C( -1323), INT16_C(-29158), INT16_C( 19744), INT16_C( -9433), INT16_C( 32012), INT16_C(-30047), INT16_C( 12083), INT16_C( 5703), INT16_C(-23507), INT16_C( 9265), INT16_C( 9307), INT16_C( 19873), INT16_C( -2618), INT16_C( -1149), INT16_C( 18681), INT16_C( 7098), 
INT16_C(-13814), INT16_C( 19632), INT16_C(-21387), INT16_C(-19281)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C(-128)) }, { UINT32_C( 13558935), simde_mm512_set_epi16(INT16_C( 14268), INT16_C(-17608), INT16_C( 4735), INT16_C( 29092), INT16_C( 3348), INT16_C( 20490), INT16_C( 9048), INT16_C(-14230), INT16_C(-24731), INT16_C(-20916), INT16_C( 15138), INT16_C( 30554), INT16_C( 12356), INT16_C(-12154), INT16_C( -1803), INT16_C( -2954), INT16_C( -6857), INT16_C( -6626), INT16_C(-23034), INT16_C( 23374), INT16_C( 26785), INT16_C(-11163), INT16_C( 28429), INT16_C(-31775), INT16_C(-28701), INT16_C(-18813), INT16_C( 21610), INT16_C(-25977), INT16_C(-27129), INT16_C( 8170), INT16_C( 161), INT16_C(-12150)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C(-128)) }, { UINT32_C( 6783925), simde_mm512_set_epi16(INT16_C( 32147), INT16_C(-28462), INT16_C( 24823), INT16_C( 22687), INT16_C(-15505), INT16_C( -251), INT16_C( -9246), INT16_C( -8101), INT16_C( 27261), INT16_C( -2090), INT16_C( 2749), INT16_C(-17423), INT16_C(-32605), INT16_C(-12993), INT16_C(-12051), INT16_C(-22528), INT16_C( 5519), INT16_C( 17458), INT16_C( 3823), INT16_C( -9707), INT16_C( 17446), INT16_C( -3443), INT16_C( 24740), INT16_C( -9667), INT16_C( 27294), INT16_C( 
16674), INT16_C( -5346), INT16_C(-13180), INT16_C( -6683), INT16_C( 30235), INT16_C(-31280), INT16_C( -5669)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C(-128)) }, { UINT32_C( 15283468), simde_mm512_set_epi16(INT16_C( 28742), INT16_C(-21667), INT16_C( 30573), INT16_C( -5607), INT16_C( -5587), INT16_C( -3656), INT16_C( 30432), INT16_C(-24516), INT16_C( -4902), INT16_C(-30912), INT16_C( -9873), INT16_C( -3317), INT16_C( 28529), INT16_C( 10810), INT16_C( 14756), INT16_C(-11980), INT16_C( 8230), INT16_C( 14663), INT16_C(-28044), INT16_C( 14810), INT16_C( 3087), INT16_C( 22573), INT16_C(-19115), INT16_C( 26647), INT16_C( 3404), INT16_C(-13826), INT16_C( 16604), INT16_C( 11097), INT16_C( 31149), INT16_C(-30463), INT16_C(-25032), INT16_C(-30305)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 0), INT8_C( 0)) }, { UINT32_C( 5301351), simde_mm512_set_epi16(INT16_C( 16857), INT16_C( 3751), INT16_C(-17844), INT16_C( 25466), INT16_C( 11414), INT16_C(-10297), INT16_C( 19393), INT16_C(-20590), INT16_C(-15444), INT16_C(-25233), INT16_C( 18470), INT16_C(-31576), INT16_C(-31191), INT16_C(-25805), INT16_C( 18489), INT16_C(-13757), INT16_C( 25106), INT16_C( 2901), INT16_C( 15043), INT16_C( 7932), INT16_C( 16442), INT16_C(-31838), INT16_C( 6259), 
INT16_C( -3601), INT16_C( 27012), INT16_C( 21875), INT16_C(-15231), INT16_C( -4210), INT16_C( 759), INT16_C(-14477), INT16_C( 15177), INT16_C( 12476)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C( 127)) }, { UINT32_C( 1441628), simde_mm512_set_epi16(INT16_C( 4257), INT16_C( 13148), INT16_C(-26937), INT16_C( 3270), INT16_C(-27177), INT16_C( -7723), INT16_C(-15174), INT16_C(-31459), INT16_C(-29502), INT16_C(-16960), INT16_C( -675), INT16_C( 18469), INT16_C( -771), INT16_C( 21931), INT16_C( -7514), INT16_C( 4385), INT16_C( 10140), INT16_C(-13187), INT16_C(-23393), INT16_C( 14533), INT16_C(-11107), INT16_C( 28601), INT16_C( -7649), INT16_C( -2698), INT16_C( 1941), INT16_C( 20227), INT16_C( 6389), INT16_C( 8444), INT16_C(-15796), INT16_C( 30043), INT16_C( 18714), INT16_C(-25916)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 0), INT8_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm512_maskz_cvtsepi16_epi8(test_vec[i].k, test_vec[i].a); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_cvtsepi32_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m128i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 699026811), 
INT32_C( -375318237), INT32_C( 900674930), INT32_C( -532379219), INT32_C( 259624037), INT32_C( -680802854), INT32_C( 1547540196), INT32_C( -396867814), INT32_C( -680186334), INT32_C( -646546417), INT32_C( 1050185959), INT32_C( 210164141), INT32_C( -363246440), INT32_C( -713541510), INT32_C(-2055712079), INT32_C( 1173262878)), simde_mm_set_epi8(INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127)) }, { simde_mm512_set_epi32(INT32_C( -577654055), INT32_C( 1305263210), INT32_C(-1667480872), INT32_C(-1872423369), INT32_C( 1597611840), INT32_C( 1615412737), INT32_C( -741695305), INT32_C(-2095777141), INT32_C(-1892042790), INT32_C( 882313442), INT32_C( -646755171), INT32_C( 911535864), INT32_C( 1767815726), INT32_C( 1917649804), INT32_C(-2128748387), INT32_C( -486061354)), simde_mm_set_epi8(INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C(-128)) }, { simde_mm512_set_epi32(INT32_C( 1571463905), INT32_C( -29910853), INT32_C( -395190633), INT32_C(-1244489996), INT32_C( -209986706), INT32_C( 1448112069), INT32_C( 665030078), INT32_C( -422459827), INT32_C( 944452940), INT32_C( -118153072), INT32_C(-1276500342), INT32_C( -169030554), INT32_C( -426966016), INT32_C(-1701128651), INT32_C( 2062590869), INT32_C( -398665788)), simde_mm_set_epi8(INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(-128)) }, { simde_mm512_set_epi32(INT32_C(-1419625975), INT32_C(-1694295907), INT32_C( 1865288725), INT32_C(-1595213023), INT32_C( 1905686792), INT32_C( 988618194), INT32_C( 317245247), INT32_C( 
1735742912), INT32_C(-1998627679), INT32_C( 1116885338), INT32_C(-1515593887), INT32_C( 2092259348), INT32_C(-1378434205), INT32_C(-1489378877), INT32_C(-2045535759), INT32_C( -659443643)), simde_mm_set_epi8(INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128)) }, { simde_mm512_set_epi32(INT32_C(-2129053891), INT32_C( 1544504796), INT32_C( -507196434), INT32_C( 1930810889), INT32_C( 483646719), INT32_C(-1089855428), INT32_C( 2145343900), INT32_C( -881615810), INT32_C( 1970504702), INT32_C( 1583273127), INT32_C( 1163729707), INT32_C(-2126388173), INT32_C( -806581107), INT32_C(-1698532218), INT32_C(-1392314532), INT32_C(-1512591869)), simde_mm_set_epi8(INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128)) }, { simde_mm512_set_epi32(INT32_C( 1328977658), INT32_C( 842060052), INT32_C(-1244029168), INT32_C( 509188382), INT32_C( 982259454), INT32_C(-1037867093), INT32_C( 686186344), INT32_C( 324188035), INT32_C( 1498892321), INT32_C( 508595369), INT32_C( -602085962), INT32_C(-1205094693), INT32_C( -758631374), INT32_C(-1994460640), INT32_C( 1443985396), INT32_C(-1807079691)), simde_mm_set_epi8(INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(-128)) }, { simde_mm512_set_epi32(INT32_C( 1080310926), INT32_C(-1355901593), INT32_C( 256636159), INT32_C( 1796880446), INT32_C(-1357394192), INT32_C( 1762335377), INT32_C( -735301194), INT32_C( 230956482), INT32_C( 1868400485), INT32_C( -33472709), INT32_C( 1961785078), INT32_C( 1853410726), INT32_C(-1472588796), INT32_C( 
1430279086), INT32_C( -445655125), INT32_C( 1987885572)), simde_mm_set_epi8(INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127)) }, { simde_mm512_set_epi32(INT32_C( 270394269), INT32_C( -407998192), INT32_C(-1084410926), INT32_C(-1333891767), INT32_C( 1497122895), INT32_C( -215247173), INT32_C(-1835983824), INT32_C( -84201182), INT32_C(-1074778144), INT32_C( 1293769723), INT32_C( 1950241896), INT32_C(-1359838077), INT32_C( 595655233), INT32_C( 2095576879), INT32_C(-1995468186), INT32_C(-1602668019)), simde_mm_set_epi8(INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C(-128)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm512_cvtsepi32_epi8(test_vec[i].a); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_cvtsepi32_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i src; simde__mmask16 k; simde__m512i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( 4), INT8_C( 110), INT8_C( 8), INT8_C( 124), INT8_C( -85), INT8_C( -65), INT8_C( -62), INT8_C(-114), INT8_C( 110), INT8_C( 33), INT8_C( -28), INT8_C( 106), INT8_C( 107), INT8_C( 102), INT8_C( 89), INT8_C( 124)), UINT16_C(12710), simde_mm512_set_epi32(INT32_C(-1797606071), INT32_C( 433393018), INT32_C( 712958933), INT32_C( -930490644), INT32_C( 760893683), INT32_C(-2027734617), INT32_C( 1683947105), INT32_C( -424320007), INT32_C( 107722959), INT32_C( -13745640), INT32_C(-1276316442), INT32_C(-1722135079), INT32_C( 1405989540), INT32_C(-1209232739), INT32_C( 265793764), INT32_C(-1369604571)), simde_mm_set_epi8(INT8_C( 4), INT8_C( 110), INT8_C( 127), INT8_C(-128), INT8_C( 
-85), INT8_C( -65), INT8_C( -62), INT8_C(-128), INT8_C( 127), INT8_C( 33), INT8_C(-128), INT8_C( 106), INT8_C( 107), INT8_C(-128), INT8_C( 127), INT8_C( 124)) }, { simde_mm_set_epi8(INT8_C(-103), INT8_C(-117), INT8_C( 21), INT8_C( -66), INT8_C( 4), INT8_C( 101), INT8_C(-112), INT8_C( -12), INT8_C( -5), INT8_C( -50), INT8_C( 26), INT8_C( -76), INT8_C( -83), INT8_C( -93), INT8_C( 76), INT8_C( -53)), UINT16_C(12983), simde_mm512_set_epi32(INT32_C( -521071455), INT32_C( -376568333), INT32_C(-1920414563), INT32_C(-1600156196), INT32_C( -699126682), INT32_C( 420932189), INT32_C( 368322579), INT32_C( 983236120), INT32_C( 564176453), INT32_C( -258554655), INT32_C(-1141343253), INT32_C( -63087550), INT32_C( 1360677357), INT32_C(-1430308733), INT32_C( -736696329), INT32_C( 172239397)), simde_mm_set_epi8(INT8_C(-103), INT8_C(-117), INT8_C(-128), INT8_C(-128), INT8_C( 4), INT8_C( 101), INT8_C( 127), INT8_C( -12), INT8_C( 127), INT8_C( -50), INT8_C(-128), INT8_C(-128), INT8_C( -83), INT8_C(-128), INT8_C(-128), INT8_C( 127)) }, { simde_mm_set_epi8(INT8_C( 103), INT8_C( -24), INT8_C( -57), INT8_C( -16), INT8_C( -92), INT8_C( -81), INT8_C(-124), INT8_C( 85), INT8_C( 75), INT8_C( 77), INT8_C( 63), INT8_C( 75), INT8_C( 108), INT8_C( -84), INT8_C( -66), INT8_C( -39)), UINT16_C(58066), simde_mm512_set_epi32(INT32_C( 123157162), INT32_C( -87325004), INT32_C( -74930983), INT32_C( 1838315906), INT32_C( -81455038), INT32_C( -220592487), INT32_C( 1565085824), INT32_C( 60698648), INT32_C( 1617237598), INT32_C( 1724869908), INT32_C( 1766262170), INT32_C(-1382435444), INT32_C( -17747961), INT32_C( -126136440), INT32_C( -844801591), INT32_C(-1631191639)), simde_mm_set_epi8(INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( -16), INT8_C( -92), INT8_C( -81), INT8_C( 127), INT8_C( 85), INT8_C( 127), INT8_C( 127), INT8_C( 63), INT8_C(-128), INT8_C( 108), INT8_C( -84), INT8_C(-128), INT8_C( -39)) }, { simde_mm_set_epi8(INT8_C( 26), INT8_C(-116), INT8_C( -21), INT8_C( -72), INT8_C( 76), INT8_C( -52), 
INT8_C( -71), INT8_C( 116), INT8_C( 41), INT8_C(-107), INT8_C( -34), INT8_C( 88), INT8_C( 29), INT8_C( 112), INT8_C( 89), INT8_C( 127)), UINT16_C(63967), simde_mm512_set_epi32(INT32_C(-1851657392), INT32_C( 834971347), INT32_C( 96523257), INT32_C( 1786925287), INT32_C( -210779015), INT32_C( 1982975326), INT32_C( 1450447838), INT32_C( 2089131393), INT32_C(-1680682639), INT32_C( -651728462), INT32_C(-1976749038), INT32_C(-1119382179), INT32_C( 674486709), INT32_C( -51017687), INT32_C( -85403955), INT32_C( -771014813)), simde_mm_set_epi8(INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( -52), INT8_C( -71), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( -34), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128)) }, { simde_mm_set_epi8(INT8_C( -70), INT8_C( -89), INT8_C( 47), INT8_C( 24), INT8_C( 102), INT8_C( -2), INT8_C( 77), INT8_C( 93), INT8_C( 8), INT8_C(-121), INT8_C(-101), INT8_C( 37), INT8_C( 18), INT8_C(-107), INT8_C( 62), INT8_C( -51)), UINT16_C(26024), simde_mm512_set_epi32(INT32_C( 1802290864), INT32_C(-1214702907), INT32_C( 821368379), INT32_C( 1594494278), INT32_C( 958208413), INT32_C(-1231281503), INT32_C( 509306793), INT32_C( 1685930534), INT32_C( -983619151), INT32_C( 293070963), INT32_C( 1991494863), INT32_C( 1867270897), INT32_C(-1968548850), INT32_C(-1694953429), INT32_C( 342647810), INT32_C( 475783090)), simde_mm_set_epi8(INT8_C( -70), INT8_C(-128), INT8_C( 127), INT8_C( 24), INT8_C( 102), INT8_C(-128), INT8_C( 77), INT8_C( 127), INT8_C(-128), INT8_C(-121), INT8_C( 127), INT8_C( 37), INT8_C(-128), INT8_C(-107), INT8_C( 62), INT8_C( -51)) }, { simde_mm_set_epi8(INT8_C( 39), INT8_C( 109), INT8_C( 102), INT8_C( -61), INT8_C( -24), INT8_C( 15), INT8_C( 101), INT8_C( -27), INT8_C( 60), INT8_C(-116), INT8_C( 88), INT8_C( 9), INT8_C( 98), INT8_C( -87), INT8_C( -42), INT8_C( 93)), UINT16_C(31919), simde_mm512_set_epi32(INT32_C(-2040024665), INT32_C( 961434729), INT32_C(-1271404275), INT32_C( 1259406574), 
INT32_C(-1600364807), INT32_C( 1013354479), INT32_C( 720414379), INT32_C( -600382162), INT32_C( 1552024491), INT32_C(-1608825765), INT32_C( 1791475411), INT32_C( 664867443), INT32_C( 674611871), INT32_C( 2098484402), INT32_C(-1985583060), INT32_C(-2005775341)), simde_mm_set_epi8(INT8_C( 39), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 101), INT8_C( -27), INT8_C( 127), INT8_C(-116), INT8_C( 127), INT8_C( 9), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C(-128)) }, { simde_mm_set_epi8(INT8_C( -81), INT8_C( 41), INT8_C( 25), INT8_C( -5), INT8_C( -64), INT8_C( 70), INT8_C( 124), INT8_C( -34), INT8_C( -2), INT8_C(-101), INT8_C( 45), INT8_C( -46), INT8_C( 80), INT8_C( 30), INT8_C( -59), INT8_C(-110)), UINT16_C(11111), simde_mm512_set_epi32(INT32_C( -897513851), INT32_C( 631520322), INT32_C( 634536018), INT32_C(-2012793591), INT32_C( 1627606334), INT32_C(-1721387558), INT32_C(-1230866277), INT32_C(-1212419278), INT32_C( 577821894), INT32_C( 1429584547), INT32_C(-2040986931), INT32_C( 2040118421), INT32_C( 936070741), INT32_C( 920738578), INT32_C(-1305390023), INT32_C(-1363026646)), simde_mm_set_epi8(INT8_C( -81), INT8_C( 41), INT8_C( 127), INT8_C( -5), INT8_C( 127), INT8_C( 70), INT8_C(-128), INT8_C(-128), INT8_C( -2), INT8_C( 127), INT8_C(-128), INT8_C( -46), INT8_C( 80), INT8_C( 127), INT8_C(-128), INT8_C(-128)) }, { simde_mm_set_epi8(INT8_C(-107), INT8_C( -52), INT8_C( 65), INT8_C( 94), INT8_C( -32), INT8_C( -76), INT8_C( 70), INT8_C( -85), INT8_C( -67), INT8_C( 72), INT8_C( 36), INT8_C( -31), INT8_C( 47), INT8_C( 126), INT8_C( 14), INT8_C(-117)), UINT16_C(53720), simde_mm512_set_epi32(INT32_C( -215671891), INT32_C( 1165933857), INT32_C( 229060973), INT32_C(-1525363693), INT32_C(-1670687960), INT32_C( 159553420), INT32_C( -802431529), INT32_C(-1862869598), INT32_C(-1575511501), INT32_C( 1180622586), INT32_C( 1144679036), INT32_C( 1714786815), INT32_C( 1463480444), INT32_C( 1916349718), INT32_C( -744963447), INT32_C(-1790859765)), 
simde_mm_set_epi8(INT8_C(-128), INT8_C( 127), INT8_C( 65), INT8_C(-128), INT8_C( -32), INT8_C( -76), INT8_C( 70), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 36), INT8_C( 127), INT8_C( 127), INT8_C( 126), INT8_C( 14), INT8_C(-117)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm512_mask_cvtsepi32_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } /* Test for simde_mm512_maskz_cvtsepi32_epi8 using 8 fixed vectors: k is the 16-bit lane mask, a holds sixteen 32-bit inputs, r is the expected 128-bit result. In the vectors below a lane whose mask bit is clear is expected to be 0 and every selected lane is expected to be 127 or -128. NOTE(review): all inputs here appear to lie outside the int8 range, so the non-saturating path looks untested by this function - confirm. */ static int test_simde_mm512_maskz_cvtsepi32_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512i a; simde__m128i r; } test_vec[8] = { { UINT16_C(22908), simde_mm512_set_epi32(INT32_C( 760893683), INT32_C(-2027734617), INT32_C( 1683947105), INT32_C( -424320007), INT32_C( 107722959), INT32_C( -13745640), INT32_C(-1276316442), INT32_C(-1722135079), INT32_C( 1405989540), INT32_C(-1209232739), INT32_C( 265793764), INT32_C(-1369604571), INT32_C( -598522698), INT32_C( 1529360806), INT32_C( 74319996), INT32_C(-1413496178)), simde_mm_set_epi8(INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 0), INT8_C( 0)) }, { UINT16_C(55020), simde_mm512_set_epi32(INT32_C( 564176453), INT32_C( -258554655), INT32_C(-1141343253), INT32_C( -63087550), INT32_C( 1360677357), INT32_C(-1430308733), INT32_C( -736696329), INT32_C( 172239397), INT32_C( 696452905), INT32_C( 1625371319), INT32_C(-1718938178), INT32_C( 73765108), INT32_C( -70378828), INT32_C(-1381806901), INT32_C(-1797606071), INT32_C( 433393018)), simde_mm_set_epi8(INT8_C( 127), INT8_C(-128), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C( 0)) }, { UINT16_C(65048), simde_mm512_set_epi32(INT32_C( -17747961), INT32_C( -126136440), INT32_C( -844801591), INT32_C(-1631191639), 
INT32_C( 1959138923), INT32_C( 1255989970), INT32_C( 1743308784), INT32_C(-1532001195), INT32_C( 1263353675), INT32_C( 1823260377), INT32_C( -521071455), INT32_C( -376568333), INT32_C(-1920414563), INT32_C(-1600156196), INT32_C( -699126682), INT32_C( 420932189)), simde_mm_set_epi8(INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { UINT16_C(46476), simde_mm512_set_epi32(INT32_C( 1491415646), INT32_C( -327353889), INT32_C( 445443000), INT32_C( 1288485236), INT32_C( 697687640), INT32_C( 493902207), INT32_C( 123157162), INT32_C( -87325004), INT32_C( -74930983), INT32_C( 1838315906), INT32_C( -81455038), INT32_C( -220592487), INT32_C( 1565085824), INT32_C( 60698648), INT32_C( 1617237598), INT32_C( 1724869908)), simde_mm_set_epi8(INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C( 0), INT8_C( 0)) }, { UINT16_C(16227), simde_mm512_set_epi32(INT32_C( 143104805), INT32_C( 311770829), INT32_C(-1851657392), INT32_C( 834971347), INT32_C( 96523257), INT32_C( 1786925287), INT32_C( -210779015), INT32_C( 1982975326), INT32_C( 1450447838), INT32_C( 2089131393), INT32_C(-1680682639), INT32_C( -651728462), INT32_C(-1976749038), INT32_C(-1119382179), INT32_C( 674486709), INT32_C( -51017687)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128)) }, { UINT16_C(19805), simde_mm512_set_epi32(INT32_C( 821368379), INT32_C( 1594494278), INT32_C( 958208413), INT32_C(-1231281503), INT32_C( 509306793), INT32_C( 1685930534), INT32_C( -983619151), INT32_C( 293070963), INT32_C( 1991494863), INT32_C( 1867270897), 
INT32_C(-1968548850), INT32_C(-1694953429), INT32_C( 342647810), INT32_C( 475783090), INT32_C(-1901860575), INT32_C(-1695455832)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 0), INT8_C(-128)) }, { UINT16_C( 6853), simde_mm512_set_epi32(INT32_C( 720414379), INT32_C( -600382162), INT32_C( 1552024491), INT32_C(-1608825765), INT32_C( 1791475411), INT32_C( 664867443), INT32_C( 674611871), INT32_C( 2098484402), INT32_C(-1985583060), INT32_C(-2005775341), INT32_C(-1904713988), INT32_C( -21726033), INT32_C( 661481155), INT32_C( -401644059), INT32_C( 1015830537), INT32_C( 1655297629)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 127)) }, { UINT16_C(36847), simde_mm512_set_epi32(INT32_C(-2040986931), INT32_C( 2040118421), INT32_C( 936070741), INT32_C( 920738578), INT32_C(-1305390023), INT32_C(-1363026646), INT32_C( 965440518), INT32_C(-1619252377), INT32_C(-1356260869), INT32_C(-1069122338), INT32_C( -23384622), INT32_C( 1344193938), INT32_C(-2040024665), INT32_C( 961434729), INT32_C(-1271404275), INT32_C( 1259406574)), simde_mm_set_epi8(INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127)) } }; /* Evaluate every vector and require the result to equal the expected r. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm512_maskz_cvtsepi32_epi8(test_vec[i].k, test_vec[i].a); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } /* Test for simde_mm512_cvtsepi32_epi16 using 8 fixed vectors: a holds sixteen 32-bit inputs and r is the expected 256-bit result of sixteen 16-bit lanes. Every expected lane in the vectors below is 32767 or -32768. NOTE(review): no input here appears to fit in int16, so the non-saturating path looks untested by this function - confirm. */ static int test_simde_mm512_cvtsepi32_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m256i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( -633826313), 
INT32_C( -624858207), INT32_C( 1490386470), INT32_C(-2098903851), INT32_C(-1539984349), INT32_C( 1958536651), INT32_C(-1468703883), INT32_C( -330293651), INT32_C( 2111698546), INT32_C(-1712476271), INT32_C( 1928035775), INT32_C( 815855626), INT32_C(-1671100889), INT32_C(-1198252639), INT32_C(-1543874579), INT32_C( 564524240)), simde_mm256_set_epi16(INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767)) }, { simde_mm512_set_epi32(INT32_C( 1797360798), INT32_C( 205569425), INT32_C( 1156136221), INT32_C( 1772840561), INT32_C( 2126360842), INT32_C( 1592304637), INT32_C( 1062735606), INT32_C( -473740091), INT32_C( 1443682851), INT32_C( -765250611), INT32_C( -738272039), INT32_C( 1732082686), INT32_C( -881631866), INT32_C( -99399017), INT32_C( 252546807), INT32_C(-1695736037)), simde_mm256_set_epi16(INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768)) }, { simde_mm512_set_epi32(INT32_C( -260564878), INT32_C( 427961207), INT32_C(-1056239301), INT32_C( 1225809734), INT32_C( 213123881), INT32_C( 379025644), INT32_C( 1973103858), INT32_C(-2053346996), INT32_C(-1954883006), INT32_C( 667947760), INT32_C( 184139306), INT32_C( 168675179), INT32_C(-1651207657), INT32_C(-1932451475), INT32_C( 207773420), INT32_C(-1587464239)), simde_mm256_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768)) }, { simde_mm512_set_epi32(INT32_C( 
-301891179), INT32_C( 1118988231), INT32_C( -81115331), INT32_C( 95289597), INT32_C( 79711365), INT32_C( -252265140), INT32_C(-1901421271), INT32_C( -86214221), INT32_C( 494361026), INT32_C( 1393707090), INT32_C( 1367330444), INT32_C( 1529415750), INT32_C( -219576728), INT32_C( 785923707), INT32_C(-1615246269), INT32_C( -860551446)), simde_mm256_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768)) }, { simde_mm512_set_epi32(INT32_C( -668182583), INT32_C( 778563571), INT32_C( 1803655402), INT32_C( 668307371), INT32_C( 166146078), INT32_C( 2089905810), INT32_C( 540370), INT32_C( -610320208), INT32_C( -644851790), INT32_C( 1731401690), INT32_C( 1162165084), INT32_C( -702531425), INT32_C( 1412040872), INT32_C( -506509249), INT32_C(-1429255709), INT32_C( 1954724351)), simde_mm256_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767)) }, { simde_mm512_set_epi32(INT32_C( 895849603), INT32_C( 1425609255), INT32_C( 649927889), INT32_C( 1727611756), INT32_C( 1297009704), INT32_C(-1218308449), INT32_C(-1305018130), INT32_C( 1967276695), INT32_C( -413179024), INT32_C(-1726322338), INT32_C( 1840183236), INT32_C( 284256212), INT32_C( 1788242258), INT32_C( 806002384), INT32_C( -365421949), INT32_C(-2081268803)), simde_mm256_set_epi16(INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768)) }, { 
simde_mm512_set_epi32(INT32_C( -850026299), INT32_C( 763115562), INT32_C( -698858672), INT32_C(-2063362395), INT32_C(-1349430084), INT32_C( 963716730), INT32_C( 1798713564), INT32_C(-1893101926), INT32_C( 662898969), INT32_C( 188387073), INT32_C( -332183490), INT32_C(-1234385790), INT32_C( 839752488), INT32_C( 1421488669), INT32_C( 544371590), INT32_C( 832408505)), simde_mm256_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767)) }, { simde_mm512_set_epi32(INT32_C( 1070222476), INT32_C( 1830989582), INT32_C( 732250943), INT32_C(-1460385002), INT32_C(-1298211555), INT32_C(-1342658291), INT32_C( 646581591), INT32_C(-2104719353), INT32_C( 1887679149), INT32_C( 1571180495), INT32_C( 1241438931), INT32_C( 97459949), INT32_C( 1558770394), INT32_C(-1780666235), INT32_C( 1621650638), INT32_C( -918750867)), simde_mm256_set_epi16(INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768)) } }; /* Evaluate every vector and require the result to equal the expected r. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm512_cvtsepi32_epi16(test_vec[i].a); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } /* Test for simde_mm512_mask_cvtsepi32_epi16 using 8 fixed vectors: src supplies the pass-through 16-bit lanes, k is the 16-bit lane mask, a holds sixteen 32-bit inputs, r is the expected result. In the vectors below a lane whose mask bit is clear is expected to keep the src value and every selected lane is expected to be 32767 or -32768. */ static int test_simde_mm512_mask_cvtsepi32_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i src; simde__mmask16 k; simde__m512i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C( 4055), INT16_C(-20252), INT16_C(-20899), INT16_C( 32293), INT16_C( -9133), INT16_C( 17590), INT16_C( 23336), INT16_C( 12710), INT16_C( 1134), INT16_C( 2172), INT16_C(-21569), INT16_C(-15730), INT16_C( 28193), INT16_C( -7062), INT16_C( 27494), INT16_C( 22908)), 
UINT16_C(37533), simde_mm512_set_epi32(INT32_C(-1718938178), INT32_C( 73765108), INT32_C( -70378828), INT32_C(-1381806901), INT32_C(-1797606071), INT32_C( 433393018), INT32_C( 712958933), INT32_C( -930490644), INT32_C( 760893683), INT32_C(-2027734617), INT32_C( 1683947105), INT32_C( -424320007), INT32_C( 107722959), INT32_C( -13745640), INT32_C(-1276316442), INT32_C(-1722135079)), simde_mm256_set_epi16(INT16_C(-32768), INT16_C(-20252), INT16_C(-20899), INT16_C(-32768), INT16_C( -9133), INT16_C( 17590), INT16_C( 32767), INT16_C( 12710), INT16_C( 32767), INT16_C( 2172), INT16_C(-21569), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C( 27494), INT16_C(-32768)) }, { simde_mm256_set_epi16(INT16_C(-17416), INT16_C( 31723), INT16_C( -963), INT16_C( 23618), INT16_C( 20762), INT16_C( 18925), INT16_C(-21825), INT16_C( 14467), INT16_C(-11242), INT16_C( -6153), INT16_C( 2628), INT16_C( 10789), INT16_C( 10627), INT16_C( 1833), INT16_C( 24801), INT16_C( 12983)), UINT16_C(50401), simde_mm512_set_epi32(INT32_C( -844801591), INT32_C(-1631191639), INT32_C( 1959138923), INT32_C( 1255989970), INT32_C( 1743308784), INT32_C(-1532001195), INT32_C( 1263353675), INT32_C( 1823260377), INT32_C( -521071455), INT32_C( -376568333), INT32_C(-1920414563), INT32_C(-1600156196), INT32_C( -699126682), INT32_C( 420932189), INT32_C( 368322579), INT32_C( 983236120)), simde_mm256_set_epi16(INT16_C(-32768), INT16_C(-32768), INT16_C( -963), INT16_C( 23618), INT16_C( 20762), INT16_C(-32768), INT16_C(-21825), INT16_C( 14467), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C( 10789), INT16_C( 10627), INT16_C( 1833), INT16_C( 24801), INT16_C( 32767)) }, { simde_mm256_set_epi16(INT16_C( 23881), INT16_C( 20608), INT16_C( 926), INT16_C( 12312), INT16_C( 24677), INT16_C( 5726), INT16_C( 26319), INT16_C( 27924), INT16_C( 26951), INT16_C( 1434), INT16_C(-21095), INT16_C(-19060), INT16_C( -271), INT16_C( 12295), INT16_C( -1925), INT16_C( 20360)), UINT16_C( 1689), 
simde_mm512_set_epi32(INT32_C(-1976749038), INT32_C(-1119382179), INT32_C( 674486709), INT32_C( -51017687), INT32_C( -85403955), INT32_C( -771014813), INT32_C( 1491415646), INT32_C( -327353889), INT32_C( 445443000), INT32_C( 1288485236), INT32_C( 697687640), INT32_C( 493902207), INT32_C( 123157162), INT32_C( -87325004), INT32_C( -74930983), INT32_C( 1838315906)), simde_mm256_set_epi16(INT16_C( 23881), INT16_C( 20608), INT16_C( 926), INT16_C( 12312), INT16_C( 24677), INT16_C(-32768), INT16_C( 32767), INT16_C( 27924), INT16_C( 32767), INT16_C( 1434), INT16_C(-21095), INT16_C( 32767), INT16_C( 32767), INT16_C( 12295), INT16_C( -1925), INT16_C( 32767)) }, { simde_mm256_set_epi16(INT16_C( 1472), INT16_C(-11271), INT16_C( 27266), INT16_C( 20711), INT16_C( -3217), INT16_C(-15239), INT16_C( 30257), INT16_C(-12962), INT16_C( 22132), INT16_C( 5086), INT16_C( 31877), INT16_C(-25215), INT16_C(-25646), INT16_C(-11919), INT16_C( -9945), INT16_C( 27058)), UINT16_C(42707), simde_mm512_set_epi32(INT32_C( 509306793), INT32_C( 1685930534), INT32_C( -983619151), INT32_C( 293070963), INT32_C( 1991494863), INT32_C( 1867270897), INT32_C(-1968548850), INT32_C(-1694953429), INT32_C( 342647810), INT32_C( 475783090), INT32_C(-1901860575), INT32_C(-1695455832), INT32_C(-1163448552), INT32_C( 1727941981), INT32_C( 143104805), INT32_C( 311770829)), simde_mm256_set_epi16(INT16_C( 32767), INT16_C(-11271), INT16_C(-32768), INT16_C( 20711), INT16_C( -3217), INT16_C( 32767), INT16_C(-32768), INT16_C(-12962), INT16_C( 32767), INT16_C( 32767), INT16_C( 31877), INT16_C(-32768), INT16_C(-25646), INT16_C(-11919), INT16_C( 32767), INT16_C( 32767)) }, { simde_mm256_set_epi16(INT16_C( 15500), INT16_C( 22537), INT16_C( 25257), INT16_C(-10659), INT16_C( 27500), INT16_C(-14672), INT16_C(-18535), INT16_C( 6853), INT16_C( 12533), INT16_C( 5691), INT16_C( 24330), INT16_C( 3398), INT16_C( 14621), INT16_C( 6557), INT16_C(-18788), INT16_C( 8865)), UINT16_C(26085), simde_mm512_set_epi32(INT32_C(-1271404275), INT32_C( 
1259406574), INT32_C(-1600364807), INT32_C( 1013354479), INT32_C( 720414379), INT32_C( -600382162), INT32_C( 1552024491), INT32_C(-1608825765), INT32_C( 1791475411), INT32_C( 664867443), INT32_C( 674611871), INT32_C( 2098484402), INT32_C(-1985583060), INT32_C(-2005775341), INT32_C(-1904713988), INT32_C( -21726033)), simde_mm256_set_epi16(INT16_C( 15500), INT16_C( 32767), INT16_C(-32768), INT16_C(-10659), INT16_C( 27500), INT16_C(-32768), INT16_C(-18535), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 3398), INT16_C( 14621), INT16_C(-32768), INT16_C(-18788), INT16_C(-32768)) }, { simde_mm256_set_epi16(INT16_C( 14731), INT16_C( 29702), INT16_C(-24708), INT16_C( 11111), INT16_C(-20695), INT16_C( 6651), INT16_C(-16314), INT16_C( 31966), INT16_C( -357), INT16_C( 11730), INT16_C( 20510), INT16_C(-14958), INT16_C(-31129), INT16_C(-20057), INT16_C( 14670), INT16_C( 21609)), UINT16_C(56618), simde_mm512_set_epi32(INT32_C(-1119345439), INT32_C( 796790411), INT32_C( -897513851), INT32_C( 631520322), INT32_C( 634536018), INT32_C(-2012793591), INT32_C( 1627606334), INT32_C(-1721387558), INT32_C(-1230866277), INT32_C(-1212419278), INT32_C( 577821894), INT32_C( 1429584547), INT32_C(-2040986931), INT32_C( 2040118421), INT32_C( 936070741), INT32_C( 920738578)), simde_mm256_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C(-24708), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-16314), INT16_C(-32768), INT16_C( -357), INT16_C( 11730), INT16_C( 32767), INT16_C(-14958), INT16_C(-32768), INT16_C(-20057), INT16_C( 32767), INT16_C( 21609)) }, { simde_mm256_set_epi16(INT16_C( 22330), INT16_C( -3972), INT16_C( 29241), INT16_C( 11542), INT16_C(-11368), INT16_C(-15735), INT16_C(-27327), INT16_C(-23029), INT16_C( -8383), INT16_C(-32273), INT16_C( 3895), INT16_C(-11816), INT16_C(-27188), INT16_C( 16734), INT16_C( -8012), INT16_C( 18091)), UINT16_C(37375), simde_mm512_set_epi32(INT32_C( -221392236), INT32_C( -711063988), INT32_C( 2133711027), 
INT32_C( 509873922), INT32_C( 1729615213), INT32_C( -831792372), INT32_C( -215671891), INT32_C( 1165933857), INT32_C( 229060973), INT32_C(-1525363693), INT32_C(-1670687960), INT32_C( 159553420), INT32_C( -802431529), INT32_C(-1862869598), INT32_C(-1575511501), INT32_C( 1180622586)), simde_mm256_set_epi16(INT16_C(-32768), INT16_C( -3972), INT16_C( 29241), INT16_C( 32767), INT16_C(-11368), INT16_C(-15735), INT16_C(-27327), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767)) }, { simde_mm256_set_epi16(INT16_C( -456), INT16_C( -6277), INT16_C(-13154), INT16_C( 20424), INT16_C( 7701), INT16_C( 9153), INT16_C( 18208), INT16_C( 1720), INT16_C(-14211), INT16_C( 26958), INT16_C( 27767), INT16_C(-15259), INT16_C(-16868), INT16_C(-31112), INT16_C( -6815), INT16_C( 21503)), UINT16_C(29475), simde_mm512_set_epi32(INT32_C( 441964111), INT32_C(-1041336788), INT32_C( 1240459905), INT32_C( 1190379131), INT32_C( 660048661), INT32_C( 992314379), INT32_C( 13012539), INT32_C(-1751743734), INT32_C(-1323762798), INT32_C( 640110239), INT32_C( 1866432721), INT32_C(-1867947321), INT32_C(-2066921506), INT32_C( 704072659), INT32_C( 929476148), INT32_C(-1455589491)), simde_mm256_set_epi16(INT16_C( -456), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 7701), INT16_C( 9153), INT16_C( 32767), INT16_C(-32768), INT16_C(-14211), INT16_C( 26958), INT16_C( 32767), INT16_C(-15259), INT16_C(-16868), INT16_C(-31112), INT16_C( 32767), INT16_C(-32768)) } }; /* Evaluate every vector and require the result to equal the expected r. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm512_mask_cvtsepi32_epi16(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } /* Test for simde_mm512_maskz_cvtsepi32_epi16 using 8 fixed vectors: k is the 16-bit lane mask, a holds sixteen 32-bit inputs, r is the expected 256-bit result. In the vectors below a lane whose mask bit is clear is expected to be 0 and every selected lane is expected to be 32767 or -32768. */ static int test_simde_mm512_maskz_cvtsepi32_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512i a; simde__m256i r; } test_vec[8] = { { UINT16_C(22908), simde_mm512_set_epi32(INT32_C( 
760893683), INT32_C(-2027734617), INT32_C( 1683947105), INT32_C( -424320007), INT32_C( 107722959), INT32_C( -13745640), INT32_C(-1276316442), INT32_C(-1722135079), INT32_C( 1405989540), INT32_C(-1209232739), INT32_C( 265793764), INT32_C(-1369604571), INT32_C( -598522698), INT32_C( 1529360806), INT32_C( 74319996), INT32_C(-1413496178)), simde_mm256_set_epi16(INT16_C( 0), INT16_C(-32768), INT16_C( 0), INT16_C(-32768), INT16_C( 32767), INT16_C( 0), INT16_C( 0), INT16_C(-32768), INT16_C( 0), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 0), INT16_C( 0)) }, { UINT16_C(55020), simde_mm512_set_epi32(INT32_C( 564176453), INT32_C( -258554655), INT32_C(-1141343253), INT32_C( -63087550), INT32_C( 1360677357), INT32_C(-1430308733), INT32_C( -736696329), INT32_C( 172239397), INT32_C( 696452905), INT32_C( 1625371319), INT32_C(-1718938178), INT32_C( 73765108), INT32_C( -70378828), INT32_C(-1381806901), INT32_C(-1797606071), INT32_C( 433393018)), simde_mm256_set_epi16(INT16_C( 32767), INT16_C(-32768), INT16_C( 0), INT16_C(-32768), INT16_C( 0), INT16_C(-32768), INT16_C(-32768), INT16_C( 0), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 0), INT16_C(-32768), INT16_C(-32768), INT16_C( 0), INT16_C( 0)) }, { UINT16_C(65048), simde_mm512_set_epi32(INT32_C( -17747961), INT32_C( -126136440), INT32_C( -844801591), INT32_C(-1631191639), INT32_C( 1959138923), INT32_C( 1255989970), INT32_C( 1743308784), INT32_C(-1532001195), INT32_C( 1263353675), INT32_C( 1823260377), INT32_C( -521071455), INT32_C( -376568333), INT32_C(-1920414563), INT32_C(-1600156196), INT32_C( -699126682), INT32_C( 420932189)), simde_mm256_set_epi16(INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-32768), INT16_C(-32768), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { UINT16_C(46476), simde_mm512_set_epi32(INT32_C( 1491415646), INT32_C( 
-327353889), INT32_C( 445443000), INT32_C( 1288485236), INT32_C( 697687640), INT32_C( 493902207), INT32_C( 123157162), INT32_C( -87325004), INT32_C( -74930983), INT32_C( 1838315906), INT32_C( -81455038), INT32_C( -220592487), INT32_C( 1565085824), INT32_C( 60698648), INT32_C( 1617237598), INT32_C( 1724869908)), simde_mm256_set_epi16(INT16_C( 32767), INT16_C( 0), INT16_C( 32767), INT16_C( 32767), INT16_C( 0), INT16_C( 32767), INT16_C( 0), INT16_C(-32768), INT16_C(-32768), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 32767), INT16_C( 32767), INT16_C( 0), INT16_C( 0)) }, { UINT16_C(16227), simde_mm512_set_epi32(INT32_C( 143104805), INT32_C( 311770829), INT32_C(-1851657392), INT32_C( 834971347), INT32_C( 96523257), INT32_C( 1786925287), INT32_C( -210779015), INT32_C( 1982975326), INT32_C( 1450447838), INT32_C( 2089131393), INT32_C(-1680682639), INT32_C( -651728462), INT32_C(-1976749038), INT32_C(-1119382179), INT32_C( 674486709), INT32_C( -51017687)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C( 0), INT16_C( 32767), INT16_C(-32768), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 32767), INT16_C(-32768)) }, { UINT16_C(19805), simde_mm512_set_epi32(INT32_C( 821368379), INT32_C( 1594494278), INT32_C( 958208413), INT32_C(-1231281503), INT32_C( 509306793), INT32_C( 1685930534), INT32_C( -983619151), INT32_C( 293070963), INT32_C( 1991494863), INT32_C( 1867270897), INT32_C(-1968548850), INT32_C(-1694953429), INT32_C( 342647810), INT32_C( 475783090), INT32_C(-1901860575), INT32_C(-1695455832)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 32767), INT16_C( 0), INT16_C( 0), INT16_C( 32767), INT16_C( 32767), INT16_C( 0), INT16_C( 32767), INT16_C( 0), INT16_C( 32767), INT16_C( 0), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 0), INT16_C(-32768)) }, { UINT16_C( 6853), simde_mm512_set_epi32(INT32_C( 720414379), INT32_C( -600382162), INT32_C( 
1552024491), INT32_C(-1608825765), INT32_C( 1791475411), INT32_C( 664867443), INT32_C( 674611871), INT32_C( 2098484402), INT32_C(-1985583060), INT32_C(-2005775341), INT32_C(-1904713988), INT32_C( -21726033), INT32_C( 661481155), INT32_C( -401644059), INT32_C( 1015830537), INT32_C( 1655297629)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-32768), INT16_C( 32767), INT16_C( 0), INT16_C( 32767), INT16_C( 0), INT16_C(-32768), INT16_C(-32768), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-32768), INT16_C( 0), INT16_C( 32767)) }, { UINT16_C(36847), simde_mm512_set_epi32(INT32_C(-2040986931), INT32_C( 2040118421), INT32_C( 936070741), INT32_C( 920738578), INT32_C(-1305390023), INT32_C(-1363026646), INT32_C( 965440518), INT32_C(-1619252377), INT32_C(-1356260869), INT32_C(-1069122338), INT32_C( -23384622), INT32_C( 1344193938), INT32_C(-2040024665), INT32_C( 961434729), INT32_C(-1271404275), INT32_C( 1259406574)), simde_mm256_set_epi16(INT16_C(-32768), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C( 0), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767)) } }; /* Evaluate every vector and require the result to equal the expected r. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm512_maskz_cvtsepi32_epi16(test_vec[i].k, test_vec[i].a); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } /* Test for simde_mm512_cvtsepi64_epi8 using 8 fixed vectors: a holds eight 64-bit inputs and r is the expected 128-bit result. In every vector below the first eight lanes passed to simde_mm_set_epi8 are 0 and the remaining eight are 127 or -128. NOTE(review): no input here appears to fit in int8, so the non-saturating path looks untested by this function - confirm. */ static int test_simde_mm512_cvtsepi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m128i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( 5688878986790062607), INT64_C( -507006338933993777), INT64_C( 2731700857838766689), INT64_C( 5038766546414012764), INT64_C( 8031668245477288096), INT64_C( 8558843731862564067), INT64_C( 1445459848667085535), INT64_C( -83609451038325995)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 
127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128)) }, { simde_mm512_set_epi64(INT64_C( 8541199063845314870), INT64_C( 6183904058691300593), INT64_C( 4987917255037318931), INT64_C(-6461883516210235787), INT64_C(-2290177652468594952), INT64_C( -292256871616436513), INT64_C( 8655434451305349549), INT64_C( 2098144080369590904)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127)) }, { simde_mm512_set_epi64(INT64_C( 8921116119188857611), INT64_C(-7689618306950712122), INT64_C( 1518593000980919604), INT64_C( 4285824591643310377), INT64_C(-5934398452121416225), INT64_C( 3925581736765257012), INT64_C(-1280589966661740296), INT64_C(-6610380819072272984)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128)) }, { simde_mm512_set_epi64(INT64_C(-7117386000751069153), INT64_C(-4042166984900833962), INT64_C( 5586053217269340515), INT64_C(-7584975210127513398), INT64_C( 3657404296828616096), INT64_C(-2770219948143520502), INT64_C( 4495347402256698165), INT64_C( -398794429961863290)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128)) }, { simde_mm512_set_epi64(INT64_C(-7309298589786727464), INT64_C( 6351016797136664888), INT64_C( -796932205202576292), INT64_C( 5974868289408841942), INT64_C(-5246045528720698174), INT64_C( 5897717133320969396), INT64_C( 8855007060554502516), INT64_C( 1795078486534561817)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127), 
INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127)) }, { simde_mm512_set_epi64(INT64_C(-8235362769944622173), INT64_C(-1394979073669202107), INT64_C(-1990341901794640861), INT64_C(-5381553921365904442), INT64_C( 2258222102802041167), INT64_C( 8110919934642332525), INT64_C( 3859146607814864086), INT64_C( 5989226152769229183)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127)) }, { simde_mm512_set_epi64(INT64_C( 7694422639508462949), INT64_C(-2072064725478552758), INT64_C( 3296208651842008367), INT64_C(-5674066605955291159), INT64_C( 5400731755389512823), INT64_C( 4001845542455980591), INT64_C( 1263947732552642640), INT64_C( 1152590716152552703)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127)) }, { simde_mm512_set_epi64(INT64_C(-5372614012128921165), INT64_C(-2041172040005057172), INT64_C(-8233280709947200170), INT64_C( 1449409854914395254), INT64_C( 6224671476337807114), INT64_C( 4022680139615958951), INT64_C( -277976409085756360), INT64_C(-4710983618327201696)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C(-128)) } }; /* Evaluate every vector and require the result to equal the expected r. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm512_cvtsepi64_epi8(test_vec[i].a); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_cvtsepi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i src; simde__mmask8 k; simde__m512i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( -98), 
INT8_C(-118), INT8_C( -55), INT8_C( 84), INT8_C( -38), INT8_C(-100), INT8_C( 69), INT8_C( -11), INT8_C( -76), INT8_C( -44), INT8_C( 111), INT8_C( 57), INT8_C(-108), INT8_C( 117), INT8_C( -94), INT8_C( 99)), UINT8_C( 89), simde_mm512_set_epi64(INT64_C(-6464916445333598184), INT64_C(-9074050598773963479), INT64_C(-6773163771856001287), INT64_C( 8528895860955669022), INT64_C( 743395091751495893), INT64_C(-5753646205421077345), INT64_C( 6781795189594641427), INT64_C( 7308036127418939455)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -76), INT8_C(-128), INT8_C( 111), INT8_C( 127), INT8_C( 127), INT8_C( 117), INT8_C( -94), INT8_C( 127)) }, { simde_mm_set_epi8(INT8_C( -29), INT8_C( -88), INT8_C( 59), INT8_C( -11), INT8_C(-100), INT8_C(-112), INT8_C( 21), INT8_C( -50), INT8_C( 53), INT8_C( -68), INT8_C(-121), INT8_C( 94), INT8_C(-100), INT8_C( -25), INT8_C( 102), INT8_C( 120)), UINT8_C(108), simde_mm512_set_epi64(INT64_C(-8224370586313845882), INT64_C(-1993121323347227003), INT64_C( 8831696545371448920), INT64_C( 2185815767663877612), INT64_C( -270570670623534210), INT64_C(-7835605414239696560), INT64_C(-1637430132815230795), INT64_C( 7232010829548987022)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 53), INT8_C(-128), INT8_C( 127), INT8_C( 94), INT8_C(-128), INT8_C(-128), INT8_C( 102), INT8_C( 120)) }, { simde_mm_set_epi8(INT8_C( -93), INT8_C( -72), INT8_C( 3), INT8_C( -86), INT8_C(-106), INT8_C( 54), INT8_C( -93), INT8_C( 77), INT8_C( 111), INT8_C(-126), INT8_C( 23), INT8_C( -40), INT8_C( -4), INT8_C( -3), INT8_C( -74), INT8_C( 6)), UINT8_C(212), simde_mm512_set_epi64(INT64_C(-3956629285800542954), INT64_C( 7768661847448918660), INT64_C( 8414129281940348180), INT64_C(-3282225164507364365), INT64_C( 3972788221445796445), INT64_C(-1066584669561441235), INT64_C( 2182845185502488635), INT64_C(-2591044625591562731)), 
simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C( 23), INT8_C(-128), INT8_C( -4), INT8_C(-128), INT8_C( -74), INT8_C( 6)) }, { simde_mm_set_epi8(INT8_C( 95), INT8_C( 126), INT8_C( 102), INT8_C( -2), INT8_C( 37), INT8_C( -94), INT8_C( 95), INT8_C( -94), INT8_C(-125), INT8_C( -9), INT8_C( -95), INT8_C( 41), INT8_C( -4), INT8_C( -36), INT8_C( -88), INT8_C( 79)), UINT8_C( 80), simde_mm512_set_epi64(INT64_C( 5062048906197334415), INT64_C( -115992959878725320), INT64_C( 1136786795492082678), INT64_C(-1948109654186813959), INT64_C( 120681793319816518), INT64_C(-3982161075345546564), INT64_C(-4623140196798581694), INT64_C(-1633613586256931500)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-125), INT8_C(-128), INT8_C( -95), INT8_C(-128), INT8_C( -4), INT8_C( -36), INT8_C( -88), INT8_C( 79)) }, { simde_mm_set_epi8(INT8_C( -59), INT8_C( -75), INT8_C( -72), INT8_C( -42), INT8_C( -55), INT8_C(-119), INT8_C( -60), INT8_C( 67), INT8_C( 109), INT8_C( -73), INT8_C( 91), INT8_C( 51), INT8_C( 78), INT8_C( 83), INT8_C(-115), INT8_C( 62)), UINT8_C(249), simde_mm512_set_epi64(INT64_C( 5851174439164188465), INT64_C(-6319779881941429071), INT64_C( 1879763181654775792), INT64_C( 4044922446109520806), INT64_C( 1345671618220988839), INT64_C(-8367274893382946539), INT64_C( 4386554571925238747), INT64_C(-2885812676818812761)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 83), INT8_C(-115), INT8_C(-128)) }, { simde_mm_set_epi8(INT8_C( 79), INT8_C( 39), INT8_C( 96), INT8_C( 42), INT8_C( -85), INT8_C( 114), INT8_C( 97), INT8_C( -62), INT8_C( 8), INT8_C( 38), INT8_C( 93), INT8_C( 127), INT8_C(-112), INT8_C( 122), INT8_C(-125), INT8_C( -69)), UINT8_C( 46), 
simde_mm512_set_epi64(INT64_C(-5543414139022216695), INT64_C(-7351051862345725962), INT64_C( 7765938089573807601), INT64_C(-8520500957815241015), INT64_C(-4114690540063739335), INT64_C( 2127377446843006162), INT64_C( 4629741254325605495), INT64_C( 6283226406328232321)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( 38), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( -69)) }, { simde_mm_set_epi8(INT8_C( -81), INT8_C(-127), INT8_C( -86), INT8_C( 87), INT8_C( 54), INT8_C( 74), INT8_C( -3), INT8_C( 90), INT8_C( -47), INT8_C( -23), INT8_C( 73), INT8_C( 26), INT8_C( 65), INT8_C( 65), INT8_C( 86), INT8_C(-128)), UINT8_C(244), simde_mm512_set_epi64(INT64_C( 3168330803816226018), INT64_C(-5665190690683619181), INT64_C(-6479275574786056503), INT64_C(-4854337280374936929), INT64_C( 9110007504327473973), INT64_C( 2306966881245130829), INT64_C( 3371747741827789856), INT64_C( 4883552887250537067)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 65), INT8_C( 127), INT8_C( 86), INT8_C(-128)) }, { simde_mm_set_epi8(INT8_C( -36), INT8_C( -15), INT8_C( 94), INT8_C( 96), INT8_C( 41), INT8_C( 71), INT8_C(-125), INT8_C( 50), INT8_C( -5), INT8_C( -42), INT8_C( 6), INT8_C( 31), INT8_C( -82), INT8_C( -3), INT8_C( -84), INT8_C( 118)), UINT8_C( 84), simde_mm512_set_epi64(INT64_C(-5802305007366524132), INT64_C( 6368102336095963395), INT64_C( 26715019440636716), INT64_C( 3396530625665467524), INT64_C( 4010180268457941346), INT64_C( 5473964637387956828), INT64_C( 6527063328512873018), INT64_C( 7600981363267795639)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -5), INT8_C( 127), INT8_C( 6), INT8_C( 127), INT8_C( -82), INT8_C( 127), INT8_C( -84), INT8_C( 118)) } }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm512_mask_cvtsepi64_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_cvtsepi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; simde__m128i r; } test_vec[8] = { { UINT8_C( 99), simde_mm512_set_epi64(INT64_C(-6773163771856001287), INT64_C( 8528895860955669022), INT64_C( 743395091751495893), INT64_C(-5753646205421077345), INT64_C( 6781795189594641427), INT64_C( 7308036127418939455), INT64_C( 616165601606027604), INT64_C(-2694201555471274183)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128)) }, { UINT8_C( 41), simde_mm512_set_epi64(INT64_C( -270570670623534210), INT64_C(-7835605414239696560), INT64_C(-1637430132815230795), INT64_C( 7232010829548987022), INT64_C( 564423766678453237), INT64_C(-7165203031737464994), INT64_C(-7140626015800786754), INT64_C(-2499691201975648498)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C(-128)) }, { UINT8_C(236), simde_mm512_set_epi64(INT64_C( 2182845185502488635), INT64_C(-2591044625591562731), INT64_C( 634126652413576106), INT64_C(-7622725766299379752), INT64_C( -216817067777774970), INT64_C(-4785193871492054345), INT64_C( 7467476729839254839), INT64_C( 3956665538785416156)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C( 0)) }, { UINT8_C( 45), simde_mm512_set_epi64(INT64_C(-8852586683955255554), INT64_C( 
2711835077233844521), INT64_C( -226120820645741920), INT64_C(-5768129162479938358), INT64_C( 4995224830392729088), INT64_C(-9204129394426957414), INT64_C( 6102592944456478166), INT64_C(-5538879581396485404)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 127), INT8_C(-128), INT8_C( 0), INT8_C(-128)) }, { UINT8_C( 84), simde_mm512_set_epi64(INT64_C( 5644010056631452329), INT64_C( 687917764947732740), INT64_C( 9077991551678197137), INT64_C( 5379835588808993943), INT64_C( 5669915352590237555), INT64_C(-7084358722419653223), INT64_C(-5646349838748004740), INT64_C( 5115442951238204697)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0)) }, { UINT8_C( 51), simde_mm512_set_epi64(INT64_C( 6212879174325387943), INT64_C(-9195198218682338424), INT64_C(-6314975051621021387), INT64_C( 316124606243108816), INT64_C(-1018533604686287836), INT64_C( 7455766202425772200), INT64_C( -151643922692032263), INT64_C(-4200247844809358269)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128)) }, { UINT8_C(174), simde_mm512_set_epi64(INT64_C(-4824473637125509439), INT64_C( 2222300162020472452), INT64_C( 4485933577556469028), INT64_C( 3606420641901871392), INT64_C( 7533512282749681966), INT64_C( 5703633189469315522), INT64_C( 587259603884606395), INT64_C( 5851174439164188465)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 0)) }, { UINT8_C(133), 
simde_mm512_set_epi64(INT64_C(-2795345254402041164), INT64_C( -585762482098414214), INT64_C( 1037739329164009972), INT64_C(-5800167553526727334), INT64_C(-3321042873096251776), INT64_C(-5543414139022216695), INT64_C(-7351051862345725962), INT64_C( 7765938089573807601)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 127)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm512_maskz_cvtsepi64_epi8(test_vec[i].k, test_vec[i].a); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_cvtsepi64_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m128i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C(-2030970610590957423), INT64_C(-6407881172688895992), INT64_C( 5763311992085393311), INT64_C( 5413217893862876377), INT64_C( 2736248102150189299), INT64_C(-3905239043220002295), INT64_C(-8525695447592722282), INT64_C( 8985359849763220184)), simde_mm_set_epi16(INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767)) }, { simde_mm512_set_epi64(INT64_C( 5528142976218439915), INT64_C( 2800801417967116565), INT64_C(-5498223206200113790), INT64_C(-7735651953136898344), INT64_C( -45944962922223273), INT64_C( 7270573409050727432), INT64_C(-3005715738589766193), INT64_C( 8104807772879505)), simde_mm_set_epi16(INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767)) }, { simde_mm512_set_epi64(INT64_C( 1914743572707752888), INT64_C(-1522220651072788958), INT64_C( 774824395663784136), INT64_C(-5948238846445238616), INT64_C(-6703970016956217448), INT64_C(-4949181030820609545), INT64_C(-1484143836373726158), INT64_C( 7023453391392224586)), simde_mm_set_epi16(INT16_C( 
32767), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767)) }, { simde_mm512_set_epi64(INT64_C( 877787331628902271), INT64_C(-4271522850214152795), INT64_C( -36139528410486948), INT64_C( 8221636509979881717), INT64_C( 8317903343708448256), INT64_C( 273589563704309286), INT64_C(-3793565254975032249), INT64_C(-8693302524009918115)), simde_mm_set_epi16(INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768)) }, { simde_mm512_set_epi64(INT64_C( 8982303740345517000), INT64_C( 3826887655534630950), INT64_C( 3239767036578666476), INT64_C(-6247325025317220634), INT64_C( 7671225941352774255), INT64_C(-4520433707995010156), INT64_C( -202556859458160671), INT64_C( 8376976669674082741)), simde_mm_set_epi16(INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767)) }, { simde_mm512_set_epi64(INT64_C(-6735519140496673702), INT64_C(-3340797945830539185), INT64_C(-4234248954061729169), INT64_C(-8216832367433725114), INT64_C( 7806307918239696158), INT64_C( 1170496629302395613), INT64_C(-6389283806078878937), INT64_C( 6713220048127768064)), simde_mm_set_epi16(INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767)) }, { simde_mm512_set_epi64(INT64_C( 4236272701979881947), INT64_C( 3804598066273696798), INT64_C(-3344748723935369850), INT64_C( 1825331027945283861), INT64_C( 238286709106496427), INT64_C( 5804239832581888150), INT64_C( 140213339004639903), INT64_C(-8021982786001039013)), simde_mm_set_epi16(INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768)) }, { simde_mm512_set_epi64(INT64_C(-4062543779038977729), INT64_C( 2624543651353099153), INT64_C( 8730480163779377063), INT64_C( 1087955593743156912), 
INT64_C(-7352034341207303173), INT64_C( -816150536575471214), INT64_C( 427511002321984596), INT64_C( 6609731989001691179)), simde_mm_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm512_cvtsepi64_epi16(test_vec[i].a); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_cvtsepi64_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i src; simde__mmask8 k; simde__m512i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-24950), INT16_C(-13996), INT16_C( -9572), INT16_C( 17909), INT16_C(-19244), INT16_C( 28473), INT16_C(-27531), INT16_C(-23965)), UINT8_C( 89), simde_mm512_set_epi64(INT64_C(-6464916445333598184), INT64_C(-9074050598773963479), INT64_C(-6773163771856001287), INT64_C( 8528895860955669022), INT64_C( 743395091751495893), INT64_C(-5753646205421077345), INT64_C( 6781795189594641427), INT64_C( 7308036127418939455)), simde_mm_set_epi16(INT16_C(-24950), INT16_C(-32768), INT16_C( -9572), INT16_C( 32767), INT16_C( 32767), INT16_C( 28473), INT16_C(-27531), INT16_C( 32767)) }, { simde_mm_set_epi16(INT16_C( -7256), INT16_C( 15349), INT16_C(-25456), INT16_C( 5582), INT16_C( 13756), INT16_C(-30882), INT16_C(-25369), INT16_C( 26232)), UINT8_C(108), simde_mm512_set_epi64(INT64_C(-8224370586313845882), INT64_C(-1993121323347227003), INT64_C( 8831696545371448920), INT64_C( 2185815767663877612), INT64_C( -270570670623534210), INT64_C(-7835605414239696560), INT64_C(-1637430132815230795), INT64_C( 7232010829548987022)), simde_mm_set_epi16(INT16_C( -7256), INT16_C(-32768), INT16_C( 32767), INT16_C( 5582), INT16_C(-32768), INT16_C(-32768), INT16_C(-25369), INT16_C( 26232)) }, { simde_mm_set_epi16(INT16_C(-23624), INT16_C( 938), INT16_C(-27082), INT16_C(-23731), INT16_C( 28546), INT16_C( 6104), INT16_C( -771), 
INT16_C(-18938)), UINT8_C(212), simde_mm512_set_epi64(INT64_C(-3956629285800542954), INT64_C( 7768661847448918660), INT64_C( 8414129281940348180), INT64_C(-3282225164507364365), INT64_C( 3972788221445796445), INT64_C(-1066584669561441235), INT64_C( 2182845185502488635), INT64_C(-2591044625591562731)), simde_mm_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C(-27082), INT16_C(-32768), INT16_C( 28546), INT16_C(-32768), INT16_C( -771), INT16_C(-18938)) }, { simde_mm_set_epi16(INT16_C( 24446), INT16_C( 26366), INT16_C( 9634), INT16_C( 24482), INT16_C(-31753), INT16_C(-24279), INT16_C( -804), INT16_C(-22449)), UINT8_C( 80), simde_mm512_set_epi64(INT64_C( 5062048906197334415), INT64_C( -115992959878725320), INT64_C( 1136786795492082678), INT64_C(-1948109654186813959), INT64_C( 120681793319816518), INT64_C(-3982161075345546564), INT64_C(-4623140196798581694), INT64_C(-1633613586256931500)), simde_mm_set_epi16(INT16_C( 24446), INT16_C(-32768), INT16_C( 9634), INT16_C(-32768), INT16_C(-31753), INT16_C(-24279), INT16_C( -804), INT16_C(-22449)) }, { simde_mm_set_epi16(INT16_C(-14923), INT16_C(-18218), INT16_C(-13943), INT16_C(-15293), INT16_C( 28087), INT16_C( 23347), INT16_C( 20051), INT16_C(-29378)), UINT8_C(249), simde_mm512_set_epi64(INT64_C( 5851174439164188465), INT64_C(-6319779881941429071), INT64_C( 1879763181654775792), INT64_C( 4044922446109520806), INT64_C( 1345671618220988839), INT64_C(-8367274893382946539), INT64_C( 4386554571925238747), INT64_C(-2885812676818812761)), simde_mm_set_epi16(INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 23347), INT16_C( 20051), INT16_C(-32768)) }, { simde_mm_set_epi16(INT16_C( 20263), INT16_C( 24618), INT16_C(-21646), INT16_C( 25026), INT16_C( 2086), INT16_C( 23935), INT16_C(-28550), INT16_C(-31813)), UINT8_C( 46), simde_mm512_set_epi64(INT64_C(-5543414139022216695), INT64_C(-7351051862345725962), INT64_C( 7765938089573807601), INT64_C(-8520500957815241015), 
INT64_C(-4114690540063739335), INT64_C( 2127377446843006162), INT64_C( 4629741254325605495), INT64_C( 6283226406328232321)), simde_mm_set_epi16(INT16_C( 20263), INT16_C( 24618), INT16_C( 32767), INT16_C( 25026), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C(-31813)) }, { simde_mm_set_epi16(INT16_C(-20607), INT16_C(-21929), INT16_C( 13898), INT16_C( -678), INT16_C(-11799), INT16_C( 18714), INT16_C( 16705), INT16_C( 22144)), UINT8_C(244), simde_mm512_set_epi64(INT64_C( 3168330803816226018), INT64_C(-5665190690683619181), INT64_C(-6479275574786056503), INT64_C(-4854337280374936929), INT64_C( 9110007504327473973), INT64_C( 2306966881245130829), INT64_C( 3371747741827789856), INT64_C( 4883552887250537067)), simde_mm_set_epi16(INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C(-11799), INT16_C( 32767), INT16_C( 16705), INT16_C( 22144)) }, { simde_mm_set_epi16(INT16_C( -8975), INT16_C( 24160), INT16_C( 10567), INT16_C(-31950), INT16_C( -1066), INT16_C( 1567), INT16_C(-20739), INT16_C(-21386)), UINT8_C( 84), simde_mm512_set_epi64(INT64_C(-5802305007366524132), INT64_C( 6368102336095963395), INT64_C( 26715019440636716), INT64_C( 3396530625665467524), INT64_C( 4010180268457941346), INT64_C( 5473964637387956828), INT64_C( 6527063328512873018), INT64_C( 7600981363267795639)), simde_mm_set_epi16(INT16_C( -8975), INT16_C( 32767), INT16_C( 10567), INT16_C( 32767), INT16_C( -1066), INT16_C( 32767), INT16_C(-20739), INT16_C(-21386)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm512_mask_cvtsepi64_epi16(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_cvtsepi64_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; simde__m128i r; } test_vec[8] = { { UINT8_C( 99), simde_mm512_set_epi64(INT64_C(-6773163771856001287), INT64_C( 8528895860955669022), INT64_C( 
743395091751495893), INT64_C(-5753646205421077345), INT64_C( 6781795189594641427), INT64_C( 7308036127418939455), INT64_C( 616165601606027604), INT64_C(-2694201555471274183)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 32767), INT16_C( 32767), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 32767), INT16_C(-32768)) }, { UINT8_C( 41), simde_mm512_set_epi64(INT64_C( -270570670623534210), INT64_C(-7835605414239696560), INT64_C(-1637430132815230795), INT64_C( 7232010829548987022), INT64_C( 564423766678453237), INT64_C(-7165203031737464994), INT64_C(-7140626015800786754), INT64_C(-2499691201975648498)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C(-32768), INT16_C( 0), INT16_C( 32767), INT16_C( 0), INT16_C( 0), INT16_C(-32768)) }, { UINT8_C(236), simde_mm512_set_epi64(INT64_C( 2182845185502488635), INT64_C(-2591044625591562731), INT64_C( 634126652413576106), INT64_C(-7622725766299379752), INT64_C( -216817067777774970), INT64_C(-4785193871492054345), INT64_C( 7467476729839254839), INT64_C( 3956665538785416156)), simde_mm_set_epi16(INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C( 0), INT16_C(-32768), INT16_C(-32768), INT16_C( 0), INT16_C( 0)) }, { UINT8_C( 45), simde_mm512_set_epi64(INT64_C(-8852586683955255554), INT64_C( 2711835077233844521), INT64_C( -226120820645741920), INT64_C(-5768129162479938358), INT64_C( 4995224830392729088), INT64_C(-9204129394426957414), INT64_C( 6102592944456478166), INT64_C(-5538879581396485404)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C(-32768), INT16_C( 0), INT16_C( 32767), INT16_C(-32768), INT16_C( 0), INT16_C(-32768)) }, { UINT8_C( 84), simde_mm512_set_epi64(INT64_C( 5644010056631452329), INT64_C( 687917764947732740), INT64_C( 9077991551678197137), INT64_C( 5379835588808993943), INT64_C( 5669915352590237555), INT64_C(-7084358722419653223), INT64_C(-5646349838748004740), INT64_C( 5115442951238204697)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 32767), INT16_C( 0), INT16_C( 32767), INT16_C( 0), INT16_C(-32768), 
INT16_C( 0), INT16_C( 0)) }, { UINT8_C( 51), simde_mm512_set_epi64(INT64_C( 6212879174325387943), INT64_C(-9195198218682338424), INT64_C(-6314975051621021387), INT64_C( 316124606243108816), INT64_C(-1018533604686287836), INT64_C( 7455766202425772200), INT64_C( -151643922692032263), INT64_C(-4200247844809358269)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C(-32768), INT16_C( 32767), INT16_C( 0), INT16_C( 0), INT16_C(-32768), INT16_C(-32768)) }, { UINT8_C(174), simde_mm512_set_epi64(INT64_C(-4824473637125509439), INT64_C( 2222300162020472452), INT64_C( 4485933577556469028), INT64_C( 3606420641901871392), INT64_C( 7533512282749681966), INT64_C( 5703633189469315522), INT64_C( 587259603884606395), INT64_C( 5851174439164188465)), simde_mm_set_epi16(INT16_C(-32768), INT16_C( 0), INT16_C( 32767), INT16_C( 0), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 0)) }, { UINT8_C(133), simde_mm512_set_epi64(INT64_C(-2795345254402041164), INT64_C( -585762482098414214), INT64_C( 1037739329164009972), INT64_C(-5800167553526727334), INT64_C(-3321042873096251776), INT64_C(-5543414139022216695), INT64_C(-7351051862345725962), INT64_C( 7765938089573807601)), simde_mm_set_epi16(INT16_C(-32768), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-32768), INT16_C( 0), INT16_C( 32767)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm512_maskz_cvtsepi64_epi16(test_vec[i].k, test_vec[i].a); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_cvtsepi64_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m256i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( -1385298216494496), INT64_C( -996445051636), INT64_C( 3207004), INT64_C( -651667828674554), INT64_C( -3476602607657036), INT64_C( -4627787), INT64_C( -2474879574406724), INT64_C( 508842849663014820)), simde_mm256_set_epi32( INT32_MIN , INT32_MIN , INT32_C( 3207004), INT32_MIN , INT32_MIN , INT32_C( 
-4627787), INT32_MIN , INT32_MAX ) }, { simde_mm512_set_epi64(INT64_C( -2), INT64_C( -7621), INT64_C( 296832081577), INT64_C( -10040), INT64_C( 10421), INT64_C( 316486368), INT64_C( -279), INT64_C( 1776399948)), simde_mm256_set_epi32(INT32_C( -2), INT32_C( -7621), INT32_MAX , INT32_C( -10040), INT32_C( 10421), INT32_C( 316486368), INT32_C( -279), INT32_C( 1776399948)) }, { simde_mm512_set_epi64(INT64_C( 69786007435882969), INT64_C(-6327619736082157669), INT64_C( 24285899089645), INT64_C( -54009961), INT64_C( 216203910725), INT64_C( 34664), INT64_C( 760577), INT64_C( 52093180175)), simde_mm256_set_epi32( INT32_MAX , INT32_MIN , INT32_MAX , INT32_C( -54009961), INT32_MAX , INT32_C( 34664), INT32_C( 760577), INT32_MAX ) }, { simde_mm512_set_epi64(INT64_C( -212538005711), INT64_C( 1541), INT64_C( -1), INT64_C( 700703313895845440), INT64_C( -2820), INT64_C( -13888321195009228), INT64_C( -125614148245680479), INT64_C( -251)), simde_mm256_set_epi32( INT32_MIN , INT32_C( 1541), INT32_C( -1), INT32_MAX , INT32_C( -2820), INT32_MIN , INT32_MIN , INT32_C( -251)) }, { simde_mm512_set_epi64(INT64_C( 8722215763), INT64_C( 69834045587926754), INT64_C( 14995086304428), INT64_C( 32908), INT64_C( 36), INT64_C( 3), INT64_C( 480756836), INT64_C( 20)), simde_mm256_set_epi32( INT32_MAX , INT32_MAX , INT32_MAX , INT32_C( 32908), INT32_C( 36), INT32_C( 3), INT32_C( 480756836), INT32_C( 20)) }, { simde_mm512_set_epi64(INT64_C( 393479), INT64_C( -33215582), INT64_C( 220007333), INT64_C( 93), INT64_C( 53663901), INT64_C( 456460283212472), INT64_C(-4461760402649505374), INT64_C( 1569855624019)), simde_mm256_set_epi32(INT32_C( 393479), INT32_C( -33215582), INT32_C( 220007333), INT32_C( 93), INT32_C( 53663901), INT32_MAX , INT32_MIN , INT32_MAX ) }, { simde_mm512_set_epi64(INT64_C( -360269), INT64_C( -838041), INT64_C( 68701792440150), INT64_C( -8279642051532), INT64_C( 12194321408), INT64_C( -16610408706990781), INT64_C( 31156899843), INT64_C( -1087719579897)), simde_mm256_set_epi32(INT32_C( 
-360269), INT32_C( -838041), INT32_MAX , INT32_MIN , INT32_MAX , INT32_MIN , INT32_MAX , INT32_MIN ) }, { simde_mm512_set_epi64(INT64_C( -583546092), INT64_C( -1051671055), INT64_C( -4042501098), INT64_C( -81260673017), INT64_C( 16230), INT64_C( -3), INT64_C( 15889333151091), INT64_C( -747559120874)), simde_mm256_set_epi32(INT32_C( -583546092), INT32_C(-1051671055), INT32_MIN , INT32_MIN , INT32_C( 16230), INT32_C( -3), INT32_MAX , INT32_MIN ) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm512_cvtsepi64_epi32(test_vec[i].a); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_cvtsepi64_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i src; simde__mmask8 k; simde__m512i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( -783872818), INT32_C( 224826276), INT32_C( 833953142), INT32_C( 704393899), INT32_C(-1282792525), INT32_C(-1682931810), INT32_C( 1715663188), INT32_C(-1736532826)), UINT8_C(184), simde_mm512_set_epi64(INT64_C( -304424330925858839), INT64_C( 6920061078002113788), INT64_C( 5392365268250659625), INT64_C( 7451707948021880068), INT64_C(-8094918606566868518), INT64_C( 4039990350021559518), INT64_C(-5079608809355355713), INT64_C( 4544449552448075830)), simde_mm256_set_epi32(INT32_MIN, INT32_C( 224826276), INT32_C( 2147483647), INT32_C( 2147483647), INT32_MIN, INT32_C(-1682931810), INT32_C( 1715663188), INT32_C(-1736532826)) }, { simde_mm256_set_epi32(INT32_C( 228147153), INT32_C( 1769984359), INT32_C( -876802121), INT32_C( -647616802), INT32_C(-1031550247), INT32_C( -304505632), INT32_C( 452449791), INT32_C( -324230599)), UINT8_C( 65), simde_mm512_set_epi64(INT64_C( 7961945492670935), INT64_C(-8501526825185861704), INT64_C( 8872615053841923423), INT64_C( -146553742277366704), INT64_C( 634942473771684266), INT64_C(-8149949883580988621), INT64_C(-5593558253107684500), INT64_C( -775182801274770337)), simde_mm256_set_epi32(INT32_C( 
228147153), INT32_MIN, INT32_C( -876802121), INT32_C( -647616802), INT32_C(-1031550247), INT32_C( -304505632), INT32_C( 452449791), INT32_MIN) }, { simde_mm256_set_epi32(INT32_C( 1131322125), INT32_C(-1676568554), INT32_C( -296320699), INT32_C( 1382446137), INT32_C( 1414899713), INT32_C( 334199932), INT32_C( -144765655), INT32_C( 1792521151)), UINT8_C( 87), simde_mm512_set_epi64(INT64_C( 5926848964672818106), INT64_C( 5125881974217215893), INT64_C(-4592732829056100767), INT64_C(-5617139660166741281), INT64_C( 8408189234922807969), INT64_C( 46860512120483389), INT64_C( 4524750203780244200), INT64_C(-5543529261566757021)), simde_mm256_set_epi32(INT32_C( 1131322125), INT32_C( 2147483647), INT32_C( -296320699), INT32_MIN, INT32_C( 1414899713), INT32_C( 2147483647), INT32_C( 2147483647), INT32_MIN) }, { simde_mm256_set_epi32(INT32_C( 492196155), INT32_C( 311401987), INT32_C(-1826918634), INT32_C( 466373339), INT32_C( 793993811), INT32_C( -275847262), INT32_C( 1108600799), INT32_C( -126394960)), UINT8_C(202), simde_mm512_set_epi64(INT64_C( 5858518275788586214), INT64_C( 6325882691723539358), INT64_C( 1467602667059784657), INT64_C(-5469777716636073985), INT64_C(-4718566456661627980), INT64_C(-8823362601370356535), INT64_C( 3910769586217516674), INT64_C( 5379736184341507060)), simde_mm256_set_epi32(INT32_C( 2147483647), INT32_C( 2147483647), INT32_C(-1826918634), INT32_C( 466373339), INT32_MIN, INT32_C( -275847262), INT32_C( 2147483647), INT32_C( -126394960)) }, { simde_mm256_set_epi32(INT32_C( 1821029326), INT32_C( 1839096524), INT32_C( 594162196), INT32_C(-2140380523), INT32_C( 445328546), INT32_C( 1418264985), INT32_C( 986213431), INT32_C(-1974658128)), UINT8_C(130), simde_mm512_set_epi64(INT64_C( 4654693694425511327), INT64_C( 4987555405601157538), INT64_C(-3482406609442521151), INT64_C( -74847652664381928), INT64_C( 8566618987618331801), INT64_C( 7168824261247067964), INT64_C( 3636052218697288232), INT64_C( 1769491692739488645)), simde_mm256_set_epi32(INT32_C( 
2147483647), INT32_C( 1839096524), INT32_C( 594162196), INT32_C(-2140380523), INT32_C( 445328546), INT32_C( 1418264985), INT32_C( 2147483647), INT32_C(-1974658128)) }, { simde_mm256_set_epi32(INT32_C( -386374366), INT32_C( 1264203001), INT32_C( 448591089), INT32_C( -477219931), INT32_C( 412806942), INT32_C( 1851486104), INT32_C(-1806146323), INT32_C( -50243203)), UINT8_C( 4), simde_mm512_set_epi64(INT64_C( 2504249540805731307), INT64_C(-4838934383064989013), INT64_C( 8244741120151641494), INT64_C( 4465455276942587795), INT64_C(-6984757728940989206), INT64_C(-6611841259510994786), INT64_C(-5737746940667504182), INT64_C( 5990707151189672678)), simde_mm256_set_epi32(INT32_C( -386374366), INT32_C( 1264203001), INT32_C( 448591089), INT32_C( -477219931), INT32_C( 412806942), INT32_MIN, INT32_C(-1806146323), INT32_C( -50243203)) }, { simde_mm256_set_epi32(INT32_C( 694739216), INT32_C( -856286446), INT32_C( 1100751549), INT32_C(-1569791268), INT32_C(-1217857754), INT32_C(-1399806754), INT32_C( 531097456), INT32_C(-1371358872)), UINT8_C( 39), simde_mm512_set_epi64(INT64_C( 5846906676369557746), INT64_C(-2200894904770223764), INT64_C( 3050515010749934860), INT64_C( 1771796209347881841), INT64_C(-8449543381956761958), INT64_C(-8910958770806120087), INT64_C( 4697259477864506225), INT64_C( 1343234160387478022)), simde_mm256_set_epi32(INT32_C( 694739216), INT32_C( -856286446), INT32_C( 2147483647), INT32_C(-1569791268), INT32_C(-1217857754), INT32_MIN, INT32_C( 2147483647), INT32_C( 2147483647)) }, { simde_mm256_set_epi32(INT32_C(-1373417385), INT32_C( 748657731), INT32_C( 1483755877), INT32_C( 2104018565), INT32_C(-1239060882), INT32_C( 1131976241), INT32_C( 1213166902), INT32_C( 1657511546)), UINT8_C( 72), simde_mm512_set_epi64(INT64_C( 8062252864411540983), INT64_C(-8797914700722318653), INT64_C(-7262681553986948486), INT64_C( 5851217154060748384), INT64_C(-8009878144581101693), INT64_C(-5600336949614751156), INT64_C( 6240654256081358479), INT64_C(-7094305606247183129)), 
simde_mm256_set_epi32(INT32_C(-1373417385), INT32_MIN, INT32_C( 1483755877), INT32_C( 2104018565), INT32_MIN, INT32_C( 1131976241), INT32_C( 1213166902), INT32_C( 1657511546)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm512_mask_cvtsepi64_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_cvtsepi64_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; simde__m256i r; } test_vec[8] = { { UINT8_C(166), simde_mm512_set_epi64(INT64_C(-8094918606566868518), INT64_C( 4039990350021559518), INT64_C(-5079608809355355713), INT64_C( 4544449552448075830), INT64_C( 214030627736325326), INT64_C( 965621503535422838), INT64_C( 3025348762719101875), INT64_C(-7228137083632422572)), simde_mm256_set_epi32(INT32_MIN, INT32_C( 0), INT32_MIN, INT32_C( 0), INT32_C( 0), INT32_C( 2147483647), INT32_C( 2147483647), INT32_C( 0)) }, { UINT8_C( 4), simde_mm512_set_epi64(INT64_C(-2040127053262929967), INT64_C( 7602024939754688439), INT64_C(-2781492981666690343), INT64_C(-1307841730435361281), INT64_C(-1392559814843402321), INT64_C( 1598109267473792738), INT64_C(-1562013513342941276), INT64_C( 4124183631221082341)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 2147483647), INT32_C( 0), INT32_C( 0)) }, { UINT8_C( 95), simde_mm512_set_epi64(INT64_C( 7698819720935131480), INT64_C(-1959125385312173467), INT64_C( 4636349956089830497), INT64_C(-2231536501614225883), INT64_C(-2662282344960899888), INT64_C(-2740420949015425086), INT64_C(-2599910470561320857), INT64_C(-7129238424086773927)), simde_mm256_set_epi32(INT32_C( 0), INT32_MIN, INT32_C( 0), INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN) }, { UINT8_C( 41), simde_mm512_set_epi64(INT64_C( 7926549609163252623), INT64_C(-8658624789933032605), INT64_C( 3670203061911626838), INT64_C( 5886946783247035777), 
INT64_C(-1561491900908529833), INT64_C( 4858991530734622742), INT64_C(-1272687709950413767), INT64_C( 6076947994788985980)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 2147483647), INT32_C( 0), INT32_MIN, INT32_C( 0), INT32_C( 0), INT32_C( 2147483647)) }, { UINT8_C( 61), simde_mm512_set_epi64(INT64_C( 9006543894860367562), INT64_C( 2113966389253348867), INT64_C(-7846555785016620325), INT64_C( 3410177455490525090), INT64_C( 4761404180193041840), INT64_C( 5926848964672818106), INT64_C( 5125881974217215893), INT64_C(-4592732829056100767)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_MIN, INT32_C( 2147483647), INT32_C( 2147483647), INT32_C( 2147483647), INT32_C( 0), INT32_MIN) }, { UINT8_C(142), simde_mm512_set_epi64(INT64_C( 4235754435341261744), INT64_C( 5858518275788586214), INT64_C( 6325882691723539358), INT64_C( 1467602667059784657), INT64_C(-5469777716636073985), INT64_C(-4718566456661627980), INT64_C(-8823362601370356535), INT64_C( 3910769586217516674)), simde_mm256_set_epi32(INT32_C( 2147483647), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_MIN, INT32_MIN, INT32_MIN, INT32_C( 0)) }, { UINT8_C(153), simde_mm512_set_epi64(INT64_C( -74847652664381928), INT64_C( 8566618987618331801), INT64_C( 7168824261247067964), INT64_C( 3636052218697288232), INT64_C( 1769491692739488645), INT64_C(-4519209933294881842), INT64_C( 7898859425361441300), INT64_C(-9192864346835047262)), simde_mm256_set_epi32(INT32_MIN, INT32_C( 0), INT32_C( 0), INT32_C( 2147483647), INT32_C( 2147483647), INT32_C( 0), INT32_C( 0), INT32_MIN) }, { UINT8_C(193), simde_mm512_set_epi64(INT64_C( 5990707151189672678), INT64_C( 2863208564165993762), INT64_C( 5429710545248646385), INT64_C(-2049643996231569634), INT64_C( 7952072268167275757), INT64_C( -215792912647533760), INT64_C(-2126906703336744137), INT64_C( 6372507310579515811)), simde_mm256_set_epi32(INT32_C( 2147483647), INT32_C( 2147483647), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 2147483647)) } 
}; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm512_maskz_cvtsepi64_epi32(test_vec[i].k, test_vec[i].a); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsepi16_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsepi32_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsepi32_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsepi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtsepi16_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtsepi32_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtsepi32_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cvtsepi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cvtsepi16_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cvtsepi16_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_cvtsepi16_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cvtsepi32_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cvtsepi32_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_cvtsepi32_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cvtsepi32_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cvtsepi32_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_cvtsepi32_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cvtsepi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cvtsepi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_cvtsepi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cvtsepi64_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cvtsepi64_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_cvtsepi64_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cvtsepi64_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cvtsepi64_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_cvtsepi64_epi32) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/declare-suites.h000066400000000000000000000045351400333146700203450ustar00rootroot00000000000000SIMDE_TEST_DECLARE_SUITE(2intersect) SIMDE_TEST_DECLARE_SUITE(abs) SIMDE_TEST_DECLARE_SUITE(add) SIMDE_TEST_DECLARE_SUITE(adds) SIMDE_TEST_DECLARE_SUITE(and) SIMDE_TEST_DECLARE_SUITE(andnot) SIMDE_TEST_DECLARE_SUITE(avg) SIMDE_TEST_DECLARE_SUITE(blend) 
SIMDE_TEST_DECLARE_SUITE(broadcast) SIMDE_TEST_DECLARE_SUITE(cast) SIMDE_TEST_DECLARE_SUITE(cmpeq) SIMDE_TEST_DECLARE_SUITE(cmpge) SIMDE_TEST_DECLARE_SUITE(cmpgt) SIMDE_TEST_DECLARE_SUITE(cmp) SIMDE_TEST_DECLARE_SUITE(cmple) SIMDE_TEST_DECLARE_SUITE(cmplt) SIMDE_TEST_DECLARE_SUITE(copysign) SIMDE_TEST_DECLARE_SUITE(cvt) SIMDE_TEST_DECLARE_SUITE(cvts) SIMDE_TEST_DECLARE_SUITE(div) SIMDE_TEST_DECLARE_SUITE(extract) SIMDE_TEST_DECLARE_SUITE(fmadd) SIMDE_TEST_DECLARE_SUITE(fmsub) SIMDE_TEST_DECLARE_SUITE(fnmadd) SIMDE_TEST_DECLARE_SUITE(fnmsub) SIMDE_TEST_DECLARE_SUITE(insert) SIMDE_TEST_DECLARE_SUITE(kshift) SIMDE_TEST_DECLARE_SUITE(load) SIMDE_TEST_DECLARE_SUITE(loadu) SIMDE_TEST_DECLARE_SUITE(lzcnt) SIMDE_TEST_DECLARE_SUITE(madd) SIMDE_TEST_DECLARE_SUITE(maddubs) SIMDE_TEST_DECLARE_SUITE(max) SIMDE_TEST_DECLARE_SUITE(min) SIMDE_TEST_DECLARE_SUITE(mov) SIMDE_TEST_DECLARE_SUITE(mov_mask) SIMDE_TEST_DECLARE_SUITE(movm) SIMDE_TEST_DECLARE_SUITE(mul) SIMDE_TEST_DECLARE_SUITE(mulhi) SIMDE_TEST_DECLARE_SUITE(mulhrs) SIMDE_TEST_DECLARE_SUITE(mullo) SIMDE_TEST_DECLARE_SUITE(negate) SIMDE_TEST_DECLARE_SUITE(or) SIMDE_TEST_DECLARE_SUITE(packs) SIMDE_TEST_DECLARE_SUITE(packus) SIMDE_TEST_DECLARE_SUITE(permutex2var) SIMDE_TEST_DECLARE_SUITE(permutexvar) SIMDE_TEST_DECLARE_SUITE(sad) SIMDE_TEST_DECLARE_SUITE(set1) SIMDE_TEST_DECLARE_SUITE(set4) SIMDE_TEST_DECLARE_SUITE(set) SIMDE_TEST_DECLARE_SUITE(setone) SIMDE_TEST_DECLARE_SUITE(setr4) SIMDE_TEST_DECLARE_SUITE(setr) SIMDE_TEST_DECLARE_SUITE(setzero) SIMDE_TEST_DECLARE_SUITE(shuffle) SIMDE_TEST_DECLARE_SUITE(sll) SIMDE_TEST_DECLARE_SUITE(slli) SIMDE_TEST_DECLARE_SUITE(sllv) SIMDE_TEST_DECLARE_SUITE(sqrt) SIMDE_TEST_DECLARE_SUITE(sra) SIMDE_TEST_DECLARE_SUITE(srai) SIMDE_TEST_DECLARE_SUITE(srav) SIMDE_TEST_DECLARE_SUITE(srl) SIMDE_TEST_DECLARE_SUITE(srli) SIMDE_TEST_DECLARE_SUITE(srlv) SIMDE_TEST_DECLARE_SUITE(store) SIMDE_TEST_DECLARE_SUITE(storeu) SIMDE_TEST_DECLARE_SUITE(sub) SIMDE_TEST_DECLARE_SUITE(subs) 
SIMDE_TEST_DECLARE_SUITE(test) SIMDE_TEST_DECLARE_SUITE(unpackhi) SIMDE_TEST_DECLARE_SUITE(unpacklo) SIMDE_TEST_DECLARE_SUITE(xor) SIMDE_TEST_DECLARE_SUITE(xorsign) simde-0.7.2/test/x86/avx512/div.c000066400000000000000000002212541400333146700162100ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN div #include #include #include static int test_simde_mm512_div_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 b; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( 653.62), SIMDE_FLOAT32_C( 981.74), SIMDE_FLOAT32_C( 780.10), SIMDE_FLOAT32_C( 59.38), SIMDE_FLOAT32_C( -795.11), SIMDE_FLOAT32_C( 923.87), SIMDE_FLOAT32_C( -270.01), SIMDE_FLOAT32_C( -411.99), SIMDE_FLOAT32_C( -97.83), SIMDE_FLOAT32_C( -393.82), SIMDE_FLOAT32_C( 934.81), SIMDE_FLOAT32_C( 74.53), SIMDE_FLOAT32_C( 843.79), SIMDE_FLOAT32_C( 465.05), SIMDE_FLOAT32_C( -42.07), SIMDE_FLOAT32_C( -685.83)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 596.54), SIMDE_FLOAT32_C( -116.40), SIMDE_FLOAT32_C( -989.77), SIMDE_FLOAT32_C( -794.40), SIMDE_FLOAT32_C( 183.38), SIMDE_FLOAT32_C( -185.75), SIMDE_FLOAT32_C( 429.70), SIMDE_FLOAT32_C( 664.04), SIMDE_FLOAT32_C( 296.78), SIMDE_FLOAT32_C( -698.78), SIMDE_FLOAT32_C( 908.33), SIMDE_FLOAT32_C( 181.85), SIMDE_FLOAT32_C( -397.89), SIMDE_FLOAT32_C( -586.75), SIMDE_FLOAT32_C( 904.99), SIMDE_FLOAT32_C( -321.15)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.10), SIMDE_FLOAT32_C( -8.43), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( -4.34), SIMDE_FLOAT32_C( -4.97), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 1.03), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( -2.12), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( 2.14)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 729.63), SIMDE_FLOAT32_C( -908.06), SIMDE_FLOAT32_C( -769.77), SIMDE_FLOAT32_C( -70.66), SIMDE_FLOAT32_C( 482.71), SIMDE_FLOAT32_C( 244.66), SIMDE_FLOAT32_C( -615.83), SIMDE_FLOAT32_C( 841.42), SIMDE_FLOAT32_C( -571.10), SIMDE_FLOAT32_C( 971.96), SIMDE_FLOAT32_C( 149.38), SIMDE_FLOAT32_C( 497.71), SIMDE_FLOAT32_C( 988.69), SIMDE_FLOAT32_C( 479.68), SIMDE_FLOAT32_C( 
-128.24), SIMDE_FLOAT32_C( 585.28)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 359.65), SIMDE_FLOAT32_C( -730.08), SIMDE_FLOAT32_C( 977.98), SIMDE_FLOAT32_C( -215.53), SIMDE_FLOAT32_C( -315.50), SIMDE_FLOAT32_C( 80.64), SIMDE_FLOAT32_C( -996.10), SIMDE_FLOAT32_C( -556.83), SIMDE_FLOAT32_C( -628.68), SIMDE_FLOAT32_C( 938.60), SIMDE_FLOAT32_C( -147.98), SIMDE_FLOAT32_C( 378.31), SIMDE_FLOAT32_C( 246.47), SIMDE_FLOAT32_C( 109.18), SIMDE_FLOAT32_C( -575.64), SIMDE_FLOAT32_C( -426.86)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( -1.53), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( -1.51), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( -1.01), SIMDE_FLOAT32_C( 1.32), SIMDE_FLOAT32_C( 4.01), SIMDE_FLOAT32_C( 4.39), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( -1.37)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -148.70), SIMDE_FLOAT32_C( -327.17), SIMDE_FLOAT32_C( -310.14), SIMDE_FLOAT32_C( -718.80), SIMDE_FLOAT32_C( 382.69), SIMDE_FLOAT32_C( -181.61), SIMDE_FLOAT32_C( -214.09), SIMDE_FLOAT32_C( 55.72), SIMDE_FLOAT32_C( 438.03), SIMDE_FLOAT32_C( -458.01), SIMDE_FLOAT32_C( 144.59), SIMDE_FLOAT32_C( 165.00), SIMDE_FLOAT32_C( -331.04), SIMDE_FLOAT32_C( 406.96), SIMDE_FLOAT32_C( -326.43), SIMDE_FLOAT32_C( 373.82)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 791.83), SIMDE_FLOAT32_C( 191.69), SIMDE_FLOAT32_C( -460.58), SIMDE_FLOAT32_C( -915.08), SIMDE_FLOAT32_C( -877.38), SIMDE_FLOAT32_C( -915.27), SIMDE_FLOAT32_C( 207.85), SIMDE_FLOAT32_C( 567.35), SIMDE_FLOAT32_C( 304.30), SIMDE_FLOAT32_C( -777.07), SIMDE_FLOAT32_C( -683.73), SIMDE_FLOAT32_C( -113.32), SIMDE_FLOAT32_C( -701.16), SIMDE_FLOAT32_C( -942.92), SIMDE_FLOAT32_C( -489.97), SIMDE_FLOAT32_C( 911.34)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( -1.71), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( -1.03), SIMDE_FLOAT32_C( 0.10), 
SIMDE_FLOAT32_C( 1.44), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -1.46), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.41)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -869.58), SIMDE_FLOAT32_C( 763.75), SIMDE_FLOAT32_C( -558.93), SIMDE_FLOAT32_C( 756.19), SIMDE_FLOAT32_C( 509.82), SIMDE_FLOAT32_C( -855.71), SIMDE_FLOAT32_C( -965.40), SIMDE_FLOAT32_C( -279.29), SIMDE_FLOAT32_C( -798.08), SIMDE_FLOAT32_C( 256.40), SIMDE_FLOAT32_C( 739.89), SIMDE_FLOAT32_C( -903.46), SIMDE_FLOAT32_C( -771.75), SIMDE_FLOAT32_C( -54.77), SIMDE_FLOAT32_C( 397.04), SIMDE_FLOAT32_C( 925.94)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -355.51), SIMDE_FLOAT32_C( 136.73), SIMDE_FLOAT32_C( 586.70), SIMDE_FLOAT32_C( 712.56), SIMDE_FLOAT32_C( 135.88), SIMDE_FLOAT32_C( -693.91), SIMDE_FLOAT32_C( -131.33), SIMDE_FLOAT32_C( -933.79), SIMDE_FLOAT32_C( 864.29), SIMDE_FLOAT32_C( -834.00), SIMDE_FLOAT32_C( 475.52), SIMDE_FLOAT32_C( 502.31), SIMDE_FLOAT32_C( -746.87), SIMDE_FLOAT32_C( -364.10), SIMDE_FLOAT32_C( -995.18), SIMDE_FLOAT32_C( 683.54)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 5.59), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( 1.06), SIMDE_FLOAT32_C( 3.75), SIMDE_FLOAT32_C( 1.23), SIMDE_FLOAT32_C( 7.35), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( -1.80), SIMDE_FLOAT32_C( 1.03), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 1.35)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 119.21), SIMDE_FLOAT32_C( 360.54), SIMDE_FLOAT32_C( 885.26), SIMDE_FLOAT32_C( -618.98), SIMDE_FLOAT32_C( -8.97), SIMDE_FLOAT32_C( -881.58), SIMDE_FLOAT32_C( -89.25), SIMDE_FLOAT32_C( -937.64), SIMDE_FLOAT32_C( -660.18), SIMDE_FLOAT32_C( -649.17), SIMDE_FLOAT32_C( -279.52), SIMDE_FLOAT32_C( 812.95), SIMDE_FLOAT32_C( -471.80), SIMDE_FLOAT32_C( 805.98), SIMDE_FLOAT32_C( 532.44), SIMDE_FLOAT32_C( 126.30)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 944.81), 
SIMDE_FLOAT32_C( 946.29), SIMDE_FLOAT32_C( 161.37), SIMDE_FLOAT32_C( -637.11), SIMDE_FLOAT32_C( 16.54), SIMDE_FLOAT32_C( 417.79), SIMDE_FLOAT32_C( 257.34), SIMDE_FLOAT32_C( -857.05), SIMDE_FLOAT32_C( 770.17), SIMDE_FLOAT32_C( -559.67), SIMDE_FLOAT32_C( -862.75), SIMDE_FLOAT32_C( -541.96), SIMDE_FLOAT32_C( 412.30), SIMDE_FLOAT32_C( -147.64), SIMDE_FLOAT32_C( 553.94), SIMDE_FLOAT32_C( -736.63)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 5.49), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -2.11), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( 1.09), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 1.16), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( -1.50), SIMDE_FLOAT32_C( -1.14), SIMDE_FLOAT32_C( -5.46), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( -0.17)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -81.24), SIMDE_FLOAT32_C( -934.88), SIMDE_FLOAT32_C( -84.21), SIMDE_FLOAT32_C( -265.16), SIMDE_FLOAT32_C( -978.34), SIMDE_FLOAT32_C( -425.47), SIMDE_FLOAT32_C( 792.31), SIMDE_FLOAT32_C( -306.03), SIMDE_FLOAT32_C( 911.07), SIMDE_FLOAT32_C( 992.01), SIMDE_FLOAT32_C( 172.45), SIMDE_FLOAT32_C( -135.31), SIMDE_FLOAT32_C( 652.11), SIMDE_FLOAT32_C( -529.15), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 883.05)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -110.89), SIMDE_FLOAT32_C( -325.07), SIMDE_FLOAT32_C( 834.96), SIMDE_FLOAT32_C( -681.06), SIMDE_FLOAT32_C( -877.63), SIMDE_FLOAT32_C( -653.45), SIMDE_FLOAT32_C( 40.48), SIMDE_FLOAT32_C( -644.02), SIMDE_FLOAT32_C( -687.76), SIMDE_FLOAT32_C( -660.68), SIMDE_FLOAT32_C( 802.46), SIMDE_FLOAT32_C( -477.95), SIMDE_FLOAT32_C( -125.80), SIMDE_FLOAT32_C( -475.50), SIMDE_FLOAT32_C( -806.50), SIMDE_FLOAT32_C( -778.62)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 2.88), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 1.11), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 19.57), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( -1.32), SIMDE_FLOAT32_C( -1.50), SIMDE_FLOAT32_C( 0.21), 
SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( -5.18), SIMDE_FLOAT32_C( 1.11), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -1.13)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -387.95), SIMDE_FLOAT32_C( 255.55), SIMDE_FLOAT32_C( 948.01), SIMDE_FLOAT32_C( 520.84), SIMDE_FLOAT32_C( 310.00), SIMDE_FLOAT32_C( -412.39), SIMDE_FLOAT32_C( 412.17), SIMDE_FLOAT32_C( -913.22), SIMDE_FLOAT32_C( 810.06), SIMDE_FLOAT32_C( -696.65), SIMDE_FLOAT32_C( 807.84), SIMDE_FLOAT32_C( 63.85), SIMDE_FLOAT32_C( -2.75), SIMDE_FLOAT32_C( -763.61), SIMDE_FLOAT32_C( -850.85), SIMDE_FLOAT32_C( 913.88)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -915.78), SIMDE_FLOAT32_C( 471.39), SIMDE_FLOAT32_C( -324.79), SIMDE_FLOAT32_C( -855.69), SIMDE_FLOAT32_C( 966.81), SIMDE_FLOAT32_C( 668.44), SIMDE_FLOAT32_C( 925.33), SIMDE_FLOAT32_C( 564.88), SIMDE_FLOAT32_C( -130.24), SIMDE_FLOAT32_C( 360.71), SIMDE_FLOAT32_C( 966.21), SIMDE_FLOAT32_C( -919.67), SIMDE_FLOAT32_C( 198.47), SIMDE_FLOAT32_C( -796.49), SIMDE_FLOAT32_C( 428.08), SIMDE_FLOAT32_C( 264.02)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( -2.92), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( -1.62), SIMDE_FLOAT32_C( -6.22), SIMDE_FLOAT32_C( -1.93), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( -1.99), SIMDE_FLOAT32_C( 3.46)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 534.55), SIMDE_FLOAT32_C( -263.46), SIMDE_FLOAT32_C( -958.21), SIMDE_FLOAT32_C( 927.39), SIMDE_FLOAT32_C( 830.49), SIMDE_FLOAT32_C( -394.19), SIMDE_FLOAT32_C( -755.65), SIMDE_FLOAT32_C( -594.24), SIMDE_FLOAT32_C( -371.00), SIMDE_FLOAT32_C( 623.04), SIMDE_FLOAT32_C( 879.76), SIMDE_FLOAT32_C( 838.28), SIMDE_FLOAT32_C( -100.77), SIMDE_FLOAT32_C( -708.14), SIMDE_FLOAT32_C( -206.06), SIMDE_FLOAT32_C( -203.03)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 65.94), SIMDE_FLOAT32_C( 158.39), SIMDE_FLOAT32_C( 532.17), SIMDE_FLOAT32_C( -1.61), 
SIMDE_FLOAT32_C( -802.21), SIMDE_FLOAT32_C( -782.13), SIMDE_FLOAT32_C( 831.96), SIMDE_FLOAT32_C( -692.14), SIMDE_FLOAT32_C( 581.38), SIMDE_FLOAT32_C( 943.65), SIMDE_FLOAT32_C( 585.87), SIMDE_FLOAT32_C( 329.94), SIMDE_FLOAT32_C( -747.39), SIMDE_FLOAT32_C( 976.32), SIMDE_FLOAT32_C( 362.23), SIMDE_FLOAT32_C( -137.03)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 8.11), SIMDE_FLOAT32_C( -1.66), SIMDE_FLOAT32_C( -1.80), SIMDE_FLOAT32_C( -576.02), SIMDE_FLOAT32_C( -1.04), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 2.54), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( 1.48)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_div_ps(test_vec[i].a, test_vec[i].b); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_div_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 b; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -745.89), SIMDE_FLOAT32_C( 663.97), SIMDE_FLOAT32_C( 886.69), SIMDE_FLOAT32_C( -271.39), SIMDE_FLOAT32_C( 845.36), SIMDE_FLOAT32_C( -391.34), SIMDE_FLOAT32_C( -606.86), SIMDE_FLOAT32_C( 818.59), SIMDE_FLOAT32_C( 953.36), SIMDE_FLOAT32_C( 863.40), SIMDE_FLOAT32_C( 241.85), SIMDE_FLOAT32_C( -815.86), SIMDE_FLOAT32_C( 460.12), SIMDE_FLOAT32_C( -674.64), SIMDE_FLOAT32_C( 868.62), SIMDE_FLOAT32_C( -710.40)), UINT16_C( 9207), simde_mm512_set_ps(SIMDE_FLOAT32_C( -956.83), SIMDE_FLOAT32_C( -855.01), SIMDE_FLOAT32_C( -219.41), SIMDE_FLOAT32_C( 94.89), SIMDE_FLOAT32_C( -270.85), SIMDE_FLOAT32_C( 356.85), SIMDE_FLOAT32_C( 872.24), SIMDE_FLOAT32_C( 100.53), SIMDE_FLOAT32_C( 234.39), SIMDE_FLOAT32_C( -639.13), SIMDE_FLOAT32_C( 981.49), SIMDE_FLOAT32_C( 706.62), SIMDE_FLOAT32_C( -983.90), SIMDE_FLOAT32_C( 124.15), SIMDE_FLOAT32_C( -820.87), 
SIMDE_FLOAT32_C( -946.81)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -17.46), SIMDE_FLOAT32_C( 371.67), SIMDE_FLOAT32_C( 390.77), SIMDE_FLOAT32_C( -132.40), SIMDE_FLOAT32_C( 276.69), SIMDE_FLOAT32_C( -338.80), SIMDE_FLOAT32_C( 359.09), SIMDE_FLOAT32_C( -631.66), SIMDE_FLOAT32_C( -455.96), SIMDE_FLOAT32_C( 16.63), SIMDE_FLOAT32_C( 194.96), SIMDE_FLOAT32_C( -407.18), SIMDE_FLOAT32_C( -447.59), SIMDE_FLOAT32_C( -276.48), SIMDE_FLOAT32_C( 631.98), SIMDE_FLOAT32_C( 430.67)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -745.89), SIMDE_FLOAT32_C( 663.97), SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( -271.39), SIMDE_FLOAT32_C( 845.36), SIMDE_FLOAT32_C( -391.34), SIMDE_FLOAT32_C( 2.43), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -38.43), SIMDE_FLOAT32_C( 5.03), SIMDE_FLOAT32_C( -1.74), SIMDE_FLOAT32_C( 460.12), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( -1.30), SIMDE_FLOAT32_C( -2.20)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 769.85), SIMDE_FLOAT32_C( -75.51), SIMDE_FLOAT32_C( 417.80), SIMDE_FLOAT32_C( 327.87), SIMDE_FLOAT32_C( 287.24), SIMDE_FLOAT32_C( -627.46), SIMDE_FLOAT32_C( 540.48), SIMDE_FLOAT32_C( -625.88), SIMDE_FLOAT32_C( -108.88), SIMDE_FLOAT32_C( 663.67), SIMDE_FLOAT32_C( -412.74), SIMDE_FLOAT32_C( -226.36), SIMDE_FLOAT32_C( 757.77), SIMDE_FLOAT32_C( -897.40), SIMDE_FLOAT32_C( 27.15), SIMDE_FLOAT32_C( -443.34)), UINT16_C(26651), simde_mm512_set_ps(SIMDE_FLOAT32_C( -896.67), SIMDE_FLOAT32_C( -181.49), SIMDE_FLOAT32_C( -338.89), SIMDE_FLOAT32_C( -19.28), SIMDE_FLOAT32_C( 886.35), SIMDE_FLOAT32_C( -662.07), SIMDE_FLOAT32_C( 925.60), SIMDE_FLOAT32_C( 651.41), SIMDE_FLOAT32_C( 597.16), SIMDE_FLOAT32_C( 67.32), SIMDE_FLOAT32_C( -911.68), SIMDE_FLOAT32_C( 202.35), SIMDE_FLOAT32_C( 208.06), SIMDE_FLOAT32_C( 747.61), SIMDE_FLOAT32_C( 81.71), SIMDE_FLOAT32_C( 40.88)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 429.04), SIMDE_FLOAT32_C( 13.69), SIMDE_FLOAT32_C( 491.03), SIMDE_FLOAT32_C( 366.42), SIMDE_FLOAT32_C( -264.60), SIMDE_FLOAT32_C( 201.75), SIMDE_FLOAT32_C( 
-598.58), SIMDE_FLOAT32_C( -939.94), SIMDE_FLOAT32_C( 118.06), SIMDE_FLOAT32_C( 355.92), SIMDE_FLOAT32_C( 978.59), SIMDE_FLOAT32_C( -224.11), SIMDE_FLOAT32_C( -71.37), SIMDE_FLOAT32_C( 333.99), SIMDE_FLOAT32_C( -515.40), SIMDE_FLOAT32_C( -38.06)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 769.85), SIMDE_FLOAT32_C( -13.26), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 327.87), SIMDE_FLOAT32_C( -3.35), SIMDE_FLOAT32_C( -627.46), SIMDE_FLOAT32_C( 540.48), SIMDE_FLOAT32_C( -625.88), SIMDE_FLOAT32_C( -108.88), SIMDE_FLOAT32_C( 663.67), SIMDE_FLOAT32_C( -412.74), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( -2.92), SIMDE_FLOAT32_C( -897.40), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( -1.07)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -301.18), SIMDE_FLOAT32_C( -952.56), SIMDE_FLOAT32_C( 361.18), SIMDE_FLOAT32_C( 53.08), SIMDE_FLOAT32_C( 179.94), SIMDE_FLOAT32_C( -914.68), SIMDE_FLOAT32_C( -695.32), SIMDE_FLOAT32_C( -492.39), SIMDE_FLOAT32_C( -86.02), SIMDE_FLOAT32_C( 123.88), SIMDE_FLOAT32_C( 274.86), SIMDE_FLOAT32_C( 554.74), SIMDE_FLOAT32_C( -845.80), SIMDE_FLOAT32_C( -156.28), SIMDE_FLOAT32_C( -737.31), SIMDE_FLOAT32_C( 590.88)), UINT16_C(31164), simde_mm512_set_ps(SIMDE_FLOAT32_C( 694.79), SIMDE_FLOAT32_C( 687.99), SIMDE_FLOAT32_C( -648.58), SIMDE_FLOAT32_C( -272.14), SIMDE_FLOAT32_C( -52.85), SIMDE_FLOAT32_C( -298.63), SIMDE_FLOAT32_C( 917.21), SIMDE_FLOAT32_C( -876.76), SIMDE_FLOAT32_C( 677.44), SIMDE_FLOAT32_C( -857.42), SIMDE_FLOAT32_C( -56.60), SIMDE_FLOAT32_C( 488.58), SIMDE_FLOAT32_C( 876.79), SIMDE_FLOAT32_C( -578.18), SIMDE_FLOAT32_C( -335.03), SIMDE_FLOAT32_C( 980.62)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 247.15), SIMDE_FLOAT32_C( 126.77), SIMDE_FLOAT32_C( 867.78), SIMDE_FLOAT32_C( -450.16), SIMDE_FLOAT32_C( 94.73), SIMDE_FLOAT32_C( -587.88), SIMDE_FLOAT32_C( 776.58), SIMDE_FLOAT32_C( -595.96), SIMDE_FLOAT32_C( 345.82), SIMDE_FLOAT32_C( -768.91), SIMDE_FLOAT32_C( -31.17), SIMDE_FLOAT32_C( -4.10), SIMDE_FLOAT32_C( -234.58), SIMDE_FLOAT32_C( 278.53), SIMDE_FLOAT32_C( 
-336.24), SIMDE_FLOAT32_C( -974.01)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -301.18), SIMDE_FLOAT32_C( 5.43), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( -914.68), SIMDE_FLOAT32_C( -695.32), SIMDE_FLOAT32_C( 1.47), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( 123.88), SIMDE_FLOAT32_C( 1.82), SIMDE_FLOAT32_C( -119.17), SIMDE_FLOAT32_C( -3.74), SIMDE_FLOAT32_C( -2.08), SIMDE_FLOAT32_C( -737.31), SIMDE_FLOAT32_C( 590.88)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -412.81), SIMDE_FLOAT32_C( -265.44), SIMDE_FLOAT32_C( -550.71), SIMDE_FLOAT32_C( -725.27), SIMDE_FLOAT32_C( -302.34), SIMDE_FLOAT32_C( -375.85), SIMDE_FLOAT32_C( 423.25), SIMDE_FLOAT32_C( 778.83), SIMDE_FLOAT32_C( 480.23), SIMDE_FLOAT32_C( -401.59), SIMDE_FLOAT32_C( 489.09), SIMDE_FLOAT32_C( 775.60), SIMDE_FLOAT32_C( -569.06), SIMDE_FLOAT32_C( -632.55), SIMDE_FLOAT32_C( -156.10), SIMDE_FLOAT32_C( 658.93)), UINT16_C( 3671), simde_mm512_set_ps(SIMDE_FLOAT32_C( -474.43), SIMDE_FLOAT32_C( -465.91), SIMDE_FLOAT32_C( 545.15), SIMDE_FLOAT32_C( 262.71), SIMDE_FLOAT32_C( 599.41), SIMDE_FLOAT32_C( -408.02), SIMDE_FLOAT32_C( -664.44), SIMDE_FLOAT32_C( 446.87), SIMDE_FLOAT32_C( 816.32), SIMDE_FLOAT32_C( 622.16), SIMDE_FLOAT32_C( 40.97), SIMDE_FLOAT32_C( -230.30), SIMDE_FLOAT32_C( 122.84), SIMDE_FLOAT32_C( 457.98), SIMDE_FLOAT32_C( -118.87), SIMDE_FLOAT32_C( -211.46)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 271.75), SIMDE_FLOAT32_C( 598.59), SIMDE_FLOAT32_C( -130.09), SIMDE_FLOAT32_C( 474.47), SIMDE_FLOAT32_C( -94.60), SIMDE_FLOAT32_C( 846.28), SIMDE_FLOAT32_C( 108.99), SIMDE_FLOAT32_C( -793.22), SIMDE_FLOAT32_C( -12.05), SIMDE_FLOAT32_C( -325.70), SIMDE_FLOAT32_C( -510.95), SIMDE_FLOAT32_C( 213.60), SIMDE_FLOAT32_C( -818.29), SIMDE_FLOAT32_C( -431.12), SIMDE_FLOAT32_C( -186.49), SIMDE_FLOAT32_C( 53.27)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -412.81), SIMDE_FLOAT32_C( -265.44), SIMDE_FLOAT32_C( -550.71), SIMDE_FLOAT32_C( -725.27), SIMDE_FLOAT32_C( -6.34), SIMDE_FLOAT32_C( -0.48), 
SIMDE_FLOAT32_C( -6.10), SIMDE_FLOAT32_C( 778.83), SIMDE_FLOAT32_C( 480.23), SIMDE_FLOAT32_C( -1.91), SIMDE_FLOAT32_C( 489.09), SIMDE_FLOAT32_C( -1.08), SIMDE_FLOAT32_C( -569.06), SIMDE_FLOAT32_C( -1.06), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( -3.97)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -254.94), SIMDE_FLOAT32_C( 234.45), SIMDE_FLOAT32_C( 235.56), SIMDE_FLOAT32_C( 930.35), SIMDE_FLOAT32_C( 137.93), SIMDE_FLOAT32_C( 979.46), SIMDE_FLOAT32_C( 688.15), SIMDE_FLOAT32_C( 707.95), SIMDE_FLOAT32_C( 35.42), SIMDE_FLOAT32_C( 748.55), SIMDE_FLOAT32_C( 649.98), SIMDE_FLOAT32_C( 702.04), SIMDE_FLOAT32_C( 443.56), SIMDE_FLOAT32_C( -944.39), SIMDE_FLOAT32_C( 717.51), SIMDE_FLOAT32_C( 716.62)), UINT16_C(24144), simde_mm512_set_ps(SIMDE_FLOAT32_C( -982.71), SIMDE_FLOAT32_C( 639.75), SIMDE_FLOAT32_C( 842.03), SIMDE_FLOAT32_C( 717.68), SIMDE_FLOAT32_C( 294.25), SIMDE_FLOAT32_C( -411.52), SIMDE_FLOAT32_C( 632.28), SIMDE_FLOAT32_C( 531.91), SIMDE_FLOAT32_C( -198.66), SIMDE_FLOAT32_C( 722.92), SIMDE_FLOAT32_C( -890.25), SIMDE_FLOAT32_C( -36.77), SIMDE_FLOAT32_C( -651.17), SIMDE_FLOAT32_C( 559.24), SIMDE_FLOAT32_C( 496.39), SIMDE_FLOAT32_C( -143.68)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -779.19), SIMDE_FLOAT32_C( 587.24), SIMDE_FLOAT32_C( 850.25), SIMDE_FLOAT32_C( 172.75), SIMDE_FLOAT32_C( 237.73), SIMDE_FLOAT32_C( 792.79), SIMDE_FLOAT32_C( -225.26), SIMDE_FLOAT32_C( 810.16), SIMDE_FLOAT32_C( 235.61), SIMDE_FLOAT32_C( 123.68), SIMDE_FLOAT32_C( -869.51), SIMDE_FLOAT32_C( 811.23), SIMDE_FLOAT32_C( 292.28), SIMDE_FLOAT32_C( 158.60), SIMDE_FLOAT32_C( -861.10), SIMDE_FLOAT32_C( 297.31)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -254.94), SIMDE_FLOAT32_C( 1.09), SIMDE_FLOAT32_C( 235.56), SIMDE_FLOAT32_C( 4.15), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( -2.81), SIMDE_FLOAT32_C( 707.95), SIMDE_FLOAT32_C( 35.42), SIMDE_FLOAT32_C( 5.85), SIMDE_FLOAT32_C( 649.98), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( 443.56), SIMDE_FLOAT32_C( -944.39), SIMDE_FLOAT32_C( 717.51), 
SIMDE_FLOAT32_C( 716.62)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 711.46), SIMDE_FLOAT32_C( -417.42), SIMDE_FLOAT32_C( -736.14), SIMDE_FLOAT32_C( -654.73), SIMDE_FLOAT32_C( -297.59), SIMDE_FLOAT32_C( 899.88), SIMDE_FLOAT32_C( 819.21), SIMDE_FLOAT32_C( -451.55), SIMDE_FLOAT32_C( 831.09), SIMDE_FLOAT32_C( 694.55), SIMDE_FLOAT32_C( -231.88), SIMDE_FLOAT32_C( -711.25), SIMDE_FLOAT32_C( -213.96), SIMDE_FLOAT32_C( -411.84), SIMDE_FLOAT32_C( -325.79), SIMDE_FLOAT32_C( -424.22)), UINT16_C( 4465), simde_mm512_set_ps(SIMDE_FLOAT32_C( 985.56), SIMDE_FLOAT32_C( -969.44), SIMDE_FLOAT32_C( -91.63), SIMDE_FLOAT32_C( -416.19), SIMDE_FLOAT32_C( 716.00), SIMDE_FLOAT32_C( 579.33), SIMDE_FLOAT32_C( 678.78), SIMDE_FLOAT32_C( 650.46), SIMDE_FLOAT32_C( -988.30), SIMDE_FLOAT32_C( 206.47), SIMDE_FLOAT32_C( 214.00), SIMDE_FLOAT32_C( -226.18), SIMDE_FLOAT32_C( -410.63), SIMDE_FLOAT32_C( -238.02), SIMDE_FLOAT32_C( 520.82), SIMDE_FLOAT32_C( -882.63)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 373.48), SIMDE_FLOAT32_C( -376.14), SIMDE_FLOAT32_C( 103.99), SIMDE_FLOAT32_C( 900.82), SIMDE_FLOAT32_C( 827.14), SIMDE_FLOAT32_C( -50.15), SIMDE_FLOAT32_C( 675.06), SIMDE_FLOAT32_C( 239.90), SIMDE_FLOAT32_C( 531.97), SIMDE_FLOAT32_C( 52.69), SIMDE_FLOAT32_C( -376.06), SIMDE_FLOAT32_C( -290.42), SIMDE_FLOAT32_C( -325.12), SIMDE_FLOAT32_C( -471.17), SIMDE_FLOAT32_C( -511.21), SIMDE_FLOAT32_C( -90.11)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 711.46), SIMDE_FLOAT32_C( -417.42), SIMDE_FLOAT32_C( -736.14), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( -297.59), SIMDE_FLOAT32_C( 899.88), SIMDE_FLOAT32_C( 819.21), SIMDE_FLOAT32_C( 2.71), SIMDE_FLOAT32_C( 831.09), SIMDE_FLOAT32_C( 3.92), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -213.96), SIMDE_FLOAT32_C( -411.84), SIMDE_FLOAT32_C( -325.79), SIMDE_FLOAT32_C( 9.80)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -788.99), SIMDE_FLOAT32_C( 888.94), SIMDE_FLOAT32_C( 861.99), SIMDE_FLOAT32_C( -655.94), SIMDE_FLOAT32_C( -815.78), SIMDE_FLOAT32_C( 460.30), 
SIMDE_FLOAT32_C( -596.09), SIMDE_FLOAT32_C( 480.08), SIMDE_FLOAT32_C( -800.23), SIMDE_FLOAT32_C( -511.53), SIMDE_FLOAT32_C( 235.71), SIMDE_FLOAT32_C( 833.52), SIMDE_FLOAT32_C( 343.49), SIMDE_FLOAT32_C( 413.97), SIMDE_FLOAT32_C( 264.73), SIMDE_FLOAT32_C( 769.22)), UINT16_C(57880), simde_mm512_set_ps(SIMDE_FLOAT32_C( -782.73), SIMDE_FLOAT32_C( -41.33), SIMDE_FLOAT32_C( 183.64), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( -449.70), SIMDE_FLOAT32_C( 153.64), SIMDE_FLOAT32_C( 543.55), SIMDE_FLOAT32_C( -321.17), SIMDE_FLOAT32_C( 944.46), SIMDE_FLOAT32_C( -863.15), SIMDE_FLOAT32_C( 155.57), SIMDE_FLOAT32_C( 671.09), SIMDE_FLOAT32_C( 138.46), SIMDE_FLOAT32_C( 937.90), SIMDE_FLOAT32_C( 367.36), SIMDE_FLOAT32_C( -187.79)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -177.92), SIMDE_FLOAT32_C( 382.46), SIMDE_FLOAT32_C( -344.53), SIMDE_FLOAT32_C( 306.51), SIMDE_FLOAT32_C( 804.79), SIMDE_FLOAT32_C( 74.50), SIMDE_FLOAT32_C( -171.92), SIMDE_FLOAT32_C( -865.07), SIMDE_FLOAT32_C( 788.06), SIMDE_FLOAT32_C( -723.82), SIMDE_FLOAT32_C( 43.98), SIMDE_FLOAT32_C( -303.25), SIMDE_FLOAT32_C( -511.21), SIMDE_FLOAT32_C( 460.56), SIMDE_FLOAT32_C( 217.57), SIMDE_FLOAT32_C( -900.02)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 4.40), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( -655.94), SIMDE_FLOAT32_C( -815.78), SIMDE_FLOAT32_C( 460.30), SIMDE_FLOAT32_C( -3.16), SIMDE_FLOAT32_C( 480.08), SIMDE_FLOAT32_C( -800.23), SIMDE_FLOAT32_C( -511.53), SIMDE_FLOAT32_C( 235.71), SIMDE_FLOAT32_C( -2.21), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 413.97), SIMDE_FLOAT32_C( 264.73), SIMDE_FLOAT32_C( 769.22)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 54.65), SIMDE_FLOAT32_C( 276.51), SIMDE_FLOAT32_C( 227.98), SIMDE_FLOAT32_C( -128.31), SIMDE_FLOAT32_C( -191.48), SIMDE_FLOAT32_C( 348.63), SIMDE_FLOAT32_C( 444.48), SIMDE_FLOAT32_C( 206.11), SIMDE_FLOAT32_C( -692.44), SIMDE_FLOAT32_C( -865.72), SIMDE_FLOAT32_C( 763.64), SIMDE_FLOAT32_C( -849.66), SIMDE_FLOAT32_C( 804.26), SIMDE_FLOAT32_C( 570.08), 
SIMDE_FLOAT32_C( 125.91), SIMDE_FLOAT32_C( 149.60)), UINT16_C(24771), simde_mm512_set_ps(SIMDE_FLOAT32_C( 747.34), SIMDE_FLOAT32_C( 607.83), SIMDE_FLOAT32_C( 25.24), SIMDE_FLOAT32_C( -542.52), SIMDE_FLOAT32_C( 568.70), SIMDE_FLOAT32_C( 899.42), SIMDE_FLOAT32_C( 120.86), SIMDE_FLOAT32_C( -424.59), SIMDE_FLOAT32_C( 377.13), SIMDE_FLOAT32_C( 761.91), SIMDE_FLOAT32_C( -902.23), SIMDE_FLOAT32_C( -759.84), SIMDE_FLOAT32_C( 430.99), SIMDE_FLOAT32_C( 555.32), SIMDE_FLOAT32_C( -397.14), SIMDE_FLOAT32_C( 608.52)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -615.94), SIMDE_FLOAT32_C( 113.16), SIMDE_FLOAT32_C( 26.04), SIMDE_FLOAT32_C( -142.02), SIMDE_FLOAT32_C( 273.49), SIMDE_FLOAT32_C( 374.88), SIMDE_FLOAT32_C( 453.99), SIMDE_FLOAT32_C( -241.36), SIMDE_FLOAT32_C( 181.97), SIMDE_FLOAT32_C( 143.35), SIMDE_FLOAT32_C( 400.04), SIMDE_FLOAT32_C( 610.27), SIMDE_FLOAT32_C( -726.06), SIMDE_FLOAT32_C( -819.96), SIMDE_FLOAT32_C( 674.91), SIMDE_FLOAT32_C( 406.86)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 54.65), SIMDE_FLOAT32_C( 5.37), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( -128.31), SIMDE_FLOAT32_C( -191.48), SIMDE_FLOAT32_C( 348.63), SIMDE_FLOAT32_C( 444.48), SIMDE_FLOAT32_C( 206.11), SIMDE_FLOAT32_C( 2.07), SIMDE_FLOAT32_C( 5.32), SIMDE_FLOAT32_C( 763.64), SIMDE_FLOAT32_C( -849.66), SIMDE_FLOAT32_C( 804.26), SIMDE_FLOAT32_C( 570.08), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( 1.50)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_div_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_maskz_div_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512 a; simde__m512 b; simde__m512 r; } test_vec[8] = { { UINT16_C(32824), simde_mm512_set_ps(SIMDE_FLOAT32_C( 745.69), SIMDE_FLOAT32_C( -258.59), SIMDE_FLOAT32_C( -549.06), SIMDE_FLOAT32_C( 646.98), SIMDE_FLOAT32_C( 925.86), SIMDE_FLOAT32_C( 378.90), SIMDE_FLOAT32_C( 
-524.10), SIMDE_FLOAT32_C( -563.31), SIMDE_FLOAT32_C( 112.08), SIMDE_FLOAT32_C( 712.48), SIMDE_FLOAT32_C( -754.71), SIMDE_FLOAT32_C( 256.61), SIMDE_FLOAT32_C( 768.73), SIMDE_FLOAT32_C( 227.99), SIMDE_FLOAT32_C( 174.97), SIMDE_FLOAT32_C( 338.39)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 841.82), SIMDE_FLOAT32_C( -330.14), SIMDE_FLOAT32_C( -498.05), SIMDE_FLOAT32_C( -706.46), SIMDE_FLOAT32_C( -284.71), SIMDE_FLOAT32_C( -940.98), SIMDE_FLOAT32_C( -491.84), SIMDE_FLOAT32_C( 52.49), SIMDE_FLOAT32_C( 759.92), SIMDE_FLOAT32_C( 629.58), SIMDE_FLOAT32_C( 23.76), SIMDE_FLOAT32_C( 980.95), SIMDE_FLOAT32_C( 224.97), SIMDE_FLOAT32_C( 818.07), SIMDE_FLOAT32_C( -531.75), SIMDE_FLOAT32_C( -531.67)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -31.76), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C( 4283), simde_mm512_set_ps(SIMDE_FLOAT32_C( 838.22), SIMDE_FLOAT32_C( 464.78), SIMDE_FLOAT32_C( -248.37), SIMDE_FLOAT32_C( 28.49), SIMDE_FLOAT32_C( -176.67), SIMDE_FLOAT32_C( -468.39), SIMDE_FLOAT32_C( -893.30), SIMDE_FLOAT32_C( 771.96), SIMDE_FLOAT32_C( -167.30), SIMDE_FLOAT32_C( -738.71), SIMDE_FLOAT32_C( -816.67), SIMDE_FLOAT32_C( 43.31), SIMDE_FLOAT32_C( -98.40), SIMDE_FLOAT32_C( 217.89), SIMDE_FLOAT32_C( 626.98), SIMDE_FLOAT32_C( -409.09)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -156.59), SIMDE_FLOAT32_C( 153.69), SIMDE_FLOAT32_C( 895.38), SIMDE_FLOAT32_C( -242.63), SIMDE_FLOAT32_C( 994.17), SIMDE_FLOAT32_C( -265.23), SIMDE_FLOAT32_C( -57.91), SIMDE_FLOAT32_C( -586.11), SIMDE_FLOAT32_C( -443.71), SIMDE_FLOAT32_C( -786.78), SIMDE_FLOAT32_C( -92.41), SIMDE_FLOAT32_C( -378.62), SIMDE_FLOAT32_C( 632.49), SIMDE_FLOAT32_C( -867.20), SIMDE_FLOAT32_C( 977.79), SIMDE_FLOAT32_C( 
-788.71)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 8.84), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.52)) }, { UINT16_C(27708), simde_mm512_set_ps(SIMDE_FLOAT32_C( -309.30), SIMDE_FLOAT32_C( -478.69), SIMDE_FLOAT32_C( -499.66), SIMDE_FLOAT32_C( -834.97), SIMDE_FLOAT32_C( -926.76), SIMDE_FLOAT32_C( 306.74), SIMDE_FLOAT32_C( 350.68), SIMDE_FLOAT32_C( 698.74), SIMDE_FLOAT32_C( -748.23), SIMDE_FLOAT32_C( 960.31), SIMDE_FLOAT32_C( -52.56), SIMDE_FLOAT32_C( -18.49), SIMDE_FLOAT32_C( -174.79), SIMDE_FLOAT32_C( -875.70), SIMDE_FLOAT32_C( 270.45), SIMDE_FLOAT32_C( 571.57)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -891.46), SIMDE_FLOAT32_C( 298.88), SIMDE_FLOAT32_C( 907.92), SIMDE_FLOAT32_C( 585.94), SIMDE_FLOAT32_C( 976.00), SIMDE_FLOAT32_C( 860.60), SIMDE_FLOAT32_C( -807.57), SIMDE_FLOAT32_C( -501.53), SIMDE_FLOAT32_C( 887.26), SIMDE_FLOAT32_C( -380.63), SIMDE_FLOAT32_C( 603.15), SIMDE_FLOAT32_C( 906.17), SIMDE_FLOAT32_C( -446.90), SIMDE_FLOAT32_C( 518.96), SIMDE_FLOAT32_C( 325.09), SIMDE_FLOAT32_C( 394.29)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -1.60), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( -1.69), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(21979), simde_mm512_set_ps(SIMDE_FLOAT32_C( -722.04), SIMDE_FLOAT32_C( -251.19), SIMDE_FLOAT32_C( 885.20), SIMDE_FLOAT32_C( -718.95), SIMDE_FLOAT32_C( -995.18), SIMDE_FLOAT32_C( 316.41), SIMDE_FLOAT32_C( 425.49), SIMDE_FLOAT32_C( -889.60), 
SIMDE_FLOAT32_C( -764.37), SIMDE_FLOAT32_C( -698.84), SIMDE_FLOAT32_C( 111.54), SIMDE_FLOAT32_C( 627.05), SIMDE_FLOAT32_C( 619.20), SIMDE_FLOAT32_C( 107.79), SIMDE_FLOAT32_C( 830.07), SIMDE_FLOAT32_C( -991.50)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -9.32), SIMDE_FLOAT32_C( 588.15), SIMDE_FLOAT32_C( 740.36), SIMDE_FLOAT32_C( 589.64), SIMDE_FLOAT32_C( -146.10), SIMDE_FLOAT32_C( 771.62), SIMDE_FLOAT32_C( -975.31), SIMDE_FLOAT32_C( 550.04), SIMDE_FLOAT32_C( 902.97), SIMDE_FLOAT32_C( -970.67), SIMDE_FLOAT32_C( -396.71), SIMDE_FLOAT32_C( 740.42), SIMDE_FLOAT32_C( -740.07), SIMDE_FLOAT32_C( 691.95), SIMDE_FLOAT32_C( -434.89), SIMDE_FLOAT32_C( 270.74)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -1.22), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -1.62), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -1.91), SIMDE_FLOAT32_C( -3.66)) }, { UINT16_C( 1193), simde_mm512_set_ps(SIMDE_FLOAT32_C( 347.59), SIMDE_FLOAT32_C( 162.65), SIMDE_FLOAT32_C( 724.10), SIMDE_FLOAT32_C( 124.00), SIMDE_FLOAT32_C( -823.97), SIMDE_FLOAT32_C( -185.15), SIMDE_FLOAT32_C( 33.85), SIMDE_FLOAT32_C( -430.54), SIMDE_FLOAT32_C( -534.02), SIMDE_FLOAT32_C( 815.29), SIMDE_FLOAT32_C( 942.25), SIMDE_FLOAT32_C( -825.08), SIMDE_FLOAT32_C( 638.03), SIMDE_FLOAT32_C( 599.07), SIMDE_FLOAT32_C( 164.45), SIMDE_FLOAT32_C( 429.94)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -817.35), SIMDE_FLOAT32_C( -889.89), SIMDE_FLOAT32_C( 528.79), SIMDE_FLOAT32_C( -600.85), SIMDE_FLOAT32_C( -168.12), SIMDE_FLOAT32_C( -798.12), SIMDE_FLOAT32_C( -637.75), SIMDE_FLOAT32_C( -580.73), SIMDE_FLOAT32_C( 697.23), SIMDE_FLOAT32_C( 654.25), SIMDE_FLOAT32_C( -236.09), SIMDE_FLOAT32_C( 234.13), SIMDE_FLOAT32_C( -696.60), SIMDE_FLOAT32_C( -486.03), SIMDE_FLOAT32_C( 69.79), SIMDE_FLOAT32_C( 435.18)), 
simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -3.99), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.99)) }, { UINT16_C(47777), simde_mm512_set_ps(SIMDE_FLOAT32_C( 96.65), SIMDE_FLOAT32_C( -38.76), SIMDE_FLOAT32_C( 585.22), SIMDE_FLOAT32_C( -683.52), SIMDE_FLOAT32_C( 268.64), SIMDE_FLOAT32_C( -393.28), SIMDE_FLOAT32_C( 102.94), SIMDE_FLOAT32_C( 786.82), SIMDE_FLOAT32_C( 138.90), SIMDE_FLOAT32_C( 225.78), SIMDE_FLOAT32_C( 449.88), SIMDE_FLOAT32_C( 347.32), SIMDE_FLOAT32_C( 33.80), SIMDE_FLOAT32_C( -559.14), SIMDE_FLOAT32_C( -159.05), SIMDE_FLOAT32_C( -491.42)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -55.95), SIMDE_FLOAT32_C( -837.50), SIMDE_FLOAT32_C( -575.23), SIMDE_FLOAT32_C( 248.03), SIMDE_FLOAT32_C( 907.04), SIMDE_FLOAT32_C( -74.96), SIMDE_FLOAT32_C( -821.80), SIMDE_FLOAT32_C( -847.93), SIMDE_FLOAT32_C( -925.94), SIMDE_FLOAT32_C( 664.01), SIMDE_FLOAT32_C( -745.59), SIMDE_FLOAT32_C( -301.31), SIMDE_FLOAT32_C( 146.53), SIMDE_FLOAT32_C( -440.81), SIMDE_FLOAT32_C( 427.27), SIMDE_FLOAT32_C( -219.59)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.73), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -1.02), SIMDE_FLOAT32_C( -2.76), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.24)) }, { UINT16_C(50336), simde_mm512_set_ps(SIMDE_FLOAT32_C( -724.66), SIMDE_FLOAT32_C( -778.29), SIMDE_FLOAT32_C( -888.11), SIMDE_FLOAT32_C( -623.31), SIMDE_FLOAT32_C( 617.08), SIMDE_FLOAT32_C( 42.91), SIMDE_FLOAT32_C( 907.40), SIMDE_FLOAT32_C( -402.88), SIMDE_FLOAT32_C( 
-278.23), SIMDE_FLOAT32_C( -640.08), SIMDE_FLOAT32_C( 108.85), SIMDE_FLOAT32_C( -527.72), SIMDE_FLOAT32_C( -791.82), SIMDE_FLOAT32_C( -207.31), SIMDE_FLOAT32_C( -642.88), SIMDE_FLOAT32_C( 536.44)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 766.31), SIMDE_FLOAT32_C( 592.01), SIMDE_FLOAT32_C( 324.90), SIMDE_FLOAT32_C( 55.55), SIMDE_FLOAT32_C( -34.13), SIMDE_FLOAT32_C( -588.88), SIMDE_FLOAT32_C( 991.78), SIMDE_FLOAT32_C( -468.91), SIMDE_FLOAT32_C( 78.86), SIMDE_FLOAT32_C( 18.25), SIMDE_FLOAT32_C( 295.51), SIMDE_FLOAT32_C( -293.26), SIMDE_FLOAT32_C( -877.24), SIMDE_FLOAT32_C( 952.33), SIMDE_FLOAT32_C( -274.18), SIMDE_FLOAT32_C( 654.17)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( -1.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -3.53), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C( 740), simde_mm512_set_ps(SIMDE_FLOAT32_C( -222.38), SIMDE_FLOAT32_C( -847.72), SIMDE_FLOAT32_C( -497.04), SIMDE_FLOAT32_C( 862.38), SIMDE_FLOAT32_C( -840.40), SIMDE_FLOAT32_C( 998.10), SIMDE_FLOAT32_C( -257.93), SIMDE_FLOAT32_C( -204.46), SIMDE_FLOAT32_C( -373.11), SIMDE_FLOAT32_C( -912.42), SIMDE_FLOAT32_C( 207.13), SIMDE_FLOAT32_C( 784.69), SIMDE_FLOAT32_C( 82.66), SIMDE_FLOAT32_C( 123.09), SIMDE_FLOAT32_C( -384.17), SIMDE_FLOAT32_C( -845.08)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 839.49), SIMDE_FLOAT32_C( -285.39), SIMDE_FLOAT32_C( -548.76), SIMDE_FLOAT32_C( -35.10), SIMDE_FLOAT32_C( -295.04), SIMDE_FLOAT32_C( 738.77), SIMDE_FLOAT32_C( 340.04), SIMDE_FLOAT32_C( -585.87), SIMDE_FLOAT32_C( -711.46), SIMDE_FLOAT32_C( 926.37), SIMDE_FLOAT32_C( 696.23), SIMDE_FLOAT32_C( 766.17), SIMDE_FLOAT32_C( -330.24), SIMDE_FLOAT32_C( 369.18), SIMDE_FLOAT32_C( -498.71), SIMDE_FLOAT32_C( -288.61)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_maskz_div_ps(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_div_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d b; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -97.83), SIMDE_FLOAT64_C( -393.82), SIMDE_FLOAT64_C( 934.81), SIMDE_FLOAT64_C( 74.53), SIMDE_FLOAT64_C( 843.79), SIMDE_FLOAT64_C( 465.05), SIMDE_FLOAT64_C( -42.07), SIMDE_FLOAT64_C( -685.83)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 296.78), SIMDE_FLOAT64_C( -698.78), SIMDE_FLOAT64_C( 908.33), SIMDE_FLOAT64_C( 181.85), SIMDE_FLOAT64_C( -397.89), SIMDE_FLOAT64_C( -586.75), SIMDE_FLOAT64_C( 904.99), SIMDE_FLOAT64_C( -321.15)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 1.03), SIMDE_FLOAT64_C( 0.41), SIMDE_FLOAT64_C( -2.12), SIMDE_FLOAT64_C( -0.79), SIMDE_FLOAT64_C( -0.05), SIMDE_FLOAT64_C( 2.14)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 653.62), SIMDE_FLOAT64_C( 981.74), SIMDE_FLOAT64_C( 780.10), SIMDE_FLOAT64_C( 59.38), SIMDE_FLOAT64_C( -795.11), SIMDE_FLOAT64_C( 923.87), SIMDE_FLOAT64_C( -270.01), SIMDE_FLOAT64_C( -411.99)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 596.54), SIMDE_FLOAT64_C( -116.40), SIMDE_FLOAT64_C( -989.77), SIMDE_FLOAT64_C( -794.40), SIMDE_FLOAT64_C( 183.38), SIMDE_FLOAT64_C( -185.75), SIMDE_FLOAT64_C( 429.70), SIMDE_FLOAT64_C( 664.04)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.10), SIMDE_FLOAT64_C( -8.43), SIMDE_FLOAT64_C( -0.79), SIMDE_FLOAT64_C( -0.07), 
SIMDE_FLOAT64_C( -4.34), SIMDE_FLOAT64_C( -4.97), SIMDE_FLOAT64_C( -0.63), SIMDE_FLOAT64_C( -0.62)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -571.10), SIMDE_FLOAT64_C( 971.96), SIMDE_FLOAT64_C( 149.38), SIMDE_FLOAT64_C( 497.71), SIMDE_FLOAT64_C( 988.69), SIMDE_FLOAT64_C( 479.68), SIMDE_FLOAT64_C( -128.24), SIMDE_FLOAT64_C( 585.28)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -628.68), SIMDE_FLOAT64_C( 938.60), SIMDE_FLOAT64_C( -147.98), SIMDE_FLOAT64_C( 378.31), SIMDE_FLOAT64_C( 246.47), SIMDE_FLOAT64_C( 109.18), SIMDE_FLOAT64_C( -575.64), SIMDE_FLOAT64_C( -426.86)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( 1.04), SIMDE_FLOAT64_C( -1.01), SIMDE_FLOAT64_C( 1.32), SIMDE_FLOAT64_C( 4.01), SIMDE_FLOAT64_C( 4.39), SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( -1.37)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 729.63), SIMDE_FLOAT64_C( -908.06), SIMDE_FLOAT64_C( -769.77), SIMDE_FLOAT64_C( -70.66), SIMDE_FLOAT64_C( 482.71), SIMDE_FLOAT64_C( 244.66), SIMDE_FLOAT64_C( -615.83), SIMDE_FLOAT64_C( 841.42)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 359.65), SIMDE_FLOAT64_C( -730.08), SIMDE_FLOAT64_C( 977.98), SIMDE_FLOAT64_C( -215.53), SIMDE_FLOAT64_C( -315.50), SIMDE_FLOAT64_C( 80.64), SIMDE_FLOAT64_C( -996.10), SIMDE_FLOAT64_C( -556.83)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.03), SIMDE_FLOAT64_C( 1.24), SIMDE_FLOAT64_C( -0.79), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( -1.53), SIMDE_FLOAT64_C( 3.03), SIMDE_FLOAT64_C( 0.62), SIMDE_FLOAT64_C( -1.51)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 438.03), SIMDE_FLOAT64_C( -458.01), SIMDE_FLOAT64_C( 144.59), SIMDE_FLOAT64_C( 165.00), SIMDE_FLOAT64_C( -331.04), SIMDE_FLOAT64_C( 406.96), SIMDE_FLOAT64_C( -326.43), SIMDE_FLOAT64_C( 373.82)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 304.30), SIMDE_FLOAT64_C( -777.07), SIMDE_FLOAT64_C( -683.73), SIMDE_FLOAT64_C( -113.32), SIMDE_FLOAT64_C( -701.16), SIMDE_FLOAT64_C( -942.92), SIMDE_FLOAT64_C( -489.97), SIMDE_FLOAT64_C( 911.34)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.44), SIMDE_FLOAT64_C( 0.59), 
SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -1.46), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( -0.43), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.41)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -148.70), SIMDE_FLOAT64_C( -327.17), SIMDE_FLOAT64_C( -310.14), SIMDE_FLOAT64_C( -718.80), SIMDE_FLOAT64_C( 382.69), SIMDE_FLOAT64_C( -181.61), SIMDE_FLOAT64_C( -214.09), SIMDE_FLOAT64_C( 55.72)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 791.83), SIMDE_FLOAT64_C( 191.69), SIMDE_FLOAT64_C( -460.58), SIMDE_FLOAT64_C( -915.08), SIMDE_FLOAT64_C( -877.38), SIMDE_FLOAT64_C( -915.27), SIMDE_FLOAT64_C( 207.85), SIMDE_FLOAT64_C( 567.35)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( -1.71), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( -1.03), SIMDE_FLOAT64_C( 0.10)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -798.08), SIMDE_FLOAT64_C( 256.40), SIMDE_FLOAT64_C( 739.89), SIMDE_FLOAT64_C( -903.46), SIMDE_FLOAT64_C( -771.75), SIMDE_FLOAT64_C( -54.77), SIMDE_FLOAT64_C( 397.04), SIMDE_FLOAT64_C( 925.94)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 864.29), SIMDE_FLOAT64_C( -834.00), SIMDE_FLOAT64_C( 475.52), SIMDE_FLOAT64_C( 502.31), SIMDE_FLOAT64_C( -746.87), SIMDE_FLOAT64_C( -364.10), SIMDE_FLOAT64_C( -995.18), SIMDE_FLOAT64_C( 683.54)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( 1.56), SIMDE_FLOAT64_C( -1.80), SIMDE_FLOAT64_C( 1.03), SIMDE_FLOAT64_C( 0.15), SIMDE_FLOAT64_C( -0.40), SIMDE_FLOAT64_C( 1.35)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -869.58), SIMDE_FLOAT64_C( 763.75), SIMDE_FLOAT64_C( -558.93), SIMDE_FLOAT64_C( 756.19), SIMDE_FLOAT64_C( 509.82), SIMDE_FLOAT64_C( -855.71), SIMDE_FLOAT64_C( -965.40), SIMDE_FLOAT64_C( -279.29)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -355.51), SIMDE_FLOAT64_C( 136.73), SIMDE_FLOAT64_C( 586.70), SIMDE_FLOAT64_C( 712.56), SIMDE_FLOAT64_C( 135.88), SIMDE_FLOAT64_C( -693.91), SIMDE_FLOAT64_C( -131.33), SIMDE_FLOAT64_C( -933.79)), 
simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.45), SIMDE_FLOAT64_C( 5.59), SIMDE_FLOAT64_C( -0.95), SIMDE_FLOAT64_C( 1.06), SIMDE_FLOAT64_C( 3.75), SIMDE_FLOAT64_C( 1.23), SIMDE_FLOAT64_C( 7.35), SIMDE_FLOAT64_C( 0.30)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_div_pd(test_vec[i].a, test_vec[i].b); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_div_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d b; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -962.94), SIMDE_FLOAT64_C( 989.45), SIMDE_FLOAT64_C( -190.71), SIMDE_FLOAT64_C( -80.90), SIMDE_FLOAT64_C( -820.03), SIMDE_FLOAT64_C( 710.84), SIMDE_FLOAT64_C( 742.77), SIMDE_FLOAT64_C( -124.19)), UINT8_C( 62), simde_mm512_set_pd(SIMDE_FLOAT64_C( 764.73), SIMDE_FLOAT64_C( -738.72), SIMDE_FLOAT64_C( 462.89), SIMDE_FLOAT64_C( -909.36), SIMDE_FLOAT64_C( 920.77), SIMDE_FLOAT64_C( 830.94), SIMDE_FLOAT64_C( -436.90), SIMDE_FLOAT64_C( -984.49)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 318.55), SIMDE_FLOAT64_C( -958.54), SIMDE_FLOAT64_C( -878.41), SIMDE_FLOAT64_C( 198.47), SIMDE_FLOAT64_C( 585.51), SIMDE_FLOAT64_C( -97.52), SIMDE_FLOAT64_C( -112.08), SIMDE_FLOAT64_C( -145.20)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -962.94), SIMDE_FLOAT64_C( 989.45), SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( -4.58), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -8.52), SIMDE_FLOAT64_C( 3.90), SIMDE_FLOAT64_C( -124.19)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 267.17), SIMDE_FLOAT64_C( -878.34), SIMDE_FLOAT64_C( 132.07), SIMDE_FLOAT64_C( 827.87), SIMDE_FLOAT64_C( 178.51), SIMDE_FLOAT64_C( 362.39), SIMDE_FLOAT64_C( 200.13), SIMDE_FLOAT64_C( -407.98)), UINT8_C( 51), simde_mm512_set_pd(SIMDE_FLOAT64_C( -126.54), SIMDE_FLOAT64_C( -164.31), SIMDE_FLOAT64_C( -971.32), SIMDE_FLOAT64_C( 611.23), SIMDE_FLOAT64_C( 591.83), SIMDE_FLOAT64_C( 793.58), SIMDE_FLOAT64_C( 171.77), 
SIMDE_FLOAT64_C( 109.83)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 225.35), SIMDE_FLOAT64_C( -734.84), SIMDE_FLOAT64_C( 728.29), SIMDE_FLOAT64_C( -721.11), SIMDE_FLOAT64_C( -448.10), SIMDE_FLOAT64_C( 310.61), SIMDE_FLOAT64_C( -362.27), SIMDE_FLOAT64_C( -413.07)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 267.17), SIMDE_FLOAT64_C( -878.34), SIMDE_FLOAT64_C( -1.33), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( 178.51), SIMDE_FLOAT64_C( 362.39), SIMDE_FLOAT64_C( -0.47), SIMDE_FLOAT64_C( -0.27)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 439.30), SIMDE_FLOAT64_C( 935.39), SIMDE_FLOAT64_C( 20.00), SIMDE_FLOAT64_C( -941.65), SIMDE_FLOAT64_C( 988.79), SIMDE_FLOAT64_C( 773.96), SIMDE_FLOAT64_C( -788.78), SIMDE_FLOAT64_C( -311.91)), UINT8_C(178), simde_mm512_set_pd(SIMDE_FLOAT64_C( -374.30), SIMDE_FLOAT64_C( 599.21), SIMDE_FLOAT64_C( 966.83), SIMDE_FLOAT64_C( 775.18), SIMDE_FLOAT64_C( 846.32), SIMDE_FLOAT64_C( 124.04), SIMDE_FLOAT64_C( -883.36), SIMDE_FLOAT64_C( -405.09)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 467.70), SIMDE_FLOAT64_C( -626.02), SIMDE_FLOAT64_C( 355.93), SIMDE_FLOAT64_C( 294.34), SIMDE_FLOAT64_C( -575.79), SIMDE_FLOAT64_C( -504.82), SIMDE_FLOAT64_C( 854.52), SIMDE_FLOAT64_C( -173.82)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.80), SIMDE_FLOAT64_C( 935.39), SIMDE_FLOAT64_C( 2.72), SIMDE_FLOAT64_C( 2.63), SIMDE_FLOAT64_C( 988.79), SIMDE_FLOAT64_C( 773.96), SIMDE_FLOAT64_C( -1.03), SIMDE_FLOAT64_C( -311.91)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -274.81), SIMDE_FLOAT64_C( 196.78), SIMDE_FLOAT64_C( -805.22), SIMDE_FLOAT64_C( 855.89), SIMDE_FLOAT64_C( -996.67), SIMDE_FLOAT64_C( 424.78), SIMDE_FLOAT64_C( 489.73), SIMDE_FLOAT64_C( 635.35)), UINT8_C( 38), simde_mm512_set_pd(SIMDE_FLOAT64_C( 79.19), SIMDE_FLOAT64_C( -114.25), SIMDE_FLOAT64_C( 983.59), SIMDE_FLOAT64_C( 645.66), SIMDE_FLOAT64_C( 982.80), SIMDE_FLOAT64_C( -683.73), SIMDE_FLOAT64_C( 259.13), SIMDE_FLOAT64_C( 186.09)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 665.49), SIMDE_FLOAT64_C( -110.92), SIMDE_FLOAT64_C( 978.65), 
SIMDE_FLOAT64_C( 104.45), SIMDE_FLOAT64_C( 903.68), SIMDE_FLOAT64_C( -580.74), SIMDE_FLOAT64_C( 776.44), SIMDE_FLOAT64_C( 571.14)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -274.81), SIMDE_FLOAT64_C( 196.78), SIMDE_FLOAT64_C( 1.01), SIMDE_FLOAT64_C( 855.89), SIMDE_FLOAT64_C( -996.67), SIMDE_FLOAT64_C( 1.18), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( 635.35)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 105.93), SIMDE_FLOAT64_C( -486.73), SIMDE_FLOAT64_C( 293.04), SIMDE_FLOAT64_C( 328.58), SIMDE_FLOAT64_C( -725.03), SIMDE_FLOAT64_C( 3.53), SIMDE_FLOAT64_C( 663.75), SIMDE_FLOAT64_C( -59.32)), UINT8_C( 67), simde_mm512_set_pd(SIMDE_FLOAT64_C( 917.98), SIMDE_FLOAT64_C( -430.92), SIMDE_FLOAT64_C( 839.77), SIMDE_FLOAT64_C( -412.68), SIMDE_FLOAT64_C( -397.37), SIMDE_FLOAT64_C( -947.31), SIMDE_FLOAT64_C( 584.59), SIMDE_FLOAT64_C( -352.12)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 781.61), SIMDE_FLOAT64_C( 978.32), SIMDE_FLOAT64_C( 374.68), SIMDE_FLOAT64_C( -857.00), SIMDE_FLOAT64_C( 821.72), SIMDE_FLOAT64_C( -88.08), SIMDE_FLOAT64_C( 243.00), SIMDE_FLOAT64_C( -640.77)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 105.93), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 293.04), SIMDE_FLOAT64_C( 328.58), SIMDE_FLOAT64_C( -725.03), SIMDE_FLOAT64_C( 3.53), SIMDE_FLOAT64_C( 2.41), SIMDE_FLOAT64_C( 0.55)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -591.91), SIMDE_FLOAT64_C( 615.29), SIMDE_FLOAT64_C( -726.07), SIMDE_FLOAT64_C( 857.36), SIMDE_FLOAT64_C( 636.31), SIMDE_FLOAT64_C( 104.40), SIMDE_FLOAT64_C( -167.77), SIMDE_FLOAT64_C( -372.65)), UINT8_C( 15), simde_mm512_set_pd(SIMDE_FLOAT64_C( 411.16), SIMDE_FLOAT64_C( 928.95), SIMDE_FLOAT64_C( 110.13), SIMDE_FLOAT64_C( 933.76), SIMDE_FLOAT64_C( 836.76), SIMDE_FLOAT64_C( 628.60), SIMDE_FLOAT64_C( -586.52), SIMDE_FLOAT64_C( 293.24)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -457.28), SIMDE_FLOAT64_C( 705.56), SIMDE_FLOAT64_C( -798.08), SIMDE_FLOAT64_C( 773.61), SIMDE_FLOAT64_C( -590.48), SIMDE_FLOAT64_C( -291.69), SIMDE_FLOAT64_C( 654.27), SIMDE_FLOAT64_C( -537.59)), 
simde_mm512_set_pd(SIMDE_FLOAT64_C( -591.91), SIMDE_FLOAT64_C( 615.29), SIMDE_FLOAT64_C( -726.07), SIMDE_FLOAT64_C( 857.36), SIMDE_FLOAT64_C( -1.42), SIMDE_FLOAT64_C( -2.16), SIMDE_FLOAT64_C( -0.90), SIMDE_FLOAT64_C( -0.55)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 270.92), SIMDE_FLOAT64_C( -517.94), SIMDE_FLOAT64_C( 36.22), SIMDE_FLOAT64_C( 204.54), SIMDE_FLOAT64_C( 579.30), SIMDE_FLOAT64_C( 257.34), SIMDE_FLOAT64_C( -998.24), SIMDE_FLOAT64_C( -146.41)), UINT8_C(152), simde_mm512_set_pd(SIMDE_FLOAT64_C( 268.93), SIMDE_FLOAT64_C( -893.46), SIMDE_FLOAT64_C( -476.89), SIMDE_FLOAT64_C( -696.00), SIMDE_FLOAT64_C( -817.69), SIMDE_FLOAT64_C( 127.75), SIMDE_FLOAT64_C( -366.34), SIMDE_FLOAT64_C( -437.04)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -372.16), SIMDE_FLOAT64_C( 900.88), SIMDE_FLOAT64_C( -550.65), SIMDE_FLOAT64_C( 567.85), SIMDE_FLOAT64_C( 968.56), SIMDE_FLOAT64_C( -695.12), SIMDE_FLOAT64_C( 555.56), SIMDE_FLOAT64_C( 952.92)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.72), SIMDE_FLOAT64_C( -517.94), SIMDE_FLOAT64_C( 36.22), SIMDE_FLOAT64_C( -1.23), SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( 257.34), SIMDE_FLOAT64_C( -998.24), SIMDE_FLOAT64_C( -146.41)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -468.36), SIMDE_FLOAT64_C( 186.16), SIMDE_FLOAT64_C( -910.43), SIMDE_FLOAT64_C( -280.07), SIMDE_FLOAT64_C( -96.94), SIMDE_FLOAT64_C( 387.95), SIMDE_FLOAT64_C( 198.14), SIMDE_FLOAT64_C( -504.51)), UINT8_C( 21), simde_mm512_set_pd(SIMDE_FLOAT64_C( 573.90), SIMDE_FLOAT64_C( 496.67), SIMDE_FLOAT64_C( -823.61), SIMDE_FLOAT64_C( 204.56), SIMDE_FLOAT64_C( -856.87), SIMDE_FLOAT64_C( -449.28), SIMDE_FLOAT64_C( 9.73), SIMDE_FLOAT64_C( -739.12)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -471.24), SIMDE_FLOAT64_C( -711.43), SIMDE_FLOAT64_C( -281.50), SIMDE_FLOAT64_C( 493.76), SIMDE_FLOAT64_C( 103.01), SIMDE_FLOAT64_C( -996.35), SIMDE_FLOAT64_C( 670.04), SIMDE_FLOAT64_C( -895.53)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -468.36), SIMDE_FLOAT64_C( 186.16), SIMDE_FLOAT64_C( -910.43), SIMDE_FLOAT64_C( 
0.41), SIMDE_FLOAT64_C( -96.94), SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( 198.14), SIMDE_FLOAT64_C( 0.83)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_div_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_maskz_div_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512d a; simde__m512d b; simde__m512d r; } test_vec[8] = { { UINT8_C(113), simde_mm512_set_pd(SIMDE_FLOAT64_C( 112.08), SIMDE_FLOAT64_C( 712.48), SIMDE_FLOAT64_C( -754.71), SIMDE_FLOAT64_C( 256.61), SIMDE_FLOAT64_C( 768.73), SIMDE_FLOAT64_C( 227.99), SIMDE_FLOAT64_C( 174.97), SIMDE_FLOAT64_C( 338.39)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 759.92), SIMDE_FLOAT64_C( 629.58), SIMDE_FLOAT64_C( 23.76), SIMDE_FLOAT64_C( 980.95), SIMDE_FLOAT64_C( 224.97), SIMDE_FLOAT64_C( 818.07), SIMDE_FLOAT64_C( -531.75), SIMDE_FLOAT64_C( -531.67)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.13), SIMDE_FLOAT64_C( -31.76), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.64)) }, { UINT8_C( 88), simde_mm512_set_pd(SIMDE_FLOAT64_C( 841.82), SIMDE_FLOAT64_C( -330.14), SIMDE_FLOAT64_C( -498.05), SIMDE_FLOAT64_C( -706.46), SIMDE_FLOAT64_C( -284.71), SIMDE_FLOAT64_C( -940.98), SIMDE_FLOAT64_C( -491.84), SIMDE_FLOAT64_C( 52.49)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 259.38), SIMDE_FLOAT64_C( 745.69), SIMDE_FLOAT64_C( -258.59), SIMDE_FLOAT64_C( -549.06), SIMDE_FLOAT64_C( 646.98), SIMDE_FLOAT64_C( 925.86), SIMDE_FLOAT64_C( 378.90), SIMDE_FLOAT64_C( -524.10)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.29), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(184), simde_mm512_set_pd(SIMDE_FLOAT64_C( -167.30), SIMDE_FLOAT64_C( -738.71), 
SIMDE_FLOAT64_C( -816.67), SIMDE_FLOAT64_C( 43.31), SIMDE_FLOAT64_C( -98.40), SIMDE_FLOAT64_C( 217.89), SIMDE_FLOAT64_C( 626.98), SIMDE_FLOAT64_C( -409.09)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -443.71), SIMDE_FLOAT64_C( -786.78), SIMDE_FLOAT64_C( -92.41), SIMDE_FLOAT64_C( -378.62), SIMDE_FLOAT64_C( 632.49), SIMDE_FLOAT64_C( -867.20), SIMDE_FLOAT64_C( 977.79), SIMDE_FLOAT64_C( -788.71)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 8.84), SIMDE_FLOAT64_C( -0.11), SIMDE_FLOAT64_C( -0.16), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(119), simde_mm512_set_pd(SIMDE_FLOAT64_C( -156.59), SIMDE_FLOAT64_C( 153.69), SIMDE_FLOAT64_C( 895.38), SIMDE_FLOAT64_C( -242.63), SIMDE_FLOAT64_C( 994.17), SIMDE_FLOAT64_C( -265.23), SIMDE_FLOAT64_C( -57.91), SIMDE_FLOAT64_C( -586.11)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -851.62), SIMDE_FLOAT64_C( 838.22), SIMDE_FLOAT64_C( 464.78), SIMDE_FLOAT64_C( -248.37), SIMDE_FLOAT64_C( 28.49), SIMDE_FLOAT64_C( -176.67), SIMDE_FLOAT64_C( -468.39), SIMDE_FLOAT64_C( -893.30)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 1.93), SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.50), SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( 0.66)) }, { UINT8_C(181), simde_mm512_set_pd(SIMDE_FLOAT64_C( -748.23), SIMDE_FLOAT64_C( 960.31), SIMDE_FLOAT64_C( -52.56), SIMDE_FLOAT64_C( -18.49), SIMDE_FLOAT64_C( -174.79), SIMDE_FLOAT64_C( -875.70), SIMDE_FLOAT64_C( 270.45), SIMDE_FLOAT64_C( 571.57)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 887.26), SIMDE_FLOAT64_C( -380.63), SIMDE_FLOAT64_C( 603.15), SIMDE_FLOAT64_C( 906.17), SIMDE_FLOAT64_C( -446.90), SIMDE_FLOAT64_C( 518.96), SIMDE_FLOAT64_C( 325.09), SIMDE_FLOAT64_C( 394.29)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.09), SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -1.69), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.45)) }, { 
UINT8_C(108), simde_mm512_set_pd(SIMDE_FLOAT64_C( -891.46), SIMDE_FLOAT64_C( 298.88), SIMDE_FLOAT64_C( 907.92), SIMDE_FLOAT64_C( 585.94), SIMDE_FLOAT64_C( 976.00), SIMDE_FLOAT64_C( 860.60), SIMDE_FLOAT64_C( -807.57), SIMDE_FLOAT64_C( -501.53)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -634.78), SIMDE_FLOAT64_C( -309.30), SIMDE_FLOAT64_C( -478.69), SIMDE_FLOAT64_C( -499.66), SIMDE_FLOAT64_C( -834.97), SIMDE_FLOAT64_C( -926.76), SIMDE_FLOAT64_C( 306.74), SIMDE_FLOAT64_C( 350.68)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.97), SIMDE_FLOAT64_C( -1.90), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -1.17), SIMDE_FLOAT64_C( -0.93), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C( 5), simde_mm512_set_pd(SIMDE_FLOAT64_C( -764.37), SIMDE_FLOAT64_C( -698.84), SIMDE_FLOAT64_C( 111.54), SIMDE_FLOAT64_C( 627.05), SIMDE_FLOAT64_C( 619.20), SIMDE_FLOAT64_C( 107.79), SIMDE_FLOAT64_C( 830.07), SIMDE_FLOAT64_C( -991.50)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 902.97), SIMDE_FLOAT64_C( -970.67), SIMDE_FLOAT64_C( -396.71), SIMDE_FLOAT64_C( 740.42), SIMDE_FLOAT64_C( -740.07), SIMDE_FLOAT64_C( 691.95), SIMDE_FLOAT64_C( -434.89), SIMDE_FLOAT64_C( 270.74)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.16), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -3.66)) }, { UINT8_C( 94), simde_mm512_set_pd(SIMDE_FLOAT64_C( -9.32), SIMDE_FLOAT64_C( 588.15), SIMDE_FLOAT64_C( 740.36), SIMDE_FLOAT64_C( 589.64), SIMDE_FLOAT64_C( -146.10), SIMDE_FLOAT64_C( 771.62), SIMDE_FLOAT64_C( -975.31), SIMDE_FLOAT64_C( 550.04)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 380.47), SIMDE_FLOAT64_C( -722.04), SIMDE_FLOAT64_C( -251.19), SIMDE_FLOAT64_C( 885.20), SIMDE_FLOAT64_C( -718.95), SIMDE_FLOAT64_C( -995.18), SIMDE_FLOAT64_C( 316.41), SIMDE_FLOAT64_C( 425.49)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.81), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 
0.20), SIMDE_FLOAT64_C( -0.78), SIMDE_FLOAT64_C( -3.08), SIMDE_FLOAT64_C( 0.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_maskz_div_pd(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_div_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_div_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_div_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_div_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_div_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_div_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/extract.c000066400000000000000000002771611400333146700171100ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #define SIMDE_TEST_X86_AVX512_INSN extract #include #include #include static int test_simde_mm512_extractf32x4_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m128 r0; simde__m128 r1; simde__m128 r2; simde__m128 r3; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -563.83), SIMDE_FLOAT32_C( 799.30), SIMDE_FLOAT32_C( 938.85), SIMDE_FLOAT32_C( -576.01), SIMDE_FLOAT32_C( -465.05), SIMDE_FLOAT32_C( 439.15), SIMDE_FLOAT32_C( -104.57), SIMDE_FLOAT32_C( -28.15), SIMDE_FLOAT32_C( -431.26), SIMDE_FLOAT32_C( 481.25), SIMDE_FLOAT32_C( -57.75), SIMDE_FLOAT32_C( -784.26), SIMDE_FLOAT32_C( 438.04), SIMDE_FLOAT32_C( 549.03), SIMDE_FLOAT32_C( 729.46), SIMDE_FLOAT32_C( 582.53)), simde_mm_set_ps(SIMDE_FLOAT32_C( 438.04), SIMDE_FLOAT32_C( 549.03), SIMDE_FLOAT32_C( 729.46), SIMDE_FLOAT32_C( 582.53)), simde_mm_set_ps(SIMDE_FLOAT32_C( -431.26), SIMDE_FLOAT32_C( 481.25), SIMDE_FLOAT32_C( -57.75), SIMDE_FLOAT32_C( -784.26)), simde_mm_set_ps(SIMDE_FLOAT32_C( -465.05), SIMDE_FLOAT32_C( 439.15), SIMDE_FLOAT32_C( -104.57), SIMDE_FLOAT32_C( -28.15)), simde_mm_set_ps(SIMDE_FLOAT32_C( -563.83), SIMDE_FLOAT32_C( 799.30), SIMDE_FLOAT32_C( 938.85), SIMDE_FLOAT32_C( -576.01)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 120.10), SIMDE_FLOAT32_C( -64.06), SIMDE_FLOAT32_C( -620.03), SIMDE_FLOAT32_C( 559.81), SIMDE_FLOAT32_C( 185.23), SIMDE_FLOAT32_C( -423.61), SIMDE_FLOAT32_C( -11.91), SIMDE_FLOAT32_C( 407.56), SIMDE_FLOAT32_C( 355.11), SIMDE_FLOAT32_C( -787.72), SIMDE_FLOAT32_C( 472.82), SIMDE_FLOAT32_C( -703.51), SIMDE_FLOAT32_C( -202.49), SIMDE_FLOAT32_C( -470.36), SIMDE_FLOAT32_C( 966.37), SIMDE_FLOAT32_C( 135.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( -202.49), SIMDE_FLOAT32_C( -470.36), SIMDE_FLOAT32_C( 966.37), SIMDE_FLOAT32_C( 135.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 355.11), SIMDE_FLOAT32_C( -787.72), SIMDE_FLOAT32_C( 472.82), SIMDE_FLOAT32_C( -703.51)), simde_mm_set_ps(SIMDE_FLOAT32_C( 185.23), SIMDE_FLOAT32_C( 
-423.61), SIMDE_FLOAT32_C( -11.91), SIMDE_FLOAT32_C( 407.56)), simde_mm_set_ps(SIMDE_FLOAT32_C( 120.10), SIMDE_FLOAT32_C( -64.06), SIMDE_FLOAT32_C( -620.03), SIMDE_FLOAT32_C( 559.81)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 193.01), SIMDE_FLOAT32_C( -435.27), SIMDE_FLOAT32_C( -84.06), SIMDE_FLOAT32_C( 298.40), SIMDE_FLOAT32_C( 208.07), SIMDE_FLOAT32_C( -94.60), SIMDE_FLOAT32_C( 834.28), SIMDE_FLOAT32_C( 260.50), SIMDE_FLOAT32_C( -859.51), SIMDE_FLOAT32_C( -69.45), SIMDE_FLOAT32_C( 40.36), SIMDE_FLOAT32_C( 95.61), SIMDE_FLOAT32_C( -743.10), SIMDE_FLOAT32_C( -688.01), SIMDE_FLOAT32_C( 442.76), SIMDE_FLOAT32_C( 931.17)), simde_mm_set_ps(SIMDE_FLOAT32_C( -743.10), SIMDE_FLOAT32_C( -688.01), SIMDE_FLOAT32_C( 442.76), SIMDE_FLOAT32_C( 931.17)), simde_mm_set_ps(SIMDE_FLOAT32_C( -859.51), SIMDE_FLOAT32_C( -69.45), SIMDE_FLOAT32_C( 40.36), SIMDE_FLOAT32_C( 95.61)), simde_mm_set_ps(SIMDE_FLOAT32_C( 208.07), SIMDE_FLOAT32_C( -94.60), SIMDE_FLOAT32_C( 834.28), SIMDE_FLOAT32_C( 260.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( 193.01), SIMDE_FLOAT32_C( -435.27), SIMDE_FLOAT32_C( -84.06), SIMDE_FLOAT32_C( 298.40)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 664.52), SIMDE_FLOAT32_C( -224.13), SIMDE_FLOAT32_C( 633.65), SIMDE_FLOAT32_C( -834.15), SIMDE_FLOAT32_C( -157.33), SIMDE_FLOAT32_C( -819.46), SIMDE_FLOAT32_C( 541.44), SIMDE_FLOAT32_C( 112.81), SIMDE_FLOAT32_C( -98.08), SIMDE_FLOAT32_C( 464.19), SIMDE_FLOAT32_C( 711.12), SIMDE_FLOAT32_C( 282.83), SIMDE_FLOAT32_C( -774.08), SIMDE_FLOAT32_C( 841.24), SIMDE_FLOAT32_C( -414.07), SIMDE_FLOAT32_C( 79.76)), simde_mm_set_ps(SIMDE_FLOAT32_C( -774.08), SIMDE_FLOAT32_C( 841.24), SIMDE_FLOAT32_C( -414.07), SIMDE_FLOAT32_C( 79.76)), simde_mm_set_ps(SIMDE_FLOAT32_C( -98.08), SIMDE_FLOAT32_C( 464.19), SIMDE_FLOAT32_C( 711.12), SIMDE_FLOAT32_C( 282.83)), simde_mm_set_ps(SIMDE_FLOAT32_C( -157.33), SIMDE_FLOAT32_C( -819.46), SIMDE_FLOAT32_C( 541.44), SIMDE_FLOAT32_C( 112.81)), simde_mm_set_ps(SIMDE_FLOAT32_C( 664.52), SIMDE_FLOAT32_C( -224.13), 
SIMDE_FLOAT32_C( 633.65), SIMDE_FLOAT32_C( -834.15)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 141.08), SIMDE_FLOAT32_C( -832.50), SIMDE_FLOAT32_C( -990.15), SIMDE_FLOAT32_C( 438.46), SIMDE_FLOAT32_C( -887.47), SIMDE_FLOAT32_C( 336.35), SIMDE_FLOAT32_C( -396.24), SIMDE_FLOAT32_C( 99.21), SIMDE_FLOAT32_C( -2.60), SIMDE_FLOAT32_C( -38.88), SIMDE_FLOAT32_C( 165.88), SIMDE_FLOAT32_C( 218.73), SIMDE_FLOAT32_C( 375.27), SIMDE_FLOAT32_C( -966.90), SIMDE_FLOAT32_C( -512.98), SIMDE_FLOAT32_C( -737.78)), simde_mm_set_ps(SIMDE_FLOAT32_C( 375.27), SIMDE_FLOAT32_C( -966.90), SIMDE_FLOAT32_C( -512.98), SIMDE_FLOAT32_C( -737.78)), simde_mm_set_ps(SIMDE_FLOAT32_C( -2.60), SIMDE_FLOAT32_C( -38.88), SIMDE_FLOAT32_C( 165.88), SIMDE_FLOAT32_C( 218.73)), simde_mm_set_ps(SIMDE_FLOAT32_C( -887.47), SIMDE_FLOAT32_C( 336.35), SIMDE_FLOAT32_C( -396.24), SIMDE_FLOAT32_C( 99.21)), simde_mm_set_ps(SIMDE_FLOAT32_C( 141.08), SIMDE_FLOAT32_C( -832.50), SIMDE_FLOAT32_C( -990.15), SIMDE_FLOAT32_C( 438.46)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -179.98), SIMDE_FLOAT32_C( 258.23), SIMDE_FLOAT32_C( 246.22), SIMDE_FLOAT32_C( 97.85), SIMDE_FLOAT32_C( 666.32), SIMDE_FLOAT32_C( 364.80), SIMDE_FLOAT32_C( 759.27), SIMDE_FLOAT32_C( -524.19), SIMDE_FLOAT32_C( -726.51), SIMDE_FLOAT32_C( 381.71), SIMDE_FLOAT32_C( 819.12), SIMDE_FLOAT32_C( 145.28), SIMDE_FLOAT32_C( -99.37), SIMDE_FLOAT32_C( -151.02), SIMDE_FLOAT32_C( 551.65), SIMDE_FLOAT32_C( 155.58)), simde_mm_set_ps(SIMDE_FLOAT32_C( -99.37), SIMDE_FLOAT32_C( -151.02), SIMDE_FLOAT32_C( 551.65), SIMDE_FLOAT32_C( 155.58)), simde_mm_set_ps(SIMDE_FLOAT32_C( -726.51), SIMDE_FLOAT32_C( 381.71), SIMDE_FLOAT32_C( 819.12), SIMDE_FLOAT32_C( 145.28)), simde_mm_set_ps(SIMDE_FLOAT32_C( 666.32), SIMDE_FLOAT32_C( 364.80), SIMDE_FLOAT32_C( 759.27), SIMDE_FLOAT32_C( -524.19)), simde_mm_set_ps(SIMDE_FLOAT32_C( -179.98), SIMDE_FLOAT32_C( 258.23), SIMDE_FLOAT32_C( 246.22), SIMDE_FLOAT32_C( 97.85)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 254.48), SIMDE_FLOAT32_C( -211.73), 
SIMDE_FLOAT32_C( 755.70), SIMDE_FLOAT32_C( 908.31), SIMDE_FLOAT32_C( -363.93), SIMDE_FLOAT32_C( -144.11), SIMDE_FLOAT32_C( 789.10), SIMDE_FLOAT32_C( -343.92), SIMDE_FLOAT32_C( 344.74), SIMDE_FLOAT32_C( 961.65), SIMDE_FLOAT32_C( 652.93), SIMDE_FLOAT32_C( 754.42), SIMDE_FLOAT32_C( 184.91), SIMDE_FLOAT32_C( -432.97), SIMDE_FLOAT32_C( -455.33), SIMDE_FLOAT32_C( 164.52)), simde_mm_set_ps(SIMDE_FLOAT32_C( 184.91), SIMDE_FLOAT32_C( -432.97), SIMDE_FLOAT32_C( -455.33), SIMDE_FLOAT32_C( 164.52)), simde_mm_set_ps(SIMDE_FLOAT32_C( 344.74), SIMDE_FLOAT32_C( 961.65), SIMDE_FLOAT32_C( 652.93), SIMDE_FLOAT32_C( 754.42)), simde_mm_set_ps(SIMDE_FLOAT32_C( -363.93), SIMDE_FLOAT32_C( -144.11), SIMDE_FLOAT32_C( 789.10), SIMDE_FLOAT32_C( -343.92)), simde_mm_set_ps(SIMDE_FLOAT32_C( 254.48), SIMDE_FLOAT32_C( -211.73), SIMDE_FLOAT32_C( 755.70), SIMDE_FLOAT32_C( 908.31)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -985.32), SIMDE_FLOAT32_C( 485.76), SIMDE_FLOAT32_C( 234.60), SIMDE_FLOAT32_C( 786.03), SIMDE_FLOAT32_C( 859.59), SIMDE_FLOAT32_C( 489.95), SIMDE_FLOAT32_C( -409.35), SIMDE_FLOAT32_C( 796.52), SIMDE_FLOAT32_C( -846.10), SIMDE_FLOAT32_C( -248.07), SIMDE_FLOAT32_C( -411.92), SIMDE_FLOAT32_C( -88.91), SIMDE_FLOAT32_C( 481.68), SIMDE_FLOAT32_C( 170.00), SIMDE_FLOAT32_C( -341.91), SIMDE_FLOAT32_C( 366.57)), simde_mm_set_ps(SIMDE_FLOAT32_C( 481.68), SIMDE_FLOAT32_C( 170.00), SIMDE_FLOAT32_C( -341.91), SIMDE_FLOAT32_C( 366.57)), simde_mm_set_ps(SIMDE_FLOAT32_C( -846.10), SIMDE_FLOAT32_C( -248.07), SIMDE_FLOAT32_C( -411.92), SIMDE_FLOAT32_C( -88.91)), simde_mm_set_ps(SIMDE_FLOAT32_C( 859.59), SIMDE_FLOAT32_C( 489.95), SIMDE_FLOAT32_C( -409.35), SIMDE_FLOAT32_C( 796.52)), simde_mm_set_ps(SIMDE_FLOAT32_C( -985.32), SIMDE_FLOAT32_C( 485.76), SIMDE_FLOAT32_C( 234.60), SIMDE_FLOAT32_C( 786.03)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r; r = simde_mm512_extractf32x4_ps(test_vec[i].a, 0); simde_assert_m128_close(r, test_vec[i].r0, 1); r = 
simde_mm512_extractf32x4_ps(test_vec[i].a, 1); simde_assert_m128_close(r, test_vec[i].r1, 1); r = simde_mm512_extractf32x4_ps(test_vec[i].a, 2); simde_assert_m128_close(r, test_vec[i].r2, 1); r = simde_mm512_extractf32x4_ps(test_vec[i].a, 3); simde_assert_m128_close(r, test_vec[i].r3, 1); } return 0; } static int test_simde_mm512_mask_extractf32x4_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 src; simde__mmask8 k; simde__m512 a; simde__m128 r0; simde__m128 r1; simde__m128 r2; simde__m128 r3; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -172.36), SIMDE_FLOAT32_C( 393.53), SIMDE_FLOAT32_C( 36.69), SIMDE_FLOAT32_C( -135.52)), UINT8_C( 25), simde_mm512_set_ps(SIMDE_FLOAT32_C( 903.50), SIMDE_FLOAT32_C( -43.35), SIMDE_FLOAT32_C( 309.91), SIMDE_FLOAT32_C( 846.15), SIMDE_FLOAT32_C( -514.56), SIMDE_FLOAT32_C( -860.98), SIMDE_FLOAT32_C( -280.30), SIMDE_FLOAT32_C( 128.51), SIMDE_FLOAT32_C( 522.06), SIMDE_FLOAT32_C( -932.28), SIMDE_FLOAT32_C( 600.12), SIMDE_FLOAT32_C( -491.12), SIMDE_FLOAT32_C( -139.11), SIMDE_FLOAT32_C( -268.86), SIMDE_FLOAT32_C( -71.72), SIMDE_FLOAT32_C( 98.47)), simde_mm_set_ps(SIMDE_FLOAT32_C( -139.11), SIMDE_FLOAT32_C( 393.53), SIMDE_FLOAT32_C( 36.69), SIMDE_FLOAT32_C( 98.47)), simde_mm_set_ps(SIMDE_FLOAT32_C( 522.06), SIMDE_FLOAT32_C( 393.53), SIMDE_FLOAT32_C( 36.69), SIMDE_FLOAT32_C( -491.12)), simde_mm_set_ps(SIMDE_FLOAT32_C( -514.56), SIMDE_FLOAT32_C( 393.53), SIMDE_FLOAT32_C( 36.69), SIMDE_FLOAT32_C( 128.51)), simde_mm_set_ps(SIMDE_FLOAT32_C( 903.50), SIMDE_FLOAT32_C( 393.53), SIMDE_FLOAT32_C( 36.69), SIMDE_FLOAT32_C( 846.15)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -895.71), SIMDE_FLOAT32_C( -736.92), SIMDE_FLOAT32_C( 283.06), SIMDE_FLOAT32_C( -333.94)), UINT8_C( 61), simde_mm512_set_ps(SIMDE_FLOAT32_C( 337.35), SIMDE_FLOAT32_C( -278.32), SIMDE_FLOAT32_C( -744.41), SIMDE_FLOAT32_C( 39.32), SIMDE_FLOAT32_C( 29.68), SIMDE_FLOAT32_C( -490.28), SIMDE_FLOAT32_C( 841.53), SIMDE_FLOAT32_C( 526.21), SIMDE_FLOAT32_C( -203.04), SIMDE_FLOAT32_C( 
-80.71), SIMDE_FLOAT32_C( 632.01), SIMDE_FLOAT32_C( 456.89), SIMDE_FLOAT32_C( 51.33), SIMDE_FLOAT32_C( -868.59), SIMDE_FLOAT32_C( -921.00), SIMDE_FLOAT32_C( -471.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( 51.33), SIMDE_FLOAT32_C( -868.59), SIMDE_FLOAT32_C( 283.06), SIMDE_FLOAT32_C( -471.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( -203.04), SIMDE_FLOAT32_C( -80.71), SIMDE_FLOAT32_C( 283.06), SIMDE_FLOAT32_C( 456.89)), simde_mm_set_ps(SIMDE_FLOAT32_C( 29.68), SIMDE_FLOAT32_C( -490.28), SIMDE_FLOAT32_C( 283.06), SIMDE_FLOAT32_C( 526.21)), simde_mm_set_ps(SIMDE_FLOAT32_C( 337.35), SIMDE_FLOAT32_C( -278.32), SIMDE_FLOAT32_C( 283.06), SIMDE_FLOAT32_C( 39.32)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 957.37), SIMDE_FLOAT32_C( -934.92), SIMDE_FLOAT32_C( -657.02), SIMDE_FLOAT32_C( -629.37)), UINT8_C(214), simde_mm512_set_ps(SIMDE_FLOAT32_C( -207.87), SIMDE_FLOAT32_C( -765.42), SIMDE_FLOAT32_C( 138.83), SIMDE_FLOAT32_C( 699.07), SIMDE_FLOAT32_C( -143.73), SIMDE_FLOAT32_C( 709.96), SIMDE_FLOAT32_C( -767.34), SIMDE_FLOAT32_C( -588.28), SIMDE_FLOAT32_C( 586.29), SIMDE_FLOAT32_C( -760.88), SIMDE_FLOAT32_C( -617.12), SIMDE_FLOAT32_C( -751.58), SIMDE_FLOAT32_C( 907.23), SIMDE_FLOAT32_C( -359.60), SIMDE_FLOAT32_C( -213.75), SIMDE_FLOAT32_C( 403.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 957.37), SIMDE_FLOAT32_C( -359.60), SIMDE_FLOAT32_C( -213.75), SIMDE_FLOAT32_C( -629.37)), simde_mm_set_ps(SIMDE_FLOAT32_C( 957.37), SIMDE_FLOAT32_C( -760.88), SIMDE_FLOAT32_C( -617.12), SIMDE_FLOAT32_C( -629.37)), simde_mm_set_ps(SIMDE_FLOAT32_C( 957.37), SIMDE_FLOAT32_C( 709.96), SIMDE_FLOAT32_C( -767.34), SIMDE_FLOAT32_C( -629.37)), simde_mm_set_ps(SIMDE_FLOAT32_C( 957.37), SIMDE_FLOAT32_C( -765.42), SIMDE_FLOAT32_C( 138.83), SIMDE_FLOAT32_C( -629.37)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 799.57), SIMDE_FLOAT32_C( -820.22), SIMDE_FLOAT32_C( -959.11), SIMDE_FLOAT32_C( 268.99)), UINT8_C(196), simde_mm512_set_ps(SIMDE_FLOAT32_C( -659.70), SIMDE_FLOAT32_C( 493.30), SIMDE_FLOAT32_C( 831.29), SIMDE_FLOAT32_C( 
-619.50), SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( -492.61), SIMDE_FLOAT32_C( -68.16), SIMDE_FLOAT32_C( 717.69), SIMDE_FLOAT32_C( -663.74), SIMDE_FLOAT32_C( 179.29), SIMDE_FLOAT32_C( 989.70), SIMDE_FLOAT32_C( -695.21), SIMDE_FLOAT32_C( -786.23), SIMDE_FLOAT32_C( 873.30), SIMDE_FLOAT32_C( 241.45), SIMDE_FLOAT32_C( -432.13)), simde_mm_set_ps(SIMDE_FLOAT32_C( 799.57), SIMDE_FLOAT32_C( 873.30), SIMDE_FLOAT32_C( -959.11), SIMDE_FLOAT32_C( 268.99)), simde_mm_set_ps(SIMDE_FLOAT32_C( 799.57), SIMDE_FLOAT32_C( 179.29), SIMDE_FLOAT32_C( -959.11), SIMDE_FLOAT32_C( 268.99)), simde_mm_set_ps(SIMDE_FLOAT32_C( 799.57), SIMDE_FLOAT32_C( -492.61), SIMDE_FLOAT32_C( -959.11), SIMDE_FLOAT32_C( 268.99)), simde_mm_set_ps(SIMDE_FLOAT32_C( 799.57), SIMDE_FLOAT32_C( 493.30), SIMDE_FLOAT32_C( -959.11), SIMDE_FLOAT32_C( 268.99)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -789.54), SIMDE_FLOAT32_C( -790.16), SIMDE_FLOAT32_C( -415.61), SIMDE_FLOAT32_C( 994.61)), UINT8_C( 8), simde_mm512_set_ps(SIMDE_FLOAT32_C( -388.47), SIMDE_FLOAT32_C( -643.43), SIMDE_FLOAT32_C( -331.34), SIMDE_FLOAT32_C( 72.67), SIMDE_FLOAT32_C( -870.79), SIMDE_FLOAT32_C( -722.44), SIMDE_FLOAT32_C( 529.44), SIMDE_FLOAT32_C( -949.73), SIMDE_FLOAT32_C( 280.87), SIMDE_FLOAT32_C( 380.83), SIMDE_FLOAT32_C( -236.67), SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( -925.76), SIMDE_FLOAT32_C( -915.62), SIMDE_FLOAT32_C( -30.05), SIMDE_FLOAT32_C( -70.79)), simde_mm_set_ps(SIMDE_FLOAT32_C( -925.76), SIMDE_FLOAT32_C( -790.16), SIMDE_FLOAT32_C( -415.61), SIMDE_FLOAT32_C( 994.61)), simde_mm_set_ps(SIMDE_FLOAT32_C( 280.87), SIMDE_FLOAT32_C( -790.16), SIMDE_FLOAT32_C( -415.61), SIMDE_FLOAT32_C( 994.61)), simde_mm_set_ps(SIMDE_FLOAT32_C( -870.79), SIMDE_FLOAT32_C( -790.16), SIMDE_FLOAT32_C( -415.61), SIMDE_FLOAT32_C( 994.61)), simde_mm_set_ps(SIMDE_FLOAT32_C( -388.47), SIMDE_FLOAT32_C( -790.16), SIMDE_FLOAT32_C( -415.61), SIMDE_FLOAT32_C( 994.61)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -352.24), SIMDE_FLOAT32_C( -479.79), SIMDE_FLOAT32_C( 602.83), 
SIMDE_FLOAT32_C( 2.55)), UINT8_C( 60), simde_mm512_set_ps(SIMDE_FLOAT32_C( -607.82), SIMDE_FLOAT32_C( 296.47), SIMDE_FLOAT32_C( -327.04), SIMDE_FLOAT32_C( -23.06), SIMDE_FLOAT32_C( -95.21), SIMDE_FLOAT32_C( 10.75), SIMDE_FLOAT32_C( -668.43), SIMDE_FLOAT32_C( -210.00), SIMDE_FLOAT32_C( 915.68), SIMDE_FLOAT32_C( -53.79), SIMDE_FLOAT32_C( 703.31), SIMDE_FLOAT32_C( 930.79), SIMDE_FLOAT32_C( 111.33), SIMDE_FLOAT32_C( -176.75), SIMDE_FLOAT32_C( -316.94), SIMDE_FLOAT32_C( 639.68)), simde_mm_set_ps(SIMDE_FLOAT32_C( 111.33), SIMDE_FLOAT32_C( -176.75), SIMDE_FLOAT32_C( 602.83), SIMDE_FLOAT32_C( 2.55)), simde_mm_set_ps(SIMDE_FLOAT32_C( 915.68), SIMDE_FLOAT32_C( -53.79), SIMDE_FLOAT32_C( 602.83), SIMDE_FLOAT32_C( 2.55)), simde_mm_set_ps(SIMDE_FLOAT32_C( -95.21), SIMDE_FLOAT32_C( 10.75), SIMDE_FLOAT32_C( 602.83), SIMDE_FLOAT32_C( 2.55)), simde_mm_set_ps(SIMDE_FLOAT32_C( -607.82), SIMDE_FLOAT32_C( 296.47), SIMDE_FLOAT32_C( 602.83), SIMDE_FLOAT32_C( 2.55)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -334.42), SIMDE_FLOAT32_C( 660.53), SIMDE_FLOAT32_C( 748.73), SIMDE_FLOAT32_C( 996.15)), UINT8_C( 47), simde_mm512_set_ps(SIMDE_FLOAT32_C( 383.31), SIMDE_FLOAT32_C( 641.22), SIMDE_FLOAT32_C( -747.07), SIMDE_FLOAT32_C( -762.67), SIMDE_FLOAT32_C( 744.11), SIMDE_FLOAT32_C( 350.11), SIMDE_FLOAT32_C( 409.27), SIMDE_FLOAT32_C( 481.83), SIMDE_FLOAT32_C( 601.37), SIMDE_FLOAT32_C( -660.24), SIMDE_FLOAT32_C( -675.56), SIMDE_FLOAT32_C( -194.09), SIMDE_FLOAT32_C( 149.22), SIMDE_FLOAT32_C( 161.52), SIMDE_FLOAT32_C( 632.78), SIMDE_FLOAT32_C( 346.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( 149.22), SIMDE_FLOAT32_C( 161.52), SIMDE_FLOAT32_C( 632.78), SIMDE_FLOAT32_C( 346.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( 601.37), SIMDE_FLOAT32_C( -660.24), SIMDE_FLOAT32_C( -675.56), SIMDE_FLOAT32_C( -194.09)), simde_mm_set_ps(SIMDE_FLOAT32_C( 744.11), SIMDE_FLOAT32_C( 350.11), SIMDE_FLOAT32_C( 409.27), SIMDE_FLOAT32_C( 481.83)), simde_mm_set_ps(SIMDE_FLOAT32_C( 383.31), SIMDE_FLOAT32_C( 641.22), SIMDE_FLOAT32_C( -747.07), 
SIMDE_FLOAT32_C( -762.67)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 122.69), SIMDE_FLOAT32_C( 65.13), SIMDE_FLOAT32_C( -972.27), SIMDE_FLOAT32_C( 628.22)), UINT8_C(171), simde_mm512_set_ps(SIMDE_FLOAT32_C( -653.71), SIMDE_FLOAT32_C( 371.73), SIMDE_FLOAT32_C( 757.18), SIMDE_FLOAT32_C( 214.84), SIMDE_FLOAT32_C( 830.24), SIMDE_FLOAT32_C( 903.53), SIMDE_FLOAT32_C( -831.08), SIMDE_FLOAT32_C( 815.07), SIMDE_FLOAT32_C( 196.06), SIMDE_FLOAT32_C( -83.06), SIMDE_FLOAT32_C( 687.82), SIMDE_FLOAT32_C( -517.82), SIMDE_FLOAT32_C( -294.36), SIMDE_FLOAT32_C( 702.71), SIMDE_FLOAT32_C( -920.22), SIMDE_FLOAT32_C( -923.04)), simde_mm_set_ps(SIMDE_FLOAT32_C( -294.36), SIMDE_FLOAT32_C( 65.13), SIMDE_FLOAT32_C( -920.22), SIMDE_FLOAT32_C( -923.04)), simde_mm_set_ps(SIMDE_FLOAT32_C( 196.06), SIMDE_FLOAT32_C( 65.13), SIMDE_FLOAT32_C( 687.82), SIMDE_FLOAT32_C( -517.82)), simde_mm_set_ps(SIMDE_FLOAT32_C( 830.24), SIMDE_FLOAT32_C( 65.13), SIMDE_FLOAT32_C( -831.08), SIMDE_FLOAT32_C( 815.07)), simde_mm_set_ps(SIMDE_FLOAT32_C( -653.71), SIMDE_FLOAT32_C( 65.13), SIMDE_FLOAT32_C( 757.18), SIMDE_FLOAT32_C( 214.84)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r; r = simde_mm512_mask_extractf32x4_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a, 0); simde_assert_m128_close(r, test_vec[i].r0, 1); r = simde_mm512_mask_extractf32x4_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a, 1); simde_assert_m128_close(r, test_vec[i].r1, 1); r = simde_mm512_mask_extractf32x4_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a, 2); simde_assert_m128_close(r, test_vec[i].r2, 1); r = simde_mm512_mask_extractf32x4_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a, 3); simde_assert_m128_close(r, test_vec[i].r3, 1); } return 0; } static int test_simde_mm512_maskz_extractf32x4_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512 a; simde__m128 r0; simde__m128 r1; simde__m128 r2; simde__m128 r3; } test_vec[8] = { { UINT8_C( 63), simde_mm512_set_ps(SIMDE_FLOAT32_C( 
522.06), SIMDE_FLOAT32_C( 160.98), SIMDE_FLOAT32_C( -932.28), SIMDE_FLOAT32_C( 391.82), SIMDE_FLOAT32_C( 600.12), SIMDE_FLOAT32_C( -569.99), SIMDE_FLOAT32_C( -491.12), SIMDE_FLOAT32_C( -327.63), SIMDE_FLOAT32_C( -139.11), SIMDE_FLOAT32_C( -172.36), SIMDE_FLOAT32_C( -268.86), SIMDE_FLOAT32_C( 393.53), SIMDE_FLOAT32_C( -71.72), SIMDE_FLOAT32_C( 36.69), SIMDE_FLOAT32_C( 98.47), SIMDE_FLOAT32_C( -135.52)), simde_mm_set_ps(SIMDE_FLOAT32_C( -71.72), SIMDE_FLOAT32_C( 36.69), SIMDE_FLOAT32_C( 98.47), SIMDE_FLOAT32_C( -135.52)), simde_mm_set_ps(SIMDE_FLOAT32_C( -139.11), SIMDE_FLOAT32_C( -172.36), SIMDE_FLOAT32_C( -268.86), SIMDE_FLOAT32_C( 393.53)), simde_mm_set_ps(SIMDE_FLOAT32_C( 600.12), SIMDE_FLOAT32_C( -569.99), SIMDE_FLOAT32_C( -491.12), SIMDE_FLOAT32_C( -327.63)), simde_mm_set_ps(SIMDE_FLOAT32_C( 522.06), SIMDE_FLOAT32_C( 160.98), SIMDE_FLOAT32_C( -932.28), SIMDE_FLOAT32_C( 391.82)) }, { UINT8_C(157), simde_mm512_set_ps(SIMDE_FLOAT32_C( 483.08), SIMDE_FLOAT32_C( 903.50), SIMDE_FLOAT32_C( 232.04), SIMDE_FLOAT32_C( -43.35), SIMDE_FLOAT32_C( 774.81), SIMDE_FLOAT32_C( 309.91), SIMDE_FLOAT32_C( -599.01), SIMDE_FLOAT32_C( 846.15), SIMDE_FLOAT32_C( 69.04), SIMDE_FLOAT32_C( -514.56), SIMDE_FLOAT32_C( -149.02), SIMDE_FLOAT32_C( -860.98), SIMDE_FLOAT32_C( 240.79), SIMDE_FLOAT32_C( -280.30), SIMDE_FLOAT32_C( -839.80), SIMDE_FLOAT32_C( 128.51)), simde_mm_set_ps(SIMDE_FLOAT32_C( 240.79), SIMDE_FLOAT32_C( -280.30), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 128.51)), simde_mm_set_ps(SIMDE_FLOAT32_C( 69.04), SIMDE_FLOAT32_C( -514.56), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -860.98)), simde_mm_set_ps(SIMDE_FLOAT32_C( 774.81), SIMDE_FLOAT32_C( 309.91), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 846.15)), simde_mm_set_ps(SIMDE_FLOAT32_C( 483.08), SIMDE_FLOAT32_C( 903.50), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -43.35)) }, { UINT8_C( 33), simde_mm512_set_ps(SIMDE_FLOAT32_C( -835.53), SIMDE_FLOAT32_C( -203.04), SIMDE_FLOAT32_C( 571.79), SIMDE_FLOAT32_C( -80.71), SIMDE_FLOAT32_C( 
675.92), SIMDE_FLOAT32_C( 632.01), SIMDE_FLOAT32_C( 490.41), SIMDE_FLOAT32_C( 456.89), SIMDE_FLOAT32_C( 47.59), SIMDE_FLOAT32_C( 51.33), SIMDE_FLOAT32_C( -895.71), SIMDE_FLOAT32_C( -868.59), SIMDE_FLOAT32_C( -736.92), SIMDE_FLOAT32_C( -921.00), SIMDE_FLOAT32_C( 283.06), SIMDE_FLOAT32_C( -471.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -471.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -868.59)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 456.89)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -80.71)) }, { UINT8_C(176), simde_mm512_set_ps(SIMDE_FLOAT32_C( -629.37), SIMDE_FLOAT32_C( -198.67), SIMDE_FLOAT32_C( 337.35), SIMDE_FLOAT32_C( 447.98), SIMDE_FLOAT32_C( -278.32), SIMDE_FLOAT32_C( -925.69), SIMDE_FLOAT32_C( -744.41), SIMDE_FLOAT32_C( 717.83), SIMDE_FLOAT32_C( 39.32), SIMDE_FLOAT32_C( -489.88), SIMDE_FLOAT32_C( 29.68), SIMDE_FLOAT32_C( -37.49), SIMDE_FLOAT32_C( -490.28), SIMDE_FLOAT32_C( -373.66), SIMDE_FLOAT32_C( 841.53), SIMDE_FLOAT32_C( -292.35)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT8_C(169), simde_mm512_set_ps(SIMDE_FLOAT32_C( -588.28), SIMDE_FLOAT32_C( 846.67), SIMDE_FLOAT32_C( 586.29), SIMDE_FLOAT32_C( 670.52), SIMDE_FLOAT32_C( -760.88), SIMDE_FLOAT32_C( 149.72), SIMDE_FLOAT32_C( -617.12), SIMDE_FLOAT32_C( 213.24), SIMDE_FLOAT32_C( -751.58), SIMDE_FLOAT32_C( -577.36), SIMDE_FLOAT32_C( 
907.23), SIMDE_FLOAT32_C( 957.37), SIMDE_FLOAT32_C( -359.60), SIMDE_FLOAT32_C( -934.92), SIMDE_FLOAT32_C( -213.75), SIMDE_FLOAT32_C( -657.02)), simde_mm_set_ps(SIMDE_FLOAT32_C( -359.60), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -657.02)), simde_mm_set_ps(SIMDE_FLOAT32_C( -751.58), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 957.37)), simde_mm_set_ps(SIMDE_FLOAT32_C( -760.88), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 213.24)), simde_mm_set_ps(SIMDE_FLOAT32_C( -588.28), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 670.52)) }, { UINT8_C( 52), simde_mm512_set_ps(SIMDE_FLOAT32_C( -432.13), SIMDE_FLOAT32_C( 268.99), SIMDE_FLOAT32_C( -842.15), SIMDE_FLOAT32_C( -207.87), SIMDE_FLOAT32_C( 908.84), SIMDE_FLOAT32_C( -765.42), SIMDE_FLOAT32_C( -315.78), SIMDE_FLOAT32_C( 138.83), SIMDE_FLOAT32_C( -86.06), SIMDE_FLOAT32_C( 699.07), SIMDE_FLOAT32_C( -413.85), SIMDE_FLOAT32_C( -143.73), SIMDE_FLOAT32_C( 752.26), SIMDE_FLOAT32_C( 709.96), SIMDE_FLOAT32_C( 609.29), SIMDE_FLOAT32_C( -767.34)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 709.96), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 699.07), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -765.42), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 268.99), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT8_C(217), simde_mm512_set_ps(SIMDE_FLOAT32_C( 180.78), SIMDE_FLOAT32_C( 717.69), SIMDE_FLOAT32_C( -289.23), SIMDE_FLOAT32_C( -663.74), SIMDE_FLOAT32_C( 918.52), SIMDE_FLOAT32_C( 179.29), SIMDE_FLOAT32_C( -422.76), SIMDE_FLOAT32_C( 989.70), SIMDE_FLOAT32_C( -433.33), SIMDE_FLOAT32_C( -695.21), SIMDE_FLOAT32_C( 48.49), SIMDE_FLOAT32_C( -786.23), SIMDE_FLOAT32_C( 799.57), SIMDE_FLOAT32_C( 873.30), SIMDE_FLOAT32_C( -820.22), SIMDE_FLOAT32_C( 241.45)), 
simde_mm_set_ps(SIMDE_FLOAT32_C( 799.57), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 241.45)), simde_mm_set_ps(SIMDE_FLOAT32_C( -433.33), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -786.23)), simde_mm_set_ps(SIMDE_FLOAT32_C( 918.52), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 989.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 180.78), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -663.74)) }, { UINT8_C(237), simde_mm512_set_ps(SIMDE_FLOAT32_C( -415.61), SIMDE_FLOAT32_C( -70.79), SIMDE_FLOAT32_C( 994.61), SIMDE_FLOAT32_C( 493.65), SIMDE_FLOAT32_C( -659.70), SIMDE_FLOAT32_C( 52.79), SIMDE_FLOAT32_C( 493.30), SIMDE_FLOAT32_C( 835.54), SIMDE_FLOAT32_C( 831.29), SIMDE_FLOAT32_C( -712.24), SIMDE_FLOAT32_C( -619.50), SIMDE_FLOAT32_C( 518.12), SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( -173.80), SIMDE_FLOAT32_C( -492.61), SIMDE_FLOAT32_C( 487.08)), simde_mm_set_ps(SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( -173.80), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 487.08)), simde_mm_set_ps(SIMDE_FLOAT32_C( 831.29), SIMDE_FLOAT32_C( -712.24), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 518.12)), simde_mm_set_ps(SIMDE_FLOAT32_C( -659.70), SIMDE_FLOAT32_C( 52.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 835.54)), simde_mm_set_ps(SIMDE_FLOAT32_C( -415.61), SIMDE_FLOAT32_C( -70.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 493.65)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r; r = simde_mm512_maskz_extractf32x4_ps(test_vec[i].k, test_vec[i].a, 0); simde_assert_m128_close(r, test_vec[i].r0, 1); r = simde_mm512_maskz_extractf32x4_ps(test_vec[i].k, test_vec[i].a, 1); simde_assert_m128_close(r, test_vec[i].r1, 1); r = simde_mm512_maskz_extractf32x4_ps(test_vec[i].k, test_vec[i].a, 2); simde_assert_m128_close(r, test_vec[i].r2, 1); r = simde_mm512_maskz_extractf32x4_ps(test_vec[i].k, test_vec[i].a, 3); simde_assert_m128_close(r, test_vec[i].r3, 1); } return 0; } static 
int test_simde_mm512_extractf64x4_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m256d r0; simde__m256d r1; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -431.26), SIMDE_FLOAT64_C( 481.25), SIMDE_FLOAT64_C( -57.75), SIMDE_FLOAT64_C( -784.26), SIMDE_FLOAT64_C( 438.04), SIMDE_FLOAT64_C( 549.03), SIMDE_FLOAT64_C( 729.46), SIMDE_FLOAT64_C( 582.53)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 438.04), SIMDE_FLOAT64_C( 549.03), SIMDE_FLOAT64_C( 729.46), SIMDE_FLOAT64_C( 582.53)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -431.26), SIMDE_FLOAT64_C( 481.25), SIMDE_FLOAT64_C( -57.75), SIMDE_FLOAT64_C( -784.26)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -563.83), SIMDE_FLOAT64_C( 799.30), SIMDE_FLOAT64_C( 938.85), SIMDE_FLOAT64_C( -576.01), SIMDE_FLOAT64_C( -465.05), SIMDE_FLOAT64_C( 439.15), SIMDE_FLOAT64_C( -104.57), SIMDE_FLOAT64_C( -28.15)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -465.05), SIMDE_FLOAT64_C( 439.15), SIMDE_FLOAT64_C( -104.57), SIMDE_FLOAT64_C( -28.15)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -563.83), SIMDE_FLOAT64_C( 799.30), SIMDE_FLOAT64_C( 938.85), SIMDE_FLOAT64_C( -576.01)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 355.11), SIMDE_FLOAT64_C( -787.72), SIMDE_FLOAT64_C( 472.82), SIMDE_FLOAT64_C( -703.51), SIMDE_FLOAT64_C( -202.49), SIMDE_FLOAT64_C( -470.36), SIMDE_FLOAT64_C( 966.37), SIMDE_FLOAT64_C( 135.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -202.49), SIMDE_FLOAT64_C( -470.36), SIMDE_FLOAT64_C( 966.37), SIMDE_FLOAT64_C( 135.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 355.11), SIMDE_FLOAT64_C( -787.72), SIMDE_FLOAT64_C( 472.82), SIMDE_FLOAT64_C( -703.51)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 120.10), SIMDE_FLOAT64_C( -64.06), SIMDE_FLOAT64_C( -620.03), SIMDE_FLOAT64_C( 559.81), SIMDE_FLOAT64_C( 185.23), SIMDE_FLOAT64_C( -423.61), SIMDE_FLOAT64_C( -11.91), SIMDE_FLOAT64_C( 407.56)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 185.23), SIMDE_FLOAT64_C( -423.61), SIMDE_FLOAT64_C( -11.91), SIMDE_FLOAT64_C( 407.56)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 
120.10), SIMDE_FLOAT64_C( -64.06), SIMDE_FLOAT64_C( -620.03), SIMDE_FLOAT64_C( 559.81)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -859.51), SIMDE_FLOAT64_C( -69.45), SIMDE_FLOAT64_C( 40.36), SIMDE_FLOAT64_C( 95.61), SIMDE_FLOAT64_C( -743.10), SIMDE_FLOAT64_C( -688.01), SIMDE_FLOAT64_C( 442.76), SIMDE_FLOAT64_C( 931.17)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -743.10), SIMDE_FLOAT64_C( -688.01), SIMDE_FLOAT64_C( 442.76), SIMDE_FLOAT64_C( 931.17)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -859.51), SIMDE_FLOAT64_C( -69.45), SIMDE_FLOAT64_C( 40.36), SIMDE_FLOAT64_C( 95.61)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 193.01), SIMDE_FLOAT64_C( -435.27), SIMDE_FLOAT64_C( -84.06), SIMDE_FLOAT64_C( 298.40), SIMDE_FLOAT64_C( 208.07), SIMDE_FLOAT64_C( -94.60), SIMDE_FLOAT64_C( 834.28), SIMDE_FLOAT64_C( 260.50)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 208.07), SIMDE_FLOAT64_C( -94.60), SIMDE_FLOAT64_C( 834.28), SIMDE_FLOAT64_C( 260.50)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 193.01), SIMDE_FLOAT64_C( -435.27), SIMDE_FLOAT64_C( -84.06), SIMDE_FLOAT64_C( 298.40)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -98.08), SIMDE_FLOAT64_C( 464.19), SIMDE_FLOAT64_C( 711.12), SIMDE_FLOAT64_C( 282.83), SIMDE_FLOAT64_C( -774.08), SIMDE_FLOAT64_C( 841.24), SIMDE_FLOAT64_C( -414.07), SIMDE_FLOAT64_C( 79.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -774.08), SIMDE_FLOAT64_C( 841.24), SIMDE_FLOAT64_C( -414.07), SIMDE_FLOAT64_C( 79.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -98.08), SIMDE_FLOAT64_C( 464.19), SIMDE_FLOAT64_C( 711.12), SIMDE_FLOAT64_C( 282.83)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 664.52), SIMDE_FLOAT64_C( -224.13), SIMDE_FLOAT64_C( 633.65), SIMDE_FLOAT64_C( -834.15), SIMDE_FLOAT64_C( -157.33), SIMDE_FLOAT64_C( -819.46), SIMDE_FLOAT64_C( 541.44), SIMDE_FLOAT64_C( 112.81)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -157.33), SIMDE_FLOAT64_C( -819.46), SIMDE_FLOAT64_C( 541.44), SIMDE_FLOAT64_C( 112.81)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 664.52), SIMDE_FLOAT64_C( -224.13), SIMDE_FLOAT64_C( 633.65), 
SIMDE_FLOAT64_C( -834.15)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r; r = simde_mm512_extractf64x4_pd(test_vec[i].a, 0); simde_assert_m256d_close(r, test_vec[i].r0, 1); r = simde_mm512_extractf64x4_pd(test_vec[i].a, 1); simde_assert_m256d_close(r, test_vec[i].r1, 1); } return 0; } static int test_simde_mm512_mask_extractf64x4_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d src; simde__mmask8 k; simde__m512d a; simde__m256d r0; simde__m256d r1; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -172.36), SIMDE_FLOAT64_C( 393.53), SIMDE_FLOAT64_C( 36.69), SIMDE_FLOAT64_C( -135.52)), UINT8_C( 63), simde_mm512_set_pd(SIMDE_FLOAT64_C( 522.06), SIMDE_FLOAT64_C( -932.28), SIMDE_FLOAT64_C( 600.12), SIMDE_FLOAT64_C( -491.12), SIMDE_FLOAT64_C( -139.11), SIMDE_FLOAT64_C( -268.86), SIMDE_FLOAT64_C( -71.72), SIMDE_FLOAT64_C( 98.47)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -139.11), SIMDE_FLOAT64_C( -268.86), SIMDE_FLOAT64_C( -71.72), SIMDE_FLOAT64_C( 98.47)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 522.06), SIMDE_FLOAT64_C( -932.28), SIMDE_FLOAT64_C( 600.12), SIMDE_FLOAT64_C( -491.12)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -514.56), SIMDE_FLOAT64_C( -860.98), SIMDE_FLOAT64_C( -280.30), SIMDE_FLOAT64_C( 128.51)), UINT8_C(157), simde_mm512_set_pd(SIMDE_FLOAT64_C( 483.08), SIMDE_FLOAT64_C( 232.04), SIMDE_FLOAT64_C( 774.81), SIMDE_FLOAT64_C( -599.01), SIMDE_FLOAT64_C( 69.04), SIMDE_FLOAT64_C( -149.02), SIMDE_FLOAT64_C( 240.79), SIMDE_FLOAT64_C( -839.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 69.04), SIMDE_FLOAT64_C( -149.02), SIMDE_FLOAT64_C( -280.30), SIMDE_FLOAT64_C( -839.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 483.08), SIMDE_FLOAT64_C( 232.04), SIMDE_FLOAT64_C( -280.30), SIMDE_FLOAT64_C( -599.01)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 51.33), SIMDE_FLOAT64_C( -868.59), SIMDE_FLOAT64_C( -921.00), SIMDE_FLOAT64_C( -471.60)), UINT8_C( 33), simde_mm512_set_pd(SIMDE_FLOAT64_C( -835.53), SIMDE_FLOAT64_C( 571.79), 
SIMDE_FLOAT64_C( 675.92), SIMDE_FLOAT64_C( 490.41), SIMDE_FLOAT64_C( 47.59), SIMDE_FLOAT64_C( -895.71), SIMDE_FLOAT64_C( -736.92), SIMDE_FLOAT64_C( 283.06)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 51.33), SIMDE_FLOAT64_C( -868.59), SIMDE_FLOAT64_C( -921.00), SIMDE_FLOAT64_C( 283.06)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 51.33), SIMDE_FLOAT64_C( -868.59), SIMDE_FLOAT64_C( -921.00), SIMDE_FLOAT64_C( 490.41)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -489.88), SIMDE_FLOAT64_C( -37.49), SIMDE_FLOAT64_C( -373.66), SIMDE_FLOAT64_C( -292.35)), UINT8_C(176), simde_mm512_set_pd(SIMDE_FLOAT64_C( -629.37), SIMDE_FLOAT64_C( 337.35), SIMDE_FLOAT64_C( -278.32), SIMDE_FLOAT64_C( -744.41), SIMDE_FLOAT64_C( 39.32), SIMDE_FLOAT64_C( 29.68), SIMDE_FLOAT64_C( -490.28), SIMDE_FLOAT64_C( 841.53)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -489.88), SIMDE_FLOAT64_C( -37.49), SIMDE_FLOAT64_C( -373.66), SIMDE_FLOAT64_C( -292.35)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -489.88), SIMDE_FLOAT64_C( -37.49), SIMDE_FLOAT64_C( -373.66), SIMDE_FLOAT64_C( -292.35)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -577.36), SIMDE_FLOAT64_C( 957.37), SIMDE_FLOAT64_C( -934.92), SIMDE_FLOAT64_C( -657.02)), UINT8_C(169), simde_mm512_set_pd(SIMDE_FLOAT64_C( -588.28), SIMDE_FLOAT64_C( 586.29), SIMDE_FLOAT64_C( -760.88), SIMDE_FLOAT64_C( -617.12), SIMDE_FLOAT64_C( -751.58), SIMDE_FLOAT64_C( 907.23), SIMDE_FLOAT64_C( -359.60), SIMDE_FLOAT64_C( -213.75)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -751.58), SIMDE_FLOAT64_C( 957.37), SIMDE_FLOAT64_C( -934.92), SIMDE_FLOAT64_C( -213.75)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -588.28), SIMDE_FLOAT64_C( 957.37), SIMDE_FLOAT64_C( -934.92), SIMDE_FLOAT64_C( -617.12)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 699.07), SIMDE_FLOAT64_C( -143.73), SIMDE_FLOAT64_C( 709.96), SIMDE_FLOAT64_C( -767.34)), UINT8_C( 52), simde_mm512_set_pd(SIMDE_FLOAT64_C( -432.13), SIMDE_FLOAT64_C( -842.15), SIMDE_FLOAT64_C( 908.84), SIMDE_FLOAT64_C( -315.78), SIMDE_FLOAT64_C( -86.06), SIMDE_FLOAT64_C( -413.85), 
SIMDE_FLOAT64_C( 752.26), SIMDE_FLOAT64_C( 609.29)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 699.07), SIMDE_FLOAT64_C( -413.85), SIMDE_FLOAT64_C( 709.96), SIMDE_FLOAT64_C( -767.34)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 699.07), SIMDE_FLOAT64_C( -842.15), SIMDE_FLOAT64_C( 709.96), SIMDE_FLOAT64_C( -767.34)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -695.21), SIMDE_FLOAT64_C( -786.23), SIMDE_FLOAT64_C( 873.30), SIMDE_FLOAT64_C( 241.45)), UINT8_C(217), simde_mm512_set_pd(SIMDE_FLOAT64_C( 180.78), SIMDE_FLOAT64_C( -289.23), SIMDE_FLOAT64_C( 918.52), SIMDE_FLOAT64_C( -422.76), SIMDE_FLOAT64_C( -433.33), SIMDE_FLOAT64_C( 48.49), SIMDE_FLOAT64_C( 799.57), SIMDE_FLOAT64_C( -820.22)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -433.33), SIMDE_FLOAT64_C( -786.23), SIMDE_FLOAT64_C( 873.30), SIMDE_FLOAT64_C( -820.22)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 180.78), SIMDE_FLOAT64_C( -786.23), SIMDE_FLOAT64_C( 873.30), SIMDE_FLOAT64_C( -422.76)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -712.24), SIMDE_FLOAT64_C( 518.12), SIMDE_FLOAT64_C( -173.80), SIMDE_FLOAT64_C( 487.08)), UINT8_C(237), simde_mm512_set_pd(SIMDE_FLOAT64_C( -415.61), SIMDE_FLOAT64_C( 994.61), SIMDE_FLOAT64_C( -659.70), SIMDE_FLOAT64_C( 493.30), SIMDE_FLOAT64_C( 831.29), SIMDE_FLOAT64_C( -619.50), SIMDE_FLOAT64_C( 952.47), SIMDE_FLOAT64_C( -492.61)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 831.29), SIMDE_FLOAT64_C( -619.50), SIMDE_FLOAT64_C( -173.80), SIMDE_FLOAT64_C( -492.61)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -415.61), SIMDE_FLOAT64_C( 994.61), SIMDE_FLOAT64_C( -173.80), SIMDE_FLOAT64_C( 493.30)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r; r = simde_mm512_mask_extractf64x4_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a, 0); simde_assert_m256d_close(r, test_vec[i].r0, 1); r = simde_mm512_mask_extractf64x4_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a, 1); simde_assert_m256d_close(r, test_vec[i].r1, 1); } return 0; } static int 
test_simde_mm512_maskz_extractf64x4_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512d a; simde__m256d r0; simde__m256d r1; } test_vec[8] = { { UINT8_C( 21), simde_mm512_set_pd(SIMDE_FLOAT64_C( -139.11), SIMDE_FLOAT64_C( -172.36), SIMDE_FLOAT64_C( -268.86), SIMDE_FLOAT64_C( 393.53), SIMDE_FLOAT64_C( -71.72), SIMDE_FLOAT64_C( 36.69), SIMDE_FLOAT64_C( 98.47), SIMDE_FLOAT64_C( -135.52)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 36.69), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -135.52)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -172.36), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 393.53)) }, { UINT8_C(150), simde_mm512_set_pd(SIMDE_FLOAT64_C( -556.90), SIMDE_FLOAT64_C( 522.06), SIMDE_FLOAT64_C( 160.98), SIMDE_FLOAT64_C( -932.28), SIMDE_FLOAT64_C( 391.82), SIMDE_FLOAT64_C( 600.12), SIMDE_FLOAT64_C( -569.99), SIMDE_FLOAT64_C( -491.12)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 600.12), SIMDE_FLOAT64_C( -569.99), SIMDE_FLOAT64_C( 0.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 522.06), SIMDE_FLOAT64_C( 160.98), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(132), simde_mm512_set_pd(SIMDE_FLOAT64_C( 846.15), SIMDE_FLOAT64_C( 69.04), SIMDE_FLOAT64_C( -514.56), SIMDE_FLOAT64_C( -149.02), SIMDE_FLOAT64_C( -860.98), SIMDE_FLOAT64_C( 240.79), SIMDE_FLOAT64_C( -280.30), SIMDE_FLOAT64_C( -839.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 240.79), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 69.04), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(158), simde_mm512_set_pd(SIMDE_FLOAT64_C( -471.60), SIMDE_FLOAT64_C( -333.94), SIMDE_FLOAT64_C( 483.08), SIMDE_FLOAT64_C( 903.50), SIMDE_FLOAT64_C( 232.04), SIMDE_FLOAT64_C( -43.35), SIMDE_FLOAT64_C( 774.81), SIMDE_FLOAT64_C( 309.91)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 232.04), SIMDE_FLOAT64_C( -43.35), SIMDE_FLOAT64_C( 774.81), SIMDE_FLOAT64_C( 0.00)), 
simde_mm256_set_pd(SIMDE_FLOAT64_C( -471.60), SIMDE_FLOAT64_C( -333.94), SIMDE_FLOAT64_C( 483.08), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(192), simde_mm512_set_pd(SIMDE_FLOAT64_C( 490.41), SIMDE_FLOAT64_C( 456.89), SIMDE_FLOAT64_C( 47.59), SIMDE_FLOAT64_C( 51.33), SIMDE_FLOAT64_C( -895.71), SIMDE_FLOAT64_C( -868.59), SIMDE_FLOAT64_C( -736.92), SIMDE_FLOAT64_C( -921.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(209), simde_mm512_set_pd(SIMDE_FLOAT64_C( 841.53), SIMDE_FLOAT64_C( -292.35), SIMDE_FLOAT64_C( 526.21), SIMDE_FLOAT64_C( -835.53), SIMDE_FLOAT64_C( -203.04), SIMDE_FLOAT64_C( 571.79), SIMDE_FLOAT64_C( -80.71), SIMDE_FLOAT64_C( 675.92)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 675.92)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -835.53)) }, { UINT8_C( 43), simde_mm512_set_pd(SIMDE_FLOAT64_C( -925.69), SIMDE_FLOAT64_C( -744.41), SIMDE_FLOAT64_C( 717.83), SIMDE_FLOAT64_C( 39.32), SIMDE_FLOAT64_C( -489.88), SIMDE_FLOAT64_C( 29.68), SIMDE_FLOAT64_C( -37.49), SIMDE_FLOAT64_C( -490.28)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -489.88), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -37.49), SIMDE_FLOAT64_C( -490.28)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -925.69), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 717.83), SIMDE_FLOAT64_C( 39.32)) }, { UINT8_C(120), simde_mm512_set_pd(SIMDE_FLOAT64_C( -934.92), SIMDE_FLOAT64_C( -213.75), SIMDE_FLOAT64_C( -657.02), SIMDE_FLOAT64_C( 403.00), SIMDE_FLOAT64_C( -629.37), SIMDE_FLOAT64_C( -198.67), SIMDE_FLOAT64_C( 337.35), SIMDE_FLOAT64_C( 447.98)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -629.37), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -934.92), 
SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r; r = simde_mm512_maskz_extractf64x4_pd(test_vec[i].k, test_vec[i].a, 0); simde_assert_m256d_close(r, test_vec[i].r0, 1); r = simde_mm512_maskz_extractf64x4_pd(test_vec[i].k, test_vec[i].a, 1); simde_assert_m256d_close(r, test_vec[i].r1, 1); } return 0; } static int test_simde_mm512_extracti32x4_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m128i r0; simde__m128i r1; simde__m128i r2; simde__m128i r3; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 936676195), INT32_C( -430989686), INT32_C( -131327474), INT32_C( 910508384), INT32_C( 1148801293), INT32_C(-1204409147), INT32_C( 1922921929), INT32_C( 2087027240), INT32_C( 1221368626), INT32_C(-1114006136), INT32_C( 2023469730), INT32_C( 463308257), INT32_C(-1206798920), INT32_C( -968449396), INT32_C( -580990777), INT32_C( -896508445)), simde_mm_set_epi32(INT32_C(-1206798920), INT32_C( -968449396), INT32_C( -580990777), INT32_C( -896508445)), simde_mm_set_epi32(INT32_C( 1221368626), INT32_C(-1114006136), INT32_C( 2023469730), INT32_C( 463308257)), simde_mm_set_epi32(INT32_C( 1148801293), INT32_C(-1204409147), INT32_C( 1922921929), INT32_C( 2087027240)), simde_mm_set_epi32(INT32_C( 936676195), INT32_C( -430989686), INT32_C( -131327474), INT32_C( 910508384)) }, { simde_mm512_set_epi32(INT32_C(-1889562474), INT32_C( 2009910179), INT32_C( 815981096), INT32_C( -945310665), INT32_C(-1749696319), INT32_C( 1237778758), INT32_C( 2121903540), INT32_C(-1272250958), INT32_C(-1384883580), INT32_C( 455864550), INT32_C(-1132110758), INT32_C( 636699315), INT32_C( 1712647786), INT32_C( 1137382661), INT32_C( -72212648), INT32_C(-1857153584)), simde_mm_set_epi32(INT32_C( 1712647786), INT32_C( 1137382661), INT32_C( -72212648), INT32_C(-1857153584)), simde_mm_set_epi32(INT32_C(-1384883580), INT32_C( 455864550), INT32_C(-1132110758), INT32_C( 
636699315)), simde_mm_set_epi32(INT32_C(-1749696319), INT32_C( 1237778758), INT32_C( 2121903540), INT32_C(-1272250958)), simde_mm_set_epi32(INT32_C(-1889562474), INT32_C( 2009910179), INT32_C( 815981096), INT32_C( -945310665)) }, { simde_mm512_set_epi32(INT32_C(-1732993162), INT32_C( 1212743926), INT32_C( 1966971402), INT32_C(-1506668774), INT32_C(-1700657265), INT32_C( 1944327234), INT32_C( -355879099), INT32_C(-1588067414), INT32_C( 301696052), INT32_C( 1998339065), INT32_C(-2060809025), INT32_C(-1942156019), INT32_C( 551689125), INT32_C( 669995747), INT32_C(-1196653219), INT32_C( -147816939)), simde_mm_set_epi32(INT32_C( 551689125), INT32_C( 669995747), INT32_C(-1196653219), INT32_C( -147816939)), simde_mm_set_epi32(INT32_C( 301696052), INT32_C( 1998339065), INT32_C(-2060809025), INT32_C(-1942156019)), simde_mm_set_epi32(INT32_C(-1700657265), INT32_C( 1944327234), INT32_C( -355879099), INT32_C(-1588067414)), simde_mm_set_epi32(INT32_C(-1732993162), INT32_C( 1212743926), INT32_C( 1966971402), INT32_C(-1506668774)) }, { simde_mm512_set_epi32(INT32_C( -720429002), INT32_C( 1666176035), INT32_C( -786738545), INT32_C( 356149527), INT32_C( 1809623523), INT32_C( 387697241), INT32_C( -984752565), INT32_C(-1905225073), INT32_C( 1936855390), INT32_C(-1150638889), INT32_C( -620356961), INT32_C(-1540113901), INT32_C( 485150966), INT32_C( -340934070), INT32_C( 1258270405), INT32_C(-1976197296)), simde_mm_set_epi32(INT32_C( 485150966), INT32_C( -340934070), INT32_C( 1258270405), INT32_C(-1976197296)), simde_mm_set_epi32(INT32_C( 1936855390), INT32_C(-1150638889), INT32_C( -620356961), INT32_C(-1540113901)), simde_mm_set_epi32(INT32_C( 1809623523), INT32_C( 387697241), INT32_C( -984752565), INT32_C(-1905225073)), simde_mm_set_epi32(INT32_C( -720429002), INT32_C( 1666176035), INT32_C( -786738545), INT32_C( 356149527)) }, { simde_mm512_set_epi32(INT32_C(-1844524534), INT32_C( 359706932), INT32_C( 21147132), INT32_C(-1205907433), INT32_C( 241660444), INT32_C(-1425169590), 
INT32_C( 1296561443), INT32_C(-1934442075), INT32_C( 2141890625), INT32_C( 2063982974), INT32_C(-1791266937), INT32_C(-1677757015), INT32_C(-1341587157), INT32_C( 71085124), INT32_C( 1045857655), INT32_C( 563120574)), simde_mm_set_epi32(INT32_C(-1341587157), INT32_C( 71085124), INT32_C( 1045857655), INT32_C( 563120574)), simde_mm_set_epi32(INT32_C( 2141890625), INT32_C( 2063982974), INT32_C(-1791266937), INT32_C(-1677757015)), simde_mm_set_epi32(INT32_C( 241660444), INT32_C(-1425169590), INT32_C( 1296561443), INT32_C(-1934442075)), simde_mm_set_epi32(INT32_C(-1844524534), INT32_C( 359706932), INT32_C( 21147132), INT32_C(-1205907433)) }, { simde_mm512_set_epi32(INT32_C( 1760980702), INT32_C(-1592941833), INT32_C(-1618734568), INT32_C(-1937346052), INT32_C( -716563340), INT32_C(-1364071584), INT32_C( -516953475), INT32_C( 1021791773), INT32_C( 587319712), INT32_C(-1327772936), INT32_C( -388433125), INT32_C(-1835488163), INT32_C( 1934085090), INT32_C( 1823172786), INT32_C( -962834173), INT32_C(-1813383694)), simde_mm_set_epi32(INT32_C( 1934085090), INT32_C( 1823172786), INT32_C( -962834173), INT32_C(-1813383694)), simde_mm_set_epi32(INT32_C( 587319712), INT32_C(-1327772936), INT32_C( -388433125), INT32_C(-1835488163)), simde_mm_set_epi32(INT32_C( -716563340), INT32_C(-1364071584), INT32_C( -516953475), INT32_C( 1021791773)), simde_mm_set_epi32(INT32_C( 1760980702), INT32_C(-1592941833), INT32_C(-1618734568), INT32_C(-1937346052)) }, { simde_mm512_set_epi32(INT32_C(-1600993635), INT32_C( 1692797667), INT32_C( -524624106), INT32_C( -196896874), INT32_C( 1365949044), INT32_C( 1838002887), INT32_C( -452898509), INT32_C( 1408911553), INT32_C(-1407150071), INT32_C( -82352116), INT32_C( -745337283), INT32_C( -527368953), INT32_C(-1750389986), INT32_C( 1217697098), INT32_C( 1169663592), INT32_C(-1794175196)), simde_mm_set_epi32(INT32_C(-1750389986), INT32_C( 1217697098), INT32_C( 1169663592), INT32_C(-1794175196)), simde_mm_set_epi32(INT32_C(-1407150071), INT32_C( -82352116), 
INT32_C( -745337283), INT32_C( -527368953)), simde_mm_set_epi32(INT32_C( 1365949044), INT32_C( 1838002887), INT32_C( -452898509), INT32_C( 1408911553)), simde_mm_set_epi32(INT32_C(-1600993635), INT32_C( 1692797667), INT32_C( -524624106), INT32_C( -196896874)) }, { simde_mm512_set_epi32(INT32_C( 31532768), INT32_C(-1104316005), INT32_C(-1643683522), INT32_C( -459507150), INT32_C( -301521916), INT32_C(-1095317885), INT32_C( 1268414902), INT32_C( -436965349), INT32_C( 330503221), INT32_C( 1614750696), INT32_C( 1262893786), INT32_C( 1956553172), INT32_C(-1113093793), INT32_C(-1782413198), INT32_C( 1413241306), INT32_C(-1360271723)), simde_mm_set_epi32(INT32_C(-1113093793), INT32_C(-1782413198), INT32_C( 1413241306), INT32_C(-1360271723)), simde_mm_set_epi32(INT32_C( 330503221), INT32_C( 1614750696), INT32_C( 1262893786), INT32_C( 1956553172)), simde_mm_set_epi32(INT32_C( -301521916), INT32_C(-1095317885), INT32_C( 1268414902), INT32_C( -436965349)), simde_mm_set_epi32(INT32_C( 31532768), INT32_C(-1104316005), INT32_C(-1643683522), INT32_C( -459507150)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r; r = simde_mm512_extracti32x4_epi32(test_vec[i].a, 0); simde_assert_m128i_i32(r, ==, test_vec[i].r0); r = simde_mm512_extracti32x4_epi32(test_vec[i].a, 1); simde_assert_m128i_i32(r, ==, test_vec[i].r1); r = simde_mm512_extracti32x4_epi32(test_vec[i].a, 2); simde_assert_m128i_i32(r, ==, test_vec[i].r2); r = simde_mm512_extracti32x4_epi32(test_vec[i].a, 3); simde_assert_m128i_i32(r, ==, test_vec[i].r3); } return 0; } static int test_simde_mm512_mask_extracti32x4_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i src; simde__mmask8 k; simde__m512i a; simde__m128i r0; simde__m128i r1; simde__m128i r2; simde__m128i r3; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 1993455974), INT32_C(-2068684593), INT32_C(-1936012201), INT32_C( 1856459607)), UINT8_C( 6), simde_mm512_set_epi32(INT32_C(-1630396605), INT32_C( 1545554432), 
INT32_C( 344023940), INT32_C(-1871515754), INT32_C( 951544639), INT32_C(-1026363374), INT32_C(-1801776439), INT32_C( 145438126), INT32_C(-1306064352), INT32_C( -858736392), INT32_C( 923442479), INT32_C( 1092805562), INT32_C( 1443901717), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1570116932)), simde_mm_set_epi32(INT32_C( 1993455974), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1856459607)), simde_mm_set_epi32(INT32_C( 1993455974), INT32_C( -858736392), INT32_C( 923442479), INT32_C( 1856459607)), simde_mm_set_epi32(INT32_C( 1993455974), INT32_C(-1026363374), INT32_C(-1801776439), INT32_C( 1856459607)), simde_mm_set_epi32(INT32_C( 1993455974), INT32_C( 1545554432), INT32_C( 344023940), INT32_C( 1856459607)) }, { simde_mm_set_epi32(INT32_C(-1999224530), INT32_C( 1042470181), INT32_C( 1827473477), INT32_C( 298546792)), UINT8_C(109), simde_mm512_set_epi32(INT32_C(-2045280751), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 564965997), INT32_C( 169645898), INT32_C(-1539616610), INT32_C( 1134735685), INT32_C( 1430356381), INT32_C(-1110068455), INT32_C( -207240031), INT32_C(-1649179267), INT32_C( 2054398444), INT32_C( -483586503), INT32_C(-1481960002), INT32_C( 861125508)), simde_mm_set_epi32(INT32_C( 2054398444), INT32_C( -483586503), INT32_C( 1827473477), INT32_C( 861125508)), simde_mm_set_epi32(INT32_C( 1430356381), INT32_C(-1110068455), INT32_C( 1827473477), INT32_C(-1649179267)), simde_mm_set_epi32(INT32_C( 564965997), INT32_C( 169645898), INT32_C( 1827473477), INT32_C( 1134735685)), simde_mm_set_epi32(INT32_C(-2045280751), INT32_C(-2037261521), INT32_C( 1827473477), INT32_C( 282198336)) }, { simde_mm_set_epi32(INT32_C( -695949043), INT32_C( -790242624), INT32_C(-1094331335), INT32_C(-1166320093)), UINT8_C(181), simde_mm512_set_epi32(INT32_C( 1549802795), INT32_C( 159583350), INT32_C( 548883180), INT32_C( -605945909), INT32_C(-2063050181), INT32_C( 1095467003), INT32_C(-2083755741), INT32_C( 2066979701), INT32_C( 
1094609712), INT32_C( 1345059025), INT32_C( -340318359), INT32_C( 1519671047), INT32_C(-1017461983), INT32_C( 353198331), INT32_C( 1711460779), INT32_C( -919570191)), simde_mm_set_epi32(INT32_C( -695949043), INT32_C( 353198331), INT32_C(-1094331335), INT32_C( -919570191)), simde_mm_set_epi32(INT32_C( -695949043), INT32_C( 1345059025), INT32_C(-1094331335), INT32_C( 1519671047)), simde_mm_set_epi32(INT32_C( -695949043), INT32_C( 1095467003), INT32_C(-1094331335), INT32_C( 2066979701)), simde_mm_set_epi32(INT32_C( -695949043), INT32_C( 159583350), INT32_C(-1094331335), INT32_C( -605945909)) }, { simde_mm_set_epi32(INT32_C( 795925067), INT32_C( 1720852541), INT32_C(-1423023772), INT32_C(-1185448755)), UINT8_C(176), simde_mm512_set_epi32(INT32_C( 884163960), INT32_C( -329275629), INT32_C( -888441293), INT32_C( -707551350), INT32_C( 513515868), INT32_C(-1825967755), INT32_C( 822222164), INT32_C(-1689559027), INT32_C( 533478787), INT32_C( 907615417), INT32_C( -199229058), INT32_C( -91537812), INT32_C( 1375258232), INT32_C( 139748399), INT32_C( 1688468565), INT32_C( 736544549)), simde_mm_set_epi32(INT32_C( 795925067), INT32_C( 1720852541), INT32_C(-1423023772), INT32_C(-1185448755)), simde_mm_set_epi32(INT32_C( 795925067), INT32_C( 1720852541), INT32_C(-1423023772), INT32_C(-1185448755)), simde_mm_set_epi32(INT32_C( 795925067), INT32_C( 1720852541), INT32_C(-1423023772), INT32_C(-1185448755)), simde_mm_set_epi32(INT32_C( 795925067), INT32_C( 1720852541), INT32_C(-1423023772), INT32_C(-1185448755)) }, { simde_mm_set_epi32(INT32_C( -622852205), INT32_C( -839037220), INT32_C( 499633910), INT32_C( -260167255)), UINT8_C( 21), simde_mm512_set_epi32(INT32_C( -272088075), INT32_C( 386072301), INT32_C(-1628984154), INT32_C( 87817524), INT32_C( 1219490517), INT32_C(-1569831145), INT32_C( 338985942), INT32_C( 1701079465), INT32_C( -195770682), INT32_C( 503748315), INT32_C( 1469355417), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C( -646247370), INT32_C( 1258747662), INT32_C( 
1838830023)), simde_mm_set_epi32(INT32_C( -622852205), INT32_C( -646247370), INT32_C( 499633910), INT32_C( 1838830023)), simde_mm_set_epi32(INT32_C( -622852205), INT32_C( 503748315), INT32_C( 499633910), INT32_C(-1849349632)), simde_mm_set_epi32(INT32_C( -622852205), INT32_C(-1569831145), INT32_C( 499633910), INT32_C( 1701079465)), simde_mm_set_epi32(INT32_C( -622852205), INT32_C( 386072301), INT32_C( 499633910), INT32_C( 87817524)) }, { simde_mm_set_epi32(INT32_C( 654527510), INT32_C(-2043358500), INT32_C( 459072440), INT32_C( -430427651)), UINT8_C(229), simde_mm512_set_epi32(INT32_C( 617951303), INT32_C( 817116152), INT32_C(-1034835761), INT32_C( -102069057), INT32_C( 1774242298), INT32_C( 1089620040), INT32_C(-1101477862), INT32_C( 2001101785), INT32_C(-1759250988), INT32_C( -606254738), INT32_C( 1526367108), INT32_C( 722122834), INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494), INT32_C( -22119232)), simde_mm_set_epi32(INT32_C( 654527510), INT32_C(-1762469023), INT32_C( 459072440), INT32_C( -22119232)), simde_mm_set_epi32(INT32_C( 654527510), INT32_C( -606254738), INT32_C( 459072440), INT32_C( 722122834)), simde_mm_set_epi32(INT32_C( 654527510), INT32_C( 1089620040), INT32_C( 459072440), INT32_C( 2001101785)), simde_mm_set_epi32(INT32_C( 654527510), INT32_C( 817116152), INT32_C( 459072440), INT32_C( -102069057)) }, { simde_mm_set_epi32(INT32_C(-2034110695), INT32_C(-1088138491), INT32_C( -353174912), INT32_C( -362301616)), UINT8_C( 42), simde_mm512_set_epi32(INT32_C( 204417556), INT32_C(-1329665093), INT32_C(-2039025377), INT32_C( 1639231015), INT32_C( 1541217841), INT32_C( 1692413538), INT32_C( 738521275), INT32_C( 159429100), INT32_C( 451955897), INT32_C( 181201098), INT32_C( 450627934), INT32_C( 2082954477), INT32_C( 1254960767), INT32_C( 1995459397), INT32_C( -11572946), INT32_C(-1087388220)), simde_mm_set_epi32(INT32_C( 1254960767), INT32_C(-1088138491), INT32_C( -11572946), INT32_C( -362301616)), simde_mm_set_epi32(INT32_C( 451955897), 
INT32_C(-1088138491), INT32_C( 450627934), INT32_C( -362301616)), simde_mm_set_epi32(INT32_C( 1541217841), INT32_C(-1088138491), INT32_C( 738521275), INT32_C( -362301616)), simde_mm_set_epi32(INT32_C( 204417556), INT32_C(-1088138491), INT32_C(-2039025377), INT32_C( -362301616)) }, { simde_mm_set_epi32(INT32_C(-1687118128), INT32_C( 107945377), INT32_C( 1174128677), INT32_C(-1544325740)), UINT8_C(132), simde_mm512_set_epi32(INT32_C( -852914371), INT32_C( -773785464), INT32_C(-2142007253), INT32_C( 466013192), INT32_C( 1313258175), INT32_C( 1928049651), INT32_C( 765730488), INT32_C( -85899231), INT32_C( 1435935141), INT32_C(-2098236580), INT32_C(-1991433794), INT32_C( 1298943776), INT32_C( 277470244), INT32_C(-1834748849), INT32_C( 596054477), INT32_C( 1827419510)), simde_mm_set_epi32(INT32_C(-1687118128), INT32_C(-1834748849), INT32_C( 1174128677), INT32_C(-1544325740)), simde_mm_set_epi32(INT32_C(-1687118128), INT32_C(-2098236580), INT32_C( 1174128677), INT32_C(-1544325740)), simde_mm_set_epi32(INT32_C(-1687118128), INT32_C( 1928049651), INT32_C( 1174128677), INT32_C(-1544325740)), simde_mm_set_epi32(INT32_C(-1687118128), INT32_C( -773785464), INT32_C( 1174128677), INT32_C(-1544325740)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r; r = simde_mm512_mask_extracti32x4_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a, 0); simde_assert_m128i_i32(r, ==, test_vec[i].r0); r = simde_mm512_mask_extracti32x4_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a, 1); simde_assert_m128i_i32(r, ==, test_vec[i].r1); r = simde_mm512_mask_extracti32x4_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a, 2); simde_assert_m128i_i32(r, ==, test_vec[i].r2); r = simde_mm512_mask_extracti32x4_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a, 3); simde_assert_m128i_i32(r, ==, test_vec[i].r3); } return 0; } static int test_simde_mm512_maskz_extracti32x4_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; 
simde__m128i r0; simde__m128i r1; simde__m128i r2; simde__m128i r3; } test_vec[8] = { { UINT8_C( 87), simde_mm512_set_epi32(INT32_C( 951544639), INT32_C(-1026363374), INT32_C(-1801776439), INT32_C( 145438126), INT32_C(-1306064352), INT32_C( -858736392), INT32_C( 923442479), INT32_C( 1092805562), INT32_C( 1443901717), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1570116932), INT32_C(-1302383354), INT32_C( 1993455974), INT32_C(-2068684593), INT32_C(-1936012201)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 1993455974), INT32_C(-2068684593), INT32_C(-1936012201)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1570116932)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -858736392), INT32_C( 923442479), INT32_C( 1092805562)), simde_mm_set_epi32(INT32_C( 0), INT32_C(-1026363374), INT32_C(-1801776439), INT32_C( 145438126)) }, { UINT8_C(150), simde_mm512_set_epi32(INT32_C( 1430356381), INT32_C(-1110068455), INT32_C( -207240031), INT32_C(-1649179267), INT32_C( 2054398444), INT32_C( -483586503), INT32_C(-1481960002), INT32_C( 861125508), INT32_C( -330381203), INT32_C(-1999224530), INT32_C( 1042470181), INT32_C( 1827473477), INT32_C( 298546792), INT32_C(-1630396605), INT32_C( 1545554432), INT32_C( 344023940)), simde_mm_set_epi32(INT32_C( 0), INT32_C(-1630396605), INT32_C( 1545554432), INT32_C( 0)), simde_mm_set_epi32(INT32_C( 0), INT32_C(-1999224530), INT32_C( 1042470181), INT32_C( 0)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -483586503), INT32_C(-1481960002), INT32_C( 0)), simde_mm_set_epi32(INT32_C( 0), INT32_C(-1110068455), INT32_C( -207240031), INT32_C( 0)) }, { UINT8_C( 69), simde_mm512_set_epi32(INT32_C(-1017461983), INT32_C( 353198331), INT32_C( 1711460779), INT32_C( -919570191), INT32_C( 1974152373), INT32_C( -695949043), INT32_C( -790242624), INT32_C(-1094331335), INT32_C(-1166320093), INT32_C(-2045280751), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 564965997), INT32_C( 169645898), 
INT32_C(-1539616610)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 564965997), INT32_C( 0), INT32_C(-1539616610)), simde_mm_set_epi32(INT32_C( 0), INT32_C(-2045280751), INT32_C( 0), INT32_C( 223952317)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -695949043), INT32_C( 0), INT32_C(-1094331335)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 353198331), INT32_C( 0), INT32_C( -919570191)) }, { UINT8_C( 7), simde_mm512_set_epi32(INT32_C(-1282057552), INT32_C( 795925067), INT32_C( 1720852541), INT32_C(-1423023772), INT32_C(-1185448755), INT32_C( 1549802795), INT32_C( 159583350), INT32_C( 548883180), INT32_C( -605945909), INT32_C(-2063050181), INT32_C( 1095467003), INT32_C(-2083755741), INT32_C( 2066979701), INT32_C( 1094609712), INT32_C( 1345059025), INT32_C( -340318359)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 1094609712), INT32_C( 1345059025), INT32_C( -340318359)), simde_mm_set_epi32(INT32_C( 0), INT32_C(-2063050181), INT32_C( 1095467003), INT32_C(-2083755741)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 1549802795), INT32_C( 159583350), INT32_C( 548883180)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 795925067), INT32_C( 1720852541), INT32_C(-1423023772)) }, { UINT8_C( 37), simde_mm512_set_epi32(INT32_C( -260167255), INT32_C( 884163960), INT32_C( -329275629), INT32_C( -888441293), INT32_C( -707551350), INT32_C( 513515868), INT32_C(-1825967755), INT32_C( 822222164), INT32_C(-1689559027), INT32_C( 533478787), INT32_C( 907615417), INT32_C( -199229058), INT32_C( -91537812), INT32_C( 1375258232), INT32_C( 139748399), INT32_C( 1688468565)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 1375258232), INT32_C( 0), INT32_C( 1688468565)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 533478787), INT32_C( 0), INT32_C( -199229058)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 513515868), INT32_C( 0), INT32_C( 822222164)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 884163960), INT32_C( 0), INT32_C( -888441293)) }, { UINT8_C(246), simde_mm512_set_epi32(INT32_C( 87817524), INT32_C( 1219490517), 
INT32_C(-1569831145), INT32_C( 338985942), INT32_C( 1701079465), INT32_C( -195770682), INT32_C( 503748315), INT32_C( 1469355417), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C( -646247370), INT32_C( 1258747662), INT32_C( 1838830023), INT32_C( -532007659), INT32_C( -622852205), INT32_C( -839037220)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -532007659), INT32_C( -622852205), INT32_C( 0)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 1962664621), INT32_C( -646247370), INT32_C( 0)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -195770682), INT32_C( 503748315), INT32_C( 0)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 1219490517), INT32_C(-1569831145), INT32_C( 0)) }, { UINT8_C(166), simde_mm512_set_epi32(INT32_C( 2001101785), INT32_C(-1759250988), INT32_C( -606254738), INT32_C( 1526367108), INT32_C( 722122834), INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494), INT32_C( -22119232), INT32_C( 1216907749), INT32_C( 654527510), INT32_C(-2043358500), INT32_C( 459072440), INT32_C( -430427651), INT32_C( -272088075), INT32_C( 386072301)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -430427651), INT32_C( -272088075), INT32_C( 0)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 1216907749), INT32_C( 654527510), INT32_C( 0)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 0)), simde_mm_set_epi32(INT32_C( 0), INT32_C(-1759250988), INT32_C( -606254738), INT32_C( 0)) }, { UINT8_C( 26), simde_mm512_set_epi32(INT32_C( 2082954477), INT32_C( 1254960767), INT32_C( 1995459397), INT32_C( -11572946), INT32_C(-1087388220), INT32_C( 730787370), INT32_C(-2034110695), INT32_C(-1088138491), INT32_C( -353174912), INT32_C( -362301616), INT32_C( 617951303), INT32_C( 817116152), INT32_C(-1034835761), INT32_C( -102069057), INT32_C( 1774242298), INT32_C( 1089620040)), simde_mm_set_epi32(INT32_C(-1034835761), INT32_C( 0), INT32_C( 1774242298), INT32_C( 0)), simde_mm_set_epi32(INT32_C( -353174912), INT32_C( 0), INT32_C( 617951303), INT32_C( 0)), 
simde_mm_set_epi32(INT32_C(-1087388220), INT32_C( 0), INT32_C(-2034110695), INT32_C( 0)), simde_mm_set_epi32(INT32_C( 2082954477), INT32_C( 0), INT32_C( 1995459397), INT32_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r; r = simde_mm512_maskz_extracti32x4_epi32(test_vec[i].k, test_vec[i].a, 0); simde_assert_m128i_i32(r, ==, test_vec[i].r0); r = simde_mm512_maskz_extracti32x4_epi32(test_vec[i].k, test_vec[i].a, 1); simde_assert_m128i_i32(r, ==, test_vec[i].r1); r = simde_mm512_maskz_extracti32x4_epi32(test_vec[i].k, test_vec[i].a, 2); simde_assert_m128i_i32(r, ==, test_vec[i].r2); r = simde_mm512_maskz_extracti32x4_epi32(test_vec[i].k, test_vec[i].a, 3); simde_assert_m128i_i32(r, ==, test_vec[i].r3); } return 0; } static int test_simde_mm512_extracti64x4_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m256i r0; simde__m256i r1; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( 4022993628330696330), INT64_C( -564047204985781920), INT64_C( 4934063986128071877), INT64_C( 8258886799903261224), INT64_C( 5245738308211416456), INT64_C( 8690736315259258337), INT64_C(-5183161890921602420), INT64_C(-2495336383094170141)), simde_mm256_set_epi64x(INT64_C( 5245738308211416456), INT64_C( 8690736315259258337), INT64_C(-5183161890921602420), INT64_C(-2495336383094170141)), simde_mm256_set_epi64x(INT64_C( 4022993628330696330), INT64_C( -564047204985781920), INT64_C( 4934063986128071877), INT64_C( 8258886799903261224)) }, { simde_mm512_set_epi64(INT64_C(-8115609027568940125), INT64_C( 3504612124823893047), INT64_C(-7514888466798804666), INT64_C( 9113506312589344178), INT64_C(-5948029684411535130), INT64_C(-4862378680423071053), INT64_C( 7355766231574189317), INT64_C( -310150959079746096)), simde_mm256_set_epi64x(INT64_C(-5948029684411535130), INT64_C(-4862378680423071053), INT64_C( 7355766231574189317), INT64_C( -310150959079746096)), simde_mm256_set_epi64x(INT64_C(-8115609027568940125), INT64_C( 
3504612124823893047), INT64_C(-7514888466798804666), INT64_C( 9113506312589344178)) }, { simde_mm512_set_epi64(INT64_C(-7443148953768886026), INT64_C( 8448077846545567514), INT64_C(-7304267332935478206), INT64_C(-1528489088828046422), INT64_C( 1295774678670654457), INT64_C(-8851107363323835123), INT64_C( 2369486750103851747), INT64_C(-5139586436110975467)), simde_mm256_set_epi64x(INT64_C( 1295774678670654457), INT64_C(-8851107363323835123), INT64_C( 2369486750103851747), INT64_C(-5139586436110975467)), simde_mm256_set_epi64x(INT64_C(-7443148953768886026), INT64_C( 8448077846545567514), INT64_C(-7304267332935478206), INT64_C(-1528489088828046422)) }, { simde_mm512_set_epi64(INT64_C(-3094219001013742557), INT64_C(-3379016320921474793), INT64_C( 7772273849745001049), INT64_C(-4229480058937372017), INT64_C( 8318730560275653847), INT64_C(-2664412856586094061), INT64_C( 2083707536546841162), INT64_C( 5404230241318444880)), simde_mm256_set_epi64x(INT64_C( 8318730560275653847), INT64_C(-2664412856586094061), INT64_C( 2083707536546841162), INT64_C( 5404230241318444880)), simde_mm256_set_epi64x(INT64_C(-3094219001013742557), INT64_C(-3379016320921474793), INT64_C( 7772273849745001049), INT64_C(-4229480058937372017)) }, { simde_mm512_set_epi64(INT64_C(-7922172549839933132), INT64_C( 90826243433254935), INT64_C( 1037923706586637130), INT64_C( 5568688997300093349), INT64_C( 9199350188047982974), INT64_C(-7693432910203882071), INT64_C(-5762072963977532348), INT64_C( 4491924425059371454)), simde_mm256_set_epi64x(INT64_C( 9199350188047982974), INT64_C(-7693432910203882071), INT64_C(-5762072963977532348), INT64_C( 4491924425059371454)), simde_mm256_set_epi64x(INT64_C(-7922172549839933132), INT64_C( 90826243433254935), INT64_C( 1037923706586637130), INT64_C( 5568688997300093349)) }, { simde_mm512_set_epi64(INT64_C( 7563354526679147255), INT64_C(-6952412028107066884), INT64_C(-3077616107881632928), INT64_C(-2220298267656761827), INT64_C( 2522518958303333112), 
INT64_C(-1668307566098600867), INT64_C( 8306832211054389426), INT64_C(-4135341282024622606)), simde_mm256_set_epi64x(INT64_C( 2522518958303333112), INT64_C(-1668307566098600867), INT64_C( 8306832211054389426), INT64_C(-4135341282024622606)), simde_mm256_set_epi64x(INT64_C( 7563354526679147255), INT64_C(-6952412028107066884), INT64_C(-3077616107881632928), INT64_C(-2220298267656761827)) }, { simde_mm512_set_epi64(INT64_C(-6876215301736363293), INT64_C(-2253243373865166954), INT64_C( 5866706473820467911), INT64_C(-1945184283153250111), INT64_C(-6043663531296462836), INT64_C(-3201199251206898425), INT64_C(-7517867743898200758), INT64_C( 5023666877462679332)), simde_mm256_set_epi64x(INT64_C(-6043663531296462836), INT64_C(-3201199251206898425), INT64_C(-7517867743898200758), INT64_C( 5023666877462679332)), simde_mm256_set_epi64x(INT64_C(-6876215301736363293), INT64_C(-2253243373865166954), INT64_C( 5866706473820467911), INT64_C(-1945184283153250111)) }, { simde_mm512_set_epi64(INT64_C( 135432210503006619), INT64_C(-7059566968128636366), INT64_C(-1295026765047609725), INT64_C( 5447800525707046939), INT64_C( 1419500527032411112), INT64_C( 5424087511148175828), INT64_C(-4780701435803039630), INT64_C( 6069825193561024149)), simde_mm256_set_epi64x(INT64_C( 1419500527032411112), INT64_C( 5424087511148175828), INT64_C(-4780701435803039630), INT64_C( 6069825193561024149)), simde_mm256_set_epi64x(INT64_C( 135432210503006619), INT64_C(-7059566968128636366), INT64_C(-1295026765047609725), INT64_C( 5447800525707046939)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r; r = simde_mm512_extracti64x4_epi64(test_vec[i].a, 0); simde_assert_m256i_i64(r, ==, test_vec[i].r0); r = simde_mm512_extracti64x4_epi64(test_vec[i].a, 1); simde_assert_m256i_i64(r, ==, test_vec[i].r1); } return 0; } static int test_simde_mm512_mask_extracti64x4_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i src; simde__mmask8 k; simde__m512i a; simde__m256i r0; 
simde__m256i r1; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C( 7940316924786767481), INT64_C( 6743600876828439814), INT64_C( 8561828216572109007), INT64_C(-8315109086095518889)), UINT8_C( 21), simde_mm512_set_epi64(INT64_C(-8586603972668500699), INT64_C( 7848938818320954984), INT64_C(-7002500096438875648), INT64_C( 1477571573764517782), INT64_C( 4086853108457730066), INT64_C(-7738570880062900818), INT64_C(-5609503674875201288), INT64_C( 3966155248134972346)), simde_mm256_set_epi64x(INT64_C( 7940316924786767481), INT64_C(-7738570880062900818), INT64_C( 8561828216572109007), INT64_C( 3966155248134972346)), simde_mm256_set_epi64x(INT64_C( 7940316924786767481), INT64_C( 7848938818320954984), INT64_C( 8561828216572109007), INT64_C( 1477571573764517782)) }, { simde_mm256_set_epi64x(INT64_C(-4767707706458520415), INT64_C(-7083171014951853588), INT64_C(-2076988212358998594), INT64_C( 3698505898575972461)), UINT8_C(157), simde_mm512_set_epi64(INT64_C( 1516975282358243755), INT64_C(-3949523894747321163), INT64_C(-2989078375862773056), INT64_C(-4700117291684372957), INT64_C(-8784413934425613521), INT64_C( 961867877660623168), INT64_C( 2426510480636680010), INT64_C(-6612602987193650875)), simde_mm256_set_epi64x(INT64_C(-8784413934425613521), INT64_C( 961867877660623168), INT64_C(-2076988212358998594), INT64_C(-6612602987193650875)), simde_mm256_set_epi64x(INT64_C( 1516975282358243755), INT64_C(-3949523894747321163), INT64_C(-2076988212358998594), INT64_C(-4700117291684372957)) }, { simde_mm256_set_epi64x(INT64_C( 4704994953943345443), INT64_C( 8877610218385468208), INT64_C( 5776984527519295337), INT64_C( 6526937450820584225)), UINT8_C( 59), simde_mm512_set_epi64(INT64_C( 3898178537456140670), INT64_C( -393151907512138120), INT64_C( 600214805061827669), INT64_C( 3163434753014979248), INT64_C( 3418472134552461373), INT64_C(-6111840559061041971), INT64_C( 6656352319933975670), INT64_C( 2357435311113502667)), simde_mm256_set_epi64x(INT64_C( 3418472134552461373), INT64_C( 
8877610218385468208), INT64_C( 6656352319933975670), INT64_C( 2357435311113502667)), simde_mm256_set_epi64x(INT64_C( 3898178537456140670), INT64_C( 8877610218385468208), INT64_C( 600214805061827669), INT64_C( 3163434753014979248)) }, { simde_mm256_set_epi64x(INT64_C(-1414228054518303181), INT64_C(-3038909907977133732), INT64_C(-7842471790453318316), INT64_C(-7256600765093102205)), UINT8_C(120), simde_mm512_set_epi64(INT64_C(-6742373427678247978), INT64_C( 7306080674171373254), INT64_C( 2163582539809461657), INT64_C(-7942896186346970451), INT64_C(-2775611318017263858), INT64_C( 7897714815450887445), INT64_C(-2675129847260557604), INT64_C( 2145911307457407401)), simde_mm256_set_epi64x(INT64_C(-2775611318017263858), INT64_C(-3038909907977133732), INT64_C(-7842471790453318316), INT64_C(-7256600765093102205)), simde_mm256_set_epi64x(INT64_C(-6742373427678247978), INT64_C(-3038909907977133732), INT64_C(-7842471790453318316), INT64_C(-7256600765093102205)) }, { simde_mm256_set_epi64x(INT64_C(-8776157931044543560), INT64_C(-1848672680316222475), INT64_C( 1658167909352451238), INT64_C( 377173394815185621)), UINT8_C( 22), simde_mm512_set_epi64(INT64_C( 3509487153133496527), INT64_C( -438383259974317574), INT64_C( 4679882440059701274), INT64_C( 8594666725077939668), INT64_C(-2603844271228681340), INT64_C( 3101493959844818499), INT64_C(-7569746812758465314), INT64_C( -95001376835728923)), simde_mm256_set_epi64x(INT64_C(-8776157931044543560), INT64_C( 3101493959844818499), INT64_C(-7569746812758465314), INT64_C( 377173394815185621)), simde_mm256_set_epi64x(INT64_C(-8776157931044543560), INT64_C( -438383259974317574), INT64_C( 4679882440059701274), INT64_C( 377173394815185621)) }, { simde_mm256_set_epi64x(INT64_C( -49705421380794940), INT64_C( 3138707856740708121), INT64_C(-4673519228421997952), INT64_C(-1556073591389999033)), UINT8_C( 69), simde_mm512_set_epi64(INT64_C( 463621865143519269), INT64_C(-6632828547466581484), INT64_C(-5710868086811856609), INT64_C( 
7040443601555103281), INT64_C( 7268860797756174523), INT64_C( 684742770982669497), INT64_C( 778252790359918942), INT64_C( 8946221359026744959)), simde_mm256_set_epi64x(INT64_C( -49705421380794940), INT64_C( 684742770982669497), INT64_C(-4673519228421997952), INT64_C( 8946221359026744959)), simde_mm256_set_epi64x(INT64_C( -49705421380794940), INT64_C(-6632828547466581484), INT64_C(-4673519228421997952), INT64_C( 7040443601555103281)) }, { simde_mm256_set_epi64x(INT64_C(-8553143016080257248), INT64_C( 1191725626053358671), INT64_C( 2560034487176803702), INT64_C(-4340183042637127984)), UINT8_C( 92), simde_mm512_set_epi64(INT64_C( -638332694652688568), INT64_C(-8196543121330681227), INT64_C( 7593109912492073141), INT64_C( 6300090425305304893), INT64_C(-3323383259847225301), INT64_C( 2001511420457827007), INT64_C( 8280910196874944184), INT64_C( -368934386460614235)), simde_mm256_set_epi64x(INT64_C(-3323383259847225301), INT64_C( 2001511420457827007), INT64_C( 2560034487176803702), INT64_C(-4340183042637127984)), simde_mm256_set_epi64x(INT64_C( -638332694652688568), INT64_C(-8196543121330681227), INT64_C( 2560034487176803702), INT64_C(-4340183042637127984)) }, { simde_mm256_set_epi64x(INT64_C( 7286481320132913626), INT64_C( -777692308098335861), INT64_C( 8727238559278288416), INT64_C(-2736507802934917164)), UINT8_C(160), simde_mm512_set_epi64(INT64_C(-6023807055599376167), INT64_C( 2056379472574346663), INT64_C(-3486865648830471282), INT64_C( 8151787653682140580), INT64_C( -831601358278995789), INT64_C(-2800664419916301039), INT64_C( 3280702275774868225), INT64_C(-4735905134864699368)), simde_mm256_set_epi64x(INT64_C( 7286481320132913626), INT64_C( -777692308098335861), INT64_C( 8727238559278288416), INT64_C(-2736507802934917164)), simde_mm256_set_epi64x(INT64_C( 7286481320132913626), INT64_C( -777692308098335861), INT64_C( 8727238559278288416), INT64_C(-2736507802934917164)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r; r 
= simde_mm512_mask_extracti64x4_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a, 0); simde_assert_m256i_i64(r, ==, test_vec[i].r0); r = simde_mm512_mask_extracti64x4_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a, 1); simde_assert_m256i_i64(r, ==, test_vec[i].r1); } return 0; } static int test_simde_mm512_maskz_extracti64x4_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; simde__m256i r0; simde__m256i r1; } test_vec[8] = { { UINT8_C( 87), simde_mm512_set_epi64(INT64_C( 4086853108457730066), INT64_C(-7738570880062900818), INT64_C(-5609503674875201288), INT64_C( 3966155248134972346), INT64_C( 6201510655001996332), INT64_C( 7633590894537872708), INT64_C(-5593693910291334810), INT64_C(-8884932670315115433)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 7633590894537872708), INT64_C(-5593693910291334810), INT64_C(-8884932670315115433)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C(-7738570880062900818), INT64_C(-5609503674875201288), INT64_C( 3966155248134972346)) }, { UINT8_C(150), simde_mm512_set_epi64(INT64_C( 6143333881204814617), INT64_C( -890089152921238147), INT64_C( 8823574133744668217), INT64_C(-6364969741708969084), INT64_C(-1418976459802394322), INT64_C( 4477375336277674053), INT64_C( 1282248710630285123), INT64_C( 6638105739971879812)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 4477375336277674053), INT64_C( 1282248710630285123), INT64_C( 0)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( -890089152921238147), INT64_C( 8823574133744668217), INT64_C( 0)) }, { UINT8_C( 69), simde_mm512_set_epi64(INT64_C(-4369965941555109637), INT64_C( 7350668077567080689), INT64_C( 8478919882954811661), INT64_C(-3394066222784588743), INT64_C(-5009306653852991983), INT64_C(-8749971605870264899), INT64_C( 1212032624670585453), INT64_C( 728623586565902494)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C(-8749971605870264899), INT64_C( 0), INT64_C( 728623586565902494)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 7350668077567080689), 
INT64_C( 0), INT64_C(-3394066222784588743)) }, { UINT8_C( 7), simde_mm512_set_epi64(INT64_C(-5506395256633894325), INT64_C( 7391005387705442660), INT64_C(-5091463632259113685), INT64_C( 685405269785004780), INT64_C(-2602517860068074949), INT64_C( 4704994953943345443), INT64_C( 8877610218385468208), INT64_C( 5776984527519295337)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 4704994953943345443), INT64_C( 8877610218385468208), INT64_C( 5776984527519295337)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 7391005387705442660), INT64_C(-5091463632259113685), INT64_C( 685405269785004780)) }, { UINT8_C( 37), simde_mm512_set_epi64(INT64_C(-1117409850830928520), INT64_C(-1414228054518303181), INT64_C(-3038909907977133732), INT64_C(-7842471790453318316), INT64_C(-7256600765093102205), INT64_C( 3898178537456140670), INT64_C( -393151907512138120), INT64_C( 600214805061827669)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 3898178537456140670), INT64_C( 0), INT64_C( 600214805061827669)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C(-1414228054518303181), INT64_C( 0), INT64_C(-7842471790453318316)) }, { UINT8_C(246), simde_mm512_set_epi64(INT64_C( 377173394815185621), INT64_C(-6742373427678247978), INT64_C( 7306080674171373254), INT64_C( 2163582539809461657), INT64_C(-7942896186346970451), INT64_C(-2775611318017263858), INT64_C( 7897714815450887445), INT64_C(-2675129847260557604)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C(-2775611318017263858), INT64_C( 7897714815450887445), INT64_C( 0)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C(-6742373427678247978), INT64_C( 7306080674171373254), INT64_C( 0)) }, { UINT8_C(166), simde_mm512_set_epi64(INT64_C( 8594666725077939668), INT64_C(-2603844271228681340), INT64_C( 3101493959844818499), INT64_C(-7569746812758465314), INT64_C( -95001376835728923), INT64_C( 2811174252033921756), INT64_C( 1971701120159461885), INT64_C(-1168609383370522899)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 2811174252033921756), INT64_C( 1971701120159461885), 
INT64_C( 0)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C(-2603844271228681340), INT64_C( 3101493959844818499), INT64_C( 0)) }, { UINT8_C( 26), simde_mm512_set_epi64(INT64_C( 8946221359026744959), INT64_C( 8570432854894274862), INT64_C(-4670296842224865750), INT64_C(-8736438908262001915), INT64_C(-1516874692875012272), INT64_C( 2654080637722702840), INT64_C(-4444585746033374017), INT64_C( 7620312646179506248)), simde_mm256_set_epi64x(INT64_C(-1516874692875012272), INT64_C( 0), INT64_C(-4444585746033374017), INT64_C( 0)), simde_mm256_set_epi64x(INT64_C( 8946221359026744959), INT64_C( 0), INT64_C(-4670296842224865750), INT64_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r; r = simde_mm512_maskz_extracti64x4_epi64(test_vec[i].k, test_vec[i].a, 0); simde_assert_m256i_i64(r, ==, test_vec[i].r0); r = simde_mm512_maskz_extracti64x4_epi64(test_vec[i].k, test_vec[i].a, 1); simde_assert_m256i_i64(r, ==, test_vec[i].r1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_extractf32x4_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_extractf32x4_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_extractf32x4_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_extractf64x4_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_extractf64x4_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_extractf64x4_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_extracti32x4_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_extracti32x4_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_extracti32x4_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_extracti64x4_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_extracti64x4_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_extracti64x4_epi64) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/fmadd.c000066400000000000000000001626131400333146700165040ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files 
(the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #define SIMDE_TEST_X86_AVX512_INSN fmadd #include #include #include static int test_simde_mm512_fmadd_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 c[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 86.91), SIMDE_FLOAT32_C( -86.70), SIMDE_FLOAT32_C( -99.59), SIMDE_FLOAT32_C( -96.56), SIMDE_FLOAT32_C( 26.42), SIMDE_FLOAT32_C( -58.48), SIMDE_FLOAT32_C( 87.49), SIMDE_FLOAT32_C( -42.24), SIMDE_FLOAT32_C( -29.05), SIMDE_FLOAT32_C( -90.54), SIMDE_FLOAT32_C( -3.16), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -85.10), SIMDE_FLOAT32_C( 22.86), SIMDE_FLOAT32_C( -29.23), SIMDE_FLOAT32_C( 68.63) }, { SIMDE_FLOAT32_C( -8.87), SIMDE_FLOAT32_C( 38.36), SIMDE_FLOAT32_C( -41.84), SIMDE_FLOAT32_C( -42.50), SIMDE_FLOAT32_C( -40.58), SIMDE_FLOAT32_C( -31.31), SIMDE_FLOAT32_C( 6.98), SIMDE_FLOAT32_C( 83.81), SIMDE_FLOAT32_C( -8.57), SIMDE_FLOAT32_C( -15.34), SIMDE_FLOAT32_C( -23.78), SIMDE_FLOAT32_C( -77.95), 
SIMDE_FLOAT32_C( -47.16), SIMDE_FLOAT32_C( 79.32), SIMDE_FLOAT32_C( 33.92), SIMDE_FLOAT32_C( -60.25) }, { SIMDE_FLOAT32_C( 926.25), SIMDE_FLOAT32_C( 343.33), SIMDE_FLOAT32_C( -568.12), SIMDE_FLOAT32_C( 190.42), SIMDE_FLOAT32_C( 758.48), SIMDE_FLOAT32_C( -693.26), SIMDE_FLOAT32_C( 768.06), SIMDE_FLOAT32_C( -532.03), SIMDE_FLOAT32_C( -598.68), SIMDE_FLOAT32_C( -263.53), SIMDE_FLOAT32_C( 462.87), SIMDE_FLOAT32_C( -449.64), SIMDE_FLOAT32_C( 965.10), SIMDE_FLOAT32_C( -829.43), SIMDE_FLOAT32_C( -763.34), SIMDE_FLOAT32_C( -123.62) }, { SIMDE_FLOAT32_C( 155.36), SIMDE_FLOAT32_C( -2982.48), SIMDE_FLOAT32_C( 3598.73), SIMDE_FLOAT32_C( 4294.22), SIMDE_FLOAT32_C( -313.64), SIMDE_FLOAT32_C( 1137.75), SIMDE_FLOAT32_C( 1378.74), SIMDE_FLOAT32_C( -4072.16), SIMDE_FLOAT32_C( -349.72), SIMDE_FLOAT32_C( 1125.35), SIMDE_FLOAT32_C( 538.01), SIMDE_FLOAT32_C( -409.89), SIMDE_FLOAT32_C( 4978.42), SIMDE_FLOAT32_C( 983.83), SIMDE_FLOAT32_C( -1754.82), SIMDE_FLOAT32_C( -4258.58) } }, { { SIMDE_FLOAT32_C( 55.42), SIMDE_FLOAT32_C( -18.17), SIMDE_FLOAT32_C( 45.14), SIMDE_FLOAT32_C( -85.16), SIMDE_FLOAT32_C( 50.52), SIMDE_FLOAT32_C( -47.88), SIMDE_FLOAT32_C( 98.65), SIMDE_FLOAT32_C( -58.05), SIMDE_FLOAT32_C( 36.78), SIMDE_FLOAT32_C( -25.13), SIMDE_FLOAT32_C( -36.01), SIMDE_FLOAT32_C( 89.63), SIMDE_FLOAT32_C( -45.81), SIMDE_FLOAT32_C( 97.91), SIMDE_FLOAT32_C( -70.62), SIMDE_FLOAT32_C( -53.18) }, { SIMDE_FLOAT32_C( 32.24), SIMDE_FLOAT32_C( -27.44), SIMDE_FLOAT32_C( 65.86), SIMDE_FLOAT32_C( 8.09), SIMDE_FLOAT32_C( 3.24), SIMDE_FLOAT32_C( 42.66), SIMDE_FLOAT32_C( 54.89), SIMDE_FLOAT32_C( 43.37), SIMDE_FLOAT32_C( -83.69), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 98.41), SIMDE_FLOAT32_C( -87.18), SIMDE_FLOAT32_C( 18.23), SIMDE_FLOAT32_C( -77.93), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -26.35) }, { SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -544.03), SIMDE_FLOAT32_C( -115.13), SIMDE_FLOAT32_C( -455.82), SIMDE_FLOAT32_C( -22.84), SIMDE_FLOAT32_C( -128.64), SIMDE_FLOAT32_C( -36.36), SIMDE_FLOAT32_C( 
-655.01), SIMDE_FLOAT32_C( 620.05), SIMDE_FLOAT32_C( 603.55), SIMDE_FLOAT32_C( -758.76), SIMDE_FLOAT32_C( -838.04), SIMDE_FLOAT32_C( 582.66), SIMDE_FLOAT32_C( -464.98), SIMDE_FLOAT32_C( -369.88), SIMDE_FLOAT32_C( -94.91) }, { SIMDE_FLOAT32_C( 1825.75), SIMDE_FLOAT32_C( -45.45), SIMDE_FLOAT32_C( 2857.79), SIMDE_FLOAT32_C( -1144.76), SIMDE_FLOAT32_C( 140.84), SIMDE_FLOAT32_C( -2171.20), SIMDE_FLOAT32_C( 5378.54), SIMDE_FLOAT32_C( -3172.64), SIMDE_FLOAT32_C( -2458.07), SIMDE_FLOAT32_C( 574.15), SIMDE_FLOAT32_C( -4302.50), SIMDE_FLOAT32_C( -8651.98), SIMDE_FLOAT32_C( -252.46), SIMDE_FLOAT32_C( -8095.11), SIMDE_FLOAT32_C( -402.37), SIMDE_FLOAT32_C( 1306.38) } }, { { SIMDE_FLOAT32_C( 26.07), SIMDE_FLOAT32_C( -71.13), SIMDE_FLOAT32_C( 98.60), SIMDE_FLOAT32_C( -70.70), SIMDE_FLOAT32_C( 71.53), SIMDE_FLOAT32_C( 53.49), SIMDE_FLOAT32_C( 72.68), SIMDE_FLOAT32_C( 87.84), SIMDE_FLOAT32_C( -45.34), SIMDE_FLOAT32_C( 71.08), SIMDE_FLOAT32_C( -99.33), SIMDE_FLOAT32_C( 72.89), SIMDE_FLOAT32_C( 93.15), SIMDE_FLOAT32_C( 1.12), SIMDE_FLOAT32_C( -53.46), SIMDE_FLOAT32_C( -2.95) }, { SIMDE_FLOAT32_C( 46.72), SIMDE_FLOAT32_C( 35.03), SIMDE_FLOAT32_C( 51.47), SIMDE_FLOAT32_C( -55.56), SIMDE_FLOAT32_C( -77.83), SIMDE_FLOAT32_C( -52.16), SIMDE_FLOAT32_C( -21.06), SIMDE_FLOAT32_C( 84.17), SIMDE_FLOAT32_C( -91.81), SIMDE_FLOAT32_C( 3.06), SIMDE_FLOAT32_C( -99.63), SIMDE_FLOAT32_C( 66.46), SIMDE_FLOAT32_C( 56.56), SIMDE_FLOAT32_C( -36.62), SIMDE_FLOAT32_C( -43.03), SIMDE_FLOAT32_C( -17.37) }, { SIMDE_FLOAT32_C( -77.52), SIMDE_FLOAT32_C( -444.33), SIMDE_FLOAT32_C( 119.34), SIMDE_FLOAT32_C( -362.18), SIMDE_FLOAT32_C( -909.45), SIMDE_FLOAT32_C( -153.91), SIMDE_FLOAT32_C( -483.73), SIMDE_FLOAT32_C( -362.83), SIMDE_FLOAT32_C( -443.10), SIMDE_FLOAT32_C( -477.07), SIMDE_FLOAT32_C( -633.88), SIMDE_FLOAT32_C( -511.56), SIMDE_FLOAT32_C( 534.17), SIMDE_FLOAT32_C( -168.46), SIMDE_FLOAT32_C( 458.99), SIMDE_FLOAT32_C( 1.39) }, { SIMDE_FLOAT32_C( 1140.47), SIMDE_FLOAT32_C( -2936.01), SIMDE_FLOAT32_C( 
5194.28), SIMDE_FLOAT32_C( 3565.91), SIMDE_FLOAT32_C( -6476.63), SIMDE_FLOAT32_C( -2943.95), SIMDE_FLOAT32_C( -2014.37), SIMDE_FLOAT32_C( 7030.66), SIMDE_FLOAT32_C( 3719.57), SIMDE_FLOAT32_C( -259.57), SIMDE_FLOAT32_C( 9262.37), SIMDE_FLOAT32_C( 4332.71), SIMDE_FLOAT32_C( 5802.73), SIMDE_FLOAT32_C( -209.47), SIMDE_FLOAT32_C( 2759.37), SIMDE_FLOAT32_C( 52.63) } }, { { SIMDE_FLOAT32_C( -81.82), SIMDE_FLOAT32_C( -2.63), SIMDE_FLOAT32_C( 44.58), SIMDE_FLOAT32_C( -59.65), SIMDE_FLOAT32_C( 45.21), SIMDE_FLOAT32_C( -76.49), SIMDE_FLOAT32_C( -75.48), SIMDE_FLOAT32_C( 53.40), SIMDE_FLOAT32_C( 26.57), SIMDE_FLOAT32_C( -75.11), SIMDE_FLOAT32_C( 19.86), SIMDE_FLOAT32_C( -16.86), SIMDE_FLOAT32_C( -11.73), SIMDE_FLOAT32_C( 76.83), SIMDE_FLOAT32_C( 65.77), SIMDE_FLOAT32_C( 80.51) }, { SIMDE_FLOAT32_C( -67.61), SIMDE_FLOAT32_C( -22.30), SIMDE_FLOAT32_C( -55.70), SIMDE_FLOAT32_C( -58.55), SIMDE_FLOAT32_C( 62.31), SIMDE_FLOAT32_C( -4.08), SIMDE_FLOAT32_C( 5.17), SIMDE_FLOAT32_C( -82.00), SIMDE_FLOAT32_C( 48.22), SIMDE_FLOAT32_C( 41.78), SIMDE_FLOAT32_C( -33.16), SIMDE_FLOAT32_C( 1.63), SIMDE_FLOAT32_C( -75.07), SIMDE_FLOAT32_C( -87.26), SIMDE_FLOAT32_C( -98.23), SIMDE_FLOAT32_C( -56.88) }, { SIMDE_FLOAT32_C( 101.14), SIMDE_FLOAT32_C( 463.49), SIMDE_FLOAT32_C( -165.36), SIMDE_FLOAT32_C( -446.77), SIMDE_FLOAT32_C( 698.62), SIMDE_FLOAT32_C( 79.84), SIMDE_FLOAT32_C( -912.75), SIMDE_FLOAT32_C( -35.63), SIMDE_FLOAT32_C( 328.70), SIMDE_FLOAT32_C( 285.86), SIMDE_FLOAT32_C( 795.73), SIMDE_FLOAT32_C( -788.64), SIMDE_FLOAT32_C( 54.13), SIMDE_FLOAT32_C( 453.39), SIMDE_FLOAT32_C( -983.51), SIMDE_FLOAT32_C( 378.08) }, { SIMDE_FLOAT32_C( 5632.99), SIMDE_FLOAT32_C( 522.14), SIMDE_FLOAT32_C( -2648.47), SIMDE_FLOAT32_C( 3045.74), SIMDE_FLOAT32_C( 3515.66), SIMDE_FLOAT32_C( 391.92), SIMDE_FLOAT32_C( -1302.98), SIMDE_FLOAT32_C( -4414.43), SIMDE_FLOAT32_C( 1609.91), SIMDE_FLOAT32_C( -2852.24), SIMDE_FLOAT32_C( 137.17), SIMDE_FLOAT32_C( -816.12), SIMDE_FLOAT32_C( 934.70), SIMDE_FLOAT32_C( -6250.80), 
SIMDE_FLOAT32_C( -7444.10), SIMDE_FLOAT32_C( -4201.33) } }, { { SIMDE_FLOAT32_C( -76.96), SIMDE_FLOAT32_C( -54.06), SIMDE_FLOAT32_C( 79.26), SIMDE_FLOAT32_C( 85.35), SIMDE_FLOAT32_C( 41.87), SIMDE_FLOAT32_C( -15.58), SIMDE_FLOAT32_C( -96.66), SIMDE_FLOAT32_C( -9.92), SIMDE_FLOAT32_C( -73.80), SIMDE_FLOAT32_C( -29.81), SIMDE_FLOAT32_C( 91.72), SIMDE_FLOAT32_C( -48.87), SIMDE_FLOAT32_C( -17.07), SIMDE_FLOAT32_C( 93.49), SIMDE_FLOAT32_C( -5.75), SIMDE_FLOAT32_C( 93.04) }, { SIMDE_FLOAT32_C( 39.84), SIMDE_FLOAT32_C( 77.71), SIMDE_FLOAT32_C( -51.63), SIMDE_FLOAT32_C( 9.70), SIMDE_FLOAT32_C( -14.30), SIMDE_FLOAT32_C( -42.91), SIMDE_FLOAT32_C( -93.87), SIMDE_FLOAT32_C( -81.43), SIMDE_FLOAT32_C( 85.68), SIMDE_FLOAT32_C( 85.71), SIMDE_FLOAT32_C( -60.30), SIMDE_FLOAT32_C( -8.91), SIMDE_FLOAT32_C( 31.05), SIMDE_FLOAT32_C( -58.65), SIMDE_FLOAT32_C( -71.10), SIMDE_FLOAT32_C( 54.08) }, { SIMDE_FLOAT32_C( -127.02), SIMDE_FLOAT32_C( -918.44), SIMDE_FLOAT32_C( 394.30), SIMDE_FLOAT32_C( -708.34), SIMDE_FLOAT32_C( -74.20), SIMDE_FLOAT32_C( 427.75), SIMDE_FLOAT32_C( 192.49), SIMDE_FLOAT32_C( 187.82), SIMDE_FLOAT32_C( -870.38), SIMDE_FLOAT32_C( 109.64), SIMDE_FLOAT32_C( 699.16), SIMDE_FLOAT32_C( -41.09), SIMDE_FLOAT32_C( 44.51), SIMDE_FLOAT32_C( -358.33), SIMDE_FLOAT32_C( -110.66), SIMDE_FLOAT32_C( -557.13) }, { SIMDE_FLOAT32_C( -3193.11), SIMDE_FLOAT32_C( -5119.44), SIMDE_FLOAT32_C( -3697.89), SIMDE_FLOAT32_C( 119.55), SIMDE_FLOAT32_C( -672.94), SIMDE_FLOAT32_C( 1096.29), SIMDE_FLOAT32_C( 9265.96), SIMDE_FLOAT32_C( 995.61), SIMDE_FLOAT32_C( -7193.56), SIMDE_FLOAT32_C( -2445.38), SIMDE_FLOAT32_C( -4831.56), SIMDE_FLOAT32_C( 394.34), SIMDE_FLOAT32_C( -485.51), SIMDE_FLOAT32_C( -5841.52), SIMDE_FLOAT32_C( 298.16), SIMDE_FLOAT32_C( 4474.47) } }, { { SIMDE_FLOAT32_C( -58.12), SIMDE_FLOAT32_C( 37.30), SIMDE_FLOAT32_C( 53.98), SIMDE_FLOAT32_C( 27.58), SIMDE_FLOAT32_C( 94.39), SIMDE_FLOAT32_C( 60.12), SIMDE_FLOAT32_C( 46.15), SIMDE_FLOAT32_C( 80.07), SIMDE_FLOAT32_C( 45.83), SIMDE_FLOAT32_C( 
85.85), SIMDE_FLOAT32_C( -28.84), SIMDE_FLOAT32_C( -23.13), SIMDE_FLOAT32_C( -72.79), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( -69.04), SIMDE_FLOAT32_C( 14.50) }, { SIMDE_FLOAT32_C( 8.21), SIMDE_FLOAT32_C( 70.39), SIMDE_FLOAT32_C( 43.67), SIMDE_FLOAT32_C( -99.21), SIMDE_FLOAT32_C( 13.16), SIMDE_FLOAT32_C( -37.08), SIMDE_FLOAT32_C( 19.57), SIMDE_FLOAT32_C( 26.13), SIMDE_FLOAT32_C( 73.88), SIMDE_FLOAT32_C( -10.51), SIMDE_FLOAT32_C( -77.98), SIMDE_FLOAT32_C( -21.67), SIMDE_FLOAT32_C( 53.66), SIMDE_FLOAT32_C( 10.95), SIMDE_FLOAT32_C( 22.62), SIMDE_FLOAT32_C( 95.54) }, { SIMDE_FLOAT32_C( -517.50), SIMDE_FLOAT32_C( -233.95), SIMDE_FLOAT32_C( 231.18), SIMDE_FLOAT32_C( -573.59), SIMDE_FLOAT32_C( -632.76), SIMDE_FLOAT32_C( -307.34), SIMDE_FLOAT32_C( -772.91), SIMDE_FLOAT32_C( 825.52), SIMDE_FLOAT32_C( -448.81), SIMDE_FLOAT32_C( -61.32), SIMDE_FLOAT32_C( -405.74), SIMDE_FLOAT32_C( -176.75), SIMDE_FLOAT32_C( 939.26), SIMDE_FLOAT32_C( -96.15), SIMDE_FLOAT32_C( 968.29), SIMDE_FLOAT32_C( 21.39) }, { SIMDE_FLOAT32_C( -994.67), SIMDE_FLOAT32_C( 2391.60), SIMDE_FLOAT32_C( 2588.49), SIMDE_FLOAT32_C( -3309.80), SIMDE_FLOAT32_C( 609.41), SIMDE_FLOAT32_C( -2536.59), SIMDE_FLOAT32_C( 130.25), SIMDE_FLOAT32_C( 2917.75), SIMDE_FLOAT32_C( 2937.11), SIMDE_FLOAT32_C( -963.60), SIMDE_FLOAT32_C( 1843.20), SIMDE_FLOAT32_C( 324.48), SIMDE_FLOAT32_C( -2966.65), SIMDE_FLOAT32_C( -95.49), SIMDE_FLOAT32_C( -593.39), SIMDE_FLOAT32_C( 1406.72) } }, { { SIMDE_FLOAT32_C( -39.23), SIMDE_FLOAT32_C( 40.50), SIMDE_FLOAT32_C( 2.93), SIMDE_FLOAT32_C( 73.94), SIMDE_FLOAT32_C( -96.58), SIMDE_FLOAT32_C( -77.49), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 77.30), SIMDE_FLOAT32_C( 12.00), SIMDE_FLOAT32_C( 22.08), SIMDE_FLOAT32_C( -44.36), SIMDE_FLOAT32_C( -34.35), SIMDE_FLOAT32_C( -66.97), SIMDE_FLOAT32_C( 78.26), SIMDE_FLOAT32_C( -38.81), SIMDE_FLOAT32_C( -18.72) }, { SIMDE_FLOAT32_C( -45.14), SIMDE_FLOAT32_C( 84.31), SIMDE_FLOAT32_C( 23.92), SIMDE_FLOAT32_C( -8.41), SIMDE_FLOAT32_C( -46.42), SIMDE_FLOAT32_C( 
46.63), SIMDE_FLOAT32_C( -25.86), SIMDE_FLOAT32_C( 8.70), SIMDE_FLOAT32_C( -59.50), SIMDE_FLOAT32_C( 33.56), SIMDE_FLOAT32_C( 91.02), SIMDE_FLOAT32_C( -65.58), SIMDE_FLOAT32_C( -76.05), SIMDE_FLOAT32_C( 87.85), SIMDE_FLOAT32_C( 36.56), SIMDE_FLOAT32_C( -15.28) }, { SIMDE_FLOAT32_C( 283.51), SIMDE_FLOAT32_C( -605.06), SIMDE_FLOAT32_C( -413.40), SIMDE_FLOAT32_C( 317.70), SIMDE_FLOAT32_C( -379.99), SIMDE_FLOAT32_C( 587.24), SIMDE_FLOAT32_C( 90.72), SIMDE_FLOAT32_C( 739.98), SIMDE_FLOAT32_C( -191.96), SIMDE_FLOAT32_C( 647.07), SIMDE_FLOAT32_C( -603.47), SIMDE_FLOAT32_C( 138.34), SIMDE_FLOAT32_C( 429.62), SIMDE_FLOAT32_C( 8.46), SIMDE_FLOAT32_C( 951.13), SIMDE_FLOAT32_C( 978.23) }, { SIMDE_FLOAT32_C( 2054.35), SIMDE_FLOAT32_C( 2809.49), SIMDE_FLOAT32_C( -343.31), SIMDE_FLOAT32_C( -304.14), SIMDE_FLOAT32_C( 4103.25), SIMDE_FLOAT32_C( -3026.12), SIMDE_FLOAT32_C( 89.17), SIMDE_FLOAT32_C( 1412.49), SIMDE_FLOAT32_C( -905.96), SIMDE_FLOAT32_C( 1388.07), SIMDE_FLOAT32_C( -4641.12), SIMDE_FLOAT32_C( 2391.01), SIMDE_FLOAT32_C( 5522.69), SIMDE_FLOAT32_C( 6883.60), SIMDE_FLOAT32_C( -467.76), SIMDE_FLOAT32_C( 1264.27) } }, { { SIMDE_FLOAT32_C( -14.84), SIMDE_FLOAT32_C( 19.03), SIMDE_FLOAT32_C( -10.59), SIMDE_FLOAT32_C( 38.74), SIMDE_FLOAT32_C( -34.34), SIMDE_FLOAT32_C( 63.55), SIMDE_FLOAT32_C( -52.57), SIMDE_FLOAT32_C( 6.16), SIMDE_FLOAT32_C( -2.89), SIMDE_FLOAT32_C( -61.55), SIMDE_FLOAT32_C( 40.58), SIMDE_FLOAT32_C( 21.06), SIMDE_FLOAT32_C( -73.69), SIMDE_FLOAT32_C( -22.85), SIMDE_FLOAT32_C( -94.22), SIMDE_FLOAT32_C( 54.66) }, { SIMDE_FLOAT32_C( 16.64), SIMDE_FLOAT32_C( -35.56), SIMDE_FLOAT32_C( -13.57), SIMDE_FLOAT32_C( 78.64), SIMDE_FLOAT32_C( -76.84), SIMDE_FLOAT32_C( 95.50), SIMDE_FLOAT32_C( 52.64), SIMDE_FLOAT32_C( 3.97), SIMDE_FLOAT32_C( 60.20), SIMDE_FLOAT32_C( 92.29), SIMDE_FLOAT32_C( -82.20), SIMDE_FLOAT32_C( 3.17), SIMDE_FLOAT32_C( -6.86), SIMDE_FLOAT32_C( -87.09), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 78.29) }, { SIMDE_FLOAT32_C( 319.47), SIMDE_FLOAT32_C( 903.98), 
SIMDE_FLOAT32_C( 170.30), SIMDE_FLOAT32_C( 976.10), SIMDE_FLOAT32_C( 539.44), SIMDE_FLOAT32_C( 644.62), SIMDE_FLOAT32_C( 37.70), SIMDE_FLOAT32_C( -489.48), SIMDE_FLOAT32_C( -970.84), SIMDE_FLOAT32_C( -556.47), SIMDE_FLOAT32_C( 721.09), SIMDE_FLOAT32_C( -707.78), SIMDE_FLOAT32_C( 214.99), SIMDE_FLOAT32_C( 778.88), SIMDE_FLOAT32_C( 838.78), SIMDE_FLOAT32_C( -618.61) }, { SIMDE_FLOAT32_C( 72.53), SIMDE_FLOAT32_C( 227.27), SIMDE_FLOAT32_C( 314.01), SIMDE_FLOAT32_C( 4022.61), SIMDE_FLOAT32_C( 3178.13), SIMDE_FLOAT32_C( 6713.65), SIMDE_FLOAT32_C( -2729.58), SIMDE_FLOAT32_C( -465.02), SIMDE_FLOAT32_C( -1144.82), SIMDE_FLOAT32_C( -6236.92), SIMDE_FLOAT32_C( -2614.59), SIMDE_FLOAT32_C( -641.02), SIMDE_FLOAT32_C( 720.50), SIMDE_FLOAT32_C( 2768.89), SIMDE_FLOAT32_C( 745.50), SIMDE_FLOAT32_C( 3660.72) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 c = simde_mm512_loadu_ps(test_vec[i].c); simde__m512 r = simde_mm512_fmadd_ps(a, b, c); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_fmadd_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde__mmask8 k; const simde_float32 b[16]; const simde_float32 c[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 438.16), SIMDE_FLOAT32_C( 227.24), SIMDE_FLOAT32_C( 799.36), SIMDE_FLOAT32_C( 426.66), SIMDE_FLOAT32_C( 500.62), SIMDE_FLOAT32_C( 608.47), SIMDE_FLOAT32_C( -161.69), SIMDE_FLOAT32_C( 411.36), SIMDE_FLOAT32_C( -860.72), SIMDE_FLOAT32_C( -33.50), SIMDE_FLOAT32_C( 31.57), SIMDE_FLOAT32_C( -730.88), SIMDE_FLOAT32_C( -717.60), SIMDE_FLOAT32_C( 156.67), SIMDE_FLOAT32_C( 890.78), SIMDE_FLOAT32_C( 110.30) }, UINT8_C(213), { SIMDE_FLOAT32_C( -29.44), SIMDE_FLOAT32_C( -9.88), SIMDE_FLOAT32_C( -43.91), SIMDE_FLOAT32_C( 34.52), SIMDE_FLOAT32_C( -56.88), 
SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( -37.10), SIMDE_FLOAT32_C( 14.16), SIMDE_FLOAT32_C( -20.56), SIMDE_FLOAT32_C( -7.94), SIMDE_FLOAT32_C( 10.90), SIMDE_FLOAT32_C( 73.59), SIMDE_FLOAT32_C( -31.00), SIMDE_FLOAT32_C( -73.00), SIMDE_FLOAT32_C( 17.41), SIMDE_FLOAT32_C( 91.72) }, { SIMDE_FLOAT32_C( -930.67), SIMDE_FLOAT32_C( -399.24), SIMDE_FLOAT32_C( 417.85), SIMDE_FLOAT32_C( 677.80), SIMDE_FLOAT32_C( 439.07), SIMDE_FLOAT32_C( -170.79), SIMDE_FLOAT32_C( 817.08), SIMDE_FLOAT32_C( -594.43), SIMDE_FLOAT32_C( 860.77), SIMDE_FLOAT32_C( -913.80), SIMDE_FLOAT32_C( -312.03), SIMDE_FLOAT32_C( 17.44), SIMDE_FLOAT32_C( 976.98), SIMDE_FLOAT32_C( 798.27), SIMDE_FLOAT32_C( 96.05), SIMDE_FLOAT32_C( -317.46) }, { SIMDE_FLOAT32_C(-13830.10), SIMDE_FLOAT32_C( 227.24), SIMDE_FLOAT32_C(-34682.05), SIMDE_FLOAT32_C( 426.66), SIMDE_FLOAT32_C(-28036.20), SIMDE_FLOAT32_C( 608.47), SIMDE_FLOAT32_C( 6815.78), SIMDE_FLOAT32_C( 5230.43), SIMDE_FLOAT32_C( -860.72), SIMDE_FLOAT32_C( -33.50), SIMDE_FLOAT32_C( 31.57), SIMDE_FLOAT32_C( -730.88), SIMDE_FLOAT32_C( -717.60), SIMDE_FLOAT32_C( 156.67), SIMDE_FLOAT32_C( 890.78), SIMDE_FLOAT32_C( 110.30) } }, { { SIMDE_FLOAT32_C( -300.53), SIMDE_FLOAT32_C( 656.99), SIMDE_FLOAT32_C( -972.24), SIMDE_FLOAT32_C( 130.63), SIMDE_FLOAT32_C( -342.25), SIMDE_FLOAT32_C( -343.28), SIMDE_FLOAT32_C( -727.81), SIMDE_FLOAT32_C( 452.11), SIMDE_FLOAT32_C( 577.27), SIMDE_FLOAT32_C( 381.21), SIMDE_FLOAT32_C( 188.06), SIMDE_FLOAT32_C( -732.74), SIMDE_FLOAT32_C( 651.17), SIMDE_FLOAT32_C( -637.83), SIMDE_FLOAT32_C( -815.51), SIMDE_FLOAT32_C( 720.50) }, UINT8_C(204), { SIMDE_FLOAT32_C( 60.23), SIMDE_FLOAT32_C( 39.83), SIMDE_FLOAT32_C( -59.80), SIMDE_FLOAT32_C( -56.85), SIMDE_FLOAT32_C( 21.54), SIMDE_FLOAT32_C( -19.24), SIMDE_FLOAT32_C( -70.77), SIMDE_FLOAT32_C( 30.16), SIMDE_FLOAT32_C( 49.55), SIMDE_FLOAT32_C( 30.98), SIMDE_FLOAT32_C( 27.86), SIMDE_FLOAT32_C( 29.38), SIMDE_FLOAT32_C( -59.42), SIMDE_FLOAT32_C( 96.11), SIMDE_FLOAT32_C( 99.33), SIMDE_FLOAT32_C( -93.72) }, { 
SIMDE_FLOAT32_C( 988.87), SIMDE_FLOAT32_C( 123.92), SIMDE_FLOAT32_C( -279.45), SIMDE_FLOAT32_C( -354.42), SIMDE_FLOAT32_C( 396.10), SIMDE_FLOAT32_C( -827.34), SIMDE_FLOAT32_C( -777.14), SIMDE_FLOAT32_C( -222.69), SIMDE_FLOAT32_C( 360.72), SIMDE_FLOAT32_C( -509.88), SIMDE_FLOAT32_C( -571.52), SIMDE_FLOAT32_C( 722.89), SIMDE_FLOAT32_C( -325.39), SIMDE_FLOAT32_C( -851.01), SIMDE_FLOAT32_C( -314.17), SIMDE_FLOAT32_C( -723.05) }, { SIMDE_FLOAT32_C( -300.53), SIMDE_FLOAT32_C( 656.99), SIMDE_FLOAT32_C( 57860.50), SIMDE_FLOAT32_C( -7780.74), SIMDE_FLOAT32_C( -342.25), SIMDE_FLOAT32_C( -343.28), SIMDE_FLOAT32_C( 50729.97), SIMDE_FLOAT32_C( 13412.95), SIMDE_FLOAT32_C( 577.27), SIMDE_FLOAT32_C( 381.21), SIMDE_FLOAT32_C( 188.06), SIMDE_FLOAT32_C( -732.74), SIMDE_FLOAT32_C( 651.17), SIMDE_FLOAT32_C( -637.83), SIMDE_FLOAT32_C( -815.51), SIMDE_FLOAT32_C( 720.50) } }, { { SIMDE_FLOAT32_C( 547.29), SIMDE_FLOAT32_C( 87.84), SIMDE_FLOAT32_C( -291.51), SIMDE_FLOAT32_C( -237.32), SIMDE_FLOAT32_C( 895.42), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -935.74), SIMDE_FLOAT32_C( 390.97), SIMDE_FLOAT32_C( -689.43), SIMDE_FLOAT32_C( 342.83), SIMDE_FLOAT32_C( -315.22), SIMDE_FLOAT32_C( -283.62), SIMDE_FLOAT32_C( 303.94), SIMDE_FLOAT32_C( -321.93), SIMDE_FLOAT32_C( -220.83), SIMDE_FLOAT32_C( 292.81) }, UINT8_C( 14), { SIMDE_FLOAT32_C( 49.97), SIMDE_FLOAT32_C( 93.84), SIMDE_FLOAT32_C( 19.81), SIMDE_FLOAT32_C( 67.24), SIMDE_FLOAT32_C( -83.88), SIMDE_FLOAT32_C( 97.54), SIMDE_FLOAT32_C( 3.31), SIMDE_FLOAT32_C( -34.86), SIMDE_FLOAT32_C( -59.61), SIMDE_FLOAT32_C( -24.40), SIMDE_FLOAT32_C( 32.60), SIMDE_FLOAT32_C( -44.71), SIMDE_FLOAT32_C( 44.18), SIMDE_FLOAT32_C( 60.29), SIMDE_FLOAT32_C( -89.98), SIMDE_FLOAT32_C( -47.03) }, { SIMDE_FLOAT32_C( -688.58), SIMDE_FLOAT32_C( -137.17), SIMDE_FLOAT32_C( -574.91), SIMDE_FLOAT32_C( 312.23), SIMDE_FLOAT32_C( -72.90), SIMDE_FLOAT32_C( 816.06), SIMDE_FLOAT32_C( 622.80), SIMDE_FLOAT32_C( -730.07), SIMDE_FLOAT32_C( -499.16), SIMDE_FLOAT32_C( -660.83), 
SIMDE_FLOAT32_C( 573.87), SIMDE_FLOAT32_C( 178.90), SIMDE_FLOAT32_C( 118.35), SIMDE_FLOAT32_C( -133.32), SIMDE_FLOAT32_C( -19.11), SIMDE_FLOAT32_C( -381.93) }, { SIMDE_FLOAT32_C( 547.29), SIMDE_FLOAT32_C( 8105.73), SIMDE_FLOAT32_C( -6349.72), SIMDE_FLOAT32_C(-15645.17), SIMDE_FLOAT32_C( 895.42), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -935.74), SIMDE_FLOAT32_C( 390.97), SIMDE_FLOAT32_C( -689.43), SIMDE_FLOAT32_C( 342.83), SIMDE_FLOAT32_C( -315.22), SIMDE_FLOAT32_C( -283.62), SIMDE_FLOAT32_C( 303.94), SIMDE_FLOAT32_C( -321.93), SIMDE_FLOAT32_C( -220.83), SIMDE_FLOAT32_C( 292.81) } }, { { SIMDE_FLOAT32_C( -194.93), SIMDE_FLOAT32_C( -821.03), SIMDE_FLOAT32_C( -709.55), SIMDE_FLOAT32_C( -33.68), SIMDE_FLOAT32_C( -845.63), SIMDE_FLOAT32_C( 323.55), SIMDE_FLOAT32_C( 617.68), SIMDE_FLOAT32_C( -441.75), SIMDE_FLOAT32_C( -920.45), SIMDE_FLOAT32_C( -56.34), SIMDE_FLOAT32_C( 111.11), SIMDE_FLOAT32_C( 521.37), SIMDE_FLOAT32_C( -453.42), SIMDE_FLOAT32_C( 211.27), SIMDE_FLOAT32_C( -948.96), SIMDE_FLOAT32_C( -142.01) }, UINT8_C(248), { SIMDE_FLOAT32_C( -52.39), SIMDE_FLOAT32_C( -82.98), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -70.78), SIMDE_FLOAT32_C( 79.30), SIMDE_FLOAT32_C( 27.11), SIMDE_FLOAT32_C( -20.70), SIMDE_FLOAT32_C( -86.78), SIMDE_FLOAT32_C( -15.50), SIMDE_FLOAT32_C( 97.19), SIMDE_FLOAT32_C( 25.05), SIMDE_FLOAT32_C( 71.17), SIMDE_FLOAT32_C( -4.72), SIMDE_FLOAT32_C( 86.86), SIMDE_FLOAT32_C( -48.33), SIMDE_FLOAT32_C( 13.18) }, { SIMDE_FLOAT32_C( -840.95), SIMDE_FLOAT32_C( 483.06), SIMDE_FLOAT32_C( 286.15), SIMDE_FLOAT32_C( 482.60), SIMDE_FLOAT32_C( 100.74), SIMDE_FLOAT32_C( 844.40), SIMDE_FLOAT32_C( 562.15), SIMDE_FLOAT32_C( -955.60), SIMDE_FLOAT32_C( -44.49), SIMDE_FLOAT32_C( 83.52), SIMDE_FLOAT32_C( -409.02), SIMDE_FLOAT32_C( -833.21), SIMDE_FLOAT32_C( 134.56), SIMDE_FLOAT32_C( 448.97), SIMDE_FLOAT32_C( -759.11), SIMDE_FLOAT32_C( 610.69) }, { SIMDE_FLOAT32_C( -194.93), SIMDE_FLOAT32_C( -821.03), SIMDE_FLOAT32_C( -709.55), SIMDE_FLOAT32_C( 2866.47), 
SIMDE_FLOAT32_C(-66957.72), SIMDE_FLOAT32_C( 9615.84), SIMDE_FLOAT32_C(-12223.83), SIMDE_FLOAT32_C( 37379.46), SIMDE_FLOAT32_C( -920.45), SIMDE_FLOAT32_C( -56.34), SIMDE_FLOAT32_C( 111.11), SIMDE_FLOAT32_C( 521.37), SIMDE_FLOAT32_C( -453.42), SIMDE_FLOAT32_C( 211.27), SIMDE_FLOAT32_C( -948.96), SIMDE_FLOAT32_C( -142.01) } }, { { SIMDE_FLOAT32_C( 619.19), SIMDE_FLOAT32_C( 242.10), SIMDE_FLOAT32_C( 902.88), SIMDE_FLOAT32_C( 412.21), SIMDE_FLOAT32_C( -486.77), SIMDE_FLOAT32_C( -304.10), SIMDE_FLOAT32_C( 544.40), SIMDE_FLOAT32_C( 358.23), SIMDE_FLOAT32_C( -332.18), SIMDE_FLOAT32_C( -205.06), SIMDE_FLOAT32_C( 69.91), SIMDE_FLOAT32_C( 620.63), SIMDE_FLOAT32_C( -336.46), SIMDE_FLOAT32_C( 586.66), SIMDE_FLOAT32_C( -247.59), SIMDE_FLOAT32_C( -177.40) }, UINT8_C( 4), { SIMDE_FLOAT32_C( -96.14), SIMDE_FLOAT32_C( -69.48), SIMDE_FLOAT32_C( -82.95), SIMDE_FLOAT32_C( 88.30), SIMDE_FLOAT32_C( 86.73), SIMDE_FLOAT32_C( -78.51), SIMDE_FLOAT32_C( -16.15), SIMDE_FLOAT32_C( -4.91), SIMDE_FLOAT32_C( -19.42), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -91.46), SIMDE_FLOAT32_C( -74.52), SIMDE_FLOAT32_C( 24.62), SIMDE_FLOAT32_C( 69.61), SIMDE_FLOAT32_C( 87.40), SIMDE_FLOAT32_C( -51.17) }, { SIMDE_FLOAT32_C( 598.99), SIMDE_FLOAT32_C( 286.21), SIMDE_FLOAT32_C( 1.49), SIMDE_FLOAT32_C( -705.11), SIMDE_FLOAT32_C( -169.39), SIMDE_FLOAT32_C( -640.28), SIMDE_FLOAT32_C( -37.29), SIMDE_FLOAT32_C( 625.55), SIMDE_FLOAT32_C( 429.63), SIMDE_FLOAT32_C( -416.66), SIMDE_FLOAT32_C( -710.90), SIMDE_FLOAT32_C( 16.29), SIMDE_FLOAT32_C( 335.76), SIMDE_FLOAT32_C( 111.69), SIMDE_FLOAT32_C( -913.99), SIMDE_FLOAT32_C( 374.32) }, { SIMDE_FLOAT32_C( 619.19), SIMDE_FLOAT32_C( 242.10), SIMDE_FLOAT32_C(-74892.41), SIMDE_FLOAT32_C( 412.21), SIMDE_FLOAT32_C( -486.77), SIMDE_FLOAT32_C( -304.10), SIMDE_FLOAT32_C( 544.40), SIMDE_FLOAT32_C( 358.23), SIMDE_FLOAT32_C( -332.18), SIMDE_FLOAT32_C( -205.06), SIMDE_FLOAT32_C( 69.91), SIMDE_FLOAT32_C( 620.63), SIMDE_FLOAT32_C( -336.46), SIMDE_FLOAT32_C( 586.66), SIMDE_FLOAT32_C( 
-247.59), SIMDE_FLOAT32_C( -177.40) } }, { { SIMDE_FLOAT32_C( 416.89), SIMDE_FLOAT32_C( -743.53), SIMDE_FLOAT32_C( 257.29), SIMDE_FLOAT32_C( 284.23), SIMDE_FLOAT32_C( -528.66), SIMDE_FLOAT32_C( -904.23), SIMDE_FLOAT32_C( -764.90), SIMDE_FLOAT32_C( 277.18), SIMDE_FLOAT32_C( 101.04), SIMDE_FLOAT32_C( -679.48), SIMDE_FLOAT32_C( 531.99), SIMDE_FLOAT32_C( -652.80), SIMDE_FLOAT32_C( -983.36), SIMDE_FLOAT32_C( 405.99), SIMDE_FLOAT32_C( -164.55), SIMDE_FLOAT32_C( 615.63) }, UINT8_C(234), { SIMDE_FLOAT32_C( 83.69), SIMDE_FLOAT32_C( 91.05), SIMDE_FLOAT32_C( 52.28), SIMDE_FLOAT32_C( -80.33), SIMDE_FLOAT32_C( -12.68), SIMDE_FLOAT32_C( 14.84), SIMDE_FLOAT32_C( 62.63), SIMDE_FLOAT32_C( 45.66), SIMDE_FLOAT32_C( 43.75), SIMDE_FLOAT32_C( -35.74), SIMDE_FLOAT32_C( -20.77), SIMDE_FLOAT32_C( -45.08), SIMDE_FLOAT32_C( -27.14), SIMDE_FLOAT32_C( -83.33), SIMDE_FLOAT32_C( 96.61), SIMDE_FLOAT32_C( -1.49) }, { SIMDE_FLOAT32_C( 423.95), SIMDE_FLOAT32_C( 250.29), SIMDE_FLOAT32_C( 456.40), SIMDE_FLOAT32_C( 519.71), SIMDE_FLOAT32_C( 485.39), SIMDE_FLOAT32_C( -266.42), SIMDE_FLOAT32_C( -379.25), SIMDE_FLOAT32_C( 805.92), SIMDE_FLOAT32_C( -734.43), SIMDE_FLOAT32_C( -32.05), SIMDE_FLOAT32_C( 822.56), SIMDE_FLOAT32_C( 671.56), SIMDE_FLOAT32_C( 803.40), SIMDE_FLOAT32_C( 438.19), SIMDE_FLOAT32_C( -636.23), SIMDE_FLOAT32_C( 640.34) }, { SIMDE_FLOAT32_C( 416.89), SIMDE_FLOAT32_C(-67448.12), SIMDE_FLOAT32_C( 257.29), SIMDE_FLOAT32_C(-22312.49), SIMDE_FLOAT32_C( -528.66), SIMDE_FLOAT32_C(-13685.19), SIMDE_FLOAT32_C(-48284.94), SIMDE_FLOAT32_C( 13461.96), SIMDE_FLOAT32_C( 101.04), SIMDE_FLOAT32_C( -679.48), SIMDE_FLOAT32_C( 531.99), SIMDE_FLOAT32_C( -652.80), SIMDE_FLOAT32_C( -983.36), SIMDE_FLOAT32_C( 405.99), SIMDE_FLOAT32_C( -164.55), SIMDE_FLOAT32_C( 615.63) } }, { { SIMDE_FLOAT32_C( 348.71), SIMDE_FLOAT32_C( 886.59), SIMDE_FLOAT32_C( 837.01), SIMDE_FLOAT32_C( -778.06), SIMDE_FLOAT32_C( 34.96), SIMDE_FLOAT32_C( 463.30), SIMDE_FLOAT32_C( 678.52), SIMDE_FLOAT32_C( -527.56), SIMDE_FLOAT32_C( -894.11), 
SIMDE_FLOAT32_C( -529.15), SIMDE_FLOAT32_C( 21.61), SIMDE_FLOAT32_C( -165.52), SIMDE_FLOAT32_C( -362.49), SIMDE_FLOAT32_C( -12.33), SIMDE_FLOAT32_C( 819.55), SIMDE_FLOAT32_C( -938.55) }, UINT8_C(233), { SIMDE_FLOAT32_C( 27.60), SIMDE_FLOAT32_C( 58.12), SIMDE_FLOAT32_C( 72.34), SIMDE_FLOAT32_C( -99.05), SIMDE_FLOAT32_C( -79.81), SIMDE_FLOAT32_C( 52.93), SIMDE_FLOAT32_C( -72.49), SIMDE_FLOAT32_C( 16.99), SIMDE_FLOAT32_C( 35.18), SIMDE_FLOAT32_C( 94.67), SIMDE_FLOAT32_C( -2.67), SIMDE_FLOAT32_C( -21.00), SIMDE_FLOAT32_C( -68.96), SIMDE_FLOAT32_C( -38.64), SIMDE_FLOAT32_C( -86.13), SIMDE_FLOAT32_C( -80.30) }, { SIMDE_FLOAT32_C( -549.39), SIMDE_FLOAT32_C( -639.33), SIMDE_FLOAT32_C( 231.98), SIMDE_FLOAT32_C( 913.92), SIMDE_FLOAT32_C( -960.81), SIMDE_FLOAT32_C( 704.41), SIMDE_FLOAT32_C( -980.19), SIMDE_FLOAT32_C( -489.96), SIMDE_FLOAT32_C( -273.98), SIMDE_FLOAT32_C( -145.71), SIMDE_FLOAT32_C( 147.55), SIMDE_FLOAT32_C( 713.69), SIMDE_FLOAT32_C( -326.16), SIMDE_FLOAT32_C( 209.00), SIMDE_FLOAT32_C( 951.65), SIMDE_FLOAT32_C( 949.79) }, { SIMDE_FLOAT32_C( 9075.01), SIMDE_FLOAT32_C( 886.59), SIMDE_FLOAT32_C( 837.01), SIMDE_FLOAT32_C( 77980.77), SIMDE_FLOAT32_C( 34.96), SIMDE_FLOAT32_C( 25226.88), SIMDE_FLOAT32_C(-50166.11), SIMDE_FLOAT32_C( -9453.20), SIMDE_FLOAT32_C( -894.11), SIMDE_FLOAT32_C( -529.15), SIMDE_FLOAT32_C( 21.61), SIMDE_FLOAT32_C( -165.52), SIMDE_FLOAT32_C( -362.49), SIMDE_FLOAT32_C( -12.33), SIMDE_FLOAT32_C( 819.55), SIMDE_FLOAT32_C( -938.55) } }, { { SIMDE_FLOAT32_C( -209.83), SIMDE_FLOAT32_C( 675.00), SIMDE_FLOAT32_C( 959.32), SIMDE_FLOAT32_C( -7.91), SIMDE_FLOAT32_C( 204.27), SIMDE_FLOAT32_C( -765.58), SIMDE_FLOAT32_C( -838.05), SIMDE_FLOAT32_C( -443.90), SIMDE_FLOAT32_C( -818.92), SIMDE_FLOAT32_C( 135.21), SIMDE_FLOAT32_C( 346.12), SIMDE_FLOAT32_C( -508.49), SIMDE_FLOAT32_C( 748.82), SIMDE_FLOAT32_C( 484.85), SIMDE_FLOAT32_C( -311.47), SIMDE_FLOAT32_C( -800.57) }, UINT8_C( 77), { SIMDE_FLOAT32_C( 92.05), SIMDE_FLOAT32_C( -88.67), SIMDE_FLOAT32_C( 88.47), 
SIMDE_FLOAT32_C( 62.49), SIMDE_FLOAT32_C( -86.68), SIMDE_FLOAT32_C( -60.52), SIMDE_FLOAT32_C( -64.91), SIMDE_FLOAT32_C( -1.26), SIMDE_FLOAT32_C( 54.23), SIMDE_FLOAT32_C( -93.54), SIMDE_FLOAT32_C( 66.13), SIMDE_FLOAT32_C( -24.87), SIMDE_FLOAT32_C( -98.37), SIMDE_FLOAT32_C( 61.11), SIMDE_FLOAT32_C( 54.15), SIMDE_FLOAT32_C( 69.13) }, { SIMDE_FLOAT32_C( 570.40), SIMDE_FLOAT32_C( -466.42), SIMDE_FLOAT32_C( -104.45), SIMDE_FLOAT32_C( 804.83), SIMDE_FLOAT32_C( -304.47), SIMDE_FLOAT32_C( 451.65), SIMDE_FLOAT32_C( 985.91), SIMDE_FLOAT32_C( 830.74), SIMDE_FLOAT32_C( -202.23), SIMDE_FLOAT32_C( -522.58), SIMDE_FLOAT32_C( 579.55), SIMDE_FLOAT32_C( -717.38), SIMDE_FLOAT32_C( 165.95), SIMDE_FLOAT32_C( 778.98), SIMDE_FLOAT32_C( -871.85), SIMDE_FLOAT32_C( 86.45) }, { SIMDE_FLOAT32_C(-18744.45), SIMDE_FLOAT32_C( 675.00), SIMDE_FLOAT32_C( 84766.59), SIMDE_FLOAT32_C( 310.53), SIMDE_FLOAT32_C( 204.27), SIMDE_FLOAT32_C( -765.58), SIMDE_FLOAT32_C( 55383.74), SIMDE_FLOAT32_C( -443.90), SIMDE_FLOAT32_C( -818.92), SIMDE_FLOAT32_C( 135.21), SIMDE_FLOAT32_C( 346.12), SIMDE_FLOAT32_C( -508.49), SIMDE_FLOAT32_C( 748.82), SIMDE_FLOAT32_C( 484.85), SIMDE_FLOAT32_C( -311.47), SIMDE_FLOAT32_C( -800.57) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 c = simde_mm512_loadu_ps(test_vec[i].c); simde__m512 r = simde_mm512_mask_fmadd_ps(a, test_vec[i].k, b, c); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_maskz_fmadd_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 c[16]; const simde_float32 r[16]; } test_vec[] = { { UINT16_C(14967), { SIMDE_FLOAT32_C( 18.11), SIMDE_FLOAT32_C( -41.35), SIMDE_FLOAT32_C( 32.27), SIMDE_FLOAT32_C( -1.66), SIMDE_FLOAT32_C( -21.64), SIMDE_FLOAT32_C( 
77.20), SIMDE_FLOAT32_C( 35.46), SIMDE_FLOAT32_C( -90.91), SIMDE_FLOAT32_C( -64.23), SIMDE_FLOAT32_C( -16.52), SIMDE_FLOAT32_C( 22.87), SIMDE_FLOAT32_C( 30.16), SIMDE_FLOAT32_C( -6.85), SIMDE_FLOAT32_C( 51.65), SIMDE_FLOAT32_C( -2.38), SIMDE_FLOAT32_C( -7.27) }, { SIMDE_FLOAT32_C( -26.01), SIMDE_FLOAT32_C( 50.75), SIMDE_FLOAT32_C( 26.70), SIMDE_FLOAT32_C( 21.59), SIMDE_FLOAT32_C( 39.55), SIMDE_FLOAT32_C( 43.61), SIMDE_FLOAT32_C( -16.85), SIMDE_FLOAT32_C( 62.00), SIMDE_FLOAT32_C( -10.33), SIMDE_FLOAT32_C( -32.63), SIMDE_FLOAT32_C( 29.56), SIMDE_FLOAT32_C( 2.54), SIMDE_FLOAT32_C( 72.74), SIMDE_FLOAT32_C( -37.07), SIMDE_FLOAT32_C( -74.38), SIMDE_FLOAT32_C( -9.15) }, { SIMDE_FLOAT32_C( 215.80), SIMDE_FLOAT32_C( 578.85), SIMDE_FLOAT32_C( 892.00), SIMDE_FLOAT32_C( 999.36), SIMDE_FLOAT32_C( 350.88), SIMDE_FLOAT32_C( 246.63), SIMDE_FLOAT32_C( -909.73), SIMDE_FLOAT32_C( 708.55), SIMDE_FLOAT32_C( -918.55), SIMDE_FLOAT32_C( 318.97), SIMDE_FLOAT32_C( 10.19), SIMDE_FLOAT32_C( 12.96), SIMDE_FLOAT32_C( -164.54), SIMDE_FLOAT32_C( 986.37), SIMDE_FLOAT32_C( 940.27), SIMDE_FLOAT32_C( 575.40) }, { SIMDE_FLOAT32_C( -255.24), SIMDE_FLOAT32_C( -1519.66), SIMDE_FLOAT32_C( 1753.61), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -504.98), SIMDE_FLOAT32_C( 3613.32), SIMDE_FLOAT32_C( -1507.23), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 858.02), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 89.57), SIMDE_FLOAT32_C( -662.81), SIMDE_FLOAT32_C( -928.30), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(49253), { SIMDE_FLOAT32_C( -20.87), SIMDE_FLOAT32_C( -11.07), SIMDE_FLOAT32_C( -35.67), SIMDE_FLOAT32_C( 62.27), SIMDE_FLOAT32_C( -49.07), SIMDE_FLOAT32_C( 54.00), SIMDE_FLOAT32_C( -70.36), SIMDE_FLOAT32_C( 80.49), SIMDE_FLOAT32_C( -43.47), SIMDE_FLOAT32_C( -97.62), SIMDE_FLOAT32_C( -56.58), SIMDE_FLOAT32_C( -17.85), SIMDE_FLOAT32_C( -6.76), SIMDE_FLOAT32_C( 65.00), SIMDE_FLOAT32_C( -59.96), SIMDE_FLOAT32_C( -17.56) }, { SIMDE_FLOAT32_C( 64.94), SIMDE_FLOAT32_C( 75.12), 
SIMDE_FLOAT32_C( -92.90), SIMDE_FLOAT32_C( 73.96), SIMDE_FLOAT32_C( 45.98), SIMDE_FLOAT32_C( -84.75), SIMDE_FLOAT32_C( 5.86), SIMDE_FLOAT32_C( -53.00), SIMDE_FLOAT32_C( 16.54), SIMDE_FLOAT32_C( 89.41), SIMDE_FLOAT32_C( -54.37), SIMDE_FLOAT32_C( 10.57), SIMDE_FLOAT32_C( 46.95), SIMDE_FLOAT32_C( 95.02), SIMDE_FLOAT32_C( -68.71), SIMDE_FLOAT32_C( -73.93) }, { SIMDE_FLOAT32_C( -160.52), SIMDE_FLOAT32_C( -43.76), SIMDE_FLOAT32_C( 883.44), SIMDE_FLOAT32_C( 348.76), SIMDE_FLOAT32_C( -503.79), SIMDE_FLOAT32_C( -820.14), SIMDE_FLOAT32_C( 153.62), SIMDE_FLOAT32_C( 61.54), SIMDE_FLOAT32_C( -796.31), SIMDE_FLOAT32_C( 587.81), SIMDE_FLOAT32_C( 883.04), SIMDE_FLOAT32_C( 136.08), SIMDE_FLOAT32_C( 237.80), SIMDE_FLOAT32_C( -716.60), SIMDE_FLOAT32_C( 960.45), SIMDE_FLOAT32_C( -112.85) }, { SIMDE_FLOAT32_C( -1515.82), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 4197.18), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -5396.64), SIMDE_FLOAT32_C( -258.69), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 5080.30), SIMDE_FLOAT32_C( 1185.36) } }, { UINT16_C(33012), { SIMDE_FLOAT32_C( -37.32), SIMDE_FLOAT32_C( 49.44), SIMDE_FLOAT32_C( -81.61), SIMDE_FLOAT32_C( 68.54), SIMDE_FLOAT32_C( 96.44), SIMDE_FLOAT32_C( 34.93), SIMDE_FLOAT32_C( 57.94), SIMDE_FLOAT32_C( -57.93), SIMDE_FLOAT32_C( -54.50), SIMDE_FLOAT32_C( 4.89), SIMDE_FLOAT32_C( -62.91), SIMDE_FLOAT32_C( -23.20), SIMDE_FLOAT32_C( 30.96), SIMDE_FLOAT32_C( 21.04), SIMDE_FLOAT32_C( 72.42), SIMDE_FLOAT32_C( 19.30) }, { SIMDE_FLOAT32_C( -44.08), SIMDE_FLOAT32_C( -77.96), SIMDE_FLOAT32_C( 37.29), SIMDE_FLOAT32_C( 71.28), SIMDE_FLOAT32_C( 28.20), SIMDE_FLOAT32_C( 57.66), SIMDE_FLOAT32_C( 30.06), SIMDE_FLOAT32_C( 16.50), SIMDE_FLOAT32_C( -28.73), SIMDE_FLOAT32_C( -46.16), SIMDE_FLOAT32_C( 44.84), SIMDE_FLOAT32_C( -32.69), SIMDE_FLOAT32_C( 42.55), SIMDE_FLOAT32_C( 48.31), SIMDE_FLOAT32_C( 
-29.54), SIMDE_FLOAT32_C( -94.77) }, { SIMDE_FLOAT32_C( -22.52), SIMDE_FLOAT32_C( -111.49), SIMDE_FLOAT32_C( 737.68), SIMDE_FLOAT32_C( -58.11), SIMDE_FLOAT32_C( -762.15), SIMDE_FLOAT32_C( 317.10), SIMDE_FLOAT32_C( 362.64), SIMDE_FLOAT32_C( -307.11), SIMDE_FLOAT32_C( -634.03), SIMDE_FLOAT32_C( 733.57), SIMDE_FLOAT32_C( 460.86), SIMDE_FLOAT32_C( 675.57), SIMDE_FLOAT32_C( -56.01), SIMDE_FLOAT32_C( 185.08), SIMDE_FLOAT32_C( -131.39), SIMDE_FLOAT32_C( 503.16) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -2305.56), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1957.46), SIMDE_FLOAT32_C( 2331.16), SIMDE_FLOAT32_C( 2104.32), SIMDE_FLOAT32_C( -1262.95), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -1325.90) } }, { UINT16_C(30249), { SIMDE_FLOAT32_C( 21.60), SIMDE_FLOAT32_C( -31.25), SIMDE_FLOAT32_C( 81.81), SIMDE_FLOAT32_C( -48.34), SIMDE_FLOAT32_C( 85.25), SIMDE_FLOAT32_C( -46.92), SIMDE_FLOAT32_C( 5.49), SIMDE_FLOAT32_C( 30.09), SIMDE_FLOAT32_C( 20.39), SIMDE_FLOAT32_C( -51.95), SIMDE_FLOAT32_C( -21.61), SIMDE_FLOAT32_C( 90.85), SIMDE_FLOAT32_C( -46.72), SIMDE_FLOAT32_C( 76.14), SIMDE_FLOAT32_C( -20.30), SIMDE_FLOAT32_C( -72.95) }, { SIMDE_FLOAT32_C( -29.67), SIMDE_FLOAT32_C( 3.48), SIMDE_FLOAT32_C( 58.76), SIMDE_FLOAT32_C( -93.40), SIMDE_FLOAT32_C( 72.77), SIMDE_FLOAT32_C( 95.35), SIMDE_FLOAT32_C( 79.95), SIMDE_FLOAT32_C( 18.86), SIMDE_FLOAT32_C( 62.91), SIMDE_FLOAT32_C( -25.65), SIMDE_FLOAT32_C( -62.63), SIMDE_FLOAT32_C( -50.23), SIMDE_FLOAT32_C( -75.33), SIMDE_FLOAT32_C( 77.92), SIMDE_FLOAT32_C( -26.08), SIMDE_FLOAT32_C( 46.26) }, { SIMDE_FLOAT32_C( -533.34), SIMDE_FLOAT32_C( -442.67), SIMDE_FLOAT32_C( 979.18), SIMDE_FLOAT32_C( -680.86), SIMDE_FLOAT32_C( 88.10), SIMDE_FLOAT32_C( 34.12), SIMDE_FLOAT32_C( 620.02), SIMDE_FLOAT32_C( -708.00), SIMDE_FLOAT32_C( 514.61), SIMDE_FLOAT32_C( -596.04), SIMDE_FLOAT32_C( 
-799.52), SIMDE_FLOAT32_C( -952.60), SIMDE_FLOAT32_C( -834.61), SIMDE_FLOAT32_C( -2.53), SIMDE_FLOAT32_C( -682.13), SIMDE_FLOAT32_C( -131.30) }, { SIMDE_FLOAT32_C( -1174.21), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 3834.10), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -4439.70), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 736.48), SIMDE_FLOAT32_C( 553.91), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2684.81), SIMDE_FLOAT32_C( 5930.30), SIMDE_FLOAT32_C( -152.71), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(36814), { SIMDE_FLOAT32_C( -6.53), SIMDE_FLOAT32_C( 76.01), SIMDE_FLOAT32_C( 85.90), SIMDE_FLOAT32_C( -26.58), SIMDE_FLOAT32_C( -5.13), SIMDE_FLOAT32_C( 48.81), SIMDE_FLOAT32_C( 47.77), SIMDE_FLOAT32_C( 32.23), SIMDE_FLOAT32_C( 98.58), SIMDE_FLOAT32_C( 72.44), SIMDE_FLOAT32_C( 10.15), SIMDE_FLOAT32_C( -27.49), SIMDE_FLOAT32_C( 18.70), SIMDE_FLOAT32_C( 56.82), SIMDE_FLOAT32_C( 28.24), SIMDE_FLOAT32_C( 16.62) }, { SIMDE_FLOAT32_C( 88.73), SIMDE_FLOAT32_C( -62.95), SIMDE_FLOAT32_C( -79.97), SIMDE_FLOAT32_C( 50.73), SIMDE_FLOAT32_C( -33.75), SIMDE_FLOAT32_C( 71.49), SIMDE_FLOAT32_C( 91.13), SIMDE_FLOAT32_C( -13.70), SIMDE_FLOAT32_C( 76.23), SIMDE_FLOAT32_C( -92.33), SIMDE_FLOAT32_C( 86.04), SIMDE_FLOAT32_C( -91.98), SIMDE_FLOAT32_C( -5.46), SIMDE_FLOAT32_C( 89.28), SIMDE_FLOAT32_C( 98.56), SIMDE_FLOAT32_C( 88.01) }, { SIMDE_FLOAT32_C( 652.81), SIMDE_FLOAT32_C( 844.61), SIMDE_FLOAT32_C( -385.77), SIMDE_FLOAT32_C( -398.54), SIMDE_FLOAT32_C( 332.71), SIMDE_FLOAT32_C( -908.07), SIMDE_FLOAT32_C( 923.80), SIMDE_FLOAT32_C( 318.53), SIMDE_FLOAT32_C( 816.30), SIMDE_FLOAT32_C( 25.32), SIMDE_FLOAT32_C( -956.42), SIMDE_FLOAT32_C( 3.30), SIMDE_FLOAT32_C( -406.50), SIMDE_FLOAT32_C( 325.97), SIMDE_FLOAT32_C( -830.53), SIMDE_FLOAT32_C( -519.19) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -3940.22), SIMDE_FLOAT32_C( -7255.19), SIMDE_FLOAT32_C( -1746.94), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 
5277.08), SIMDE_FLOAT32_C( -123.02), SIMDE_FLOAT32_C( 8331.05), SIMDE_FLOAT32_C( -6663.07), SIMDE_FLOAT32_C( -83.11), SIMDE_FLOAT32_C( 2531.83), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 943.54) } }, { UINT16_C(21105), { SIMDE_FLOAT32_C( 98.81), SIMDE_FLOAT32_C( -64.11), SIMDE_FLOAT32_C( -91.53), SIMDE_FLOAT32_C( 89.95), SIMDE_FLOAT32_C( 22.19), SIMDE_FLOAT32_C( 84.70), SIMDE_FLOAT32_C( 97.61), SIMDE_FLOAT32_C( 8.23), SIMDE_FLOAT32_C( 92.72), SIMDE_FLOAT32_C( -7.85), SIMDE_FLOAT32_C( -2.49), SIMDE_FLOAT32_C( 91.28), SIMDE_FLOAT32_C( -19.84), SIMDE_FLOAT32_C( -37.21), SIMDE_FLOAT32_C( 75.74), SIMDE_FLOAT32_C( 41.58) }, { SIMDE_FLOAT32_C( 22.93), SIMDE_FLOAT32_C( 9.01), SIMDE_FLOAT32_C( 50.78), SIMDE_FLOAT32_C( 15.31), SIMDE_FLOAT32_C( -59.14), SIMDE_FLOAT32_C( 32.41), SIMDE_FLOAT32_C( -82.15), SIMDE_FLOAT32_C( -54.78), SIMDE_FLOAT32_C( -67.27), SIMDE_FLOAT32_C( -22.80), SIMDE_FLOAT32_C( 77.82), SIMDE_FLOAT32_C( -50.32), SIMDE_FLOAT32_C( 25.28), SIMDE_FLOAT32_C( 47.46), SIMDE_FLOAT32_C( -13.34), SIMDE_FLOAT32_C( 24.09) }, { SIMDE_FLOAT32_C( 833.57), SIMDE_FLOAT32_C( -48.74), SIMDE_FLOAT32_C( 140.37), SIMDE_FLOAT32_C( 55.46), SIMDE_FLOAT32_C( -201.75), SIMDE_FLOAT32_C( 116.51), SIMDE_FLOAT32_C( -862.21), SIMDE_FLOAT32_C( -274.59), SIMDE_FLOAT32_C( -961.95), SIMDE_FLOAT32_C( 112.88), SIMDE_FLOAT32_C( -361.81), SIMDE_FLOAT32_C( -160.36), SIMDE_FLOAT32_C( 740.76), SIMDE_FLOAT32_C( -604.42), SIMDE_FLOAT32_C( -744.54), SIMDE_FLOAT32_C( -29.89) }, { SIMDE_FLOAT32_C( 3099.28), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -1514.07), SIMDE_FLOAT32_C( 2861.64), SIMDE_FLOAT32_C( -8880.87), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 291.86), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 239.20), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -1754.91), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(37070), { SIMDE_FLOAT32_C( -87.67), SIMDE_FLOAT32_C( 89.43), 
SIMDE_FLOAT32_C( 8.73), SIMDE_FLOAT32_C( -69.83), SIMDE_FLOAT32_C( -65.35), SIMDE_FLOAT32_C( 41.46), SIMDE_FLOAT32_C( 7.37), SIMDE_FLOAT32_C( -87.53), SIMDE_FLOAT32_C( 91.14), SIMDE_FLOAT32_C( -67.36), SIMDE_FLOAT32_C( 59.93), SIMDE_FLOAT32_C( -22.20), SIMDE_FLOAT32_C( 56.74), SIMDE_FLOAT32_C( 43.29), SIMDE_FLOAT32_C( 72.93), SIMDE_FLOAT32_C( -29.23) }, { SIMDE_FLOAT32_C( -51.16), SIMDE_FLOAT32_C( -47.25), SIMDE_FLOAT32_C( 82.42), SIMDE_FLOAT32_C( -37.38), SIMDE_FLOAT32_C( 25.29), SIMDE_FLOAT32_C( 86.23), SIMDE_FLOAT32_C( 73.90), SIMDE_FLOAT32_C( 89.11), SIMDE_FLOAT32_C( -29.81), SIMDE_FLOAT32_C( 47.98), SIMDE_FLOAT32_C( -71.33), SIMDE_FLOAT32_C( -4.26), SIMDE_FLOAT32_C( -55.01), SIMDE_FLOAT32_C( 77.24), SIMDE_FLOAT32_C( -27.94), SIMDE_FLOAT32_C( -42.68) }, { SIMDE_FLOAT32_C( 666.71), SIMDE_FLOAT32_C( 807.87), SIMDE_FLOAT32_C( -125.12), SIMDE_FLOAT32_C( -986.75), SIMDE_FLOAT32_C( 222.48), SIMDE_FLOAT32_C( 948.55), SIMDE_FLOAT32_C( -862.04), SIMDE_FLOAT32_C( 133.91), SIMDE_FLOAT32_C( -725.01), SIMDE_FLOAT32_C( 737.31), SIMDE_FLOAT32_C( 911.94), SIMDE_FLOAT32_C( 842.35), SIMDE_FLOAT32_C( 170.22), SIMDE_FLOAT32_C( 641.22), SIMDE_FLOAT32_C( -449.91), SIMDE_FLOAT32_C( 658.60) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -3417.70), SIMDE_FLOAT32_C( 594.41), SIMDE_FLOAT32_C( 1623.50), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -317.40), SIMDE_FLOAT32_C( -7665.89), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -2951.05), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1906.14) } }, { UINT16_C( 2209), { SIMDE_FLOAT32_C( -71.52), SIMDE_FLOAT32_C( 42.17), SIMDE_FLOAT32_C( -76.34), SIMDE_FLOAT32_C( -97.62), SIMDE_FLOAT32_C( 31.28), SIMDE_FLOAT32_C( -6.14), SIMDE_FLOAT32_C( 50.36), SIMDE_FLOAT32_C( 59.95), SIMDE_FLOAT32_C( 89.60), SIMDE_FLOAT32_C( 95.35), SIMDE_FLOAT32_C( 37.19), SIMDE_FLOAT32_C( -38.34), SIMDE_FLOAT32_C( -47.33), SIMDE_FLOAT32_C( 3.86), SIMDE_FLOAT32_C( 
-57.56), SIMDE_FLOAT32_C( 40.16) }, { SIMDE_FLOAT32_C( 5.19), SIMDE_FLOAT32_C( 64.69), SIMDE_FLOAT32_C( 35.01), SIMDE_FLOAT32_C( 18.99), SIMDE_FLOAT32_C( -21.92), SIMDE_FLOAT32_C( 62.51), SIMDE_FLOAT32_C( -7.28), SIMDE_FLOAT32_C( -30.72), SIMDE_FLOAT32_C( 46.75), SIMDE_FLOAT32_C( -90.26), SIMDE_FLOAT32_C( -66.60), SIMDE_FLOAT32_C( -98.24), SIMDE_FLOAT32_C( 75.60), SIMDE_FLOAT32_C( -49.72), SIMDE_FLOAT32_C( -60.81), SIMDE_FLOAT32_C( -95.93) }, { SIMDE_FLOAT32_C( 924.44), SIMDE_FLOAT32_C( -371.47), SIMDE_FLOAT32_C( -935.45), SIMDE_FLOAT32_C( 237.27), SIMDE_FLOAT32_C( 567.09), SIMDE_FLOAT32_C( 568.16), SIMDE_FLOAT32_C( -163.20), SIMDE_FLOAT32_C( 463.06), SIMDE_FLOAT32_C( 521.69), SIMDE_FLOAT32_C( -791.26), SIMDE_FLOAT32_C( -920.37), SIMDE_FLOAT32_C( -951.61), SIMDE_FLOAT32_C( 247.38), SIMDE_FLOAT32_C( -495.92), SIMDE_FLOAT32_C( 449.97), SIMDE_FLOAT32_C( -700.73) }, { SIMDE_FLOAT32_C( 553.25), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 184.35), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -1378.60), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2814.91), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 c = simde_mm512_loadu_ps(test_vec[i].c); simde__m512 r = simde_mm512_maskz_fmadd_ps(test_vec[i].k, a, b, c); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_fmadd_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 c[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -15.20), SIMDE_FLOAT64_C( -92.09), SIMDE_FLOAT64_C( -5.68), SIMDE_FLOAT64_C( -60.32), 
SIMDE_FLOAT64_C( 11.66), SIMDE_FLOAT64_C( -39.24), SIMDE_FLOAT64_C( -47.98), SIMDE_FLOAT64_C( -9.95) }, { SIMDE_FLOAT64_C( -67.23), SIMDE_FLOAT64_C( -68.40), SIMDE_FLOAT64_C( -47.95), SIMDE_FLOAT64_C( 26.22), SIMDE_FLOAT64_C( -82.14), SIMDE_FLOAT64_C( 74.05), SIMDE_FLOAT64_C( -33.99), SIMDE_FLOAT64_C( 90.24) }, { SIMDE_FLOAT64_C( 198.57), SIMDE_FLOAT64_C( -636.25), SIMDE_FLOAT64_C( 470.89), SIMDE_FLOAT64_C( 299.50), SIMDE_FLOAT64_C( -364.82), SIMDE_FLOAT64_C( 440.56), SIMDE_FLOAT64_C( 486.88), SIMDE_FLOAT64_C( 920.40) }, { SIMDE_FLOAT64_C( 1220.47), SIMDE_FLOAT64_C( 5662.71), SIMDE_FLOAT64_C( 743.25), SIMDE_FLOAT64_C( -1282.09), SIMDE_FLOAT64_C( -1322.57), SIMDE_FLOAT64_C( -2465.16), SIMDE_FLOAT64_C( 2117.72), SIMDE_FLOAT64_C( 22.51) } }, { { SIMDE_FLOAT64_C( -84.36), SIMDE_FLOAT64_C( -64.18), SIMDE_FLOAT64_C( -17.96), SIMDE_FLOAT64_C( -13.18), SIMDE_FLOAT64_C( -53.67), SIMDE_FLOAT64_C( 26.82), SIMDE_FLOAT64_C( -6.48), SIMDE_FLOAT64_C( 64.66) }, { SIMDE_FLOAT64_C( -48.92), SIMDE_FLOAT64_C( -80.30), SIMDE_FLOAT64_C( -78.02), SIMDE_FLOAT64_C( -69.90), SIMDE_FLOAT64_C( 29.55), SIMDE_FLOAT64_C( 51.81), SIMDE_FLOAT64_C( -58.76), SIMDE_FLOAT64_C( -43.34) }, { SIMDE_FLOAT64_C( -369.54), SIMDE_FLOAT64_C( -938.59), SIMDE_FLOAT64_C( -949.35), SIMDE_FLOAT64_C( 294.30), SIMDE_FLOAT64_C( 268.45), SIMDE_FLOAT64_C( -199.17), SIMDE_FLOAT64_C( 580.15), SIMDE_FLOAT64_C( 625.12) }, { SIMDE_FLOAT64_C( 3757.35), SIMDE_FLOAT64_C( 4215.06), SIMDE_FLOAT64_C( 451.89), SIMDE_FLOAT64_C( 1215.58), SIMDE_FLOAT64_C( -1317.50), SIMDE_FLOAT64_C( 1190.37), SIMDE_FLOAT64_C( 960.91), SIMDE_FLOAT64_C( -2177.24) } }, { { SIMDE_FLOAT64_C( 42.20), SIMDE_FLOAT64_C( 81.03), SIMDE_FLOAT64_C( 93.61), SIMDE_FLOAT64_C( 85.46), SIMDE_FLOAT64_C( -54.78), SIMDE_FLOAT64_C( -43.11), SIMDE_FLOAT64_C( 68.99), SIMDE_FLOAT64_C( -72.24) }, { SIMDE_FLOAT64_C( 31.24), SIMDE_FLOAT64_C( 44.02), SIMDE_FLOAT64_C( 35.86), SIMDE_FLOAT64_C( 36.00), SIMDE_FLOAT64_C( 49.85), SIMDE_FLOAT64_C( 71.08), SIMDE_FLOAT64_C( 95.94), 
SIMDE_FLOAT64_C( -61.93) }, { SIMDE_FLOAT64_C( -70.68), SIMDE_FLOAT64_C( 113.51), SIMDE_FLOAT64_C( -246.66), SIMDE_FLOAT64_C( 329.53), SIMDE_FLOAT64_C( 345.01), SIMDE_FLOAT64_C( 517.47), SIMDE_FLOAT64_C( -899.02), SIMDE_FLOAT64_C( 163.03) }, { SIMDE_FLOAT64_C( 1247.65), SIMDE_FLOAT64_C( 3680.45), SIMDE_FLOAT64_C( 3110.19), SIMDE_FLOAT64_C( 3406.09), SIMDE_FLOAT64_C( -2385.77), SIMDE_FLOAT64_C( -2546.79), SIMDE_FLOAT64_C( 5719.88), SIMDE_FLOAT64_C( 4636.85) } }, { { SIMDE_FLOAT64_C( 1.76), SIMDE_FLOAT64_C( -74.31), SIMDE_FLOAT64_C( 62.56), SIMDE_FLOAT64_C( -37.50), SIMDE_FLOAT64_C( 13.98), SIMDE_FLOAT64_C( -68.32), SIMDE_FLOAT64_C( -66.19), SIMDE_FLOAT64_C( -8.98) }, { SIMDE_FLOAT64_C( -46.81), SIMDE_FLOAT64_C( 39.06), SIMDE_FLOAT64_C( -86.48), SIMDE_FLOAT64_C( -52.33), SIMDE_FLOAT64_C( 78.57), SIMDE_FLOAT64_C( 2.96), SIMDE_FLOAT64_C( -70.43), SIMDE_FLOAT64_C( 77.87) }, { SIMDE_FLOAT64_C( -708.96), SIMDE_FLOAT64_C( 589.10), SIMDE_FLOAT64_C( -927.53), SIMDE_FLOAT64_C( 949.55), SIMDE_FLOAT64_C( -830.02), SIMDE_FLOAT64_C( -118.83), SIMDE_FLOAT64_C( 850.20), SIMDE_FLOAT64_C( -768.46) }, { SIMDE_FLOAT64_C( -791.35), SIMDE_FLOAT64_C( -2313.45), SIMDE_FLOAT64_C( -6337.72), SIMDE_FLOAT64_C( 2911.92), SIMDE_FLOAT64_C( 268.39), SIMDE_FLOAT64_C( -321.06), SIMDE_FLOAT64_C( 5511.96), SIMDE_FLOAT64_C( -1467.73) } }, { { SIMDE_FLOAT64_C( 53.04), SIMDE_FLOAT64_C( 11.01), SIMDE_FLOAT64_C( -88.73), SIMDE_FLOAT64_C( -90.01), SIMDE_FLOAT64_C( 26.51), SIMDE_FLOAT64_C( 88.06), SIMDE_FLOAT64_C( 55.77), SIMDE_FLOAT64_C( 81.19) }, { SIMDE_FLOAT64_C( 88.52), SIMDE_FLOAT64_C( 75.58), SIMDE_FLOAT64_C( -7.51), SIMDE_FLOAT64_C( -80.56), SIMDE_FLOAT64_C( 54.57), SIMDE_FLOAT64_C( -95.80), SIMDE_FLOAT64_C( -84.04), SIMDE_FLOAT64_C( 22.23) }, { SIMDE_FLOAT64_C( -554.02), SIMDE_FLOAT64_C( 517.62), SIMDE_FLOAT64_C( -314.36), SIMDE_FLOAT64_C( 517.56), SIMDE_FLOAT64_C( 675.22), SIMDE_FLOAT64_C( 462.96), SIMDE_FLOAT64_C( -994.29), SIMDE_FLOAT64_C( 896.33) }, { SIMDE_FLOAT64_C( 4141.08), SIMDE_FLOAT64_C( 
1349.76), SIMDE_FLOAT64_C( 352.00), SIMDE_FLOAT64_C( 7768.77), SIMDE_FLOAT64_C( 2121.87), SIMDE_FLOAT64_C( -7973.19), SIMDE_FLOAT64_C( -5681.20), SIMDE_FLOAT64_C( 2701.18) } }, { { SIMDE_FLOAT64_C( 62.43), SIMDE_FLOAT64_C( 38.95), SIMDE_FLOAT64_C( 97.62), SIMDE_FLOAT64_C( 23.91), SIMDE_FLOAT64_C( -90.70), SIMDE_FLOAT64_C( -35.25), SIMDE_FLOAT64_C( -55.92), SIMDE_FLOAT64_C( 91.62) }, { SIMDE_FLOAT64_C( 49.07), SIMDE_FLOAT64_C( -48.16), SIMDE_FLOAT64_C( 37.98), SIMDE_FLOAT64_C( -89.53), SIMDE_FLOAT64_C( 30.97), SIMDE_FLOAT64_C( -76.20), SIMDE_FLOAT64_C( -18.45), SIMDE_FLOAT64_C( -36.22) }, { SIMDE_FLOAT64_C( 706.29), SIMDE_FLOAT64_C( 533.56), SIMDE_FLOAT64_C( -534.21), SIMDE_FLOAT64_C( -539.66), SIMDE_FLOAT64_C( -64.45), SIMDE_FLOAT64_C( 847.44), SIMDE_FLOAT64_C( 900.30), SIMDE_FLOAT64_C( -706.27) }, { SIMDE_FLOAT64_C( 3769.73), SIMDE_FLOAT64_C( -1342.27), SIMDE_FLOAT64_C( 3173.40), SIMDE_FLOAT64_C( -2680.32), SIMDE_FLOAT64_C( -2873.43), SIMDE_FLOAT64_C( 3533.49), SIMDE_FLOAT64_C( 1932.02), SIMDE_FLOAT64_C( -4024.75) } }, { { SIMDE_FLOAT64_C( -28.07), SIMDE_FLOAT64_C( 78.32), SIMDE_FLOAT64_C( -34.12), SIMDE_FLOAT64_C( -3.25), SIMDE_FLOAT64_C( -7.25), SIMDE_FLOAT64_C( 10.89), SIMDE_FLOAT64_C( 81.19), SIMDE_FLOAT64_C( -24.26) }, { SIMDE_FLOAT64_C( 64.22), SIMDE_FLOAT64_C( 13.80), SIMDE_FLOAT64_C( -24.84), SIMDE_FLOAT64_C( -54.57), SIMDE_FLOAT64_C( 4.33), SIMDE_FLOAT64_C( 28.21), SIMDE_FLOAT64_C( -30.50), SIMDE_FLOAT64_C( -96.96) }, { SIMDE_FLOAT64_C( 40.88), SIMDE_FLOAT64_C( 992.17), SIMDE_FLOAT64_C( -527.77), SIMDE_FLOAT64_C( -236.95), SIMDE_FLOAT64_C( -475.32), SIMDE_FLOAT64_C( -695.07), SIMDE_FLOAT64_C( -17.12), SIMDE_FLOAT64_C( 223.14) }, { SIMDE_FLOAT64_C( -1761.78), SIMDE_FLOAT64_C( 2072.99), SIMDE_FLOAT64_C( 319.77), SIMDE_FLOAT64_C( -59.60), SIMDE_FLOAT64_C( -506.71), SIMDE_FLOAT64_C( -387.86), SIMDE_FLOAT64_C( -2493.41), SIMDE_FLOAT64_C( 2575.39) } }, { { SIMDE_FLOAT64_C( 35.24), SIMDE_FLOAT64_C( 65.04), SIMDE_FLOAT64_C( -19.35), SIMDE_FLOAT64_C( -9.52), 
SIMDE_FLOAT64_C( 46.63), SIMDE_FLOAT64_C( 88.76), SIMDE_FLOAT64_C( 6.35), SIMDE_FLOAT64_C( -20.93) }, { SIMDE_FLOAT64_C( -26.93), SIMDE_FLOAT64_C( -13.31), SIMDE_FLOAT64_C( 37.30), SIMDE_FLOAT64_C( -56.20), SIMDE_FLOAT64_C( 92.98), SIMDE_FLOAT64_C( -92.79), SIMDE_FLOAT64_C( 60.93), SIMDE_FLOAT64_C( -90.76) }, { SIMDE_FLOAT64_C( 453.61), SIMDE_FLOAT64_C( -208.18), SIMDE_FLOAT64_C( -835.16), SIMDE_FLOAT64_C( -496.06), SIMDE_FLOAT64_C( 796.10), SIMDE_FLOAT64_C( 16.91), SIMDE_FLOAT64_C( 288.58), SIMDE_FLOAT64_C( 203.13) }, { SIMDE_FLOAT64_C( -495.40), SIMDE_FLOAT64_C( -1073.86), SIMDE_FLOAT64_C( -1556.91), SIMDE_FLOAT64_C( 38.96), SIMDE_FLOAT64_C( 5131.76), SIMDE_FLOAT64_C( -8219.13), SIMDE_FLOAT64_C( 675.49), SIMDE_FLOAT64_C( 2102.74) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d c = simde_mm512_loadu_pd(test_vec[i].c); simde__m512d r = simde_mm512_fmadd_pd(a, b, c); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_fmadd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_fmadd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_fmadd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_fmadd_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/fmsub.c000066400000000000000000000627121400333146700165440ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and 
this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 kitegi */ #define SIMDE_TEST_X86_AVX512_INSN fmsub #include #include #include static int test_simde_mm512_fmsub_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 c[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 39.18), SIMDE_FLOAT32_C( 72.95), SIMDE_FLOAT32_C( 2.39), SIMDE_FLOAT32_C( -99.28), SIMDE_FLOAT32_C( -27.76), SIMDE_FLOAT32_C( 78.92), SIMDE_FLOAT32_C( 97.46), SIMDE_FLOAT32_C( -75.13), SIMDE_FLOAT32_C( -78.86), SIMDE_FLOAT32_C( 62.73), SIMDE_FLOAT32_C( 46.15), SIMDE_FLOAT32_C( -15.69), SIMDE_FLOAT32_C( -66.26), SIMDE_FLOAT32_C( -2.97), SIMDE_FLOAT32_C( 64.58), SIMDE_FLOAT32_C( -96.46) }, { SIMDE_FLOAT32_C( 87.93), SIMDE_FLOAT32_C( -57.28), SIMDE_FLOAT32_C( -38.98), SIMDE_FLOAT32_C( 16.76), SIMDE_FLOAT32_C( -7.19), SIMDE_FLOAT32_C( -49.24), SIMDE_FLOAT32_C( -45.18), SIMDE_FLOAT32_C( 16.45), SIMDE_FLOAT32_C( -68.84), SIMDE_FLOAT32_C( -51.68), SIMDE_FLOAT32_C( 46.34), SIMDE_FLOAT32_C( 24.50), SIMDE_FLOAT32_C( -28.63), SIMDE_FLOAT32_C( -74.05), SIMDE_FLOAT32_C( -21.93), SIMDE_FLOAT32_C( -89.45) }, { SIMDE_FLOAT32_C( 98.91), SIMDE_FLOAT32_C( 80.47), SIMDE_FLOAT32_C( -88.73), SIMDE_FLOAT32_C( -28.86), SIMDE_FLOAT32_C( 59.39), SIMDE_FLOAT32_C( -91.27), SIMDE_FLOAT32_C( -3.99), SIMDE_FLOAT32_C( 80.52), SIMDE_FLOAT32_C( 71.46), SIMDE_FLOAT32_C( -57.84), 
SIMDE_FLOAT32_C( -35.17), SIMDE_FLOAT32_C( -94.81), SIMDE_FLOAT32_C( 39.19), SIMDE_FLOAT32_C( -70.59), SIMDE_FLOAT32_C( -91.26), SIMDE_FLOAT32_C( 27.12) }, { SIMDE_FLOAT32_C( 3346.19), SIMDE_FLOAT32_C( -4259.05), SIMDE_FLOAT32_C( -4.43), SIMDE_FLOAT32_C( -1635.07), SIMDE_FLOAT32_C( 140.20), SIMDE_FLOAT32_C( -3794.75), SIMDE_FLOAT32_C( -4399.25), SIMDE_FLOAT32_C( -1316.41), SIMDE_FLOAT32_C( 5357.26), SIMDE_FLOAT32_C( -3184.05), SIMDE_FLOAT32_C( 2173.76), SIMDE_FLOAT32_C( -289.60), SIMDE_FLOAT32_C( 1857.83), SIMDE_FLOAT32_C( 290.52), SIMDE_FLOAT32_C( -1324.98), SIMDE_FLOAT32_C( 8601.23) } }, { { SIMDE_FLOAT32_C( -27.87), SIMDE_FLOAT32_C( -30.25), SIMDE_FLOAT32_C( -56.12), SIMDE_FLOAT32_C( 64.94), SIMDE_FLOAT32_C( 20.51), SIMDE_FLOAT32_C( -1.30), SIMDE_FLOAT32_C( -18.61), SIMDE_FLOAT32_C( 51.67), SIMDE_FLOAT32_C( 47.02), SIMDE_FLOAT32_C( -72.27), SIMDE_FLOAT32_C( -23.83), SIMDE_FLOAT32_C( -81.60), SIMDE_FLOAT32_C( -46.31), SIMDE_FLOAT32_C( 54.24), SIMDE_FLOAT32_C( -71.05), SIMDE_FLOAT32_C( -47.41) }, { SIMDE_FLOAT32_C( 34.71), SIMDE_FLOAT32_C( -59.79), SIMDE_FLOAT32_C( 23.73), SIMDE_FLOAT32_C( -5.90), SIMDE_FLOAT32_C( -51.06), SIMDE_FLOAT32_C( -80.26), SIMDE_FLOAT32_C( -25.38), SIMDE_FLOAT32_C( -79.60), SIMDE_FLOAT32_C( -38.10), SIMDE_FLOAT32_C( 39.45), SIMDE_FLOAT32_C( -74.41), SIMDE_FLOAT32_C( -98.91), SIMDE_FLOAT32_C( 68.87), SIMDE_FLOAT32_C( -65.67), SIMDE_FLOAT32_C( 28.21), SIMDE_FLOAT32_C( -59.01) }, { SIMDE_FLOAT32_C( 4.08), SIMDE_FLOAT32_C( 72.09), SIMDE_FLOAT32_C( -94.06), SIMDE_FLOAT32_C( -75.41), SIMDE_FLOAT32_C( -29.20), SIMDE_FLOAT32_C( -12.67), SIMDE_FLOAT32_C( 76.26), SIMDE_FLOAT32_C( -82.18), SIMDE_FLOAT32_C( 15.06), SIMDE_FLOAT32_C( -47.57), SIMDE_FLOAT32_C( -63.79), SIMDE_FLOAT32_C( 68.75), SIMDE_FLOAT32_C( -93.33), SIMDE_FLOAT32_C( -34.84), SIMDE_FLOAT32_C( -78.66), SIMDE_FLOAT32_C( 41.38) }, { SIMDE_FLOAT32_C( -971.45), SIMDE_FLOAT32_C( 1736.56), SIMDE_FLOAT32_C( -1237.67), SIMDE_FLOAT32_C( -307.74), SIMDE_FLOAT32_C( -1018.04), SIMDE_FLOAT32_C( 
117.01), SIMDE_FLOAT32_C( 396.06), SIMDE_FLOAT32_C( -4030.75), SIMDE_FLOAT32_C( -1806.52), SIMDE_FLOAT32_C( -2803.48), SIMDE_FLOAT32_C( 1836.98), SIMDE_FLOAT32_C( 8002.31), SIMDE_FLOAT32_C( -3096.04), SIMDE_FLOAT32_C( -3527.10), SIMDE_FLOAT32_C( -1925.66), SIMDE_FLOAT32_C( 2756.28) } }, { { SIMDE_FLOAT32_C( 5.37), SIMDE_FLOAT32_C( 45.08), SIMDE_FLOAT32_C( -64.53), SIMDE_FLOAT32_C( 54.31), SIMDE_FLOAT32_C( 64.82), SIMDE_FLOAT32_C( 10.09), SIMDE_FLOAT32_C( 74.71), SIMDE_FLOAT32_C( -73.28), SIMDE_FLOAT32_C( -50.46), SIMDE_FLOAT32_C( -99.70), SIMDE_FLOAT32_C( -72.19), SIMDE_FLOAT32_C( -81.59), SIMDE_FLOAT32_C( -65.37), SIMDE_FLOAT32_C( 56.02), SIMDE_FLOAT32_C( -40.60), SIMDE_FLOAT32_C( 38.71) }, { SIMDE_FLOAT32_C( 28.11), SIMDE_FLOAT32_C( -34.66), SIMDE_FLOAT32_C( 63.29), SIMDE_FLOAT32_C( 98.90), SIMDE_FLOAT32_C( 52.67), SIMDE_FLOAT32_C( 39.55), SIMDE_FLOAT32_C( -83.28), SIMDE_FLOAT32_C( -32.27), SIMDE_FLOAT32_C( 91.98), SIMDE_FLOAT32_C( -47.07), SIMDE_FLOAT32_C( -63.52), SIMDE_FLOAT32_C( 98.65), SIMDE_FLOAT32_C( 18.09), SIMDE_FLOAT32_C( -42.18), SIMDE_FLOAT32_C( 40.03), SIMDE_FLOAT32_C( -76.54) }, { SIMDE_FLOAT32_C( -97.10), SIMDE_FLOAT32_C( 75.50), SIMDE_FLOAT32_C( 77.78), SIMDE_FLOAT32_C( 67.71), SIMDE_FLOAT32_C( -14.41), SIMDE_FLOAT32_C( 52.49), SIMDE_FLOAT32_C( 94.43), SIMDE_FLOAT32_C( 35.13), SIMDE_FLOAT32_C( 52.79), SIMDE_FLOAT32_C( -77.76), SIMDE_FLOAT32_C( 53.54), SIMDE_FLOAT32_C( 87.41), SIMDE_FLOAT32_C( 78.26), SIMDE_FLOAT32_C( -87.05), SIMDE_FLOAT32_C( 26.12), SIMDE_FLOAT32_C( 6.37) }, { SIMDE_FLOAT32_C( 248.05), SIMDE_FLOAT32_C( -1637.97), SIMDE_FLOAT32_C( -4161.88), SIMDE_FLOAT32_C( 5303.55), SIMDE_FLOAT32_C( 3428.48), SIMDE_FLOAT32_C( 346.57), SIMDE_FLOAT32_C( -6316.28), SIMDE_FLOAT32_C( 2329.62), SIMDE_FLOAT32_C( -4694.10), SIMDE_FLOAT32_C( 4770.64), SIMDE_FLOAT32_C( 4531.97), SIMDE_FLOAT32_C( -8136.26), SIMDE_FLOAT32_C( -1260.80), SIMDE_FLOAT32_C( -2275.87), SIMDE_FLOAT32_C( -1651.34), SIMDE_FLOAT32_C( -2969.23) } }, { { SIMDE_FLOAT32_C( -21.71), 
SIMDE_FLOAT32_C( -10.59), SIMDE_FLOAT32_C( 5.27), SIMDE_FLOAT32_C( -69.04), SIMDE_FLOAT32_C( -71.03), SIMDE_FLOAT32_C( 22.00), SIMDE_FLOAT32_C( -1.31), SIMDE_FLOAT32_C( -79.05), SIMDE_FLOAT32_C( 74.93), SIMDE_FLOAT32_C( 35.16), SIMDE_FLOAT32_C( -80.40), SIMDE_FLOAT32_C( -6.98), SIMDE_FLOAT32_C( 92.98), SIMDE_FLOAT32_C( 59.62), SIMDE_FLOAT32_C( 16.48), SIMDE_FLOAT32_C( 95.88) }, { SIMDE_FLOAT32_C( 35.12), SIMDE_FLOAT32_C( -5.74), SIMDE_FLOAT32_C( 63.59), SIMDE_FLOAT32_C( -79.29), SIMDE_FLOAT32_C( -53.25), SIMDE_FLOAT32_C( 58.02), SIMDE_FLOAT32_C( 55.85), SIMDE_FLOAT32_C( 99.54), SIMDE_FLOAT32_C( 80.26), SIMDE_FLOAT32_C( 9.39), SIMDE_FLOAT32_C( 86.95), SIMDE_FLOAT32_C( 58.52), SIMDE_FLOAT32_C( 22.34), SIMDE_FLOAT32_C( 13.07), SIMDE_FLOAT32_C( -35.11), SIMDE_FLOAT32_C( -99.37) }, { SIMDE_FLOAT32_C( -97.51), SIMDE_FLOAT32_C( 70.17), SIMDE_FLOAT32_C( -68.42), SIMDE_FLOAT32_C( -68.55), SIMDE_FLOAT32_C( -7.84), SIMDE_FLOAT32_C( 30.27), SIMDE_FLOAT32_C( -47.60), SIMDE_FLOAT32_C( -32.91), SIMDE_FLOAT32_C( -34.57), SIMDE_FLOAT32_C( -28.00), SIMDE_FLOAT32_C( 60.11), SIMDE_FLOAT32_C( -41.59), SIMDE_FLOAT32_C( -68.38), SIMDE_FLOAT32_C( -23.41), SIMDE_FLOAT32_C( -45.71), SIMDE_FLOAT32_C( 66.75) }, { SIMDE_FLOAT32_C( -664.95), SIMDE_FLOAT32_C( -9.38), SIMDE_FLOAT32_C( 403.54), SIMDE_FLOAT32_C( 5542.73), SIMDE_FLOAT32_C( 3790.19), SIMDE_FLOAT32_C( 1246.17), SIMDE_FLOAT32_C( -25.56), SIMDE_FLOAT32_C( -7835.73), SIMDE_FLOAT32_C( 6048.45), SIMDE_FLOAT32_C( 358.15), SIMDE_FLOAT32_C( -7050.89), SIMDE_FLOAT32_C( -366.88), SIMDE_FLOAT32_C( 2145.55), SIMDE_FLOAT32_C( 802.64), SIMDE_FLOAT32_C( -532.90), SIMDE_FLOAT32_C( -9594.35) } }, { { SIMDE_FLOAT32_C( 70.85), SIMDE_FLOAT32_C( -82.11), SIMDE_FLOAT32_C( 87.46), SIMDE_FLOAT32_C( -82.40), SIMDE_FLOAT32_C( 75.91), SIMDE_FLOAT32_C( 43.31), SIMDE_FLOAT32_C( -82.86), SIMDE_FLOAT32_C( 56.17), SIMDE_FLOAT32_C( -47.30), SIMDE_FLOAT32_C( -95.91), SIMDE_FLOAT32_C( 14.69), SIMDE_FLOAT32_C( 75.04), SIMDE_FLOAT32_C( 17.16), SIMDE_FLOAT32_C( 79.59), 
SIMDE_FLOAT32_C( 75.66), SIMDE_FLOAT32_C( 19.64) }, { SIMDE_FLOAT32_C( 49.75), SIMDE_FLOAT32_C( -92.75), SIMDE_FLOAT32_C( 51.10), SIMDE_FLOAT32_C( -58.08), SIMDE_FLOAT32_C( 37.51), SIMDE_FLOAT32_C( -96.50), SIMDE_FLOAT32_C( 9.01), SIMDE_FLOAT32_C( -97.06), SIMDE_FLOAT32_C( -24.50), SIMDE_FLOAT32_C( -30.88), SIMDE_FLOAT32_C( -38.64), SIMDE_FLOAT32_C( 7.12), SIMDE_FLOAT32_C( 45.71), SIMDE_FLOAT32_C( 15.65), SIMDE_FLOAT32_C( -26.14), SIMDE_FLOAT32_C( 16.56) }, { SIMDE_FLOAT32_C( 33.54), SIMDE_FLOAT32_C( -38.68), SIMDE_FLOAT32_C( 34.16), SIMDE_FLOAT32_C( 9.45), SIMDE_FLOAT32_C( -95.37), SIMDE_FLOAT32_C( 51.30), SIMDE_FLOAT32_C( -34.38), SIMDE_FLOAT32_C( -42.67), SIMDE_FLOAT32_C( 55.39), SIMDE_FLOAT32_C( 80.31), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( -27.46), SIMDE_FLOAT32_C( 59.90), SIMDE_FLOAT32_C( -91.97), SIMDE_FLOAT32_C( 92.19), SIMDE_FLOAT32_C( 9.65) }, { SIMDE_FLOAT32_C( 3491.25), SIMDE_FLOAT32_C( 7654.38), SIMDE_FLOAT32_C( 4435.05), SIMDE_FLOAT32_C( 4776.34), SIMDE_FLOAT32_C( 2942.75), SIMDE_FLOAT32_C( -4230.71), SIMDE_FLOAT32_C( -712.19), SIMDE_FLOAT32_C( -5409.19), SIMDE_FLOAT32_C( 1103.46), SIMDE_FLOAT32_C( 2881.39), SIMDE_FLOAT32_C( -499.98), SIMDE_FLOAT32_C( 561.74), SIMDE_FLOAT32_C( 724.48), SIMDE_FLOAT32_C( 1337.55), SIMDE_FLOAT32_C( -2069.94), SIMDE_FLOAT32_C( 315.59) } }, { { SIMDE_FLOAT32_C( -84.73), SIMDE_FLOAT32_C( 43.28), SIMDE_FLOAT32_C( 51.57), SIMDE_FLOAT32_C( 52.79), SIMDE_FLOAT32_C( 46.78), SIMDE_FLOAT32_C( -39.42), SIMDE_FLOAT32_C( 55.73), SIMDE_FLOAT32_C( -77.72), SIMDE_FLOAT32_C( 29.69), SIMDE_FLOAT32_C( -82.91), SIMDE_FLOAT32_C( 29.40), SIMDE_FLOAT32_C( -24.60), SIMDE_FLOAT32_C( 32.74), SIMDE_FLOAT32_C( -96.74), SIMDE_FLOAT32_C( 91.96), SIMDE_FLOAT32_C( -33.72) }, { SIMDE_FLOAT32_C( -35.42), SIMDE_FLOAT32_C( 26.13), SIMDE_FLOAT32_C( 75.73), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( -22.57), SIMDE_FLOAT32_C( -58.65), SIMDE_FLOAT32_C( 26.54), SIMDE_FLOAT32_C( -67.19), SIMDE_FLOAT32_C( -78.34), SIMDE_FLOAT32_C( 58.90), SIMDE_FLOAT32_C( 
5.36), SIMDE_FLOAT32_C( 81.57), SIMDE_FLOAT32_C( 66.92), SIMDE_FLOAT32_C( -2.46), SIMDE_FLOAT32_C( -8.78), SIMDE_FLOAT32_C( 82.20) }, { SIMDE_FLOAT32_C( -59.17), SIMDE_FLOAT32_C( -57.21), SIMDE_FLOAT32_C( 34.98), SIMDE_FLOAT32_C( 87.61), SIMDE_FLOAT32_C( 3.36), SIMDE_FLOAT32_C( -9.29), SIMDE_FLOAT32_C( -90.11), SIMDE_FLOAT32_C( -66.95), SIMDE_FLOAT32_C( 7.80), SIMDE_FLOAT32_C( 39.28), SIMDE_FLOAT32_C( 8.46), SIMDE_FLOAT32_C( -59.46), SIMDE_FLOAT32_C( 42.54), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 6.82), SIMDE_FLOAT32_C( -92.87) }, { SIMDE_FLOAT32_C( 3060.31), SIMDE_FLOAT32_C( 1188.12), SIMDE_FLOAT32_C( 3870.42), SIMDE_FLOAT32_C( -1713.01), SIMDE_FLOAT32_C( -1059.18), SIMDE_FLOAT32_C( 2321.27), SIMDE_FLOAT32_C( 1569.18), SIMDE_FLOAT32_C( 5288.96), SIMDE_FLOAT32_C( -2333.71), SIMDE_FLOAT32_C( -4922.68), SIMDE_FLOAT32_C( 149.12), SIMDE_FLOAT32_C( -1947.16), SIMDE_FLOAT32_C( 2148.42), SIMDE_FLOAT32_C( 237.56), SIMDE_FLOAT32_C( -814.23), SIMDE_FLOAT32_C( -2678.91) } }, { { SIMDE_FLOAT32_C( -73.46), SIMDE_FLOAT32_C( -17.45), SIMDE_FLOAT32_C( -23.66), SIMDE_FLOAT32_C( 3.97), SIMDE_FLOAT32_C( 23.90), SIMDE_FLOAT32_C( -97.13), SIMDE_FLOAT32_C( 36.78), SIMDE_FLOAT32_C( 45.56), SIMDE_FLOAT32_C( 61.77), SIMDE_FLOAT32_C( -57.86), SIMDE_FLOAT32_C( 27.13), SIMDE_FLOAT32_C( 28.69), SIMDE_FLOAT32_C( 39.68), SIMDE_FLOAT32_C( -81.65), SIMDE_FLOAT32_C( 10.89), SIMDE_FLOAT32_C( 80.51) }, { SIMDE_FLOAT32_C( -38.86), SIMDE_FLOAT32_C( -54.13), SIMDE_FLOAT32_C( 68.12), SIMDE_FLOAT32_C( 64.50), SIMDE_FLOAT32_C( 36.58), SIMDE_FLOAT32_C( 78.01), SIMDE_FLOAT32_C( 97.56), SIMDE_FLOAT32_C( -55.62), SIMDE_FLOAT32_C( 17.29), SIMDE_FLOAT32_C( 6.01), SIMDE_FLOAT32_C( -15.08), SIMDE_FLOAT32_C( -40.17), SIMDE_FLOAT32_C( -93.57), SIMDE_FLOAT32_C( 91.74), SIMDE_FLOAT32_C( -33.04), SIMDE_FLOAT32_C( -67.03) }, { SIMDE_FLOAT32_C( -25.71), SIMDE_FLOAT32_C( 43.29), SIMDE_FLOAT32_C( 36.94), SIMDE_FLOAT32_C( 98.19), SIMDE_FLOAT32_C( 46.17), SIMDE_FLOAT32_C( -26.28), SIMDE_FLOAT32_C( 43.76), SIMDE_FLOAT32_C( 
7.94), SIMDE_FLOAT32_C( 15.86), SIMDE_FLOAT32_C( -29.11), SIMDE_FLOAT32_C( -63.37), SIMDE_FLOAT32_C( -44.46), SIMDE_FLOAT32_C( -10.77), SIMDE_FLOAT32_C( 47.52), SIMDE_FLOAT32_C( -63.95), SIMDE_FLOAT32_C( 50.37) }, { SIMDE_FLOAT32_C( 2880.37), SIMDE_FLOAT32_C( 901.28), SIMDE_FLOAT32_C( -1648.66), SIMDE_FLOAT32_C( 157.88), SIMDE_FLOAT32_C( 828.09), SIMDE_FLOAT32_C( -7550.83), SIMDE_FLOAT32_C( 3544.50), SIMDE_FLOAT32_C( -2541.99), SIMDE_FLOAT32_C( 1052.14), SIMDE_FLOAT32_C( -318.63), SIMDE_FLOAT32_C( -345.75), SIMDE_FLOAT32_C( -1108.02), SIMDE_FLOAT32_C( -3702.09), SIMDE_FLOAT32_C( -7538.09), SIMDE_FLOAT32_C( -295.86), SIMDE_FLOAT32_C( -5446.96) } }, { { SIMDE_FLOAT32_C( 93.39), SIMDE_FLOAT32_C( -95.84), SIMDE_FLOAT32_C( 14.87), SIMDE_FLOAT32_C( 29.97), SIMDE_FLOAT32_C( 82.17), SIMDE_FLOAT32_C( 12.43), SIMDE_FLOAT32_C( 74.35), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -81.56), SIMDE_FLOAT32_C( -40.72), SIMDE_FLOAT32_C( 59.29), SIMDE_FLOAT32_C( -75.13), SIMDE_FLOAT32_C( -48.98), SIMDE_FLOAT32_C( -73.75), SIMDE_FLOAT32_C( -42.16), SIMDE_FLOAT32_C( 25.31) }, { SIMDE_FLOAT32_C( 69.55), SIMDE_FLOAT32_C( 94.78), SIMDE_FLOAT32_C( 23.51), SIMDE_FLOAT32_C( 15.71), SIMDE_FLOAT32_C( -31.49), SIMDE_FLOAT32_C( -32.74), SIMDE_FLOAT32_C( -76.35), SIMDE_FLOAT32_C( 84.37), SIMDE_FLOAT32_C( 38.15), SIMDE_FLOAT32_C( -39.72), SIMDE_FLOAT32_C( -60.09), SIMDE_FLOAT32_C( -72.62), SIMDE_FLOAT32_C( -92.20), SIMDE_FLOAT32_C( -24.04), SIMDE_FLOAT32_C( 77.76), SIMDE_FLOAT32_C( -98.81) }, { SIMDE_FLOAT32_C( -19.88), SIMDE_FLOAT32_C( -7.37), SIMDE_FLOAT32_C( 31.16), SIMDE_FLOAT32_C( -37.71), SIMDE_FLOAT32_C( -94.94), SIMDE_FLOAT32_C( 5.51), SIMDE_FLOAT32_C( 61.75), SIMDE_FLOAT32_C( -76.50), SIMDE_FLOAT32_C( 64.79), SIMDE_FLOAT32_C( 21.04), SIMDE_FLOAT32_C( -51.63), SIMDE_FLOAT32_C( -84.19), SIMDE_FLOAT32_C( 47.29), SIMDE_FLOAT32_C( 6.21), SIMDE_FLOAT32_C( 41.12), SIMDE_FLOAT32_C( 16.84) }, { SIMDE_FLOAT32_C( 6515.15), SIMDE_FLOAT32_C( -9076.34), SIMDE_FLOAT32_C( 318.43), SIMDE_FLOAT32_C( 508.54), 
SIMDE_FLOAT32_C( -2492.59), SIMDE_FLOAT32_C( -412.47), SIMDE_FLOAT32_C( -5738.37), SIMDE_FLOAT32_C( 30.94), SIMDE_FLOAT32_C( -3176.30), SIMDE_FLOAT32_C( 1596.36), SIMDE_FLOAT32_C( -3511.11), SIMDE_FLOAT32_C( 5540.13), SIMDE_FLOAT32_C( 4468.67), SIMDE_FLOAT32_C( 1766.74), SIMDE_FLOAT32_C( -3319.48), SIMDE_FLOAT32_C( -2517.72) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 c = simde_mm512_loadu_ps(test_vec[i].c); simde__m512 r = simde_mm512_fmsub_ps(a, b, c); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_fmsub_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 c[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -46.12), SIMDE_FLOAT64_C( -61.46), SIMDE_FLOAT64_C( 12.22), SIMDE_FLOAT64_C( -13.13), SIMDE_FLOAT64_C( 17.13), SIMDE_FLOAT64_C( 34.93), SIMDE_FLOAT64_C( 51.88), SIMDE_FLOAT64_C( 19.93) }, { SIMDE_FLOAT64_C( 73.17), SIMDE_FLOAT64_C( -90.86), SIMDE_FLOAT64_C( -64.69), SIMDE_FLOAT64_C( 30.40), SIMDE_FLOAT64_C( -78.85), SIMDE_FLOAT64_C( -2.23), SIMDE_FLOAT64_C( 38.81), SIMDE_FLOAT64_C( -25.38) }, { SIMDE_FLOAT64_C( 6.25), SIMDE_FLOAT64_C( -22.61), SIMDE_FLOAT64_C( 8.61), SIMDE_FLOAT64_C( -27.29), SIMDE_FLOAT64_C( -33.10), SIMDE_FLOAT64_C( 99.80), SIMDE_FLOAT64_C( -95.28), SIMDE_FLOAT64_C( 59.13) }, { SIMDE_FLOAT64_C( -3380.85), SIMDE_FLOAT64_C( 5606.87), SIMDE_FLOAT64_C( -799.12), SIMDE_FLOAT64_C( -371.86), SIMDE_FLOAT64_C( -1317.60), SIMDE_FLOAT64_C( -177.69), SIMDE_FLOAT64_C( 2108.74), SIMDE_FLOAT64_C( -564.95) } }, { { SIMDE_FLOAT64_C( 28.50), SIMDE_FLOAT64_C( -32.67), SIMDE_FLOAT64_C( -95.72), SIMDE_FLOAT64_C( 21.20), SIMDE_FLOAT64_C( 40.97), SIMDE_FLOAT64_C( -90.91), SIMDE_FLOAT64_C( 40.73), SIMDE_FLOAT64_C( 94.85) }, { SIMDE_FLOAT64_C( -52.37), 
SIMDE_FLOAT64_C( -47.05), SIMDE_FLOAT64_C( -18.28), SIMDE_FLOAT64_C( 64.76), SIMDE_FLOAT64_C( 87.88), SIMDE_FLOAT64_C( -66.39), SIMDE_FLOAT64_C( -15.31), SIMDE_FLOAT64_C( 61.05) }, { SIMDE_FLOAT64_C( -57.26), SIMDE_FLOAT64_C( 19.99), SIMDE_FLOAT64_C( -8.55), SIMDE_FLOAT64_C( -36.11), SIMDE_FLOAT64_C( -82.23), SIMDE_FLOAT64_C( -69.73), SIMDE_FLOAT64_C( 38.51), SIMDE_FLOAT64_C( 24.02) }, { SIMDE_FLOAT64_C( -1435.28), SIMDE_FLOAT64_C( 1517.13), SIMDE_FLOAT64_C( 1758.31), SIMDE_FLOAT64_C( 1409.02), SIMDE_FLOAT64_C( 3682.67), SIMDE_FLOAT64_C( 6105.24), SIMDE_FLOAT64_C( -662.09), SIMDE_FLOAT64_C( 5766.57) } }, { { SIMDE_FLOAT64_C( 7.65), SIMDE_FLOAT64_C( -52.88), SIMDE_FLOAT64_C( 96.73), SIMDE_FLOAT64_C( 74.56), SIMDE_FLOAT64_C( -53.08), SIMDE_FLOAT64_C( -98.55), SIMDE_FLOAT64_C( 33.69), SIMDE_FLOAT64_C( 75.41) }, { SIMDE_FLOAT64_C( -31.22), SIMDE_FLOAT64_C( 37.96), SIMDE_FLOAT64_C( -3.39), SIMDE_FLOAT64_C( -90.25), SIMDE_FLOAT64_C( 47.06), SIMDE_FLOAT64_C( -62.65), SIMDE_FLOAT64_C( -95.40), SIMDE_FLOAT64_C( 94.69) }, { SIMDE_FLOAT64_C( -9.70), SIMDE_FLOAT64_C( -13.67), SIMDE_FLOAT64_C( 59.45), SIMDE_FLOAT64_C( -21.82), SIMDE_FLOAT64_C( 19.93), SIMDE_FLOAT64_C( -55.86), SIMDE_FLOAT64_C( -60.77), SIMDE_FLOAT64_C( 62.67) }, { SIMDE_FLOAT64_C( -229.13), SIMDE_FLOAT64_C( -1993.65), SIMDE_FLOAT64_C( -387.36), SIMDE_FLOAT64_C( -6707.22), SIMDE_FLOAT64_C( -2517.87), SIMDE_FLOAT64_C( 6230.02), SIMDE_FLOAT64_C( -3153.26), SIMDE_FLOAT64_C( 7077.90) } }, { { SIMDE_FLOAT64_C( 64.13), SIMDE_FLOAT64_C( 30.68), SIMDE_FLOAT64_C( -73.44), SIMDE_FLOAT64_C( 81.90), SIMDE_FLOAT64_C( 60.95), SIMDE_FLOAT64_C( 65.08), SIMDE_FLOAT64_C( 5.91), SIMDE_FLOAT64_C( -31.40) }, { SIMDE_FLOAT64_C( -87.81), SIMDE_FLOAT64_C( 2.64), SIMDE_FLOAT64_C( -56.84), SIMDE_FLOAT64_C( -40.89), SIMDE_FLOAT64_C( 4.09), SIMDE_FLOAT64_C( 76.84), SIMDE_FLOAT64_C( -65.48), SIMDE_FLOAT64_C( 72.87) }, { SIMDE_FLOAT64_C( 14.81), SIMDE_FLOAT64_C( 31.14), SIMDE_FLOAT64_C( 82.62), SIMDE_FLOAT64_C( -38.13), SIMDE_FLOAT64_C( 
68.49), SIMDE_FLOAT64_C( 87.23), SIMDE_FLOAT64_C( -43.44), SIMDE_FLOAT64_C( -41.22) }, { SIMDE_FLOAT64_C( -5646.07), SIMDE_FLOAT64_C( 49.86), SIMDE_FLOAT64_C( 4091.71), SIMDE_FLOAT64_C( -3310.76), SIMDE_FLOAT64_C( 180.80), SIMDE_FLOAT64_C( 4913.52), SIMDE_FLOAT64_C( -343.55), SIMDE_FLOAT64_C( -2246.90) } }, { { SIMDE_FLOAT64_C( -26.44), SIMDE_FLOAT64_C( -83.99), SIMDE_FLOAT64_C( 36.96), SIMDE_FLOAT64_C( 93.49), SIMDE_FLOAT64_C( -39.85), SIMDE_FLOAT64_C( 76.19), SIMDE_FLOAT64_C( 56.16), SIMDE_FLOAT64_C( -75.72) }, { SIMDE_FLOAT64_C( 6.87), SIMDE_FLOAT64_C( 82.72), SIMDE_FLOAT64_C( -93.82), SIMDE_FLOAT64_C( -32.18), SIMDE_FLOAT64_C( 47.80), SIMDE_FLOAT64_C( 12.09), SIMDE_FLOAT64_C( 36.42), SIMDE_FLOAT64_C( 59.99) }, { SIMDE_FLOAT64_C( -85.27), SIMDE_FLOAT64_C( 79.58), SIMDE_FLOAT64_C( -80.90), SIMDE_FLOAT64_C( 18.82), SIMDE_FLOAT64_C( 56.42), SIMDE_FLOAT64_C( -46.38), SIMDE_FLOAT64_C( -8.31), SIMDE_FLOAT64_C( -28.77) }, { SIMDE_FLOAT64_C( -96.37), SIMDE_FLOAT64_C( -7027.23), SIMDE_FLOAT64_C( -3386.69), SIMDE_FLOAT64_C( -3027.33), SIMDE_FLOAT64_C( -1961.25), SIMDE_FLOAT64_C( 967.52), SIMDE_FLOAT64_C( 2053.66), SIMDE_FLOAT64_C( -4513.67) } }, { { SIMDE_FLOAT64_C( 84.76), SIMDE_FLOAT64_C( -25.68), SIMDE_FLOAT64_C( 33.09), SIMDE_FLOAT64_C( 53.25), SIMDE_FLOAT64_C( -38.45), SIMDE_FLOAT64_C( 89.65), SIMDE_FLOAT64_C( -87.97), SIMDE_FLOAT64_C( 35.10) }, { SIMDE_FLOAT64_C( -94.34), SIMDE_FLOAT64_C( 48.99), SIMDE_FLOAT64_C( 28.59), SIMDE_FLOAT64_C( -34.19), SIMDE_FLOAT64_C( 25.18), SIMDE_FLOAT64_C( -15.25), SIMDE_FLOAT64_C( -9.91), SIMDE_FLOAT64_C( -67.95) }, { SIMDE_FLOAT64_C( -32.53), SIMDE_FLOAT64_C( -3.73), SIMDE_FLOAT64_C( -0.13), SIMDE_FLOAT64_C( -84.73), SIMDE_FLOAT64_C( -91.64), SIMDE_FLOAT64_C( -63.71), SIMDE_FLOAT64_C( 75.26), SIMDE_FLOAT64_C( -76.91) }, { SIMDE_FLOAT64_C( -7963.73), SIMDE_FLOAT64_C( -1254.33), SIMDE_FLOAT64_C( 946.17), SIMDE_FLOAT64_C( -1735.89), SIMDE_FLOAT64_C( -876.53), SIMDE_FLOAT64_C( -1303.45), SIMDE_FLOAT64_C( 796.52), SIMDE_FLOAT64_C( 
-2308.14) } }, { { SIMDE_FLOAT64_C( -84.14), SIMDE_FLOAT64_C( 94.36), SIMDE_FLOAT64_C( 41.92), SIMDE_FLOAT64_C( 72.28), SIMDE_FLOAT64_C( -52.01), SIMDE_FLOAT64_C( -66.39), SIMDE_FLOAT64_C( -56.49), SIMDE_FLOAT64_C( -67.25) }, { SIMDE_FLOAT64_C( 7.93), SIMDE_FLOAT64_C( 76.60), SIMDE_FLOAT64_C( 85.99), SIMDE_FLOAT64_C( 69.48), SIMDE_FLOAT64_C( 66.25), SIMDE_FLOAT64_C( 98.03), SIMDE_FLOAT64_C( 4.58), SIMDE_FLOAT64_C( 71.92) }, { SIMDE_FLOAT64_C( 47.02), SIMDE_FLOAT64_C( -66.83), SIMDE_FLOAT64_C( -62.27), SIMDE_FLOAT64_C( -27.80), SIMDE_FLOAT64_C( 17.92), SIMDE_FLOAT64_C( 27.82), SIMDE_FLOAT64_C( 4.25), SIMDE_FLOAT64_C( 85.39) }, { SIMDE_FLOAT64_C( -714.25), SIMDE_FLOAT64_C( 7294.81), SIMDE_FLOAT64_C( 3666.97), SIMDE_FLOAT64_C( 5049.81), SIMDE_FLOAT64_C( -3463.58), SIMDE_FLOAT64_C( -6536.03), SIMDE_FLOAT64_C( -262.97), SIMDE_FLOAT64_C( -4922.01) } }, { { SIMDE_FLOAT64_C( -75.91), SIMDE_FLOAT64_C( -95.88), SIMDE_FLOAT64_C( -99.34), SIMDE_FLOAT64_C( -67.54), SIMDE_FLOAT64_C( -59.59), SIMDE_FLOAT64_C( 75.92), SIMDE_FLOAT64_C( -44.45), SIMDE_FLOAT64_C( -43.72) }, { SIMDE_FLOAT64_C( 70.29), SIMDE_FLOAT64_C( 97.47), SIMDE_FLOAT64_C( -71.44), SIMDE_FLOAT64_C( -81.73), SIMDE_FLOAT64_C( -68.92), SIMDE_FLOAT64_C( -27.93), SIMDE_FLOAT64_C( -48.98), SIMDE_FLOAT64_C( 39.01) }, { SIMDE_FLOAT64_C( -51.32), SIMDE_FLOAT64_C( -62.98), SIMDE_FLOAT64_C( 8.48), SIMDE_FLOAT64_C( -85.07), SIMDE_FLOAT64_C( -64.96), SIMDE_FLOAT64_C( -86.94), SIMDE_FLOAT64_C( 86.85), SIMDE_FLOAT64_C( 82.06) }, { SIMDE_FLOAT64_C( -5284.39), SIMDE_FLOAT64_C( -9282.44), SIMDE_FLOAT64_C( 7088.37), SIMDE_FLOAT64_C( 5605.11), SIMDE_FLOAT64_C( 4171.90), SIMDE_FLOAT64_C( -2033.51), SIMDE_FLOAT64_C( 2090.31), SIMDE_FLOAT64_C( -1787.58) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d c = simde_mm512_loadu_pd(test_vec[i].c); simde__m512d r = simde_mm512_fmsub_pd(a, b, 
c); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_fmsub_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_fmsub_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/fnmadd.c000066400000000000000000000627221400333146700166620ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 kitegi */ #define SIMDE_TEST_X86_AVX512_INSN fnmadd #include #include #include static int test_simde_mm512_fnmadd_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 c[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -61.24), SIMDE_FLOAT32_C( 66.05), SIMDE_FLOAT32_C( -37.34), SIMDE_FLOAT32_C( -97.14), SIMDE_FLOAT32_C( 46.24), SIMDE_FLOAT32_C( -6.81), SIMDE_FLOAT32_C( 13.43), SIMDE_FLOAT32_C( -17.56), SIMDE_FLOAT32_C( -58.55), SIMDE_FLOAT32_C( -25.60), SIMDE_FLOAT32_C( -35.79), SIMDE_FLOAT32_C( 89.11), SIMDE_FLOAT32_C( 42.18), SIMDE_FLOAT32_C( 22.19), SIMDE_FLOAT32_C( 87.14), SIMDE_FLOAT32_C( -55.50) }, { SIMDE_FLOAT32_C( 51.65), SIMDE_FLOAT32_C( -43.85), SIMDE_FLOAT32_C( -44.69), SIMDE_FLOAT32_C( 2.09), SIMDE_FLOAT32_C( 37.82), SIMDE_FLOAT32_C( 8.27), SIMDE_FLOAT32_C( -31.95), SIMDE_FLOAT32_C( 70.84), SIMDE_FLOAT32_C( -62.34), SIMDE_FLOAT32_C( 50.49), SIMDE_FLOAT32_C( -46.36), SIMDE_FLOAT32_C( -70.84), SIMDE_FLOAT32_C( 55.23), SIMDE_FLOAT32_C( -80.94), SIMDE_FLOAT32_C( 23.06), SIMDE_FLOAT32_C( 93.98) }, { SIMDE_FLOAT32_C( 85.11), SIMDE_FLOAT32_C( 85.72), SIMDE_FLOAT32_C( 96.84), SIMDE_FLOAT32_C( 31.35), SIMDE_FLOAT32_C( -21.08), SIMDE_FLOAT32_C( 10.28), SIMDE_FLOAT32_C( -86.21), SIMDE_FLOAT32_C( 20.37), SIMDE_FLOAT32_C( 84.68), SIMDE_FLOAT32_C( -22.01), SIMDE_FLOAT32_C( 9.48), SIMDE_FLOAT32_C( 26.86), SIMDE_FLOAT32_C( -99.81), SIMDE_FLOAT32_C( -3.39), SIMDE_FLOAT32_C( 71.36), SIMDE_FLOAT32_C( 51.84) }, { SIMDE_FLOAT32_C( 3248.16), SIMDE_FLOAT32_C( 2982.01), SIMDE_FLOAT32_C( -1571.88), SIMDE_FLOAT32_C( 234.37), SIMDE_FLOAT32_C( -1769.88), SIMDE_FLOAT32_C( 66.60), SIMDE_FLOAT32_C( 342.88), SIMDE_FLOAT32_C( 1264.32), SIMDE_FLOAT32_C( -3565.33), SIMDE_FLOAT32_C( 1270.53), SIMDE_FLOAT32_C( -1649.74), SIMDE_FLOAT32_C( 6339.41), SIMDE_FLOAT32_C( -2429.41), SIMDE_FLOAT32_C( 1792.67), SIMDE_FLOAT32_C( -1938.09), SIMDE_FLOAT32_C( 5267.73) } 
}, { { SIMDE_FLOAT32_C( 52.77), SIMDE_FLOAT32_C( -73.33), SIMDE_FLOAT32_C( -46.07), SIMDE_FLOAT32_C( -9.41), SIMDE_FLOAT32_C( 34.95), SIMDE_FLOAT32_C( 21.97), SIMDE_FLOAT32_C( -38.57), SIMDE_FLOAT32_C( 72.60), SIMDE_FLOAT32_C( -27.54), SIMDE_FLOAT32_C( 15.07), SIMDE_FLOAT32_C( -98.24), SIMDE_FLOAT32_C( -72.31), SIMDE_FLOAT32_C( 34.13), SIMDE_FLOAT32_C( 24.83), SIMDE_FLOAT32_C( -78.32), SIMDE_FLOAT32_C( 19.24) }, { SIMDE_FLOAT32_C( 10.55), SIMDE_FLOAT32_C( -81.48), SIMDE_FLOAT32_C( -49.41), SIMDE_FLOAT32_C( 89.47), SIMDE_FLOAT32_C( 28.80), SIMDE_FLOAT32_C( -35.62), SIMDE_FLOAT32_C( 9.83), SIMDE_FLOAT32_C( 13.47), SIMDE_FLOAT32_C( 42.37), SIMDE_FLOAT32_C( -80.69), SIMDE_FLOAT32_C( -59.67), SIMDE_FLOAT32_C( 42.56), SIMDE_FLOAT32_C( 15.92), SIMDE_FLOAT32_C( -88.31), SIMDE_FLOAT32_C( -5.60), SIMDE_FLOAT32_C( -31.31) }, { SIMDE_FLOAT32_C( -61.63), SIMDE_FLOAT32_C( 48.33), SIMDE_FLOAT32_C( 59.28), SIMDE_FLOAT32_C( 73.32), SIMDE_FLOAT32_C( -29.70), SIMDE_FLOAT32_C( -79.29), SIMDE_FLOAT32_C( 45.92), SIMDE_FLOAT32_C( 42.77), SIMDE_FLOAT32_C( 35.79), SIMDE_FLOAT32_C( 47.68), SIMDE_FLOAT32_C( 70.46), SIMDE_FLOAT32_C( -30.08), SIMDE_FLOAT32_C( -27.50), SIMDE_FLOAT32_C( 92.14), SIMDE_FLOAT32_C( 89.16), SIMDE_FLOAT32_C( 83.05) }, { SIMDE_FLOAT32_C( -618.35), SIMDE_FLOAT32_C( -5926.60), SIMDE_FLOAT32_C( -2217.04), SIMDE_FLOAT32_C( 915.23), SIMDE_FLOAT32_C( -1036.26), SIMDE_FLOAT32_C( 703.28), SIMDE_FLOAT32_C( 425.06), SIMDE_FLOAT32_C( -935.15), SIMDE_FLOAT32_C( 1202.66), SIMDE_FLOAT32_C( 1263.68), SIMDE_FLOAT32_C( -5791.52), SIMDE_FLOAT32_C( 3047.43), SIMDE_FLOAT32_C( -570.85), SIMDE_FLOAT32_C( 2284.88), SIMDE_FLOAT32_C( -349.43), SIMDE_FLOAT32_C( 685.45) } }, { { SIMDE_FLOAT32_C( -89.34), SIMDE_FLOAT32_C( -60.25), SIMDE_FLOAT32_C( 72.52), SIMDE_FLOAT32_C( 39.45), SIMDE_FLOAT32_C( 4.13), SIMDE_FLOAT32_C( -17.65), SIMDE_FLOAT32_C( -47.07), SIMDE_FLOAT32_C( -53.49), SIMDE_FLOAT32_C( 1.66), SIMDE_FLOAT32_C( -6.74), SIMDE_FLOAT32_C( 89.07), SIMDE_FLOAT32_C( -82.41), SIMDE_FLOAT32_C( 
4.96), SIMDE_FLOAT32_C( -16.53), SIMDE_FLOAT32_C( -13.72), SIMDE_FLOAT32_C( 43.33) }, { SIMDE_FLOAT32_C( -68.20), SIMDE_FLOAT32_C( -54.44), SIMDE_FLOAT32_C( 16.64), SIMDE_FLOAT32_C( 2.10), SIMDE_FLOAT32_C( -33.73), SIMDE_FLOAT32_C( -37.44), SIMDE_FLOAT32_C( -55.13), SIMDE_FLOAT32_C( -97.94), SIMDE_FLOAT32_C( -89.76), SIMDE_FLOAT32_C( -84.67), SIMDE_FLOAT32_C( -28.02), SIMDE_FLOAT32_C( -17.26), SIMDE_FLOAT32_C( -92.53), SIMDE_FLOAT32_C( -38.86), SIMDE_FLOAT32_C( -34.20), SIMDE_FLOAT32_C( -81.88) }, { SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( -61.69), SIMDE_FLOAT32_C( 57.58), SIMDE_FLOAT32_C( -94.97), SIMDE_FLOAT32_C( 20.67), SIMDE_FLOAT32_C( -89.49), SIMDE_FLOAT32_C( -48.46), SIMDE_FLOAT32_C( -77.67), SIMDE_FLOAT32_C( 3.77), SIMDE_FLOAT32_C( -59.39), SIMDE_FLOAT32_C( -60.08), SIMDE_FLOAT32_C( -91.27), SIMDE_FLOAT32_C( 24.08), SIMDE_FLOAT32_C( 26.19), SIMDE_FLOAT32_C( 52.05), SIMDE_FLOAT32_C( 55.88) }, { SIMDE_FLOAT32_C( -6092.10), SIMDE_FLOAT32_C( -3341.70), SIMDE_FLOAT32_C( -1149.15), SIMDE_FLOAT32_C( -177.82), SIMDE_FLOAT32_C( 159.97), SIMDE_FLOAT32_C( -750.31), SIMDE_FLOAT32_C( -2643.43), SIMDE_FLOAT32_C( -5316.48), SIMDE_FLOAT32_C( 152.77), SIMDE_FLOAT32_C( -630.07), SIMDE_FLOAT32_C( 2435.66), SIMDE_FLOAT32_C( -1513.67), SIMDE_FLOAT32_C( 483.03), SIMDE_FLOAT32_C( -616.17), SIMDE_FLOAT32_C( -417.17), SIMDE_FLOAT32_C( 3603.74) } }, { { SIMDE_FLOAT32_C( 71.75), SIMDE_FLOAT32_C( -31.30), SIMDE_FLOAT32_C( -42.02), SIMDE_FLOAT32_C( -61.98), SIMDE_FLOAT32_C( 31.26), SIMDE_FLOAT32_C( 2.85), SIMDE_FLOAT32_C( -59.92), SIMDE_FLOAT32_C( 41.49), SIMDE_FLOAT32_C( 18.19), SIMDE_FLOAT32_C( 12.07), SIMDE_FLOAT32_C( -75.76), SIMDE_FLOAT32_C( 25.65), SIMDE_FLOAT32_C( 73.21), SIMDE_FLOAT32_C( -9.97), SIMDE_FLOAT32_C( 43.78), SIMDE_FLOAT32_C( -25.90) }, { SIMDE_FLOAT32_C( 28.35), SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( -20.87), SIMDE_FLOAT32_C( -50.99), SIMDE_FLOAT32_C( 11.86), SIMDE_FLOAT32_C( 30.66), SIMDE_FLOAT32_C( -28.66), SIMDE_FLOAT32_C( -84.37), SIMDE_FLOAT32_C( 71.27), 
SIMDE_FLOAT32_C( 11.26), SIMDE_FLOAT32_C( -75.64), SIMDE_FLOAT32_C( -4.65), SIMDE_FLOAT32_C( -62.54), SIMDE_FLOAT32_C( 76.41), SIMDE_FLOAT32_C( -48.78), SIMDE_FLOAT32_C( -90.79) }, { SIMDE_FLOAT32_C( -54.89), SIMDE_FLOAT32_C( 9.21), SIMDE_FLOAT32_C( -52.77), SIMDE_FLOAT32_C( 76.37), SIMDE_FLOAT32_C( -87.94), SIMDE_FLOAT32_C( -12.68), SIMDE_FLOAT32_C( 17.86), SIMDE_FLOAT32_C( 30.24), SIMDE_FLOAT32_C( 99.38), SIMDE_FLOAT32_C( 42.10), SIMDE_FLOAT32_C( -44.10), SIMDE_FLOAT32_C( 72.59), SIMDE_FLOAT32_C( -67.87), SIMDE_FLOAT32_C( 99.68), SIMDE_FLOAT32_C( -53.31), SIMDE_FLOAT32_C( 60.48) }, { SIMDE_FLOAT32_C( -2089.00), SIMDE_FLOAT32_C( 51.78), SIMDE_FLOAT32_C( -929.73), SIMDE_FLOAT32_C( -3083.99), SIMDE_FLOAT32_C( -458.68), SIMDE_FLOAT32_C( -100.06), SIMDE_FLOAT32_C( -1699.45), SIMDE_FLOAT32_C( 3530.75), SIMDE_FLOAT32_C( -1197.02), SIMDE_FLOAT32_C( -93.81), SIMDE_FLOAT32_C( -5774.59), SIMDE_FLOAT32_C( 191.86), SIMDE_FLOAT32_C( 4510.68), SIMDE_FLOAT32_C( 861.49), SIMDE_FLOAT32_C( 2082.28), SIMDE_FLOAT32_C( -2290.98) } }, { { SIMDE_FLOAT32_C( 1.03), SIMDE_FLOAT32_C( 25.82), SIMDE_FLOAT32_C( -90.51), SIMDE_FLOAT32_C( -87.11), SIMDE_FLOAT32_C( -43.51), SIMDE_FLOAT32_C( -19.16), SIMDE_FLOAT32_C( -71.47), SIMDE_FLOAT32_C( -72.24), SIMDE_FLOAT32_C( 92.10), SIMDE_FLOAT32_C( -47.11), SIMDE_FLOAT32_C( 23.11), SIMDE_FLOAT32_C( -70.45), SIMDE_FLOAT32_C( -70.70), SIMDE_FLOAT32_C( 74.33), SIMDE_FLOAT32_C( -61.24), SIMDE_FLOAT32_C( -25.59) }, { SIMDE_FLOAT32_C( -16.46), SIMDE_FLOAT32_C( -14.01), SIMDE_FLOAT32_C( -49.22), SIMDE_FLOAT32_C( -4.41), SIMDE_FLOAT32_C( 73.31), SIMDE_FLOAT32_C( 68.64), SIMDE_FLOAT32_C( -74.16), SIMDE_FLOAT32_C( 72.69), SIMDE_FLOAT32_C( 10.74), SIMDE_FLOAT32_C( -18.27), SIMDE_FLOAT32_C( 45.28), SIMDE_FLOAT32_C( 42.87), SIMDE_FLOAT32_C( -18.59), SIMDE_FLOAT32_C( 91.98), SIMDE_FLOAT32_C( 3.34), SIMDE_FLOAT32_C( 82.44) }, { SIMDE_FLOAT32_C( 17.80), SIMDE_FLOAT32_C( 12.84), SIMDE_FLOAT32_C( 95.33), SIMDE_FLOAT32_C( 74.28), SIMDE_FLOAT32_C( 93.67), SIMDE_FLOAT32_C( 
-76.14), SIMDE_FLOAT32_C( -97.96), SIMDE_FLOAT32_C( 85.77), SIMDE_FLOAT32_C( -23.25), SIMDE_FLOAT32_C( 25.15), SIMDE_FLOAT32_C( -84.68), SIMDE_FLOAT32_C( 6.05), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( -45.92), SIMDE_FLOAT32_C( 80.46), SIMDE_FLOAT32_C( 83.01) }, { SIMDE_FLOAT32_C( 34.75), SIMDE_FLOAT32_C( 374.58), SIMDE_FLOAT32_C( -4359.57), SIMDE_FLOAT32_C( -309.88), SIMDE_FLOAT32_C( 3283.39), SIMDE_FLOAT32_C( 1239.00), SIMDE_FLOAT32_C( -5398.18), SIMDE_FLOAT32_C( 5336.90), SIMDE_FLOAT32_C( -1012.40), SIMDE_FLOAT32_C( -835.55), SIMDE_FLOAT32_C( -1131.10), SIMDE_FLOAT32_C( 3026.24), SIMDE_FLOAT32_C( -1314.83), SIMDE_FLOAT32_C( -6882.79), SIMDE_FLOAT32_C( 285.00), SIMDE_FLOAT32_C( 2192.65) } }, { { SIMDE_FLOAT32_C( 40.08), SIMDE_FLOAT32_C( -68.77), SIMDE_FLOAT32_C( -21.40), SIMDE_FLOAT32_C( 13.39), SIMDE_FLOAT32_C( 99.87), SIMDE_FLOAT32_C( 4.44), SIMDE_FLOAT32_C( -13.92), SIMDE_FLOAT32_C( 10.61), SIMDE_FLOAT32_C( 86.17), SIMDE_FLOAT32_C( -68.63), SIMDE_FLOAT32_C( -46.52), SIMDE_FLOAT32_C( -32.42), SIMDE_FLOAT32_C( -76.66), SIMDE_FLOAT32_C( 56.82), SIMDE_FLOAT32_C( -49.98), SIMDE_FLOAT32_C( 41.14) }, { SIMDE_FLOAT32_C( -30.34), SIMDE_FLOAT32_C( -54.64), SIMDE_FLOAT32_C( 15.43), SIMDE_FLOAT32_C( -36.67), SIMDE_FLOAT32_C( -30.78), SIMDE_FLOAT32_C( 17.47), SIMDE_FLOAT32_C( -50.90), SIMDE_FLOAT32_C( 45.97), SIMDE_FLOAT32_C( -57.39), SIMDE_FLOAT32_C( -35.58), SIMDE_FLOAT32_C( -47.99), SIMDE_FLOAT32_C( 42.09), SIMDE_FLOAT32_C( 18.50), SIMDE_FLOAT32_C( -67.53), SIMDE_FLOAT32_C( 25.10), SIMDE_FLOAT32_C( -41.42) }, { SIMDE_FLOAT32_C( -36.29), SIMDE_FLOAT32_C( -96.30), SIMDE_FLOAT32_C( 71.97), SIMDE_FLOAT32_C( -36.42), SIMDE_FLOAT32_C( 8.14), SIMDE_FLOAT32_C( -41.94), SIMDE_FLOAT32_C( 74.19), SIMDE_FLOAT32_C( -5.68), SIMDE_FLOAT32_C( -10.58), SIMDE_FLOAT32_C( -72.33), SIMDE_FLOAT32_C( 61.90), SIMDE_FLOAT32_C( 12.77), SIMDE_FLOAT32_C( 84.49), SIMDE_FLOAT32_C( -88.07), SIMDE_FLOAT32_C( -46.09), SIMDE_FLOAT32_C( -45.85) }, { SIMDE_FLOAT32_C( 1179.74), SIMDE_FLOAT32_C( -3853.89), 
SIMDE_FLOAT32_C( 402.17), SIMDE_FLOAT32_C( 454.59), SIMDE_FLOAT32_C( 3082.14), SIMDE_FLOAT32_C( -119.51), SIMDE_FLOAT32_C( -634.34), SIMDE_FLOAT32_C( -493.42), SIMDE_FLOAT32_C( 4934.72), SIMDE_FLOAT32_C( -2514.19), SIMDE_FLOAT32_C( -2170.59), SIMDE_FLOAT32_C( 1377.33), SIMDE_FLOAT32_C( 1502.70), SIMDE_FLOAT32_C( 3748.98), SIMDE_FLOAT32_C( 1208.41), SIMDE_FLOAT32_C( 1658.17) } }, { { SIMDE_FLOAT32_C( -42.71), SIMDE_FLOAT32_C( 69.33), SIMDE_FLOAT32_C( 17.48), SIMDE_FLOAT32_C( 26.51), SIMDE_FLOAT32_C( -13.20), SIMDE_FLOAT32_C( 66.58), SIMDE_FLOAT32_C( -27.52), SIMDE_FLOAT32_C( 29.41), SIMDE_FLOAT32_C( -69.00), SIMDE_FLOAT32_C( 24.49), SIMDE_FLOAT32_C( -28.50), SIMDE_FLOAT32_C( 49.50), SIMDE_FLOAT32_C( 56.96), SIMDE_FLOAT32_C( 96.60), SIMDE_FLOAT32_C( -91.91), SIMDE_FLOAT32_C( -79.34) }, { SIMDE_FLOAT32_C( -99.70), SIMDE_FLOAT32_C( 80.06), SIMDE_FLOAT32_C( -15.76), SIMDE_FLOAT32_C( 8.44), SIMDE_FLOAT32_C( -61.89), SIMDE_FLOAT32_C( -41.56), SIMDE_FLOAT32_C( -97.24), SIMDE_FLOAT32_C( 27.54), SIMDE_FLOAT32_C( -13.89), SIMDE_FLOAT32_C( 64.67), SIMDE_FLOAT32_C( -59.70), SIMDE_FLOAT32_C( -29.40), SIMDE_FLOAT32_C( 76.59), SIMDE_FLOAT32_C( -5.79), SIMDE_FLOAT32_C( 24.75), SIMDE_FLOAT32_C( -66.12) }, { SIMDE_FLOAT32_C( -36.45), SIMDE_FLOAT32_C( -57.76), SIMDE_FLOAT32_C( 60.39), SIMDE_FLOAT32_C( 50.35), SIMDE_FLOAT32_C( -91.18), SIMDE_FLOAT32_C( -67.13), SIMDE_FLOAT32_C( -20.24), SIMDE_FLOAT32_C( -60.18), SIMDE_FLOAT32_C( 57.36), SIMDE_FLOAT32_C( 51.26), SIMDE_FLOAT32_C( 89.32), SIMDE_FLOAT32_C( 14.31), SIMDE_FLOAT32_C( 47.86), SIMDE_FLOAT32_C( 97.41), SIMDE_FLOAT32_C( 34.98), SIMDE_FLOAT32_C( 48.16) }, { SIMDE_FLOAT32_C( -4294.64), SIMDE_FLOAT32_C( -5608.32), SIMDE_FLOAT32_C( 335.87), SIMDE_FLOAT32_C( -173.39), SIMDE_FLOAT32_C( -908.13), SIMDE_FLOAT32_C( 2699.94), SIMDE_FLOAT32_C( -2696.28), SIMDE_FLOAT32_C( -870.13), SIMDE_FLOAT32_C( -901.05), SIMDE_FLOAT32_C( -1532.51), SIMDE_FLOAT32_C( -1612.13), SIMDE_FLOAT32_C( 1469.61), SIMDE_FLOAT32_C( -4314.71), SIMDE_FLOAT32_C( 
656.72), SIMDE_FLOAT32_C( 2309.75), SIMDE_FLOAT32_C( -5197.80) } }, { { SIMDE_FLOAT32_C( 77.47), SIMDE_FLOAT32_C( -80.78), SIMDE_FLOAT32_C( -43.40), SIMDE_FLOAT32_C( -84.42), SIMDE_FLOAT32_C( -22.34), SIMDE_FLOAT32_C( -40.64), SIMDE_FLOAT32_C( 43.12), SIMDE_FLOAT32_C( 63.76), SIMDE_FLOAT32_C( -75.97), SIMDE_FLOAT32_C( 83.43), SIMDE_FLOAT32_C( -65.64), SIMDE_FLOAT32_C( -99.38), SIMDE_FLOAT32_C( -22.36), SIMDE_FLOAT32_C( 59.12), SIMDE_FLOAT32_C( -65.50), SIMDE_FLOAT32_C( 41.19) }, { SIMDE_FLOAT32_C( -98.64), SIMDE_FLOAT32_C( 94.89), SIMDE_FLOAT32_C( -8.47), SIMDE_FLOAT32_C( -89.83), SIMDE_FLOAT32_C( -72.24), SIMDE_FLOAT32_C( 71.29), SIMDE_FLOAT32_C( -50.01), SIMDE_FLOAT32_C( 85.11), SIMDE_FLOAT32_C( 22.55), SIMDE_FLOAT32_C( -60.68), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -29.59), SIMDE_FLOAT32_C( -63.27), SIMDE_FLOAT32_C( -65.59), SIMDE_FLOAT32_C( -81.44), SIMDE_FLOAT32_C( -85.80) }, { SIMDE_FLOAT32_C( -46.37), SIMDE_FLOAT32_C( -24.84), SIMDE_FLOAT32_C( -70.22), SIMDE_FLOAT32_C( 31.29), SIMDE_FLOAT32_C( 34.52), SIMDE_FLOAT32_C( 72.90), SIMDE_FLOAT32_C( -4.95), SIMDE_FLOAT32_C( 58.55), SIMDE_FLOAT32_C( 56.33), SIMDE_FLOAT32_C( 29.42), SIMDE_FLOAT32_C( 59.17), SIMDE_FLOAT32_C( -66.03), SIMDE_FLOAT32_C( -11.46), SIMDE_FLOAT32_C( 93.67), SIMDE_FLOAT32_C( 75.15), SIMDE_FLOAT32_C( -10.11) }, { SIMDE_FLOAT32_C( 7595.27), SIMDE_FLOAT32_C( 7640.37), SIMDE_FLOAT32_C( -437.82), SIMDE_FLOAT32_C( -7552.16), SIMDE_FLOAT32_C( -1579.32), SIMDE_FLOAT32_C( 2970.13), SIMDE_FLOAT32_C( 2151.48), SIMDE_FLOAT32_C( -5368.06), SIMDE_FLOAT32_C( 1769.45), SIMDE_FLOAT32_C( 5091.95), SIMDE_FLOAT32_C( 21.76), SIMDE_FLOAT32_C( -3006.68), SIMDE_FLOAT32_C( -1426.18), SIMDE_FLOAT32_C( 3971.35), SIMDE_FLOAT32_C( -5259.17), SIMDE_FLOAT32_C( 3523.99) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 c = simde_mm512_loadu_ps(test_vec[i].c); simde__m512 r 
= simde_mm512_fnmadd_ps(a, b, c); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_fnmadd_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 c[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -33.42), SIMDE_FLOAT64_C( 18.77), SIMDE_FLOAT64_C( -64.80), SIMDE_FLOAT64_C( -31.25), SIMDE_FLOAT64_C( 13.14), SIMDE_FLOAT64_C( -25.39), SIMDE_FLOAT64_C( -32.34), SIMDE_FLOAT64_C( 4.66) }, { SIMDE_FLOAT64_C( 75.32), SIMDE_FLOAT64_C( 64.02), SIMDE_FLOAT64_C( 55.28), SIMDE_FLOAT64_C( 78.84), SIMDE_FLOAT64_C( 52.87), SIMDE_FLOAT64_C( -80.75), SIMDE_FLOAT64_C( -10.62), SIMDE_FLOAT64_C( 16.16) }, { SIMDE_FLOAT64_C( 95.97), SIMDE_FLOAT64_C( 62.64), SIMDE_FLOAT64_C( -14.97), SIMDE_FLOAT64_C( -48.39), SIMDE_FLOAT64_C( 80.72), SIMDE_FLOAT64_C( 32.09), SIMDE_FLOAT64_C( 81.91), SIMDE_FLOAT64_C( 89.26) }, { SIMDE_FLOAT64_C( 2613.16), SIMDE_FLOAT64_C( -1139.02), SIMDE_FLOAT64_C( 3567.17), SIMDE_FLOAT64_C( 2415.36), SIMDE_FLOAT64_C( -613.99), SIMDE_FLOAT64_C( -2018.15), SIMDE_FLOAT64_C( -261.54), SIMDE_FLOAT64_C( 13.95) } }, { { SIMDE_FLOAT64_C( -94.10), SIMDE_FLOAT64_C( 84.15), SIMDE_FLOAT64_C( 9.79), SIMDE_FLOAT64_C( -9.15), SIMDE_FLOAT64_C( 9.78), SIMDE_FLOAT64_C( -92.82), SIMDE_FLOAT64_C( -64.66), SIMDE_FLOAT64_C( 76.36) }, { SIMDE_FLOAT64_C( 25.95), SIMDE_FLOAT64_C( -29.46), SIMDE_FLOAT64_C( -54.89), SIMDE_FLOAT64_C( -60.91), SIMDE_FLOAT64_C( 45.15), SIMDE_FLOAT64_C( 12.77), SIMDE_FLOAT64_C( 43.76), SIMDE_FLOAT64_C( 20.47) }, { SIMDE_FLOAT64_C( -23.21), SIMDE_FLOAT64_C( -0.96), SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( -70.34), SIMDE_FLOAT64_C( 18.29), SIMDE_FLOAT64_C( 88.69), SIMDE_FLOAT64_C( 45.82), SIMDE_FLOAT64_C( 14.26) }, { SIMDE_FLOAT64_C( 2418.68), SIMDE_FLOAT64_C( 2478.10), SIMDE_FLOAT64_C( 536.68), SIMDE_FLOAT64_C( -627.67), SIMDE_FLOAT64_C( -423.28), SIMDE_FLOAT64_C( 1274.00), SIMDE_FLOAT64_C( 2875.34), 
SIMDE_FLOAT64_C( -1548.83) } }, { { SIMDE_FLOAT64_C( 51.33), SIMDE_FLOAT64_C( -69.15), SIMDE_FLOAT64_C( 65.87), SIMDE_FLOAT64_C( 32.05), SIMDE_FLOAT64_C( 62.93), SIMDE_FLOAT64_C( 47.79), SIMDE_FLOAT64_C( 21.31), SIMDE_FLOAT64_C( 68.84) }, { SIMDE_FLOAT64_C( 31.94), SIMDE_FLOAT64_C( -68.91), SIMDE_FLOAT64_C( -40.32), SIMDE_FLOAT64_C( -58.28), SIMDE_FLOAT64_C( -61.72), SIMDE_FLOAT64_C( -4.98), SIMDE_FLOAT64_C( -81.91), SIMDE_FLOAT64_C( 64.23) }, { SIMDE_FLOAT64_C( 65.56), SIMDE_FLOAT64_C( -36.80), SIMDE_FLOAT64_C( -96.68), SIMDE_FLOAT64_C( 10.71), SIMDE_FLOAT64_C( 75.97), SIMDE_FLOAT64_C( 47.08), SIMDE_FLOAT64_C( -68.83), SIMDE_FLOAT64_C( -47.24) }, { SIMDE_FLOAT64_C( -1573.92), SIMDE_FLOAT64_C( -4801.93), SIMDE_FLOAT64_C( 2559.20), SIMDE_FLOAT64_C( 1878.58), SIMDE_FLOAT64_C( 3960.01), SIMDE_FLOAT64_C( 285.07), SIMDE_FLOAT64_C( 1676.67), SIMDE_FLOAT64_C( -4468.83) } }, { { SIMDE_FLOAT64_C( -53.88), SIMDE_FLOAT64_C( 30.48), SIMDE_FLOAT64_C( -17.58), SIMDE_FLOAT64_C( 64.42), SIMDE_FLOAT64_C( 19.17), SIMDE_FLOAT64_C( -71.76), SIMDE_FLOAT64_C( -21.32), SIMDE_FLOAT64_C( -29.50) }, { SIMDE_FLOAT64_C( -40.91), SIMDE_FLOAT64_C( -55.45), SIMDE_FLOAT64_C( -97.45), SIMDE_FLOAT64_C( -77.98), SIMDE_FLOAT64_C( 92.34), SIMDE_FLOAT64_C( 23.85), SIMDE_FLOAT64_C( 90.86), SIMDE_FLOAT64_C( 24.27) }, { SIMDE_FLOAT64_C( 54.95), SIMDE_FLOAT64_C( -49.46), SIMDE_FLOAT64_C( 66.00), SIMDE_FLOAT64_C( 93.22), SIMDE_FLOAT64_C( 45.56), SIMDE_FLOAT64_C( 84.08), SIMDE_FLOAT64_C( 57.45), SIMDE_FLOAT64_C( 11.12) }, { SIMDE_FLOAT64_C( -2149.28), SIMDE_FLOAT64_C( 1640.66), SIMDE_FLOAT64_C( -1647.17), SIMDE_FLOAT64_C( 5116.69), SIMDE_FLOAT64_C( -1724.60), SIMDE_FLOAT64_C( 1795.56), SIMDE_FLOAT64_C( 1994.59), SIMDE_FLOAT64_C( 727.09) } }, { { SIMDE_FLOAT64_C( -52.72), SIMDE_FLOAT64_C( 60.77), SIMDE_FLOAT64_C( -78.17), SIMDE_FLOAT64_C( -76.75), SIMDE_FLOAT64_C( 7.85), SIMDE_FLOAT64_C( -47.00), SIMDE_FLOAT64_C( -23.99), SIMDE_FLOAT64_C( 53.98) }, { SIMDE_FLOAT64_C( 83.49), SIMDE_FLOAT64_C( 58.43), 
SIMDE_FLOAT64_C( 18.39), SIMDE_FLOAT64_C( 2.66), SIMDE_FLOAT64_C( 86.67), SIMDE_FLOAT64_C( 97.07), SIMDE_FLOAT64_C( 73.16), SIMDE_FLOAT64_C( -54.24) }, { SIMDE_FLOAT64_C( -58.38), SIMDE_FLOAT64_C( 75.70), SIMDE_FLOAT64_C( -32.22), SIMDE_FLOAT64_C( -66.05), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( -41.36), SIMDE_FLOAT64_C( 58.23), SIMDE_FLOAT64_C( -45.50) }, { SIMDE_FLOAT64_C( 4343.21), SIMDE_FLOAT64_C( -3475.09), SIMDE_FLOAT64_C( 1405.33), SIMDE_FLOAT64_C( 138.11), SIMDE_FLOAT64_C( -680.80), SIMDE_FLOAT64_C( 4520.93), SIMDE_FLOAT64_C( 1813.34), SIMDE_FLOAT64_C( 2882.38) } }, { { SIMDE_FLOAT64_C( 9.18), SIMDE_FLOAT64_C( 24.22), SIMDE_FLOAT64_C( -52.28), SIMDE_FLOAT64_C( -45.26), SIMDE_FLOAT64_C( 8.30), SIMDE_FLOAT64_C( -94.83), SIMDE_FLOAT64_C( 65.86), SIMDE_FLOAT64_C( 55.58) }, { SIMDE_FLOAT64_C( 65.95), SIMDE_FLOAT64_C( 87.69), SIMDE_FLOAT64_C( 78.83), SIMDE_FLOAT64_C( -26.20), SIMDE_FLOAT64_C( -59.31), SIMDE_FLOAT64_C( -45.15), SIMDE_FLOAT64_C( -72.22), SIMDE_FLOAT64_C( -75.82) }, { SIMDE_FLOAT64_C( -86.72), SIMDE_FLOAT64_C( 46.17), SIMDE_FLOAT64_C( 26.84), SIMDE_FLOAT64_C( 99.95), SIMDE_FLOAT64_C( 43.24), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -54.29), SIMDE_FLOAT64_C( 84.85) }, { SIMDE_FLOAT64_C( -692.14), SIMDE_FLOAT64_C( -2077.68), SIMDE_FLOAT64_C( 4148.07), SIMDE_FLOAT64_C( -1085.86), SIMDE_FLOAT64_C( 535.51), SIMDE_FLOAT64_C( -4281.57), SIMDE_FLOAT64_C( 4702.12), SIMDE_FLOAT64_C( 4298.93) } }, { { SIMDE_FLOAT64_C( -24.30), SIMDE_FLOAT64_C( 13.49), SIMDE_FLOAT64_C( -81.19), SIMDE_FLOAT64_C( 75.26), SIMDE_FLOAT64_C( 72.13), SIMDE_FLOAT64_C( 77.04), SIMDE_FLOAT64_C( -70.24), SIMDE_FLOAT64_C( -18.69) }, { SIMDE_FLOAT64_C( 1.26), SIMDE_FLOAT64_C( -22.52), SIMDE_FLOAT64_C( 36.06), SIMDE_FLOAT64_C( -90.44), SIMDE_FLOAT64_C( -17.34), SIMDE_FLOAT64_C( 1.92), SIMDE_FLOAT64_C( 65.15), SIMDE_FLOAT64_C( -51.39) }, { SIMDE_FLOAT64_C( -10.39), SIMDE_FLOAT64_C( 43.98), SIMDE_FLOAT64_C( 22.41), SIMDE_FLOAT64_C( 30.30), SIMDE_FLOAT64_C( 98.83), SIMDE_FLOAT64_C( 50.19), 
SIMDE_FLOAT64_C( 54.48), SIMDE_FLOAT64_C( -87.89) }, { SIMDE_FLOAT64_C( 20.23), SIMDE_FLOAT64_C( 347.77), SIMDE_FLOAT64_C( 2950.12), SIMDE_FLOAT64_C( 6836.81), SIMDE_FLOAT64_C( 1349.56), SIMDE_FLOAT64_C( -97.73), SIMDE_FLOAT64_C( 4630.62), SIMDE_FLOAT64_C( -1048.37) } }, { { SIMDE_FLOAT64_C( -3.64), SIMDE_FLOAT64_C( -18.68), SIMDE_FLOAT64_C( -87.95), SIMDE_FLOAT64_C( -60.40), SIMDE_FLOAT64_C( 81.32), SIMDE_FLOAT64_C( -42.24), SIMDE_FLOAT64_C( -75.55), SIMDE_FLOAT64_C( -42.97) }, { SIMDE_FLOAT64_C( 71.25), SIMDE_FLOAT64_C( -56.74), SIMDE_FLOAT64_C( -67.71), SIMDE_FLOAT64_C( 43.39), SIMDE_FLOAT64_C( -79.71), SIMDE_FLOAT64_C( -37.95), SIMDE_FLOAT64_C( -75.30), SIMDE_FLOAT64_C( 21.55) }, { SIMDE_FLOAT64_C( 39.53), SIMDE_FLOAT64_C( 60.76), SIMDE_FLOAT64_C( 31.12), SIMDE_FLOAT64_C( -77.81), SIMDE_FLOAT64_C( -37.33), SIMDE_FLOAT64_C( -3.74), SIMDE_FLOAT64_C( -29.20), SIMDE_FLOAT64_C( 52.28) }, { SIMDE_FLOAT64_C( 298.88), SIMDE_FLOAT64_C( -999.14), SIMDE_FLOAT64_C( -5923.97), SIMDE_FLOAT64_C( 2542.95), SIMDE_FLOAT64_C( 6444.69), SIMDE_FLOAT64_C( -1606.75), SIMDE_FLOAT64_C( -5718.11), SIMDE_FLOAT64_C( 978.28) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d c = simde_mm512_loadu_pd(test_vec[i].c); simde__m512d r = simde_mm512_fnmadd_pd(a, b, c); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_fnmadd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_fnmadd_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/fnmsub.c000066400000000000000000000627221400333146700167230ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * 
restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 kitegi */ #define SIMDE_TEST_X86_AVX512_INSN fnmsub #include #include #include static int test_simde_mm512_fnmsub_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 c[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 38.95), SIMDE_FLOAT32_C( -13.06), SIMDE_FLOAT32_C( 27.88), SIMDE_FLOAT32_C( 62.28), SIMDE_FLOAT32_C( 66.10), SIMDE_FLOAT32_C( -82.38), SIMDE_FLOAT32_C( 5.33), SIMDE_FLOAT32_C( 59.08), SIMDE_FLOAT32_C( 11.50), SIMDE_FLOAT32_C( 2.93), SIMDE_FLOAT32_C( 86.03), SIMDE_FLOAT32_C( -85.53), SIMDE_FLOAT32_C( 41.10), SIMDE_FLOAT32_C( -96.18), SIMDE_FLOAT32_C( 60.27), SIMDE_FLOAT32_C( 88.72) }, { SIMDE_FLOAT32_C( 84.81), SIMDE_FLOAT32_C( 87.90), SIMDE_FLOAT32_C( 94.42), SIMDE_FLOAT32_C( -72.92), SIMDE_FLOAT32_C( -94.27), SIMDE_FLOAT32_C( 82.63), SIMDE_FLOAT32_C( 39.45), SIMDE_FLOAT32_C( 66.67), SIMDE_FLOAT32_C( 16.72), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( -40.95), SIMDE_FLOAT32_C( 62.84), SIMDE_FLOAT32_C( -22.56), SIMDE_FLOAT32_C( -32.15), SIMDE_FLOAT32_C( 60.85), 
SIMDE_FLOAT32_C( -83.61) }, { SIMDE_FLOAT32_C( 54.78), SIMDE_FLOAT32_C( -11.27), SIMDE_FLOAT32_C( 78.67), SIMDE_FLOAT32_C( 20.88), SIMDE_FLOAT32_C( 6.35), SIMDE_FLOAT32_C( -16.00), SIMDE_FLOAT32_C( -20.03), SIMDE_FLOAT32_C( -82.16), SIMDE_FLOAT32_C( 86.93), SIMDE_FLOAT32_C( -34.00), SIMDE_FLOAT32_C( -67.68), SIMDE_FLOAT32_C( 28.02), SIMDE_FLOAT32_C( -30.18), SIMDE_FLOAT32_C( 92.59), SIMDE_FLOAT32_C( 16.74), SIMDE_FLOAT32_C( -45.37) }, { SIMDE_FLOAT32_C( -3358.13), SIMDE_FLOAT32_C( 1159.24), SIMDE_FLOAT32_C( -2711.10), SIMDE_FLOAT32_C( 4520.58), SIMDE_FLOAT32_C( 6224.90), SIMDE_FLOAT32_C( 6823.06), SIMDE_FLOAT32_C( -190.24), SIMDE_FLOAT32_C( -3856.70), SIMDE_FLOAT32_C( -279.21), SIMDE_FLOAT32_C( 31.74), SIMDE_FLOAT32_C( 3590.61), SIMDE_FLOAT32_C( 5346.69), SIMDE_FLOAT32_C( 957.40), SIMDE_FLOAT32_C( -3184.78), SIMDE_FLOAT32_C( -3684.17), SIMDE_FLOAT32_C( 7463.25) } }, { { SIMDE_FLOAT32_C( 80.49), SIMDE_FLOAT32_C( 11.16), SIMDE_FLOAT32_C( -18.30), SIMDE_FLOAT32_C( 86.22), SIMDE_FLOAT32_C( -6.21), SIMDE_FLOAT32_C( -78.85), SIMDE_FLOAT32_C( 52.89), SIMDE_FLOAT32_C( -89.49), SIMDE_FLOAT32_C( 21.92), SIMDE_FLOAT32_C( -88.06), SIMDE_FLOAT32_C( 73.35), SIMDE_FLOAT32_C( 99.36), SIMDE_FLOAT32_C( -20.21), SIMDE_FLOAT32_C( 34.20), SIMDE_FLOAT32_C( -84.24), SIMDE_FLOAT32_C( -65.43) }, { SIMDE_FLOAT32_C( -77.07), SIMDE_FLOAT32_C( 94.43), SIMDE_FLOAT32_C( 55.45), SIMDE_FLOAT32_C( 29.28), SIMDE_FLOAT32_C( -21.57), SIMDE_FLOAT32_C( -64.58), SIMDE_FLOAT32_C( 47.12), SIMDE_FLOAT32_C( -34.64), SIMDE_FLOAT32_C( 1.42), SIMDE_FLOAT32_C( 79.44), SIMDE_FLOAT32_C( 93.38), SIMDE_FLOAT32_C( 71.24), SIMDE_FLOAT32_C( 72.03), SIMDE_FLOAT32_C( 10.12), SIMDE_FLOAT32_C( -74.14), SIMDE_FLOAT32_C( 52.52) }, { SIMDE_FLOAT32_C( -78.71), SIMDE_FLOAT32_C( 7.57), SIMDE_FLOAT32_C( 38.73), SIMDE_FLOAT32_C( 15.08), SIMDE_FLOAT32_C( 28.72), SIMDE_FLOAT32_C( -8.38), SIMDE_FLOAT32_C( 25.59), SIMDE_FLOAT32_C( -49.36), SIMDE_FLOAT32_C( 3.56), SIMDE_FLOAT32_C( -1.06), SIMDE_FLOAT32_C( -50.00), SIMDE_FLOAT32_C( 
83.35), SIMDE_FLOAT32_C( -66.86), SIMDE_FLOAT32_C( -34.24), SIMDE_FLOAT32_C( -82.08), SIMDE_FLOAT32_C( -43.92) }, { SIMDE_FLOAT32_C( 6282.07), SIMDE_FLOAT32_C( -1061.41), SIMDE_FLOAT32_C( 976.01), SIMDE_FLOAT32_C( -2539.60), SIMDE_FLOAT32_C( -162.67), SIMDE_FLOAT32_C( -5083.75), SIMDE_FLOAT32_C( -2517.77), SIMDE_FLOAT32_C( -3050.57), SIMDE_FLOAT32_C( -34.69), SIMDE_FLOAT32_C( 6996.55), SIMDE_FLOAT32_C( -6799.42), SIMDE_FLOAT32_C( -7161.76), SIMDE_FLOAT32_C( 1522.59), SIMDE_FLOAT32_C( -311.86), SIMDE_FLOAT32_C( -6163.47), SIMDE_FLOAT32_C( 3480.30) } }, { { SIMDE_FLOAT32_C( -39.81), SIMDE_FLOAT32_C( 73.37), SIMDE_FLOAT32_C( 85.36), SIMDE_FLOAT32_C( 38.62), SIMDE_FLOAT32_C( -91.20), SIMDE_FLOAT32_C( 32.48), SIMDE_FLOAT32_C( -96.03), SIMDE_FLOAT32_C( 10.22), SIMDE_FLOAT32_C( 11.92), SIMDE_FLOAT32_C( 97.35), SIMDE_FLOAT32_C( -18.55), SIMDE_FLOAT32_C( -16.05), SIMDE_FLOAT32_C( 7.48), SIMDE_FLOAT32_C( 7.32), SIMDE_FLOAT32_C( -63.53), SIMDE_FLOAT32_C( 28.76) }, { SIMDE_FLOAT32_C( -85.12), SIMDE_FLOAT32_C( 75.20), SIMDE_FLOAT32_C( -56.16), SIMDE_FLOAT32_C( 43.60), SIMDE_FLOAT32_C( -33.18), SIMDE_FLOAT32_C( 69.43), SIMDE_FLOAT32_C( 94.24), SIMDE_FLOAT32_C( 70.38), SIMDE_FLOAT32_C( -31.63), SIMDE_FLOAT32_C( -55.76), SIMDE_FLOAT32_C( 53.73), SIMDE_FLOAT32_C( 1.51), SIMDE_FLOAT32_C( 10.00), SIMDE_FLOAT32_C( 71.65), SIMDE_FLOAT32_C( 57.59), SIMDE_FLOAT32_C( 70.19) }, { SIMDE_FLOAT32_C( 45.02), SIMDE_FLOAT32_C( 42.95), SIMDE_FLOAT32_C( 8.80), SIMDE_FLOAT32_C( 53.81), SIMDE_FLOAT32_C( -24.57), SIMDE_FLOAT32_C( 12.78), SIMDE_FLOAT32_C( -35.97), SIMDE_FLOAT32_C( 87.34), SIMDE_FLOAT32_C( 10.13), SIMDE_FLOAT32_C( 45.48), SIMDE_FLOAT32_C( -28.70), SIMDE_FLOAT32_C( -82.39), SIMDE_FLOAT32_C( -47.20), SIMDE_FLOAT32_C( 7.76), SIMDE_FLOAT32_C( 46.37), SIMDE_FLOAT32_C( -32.32) }, { SIMDE_FLOAT32_C( -3433.65), SIMDE_FLOAT32_C( -5560.37), SIMDE_FLOAT32_C( 4785.02), SIMDE_FLOAT32_C( -1737.64), SIMDE_FLOAT32_C( -3001.45), SIMDE_FLOAT32_C( -2267.87), SIMDE_FLOAT32_C( 9085.84), SIMDE_FLOAT32_C( 
-806.62), SIMDE_FLOAT32_C( 366.90), SIMDE_FLOAT32_C( 5382.76), SIMDE_FLOAT32_C( 1025.39), SIMDE_FLOAT32_C( 106.63), SIMDE_FLOAT32_C( -27.60), SIMDE_FLOAT32_C( -532.24), SIMDE_FLOAT32_C( 3612.32), SIMDE_FLOAT32_C( -1986.34) } }, { { SIMDE_FLOAT32_C( -17.04), SIMDE_FLOAT32_C( 90.21), SIMDE_FLOAT32_C( -88.72), SIMDE_FLOAT32_C( 49.79), SIMDE_FLOAT32_C( 59.64), SIMDE_FLOAT32_C( -94.48), SIMDE_FLOAT32_C( 20.17), SIMDE_FLOAT32_C( -71.99), SIMDE_FLOAT32_C( -50.24), SIMDE_FLOAT32_C( -26.10), SIMDE_FLOAT32_C( 29.52), SIMDE_FLOAT32_C( 59.76), SIMDE_FLOAT32_C( -54.46), SIMDE_FLOAT32_C( -12.89), SIMDE_FLOAT32_C( 29.94), SIMDE_FLOAT32_C( 90.56) }, { SIMDE_FLOAT32_C( -69.94), SIMDE_FLOAT32_C( -61.25), SIMDE_FLOAT32_C( 44.38), SIMDE_FLOAT32_C( 5.48), SIMDE_FLOAT32_C( 51.53), SIMDE_FLOAT32_C( -91.59), SIMDE_FLOAT32_C( -7.17), SIMDE_FLOAT32_C( -38.34), SIMDE_FLOAT32_C( 53.89), SIMDE_FLOAT32_C( 64.12), SIMDE_FLOAT32_C( -20.73), SIMDE_FLOAT32_C( -93.31), SIMDE_FLOAT32_C( -28.11), SIMDE_FLOAT32_C( -74.36), SIMDE_FLOAT32_C( -25.63), SIMDE_FLOAT32_C( 54.85) }, { SIMDE_FLOAT32_C( -84.15), SIMDE_FLOAT32_C( -14.35), SIMDE_FLOAT32_C( 4.64), SIMDE_FLOAT32_C( 75.49), SIMDE_FLOAT32_C( -8.83), SIMDE_FLOAT32_C( -75.19), SIMDE_FLOAT32_C( -96.50), SIMDE_FLOAT32_C( 40.92), SIMDE_FLOAT32_C( -1.30), SIMDE_FLOAT32_C( 33.03), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 44.25), SIMDE_FLOAT32_C( -79.86), SIMDE_FLOAT32_C( -69.37), SIMDE_FLOAT32_C( 34.81), SIMDE_FLOAT32_C( -49.80) }, { SIMDE_FLOAT32_C( -1107.63), SIMDE_FLOAT32_C( 5539.71), SIMDE_FLOAT32_C( 3932.75), SIMDE_FLOAT32_C( -348.34), SIMDE_FLOAT32_C( -3064.42), SIMDE_FLOAT32_C( -8578.23), SIMDE_FLOAT32_C( 241.12), SIMDE_FLOAT32_C( -2801.02), SIMDE_FLOAT32_C( 2708.73), SIMDE_FLOAT32_C( 1640.50), SIMDE_FLOAT32_C( 611.27), SIMDE_FLOAT32_C( 5531.96), SIMDE_FLOAT32_C( -1451.01), SIMDE_FLOAT32_C( -889.13), SIMDE_FLOAT32_C( 732.55), SIMDE_FLOAT32_C( -4917.42) } }, { { SIMDE_FLOAT32_C( -30.63), SIMDE_FLOAT32_C( -20.81), SIMDE_FLOAT32_C( 55.68), 
SIMDE_FLOAT32_C( -79.10), SIMDE_FLOAT32_C( -12.41), SIMDE_FLOAT32_C( -51.49), SIMDE_FLOAT32_C( -17.44), SIMDE_FLOAT32_C( -58.52), SIMDE_FLOAT32_C( -87.36), SIMDE_FLOAT32_C( 61.82), SIMDE_FLOAT32_C( -51.83), SIMDE_FLOAT32_C( -15.47), SIMDE_FLOAT32_C( 87.46), SIMDE_FLOAT32_C( 22.53), SIMDE_FLOAT32_C( -60.62), SIMDE_FLOAT32_C( -96.69) }, { SIMDE_FLOAT32_C( -91.82), SIMDE_FLOAT32_C( 44.02), SIMDE_FLOAT32_C( 78.80), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( 68.82), SIMDE_FLOAT32_C( 82.31), SIMDE_FLOAT32_C( -59.73), SIMDE_FLOAT32_C( -32.48), SIMDE_FLOAT32_C( 15.33), SIMDE_FLOAT32_C( 40.95), SIMDE_FLOAT32_C( -88.23), SIMDE_FLOAT32_C( 35.47), SIMDE_FLOAT32_C( 71.58), SIMDE_FLOAT32_C( 46.58), SIMDE_FLOAT32_C( 85.67), SIMDE_FLOAT32_C( -59.05) }, { SIMDE_FLOAT32_C( -74.23), SIMDE_FLOAT32_C( 41.35), SIMDE_FLOAT32_C( -38.15), SIMDE_FLOAT32_C( 13.36), SIMDE_FLOAT32_C( 89.87), SIMDE_FLOAT32_C( 44.41), SIMDE_FLOAT32_C( 54.84), SIMDE_FLOAT32_C( -97.49), SIMDE_FLOAT32_C( 6.23), SIMDE_FLOAT32_C( -97.00), SIMDE_FLOAT32_C( -12.97), SIMDE_FLOAT32_C( -6.30), SIMDE_FLOAT32_C( 25.54), SIMDE_FLOAT32_C( 26.41), SIMDE_FLOAT32_C( -2.99), SIMDE_FLOAT32_C( 33.72) }, { SIMDE_FLOAT32_C( -2738.22), SIMDE_FLOAT32_C( 874.71), SIMDE_FLOAT32_C( -4349.43), SIMDE_FLOAT32_C( -64.77), SIMDE_FLOAT32_C( 764.19), SIMDE_FLOAT32_C( 4193.73), SIMDE_FLOAT32_C( -1096.53), SIMDE_FLOAT32_C( -1803.24), SIMDE_FLOAT32_C( 1333.00), SIMDE_FLOAT32_C( -2434.53), SIMDE_FLOAT32_C( -4559.99), SIMDE_FLOAT32_C( 555.02), SIMDE_FLOAT32_C( -6285.93), SIMDE_FLOAT32_C( -1075.86), SIMDE_FLOAT32_C( 5196.31), SIMDE_FLOAT32_C( -5743.26) } }, { { SIMDE_FLOAT32_C( -29.57), SIMDE_FLOAT32_C( -24.19), SIMDE_FLOAT32_C( -66.94), SIMDE_FLOAT32_C( -60.75), SIMDE_FLOAT32_C( -41.88), SIMDE_FLOAT32_C( -26.67), SIMDE_FLOAT32_C( 6.77), SIMDE_FLOAT32_C( 73.45), SIMDE_FLOAT32_C( -85.71), SIMDE_FLOAT32_C( 18.54), SIMDE_FLOAT32_C( 8.92), SIMDE_FLOAT32_C( 85.87), SIMDE_FLOAT32_C( -34.88), SIMDE_FLOAT32_C( -5.41), SIMDE_FLOAT32_C( -73.18), SIMDE_FLOAT32_C( 
-9.11) }, { SIMDE_FLOAT32_C( -64.05), SIMDE_FLOAT32_C( -11.32), SIMDE_FLOAT32_C( -95.75), SIMDE_FLOAT32_C( -74.18), SIMDE_FLOAT32_C( -66.91), SIMDE_FLOAT32_C( 59.08), SIMDE_FLOAT32_C( -71.68), SIMDE_FLOAT32_C( 39.32), SIMDE_FLOAT32_C( 62.09), SIMDE_FLOAT32_C( 15.35), SIMDE_FLOAT32_C( -66.98), SIMDE_FLOAT32_C( -12.37), SIMDE_FLOAT32_C( -58.24), SIMDE_FLOAT32_C( 30.03), SIMDE_FLOAT32_C( -78.66), SIMDE_FLOAT32_C( 12.19) }, { SIMDE_FLOAT32_C( -94.16), SIMDE_FLOAT32_C( -45.59), SIMDE_FLOAT32_C( 51.44), SIMDE_FLOAT32_C( -36.04), SIMDE_FLOAT32_C( 27.74), SIMDE_FLOAT32_C( -41.79), SIMDE_FLOAT32_C( -62.59), SIMDE_FLOAT32_C( 42.03), SIMDE_FLOAT32_C( 76.75), SIMDE_FLOAT32_C( 46.33), SIMDE_FLOAT32_C( 27.90), SIMDE_FLOAT32_C( -58.12), SIMDE_FLOAT32_C( -59.08), SIMDE_FLOAT32_C( 54.72), SIMDE_FLOAT32_C( 32.77), SIMDE_FLOAT32_C( -23.13) }, { SIMDE_FLOAT32_C( -1799.80), SIMDE_FLOAT32_C( -228.24), SIMDE_FLOAT32_C( -6460.95), SIMDE_FLOAT32_C( -4470.40), SIMDE_FLOAT32_C( -2829.93), SIMDE_FLOAT32_C( 1617.45), SIMDE_FLOAT32_C( 547.86), SIMDE_FLOAT32_C( -2930.08), SIMDE_FLOAT32_C( 5244.98), SIMDE_FLOAT32_C( -330.92), SIMDE_FLOAT32_C( 569.56), SIMDE_FLOAT32_C( 1120.33), SIMDE_FLOAT32_C( -1972.33), SIMDE_FLOAT32_C( 107.74), SIMDE_FLOAT32_C( -5789.11), SIMDE_FLOAT32_C( 134.18) } }, { { SIMDE_FLOAT32_C( -56.60), SIMDE_FLOAT32_C( 37.01), SIMDE_FLOAT32_C( 2.69), SIMDE_FLOAT32_C( -23.52), SIMDE_FLOAT32_C( -3.90), SIMDE_FLOAT32_C( 31.01), SIMDE_FLOAT32_C( -84.20), SIMDE_FLOAT32_C( -41.81), SIMDE_FLOAT32_C( -53.64), SIMDE_FLOAT32_C( -51.18), SIMDE_FLOAT32_C( 45.81), SIMDE_FLOAT32_C( -11.87), SIMDE_FLOAT32_C( 78.85), SIMDE_FLOAT32_C( 67.16), SIMDE_FLOAT32_C( -99.68), SIMDE_FLOAT32_C( 84.69) }, { SIMDE_FLOAT32_C( -78.44), SIMDE_FLOAT32_C( 51.75), SIMDE_FLOAT32_C( -51.36), SIMDE_FLOAT32_C( 49.31), SIMDE_FLOAT32_C( -90.04), SIMDE_FLOAT32_C( -13.95), SIMDE_FLOAT32_C( -8.66), SIMDE_FLOAT32_C( 86.71), SIMDE_FLOAT32_C( -67.62), SIMDE_FLOAT32_C( -80.76), SIMDE_FLOAT32_C( -71.41), SIMDE_FLOAT32_C( -26.70), 
SIMDE_FLOAT32_C( 73.96), SIMDE_FLOAT32_C( 61.35), SIMDE_FLOAT32_C( 50.17), SIMDE_FLOAT32_C( -82.65) }, { SIMDE_FLOAT32_C( -1.64), SIMDE_FLOAT32_C( -47.14), SIMDE_FLOAT32_C( -6.17), SIMDE_FLOAT32_C( 94.46), SIMDE_FLOAT32_C( 83.87), SIMDE_FLOAT32_C( 9.63), SIMDE_FLOAT32_C( -47.35), SIMDE_FLOAT32_C( -69.77), SIMDE_FLOAT32_C( 58.45), SIMDE_FLOAT32_C( 98.46), SIMDE_FLOAT32_C( 18.36), SIMDE_FLOAT32_C( 37.30), SIMDE_FLOAT32_C( 65.61), SIMDE_FLOAT32_C( 18.67), SIMDE_FLOAT32_C( 21.98), SIMDE_FLOAT32_C( 87.18) }, { SIMDE_FLOAT32_C( -4438.06), SIMDE_FLOAT32_C( -1868.13), SIMDE_FLOAT32_C( 144.33), SIMDE_FLOAT32_C( 1065.31), SIMDE_FLOAT32_C( -435.03), SIMDE_FLOAT32_C( 422.96), SIMDE_FLOAT32_C( -681.82), SIMDE_FLOAT32_C( 3695.12), SIMDE_FLOAT32_C( -3685.59), SIMDE_FLOAT32_C( -4231.76), SIMDE_FLOAT32_C( 3252.93), SIMDE_FLOAT32_C( -354.23), SIMDE_FLOAT32_C( -5897.36), SIMDE_FLOAT32_C( -4138.94), SIMDE_FLOAT32_C( 4978.97), SIMDE_FLOAT32_C( 6912.45) } }, { { SIMDE_FLOAT32_C( -29.57), SIMDE_FLOAT32_C( 70.63), SIMDE_FLOAT32_C( 36.48), SIMDE_FLOAT32_C( -19.61), SIMDE_FLOAT32_C( -43.32), SIMDE_FLOAT32_C( -72.18), SIMDE_FLOAT32_C( -32.90), SIMDE_FLOAT32_C( -10.94), SIMDE_FLOAT32_C( -52.94), SIMDE_FLOAT32_C( -4.31), SIMDE_FLOAT32_C( 62.36), SIMDE_FLOAT32_C( -78.99), SIMDE_FLOAT32_C( -42.96), SIMDE_FLOAT32_C( 12.54), SIMDE_FLOAT32_C( -61.64), SIMDE_FLOAT32_C( 55.40) }, { SIMDE_FLOAT32_C( 65.40), SIMDE_FLOAT32_C( 32.20), SIMDE_FLOAT32_C( 49.86), SIMDE_FLOAT32_C( 49.27), SIMDE_FLOAT32_C( -58.17), SIMDE_FLOAT32_C( -97.49), SIMDE_FLOAT32_C( 79.50), SIMDE_FLOAT32_C( -99.71), SIMDE_FLOAT32_C( -99.03), SIMDE_FLOAT32_C( -2.14), SIMDE_FLOAT32_C( 37.59), SIMDE_FLOAT32_C( 66.58), SIMDE_FLOAT32_C( -83.46), SIMDE_FLOAT32_C( -40.43), SIMDE_FLOAT32_C( 53.75), SIMDE_FLOAT32_C( -13.04) }, { SIMDE_FLOAT32_C( -69.80), SIMDE_FLOAT32_C( -9.76), SIMDE_FLOAT32_C( 67.35), SIMDE_FLOAT32_C( -13.12), SIMDE_FLOAT32_C( 18.06), SIMDE_FLOAT32_C( -65.56), SIMDE_FLOAT32_C( 75.94), SIMDE_FLOAT32_C( 65.11), SIMDE_FLOAT32_C( 
30.13), SIMDE_FLOAT32_C( 38.30), SIMDE_FLOAT32_C( 86.12), SIMDE_FLOAT32_C( 87.17), SIMDE_FLOAT32_C( -49.16), SIMDE_FLOAT32_C( -75.51), SIMDE_FLOAT32_C( 42.57), SIMDE_FLOAT32_C( -83.76) }, { SIMDE_FLOAT32_C( 2003.68), SIMDE_FLOAT32_C( -2264.53), SIMDE_FLOAT32_C( -1886.24), SIMDE_FLOAT32_C( 979.30), SIMDE_FLOAT32_C( -2537.98), SIMDE_FLOAT32_C( -6971.27), SIMDE_FLOAT32_C( 2539.61), SIMDE_FLOAT32_C( -1155.94), SIMDE_FLOAT32_C( -5272.78), SIMDE_FLOAT32_C( -47.52), SIMDE_FLOAT32_C( -2430.23), SIMDE_FLOAT32_C( 5171.98), SIMDE_FLOAT32_C( -3536.28), SIMDE_FLOAT32_C( 582.50), SIMDE_FLOAT32_C( 3270.58), SIMDE_FLOAT32_C( 806.18) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 c = simde_mm512_loadu_ps(test_vec[i].c); simde__m512 r = simde_mm512_fnmsub_ps(a, b, c); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_fnmsub_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 c[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 65.73), SIMDE_FLOAT64_C( 16.84), SIMDE_FLOAT64_C( -62.22), SIMDE_FLOAT64_C( 41.28), SIMDE_FLOAT64_C( 41.92), SIMDE_FLOAT64_C( 45.60), SIMDE_FLOAT64_C( 20.93), SIMDE_FLOAT64_C( 8.26) }, { SIMDE_FLOAT64_C( 7.90), SIMDE_FLOAT64_C( 31.90), SIMDE_FLOAT64_C( -14.89), SIMDE_FLOAT64_C( -98.38), SIMDE_FLOAT64_C( -98.27), SIMDE_FLOAT64_C( 60.31), SIMDE_FLOAT64_C( -39.17), SIMDE_FLOAT64_C( 82.51) }, { SIMDE_FLOAT64_C( 35.14), SIMDE_FLOAT64_C( -85.00), SIMDE_FLOAT64_C( -10.86), SIMDE_FLOAT64_C( 33.90), SIMDE_FLOAT64_C( -68.39), SIMDE_FLOAT64_C( -87.95), SIMDE_FLOAT64_C( -87.51), SIMDE_FLOAT64_C( -78.91) }, { SIMDE_FLOAT64_C( -554.41), SIMDE_FLOAT64_C( -452.20), SIMDE_FLOAT64_C( -915.60), SIMDE_FLOAT64_C( 4027.23), SIMDE_FLOAT64_C( 4187.87), SIMDE_FLOAT64_C( 
-2662.19), SIMDE_FLOAT64_C( 907.34), SIMDE_FLOAT64_C( -602.62) } }, { { SIMDE_FLOAT64_C( -48.41), SIMDE_FLOAT64_C( 52.12), SIMDE_FLOAT64_C( -76.82), SIMDE_FLOAT64_C( -20.81), SIMDE_FLOAT64_C( 40.27), SIMDE_FLOAT64_C( -85.67), SIMDE_FLOAT64_C( -20.01), SIMDE_FLOAT64_C( 6.00) }, { SIMDE_FLOAT64_C( 31.17), SIMDE_FLOAT64_C( 17.77), SIMDE_FLOAT64_C( -52.72), SIMDE_FLOAT64_C( -26.91), SIMDE_FLOAT64_C( -36.63), SIMDE_FLOAT64_C( 68.22), SIMDE_FLOAT64_C( 81.34), SIMDE_FLOAT64_C( 71.27) }, { SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( -33.55), SIMDE_FLOAT64_C( 72.89), SIMDE_FLOAT64_C( 1.84), SIMDE_FLOAT64_C( -73.23), SIMDE_FLOAT64_C( -66.27), SIMDE_FLOAT64_C( -15.65), SIMDE_FLOAT64_C( 61.91) }, { SIMDE_FLOAT64_C( 1508.83), SIMDE_FLOAT64_C( -892.62), SIMDE_FLOAT64_C( -4122.84), SIMDE_FLOAT64_C( -561.84), SIMDE_FLOAT64_C( 1548.32), SIMDE_FLOAT64_C( 5910.68), SIMDE_FLOAT64_C( 1643.26), SIMDE_FLOAT64_C( -489.53) } }, { { SIMDE_FLOAT64_C( -51.27), SIMDE_FLOAT64_C( 73.48), SIMDE_FLOAT64_C( -4.19), SIMDE_FLOAT64_C( -19.66), SIMDE_FLOAT64_C( 85.53), SIMDE_FLOAT64_C( 8.30), SIMDE_FLOAT64_C( 1.43), SIMDE_FLOAT64_C( -62.88) }, { SIMDE_FLOAT64_C( -39.58), SIMDE_FLOAT64_C( 24.60), SIMDE_FLOAT64_C( 16.31), SIMDE_FLOAT64_C( -99.31), SIMDE_FLOAT64_C( 38.94), SIMDE_FLOAT64_C( 96.30), SIMDE_FLOAT64_C( 6.69), SIMDE_FLOAT64_C( -29.89) }, { SIMDE_FLOAT64_C( 14.08), SIMDE_FLOAT64_C( 53.98), SIMDE_FLOAT64_C( 43.19), SIMDE_FLOAT64_C( 77.45), SIMDE_FLOAT64_C( 22.19), SIMDE_FLOAT64_C( 24.54), SIMDE_FLOAT64_C( 48.72), SIMDE_FLOAT64_C( -77.69) }, { SIMDE_FLOAT64_C( -2043.35), SIMDE_FLOAT64_C( -1861.59), SIMDE_FLOAT64_C( 25.15), SIMDE_FLOAT64_C( -2029.88), SIMDE_FLOAT64_C( -3352.73), SIMDE_FLOAT64_C( -823.83), SIMDE_FLOAT64_C( -58.29), SIMDE_FLOAT64_C( -1801.79) } }, { { SIMDE_FLOAT64_C( 90.99), SIMDE_FLOAT64_C( 21.61), SIMDE_FLOAT64_C( 24.15), SIMDE_FLOAT64_C( -82.24), SIMDE_FLOAT64_C( 55.34), SIMDE_FLOAT64_C( -91.51), SIMDE_FLOAT64_C( 79.67), SIMDE_FLOAT64_C( -95.94) }, { SIMDE_FLOAT64_C( 81.97), 
SIMDE_FLOAT64_C( -24.52), SIMDE_FLOAT64_C( -15.60), SIMDE_FLOAT64_C( 67.51), SIMDE_FLOAT64_C( 83.77), SIMDE_FLOAT64_C( 85.83), SIMDE_FLOAT64_C( -95.37), SIMDE_FLOAT64_C( -55.81) }, { SIMDE_FLOAT64_C( 10.43), SIMDE_FLOAT64_C( 20.94), SIMDE_FLOAT64_C( -55.12), SIMDE_FLOAT64_C( -50.63), SIMDE_FLOAT64_C( 17.24), SIMDE_FLOAT64_C( 51.57), SIMDE_FLOAT64_C( 19.47), SIMDE_FLOAT64_C( -68.68) }, { SIMDE_FLOAT64_C( -7468.88), SIMDE_FLOAT64_C( 508.94), SIMDE_FLOAT64_C( 431.86), SIMDE_FLOAT64_C( 5602.65), SIMDE_FLOAT64_C( -4653.07), SIMDE_FLOAT64_C( 7802.73), SIMDE_FLOAT64_C( 7578.66), SIMDE_FLOAT64_C( -5285.73) } }, { { SIMDE_FLOAT64_C( 5.55), SIMDE_FLOAT64_C( -37.33), SIMDE_FLOAT64_C( -91.23), SIMDE_FLOAT64_C( -72.26), SIMDE_FLOAT64_C( 87.21), SIMDE_FLOAT64_C( 57.48), SIMDE_FLOAT64_C( -49.95), SIMDE_FLOAT64_C( 78.20) }, { SIMDE_FLOAT64_C( -20.90), SIMDE_FLOAT64_C( 74.20), SIMDE_FLOAT64_C( 95.96), SIMDE_FLOAT64_C( -65.57), SIMDE_FLOAT64_C( 82.69), SIMDE_FLOAT64_C( 75.63), SIMDE_FLOAT64_C( -61.50), SIMDE_FLOAT64_C( 64.66) }, { SIMDE_FLOAT64_C( -48.89), SIMDE_FLOAT64_C( 22.90), SIMDE_FLOAT64_C( 32.17), SIMDE_FLOAT64_C( -65.12), SIMDE_FLOAT64_C( 8.73), SIMDE_FLOAT64_C( 36.80), SIMDE_FLOAT64_C( -20.93), SIMDE_FLOAT64_C( -80.84) }, { SIMDE_FLOAT64_C( 164.88), SIMDE_FLOAT64_C( 2746.99), SIMDE_FLOAT64_C( 8722.26), SIMDE_FLOAT64_C( -4672.97), SIMDE_FLOAT64_C( -7220.12), SIMDE_FLOAT64_C( -4384.01), SIMDE_FLOAT64_C( -3051.00), SIMDE_FLOAT64_C( -4975.57) } }, { { SIMDE_FLOAT64_C( -42.25), SIMDE_FLOAT64_C( 23.95), SIMDE_FLOAT64_C( -31.47), SIMDE_FLOAT64_C( 74.99), SIMDE_FLOAT64_C( -24.48), SIMDE_FLOAT64_C( 88.00), SIMDE_FLOAT64_C( -93.69), SIMDE_FLOAT64_C( 81.07) }, { SIMDE_FLOAT64_C( -49.33), SIMDE_FLOAT64_C( -84.92), SIMDE_FLOAT64_C( -91.19), SIMDE_FLOAT64_C( -62.12), SIMDE_FLOAT64_C( 72.56), SIMDE_FLOAT64_C( -41.14), SIMDE_FLOAT64_C( -83.92), SIMDE_FLOAT64_C( -48.34) }, { SIMDE_FLOAT64_C( -66.94), SIMDE_FLOAT64_C( -87.96), SIMDE_FLOAT64_C( -13.91), SIMDE_FLOAT64_C( -84.25), 
SIMDE_FLOAT64_C( 87.67), SIMDE_FLOAT64_C( 24.58), SIMDE_FLOAT64_C( 80.41), SIMDE_FLOAT64_C( -61.22) }, { SIMDE_FLOAT64_C( -2017.25), SIMDE_FLOAT64_C( 2121.79), SIMDE_FLOAT64_C( -2855.84), SIMDE_FLOAT64_C( 4742.63), SIMDE_FLOAT64_C( 1688.60), SIMDE_FLOAT64_C( 3595.74), SIMDE_FLOAT64_C( -7942.87), SIMDE_FLOAT64_C( 3980.14) } }, { { SIMDE_FLOAT64_C( -52.52), SIMDE_FLOAT64_C( 12.58), SIMDE_FLOAT64_C( -26.34), SIMDE_FLOAT64_C( 56.21), SIMDE_FLOAT64_C( -50.62), SIMDE_FLOAT64_C( 52.73), SIMDE_FLOAT64_C( 75.37), SIMDE_FLOAT64_C( 7.13) }, { SIMDE_FLOAT64_C( -23.32), SIMDE_FLOAT64_C( -56.10), SIMDE_FLOAT64_C( -17.88), SIMDE_FLOAT64_C( 52.20), SIMDE_FLOAT64_C( -68.10), SIMDE_FLOAT64_C( -11.57), SIMDE_FLOAT64_C( 33.27), SIMDE_FLOAT64_C( -17.43) }, { SIMDE_FLOAT64_C( 3.50), SIMDE_FLOAT64_C( 42.08), SIMDE_FLOAT64_C( 20.45), SIMDE_FLOAT64_C( -23.94), SIMDE_FLOAT64_C( -99.05), SIMDE_FLOAT64_C( 36.53), SIMDE_FLOAT64_C( 27.72), SIMDE_FLOAT64_C( -66.00) }, { SIMDE_FLOAT64_C( -1228.27), SIMDE_FLOAT64_C( 663.66), SIMDE_FLOAT64_C( -491.41), SIMDE_FLOAT64_C( -2910.22), SIMDE_FLOAT64_C( -3348.17), SIMDE_FLOAT64_C( 573.56), SIMDE_FLOAT64_C( -2535.28), SIMDE_FLOAT64_C( 190.28) } }, { { SIMDE_FLOAT64_C( 48.57), SIMDE_FLOAT64_C( -86.20), SIMDE_FLOAT64_C( -50.25), SIMDE_FLOAT64_C( 36.24), SIMDE_FLOAT64_C( 38.39), SIMDE_FLOAT64_C( -69.84), SIMDE_FLOAT64_C( 75.01), SIMDE_FLOAT64_C( 85.87) }, { SIMDE_FLOAT64_C( 42.73), SIMDE_FLOAT64_C( -51.33), SIMDE_FLOAT64_C( 42.08), SIMDE_FLOAT64_C( 92.12), SIMDE_FLOAT64_C( -98.60), SIMDE_FLOAT64_C( 17.44), SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( -21.92) }, { SIMDE_FLOAT64_C( 61.34), SIMDE_FLOAT64_C( 81.36), SIMDE_FLOAT64_C( -69.73), SIMDE_FLOAT64_C( 93.24), SIMDE_FLOAT64_C( -30.21), SIMDE_FLOAT64_C( 63.55), SIMDE_FLOAT64_C( -24.19), SIMDE_FLOAT64_C( 73.29) }, { SIMDE_FLOAT64_C( -2136.74), SIMDE_FLOAT64_C( -4506.01), SIMDE_FLOAT64_C( 2184.25), SIMDE_FLOAT64_C( -3431.67), SIMDE_FLOAT64_C( 3815.46), SIMDE_FLOAT64_C( 1154.46), SIMDE_FLOAT64_C( 81.20), 
SIMDE_FLOAT64_C( 1808.98) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d c = simde_mm512_loadu_pd(test_vec[i].c); simde__m512d r = simde_mm512_fnmsub_pd(a, b, c); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_fnmsub_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_fnmsub_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/insert.c000066400000000000000000006340721400333146700167400ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #define SIMDE_TEST_X86_AVX512_INSN insert #include #include #include static int test_simde_mm512_insertf32x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m128 b; simde__m512 r0; simde__m512 r1; simde__m512 r2; simde__m512 r3; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -563.83), SIMDE_FLOAT32_C( 799.30), SIMDE_FLOAT32_C( 938.85), SIMDE_FLOAT32_C( -576.01), SIMDE_FLOAT32_C( -465.05), SIMDE_FLOAT32_C( 439.15), SIMDE_FLOAT32_C( -104.57), SIMDE_FLOAT32_C( -28.15), SIMDE_FLOAT32_C( -431.26), SIMDE_FLOAT32_C( 481.25), SIMDE_FLOAT32_C( -57.75), SIMDE_FLOAT32_C( -784.26), SIMDE_FLOAT32_C( 438.04), SIMDE_FLOAT32_C( 549.03), SIMDE_FLOAT32_C( 729.46), SIMDE_FLOAT32_C( 582.53)), simde_mm_set_ps(SIMDE_FLOAT32_C( -202.49), SIMDE_FLOAT32_C( -470.36), SIMDE_FLOAT32_C( 966.37), SIMDE_FLOAT32_C( 135.20)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -563.83), SIMDE_FLOAT32_C( 799.30), SIMDE_FLOAT32_C( 938.85), SIMDE_FLOAT32_C( -576.01), SIMDE_FLOAT32_C( -465.05), SIMDE_FLOAT32_C( 439.15), SIMDE_FLOAT32_C( -104.57), SIMDE_FLOAT32_C( -28.15), SIMDE_FLOAT32_C( -431.26), SIMDE_FLOAT32_C( 481.25), SIMDE_FLOAT32_C( -57.75), SIMDE_FLOAT32_C( -784.26), SIMDE_FLOAT32_C( -202.49), SIMDE_FLOAT32_C( -470.36), SIMDE_FLOAT32_C( 966.37), SIMDE_FLOAT32_C( 135.20)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -563.83), SIMDE_FLOAT32_C( 799.30), SIMDE_FLOAT32_C( 938.85), SIMDE_FLOAT32_C( -576.01), SIMDE_FLOAT32_C( -465.05), SIMDE_FLOAT32_C( 439.15), SIMDE_FLOAT32_C( -104.57), SIMDE_FLOAT32_C( -28.15), SIMDE_FLOAT32_C( -202.49), SIMDE_FLOAT32_C( -470.36), SIMDE_FLOAT32_C( 966.37), SIMDE_FLOAT32_C( 135.20), SIMDE_FLOAT32_C( 438.04), SIMDE_FLOAT32_C( 549.03), SIMDE_FLOAT32_C( 729.46), SIMDE_FLOAT32_C( 582.53)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -563.83), SIMDE_FLOAT32_C( 799.30), SIMDE_FLOAT32_C( 938.85), SIMDE_FLOAT32_C( -576.01), SIMDE_FLOAT32_C( -202.49), SIMDE_FLOAT32_C( -470.36), SIMDE_FLOAT32_C( 966.37), SIMDE_FLOAT32_C( 
135.20), SIMDE_FLOAT32_C( -431.26), SIMDE_FLOAT32_C( 481.25), SIMDE_FLOAT32_C( -57.75), SIMDE_FLOAT32_C( -784.26), SIMDE_FLOAT32_C( 438.04), SIMDE_FLOAT32_C( 549.03), SIMDE_FLOAT32_C( 729.46), SIMDE_FLOAT32_C( 582.53)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -202.49), SIMDE_FLOAT32_C( -470.36), SIMDE_FLOAT32_C( 966.37), SIMDE_FLOAT32_C( 135.20), SIMDE_FLOAT32_C( -465.05), SIMDE_FLOAT32_C( 439.15), SIMDE_FLOAT32_C( -104.57), SIMDE_FLOAT32_C( -28.15), SIMDE_FLOAT32_C( -431.26), SIMDE_FLOAT32_C( 481.25), SIMDE_FLOAT32_C( -57.75), SIMDE_FLOAT32_C( -784.26), SIMDE_FLOAT32_C( 438.04), SIMDE_FLOAT32_C( 549.03), SIMDE_FLOAT32_C( 729.46), SIMDE_FLOAT32_C( 582.53)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -743.10), SIMDE_FLOAT32_C( -688.01), SIMDE_FLOAT32_C( 442.76), SIMDE_FLOAT32_C( 931.17), SIMDE_FLOAT32_C( 120.10), SIMDE_FLOAT32_C( -64.06), SIMDE_FLOAT32_C( -620.03), SIMDE_FLOAT32_C( 559.81), SIMDE_FLOAT32_C( 185.23), SIMDE_FLOAT32_C( -423.61), SIMDE_FLOAT32_C( -11.91), SIMDE_FLOAT32_C( 407.56), SIMDE_FLOAT32_C( 355.11), SIMDE_FLOAT32_C( -787.72), SIMDE_FLOAT32_C( 472.82), SIMDE_FLOAT32_C( -703.51)), simde_mm_set_ps(SIMDE_FLOAT32_C( -859.51), SIMDE_FLOAT32_C( -69.45), SIMDE_FLOAT32_C( 40.36), SIMDE_FLOAT32_C( 95.61)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -743.10), SIMDE_FLOAT32_C( -688.01), SIMDE_FLOAT32_C( 442.76), SIMDE_FLOAT32_C( 931.17), SIMDE_FLOAT32_C( 120.10), SIMDE_FLOAT32_C( -64.06), SIMDE_FLOAT32_C( -620.03), SIMDE_FLOAT32_C( 559.81), SIMDE_FLOAT32_C( 185.23), SIMDE_FLOAT32_C( -423.61), SIMDE_FLOAT32_C( -11.91), SIMDE_FLOAT32_C( 407.56), SIMDE_FLOAT32_C( -859.51), SIMDE_FLOAT32_C( -69.45), SIMDE_FLOAT32_C( 40.36), SIMDE_FLOAT32_C( 95.61)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -743.10), SIMDE_FLOAT32_C( -688.01), SIMDE_FLOAT32_C( 442.76), SIMDE_FLOAT32_C( 931.17), SIMDE_FLOAT32_C( 120.10), SIMDE_FLOAT32_C( -64.06), SIMDE_FLOAT32_C( -620.03), SIMDE_FLOAT32_C( 559.81), SIMDE_FLOAT32_C( -859.51), SIMDE_FLOAT32_C( -69.45), SIMDE_FLOAT32_C( 40.36), SIMDE_FLOAT32_C( 95.61), 
SIMDE_FLOAT32_C( 355.11), SIMDE_FLOAT32_C( -787.72), SIMDE_FLOAT32_C( 472.82), SIMDE_FLOAT32_C( -703.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -743.10), SIMDE_FLOAT32_C( -688.01), SIMDE_FLOAT32_C( 442.76), SIMDE_FLOAT32_C( 931.17), SIMDE_FLOAT32_C( -859.51), SIMDE_FLOAT32_C( -69.45), SIMDE_FLOAT32_C( 40.36), SIMDE_FLOAT32_C( 95.61), SIMDE_FLOAT32_C( 185.23), SIMDE_FLOAT32_C( -423.61), SIMDE_FLOAT32_C( -11.91), SIMDE_FLOAT32_C( 407.56), SIMDE_FLOAT32_C( 355.11), SIMDE_FLOAT32_C( -787.72), SIMDE_FLOAT32_C( 472.82), SIMDE_FLOAT32_C( -703.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -859.51), SIMDE_FLOAT32_C( -69.45), SIMDE_FLOAT32_C( 40.36), SIMDE_FLOAT32_C( 95.61), SIMDE_FLOAT32_C( 120.10), SIMDE_FLOAT32_C( -64.06), SIMDE_FLOAT32_C( -620.03), SIMDE_FLOAT32_C( 559.81), SIMDE_FLOAT32_C( 185.23), SIMDE_FLOAT32_C( -423.61), SIMDE_FLOAT32_C( -11.91), SIMDE_FLOAT32_C( 407.56), SIMDE_FLOAT32_C( 355.11), SIMDE_FLOAT32_C( -787.72), SIMDE_FLOAT32_C( 472.82), SIMDE_FLOAT32_C( -703.51)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -98.08), SIMDE_FLOAT32_C( 464.19), SIMDE_FLOAT32_C( 711.12), SIMDE_FLOAT32_C( 282.83), SIMDE_FLOAT32_C( -774.08), SIMDE_FLOAT32_C( 841.24), SIMDE_FLOAT32_C( -414.07), SIMDE_FLOAT32_C( 79.76), SIMDE_FLOAT32_C( 193.01), SIMDE_FLOAT32_C( -435.27), SIMDE_FLOAT32_C( -84.06), SIMDE_FLOAT32_C( 298.40), SIMDE_FLOAT32_C( 208.07), SIMDE_FLOAT32_C( -94.60), SIMDE_FLOAT32_C( 834.28), SIMDE_FLOAT32_C( 260.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( -157.33), SIMDE_FLOAT32_C( -819.46), SIMDE_FLOAT32_C( 541.44), SIMDE_FLOAT32_C( 112.81)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -98.08), SIMDE_FLOAT32_C( 464.19), SIMDE_FLOAT32_C( 711.12), SIMDE_FLOAT32_C( 282.83), SIMDE_FLOAT32_C( -774.08), SIMDE_FLOAT32_C( 841.24), SIMDE_FLOAT32_C( -414.07), SIMDE_FLOAT32_C( 79.76), SIMDE_FLOAT32_C( 193.01), SIMDE_FLOAT32_C( -435.27), SIMDE_FLOAT32_C( -84.06), SIMDE_FLOAT32_C( 298.40), SIMDE_FLOAT32_C( -157.33), SIMDE_FLOAT32_C( -819.46), SIMDE_FLOAT32_C( 541.44), SIMDE_FLOAT32_C( 112.81)), 
simde_mm512_set_ps(SIMDE_FLOAT32_C( -98.08), SIMDE_FLOAT32_C( 464.19), SIMDE_FLOAT32_C( 711.12), SIMDE_FLOAT32_C( 282.83), SIMDE_FLOAT32_C( -774.08), SIMDE_FLOAT32_C( 841.24), SIMDE_FLOAT32_C( -414.07), SIMDE_FLOAT32_C( 79.76), SIMDE_FLOAT32_C( -157.33), SIMDE_FLOAT32_C( -819.46), SIMDE_FLOAT32_C( 541.44), SIMDE_FLOAT32_C( 112.81), SIMDE_FLOAT32_C( 208.07), SIMDE_FLOAT32_C( -94.60), SIMDE_FLOAT32_C( 834.28), SIMDE_FLOAT32_C( 260.50)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -98.08), SIMDE_FLOAT32_C( 464.19), SIMDE_FLOAT32_C( 711.12), SIMDE_FLOAT32_C( 282.83), SIMDE_FLOAT32_C( -157.33), SIMDE_FLOAT32_C( -819.46), SIMDE_FLOAT32_C( 541.44), SIMDE_FLOAT32_C( 112.81), SIMDE_FLOAT32_C( 193.01), SIMDE_FLOAT32_C( -435.27), SIMDE_FLOAT32_C( -84.06), SIMDE_FLOAT32_C( 298.40), SIMDE_FLOAT32_C( 208.07), SIMDE_FLOAT32_C( -94.60), SIMDE_FLOAT32_C( 834.28), SIMDE_FLOAT32_C( 260.50)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -157.33), SIMDE_FLOAT32_C( -819.46), SIMDE_FLOAT32_C( 541.44), SIMDE_FLOAT32_C( 112.81), SIMDE_FLOAT32_C( -774.08), SIMDE_FLOAT32_C( 841.24), SIMDE_FLOAT32_C( -414.07), SIMDE_FLOAT32_C( 79.76), SIMDE_FLOAT32_C( 193.01), SIMDE_FLOAT32_C( -435.27), SIMDE_FLOAT32_C( -84.06), SIMDE_FLOAT32_C( 298.40), SIMDE_FLOAT32_C( 208.07), SIMDE_FLOAT32_C( -94.60), SIMDE_FLOAT32_C( 834.28), SIMDE_FLOAT32_C( 260.50)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -887.47), SIMDE_FLOAT32_C( 336.35), SIMDE_FLOAT32_C( -396.24), SIMDE_FLOAT32_C( 99.21), SIMDE_FLOAT32_C( -2.60), SIMDE_FLOAT32_C( -38.88), SIMDE_FLOAT32_C( 165.88), SIMDE_FLOAT32_C( 218.73), SIMDE_FLOAT32_C( 375.27), SIMDE_FLOAT32_C( -966.90), SIMDE_FLOAT32_C( -512.98), SIMDE_FLOAT32_C( -737.78), SIMDE_FLOAT32_C( 664.52), SIMDE_FLOAT32_C( -224.13), SIMDE_FLOAT32_C( 633.65), SIMDE_FLOAT32_C( -834.15)), simde_mm_set_ps(SIMDE_FLOAT32_C( 141.08), SIMDE_FLOAT32_C( -832.50), SIMDE_FLOAT32_C( -990.15), SIMDE_FLOAT32_C( 438.46)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -887.47), SIMDE_FLOAT32_C( 336.35), SIMDE_FLOAT32_C( -396.24), 
SIMDE_FLOAT32_C( 99.21), SIMDE_FLOAT32_C( -2.60), SIMDE_FLOAT32_C( -38.88), SIMDE_FLOAT32_C( 165.88), SIMDE_FLOAT32_C( 218.73), SIMDE_FLOAT32_C( 375.27), SIMDE_FLOAT32_C( -966.90), SIMDE_FLOAT32_C( -512.98), SIMDE_FLOAT32_C( -737.78), SIMDE_FLOAT32_C( 141.08), SIMDE_FLOAT32_C( -832.50), SIMDE_FLOAT32_C( -990.15), SIMDE_FLOAT32_C( 438.46)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -887.47), SIMDE_FLOAT32_C( 336.35), SIMDE_FLOAT32_C( -396.24), SIMDE_FLOAT32_C( 99.21), SIMDE_FLOAT32_C( -2.60), SIMDE_FLOAT32_C( -38.88), SIMDE_FLOAT32_C( 165.88), SIMDE_FLOAT32_C( 218.73), SIMDE_FLOAT32_C( 141.08), SIMDE_FLOAT32_C( -832.50), SIMDE_FLOAT32_C( -990.15), SIMDE_FLOAT32_C( 438.46), SIMDE_FLOAT32_C( 664.52), SIMDE_FLOAT32_C( -224.13), SIMDE_FLOAT32_C( 633.65), SIMDE_FLOAT32_C( -834.15)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -887.47), SIMDE_FLOAT32_C( 336.35), SIMDE_FLOAT32_C( -396.24), SIMDE_FLOAT32_C( 99.21), SIMDE_FLOAT32_C( 141.08), SIMDE_FLOAT32_C( -832.50), SIMDE_FLOAT32_C( -990.15), SIMDE_FLOAT32_C( 438.46), SIMDE_FLOAT32_C( 375.27), SIMDE_FLOAT32_C( -966.90), SIMDE_FLOAT32_C( -512.98), SIMDE_FLOAT32_C( -737.78), SIMDE_FLOAT32_C( 664.52), SIMDE_FLOAT32_C( -224.13), SIMDE_FLOAT32_C( 633.65), SIMDE_FLOAT32_C( -834.15)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 141.08), SIMDE_FLOAT32_C( -832.50), SIMDE_FLOAT32_C( -990.15), SIMDE_FLOAT32_C( 438.46), SIMDE_FLOAT32_C( -2.60), SIMDE_FLOAT32_C( -38.88), SIMDE_FLOAT32_C( 165.88), SIMDE_FLOAT32_C( 218.73), SIMDE_FLOAT32_C( 375.27), SIMDE_FLOAT32_C( -966.90), SIMDE_FLOAT32_C( -512.98), SIMDE_FLOAT32_C( -737.78), SIMDE_FLOAT32_C( 664.52), SIMDE_FLOAT32_C( -224.13), SIMDE_FLOAT32_C( 633.65), SIMDE_FLOAT32_C( -834.15)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -179.98), SIMDE_FLOAT32_C( 258.23), SIMDE_FLOAT32_C( 246.22), SIMDE_FLOAT32_C( 97.85), SIMDE_FLOAT32_C( 666.32), SIMDE_FLOAT32_C( 364.80), SIMDE_FLOAT32_C( 759.27), SIMDE_FLOAT32_C( -524.19), SIMDE_FLOAT32_C( -726.51), SIMDE_FLOAT32_C( 381.71), SIMDE_FLOAT32_C( 819.12), SIMDE_FLOAT32_C( 
145.28), SIMDE_FLOAT32_C( -99.37), SIMDE_FLOAT32_C( -151.02), SIMDE_FLOAT32_C( 551.65), SIMDE_FLOAT32_C( 155.58)), simde_mm_set_ps(SIMDE_FLOAT32_C( 184.91), SIMDE_FLOAT32_C( -432.97), SIMDE_FLOAT32_C( -455.33), SIMDE_FLOAT32_C( 164.52)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -179.98), SIMDE_FLOAT32_C( 258.23), SIMDE_FLOAT32_C( 246.22), SIMDE_FLOAT32_C( 97.85), SIMDE_FLOAT32_C( 666.32), SIMDE_FLOAT32_C( 364.80), SIMDE_FLOAT32_C( 759.27), SIMDE_FLOAT32_C( -524.19), SIMDE_FLOAT32_C( -726.51), SIMDE_FLOAT32_C( 381.71), SIMDE_FLOAT32_C( 819.12), SIMDE_FLOAT32_C( 145.28), SIMDE_FLOAT32_C( 184.91), SIMDE_FLOAT32_C( -432.97), SIMDE_FLOAT32_C( -455.33), SIMDE_FLOAT32_C( 164.52)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -179.98), SIMDE_FLOAT32_C( 258.23), SIMDE_FLOAT32_C( 246.22), SIMDE_FLOAT32_C( 97.85), SIMDE_FLOAT32_C( 666.32), SIMDE_FLOAT32_C( 364.80), SIMDE_FLOAT32_C( 759.27), SIMDE_FLOAT32_C( -524.19), SIMDE_FLOAT32_C( 184.91), SIMDE_FLOAT32_C( -432.97), SIMDE_FLOAT32_C( -455.33), SIMDE_FLOAT32_C( 164.52), SIMDE_FLOAT32_C( -99.37), SIMDE_FLOAT32_C( -151.02), SIMDE_FLOAT32_C( 551.65), SIMDE_FLOAT32_C( 155.58)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -179.98), SIMDE_FLOAT32_C( 258.23), SIMDE_FLOAT32_C( 246.22), SIMDE_FLOAT32_C( 97.85), SIMDE_FLOAT32_C( 184.91), SIMDE_FLOAT32_C( -432.97), SIMDE_FLOAT32_C( -455.33), SIMDE_FLOAT32_C( 164.52), SIMDE_FLOAT32_C( -726.51), SIMDE_FLOAT32_C( 381.71), SIMDE_FLOAT32_C( 819.12), SIMDE_FLOAT32_C( 145.28), SIMDE_FLOAT32_C( -99.37), SIMDE_FLOAT32_C( -151.02), SIMDE_FLOAT32_C( 551.65), SIMDE_FLOAT32_C( 155.58)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 184.91), SIMDE_FLOAT32_C( -432.97), SIMDE_FLOAT32_C( -455.33), SIMDE_FLOAT32_C( 164.52), SIMDE_FLOAT32_C( 666.32), SIMDE_FLOAT32_C( 364.80), SIMDE_FLOAT32_C( 759.27), SIMDE_FLOAT32_C( -524.19), SIMDE_FLOAT32_C( -726.51), SIMDE_FLOAT32_C( 381.71), SIMDE_FLOAT32_C( 819.12), SIMDE_FLOAT32_C( 145.28), SIMDE_FLOAT32_C( -99.37), SIMDE_FLOAT32_C( -151.02), SIMDE_FLOAT32_C( 551.65), SIMDE_FLOAT32_C( 155.58)) 
}, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 481.68), SIMDE_FLOAT32_C( 170.00), SIMDE_FLOAT32_C( -341.91), SIMDE_FLOAT32_C( 366.57), SIMDE_FLOAT32_C( 254.48), SIMDE_FLOAT32_C( -211.73), SIMDE_FLOAT32_C( 755.70), SIMDE_FLOAT32_C( 908.31), SIMDE_FLOAT32_C( -363.93), SIMDE_FLOAT32_C( -144.11), SIMDE_FLOAT32_C( 789.10), SIMDE_FLOAT32_C( -343.92), SIMDE_FLOAT32_C( 344.74), SIMDE_FLOAT32_C( 961.65), SIMDE_FLOAT32_C( 652.93), SIMDE_FLOAT32_C( 754.42)), simde_mm_set_ps(SIMDE_FLOAT32_C( -846.10), SIMDE_FLOAT32_C( -248.07), SIMDE_FLOAT32_C( -411.92), SIMDE_FLOAT32_C( -88.91)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 481.68), SIMDE_FLOAT32_C( 170.00), SIMDE_FLOAT32_C( -341.91), SIMDE_FLOAT32_C( 366.57), SIMDE_FLOAT32_C( 254.48), SIMDE_FLOAT32_C( -211.73), SIMDE_FLOAT32_C( 755.70), SIMDE_FLOAT32_C( 908.31), SIMDE_FLOAT32_C( -363.93), SIMDE_FLOAT32_C( -144.11), SIMDE_FLOAT32_C( 789.10), SIMDE_FLOAT32_C( -343.92), SIMDE_FLOAT32_C( -846.10), SIMDE_FLOAT32_C( -248.07), SIMDE_FLOAT32_C( -411.92), SIMDE_FLOAT32_C( -88.91)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 481.68), SIMDE_FLOAT32_C( 170.00), SIMDE_FLOAT32_C( -341.91), SIMDE_FLOAT32_C( 366.57), SIMDE_FLOAT32_C( 254.48), SIMDE_FLOAT32_C( -211.73), SIMDE_FLOAT32_C( 755.70), SIMDE_FLOAT32_C( 908.31), SIMDE_FLOAT32_C( -846.10), SIMDE_FLOAT32_C( -248.07), SIMDE_FLOAT32_C( -411.92), SIMDE_FLOAT32_C( -88.91), SIMDE_FLOAT32_C( 344.74), SIMDE_FLOAT32_C( 961.65), SIMDE_FLOAT32_C( 652.93), SIMDE_FLOAT32_C( 754.42)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 481.68), SIMDE_FLOAT32_C( 170.00), SIMDE_FLOAT32_C( -341.91), SIMDE_FLOAT32_C( 366.57), SIMDE_FLOAT32_C( -846.10), SIMDE_FLOAT32_C( -248.07), SIMDE_FLOAT32_C( -411.92), SIMDE_FLOAT32_C( -88.91), SIMDE_FLOAT32_C( -363.93), SIMDE_FLOAT32_C( -144.11), SIMDE_FLOAT32_C( 789.10), SIMDE_FLOAT32_C( -343.92), SIMDE_FLOAT32_C( 344.74), SIMDE_FLOAT32_C( 961.65), SIMDE_FLOAT32_C( 652.93), SIMDE_FLOAT32_C( 754.42)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -846.10), SIMDE_FLOAT32_C( -248.07), SIMDE_FLOAT32_C( -411.92), 
SIMDE_FLOAT32_C( -88.91), SIMDE_FLOAT32_C( 254.48), SIMDE_FLOAT32_C( -211.73), SIMDE_FLOAT32_C( 755.70), SIMDE_FLOAT32_C( 908.31), SIMDE_FLOAT32_C( -363.93), SIMDE_FLOAT32_C( -144.11), SIMDE_FLOAT32_C( 789.10), SIMDE_FLOAT32_C( -343.92), SIMDE_FLOAT32_C( 344.74), SIMDE_FLOAT32_C( 961.65), SIMDE_FLOAT32_C( 652.93), SIMDE_FLOAT32_C( 754.42)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -382.22), SIMDE_FLOAT32_C( 146.15), SIMDE_FLOAT32_C( -320.26), SIMDE_FLOAT32_C( 360.04), SIMDE_FLOAT32_C( -199.21), SIMDE_FLOAT32_C( 134.15), SIMDE_FLOAT32_C( -764.34), SIMDE_FLOAT32_C( -376.86), SIMDE_FLOAT32_C( -985.32), SIMDE_FLOAT32_C( 485.76), SIMDE_FLOAT32_C( 234.60), SIMDE_FLOAT32_C( 786.03), SIMDE_FLOAT32_C( 859.59), SIMDE_FLOAT32_C( 489.95), SIMDE_FLOAT32_C( -409.35), SIMDE_FLOAT32_C( 796.52)), simde_mm_set_ps(SIMDE_FLOAT32_C( 713.78), SIMDE_FLOAT32_C( -342.76), SIMDE_FLOAT32_C( -632.29), SIMDE_FLOAT32_C( -698.47)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -382.22), SIMDE_FLOAT32_C( 146.15), SIMDE_FLOAT32_C( -320.26), SIMDE_FLOAT32_C( 360.04), SIMDE_FLOAT32_C( -199.21), SIMDE_FLOAT32_C( 134.15), SIMDE_FLOAT32_C( -764.34), SIMDE_FLOAT32_C( -376.86), SIMDE_FLOAT32_C( -985.32), SIMDE_FLOAT32_C( 485.76), SIMDE_FLOAT32_C( 234.60), SIMDE_FLOAT32_C( 786.03), SIMDE_FLOAT32_C( 713.78), SIMDE_FLOAT32_C( -342.76), SIMDE_FLOAT32_C( -632.29), SIMDE_FLOAT32_C( -698.47)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -382.22), SIMDE_FLOAT32_C( 146.15), SIMDE_FLOAT32_C( -320.26), SIMDE_FLOAT32_C( 360.04), SIMDE_FLOAT32_C( -199.21), SIMDE_FLOAT32_C( 134.15), SIMDE_FLOAT32_C( -764.34), SIMDE_FLOAT32_C( -376.86), SIMDE_FLOAT32_C( 713.78), SIMDE_FLOAT32_C( -342.76), SIMDE_FLOAT32_C( -632.29), SIMDE_FLOAT32_C( -698.47), SIMDE_FLOAT32_C( 859.59), SIMDE_FLOAT32_C( 489.95), SIMDE_FLOAT32_C( -409.35), SIMDE_FLOAT32_C( 796.52)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -382.22), SIMDE_FLOAT32_C( 146.15), SIMDE_FLOAT32_C( -320.26), SIMDE_FLOAT32_C( 360.04), SIMDE_FLOAT32_C( 713.78), SIMDE_FLOAT32_C( -342.76), SIMDE_FLOAT32_C( 
-632.29), SIMDE_FLOAT32_C( -698.47), SIMDE_FLOAT32_C( -985.32), SIMDE_FLOAT32_C( 485.76), SIMDE_FLOAT32_C( 234.60), SIMDE_FLOAT32_C( 786.03), SIMDE_FLOAT32_C( 859.59), SIMDE_FLOAT32_C( 489.95), SIMDE_FLOAT32_C( -409.35), SIMDE_FLOAT32_C( 796.52)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 713.78), SIMDE_FLOAT32_C( -342.76), SIMDE_FLOAT32_C( -632.29), SIMDE_FLOAT32_C( -698.47), SIMDE_FLOAT32_C( -199.21), SIMDE_FLOAT32_C( 134.15), SIMDE_FLOAT32_C( -764.34), SIMDE_FLOAT32_C( -376.86), SIMDE_FLOAT32_C( -985.32), SIMDE_FLOAT32_C( 485.76), SIMDE_FLOAT32_C( 234.60), SIMDE_FLOAT32_C( 786.03), SIMDE_FLOAT32_C( 859.59), SIMDE_FLOAT32_C( 489.95), SIMDE_FLOAT32_C( -409.35), SIMDE_FLOAT32_C( 796.52)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -766.40), SIMDE_FLOAT32_C( -462.89), SIMDE_FLOAT32_C( -495.88), SIMDE_FLOAT32_C( -905.06), SIMDE_FLOAT32_C( -794.42), SIMDE_FLOAT32_C( 266.49), SIMDE_FLOAT32_C( -263.27), SIMDE_FLOAT32_C( -537.47), SIMDE_FLOAT32_C( 74.85), SIMDE_FLOAT32_C( -705.33), SIMDE_FLOAT32_C( 127.81), SIMDE_FLOAT32_C( 699.62), SIMDE_FLOAT32_C( 197.25), SIMDE_FLOAT32_C( 358.04), SIMDE_FLOAT32_C( 348.19), SIMDE_FLOAT32_C( -974.37)), simde_mm_set_ps(SIMDE_FLOAT32_C( 156.66), SIMDE_FLOAT32_C( 658.18), SIMDE_FLOAT32_C( 198.89), SIMDE_FLOAT32_C( -330.38)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -766.40), SIMDE_FLOAT32_C( -462.89), SIMDE_FLOAT32_C( -495.88), SIMDE_FLOAT32_C( -905.06), SIMDE_FLOAT32_C( -794.42), SIMDE_FLOAT32_C( 266.49), SIMDE_FLOAT32_C( -263.27), SIMDE_FLOAT32_C( -537.47), SIMDE_FLOAT32_C( 74.85), SIMDE_FLOAT32_C( -705.33), SIMDE_FLOAT32_C( 127.81), SIMDE_FLOAT32_C( 699.62), SIMDE_FLOAT32_C( 156.66), SIMDE_FLOAT32_C( 658.18), SIMDE_FLOAT32_C( 198.89), SIMDE_FLOAT32_C( -330.38)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -766.40), SIMDE_FLOAT32_C( -462.89), SIMDE_FLOAT32_C( -495.88), SIMDE_FLOAT32_C( -905.06), SIMDE_FLOAT32_C( -794.42), SIMDE_FLOAT32_C( 266.49), SIMDE_FLOAT32_C( -263.27), SIMDE_FLOAT32_C( -537.47), SIMDE_FLOAT32_C( 156.66), SIMDE_FLOAT32_C( 658.18), 
SIMDE_FLOAT32_C( 198.89), SIMDE_FLOAT32_C( -330.38), SIMDE_FLOAT32_C( 197.25), SIMDE_FLOAT32_C( 358.04), SIMDE_FLOAT32_C( 348.19), SIMDE_FLOAT32_C( -974.37)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -766.40), SIMDE_FLOAT32_C( -462.89), SIMDE_FLOAT32_C( -495.88), SIMDE_FLOAT32_C( -905.06), SIMDE_FLOAT32_C( 156.66), SIMDE_FLOAT32_C( 658.18), SIMDE_FLOAT32_C( 198.89), SIMDE_FLOAT32_C( -330.38), SIMDE_FLOAT32_C( 74.85), SIMDE_FLOAT32_C( -705.33), SIMDE_FLOAT32_C( 127.81), SIMDE_FLOAT32_C( 699.62), SIMDE_FLOAT32_C( 197.25), SIMDE_FLOAT32_C( 358.04), SIMDE_FLOAT32_C( 348.19), SIMDE_FLOAT32_C( -974.37)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 156.66), SIMDE_FLOAT32_C( 658.18), SIMDE_FLOAT32_C( 198.89), SIMDE_FLOAT32_C( -330.38), SIMDE_FLOAT32_C( -794.42), SIMDE_FLOAT32_C( 266.49), SIMDE_FLOAT32_C( -263.27), SIMDE_FLOAT32_C( -537.47), SIMDE_FLOAT32_C( 74.85), SIMDE_FLOAT32_C( -705.33), SIMDE_FLOAT32_C( 127.81), SIMDE_FLOAT32_C( 699.62), SIMDE_FLOAT32_C( 197.25), SIMDE_FLOAT32_C( 358.04), SIMDE_FLOAT32_C( 348.19), SIMDE_FLOAT32_C( -974.37)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r; r = simde_mm512_insertf32x4(test_vec[i].a, test_vec[i].b, 0); simde_assert_m512_close(r, test_vec[i].r0, 1); r = simde_mm512_insertf32x4(test_vec[i].a, test_vec[i].b, 1); simde_assert_m512_close(r, test_vec[i].r1, 1); r = simde_mm512_insertf32x4(test_vec[i].a, test_vec[i].b, 2); simde_assert_m512_close(r, test_vec[i].r2, 1); r = simde_mm512_insertf32x4(test_vec[i].a, test_vec[i].b, 3); simde_assert_m512_close(r, test_vec[i].r3, 1); } return 0; } /* Test for simde_mm512_mask_insertf32x4. Holds a table of 8 hard-coded cases; for each case the function under test is called with (src, k, a, b) and a last argument of 0, 1, 2 and 3, and each result is compared with the matching expected vector r0..r3 via simde_assert_m512_close. Returns 0 once every case has been run. NOTE(review): the expected vectors look like a per-element choice, driven by the bits of k, between the plain insert result and src - confirm against the implementation. */ static int test_simde_mm512_mask_insertf32x4(SIMDE_MUNIT_TEST_ARGS) { const struct { /* one test case: the four inputs, then the expected result for each value (0..3) of the last argument */ simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m128 b; simde__m512 r0; simde__m512 r1; simde__m512 r2; simde__m512 r3; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( 903.50), SIMDE_FLOAT32_C( -43.35), SIMDE_FLOAT32_C( 309.91), SIMDE_FLOAT32_C( 846.15), SIMDE_FLOAT32_C( -514.56), SIMDE_FLOAT32_C( -860.98), 
SIMDE_FLOAT32_C( -280.30), SIMDE_FLOAT32_C( 128.51), SIMDE_FLOAT32_C( 522.06), SIMDE_FLOAT32_C( -932.28), SIMDE_FLOAT32_C( 600.12), SIMDE_FLOAT32_C( -491.12), SIMDE_FLOAT32_C( -139.11), SIMDE_FLOAT32_C( -268.86), SIMDE_FLOAT32_C( -71.72), SIMDE_FLOAT32_C( 98.47)), UINT16_C(21335), simde_mm512_set_ps(SIMDE_FLOAT32_C( 483.08), SIMDE_FLOAT32_C( 232.04), SIMDE_FLOAT32_C( 774.81), SIMDE_FLOAT32_C( -599.01), SIMDE_FLOAT32_C( 69.04), SIMDE_FLOAT32_C( -149.02), SIMDE_FLOAT32_C( 240.79), SIMDE_FLOAT32_C( -839.80), SIMDE_FLOAT32_C( -556.90), SIMDE_FLOAT32_C( 160.98), SIMDE_FLOAT32_C( 391.82), SIMDE_FLOAT32_C( -569.99), SIMDE_FLOAT32_C( -327.63), SIMDE_FLOAT32_C( -172.36), SIMDE_FLOAT32_C( 393.53), SIMDE_FLOAT32_C( 36.69)), simde_mm_set_ps(SIMDE_FLOAT32_C( -921.00), SIMDE_FLOAT32_C( 283.06), SIMDE_FLOAT32_C( -471.60), SIMDE_FLOAT32_C( -333.94)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 903.50), SIMDE_FLOAT32_C( 232.04), SIMDE_FLOAT32_C( 309.91), SIMDE_FLOAT32_C( -599.01), SIMDE_FLOAT32_C( -514.56), SIMDE_FLOAT32_C( -860.98), SIMDE_FLOAT32_C( 240.79), SIMDE_FLOAT32_C( -839.80), SIMDE_FLOAT32_C( 522.06), SIMDE_FLOAT32_C( 160.98), SIMDE_FLOAT32_C( 600.12), SIMDE_FLOAT32_C( -569.99), SIMDE_FLOAT32_C( -139.11), SIMDE_FLOAT32_C( 283.06), SIMDE_FLOAT32_C( -471.60), SIMDE_FLOAT32_C( -333.94)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 903.50), SIMDE_FLOAT32_C( 232.04), SIMDE_FLOAT32_C( 309.91), SIMDE_FLOAT32_C( -599.01), SIMDE_FLOAT32_C( -514.56), SIMDE_FLOAT32_C( -860.98), SIMDE_FLOAT32_C( 240.79), SIMDE_FLOAT32_C( -839.80), SIMDE_FLOAT32_C( 522.06), SIMDE_FLOAT32_C( 283.06), SIMDE_FLOAT32_C( 600.12), SIMDE_FLOAT32_C( -333.94), SIMDE_FLOAT32_C( -139.11), SIMDE_FLOAT32_C( -172.36), SIMDE_FLOAT32_C( 393.53), SIMDE_FLOAT32_C( 36.69)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 903.50), SIMDE_FLOAT32_C( 232.04), SIMDE_FLOAT32_C( 309.91), SIMDE_FLOAT32_C( -599.01), SIMDE_FLOAT32_C( -514.56), SIMDE_FLOAT32_C( -860.98), SIMDE_FLOAT32_C( -471.60), SIMDE_FLOAT32_C( -333.94), SIMDE_FLOAT32_C( 522.06), 
SIMDE_FLOAT32_C( 160.98), SIMDE_FLOAT32_C( 600.12), SIMDE_FLOAT32_C( -569.99), SIMDE_FLOAT32_C( -139.11), SIMDE_FLOAT32_C( -172.36), SIMDE_FLOAT32_C( 393.53), SIMDE_FLOAT32_C( 36.69)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 903.50), SIMDE_FLOAT32_C( 283.06), SIMDE_FLOAT32_C( 309.91), SIMDE_FLOAT32_C( -333.94), SIMDE_FLOAT32_C( -514.56), SIMDE_FLOAT32_C( -860.98), SIMDE_FLOAT32_C( 240.79), SIMDE_FLOAT32_C( -839.80), SIMDE_FLOAT32_C( 522.06), SIMDE_FLOAT32_C( 160.98), SIMDE_FLOAT32_C( 600.12), SIMDE_FLOAT32_C( -569.99), SIMDE_FLOAT32_C( -139.11), SIMDE_FLOAT32_C( -172.36), SIMDE_FLOAT32_C( 393.53), SIMDE_FLOAT32_C( 36.69)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -657.02), SIMDE_FLOAT32_C( -629.37), SIMDE_FLOAT32_C( 337.35), SIMDE_FLOAT32_C( -278.32), SIMDE_FLOAT32_C( -744.41), SIMDE_FLOAT32_C( 39.32), SIMDE_FLOAT32_C( 29.68), SIMDE_FLOAT32_C( -490.28), SIMDE_FLOAT32_C( 841.53), SIMDE_FLOAT32_C( 526.21), SIMDE_FLOAT32_C( -203.04), SIMDE_FLOAT32_C( -80.71), SIMDE_FLOAT32_C( 632.01), SIMDE_FLOAT32_C( 456.89), SIMDE_FLOAT32_C( 51.33), SIMDE_FLOAT32_C( -868.59)), UINT16_C(45677), simde_mm512_set_ps(SIMDE_FLOAT32_C( -213.75), SIMDE_FLOAT32_C( 403.00), SIMDE_FLOAT32_C( -198.67), SIMDE_FLOAT32_C( 447.98), SIMDE_FLOAT32_C( -925.69), SIMDE_FLOAT32_C( 717.83), SIMDE_FLOAT32_C( -489.88), SIMDE_FLOAT32_C( -37.49), SIMDE_FLOAT32_C( -373.66), SIMDE_FLOAT32_C( -292.35), SIMDE_FLOAT32_C( -835.53), SIMDE_FLOAT32_C( 571.79), SIMDE_FLOAT32_C( 675.92), SIMDE_FLOAT32_C( 490.41), SIMDE_FLOAT32_C( 47.59), SIMDE_FLOAT32_C( -895.71)), simde_mm_set_ps(SIMDE_FLOAT32_C( 907.23), SIMDE_FLOAT32_C( 957.37), SIMDE_FLOAT32_C( -359.60), SIMDE_FLOAT32_C( -934.92)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -213.75), SIMDE_FLOAT32_C( -629.37), SIMDE_FLOAT32_C( -198.67), SIMDE_FLOAT32_C( 447.98), SIMDE_FLOAT32_C( -744.41), SIMDE_FLOAT32_C( 39.32), SIMDE_FLOAT32_C( -489.88), SIMDE_FLOAT32_C( -490.28), SIMDE_FLOAT32_C( 841.53), SIMDE_FLOAT32_C( -292.35), SIMDE_FLOAT32_C( -835.53), SIMDE_FLOAT32_C( -80.71), 
SIMDE_FLOAT32_C( 907.23), SIMDE_FLOAT32_C( 957.37), SIMDE_FLOAT32_C( 51.33), SIMDE_FLOAT32_C( -934.92)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -213.75), SIMDE_FLOAT32_C( -629.37), SIMDE_FLOAT32_C( -198.67), SIMDE_FLOAT32_C( 447.98), SIMDE_FLOAT32_C( -744.41), SIMDE_FLOAT32_C( 39.32), SIMDE_FLOAT32_C( -489.88), SIMDE_FLOAT32_C( -490.28), SIMDE_FLOAT32_C( 841.53), SIMDE_FLOAT32_C( 957.37), SIMDE_FLOAT32_C( -359.60), SIMDE_FLOAT32_C( -80.71), SIMDE_FLOAT32_C( 675.92), SIMDE_FLOAT32_C( 490.41), SIMDE_FLOAT32_C( 51.33), SIMDE_FLOAT32_C( -895.71)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -213.75), SIMDE_FLOAT32_C( -629.37), SIMDE_FLOAT32_C( -198.67), SIMDE_FLOAT32_C( 447.98), SIMDE_FLOAT32_C( -744.41), SIMDE_FLOAT32_C( 39.32), SIMDE_FLOAT32_C( -359.60), SIMDE_FLOAT32_C( -490.28), SIMDE_FLOAT32_C( 841.53), SIMDE_FLOAT32_C( -292.35), SIMDE_FLOAT32_C( -835.53), SIMDE_FLOAT32_C( -80.71), SIMDE_FLOAT32_C( 675.92), SIMDE_FLOAT32_C( 490.41), SIMDE_FLOAT32_C( 51.33), SIMDE_FLOAT32_C( -895.71)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 907.23), SIMDE_FLOAT32_C( -629.37), SIMDE_FLOAT32_C( -359.60), SIMDE_FLOAT32_C( -934.92), SIMDE_FLOAT32_C( -744.41), SIMDE_FLOAT32_C( 39.32), SIMDE_FLOAT32_C( -489.88), SIMDE_FLOAT32_C( -490.28), SIMDE_FLOAT32_C( 841.53), SIMDE_FLOAT32_C( -292.35), SIMDE_FLOAT32_C( -835.53), SIMDE_FLOAT32_C( -80.71), SIMDE_FLOAT32_C( 675.92), SIMDE_FLOAT32_C( 490.41), SIMDE_FLOAT32_C( 51.33), SIMDE_FLOAT32_C( -895.71)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 799.57), SIMDE_FLOAT32_C( -820.22), SIMDE_FLOAT32_C( -959.11), SIMDE_FLOAT32_C( 268.99), SIMDE_FLOAT32_C( -207.87), SIMDE_FLOAT32_C( -765.42), SIMDE_FLOAT32_C( 138.83), SIMDE_FLOAT32_C( 699.07), SIMDE_FLOAT32_C( -143.73), SIMDE_FLOAT32_C( 709.96), SIMDE_FLOAT32_C( -767.34), SIMDE_FLOAT32_C( -588.28), SIMDE_FLOAT32_C( 586.29), SIMDE_FLOAT32_C( -760.88), SIMDE_FLOAT32_C( -617.12), SIMDE_FLOAT32_C( -751.58)), UINT16_C( 7353), simde_mm512_set_ps(SIMDE_FLOAT32_C( -786.23), SIMDE_FLOAT32_C( 873.30), SIMDE_FLOAT32_C( 241.45), 
SIMDE_FLOAT32_C( -432.13), SIMDE_FLOAT32_C( -842.15), SIMDE_FLOAT32_C( 908.84), SIMDE_FLOAT32_C( -315.78), SIMDE_FLOAT32_C( -86.06), SIMDE_FLOAT32_C( -413.85), SIMDE_FLOAT32_C( 752.26), SIMDE_FLOAT32_C( 609.29), SIMDE_FLOAT32_C( 878.85), SIMDE_FLOAT32_C( 846.67), SIMDE_FLOAT32_C( 670.52), SIMDE_FLOAT32_C( 149.72), SIMDE_FLOAT32_C( 213.24)), simde_mm_set_ps(SIMDE_FLOAT32_C( 989.70), SIMDE_FLOAT32_C( -433.33), SIMDE_FLOAT32_C( -695.21), SIMDE_FLOAT32_C( 48.49)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 799.57), SIMDE_FLOAT32_C( -820.22), SIMDE_FLOAT32_C( -959.11), SIMDE_FLOAT32_C( -432.13), SIMDE_FLOAT32_C( -842.15), SIMDE_FLOAT32_C( 908.84), SIMDE_FLOAT32_C( 138.83), SIMDE_FLOAT32_C( 699.07), SIMDE_FLOAT32_C( -413.85), SIMDE_FLOAT32_C( 709.96), SIMDE_FLOAT32_C( 609.29), SIMDE_FLOAT32_C( 878.85), SIMDE_FLOAT32_C( 989.70), SIMDE_FLOAT32_C( -760.88), SIMDE_FLOAT32_C( -617.12), SIMDE_FLOAT32_C( 48.49)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 799.57), SIMDE_FLOAT32_C( -820.22), SIMDE_FLOAT32_C( -959.11), SIMDE_FLOAT32_C( -432.13), SIMDE_FLOAT32_C( -842.15), SIMDE_FLOAT32_C( 908.84), SIMDE_FLOAT32_C( 138.83), SIMDE_FLOAT32_C( 699.07), SIMDE_FLOAT32_C( 989.70), SIMDE_FLOAT32_C( 709.96), SIMDE_FLOAT32_C( -695.21), SIMDE_FLOAT32_C( 48.49), SIMDE_FLOAT32_C( 846.67), SIMDE_FLOAT32_C( -760.88), SIMDE_FLOAT32_C( -617.12), SIMDE_FLOAT32_C( 213.24)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 799.57), SIMDE_FLOAT32_C( -820.22), SIMDE_FLOAT32_C( -959.11), SIMDE_FLOAT32_C( -432.13), SIMDE_FLOAT32_C( 989.70), SIMDE_FLOAT32_C( -433.33), SIMDE_FLOAT32_C( 138.83), SIMDE_FLOAT32_C( 699.07), SIMDE_FLOAT32_C( -413.85), SIMDE_FLOAT32_C( 709.96), SIMDE_FLOAT32_C( 609.29), SIMDE_FLOAT32_C( 878.85), SIMDE_FLOAT32_C( 846.67), SIMDE_FLOAT32_C( -760.88), SIMDE_FLOAT32_C( -617.12), SIMDE_FLOAT32_C( 213.24)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 799.57), SIMDE_FLOAT32_C( -820.22), SIMDE_FLOAT32_C( -959.11), SIMDE_FLOAT32_C( 48.49), SIMDE_FLOAT32_C( -842.15), SIMDE_FLOAT32_C( 908.84), SIMDE_FLOAT32_C( 138.83), 
SIMDE_FLOAT32_C( 699.07), SIMDE_FLOAT32_C( -413.85), SIMDE_FLOAT32_C( 709.96), SIMDE_FLOAT32_C( 609.29), SIMDE_FLOAT32_C( 878.85), SIMDE_FLOAT32_C( 846.67), SIMDE_FLOAT32_C( -760.88), SIMDE_FLOAT32_C( -617.12), SIMDE_FLOAT32_C( 213.24)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -282.31), SIMDE_FLOAT32_C( -656.10), SIMDE_FLOAT32_C( -789.54), SIMDE_FLOAT32_C( -790.16), SIMDE_FLOAT32_C( -415.61), SIMDE_FLOAT32_C( 994.61), SIMDE_FLOAT32_C( -659.70), SIMDE_FLOAT32_C( 493.30), SIMDE_FLOAT32_C( 831.29), SIMDE_FLOAT32_C( -619.50), SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( -492.61), SIMDE_FLOAT32_C( -68.16), SIMDE_FLOAT32_C( 717.69), SIMDE_FLOAT32_C( -663.74), SIMDE_FLOAT32_C( 179.29)), UINT16_C(58590), simde_mm512_set_ps(SIMDE_FLOAT32_C( -236.67), SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( -925.76), SIMDE_FLOAT32_C( -915.62), SIMDE_FLOAT32_C( -30.05), SIMDE_FLOAT32_C( -70.79), SIMDE_FLOAT32_C( 493.65), SIMDE_FLOAT32_C( 52.79), SIMDE_FLOAT32_C( 835.54), SIMDE_FLOAT32_C( -712.24), SIMDE_FLOAT32_C( 518.12), SIMDE_FLOAT32_C( -173.80), SIMDE_FLOAT32_C( 487.08), SIMDE_FLOAT32_C( 180.78), SIMDE_FLOAT32_C( -289.23), SIMDE_FLOAT32_C( 918.52)), simde_mm_set_ps(SIMDE_FLOAT32_C( 280.87), SIMDE_FLOAT32_C( -904.81), SIMDE_FLOAT32_C( 380.83), SIMDE_FLOAT32_C( 50.50)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -236.67), SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( -925.76), SIMDE_FLOAT32_C( -790.16), SIMDE_FLOAT32_C( -415.61), SIMDE_FLOAT32_C( -70.79), SIMDE_FLOAT32_C( -659.70), SIMDE_FLOAT32_C( 493.30), SIMDE_FLOAT32_C( 835.54), SIMDE_FLOAT32_C( -712.24), SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( -173.80), SIMDE_FLOAT32_C( 280.87), SIMDE_FLOAT32_C( -904.81), SIMDE_FLOAT32_C( 380.83), SIMDE_FLOAT32_C( 179.29)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -236.67), SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( -925.76), SIMDE_FLOAT32_C( -790.16), SIMDE_FLOAT32_C( -415.61), SIMDE_FLOAT32_C( -70.79), SIMDE_FLOAT32_C( -659.70), SIMDE_FLOAT32_C( 493.30), SIMDE_FLOAT32_C( 280.87), SIMDE_FLOAT32_C( -904.81), 
SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( 50.50), SIMDE_FLOAT32_C( 487.08), SIMDE_FLOAT32_C( 180.78), SIMDE_FLOAT32_C( -289.23), SIMDE_FLOAT32_C( 179.29)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -236.67), SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( -925.76), SIMDE_FLOAT32_C( -790.16), SIMDE_FLOAT32_C( -415.61), SIMDE_FLOAT32_C( -904.81), SIMDE_FLOAT32_C( -659.70), SIMDE_FLOAT32_C( 493.30), SIMDE_FLOAT32_C( 835.54), SIMDE_FLOAT32_C( -712.24), SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( -173.80), SIMDE_FLOAT32_C( 487.08), SIMDE_FLOAT32_C( 180.78), SIMDE_FLOAT32_C( -289.23), SIMDE_FLOAT32_C( 179.29)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 280.87), SIMDE_FLOAT32_C( -904.81), SIMDE_FLOAT32_C( 380.83), SIMDE_FLOAT32_C( -790.16), SIMDE_FLOAT32_C( -415.61), SIMDE_FLOAT32_C( -70.79), SIMDE_FLOAT32_C( -659.70), SIMDE_FLOAT32_C( 493.30), SIMDE_FLOAT32_C( 835.54), SIMDE_FLOAT32_C( -712.24), SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( -173.80), SIMDE_FLOAT32_C( 487.08), SIMDE_FLOAT32_C( 180.78), SIMDE_FLOAT32_C( -289.23), SIMDE_FLOAT32_C( 179.29)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -318.40), SIMDE_FLOAT32_C( -844.73), SIMDE_FLOAT32_C( 757.65), SIMDE_FLOAT32_C( 677.41), SIMDE_FLOAT32_C( -352.24), SIMDE_FLOAT32_C( -479.79), SIMDE_FLOAT32_C( 602.83), SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( -388.47), SIMDE_FLOAT32_C( -643.43), SIMDE_FLOAT32_C( -331.34), SIMDE_FLOAT32_C( 72.67), SIMDE_FLOAT32_C( -870.79), SIMDE_FLOAT32_C( -722.44), SIMDE_FLOAT32_C( 529.44), SIMDE_FLOAT32_C( -949.73)), UINT16_C(51237), simde_mm512_set_ps(SIMDE_FLOAT32_C( 915.68), SIMDE_FLOAT32_C( -53.79), SIMDE_FLOAT32_C( 703.31), SIMDE_FLOAT32_C( 930.79), SIMDE_FLOAT32_C( 111.33), SIMDE_FLOAT32_C( -176.75), SIMDE_FLOAT32_C( -316.94), SIMDE_FLOAT32_C( 639.68), SIMDE_FLOAT32_C( -783.00), SIMDE_FLOAT32_C( -102.18), SIMDE_FLOAT32_C( 960.00), SIMDE_FLOAT32_C( 22.93), SIMDE_FLOAT32_C( -395.13), SIMDE_FLOAT32_C( 145.63), SIMDE_FLOAT32_C( -149.04), SIMDE_FLOAT32_C( 214.37)), simde_mm_set_ps(SIMDE_FLOAT32_C( -668.43), 
SIMDE_FLOAT32_C( -235.48), SIMDE_FLOAT32_C( -210.00), SIMDE_FLOAT32_C( 476.89)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 915.68), SIMDE_FLOAT32_C( -53.79), SIMDE_FLOAT32_C( 757.65), SIMDE_FLOAT32_C( 677.41), SIMDE_FLOAT32_C( 111.33), SIMDE_FLOAT32_C( -479.79), SIMDE_FLOAT32_C( 602.83), SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( -388.47), SIMDE_FLOAT32_C( -643.43), SIMDE_FLOAT32_C( 960.00), SIMDE_FLOAT32_C( 72.67), SIMDE_FLOAT32_C( -870.79), SIMDE_FLOAT32_C( -235.48), SIMDE_FLOAT32_C( 529.44), SIMDE_FLOAT32_C( 476.89)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 915.68), SIMDE_FLOAT32_C( -53.79), SIMDE_FLOAT32_C( 757.65), SIMDE_FLOAT32_C( 677.41), SIMDE_FLOAT32_C( 111.33), SIMDE_FLOAT32_C( -479.79), SIMDE_FLOAT32_C( 602.83), SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( -388.47), SIMDE_FLOAT32_C( -643.43), SIMDE_FLOAT32_C( -210.00), SIMDE_FLOAT32_C( 72.67), SIMDE_FLOAT32_C( -870.79), SIMDE_FLOAT32_C( 145.63), SIMDE_FLOAT32_C( 529.44), SIMDE_FLOAT32_C( 214.37)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 915.68), SIMDE_FLOAT32_C( -53.79), SIMDE_FLOAT32_C( 757.65), SIMDE_FLOAT32_C( 677.41), SIMDE_FLOAT32_C( -668.43), SIMDE_FLOAT32_C( -479.79), SIMDE_FLOAT32_C( 602.83), SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( -388.47), SIMDE_FLOAT32_C( -643.43), SIMDE_FLOAT32_C( 960.00), SIMDE_FLOAT32_C( 72.67), SIMDE_FLOAT32_C( -870.79), SIMDE_FLOAT32_C( 145.63), SIMDE_FLOAT32_C( 529.44), SIMDE_FLOAT32_C( 214.37)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -668.43), SIMDE_FLOAT32_C( -235.48), SIMDE_FLOAT32_C( 757.65), SIMDE_FLOAT32_C( 677.41), SIMDE_FLOAT32_C( 111.33), SIMDE_FLOAT32_C( -479.79), SIMDE_FLOAT32_C( 602.83), SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( -388.47), SIMDE_FLOAT32_C( -643.43), SIMDE_FLOAT32_C( 960.00), SIMDE_FLOAT32_C( 72.67), SIMDE_FLOAT32_C( -870.79), SIMDE_FLOAT32_C( 145.63), SIMDE_FLOAT32_C( 529.44), SIMDE_FLOAT32_C( 214.37)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -96.70), SIMDE_FLOAT32_C( -44.05), SIMDE_FLOAT32_C( 432.42), SIMDE_FLOAT32_C( 421.92), SIMDE_FLOAT32_C( 718.96), 
SIMDE_FLOAT32_C( -864.59), SIMDE_FLOAT32_C( -334.42), SIMDE_FLOAT32_C( 660.53), SIMDE_FLOAT32_C( 748.73), SIMDE_FLOAT32_C( 996.15), SIMDE_FLOAT32_C( -607.82), SIMDE_FLOAT32_C( 296.47), SIMDE_FLOAT32_C( -327.04), SIMDE_FLOAT32_C( -23.06), SIMDE_FLOAT32_C( -95.21), SIMDE_FLOAT32_C( 10.75)), UINT16_C(44895), simde_mm512_set_ps(SIMDE_FLOAT32_C( 409.27), SIMDE_FLOAT32_C( 481.83), SIMDE_FLOAT32_C( 601.37), SIMDE_FLOAT32_C( -660.24), SIMDE_FLOAT32_C( -675.56), SIMDE_FLOAT32_C( -194.09), SIMDE_FLOAT32_C( 149.22), SIMDE_FLOAT32_C( 161.52), SIMDE_FLOAT32_C( 632.78), SIMDE_FLOAT32_C( 346.90), SIMDE_FLOAT32_C( -777.05), SIMDE_FLOAT32_C( 621.95), SIMDE_FLOAT32_C( -116.18), SIMDE_FLOAT32_C( 909.84), SIMDE_FLOAT32_C( 696.35), SIMDE_FLOAT32_C( -644.31)), simde_mm_set_ps(SIMDE_FLOAT32_C( 744.11), SIMDE_FLOAT32_C( 849.34), SIMDE_FLOAT32_C( 350.11), SIMDE_FLOAT32_C( 112.68)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 409.27), SIMDE_FLOAT32_C( -44.05), SIMDE_FLOAT32_C( 601.37), SIMDE_FLOAT32_C( 421.92), SIMDE_FLOAT32_C( -675.56), SIMDE_FLOAT32_C( -194.09), SIMDE_FLOAT32_C( 149.22), SIMDE_FLOAT32_C( 161.52), SIMDE_FLOAT32_C( 748.73), SIMDE_FLOAT32_C( 346.90), SIMDE_FLOAT32_C( -607.82), SIMDE_FLOAT32_C( 621.95), SIMDE_FLOAT32_C( 744.11), SIMDE_FLOAT32_C( 849.34), SIMDE_FLOAT32_C( 350.11), SIMDE_FLOAT32_C( 112.68)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 409.27), SIMDE_FLOAT32_C( -44.05), SIMDE_FLOAT32_C( 601.37), SIMDE_FLOAT32_C( 421.92), SIMDE_FLOAT32_C( -675.56), SIMDE_FLOAT32_C( -194.09), SIMDE_FLOAT32_C( 149.22), SIMDE_FLOAT32_C( 161.52), SIMDE_FLOAT32_C( 748.73), SIMDE_FLOAT32_C( 849.34), SIMDE_FLOAT32_C( -607.82), SIMDE_FLOAT32_C( 112.68), SIMDE_FLOAT32_C( -116.18), SIMDE_FLOAT32_C( 909.84), SIMDE_FLOAT32_C( 696.35), SIMDE_FLOAT32_C( -644.31)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 409.27), SIMDE_FLOAT32_C( -44.05), SIMDE_FLOAT32_C( 601.37), SIMDE_FLOAT32_C( 421.92), SIMDE_FLOAT32_C( 744.11), SIMDE_FLOAT32_C( 849.34), SIMDE_FLOAT32_C( 350.11), SIMDE_FLOAT32_C( 112.68), SIMDE_FLOAT32_C( 
748.73), SIMDE_FLOAT32_C( 346.90), SIMDE_FLOAT32_C( -607.82), SIMDE_FLOAT32_C( 621.95), SIMDE_FLOAT32_C( -116.18), SIMDE_FLOAT32_C( 909.84), SIMDE_FLOAT32_C( 696.35), SIMDE_FLOAT32_C( -644.31)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 744.11), SIMDE_FLOAT32_C( -44.05), SIMDE_FLOAT32_C( 350.11), SIMDE_FLOAT32_C( 421.92), SIMDE_FLOAT32_C( -675.56), SIMDE_FLOAT32_C( -194.09), SIMDE_FLOAT32_C( 149.22), SIMDE_FLOAT32_C( 161.52), SIMDE_FLOAT32_C( 748.73), SIMDE_FLOAT32_C( 346.90), SIMDE_FLOAT32_C( -607.82), SIMDE_FLOAT32_C( 621.95), SIMDE_FLOAT32_C( -116.18), SIMDE_FLOAT32_C( 909.84), SIMDE_FLOAT32_C( 696.35), SIMDE_FLOAT32_C( -644.31)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -291.83), SIMDE_FLOAT32_C( 951.57), SIMDE_FLOAT32_C( 272.52), SIMDE_FLOAT32_C( -735.05), SIMDE_FLOAT32_C( 5.84), SIMDE_FLOAT32_C( 39.14), SIMDE_FLOAT32_C( 124.68), SIMDE_FLOAT32_C( -448.70), SIMDE_FLOAT32_C( 122.69), SIMDE_FLOAT32_C( 65.13), SIMDE_FLOAT32_C( -972.27), SIMDE_FLOAT32_C( 628.22), SIMDE_FLOAT32_C( 383.31), SIMDE_FLOAT32_C( 641.22), SIMDE_FLOAT32_C( -747.07), SIMDE_FLOAT32_C( -762.67)), UINT16_C(25691), simde_mm512_set_ps(SIMDE_FLOAT32_C( 830.24), SIMDE_FLOAT32_C( 903.53), SIMDE_FLOAT32_C( -831.08), SIMDE_FLOAT32_C( 815.07), SIMDE_FLOAT32_C( 196.06), SIMDE_FLOAT32_C( -83.06), SIMDE_FLOAT32_C( 687.82), SIMDE_FLOAT32_C( -517.82), SIMDE_FLOAT32_C( -294.36), SIMDE_FLOAT32_C( 702.71), SIMDE_FLOAT32_C( -920.22), SIMDE_FLOAT32_C( -923.04), SIMDE_FLOAT32_C( -251.41), SIMDE_FLOAT32_C( 223.24), SIMDE_FLOAT32_C( -528.18), SIMDE_FLOAT32_C( 813.86)), simde_mm_set_ps(SIMDE_FLOAT32_C( 757.18), SIMDE_FLOAT32_C( 318.14), SIMDE_FLOAT32_C( 214.84), SIMDE_FLOAT32_C( 399.19)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -291.83), SIMDE_FLOAT32_C( 903.53), SIMDE_FLOAT32_C( -831.08), SIMDE_FLOAT32_C( -735.05), SIMDE_FLOAT32_C( 5.84), SIMDE_FLOAT32_C( -83.06), SIMDE_FLOAT32_C( 124.68), SIMDE_FLOAT32_C( -448.70), SIMDE_FLOAT32_C( 122.69), SIMDE_FLOAT32_C( 702.71), SIMDE_FLOAT32_C( -972.27), SIMDE_FLOAT32_C( -923.04), 
SIMDE_FLOAT32_C( 757.18), SIMDE_FLOAT32_C( 641.22), SIMDE_FLOAT32_C( 214.84), SIMDE_FLOAT32_C( 399.19)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -291.83), SIMDE_FLOAT32_C( 903.53), SIMDE_FLOAT32_C( -831.08), SIMDE_FLOAT32_C( -735.05), SIMDE_FLOAT32_C( 5.84), SIMDE_FLOAT32_C( -83.06), SIMDE_FLOAT32_C( 124.68), SIMDE_FLOAT32_C( -448.70), SIMDE_FLOAT32_C( 122.69), SIMDE_FLOAT32_C( 318.14), SIMDE_FLOAT32_C( -972.27), SIMDE_FLOAT32_C( 399.19), SIMDE_FLOAT32_C( -251.41), SIMDE_FLOAT32_C( 641.22), SIMDE_FLOAT32_C( -528.18), SIMDE_FLOAT32_C( 813.86)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -291.83), SIMDE_FLOAT32_C( 903.53), SIMDE_FLOAT32_C( -831.08), SIMDE_FLOAT32_C( -735.05), SIMDE_FLOAT32_C( 5.84), SIMDE_FLOAT32_C( 318.14), SIMDE_FLOAT32_C( 124.68), SIMDE_FLOAT32_C( -448.70), SIMDE_FLOAT32_C( 122.69), SIMDE_FLOAT32_C( 702.71), SIMDE_FLOAT32_C( -972.27), SIMDE_FLOAT32_C( -923.04), SIMDE_FLOAT32_C( -251.41), SIMDE_FLOAT32_C( 641.22), SIMDE_FLOAT32_C( -528.18), SIMDE_FLOAT32_C( 813.86)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -291.83), SIMDE_FLOAT32_C( 318.14), SIMDE_FLOAT32_C( 214.84), SIMDE_FLOAT32_C( -735.05), SIMDE_FLOAT32_C( 5.84), SIMDE_FLOAT32_C( -83.06), SIMDE_FLOAT32_C( 124.68), SIMDE_FLOAT32_C( -448.70), SIMDE_FLOAT32_C( 122.69), SIMDE_FLOAT32_C( 702.71), SIMDE_FLOAT32_C( -972.27), SIMDE_FLOAT32_C( -923.04), SIMDE_FLOAT32_C( -251.41), SIMDE_FLOAT32_C( 641.22), SIMDE_FLOAT32_C( -528.18), SIMDE_FLOAT32_C( 813.86)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -836.14), SIMDE_FLOAT32_C( 203.81), SIMDE_FLOAT32_C( 976.04), SIMDE_FLOAT32_C( -539.32), SIMDE_FLOAT32_C( -223.45), SIMDE_FLOAT32_C( 385.89), SIMDE_FLOAT32_C( 181.57), SIMDE_FLOAT32_C( 739.21), SIMDE_FLOAT32_C( 355.61), SIMDE_FLOAT32_C( -450.77), SIMDE_FLOAT32_C( 343.42), SIMDE_FLOAT32_C( -703.95), SIMDE_FLOAT32_C( 169.29), SIMDE_FLOAT32_C( 932.53), SIMDE_FLOAT32_C( -653.71), SIMDE_FLOAT32_C( 371.73)), UINT16_C(40766), simde_mm512_set_ps(SIMDE_FLOAT32_C( -508.71), SIMDE_FLOAT32_C( -257.73), SIMDE_FLOAT32_C( 937.76), 
SIMDE_FLOAT32_C( -978.21), SIMDE_FLOAT32_C( 677.79), SIMDE_FLOAT32_C( 768.71), SIMDE_FLOAT32_C( -498.71), SIMDE_FLOAT32_C( 101.56), SIMDE_FLOAT32_C( 211.34), SIMDE_FLOAT32_C( -159.54), SIMDE_FLOAT32_C( 297.01), SIMDE_FLOAT32_C( 147.67), SIMDE_FLOAT32_C( -855.53), SIMDE_FLOAT32_C( 391.08), SIMDE_FLOAT32_C( -710.54), SIMDE_FLOAT32_C( -140.51)), simde_mm_set_ps(SIMDE_FLOAT32_C( 397.77), SIMDE_FLOAT32_C( 348.86), SIMDE_FLOAT32_C( 24.57), SIMDE_FLOAT32_C( -908.02)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -508.71), SIMDE_FLOAT32_C( 203.81), SIMDE_FLOAT32_C( 976.04), SIMDE_FLOAT32_C( -978.21), SIMDE_FLOAT32_C( 677.79), SIMDE_FLOAT32_C( 768.71), SIMDE_FLOAT32_C( -498.71), SIMDE_FLOAT32_C( 101.56), SIMDE_FLOAT32_C( 355.61), SIMDE_FLOAT32_C( -450.77), SIMDE_FLOAT32_C( 297.01), SIMDE_FLOAT32_C( 147.67), SIMDE_FLOAT32_C( 397.77), SIMDE_FLOAT32_C( 348.86), SIMDE_FLOAT32_C( 24.57), SIMDE_FLOAT32_C( 371.73)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -508.71), SIMDE_FLOAT32_C( 203.81), SIMDE_FLOAT32_C( 976.04), SIMDE_FLOAT32_C( -978.21), SIMDE_FLOAT32_C( 677.79), SIMDE_FLOAT32_C( 768.71), SIMDE_FLOAT32_C( -498.71), SIMDE_FLOAT32_C( 101.56), SIMDE_FLOAT32_C( 355.61), SIMDE_FLOAT32_C( -450.77), SIMDE_FLOAT32_C( 24.57), SIMDE_FLOAT32_C( -908.02), SIMDE_FLOAT32_C( -855.53), SIMDE_FLOAT32_C( 391.08), SIMDE_FLOAT32_C( -710.54), SIMDE_FLOAT32_C( 371.73)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -508.71), SIMDE_FLOAT32_C( 203.81), SIMDE_FLOAT32_C( 976.04), SIMDE_FLOAT32_C( -978.21), SIMDE_FLOAT32_C( 397.77), SIMDE_FLOAT32_C( 348.86), SIMDE_FLOAT32_C( 24.57), SIMDE_FLOAT32_C( -908.02), SIMDE_FLOAT32_C( 355.61), SIMDE_FLOAT32_C( -450.77), SIMDE_FLOAT32_C( 297.01), SIMDE_FLOAT32_C( 147.67), SIMDE_FLOAT32_C( -855.53), SIMDE_FLOAT32_C( 391.08), SIMDE_FLOAT32_C( -710.54), SIMDE_FLOAT32_C( 371.73)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 397.77), SIMDE_FLOAT32_C( 203.81), SIMDE_FLOAT32_C( 976.04), SIMDE_FLOAT32_C( -908.02), SIMDE_FLOAT32_C( 677.79), SIMDE_FLOAT32_C( 768.71), SIMDE_FLOAT32_C( -498.71), 
SIMDE_FLOAT32_C( 101.56), SIMDE_FLOAT32_C( 355.61), SIMDE_FLOAT32_C( -450.77), SIMDE_FLOAT32_C( 297.01), SIMDE_FLOAT32_C( 147.67), SIMDE_FLOAT32_C( -855.53), SIMDE_FLOAT32_C( 391.08), SIMDE_FLOAT32_C( -710.54), SIMDE_FLOAT32_C( 371.73)) }, }; /* Walk every entry of test_vec; sizeof(test_vec) / sizeof(test_vec[0]) is the number of entries in the table. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r; /* Same inputs each time; only the last argument (0..3) and the expected vector change. NOTE(review): the trailing 1 passed to simde_assert_m512_close is presumably a precision/tolerance setting - confirm. */ r = simde_mm512_mask_insertf32x4(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b, 0); simde_assert_m512_close(r, test_vec[i].r0, 1); r = simde_mm512_mask_insertf32x4(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b, 1); simde_assert_m512_close(r, test_vec[i].r1, 1); r = simde_mm512_mask_insertf32x4(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b, 2); simde_assert_m512_close(r, test_vec[i].r2, 1); r = simde_mm512_mask_insertf32x4(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b, 3); simde_assert_m512_close(r, test_vec[i].r3, 1); } return 0; } static int test_simde_mm512_maskz_insertf32x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512 a; simde__m128 b; simde__m512 r0; simde__m512 r1; simde__m512 r2; simde__m512 r3; } test_vec[8] = { { UINT16_C(21335), simde_mm512_set_ps(SIMDE_FLOAT32_C( -556.90), SIMDE_FLOAT32_C( 522.06), SIMDE_FLOAT32_C( 160.98), SIMDE_FLOAT32_C( -932.28), SIMDE_FLOAT32_C( 391.82), SIMDE_FLOAT32_C( 600.12), SIMDE_FLOAT32_C( -569.99), SIMDE_FLOAT32_C( -491.12), SIMDE_FLOAT32_C( -327.63), SIMDE_FLOAT32_C( -139.11), SIMDE_FLOAT32_C( -172.36), SIMDE_FLOAT32_C( -268.86), SIMDE_FLOAT32_C( 393.53), SIMDE_FLOAT32_C( -71.72), SIMDE_FLOAT32_C( 36.69), SIMDE_FLOAT32_C( 98.47)), simde_mm_set_ps(SIMDE_FLOAT32_C( 240.79), SIMDE_FLOAT32_C( -280.30), SIMDE_FLOAT32_C( -839.80), SIMDE_FLOAT32_C( 128.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 522.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -932.28), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -569.99), SIMDE_FLOAT32_C( -491.12), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 
-139.11), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -268.86), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -280.30), SIMDE_FLOAT32_C( -839.80), SIMDE_FLOAT32_C( 128.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 522.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -932.28), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -569.99), SIMDE_FLOAT32_C( -491.12), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -280.30), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 128.51), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -71.72), SIMDE_FLOAT32_C( 36.69), SIMDE_FLOAT32_C( 98.47)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 522.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -932.28), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -839.80), SIMDE_FLOAT32_C( 128.51), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -139.11), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -268.86), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -71.72), SIMDE_FLOAT32_C( 36.69), SIMDE_FLOAT32_C( 98.47)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -280.30), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 128.51), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -569.99), SIMDE_FLOAT32_C( -491.12), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -139.11), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -268.86), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -71.72), SIMDE_FLOAT32_C( 36.69), SIMDE_FLOAT32_C( 98.47)) }, { UINT16_C(30312), simde_mm512_set_ps(SIMDE_FLOAT32_C( -736.92), SIMDE_FLOAT32_C( -921.00), SIMDE_FLOAT32_C( 283.06), SIMDE_FLOAT32_C( -471.60), SIMDE_FLOAT32_C( -333.94), SIMDE_FLOAT32_C( 483.08), SIMDE_FLOAT32_C( 903.50), SIMDE_FLOAT32_C( 232.04), SIMDE_FLOAT32_C( -43.35), SIMDE_FLOAT32_C( 774.81), SIMDE_FLOAT32_C( 309.91), SIMDE_FLOAT32_C( -599.01), SIMDE_FLOAT32_C( 846.15), SIMDE_FLOAT32_C( 69.04), SIMDE_FLOAT32_C( -514.56), SIMDE_FLOAT32_C( -149.02)), simde_mm_set_ps(SIMDE_FLOAT32_C( 47.59), SIMDE_FLOAT32_C( 51.33), SIMDE_FLOAT32_C( -895.71), 
SIMDE_FLOAT32_C( -868.59)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -921.00), SIMDE_FLOAT32_C( 283.06), SIMDE_FLOAT32_C( -471.60), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 483.08), SIMDE_FLOAT32_C( 903.50), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 774.81), SIMDE_FLOAT32_C( 309.91), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 47.59), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -921.00), SIMDE_FLOAT32_C( 283.06), SIMDE_FLOAT32_C( -471.60), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 483.08), SIMDE_FLOAT32_C( 903.50), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 51.33), SIMDE_FLOAT32_C( -895.71), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 846.15), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -921.00), SIMDE_FLOAT32_C( 283.06), SIMDE_FLOAT32_C( -471.60), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 51.33), SIMDE_FLOAT32_C( -895.71), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 774.81), SIMDE_FLOAT32_C( 309.91), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 846.15), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 51.33), SIMDE_FLOAT32_C( -895.71), SIMDE_FLOAT32_C( -868.59), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 483.08), SIMDE_FLOAT32_C( 903.50), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 774.81), SIMDE_FLOAT32_C( 309.91), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 846.15), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(24099), simde_mm512_set_ps(SIMDE_FLOAT32_C( 39.32), SIMDE_FLOAT32_C( -489.88), SIMDE_FLOAT32_C( 29.68), SIMDE_FLOAT32_C( -37.49), SIMDE_FLOAT32_C( -490.28), SIMDE_FLOAT32_C( -373.66), SIMDE_FLOAT32_C( 841.53), SIMDE_FLOAT32_C( -292.35), SIMDE_FLOAT32_C( 526.21), 
SIMDE_FLOAT32_C( -835.53), SIMDE_FLOAT32_C( -203.04), SIMDE_FLOAT32_C( 571.79), SIMDE_FLOAT32_C( -80.71), SIMDE_FLOAT32_C( 675.92), SIMDE_FLOAT32_C( 632.01), SIMDE_FLOAT32_C( 490.41)), simde_mm_set_ps(SIMDE_FLOAT32_C( -278.32), SIMDE_FLOAT32_C( -925.69), SIMDE_FLOAT32_C( -744.41), SIMDE_FLOAT32_C( 717.83)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -489.88), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -37.49), SIMDE_FLOAT32_C( -490.28), SIMDE_FLOAT32_C( -373.66), SIMDE_FLOAT32_C( 841.53), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -203.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -744.41), SIMDE_FLOAT32_C( 717.83)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -489.88), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -37.49), SIMDE_FLOAT32_C( -490.28), SIMDE_FLOAT32_C( -373.66), SIMDE_FLOAT32_C( 841.53), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -744.41), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 632.01), SIMDE_FLOAT32_C( 490.41)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -489.88), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -37.49), SIMDE_FLOAT32_C( -278.32), SIMDE_FLOAT32_C( -925.69), SIMDE_FLOAT32_C( -744.41), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -203.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 632.01), SIMDE_FLOAT32_C( 490.41)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -925.69), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 717.83), SIMDE_FLOAT32_C( -490.28), SIMDE_FLOAT32_C( -373.66), SIMDE_FLOAT32_C( 841.53), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -203.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 632.01), SIMDE_FLOAT32_C( 
490.41)) }, { UINT16_C(31949), simde_mm512_set_ps(SIMDE_FLOAT32_C( -760.88), SIMDE_FLOAT32_C( 149.72), SIMDE_FLOAT32_C( -617.12), SIMDE_FLOAT32_C( 213.24), SIMDE_FLOAT32_C( -751.58), SIMDE_FLOAT32_C( -577.36), SIMDE_FLOAT32_C( 907.23), SIMDE_FLOAT32_C( 957.37), SIMDE_FLOAT32_C( -359.60), SIMDE_FLOAT32_C( -934.92), SIMDE_FLOAT32_C( -213.75), SIMDE_FLOAT32_C( -657.02), SIMDE_FLOAT32_C( 403.00), SIMDE_FLOAT32_C( -629.37), SIMDE_FLOAT32_C( -198.67), SIMDE_FLOAT32_C( 337.35)), simde_mm_set_ps(SIMDE_FLOAT32_C( -588.28), SIMDE_FLOAT32_C( 846.67), SIMDE_FLOAT32_C( 586.29), SIMDE_FLOAT32_C( 670.52)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 149.72), SIMDE_FLOAT32_C( -617.12), SIMDE_FLOAT32_C( 213.24), SIMDE_FLOAT32_C( -751.58), SIMDE_FLOAT32_C( -577.36), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -359.60), SIMDE_FLOAT32_C( -934.92), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -588.28), SIMDE_FLOAT32_C( 846.67), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 670.52)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 149.72), SIMDE_FLOAT32_C( -617.12), SIMDE_FLOAT32_C( 213.24), SIMDE_FLOAT32_C( -751.58), SIMDE_FLOAT32_C( -577.36), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -588.28), SIMDE_FLOAT32_C( 846.67), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 403.00), SIMDE_FLOAT32_C( -629.37), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 337.35)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 149.72), SIMDE_FLOAT32_C( -617.12), SIMDE_FLOAT32_C( 213.24), SIMDE_FLOAT32_C( -588.28), SIMDE_FLOAT32_C( 846.67), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -359.60), SIMDE_FLOAT32_C( -934.92), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 403.00), SIMDE_FLOAT32_C( -629.37), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 337.35)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 846.67), SIMDE_FLOAT32_C( 586.29), SIMDE_FLOAT32_C( 
670.52), SIMDE_FLOAT32_C( -751.58), SIMDE_FLOAT32_C( -577.36), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -359.60), SIMDE_FLOAT32_C( -934.92), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 403.00), SIMDE_FLOAT32_C( -629.37), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 337.35)) }, { UINT16_C(10665), simde_mm512_set_ps(SIMDE_FLOAT32_C( -432.13), SIMDE_FLOAT32_C( 268.99), SIMDE_FLOAT32_C( -842.15), SIMDE_FLOAT32_C( -207.87), SIMDE_FLOAT32_C( 908.84), SIMDE_FLOAT32_C( -765.42), SIMDE_FLOAT32_C( -315.78), SIMDE_FLOAT32_C( 138.83), SIMDE_FLOAT32_C( -86.06), SIMDE_FLOAT32_C( 699.07), SIMDE_FLOAT32_C( -413.85), SIMDE_FLOAT32_C( -143.73), SIMDE_FLOAT32_C( 752.26), SIMDE_FLOAT32_C( 709.96), SIMDE_FLOAT32_C( 609.29), SIMDE_FLOAT32_C( -767.34)), simde_mm_set_ps(SIMDE_FLOAT32_C( 873.30), SIMDE_FLOAT32_C( -820.22), SIMDE_FLOAT32_C( 241.45), SIMDE_FLOAT32_C( -959.11)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -842.15), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 908.84), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 138.83), SIMDE_FLOAT32_C( -86.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -413.85), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 873.30), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -959.11)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -842.15), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 908.84), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 138.83), SIMDE_FLOAT32_C( 873.30), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 241.45), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 752.26), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -767.34)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -842.15), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 873.30), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -959.11), SIMDE_FLOAT32_C( -86.06), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -413.85), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 752.26), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -767.34)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 241.45), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 908.84), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 138.83), SIMDE_FLOAT32_C( -86.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -413.85), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 752.26), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -767.34)) }, { UINT16_C(12797), simde_mm512_set_ps(SIMDE_FLOAT32_C( -173.80), SIMDE_FLOAT32_C( -492.61), SIMDE_FLOAT32_C( 487.08), SIMDE_FLOAT32_C( -68.16), SIMDE_FLOAT32_C( 180.78), SIMDE_FLOAT32_C( 717.69), SIMDE_FLOAT32_C( -289.23), SIMDE_FLOAT32_C( -663.74), SIMDE_FLOAT32_C( 918.52), SIMDE_FLOAT32_C( 179.29), SIMDE_FLOAT32_C( -422.76), SIMDE_FLOAT32_C( 989.70), SIMDE_FLOAT32_C( -433.33), SIMDE_FLOAT32_C( -695.21), SIMDE_FLOAT32_C( 48.49), SIMDE_FLOAT32_C( -786.23)), simde_mm_set_ps(SIMDE_FLOAT32_C( -712.24), SIMDE_FLOAT32_C( -619.50), SIMDE_FLOAT32_C( 518.12), SIMDE_FLOAT32_C( 952.47)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 487.08), SIMDE_FLOAT32_C( -68.16), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -663.74), SIMDE_FLOAT32_C( 918.52), SIMDE_FLOAT32_C( 179.29), SIMDE_FLOAT32_C( -422.76), SIMDE_FLOAT32_C( 989.70), SIMDE_FLOAT32_C( -712.24), SIMDE_FLOAT32_C( -619.50), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 952.47)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 487.08), SIMDE_FLOAT32_C( -68.16), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -663.74), SIMDE_FLOAT32_C( -712.24), SIMDE_FLOAT32_C( -619.50), SIMDE_FLOAT32_C( 518.12), SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( -433.33), SIMDE_FLOAT32_C( -695.21), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -786.23)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 487.08), SIMDE_FLOAT32_C( -68.16), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( 918.52), SIMDE_FLOAT32_C( 179.29), SIMDE_FLOAT32_C( -422.76), SIMDE_FLOAT32_C( 989.70), SIMDE_FLOAT32_C( -433.33), SIMDE_FLOAT32_C( -695.21), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -786.23)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 518.12), SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -663.74), SIMDE_FLOAT32_C( 918.52), SIMDE_FLOAT32_C( 179.29), SIMDE_FLOAT32_C( -422.76), SIMDE_FLOAT32_C( 989.70), SIMDE_FLOAT32_C( -433.33), SIMDE_FLOAT32_C( -695.21), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -786.23)) }, { UINT16_C(46928), simde_mm512_set_ps(SIMDE_FLOAT32_C( -282.31), SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( -656.10), SIMDE_FLOAT32_C( -925.76), SIMDE_FLOAT32_C( -789.54), SIMDE_FLOAT32_C( -915.62), SIMDE_FLOAT32_C( -790.16), SIMDE_FLOAT32_C( -30.05), SIMDE_FLOAT32_C( -415.61), SIMDE_FLOAT32_C( -70.79), SIMDE_FLOAT32_C( 994.61), SIMDE_FLOAT32_C( 493.65), SIMDE_FLOAT32_C( -659.70), SIMDE_FLOAT32_C( 52.79), SIMDE_FLOAT32_C( 493.30), SIMDE_FLOAT32_C( 835.54)), simde_mm_set_ps(SIMDE_FLOAT32_C( -904.81), SIMDE_FLOAT32_C( 380.83), SIMDE_FLOAT32_C( 50.50), SIMDE_FLOAT32_C( -236.67)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -282.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -656.10), SIMDE_FLOAT32_C( -925.76), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -915.62), SIMDE_FLOAT32_C( -790.16), SIMDE_FLOAT32_C( -30.05), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -70.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 493.65), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -282.31), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( -656.10), SIMDE_FLOAT32_C( -925.76), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -915.62), SIMDE_FLOAT32_C( -790.16), SIMDE_FLOAT32_C( -30.05), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 380.83), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -236.67), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -282.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -656.10), SIMDE_FLOAT32_C( -925.76), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 380.83), SIMDE_FLOAT32_C( 50.50), SIMDE_FLOAT32_C( -236.67), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -70.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 493.65), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -904.81), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 50.50), SIMDE_FLOAT32_C( -236.67), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -915.62), SIMDE_FLOAT32_C( -790.16), SIMDE_FLOAT32_C( -30.05), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -70.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 493.65), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(30100), simde_mm512_set_ps(SIMDE_FLOAT32_C( -388.47), SIMDE_FLOAT32_C( -102.18), SIMDE_FLOAT32_C( -643.43), SIMDE_FLOAT32_C( 960.00), SIMDE_FLOAT32_C( -331.34), SIMDE_FLOAT32_C( 22.93), SIMDE_FLOAT32_C( 72.67), SIMDE_FLOAT32_C( -395.13), SIMDE_FLOAT32_C( -870.79), SIMDE_FLOAT32_C( 145.63), SIMDE_FLOAT32_C( -722.44), SIMDE_FLOAT32_C( -149.04), SIMDE_FLOAT32_C( 529.44), SIMDE_FLOAT32_C( 214.37), SIMDE_FLOAT32_C( -949.73), SIMDE_FLOAT32_C( -453.25)), simde_mm_set_ps(SIMDE_FLOAT32_C( 602.83), SIMDE_FLOAT32_C( 639.68), SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( -783.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -102.18), SIMDE_FLOAT32_C( -643.43), SIMDE_FLOAT32_C( 960.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 22.93), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 
-395.13), SIMDE_FLOAT32_C( -870.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -149.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 639.68), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -102.18), SIMDE_FLOAT32_C( -643.43), SIMDE_FLOAT32_C( 960.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 22.93), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -395.13), SIMDE_FLOAT32_C( 602.83), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -783.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 214.37), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -102.18), SIMDE_FLOAT32_C( -643.43), SIMDE_FLOAT32_C( 960.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 639.68), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -783.00), SIMDE_FLOAT32_C( -870.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -149.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 214.37), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 639.68), SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( -783.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 22.93), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -395.13), SIMDE_FLOAT32_C( -870.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -149.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 214.37), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r; r = simde_mm512_maskz_insertf32x4(test_vec[i].k, test_vec[i].a, test_vec[i].b, 0); simde_assert_m512_close(r, test_vec[i].r0, 1); r = simde_mm512_maskz_insertf32x4(test_vec[i].k, test_vec[i].a, test_vec[i].b, 1); simde_assert_m512_close(r, test_vec[i].r1, 1); r = simde_mm512_maskz_insertf32x4(test_vec[i].k, test_vec[i].a, test_vec[i].b, 2); simde_assert_m512_close(r, test_vec[i].r2, 1); r = 
simde_mm512_maskz_insertf32x4(test_vec[i].k, test_vec[i].a, test_vec[i].b, 3); simde_assert_m512_close(r, test_vec[i].r3, 1); } return 0; } static int test_simde_mm512_insertf64x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m256d b; simde__m512d r0; simde__m512d r1; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -431.26), SIMDE_FLOAT64_C( 481.25), SIMDE_FLOAT64_C( -57.75), SIMDE_FLOAT64_C( -784.26), SIMDE_FLOAT64_C( 438.04), SIMDE_FLOAT64_C( 549.03), SIMDE_FLOAT64_C( 729.46), SIMDE_FLOAT64_C( 582.53)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -465.05), SIMDE_FLOAT64_C( 439.15), SIMDE_FLOAT64_C( -104.57), SIMDE_FLOAT64_C( -28.15)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -431.26), SIMDE_FLOAT64_C( 481.25), SIMDE_FLOAT64_C( -57.75), SIMDE_FLOAT64_C( -784.26), SIMDE_FLOAT64_C( -465.05), SIMDE_FLOAT64_C( 439.15), SIMDE_FLOAT64_C( -104.57), SIMDE_FLOAT64_C( -28.15)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -465.05), SIMDE_FLOAT64_C( 439.15), SIMDE_FLOAT64_C( -104.57), SIMDE_FLOAT64_C( -28.15), SIMDE_FLOAT64_C( 438.04), SIMDE_FLOAT64_C( 549.03), SIMDE_FLOAT64_C( 729.46), SIMDE_FLOAT64_C( 582.53)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -202.49), SIMDE_FLOAT64_C( -470.36), SIMDE_FLOAT64_C( 966.37), SIMDE_FLOAT64_C( 135.20), SIMDE_FLOAT64_C( -563.83), SIMDE_FLOAT64_C( 799.30), SIMDE_FLOAT64_C( 938.85), SIMDE_FLOAT64_C( -576.01)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 355.11), SIMDE_FLOAT64_C( -787.72), SIMDE_FLOAT64_C( 472.82), SIMDE_FLOAT64_C( -703.51)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -202.49), SIMDE_FLOAT64_C( -470.36), SIMDE_FLOAT64_C( 966.37), SIMDE_FLOAT64_C( 135.20), SIMDE_FLOAT64_C( 355.11), SIMDE_FLOAT64_C( -787.72), SIMDE_FLOAT64_C( 472.82), SIMDE_FLOAT64_C( -703.51)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 355.11), SIMDE_FLOAT64_C( -787.72), SIMDE_FLOAT64_C( 472.82), SIMDE_FLOAT64_C( -703.51), SIMDE_FLOAT64_C( -563.83), SIMDE_FLOAT64_C( 799.30), SIMDE_FLOAT64_C( 938.85), SIMDE_FLOAT64_C( -576.01)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 120.10), 
SIMDE_FLOAT64_C( -64.06), SIMDE_FLOAT64_C( -620.03), SIMDE_FLOAT64_C( 559.81), SIMDE_FLOAT64_C( 185.23), SIMDE_FLOAT64_C( -423.61), SIMDE_FLOAT64_C( -11.91), SIMDE_FLOAT64_C( 407.56)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -743.10), SIMDE_FLOAT64_C( -688.01), SIMDE_FLOAT64_C( 442.76), SIMDE_FLOAT64_C( 931.17)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 120.10), SIMDE_FLOAT64_C( -64.06), SIMDE_FLOAT64_C( -620.03), SIMDE_FLOAT64_C( 559.81), SIMDE_FLOAT64_C( -743.10), SIMDE_FLOAT64_C( -688.01), SIMDE_FLOAT64_C( 442.76), SIMDE_FLOAT64_C( 931.17)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -743.10), SIMDE_FLOAT64_C( -688.01), SIMDE_FLOAT64_C( 442.76), SIMDE_FLOAT64_C( 931.17), SIMDE_FLOAT64_C( 185.23), SIMDE_FLOAT64_C( -423.61), SIMDE_FLOAT64_C( -11.91), SIMDE_FLOAT64_C( 407.56)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 208.07), SIMDE_FLOAT64_C( -94.60), SIMDE_FLOAT64_C( 834.28), SIMDE_FLOAT64_C( 260.50), SIMDE_FLOAT64_C( -859.51), SIMDE_FLOAT64_C( -69.45), SIMDE_FLOAT64_C( 40.36), SIMDE_FLOAT64_C( 95.61)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 193.01), SIMDE_FLOAT64_C( -435.27), SIMDE_FLOAT64_C( -84.06), SIMDE_FLOAT64_C( 298.40)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 208.07), SIMDE_FLOAT64_C( -94.60), SIMDE_FLOAT64_C( 834.28), SIMDE_FLOAT64_C( 260.50), SIMDE_FLOAT64_C( 193.01), SIMDE_FLOAT64_C( -435.27), SIMDE_FLOAT64_C( -84.06), SIMDE_FLOAT64_C( 298.40)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 193.01), SIMDE_FLOAT64_C( -435.27), SIMDE_FLOAT64_C( -84.06), SIMDE_FLOAT64_C( 298.40), SIMDE_FLOAT64_C( -859.51), SIMDE_FLOAT64_C( -69.45), SIMDE_FLOAT64_C( 40.36), SIMDE_FLOAT64_C( 95.61)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -98.08), SIMDE_FLOAT64_C( 464.19), SIMDE_FLOAT64_C( 711.12), SIMDE_FLOAT64_C( 282.83), SIMDE_FLOAT64_C( -774.08), SIMDE_FLOAT64_C( 841.24), SIMDE_FLOAT64_C( -414.07), SIMDE_FLOAT64_C( 79.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -157.33), SIMDE_FLOAT64_C( -819.46), SIMDE_FLOAT64_C( 541.44), SIMDE_FLOAT64_C( 112.81)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -98.08), 
SIMDE_FLOAT64_C( 464.19), SIMDE_FLOAT64_C( 711.12), SIMDE_FLOAT64_C( 282.83), SIMDE_FLOAT64_C( -157.33), SIMDE_FLOAT64_C( -819.46), SIMDE_FLOAT64_C( 541.44), SIMDE_FLOAT64_C( 112.81)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -157.33), SIMDE_FLOAT64_C( -819.46), SIMDE_FLOAT64_C( 541.44), SIMDE_FLOAT64_C( 112.81), SIMDE_FLOAT64_C( -774.08), SIMDE_FLOAT64_C( 841.24), SIMDE_FLOAT64_C( -414.07), SIMDE_FLOAT64_C( 79.76)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 375.27), SIMDE_FLOAT64_C( -966.90), SIMDE_FLOAT64_C( -512.98), SIMDE_FLOAT64_C( -737.78), SIMDE_FLOAT64_C( 664.52), SIMDE_FLOAT64_C( -224.13), SIMDE_FLOAT64_C( 633.65), SIMDE_FLOAT64_C( -834.15)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -2.60), SIMDE_FLOAT64_C( -38.88), SIMDE_FLOAT64_C( 165.88), SIMDE_FLOAT64_C( 218.73)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 375.27), SIMDE_FLOAT64_C( -966.90), SIMDE_FLOAT64_C( -512.98), SIMDE_FLOAT64_C( -737.78), SIMDE_FLOAT64_C( -2.60), SIMDE_FLOAT64_C( -38.88), SIMDE_FLOAT64_C( 165.88), SIMDE_FLOAT64_C( 218.73)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -2.60), SIMDE_FLOAT64_C( -38.88), SIMDE_FLOAT64_C( 165.88), SIMDE_FLOAT64_C( 218.73), SIMDE_FLOAT64_C( 664.52), SIMDE_FLOAT64_C( -224.13), SIMDE_FLOAT64_C( 633.65), SIMDE_FLOAT64_C( -834.15)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 141.08), SIMDE_FLOAT64_C( -832.50), SIMDE_FLOAT64_C( -990.15), SIMDE_FLOAT64_C( 438.46), SIMDE_FLOAT64_C( -887.47), SIMDE_FLOAT64_C( 336.35), SIMDE_FLOAT64_C( -396.24), SIMDE_FLOAT64_C( 99.21)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -99.37), SIMDE_FLOAT64_C( -151.02), SIMDE_FLOAT64_C( 551.65), SIMDE_FLOAT64_C( 155.58)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 141.08), SIMDE_FLOAT64_C( -832.50), SIMDE_FLOAT64_C( -990.15), SIMDE_FLOAT64_C( 438.46), SIMDE_FLOAT64_C( -99.37), SIMDE_FLOAT64_C( -151.02), SIMDE_FLOAT64_C( 551.65), SIMDE_FLOAT64_C( 155.58)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -99.37), SIMDE_FLOAT64_C( -151.02), SIMDE_FLOAT64_C( 551.65), SIMDE_FLOAT64_C( 155.58), SIMDE_FLOAT64_C( -887.47), SIMDE_FLOAT64_C( 
336.35), SIMDE_FLOAT64_C( -396.24), SIMDE_FLOAT64_C( 99.21)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 666.32), SIMDE_FLOAT64_C( 364.80), SIMDE_FLOAT64_C( 759.27), SIMDE_FLOAT64_C( -524.19), SIMDE_FLOAT64_C( -726.51), SIMDE_FLOAT64_C( 381.71), SIMDE_FLOAT64_C( 819.12), SIMDE_FLOAT64_C( 145.28)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -179.98), SIMDE_FLOAT64_C( 258.23), SIMDE_FLOAT64_C( 246.22), SIMDE_FLOAT64_C( 97.85)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 666.32), SIMDE_FLOAT64_C( 364.80), SIMDE_FLOAT64_C( 759.27), SIMDE_FLOAT64_C( -524.19), SIMDE_FLOAT64_C( -179.98), SIMDE_FLOAT64_C( 258.23), SIMDE_FLOAT64_C( 246.22), SIMDE_FLOAT64_C( 97.85)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -179.98), SIMDE_FLOAT64_C( 258.23), SIMDE_FLOAT64_C( 246.22), SIMDE_FLOAT64_C( 97.85), SIMDE_FLOAT64_C( -726.51), SIMDE_FLOAT64_C( 381.71), SIMDE_FLOAT64_C( 819.12), SIMDE_FLOAT64_C( 145.28)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r; r = simde_mm512_insertf64x4(test_vec[i].a, test_vec[i].b, 0); simde_assert_m512d_close(r, test_vec[i].r0, 1); r = simde_mm512_insertf64x4(test_vec[i].a, test_vec[i].b, 1); simde_assert_m512d_close(r, test_vec[i].r1, 1); } return 0; } static int test_simde_mm512_mask_insertf64x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m256d b; simde__m512d r0; simde__m512d r1; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 160.98), SIMDE_FLOAT64_C( -932.28), SIMDE_FLOAT64_C( -569.99), SIMDE_FLOAT64_C( -327.63), SIMDE_FLOAT64_C( -172.36), SIMDE_FLOAT64_C( 393.53), SIMDE_FLOAT64_C( 128.51), SIMDE_FLOAT64_C( -556.90)), UINT8_C( 67), simde_mm512_set_pd(SIMDE_FLOAT64_C( 522.06), SIMDE_FLOAT64_C( -932.28), SIMDE_FLOAT64_C( 600.12), SIMDE_FLOAT64_C( -491.12), SIMDE_FLOAT64_C( -139.11), SIMDE_FLOAT64_C( -268.86), SIMDE_FLOAT64_C( -71.72), SIMDE_FLOAT64_C( 98.47)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -280.30), SIMDE_FLOAT64_C( -839.80), SIMDE_FLOAT64_C( 128.51), 
SIMDE_FLOAT64_C( -556.90)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 160.98), SIMDE_FLOAT64_C( -932.28), SIMDE_FLOAT64_C( -569.99), SIMDE_FLOAT64_C( -327.63), SIMDE_FLOAT64_C( -172.36), SIMDE_FLOAT64_C( 393.53), SIMDE_FLOAT64_C( 128.51), SIMDE_FLOAT64_C( -556.90)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 160.98), SIMDE_FLOAT64_C( -839.80), SIMDE_FLOAT64_C( -569.99), SIMDE_FLOAT64_C( -327.63), SIMDE_FLOAT64_C( -172.36), SIMDE_FLOAT64_C( 393.53), SIMDE_FLOAT64_C( -71.72), SIMDE_FLOAT64_C( 98.47)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 283.06), SIMDE_FLOAT64_C( -333.94), SIMDE_FLOAT64_C( 903.50), SIMDE_FLOAT64_C( 232.04), SIMDE_FLOAT64_C( 309.91), SIMDE_FLOAT64_C( 846.15), SIMDE_FLOAT64_C( -514.56), SIMDE_FLOAT64_C( -736.92)), UINT8_C( 17), simde_mm512_set_pd(SIMDE_FLOAT64_C( -921.00), SIMDE_FLOAT64_C( -471.60), SIMDE_FLOAT64_C( 483.08), SIMDE_FLOAT64_C( 232.04), SIMDE_FLOAT64_C( 774.81), SIMDE_FLOAT64_C( -599.01), SIMDE_FLOAT64_C( 69.04), SIMDE_FLOAT64_C( -149.02)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 51.33), SIMDE_FLOAT64_C( -895.71), SIMDE_FLOAT64_C( -868.59), SIMDE_FLOAT64_C( -736.92)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 283.06), SIMDE_FLOAT64_C( -333.94), SIMDE_FLOAT64_C( 903.50), SIMDE_FLOAT64_C( 232.04), SIMDE_FLOAT64_C( 309.91), SIMDE_FLOAT64_C( 846.15), SIMDE_FLOAT64_C( -514.56), SIMDE_FLOAT64_C( -736.92)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 283.06), SIMDE_FLOAT64_C( -333.94), SIMDE_FLOAT64_C( 903.50), SIMDE_FLOAT64_C( -736.92), SIMDE_FLOAT64_C( 309.91), SIMDE_FLOAT64_C( 846.15), SIMDE_FLOAT64_C( -514.56), SIMDE_FLOAT64_C( -149.02)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 29.68), SIMDE_FLOAT64_C( -490.28), SIMDE_FLOAT64_C( -373.66), SIMDE_FLOAT64_C( 526.21), SIMDE_FLOAT64_C( -925.69), SIMDE_FLOAT64_C( -80.71), SIMDE_FLOAT64_C( 717.83), SIMDE_FLOAT64_C( 39.32)), UINT8_C( 43), simde_mm512_set_pd(SIMDE_FLOAT64_C( -489.88), SIMDE_FLOAT64_C( -37.49), SIMDE_FLOAT64_C( -373.66), SIMDE_FLOAT64_C( -292.35), SIMDE_FLOAT64_C( -835.53), SIMDE_FLOAT64_C( 571.79), 
SIMDE_FLOAT64_C( 675.92), SIMDE_FLOAT64_C( 490.41)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -925.69), SIMDE_FLOAT64_C( -744.41), SIMDE_FLOAT64_C( 717.83), SIMDE_FLOAT64_C( 39.32)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 29.68), SIMDE_FLOAT64_C( -490.28), SIMDE_FLOAT64_C( -373.66), SIMDE_FLOAT64_C( 526.21), SIMDE_FLOAT64_C( -925.69), SIMDE_FLOAT64_C( -80.71), SIMDE_FLOAT64_C( 717.83), SIMDE_FLOAT64_C( 39.32)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 29.68), SIMDE_FLOAT64_C( -490.28), SIMDE_FLOAT64_C( 717.83), SIMDE_FLOAT64_C( 526.21), SIMDE_FLOAT64_C( -835.53), SIMDE_FLOAT64_C( -80.71), SIMDE_FLOAT64_C( 675.92), SIMDE_FLOAT64_C( 490.41)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -617.12), SIMDE_FLOAT64_C( 213.24), SIMDE_FLOAT64_C( -577.36), SIMDE_FLOAT64_C( 957.37), SIMDE_FLOAT64_C( 846.67), SIMDE_FLOAT64_C( 403.00), SIMDE_FLOAT64_C( -198.67), SIMDE_FLOAT64_C( 447.98)), UINT8_C(120), simde_mm512_set_pd(SIMDE_FLOAT64_C( 149.72), SIMDE_FLOAT64_C( 213.24), SIMDE_FLOAT64_C( -577.36), SIMDE_FLOAT64_C( 957.37), SIMDE_FLOAT64_C( -934.92), SIMDE_FLOAT64_C( -657.02), SIMDE_FLOAT64_C( -629.37), SIMDE_FLOAT64_C( 337.35)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 846.67), SIMDE_FLOAT64_C( 586.29), SIMDE_FLOAT64_C( 670.52), SIMDE_FLOAT64_C( -760.88)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -617.12), SIMDE_FLOAT64_C( 213.24), SIMDE_FLOAT64_C( -577.36), SIMDE_FLOAT64_C( 957.37), SIMDE_FLOAT64_C( 846.67), SIMDE_FLOAT64_C( 403.00), SIMDE_FLOAT64_C( -198.67), SIMDE_FLOAT64_C( 447.98)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -617.12), SIMDE_FLOAT64_C( 586.29), SIMDE_FLOAT64_C( 670.52), SIMDE_FLOAT64_C( -760.88), SIMDE_FLOAT64_C( -934.92), SIMDE_FLOAT64_C( 403.00), SIMDE_FLOAT64_C( -198.67), SIMDE_FLOAT64_C( 447.98)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 268.99), SIMDE_FLOAT64_C( -207.87), SIMDE_FLOAT64_C( -765.42), SIMDE_FLOAT64_C( 138.83), SIMDE_FLOAT64_C( -413.85), SIMDE_FLOAT64_C( 241.45), SIMDE_FLOAT64_C( 609.29), SIMDE_FLOAT64_C( -432.13)), UINT8_C(245), simde_mm512_set_pd(SIMDE_FLOAT64_C( 268.99), 
SIMDE_FLOAT64_C( -207.87), SIMDE_FLOAT64_C( -765.42), SIMDE_FLOAT64_C( 138.83), SIMDE_FLOAT64_C( 699.07), SIMDE_FLOAT64_C( -143.73), SIMDE_FLOAT64_C( 709.96), SIMDE_FLOAT64_C( -767.34)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -820.22), SIMDE_FLOAT64_C( 241.45), SIMDE_FLOAT64_C( -959.11), SIMDE_FLOAT64_C( -432.13)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 268.99), SIMDE_FLOAT64_C( -207.87), SIMDE_FLOAT64_C( -765.42), SIMDE_FLOAT64_C( 138.83), SIMDE_FLOAT64_C( -413.85), SIMDE_FLOAT64_C( 241.45), SIMDE_FLOAT64_C( 609.29), SIMDE_FLOAT64_C( -432.13)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -820.22), SIMDE_FLOAT64_C( 241.45), SIMDE_FLOAT64_C( -959.11), SIMDE_FLOAT64_C( -432.13), SIMDE_FLOAT64_C( -413.85), SIMDE_FLOAT64_C( -143.73), SIMDE_FLOAT64_C( 609.29), SIMDE_FLOAT64_C( -767.34)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 487.08), SIMDE_FLOAT64_C( -68.16), SIMDE_FLOAT64_C( -289.23), SIMDE_FLOAT64_C( 918.52), SIMDE_FLOAT64_C( -422.76), SIMDE_FLOAT64_C( 518.12), SIMDE_FLOAT64_C( 952.47), SIMDE_FLOAT64_C( -173.80)), UINT8_C( 71), simde_mm512_set_pd(SIMDE_FLOAT64_C( -492.61), SIMDE_FLOAT64_C( -68.16), SIMDE_FLOAT64_C( 717.69), SIMDE_FLOAT64_C( -663.74), SIMDE_FLOAT64_C( 179.29), SIMDE_FLOAT64_C( 989.70), SIMDE_FLOAT64_C( -695.21), SIMDE_FLOAT64_C( -786.23)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -619.50), SIMDE_FLOAT64_C( 518.12), SIMDE_FLOAT64_C( 952.47), SIMDE_FLOAT64_C( -173.80)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 487.08), SIMDE_FLOAT64_C( -68.16), SIMDE_FLOAT64_C( -289.23), SIMDE_FLOAT64_C( 918.52), SIMDE_FLOAT64_C( -422.76), SIMDE_FLOAT64_C( 518.12), SIMDE_FLOAT64_C( 952.47), SIMDE_FLOAT64_C( -173.80)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 487.08), SIMDE_FLOAT64_C( 518.12), SIMDE_FLOAT64_C( -289.23), SIMDE_FLOAT64_C( 918.52), SIMDE_FLOAT64_C( -422.76), SIMDE_FLOAT64_C( 989.70), SIMDE_FLOAT64_C( -695.21), SIMDE_FLOAT64_C( -786.23)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -656.10), SIMDE_FLOAT64_C( -789.54), SIMDE_FLOAT64_C( -790.16), SIMDE_FLOAT64_C( -30.05), SIMDE_FLOAT64_C( 
994.61), SIMDE_FLOAT64_C( 50.50), SIMDE_FLOAT64_C( 493.30), SIMDE_FLOAT64_C( 831.29)), UINT8_C( 20), simde_mm512_set_pd(SIMDE_FLOAT64_C( -211.91), SIMDE_FLOAT64_C( -925.76), SIMDE_FLOAT64_C( -915.62), SIMDE_FLOAT64_C( -30.05), SIMDE_FLOAT64_C( -70.79), SIMDE_FLOAT64_C( 493.65), SIMDE_FLOAT64_C( 52.79), SIMDE_FLOAT64_C( 835.54)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 380.83), SIMDE_FLOAT64_C( 50.50), SIMDE_FLOAT64_C( -236.67), SIMDE_FLOAT64_C( -282.31)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -656.10), SIMDE_FLOAT64_C( -789.54), SIMDE_FLOAT64_C( -790.16), SIMDE_FLOAT64_C( -30.05), SIMDE_FLOAT64_C( 994.61), SIMDE_FLOAT64_C( 50.50), SIMDE_FLOAT64_C( 493.30), SIMDE_FLOAT64_C( 831.29)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -656.10), SIMDE_FLOAT64_C( -789.54), SIMDE_FLOAT64_C( -790.16), SIMDE_FLOAT64_C( -282.31), SIMDE_FLOAT64_C( 994.61), SIMDE_FLOAT64_C( 493.65), SIMDE_FLOAT64_C( 493.30), SIMDE_FLOAT64_C( 831.29)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -643.43), SIMDE_FLOAT64_C( -331.34), SIMDE_FLOAT64_C( 22.93), SIMDE_FLOAT64_C( -395.13), SIMDE_FLOAT64_C( 639.68), SIMDE_FLOAT64_C( 2.55), SIMDE_FLOAT64_C( -949.73), SIMDE_FLOAT64_C( -388.47)), UINT8_C( 61), simde_mm512_set_pd(SIMDE_FLOAT64_C( -102.18), SIMDE_FLOAT64_C( 960.00), SIMDE_FLOAT64_C( 22.93), SIMDE_FLOAT64_C( -395.13), SIMDE_FLOAT64_C( 145.63), SIMDE_FLOAT64_C( -149.04), SIMDE_FLOAT64_C( 214.37), SIMDE_FLOAT64_C( -453.25)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 639.68), SIMDE_FLOAT64_C( 2.55), SIMDE_FLOAT64_C( -783.00), SIMDE_FLOAT64_C( -388.47)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -643.43), SIMDE_FLOAT64_C( -331.34), SIMDE_FLOAT64_C( 22.93), SIMDE_FLOAT64_C( -395.13), SIMDE_FLOAT64_C( 639.68), SIMDE_FLOAT64_C( 2.55), SIMDE_FLOAT64_C( -949.73), SIMDE_FLOAT64_C( -388.47)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -643.43), SIMDE_FLOAT64_C( -331.34), SIMDE_FLOAT64_C( -783.00), SIMDE_FLOAT64_C( -388.47), SIMDE_FLOAT64_C( 145.63), SIMDE_FLOAT64_C( -149.04), SIMDE_FLOAT64_C( -949.73), SIMDE_FLOAT64_C( -453.25)) }, }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r; r = simde_mm512_mask_insertf64x4(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b, 0); simde_assert_m512d_close(r, test_vec[i].r0, 1); r = simde_mm512_mask_insertf64x4(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b, 1); simde_assert_m512d_close(r, test_vec[i].r1, 1); } return 0; } /* Test for simde_mm512_maskz_insertf64x4. Each of the 8 table rows holds a mask k, a 512-bit double vector a, a 256-bit double vector b, and the expected results r0 / r1 for the final argument being 0 / 1. In the expected vectors the lanes whose bit in k is clear appear as 0.00. NOTE(review): this reading assumes simde_mm512_set_pd lists lanes highest-first and that bit i of k maps to lane i -- confirm against the implementation. */ static int test_simde_mm512_maskz_insertf64x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512d a; simde__m256d b; simde__m512d r0; simde__m512d r1; } test_vec[8] = { { UINT8_C( 32), simde_mm512_set_pd(SIMDE_FLOAT64_C( -139.11), SIMDE_FLOAT64_C( -172.36), SIMDE_FLOAT64_C( -268.86), SIMDE_FLOAT64_C( 393.53), SIMDE_FLOAT64_C( -71.72), SIMDE_FLOAT64_C( 36.69), SIMDE_FLOAT64_C( 98.47), SIMDE_FLOAT64_C( -135.52)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 600.12), SIMDE_FLOAT64_C( -569.99), SIMDE_FLOAT64_C( -491.12), SIMDE_FLOAT64_C( -327.63)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -268.86), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -491.12), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(109), simde_mm512_set_pd(SIMDE_FLOAT64_C( 240.79), SIMDE_FLOAT64_C( -280.30), SIMDE_FLOAT64_C( -839.80), SIMDE_FLOAT64_C( 128.51), SIMDE_FLOAT64_C( -556.90), SIMDE_FLOAT64_C( 522.06), SIMDE_FLOAT64_C( 160.98), SIMDE_FLOAT64_C( -932.28)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 69.04), SIMDE_FLOAT64_C( -514.56), SIMDE_FLOAT64_C( -149.02), SIMDE_FLOAT64_C( -860.98)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -280.30), SIMDE_FLOAT64_C( -839.80), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 69.04), SIMDE_FLOAT64_C( -514.56), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -860.98)), 
simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -514.56), SIMDE_FLOAT64_C( -149.02), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -556.90), SIMDE_FLOAT64_C( 522.06), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -932.28)) }, { UINT8_C( 64), simde_mm512_set_pd(SIMDE_FLOAT64_C( -333.94), SIMDE_FLOAT64_C( 483.08), SIMDE_FLOAT64_C( 903.50), SIMDE_FLOAT64_C( 232.04), SIMDE_FLOAT64_C( -43.35), SIMDE_FLOAT64_C( 774.81), SIMDE_FLOAT64_C( 309.91), SIMDE_FLOAT64_C( -599.01)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -736.92), SIMDE_FLOAT64_C( -921.00), SIMDE_FLOAT64_C( 283.06), SIMDE_FLOAT64_C( -471.60)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 483.08), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -921.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C( 7), simde_mm512_set_pd(SIMDE_FLOAT64_C( -80.71), SIMDE_FLOAT64_C( 675.92), SIMDE_FLOAT64_C( 632.01), SIMDE_FLOAT64_C( 490.41), SIMDE_FLOAT64_C( 456.89), SIMDE_FLOAT64_C( 47.59), SIMDE_FLOAT64_C( 51.33), SIMDE_FLOAT64_C( -895.71)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 526.21), SIMDE_FLOAT64_C( -835.53), SIMDE_FLOAT64_C( -203.04), SIMDE_FLOAT64_C( 571.79)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -835.53), SIMDE_FLOAT64_C( -203.04), SIMDE_FLOAT64_C( 571.79)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 47.59), SIMDE_FLOAT64_C( 51.33), SIMDE_FLOAT64_C( -895.71)) }, { UINT8_C(100), simde_mm512_set_pd(SIMDE_FLOAT64_C( 717.83), SIMDE_FLOAT64_C( 39.32), SIMDE_FLOAT64_C( -489.88), SIMDE_FLOAT64_C( 29.68), SIMDE_FLOAT64_C( 
-37.49), SIMDE_FLOAT64_C( -490.28), SIMDE_FLOAT64_C( -373.66), SIMDE_FLOAT64_C( 841.53)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 447.98), SIMDE_FLOAT64_C( -278.32), SIMDE_FLOAT64_C( -925.69), SIMDE_FLOAT64_C( -744.41)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 39.32), SIMDE_FLOAT64_C( -489.88), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -278.32), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -278.32), SIMDE_FLOAT64_C( -925.69), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -490.28), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C( 84), simde_mm512_set_pd(SIMDE_FLOAT64_C( 957.37), SIMDE_FLOAT64_C( -359.60), SIMDE_FLOAT64_C( -934.92), SIMDE_FLOAT64_C( -213.75), SIMDE_FLOAT64_C( -657.02), SIMDE_FLOAT64_C( 403.00), SIMDE_FLOAT64_C( -629.37), SIMDE_FLOAT64_C( -198.67)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 213.24), SIMDE_FLOAT64_C( -751.58), SIMDE_FLOAT64_C( -577.36), SIMDE_FLOAT64_C( 907.23)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -359.60), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -213.75), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -751.58), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -751.58), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 907.23), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 403.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C( 14), simde_mm512_set_pd(SIMDE_FLOAT64_C( -767.34), SIMDE_FLOAT64_C( 878.85), SIMDE_FLOAT64_C( -588.28), SIMDE_FLOAT64_C( 846.67), SIMDE_FLOAT64_C( 586.29), SIMDE_FLOAT64_C( 670.52), SIMDE_FLOAT64_C( -760.88), SIMDE_FLOAT64_C( 149.72)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -143.73), SIMDE_FLOAT64_C( 752.26), SIMDE_FLOAT64_C( 709.96), SIMDE_FLOAT64_C( 609.29)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -143.73), 
SIMDE_FLOAT64_C( 752.26), SIMDE_FLOAT64_C( 709.96), SIMDE_FLOAT64_C( 0.00)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 586.29), SIMDE_FLOAT64_C( 670.52), SIMDE_FLOAT64_C( -760.88), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(237), simde_mm512_set_pd(SIMDE_FLOAT64_C( -842.15), SIMDE_FLOAT64_C( -207.87), SIMDE_FLOAT64_C( 908.84), SIMDE_FLOAT64_C( -765.42), SIMDE_FLOAT64_C( -315.78), SIMDE_FLOAT64_C( 138.83), SIMDE_FLOAT64_C( -86.06), SIMDE_FLOAT64_C( 699.07)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 241.45), SIMDE_FLOAT64_C( -959.11), SIMDE_FLOAT64_C( -432.13), SIMDE_FLOAT64_C( 268.99)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -842.15), SIMDE_FLOAT64_C( -207.87), SIMDE_FLOAT64_C( 908.84), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 241.45), SIMDE_FLOAT64_C( -959.11), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 268.99)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 241.45), SIMDE_FLOAT64_C( -959.11), SIMDE_FLOAT64_C( -432.13), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -315.78), SIMDE_FLOAT64_C( 138.83), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 699.07)) }, }; /* Run every row through simde_mm512_maskz_insertf64x4 with the final argument set to 0 and then 1, comparing against r0 and r1. The trailing 1 passed to simde_assert_m512d_close is presumably the comparison precision -- confirm against the macro definition. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r; r = simde_mm512_maskz_insertf64x4(test_vec[i].k, test_vec[i].a, test_vec[i].b, 0); simde_assert_m512d_close(r, test_vec[i].r0, 1); r = simde_mm512_maskz_insertf64x4(test_vec[i].k, test_vec[i].a, test_vec[i].b, 1); simde_assert_m512d_close(r, test_vec[i].r1, 1); } return 0; } /* Test for simde_mm512_inserti32x4. Each of the 8 table rows holds a 512-bit integer vector a, a 128-bit integer vector b, and four expected vectors r0..r3, one per value (0..3) of the final argument. In the table, each rN repeats a's sixteen set_epi32 arguments with one group of four replaced by b's arguments: the last four for r0, then one group further toward the front for each of r1, r2 and r3. */ static int test_simde_mm512_inserti32x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m128i b; simde__m512i r0; simde__m512i r1; simde__m512i r2; simde__m512i r3; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C(-1026363374), INT32_C(-1801776439), INT32_C( 145438126), INT32_C(-1306064352), INT32_C( -858736392), INT32_C( 923442479), INT32_C( 1092805562), INT32_C( 1443901717), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1570116932), INT32_C(-1302383354), INT32_C( 
1993455974), INT32_C(-2068684593), INT32_C(-1936012201), INT32_C( 1856459607)), simde_mm_set_epi32(INT32_C( 1545554432), INT32_C( 344023940), INT32_C(-1871515754), INT32_C( 951544639)), simde_mm512_set_epi32(INT32_C(-1026363374), INT32_C(-1801776439), INT32_C( 145438126), INT32_C(-1306064352), INT32_C( -858736392), INT32_C( 923442479), INT32_C( 1092805562), INT32_C( 1443901717), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1570116932), INT32_C(-1302383354), INT32_C( 1545554432), INT32_C( 344023940), INT32_C(-1871515754), INT32_C( 951544639)), simde_mm512_set_epi32(INT32_C(-1026363374), INT32_C(-1801776439), INT32_C( 145438126), INT32_C(-1306064352), INT32_C( -858736392), INT32_C( 923442479), INT32_C( 1092805562), INT32_C( 1443901717), INT32_C( 1545554432), INT32_C( 344023940), INT32_C(-1871515754), INT32_C( 951544639), INT32_C( 1993455974), INT32_C(-2068684593), INT32_C(-1936012201), INT32_C( 1856459607)), simde_mm512_set_epi32(INT32_C(-1026363374), INT32_C(-1801776439), INT32_C( 145438126), INT32_C(-1306064352), INT32_C( 1545554432), INT32_C( 344023940), INT32_C(-1871515754), INT32_C( 951544639), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1570116932), INT32_C(-1302383354), INT32_C( 1993455974), INT32_C(-2068684593), INT32_C(-1936012201), INT32_C( 1856459607)), simde_mm512_set_epi32(INT32_C( 1545554432), INT32_C( 344023940), INT32_C(-1871515754), INT32_C( 951544639), INT32_C( -858736392), INT32_C( 923442479), INT32_C( 1092805562), INT32_C( 1443901717), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1570116932), INT32_C(-1302383354), INT32_C( 1993455974), INT32_C(-2068684593), INT32_C(-1936012201), INT32_C( 1856459607)) }, { simde_mm512_set_epi32(INT32_C(-1539616610), INT32_C( 1134735685), INT32_C( 1430356381), INT32_C(-1110068455), INT32_C( -207240031), INT32_C(-1649179267), INT32_C( 2054398444), INT32_C( -483586503), INT32_C(-1481960002), INT32_C( 861125508), INT32_C( -330381203), INT32_C(-1999224530), INT32_C( 1042470181), INT32_C( 
1827473477), INT32_C( 298546792), INT32_C(-1630396605)), simde_mm_set_epi32(INT32_C( 223952317), INT32_C( 282198336), INT32_C( 564965997), INT32_C( 169645898)), simde_mm512_set_epi32(INT32_C(-1539616610), INT32_C( 1134735685), INT32_C( 1430356381), INT32_C(-1110068455), INT32_C( -207240031), INT32_C(-1649179267), INT32_C( 2054398444), INT32_C( -483586503), INT32_C(-1481960002), INT32_C( 861125508), INT32_C( -330381203), INT32_C(-1999224530), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 564965997), INT32_C( 169645898)), simde_mm512_set_epi32(INT32_C(-1539616610), INT32_C( 1134735685), INT32_C( 1430356381), INT32_C(-1110068455), INT32_C( -207240031), INT32_C(-1649179267), INT32_C( 2054398444), INT32_C( -483586503), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 564965997), INT32_C( 169645898), INT32_C( 1042470181), INT32_C( 1827473477), INT32_C( 298546792), INT32_C(-1630396605)), simde_mm512_set_epi32(INT32_C(-1539616610), INT32_C( 1134735685), INT32_C( 1430356381), INT32_C(-1110068455), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 564965997), INT32_C( 169645898), INT32_C(-1481960002), INT32_C( 861125508), INT32_C( -330381203), INT32_C(-1999224530), INT32_C( 1042470181), INT32_C( 1827473477), INT32_C( 298546792), INT32_C(-1630396605)), simde_mm512_set_epi32(INT32_C( 223952317), INT32_C( 282198336), INT32_C( 564965997), INT32_C( 169645898), INT32_C( -207240031), INT32_C(-1649179267), INT32_C( 2054398444), INT32_C( -483586503), INT32_C(-1481960002), INT32_C( 861125508), INT32_C( -330381203), INT32_C(-1999224530), INT32_C( 1042470181), INT32_C( 1827473477), INT32_C( 298546792), INT32_C(-1630396605)) }, { simde_mm512_set_epi32(INT32_C( 2066979701), INT32_C( 1094609712), INT32_C( 1345059025), INT32_C( -340318359), INT32_C( 1519671047), INT32_C(-1017461983), INT32_C( 353198331), INT32_C( 1711460779), INT32_C( -919570191), INT32_C( 1974152373), INT32_C( -695949043), INT32_C( -790242624), INT32_C(-1094331335), INT32_C(-1166320093), INT32_C(-2045280751), 
INT32_C(-2037261521)), simde_mm_set_epi32(INT32_C( -605945909), INT32_C(-2063050181), INT32_C( 1095467003), INT32_C(-2083755741)), simde_mm512_set_epi32(INT32_C( 2066979701), INT32_C( 1094609712), INT32_C( 1345059025), INT32_C( -340318359), INT32_C( 1519671047), INT32_C(-1017461983), INT32_C( 353198331), INT32_C( 1711460779), INT32_C( -919570191), INT32_C( 1974152373), INT32_C( -695949043), INT32_C( -790242624), INT32_C( -605945909), INT32_C(-2063050181), INT32_C( 1095467003), INT32_C(-2083755741)), simde_mm512_set_epi32(INT32_C( 2066979701), INT32_C( 1094609712), INT32_C( 1345059025), INT32_C( -340318359), INT32_C( 1519671047), INT32_C(-1017461983), INT32_C( 353198331), INT32_C( 1711460779), INT32_C( -605945909), INT32_C(-2063050181), INT32_C( 1095467003), INT32_C(-2083755741), INT32_C(-1094331335), INT32_C(-1166320093), INT32_C(-2045280751), INT32_C(-2037261521)), simde_mm512_set_epi32(INT32_C( 2066979701), INT32_C( 1094609712), INT32_C( 1345059025), INT32_C( -340318359), INT32_C( -605945909), INT32_C(-2063050181), INT32_C( 1095467003), INT32_C(-2083755741), INT32_C( -919570191), INT32_C( 1974152373), INT32_C( -695949043), INT32_C( -790242624), INT32_C(-1094331335), INT32_C(-1166320093), INT32_C(-2045280751), INT32_C(-2037261521)), simde_mm512_set_epi32(INT32_C( -605945909), INT32_C(-2063050181), INT32_C( 1095467003), INT32_C(-2083755741), INT32_C( 1519671047), INT32_C(-1017461983), INT32_C( 353198331), INT32_C( 1711460779), INT32_C( -919570191), INT32_C( 1974152373), INT32_C( -695949043), INT32_C( -790242624), INT32_C(-1094331335), INT32_C(-1166320093), INT32_C(-2045280751), INT32_C(-2037261521)) }, { simde_mm512_set_epi32(INT32_C( 533478787), INT32_C( 907615417), INT32_C( -199229058), INT32_C( -91537812), INT32_C( 1375258232), INT32_C( 139748399), INT32_C( 1688468565), INT32_C( 736544549), INT32_C(-1282057552), INT32_C( 795925067), INT32_C( 1720852541), INT32_C(-1423023772), INT32_C(-1185448755), INT32_C( 1549802795), INT32_C( 159583350), INT32_C( 548883180)), 
simde_mm_set_epi32(INT32_C( 513515868), INT32_C(-1825967755), INT32_C( 822222164), INT32_C(-1689559027)), simde_mm512_set_epi32(INT32_C( 533478787), INT32_C( 907615417), INT32_C( -199229058), INT32_C( -91537812), INT32_C( 1375258232), INT32_C( 139748399), INT32_C( 1688468565), INT32_C( 736544549), INT32_C(-1282057552), INT32_C( 795925067), INT32_C( 1720852541), INT32_C(-1423023772), INT32_C( 513515868), INT32_C(-1825967755), INT32_C( 822222164), INT32_C(-1689559027)), simde_mm512_set_epi32(INT32_C( 533478787), INT32_C( 907615417), INT32_C( -199229058), INT32_C( -91537812), INT32_C( 1375258232), INT32_C( 139748399), INT32_C( 1688468565), INT32_C( 736544549), INT32_C( 513515868), INT32_C(-1825967755), INT32_C( 822222164), INT32_C(-1689559027), INT32_C(-1185448755), INT32_C( 1549802795), INT32_C( 159583350), INT32_C( 548883180)), simde_mm512_set_epi32(INT32_C( 533478787), INT32_C( 907615417), INT32_C( -199229058), INT32_C( -91537812), INT32_C( 513515868), INT32_C(-1825967755), INT32_C( 822222164), INT32_C(-1689559027), INT32_C(-1282057552), INT32_C( 795925067), INT32_C( 1720852541), INT32_C(-1423023772), INT32_C(-1185448755), INT32_C( 1549802795), INT32_C( 159583350), INT32_C( 548883180)), simde_mm512_set_epi32(INT32_C( 513515868), INT32_C(-1825967755), INT32_C( 822222164), INT32_C(-1689559027), INT32_C( 1375258232), INT32_C( 139748399), INT32_C( 1688468565), INT32_C( 736544549), INT32_C(-1282057552), INT32_C( 795925067), INT32_C( 1720852541), INT32_C(-1423023772), INT32_C(-1185448755), INT32_C( 1549802795), INT32_C( 159583350), INT32_C( 548883180)) }, { simde_mm512_set_epi32(INT32_C( 503748315), INT32_C( 1469355417), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C( -646247370), INT32_C( 1258747662), INT32_C( 1838830023), INT32_C( -532007659), INT32_C( -622852205), INT32_C( -839037220), INT32_C( 499633910), INT32_C( -260167255), INT32_C( 884163960), INT32_C( -329275629), INT32_C( -888441293), INT32_C( -707551350)), simde_mm_set_epi32(INT32_C(-1569831145), INT32_C( 
338985942), INT32_C( 1701079465), INT32_C( -195770682)), simde_mm512_set_epi32(INT32_C( 503748315), INT32_C( 1469355417), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C( -646247370), INT32_C( 1258747662), INT32_C( 1838830023), INT32_C( -532007659), INT32_C( -622852205), INT32_C( -839037220), INT32_C( 499633910), INT32_C( -260167255), INT32_C(-1569831145), INT32_C( 338985942), INT32_C( 1701079465), INT32_C( -195770682)), simde_mm512_set_epi32(INT32_C( 503748315), INT32_C( 1469355417), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C( -646247370), INT32_C( 1258747662), INT32_C( 1838830023), INT32_C( -532007659), INT32_C(-1569831145), INT32_C( 338985942), INT32_C( 1701079465), INT32_C( -195770682), INT32_C( 884163960), INT32_C( -329275629), INT32_C( -888441293), INT32_C( -707551350)), simde_mm512_set_epi32(INT32_C( 503748315), INT32_C( 1469355417), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C(-1569831145), INT32_C( 338985942), INT32_C( 1701079465), INT32_C( -195770682), INT32_C( -622852205), INT32_C( -839037220), INT32_C( 499633910), INT32_C( -260167255), INT32_C( 884163960), INT32_C( -329275629), INT32_C( -888441293), INT32_C( -707551350)), simde_mm512_set_epi32(INT32_C(-1569831145), INT32_C( 338985942), INT32_C( 1701079465), INT32_C( -195770682), INT32_C( -646247370), INT32_C( 1258747662), INT32_C( 1838830023), INT32_C( -532007659), INT32_C( -622852205), INT32_C( -839037220), INT32_C( 499633910), INT32_C( -260167255), INT32_C( 884163960), INT32_C( -329275629), INT32_C( -888441293), INT32_C( -707551350)) }, { simde_mm512_set_epi32(INT32_C( 1526367108), INT32_C( 722122834), INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494), INT32_C( -22119232), INT32_C( 1216907749), INT32_C( 654527510), INT32_C(-2043358500), INT32_C( 459072440), INT32_C( -430427651), INT32_C( -272088075), INT32_C( 386072301), INT32_C(-1628984154), INT32_C( 87817524), INT32_C( 1219490517)), simde_mm_set_epi32(INT32_C(-1101477862), INT32_C( 2001101785), 
INT32_C(-1759250988), INT32_C( -606254738)), simde_mm512_set_epi32(INT32_C( 1526367108), INT32_C( 722122834), INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494), INT32_C( -22119232), INT32_C( 1216907749), INT32_C( 654527510), INT32_C(-2043358500), INT32_C( 459072440), INT32_C( -430427651), INT32_C( -272088075), INT32_C(-1101477862), INT32_C( 2001101785), INT32_C(-1759250988), INT32_C( -606254738)), simde_mm512_set_epi32(INT32_C( 1526367108), INT32_C( 722122834), INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494), INT32_C( -22119232), INT32_C( 1216907749), INT32_C( 654527510), INT32_C(-1101477862), INT32_C( 2001101785), INT32_C(-1759250988), INT32_C( -606254738), INT32_C( 386072301), INT32_C(-1628984154), INT32_C( 87817524), INT32_C( 1219490517)), simde_mm512_set_epi32(INT32_C( 1526367108), INT32_C( 722122834), INT32_C( -174985661), INT32_C(-1762469023), INT32_C(-1101477862), INT32_C( 2001101785), INT32_C(-1759250988), INT32_C( -606254738), INT32_C(-2043358500), INT32_C( 459072440), INT32_C( -430427651), INT32_C( -272088075), INT32_C( 386072301), INT32_C(-1628984154), INT32_C( 87817524), INT32_C( 1219490517)), simde_mm512_set_epi32(INT32_C(-1101477862), INT32_C( 2001101785), INT32_C(-1759250988), INT32_C( -606254738), INT32_C( 1239606494), INT32_C( -22119232), INT32_C( 1216907749), INT32_C( 654527510), INT32_C(-2043358500), INT32_C( 459072440), INT32_C( -430427651), INT32_C( -272088075), INT32_C( 386072301), INT32_C(-1628984154), INT32_C( 87817524), INT32_C( 1219490517)) }, { simde_mm512_set_epi32(INT32_C( 2082954477), INT32_C( 1254960767), INT32_C( 1995459397), INT32_C( -11572946), INT32_C(-1087388220), INT32_C( 730787370), INT32_C(-2034110695), INT32_C(-1088138491), INT32_C( -353174912), INT32_C( -362301616), INT32_C( 617951303), INT32_C( 817116152), INT32_C(-1034835761), INT32_C( -102069057), INT32_C( 1774242298), INT32_C( 1089620040)), simde_mm_set_epi32(INT32_C( 159429100), INT32_C( 451955897), INT32_C( 181201098), INT32_C( 
450627934)), simde_mm512_set_epi32(INT32_C( 2082954477), INT32_C( 1254960767), INT32_C( 1995459397), INT32_C( -11572946), INT32_C(-1087388220), INT32_C( 730787370), INT32_C(-2034110695), INT32_C(-1088138491), INT32_C( -353174912), INT32_C( -362301616), INT32_C( 617951303), INT32_C( 817116152), INT32_C( 159429100), INT32_C( 451955897), INT32_C( 181201098), INT32_C( 450627934)), simde_mm512_set_epi32(INT32_C( 2082954477), INT32_C( 1254960767), INT32_C( 1995459397), INT32_C( -11572946), INT32_C(-1087388220), INT32_C( 730787370), INT32_C(-2034110695), INT32_C(-1088138491), INT32_C( 159429100), INT32_C( 451955897), INT32_C( 181201098), INT32_C( 450627934), INT32_C(-1034835761), INT32_C( -102069057), INT32_C( 1774242298), INT32_C( 1089620040)), simde_mm512_set_epi32(INT32_C( 2082954477), INT32_C( 1254960767), INT32_C( 1995459397), INT32_C( -11572946), INT32_C( 159429100), INT32_C( 451955897), INT32_C( 181201098), INT32_C( 450627934), INT32_C( -353174912), INT32_C( -362301616), INT32_C( 617951303), INT32_C( 817116152), INT32_C(-1034835761), INT32_C( -102069057), INT32_C( 1774242298), INT32_C( 1089620040)), simde_mm512_set_epi32(INT32_C( 159429100), INT32_C( 451955897), INT32_C( 181201098), INT32_C( 450627934), INT32_C(-1087388220), INT32_C( 730787370), INT32_C(-2034110695), INT32_C(-1088138491), INT32_C( -353174912), INT32_C( -362301616), INT32_C( 617951303), INT32_C( 817116152), INT32_C(-1034835761), INT32_C( -102069057), INT32_C( 1774242298), INT32_C( 1089620040)) }, { simde_mm512_set_epi32(INT32_C( 277470244), INT32_C(-1834748849), INT32_C( 596054477), INT32_C( 1827419510), INT32_C(-1010527612), INT32_C(-1687118128), INT32_C( 107945377), INT32_C( 1174128677), INT32_C(-1544325740), INT32_C( 204417556), INT32_C(-1329665093), INT32_C(-2039025377), INT32_C( 1639231015), INT32_C( 1541217841), INT32_C( 1692413538), INT32_C( 738521275)), simde_mm_set_epi32(INT32_C( 1435935141), INT32_C(-2098236580), INT32_C(-1991433794), INT32_C( 1298943776)), simde_mm512_set_epi32(INT32_C( 
277470244), INT32_C(-1834748849), INT32_C( 596054477), INT32_C( 1827419510), INT32_C(-1010527612), INT32_C(-1687118128), INT32_C( 107945377), INT32_C( 1174128677), INT32_C(-1544325740), INT32_C( 204417556), INT32_C(-1329665093), INT32_C(-2039025377), INT32_C( 1435935141), INT32_C(-2098236580), INT32_C(-1991433794), INT32_C( 1298943776)), simde_mm512_set_epi32(INT32_C( 277470244), INT32_C(-1834748849), INT32_C( 596054477), INT32_C( 1827419510), INT32_C(-1010527612), INT32_C(-1687118128), INT32_C( 107945377), INT32_C( 1174128677), INT32_C( 1435935141), INT32_C(-2098236580), INT32_C(-1991433794), INT32_C( 1298943776), INT32_C( 1639231015), INT32_C( 1541217841), INT32_C( 1692413538), INT32_C( 738521275)), simde_mm512_set_epi32(INT32_C( 277470244), INT32_C(-1834748849), INT32_C( 596054477), INT32_C( 1827419510), INT32_C( 1435935141), INT32_C(-2098236580), INT32_C(-1991433794), INT32_C( 1298943776), INT32_C(-1544325740), INT32_C( 204417556), INT32_C(-1329665093), INT32_C(-2039025377), INT32_C( 1639231015), INT32_C( 1541217841), INT32_C( 1692413538), INT32_C( 738521275)), simde_mm512_set_epi32(INT32_C( 1435935141), INT32_C(-2098236580), INT32_C(-1991433794), INT32_C( 1298943776), INT32_C(-1010527612), INT32_C(-1687118128), INT32_C( 107945377), INT32_C( 1174128677), INT32_C(-1544325740), INT32_C( 204417556), INT32_C(-1329665093), INT32_C(-2039025377), INT32_C( 1639231015), INT32_C( 1541217841), INT32_C( 1692413538), INT32_C( 738521275)) }, }; /* Call simde_mm512_inserti32x4 with each of the four final-argument values 0..3 and compare the result to the matching expected vector via simde_assert_m512i_i32 with the == operator. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r; r = simde_mm512_inserti32x4(test_vec[i].a, test_vec[i].b, 0); simde_assert_m512i_i32(r, ==, test_vec[i].r0); r = simde_mm512_inserti32x4(test_vec[i].a, test_vec[i].b, 1); simde_assert_m512i_i32(r, ==, test_vec[i].r1); r = simde_mm512_inserti32x4(test_vec[i].a, test_vec[i].b, 2); simde_assert_m512i_i32(r, ==, test_vec[i].r2); r = simde_mm512_inserti32x4(test_vec[i].a, test_vec[i].b, 3); simde_assert_m512i_i32(r, ==, test_vec[i].r3); } return 0; } 
/* Test for simde_mm512_mask_inserti32x4.
 *
 * test_vec holds 8 entries. Each entry carries the inputs (src, k, a, b) and
 * one expected 512-bit result per immediate value: r0, r1, r2 and r3 are
 * compared against the calls made with the last argument set to 0, 1, 2 and 3
 * respectively. Comparison is done per 32-bit element with
 * simde_assert_m512i_i32. The function returns 0 after the loop.
 *
 * NOTE(review): in the first entry, r0 takes each element from "a with b
 * written into its lowest four elements" where the matching bit of k is set,
 * and from src where it is clear. Presumably every entry/immediate follows the
 * same rule with the insert position moved -- confirm against the intrinsic
 * reference before editing any vector by hand. */
static int test_simde_mm512_mask_inserti32x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask16 k; simde__m512i a; simde__m128i b; simde__m512i r0; simde__m512i r1; simde__m512i r2; simde__m512i r3; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C(-1026363374), INT32_C(-1801776439), INT32_C( 145438126), INT32_C(-1306064352), INT32_C( -858736392), INT32_C( 923442479), INT32_C( 1092805562), INT32_C( 1443901717), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1570116932), INT32_C(-1302383354), INT32_C( 1993455974), INT32_C(-2068684593), INT32_C(-1936012201), INT32_C( 1856459607)), UINT16_C(27455), simde_mm512_set_epi32(INT32_C(-1110068455), INT32_C( -207240031), INT32_C(-1649179267), INT32_C( 2054398444), INT32_C( -483586503), INT32_C(-1481960002), INT32_C( 861125508), INT32_C( -330381203), INT32_C(-1999224530), INT32_C( 1042470181), INT32_C( 1827473477), INT32_C( 298546792), INT32_C(-1630396605), INT32_C( 1545554432), INT32_C( 344023940), INT32_C(-1871515754)), simde_mm_set_epi32(INT32_C( 169645898), INT32_C(-1539616610), INT32_C( 1134735685), INT32_C( 1430356381)), simde_mm512_set_epi32(INT32_C(-1026363374), INT32_C( -207240031), INT32_C(-1649179267), INT32_C(-1306064352), INT32_C( -483586503), INT32_C( 923442479), INT32_C( 861125508), INT32_C( -330381203), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1827473477), INT32_C( 298546792), INT32_C( 169645898), INT32_C(-1539616610), INT32_C( 1134735685), INT32_C( 1430356381)), simde_mm512_set_epi32(INT32_C(-1026363374), INT32_C( -207240031), INT32_C(-1649179267), INT32_C(-1306064352), INT32_C( -483586503), INT32_C( 923442479), INT32_C( 861125508), INT32_C( -330381203), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1134735685), INT32_C( 1430356381), INT32_C(-1630396605), INT32_C( 1545554432), INT32_C( 344023940), INT32_C(-1871515754)), simde_mm512_set_epi32(INT32_C(-1026363374), INT32_C( -207240031), INT32_C(-1649179267), INT32_C(-1306064352), INT32_C( 169645898), INT32_C( 
923442479), INT32_C( 1134735685), INT32_C( 1430356381), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1827473477), INT32_C( 298546792), INT32_C(-1630396605), INT32_C( 1545554432), INT32_C( 344023940), INT32_C(-1871515754)), simde_mm512_set_epi32(INT32_C(-1026363374), INT32_C(-1539616610), INT32_C( 1134735685), INT32_C(-1306064352), INT32_C( -483586503), INT32_C( 923442479), INT32_C( 861125508), INT32_C( -330381203), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1827473477), INT32_C( 298546792), INT32_C(-1630396605), INT32_C( 1545554432), INT32_C( 344023940), INT32_C(-1871515754)) }, { simde_mm512_set_epi32(INT32_C( -340318359), INT32_C( 1519671047), INT32_C(-1017461983), INT32_C( 353198331), INT32_C( 1711460779), INT32_C( -919570191), INT32_C( 1974152373), INT32_C( -695949043), INT32_C( -790242624), INT32_C(-1094331335), INT32_C(-1166320093), INT32_C(-2045280751), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 564965997)), UINT16_C(63697), simde_mm512_set_epi32(INT32_C( 1688468565), INT32_C( 736544549), INT32_C(-1282057552), INT32_C( 795925067), INT32_C( 1720852541), INT32_C(-1423023772), INT32_C(-1185448755), INT32_C( 1549802795), INT32_C( 159583350), INT32_C( 548883180), INT32_C( -605945909), INT32_C(-2063050181), INT32_C( 1095467003), INT32_C(-2083755741), INT32_C( 2066979701), INT32_C( 1094609712)), simde_mm_set_epi32(INT32_C( -199229058), INT32_C( -91537812), INT32_C( 1375258232), INT32_C( 139748399)), simde_mm512_set_epi32(INT32_C( 1688468565), INT32_C( 736544549), INT32_C(-1282057552), INT32_C( 795925067), INT32_C( 1720852541), INT32_C( -919570191), INT32_C( 1974152373), INT32_C( -695949043), INT32_C( 159583350), INT32_C( 548883180), INT32_C(-1166320093), INT32_C(-2063050181), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 139748399)), simde_mm512_set_epi32(INT32_C( 1688468565), INT32_C( 736544549), INT32_C(-1282057552), INT32_C( 795925067), INT32_C( 1720852541), INT32_C( -919570191), INT32_C( 
1974152373), INT32_C( -695949043), INT32_C( -199229058), INT32_C( -91537812), INT32_C(-1166320093), INT32_C( 139748399), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 1094609712)), simde_mm512_set_epi32(INT32_C( 1688468565), INT32_C( 736544549), INT32_C(-1282057552), INT32_C( 795925067), INT32_C( -199229058), INT32_C( -919570191), INT32_C( 1974152373), INT32_C( -695949043), INT32_C( 159583350), INT32_C( 548883180), INT32_C(-1166320093), INT32_C(-2063050181), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 1094609712)), simde_mm512_set_epi32(INT32_C( -199229058), INT32_C( -91537812), INT32_C( 1375258232), INT32_C( 139748399), INT32_C( 1720852541), INT32_C( -919570191), INT32_C( 1974152373), INT32_C( -695949043), INT32_C( 159583350), INT32_C( 548883180), INT32_C(-1166320093), INT32_C(-2063050181), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 1094609712)) }, { simde_mm512_set_epi32(INT32_C( 1838830023), INT32_C( -532007659), INT32_C( -622852205), INT32_C( -839037220), INT32_C( 499633910), INT32_C( -260167255), INT32_C( 884163960), INT32_C( -329275629), INT32_C( -888441293), INT32_C( -707551350), INT32_C( 513515868), INT32_C(-1825967755), INT32_C( 822222164), INT32_C(-1689559027), INT32_C( 533478787), INT32_C( 907615417)), UINT16_C(63246), simde_mm512_set_epi32(INT32_C( 459072440), INT32_C( -430427651), INT32_C( -272088075), INT32_C( 386072301), INT32_C(-1628984154), INT32_C( 87817524), INT32_C( 1219490517), INT32_C(-1569831145), INT32_C( 338985942), INT32_C( 1701079465), INT32_C( -195770682), INT32_C( 503748315), INT32_C( 1469355417), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C( -646247370)), simde_mm_set_epi32(INT32_C( -22119232), INT32_C( 1216907749), INT32_C( 654527510), INT32_C(-2043358500)), simde_mm512_set_epi32(INT32_C( 459072440), INT32_C( -430427651), INT32_C( -272088075), INT32_C( 386072301), INT32_C( 499633910), INT32_C( 87817524), INT32_C( 1219490517), 
INT32_C(-1569831145), INT32_C( -888441293), INT32_C( -707551350), INT32_C( 513515868), INT32_C(-1825967755), INT32_C( -22119232), INT32_C( 1216907749), INT32_C( 654527510), INT32_C( 907615417)), simde_mm512_set_epi32(INT32_C( 459072440), INT32_C( -430427651), INT32_C( -272088075), INT32_C( 386072301), INT32_C( 499633910), INT32_C( 87817524), INT32_C( 1219490517), INT32_C(-1569831145), INT32_C( -888441293), INT32_C( -707551350), INT32_C( 513515868), INT32_C(-1825967755), INT32_C( 1469355417), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C( 907615417)), simde_mm512_set_epi32(INT32_C( 459072440), INT32_C( -430427651), INT32_C( -272088075), INT32_C( 386072301), INT32_C( 499633910), INT32_C( 1216907749), INT32_C( 654527510), INT32_C(-2043358500), INT32_C( -888441293), INT32_C( -707551350), INT32_C( 513515868), INT32_C(-1825967755), INT32_C( 1469355417), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C( 907615417)), simde_mm512_set_epi32(INT32_C( -22119232), INT32_C( 1216907749), INT32_C( 654527510), INT32_C(-2043358500), INT32_C( 499633910), INT32_C( 87817524), INT32_C( 1219490517), INT32_C(-1569831145), INT32_C( -888441293), INT32_C( -707551350), INT32_C( 513515868), INT32_C(-1825967755), INT32_C( 1469355417), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C( 907615417)) }, { simde_mm512_set_epi32(INT32_C( -362301616), INT32_C( 617951303), INT32_C( 817116152), INT32_C(-1034835761), INT32_C( -102069057), INT32_C( 1774242298), INT32_C( 1089620040), INT32_C(-1101477862), INT32_C( 2001101785), INT32_C(-1759250988), INT32_C( -606254738), INT32_C( 1526367108), INT32_C( 722122834), INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494)), UINT16_C(64128), simde_mm512_set_epi32(INT32_C( 1639231015), INT32_C( 1541217841), INT32_C( 1692413538), INT32_C( 738521275), INT32_C( 159429100), INT32_C( 451955897), INT32_C( 181201098), INT32_C( 450627934), INT32_C( 2082954477), INT32_C( 1254960767), INT32_C( 1995459397), INT32_C( -11572946), INT32_C(-1087388220), 
INT32_C( 730787370), INT32_C(-2034110695), INT32_C(-1088138491)), simde_mm_set_epi32(INT32_C(-1544325740), INT32_C( 204417556), INT32_C(-1329665093), INT32_C(-2039025377)), simde_mm512_set_epi32(INT32_C( 1639231015), INT32_C( 1541217841), INT32_C( 1692413538), INT32_C( 738521275), INT32_C( 159429100), INT32_C( 1774242298), INT32_C( 181201098), INT32_C(-1101477862), INT32_C( 2082954477), INT32_C(-1759250988), INT32_C( -606254738), INT32_C( 1526367108), INT32_C( 722122834), INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494)), simde_mm512_set_epi32(INT32_C( 1639231015), INT32_C( 1541217841), INT32_C( 1692413538), INT32_C( 738521275), INT32_C( 159429100), INT32_C( 1774242298), INT32_C( 181201098), INT32_C(-1101477862), INT32_C(-1544325740), INT32_C(-1759250988), INT32_C( -606254738), INT32_C( 1526367108), INT32_C( 722122834), INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494)), simde_mm512_set_epi32(INT32_C( 1639231015), INT32_C( 1541217841), INT32_C( 1692413538), INT32_C( 738521275), INT32_C(-1544325740), INT32_C( 1774242298), INT32_C(-1329665093), INT32_C(-1101477862), INT32_C( 2082954477), INT32_C(-1759250988), INT32_C( -606254738), INT32_C( 1526367108), INT32_C( 722122834), INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494)), simde_mm512_set_epi32(INT32_C(-1544325740), INT32_C( 204417556), INT32_C(-1329665093), INT32_C(-2039025377), INT32_C( 159429100), INT32_C( 1774242298), INT32_C( 181201098), INT32_C(-1101477862), INT32_C( 2082954477), INT32_C(-1759250988), INT32_C( -606254738), INT32_C( 1526367108), INT32_C( 722122834), INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494)) }, { simde_mm512_set_epi32(INT32_C( 1313258175), INT32_C( 1928049651), INT32_C( 765730488), INT32_C( -85899231), INT32_C( 1435935141), INT32_C(-2098236580), INT32_C(-1991433794), INT32_C( 1298943776), INT32_C( 277470244), INT32_C(-1834748849), INT32_C( 596054477), INT32_C( 1827419510), INT32_C(-1010527612), INT32_C(-1687118128), INT32_C( 
107945377), INT32_C( 1174128677)), UINT16_C(52232), simde_mm512_set_epi32(INT32_C( -181070601), INT32_C( 1463729035), INT32_C( 2031968571), INT32_C( 333434400), INT32_C( -637142874), INT32_C( -520435756), INT32_C( -148623413), INT32_C( -692754616), INT32_C(-1908406411), INT32_C( 1391053429), INT32_C( 1767908668), INT32_C( 1117151413), INT32_C( 1466854108), INT32_C( -852914371), INT32_C( -773785464), INT32_C(-2142007253)), simde_mm_set_epi32(INT32_C( 712044568), INT32_C( 1641785760), INT32_C( 1696516135), INT32_C(-1123374630)), simde_mm512_set_epi32(INT32_C( -181070601), INT32_C( 1463729035), INT32_C( 765730488), INT32_C( -85899231), INT32_C( -637142874), INT32_C( -520435756), INT32_C(-1991433794), INT32_C( 1298943776), INT32_C( 277470244), INT32_C(-1834748849), INT32_C( 596054477), INT32_C( 1827419510), INT32_C( 712044568), INT32_C(-1687118128), INT32_C( 107945377), INT32_C( 1174128677)), simde_mm512_set_epi32(INT32_C( -181070601), INT32_C( 1463729035), INT32_C( 765730488), INT32_C( -85899231), INT32_C( -637142874), INT32_C( -520435756), INT32_C(-1991433794), INT32_C( 1298943776), INT32_C( 277470244), INT32_C(-1834748849), INT32_C( 596054477), INT32_C( 1827419510), INT32_C( 1466854108), INT32_C(-1687118128), INT32_C( 107945377), INT32_C( 1174128677)), simde_mm512_set_epi32(INT32_C( -181070601), INT32_C( 1463729035), INT32_C( 765730488), INT32_C( -85899231), INT32_C( 712044568), INT32_C( 1641785760), INT32_C(-1991433794), INT32_C( 1298943776), INT32_C( 277470244), INT32_C(-1834748849), INT32_C( 596054477), INT32_C( 1827419510), INT32_C( 1466854108), INT32_C(-1687118128), INT32_C( 107945377), INT32_C( 1174128677)), simde_mm512_set_epi32(INT32_C( 712044568), INT32_C( 1641785760), INT32_C( 765730488), INT32_C( -85899231), INT32_C( -637142874), INT32_C( -520435756), INT32_C(-1991433794), INT32_C( 1298943776), INT32_C( 277470244), INT32_C(-1834748849), INT32_C( 596054477), INT32_C( 1827419510), INT32_C( 1466854108), INT32_C(-1687118128), INT32_C( 107945377), INT32_C( 
1174128677)) }, { simde_mm512_set_epi32(INT32_C( -539592973), INT32_C(-1402526875), INT32_C( -8263463), INT32_C( 478788156), INT32_C( 842200487), INT32_C( -811849174), INT32_C(-1510825074), INT32_C( 1897985966), INT32_C( 1445172644), INT32_C( -193622280), INT32_C( 2097959091), INT32_C( -652080500), INT32_C( 1943026961), INT32_C( 763848022), INT32_C(-2124387583), INT32_C(-1102663841)), UINT16_C(53796), simde_mm512_set_epi32(INT32_C(-1268587914), INT32_C( 1939823644), INT32_C(-1112752789), INT32_C( 2052878307), INT32_C( -856056848), INT32_C(-1218860495), INT32_C( 729621709), INT32_C(-1241407128), INT32_C( 696721321), INT32_C( -603523965), INT32_C( 1730687689), INT32_C( 290786615), INT32_C(-1827031380), INT32_C( 1429317129), INT32_C(-1800615955), INT32_C( -728999228)), simde_mm_set_epi32(INT32_C( -549528402), INT32_C( -323547130), INT32_C(-1395624565), INT32_C(-1905505546)), simde_mm512_set_epi32(INT32_C(-1268587914), INT32_C( 1939823644), INT32_C( -8263463), INT32_C( 2052878307), INT32_C( 842200487), INT32_C( -811849174), INT32_C( 729621709), INT32_C( 1897985966), INT32_C( 1445172644), INT32_C( -193622280), INT32_C( 1730687689), INT32_C( -652080500), INT32_C( 1943026961), INT32_C( -323547130), INT32_C(-2124387583), INT32_C(-1102663841)), simde_mm512_set_epi32(INT32_C(-1268587914), INT32_C( 1939823644), INT32_C( -8263463), INT32_C( 2052878307), INT32_C( 842200487), INT32_C( -811849174), INT32_C( 729621709), INT32_C( 1897985966), INT32_C( 1445172644), INT32_C( -193622280), INT32_C(-1395624565), INT32_C( -652080500), INT32_C( 1943026961), INT32_C( 1429317129), INT32_C(-2124387583), INT32_C(-1102663841)), simde_mm512_set_epi32(INT32_C(-1268587914), INT32_C( 1939823644), INT32_C( -8263463), INT32_C( 2052878307), INT32_C( 842200487), INT32_C( -811849174), INT32_C(-1395624565), INT32_C( 1897985966), INT32_C( 1445172644), INT32_C( -193622280), INT32_C( 1730687689), INT32_C( -652080500), INT32_C( 1943026961), INT32_C( 1429317129), INT32_C(-2124387583), INT32_C(-1102663841)), 
simde_mm512_set_epi32(INT32_C( -549528402), INT32_C( -323547130), INT32_C( -8263463), INT32_C(-1905505546), INT32_C( 842200487), INT32_C( -811849174), INT32_C( 729621709), INT32_C( 1897985966), INT32_C( 1445172644), INT32_C( -193622280), INT32_C( 1730687689), INT32_C( -652080500), INT32_C( 1943026961), INT32_C( 1429317129), INT32_C(-2124387583), INT32_C(-1102663841)) }, { simde_mm512_set_epi32(INT32_C(-1884003639), INT32_C( -638430290), INT32_C(-2007622482), INT32_C( 171336877), INT32_C( 59553613), INT32_C( 165266600), INT32_C( -798384264), INT32_C( 1607584815), INT32_C(-1324336584), INT32_C(-1668086905), INT32_C( -770469750), INT32_C( 1013231130), INT32_C( 543156562), INT32_C( -399740514), INT32_C( 509655415), INT32_C( -160537509)), UINT16_C(22542), simde_mm512_set_epi32(INT32_C( -364563113), INT32_C( 1520783126), INT32_C( -207159885), INT32_C( -104006691), INT32_C( 362759403), INT32_C(-1562242573), INT32_C( -397133039), INT32_C( 568974515), INT32_C(-1726442446), INT32_C(-2134949944), INT32_C( 1969107101), INT32_C(-2063427243), INT32_C( -670405092), INT32_C(-1879729053), INT32_C( 1035482990), INT32_C( 1183910939)), simde_mm_set_epi32(INT32_C( -521443925), INT32_C(-1464291783), INT32_C(-1686112999), INT32_C(-1290233716)), simde_mm512_set_epi32(INT32_C(-1884003639), INT32_C( 1520783126), INT32_C(-2007622482), INT32_C( -104006691), INT32_C( 362759403), INT32_C( 165266600), INT32_C( -798384264), INT32_C( 1607584815), INT32_C(-1324336584), INT32_C(-1668086905), INT32_C( -770469750), INT32_C( 1013231130), INT32_C( -521443925), INT32_C(-1464291783), INT32_C(-1686112999), INT32_C( -160537509)), simde_mm512_set_epi32(INT32_C(-1884003639), INT32_C( 1520783126), INT32_C(-2007622482), INT32_C( -104006691), INT32_C( 362759403), INT32_C( 165266600), INT32_C( -798384264), INT32_C( 1607584815), INT32_C(-1324336584), INT32_C(-1668086905), INT32_C( -770469750), INT32_C( 1013231130), INT32_C( -670405092), INT32_C(-1879729053), INT32_C( 1035482990), INT32_C( -160537509)), 
simde_mm512_set_epi32(INT32_C(-1884003639), INT32_C( 1520783126), INT32_C(-2007622482), INT32_C( -104006691), INT32_C( -521443925), INT32_C( 165266600), INT32_C( -798384264), INT32_C( 1607584815), INT32_C(-1324336584), INT32_C(-1668086905), INT32_C( -770469750), INT32_C( 1013231130), INT32_C( -670405092), INT32_C(-1879729053), INT32_C( 1035482990), INT32_C( -160537509)), simde_mm512_set_epi32(INT32_C(-1884003639), INT32_C(-1464291783), INT32_C(-2007622482), INT32_C(-1290233716), INT32_C( 362759403), INT32_C( 165266600), INT32_C( -798384264), INT32_C( 1607584815), INT32_C(-1324336584), INT32_C(-1668086905), INT32_C( -770469750), INT32_C( 1013231130), INT32_C( -670405092), INT32_C(-1879729053), INT32_C( 1035482990), INT32_C( -160537509)) }, { simde_mm512_set_epi32(INT32_C(-1383827159), INT32_C( 1804875719), INT32_C( 1179452315), INT32_C(-1509656190), INT32_C(-1409992701), INT32_C(-1830359468), INT32_C( 635753031), INT32_C( 310246197), INT32_C(-1783943034), INT32_C(-1307643183), INT32_C( -144888334), INT32_C( 621611179), INT32_C( 743650285), INT32_C( 1845744981), INT32_C(-1349190316), INT32_C(-1403674818)), UINT16_C( 4521), simde_mm512_set_epi32(INT32_C( 1055036471), INT32_C( 351897115), INT32_C( 1594003471), INT32_C(-1709813294), INT32_C( -133653364), INT32_C( -51462036), INT32_C( 46796230), INT32_C( 989301899), INT32_C( -691937914), INT32_C( 1667629581), INT32_C( -496700661), INT32_C(-1318801755), INT32_C( 1076515270), INT32_C(-1757573505), INT32_C(-1929379353), INT32_C( -560036292)), simde_mm_set_epi32(INT32_C(-1293286075), INT32_C(-1398303881), INT32_C(-2094713086), INT32_C( 197529411)), simde_mm512_set_epi32(INT32_C(-1383827159), INT32_C( 1804875719), INT32_C( 1179452315), INT32_C(-1709813294), INT32_C(-1409992701), INT32_C(-1830359468), INT32_C( 635753031), INT32_C( 989301899), INT32_C( -691937914), INT32_C(-1307643183), INT32_C( -496700661), INT32_C( 621611179), INT32_C(-1293286075), INT32_C( 1845744981), INT32_C(-1349190316), INT32_C( 197529411)), 
simde_mm512_set_epi32(INT32_C(-1383827159), INT32_C( 1804875719), INT32_C( 1179452315), INT32_C(-1709813294), INT32_C(-1409992701), INT32_C(-1830359468), INT32_C( 635753031), INT32_C( 989301899), INT32_C(-1293286075), INT32_C(-1307643183), INT32_C(-2094713086), INT32_C( 621611179), INT32_C( 1076515270), INT32_C( 1845744981), INT32_C(-1349190316), INT32_C( -560036292)), simde_mm512_set_epi32(INT32_C(-1383827159), INT32_C( 1804875719), INT32_C( 1179452315), INT32_C(-1709813294), INT32_C(-1409992701), INT32_C(-1830359468), INT32_C( 635753031), INT32_C( 197529411), INT32_C( -691937914), INT32_C(-1307643183), INT32_C( -496700661), INT32_C( 621611179), INT32_C( 1076515270), INT32_C( 1845744981), INT32_C(-1349190316), INT32_C( -560036292)), simde_mm512_set_epi32(INT32_C(-1383827159), INT32_C( 1804875719), INT32_C( 1179452315), INT32_C( 197529411), INT32_C(-1409992701), INT32_C(-1830359468), INT32_C( 635753031), INT32_C( 989301899), INT32_C( -691937914), INT32_C(-1307643183), INT32_C( -496700661), INT32_C( 621611179), INT32_C( 1076515270), INT32_C( 1845744981), INT32_C(-1349190316), INT32_C( -560036292)) }, };
/* Run every entry through all four immediates. The immediate is spelled out as
 * a literal in each call instead of being looped over (presumably because it
 * has to be a compile-time constant -- TODO confirm), and each result is
 * checked against the expected vector with the matching suffix. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r; r = simde_mm512_mask_inserti32x4(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b, 0); simde_assert_m512i_i32(r, ==, test_vec[i].r0); r = simde_mm512_mask_inserti32x4(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b, 1); simde_assert_m512i_i32(r, ==, test_vec[i].r1); r = simde_mm512_mask_inserti32x4(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b, 2); simde_assert_m512i_i32(r, ==, test_vec[i].r2); r = simde_mm512_mask_inserti32x4(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b, 3); simde_assert_m512i_i32(r, ==, test_vec[i].r3); } return 0; }
/* Test for simde_mm512_maskz_inserti32x4.
 *
 * Each test_vec entry carries the inputs (k, a, b) -- there is no src operand
 * here -- followed by one expected 512-bit result per immediate value, named
 * r0..r3 for immediates 0..3. */
static int test_simde_mm512_maskz_inserti32x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512i a; simde__m128i b; simde__m512i r0; simde__m512i r1; simde__m512i r2; 
simde__m512i r3; } test_vec[8] = { { UINT16_C(21335), simde_mm512_set_epi32(INT32_C( 951544639), INT32_C(-1026363374), INT32_C(-1801776439), INT32_C( 145438126), INT32_C(-1306064352), INT32_C( -858736392), INT32_C( 923442479), INT32_C( 1092805562), INT32_C( 1443901717), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1570116932), INT32_C(-1302383354), INT32_C( 1993455974), INT32_C(-2068684593), INT32_C(-1936012201)), simde_mm_set_epi32(INT32_C(-1630396605), INT32_C( 1545554432), INT32_C( 344023940), INT32_C(-1871515754)), simde_mm512_set_epi32(INT32_C( 0), INT32_C(-1026363374), INT32_C( 0), INT32_C( 145438126), INT32_C( 0), INT32_C( 0), INT32_C( 923442479), INT32_C( 1092805562), INT32_C( 0), INT32_C( 1848749100), INT32_C( 0), INT32_C( 1570116932), INT32_C( 0), INT32_C( 1545554432), INT32_C( 344023940), INT32_C(-1871515754)), simde_mm512_set_epi32(INT32_C( 0), INT32_C(-1026363374), INT32_C( 0), INT32_C( 145438126), INT32_C( 0), INT32_C( 0), INT32_C( 923442479), INT32_C( 1092805562), INT32_C( 0), INT32_C( 1545554432), INT32_C( 0), INT32_C(-1871515754), INT32_C( 0), INT32_C( 1993455974), INT32_C(-2068684593), INT32_C(-1936012201)), simde_mm512_set_epi32(INT32_C( 0), INT32_C(-1026363374), INT32_C( 0), INT32_C( 145438126), INT32_C( 0), INT32_C( 0), INT32_C( 344023940), INT32_C(-1871515754), INT32_C( 0), INT32_C( 1848749100), INT32_C( 0), INT32_C( 1570116932), INT32_C( 0), INT32_C( 1993455974), INT32_C(-2068684593), INT32_C(-1936012201)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 1545554432), INT32_C( 0), INT32_C(-1871515754), INT32_C( 0), INT32_C( 0), INT32_C( 923442479), INT32_C( 1092805562), INT32_C( 0), INT32_C( 1848749100), INT32_C( 0), INT32_C( 1570116932), INT32_C( 0), INT32_C( 1993455974), INT32_C(-2068684593), INT32_C(-1936012201)) }, { UINT16_C(30312), simde_mm512_set_epi32(INT32_C( 564965997), INT32_C( 169645898), INT32_C(-1539616610), INT32_C( 1134735685), INT32_C( 1430356381), INT32_C(-1110068455), INT32_C( -207240031), INT32_C(-1649179267), INT32_C( 
2054398444), INT32_C( -483586503), INT32_C(-1481960002), INT32_C( 861125508), INT32_C( -330381203), INT32_C(-1999224530), INT32_C( 1042470181), INT32_C( 1827473477)), simde_mm_set_epi32(INT32_C(-2045280751), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 282198336)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 169645898), INT32_C(-1539616610), INT32_C( 1134735685), INT32_C( 0), INT32_C(-1110068455), INT32_C( -207240031), INT32_C( 0), INT32_C( 0), INT32_C( -483586503), INT32_C(-1481960002), INT32_C( 0), INT32_C(-2045280751), INT32_C( 0), INT32_C( 0), INT32_C( 0)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 169645898), INT32_C(-1539616610), INT32_C( 1134735685), INT32_C( 0), INT32_C(-1110068455), INT32_C( -207240031), INT32_C( 0), INT32_C( 0), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 0), INT32_C( -330381203), INT32_C( 0), INT32_C( 0), INT32_C( 0)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 169645898), INT32_C(-1539616610), INT32_C( 1134735685), INT32_C( 0), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 0), INT32_C( 0), INT32_C( -483586503), INT32_C(-1481960002), INT32_C( 0), INT32_C( -330381203), INT32_C( 0), INT32_C( 0), INT32_C( 0)), simde_mm512_set_epi32(INT32_C( 0), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 0), INT32_C(-1110068455), INT32_C( -207240031), INT32_C( 0), INT32_C( 0), INT32_C( -483586503), INT32_C(-1481960002), INT32_C( 0), INT32_C( -330381203), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(24099), simde_mm512_set_epi32(INT32_C(-2063050181), INT32_C( 1095467003), INT32_C(-2083755741), INT32_C( 2066979701), INT32_C( 1094609712), INT32_C( 1345059025), INT32_C( -340318359), INT32_C( 1519671047), INT32_C(-1017461983), INT32_C( 353198331), INT32_C( 1711460779), INT32_C( -919570191), INT32_C( 1974152373), INT32_C( -695949043), INT32_C( -790242624), INT32_C(-1094331335)), simde_mm_set_epi32(INT32_C( 1549802795), INT32_C( 159583350), INT32_C( 548883180), INT32_C( -605945909)), 
simde_mm512_set_epi32(INT32_C( 0), INT32_C( 1095467003), INT32_C( 0), INT32_C( 2066979701), INT32_C( 1094609712), INT32_C( 1345059025), INT32_C( -340318359), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1711460779), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 548883180), INT32_C( -605945909)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 1095467003), INT32_C( 0), INT32_C( 2066979701), INT32_C( 1094609712), INT32_C( 1345059025), INT32_C( -340318359), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 548883180), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -790242624), INT32_C(-1094331335)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 1095467003), INT32_C( 0), INT32_C( 2066979701), INT32_C( 1549802795), INT32_C( 159583350), INT32_C( 548883180), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1711460779), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -790242624), INT32_C(-1094331335)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 159583350), INT32_C( 0), INT32_C( -605945909), INT32_C( 1094609712), INT32_C( 1345059025), INT32_C( -340318359), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1711460779), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -790242624), INT32_C(-1094331335)) }, { UINT16_C(31949), simde_mm512_set_epi32(INT32_C( 513515868), INT32_C(-1825967755), INT32_C( 822222164), INT32_C(-1689559027), INT32_C( 533478787), INT32_C( 907615417), INT32_C( -199229058), INT32_C( -91537812), INT32_C( 1375258232), INT32_C( 139748399), INT32_C( 1688468565), INT32_C( 736544549), INT32_C(-1282057552), INT32_C( 795925067), INT32_C( 1720852541), INT32_C(-1423023772)), simde_mm_set_epi32(INT32_C( 884163960), INT32_C( -329275629), INT32_C( -888441293), INT32_C( -707551350)), simde_mm512_set_epi32(INT32_C( 0), INT32_C(-1825967755), INT32_C( 822222164), INT32_C(-1689559027), INT32_C( 533478787), INT32_C( 907615417), INT32_C( 0), INT32_C( 0), INT32_C( 1375258232), INT32_C( 139748399), INT32_C( 0), INT32_C( 0), INT32_C( 884163960), INT32_C( -329275629), INT32_C( 0), 
INT32_C( -707551350)), simde_mm512_set_epi32(INT32_C( 0), INT32_C(-1825967755), INT32_C( 822222164), INT32_C(-1689559027), INT32_C( 533478787), INT32_C( 907615417), INT32_C( 0), INT32_C( 0), INT32_C( 884163960), INT32_C( -329275629), INT32_C( 0), INT32_C( 0), INT32_C(-1282057552), INT32_C( 795925067), INT32_C( 0), INT32_C(-1423023772)), simde_mm512_set_epi32(INT32_C( 0), INT32_C(-1825967755), INT32_C( 822222164), INT32_C(-1689559027), INT32_C( 884163960), INT32_C( -329275629), INT32_C( 0), INT32_C( 0), INT32_C( 1375258232), INT32_C( 139748399), INT32_C( 0), INT32_C( 0), INT32_C(-1282057552), INT32_C( 795925067), INT32_C( 0), INT32_C(-1423023772)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( -329275629), INT32_C( -888441293), INT32_C( -707551350), INT32_C( 533478787), INT32_C( 907615417), INT32_C( 0), INT32_C( 0), INT32_C( 1375258232), INT32_C( 139748399), INT32_C( 0), INT32_C( 0), INT32_C(-1282057552), INT32_C( 795925067), INT32_C( 0), INT32_C(-1423023772)) }, { UINT16_C(10665), simde_mm512_set_epi32(INT32_C( 1219490517), INT32_C(-1569831145), INT32_C( 338985942), INT32_C( 1701079465), INT32_C( -195770682), INT32_C( 503748315), INT32_C( 1469355417), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C( -646247370), INT32_C( 1258747662), INT32_C( 1838830023), INT32_C( -532007659), INT32_C( -622852205), INT32_C( -839037220), INT32_C( 499633910)), simde_mm_set_epi32(INT32_C( -272088075), INT32_C( 386072301), INT32_C(-1628984154), INT32_C( 87817524)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 338985942), INT32_C( 0), INT32_C( -195770682), INT32_C( 0), INT32_C( 0), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C( 0), INT32_C( 1258747662), INT32_C( 0), INT32_C( -272088075), INT32_C( 0), INT32_C( 0), INT32_C( 87817524)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 338985942), INT32_C( 0), INT32_C( -195770682), INT32_C( 0), INT32_C( 0), INT32_C(-1849349632), INT32_C( -272088075), INT32_C( 0), INT32_C(-1628984154), INT32_C( 0), INT32_C( 
-532007659), INT32_C( 0), INT32_C( 0), INT32_C( 499633910)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 338985942), INT32_C( 0), INT32_C( -272088075), INT32_C( 0), INT32_C( 0), INT32_C( 87817524), INT32_C( 1962664621), INT32_C( 0), INT32_C( 1258747662), INT32_C( 0), INT32_C( -532007659), INT32_C( 0), INT32_C( 0), INT32_C( 499633910)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C(-1628984154), INT32_C( 0), INT32_C( -195770682), INT32_C( 0), INT32_C( 0), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C( 0), INT32_C( 1258747662), INT32_C( 0), INT32_C( -532007659), INT32_C( 0), INT32_C( 0), INT32_C( 499633910)) }, { UINT16_C(12797), simde_mm512_set_epi32(INT32_C( 1774242298), INT32_C( 1089620040), INT32_C(-1101477862), INT32_C( 2001101785), INT32_C(-1759250988), INT32_C( -606254738), INT32_C( 1526367108), INT32_C( 722122834), INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494), INT32_C( -22119232), INT32_C( 1216907749), INT32_C( 654527510), INT32_C(-2043358500), INT32_C( 459072440)), simde_mm_set_epi32(INT32_C( 617951303), INT32_C( 817116152), INT32_C(-1034835761), INT32_C( -102069057)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C(-1101477862), INT32_C( 2001101785), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 722122834), INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494), INT32_C( -22119232), INT32_C( 617951303), INT32_C( 817116152), INT32_C( 0), INT32_C( -102069057)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C(-1101477862), INT32_C( 2001101785), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 722122834), INT32_C( 617951303), INT32_C( 817116152), INT32_C(-1034835761), INT32_C( -102069057), INT32_C( 1216907749), INT32_C( 654527510), INT32_C( 0), INT32_C( 459072440)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C(-1101477862), INT32_C( 2001101785), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -102069057), INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494), 
INT32_C( -22119232), INT32_C( 1216907749), INT32_C( 654527510), INT32_C( 0), INT32_C( 459072440)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C(-1034835761), INT32_C( -102069057), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 722122834), INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494), INT32_C( -22119232), INT32_C( 1216907749), INT32_C( 654527510), INT32_C( 0), INT32_C( 459072440)) }, { UINT16_C(46928), simde_mm512_set_epi32(INT32_C( 1541217841), INT32_C( 1692413538), INT32_C( 738521275), INT32_C( 159429100), INT32_C( 451955897), INT32_C( 181201098), INT32_C( 450627934), INT32_C( 2082954477), INT32_C( 1254960767), INT32_C( 1995459397), INT32_C( -11572946), INT32_C(-1087388220), INT32_C( 730787370), INT32_C(-2034110695), INT32_C(-1088138491), INT32_C( -353174912)), simde_mm_set_epi32(INT32_C( 204417556), INT32_C(-1329665093), INT32_C(-2039025377), INT32_C( 1639231015)), simde_mm512_set_epi32(INT32_C( 1541217841), INT32_C( 0), INT32_C( 738521275), INT32_C( 159429100), INT32_C( 0), INT32_C( 181201098), INT32_C( 450627934), INT32_C( 2082954477), INT32_C( 0), INT32_C( 1995459397), INT32_C( 0), INT32_C(-1087388220), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)), simde_mm512_set_epi32(INT32_C( 1541217841), INT32_C( 0), INT32_C( 738521275), INT32_C( 159429100), INT32_C( 0), INT32_C( 181201098), INT32_C( 450627934), INT32_C( 2082954477), INT32_C( 0), INT32_C(-1329665093), INT32_C( 0), INT32_C( 1639231015), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)), simde_mm512_set_epi32(INT32_C( 1541217841), INT32_C( 0), INT32_C( 738521275), INT32_C( 159429100), INT32_C( 0), INT32_C(-1329665093), INT32_C(-2039025377), INT32_C( 1639231015), INT32_C( 0), INT32_C( 1995459397), INT32_C( 0), INT32_C(-1087388220), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)), simde_mm512_set_epi32(INT32_C( 204417556), INT32_C( 0), INT32_C(-2039025377), INT32_C( 1639231015), INT32_C( 0), INT32_C( 181201098), INT32_C( 450627934), INT32_C( 2082954477), INT32_C( 
0), INT32_C( 1995459397), INT32_C( 0), INT32_C(-1087388220), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(30100), simde_mm512_set_epi32(INT32_C( 1313258175), INT32_C( 1928049651), INT32_C( 765730488), INT32_C( -85899231), INT32_C( 1435935141), INT32_C(-2098236580), INT32_C(-1991433794), INT32_C( 1298943776), INT32_C( 277470244), INT32_C(-1834748849), INT32_C( 596054477), INT32_C( 1827419510), INT32_C(-1010527612), INT32_C(-1687118128), INT32_C( 107945377), INT32_C( 1174128677)), simde_mm_set_epi32(INT32_C( -852914371), INT32_C( -773785464), INT32_C(-2142007253), INT32_C( 466013192)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 1928049651), INT32_C( 765730488), INT32_C( -85899231), INT32_C( 0), INT32_C(-2098236580), INT32_C( 0), INT32_C( 1298943776), INT32_C( 277470244), INT32_C( 0), INT32_C( 0), INT32_C( 1827419510), INT32_C( 0), INT32_C( -773785464), INT32_C( 0), INT32_C( 0)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 1928049651), INT32_C( 765730488), INT32_C( -85899231), INT32_C( 0), INT32_C(-2098236580), INT32_C( 0), INT32_C( 1298943776), INT32_C( -852914371), INT32_C( 0), INT32_C( 0), INT32_C( 466013192), INT32_C( 0), INT32_C(-1687118128), INT32_C( 0), INT32_C( 0)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 1928049651), INT32_C( 765730488), INT32_C( -85899231), INT32_C( 0), INT32_C( -773785464), INT32_C( 0), INT32_C( 466013192), INT32_C( 277470244), INT32_C( 0), INT32_C( 0), INT32_C( 1827419510), INT32_C( 0), INT32_C(-1687118128), INT32_C( 0), INT32_C( 0)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( -773785464), INT32_C(-2142007253), INT32_C( 466013192), INT32_C( 0), INT32_C(-2098236580), INT32_C( 0), INT32_C( 1298943776), INT32_C( 277470244), INT32_C( 0), INT32_C( 0), INT32_C( 1827419510), INT32_C( 0), INT32_C(-1687118128), INT32_C( 0), INT32_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r; r = simde_mm512_maskz_inserti32x4(test_vec[i].k, test_vec[i].a, test_vec[i].b, 0); 
simde_assert_m512i_i32(r, ==, test_vec[i].r0); r = simde_mm512_maskz_inserti32x4(test_vec[i].k, test_vec[i].a, test_vec[i].b, 1); simde_assert_m512i_i32(r, ==, test_vec[i].r1); r = simde_mm512_maskz_inserti32x4(test_vec[i].k, test_vec[i].a, test_vec[i].b, 2); simde_assert_m512i_i32(r, ==, test_vec[i].r2); r = simde_mm512_maskz_inserti32x4(test_vec[i].k, test_vec[i].a, test_vec[i].b, 3); simde_assert_m512i_i32(r, ==, test_vec[i].r3); } return 0; } static int test_simde_mm512_inserti64x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m256i b; simde__m512i r0; simde__m512i r1; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( 4022993628330696330), INT64_C( -564047204985781920), INT64_C( 4934063986128071877), INT64_C( 8258886799903261224), INT64_C( 5245738308211416456), INT64_C( 8690736315259258337), INT64_C(-5183161890921602420), INT64_C(-2495336383094170141)), simde_mm256_set_epi64x(INT64_C(-5948029684411535130), INT64_C(-4862378680423071053), INT64_C( 7355766231574189317), INT64_C( -310150959079746096)), simde_mm512_set_epi64(INT64_C( 4022993628330696330), INT64_C( -564047204985781920), INT64_C( 4934063986128071877), INT64_C( 8258886799903261224), INT64_C(-5948029684411535130), INT64_C(-4862378680423071053), INT64_C( 7355766231574189317), INT64_C( -310150959079746096)), simde_mm512_set_epi64(INT64_C(-5948029684411535130), INT64_C(-4862378680423071053), INT64_C( 7355766231574189317), INT64_C( -310150959079746096), INT64_C( 5245738308211416456), INT64_C( 8690736315259258337), INT64_C(-5183161890921602420), INT64_C(-2495336383094170141)) }, { simde_mm512_set_epi64(INT64_C( 1295774678670654457), INT64_C(-8851107363323835123), INT64_C( 2369486750103851747), INT64_C(-5139586436110975467), INT64_C(-8115609027568940125), INT64_C( 3504612124823893047), INT64_C(-7514888466798804666), INT64_C( 9113506312589344178)), simde_mm256_set_epi64x(INT64_C(-7443148953768886026), INT64_C( 8448077846545567514), INT64_C(-7304267332935478206), 
INT64_C(-1528489088828046422)), simde_mm512_set_epi64(INT64_C( 1295774678670654457), INT64_C(-8851107363323835123), INT64_C( 2369486750103851747), INT64_C(-5139586436110975467), INT64_C(-7443148953768886026), INT64_C( 8448077846545567514), INT64_C(-7304267332935478206), INT64_C(-1528489088828046422)), simde_mm512_set_epi64(INT64_C(-7443148953768886026), INT64_C( 8448077846545567514), INT64_C(-7304267332935478206), INT64_C(-1528489088828046422), INT64_C(-8115609027568940125), INT64_C( 3504612124823893047), INT64_C(-7514888466798804666), INT64_C( 9113506312589344178)) }, { simde_mm512_set_epi64(INT64_C(-3094219001013742557), INT64_C(-3379016320921474793), INT64_C( 7772273849745001049), INT64_C(-4229480058937372017), INT64_C( 8318730560275653847), INT64_C(-2664412856586094061), INT64_C( 2083707536546841162), INT64_C( 5404230241318444880)), simde_mm256_set_epi64x(INT64_C( 9199350188047982974), INT64_C(-7693432910203882071), INT64_C(-5762072963977532348), INT64_C( 4491924425059371454)), simde_mm512_set_epi64(INT64_C(-3094219001013742557), INT64_C(-3379016320921474793), INT64_C( 7772273849745001049), INT64_C(-4229480058937372017), INT64_C( 9199350188047982974), INT64_C(-7693432910203882071), INT64_C(-5762072963977532348), INT64_C( 4491924425059371454)), simde_mm512_set_epi64(INT64_C( 9199350188047982974), INT64_C(-7693432910203882071), INT64_C(-5762072963977532348), INT64_C( 4491924425059371454), INT64_C( 8318730560275653847), INT64_C(-2664412856586094061), INT64_C( 2083707536546841162), INT64_C( 5404230241318444880)) }, { simde_mm512_set_epi64(INT64_C( 2522518958303333112), INT64_C(-1668307566098600867), INT64_C( 8306832211054389426), INT64_C(-4135341282024622606), INT64_C(-7922172549839933132), INT64_C( 90826243433254935), INT64_C( 1037923706586637130), INT64_C( 5568688997300093349)), simde_mm256_set_epi64x(INT64_C( 7563354526679147255), INT64_C(-6952412028107066884), INT64_C(-3077616107881632928), INT64_C(-2220298267656761827)), simde_mm512_set_epi64(INT64_C( 
2522518958303333112), INT64_C(-1668307566098600867), INT64_C( 8306832211054389426), INT64_C(-4135341282024622606), INT64_C( 7563354526679147255), INT64_C(-6952412028107066884), INT64_C(-3077616107881632928), INT64_C(-2220298267656761827)), simde_mm512_set_epi64(INT64_C( 7563354526679147255), INT64_C(-6952412028107066884), INT64_C(-3077616107881632928), INT64_C(-2220298267656761827), INT64_C(-7922172549839933132), INT64_C( 90826243433254935), INT64_C( 1037923706586637130), INT64_C( 5568688997300093349)) }, { simde_mm512_set_epi64(INT64_C(-6876215301736363293), INT64_C(-2253243373865166954), INT64_C( 5866706473820467911), INT64_C(-1945184283153250111), INT64_C(-6043663531296462836), INT64_C(-3201199251206898425), INT64_C(-7517867743898200758), INT64_C( 5023666877462679332)), simde_mm256_set_epi64x(INT64_C( 1419500527032411112), INT64_C( 5424087511148175828), INT64_C(-4780701435803039630), INT64_C( 6069825193561024149)), simde_mm512_set_epi64(INT64_C(-6876215301736363293), INT64_C(-2253243373865166954), INT64_C( 5866706473820467911), INT64_C(-1945184283153250111), INT64_C( 1419500527032411112), INT64_C( 5424087511148175828), INT64_C(-4780701435803039630), INT64_C( 6069825193561024149)), simde_mm512_set_epi64(INT64_C( 1419500527032411112), INT64_C( 5424087511148175828), INT64_C(-4780701435803039630), INT64_C( 6069825193561024149), INT64_C(-6043663531296462836), INT64_C(-3201199251206898425), INT64_C(-7517867743898200758), INT64_C( 5023666877462679332)) }, { simde_mm512_set_epi64(INT64_C( 5698026186558744964), INT64_C( 6269499859520580584), INT64_C( 7385991043015762011), INT64_C( 2173549174497415259), INT64_C( 135432210503006619), INT64_C(-7059566968128636366), INT64_C(-1295026765047609725), INT64_C( 5447800525707046939)), simde_mm256_set_epi64x(INT64_C(-7404082530836275478), INT64_C(-6011864495242619751), INT64_C(-2639903919112693390), INT64_C( 3391502071027493622)), simde_mm512_set_epi64(INT64_C( 5698026186558744964), INT64_C( 6269499859520580584), INT64_C( 
7385991043015762011), INT64_C( 2173549174497415259), INT64_C(-7404082530836275478), INT64_C(-6011864495242619751), INT64_C(-2639903919112693390), INT64_C( 3391502071027493622)), simde_mm512_set_epi64(INT64_C(-7404082530836275478), INT64_C(-6011864495242619751), INT64_C(-2639903919112693390), INT64_C( 3391502071027493622), INT64_C( 135432210503006619), INT64_C(-7059566968128636366), INT64_C(-1295026765047609725), INT64_C( 5447800525707046939)) }, { simde_mm512_set_epi64(INT64_C(-7778482448656032654), INT64_C(-7388935565641111344), INT64_C( 2154583157079273400), INT64_C( 4649728279138736034), INT64_C( 1896125478609903946), INT64_C( 6795120210135498653), INT64_C(-8532964392806396349), INT64_C(-8044512602622188161)), simde_mm256_set_epi64x(INT64_C(-7828848640852632692), INT64_C(-9058711782958006347), INT64_C(-6631984369075385878), INT64_C( 312385656423386943)), simde_mm512_set_epi64(INT64_C(-7778482448656032654), INT64_C(-7388935565641111344), INT64_C( 2154583157079273400), INT64_C( 4649728279138736034), INT64_C(-7828848640852632692), INT64_C(-9058711782958006347), INT64_C(-6631984369075385878), INT64_C( 312385656423386943)), simde_mm512_set_epi64(INT64_C(-7828848640852632692), INT64_C(-9058711782958006347), INT64_C(-6631984369075385878), INT64_C( 312385656423386943), INT64_C( 1896125478609903946), INT64_C( 6795120210135498653), INT64_C(-8532964392806396349), INT64_C(-8044512602622188161)) }, { simde_mm512_set_epi64(INT64_C(-8313572030703954107), INT64_C( 8197482817575228316), INT64_C( 7734796813438689885), INT64_C(-7308014241195865956), INT64_C( 8618855955534148826), INT64_C(-1358620153905394442), INT64_C(-6300735262609682931), INT64_C(-6423460006708841158)), simde_mm256_set_epi64x(INT64_C( 2330675318709913935), INT64_C( 8512876982035459145), INT64_C( -754385814369639096), INT64_C(-5873166547629617678)), simde_mm512_set_epi64(INT64_C(-8313572030703954107), INT64_C( 8197482817575228316), INT64_C( 7734796813438689885), INT64_C(-7308014241195865956), INT64_C( 
2330675318709913935), INT64_C( 8512876982035459145), INT64_C( -754385814369639096), INT64_C(-5873166547629617678)), simde_mm512_set_epi64(INT64_C( 2330675318709913935), INT64_C( 8512876982035459145), INT64_C( -754385814369639096), INT64_C(-5873166547629617678), INT64_C( 8618855955534148826), INT64_C(-1358620153905394442), INT64_C(-6300735262609682931), INT64_C(-6423460006708841158)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r; r = simde_mm512_inserti64x4(test_vec[i].a, test_vec[i].b, 0); simde_assert_m512i_i64(r, ==, test_vec[i].r0); r = simde_mm512_inserti64x4(test_vec[i].a, test_vec[i].b, 1); simde_assert_m512i_i64(r, ==, test_vec[i].r1); } return 0; } static int test_simde_mm512_mask_inserti64x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask8 k; simde__m512i a; simde__m256i b; simde__m512i r0; simde__m512i r1; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C(-4408197122649025847), INT64_C( 624651997750430240), INT64_C(-3688244718601593553), INT64_C( 4693564151120802069), INT64_C( 7940316924786767481), INT64_C( 6743600876828439814), INT64_C( 8561828216572109007), INT64_C(-8315109086095518889)), UINT8_C( 47), simde_mm512_set_epi64(INT64_C( -890089152921238147), INT64_C( 8823574133744668217), INT64_C(-6364969741708969084), INT64_C(-1418976459802394322), INT64_C( 4477375336277674053), INT64_C( 1282248710630285123), INT64_C( 6638105739971879812), INT64_C(-8038098956427236545)), simde_mm256_set_epi64x(INT64_C( 961867877660623168), INT64_C( 2426510480636680010), INT64_C(-6612602987193650875), INT64_C( 6143333881204814617)), simde_mm512_set_epi64(INT64_C(-4408197122649025847), INT64_C( 624651997750430240), INT64_C(-6364969741708969084), INT64_C( 4693564151120802069), INT64_C( 961867877660623168), INT64_C( 2426510480636680010), INT64_C(-6612602987193650875), INT64_C( 6143333881204814617)), simde_mm512_set_epi64(INT64_C(-4408197122649025847), INT64_C( 624651997750430240), 
INT64_C(-6612602987193650875), INT64_C( 4693564151120802069), INT64_C( 4477375336277674053), INT64_C( 1282248710630285123), INT64_C( 6638105739971879812), INT64_C(-8038098956427236545)) }, { simde_mm512_set_epi64(INT64_C(-8949662758380266635), INT64_C( 4701312916269037777), INT64_C(-1461656220613716217), INT64_C(-4369965941555109637), INT64_C( 7350668077567080689), INT64_C( 8478919882954811661), INT64_C(-3394066222784588743), INT64_C(-5009306653852991983)), UINT8_C( 51), simde_mm512_set_epi64(INT64_C( -393151907512138120), INT64_C( 600214805061827669), INT64_C( 3163434753014979248), INT64_C( 3418472134552461373), INT64_C(-6111840559061041971), INT64_C( 6656352319933975670), INT64_C( 2357435311113502667), INT64_C(-8860733056306413573)), simde_mm256_set_epi64x(INT64_C(-3038909907977133732), INT64_C(-7842471790453318316), INT64_C(-7256600765093102205), INT64_C( 3898178537456140670)), simde_mm512_set_epi64(INT64_C(-8949662758380266635), INT64_C( 4701312916269037777), INT64_C( 3163434753014979248), INT64_C( 3418472134552461373), INT64_C( 7350668077567080689), INT64_C( 8478919882954811661), INT64_C(-7256600765093102205), INT64_C( 3898178537456140670)), simde_mm512_set_epi64(INT64_C(-8949662758380266635), INT64_C( 4701312916269037777), INT64_C(-7256600765093102205), INT64_C( 3898178537456140670), INT64_C( 7350668077567080689), INT64_C( 8478919882954811661), INT64_C( 2357435311113502667), INT64_C(-8860733056306413573)) }, { simde_mm512_set_epi64(INT64_C( 7306080674171373254), INT64_C( 2163582539809461657), INT64_C(-7942896186346970451), INT64_C(-2775611318017263858), INT64_C( 7897714815450887445), INT64_C(-2675129847260557604), INT64_C( 2145911307457407401), INT64_C( 3797455296467543827)), UINT8_C(191), simde_mm512_set_epi64(INT64_C( -751557688731444383), INT64_C( 5324069355912068288), INT64_C( 5226578984858504214), INT64_C(-8776157931044543560), INT64_C(-1848672680316222475), INT64_C( 1658167909352451238), INT64_C( 377173394815185621), INT64_C(-6742373427678247978)), 
simde_mm256_set_epi64x(INT64_C( 7620312646179506248), INT64_C(-4730811392556899367), INT64_C(-7555925455226975890), INT64_C( 6555696811272222802)), simde_mm512_set_epi64(INT64_C( -751557688731444383), INT64_C( 2163582539809461657), INT64_C( 5226578984858504214), INT64_C(-8776157931044543560), INT64_C( 7620312646179506248), INT64_C(-4730811392556899367), INT64_C(-7555925455226975890), INT64_C( 6555696811272222802)), simde_mm512_set_epi64(INT64_C( 7620312646179506248), INT64_C( 2163582539809461657), INT64_C(-7555925455226975890), INT64_C( 6555696811272222802), INT64_C(-1848672680316222475), INT64_C( 1658167909352451238), INT64_C( 377173394815185621), INT64_C(-6742373427678247978)) }, { simde_mm512_set_epi64(INT64_C( 1941135797030545610), INT64_C( 1935432241277000941), INT64_C( 5390015454023535429), INT64_C( -49705421380794940), INT64_C( 3138707856740708121), INT64_C(-4673519228421997952), INT64_C(-1556073591389999033), INT64_C( 3509487153133496527)), UINT8_C(191), simde_mm512_set_epi64(INT64_C(-7880186302232587827), INT64_C( 7848707034806784644), INT64_C(-7246117184140796511), INT64_C( 5042844271761388948), INT64_C( 877966720713550779), INT64_C(-8757547308289839577), INT64_C( 6619480224799141474), INT64_C( 3171924723684651500)), simde_mm256_set_epi64x(INT64_C( 8280910196874944184), INT64_C( -368934386460614235), INT64_C(-9011857488067354178), INT64_C( 5578921037540219940)), simde_mm512_set_epi64(INT64_C(-7880186302232587827), INT64_C( 1935432241277000941), INT64_C(-7246117184140796511), INT64_C( 5042844271761388948), INT64_C( 8280910196874944184), INT64_C( -368934386460614235), INT64_C(-9011857488067354178), INT64_C( 5578921037540219940)), simde_mm512_set_epi64(INT64_C( 8280910196874944184), INT64_C( 1935432241277000941), INT64_C(-9011857488067354178), INT64_C( 5578921037540219940), INT64_C( 877966720713550779), INT64_C(-8757547308289839577), INT64_C( 6619480224799141474), INT64_C( 3171924723684651500)) }, { simde_mm512_set_epi64(INT64_C( 6286668337562607931), 
INT64_C( 1432089847019206822), INT64_C(-2235254547542691893), INT64_C(-2975358417486477451), INT64_C( 5974528986311566652), INT64_C( 4798128784982043356), INT64_C(-3663239326212228984), INT64_C(-9199851098963784696)), UINT8_C( 9), simde_mm512_set_epi64(INT64_C(-3486865648830471282), INT64_C( 8151787653682140580), INT64_C( -831601358278995789), INT64_C(-2800664419916301039), INT64_C( 3280702275774868225), INT64_C(-4735905134864699368), INT64_C( 7051416147935021095), INT64_C(-4824857292892203785)), simde_mm256_set_epi64x(INT64_C(-7733586635814839612), INT64_C(-3387038813920004365), INT64_C(-6023807055599376167), INT64_C( 2056379472574346663)), simde_mm512_set_epi64(INT64_C( 6286668337562607931), INT64_C( 1432089847019206822), INT64_C(-2235254547542691893), INT64_C(-2975358417486477451), INT64_C(-7733586635814839612), INT64_C( 4798128784982043356), INT64_C(-3663239326212228984), INT64_C( 2056379472574346663)), simde_mm512_set_epi64(INT64_C( 6286668337562607931), INT64_C( 1432089847019206822), INT64_C(-2235254547542691893), INT64_C(-2975358417486477451), INT64_C( 3280702275774868225), INT64_C( 4798128784982043356), INT64_C(-3663239326212228984), INT64_C(-4824857292892203785)) }, { simde_mm512_set_epi64(INT64_C(-1389624339165317749), INT64_C(-8184083999390244234), INT64_C( 8331479114169761131), INT64_C( 8817045194671758320), INT64_C(-5234965963681749811), INT64_C(-5331803015084564567), INT64_C(-2592115690296560951), INT64_C( 1248919004007478956)), UINT8_C(200), simde_mm512_set_epi64(INT64_C(-2742037214038451026), INT64_C( 735886283373328205), INT64_C( 709814645617696632), INT64_C( 6904524208941840952), INT64_C(-7164378700336361334), INT64_C( 4351794567182281042), INT64_C(-1716872434006574729), INT64_C( -689503347190866770)), simde_mm256_set_epi64x(INT64_C( 8457250603347908949), INT64_C(-2879367942796632989), INT64_C( 4447365578798205979), INT64_C( 6508361231067538121)), simde_mm512_set_epi64(INT64_C(-2742037214038451026), INT64_C( 735886283373328205), INT64_C( 
8331479114169761131), INT64_C( 8817045194671758320), INT64_C( 8457250603347908949), INT64_C(-5331803015084564567), INT64_C(-2592115690296560951), INT64_C( 1248919004007478956)), simde_mm512_set_epi64(INT64_C( 8457250603347908949), INT64_C(-2879367942796632989), INT64_C( 8331479114169761131), INT64_C( 8817045194671758320), INT64_C(-7164378700336361334), INT64_C(-5331803015084564567), INT64_C(-2592115690296560951), INT64_C( 1248919004007478956)) }, { simde_mm512_set_epi64(INT64_C( 7927414333096918356), INT64_C(-6028737433755228757), INT64_C(-6289085317177674471), INT64_C(-5541511610486147753), INT64_C( 6531713794566454707), INT64_C( -446705336047418133), INT64_C(-6709780755556058351), INT64_C( 2443726936750986290)), UINT8_C(108), simde_mm512_set_epi64(INT64_C(-8286621218977708484), INT64_C(-7274138402675197655), INT64_C( 7751882187628938139), INT64_C(-6483923961368987645), INT64_C(-7861334054348205497), INT64_C( 1332497272334397574), INT64_C(-5616284701672264206), INT64_C( 2669799685376652269)), simde_mm256_set_epi64x(INT64_C( 200988278415395979), INT64_C(-2971850709824830963), INT64_C(-2133313091920417115), INT64_C( 4623597880832003711)), simde_mm512_set_epi64(INT64_C( 7927414333096918356), INT64_C(-7274138402675197655), INT64_C( 7751882187628938139), INT64_C(-5541511610486147753), INT64_C( 200988278415395979), INT64_C(-2971850709824830963), INT64_C(-6709780755556058351), INT64_C( 2443726936750986290)), simde_mm512_set_epi64(INT64_C( 7927414333096918356), INT64_C(-2971850709824830963), INT64_C(-2133313091920417115), INT64_C(-5541511610486147753), INT64_C(-7861334054348205497), INT64_C( 1332497272334397574), INT64_C(-6709780755556058351), INT64_C( 2443726936750986290)) }, { simde_mm512_set_epi64(INT64_C(-2107226359725098394), INT64_C(-4111796092278699383), INT64_C(-7795021097560230286), INT64_C( -756190175563217595), INT64_C(-6005669436564621566), INT64_C( 848382361298179127), INT64_C( 1511386602075754511), INT64_C(-7343592175834719092)), UINT8_C(114), 
simde_mm512_set_epi64(INT64_C(-5446377680450598123), INT64_C( 6545920141858823590), INT64_C(-6109676225565942508), INT64_C( 7950769636462343621), INT64_C(-2554331370738443173), INT64_C( 3394005203063129217), INT64_C(-2129862572478557053), INT64_C( 6740280704142266940)), simde_mm256_set_epi64x(INT64_C(-7150215929147729012), INT64_C( 4777590230543990164), INT64_C(-7212709010936609343), INT64_C(-5423311652387614668)), simde_mm512_set_epi64(INT64_C(-2107226359725098394), INT64_C( 6545920141858823590), INT64_C(-6109676225565942508), INT64_C( 7950769636462343621), INT64_C(-6005669436564621566), INT64_C( 848382361298179127), INT64_C(-7212709010936609343), INT64_C(-7343592175834719092)), simde_mm512_set_epi64(INT64_C(-2107226359725098394), INT64_C( 4777590230543990164), INT64_C(-7212709010936609343), INT64_C(-5423311652387614668), INT64_C(-6005669436564621566), INT64_C( 848382361298179127), INT64_C(-2129862572478557053), INT64_C(-7343592175834719092)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r; r = simde_mm512_mask_inserti64x4(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b, 0); simde_assert_m512i_i64(r, ==, test_vec[i].r0); r = simde_mm512_mask_inserti64x4(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b, 1); simde_assert_m512i_i64(r, ==, test_vec[i].r1); } return 0; } static int test_simde_mm512_maskz_inserti64x4(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; simde__m256i b; simde__m512i r0; simde__m512i r1; } test_vec[8] = { { UINT8_C( 46), simde_mm512_set_epi64(INT64_C(-4408197122649025847), INT64_C( 624651997750430240), INT64_C(-3688244718601593553), INT64_C( 4693564151120802069), INT64_C( 7940316924786767481), INT64_C( 6743600876828439814), INT64_C( 8561828216572109007), INT64_C(-8315109086095518889)), simde_mm256_set_epi64x(INT64_C( 4477375336277674053), INT64_C( 1282248710630285123), INT64_C( 6638105739971879812), INT64_C(-8038098956427236545)), 
simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C(-3688244718601593553), INT64_C( 0), INT64_C( 4477375336277674053), INT64_C( 1282248710630285123), INT64_C( 6638105739971879812), INT64_C( 0)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 6638105739971879812), INT64_C( 0), INT64_C( 7940316924786767481), INT64_C( 6743600876828439814), INT64_C( 8561828216572109007), INT64_C( 0)) }, { UINT8_C(251), simde_mm512_set_epi64(INT64_C(-8749971605870264899), INT64_C( 1212032624670585453), INT64_C( 728623586565902494), INT64_C( 4873652658109514141), INT64_C(-4767707706458520415), INT64_C(-7083171014951853588), INT64_C(-2076988212358998594), INT64_C( 3698505898575972461)), simde_mm256_set_epi64x(INT64_C( 7350668077567080689), INT64_C( 8478919882954811661), INT64_C(-3394066222784588743), INT64_C(-5009306653852991983)), simde_mm512_set_epi64(INT64_C(-8749971605870264899), INT64_C( 1212032624670585453), INT64_C( 728623586565902494), INT64_C( 4873652658109514141), INT64_C( 7350668077567080689), INT64_C( 0), INT64_C(-3394066222784588743), INT64_C(-5009306653852991983)), simde_mm512_set_epi64(INT64_C( 7350668077567080689), INT64_C( 8478919882954811661), INT64_C(-3394066222784588743), INT64_C(-5009306653852991983), INT64_C(-4767707706458520415), INT64_C( 0), INT64_C(-2076988212358998594), INT64_C( 3698505898575972461)) }, { UINT8_C(185), simde_mm512_set_epi64(INT64_C( 7391005387705442660), INT64_C(-5091463632259113685), INT64_C( 685405269785004780), INT64_C(-2602517860068074949), INT64_C( 4704994953943345443), INT64_C( 8877610218385468208), INT64_C( 5776984527519295337), INT64_C( 6526937450820584225)), simde_mm256_set_epi64x(INT64_C( -855682284319457684), INT64_C( 5906689130134529071), INT64_C( 7251917267735594789), INT64_C(-5506395256633894325)), simde_mm512_set_epi64(INT64_C( 7391005387705442660), INT64_C( 0), INT64_C( 685405269785004780), INT64_C(-2602517860068074949), INT64_C( -855682284319457684), INT64_C( 0), INT64_C( 0), INT64_C(-5506395256633894325)), 
simde_mm512_set_epi64(INT64_C( -855682284319457684), INT64_C( 0), INT64_C( 7251917267735594789), INT64_C(-5506395256633894325), INT64_C( 4704994953943345443), INT64_C( 0), INT64_C( 0), INT64_C( 6526937450820584225)) }, { UINT8_C( 23), simde_mm512_set_epi64(INT64_C( 5406280044045291975), INT64_C(-2284955492954404973), INT64_C(-3603637419527123210), INT64_C(-1117409850830928520), INT64_C(-1414228054518303181), INT64_C(-3038909907977133732), INT64_C(-7842471790453318316), INT64_C(-7256600765093102205)), simde_mm256_set_epi64x(INT64_C( 1455933536394832297), INT64_C( -840828676201867557), INT64_C( 6310833464661060096), INT64_C( 8429580363859954742)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C(-1117409850830928520), INT64_C( 0), INT64_C( -840828676201867557), INT64_C( 6310833464661060096), INT64_C( 8429580363859954742)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 8429580363859954742), INT64_C( 0), INT64_C(-3038909907977133732), INT64_C(-7842471790453318316), INT64_C(-7256600765093102205)) }, { UINT8_C(248), simde_mm512_set_epi64(INT64_C( 6555696811272222802), INT64_C( -751557688731444383), INT64_C( 5324069355912068288), INT64_C( 5226578984858504214), INT64_C(-8776157931044543560), INT64_C(-1848672680316222475), INT64_C( 1658167909352451238), INT64_C( 377173394815185621)), simde_mm256_set_epi64x(INT64_C(-4444585746033374017), INT64_C( 7620312646179506248), INT64_C(-4730811392556899367), INT64_C(-7555925455226975890)), simde_mm512_set_epi64(INT64_C( 6555696811272222802), INT64_C( -751557688731444383), INT64_C( 5324069355912068288), INT64_C( 5226578984858504214), INT64_C(-4444585746033374017), INT64_C( 0), INT64_C( 0), INT64_C( 0)), simde_mm512_set_epi64(INT64_C(-4444585746033374017), INT64_C( 7620312646179506248), INT64_C(-4730811392556899367), INT64_C(-7555925455226975890), INT64_C(-8776157931044543560), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C(161), simde_mm512_set_epi64(INT64_C( 3171924723684651500), 
INT64_C( 1941135797030545610), INT64_C( 1935432241277000941), INT64_C( 5390015454023535429), INT64_C( -49705421380794940), INT64_C( 3138707856740708121), INT64_C(-4673519228421997952), INT64_C(-1556073591389999033)), simde_mm256_set_epi64x(INT64_C( 5042844271761388948), INT64_C( 877966720713550779), INT64_C(-8757547308289839577), INT64_C( 6619480224799141474)), simde_mm512_set_epi64(INT64_C( 3171924723684651500), INT64_C( 0), INT64_C( 1935432241277000941), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 6619480224799141474)), simde_mm512_set_epi64(INT64_C( 5042844271761388948), INT64_C( 0), INT64_C(-8757547308289839577), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C(-1556073591389999033)) }, { UINT8_C(203), simde_mm512_set_epi64(INT64_C(-9199851098963784696), INT64_C( 5640400914757694451), INT64_C( 3288787407719188513), INT64_C( 6167294471968879452), INT64_C(-8553143016080257248), INT64_C( 1191725626053358671), INT64_C( 2560034487176803702), INT64_C(-4340183042637127984)), simde_mm256_set_epi64x(INT64_C(-2975358417486477451), INT64_C( 5974528986311566652), INT64_C( 4798128784982043356), INT64_C(-3663239326212228984)), simde_mm512_set_epi64(INT64_C(-9199851098963784696), INT64_C( 5640400914757694451), INT64_C( 0), INT64_C( 0), INT64_C(-2975358417486477451), INT64_C( 0), INT64_C( 4798128784982043356), INT64_C(-3663239326212228984)), simde_mm512_set_epi64(INT64_C(-2975358417486477451), INT64_C( 5974528986311566652), INT64_C( 0), INT64_C( 0), INT64_C(-8553143016080257248), INT64_C( 0), INT64_C( 2560034487176803702), INT64_C(-4340183042637127984)) }, { UINT8_C(101), simde_mm512_set_epi64(INT64_C( 9010665687833774732), INT64_C( 8345237253505115478), INT64_C(-9124175189821182113), INT64_C( 3058208134496233888), INT64_C( 7286481320132913626), INT64_C( -777692308098335861), INT64_C( 8727238559278288416), INT64_C(-2736507802934917164)), simde_mm256_set_epi64x(INT64_C( -35491302857917892), INT64_C( 3617223551823391274), 
INT64_C(-6488944280908793938), INT64_C( 6206969247155195640)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 8345237253505115478), INT64_C(-9124175189821182113), INT64_C( 0), INT64_C( 0), INT64_C( 3617223551823391274), INT64_C( 0), INT64_C( 6206969247155195640)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 3617223551823391274), INT64_C(-6488944280908793938), INT64_C( 0), INT64_C( 0), INT64_C( -777692308098335861), INT64_C( 0), INT64_C(-2736507802934917164)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r; r = simde_mm512_maskz_inserti64x4(test_vec[i].k, test_vec[i].a, test_vec[i].b, 0); simde_assert_m512i_i64(r, ==, test_vec[i].r0); r = simde_mm512_maskz_inserti64x4(test_vec[i].k, test_vec[i].a, test_vec[i].b, 1); simde_assert_m512i_i64(r, ==, test_vec[i].r1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_insertf32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_insertf32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_insertf32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_insertf64x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_insertf64x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_insertf64x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_inserti32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_inserti32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_inserti32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_inserti64x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_inserti64x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_inserti64x4) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/kshift.c000066400000000000000000000365351400333146700167240ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons 
to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #define SIMDE_TEST_X86_AVX512_INSN kshift #include #include #define GENERATE_VECTORS 0 #if GENERATE_VECTORS #define PROBABILITY 80 #define probability(p) (rand() < ((HEDLEY_STATIC_CAST(int64_t, RAND_MAX) * (p)) / 100)) #endif static int test_simde_kshiftli_mask16 (SIMDE_MUNIT_TEST_ARGS) { #if GENERATE_VECTORS fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask16 r; simde__mmask16 a = simde_test_x86_random_mmask16(); unsigned int count = HEDLEY_STATIC_CAST(unsigned int, munit_rand_int_range(0, 255)); if (probability(PROBABILITY)) count &= 15; SIMDE_CONSTIFY_16_(simde_kshiftli_mask16, r, 0, count, a); simde_test_x86_write_mmask16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, count, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #else static const struct { const simde__mmask16 a; const unsigned int count; const simde__mmask16 r; } test_vec[] = { { UINT16_C(32998), UINT32_C( 98), UINT16_C( 0) }, { UINT16_C(11635), UINT32_C( 226), UINT16_C( 0) }, { UINT16_C(31681), UINT32_C( 10), UINT16_C( 1024) }, { UINT16_C(20229), UINT32_C( 10), UINT16_C( 5120) }, { UINT16_C( 3580), UINT32_C( 12), UINT16_C(49152) }, { UINT16_C(47873), UINT32_C( 0), 
UINT16_C(47873) }, { UINT16_C(50829), UINT32_C( 8), UINT16_C(36096) }, { UINT16_C(36232), UINT32_C( 11), UINT16_C(16384) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__mmask16 r; SIMDE_CONSTIFY_16_(simde_kshiftli_mask16, r, 0, test_vec[i].count, test_vec[i].a); simde_assert_equal_mmask16(r, test_vec[i].r); } return 0; #endif } static int test_simde_kshiftli_mask32 (SIMDE_MUNIT_TEST_ARGS) { #if GENERATE_VECTORS fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask32 r; simde__mmask32 a = simde_test_x86_random_mmask32(); unsigned int count = HEDLEY_STATIC_CAST(unsigned int, munit_rand_int_range(0, 255)); if (probability(PROBABILITY)) count &= 31; SIMDE_CONSTIFY_32_(simde_kshiftli_mask32, r, 0, count, a); simde_test_x86_write_mmask32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, count, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #else static const struct { const simde__mmask32 a; const unsigned int count; const simde__mmask32 r; } test_vec[] = { { UINT32_C(3866362599), UINT32_C( 178), UINT32_C( 0) }, { UINT32_C(1420251803), UINT32_C( 18), UINT32_C(1248591872) }, { UINT32_C(2944902744), UINT32_C( 25), UINT32_C(2952790016) }, { UINT32_C( 515275237), UINT32_C( 12), UINT32_C(1738428416) }, { UINT32_C(3507266298), UINT32_C( 29), UINT32_C(1073741824) }, { UINT32_C(2178785751), UINT32_C( 7), UINT32_C(4006669184) }, { UINT32_C(3901712499), UINT32_C( 213), UINT32_C( 0) }, { UINT32_C( 866157452), UINT32_C( 18), UINT32_C( 238026752) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__mmask32 r; SIMDE_CONSTIFY_32_(simde_kshiftli_mask32, r, 0, test_vec[i].count, test_vec[i].a); simde_assert_equal_mmask32(r, test_vec[i].r); } return 0; #endif } static int test_simde_kshiftli_mask64 (SIMDE_MUNIT_TEST_ARGS) { #if GENERATE_VECTORS fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask64 r; simde__mmask64 a = 
simde_test_x86_random_mmask64(); unsigned int count = HEDLEY_STATIC_CAST(unsigned int, munit_rand_int_range(0, 255)); if (probability(PROBABILITY)) count &= 63; SIMDE_CONSTIFY_64_(simde_kshiftli_mask64, r, 0, count, a); simde_test_x86_write_mmask64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, count, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #else static const struct { const simde__mmask64 a; const unsigned int count; const simde__mmask64 r; } test_vec[] = { { UINT64_C( 1295334839708707992), UINT32_C( 50), UINT64_C(12853273336515395584) }, { UINT64_C( 7249544805129316044), UINT32_C( 50), UINT64_C(11182437874760941568) }, { UINT64_C(17573762057435916769), UINT32_C( 121), UINT64_C( 0) }, { UINT64_C( 9388101999642308565), UINT32_C( 44), UINT64_C( 503646694265585664) }, { UINT64_C( 8448015434585439037), UINT32_C( 29), UINT64_C(16676670436032905216) }, { UINT64_C( 3549407090849943755), UINT32_C( 39), UINT64_C(12576413559862329344) }, { UINT64_C( 3228769676502045645), UINT32_C( 21), UINT64_C(11330967200335921152) }, { UINT64_C(17289294451460437515), UINT32_C( 50), UINT64_C(15576825211167703040) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__mmask64 r; SIMDE_CONSTIFY_64_(simde_kshiftli_mask64, r, 0, test_vec[i].count, test_vec[i].a); simde_assert_equal_mmask64(r, test_vec[i].r); } return 0; #endif } static int test_simde_kshiftli_mask8 (SIMDE_MUNIT_TEST_ARGS) { #if GENERATE_VECTORS fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 r; simde__mmask8 a = simde_test_x86_random_mmask8(); unsigned int count = HEDLEY_STATIC_CAST(unsigned int, munit_rand_int_range(0, 255)); if (probability(PROBABILITY)) count &= 7; SIMDE_CONSTIFY_8_(simde_kshiftli_mask8, r, 0, count, a); simde_test_x86_write_mmask8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, count, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask8(2, r, 
SIMDE_TEST_VEC_POS_LAST); } return 1; #else static const struct { const simde__mmask8 a; const unsigned int count; const simde__mmask8 r; } test_vec[] = { { UINT8_C(193), UINT32_C( 2), UINT8_C( 4) }, { UINT8_C(220), UINT32_C( 2), UINT8_C(112) }, { UINT8_C(209), UINT32_C( 1), UINT8_C(162) }, { UINT8_C( 18), UINT32_C( 108), UINT8_C( 0) }, { UINT8_C( 96), UINT32_C( 5), UINT8_C( 0) }, { UINT8_C(119), UINT32_C( 39), UINT8_C( 0) }, { UINT8_C(156), UINT32_C( 5), UINT8_C(128) }, { UINT8_C(250), UINT32_C( 2), UINT8_C(232) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__mmask8 r; SIMDE_CONSTIFY_8_(simde_kshiftli_mask8, r, 0, test_vec[i].count, test_vec[i].a); simde_assert_equal_mmask8(r, test_vec[i].r); } return 0; #endif } static int test_simde_kshiftri_mask16 (SIMDE_MUNIT_TEST_ARGS) { #if GENERATE_VECTORS fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask16 r; simde__mmask16 a = simde_test_x86_random_mmask16(); unsigned int count = HEDLEY_STATIC_CAST(unsigned int, munit_rand_int_range(0, 255)); if (probability(PROBABILITY)) count &= 15; SIMDE_CONSTIFY_16_(simde_kshiftri_mask16, r, 0, count, a); simde_test_x86_write_mmask16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, count, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #else static const struct { const simde__mmask16 a; const unsigned int count; const simde__mmask16 r; } test_vec[] = { { UINT16_C(48214), UINT32_C( 8), UINT16_C( 188) }, { UINT16_C(62048), UINT32_C( 1), UINT16_C(31024) }, { UINT16_C(60270), UINT32_C( 14), UINT16_C( 3) }, { UINT16_C(61859), UINT32_C( 187), UINT16_C( 0) }, { UINT16_C(48213), UINT32_C( 12), UINT16_C( 11) }, { UINT16_C(27255), UINT32_C( 2), UINT16_C( 6813) }, { UINT16_C(40508), UINT32_C( 8), UINT16_C( 158) }, { UINT16_C(56544), UINT32_C( 9), UINT16_C( 110) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__mmask16 r; 
SIMDE_CONSTIFY_16_(simde_kshiftri_mask16, r, 0, test_vec[i].count, test_vec[i].a); simde_assert_equal_mmask16(r, test_vec[i].r); } return 0; #endif } static int test_simde_kshiftri_mask32 (SIMDE_MUNIT_TEST_ARGS) { #if GENERATE_VECTORS fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask32 r; simde__mmask32 a = simde_test_x86_random_mmask32(); unsigned int count = HEDLEY_STATIC_CAST(unsigned int, munit_rand_int_range(0, 255)); if (probability(PROBABILITY)) count &= 31; SIMDE_CONSTIFY_32_(simde_kshiftri_mask32, r, 0, count, a); simde_test_x86_write_mmask32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, count, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #else static const struct { const simde__mmask32 a; const unsigned int count; const simde__mmask32 r; } test_vec[] = { { UINT32_C(2251082273), UINT32_C( 8), UINT32_C( 8793290) }, { UINT32_C(4066024717), UINT32_C( 129), UINT32_C( 0) }, { UINT32_C(1983822470), UINT32_C( 30), UINT32_C( 1) }, { UINT32_C( 149028471), UINT32_C( 27), UINT32_C( 1) }, { UINT32_C( 429851039), UINT32_C( 28), UINT32_C( 1) }, { UINT32_C(2881428320), UINT32_C( 18), UINT32_C( 10991) }, { UINT32_C( 827796637), UINT32_C( 1), UINT32_C( 413898318) }, { UINT32_C( 418383038), UINT32_C( 9), UINT32_C( 817154) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__mmask32 r; SIMDE_CONSTIFY_32_(simde_kshiftri_mask32, r, 0, test_vec[i].count, test_vec[i].a); simde_assert_equal_mmask32(r, test_vec[i].r); } return 0; #endif } static int test_simde_kshiftri_mask64 (SIMDE_MUNIT_TEST_ARGS) { #if GENERATE_VECTORS fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask64 r; simde__mmask64 a = simde_test_x86_random_mmask64(); unsigned int count = HEDLEY_STATIC_CAST(unsigned int, munit_rand_int_range(0, 255)); if (probability(PROBABILITY)) count &= 63; SIMDE_CONSTIFY_64_(simde_kshiftri_mask64, r, 0, count, a); simde_test_x86_write_mmask64(2, 
a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, count, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #else static const struct { const simde__mmask64 a; const unsigned int count; const simde__mmask64 r; } test_vec[] = { { UINT64_C(12929489738497449333), UINT32_C( 8), UINT64_C( 50505819291005661) }, { UINT64_C( 1761782705914904934), UINT32_C( 1), UINT64_C( 880891352957452467) }, { UINT64_C(13413149345940726503), UINT32_C( 30), UINT64_C( 12491968782) }, { UINT64_C( 7664406914680423996), UINT32_C( 187), UINT64_C( 0) }, { UINT64_C(11883749540069562403), UINT32_C( 28), UINT64_C( 44270416870) }, { UINT64_C(11681796465351530248), UINT32_C( 18), UINT64_C( 44562517033964) }, { UINT64_C( 3441339471496333362), UINT32_C( 56), UINT64_C( 47) }, { UINT64_C( 2277648988691598643), UINT32_C( 41), UINT64_C( 1035754) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__mmask64 r; SIMDE_CONSTIFY_64_(simde_kshiftri_mask64, r, 0, test_vec[i].count, test_vec[i].a); simde_assert_equal_mmask64(r, test_vec[i].r); } return 0; #endif } static int test_simde_kshiftri_mask8 (SIMDE_MUNIT_TEST_ARGS) { #if GENERATE_VECTORS fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 r; simde__mmask8 a = simde_test_x86_random_mmask8(); unsigned int count = HEDLEY_STATIC_CAST(unsigned int, munit_rand_int_range(0, 255)); if (probability(PROBABILITY)) count &= 7; SIMDE_CONSTIFY_8_(simde_kshiftri_mask8, r, 0, count, a); simde_test_x86_write_mmask8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u32(2, count, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #else static const struct { const simde__mmask8 a; const unsigned int count; const simde__mmask8 r; } test_vec[] = { { UINT8_C( 89), UINT32_C( 200), UINT8_C( 0) }, { UINT8_C(165), UINT32_C( 1), UINT8_C( 82) }, { UINT8_C(153), UINT32_C( 6), UINT8_C( 2) }, { UINT8_C(170), UINT32_C( 3), 
UINT8_C( 21) }, { UINT8_C( 93), UINT32_C( 4), UINT8_C( 5) }, { UINT8_C(100), UINT32_C( 2), UINT8_C( 25) }, { UINT8_C( 51), UINT32_C( 0), UINT8_C( 51) }, { UINT8_C(163), UINT32_C( 1), UINT8_C( 81) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__mmask8 r; SIMDE_CONSTIFY_8_(simde_kshiftri_mask8, r, 0, test_vec[i].count, test_vec[i].a); simde_assert_equal_mmask8(r, test_vec[i].r); } return 0; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(kshiftli_mask16) SIMDE_TEST_FUNC_LIST_ENTRY(kshiftli_mask32) SIMDE_TEST_FUNC_LIST_ENTRY(kshiftli_mask64) SIMDE_TEST_FUNC_LIST_ENTRY(kshiftli_mask8) SIMDE_TEST_FUNC_LIST_ENTRY(kshiftri_mask16) SIMDE_TEST_FUNC_LIST_ENTRY(kshiftri_mask32) SIMDE_TEST_FUNC_LIST_ENTRY(kshiftri_mask64) SIMDE_TEST_FUNC_LIST_ENTRY(kshiftri_mask8) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/load.c000066400000000000000000000163771400333146700163550ustar00rootroot00000000000000#define SIMDE_TEST_X86_AVX512_INSN load #include #include static int test_simde_mm512_load_si512 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { SIMDE_ALIGN_LIKE_64(simde__m512i) const int32_t a[16]; const int32_t r[16]; } test_vec[] = { { { -INT32_C( 1085279312), INT32_C( 1689654203), -INT32_C( 1704027444), INT32_C( 1992004399), INT32_C( 1598136076), -INT32_C( 1107314712), INT32_C( 1634510512), INT32_C( 2144272078), -INT32_C( 1405215247), -INT32_C( 65931984), -INT32_C( 1097433201), INT32_C( 523522579), -INT32_C( 629246223), -INT32_C( 560497363), -INT32_C( 230751453), -INT32_C( 210694911) }, { -INT32_C( 1085279312), INT32_C( 1689654203), -INT32_C( 1704027444), INT32_C( 1992004399), INT32_C( 1598136076), -INT32_C( 1107314712), INT32_C( 1634510512), INT32_C( 2144272078), -INT32_C( 1405215247), -INT32_C( 65931984), -INT32_C( 1097433201), INT32_C( 523522579), -INT32_C( 629246223), -INT32_C( 560497363), -INT32_C( 230751453), -INT32_C( 210694911) } }, { { INT32_C( 1537191723), INT32_C( 878227620), INT32_C( 1139994160), 
INT32_C( 845293376), -INT32_C( 905125475), -INT32_C( 2102877346), -INT32_C( 1468733529), INT32_C( 547087861), INT32_C( 964377492), INT32_C( 460182507), INT32_C( 39739330), INT32_C( 590659974), INT32_C( 15614114), -INT32_C( 1954375964), INT32_C( 1932785278), INT32_C( 1888735195) }, { INT32_C( 1537191723), INT32_C( 878227620), INT32_C( 1139994160), INT32_C( 845293376), -INT32_C( 905125475), -INT32_C( 2102877346), -INT32_C( 1468733529), INT32_C( 547087861), INT32_C( 964377492), INT32_C( 460182507), INT32_C( 39739330), INT32_C( 590659974), INT32_C( 15614114), -INT32_C( 1954375964), INT32_C( 1932785278), INT32_C( 1888735195) } }, { { -INT32_C( 173470198), -INT32_C( 1542383902), -INT32_C( 56201355), -INT32_C( 769664208), -INT32_C( 2945765), INT32_C( 579491236), INT32_C( 664125004), -INT32_C( 1751701363), INT32_C( 411844662), -INT32_C( 860054186), INT32_C( 1036542733), INT32_C( 1494279998), -INT32_C( 1722162187), -INT32_C( 2068061384), INT32_C( 783044769), -INT32_C( 1362803848) }, { -INT32_C( 173470198), -INT32_C( 1542383902), -INT32_C( 56201355), -INT32_C( 769664208), -INT32_C( 2945765), INT32_C( 579491236), INT32_C( 664125004), -INT32_C( 1751701363), INT32_C( 411844662), -INT32_C( 860054186), INT32_C( 1036542733), INT32_C( 1494279998), -INT32_C( 1722162187), -INT32_C( 2068061384), INT32_C( 783044769), -INT32_C( 1362803848) } }, { { -INT32_C( 624471420), -INT32_C( 56196113), INT32_C( 607809254), INT32_C( 1266567766), INT32_C( 1709496109), INT32_C( 1558880186), INT32_C( 1737135855), INT32_C( 1561678041), -INT32_C( 1858544478), INT32_C( 1183768160), -INT32_C( 1553217459), INT32_C( 1072621842), INT32_C( 2057622208), INT32_C( 1624673905), -INT32_C( 20487900), INT32_C( 1398529201) }, { -INT32_C( 624471420), -INT32_C( 56196113), INT32_C( 607809254), INT32_C( 1266567766), INT32_C( 1709496109), INT32_C( 1558880186), INT32_C( 1737135855), INT32_C( 1561678041), -INT32_C( 1858544478), INT32_C( 1183768160), -INT32_C( 1553217459), INT32_C( 1072621842), INT32_C( 2057622208), INT32_C( 
1624673905), -INT32_C( 20487900), INT32_C( 1398529201) } }, { { INT32_C( 434410425), -INT32_C( 1084263822), INT32_C( 1281542714), INT32_C( 1938510003), -INT32_C( 1813106654), -INT32_C( 470563650), -INT32_C( 689849819), INT32_C( 1328102550), INT32_C( 1114115792), -INT32_C( 1157511040), INT32_C( 1174889362), -INT32_C( 709258317), -INT32_C( 2123847741), -INT32_C( 1855693972), -INT32_C( 1419229931), INT32_C( 1392218498) }, { INT32_C( 434410425), -INT32_C( 1084263822), INT32_C( 1281542714), INT32_C( 1938510003), -INT32_C( 1813106654), -INT32_C( 470563650), -INT32_C( 689849819), INT32_C( 1328102550), INT32_C( 1114115792), -INT32_C( 1157511040), INT32_C( 1174889362), -INT32_C( 709258317), -INT32_C( 2123847741), -INT32_C( 1855693972), -INT32_C( 1419229931), INT32_C( 1392218498) } }, { { INT32_C( 546595743), -INT32_C( 1092905685), -INT32_C( 1425743112), INT32_C( 947961205), -INT32_C( 776279963), INT32_C( 1482825283), -INT32_C( 435959196), -INT32_C( 80150948), -INT32_C( 1927558046), INT32_C( 1498150497), INT32_C( 1308905433), INT32_C( 1921483789), -INT32_C( 1354546836), -INT32_C( 1022909089), -INT32_C( 861336976), INT32_C( 1808261385) }, { INT32_C( 546595743), -INT32_C( 1092905685), -INT32_C( 1425743112), INT32_C( 947961205), -INT32_C( 776279963), INT32_C( 1482825283), -INT32_C( 435959196), -INT32_C( 80150948), -INT32_C( 1927558046), INT32_C( 1498150497), INT32_C( 1308905433), INT32_C( 1921483789), -INT32_C( 1354546836), -INT32_C( 1022909089), -INT32_C( 861336976), INT32_C( 1808261385) } }, { { INT32_C( 251192237), -INT32_C( 1301855015), -INT32_C( 1610519661), INT32_C( 1527941359), INT32_C( 671765961), INT32_C( 1810633211), INT32_C( 624399644), INT32_C( 613482103), -INT32_C( 1154250527), INT32_C( 1617795788), -INT32_C( 184521210), -INT32_C( 1085205514), INT32_C( 1676172136), -INT32_C( 1982933907), -INT32_C( 525466263), -INT32_C( 452641276) }, { INT32_C( 251192237), -INT32_C( 1301855015), -INT32_C( 1610519661), INT32_C( 1527941359), INT32_C( 671765961), INT32_C( 1810633211), 
INT32_C( 624399644), INT32_C( 613482103), -INT32_C( 1154250527), INT32_C( 1617795788), -INT32_C( 184521210), -INT32_C( 1085205514), INT32_C( 1676172136), -INT32_C( 1982933907), -INT32_C( 525466263), -INT32_C( 452641276) } }, { { -INT32_C( 1818216250), -INT32_C( 655159598), INT32_C( 1942942588), INT32_C( 1865555718), -INT32_C( 405661062), INT32_C( 1483776494), -INT32_C( 1439162714), INT32_C( 596655452), INT32_C( 1219899509), -INT32_C( 1155487426), -INT32_C( 1557205348), -INT32_C( 2012061683), INT32_C( 1768940667), INT32_C( 750903429), INT32_C( 1540815614), -INT32_C( 1384225225) }, { -INT32_C( 1818216250), -INT32_C( 655159598), INT32_C( 1942942588), INT32_C( 1865555718), -INT32_C( 405661062), INT32_C( 1483776494), -INT32_C( 1439162714), INT32_C( 596655452), INT32_C( 1219899509), -INT32_C( 1155487426), -INT32_C( 1557205348), -INT32_C( 2012061683), INT32_C( 1768940667), INT32_C( 750903429), INT32_C( 1540815614), -INT32_C( 1384225225) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_test_x86_assert_equal_i32x16(simde_mm512_load_si512(test_vec[i].a), simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i32x16(); simde__m512i r = a; simde_test_x86_write_i32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_load_si512) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/loadu.c000066400000000000000000002415501400333146700165330ustar00rootroot00000000000000#define SIMDE_TEST_X86_AVX512_INSN loadu #include #include #include static int test_simde_mm_loadu_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { SIMDE_ALIGN_LIKE_16(simde__m128i) const int8_t a[16]; const int8_t r[16]; } test_vec[] = { { { INT8_C( 98), INT8_C( 124), -INT8_C( 57), -INT8_C( 74), INT8_C( 104), INT8_C( 59), INT8_C( 69), -INT8_C( 
25), INT8_C( 20), -INT8_C( 48), -INT8_C( 57), -INT8_C( 59), INT8_C( 21), INT8_C( 119), -INT8_C( 97), INT8_C( 28) }, { INT8_C( 98), INT8_C( 124), -INT8_C( 57), -INT8_C( 74), INT8_C( 104), INT8_C( 59), INT8_C( 69), -INT8_C( 25), INT8_C( 20), -INT8_C( 48), -INT8_C( 57), -INT8_C( 59), INT8_C( 21), INT8_C( 119), -INT8_C( 97), INT8_C( 28) } }, { { INT8_C( 70), -INT8_C( 33), INT8_C( 29), INT8_C( 10), INT8_C( 39), INT8_C( 31), -INT8_C( 91), INT8_C( 90), INT8_C( 111), -INT8_C( 46), -INT8_C( 65), INT8_C( 124), -INT8_C( 81), INT8_C( 57), -INT8_C( 34), INT8_C( 17) }, { INT8_C( 70), -INT8_C( 33), INT8_C( 29), INT8_C( 10), INT8_C( 39), INT8_C( 31), -INT8_C( 91), INT8_C( 90), INT8_C( 111), -INT8_C( 46), -INT8_C( 65), INT8_C( 124), -INT8_C( 81), INT8_C( 57), -INT8_C( 34), INT8_C( 17) } }, { { -INT8_C( 75), -INT8_C( 91), -INT8_C( 56), INT8_C( 30), -INT8_C( 31), INT8_C( 13), INT8_C( 5), -INT8_C( 11), -INT8_C( 35), -INT8_C( 52), -INT8_C( 70), -INT8_C( 14), INT8_C( 67), INT8_C( 89), INT8_C( 15), -INT8_C( 118) }, { -INT8_C( 75), -INT8_C( 91), -INT8_C( 56), INT8_C( 30), -INT8_C( 31), INT8_C( 13), INT8_C( 5), -INT8_C( 11), -INT8_C( 35), -INT8_C( 52), -INT8_C( 70), -INT8_C( 14), INT8_C( 67), INT8_C( 89), INT8_C( 15), -INT8_C( 118) } }, { { INT8_C( 56), INT8_C( 44), -INT8_C( 108), INT8_C( 96), INT8_C( 75), INT8_C( 57), -INT8_C( 70), -INT8_C( 70), INT8_C( 12), INT8_C( 121), INT8_C( 55), -INT8_C( 69), -INT8_C( 78), INT8_C( 21), -INT8_C( 51), INT8_C( 104) }, { INT8_C( 56), INT8_C( 44), -INT8_C( 108), INT8_C( 96), INT8_C( 75), INT8_C( 57), -INT8_C( 70), -INT8_C( 70), INT8_C( 12), INT8_C( 121), INT8_C( 55), -INT8_C( 69), -INT8_C( 78), INT8_C( 21), -INT8_C( 51), INT8_C( 104) } }, { { -INT8_C( 69), -INT8_C( 107), -INT8_C( 122), -INT8_C( 100), -INT8_C( 94), -INT8_C( 117), -INT8_C( 111), INT8_MAX, INT8_C( 87), INT8_C( 75), INT8_C( 114), -INT8_C( 102), -INT8_C( 91), -INT8_C( 127), INT8_C( 36), -INT8_C( 35) }, { -INT8_C( 69), -INT8_C( 107), -INT8_C( 122), -INT8_C( 100), -INT8_C( 94), -INT8_C( 117), 
-INT8_C( 111), INT8_MAX, INT8_C( 87), INT8_C( 75), INT8_C( 114), -INT8_C( 102), -INT8_C( 91), -INT8_C( 127), INT8_C( 36), -INT8_C( 35) } }, { { -INT8_C( 83), -INT8_C( 72), INT8_C( 61), -INT8_C( 8), -INT8_C( 14), -INT8_C( 8), -INT8_C( 78), -INT8_C( 2), INT8_C( 113), -INT8_C( 23), -INT8_C( 71), INT8_C( 36), -INT8_C( 1), -INT8_C( 122), -INT8_C( 116), -INT8_C( 70) }, { -INT8_C( 83), -INT8_C( 72), INT8_C( 61), -INT8_C( 8), -INT8_C( 14), -INT8_C( 8), -INT8_C( 78), -INT8_C( 2), INT8_C( 113), -INT8_C( 23), -INT8_C( 71), INT8_C( 36), -INT8_C( 1), -INT8_C( 122), -INT8_C( 116), -INT8_C( 70) } }, { { INT8_C( 27), INT8_C( 18), INT8_C( 86), -INT8_C( 67), -INT8_C( 99), -INT8_C( 25), INT8_C( 61), -INT8_C( 12), INT8_C( 50), -INT8_C( 81), -INT8_C( 114), -INT8_C( 41), INT8_C( 48), -INT8_C( 77), -INT8_C( 75), -INT8_C( 35) }, { INT8_C( 27), INT8_C( 18), INT8_C( 86), -INT8_C( 67), -INT8_C( 99), -INT8_C( 25), INT8_C( 61), -INT8_C( 12), INT8_C( 50), -INT8_C( 81), -INT8_C( 114), -INT8_C( 41), INT8_C( 48), -INT8_C( 77), -INT8_C( 75), -INT8_C( 35) } }, { { INT8_C( 107), -INT8_C( 14), -INT8_C( 43), INT8_C( 93), -INT8_C( 22), -INT8_C( 121), INT8_C( 91), INT8_C( 92), INT8_C( 113), INT8_C( 21), INT8_MIN, INT8_C( 112), -INT8_C( 101), INT8_C( 12), INT8_C( 42), -INT8_C( 73) }, { INT8_C( 107), -INT8_C( 14), -INT8_C( 43), INT8_C( 93), -INT8_C( 22), -INT8_C( 121), INT8_C( 91), INT8_C( 92), INT8_C( 113), INT8_C( 21), INT8_MIN, INT8_C( 112), -INT8_C( 101), INT8_C( 12), INT8_C( 42), -INT8_C( 73) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_test_x86_assert_equal_i8x16(simde_mm_load_si128(SIMDE_ALIGN_CAST(simde__m128i const *, test_vec[i].a)), simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i r = a; simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int 
test_simde_mm_loadu_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { SIMDE_ALIGN_LIKE_16(simde__m128i) const int16_t a[8]; const int16_t r[8]; } test_vec[] = { { { -INT16_C( 32738), -INT16_C( 17548), -INT16_C( 20121), -INT16_C( 26193), INT16_C( 15712), -INT16_C( 28559), INT16_C( 9968), INT16_C( 23661) }, { -INT16_C( 32738), -INT16_C( 17548), -INT16_C( 20121), -INT16_C( 26193), INT16_C( 15712), -INT16_C( 28559), INT16_C( 9968), INT16_C( 23661) } }, { { INT16_C( 16920), INT16_C( 953), INT16_C( 5578), INT16_C( 15199), -INT16_C( 8406), -INT16_C( 14933), -INT16_C( 10773), INT16_C( 2428) }, { INT16_C( 16920), INT16_C( 953), INT16_C( 5578), INT16_C( 15199), -INT16_C( 8406), -INT16_C( 14933), -INT16_C( 10773), INT16_C( 2428) } }, { { -INT16_C( 3755), -INT16_C( 17212), INT16_C( 29602), INT16_C( 853), -INT16_C( 14672), -INT16_C( 24173), INT16_C( 492), INT16_C( 1533) }, { -INT16_C( 3755), -INT16_C( 17212), INT16_C( 29602), INT16_C( 853), -INT16_C( 14672), -INT16_C( 24173), INT16_C( 492), INT16_C( 1533) } }, { { -INT16_C( 18877), INT16_C( 3336), INT16_C( 26571), -INT16_C( 2744), -INT16_C( 3258), INT16_C( 12731), INT16_C( 14280), INT16_C( 7482) }, { -INT16_C( 18877), INT16_C( 3336), INT16_C( 26571), -INT16_C( 2744), -INT16_C( 3258), INT16_C( 12731), INT16_C( 14280), INT16_C( 7482) } }, { { -INT16_C( 472), -INT16_C( 13351), INT16_C( 12145), INT16_C( 8654), INT16_C( 25077), -INT16_C( 7486), -INT16_C( 16542), -INT16_C( 22809) }, { -INT16_C( 472), -INT16_C( 13351), INT16_C( 12145), INT16_C( 8654), INT16_C( 25077), -INT16_C( 7486), -INT16_C( 16542), -INT16_C( 22809) } }, { { -INT16_C( 4234), INT16_C( 16819), -INT16_C( 938), -INT16_C( 25545), -INT16_C( 3345), -INT16_C( 18227), INT16_C( 1833), INT16_C( 21205) }, { -INT16_C( 4234), INT16_C( 16819), -INT16_C( 938), -INT16_C( 25545), -INT16_C( 3345), -INT16_C( 18227), INT16_C( 1833), INT16_C( 21205) } }, { { -INT16_C( 20731), INT16_C( 30237), -INT16_C( 5154), -INT16_C( 11369), INT16_C( 23116), -INT16_C( 20555), -INT16_C( 
25575), -INT16_C( 28843) }, { -INT16_C( 20731), INT16_C( 30237), -INT16_C( 5154), -INT16_C( 11369), INT16_C( 23116), -INT16_C( 20555), -INT16_C( 25575), -INT16_C( 28843) } }, { { INT16_C( 2187), -INT16_C( 7727), INT16_C( 2052), -INT16_C( 2947), INT16_C( 19194), INT16_C( 9132), -INT16_C( 32431), INT16_C( 22133) }, { INT16_C( 2187), -INT16_C( 7727), INT16_C( 2052), -INT16_C( 2947), INT16_C( 19194), INT16_C( 9132), -INT16_C( 32431), INT16_C( 22133) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_test_x86_assert_equal_i16x8(simde_mm_load_si128(SIMDE_ALIGN_CAST(simde__m128i const *, test_vec[i].a)), simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i r = a; simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_loadu_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { SIMDE_ALIGN_LIKE_16(simde__m128i) const int32_t a[4]; const int32_t r[4]; } test_vec[] = { { { INT32_C( 248287792), -INT32_C( 891132803), -INT32_C( 679897154), -INT32_C( 1083716044) }, { INT32_C( 248287792), -INT32_C( 891132803), -INT32_C( 679897154), -INT32_C( 1083716044) } }, { { -INT32_C( 610191146), INT32_C( 986652224), -INT32_C( 1168278679), INT32_C( 756143100) }, { -INT32_C( 610191146), INT32_C( 986652224), -INT32_C( 1168278679), INT32_C( 756143100) } }, { { -INT32_C( 482615963), -INT32_C( 5431999), -INT32_C( 371775819), -INT32_C( 894943500) }, { -INT32_C( 482615963), -INT32_C( 5431999), -INT32_C( 371775819), -INT32_C( 894943500) } }, { { -INT32_C( 1230681738), -INT32_C( 772770712), -INT32_C( 326414865), -INT32_C( 2045141984) }, { -INT32_C( 1230681738), -INT32_C( 772770712), -INT32_C( 326414865), -INT32_C( 2045141984) } }, { { -INT32_C( 1150724998), INT32_C( 666572402), INT32_C( 806392380), INT32_C( 1190836432) }, { -INT32_C( 
1150724998), INT32_C( 666572402), INT32_C( 806392380), INT32_C( 1190836432) } }, { { INT32_C( 1794940930), INT32_C( 71035924), INT32_C( 1525728825), -INT32_C( 572520093) }, { INT32_C( 1794940930), INT32_C( 71035924), INT32_C( 1525728825), -INT32_C( 572520093) } }, { { -INT32_C( 812103331), -INT32_C( 1678355617), -INT32_C( 1244985627), -INT32_C( 1040464449) }, { -INT32_C( 812103331), -INT32_C( 1678355617), -INT32_C( 1244985627), -INT32_C( 1040464449) } }, { { INT32_C( 2049701733), INT32_C( 494823139), -INT32_C( 1887998420), -INT32_C( 731097225) }, { INT32_C( 2049701733), INT32_C( 494823139), -INT32_C( 1887998420), -INT32_C( 731097225) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_test_x86_assert_equal_i32x4(simde_mm_load_si128(SIMDE_ALIGN_CAST(simde__m128i const *, test_vec[i].a)), simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i r = a; simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_loadu_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { SIMDE_ALIGN_LIKE_16(simde__m128i) const int64_t a[2]; const int64_t r[2]; } test_vec[] = { { { INT64_C( 4511087683801712032), -INT64_C( 8060898892722248287) }, { INT64_C( 4511087683801712032), -INT64_C( 8060898892722248287) } }, { { -INT64_C( 2384787176194159386), INT64_C( 6143431839469952758) }, { -INT64_C( 2384787176194159386), INT64_C( 6143431839469952758) } }, { { INT64_C( 2411376884971791839), INT64_C( 4543466100033153363) }, { INT64_C( 2411376884971791839), INT64_C( 4543466100033153363) } }, { { -INT64_C( 7674432146617329682), -INT64_C( 6460338043923272626) }, { -INT64_C( 7674432146617329682), -INT64_C( 6460338043923272626) } }, { { -INT64_C( 1312143318173438935), -INT64_C( 2642072646704280642) }, { -INT64_C( 1312143318173438935), -INT64_C( 
2642072646704280642) } }, { { -INT64_C( 7150315094646497649), INT64_C( 3770910417545578470) }, { -INT64_C( 7150315094646497649), INT64_C( 3770910417545578470) } }, { { INT64_C( 4983981236450898595), -INT64_C( 7152365960020912652) }, { INT64_C( 4983981236450898595), -INT64_C( 7152365960020912652) } }, { { INT64_C( 1871967141139003407), INT64_C( 3861302942246541911) }, { INT64_C( 1871967141139003407), INT64_C( 3861302942246541911) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_test_x86_assert_equal_i64x2(simde_mm_load_si128(SIMDE_ALIGN_CAST(simde__m128i const *, test_vec[i].a)), simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i r = a; simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_loadu_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { SIMDE_ALIGN_LIKE_32(simde__m256i) const int8_t a[32]; const int8_t r[32]; } test_vec[] = { { { INT8_C( 29), -INT8_C( 94), INT8_C( 76), INT8_C( 20), INT8_C( 54), -INT8_C( 63), INT8_MAX, INT8_C( 69), -INT8_C( 2), INT8_C( 122), INT8_C( 91), INT8_C( 14), INT8_C( 107), INT8_C( 122), -INT8_C( 75), INT8_C( 113), INT8_C( 74), -INT8_C( 31), -INT8_C( 17), -INT8_C( 25), INT8_C( 39), INT8_C( 64), -INT8_C( 71), INT8_C( 123), INT8_MAX, -INT8_C( 97), -INT8_C( 125), -INT8_C( 43), -INT8_C( 70), -INT8_C( 126), INT8_C( 107), -INT8_C( 41) }, { INT8_C( 29), -INT8_C( 94), INT8_C( 76), INT8_C( 20), INT8_C( 54), -INT8_C( 63), INT8_MAX, INT8_C( 69), -INT8_C( 2), INT8_C( 122), INT8_C( 91), INT8_C( 14), INT8_C( 107), INT8_C( 122), -INT8_C( 75), INT8_C( 113), INT8_C( 74), -INT8_C( 31), -INT8_C( 17), -INT8_C( 25), INT8_C( 39), INT8_C( 64), -INT8_C( 71), INT8_C( 123), INT8_MAX, -INT8_C( 97), -INT8_C( 125), -INT8_C( 43), -INT8_C( 70), -INT8_C( 126), INT8_C( 107), -INT8_C( 41) } }, 
{ { INT8_C( 37), -INT8_C( 73), -INT8_C( 21), INT8_C( 91), INT8_C( 121), INT8_C( 107), -INT8_C( 96), INT8_C( 119), -INT8_C( 27), -INT8_C( 5), -INT8_C( 123), INT8_C( 80), INT8_C( 118), INT8_C( 59), -INT8_C( 63), -INT8_C( 64), INT8_C( 28), -INT8_C( 80), -INT8_C( 89), INT8_C( 67), -INT8_C( 16), INT8_C( 96), -INT8_C( 65), INT8_C( 112), -INT8_C( 1), INT8_C( 66), INT8_C( 69), -INT8_C( 71), -INT8_C( 59), -INT8_C( 80), -INT8_C( 111), -INT8_C( 22) }, { INT8_C( 37), -INT8_C( 73), -INT8_C( 21), INT8_C( 91), INT8_C( 121), INT8_C( 107), -INT8_C( 96), INT8_C( 119), -INT8_C( 27), -INT8_C( 5), -INT8_C( 123), INT8_C( 80), INT8_C( 118), INT8_C( 59), -INT8_C( 63), -INT8_C( 64), INT8_C( 28), -INT8_C( 80), -INT8_C( 89), INT8_C( 67), -INT8_C( 16), INT8_C( 96), -INT8_C( 65), INT8_C( 112), -INT8_C( 1), INT8_C( 66), INT8_C( 69), -INT8_C( 71), -INT8_C( 59), -INT8_C( 80), -INT8_C( 111), -INT8_C( 22) } }, { { INT8_C( 103), INT8_C( 124), INT8_C( 69), -INT8_C( 32), -INT8_C( 25), -INT8_C( 27), INT8_C( 87), -INT8_C( 52), -INT8_C( 31), -INT8_C( 35), INT8_C( 28), INT8_C( 87), INT8_C( 24), -INT8_C( 34), INT8_C( 23), INT8_C( 52), -INT8_C( 114), -INT8_C( 66), INT8_C( 120), INT8_MAX, INT8_C( 30), INT8_C( 55), -INT8_C( 17), INT8_C( 29), INT8_C( 121), INT8_C( 52), -INT8_C( 42), INT8_C( 62), -INT8_C( 28), INT8_C( 103), INT8_C( 40), INT8_C( 75) }, { INT8_C( 103), INT8_C( 124), INT8_C( 69), -INT8_C( 32), -INT8_C( 25), -INT8_C( 27), INT8_C( 87), -INT8_C( 52), -INT8_C( 31), -INT8_C( 35), INT8_C( 28), INT8_C( 87), INT8_C( 24), -INT8_C( 34), INT8_C( 23), INT8_C( 52), -INT8_C( 114), -INT8_C( 66), INT8_C( 120), INT8_MAX, INT8_C( 30), INT8_C( 55), -INT8_C( 17), INT8_C( 29), INT8_C( 121), INT8_C( 52), -INT8_C( 42), INT8_C( 62), -INT8_C( 28), INT8_C( 103), INT8_C( 40), INT8_C( 75) } }, { { -INT8_C( 28), INT8_C( 109), INT8_C( 44), -INT8_C( 53), INT8_C( 83), -INT8_C( 125), -INT8_C( 104), INT8_C( 52), INT8_C( 96), -INT8_C( 76), -INT8_C( 117), INT8_C( 120), -INT8_C( 110), -INT8_C( 94), -INT8_C( 83), INT8_C( 33), INT8_C( 
96), INT8_C( 37), -INT8_C( 96), INT8_C( 126), INT8_C( 92), -INT8_C( 113), -INT8_C( 101), -INT8_C( 43), -INT8_C( 61), INT8_C( 113), INT8_C( 20), -INT8_C( 89), -INT8_C( 39), INT8_C( 60), -INT8_C( 14), -INT8_C( 67) }, { -INT8_C( 28), INT8_C( 109), INT8_C( 44), -INT8_C( 53), INT8_C( 83), -INT8_C( 125), -INT8_C( 104), INT8_C( 52), INT8_C( 96), -INT8_C( 76), -INT8_C( 117), INT8_C( 120), -INT8_C( 110), -INT8_C( 94), -INT8_C( 83), INT8_C( 33), INT8_C( 96), INT8_C( 37), -INT8_C( 96), INT8_C( 126), INT8_C( 92), -INT8_C( 113), -INT8_C( 101), -INT8_C( 43), -INT8_C( 61), INT8_C( 113), INT8_C( 20), -INT8_C( 89), -INT8_C( 39), INT8_C( 60), -INT8_C( 14), -INT8_C( 67) } }, { { -INT8_C( 86), INT8_C( 30), -INT8_C( 120), -INT8_C( 3), -INT8_C( 94), INT8_C( 32), INT8_C( 49), INT8_C( 2), -INT8_C( 43), -INT8_C( 68), INT8_C( 123), INT8_C( 103), INT8_C( 94), INT8_C( 40), -INT8_C( 120), -INT8_C( 66), INT8_C( 77), INT8_C( 40), INT8_C( 60), -INT8_C( 87), -INT8_C( 73), -INT8_C( 41), INT8_C( 126), INT8_C( 122), INT8_C( 72), -INT8_C( 110), INT8_C( 33), INT8_C( 33), -INT8_C( 49), INT8_C( 20), -INT8_C( 34), INT8_C( 121) }, { -INT8_C( 86), INT8_C( 30), -INT8_C( 120), -INT8_C( 3), -INT8_C( 94), INT8_C( 32), INT8_C( 49), INT8_C( 2), -INT8_C( 43), -INT8_C( 68), INT8_C( 123), INT8_C( 103), INT8_C( 94), INT8_C( 40), -INT8_C( 120), -INT8_C( 66), INT8_C( 77), INT8_C( 40), INT8_C( 60), -INT8_C( 87), -INT8_C( 73), -INT8_C( 41), INT8_C( 126), INT8_C( 122), INT8_C( 72), -INT8_C( 110), INT8_C( 33), INT8_C( 33), -INT8_C( 49), INT8_C( 20), -INT8_C( 34), INT8_C( 121) } }, { { INT8_C( 50), INT8_C( 103), INT8_C( 118), -INT8_C( 44), -INT8_C( 121), -INT8_C( 89), -INT8_C( 41), INT8_C( 92), INT8_C( 99), INT8_C( 82), -INT8_C( 60), -INT8_C( 63), INT8_C( 122), INT8_C( 76), INT8_MAX, -INT8_C( 57), INT8_C( 117), -INT8_C( 69), INT8_C( 112), INT8_C( 44), -INT8_C( 110), -INT8_C( 18), -INT8_C( 89), -INT8_C( 38), -INT8_C( 127), -INT8_C( 56), -INT8_C( 4), INT8_C( 80), -INT8_C( 36), -INT8_C( 38), -INT8_C( 55), INT8_C( 15) }, { 
INT8_C( 50), INT8_C( 103), INT8_C( 118), -INT8_C( 44), -INT8_C( 121), -INT8_C( 89), -INT8_C( 41), INT8_C( 92), INT8_C( 99), INT8_C( 82), -INT8_C( 60), -INT8_C( 63), INT8_C( 122), INT8_C( 76), INT8_MAX, -INT8_C( 57), INT8_C( 117), -INT8_C( 69), INT8_C( 112), INT8_C( 44), -INT8_C( 110), -INT8_C( 18), -INT8_C( 89), -INT8_C( 38), -INT8_C( 127), -INT8_C( 56), -INT8_C( 4), INT8_C( 80), -INT8_C( 36), -INT8_C( 38), -INT8_C( 55), INT8_C( 15) } }, { { INT8_C( 65), INT8_C( 63), -INT8_C( 29), -INT8_C( 55), -INT8_C( 26), -INT8_C( 70), INT8_C( 37), INT8_C( 73), INT8_C( 12), -INT8_C( 23), INT8_C( 10), -INT8_C( 122), INT8_C( 54), -INT8_C( 119), INT8_C( 77), -INT8_C( 85), INT8_C( 68), -INT8_C( 67), -INT8_C( 41), -INT8_C( 42), -INT8_C( 84), INT8_C( 126), -INT8_C( 80), INT8_C( 45), INT8_C( 71), -INT8_C( 84), INT8_C( 125), INT8_C( 35), -INT8_C( 121), INT8_C( 70), INT8_C( 50), -INT8_C( 56) }, { INT8_C( 65), INT8_C( 63), -INT8_C( 29), -INT8_C( 55), -INT8_C( 26), -INT8_C( 70), INT8_C( 37), INT8_C( 73), INT8_C( 12), -INT8_C( 23), INT8_C( 10), -INT8_C( 122), INT8_C( 54), -INT8_C( 119), INT8_C( 77), -INT8_C( 85), INT8_C( 68), -INT8_C( 67), -INT8_C( 41), -INT8_C( 42), -INT8_C( 84), INT8_C( 126), -INT8_C( 80), INT8_C( 45), INT8_C( 71), -INT8_C( 84), INT8_C( 125), INT8_C( 35), -INT8_C( 121), INT8_C( 70), INT8_C( 50), -INT8_C( 56) } }, { { -INT8_C( 123), INT8_C( 22), -INT8_C( 111), INT8_C( 107), -INT8_C( 48), -INT8_C( 73), -INT8_C( 76), -INT8_C( 35), -INT8_C( 96), -INT8_C( 66), INT8_C( 99), -INT8_C( 42), INT8_C( 71), -INT8_C( 79), -INT8_C( 127), -INT8_C( 117), INT8_C( 110), INT8_C( 89), INT8_C( 97), INT8_C( 26), -INT8_C( 41), INT8_C( 17), INT8_C( 71), INT8_C( 30), -INT8_C( 66), -INT8_C( 60), INT8_C( 66), INT8_C( 69), INT8_C( 10), INT8_C( 116), INT8_C( 13), -INT8_C( 113) }, { -INT8_C( 123), INT8_C( 22), -INT8_C( 111), INT8_C( 107), -INT8_C( 48), -INT8_C( 73), -INT8_C( 76), -INT8_C( 35), -INT8_C( 96), -INT8_C( 66), INT8_C( 99), -INT8_C( 42), INT8_C( 71), -INT8_C( 79), -INT8_C( 127), -INT8_C( 
117), INT8_C( 110), INT8_C( 89), INT8_C( 97), INT8_C( 26), -INT8_C( 41), INT8_C( 17), INT8_C( 71), INT8_C( 30), -INT8_C( 66), -INT8_C( 60), INT8_C( 66), INT8_C( 69), INT8_C( 10), INT8_C( 116), INT8_C( 13), -INT8_C( 113) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_test_x86_assert_equal_i8x32(simde_mm256_load_si256(SIMDE_ALIGN_CAST(simde__m256i const *, test_vec[i].a)), simde_mm256_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i8x32(); simde__m256i r = a; simde_test_x86_write_i8x32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_loadu_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { SIMDE_ALIGN_LIKE_32(simde__m256i) const int16_t a[16]; const int16_t r[16]; } test_vec[] = { { { -INT16_C( 24694), INT16_C( 23546), -INT16_C( 20906), -INT16_C( 2504), -INT16_C( 25748), -INT16_C( 19507), INT16_C( 20044), -INT16_C( 17602), -INT16_C( 24665), INT16_C( 32725), INT16_C( 7601), INT16_C( 28573), -INT16_C( 8223), -INT16_C( 4940), -INT16_C( 16044), -INT16_C( 8581) }, { -INT16_C( 24694), INT16_C( 23546), -INT16_C( 20906), -INT16_C( 2504), -INT16_C( 25748), -INT16_C( 19507), INT16_C( 20044), -INT16_C( 17602), -INT16_C( 24665), INT16_C( 32725), INT16_C( 7601), INT16_C( 28573), -INT16_C( 8223), -INT16_C( 4940), -INT16_C( 16044), -INT16_C( 8581) } }, { { INT16_C( 30304), -INT16_C( 18887), INT16_C( 28964), -INT16_C( 28243), INT16_C( 31245), INT16_C( 22852), -INT16_C( 31800), INT16_C( 28692), -INT16_C( 5598), -INT16_C( 11281), -INT16_C( 29689), -INT16_C( 6078), -INT16_C( 2452), -INT16_C( 16172), INT16_C( 20664), INT16_C( 6302) }, { INT16_C( 30304), -INT16_C( 18887), INT16_C( 28964), -INT16_C( 28243), INT16_C( 31245), INT16_C( 22852), -INT16_C( 31800), INT16_C( 28692), -INT16_C( 5598), -INT16_C( 11281), -INT16_C( 29689), -INT16_C( 6078), -INT16_C( 2452), 
-INT16_C( 16172), INT16_C( 20664), INT16_C( 6302) } }, { { -INT16_C( 10042), -INT16_C( 5425), INT16_C( 31817), INT16_C( 22139), -INT16_C( 16138), -INT16_C( 16720), -INT16_C( 15293), INT16_C( 25902), INT16_C( 7598), -INT16_C( 19143), INT16_C( 31658), INT16_C( 5790), INT16_C( 29298), INT16_C( 10966), INT16_C( 29890), -INT16_C( 30654) }, { -INT16_C( 10042), -INT16_C( 5425), INT16_C( 31817), INT16_C( 22139), -INT16_C( 16138), -INT16_C( 16720), -INT16_C( 15293), INT16_C( 25902), INT16_C( 7598), -INT16_C( 19143), INT16_C( 31658), INT16_C( 5790), INT16_C( 29298), INT16_C( 10966), INT16_C( 29890), -INT16_C( 30654) } }, { { INT16_C( 4428), -INT16_C( 27021), -INT16_C( 4467), -INT16_C( 31764), -INT16_C( 25426), -INT16_C( 3774), INT16_C( 28769), INT16_C( 3927), -INT16_C( 28530), INT16_C( 14533), INT16_C( 25355), INT16_C( 32078), INT16_C( 9429), -INT16_C( 26457), -INT16_C( 5480), -INT16_C( 6880) }, { INT16_C( 4428), -INT16_C( 27021), -INT16_C( 4467), -INT16_C( 31764), -INT16_C( 25426), -INT16_C( 3774), INT16_C( 28769), INT16_C( 3927), -INT16_C( 28530), INT16_C( 14533), INT16_C( 25355), INT16_C( 32078), INT16_C( 9429), -INT16_C( 26457), -INT16_C( 5480), -INT16_C( 6880) } }, { { -INT16_C( 27653), -INT16_C( 30341), INT16_C( 26498), INT16_C( 12300), INT16_C( 19972), INT16_C( 25890), INT16_C( 31167), INT16_C( 19828), INT16_C( 14601), INT16_C( 5253), -INT16_C( 11364), INT16_C( 29330), INT16_C( 14839), -INT16_C( 28918), INT16_C( 10787), INT16_C( 8052) }, { -INT16_C( 27653), -INT16_C( 30341), INT16_C( 26498), INT16_C( 12300), INT16_C( 19972), INT16_C( 25890), INT16_C( 31167), INT16_C( 19828), INT16_C( 14601), INT16_C( 5253), -INT16_C( 11364), INT16_C( 29330), INT16_C( 14839), -INT16_C( 28918), INT16_C( 10787), INT16_C( 8052) } }, { { -INT16_C( 4162), INT16_C( 16552), -INT16_C( 19369), INT16_C( 23408), -INT16_C( 28157), -INT16_C( 15680), INT16_C( 13323), INT16_C( 5135), -INT16_C( 27538), INT16_C( 2601), -INT16_C( 17561), INT16_C( 24188), -INT16_C( 30988), INT16_C( 6381), INT16_C( 
25265), INT16_C( 28471) }, { -INT16_C( 4162), INT16_C( 16552), -INT16_C( 19369), INT16_C( 23408), -INT16_C( 28157), -INT16_C( 15680), INT16_C( 13323), INT16_C( 5135), -INT16_C( 27538), INT16_C( 2601), -INT16_C( 17561), INT16_C( 24188), -INT16_C( 30988), INT16_C( 6381), INT16_C( 25265), INT16_C( 28471) } }, { { -INT16_C( 8367), -INT16_C( 22353), INT16_C( 8083), -INT16_C( 27133), -INT16_C( 15438), -INT16_C( 17064), INT16_C( 26616), INT16_C( 26322), -INT16_C( 1029), INT16_C( 25200), -INT16_C( 4682), -INT16_C( 21824), -INT16_C( 20877), INT16_C( 9410), -INT16_C( 1776), INT16_C( 24979) }, { -INT16_C( 8367), -INT16_C( 22353), INT16_C( 8083), -INT16_C( 27133), -INT16_C( 15438), -INT16_C( 17064), INT16_C( 26616), INT16_C( 26322), -INT16_C( 1029), INT16_C( 25200), -INT16_C( 4682), -INT16_C( 21824), -INT16_C( 20877), INT16_C( 9410), -INT16_C( 1776), INT16_C( 24979) } }, { { INT16_C( 17112), INT16_C( 27658), INT16_C( 3426), INT16_C( 5122), INT16_C( 23505), -INT16_C( 13871), -INT16_C( 23614), -INT16_C( 16849), -INT16_C( 24674), INT16_C( 21536), -INT16_C( 7796), INT16_C( 255), -INT16_C( 15985), -INT16_C( 24796), -INT16_C( 18245), -INT16_C( 27904) }, { INT16_C( 17112), INT16_C( 27658), INT16_C( 3426), INT16_C( 5122), INT16_C( 23505), -INT16_C( 13871), -INT16_C( 23614), -INT16_C( 16849), -INT16_C( 24674), INT16_C( 21536), -INT16_C( 7796), INT16_C( 255), -INT16_C( 15985), -INT16_C( 24796), -INT16_C( 18245), -INT16_C( 27904) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_test_x86_assert_equal_i16x16(simde_mm256_load_si256(SIMDE_ALIGN_CAST(simde__m256i const *, test_vec[i].a)), simde_mm256_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i16x16(); simde__m256i r = a; simde_test_x86_write_i16x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_loadu_epi32 
(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { SIMDE_ALIGN_LIKE_32(simde__m256i) const int32_t a[8]; const int32_t r[8]; } test_vec[] = { { { INT32_C( 1560218362), -INT32_C( 378535400), INT32_C( 531776093), -INT32_C( 2065833499), INT32_C( 232324736), INT32_C( 1846400991), INT32_C( 1410150809), -INT32_C( 454619671) }, { INT32_C( 1560218362), -INT32_C( 378535400), INT32_C( 531776093), -INT32_C( 2065833499), INT32_C( 232324736), INT32_C( 1846400991), INT32_C( 1410150809), -INT32_C( 454619671) } }, { { INT32_C( 809559832), INT32_C( 1176089065), -INT32_C( 664417293), INT32_C( 744244140), INT32_C( 540620097), -INT32_C( 1517402612), INT32_C( 1643748216), -INT32_C( 1069162072) }, { INT32_C( 809559832), INT32_C( 1176089065), -INT32_C( 664417293), INT32_C( 744244140), INT32_C( 540620097), -INT32_C( 1517402612), INT32_C( 1643748216), -INT32_C( 1069162072) } }, { { -INT32_C( 1309636920), INT32_C( 720832823), -INT32_C( 2147328812), -INT32_C( 525508705), -INT32_C( 1610553708), -INT32_C( 1522102739), -INT32_C( 771342551), -INT32_C( 393065440) }, { -INT32_C( 1309636920), INT32_C( 720832823), -INT32_C( 2147328812), -INT32_C( 525508705), -INT32_C( 1610553708), -INT32_C( 1522102739), -INT32_C( 771342551), -INT32_C( 393065440) } }, { { INT32_C( 161055698), INT32_C( 1630769292), -INT32_C( 1931397651), INT32_C( 678268564), -INT32_C( 1563857547), INT32_C( 625414140), INT32_C( 1878478158), INT32_C( 1800899225) }, { INT32_C( 161055698), INT32_C( 1630769292), -INT32_C( 1931397651), INT32_C( 678268564), -INT32_C( 1563857547), INT32_C( 625414140), INT32_C( 1878478158), INT32_C( 1800899225) } }, { { -INT32_C( 1720389363), INT32_C( 1861920641), INT32_C( 1912331485), -INT32_C( 543528854), -INT32_C( 780049451), -INT32_C( 1057503118), -INT32_C( 1355813354), -INT32_C( 2061793416) }, { -INT32_C( 1720389363), INT32_C( 1861920641), INT32_C( 1912331485), -INT32_C( 543528854), -INT32_C( 780049451), -INT32_C( 1057503118), -INT32_C( 1355813354), -INT32_C( 2061793416) } }, { { -INT32_C( 115372168), 
INT32_C( 342366519), INT32_C( 1619354613), -INT32_C( 1606475829), -INT32_C( 193805950), -INT32_C( 1615500919), -INT32_C( 800070569), -INT32_C( 480941461) }, { -INT32_C( 115372168), INT32_C( 342366519), INT32_C( 1619354613), -INT32_C( 1606475829), -INT32_C( 193805950), -INT32_C( 1615500919), -INT32_C( 800070569), -INT32_C( 480941461) } }, { { INT32_C( 819819769), -INT32_C( 2092677746), INT32_C( 1944308392), INT32_C( 1813193705), INT32_C( 1835042276), INT32_C( 1175262702), INT32_C( 1695964410), -INT32_C( 1085707322) }, { INT32_C( 819819769), -INT32_C( 2092677746), INT32_C( 1944308392), INT32_C( 1813193705), INT32_C( 1835042276), INT32_C( 1175262702), INT32_C( 1695964410), -INT32_C( 1085707322) } }, { { INT32_C( 1861232352), INT32_C( 334574699), -INT32_C( 393816578), -INT32_C( 598435336), INT32_C( 222934047), -INT32_C( 1001171254), INT32_C( 2015979954), -INT32_C( 1254591787) }, { INT32_C( 1861232352), INT32_C( 334574699), -INT32_C( 393816578), -INT32_C( 598435336), INT32_C( 222934047), -INT32_C( 1001171254), INT32_C( 2015979954), -INT32_C( 1254591787) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_test_x86_assert_equal_i32x8(simde_mm256_load_si256(SIMDE_ALIGN_CAST(simde__m256i const *, test_vec[i].a)), simde_mm256_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i32x8(); simde__m256i r = a; simde_test_x86_write_i32x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_loadu_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { SIMDE_ALIGN_LIKE_32(simde__m256i) const int64_t a[4]; const int64_t r[4]; } test_vec[] = { { { -INT64_C( 2730480485383468799), INT64_C( 3961809025040497319), -INT64_C( 7760876929369986550), INT64_C( 2630957754019333904) }, { -INT64_C( 2730480485383468799), INT64_C( 3961809025040497319), -INT64_C( 7760876929369986550), 
INT64_C( 2630957754019333904) } }, { { INT64_C( 2406427958756382740), INT64_C( 1484904791614610964), -INT64_C( 2360563711534695397), -INT64_C( 4781223386344087970) }, { INT64_C( 2406427958756382740), INT64_C( 1484904791614610964), -INT64_C( 2360563711534695397), -INT64_C( 4781223386344087970) } }, { { INT64_C( 5864722717878051783), INT64_C( 7398096197995911564), -INT64_C( 1370136013387598003), -INT64_C( 7183219597633509398) }, { INT64_C( 5864722717878051783), INT64_C( 7398096197995911564), -INT64_C( 1370136013387598003), -INT64_C( 7183219597633509398) } }, { { -INT64_C( 3205083785718752777), -INT64_C( 2680742885939594470), INT64_C( 1263244472435006221), INT64_C( 39394444786922777) }, { -INT64_C( 3205083785718752777), -INT64_C( 2680742885939594470), INT64_C( 1263244472435006221), INT64_C( 39394444786922777) } }, { { -INT64_C( 4203624633801086578), INT64_C( 3916713484056400884), INT64_C( 6704269143766553041), INT64_C( 3699662719747403598) }, { -INT64_C( 4203624633801086578), INT64_C( 3916713484056400884), INT64_C( 6704269143766553041), INT64_C( 3699662719747403598) } }, { { INT64_C( 4084959771584138049), -INT64_C( 6690919879014753339), -INT64_C( 7060055079283591580), INT64_C( 7578517111345336660) }, { INT64_C( 4084959771584138049), -INT64_C( 6690919879014753339), -INT64_C( 7060055079283591580), INT64_C( 7578517111345336660) } }, { { -INT64_C( 6100963634310728488), -INT64_C( 8766892043372261664), -INT64_C( 1539611403198992203), -INT64_C( 4821648871914235772) }, { -INT64_C( 6100963634310728488), -INT64_C( 8766892043372261664), -INT64_C( 1539611403198992203), -INT64_C( 4821648871914235772) } }, { { INT64_C( 5896672549719927620), -INT64_C( 2096610149610066370), -INT64_C( 3618209875233467063), INT64_C( 55047857207160097) }, { INT64_C( 5896672549719927620), -INT64_C( 2096610149610066370), -INT64_C( 3618209875233467063), INT64_C( 55047857207160097) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde_test_x86_assert_equal_i64x4(simde_mm256_load_si256(SIMDE_ALIGN_CAST(simde__m256i const *, test_vec[i].a)), simde_mm256_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i64x4(); simde__m256i r = a; simde_test_x86_write_i64x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_loadu_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { SIMDE_ALIGN_LIKE_64(simde__m512i) const int8_t a[64]; const int8_t r[64]; } test_vec[] = { { { INT8_C( 115), INT8_C( 0), -INT8_C( 90), -INT8_C( 57), INT8_C( 50), INT8_C( 15), -INT8_C( 121), INT8_C( 47), -INT8_C( 4), -INT8_C( 87), -INT8_C( 26), -INT8_C( 116), INT8_C( 32), -INT8_C( 59), INT8_C( 96), INT8_C( 45), INT8_C( 33), -INT8_C( 50), -INT8_C( 83), -INT8_C( 99), INT8_C( 12), INT8_C( 64), -INT8_C( 87), INT8_C( 109), -INT8_C( 66), -INT8_C( 55), INT8_C( 19), -INT8_C( 76), -INT8_C( 80), INT8_C( 43), INT8_C( 80), INT8_C( 36), INT8_C( 43), -INT8_C( 9), -INT8_C( 21), INT8_C( 94), INT8_C( 6), INT8_C( 114), -INT8_C( 115), INT8_C( 2), INT8_C( 27), INT8_C( 115), -INT8_C( 114), INT8_C( 59), INT8_C( 56), -INT8_C( 17), INT8_C( 104), INT8_C( 89), -INT8_C( 67), INT8_C( 21), -INT8_C( 10), -INT8_C( 55), INT8_C( 85), -INT8_C( 96), INT8_C( 55), INT8_C( 20), INT8_C( 105), INT8_C( 74), -INT8_C( 56), INT8_C( 26), INT8_C( 117), INT8_C( 24), INT8_C( 62), -INT8_C( 95) }, { INT8_C( 115), INT8_C( 0), -INT8_C( 90), -INT8_C( 57), INT8_C( 50), INT8_C( 15), -INT8_C( 121), INT8_C( 47), -INT8_C( 4), -INT8_C( 87), -INT8_C( 26), -INT8_C( 116), INT8_C( 32), -INT8_C( 59), INT8_C( 96), INT8_C( 45), INT8_C( 33), -INT8_C( 50), -INT8_C( 83), -INT8_C( 99), INT8_C( 12), INT8_C( 64), -INT8_C( 87), INT8_C( 109), -INT8_C( 66), -INT8_C( 55), INT8_C( 19), -INT8_C( 76), -INT8_C( 80), INT8_C( 43), INT8_C( 80), INT8_C( 36), INT8_C( 43), -INT8_C( 9), -INT8_C( 21), INT8_C( 94), INT8_C( 6), INT8_C( 
114), -INT8_C( 115), INT8_C( 2), INT8_C( 27), INT8_C( 115), -INT8_C( 114), INT8_C( 59), INT8_C( 56), -INT8_C( 17), INT8_C( 104), INT8_C( 89), -INT8_C( 67), INT8_C( 21), -INT8_C( 10), -INT8_C( 55), INT8_C( 85), -INT8_C( 96), INT8_C( 55), INT8_C( 20), INT8_C( 105), INT8_C( 74), -INT8_C( 56), INT8_C( 26), INT8_C( 117), INT8_C( 24), INT8_C( 62), -INT8_C( 95) } }, { { INT8_C( 15), INT8_C( 41), -INT8_C( 1), INT8_C( 22), -INT8_C( 101), -INT8_C( 116), INT8_C( 24), -INT8_C( 74), -INT8_C( 1), -INT8_C( 89), -INT8_C( 15), INT8_C( 55), -INT8_C( 106), INT8_C( 90), -INT8_C( 112), INT8_C( 83), INT8_C( 111), -INT8_C( 121), INT8_C( 29), -INT8_C( 59), INT8_C( 39), INT8_C( 84), -INT8_C( 39), -INT8_C( 112), -INT8_C( 98), -INT8_C( 95), -INT8_C( 86), INT8_C( 20), -INT8_C( 71), -INT8_C( 24), -INT8_C( 75), -INT8_C( 55), INT8_C( 17), -INT8_C( 76), -INT8_C( 33), -INT8_C( 84), INT8_C( 64), -INT8_C( 9), INT8_C( 98), INT8_C( 63), -INT8_C( 98), INT8_C( 84), INT8_C( 118), INT8_C( 52), -INT8_C( 82), INT8_C( 6), -INT8_C( 120), INT8_C( 29), -INT8_C( 115), -INT8_C( 91), -INT8_C( 30), -INT8_C( 76), -INT8_C( 7), -INT8_C( 69), INT8_C( 69), -INT8_C( 105), INT8_C( 92), -INT8_C( 17), -INT8_C( 85), INT8_C( 22), -INT8_C( 40), INT8_C( 96), -INT8_C( 33), -INT8_C( 23) }, { INT8_C( 15), INT8_C( 41), -INT8_C( 1), INT8_C( 22), -INT8_C( 101), -INT8_C( 116), INT8_C( 24), -INT8_C( 74), -INT8_C( 1), -INT8_C( 89), -INT8_C( 15), INT8_C( 55), -INT8_C( 106), INT8_C( 90), -INT8_C( 112), INT8_C( 83), INT8_C( 111), -INT8_C( 121), INT8_C( 29), -INT8_C( 59), INT8_C( 39), INT8_C( 84), -INT8_C( 39), -INT8_C( 112), -INT8_C( 98), -INT8_C( 95), -INT8_C( 86), INT8_C( 20), -INT8_C( 71), -INT8_C( 24), -INT8_C( 75), -INT8_C( 55), INT8_C( 17), -INT8_C( 76), -INT8_C( 33), -INT8_C( 84), INT8_C( 64), -INT8_C( 9), INT8_C( 98), INT8_C( 63), -INT8_C( 98), INT8_C( 84), INT8_C( 118), INT8_C( 52), -INT8_C( 82), INT8_C( 6), -INT8_C( 120), INT8_C( 29), -INT8_C( 115), -INT8_C( 91), -INT8_C( 30), -INT8_C( 76), -INT8_C( 7), -INT8_C( 69), INT8_C( 69), 
-INT8_C( 105), INT8_C( 92), -INT8_C( 17), -INT8_C( 85), INT8_C( 22), -INT8_C( 40), INT8_C( 96), -INT8_C( 33), -INT8_C( 23) } }, { { INT8_C( 20), -INT8_C( 66), -INT8_C( 106), INT8_C( 84), -INT8_C( 75), -INT8_C( 8), -INT8_C( 109), INT8_C( 84), INT8_C( 76), INT8_C( 9), -INT8_C( 120), -INT8_C( 6), INT8_C( 16), INT8_C( 16), INT8_C( 24), -INT8_C( 99), -INT8_C( 75), -INT8_C( 6), INT8_C( 82), -INT8_C( 82), -INT8_C( 74), -INT8_C( 105), INT8_C( 70), INT8_C( 18), -INT8_C( 122), -INT8_C( 15), INT8_C( 40), INT8_C( 94), INT8_C( 82), INT8_C( 7), INT8_C( 72), INT8_C( 102), -INT8_C( 59), -INT8_C( 34), -INT8_C( 69), INT8_C( 123), -INT8_C( 42), INT8_C( 78), -INT8_C( 49), INT8_C( 35), INT8_C( 88), INT8_C( 87), INT8_C( 29), INT8_C( 104), INT8_C( 104), INT8_C( 53), INT8_C( 5), INT8_C( 29), INT8_C( 48), INT8_C( 87), -INT8_C( 52), -INT8_C( 26), -INT8_C( 18), INT8_C( 18), -INT8_C( 8), INT8_C( 117), INT8_C( 3), INT8_C( 33), -INT8_C( 45), INT8_C( 85), INT8_C( 40), INT8_C( 27), -INT8_C( 68), -INT8_C( 18) }, { INT8_C( 20), -INT8_C( 66), -INT8_C( 106), INT8_C( 84), -INT8_C( 75), -INT8_C( 8), -INT8_C( 109), INT8_C( 84), INT8_C( 76), INT8_C( 9), -INT8_C( 120), -INT8_C( 6), INT8_C( 16), INT8_C( 16), INT8_C( 24), -INT8_C( 99), -INT8_C( 75), -INT8_C( 6), INT8_C( 82), -INT8_C( 82), -INT8_C( 74), -INT8_C( 105), INT8_C( 70), INT8_C( 18), -INT8_C( 122), -INT8_C( 15), INT8_C( 40), INT8_C( 94), INT8_C( 82), INT8_C( 7), INT8_C( 72), INT8_C( 102), -INT8_C( 59), -INT8_C( 34), -INT8_C( 69), INT8_C( 123), -INT8_C( 42), INT8_C( 78), -INT8_C( 49), INT8_C( 35), INT8_C( 88), INT8_C( 87), INT8_C( 29), INT8_C( 104), INT8_C( 104), INT8_C( 53), INT8_C( 5), INT8_C( 29), INT8_C( 48), INT8_C( 87), -INT8_C( 52), -INT8_C( 26), -INT8_C( 18), INT8_C( 18), -INT8_C( 8), INT8_C( 117), INT8_C( 3), INT8_C( 33), -INT8_C( 45), INT8_C( 85), INT8_C( 40), INT8_C( 27), -INT8_C( 68), -INT8_C( 18) } }, { { -INT8_C( 7), INT8_C( 119), INT8_C( 105), -INT8_C( 48), -INT8_C( 59), INT8_C( 56), -INT8_C( 13), INT8_C( 29), -INT8_C( 113), INT8_C( 
16), -INT8_C( 123), -INT8_C( 9), INT8_C( 70), -INT8_C( 117), INT8_C( 21), INT8_C( 118), -INT8_C( 30), -INT8_C( 31), INT8_C( 92), -INT8_C( 47), -INT8_C( 13), INT8_C( 84), INT8_C( 70), -INT8_C( 10), INT8_C( 117), INT8_C( 25), INT8_C( 76), -INT8_C( 98), INT8_C( 53), INT8_C( 8), -INT8_C( 116), INT8_C( 46), INT8_MAX, -INT8_C( 11), -INT8_C( 2), INT8_C( 68), INT8_C( 45), -INT8_C( 15), INT8_C( 98), -INT8_C( 68), INT8_C( 2), -INT8_C( 25), -INT8_C( 76), INT8_C( 72), INT8_C( 114), -INT8_C( 55), -INT8_C( 66), INT8_C( 85), -INT8_C( 86), INT8_C( 26), INT8_C( 38), -INT8_C( 99), INT8_C( 110), INT8_C( 108), -INT8_C( 109), -INT8_C( 28), -INT8_C( 123), -INT8_C( 33), -INT8_C( 126), -INT8_C( 70), -INT8_C( 25), INT8_C( 14), -INT8_C( 23), INT8_C( 102) }, { -INT8_C( 7), INT8_C( 119), INT8_C( 105), -INT8_C( 48), -INT8_C( 59), INT8_C( 56), -INT8_C( 13), INT8_C( 29), -INT8_C( 113), INT8_C( 16), -INT8_C( 123), -INT8_C( 9), INT8_C( 70), -INT8_C( 117), INT8_C( 21), INT8_C( 118), -INT8_C( 30), -INT8_C( 31), INT8_C( 92), -INT8_C( 47), -INT8_C( 13), INT8_C( 84), INT8_C( 70), -INT8_C( 10), INT8_C( 117), INT8_C( 25), INT8_C( 76), -INT8_C( 98), INT8_C( 53), INT8_C( 8), -INT8_C( 116), INT8_C( 46), INT8_MAX, -INT8_C( 11), -INT8_C( 2), INT8_C( 68), INT8_C( 45), -INT8_C( 15), INT8_C( 98), -INT8_C( 68), INT8_C( 2), -INT8_C( 25), -INT8_C( 76), INT8_C( 72), INT8_C( 114), -INT8_C( 55), -INT8_C( 66), INT8_C( 85), -INT8_C( 86), INT8_C( 26), INT8_C( 38), -INT8_C( 99), INT8_C( 110), INT8_C( 108), -INT8_C( 109), -INT8_C( 28), -INT8_C( 123), -INT8_C( 33), -INT8_C( 126), -INT8_C( 70), -INT8_C( 25), INT8_C( 14), -INT8_C( 23), INT8_C( 102) } }, { { INT8_C( 3), -INT8_C( 25), -INT8_C( 85), INT8_C( 48), -INT8_C( 39), INT8_C( 13), -INT8_C( 20), -INT8_C( 37), -INT8_C( 12), -INT8_C( 96), INT8_C( 35), INT8_C( 103), INT8_C( 105), -INT8_C( 31), -INT8_C( 68), INT8_C( 19), -INT8_C( 5), -INT8_C( 30), -INT8_C( 80), INT8_C( 105), INT8_C( 78), INT8_C( 68), INT8_C( 77), -INT8_C( 45), INT8_C( 35), -INT8_C( 49), -INT8_C( 114), INT8_C( 
11), -INT8_C( 35), INT8_C( 119), INT8_C( 113), -INT8_C( 32), INT8_C( 94), INT8_C( 28), INT8_C( 16), INT8_C( 55), INT8_C( 41), -INT8_C( 3), INT8_C( 18), INT8_C( 30), -INT8_C( 99), INT8_C( 53), -INT8_C( 123), INT8_C( 7), INT8_C( 22), INT8_C( 65), INT8_C( 26), INT8_C( 17), INT8_C( 35), -INT8_C( 53), INT8_C( 123), INT8_C( 113), INT8_C( 15), -INT8_C( 56), INT8_C( 68), INT8_C( 50), -INT8_C( 104), -INT8_C( 46), INT8_C( 61), INT8_C( 117), INT8_C( 73), -INT8_C( 81), INT8_C( 86), -INT8_C( 88) }, { INT8_C( 3), -INT8_C( 25), -INT8_C( 85), INT8_C( 48), -INT8_C( 39), INT8_C( 13), -INT8_C( 20), -INT8_C( 37), -INT8_C( 12), -INT8_C( 96), INT8_C( 35), INT8_C( 103), INT8_C( 105), -INT8_C( 31), -INT8_C( 68), INT8_C( 19), -INT8_C( 5), -INT8_C( 30), -INT8_C( 80), INT8_C( 105), INT8_C( 78), INT8_C( 68), INT8_C( 77), -INT8_C( 45), INT8_C( 35), -INT8_C( 49), -INT8_C( 114), INT8_C( 11), -INT8_C( 35), INT8_C( 119), INT8_C( 113), -INT8_C( 32), INT8_C( 94), INT8_C( 28), INT8_C( 16), INT8_C( 55), INT8_C( 41), -INT8_C( 3), INT8_C( 18), INT8_C( 30), -INT8_C( 99), INT8_C( 53), -INT8_C( 123), INT8_C( 7), INT8_C( 22), INT8_C( 65), INT8_C( 26), INT8_C( 17), INT8_C( 35), -INT8_C( 53), INT8_C( 123), INT8_C( 113), INT8_C( 15), -INT8_C( 56), INT8_C( 68), INT8_C( 50), -INT8_C( 104), -INT8_C( 46), INT8_C( 61), INT8_C( 117), INT8_C( 73), -INT8_C( 81), INT8_C( 86), -INT8_C( 88) } }, { { -INT8_C( 53), INT8_C( 102), -INT8_C( 33), -INT8_C( 11), INT8_C( 99), -INT8_C( 14), INT8_C( 19), INT8_C( 1), INT8_C( 39), -INT8_C( 104), INT8_C( 8), INT8_C( 62), -INT8_C( 39), INT8_C( 34), INT8_C( 79), -INT8_C( 4), -INT8_C( 19), -INT8_C( 54), INT8_C( 109), -INT8_C( 4), -INT8_C( 109), -INT8_C( 79), INT8_C( 47), INT8_C( 43), -INT8_C( 124), INT8_C( 108), -INT8_C( 96), -INT8_C( 51), INT8_C( 27), -INT8_C( 10), INT8_C( 117), -INT8_C( 25), INT8_C( 93), INT8_C( 85), -INT8_C( 36), -INT8_C( 64), INT8_C( 71), -INT8_C( 17), -INT8_C( 63), INT8_C( 110), -INT8_C( 121), -INT8_C( 55), -INT8_C( 84), INT8_C( 96), -INT8_C( 20), -INT8_C( 4), 
INT8_C( 92), -INT8_C( 39), -INT8_C( 58), -INT8_C( 55), -INT8_C( 42), INT8_C( 89), INT8_C( 122), INT8_C( 5), -INT8_C( 124), -INT8_C( 2), INT8_C( 113), INT8_C( 37), -INT8_C( 52), -INT8_C( 115), INT8_C( 27), INT8_C( 65), INT8_C( 116), INT8_C( 120) }, { -INT8_C( 53), INT8_C( 102), -INT8_C( 33), -INT8_C( 11), INT8_C( 99), -INT8_C( 14), INT8_C( 19), INT8_C( 1), INT8_C( 39), -INT8_C( 104), INT8_C( 8), INT8_C( 62), -INT8_C( 39), INT8_C( 34), INT8_C( 79), -INT8_C( 4), -INT8_C( 19), -INT8_C( 54), INT8_C( 109), -INT8_C( 4), -INT8_C( 109), -INT8_C( 79), INT8_C( 47), INT8_C( 43), -INT8_C( 124), INT8_C( 108), -INT8_C( 96), -INT8_C( 51), INT8_C( 27), -INT8_C( 10), INT8_C( 117), -INT8_C( 25), INT8_C( 93), INT8_C( 85), -INT8_C( 36), -INT8_C( 64), INT8_C( 71), -INT8_C( 17), -INT8_C( 63), INT8_C( 110), -INT8_C( 121), -INT8_C( 55), -INT8_C( 84), INT8_C( 96), -INT8_C( 20), -INT8_C( 4), INT8_C( 92), -INT8_C( 39), -INT8_C( 58), -INT8_C( 55), -INT8_C( 42), INT8_C( 89), INT8_C( 122), INT8_C( 5), -INT8_C( 124), -INT8_C( 2), INT8_C( 113), INT8_C( 37), -INT8_C( 52), -INT8_C( 115), INT8_C( 27), INT8_C( 65), INT8_C( 116), INT8_C( 120) } }, { { -INT8_C( 106), INT8_C( 80), INT8_C( 57), -INT8_C( 35), INT8_C( 63), -INT8_C( 6), INT8_C( 76), -INT8_C( 58), -INT8_C( 60), -INT8_C( 8), INT8_C( 38), -INT8_C( 80), -INT8_C( 12), -INT8_C( 126), -INT8_C( 119), -INT8_C( 69), INT8_C( 75), INT8_C( 95), INT8_C( 20), -INT8_C( 59), INT8_C( 100), -INT8_C( 103), -INT8_C( 60), -INT8_C( 42), -INT8_C( 66), -INT8_C( 112), INT8_C( 99), -INT8_C( 39), -INT8_C( 47), -INT8_C( 41), INT8_C( 82), INT8_C( 104), INT8_C( 39), -INT8_C( 117), INT8_C( 69), INT8_C( 102), -INT8_C( 123), -INT8_C( 111), INT8_C( 44), INT8_C( 73), -INT8_C( 118), INT8_C( 82), -INT8_C( 7), INT8_C( 126), -INT8_C( 44), -INT8_C( 125), INT8_C( 57), INT8_C( 31), -INT8_C( 30), INT8_C( 78), -INT8_C( 28), INT8_C( 71), -INT8_C( 25), -INT8_C( 88), INT8_C( 29), -INT8_C( 91), INT8_C( 56), INT8_MIN, INT8_C( 126), INT8_C( 10), INT8_C( 87), -INT8_C( 48), INT8_C( 114), 
INT8_C( 126) }, { -INT8_C( 106), INT8_C( 80), INT8_C( 57), -INT8_C( 35), INT8_C( 63), -INT8_C( 6), INT8_C( 76), -INT8_C( 58), -INT8_C( 60), -INT8_C( 8), INT8_C( 38), -INT8_C( 80), -INT8_C( 12), -INT8_C( 126), -INT8_C( 119), -INT8_C( 69), INT8_C( 75), INT8_C( 95), INT8_C( 20), -INT8_C( 59), INT8_C( 100), -INT8_C( 103), -INT8_C( 60), -INT8_C( 42), -INT8_C( 66), -INT8_C( 112), INT8_C( 99), -INT8_C( 39), -INT8_C( 47), -INT8_C( 41), INT8_C( 82), INT8_C( 104), INT8_C( 39), -INT8_C( 117), INT8_C( 69), INT8_C( 102), -INT8_C( 123), -INT8_C( 111), INT8_C( 44), INT8_C( 73), -INT8_C( 118), INT8_C( 82), -INT8_C( 7), INT8_C( 126), -INT8_C( 44), -INT8_C( 125), INT8_C( 57), INT8_C( 31), -INT8_C( 30), INT8_C( 78), -INT8_C( 28), INT8_C( 71), -INT8_C( 25), -INT8_C( 88), INT8_C( 29), -INT8_C( 91), INT8_C( 56), INT8_MIN, INT8_C( 126), INT8_C( 10), INT8_C( 87), -INT8_C( 48), INT8_C( 114), INT8_C( 126) } }, { { INT8_C( 91), -INT8_C( 73), -INT8_C( 28), -INT8_C( 31), INT8_C( 73), INT8_C( 16), INT8_C( 42), -INT8_C( 45), INT8_C( 98), INT8_C( 36), INT8_C( 81), INT8_C( 54), -INT8_C( 89), -INT8_C( 117), INT8_C( 85), -INT8_C( 119), -INT8_C( 39), INT8_C( 57), -INT8_C( 48), -INT8_C( 64), -INT8_C( 30), -INT8_C( 19), INT8_C( 101), INT8_C( 26), INT8_C( 109), -INT8_C( 29), INT8_C( 36), -INT8_C( 60), -INT8_C( 76), -INT8_C( 106), INT8_C( 66), INT8_C( 15), INT8_C( 78), INT8_C( 38), -INT8_C( 16), -INT8_C( 105), INT8_C( 54), INT8_C( 27), INT8_C( 106), -INT8_C( 104), INT8_C( 63), -INT8_C( 69), -INT8_C( 50), -INT8_C( 26), INT8_C( 70), INT8_C( 35), INT8_C( 111), INT8_C( 31), INT8_C( 93), INT8_C( 64), -INT8_C( 33), INT8_C( 63), INT8_C( 45), INT8_C( 68), INT8_C( 89), -INT8_C( 101), INT8_C( 40), INT8_C( 126), INT8_C( 95), -INT8_C( 36), INT8_C( 20), -INT8_C( 94), -INT8_C( 21), INT8_C( 98) }, { INT8_C( 91), -INT8_C( 73), -INT8_C( 28), -INT8_C( 31), INT8_C( 73), INT8_C( 16), INT8_C( 42), -INT8_C( 45), INT8_C( 98), INT8_C( 36), INT8_C( 81), INT8_C( 54), -INT8_C( 89), -INT8_C( 117), INT8_C( 85), -INT8_C( 119), 
-INT8_C( 39), INT8_C( 57), -INT8_C( 48), -INT8_C( 64), -INT8_C( 30), -INT8_C( 19), INT8_C( 101), INT8_C( 26), INT8_C( 109), -INT8_C( 29), INT8_C( 36), -INT8_C( 60), -INT8_C( 76), -INT8_C( 106), INT8_C( 66), INT8_C( 15), INT8_C( 78), INT8_C( 38), -INT8_C( 16), -INT8_C( 105), INT8_C( 54), INT8_C( 27), INT8_C( 106), -INT8_C( 104), INT8_C( 63), -INT8_C( 69), -INT8_C( 50), -INT8_C( 26), INT8_C( 70), INT8_C( 35), INT8_C( 111), INT8_C( 31), INT8_C( 93), INT8_C( 64), -INT8_C( 33), INT8_C( 63), INT8_C( 45), INT8_C( 68), INT8_C( 89), -INT8_C( 101), INT8_C( 40), INT8_C( 126), INT8_C( 95), -INT8_C( 36), INT8_C( 20), -INT8_C( 94), -INT8_C( 21), INT8_C( 98) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_test_x86_assert_equal_i8x64(simde_mm512_load_si512(test_vec[i].a), simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i8x64(); simde__m512i r = a; simde_test_x86_write_i8x64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_loadu_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { SIMDE_ALIGN_LIKE_64(simde__m512i) const int16_t a[32]; const int16_t r[32]; } test_vec[] = { { { INT16_C( 1628), -INT16_C( 19656), -INT16_C( 13922), INT16_C( 835), -INT16_C( 2787), -INT16_C( 10395), INT16_C( 6399), INT16_C( 11765), INT16_C( 4017), INT16_C( 12521), INT16_C( 21092), -INT16_C( 30322), INT16_C( 5175), INT16_C( 12717), -INT16_C( 28003), -INT16_C( 1686), -INT16_C( 23911), INT16_C( 14252), -INT16_C( 4245), -INT16_C( 30662), -INT16_C( 24604), -INT16_C( 7329), INT16_C( 21687), INT16_C( 26897), -INT16_C( 1436), -INT16_C( 14183), INT16_C( 10060), -INT16_C( 31919), -INT16_C( 452), -INT16_C( 9803), INT16_C( 8081), INT16_C( 10962) }, { INT16_C( 1628), -INT16_C( 19656), -INT16_C( 13922), INT16_C( 835), -INT16_C( 2787), -INT16_C( 10395), INT16_C( 6399), INT16_C( 
11765), INT16_C( 4017), INT16_C( 12521), INT16_C( 21092), -INT16_C( 30322), INT16_C( 5175), INT16_C( 12717), -INT16_C( 28003), -INT16_C( 1686), -INT16_C( 23911), INT16_C( 14252), -INT16_C( 4245), -INT16_C( 30662), -INT16_C( 24604), -INT16_C( 7329), INT16_C( 21687), INT16_C( 26897), -INT16_C( 1436), -INT16_C( 14183), INT16_C( 10060), -INT16_C( 31919), -INT16_C( 452), -INT16_C( 9803), INT16_C( 8081), INT16_C( 10962) } }, { { INT16_C( 32450), INT16_C( 11617), -INT16_C( 25746), INT16_C( 21174), INT16_C( 5434), -INT16_C( 3786), INT16_C( 18282), -INT16_C( 12710), -INT16_C( 3263), -INT16_C( 29034), -INT16_C( 6373), INT16_C( 22289), -INT16_C( 14618), INT16_C( 30512), INT16_C( 742), -INT16_C( 22367), INT16_C( 640), -INT16_C( 4395), -INT16_C( 29795), -INT16_C( 10431), INT16_C( 30625), INT16_C( 3016), INT16_C( 9150), -INT16_C( 39), INT16_C( 28438), INT16_C( 12685), -INT16_C( 24746), INT16_C( 15496), -INT16_C( 18331), INT16_C( 19379), INT16_C( 21690), INT16_C( 15347) }, { INT16_C( 32450), INT16_C( 11617), -INT16_C( 25746), INT16_C( 21174), INT16_C( 5434), -INT16_C( 3786), INT16_C( 18282), -INT16_C( 12710), -INT16_C( 3263), -INT16_C( 29034), -INT16_C( 6373), INT16_C( 22289), -INT16_C( 14618), INT16_C( 30512), INT16_C( 742), -INT16_C( 22367), INT16_C( 640), -INT16_C( 4395), -INT16_C( 29795), -INT16_C( 10431), INT16_C( 30625), INT16_C( 3016), INT16_C( 9150), -INT16_C( 39), INT16_C( 28438), INT16_C( 12685), -INT16_C( 24746), INT16_C( 15496), -INT16_C( 18331), INT16_C( 19379), INT16_C( 21690), INT16_C( 15347) } }, { { -INT16_C( 13994), -INT16_C( 3287), INT16_C( 27220), -INT16_C( 2614), -INT16_C( 27679), -INT16_C( 24832), -INT16_C( 9802), -INT16_C( 13153), INT16_C( 11336), -INT16_C( 24578), -INT16_C( 31029), INT16_C( 12763), -INT16_C( 28865), -INT16_C( 1668), INT16_C( 28899), INT16_C( 14900), INT16_C( 24121), -INT16_C( 29395), -INT16_C( 1848), -INT16_C( 21885), -INT16_C( 31861), INT16_C( 16713), -INT16_C( 6051), -INT16_C( 23283), INT16_C( 2837), -INT16_C( 8124), INT16_C( 8338), 
-INT16_C( 12015), -INT16_C( 29009), -INT16_C( 27958), -INT16_C( 2), INT16_C( 14284) }, { -INT16_C( 13994), -INT16_C( 3287), INT16_C( 27220), -INT16_C( 2614), -INT16_C( 27679), -INT16_C( 24832), -INT16_C( 9802), -INT16_C( 13153), INT16_C( 11336), -INT16_C( 24578), -INT16_C( 31029), INT16_C( 12763), -INT16_C( 28865), -INT16_C( 1668), INT16_C( 28899), INT16_C( 14900), INT16_C( 24121), -INT16_C( 29395), -INT16_C( 1848), -INT16_C( 21885), -INT16_C( 31861), INT16_C( 16713), -INT16_C( 6051), -INT16_C( 23283), INT16_C( 2837), -INT16_C( 8124), INT16_C( 8338), -INT16_C( 12015), -INT16_C( 29009), -INT16_C( 27958), -INT16_C( 2), INT16_C( 14284) } }, { { -INT16_C( 1443), INT16_C( 9668), INT16_C( 18418), INT16_C( 32207), INT16_C( 6603), INT16_C( 10430), -INT16_C( 13567), INT16_C( 5837), INT16_C( 4823), INT16_C( 27127), INT16_C( 2098), -INT16_C( 7878), INT16_C( 1174), -INT16_C( 27533), INT16_C( 16387), INT16_C( 24779), -INT16_C( 28614), INT16_C( 11398), INT16_C( 21975), -INT16_C( 23895), INT16_C( 26478), INT16_C( 28874), -INT16_C( 26574), INT16_C( 2438), INT16_C( 32170), -INT16_C( 9102), -INT16_C( 21370), INT16_C( 7357), INT16_C( 12465), -INT16_C( 19279), INT16_C( 31856), -INT16_C( 21995) }, { -INT16_C( 1443), INT16_C( 9668), INT16_C( 18418), INT16_C( 32207), INT16_C( 6603), INT16_C( 10430), -INT16_C( 13567), INT16_C( 5837), INT16_C( 4823), INT16_C( 27127), INT16_C( 2098), -INT16_C( 7878), INT16_C( 1174), -INT16_C( 27533), INT16_C( 16387), INT16_C( 24779), -INT16_C( 28614), INT16_C( 11398), INT16_C( 21975), -INT16_C( 23895), INT16_C( 26478), INT16_C( 28874), -INT16_C( 26574), INT16_C( 2438), INT16_C( 32170), -INT16_C( 9102), -INT16_C( 21370), INT16_C( 7357), INT16_C( 12465), -INT16_C( 19279), INT16_C( 31856), -INT16_C( 21995) } }, { { -INT16_C( 25844), -INT16_C( 6954), INT16_C( 32752), INT16_C( 24454), INT16_C( 20966), INT16_C( 6607), INT16_C( 21993), -INT16_C( 27870), -INT16_C( 27181), INT16_C( 22895), INT16_C( 11329), -INT16_C( 3467), INT16_C( 9820), -INT16_C( 12889), -INT16_C( 
17245), -INT16_C( 20617), INT16_C( 20055), INT16_C( 18323), INT16_C( 6861), -INT16_C( 19290), INT16_C( 30059), INT16_C( 21709), -INT16_C( 4149), -INT16_C( 24857), INT16_C( 22148), -INT16_C( 14601), INT16_C( 27778), -INT16_C( 8520), INT16_C( 24467), INT16_C( 13995), INT16_C( 8987), INT16_C( 29413) }, { -INT16_C( 25844), -INT16_C( 6954), INT16_C( 32752), INT16_C( 24454), INT16_C( 20966), INT16_C( 6607), INT16_C( 21993), -INT16_C( 27870), -INT16_C( 27181), INT16_C( 22895), INT16_C( 11329), -INT16_C( 3467), INT16_C( 9820), -INT16_C( 12889), -INT16_C( 17245), -INT16_C( 20617), INT16_C( 20055), INT16_C( 18323), INT16_C( 6861), -INT16_C( 19290), INT16_C( 30059), INT16_C( 21709), -INT16_C( 4149), -INT16_C( 24857), INT16_C( 22148), -INT16_C( 14601), INT16_C( 27778), -INT16_C( 8520), INT16_C( 24467), INT16_C( 13995), INT16_C( 8987), INT16_C( 29413) } }, { { INT16_C( 31089), INT16_C( 16058), INT16_C( 24723), -INT16_C( 270), -INT16_C( 16426), -INT16_C( 24238), INT16_C( 14767), INT16_C( 13119), INT16_C( 13967), INT16_C( 4601), -INT16_C( 19806), INT16_C( 13807), -INT16_C( 25839), INT16_C( 11627), INT16_C( 20926), INT16_C( 12191), INT16_C( 22986), INT16_C( 23917), INT16_C( 24762), -INT16_C( 28581), -INT16_C( 21217), -INT16_C( 12751), INT16_C( 28902), INT16_C( 29954), -INT16_C( 1114), INT16_C( 18566), INT16_C( 30125), -INT16_C( 16514), -INT16_C( 5872), -INT16_C( 12564), -INT16_C( 29894), INT16_C( 1277) }, { INT16_C( 31089), INT16_C( 16058), INT16_C( 24723), -INT16_C( 270), -INT16_C( 16426), -INT16_C( 24238), INT16_C( 14767), INT16_C( 13119), INT16_C( 13967), INT16_C( 4601), -INT16_C( 19806), INT16_C( 13807), -INT16_C( 25839), INT16_C( 11627), INT16_C( 20926), INT16_C( 12191), INT16_C( 22986), INT16_C( 23917), INT16_C( 24762), -INT16_C( 28581), -INT16_C( 21217), -INT16_C( 12751), INT16_C( 28902), INT16_C( 29954), -INT16_C( 1114), INT16_C( 18566), INT16_C( 30125), -INT16_C( 16514), -INT16_C( 5872), -INT16_C( 12564), -INT16_C( 29894), INT16_C( 1277) } }, { { INT16_C( 27621), 
-INT16_C( 24735), -INT16_C( 17205), -INT16_C( 5585), INT16_C( 24681), INT16_C( 20409), -INT16_C( 17456), INT16_C( 30404), INT16_C( 19126), INT16_C( 25790), INT16_C( 15552), -INT16_C( 12253), INT16_C( 3878), INT16_C( 24735), -INT16_C( 25446), INT16_C( 32613), -INT16_C( 14841), -INT16_C( 11746), INT16_C( 19843), -INT16_C( 4931), INT16_C( 30381), INT16_C( 32060), INT16_C( 49), -INT16_C( 6157), -INT16_C( 19893), INT16_C( 2891), INT16_C( 28398), INT16_C( 5339), INT16_C( 31357), INT16_C( 6261), -INT16_C( 9705), INT16_C( 7831) }, { INT16_C( 27621), -INT16_C( 24735), -INT16_C( 17205), -INT16_C( 5585), INT16_C( 24681), INT16_C( 20409), -INT16_C( 17456), INT16_C( 30404), INT16_C( 19126), INT16_C( 25790), INT16_C( 15552), -INT16_C( 12253), INT16_C( 3878), INT16_C( 24735), -INT16_C( 25446), INT16_C( 32613), -INT16_C( 14841), -INT16_C( 11746), INT16_C( 19843), -INT16_C( 4931), INT16_C( 30381), INT16_C( 32060), INT16_C( 49), -INT16_C( 6157), -INT16_C( 19893), INT16_C( 2891), INT16_C( 28398), INT16_C( 5339), INT16_C( 31357), INT16_C( 6261), -INT16_C( 9705), INT16_C( 7831) } }, { { -INT16_C( 18784), INT16_C( 9201), -INT16_C( 20989), -INT16_C( 20208), INT16_C( 19492), INT16_C( 21806), INT16_C( 8780), -INT16_C( 26820), -INT16_C( 30508), -INT16_C( 15710), INT16_C( 32502), INT16_C( 29911), INT16_C( 19704), INT16_C( 3980), INT16_C( 8998), -INT16_C( 14802), INT16_C( 8153), -INT16_C( 8726), -INT16_C( 1331), -INT16_C( 3698), -INT16_C( 17338), -INT16_C( 28090), -INT16_C( 32034), -INT16_C( 19926), -INT16_C( 13302), INT16_C( 373), INT16_C( 19530), INT16_C( 17269), INT16_C( 408), -INT16_C( 16814), -INT16_C( 32732), -INT16_C( 380) }, { -INT16_C( 18784), INT16_C( 9201), -INT16_C( 20989), -INT16_C( 20208), INT16_C( 19492), INT16_C( 21806), INT16_C( 8780), -INT16_C( 26820), -INT16_C( 30508), -INT16_C( 15710), INT16_C( 32502), INT16_C( 29911), INT16_C( 19704), INT16_C( 3980), INT16_C( 8998), -INT16_C( 14802), INT16_C( 8153), -INT16_C( 8726), -INT16_C( 1331), -INT16_C( 3698), -INT16_C( 17338), 
-INT16_C( 28090), -INT16_C( 32034), -INT16_C( 19926), -INT16_C( 13302), INT16_C( 373), INT16_C( 19530), INT16_C( 17269), INT16_C( 408), -INT16_C( 16814), -INT16_C( 32732), -INT16_C( 380) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_test_x86_assert_equal_i16x32(simde_mm512_load_si512(test_vec[i].a), simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i16x32(); simde__m512i r = a; simde_test_x86_write_i16x32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_loadu_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { SIMDE_ALIGN_LIKE_64(simde__m512i) const int32_t a[16]; const int32_t r[16]; } test_vec[] = { { { -INT32_C( 1085279312), INT32_C( 1689654203), -INT32_C( 1704027444), INT32_C( 1992004399), INT32_C( 1598136076), -INT32_C( 1107314712), INT32_C( 1634510512), INT32_C( 2144272078), -INT32_C( 1405215247), -INT32_C( 65931984), -INT32_C( 1097433201), INT32_C( 523522579), -INT32_C( 629246223), -INT32_C( 560497363), -INT32_C( 230751453), -INT32_C( 210694911) }, { -INT32_C( 1085279312), INT32_C( 1689654203), -INT32_C( 1704027444), INT32_C( 1992004399), INT32_C( 1598136076), -INT32_C( 1107314712), INT32_C( 1634510512), INT32_C( 2144272078), -INT32_C( 1405215247), -INT32_C( 65931984), -INT32_C( 1097433201), INT32_C( 523522579), -INT32_C( 629246223), -INT32_C( 560497363), -INT32_C( 230751453), -INT32_C( 210694911) } }, { { INT32_C( 1537191723), INT32_C( 878227620), INT32_C( 1139994160), INT32_C( 845293376), -INT32_C( 905125475), -INT32_C( 2102877346), -INT32_C( 1468733529), INT32_C( 547087861), INT32_C( 964377492), INT32_C( 460182507), INT32_C( 39739330), INT32_C( 590659974), INT32_C( 15614114), -INT32_C( 1954375964), INT32_C( 1932785278), INT32_C( 1888735195) }, { INT32_C( 1537191723), INT32_C( 878227620), INT32_C( 1139994160), 
INT32_C( 845293376), -INT32_C( 905125475), -INT32_C( 2102877346), -INT32_C( 1468733529), INT32_C( 547087861), INT32_C( 964377492), INT32_C( 460182507), INT32_C( 39739330), INT32_C( 590659974), INT32_C( 15614114), -INT32_C( 1954375964), INT32_C( 1932785278), INT32_C( 1888735195) } }, { { -INT32_C( 173470198), -INT32_C( 1542383902), -INT32_C( 56201355), -INT32_C( 769664208), -INT32_C( 2945765), INT32_C( 579491236), INT32_C( 664125004), -INT32_C( 1751701363), INT32_C( 411844662), -INT32_C( 860054186), INT32_C( 1036542733), INT32_C( 1494279998), -INT32_C( 1722162187), -INT32_C( 2068061384), INT32_C( 783044769), -INT32_C( 1362803848) }, { -INT32_C( 173470198), -INT32_C( 1542383902), -INT32_C( 56201355), -INT32_C( 769664208), -INT32_C( 2945765), INT32_C( 579491236), INT32_C( 664125004), -INT32_C( 1751701363), INT32_C( 411844662), -INT32_C( 860054186), INT32_C( 1036542733), INT32_C( 1494279998), -INT32_C( 1722162187), -INT32_C( 2068061384), INT32_C( 783044769), -INT32_C( 1362803848) } }, { { -INT32_C( 624471420), -INT32_C( 56196113), INT32_C( 607809254), INT32_C( 1266567766), INT32_C( 1709496109), INT32_C( 1558880186), INT32_C( 1737135855), INT32_C( 1561678041), -INT32_C( 1858544478), INT32_C( 1183768160), -INT32_C( 1553217459), INT32_C( 1072621842), INT32_C( 2057622208), INT32_C( 1624673905), -INT32_C( 20487900), INT32_C( 1398529201) }, { -INT32_C( 624471420), -INT32_C( 56196113), INT32_C( 607809254), INT32_C( 1266567766), INT32_C( 1709496109), INT32_C( 1558880186), INT32_C( 1737135855), INT32_C( 1561678041), -INT32_C( 1858544478), INT32_C( 1183768160), -INT32_C( 1553217459), INT32_C( 1072621842), INT32_C( 2057622208), INT32_C( 1624673905), -INT32_C( 20487900), INT32_C( 1398529201) } }, { { INT32_C( 434410425), -INT32_C( 1084263822), INT32_C( 1281542714), INT32_C( 1938510003), -INT32_C( 1813106654), -INT32_C( 470563650), -INT32_C( 689849819), INT32_C( 1328102550), INT32_C( 1114115792), -INT32_C( 1157511040), INT32_C( 1174889362), -INT32_C( 709258317), -INT32_C( 
2123847741), -INT32_C( 1855693972), -INT32_C( 1419229931), INT32_C( 1392218498) }, { INT32_C( 434410425), -INT32_C( 1084263822), INT32_C( 1281542714), INT32_C( 1938510003), -INT32_C( 1813106654), -INT32_C( 470563650), -INT32_C( 689849819), INT32_C( 1328102550), INT32_C( 1114115792), -INT32_C( 1157511040), INT32_C( 1174889362), -INT32_C( 709258317), -INT32_C( 2123847741), -INT32_C( 1855693972), -INT32_C( 1419229931), INT32_C( 1392218498) } }, { { INT32_C( 546595743), -INT32_C( 1092905685), -INT32_C( 1425743112), INT32_C( 947961205), -INT32_C( 776279963), INT32_C( 1482825283), -INT32_C( 435959196), -INT32_C( 80150948), -INT32_C( 1927558046), INT32_C( 1498150497), INT32_C( 1308905433), INT32_C( 1921483789), -INT32_C( 1354546836), -INT32_C( 1022909089), -INT32_C( 861336976), INT32_C( 1808261385) }, { INT32_C( 546595743), -INT32_C( 1092905685), -INT32_C( 1425743112), INT32_C( 947961205), -INT32_C( 776279963), INT32_C( 1482825283), -INT32_C( 435959196), -INT32_C( 80150948), -INT32_C( 1927558046), INT32_C( 1498150497), INT32_C( 1308905433), INT32_C( 1921483789), -INT32_C( 1354546836), -INT32_C( 1022909089), -INT32_C( 861336976), INT32_C( 1808261385) } }, { { INT32_C( 251192237), -INT32_C( 1301855015), -INT32_C( 1610519661), INT32_C( 1527941359), INT32_C( 671765961), INT32_C( 1810633211), INT32_C( 624399644), INT32_C( 613482103), -INT32_C( 1154250527), INT32_C( 1617795788), -INT32_C( 184521210), -INT32_C( 1085205514), INT32_C( 1676172136), -INT32_C( 1982933907), -INT32_C( 525466263), -INT32_C( 452641276) }, { INT32_C( 251192237), -INT32_C( 1301855015), -INT32_C( 1610519661), INT32_C( 1527941359), INT32_C( 671765961), INT32_C( 1810633211), INT32_C( 624399644), INT32_C( 613482103), -INT32_C( 1154250527), INT32_C( 1617795788), -INT32_C( 184521210), -INT32_C( 1085205514), INT32_C( 1676172136), -INT32_C( 1982933907), -INT32_C( 525466263), -INT32_C( 452641276) } }, { { -INT32_C( 1818216250), -INT32_C( 655159598), INT32_C( 1942942588), INT32_C( 1865555718), -INT32_C( 405661062), 
INT32_C( 1483776494), -INT32_C( 1439162714), INT32_C( 596655452), INT32_C( 1219899509), -INT32_C( 1155487426), -INT32_C( 1557205348), -INT32_C( 2012061683), INT32_C( 1768940667), INT32_C( 750903429), INT32_C( 1540815614), -INT32_C( 1384225225) }, { -INT32_C( 1818216250), -INT32_C( 655159598), INT32_C( 1942942588), INT32_C( 1865555718), -INT32_C( 405661062), INT32_C( 1483776494), -INT32_C( 1439162714), INT32_C( 596655452), INT32_C( 1219899509), -INT32_C( 1155487426), -INT32_C( 1557205348), -INT32_C( 2012061683), INT32_C( 1768940667), INT32_C( 750903429), INT32_C( 1540815614), -INT32_C( 1384225225) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_test_x86_assert_equal_i32x16(simde_mm512_load_si512(test_vec[i].a), simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i32x16(); simde__m512i r = a; simde_test_x86_write_i32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_loadu_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { SIMDE_ALIGN_LIKE_64(simde__m512i) const int64_t a[8]; const int64_t r[8]; } test_vec[] = { { { -INT64_C( 2031689546876671122), -INT64_C( 4181824216786536295), INT64_C( 3378378289711491617), INT64_C( 1413316256384642707), -INT64_C( 4698950795030485050), INT64_C( 7475600748512024817), -INT64_C( 2293462173326056235), INT64_C( 2603685720003003242) }, { -INT64_C( 2031689546876671122), -INT64_C( 4181824216786536295), INT64_C( 3378378289711491617), INT64_C( 1413316256384642707), -INT64_C( 4698950795030485050), INT64_C( 7475600748512024817), -INT64_C( 2293462173326056235), INT64_C( 2603685720003003242) } }, { { INT64_C( 4049249843808980558), INT64_C( 7392641815426491883), INT64_C( 1801878204460544724), -INT64_C( 48330471621752111), INT64_C( 2024547467117354649), INT64_C( 1044804576756910729), INT64_C( 
4782031642370761366), INT64_C( 7668159918304822970) }, { INT64_C( 4049249843808980558), INT64_C( 7392641815426491883), INT64_C( 1801878204460544724), -INT64_C( 48330471621752111), INT64_C( 2024547467117354649), INT64_C( 1044804576756910729), INT64_C( 4782031642370761366), INT64_C( 7668159918304822970) } }, { { INT64_C( 680433322035960868), INT64_C( 4032026382637907372), INT64_C( 1024807869850854276), -INT64_C( 7738621839182026145), -INT64_C( 916101787114937152), -INT64_C( 7858554787118552041), -INT64_C( 6533667226337645326), INT64_C( 8173594282907061610) }, { INT64_C( 680433322035960868), INT64_C( 4032026382637907372), INT64_C( 1024807869850854276), -INT64_C( 7738621839182026145), -INT64_C( 916101787114937152), -INT64_C( 7858554787118552041), -INT64_C( 6533667226337645326), INT64_C( 8173594282907061610) } }, { { -INT64_C( 3994697604197623979), INT64_C( 3028796336221808999), -INT64_C( 4986958888383311650), INT64_C( 7327921812528210064), -INT64_C( 4048013273381903271), -INT64_C( 6603326236083268358), INT64_C( 2296716578005830869), -INT64_C( 3555290135981427917) }, { -INT64_C( 3994697604197623979), INT64_C( 3028796336221808999), -INT64_C( 4986958888383311650), INT64_C( 7327921812528210064), -INT64_C( 4048013273381903271), -INT64_C( 6603326236083268358), INT64_C( 2296716578005830869), -INT64_C( 3555290135981427917) } }, { { INT64_C( 3935770298369485431), -INT64_C( 762861917337756674), -INT64_C( 558453203728190831), INT64_C( 2931813335080607596), INT64_C( 3458938454811838351), -INT64_C( 4643389136534410887), INT64_C( 1174628764682791568), -INT64_C( 245550163283572547) }, { INT64_C( 3935770298369485431), -INT64_C( 762861917337756674), -INT64_C( 558453203728190831), INT64_C( 2931813335080607596), INT64_C( 3458938454811838351), -INT64_C( 4643389136534410887), INT64_C( 1174628764682791568), -INT64_C( 245550163283572547) } }, { { -INT64_C( 5869378661672118744), INT64_C( 7934735468561203248), -INT64_C( 2939425477300585343), -INT64_C( 1152397282285115752), -INT64_C( 
4659583426481174413), -INT64_C( 675194194085700267), INT64_C( 947814707075179574), -INT64_C( 4886946240843846537) }, { -INT64_C( 5869378661672118744), INT64_C( 7934735468561203248), -INT64_C( 2939425477300585343), -INT64_C( 1152397282285115752), -INT64_C( 4659583426481174413), -INT64_C( 675194194085700267), INT64_C( 947814707075179574), -INT64_C( 4886946240843846537) } }, { { -INT64_C( 6617272956007253540), -INT64_C( 5205464620909246634), -INT64_C( 6530450158184309283), INT64_C( 7034361509239288218), INT64_C( 1332492355739845515), -INT64_C( 6091786111122778819), INT64_C( 4994593874853592189), -INT64_C( 3335539744629574450) }, { -INT64_C( 6617272956007253540), -INT64_C( 5205464620909246634), -INT64_C( 6530450158184309283), INT64_C( 7034361509239288218), INT64_C( 1332492355739845515), -INT64_C( 6091786111122778819), INT64_C( 4994593874853592189), -INT64_C( 3335539744629574450) } }, { { -INT64_C( 5361209383270579765), -INT64_C( 7640663431528024195), INT64_C( 2185812967214347366), INT64_C( 1286946775314366149), INT64_C( 3158766812587919016), -INT64_C( 7397886743846434135), -INT64_C( 1382324539653187999), INT64_C( 1284884244920222333) }, { -INT64_C( 5361209383270579765), -INT64_C( 7640663431528024195), INT64_C( 2185812967214347366), INT64_C( 1286946775314366149), INT64_C( 3158766812587919016), -INT64_C( 7397886743846434135), -INT64_C( 1382324539653187999), INT64_C( 1284884244920222333) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_test_x86_assert_equal_i64x8(simde_mm512_load_si512(test_vec[i].a), simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i64x8(); simde__m512i r = a; simde_test_x86_write_i64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_loadu_si512(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i r; } 
test_vec[8] = { { simde_x_mm512_set_epu32(UINT32_C(2465927924), UINT32_C(3593197775), UINT32_C( 612910812), UINT32_C(3812769805), UINT32_C(4149829677), UINT32_C(3483799324), UINT32_C(1459962882), UINT32_C(4149819515), UINT32_C(2650201844), UINT32_C( 758753621), UINT32_C(1440172455), UINT32_C(1093653043), UINT32_C(4135659774), UINT32_C(2249292246), UINT32_C(3926208727), UINT32_C( 363227362)), simde_x_mm512_set_epu32(UINT32_C(2465927924), UINT32_C(3593197775), UINT32_C( 612910812), UINT32_C(3812769805), UINT32_C(4149829677), UINT32_C(3483799324), UINT32_C(1459962882), UINT32_C(4149819515), UINT32_C(2650201844), UINT32_C( 758753621), UINT32_C(1440172455), UINT32_C(1093653043), UINT32_C(4135659774), UINT32_C(2249292246), UINT32_C(3926208727), UINT32_C( 363227362)) }, { simde_x_mm512_set_epu32(UINT32_C( 468967701), UINT32_C(1464888328), UINT32_C(2623912787), UINT32_C(3584306317), UINT32_C(3441172772), UINT32_C(1957813224), UINT32_C(3956090282), UINT32_C(2819645236), UINT32_C(2119397630), UINT32_C(3325357179), UINT32_C( 910080153), UINT32_C(3698201489), UINT32_C(3945376801), UINT32_C(2699586726), UINT32_C(1169343086), UINT32_C(2983594096)), simde_x_mm512_set_epu32(UINT32_C( 468967701), UINT32_C(1464888328), UINT32_C(2623912787), UINT32_C(3584306317), UINT32_C(3441172772), UINT32_C(1957813224), UINT32_C(3956090282), UINT32_C(2819645236), UINT32_C(2119397630), UINT32_C(3325357179), UINT32_C( 910080153), UINT32_C(3698201489), UINT32_C(3945376801), UINT32_C(2699586726), UINT32_C(1169343086), UINT32_C(2983594096)) }, { simde_x_mm512_set_epu32(UINT32_C(3220925730), UINT32_C(4163700514), UINT32_C( 208162340), UINT32_C( 72282893), UINT32_C(2784701415), UINT32_C(2960668076), UINT32_C(2280551509), UINT32_C( 511971347), UINT32_C(3142311802), UINT32_C(3582165504), UINT32_C(3533175269), UINT32_C(3138584679), UINT32_C(3117232701), UINT32_C(1582887517), UINT32_C(2957127939), UINT32_C(3388466484)), simde_x_mm512_set_epu32(UINT32_C(3220925730), UINT32_C(4163700514), UINT32_C( 208162340), 
UINT32_C( 72282893), UINT32_C(2784701415), UINT32_C(2960668076), UINT32_C(2280551509), UINT32_C( 511971347), UINT32_C(3142311802), UINT32_C(3582165504), UINT32_C(3533175269), UINT32_C(3138584679), UINT32_C(3117232701), UINT32_C(1582887517), UINT32_C(2957127939), UINT32_C(3388466484)) }, { simde_x_mm512_set_epu32(UINT32_C(2382371522), UINT32_C( 66180421), UINT32_C(3915007092), UINT32_C(3548556152), UINT32_C(3063171483), UINT32_C( 175336822), UINT32_C(2621074902), UINT32_C(2785523281), UINT32_C(3351907467), UINT32_C(3611626580), UINT32_C(3274777282), UINT32_C(2819588991), UINT32_C(4142757399), UINT32_C(3841212820), UINT32_C(1375549108), UINT32_C(3217099434)), simde_x_mm512_set_epu32(UINT32_C(2382371522), UINT32_C( 66180421), UINT32_C(3915007092), UINT32_C(3548556152), UINT32_C(3063171483), UINT32_C( 175336822), UINT32_C(2621074902), UINT32_C(2785523281), UINT32_C(3351907467), UINT32_C(3611626580), UINT32_C(3274777282), UINT32_C(2819588991), UINT32_C(4142757399), UINT32_C(3841212820), UINT32_C(1375549108), UINT32_C(3217099434)) }, { simde_x_mm512_set_epu32(UINT32_C(1625945136), UINT32_C( 82950125), UINT32_C(3598722192), UINT32_C(2456005821), UINT32_C(3054050921), UINT32_C(3350002014), UINT32_C(1546778759), UINT32_C(3175686900), UINT32_C(3418645543), UINT32_C(1247476579), UINT32_C(2559569107), UINT32_C(3884223622), UINT32_C(2206347705), UINT32_C(1195297710), UINT32_C(4206427691), UINT32_C(2187435296)), simde_x_mm512_set_epu32(UINT32_C(1625945136), UINT32_C( 82950125), UINT32_C(3598722192), UINT32_C(2456005821), UINT32_C(3054050921), UINT32_C(3350002014), UINT32_C(1546778759), UINT32_C(3175686900), UINT32_C(3418645543), UINT32_C(1247476579), UINT32_C(2559569107), UINT32_C(3884223622), UINT32_C(2206347705), UINT32_C(1195297710), UINT32_C(4206427691), UINT32_C(2187435296)) }, { simde_x_mm512_set_epu32(UINT32_C(3055114510), UINT32_C( 314498376), UINT32_C( 259740532), UINT32_C(2845634146), UINT32_C(3528445754), UINT32_C(1438308061), UINT32_C(1618483487), 
UINT32_C(4280155704), UINT32_C(4191548278), UINT32_C( 955760205), UINT32_C(3071952989), UINT32_C(3353486020), UINT32_C(3091053226), UINT32_C(2241572393), UINT32_C(3491849165), UINT32_C(2750648051)), simde_x_mm512_set_epu32(UINT32_C(3055114510), UINT32_C( 314498376), UINT32_C( 259740532), UINT32_C(2845634146), UINT32_C(3528445754), UINT32_C(1438308061), UINT32_C(1618483487), UINT32_C(4280155704), UINT32_C(4191548278), UINT32_C( 955760205), UINT32_C(3071952989), UINT32_C(3353486020), UINT32_C(3091053226), UINT32_C(2241572393), UINT32_C(3491849165), UINT32_C(2750648051)) }, { simde_x_mm512_set_epu32(UINT32_C(2791699552), UINT32_C(1697626027), UINT32_C(3068022880), UINT32_C( 492436222), UINT32_C(2413088982), UINT32_C(1530446668), UINT32_C(1370127960), UINT32_C(2402932897), UINT32_C(4061542194), UINT32_C( 154485056), UINT32_C(3577835063), UINT32_C(3500138573), UINT32_C( 48074834), UINT32_C(1773313389), UINT32_C(3571862316), UINT32_C(1059958902)), simde_x_mm512_set_epu32(UINT32_C(2791699552), UINT32_C(1697626027), UINT32_C(3068022880), UINT32_C( 492436222), UINT32_C(2413088982), UINT32_C(1530446668), UINT32_C(1370127960), UINT32_C(2402932897), UINT32_C(4061542194), UINT32_C( 154485056), UINT32_C(3577835063), UINT32_C(3500138573), UINT32_C( 48074834), UINT32_C(1773313389), UINT32_C(3571862316), UINT32_C(1059958902)) }, { simde_x_mm512_set_epu32(UINT32_C(1166001194), UINT32_C( 115042765), UINT32_C( 557502548), UINT32_C(2408114255), UINT32_C( 870354895), UINT32_C( 955362708), UINT32_C(1149136654), UINT32_C(1920883489), UINT32_C(3238897491), UINT32_C(1952390233), UINT32_C( 223001918), UINT32_C( 310736118), UINT32_C(2747509005), UINT32_C( 134376306), UINT32_C(1234549716), UINT32_C( 594304164)), simde_x_mm512_set_epu32(UINT32_C(1166001194), UINT32_C( 115042765), UINT32_C( 557502548), UINT32_C(2408114255), UINT32_C( 870354895), UINT32_C( 955362708), UINT32_C(1149136654), UINT32_C(1920883489), UINT32_C(3238897491), UINT32_C(1952390233), UINT32_C( 223001918), UINT32_C( 
310736118), UINT32_C(2747509005), UINT32_C( 134376306), UINT32_C(1234549716), UINT32_C( 594304164)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_loadu_si512(&(test_vec[i].a)); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_loadu_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_loadu_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_loadu_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_loadu_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_loadu_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_loadu_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_loadu_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_loadu_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_loadu_si512) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/lzcnt.c000066400000000000000000000223111400333146700165510ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #define SIMDE_TEST_X86_AVX512_INSN lzcnt #include #include static int test_simde_mm_lzcnt_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32( INT32_MAX, INT32_MIN, ~INT32_C(0), INT32_C( 0)), simde_mm_set_epi32(INT32_C( 1), INT32_C( 0), INT32_C( 0), INT32_C( 32)) }, { simde_mm_set_epi32(INT32_C( 179), INT32_C( -17551), INT32_C( -2202065), INT32_C( -743837)), simde_mm_set_epi32(INT32_C( 24), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( -2559), INT32_C( 388806146), INT32_C( 1927808), INT32_C( -112)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 3), INT32_C( 11), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 22), INT32_C( -189678), INT32_C( 252), INT32_C( 27703)), simde_mm_set_epi32(INT32_C( 27), INT32_C( 0), INT32_C( 24), INT32_C( 17)) }, { simde_mm_set_epi32(INT32_C( -9106380), INT32_C( 8952567), INT32_C( -4), INT32_C( 685169)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 8), INT32_C( 0), INT32_C( 12)) }, { simde_mm_set_epi32(INT32_C( 267298), INT32_C( -3422), INT32_C( 4), INT32_C( 31229)), simde_mm_set_epi32(INT32_C( 13), INT32_C( 0), INT32_C( 29), INT32_C( 17)) }, { simde_mm_set_epi32(INT32_C( 167383), INT32_C( 214), INT32_C( 0), INT32_C( -20257)), simde_mm_set_epi32(INT32_C( 14), INT32_C( 24), INT32_C( 32), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( -147), INT32_C( -1774263), INT32_C( 143922), INT32_C( -914728)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 14), INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_lzcnt_epi32(test_vec[i].a); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } 
static int test_simde_mm_mask_lzcnt_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t src[4]; const simde__mmask8 k; const int32_t a[4]; const int32_t r[4]; } test_vec[] = { { { -INT32_C( 1967906480), INT32_C( 444241631), INT32_C( 1751328815), INT32_C( 1797987849) }, UINT8_C( 10), { INT32_C( 5428843), INT32_C( 517032875), INT32_C( 698580121), INT32_C( 725459874) }, { -INT32_C( 1967906480), INT32_C( 3), INT32_C( 1751328815), INT32_C( 2) } }, { { -INT32_C( 69694592), INT32_C( 153290917), -INT32_C( 1675154320), -INT32_C( 246713960) }, UINT8_C( 13), { INT32_C( 18677), INT32_C( 21570), INT32_C( 30363), INT32_C( 9014) }, { INT32_C( 17), INT32_C( 153290917), INT32_C( 17), INT32_C( 18) } }, { { -INT32_C( 1957041304), INT32_C( 481872372), -INT32_C( 1332916123), -INT32_C( 503559615) }, UINT8_C( 6), { INT32_C( 0), INT32_C( 6), INT32_C( 6), INT32_C( 0) }, { -INT32_C( 1957041304), INT32_C( 29), INT32_C( 29), -INT32_C( 503559615) } }, { { -INT32_C( 2084109621), -INT32_C( 1412223970), -INT32_C( 638184227), -INT32_C( 244896523) }, UINT8_C( 2), { INT32_C( 315764), INT32_C( 402356), INT32_C( 357196), INT32_C( 345035) }, { -INT32_C( 2084109621), INT32_C( 13), -INT32_C( 638184227), -INT32_C( 244896523) } }, { { -INT32_C( 816134404), INT32_C( 1743009649), INT32_C( 2022955280), INT32_C( 1643201995) }, UINT8_C( 4), { INT32_C( 4198), INT32_C( 523483), INT32_C( 334642), INT32_C( 359676) }, { -INT32_C( 816134404), INT32_C( 1743009649), INT32_C( 13), INT32_C( 1643201995) } }, { { INT32_C( 1394876527), INT32_C( 455882120), -INT32_C( 1637746771), INT32_C( 707450200) }, UINT8_C( 1), { INT32_C( 820684), INT32_C( 155800), INT32_C( 822191), INT32_C( 791418) }, { INT32_C( 12), INT32_C( 455882120), -INT32_C( 1637746771), INT32_C( 707450200) } }, { { -INT32_C( 1241429622), -INT32_C( 961630619), INT32_C( 2006648396), -INT32_C( 224101327) }, UINT8_C( 8), { INT32_C( 15383396), INT32_C( 19320589), INT32_C( 17462288), INT32_C( 2498061) }, { -INT32_C( 1241429622), -INT32_C( 961630619), 
INT32_C( 2006648396), INT32_C( 10) } }, { { INT32_C( 1941272773), -INT32_C( 646315458), INT32_C( 492153721), INT32_C( 1334390173) }, UINT8_C( 10), { INT32_C( 50), INT32_C( 123), INT32_C( 121), INT32_C( 96) }, { INT32_C( 1941272773), INT32_C( 25), INT32_C( 492153721), INT32_C( 25) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi32(test_vec[i].src); simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); simde__m128i r = simde_mm_mask_lzcnt_epi32(src, test_vec[i].k, a); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm_maskz_lzcnt_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int32_t a[16]; const int32_t r[16]; } test_vec[] = { { UINT8_C( 11), { INT32_C( 6), INT32_C( 2), INT32_C( 1), INT32_C( 7) }, { INT32_C( 29), INT32_C( 30), INT32_C( 0), INT32_C( 29) } }, { UINT8_C( 1), { INT32_C( 57768613), INT32_C( 44212542), INT32_C( 220122657), INT32_C( 188272304) }, { INT32_C( 6), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { UINT8_C( 1), { INT32_C( 15428), INT32_C( 3147), INT32_C( 201), INT32_C( 13035) }, { INT32_C( 18), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { UINT8_C( 14), { INT32_C( 7895), INT32_C( 6520), INT32_C( 2703), INT32_C( 1256) }, { INT32_C( 0), INT32_C( 19), INT32_C( 20), INT32_C( 21) } }, { UINT8_C( 18), { INT32_C( 3584232), INT32_C( 3831674), INT32_C( 372002), INT32_C( 3456164) }, { INT32_C( 0), INT32_C( 10), INT32_C( 0), INT32_C( 0) } }, { UINT8_C( 16), { INT32_C( 1915), INT32_C( 47708), INT32_C( 61410), INT32_C( 63376) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { UINT8_C( 4), { INT32_C( 576116464), INT32_C( 682438940), INT32_C( 1066509946), INT32_C( 1013501310) }, { INT32_C( 0), INT32_C( 0), INT32_C( 2), INT32_C( 0) } }, { UINT8_C( 22), { INT32_C( 121955), INT32_C( 108474), INT32_C( 112020), INT32_C( 114447) }, { INT32_C( 0), INT32_C( 15), INT32_C( 15), INT32_C( 0) } 
} }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); simde__m128i r = simde_mm_maskz_lzcnt_epi32(test_vec[i].k, a); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_lzcnt_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_lzcnt_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_lzcnt_epi32) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/madd.c000066400000000000000000002213241400333146700163310ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Ashleigh Newman-Jones */ #define SIMDE_TEST_X86_AVX512_INSN madd #include #include static int test_simde_mm_mask_madd_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t src[8]; const simde__mmask8 k; const int16_t a[8]; const int16_t b[8]; const int32_t r[4]; } test_vec[] = { { { INT16_C( 21813), INT16_C( 32222), INT16_C( 7623), INT16_C( 12885), -INT16_C( 31767), -INT16_C( 17157), -INT16_C( 26419), INT16_C( 27124) }, UINT8_C( 87), { INT16_C( 4964), INT16_C( 13070), INT16_C( 11795), -INT16_C( 30925), -INT16_C( 4013), -INT16_C( 24845), INT16_C( 17494), -INT16_C( 21549) }, { INT16_C( 20514), INT16_C( 16243), -INT16_C( 23131), INT16_C( 10280), -INT16_C( 6752), INT16_C( 14582), INT16_C( 24537), INT16_C( 15760) }, { INT32_C( 314127506), -INT32_C( 590739145), -INT32_C( 335194014), INT32_C( 1777637581) } }, { { -INT16_C( 24973), -INT16_C( 31120), -INT16_C( 23603), INT16_C( 8205), INT16_C( 404), -INT16_C( 5442), -INT16_C( 28347), INT16_C( 26517) }, UINT8_C(226), { -INT16_C( 23032), -INT16_C( 21113), -INT16_C( 20274), -INT16_C( 19635), -INT16_C( 31066), INT16_C( 1420), -INT16_C( 13802), -INT16_C( 19336) }, { -INT16_C( 198), -INT16_C( 8575), -INT16_C( 24052), INT16_C( 3442), INT16_C( 23648), -INT16_C( 3502), -INT16_C( 17935), -INT16_C( 1324) }, { -INT32_C( 2039439757), INT32_C( 420046578), -INT32_C( 356646508), INT32_C( 1737855301) } }, { { INT16_C( 23391), INT16_C( 11943), -INT16_C( 2805), -INT16_C( 19999), INT16_C( 28283), -INT16_C( 28233), INT16_C( 12088), INT16_C( 29253) }, UINT8_C( 46), { INT16_C( 20679), INT16_C( 26939), INT16_C( 18626), INT16_C( 7881), -INT16_C( 17509), INT16_C( 21520), INT16_C( 2703), -INT16_C( 5196) }, { -INT16_C( 7503), -INT16_C( 22794), -INT16_C( 22333), INT16_C( 12577), -INT16_C( 19873), -INT16_C( 29079), -INT16_C( 8968), -INT16_C( 16451) }, { INT32_C( 782719839), -INT32_C( 316855121), -INT32_C( 277823723), INT32_C( 61238892) } }, { { -INT16_C( 2004), -INT16_C( 4312), -INT16_C( 3776), 
-INT16_C( 9459), INT16_C( 7597), INT16_C( 15408), -INT16_C( 7129), -INT16_C( 9945) }, UINT8_C(198), { INT16_C( 32542), -INT16_C( 14711), -INT16_C( 17503), INT16_C( 21285), -INT16_C( 19676), INT16_C( 75), INT16_C( 2672), INT16_C( 26669) }, { INT16_C( 7218), INT16_C( 9385), -INT16_C( 31703), INT16_C( 18385), INT16_C( 3508), -INT16_C( 26514), INT16_C( 18229), INT16_C( 21342) }, { -INT32_C( 282527700), INT32_C( 946222334), -INT32_C( 71011958), -INT32_C( 651697113) } }, { { -INT16_C( 5945), INT16_C( 26649), INT16_C( 16035), -INT16_C( 14405), INT16_C( 2033), INT16_C( 25288), -INT16_C( 2799), INT16_C( 17610) }, UINT8_C( 17), { INT16_C( 26739), -INT16_C( 1990), -INT16_C( 32455), INT16_C( 18092), INT16_C( 17904), INT16_C( 14203), -INT16_C( 12637), -INT16_C( 29698) }, { INT16_C( 26343), INT16_C( 9518), -INT16_C( 2526), INT16_C( 10519), INT16_C( 31166), -INT16_C( 19654), INT16_C( 32323), -INT16_C( 18492) }, { INT32_C( 685444657), -INT32_C( 944030045), INT32_C( 1657276401), INT32_C( 1154151697) } }, { { -INT16_C( 282), INT16_C( 8111), INT16_C( 23424), INT16_C( 28774), -INT16_C( 7776), INT16_C( 17575), -INT16_C( 22864), -INT16_C( 26673) }, UINT8_C( 12), { -INT16_C( 16898), -INT16_C( 3026), INT16_C( 22484), INT16_C( 19890), INT16_C( 26002), INT16_C( 4240), INT16_C( 18217), INT16_C( 10231) }, { INT16_C( 5878), INT16_C( 21159), INT16_C( 6012), INT16_C( 24306), INT16_C( 14015), INT16_C( 25870), -INT16_C( 23290), INT16_C( 1137) }, { INT32_C( 531627750), INT32_C( 1885756288), INT32_C( 474106830), -INT32_C( 412641283) } }, { { -INT16_C( 24478), INT16_C( 14072), -INT16_C( 21769), -INT16_C( 30333), INT16_C( 5135), INT16_C( 14490), -INT16_C( 28325), INT16_C( 21087) }, UINT8_C(167), { -INT16_C( 23545), INT16_C( 7716), -INT16_C( 32106), -INT16_C( 12835), INT16_C( 17040), INT16_C( 13779), -INT16_C( 10316), INT16_C( 21656) }, { -INT16_C( 12593), INT16_C( 31051), -INT16_C( 10926), INT16_C( 26248), -INT16_C( 16273), INT16_C( 193), INT16_C( 4895), INT16_C( 9895) }, { INT32_C( 536091701), 
INT32_C( 13897076), -INT32_C( 274632573), INT32_C( 1381994843) } }, { { -INT16_C( 13385), INT16_C( 20037), INT16_C( 8781), -INT16_C( 8933), -INT16_C( 4507), INT16_C( 6419), -INT16_C( 21563), -INT16_C( 27539) }, UINT8_C(121), { INT16_C( 3512), -INT16_C( 29237), INT16_C( 12693), INT16_C( 22012), -INT16_C( 781), INT16_C( 1652), -INT16_C( 25692), INT16_C( 28606) }, { INT16_C( 3296), INT16_C( 701), -INT16_C( 26073), INT16_C( 5479), -INT16_C( 32595), INT16_C( 22746), INT16_C( 28397), -INT16_C( 22830) }, { -INT32_C( 8919585), -INT32_C( 585424307), INT32_C( 420736613), -INT32_C( 1382650704) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_mm_loadu_epi16(test_vec[i].src); simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_mask_madd_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm_maskz_madd_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int16_t a[8]; const int16_t b[8]; const int32_t r[4]; } test_vec[] = { { UINT8_C( 77), { INT16_C( 19733), INT16_C( 18694), -INT16_C( 2158), -INT16_C( 12949), -INT16_C( 22664), -INT16_C( 3805), INT16_C( 30779), INT16_C( 1600) }, { INT16_C( 11918), INT16_C( 7268), INT16_C( 32199), INT16_C( 14806), -INT16_C( 14152), -INT16_C( 23986), INT16_C( 16941), INT16_C( 17135) }, { INT32_C( 371045886), INT32_C( 0), INT32_C( 412007658), INT32_C( 548843039) } }, { UINT8_C(143), { -INT16_C( 29707), -INT16_C( 5087), -INT16_C( 4361), -INT16_C( 24987), INT16_C( 22033), -INT16_C( 30247), -INT16_C( 8041), -INT16_C( 15080) }, { INT16_C( 13380), -INT16_C( 15988), -INT16_C( 15094), -INT16_C( 11398), INT16_C( 7187), INT16_C( 21760), INT16_C( 17163), INT16_C( 228) }, { -INT32_C( 316148704), INT32_C( 350626760), -INT32_C( 499823549), -INT32_C( 141445923) } }, { UINT8_C(206), { -INT16_C( 5115), 
-INT16_C( 3131), INT16_C( 25681), -INT16_C( 22524), -INT16_C( 29123), INT16_C( 7487), INT16_C( 1190), -INT16_C( 9631) }, { INT16_C( 9105), INT16_C( 22244), -INT16_C( 18531), -INT16_C( 18070), -INT16_C( 16456), -INT16_C( 1084), -INT16_C( 15196), -INT16_C( 22071) }, { INT32_C( 0), -INT32_C( 68885931), INT32_C( 471132180), INT32_C( 194482561) } }, { UINT8_C(176), { -INT16_C( 25201), -INT16_C( 3326), -INT16_C( 21855), INT16_C( 12080), INT16_C( 20201), -INT16_C( 4651), -INT16_C( 20561), -INT16_C( 11650) }, { -INT16_C( 10860), INT16_C( 19311), INT16_C( 10303), -INT16_C( 509), -INT16_C( 276), -INT16_C( 20318), INT16_C( 19656), INT16_C( 22369) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { UINT8_C(233), { INT16_C( 19043), INT16_C( 3466), -INT16_C( 17798), -INT16_C( 14090), -INT16_C( 7281), INT16_C( 16248), INT16_C( 19042), INT16_C( 14291) }, { INT16_C( 7866), -INT16_C( 7562), INT16_C( 29730), INT16_C( 8399), INT16_C( 32535), INT16_C( 25576), INT16_C( 16352), INT16_C( 17228) }, { INT32_C( 123582346), INT32_C( 0), INT32_C( 0), INT32_C( 557580132) } }, { UINT8_C(137), { INT16_C( 20694), -INT16_C( 28668), -INT16_C( 13242), INT16_C( 10784), INT16_C( 24388), -INT16_C( 28788), -INT16_C( 15566), INT16_C( 20553) }, { INT16_C( 11065), -INT16_C( 21134), -INT16_C( 27654), INT16_C( 31428), INT16_C( 10107), -INT16_C( 17574), -INT16_C( 24973), INT16_C( 19012) }, { INT32_C( 834848622), INT32_C( 0), INT32_C( 0), INT32_C( 779483354) } }, { UINT8_C(238), { -INT16_C( 9656), INT16_C( 5429), INT16_C( 24570), INT16_C( 22873), -INT16_C( 5909), -INT16_C( 20853), -INT16_C( 9167), INT16_C( 24039) }, { -INT16_C( 27570), -INT16_C( 7849), -INT16_C( 11943), -INT16_C( 32675), INT16_C( 6188), -INT16_C( 13580), INT16_C( 15964), -INT16_C( 23112) }, { INT32_C( 0), -INT32_C( 1040814785), INT32_C( 246618848), -INT32_C( 701931356) } }, { UINT8_C( 24), { -INT16_C( 17683), INT16_C( 19475), INT16_C( 27667), -INT16_C( 969), -INT16_C( 6664), -INT16_C( 11219), -INT16_C( 30004), INT16_C( 24866) }, { 
INT16_C( 1250), -INT16_C( 19526), INT16_C( 14945), INT16_C( 31199), -INT16_C( 22226), INT16_C( 27861), INT16_C( 31330), INT16_C( 20357) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 433828158) } }, }; /* For every entry of test_vec: load a and b with simde_x_mm_loadu_epi16, call simde_mm_maskz_madd_epi16 with the entry's mask k, and hand the result plus the stored r (loaded with simde_x_mm_loadu_epi32) to simde_test_x86_assert_equal_i32x4. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_maskz_madd_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } /* Test for simde_mm256_mask_madd_epi16. Each test_vec entry holds src (16 x int16_t), an 8-bit mask k, inputs a and b (16 x int16_t each) and the stored result r (8 x int32_t). */ static int test_simde_mm256_mask_madd_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t src[16]; const simde__mmask8 k; const int16_t a[16]; const int16_t b[16]; const int32_t r[8]; } test_vec[] = { { { INT16_C( 16074), -INT16_C( 23692), -INT16_C( 14496), INT16_C( 27418), -INT16_C( 12637), INT16_C( 14730), INT16_C( 24645), INT16_C( 3541), -INT16_C( 19839), INT16_C( 7216), INT16_C( 21625), -INT16_C( 4144), INT16_C( 21389), INT16_C( 15792), INT16_C( 9810), INT16_C( 7279) }, UINT8_C(100), { -INT16_C( 16412), -INT16_C( 21564), INT16_C( 12250), -INT16_C( 22449), -INT16_C( 30535), INT16_C( 6637), -INT16_C( 1187), INT16_C( 4251), -INT16_C( 18645), -INT16_C( 32631), INT16_C( 30855), -INT16_C( 9715), INT16_C( 18984), INT16_C( 20012), INT16_C( 18874), -INT16_C( 24910) }, { INT16_C( 30472), -INT16_C( 7607), -INT16_C( 26458), INT16_C( 24714), INT16_C( 30752), INT16_C( 32377), INT16_C( 5235), -INT16_C( 24946), INT16_C( 6091), INT16_C( 21022), INT16_C( 11151), -INT16_C( 18643), INT16_C( 22902), INT16_C( 12294), -INT16_C( 18270), -INT16_C( 21554) }, { -INT32_C( 1552662838), INT32_C( 1796917088), -INT32_C( 724126171), INT32_C( 232087621), INT32_C( 472953473), INT32_C( 525180850), INT32_C( 680799096), INT32_C( 477046354) } }, { { INT16_C( 5935), -INT16_C( 10611), INT16_C( 6320), -INT16_C( 12234), -INT16_C( 20592), INT16_C( 846), -INT16_C( 9020), -INT16_C( 28767), -INT16_C( 16141), -INT16_C( 32030), INT16_C( 4075), INT16_C(
24890), INT16_C( 16488), INT16_C( 2961), INT16_C( 24568), INT16_C( 10422) }, UINT8_C(119), { -INT16_C( 445), INT16_C( 23335), -INT16_C( 2252), -INT16_C( 7189), -INT16_C( 4538), INT16_C( 8871), INT16_C( 14224), INT16_C( 20502), -INT16_C( 26599), INT16_C( 10299), -INT16_C( 25134), INT16_C( 4752), -INT16_C( 25810), -INT16_C( 29173), INT16_C( 13137), -INT16_C( 27387) }, { INT16_C( 11313), INT16_C( 26096), -INT16_C( 9181), INT16_C( 26952), -INT16_C( 3894), INT16_C( 23180), -INT16_C( 24025), INT16_C( 16554), -INT16_C( 6598), INT16_C( 3432), -INT16_C( 1917), -INT16_C( 20193), INT16_C( 10900), -INT16_C( 6849), INT16_C( 17501), -INT16_C( 29062) }, { INT32_C( 603915875), -INT32_C( 173082316), INT32_C( 223300752), -INT32_C( 1885217596), INT32_C( 210846370), -INT32_C( 47775258), -INT32_C( 81523123), INT32_C( 683040760) } }, { { INT16_C( 27504), -INT16_C( 27405), INT16_C( 15431), INT16_C( 4605), -INT16_C( 30420), INT16_C( 21356), INT16_C( 5675), INT16_C( 26259), -INT16_C( 1028), INT16_C( 32627), -INT16_C( 27917), -INT16_C( 30927), INT16_C( 28861), INT16_C( 6765), -INT16_C( 6219), INT16_C( 9641) }, UINT8_C( 82), { -INT16_C( 18020), -INT16_C( 10087), -INT16_C( 21577), INT16_C( 16388), INT16_C( 22295), INT16_C( 11628), -INT16_C( 11542), -INT16_C( 6870), -INT16_C( 22203), -INT16_C( 10279), INT16_C( 24794), INT16_C( 19348), -INT16_C( 20531), -INT16_C( 19200), INT16_C( 9560), -INT16_C( 3065) }, { -INT16_C( 24097), -INT16_C( 26931), -INT16_C( 11956), INT16_C( 25558), INT16_C( 16937), INT16_C( 5008), -INT16_C( 17900), INT16_C( 23033), -INT16_C( 11676), INT16_C( 15921), -INT16_C( 15054), INT16_C( 137), -INT16_C( 30348), -INT16_C( 13131), -INT16_C( 17233), -INT16_C( 28991) }, { -INT32_C( 1795986576), INT32_C( 676819116), INT32_C( 1399621932), INT32_C( 1720915499), INT32_C( 95590269), -INT32_C( 2026794253), INT32_C( 875189988), INT32_C( 631891893) } }, { { -INT16_C( 29091), -INT16_C( 22236), -INT16_C( 1441), -INT16_C( 30708), -INT16_C( 25283), INT16_C( 20892), -INT16_C( 27305), -INT16_C( 
17493), -INT16_C( 9113), -INT16_C( 26118), -INT16_C( 31839), INT16_C( 5785), INT16_C( 19981), -INT16_C( 17182), -INT16_C( 23797), INT16_C( 26698) }, UINT8_C( 49), { INT16_C( 4718), INT16_C( 26769), INT16_C( 6430), -INT16_C( 17499), -INT16_C( 2123), INT16_C( 18963), -INT16_C( 12638), INT16_C( 32433), INT16_C( 19400), INT16_C( 19487), INT16_C( 13796), INT16_C( 13145), INT16_C( 5400), -INT16_C( 17602), -INT16_C( 22945), -INT16_C( 12819) }, { INT16_C( 32440), -INT16_C( 10443), -INT16_C( 9321), INT16_C( 19858), -INT16_C( 23086), INT16_C( 29847), INT16_C( 18804), INT16_C( 15602), INT16_C( 4500), INT16_C( 30856), -INT16_C( 7865), INT16_C( 24491), -INT16_C( 5642), INT16_C( 21786), INT16_C( 1936), INT16_C( 18466) }, { -INT32_C( 126496747), -INT32_C( 2012415393), INT32_C( 1369218365), -INT32_C( 1146383017), INT32_C( 688590872), INT32_C( 213428655), -INT32_C( 1126019571), INT32_C( 1749721867) } }, { { INT16_C( 22661), INT16_C( 7455), -INT16_C( 19917), INT16_C( 1386), INT16_C( 343), -INT16_C( 13447), INT16_C( 27466), -INT16_C( 8696), -INT16_C( 28548), -INT16_C( 15529), INT16_C( 626), INT16_C( 26658), INT16_C( 15852), INT16_C( 31934), -INT16_C( 8124), -INT16_C( 13628) }, UINT8_C( 56), { -INT16_C( 6172), -INT16_C( 27029), INT16_C( 28753), INT16_C( 21229), -INT16_C( 17943), INT16_C( 21661), INT16_C( 31681), INT16_C( 20945), -INT16_C( 27438), -INT16_C( 10813), INT16_C( 11447), -INT16_C( 2879), INT16_C( 15850), -INT16_C( 13768), INT16_C( 513), -INT16_C( 6909) }, { INT16_C( 28393), INT16_C( 14971), INT16_C( 27103), -INT16_C( 14195), INT16_C( 10786), -INT16_C( 7395), -INT16_C( 4443), INT16_C( 30772), -INT16_C( 1918), INT16_C( 14669), INT16_C( 3620), INT16_C( 3629), INT16_C( 26187), INT16_C( 19672), -INT16_C( 9368), INT16_C( 21042) }, { INT32_C( 488593541), INT32_C( 90878515), -INT32_C( 881262249), INT32_C( 503760857), -INT32_C( 105989813), INT32_C( 30990249), INT32_C( 2092842476), -INT32_C( 893067196) } }, { { -INT16_C( 21174), INT16_C( 10636), INT16_C( 6422), INT16_C( 14577), 
INT16_C( 3651), -INT16_C( 5861), INT16_C( 20732), INT16_C( 32609), -INT16_C( 20920), INT16_C( 27832), -INT16_C( 6468), INT16_C( 1914), INT16_C( 21068), -INT16_C( 19373), -INT16_C( 31442), INT16_C( 30726) }, UINT8_C( 51), { -INT16_C( 24173), -INT16_C( 21431), -INT16_C( 32110), -INT16_C( 24080), -INT16_C( 9827), -INT16_C( 4707), INT16_C( 7226), -INT16_C( 6091), -INT16_C( 24107), -INT16_C( 17500), -INT16_C( 21733), INT16_C( 28167), -INT16_C( 17410), -INT16_C( 31588), INT16_C( 5314), INT16_C( 21943) }, { INT16_C( 181), INT16_C( 18177), -INT16_C( 3710), INT16_C( 8424), -INT16_C( 31030), INT16_C( 1037), INT16_C( 17314), INT16_C( 30700), -INT16_C( 28444), INT16_C( 50), INT16_C( 14651), INT16_C( 14958), INT16_C( 2805), -INT16_C( 18498), INT16_C( 29982), -INT16_C( 11508) }, { -INT32_C( 393926600), -INT32_C( 83721820), -INT32_C( 384102845), INT32_C( 2137084156), INT32_C( 684824508), INT32_C( 102911803), -INT32_C( 1269607860), INT32_C( 2013693230) } }, { { INT16_C( 3445), -INT16_C( 2022), INT16_C( 1023), -INT16_C( 14056), INT16_C( 9609), INT16_C( 11214), -INT16_C( 17816), INT16_C( 19875), -INT16_C( 10933), -INT16_C( 31155), -INT16_C( 17649), INT16_C( 1216), INT16_C( 32453), -INT16_C( 7237), -INT16_C( 14349), INT16_C( 27062) }, UINT8_C(212), { INT16_C( 25040), -INT16_C( 11309), -INT16_C( 25223), -INT16_C( 24996), -INT16_C( 30613), INT16_C( 9479), INT16_C( 21547), INT16_C( 112), -INT16_C( 2143), INT16_C( 23567), INT16_C( 5047), INT16_C( 13857), INT16_C( 1230), -INT16_C( 27351), -INT16_C( 27974), -INT16_C( 30102) }, { INT16_C( 15859), INT16_C( 27742), -INT16_C( 17702), INT16_C( 17675), INT16_C( 4674), INT16_C( 28011), -INT16_C( 9370), INT16_C( 1902), INT16_C( 32210), -INT16_C( 30109), -INT16_C( 31599), INT16_C( 24512), -INT16_C( 5752), INT16_C( 17141), INT16_C( 24444), INT16_C( 28620) }, { -INT32_C( 132510347), -INT32_C( 921172993), INT32_C( 122431107), INT32_C( 1302575720), -INT32_C( 778604833), INT32_C( 79739663), -INT32_C( 475898451), -INT32_C( 1545315696) } }, { { INT16_C( 
10908), INT16_C( 30684), -INT16_C( 6171), INT16_C( 10172), INT16_C( 10233), INT16_C( 24469), INT16_C( 771), -INT16_C( 10906), -INT16_C( 13952), INT16_C( 4447), INT16_C( 8013), -INT16_C( 10895), INT16_C( 26121), -INT16_C( 31465), -INT16_C( 7227), INT16_C( 25076) }, UINT8_C( 14), { -INT16_C( 10032), -INT16_C( 18445), INT16_C( 6805), -INT16_C( 17232), INT16_C( 4015), -INT16_C( 19777), -INT16_C( 27275), INT16_C( 15923), INT16_C( 17652), INT16_C( 5259), INT16_C( 24757), INT16_C( 6941), -INT16_C( 23945), INT16_C( 23520), INT16_C( 17046), INT16_C( 26473) }, { INT16_C( 23578), -INT16_C( 20706), -INT16_C( 12426), INT16_C( 9836), INT16_C( 11230), INT16_C( 21720), INT16_C( 3008), -INT16_C( 19054), INT16_C( 7760), INT16_C( 1481), -INT16_C( 6530), -INT16_C( 2527), INT16_C( 392), INT16_C( 7761), -INT16_C( 17853), INT16_C( 24197) }, { INT32_C( 2010917532), -INT32_C( 254052882), -INT32_C( 384467990), -INT32_C( 385440042), INT32_C( 291490176), -INT32_C( 714006707), -INT32_C( 2062064119), INT32_C( 1643439045) } }, }; /* For every entry of test_vec: load src, a and b with simde_x_mm256_loadu_epi16, call simde_mm256_mask_madd_epi16(src, k, a, b), and hand the result plus the stored r (loaded with simde_x_mm256_loadu_epi32) to simde_test_x86_assert_equal_i32x8. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i src = simde_x_mm256_loadu_epi16(test_vec[i].src); simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_mask_madd_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x8(r, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; } /* Test for simde_mm256_maskz_madd_epi16. Each test_vec entry holds an 8-bit mask k, inputs a and b (16 x int16_t each) and the stored result r (8 x int32_t). */ static int test_simde_mm256_maskz_madd_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int16_t a[16]; const int16_t b[16]; const int32_t r[8]; } test_vec[] = { { UINT8_C(217), { -INT16_C( 16454), -INT16_C( 5745), -INT16_C( 10179), INT16_C( 22530), INT16_C( 26767), -INT16_C( 17839), -INT16_C( 21048), -INT16_C( 156), INT16_C( 7387), INT16_C( 4894), -INT16_C( 9375), INT16_C( 29558), INT16_C( 5931), -INT16_C( 28270), INT16_C( 6388), -INT16_C( 20885) }, { -INT16_C( 1321), INT16_C( 5271), -INT16_C( 26158), INT16_C( 24940),
-INT16_C( 17151), -INT16_C( 14053), -INT16_C( 32661), INT16_C( 18121), -INT16_C( 6244), -INT16_C( 679), -INT16_C( 12350), -INT16_C( 4495), INT16_C( 998), -INT16_C( 9601), -INT16_C( 5605), -INT16_C( 3448) }, { -INT32_C( 8546161), INT32_C( 0), INT32_C( 0), INT32_C( 684621852), -INT32_C( 49447454), INT32_C( 0), INT32_C( 277339408), INT32_C( 36206740) } }, { UINT8_C(228), { INT16_C( 1568), -INT16_C( 17993), INT16_C( 6259), INT16_C( 12475), -INT16_C( 31692), -INT16_C( 19301), -INT16_C( 7859), INT16_C( 13392), INT16_C( 20026), INT16_C( 2807), -INT16_C( 6721), -INT16_C( 15632), -INT16_C( 13468), INT16_C( 20445), -INT16_C( 12461), INT16_C( 29491) }, { -INT16_C( 5419), INT16_C( 18477), -INT16_C( 6141), INT16_C( 14201), INT16_C( 5228), -INT16_C( 17685), INT16_C( 15350), INT16_C( 12526), -INT16_C( 6775), INT16_C( 18490), INT16_C( 11210), INT16_C( 12042), -INT16_C( 6154), INT16_C( 18814), -INT16_C( 20042), -INT16_C( 29507) }, { INT32_C( 0), INT32_C( 0), INT32_C( 175652409), INT32_C( 0), INT32_C( 0), -INT32_C( 263582954), INT32_C( 467534302), -INT32_C( 620447575) } }, { UINT8_C(156), { -INT16_C( 11030), -INT16_C( 11617), -INT16_C( 10675), INT16_C( 25150), -INT16_C( 1855), -INT16_C( 936), -INT16_C( 30489), -INT16_C( 13178), -INT16_C( 12605), -INT16_C( 4457), -INT16_C( 14631), -INT16_C( 16156), INT16_C( 11588), -INT16_C( 2697), INT16_C( 1002), -INT16_C( 11119) }, { INT16_C( 12503), INT16_C( 9638), -INT16_C( 6906), -INT16_C( 14457), -INT16_C( 8227), -INT16_C( 15164), INT16_C( 19047), INT16_C( 10897), INT16_C( 10264), -INT16_C( 3816), -INT16_C( 786), INT16_C( 12978), INT16_C( 10538), INT16_C( 5159), -INT16_C( 18132), INT16_C( 1001) }, { INT32_C( 0), INT32_C( 0), INT32_C( 29454589), -INT32_C( 724324649), -INT32_C( 112369808), INT32_C( 0), INT32_C( 0), -INT32_C( 29298383) } }, { UINT8_C(233), { INT16_C( 10383), INT16_C( 29936), -INT16_C( 18513), -INT16_C( 29102), INT16_C( 5755), -INT16_C( 14858), INT16_C( 8359), -INT16_C( 12322), -INT16_C( 12487), INT16_C( 13757), -INT16_C( 4223), 
-INT16_C( 21921), INT16_C( 29719), -INT16_C( 12074), -INT16_C( 9635), -INT16_C( 4935) }, { -INT16_C( 22270), -INT16_C( 19871), -INT16_C( 19615), -INT16_C( 9152), INT16_C( 14025), INT16_C( 29090), -INT16_C( 32681), -INT16_C( 28608), -INT16_C( 433), -INT16_C( 11835), INT16_C( 9709), INT16_C( 1147), INT16_C( 21145), -INT16_C( 2348), -INT16_C( 29140), INT16_C( 12002) }, { -INT32_C( 826087666), INT32_C( 0), INT32_C( 0), INT32_C( 79327297), INT32_C( 0), -INT32_C( 66144494), INT32_C( 656758007), INT32_C( 221534030) } }, { UINT8_C( 55), { -INT16_C( 8125), -INT16_C( 2408), INT16_C( 29985), INT16_C( 22464), INT16_C( 12567), -INT16_C( 26706), INT16_C( 15985), INT16_C( 28646), -INT16_C( 18684), INT16_C( 10589), INT16_C( 24883), -INT16_C( 31294), -INT16_C( 18378), -INT16_C( 15183), -INT16_C( 8294), -INT16_C( 8453) }, { -INT16_C( 27456), -INT16_C( 7724), -INT16_C( 27639), INT16_C( 8248), -INT16_C( 6203), INT16_C( 14263), -INT16_C( 25307), INT16_C( 10662), INT16_C( 853), -INT16_C( 30638), INT16_C( 5221), -INT16_C( 25843), -INT16_C( 16692), INT16_C( 26463), INT16_C( 23197), INT16_C( 23877) }, { INT32_C( 241679392), -INT32_C( 643472343), -INT32_C( 458860779), INT32_C( 0), -INT32_C( 340363234), INT32_C( 938644985), INT32_C( 0), INT32_C( 0) } }, { UINT8_C(238), { INT16_C( 15897), -INT16_C( 20745), INT16_C( 6007), INT16_C( 24179), -INT16_C( 21810), INT16_C( 27779), -INT16_C( 21167), INT16_C( 21697), INT16_C( 18943), INT16_C( 5305), INT16_C( 21590), INT16_C( 5344), INT16_C( 18355), INT16_C( 3761), INT16_C( 3980), -INT16_C( 22788) }, { -INT16_C( 2995), -INT16_C( 15276), -INT16_C( 14581), -INT16_C( 9694), -INT16_C( 22926), -INT16_C( 15546), INT16_C( 1875), INT16_C( 21015), -INT16_C( 11952), -INT16_C( 22938), INT16_C( 18213), -INT16_C( 9798), INT16_C( 27534), INT16_C( 7143), -INT16_C( 7302), -INT16_C( 14143) }, { INT32_C( 0), -INT32_C( 321979293), INT32_C( 68163726), INT32_C( 416274330), INT32_C( 0), INT32_C( 340858158), INT32_C( 532251393), INT32_C( 293228724) } }, { UINT8_C(215), { 
-INT16_C( 29675), -INT16_C( 8989), -INT16_C( 16977), INT16_C( 21838), INT16_C( 4355), INT16_C( 2728), -INT16_C( 1495), -INT16_C( 1446), INT16_C( 97), -INT16_C( 22497), -INT16_C( 1862), INT16_C( 9526), INT16_C( 20959), -INT16_C( 15456), INT16_C( 26642), INT16_C( 10138) }, { INT16_C( 32244), -INT16_C( 23804), INT16_C( 21050), INT16_C( 15864), -INT16_C( 24476), -INT16_C( 29369), -INT16_C( 24165), -INT16_C( 889), -INT16_C( 22879), INT16_C( 23460), -INT16_C( 9569), INT16_C( 32385), INT16_C( 8492), INT16_C( 15937), -INT16_C( 9079), INT16_C( 32102) }, { -INT32_C( 742866544), -INT32_C( 10927818), -INT32_C( 186711612), INT32_C( 0), -INT32_C( 529998883), INT32_C( 0), -INT32_C( 68338444), INT32_C( 83567358) } }, { UINT8_C( 89), { INT16_C( 8554), -INT16_C( 17260), -INT16_C( 12007), -INT16_C( 17888), -INT16_C( 21223), -INT16_C( 17835), INT16_C( 20788), -INT16_C( 9380), -INT16_C( 18443), -INT16_C( 12422), -INT16_C( 1992), INT16_C( 23035), INT16_C( 14906), INT16_C( 5858), INT16_C( 24736), INT16_C( 2671) }, { INT16_C( 897), -INT16_C( 25914), -INT16_C( 6187), -INT16_C( 4524), -INT16_C( 22124), -INT16_C( 13912), INT16_C( 1274), -INT16_C( 4188), INT16_C( 7868), -INT16_C( 2881), -INT16_C( 17898), INT16_C( 20558), INT16_C( 12532), -INT16_C( 27546), -INT16_C( 10608), INT16_C( 4510) }, { INT32_C( 454948578), INT32_C( 0), INT32_C( 0), INT32_C( 65767352), -INT32_C( 109321742), INT32_C( 0), INT32_C( 25437524), INT32_C( 0) } }, }; /* For every entry of test_vec: load a and b with simde_mm256_loadu_epi16, call simde_mm256_maskz_madd_epi16 with the entry's mask k, and hand the result plus the stored r (loaded with simde_mm256_loadu_epi32) to simde_test_x86_assert_equal_i32x8. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_maskz_madd_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r)); } return 0; } /* Test for simde_mm512_madd_epi16 (no mask). Each test_vec entry holds inputs a and b (32 x int16_t each) and the stored result r (16 x int32_t). */ static int test_simde_mm512_madd_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[32]; const int16_t b[32]; const int32_t r[16]; } test_vec[] = { { { INT16_C( 28000), INT16_C( 31877), -INT16_C( 10728),
INT16_C( 28413), -INT16_C( 7321), -INT16_C( 24759), INT16_C( 27401), INT16_C( 4695), -INT16_C( 5593), -INT16_C( 3334), -INT16_C( 10804), INT16_C( 22250), INT16_C( 24880), -INT16_C( 25050), INT16_C( 4800), INT16_C( 8447), -INT16_C( 31616), -INT16_C( 26467), -INT16_C( 26021), -INT16_C( 15866), INT16_C( 20349), -INT16_C( 31135), -INT16_C( 18245), -INT16_C( 7528), -INT16_C( 27997), INT16_C( 28629), -INT16_C( 16537), -INT16_C( 26427), -INT16_C( 5344), -INT16_C( 7882), INT16_C( 13821), INT16_C( 32001) }, { -INT16_C( 24903), INT16_C( 5142), INT16_C( 7224), -INT16_C( 18986), INT16_C( 14188), INT16_C( 10043), -INT16_C( 11280), -INT16_C( 27895), -INT16_C( 8602), -INT16_C( 13054), -INT16_C( 14435), -INT16_C( 16795), -INT16_C( 25678), -INT16_C( 20577), -INT16_C( 24368), -INT16_C( 30163), INT16_C( 17215), INT16_C( 30622), INT16_C( 30047), -INT16_C( 13523), INT16_C( 26796), -INT16_C( 25358), -INT16_C( 964), -INT16_C( 24017), INT16_C( 12762), INT16_C( 30831), -INT16_C( 10760), -INT16_C( 21962), -INT16_C( 10896), INT16_C( 16730), -INT16_C( 30859), -INT16_C( 19253) }, { -INT32_C( 533372466), -INT32_C( 616948290), -INT32_C( 352524985), -INT32_C( 440050305), INT32_C( 91633022), -INT32_C( 217733010), -INT32_C( 123414790), -INT32_C( 371753261), -INT32_C( 1354741914), -INT32_C( 567297069), INT32_C( 1334793134), INT32_C( 198388156), INT32_C( 525362985), INT32_C( 758327894), -INT32_C( 73637636), -INT32_C( 1042617492) } }, { { INT16_C( 27082), INT16_C( 10540), INT16_C( 23006), -INT16_C( 29707), -INT16_C( 6207), -INT16_C( 729), INT16_C( 22499), -INT16_C( 16737), INT16_C( 3976), -INT16_C( 32458), INT16_C( 27876), INT16_C( 21547), -INT16_C( 31423), -INT16_C( 18795), INT16_C( 24588), -INT16_C( 10645), -INT16_C( 26678), -INT16_C( 22528), -INT16_C( 2576), -INT16_C( 20173), INT16_C( 23516), -INT16_C( 16209), INT16_C( 20146), INT16_C( 14974), -INT16_C( 19363), INT16_C( 16827), -INT16_C( 6368), INT16_C( 24982), INT16_C( 11116), INT16_C( 30999), -INT16_C( 32116), INT16_C( 22095) }, { INT16_C( 
20249), INT16_C( 2558), INT16_C( 12868), INT16_C( 8635), INT16_C( 27277), INT16_C( 16353), INT16_C( 24504), INT16_C( 5753), INT16_C( 13587), INT16_C( 13143), -INT16_C( 4836), -INT16_C( 30572), -INT16_C( 21735), -INT16_C( 23295), INT16_C( 20782), INT16_C( 18427), -INT16_C( 1632), -INT16_C( 6831), INT16_C( 3115), -INT16_C( 18426), -INT16_C( 6282), INT16_C( 12023), INT16_C( 28998), INT16_C( 22852), -INT16_C( 25434), -INT16_C( 15732), INT16_C( 8329), -INT16_C( 23990), INT16_C( 19659), -INT16_C( 1721), INT16_C( 17053), INT16_C( 15681) }, { INT32_C( 575344738), INT32_C( 39521263), -INT32_C( 181229676), INT32_C( 455027535), -INT32_C( 372573582), -INT32_C( 793543220), INT32_C( 1120808430), INT32_C( 314832401), INT32_C( 197427264), INT32_C( 363683458), -INT32_C( 342608319), INT32_C( 926379556), INT32_C( 227756178), -INT32_C( 652357252), INT32_C( 165180165), -INT32_C( 201202453) } }, { { -INT16_C( 28100), INT16_C( 26402), INT16_C( 10398), INT16_C( 5152), INT16_C( 5903), INT16_C( 21826), -INT16_C( 30840), INT16_C( 11950), INT16_C( 14883), -INT16_C( 21264), INT16_C( 15194), INT16_C( 9807), -INT16_C( 27001), INT16_C( 9247), INT16_C( 24793), INT16_C( 5473), -INT16_C( 31502), -INT16_C( 28548), -INT16_C( 25428), -INT16_C( 17244), -INT16_C( 6220), INT16_C( 15377), -INT16_C( 16274), -INT16_C( 28309), INT16_C( 23546), INT16_C( 21821), -INT16_C( 29546), INT16_C( 7547), -INT16_C( 26077), -INT16_C( 959), -INT16_C( 23557), -INT16_C( 4847) }, { -INT16_C( 29401), -INT16_C( 11394), INT16_C( 8746), -INT16_C( 8561), -INT16_C( 24311), INT16_C( 30490), -INT16_C( 31391), INT16_C( 23304), INT16_C( 18145), INT16_C( 30640), INT16_C( 11218), -INT16_C( 2667), -INT16_C( 10554), -INT16_C( 15887), INT16_C( 633), -INT16_C( 24402), INT16_C( 11408), -INT16_C( 17804), INT16_C( 847), INT16_C( 22680), -INT16_C( 19804), INT16_C( 1488), -INT16_C( 10184), INT16_C( 6497), INT16_C( 4382), -INT16_C( 3696), INT16_C( 9533), INT16_C( 998), -INT16_C( 9988), INT16_C( 30148), INT16_C( 29402), INT16_C( 27158) }, { 
INT32_C( 525343712), INT32_C( 46834636), INT32_C( 521966907), INT32_C( 1246581240), -INT32_C( 381476925), INT32_C( 144291023), INT32_C( 138061465), -INT32_C( 117858177), INT32_C( 148893776), -INT32_C( 412631436), INT32_C( 146061856), -INT32_C( 18189157), INT32_C( 22528156), -INT32_C( 274130112), INT32_C( 231545144), -INT32_C( 824257740) } }, { { -INT16_C( 30049), -INT16_C( 4572), -INT16_C( 17267), INT16_C( 12870), INT16_C( 5743), -INT16_C( 22729), -INT16_C( 26385), INT16_C( 3520), INT16_C( 20650), -INT16_C( 6146), -INT16_C( 6794), INT16_C( 29418), -INT16_C( 20803), -INT16_C( 26649), -INT16_C( 736), -INT16_C( 16638), INT16_C( 9863), INT16_C( 5549), -INT16_C( 2845), INT16_C( 21063), INT16_C( 32266), -INT16_C( 1543), -INT16_C( 18153), -INT16_C( 16121), INT16_C( 1289), INT16_C( 32680), -INT16_C( 27926), -INT16_C( 22543), -INT16_C( 9920), INT16_C( 24639), INT16_C( 16854), INT16_C( 24096) }, { -INT16_C( 12953), INT16_C( 19059), -INT16_C( 17727), -INT16_C( 13156), -INT16_C( 27336), INT16_C( 20421), -INT16_C( 13234), INT16_C( 22544), -INT16_C( 18222), -INT16_C( 17193), -INT16_C( 14006), -INT16_C( 30108), -INT16_C( 23646), INT16_C( 30955), INT16_C( 3044), INT16_C( 19414), INT16_C( 18904), -INT16_C( 25962), INT16_C( 12803), INT16_C( 15462), INT16_C( 11208), INT16_C( 5771), -INT16_C( 25352), -INT16_C( 13714), INT16_C( 18004), -INT16_C( 24698), -INT16_C( 5617), -INT16_C( 20183), INT16_C( 5261), INT16_C( 28969), INT16_C( 31), -INT16_C( 1859) }, { INT32_C( 302086949), INT32_C( 136774389), -INT32_C( 621139557), INT32_C( 428533970), -INT32_C( 270616122), -INT32_C( 790560380), -INT32_C( 333012057), -INT32_C( 325250516), INT32_C( 42387014), INT32_C( 289251571), INT32_C( 352732675), INT32_C( 681298250), -INT32_C( 783923484), INT32_C( 611845711), INT32_C( 661578071), -INT32_C( 44271990) } }, { { INT16_C( 21321), INT16_C( 19858), -INT16_C( 1915), INT16_C( 19849), INT16_C( 5155), INT16_C( 7012), -INT16_C( 11600), INT16_C( 1509), INT16_C( 27672), INT16_C( 10148), -INT16_C( 12970), 
-INT16_C( 6952), INT16_C( 738), INT16_C( 341), INT16_C( 4610), INT16_C( 19449), -INT16_C( 29851), -INT16_C( 5224), INT16_C( 8579), -INT16_C( 22728), -INT16_C( 25546), -INT16_C( 6462), -INT16_C( 22417), -INT16_C( 30741), -INT16_C( 28908), INT16_C( 27311), -INT16_C( 30883), INT16_C( 16206), -INT16_C( 23415), -INT16_C( 29888), INT16_C( 15030), INT16_C( 7383) }, { INT16_C( 28613), INT16_C( 18695), INT16_C( 16273), -INT16_C( 14352), -INT16_C( 19748), INT16_C( 19373), -INT16_C( 26278), INT16_C( 28370), -INT16_C( 32472), -INT16_C( 31271), INT16_C( 9993), -INT16_C( 27964), INT16_C( 1483), -INT16_C( 32226), -INT16_C( 2753), INT16_C( 1182), -INT16_C( 23196), -INT16_C( 2739), INT16_C( 15844), -INT16_C( 16196), INT16_C( 27376), INT16_C( 18955), -INT16_C( 8701), INT16_C( 11193), -INT16_C( 28065), INT16_C( 26801), INT16_C( 30137), -INT16_C( 31237), INT16_C( 6522), -INT16_C( 18169), -INT16_C( 23282), INT16_C( 29374) }, { INT32_C( 981303083), -INT32_C( 316035643), INT32_C( 34042536), INT32_C( 347635130), -INT32_C( 1215903292), INT32_C( 64796518), -INT32_C( 9894612), INT32_C( 10297388), INT32_C( 706732332), INT32_C( 504028364), -INT32_C( 821834506), -INT32_C( 149033696), INT32_C( 1543265131), -INT32_C( 1436947793), INT32_C( 390322442), -INT32_C( 133060218) } }, { { INT16_C( 2890), INT16_C( 11880), INT16_C( 9289), INT16_C( 14831), -INT16_C( 1394), -INT16_C( 28285), INT16_C( 15576), INT16_C( 14525), INT16_C( 28366), -INT16_C( 30560), -INT16_C( 25629), INT16_C( 24077), INT16_C( 5300), -INT16_C( 15849), -INT16_C( 10823), INT16_C( 821), -INT16_C( 25119), INT16_C( 10801), INT16_C( 8385), INT16_C( 20579), -INT16_C( 6629), -INT16_C( 3103), -INT16_C( 25053), -INT16_C( 3797), -INT16_C( 13300), -INT16_C( 3975), -INT16_C( 31129), INT16_C( 7246), INT16_C( 26010), INT16_C( 21470), INT16_C( 4923), INT16_C( 7254) }, { -INT16_C( 30544), INT16_C( 29254), -INT16_C( 22104), -INT16_C( 15422), -INT16_C( 23665), -INT16_C( 19785), -INT16_C( 7614), INT16_C( 20132), INT16_C( 7598), INT16_C( 5694), -INT16_C( 
29532), INT16_C( 15922), INT16_C( 4338), INT16_C( 11666), -INT16_C( 6108), -INT16_C( 11191), -INT16_C( 28816), INT16_C( 6470), INT16_C( 2104), -INT16_C( 14372), -INT16_C( 27732), -INT16_C( 4486), INT16_C( 7798), INT16_C( 9276), INT16_C( 31547), -INT16_C( 8390), INT16_C( 27655), -INT16_C( 1762), -INT16_C( 20355), -INT16_C( 24282), INT16_C( 28568), INT16_C( 2421) }, { INT32_C( 259265360), -INT32_C( 434047738), INT32_C( 592607735), INT32_C( 173821636), INT32_C( 41516228), INT32_C( 1140229622), -INT32_C( 161903034), INT32_C( 56919073), INT32_C( 793711574), -INT32_C( 278119348), INT32_C( 197755486), -INT32_C( 230584266), -INT32_C( 386224850), -INT32_C( 873639947), -INT32_C( 1050768090), INT32_C( 158202198) } }, { { -INT16_C( 17154), INT16_C( 13858), -INT16_C( 316), INT16_C( 28926), INT16_C( 30866), INT16_C( 2142), -INT16_C( 25706), -INT16_C( 11988), INT16_C( 26390), INT16_C( 7601), -INT16_C( 12333), INT16_C( 20503), INT16_C( 15743), INT16_C( 6129), INT16_C( 26541), -INT16_C( 21728), INT16_C( 16931), -INT16_C( 6174), -INT16_C( 8127), -INT16_C( 11432), -INT16_C( 18856), -INT16_C( 4389), INT16_C( 1873), INT16_C( 26559), INT16_C( 28782), INT16_C( 17029), -INT16_C( 25537), -INT16_C( 16750), -INT16_C( 31527), -INT16_C( 31018), -INT16_C( 2325), INT16_C( 3634) }, { INT16_C( 5177), INT16_C( 31477), INT16_C( 19956), INT16_C( 19533), INT16_C( 10244), INT16_C( 21818), -INT16_C( 1745), -INT16_C( 24899), INT16_C( 17002), -INT16_C( 22048), INT16_C( 29406), -INT16_C( 18584), INT16_C( 16118), -INT16_C( 7874), INT16_C( 28724), INT16_C( 28143), -INT16_C( 6780), INT16_C( 30951), INT16_C( 13362), INT16_C( 14020), -INT16_C( 420), -INT16_C( 29556), INT16_C( 18935), INT16_C( 24874), INT16_C( 2699), INT16_C( 26891), INT16_C( 29564), INT16_C( 29472), INT16_C( 24241), -INT16_C( 6828), INT16_C( 17614), INT16_C( 21075) }, { INT32_C( 347402008), INT32_C( 558705462), INT32_C( 362925460), INT32_C( 343346182), INT32_C( 281095932), -INT32_C( 743691950), INT32_C( 205485928), INT32_C( 150872580), 
-INT32_C( 305883654), -INT32_C( 268869614), INT32_C( 137640804), INT32_C( 696093821), INT32_C( 535609457), -INT32_C( 1248631868), -INT32_C( 552455103), INT32_C( 35634000) } }, { { INT16_C( 14889), INT16_C( 23498), -INT16_C( 29073), -INT16_C( 13422), INT16_C( 7820), -INT16_C( 31657), -INT16_C( 32409), -INT16_C( 3355), -INT16_C( 3957), INT16_C( 2139), INT16_C( 31587), INT16_C( 5243), -INT16_C( 12326), -INT16_C( 22278), INT16_C( 19731), INT16_C( 15611), -INT16_C( 14969), -INT16_C( 2408), INT16_C( 10836), -INT16_C( 7998), INT16_C( 6472), -INT16_C( 20636), INT16_C( 19099), INT16_C( 9889), -INT16_C( 966), -INT16_C( 25042), -INT16_C( 22153), INT16_C( 20914), -INT16_C( 21383), -INT16_C( 29446), -INT16_C( 2567), -INT16_C( 32311) }, { INT16_C( 25018), INT16_C( 3703), INT16_C( 14731), -INT16_C( 11281), INT16_C( 21331), -INT16_C( 4478), INT16_C( 9117), -INT16_C( 10220), INT16_C( 17183), -INT16_C( 27018), INT16_C( 10476), INT16_C( 26088), -INT16_C( 7467), -INT16_C( 12558), -INT16_C( 17449), -INT16_C( 28337), -INT16_C( 14564), -INT16_C( 22624), -INT16_C( 28928), INT16_C( 21370), -INT16_C( 798), -INT16_C( 32703), INT16_C( 22047), INT16_C( 15960), -INT16_C( 12647), -INT16_C( 31276), -INT16_C( 17162), -INT16_C( 13333), -INT16_C( 8802), INT16_C( 30106), -INT16_C( 5736), -INT16_C( 19449) }, { INT32_C( 459506096), -INT32_C( 276860781), INT32_C( 308568466), -INT32_C( 261184753), -INT32_C( 125784633), INT32_C( 467684796), INT32_C( 371805366), -INT32_C( 786655126), INT32_C( 272487108), -INT32_C( 484381068), INT32_C( 669694452), INT32_C( 578904093), INT32_C( 795430594), INT32_C( 101343424), -INT32_C( 698288110), INT32_C( 643140951) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_madd_epi16(a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int 
test_simde_mm512_mask_madd_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t src[32]; const simde__mmask16 k; const int16_t a[32]; const int16_t b[32]; const int32_t r[16]; } test_vec[] = { { { -INT16_C( 24666), -INT16_C( 29655), INT16_C( 25716), -INT16_C( 4900), INT16_C( 25090), -INT16_C( 2830), INT16_C( 11305), -INT16_C( 29651), -INT16_C( 19186), INT16_C( 399), -INT16_C( 21850), -INT16_C( 23497), -INT16_C( 8300), INT16_C( 7570), -INT16_C( 14461), INT16_C( 10749), INT16_C( 10086), -INT16_C( 9547), -INT16_C( 28277), -INT16_C( 29242), -INT16_C( 18188), INT16_C( 7553), -INT16_C( 20764), -INT16_C( 3159), INT16_C( 14435), INT16_C( 2548), INT16_C( 11234), INT16_C( 30637), INT16_C( 16394), -INT16_C( 29292), -INT16_C( 28153), INT16_C( 28087) }, UINT16_C(27833), { INT16_C( 17479), INT16_C( 3582), -INT16_C( 3375), INT16_C( 21189), -INT16_C( 22257), -INT16_C( 18432), INT16_C( 25500), -INT16_C( 28432), -INT16_C( 11667), INT16_C( 6844), -INT16_C( 14775), -INT16_C( 8614), INT16_C( 24916), INT16_C( 2928), INT16_C( 10702), INT16_C( 5495), INT16_C( 30061), INT16_C( 15906), -INT16_C( 6297), INT16_C( 30352), -INT16_C( 28527), INT16_C( 11566), INT16_C( 7923), INT16_C( 24766), INT16_C( 31473), INT16_C( 14971), -INT16_C( 10944), -INT16_C( 27624), -INT16_C( 30665), INT16_C( 1439), INT16_C( 6065), INT16_C( 7707) }, { INT16_C( 15756), -INT16_C( 2980), -INT16_C( 5083), -INT16_C( 18838), -INT16_C( 26244), INT16_C( 28899), -INT16_C( 24137), -INT16_C( 22320), INT16_C( 19227), INT16_C( 23779), -INT16_C( 1247), INT16_C( 22768), -INT16_C( 28540), INT16_C( 13661), INT16_C( 30887), INT16_C( 13140), -INT16_C( 20298), -INT16_C( 9433), -INT16_C( 28003), INT16_C( 6545), INT16_C( 29739), -INT16_C( 7543), INT16_C( 23062), INT16_C( 12683), INT16_C( 28325), -INT16_C( 14707), INT16_C( 32361), -INT16_C( 4834), INT16_C( 31758), -INT16_C( 19165), INT16_C( 30708), -INT16_C( 21784) }, { INT32_C( 264724764), -INT32_C( 321100684), -INT32_C( 185441790), INT32_C( 19108740), -INT32_C( 61577933), 
-INT32_C( 177699127), INT32_C( 496164756), INT32_C( 402756974), -INT32_C( 625662106), -INT32_C( 1916366453), -INT32_C( 935606791), INT32_C( 496827404), INT32_C( 167000163), -INT32_C( 220624368), -INT32_C( 1001437505), INT32_C( 1840747015) } }, { { INT16_C( 4135), -INT16_C( 15227), INT16_C( 5794), -INT16_C( 12834), INT16_C( 26507), -INT16_C( 24145), INT16_C( 15041), INT16_C( 26578), INT16_C( 24744), INT16_C( 4653), INT16_C( 19678), -INT16_C( 4865), INT16_C( 8904), -INT16_C( 17247), -INT16_C( 30311), -INT16_C( 16025), -INT16_C( 4967), INT16_C( 15237), INT16_C( 25347), -INT16_C( 29176), -INT16_C( 18229), -INT16_C( 29649), INT16_C( 498), -INT16_C( 25613), INT16_C( 8545), INT16_C( 16301), -INT16_C( 21395), INT16_C( 13611), -INT16_C( 13105), INT16_C( 26865), INT16_C( 22614), -INT16_C( 4311) }, UINT16_C(44869), { INT16_C( 18475), INT16_C( 13074), -INT16_C( 8746), INT16_C( 1515), -INT16_C( 8598), INT16_C( 23814), INT16_C( 26745), INT16_C( 9854), -INT16_C( 5209), -INT16_C( 11310), -INT16_C( 24288), INT16_C( 4767), -INT16_C( 2806), INT16_C( 13162), -INT16_C( 20507), INT16_C( 4322), -INT16_C( 2569), -INT16_C( 12989), INT16_C( 12242), INT16_C( 15570), -INT16_C( 9971), -INT16_C( 31078), INT16_C( 6209), -INT16_C( 5972), INT16_C( 32260), INT16_C( 9403), INT16_C( 23328), INT16_C( 10806), -INT16_C( 24240), INT16_C( 13661), INT16_C( 16464), INT16_C( 18501) }, { -INT16_C( 30411), INT16_C( 1813), -INT16_C( 5960), -INT16_C( 15036), -INT16_C( 8511), INT16_C( 587), -INT16_C( 2058), -INT16_C( 1302), -INT16_C( 22923), -INT16_C( 27361), INT16_C( 21761), INT16_C( 20927), INT16_C( 7670), INT16_C( 18311), -INT16_C( 13219), -INT16_C( 28017), -INT16_C( 23467), INT16_C( 3481), -INT16_C( 8820), INT16_C( 19922), INT16_C( 7611), -INT16_C( 19889), INT16_C( 14868), -INT16_C( 30036), -INT16_C( 13344), -INT16_C( 7905), -INT16_C( 8415), INT16_C( 5938), -INT16_C( 17924), INT16_C( 22878), -INT16_C( 4730), -INT16_C( 9237) }, { -INT32_C( 538140063), -INT32_C( 841083230), INT32_C( 87156396), INT32_C( 
1741830849), INT32_C( 304963752), -INT32_C( 318812962), INT32_C( 219487362), -INT32_C( 1050179175), INT32_C( 15072014), INT32_C( 202211100), INT32_C( 542221061), INT32_C( 271690404), INT32_C( 1068310881), -INT32_C( 132139092), INT32_C( 1760677071), -INT32_C( 248768457) } }, { { -INT16_C( 31598), INT16_C( 7913), -INT16_C( 17566), INT16_C( 7532), -INT16_C( 17447), -INT16_C( 4657), INT16_C( 31989), -INT16_C( 10889), -INT16_C( 26809), INT16_C( 26806), -INT16_C( 5770), INT16_C( 29312), -INT16_C( 8542), INT16_C( 10443), -INT16_C( 18740), INT16_C( 24068), -INT16_C( 4806), -INT16_C( 25476), -INT16_C( 5976), -INT16_C( 32326), -INT16_C( 30300), -INT16_C( 26257), -INT16_C( 6651), INT16_C( 19823), INT16_C( 9597), -INT16_C( 3147), INT16_C( 13582), -INT16_C( 20123), INT16_C( 12308), -INT16_C( 7975), -INT16_C( 8730), INT16_C( 8510) }, UINT16_C(47818), { INT16_C( 29629), INT16_C( 30627), INT16_C( 18420), INT16_C( 25345), INT16_C( 1760), INT16_C( 20298), -INT16_C( 14509), INT16_C( 2421), -INT16_C( 31813), INT16_C( 8254), INT16_C( 21044), INT16_C( 3665), INT16_C( 14130), INT16_C( 28907), -INT16_C( 18856), INT16_C( 5675), -INT16_C( 12759), INT16_C( 7565), -INT16_C( 29163), -INT16_C( 2687), -INT16_C( 13419), -INT16_C( 6075), -INT16_C( 17774), INT16_C( 19953), INT16_C( 12349), INT16_C( 29294), -INT16_C( 16510), -INT16_C( 19072), INT16_C( 27638), INT16_C( 20261), INT16_C( 20513), INT16_C( 19045) }, { -INT16_C( 3554), INT16_C( 13160), -INT16_C( 5759), INT16_C( 5673), INT16_C( 28340), INT16_C( 18174), -INT16_C( 4056), INT16_C( 26004), INT16_C( 544), -INT16_C( 23849), INT16_C( 22465), -INT16_C( 18601), INT16_C( 32195), -INT16_C( 7162), INT16_C( 27597), -INT16_C( 5073), -INT16_C( 26786), -INT16_C( 8417), INT16_C( 18560), INT16_C( 13557), -INT16_C( 3146), -INT16_C( 8582), INT16_C( 3811), INT16_C( 836), INT16_C( 6928), -INT16_C( 11866), -INT16_C( 653), INT16_C( 13961), -INT16_C( 28806), INT16_C( 18458), INT16_C( 18939), INT16_C( 22836) }, { INT32_C( 518620306), INT32_C( 37701405), -INT32_C( 
305153063), INT32_C( 121804188), INT32_C( 1756796743), INT32_C( 1921050998), INT32_C( 247883416), -INT32_C( 549158307), -INT32_C( 1669534406), -INT32_C( 577692939), -INT32_C( 1720743516), -INT32_C( 51056006), -INT32_C( 262048732), -INT32_C( 255483162), -INT32_C( 522637292), INT32_C( 823407327) } }, { { INT16_C( 21472), INT16_C( 24632), INT16_C( 11676), INT16_C( 21140), INT16_C( 3872), INT16_C( 1073), INT16_C( 29981), INT16_C( 11783), -INT16_C( 21104), INT16_C( 1023), -INT16_C( 30549), INT16_C( 9529), INT16_C( 21528), INT16_C( 4973), -INT16_C( 24163), INT16_C( 32364), -INT16_C( 23307), -INT16_C( 28194), INT16_C( 29649), -INT16_C( 3613), INT16_C( 5250), -INT16_C( 24587), -INT16_C( 631), INT16_C( 6861), -INT16_C( 12886), INT16_C( 21789), INT16_C( 22357), INT16_C( 28027), -INT16_C( 5973), INT16_C( 18560), -INT16_C( 4982), INT16_C( 32710) }, UINT16_C(42384), { INT16_C( 24848), -INT16_C( 3304), -INT16_C( 26029), INT16_C( 18440), -INT16_C( 28359), INT16_C( 1861), -INT16_C( 3925), -INT16_C( 13868), INT16_C( 10565), -INT16_C( 16352), -INT16_C( 13417), INT16_C( 6057), INT16_C( 13075), -INT16_C( 9724), -INT16_C( 27470), -INT16_C( 15745), -INT16_C( 26634), INT16_C( 18869), -INT16_C( 17103), INT16_C( 27281), -INT16_C( 10417), -INT16_C( 1423), INT16_C( 17863), INT16_C( 3267), -INT16_C( 7313), INT16_C( 1741), INT16_C( 30382), -INT16_C( 15843), INT16_C( 8617), INT16_C( 23452), INT16_C( 7094), -INT16_C( 21475) }, { -INT16_C( 11598), -INT16_C( 7179), -INT16_C( 31088), -INT16_C( 8371), -INT16_C( 16547), INT16_C( 9433), -INT16_C( 25340), INT16_C( 29489), -INT16_C( 384), INT16_C( 12153), -INT16_C( 26764), INT16_C( 7665), -INT16_C( 29256), INT16_C( 28280), -INT16_C( 27224), INT16_C( 23066), INT16_C( 3943), -INT16_C( 2243), -INT16_C( 30058), -INT16_C( 3114), -INT16_C( 20407), INT16_C( 19992), INT16_C( 18765), -INT16_C( 12863), INT16_C( 15175), -INT16_C( 17412), -INT16_C( 4654), -INT16_C( 29992), INT16_C( 20602), INT16_C( 8953), INT16_C( 5093), INT16_C( 19580) }, { INT32_C( 1614304224), 
INT32_C( 1385442716), INT32_C( 70324000), INT32_C( 772240669), -INT32_C( 202782816), INT32_C( 624527531), INT32_C( 325932056), INT32_C( 384669110), -INT32_C( 147341029), -INT32_C( 236751919), INT32_C( 184131103), INT32_C( 449707401), INT32_C( 1428016554), INT32_C( 333765428), INT32_C( 1216407723), -INT32_C( 384350758) } }, { { -INT16_C( 18141), -INT16_C( 18108), INT16_C( 6724), -INT16_C( 29268), -INT16_C( 15158), INT16_C( 6107), -INT16_C( 25331), INT16_C( 21733), -INT16_C( 7720), -INT16_C( 22001), -INT16_C( 6193), INT16_C( 18740), INT16_C( 11575), INT16_C( 7276), -INT16_C( 6079), INT16_C( 25705), -INT16_C( 21086), -INT16_C( 6627), -INT16_C( 13881), -INT16_C( 28045), INT16_C( 20366), -INT16_C( 25687), -INT16_C( 28948), -INT16_C( 15120), -INT16_C( 144), INT16_C( 16238), -INT16_C( 23833), INT16_C( 7816), -INT16_C( 2864), INT16_C( 4411), -INT16_C( 23331), INT16_C( 32629) }, UINT16_C(37457), { INT16_C( 6245), -INT16_C( 10149), -INT16_C( 5718), INT16_C( 21543), INT16_C( 4997), INT16_C( 30178), INT16_C( 21207), INT16_C( 17780), INT16_C( 23441), INT16_C( 6888), -INT16_C( 18310), -INT16_C( 19186), -INT16_C( 5175), INT16_C( 15961), -INT16_C( 21910), -INT16_C( 12336), INT16_C( 11202), INT16_C( 28072), -INT16_C( 12523), -INT16_C( 25919), -INT16_C( 23581), -INT16_C( 17905), -INT16_C( 31754), -INT16_C( 30976), -INT16_C( 5921), INT16_C( 22945), -INT16_C( 20320), INT16_C( 26894), INT16_C( 26523), INT16_C( 1703), INT16_C( 30481), -INT16_C( 11307) }, { INT16_C( 32162), -INT16_C( 18624), INT16_C( 333), INT16_C( 12369), INT16_C( 24741), -INT16_C( 25622), -INT16_C( 5404), -INT16_C( 15582), -INT16_C( 15150), INT16_C( 29212), INT16_C( 10868), INT16_C( 4059), -INT16_C( 32111), -INT16_C( 24043), -INT16_C( 5127), -INT16_C( 25483), -INT16_C( 18840), -INT16_C( 19117), -INT16_C( 23113), INT16_C( 23781), -INT16_C( 12283), -INT16_C( 5641), INT16_C( 6842), -INT16_C( 29268), -INT16_C( 14114), INT16_C( 21247), -INT16_C( 9230), -INT16_C( 31903), INT16_C( 30557), INT16_C( 22309), -INT16_C( 25758), 
-INT16_C( 13581) }, { INT32_C( 389866666), -INT32_C( 1918100924), INT32_C( 400278730), INT32_C( 1424334093), -INT32_C( 153918894), INT32_C( 1228203983), -INT32_C( 217575898), INT32_C( 1684662337), -INT32_C( 434262622), -INT32_C( 326935640), -INT32_C( 1683402866), -INT32_C( 990867732), INT32_C( 571081409), INT32_C( 512271079), INT32_C( 289141968), -INT32_C( 631569231) } }, { { INT16_C( 18001), INT16_C( 2176), INT16_C( 26091), -INT16_C( 3739), INT16_C( 23605), -INT16_C( 3878), -INT16_C( 30858), INT16_C( 21629), INT16_C( 31823), INT16_C( 17062), INT16_C( 2135), -INT16_C( 19003), -INT16_C( 5249), -INT16_C( 7924), -INT16_C( 122), -INT16_C( 10325), INT16_C( 11077), INT16_C( 12767), INT16_C( 17553), -INT16_C( 14814), -INT16_C( 863), INT16_C( 6070), INT16_C( 13187), -INT16_C( 11412), INT16_C( 4784), INT16_C( 1813), -INT16_C( 9702), -INT16_C( 26180), -INT16_C( 14139), INT16_C( 19322), INT16_C( 9927), INT16_C( 3362) }, UINT16_C( 593), { -INT16_C( 7618), INT16_C( 24646), -INT16_C( 6231), INT16_C( 24412), -INT16_C( 7937), INT16_C( 27539), INT16_C( 17331), -INT16_C( 14211), -INT16_C( 26550), INT16_C( 1954), INT16_C( 26673), -INT16_C( 21297), -INT16_C( 26701), -INT16_C( 10542), INT16_C( 9124), -INT16_C( 7464), INT16_C( 7686), -INT16_C( 20670), -INT16_C( 25082), INT16_C( 1294), -INT16_C( 24194), INT16_C( 12656), -INT16_C( 4636), INT16_C( 12281), -INT16_C( 25467), -INT16_C( 18634), INT16_C( 1284), -INT16_C( 18589), INT16_C( 13724), INT16_C( 16525), INT16_C( 25944), INT16_C( 24098) }, { INT16_C( 25732), -INT16_C( 30195), INT16_C( 7171), -INT16_C( 32369), -INT16_C( 67), -INT16_C( 23885), -INT16_C( 21268), INT16_C( 29393), INT16_C( 1864), INT16_C( 19497), -INT16_C( 29684), -INT16_C( 22268), -INT16_C( 28223), INT16_C( 6633), INT16_C( 3319), INT16_C( 31608), -INT16_C( 31376), INT16_C( 29445), -INT16_C( 27487), INT16_C( 24565), -INT16_C( 22381), INT16_C( 32513), -INT16_C( 11692), -INT16_C( 25103), INT16_C( 6873), -INT16_C( 6679), -INT16_C( 4698), INT16_C( 26510), INT16_C( 30847), 
INT16_C( 30337), -INT16_C( 1660), -INT16_C( 2831) }, { -INT32_C( 940212346), -INT32_C( 245013013), -INT32_C( 254125003), INT32_C( 1417512822), -INT32_C( 11392062), -INT32_C( 1245378473), INT32_C( 683657237), -INT32_C( 676593786), INT32_C( 836709189), INT32_C( 721216044), INT32_C( 397868193), -INT32_C( 747883645), INT32_C( 118821552), -INT32_C( 1715676646), INT32_C( 1266337989), INT32_C( 220341959) } }, { { -INT16_C( 2434), INT16_C( 8296), INT16_C( 23946), INT16_C( 7551), -INT16_C( 32763), INT16_C( 22940), -INT16_C( 29102), INT16_C( 11254), -INT16_C( 8024), INT16_C( 20240), -INT16_C( 24627), INT16_C( 19638), INT16_C( 14103), -INT16_C( 25662), -INT16_C( 19664), -INT16_C( 20593), -INT16_C( 2135), INT16_C( 13263), INT16_C( 20052), INT16_C( 22864), -INT16_C( 4658), INT16_C( 8371), -INT16_C( 22149), INT16_C( 9035), INT16_C( 23433), INT16_C( 22386), INT16_C( 10746), INT16_C( 4515), INT16_C( 26208), -INT16_C( 28244), INT16_C( 15385), -INT16_C( 15552) }, UINT16_C( 3891), { -INT16_C( 30474), INT16_C( 18269), INT16_C( 11233), -INT16_C( 27596), -INT16_C( 20661), -INT16_C( 27074), -INT16_C( 14382), INT16_C( 17905), -INT16_C( 5090), -INT16_C( 15762), -INT16_C( 12547), -INT16_C( 21976), INT16_C( 16735), -INT16_C( 24602), INT16_C( 6404), -INT16_C( 1106), INT16_C( 2977), -INT16_C( 31934), INT16_C( 30262), -INT16_C( 32489), INT16_C( 21797), -INT16_C( 2281), INT16_C( 2333), INT16_C( 15164), -INT16_C( 21771), -INT16_C( 3331), INT16_C( 9593), -INT16_C( 10084), -INT16_C( 32153), INT16_C( 27512), INT16_C( 9884), INT16_C( 15718) }, { -INT16_C( 22478), INT16_C( 26816), -INT16_C( 10210), INT16_C( 17386), INT16_C( 301), INT16_C( 19003), INT16_C( 30474), -INT16_C( 122), -INT16_C( 31966), -INT16_C( 25614), -INT16_C( 29015), INT16_C( 4211), -INT16_C( 5359), -INT16_C( 21125), -INT16_C( 7662), INT16_C( 17642), -INT16_C( 21622), -INT16_C( 22100), -INT16_C( 27005), -INT16_C( 20244), INT16_C( 10136), -INT16_C( 23813), -INT16_C( 32353), -INT16_C( 15966), -INT16_C( 27644), -INT16_C( 21156), -INT16_C( 
12510), INT16_C( 13245), INT16_C( 14779), -INT16_C( 12832), -INT16_C( 13541), -INT16_C( 23279) }, { INT32_C( 1174896076), -INT32_C( 594472986), INT32_C( 1503428613), INT32_C( 737578578), INT32_C( 566434808), INT32_C( 271510269), -INT32_C( 1681770729), -INT32_C( 1349536976), INT32_C( 641372706), -INT32_C( 159517994), INT32_C( 275251845), -INT32_C( 317587973), INT32_C( 1467112329), INT32_C( 295905786), -INT32_C( 1850972576), -INT32_C( 1019200487) } }, { { -INT16_C( 17034), -INT16_C( 1714), INT16_C( 15188), -INT16_C( 4951), -INT16_C( 23454), INT16_C( 398), INT16_C( 12325), INT16_C( 10946), INT16_C( 7876), -INT16_C( 6185), -INT16_C( 27154), -INT16_C( 22246), -INT16_C( 1074), -INT16_C( 5770), -INT16_C( 30778), INT16_C( 15502), -INT16_C( 8892), -INT16_C( 26571), -INT16_C( 8680), INT16_C( 31364), INT16_C( 4995), -INT16_C( 22404), INT16_C( 15939), INT16_C( 2258), -INT16_C( 21923), INT16_C( 19439), INT16_C( 2367), INT16_C( 3572), INT16_C( 27140), -INT16_C( 13578), -INT16_C( 31503), INT16_C( 13574) }, UINT16_C(15201), { INT16_C( 31182), INT16_C( 21018), -INT16_C( 25100), INT16_C( 28773), -INT16_C( 22203), INT16_C( 6318), INT16_C( 2993), -INT16_C( 24382), INT16_C( 342), INT16_C( 19113), -INT16_C( 20978), INT16_C( 1204), -INT16_C( 23176), INT16_C( 32648), -INT16_C( 5413), -INT16_C( 22086), -INT16_C( 11165), INT16_C( 22523), INT16_C( 24945), -INT16_C( 18489), INT16_C( 30218), -INT16_C( 17457), -INT16_C( 28287), -INT16_C( 10149), INT16_C( 1170), -INT16_C( 24542), -INT16_C( 10318), INT16_C( 11172), INT16_C( 11388), INT16_C( 22442), INT16_C( 25622), INT16_C( 31232) }, { -INT16_C( 967), -INT16_C( 21807), -INT16_C( 26275), INT16_C( 26465), INT16_C( 12303), -INT16_C( 28638), INT16_C( 32193), INT16_C( 21352), -INT16_C( 29823), INT16_C( 13555), -INT16_C( 26782), -INT16_C( 8609), INT16_C( 2500), -INT16_C( 9674), INT16_C( 13933), -INT16_C( 22956), INT16_C( 9778), -INT16_C( 28847), -INT16_C( 19777), -INT16_C( 12554), INT16_C( 6371), -INT16_C( 23458), -INT16_C( 14443), INT16_C( 6136), 
-INT16_C( 5294), -INT16_C( 19381), -INT16_C( 21885), INT16_C( 18322), -INT16_C( 14157), INT16_C( 8225), INT16_C( 30463), INT16_C( 12743) }, { -INT32_C( 488492520), -INT32_C( 324453548), INT32_C( 26125410), INT32_C( 717369381), -INT32_C( 405332284), INT32_C( 551467560), -INT32_C( 373776752), INT32_C( 1015973830), -INT32_C( 758892351), -INT32_C( 261226359), -INT32_C( 1468263549), INT32_C( 346274877), INT32_C( 469454522), INT32_C( 430502814), -INT32_C( 889820668), INT32_C( 889619697) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi16(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_mask_madd_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_madd_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const int16_t a[32]; const int16_t b[32]; const int32_t r[16]; } test_vec[] = { { UINT16_C(49133), { -INT16_C( 12436), INT16_C( 8143), INT16_C( 3509), -INT16_C( 8650), INT16_C( 19172), -INT16_C( 17007), -INT16_C( 26437), INT16_C( 27681), -INT16_C( 6686), -INT16_C( 11200), -INT16_C( 18005), -INT16_C( 6237), -INT16_C( 26567), -INT16_C( 3727), INT16_C( 24180), -INT16_C( 7760), INT16_C( 32558), -INT16_C( 7424), INT16_C( 14220), INT16_C( 29121), INT16_C( 21121), INT16_C( 15406), INT16_C( 20459), -INT16_C( 12887), -INT16_C( 5836), -INT16_C( 8031), INT16_C( 17826), -INT16_C( 9273), INT16_C( 14557), INT16_C( 20940), INT16_C( 31894), -INT16_C( 15310) }, { INT16_C( 13308), -INT16_C( 30553), INT16_C( 26986), -INT16_C( 5127), INT16_C( 10171), -INT16_C( 23000), -INT16_C( 11913), -INT16_C( 21645), INT16_C( 5562), INT16_C( 23947), INT16_C( 21082), INT16_C( 14136), INT16_C( 1418), INT16_C( 8584), -INT16_C( 17535), INT16_C( 32229), -INT16_C( 29202), INT16_C( 22534), -INT16_C( 
10), -INT16_C( 20157), INT16_C( 27431), -INT16_C( 25000), -INT16_C( 13508), -INT16_C( 2231), -INT16_C( 10784), INT16_C( 14932), -INT16_C( 29657), -INT16_C( 19855), -INT16_C( 1391), INT16_C( 5075), -INT16_C( 18251), -INT16_C( 23664) }, { -INT32_C( 414291367), INT32_C( 0), INT32_C( 586159412), -INT32_C( 284211264), INT32_C( 0), -INT32_C( 467747642), -INT32_C( 69664574), -INT32_C( 674093340), -INT32_C( 1118051132), -INT32_C( 587134197), INT32_C( 194220151), -INT32_C( 247609275), -INT32_C( 56983468), -INT32_C( 344550267), INT32_C( 0), -INT32_C( 219801554) } }, { UINT16_C(38469), { INT16_C( 15355), INT16_C( 16022), -INT16_C( 16915), INT16_C( 17834), -INT16_C( 6565), -INT16_C( 23536), -INT16_C( 3619), INT16_C( 12665), -INT16_C( 24277), -INT16_C( 25154), INT16_C( 20307), INT16_C( 9879), INT16_C( 19554), -INT16_C( 3106), INT16_C( 9455), -INT16_C( 5495), INT16_C( 8031), INT16_C( 19496), -INT16_C( 11556), INT16_C( 14225), -INT16_C( 23879), -INT16_C( 26916), INT16_C( 21907), -INT16_C( 16696), -INT16_C( 30986), INT16_C( 18779), -INT16_C( 3371), INT16_C( 14447), INT16_C( 20030), INT16_C( 11563), -INT16_C( 19342), -INT16_C( 12009) }, { INT16_C( 16596), -INT16_C( 20450), -INT16_C( 20718), -INT16_C( 13336), -INT16_C( 15279), -INT16_C( 7070), INT16_C( 10777), INT16_C( 4259), -INT16_C( 336), -INT16_C( 31399), -INT16_C( 13839), INT16_C( 12221), -INT16_C( 6121), -INT16_C( 30371), INT16_C( 29853), INT16_C( 29018), INT16_C( 30900), -INT16_C( 14559), INT16_C( 2344), INT16_C( 31122), -INT16_C( 2867), -INT16_C( 6306), INT16_C( 286), -INT16_C( 12553), INT16_C( 20735), -INT16_C( 4012), INT16_C( 4377), INT16_C( 12320), INT16_C( 32250), -INT16_C( 26695), INT16_C( 5361), -INT16_C( 23032) }, { -INT32_C( 72818320), INT32_C( 0), INT32_C( 266706155), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 25357708), INT32_C( 0), INT32_C( 0), INT32_C( 415623186), INT32_C( 238193389), INT32_C( 0), -INT32_C( 717836058), INT32_C( 0), INT32_C( 0), INT32_C( 172898826) } }, { UINT16_C(10636), { -INT16_C( 19347), 
-INT16_C( 205), INT16_C( 46), -INT16_C( 29452), INT16_C( 4839), -INT16_C( 8563), -INT16_C( 29471), INT16_C( 13615), INT16_C( 18557), -INT16_C( 25274), INT16_C( 16505), INT16_C( 12826), INT16_C( 3031), -INT16_C( 8378), -INT16_C( 11343), INT16_C( 7689), INT16_C( 15495), -INT16_C( 19170), INT16_C( 4668), INT16_C( 9281), -INT16_C( 12764), INT16_C( 1282), INT16_C( 12635), -INT16_C( 10182), -INT16_C( 32390), -INT16_C( 3211), -INT16_C( 28735), -INT16_C( 26331), INT16_C( 27802), INT16_C( 19576), -INT16_C( 32449), -INT16_C( 14742) }, { -INT16_C( 30531), -INT16_C( 1412), -INT16_C( 16998), -INT16_C( 16610), INT16_C( 8332), -INT16_C( 6204), -INT16_C( 174), -INT16_C( 13121), INT16_C( 13440), INT16_C( 16831), -INT16_C( 6973), INT16_C( 24026), INT16_C( 21328), -INT16_C( 28759), INT16_C( 5332), -INT16_C( 28074), -INT16_C( 11620), INT16_C( 14220), -INT16_C( 21873), INT16_C( 7158), -INT16_C( 17718), INT16_C( 7170), -INT16_C( 15943), INT16_C( 14824), -INT16_C( 22539), -INT16_C( 18309), INT16_C( 21900), -INT16_C( 9194), -INT16_C( 16472), INT16_C( 32108), -INT16_C( 15661), INT16_C( 28687) }, { INT32_C( 0), INT32_C( 0), INT32_C( 93443400), -INT32_C( 173514461), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 276341862), -INT32_C( 452649300), INT32_C( 0), INT32_C( 0), -INT32_C( 352377773), INT32_C( 0), -INT32_C( 387209286), INT32_C( 0), INT32_C( 0) } }, { UINT16_C(39828), { INT16_C( 9127), -INT16_C( 25275), INT16_C( 3903), INT16_C( 16727), INT16_C( 4396), INT16_C( 5123), -INT16_C( 1974), -INT16_C( 14916), INT16_C( 18609), -INT16_C( 14565), -INT16_C( 15580), -INT16_C( 28538), INT16_C( 23104), INT16_C( 20306), -INT16_C( 6454), INT16_C( 29162), INT16_C( 12042), INT16_C( 18702), INT16_C( 25919), INT16_C( 27530), -INT16_C( 29322), -INT16_C( 16001), INT16_C( 15238), INT16_C( 14214), -INT16_C( 24189), -INT16_C( 22274), -INT16_C( 31643), -INT16_C( 23240), -INT16_C( 29730), -INT16_C( 22283), -INT16_C( 8335), INT16_C( 31513) }, { INT16_C( 9999), INT16_C( 20164), INT16_C( 20365), INT16_C( 953), 
INT16_C( 14556), INT16_C( 25284), INT16_C( 19316), -INT16_C( 2151), -INT16_C( 26644), INT16_C( 20895), -INT16_C( 10212), -INT16_C( 1289), -INT16_C( 5021), -INT16_C( 11101), -INT16_C( 17205), -INT16_C( 9648), INT16_C( 5348), INT16_C( 28968), -INT16_C( 7837), INT16_C( 16500), INT16_C( 14618), -INT16_C( 29022), INT16_C( 15492), INT16_C( 28805), INT16_C( 9683), -INT16_C( 4158), -INT16_C( 17923), INT16_C( 24810), -INT16_C( 29275), INT16_C( 28724), -INT16_C( 31671), INT16_C( 11595) }, { INT32_C( 0), INT32_C( 0), INT32_C( 193518108), INT32_C( 0), -INT32_C( 800153871), INT32_C( 0), INT32_C( 0), -INT32_C( 170313906), INT32_C( 606160152), INT32_C( 251117797), INT32_C( 0), INT32_C( 645501366), -INT32_C( 141606795), INT32_C( 0), INT32_C( 0), INT32_C( 629371020) } }, { UINT16_C(29593), { -INT16_C( 866), INT16_C( 4949), INT16_C( 28476), -INT16_C( 8372), -INT16_C( 12035), -INT16_C( 32229), -INT16_C( 4544), INT16_C( 679), -INT16_C( 23330), -INT16_C( 14149), INT16_C( 24580), INT16_C( 14677), -INT16_C( 24879), INT16_C( 7357), INT16_C( 22220), INT16_C( 27279), -INT16_C( 7085), -INT16_C( 28803), -INT16_C( 13997), INT16_C( 20590), -INT16_C( 30311), -INT16_C( 9517), INT16_C( 31352), INT16_C( 22236), -INT16_C( 26593), INT16_C( 8990), INT16_C( 29688), -INT16_C( 13988), INT16_C( 6673), -INT16_C( 8731), INT16_C( 30064), -INT16_C( 15544) }, { -INT16_C( 15015), -INT16_C( 21165), -INT16_C( 15985), INT16_C( 10493), -INT16_C( 12213), -INT16_C( 15614), -INT16_C( 8373), INT16_C( 27161), INT16_C( 14199), INT16_C( 28557), -INT16_C( 5462), -INT16_C( 17607), INT16_C( 7684), INT16_C( 29849), -INT16_C( 7789), -INT16_C( 4808), -INT16_C( 29786), INT16_C( 13722), -INT16_C( 26804), -INT16_C( 26786), INT16_C( 24680), -INT16_C( 19622), INT16_C( 29503), -INT16_C( 18915), -INT16_C( 21846), INT16_C( 21542), INT16_C( 24468), -INT16_C( 26608), -INT16_C( 22147), INT16_C( 4365), INT16_C( 17802), INT16_C( 12542) }, { -INT32_C( 91742595), INT32_C( 0), INT32_C( 0), INT32_C( 56489231), -INT32_C( 735315663), INT32_C( 0), 
INT32_C( 0), -INT32_C( 304229012), -INT32_C( 184200956), -INT32_C( 176348152), INT32_C( 0), INT32_C( 0), INT32_C( 774613258), INT32_C( 1098598688), -INT32_C( 185897746), INT32_C( 0) } }, { UINT16_C(39120), { INT16_C( 7270), -INT16_C( 15313), -INT16_C( 26700), INT16_C( 3620), INT16_C( 25674), INT16_C( 26498), INT16_C( 11290), INT16_C( 16402), -INT16_C( 22911), -INT16_C( 28257), INT16_C( 7487), INT16_C( 19514), -INT16_C( 15314), INT16_C( 11409), INT16_C( 25076), INT16_C( 23236), -INT16_C( 3203), INT16_C( 12574), INT16_C( 17291), -INT16_C( 10944), -INT16_C( 15705), -INT16_C( 16067), INT16_C( 20462), INT16_C( 28418), -INT16_C( 24075), INT16_C( 13312), INT16_C( 15038), -INT16_C( 4992), INT16_C( 4606), -INT16_C( 3304), -INT16_C( 9102), -INT16_C( 4019) }, { INT16_C( 27856), INT16_C( 23329), INT16_C( 25007), INT16_C( 22064), INT16_C( 27939), INT16_C( 4631), INT16_C( 6588), -INT16_C( 19839), -INT16_C( 32069), INT16_C( 31206), INT16_C( 26556), -INT16_C( 17562), INT16_C( 32376), -INT16_C( 5202), -INT16_C( 1189), INT16_C( 11227), -INT16_C( 921), INT16_C( 5766), -INT16_C( 18850), -INT16_C( 32404), -INT16_C( 31708), -INT16_C( 8045), INT16_C( 5533), INT16_C( 22674), INT16_C( 31127), INT16_C( 21458), INT16_C( 14560), INT16_C( 22542), -INT16_C( 17226), INT16_C( 4419), INT16_C( 7864), INT16_C( 7996) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 147055083), INT32_C( 0), -INT32_C( 555155682), INT32_C( 231055208), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 757565978), -INT32_C( 463733629), INT32_C( 0), INT32_C( 0), -INT32_C( 103714052) } }, { UINT16_C(49691), { INT16_C( 31030), -INT16_C( 23943), -INT16_C( 25094), -INT16_C( 29146), -INT16_C( 15235), INT16_C( 4259), INT16_C( 14876), -INT16_C( 4471), INT16_C( 27021), -INT16_C( 25562), -INT16_C( 8767), INT16_C( 1368), INT16_C( 4334), INT16_C( 11043), INT16_C( 15920), INT16_C( 26349), INT16_C( 26295), -INT16_C( 19960), INT16_C( 12035), -INT16_C( 32448), -INT16_C( 7181), INT16_C( 3985), INT16_C( 6685), -INT16_C( 
21762), INT16_C( 9347), INT16_C( 17478), -INT16_C( 24831), -INT16_C( 4023), INT16_C( 28079), -INT16_C( 8421), INT16_C( 2219), INT16_C( 25413) }, { INT16_C( 20079), INT16_C( 29205), INT16_C( 21885), INT16_C( 28915), -INT16_C( 31688), INT16_C( 21887), INT16_C( 32158), INT16_C( 8703), INT16_C( 18082), -INT16_C( 23706), -INT16_C( 20507), -INT16_C( 27501), -INT16_C( 20964), -INT16_C( 14220), -INT16_C( 17993), INT16_C( 9771), INT16_C( 16391), -INT16_C( 31592), -INT16_C( 29547), -INT16_C( 12812), INT16_C( 29712), -INT16_C( 20702), INT16_C( 8689), -INT16_C( 27696), INT16_C( 13927), INT16_C( 19511), -INT16_C( 13594), INT16_C( 737), INT16_C( 21881), INT16_C( 12490), -INT16_C( 2802), INT16_C( 5718) }, { -INT32_C( 76203945), -INT32_C( 1391938780), INT32_C( 0), INT32_C( 439471295), INT32_C( 1094566494), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 60125631), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 509218309), INT32_C( 139093896) } }, { UINT16_C(60981), { -INT16_C( 13670), -INT16_C( 28806), -INT16_C( 29801), -INT16_C( 18173), -INT16_C( 3014), INT16_C( 2779), INT16_C( 17032), -INT16_C( 16575), INT16_C( 10127), INT16_C( 28809), INT16_C( 553), -INT16_C( 2875), -INT16_C( 11470), -INT16_C( 30487), INT16_C( 8169), -INT16_C( 31625), -INT16_C( 3607), -INT16_C( 32493), INT16_C( 5756), -INT16_C( 18886), INT16_C( 5386), -INT16_C( 27967), INT16_C( 600), -INT16_C( 6319), -INT16_C( 9431), INT16_C( 21079), INT16_C( 7389), INT16_C( 4166), INT16_C( 12527), -INT16_C( 9832), INT16_C( 3919), INT16_C( 14429) }, { INT16_C( 28673), INT16_C( 32185), -INT16_C( 2938), -INT16_C( 28620), -INT16_C( 2807), INT16_C( 24867), INT16_C( 29943), INT16_C( 8264), -INT16_C( 24753), INT16_C( 11634), -INT16_C( 17989), -INT16_C( 21699), -INT16_C( 10775), INT16_C( 14468), -INT16_C( 7707), -INT16_C( 6544), INT16_C( 10833), -INT16_C( 10397), -INT16_C( 26850), INT16_C( 10087), -INT16_C( 30068), -INT16_C( 31863), -INT16_C( 11777), INT16_C( 20131), INT16_C( 5745), INT16_C( 11387), 
-INT16_C( 18225), -INT16_C( 18217), INT16_C( 23438), INT16_C( 29680), INT16_C( 24636), -INT16_C( 29351) }, { -INT32_C( 1319081020), INT32_C( 0), INT32_C( 77565691), INT32_C( 0), INT32_C( 84490275), INT32_C( 52436708), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 345051682), INT32_C( 729166273), -INT32_C( 134273989), INT32_C( 0), -INT32_C( 210556547), INT32_C( 1794066), -INT32_C( 326957095) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_maskz_madd_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_madd_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_madd_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_madd_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_madd_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_madd_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_madd_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_madd_epi16) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/maddubs.c000066400000000000000000003430321400333146700170440ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Ashleigh Newman-Jones */ #define SIMDE_TEST_X86_AVX512_INSN maddubs #include #include static int test_simde_mm_mask_maddubs_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t src[8]; const simde__mmask8 k; const int8_t a[16]; const int8_t b[16]; const int16_t r[8]; } test_vec[] = { { { -INT16_C( 28646), -INT16_C( 10601), -INT16_C( 25326), -INT16_C( 24892), INT16_C( 24744), INT16_C( 4254), INT16_C( 10934), -INT16_C( 11273) }, UINT8_C( 25), { INT8_C( 38), -INT8_C( 48), INT8_C( 116), INT8_C( 16), INT8_C( 1), INT8_C( 122), -INT8_C( 94), INT8_C( 26), INT8_C( 29), -INT8_C( 5), -INT8_C( 54), -INT8_C( 74), INT8_C( 51), -INT8_C( 120), -INT8_C( 48), -INT8_C( 61) }, { INT8_C( 31), -INT8_C( 90), -INT8_C( 43), -INT8_C( 68), INT8_C( 106), INT8_C( 115), INT8_C( 101), -INT8_C( 53), INT8_C( 17), INT8_C( 117), -INT8_C( 127), INT8_C( 60), INT8_C( 108), INT8_C( 85), INT8_C( 85), -INT8_C( 110) }, { -INT16_C( 17542), -INT16_C( 10601), -INT16_C( 25326), INT16_C( 14984), INT16_C( 29860), INT16_C( 4254), INT16_C( 10934), -INT16_C( 11273) } }, { { -INT16_C( 14043), INT16_C( 9890), INT16_C( 17731), INT16_C( 24641), INT16_C( 2880), INT16_C( 29462), -INT16_C( 6509), -INT16_C( 19658) }, UINT8_C(140), { INT8_C( 12), INT8_C( 111), -INT8_C( 9), INT8_MAX, -INT8_C( 44), -INT8_C( 62), -INT8_C( 111), INT8_C( 74), INT8_C( 67), -INT8_C( 51), -INT8_C( 74), -INT8_C( 104), INT8_C( 34), INT8_C( 73), -INT8_C( 67), -INT8_C( 21) }, { -INT8_C( 21), -INT8_C( 28), INT8_C( 46), 
INT8_C( 48), INT8_C( 37), -INT8_C( 113), INT8_C( 112), INT8_C( 48), -INT8_C( 91), -INT8_C( 29), -INT8_C( 60), -INT8_C( 116), INT8_C( 26), INT8_C( 119), INT8_C( 24), INT8_C( 38) }, { -INT16_C( 14043), INT16_C( 9890), -INT16_C( 14078), INT16_C( 19792), INT16_C( 2880), INT16_C( 29462), -INT16_C( 6509), INT16_C( 13466) } }, { { INT16_C( 4070), -INT16_C( 17499), INT16_C( 14033), INT16_C( 5381), -INT16_C( 17661), INT16_C( 9645), INT16_C( 27396), -INT16_C( 4080) }, UINT8_C( 79), { INT8_C( 63), INT8_C( 32), INT8_C( 116), -INT8_C( 50), -INT8_C( 111), -INT8_C( 92), INT8_C( 115), INT8_C( 116), INT8_C( 104), -INT8_C( 1), -INT8_C( 114), -INT8_C( 33), INT8_C( 24), -INT8_C( 76), -INT8_C( 58), INT8_C( 39) }, { INT8_C( 90), -INT8_C( 127), -INT8_C( 7), -INT8_C( 112), -INT8_C( 122), INT8_C( 14), -INT8_C( 108), INT8_C( 65), -INT8_C( 69), -INT8_C( 71), INT8_C( 70), INT8_C( 38), -INT8_C( 54), INT8_C( 54), INT8_C( 117), INT8_C( 9) }, { INT16_C( 1606), -INT16_C( 23884), -INT16_C( 15394), -INT16_C( 4880), -INT16_C( 17661), INT16_C( 9645), INT16_C( 8424), -INT16_C( 4080) } }, { { -INT16_C( 5802), -INT16_C( 6185), INT16_C( 19086), -INT16_C( 2468), -INT16_C( 5558), INT16_C( 25302), -INT16_C( 25441), -INT16_C( 1655) }, UINT8_C( 29), { -INT8_C( 126), -INT8_C( 119), -INT8_C( 93), -INT8_C( 112), INT8_C( 29), -INT8_C( 28), INT8_C( 76), -INT8_C( 41), INT8_C( 42), INT8_C( 114), -INT8_C( 95), INT8_C( 96), -INT8_C( 24), -INT8_C( 86), -INT8_C( 73), -INT8_C( 47) }, { -INT8_C( 127), -INT8_C( 98), INT8_C( 95), -INT8_C( 53), -INT8_C( 6), INT8_C( 86), INT8_C( 21), -INT8_C( 27), INT8_C( 44), INT8_C( 119), -INT8_C( 124), -INT8_C( 56), INT8_C( 1), INT8_C( 125), -INT8_C( 27), -INT8_C( 125) }, { -INT16_C( 29936), -INT16_C( 6185), INT16_C( 19434), -INT16_C( 4209), INT16_C( 15414), INT16_C( 25302), -INT16_C( 25441), -INT16_C( 1655) } }, { { -INT16_C( 30714), INT16_C( 9236), INT16_C( 24684), -INT16_C( 26629), -INT16_C( 25390), -INT16_C( 17673), -INT16_C( 20922), -INT16_C( 14452) }, UINT8_C( 77), { -INT8_C( 21), 
-INT8_C( 110), INT8_C( 71), INT8_C( 65), -INT8_C( 88), INT8_C( 44), INT8_C( 109), INT8_C( 31), -INT8_C( 80), INT8_C( 53), INT8_C( 32), INT8_C( 45), INT8_C( 26), -INT8_C( 92), INT8_C( 52), -INT8_C( 94) }, { -INT8_C( 72), INT8_C( 88), INT8_C( 15), INT8_C( 24), INT8_C( 83), -INT8_C( 90), -INT8_C( 22), -INT8_C( 17), -INT8_C( 99), -INT8_C( 91), INT8_C( 53), INT8_C( 76), INT8_C( 49), -INT8_C( 4), -INT8_C( 103), INT8_C( 28) }, { -INT16_C( 4072), INT16_C( 9236), INT16_C( 9984), -INT16_C( 2925), -INT16_C( 25390), -INT16_C( 17673), INT16_C( 618), -INT16_C( 14452) } }, { { -INT16_C( 8050), INT16_C( 13918), -INT16_C( 13555), -INT16_C( 17066), INT16_C( 30209), INT16_C( 7147), INT16_C( 7962), -INT16_C( 11586) }, UINT8_C(119), { -INT8_C( 51), -INT8_C( 22), -INT8_C( 54), INT8_C( 115), -INT8_C( 43), -INT8_C( 71), INT8_C( 16), INT8_C( 122), -INT8_C( 18), INT8_C( 92), -INT8_C( 85), -INT8_C( 22), -INT8_C( 11), -INT8_C( 57), INT8_C( 120), -INT8_C( 42) }, { INT8_C( 37), -INT8_C( 81), -INT8_C( 29), -INT8_C( 15), INT8_C( 5), -INT8_C( 96), -INT8_C( 14), INT8_C( 123), -INT8_C( 117), INT8_C( 13), -INT8_C( 106), -INT8_C( 86), -INT8_C( 53), INT8_C( 104), INT8_C( 33), -INT8_C( 104) }, { -INT16_C( 11369), -INT16_C( 7583), -INT16_C( 16695), -INT16_C( 17066), -INT16_C( 26650), INT16_MIN, INT16_C( 7711), -INT16_C( 11586) } }, { { -INT16_C( 5293), INT16_C( 10251), INT16_C( 7332), -INT16_C( 27998), INT16_C( 19832), INT16_C( 28284), -INT16_C( 2796), INT16_C( 14916) }, UINT8_C(164), { INT8_C( 39), INT8_C( 43), -INT8_C( 87), -INT8_C( 57), INT8_C( 29), INT8_C( 36), INT8_C( 83), INT8_C( 42), -INT8_C( 70), -INT8_C( 3), -INT8_C( 10), INT8_C( 35), INT8_C( 31), -INT8_C( 114), INT8_C( 118), INT8_C( 10) }, { -INT8_C( 102), -INT8_C( 98), -INT8_C( 81), -INT8_C( 74), INT8_C( 64), INT8_C( 65), INT8_C( 46), -INT8_C( 115), -INT8_C( 66), -INT8_C( 100), -INT8_C( 95), -INT8_C( 77), -INT8_C( 32), -INT8_C( 37), INT8_C( 87), INT8_C( 7) }, { -INT16_C( 5293), INT16_C( 10251), INT16_C( 4196), -INT16_C( 27998), INT16_C( 
19832), -INT16_C( 26065), -INT16_C( 2796), INT16_C( 10336) } }, { { INT16_C( 6), INT16_C( 9167), INT16_C( 8740), -INT16_C( 8370), INT16_C( 17439), INT16_C( 15874), INT16_C( 30930), INT16_C( 27721) }, UINT8_C( 22), { -INT8_C( 8), INT8_C( 34), INT8_C( 86), INT8_C( 57), INT8_C( 81), -INT8_C( 29), -INT8_C( 9), -INT8_C( 19), -INT8_C( 124), -INT8_C( 86), -INT8_C( 50), INT8_C( 96), INT8_C( 1), -INT8_C( 43), INT8_C( 102), INT8_C( 1) }, { -INT8_C( 92), -INT8_C( 118), INT8_C( 38), -INT8_C( 58), -INT8_C( 40), INT8_C( 5), -INT8_C( 26), INT8_C( 28), INT8_C( 7), INT8_C( 36), -INT8_C( 18), INT8_MAX, INT8_C( 109), INT8_C( 91), -INT8_C( 107), INT8_C( 101) }, { INT16_C( 6), -INT16_C( 38), -INT16_C( 2105), -INT16_C( 8370), INT16_C( 7044), INT16_C( 15874), INT16_C( 30930), INT16_C( 27721) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_mm_loadu_epi16(test_vec[i].src); simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_mask_maddubs_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i src = simde_test_x86_random_i16x8(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i b = simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_mask_maddubs_epi16(src, k, a, b); simde_test_x86_write_i16x8(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif }
/* test_simde_mm_maskz_maddubs_epi16
 *
 * Checks simde_mm_maskz_maddubs_epi16 against a table of stored vectors.
 * Each table entry holds a mask `k`, two 16-element int8_t inputs `a` and
 * `b`, and the expected 8-element int16_t result `r`.
 *
 * The stored data is consistent with `a` being read as unsigned bytes, `b`
 * as signed bytes, each adjacent pair of products being summed with signed
 * 16-bit saturation, and result lanes whose bit in `k` is clear being zero.
 * Example (first entry): k = 16 selects only lane 4, where
 * 113 * -89 + 135 * -28 = -13837 (135 is -121 read as an unsigned byte).
 *
 * Because the guard is `#if 1`, the compare-against-table branch is the one
 * that is compiled; it returns 0 after the loop.  The `#else` branch prints
 * eight sets of random inputs together with the computed result and
 * returns 1.
 * NOTE(review): the `#else` branch looks like the path used to regenerate
 * the table, and 0/1 look like the harness' pass/fail codes -- confirm
 * against the test harness. */
static int test_simde_mm_maskz_maddubs_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask8 k; const int8_t a[16]; const int8_t b[16]; const 
int16_t r[8]; } test_vec[] = { { UINT8_C( 16), { INT8_C( 57), -INT8_C( 82), -INT8_C( 102), -INT8_C( 24), INT8_C( 14), INT8_C( 111), -INT8_C( 56), -INT8_C( 118), INT8_C( 113), -INT8_C( 121), -INT8_C( 90), -INT8_C( 2), INT8_C( 80), INT8_C( 40), INT8_C( 67), INT8_C( 27) }, { INT8_C( 38), INT8_C( 86), INT8_C( 79), INT8_C( 6), INT8_C( 90), INT8_C( 89), INT8_C( 5), INT8_C( 52), -INT8_C( 89), -INT8_C( 28), INT8_C( 95), INT8_C( 13), INT8_C( 19), -INT8_C( 75), INT8_C( 29), INT8_C( 76) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 13837), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT8_C( 99), { -INT8_C( 73), INT8_C( 52), INT8_C( 113), INT8_C( 39), -INT8_C( 3), -INT8_C( 5), -INT8_C( 104), -INT8_C( 124), -INT8_C( 95), -INT8_C( 106), -INT8_C( 43), -INT8_C( 55), -INT8_C( 39), -INT8_C( 16), -INT8_C( 17), INT8_C( 47) }, { INT8_C( 63), -INT8_C( 10), -INT8_C( 119), -INT8_C( 104), -INT8_C( 5), -INT8_C( 66), INT8_C( 64), -INT8_C( 33), INT8_C( 29), INT8_C( 77), -INT8_C( 14), -INT8_C( 46), INT8_C( 106), INT8_C( 63), INT8_C( 53), INT8_C( 33) }, { INT16_C( 11009), -INT16_C( 17503), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 12228), INT16_MAX, INT16_C( 0) } }, { UINT8_C(115), { -INT8_C( 90), INT8_C( 72), INT8_C( 112), -INT8_C( 94), -INT8_C( 32), -INT8_C( 11), INT8_C( 67), INT8_C( 119), -INT8_C( 54), INT8_C( 13), INT8_C( 80), -INT8_C( 70), -INT8_C( 4), INT8_MIN, -INT8_C( 7), -INT8_C( 14) }, { INT8_C( 9), -INT8_C( 110), -INT8_C( 18), -INT8_C( 57), -INT8_C( 46), -INT8_C( 51), -INT8_C( 28), INT8_C( 31), -INT8_C( 64), -INT8_C( 74), -INT8_C( 119), -INT8_C( 1), -INT8_C( 21), -INT8_C( 86), INT8_C( 114), -INT8_C( 110) }, { -INT16_C( 6426), -INT16_C( 11250), INT16_C( 0), INT16_C( 0), -INT16_C( 13890), -INT16_C( 9706), -INT16_C( 16300), INT16_C( 0) } }, { UINT8_C(243), { -INT8_C( 29), INT8_C( 52), -INT8_C( 45), -INT8_C( 40), INT8_C( 119), INT8_C( 74), -INT8_C( 94), -INT8_C( 124), -INT8_C( 101), INT8_C( 92), -INT8_C( 127), INT8_C( 27), INT8_C( 85), INT8_C( 115), INT8_C( 
36), -INT8_C( 25) }, { INT8_C( 97), -INT8_C( 20), -INT8_C( 71), INT8_C( 47), -INT8_C( 48), -INT8_C( 40), -INT8_C( 17), -INT8_C( 121), INT8_C( 97), -INT8_C( 18), INT8_C( 114), INT8_C( 12), INT8_C( 96), INT8_C( 4), -INT8_C( 1), INT8_C( 67) }, { INT16_C( 20979), -INT16_C( 4829), INT16_C( 0), INT16_C( 0), INT16_C( 13379), INT16_C( 15030), INT16_C( 8620), INT16_C( 15441) } }, { UINT8_C( 56), { -INT8_C( 46), INT8_C( 27), -INT8_C( 80), INT8_C( 29), -INT8_C( 67), INT8_C( 52), -INT8_C( 72), INT8_C( 25), -INT8_C( 75), -INT8_C( 45), INT8_C( 111), INT8_C( 41), -INT8_C( 9), INT8_C( 86), -INT8_C( 118), -INT8_C( 29) }, { INT8_C( 16), -INT8_C( 71), -INT8_C( 76), -INT8_C( 24), -INT8_C( 88), INT8_C( 59), INT8_C( 74), -INT8_C( 106), -INT8_C( 83), INT8_C( 86), -INT8_C( 9), -INT8_C( 78), INT8_C( 85), INT8_C( 58), -INT8_C( 22), INT8_C( 39) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 10966), INT16_C( 3123), -INT16_C( 4197), INT16_C( 0), INT16_C( 0) } }, { UINT8_C( 86), { -INT8_C( 102), INT8_C( 68), INT8_C( 19), -INT8_C( 49), -INT8_C( 4), INT8_C( 45), -INT8_C( 124), -INT8_C( 49), -INT8_C( 100), -INT8_C( 83), -INT8_C( 57), -INT8_C( 14), INT8_C( 56), -INT8_C( 86), INT8_C( 2), -INT8_C( 15) }, { INT8_C( 94), -INT8_C( 21), -INT8_C( 102), -INT8_C( 103), INT8_C( 53), INT8_C( 48), INT8_C( 71), -INT8_C( 117), INT8_C( 39), -INT8_C( 7), -INT8_C( 32), INT8_C( 98), -INT8_C( 29), INT8_C( 7), -INT8_C( 72), INT8_C( 126) }, { INT16_C( 0), -INT16_C( 23259), INT16_C( 15516), INT16_C( 0), INT16_C( 4873), INT16_C( 0), -INT16_C( 434), INT16_C( 0) } }, { UINT8_C( 76), { -INT8_C( 53), INT8_C( 77), INT8_C( 72), -INT8_C( 8), -INT8_C( 47), INT8_C( 24), -INT8_C( 108), INT8_MAX, -INT8_C( 33), -INT8_C( 121), -INT8_C( 73), -INT8_C( 119), -INT8_C( 119), -INT8_C( 88), -INT8_C( 24), INT8_C( 116) }, { INT8_C( 66), -INT8_C( 127), -INT8_C( 87), INT8_C( 115), -INT8_C( 56), INT8_C( 52), -INT8_C( 102), -INT8_C( 63), INT8_C( 20), -INT8_C( 4), -INT8_C( 91), INT8_C( 28), -INT8_C( 76), INT8_C( 35), INT8_C( 104), INT8_MIN 
}, { INT16_C( 0), INT16_C( 0), -INT16_C( 10456), -INT16_C( 23097), INT16_C( 0), INT16_C( 0), -INT16_C( 4532), INT16_C( 0) } }, { UINT8_C(112), { -INT8_C( 80), INT8_C( 120), INT8_C( 65), -INT8_C( 56), INT8_C( 13), -INT8_C( 64), -INT8_C( 89), -INT8_C( 108), INT8_C( 119), INT8_C( 49), INT8_C( 29), INT8_C( 32), INT8_C( 25), -INT8_C( 110), INT8_C( 98), -INT8_C( 102) }, { INT8_C( 59), -INT8_C( 43), INT8_C( 99), INT8_C( 112), INT8_C( 112), INT8_C( 36), -INT8_C( 124), INT8_C( 108), -INT8_C( 55), -INT8_C( 96), INT8_C( 33), -INT8_C( 20), INT8_C( 8), -INT8_C( 95), INT8_C( 92), -INT8_C( 71) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 11249), INT16_C( 317), -INT16_C( 13670), INT16_C( 0) } } };
/* Run every stored vector: load a and b, apply the zero-masking operation
 * with the entry's k, and compare the result with the expected r. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_maskz_maddubs_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r)); } return 0;
/* Alternate branch of the #if/#else pair: print eight random input sets and
 * the computed result instead of checking stored vectors. */
#else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i b = simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_maskz_maddubs_epi16(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif }
/* test_simde_mm256_mask_maddubs_epi16
 *
 * Checks simde_mm256_mask_maddubs_epi16 against a table of stored vectors.
 * Each table entry holds a 16-element int16_t fallback vector `src`, a
 * 16-bit mask `k`, two 32-element int8_t inputs `a` and `b`, and the
 * expected 16-element int16_t result `r`.
 *
 * The stored data is consistent with `a` being read as unsigned bytes, `b`
 * as signed bytes, each adjacent pair of products being summed with signed
 * 16-bit saturation, and result lanes whose bit in `k` is clear being
 * copied from `src`.  Example (first entry, k = 32069 = 0x7D45): bit 0 is
 * set and r[0] = 12 * -63 + 164 * -5 = -1576 (164 is -92 read as an
 * unsigned byte); bit 1 is clear and r[1] = src[1] = 9857.
 *
 * Because the guard is `#if 1`, the compare-against-table branch is the one
 * that is compiled; it returns 0 after the loop.  The `#else` branch prints
 * eight sets of random inputs together with the computed result and
 * returns 1.
 * NOTE(review): the `#else` branch looks like the path used to regenerate
 * the table, and 0/1 look like the harness' pass/fail codes -- confirm
 * against the test harness. */
static int test_simde_mm256_mask_maddubs_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t src[16]; const simde__mmask16 k; const int8_t a[32]; const int8_t b[32]; const int16_t r[16]; } test_vec[] = { { { -INT16_C( 25063), INT16_C( 9857), INT16_C( 10590), -INT16_C( 10566), -INT16_C( 10150), INT16_C( 29686), INT16_C( 22634), -INT16_C( 23283), INT16_C( 28718), -INT16_C( 25067), -INT16_C( 25963), 
INT16_C( 24074), INT16_C( 11066), INT16_C( 17227), -INT16_C( 22580), -INT16_C( 6404) }, UINT16_C(32069), { INT8_C( 12), -INT8_C( 92), -INT8_C( 90), -INT8_C( 57), INT8_C( 122), INT8_C( 0), -INT8_C( 97), INT8_C( 112), INT8_C( 115), INT8_C( 9), -INT8_C( 56), -INT8_C( 127), -INT8_C( 82), -INT8_C( 10), -INT8_C( 15), -INT8_C( 60), -INT8_C( 108), -INT8_C( 122), INT8_C( 94), -INT8_C( 97), -INT8_C( 27), -INT8_C( 104), -INT8_C( 54), INT8_C( 48), -INT8_C( 37), -INT8_C( 105), -INT8_C( 41), -INT8_C( 41), INT8_C( 125), INT8_C( 29), INT8_C( 85), -INT8_C( 119) }, { -INT8_C( 63), -INT8_C( 5), INT8_C( 80), INT8_C( 59), -INT8_C( 4), -INT8_C( 17), -INT8_C( 85), INT8_C( 111), -INT8_C( 8), INT8_C( 115), -INT8_C( 16), -INT8_C( 89), INT8_C( 106), -INT8_C( 30), INT8_C( 107), -INT8_C( 2), INT8_C( 104), -INT8_C( 55), -INT8_C( 99), INT8_C( 77), INT8_C( 97), INT8_C( 104), INT8_C( 125), INT8_C( 61), -INT8_C( 1), INT8_C( 85), INT8_C( 20), INT8_C( 124), INT8_C( 114), INT8_C( 105), INT8_C( 5), INT8_C( 51) }, { -INT16_C( 1576), INT16_C( 9857), -INT16_C( 488), -INT16_C( 10566), -INT16_C( 10150), INT16_C( 29686), INT16_C( 11064), -INT16_C( 23283), INT16_C( 8022), -INT16_C( 25067), INT16_MAX, INT16_C( 28178), INT16_C( 12616), INT16_C( 30960), INT16_C( 17295), -INT16_C( 6404) } }, { { INT16_C( 22117), INT16_C( 24942), INT16_C( 6469), INT16_C( 16080), -INT16_C( 15988), -INT16_C( 2331), INT16_C( 20643), INT16_C( 3061), -INT16_C( 28135), INT16_C( 31321), -INT16_C( 10502), -INT16_C( 1609), -INT16_C( 13269), -INT16_C( 25227), INT16_C( 31541), -INT16_C( 25904) }, UINT16_C(16081), { -INT8_C( 5), INT8_C( 22), INT8_C( 87), -INT8_C( 52), INT8_C( 84), -INT8_C( 28), -INT8_C( 115), INT8_C( 57), -INT8_C( 38), INT8_C( 48), -INT8_C( 119), -INT8_C( 49), INT8_C( 59), -INT8_C( 94), INT8_C( 98), -INT8_C( 108), INT8_C( 29), INT8_C( 92), INT8_C( 107), -INT8_C( 44), INT8_C( 86), -INT8_C( 106), -INT8_C( 96), -INT8_C( 53), INT8_C( 52), -INT8_C( 42), INT8_C( 70), INT8_C( 4), INT8_C( 112), INT8_C( 23), INT8_C( 67), INT8_C( 108) 
}, { INT8_C( 46), -INT8_C( 102), INT8_C( 56), -INT8_C( 126), INT8_C( 126), -INT8_C( 59), -INT8_C( 68), INT8_C( 89), -INT8_C( 11), INT8_C( 69), INT8_C( 40), INT8_C( 48), -INT8_C( 24), -INT8_C( 118), -INT8_C( 59), INT8_C( 5), -INT8_C( 25), INT8_C( 48), -INT8_C( 39), INT8_C( 61), -INT8_C( 58), INT8_C( 122), INT8_C( 8), -INT8_C( 6), INT8_C( 80), INT8_C( 79), -INT8_C( 1), -INT8_C( 64), INT8_C( 102), INT8_C( 66), INT8_C( 44), -INT8_C( 108) }, { INT16_C( 9302), INT16_C( 24942), INT16_C( 6469), INT16_C( 16080), INT16_C( 914), -INT16_C( 2331), -INT16_C( 20532), -INT16_C( 5042), -INT16_C( 28135), INT16_C( 8759), INT16_C( 13312), INT16_C( 62), INT16_C( 21066), -INT16_C( 326), INT16_C( 31541), -INT16_C( 25904) } }, { { INT16_C( 25820), INT16_C( 23319), -INT16_C( 11479), INT16_C( 7860), -INT16_C( 9192), INT16_C( 79), INT16_C( 5223), INT16_C( 19973), -INT16_C( 8380), INT16_C( 2699), -INT16_C( 27815), -INT16_C( 22267), INT16_C( 1250), INT16_C( 18793), -INT16_C( 27066), INT16_C( 8925) }, UINT16_C(62714), { INT8_C( 125), INT8_C( 36), -INT8_C( 57), INT8_C( 49), INT8_C( 66), -INT8_C( 32), INT8_C( 14), -INT8_C( 111), -INT8_C( 32), INT8_C( 117), -INT8_C( 91), -INT8_C( 26), -INT8_C( 61), -INT8_C( 23), -INT8_C( 59), INT8_C( 78), -INT8_C( 12), INT8_C( 30), -INT8_C( 31), -INT8_C( 7), -INT8_C( 57), -INT8_C( 60), -INT8_C( 3), INT8_C( 48), INT8_C( 13), INT8_C( 67), -INT8_C( 58), -INT8_C( 22), INT8_C( 101), -INT8_C( 63), -INT8_C( 33), -INT8_C( 29) }, { -INT8_C( 27), -INT8_C( 90), INT8_C( 20), INT8_C( 39), -INT8_C( 122), INT8_C( 34), -INT8_C( 71), INT8_C( 103), -INT8_C( 105), INT8_C( 94), INT8_C( 77), INT8_C( 90), INT8_C( 72), INT8_C( 18), -INT8_C( 88), INT8_C( 60), INT8_C( 48), -INT8_C( 118), INT8_C( 53), -INT8_C( 9), INT8_C( 78), INT8_C( 50), INT8_C( 39), INT8_C( 91), INT8_C( 117), -INT8_C( 18), INT8_C( 69), -INT8_C( 38), -INT8_C( 81), INT8_C( 36), -INT8_C( 67), -INT8_C( 108) }, { INT16_C( 25820), INT16_C( 5891), -INT16_C( 11479), INT16_C( 13941), -INT16_C( 12522), INT16_MAX, INT16_C( 18234), 
-INT16_C( 12656), -INT16_C( 8380), INT16_C( 2699), INT16_C( 25322), -INT16_C( 22267), INT16_C( 315), INT16_C( 4770), -INT16_C( 1233), INT16_MIN } }, { { -INT16_C( 11573), INT16_C( 20923), INT16_C( 29940), -INT16_C( 29512), INT16_C( 1491), INT16_C( 7142), -INT16_C( 28905), INT16_C( 18263), -INT16_C( 29671), INT16_C( 26430), INT16_C( 26302), INT16_C( 13250), INT16_C( 1876), INT16_C( 781), -INT16_C( 13524), -INT16_C( 2153) }, UINT16_C(21149), { INT8_C( 72), -INT8_C( 111), -INT8_C( 57), INT8_C( 1), INT8_C( 29), -INT8_C( 102), INT8_C( 6), INT8_C( 4), -INT8_C( 75), INT8_C( 30), -INT8_C( 109), INT8_C( 12), INT8_C( 101), -INT8_C( 84), -INT8_C( 104), -INT8_C( 92), INT8_C( 19), INT8_C( 86), INT8_C( 10), -INT8_C( 43), -INT8_C( 119), INT8_C( 94), -INT8_C( 36), -INT8_C( 106), INT8_C( 97), INT8_C( 8), INT8_C( 97), -INT8_C( 8), -INT8_C( 1), -INT8_C( 2), INT8_C( 74), INT8_C( 72) }, { -INT8_C( 112), INT8_C( 17), INT8_C( 73), -INT8_C( 83), -INT8_C( 85), INT8_C( 79), -INT8_C( 79), INT8_C( 96), INT8_C( 109), INT8_C( 68), INT8_C( 108), -INT8_C( 45), -INT8_C( 16), INT8_C( 4), INT8_C( 119), INT8_C( 3), INT8_C( 90), -INT8_C( 127), -INT8_C( 40), -INT8_C( 29), -INT8_C( 33), -INT8_C( 75), INT8_C( 122), INT8_C( 64), -INT8_C( 67), -INT8_C( 37), INT8_C( 56), -INT8_C( 67), -INT8_C( 38), -INT8_C( 126), INT8_C( 5), INT8_C( 106) }, { -INT16_C( 5599), INT16_C( 20923), INT16_C( 9701), -INT16_C( 90), INT16_C( 21769), INT16_C( 7142), -INT16_C( 28905), INT16_C( 18580), -INT16_C( 29671), -INT16_C( 6577), INT16_C( 26302), INT16_C( 13250), -INT16_C( 6795), INT16_C( 781), INT16_MIN, -INT16_C( 2153) } }, { { INT16_C( 20116), INT16_C( 16151), -INT16_C( 13923), INT16_C( 2976), INT16_C( 3085), -INT16_C( 290), INT16_C( 21777), INT16_C( 27393), -INT16_C( 9514), -INT16_C( 19121), -INT16_C( 13937), INT16_C( 19701), INT16_C( 11684), INT16_C( 32265), INT16_C( 3759), INT16_C( 17384) }, UINT16_C( 92), { -INT8_C( 125), -INT8_C( 6), -INT8_C( 55), INT8_C( 35), INT8_C( 5), -INT8_C( 42), INT8_C( 47), -INT8_C( 29), -INT8_C( 
44), INT8_C( 64), INT8_C( 56), -INT8_C( 42), -INT8_C( 84), INT8_C( 14), -INT8_C( 80), -INT8_C( 5), -INT8_C( 61), INT8_C( 63), -INT8_C( 60), -INT8_C( 72), -INT8_C( 117), INT8_C( 104), -INT8_C( 27), -INT8_C( 107), -INT8_C( 25), -INT8_C( 108), -INT8_C( 93), -INT8_C( 49), -INT8_C( 40), INT8_C( 0), -INT8_C( 49), INT8_C( 91) }, { -INT8_C( 6), -INT8_C( 104), INT8_C( 126), -INT8_C( 1), INT8_C( 111), -INT8_C( 83), -INT8_C( 30), INT8_C( 67), -INT8_C( 18), INT8_C( 26), INT8_C( 25), -INT8_C( 102), INT8_C( 40), -INT8_C( 55), -INT8_C( 107), -INT8_C( 21), INT8_C( 8), INT8_C( 89), -INT8_C( 93), -INT8_C( 108), -INT8_C( 63), -INT8_C( 120), INT8_C( 41), -INT8_C( 88), INT8_C( 28), -INT8_C( 52), INT8_C( 120), -INT8_C( 12), -INT8_C( 52), INT8_C( 71), INT8_C( 79), -INT8_C( 58) }, { INT16_C( 20116), INT16_C( 16151), -INT16_C( 17207), INT16_C( 13799), -INT16_C( 2152), -INT16_C( 290), INT16_C( 6110), INT16_C( 27393), -INT16_C( 9514), -INT16_C( 19121), -INT16_C( 13937), INT16_C( 19701), INT16_C( 11684), INT16_C( 32265), INT16_C( 3759), INT16_C( 17384) } }, { { -INT16_C( 12832), INT16_C( 20421), -INT16_C( 22661), INT16_C( 27026), -INT16_C( 21311), -INT16_C( 5885), -INT16_C( 26507), INT16_C( 32468), INT16_C( 30705), -INT16_C( 19950), INT16_C( 15359), INT16_C( 7259), -INT16_C( 11513), -INT16_C( 11248), INT16_C( 24602), -INT16_C( 1382) }, UINT16_C(24621), { INT8_C( 73), -INT8_C( 88), INT8_C( 7), -INT8_C( 36), INT8_C( 17), -INT8_C( 55), -INT8_C( 120), INT8_C( 20), -INT8_C( 78), -INT8_C( 3), -INT8_C( 84), -INT8_C( 121), INT8_C( 123), -INT8_C( 99), -INT8_C( 2), -INT8_C( 115), INT8_C( 80), -INT8_C( 2), -INT8_C( 56), -INT8_C( 85), INT8_C( 26), -INT8_C( 48), INT8_C( 126), INT8_C( 42), -INT8_C( 92), -INT8_C( 104), -INT8_C( 118), INT8_C( 62), -INT8_C( 109), -INT8_C( 72), -INT8_C( 98), -INT8_C( 36) }, { INT8_C( 96), -INT8_C( 90), -INT8_C( 72), INT8_C( 114), INT8_C( 111), INT8_C( 64), -INT8_C( 122), INT8_C( 33), INT8_C( 62), INT8_C( 51), -INT8_C( 88), -INT8_C( 71), -INT8_C( 48), -INT8_C( 89), INT8_C( 71), 
INT8_C( 32), -INT8_C( 91), INT8_C( 15), -INT8_C( 53), -INT8_C( 65), -INT8_C( 33), INT8_C( 73), -INT8_C( 23), -INT8_C( 125), -INT8_C( 30), INT8_C( 116), -INT8_C( 62), INT8_C( 117), INT8_C( 44), INT8_C( 96), INT8_C( 81), -INT8_C( 116) }, { -INT16_C( 8112), INT16_C( 20421), INT16_C( 14751), -INT16_C( 15932), -INT16_C( 21311), -INT16_C( 24721), -INT16_C( 26507), INT16_C( 32468), INT16_C( 30705), -INT16_C( 19950), INT16_C( 15359), INT16_C( 7259), -INT16_C( 11513), -INT16_C( 1302), INT16_C( 24132), -INT16_C( 1382) } }, { { INT16_C( 2566), INT16_C( 30206), -INT16_C( 31414), -INT16_C( 30569), INT16_C( 16312), -INT16_C( 30654), -INT16_C( 30234), -INT16_C( 29783), INT16_C( 29848), INT16_C( 30794), INT16_C( 13502), -INT16_C( 24325), -INT16_C( 16984), -INT16_C( 11243), INT16_C( 26142), INT16_C( 9312) }, UINT16_C(24432), { -INT8_C( 102), -INT8_C( 69), -INT8_C( 28), INT8_C( 49), INT8_C( 67), -INT8_C( 100), INT8_C( 112), -INT8_C( 123), INT8_C( 36), INT8_C( 87), INT8_C( 14), -INT8_C( 51), -INT8_C( 30), -INT8_C( 89), INT8_C( 66), INT8_C( 45), INT8_C( 31), INT8_C( 0), INT8_C( 97), INT8_C( 26), -INT8_C( 96), INT8_C( 9), -INT8_C( 40), -INT8_C( 75), -INT8_C( 35), -INT8_C( 10), INT8_C( 27), INT8_C( 61), INT8_C( 26), -INT8_C( 116), -INT8_C( 100), -INT8_C( 76) }, { INT8_C( 71), INT8_MIN, -INT8_C( 27), -INT8_C( 118), INT8_C( 28), INT8_C( 86), INT8_C( 16), INT8_C( 65), -INT8_C( 83), INT8_C( 30), INT8_C( 14), -INT8_C( 113), -INT8_C( 59), INT8_C( 80), -INT8_C( 68), -INT8_C( 28), INT8_C( 80), INT8_C( 29), -INT8_C( 1), -INT8_C( 16), INT8_C( 38), -INT8_C( 41), -INT8_C( 91), INT8_C( 3), -INT8_C( 51), -INT8_C( 63), INT8_C( 65), -INT8_C( 25), INT8_C( 77), -INT8_C( 35), -INT8_C( 100), -INT8_C( 108) }, { INT16_C( 2566), INT16_C( 30206), -INT16_C( 31414), -INT16_C( 30569), -INT16_C( 378), -INT16_C( 22969), INT16_C( 26), -INT16_C( 29783), INT16_C( 2480), -INT16_C( 513), INT16_C( 5711), -INT16_C( 19113), -INT16_C( 26769), -INT16_C( 11243), -INT16_C( 2898), INT16_C( 9312) } }, { { -INT16_C( 32418), 
INT16_C( 31262), INT16_C( 11991), -INT16_C( 31557), -INT16_C( 13747), INT16_C( 4628), -INT16_C( 12262), INT16_C( 27639), -INT16_C( 2322), INT16_C( 5211), INT16_C( 461), -INT16_C( 26088), INT16_C( 22978), INT16_C( 3969), INT16_C( 7478), -INT16_C( 27485) }, UINT16_C(49567), { INT8_C( 15), INT8_C( 118), -INT8_C( 16), -INT8_C( 54), -INT8_C( 5), INT8_C( 61), -INT8_C( 108), INT8_C( 15), INT8_C( 79), -INT8_C( 81), -INT8_C( 33), INT8_C( 70), INT8_C( 26), -INT8_C( 51), INT8_C( 60), INT8_C( 117), -INT8_C( 30), INT8_C( 9), INT8_C( 118), -INT8_C( 6), -INT8_C( 93), INT8_C( 56), INT8_C( 83), INT8_C( 37), INT8_C( 71), -INT8_C( 119), INT8_C( 66), -INT8_C( 22), INT8_C( 30), -INT8_C( 31), -INT8_C( 84), INT8_C( 45) }, { INT8_C( 88), -INT8_C( 100), -INT8_C( 9), INT8_C( 83), -INT8_C( 39), -INT8_C( 116), INT8_C( 98), INT8_C( 40), INT8_C( 59), INT8_C( 65), INT8_C( 111), INT8_C( 85), INT8_C( 15), -INT8_C( 85), -INT8_C( 54), -INT8_C( 15), -INT8_C( 75), INT8_C( 65), -INT8_C( 21), INT8_C( 88), INT8_C( 121), INT8_C( 62), INT8_C( 125), -INT8_C( 63), -INT8_C( 57), -INT8_C( 64), -INT8_C( 85), -INT8_C( 27), -INT8_C( 95), INT8_C( 87), INT8_C( 18), -INT8_C( 7) }, { -INT16_C( 10480), INT16_C( 14606), -INT16_C( 16865), INT16_C( 15104), INT16_C( 16036), INT16_C( 4628), -INT16_C( 12262), -INT16_C( 4995), -INT16_C( 16365), INT16_C( 5211), INT16_C( 461), -INT16_C( 26088), INT16_C( 22978), INT16_C( 3969), INT16_C( 16725), INT16_C( 2781) } } };
/* Run every stored vector: load src, a and b, apply the merge-masking
 * operation with the entry's k, and compare the result with the expected r. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i src = simde_x_mm256_loadu_epi16(test_vec[i].src); simde__m256i a = simde_x_mm256_loadu_epi8(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi8(test_vec[i].b); simde__m256i r = simde_mm256_mask_maddubs_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0;
/* Alternate branch of the #if/#else pair: print eight random input sets and
 * the computed result instead of checking stored vectors. */
#else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i src = simde_test_x86_random_i16x16(); simde__mmask16 k = 
simde_test_x86_random_mmask16(); simde__m256i a = simde_test_x86_random_i8x32(); simde__m256i b = simde_test_x86_random_i8x32(); simde__m256i r = simde_mm256_mask_maddubs_epi16(src, k, a, b); simde_test_x86_write_i16x16(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif }
/* test_simde_mm256_maskz_maddubs_epi16
 *
 * Checks simde_mm256_maskz_maddubs_epi16 against a table of stored vectors.
 * Each table entry holds a 16-bit mask `k`, two 32-element int8_t inputs
 * `a` and `b`, and the expected 16-element int16_t result `r`.
 *
 * The stored data is consistent with `a` being read as unsigned bytes, `b`
 * as signed bytes, each adjacent pair of products being summed with signed
 * 16-bit saturation, and result lanes whose bit in `k` is clear being zero.
 * Example (first entry, k = 26598 = 0x67E6): bit 0 is clear and r[0] = 0;
 * bit 1 is set and r[1] = 26 * -115 + 124 * 86 = 7674.
 *
 * Because the guard is `#if 1`, the compare-against-table branch is the one
 * that is compiled; it returns 0 after the loop.  The `#else` branch prints
 * eight sets of random inputs together with the computed result and
 * returns 1.
 * NOTE(review): the `#else` branch looks like the path used to regenerate
 * the table, and 0/1 look like the harness' pass/fail codes -- confirm
 * against the test harness. */
static int test_simde_mm256_maskz_maddubs_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask16 k; const int8_t a[32]; const int8_t b[32]; const int16_t r[16]; } test_vec[] = { { UINT16_C(26598), { INT8_C( 114), INT8_C( 2), INT8_C( 26), INT8_C( 124), -INT8_C( 18), INT8_C( 2), INT8_C( 62), -INT8_C( 43), INT8_C( 83), -INT8_C( 95), INT8_C( 69), -INT8_C( 118), -INT8_C( 83), INT8_C( 46), INT8_C( 85), -INT8_C( 9), -INT8_C( 123), INT8_C( 123), -INT8_C( 55), -INT8_C( 52), INT8_C( 82), INT8_C( 7), INT8_C( 82), INT8_C( 79), -INT8_C( 95), -INT8_C( 65), INT8_MAX, INT8_C( 115), -INT8_C( 97), INT8_C( 102), -INT8_C( 38), INT8_C( 17) }, { INT8_C( 104), -INT8_C( 12), -INT8_C( 115), INT8_C( 86), -INT8_C( 9), -INT8_C( 53), INT8_C( 43), INT8_C( 74), INT8_C( 108), INT8_C( 112), -INT8_C( 44), INT8_C( 25), -INT8_C( 98), INT8_C( 42), INT8_C( 16), INT8_C( 35), -INT8_C( 91), -INT8_C( 38), -INT8_C( 17), -INT8_C( 8), -INT8_C( 31), INT8_C( 65), INT8_C( 71), -INT8_C( 126), INT8_C( 1), -INT8_C( 58), -INT8_C( 11), -INT8_C( 96), INT8_C( 44), -INT8_C( 48), -INT8_C( 79), -INT8_C( 108) }, { INT16_C( 0), INT16_C( 7674), -INT16_C( 2248), INT16_C( 0), INT16_C( 0), INT16_C( 414), -INT16_C( 15022), INT16_C( 10005), -INT16_C( 16777), -INT16_C( 5049), -INT16_C( 2087), INT16_C( 0), INT16_C( 0), -INT16_C( 12437), INT16_C( 2100), INT16_C( 0) } }, { UINT16_C(16068), { -INT8_C( 22), -INT8_C( 69), INT8_C( 9), INT8_C( 21), INT8_C( 5), INT8_C( 118), -INT8_C( 123), -INT8_C( 38), 
-INT8_C( 113), INT8_C( 36), INT8_C( 4), -INT8_C( 96), INT8_C( 71), -INT8_C( 87), INT8_C( 122), INT8_C( 55), -INT8_C( 95), INT8_C( 91), INT8_C( 120), -INT8_C( 24), -INT8_C( 35), INT8_C( 121), -INT8_C( 81), -INT8_C( 46), INT8_C( 25), -INT8_C( 37), -INT8_C( 94), -INT8_C( 54), INT8_C( 112), INT8_C( 103), INT8_C( 8), INT8_C( 90) }, { INT8_C( 34), INT8_C( 18), INT8_C( 112), INT8_C( 40), -INT8_C( 120), -INT8_C( 11), INT8_C( 2), INT8_C( 23), INT8_C( 25), INT8_C( 6), -INT8_C( 73), INT8_C( 97), -INT8_C( 81), INT8_C( 49), -INT8_C( 104), INT8_C( 81), -INT8_C( 116), INT8_C( 16), INT8_C( 57), INT8_C( 105), -INT8_C( 118), -INT8_C( 24), INT8_C( 60), -INT8_C( 93), -INT8_C( 60), -INT8_C( 34), INT8_C( 110), INT8_C( 52), INT8_C( 69), INT8_C( 118), -INT8_C( 114), INT8_C( 104) }, { INT16_C( 0), INT16_C( 0), -INT16_C( 1898), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 2530), -INT16_C( 8233), INT16_C( 0), INT16_C( 31200), -INT16_C( 28982), -INT16_C( 9030), -INT16_C( 8946), INT16_C( 28324), INT16_C( 0), INT16_C( 0) } }, { UINT16_C(65160), { -INT8_C( 112), INT8_C( 16), -INT8_C( 12), -INT8_C( 110), INT8_C( 40), INT8_C( 13), -INT8_C( 104), -INT8_C( 33), INT8_C( 110), INT8_C( 71), INT8_C( 17), INT8_C( 6), -INT8_C( 104), -INT8_C( 99), INT8_C( 23), -INT8_C( 46), INT8_C( 7), -INT8_C( 95), -INT8_C( 70), INT8_C( 67), INT8_C( 68), INT8_C( 126), INT8_C( 33), -INT8_C( 78), -INT8_C( 78), INT8_C( 103), INT8_C( 41), INT8_C( 65), -INT8_C( 49), -INT8_C( 79), INT8_C( 63), INT8_C( 95) }, { -INT8_C( 62), INT8_C( 51), -INT8_C( 15), -INT8_C( 22), INT8_C( 65), -INT8_C( 119), -INT8_C( 55), -INT8_C( 81), -INT8_C( 48), -INT8_C( 38), -INT8_C( 74), INT8_C( 105), INT8_C( 120), -INT8_C( 51), INT8_C( 59), INT8_MAX, INT8_C( 110), -INT8_C( 11), -INT8_C( 62), -INT8_C( 78), INT8_C( 116), -INT8_C( 29), INT8_C( 101), INT8_C( 38), INT8_C( 74), -INT8_C( 114), INT8_C( 103), INT8_C( 25), INT8_C( 63), -INT8_C( 89), INT8_C( 120), INT8_C( 1) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 26423), INT16_C( 0), INT16_C( 0), 
INT16_C( 0), INT16_C( 28027), INT16_C( 0), -INT16_C( 16758), INT16_C( 4234), INT16_C( 10097), INT16_C( 1430), INT16_C( 5848), -INT16_C( 2712), INT16_C( 7655) } }, { UINT16_C(27098), { -INT8_C( 21), INT8_C( 27), -INT8_C( 14), -INT8_C( 75), -INT8_C( 53), -INT8_C( 61), -INT8_C( 113), -INT8_C( 127), INT8_C( 44), INT8_C( 7), INT8_C( 78), INT8_C( 103), -INT8_C( 122), -INT8_C( 68), INT8_C( 92), INT8_C( 72), INT8_C( 110), -INT8_C( 48), INT8_C( 44), -INT8_C( 45), -INT8_C( 9), INT8_C( 118), INT8_C( 97), INT8_C( 94), -INT8_C( 112), -INT8_C( 95), INT8_C( 5), INT8_C( 8), -INT8_C( 94), -INT8_C( 32), INT8_C( 114), -INT8_C( 114) }, { -INT8_C( 5), INT8_C( 100), INT8_C( 67), -INT8_C( 58), INT8_C( 39), -INT8_C( 46), INT8_C( 71), INT8_C( 83), -INT8_C( 38), -INT8_C( 107), -INT8_C( 70), INT8_C( 96), INT8_C( 81), INT8_C( 23), -INT8_C( 87), -INT8_C( 64), -INT8_C( 25), -INT8_C( 43), -INT8_C( 109), -INT8_C( 34), INT8_C( 75), -INT8_C( 11), INT8_C( 61), -INT8_C( 37), -INT8_C( 106), INT8_C( 66), -INT8_C( 28), INT8_C( 56), INT8_C( 34), INT8_C( 86), -INT8_C( 58), INT8_C( 30) }, { INT16_C( 0), INT16_C( 5716), INT16_C( 0), INT16_C( 20860), -INT16_C( 2421), INT16_C( 0), INT16_C( 15178), -INT16_C( 12612), -INT16_C( 11694), INT16_C( 0), INT16_C( 0), INT16_C( 2439), INT16_C( 0), INT16_C( 308), INT16_C( 24772), INT16_C( 0) } }, { UINT16_C( 2490), { -INT8_C( 28), -INT8_C( 30), -INT8_C( 36), INT8_C( 44), INT8_C( 53), -INT8_C( 74), -INT8_C( 63), -INT8_C( 16), INT8_C( 22), INT8_C( 19), INT8_C( 7), -INT8_C( 65), -INT8_C( 45), -INT8_C( 18), -INT8_C( 108), INT8_C( 102), -INT8_C( 51), -INT8_C( 32), INT8_C( 91), INT8_C( 10), -INT8_C( 69), -INT8_C( 15), INT8_C( 76), -INT8_C( 97), INT8_C( 42), INT8_C( 111), -INT8_C( 11), -INT8_C( 16), -INT8_C( 115), -INT8_C( 80), -INT8_C( 6), INT8_C( 113) }, { -INT8_C( 110), -INT8_C( 42), -INT8_C( 99), -INT8_C( 57), -INT8_C( 116), INT8_C( 95), -INT8_C( 73), -INT8_C( 94), INT8_C( 114), -INT8_C( 66), INT8_C( 98), INT8_C( 69), -INT8_C( 83), -INT8_C( 10), -INT8_C( 85), INT8_C( 122), 
-INT8_C( 42), INT8_C( 7), -INT8_C( 124), -INT8_C( 110), -INT8_C( 8), -INT8_C( 48), INT8_C( 49), INT8_C( 34), INT8_C( 63), INT8_C( 39), INT8_C( 19), -INT8_C( 52), -INT8_C( 41), INT8_C( 13), INT8_C( 62), INT8_C( 105) }, { INT16_C( 0), -INT16_C( 24288), INT16_C( 0), INT16_MIN, INT16_C( 1254), INT16_C( 13865), INT16_C( 0), -INT16_C( 136), -INT16_C( 7042), INT16_C( 0), INT16_C( 0), INT16_C( 9130), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT16_C(56291), { INT8_C( 48), INT8_C( 111), INT8_C( 58), -INT8_C( 24), INT8_C( 17), -INT8_C( 84), -INT8_C( 90), INT8_C( 115), -INT8_C( 15), INT8_C( 83), INT8_C( 106), -INT8_C( 99), -INT8_C( 51), INT8_C( 64), -INT8_C( 92), INT8_C( 81), -INT8_C( 46), -INT8_C( 100), INT8_C( 34), INT8_C( 4), -INT8_C( 65), INT8_C( 97), INT8_C( 43), -INT8_C( 46), INT8_C( 46), INT8_C( 2), -INT8_C( 33), INT8_C( 108), INT8_C( 107), -INT8_C( 62), INT8_C( 71), -INT8_C( 101) }, { INT8_C( 49), -INT8_C( 126), -INT8_C( 125), INT8_C( 66), INT8_C( 46), INT8_C( 42), -INT8_C( 74), INT8_C( 32), INT8_C( 125), INT8_C( 32), -INT8_C( 67), INT8_C( 75), INT8_C( 96), INT8_C( 97), -INT8_C( 100), INT8_C( 51), -INT8_C( 3), -INT8_C( 66), INT8_C( 55), -INT8_C( 68), INT8_C( 32), INT8_C( 98), -INT8_C( 114), INT8_C( 78), INT8_C( 100), INT8_C( 109), -INT8_C( 70), -INT8_C( 49), INT8_C( 47), INT8_C( 1), INT8_C( 106), INT8_C( 96) }, { -INT16_C( 11634), INT16_C( 8062), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 4673), INT16_C( 25888), -INT16_C( 12269), -INT16_C( 10926), INT16_C( 1598), INT16_C( 0), INT16_C( 11478), INT16_C( 4818), INT16_C( 0), INT16_C( 5223), INT16_C( 22406) } }, { UINT16_C(61059), { -INT8_C( 93), -INT8_C( 78), INT8_C( 24), INT8_C( 89), -INT8_C( 46), -INT8_C( 107), INT8_C( 121), -INT8_C( 113), -INT8_C( 32), -INT8_C( 39), -INT8_C( 16), INT8_C( 125), INT8_C( 12), -INT8_C( 19), INT8_C( 59), INT8_C( 67), -INT8_C( 86), INT8_C( 91), -INT8_C( 91), INT8_C( 56), -INT8_C( 87), INT8_C( 9), -INT8_C( 90), INT8_C( 99), -INT8_C( 40), -INT8_C( 43), INT8_C( 101), 
INT8_C( 67), INT8_C( 54), -INT8_C( 24), INT8_C( 49), -INT8_C( 39) }, { -INT8_C( 102), INT8_C( 73), INT8_C( 50), INT8_C( 108), -INT8_C( 34), -INT8_C( 85), -INT8_C( 5), -INT8_C( 65), -INT8_C( 124), -INT8_C( 21), INT8_C( 60), -INT8_C( 111), -INT8_C( 39), INT8_C( 119), -INT8_C( 44), -INT8_C( 125), -INT8_C( 45), INT8_C( 122), -INT8_C( 69), INT8_C( 124), -INT8_C( 125), INT8_C( 97), -INT8_C( 32), INT8_C( 92), INT8_C( 55), INT8_C( 69), -INT8_C( 97), INT8_C( 109), INT8_C( 45), -INT8_C( 48), INT8_C( 70), -INT8_C( 56) }, { -INT16_C( 3632), INT16_C( 10812), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 10971), INT16_C( 0), -INT16_C( 4441), -INT16_C( 20252), INT16_C( 3796), INT16_C( 0), -INT16_C( 2494), -INT16_C( 8706), -INT16_C( 8722) } }, { UINT16_C(30745), { INT8_C( 52), -INT8_C( 9), INT8_C( 35), INT8_C( 48), -INT8_C( 74), -INT8_C( 89), INT8_C( 27), -INT8_C( 14), INT8_C( 56), -INT8_C( 12), INT8_C( 106), INT8_C( 13), INT8_C( 119), INT8_C( 61), -INT8_C( 121), INT8_C( 51), -INT8_C( 71), INT8_C( 10), -INT8_C( 108), -INT8_C( 103), INT8_C( 102), -INT8_C( 53), -INT8_C( 34), INT8_C( 5), INT8_C( 56), INT8_C( 12), -INT8_C( 43), INT8_C( 126), -INT8_C( 44), -INT8_C( 18), -INT8_C( 10), INT8_C( 8) }, { -INT8_C( 26), INT8_C( 25), INT8_C( 56), -INT8_C( 100), -INT8_C( 63), INT8_C( 84), -INT8_C( 113), -INT8_C( 7), INT8_C( 72), -INT8_C( 7), INT8_C( 6), -INT8_C( 64), INT8_C( 54), -INT8_C( 115), -INT8_C( 13), -INT8_C( 17), -INT8_C( 104), -INT8_C( 121), -INT8_C( 119), -INT8_C( 2), INT8_C( 83), INT8_C( 103), INT8_C( 4), -INT8_C( 117), INT8_C( 115), -INT8_C( 39), INT8_C( 10), INT8_C( 71), -INT8_C( 56), INT8_C( 0), INT8_C( 80), -INT8_C( 82) }, { INT16_C( 4823), INT16_C( 0), INT16_C( 0), -INT16_C( 4745), INT16_C( 2324), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 303), INT16_C( 5972), INT16_C( 11076), -INT16_C( 11872), INT16_C( 0) } } };
/* Run every stored vector: load a and b, apply the zero-masking operation
 * with the entry's k, and compare the result with the expected r. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = 
simde_x_mm256_loadu_epi8(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi8(test_vec[i].b); simde__m256i r = simde_mm256_maskz_maddubs_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0;
/* Alternate branch of the #if/#else pair: print eight random input sets and
 * the computed result instead of checking stored vectors. */
#else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m256i a = simde_test_x86_random_i8x32(); simde__m256i b = simde_test_x86_random_i8x32(); simde__m256i r = simde_mm256_maskz_maddubs_epi16(k, a, b); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_maddubs_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[64]; const int8_t b[64]; const int16_t r[32]; } test_vec[] = { { { INT8_C( 26), -INT8_C( 120), INT8_C( 74), -INT8_C( 37), -INT8_C( 36), -INT8_C( 39), -INT8_C( 44), INT8_C( 37), -INT8_C( 46), -INT8_C( 37), -INT8_C( 27), INT8_C( 8), INT8_C( 104), -INT8_C( 40), -INT8_C( 8), INT8_C( 0), INT8_C( 95), -INT8_C( 127), -INT8_C( 1), -INT8_C( 78), -INT8_C( 24), INT8_C( 3), INT8_C( 62), INT8_C( 92), -INT8_C( 36), INT8_C( 72), -INT8_C( 93), -INT8_C( 92), INT8_C( 72), -INT8_C( 13), INT8_C( 82), INT8_C( 98), INT8_C( 124), -INT8_C( 99), INT8_C( 61), INT8_C( 88), INT8_C( 118), INT8_C( 18), INT8_C( 125), INT8_C( 73), -INT8_C( 19), INT8_C( 98), INT8_C( 81), INT8_C( 85), INT8_C( 58), INT8_C( 73), INT8_C( 86), -INT8_C( 102), -INT8_C( 54), INT8_C( 85), INT8_C( 76), -INT8_C( 77), INT8_C( 88), -INT8_C( 118), INT8_C( 15), INT8_C( 52), -INT8_C( 46), -INT8_C( 78), -INT8_C( 39), INT8_C( 27), -INT8_C( 90), INT8_C( 43), INT8_C( 125), INT8_C( 34) }, { -INT8_C( 56), -INT8_C( 69), INT8_C( 122), INT8_C( 63), -INT8_C( 51), -INT8_C( 8), -INT8_C( 120), -INT8_C( 70), INT8_C( 90), -INT8_C( 39), INT8_C( 15), -INT8_C( 
107), INT8_C( 35), INT8_C( 101), INT8_C( 47), -INT8_C( 19), -INT8_C( 70), INT8_C( 123), -INT8_C( 96), INT8_C( 18), INT8_C( 6), -INT8_C( 81), INT8_C( 71), -INT8_C( 40), INT8_C( 98), INT8_C( 32), -INT8_C( 13), INT8_C( 8), INT8_C( 75), INT8_C( 113), INT8_C( 42), INT8_C( 20), INT8_C( 44), -INT8_C( 92), INT8_C( 83), -INT8_C( 7), -INT8_C( 100), -INT8_C( 37), -INT8_C( 77), -INT8_C( 9), -INT8_C( 76), -INT8_C( 62), -INT8_C( 116), -INT8_C( 41), INT8_C( 40), -INT8_C( 69), -INT8_C( 59), -INT8_C( 30), INT8_C( 54), INT8_C( 101), -INT8_C( 11), INT8_C( 60), INT8_C( 21), INT8_C( 60), INT8_C( 21), INT8_C( 119), INT8_C( 92), INT8_C( 8), INT8_MAX, -INT8_C( 89), INT8_C( 121), -INT8_C( 87), -INT8_C( 69), -INT8_C( 91) }, { -INT16_C( 10840), INT16_C( 22825), -INT16_C( 12956), -INT16_C( 28030), INT16_C( 10359), INT16_C( 2579), INT16_C( 25456), INT16_C( 11656), INT16_C( 9217), -INT16_C( 21276), INT16_C( 1149), INT16_C( 722), INT16_C( 23864), -INT16_C( 807), INT16_MAX, INT16_C( 5404), -INT16_C( 8988), INT16_C( 4447), -INT16_C( 12466), -INT16_C( 10282), -INT16_C( 24088), -INT16_C( 12881), -INT16_C( 2717), -INT16_C( 9694), INT16_C( 19493), INT16_C( 9904), INT16_C( 10128), INT16_C( 6503), INT16_C( 20744), INT16_C( 25156), INT16_C( 16345), -INT16_C( 11719) } }, { { INT8_C( 77), INT8_C( 14), -INT8_C( 98), -INT8_C( 22), -INT8_C( 23), INT8_C( 81), -INT8_C( 31), -INT8_C( 98), INT8_C( 20), INT8_C( 109), INT8_C( 117), INT8_C( 60), INT8_C( 40), INT8_C( 58), INT8_C( 30), INT8_C( 94), -INT8_C( 96), INT8_C( 19), -INT8_C( 101), -INT8_C( 75), INT8_C( 79), -INT8_C( 80), INT8_C( 44), -INT8_C( 85), -INT8_C( 72), -INT8_C( 85), INT8_C( 83), INT8_C( 50), INT8_C( 84), INT8_C( 14), -INT8_C( 41), -INT8_C( 95), INT8_C( 29), INT8_C( 118), -INT8_C( 117), INT8_C( 6), -INT8_C( 57), INT8_C( 108), -INT8_C( 92), -INT8_C( 37), -INT8_C( 39), INT8_C( 26), INT8_C( 23), INT8_C( 1), INT8_C( 84), INT8_C( 54), INT8_C( 96), -INT8_C( 12), INT8_C( 73), -INT8_C( 5), -INT8_C( 87), -INT8_C( 103), -INT8_C( 85), -INT8_C( 43), INT8_C( 68), 
INT8_C( 99), INT8_MIN, -INT8_C( 105), -INT8_C( 107), -INT8_C( 44), -INT8_C( 90), INT8_C( 109), INT8_C( 118), -INT8_C( 61) }, { -INT8_C( 29), INT8_C( 1), -INT8_C( 55), -INT8_C( 86), INT8_C( 110), INT8_C( 110), -INT8_C( 122), INT8_C( 71), -INT8_C( 120), -INT8_C( 99), INT8_C( 73), -INT8_C( 36), -INT8_C( 45), -INT8_C( 87), -INT8_C( 47), INT8_C( 29), -INT8_C( 92), INT8_C( 122), -INT8_C( 74), INT8_C( 79), INT8_C( 80), -INT8_C( 6), -INT8_C( 78), -INT8_C( 48), -INT8_C( 110), INT8_C( 72), -INT8_C( 91), INT8_C( 56), -INT8_C( 75), INT8_C( 27), -INT8_C( 5), -INT8_C( 104), INT8_C( 28), -INT8_C( 60), INT8_C( 66), -INT8_C( 118), INT8_C( 50), -INT8_C( 56), -INT8_C( 46), -INT8_C( 70), INT8_C( 102), INT8_C( 27), -INT8_C( 105), INT8_C( 57), -INT8_C( 60), INT8_C( 104), INT8_C( 86), INT8_C( 104), -INT8_C( 30), INT8_C( 12), -INT8_C( 73), INT8_C( 50), INT8_C( 7), INT8_C( 105), INT8_C( 3), -INT8_C( 103), -INT8_C( 79), -INT8_C( 88), -INT8_C( 47), INT8_C( 102), -INT8_C( 61), -INT8_C( 52), -INT8_C( 2), -INT8_C( 33) }, { -INT16_C( 2219), -INT16_C( 28814), INT16_MAX, -INT16_C( 16232), -INT16_C( 13191), INT16_C( 6381), -INT16_C( 6846), INT16_C( 1316), -INT16_C( 12402), INT16_C( 2829), INT16_C( 5264), -INT16_C( 11640), -INT16_C( 7928), -INT16_C( 4753), -INT16_C( 5922), -INT16_C( 17819), -INT16_C( 6268), INT16_C( 8466), INT16_C( 3902), -INT16_C( 22874), INT16_C( 22836), -INT16_C( 2358), INT16_C( 576), INT16_MAX, INT16_C( 822), -INT16_C( 4687), INT16_C( 23562), -INT16_C( 9993), -INT16_C( 23400), INT16_C( 14621), -INT16_C( 15794), -INT16_C( 6671) } }, { { -INT8_C( 112), INT8_C( 65), INT8_C( 106), -INT8_C( 61), INT8_C( 9), INT8_C( 60), INT8_C( 125), INT8_C( 111), INT8_C( 87), INT8_C( 20), -INT8_C( 87), INT8_C( 27), INT8_C( 124), -INT8_C( 1), -INT8_C( 125), INT8_C( 95), INT8_C( 12), INT8_C( 58), -INT8_C( 111), INT8_C( 19), -INT8_C( 93), -INT8_C( 108), -INT8_C( 84), INT8_C( 85), INT8_C( 60), INT8_C( 125), -INT8_C( 69), -INT8_C( 1), INT8_C( 73), -INT8_C( 70), -INT8_C( 33), -INT8_C( 39), -INT8_C( 5), 
INT8_C( 73), -INT8_C( 100), INT8_C( 4), -INT8_C( 123), INT8_C( 26), INT8_C( 116), -INT8_C( 36), INT8_C( 46), INT8_C( 29), -INT8_C( 9), -INT8_C( 85), INT8_C( 28), INT8_C( 122), INT8_C( 10), INT8_C( 40), -INT8_C( 76), -INT8_C( 101), INT8_C( 59), INT8_C( 87), INT8_C( 48), -INT8_C( 25), -INT8_C( 84), INT8_C( 108), INT8_C( 100), INT8_C( 104), INT8_C( 108), -INT8_C( 83), INT8_C( 34), INT8_C( 75), -INT8_C( 121), INT8_C( 29) }, { -INT8_C( 108), INT8_C( 35), INT8_C( 33), INT8_C( 25), INT8_C( 61), -INT8_C( 107), -INT8_C( 11), INT8_C( 108), -INT8_C( 78), -INT8_C( 20), INT8_C( 23), -INT8_C( 49), INT8_C( 102), INT8_C( 33), -INT8_C( 9), INT8_C( 26), -INT8_C( 68), INT8_C( 51), INT8_C( 113), -INT8_C( 20), INT8_C( 26), INT8_C( 30), INT8_C( 89), INT8_MAX, -INT8_C( 122), -INT8_C( 59), INT8_C( 44), -INT8_C( 88), INT8_C( 16), -INT8_C( 77), -INT8_C( 59), -INT8_C( 92), -INT8_C( 41), -INT8_C( 26), -INT8_C( 67), INT8_C( 20), INT8_C( 124), -INT8_C( 78), INT8_MIN, INT8_C( 46), -INT8_C( 98), -INT8_C( 105), -INT8_C( 3), INT8_C( 4), -INT8_C( 72), -INT8_C( 11), INT8_C( 30), INT8_C( 117), INT8_C( 40), -INT8_C( 113), INT8_C( 97), INT8_C( 66), -INT8_C( 83), -INT8_C( 70), -INT8_C( 63), INT8_C( 51), INT8_MAX, -INT8_C( 18), -INT8_C( 37), -INT8_C( 113), -INT8_C( 95), -INT8_C( 96), INT8_C( 51), INT8_C( 120) }, { -INT16_C( 13277), INT16_C( 8373), -INT16_C( 5871), INT16_C( 10613), -INT16_C( 7186), INT16_C( 2564), INT16_C( 21063), INT16_C( 1291), INT16_C( 2142), INT16_C( 16005), INT16_C( 8678), INT16_C( 26103), -INT16_C( 14695), -INT16_C( 14212), -INT16_C( 13154), INT16_MIN, -INT16_C( 12189), -INT16_C( 10372), INT16_C( 14464), -INT16_C( 4728), -INT16_C( 7553), -INT16_C( 57), -INT16_C( 3358), INT16_C( 4980), -INT16_C( 10315), INT16_C( 11465), -INT16_C( 20154), -INT16_C( 5328), INT16_C( 10828), -INT16_C( 23545), -INT16_C( 10430), INT16_C( 10365) } }, { { -INT8_C( 121), -INT8_C( 16), -INT8_C( 115), INT8_C( 3), -INT8_C( 94), INT8_C( 13), INT8_C( 49), INT8_C( 64), -INT8_C( 91), INT8_C( 47), INT8_C( 68), INT8_C( 
93), INT8_C( 36), INT8_C( 98), -INT8_C( 46), INT8_C( 76), -INT8_C( 14), INT8_C( 52), -INT8_C( 114), -INT8_C( 97), -INT8_C( 18), INT8_C( 80), -INT8_C( 45), INT8_C( 110), INT8_C( 62), -INT8_C( 82), -INT8_C( 3), -INT8_C( 33), INT8_C( 79), INT8_C( 49), INT8_C( 88), -INT8_C( 42), INT8_C( 33), -INT8_C( 27), -INT8_C( 39), -INT8_C( 60), -INT8_C( 14), INT8_C( 10), INT8_C( 4), -INT8_C( 105), INT8_C( 57), INT8_C( 73), -INT8_C( 11), INT8_C( 93), -INT8_C( 85), -INT8_C( 57), -INT8_C( 87), -INT8_C( 99), -INT8_C( 5), INT8_C( 56), INT8_C( 61), -INT8_C( 22), -INT8_C( 120), INT8_C( 16), INT8_C( 88), -INT8_C( 58), -INT8_C( 66), INT8_C( 85), -INT8_C( 91), INT8_C( 13), -INT8_C( 122), -INT8_C( 3), -INT8_C( 29), -INT8_C( 88) }, { -INT8_C( 30), -INT8_C( 68), INT8_C( 108), -INT8_C( 43), -INT8_C( 57), INT8_C( 112), INT8_C( 108), INT8_C( 0), -INT8_C( 71), INT8_C( 97), INT8_C( 94), INT8_C( 101), INT8_C( 41), INT8_C( 7), INT8_C( 2), INT8_C( 36), INT8_C( 63), INT8_C( 63), INT8_C( 14), -INT8_C( 57), INT8_C( 79), INT8_C( 102), -INT8_C( 115), INT8_C( 14), -INT8_C( 68), INT8_C( 51), INT8_C( 27), INT8_C( 66), INT8_C( 48), -INT8_C( 1), -INT8_C( 22), INT8_C( 19), -INT8_C( 69), INT8_C( 86), -INT8_C( 24), -INT8_C( 126), -INT8_C( 57), INT8_C( 84), -INT8_C( 125), INT8_MIN, -INT8_C( 74), -INT8_C( 31), -INT8_C( 27), -INT8_C( 33), -INT8_C( 24), -INT8_C( 24), INT8_C( 3), INT8_C( 40), INT8_C( 39), INT8_C( 18), -INT8_C( 17), INT8_C( 119), INT8_C( 120), INT8_C( 125), -INT8_C( 123), INT8_C( 52), -INT8_C( 80), -INT8_C( 96), INT8_C( 119), -INT8_C( 32), -INT8_C( 97), INT8_C( 97), -INT8_C( 13), INT8_C( 91) }, { -INT16_C( 20370), INT16_C( 15099), -INT16_C( 7778), INT16_C( 5292), -INT16_C( 7156), INT16_C( 15785), INT16_C( 2162), INT16_C( 3156), INT16_C( 18522), -INT16_C( 7075), INT16_C( 26962), -INT16_C( 22725), INT16_C( 4658), INT16_C( 21549), INT16_C( 3743), INT16_C( 2130), INT16_C( 17417), -INT16_C( 29904), -INT16_C( 12954), -INT16_C( 19828), -INT16_C( 6481), -INT16_C( 9684), -INT16_C( 8880), INT16_C( 6787), INT16_C( 
10797), INT16_C( 26809), INT16_C( 18320), -INT16_C( 528), -INT16_C( 23360), INT16_C( 19219), INT16_C( 11543), INT16_C( 12337) } }, { { -INT8_C( 72), -INT8_C( 37), -INT8_C( 35), INT8_MAX, INT8_C( 48), INT8_C( 96), -INT8_C( 1), -INT8_C( 26), INT8_C( 65), -INT8_C( 27), -INT8_C( 59), INT8_C( 42), -INT8_C( 51), -INT8_C( 56), INT8_C( 82), -INT8_C( 12), -INT8_C( 38), INT8_C( 65), INT8_C( 107), INT8_C( 83), -INT8_C( 66), -INT8_C( 16), -INT8_C( 121), INT8_C( 110), -INT8_C( 111), -INT8_C( 2), INT8_C( 79), INT8_C( 48), INT8_C( 96), INT8_C( 66), -INT8_C( 117), INT8_C( 24), INT8_C( 30), INT8_C( 105), -INT8_C( 105), INT8_C( 78), -INT8_C( 55), -INT8_C( 106), INT8_C( 52), INT8_C( 11), INT8_C( 123), -INT8_C( 7), INT8_C( 53), INT8_C( 72), -INT8_C( 63), -INT8_C( 121), INT8_C( 61), -INT8_C( 100), -INT8_C( 56), -INT8_C( 88), -INT8_C( 17), -INT8_C( 121), -INT8_C( 103), INT8_C( 118), -INT8_C( 11), INT8_C( 42), INT8_C( 117), INT8_C( 68), INT8_C( 90), -INT8_C( 43), -INT8_C( 121), -INT8_C( 26), -INT8_C( 19), -INT8_C( 91) }, { INT8_C( 79), -INT8_C( 124), -INT8_C( 13), INT8_C( 24), INT8_C( 26), INT8_C( 39), INT8_C( 35), -INT8_C( 106), INT8_C( 32), INT8_C( 88), -INT8_C( 34), -INT8_C( 31), -INT8_C( 33), INT8_C( 27), INT8_C( 125), -INT8_C( 88), -INT8_C( 60), INT8_C( 108), INT8_C( 47), INT8_C( 93), -INT8_C( 29), INT8_C( 36), -INT8_C( 121), INT8_C( 88), INT8_C( 105), -INT8_C( 31), INT8_C( 45), -INT8_C( 16), -INT8_C( 57), INT8_C( 26), -INT8_C( 107), INT8_C( 22), -INT8_C( 98), -INT8_C( 120), INT8_C( 47), -INT8_C( 72), -INT8_C( 81), INT8_C( 82), INT8_C( 78), -INT8_C( 49), -INT8_C( 85), INT8_C( 45), -INT8_C( 80), -INT8_C( 118), INT8_C( 72), INT8_C( 46), INT8_C( 50), INT8_C( 12), -INT8_C( 102), INT8_C( 97), INT8_C( 105), INT8_C( 125), -INT8_C( 122), -INT8_C( 16), -INT8_C( 43), -INT8_C( 17), -INT8_C( 46), INT8_C( 2), -INT8_C( 33), -INT8_C( 103), INT8_C( 28), INT8_C( 116), -INT8_C( 80), -INT8_C( 70) }, { -INT16_C( 12620), INT16_C( 175), INT16_C( 4992), -INT16_C( 15455), INT16_C( 22232), -INT16_C( 8000), 
-INT16_C( 1365), -INT16_C( 11222), -INT16_C( 6060), INT16_C( 12748), INT16_C( 3130), -INT16_C( 6655), INT16_C( 7351), INT16_C( 2787), -INT16_C( 3756), -INT16_C( 14345), -INT16_C( 15540), INT16_C( 1481), -INT16_C( 3981), INT16_C( 3517), INT16_C( 750), -INT16_C( 12736), INT16_C( 20106), INT16_C( 4922), -INT16_C( 4104), INT16_MAX, -INT16_C( 20554), -INT16_C( 11249), -INT16_C( 5246), -INT16_C( 24909), INT16_C( 30460), -INT16_C( 30510) } }, { { -INT8_C( 4), -INT8_C( 33), INT8_C( 115), -INT8_C( 85), INT8_C( 49), -INT8_C( 63), INT8_C( 122), -INT8_C( 36), -INT8_C( 18), INT8_C( 42), INT8_C( 103), INT8_C( 55), INT8_C( 88), -INT8_C( 103), INT8_C( 67), -INT8_C( 13), -INT8_C( 5), -INT8_C( 83), INT8_C( 112), -INT8_C( 127), -INT8_C( 99), INT8_C( 70), INT8_C( 112), INT8_C( 111), INT8_C( 72), INT8_C( 79), INT8_C( 9), INT8_C( 101), -INT8_C( 61), -INT8_C( 71), INT8_C( 31), -INT8_C( 65), -INT8_C( 104), -INT8_C( 110), INT8_C( 106), -INT8_C( 55), INT8_C( 84), -INT8_C( 28), -INT8_C( 90), INT8_C( 66), INT8_C( 14), INT8_C( 13), INT8_C( 121), INT8_C( 103), -INT8_C( 90), -INT8_C( 67), INT8_C( 90), -INT8_C( 95), INT8_C( 106), -INT8_C( 54), INT8_C( 34), INT8_C( 7), INT8_C( 16), -INT8_C( 110), INT8_C( 119), INT8_C( 89), -INT8_C( 31), INT8_MIN, -INT8_C( 66), -INT8_C( 92), INT8_C( 57), -INT8_C( 35), INT8_C( 99), -INT8_C( 47) }, { INT8_C( 112), -INT8_C( 51), -INT8_C( 102), -INT8_C( 60), -INT8_C( 79), INT8_C( 64), INT8_C( 6), -INT8_C( 64), INT8_C( 77), INT8_MIN, INT8_C( 39), -INT8_C( 12), INT8_C( 61), -INT8_C( 127), -INT8_C( 107), -INT8_C( 89), INT8_C( 75), -INT8_C( 72), -INT8_C( 82), INT8_C( 92), INT8_C( 74), INT8_C( 37), -INT8_C( 75), INT8_C( 44), -INT8_C( 91), INT8_C( 115), -INT8_C( 48), -INT8_C( 34), INT8_C( 80), INT8_C( 52), -INT8_C( 81), -INT8_C( 64), INT8_C( 1), INT8_C( 74), -INT8_C( 124), -INT8_C( 77), -INT8_C( 118), -INT8_C( 117), INT8_C( 115), -INT8_C( 40), INT8_C( 11), -INT8_C( 102), -INT8_C( 52), INT8_C( 72), INT8_C( 27), INT8_C( 97), -INT8_C( 17), INT8_C( 102), INT8_C( 25), -INT8_C( 
99), -INT8_C( 62), INT8_C( 100), -INT8_C( 61), INT8_C( 119), -INT8_C( 112), INT8_C( 104), -INT8_C( 22), INT8_C( 96), INT8_C( 71), INT8_C( 59), -INT8_C( 108), -INT8_C( 10), -INT8_C( 5), -INT8_C( 106) }, { INT16_C( 16851), -INT16_C( 21990), INT16_C( 8481), -INT16_C( 13348), INT16_C( 12950), INT16_C( 3357), -INT16_C( 14063), -INT16_C( 28796), INT16_C( 6369), INT16_C( 2684), INT16_C( 14208), -INT16_C( 3516), INT16_C( 2533), -INT16_C( 3866), INT16_C( 25220), -INT16_C( 14735), INT16_C( 10956), -INT16_C( 28621), INT16_MIN, INT16_C( 16450), -INT16_C( 1172), INT16_C( 1124), INT16_C( 22815), INT16_C( 14892), -INT16_C( 17348), -INT16_C( 1408), INT16_C( 16398), -INT16_C( 4072), INT16_C( 7338), INT16_C( 23166), -INT16_C( 8366), -INT16_C( 22649) } }, { { INT8_C( 64), INT8_MIN, INT8_C( 73), -INT8_C( 53), INT8_C( 11), -INT8_C( 68), -INT8_C( 93), INT8_C( 22), INT8_C( 86), INT8_C( 111), INT8_C( 94), INT8_C( 113), -INT8_C( 48), INT8_C( 77), -INT8_C( 41), -INT8_C( 22), -INT8_C( 22), -INT8_C( 102), INT8_C( 78), -INT8_C( 83), INT8_C( 17), -INT8_C( 34), INT8_C( 22), -INT8_C( 4), INT8_C( 62), INT8_C( 93), INT8_C( 55), -INT8_C( 45), INT8_C( 83), INT8_C( 50), INT8_C( 105), -INT8_C( 108), -INT8_C( 78), -INT8_C( 78), INT8_C( 95), -INT8_C( 67), INT8_C( 110), INT8_C( 2), -INT8_C( 45), -INT8_C( 60), INT8_C( 113), INT8_C( 49), INT8_C( 53), INT8_C( 65), INT8_C( 126), INT8_C( 12), INT8_C( 43), INT8_C( 105), -INT8_C( 90), INT8_C( 121), INT8_C( 22), -INT8_C( 72), INT8_C( 87), INT8_C( 44), -INT8_C( 76), -INT8_C( 106), -INT8_C( 119), -INT8_C( 21), INT8_C( 105), -INT8_C( 35), INT8_C( 29), -INT8_C( 46), INT8_C( 113), -INT8_C( 48) }, { -INT8_C( 124), -INT8_C( 48), -INT8_C( 115), -INT8_C( 14), -INT8_C( 46), INT8_C( 97), -INT8_C( 74), INT8_C( 67), -INT8_C( 110), -INT8_C( 21), -INT8_C( 124), INT8_C( 17), -INT8_C( 9), -INT8_C( 80), INT8_C( 122), -INT8_C( 98), INT8_C( 41), -INT8_C( 112), INT8_C( 86), -INT8_C( 127), -INT8_C( 67), INT8_C( 10), INT8_C( 23), INT8_C( 70), -INT8_C( 11), INT8_MIN, INT8_C( 35), 
INT8_C( 18), INT8_C( 82), -INT8_C( 108), -INT8_C( 30), -INT8_C( 42), INT8_C( 100), INT8_C( 112), -INT8_C( 56), INT8_C( 54), -INT8_C( 47), INT8_C( 126), INT8_C( 121), INT8_C( 99), INT8_C( 105), -INT8_C( 2), INT8_C( 116), INT8_C( 96), -INT8_C( 82), -INT8_C( 18), -INT8_C( 2), -INT8_C( 41), INT8_MAX, INT8_C( 84), INT8_C( 88), INT8_C( 60), INT8_C( 94), INT8_C( 111), -INT8_C( 126), INT8_C( 83), -INT8_C( 17), -INT8_C( 90), INT8_C( 102), INT8_C( 65), INT8_C( 58), INT8_C( 72), INT8_C( 23), -INT8_C( 97) }, { -INT16_C( 14080), -INT16_C( 11237), INT16_C( 17730), -INT16_C( 10588), -INT16_C( 11791), -INT16_C( 9735), -INT16_C( 8032), INT16_C( 3298), -INT16_C( 7654), -INT16_C( 15263), INT16_C( 1081), INT16_C( 18146), -INT16_C( 12586), INT16_C( 5723), INT16_C( 1406), -INT16_C( 9366), INT16_MAX, INT16_C( 4886), -INT16_C( 4918), INT16_MAX, INT16_C( 11767), INT16_C( 12388), -INT16_C( 10548), -INT16_C( 4391), INT16_C( 31246), INT16_C( 12976), INT16_C( 13062), -INT16_C( 10230), -INT16_C( 23479), INT16_C( 25075), INT16_C( 16802), -INT16_C( 17577) } }, { { -INT8_C( 72), -INT8_C( 33), -INT8_C( 43), -INT8_C( 119), INT8_C( 93), INT8_C( 79), -INT8_C( 19), -INT8_C( 58), INT8_C( 77), INT8_C( 97), INT8_C( 39), -INT8_C( 5), INT8_C( 80), INT8_C( 37), -INT8_C( 46), -INT8_C( 49), INT8_C( 122), INT8_C( 43), INT8_C( 11), -INT8_C( 40), -INT8_C( 102), -INT8_C( 115), INT8_C( 44), -INT8_C( 118), INT8_C( 51), -INT8_C( 110), -INT8_C( 53), INT8_C( 110), -INT8_C( 38), -INT8_C( 29), INT8_C( 13), -INT8_C( 109), -INT8_C( 62), -INT8_C( 30), INT8_C( 28), INT8_C( 32), INT8_C( 49), INT8_C( 9), -INT8_C( 26), INT8_C( 126), INT8_C( 107), INT8_C( 13), INT8_C( 121), -INT8_C( 69), INT8_C( 51), INT8_C( 76), -INT8_C( 118), -INT8_C( 83), INT8_C( 119), -INT8_C( 107), -INT8_C( 123), INT8_C( 17), INT8_C( 34), -INT8_C( 79), -INT8_C( 101), INT8_C( 86), INT8_C( 67), INT8_C( 103), -INT8_C( 60), INT8_C( 30), INT8_C( 74), -INT8_C( 47), -INT8_C( 79), INT8_C( 12) }, { -INT8_C( 77), -INT8_C( 51), INT8_C( 44), -INT8_C( 27), -INT8_C( 41), 
INT8_C( 19), INT8_C( 99), INT8_C( 66), INT8_C( 32), -INT8_C( 35), -INT8_C( 3), INT8_C( 83), INT8_C( 41), -INT8_C( 121), INT8_C( 0), -INT8_C( 96), INT8_C( 28), -INT8_C( 122), -INT8_C( 79), INT8_C( 62), INT8_C( 55), INT8_C( 77), -INT8_C( 108), INT8_C( 123), -INT8_C( 76), INT8_C( 88), -INT8_C( 103), -INT8_C( 2), INT8_C( 41), INT8_C( 74), INT8_C( 10), -INT8_C( 35), INT8_C( 23), INT8_C( 55), -INT8_C( 62), -INT8_C( 18), INT8_C( 74), INT8_C( 37), INT8_C( 48), INT8_C( 106), INT8_C( 2), INT8_C( 45), -INT8_C( 66), INT8_C( 43), -INT8_C( 76), -INT8_C( 66), -INT8_C( 53), -INT8_C( 48), INT8_C( 68), INT8_C( 125), INT8_C( 15), INT8_C( 124), -INT8_C( 54), -INT8_C( 93), -INT8_C( 9), INT8_C( 126), -INT8_C( 4), -INT8_C( 112), INT8_C( 124), INT8_C( 37), -INT8_C( 38), -INT8_C( 122), INT8_C( 2), -INT8_C( 15) }, { -INT16_C( 25541), INT16_C( 5673), -INT16_C( 2312), INT16_MAX, -INT16_C( 931), INT16_C( 20716), -INT16_C( 1197), -INT16_C( 19872), -INT16_C( 1830), INT16_C( 12523), INT16_C( 19327), INT16_C( 12222), INT16_C( 8972), -INT16_C( 21129), INT16_C( 25736), -INT16_C( 5015), INT16_C( 16892), -INT16_C( 2312), INT16_C( 3959), INT16_C( 24396), INT16_C( 799), INT16_C( 55), -INT16_C( 8892), -INT16_C( 15618), INT16_C( 26717), INT16_C( 4103), -INT16_C( 18297), INT16_C( 9441), -INT16_C( 11804), INT16_C( 25414), -INT16_C( 28310), INT16_C( 174) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_maddubs_epi16(a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i8x64(); simde__m512i b = simde_test_x86_random_i8x64(); simde__m512i r = simde_mm512_maddubs_epi16(a, b); simde_test_x86_write_i8x64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_x86_write_i16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_maddubs_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t src[32]; const simde__mmask32 k; const int8_t a[64]; const int8_t b[64]; const int16_t r[32]; } test_vec[] = { { { -INT16_C( 15171), INT16_C( 2016), INT16_C( 4330), -INT16_C( 5006), INT16_C( 12350), -INT16_C( 3560), -INT16_C( 7186), INT16_C( 13251), -INT16_C( 11680), INT16_C( 10927), -INT16_C( 22923), INT16_C( 29096), INT16_C( 9270), INT16_C( 4247), -INT16_C( 26197), INT16_C( 26625), -INT16_C( 7842), INT16_C( 18544), -INT16_C( 7438), INT16_C( 12340), INT16_C( 19474), INT16_C( 34), -INT16_C( 6864), -INT16_C( 28621), -INT16_C( 7497), INT16_C( 11707), INT16_C( 25480), -INT16_C( 16738), INT16_C( 13704), INT16_C( 13262), -INT16_C( 12081), INT16_C( 11675) }, UINT32_C(2742356913), { -INT8_C( 19), -INT8_C( 87), -INT8_C( 45), -INT8_C( 1), -INT8_C( 10), -INT8_C( 10), INT8_C( 0), INT8_C( 38), -INT8_C( 37), INT8_C( 51), -INT8_C( 74), -INT8_C( 109), INT8_C( 22), INT8_C( 113), -INT8_C( 64), -INT8_C( 98), -INT8_C( 43), INT8_C( 94), INT8_C( 93), INT8_C( 93), -INT8_C( 108), INT8_C( 43), -INT8_C( 112), INT8_C( 99), -INT8_C( 5), INT8_C( 43), -INT8_C( 112), -INT8_C( 83), INT8_C( 55), INT8_C( 5), INT8_C( 80), INT8_C( 36), -INT8_C( 82), INT8_C( 36), INT8_C( 36), -INT8_C( 92), INT8_C( 26), INT8_C( 36), -INT8_C( 54), -INT8_C( 11), INT8_C( 87), -INT8_C( 127), -INT8_C( 120), INT8_C( 109), -INT8_C( 14), INT8_C( 72), INT8_C( 12), -INT8_C( 57), -INT8_C( 89), INT8_C( 105), INT8_C( 36), INT8_C( 59), -INT8_C( 108), -INT8_C( 76), -INT8_C( 98), -INT8_C( 112), -INT8_C( 32), INT8_C( 46), INT8_C( 61), INT8_C( 23), INT8_C( 51), -INT8_C( 115), INT8_C( 59), -INT8_C( 31) }, { -INT8_C( 79), INT8_C( 95), -INT8_C( 122), -INT8_C( 53), -INT8_C( 125), INT8_C( 80), -INT8_C( 63), -INT8_C( 37), -INT8_C( 47), INT8_C( 73), INT8_C( 72), -INT8_C( 60), -INT8_C( 110), INT8_C( 84), -INT8_C( 117), INT8_C( 57), -INT8_C( 67), 
-INT8_C( 80), INT8_C( 116), INT8_C( 82), INT8_C( 100), INT8_C( 18), -INT8_C( 30), INT8_C( 68), INT8_C( 64), INT8_C( 31), INT8_C( 91), INT8_C( 115), -INT8_C( 84), -INT8_C( 105), INT8_C( 84), INT8_C( 94), -INT8_C( 10), -INT8_C( 38), INT8_C( 41), INT8_C( 122), INT8_C( 43), -INT8_C( 22), INT8_C( 85), -INT8_C( 4), INT8_C( 52), -INT8_C( 99), -INT8_C( 64), -INT8_C( 58), -INT8_C( 14), INT8_C( 76), -INT8_C( 1), -INT8_C( 81), -INT8_C( 4), INT8_C( 115), INT8_C( 1), INT8_C( 96), -INT8_C( 123), -INT8_C( 29), -INT8_C( 91), -INT8_C( 59), INT8_C( 2), INT8_C( 0), INT8_C( 56), -INT8_C( 81), -INT8_C( 105), -INT8_C( 116), INT8_C( 13), -INT8_C( 114) }, { -INT16_C( 2668), INT16_C( 2016), INT16_C( 4330), -INT16_C( 5006), -INT16_C( 6570), INT16_C( 4284), -INT16_C( 7186), -INT16_C( 13458), -INT16_C( 21791), INT16_C( 18414), -INT16_C( 22923), INT16_C( 2412), INT16_C( 9270), INT16_C( 4247), -INT16_C( 26197), INT16_C( 26625), -INT16_C( 3108), INT16_C( 18544), INT16_C( 326), INT16_C( 12340), -INT16_C( 8247), -INT16_C( 15026), INT16_C( 2084), -INT16_C( 28621), INT16_C( 11407), INT16_C( 5700), INT16_C( 25480), -INT16_C( 16738), INT16_C( 13704), INT16_C( 1553), -INT16_C( 12081), -INT16_C( 24883) } }, { { INT16_C( 13927), -INT16_C( 28152), INT16_C( 23841), INT16_C( 21902), INT16_C( 20474), -INT16_C( 5093), INT16_C( 6811), -INT16_C( 26724), -INT16_C( 25203), INT16_C( 4855), -INT16_C( 25471), -INT16_C( 31785), INT16_C( 3997), INT16_C( 13362), INT16_C( 16283), INT16_C( 706), -INT16_C( 13706), -INT16_C( 26732), INT16_C( 8999), INT16_C( 8940), INT16_C( 1906), INT16_C( 3342), -INT16_C( 21983), -INT16_C( 20828), -INT16_C( 25784), -INT16_C( 13888), -INT16_C( 26824), -INT16_C( 10932), INT16_C( 32678), INT16_C( 16649), -INT16_C( 13122), INT16_C( 13380) }, UINT32_C(3201030294), { -INT8_C( 5), -INT8_C( 73), -INT8_C( 32), INT8_C( 109), -INT8_C( 66), -INT8_C( 18), INT8_C( 122), -INT8_C( 33), -INT8_C( 103), INT8_C( 30), -INT8_C( 115), -INT8_C( 31), -INT8_C( 70), INT8_C( 77), -INT8_C( 86), -INT8_C( 14), -INT8_C( 
28), -INT8_C( 10), -INT8_C( 57), -INT8_C( 118), INT8_C( 117), -INT8_C( 48), -INT8_C( 52), INT8_C( 52), -INT8_C( 100), INT8_C( 16), INT8_C( 104), INT8_C( 51), -INT8_C( 24), INT8_C( 52), -INT8_C( 15), -INT8_C( 28), -INT8_C( 21), -INT8_C( 47), INT8_C( 81), -INT8_C( 86), -INT8_C( 65), -INT8_C( 52), -INT8_C( 119), INT8_C( 88), -INT8_C( 22), INT8_C( 23), INT8_C( 57), -INT8_C( 92), INT8_C( 100), -INT8_C( 29), -INT8_C( 106), INT8_C( 73), -INT8_C( 38), INT8_C( 93), -INT8_C( 45), INT8_C( 79), INT8_C( 46), -INT8_C( 97), -INT8_C( 125), -INT8_C( 54), -INT8_C( 81), -INT8_C( 20), -INT8_C( 3), -INT8_C( 104), INT8_C( 32), -INT8_C( 18), INT8_C( 124), INT8_C( 11) }, { -INT8_C( 65), -INT8_C( 51), -INT8_C( 75), INT8_MAX, -INT8_C( 103), INT8_C( 63), -INT8_C( 41), -INT8_C( 124), INT8_C( 86), INT8_C( 17), INT8_C( 40), -INT8_C( 70), -INT8_C( 12), -INT8_C( 65), INT8_C( 3), -INT8_C( 50), INT8_C( 28), -INT8_C( 41), INT8_C( 30), INT8_C( 74), INT8_C( 118), -INT8_C( 95), INT8_C( 21), INT8_C( 38), -INT8_C( 115), INT8_C( 18), -INT8_C( 66), -INT8_C( 83), INT8_C( 1), INT8_C( 58), -INT8_C( 71), -INT8_C( 64), INT8_C( 7), INT8_C( 110), INT8_C( 63), -INT8_C( 95), -INT8_C( 83), INT8_C( 23), INT8_C( 37), INT8_C( 3), INT8_C( 40), INT8_C( 77), -INT8_C( 66), INT8_C( 28), INT8_C( 12), -INT8_C( 63), -INT8_C( 21), INT8_C( 41), -INT8_C( 104), INT8_C( 9), INT8_C( 115), INT8_C( 15), -INT8_C( 86), -INT8_C( 120), INT8_C( 53), INT8_C( 56), -INT8_C( 101), -INT8_C( 13), -INT8_C( 27), -INT8_C( 100), INT8_C( 45), -INT8_C( 98), INT8_C( 92), INT8_C( 52) }, { INT16_C( 13927), -INT16_C( 2957), -INT16_C( 4576), INT16_C( 21902), INT16_C( 13668), -INT16_C( 5093), INT16_C( 6811), -INT16_C( 11590), -INT16_C( 25203), INT16_C( 4855), -INT16_C( 25471), INT16_C( 6260), -INT16_C( 17652), INT16_C( 13362), INT16_C( 3248), -INT16_C( 31703), INT16_C( 24635), -INT16_C( 11047), INT16_C( 8999), INT16_C( 5333), INT16_C( 1906), INT16_C( 3342), -INT16_C( 13101), -INT16_C( 157), -INT16_C( 25784), INT16_C( 25450), -INT16_C( 23036), INT16_C( 
18255), -INT16_C( 20743), -INT16_C( 22031), -INT16_C( 13122), INT16_C( 11980) } }, { { -INT16_C( 25587), -INT16_C( 17707), -INT16_C( 1357), -INT16_C( 9282), INT16_C( 31816), INT16_C( 21751), -INT16_C( 7619), -INT16_C( 10627), -INT16_C( 3605), -INT16_C( 26907), INT16_C( 6777), INT16_C( 5326), -INT16_C( 19699), INT16_C( 15024), INT16_C( 3410), INT16_C( 24430), INT16_C( 17577), INT16_C( 23577), -INT16_C( 10434), -INT16_C( 31177), INT16_C( 11859), -INT16_C( 28197), INT16_C( 22545), -INT16_C( 921), INT16_C( 19529), -INT16_C( 15470), INT16_C( 24678), INT16_C( 29655), -INT16_C( 30700), INT16_C( 26285), INT16_C( 7061), INT16_C( 16069) }, UINT32_C(2660949599), { -INT8_C( 74), -INT8_C( 47), INT8_C( 36), INT8_C( 9), -INT8_C( 1), -INT8_C( 1), -INT8_C( 102), INT8_C( 16), INT8_C( 88), INT8_C( 1), INT8_C( 13), -INT8_C( 95), INT8_C( 77), -INT8_C( 97), INT8_C( 100), -INT8_C( 77), INT8_C( 0), INT8_C( 60), INT8_C( 38), INT8_C( 20), -INT8_C( 60), -INT8_C( 45), INT8_C( 122), INT8_C( 89), -INT8_C( 17), INT8_C( 63), -INT8_C( 105), INT8_C( 78), INT8_C( 29), INT8_C( 49), -INT8_C( 20), -INT8_C( 45), INT8_C( 2), INT8_C( 17), -INT8_C( 35), INT8_C( 1), INT8_C( 16), INT8_C( 119), INT8_C( 18), INT8_C( 104), INT8_C( 121), INT8_C( 31), INT8_C( 10), -INT8_C( 58), -INT8_C( 66), INT8_C( 110), INT8_C( 122), -INT8_C( 66), -INT8_C( 86), -INT8_C( 96), -INT8_C( 46), INT8_C( 110), INT8_C( 116), INT8_C( 76), -INT8_C( 57), INT8_C( 99), -INT8_C( 117), INT8_C( 94), -INT8_C( 79), -INT8_C( 87), -INT8_C( 113), -INT8_C( 98), INT8_C( 124), -INT8_C( 111) }, { -INT8_C( 81), INT8_C( 89), -INT8_C( 109), -INT8_C( 65), -INT8_C( 47), -INT8_C( 91), INT8_C( 40), INT8_C( 74), -INT8_C( 60), INT8_C( 50), INT8_C( 16), -INT8_C( 126), -INT8_C( 96), -INT8_C( 118), INT8_C( 65), INT8_C( 75), INT8_C( 43), INT8_C( 19), -INT8_C( 71), -INT8_C( 97), INT8_C( 96), -INT8_C( 127), INT8_C( 2), -INT8_C( 21), -INT8_C( 33), -INT8_C( 77), -INT8_C( 108), INT8_C( 111), INT8_C( 81), INT8_C( 17), INT8_C( 0), INT8_C( 0), INT8_C( 106), -INT8_C( 109), 
-INT8_C( 64), INT8_C( 59), INT8_C( 56), -INT8_C( 24), -INT8_C( 123), -INT8_C( 4), INT8_C( 26), -INT8_C( 106), INT8_MAX, -INT8_C( 70), INT8_C( 32), -INT8_C( 64), INT8_C( 5), INT8_C( 75), -INT8_C( 45), -INT8_C( 65), -INT8_C( 22), INT8_C( 51), INT8_C( 64), -INT8_C( 20), INT8_C( 31), INT8_C( 31), -INT8_C( 96), -INT8_C( 77), -INT8_C( 114), -INT8_C( 15), -INT8_C( 60), -INT8_C( 113), -INT8_C( 14), INT8_C( 47) }, { INT16_C( 3859), -INT16_C( 4509), INT16_MIN, INT16_C( 7344), -INT16_C( 5230), INT16_C( 21751), -INT16_C( 26154), -INT16_C( 10627), -INT16_C( 3605), -INT16_C( 4638), -INT16_C( 7981), -INT16_C( 1625), -INT16_C( 12738), INT16_C( 15024), INT16_C( 3182), INT16_C( 0), INT16_C( 17577), -INT16_C( 14085), -INT16_C( 10434), -INT16_C( 2630), -INT16_C( 140), -INT16_C( 28197), INT16_C( 22545), INT16_C( 14860), INT16_C( 19529), INT16_C( 990), INT16_C( 5904), INT16_C( 9238), -INT16_C( 20582), INT16_C( 26285), INT16_C( 7061), INT16_C( 5079) } }, { { -INT16_C( 19934), INT16_C( 23402), -INT16_C( 3942), -INT16_C( 19369), -INT16_C( 10618), -INT16_C( 22930), INT16_C( 29846), INT16_C( 27378), -INT16_C( 9165), INT16_C( 29597), -INT16_C( 17207), INT16_C( 27026), INT16_C( 8560), INT16_C( 13402), INT16_C( 19632), -INT16_C( 11677), -INT16_C( 12546), -INT16_C( 26579), -INT16_C( 31298), INT16_C( 17484), -INT16_C( 17573), -INT16_C( 3350), -INT16_C( 9169), INT16_C( 25180), -INT16_C( 1607), -INT16_C( 32043), INT16_C( 26550), INT16_C( 9963), INT16_C( 17800), INT16_C( 14426), -INT16_C( 16750), -INT16_C( 28661) }, UINT32_C(1244215436), { -INT8_C( 67), INT8_C( 117), -INT8_C( 114), INT8_C( 25), INT8_C( 48), INT8_C( 120), INT8_C( 11), INT8_C( 95), INT8_C( 85), INT8_C( 103), -INT8_C( 63), INT8_C( 14), INT8_C( 96), -INT8_C( 106), -INT8_C( 112), INT8_C( 22), -INT8_C( 2), INT8_C( 123), INT8_C( 60), -INT8_C( 122), -INT8_C( 64), -INT8_C( 105), -INT8_C( 65), INT8_C( 82), INT8_C( 85), -INT8_C( 54), -INT8_C( 29), -INT8_C( 31), INT8_C( 2), INT8_C( 12), INT8_C( 43), -INT8_C( 64), -INT8_C( 127), -INT8_C( 71), 
-INT8_C( 39), -INT8_C( 78), INT8_C( 49), -INT8_C( 28), INT8_C( 17), -INT8_C( 122), INT8_C( 75), -INT8_C( 45), -INT8_C( 108), -INT8_C( 85), INT8_C( 105), INT8_C( 36), -INT8_C( 62), INT8_C( 103), -INT8_C( 97), -INT8_C( 2), -INT8_C( 18), INT8_C( 96), -INT8_C( 107), -INT8_C( 83), -INT8_C( 78), -INT8_C( 22), INT8_C( 119), -INT8_C( 107), -INT8_C( 53), INT8_C( 121), -INT8_C( 95), -INT8_C( 10), INT8_C( 57), INT8_C( 35) }, { -INT8_C( 81), INT8_C( 18), -INT8_C( 43), -INT8_C( 31), -INT8_C( 10), -INT8_C( 26), INT8_C( 103), INT8_C( 65), -INT8_C( 71), -INT8_C( 4), -INT8_C( 19), INT8_C( 35), INT8_C( 32), -INT8_C( 81), -INT8_C( 118), -INT8_C( 64), -INT8_C( 83), INT8_C( 120), INT8_C( 32), INT8_C( 67), INT8_C( 37), -INT8_C( 46), INT8_C( 45), -INT8_C( 100), INT8_C( 104), -INT8_C( 7), INT8_C( 22), INT8_C( 9), -INT8_C( 17), INT8_C( 79), INT8_C( 44), -INT8_C( 97), INT8_C( 98), INT8_C( 1), INT8_MIN, INT8_C( 88), -INT8_C( 24), -INT8_C( 25), -INT8_C( 102), -INT8_C( 95), -INT8_C( 29), -INT8_C( 121), -INT8_C( 60), INT8_C( 4), INT8_C( 54), INT8_C( 79), -INT8_C( 60), -INT8_C( 29), -INT8_C( 57), -INT8_C( 28), INT8_C( 38), -INT8_C( 19), -INT8_C( 74), INT8_C( 84), -INT8_C( 119), INT8_C( 30), INT8_C( 77), -INT8_C( 97), INT8_C( 40), INT8_C( 60), -INT8_C( 17), INT8_C( 84), -INT8_C( 37), INT8_C( 81) }, { -INT16_C( 19934), INT16_C( 23402), -INT16_C( 3600), INT16_C( 7308), -INT16_C( 10618), -INT16_C( 22930), INT16_C( 29846), -INT16_C( 18400), -INT16_C( 9165), INT16_C( 29597), -INT16_C( 17207), INT16_C( 395), INT16_C( 7426), INT16_C( 7019), INT16_C( 19632), -INT16_C( 11677), INT16_C( 12827), -INT16_C( 26579), -INT16_C( 31298), -INT16_C( 14464), -INT16_C( 17573), -INT16_C( 8196), -INT16_C( 9169), INT16_C( 25180), -INT16_C( 1607), INT16_C( 7220), INT16_C( 26550), -INT16_C( 14162), INT16_C( 17800), INT16_C( 14426), INT16_C( 17927), -INT16_C( 28661) } }, { { INT16_C( 23382), INT16_C( 16041), INT16_C( 17219), INT16_C( 9951), -INT16_C( 23350), INT16_C( 42), -INT16_C( 4365), -INT16_C( 17692), INT16_C( 2770), 
-INT16_C( 30297), INT16_C( 12638), -INT16_C( 21593), -INT16_C( 12336), -INT16_C( 16408), -INT16_C( 15580), INT16_C( 31248), -INT16_C( 17889), INT16_C( 25272), -INT16_C( 26627), -INT16_C( 14200), -INT16_C( 19653), INT16_C( 11976), -INT16_C( 21343), INT16_C( 29929), -INT16_C( 28489), INT16_C( 5629), -INT16_C( 23359), -INT16_C( 27967), -INT16_C( 22156), -INT16_C( 26543), INT16_C( 25196), -INT16_C( 29934) }, UINT32_C( 435014172), { INT8_C( 97), INT8_C( 118), -INT8_C( 31), -INT8_C( 99), INT8_C( 41), -INT8_C( 86), -INT8_C( 53), -INT8_C( 54), INT8_C( 86), -INT8_C( 76), INT8_C( 62), INT8_C( 13), INT8_C( 69), INT8_C( 59), INT8_C( 35), INT8_C( 6), -INT8_C( 32), -INT8_C( 28), -INT8_C( 104), INT8_C( 84), -INT8_C( 115), -INT8_C( 22), -INT8_C( 20), -INT8_C( 7), INT8_C( 76), -INT8_C( 2), -INT8_C( 123), INT8_C( 104), -INT8_C( 56), INT8_C( 114), -INT8_C( 127), INT8_C( 41), -INT8_C( 24), INT8_C( 99), -INT8_C( 58), INT8_C( 17), INT8_C( 13), -INT8_C( 110), -INT8_C( 36), INT8_C( 99), INT8_C( 70), INT8_C( 26), INT8_C( 113), -INT8_C( 117), INT8_C( 86), -INT8_C( 108), -INT8_C( 110), INT8_C( 54), INT8_C( 120), INT8_C( 42), -INT8_C( 118), INT8_C( 5), INT8_C( 20), INT8_C( 118), -INT8_C( 2), INT8_C( 96), INT8_C( 116), -INT8_C( 125), -INT8_C( 56), INT8_C( 60), -INT8_C( 10), INT8_C( 74), INT8_C( 101), -INT8_C( 34) }, { -INT8_C( 83), INT8_C( 44), -INT8_C( 16), -INT8_C( 70), -INT8_C( 66), -INT8_C( 52), INT8_C( 29), INT8_C( 4), -INT8_C( 26), -INT8_C( 114), -INT8_C( 112), INT8_C( 60), INT8_C( 34), INT8_C( 34), INT8_C( 114), -INT8_C( 102), INT8_C( 76), -INT8_C( 4), -INT8_C( 97), INT8_C( 97), INT8_C( 114), -INT8_C( 98), -INT8_C( 63), -INT8_C( 26), INT8_C( 33), -INT8_C( 118), INT8_C( 34), INT8_C( 23), -INT8_C( 44), -INT8_C( 120), -INT8_C( 10), -INT8_C( 127), -INT8_C( 76), -INT8_C( 26), INT8_C( 59), INT8_C( 114), -INT8_C( 78), INT8_C( 88), INT8_C( 118), -INT8_C( 104), -INT8_C( 25), INT8_C( 6), -INT8_C( 43), INT8_C( 9), INT8_C( 40), INT8_C( 71), -INT8_C( 92), INT8_C( 117), INT8_C( 68), INT8_C( 67), 
-INT8_C( 42), -INT8_C( 74), -INT8_C( 31), -INT8_C( 105), -INT8_C( 99), INT8_C( 3), INT8_C( 33), -INT8_C( 65), INT8_C( 26), -INT8_C( 11), INT8_C( 71), INT8_C( 16), INT8_C( 118), -INT8_C( 5) }, { INT16_C( 23382), INT16_C( 16041), -INT16_C( 11546), INT16_C( 6695), -INT16_C( 22756), INT16_C( 42), -INT16_C( 4365), -INT16_C( 17692), INT16_C( 2770), -INT16_C( 6596), INT16_C( 12638), -INT16_C( 21342), -INT16_C( 12336), -INT16_C( 16408), -INT16_C( 22480), -INT16_C( 6497), -INT16_C( 20206), INT16_C( 25272), INT16_C( 11834), INT16_C( 15664), -INT16_C( 19653), -INT16_C( 3608), INT16_C( 13948), -INT16_C( 7114), INT16_C( 10974), INT16_C( 5629), -INT16_C( 23359), -INT16_C( 24858), -INT16_C( 4687), -INT16_C( 26543), INT16_C( 25196), -INT16_C( 29934) } }, { { -INT16_C( 19978), -INT16_C( 22419), -INT16_C( 7158), -INT16_C( 3775), INT16_C( 5866), INT16_C( 5114), -INT16_C( 24995), -INT16_C( 24184), INT16_C( 24290), -INT16_C( 15528), -INT16_C( 2571), INT16_C( 6086), -INT16_C( 7756), -INT16_C( 1012), -INT16_C( 31759), -INT16_C( 5897), INT16_C( 25908), INT16_C( 16016), -INT16_C( 11959), INT16_C( 13103), INT16_C( 10983), INT16_C( 17734), -INT16_C( 12600), -INT16_C( 21786), INT16_C( 15916), INT16_C( 8814), INT16_C( 13363), -INT16_C( 6087), INT16_C( 17685), INT16_C( 2020), -INT16_C( 9272), -INT16_C( 529) }, UINT32_C(2302377792), { INT8_C( 81), INT8_C( 107), -INT8_C( 67), INT8_C( 56), -INT8_C( 107), INT8_C( 3), INT8_C( 125), INT8_C( 93), -INT8_C( 46), INT8_C( 100), INT8_C( 8), -INT8_C( 2), -INT8_C( 94), INT8_C( 118), INT8_C( 32), -INT8_C( 42), -INT8_C( 86), INT8_C( 89), -INT8_C( 66), -INT8_C( 64), -INT8_C( 97), -INT8_C( 94), -INT8_C( 57), INT8_C( 103), INT8_C( 125), -INT8_C( 74), INT8_C( 100), -INT8_C( 66), INT8_C( 53), -INT8_C( 96), INT8_C( 71), -INT8_C( 122), INT8_C( 11), INT8_C( 4), -INT8_C( 65), -INT8_C( 96), INT8_C( 8), INT8_C( 60), -INT8_C( 3), -INT8_C( 38), -INT8_C( 96), INT8_C( 5), -INT8_C( 40), INT8_C( 67), INT8_C( 123), -INT8_C( 7), INT8_C( 25), INT8_C( 38), INT8_C( 82), -INT8_C( 
41), -INT8_C( 26), -INT8_C( 15), INT8_C( 121), -INT8_C( 83), INT8_C( 89), -INT8_C( 10), INT8_C( 99), -INT8_C( 67), -INT8_C( 76), -INT8_C( 104), INT8_C( 93), -INT8_C( 4), INT8_C( 31), INT8_C( 104) }, { INT8_C( 0), -INT8_C( 34), INT8_C( 8), INT8_C( 8), INT8_C( 26), INT8_C( 6), -INT8_C( 30), -INT8_C( 69), INT8_C( 11), -INT8_C( 69), -INT8_C( 2), -INT8_C( 121), -INT8_C( 76), INT8_C( 23), -INT8_C( 83), INT8_C( 6), -INT8_C( 18), -INT8_C( 109), -INT8_C( 8), INT8_C( 103), INT8_C( 64), INT8_C( 81), INT8_C( 93), -INT8_C( 93), INT8_C( 14), INT8_C( 18), INT8_C( 59), INT8_C( 108), INT8_C( 14), INT8_C( 90), -INT8_C( 44), INT8_C( 14), INT8_C( 56), -INT8_C( 35), INT8_C( 23), INT8_C( 83), -INT8_C( 29), -INT8_C( 7), INT8_C( 14), -INT8_C( 18), -INT8_C( 76), INT8_C( 12), INT8_C( 117), INT8_C( 104), INT8_C( 35), INT8_C( 34), INT8_C( 111), INT8_C( 17), -INT8_C( 75), INT8_C( 103), INT8_C( 120), -INT8_C( 11), -INT8_C( 72), -INT8_C( 43), -INT8_C( 104), -INT8_C( 58), -INT8_C( 25), -INT8_C( 44), INT8_C( 50), -INT8_C( 11), INT8_C( 46), INT8_C( 7), INT8_C( 4), INT8_C( 103) }, { -INT16_C( 19978), -INT16_C( 22419), -INT16_C( 7158), -INT16_C( 3775), INT16_C( 5866), INT16_C( 5114), -INT16_C( 9598), -INT16_C( 24184), -INT16_C( 12761), INT16_C( 18256), INT16_C( 23298), INT16_C( 8928), INT16_C( 5026), INT16_C( 26420), INT16_C( 15142), -INT16_C( 5897), INT16_C( 476), INT16_C( 17673), -INT16_C( 11959), -INT16_C( 382), -INT16_C( 12100), INT16_C( 32240), -INT16_C( 12600), -INT16_C( 21786), INT16_C( 15995), INT16_C( 8814), INT16_C( 13363), -INT16_C( 23524), INT16_C( 17685), INT16_C( 2020), -INT16_C( 9272), INT16_C( 10836) } }, { { INT16_C( 7140), -INT16_C( 14406), -INT16_C( 14316), -INT16_C( 13899), INT16_C( 11220), -INT16_C( 2255), -INT16_C( 24499), INT16_C( 776), -INT16_C( 32761), -INT16_C( 16392), -INT16_C( 28331), INT16_C( 15750), -INT16_C( 18331), -INT16_C( 27854), INT16_C( 14015), -INT16_C( 23558), -INT16_C( 19375), INT16_C( 26218), INT16_C( 8316), INT16_C( 20527), INT16_C( 24651), -INT16_C( 26553), 
INT16_C( 20225), INT16_C( 2203), -INT16_C( 27441), INT16_C( 9672), INT16_C( 20005), -INT16_C( 30110), -INT16_C( 27642), -INT16_C( 14819), INT16_C( 6347), INT16_C( 7273) }, UINT32_C(1233310924), { -INT8_C( 12), -INT8_C( 79), -INT8_C( 103), INT8_C( 63), INT8_C( 18), -INT8_C( 31), -INT8_C( 41), INT8_C( 19), INT8_C( 48), INT8_C( 115), INT8_C( 27), INT8_C( 0), INT8_C( 7), -INT8_C( 29), INT8_C( 37), INT8_C( 44), INT8_C( 49), -INT8_C( 121), -INT8_C( 74), INT8_C( 56), INT8_C( 27), -INT8_C( 45), -INT8_C( 2), -INT8_C( 26), -INT8_C( 21), INT8_C( 103), INT8_C( 3), -INT8_C( 72), INT8_C( 59), -INT8_C( 123), INT8_C( 1), INT8_C( 47), INT8_C( 55), -INT8_C( 102), INT8_C( 110), INT8_C( 73), INT8_C( 123), INT8_C( 70), INT8_C( 92), -INT8_C( 84), -INT8_C( 71), INT8_C( 119), -INT8_C( 84), -INT8_C( 64), INT8_C( 91), -INT8_C( 47), -INT8_C( 20), -INT8_C( 116), INT8_C( 88), -INT8_C( 94), -INT8_C( 60), INT8_C( 115), INT8_C( 117), -INT8_C( 62), INT8_C( 90), INT8_C( 97), INT8_C( 42), INT8_C( 93), INT8_C( 25), INT8_C( 101), -INT8_C( 30), INT8_C( 26), -INT8_C( 107), INT8_C( 25) }, { -INT8_C( 76), INT8_C( 3), INT8_C( 98), INT8_C( 48), INT8_C( 73), -INT8_C( 66), -INT8_C( 36), INT8_C( 2), INT8_C( 54), -INT8_C( 120), -INT8_C( 62), -INT8_C( 111), INT8_C( 89), -INT8_C( 82), INT8_C( 29), -INT8_C( 79), INT8_C( 80), -INT8_C( 30), INT8_C( 36), -INT8_C( 58), -INT8_C( 92), INT8_C( 126), INT8_C( 39), -INT8_C( 50), -INT8_C( 37), INT8_C( 64), INT8_C( 52), -INT8_C( 66), INT8_C( 90), -INT8_C( 55), -INT8_C( 41), INT8_C( 14), -INT8_C( 52), INT8_C( 58), INT8_C( 62), INT8_C( 22), -INT8_C( 8), INT8_C( 26), INT8_C( 24), INT8_C( 46), -INT8_C( 94), -INT8_C( 37), -INT8_C( 65), -INT8_C( 5), -INT8_C( 119), -INT8_C( 35), -INT8_C( 84), -INT8_C( 38), -INT8_C( 65), -INT8_C( 47), -INT8_C( 96), INT8_C( 99), INT8_C( 79), -INT8_C( 57), INT8_C( 50), INT8_C( 43), INT8_C( 7), INT8_C( 102), -INT8_C( 23), INT8_C( 97), INT8_C( 47), -INT8_C( 64), INT8_C( 111), -INT8_C( 5) }, { INT16_C( 7140), -INT16_C( 14406), -INT16_C( 13536), -INT16_C( 
7702), INT16_C( 11220), -INT16_C( 2255), -INT16_C( 17991), -INT16_C( 2403), -INT16_C( 32761), -INT16_C( 16392), INT16_C( 24102), INT16_C( 15750), -INT16_C( 2103), -INT16_C( 27854), -INT16_C( 2005), INT16_C( 617), -INT16_C( 19375), INT16_C( 8426), INT16_C( 8316), INT16_C( 20527), INT16_C( 24651), -INT16_C( 26553), INT16_C( 20225), -INT16_C( 25144), -INT16_C( 13334), INT16_C( 9672), INT16_C( 20005), INT16_C( 8671), -INT16_C( 27642), -INT16_C( 14819), INT16_C( 8958), INT16_C( 7273) } }, { { -INT16_C( 20742), -INT16_C( 3311), INT16_C( 10952), INT16_C( 27425), -INT16_C( 7931), -INT16_C( 29082), INT16_C( 5054), INT16_C( 32104), INT16_C( 2276), INT16_C( 13280), INT16_C( 4815), -INT16_C( 10658), INT16_C( 18296), -INT16_C( 22729), -INT16_C( 22776), INT16_C( 675), -INT16_C( 19371), INT16_C( 7669), INT16_C( 6110), -INT16_C( 7288), -INT16_C( 4104), -INT16_C( 18830), -INT16_C( 9726), -INT16_C( 6605), INT16_C( 5091), -INT16_C( 19943), INT16_C( 30758), -INT16_C( 24951), -INT16_C( 16193), -INT16_C( 14522), -INT16_C( 5785), -INT16_C( 17206) }, UINT32_C(2094710685), { -INT8_C( 42), INT8_C( 98), INT8_C( 95), -INT8_C( 50), INT8_C( 81), -INT8_C( 47), -INT8_C( 124), INT8_C( 83), -INT8_C( 84), -INT8_C( 73), INT8_C( 57), -INT8_C( 113), -INT8_C( 53), INT8_C( 83), INT8_C( 65), -INT8_C( 15), -INT8_C( 53), -INT8_C( 54), -INT8_C( 113), -INT8_C( 118), -INT8_C( 117), -INT8_C( 43), INT8_C( 82), -INT8_C( 14), -INT8_C( 66), INT8_C( 28), -INT8_C( 81), INT8_C( 92), -INT8_C( 37), -INT8_C( 119), -INT8_C( 40), -INT8_C( 78), -INT8_C( 21), INT8_C( 55), INT8_MIN, INT8_C( 61), INT8_C( 9), INT8_C( 5), -INT8_C( 112), -INT8_C( 75), -INT8_C( 68), -INT8_C( 54), INT8_C( 68), -INT8_C( 121), INT8_C( 29), -INT8_C( 123), INT8_C( 120), -INT8_C( 24), INT8_C( 80), INT8_C( 8), INT8_C( 114), -INT8_C( 37), -INT8_C( 35), -INT8_C( 60), -INT8_C( 51), -INT8_C( 100), -INT8_C( 32), INT8_C( 124), -INT8_C( 8), -INT8_C( 68), INT8_C( 5), -INT8_C( 48), INT8_C( 110), -INT8_C( 15) }, { INT8_C( 7), -INT8_C( 18), INT8_C( 46), INT8_C( 
16), -INT8_C( 13), -INT8_C( 66), -INT8_C( 59), -INT8_C( 80), -INT8_C( 120), INT8_C( 9), INT8_C( 55), -INT8_C( 91), -INT8_C( 113), -INT8_C( 80), -INT8_C( 115), -INT8_C( 33), -INT8_C( 72), INT8_C( 0), -INT8_C( 70), -INT8_C( 107), -INT8_C( 60), -INT8_C( 121), INT8_C( 49), -INT8_C( 91), INT8_C( 4), INT8_C( 41), INT8_C( 97), INT8_C( 9), -INT8_C( 7), -INT8_C( 49), -INT8_C( 6), INT8_C( 1), -INT8_C( 67), INT8_C( 40), INT8_C( 17), -INT8_C( 79), -INT8_C( 25), -INT8_C( 41), INT8_C( 97), INT8_C( 111), -INT8_C( 32), -INT8_C( 104), INT8_C( 21), INT8_C( 111), INT8_C( 72), -INT8_C( 94), INT8_C( 78), INT8_C( 0), -INT8_C( 94), INT8_C( 8), -INT8_C( 106), INT8_C( 103), -INT8_C( 112), -INT8_C( 57), INT8_C( 12), -INT8_C( 108), -INT8_C( 15), INT8_C( 109), -INT8_C( 99), -INT8_C( 22), INT8_C( 60), -INT8_C( 104), -INT8_C( 21), -INT8_C( 7) }, { -INT16_C( 266), -INT16_C( 3311), -INT16_C( 14847), -INT16_C( 14428), -INT16_C( 18993), -INT16_C( 29082), INT16_C( 5054), -INT16_C( 15428), -INT16_C( 14616), -INT16_C( 24776), INT16_MIN, -INT16_C( 18004), INT16_C( 1908), INT16_C( 17803), -INT16_C( 22776), -INT16_C( 1118), -INT16_C( 19371), -INT16_C( 2643), INT16_C( 6110), INT16_MAX, -INT16_C( 27024), -INT16_C( 18830), -INT16_C( 10414), INT16_C( 9360), INT16_C( 5091), -INT16_C( 19943), INT16_MIN, -INT16_C( 14388), INT16_C( 10156), -INT16_C( 28688), -INT16_C( 21332), -INT16_C( 17206) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi16(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_mask_maddubs_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i src = simde_test_x86_random_i16x32(); simde__mmask32 k = simde_test_x86_random_mmask32(); simde__m512i a = simde_test_x86_random_i8x64(); 
simde__m512i b = simde_test_x86_random_i8x64(); simde__m512i r = simde_mm512_mask_maddubs_epi16(src, k, a, b); simde_test_x86_write_i16x32(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_maskz_maddubs_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask32 k; const int8_t a[64]; const int8_t b[64]; const int16_t r[32]; } test_vec[] = { { UINT32_C( 574325395), { INT8_C( 51), -INT8_C( 84), INT8_C( 45), -INT8_C( 123), INT8_C( 17), INT8_C( 41), INT8_C( 63), INT8_C( 1), -INT8_C( 45), -INT8_C( 38), -INT8_C( 114), INT8_C( 51), INT8_C( 104), -INT8_C( 110), -INT8_C( 55), INT8_C( 70), -INT8_C( 31), INT8_C( 101), INT8_C( 36), INT8_C( 0), -INT8_C( 50), INT8_C( 105), -INT8_C( 71), -INT8_C( 113), INT8_MAX, INT8_C( 22), INT8_C( 122), INT8_C( 19), -INT8_C( 104), -INT8_C( 75), INT8_C( 53), -INT8_C( 53), INT8_C( 97), INT8_C( 98), INT8_C( 80), INT8_C( 114), -INT8_C( 116), -INT8_C( 112), INT8_C( 116), INT8_C( 95), INT8_C( 106), INT8_C( 2), -INT8_C( 110), -INT8_C( 46), -INT8_C( 108), INT8_C( 91), INT8_C( 24), INT8_C( 118), -INT8_C( 64), INT8_C( 61), INT8_C( 118), -INT8_C( 114), -INT8_C( 90), INT8_C( 47), INT8_C( 29), INT8_C( 38), INT8_C( 69), -INT8_C( 105), INT8_C( 57), -INT8_C( 34), INT8_C( 76), INT8_C( 110), -INT8_C( 87), -INT8_C( 82) }, { -INT8_C( 47), -INT8_C( 6), INT8_C( 32), INT8_C( 93), -INT8_C( 118), -INT8_C( 108), -INT8_C( 68), -INT8_C( 12), -INT8_C( 105), INT8_C( 78), -INT8_C( 57), INT8_C( 43), -INT8_C( 87), -INT8_C( 33), -INT8_C( 95), INT8_C( 106), INT8_C( 28), INT8_C( 24), -INT8_C( 8), -INT8_C( 61), INT8_C( 71), INT8_C( 22), -INT8_C( 23), -INT8_C( 115), -INT8_C( 83), INT8_C( 34), INT8_C( 107), -INT8_C( 6), -INT8_C( 112), INT8_C( 20), -INT8_C( 88), INT8_C( 97), INT8_C( 14), 
-INT8_C( 56), -INT8_C( 66), -INT8_C( 104), INT8_C( 93), INT8_C( 122), -INT8_C( 115), -INT8_C( 12), -INT8_C( 56), INT8_C( 84), INT8_C( 31), INT8_C( 114), INT8_C( 51), -INT8_C( 63), -INT8_C( 36), INT8_C( 80), -INT8_C( 39), -INT8_C( 44), INT8_C( 19), INT8_C( 32), -INT8_C( 22), -INT8_C( 4), -INT8_C( 83), -INT8_C( 104), INT8_C( 30), INT8_C( 24), -INT8_C( 110), -INT8_C( 82), INT8_C( 45), INT8_C( 58), INT8_C( 16), INT8_C( 59) }, { -INT16_C( 3429), INT16_C( 13809), INT16_C( 0), INT16_C( 0), -INT16_C( 5151), INT16_C( 0), INT16_C( 0), -INT16_C( 11675), INT16_C( 0), -INT16_C( 288), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 15027), -INT16_C( 4130), -INT16_C( 17136), INT16_C( 0), -INT16_C( 14480), -INT16_C( 5768), INT16_C( 28466), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 6786), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 24474), INT16_C( 0), INT16_C( 0) } }, { UINT32_C(1607781890), { INT8_C( 73), INT8_C( 97), INT8_C( 83), INT8_C( 17), -INT8_C( 75), INT8_C( 115), -INT8_C( 125), -INT8_C( 24), INT8_C( 52), INT8_C( 95), INT8_C( 56), INT8_C( 13), INT8_C( 52), INT8_C( 75), INT8_C( 45), INT8_C( 30), INT8_C( 71), -INT8_C( 37), -INT8_C( 74), INT8_C( 101), -INT8_C( 13), INT8_C( 72), INT8_C( 20), INT8_C( 32), -INT8_C( 126), INT8_C( 36), INT8_C( 92), -INT8_C( 123), -INT8_C( 14), INT8_C( 48), -INT8_C( 28), INT8_C( 59), -INT8_C( 111), INT8_C( 56), INT8_C( 77), INT8_C( 70), -INT8_C( 85), -INT8_C( 48), INT8_C( 46), -INT8_C( 33), INT8_C( 48), INT8_C( 103), -INT8_C( 20), INT8_C( 100), -INT8_C( 78), INT8_C( 25), -INT8_C( 126), -INT8_C( 6), -INT8_C( 12), INT8_C( 57), INT8_C( 95), -INT8_C( 24), -INT8_C( 127), INT8_C( 115), INT8_C( 8), INT8_C( 4), -INT8_C( 105), INT8_C( 100), -INT8_C( 119), -INT8_C( 118), -INT8_C( 108), INT8_C( 109), -INT8_C( 59), INT8_C( 37) }, { -INT8_C( 91), INT8_C( 18), INT8_C( 107), INT8_C( 80), -INT8_C( 29), -INT8_C( 102), INT8_C( 47), INT8_C( 19), INT8_C( 1), INT8_C( 27), INT8_C( 119), -INT8_C( 77), INT8_C( 53), -INT8_C( 7), 
-INT8_C( 83), INT8_C( 41), INT8_C( 50), INT8_C( 13), INT8_C( 17), -INT8_C( 76), INT8_MIN, INT8_C( 26), -INT8_C( 72), INT8_C( 24), INT8_C( 126), INT8_C( 65), -INT8_C( 94), INT8_C( 19), -INT8_C( 82), INT8_C( 103), INT8_C( 56), INT8_C( 84), INT8_C( 122), -INT8_C( 92), -INT8_C( 92), INT8_C( 93), INT8_C( 62), -INT8_C( 44), INT8_C( 112), INT8_C( 63), -INT8_C( 17), -INT8_C( 25), -INT8_C( 14), INT8_C( 36), -INT8_C( 32), -INT8_C( 96), INT8_C( 78), INT8_C( 19), -INT8_C( 83), INT8_C( 95), -INT8_C( 57), INT8_C( 45), INT8_C( 121), INT8_MAX, INT8_C( 69), -INT8_C( 8), -INT8_C( 64), -INT8_C( 25), INT8_C( 11), INT8_C( 110), INT8_C( 79), INT8_C( 67), -INT8_C( 62), -INT8_C( 55) }, { INT16_C( 0), INT16_C( 10241), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 4582), -INT16_C( 29232), -INT16_C( 672), INT16_C( 0), INT16_C( 0), -INT16_C( 14900), INT16_C( 17724), INT16_C( 0), INT16_C( 0), INT16_C( 1450), INT16_C( 0), -INT16_C( 3391), INT16_C( 0), -INT16_C( 8096), INT16_C( 14890), -INT16_C( 14837), INT16_C( 5025), INT16_C( 30214), INT16_C( 520), -INT16_C( 12164), INT16_C( 0), INT16_C( 18995), INT16_C( 0) } }, { UINT32_C( 623273959), { INT8_C( 59), -INT8_C( 106), INT8_C( 100), INT8_C( 42), INT8_C( 125), INT8_C( 87), INT8_C( 79), INT8_C( 93), -INT8_C( 9), -INT8_C( 99), INT8_C( 112), -INT8_C( 92), -INT8_C( 4), INT8_C( 55), -INT8_C( 47), INT8_C( 118), -INT8_C( 74), INT8_C( 23), INT8_C( 110), INT8_C( 118), -INT8_C( 2), INT8_C( 121), -INT8_C( 27), INT8_C( 77), -INT8_C( 68), -INT8_C( 89), INT8_C( 22), -INT8_C( 92), INT8_C( 14), INT8_C( 60), -INT8_C( 55), INT8_C( 73), -INT8_C( 46), INT8_C( 46), INT8_C( 116), INT8_C( 79), -INT8_C( 123), -INT8_C( 61), -INT8_C( 83), INT8_C( 124), INT8_C( 96), INT8_C( 29), INT8_C( 32), INT8_C( 92), INT8_C( 85), -INT8_C( 15), -INT8_C( 46), INT8_C( 11), INT8_C( 8), INT8_C( 64), -INT8_C( 126), INT8_C( 7), -INT8_C( 71), INT8_C( 103), INT8_C( 84), INT8_C( 118), INT8_C( 14), INT8_C( 107), INT8_C( 26), INT8_C( 29), 
-INT8_C( 89), -INT8_C( 29), INT8_C( 102), INT8_C( 122) }, { INT8_C( 17), -INT8_C( 38), -INT8_C( 55), -INT8_C( 106), -INT8_C( 99), INT8_C( 118), INT8_C( 18), -INT8_C( 3), -INT8_C( 108), INT8_C( 50), INT8_C( 90), -INT8_C( 23), INT8_C( 36), INT8_C( 44), -INT8_C( 12), INT8_C( 44), INT8_C( 109), INT8_C( 118), INT8_C( 51), INT8_C( 38), -INT8_C( 35), -INT8_C( 120), -INT8_C( 100), -INT8_C( 20), -INT8_C( 13), -INT8_C( 74), INT8_C( 9), -INT8_C( 102), -INT8_C( 102), INT8_C( 111), INT8_C( 20), -INT8_C( 85), INT8_C( 74), -INT8_C( 34), INT8_C( 66), -INT8_C( 25), INT8_C( 84), INT8_C( 84), -INT8_C( 27), -INT8_C( 24), -INT8_C( 121), INT8_C( 63), -INT8_C( 47), -INT8_C( 85), INT8_C( 107), -INT8_C( 58), -INT8_C( 41), -INT8_C( 40), INT8_C( 60), INT8_C( 11), -INT8_C( 1), INT8_C( 26), -INT8_C( 109), -INT8_C( 101), INT8_C( 6), -INT8_C( 122), INT8_C( 82), INT8_C( 15), INT8_C( 32), -INT8_C( 20), INT8_C( 126), INT8_C( 53), -INT8_C( 105), -INT8_C( 56) }, { -INT16_C( 4697), -INT16_C( 9952), -INT16_C( 2109), INT16_C( 0), INT16_C( 0), INT16_C( 6308), INT16_C( 11492), INT16_C( 2684), INT16_C( 22552), INT16_C( 10094), -INT16_C( 23410), INT16_C( 0), INT16_C( 0), -INT16_C( 16530), INT16_C( 5232), INT16_C( 0), INT16_C( 0), INT16_C( 5681), INT16_C( 27552), INT16_C( 0), INT16_C( 0), -INT16_C( 9324), INT16_C( 0), INT16_C( 0), INT16_C( 1184), INT16_C( 0), -INT16_C( 30568), INT16_C( 0), INT16_C( 0), INT16_C( 252), INT16_C( 0), INT16_C( 0) } }, { UINT32_C(1739643155), { INT8_C( 46), -INT8_C( 107), INT8_C( 80), -INT8_C( 75), -INT8_C( 44), INT8_C( 33), INT8_C( 96), INT8_C( 63), -INT8_C( 25), INT8_C( 55), INT8_C( 24), INT8_C( 36), INT8_C( 66), INT8_C( 23), INT8_C( 62), -INT8_C( 43), -INT8_C( 78), INT8_C( 68), INT8_C( 91), INT8_C( 4), INT8_C( 83), INT8_C( 124), -INT8_C( 16), -INT8_C( 47), -INT8_C( 79), -INT8_C( 120), -INT8_C( 102), -INT8_C( 60), INT8_C( 97), INT8_C( 74), INT8_C( 43), -INT8_C( 113), -INT8_C( 33), INT8_C( 123), INT8_C( 68), -INT8_C( 77), -INT8_C( 99), -INT8_C( 92), -INT8_C( 14), -INT8_C( 124), 
-INT8_C( 36), INT8_C( 10), -INT8_C( 88), INT8_C( 30), INT8_C( 33), -INT8_C( 26), -INT8_C( 12), -INT8_C( 44), INT8_C( 42), INT8_C( 79), -INT8_C( 40), INT8_C( 125), -INT8_C( 53), -INT8_C( 55), INT8_C( 79), INT8_C( 124), INT8_C( 81), -INT8_C( 23), INT8_C( 64), -INT8_C( 78), INT8_C( 51), INT8_C( 108), INT8_C( 66), INT8_C( 18) }, { -INT8_C( 25), -INT8_C( 122), -INT8_C( 59), -INT8_C( 124), INT8_C( 43), -INT8_C( 73), INT8_C( 9), INT8_C( 7), -INT8_C( 62), -INT8_C( 79), INT8_C( 37), -INT8_C( 29), -INT8_C( 104), INT8_C( 25), -INT8_C( 73), -INT8_C( 62), INT8_C( 105), -INT8_C( 112), INT8_C( 64), INT8_C( 52), INT8_C( 89), -INT8_C( 113), -INT8_C( 79), -INT8_C( 86), INT8_C( 120), -INT8_C( 15), INT8_C( 92), -INT8_C( 85), INT8_C( 93), -INT8_C( 98), -INT8_C( 67), INT8_C( 69), INT8_C( 37), -INT8_C( 126), -INT8_C( 55), INT8_C( 80), INT8_C( 57), -INT8_C( 46), INT8_C( 87), -INT8_C( 5), -INT8_C( 124), INT8_C( 124), -INT8_C( 33), INT8_C( 28), -INT8_C( 106), -INT8_C( 106), -INT8_C( 34), -INT8_C( 1), INT8_C( 38), INT8_C( 30), INT8_C( 51), INT8_MAX, -INT8_C( 83), -INT8_C( 28), INT8_C( 41), INT8_C( 37), -INT8_C( 42), -INT8_C( 122), -INT8_C( 48), INT8_C( 51), INT8_C( 36), -INT8_C( 115), INT8_C( 120), INT8_C( 73) }, { -INT16_C( 19328), -INT16_C( 27164), INT16_C( 0), INT16_C( 0), -INT16_C( 18667), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 11074), INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_C( 19200), INT16_C( 0), INT16_C( 1769), INT16_C( 6986), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 26040), -INT16_C( 4704), INT16_C( 0), -INT16_C( 8508), INT16_C( 3966), INT16_C( 26891), -INT16_C( 22477), INT16_C( 0), INT16_C( 0), INT16_C( 6006), -INT16_C( 10584), INT16_C( 0) } }, { UINT32_C(1234780687), { INT8_C( 20), -INT8_C( 16), INT8_C( 68), -INT8_C( 104), INT8_C( 109), INT8_C( 35), -INT8_C( 76), INT8_C( 3), -INT8_C( 70), -INT8_C( 109), INT8_C( 2), -INT8_C( 32), -INT8_C( 79), INT8_C( 53), INT8_C( 96), INT8_C( 95), INT8_C( 26), -INT8_C( 119), -INT8_C( 124), -INT8_C( 16), INT8_C( 15), 
INT8_C( 85), INT8_C( 35), INT8_C( 52), -INT8_C( 30), -INT8_C( 100), INT8_C( 125), -INT8_C( 14), -INT8_C( 34), INT8_C( 23), INT8_C( 59), -INT8_C( 14), INT8_C( 7), INT8_MAX, -INT8_C( 117), INT8_C( 116), -INT8_C( 93), INT8_C( 63), INT8_C( 119), INT8_C( 93), -INT8_C( 46), INT8_C( 121), INT8_C( 61), -INT8_C( 124), -INT8_C( 81), -INT8_C( 99), -INT8_C( 29), -INT8_C( 55), INT8_C( 39), INT8_C( 103), -INT8_C( 71), INT8_C( 54), -INT8_C( 68), -INT8_C( 36), INT8_C( 106), -INT8_C( 97), INT8_C( 120), -INT8_C( 24), -INT8_C( 111), INT8_C( 86), -INT8_C( 1), -INT8_C( 52), INT8_C( 73), INT8_C( 6) }, { INT8_C( 75), -INT8_C( 44), INT8_C( 123), -INT8_C( 18), INT8_C( 19), -INT8_C( 14), INT8_C( 75), -INT8_C( 26), INT8_C( 108), -INT8_C( 119), INT8_C( 106), INT8_C( 27), INT8_C( 38), INT8_C( 77), -INT8_C( 28), INT8_C( 77), -INT8_C( 76), -INT8_C( 99), -INT8_C( 124), INT8_C( 113), INT8_C( 121), -INT8_C( 18), INT8_C( 16), -INT8_C( 14), -INT8_C( 42), -INT8_C( 95), INT8_C( 72), -INT8_C( 43), INT8_C( 109), -INT8_C( 111), -INT8_C( 36), -INT8_C( 72), INT8_C( 101), INT8_C( 87), -INT8_C( 89), INT8_C( 121), INT8_C( 73), -INT8_C( 14), INT8_C( 95), -INT8_C( 75), INT8_C( 123), -INT8_C( 55), -INT8_C( 48), -INT8_C( 94), INT8_C( 22), -INT8_C( 76), -INT8_C( 17), -INT8_C( 54), INT8_C( 81), INT8_C( 115), INT8_C( 59), -INT8_C( 53), INT8_C( 98), INT8_C( 75), -INT8_C( 67), INT8_C( 56), -INT8_C( 20), INT8_C( 5), INT8_C( 14), INT8_C( 89), -INT8_C( 105), -INT8_C( 22), INT8_C( 18), -INT8_C( 4) }, { -INT16_C( 9060), INT16_C( 5628), INT16_C( 1581), INT16_C( 13422), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 10752), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 21645), INT16_C( 0), INT16_C( 11756), INT16_C( 0), INT16_C( 0), INT16_C( 4330), INT16_C( 19175), INT16_C( 0), INT16_C( 0), -INT16_C( 14713), INT16_C( 15004), INT16_C( 0), INT16_C( 0), INT16_C( 1802), INT16_C( 0), INT16_C( 0), -INT16_C( 31263), INT16_C( 0) } }, { UINT32_C(2322970945), { -INT8_C( 85), -INT8_C( 44), 
INT8_C( 64), INT8_C( 39), -INT8_C( 99), INT8_C( 16), -INT8_C( 55), -INT8_C( 77), -INT8_C( 59), -INT8_C( 72), INT8_C( 126), INT8_C( 22), INT8_C( 44), -INT8_C( 71), -INT8_C( 31), -INT8_C( 114), INT8_C( 5), -INT8_C( 98), -INT8_C( 58), -INT8_C( 15), -INT8_C( 92), -INT8_C( 44), INT8_C( 75), INT8_C( 59), -INT8_C( 66), INT8_C( 93), INT8_C( 55), -INT8_C( 1), INT8_C( 22), -INT8_C( 83), -INT8_C( 118), -INT8_C( 63), -INT8_C( 127), -INT8_C( 54), -INT8_C( 24), INT8_C( 31), -INT8_C( 38), -INT8_C( 79), -INT8_C( 46), -INT8_C( 97), INT8_C( 106), INT8_C( 80), -INT8_C( 74), -INT8_C( 106), INT8_C( 10), -INT8_C( 105), INT8_C( 36), INT8_C( 15), INT8_C( 54), -INT8_C( 22), INT8_C( 0), -INT8_C( 38), -INT8_C( 65), INT8_C( 75), INT8_C( 21), INT8_C( 125), -INT8_C( 88), INT8_C( 76), INT8_C( 125), -INT8_C( 66), -INT8_C( 7), INT8_C( 7), INT8_MIN, INT8_C( 123) }, { -INT8_C( 47), INT8_C( 104), -INT8_C( 102), -INT8_C( 85), INT8_C( 26), INT8_C( 108), INT8_C( 75), -INT8_C( 124), -INT8_C( 67), INT8_C( 1), INT8_C( 26), -INT8_C( 57), -INT8_C( 104), INT8_C( 62), -INT8_C( 42), -INT8_C( 50), INT8_C( 40), -INT8_C( 42), -INT8_C( 88), -INT8_C( 25), INT8_C( 34), -INT8_C( 67), INT8_C( 101), -INT8_C( 54), INT8_C( 10), -INT8_C( 30), -INT8_C( 119), INT8_C( 3), -INT8_C( 23), INT8_C( 9), INT8_C( 126), -INT8_C( 70), INT8_C( 113), INT8_C( 24), INT8_C( 101), -INT8_C( 117), -INT8_C( 123), -INT8_C( 80), INT8_C( 15), INT8_C( 66), -INT8_C( 79), INT8_C( 41), INT8_C( 9), INT8_C( 74), INT8_C( 103), -INT8_C( 33), INT8_C( 24), -INT8_C( 112), -INT8_C( 75), -INT8_C( 63), INT8_C( 119), -INT8_C( 41), INT8_C( 126), -INT8_C( 36), -INT8_C( 94), -INT8_C( 120), -INT8_C( 66), INT8_C( 43), -INT8_C( 116), -INT8_C( 89), INT8_C( 52), INT8_C( 10), INT8_C( 97), -INT8_C( 91) }, { INT16_C( 14011), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 6894), INT16_C( 0), -INT16_C( 6436), INT16_C( 0), INT16_C( 0), INT16_C( 4389), -INT16_C( 890), -INT16_C( 5780), INT16_C( 0), INT16_C( 3878), INT16_C( 19425), INT16_C( 0), 
INT16_MIN, INT16_C( 0), -INT16_C( 5094), INT16_C( 12738), -INT16_C( 3953), INT16_C( 0), INT16_C( 0), -INT16_C( 8938), INT16_C( 0), -INT16_C( 16974), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1223) } }, { UINT32_C(2821834531), { INT8_C( 119), INT8_C( 64), -INT8_C( 22), INT8_C( 41), INT8_C( 106), -INT8_C( 13), INT8_C( 115), -INT8_C( 47), -INT8_C( 46), -INT8_C( 117), INT8_C( 97), -INT8_C( 121), INT8_C( 76), -INT8_C( 39), INT8_C( 95), -INT8_C( 53), -INT8_C( 75), INT8_C( 1), INT8_C( 83), INT8_C( 116), INT8_C( 44), -INT8_C( 33), INT8_C( 27), INT8_C( 96), -INT8_C( 22), INT8_C( 125), INT8_C( 5), INT8_C( 13), INT8_C( 68), INT8_C( 54), -INT8_C( 75), -INT8_C( 69), INT8_C( 119), -INT8_C( 97), -INT8_C( 28), -INT8_C( 31), -INT8_C( 110), INT8_C( 87), -INT8_C( 78), INT8_C( 100), -INT8_C( 29), INT8_C( 20), -INT8_C( 21), INT8_C( 47), -INT8_C( 19), INT8_C( 74), -INT8_C( 6), -INT8_C( 94), INT8_C( 75), INT8_C( 78), INT8_C( 22), INT8_C( 119), INT8_C( 45), INT8_C( 50), -INT8_C( 41), INT8_C( 23), -INT8_C( 81), -INT8_C( 35), INT8_C( 36), -INT8_C( 13), INT8_C( 19), -INT8_C( 39), -INT8_C( 82), -INT8_C( 118) }, { INT8_C( 120), -INT8_C( 109), INT8_C( 107), INT8_C( 10), -INT8_C( 22), INT8_C( 30), INT8_C( 110), -INT8_C( 51), INT8_C( 50), INT8_C( 90), -INT8_C( 3), INT8_C( 31), -INT8_C( 92), -INT8_C( 9), -INT8_C( 63), -INT8_C( 16), INT8_C( 69), -INT8_C( 40), INT8_C( 103), INT8_C( 115), INT8_C( 10), INT8_C( 63), -INT8_C( 118), -INT8_C( 71), INT8_C( 28), -INT8_C( 81), -INT8_C( 84), INT8_C( 47), -INT8_C( 120), INT8_C( 90), -INT8_C( 70), INT8_C( 1), -INT8_C( 19), INT8_C( 37), INT8_C( 11), -INT8_C( 40), INT8_C( 67), INT8_C( 122), -INT8_C( 91), INT8_C( 117), -INT8_C( 44), -INT8_C( 94), -INT8_C( 108), INT8_C( 120), -INT8_C( 102), INT8_C( 86), INT8_C( 104), -INT8_C( 33), INT8_C( 46), -INT8_C( 48), INT8_C( 82), INT8_C( 56), INT8_C( 15), -INT8_C( 35), -INT8_C( 15), INT8_C( 43), -INT8_C( 116), -INT8_C( 99), INT8_C( 90), INT8_C( 20), -INT8_C( 9), INT8_C( 20), INT8_C( 21), -INT8_C( 27) }, { INT16_C( 
7304), INT16_C( 25448), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 3894), INT16_C( 0), INT16_C( 0), INT16_C( 12449), INT16_C( 21889), INT16_C( 14489), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 3300), -INT16_C( 12483), INT16_C( 3622), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 11868), -INT16_C( 19740), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 2236), INT16_C( 0), INT16_C( 8100), INT16_C( 0), -INT16_C( 72) } }, { UINT32_C(2109546810), { -INT8_C( 101), INT8_C( 98), -INT8_C( 13), INT8_C( 111), INT8_C( 5), -INT8_C( 121), -INT8_C( 25), -INT8_C( 97), -INT8_C( 35), INT8_C( 80), INT8_C( 126), INT8_C( 11), INT8_C( 32), -INT8_C( 47), INT8_C( 67), INT8_C( 47), -INT8_C( 82), INT8_C( 52), INT8_C( 90), INT8_C( 58), -INT8_C( 47), -INT8_C( 76), INT8_C( 78), -INT8_C( 55), -INT8_C( 55), INT8_C( 100), -INT8_C( 82), INT8_C( 3), -INT8_C( 123), INT8_C( 107), INT8_MIN, INT8_C( 32), -INT8_C( 51), INT8_C( 115), -INT8_C( 113), -INT8_C( 46), -INT8_C( 5), INT8_C( 118), INT8_C( 113), -INT8_C( 40), -INT8_C( 58), -INT8_C( 16), -INT8_C( 28), -INT8_C( 26), -INT8_C( 63), INT8_C( 39), INT8_C( 21), INT8_C( 111), INT8_C( 92), INT8_C( 111), -INT8_C( 87), INT8_C( 45), INT8_C( 36), -INT8_C( 9), -INT8_C( 10), -INT8_C( 19), INT8_C( 91), -INT8_C( 92), -INT8_C( 16), -INT8_C( 32), INT8_C( 15), INT8_C( 112), INT8_C( 0), -INT8_C( 35) }, { -INT8_C( 28), -INT8_C( 113), -INT8_C( 81), -INT8_C( 33), INT8_C( 6), INT8_C( 33), -INT8_C( 73), -INT8_C( 52), INT8_C( 17), -INT8_C( 101), -INT8_C( 77), -INT8_C( 46), -INT8_C( 61), -INT8_C( 56), INT8_C( 65), INT8_C( 31), INT8_C( 56), -INT8_C( 22), INT8_C( 76), INT8_C( 92), -INT8_C( 31), INT8_C( 67), INT8_C( 73), INT8_C( 61), -INT8_C( 25), INT8_C( 57), INT8_C( 29), -INT8_C( 9), -INT8_C( 87), INT8_C( 30), -INT8_C( 44), -INT8_C( 115), -INT8_C( 83), -INT8_C( 125), INT8_C( 108), -INT8_C( 77), -INT8_C( 92), INT8_C( 36), INT8_MIN, -INT8_C( 75), -INT8_C( 65), INT8_C( 51), -INT8_C( 121), -INT8_C( 126), -INT8_C( 5), -INT8_C( 56), -INT8_C( 
95), INT8_C( 51), -INT8_C( 78), -INT8_C( 18), -INT8_C( 113), -INT8_C( 108), INT8_C( 49), -INT8_C( 40), -INT8_C( 47), INT8_C( 24), INT8_C( 17), -INT8_C( 18), INT8_C( 15), -INT8_C( 69), INT8_C( 12), -INT8_C( 29), INT8_C( 72), -INT8_C( 70) }, { INT16_C( 0), -INT16_C( 23346), INT16_C( 0), -INT16_C( 25131), -INT16_C( 4323), -INT16_C( 10208), INT16_C( 0), INT16_C( 0), INT16_C( 8600), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 5019), INT16_C( 0), INT16_C( 0), -INT16_C( 31390), INT16_C( 0), -INT16_C( 18844), -INT16_C( 30664), -INT16_C( 630), INT16_MIN, INT16_C( 0), INT16_C( 3666), -INT16_C( 9174), INT16_C( 0), -INT16_C( 8116), -INT16_C( 5874), -INT16_C( 1405), -INT16_C( 11856), -INT16_C( 3068), INT16_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_maskz_maddubs_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask32 k = simde_test_x86_random_mmask32(); simde__m512i a = simde_test_x86_random_i8x64(); simde__m512i b = simde_test_x86_random_i8x64(); simde__m512i r = simde_mm512_maskz_maddubs_epi16(k, a, b); simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x64(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_maddubs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_maddubs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_maddubs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_maddubs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maddubs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_maddubs_epi16) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_maddubs_epi16) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/max.c000066400000000000000000015433161400333146700162220ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN max #include #include #include static int test_simde_mm512_max_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int8_t a[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { { { INT8_C( 99), INT8_C( 57), INT8_C( 67), -INT8_C( 9), -INT8_C( 6), INT8_C( 33), -INT8_C( 124), INT8_C( 36), INT8_C( 33), INT8_C( 54), -INT8_C( 88), -INT8_C( 42), -INT8_C( 2), INT8_C( 100), -INT8_C( 20), -INT8_C( 26), INT8_C( 12), INT8_C( 68), -INT8_C( 19), INT8_C( 5), INT8_C( 93), -INT8_C( 21), INT8_MAX, INT8_C( 103), INT8_C( 108), INT8_C( 29), INT8_C( 35), -INT8_C( 11), INT8_C( 48), INT8_C( 37), -INT8_C( 11), -INT8_C( 108), INT8_C( 94), INT8_C( 56), -INT8_C( 117), INT8_C( 88), INT8_C( 89), INT8_C( 15), INT8_C( 124), INT8_C( 122), INT8_C( 69), INT8_C( 37), INT8_C( 80), INT8_C( 67), -INT8_C( 119), INT8_C( 60), INT8_C( 42), -INT8_C( 107), INT8_MIN, INT8_C( 23), -INT8_C( 102), -INT8_C( 34), INT8_C( 2), INT8_C( 26), INT8_C( 69), INT8_C( 111), INT8_C( 55), INT8_C( 104), INT8_C( 100), INT8_C( 104), -INT8_C( 114), INT8_C( 89), -INT8_C( 4), -INT8_C( 20) }, { -INT8_C( 111), -INT8_C( 121), INT8_C( 69), -INT8_C( 22), -INT8_C( 106), -INT8_C( 63), INT8_C( 101), -INT8_C( 37), -INT8_C( 26), -INT8_C( 75), INT8_C( 31), INT8_C( 111), -INT8_C( 14), INT8_C( 73), INT8_C( 4), INT8_C( 114), INT8_C( 96), -INT8_C( 97), INT8_C( 80), INT8_C( 98), -INT8_C( 71), -INT8_C( 106), -INT8_C( 47), -INT8_C( 16), -INT8_C( 2), INT8_C( 54), INT8_C( 88), -INT8_C( 116), -INT8_C( 113), INT8_C( 84), INT8_C( 121), INT8_C( 33), -INT8_C( 37), -INT8_C( 66), INT8_C( 11), INT8_C( 113), INT8_MAX, INT8_C( 112), INT8_C( 77), INT8_C( 102), INT8_C( 38), INT8_C( 108), -INT8_C( 43), INT8_C( 24), -INT8_C( 75), -INT8_C( 38), -INT8_C( 118), INT8_C( 21), INT8_C( 121), -INT8_C( 37), INT8_C( 119), INT8_C( 50), INT8_C( 113), INT8_C( 73), INT8_C( 34), INT8_C( 111), INT8_MAX, INT8_C( 123), -INT8_C( 4), INT8_C( 14), -INT8_C( 49), INT8_C( 117), 
INT8_C( 47), -INT8_C( 85) }, { INT8_C( 99), INT8_C( 57), INT8_C( 69), -INT8_C( 9), -INT8_C( 6), INT8_C( 33), INT8_C( 101), INT8_C( 36), INT8_C( 33), INT8_C( 54), INT8_C( 31), INT8_C( 111), -INT8_C( 2), INT8_C( 100), INT8_C( 4), INT8_C( 114), INT8_C( 96), INT8_C( 68), INT8_C( 80), INT8_C( 98), INT8_C( 93), -INT8_C( 21), INT8_MAX, INT8_C( 103), INT8_C( 108), INT8_C( 54), INT8_C( 88), -INT8_C( 11), INT8_C( 48), INT8_C( 84), INT8_C( 121), INT8_C( 33), INT8_C( 94), INT8_C( 56), INT8_C( 11), INT8_C( 113), INT8_MAX, INT8_C( 112), INT8_C( 124), INT8_C( 122), INT8_C( 69), INT8_C( 108), INT8_C( 80), INT8_C( 67), -INT8_C( 75), INT8_C( 60), INT8_C( 42), INT8_C( 21), INT8_C( 121), INT8_C( 23), INT8_C( 119), INT8_C( 50), INT8_C( 113), INT8_C( 73), INT8_C( 69), INT8_C( 111), INT8_MAX, INT8_C( 123), INT8_C( 100), INT8_C( 104), -INT8_C( 49), INT8_C( 117), INT8_C( 47), -INT8_C( 20) } }, { { INT8_C( 51), INT8_C( 59), INT8_C( 28), -INT8_C( 78), -INT8_C( 85), INT8_C( 105), INT8_C( 24), -INT8_C( 47), -INT8_C( 43), -INT8_C( 18), -INT8_C( 23), -INT8_C( 118), -INT8_C( 56), INT8_C( 116), -INT8_C( 97), INT8_C( 65), INT8_C( 79), INT8_C( 23), INT8_C( 115), -INT8_C( 64), INT8_C( 96), -INT8_C( 107), INT8_C( 47), -INT8_C( 33), INT8_C( 16), INT8_C( 43), -INT8_C( 19), -INT8_C( 32), -INT8_C( 96), INT8_C( 29), -INT8_C( 117), -INT8_C( 45), INT8_C( 88), -INT8_C( 89), -INT8_C( 122), INT8_C( 3), INT8_C( 17), -INT8_C( 98), -INT8_C( 43), -INT8_C( 26), -INT8_C( 116), -INT8_C( 66), INT8_C( 113), INT8_C( 84), INT8_C( 50), INT8_C( 16), -INT8_C( 107), -INT8_C( 127), INT8_C( 39), INT8_C( 8), INT8_C( 65), -INT8_C( 121), -INT8_C( 98), INT8_C( 113), INT8_C( 102), -INT8_C( 82), -INT8_C( 100), INT8_C( 84), -INT8_C( 114), INT8_C( 61), INT8_C( 113), INT8_C( 25), INT8_C( 16), -INT8_C( 55) }, { -INT8_C( 63), -INT8_C( 106), -INT8_C( 52), -INT8_C( 46), INT8_C( 53), -INT8_C( 95), -INT8_C( 72), -INT8_C( 63), INT8_C( 96), INT8_C( 41), INT8_C( 22), -INT8_C( 110), INT8_C( 58), -INT8_C( 85), INT8_C( 20), INT8_C( 97), -INT8_C( 
76), INT8_C( 85), -INT8_C( 23), INT8_C( 82), -INT8_C( 58), INT8_C( 79), INT8_C( 0), INT8_C( 99), -INT8_C( 93), -INT8_C( 113), -INT8_C( 96), INT8_C( 20), -INT8_C( 88), -INT8_C( 80), -INT8_C( 35), INT8_C( 105), INT8_C( 71), -INT8_C( 86), INT8_C( 59), INT8_C( 124), INT8_C( 75), -INT8_C( 12), INT8_C( 61), -INT8_C( 85), INT8_C( 29), INT8_C( 83), INT8_C( 62), INT8_C( 87), -INT8_C( 1), INT8_C( 82), -INT8_C( 71), -INT8_C( 77), -INT8_C( 89), -INT8_C( 94), INT8_C( 5), INT8_C( 110), -INT8_C( 15), INT8_C( 5), -INT8_C( 47), -INT8_C( 107), -INT8_C( 108), INT8_C( 113), -INT8_C( 87), INT8_C( 61), INT8_C( 33), -INT8_C( 121), -INT8_C( 90), INT8_C( 104) }, { INT8_C( 51), INT8_C( 59), INT8_C( 28), -INT8_C( 46), INT8_C( 53), INT8_C( 105), INT8_C( 24), -INT8_C( 47), INT8_C( 96), INT8_C( 41), INT8_C( 22), -INT8_C( 110), INT8_C( 58), INT8_C( 116), INT8_C( 20), INT8_C( 97), INT8_C( 79), INT8_C( 85), INT8_C( 115), INT8_C( 82), INT8_C( 96), INT8_C( 79), INT8_C( 47), INT8_C( 99), INT8_C( 16), INT8_C( 43), -INT8_C( 19), INT8_C( 20), -INT8_C( 88), INT8_C( 29), -INT8_C( 35), INT8_C( 105), INT8_C( 88), -INT8_C( 86), INT8_C( 59), INT8_C( 124), INT8_C( 75), -INT8_C( 12), INT8_C( 61), -INT8_C( 26), INT8_C( 29), INT8_C( 83), INT8_C( 113), INT8_C( 87), INT8_C( 50), INT8_C( 82), -INT8_C( 71), -INT8_C( 77), INT8_C( 39), INT8_C( 8), INT8_C( 65), INT8_C( 110), -INT8_C( 15), INT8_C( 113), INT8_C( 102), -INT8_C( 82), -INT8_C( 100), INT8_C( 113), -INT8_C( 87), INT8_C( 61), INT8_C( 113), INT8_C( 25), INT8_C( 16), INT8_C( 104) } }, { { INT8_C( 49), -INT8_C( 30), -INT8_C( 28), INT8_C( 124), -INT8_C( 42), INT8_C( 34), INT8_C( 40), -INT8_C( 13), INT8_C( 117), INT8_C( 102), INT8_C( 75), INT8_C( 116), -INT8_C( 72), INT8_C( 4), INT8_C( 39), INT8_C( 95), -INT8_C( 90), INT8_C( 44), -INT8_C( 51), -INT8_C( 105), INT8_C( 50), -INT8_C( 98), INT8_C( 44), -INT8_C( 58), INT8_C( 15), -INT8_C( 42), INT8_C( 3), INT8_C( 49), INT8_C( 93), -INT8_C( 86), -INT8_C( 103), -INT8_C( 114), -INT8_C( 116), INT8_C( 126), INT8_C( 10), 
INT8_C( 98), -INT8_C( 96), INT8_C( 50), INT8_C( 85), INT8_C( 21), -INT8_C( 104), -INT8_C( 96), -INT8_C( 118), INT8_C( 80), -INT8_C( 92), -INT8_C( 79), -INT8_C( 80), INT8_C( 74), -INT8_C( 34), INT8_C( 125), -INT8_C( 30), INT8_C( 16), INT8_C( 28), INT8_C( 14), -INT8_C( 42), INT8_C( 43), -INT8_C( 28), -INT8_C( 38), INT8_C( 92), INT8_C( 65), -INT8_C( 124), -INT8_C( 10), -INT8_C( 49), INT8_C( 16) }, { INT8_C( 116), -INT8_C( 38), INT8_C( 114), INT8_C( 20), INT8_C( 12), -INT8_C( 57), INT8_C( 41), -INT8_C( 91), INT8_C( 104), -INT8_C( 77), -INT8_C( 11), INT8_C( 12), INT8_C( 101), -INT8_C( 91), INT8_C( 87), INT8_C( 67), INT8_C( 35), INT8_C( 57), INT8_C( 83), INT8_C( 63), INT8_C( 71), INT8_C( 41), INT8_C( 106), INT8_C( 44), INT8_C( 3), -INT8_C( 57), INT8_C( 109), -INT8_C( 121), -INT8_C( 67), INT8_C( 61), -INT8_C( 105), INT8_C( 49), INT8_C( 23), INT8_C( 9), INT8_C( 69), INT8_C( 35), -INT8_C( 47), INT8_C( 110), -INT8_C( 56), INT8_C( 57), INT8_C( 34), -INT8_C( 66), INT8_C( 69), -INT8_C( 121), INT8_C( 99), -INT8_C( 100), -INT8_C( 54), -INT8_C( 122), -INT8_C( 43), INT8_C( 29), -INT8_C( 59), INT8_C( 29), INT8_C( 70), INT8_C( 48), INT8_C( 73), INT8_C( 74), -INT8_C( 9), -INT8_C( 74), -INT8_C( 47), -INT8_C( 76), -INT8_C( 13), INT8_C( 105), -INT8_C( 27), INT8_C( 10) }, { INT8_C( 116), -INT8_C( 30), INT8_C( 114), INT8_C( 124), INT8_C( 12), INT8_C( 34), INT8_C( 41), -INT8_C( 13), INT8_C( 117), INT8_C( 102), INT8_C( 75), INT8_C( 116), INT8_C( 101), INT8_C( 4), INT8_C( 87), INT8_C( 95), INT8_C( 35), INT8_C( 57), INT8_C( 83), INT8_C( 63), INT8_C( 71), INT8_C( 41), INT8_C( 106), INT8_C( 44), INT8_C( 15), -INT8_C( 42), INT8_C( 109), INT8_C( 49), INT8_C( 93), INT8_C( 61), -INT8_C( 103), INT8_C( 49), INT8_C( 23), INT8_C( 126), INT8_C( 69), INT8_C( 98), -INT8_C( 47), INT8_C( 110), INT8_C( 85), INT8_C( 57), INT8_C( 34), -INT8_C( 66), INT8_C( 69), INT8_C( 80), INT8_C( 99), -INT8_C( 79), -INT8_C( 54), INT8_C( 74), -INT8_C( 34), INT8_C( 125), -INT8_C( 30), INT8_C( 29), INT8_C( 70), INT8_C( 48), 
INT8_C( 73), INT8_C( 74), -INT8_C( 9), -INT8_C( 38), INT8_C( 92), INT8_C( 65), -INT8_C( 13), INT8_C( 105), -INT8_C( 27), INT8_C( 16) } }, { { INT8_C( 114), INT8_C( 42), INT8_C( 46), INT8_C( 67), -INT8_C( 104), -INT8_C( 10), INT8_C( 124), -INT8_C( 70), -INT8_C( 76), -INT8_C( 62), INT8_C( 65), INT8_C( 24), INT8_C( 94), INT8_C( 11), -INT8_C( 98), INT8_C( 52), INT8_C( 40), INT8_C( 100), INT8_C( 81), INT8_C( 111), -INT8_C( 108), -INT8_C( 102), -INT8_C( 71), -INT8_C( 117), INT8_C( 80), -INT8_C( 118), INT8_C( 63), INT8_C( 68), -INT8_C( 13), INT8_C( 36), INT8_C( 78), INT8_C( 102), INT8_C( 78), INT8_C( 124), -INT8_C( 87), -INT8_C( 26), INT8_C( 115), INT8_C( 38), -INT8_C( 95), INT8_C( 39), -INT8_C( 24), -INT8_C( 30), INT8_C( 63), INT8_C( 70), -INT8_C( 18), -INT8_C( 34), INT8_C( 122), INT8_C( 22), INT8_C( 66), -INT8_C( 53), -INT8_C( 123), -INT8_C( 42), INT8_C( 101), INT8_C( 62), INT8_C( 97), -INT8_C( 74), -INT8_C( 55), -INT8_C( 96), -INT8_C( 6), -INT8_C( 68), -INT8_C( 60), INT8_C( 72), INT8_C( 34), INT8_C( 18) }, { -INT8_C( 59), -INT8_C( 52), -INT8_C( 8), INT8_C( 56), -INT8_C( 14), -INT8_C( 103), INT8_C( 95), -INT8_C( 38), INT8_C( 124), -INT8_C( 97), INT8_C( 32), INT8_C( 106), INT8_C( 125), -INT8_C( 101), INT8_MIN, -INT8_C( 65), INT8_C( 102), INT8_C( 6), -INT8_C( 107), -INT8_C( 52), INT8_C( 68), -INT8_C( 10), -INT8_C( 126), INT8_C( 13), -INT8_C( 106), INT8_C( 124), -INT8_C( 54), INT8_C( 90), -INT8_C( 60), -INT8_C( 20), INT8_C( 108), -INT8_C( 119), -INT8_C( 72), INT8_C( 100), -INT8_C( 63), -INT8_C( 86), -INT8_C( 2), INT8_C( 33), -INT8_C( 124), INT8_C( 122), -INT8_C( 64), -INT8_C( 91), -INT8_C( 28), INT8_C( 61), INT8_C( 64), INT8_C( 100), -INT8_C( 4), -INT8_C( 90), INT8_C( 106), -INT8_C( 111), INT8_C( 114), -INT8_C( 81), -INT8_C( 121), -INT8_C( 12), -INT8_C( 68), INT8_C( 29), INT8_C( 112), -INT8_C( 122), INT8_C( 119), INT8_C( 53), INT8_C( 115), -INT8_C( 29), -INT8_C( 66), INT8_C( 43) }, { INT8_C( 114), INT8_C( 42), INT8_C( 46), INT8_C( 67), -INT8_C( 14), -INT8_C( 10), INT8_C( 
124), -INT8_C( 38), INT8_C( 124), -INT8_C( 62), INT8_C( 65), INT8_C( 106), INT8_C( 125), INT8_C( 11), -INT8_C( 98), INT8_C( 52), INT8_C( 102), INT8_C( 100), INT8_C( 81), INT8_C( 111), INT8_C( 68), -INT8_C( 10), -INT8_C( 71), INT8_C( 13), INT8_C( 80), INT8_C( 124), INT8_C( 63), INT8_C( 90), -INT8_C( 13), INT8_C( 36), INT8_C( 108), INT8_C( 102), INT8_C( 78), INT8_C( 124), -INT8_C( 63), -INT8_C( 26), INT8_C( 115), INT8_C( 38), -INT8_C( 95), INT8_C( 122), -INT8_C( 24), -INT8_C( 30), INT8_C( 63), INT8_C( 70), INT8_C( 64), INT8_C( 100), INT8_C( 122), INT8_C( 22), INT8_C( 106), -INT8_C( 53), INT8_C( 114), -INT8_C( 42), INT8_C( 101), INT8_C( 62), INT8_C( 97), INT8_C( 29), INT8_C( 112), -INT8_C( 96), INT8_C( 119), INT8_C( 53), INT8_C( 115), INT8_C( 72), INT8_C( 34), INT8_C( 43) } }, { { INT8_C( 71), INT8_MIN, -INT8_C( 42), INT8_C( 69), -INT8_C( 95), INT8_C( 90), -INT8_C( 65), INT8_C( 97), -INT8_C( 1), -INT8_C( 93), -INT8_C( 98), INT8_C( 63), INT8_C( 8), -INT8_C( 102), -INT8_C( 26), INT8_C( 114), INT8_C( 43), INT8_C( 88), INT8_C( 33), -INT8_C( 78), INT8_C( 77), -INT8_C( 34), -INT8_C( 49), -INT8_C( 67), INT8_C( 100), INT8_C( 70), -INT8_C( 14), -INT8_C( 41), INT8_C( 41), -INT8_C( 79), INT8_C( 3), INT8_C( 112), INT8_C( 49), -INT8_C( 39), -INT8_C( 74), -INT8_C( 46), INT8_C( 51), INT8_C( 117), INT8_C( 51), INT8_C( 51), INT8_C( 25), -INT8_C( 47), INT8_C( 114), INT8_C( 33), INT8_C( 107), INT8_C( 88), -INT8_C( 109), -INT8_C( 106), -INT8_C( 79), -INT8_C( 75), INT8_C( 72), -INT8_C( 2), -INT8_C( 109), INT8_C( 23), -INT8_C( 69), -INT8_C( 9), INT8_C( 93), -INT8_C( 82), -INT8_C( 49), -INT8_C( 122), INT8_C( 95), -INT8_C( 46), -INT8_C( 10), -INT8_C( 112) }, { -INT8_C( 85), -INT8_C( 84), INT8_C( 98), -INT8_C( 34), INT8_C( 34), -INT8_C( 107), INT8_C( 17), INT8_C( 59), INT8_C( 102), -INT8_C( 124), INT8_C( 92), -INT8_C( 47), -INT8_C( 36), -INT8_C( 17), INT8_C( 103), -INT8_C( 115), -INT8_C( 92), -INT8_C( 81), -INT8_C( 117), INT8_C( 55), -INT8_C( 58), INT8_C( 71), INT8_C( 47), INT8_C( 35), 
-INT8_C( 11), -INT8_C( 2), -INT8_C( 87), INT8_C( 84), -INT8_C( 48), -INT8_C( 97), -INT8_C( 28), INT8_C( 123), INT8_C( 76), INT8_C( 70), INT8_C( 89), INT8_C( 110), -INT8_C( 37), INT8_C( 107), -INT8_C( 87), INT8_C( 65), -INT8_C( 17), INT8_C( 5), INT8_C( 18), -INT8_C( 53), -INT8_C( 12), INT8_C( 121), INT8_C( 89), -INT8_C( 103), INT8_C( 40), -INT8_C( 28), -INT8_C( 48), -INT8_C( 18), INT8_C( 43), -INT8_C( 1), INT8_C( 17), INT8_C( 32), -INT8_C( 3), -INT8_C( 70), INT8_C( 116), -INT8_C( 51), INT8_C( 89), INT8_C( 88), INT8_C( 72), -INT8_C( 91) }, { INT8_C( 71), -INT8_C( 84), INT8_C( 98), INT8_C( 69), INT8_C( 34), INT8_C( 90), INT8_C( 17), INT8_C( 97), INT8_C( 102), -INT8_C( 93), INT8_C( 92), INT8_C( 63), INT8_C( 8), -INT8_C( 17), INT8_C( 103), INT8_C( 114), INT8_C( 43), INT8_C( 88), INT8_C( 33), INT8_C( 55), INT8_C( 77), INT8_C( 71), INT8_C( 47), INT8_C( 35), INT8_C( 100), INT8_C( 70), -INT8_C( 14), INT8_C( 84), INT8_C( 41), -INT8_C( 79), INT8_C( 3), INT8_C( 123), INT8_C( 76), INT8_C( 70), INT8_C( 89), INT8_C( 110), INT8_C( 51), INT8_C( 117), INT8_C( 51), INT8_C( 65), INT8_C( 25), INT8_C( 5), INT8_C( 114), INT8_C( 33), INT8_C( 107), INT8_C( 121), INT8_C( 89), -INT8_C( 103), INT8_C( 40), -INT8_C( 28), INT8_C( 72), -INT8_C( 2), INT8_C( 43), INT8_C( 23), INT8_C( 17), INT8_C( 32), INT8_C( 93), -INT8_C( 70), INT8_C( 116), -INT8_C( 51), INT8_C( 95), INT8_C( 88), INT8_C( 72), -INT8_C( 91) } }, { { -INT8_C( 98), -INT8_C( 94), INT8_C( 19), INT8_C( 121), INT8_C( 13), -INT8_C( 68), -INT8_C( 70), -INT8_C( 4), -INT8_C( 63), -INT8_C( 52), -INT8_C( 57), -INT8_C( 74), INT8_C( 69), INT8_C( 32), INT8_C( 79), INT8_C( 109), INT8_C( 5), INT8_C( 31), INT8_C( 91), INT8_C( 48), INT8_C( 31), INT8_C( 108), INT8_C( 81), INT8_C( 28), INT8_C( 38), -INT8_C( 59), -INT8_C( 22), INT8_MIN, INT8_C( 30), INT8_C( 50), INT8_C( 37), -INT8_C( 68), -INT8_C( 44), INT8_C( 57), INT8_C( 54), -INT8_C( 31), -INT8_C( 11), -INT8_C( 16), -INT8_C( 35), -INT8_C( 73), -INT8_C( 67), -INT8_C( 91), INT8_C( 109), INT8_C( 2), 
-INT8_C( 59), -INT8_C( 68), INT8_C( 112), -INT8_C( 54), -INT8_C( 37), -INT8_C( 53), -INT8_C( 5), -INT8_C( 6), INT8_C( 56), INT8_C( 76), INT8_C( 23), INT8_C( 94), INT8_C( 17), INT8_C( 1), -INT8_C( 34), INT8_C( 47), INT8_C( 51), INT8_C( 4), -INT8_C( 20), INT8_C( 8) }, { INT8_C( 61), INT8_C( 34), -INT8_C( 23), INT8_C( 50), INT8_C( 18), -INT8_C( 57), -INT8_C( 23), -INT8_C( 49), INT8_C( 108), INT8_C( 86), -INT8_C( 46), INT8_C( 49), INT8_C( 18), INT8_C( 66), -INT8_C( 4), -INT8_C( 18), INT8_C( 13), -INT8_C( 9), -INT8_C( 24), INT8_C( 69), INT8_C( 67), -INT8_C( 1), -INT8_C( 92), INT8_C( 84), INT8_C( 0), -INT8_C( 126), -INT8_C( 124), INT8_C( 52), -INT8_C( 122), INT8_C( 112), INT8_C( 60), -INT8_C( 61), -INT8_C( 110), INT8_C( 37), -INT8_C( 10), -INT8_C( 92), -INT8_C( 20), -INT8_C( 33), INT8_C( 116), INT8_C( 88), INT8_C( 54), INT8_C( 70), -INT8_C( 118), INT8_C( 72), -INT8_C( 120), -INT8_C( 122), INT8_C( 54), -INT8_C( 107), INT8_C( 125), INT8_C( 31), -INT8_C( 37), -INT8_C( 64), INT8_C( 30), INT8_MAX, INT8_C( 20), INT8_C( 31), INT8_C( 1), -INT8_C( 104), INT8_C( 83), -INT8_C( 120), INT8_C( 8), -INT8_C( 113), INT8_C( 75), -INT8_C( 102) }, { INT8_C( 61), INT8_C( 34), INT8_C( 19), INT8_C( 121), INT8_C( 18), -INT8_C( 57), -INT8_C( 23), -INT8_C( 4), INT8_C( 108), INT8_C( 86), -INT8_C( 46), INT8_C( 49), INT8_C( 69), INT8_C( 66), INT8_C( 79), INT8_C( 109), INT8_C( 13), INT8_C( 31), INT8_C( 91), INT8_C( 69), INT8_C( 67), INT8_C( 108), INT8_C( 81), INT8_C( 84), INT8_C( 38), -INT8_C( 59), -INT8_C( 22), INT8_C( 52), INT8_C( 30), INT8_C( 112), INT8_C( 60), -INT8_C( 61), -INT8_C( 44), INT8_C( 57), INT8_C( 54), -INT8_C( 31), -INT8_C( 11), -INT8_C( 16), INT8_C( 116), INT8_C( 88), INT8_C( 54), INT8_C( 70), INT8_C( 109), INT8_C( 72), -INT8_C( 59), -INT8_C( 68), INT8_C( 112), -INT8_C( 54), INT8_C( 125), INT8_C( 31), -INT8_C( 5), -INT8_C( 6), INT8_C( 56), INT8_MAX, INT8_C( 23), INT8_C( 94), INT8_C( 17), INT8_C( 1), INT8_C( 83), INT8_C( 47), INT8_C( 51), INT8_C( 4), INT8_C( 75), INT8_C( 8) } }, { { 
-INT8_C( 76), INT8_C( 65), INT8_C( 63), -INT8_C( 95), INT8_C( 33), -INT8_C( 77), -INT8_C( 7), INT8_C( 87), -INT8_C( 7), -INT8_C( 125), -INT8_C( 97), -INT8_C( 127), INT8_C( 9), -INT8_C( 42), INT8_C( 22), -INT8_C( 122), -INT8_C( 11), -INT8_C( 15), INT8_C( 70), INT8_C( 19), INT8_C( 112), INT8_C( 91), INT8_C( 50), INT8_C( 114), -INT8_C( 13), -INT8_C( 123), -INT8_C( 6), -INT8_C( 4), INT8_C( 20), INT8_C( 69), -INT8_C( 106), -INT8_C( 55), -INT8_C( 121), -INT8_C( 43), INT8_C( 106), -INT8_C( 88), -INT8_C( 120), INT8_C( 99), -INT8_C( 1), -INT8_C( 127), -INT8_C( 25), -INT8_C( 98), INT8_C( 2), -INT8_C( 16), INT8_C( 116), INT8_C( 25), INT8_C( 119), INT8_C( 105), INT8_C( 10), -INT8_C( 67), INT8_C( 125), INT8_C( 123), INT8_C( 24), -INT8_C( 81), -INT8_C( 19), INT8_C( 12), INT8_C( 53), -INT8_C( 25), INT8_C( 8), INT8_C( 73), INT8_C( 44), -INT8_C( 98), INT8_C( 18), -INT8_C( 77) }, { INT8_C( 116), INT8_C( 124), INT8_C( 91), -INT8_C( 4), -INT8_C( 32), INT8_C( 90), INT8_C( 126), -INT8_C( 57), -INT8_C( 7), INT8_MIN, -INT8_C( 73), INT8_C( 109), -INT8_C( 103), INT8_C( 46), -INT8_C( 41), -INT8_C( 92), -INT8_C( 20), INT8_C( 84), INT8_C( 31), INT8_C( 4), INT8_C( 3), INT8_C( 12), INT8_C( 16), INT8_C( 56), -INT8_C( 13), INT8_C( 24), -INT8_C( 126), INT8_C( 31), -INT8_C( 73), -INT8_C( 108), -INT8_C( 45), INT8_C( 43), INT8_C( 17), INT8_C( 46), INT8_C( 39), -INT8_C( 15), -INT8_C( 119), -INT8_C( 91), -INT8_C( 72), -INT8_C( 126), INT8_C( 38), INT8_C( 111), -INT8_C( 17), -INT8_C( 65), -INT8_C( 98), -INT8_C( 58), INT8_C( 99), -INT8_C( 118), INT8_C( 26), -INT8_C( 126), -INT8_C( 114), INT8_C( 30), -INT8_C( 114), -INT8_C( 97), INT8_C( 86), -INT8_C( 127), -INT8_C( 73), -INT8_C( 40), -INT8_C( 95), INT8_C( 110), INT8_C( 109), INT8_C( 116), -INT8_C( 103), INT8_C( 126) }, { INT8_C( 116), INT8_C( 124), INT8_C( 91), -INT8_C( 4), INT8_C( 33), INT8_C( 90), INT8_C( 126), INT8_C( 87), -INT8_C( 7), -INT8_C( 125), -INT8_C( 73), INT8_C( 109), INT8_C( 9), INT8_C( 46), INT8_C( 22), -INT8_C( 92), -INT8_C( 11), INT8_C( 
84), INT8_C( 70), INT8_C( 19), INT8_C( 112), INT8_C( 91), INT8_C( 50), INT8_C( 114), -INT8_C( 13), INT8_C( 24), -INT8_C( 6), INT8_C( 31), INT8_C( 20), INT8_C( 69), -INT8_C( 45), INT8_C( 43), INT8_C( 17), INT8_C( 46), INT8_C( 106), -INT8_C( 15), -INT8_C( 119), INT8_C( 99), -INT8_C( 1), -INT8_C( 126), INT8_C( 38), INT8_C( 111), INT8_C( 2), -INT8_C( 16), INT8_C( 116), INT8_C( 25), INT8_C( 119), INT8_C( 105), INT8_C( 26), -INT8_C( 67), INT8_C( 125), INT8_C( 123), INT8_C( 24), -INT8_C( 81), INT8_C( 86), INT8_C( 12), INT8_C( 53), -INT8_C( 25), INT8_C( 8), INT8_C( 110), INT8_C( 109), INT8_C( 116), INT8_C( 18), INT8_C( 126) } }, { { -INT8_C( 94), -INT8_C( 63), INT8_C( 111), INT8_C( 43), INT8_C( 102), INT8_C( 39), -INT8_C( 83), -INT8_C( 116), -INT8_C( 106), -INT8_C( 99), INT8_C( 76), INT8_C( 52), INT8_C( 99), -INT8_C( 81), -INT8_C( 66), INT8_C( 126), INT8_C( 50), INT8_C( 77), -INT8_C( 100), -INT8_C( 64), -INT8_C( 20), -INT8_C( 14), INT8_C( 66), -INT8_C( 93), -INT8_C( 53), -INT8_C( 29), INT8_C( 18), INT8_C( 56), INT8_C( 87), -INT8_C( 85), -INT8_C( 74), -INT8_C( 7), INT8_C( 108), INT8_C( 37), INT8_C( 37), -INT8_C( 45), INT8_C( 76), -INT8_C( 46), INT8_C( 95), -INT8_C( 30), INT8_C( 111), -INT8_C( 85), INT8_C( 23), -INT8_C( 45), INT8_C( 91), -INT8_C( 43), INT8_C( 81), -INT8_C( 115), INT8_C( 34), -INT8_C( 19), INT8_C( 77), INT8_C( 14), -INT8_C( 33), -INT8_C( 113), -INT8_C( 78), -INT8_C( 86), INT8_C( 114), -INT8_C( 60), -INT8_C( 30), -INT8_C( 55), INT8_C( 111), -INT8_C( 104), -INT8_C( 61), -INT8_C( 36) }, { -INT8_C( 67), -INT8_C( 24), -INT8_C( 81), INT8_C( 9), -INT8_C( 70), INT8_C( 14), -INT8_C( 20), INT8_C( 42), -INT8_C( 70), INT8_C( 3), -INT8_C( 3), INT8_C( 21), -INT8_C( 40), INT8_C( 78), -INT8_C( 94), -INT8_C( 5), INT8_C( 59), -INT8_C( 17), INT8_C( 9), INT8_C( 26), INT8_MAX, -INT8_C( 69), -INT8_C( 59), -INT8_C( 15), INT8_MAX, -INT8_C( 89), -INT8_C( 69), -INT8_C( 17), INT8_C( 64), INT8_C( 126), -INT8_C( 53), -INT8_C( 3), INT8_C( 102), INT8_C( 122), INT8_C( 7), INT8_C( 32), 
-INT8_C( 120), -INT8_C( 13), INT8_C( 74), INT8_C( 66), -INT8_C( 10), INT8_C( 71), INT8_C( 87), -INT8_C( 50), -INT8_C( 107), -INT8_C( 7), -INT8_C( 55), -INT8_C( 48), -INT8_C( 23), -INT8_C( 45), -INT8_C( 21), INT8_C( 104), -INT8_C( 114), -INT8_C( 80), INT8_C( 89), INT8_C( 14), INT8_C( 87), INT8_C( 20), -INT8_C( 3), -INT8_C( 105), -INT8_C( 110), -INT8_C( 56), -INT8_C( 107), -INT8_C( 8) }, { -INT8_C( 67), -INT8_C( 24), INT8_C( 111), INT8_C( 43), INT8_C( 102), INT8_C( 39), -INT8_C( 20), INT8_C( 42), -INT8_C( 70), INT8_C( 3), INT8_C( 76), INT8_C( 52), INT8_C( 99), INT8_C( 78), -INT8_C( 66), INT8_C( 126), INT8_C( 59), INT8_C( 77), INT8_C( 9), INT8_C( 26), INT8_MAX, -INT8_C( 14), INT8_C( 66), -INT8_C( 15), INT8_MAX, -INT8_C( 29), INT8_C( 18), INT8_C( 56), INT8_C( 87), INT8_C( 126), -INT8_C( 53), -INT8_C( 3), INT8_C( 108), INT8_C( 122), INT8_C( 37), INT8_C( 32), INT8_C( 76), -INT8_C( 13), INT8_C( 95), INT8_C( 66), INT8_C( 111), INT8_C( 71), INT8_C( 87), -INT8_C( 45), INT8_C( 91), -INT8_C( 7), INT8_C( 81), -INT8_C( 48), INT8_C( 34), -INT8_C( 19), INT8_C( 77), INT8_C( 104), -INT8_C( 33), -INT8_C( 80), INT8_C( 89), INT8_C( 14), INT8_C( 114), INT8_C( 20), -INT8_C( 3), -INT8_C( 55), INT8_C( 111), -INT8_C( 56), -INT8_C( 61), -INT8_C( 8) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_max_epi8(a, b); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_max_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int8_t src[64]; const simde__mmask64 k; const int8_t a[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { { { INT8_C( 47), -INT8_C( 108), -INT8_C( 103), INT8_C( 35), INT8_C( 58), -INT8_C( 33), -INT8_C( 38), INT8_C( 76), INT8_C( 90), INT8_C( 55), -INT8_C( 80), INT8_C( 59), INT8_C( 118), INT8_C( 110), INT8_C( 101), 
INT8_C( 47), -INT8_C( 123), -INT8_C( 34), -INT8_C( 109), INT8_C( 94), -INT8_C( 124), INT8_C( 125), INT8_C( 100), -INT8_C( 88), INT8_C( 110), -INT8_C( 4), -INT8_C( 17), -INT8_C( 46), INT8_C( 96), -INT8_C( 12), -INT8_C( 10), -INT8_C( 113), -INT8_C( 120), -INT8_C( 113), -INT8_C( 78), -INT8_C( 62), INT8_C( 110), -INT8_C( 115), INT8_C( 15), -INT8_C( 55), -INT8_C( 60), -INT8_C( 65), INT8_C( 4), INT8_C( 58), INT8_C( 46), INT8_C( 105), INT8_C( 106), -INT8_C( 77), INT8_C( 72), -INT8_C( 3), INT8_C( 17), -INT8_C( 52), INT8_C( 122), INT8_C( 118), INT8_C( 116), -INT8_C( 24), INT8_C( 114), INT8_C( 100), -INT8_C( 70), -INT8_C( 46), INT8_C( 88), -INT8_C( 79), INT8_C( 97), -INT8_C( 31) }, UINT64_C( 7311790402542179392), { INT8_C( 114), INT8_C( 124), -INT8_C( 97), -INT8_C( 96), -INT8_C( 27), INT8_C( 9), INT8_C( 83), INT8_C( 45), INT8_C( 6), INT8_C( 101), -INT8_C( 7), INT8_MIN, -INT8_C( 37), INT8_C( 110), INT8_C( 104), INT8_C( 77), -INT8_C( 46), INT8_C( 35), INT8_C( 31), INT8_C( 42), -INT8_C( 44), INT8_MIN, INT8_C( 11), INT8_C( 20), -INT8_C( 108), -INT8_C( 81), -INT8_C( 61), INT8_C( 53), INT8_C( 97), INT8_C( 59), -INT8_C( 102), -INT8_C( 45), -INT8_C( 73), INT8_C( 58), INT8_C( 115), -INT8_C( 99), INT8_C( 67), -INT8_C( 57), -INT8_C( 54), INT8_C( 74), INT8_C( 44), -INT8_C( 60), -INT8_C( 54), INT8_C( 7), INT8_C( 50), INT8_C( 51), INT8_C( 84), INT8_C( 4), INT8_C( 86), INT8_C( 115), INT8_C( 46), INT8_C( 42), -INT8_C( 13), INT8_C( 58), INT8_C( 62), -INT8_C( 120), -INT8_C( 23), INT8_C( 2), -INT8_C( 67), INT8_C( 74), INT8_C( 61), INT8_C( 88), INT8_C( 30), -INT8_C( 11) }, { -INT8_C( 110), -INT8_C( 111), -INT8_C( 110), -INT8_C( 43), INT8_C( 88), INT8_C( 92), INT8_C( 31), -INT8_C( 124), INT8_C( 32), -INT8_C( 22), -INT8_C( 117), INT8_C( 82), INT8_C( 29), -INT8_C( 33), INT8_C( 86), INT8_C( 115), INT8_C( 82), -INT8_C( 123), -INT8_C( 99), INT8_C( 70), -INT8_C( 65), -INT8_C( 37), -INT8_C( 50), -INT8_C( 88), -INT8_C( 35), -INT8_C( 117), -INT8_C( 14), INT8_C( 27), -INT8_C( 29), INT8_C( 16), INT8_C( 
16), INT8_C( 117), -INT8_C( 94), -INT8_C( 94), INT8_C( 75), -INT8_C( 6), -INT8_C( 2), INT8_C( 106), INT8_MAX, INT8_C( 31), INT8_C( 84), INT8_C( 10), INT8_C( 113), INT8_C( 113), -INT8_C( 22), -INT8_C( 56), -INT8_C( 28), INT8_C( 60), INT8_C( 77), -INT8_C( 127), -INT8_C( 126), INT8_C( 12), INT8_C( 93), INT8_C( 80), -INT8_C( 76), INT8_C( 58), -INT8_C( 36), -INT8_C( 90), INT8_C( 85), -INT8_C( 65), -INT8_C( 73), INT8_C( 101), INT8_C( 53), INT8_C( 89) }, { INT8_C( 47), -INT8_C( 108), -INT8_C( 103), INT8_C( 35), INT8_C( 58), -INT8_C( 33), INT8_C( 83), INT8_C( 76), INT8_C( 90), INT8_C( 55), -INT8_C( 7), INT8_C( 59), INT8_C( 29), INT8_C( 110), INT8_C( 101), INT8_C( 47), INT8_C( 82), INT8_C( 35), -INT8_C( 109), INT8_C( 94), -INT8_C( 124), -INT8_C( 37), INT8_C( 100), INT8_C( 20), -INT8_C( 35), -INT8_C( 81), -INT8_C( 14), INT8_C( 53), INT8_C( 96), INT8_C( 59), -INT8_C( 10), INT8_C( 117), -INT8_C( 73), -INT8_C( 113), -INT8_C( 78), -INT8_C( 62), INT8_C( 110), INT8_C( 106), INT8_C( 15), INT8_C( 74), -INT8_C( 60), INT8_C( 10), INT8_C( 4), INT8_C( 58), INT8_C( 50), INT8_C( 51), INT8_C( 106), INT8_C( 60), INT8_C( 72), -INT8_C( 3), INT8_C( 17), INT8_C( 42), INT8_C( 93), INT8_C( 80), INT8_C( 62), -INT8_C( 24), -INT8_C( 23), INT8_C( 100), INT8_C( 85), -INT8_C( 46), INT8_C( 88), INT8_C( 101), INT8_C( 53), -INT8_C( 31) } }, { { INT8_C( 7), INT8_MIN, INT8_C( 83), INT8_C( 6), -INT8_C( 22), -INT8_C( 46), INT8_C( 37), INT8_C( 63), -INT8_C( 35), -INT8_C( 106), -INT8_C( 80), -INT8_C( 57), INT8_C( 94), -INT8_C( 107), INT8_C( 3), -INT8_C( 85), INT8_C( 22), -INT8_C( 122), -INT8_C( 73), INT8_C( 115), -INT8_C( 42), INT8_C( 107), -INT8_C( 82), -INT8_C( 78), INT8_C( 18), INT8_C( 3), INT8_C( 114), -INT8_C( 55), INT8_C( 105), -INT8_C( 89), INT8_C( 34), INT8_C( 112), INT8_C( 39), INT8_C( 117), INT8_C( 118), INT8_C( 17), INT8_C( 72), -INT8_C( 101), INT8_C( 80), INT8_C( 37), INT8_C( 50), INT8_C( 1), -INT8_C( 20), -INT8_C( 112), -INT8_C( 106), -INT8_C( 17), INT8_C( 60), -INT8_C( 84), INT8_C( 117), -INT8_C( 
13), INT8_C( 32), INT8_C( 76), INT8_C( 95), -INT8_C( 50), -INT8_C( 2), INT8_C( 113), -INT8_C( 47), INT8_C( 112), INT8_C( 58), INT8_C( 58), INT8_C( 23), INT8_C( 92), -INT8_C( 85), INT8_C( 62) }, UINT64_C(17239393157654782417), { -INT8_C( 95), INT8_C( 42), INT8_MAX, INT8_C( 55), INT8_C( 26), -INT8_C( 69), -INT8_C( 28), -INT8_C( 113), -INT8_C( 81), INT8_C( 4), -INT8_C( 37), INT8_C( 14), -INT8_C( 46), -INT8_C( 38), INT8_MAX, -INT8_C( 93), INT8_C( 74), -INT8_C( 71), -INT8_C( 34), INT8_C( 98), INT8_C( 21), -INT8_C( 119), -INT8_C( 96), -INT8_C( 26), -INT8_C( 86), -INT8_C( 16), INT8_C( 0), INT8_C( 103), -INT8_C( 111), INT8_C( 62), INT8_C( 86), INT8_C( 50), INT8_C( 105), -INT8_C( 42), INT8_C( 106), -INT8_C( 125), -INT8_C( 111), INT8_C( 78), INT8_C( 18), INT8_C( 64), INT8_C( 82), -INT8_C( 18), INT8_C( 78), INT8_C( 36), -INT8_C( 56), -INT8_C( 51), -INT8_C( 57), INT8_C( 18), -INT8_C( 122), -INT8_C( 91), INT8_C( 116), -INT8_C( 101), INT8_C( 46), INT8_C( 21), -INT8_C( 126), -INT8_C( 39), INT8_C( 5), -INT8_C( 126), INT8_C( 64), -INT8_C( 106), -INT8_C( 64), -INT8_C( 105), -INT8_C( 55), INT8_C( 41) }, { INT8_C( 109), INT8_C( 51), -INT8_C( 84), -INT8_C( 2), -INT8_C( 127), -INT8_C( 65), INT8_C( 63), -INT8_C( 45), -INT8_C( 83), -INT8_C( 115), -INT8_C( 9), INT8_C( 117), INT8_C( 91), -INT8_C( 66), -INT8_C( 121), -INT8_C( 31), INT8_C( 100), -INT8_C( 4), INT8_C( 125), -INT8_C( 110), INT8_C( 17), -INT8_C( 1), INT8_C( 107), INT8_C( 22), -INT8_C( 127), -INT8_C( 84), -INT8_C( 83), INT8_C( 65), INT8_C( 67), INT8_C( 118), INT8_C( 107), -INT8_C( 80), -INT8_C( 87), INT8_C( 23), -INT8_C( 82), INT8_C( 42), -INT8_C( 42), -INT8_C( 19), -INT8_C( 3), -INT8_C( 125), INT8_C( 123), -INT8_C( 12), -INT8_C( 8), -INT8_C( 42), -INT8_C( 78), INT8_MIN, -INT8_C( 73), INT8_C( 22), INT8_C( 124), INT8_C( 52), -INT8_C( 87), -INT8_C( 115), INT8_C( 51), INT8_C( 20), -INT8_C( 93), -INT8_C( 76), -INT8_C( 64), INT8_C( 80), -INT8_C( 10), INT8_C( 3), -INT8_C( 58), INT8_C( 97), -INT8_C( 77), INT8_C( 111) }, { INT8_C( 109), 
INT8_MIN, INT8_C( 83), INT8_C( 6), INT8_C( 26), -INT8_C( 46), INT8_C( 63), -INT8_C( 45), -INT8_C( 81), -INT8_C( 106), -INT8_C( 80), -INT8_C( 57), INT8_C( 94), -INT8_C( 38), INT8_C( 3), -INT8_C( 85), INT8_C( 22), -INT8_C( 122), -INT8_C( 73), INT8_C( 115), INT8_C( 21), INT8_C( 107), INT8_C( 107), -INT8_C( 78), -INT8_C( 86), INT8_C( 3), INT8_C( 114), INT8_C( 103), INT8_C( 67), -INT8_C( 89), INT8_C( 34), INT8_C( 112), INT8_C( 105), INT8_C( 117), INT8_C( 106), INT8_C( 42), -INT8_C( 42), INT8_C( 78), INT8_C( 80), INT8_C( 64), INT8_C( 50), INT8_C( 1), -INT8_C( 20), -INT8_C( 112), -INT8_C( 106), -INT8_C( 51), INT8_C( 60), INT8_C( 22), INT8_C( 117), INT8_C( 52), INT8_C( 116), -INT8_C( 101), INT8_C( 51), INT8_C( 21), -INT8_C( 2), INT8_C( 113), INT8_C( 5), INT8_C( 80), INT8_C( 64), INT8_C( 3), INT8_C( 23), INT8_C( 97), -INT8_C( 55), INT8_C( 111) } }, { { INT8_C( 120), INT8_C( 98), -INT8_C( 103), INT8_C( 79), INT8_C( 79), -INT8_C( 106), -INT8_C( 46), -INT8_C( 54), -INT8_C( 118), -INT8_C( 53), -INT8_C( 96), INT8_C( 61), INT8_C( 75), INT8_C( 88), INT8_C( 83), -INT8_C( 57), -INT8_C( 116), -INT8_C( 4), INT8_C( 84), -INT8_C( 64), INT8_C( 17), -INT8_C( 9), INT8_C( 116), -INT8_C( 47), INT8_C( 72), INT8_C( 106), -INT8_C( 43), INT8_C( 14), -INT8_C( 53), -INT8_C( 120), INT8_C( 126), INT8_C( 68), -INT8_C( 22), INT8_C( 23), -INT8_C( 109), INT8_C( 58), -INT8_C( 82), INT8_C( 101), INT8_C( 4), INT8_C( 56), INT8_C( 48), -INT8_C( 91), INT8_C( 117), INT8_C( 123), -INT8_C( 3), -INT8_C( 55), INT8_C( 66), -INT8_C( 119), -INT8_C( 59), -INT8_C( 106), INT8_C( 73), -INT8_C( 42), -INT8_C( 114), -INT8_C( 66), -INT8_C( 88), -INT8_C( 42), INT8_C( 40), INT8_C( 125), -INT8_C( 28), -INT8_C( 12), INT8_C( 5), INT8_C( 98), INT8_C( 56), -INT8_C( 16) }, UINT64_C( 7016659003810433914), { -INT8_C( 45), -INT8_C( 42), -INT8_C( 36), -INT8_C( 48), -INT8_C( 97), INT8_C( 31), INT8_C( 90), INT8_C( 100), -INT8_C( 75), -INT8_C( 93), INT8_C( 59), INT8_C( 67), INT8_C( 97), -INT8_C( 29), INT8_C( 25), -INT8_C( 118), INT8_C( 
96), -INT8_C( 2), INT8_C( 126), INT8_C( 101), INT8_C( 96), -INT8_C( 74), INT8_C( 85), -INT8_C( 38), -INT8_C( 127), INT8_MAX, INT8_C( 2), -INT8_C( 79), -INT8_C( 82), INT8_C( 99), INT8_C( 18), -INT8_C( 127), INT8_C( 57), -INT8_C( 17), INT8_C( 82), -INT8_C( 40), INT8_C( 14), -INT8_C( 84), INT8_C( 60), -INT8_C( 61), INT8_C( 79), INT8_C( 119), INT8_C( 7), -INT8_C( 79), INT8_C( 90), INT8_C( 32), INT8_C( 59), -INT8_C( 70), INT8_C( 30), -INT8_C( 71), INT8_C( 32), INT8_MAX, INT8_C( 111), INT8_C( 117), INT8_C( 89), -INT8_C( 16), -INT8_C( 11), INT8_C( 92), -INT8_C( 95), -INT8_C( 93), -INT8_C( 65), -INT8_C( 76), INT8_C( 36), -INT8_C( 8) }, { -INT8_C( 93), INT8_C( 118), -INT8_C( 48), -INT8_C( 79), INT8_C( 34), INT8_C( 12), INT8_C( 116), INT8_C( 114), -INT8_C( 124), INT8_C( 123), INT8_C( 35), -INT8_C( 34), -INT8_C( 100), INT8_C( 94), -INT8_C( 103), -INT8_C( 70), INT8_C( 23), -INT8_C( 71), INT8_C( 57), -INT8_C( 122), INT8_C( 46), -INT8_C( 109), INT8_C( 118), INT8_C( 35), -INT8_C( 17), INT8_C( 23), -INT8_C( 58), -INT8_C( 82), -INT8_C( 53), -INT8_C( 21), -INT8_C( 90), INT8_C( 110), INT8_C( 97), INT8_C( 118), INT8_C( 31), -INT8_C( 124), -INT8_C( 126), -INT8_C( 108), -INT8_C( 10), INT8_C( 6), INT8_C( 15), INT8_C( 25), -INT8_C( 27), -INT8_C( 85), INT8_C( 119), INT8_C( 126), INT8_C( 102), -INT8_C( 114), INT8_C( 55), -INT8_C( 97), INT8_C( 20), INT8_C( 101), INT8_C( 50), -INT8_C( 118), -INT8_C( 119), INT8_C( 33), -INT8_C( 95), INT8_C( 79), -INT8_C( 49), INT8_C( 109), INT8_C( 58), INT8_C( 117), -INT8_C( 37), -INT8_C( 100) }, { INT8_C( 120), INT8_C( 118), -INT8_C( 103), -INT8_C( 48), INT8_C( 34), INT8_C( 31), INT8_C( 116), -INT8_C( 54), -INT8_C( 75), INT8_C( 123), -INT8_C( 96), INT8_C( 67), INT8_C( 75), INT8_C( 88), INT8_C( 25), -INT8_C( 70), -INT8_C( 116), -INT8_C( 2), INT8_C( 84), INT8_C( 101), INT8_C( 17), -INT8_C( 74), INT8_C( 116), -INT8_C( 47), INT8_C( 72), INT8_C( 106), -INT8_C( 43), -INT8_C( 79), -INT8_C( 53), INT8_C( 99), INT8_C( 126), INT8_C( 68), -INT8_C( 22), INT8_C( 23), 
-INT8_C( 109), INT8_C( 58), INT8_C( 14), -INT8_C( 84), INT8_C( 4), INT8_C( 56), INT8_C( 48), INT8_C( 119), INT8_C( 7), -INT8_C( 79), -INT8_C( 3), INT8_C( 126), INT8_C( 66), -INT8_C( 119), -INT8_C( 59), -INT8_C( 106), INT8_C( 73), -INT8_C( 42), -INT8_C( 114), INT8_C( 117), INT8_C( 89), -INT8_C( 42), -INT8_C( 11), INT8_C( 125), -INT8_C( 28), -INT8_C( 12), INT8_C( 5), INT8_C( 117), INT8_C( 36), -INT8_C( 16) } }, { { -INT8_C( 21), -INT8_C( 5), INT8_C( 32), INT8_C( 110), -INT8_C( 113), INT8_C( 22), INT8_C( 116), -INT8_C( 98), INT8_C( 47), INT8_C( 89), INT8_C( 74), -INT8_C( 90), -INT8_C( 41), -INT8_C( 80), INT8_C( 52), INT8_C( 14), INT8_C( 79), INT8_C( 72), INT8_C( 116), -INT8_C( 126), -INT8_C( 46), -INT8_C( 3), -INT8_C( 93), INT8_C( 115), INT8_C( 76), INT8_C( 115), -INT8_C( 32), -INT8_C( 121), -INT8_C( 24), -INT8_C( 68), INT8_C( 35), -INT8_C( 44), -INT8_C( 73), INT8_C( 67), INT8_C( 66), INT8_C( 70), INT8_C( 89), -INT8_C( 74), -INT8_C( 28), -INT8_C( 120), INT8_C( 16), INT8_C( 46), INT8_C( 46), -INT8_C( 25), -INT8_C( 34), INT8_C( 98), -INT8_C( 10), INT8_C( 46), -INT8_C( 86), INT8_C( 106), -INT8_C( 80), INT8_C( 124), INT8_C( 103), INT8_C( 83), -INT8_C( 17), -INT8_C( 77), -INT8_C( 58), -INT8_C( 48), INT8_C( 58), -INT8_C( 81), -INT8_C( 116), INT8_C( 93), -INT8_C( 125), INT8_C( 67) }, UINT64_C(10052436222502618528), { -INT8_C( 100), -INT8_C( 81), INT8_C( 115), INT8_C( 122), INT8_C( 17), INT8_C( 105), -INT8_C( 88), -INT8_C( 69), -INT8_C( 45), INT8_C( 88), INT8_C( 55), INT8_C( 58), -INT8_C( 84), INT8_C( 39), -INT8_C( 19), INT8_C( 114), -INT8_C( 9), INT8_C( 40), INT8_C( 33), -INT8_C( 125), -INT8_C( 123), -INT8_C( 92), -INT8_C( 58), INT8_C( 38), INT8_C( 105), INT8_C( 79), INT8_C( 31), -INT8_C( 27), -INT8_C( 68), -INT8_C( 95), INT8_C( 112), INT8_C( 88), INT8_C( 80), -INT8_C( 29), -INT8_C( 45), INT8_C( 98), INT8_C( 76), INT8_C( 123), INT8_C( 29), INT8_C( 31), -INT8_C( 44), INT8_C( 85), INT8_C( 89), INT8_MIN, INT8_C( 124), INT8_C( 71), -INT8_C( 14), INT8_C( 115), INT8_C( 111), 
INT8_C( 20), -INT8_C( 10), -INT8_C( 12), -INT8_C( 72), -INT8_C( 68), INT8_C( 26), INT8_C( 34), INT8_C( 11), INT8_C( 58), INT8_C( 7), -INT8_C( 57), -INT8_C( 37), INT8_C( 119), INT8_C( 32), INT8_C( 43) }, { INT8_C( 91), -INT8_C( 13), -INT8_C( 115), -INT8_C( 89), INT8_C( 110), -INT8_C( 85), -INT8_C( 57), INT8_C( 66), INT8_C( 0), INT8_C( 32), -INT8_C( 62), INT8_C( 124), INT8_C( 103), -INT8_C( 75), -INT8_C( 17), -INT8_C( 42), -INT8_C( 55), -INT8_C( 27), -INT8_C( 53), -INT8_C( 127), -INT8_C( 95), -INT8_C( 27), -INT8_C( 93), -INT8_C( 84), INT8_C( 31), -INT8_C( 86), INT8_C( 115), -INT8_C( 6), INT8_C( 34), -INT8_C( 109), INT8_C( 38), INT8_C( 125), -INT8_C( 122), -INT8_C( 77), INT8_C( 36), -INT8_C( 11), INT8_C( 94), -INT8_C( 21), INT8_C( 55), INT8_C( 94), INT8_C( 12), -INT8_C( 6), -INT8_C( 38), INT8_C( 115), -INT8_C( 81), -INT8_C( 55), INT8_C( 74), INT8_C( 120), -INT8_C( 82), INT8_C( 21), -INT8_C( 7), INT8_C( 79), -INT8_C( 6), -INT8_C( 99), -INT8_C( 5), INT8_C( 26), INT8_C( 71), INT8_C( 111), INT8_C( 20), INT8_C( 105), INT8_C( 2), INT8_C( 58), -INT8_C( 26), -INT8_C( 119) }, { -INT8_C( 21), -INT8_C( 5), INT8_C( 32), INT8_C( 110), -INT8_C( 113), INT8_C( 105), INT8_C( 116), INT8_C( 66), INT8_C( 0), INT8_C( 89), INT8_C( 55), -INT8_C( 90), -INT8_C( 41), -INT8_C( 80), -INT8_C( 17), INT8_C( 114), -INT8_C( 9), INT8_C( 72), INT8_C( 116), -INT8_C( 125), -INT8_C( 46), -INT8_C( 3), -INT8_C( 93), INT8_C( 38), INT8_C( 105), INT8_C( 115), -INT8_C( 32), -INT8_C( 6), INT8_C( 34), -INT8_C( 95), INT8_C( 112), INT8_C( 125), INT8_C( 80), -INT8_C( 29), INT8_C( 66), INT8_C( 98), INT8_C( 94), INT8_C( 123), INT8_C( 55), -INT8_C( 120), INT8_C( 12), INT8_C( 46), INT8_C( 89), INT8_C( 115), -INT8_C( 34), INT8_C( 71), INT8_C( 74), INT8_C( 46), INT8_C( 111), INT8_C( 106), -INT8_C( 80), INT8_C( 124), INT8_C( 103), INT8_C( 83), -INT8_C( 17), INT8_C( 34), INT8_C( 71), INT8_C( 111), INT8_C( 58), INT8_C( 105), -INT8_C( 116), INT8_C( 93), -INT8_C( 125), INT8_C( 43) } }, { { -INT8_C( 18), INT8_C( 11), INT8_C( 
126), INT8_C( 76), -INT8_C( 10), -INT8_C( 75), -INT8_C( 85), INT8_C( 2), -INT8_C( 81), -INT8_C( 123), INT8_C( 118), INT8_C( 94), INT8_C( 79), -INT8_C( 64), -INT8_C( 42), -INT8_C( 3), -INT8_C( 43), -INT8_C( 48), INT8_C( 77), -INT8_C( 49), INT8_C( 109), INT8_C( 72), -INT8_C( 23), -INT8_C( 76), -INT8_C( 73), -INT8_C( 2), INT8_C( 30), -INT8_C( 70), INT8_C( 56), INT8_C( 4), INT8_C( 67), INT8_C( 38), INT8_C( 15), -INT8_C( 63), INT8_C( 115), INT8_C( 6), INT8_C( 118), INT8_C( 30), INT8_C( 8), INT8_C( 38), -INT8_C( 93), INT8_C( 126), -INT8_C( 124), -INT8_C( 14), INT8_C( 62), INT8_C( 91), -INT8_C( 16), INT8_C( 19), INT8_C( 43), INT8_C( 61), -INT8_C( 29), -INT8_C( 104), -INT8_C( 123), -INT8_C( 52), INT8_C( 76), INT8_C( 61), -INT8_C( 54), INT8_C( 106), -INT8_C( 9), INT8_C( 3), INT8_C( 111), INT8_C( 58), INT8_C( 41), INT8_C( 126) }, UINT64_C( 6816072392956484859), { INT8_C( 11), INT8_C( 28), INT8_C( 80), INT8_C( 74), INT8_C( 119), INT8_C( 64), INT8_C( 93), -INT8_C( 94), INT8_C( 125), INT8_C( 64), INT8_C( 58), INT8_C( 3), INT8_C( 13), -INT8_C( 122), INT8_C( 64), -INT8_C( 41), -INT8_C( 15), INT8_C( 55), -INT8_C( 38), INT8_C( 96), INT8_C( 113), INT8_C( 4), -INT8_C( 34), INT8_C( 108), -INT8_C( 96), INT8_C( 99), -INT8_C( 35), INT8_C( 91), -INT8_C( 16), INT8_C( 117), -INT8_C( 71), -INT8_C( 5), -INT8_C( 111), INT8_C( 9), INT8_C( 69), INT8_C( 8), INT8_C( 74), -INT8_C( 93), -INT8_C( 86), -INT8_C( 57), -INT8_C( 29), -INT8_C( 28), -INT8_C( 54), -INT8_C( 16), INT8_C( 106), INT8_C( 10), -INT8_C( 56), INT8_C( 91), INT8_C( 65), -INT8_C( 94), -INT8_C( 69), -INT8_C( 78), -INT8_C( 90), -INT8_C( 102), INT8_C( 30), INT8_C( 71), -INT8_C( 3), -INT8_C( 4), -INT8_C( 94), -INT8_C( 19), INT8_C( 113), INT8_C( 91), -INT8_C( 24), INT8_C( 2) }, { INT8_C( 100), INT8_C( 46), INT8_C( 10), -INT8_C( 82), -INT8_C( 47), -INT8_C( 76), INT8_C( 118), -INT8_C( 76), -INT8_C( 104), INT8_C( 64), -INT8_C( 91), INT8_C( 2), INT8_C( 75), INT8_C( 109), INT8_C( 94), -INT8_C( 116), INT8_C( 15), INT8_C( 25), INT8_C( 63), 
-INT8_C( 74), -INT8_C( 77), INT8_C( 93), -INT8_C( 3), -INT8_C( 80), INT8_C( 89), -INT8_C( 97), -INT8_C( 99), -INT8_C( 54), -INT8_C( 6), -INT8_C( 122), -INT8_C( 52), INT8_C( 94), -INT8_C( 76), -INT8_C( 42), INT8_C( 13), -INT8_C( 123), -INT8_C( 118), -INT8_C( 125), INT8_C( 57), INT8_C( 34), -INT8_C( 61), -INT8_C( 34), INT8_C( 37), INT8_C( 14), INT8_C( 75), -INT8_C( 125), -INT8_C( 101), INT8_C( 91), -INT8_C( 100), -INT8_C( 38), INT8_C( 17), INT8_C( 80), INT8_C( 55), INT8_C( 14), INT8_C( 0), -INT8_C( 111), -INT8_C( 83), -INT8_C( 98), INT8_C( 91), -INT8_C( 89), INT8_C( 36), INT8_C( 40), INT8_C( 5), -INT8_C( 40) }, { INT8_C( 100), INT8_C( 46), INT8_C( 126), INT8_C( 74), INT8_C( 119), INT8_C( 64), INT8_C( 118), -INT8_C( 76), -INT8_C( 81), -INT8_C( 123), INT8_C( 58), INT8_C( 3), INT8_C( 75), -INT8_C( 64), -INT8_C( 42), -INT8_C( 41), -INT8_C( 43), -INT8_C( 48), INT8_C( 63), -INT8_C( 49), INT8_C( 109), INT8_C( 72), -INT8_C( 23), INT8_C( 108), INT8_C( 89), -INT8_C( 2), INT8_C( 30), -INT8_C( 70), -INT8_C( 6), INT8_C( 117), -INT8_C( 52), INT8_C( 38), INT8_C( 15), INT8_C( 9), INT8_C( 115), INT8_C( 8), INT8_C( 74), -INT8_C( 93), INT8_C( 8), INT8_C( 34), -INT8_C( 29), INT8_C( 126), INT8_C( 37), INT8_C( 14), INT8_C( 62), INT8_C( 91), -INT8_C( 16), INT8_C( 91), INT8_C( 65), -INT8_C( 38), INT8_C( 17), -INT8_C( 104), INT8_C( 55), -INT8_C( 52), INT8_C( 76), INT8_C( 71), -INT8_C( 54), -INT8_C( 4), INT8_C( 91), -INT8_C( 19), INT8_C( 113), INT8_C( 58), INT8_C( 5), INT8_C( 126) } }, { { -INT8_C( 2), INT8_C( 18), INT8_C( 93), -INT8_C( 119), -INT8_C( 107), -INT8_C( 106), -INT8_C( 85), INT8_C( 89), INT8_C( 117), -INT8_C( 48), INT8_C( 103), -INT8_C( 64), INT8_C( 83), INT8_C( 2), INT8_C( 27), -INT8_C( 16), -INT8_C( 36), INT8_C( 44), INT8_C( 64), INT8_C( 20), INT8_C( 58), INT8_C( 64), -INT8_C( 91), -INT8_C( 25), -INT8_C( 34), INT8_C( 0), -INT8_C( 114), INT8_C( 2), INT8_C( 40), -INT8_C( 108), -INT8_C( 38), INT8_C( 39), -INT8_C( 90), INT8_C( 55), -INT8_C( 80), INT8_C( 60), -INT8_C( 50), INT8_C( 
91), -INT8_C( 107), INT8_C( 67), INT8_C( 44), -INT8_C( 4), INT8_C( 3), INT8_MAX, -INT8_C( 1), INT8_C( 31), INT8_C( 111), -INT8_C( 37), INT8_C( 75), -INT8_C( 81), -INT8_C( 17), -INT8_C( 122), -INT8_C( 16), -INT8_C( 108), INT8_C( 109), -INT8_C( 50), -INT8_C( 107), -INT8_C( 4), -INT8_C( 47), -INT8_C( 67), -INT8_C( 112), -INT8_C( 85), -INT8_C( 28), INT8_C( 54) }, UINT64_C( 2086301257730004195), { INT8_C( 4), -INT8_C( 9), -INT8_C( 101), INT8_C( 3), INT8_C( 22), INT8_C( 11), -INT8_C( 34), INT8_C( 98), -INT8_C( 70), -INT8_C( 50), -INT8_C( 24), -INT8_C( 86), INT8_C( 98), INT8_C( 85), INT8_C( 121), -INT8_C( 9), INT8_C( 81), INT8_C( 74), -INT8_C( 75), -INT8_C( 31), -INT8_C( 11), -INT8_C( 103), INT8_C( 24), -INT8_C( 40), INT8_C( 46), -INT8_C( 118), -INT8_C( 119), INT8_C( 30), -INT8_C( 110), INT8_C( 125), INT8_C( 58), -INT8_C( 106), INT8_C( 117), -INT8_C( 43), -INT8_C( 103), -INT8_C( 117), -INT8_C( 32), INT8_C( 119), -INT8_C( 19), -INT8_C( 101), INT8_C( 69), -INT8_C( 43), INT8_C( 69), -INT8_C( 88), INT8_C( 43), -INT8_C( 66), -INT8_C( 97), INT8_C( 124), INT8_C( 8), INT8_C( 84), INT8_C( 94), -INT8_C( 2), -INT8_C( 18), INT8_C( 118), -INT8_C( 42), INT8_C( 28), INT8_C( 0), INT8_C( 96), INT8_C( 58), -INT8_C( 110), -INT8_C( 35), INT8_C( 116), INT8_C( 40), INT8_C( 82) }, { INT8_C( 73), -INT8_C( 63), -INT8_C( 34), INT8_C( 42), INT8_C( 57), -INT8_C( 53), -INT8_C( 59), INT8_C( 126), -INT8_C( 95), INT8_C( 10), INT8_C( 38), -INT8_C( 52), -INT8_C( 55), -INT8_C( 58), INT8_C( 72), -INT8_C( 47), INT8_C( 26), -INT8_C( 90), -INT8_C( 49), INT8_C( 8), INT8_C( 28), -INT8_C( 90), INT8_C( 36), INT8_C( 29), INT8_C( 6), INT8_C( 94), -INT8_C( 81), -INT8_C( 29), -INT8_C( 46), -INT8_C( 40), INT8_C( 54), INT8_C( 28), -INT8_C( 103), INT8_C( 20), INT8_C( 70), -INT8_C( 46), -INT8_C( 33), INT8_C( 11), INT8_C( 81), INT8_MIN, INT8_C( 21), INT8_C( 119), INT8_C( 76), -INT8_C( 34), INT8_C( 61), -INT8_C( 107), -INT8_C( 80), INT8_C( 88), INT8_C( 59), INT8_MAX, INT8_C( 96), INT8_C( 88), INT8_C( 37), -INT8_C( 123), 
INT8_C( 117), INT8_C( 43), -INT8_C( 29), INT8_C( 36), INT8_C( 15), -INT8_C( 74), -INT8_C( 4), INT8_C( 69), -INT8_C( 46), -INT8_C( 106) }, { INT8_C( 73), -INT8_C( 9), INT8_C( 93), -INT8_C( 119), -INT8_C( 107), INT8_C( 11), -INT8_C( 34), INT8_C( 126), INT8_C( 117), -INT8_C( 48), INT8_C( 38), -INT8_C( 64), INT8_C( 98), INT8_C( 2), INT8_C( 27), -INT8_C( 9), -INT8_C( 36), INT8_C( 74), INT8_C( 64), INT8_C( 20), INT8_C( 28), -INT8_C( 90), INT8_C( 36), -INT8_C( 25), INT8_C( 46), INT8_C( 0), -INT8_C( 114), INT8_C( 2), -INT8_C( 46), INT8_C( 125), -INT8_C( 38), INT8_C( 28), -INT8_C( 90), INT8_C( 55), -INT8_C( 80), INT8_C( 60), -INT8_C( 32), INT8_C( 119), INT8_C( 81), -INT8_C( 101), INT8_C( 69), INT8_C( 119), INT8_C( 76), INT8_MAX, -INT8_C( 1), INT8_C( 31), INT8_C( 111), -INT8_C( 37), INT8_C( 75), -INT8_C( 81), INT8_C( 96), -INT8_C( 122), INT8_C( 37), INT8_C( 118), INT8_C( 117), INT8_C( 43), -INT8_C( 107), -INT8_C( 4), INT8_C( 58), -INT8_C( 74), -INT8_C( 4), -INT8_C( 85), -INT8_C( 28), INT8_C( 54) } }, { { INT8_C( 89), INT8_C( 24), INT8_C( 104), INT8_C( 56), INT8_C( 35), -INT8_C( 71), -INT8_C( 71), INT8_C( 56), INT8_C( 49), INT8_C( 5), INT8_C( 23), INT8_C( 110), -INT8_C( 102), -INT8_C( 57), -INT8_C( 58), -INT8_C( 42), INT8_C( 70), INT8_C( 39), INT8_C( 46), INT8_C( 108), -INT8_C( 84), -INT8_C( 93), -INT8_C( 105), -INT8_C( 113), -INT8_C( 57), -INT8_C( 90), INT8_C( 69), -INT8_C( 60), -INT8_C( 21), INT8_C( 23), INT8_C( 90), INT8_C( 68), INT8_C( 47), -INT8_C( 62), INT8_C( 125), INT8_C( 82), INT8_C( 124), INT8_C( 54), -INT8_C( 117), -INT8_C( 83), INT8_C( 59), -INT8_C( 94), INT8_C( 27), -INT8_C( 42), INT8_C( 105), -INT8_C( 30), -INT8_C( 84), -INT8_C( 81), INT8_C( 9), -INT8_C( 38), INT8_C( 27), -INT8_C( 75), INT8_C( 125), -INT8_C( 77), INT8_C( 68), INT8_C( 68), INT8_C( 89), -INT8_C( 118), INT8_C( 8), INT8_C( 69), -INT8_C( 95), INT8_C( 98), -INT8_C( 119), -INT8_C( 47) }, UINT64_C( 8669057908159481381), { INT8_C( 80), INT8_C( 105), INT8_C( 78), -INT8_C( 71), INT8_C( 75), -INT8_C( 6), 
INT8_C( 105), INT8_C( 84), -INT8_C( 44), -INT8_C( 124), INT8_C( 9), INT8_C( 81), INT8_C( 55), INT8_C( 78), -INT8_C( 107), -INT8_C( 111), -INT8_C( 40), -INT8_C( 98), -INT8_C( 42), INT8_C( 121), INT8_C( 0), INT8_C( 95), INT8_C( 74), INT8_C( 37), INT8_C( 102), INT8_C( 110), -INT8_C( 58), -INT8_C( 94), INT8_C( 28), INT8_C( 20), INT8_C( 26), INT8_C( 109), INT8_C( 126), INT8_C( 104), INT8_C( 38), -INT8_C( 55), INT8_C( 98), -INT8_C( 113), INT8_C( 30), INT8_C( 54), INT8_C( 20), INT8_C( 39), -INT8_C( 121), INT8_C( 75), INT8_C( 117), INT8_C( 29), -INT8_C( 36), INT8_C( 77), -INT8_C( 69), -INT8_C( 78), -INT8_C( 57), -INT8_C( 69), INT8_C( 18), INT8_C( 17), -INT8_C( 31), INT8_C( 120), INT8_MAX, -INT8_C( 89), INT8_C( 26), -INT8_C( 100), -INT8_C( 68), INT8_C( 53), INT8_C( 9), INT8_C( 58) }, { -INT8_C( 99), INT8_C( 47), INT8_C( 3), INT8_C( 0), -INT8_C( 65), INT8_C( 33), INT8_C( 54), -INT8_C( 45), INT8_C( 73), -INT8_C( 66), INT8_C( 30), -INT8_C( 66), -INT8_C( 37), -INT8_C( 5), INT8_C( 12), -INT8_C( 106), -INT8_C( 83), -INT8_C( 45), INT8_C( 81), -INT8_C( 65), -INT8_C( 28), INT8_C( 50), INT8_C( 55), INT8_C( 100), -INT8_C( 38), INT8_C( 82), INT8_C( 0), -INT8_C( 106), -INT8_C( 121), INT8_C( 9), -INT8_C( 48), INT8_C( 36), INT8_C( 56), -INT8_C( 45), INT8_C( 36), -INT8_C( 9), -INT8_C( 11), INT8_C( 91), -INT8_C( 54), INT8_C( 62), INT8_C( 25), -INT8_C( 23), -INT8_C( 4), -INT8_C( 12), -INT8_C( 28), INT8_C( 8), -INT8_C( 118), -INT8_C( 111), -INT8_C( 37), -INT8_C( 37), INT8_C( 81), -INT8_C( 64), INT8_C( 14), -INT8_C( 120), INT8_C( 36), -INT8_C( 24), -INT8_C( 38), INT8_C( 36), INT8_C( 126), INT8_C( 97), INT8_C( 45), INT8_C( 78), -INT8_C( 122), INT8_C( 101) }, { INT8_C( 80), INT8_C( 24), INT8_C( 78), INT8_C( 56), INT8_C( 35), INT8_C( 33), -INT8_C( 71), INT8_C( 56), INT8_C( 49), -INT8_C( 66), INT8_C( 30), INT8_C( 110), -INT8_C( 102), -INT8_C( 57), -INT8_C( 58), -INT8_C( 42), -INT8_C( 40), -INT8_C( 45), INT8_C( 46), INT8_C( 108), -INT8_C( 84), INT8_C( 95), -INT8_C( 105), -INT8_C( 113), INT8_C( 
102), -INT8_C( 90), INT8_C( 69), -INT8_C( 60), -INT8_C( 21), INT8_C( 20), INT8_C( 90), INT8_C( 109), INT8_C( 47), -INT8_C( 62), INT8_C( 38), -INT8_C( 9), INT8_C( 98), INT8_C( 91), -INT8_C( 117), -INT8_C( 83), INT8_C( 59), INT8_C( 39), -INT8_C( 4), INT8_C( 75), INT8_C( 105), INT8_C( 29), -INT8_C( 84), INT8_C( 77), INT8_C( 9), -INT8_C( 37), INT8_C( 81), -INT8_C( 64), INT8_C( 125), -INT8_C( 77), INT8_C( 36), INT8_C( 68), INT8_C( 89), -INT8_C( 118), INT8_C( 8), INT8_C( 97), INT8_C( 45), INT8_C( 78), INT8_C( 9), -INT8_C( 47) } }, { { INT8_C( 33), -INT8_C( 86), INT8_C( 93), INT8_C( 22), INT8_C( 5), INT8_C( 39), INT8_C( 84), INT8_C( 30), INT8_C( 16), INT8_C( 81), INT8_C( 18), -INT8_C( 12), INT8_C( 89), -INT8_C( 100), -INT8_C( 122), INT8_C( 53), INT8_C( 120), -INT8_C( 41), -INT8_C( 11), -INT8_C( 122), INT8_C( 95), INT8_C( 25), INT8_C( 110), INT8_C( 58), INT8_C( 61), -INT8_C( 20), -INT8_C( 101), INT8_C( 106), INT8_C( 58), INT8_C( 33), -INT8_C( 49), INT8_C( 91), -INT8_C( 52), INT8_C( 44), INT8_C( 114), -INT8_C( 47), INT8_C( 84), -INT8_C( 58), -INT8_C( 16), INT8_C( 100), INT8_C( 23), INT8_C( 2), INT8_C( 89), INT8_C( 113), -INT8_C( 97), -INT8_C( 33), -INT8_C( 90), INT8_C( 23), -INT8_C( 74), -INT8_C( 101), -INT8_C( 99), INT8_C( 21), -INT8_C( 76), INT8_C( 11), INT8_C( 79), -INT8_C( 15), -INT8_C( 9), -INT8_C( 21), INT8_C( 91), INT8_C( 49), INT8_C( 12), INT8_C( 42), -INT8_C( 116), -INT8_C( 40) }, UINT64_C(15857062986774150743), { -INT8_C( 100), INT8_C( 104), INT8_C( 77), INT8_C( 59), INT8_C( 71), -INT8_C( 13), INT8_C( 82), -INT8_C( 3), -INT8_C( 114), -INT8_C( 17), INT8_C( 19), INT8_C( 66), -INT8_C( 6), INT8_C( 98), INT8_C( 51), -INT8_C( 15), INT8_C( 77), -INT8_C( 114), INT8_C( 34), INT8_C( 90), -INT8_C( 71), -INT8_C( 81), INT8_C( 50), INT8_C( 16), -INT8_C( 83), -INT8_C( 36), -INT8_C( 69), INT8_C( 114), INT8_C( 118), -INT8_C( 54), INT8_C( 79), INT8_C( 19), INT8_C( 51), -INT8_C( 100), INT8_C( 78), INT8_C( 122), -INT8_C( 112), -INT8_C( 95), INT8_C( 120), INT8_C( 30), -INT8_C( 112), 
-INT8_C( 117), INT8_C( 97), -INT8_C( 117), -INT8_C( 19), -INT8_C( 108), INT8_C( 124), INT8_C( 59), INT8_C( 35), -INT8_C( 97), -INT8_C( 107), -INT8_C( 36), INT8_C( 78), -INT8_C( 57), -INT8_C( 20), -INT8_C( 5), -INT8_C( 92), -INT8_C( 89), INT8_C( 110), INT8_C( 26), INT8_C( 113), -INT8_C( 67), INT8_C( 45), -INT8_C( 92) }, { INT8_C( 89), INT8_C( 124), INT8_C( 31), -INT8_C( 23), INT8_C( 29), -INT8_C( 105), INT8_C( 8), -INT8_C( 83), INT8_C( 34), INT8_C( 105), INT8_C( 56), INT8_C( 15), -INT8_C( 3), -INT8_C( 75), INT8_C( 74), INT8_C( 32), INT8_C( 84), -INT8_C( 33), -INT8_C( 4), -INT8_C( 94), -INT8_C( 89), -INT8_C( 24), -INT8_C( 99), INT8_C( 75), -INT8_C( 113), INT8_C( 11), INT8_C( 101), INT8_C( 1), -INT8_C( 56), -INT8_C( 109), -INT8_C( 91), INT8_C( 34), INT8_C( 15), -INT8_C( 60), INT8_C( 11), INT8_C( 44), INT8_C( 91), INT8_C( 19), -INT8_C( 39), INT8_C( 125), INT8_C( 124), INT8_C( 18), -INT8_C( 115), INT8_C( 122), -INT8_C( 57), -INT8_C( 41), -INT8_C( 102), INT8_C( 27), -INT8_C( 73), -INT8_C( 105), -INT8_C( 67), INT8_C( 94), INT8_MAX, INT8_C( 90), -INT8_C( 87), INT8_C( 15), INT8_C( 102), INT8_C( 14), INT8_C( 16), INT8_C( 46), -INT8_C( 95), -INT8_C( 75), INT8_C( 80), -INT8_C( 80) }, { INT8_C( 89), INT8_C( 124), INT8_C( 77), INT8_C( 22), INT8_C( 71), INT8_C( 39), INT8_C( 82), INT8_C( 30), INT8_C( 16), INT8_C( 105), INT8_C( 56), INT8_C( 66), -INT8_C( 3), INT8_C( 98), INT8_C( 74), INT8_C( 32), INT8_C( 120), -INT8_C( 33), -INT8_C( 11), INT8_C( 90), INT8_C( 95), -INT8_C( 24), INT8_C( 110), INT8_C( 75), -INT8_C( 83), INT8_C( 11), -INT8_C( 101), INT8_C( 114), INT8_C( 58), -INT8_C( 54), -INT8_C( 49), INT8_C( 34), INT8_C( 51), INT8_C( 44), INT8_C( 78), -INT8_C( 47), INT8_C( 84), -INT8_C( 58), INT8_C( 120), INT8_C( 125), INT8_C( 23), INT8_C( 18), INT8_C( 89), INT8_C( 122), -INT8_C( 19), -INT8_C( 33), -INT8_C( 90), INT8_C( 59), INT8_C( 35), -INT8_C( 97), -INT8_C( 67), INT8_C( 94), -INT8_C( 76), INT8_C( 11), INT8_C( 79), -INT8_C( 15), -INT8_C( 9), -INT8_C( 21), INT8_C( 110), INT8_C( 46), 
INT8_C( 113), INT8_C( 42), INT8_C( 80), -INT8_C( 80) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi8(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_mask_max_epi8(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; }
/* Table-driven test for simde_mm512_maskz_max_epi8.
 *
 * Each test_vec entry holds:
 *   k - 64-bit mask passed as the first argument of the function under test
 *   a - first 64-element int8_t input
 *   b - second 64-element int8_t input
 *   r - expected 64-element int8_t result
 *
 * In the vectors below, r[i] is either the larger of a[i] and b[i] or 0;
 * the zero lanes appear to be the ones whose bit in k is clear (e.g. the
 * first vector has bit 2 of k clear and r[2] == 0 although a[2] == 8).
 *
 * The parameter list comes from SIMDE_MUNIT_TEST_ARGS, which is defined
 * outside this file section. Returns 0 after every entry has been checked.
 * NOTE(review): simde_test_x86_assert_equal_i8x64 is also defined elsewhere;
 * presumably it reports the failure and leaves the function early on a
 * mismatch -- confirm against the test harness. */
static int test_simde_mm512_maskz_max_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask64 k; const int8_t a[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { { UINT64_C(15100473841603180603), { -INT8_C( 76), -INT8_C( 30), INT8_C( 8), INT8_C( 48), INT8_C( 20), -INT8_C( 118), -INT8_C( 64), -INT8_C( 125), -INT8_C( 42), -INT8_C( 8), -INT8_C( 20), INT8_C( 95), INT8_C( 50), -INT8_C( 41), -INT8_C( 18), -INT8_C( 33), -INT8_C( 10), -INT8_C( 104), INT8_C( 32), -INT8_C( 105), INT8_C( 107), INT8_C( 117), -INT8_C( 9), -INT8_C( 90), INT8_C( 109), -INT8_C( 52), -INT8_C( 86), INT8_C( 97), INT8_C( 116), INT8_C( 58), INT8_C( 50), INT8_C( 40), INT8_C( 28), INT8_C( 58), INT8_C( 89), INT8_C( 48), -INT8_C( 59), INT8_C( 25), -INT8_C( 77), -INT8_C( 101), INT8_C( 17), -INT8_C( 96), -INT8_C( 5), INT8_C( 68), INT8_C( 119), -INT8_C( 23), INT8_C( 35), INT8_C( 110), -INT8_C( 127), INT8_C( 67), INT8_C( 5), -INT8_C( 19), -INT8_C( 72), -INT8_C( 4), -INT8_C( 109), INT8_C( 37), -INT8_C( 56), INT8_C( 62), -INT8_C( 122), INT8_C( 61), INT8_C( 120), -INT8_C( 72), INT8_C( 101), -INT8_C( 108) }, { -INT8_C( 14), -INT8_C( 66), -INT8_C( 60), -INT8_C( 73), -INT8_C( 41), INT8_C( 120), INT8_C( 83), -INT8_C( 23), INT8_C( 24), INT8_C( 78), INT8_C( 45), -INT8_C( 113), INT8_C( 55), INT8_C( 80), -INT8_C( 3), -INT8_C( 71), -INT8_C( 109), INT8_C( 2), -INT8_C( 90), INT8_C( 75), -INT8_C( 2), INT8_C( 57), INT8_C( 112), -INT8_C( 57), INT8_C( 119), -INT8_C( 10), INT8_C( 4), -INT8_C( 17), -INT8_C( 82), INT8_C( 105), -INT8_C( 125), 
-INT8_C( 96), INT8_C( 40), INT8_C( 72), INT8_C( 88), -INT8_C( 1), -INT8_C( 64), -INT8_C( 85), -INT8_C( 24), -INT8_C( 40), -INT8_C( 7), INT8_C( 21), INT8_C( 103), INT8_C( 48), INT8_C( 101), INT8_C( 101), -INT8_C( 23), -INT8_C( 8), INT8_C( 103), -INT8_C( 113), INT8_C( 67), INT8_C( 102), -INT8_C( 55), -INT8_C( 77), INT8_C( 45), INT8_C( 64), -INT8_C( 87), INT8_C( 49), INT8_C( 48), INT8_C( 87), -INT8_C( 102), -INT8_C( 77), -INT8_C( 8), -INT8_C( 62) }, { -INT8_C( 14), -INT8_C( 30), INT8_C( 0), INT8_C( 48), INT8_C( 20), INT8_C( 120), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 95), INT8_C( 55), INT8_C( 80), -INT8_C( 3), -INT8_C( 33), -INT8_C( 10), INT8_C( 0), INT8_C( 32), INT8_C( 0), INT8_C( 107), INT8_C( 0), INT8_C( 112), -INT8_C( 57), INT8_C( 0), INT8_C( 0), INT8_C( 4), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 89), INT8_C( 0), -INT8_C( 59), INT8_C( 25), -INT8_C( 24), -INT8_C( 40), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 68), INT8_C( 0), INT8_C( 101), INT8_C( 0), INT8_C( 110), INT8_C( 103), INT8_C( 67), INT8_C( 67), INT8_C( 102), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 64), -INT8_C( 56), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 120), INT8_C( 0), INT8_C( 101), -INT8_C( 62) } }, { UINT64_C(17623617764994470139), { -INT8_C( 64), -INT8_C( 5), INT8_C( 36), INT8_C( 37), INT8_C( 96), INT8_C( 14), INT8_C( 30), -INT8_C( 57), -INT8_C( 99), INT8_C( 97), INT8_C( 45), INT8_C( 102), INT8_C( 21), INT8_C( 90), -INT8_C( 89), -INT8_C( 66), -INT8_C( 117), -INT8_C( 41), INT8_C( 22), INT8_C( 38), -INT8_C( 118), INT8_C( 14), -INT8_C( 24), -INT8_C( 122), INT8_C( 94), -INT8_C( 86), INT8_C( 65), INT8_C( 89), INT8_C( 85), -INT8_C( 43), INT8_C( 77), INT8_C( 21), -INT8_C( 48), INT8_C( 113), INT8_C( 58), INT8_C( 48), INT8_MAX, INT8_C( 88), -INT8_C( 9), INT8_C( 29), -INT8_C( 70), INT8_C( 37), -INT8_C( 125), -INT8_C( 49), INT8_MAX, INT8_C( 42), -INT8_C( 115), INT8_C( 11), INT8_C( 1), -INT8_C( 93), INT8_C( 49), 
-INT8_C( 116), -INT8_C( 79), INT8_C( 25), INT8_C( 18), INT8_C( 15), -INT8_C( 60), INT8_C( 83), INT8_C( 104), INT8_C( 25), INT8_C( 40), -INT8_C( 75), INT8_C( 46), -INT8_C( 8) }, { INT8_C( 39), INT8_C( 104), INT8_C( 40), -INT8_C( 90), -INT8_C( 63), INT8_C( 32), -INT8_C( 61), INT8_C( 123), INT8_C( 69), INT8_C( 71), INT8_C( 74), -INT8_C( 60), INT8_C( 113), -INT8_C( 41), -INT8_C( 49), INT8_C( 115), INT8_C( 123), INT8_C( 0), -INT8_C( 1), INT8_C( 44), INT8_C( 26), INT8_C( 17), INT8_C( 60), -INT8_C( 34), INT8_C( 100), -INT8_C( 92), -INT8_C( 9), -INT8_C( 115), INT8_C( 90), INT8_C( 37), -INT8_C( 123), -INT8_C( 127), -INT8_C( 115), -INT8_C( 82), INT8_C( 39), INT8_C( 78), -INT8_C( 50), -INT8_C( 21), -INT8_C( 55), INT8_C( 19), INT8_C( 50), INT8_C( 19), -INT8_C( 41), -INT8_C( 93), -INT8_C( 21), -INT8_C( 89), INT8_C( 22), INT8_C( 102), -INT8_C( 89), INT8_C( 21), -INT8_C( 110), -INT8_C( 63), INT8_C( 38), -INT8_C( 50), -INT8_C( 97), -INT8_C( 117), INT8_C( 115), -INT8_C( 106), INT8_C( 24), -INT8_C( 51), -INT8_C( 69), -INT8_C( 99), INT8_C( 78), INT8_C( 73) }, { INT8_C( 39), INT8_C( 104), INT8_C( 0), INT8_C( 37), INT8_C( 96), INT8_C( 32), INT8_C( 30), INT8_C( 123), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 113), INT8_C( 0), -INT8_C( 49), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 60), -INT8_C( 34), INT8_C( 100), -INT8_C( 86), INT8_C( 0), INT8_C( 89), INT8_C( 90), INT8_C( 37), INT8_C( 0), INT8_C( 21), -INT8_C( 48), INT8_C( 113), INT8_C( 0), INT8_C( 78), INT8_MAX, INT8_C( 88), -INT8_C( 9), INT8_C( 29), INT8_C( 0), INT8_C( 37), INT8_C( 0), -INT8_C( 49), INT8_C( 0), INT8_C( 42), INT8_C( 0), INT8_C( 102), INT8_C( 1), INT8_C( 21), INT8_C( 0), INT8_C( 0), INT8_C( 38), INT8_C( 0), INT8_C( 0), INT8_C( 15), INT8_C( 0), INT8_C( 0), INT8_C( 104), INT8_C( 0), INT8_C( 40), -INT8_C( 75), INT8_C( 78), INT8_C( 73) } }, { UINT64_C(10532900693886858571), { INT8_C( 116), INT8_C( 4), INT8_C( 54), INT8_C( 95), -INT8_C( 85), INT8_C( 76), 
-INT8_C( 59), INT8_C( 82), INT8_C( 98), INT8_C( 88), INT8_C( 20), -INT8_C( 120), INT8_C( 38), -INT8_C( 77), INT8_C( 19), -INT8_C( 103), INT8_C( 74), INT8_C( 43), INT8_C( 102), INT8_C( 5), -INT8_C( 55), -INT8_C( 76), INT8_C( 78), INT8_C( 20), INT8_C( 42), -INT8_C( 26), INT8_C( 46), -INT8_C( 118), INT8_C( 71), INT8_C( 90), INT8_C( 29), -INT8_C( 69), INT8_C( 94), INT8_C( 83), INT8_C( 27), INT8_C( 9), -INT8_C( 97), -INT8_C( 32), INT8_C( 92), INT8_C( 1), INT8_C( 56), INT8_C( 112), -INT8_C( 118), INT8_C( 95), INT8_C( 35), -INT8_C( 99), -INT8_C( 8), INT8_C( 109), -INT8_C( 55), INT8_C( 95), INT8_C( 115), -INT8_C( 110), INT8_C( 19), -INT8_C( 63), -INT8_C( 90), INT8_C( 61), -INT8_C( 89), -INT8_C( 44), -INT8_C( 56), -INT8_C( 18), INT8_C( 47), -INT8_C( 27), -INT8_C( 86), -INT8_C( 115) }, { INT8_C( 56), -INT8_C( 59), -INT8_C( 105), -INT8_C( 41), -INT8_C( 91), -INT8_C( 13), -INT8_C( 39), -INT8_C( 34), INT8_C( 99), INT8_C( 99), INT8_C( 61), -INT8_C( 122), INT8_C( 0), INT8_C( 53), -INT8_C( 12), -INT8_C( 55), -INT8_C( 108), INT8_C( 103), INT8_C( 91), -INT8_C( 88), INT8_C( 40), INT8_C( 2), -INT8_C( 27), -INT8_C( 48), -INT8_C( 42), -INT8_C( 83), -INT8_C( 66), INT8_C( 5), -INT8_C( 110), INT8_C( 104), -INT8_C( 109), -INT8_C( 54), INT8_C( 45), INT8_C( 42), -INT8_C( 94), -INT8_C( 45), INT8_C( 29), INT8_C( 123), -INT8_C( 79), INT8_MIN, -INT8_C( 34), -INT8_C( 18), INT8_C( 6), -INT8_C( 34), INT8_C( 35), -INT8_C( 6), -INT8_C( 88), -INT8_C( 72), INT8_C( 97), INT8_C( 3), INT8_C( 96), -INT8_C( 118), INT8_C( 5), INT8_C( 69), INT8_C( 90), -INT8_C( 36), -INT8_C( 13), INT8_C( 24), -INT8_C( 31), -INT8_C( 123), -INT8_C( 127), INT8_C( 116), INT8_C( 80), -INT8_C( 82) }, { INT8_C( 116), INT8_C( 4), INT8_C( 0), INT8_C( 95), INT8_C( 0), INT8_C( 0), -INT8_C( 39), INT8_C( 0), INT8_C( 99), INT8_C( 0), INT8_C( 61), INT8_C( 0), INT8_C( 38), INT8_C( 53), INT8_C( 19), INT8_C( 0), INT8_C( 74), INT8_C( 103), INT8_C( 102), INT8_C( 0), INT8_C( 40), INT8_C( 0), INT8_C( 0), INT8_C( 20), INT8_C( 42), INT8_C( 0), 
INT8_C( 0), INT8_C( 5), INT8_C( 71), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 123), INT8_C( 92), INT8_C( 0), INT8_C( 56), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 6), -INT8_C( 8), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 115), -INT8_C( 110), INT8_C( 0), INT8_C( 69), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 24), INT8_C( 0), INT8_C( 0), INT8_C( 47), INT8_C( 0), INT8_C( 0), -INT8_C( 82) } }, { UINT64_C( 5420982023349203614), { INT8_C( 32), INT8_C( 66), INT8_C( 41), INT8_C( 68), INT8_C( 60), -INT8_C( 47), -INT8_C( 4), -INT8_C( 98), -INT8_C( 43), INT8_C( 92), INT8_C( 40), -INT8_C( 38), -INT8_C( 95), -INT8_C( 126), -INT8_C( 74), -INT8_C( 108), -INT8_C( 102), -INT8_C( 104), INT8_C( 26), INT8_C( 27), INT8_C( 12), INT8_C( 106), -INT8_C( 54), -INT8_C( 85), INT8_C( 92), INT8_C( 75), INT8_C( 102), -INT8_C( 55), INT8_C( 126), -INT8_C( 94), INT8_C( 20), -INT8_C( 98), -INT8_C( 28), INT8_C( 61), -INT8_C( 30), INT8_C( 32), INT8_C( 15), -INT8_C( 34), -INT8_C( 66), -INT8_C( 28), INT8_C( 58), -INT8_C( 26), -INT8_C( 66), -INT8_C( 36), INT8_C( 104), INT8_C( 117), INT8_C( 112), INT8_C( 3), INT8_C( 13), -INT8_C( 118), INT8_C( 30), INT8_C( 25), -INT8_C( 12), -INT8_C( 24), -INT8_C( 60), INT8_C( 80), INT8_C( 52), INT8_C( 43), INT8_C( 25), -INT8_C( 78), -INT8_C( 51), INT8_C( 45), INT8_C( 80), -INT8_C( 79) }, { INT8_C( 107), INT8_C( 51), -INT8_C( 47), INT8_C( 122), INT8_C( 17), -INT8_C( 112), INT8_C( 94), INT8_C( 76), INT8_C( 118), INT8_C( 28), INT8_C( 40), -INT8_C( 33), -INT8_C( 111), -INT8_C( 104), -INT8_C( 30), -INT8_C( 98), INT8_C( 35), INT8_C( 0), -INT8_C( 72), INT8_C( 23), -INT8_C( 23), INT8_C( 124), INT8_C( 104), INT8_C( 29), -INT8_C( 89), -INT8_C( 127), -INT8_C( 49), INT8_C( 116), -INT8_C( 81), INT8_C( 31), INT8_C( 37), INT8_C( 26), INT8_C( 82), -INT8_C( 9), -INT8_C( 108), INT8_C( 100), -INT8_C( 121), -INT8_C( 14), -INT8_C( 80), -INT8_C( 3), INT8_C( 14), -INT8_C( 40), -INT8_C( 36), -INT8_C( 96), 
INT8_C( 112), -INT8_C( 66), INT8_C( 62), -INT8_C( 109), -INT8_C( 65), -INT8_C( 10), -INT8_C( 85), -INT8_C( 88), INT8_C( 115), INT8_C( 19), -INT8_C( 59), INT8_C( 26), -INT8_C( 108), -INT8_C( 108), -INT8_C( 113), INT8_C( 67), -INT8_C( 77), -INT8_C( 76), INT8_C( 93), INT8_C( 6) }, { INT8_C( 0), INT8_C( 66), INT8_C( 41), INT8_C( 122), INT8_C( 60), INT8_C( 0), INT8_C( 0), INT8_C( 76), INT8_C( 0), INT8_C( 92), INT8_C( 0), INT8_C( 0), -INT8_C( 95), -INT8_C( 104), -INT8_C( 30), -INT8_C( 98), INT8_C( 35), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 29), INT8_C( 92), INT8_C( 75), INT8_C( 0), INT8_C( 116), INT8_C( 126), INT8_C( 31), INT8_C( 0), INT8_C( 26), INT8_C( 82), INT8_C( 0), -INT8_C( 30), INT8_C( 100), INT8_C( 0), -INT8_C( 14), -INT8_C( 66), INT8_C( 0), INT8_C( 0), -INT8_C( 26), INT8_C( 0), INT8_C( 0), INT8_C( 112), INT8_C( 117), INT8_C( 0), INT8_C( 0), INT8_C( 13), -INT8_C( 10), INT8_C( 0), INT8_C( 25), INT8_C( 115), INT8_C( 19), INT8_C( 0), INT8_C( 0), INT8_C( 52), INT8_C( 43), INT8_C( 0), INT8_C( 67), INT8_C( 0), INT8_C( 0), INT8_C( 93), INT8_C( 0) } }, { UINT64_C(17451478119166439851), { -INT8_C( 14), INT8_C( 12), -INT8_C( 110), INT8_C( 98), -INT8_C( 53), -INT8_C( 48), -INT8_C( 10), -INT8_C( 118), -INT8_C( 57), -INT8_C( 95), INT8_C( 50), INT8_C( 58), -INT8_C( 76), -INT8_C( 9), INT8_C( 84), INT8_C( 72), -INT8_C( 117), -INT8_C( 29), -INT8_C( 116), INT8_C( 62), -INT8_C( 104), -INT8_C( 23), INT8_C( 68), INT8_C( 67), -INT8_C( 37), -INT8_C( 82), INT8_C( 118), -INT8_C( 66), -INT8_C( 56), -INT8_C( 90), -INT8_C( 80), -INT8_C( 70), -INT8_C( 78), INT8_C( 66), INT8_C( 29), INT8_C( 125), INT8_C( 19), INT8_C( 19), INT8_C( 7), -INT8_C( 38), -INT8_C( 76), INT8_C( 57), INT8_C( 20), INT8_C( 104), INT8_C( 48), INT8_C( 104), -INT8_C( 80), -INT8_C( 69), INT8_C( 76), INT8_C( 60), -INT8_C( 6), -INT8_C( 28), INT8_C( 38), INT8_C( 62), INT8_C( 39), INT8_C( 1), -INT8_C( 19), -INT8_C( 99), -INT8_C( 65), -INT8_C( 75), INT8_C( 67), INT8_C( 112), INT8_C( 112), 
-INT8_C( 10) }, { -INT8_C( 78), -INT8_C( 115), INT8_C( 115), -INT8_C( 59), -INT8_C( 96), INT8_C( 123), -INT8_C( 97), INT8_C( 84), -INT8_C( 76), -INT8_C( 77), -INT8_C( 68), -INT8_C( 27), INT8_C( 28), INT8_C( 108), -INT8_C( 96), INT8_C( 104), -INT8_C( 87), -INT8_C( 102), INT8_C( 76), -INT8_C( 49), -INT8_C( 39), INT8_C( 115), -INT8_C( 48), -INT8_C( 58), INT8_C( 17), -INT8_C( 113), INT8_C( 123), INT8_C( 84), -INT8_C( 1), -INT8_C( 21), INT8_C( 74), -INT8_C( 78), INT8_C( 120), -INT8_C( 66), INT8_C( 119), INT8_C( 24), INT8_C( 57), INT8_C( 23), INT8_C( 108), -INT8_C( 19), -INT8_C( 54), INT8_C( 40), -INT8_C( 46), -INT8_C( 26), -INT8_C( 107), INT8_C( 115), INT8_C( 78), INT8_C( 62), INT8_C( 13), -INT8_C( 102), INT8_C( 13), -INT8_C( 26), INT8_C( 14), -INT8_C( 35), -INT8_C( 84), INT8_C( 31), INT8_C( 108), INT8_C( 40), INT8_C( 115), INT8_C( 108), INT8_C( 19), -INT8_C( 66), INT8_C( 30), -INT8_C( 116) }, { -INT8_C( 14), INT8_C( 12), INT8_C( 0), INT8_C( 98), INT8_C( 0), INT8_C( 123), INT8_C( 0), INT8_C( 84), -INT8_C( 57), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 28), INT8_C( 108), INT8_C( 84), INT8_C( 104), INT8_C( 0), -INT8_C( 29), INT8_C( 0), INT8_C( 62), INT8_C( 0), INT8_C( 115), INT8_C( 68), INT8_C( 0), INT8_C( 0), -INT8_C( 82), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 21), INT8_C( 0), INT8_C( 0), INT8_C( 120), INT8_C( 66), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 23), INT8_C( 108), -INT8_C( 19), INT8_C( 0), INT8_C( 57), INT8_C( 0), INT8_C( 104), INT8_C( 48), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 38), INT8_C( 62), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 40), INT8_C( 0), INT8_C( 0), INT8_C( 67), INT8_C( 112), INT8_C( 112), -INT8_C( 10) } }, { UINT64_C( 8620472070220060028), { INT8_C( 57), INT8_C( 117), INT8_C( 93), -INT8_C( 50), -INT8_C( 24), -INT8_C( 84), INT8_C( 12), -INT8_C( 11), INT8_C( 70), INT8_C( 25), -INT8_C( 36), INT8_C( 84), -INT8_C( 10), -INT8_C( 120), INT8_C( 115), INT8_C( 99), -INT8_C( 80), 
-INT8_C( 25), -INT8_C( 49), -INT8_C( 60), -INT8_C( 91), -INT8_C( 19), INT8_C( 80), INT8_C( 33), -INT8_C( 126), -INT8_C( 12), -INT8_C( 42), INT8_C( 47), INT8_C( 5), INT8_C( 120), -INT8_C( 90), INT8_C( 63), -INT8_C( 19), INT8_C( 3), INT8_C( 13), -INT8_C( 43), -INT8_C( 81), INT8_C( 26), -INT8_C( 53), -INT8_C( 10), INT8_C( 51), -INT8_C( 89), INT8_C( 74), INT8_C( 42), INT8_C( 47), -INT8_C( 66), -INT8_C( 115), -INT8_C( 32), -INT8_C( 91), INT8_C( 92), -INT8_C( 92), INT8_C( 74), INT8_C( 73), -INT8_C( 12), INT8_C( 107), -INT8_C( 53), -INT8_C( 24), INT8_C( 65), -INT8_C( 6), -INT8_C( 18), -INT8_C( 71), -INT8_C( 96), INT8_C( 45), -INT8_C( 89) }, { -INT8_C( 92), INT8_C( 58), INT8_C( 124), INT8_C( 83), INT8_C( 84), INT8_C( 71), INT8_C( 73), -INT8_C( 120), -INT8_C( 18), -INT8_C( 108), -INT8_C( 78), INT8_C( 30), INT8_C( 82), INT8_C( 63), -INT8_C( 2), -INT8_C( 9), -INT8_C( 101), -INT8_C( 94), INT8_C( 65), -INT8_C( 28), -INT8_C( 106), -INT8_C( 84), -INT8_C( 81), INT8_C( 126), -INT8_C( 19), -INT8_C( 86), INT8_C( 108), -INT8_C( 90), INT8_C( 74), -INT8_C( 103), INT8_C( 77), -INT8_C( 18), -INT8_C( 44), -INT8_C( 54), INT8_C( 66), INT8_C( 40), INT8_C( 17), -INT8_C( 117), -INT8_C( 80), INT8_C( 0), INT8_C( 31), INT8_C( 98), INT8_C( 30), INT8_C( 113), -INT8_C( 95), INT8_C( 28), INT8_C( 104), INT8_C( 60), -INT8_C( 66), -INT8_C( 87), INT8_C( 32), INT8_C( 84), INT8_C( 85), -INT8_C( 48), -INT8_C( 46), INT8_C( 66), INT8_C( 122), INT8_C( 63), -INT8_C( 23), -INT8_C( 60), -INT8_C( 40), INT8_C( 54), -INT8_C( 77), -INT8_C( 84) }, { INT8_C( 0), INT8_C( 0), INT8_C( 124), INT8_C( 83), INT8_C( 84), INT8_C( 71), INT8_C( 73), INT8_C( 0), INT8_C( 70), INT8_C( 0), -INT8_C( 36), INT8_C( 0), INT8_C( 82), INT8_C( 0), INT8_C( 0), INT8_C( 99), INT8_C( 0), INT8_C( 0), INT8_C( 65), INT8_C( 0), INT8_C( 0), -INT8_C( 19), INT8_C( 0), INT8_C( 126), -INT8_C( 19), INT8_C( 0), INT8_C( 108), INT8_C( 0), INT8_C( 74), INT8_C( 120), INT8_C( 0), INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 66), INT8_C( 40), INT8_C( 0), INT8_C( 
26), INT8_C( 0), INT8_C( 0), INT8_C( 51), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 47), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 92), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 12), INT8_C( 0), INT8_C( 66), INT8_C( 122), INT8_C( 65), -INT8_C( 6), INT8_C( 0), -INT8_C( 40), INT8_C( 54), INT8_C( 45), INT8_C( 0) } }, { UINT64_C(11534428380767581440), { -INT8_C( 24), INT8_C( 48), INT8_C( 17), -INT8_C( 119), INT8_C( 76), INT8_C( 122), -INT8_C( 58), INT8_C( 10), INT8_C( 35), -INT8_C( 26), INT8_C( 94), INT8_C( 121), -INT8_C( 74), INT8_C( 48), -INT8_C( 69), INT8_C( 48), INT8_C( 111), -INT8_C( 92), -INT8_C( 11), INT8_C( 72), -INT8_C( 37), -INT8_C( 88), -INT8_C( 12), -INT8_C( 37), -INT8_C( 99), -INT8_C( 55), -INT8_C( 19), INT8_C( 29), INT8_C( 79), -INT8_C( 1), -INT8_C( 67), INT8_C( 55), INT8_C( 47), -INT8_C( 49), -INT8_C( 64), INT8_C( 123), INT8_C( 73), -INT8_C( 122), -INT8_C( 123), INT8_C( 108), INT8_C( 109), -INT8_C( 29), -INT8_C( 27), INT8_C( 35), INT8_C( 20), -INT8_C( 95), INT8_C( 84), -INT8_C( 125), INT8_C( 69), INT8_C( 73), -INT8_C( 53), INT8_C( 32), -INT8_C( 15), -INT8_C( 64), -INT8_C( 4), -INT8_C( 114), -INT8_C( 119), -INT8_C( 23), -INT8_C( 85), -INT8_C( 40), -INT8_C( 23), INT8_C( 105), INT8_C( 15), INT8_C( 24) }, { INT8_C( 56), -INT8_C( 48), -INT8_C( 108), -INT8_C( 127), INT8_C( 86), INT8_C( 25), -INT8_C( 19), -INT8_C( 61), -INT8_C( 3), -INT8_C( 45), -INT8_C( 25), INT8_C( 17), INT8_C( 116), INT8_C( 59), -INT8_C( 108), -INT8_C( 71), -INT8_C( 124), INT8_C( 96), -INT8_C( 38), INT8_C( 117), INT8_C( 32), -INT8_C( 42), INT8_C( 3), -INT8_C( 87), -INT8_C( 65), -INT8_C( 82), -INT8_C( 126), -INT8_C( 88), INT8_C( 23), -INT8_C( 111), -INT8_C( 63), INT8_C( 79), INT8_C( 97), INT8_C( 85), -INT8_C( 48), -INT8_C( 72), INT8_C( 110), -INT8_C( 66), INT8_C( 123), INT8_C( 107), -INT8_C( 111), INT8_C( 98), INT8_C( 124), INT8_C( 5), -INT8_C( 99), INT8_C( 17), -INT8_C( 66), INT8_C( 33), INT8_C( 113), -INT8_C( 104), -INT8_C( 106), -INT8_C( 111), INT8_C( 110), -INT8_C( 
103), INT8_C( 58), INT8_C( 46), INT8_C( 72), -INT8_C( 68), -INT8_C( 42), INT8_C( 95), INT8_C( 78), -INT8_C( 105), -INT8_C( 81), -INT8_C( 81) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 35), INT8_C( 0), INT8_C( 94), INT8_C( 0), INT8_C( 116), INT8_C( 59), -INT8_C( 69), INT8_C( 48), INT8_C( 111), INT8_C( 0), -INT8_C( 11), INT8_C( 0), INT8_C( 32), INT8_C( 0), INT8_C( 3), -INT8_C( 37), INT8_C( 0), -INT8_C( 55), INT8_C( 0), INT8_C( 0), INT8_C( 79), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 108), INT8_C( 109), INT8_C( 0), INT8_C( 124), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 33), INT8_C( 0), INT8_C( 73), INT8_C( 0), INT8_C( 0), INT8_C( 110), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 105), INT8_C( 0), INT8_C( 24) } }, { UINT64_C(14899846269867884524), { INT8_C( 69), INT8_C( 67), -INT8_C( 45), -INT8_C( 29), INT8_C( 84), -INT8_C( 110), INT8_C( 4), -INT8_C( 59), INT8_C( 42), -INT8_C( 101), INT8_C( 86), -INT8_C( 103), INT8_C( 52), -INT8_C( 112), -INT8_C( 57), INT8_C( 124), INT8_C( 77), -INT8_C( 99), -INT8_C( 36), -INT8_C( 101), INT8_C( 53), -INT8_C( 117), INT8_C( 74), INT8_C( 33), INT8_C( 10), -INT8_C( 78), INT8_C( 124), INT8_C( 72), -INT8_C( 107), INT8_C( 67), INT8_C( 22), -INT8_C( 38), -INT8_C( 122), -INT8_C( 22), -INT8_C( 67), -INT8_C( 38), INT8_C( 124), -INT8_C( 62), -INT8_C( 97), -INT8_C( 90), INT8_C( 93), -INT8_C( 11), INT8_C( 63), -INT8_C( 111), -INT8_C( 123), INT8_C( 6), INT8_C( 14), -INT8_C( 46), -INT8_C( 92), -INT8_C( 22), INT8_C( 109), -INT8_C( 39), INT8_C( 117), -INT8_C( 72), -INT8_C( 6), INT8_MAX, INT8_C( 106), INT8_C( 119), -INT8_C( 57), -INT8_C( 1), -INT8_C( 70), -INT8_C( 34), -INT8_C( 39), INT8_C( 64) }, { -INT8_C( 56), -INT8_C( 105), INT8_C( 26), INT8_C( 68), INT8_C( 89), -INT8_C( 71), -INT8_C( 22), -INT8_C( 74), -INT8_C( 82), INT8_C( 
42), INT8_C( 71), INT8_C( 51), INT8_C( 48), INT8_C( 85), INT8_C( 6), -INT8_C( 44), INT8_C( 63), INT8_C( 115), -INT8_C( 83), -INT8_C( 76), INT8_C( 43), -INT8_C( 88), INT8_C( 52), -INT8_C( 107), INT8_C( 31), -INT8_C( 5), -INT8_C( 108), -INT8_C( 39), -INT8_C( 39), INT8_C( 110), INT8_C( 25), -INT8_C( 95), INT8_C( 5), INT8_C( 51), -INT8_C( 27), INT8_C( 94), -INT8_C( 20), -INT8_C( 48), INT8_C( 20), -INT8_C( 102), -INT8_C( 6), INT8_C( 91), -INT8_C( 51), INT8_C( 42), -INT8_C( 79), -INT8_C( 45), -INT8_C( 1), -INT8_C( 16), INT8_C( 71), -INT8_C( 84), -INT8_C( 91), INT8_C( 114), INT8_C( 84), -INT8_C( 39), INT8_C( 8), INT8_C( 115), -INT8_C( 44), -INT8_C( 100), INT8_C( 76), -INT8_C( 82), INT8_C( 10), INT8_C( 101), INT8_C( 79), INT8_C( 15) }, { INT8_C( 0), INT8_C( 0), INT8_C( 26), INT8_C( 68), INT8_C( 0), -INT8_C( 71), INT8_C( 4), -INT8_C( 59), INT8_C( 42), INT8_C( 42), INT8_C( 86), INT8_C( 51), INT8_C( 52), INT8_C( 85), INT8_C( 6), INT8_C( 0), INT8_C( 77), INT8_C( 115), -INT8_C( 36), INT8_C( 0), INT8_C( 0), -INT8_C( 88), INT8_C( 74), INT8_C( 0), INT8_C( 31), -INT8_C( 5), INT8_C( 0), INT8_C( 72), -INT8_C( 39), INT8_C( 0), INT8_C( 25), INT8_C( 0), INT8_C( 5), INT8_C( 0), -INT8_C( 27), INT8_C( 94), INT8_C( 124), -INT8_C( 48), INT8_C( 0), INT8_C( 0), INT8_C( 93), INT8_C( 91), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 6), INT8_C( 14), -INT8_C( 16), INT8_C( 0), -INT8_C( 22), INT8_C( 109), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_MAX, INT8_C( 0), INT8_C( 119), INT8_C( 76), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 79), INT8_C( 64) } } };
/* Run every vector: load both inputs, apply the masked max with this
 * entry's k, and compare all 64 result lanes against the expected r. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_maskz_max_epi8(test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; }
static int test_simde_mm512_max_epu8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const
uint8_t a[64]; const uint8_t b[64]; const uint8_t r[64]; } test_vec[] = { { { UINT8_C( 59), UINT8_C(162), UINT8_C( 5), UINT8_C( 15), UINT8_C(230), UINT8_C(146), UINT8_C(192), UINT8_C( 69), UINT8_C(125), UINT8_C(138), UINT8_C(150), UINT8_C( 98), UINT8_C(245), UINT8_C(241), UINT8_C(112), UINT8_C(249), UINT8_C(152), UINT8_C(155), UINT8_C( 67), UINT8_C(153), UINT8_C(113), UINT8_C(166), UINT8_C(206), UINT8_C(155), UINT8_C( 29), UINT8_C(173), UINT8_C( 53), UINT8_C( 7), UINT8_C(175), UINT8_C(212), UINT8_C(130), UINT8_C(234), UINT8_C(118), UINT8_C(135), UINT8_C(249), UINT8_C( 92), UINT8_C( 26), UINT8_C(185), UINT8_C(161), UINT8_C(151), UINT8_C( 67), UINT8_C( 56), UINT8_C(249), UINT8_C( 57), UINT8_C( 41), UINT8_C(105), UINT8_C( 50), UINT8_C(193), UINT8_C( 4), UINT8_C(117), UINT8_C( 90), UINT8_C(118), UINT8_C( 27), UINT8_C( 40), UINT8_C( 17), UINT8_C( 56), UINT8_C(214), UINT8_C( 70), UINT8_C( 63), UINT8_C(133), UINT8_C( 26), UINT8_C(193), UINT8_C(111), UINT8_C(144) }, { UINT8_C( 73), UINT8_C(104), UINT8_C(237), UINT8_C( 99), UINT8_C( 33), UINT8_C(142), UINT8_C(250), UINT8_C(100), UINT8_C(198), UINT8_C(244), UINT8_C(157), UINT8_C(240), UINT8_C( 93), UINT8_C(207), UINT8_C(177), UINT8_C( 98), UINT8_C( 68), UINT8_C( 12), UINT8_C(216), UINT8_C( 95), UINT8_C( 52), UINT8_C(233), UINT8_C(152), UINT8_C( 10), UINT8_C( 47), UINT8_C(215), UINT8_C(143), UINT8_C( 73), UINT8_C(153), UINT8_C(254), UINT8_C(217), UINT8_C(226), UINT8_C(102), UINT8_C(198), UINT8_C( 69), UINT8_C(135), UINT8_C( 85), UINT8_C( 63), UINT8_C(236), UINT8_C( 27), UINT8_C( 51), UINT8_C(137), UINT8_C( 11), UINT8_C(145), UINT8_C( 89), UINT8_C(189), UINT8_C(243), UINT8_C(157), UINT8_C(201), UINT8_C(203), UINT8_C(253), UINT8_C(253), UINT8_C(180), UINT8_C(149), UINT8_C( 8), UINT8_C(227), UINT8_C(108), UINT8_C(151), UINT8_C( 44), UINT8_C( 5), UINT8_C(150), UINT8_C( 5), UINT8_C(231), UINT8_C(252) }, { UINT8_C( 73), UINT8_C(162), UINT8_C(237), UINT8_C( 99), UINT8_C(230), UINT8_C(146), UINT8_C(250), UINT8_C(100), UINT8_C(198), 
UINT8_C(244), UINT8_C(157), UINT8_C(240), UINT8_C(245), UINT8_C(241), UINT8_C(177), UINT8_C(249), UINT8_C(152), UINT8_C(155), UINT8_C(216), UINT8_C(153), UINT8_C(113), UINT8_C(233), UINT8_C(206), UINT8_C(155), UINT8_C( 47), UINT8_C(215), UINT8_C(143), UINT8_C( 73), UINT8_C(175), UINT8_C(254), UINT8_C(217), UINT8_C(234), UINT8_C(118), UINT8_C(198), UINT8_C(249), UINT8_C(135), UINT8_C( 85), UINT8_C(185), UINT8_C(236), UINT8_C(151), UINT8_C( 67), UINT8_C(137), UINT8_C(249), UINT8_C(145), UINT8_C( 89), UINT8_C(189), UINT8_C(243), UINT8_C(193), UINT8_C(201), UINT8_C(203), UINT8_C(253), UINT8_C(253), UINT8_C(180), UINT8_C(149), UINT8_C( 17), UINT8_C(227), UINT8_C(214), UINT8_C(151), UINT8_C( 63), UINT8_C(133), UINT8_C(150), UINT8_C(193), UINT8_C(231), UINT8_C(252) } }, { { UINT8_C(204), UINT8_C( 44), UINT8_C(132), UINT8_C( 33), UINT8_C(108), UINT8_C(112), UINT8_C( 60), UINT8_C(159), UINT8_C(249), UINT8_C( 72), UINT8_C( 48), UINT8_C( 82), UINT8_C( 5), UINT8_C( 35), UINT8_C(240), UINT8_C(206), UINT8_C(238), UINT8_C(237), UINT8_C(203), UINT8_C(162), UINT8_C(130), UINT8_C(211), UINT8_C(133), UINT8_C(238), UINT8_C(107), UINT8_C(177), UINT8_C(244), UINT8_C( 1), UINT8_C(183), UINT8_C(219), UINT8_C(253), UINT8_C(131), UINT8_C( 8), UINT8_C(129), UINT8_C(164), UINT8_C(116), UINT8_C(241), UINT8_C(224), UINT8_C( 19), UINT8_C(235), UINT8_C( 40), UINT8_C( 68), UINT8_C( 61), UINT8_C( 45), UINT8_C(103), UINT8_C( 45), UINT8_C(251), UINT8_C( 86), UINT8_C( 26), UINT8_C(199), UINT8_C(248), UINT8_C(156), UINT8_C(154), UINT8_C(126), UINT8_C(139), UINT8_C( 5), UINT8_C( 47), UINT8_C(127), UINT8_C( 6), UINT8_C(230), UINT8_C( 90), UINT8_C( 4), UINT8_C(105), UINT8_C( 98) }, { UINT8_C(133), UINT8_C( 13), UINT8_C(214), UINT8_C(119), UINT8_C(238), UINT8_C(234), UINT8_C( 98), UINT8_C( 22), UINT8_C( 46), UINT8_C(159), UINT8_C( 68), UINT8_C(149), UINT8_C(205), UINT8_C( 63), UINT8_C(235), UINT8_C(231), UINT8_C( 6), UINT8_C(228), UINT8_C(132), UINT8_C(161), UINT8_C( 98), UINT8_C( 15), UINT8_C(166), 
UINT8_C(145), UINT8_C(142), UINT8_C(173), UINT8_C(120), UINT8_C(232), UINT8_C(177), UINT8_C(225), UINT8_C( 75), UINT8_C( 54), UINT8_C(239), UINT8_C( 33), UINT8_C(173), UINT8_C(221), UINT8_C( 11), UINT8_C( 15), UINT8_C(243), UINT8_C( 57), UINT8_C(175), UINT8_C( 55), UINT8_C(207), UINT8_C(124), UINT8_C(119), UINT8_C(186), UINT8_C( 99), UINT8_C(125), UINT8_C(158), UINT8_C(231), UINT8_C( 30), UINT8_C( 0), UINT8_C(246), UINT8_C(197), UINT8_C(146), UINT8_C(132), UINT8_C(114), UINT8_C( 10), UINT8_C(109), UINT8_C( 35), UINT8_C(235), UINT8_C(184), UINT8_C( 89), UINT8_C(218) }, { UINT8_C(204), UINT8_C( 44), UINT8_C(214), UINT8_C(119), UINT8_C(238), UINT8_C(234), UINT8_C( 98), UINT8_C(159), UINT8_C(249), UINT8_C(159), UINT8_C( 68), UINT8_C(149), UINT8_C(205), UINT8_C( 63), UINT8_C(240), UINT8_C(231), UINT8_C(238), UINT8_C(237), UINT8_C(203), UINT8_C(162), UINT8_C(130), UINT8_C(211), UINT8_C(166), UINT8_C(238), UINT8_C(142), UINT8_C(177), UINT8_C(244), UINT8_C(232), UINT8_C(183), UINT8_C(225), UINT8_C(253), UINT8_C(131), UINT8_C(239), UINT8_C(129), UINT8_C(173), UINT8_C(221), UINT8_C(241), UINT8_C(224), UINT8_C(243), UINT8_C(235), UINT8_C(175), UINT8_C( 68), UINT8_C(207), UINT8_C(124), UINT8_C(119), UINT8_C(186), UINT8_C(251), UINT8_C(125), UINT8_C(158), UINT8_C(231), UINT8_C(248), UINT8_C(156), UINT8_C(246), UINT8_C(197), UINT8_C(146), UINT8_C(132), UINT8_C(114), UINT8_C(127), UINT8_C(109), UINT8_C(230), UINT8_C(235), UINT8_C(184), UINT8_C(105), UINT8_C(218) } }, { { UINT8_C(217), UINT8_C( 7), UINT8_C(183), UINT8_C(229), UINT8_C( 22), UINT8_C(171), UINT8_C( 30), UINT8_C(197), UINT8_C(226), UINT8_C(237), UINT8_C( 65), UINT8_C( 89), UINT8_C(168), UINT8_C(165), UINT8_C(215), UINT8_C( 70), UINT8_C(140), UINT8_C(245), UINT8_C( 71), UINT8_C(131), UINT8_C(186), UINT8_C(217), UINT8_C( 7), UINT8_C( 44), UINT8_C(227), UINT8_C(116), UINT8_C( 79), UINT8_C(206), UINT8_C( 44), UINT8_C(169), UINT8_C(169), UINT8_C( 6), UINT8_C(176), UINT8_C( 96), UINT8_C(235), UINT8_C(198), UINT8_C( 11), 
UINT8_C( 9), UINT8_C(140), UINT8_C(238), UINT8_C(247), UINT8_C(205), UINT8_C( 71), UINT8_C(159), UINT8_C(114), UINT8_C( 30), UINT8_C(229), UINT8_MAX, UINT8_C( 20), UINT8_C( 44), UINT8_C(130), UINT8_C(206), UINT8_C( 5), UINT8_C(137), UINT8_C(251), UINT8_C(232), UINT8_C(254), UINT8_C( 74), UINT8_C(183), UINT8_C( 42), UINT8_C(243), UINT8_C( 96), UINT8_C( 48), UINT8_C(163) }, { UINT8_C(192), UINT8_C( 27), UINT8_C(106), UINT8_C(204), UINT8_C( 37), UINT8_C(246), UINT8_C(186), UINT8_C( 28), UINT8_C(195), UINT8_C( 1), UINT8_C(187), UINT8_C( 54), UINT8_C( 32), UINT8_C(160), UINT8_C( 53), UINT8_C( 52), UINT8_C(205), UINT8_C(183), UINT8_C( 2), UINT8_C(210), UINT8_C( 64), UINT8_C(253), UINT8_C(187), UINT8_C( 62), UINT8_C( 72), UINT8_C(114), UINT8_C(105), UINT8_C( 59), UINT8_C(210), UINT8_C(153), UINT8_C(223), UINT8_C(146), UINT8_C(181), UINT8_C( 73), UINT8_C( 94), UINT8_C(218), UINT8_C( 63), UINT8_C( 24), UINT8_C(246), UINT8_C( 2), UINT8_C( 26), UINT8_C(177), UINT8_C( 56), UINT8_C( 58), UINT8_C( 81), UINT8_C(109), UINT8_C(110), UINT8_C( 30), UINT8_C( 36), UINT8_C(112), UINT8_C(241), UINT8_C(101), UINT8_C(110), UINT8_C(172), UINT8_C(163), UINT8_C(182), UINT8_C( 30), UINT8_C( 12), UINT8_C(241), UINT8_C(240), UINT8_C(166), UINT8_C(208), UINT8_C(130), UINT8_C( 91) }, { UINT8_C(217), UINT8_C( 27), UINT8_C(183), UINT8_C(229), UINT8_C( 37), UINT8_C(246), UINT8_C(186), UINT8_C(197), UINT8_C(226), UINT8_C(237), UINT8_C(187), UINT8_C( 89), UINT8_C(168), UINT8_C(165), UINT8_C(215), UINT8_C( 70), UINT8_C(205), UINT8_C(245), UINT8_C( 71), UINT8_C(210), UINT8_C(186), UINT8_C(253), UINT8_C(187), UINT8_C( 62), UINT8_C(227), UINT8_C(116), UINT8_C(105), UINT8_C(206), UINT8_C(210), UINT8_C(169), UINT8_C(223), UINT8_C(146), UINT8_C(181), UINT8_C( 96), UINT8_C(235), UINT8_C(218), UINT8_C( 63), UINT8_C( 24), UINT8_C(246), UINT8_C(238), UINT8_C(247), UINT8_C(205), UINT8_C( 71), UINT8_C(159), UINT8_C(114), UINT8_C(109), UINT8_C(229), UINT8_MAX, UINT8_C( 36), UINT8_C(112), UINT8_C(241), UINT8_C(206), 
UINT8_C(110), UINT8_C(172), UINT8_C(251), UINT8_C(232), UINT8_C(254), UINT8_C( 74), UINT8_C(241), UINT8_C(240), UINT8_C(243), UINT8_C(208), UINT8_C(130), UINT8_C(163) } }, { { UINT8_C( 25), UINT8_C(225), UINT8_C( 53), UINT8_C( 88), UINT8_C(249), UINT8_C( 43), UINT8_C( 91), UINT8_C( 19), UINT8_C(220), UINT8_C(147), UINT8_C( 77), UINT8_C( 45), UINT8_C( 1), UINT8_C(187), UINT8_C( 76), UINT8_C( 37), UINT8_C( 44), UINT8_C( 61), UINT8_C(138), UINT8_C(154), UINT8_C(233), UINT8_C( 46), UINT8_C( 80), UINT8_C( 7), UINT8_C( 58), UINT8_C( 65), UINT8_C(247), UINT8_C(224), UINT8_C( 18), UINT8_C(121), UINT8_C( 59), UINT8_C( 43), UINT8_C( 90), UINT8_C(112), UINT8_C(132), UINT8_C( 84), UINT8_C(155), UINT8_C(223), UINT8_C(103), UINT8_C(119), UINT8_C(114), UINT8_C(181), UINT8_C(165), UINT8_C(115), UINT8_C(112), UINT8_C(241), UINT8_C(153), UINT8_C(156), UINT8_C( 46), UINT8_C( 35), UINT8_C( 54), UINT8_C( 23), UINT8_C( 81), UINT8_C(134), UINT8_C( 30), UINT8_C(140), UINT8_C(200), UINT8_C( 21), UINT8_C(108), UINT8_C(218), UINT8_C(142), UINT8_C(168), UINT8_C( 5), UINT8_C(233) }, { UINT8_C( 24), UINT8_C(137), UINT8_C( 61), UINT8_C(180), UINT8_C(104), UINT8_C(164), UINT8_C( 43), UINT8_C(219), UINT8_C( 89), UINT8_C(208), UINT8_C( 78), UINT8_C(202), UINT8_C(193), UINT8_C(231), UINT8_C(102), UINT8_C(239), UINT8_C( 11), UINT8_C(157), UINT8_C( 6), UINT8_C( 92), UINT8_C( 35), UINT8_C( 36), UINT8_C(232), UINT8_C(235), UINT8_C( 57), UINT8_C( 85), UINT8_C(197), UINT8_C(200), UINT8_C(253), UINT8_C(203), UINT8_C(177), UINT8_C( 21), UINT8_C( 84), UINT8_C(238), UINT8_C(201), UINT8_C(189), UINT8_C(146), UINT8_C(245), UINT8_C(152), UINT8_C(236), UINT8_C(197), UINT8_C(230), UINT8_C(182), UINT8_C(135), UINT8_C(206), UINT8_C( 28), UINT8_C(118), UINT8_C(217), UINT8_C(185), UINT8_C(125), UINT8_C( 53), UINT8_C(221), UINT8_C(161), UINT8_C( 30), UINT8_C(200), UINT8_C(219), UINT8_C(115), UINT8_C(142), UINT8_C(163), UINT8_C(112), UINT8_C( 89), UINT8_C( 84), UINT8_C(133), UINT8_C(173) }, { UINT8_C( 25), UINT8_C(225), 
UINT8_C( 61), UINT8_C(180), UINT8_C(249), UINT8_C(164), UINT8_C( 91), UINT8_C(219), UINT8_C(220), UINT8_C(208), UINT8_C( 78), UINT8_C(202), UINT8_C(193), UINT8_C(231), UINT8_C(102), UINT8_C(239), UINT8_C( 44), UINT8_C(157), UINT8_C(138), UINT8_C(154), UINT8_C(233), UINT8_C( 46), UINT8_C(232), UINT8_C(235), UINT8_C( 58), UINT8_C( 85), UINT8_C(247), UINT8_C(224), UINT8_C(253), UINT8_C(203), UINT8_C(177), UINT8_C( 43), UINT8_C( 90), UINT8_C(238), UINT8_C(201), UINT8_C(189), UINT8_C(155), UINT8_C(245), UINT8_C(152), UINT8_C(236), UINT8_C(197), UINT8_C(230), UINT8_C(182), UINT8_C(135), UINT8_C(206), UINT8_C(241), UINT8_C(153), UINT8_C(217), UINT8_C(185), UINT8_C(125), UINT8_C( 54), UINT8_C(221), UINT8_C(161), UINT8_C(134), UINT8_C(200), UINT8_C(219), UINT8_C(200), UINT8_C(142), UINT8_C(163), UINT8_C(218), UINT8_C(142), UINT8_C(168), UINT8_C(133), UINT8_C(233) } }, { { UINT8_C( 66), UINT8_C( 79), UINT8_C(106), UINT8_C(212), UINT8_C( 68), UINT8_C( 2), UINT8_C(192), UINT8_C( 9), UINT8_C(233), UINT8_C(118), UINT8_C(144), UINT8_C(183), UINT8_C(147), UINT8_C( 7), UINT8_C(144), UINT8_C( 76), UINT8_C(132), UINT8_C(197), UINT8_C( 41), UINT8_C( 37), UINT8_C(227), UINT8_C(242), UINT8_C( 0), UINT8_C( 86), UINT8_C(128), UINT8_C(163), UINT8_C(198), UINT8_C(217), UINT8_C(247), UINT8_C( 76), UINT8_C(134), UINT8_C( 57), UINT8_C(155), UINT8_C(241), UINT8_C( 14), UINT8_C(223), UINT8_C(243), UINT8_C(206), UINT8_C(232), UINT8_C(220), UINT8_C( 69), UINT8_C(121), UINT8_C(147), UINT8_C(216), UINT8_C(128), UINT8_C( 35), UINT8_C( 36), UINT8_C( 4), UINT8_C(233), UINT8_C( 78), UINT8_C( 41), UINT8_C(204), UINT8_C( 64), UINT8_C( 42), UINT8_C( 35), UINT8_C(192), UINT8_C(205), UINT8_C(233), UINT8_C(153), UINT8_C(197), UINT8_C( 53), UINT8_C( 31), UINT8_C(254), UINT8_C(208) }, { UINT8_C( 16), UINT8_C( 12), UINT8_C(175), UINT8_C( 4), UINT8_C(219), UINT8_C(152), UINT8_C(224), UINT8_C( 32), UINT8_C( 17), UINT8_C(116), UINT8_C(248), UINT8_C(145), UINT8_C(151), UINT8_C( 28), UINT8_C(149), UINT8_C(128), 
UINT8_C(106), UINT8_C(190), UINT8_C( 77), UINT8_C(170), UINT8_C(232), UINT8_C(112), UINT8_C(106), UINT8_C(182), UINT8_C( 89), UINT8_C( 3), UINT8_C(123), UINT8_C(143), UINT8_C( 35), UINT8_C(121), UINT8_C( 95), UINT8_C( 51), UINT8_C(134), UINT8_C( 15), UINT8_C( 55), UINT8_C( 97), UINT8_C(167), UINT8_C( 24), UINT8_C(129), UINT8_C(184), UINT8_C(140), UINT8_C(121), UINT8_C( 73), UINT8_C( 35), UINT8_C(149), UINT8_C(222), UINT8_C(164), UINT8_C( 0), UINT8_C(156), UINT8_C(241), UINT8_C(170), UINT8_C(133), UINT8_C( 97), UINT8_C( 21), UINT8_C( 59), UINT8_C(186), UINT8_C( 24), UINT8_C(182), UINT8_C( 73), UINT8_C( 59), UINT8_C( 47), UINT8_C(169), UINT8_C(111), UINT8_C(181) }, { UINT8_C( 66), UINT8_C( 79), UINT8_C(175), UINT8_C(212), UINT8_C(219), UINT8_C(152), UINT8_C(224), UINT8_C( 32), UINT8_C(233), UINT8_C(118), UINT8_C(248), UINT8_C(183), UINT8_C(151), UINT8_C( 28), UINT8_C(149), UINT8_C(128), UINT8_C(132), UINT8_C(197), UINT8_C( 77), UINT8_C(170), UINT8_C(232), UINT8_C(242), UINT8_C(106), UINT8_C(182), UINT8_C(128), UINT8_C(163), UINT8_C(198), UINT8_C(217), UINT8_C(247), UINT8_C(121), UINT8_C(134), UINT8_C( 57), UINT8_C(155), UINT8_C(241), UINT8_C( 55), UINT8_C(223), UINT8_C(243), UINT8_C(206), UINT8_C(232), UINT8_C(220), UINT8_C(140), UINT8_C(121), UINT8_C(147), UINT8_C(216), UINT8_C(149), UINT8_C(222), UINT8_C(164), UINT8_C( 4), UINT8_C(233), UINT8_C(241), UINT8_C(170), UINT8_C(204), UINT8_C( 97), UINT8_C( 42), UINT8_C( 59), UINT8_C(192), UINT8_C(205), UINT8_C(233), UINT8_C(153), UINT8_C(197), UINT8_C( 53), UINT8_C(169), UINT8_C(254), UINT8_C(208) } }, { { UINT8_C(184), UINT8_C(166), UINT8_C( 22), UINT8_C( 95), UINT8_C(190), UINT8_C(151), UINT8_C( 23), UINT8_C( 74), UINT8_C( 16), UINT8_C( 96), UINT8_C(110), UINT8_C(166), UINT8_C( 62), UINT8_C( 18), UINT8_C(166), UINT8_C(218), UINT8_C( 3), UINT8_C( 80), UINT8_C( 95), UINT8_C(100), UINT8_C(101), UINT8_C(154), UINT8_C( 30), UINT8_C(126), UINT8_C( 80), UINT8_C(104), UINT8_C(185), UINT8_C(128), UINT8_C( 17), UINT8_C( 40), 
UINT8_C( 53), UINT8_C(201), UINT8_C(207), UINT8_C( 76), UINT8_C( 40), UINT8_C(141), UINT8_C(227), UINT8_C( 63), UINT8_C(216), UINT8_C(244), UINT8_C(159), UINT8_C( 70), UINT8_C(154), UINT8_C(221), UINT8_C( 88), UINT8_C( 64), UINT8_C(183), UINT8_C( 91), UINT8_C(144), UINT8_C( 23), UINT8_C(191), UINT8_C(246), UINT8_C(177), UINT8_C(221), UINT8_C(116), UINT8_C( 2), UINT8_C( 69), UINT8_C( 45), UINT8_C(130), UINT8_C( 86), UINT8_C( 86), UINT8_C(183), UINT8_C( 31), UINT8_C( 37) }, { UINT8_C( 3), UINT8_C( 71), UINT8_C(178), UINT8_C(231), UINT8_C(134), UINT8_C(138), UINT8_C(219), UINT8_C( 37), UINT8_C(208), UINT8_C(117), UINT8_C( 2), UINT8_C( 40), UINT8_C(181), UINT8_C(186), UINT8_C(131), UINT8_C( 69), UINT8_C(209), UINT8_C( 66), UINT8_C( 59), UINT8_C(130), UINT8_C( 32), UINT8_C(175), UINT8_C(132), UINT8_C(101), UINT8_C(221), UINT8_C( 6), UINT8_C(188), UINT8_C( 51), UINT8_C(190), UINT8_C(219), UINT8_C( 88), UINT8_C(193), UINT8_C( 35), UINT8_C( 10), UINT8_C(168), UINT8_C(169), UINT8_C(149), UINT8_C(131), UINT8_C(207), UINT8_C(101), UINT8_C(248), UINT8_C(209), UINT8_C(142), UINT8_C(173), UINT8_C(139), UINT8_C( 17), UINT8_C(243), UINT8_C( 92), UINT8_C( 84), UINT8_C( 46), UINT8_C(223), UINT8_C(116), UINT8_C(222), UINT8_C( 99), UINT8_C(217), UINT8_C(187), UINT8_C(106), UINT8_C(149), UINT8_C(238), UINT8_C( 40), UINT8_C(113), UINT8_C( 70), UINT8_C(233), UINT8_C(148) }, { UINT8_C(184), UINT8_C(166), UINT8_C(178), UINT8_C(231), UINT8_C(190), UINT8_C(151), UINT8_C(219), UINT8_C( 74), UINT8_C(208), UINT8_C(117), UINT8_C(110), UINT8_C(166), UINT8_C(181), UINT8_C(186), UINT8_C(166), UINT8_C(218), UINT8_C(209), UINT8_C( 80), UINT8_C( 95), UINT8_C(130), UINT8_C(101), UINT8_C(175), UINT8_C(132), UINT8_C(126), UINT8_C(221), UINT8_C(104), UINT8_C(188), UINT8_C(128), UINT8_C(190), UINT8_C(219), UINT8_C( 88), UINT8_C(201), UINT8_C(207), UINT8_C( 76), UINT8_C(168), UINT8_C(169), UINT8_C(227), UINT8_C(131), UINT8_C(216), UINT8_C(244), UINT8_C(248), UINT8_C(209), UINT8_C(154), UINT8_C(221), 
UINT8_C(139), UINT8_C( 64), UINT8_C(243), UINT8_C( 92), UINT8_C(144), UINT8_C( 46), UINT8_C(223), UINT8_C(246), UINT8_C(222), UINT8_C(221), UINT8_C(217), UINT8_C(187), UINT8_C(106), UINT8_C(149), UINT8_C(238), UINT8_C( 86), UINT8_C(113), UINT8_C(183), UINT8_C(233), UINT8_C(148) } }, { { UINT8_C( 80), UINT8_C(146), UINT8_C( 61), UINT8_C(229), UINT8_C( 21), UINT8_C( 12), UINT8_C( 75), UINT8_C( 14), UINT8_C(222), UINT8_C(217), UINT8_C(187), UINT8_C(105), UINT8_C(234), UINT8_C(174), UINT8_C(198), UINT8_C( 62), UINT8_C(221), UINT8_C(165), UINT8_C(178), UINT8_C(187), UINT8_C( 8), UINT8_C(140), UINT8_C(118), UINT8_C(114), UINT8_C( 33), UINT8_C(100), UINT8_C(154), UINT8_C(146), UINT8_C(170), UINT8_C(132), UINT8_C( 38), UINT8_C(250), UINT8_C( 22), UINT8_C(100), UINT8_C(224), UINT8_C( 43), UINT8_C(112), UINT8_C( 43), UINT8_C( 57), UINT8_C( 78), UINT8_C( 4), UINT8_C(245), UINT8_C(184), UINT8_C(238), UINT8_C(163), UINT8_C(126), UINT8_C( 45), UINT8_C(128), UINT8_C( 35), UINT8_C(223), UINT8_C( 59), UINT8_C( 43), UINT8_C(107), UINT8_C(177), UINT8_C(158), UINT8_C(141), UINT8_C( 21), UINT8_C( 56), UINT8_C( 31), UINT8_C(191), UINT8_C(188), UINT8_C( 70), UINT8_C(186), UINT8_C(210) }, { UINT8_C(170), UINT8_C(154), UINT8_C(254), UINT8_C( 26), UINT8_C(197), UINT8_C( 55), UINT8_C(105), UINT8_C(201), UINT8_C( 44), UINT8_C( 33), UINT8_C(183), UINT8_C(208), UINT8_C(159), UINT8_C(228), UINT8_C( 80), UINT8_C(194), UINT8_C(196), UINT8_C(140), UINT8_C(237), UINT8_C( 47), UINT8_C( 61), UINT8_C(139), UINT8_C(188), UINT8_C( 83), UINT8_C(196), UINT8_C(220), UINT8_C( 18), UINT8_C(128), UINT8_C( 34), UINT8_C(204), UINT8_C( 83), UINT8_C(204), UINT8_C(102), UINT8_C( 81), UINT8_C(230), UINT8_C( 43), UINT8_C(136), UINT8_C( 79), UINT8_C(244), UINT8_C(181), UINT8_C(112), UINT8_C(172), UINT8_C(133), UINT8_C( 15), UINT8_C(144), UINT8_C(213), UINT8_C(209), UINT8_C( 84), UINT8_C( 97), UINT8_C(191), UINT8_C(132), UINT8_C(159), UINT8_C( 74), UINT8_C( 64), UINT8_C(242), UINT8_C( 14), UINT8_C( 28), UINT8_C( 4), 
UINT8_C(143), UINT8_C( 62), UINT8_C(209), UINT8_C(226), UINT8_C( 10), UINT8_C( 55) }, { UINT8_C(170), UINT8_C(154), UINT8_C(254), UINT8_C(229), UINT8_C(197), UINT8_C( 55), UINT8_C(105), UINT8_C(201), UINT8_C(222), UINT8_C(217), UINT8_C(187), UINT8_C(208), UINT8_C(234), UINT8_C(228), UINT8_C(198), UINT8_C(194), UINT8_C(221), UINT8_C(165), UINT8_C(237), UINT8_C(187), UINT8_C( 61), UINT8_C(140), UINT8_C(188), UINT8_C(114), UINT8_C(196), UINT8_C(220), UINT8_C(154), UINT8_C(146), UINT8_C(170), UINT8_C(204), UINT8_C( 83), UINT8_C(250), UINT8_C(102), UINT8_C(100), UINT8_C(230), UINT8_C( 43), UINT8_C(136), UINT8_C( 79), UINT8_C(244), UINT8_C(181), UINT8_C(112), UINT8_C(245), UINT8_C(184), UINT8_C(238), UINT8_C(163), UINT8_C(213), UINT8_C(209), UINT8_C(128), UINT8_C( 97), UINT8_C(223), UINT8_C(132), UINT8_C(159), UINT8_C(107), UINT8_C(177), UINT8_C(242), UINT8_C(141), UINT8_C( 28), UINT8_C( 56), UINT8_C(143), UINT8_C(191), UINT8_C(209), UINT8_C(226), UINT8_C(186), UINT8_C(210) } }, { { UINT8_C( 51), UINT8_C(241), UINT8_C( 99), UINT8_C(187), UINT8_C( 64), UINT8_C( 87), UINT8_C(112), UINT8_C(177), UINT8_C( 3), UINT8_C(245), UINT8_C(192), UINT8_C(148), UINT8_C(203), UINT8_C(146), UINT8_C(232), UINT8_C( 44), UINT8_C( 81), UINT8_C(108), UINT8_C(203), UINT8_C(155), UINT8_C(173), UINT8_C(189), UINT8_C(170), UINT8_C(201), UINT8_C(194), UINT8_C( 57), UINT8_C( 8), UINT8_C(147), UINT8_C( 27), UINT8_C( 18), UINT8_C(202), UINT8_C( 78), UINT8_C( 3), UINT8_C( 45), UINT8_C( 9), UINT8_C( 68), UINT8_C(133), UINT8_C(122), UINT8_C(245), UINT8_C(136), UINT8_C(111), UINT8_C(181), UINT8_C( 28), UINT8_C( 58), UINT8_C( 71), UINT8_C( 5), UINT8_C(103), UINT8_C(152), UINT8_C(113), UINT8_C( 50), UINT8_C( 52), UINT8_C( 30), UINT8_C(240), UINT8_C(222), UINT8_C(232), UINT8_C(178), UINT8_C( 23), UINT8_C(240), UINT8_C( 69), UINT8_C( 50), UINT8_C( 2), UINT8_C( 15), UINT8_C(128), UINT8_C( 6) }, { UINT8_C( 61), UINT8_C(137), UINT8_C( 74), UINT8_C(194), UINT8_C( 3), UINT8_C( 63), UINT8_C( 74), UINT8_C(115), 
UINT8_C(244), UINT8_C(103), UINT8_C(173), UINT8_C( 60), UINT8_C(108), UINT8_C( 20), UINT8_C(212), UINT8_C(221), UINT8_C( 71), UINT8_C( 8), UINT8_C(252), UINT8_C( 55), UINT8_C(230), UINT8_C(228), UINT8_C(233), UINT8_C(253), UINT8_C(212), UINT8_C( 46), UINT8_C( 47), UINT8_C(214), UINT8_C( 61), UINT8_C(175), UINT8_C(220), UINT8_C(122), UINT8_C( 57), UINT8_C( 38), UINT8_C( 60), UINT8_C( 60), UINT8_C(101), UINT8_C(135), UINT8_C(175), UINT8_C( 90), UINT8_C(238), UINT8_C( 93), UINT8_C(150), UINT8_C( 90), UINT8_C(113), UINT8_C(106), UINT8_C( 55), UINT8_C(184), UINT8_C(115), UINT8_C( 51), UINT8_C(239), UINT8_C( 89), UINT8_C( 23), UINT8_C(216), UINT8_C( 87), UINT8_C(235), UINT8_C( 6), UINT8_C(134), UINT8_C(194), UINT8_C( 68), UINT8_C( 54), UINT8_C(158), UINT8_C(190), UINT8_C(111) }, { UINT8_C( 61), UINT8_C(241), UINT8_C( 99), UINT8_C(194), UINT8_C( 64), UINT8_C( 87), UINT8_C(112), UINT8_C(177), UINT8_C(244), UINT8_C(245), UINT8_C(192), UINT8_C(148), UINT8_C(203), UINT8_C(146), UINT8_C(232), UINT8_C(221), UINT8_C( 81), UINT8_C(108), UINT8_C(252), UINT8_C(155), UINT8_C(230), UINT8_C(228), UINT8_C(233), UINT8_C(253), UINT8_C(212), UINT8_C( 57), UINT8_C( 47), UINT8_C(214), UINT8_C( 61), UINT8_C(175), UINT8_C(220), UINT8_C(122), UINT8_C( 57), UINT8_C( 45), UINT8_C( 60), UINT8_C( 68), UINT8_C(133), UINT8_C(135), UINT8_C(245), UINT8_C(136), UINT8_C(238), UINT8_C(181), UINT8_C(150), UINT8_C( 90), UINT8_C(113), UINT8_C(106), UINT8_C(103), UINT8_C(184), UINT8_C(115), UINT8_C( 51), UINT8_C(239), UINT8_C( 89), UINT8_C(240), UINT8_C(222), UINT8_C(232), UINT8_C(235), UINT8_C( 23), UINT8_C(240), UINT8_C(194), UINT8_C( 68), UINT8_C( 54), UINT8_C(158), UINT8_C(190), UINT8_C(111) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_max_epu8(a, b); simde_test_x86_assert_equal_u8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } 
return 0; } static int test_simde_mm512_mask_max_epu8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int8_t src[64]; const simde__mmask64 k; const uint8_t a[64]; const uint8_t b[64]; const uint8_t r[64]; } test_vec[] = { { { -INT8_C( 111), INT8_C( 15), INT8_C( 4), -INT8_C( 59), INT8_C( 83), INT8_C( 68), -INT8_C( 114), INT8_C( 0), -INT8_C( 53), INT8_C( 19), INT8_C( 68), -INT8_C( 50), INT8_C( 36), -INT8_C( 111), -INT8_C( 120), INT8_C( 56), INT8_C( 81), -INT8_C( 111), INT8_C( 12), -INT8_C( 107), -INT8_C( 59), -INT8_C( 106), -INT8_C( 82), -INT8_C( 55), -INT8_C( 82), -INT8_C( 56), -INT8_C( 95), INT8_C( 57), -INT8_C( 111), -INT8_C( 99), INT8_C( 95), INT8_C( 34), -INT8_C( 84), INT8_C( 99), -INT8_C( 24), -INT8_C( 1), -INT8_C( 89), INT8_C( 118), -INT8_C( 1), INT8_C( 114), -INT8_C( 118), INT8_C( 67), INT8_C( 64), -INT8_C( 82), -INT8_C( 44), -INT8_C( 56), -INT8_C( 25), INT8_C( 38), INT8_C( 90), -INT8_C( 13), -INT8_C( 69), INT8_C( 31), -INT8_C( 118), INT8_C( 106), -INT8_C( 24), INT8_C( 56), INT8_C( 50), -INT8_C( 119), INT8_C( 113), -INT8_C( 60), INT8_C( 38), -INT8_C( 48), -INT8_C( 26), -INT8_C( 46) }, UINT64_C(14938106012493794868), { UINT8_C( 20), UINT8_C(142), UINT8_C(125), UINT8_C(232), UINT8_C( 87), UINT8_C(100), UINT8_C( 14), UINT8_C(177), UINT8_C( 88), UINT8_C(202), UINT8_C(208), UINT8_C(226), UINT8_C( 52), UINT8_C(184), UINT8_C( 26), UINT8_C(102), UINT8_C( 65), UINT8_C(139), UINT8_C( 42), UINT8_C(104), UINT8_C( 91), UINT8_C( 17), UINT8_C( 58), UINT8_C(143), UINT8_C(223), UINT8_C( 12), UINT8_C(107), UINT8_C( 36), UINT8_C(220), UINT8_C(185), UINT8_C(243), UINT8_C(240), UINT8_C( 71), UINT8_C(113), UINT8_C(217), UINT8_C(158), UINT8_C(213), UINT8_C(231), UINT8_C( 79), UINT8_C( 45), UINT8_C(177), UINT8_C( 31), UINT8_C( 15), UINT8_C(229), UINT8_C(215), UINT8_C( 41), UINT8_C( 76), UINT8_C( 25), UINT8_C(180), UINT8_C(118), UINT8_C(129), UINT8_C( 16), UINT8_C(135), UINT8_C(187), UINT8_C(159), UINT8_C(103), UINT8_C(199), UINT8_C( 10), UINT8_C(139), UINT8_C(164), 
UINT8_C(195), UINT8_C(127), UINT8_C(148), UINT8_C( 11) }, { UINT8_C(240), UINT8_C(109), UINT8_C(169), UINT8_C(197), UINT8_C( 85), UINT8_C(249), UINT8_C(243), UINT8_C( 6), UINT8_C( 24), UINT8_C( 2), UINT8_C(236), UINT8_C(240), UINT8_C( 44), UINT8_C( 56), UINT8_C( 9), UINT8_C(224), UINT8_C(174), UINT8_C(138), UINT8_C(240), UINT8_C( 54), UINT8_C( 69), UINT8_C(144), UINT8_C(157), UINT8_C( 13), UINT8_C(154), UINT8_C( 40), UINT8_C(177), UINT8_C( 94), UINT8_C(167), UINT8_C( 69), UINT8_C(105), UINT8_C(151), UINT8_C(179), UINT8_C( 18), UINT8_C( 93), UINT8_C( 8), UINT8_C( 11), UINT8_C( 80), UINT8_C( 14), UINT8_C( 36), UINT8_C( 82), UINT8_C(250), UINT8_C( 20), UINT8_C(126), UINT8_C( 50), UINT8_C( 29), UINT8_C( 95), UINT8_C(225), UINT8_C(167), UINT8_C( 79), UINT8_C( 23), UINT8_C(236), UINT8_C(223), UINT8_C(180), UINT8_C(249), UINT8_C(122), UINT8_C(220), UINT8_C(170), UINT8_C(216), UINT8_C(132), UINT8_C(240), UINT8_C( 65), UINT8_C( 27), UINT8_C(163) }, { UINT8_C(145), UINT8_C( 15), UINT8_C(169), UINT8_C(197), UINT8_C( 87), UINT8_C(249), UINT8_C(142), UINT8_C( 0), UINT8_C(203), UINT8_C(202), UINT8_C(236), UINT8_C(240), UINT8_C( 36), UINT8_C(145), UINT8_C( 26), UINT8_C(224), UINT8_C(174), UINT8_C(145), UINT8_C( 12), UINT8_C(149), UINT8_C( 91), UINT8_C(150), UINT8_C(157), UINT8_C(143), UINT8_C(223), UINT8_C( 40), UINT8_C(161), UINT8_C( 94), UINT8_C(220), UINT8_C(157), UINT8_C(243), UINT8_C(240), UINT8_C(179), UINT8_C( 99), UINT8_C(217), UINT8_MAX, UINT8_C(167), UINT8_C(118), UINT8_C( 79), UINT8_C(114), UINT8_C(138), UINT8_C( 67), UINT8_C( 64), UINT8_C(174), UINT8_C(215), UINT8_C(200), UINT8_C( 95), UINT8_C(225), UINT8_C( 90), UINT8_C(118), UINT8_C(129), UINT8_C(236), UINT8_C(138), UINT8_C(106), UINT8_C(249), UINT8_C( 56), UINT8_C(220), UINT8_C(170), UINT8_C(216), UINT8_C(164), UINT8_C( 38), UINT8_C(208), UINT8_C(148), UINT8_C(163) } }, { { INT8_C( 83), INT8_C( 120), -INT8_C( 85), INT8_C( 95), -INT8_C( 56), -INT8_C( 71), -INT8_C( 125), INT8_C( 27), -INT8_C( 76), -INT8_C( 105), 
-INT8_C( 103), -INT8_C( 26), -INT8_C( 76), -INT8_C( 8), -INT8_C( 57), INT8_C( 91), INT8_C( 72), -INT8_C( 34), INT8_C( 71), INT8_C( 39), -INT8_C( 110), INT8_C( 65), -INT8_C( 95), INT8_C( 111), -INT8_C( 21), INT8_C( 121), -INT8_C( 13), -INT8_C( 37), -INT8_C( 70), INT8_C( 14), INT8_C( 126), INT8_C( 14), -INT8_C( 121), INT8_C( 41), INT8_C( 109), INT8_C( 79), -INT8_C( 29), -INT8_C( 16), INT8_C( 106), -INT8_C( 105), -INT8_C( 121), INT8_C( 4), INT8_C( 125), INT8_C( 59), -INT8_C( 4), INT8_C( 69), -INT8_C( 106), INT8_C( 68), INT8_C( 35), -INT8_C( 35), INT8_C( 108), -INT8_C( 74), INT8_C( 30), INT8_C( 13), INT8_C( 37), INT8_C( 10), -INT8_C( 121), INT8_C( 24), -INT8_C( 27), INT8_C( 65), INT8_C( 38), INT8_C( 100), INT8_C( 79), -INT8_C( 83) }, UINT64_C( 3677021611099012237), { UINT8_C(107), UINT8_C(133), UINT8_C(110), UINT8_C(104), UINT8_C(202), UINT8_C( 4), UINT8_C(172), UINT8_C(237), UINT8_C(226), UINT8_C( 24), UINT8_C(163), UINT8_C( 0), UINT8_C( 38), UINT8_C(200), UINT8_C( 10), UINT8_C(173), UINT8_C(224), UINT8_C(240), UINT8_C(238), UINT8_C( 7), UINT8_C( 84), UINT8_C( 62), UINT8_C(180), UINT8_C(225), UINT8_C(250), UINT8_C(177), UINT8_C( 82), UINT8_C(167), UINT8_C( 25), UINT8_C( 89), UINT8_C(218), UINT8_C(132), UINT8_C(222), UINT8_C( 73), UINT8_C(236), UINT8_C(168), UINT8_C( 77), UINT8_C(153), UINT8_C(150), UINT8_C( 47), UINT8_C(177), UINT8_C( 57), UINT8_C( 48), UINT8_C(215), UINT8_C( 2), UINT8_C( 58), UINT8_C(132), UINT8_C(226), UINT8_C( 42), UINT8_C(115), UINT8_C(233), UINT8_C(126), UINT8_C(177), UINT8_C(158), UINT8_C( 96), UINT8_C(171), UINT8_C( 79), UINT8_C(178), UINT8_C( 82), UINT8_C(104), UINT8_C( 11), UINT8_C( 45), UINT8_C(237), UINT8_C(234) }, { UINT8_C(118), UINT8_C(217), UINT8_C(146), UINT8_C(195), UINT8_C(114), UINT8_C( 40), UINT8_C(243), UINT8_C( 36), UINT8_C( 98), UINT8_C( 35), UINT8_C(251), UINT8_C(100), UINT8_C( 93), UINT8_C(128), UINT8_C( 70), UINT8_C(136), UINT8_C(243), UINT8_C( 48), UINT8_C( 6), UINT8_C(164), UINT8_C(206), UINT8_C(102), UINT8_C( 79), UINT8_C( 
29), UINT8_C( 24), UINT8_C(162), UINT8_C(134), UINT8_C( 36), UINT8_C(207), UINT8_C(115), UINT8_C( 14), UINT8_C( 69), UINT8_C( 76), UINT8_C(160), UINT8_C( 8), UINT8_C(191), UINT8_C(201), UINT8_C(251), UINT8_C(227), UINT8_C( 43), UINT8_C( 30), UINT8_C(222), UINT8_C(143), UINT8_C(124), UINT8_C( 94), UINT8_C(213), UINT8_C( 4), UINT8_C( 81), UINT8_C( 5), UINT8_C( 10), UINT8_C(245), UINT8_C(211), UINT8_C(113), UINT8_C( 69), UINT8_C(241), UINT8_C(137), UINT8_C(231), UINT8_C(119), UINT8_C(173), UINT8_C(182), UINT8_C(234), UINT8_C(187), UINT8_C(251), UINT8_C( 54) }, { UINT8_C(118), UINT8_C(120), UINT8_C(146), UINT8_C(195), UINT8_C(200), UINT8_C(185), UINT8_C(131), UINT8_C(237), UINT8_C(180), UINT8_C(151), UINT8_C(251), UINT8_C(100), UINT8_C( 93), UINT8_C(200), UINT8_C(199), UINT8_C(173), UINT8_C(243), UINT8_C(222), UINT8_C(238), UINT8_C(164), UINT8_C(206), UINT8_C(102), UINT8_C(180), UINT8_C(225), UINT8_C(235), UINT8_C(121), UINT8_C(243), UINT8_C(219), UINT8_C(207), UINT8_C(115), UINT8_C(218), UINT8_C( 14), UINT8_C(135), UINT8_C( 41), UINT8_C(236), UINT8_C(191), UINT8_C(227), UINT8_C(251), UINT8_C(106), UINT8_C( 47), UINT8_C(177), UINT8_C(222), UINT8_C(143), UINT8_C( 59), UINT8_C(252), UINT8_C(213), UINT8_C(132), UINT8_C( 68), UINT8_C( 42), UINT8_C(115), UINT8_C(245), UINT8_C(182), UINT8_C( 30), UINT8_C( 13), UINT8_C( 37), UINT8_C( 10), UINT8_C(231), UINT8_C(178), UINT8_C(229), UINT8_C( 65), UINT8_C(234), UINT8_C(187), UINT8_C( 79), UINT8_C(173) } }, { { INT8_C( 92), INT8_C( 3), -INT8_C( 11), INT8_C( 37), -INT8_C( 1), -INT8_C( 40), INT8_C( 80), INT8_C( 29), -INT8_C( 73), -INT8_C( 33), -INT8_C( 103), INT8_C( 21), -INT8_C( 76), -INT8_C( 99), INT8_C( 103), -INT8_C( 70), -INT8_C( 88), INT8_C( 92), -INT8_C( 115), INT8_C( 25), -INT8_C( 95), INT8_C( 126), -INT8_C( 94), -INT8_C( 120), -INT8_C( 11), INT8_C( 80), INT8_C( 62), -INT8_C( 33), INT8_C( 11), INT8_C( 57), INT8_C( 22), INT8_C( 103), INT8_C( 61), INT8_C( 11), -INT8_C( 116), INT8_C( 60), -INT8_C( 28), -INT8_C( 36), INT8_C( 
89), -INT8_C( 101), -INT8_C( 69), -INT8_C( 13), -INT8_C( 80), INT8_C( 112), -INT8_C( 112), INT8_C( 23), INT8_C( 42), INT8_C( 56), INT8_C( 116), -INT8_C( 73), INT8_C( 81), INT8_C( 21), INT8_C( 54), -INT8_C( 12), -INT8_C( 98), INT8_C( 43), INT8_C( 68), -INT8_C( 36), INT8_C( 11), INT8_C( 79), INT8_C( 22), INT8_C( 33), -INT8_C( 73), INT8_C( 83) }, UINT64_C(15829000539738161964), { UINT8_C(219), UINT8_C( 92), UINT8_C( 75), UINT8_C(108), UINT8_C(115), UINT8_C(117), UINT8_C(164), UINT8_C(231), UINT8_C( 45), UINT8_C(246), UINT8_C(253), UINT8_C( 99), UINT8_C(234), UINT8_C(155), UINT8_C(142), UINT8_C( 46), UINT8_C(119), UINT8_C(153), UINT8_C(125), UINT8_C(141), UINT8_C(186), UINT8_C( 52), UINT8_C(224), UINT8_C(231), UINT8_C(120), UINT8_C(111), UINT8_C(247), UINT8_C(152), UINT8_C( 88), UINT8_C(163), UINT8_C(115), UINT8_C( 51), UINT8_MAX, UINT8_C(191), UINT8_C(159), UINT8_C(114), UINT8_C( 52), UINT8_C( 68), UINT8_C( 90), UINT8_C( 97), UINT8_C( 58), UINT8_C( 87), UINT8_C(196), UINT8_C( 36), UINT8_C(242), UINT8_C( 83), UINT8_C( 82), UINT8_C(105), UINT8_C(236), UINT8_C(207), UINT8_C(247), UINT8_C(167), UINT8_C( 4), UINT8_C(215), UINT8_C(142), UINT8_C(124), UINT8_C( 71), UINT8_C(133), UINT8_C( 20), UINT8_C(159), UINT8_C( 40), UINT8_C(135), UINT8_C(210), UINT8_C( 39) }, { UINT8_C( 70), UINT8_C(114), UINT8_C(154), UINT8_C(123), UINT8_C(182), UINT8_C(244), UINT8_C(220), UINT8_C(240), UINT8_C( 75), UINT8_C(161), UINT8_C( 20), UINT8_C( 61), UINT8_C(244), UINT8_C(102), UINT8_C(166), UINT8_C(224), UINT8_C( 53), UINT8_C(157), UINT8_C(135), UINT8_C( 57), UINT8_C(117), UINT8_C( 21), UINT8_C(181), UINT8_C(188), UINT8_C(155), UINT8_C(201), UINT8_C( 91), UINT8_C(195), UINT8_C( 81), UINT8_C( 45), UINT8_C(235), UINT8_C(151), UINT8_C(159), UINT8_C(133), UINT8_C( 18), UINT8_C( 85), UINT8_C(121), UINT8_C(239), UINT8_C( 69), UINT8_C(196), UINT8_C(144), UINT8_C( 89), UINT8_C( 1), UINT8_C(132), UINT8_C(191), UINT8_C(167), UINT8_C(100), UINT8_C(245), UINT8_C( 69), UINT8_C(236), UINT8_C( 46), 
UINT8_C(186), UINT8_C( 1), UINT8_C(228), UINT8_C(118), UINT8_C(156), UINT8_C(173), UINT8_C(209), UINT8_C( 96), UINT8_C(254), UINT8_C(254), UINT8_C( 75), UINT8_C(150), UINT8_C(158) }, { UINT8_C( 92), UINT8_C( 3), UINT8_C(154), UINT8_C(123), UINT8_MAX, UINT8_C(244), UINT8_C( 80), UINT8_C( 29), UINT8_C( 75), UINT8_C(246), UINT8_C(153), UINT8_C( 21), UINT8_C(180), UINT8_C(157), UINT8_C(166), UINT8_C(186), UINT8_C(119), UINT8_C(157), UINT8_C(135), UINT8_C(141), UINT8_C(161), UINT8_C(126), UINT8_C(162), UINT8_C(231), UINT8_C(245), UINT8_C( 80), UINT8_C( 62), UINT8_C(223), UINT8_C( 88), UINT8_C( 57), UINT8_C( 22), UINT8_C(103), UINT8_C( 61), UINT8_C( 11), UINT8_C(140), UINT8_C( 60), UINT8_C(228), UINT8_C(239), UINT8_C( 89), UINT8_C(155), UINT8_C(187), UINT8_C(243), UINT8_C(176), UINT8_C(132), UINT8_C(144), UINT8_C(167), UINT8_C(100), UINT8_C(245), UINT8_C(236), UINT8_C(236), UINT8_C( 81), UINT8_C(186), UINT8_C( 54), UINT8_C(228), UINT8_C(158), UINT8_C(156), UINT8_C(173), UINT8_C(209), UINT8_C( 11), UINT8_C(254), UINT8_C(254), UINT8_C( 33), UINT8_C(210), UINT8_C(158) } }, { { -INT8_C( 48), -INT8_C( 88), -INT8_C( 13), INT8_C( 73), -INT8_C( 105), INT8_C( 57), INT8_C( 13), INT8_C( 39), -INT8_C( 110), INT8_C( 14), -INT8_C( 85), INT8_C( 82), -INT8_C( 75), INT8_C( 16), INT8_C( 71), -INT8_C( 6), -INT8_C( 4), INT8_C( 117), -INT8_C( 76), -INT8_C( 3), INT8_C( 89), INT8_C( 42), -INT8_C( 102), INT8_C( 7), -INT8_C( 5), -INT8_C( 6), INT8_C( 5), -INT8_C( 6), INT8_C( 69), -INT8_C( 101), -INT8_C( 104), INT8_C( 21), INT8_C( 68), -INT8_C( 117), INT8_C( 94), -INT8_C( 37), -INT8_C( 60), INT8_C( 107), INT8_C( 3), INT8_C( 87), INT8_C( 121), -INT8_C( 82), -INT8_C( 87), INT8_C( 46), -INT8_C( 66), -INT8_C( 16), INT8_C( 41), -INT8_C( 70), INT8_C( 101), -INT8_C( 35), -INT8_C( 72), -INT8_C( 65), INT8_C( 8), INT8_C( 82), -INT8_C( 58), INT8_C( 3), INT8_C( 76), -INT8_C( 53), -INT8_C( 3), -INT8_C( 111), INT8_C( 103), -INT8_C( 107), -INT8_C( 90), -INT8_C( 85) }, UINT64_C(16734401429087061025), { UINT8_C( 
56), UINT8_C(229), UINT8_C( 22), UINT8_C(246), UINT8_C(213), UINT8_C( 63), UINT8_C(177), UINT8_C( 59), UINT8_C( 29), UINT8_C(105), UINT8_C(250), UINT8_C( 37), UINT8_C(187), UINT8_C(192), UINT8_C( 40), UINT8_C( 7), UINT8_C(139), UINT8_C( 38), UINT8_C(152), UINT8_C(242), UINT8_C(187), UINT8_C( 62), UINT8_C(157), UINT8_C(220), UINT8_C( 66), UINT8_C( 36), UINT8_C(194), UINT8_C(177), UINT8_C(173), UINT8_C(254), UINT8_C(153), UINT8_C(229), UINT8_C(228), UINT8_C(175), UINT8_C(220), UINT8_C(185), UINT8_C(239), UINT8_C(141), UINT8_C(244), UINT8_C( 12), UINT8_C(246), UINT8_C(238), UINT8_C( 49), UINT8_C(177), UINT8_C(174), UINT8_C( 89), UINT8_C(184), UINT8_C( 58), UINT8_C(127), UINT8_C( 80), UINT8_C( 44), UINT8_C( 59), UINT8_C(142), UINT8_C(202), UINT8_C( 23), UINT8_C(208), UINT8_C(238), UINT8_C(217), UINT8_C(129), UINT8_C(155), UINT8_C(216), UINT8_C( 26), UINT8_C(129), UINT8_C(188) }, { UINT8_C(201), UINT8_C( 93), UINT8_C(117), UINT8_C(184), UINT8_C(234), UINT8_C(106), UINT8_C(196), UINT8_C(224), UINT8_C( 88), UINT8_C(245), UINT8_C(145), UINT8_C( 7), UINT8_C( 79), UINT8_C( 73), UINT8_C( 65), UINT8_C(206), UINT8_C(153), UINT8_C(109), UINT8_C( 9), UINT8_C( 39), UINT8_C( 55), UINT8_C( 33), UINT8_C(247), UINT8_C( 37), UINT8_C(250), UINT8_C(120), UINT8_C(193), UINT8_C(210), UINT8_C(146), UINT8_C( 66), UINT8_C(142), UINT8_C( 91), UINT8_C(159), UINT8_C( 4), UINT8_C( 20), UINT8_C(137), UINT8_C(110), UINT8_C(216), UINT8_C(105), UINT8_C(198), UINT8_C(206), UINT8_C(250), UINT8_C(205), UINT8_C( 29), UINT8_C( 67), UINT8_C( 14), UINT8_C(235), UINT8_C(220), UINT8_C(124), UINT8_C(245), UINT8_C( 3), UINT8_C(179), UINT8_C( 22), UINT8_C(250), UINT8_C(217), UINT8_C( 16), UINT8_C(114), UINT8_C(154), UINT8_C(227), UINT8_C( 4), UINT8_C(220), UINT8_C(113), UINT8_C( 95), UINT8_C(123) }, { UINT8_C(201), UINT8_C(168), UINT8_C(243), UINT8_C( 73), UINT8_C(151), UINT8_C(106), UINT8_C( 13), UINT8_C( 39), UINT8_C(146), UINT8_C( 14), UINT8_C(250), UINT8_C( 82), UINT8_C(181), UINT8_C( 16), UINT8_C( 71), 
UINT8_C(250), UINT8_C(252), UINT8_C(109), UINT8_C(152), UINT8_C(253), UINT8_C( 89), UINT8_C( 42), UINT8_C(154), UINT8_C(220), UINT8_C(250), UINT8_C(250), UINT8_C(194), UINT8_C(250), UINT8_C( 69), UINT8_C(254), UINT8_C(153), UINT8_C(229), UINT8_C(228), UINT8_C(175), UINT8_C(220), UINT8_C(185), UINT8_C(196), UINT8_C(216), UINT8_C(244), UINT8_C( 87), UINT8_C(246), UINT8_C(174), UINT8_C(169), UINT8_C(177), UINT8_C(190), UINT8_C(240), UINT8_C( 41), UINT8_C(220), UINT8_C(101), UINT8_C(221), UINT8_C( 44), UINT8_C(179), UINT8_C(142), UINT8_C(250), UINT8_C(198), UINT8_C( 3), UINT8_C( 76), UINT8_C(203), UINT8_C(253), UINT8_C(155), UINT8_C(103), UINT8_C(113), UINT8_C(129), UINT8_C(188) } }, { { INT8_C( 117), INT8_C( 115), INT8_C( 4), -INT8_C( 29), INT8_C( 76), INT8_C( 109), -INT8_C( 86), INT8_C( 26), INT8_C( 103), INT8_C( 119), INT8_C( 55), -INT8_C( 86), -INT8_C( 122), INT8_C( 34), -INT8_C( 122), INT8_C( 2), INT8_C( 23), -INT8_C( 119), -INT8_C( 75), INT8_C( 45), -INT8_C( 125), -INT8_C( 114), INT8_C( 62), -INT8_C( 11), INT8_C( 40), INT8_C( 33), -INT8_C( 7), INT8_C( 4), -INT8_C( 110), INT8_C( 88), INT8_MAX, INT8_C( 8), -INT8_C( 52), -INT8_C( 125), -INT8_C( 21), INT8_C( 24), -INT8_C( 16), -INT8_C( 107), INT8_C( 50), INT8_C( 87), INT8_C( 13), INT8_C( 105), INT8_C( 1), -INT8_C( 109), -INT8_C( 117), -INT8_C( 121), -INT8_C( 107), -INT8_C( 93), INT8_C( 16), INT8_C( 74), -INT8_C( 48), -INT8_C( 109), -INT8_C( 39), INT8_C( 14), -INT8_C( 120), INT8_C( 1), INT8_C( 47), -INT8_C( 127), INT8_C( 6), -INT8_C( 62), -INT8_C( 38), -INT8_C( 123), -INT8_C( 54), -INT8_C( 90) }, UINT64_C( 6364131957554459913), { UINT8_C( 89), UINT8_C( 82), UINT8_C(235), UINT8_C(228), UINT8_C(218), UINT8_C(128), UINT8_C(135), UINT8_C(234), UINT8_C(202), UINT8_C( 88), UINT8_C(126), UINT8_C(163), UINT8_C(102), UINT8_C( 6), UINT8_C(165), UINT8_C(150), UINT8_C(136), UINT8_C(171), UINT8_C( 88), UINT8_C( 98), UINT8_C( 48), UINT8_C( 34), UINT8_C( 8), UINT8_C( 57), UINT8_C(215), UINT8_C(198), UINT8_C( 51), UINT8_C( 34), 
UINT8_C(182), UINT8_C(132), UINT8_C(122), UINT8_C( 15), UINT8_C(214), UINT8_C(101), UINT8_C(243), UINT8_C(176), UINT8_C(229), UINT8_C(123), UINT8_C(155), UINT8_C(176), UINT8_C(211), UINT8_C( 25), UINT8_C( 83), UINT8_C( 57), UINT8_C( 31), UINT8_C(248), UINT8_C(207), UINT8_C(167), UINT8_C(163), UINT8_C( 39), UINT8_C( 9), UINT8_C(212), UINT8_C( 73), UINT8_C( 17), UINT8_C( 13), UINT8_C( 33), UINT8_C(215), UINT8_C( 64), UINT8_C( 67), UINT8_C(141), UINT8_C(196), UINT8_C(190), UINT8_C(156), UINT8_C(155) }, { UINT8_C( 35), UINT8_C(144), UINT8_C( 75), UINT8_C( 9), UINT8_C( 11), UINT8_C(230), UINT8_C(185), UINT8_C(222), UINT8_MAX, UINT8_C( 12), UINT8_C( 23), UINT8_C( 31), UINT8_C( 5), UINT8_C(231), UINT8_C(198), UINT8_C(168), UINT8_C( 14), UINT8_C(208), UINT8_C(124), UINT8_C( 88), UINT8_C(225), UINT8_C(138), UINT8_C(121), UINT8_C(185), UINT8_C(202), UINT8_C(188), UINT8_C( 70), UINT8_C(143), UINT8_C(122), UINT8_C(227), UINT8_C( 42), UINT8_C(158), UINT8_C(115), UINT8_C(117), UINT8_C(167), UINT8_C(126), UINT8_C( 92), UINT8_C( 96), UINT8_C( 92), UINT8_C( 91), UINT8_C(108), UINT8_C(115), UINT8_C(122), UINT8_C(113), UINT8_C( 90), UINT8_C( 65), UINT8_C( 26), UINT8_C(105), UINT8_C( 17), UINT8_C(150), UINT8_C(193), UINT8_C(242), UINT8_C( 32), UINT8_C( 58), UINT8_C(171), UINT8_C(235), UINT8_C(246), UINT8_C(242), UINT8_C(122), UINT8_C(113), UINT8_C(213), UINT8_C(164), UINT8_C( 15), UINT8_C( 72) }, { UINT8_C( 89), UINT8_C(115), UINT8_C( 4), UINT8_C(228), UINT8_C( 76), UINT8_C(109), UINT8_C(170), UINT8_C( 26), UINT8_MAX, UINT8_C(119), UINT8_C(126), UINT8_C(170), UINT8_C(102), UINT8_C(231), UINT8_C(134), UINT8_C(168), UINT8_C( 23), UINT8_C(208), UINT8_C(124), UINT8_C( 98), UINT8_C(225), UINT8_C(138), UINT8_C( 62), UINT8_C(185), UINT8_C(215), UINT8_C( 33), UINT8_C(249), UINT8_C(143), UINT8_C(182), UINT8_C(227), UINT8_C(122), UINT8_C(158), UINT8_C(214), UINT8_C(117), UINT8_C(235), UINT8_C(176), UINT8_C(240), UINT8_C(149), UINT8_C(155), UINT8_C( 87), UINT8_C( 13), UINT8_C(105), UINT8_C( 1), 
UINT8_C(147), UINT8_C( 90), UINT8_C(248), UINT8_C(207), UINT8_C(167), UINT8_C(163), UINT8_C( 74), UINT8_C(208), UINT8_C(147), UINT8_C( 73), UINT8_C( 14), UINT8_C(171), UINT8_C( 1), UINT8_C( 47), UINT8_C(129), UINT8_C( 6), UINT8_C(141), UINT8_C(213), UINT8_C(133), UINT8_C(156), UINT8_C(166) } }, { { INT8_C( 25), -INT8_C( 74), -INT8_C( 58), INT8_C( 117), INT8_C( 22), INT8_C( 34), -INT8_C( 47), -INT8_C( 126), -INT8_C( 107), INT8_C( 75), -INT8_C( 12), -INT8_C( 16), -INT8_C( 116), INT8_C( 14), INT8_C( 89), -INT8_C( 99), -INT8_C( 92), INT8_C( 26), -INT8_C( 112), -INT8_C( 59), INT8_C( 84), INT8_C( 59), -INT8_C( 80), INT8_C( 74), INT8_C( 45), INT8_C( 42), -INT8_C( 69), INT8_C( 2), -INT8_C( 50), -INT8_C( 54), INT8_C( 74), -INT8_C( 25), INT8_MIN, INT8_C( 16), INT8_C( 93), -INT8_C( 106), INT8_C( 50), INT8_C( 46), INT8_C( 25), -INT8_C( 56), INT8_C( 121), INT8_C( 13), -INT8_C( 72), INT8_C( 6), INT8_C( 27), INT8_C( 17), -INT8_C( 93), -INT8_C( 65), INT8_C( 43), INT8_C( 51), -INT8_C( 124), INT8_MAX, INT8_C( 111), INT8_C( 52), -INT8_C( 55), -INT8_C( 100), INT8_C( 94), -INT8_C( 123), -INT8_C( 97), INT8_C( 44), INT8_C( 79), -INT8_C( 23), INT8_C( 20), -INT8_C( 48) }, UINT64_C( 1798202472849109498), { UINT8_C(140), UINT8_C(172), UINT8_C( 30), UINT8_C(167), UINT8_C(189), UINT8_C(194), UINT8_C(103), UINT8_C(232), UINT8_C(245), UINT8_C(235), UINT8_C(103), UINT8_C(100), UINT8_C( 32), UINT8_C( 49), UINT8_C( 1), UINT8_C(126), UINT8_C(182), UINT8_C(160), UINT8_C(171), UINT8_C( 5), UINT8_C(137), UINT8_C(191), UINT8_C(213), UINT8_C(131), UINT8_C( 48), UINT8_C( 60), UINT8_C(176), UINT8_C(207), UINT8_C(187), UINT8_C(164), UINT8_C(231), UINT8_C( 72), UINT8_C( 81), UINT8_C( 6), UINT8_C(239), UINT8_C( 14), UINT8_C(200), UINT8_C( 86), UINT8_C(247), UINT8_C(189), UINT8_C( 66), UINT8_C( 94), UINT8_C( 34), UINT8_C( 98), UINT8_C(143), UINT8_C( 35), UINT8_C(224), UINT8_C( 69), UINT8_C(195), UINT8_C(139), UINT8_C( 75), UINT8_C( 76), UINT8_C( 74), UINT8_C( 32), UINT8_C(208), UINT8_C(122), UINT8_C( 92), 
UINT8_C(128), UINT8_C( 73), UINT8_C( 24), UINT8_C( 36), UINT8_C( 49), UINT8_C( 96), UINT8_C(117) }, { UINT8_C( 55), UINT8_C( 79), UINT8_C(132), UINT8_MAX, UINT8_C(166), UINT8_C(123), UINT8_C(188), UINT8_C(232), UINT8_C(217), UINT8_C(222), UINT8_C( 74), UINT8_C(105), UINT8_C( 1), UINT8_C( 42), UINT8_C(174), UINT8_C(196), UINT8_C(182), UINT8_C(249), UINT8_C( 17), UINT8_C( 0), UINT8_C( 26), UINT8_C(225), UINT8_C(123), UINT8_C(118), UINT8_C( 97), UINT8_C(196), UINT8_C(142), UINT8_C(133), UINT8_C(245), UINT8_C(238), UINT8_C(251), UINT8_C( 44), UINT8_C( 62), UINT8_C(127), UINT8_C( 43), UINT8_C(228), UINT8_C(250), UINT8_C(232), UINT8_C(204), UINT8_C(211), UINT8_C(198), UINT8_C( 22), UINT8_C( 60), UINT8_C(200), UINT8_C( 64), UINT8_C(235), UINT8_C(140), UINT8_C(246), UINT8_C(228), UINT8_C(157), UINT8_C(247), UINT8_C(254), UINT8_C(126), UINT8_C(114), UINT8_C(117), UINT8_C(223), UINT8_C( 54), UINT8_C( 3), UINT8_C(101), UINT8_C( 44), UINT8_C(242), UINT8_C( 96), UINT8_C( 88), UINT8_C( 48) }, { UINT8_C( 25), UINT8_C(172), UINT8_C(198), UINT8_MAX, UINT8_C(189), UINT8_C(194), UINT8_C(188), UINT8_C(232), UINT8_C(245), UINT8_C( 75), UINT8_C(244), UINT8_C(240), UINT8_C( 32), UINT8_C( 49), UINT8_C(174), UINT8_C(157), UINT8_C(164), UINT8_C(249), UINT8_C(171), UINT8_C(197), UINT8_C( 84), UINT8_C(225), UINT8_C(213), UINT8_C( 74), UINT8_C( 45), UINT8_C( 42), UINT8_C(176), UINT8_C(207), UINT8_C(206), UINT8_C(238), UINT8_C( 74), UINT8_C(231), UINT8_C( 81), UINT8_C(127), UINT8_C(239), UINT8_C(228), UINT8_C(250), UINT8_C( 46), UINT8_C( 25), UINT8_C(211), UINT8_C(198), UINT8_C( 94), UINT8_C( 60), UINT8_C(200), UINT8_C(143), UINT8_C(235), UINT8_C(224), UINT8_C(191), UINT8_C( 43), UINT8_C( 51), UINT8_C(247), UINT8_C(127), UINT8_C(126), UINT8_C(114), UINT8_C(208), UINT8_C(223), UINT8_C( 94), UINT8_C(133), UINT8_C(159), UINT8_C( 44), UINT8_C(242), UINT8_C(233), UINT8_C( 20), UINT8_C(208) } }, { { -INT8_C( 33), -INT8_C( 124), INT8_C( 20), -INT8_C( 39), INT8_C( 108), -INT8_C( 32), -INT8_C( 84), 
INT8_C( 50), -INT8_C( 10), -INT8_C( 23), -INT8_C( 6), INT8_C( 54), -INT8_C( 44), -INT8_C( 121), INT8_C( 45), -INT8_C( 72), INT8_C( 36), INT8_C( 36), -INT8_C( 73), -INT8_C( 93), -INT8_C( 106), INT8_C( 44), -INT8_C( 126), -INT8_C( 52), INT8_C( 47), -INT8_C( 25), -INT8_C( 8), INT8_C( 33), INT8_C( 71), INT8_C( 81), INT8_C( 81), INT8_C( 38), -INT8_C( 43), INT8_C( 101), -INT8_C( 1), INT8_C( 65), INT8_C( 69), -INT8_C( 84), INT8_C( 115), INT8_C( 59), -INT8_C( 107), INT8_C( 110), INT8_C( 114), INT8_C( 105), -INT8_C( 11), -INT8_C( 97), INT8_C( 33), INT8_C( 25), -INT8_C( 61), -INT8_C( 40), -INT8_C( 68), INT8_C( 89), INT8_C( 4), INT8_C( 63), INT8_C( 37), INT8_C( 52), INT8_C( 38), INT8_C( 30), INT8_C( 85), INT8_C( 110), INT8_C( 111), -INT8_C( 89), -INT8_C( 108), INT8_C( 68) }, UINT64_C(15388228456940934156), { UINT8_C(102), UINT8_MAX, UINT8_C( 62), UINT8_C( 91), UINT8_C(158), UINT8_C( 95), UINT8_C(117), UINT8_C( 97), UINT8_C( 56), UINT8_C( 49), UINT8_C(186), UINT8_C( 60), UINT8_C(112), UINT8_C(224), UINT8_C(112), UINT8_C(151), UINT8_C(254), UINT8_C(198), UINT8_C( 5), UINT8_C(109), UINT8_C(109), UINT8_C(153), UINT8_C(177), UINT8_C(121), UINT8_C( 45), UINT8_C( 54), UINT8_C(203), UINT8_C(109), UINT8_C( 46), UINT8_C( 89), UINT8_C( 66), UINT8_C(149), UINT8_C( 88), UINT8_C(128), UINT8_C(240), UINT8_C(247), UINT8_C(224), UINT8_C(101), UINT8_C( 88), UINT8_C( 24), UINT8_C(151), UINT8_C( 19), UINT8_C( 84), UINT8_C( 7), UINT8_C(243), UINT8_C(197), UINT8_C(158), UINT8_C(241), UINT8_C(139), UINT8_C(163), UINT8_C( 94), UINT8_C(248), UINT8_C( 61), UINT8_C( 15), UINT8_C(113), UINT8_C(106), UINT8_C( 69), UINT8_C( 61), UINT8_C(216), UINT8_C(115), UINT8_C(150), UINT8_C( 26), UINT8_C( 8), UINT8_C(238) }, { UINT8_C(155), UINT8_C(249), UINT8_C(229), UINT8_C(123), UINT8_C( 94), UINT8_C( 62), UINT8_C(147), UINT8_C(245), UINT8_C( 81), UINT8_C(231), UINT8_C(253), UINT8_C( 68), UINT8_C(172), UINT8_C(155), UINT8_C( 53), UINT8_C( 55), UINT8_C( 63), UINT8_C(147), UINT8_C( 47), UINT8_C(124), UINT8_C(162), 
UINT8_C(161), UINT8_C(230), UINT8_C(231), UINT8_C(222), UINT8_C(190), UINT8_C( 90), UINT8_C(116), UINT8_C(217), UINT8_C( 99), UINT8_C( 98), UINT8_C(116), UINT8_C( 92), UINT8_C( 72), UINT8_C(239), UINT8_C(186), UINT8_C(134), UINT8_C(130), UINT8_C(176), UINT8_C(215), UINT8_C(105), UINT8_C(173), UINT8_C( 27), UINT8_C( 22), UINT8_C( 72), UINT8_C( 80), UINT8_C( 77), UINT8_C(135), UINT8_C(227), UINT8_C(125), UINT8_C( 3), UINT8_C(133), UINT8_C( 30), UINT8_C(234), UINT8_C(108), UINT8_C(252), UINT8_C(168), UINT8_C(198), UINT8_C(112), UINT8_C(129), UINT8_C( 41), UINT8_C(210), UINT8_C(245), UINT8_C(133) }, { UINT8_C(223), UINT8_C(132), UINT8_C(229), UINT8_C(123), UINT8_C(108), UINT8_C(224), UINT8_C(172), UINT8_C( 50), UINT8_C(246), UINT8_C(233), UINT8_C(253), UINT8_C( 54), UINT8_C(172), UINT8_C(135), UINT8_C( 45), UINT8_C(151), UINT8_C(254), UINT8_C( 36), UINT8_C( 47), UINT8_C(163), UINT8_C(150), UINT8_C( 44), UINT8_C(130), UINT8_C(231), UINT8_C( 47), UINT8_C(190), UINT8_C(248), UINT8_C( 33), UINT8_C(217), UINT8_C( 81), UINT8_C( 98), UINT8_C( 38), UINT8_C(213), UINT8_C(101), UINT8_MAX, UINT8_C( 65), UINT8_C( 69), UINT8_C(172), UINT8_C(176), UINT8_C( 59), UINT8_C(149), UINT8_C(110), UINT8_C(114), UINT8_C( 22), UINT8_C(243), UINT8_C(197), UINT8_C(158), UINT8_C(241), UINT8_C(227), UINT8_C(216), UINT8_C( 94), UINT8_C(248), UINT8_C( 4), UINT8_C( 63), UINT8_C( 37), UINT8_C(252), UINT8_C(168), UINT8_C( 30), UINT8_C(216), UINT8_C(110), UINT8_C(150), UINT8_C(167), UINT8_C(245), UINT8_C(238) } }, { { INT8_C( 26), -INT8_C( 28), INT8_C( 64), -INT8_C( 96), INT8_C( 102), -INT8_C( 16), INT8_C( 119), -INT8_C( 48), -INT8_C( 99), -INT8_C( 110), -INT8_C( 26), -INT8_C( 27), -INT8_C( 30), INT8_C( 51), INT8_C( 109), -INT8_C( 59), -INT8_C( 80), INT8_C( 112), INT8_C( 74), -INT8_C( 50), INT8_C( 90), -INT8_C( 74), -INT8_C( 54), INT8_C( 3), INT8_C( 125), INT8_C( 58), -INT8_C( 124), -INT8_C( 90), INT8_C( 13), INT8_C( 122), INT8_C( 44), INT8_C( 39), INT8_C( 94), INT8_C( 108), -INT8_C( 56), -INT8_C( 59), 
INT8_C( 92), INT8_C( 63), -INT8_C( 107), -INT8_C( 7), -INT8_C( 46), INT8_C( 123), -INT8_C( 34), -INT8_C( 76), -INT8_C( 82), INT8_C( 75), INT8_C( 122), INT8_C( 95), -INT8_C( 68), -INT8_C( 60), INT8_C( 45), INT8_C( 22), INT8_C( 123), -INT8_C( 8), INT8_C( 25), -INT8_C( 8), INT8_C( 50), -INT8_C( 98), -INT8_C( 98), INT8_C( 63), INT8_C( 24), -INT8_C( 54), INT8_C( 103), INT8_C( 118) }, UINT64_C( 4651040213508501302), { UINT8_C( 75), UINT8_C(106), UINT8_C(245), UINT8_C(250), UINT8_C(181), UINT8_C(111), UINT8_C( 89), UINT8_C(113), UINT8_C( 51), UINT8_C(134), UINT8_C(136), UINT8_C(174), UINT8_C(126), UINT8_C(161), UINT8_C(166), UINT8_C(177), UINT8_C( 63), UINT8_C( 69), UINT8_C(240), UINT8_C( 87), UINT8_C( 15), UINT8_C( 87), UINT8_C(206), UINT8_C( 70), UINT8_C(134), UINT8_C( 9), UINT8_C(216), UINT8_C(245), UINT8_C(218), UINT8_C(100), UINT8_C( 53), UINT8_C( 37), UINT8_C(206), UINT8_C( 42), UINT8_C( 31), UINT8_C(131), UINT8_C(153), UINT8_C(120), UINT8_C(245), UINT8_C(205), UINT8_MAX, UINT8_C(125), UINT8_C(123), UINT8_C(125), UINT8_C( 30), UINT8_C( 34), UINT8_C( 46), UINT8_C( 94), UINT8_C(103), UINT8_C( 31), UINT8_C(181), UINT8_C(118), UINT8_C(118), UINT8_C(131), UINT8_C(188), UINT8_C(253), UINT8_C(141), UINT8_C(149), UINT8_C(242), UINT8_C(103), UINT8_C(249), UINT8_C( 39), UINT8_C(140), UINT8_C(199) }, { UINT8_C( 82), UINT8_C(172), UINT8_C( 74), UINT8_C(235), UINT8_C( 36), UINT8_C( 63), UINT8_C(184), UINT8_C( 35), UINT8_C(188), UINT8_C( 52), UINT8_C(161), UINT8_C(219), UINT8_C( 86), UINT8_C(207), UINT8_C( 57), UINT8_C(189), UINT8_C(238), UINT8_C(238), UINT8_C( 51), UINT8_C(101), UINT8_C(114), UINT8_C(240), UINT8_C( 98), UINT8_MAX, UINT8_C(133), UINT8_C( 84), UINT8_C(102), UINT8_C(126), UINT8_C(123), UINT8_C(242), UINT8_C( 69), UINT8_C(205), UINT8_C(158), UINT8_C(143), UINT8_C(185), UINT8_C(195), UINT8_C(207), UINT8_C(113), UINT8_C(230), UINT8_C(139), UINT8_C(165), UINT8_C(135), UINT8_C(102), UINT8_C(251), UINT8_C( 87), UINT8_C(159), UINT8_C(184), UINT8_C( 69), UINT8_C(142), 
UINT8_C(236), UINT8_C(170), UINT8_C( 0), UINT8_C(220), UINT8_C( 12), UINT8_MAX, UINT8_C( 97), UINT8_C( 96), UINT8_C(101), UINT8_C(223), UINT8_C(220), UINT8_C( 87), UINT8_C( 36), UINT8_C(169), UINT8_C(246) }, { UINT8_C( 26), UINT8_C(172), UINT8_C(245), UINT8_C(160), UINT8_C(181), UINT8_C(111), UINT8_C(119), UINT8_C(208), UINT8_C(188), UINT8_C(134), UINT8_C(161), UINT8_C(219), UINT8_C(226), UINT8_C(207), UINT8_C(109), UINT8_C(197), UINT8_C(238), UINT8_C(238), UINT8_C( 74), UINT8_C(101), UINT8_C(114), UINT8_C(240), UINT8_C(202), UINT8_C( 3), UINT8_C(125), UINT8_C( 84), UINT8_C(132), UINT8_C(166), UINT8_C(218), UINT8_C(122), UINT8_C( 44), UINT8_C(205), UINT8_C( 94), UINT8_C(143), UINT8_C(185), UINT8_C(195), UINT8_C( 92), UINT8_C(120), UINT8_C(245), UINT8_C(249), UINT8_C(210), UINT8_C(123), UINT8_C(222), UINT8_C(180), UINT8_C( 87), UINT8_C( 75), UINT8_C(184), UINT8_C( 94), UINT8_C(142), UINT8_C(236), UINT8_C( 45), UINT8_C(118), UINT8_C(123), UINT8_C(248), UINT8_C( 25), UINT8_C(253), UINT8_C( 50), UINT8_C(158), UINT8_C(158), UINT8_C( 63), UINT8_C( 24), UINT8_C(202), UINT8_C(169), UINT8_C(118) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi8(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_mask_max_epu8(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_u8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_max_epu8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask64 k; const uint8_t a[64]; const uint8_t b[64]; const uint8_t r[64]; } test_vec[] = { { UINT64_C( 768989401926260750), { UINT8_C(200), UINT8_C(153), UINT8_C(184), UINT8_C(140), UINT8_C(198), UINT8_C(202), UINT8_C(130), UINT8_C(163), UINT8_C( 22), UINT8_C(136), UINT8_C(176), UINT8_C(145), UINT8_C( 38), UINT8_C( 80), UINT8_C(249), UINT8_C( 26), UINT8_C(160), UINT8_C( 32), 
UINT8_C(211), UINT8_C( 88), UINT8_C(134), UINT8_C(202), UINT8_C(182), UINT8_C(148), UINT8_C(163), UINT8_C(144), UINT8_C(253), UINT8_C(108), UINT8_C(143), UINT8_C(168), UINT8_C(118), UINT8_C( 87), UINT8_C( 65), UINT8_C( 47), UINT8_C(228), UINT8_C( 8), UINT8_C(249), UINT8_C(102), UINT8_C(171), UINT8_C( 15), UINT8_C(238), UINT8_C( 91), UINT8_C(160), UINT8_C( 21), UINT8_C(171), UINT8_C(153), UINT8_C( 47), UINT8_C( 75), UINT8_C(185), UINT8_C( 2), UINT8_C(163), UINT8_C( 64), UINT8_C(205), UINT8_C( 89), UINT8_C(212), UINT8_C(112), UINT8_C(233), UINT8_C(210), UINT8_C(220), UINT8_C(121), UINT8_C(122), UINT8_C( 83), UINT8_C(208), UINT8_C(188) }, { UINT8_C(130), UINT8_C(180), UINT8_C(196), UINT8_C(123), UINT8_C( 26), UINT8_C(111), UINT8_C(138), UINT8_C( 9), UINT8_C(202), UINT8_C( 43), UINT8_C( 30), UINT8_C(117), UINT8_C(196), UINT8_C( 77), UINT8_C(192), UINT8_C(126), UINT8_C( 80), UINT8_C( 99), UINT8_C(190), UINT8_C( 29), UINT8_C(188), UINT8_C(146), UINT8_C(141), UINT8_C(165), UINT8_C(100), UINT8_C(105), UINT8_C( 30), UINT8_C(223), UINT8_C(188), UINT8_C(239), UINT8_C(155), UINT8_C( 62), UINT8_C(163), UINT8_C( 95), UINT8_C(185), UINT8_C(190), UINT8_C(206), UINT8_C( 68), UINT8_C(199), UINT8_C(152), UINT8_C(111), UINT8_C(229), UINT8_C( 13), UINT8_C( 51), UINT8_C( 50), UINT8_C(205), UINT8_C(177), UINT8_C(130), UINT8_C( 48), UINT8_C(111), UINT8_C(159), UINT8_C(236), UINT8_C( 2), UINT8_C( 44), UINT8_C(145), UINT8_C(102), UINT8_C(150), UINT8_C(176), UINT8_C( 69), UINT8_C( 82), UINT8_C(159), UINT8_C(224), UINT8_C(145), UINT8_C( 66) }, { UINT8_C( 0), UINT8_C(180), UINT8_C(196), UINT8_C(140), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(145), UINT8_C(196), UINT8_C( 0), UINT8_C(249), UINT8_C(126), UINT8_C( 0), UINT8_C( 99), UINT8_C( 0), UINT8_C( 88), UINT8_C(188), UINT8_C( 0), UINT8_C(182), UINT8_C(165), UINT8_C(163), UINT8_C( 0), UINT8_C( 0), UINT8_C(223), UINT8_C( 0), UINT8_C(239), UINT8_C(155), UINT8_C( 0), UINT8_C(163), UINT8_C( 
0), UINT8_C( 0), UINT8_C(190), UINT8_C( 0), UINT8_C( 0), UINT8_C(199), UINT8_C(152), UINT8_C(238), UINT8_C(229), UINT8_C(160), UINT8_C( 51), UINT8_C(171), UINT8_C(205), UINT8_C(177), UINT8_C(130), UINT8_C(185), UINT8_C(111), UINT8_C( 0), UINT8_C(236), UINT8_C( 0), UINT8_C( 89), UINT8_C( 0), UINT8_C(112), UINT8_C( 0), UINT8_C(210), UINT8_C( 0), UINT8_C(121), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { UINT64_C(18277234075670432319), { UINT8_C(172), UINT8_C(178), UINT8_C( 49), UINT8_C(223), UINT8_C(127), UINT8_C(226), UINT8_C( 97), UINT8_C(175), UINT8_C( 82), UINT8_C( 1), UINT8_C(155), UINT8_C( 84), UINT8_C( 45), UINT8_C( 45), UINT8_C(186), UINT8_C(195), UINT8_C(221), UINT8_C( 0), UINT8_C( 22), UINT8_C(124), UINT8_C(224), UINT8_C(167), UINT8_C(190), UINT8_C( 32), UINT8_C(241), UINT8_C(191), UINT8_C( 45), UINT8_C(128), UINT8_C(134), UINT8_C(211), UINT8_C(125), UINT8_C( 51), UINT8_C(133), UINT8_C(174), UINT8_C( 18), UINT8_C( 5), UINT8_C(145), UINT8_C(115), UINT8_C(180), UINT8_C(227), UINT8_C(116), UINT8_C( 80), UINT8_C( 55), UINT8_C(162), UINT8_C(125), UINT8_C(241), UINT8_C(101), UINT8_C( 90), UINT8_C(241), UINT8_C(123), UINT8_C(214), UINT8_C(210), UINT8_C( 34), UINT8_C(148), UINT8_C(242), UINT8_C( 20), UINT8_C( 83), UINT8_C( 31), UINT8_C(148), UINT8_C(218), UINT8_C(242), UINT8_C( 17), UINT8_C( 13), UINT8_C(120) }, { UINT8_C(192), UINT8_C( 31), UINT8_C(125), UINT8_C( 81), UINT8_C(146), UINT8_C( 49), UINT8_C( 52), UINT8_C( 7), UINT8_C(129), UINT8_C(107), UINT8_C(169), UINT8_C(254), UINT8_C( 92), UINT8_C( 14), UINT8_C( 88), UINT8_C( 78), UINT8_C(138), UINT8_C( 46), UINT8_C( 32), UINT8_C(172), UINT8_C(195), UINT8_C( 18), UINT8_C(192), UINT8_C( 22), UINT8_C( 49), UINT8_C( 84), UINT8_C(240), UINT8_C( 36), UINT8_C(102), UINT8_C(253), UINT8_C(156), UINT8_C( 38), UINT8_C( 28), UINT8_C( 25), UINT8_C(119), UINT8_C(175), UINT8_C( 74), UINT8_C(171), UINT8_C(182), UINT8_C(204), UINT8_C( 22), UINT8_C( 95), UINT8_C(202), UINT8_C(114), UINT8_C(109), UINT8_C( 35), 
UINT8_C(192), UINT8_C(247), UINT8_C( 81), UINT8_C(224), UINT8_C(164), UINT8_C( 20), UINT8_C(242), UINT8_C(100), UINT8_C( 43), UINT8_C( 36), UINT8_C(185), UINT8_C( 27), UINT8_C( 72), UINT8_C( 31), UINT8_C( 25), UINT8_C(228), UINT8_C( 69), UINT8_C( 53) }, { UINT8_C(192), UINT8_C(178), UINT8_C(125), UINT8_C(223), UINT8_C(146), UINT8_C(226), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(107), UINT8_C( 0), UINT8_C(254), UINT8_C( 0), UINT8_C( 0), UINT8_C(186), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(241), UINT8_C( 0), UINT8_C(240), UINT8_C(128), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(174), UINT8_C(119), UINT8_C(175), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(227), UINT8_C(116), UINT8_C( 95), UINT8_C(202), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(192), UINT8_C(247), UINT8_C(241), UINT8_C( 0), UINT8_C(214), UINT8_C( 0), UINT8_C( 0), UINT8_C(148), UINT8_C( 0), UINT8_C( 36), UINT8_C(185), UINT8_C( 0), UINT8_C(148), UINT8_C(218), UINT8_C(242), UINT8_C(228), UINT8_C( 69), UINT8_C(120) } }, { UINT64_C( 9012717047676976381), { UINT8_C(249), UINT8_C(222), UINT8_C(239), UINT8_C(103), UINT8_C( 1), UINT8_C(176), UINT8_C( 94), UINT8_C( 82), UINT8_C(144), UINT8_C( 2), UINT8_C(103), UINT8_C(131), UINT8_C(103), UINT8_C(146), UINT8_C(167), UINT8_C( 32), UINT8_C(173), UINT8_C(239), UINT8_C( 63), UINT8_C(198), UINT8_C(211), UINT8_C(132), UINT8_C(252), UINT8_C(208), UINT8_C( 64), UINT8_C(224), UINT8_C( 23), UINT8_C(167), UINT8_C(123), UINT8_C( 43), UINT8_C( 36), UINT8_C(116), UINT8_C( 9), UINT8_C( 19), UINT8_C(219), UINT8_C( 10), UINT8_C(195), UINT8_C( 58), UINT8_C( 92), UINT8_C( 84), UINT8_C( 60), UINT8_C(195), UINT8_C(215), UINT8_C(163), UINT8_C( 85), UINT8_C(126), UINT8_C(195), UINT8_C( 3), UINT8_C(109), UINT8_C( 2), UINT8_C(201), UINT8_C( 64), UINT8_C(134), UINT8_C(197), UINT8_C( 16), UINT8_C(198), UINT8_C(166), UINT8_C( 39), UINT8_C(109), UINT8_C( 33), 
UINT8_C( 82), UINT8_C(145), UINT8_C(149), UINT8_C( 91) }, { UINT8_C(165), UINT8_C(113), UINT8_C(101), UINT8_C(104), UINT8_C(171), UINT8_C(194), UINT8_C(188), UINT8_C(231), UINT8_C(133), UINT8_C(147), UINT8_C(139), UINT8_C(219), UINT8_C( 17), UINT8_C( 78), UINT8_C(222), UINT8_C(126), UINT8_C( 81), UINT8_C(167), UINT8_C(190), UINT8_C(215), UINT8_C(109), UINT8_C(206), UINT8_C(158), UINT8_C( 19), UINT8_C(246), UINT8_C( 11), UINT8_C( 52), UINT8_C( 72), UINT8_C(157), UINT8_C(201), UINT8_C(164), UINT8_C( 66), UINT8_C( 58), UINT8_C( 9), UINT8_C(170), UINT8_C(229), UINT8_C(203), UINT8_C(103), UINT8_C(205), UINT8_C( 81), UINT8_C(250), UINT8_C( 88), UINT8_C( 44), UINT8_C( 12), UINT8_C(166), UINT8_C( 10), UINT8_C(138), UINT8_C(247), UINT8_C(177), UINT8_C( 73), UINT8_C(207), UINT8_C( 30), UINT8_C( 23), UINT8_C(109), UINT8_C( 49), UINT8_C( 13), UINT8_C(120), UINT8_C(101), UINT8_C( 86), UINT8_C( 21), UINT8_C( 47), UINT8_C(250), UINT8_C( 87), UINT8_C(105) }, { UINT8_C(249), UINT8_C( 0), UINT8_C(239), UINT8_C(104), UINT8_C(171), UINT8_C(194), UINT8_C(188), UINT8_C(231), UINT8_C( 0), UINT8_C( 0), UINT8_C(139), UINT8_C(219), UINT8_C(103), UINT8_C(146), UINT8_C( 0), UINT8_C(126), UINT8_C( 0), UINT8_C( 0), UINT8_C(190), UINT8_C( 0), UINT8_C( 0), UINT8_C(206), UINT8_C(252), UINT8_C(208), UINT8_C(246), UINT8_C(224), UINT8_C( 52), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(164), UINT8_C( 0), UINT8_C( 58), UINT8_C( 19), UINT8_C(219), UINT8_C( 0), UINT8_C( 0), UINT8_C(103), UINT8_C(205), UINT8_C( 0), UINT8_C( 0), UINT8_C(195), UINT8_C( 0), UINT8_C(163), UINT8_C(166), UINT8_C( 0), UINT8_C( 0), UINT8_C(247), UINT8_C(177), UINT8_C( 73), UINT8_C( 0), UINT8_C( 0), UINT8_C(134), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(166), UINT8_C( 0), UINT8_C(109), UINT8_C( 33), UINT8_C( 82), UINT8_C(250), UINT8_C(149), UINT8_C( 0) } }, { UINT64_C( 7142740249784812035), { UINT8_C(116), UINT8_C( 76), UINT8_C(111), UINT8_C( 26), UINT8_C( 86), UINT8_C(250), UINT8_C( 18), UINT8_C( 7), UINT8_C( 67), 
UINT8_C(225), UINT8_C( 38), UINT8_C( 90), UINT8_C( 78), UINT8_C( 87), UINT8_C(104), UINT8_C(198), UINT8_C(189), UINT8_C(190), UINT8_C(220), UINT8_C(236), UINT8_C(184), UINT8_C( 51), UINT8_C( 85), UINT8_C(187), UINT8_C( 53), UINT8_C(164), UINT8_C(138), UINT8_C(158), UINT8_C(192), UINT8_C(170), UINT8_C( 2), UINT8_C( 52), UINT8_C(246), UINT8_C(113), UINT8_C( 79), UINT8_C( 76), UINT8_C(107), UINT8_C( 97), UINT8_C( 84), UINT8_C(174), UINT8_C( 66), UINT8_C(122), UINT8_C( 9), UINT8_C(144), UINT8_C(209), UINT8_C(113), UINT8_C( 86), UINT8_C(142), UINT8_C( 47), UINT8_C( 50), UINT8_C(122), UINT8_C(231), UINT8_C(102), UINT8_C(208), UINT8_C(162), UINT8_C(155), UINT8_C(116), UINT8_C( 45), UINT8_C( 58), UINT8_C( 53), UINT8_C(215), UINT8_C( 60), UINT8_C(105), UINT8_C(206) }, { UINT8_C(173), UINT8_C(184), UINT8_C( 26), UINT8_C( 25), UINT8_C( 25), UINT8_C(110), UINT8_C(199), UINT8_C( 91), UINT8_C(232), UINT8_C(208), UINT8_C(235), UINT8_C(186), UINT8_C( 65), UINT8_C( 66), UINT8_C( 72), UINT8_C(112), UINT8_C(116), UINT8_C(195), UINT8_C( 87), UINT8_C(218), UINT8_C(147), UINT8_C(250), UINT8_C(118), UINT8_C( 7), UINT8_C( 39), UINT8_C(176), UINT8_C( 60), UINT8_C(254), UINT8_C(236), UINT8_C(166), UINT8_C(204), UINT8_C(153), UINT8_C( 94), UINT8_C(231), UINT8_C(178), UINT8_C(120), UINT8_C( 85), UINT8_C(122), UINT8_C(211), UINT8_C( 62), UINT8_C( 74), UINT8_C(191), UINT8_C(248), UINT8_C(140), UINT8_C( 1), UINT8_C( 64), UINT8_C(252), UINT8_C(117), UINT8_C( 3), UINT8_C( 84), UINT8_C( 80), UINT8_C(150), UINT8_C( 78), UINT8_C(198), UINT8_C(158), UINT8_C(117), UINT8_C(118), UINT8_C(218), UINT8_C(115), UINT8_C( 98), UINT8_C(128), UINT8_C( 64), UINT8_C(251), UINT8_C(223) }, { UINT8_C(173), UINT8_C(184), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(225), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(189), UINT8_C(195), UINT8_C(220), UINT8_C(236), UINT8_C( 0), UINT8_C( 0), UINT8_C(118), UINT8_C( 0), UINT8_C( 
53), UINT8_C(176), UINT8_C(138), UINT8_C(254), UINT8_C( 0), UINT8_C( 0), UINT8_C(204), UINT8_C(153), UINT8_C(246), UINT8_C( 0), UINT8_C( 0), UINT8_C(120), UINT8_C( 0), UINT8_C(122), UINT8_C(211), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(248), UINT8_C(144), UINT8_C(209), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(208), UINT8_C( 0), UINT8_C( 0), UINT8_C(118), UINT8_C(218), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 64), UINT8_C(251), UINT8_C( 0) } }, { UINT64_C( 8266966419365146151), { UINT8_C(233), UINT8_C(178), UINT8_C(254), UINT8_C(234), UINT8_C(243), UINT8_C(251), UINT8_C( 96), UINT8_C(246), UINT8_C( 79), UINT8_C(176), UINT8_C(141), UINT8_C(157), UINT8_C(118), UINT8_C( 43), UINT8_C( 18), UINT8_C(236), UINT8_C( 5), UINT8_C(133), UINT8_C( 78), UINT8_C(134), UINT8_C(197), UINT8_C( 73), UINT8_C(101), UINT8_C(236), UINT8_C(247), UINT8_C(188), UINT8_C(105), UINT8_C( 31), UINT8_C(230), UINT8_C( 35), UINT8_C(146), UINT8_C(208), UINT8_C(214), UINT8_C(144), UINT8_C(186), UINT8_C(201), UINT8_C(139), UINT8_C( 26), UINT8_C(191), UINT8_C(218), UINT8_C(202), UINT8_C( 76), UINT8_C(119), UINT8_C( 64), UINT8_C(119), UINT8_C(137), UINT8_C( 44), UINT8_C(125), UINT8_C( 15), UINT8_C(122), UINT8_C( 3), UINT8_C(212), UINT8_C(196), UINT8_C(104), UINT8_C(193), UINT8_C(187), UINT8_C( 36), UINT8_C( 42), UINT8_C(219), UINT8_C( 10), UINT8_C( 77), UINT8_C(109), UINT8_C(218), UINT8_C( 35) }, { UINT8_C(253), UINT8_C(149), UINT8_C(236), UINT8_C(137), UINT8_C(175), UINT8_C(172), UINT8_C( 99), UINT8_C(122), UINT8_C(248), UINT8_C(219), UINT8_C(186), UINT8_C(112), UINT8_C(100), UINT8_C(231), UINT8_C(237), UINT8_C(115), UINT8_C( 97), UINT8_C(240), UINT8_C( 72), UINT8_C( 37), UINT8_C( 88), UINT8_C( 9), UINT8_C(225), UINT8_C(124), UINT8_C( 51), UINT8_C(188), UINT8_C(134), UINT8_C(128), UINT8_C( 41), UINT8_C( 97), UINT8_C(164), UINT8_C( 38), UINT8_C(246), UINT8_C(144), UINT8_C(175), UINT8_C(165), UINT8_C( 60), UINT8_C( 19), 
UINT8_C( 31), UINT8_C( 53), UINT8_C(238), UINT8_C(218), UINT8_C(165), UINT8_C( 82), UINT8_C(193), UINT8_C(146), UINT8_C(198), UINT8_C( 34), UINT8_C(130), UINT8_C( 14), UINT8_C( 72), UINT8_C(218), UINT8_C( 23), UINT8_C( 41), UINT8_C( 86), UINT8_C( 74), UINT8_C(229), UINT8_C(220), UINT8_C(202), UINT8_C( 14), UINT8_C( 61), UINT8_C(110), UINT8_C( 52), UINT8_C( 51) }, { UINT8_C(253), UINT8_C(178), UINT8_C(254), UINT8_C( 0), UINT8_C( 0), UINT8_C(251), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(219), UINT8_C(186), UINT8_C(157), UINT8_C( 0), UINT8_C(231), UINT8_C( 0), UINT8_C(236), UINT8_C( 97), UINT8_C(240), UINT8_C( 78), UINT8_C( 0), UINT8_C(197), UINT8_C( 0), UINT8_C(225), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(134), UINT8_C(128), UINT8_C(230), UINT8_C( 97), UINT8_C(164), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(201), UINT8_C( 0), UINT8_C( 26), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(218), UINT8_C( 0), UINT8_C( 82), UINT8_C( 0), UINT8_C(146), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(122), UINT8_C( 0), UINT8_C(218), UINT8_C(196), UINT8_C(104), UINT8_C( 0), UINT8_C(187), UINT8_C( 0), UINT8_C(220), UINT8_C( 0), UINT8_C( 0), UINT8_C( 77), UINT8_C(110), UINT8_C(218), UINT8_C( 0) } }, { UINT64_C(16532987972821837055), { UINT8_C(210), UINT8_C( 21), UINT8_C( 55), UINT8_C(147), UINT8_C(167), UINT8_C(253), UINT8_C(182), UINT8_C( 41), UINT8_C( 11), UINT8_C(254), UINT8_C( 3), UINT8_C( 34), UINT8_C( 39), UINT8_C( 89), UINT8_C(108), UINT8_C( 12), UINT8_C( 54), UINT8_C( 55), UINT8_C( 26), UINT8_C(115), UINT8_C(165), UINT8_C( 78), UINT8_C(167), UINT8_C(164), UINT8_C( 50), UINT8_C(128), UINT8_C(224), UINT8_C( 41), UINT8_C(120), UINT8_C( 80), UINT8_C( 14), UINT8_C( 75), UINT8_C(102), UINT8_C( 70), UINT8_C(222), UINT8_C( 13), UINT8_C( 67), UINT8_C(148), UINT8_C( 55), UINT8_C( 79), UINT8_C(146), UINT8_C( 58), UINT8_C(113), UINT8_C(185), UINT8_C(148), UINT8_C(222), UINT8_C(197), UINT8_C(202), UINT8_C( 21), UINT8_C(223), UINT8_C( 61), UINT8_C(186), 
UINT8_C( 46), UINT8_C(228), UINT8_C( 95), UINT8_C( 96), UINT8_C(100), UINT8_C( 63), UINT8_C(138), UINT8_C(221), UINT8_C(143), UINT8_C(152), UINT8_C( 40), UINT8_C(245) }, { UINT8_C(222), UINT8_C( 6), UINT8_C( 3), UINT8_C( 34), UINT8_C(155), UINT8_C( 58), UINT8_C(113), UINT8_C( 45), UINT8_C(116), UINT8_C(226), UINT8_C(231), UINT8_C( 8), UINT8_C(192), UINT8_C(172), UINT8_C(210), UINT8_C(213), UINT8_C(140), UINT8_C( 16), UINT8_C(144), UINT8_C(186), UINT8_C(244), UINT8_C(239), UINT8_C( 26), UINT8_C( 89), UINT8_C( 46), UINT8_C(164), UINT8_C( 54), UINT8_C(189), UINT8_C( 61), UINT8_C( 94), UINT8_C(179), UINT8_C( 27), UINT8_C(100), UINT8_C(182), UINT8_C( 61), UINT8_MAX, UINT8_C(240), UINT8_C(174), UINT8_C( 45), UINT8_C(100), UINT8_C(145), UINT8_C( 20), UINT8_C(109), UINT8_C( 81), UINT8_C(192), UINT8_C( 63), UINT8_C( 39), UINT8_C( 76), UINT8_C( 79), UINT8_C(183), UINT8_C( 6), UINT8_C( 68), UINT8_C(166), UINT8_C( 33), UINT8_C(157), UINT8_C(212), UINT8_C(197), UINT8_C(211), UINT8_C(145), UINT8_C( 2), UINT8_C( 49), UINT8_C( 68), UINT8_C( 30), UINT8_C(149) }, { UINT8_C(222), UINT8_C( 21), UINT8_C( 55), UINT8_C(147), UINT8_C(167), UINT8_C(253), UINT8_C(182), UINT8_C( 45), UINT8_C( 0), UINT8_C( 0), UINT8_C(231), UINT8_C( 0), UINT8_C( 0), UINT8_C(172), UINT8_C(210), UINT8_C(213), UINT8_C(140), UINT8_C( 0), UINT8_C( 0), UINT8_C(186), UINT8_C(244), UINT8_C( 0), UINT8_C(167), UINT8_C(164), UINT8_C( 50), UINT8_C(164), UINT8_C( 0), UINT8_C(189), UINT8_C(120), UINT8_C( 94), UINT8_C( 0), UINT8_C( 0), UINT8_C(102), UINT8_C(182), UINT8_C(222), UINT8_C( 0), UINT8_C(240), UINT8_C(174), UINT8_C( 55), UINT8_C(100), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(185), UINT8_C(192), UINT8_C(222), UINT8_C(197), UINT8_C(202), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(166), UINT8_C(228), UINT8_C(157), UINT8_C( 0), UINT8_C(197), UINT8_C( 0), UINT8_C(145), UINT8_C( 0), UINT8_C( 0), UINT8_C(152), UINT8_C( 40), UINT8_C(245) } }, { UINT64_C(11191376951180090362), { UINT8_C(214), 
UINT8_C(188), UINT8_C(236), UINT8_C(150), UINT8_C(251), UINT8_C( 19), UINT8_C(227), UINT8_C( 75), UINT8_C(202), UINT8_C(233), UINT8_C(143), UINT8_C(112), UINT8_C( 10), UINT8_C( 44), UINT8_C( 68), UINT8_C(208), UINT8_MAX, UINT8_C(214), UINT8_C(210), UINT8_C( 48), UINT8_C( 26), UINT8_C(240), UINT8_C(197), UINT8_C( 21), UINT8_C( 76), UINT8_C( 90), UINT8_MAX, UINT8_C( 86), UINT8_C( 28), UINT8_C( 78), UINT8_C(241), UINT8_C(242), UINT8_C( 10), UINT8_C(221), UINT8_C(137), UINT8_C( 6), UINT8_C(241), UINT8_C(108), UINT8_C( 81), UINT8_C(187), UINT8_C( 85), UINT8_C(224), UINT8_C( 44), UINT8_C( 96), UINT8_C( 12), UINT8_C(112), UINT8_C( 48), UINT8_C( 11), UINT8_C( 70), UINT8_C( 2), UINT8_C( 59), UINT8_C( 97), UINT8_C(243), UINT8_C( 0), UINT8_C(118), UINT8_C( 63), UINT8_C( 91), UINT8_C(117), UINT8_C(149), UINT8_C(119), UINT8_C(196), UINT8_C(134), UINT8_C(106), UINT8_C(206) }, { UINT8_C( 99), UINT8_C(243), UINT8_C(212), UINT8_C( 84), UINT8_C( 95), UINT8_C( 37), UINT8_C( 16), UINT8_C(180), UINT8_C( 5), UINT8_C( 60), UINT8_C( 20), UINT8_C( 17), UINT8_C(172), UINT8_C( 68), UINT8_C( 28), UINT8_C(243), UINT8_C( 71), UINT8_C( 87), UINT8_C( 84), UINT8_C( 58), UINT8_C( 88), UINT8_C(202), UINT8_C(121), UINT8_C(179), UINT8_C( 63), UINT8_C( 14), UINT8_C( 42), UINT8_C( 3), UINT8_C(148), UINT8_C(148), UINT8_C(210), UINT8_C(247), UINT8_C(135), UINT8_C(166), UINT8_C( 76), UINT8_C(230), UINT8_C(204), UINT8_C( 92), UINT8_C(155), UINT8_C(209), UINT8_C(152), UINT8_C(175), UINT8_C(227), UINT8_C( 68), UINT8_C(244), UINT8_MAX, UINT8_C( 55), UINT8_C( 59), UINT8_C( 87), UINT8_C(139), UINT8_C(117), UINT8_C(175), UINT8_C( 85), UINT8_C(238), UINT8_C( 98), UINT8_C(149), UINT8_C(252), UINT8_C(140), UINT8_C(152), UINT8_C(144), UINT8_C( 33), UINT8_C(106), UINT8_C(135), UINT8_C(168) }, { UINT8_C( 0), UINT8_C(243), UINT8_C( 0), UINT8_C(150), UINT8_C(251), UINT8_C( 37), UINT8_C(227), UINT8_C(180), UINT8_C(202), UINT8_C(233), UINT8_C( 0), UINT8_C(112), UINT8_C(172), UINT8_C( 0), UINT8_C( 68), UINT8_C( 0), 
UINT8_MAX, UINT8_C( 0), UINT8_C(210), UINT8_C( 0), UINT8_C( 88), UINT8_C( 0), UINT8_C( 0), UINT8_C(179), UINT8_C( 0), UINT8_C( 90), UINT8_C( 0), UINT8_C( 86), UINT8_C( 0), UINT8_C(148), UINT8_C(241), UINT8_C(247), UINT8_C( 0), UINT8_C(221), UINT8_C( 0), UINT8_C(230), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(224), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 55), UINT8_C( 59), UINT8_C( 87), UINT8_C(139), UINT8_C(117), UINT8_C(175), UINT8_C( 0), UINT8_C( 0), UINT8_C(118), UINT8_C( 0), UINT8_C(252), UINT8_C(140), UINT8_C( 0), UINT8_C(144), UINT8_C(196), UINT8_C( 0), UINT8_C( 0), UINT8_C(206) } }, { UINT64_C(14388484244564333329), { UINT8_C(217), UINT8_C(145), UINT8_C( 12), UINT8_C(205), UINT8_C(145), UINT8_C( 67), UINT8_C( 8), UINT8_C(232), UINT8_C(207), UINT8_C(125), UINT8_C(151), UINT8_C( 36), UINT8_C(107), UINT8_C(249), UINT8_C(185), UINT8_C(103), UINT8_C(133), UINT8_C( 82), UINT8_C(247), UINT8_C(166), UINT8_C(188), UINT8_C(127), UINT8_C( 79), UINT8_C(205), UINT8_C( 82), UINT8_C(222), UINT8_C(170), UINT8_C(130), UINT8_C( 8), UINT8_C( 89), UINT8_C( 73), UINT8_C(225), UINT8_C(234), UINT8_C( 85), UINT8_C(175), UINT8_C(123), UINT8_C(153), UINT8_C(183), UINT8_C( 99), UINT8_C(104), UINT8_C( 53), UINT8_C(250), UINT8_C(140), UINT8_C(160), UINT8_C(243), UINT8_C( 70), UINT8_C( 8), UINT8_C(121), UINT8_C(152), UINT8_MAX, UINT8_C( 31), UINT8_C( 84), UINT8_C(126), UINT8_C(110), UINT8_C( 34), UINT8_C(209), UINT8_C( 76), UINT8_C(204), UINT8_C( 83), UINT8_C( 84), UINT8_C( 37), UINT8_C(156), UINT8_C( 54), UINT8_C( 16) }, { UINT8_C(242), UINT8_C(229), UINT8_C(139), UINT8_C(139), UINT8_C(156), UINT8_C(239), UINT8_C(243), UINT8_C(209), UINT8_C(233), UINT8_C(127), UINT8_C(114), UINT8_C(221), UINT8_C(197), UINT8_C(122), UINT8_C( 86), UINT8_C( 93), UINT8_C(121), UINT8_C(117), UINT8_C(178), UINT8_C(248), UINT8_C(228), UINT8_C(212), UINT8_C(201), UINT8_C( 48), UINT8_C(160), UINT8_C( 28), UINT8_C(133), UINT8_C(198), UINT8_C(184), UINT8_C(187), 
UINT8_C(214), UINT8_C(170), UINT8_C(160), UINT8_C( 97), UINT8_C( 53), UINT8_C( 60), UINT8_C( 80), UINT8_C( 40), UINT8_C( 14), UINT8_C( 58), UINT8_C(168), UINT8_C(128), UINT8_C( 23), UINT8_C(109), UINT8_C(250), UINT8_C(109), UINT8_C(203), UINT8_C(115), UINT8_C(226), UINT8_C(125), UINT8_C(107), UINT8_C(198), UINT8_C( 81), UINT8_C( 52), UINT8_C(247), UINT8_C(241), UINT8_C( 80), UINT8_C(124), UINT8_C(183), UINT8_C( 9), UINT8_C( 55), UINT8_C(141), UINT8_C(179), UINT8_C(215) }, { UINT8_C(242), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(156), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(233), UINT8_C(127), UINT8_C( 0), UINT8_C( 0), UINT8_C(197), UINT8_C( 0), UINT8_C(185), UINT8_C(103), UINT8_C(133), UINT8_C(117), UINT8_C(247), UINT8_C(248), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(205), UINT8_C(160), UINT8_C( 0), UINT8_C(170), UINT8_C(198), UINT8_C(184), UINT8_C( 0), UINT8_C(214), UINT8_C(225), UINT8_C(234), UINT8_C( 97), UINT8_C(175), UINT8_C(123), UINT8_C( 0), UINT8_C(183), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(250), UINT8_C( 0), UINT8_C(160), UINT8_C( 0), UINT8_C(109), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C(107), UINT8_C(198), UINT8_C( 0), UINT8_C(110), UINT8_C( 0), UINT8_C(241), UINT8_C( 80), UINT8_C(204), UINT8_C(183), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(179), UINT8_C(215) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_maskz_max_epu8(test_vec[i].k, a, b); simde_test_x86_assert_equal_u8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm512_max_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { INT16_C( 14691), -INT16_C( 2237), INT16_C( 8698), INT16_C( 9348), INT16_C( 13857), -INT16_C( 10584), INT16_C( 25854), -INT16_C( 6420), 
INT16_C( 17420), INT16_C( 1517), -INT16_C( 5283), INT16_C( 26495), INT16_C( 7532), -INT16_C( 2781), INT16_C( 9520), -INT16_C( 27403), INT16_C( 14430), INT16_C( 22667), INT16_C( 3929), INT16_C( 31356), INT16_C( 9541), INT16_C( 17232), INT16_C( 15497), -INT16_C( 27350), INT16_C( 6016), -INT16_C( 8550), INT16_C( 6658), INT16_C( 28485), INT16_C( 26679), INT16_C( 26724), INT16_C( 22926), -INT16_C( 4868) }, { -INT16_C( 30831), -INT16_C( 5563), -INT16_C( 15978), -INT16_C( 9371), -INT16_C( 18970), INT16_C( 28447), INT16_C( 18930), INT16_C( 29188), -INT16_C( 24736), INT16_C( 25168), -INT16_C( 26951), -INT16_C( 3887), INT16_C( 14078), -INT16_C( 29608), INT16_C( 21647), INT16_C( 8569), -INT16_C( 16677), INT16_C( 28939), INT16_C( 28799), INT16_C( 26189), INT16_C( 27686), INT16_C( 6357), -INT16_C( 9547), INT16_C( 5514), -INT16_C( 9351), INT16_C( 12919), INT16_C( 18801), INT16_C( 28450), INT16_C( 31615), INT16_C( 3836), INT16_C( 30159), -INT16_C( 21713) }, { INT16_C( 14691), -INT16_C( 2237), INT16_C( 8698), INT16_C( 9348), INT16_C( 13857), INT16_C( 28447), INT16_C( 25854), INT16_C( 29188), INT16_C( 17420), INT16_C( 25168), -INT16_C( 5283), INT16_C( 26495), INT16_C( 14078), -INT16_C( 2781), INT16_C( 21647), INT16_C( 8569), INT16_C( 14430), INT16_C( 28939), INT16_C( 28799), INT16_C( 31356), INT16_C( 27686), INT16_C( 17232), INT16_C( 15497), INT16_C( 5514), INT16_C( 6016), INT16_C( 12919), INT16_C( 18801), INT16_C( 28485), INT16_C( 31615), INT16_C( 26724), INT16_C( 30159), -INT16_C( 4868) } }, { { INT16_C( 15155), -INT16_C( 19940), INT16_C( 27051), -INT16_C( 12008), -INT16_C( 4395), -INT16_C( 29975), INT16_C( 29896), INT16_C( 16799), INT16_C( 5967), -INT16_C( 16269), -INT16_C( 27296), -INT16_C( 8401), INT16_C( 11024), -INT16_C( 7955), INT16_C( 7584), -INT16_C( 11381), -INT16_C( 22696), INT16_C( 902), -INT16_C( 25071), -INT16_C( 6443), -INT16_C( 16756), INT16_C( 21617), INT16_C( 4146), -INT16_C( 32363), INT16_C( 2087), -INT16_C( 30911), INT16_C( 29086), -INT16_C( 20890), INT16_C( 
21660), INT16_C( 15758), INT16_C( 6513), -INT16_C( 14064) }, { -INT16_C( 26943), -INT16_C( 11572), -INT16_C( 24267), -INT16_C( 15944), INT16_C( 10592), -INT16_C( 28138), -INT16_C( 21702), INT16_C( 24852), INT16_C( 21940), INT16_C( 21225), INT16_C( 20422), INT16_C( 25344), -INT16_C( 28765), INT16_C( 5280), -INT16_C( 20312), INT16_C( 27101), -INT16_C( 21945), INT16_C( 31803), -INT16_C( 2997), -INT16_C( 21699), INT16_C( 21277), INT16_C( 22334), INT16_C( 21247), -INT16_C( 19527), -INT16_C( 23897), INT16_C( 28165), INT16_C( 1521), -INT16_C( 27183), INT16_C( 29076), INT16_C( 15785), -INT16_C( 30943), INT16_C( 26790) }, { INT16_C( 15155), -INT16_C( 11572), INT16_C( 27051), -INT16_C( 12008), INT16_C( 10592), -INT16_C( 28138), INT16_C( 29896), INT16_C( 24852), INT16_C( 21940), INT16_C( 21225), INT16_C( 20422), INT16_C( 25344), INT16_C( 11024), INT16_C( 5280), INT16_C( 7584), INT16_C( 27101), -INT16_C( 21945), INT16_C( 31803), -INT16_C( 2997), -INT16_C( 6443), INT16_C( 21277), INT16_C( 22334), INT16_C( 21247), -INT16_C( 19527), INT16_C( 2087), INT16_C( 28165), INT16_C( 29086), -INT16_C( 20890), INT16_C( 29076), INT16_C( 15785), INT16_C( 6513), INT16_C( 26790) } }, { { -INT16_C( 7631), INT16_C( 31972), INT16_C( 8918), -INT16_C( 3288), INT16_C( 26229), INT16_C( 29771), INT16_C( 1208), INT16_C( 24359), INT16_C( 11430), -INT16_C( 26675), -INT16_C( 25038), -INT16_C( 14804), -INT16_C( 10737), INT16_C( 12547), -INT16_C( 21923), -INT16_C( 29031), INT16_C( 32396), INT16_C( 25098), INT16_C( 12960), INT16_C( 5461), -INT16_C( 24424), INT16_C( 20618), -INT16_C( 20060), INT16_C( 19120), INT16_C( 32222), INT16_C( 4322), INT16_C( 3612), INT16_C( 11222), -INT16_C( 9500), INT16_C( 16732), -INT16_C( 2428), INT16_C( 4303) }, { -INT16_C( 9612), INT16_C( 5234), -INT16_C( 14580), -INT16_C( 23255), -INT16_C( 19608), INT16_C( 3317), -INT16_C( 23195), INT16_C( 17239), INT16_C( 14627), INT16_C( 16211), INT16_C( 10567), INT16_C( 11370), -INT16_C( 14589), -INT16_C( 30867), INT16_C( 15805), INT16_C( 
12695), INT16_C( 2327), INT16_C( 9029), INT16_C( 28369), INT16_C( 14792), -INT16_C( 16862), -INT16_C( 30907), -INT16_C( 25501), -INT16_C( 31030), INT16_C( 7637), INT16_C( 7621), INT16_C( 12358), INT16_C( 19017), -INT16_C( 18697), -INT16_C( 19247), INT16_C( 27123), INT16_C( 2789) }, { -INT16_C( 7631), INT16_C( 31972), INT16_C( 8918), -INT16_C( 3288), INT16_C( 26229), INT16_C( 29771), INT16_C( 1208), INT16_C( 24359), INT16_C( 14627), INT16_C( 16211), INT16_C( 10567), INT16_C( 11370), -INT16_C( 10737), INT16_C( 12547), INT16_C( 15805), INT16_C( 12695), INT16_C( 32396), INT16_C( 25098), INT16_C( 28369), INT16_C( 14792), -INT16_C( 16862), INT16_C( 20618), -INT16_C( 20060), INT16_C( 19120), INT16_C( 32222), INT16_C( 7621), INT16_C( 12358), INT16_C( 19017), -INT16_C( 9500), INT16_C( 16732), INT16_C( 27123), INT16_C( 4303) } }, { { INT16_C( 10866), INT16_C( 17198), -INT16_C( 2408), -INT16_C( 17796), -INT16_C( 15692), INT16_C( 6209), INT16_C( 2910), INT16_C( 13470), INT16_C( 25640), INT16_C( 28497), -INT16_C( 25964), -INT16_C( 29767), -INT16_C( 30128), INT16_C( 17471), INT16_C( 9459), INT16_C( 26190), INT16_C( 31822), -INT16_C( 6487), INT16_C( 9843), INT16_C( 10145), -INT16_C( 7448), INT16_C( 17983), -INT16_C( 8466), INT16_C( 5754), -INT16_C( 13502), -INT16_C( 10619), INT16_C( 15973), -INT16_C( 18847), -INT16_C( 24375), -INT16_C( 17158), INT16_C( 18628), INT16_C( 4642) }, { -INT16_C( 13115), INT16_C( 14584), -INT16_C( 26126), -INT16_C( 9633), -INT16_C( 24708), INT16_C( 27168), -INT16_C( 25731), -INT16_C( 16512), INT16_C( 1638), -INT16_C( 13163), -INT16_C( 2492), INT16_C( 3458), INT16_C( 31894), INT16_C( 23242), -INT16_C( 4924), -INT16_C( 30356), INT16_C( 25784), -INT16_C( 21823), INT16_C( 8702), INT16_C( 31364), -INT16_C( 23104), INT16_C( 15844), INT16_C( 25664), -INT16_C( 22788), -INT16_C( 28310), -INT16_C( 20622), -INT16_C( 2937), INT16_C( 7612), -INT16_C( 31120), INT16_C( 13687), -INT16_C( 7309), INT16_C( 11198) }, { INT16_C( 10866), INT16_C( 17198), -INT16_C( 2408), 
-INT16_C( 9633), -INT16_C( 15692), INT16_C( 27168), INT16_C( 2910), INT16_C( 13470), INT16_C( 25640), INT16_C( 28497), -INT16_C( 2492), INT16_C( 3458), INT16_C( 31894), INT16_C( 23242), INT16_C( 9459), INT16_C( 26190), INT16_C( 31822), -INT16_C( 6487), INT16_C( 9843), INT16_C( 31364), -INT16_C( 7448), INT16_C( 17983), INT16_C( 25664), INT16_C( 5754), -INT16_C( 13502), -INT16_C( 10619), INT16_C( 15973), INT16_C( 7612), -INT16_C( 24375), INT16_C( 13687), INT16_C( 18628), INT16_C( 11198) } }, { { -INT16_C( 32697), INT16_C( 17878), INT16_C( 23201), INT16_C( 25023), -INT16_C( 23553), INT16_C( 16286), -INT16_C( 26104), INT16_C( 29414), INT16_C( 22571), -INT16_C( 19935), -INT16_C( 8627), -INT16_C( 16945), INT16_C( 18020), -INT16_C( 10254), -INT16_C( 20183), INT16_C( 28675), -INT16_C( 9935), -INT16_C( 11594), INT16_C( 30003), INT16_C( 13107), -INT16_C( 12007), INT16_C( 8562), INT16_C( 22635), -INT16_C( 26989), -INT16_C( 19023), -INT16_C( 440), INT16_C( 6035), -INT16_C( 2117), -INT16_C( 20899), -INT16_C( 31025), -INT16_C( 11681), -INT16_C( 28426) }, { -INT16_C( 21333), -INT16_C( 8606), -INT16_C( 27358), INT16_C( 15121), -INT16_C( 31642), -INT16_C( 11940), -INT16_C( 4132), -INT16_C( 29337), -INT16_C( 20572), INT16_C( 14219), INT16_C( 18374), INT16_C( 9007), -INT16_C( 267), INT16_C( 21673), -INT16_C( 24624), INT16_C( 31716), INT16_C( 17996), INT16_C( 28249), INT16_C( 27611), INT16_C( 16809), INT16_C( 1519), -INT16_C( 13550), INT16_C( 31220), -INT16_C( 26279), -INT16_C( 7128), -INT16_C( 4400), -INT16_C( 213), INT16_C( 8209), -INT16_C( 17667), -INT16_C( 12940), INT16_C( 22617), -INT16_C( 23224) }, { -INT16_C( 21333), INT16_C( 17878), INT16_C( 23201), INT16_C( 25023), -INT16_C( 23553), INT16_C( 16286), -INT16_C( 4132), INT16_C( 29414), INT16_C( 22571), INT16_C( 14219), INT16_C( 18374), INT16_C( 9007), INT16_C( 18020), INT16_C( 21673), -INT16_C( 20183), INT16_C( 31716), INT16_C( 17996), INT16_C( 28249), INT16_C( 30003), INT16_C( 16809), INT16_C( 1519), INT16_C( 8562), INT16_C( 
31220), -INT16_C( 26279), -INT16_C( 7128), -INT16_C( 440), INT16_C( 6035), INT16_C( 8209), -INT16_C( 17667), -INT16_C( 12940), INT16_C( 22617), -INT16_C( 23224) } }, { { -INT16_C( 23906), INT16_C( 30995), -INT16_C( 17395), -INT16_C( 838), -INT16_C( 13119), -INT16_C( 18745), INT16_C( 8261), INT16_C( 27983), INT16_C( 7941), INT16_C( 12379), INT16_C( 27679), INT16_C( 7249), -INT16_C( 15066), -INT16_C( 32534), INT16_C( 12830), -INT16_C( 17371), INT16_C( 14804), -INT16_C( 7882), -INT16_C( 3851), -INT16_C( 18467), -INT16_C( 23107), INT16_C( 621), -INT16_C( 17211), -INT16_C( 13712), -INT16_C( 13349), -INT16_C( 1285), INT16_C( 19512), INT16_C( 24087), INT16_C( 273), INT16_C( 12254), INT16_C( 1075), INT16_C( 2284) }, { INT16_C( 8765), INT16_C( 13033), -INT16_C( 14574), -INT16_C( 12311), INT16_C( 22124), INT16_C( 12754), INT16_C( 16914), -INT16_C( 4356), -INT16_C( 2291), INT16_C( 17896), -INT16_C( 189), INT16_C( 21668), -INT16_C( 32256), INT16_C( 13444), INT16_C( 28806), -INT16_C( 15556), INT16_C( 9618), -INT16_C( 23306), -INT16_C( 8212), INT16_C( 22644), INT16_C( 17974), INT16_C( 18570), -INT16_C( 31096), -INT16_C( 27338), INT16_C( 8061), -INT16_C( 16165), INT16_C( 32542), INT16_C( 7956), -INT16_C( 26623), -INT16_C( 30637), -INT16_C( 28920), -INT16_C( 26037) }, { INT16_C( 8765), INT16_C( 30995), -INT16_C( 14574), -INT16_C( 838), INT16_C( 22124), INT16_C( 12754), INT16_C( 16914), INT16_C( 27983), INT16_C( 7941), INT16_C( 17896), INT16_C( 27679), INT16_C( 21668), -INT16_C( 15066), INT16_C( 13444), INT16_C( 28806), -INT16_C( 15556), INT16_C( 14804), -INT16_C( 7882), -INT16_C( 3851), INT16_C( 22644), INT16_C( 17974), INT16_C( 18570), -INT16_C( 17211), -INT16_C( 13712), INT16_C( 8061), -INT16_C( 1285), INT16_C( 32542), INT16_C( 24087), INT16_C( 273), INT16_C( 12254), INT16_C( 1075), INT16_C( 2284) } }, { { INT16_C( 16820), -INT16_C( 24257), -INT16_C( 19679), INT16_C( 22521), -INT16_C( 31751), -INT16_C( 32353), -INT16_C( 10743), -INT16_C( 31210), -INT16_C( 3595), INT16_C( 4934), 
INT16_C( 23408), INT16_C( 29234), -INT16_C( 31245), -INT16_C( 774), INT16_C( 17684), -INT16_C( 13930), -INT16_C( 10873), -INT16_C( 22422), INT16_C( 25480), -INT16_C( 32257), -INT16_C( 24857), -INT16_C( 4094), INT16_C( 6516), INT16_C( 26999), -INT16_C( 17142), INT16_C( 31613), -INT16_C( 20712), INT16_C( 3309), -INT16_C( 6347), INT16_C( 18696), -INT16_C( 25044), -INT16_C( 19694) }, { INT16_C( 31860), -INT16_C( 933), INT16_C( 23264), -INT16_C( 14466), -INT16_C( 32519), INT16_C( 28087), INT16_C( 11929), -INT16_C( 23337), INT16_C( 21740), INT16_C( 1055), INT16_C( 3075), INT16_C( 14352), INT16_C( 6387), INT16_C( 8066), -INT16_C( 27465), INT16_C( 11219), INT16_C( 11793), -INT16_C( 3801), -INT16_C( 23159), -INT16_C( 32072), INT16_C( 28454), -INT16_C( 16401), -INT16_C( 14690), -INT16_C( 30109), -INT16_C( 32230), INT16_C( 7822), -INT16_C( 24690), -INT16_C( 32426), -INT16_C( 10057), INT16_C( 28321), INT16_C( 29805), INT16_C( 32409) }, { INT16_C( 31860), -INT16_C( 933), INT16_C( 23264), INT16_C( 22521), -INT16_C( 31751), INT16_C( 28087), INT16_C( 11929), -INT16_C( 23337), INT16_C( 21740), INT16_C( 4934), INT16_C( 23408), INT16_C( 29234), INT16_C( 6387), INT16_C( 8066), INT16_C( 17684), INT16_C( 11219), INT16_C( 11793), -INT16_C( 3801), INT16_C( 25480), -INT16_C( 32072), INT16_C( 28454), -INT16_C( 4094), INT16_C( 6516), INT16_C( 26999), -INT16_C( 17142), INT16_C( 31613), -INT16_C( 20712), INT16_C( 3309), -INT16_C( 6347), INT16_C( 28321), INT16_C( 29805), INT16_C( 32409) } }, { { -INT16_C( 15966), INT16_C( 11119), INT16_C( 10086), -INT16_C( 29523), -INT16_C( 25194), INT16_C( 13388), -INT16_C( 20637), INT16_C( 32446), INT16_C( 19762), -INT16_C( 16228), -INT16_C( 3348), -INT16_C( 23742), -INT16_C( 7221), INT16_C( 14354), -INT16_C( 21673), -INT16_C( 1610), INT16_C( 9580), -INT16_C( 11483), -INT16_C( 11700), -INT16_C( 7585), -INT16_C( 21649), -INT16_C( 11497), -INT16_C( 10917), -INT16_C( 29359), -INT16_C( 4830), INT16_C( 3661), -INT16_C( 28705), -INT16_C( 21838), -INT16_C( 15246), 
-INT16_C( 13854), -INT16_C( 26513), -INT16_C( 9021) }, { -INT16_C( 5955), INT16_C( 2479), INT16_C( 3770), INT16_C( 10988), INT16_C( 954), INT16_C( 5629), INT16_C( 20184), -INT16_C( 1118), -INT16_C( 4293), INT16_C( 6665), -INT16_C( 17537), -INT16_C( 3643), -INT16_C( 22657), -INT16_C( 4165), INT16_C( 32320), -INT16_C( 565), INT16_C( 31334), INT16_C( 8199), -INT16_C( 3192), INT16_C( 16970), INT16_C( 18422), -INT16_C( 12713), -INT16_C( 1643), -INT16_C( 12087), -INT16_C( 11287), INT16_C( 26859), -INT16_C( 20338), INT16_C( 3673), INT16_C( 5207), -INT16_C( 26627), -INT16_C( 14190), -INT16_C( 1899) }, { -INT16_C( 5955), INT16_C( 11119), INT16_C( 10086), INT16_C( 10988), INT16_C( 954), INT16_C( 13388), INT16_C( 20184), INT16_C( 32446), INT16_C( 19762), INT16_C( 6665), -INT16_C( 3348), -INT16_C( 3643), -INT16_C( 7221), INT16_C( 14354), INT16_C( 32320), -INT16_C( 565), INT16_C( 31334), INT16_C( 8199), -INT16_C( 3192), INT16_C( 16970), INT16_C( 18422), -INT16_C( 11497), -INT16_C( 1643), -INT16_C( 12087), -INT16_C( 4830), INT16_C( 26859), -INT16_C( 20338), INT16_C( 3673), INT16_C( 5207), -INT16_C( 13854), -INT16_C( 14190), -INT16_C( 1899) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_max_epi16(a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_max_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t src[32]; const simde__mmask32 k; const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { -INT16_C( 31856), INT16_C( 27575), INT16_C( 18767), INT16_C( 28047), INT16_C( 25446), -INT16_C( 31139), INT16_C( 7112), INT16_C( 14683), INT16_C( 7442), -INT16_C( 5599), INT16_C( 11607), INT16_C( 12590), INT16_C( 4923), INT16_C( 25627), INT16_C( 2220), INT16_C( 15731), INT16_C( 11147), -INT16_C( 
9560), INT16_C( 14196), -INT16_C( 9657), -INT16_C( 23142), INT16_C( 25441), -INT16_C( 17216), -INT16_C( 11620), -INT16_C( 16935), INT16_C( 12733), -INT16_C( 5142), INT16_C( 9826), INT16_C( 32254), -INT16_C( 21622), -INT16_C( 379), INT16_C( 4328) }, UINT32_C(2649395241), { -INT16_C( 10026), -INT16_C( 26684), INT16_C( 24980), INT16_C( 28265), INT16_C( 9758), INT16_C( 2463), INT16_C( 273), INT16_C( 4143), -INT16_C( 18050), INT16_C( 955), -INT16_C( 23625), -INT16_C( 8173), -INT16_C( 717), -INT16_C( 1411), -INT16_C( 2770), INT16_C( 1371), INT16_C( 8397), INT16_C( 25244), INT16_C( 1409), -INT16_C( 24624), INT16_C( 28460), INT16_C( 15784), -INT16_C( 10384), -INT16_C( 4531), INT16_C( 2193), INT16_C( 18673), INT16_C( 1195), -INT16_C( 8663), -INT16_C( 23039), INT16_C( 12248), INT16_C( 13467), INT16_C( 26932) }, { -INT16_C( 12204), -INT16_C( 10805), -INT16_C( 25642), INT16_C( 628), INT16_C( 7434), INT16_C( 31295), -INT16_C( 29196), -INT16_C( 31384), INT16_C( 22933), INT16_C( 16846), -INT16_C( 2211), INT16_C( 24095), -INT16_C( 1891), INT16_C( 14733), -INT16_C( 15828), -INT16_C( 32606), INT16_C( 28050), INT16_C( 26709), -INT16_C( 14072), INT16_C( 4714), -INT16_C( 21786), -INT16_C( 9332), -INT16_C( 3017), -INT16_C( 13216), INT16_C( 11853), -INT16_C( 22003), INT16_C( 11557), -INT16_C( 15608), -INT16_C( 27355), INT16_C( 20988), -INT16_C( 25001), -INT16_C( 5423) }, { -INT16_C( 10026), INT16_C( 27575), INT16_C( 18767), INT16_C( 28265), INT16_C( 25446), INT16_C( 31295), INT16_C( 7112), INT16_C( 14683), INT16_C( 7442), -INT16_C( 5599), INT16_C( 11607), INT16_C( 12590), -INT16_C( 717), INT16_C( 25627), INT16_C( 2220), INT16_C( 1371), INT16_C( 11147), INT16_C( 26709), INT16_C( 14196), INT16_C( 4714), -INT16_C( 23142), INT16_C( 15784), -INT16_C( 3017), -INT16_C( 4531), INT16_C( 11853), INT16_C( 12733), INT16_C( 11557), -INT16_C( 8663), -INT16_C( 23039), -INT16_C( 21622), -INT16_C( 379), INT16_C( 26932) } }, { { INT16_C( 9739), INT16_C( 4946), -INT16_C( 16913), -INT16_C( 10715), -INT16_C( 
20121), -INT16_C( 24911), INT16_C( 4517), -INT16_C( 3478), INT16_C( 30784), INT16_C( 26012), -INT16_C( 23387), -INT16_C( 13784), INT16_C( 9273), -INT16_C( 28389), -INT16_C( 4926), -INT16_C( 12933), -INT16_C( 13038), INT16_C( 480), INT16_C( 1418), -INT16_C( 3625), -INT16_C( 30538), INT16_C( 23439), -INT16_C( 1382), -INT16_C( 9651), -INT16_C( 5774), INT16_C( 5951), INT16_C( 26765), -INT16_C( 14367), -INT16_C( 884), INT16_C( 20312), -INT16_C( 11288), -INT16_C( 1508) }, UINT32_C( 737934752), { -INT16_C( 21413), -INT16_C( 2796), INT16_C( 25254), INT16_C( 6351), INT16_C( 3915), -INT16_C( 9937), INT16_C( 4215), INT16_C( 928), -INT16_C( 2036), -INT16_C( 2990), INT16_C( 28619), INT16_C( 27630), -INT16_C( 5780), INT16_C( 28310), -INT16_C( 19524), INT16_C( 6183), INT16_C( 15455), INT16_C( 1293), -INT16_C( 8802), -INT16_C( 5859), INT16_C( 19692), INT16_C( 25538), INT16_C( 25180), INT16_C( 26726), -INT16_C( 18086), INT16_C( 9564), INT16_C( 18984), -INT16_C( 27503), INT16_C( 10035), -INT16_C( 4094), INT16_C( 10970), INT16_C( 14600) }, { INT16_C( 5478), INT16_C( 1086), INT16_C( 23538), -INT16_C( 8467), -INT16_C( 20313), INT16_C( 833), -INT16_C( 22510), INT16_C( 28011), -INT16_C( 14495), -INT16_C( 30318), INT16_C( 8977), INT16_C( 17693), INT16_C( 8011), INT16_C( 9525), INT16_C( 15689), -INT16_C( 20641), -INT16_C( 25262), INT16_C( 17843), -INT16_C( 24071), -INT16_C( 24541), INT16_C( 25937), INT16_C( 25508), INT16_C( 3853), INT16_C( 28368), INT16_C( 25559), -INT16_C( 5897), INT16_C( 5254), -INT16_C( 11987), INT16_C( 25139), INT16_C( 32247), INT16_C( 22175), -INT16_C( 3540) }, { INT16_C( 9739), INT16_C( 4946), -INT16_C( 16913), -INT16_C( 10715), -INT16_C( 20121), INT16_C( 833), INT16_C( 4517), INT16_C( 28011), -INT16_C( 2036), INT16_C( 26012), INT16_C( 28619), INT16_C( 27630), INT16_C( 8011), INT16_C( 28310), INT16_C( 15689), INT16_C( 6183), INT16_C( 15455), INT16_C( 17843), INT16_C( 1418), -INT16_C( 5859), INT16_C( 25937), INT16_C( 25538), INT16_C( 25180), INT16_C( 28368), INT16_C( 
25559), INT16_C( 9564), INT16_C( 26765), -INT16_C( 11987), -INT16_C( 884), INT16_C( 32247), -INT16_C( 11288), -INT16_C( 1508) } }, { { -INT16_C( 7949), -INT16_C( 5065), INT16_C( 23169), -INT16_C( 11635), INT16_C( 12735), -INT16_C( 13259), INT16_C( 1600), INT16_C( 5946), INT16_C( 12649), -INT16_C( 4352), INT16_C( 11589), INT16_C( 31169), -INT16_C( 18288), INT16_C( 12278), INT16_C( 8718), INT16_C( 289), INT16_C( 22530), -INT16_C( 31762), INT16_C( 31667), INT16_C( 29269), -INT16_C( 29780), -INT16_C( 5057), INT16_C( 31121), -INT16_C( 1532), INT16_C( 1195), -INT16_C( 3863), -INT16_C( 21967), -INT16_C( 16023), INT16_C( 24418), INT16_C( 28913), INT16_C( 4738), -INT16_C( 31630) }, UINT32_C( 503865451), { -INT16_C( 12312), INT16_C( 31091), INT16_C( 30537), -INT16_C( 2957), INT16_C( 23931), -INT16_C( 21020), INT16_C( 19975), INT16_C( 27246), INT16_C( 24493), INT16_C( 12250), INT16_C( 19570), -INT16_C( 8780), -INT16_C( 17236), -INT16_C( 30725), -INT16_C( 29927), INT16_C( 526), -INT16_C( 32165), -INT16_C( 23429), -INT16_C( 4103), INT16_C( 30104), INT16_C( 31820), INT16_C( 21282), -INT16_C( 28470), INT16_C( 30909), -INT16_C( 26384), INT16_C( 25255), INT16_C( 23524), -INT16_C( 28353), INT16_C( 14871), INT16_C( 12568), INT16_C( 10181), INT16_C( 8243) }, { -INT16_C( 20823), -INT16_C( 23868), INT16_C( 23709), -INT16_C( 5865), INT16_C( 14809), -INT16_C( 23747), -INT16_C( 1334), -INT16_C( 17893), -INT16_C( 15470), INT16_C( 30492), INT16_C( 23326), INT16_C( 13832), INT16_C( 8341), INT16_C( 23143), -INT16_C( 26041), -INT16_C( 3973), INT16_C( 16200), -INT16_C( 6509), -INT16_C( 21860), INT16_C( 30159), INT16_C( 3300), -INT16_C( 20968), INT16_C( 13319), -INT16_C( 26264), -INT16_C( 31497), INT16_C( 5392), INT16_C( 6367), INT16_C( 29771), -INT16_C( 19911), -INT16_C( 32562), INT16_C( 18764), -INT16_C( 27279) }, { -INT16_C( 12312), INT16_C( 31091), INT16_C( 23169), -INT16_C( 2957), INT16_C( 12735), -INT16_C( 21020), INT16_C( 19975), INT16_C( 5946), INT16_C( 12649), -INT16_C( 4352), INT16_C( 
11589), INT16_C( 31169), -INT16_C( 18288), INT16_C( 23143), -INT16_C( 26041), INT16_C( 289), INT16_C( 22530), -INT16_C( 31762), INT16_C( 31667), INT16_C( 30159), -INT16_C( 29780), -INT16_C( 5057), INT16_C( 31121), -INT16_C( 1532), INT16_C( 1195), INT16_C( 25255), INT16_C( 23524), INT16_C( 29771), INT16_C( 14871), INT16_C( 28913), INT16_C( 4738), -INT16_C( 31630) } }, { { INT16_C( 1161), INT16_C( 9595), INT16_C( 19118), -INT16_C( 28006), -INT16_C( 19881), INT16_C( 24128), -INT16_C( 22298), -INT16_C( 8713), INT16_C( 2092), INT16_C( 3059), INT16_C( 15904), INT16_C( 22911), INT16_C( 20209), INT16_C( 15834), INT16_C( 19351), INT16_C( 8402), INT16_C( 19791), -INT16_C( 699), -INT16_C( 8296), -INT16_C( 4208), -INT16_C( 12142), INT16_C( 30797), INT16_C( 17529), -INT16_C( 23210), INT16_C( 18764), INT16_C( 28081), INT16_C( 12423), INT16_C( 30918), -INT16_C( 24450), INT16_C( 5814), -INT16_C( 30485), INT16_C( 14902) }, UINT32_C(1849195734), { -INT16_C( 21864), INT16_C( 4454), -INT16_C( 17170), INT16_C( 15287), INT16_C( 26629), -INT16_C( 29528), INT16_C( 28312), INT16_C( 5893), -INT16_C( 17649), -INT16_C( 1491), INT16_C( 25411), INT16_C( 6453), INT16_C( 28127), INT16_C( 15239), -INT16_C( 7115), -INT16_C( 13016), -INT16_C( 29042), INT16_C( 32223), -INT16_C( 27062), INT16_C( 20408), INT16_C( 24830), -INT16_C( 26916), -INT16_C( 7730), -INT16_C( 8787), -INT16_C( 9572), -INT16_C( 8232), INT16_C( 3390), INT16_C( 7673), -INT16_C( 32646), -INT16_C( 20648), -INT16_C( 32411), -INT16_C( 3204) }, { INT16_C( 23311), INT16_C( 23152), INT16_C( 10481), -INT16_C( 4183), -INT16_C( 31352), INT16_C( 22406), INT16_C( 13158), INT16_C( 564), INT16_C( 3086), INT16_C( 19682), -INT16_C( 9447), -INT16_C( 27799), -INT16_C( 15781), -INT16_C( 16318), -INT16_C( 16573), INT16_C( 21172), INT16_C( 9242), INT16_C( 3244), INT16_C( 22093), -INT16_C( 10757), -INT16_C( 32293), INT16_C( 16940), INT16_C( 25013), -INT16_C( 15548), INT16_C( 9837), -INT16_C( 30961), INT16_C( 30721), INT16_C( 23834), INT16_C( 23866), 
INT16_C( 32029), -INT16_C( 12004), INT16_C( 14032) }, { INT16_C( 1161), INT16_C( 23152), INT16_C( 10481), -INT16_C( 28006), INT16_C( 26629), INT16_C( 24128), INT16_C( 28312), INT16_C( 5893), INT16_C( 2092), INT16_C( 3059), INT16_C( 25411), INT16_C( 6453), INT16_C( 28127), INT16_C( 15239), -INT16_C( 7115), INT16_C( 8402), INT16_C( 19791), -INT16_C( 699), -INT16_C( 8296), INT16_C( 20408), INT16_C( 24830), INT16_C( 16940), INT16_C( 17529), -INT16_C( 23210), INT16_C( 18764), -INT16_C( 8232), INT16_C( 30721), INT16_C( 23834), -INT16_C( 24450), INT16_C( 32029), -INT16_C( 12004), INT16_C( 14902) } }, { { INT16_C( 31990), INT16_C( 17218), INT16_C( 16082), -INT16_C( 20968), INT16_C( 17855), INT16_C( 29936), INT16_C( 13478), INT16_C( 4919), INT16_C( 18011), INT16_C( 23706), -INT16_C( 19009), -INT16_C( 1607), -INT16_C( 10478), INT16_C( 11895), INT16_C( 18344), -INT16_C( 24988), -INT16_C( 22589), -INT16_C( 26911), -INT16_C( 1307), -INT16_C( 23484), INT16_C( 13375), -INT16_C( 6887), INT16_C( 20584), -INT16_C( 15368), -INT16_C( 27753), INT16_C( 22048), -INT16_C( 9912), INT16_C( 23119), -INT16_C( 14672), INT16_C( 22920), -INT16_C( 5107), -INT16_C( 11785) }, UINT32_C(2020071827), { INT16_C( 14047), INT16_C( 18423), -INT16_C( 4218), INT16_C( 7435), INT16_C( 11138), -INT16_C( 13709), -INT16_C( 15612), -INT16_C( 19164), -INT16_C( 21367), -INT16_C( 26866), INT16_C( 1433), INT16_C( 11368), -INT16_C( 12322), -INT16_C( 20059), -INT16_C( 15750), INT16_C( 22979), -INT16_C( 17672), INT16_C( 32416), -INT16_C( 21590), INT16_C( 11420), INT16_C( 4054), -INT16_C( 9225), INT16_C( 7122), INT16_C( 23696), -INT16_C( 24888), INT16_C( 25075), INT16_C( 23459), -INT16_C( 32115), INT16_C( 12842), -INT16_C( 23501), -INT16_C( 2060), -INT16_C( 4867) }, { -INT16_C( 25167), INT16_C( 23403), INT16_C( 1865), INT16_C( 8072), INT16_C( 32534), -INT16_C( 5638), -INT16_C( 30054), INT16_C( 25157), INT16_C( 14376), -INT16_C( 13117), INT16_C( 20883), -INT16_C( 17074), -INT16_C( 32381), INT16_C( 30817), INT16_C( 24184), 
INT16_C( 10852), -INT16_C( 12293), INT16_C( 17541), INT16_C( 3542), -INT16_C( 4764), INT16_C( 24204), INT16_C( 10198), INT16_C( 7145), INT16_C( 4489), INT16_C( 19795), -INT16_C( 6435), INT16_C( 11166), INT16_C( 8611), INT16_C( 1197), INT16_C( 9625), -INT16_C( 414), INT16_C( 23887) }, { INT16_C( 14047), INT16_C( 23403), INT16_C( 16082), -INT16_C( 20968), INT16_C( 32534), INT16_C( 29936), INT16_C( 13478), INT16_C( 25157), INT16_C( 14376), INT16_C( 23706), -INT16_C( 19009), INT16_C( 11368), -INT16_C( 12322), INT16_C( 11895), INT16_C( 24184), INT16_C( 22979), -INT16_C( 12293), INT16_C( 32416), INT16_C( 3542), -INT16_C( 23484), INT16_C( 13375), INT16_C( 10198), INT16_C( 7145), -INT16_C( 15368), -INT16_C( 27753), INT16_C( 22048), -INT16_C( 9912), INT16_C( 8611), INT16_C( 12842), INT16_C( 9625), -INT16_C( 414), -INT16_C( 11785) } }, { { -INT16_C( 10803), -INT16_C( 23390), INT16_C( 1762), INT16_C( 28561), INT16_C( 26468), INT16_C( 19862), INT16_C( 8066), -INT16_C( 10913), INT16_C( 15468), INT16_C( 2747), INT16_C( 24168), INT16_C( 5420), -INT16_C( 15006), -INT16_C( 15302), -INT16_C( 30013), -INT16_C( 28383), -INT16_C( 15521), INT16_C( 16693), -INT16_C( 14647), INT16_C( 11952), INT16_C( 17965), -INT16_C( 20613), -INT16_C( 9626), -INT16_C( 11644), INT16_C( 16151), INT16_C( 32733), INT16_C( 2461), -INT16_C( 108), -INT16_C( 12594), -INT16_C( 27965), -INT16_C( 7080), -INT16_C( 18653) }, UINT32_C(1912166568), { INT16_C( 7152), INT16_C( 22266), INT16_C( 32501), INT16_C( 3112), INT16_C( 1469), INT16_C( 23179), INT16_C( 7950), -INT16_C( 8871), INT16_C( 7406), INT16_C( 18031), -INT16_C( 28160), -INT16_C( 22274), -INT16_C( 2070), INT16_C( 2074), -INT16_C( 18016), -INT16_C( 28589), INT16_C( 19924), -INT16_C( 13594), INT16_C( 4043), -INT16_C( 30506), INT16_C( 25108), INT16_C( 9186), INT16_C( 15233), INT16_C( 28416), INT16_C( 28503), INT16_C( 22454), -INT16_C( 19455), -INT16_C( 5376), INT16_C( 6827), INT16_C( 19443), INT16_C( 18131), -INT16_C( 22308) }, { -INT16_C( 15725), INT16_C( 
24178), INT16_C( 18641), -INT16_C( 6426), -INT16_C( 14166), INT16_C( 11273), INT16_C( 2307), INT16_C( 23195), INT16_C( 20856), INT16_C( 31153), -INT16_C( 20219), -INT16_C( 20380), INT16_C( 22475), -INT16_C( 24580), -INT16_C( 10083), INT16_C( 12359), -INT16_C( 18022), INT16_C( 27790), INT16_C( 29697), -INT16_C( 21422), INT16_C( 23356), INT16_C( 16344), INT16_C( 29540), -INT16_C( 9063), INT16_C( 19141), -INT16_C( 13739), -INT16_C( 17924), -INT16_C( 14469), INT16_C( 30480), -INT16_C( 21146), -INT16_C( 21169), -INT16_C( 5667) }, { -INT16_C( 10803), -INT16_C( 23390), INT16_C( 1762), INT16_C( 3112), INT16_C( 26468), INT16_C( 23179), INT16_C( 8066), INT16_C( 23195), INT16_C( 15468), INT16_C( 2747), INT16_C( 24168), -INT16_C( 20380), INT16_C( 22475), -INT16_C( 15302), -INT16_C( 10083), -INT16_C( 28383), INT16_C( 19924), INT16_C( 16693), -INT16_C( 14647), -INT16_C( 21422), INT16_C( 25108), INT16_C( 16344), INT16_C( 29540), INT16_C( 28416), INT16_C( 28503), INT16_C( 32733), INT16_C( 2461), -INT16_C( 108), INT16_C( 30480), INT16_C( 19443), INT16_C( 18131), -INT16_C( 18653) } }, { { INT16_C( 27494), INT16_C( 26709), -INT16_C( 22561), INT16_C( 6932), -INT16_C( 5118), INT16_C( 26202), -INT16_C( 3233), INT16_C( 9282), -INT16_C( 26819), INT16_C( 14831), INT16_C( 27216), INT16_C( 24577), INT16_C( 26593), INT16_C( 12301), -INT16_C( 5611), INT16_C( 31513), INT16_C( 28501), INT16_C( 13539), -INT16_C( 2282), INT16_C( 6479), -INT16_C( 22045), INT16_C( 17279), -INT16_C( 15716), -INT16_C( 9625), INT16_C( 22105), -INT16_C( 21997), INT16_C( 5312), -INT16_C( 24310), INT16_C( 6268), -INT16_C( 28207), -INT16_C( 5374), INT16_C( 22540) }, UINT32_C(1888284762), { INT16_C( 2437), INT16_C( 8718), INT16_C( 30155), INT16_C( 9468), INT16_C( 4044), -INT16_C( 29490), -INT16_C( 9948), -INT16_C( 24530), -INT16_C( 15), -INT16_C( 3279), INT16_C( 15850), INT16_C( 17483), -INT16_C( 10195), INT16_C( 5557), INT16_C( 16052), INT16_C( 14816), -INT16_C( 4537), INT16_C( 4699), INT16_C( 22371), INT16_C( 12087), 
INT16_C( 1383), -INT16_C( 29764), -INT16_C( 5410), -INT16_C( 12501), INT16_C( 23785), -INT16_C( 11069), INT16_C( 3737), -INT16_C( 14568), -INT16_C( 12826), -INT16_C( 25892), -INT16_C( 17396), INT16_C( 21460) }, { INT16_C( 12202), INT16_C( 3430), -INT16_C( 25209), -INT16_C( 4547), -INT16_C( 1630), -INT16_C( 32391), -INT16_C( 23325), -INT16_C( 13232), INT16_C( 4864), -INT16_C( 26208), -INT16_C( 18142), INT16_C( 2144), INT16_C( 15494), -INT16_C( 27997), INT16_C( 30712), -INT16_C( 23834), INT16_C( 19622), INT16_C( 11696), -INT16_C( 4631), -INT16_C( 29925), -INT16_C( 27418), -INT16_C( 14068), INT16_C( 23864), INT16_C( 14485), INT16_C( 13936), -INT16_C( 27950), INT16_C( 13039), INT16_C( 30107), INT16_C( 15983), INT16_C( 26376), -INT16_C( 4427), INT16_C( 23306) }, { INT16_C( 27494), INT16_C( 8718), -INT16_C( 22561), INT16_C( 9468), INT16_C( 4044), INT16_C( 26202), -INT16_C( 9948), INT16_C( 9282), -INT16_C( 26819), INT16_C( 14831), INT16_C( 27216), INT16_C( 24577), INT16_C( 15494), INT16_C( 5557), INT16_C( 30712), INT16_C( 14816), INT16_C( 28501), INT16_C( 13539), INT16_C( 22371), INT16_C( 12087), -INT16_C( 22045), INT16_C( 17279), -INT16_C( 15716), INT16_C( 14485), INT16_C( 22105), -INT16_C( 21997), INT16_C( 5312), -INT16_C( 24310), INT16_C( 15983), INT16_C( 26376), -INT16_C( 4427), INT16_C( 22540) } }, { { -INT16_C( 17862), INT16_C( 9097), -INT16_C( 23385), -INT16_C( 29266), -INT16_C( 17607), INT16_C( 29014), -INT16_C( 5352), -INT16_C( 30550), INT16_C( 31777), INT16_C( 4123), -INT16_C( 18770), INT16_C( 7558), -INT16_C( 28940), -INT16_C( 22139), -INT16_C( 28804), -INT16_C( 18940), -INT16_C( 29367), -INT16_C( 3879), -INT16_C( 30926), INT16_C( 27517), -INT16_C( 11454), INT16_C( 23260), -INT16_C( 31042), -INT16_C( 7965), -INT16_C( 510), -INT16_C( 19984), INT16_C( 30388), -INT16_C( 22322), INT16_C( 21252), -INT16_C( 32687), INT16_C( 21986), INT16_C( 11062) }, UINT32_C( 354095075), { INT16_C( 23659), INT16_C( 10804), INT16_C( 6115), -INT16_C( 6902), -INT16_C( 1515), -INT16_C( 
13930), INT16_C( 25969), INT16_C( 30065), -INT16_C( 15688), -INT16_C( 25610), INT16_C( 11287), -INT16_C( 1338), -INT16_C( 7620), -INT16_C( 11505), -INT16_C( 28806), -INT16_C( 6484), -INT16_C( 7956), -INT16_C( 12528), INT16_C( 6903), INT16_C( 3252), INT16_C( 19220), -INT16_C( 31275), INT16_C( 18096), INT16_C( 26875), -INT16_C( 3832), INT16_C( 8195), -INT16_C( 13795), INT16_C( 22810), INT16_C( 10924), INT16_C( 9772), -INT16_C( 9799), -INT16_C( 23284) }, { INT16_C( 7353), -INT16_C( 20108), INT16_C( 10550), INT16_C( 19389), -INT16_C( 27788), INT16_C( 9424), -INT16_C( 13351), -INT16_C( 7540), -INT16_C( 28484), -INT16_C( 9726), INT16_C( 7258), INT16_C( 1587), INT16_C( 24646), INT16_C( 44), INT16_C( 14649), -INT16_C( 3419), INT16_C( 6741), -INT16_C( 29533), INT16_C( 24899), -INT16_C( 18473), -INT16_C( 22540), -INT16_C( 12837), INT16_C( 26483), INT16_C( 12207), -INT16_C( 19977), INT16_C( 20745), INT16_C( 15822), INT16_C( 5207), -INT16_C( 31587), -INT16_C( 10732), -INT16_C( 17731), INT16_C( 4808) }, { INT16_C( 23659), INT16_C( 10804), -INT16_C( 23385), -INT16_C( 29266), -INT16_C( 17607), INT16_C( 9424), INT16_C( 25969), INT16_C( 30065), -INT16_C( 15688), -INT16_C( 9726), INT16_C( 11287), INT16_C( 1587), -INT16_C( 28940), -INT16_C( 22139), -INT16_C( 28804), -INT16_C( 18940), INT16_C( 6741), -INT16_C( 12528), -INT16_C( 30926), INT16_C( 3252), INT16_C( 19220), INT16_C( 23260), -INT16_C( 31042), -INT16_C( 7965), -INT16_C( 3832), -INT16_C( 19984), INT16_C( 15822), -INT16_C( 22322), INT16_C( 10924), -INT16_C( 32687), INT16_C( 21986), INT16_C( 11062) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi16(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_mask_max_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int 
test_simde_mm512_maskz_max_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask32 k; const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { UINT32_C(3465174033), { INT16_C( 193), -INT16_C( 29734), -INT16_C( 20936), INT16_C( 94), INT16_C( 26271), INT16_C( 26954), -INT16_C( 16407), INT16_C( 11234), -INT16_C( 22766), -INT16_C( 30451), INT16_C( 983), -INT16_C( 6005), INT16_C( 5471), INT16_C( 22967), -INT16_C( 24720), INT16_C( 12840), INT16_C( 672), -INT16_C( 10051), INT16_C( 7088), INT16_C( 20697), INT16_C( 9089), INT16_C( 27577), -INT16_C( 25374), -INT16_C( 2666), -INT16_C( 23741), INT16_C( 6782), INT16_C( 2470), INT16_C( 1283), -INT16_C( 17889), -INT16_C( 28834), -INT16_C( 30887), -INT16_C( 1599) }, { INT16_C( 32393), INT16_C( 15058), -INT16_C( 21607), INT16_C( 7050), INT16_C( 17358), -INT16_C( 20346), INT16_C( 7391), INT16_C( 9125), INT16_C( 9152), INT16_C( 26173), INT16_C( 16429), INT16_C( 19564), -INT16_C( 13574), INT16_C( 21723), -INT16_C( 25263), -INT16_C( 9395), INT16_C( 7963), -INT16_C( 19179), -INT16_C( 24630), -INT16_C( 26416), INT16_C( 22242), -INT16_C( 15799), -INT16_C( 4494), INT16_C( 13029), INT16_C( 8722), INT16_C( 16281), INT16_C( 1379), INT16_C( 23947), INT16_C( 26319), INT16_C( 8625), -INT16_C( 253), INT16_C( 8188) }, { INT16_C( 32393), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 26271), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 16429), INT16_C( 19564), INT16_C( 5471), INT16_C( 0), -INT16_C( 24720), INT16_C( 0), INT16_C( 0), -INT16_C( 10051), INT16_C( 0), INT16_C( 20697), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 13029), INT16_C( 0), INT16_C( 16281), INT16_C( 2470), INT16_C( 23947), INT16_C( 0), INT16_C( 0), -INT16_C( 253), INT16_C( 8188) } }, { UINT32_C(3922989342), { -INT16_C( 13574), INT16_C( 27732), INT16_C( 14777), -INT16_C( 13409), INT16_C( 14428), -INT16_C( 16630), -INT16_C( 27331), INT16_C( 3100), -INT16_C( 12549), -INT16_C( 211), INT16_C( 10701), 
-INT16_C( 5346), -INT16_C( 3526), -INT16_C( 5420), INT16_C( 22166), -INT16_C( 28547), -INT16_C( 12000), -INT16_C( 9732), -INT16_C( 25845), INT16_C( 26532), -INT16_C( 20781), INT16_C( 4134), INT16_C( 16963), INT16_C( 16157), INT16_C( 18960), -INT16_C( 8898), INT16_C( 23668), -INT16_C( 20791), -INT16_C( 25266), -INT16_C( 7015), INT16_C( 5875), INT16_C( 5236) }, { INT16_C( 28903), -INT16_C( 3347), -INT16_C( 28148), -INT16_C( 8359), INT16_C( 32576), -INT16_C( 31504), INT16_C( 3522), -INT16_C( 11581), INT16_C( 343), -INT16_C( 13392), INT16_C( 31069), -INT16_C( 21638), INT16_C( 4886), INT16_C( 2703), INT16_C( 809), INT16_C( 4126), INT16_C( 2931), INT16_C( 32515), INT16_C( 23709), -INT16_C( 8609), INT16_C( 20444), -INT16_C( 24990), INT16_C( 9564), -INT16_C( 19600), INT16_C( 8230), -INT16_C( 31873), -INT16_C( 1639), -INT16_C( 20434), -INT16_C( 17140), INT16_C( 13754), -INT16_C( 10048), INT16_C( 13125) }, { INT16_C( 0), INT16_C( 27732), INT16_C( 14777), -INT16_C( 8359), INT16_C( 32576), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 343), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 4886), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 23709), INT16_C( 0), INT16_C( 20444), INT16_C( 0), INT16_C( 16963), INT16_C( 16157), INT16_C( 18960), INT16_C( 0), INT16_C( 0), -INT16_C( 20434), INT16_C( 0), INT16_C( 13754), INT16_C( 5875), INT16_C( 13125) } }, { UINT32_C(2176010467), { -INT16_C( 16031), -INT16_C( 17121), -INT16_C( 28698), INT16_C( 3184), -INT16_C( 4176), INT16_C( 18831), -INT16_C( 16920), -INT16_C( 2823), -INT16_C( 19590), INT16_C( 14889), INT16_C( 28555), INT16_C( 28525), INT16_C( 8375), INT16_C( 23792), INT16_C( 20274), -INT16_C( 27683), -INT16_C( 1008), -INT16_C( 2480), -INT16_C( 15988), INT16_C( 15362), -INT16_C( 28240), -INT16_C( 26235), INT16_C( 32590), -INT16_C( 14195), -INT16_C( 18638), -INT16_C( 16894), INT16_C( 28454), -INT16_C( 8915), INT16_C( 7568), -INT16_C( 15814), INT16_C( 5996), INT16_C( 31830) }, { -INT16_C( 23020), -INT16_C( 
24462), INT16_C( 29799), INT16_C( 6364), INT16_C( 24837), INT16_C( 21425), INT16_C( 16096), INT16_C( 4891), INT16_C( 7669), INT16_C( 7121), -INT16_C( 372), INT16_C( 7417), INT16_C( 13083), -INT16_C( 30753), INT16_C( 13642), INT16_C( 24067), INT16_C( 30171), INT16_C( 17406), -INT16_C( 9495), -INT16_C( 4517), INT16_C( 3132), INT16_C( 7233), INT16_C( 23626), INT16_C( 16431), INT16_C( 121), INT16_C( 1371), INT16_C( 21758), INT16_C( 6434), INT16_C( 391), -INT16_C( 11616), -INT16_C( 23754), INT16_C( 4400) }, { -INT16_C( 16031), -INT16_C( 17121), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 21425), INT16_C( 16096), INT16_C( 4891), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 28525), INT16_C( 0), INT16_C( 0), INT16_C( 20274), INT16_C( 0), INT16_C( 30171), INT16_C( 17406), INT16_C( 0), INT16_C( 0), INT16_C( 3132), INT16_C( 7233), INT16_C( 0), INT16_C( 16431), INT16_C( 121), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 31830) } }, { UINT32_C( 22294296), { INT16_C( 12475), INT16_C( 1634), -INT16_C( 28276), INT16_C( 1350), -INT16_C( 24174), -INT16_C( 28661), INT16_C( 11766), INT16_C( 32170), INT16_C( 18990), INT16_C( 25679), -INT16_C( 32530), INT16_C( 1653), -INT16_C( 13649), -INT16_C( 18424), -INT16_C( 2183), INT16_C( 13822), INT16_C( 24616), -INT16_C( 19397), -INT16_C( 32271), -INT16_C( 31814), -INT16_C( 15070), INT16_C( 6164), -INT16_C( 16654), INT16_C( 8342), -INT16_C( 6904), -INT16_C( 2428), -INT16_C( 1691), INT16_C( 5373), INT16_C( 1475), INT16_C( 15821), -INT16_C( 13316), INT16_C( 9330) }, { -INT16_C( 21205), INT16_C( 7385), -INT16_C( 27858), INT16_C( 20640), -INT16_C( 19368), INT16_C( 19049), -INT16_C( 142), INT16_C( 31338), -INT16_C( 4380), INT16_C( 19057), INT16_C( 28391), -INT16_C( 21666), INT16_C( 11123), INT16_C( 28648), INT16_C( 23286), INT16_C( 8596), INT16_C( 27911), INT16_C( 13630), -INT16_C( 8704), INT16_C( 22661), -INT16_C( 4462), INT16_C( 1186), INT16_C( 3309), -INT16_C( 11650), -INT16_C( 4102), -INT16_C( 
7908), INT16_C( 31325), -INT16_C( 12148), INT16_C( 29862), -INT16_C( 25536), -INT16_C( 11058), -INT16_C( 10818) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 20640), -INT16_C( 19368), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 18990), INT16_C( 25679), INT16_C( 28391), INT16_C( 1653), INT16_C( 0), INT16_C( 28648), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 8704), INT16_C( 0), -INT16_C( 4462), INT16_C( 0), INT16_C( 3309), INT16_C( 0), -INT16_C( 4102), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT32_C(1091238977), { INT16_C( 15230), INT16_C( 27760), -INT16_C( 4537), INT16_C( 16702), INT16_C( 23262), INT16_C( 15138), -INT16_C( 20524), INT16_C( 31244), INT16_C( 19491), -INT16_C( 3561), -INT16_C( 10976), INT16_C( 25031), -INT16_C( 11567), -INT16_C( 21598), INT16_C( 15202), -INT16_C( 8169), -INT16_C( 30858), -INT16_C( 17076), -INT16_C( 30091), INT16_C( 21502), INT16_C( 8420), -INT16_C( 18033), -INT16_C( 25649), -INT16_C( 3277), INT16_C( 19175), INT16_C( 2021), -INT16_C( 21473), -INT16_C( 3992), INT16_C( 2686), -INT16_C( 8037), -INT16_C( 19899), -INT16_C( 17471) }, { INT16_C( 3385), -INT16_C( 20616), INT16_C( 30360), INT16_C( 31746), -INT16_C( 28266), INT16_C( 26165), INT16_C( 26924), INT16_C( 4953), INT16_C( 16051), -INT16_C( 11494), -INT16_C( 32022), INT16_C( 27075), INT16_C( 24460), -INT16_C( 11959), INT16_C( 2577), INT16_C( 19340), INT16_C( 1048), -INT16_C( 20230), -INT16_C( 902), INT16_C( 4396), INT16_C( 25230), -INT16_C( 17801), -INT16_C( 12085), INT16_C( 32462), -INT16_C( 6130), -INT16_C( 1967), INT16_C( 5483), -INT16_C( 2207), -INT16_C( 21644), -INT16_C( 31287), INT16_C( 21941), -INT16_C( 12848) }, { INT16_C( 15230), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 26924), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 10976), INT16_C( 27075), INT16_C( 24460), -INT16_C( 11959), INT16_C( 15202), INT16_C( 19340), INT16_C( 0), -INT16_C( 17076), INT16_C( 
0), INT16_C( 21502), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 19175), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 21941), INT16_C( 0) } }, { UINT32_C(3565013594), { INT16_C( 23564), -INT16_C( 10481), -INT16_C( 8916), INT16_C( 14933), -INT16_C( 22586), INT16_C( 12595), -INT16_C( 27460), INT16_C( 12328), -INT16_C( 3777), -INT16_C( 2635), -INT16_C( 31161), -INT16_C( 24126), INT16_C( 16464), INT16_C( 6005), INT16_C( 23530), -INT16_C( 2452), INT16_C( 31927), -INT16_C( 6963), INT16_C( 8793), INT16_C( 7966), INT16_C( 20937), -INT16_C( 31408), INT16_C( 31206), INT16_C( 9653), INT16_C( 27498), -INT16_C( 20198), -INT16_C( 8719), INT16_C( 16722), -INT16_C( 14307), INT16_C( 1881), -INT16_C( 15069), -INT16_C( 9475) }, { -INT16_C( 13759), -INT16_C( 25666), -INT16_C( 8724), -INT16_C( 18758), INT16_C( 2862), INT16_C( 5179), -INT16_C( 3708), -INT16_C( 4550), INT16_C( 21596), INT16_C( 19872), -INT16_C( 3535), INT16_C( 20110), -INT16_C( 6214), -INT16_C( 8875), INT16_C( 21165), -INT16_C( 4424), INT16_C( 30236), INT16_C( 2441), INT16_C( 17491), -INT16_C( 32065), -INT16_C( 1457), -INT16_C( 11370), -INT16_C( 12053), INT16_C( 18369), INT16_C( 24869), INT16_C( 22164), INT16_C( 9044), INT16_C( 3749), -INT16_C( 1526), -INT16_C( 18452), -INT16_C( 23475), INT16_C( 27046) }, { INT16_C( 0), -INT16_C( 10481), INT16_C( 0), INT16_C( 14933), INT16_C( 2862), INT16_C( 0), -INT16_C( 3708), INT16_C( 0), INT16_C( 0), INT16_C( 19872), INT16_C( 0), INT16_C( 20110), INT16_C( 0), INT16_C( 0), INT16_C( 23530), -INT16_C( 2452), INT16_C( 31927), INT16_C( 0), INT16_C( 17491), INT16_C( 7966), INT16_C( 20937), -INT16_C( 11370), INT16_C( 31206), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 9044), INT16_C( 0), -INT16_C( 1526), INT16_C( 0), -INT16_C( 15069), INT16_C( 27046) } }, { UINT32_C(1852976922), { -INT16_C( 31188), INT16_C( 6037), INT16_C( 22359), INT16_C( 31839), -INT16_C( 3144), INT16_C( 3282), INT16_C( 30486), INT16_C( 8475), INT16_C( 1906), -INT16_C( 
16424), INT16_C( 32427), -INT16_C( 15064), -INT16_C( 25682), INT16_C( 8499), INT16_C( 9164), -INT16_C( 1820), INT16_C( 31146), INT16_C( 272), INT16_C( 28624), -INT16_C( 30339), INT16_C( 20322), INT16_C( 31125), -INT16_C( 20281), INT16_C( 14746), INT16_C( 29367), INT16_C( 25336), INT16_C( 8433), -INT16_C( 24792), INT16_C( 23483), -INT16_C( 30528), -INT16_C( 23425), INT16_C( 10624) }, { -INT16_C( 28642), -INT16_C( 4566), -INT16_C( 22529), INT16_C( 25207), INT16_C( 3574), -INT16_C( 16933), INT16_C( 30141), INT16_C( 30198), -INT16_C( 4377), -INT16_C( 10025), -INT16_C( 241), -INT16_C( 13705), INT16_C( 14427), -INT16_C( 9646), -INT16_C( 11300), -INT16_C( 1533), INT16_C( 11619), INT16_C( 25577), INT16_C( 24788), -INT16_C( 13627), -INT16_C( 24467), INT16_C( 11144), INT16_C( 32277), -INT16_C( 864), INT16_C( 30573), INT16_C( 31957), INT16_C( 19575), -INT16_C( 11706), -INT16_C( 26236), INT16_C( 25004), -INT16_C( 20628), -INT16_C( 12453) }, { INT16_C( 0), INT16_C( 6037), INT16_C( 0), INT16_C( 31839), INT16_C( 3574), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1906), -INT16_C( 10025), INT16_C( 32427), -INT16_C( 13705), INT16_C( 0), INT16_C( 8499), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 25577), INT16_C( 0), INT16_C( 0), INT16_C( 20322), INT16_C( 31125), INT16_C( 32277), INT16_C( 0), INT16_C( 0), INT16_C( 31957), INT16_C( 19575), -INT16_C( 11706), INT16_C( 0), INT16_C( 25004), -INT16_C( 20628), INT16_C( 0) } }, { UINT32_C(2956084444), { INT16_C( 663), -INT16_C( 21443), -INT16_C( 8831), -INT16_C( 4439), INT16_C( 32341), -INT16_C( 13206), -INT16_C( 20278), INT16_C( 20382), INT16_C( 19017), -INT16_C( 19024), INT16_C( 3065), -INT16_C( 10875), -INT16_C( 18608), -INT16_C( 2683), -INT16_C( 81), INT16_C( 17927), INT16_C( 17666), -INT16_C( 31757), -INT16_C( 25566), INT16_C( 30577), -INT16_C( 9446), -INT16_C( 7101), -INT16_C( 7797), -INT16_C( 10957), -INT16_C( 7381), INT16_C( 9354), INT16_C( 4079), INT16_C( 16377), INT16_C( 32455), INT16_C( 30260), INT16_C( 15230), -INT16_C( 
32580) }, { -INT16_C( 20608), -INT16_C( 23805), INT16_C( 29771), INT16_C( 25882), INT16_C( 24143), -INT16_C( 9654), INT16_C( 32063), INT16_C( 27567), INT16_C( 14945), INT16_C( 20623), -INT16_C( 30391), INT16_C( 4239), -INT16_C( 15609), -INT16_C( 31354), INT16_C( 17406), INT16_C( 32517), INT16_C( 2290), INT16_C( 15906), INT16_C( 15484), -INT16_C( 13405), -INT16_C( 4710), -INT16_C( 9562), INT16_C( 21867), -INT16_C( 13243), -INT16_C( 11121), -INT16_C( 9956), -INT16_C( 21667), INT16_C( 26089), INT16_C( 28782), INT16_C( 27882), -INT16_C( 3917), -INT16_C( 23061) }, { INT16_C( 0), INT16_C( 0), INT16_C( 29771), INT16_C( 25882), INT16_C( 32341), INT16_C( 0), INT16_C( 32063), INT16_C( 27567), INT16_C( 0), INT16_C( 0), INT16_C( 3065), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 17406), INT16_C( 0), INT16_C( 0), INT16_C( 15906), INT16_C( 0), INT16_C( 0), -INT16_C( 4710), -INT16_C( 7101), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 32455), INT16_C( 30260), INT16_C( 0), -INT16_C( 23061) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_maskz_max_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_max_epu16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint16_t a[32]; const uint16_t b[32]; const uint16_t r[32]; } test_vec[] = { { { UINT16_C(39893), UINT16_C(12168), UINT16_C(15734), UINT16_C(45768), UINT16_C( 4464), UINT16_C(63629), UINT16_C(35362), UINT16_C(32306), UINT16_C(62286), UINT16_C(19220), UINT16_C(14705), UINT16_C(45343), UINT16_C(57947), UINT16_C(40021), UINT16_C(16202), UINT16_C( 8136), UINT16_C(20699), UINT16_C(20814), UINT16_C( 5774), UINT16_C(65027), UINT16_C(37160), UINT16_C(19190), UINT16_C(10523), UINT16_C(27080), UINT16_C(56604), 
UINT16_C(36276), UINT16_C(54038), UINT16_C(28990), UINT16_C(37813), UINT16_C(65293), UINT16_C(54739), UINT16_C(44574) }, { UINT16_C(27686), UINT16_C(46335), UINT16_C( 898), UINT16_C(43698), UINT16_C(43412), UINT16_C(45045), UINT16_C(48594), UINT16_C(60952), UINT16_C(52378), UINT16_C(45435), UINT16_C(47775), UINT16_C(21538), UINT16_C(12365), UINT16_C( 8275), UINT16_C(28933), UINT16_C(11214), UINT16_C(52957), UINT16_C(24543), UINT16_C(37585), UINT16_C(25866), UINT16_C(65339), UINT16_C( 3348), UINT16_C(11452), UINT16_C(22523), UINT16_C(30456), UINT16_C(38664), UINT16_C(10800), UINT16_C(32491), UINT16_C(15962), UINT16_C(24734), UINT16_C(28079), UINT16_C(35979) }, { UINT16_C(39893), UINT16_C(46335), UINT16_C(15734), UINT16_C(45768), UINT16_C(43412), UINT16_C(63629), UINT16_C(48594), UINT16_C(60952), UINT16_C(62286), UINT16_C(45435), UINT16_C(47775), UINT16_C(45343), UINT16_C(57947), UINT16_C(40021), UINT16_C(28933), UINT16_C(11214), UINT16_C(52957), UINT16_C(24543), UINT16_C(37585), UINT16_C(65027), UINT16_C(65339), UINT16_C(19190), UINT16_C(11452), UINT16_C(27080), UINT16_C(56604), UINT16_C(38664), UINT16_C(54038), UINT16_C(32491), UINT16_C(37813), UINT16_C(65293), UINT16_C(54739), UINT16_C(44574) } }, { { UINT16_C(27451), UINT16_C( 3307), UINT16_C(62973), UINT16_C(14449), UINT16_C(34292), UINT16_C(45381), UINT16_C(16561), UINT16_C(43272), UINT16_C( 4278), UINT16_C(59200), UINT16_C(11066), UINT16_C(38245), UINT16_C( 873), UINT16_C( 6389), UINT16_C(32880), UINT16_C(43940), UINT16_C(36843), UINT16_C(59575), UINT16_C(10373), UINT16_C(31008), UINT16_C(26029), UINT16_C(24106), UINT16_C(12965), UINT16_C(23559), UINT16_C(18242), UINT16_C(32067), UINT16_C(43122), UINT16_C(56082), UINT16_C( 1963), UINT16_C( 7411), UINT16_C(38791), UINT16_C(29639) }, { UINT16_C(32551), UINT16_C(44123), UINT16_C(31911), UINT16_C(21797), UINT16_C(20705), UINT16_C(34739), UINT16_C(48002), UINT16_C(50659), UINT16_C( 9730), UINT16_C(30018), UINT16_C(21710), UINT16_C(31056), UINT16_C(17499), 
UINT16_C(58005), UINT16_C(24027), UINT16_C( 597), UINT16_C(45532), UINT16_C(33710), UINT16_C(54317), UINT16_C( 3800), UINT16_C(35876), UINT16_C(42645), UINT16_C(30791), UINT16_C(18795), UINT16_C(44446), UINT16_C(27838), UINT16_C( 3841), UINT16_C(23782), UINT16_C(31571), UINT16_C(11839), UINT16_C(38104), UINT16_C(46129) }, { UINT16_C(32551), UINT16_C(44123), UINT16_C(62973), UINT16_C(21797), UINT16_C(34292), UINT16_C(45381), UINT16_C(48002), UINT16_C(50659), UINT16_C( 9730), UINT16_C(59200), UINT16_C(21710), UINT16_C(38245), UINT16_C(17499), UINT16_C(58005), UINT16_C(32880), UINT16_C(43940), UINT16_C(45532), UINT16_C(59575), UINT16_C(54317), UINT16_C(31008), UINT16_C(35876), UINT16_C(42645), UINT16_C(30791), UINT16_C(23559), UINT16_C(44446), UINT16_C(32067), UINT16_C(43122), UINT16_C(56082), UINT16_C(31571), UINT16_C(11839), UINT16_C(38791), UINT16_C(46129) } }, { { UINT16_C(57157), UINT16_C(29240), UINT16_C( 4275), UINT16_C(55169), UINT16_C( 5788), UINT16_C(58238), UINT16_C(59791), UINT16_C(11565), UINT16_C(60311), UINT16_C(39066), UINT16_C(33018), UINT16_C(19957), UINT16_C(13563), UINT16_C(54396), UINT16_C(44488), UINT16_C( 3720), UINT16_C(49292), UINT16_C(16512), UINT16_C( 465), UINT16_C(27927), UINT16_C(38168), UINT16_C(42833), UINT16_C(32383), UINT16_C( 5844), UINT16_C(28265), UINT16_C(25774), UINT16_C(41966), UINT16_C(60081), UINT16_C(11735), UINT16_C(41150), UINT16_C(18138), UINT16_C(26542) }, { UINT16_C(11783), UINT16_C(55463), UINT16_C(48688), UINT16_C(18501), UINT16_C(38484), UINT16_C(54255), UINT16_C(49940), UINT16_C(32489), UINT16_C(38706), UINT16_C( 8418), UINT16_C(37691), UINT16_C( 4618), UINT16_C(51393), UINT16_C(39858), UINT16_C(24591), UINT16_C( 5634), UINT16_C(43407), UINT16_C(49134), UINT16_C(13160), UINT16_C(48135), UINT16_C(63178), UINT16_C(56975), UINT16_C(30905), UINT16_C(60252), UINT16_C(15887), UINT16_C(18956), UINT16_C( 5842), UINT16_C(37725), UINT16_C( 4063), UINT16_C(60974), UINT16_C(12656), UINT16_C(65284) }, { UINT16_C(57157), 
UINT16_C(55463), UINT16_C(48688), UINT16_C(55169), UINT16_C(38484), UINT16_C(58238), UINT16_C(59791), UINT16_C(32489), UINT16_C(60311), UINT16_C(39066), UINT16_C(37691), UINT16_C(19957), UINT16_C(51393), UINT16_C(54396), UINT16_C(44488), UINT16_C( 5634), UINT16_C(49292), UINT16_C(49134), UINT16_C(13160), UINT16_C(48135), UINT16_C(63178), UINT16_C(56975), UINT16_C(32383), UINT16_C(60252), UINT16_C(28265), UINT16_C(25774), UINT16_C(41966), UINT16_C(60081), UINT16_C(11735), UINT16_C(60974), UINT16_C(18138), UINT16_C(65284) } }, { { UINT16_C(62170), UINT16_C(17086), UINT16_C(50469), UINT16_C(61438), UINT16_C(36283), UINT16_C(29902), UINT16_C(10757), UINT16_C( 5472), UINT16_C(27753), UINT16_C(15199), UINT16_C(48258), UINT16_C(25038), UINT16_C(64716), UINT16_C(15439), UINT16_C(21293), UINT16_C( 2107), UINT16_C(63813), UINT16_C(27466), UINT16_C(18878), UINT16_C(31066), UINT16_C(10454), UINT16_C(56557), UINT16_C(19795), UINT16_C(48369), UINT16_C(20665), UINT16_C(15607), UINT16_C(50445), UINT16_C(55709), UINT16_C(60865), UINT16_C(61205), UINT16_C(20544), UINT16_C(34551) }, { UINT16_C(16713), UINT16_C( 2033), UINT16_C(19338), UINT16_C(24960), UINT16_C(28020), UINT16_C(51005), UINT16_C(11963), UINT16_C(29827), UINT16_C(31358), UINT16_C(35760), UINT16_C(20031), UINT16_C( 100), UINT16_C(31035), UINT16_C(31727), UINT16_C(59081), UINT16_C( 4609), UINT16_C(61992), UINT16_C(45593), UINT16_C(39230), UINT16_C(45587), UINT16_C(20487), UINT16_C(49785), UINT16_C(64638), UINT16_C(64822), UINT16_C(59254), UINT16_C(46472), UINT16_C(60725), UINT16_C(28853), UINT16_C(42342), UINT16_C(12523), UINT16_C(60811), UINT16_C(45890) }, { UINT16_C(62170), UINT16_C(17086), UINT16_C(50469), UINT16_C(61438), UINT16_C(36283), UINT16_C(51005), UINT16_C(11963), UINT16_C(29827), UINT16_C(31358), UINT16_C(35760), UINT16_C(48258), UINT16_C(25038), UINT16_C(64716), UINT16_C(31727), UINT16_C(59081), UINT16_C( 4609), UINT16_C(63813), UINT16_C(45593), UINT16_C(39230), UINT16_C(45587), UINT16_C(20487), 
UINT16_C(56557), UINT16_C(64638), UINT16_C(64822), UINT16_C(59254), UINT16_C(46472), UINT16_C(60725), UINT16_C(55709), UINT16_C(60865), UINT16_C(61205), UINT16_C(60811), UINT16_C(45890) } }, { { UINT16_C(23775), UINT16_C( 7526), UINT16_C(31221), UINT16_C(64719), UINT16_C(18634), UINT16_C(18622), UINT16_C(62788), UINT16_C(47685), UINT16_C(52956), UINT16_C( 4463), UINT16_C( 9659), UINT16_C( 8577), UINT16_C(27850), UINT16_C(21841), UINT16_C(37977), UINT16_C(14601), UINT16_C(28656), UINT16_C(58710), UINT16_C( 9960), UINT16_C(45794), UINT16_C(41070), UINT16_C(46075), UINT16_C(16533), UINT16_C(29037), UINT16_C(56590), UINT16_C(51586), UINT16_C( 770), UINT16_C(52459), UINT16_C(15472), UINT16_C(51489), UINT16_C(10960), UINT16_C(49154) }, { UINT16_C(22937), UINT16_C(33446), UINT16_C(34943), UINT16_C(60724), UINT16_C(12072), UINT16_C(48800), UINT16_C( 3696), UINT16_C(32303), UINT16_C(45803), UINT16_C(60744), UINT16_C(13237), UINT16_C( 9657), UINT16_C(55919), UINT16_C(16623), UINT16_C(61701), UINT16_C(40448), UINT16_C(42570), UINT16_C(51488), UINT16_C(21806), UINT16_C(22455), UINT16_C(22404), UINT16_C(62485), UINT16_C(17509), UINT16_C(20595), UINT16_C(48118), UINT16_C(44093), UINT16_C(63214), UINT16_C(24017), UINT16_C(49361), UINT16_C(54941), UINT16_C(40626), UINT16_C(64628) }, { UINT16_C(23775), UINT16_C(33446), UINT16_C(34943), UINT16_C(64719), UINT16_C(18634), UINT16_C(48800), UINT16_C(62788), UINT16_C(47685), UINT16_C(52956), UINT16_C(60744), UINT16_C(13237), UINT16_C( 9657), UINT16_C(55919), UINT16_C(21841), UINT16_C(61701), UINT16_C(40448), UINT16_C(42570), UINT16_C(58710), UINT16_C(21806), UINT16_C(45794), UINT16_C(41070), UINT16_C(62485), UINT16_C(17509), UINT16_C(29037), UINT16_C(56590), UINT16_C(51586), UINT16_C(63214), UINT16_C(52459), UINT16_C(49361), UINT16_C(54941), UINT16_C(40626), UINT16_C(64628) } }, { { UINT16_C(38212), UINT16_C(29638), UINT16_C(32234), UINT16_C(28362), UINT16_C(57300), UINT16_C(14947), UINT16_C(54819), UINT16_C( 6794), UINT16_C(51345), 
UINT16_C(32710), UINT16_C(38846), UINT16_C(36828), UINT16_C(31320), UINT16_C( 2661), UINT16_C(55832), UINT16_C(23558), UINT16_C(52335), UINT16_C(22991), UINT16_C(39241), UINT16_C( 7879), UINT16_C(10872), UINT16_C(40024), UINT16_C(57856), UINT16_C(37302), UINT16_C(31914), UINT16_C(26896), UINT16_C(60691), UINT16_C(27640), UINT16_C(24167), UINT16_C(32629), UINT16_C(31800), UINT16_C(42971) }, { UINT16_C(43848), UINT16_C(37376), UINT16_C(51012), UINT16_C(48560), UINT16_C( 2290), UINT16_C(62041), UINT16_C( 4074), UINT16_C(38276), UINT16_C(38027), UINT16_C(40702), UINT16_C(63105), UINT16_C(59402), UINT16_C(32596), UINT16_C(35943), UINT16_C(17403), UINT16_C(17459), UINT16_C(13294), UINT16_C(13014), UINT16_C(34555), UINT16_C(60911), UINT16_C(18574), UINT16_C(30943), UINT16_C(25431), UINT16_C(57869), UINT16_C( 3064), UINT16_C(31105), UINT16_C(35586), UINT16_C(22114), UINT16_C(51466), UINT16_C( 1763), UINT16_C( 5644), UINT16_C(64074) }, { UINT16_C(43848), UINT16_C(37376), UINT16_C(51012), UINT16_C(48560), UINT16_C(57300), UINT16_C(62041), UINT16_C(54819), UINT16_C(38276), UINT16_C(51345), UINT16_C(40702), UINT16_C(63105), UINT16_C(59402), UINT16_C(32596), UINT16_C(35943), UINT16_C(55832), UINT16_C(23558), UINT16_C(52335), UINT16_C(22991), UINT16_C(39241), UINT16_C(60911), UINT16_C(18574), UINT16_C(40024), UINT16_C(57856), UINT16_C(57869), UINT16_C(31914), UINT16_C(31105), UINT16_C(60691), UINT16_C(27640), UINT16_C(51466), UINT16_C(32629), UINT16_C(31800), UINT16_C(64074) } }, { { UINT16_C( 8266), UINT16_C(17709), UINT16_C( 7334), UINT16_C(13362), UINT16_C( 4453), UINT16_C(48300), UINT16_C(47733), UINT16_C(28063), UINT16_C( 8389), UINT16_C(51174), UINT16_C(18603), UINT16_C(46366), UINT16_C( 274), UINT16_C( 7867), UINT16_C( 1303), UINT16_C(24857), UINT16_C(17957), UINT16_C(52134), UINT16_C(55394), UINT16_C(51199), UINT16_C(44266), UINT16_C(24452), UINT16_C( 9062), UINT16_C(11212), UINT16_C(45635), UINT16_C(61171), UINT16_C( 4603), UINT16_C( 3491), UINT16_C(24338), 
UINT16_C(10539), UINT16_C(17508), UINT16_C(35467) }, { UINT16_C(12682), UINT16_C(60757), UINT16_C(21770), UINT16_C(62644), UINT16_C(14337), UINT16_C(26451), UINT16_C( 8027), UINT16_C(40594), UINT16_C(34257), UINT16_C(52364), UINT16_C(12438), UINT16_C(43225), UINT16_C( 1423), UINT16_C(62418), UINT16_C(23881), UINT16_C(54397), UINT16_C(54158), UINT16_C(39105), UINT16_C(29992), UINT16_C(10636), UINT16_C(57262), UINT16_C( 2448), UINT16_C( 8958), UINT16_C(53416), UINT16_C(13480), UINT16_C(16028), UINT16_C(30308), UINT16_C(62439), UINT16_C(47483), UINT16_C(50407), UINT16_C(25622), UINT16_C(42136) }, { UINT16_C(12682), UINT16_C(60757), UINT16_C(21770), UINT16_C(62644), UINT16_C(14337), UINT16_C(48300), UINT16_C(47733), UINT16_C(40594), UINT16_C(34257), UINT16_C(52364), UINT16_C(18603), UINT16_C(46366), UINT16_C( 1423), UINT16_C(62418), UINT16_C(23881), UINT16_C(54397), UINT16_C(54158), UINT16_C(52134), UINT16_C(55394), UINT16_C(51199), UINT16_C(57262), UINT16_C(24452), UINT16_C( 9062), UINT16_C(53416), UINT16_C(45635), UINT16_C(61171), UINT16_C(30308), UINT16_C(62439), UINT16_C(47483), UINT16_C(50407), UINT16_C(25622), UINT16_C(42136) } }, { { UINT16_C(22839), UINT16_C(24381), UINT16_C(51663), UINT16_C(32136), UINT16_C( 6313), UINT16_C(42886), UINT16_C(11835), UINT16_C(58231), UINT16_C( 5219), UINT16_C(50977), UINT16_C( 2186), UINT16_C( 1467), UINT16_C(41665), UINT16_C(55241), UINT16_C(25094), UINT16_C(15996), UINT16_C(47547), UINT16_C(35485), UINT16_C( 9858), UINT16_C(11015), UINT16_C(36414), UINT16_C(31187), UINT16_C(19132), UINT16_C( 8028), UINT16_C(32350), UINT16_C(59623), UINT16_C(41606), UINT16_C(18669), UINT16_C(46916), UINT16_C(18975), UINT16_C(39705), UINT16_C(54408) }, { UINT16_C( 9812), UINT16_C(55135), UINT16_C(26188), UINT16_C(35330), UINT16_C(54772), UINT16_C(45316), UINT16_C(24608), UINT16_C(32464), UINT16_C(47070), UINT16_C(25959), UINT16_C(21593), UINT16_C(40365), UINT16_C(52235), UINT16_C( 9448), UINT16_C(28776), UINT16_C(48377), UINT16_C(22678), 
UINT16_C(58003), UINT16_C(38590), UINT16_C(45933), UINT16_C(29035), UINT16_C(35684), UINT16_C(13521), UINT16_C(45066), UINT16_C(29164), UINT16_C(17685), UINT16_C(49861), UINT16_C(53731), UINT16_C(52110), UINT16_C(63221), UINT16_C(60987), UINT16_C(53939) }, { UINT16_C(22839), UINT16_C(55135), UINT16_C(51663), UINT16_C(35330), UINT16_C(54772), UINT16_C(45316), UINT16_C(24608), UINT16_C(58231), UINT16_C(47070), UINT16_C(50977), UINT16_C(21593), UINT16_C(40365), UINT16_C(52235), UINT16_C(55241), UINT16_C(28776), UINT16_C(48377), UINT16_C(47547), UINT16_C(58003), UINT16_C(38590), UINT16_C(45933), UINT16_C(36414), UINT16_C(35684), UINT16_C(19132), UINT16_C(45066), UINT16_C(32350), UINT16_C(59623), UINT16_C(49861), UINT16_C(53731), UINT16_C(52110), UINT16_C(63221), UINT16_C(60987), UINT16_C(54408) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_max_epu16(a, b); simde_test_x86_assert_equal_u16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_max_epu16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t src[32]; const simde__mmask32 k; const uint16_t a[32]; const uint16_t b[32]; const uint16_t r[32]; } test_vec[] = { { { -INT16_C( 17970), -INT16_C( 32047), INT16_C( 11126), INT16_C( 18607), INT16_C( 28846), -INT16_C( 15027), INT16_C( 10228), INT16_C( 3519), -INT16_C( 3833), -INT16_C( 8977), -INT16_C( 12540), -INT16_C( 4980), -INT16_C( 26471), -INT16_C( 1341), INT16_C( 26461), INT16_C( 11071), INT16_C( 4128), -INT16_C( 26962), INT16_C( 23868), -INT16_C( 5409), INT16_C( 11469), -INT16_C( 15953), INT16_C( 28243), INT16_C( 23246), -INT16_C( 17056), INT16_C( 25654), -INT16_C( 15732), INT16_C( 9808), INT16_C( 5210), -INT16_C( 18400), INT16_C( 24443), -INT16_C( 25629) }, UINT32_C(2872217967), { UINT16_C(17469), UINT16_C(36990), UINT16_C(19635), UINT16_C( 
5098), UINT16_C( 8458), UINT16_C(38519), UINT16_C(51171), UINT16_C(16060), UINT16_C(56539), UINT16_C(22262), UINT16_C(55611), UINT16_C(44018), UINT16_C( 9323), UINT16_C(23126), UINT16_C(60469), UINT16_C(29206), UINT16_C(37936), UINT16_C(58114), UINT16_C(60641), UINT16_C(60406), UINT16_C(27917), UINT16_C(61825), UINT16_C(15925), UINT16_C( 4143), UINT16_C( 9498), UINT16_C(22119), UINT16_C(23038), UINT16_C(26881), UINT16_C(22397), UINT16_C(45763), UINT16_C(55875), UINT16_C(29732) }, { UINT16_C( 9838), UINT16_C(20311), UINT16_C(19986), UINT16_C( 8250), UINT16_C(48315), UINT16_C(61457), UINT16_C(16634), UINT16_C( 5121), UINT16_C(26725), UINT16_C(25450), UINT16_C(27585), UINT16_C(16077), UINT16_C(37059), UINT16_C( 1776), UINT16_C( 5226), UINT16_C(55674), UINT16_C(53818), UINT16_C(19496), UINT16_C(25376), UINT16_C(56172), UINT16_C(32031), UINT16_C( 6604), UINT16_C(52669), UINT16_C( 8749), UINT16_C(38965), UINT16_C(63110), UINT16_C(21251), UINT16_C(50740), UINT16_C( 9443), UINT16_C(20173), UINT16_C(18232), UINT16_C(29223) }, { UINT16_C(17469), UINT16_C(36990), UINT16_C(19986), UINT16_C( 8250), UINT16_C(28846), UINT16_C(61457), UINT16_C(51171), UINT16_C( 3519), UINT16_C(56539), UINT16_C(56559), UINT16_C(52996), UINT16_C(60556), UINT16_C(37059), UINT16_C(64195), UINT16_C(26461), UINT16_C(55674), UINT16_C( 4128), UINT16_C(58114), UINT16_C(23868), UINT16_C(60127), UINT16_C(32031), UINT16_C(61825), UINT16_C(28243), UINT16_C(23246), UINT16_C(38965), UINT16_C(63110), UINT16_C(49804), UINT16_C(50740), UINT16_C( 5210), UINT16_C(45763), UINT16_C(24443), UINT16_C(29732) } }, { { INT16_C( 20249), INT16_C( 14782), INT16_C( 11186), -INT16_C( 12011), -INT16_C( 7768), INT16_C( 26346), INT16_C( 6318), -INT16_C( 7288), INT16_C( 3760), -INT16_C( 19495), INT16_C( 3425), INT16_C( 17786), INT16_C( 18225), INT16_C( 27027), -INT16_C( 17778), -INT16_C( 22309), -INT16_C( 26359), -INT16_C( 17183), -INT16_C( 2364), INT16_C( 28045), INT16_C( 30935), -INT16_C( 31277), INT16_C( 23440), INT16_C( 16488), 
INT16_C( 16746), -INT16_C( 13325), INT16_C( 27982), INT16_C( 32528), -INT16_C( 23628), INT16_C( 17384), -INT16_C( 15523), INT16_C( 26603) }, UINT32_C( 555994205), { UINT16_C(24872), UINT16_C(47136), UINT16_C(35005), UINT16_C(10232), UINT16_C(60618), UINT16_C( 6386), UINT16_C( 857), UINT16_C( 3736), UINT16_C(32934), UINT16_C( 1105), UINT16_C(15428), UINT16_C(41323), UINT16_C(36360), UINT16_C(52162), UINT16_C(20798), UINT16_C(26470), UINT16_C(34482), UINT16_C(28447), UINT16_C( 6158), UINT16_C(55446), UINT16_C(35076), UINT16_C(24049), UINT16_C(35212), UINT16_C(12907), UINT16_C(48137), UINT16_C(19766), UINT16_C(41464), UINT16_C( 494), UINT16_C(45359), UINT16_C(28364), UINT16_C(12802), UINT16_C(46293) }, { UINT16_C(62648), UINT16_C(50980), UINT16_C(47628), UINT16_C( 4255), UINT16_C(36931), UINT16_C(53102), UINT16_C(55577), UINT16_C( 8962), UINT16_C(14486), UINT16_C(36464), UINT16_C(24538), UINT16_C( 2447), UINT16_C(23568), UINT16_C( 4727), UINT16_C(19598), UINT16_C(18374), UINT16_C(59969), UINT16_C(19726), UINT16_C(44453), UINT16_C(59486), UINT16_C(52286), UINT16_C(22456), UINT16_C(47781), UINT16_C(15226), UINT16_C(60402), UINT16_C(52426), UINT16_C(22858), UINT16_C(23254), UINT16_C(19893), UINT16_C(17516), UINT16_C(12954), UINT16_C(56203) }, { UINT16_C(62648), UINT16_C(14782), UINT16_C(47628), UINT16_C(10232), UINT16_C(60618), UINT16_C(26346), UINT16_C(55577), UINT16_C(58248), UINT16_C( 3760), UINT16_C(46041), UINT16_C(24538), UINT16_C(41323), UINT16_C(18225), UINT16_C(27027), UINT16_C(20798), UINT16_C(26470), UINT16_C(59969), UINT16_C(28447), UINT16_C(63172), UINT16_C(28045), UINT16_C(30935), UINT16_C(24049), UINT16_C(23440), UINT16_C(16488), UINT16_C(60402), UINT16_C(52211), UINT16_C(27982), UINT16_C(32528), UINT16_C(41908), UINT16_C(28364), UINT16_C(50013), UINT16_C(26603) } }, { { -INT16_C( 26339), -INT16_C( 15832), -INT16_C( 31162), -INT16_C( 31574), INT16_C( 25170), -INT16_C( 1828), INT16_C( 22044), INT16_C( 3891), -INT16_C( 703), -INT16_C( 29733), -INT16_C( 
20137), INT16_C( 3301), INT16_C( 20991), -INT16_C( 26288), -INT16_C( 9340), -INT16_C( 24204), -INT16_C( 25484), -INT16_C( 17565), INT16_C( 3363), INT16_C( 30015), INT16_C( 7024), -INT16_C( 29587), -INT16_C( 24206), -INT16_C( 19557), INT16_C( 30622), -INT16_C( 2753), INT16_C( 9256), INT16_C( 9986), INT16_C( 21110), -INT16_C( 1344), INT16_C( 13358), -INT16_C( 23909) }, UINT32_C(4099800785), { UINT16_C(55224), UINT16_C(10760), UINT16_C(41848), UINT16_C( 5854), UINT16_C( 7450), UINT16_C(17164), UINT16_C( 3649), UINT16_C(46954), UINT16_C(11104), UINT16_C(36529), UINT16_C(19551), UINT16_C(12337), UINT16_C(36426), UINT16_C(22052), UINT16_C(36395), UINT16_C(58577), UINT16_C(55653), UINT16_C(56590), UINT16_C(60541), UINT16_C(38899), UINT16_C(65289), UINT16_C(19418), UINT16_C(17677), UINT16_C(28162), UINT16_C(46192), UINT16_C(53244), UINT16_C(11520), UINT16_C(19200), UINT16_C( 9404), UINT16_C(59297), UINT16_C(29362), UINT16_C( 6091) }, { UINT16_C(55884), UINT16_C(51700), UINT16_C(59590), UINT16_C(53344), UINT16_C(15335), UINT16_C(62747), UINT16_C( 7552), UINT16_C(61539), UINT16_C(24529), UINT16_C(53951), UINT16_C(49037), UINT16_C(18717), UINT16_C(48868), UINT16_C(38448), UINT16_C(64560), UINT16_C(31918), UINT16_C(41686), UINT16_C(40005), UINT16_C(42634), UINT16_C(29292), UINT16_C(34785), UINT16_C(24935), UINT16_C(51877), UINT16_C(30289), UINT16_C( 4137), UINT16_C(46664), UINT16_C(26064), UINT16_C(46335), UINT16_C(12323), UINT16_C(21578), UINT16_C(63532), UINT16_C( 720) }, { UINT16_C(55884), UINT16_C(49704), UINT16_C(34374), UINT16_C(33962), UINT16_C(15335), UINT16_C(63708), UINT16_C( 7552), UINT16_C(61539), UINT16_C(64833), UINT16_C(53951), UINT16_C(49037), UINT16_C(18717), UINT16_C(48868), UINT16_C(38448), UINT16_C(64560), UINT16_C(58577), UINT16_C(55653), UINT16_C(47971), UINT16_C(60541), UINT16_C(38899), UINT16_C(65289), UINT16_C(35949), UINT16_C(51877), UINT16_C(45979), UINT16_C(30622), UINT16_C(62783), UINT16_C(26064), UINT16_C( 9986), UINT16_C(12323), UINT16_C(59297), 
UINT16_C(63532), UINT16_C( 6091) } }, { { INT16_C( 5787), INT16_C( 9630), INT16_C( 3004), -INT16_C( 25193), -INT16_C( 366), INT16_C( 14334), INT16_C( 20424), -INT16_C( 3410), -INT16_C( 2465), INT16_C( 12200), -INT16_C( 22436), INT16_C( 32739), INT16_C( 11992), INT16_C( 1235), -INT16_C( 23514), -INT16_C( 16122), -INT16_C( 23366), INT16_C( 30439), INT16_C( 32431), INT16_C( 16915), INT16_C( 4477), INT16_C( 17785), INT16_C( 10080), -INT16_C( 16585), -INT16_C( 8162), INT16_C( 31471), -INT16_C( 11640), INT16_C( 24825), -INT16_C( 13056), INT16_C( 10084), INT16_C( 27249), INT16_C( 11240) }, UINT32_C(3198275342), { UINT16_C(31173), UINT16_C( 9488), UINT16_C(18593), UINT16_C(49124), UINT16_C(54056), UINT16_C(45113), UINT16_C(12966), UINT16_C(42512), UINT16_C(29951), UINT16_C(28877), UINT16_C(46814), UINT16_C(60571), UINT16_C(15493), UINT16_C(54186), UINT16_C(43760), UINT16_C(46494), UINT16_C(44836), UINT16_C(50650), UINT16_C(49143), UINT16_C( 8068), UINT16_C(48530), UINT16_C(14543), UINT16_C(57327), UINT16_C(61407), UINT16_C(44115), UINT16_C(12639), UINT16_C(64354), UINT16_C(59421), UINT16_C(51255), UINT16_C(10427), UINT16_C(23154), UINT16_C(38621) }, { UINT16_C(47113), UINT16_C( 91), UINT16_C(57207), UINT16_C( 2335), UINT16_C(61084), UINT16_C(35906), UINT16_C( 8653), UINT16_C( 8315), UINT16_C(56013), UINT16_C(12369), UINT16_C(28373), UINT16_C( 3352), UINT16_C(54070), UINT16_C(43317), UINT16_C( 4653), UINT16_C(13887), UINT16_C(39882), UINT16_C(16694), UINT16_C(21882), UINT16_C( 5963), UINT16_C(36163), UINT16_C( 4259), UINT16_C( 7854), UINT16_C(31536), UINT16_C(33272), UINT16_C(52907), UINT16_C(50160), UINT16_C( 9947), UINT16_C( 4247), UINT16_C(50383), UINT16_C( 3874), UINT16_C(60923) }, { UINT16_C( 5787), UINT16_C( 9488), UINT16_C(57207), UINT16_C(49124), UINT16_C(65170), UINT16_C(14334), UINT16_C(20424), UINT16_C(62126), UINT16_C(56013), UINT16_C(28877), UINT16_C(46814), UINT16_C(60571), UINT16_C(11992), UINT16_C( 1235), UINT16_C(43760), UINT16_C(46494), UINT16_C(44836), 
UINT16_C(30439), UINT16_C(32431), UINT16_C(16915), UINT16_C( 4477), UINT16_C(14543), UINT16_C(10080), UINT16_C(61407), UINT16_C(57374), UINT16_C(52907), UINT16_C(64354), UINT16_C(59421), UINT16_C(51255), UINT16_C(50383), UINT16_C(27249), UINT16_C(60923) } }, { { INT16_C( 12714), INT16_C( 9262), INT16_C( 31111), -INT16_C( 13765), -INT16_C( 8698), -INT16_C( 19237), INT16_C( 3068), -INT16_C( 2768), -INT16_C( 9331), INT16_C( 32195), -INT16_C( 24929), INT16_C( 13987), INT16_C( 29614), -INT16_C( 12038), -INT16_C( 2686), INT16_C( 11453), -INT16_C( 5081), -INT16_C( 20912), -INT16_C( 29595), INT16_C( 27768), INT16_C( 21354), INT16_C( 26400), INT16_C( 20575), -INT16_C( 5028), INT16_C( 7980), -INT16_C( 13463), INT16_C( 3261), INT16_C( 27393), -INT16_C( 1153), INT16_C( 315), -INT16_C( 1551), INT16_C( 6189) }, UINT32_C(1254522597), { UINT16_C(55186), UINT16_C(61915), UINT16_C(14119), UINT16_C(21469), UINT16_C(18006), UINT16_C( 4894), UINT16_C( 8018), UINT16_C(53886), UINT16_C(47643), UINT16_C( 3283), UINT16_C( 435), UINT16_C(38948), UINT16_C(60031), UINT16_C(35298), UINT16_C(39208), UINT16_C(47869), UINT16_C(55664), UINT16_C(38827), UINT16_C(34832), UINT16_C(26603), UINT16_C( 2510), UINT16_C( 8570), UINT16_C(63785), UINT16_C(17651), UINT16_C(50867), UINT16_C(26192), UINT16_C(29895), UINT16_C(18174), UINT16_C(57438), UINT16_C(34511), UINT16_C(52601), UINT16_C(59713) }, { UINT16_C(60582), UINT16_C(46721), UINT16_C(27765), UINT16_C(17181), UINT16_C(39029), UINT16_C(40548), UINT16_C(22417), UINT16_C(17634), UINT16_C(12830), UINT16_C(58794), UINT16_C(43174), UINT16_C( 1068), UINT16_C(64392), UINT16_C( 651), UINT16_C(52424), UINT16_C(28395), UINT16_C(27832), UINT16_C(11557), UINT16_C(17112), UINT16_C(20081), UINT16_C(54746), UINT16_C(27628), UINT16_C(53037), UINT16_C(19375), UINT16_C(22785), UINT16_C(43056), UINT16_C(23553), UINT16_C(35500), UINT16_C(14168), UINT16_C( 8332), UINT16_C(30467), UINT16_C(48271) }, { UINT16_C(60582), UINT16_C( 9262), UINT16_C(27765), UINT16_C(51771), 
UINT16_C(56838), UINT16_C(40548), UINT16_C(22417), UINT16_C(53886), UINT16_C(56205), UINT16_C(58794), UINT16_C(43174), UINT16_C(38948), UINT16_C(64392), UINT16_C(35298), UINT16_C(52424), UINT16_C(11453), UINT16_C(60455), UINT16_C(38827), UINT16_C(34832), UINT16_C(27768), UINT16_C(21354), UINT16_C(26400), UINT16_C(63785), UINT16_C(19375), UINT16_C( 7980), UINT16_C(43056), UINT16_C( 3261), UINT16_C(35500), UINT16_C(64383), UINT16_C( 315), UINT16_C(52601), UINT16_C( 6189) } }, { { -INT16_C( 19228), -INT16_C( 17175), INT16_C( 23286), -INT16_C( 12022), -INT16_C( 2256), INT16_C( 23868), -INT16_C( 4922), -INT16_C( 14424), -INT16_C( 10171), INT16_C( 18287), INT16_C( 7221), -INT16_C( 29231), INT16_C( 23891), INT16_C( 22445), INT16_C( 15572), -INT16_C( 18413), -INT16_C( 784), -INT16_C( 6283), INT16_C( 32599), -INT16_C( 30792), -INT16_C( 2954), INT16_C( 15588), -INT16_C( 29472), INT16_C( 9732), INT16_C( 29540), -INT16_C( 26259), INT16_C( 16015), -INT16_C( 7386), -INT16_C( 11109), INT16_C( 28474), INT16_C( 19728), INT16_C( 296) }, UINT32_C(2699599177), { UINT16_C( 2964), UINT16_C(30159), UINT16_C(54167), UINT16_C(64667), UINT16_C( 2119), UINT16_C(54933), UINT16_C(48198), UINT16_C(57785), UINT16_C(62352), UINT16_C(41040), UINT16_C(30784), UINT16_C(35489), UINT16_C(35093), UINT16_C(12842), UINT16_C(21033), UINT16_C(48837), UINT16_C(37981), UINT16_C(62771), UINT16_C(52840), UINT16_C(45041), UINT16_C(34518), UINT16_C( 7301), UINT16_C(16194), UINT16_C(54013), UINT16_C(19762), UINT16_C(29555), UINT16_C( 5318), UINT16_C(56317), UINT16_C(10142), UINT16_C(50957), UINT16_C(53881), UINT16_C(55173) }, { UINT16_C(47207), UINT16_C(53196), UINT16_C(48518), UINT16_C(23678), UINT16_C( 835), UINT16_C(34424), UINT16_C(30018), UINT16_C(30040), UINT16_C(52163), UINT16_C(35304), UINT16_C(58848), UINT16_C(32356), UINT16_C(29196), UINT16_C(34373), UINT16_C(52036), UINT16_C(43869), UINT16_C(10627), UINT16_C( 2682), UINT16_C(63718), UINT16_C(10598), UINT16_C(57340), UINT16_C(16047), UINT16_C( 2132), 
UINT16_C( 6067), UINT16_C(39891), UINT16_C(45984), UINT16_C( 1408), UINT16_C(36145), UINT16_C(30583), UINT16_C(47891), UINT16_C(28738), UINT16_C(50535) }, { UINT16_C(47207), UINT16_C(48361), UINT16_C(23286), UINT16_C(64667), UINT16_C(63280), UINT16_C(23868), UINT16_C(48198), UINT16_C(51112), UINT16_C(62352), UINT16_C(18287), UINT16_C(58848), UINT16_C(35489), UINT16_C(35093), UINT16_C(22445), UINT16_C(15572), UINT16_C(48837), UINT16_C(64752), UINT16_C(59253), UINT16_C(32599), UINT16_C(45041), UINT16_C(62582), UINT16_C(16047), UINT16_C(16194), UINT16_C(54013), UINT16_C(29540), UINT16_C(39277), UINT16_C(16015), UINT16_C(58150), UINT16_C(54427), UINT16_C(50957), UINT16_C(19728), UINT16_C(55173) } }, { { -INT16_C( 7783), INT16_C( 32719), INT16_C( 14042), -INT16_C( 10584), INT16_C( 22549), INT16_C( 26900), -INT16_C( 14240), INT16_C( 13185), INT16_C( 8547), -INT16_C( 6937), INT16_C( 6182), -INT16_C( 25231), -INT16_C( 31601), -INT16_C( 11943), -INT16_C( 16140), -INT16_C( 29289), INT16_C( 26273), INT16_C( 31500), -INT16_C( 19300), -INT16_C( 20143), INT16_C( 26124), INT16_C( 27675), -INT16_C( 25554), -INT16_C( 28256), -INT16_C( 30787), -INT16_C( 7051), -INT16_C( 6497), INT16_C( 12161), -INT16_C( 9622), INT16_C( 24064), -INT16_C( 26726), INT16_C( 15595) }, UINT32_C(2595747838), { UINT16_C(26479), UINT16_C(40229), UINT16_C(50435), UINT16_C(49198), UINT16_C(42060), UINT16_C(60324), UINT16_C( 9866), UINT16_C(62746), UINT16_C( 6912), UINT16_C(39763), UINT16_C(16306), UINT16_C(45271), UINT16_C(36406), UINT16_C(57931), UINT16_C(38807), UINT16_C( 1691), UINT16_C(49406), UINT16_C( 419), UINT16_C(53893), UINT16_C(53697), UINT16_C(26230), UINT16_C( 188), UINT16_C(55180), UINT16_C(36085), UINT16_C(18930), UINT16_C(42023), UINT16_C(65160), UINT16_C(48725), UINT16_C(41101), UINT16_C( 9377), UINT16_C(15415), UINT16_C(13611) }, { UINT16_C(52988), UINT16_C(33078), UINT16_C(63392), UINT16_C( 5714), UINT16_C( 3677), UINT16_C(59671), UINT16_C( 3301), UINT16_C(55158), UINT16_C(40277), 
UINT16_C(56700), UINT16_C(53660), UINT16_C(10652), UINT16_C(15729), UINT16_C(43085), UINT16_C(30841), UINT16_C(30173), UINT16_C( 4935), UINT16_C(59382), UINT16_C(18442), UINT16_C(26878), UINT16_C( 5462), UINT16_C(15441), UINT16_C(50977), UINT16_C(30483), UINT16_C(36709), UINT16_C( 340), UINT16_C(61536), UINT16_C(53546), UINT16_C(30509), UINT16_C(42617), UINT16_C(22256), UINT16_C(14107) }, { UINT16_C(57753), UINT16_C(40229), UINT16_C(63392), UINT16_C(49198), UINT16_C(42060), UINT16_C(60324), UINT16_C( 9866), UINT16_C(62746), UINT16_C(40277), UINT16_C(56700), UINT16_C(53660), UINT16_C(40305), UINT16_C(36406), UINT16_C(57931), UINT16_C(38807), UINT16_C(30173), UINT16_C(49406), UINT16_C(59382), UINT16_C(53893), UINT16_C(45393), UINT16_C(26230), UINT16_C(15441), UINT16_C(39982), UINT16_C(36085), UINT16_C(34749), UINT16_C(42023), UINT16_C(59039), UINT16_C(53546), UINT16_C(41101), UINT16_C(24064), UINT16_C(38810), UINT16_C(14107) } }, { { INT16_C( 4457), INT16_C( 29726), INT16_C( 7257), -INT16_C( 20260), INT16_C( 11569), INT16_C( 21484), -INT16_C( 11), INT16_C( 23242), INT16_C( 7823), -INT16_C( 4261), -INT16_C( 31473), INT16_C( 15553), INT16_C( 15100), -INT16_C( 4893), -INT16_C( 367), -INT16_C( 1501), INT16_C( 16912), INT16_C( 26990), INT16_C( 19038), -INT16_C( 28647), INT16_C( 1400), INT16_C( 28131), -INT16_C( 21243), -INT16_C( 27449), INT16_C( 8907), -INT16_C( 9597), INT16_C( 17575), -INT16_C( 23785), -INT16_C( 1409), INT16_C( 4240), -INT16_C( 19464), INT16_C( 2058) }, UINT32_C(1416788469), { UINT16_C(51089), UINT16_C(38568), UINT16_C(28532), UINT16_C(16170), UINT16_C(44433), UINT16_C(14362), UINT16_C(12786), UINT16_C(29148), UINT16_C(27691), UINT16_C( 9089), UINT16_C(35615), UINT16_C( 5420), UINT16_C(40452), UINT16_C(51305), UINT16_C(19753), UINT16_C(47619), UINT16_C(44052), UINT16_C(34896), UINT16_C(31259), UINT16_C(44487), UINT16_C(57640), UINT16_C( 6885), UINT16_C(49426), UINT16_C(15755), UINT16_C( 3117), UINT16_C(19809), UINT16_C(36247), UINT16_C(40034), 
UINT16_C(52011), UINT16_C(21604), UINT16_C(26392), UINT16_C(11279) }, { UINT16_C(24339), UINT16_C(12212), UINT16_C(31706), UINT16_C( 732), UINT16_C(49501), UINT16_C(28444), UINT16_C(42883), UINT16_C(45229), UINT16_C( 3763), UINT16_C(19197), UINT16_C(24475), UINT16_C(50918), UINT16_C(18986), UINT16_C(16922), UINT16_C(10674), UINT16_C(50542), UINT16_C( 8841), UINT16_C(25588), UINT16_C(53406), UINT16_C(64357), UINT16_C(33170), UINT16_C( 5482), UINT16_C( 5928), UINT16_C(56261), UINT16_C(49957), UINT16_C(49189), UINT16_C( 3106), UINT16_C(19846), UINT16_C(41302), UINT16_C( 2191), UINT16_C(65226), UINT16_C(21454) }, { UINT16_C(51089), UINT16_C(29726), UINT16_C(31706), UINT16_C(45276), UINT16_C(49501), UINT16_C(28444), UINT16_C(42883), UINT16_C(45229), UINT16_C(27691), UINT16_C(61275), UINT16_C(34063), UINT16_C(50918), UINT16_C(40452), UINT16_C(51305), UINT16_C(19753), UINT16_C(64035), UINT16_C(16912), UINT16_C(34896), UINT16_C(19038), UINT16_C(36889), UINT16_C(57640), UINT16_C( 6885), UINT16_C(49426), UINT16_C(38087), UINT16_C( 8907), UINT16_C(55939), UINT16_C(36247), UINT16_C(41751), UINT16_C(52011), UINT16_C( 4240), UINT16_C(65226), UINT16_C( 2058) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi16(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_mask_max_epu16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_u16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_max_epu16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask32 k; const uint16_t a[32]; const uint16_t b[32]; const uint16_t r[32]; } test_vec[] = { { UINT32_C(2682918652), { UINT16_C( 3360), UINT16_C(28585), UINT16_C(34194), UINT16_C(19929), UINT16_C(64935), UINT16_C(52907), UINT16_C( 3569), UINT16_C(10171), UINT16_C( 8743), UINT16_C(32403), UINT16_C(16511), 
UINT16_C(31580), UINT16_C(18006), UINT16_C( 5403), UINT16_C( 9109), UINT16_C(46787), UINT16_C(27697), UINT16_C(49957), UINT16_C(65265), UINT16_C(38928), UINT16_C(48380), UINT16_C(60774), UINT16_C( 8649), UINT16_C(61460), UINT16_C(42820), UINT16_C(50031), UINT16_C(52200), UINT16_C(15934), UINT16_C(22801), UINT16_C(42580), UINT16_C( 6013), UINT16_C(44636) }, { UINT16_C(33155), UINT16_C(29809), UINT16_C(33408), UINT16_C(31756), UINT16_C(29246), UINT16_C( 1897), UINT16_C(32147), UINT16_C(55287), UINT16_C(26148), UINT16_C( 3226), UINT16_C(55601), UINT16_C(16971), UINT16_C(40754), UINT16_C(45033), UINT16_C(17846), UINT16_C(14685), UINT16_C(53191), UINT16_C(18349), UINT16_C(47441), UINT16_C(36803), UINT16_C(11307), UINT16_C(48790), UINT16_C(36265), UINT16_C(52630), UINT16_C(12532), UINT16_C( 9690), UINT16_C( 9481), UINT16_C(15464), UINT16_C(20932), UINT16_C(31467), UINT16_C(18838), UINT16_C(23987) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C(34194), UINT16_C(31756), UINT16_C(64935), UINT16_C(52907), UINT16_C(32147), UINT16_C(55287), UINT16_C( 0), UINT16_C(32403), UINT16_C(55601), UINT16_C( 0), UINT16_C(40754), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(49957), UINT16_C( 0), UINT16_C(38928), UINT16_C( 0), UINT16_C(60774), UINT16_C(36265), UINT16_C(61460), UINT16_C(42820), UINT16_C(50031), UINT16_C(52200), UINT16_C(15934), UINT16_C(22801), UINT16_C( 0), UINT16_C( 0), UINT16_C(44636) } }, { UINT32_C(1772380184), { UINT16_C(36499), UINT16_C(15362), UINT16_C(38939), UINT16_C( 3850), UINT16_C(58569), UINT16_C(53813), UINT16_C(40201), UINT16_C(52494), UINT16_C(64238), UINT16_C(33863), UINT16_C(64067), UINT16_C(23522), UINT16_C(34394), UINT16_C(29636), UINT16_C(48366), UINT16_C(33207), UINT16_C(47434), UINT16_C(26046), UINT16_C(51282), UINT16_C( 7029), UINT16_C(43692), UINT16_C(46573), UINT16_C(64583), UINT16_C(13698), UINT16_C(51702), UINT16_C(14777), UINT16_C(39875), UINT16_C( 7572), UINT16_C(22562), UINT16_C( 4240), UINT16_C(18196), UINT16_C(24209) }, { 
UINT16_C(20224), UINT16_C(21187), UINT16_C(14359), UINT16_C(50029), UINT16_C(23522), UINT16_C(10616), UINT16_C(64087), UINT16_C(19806), UINT16_C( 6339), UINT16_C(34438), UINT16_C( 6835), UINT16_C(54691), UINT16_C(13170), UINT16_C(34533), UINT16_C(30586), UINT16_C(31716), UINT16_C(42950), UINT16_C(57037), UINT16_C(15328), UINT16_C(49825), UINT16_C( 6806), UINT16_C(60908), UINT16_C(18964), UINT16_C(55354), UINT16_C(49250), UINT16_C( 5726), UINT16_C( 730), UINT16_C(19691), UINT16_C(53557), UINT16_C(45266), UINT16_C(46664), UINT16_C( 3627) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(50029), UINT16_C(58569), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(34533), UINT16_C(48366), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(51282), UINT16_C( 0), UINT16_C( 0), UINT16_C(60908), UINT16_C( 0), UINT16_C(55354), UINT16_C(51702), UINT16_C( 0), UINT16_C( 0), UINT16_C(19691), UINT16_C( 0), UINT16_C(45266), UINT16_C(46664), UINT16_C( 0) } }, { UINT32_C(1038940253), { UINT16_C(60584), UINT16_C(48310), UINT16_C(61494), UINT16_C(39316), UINT16_C(62384), UINT16_C(35503), UINT16_C(39669), UINT16_C(10966), UINT16_C(43115), UINT16_C(46042), UINT16_C( 1374), UINT16_C(48322), UINT16_C(44798), UINT16_C(12793), UINT16_C(63804), UINT16_C(58619), UINT16_C(45541), UINT16_C( 7329), UINT16_C(13730), UINT16_C(21173), UINT16_C(25640), UINT16_C( 7645), UINT16_C(46078), UINT16_C(27208), UINT16_C( 8796), UINT16_C(47645), UINT16_C(57128), UINT16_C( 9846), UINT16_C(28814), UINT16_C(51799), UINT16_C(21097), UINT16_C(20399) }, { UINT16_C(20484), UINT16_C(42603), UINT16_C( 8325), UINT16_C(44792), UINT16_C(54660), UINT16_C(33483), UINT16_C( 5001), UINT16_C(58860), UINT16_C( 2614), UINT16_C(24223), UINT16_C( 5865), UINT16_C(30596), UINT16_C(56198), UINT16_C(61250), UINT16_C(61742), UINT16_C(12862), UINT16_C(43329), UINT16_C(50904), UINT16_C(53449), UINT16_C(19828), UINT16_C(16550), UINT16_C(12240), UINT16_C(48211), 
UINT16_C(35092), UINT16_C(46022), UINT16_C(45287), UINT16_C(27593), UINT16_C(20263), UINT16_C(26951), UINT16_C(30015), UINT16_C(32090), UINT16_C(39847) }, { UINT16_C(60584), UINT16_C( 0), UINT16_C(61494), UINT16_C(44792), UINT16_C(62384), UINT16_C( 0), UINT16_C(39669), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(48322), UINT16_C(56198), UINT16_C(61250), UINT16_C(63804), UINT16_C(58619), UINT16_C( 0), UINT16_C( 0), UINT16_C(53449), UINT16_C(21173), UINT16_C( 0), UINT16_C(12240), UINT16_C(48211), UINT16_C(35092), UINT16_C(46022), UINT16_C( 0), UINT16_C(57128), UINT16_C(20263), UINT16_C(28814), UINT16_C(51799), UINT16_C( 0), UINT16_C( 0) } }, { UINT32_C(4032986919), { UINT16_C( 3606), UINT16_C(27172), UINT16_C(14538), UINT16_C(37363), UINT16_C(56300), UINT16_C(46401), UINT16_C(26694), UINT16_C(36101), UINT16_C(17618), UINT16_C(11266), UINT16_C(43457), UINT16_C(59592), UINT16_C(10792), UINT16_C(30937), UINT16_C( 5888), UINT16_C( 5997), UINT16_C(37413), UINT16_C(61313), UINT16_C(29898), UINT16_C(46720), UINT16_C(49487), UINT16_C(38508), UINT16_C(28970), UINT16_C(64547), UINT16_C( 9909), UINT16_C(30248), UINT16_C(61647), UINT16_C(63583), UINT16_C(14362), UINT16_C( 7024), UINT16_C(56655), UINT16_C(29746) }, { UINT16_C(45935), UINT16_C(14947), UINT16_C(58407), UINT16_C(30704), UINT16_C(23717), UINT16_C(53005), UINT16_C(12493), UINT16_C(33483), UINT16_C(62550), UINT16_C( 9977), UINT16_C(22756), UINT16_C(65310), UINT16_C(36496), UINT16_C(57114), UINT16_C(19563), UINT16_C(56147), UINT16_C(46847), UINT16_C( 9749), UINT16_C( 1434), UINT16_C(16541), UINT16_C(43618), UINT16_C(12047), UINT16_C(56283), UINT16_C(12722), UINT16_C(43983), UINT16_C(45911), UINT16_C(29955), UINT16_C(37810), UINT16_C(52227), UINT16_C(28530), UINT16_C(50456), UINT16_C( 5962) }, { UINT16_C(45935), UINT16_C(27172), UINT16_C(58407), UINT16_C( 0), UINT16_C( 0), UINT16_C(53005), UINT16_C( 0), UINT16_C( 0), UINT16_C(62550), UINT16_C(11266), UINT16_C(43457), UINT16_C(65310), UINT16_C(36496), 
UINT16_C(57114), UINT16_C(19563), UINT16_C( 0), UINT16_C( 0), UINT16_C(61313), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(38508), UINT16_C(56283), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(52227), UINT16_C(28530), UINT16_C(56655), UINT16_C(29746) } }, { UINT32_C( 373186427), { UINT16_C(25990), UINT16_C(25078), UINT16_C(43072), UINT16_C( 3986), UINT16_C(59987), UINT16_C(22211), UINT16_C(30047), UINT16_C(25577), UINT16_C(23362), UINT16_C(23250), UINT16_C( 7200), UINT16_C(39794), UINT16_C(45179), UINT16_C(57265), UINT16_C( 1931), UINT16_C( 4518), UINT16_C(40045), UINT16_C(44402), UINT16_C( 1348), UINT16_C(38845), UINT16_C(33007), UINT16_C(20205), UINT16_C(55029), UINT16_C(14257), UINT16_C(33585), UINT16_C(20882), UINT16_C( 1183), UINT16_C( 6892), UINT16_C(40628), UINT16_C(16378), UINT16_C(41125), UINT16_C( 4689) }, { UINT16_C(49980), UINT16_C(32960), UINT16_C(32200), UINT16_C(46871), UINT16_C( 1277), UINT16_C(61958), UINT16_C(47066), UINT16_C( 2858), UINT16_C(48187), UINT16_C(55900), UINT16_C(18624), UINT16_C(29941), UINT16_C(61414), UINT16_C(36019), UINT16_C( 1167), UINT16_C(52126), UINT16_C(24264), UINT16_C(36939), UINT16_C(25307), UINT16_C(55368), UINT16_C(20070), UINT16_C(16587), UINT16_C(62725), UINT16_C(16459), UINT16_C(42929), UINT16_C(28955), UINT16_C( 4335), UINT16_C(55013), UINT16_C(39167), UINT16_C(36450), UINT16_C( 157), UINT16_C(25945) }, { UINT16_C(49980), UINT16_C(32960), UINT16_C( 0), UINT16_C(46871), UINT16_C(59987), UINT16_C(61958), UINT16_C(47066), UINT16_C( 0), UINT16_C(48187), UINT16_C(55900), UINT16_C(18624), UINT16_C(39794), UINT16_C(61414), UINT16_C( 0), UINT16_C( 1931), UINT16_C( 0), UINT16_C( 0), UINT16_C(44402), UINT16_C(25307), UINT16_C(55368), UINT16_C(33007), UINT16_C(20205), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(28955), UINT16_C( 4335), UINT16_C( 0), UINT16_C(40628), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { UINT32_C( 989176927), { UINT16_C(56971), UINT16_C(37292), 
UINT16_C(63443), UINT16_C(34001), UINT16_C(60574), UINT16_C(36341), UINT16_C(56060), UINT16_C(64355), UINT16_C(50546), UINT16_C( 3977), UINT16_C(58054), UINT16_C( 9588), UINT16_C(27270), UINT16_C(35935), UINT16_C(29351), UINT16_C(13304), UINT16_C(42064), UINT16_C( 9156), UINT16_C(38299), UINT16_C(14759), UINT16_C(40066), UINT16_C(32455), UINT16_C(10870), UINT16_C(59770), UINT16_C( 1008), UINT16_C(46840), UINT16_C(28134), UINT16_C(27867), UINT16_C(15063), UINT16_C(32505), UINT16_C(61869), UINT16_C(64945) }, { UINT16_C(30102), UINT16_C(12577), UINT16_C(51211), UINT16_C(36203), UINT16_C(12901), UINT16_C(56075), UINT16_C(34140), UINT16_C(19652), UINT16_C(48521), UINT16_C(28418), UINT16_C(56618), UINT16_C( 475), UINT16_C(54296), UINT16_C(50559), UINT16_C(12742), UINT16_C(23746), UINT16_C(58278), UINT16_C(45453), UINT16_C(63660), UINT16_C( 4414), UINT16_C(18986), UINT16_C(34796), UINT16_C(45519), UINT16_C(22739), UINT16_C(54894), UINT16_C(39111), UINT16_C(41907), UINT16_C(52121), UINT16_C( 6263), UINT16_C(15760), UINT16_C(21321), UINT16_C(61593) }, { UINT16_C(56971), UINT16_C(37292), UINT16_C(63443), UINT16_C(36203), UINT16_C(60574), UINT16_C( 0), UINT16_C(56060), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(58054), UINT16_C( 0), UINT16_C( 0), UINT16_C(50559), UINT16_C( 0), UINT16_C(23746), UINT16_C(58278), UINT16_C( 0), UINT16_C(63660), UINT16_C( 0), UINT16_C(40066), UINT16_C(34796), UINT16_C(45519), UINT16_C(59770), UINT16_C( 0), UINT16_C(46840), UINT16_C( 0), UINT16_C(52121), UINT16_C(15063), UINT16_C(32505), UINT16_C( 0), UINT16_C( 0) } }, { UINT32_C(3802212150), { UINT16_C(57386), UINT16_C(63953), UINT16_C(42129), UINT16_C(65362), UINT16_C( 6522), UINT16_C(11927), UINT16_C(12476), UINT16_C(13561), UINT16_C(35400), UINT16_C(37489), UINT16_C( 3037), UINT16_C( 4994), UINT16_C( 9010), UINT16_C(20982), UINT16_C(59651), UINT16_C(11675), UINT16_C(27849), UINT16_C(23079), UINT16_C(30993), UINT16_C(35673), UINT16_C(61586), UINT16_C(20409), UINT16_C(45856), 
UINT16_C(27011), UINT16_C(62525), UINT16_C( 6907), UINT16_C(32255), UINT16_C(12589), UINT16_C( 9120), UINT16_C(42115), UINT16_C( 7693), UINT16_C(54993) }, { UINT16_C(63627), UINT16_C(39985), UINT16_C(35441), UINT16_C( 1063), UINT16_C(57723), UINT16_C(39763), UINT16_C(54932), UINT16_C(53508), UINT16_C(65482), UINT16_C(51947), UINT16_C( 6268), UINT16_C( 7675), UINT16_C(32316), UINT16_C(18881), UINT16_C(37533), UINT16_C(10271), UINT16_C(20619), UINT16_C(64708), UINT16_C(60379), UINT16_C(22016), UINT16_C(21452), UINT16_C(24817), UINT16_C(63017), UINT16_C(62513), UINT16_C( 7413), UINT16_C(29374), UINT16_C(47413), UINT16_C(29071), UINT16_C(20536), UINT16_C(54714), UINT16_C(55778), UINT16_C(28157) }, { UINT16_C( 0), UINT16_C(63953), UINT16_C(42129), UINT16_C( 0), UINT16_C(57723), UINT16_C(39763), UINT16_C( 0), UINT16_C( 0), UINT16_C(65482), UINT16_C(51947), UINT16_C( 6268), UINT16_C( 0), UINT16_C( 0), UINT16_C(20982), UINT16_C( 0), UINT16_C( 0), UINT16_C(27849), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(24817), UINT16_C( 0), UINT16_C(62513), UINT16_C( 0), UINT16_C(29374), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(54714), UINT16_C(55778), UINT16_C(54993) } }, { UINT32_C( 90882346), { UINT16_C(19646), UINT16_C(59353), UINT16_C( 2882), UINT16_C(14555), UINT16_C(39207), UINT16_C(23722), UINT16_C(14675), UINT16_C(35789), UINT16_C(34697), UINT16_C(27488), UINT16_C(23905), UINT16_C(35801), UINT16_C(17182), UINT16_C(51856), UINT16_C(60333), UINT16_C(27459), UINT16_C( 7479), UINT16_C(31315), UINT16_C(11816), UINT16_C(20402), UINT16_C(23752), UINT16_C( 7084), UINT16_C(31125), UINT16_C( 7846), UINT16_C( 1537), UINT16_C(25225), UINT16_C(25187), UINT16_C(33261), UINT16_C(32165), UINT16_C(21323), UINT16_C(36712), UINT16_C(40894) }, { UINT16_C( 4524), UINT16_C(54297), UINT16_C(52032), UINT16_C( 2083), UINT16_C(53031), UINT16_C(48163), UINT16_C(51529), UINT16_C(19162), UINT16_C(25807), UINT16_C(12972), UINT16_C(39366), UINT16_C(27827), UINT16_C(65046), 
UINT16_C(32447), UINT16_C(32141), UINT16_C(14621), UINT16_C(14223), UINT16_C(53005), UINT16_C(12546), UINT16_C(10967), UINT16_C(64000), UINT16_C(18918), UINT16_C(49603), UINT16_C(37523), UINT16_C(16165), UINT16_C(60356), UINT16_C(30680), UINT16_C(61015), UINT16_C( 5749), UINT16_C( 876), UINT16_C(35476), UINT16_C( 9020) }, { UINT16_C( 0), UINT16_C(59353), UINT16_C( 0), UINT16_C(14555), UINT16_C( 0), UINT16_C(48163), UINT16_C( 0), UINT16_C( 0), UINT16_C(34697), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(60333), UINT16_C(27459), UINT16_C( 0), UINT16_C(53005), UINT16_C( 0), UINT16_C(20402), UINT16_C( 0), UINT16_C(18918), UINT16_C(49603), UINT16_C( 0), UINT16_C(16165), UINT16_C( 0), UINT16_C(30680), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_maskz_max_epu16(test_vec[i].k, a, b); simde_test_x86_assert_equal_u16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_max_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { { -INT32_C( 58487515), INT32_C( 1267763067), -INT32_C( 198522948), INT32_C( 1686761573), -INT32_C( 964970336), -INT32_C( 792797143), -INT32_C( 195002861), -INT32_C( 1960123547), -INT32_C( 343429776), -INT32_C( 248047820), INT32_C( 1155883487), -INT32_C( 240554159), INT32_C( 1354179623), INT32_C( 404780548), INT32_C( 1510768884), -INT32_C( 723175836) }, { INT32_C( 415263972), INT32_C( 1661597572), -INT32_C( 1448612008), -INT32_C( 2053418914), INT32_C( 2044023412), -INT32_C( 1114507576), -INT32_C( 635986570), -INT32_C( 1213203245), -INT32_C( 321884312), -INT32_C( 1102063258), INT32_C( 644347848), -INT32_C( 1129643449), INT32_C( 490045781), -INT32_C( 304429449), 
INT32_C( 919138659), INT32_C( 1458468845) }, { INT32_C( 415263972), INT32_C( 1661597572), -INT32_C( 198522948), INT32_C( 1686761573), INT32_C( 2044023412), -INT32_C( 792797143), -INT32_C( 195002861), -INT32_C( 1213203245), -INT32_C( 321884312), -INT32_C( 248047820), INT32_C( 1155883487), -INT32_C( 240554159), INT32_C( 1354179623), INT32_C( 404780548), INT32_C( 1510768884), INT32_C( 1458468845) } }, { { INT32_C( 1279442662), INT32_C( 1611305623), -INT32_C( 796495479), -INT32_C( 913558924), INT32_C( 719765939), -INT32_C( 367541881), -INT32_C( 1608392782), INT32_C( 1022758742), INT32_C( 1686649037), INT32_C( 1405391562), INT32_C( 2015644420), INT32_C( 809611389), -INT32_C( 111532174), -INT32_C( 1679527448), -INT32_C( 1489304239), -INT32_C( 505138924) }, { INT32_C( 876964969), INT32_C( 59181823), -INT32_C( 763647147), -INT32_C( 838681508), -INT32_C( 859349789), INT32_C( 510110669), -INT32_C( 993615184), INT32_C( 1017490131), INT32_C( 359721750), INT32_C( 1243150581), -INT32_C( 14904413), INT32_C( 869080655), INT32_C( 1207932282), -INT32_C( 244947392), -INT32_C( 608883704), -INT32_C( 334013482) }, { INT32_C( 1279442662), INT32_C( 1611305623), -INT32_C( 763647147), -INT32_C( 838681508), INT32_C( 719765939), INT32_C( 510110669), -INT32_C( 993615184), INT32_C( 1022758742), INT32_C( 1686649037), INT32_C( 1405391562), INT32_C( 2015644420), INT32_C( 869080655), INT32_C( 1207932282), -INT32_C( 244947392), -INT32_C( 608883704), -INT32_C( 334013482) } }, { { INT32_C( 990021702), INT32_C( 595925632), -INT32_C( 47996498), INT32_C( 959508671), -INT32_C( 964677755), -INT32_C( 1648892267), -INT32_C( 394761198), INT32_C( 232100039), -INT32_C( 1740056808), -INT32_C( 1615081999), INT32_C( 765320814), INT32_C( 1416023503), -INT32_C( 1843730435), -INT32_C( 533671475), INT32_C( 97036350), INT32_C( 1343462712) }, { INT32_C( 1726503796), -INT32_C( 1761237975), INT32_C( 1371906690), INT32_C( 1839606640), -INT32_C( 520110062), -INT32_C( 792711278), INT32_C( 282429656), -INT32_C( 1704859610), 
INT32_C( 1828735300), INT32_C( 1879312109), INT32_C( 415353256), INT32_C( 126183413), -INT32_C( 1159232216), -INT32_C( 1937070156), INT32_C( 1453154096), -INT32_C( 1930363320) }, { INT32_C( 1726503796), INT32_C( 595925632), INT32_C( 1371906690), INT32_C( 1839606640), -INT32_C( 520110062), -INT32_C( 792711278), INT32_C( 282429656), INT32_C( 232100039), INT32_C( 1828735300), INT32_C( 1879312109), INT32_C( 765320814), INT32_C( 1416023503), -INT32_C( 1159232216), -INT32_C( 533671475), INT32_C( 1453154096), INT32_C( 1343462712) } }, { { INT32_C( 905572679), -INT32_C( 1616511497), -INT32_C( 1128765753), -INT32_C( 154976818), INT32_C( 2008067010), -INT32_C( 2113717678), -INT32_C( 505896807), -INT32_C( 429012578), -INT32_C( 1323604294), INT32_C( 726712420), -INT32_C( 186185690), INT32_C( 149596742), -INT32_C( 1468032427), INT32_C( 1848280020), -INT32_C( 1035009245), -INT32_C( 2035761716) }, { -INT32_C( 2026388701), -INT32_C( 1447917693), -INT32_C( 694249072), -INT32_C( 1713469372), -INT32_C( 146711005), INT32_C( 73755873), INT32_C( 1002878319), -INT32_C( 1782485390), -INT32_C( 1273104335), INT32_C( 257871743), -INT32_C( 1377436567), -INT32_C( 1488534396), INT32_C( 60786722), INT32_C( 1661404404), INT32_C( 731827897), INT32_C( 1858166588) }, { INT32_C( 905572679), -INT32_C( 1447917693), -INT32_C( 694249072), -INT32_C( 154976818), INT32_C( 2008067010), INT32_C( 73755873), INT32_C( 1002878319), -INT32_C( 429012578), -INT32_C( 1273104335), INT32_C( 726712420), -INT32_C( 186185690), INT32_C( 149596742), INT32_C( 60786722), INT32_C( 1848280020), INT32_C( 731827897), INT32_C( 1858166588) } }, { { -INT32_C( 702357929), INT32_C( 384204973), INT32_C( 29608828), -INT32_C( 1314387313), -INT32_C( 2035005550), INT32_C( 99204172), -INT32_C( 969832566), INT32_C( 1026880230), INT32_C( 2098419664), INT32_C( 1419049431), INT32_C( 1414879173), -INT32_C( 217645727), -INT32_C( 1854293435), INT32_C( 9855606), -INT32_C( 808990743), -INT32_C( 1995637831) }, { INT32_C( 705110098), -INT32_C( 
562128103), INT32_C( 1412682738), INT32_C( 356989392), INT32_C( 1789313523), INT32_C( 225066275), -INT32_C( 1092865788), INT32_C( 2135419181), INT32_C( 581520905), -INT32_C( 603904023), -INT32_C( 886033158), INT32_C( 1625323373), INT32_C( 1556776760), -INT32_C( 932629052), -INT32_C( 1819916954), INT32_C( 924044846) }, { INT32_C( 705110098), INT32_C( 384204973), INT32_C( 1412682738), INT32_C( 356989392), INT32_C( 1789313523), INT32_C( 225066275), -INT32_C( 969832566), INT32_C( 2135419181), INT32_C( 2098419664), INT32_C( 1419049431), INT32_C( 1414879173), INT32_C( 1625323373), INT32_C( 1556776760), INT32_C( 9855606), -INT32_C( 808990743), INT32_C( 924044846) } }, { { INT32_C( 106609692), -INT32_C( 555590684), -INT32_C( 56028529), -INT32_C( 1034122615), -INT32_C( 719444207), -INT32_C( 1029863588), -INT32_C( 78240564), INT32_C( 238184946), INT32_C( 152341541), INT32_C( 1994979047), -INT32_C( 1837985528), INT32_C( 743755547), INT32_C( 1375826678), -INT32_C( 988504071), -INT32_C( 1245680957), -INT32_C( 104598573) }, { INT32_C( 1728239743), -INT32_C( 673322290), -INT32_C( 1754705796), INT32_C( 365215007), INT32_C( 677889327), INT32_C( 669875044), -INT32_C( 1176719642), INT32_C( 548577441), INT32_C( 1183298936), INT32_C( 454911391), -INT32_C( 726432075), INT32_C( 1927903043), -INT32_C( 1583722436), -INT32_C( 1312257845), -INT32_C( 680811210), -INT32_C( 1107878587) }, { INT32_C( 1728239743), -INT32_C( 555590684), -INT32_C( 56028529), INT32_C( 365215007), INT32_C( 677889327), INT32_C( 669875044), -INT32_C( 78240564), INT32_C( 548577441), INT32_C( 1183298936), INT32_C( 1994979047), -INT32_C( 726432075), INT32_C( 1927903043), INT32_C( 1375826678), -INT32_C( 988504071), -INT32_C( 680811210), -INT32_C( 104598573) } }, { { INT32_C( 1912831954), -INT32_C( 1718803996), -INT32_C( 345161561), -INT32_C( 195209545), INT32_C( 1905653926), -INT32_C( 1239196288), INT32_C( 1200459266), INT32_C( 2114225323), -INT32_C( 403699709), -INT32_C( 796885719), INT32_C( 1975250366), -INT32_C( 
378988221), -INT32_C( 1856242159), INT32_C( 1581743708), -INT32_C( 1213803508), INT32_C( 1547020888) }, { -INT32_C( 616356430), INT32_C( 1638712483), -INT32_C( 170498127), -INT32_C( 1847705472), -INT32_C( 1709033154), -INT32_C( 1007064649), -INT32_C( 1770283203), -INT32_C( 51204023), INT32_C( 2044147158), -INT32_C( 1411742727), INT32_C( 1805693163), INT32_C( 805142256), INT32_C( 1875451832), -INT32_C( 969686391), -INT32_C( 1419989407), INT32_C( 883379806) }, { INT32_C( 1912831954), INT32_C( 1638712483), -INT32_C( 170498127), -INT32_C( 195209545), INT32_C( 1905653926), -INT32_C( 1007064649), INT32_C( 1200459266), INT32_C( 2114225323), INT32_C( 2044147158), -INT32_C( 796885719), INT32_C( 1975250366), INT32_C( 805142256), INT32_C( 1875451832), INT32_C( 1581743708), -INT32_C( 1213803508), INT32_C( 1547020888) } }, { { INT32_C( 2108522116), -INT32_C( 316111102), INT32_C( 676907064), -INT32_C( 11053753), -INT32_C( 26336907), INT32_C( 1170514403), -INT32_C( 1359994545), -INT32_C( 203253905), INT32_C( 393318421), INT32_C( 1325701399), -INT32_C( 1451729566), INT32_C( 665374642), -INT32_C( 735766800), INT32_C( 119139000), INT32_C( 2058684683), -INT32_C( 1251043168) }, { INT32_C( 1070456616), -INT32_C( 628108936), -INT32_C( 511506642), -INT32_C( 955765802), -INT32_C( 90493374), INT32_C( 587314200), INT32_C( 1570617277), INT32_C( 1997671247), INT32_C( 1672929258), -INT32_C( 549632591), INT32_C( 599834956), INT32_C( 787139052), INT32_C( 254313975), -INT32_C( 164484551), INT32_C( 810799073), -INT32_C( 978885157) }, { INT32_C( 2108522116), -INT32_C( 316111102), INT32_C( 676907064), -INT32_C( 11053753), -INT32_C( 26336907), INT32_C( 1170514403), INT32_C( 1570617277), INT32_C( 1997671247), INT32_C( 1672929258), INT32_C( 1325701399), INT32_C( 599834956), INT32_C( 787139052), INT32_C( 254313975), INT32_C( 119139000), INT32_C( 2058684683), -INT32_C( 978885157) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = 
simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_max_epi32(a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_max_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t src[16]; const simde__mmask16 k; const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { { -INT32_C( 1607096062), -INT32_C( 163478492), INT32_C( 1326370578), -INT32_C( 1482691356), INT32_C( 36315194), INT32_C( 459690145), INT32_C( 441074651), INT32_C( 373429012), -INT32_C( 524912708), INT32_C( 249035003), -INT32_C( 1403132473), INT32_C( 206830802), -INT32_C( 1123123940), -INT32_C( 1462143795), -INT32_C( 876469321), -INT32_C( 203357130) }, UINT16_C(39035), { -INT32_C( 1433372717), -INT32_C( 493856891), INT32_C( 1457480195), INT32_C( 2111044462), -INT32_C( 1829658439), INT32_C( 196454728), INT32_C( 1443884148), -INT32_C( 1292989473), -INT32_C( 2023916030), -INT32_C( 647369259), -INT32_C( 1120974769), -INT32_C( 46529724), -INT32_C( 326161245), INT32_C( 1240941781), INT32_C( 262080048), -INT32_C( 1815966319) }, { -INT32_C( 518381813), INT32_C( 1538950156), INT32_C( 270068172), -INT32_C( 1878174740), INT32_C( 1417452671), INT32_C( 295531489), INT32_C( 186727801), -INT32_C( 694230070), INT32_C( 246921474), INT32_C( 157905213), INT32_C( 1192919386), INT32_C( 1406609364), -INT32_C( 1515760700), INT32_C( 1068910022), INT32_C( 1279973250), -INT32_C( 1138562630) }, { -INT32_C( 518381813), INT32_C( 1538950156), INT32_C( 1326370578), INT32_C( 2111044462), INT32_C( 1417452671), INT32_C( 295531489), INT32_C( 1443884148), INT32_C( 373429012), -INT32_C( 524912708), INT32_C( 249035003), -INT32_C( 1403132473), INT32_C( 1406609364), -INT32_C( 326161245), -INT32_C( 1462143795), -INT32_C( 876469321), -INT32_C( 1138562630) } }, { { -INT32_C( 540354142), -INT32_C( 1511509174), -INT32_C( 1981021515), -INT32_C( 287456470), 
-INT32_C( 594312170), INT32_C( 1260079561), -INT32_C( 610769375), -INT32_C( 241649073), -INT32_C( 556768620), INT32_C( 1266923670), -INT32_C( 439062597), INT32_C( 1221833010), -INT32_C( 14391754), -INT32_C( 800374609), -INT32_C( 173218906), INT32_C( 837174429) }, UINT16_C(46758), { -INT32_C( 1821492208), INT32_C( 1543711111), -INT32_C( 519228146), -INT32_C( 1572387970), -INT32_C( 1897728189), INT32_C( 1936820423), INT32_C( 1672941181), INT32_C( 1343839808), INT32_C( 585336986), -INT32_C( 1065425231), -INT32_C( 1700689124), INT32_C( 305981903), -INT32_C( 1482678304), INT32_C( 622530983), INT32_C( 1602802207), -INT32_C( 877682128) }, { -INT32_C( 605187286), -INT32_C( 1784976519), -INT32_C( 986759690), INT32_C( 114781222), INT32_C( 850229131), -INT32_C( 1470642040), -INT32_C( 888676197), -INT32_C( 1399408766), -INT32_C( 1014529206), -INT32_C( 447142930), -INT32_C( 2035644320), INT32_C( 2139914996), -INT32_C( 2102248967), -INT32_C( 1658189566), INT32_C( 1801990633), INT32_C( 840433640) }, { -INT32_C( 540354142), INT32_C( 1543711111), -INT32_C( 519228146), -INT32_C( 287456470), -INT32_C( 594312170), INT32_C( 1936820423), -INT32_C( 610769375), INT32_C( 1343839808), -INT32_C( 556768620), -INT32_C( 447142930), -INT32_C( 1700689124), INT32_C( 1221833010), -INT32_C( 1482678304), INT32_C( 622530983), -INT32_C( 173218906), INT32_C( 840433640) } }, { { INT32_C( 1911988098), INT32_C( 576081858), -INT32_C( 861404969), INT32_C( 2085303426), INT32_C( 1878981997), -INT32_C( 267638777), INT32_C( 1113355609), -INT32_C( 160140428), -INT32_C( 731420142), -INT32_C( 1846100551), INT32_C( 1079877310), INT32_C( 1086105810), -INT32_C( 1380992346), INT32_C( 1016970466), -INT32_C( 1518405327), INT32_C( 2140926573) }, UINT16_C( 861), { INT32_C( 1254102612), INT32_C( 82411175), INT32_C( 2074983359), -INT32_C( 1422502917), -INT32_C( 1654188032), -INT32_C( 761817004), -INT32_C( 658176963), -INT32_C( 673504637), -INT32_C( 534602696), -INT32_C( 639366374), -INT32_C( 1034579514), -INT32_C( 
462582812), -INT32_C( 125643613), -INT32_C( 1446373012), -INT32_C( 1602121955), -INT32_C( 361210447) }, { INT32_C( 332045049), INT32_C( 1810738853), INT32_C( 606945856), INT32_C( 1879677645), -INT32_C( 630682770), -INT32_C( 1048366172), INT32_C( 1952515522), INT32_C( 1532942690), INT32_C( 409872499), INT32_C( 377773014), INT32_C( 1782296989), -INT32_C( 1160035252), INT32_C( 1939162063), INT32_C( 959715446), INT32_C( 2142082333), -INT32_C( 489026705) }, { INT32_C( 1254102612), INT32_C( 576081858), INT32_C( 2074983359), INT32_C( 1879677645), -INT32_C( 630682770), -INT32_C( 267638777), INT32_C( 1952515522), -INT32_C( 160140428), INT32_C( 409872499), INT32_C( 377773014), INT32_C( 1079877310), INT32_C( 1086105810), -INT32_C( 1380992346), INT32_C( 1016970466), -INT32_C( 1518405327), INT32_C( 2140926573) } }, { { INT32_C( 167463219), INT32_C( 1109426084), INT32_C( 2091670320), INT32_C( 1849132959), INT32_C( 1105317067), INT32_C( 41555428), INT32_C( 427894698), INT32_C( 1711037490), INT32_C( 1232074661), -INT32_C( 1500803210), -INT32_C( 1994180374), -INT32_C( 1963500865), INT32_C( 181196838), -INT32_C( 1760803091), -INT32_C( 1598976402), -INT32_C( 1895387670) }, UINT16_C(30116), { INT32_C( 1677990616), -INT32_C( 476254528), INT32_C( 1849514871), -INT32_C( 1304009754), INT32_C( 2063086446), INT32_C( 2064148170), INT32_C( 220787207), INT32_C( 1518521473), -INT32_C( 1480685850), -INT32_C( 343254412), -INT32_C( 1688614731), -INT32_C( 1722966229), -INT32_C( 1676392750), -INT32_C( 1290265428), -INT32_C( 1866448881), -INT32_C( 202751475) }, { INT32_C( 1016768712), -INT32_C( 1205394174), INT32_C( 408125677), -INT32_C( 239951585), -INT32_C( 1819359513), -INT32_C( 246962462), -INT32_C( 209582106), INT32_C( 317156426), INT32_C( 391086357), -INT32_C( 1815120218), INT32_C( 380380151), -INT32_C( 1425514812), INT32_C( 104764964), INT32_C( 586712380), -INT32_C( 686392691), -INT32_C( 68551194) }, { INT32_C( 167463219), INT32_C( 1109426084), INT32_C( 1849514871), INT32_C( 1849132959), 
INT32_C( 1105317067), INT32_C( 2064148170), INT32_C( 427894698), INT32_C( 1518521473), INT32_C( 391086357), -INT32_C( 1500803210), INT32_C( 380380151), -INT32_C( 1963500865), INT32_C( 104764964), INT32_C( 586712380), -INT32_C( 686392691), -INT32_C( 1895387670) } }, { { INT32_C( 622016638), -INT32_C( 1497832785), -INT32_C( 910400507), -INT32_C( 428555070), -INT32_C( 1762806950), -INT32_C( 977672904), INT32_C( 1167904607), INT32_C( 1245808332), INT32_C( 1836012734), INT32_C( 1007888438), INT32_C( 1325781132), -INT32_C( 281707884), INT32_C( 1703223853), INT32_C( 1714109959), -INT32_C( 642988275), INT32_C( 203746637) }, UINT16_C(37697), { -INT32_C( 1917094023), -INT32_C( 1185068877), -INT32_C( 869011049), INT32_C( 1726963936), -INT32_C( 257624379), INT32_C( 163099229), -INT32_C( 1342831221), -INT32_C( 1958529263), INT32_C( 1645805230), -INT32_C( 585403066), INT32_C( 1202343526), INT32_C( 1940756910), -INT32_C( 328969841), -INT32_C( 1879761917), INT32_C( 859761441), -INT32_C( 776044254) }, { -INT32_C( 969681280), -INT32_C( 1314632117), INT32_C( 1257787036), INT32_C( 1992140263), -INT32_C( 94166537), INT32_C( 1602836541), INT32_C( 1720895556), -INT32_C( 919121847), INT32_C( 1905289766), INT32_C( 1411527864), INT32_C( 1771969410), -INT32_C( 1210098496), -INT32_C( 1145945475), -INT32_C( 551928933), INT32_C( 1296411651), INT32_C( 571899388) }, { -INT32_C( 969681280), -INT32_C( 1497832785), -INT32_C( 910400507), -INT32_C( 428555070), -INT32_C( 1762806950), -INT32_C( 977672904), INT32_C( 1720895556), INT32_C( 1245808332), INT32_C( 1905289766), INT32_C( 1411527864), INT32_C( 1325781132), -INT32_C( 281707884), -INT32_C( 328969841), INT32_C( 1714109959), -INT32_C( 642988275), INT32_C( 571899388) } }, { { -INT32_C( 1600936217), INT32_C( 1559541210), -INT32_C( 1849322544), INT32_C( 1816700399), -INT32_C( 2111309081), INT32_C( 962674998), -INT32_C( 377051155), INT32_C( 185310500), INT32_C( 514563651), INT32_C( 612016212), INT32_C( 582303795), -INT32_C( 863043867), INT32_C( 
776976120), -INT32_C( 446123785), INT32_C( 2077158999), -INT32_C( 813180277) }, UINT16_C(12920), { INT32_C( 1741868269), -INT32_C( 1499003407), -INT32_C( 1230730201), -INT32_C( 1469276839), INT32_C( 861430731), INT32_C( 388149320), -INT32_C( 1292784341), INT32_C( 1776642428), INT32_C( 668055350), -INT32_C( 456296259), INT32_C( 1587180037), -INT32_C( 637139441), -INT32_C( 1307681174), INT32_C( 986263566), INT32_C( 1525463773), INT32_C( 1522782500) }, { INT32_C( 1182897289), INT32_C( 304762381), -INT32_C( 814692928), INT32_C( 900363979), -INT32_C( 471287596), -INT32_C( 987909656), -INT32_C( 1877014164), INT32_C( 1693115355), -INT32_C( 2069206153), INT32_C( 2056705209), INT32_C( 1699284633), INT32_C( 1369109372), -INT32_C( 1825275221), -INT32_C( 1604759244), INT32_C( 892368986), -INT32_C( 744940965) }, { -INT32_C( 1600936217), INT32_C( 1559541210), -INT32_C( 1849322544), INT32_C( 900363979), INT32_C( 861430731), INT32_C( 388149320), -INT32_C( 1292784341), INT32_C( 185310500), INT32_C( 514563651), INT32_C( 2056705209), INT32_C( 582303795), -INT32_C( 863043867), -INT32_C( 1307681174), INT32_C( 986263566), INT32_C( 2077158999), -INT32_C( 813180277) } }, { { INT32_C( 1045906309), -INT32_C( 1313280488), INT32_C( 1897267956), -INT32_C( 1581075979), INT32_C( 1731524147), -INT32_C( 1593340601), INT32_C( 1641494278), -INT32_C( 701206447), -INT32_C( 871002956), INT32_C( 1853738362), -INT32_C( 975203121), INT32_C( 2019991877), -INT32_C( 555705705), INT32_C( 780199720), INT32_C( 1888442143), INT32_C( 2068300999) }, UINT16_C(23632), { -INT32_C( 987116985), INT32_C( 408549688), INT32_C( 616144574), -INT32_C( 155299562), -INT32_C( 1344346577), -INT32_C( 1543045868), -INT32_C( 1268199827), -INT32_C( 1861175223), -INT32_C( 1168754046), -INT32_C( 237850829), INT32_C( 1662356557), INT32_C( 207279069), -INT32_C( 826525510), -INT32_C( 1569537483), -INT32_C( 631776624), INT32_C( 342583186) }, { -INT32_C( 724581983), -INT32_C( 1111121552), INT32_C( 169925165), INT32_C( 1746369198), 
-INT32_C( 415837262), INT32_C( 646621589), INT32_C( 369156483), -INT32_C( 366318776), -INT32_C( 1665205972), -INT32_C( 933657445), INT32_C( 215185758), -INT32_C( 1502287116), INT32_C( 1385081789), -INT32_C( 679995308), INT32_C( 1106082041), INT32_C( 254482659) }, { INT32_C( 1045906309), -INT32_C( 1313280488), INT32_C( 1897267956), -INT32_C( 1581075979), -INT32_C( 415837262), -INT32_C( 1593340601), INT32_C( 369156483), -INT32_C( 701206447), -INT32_C( 871002956), INT32_C( 1853738362), INT32_C( 1662356557), INT32_C( 207279069), INT32_C( 1385081789), INT32_C( 780199720), INT32_C( 1106082041), INT32_C( 2068300999) } }, { { -INT32_C( 1398019567), -INT32_C( 864746386), INT32_C( 1926842494), -INT32_C( 283620046), INT32_C( 1279371000), -INT32_C( 1222329666), INT32_C( 385421618), INT32_C( 992289833), INT32_C( 2095567118), INT32_C( 1397316821), -INT32_C( 691723612), INT32_C( 1757797999), INT32_C( 1135871876), -INT32_C( 201664319), INT32_C( 319419370), INT32_C( 642658072) }, UINT16_C(13824), { -INT32_C( 325921373), -INT32_C( 301058263), -INT32_C( 741573363), -INT32_C( 1696968219), -INT32_C( 1905091692), -INT32_C( 1736287090), -INT32_C( 1094215056), INT32_C( 1676986304), -INT32_C( 984643684), -INT32_C( 877371970), -INT32_C( 1063354149), INT32_C( 1700427985), -INT32_C( 1561015021), -INT32_C( 1724221911), -INT32_C( 799538928), INT32_C( 1681149128) }, { -INT32_C( 1859484717), -INT32_C( 1134698783), INT32_C( 813497182), -INT32_C( 2020223116), -INT32_C( 852915804), INT32_C( 258434047), INT32_C( 752926564), -INT32_C( 544140277), INT32_C( 2020653975), -INT32_C( 147534439), INT32_C( 1026011593), INT32_C( 751091080), INT32_C( 1190784582), -INT32_C( 1235918767), INT32_C( 736245023), -INT32_C( 519408823) }, { -INT32_C( 1398019567), -INT32_C( 864746386), INT32_C( 1926842494), -INT32_C( 283620046), INT32_C( 1279371000), -INT32_C( 1222329666), INT32_C( 385421618), INT32_C( 992289833), INT32_C( 2095567118), -INT32_C( 147534439), INT32_C( 1026011593), INT32_C( 1757797999), INT32_C( 
1190784582), -INT32_C( 1235918767), INT32_C( 319419370), INT32_C( 642658072) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi32(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_mask_max_epi32(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_max_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { UINT16_C(46951), { INT32_C( 1840390330), -INT32_C( 963235308), -INT32_C( 839300271), -INT32_C( 1164858157), -INT32_C( 133013581), -INT32_C( 1111663537), -INT32_C( 38173341), INT32_C( 1907695798), INT32_C( 1423861568), -INT32_C( 350521958), -INT32_C( 1816652608), INT32_C( 1934510528), -INT32_C( 143957848), INT32_C( 548677820), INT32_C( 1679650477), -INT32_C( 808070514) }, { -INT32_C( 735857862), -INT32_C( 390119896), INT32_C( 326924370), INT32_C( 1787218625), -INT32_C( 413011414), -INT32_C( 939059686), INT32_C( 304882820), INT32_C( 836829687), -INT32_C( 586873420), -INT32_C( 1765424061), -INT32_C( 22462148), INT32_C( 912797451), INT32_C( 1008584993), INT32_C( 1661215967), INT32_C( 1064710216), -INT32_C( 445622479) }, { INT32_C( 1840390330), -INT32_C( 390119896), INT32_C( 326924370), INT32_C( 0), INT32_C( 0), -INT32_C( 939059686), INT32_C( 304882820), INT32_C( 0), INT32_C( 1423861568), -INT32_C( 350521958), -INT32_C( 22462148), INT32_C( 0), INT32_C( 1008584993), INT32_C( 1661215967), INT32_C( 0), -INT32_C( 445622479) } }, { UINT16_C(30044), { -INT32_C( 2009423678), -INT32_C( 540445130), -INT32_C( 603007628), INT32_C( 681979915), INT32_C( 1884063084), INT32_C( 1604359401), INT32_C( 1152831956), INT32_C( 2042237878), -INT32_C( 385747789), -INT32_C( 540489110), -INT32_C( 1430530401), 
INT32_C( 1926390022), -INT32_C( 790487321), -INT32_C( 2026929485), INT32_C( 181134675), -INT32_C( 1417443848) }, { -INT32_C( 460028807), -INT32_C( 289186738), INT32_C( 966295091), -INT32_C( 945001504), INT32_C( 1016565385), INT32_C( 1690551825), -INT32_C( 1536258133), -INT32_C( 1907363564), -INT32_C( 999103371), INT32_C( 1941058880), -INT32_C( 1817359693), INT32_C( 1062885813), -INT32_C( 126094873), INT32_C( 1667055543), -INT32_C( 502805554), INT32_C( 846223037) }, { INT32_C( 0), INT32_C( 0), INT32_C( 966295091), INT32_C( 681979915), INT32_C( 1884063084), INT32_C( 0), INT32_C( 1152831956), INT32_C( 0), -INT32_C( 385747789), INT32_C( 0), -INT32_C( 1430530401), INT32_C( 0), -INT32_C( 126094873), INT32_C( 1667055543), INT32_C( 181134675), INT32_C( 0) } }, { UINT16_C(57914), { -INT32_C( 1474855946), -INT32_C( 1678521362), -INT32_C( 1175148450), INT32_C( 1672142055), INT32_C( 832725716), -INT32_C( 855805755), -INT32_C( 1021134254), -INT32_C( 475701780), -INT32_C( 963920424), -INT32_C( 429752696), INT32_C( 245323303), INT32_C( 124865074), INT32_C( 1899500460), -INT32_C( 700631677), -INT32_C( 593928209), -INT32_C( 1799405892) }, { -INT32_C( 2091169029), -INT32_C( 261440055), INT32_C( 1191053587), -INT32_C( 11702189), INT32_C( 124814723), -INT32_C( 1428312645), -INT32_C( 913934835), -INT32_C( 1335999052), INT32_C( 1496562064), -INT32_C( 1991664266), -INT32_C( 87079001), INT32_C( 1006247095), INT32_C( 1564633762), INT32_C( 621223704), INT32_C( 1240370837), INT32_C( 1677282515) }, { INT32_C( 0), -INT32_C( 261440055), INT32_C( 0), INT32_C( 1672142055), INT32_C( 832725716), -INT32_C( 855805755), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 429752696), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 621223704), INT32_C( 1240370837), INT32_C( 1677282515) } }, { UINT16_C(11525), { INT32_C( 97156028), -INT32_C( 733122299), INT32_C( 1727071340), -INT32_C( 2117037249), -INT32_C( 140449552), -INT32_C( 58378995), INT32_C( 2018007423), INT32_C( 2040876732), -INT32_C( 830574391), 
INT32_C( 1302580193), INT32_C( 263427280), INT32_C( 395412519), INT32_C( 2047750508), -INT32_C( 428436377), -INT32_C( 1453408531), -INT32_C( 702413812) }, { INT32_C( 1436852596), INT32_C( 1017333612), INT32_C( 38557403), INT32_C( 1192877530), INT32_C( 1975592974), -INT32_C( 1453639748), INT32_C( 38976245), INT32_C( 853046718), -INT32_C( 2105050090), -INT32_C( 1614861628), INT32_C( 1537346433), -INT32_C( 157107224), -INT32_C( 1620286493), -INT32_C( 1874278502), INT32_C( 1066572673), INT32_C( 644966928) }, { INT32_C( 1436852596), INT32_C( 0), INT32_C( 1727071340), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 830574391), INT32_C( 0), INT32_C( 1537346433), INT32_C( 395412519), INT32_C( 0), -INT32_C( 428436377), INT32_C( 0), INT32_C( 0) } }, { UINT16_C(63974), { INT32_C( 1730390696), -INT32_C( 328031158), -INT32_C( 1566090752), -INT32_C( 1123644591), INT32_C( 1938071594), -INT32_C( 1039268304), -INT32_C( 1221845435), -INT32_C( 324005052), INT32_C( 122999741), INT32_C( 2029241976), INT32_C( 1914346273), INT32_C( 1345265702), -INT32_C( 238832703), -INT32_C( 4927047), INT32_C( 867623151), -INT32_C( 1323276557) }, { -INT32_C( 1313311687), INT32_C( 1529457722), INT32_C( 1842168903), INT32_C( 633207908), INT32_C( 1763148208), INT32_C( 1114164050), -INT32_C( 1619714389), -INT32_C( 1102015100), INT32_C( 1148127241), -INT32_C( 73426508), INT32_C( 1097362909), INT32_C( 426190441), -INT32_C( 108822873), -INT32_C( 197399735), -INT32_C( 1902923510), INT32_C( 1347216198) }, { INT32_C( 0), INT32_C( 1529457722), INT32_C( 1842168903), INT32_C( 0), INT32_C( 0), INT32_C( 1114164050), -INT32_C( 1221845435), -INT32_C( 324005052), INT32_C( 1148127241), INT32_C( 0), INT32_C( 0), INT32_C( 1345265702), -INT32_C( 108822873), -INT32_C( 4927047), INT32_C( 867623151), INT32_C( 1347216198) } }, { UINT16_C(48364), { INT32_C( 861249684), INT32_C( 77607580), -INT32_C( 634779021), -INT32_C( 1504128733), INT32_C( 110272971), INT32_C( 699899030), INT32_C( 1997405738), 
-INT32_C( 499910322), INT32_C( 890603673), INT32_C( 758822586), -INT32_C( 485989184), INT32_C( 25845814), -INT32_C( 1744364542), -INT32_C( 490618952), INT32_C( 190435005), INT32_C( 1642958023) }, { -INT32_C( 829029868), INT32_C( 2029834424), INT32_C( 1801192501), INT32_C( 1718412900), -INT32_C( 1157729534), -INT32_C( 274939854), INT32_C( 1459287694), -INT32_C( 1749555326), -INT32_C( 1570419222), INT32_C( 1394303262), -INT32_C( 893487259), INT32_C( 1596992093), -INT32_C( 803655779), INT32_C( 2109715951), INT32_C( 785627819), -INT32_C( 1949988191) }, { INT32_C( 0), INT32_C( 0), INT32_C( 1801192501), INT32_C( 1718412900), INT32_C( 0), INT32_C( 699899030), INT32_C( 1997405738), -INT32_C( 499910322), INT32_C( 0), INT32_C( 0), -INT32_C( 485989184), INT32_C( 1596992093), -INT32_C( 803655779), INT32_C( 2109715951), INT32_C( 0), INT32_C( 1642958023) } }, { UINT16_C(10968), { INT32_C( 1233974830), INT32_C( 130085193), -INT32_C( 332325445), -INT32_C( 1793339780), INT32_C( 1581910686), INT32_C( 1528362631), -INT32_C( 370820828), INT32_C( 1930673477), -INT32_C( 54747213), INT32_C( 1275296913), INT32_C( 356005017), -INT32_C( 1582673149), -INT32_C( 436210595), INT32_C( 239081450), INT32_C( 385295825), -INT32_C( 1769403421) }, { INT32_C( 999507370), INT32_C( 1518900929), -INT32_C( 831536949), INT32_C( 1903106324), -INT32_C( 128553203), INT32_C( 1460049542), -INT32_C( 1620181316), -INT32_C( 1288309239), -INT32_C( 51394501), INT32_C( 710309727), INT32_C( 1274594615), -INT32_C( 323131426), INT32_C( 1575228374), INT32_C( 1723132586), -INT32_C( 251321624), INT32_C( 1420114456) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1903106324), INT32_C( 1581910686), INT32_C( 0), -INT32_C( 370820828), INT32_C( 1930673477), INT32_C( 0), INT32_C( 1275296913), INT32_C( 0), -INT32_C( 323131426), INT32_C( 0), INT32_C( 1723132586), INT32_C( 0), INT32_C( 0) } }, { UINT16_C(37891), { -INT32_C( 1492426160), -INT32_C( 2073279860), INT32_C( 1273711502), INT32_C( 459194935), -INT32_C( 754644961), 
INT32_C( 1945497198), -INT32_C( 2068967713), -INT32_C( 1307004574), -INT32_C( 1621548269), -INT32_C( 198982042), INT32_C( 1212091921), -INT32_C( 278684208), INT32_C( 348350630), INT32_C( 914929750), INT32_C( 683292358), -INT32_C( 86256665) }, { INT32_C( 1553544438), INT32_C( 189840634), -INT32_C( 1689022518), -INT32_C( 745884115), INT32_C( 1978092831), -INT32_C( 861180154), -INT32_C( 1930074459), INT32_C( 797364281), -INT32_C( 7594236), -INT32_C( 1509237541), -INT32_C( 1723769236), INT32_C( 862767892), INT32_C( 531190553), INT32_C( 1760253123), -INT32_C( 201989958), -INT32_C( 1255965776) }, { INT32_C( 1553544438), INT32_C( 189840634), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1212091921), INT32_C( 0), INT32_C( 531190553), INT32_C( 0), INT32_C( 0), -INT32_C( 86256665) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_maskz_max_epi32(test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_max_epu32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint32_t a[16]; const uint32_t b[16]; const uint32_t r[16]; } test_vec[] = { { { UINT32_C(2243118331), UINT32_C(1578755567), UINT32_C(1899903292), UINT32_C(3548006149), UINT32_C(1967957742), UINT32_C(3104828962), UINT32_C(3870576571), UINT32_C(3545536983), UINT32_C(3428321501), UINT32_C( 690647533), UINT32_C(2946197674), UINT32_C(2659390895), UINT32_C(3725840315), UINT32_C(2023170749), UINT32_C(1214204785), UINT32_C(3239818212) }, { UINT32_C(2827842747), UINT32_C(2429728997), UINT32_C(3493817632), UINT32_C(1047446146), UINT32_C(1327268242), UINT32_C( 348697507), UINT32_C(3797690366), UINT32_C(2527295706), UINT32_C(3510513900), UINT32_C( 157356264), UINT32_C( 14262653), UINT32_C(4114499427), 
UINT32_C(1799707336), UINT32_C( 192875789), UINT32_C( 250469172), UINT32_C(1067749459) }, { UINT32_C(2827842747), UINT32_C(2429728997), UINT32_C(3493817632), UINT32_C(3548006149), UINT32_C(1967957742), UINT32_C(3104828962), UINT32_C(3870576571), UINT32_C(3545536983), UINT32_C(3510513900), UINT32_C( 690647533), UINT32_C(2946197674), UINT32_C(4114499427), UINT32_C(3725840315), UINT32_C(2023170749), UINT32_C(1214204785), UINT32_C(3239818212) } }, { { UINT32_C(2836521920), UINT32_C(1907520243), UINT32_C(2003929875), UINT32_C(2590814162), UINT32_C( 369471753), UINT32_C(4062282942), UINT32_C(3003190879), UINT32_C(1592960414), UINT32_C(2080834440), UINT32_C(2314058102), UINT32_C( 369122884), UINT32_C( 380661005), UINT32_C(3710694686), UINT32_C(2580499769), UINT32_C(4182560859), UINT32_C(4267130486) }, { UINT32_C(3095027522), UINT32_C(1564567320), UINT32_C(3530769093), UINT32_C(3454543023), UINT32_C( 329913561), UINT32_C(3165420129), UINT32_C(3233151050), UINT32_C(2025786678), UINT32_C(2217752940), UINT32_C(1726050977), UINT32_C(1664701876), UINT32_C(1378886009), UINT32_C(2540034870), UINT32_C(2673086805), UINT32_C(1080035593), UINT32_C(2193104662) }, { UINT32_C(3095027522), UINT32_C(1907520243), UINT32_C(3530769093), UINT32_C(3454543023), UINT32_C( 369471753), UINT32_C(4062282942), UINT32_C(3233151050), UINT32_C(2025786678), UINT32_C(2217752940), UINT32_C(2314058102), UINT32_C(1664701876), UINT32_C(1378886009), UINT32_C(3710694686), UINT32_C(2673086805), UINT32_C(4182560859), UINT32_C(4267130486) } }, { { UINT32_C(4178045272), UINT32_C( 258009179), UINT32_C(3060963645), UINT32_C(4027163322), UINT32_C(3532156541), UINT32_C(2306006144), UINT32_C(4241085157), UINT32_C(1233027825), UINT32_C(3326313835), UINT32_C(2882904942), UINT32_C(4133635900), UINT32_C(1743219689), UINT32_C(1496936409), UINT32_C( 820226891), UINT32_C(1848421501), UINT32_C(2579016494) }, { UINT32_C(2657090352), UINT32_C(3662296222), UINT32_C(1708174459), UINT32_C(4039948055), UINT32_C(1900676390), UINT32_C( 
782380465), UINT32_C( 144559833), UINT32_C(2862699897), UINT32_C(3997696336), UINT32_C(2982711861), UINT32_C(1427544126), UINT32_C(1984356944), UINT32_C(2565378279), UINT32_C(2529659581), UINT32_C(3533595736), UINT32_C( 159137977) }, { UINT32_C(4178045272), UINT32_C(3662296222), UINT32_C(3060963645), UINT32_C(4039948055), UINT32_C(3532156541), UINT32_C(2306006144), UINT32_C(4241085157), UINT32_C(2862699897), UINT32_C(3997696336), UINT32_C(2982711861), UINT32_C(4133635900), UINT32_C(1984356944), UINT32_C(2565378279), UINT32_C(2529659581), UINT32_C(3533595736), UINT32_C(2579016494) } }, { { UINT32_C(1995949121), UINT32_C(2502410071), UINT32_C(2817211735), UINT32_C( 119419167), UINT32_C(2124351169), UINT32_C(3893651088), UINT32_C(2210051018), UINT32_C( 881604339), UINT32_C(1386906619), UINT32_C(2598883906), UINT32_C( 792842767), UINT32_C(3291897603), UINT32_C(4114797925), UINT32_C( 115234620), UINT32_C(4253718538), UINT32_C(3392214735) }, { UINT32_C(3692878746), UINT32_C(3178628013), UINT32_C(3656169686), UINT32_C(2107515415), UINT32_C( 863166711), UINT32_C(1094340663), UINT32_C(3091121385), UINT32_C(1954705370), UINT32_C(4166098507), UINT32_C(2008401825), UINT32_C(2538709375), UINT32_C(3138711491), UINT32_C( 133072591), UINT32_C(3225954519), UINT32_C(3346565100), UINT32_C(1094449910) }, { UINT32_C(3692878746), UINT32_C(3178628013), UINT32_C(3656169686), UINT32_C(2107515415), UINT32_C(2124351169), UINT32_C(3893651088), UINT32_C(3091121385), UINT32_C(1954705370), UINT32_C(4166098507), UINT32_C(2598883906), UINT32_C(2538709375), UINT32_C(3291897603), UINT32_C(4114797925), UINT32_C(3225954519), UINT32_C(4253718538), UINT32_C(3392214735) } }, { { UINT32_C( 960138392), UINT32_C(3551653716), UINT32_C(1416233617), UINT32_C(3222241009), UINT32_C(3704094213), UINT32_C( 328994854), UINT32_C(2379879575), UINT32_C(2798589198), UINT32_C(4141812130), UINT32_C(2311688440), UINT32_C(2212377746), UINT32_C(3074747826), UINT32_C( 311626731), UINT32_C(2988781339), UINT32_C(1363214147), 
UINT32_C(3069644564) }, { UINT32_C( 279762712), UINT32_C(4204426855), UINT32_C(1551726762), UINT32_C(1360314725), UINT32_C(3898845133), UINT32_C( 446400727), UINT32_C(2607602567), UINT32_C( 38953962), UINT32_C(2719153722), UINT32_C( 513584244), UINT32_C(2323323172), UINT32_C(2832961499), UINT32_C( 227556918), UINT32_C(1294478278), UINT32_C(4041774086), UINT32_C( 854735607) }, { UINT32_C( 960138392), UINT32_C(4204426855), UINT32_C(1551726762), UINT32_C(3222241009), UINT32_C(3898845133), UINT32_C( 446400727), UINT32_C(2607602567), UINT32_C(2798589198), UINT32_C(4141812130), UINT32_C(2311688440), UINT32_C(2323323172), UINT32_C(3074747826), UINT32_C( 311626731), UINT32_C(2988781339), UINT32_C(4041774086), UINT32_C(3069644564) } }, { { UINT32_C(2916353337), UINT32_C(3603722417), UINT32_C(1684031369), UINT32_C( 202128342), UINT32_C(1058708857), UINT32_C(3482075848), UINT32_C(3451876566), UINT32_C(3909071535), UINT32_C(1754649527), UINT32_C(3443417411), UINT32_C(2117181096), UINT32_C(1384857305), UINT32_C(2744231387), UINT32_C(3178372583), UINT32_C(1099575954), UINT32_C(2603191012) }, { UINT32_C(2701377117), UINT32_C(3362669088), UINT32_C(3125256160), UINT32_C(3087848157), UINT32_C(1583128183), UINT32_C(1293668027), UINT32_C(3834553600), UINT32_C(2373957423), UINT32_C(2519630710), UINT32_C(2774441157), UINT32_C( 425698619), UINT32_C(4006702199), UINT32_C(3310103818), UINT32_C(4229130236), UINT32_C(1021419789), UINT32_C(3486081113) }, { UINT32_C(2916353337), UINT32_C(3603722417), UINT32_C(3125256160), UINT32_C(3087848157), UINT32_C(1583128183), UINT32_C(3482075848), UINT32_C(3834553600), UINT32_C(3909071535), UINT32_C(2519630710), UINT32_C(3443417411), UINT32_C(2117181096), UINT32_C(4006702199), UINT32_C(3310103818), UINT32_C(4229130236), UINT32_C(1099575954), UINT32_C(3486081113) } }, { { UINT32_C(2825254883), UINT32_C(3478045587), UINT32_C(3773345129), UINT32_C( 600815897), UINT32_C(3823705063), UINT32_C(2430598275), UINT32_C(4140613789), UINT32_C( 80057889), UINT32_C( 
564996749), UINT32_C(1475410926), UINT32_C(3258439848), UINT32_C(2028275345), UINT32_C(2774257186), UINT32_C(1748319178), UINT32_C( 475922939), UINT32_C( 622929047) }, { UINT32_C(1011273294), UINT32_C(1905473225), UINT32_C(2670971662), UINT32_C(4078442961), UINT32_C(2996335591), UINT32_C(2853883310), UINT32_C(1724283087), UINT32_C(3951814556), UINT32_C(2116538805), UINT32_C( 368098055), UINT32_C(1471488902), UINT32_C( 608947516), UINT32_C(4023837504), UINT32_C(2157572273), UINT32_C( 98983784), UINT32_C(4243616327) }, { UINT32_C(2825254883), UINT32_C(3478045587), UINT32_C(3773345129), UINT32_C(4078442961), UINT32_C(3823705063), UINT32_C(2853883310), UINT32_C(4140613789), UINT32_C(3951814556), UINT32_C(2116538805), UINT32_C(1475410926), UINT32_C(3258439848), UINT32_C(2028275345), UINT32_C(4023837504), UINT32_C(2157572273), UINT32_C( 475922939), UINT32_C(4243616327) } }, { { UINT32_C(1266358083), UINT32_C(1482714066), UINT32_C(3417314702), UINT32_C( 602930146), UINT32_C(2400372190), UINT32_C( 487566261), UINT32_C(1361245706), UINT32_C(2874020456), UINT32_C(4244031786), UINT32_C(3260372788), UINT32_C(1334642028), UINT32_C(3732044800), UINT32_C(4134437953), UINT32_C( 957644079), UINT32_C(3683333747), UINT32_C(1938282825) }, { UINT32_C(3597630882), UINT32_C(1100530900), UINT32_C(3381667529), UINT32_C(3836215970), UINT32_C(3050968710), UINT32_C( 133099155), UINT32_C(1860335909), UINT32_C(4108413266), UINT32_C(3150598375), UINT32_C(3741082389), UINT32_C( 732466313), UINT32_C( 336547982), UINT32_C(4190759526), UINT32_C(4244682968), UINT32_C(2221663025), UINT32_C( 863521868) }, { UINT32_C(3597630882), UINT32_C(1482714066), UINT32_C(3417314702), UINT32_C(3836215970), UINT32_C(3050968710), UINT32_C( 487566261), UINT32_C(1860335909), UINT32_C(4108413266), UINT32_C(4244031786), UINT32_C(3741082389), UINT32_C(1334642028), UINT32_C(3732044800), UINT32_C(4190759526), UINT32_C(4244682968), UINT32_C(3683333747), UINT32_C(1938282825) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_max_epu32(a, b); simde_test_x86_assert_equal_u32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } /* Test for simde_mm512_mask_max_epu32. Each test_vec entry holds a pass-through vector (src), a 16-bit lane mask (k), two operands (a, b) and the expected result (r), each vector stored as sixteen uint32_t lanes. In the recorded vectors (checked on the first entry) r[i] is the unsigned maximum of a[i] and b[i] when bit i of k is set, and src[i] otherwise. Returns 0 once the loop over all entries completes. */ static int test_simde_mm512_mask_max_epu32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint32_t src[16]; const simde__mmask16 k; const uint32_t a[16]; const uint32_t b[16]; const uint32_t r[16]; } test_vec[] = { { { UINT32_C(1064309063), UINT32_C(3842122435), UINT32_C( 973388532), UINT32_C( 462482396), UINT32_C(3579167017), UINT32_C(3341136776), UINT32_C(2124560456), UINT32_C( 856035564), UINT32_C(3849549346), UINT32_C(2378855833), UINT32_C( 214421296), UINT32_C(3827783610), UINT32_C(2713287961), UINT32_C(2020138544), UINT32_C( 49744406), UINT32_C(1027013915) }, UINT16_C(43379), { UINT32_C(3978169378), UINT32_C(1639730841), UINT32_C(2193127003), UINT32_C( 335532378), UINT32_C(3690016627), UINT32_C(2682652584), UINT32_C(1083965706), UINT32_C(1625886526), UINT32_C(3041724188), UINT32_C(2971011414), UINT32_C(3660828544), UINT32_C( 334377888), UINT32_C( 183492450), UINT32_C(4054439399), UINT32_C( 338773462), UINT32_C(1903434325) }, { UINT32_C(1999028769), UINT32_C(1260928459), UINT32_C(2888195084), UINT32_C(4055897231), UINT32_C(3673993203), UINT32_C(1523295620), UINT32_C(1081015531), UINT32_C( 951181846), UINT32_C(1890572196), UINT32_C( 549181460), UINT32_C(3285049652), UINT32_C(3920924149), UINT32_C(3234050108), UINT32_C(1092259670), UINT32_C(2726398091), UINT32_C( 265957994) }, { UINT32_C(3978169378), UINT32_C(1639730841), UINT32_C( 973388532), UINT32_C( 462482396), UINT32_C(3690016627), UINT32_C(2682652584), UINT32_C(1083965706), UINT32_C( 856035564), UINT32_C(3041724188), UINT32_C(2378855833), UINT32_C( 214421296), UINT32_C(3920924149), UINT32_C(2713287961), UINT32_C(4054439399), UINT32_C( 49744406), UINT32_C(1903434325) } }, { { UINT32_C( 511674633), UINT32_C(2503883361), 
UINT32_C( 290982684), UINT32_C(3573157272), UINT32_C( 328515261), UINT32_C(3629428301), UINT32_C(2709181750), UINT32_C( 296768519), UINT32_C(1060057054), UINT32_C(2245356905), UINT32_C( 295120249), UINT32_C(4175860026), UINT32_C(2617997903), UINT32_C(1601527849), UINT32_C(1023471413), UINT32_C( 575582276) }, UINT16_C(32223), { UINT32_C( 921323873), UINT32_C(1701077966), UINT32_C(1542954613), UINT32_C(2731886230), UINT32_C(1476591331), UINT32_C(1581725534), UINT32_C(3255798644), UINT32_C( 255848109), UINT32_C( 105196087), UINT32_C( 40610189), UINT32_C(3730661960), UINT32_C(2357212073), UINT32_C(2414117425), UINT32_C( 787360698), UINT32_C(1693580727), UINT32_C( 594751723) }, { UINT32_C(3895048538), UINT32_C(2867500130), UINT32_C(2693351671), UINT32_C(2888567163), UINT32_C(1178341516), UINT32_C(4067699259), UINT32_C( 307717415), UINT32_C(4030057110), UINT32_C(3872939651), UINT32_C(3935355891), UINT32_C(2257197323), UINT32_C(2939336227), UINT32_C( 32861894), UINT32_C(3220466072), UINT32_C(1708280783), UINT32_C(2572486421) }, { UINT32_C(3895048538), UINT32_C(2867500130), UINT32_C(2693351671), UINT32_C(2888567163), UINT32_C(1476591331), UINT32_C(3629428301), UINT32_C(3255798644), UINT32_C(4030057110), UINT32_C(3872939651), UINT32_C(2245356905), UINT32_C(3730661960), UINT32_C(2939336227), UINT32_C(2414117425), UINT32_C(3220466072), UINT32_C(1708280783), UINT32_C( 575582276) } }, { { UINT32_C(1501507174), UINT32_C(4232253425), UINT32_C(1283640617), UINT32_C(1241232515), UINT32_C(3142250531), UINT32_C( 679165529), UINT32_C(2676837769), UINT32_C(3124290388), UINT32_C( 34846481), UINT32_C(4026422982), UINT32_C(2788917283), UINT32_C(1475294772), UINT32_C(2148743718), UINT32_C( 44600952), UINT32_C( 799094491), UINT32_C( 720034073) }, UINT16_C(64912), { UINT32_C( 710170156), UINT32_C(2175432518), UINT32_C( 230219294), UINT32_C(1229446710), UINT32_C( 131579998), UINT32_C(1664987842), UINT32_C(3409729249), UINT32_C(2898906240), UINT32_C(1758862626), UINT32_C(3001712788), 
UINT32_C(2495652446), UINT32_C(3755804544), UINT32_C(2313598151), UINT32_C(1223435110), UINT32_C(4178782329), UINT32_C( 396745972) }, { UINT32_C(2390719481), UINT32_C(1497393659), UINT32_C(2364407819), UINT32_C(3479948040), UINT32_C(3864613248), UINT32_C(3979232628), UINT32_C(1659257454), UINT32_C( 410618654), UINT32_C( 27719942), UINT32_C(1851450978), UINT32_C(4026157287), UINT32_C(2495505684), UINT32_C( 712644534), UINT32_C(3407325533), UINT32_C( 154009067), UINT32_C(2384570248) }, { UINT32_C(1501507174), UINT32_C(4232253425), UINT32_C(1283640617), UINT32_C(1241232515), UINT32_C(3864613248), UINT32_C( 679165529), UINT32_C(2676837769), UINT32_C(2898906240), UINT32_C(1758862626), UINT32_C(4026422982), UINT32_C(4026157287), UINT32_C(3755804544), UINT32_C(2313598151), UINT32_C(3407325533), UINT32_C(4178782329), UINT32_C(2384570248) } }, { { UINT32_C( 42977184), UINT32_C(2507205038), UINT32_C(1183083058), UINT32_C(2245673679), UINT32_C(3081720922), UINT32_C(3900884733), UINT32_C(1274195907), UINT32_C(4141421398), UINT32_C(2314823899), UINT32_C(2216585554), UINT32_C(2747966164), UINT32_C(1042916580), UINT32_C(4143307000), UINT32_C(1658746783), UINT32_C(2108608551), UINT32_C(3212085220) }, UINT16_C(28144), { UINT32_C(1725317704), UINT32_C(2416487110), UINT32_C(1999957070), UINT32_C( 542059563), UINT32_C( 26799650), UINT32_C(4291936081), UINT32_C(2961618236), UINT32_C(3156047476), UINT32_C(2116220088), UINT32_C(3960351390), UINT32_C(1113801239), UINT32_C(2439164783), UINT32_C(4069718689), UINT32_C(4143015097), UINT32_C(2393274393), UINT32_C(2806695150) }, { UINT32_C(1445293496), UINT32_C(2923639959), UINT32_C(3857753718), UINT32_C(4218901337), UINT32_C( 132974925), UINT32_C(2281561965), UINT32_C(1159045975), UINT32_C( 535584615), UINT32_C(1685459660), UINT32_C(3155343686), UINT32_C(3114402655), UINT32_C(2746489174), UINT32_C(2427101474), UINT32_C(3608651648), UINT32_C(2988256331), UINT32_C(1490160011) }, { UINT32_C( 42977184), UINT32_C(2507205038), UINT32_C(1183083058), 
UINT32_C(2245673679), UINT32_C( 132974925), UINT32_C(4291936081), UINT32_C(2961618236), UINT32_C(3156047476), UINT32_C(2116220088), UINT32_C(2216585554), UINT32_C(3114402655), UINT32_C(2746489174), UINT32_C(4143307000), UINT32_C(4143015097), UINT32_C(2988256331), UINT32_C(3212085220) } }, { { UINT32_C(1639729179), UINT32_C(1612631553), UINT32_C( 655999185), UINT32_C(4224437721), UINT32_C(4018894191), UINT32_C(1757913629), UINT32_C(1511711950), UINT32_C( 162721005), UINT32_C( 896167476), UINT32_C( 244746300), UINT32_C( 557166408), UINT32_C(3961323645), UINT32_C(2480646262), UINT32_C( 435921483), UINT32_C(1953699206), UINT32_C( 914171138) }, UINT16_C(59283), { UINT32_C( 40947820), UINT32_C( 330414302), UINT32_C(4145295066), UINT32_C(4137650714), UINT32_C( 412674589), UINT32_C(1999445764), UINT32_C( 278736787), UINT32_C(3539415142), UINT32_C(3738461952), UINT32_C(4210197792), UINT32_C(3471902388), UINT32_C(2915340432), UINT32_C(2143640955), UINT32_C( 267842172), UINT32_C(2283770658), UINT32_C(3294238404) }, { UINT32_C(2661494398), UINT32_C(1738053043), UINT32_C( 724994459), UINT32_C(2497247769), UINT32_C(3541278039), UINT32_C(2984381071), UINT32_C(1631125917), UINT32_C(2519110424), UINT32_C(1966393793), UINT32_C(4191997022), UINT32_C(1847857749), UINT32_C(1677982733), UINT32_C( 674764441), UINT32_C(3201964576), UINT32_C( 874451740), UINT32_C(1758086567) }, { UINT32_C(2661494398), UINT32_C(1738053043), UINT32_C( 655999185), UINT32_C(4224437721), UINT32_C(3541278039), UINT32_C(1757913629), UINT32_C(1511711950), UINT32_C(3539415142), UINT32_C(3738461952), UINT32_C(4210197792), UINT32_C(3471902388), UINT32_C(3961323645), UINT32_C(2480646262), UINT32_C(3201964576), UINT32_C(2283770658), UINT32_C(3294238404) } }, { { UINT32_C(1826487822), UINT32_C( 526760650), UINT32_C(3649931724), UINT32_C( 507416709), UINT32_C(3343349415), UINT32_C(2894406032), UINT32_C(3688932660), UINT32_C(4182026986), UINT32_C(1919230376), UINT32_C(2828127195), UINT32_C(3665895252), 
UINT32_C(1459142575), UINT32_C(3323871029), UINT32_C(2507318112), UINT32_C( 862999368), UINT32_C(2787947773) }, UINT16_C(37334), { UINT32_C(2858201368), UINT32_C(3687428441), UINT32_C(2207938699), UINT32_C(3989033167), UINT32_C( 143664022), UINT32_C( 693885368), UINT32_C( 954030348), UINT32_C( 399094783), UINT32_C(3200329317), UINT32_C(1654229719), UINT32_C(3538236419), UINT32_C(2596251652), UINT32_C(2225229772), UINT32_C( 883818024), UINT32_C(1449954135), UINT32_C(2741843518) }, { UINT32_C( 862072668), UINT32_C(3163945913), UINT32_C( 864975407), UINT32_C(4023209251), UINT32_C(3312677021), UINT32_C(3321504110), UINT32_C(3927664300), UINT32_C(4170090652), UINT32_C(1898705079), UINT32_C( 455983339), UINT32_C(1582218299), UINT32_C(2790071305), UINT32_C(4201431180), UINT32_C(2378131169), UINT32_C(1769528012), UINT32_C( 442566242) }, { UINT32_C(1826487822), UINT32_C(3687428441), UINT32_C(2207938699), UINT32_C( 507416709), UINT32_C(3312677021), UINT32_C(2894406032), UINT32_C(3927664300), UINT32_C(4170090652), UINT32_C(3200329317), UINT32_C(2828127195), UINT32_C(3665895252), UINT32_C(1459142575), UINT32_C(4201431180), UINT32_C(2507318112), UINT32_C( 862999368), UINT32_C(2741843518) } }, { { UINT32_C(3784019446), UINT32_C(2298263629), UINT32_C(2129021812), UINT32_C(4079235943), UINT32_C(3589116148), UINT32_C(3278089462), UINT32_C(3895253894), UINT32_C(3607268833), UINT32_C(1756925210), UINT32_C(3136337222), UINT32_C(1731778304), UINT32_C(4267334922), UINT32_C(3839117293), UINT32_C(2041001971), UINT32_C(4083274514), UINT32_C(2076861536) }, UINT16_C(33521), { UINT32_C(3543611363), UINT32_C( 715798514), UINT32_C(4169643422), UINT32_C(2269083059), UINT32_C( 12464729), UINT32_C( 231985323), UINT32_C(2373006275), UINT32_C(2433770158), UINT32_C(2355447706), UINT32_C( 498470783), UINT32_C(2014723780), UINT32_C( 218060211), UINT32_C( 856473224), UINT32_C(1379983246), UINT32_C(3252662546), UINT32_C(2924670740) }, { UINT32_C(3057301303), UINT32_C(2345922759), UINT32_C(3775129902), 
UINT32_C(3354198847), UINT32_C(1341848001), UINT32_C(3953212376), UINT32_C( 11305452), UINT32_C(2813263472), UINT32_C(2103306422), UINT32_C( 117977561), UINT32_C(1508445210), UINT32_C(3491812879), UINT32_C(2820611024), UINT32_C(1116979542), UINT32_C(2990751554), UINT32_C(4099600702) }, { UINT32_C(3543611363), UINT32_C(2298263629), UINT32_C(2129021812), UINT32_C(4079235943), UINT32_C(1341848001), UINT32_C(3953212376), UINT32_C(2373006275), UINT32_C(2813263472), UINT32_C(1756925210), UINT32_C( 498470783), UINT32_C(1731778304), UINT32_C(4267334922), UINT32_C(3839117293), UINT32_C(2041001971), UINT32_C(4083274514), UINT32_C(4099600702) } }, { { UINT32_C(3010574298), UINT32_C( 62552552), UINT32_C(2489099141), UINT32_C(1248099706), UINT32_C(4008871064), UINT32_C(2268104261), UINT32_C( 54096837), UINT32_C(1073189733), UINT32_C( 871524427), UINT32_C(1731636450), UINT32_C(3405550416), UINT32_C(2819907600), UINT32_C( 697698020), UINT32_C(1387316876), UINT32_C(2673207866), UINT32_C(3370012029) }, UINT16_C(53429), { UINT32_C( 847026172), UINT32_C(4224044287), UINT32_C(2858145174), UINT32_C( 330383485), UINT32_C( 450510185), UINT32_C(3842249871), UINT32_C(2436006323), UINT32_C(1180821322), UINT32_C(2121850239), UINT32_C(1081687722), UINT32_C(2448151571), UINT32_C(2124717076), UINT32_C(1771601625), UINT32_C(1162779794), UINT32_C(1742110749), UINT32_C(3870111591) }, { UINT32_C(3211011605), UINT32_C(2013257060), UINT32_C(3322473138), UINT32_C(1615113606), UINT32_C(3200900139), UINT32_C(2516785016), UINT32_C(4278049431), UINT32_C( 652585745), UINT32_C( 904219089), UINT32_C(3651986727), UINT32_C(1453307343), UINT32_C(2377573474), UINT32_C( 961249216), UINT32_C( 768561046), UINT32_C( 975948841), UINT32_C(1214320759) }, { UINT32_C(3211011605), UINT32_C( 62552552), UINT32_C(3322473138), UINT32_C(1248099706), UINT32_C(3200900139), UINT32_C(3842249871), UINT32_C( 54096837), UINT32_C(1180821322), UINT32_C( 871524427), UINT32_C(1731636450), UINT32_C(3405550416), UINT32_C(2819907600), 
UINT32_C(1771601625), UINT32_C(1387316876), UINT32_C(1742110749), UINT32_C(3870111591) } } }; /* For every entry: load src, a and b with simde_mm512_loadu_epi32, run simde_mm512_mask_max_epu32 with the entry's mask, and compare the result with the expected vector r as sixteen uint32_t lanes. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi32(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_mask_max_epu32(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_u32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } /* Test for simde_mm512_maskz_max_epu32. Each test_vec entry holds a 16-bit lane mask (k), two operands (a, b) and the expected result (r) as sixteen uint32_t lanes. In the recorded vectors (checked on the first entry) r[i] is the unsigned maximum of a[i] and b[i] when bit i of k is set, and 0 otherwise. */ static int test_simde_mm512_maskz_max_epu32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const uint32_t a[16]; const uint32_t b[16]; const uint32_t r[16]; } test_vec[] = { { UINT16_C(21775), { UINT32_C(4091585630), UINT32_C(1443008046), UINT32_C( 544481075), UINT32_C(3827144372), UINT32_C(1458616163), UINT32_C( 822659735), UINT32_C(1373879613), UINT32_C(2544300601), UINT32_C(3314190231), UINT32_C(1226542357), UINT32_C(1617530796), UINT32_C(2303035173), UINT32_C(3588175166), UINT32_C(1007085567), UINT32_C(3733842341), UINT32_C(1920349147) }, { UINT32_C(3493265594), UINT32_C( 957961101), UINT32_C( 144343778), UINT32_C(1200742153), UINT32_C( 320630804), UINT32_C(4249821784), UINT32_C(3889945611), UINT32_C(3394851087), UINT32_C(3751448914), UINT32_C(3323573220), UINT32_C(1070510901), UINT32_C(2793824146), UINT32_C( 683254736), UINT32_C(3508865221), UINT32_C(4088922340), UINT32_C(2763854162) }, { UINT32_C(4091585630), UINT32_C(1443008046), UINT32_C( 544481075), UINT32_C(3827144372), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C(3751448914), UINT32_C( 0), UINT32_C(1617530796), UINT32_C( 0), UINT32_C(3588175166), UINT32_C( 0), UINT32_C(4088922340), UINT32_C( 0) } }, { UINT16_C(22434), { UINT32_C(2617935491), UINT32_C( 458178637), UINT32_C( 92004735), UINT32_C(1084771207), UINT32_C(2554883699), UINT32_C(4153945151), UINT32_C(3708348960), UINT32_C( 305507214), UINT32_C(2125348657), UINT32_C(4271570559), UINT32_C(1728320991), 
/* (continuation of the test_vec table of test_simde_mm512_maskz_max_epu32) */ UINT32_C(3550981216), UINT32_C(1500246042), UINT32_C(1011876636), UINT32_C(2082101742), UINT32_C( 898518788) }, { UINT32_C( 246693262), UINT32_C( 437014075), UINT32_C(3280015459), UINT32_C(3616942525), UINT32_C( 892403993), UINT32_C(4067590404), UINT32_C(3731852506), UINT32_C(1762983387), UINT32_C(1970784314), UINT32_C(2039514134), UINT32_C(1362891156), UINT32_C(1395249722), UINT32_C(3616103123), UINT32_C(3066756059), UINT32_C(1653881223), UINT32_C(1909172278) }, { UINT32_C( 0), UINT32_C( 458178637), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C(4153945151), UINT32_C( 0), UINT32_C(1762983387), UINT32_C(2125348657), UINT32_C(4271570559), UINT32_C(1728320991), UINT32_C( 0), UINT32_C(3616103123), UINT32_C( 0), UINT32_C(2082101742), UINT32_C( 0) } }, { UINT16_C(17264), { UINT32_C(1992787686), UINT32_C( 998792191), UINT32_C(3591226029), UINT32_C(2670780438), UINT32_C(2191133624), UINT32_C(1455104449), UINT32_C(1325330819), UINT32_C(1234268002), UINT32_C(4122958069), UINT32_C(1630554036), UINT32_C( 540491274), UINT32_C(3602867998), UINT32_C( 878205298), UINT32_C(4253684602), UINT32_C(1733003269), UINT32_C(3987791351) }, { UINT32_C(3923931189), UINT32_C(3242857143), UINT32_C(1877049680), UINT32_C(1531289832), UINT32_C(1938792185), UINT32_C(3060799921), UINT32_C(2568928417), UINT32_C(3464941209), UINT32_C(4139280446), UINT32_C(3417768570), UINT32_C(1815779716), UINT32_C( 868712249), UINT32_C(3483784733), UINT32_C( 293934959), UINT32_C(1823122387), UINT32_C(2956603506) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C(2191133624), UINT32_C(3060799921), UINT32_C(2568928417), UINT32_C( 0), UINT32_C(4139280446), UINT32_C(3417768570), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C(1823122387), UINT32_C( 0) } }, { UINT16_C(62104), { UINT32_C(1593119398), UINT32_C( 402094557), UINT32_C(2912366821), UINT32_C( 168014947), UINT32_C( 153187203), UINT32_C( 783086724), UINT32_C(2589859424), UINT32_C(1972238031), UINT32_C(3872621064), 
UINT32_C(3774728955), UINT32_C(1586337019), UINT32_C(3429405001), UINT32_C(2295695620), UINT32_C(3719725693), UINT32_C(1870140576), UINT32_C( 316998922) }, { UINT32_C(2197338247), UINT32_C(2120414851), UINT32_C(3554472074), UINT32_C(2241873281), UINT32_C(1275950542), UINT32_C(2552873975), UINT32_C(3775373783), UINT32_C( 770960550), UINT32_C( 682618021), UINT32_C(1822823138), UINT32_C(2202042882), UINT32_C(2517164231), UINT32_C(1306662229), UINT32_C(2951023576), UINT32_C(1402006701), UINT32_C(2122417113) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C(2241873281), UINT32_C(1275950542), UINT32_C( 0), UINT32_C( 0), UINT32_C(1972238031), UINT32_C( 0), UINT32_C(3774728955), UINT32_C( 0), UINT32_C( 0), UINT32_C(2295695620), UINT32_C(3719725693), UINT32_C(1870140576), UINT32_C(2122417113) } }, { UINT16_C(12399), { UINT32_C(1279414694), UINT32_C(4274930878), UINT32_C(3487471303), UINT32_C( 249836332), UINT32_C(1696185472), UINT32_C(4216505963), UINT32_C(2608802586), UINT32_C(1338764969), UINT32_C(4271574592), UINT32_C( 452749650), UINT32_C( 736746239), UINT32_C(2386152973), UINT32_C(4143141770), UINT32_C( 871449881), UINT32_C( 432959600), UINT32_C(2674432607) }, { UINT32_C(4204594088), UINT32_C(1813289325), UINT32_C(2157510259), UINT32_C(1443811788), UINT32_C(1045168676), UINT32_C(3094429255), UINT32_C( 231817390), UINT32_C(2192325338), UINT32_C(2860271933), UINT32_C(1427608034), UINT32_C(1540796303), UINT32_C(2779899008), UINT32_C( 786693862), UINT32_C(3940963388), UINT32_C(1861793684), UINT32_C( 804300017) }, { UINT32_C(4204594088), UINT32_C(4274930878), UINT32_C(3487471303), UINT32_C(1443811788), UINT32_C( 0), UINT32_C(4216505963), UINT32_C(2608802586), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C(4143141770), UINT32_C(3940963388), UINT32_C( 0), UINT32_C( 0) } }, { UINT16_C(28142), { UINT32_C(4043231449), UINT32_C(4238314790), UINT32_C(2581602536), UINT32_C(2828519365), UINT32_C(3690779637), UINT32_C(3063058878), 
UINT32_C(4032464127), UINT32_C(2354923699), UINT32_C(1065179929), UINT32_C(3493534952), UINT32_C( 23665468), UINT32_C(3618177506), UINT32_C(2461181652), UINT32_C( 910705975), UINT32_C(2082907081), UINT32_C( 67666923) }, { UINT32_C(3326313950), UINT32_C(3734404770), UINT32_C(2095055002), UINT32_C(3579087105), UINT32_C(1718093359), UINT32_C( 345878603), UINT32_C(1066451795), UINT32_C( 625187143), UINT32_C(3236726558), UINT32_C(2678030853), UINT32_C(2199682946), UINT32_C( 945385480), UINT32_C(3265184118), UINT32_C(3319151473), UINT32_C(1174693887), UINT32_C( 510347008) }, { UINT32_C( 0), UINT32_C(4238314790), UINT32_C(2581602536), UINT32_C(3579087105), UINT32_C( 0), UINT32_C(3063058878), UINT32_C(4032464127), UINT32_C(2354923699), UINT32_C(3236726558), UINT32_C( 0), UINT32_C(2199682946), UINT32_C(3618177506), UINT32_C( 0), UINT32_C(3319151473), UINT32_C(2082907081), UINT32_C( 0) } }, { UINT16_C(22478), { UINT32_C(2128270559), UINT32_C(2415746163), UINT32_C( 973014496), UINT32_C(3707401789), UINT32_C( 236415800), UINT32_C( 880088624), UINT32_C(3363599708), UINT32_C(1931430548), UINT32_C(2465331486), UINT32_C( 908193366), UINT32_C( 829366771), UINT32_C(3473762711), UINT32_C( 98378964), UINT32_C(2537116475), UINT32_C(1549776328), UINT32_C( 516914944) }, { UINT32_C(3467690104), UINT32_C(2718225070), UINT32_C(2329113587), UINT32_C(2975457500), UINT32_C(1068905988), UINT32_C(1389883273), UINT32_C(2779657893), UINT32_C( 784563893), UINT32_C(3992745022), UINT32_C( 965673286), UINT32_C(1371759220), UINT32_C(1174543426), UINT32_C(3699816530), UINT32_C(1278107047), UINT32_C(1240587411), UINT32_C(2574759258) }, { UINT32_C( 0), UINT32_C(2718225070), UINT32_C(2329113587), UINT32_C(3707401789), UINT32_C( 0), UINT32_C( 0), UINT32_C(3363599708), UINT32_C(1931430548), UINT32_C(3992745022), UINT32_C( 965673286), UINT32_C(1371759220), UINT32_C( 0), UINT32_C(3699816530), UINT32_C( 0), UINT32_C(1549776328), UINT32_C( 0) } }, { UINT16_C(29481), { UINT32_C( 359952262), UINT32_C(1803020712), 
UINT32_C(1015527738), UINT32_C(2247416319), UINT32_C(3823279029), UINT32_C(3653269224), UINT32_C( 915282623), UINT32_C( 967423923), UINT32_C(3461226022), UINT32_C(1094305031), UINT32_C(2122170494), UINT32_C(1325625754), UINT32_C(4097041932), UINT32_C( 466547548), UINT32_C(3243334669), UINT32_C( 989526548) }, { UINT32_C( 520702232), UINT32_C(2371895822), UINT32_C(2634800387), UINT32_C(1542196814), UINT32_C(1347362804), UINT32_C( 543890706), UINT32_C(2397158522), UINT32_C(3486047159), UINT32_C( 871354660), UINT32_C( 398479124), UINT32_C(2075446061), UINT32_C(3470172377), UINT32_C(3525191360), UINT32_C(3186788931), UINT32_C(4249604934), UINT32_C(3553432751) }, { UINT32_C( 520702232), UINT32_C( 0), UINT32_C( 0), UINT32_C(2247416319), UINT32_C( 0), UINT32_C(3653269224), UINT32_C( 0), UINT32_C( 0), UINT32_C(3461226022), UINT32_C(1094305031), UINT32_C( 0), UINT32_C( 0), UINT32_C(4097041932), UINT32_C(3186788931), UINT32_C(4249604934), UINT32_C( 0) } } }; /* For every entry: load a and b with simde_mm512_loadu_epi32, run simde_mm512_maskz_max_epu32 with the entry's mask, and compare the result with the expected vector r as sixteen uint32_t lanes. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_maskz_max_epu32(test_vec[i].k, a, b); simde_test_x86_assert_equal_u32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } /* Test for simde_mm512_max_epi64. Each test_vec entry holds two operands (a, b) and the expected result (r) as eight int64_t lanes. In the recorded vectors (checked on the first entry) r[i] is the signed maximum of a[i] and b[i]. Returns 0 once the loop over all entries completes. */ static int test_simde_mm512_max_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { { -INT64_C( 2014748860252733256), INT64_C( 8310013920193191870), INT64_C( 8627312520704586488), INT64_C( 2141911759069453499), -INT64_C( 6389418296559551015), INT64_C( 1147794582725394137), -INT64_C( 1676750639909974469), -INT64_C( 4640073323493241832) }, { -INT64_C( 2561841185703343390), -INT64_C( 1249391887751124606), -INT64_C( 60820695810971118), -INT64_C( 7359981615789075913), INT64_C( 1207483183845870561), INT64_C( 7953448598462142656), -INT64_C( 5466291888746294979), -INT64_C( 7442288857198780570) }, { -INT64_C( 
2014748860252733256), INT64_C( 8310013920193191870), INT64_C( 8627312520704586488), INT64_C( 2141911759069453499), INT64_C( 1207483183845870561), INT64_C( 7953448598462142656), -INT64_C( 1676750639909974469), -INT64_C( 4640073323493241832) } }, { { -INT64_C( 5523876103837639496), -INT64_C( 7916898971599699625), INT64_C( 597247717906238999), -INT64_C( 9166832090146792840), -INT64_C( 2621060327344224774), INT64_C( 7135864873221539104), INT64_C( 1020136329125019005), INT64_C( 2784075781284712027) }, { -INT64_C( 6148623457466487989), INT64_C( 7432952495457909369), -INT64_C( 5005559260738347037), INT64_C( 1679985374884906749), -INT64_C( 8061281604004781076), INT64_C( 5340561513718759629), INT64_C( 5743579719547462811), -INT64_C( 2875137586523631) }, { -INT64_C( 5523876103837639496), INT64_C( 7432952495457909369), INT64_C( 597247717906238999), INT64_C( 1679985374884906749), -INT64_C( 2621060327344224774), INT64_C( 7135864873221539104), INT64_C( 5743579719547462811), INT64_C( 2784075781284712027) } }, { { INT64_C( 4383012266978958380), INT64_C( 4577364368851937494), -INT64_C( 6388656697539736908), -INT64_C( 4209606055450849407), INT64_C( 8431544031154538838), -INT64_C( 4978949266078501628), -INT64_C( 8016663458088935294), INT64_C( 1388437709158368231) }, { INT64_C( 3303348722969628098), INT64_C( 4940374856950974927), INT64_C( 5655143890238066908), INT64_C( 1886465463547764830), -INT64_C( 5641510730089101473), INT64_C( 2585371054122109660), -INT64_C( 5252452426412311195), INT64_C( 6601434808538294570) }, { INT64_C( 4383012266978958380), INT64_C( 4940374856950974927), INT64_C( 5655143890238066908), INT64_C( 1886465463547764830), INT64_C( 8431544031154538838), INT64_C( 2585371054122109660), -INT64_C( 5252452426412311195), INT64_C( 6601434808538294570) } }, { { INT64_C( 5231662610203744520), -INT64_C( 7309967587911851997), INT64_C( 5698322525522991876), -INT64_C( 1170531388486392213), INT64_C( 7006601834514720136), -INT64_C( 1340849032208375404), -INT64_C( 
5158488212946367500), INT64_C( 3610726547756825412) }, { -INT64_C( 277989074899897211), INT64_C( 4348552306242220163), INT64_C( 5042271269930050548), INT64_C( 9067590998807353594), -INT64_C( 2169217286705972095), -INT64_C( 3901904170037433516), -INT64_C( 1688844438773026999), INT64_C( 1193843738599489820) }, { INT64_C( 5231662610203744520), INT64_C( 4348552306242220163), INT64_C( 5698322525522991876), INT64_C( 9067590998807353594), INT64_C( 7006601834514720136), -INT64_C( 1340849032208375404), -INT64_C( 1688844438773026999), INT64_C( 3610726547756825412) } }, { { -INT64_C( 8458077628258463394), -INT64_C( 2655152826666718326), INT64_C( 5910893379497374256), INT64_C( 6077317384298206135), -INT64_C( 5149955032525746232), -INT64_C( 2375579881357988198), -INT64_C( 7208005823505813904), INT64_C( 2153344412445400728) }, { INT64_C( 4426654303458685831), INT64_C( 3809888892118205777), INT64_C( 8727927568400571551), -INT64_C( 4637325287291944911), -INT64_C( 5495268532625817635), INT64_C( 3990068972844426855), INT64_C( 4524104472594102638), -INT64_C( 7579160454283014182) }, { INT64_C( 4426654303458685831), INT64_C( 3809888892118205777), INT64_C( 8727927568400571551), INT64_C( 6077317384298206135), -INT64_C( 5149955032525746232), INT64_C( 3990068972844426855), INT64_C( 4524104472594102638), INT64_C( 2153344412445400728) } }, { { -INT64_C( 8124825606155844311), -INT64_C( 1025103812337405448), INT64_C( 3791196745065660755), -INT64_C( 781348367953927463), INT64_C( 510241631673269597), INT64_C( 4261352924285226927), INT64_C( 5146831995218388190), INT64_C( 2908201432506807451) }, { INT64_C( 5284343705789914174), INT64_C( 2933424775004679313), INT64_C( 2574035371966943235), -INT64_C( 3425015475534655101), INT64_C( 8621425594407462082), -INT64_C( 4407996268128690080), -INT64_C( 8745169126165367562), -INT64_C( 3035905454064194436) }, { INT64_C( 5284343705789914174), INT64_C( 2933424775004679313), INT64_C( 3791196745065660755), -INT64_C( 781348367953927463), INT64_C( 
8621425594407462082), INT64_C( 4261352924285226927), INT64_C( 5146831995218388190), INT64_C( 2908201432506807451) } }, { { -INT64_C( 3328486192785982096), INT64_C( 6591386827922128888), INT64_C( 1372890451679030403), -INT64_C( 6948492173882826072), -INT64_C( 7908386253090405380), -INT64_C( 8266988188849292412), INT64_C( 4834652249182707566), INT64_C( 3878320804479318276) }, { INT64_C( 1189199396536043603), -INT64_C( 417638992092411491), INT64_C( 8015308288830753118), INT64_C( 2215899434236132178), -INT64_C( 2100493519837961412), INT64_C( 8132584015426868053), INT64_C( 5107547021236624391), INT64_C( 3876353501048177889) }, { INT64_C( 1189199396536043603), INT64_C( 6591386827922128888), INT64_C( 8015308288830753118), INT64_C( 2215899434236132178), -INT64_C( 2100493519837961412), INT64_C( 8132584015426868053), INT64_C( 5107547021236624391), INT64_C( 3878320804479318276) } }, { { -INT64_C( 5107689581159983115), INT64_C( 7795298184369711019), INT64_C( 2273683656811648850), -INT64_C( 1841523710254883005), -INT64_C( 1041669315400470673), -INT64_C( 1173225514552318234), -INT64_C( 7434946741277387404), -INT64_C( 6630911411376317150) }, { INT64_C( 4678115603191831476), -INT64_C( 1390694773466359001), -INT64_C( 3475530227149510185), -INT64_C( 7933973800668719092), -INT64_C( 8965691194758964488), INT64_C( 4068996085191220754), -INT64_C( 7971608261304248861), INT64_C( 1598416259887808960) }, { INT64_C( 4678115603191831476), INT64_C( 7795298184369711019), INT64_C( 2273683656811648850), -INT64_C( 1841523710254883005), -INT64_C( 1041669315400470673), INT64_C( 4068996085191220754), -INT64_C( 7434946741277387404), INT64_C( 1598416259887808960) } } }; /* For every entry: load a and b with simde_mm512_loadu_epi64, run simde_mm512_max_epi64, and compare the result with the expected vector r as eight int64_t lanes. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_max_epi64(a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int 
test_simde_mm512_mask_max_epi64 (SIMDE_MUNIT_TEST_ARGS) { /* Test for simde_mm512_mask_max_epi64. Each test_vec entry holds a pass-through vector (src), an 8-bit lane mask (k), two operands (a, b) and the expected result (r), each vector stored as eight int64_t lanes. In the recorded vectors (checked on the first entry) r[i] is the signed maximum of a[i] and b[i] when bit i of k is set, and src[i] otherwise. Returns 0 once the loop over all entries completes. */ static const struct { const int64_t src[8]; const simde__mmask8 k; const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { { -INT64_C( 5345905637586622780), -INT64_C( 5692600239673997336), INT64_C( 3447846270946787721), INT64_C( 5280092904555912861), -INT64_C( 7374479798287586610), -INT64_C( 1077563827958409956), -INT64_C( 3597455596750832390), -INT64_C( 6290796570429148701) }, UINT8_C(195), { -INT64_C( 2798408533011031176), -INT64_C( 8323722415045507640), -INT64_C( 6501073103404244578), INT64_C( 6574454888980230368), -INT64_C( 8583698441112538686), -INT64_C( 993740444610715368), INT64_C( 6845927178435490791), INT64_C( 7641789052216770766) }, { INT64_C( 2382210729759504071), -INT64_C( 8666897520686196366), -INT64_C( 301490291198423055), -INT64_C( 8685549501623195291), -INT64_C( 6243645810378086608), INT64_C( 1395034114652840758), -INT64_C( 8511991153765892169), INT64_C( 8522231367853247742) }, { INT64_C( 2382210729759504071), -INT64_C( 8323722415045507640), INT64_C( 3447846270946787721), INT64_C( 5280092904555912861), -INT64_C( 7374479798287586610), -INT64_C( 1077563827958409956), INT64_C( 6845927178435490791), INT64_C( 8522231367853247742) } }, { { -INT64_C( 6425500390993734669), INT64_C( 2477780700413772589), INT64_C( 4234762695997843223), INT64_C( 6426422704697006706), INT64_C( 1560030974883127184), INT64_C( 1236739449352888987), -INT64_C( 8636759399566856274), -INT64_C( 1501263990414566037) }, UINT8_C( 79), { -INT64_C( 5431255861060637778), -INT64_C( 60869167830620238), INT64_C( 5947457634382975244), INT64_C( 3719578040572664798), -INT64_C( 3041661328608864637), INT64_C( 4850505679108994944), -INT64_C( 2301643962556783226), -INT64_C( 2882360576230638778) }, { -INT64_C( 7882506535313852254), -INT64_C( 1826154263602861523), INT64_C( 4213593490977851799), INT64_C( 1699551908358170732), INT64_C( 7413057174183445309), -INT64_C( 8239208018902006942), INT64_C( 7398183810386774652), 
-INT64_C( 877150009380392632) }, { -INT64_C( 5431255861060637778), -INT64_C( 60869167830620238), INT64_C( 5947457634382975244), INT64_C( 3719578040572664798), INT64_C( 1560030974883127184), INT64_C( 1236739449352888987), INT64_C( 7398183810386774652), -INT64_C( 1501263990414566037) } }, { { -INT64_C( 8367932021124452289), -INT64_C( 7392389279746512155), INT64_C( 8548032402407722559), -INT64_C( 4462778595530258841), INT64_C( 6587387977829929911), -INT64_C( 1262522271584044604), -INT64_C( 6714083937197980371), -INT64_C( 4407667190287825521) }, UINT8_C( 37), { -INT64_C( 4040301650481798641), -INT64_C( 3532683264081408467), INT64_C( 3162559544451224715), -INT64_C( 2782105502057237140), INT64_C( 2554087405900726172), -INT64_C( 3038266968933144898), -INT64_C( 680311230200947139), INT64_C( 6603569803635770881) }, { INT64_C( 6174565478091302550), -INT64_C( 4216439588620820643), -INT64_C( 5435642772517771760), -INT64_C( 965983240995224451), INT64_C( 8193506861635313353), INT64_C( 6060601839996899790), INT64_C( 8764427069845029947), INT64_C( 977930121442107459) }, { INT64_C( 6174565478091302550), -INT64_C( 7392389279746512155), INT64_C( 3162559544451224715), -INT64_C( 4462778595530258841), INT64_C( 6587387977829929911), INT64_C( 6060601839996899790), -INT64_C( 6714083937197980371), -INT64_C( 4407667190287825521) } }, { { INT64_C( 1040932122173182444), INT64_C( 6614521354654157619), INT64_C( 8951443263840631236), INT64_C( 3052223651288706826), INT64_C( 2093503034409339070), -INT64_C( 5214218449360489944), -INT64_C( 2247946204451705831), INT64_C( 6126735624116300191) }, UINT8_C( 81), { -INT64_C( 3245026734168648911), -INT64_C( 3501974344529788691), INT64_C( 7945060601169295347), INT64_C( 6237302025420545716), INT64_C( 1288061534104570797), INT64_C( 1445871127478838621), -INT64_C( 1121403750364760708), INT64_C( 8832611983379297047) }, { INT64_C( 3656474114891692168), INT64_C( 6719797122166889484), INT64_C( 676892280935610424), -INT64_C( 2844066805624499648), -INT64_C( 
8964060507010756719), -INT64_C( 4062824591738794913), INT64_C( 641094207007357930), -INT64_C( 7756996244792792527) }, { INT64_C( 3656474114891692168), INT64_C( 6614521354654157619), INT64_C( 8951443263840631236), INT64_C( 3052223651288706826), INT64_C( 1288061534104570797), -INT64_C( 5214218449360489944), INT64_C( 641094207007357930), INT64_C( 6126735624116300191) } }, { { INT64_C( 9198201055202696620), INT64_C( 3744281605296838303), INT64_C( 155361891174003031), -INT64_C( 8667779074086453986), -INT64_C( 2064530701811011398), -INT64_C( 3809474135993542489), INT64_C( 4903312945094209849), INT64_C( 2788039795700764751) }, UINT8_C(203), { INT64_C( 8166135916793823324), -INT64_C( 7546994602521836797), -INT64_C( 1514616460234961510), INT64_C( 3624410160372786534), INT64_C( 5712871432940116605), INT64_C( 8751230606422650485), -INT64_C( 7697179325750759702), INT64_C( 9173377252184196421) }, { -INT64_C( 3073812990146499140), INT64_C( 4045396086568825293), -INT64_C( 5902904741977044656), INT64_C( 5310333901049834032), INT64_C( 8392925918063036485), -INT64_C( 7142633917275690662), INT64_C( 4154060525654465934), -INT64_C( 3661392923705184166) }, { INT64_C( 8166135916793823324), INT64_C( 4045396086568825293), INT64_C( 155361891174003031), INT64_C( 5310333901049834032), -INT64_C( 2064530701811011398), -INT64_C( 3809474135993542489), INT64_C( 4154060525654465934), INT64_C( 9173377252184196421) } }, { { -INT64_C( 8916305743155461850), INT64_C( 3889999190665486868), -INT64_C( 6724487464277502102), INT64_C( 6744062616282929474), INT64_C( 642166417825401146), -INT64_C( 8238099514877536560), -INT64_C( 1268415667300607620), -INT64_C( 2136024915793875257) }, UINT8_C( 89), { INT64_C( 5099575821829228423), -INT64_C( 4422825203354485314), -INT64_C( 5301479173784706312), INT64_C( 5601703632838683412), INT64_C( 2232950201730075270), INT64_C( 6265034152244963141), -INT64_C( 3477225610252886207), -INT64_C( 2096160250809541420) }, { INT64_C( 8609358780718197500), -INT64_C( 
2688200817640031491), -INT64_C( 1549061152969609738), -INT64_C( 6702643060250659651), INT64_C( 2900731760192951447), INT64_C( 8405464573246957362), -INT64_C( 1665304729403160094), INT64_C( 7900154688119597146) }, { INT64_C( 8609358780718197500), INT64_C( 3889999190665486868), -INT64_C( 6724487464277502102), INT64_C( 5601703632838683412), INT64_C( 2900731760192951447), -INT64_C( 8238099514877536560), -INT64_C( 1665304729403160094), -INT64_C( 2136024915793875257) } }, { { INT64_C( 5468346420173447312), INT64_C( 9102827748989560416), INT64_C( 8744400713309190215), -INT64_C( 1655886121147999037), INT64_C( 1522365889094368444), INT64_C( 4253446389175105517), -INT64_C( 7253600422308512065), -INT64_C( 6294561215247757212) }, UINT8_C(118), { -INT64_C( 3116982845547250359), -INT64_C( 3730946081185773773), INT64_C( 4404028325641852594), -INT64_C( 3953085697309943180), -INT64_C( 4413148788968239537), INT64_C( 4663888145844832927), INT64_C( 1239924339176529291), INT64_C( 9168451639147716339) }, { -INT64_C( 3638578444647016911), -INT64_C( 4718238374845301322), -INT64_C( 8394449565981966127), INT64_C( 5978874995294486346), INT64_C( 5191968197482538257), INT64_C( 883007048760805457), INT64_C( 3366154728906684562), INT64_C( 121213393199281466) }, { INT64_C( 5468346420173447312), -INT64_C( 3730946081185773773), INT64_C( 4404028325641852594), -INT64_C( 1655886121147999037), INT64_C( 5191968197482538257), INT64_C( 4663888145844832927), INT64_C( 3366154728906684562), -INT64_C( 6294561215247757212) } }, { { -INT64_C( 6565879561257369593), INT64_C( 1395950409363254807), INT64_C( 1113784204694313569), -INT64_C( 4063627027580052055), INT64_C( 4814419655343004888), -INT64_C( 4376667185308370769), INT64_C( 2211968311192289486), INT64_C( 945847410351414426) }, UINT8_C( 47), { INT64_C( 175813054986664320), INT64_C( 7202426655844114482), INT64_C( 7370411586518292273), INT64_C( 4769779031566164398), INT64_C( 6696417419348708578), -INT64_C( 6027373776964791532), -INT64_C( 2900702815815323122), 
-INT64_C( 2069394128779060925) }, { -INT64_C( 3090666984198681451), INT64_C( 2537820259726695697), INT64_C( 8599404911143916120), -INT64_C( 3497109734551637870), INT64_C( 1560120159091181073), -INT64_C( 1117216409145740730), -INT64_C( 4316761598062364309), INT64_C( 7638613325489083640) }, { INT64_C( 175813054986664320), INT64_C( 7202426655844114482), INT64_C( 8599404911143916120), INT64_C( 4769779031566164398), INT64_C( 4814419655343004888), -INT64_C( 1117216409145740730), INT64_C( 2211968311192289486), INT64_C( 945847410351414426) } } }; /* For every entry: load src, a and b with simde_mm512_loadu_epi64, run simde_mm512_mask_max_epi64 with the entry's mask, and compare the result with the expected vector r as eight int64_t lanes. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi64(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_mask_max_epi64(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } /* Start of the test for simde_mm512_maskz_max_epi64 (the table continues past this span). Each test_vec entry holds an 8-bit lane mask (k), two operands (a, b) and the expected result (r) as eight int64_t lanes; in the first entry r[i] is the signed maximum of a[i] and b[i] when bit i of k is set, and 0 otherwise. */ static int test_simde_mm512_maskz_max_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { UINT8_C( 63), { -INT64_C( 6926915754504825890), -INT64_C( 7397189029588016527), -INT64_C( 7536201324114600452), INT64_C( 6873027948649756836), INT64_C( 2894290321799581881), INT64_C( 816700812018984951), INT64_C( 6055501674159253406), INT64_C( 4688202719175287342) }, { INT64_C( 6036784323415033575), INT64_C( 1096738046026444830), -INT64_C( 1724727222163454964), INT64_C( 5040836808604616235), -INT64_C( 3018632514112339604), -INT64_C( 7198655944328992103), INT64_C( 5702235678228126336), -INT64_C( 3633288481376912657) }, { INT64_C( 6036784323415033575), INT64_C( 1096738046026444830), -INT64_C( 1724727222163454964), INT64_C( 6873027948649756836), INT64_C( 2894290321799581881), INT64_C( 816700812018984951), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(184), { -INT64_C( 2686037980477352586), INT64_C( 8576831563879859274), INT64_C( 
2213495460442358366), INT64_C( 1052519153161667820), -INT64_C( 7175387239475704863), INT64_C( 4179388676098479531), INT64_C( 8282322599611046765), -INT64_C( 7177909069199085635) }, { INT64_C( 2588330053329588476), -INT64_C( 6448119779903664530), -INT64_C( 414216551051786936), -INT64_C( 8994056214273878569), INT64_C( 7052000346529422146), INT64_C( 9199497477670800075), INT64_C( 7127946467432276915), INT64_C( 3327072624578935331) }, { INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 1052519153161667820), INT64_C( 7052000346529422146), INT64_C( 9199497477670800075), INT64_C( 0), INT64_C( 3327072624578935331) } }, { UINT8_C(125), { INT64_C( 5730217847577176318), INT64_C( 6367815225705925245), INT64_C( 8503998119877496915), -INT64_C( 2821345899037151943), -INT64_C( 7526332901285017480), INT64_C( 7867685305908925346), -INT64_C( 8024479576668368142), INT64_C( 986784841744451071) }, { -INT64_C( 90678527186484213), -INT64_C( 7750126914822143765), INT64_C( 1296303397577867286), -INT64_C( 5328149050863916949), INT64_C( 2034107717976349152), INT64_C( 3013351336526811034), INT64_C( 7239417625381868371), -INT64_C( 2315568197507194245) }, { INT64_C( 5730217847577176318), INT64_C( 0), INT64_C( 8503998119877496915), -INT64_C( 2821345899037151943), INT64_C( 2034107717976349152), INT64_C( 7867685305908925346), INT64_C( 7239417625381868371), INT64_C( 0) } }, { UINT8_C(203), { INT64_C( 7344835900854770617), -INT64_C( 8060982651336971462), INT64_C( 6938056904573290297), INT64_C( 400606287485627985), -INT64_C( 2372193426292044711), INT64_C( 7637769824989187441), -INT64_C( 4200594613847357610), -INT64_C( 6252094350377282836) }, { INT64_C( 5560094904066545061), -INT64_C( 246649431242582022), INT64_C( 3636942875797801024), -INT64_C( 4535223658831346922), -INT64_C( 3574955593694484677), INT64_C( 1706173592363343371), INT64_C( 1786597550360100179), INT64_C( 1595838715907683656) }, { INT64_C( 7344835900854770617), -INT64_C( 246649431242582022), INT64_C( 0), INT64_C( 400606287485627985), 
INT64_C( 0), INT64_C( 0), INT64_C( 1786597550360100179), INT64_C( 1595838715907683656) } }, { UINT8_C(245), { -INT64_C( 5877030867296506832), -INT64_C( 3715859129705291377), INT64_C( 645957393419470697), -INT64_C( 5771088594544141724), -INT64_C( 2854011911252233947), -INT64_C( 1134799871686743387), INT64_C( 6432148469508345448), INT64_C( 4979911498035570414) }, { INT64_C( 6342861248878227685), -INT64_C( 4341455357083846328), -INT64_C( 2132099336853627109), -INT64_C( 3617115179733502872), INT64_C( 7973624404321748892), -INT64_C( 7126665479367789317), -INT64_C( 4525248627699017890), -INT64_C( 8394965961651158070) }, { INT64_C( 6342861248878227685), INT64_C( 0), INT64_C( 645957393419470697), INT64_C( 0), INT64_C( 7973624404321748892), -INT64_C( 1134799871686743387), INT64_C( 6432148469508345448), INT64_C( 4979911498035570414) } }, { UINT8_C(222), { -INT64_C( 614896919360775558), -INT64_C( 284265650225361743), INT64_C( 1631172314283728834), INT64_C( 5716353073522496864), INT64_C( 8999391399028414570), INT64_C( 3814613149780040719), -INT64_C( 4953202734526544487), INT64_C( 4767771417161910021) }, { INT64_C( 5126806359902254559), INT64_C( 2672371312965145199), -INT64_C( 2844141002291010704), INT64_C( 2230932456099527132), -INT64_C( 6390064476090522414), INT64_C( 2878537624090872896), -INT64_C( 5445561303790566207), -INT64_C( 3489904893107888077) }, { INT64_C( 0), INT64_C( 2672371312965145199), INT64_C( 1631172314283728834), INT64_C( 5716353073522496864), INT64_C( 8999391399028414570), INT64_C( 0), -INT64_C( 4953202734526544487), INT64_C( 4767771417161910021) } }, { UINT8_C(130), { -INT64_C( 5149760434878162343), -INT64_C( 8410024495666997783), INT64_C( 7653612797919747466), -INT64_C( 7755419307346515584), INT64_C( 7130434581909215505), -INT64_C( 8823901891757185863), -INT64_C( 8663307170344210672), INT64_C( 8446717037667167593) }, { INT64_C( 3655135608368600285), -INT64_C( 1834444071780572406), INT64_C( 4340071271745262509), INT64_C( 4615372009170313012), INT64_C( 
7986940857370000940), INT64_C( 8408218063844084211), INT64_C( 3179651257537720592), INT64_C( 5019210216756215811) }, { INT64_C( 0), -INT64_C( 1834444071780572406), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 8446717037667167593) } }, { UINT8_C(143), { -INT64_C( 3782633628547880386), -INT64_C( 1521082004566553898), -INT64_C( 1687144622622324945), -INT64_C( 2219352522735092526), INT64_C( 6898934679470024497), -INT64_C( 8782556560020516806), -INT64_C( 2112558692907286050), INT64_C( 2752184211040743340) }, { -INT64_C( 3762521568646160161), -INT64_C( 1079704945889903834), INT64_C( 8723584410143104287), INT64_C( 3328434238193702420), INT64_C( 5113379014405736858), INT64_C( 3701614834299958875), -INT64_C( 8202336425020942875), -INT64_C( 8681593259977805048) }, { -INT64_C( 3762521568646160161), -INT64_C( 1079704945889903834), INT64_C( 8723584410143104287), INT64_C( 3328434238193702420), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 2752184211040743340) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_maskz_max_epi64(test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_max_epu64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint64_t a[8]; const uint64_t b[8]; const uint64_t r[8]; } test_vec[] = { { { UINT64_C(14405647450865401052), UINT64_C(15069161228696060266), UINT64_C( 1509971145447934469), UINT64_C( 8101907096340504770), UINT64_C( 6164751603358090941), UINT64_C( 9383154700410950170), UINT64_C(18016976009369467443), UINT64_C( 2291622348271100360) }, { UINT64_C( 3206970931547568704), UINT64_C(16610341617040230789), UINT64_C(12085098299692660611), UINT64_C( 8937340941237469573), UINT64_C(16467737902515219530), UINT64_C( 7623021755386698802), UINT64_C( 1127830146735514912), 
UINT64_C(14219269727095073437) }, { UINT64_C(14405647450865401052), UINT64_C(16610341617040230789), UINT64_C(12085098299692660611), UINT64_C( 8937340941237469573), UINT64_C(16467737902515219530), UINT64_C( 9383154700410950170), UINT64_C(18016976009369467443), UINT64_C(14219269727095073437) } }, { { UINT64_C( 6210089108541471978), UINT64_C(11391298349110596807), UINT64_C(12917524493384956843), UINT64_C( 2607267771482651630), UINT64_C(14075891762244505820), UINT64_C( 4885709158955913905), UINT64_C(11424432347470654401), UINT64_C(15300194644856870904) }, { UINT64_C( 1244168190067852165), UINT64_C(18129817794156475583), UINT64_C(14323520279921431161), UINT64_C( 9962047057146990452), UINT64_C( 36678889460405521), UINT64_C( 5204175241816293891), UINT64_C(15895518007174171139), UINT64_C(17264136708841574408) }, { UINT64_C( 6210089108541471978), UINT64_C(18129817794156475583), UINT64_C(14323520279921431161), UINT64_C( 9962047057146990452), UINT64_C(14075891762244505820), UINT64_C( 5204175241816293891), UINT64_C(15895518007174171139), UINT64_C(17264136708841574408) } }, { { UINT64_C(17573013795190645149), UINT64_C( 3223565956230952868), UINT64_C( 7978010633431821683), UINT64_C( 2806887743663833127), UINT64_C(15751309145066001587), UINT64_C(11776923128482875163), UINT64_C( 3101912537289879095), UINT64_C( 5468536085979105077) }, { UINT64_C(16827848225631234424), UINT64_C( 1594447851292579405), UINT64_C( 665337386996375051), UINT64_C( 588752815020010311), UINT64_C(17098830368325704340), UINT64_C( 2309092160385546261), UINT64_C(14269491042304638762), UINT64_C( 1112056481645514710) }, { UINT64_C(17573013795190645149), UINT64_C( 3223565956230952868), UINT64_C( 7978010633431821683), UINT64_C( 2806887743663833127), UINT64_C(17098830368325704340), UINT64_C(11776923128482875163), UINT64_C(14269491042304638762), UINT64_C( 5468536085979105077) } }, { { UINT64_C( 8825010614149653995), UINT64_C(11054502478261248820), UINT64_C(15672700442101913658), UINT64_C(16354731852084225645), 
UINT64_C( 6391423864432060627), UINT64_C(15551222658663260873), UINT64_C( 8394166579517024418), UINT64_C( 4472729099770314040) }, { UINT64_C( 3402232465802559675), UINT64_C( 9637485374303950922), UINT64_C(11177276091413450914), UINT64_C( 9876356383904594534), UINT64_C(17938858413209978205), UINT64_C( 3954335932376701816), UINT64_C( 1940485961097874159), UINT64_C(17567974339967170679) }, { UINT64_C( 8825010614149653995), UINT64_C(11054502478261248820), UINT64_C(15672700442101913658), UINT64_C(16354731852084225645), UINT64_C(17938858413209978205), UINT64_C(15551222658663260873), UINT64_C( 8394166579517024418), UINT64_C(17567974339967170679) } }, { { UINT64_C( 5631656596421883520), UINT64_C(10794998180465936132), UINT64_C( 2549552700474240916), UINT64_C(14417488366027623820), UINT64_C( 8759253289225669483), UINT64_C( 6224224011284527397), UINT64_C(12205035486931994769), UINT64_C( 7448356734173431628) }, { UINT64_C(10924105908965889195), UINT64_C( 2272842877965085809), UINT64_C( 8417434579905554442), UINT64_C( 2803602349141564292), UINT64_C( 4162137255479578809), UINT64_C(17382759758752982157), UINT64_C(15617050106511530015), UINT64_C(16295502471031800707) }, { UINT64_C(10924105908965889195), UINT64_C(10794998180465936132), UINT64_C( 8417434579905554442), UINT64_C(14417488366027623820), UINT64_C( 8759253289225669483), UINT64_C(17382759758752982157), UINT64_C(15617050106511530015), UINT64_C(16295502471031800707) } }, { { UINT64_C(15768175752069868753), UINT64_C( 6254710672982425844), UINT64_C( 5906285979108238794), UINT64_C( 7072188056615276570), UINT64_C(17800706234978677473), UINT64_C(18131104183864196880), UINT64_C(12512143889682480005), UINT64_C( 5355929401625212000) }, { UINT64_C( 3549259957032996936), UINT64_C( 4083665022662284416), UINT64_C( 3932540599173629267), UINT64_C(16273894252460147748), UINT64_C( 5917287713101074892), UINT64_C( 102931529247987585), UINT64_C( 5584880430196717940), UINT64_C(17400418183870654975) }, { UINT64_C(15768175752069868753), 
UINT64_C( 6254710672982425844), UINT64_C( 5906285979108238794), UINT64_C(16273894252460147748), UINT64_C(17800706234978677473), UINT64_C(18131104183864196880), UINT64_C(12512143889682480005), UINT64_C(17400418183870654975) } }, { { UINT64_C(10748399147852670469), UINT64_C( 5711470167293832339), UINT64_C(11936539738650585904), UINT64_C( 4312961039629910724), UINT64_C( 5261958865101175133), UINT64_C( 4076547143300272231), UINT64_C( 811835713104456953), UINT64_C(10893589821946888891) }, { UINT64_C(16322749821918658439), UINT64_C(15644862852804973022), UINT64_C(11688457208457637859), UINT64_C( 9155749836091566399), UINT64_C( 242704158681732864), UINT64_C( 2092773298875761491), UINT64_C( 9241241581640975541), UINT64_C(10744190770404184997) }, { UINT64_C(16322749821918658439), UINT64_C(15644862852804973022), UINT64_C(11936539738650585904), UINT64_C( 9155749836091566399), UINT64_C( 5261958865101175133), UINT64_C( 4076547143300272231), UINT64_C( 9241241581640975541), UINT64_C(10893589821946888891) } }, { { UINT64_C( 7802607409231353631), UINT64_C(17098103143538831857), UINT64_C(12749631220573966126), UINT64_C( 1136992811779745949), UINT64_C( 4019072642946750272), UINT64_C( 4536438805688968654), UINT64_C(16642943881719938619), UINT64_C(17042992821668693125) }, { UINT64_C( 4914566798546229686), UINT64_C( 9060749055168845681), UINT64_C(10298812095332117693), UINT64_C(11067745496159421695), UINT64_C( 6565063991793999456), UINT64_C( 7071102926157735521), UINT64_C(11501442069804147974), UINT64_C( 9860035617323917400) }, { UINT64_C( 7802607409231353631), UINT64_C(17098103143538831857), UINT64_C(12749631220573966126), UINT64_C(11067745496159421695), UINT64_C( 6565063991793999456), UINT64_C( 7071102926157735521), UINT64_C(16642943881719938619), UINT64_C(17042992821668693125) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r 
= simde_mm512_max_epu64(a, b); simde_test_x86_assert_equal_u64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_max_epu64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint64_t src[8]; const simde__mmask8 k; const uint64_t a[8]; const uint64_t b[8]; const uint64_t r[8]; } test_vec[] = { { { UINT64_C(15079150202399567154), UINT64_C(11996494373043193425), UINT64_C( 4858319910155527770), UINT64_C( 972352950523515139), UINT64_C( 9273669397617985450), UINT64_C( 6895137495000279543), UINT64_C(17516062391167727514), UINT64_C( 9027402588605666910) }, UINT8_C( 14), { UINT64_C(18041063940208484277), UINT64_C(15042407688237443251), UINT64_C(18332944238196593279), UINT64_C(11731964907599537659), UINT64_C( 8987609023050681518), UINT64_C(10932316678593427918), UINT64_C( 2782115345532745719), UINT64_C( 7728943035109109428) }, { UINT64_C( 9226438237850586052), UINT64_C( 670224985266564510), UINT64_C(13034899466499912085), UINT64_C( 5393790103083929010), UINT64_C( 9733653087995233506), UINT64_C( 3362652464225287055), UINT64_C( 5130114089551221116), UINT64_C( 31683652269541024) }, { UINT64_C(15079150202399567154), UINT64_C(15042407688237443251), UINT64_C(18332944238196593279), UINT64_C(11731964907599537659), UINT64_C( 9273669397617985450), UINT64_C( 6895137495000279543), UINT64_C(17516062391167727514), UINT64_C( 9027402588605666910) } }, { { UINT64_C(13260189574033065877), UINT64_C( 5254016093149282851), UINT64_C( 5418383040647124820), UINT64_C( 980062302016497788), UINT64_C(17872975842067033384), UINT64_C(13467907369983042258), UINT64_C( 2192080535954959478), UINT64_C(16592195325271792829) }, UINT8_C(127), { UINT64_C(11759546520185354191), UINT64_C( 7284224645044752641), UINT64_C( 9149668071190247015), UINT64_C( 4804815730559955166), UINT64_C( 5854350008285004633), UINT64_C( 8197122012874885029), UINT64_C(14870507672627431998), UINT64_C( 1625144965279357655) }, { UINT64_C(12128455497450794927), UINT64_C( 9777535007936889397), 
UINT64_C(15947464007483874962), UINT64_C(12192288702057696864), UINT64_C( 545490614618743781), UINT64_C(17441305971535615802), UINT64_C(11040791336636937045), UINT64_C(18065883231876964371) }, { UINT64_C(12128455497450794927), UINT64_C( 9777535007936889397), UINT64_C(15947464007483874962), UINT64_C(12192288702057696864), UINT64_C( 5854350008285004633), UINT64_C(17441305971535615802), UINT64_C(14870507672627431998), UINT64_C(16592195325271792829) } }, { { UINT64_C(10495203897032007373), UINT64_C( 4205272321756622091), UINT64_C(12206669887595467155), UINT64_C(15221441089743756333), UINT64_C(11507005547386904778), UINT64_C( 8801554193032332806), UINT64_C(13147886965225929527), UINT64_C( 7107303191896206537) }, UINT8_C( 87), { UINT64_C(18100964383557124884), UINT64_C(18317085556504403605), UINT64_C(11752773760238157987), UINT64_C(11584276992475588918), UINT64_C(14938721689529069345), UINT64_C( 3702237685978116894), UINT64_C( 6492111642770532350), UINT64_C( 1491688678203282567) }, { UINT64_C(11547998559192908089), UINT64_C( 626389620384468462), UINT64_C( 6469868170235425866), UINT64_C( 6120989043794415850), UINT64_C( 486962808488418464), UINT64_C( 8082330919157154839), UINT64_C( 2924428514014766954), UINT64_C(13954112213641134392) }, { UINT64_C(18100964383557124884), UINT64_C(18317085556504403605), UINT64_C(11752773760238157987), UINT64_C(15221441089743756333), UINT64_C(14938721689529069345), UINT64_C( 8801554193032332806), UINT64_C( 6492111642770532350), UINT64_C( 7107303191896206537) } }, { { UINT64_C(10336506031410212436), UINT64_C(10808878990613346153), UINT64_C(13828135013600911234), UINT64_C( 4056257706092260712), UINT64_C( 4264090615561858342), UINT64_C( 4238391616941513998), UINT64_C( 8354143271978116009), UINT64_C(16135067853370687950) }, UINT8_C(115), { UINT64_C( 9778582161350557071), UINT64_C( 7187597396203794251), UINT64_C(16150720662526160755), UINT64_C( 5735466887821251806), UINT64_C(12188616764912164597), UINT64_C( 5961779504480216729), 
UINT64_C(16946139457334422381), UINT64_C( 3651198003916621213) }, { UINT64_C(16469655367179149703), UINT64_C( 4148089912404299358), UINT64_C(11249344253358650916), UINT64_C( 6766839682207067512), UINT64_C( 1113746667938756933), UINT64_C(14926580266168070432), UINT64_C(15469334059930397459), UINT64_C(10412167630026417995) }, { UINT64_C(16469655367179149703), UINT64_C( 7187597396203794251), UINT64_C(13828135013600911234), UINT64_C( 4056257706092260712), UINT64_C(12188616764912164597), UINT64_C(14926580266168070432), UINT64_C(16946139457334422381), UINT64_C(16135067853370687950) } }, { { UINT64_C(13037521338386924077), UINT64_C( 3152173500096068421), UINT64_C( 2856949971750403953), UINT64_C(15091220011794641043), UINT64_C( 7481214700885085834), UINT64_C(12113580427719439064), UINT64_C(15769385185188469460), UINT64_C( 8341273345579819341) }, UINT8_C(181), { UINT64_C(15413357944398975345), UINT64_C( 5656721194440579222), UINT64_C( 3140818780600676653), UINT64_C(13475764358446679847), UINT64_C(12777751299412908826), UINT64_C( 4813184810654457993), UINT64_C(17673570581272616975), UINT64_C(18207569383574952618) }, { UINT64_C(12965807433526704162), UINT64_C( 4217053884531690541), UINT64_C(15933902827174433116), UINT64_C(14830775423911159026), UINT64_C( 5032203140213722104), UINT64_C( 6893617963061478982), UINT64_C( 9885308373498002974), UINT64_C( 8612906137515065359) }, { UINT64_C(15413357944398975345), UINT64_C( 3152173500096068421), UINT64_C(15933902827174433116), UINT64_C(15091220011794641043), UINT64_C(12777751299412908826), UINT64_C( 6893617963061478982), UINT64_C(15769385185188469460), UINT64_C(18207569383574952618) } }, { { UINT64_C(15265355061528203899), UINT64_C(10149125018022601077), UINT64_C( 2021567634450834157), UINT64_C( 1730612183287884813), UINT64_C( 9390151511762050544), UINT64_C( 8134295338509571303), UINT64_C(15735299803182383157), UINT64_C( 9521852691968832879) }, UINT8_C( 8), { UINT64_C(16518242809352028624), UINT64_C( 3790017080827875985), UINT64_C( 
8016648382363851725), UINT64_C( 4662500432227290177), UINT64_C(17347534123791927432), UINT64_C(14703387462753003108), UINT64_C( 2986129441964166599), UINT64_C( 9428437529299088168) }, { UINT64_C(11862790659114757714), UINT64_C(12036583450803500095), UINT64_C(12368601479159260821), UINT64_C( 6271574766159953036), UINT64_C(13722091476665001354), UINT64_C( 515106296207043230), UINT64_C( 8420372200233946796), UINT64_C(14268534173768294311) }, { UINT64_C(15265355061528203899), UINT64_C(10149125018022601077), UINT64_C( 2021567634450834157), UINT64_C( 6271574766159953036), UINT64_C( 9390151511762050544), UINT64_C( 8134295338509571303), UINT64_C(15735299803182383157), UINT64_C( 9521852691968832879) } }, { { UINT64_C( 6099843906370800745), UINT64_C( 6080977323875803881), UINT64_C(13412387178399721671), UINT64_C(10051869590686145918), UINT64_C(17906621146379522167), UINT64_C( 1421088320658887611), UINT64_C( 1832371980796509344), UINT64_C(13091773068631790337) }, UINT8_C( 98), { UINT64_C(13184122309087191586), UINT64_C( 5698765551812369342), UINT64_C( 701439175578798551), UINT64_C(12793033908292149461), UINT64_C( 2520016210279398110), UINT64_C(16691094554133723712), UINT64_C( 7257091820740578423), UINT64_C(15672269395207192126) }, { UINT64_C(10490592523055688720), UINT64_C( 5982054485007281677), UINT64_C( 4829747781398734354), UINT64_C(13224978256132870836), UINT64_C(14042155147592442620), UINT64_C( 6637992811178214383), UINT64_C( 9930442493608730249), UINT64_C( 7851393113686335894) }, { UINT64_C( 6099843906370800745), UINT64_C( 5982054485007281677), UINT64_C(13412387178399721671), UINT64_C(10051869590686145918), UINT64_C(17906621146379522167), UINT64_C(16691094554133723712), UINT64_C( 9930442493608730249), UINT64_C(13091773068631790337) } }, { { UINT64_C(15121567895206371427), UINT64_C( 2667181685818112348), UINT64_C(11184833735020380634), UINT64_C(10683045405007573348), UINT64_C(17339288014067399662), UINT64_C(15276380338257346111), UINT64_C(11113682348762444699), 
UINT64_C(14984232292701836076) }, UINT8_C(170), { UINT64_C( 9179795368013897631), UINT64_C(17652788107595563206), UINT64_C(16841427232288840656), UINT64_C(12073192964517140018), UINT64_C(13904919900873149240), UINT64_C(10279215180359430514), UINT64_C(14444681820091568566), UINT64_C( 1339599723839504615) }, { UINT64_C( 701298851474793473), UINT64_C(12412031583756540916), UINT64_C( 1338605017375463733), UINT64_C( 8491988394434510318), UINT64_C(15825135513782740222), UINT64_C(17630129911493384711), UINT64_C( 9179552724599956263), UINT64_C(18408217190605416335) }, { UINT64_C(15121567895206371427), UINT64_C(17652788107595563206), UINT64_C(11184833735020380634), UINT64_C(12073192964517140018), UINT64_C(17339288014067399662), UINT64_C(17630129911493384711), UINT64_C(11113682348762444699), UINT64_C(18408217190605416335) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi64(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_mask_max_epu64(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_u64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_max_epu64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const uint64_t a[8]; const uint64_t b[8]; const uint64_t r[8]; } test_vec[] = { { UINT8_C(126), { UINT64_C( 1027662572529108014), UINT64_C( 3982020794999916420), UINT64_C( 252009961771637167), UINT64_C( 6944830119902470000), UINT64_C( 8177651732383854531), UINT64_C( 6136279454117243937), UINT64_C( 3190361335639665290), UINT64_C( 4639110574336016079) }, { UINT64_C(17706792351699958109), UINT64_C(16560034190430038741), UINT64_C( 171540522247766650), UINT64_C( 4299642270912835566), UINT64_C( 6280340608305682526), UINT64_C(10982772191809179434), UINT64_C(17409841952036131687), UINT64_C( 7185219446340624287) }, { UINT64_C( 0), 
UINT64_C(16560034190430038741), UINT64_C( 252009961771637167), UINT64_C( 6944830119902470000), UINT64_C( 8177651732383854531), UINT64_C(10982772191809179434), UINT64_C(17409841952036131687), UINT64_C( 0) } }, { UINT8_C( 95), { UINT64_C(14746537515878028604), UINT64_C( 2961542913226124044), UINT64_C(17158746807588402001), UINT64_C(10439438857185500281), UINT64_C( 537533619700089323), UINT64_C( 3863756309488230623), UINT64_C( 7116486656671533956), UINT64_C(17750869812158051699) }, { UINT64_C(17835685094218491012), UINT64_C(10704785900324011637), UINT64_C(10313288350108698069), UINT64_C( 5323445825086816990), UINT64_C( 729949913378946834), UINT64_C( 7867492332007034251), UINT64_C(11449077962828912184), UINT64_C(17896011782137788749) }, { UINT64_C(17835685094218491012), UINT64_C(10704785900324011637), UINT64_C(17158746807588402001), UINT64_C(10439438857185500281), UINT64_C( 729949913378946834), UINT64_C( 0), UINT64_C(11449077962828912184), UINT64_C( 0) } }, { UINT8_C(231), { UINT64_C(15058776303739806243), UINT64_C( 6490766529442387433), UINT64_C( 9482530545143998208), UINT64_C( 2994157107972582207), UINT64_C( 8618082702921894277), UINT64_C(15440704395747197226), UINT64_C( 6385181889134682574), UINT64_C(17119462463658395236) }, { UINT64_C(17049308509582536341), UINT64_C( 2520927636245114448), UINT64_C( 4320596734292729220), UINT64_C( 1455571422874629085), UINT64_C( 9806538951819323752), UINT64_C(10968703895700697793), UINT64_C( 2871091262402163655), UINT64_C( 2428178768665017886) }, { UINT64_C(17049308509582536341), UINT64_C( 6490766529442387433), UINT64_C( 9482530545143998208), UINT64_C( 0), UINT64_C( 0), UINT64_C(15440704395747197226), UINT64_C( 6385181889134682574), UINT64_C(17119462463658395236) } }, { UINT8_C(136), { UINT64_C( 6232498531844771050), UINT64_C( 4496566926057313270), UINT64_C( 6665802288877536568), UINT64_C(13421913059590741532), UINT64_C( 4845298489065145475), UINT64_C(16398533863126902665), UINT64_C(16684367445016058704), UINT64_C( 
6372847278295785445) }, { UINT64_C( 7384633303577389291), UINT64_C(18136363674212458276), UINT64_C(11825242876091692905), UINT64_C(10340487550843141714), UINT64_C( 6986845799318012082), UINT64_C( 7586842434398564770), UINT64_C( 8663440408587367128), UINT64_C( 1273148012031415415) }, { UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C(13421913059590741532), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 6372847278295785445) } }, { UINT8_C(225), { UINT64_C( 5285113357058023794), UINT64_C( 7018212331969160621), UINT64_C(14663719519278070863), UINT64_C(13304261424280411040), UINT64_C(16915175217715658717), UINT64_C( 701397955142748011), UINT64_C(16274761387061887705), UINT64_C( 2101567219713574188) }, { UINT64_C(14568108359905613828), UINT64_C( 6565086627270796376), UINT64_C(16808637488467487777), UINT64_C( 3434423712426485323), UINT64_C(16521345885245815582), UINT64_C( 7718345448690772800), UINT64_C( 1865917201317201982), UINT64_C(16524355500467569144) }, { UINT64_C(14568108359905613828), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 7718345448690772800), UINT64_C(16274761387061887705), UINT64_C(16524355500467569144) } }, { UINT8_C(137), { UINT64_C( 1486182007407207106), UINT64_C( 451144455750050289), UINT64_C( 7251056137478618775), UINT64_C(15593732495406515090), UINT64_C( 7652133053253779059), UINT64_C(17347214139548602424), UINT64_C( 1257974888838828525), UINT64_C( 2732094745310437885) }, { UINT64_C( 3510965994342457815), UINT64_C(14145423620710999812), UINT64_C( 2145202301845235509), UINT64_C(16556105305213154795), UINT64_C( 9035608956746401084), UINT64_C( 2571601493381302805), UINT64_C(13496897546967549200), UINT64_C(15295662050699148881) }, { UINT64_C( 3510965994342457815), UINT64_C( 0), UINT64_C( 0), UINT64_C(16556105305213154795), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C(15295662050699148881) } }, { UINT8_C( 33), { UINT64_C(13222032409913591930), UINT64_C(11469762365481118451), UINT64_C( 2921757168886570143), 
UINT64_C(12447831939719752834), UINT64_C( 7260247287519336862), UINT64_C(14823787487486306046), UINT64_C(15338298609045612297), UINT64_C(15015055251481992577) }, { UINT64_C( 603653916778526090), UINT64_C(10880654425188327683), UINT64_C( 2172466323090179841), UINT64_C( 6108624688056998083), UINT64_C( 4663671193139716519), UINT64_C(13021438911752362385), UINT64_C(16731766677901344632), UINT64_C(15988803656344274117) }, { UINT64_C(13222032409913591930), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C(14823787487486306046), UINT64_C( 0), UINT64_C( 0) } }, { UINT8_C( 69), { UINT64_C( 2266932696683023667), UINT64_C( 5268796264959611957), UINT64_C( 7746005982826607501), UINT64_C( 2602657211870154293), UINT64_C(10984698831648104459), UINT64_C( 6314994226182374161), UINT64_C(11412843190501315216), UINT64_C(10139174519801578399) }, { UINT64_C( 5418198939811626293), UINT64_C( 6751821412729974930), UINT64_C(17609837972454425691), UINT64_C(11801464494138644921), UINT64_C( 9690461018278110710), UINT64_C(14197763210179977694), UINT64_C(14818227143754472795), UINT64_C( 6401249366518474948) }, { UINT64_C( 5418198939811626293), UINT64_C( 0), UINT64_C(17609837972454425691), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C(14818227143754472795), UINT64_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_maskz_max_epu64(test_vec[i].k, a, b); simde_test_x86_assert_equal_u64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_max_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -467.49), SIMDE_FLOAT32_C( 68.96), SIMDE_FLOAT32_C( 93.32), SIMDE_FLOAT32_C( -192.23), SIMDE_FLOAT32_C( 206.98), SIMDE_FLOAT32_C( -60.64), SIMDE_FLOAT32_C( 236.48), 
SIMDE_FLOAT32_C( -938.37), SIMDE_FLOAT32_C( 629.22), SIMDE_FLOAT32_C( -771.32), SIMDE_FLOAT32_C( -922.17), SIMDE_FLOAT32_C( -910.14), SIMDE_FLOAT32_C( -100.30), SIMDE_FLOAT32_C( 480.89), SIMDE_FLOAT32_C( -423.93), SIMDE_FLOAT32_C( 266.34) }, { SIMDE_FLOAT32_C( -755.61), SIMDE_FLOAT32_C( -797.61), SIMDE_FLOAT32_C( -135.50), SIMDE_FLOAT32_C( -267.60), SIMDE_FLOAT32_C( -951.29), SIMDE_FLOAT32_C( 951.42), SIMDE_FLOAT32_C( 213.79), SIMDE_FLOAT32_C( 234.83), SIMDE_FLOAT32_C( 263.79), SIMDE_FLOAT32_C( 144.04), SIMDE_FLOAT32_C( 457.99), SIMDE_FLOAT32_C( -680.15), SIMDE_FLOAT32_C( -615.84), SIMDE_FLOAT32_C( 601.67), SIMDE_FLOAT32_C( 458.35), SIMDE_FLOAT32_C( -83.34) }, { SIMDE_FLOAT32_C( -467.49), SIMDE_FLOAT32_C( 68.96), SIMDE_FLOAT32_C( 93.32), SIMDE_FLOAT32_C( -192.23), SIMDE_FLOAT32_C( 206.98), SIMDE_FLOAT32_C( 951.42), SIMDE_FLOAT32_C( 236.48), SIMDE_FLOAT32_C( 234.83), SIMDE_FLOAT32_C( 629.22), SIMDE_FLOAT32_C( 144.04), SIMDE_FLOAT32_C( 457.99), SIMDE_FLOAT32_C( -680.15), SIMDE_FLOAT32_C( -100.30), SIMDE_FLOAT32_C( 601.67), SIMDE_FLOAT32_C( 458.35), SIMDE_FLOAT32_C( 266.34) } }, { { SIMDE_FLOAT32_C( -329.37), SIMDE_FLOAT32_C( -448.33), SIMDE_FLOAT32_C( 724.43), SIMDE_FLOAT32_C( 877.61), SIMDE_FLOAT32_C( 491.03), SIMDE_FLOAT32_C( -39.09), SIMDE_FLOAT32_C( 939.24), SIMDE_FLOAT32_C( 120.25), SIMDE_FLOAT32_C( 189.59), SIMDE_FLOAT32_C( -982.93), SIMDE_FLOAT32_C( 210.11), SIMDE_FLOAT32_C( -910.71), SIMDE_FLOAT32_C( 497.97), SIMDE_FLOAT32_C( 786.19), SIMDE_FLOAT32_C( 355.63), SIMDE_FLOAT32_C( 742.36) }, { SIMDE_FLOAT32_C( 988.58), SIMDE_FLOAT32_C( -779.87), SIMDE_FLOAT32_C( -525.24), SIMDE_FLOAT32_C( -962.71), SIMDE_FLOAT32_C( -828.45), SIMDE_FLOAT32_C( 688.56), SIMDE_FLOAT32_C( 272.12), SIMDE_FLOAT32_C( 435.34), SIMDE_FLOAT32_C( -167.41), SIMDE_FLOAT32_C( -269.90), SIMDE_FLOAT32_C( 755.19), SIMDE_FLOAT32_C( 216.75), SIMDE_FLOAT32_C( -668.23), SIMDE_FLOAT32_C( 213.55), SIMDE_FLOAT32_C( -866.59), SIMDE_FLOAT32_C( 2.41) }, { SIMDE_FLOAT32_C( 988.58), SIMDE_FLOAT32_C( -448.33), 
SIMDE_FLOAT32_C( 724.43), SIMDE_FLOAT32_C( 877.61), SIMDE_FLOAT32_C( 491.03), SIMDE_FLOAT32_C( 688.56), SIMDE_FLOAT32_C( 939.24), SIMDE_FLOAT32_C( 435.34), SIMDE_FLOAT32_C( 189.59), SIMDE_FLOAT32_C( -269.90), SIMDE_FLOAT32_C( 755.19), SIMDE_FLOAT32_C( 216.75), SIMDE_FLOAT32_C( 497.97), SIMDE_FLOAT32_C( 786.19), SIMDE_FLOAT32_C( 355.63), SIMDE_FLOAT32_C( 742.36) } }, { { SIMDE_FLOAT32_C( 765.22), SIMDE_FLOAT32_C( 857.85), SIMDE_FLOAT32_C( -119.98), SIMDE_FLOAT32_C( 256.25), SIMDE_FLOAT32_C( -181.25), SIMDE_FLOAT32_C( -180.73), SIMDE_FLOAT32_C( -623.50), SIMDE_FLOAT32_C( -991.66), SIMDE_FLOAT32_C( -163.66), SIMDE_FLOAT32_C( 586.61), SIMDE_FLOAT32_C( -902.37), SIMDE_FLOAT32_C( -665.69), SIMDE_FLOAT32_C( 372.80), SIMDE_FLOAT32_C( 453.26), SIMDE_FLOAT32_C( -923.33), SIMDE_FLOAT32_C( 361.38) }, { SIMDE_FLOAT32_C( 673.39), SIMDE_FLOAT32_C( -448.57), SIMDE_FLOAT32_C( 398.67), SIMDE_FLOAT32_C( 844.95), SIMDE_FLOAT32_C( -760.02), SIMDE_FLOAT32_C( -329.21), SIMDE_FLOAT32_C( 280.29), SIMDE_FLOAT32_C( 72.58), SIMDE_FLOAT32_C( 400.89), SIMDE_FLOAT32_C( 35.48), SIMDE_FLOAT32_C( -710.67), SIMDE_FLOAT32_C( 732.67), SIMDE_FLOAT32_C( -750.97), SIMDE_FLOAT32_C( -577.26), SIMDE_FLOAT32_C( -264.92), SIMDE_FLOAT32_C( -985.75) }, { SIMDE_FLOAT32_C( 765.22), SIMDE_FLOAT32_C( 857.85), SIMDE_FLOAT32_C( 398.67), SIMDE_FLOAT32_C( 844.95), SIMDE_FLOAT32_C( -181.25), SIMDE_FLOAT32_C( -180.73), SIMDE_FLOAT32_C( 280.29), SIMDE_FLOAT32_C( 72.58), SIMDE_FLOAT32_C( 400.89), SIMDE_FLOAT32_C( 586.61), SIMDE_FLOAT32_C( -710.67), SIMDE_FLOAT32_C( 732.67), SIMDE_FLOAT32_C( 372.80), SIMDE_FLOAT32_C( 453.26), SIMDE_FLOAT32_C( -264.92), SIMDE_FLOAT32_C( 361.38) } }, { { SIMDE_FLOAT32_C( -719.41), SIMDE_FLOAT32_C( 615.10), SIMDE_FLOAT32_C( 270.50), SIMDE_FLOAT32_C( 99.34), SIMDE_FLOAT32_C( -565.63), SIMDE_FLOAT32_C( 647.00), SIMDE_FLOAT32_C( 107.68), SIMDE_FLOAT32_C( 270.71), SIMDE_FLOAT32_C( 233.61), SIMDE_FLOAT32_C( 205.31), SIMDE_FLOAT32_C( 605.02), SIMDE_FLOAT32_C( -393.59), SIMDE_FLOAT32_C( -341.43), 
SIMDE_FLOAT32_C( 681.68), SIMDE_FLOAT32_C( 967.80), SIMDE_FLOAT32_C( -668.04) }, { SIMDE_FLOAT32_C( -766.89), SIMDE_FLOAT32_C( 366.47), SIMDE_FLOAT32_C( -823.10), SIMDE_FLOAT32_C( -526.90), SIMDE_FLOAT32_C( -962.74), SIMDE_FLOAT32_C( 457.19), SIMDE_FLOAT32_C( 545.67), SIMDE_FLOAT32_C( 438.16), SIMDE_FLOAT32_C( -507.32), SIMDE_FLOAT32_C( 835.00), SIMDE_FLOAT32_C( 170.82), SIMDE_FLOAT32_C( -258.30), SIMDE_FLOAT32_C( -742.26), SIMDE_FLOAT32_C( 905.90), SIMDE_FLOAT32_C( -244.05), SIMDE_FLOAT32_C( -461.67) }, { SIMDE_FLOAT32_C( -719.41), SIMDE_FLOAT32_C( 615.10), SIMDE_FLOAT32_C( 270.50), SIMDE_FLOAT32_C( 99.34), SIMDE_FLOAT32_C( -565.63), SIMDE_FLOAT32_C( 647.00), SIMDE_FLOAT32_C( 545.67), SIMDE_FLOAT32_C( 438.16), SIMDE_FLOAT32_C( 233.61), SIMDE_FLOAT32_C( 835.00), SIMDE_FLOAT32_C( 605.02), SIMDE_FLOAT32_C( -258.30), SIMDE_FLOAT32_C( -341.43), SIMDE_FLOAT32_C( 905.90), SIMDE_FLOAT32_C( 967.80), SIMDE_FLOAT32_C( -461.67) } }, { { SIMDE_FLOAT32_C( 521.00), SIMDE_FLOAT32_C( -973.55), SIMDE_FLOAT32_C( 637.67), SIMDE_FLOAT32_C( 955.37), SIMDE_FLOAT32_C( 673.44), SIMDE_FLOAT32_C( -254.65), SIMDE_FLOAT32_C( 226.08), SIMDE_FLOAT32_C( -92.95), SIMDE_FLOAT32_C( 950.66), SIMDE_FLOAT32_C( -168.90), SIMDE_FLOAT32_C( 513.47), SIMDE_FLOAT32_C( -390.77), SIMDE_FLOAT32_C( -487.22), SIMDE_FLOAT32_C( 481.27), SIMDE_FLOAT32_C( -58.81), SIMDE_FLOAT32_C( -254.11) }, { SIMDE_FLOAT32_C( -152.26), SIMDE_FLOAT32_C( 118.09), SIMDE_FLOAT32_C( 218.99), SIMDE_FLOAT32_C( -115.00), SIMDE_FLOAT32_C( -424.72), SIMDE_FLOAT32_C( -235.34), SIMDE_FLOAT32_C( -676.84), SIMDE_FLOAT32_C( 67.96), SIMDE_FLOAT32_C( -400.33), SIMDE_FLOAT32_C( 493.98), SIMDE_FLOAT32_C( 809.66), SIMDE_FLOAT32_C( -142.59), SIMDE_FLOAT32_C( 399.88), SIMDE_FLOAT32_C( -434.39), SIMDE_FLOAT32_C( 395.74), SIMDE_FLOAT32_C( -79.11) }, { SIMDE_FLOAT32_C( 521.00), SIMDE_FLOAT32_C( 118.09), SIMDE_FLOAT32_C( 637.67), SIMDE_FLOAT32_C( 955.37), SIMDE_FLOAT32_C( 673.44), SIMDE_FLOAT32_C( -235.34), SIMDE_FLOAT32_C( 226.08), SIMDE_FLOAT32_C( 
67.96), SIMDE_FLOAT32_C( 950.66), SIMDE_FLOAT32_C( 493.98), SIMDE_FLOAT32_C( 809.66), SIMDE_FLOAT32_C( -142.59), SIMDE_FLOAT32_C( 399.88), SIMDE_FLOAT32_C( 481.27), SIMDE_FLOAT32_C( 395.74), SIMDE_FLOAT32_C( -79.11) } }, { { SIMDE_FLOAT32_C( -407.94), SIMDE_FLOAT32_C( 33.41), SIMDE_FLOAT32_C( -123.74), SIMDE_FLOAT32_C( -734.49), SIMDE_FLOAT32_C( 778.76), SIMDE_FLOAT32_C( -897.66), SIMDE_FLOAT32_C( 172.56), SIMDE_FLOAT32_C( 729.42), SIMDE_FLOAT32_C( -66.56), SIMDE_FLOAT32_C( -313.97), SIMDE_FLOAT32_C( -661.35), SIMDE_FLOAT32_C( 446.22), SIMDE_FLOAT32_C( -832.70), SIMDE_FLOAT32_C( 279.83), SIMDE_FLOAT32_C( -807.89), SIMDE_FLOAT32_C( 15.04) }, { SIMDE_FLOAT32_C( -602.07), SIMDE_FLOAT32_C( 411.10), SIMDE_FLOAT32_C( 900.04), SIMDE_FLOAT32_C( -26.79), SIMDE_FLOAT32_C( -824.23), SIMDE_FLOAT32_C( -776.81), SIMDE_FLOAT32_C( -958.83), SIMDE_FLOAT32_C( -224.57), SIMDE_FLOAT32_C( 717.17), SIMDE_FLOAT32_C( 850.83), SIMDE_FLOAT32_C( 632.84), SIMDE_FLOAT32_C( 117.06), SIMDE_FLOAT32_C( -583.55), SIMDE_FLOAT32_C( 28.58), SIMDE_FLOAT32_C( -962.05), SIMDE_FLOAT32_C( 8.51) }, { SIMDE_FLOAT32_C( -407.94), SIMDE_FLOAT32_C( 411.10), SIMDE_FLOAT32_C( 900.04), SIMDE_FLOAT32_C( -26.79), SIMDE_FLOAT32_C( 778.76), SIMDE_FLOAT32_C( -776.81), SIMDE_FLOAT32_C( 172.56), SIMDE_FLOAT32_C( 729.42), SIMDE_FLOAT32_C( 717.17), SIMDE_FLOAT32_C( 850.83), SIMDE_FLOAT32_C( 632.84), SIMDE_FLOAT32_C( 446.22), SIMDE_FLOAT32_C( -583.55), SIMDE_FLOAT32_C( 279.83), SIMDE_FLOAT32_C( -807.89), SIMDE_FLOAT32_C( 15.04) } }, { { SIMDE_FLOAT32_C( -938.01), SIMDE_FLOAT32_C( -85.80), SIMDE_FLOAT32_C( 274.02), SIMDE_FLOAT32_C( 840.75), SIMDE_FLOAT32_C( 16.55), SIMDE_FLOAT32_C( -553.42), SIMDE_FLOAT32_C( 570.17), SIMDE_FLOAT32_C( 949.99), SIMDE_FLOAT32_C( 132.61), SIMDE_FLOAT32_C( 908.82), SIMDE_FLOAT32_C( 396.21), SIMDE_FLOAT32_C( 299.91), SIMDE_FLOAT32_C( 188.66), SIMDE_FLOAT32_C( 588.32), SIMDE_FLOAT32_C( -685.05), SIMDE_FLOAT32_C( 586.58) }, { SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -785.02), SIMDE_FLOAT32_C( 
-440.21), SIMDE_FLOAT32_C( 175.19), SIMDE_FLOAT32_C( -561.82), SIMDE_FLOAT32_C( -399.04), SIMDE_FLOAT32_C( 950.62), SIMDE_FLOAT32_C( -844.65), SIMDE_FLOAT32_C( -548.21), SIMDE_FLOAT32_C( 583.46), SIMDE_FLOAT32_C( 272.41), SIMDE_FLOAT32_C( -131.76), SIMDE_FLOAT32_C( -387.96), SIMDE_FLOAT32_C( 310.36), SIMDE_FLOAT32_C( 876.75), SIMDE_FLOAT32_C( -325.97) }, { SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -85.80), SIMDE_FLOAT32_C( 274.02), SIMDE_FLOAT32_C( 840.75), SIMDE_FLOAT32_C( 16.55), SIMDE_FLOAT32_C( -399.04), SIMDE_FLOAT32_C( 950.62), SIMDE_FLOAT32_C( 949.99), SIMDE_FLOAT32_C( 132.61), SIMDE_FLOAT32_C( 908.82), SIMDE_FLOAT32_C( 396.21), SIMDE_FLOAT32_C( 299.91), SIMDE_FLOAT32_C( 188.66), SIMDE_FLOAT32_C( 588.32), SIMDE_FLOAT32_C( 876.75), SIMDE_FLOAT32_C( 586.58) } }, { { SIMDE_FLOAT32_C( -775.44), SIMDE_FLOAT32_C( 150.76), SIMDE_FLOAT32_C( -485.21), SIMDE_FLOAT32_C( 241.11), SIMDE_FLOAT32_C( 597.34), SIMDE_FLOAT32_C( -915.04), SIMDE_FLOAT32_C( 191.10), SIMDE_FLOAT32_C( -270.05), SIMDE_FLOAT32_C( 993.78), SIMDE_FLOAT32_C( -412.69), SIMDE_FLOAT32_C( -970.14), SIMDE_FLOAT32_C( 182.44), SIMDE_FLOAT32_C( -824.37), SIMDE_FLOAT32_C( -655.20), SIMDE_FLOAT32_C( -230.98), SIMDE_FLOAT32_C( 175.06) }, { SIMDE_FLOAT32_C( -440.21), SIMDE_FLOAT32_C( 328.81), SIMDE_FLOAT32_C( -649.75), SIMDE_FLOAT32_C( -2.03), SIMDE_FLOAT32_C( 929.77), SIMDE_FLOAT32_C( -699.13), SIMDE_FLOAT32_C( 153.32), SIMDE_FLOAT32_C( -618.43), SIMDE_FLOAT32_C( 884.33), SIMDE_FLOAT32_C( -574.27), SIMDE_FLOAT32_C( 249.80), SIMDE_FLOAT32_C( -503.62), SIMDE_FLOAT32_C( 736.09), SIMDE_FLOAT32_C( 126.55), SIMDE_FLOAT32_C( 170.41), SIMDE_FLOAT32_C( 960.65) }, { SIMDE_FLOAT32_C( -440.21), SIMDE_FLOAT32_C( 328.81), SIMDE_FLOAT32_C( -485.21), SIMDE_FLOAT32_C( 241.11), SIMDE_FLOAT32_C( 929.77), SIMDE_FLOAT32_C( -699.13), SIMDE_FLOAT32_C( 191.10), SIMDE_FLOAT32_C( -270.05), SIMDE_FLOAT32_C( 993.78), SIMDE_FLOAT32_C( -412.69), SIMDE_FLOAT32_C( 249.80), SIMDE_FLOAT32_C( 182.44), SIMDE_FLOAT32_C( 736.09), SIMDE_FLOAT32_C( 
126.55), SIMDE_FLOAT32_C( 170.41), SIMDE_FLOAT32_C( 960.65) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_mm512_max_ps(a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_max_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -88.50), SIMDE_FLOAT32_C( -78.42), SIMDE_FLOAT32_C( 962.39), SIMDE_FLOAT32_C( 987.97), SIMDE_FLOAT32_C( -302.96), SIMDE_FLOAT32_C( 654.54), SIMDE_FLOAT32_C( -803.66), SIMDE_FLOAT32_C( -72.57), SIMDE_FLOAT32_C( 33.69), SIMDE_FLOAT32_C( -319.65), SIMDE_FLOAT32_C( -278.97), SIMDE_FLOAT32_C( -83.08), SIMDE_FLOAT32_C( 959.63), SIMDE_FLOAT32_C( 923.05), SIMDE_FLOAT32_C( -533.10), SIMDE_FLOAT32_C( 171.10) }, UINT8_C(175), { SIMDE_FLOAT32_C( 55.01), SIMDE_FLOAT32_C( 862.08), SIMDE_FLOAT32_C( 740.08), SIMDE_FLOAT32_C( 496.31), SIMDE_FLOAT32_C( 640.80), SIMDE_FLOAT32_C( 251.55), SIMDE_FLOAT32_C( 355.76), SIMDE_FLOAT32_C( -259.95), SIMDE_FLOAT32_C( 393.94), SIMDE_FLOAT32_C( 515.66), SIMDE_FLOAT32_C( 507.69), SIMDE_FLOAT32_C( -319.00), SIMDE_FLOAT32_C( 639.33), SIMDE_FLOAT32_C( 388.01), SIMDE_FLOAT32_C( 592.50), SIMDE_FLOAT32_C( -439.09) }, { SIMDE_FLOAT32_C( 350.40), SIMDE_FLOAT32_C( 580.47), SIMDE_FLOAT32_C( 257.94), SIMDE_FLOAT32_C( 4.94), SIMDE_FLOAT32_C( 776.81), SIMDE_FLOAT32_C( -814.62), SIMDE_FLOAT32_C( -961.37), SIMDE_FLOAT32_C( -542.84), SIMDE_FLOAT32_C( -93.60), SIMDE_FLOAT32_C( -44.45), SIMDE_FLOAT32_C( -583.22), SIMDE_FLOAT32_C( -170.54), SIMDE_FLOAT32_C( 422.45), SIMDE_FLOAT32_C( 587.88), SIMDE_FLOAT32_C( 859.23), SIMDE_FLOAT32_C( -522.55) }, { SIMDE_FLOAT32_C( 350.40), SIMDE_FLOAT32_C( 862.08), SIMDE_FLOAT32_C( 740.08), 
SIMDE_FLOAT32_C( 496.31), SIMDE_FLOAT32_C( -302.96), SIMDE_FLOAT32_C( 251.55), SIMDE_FLOAT32_C( -803.66), SIMDE_FLOAT32_C( -259.95), SIMDE_FLOAT32_C( 33.69), SIMDE_FLOAT32_C( -319.65), SIMDE_FLOAT32_C( -278.97), SIMDE_FLOAT32_C( -83.08), SIMDE_FLOAT32_C( 959.63), SIMDE_FLOAT32_C( 923.05), SIMDE_FLOAT32_C( -533.10), SIMDE_FLOAT32_C( 171.10) } }, { { SIMDE_FLOAT32_C( 449.96), SIMDE_FLOAT32_C( 599.32), SIMDE_FLOAT32_C( 973.77), SIMDE_FLOAT32_C( 90.75), SIMDE_FLOAT32_C( -149.14), SIMDE_FLOAT32_C( 329.53), SIMDE_FLOAT32_C( 830.80), SIMDE_FLOAT32_C( -755.20), SIMDE_FLOAT32_C( -154.81), SIMDE_FLOAT32_C( 338.49), SIMDE_FLOAT32_C( -74.20), SIMDE_FLOAT32_C( -515.48), SIMDE_FLOAT32_C( -273.51), SIMDE_FLOAT32_C( -481.70), SIMDE_FLOAT32_C( 45.43), SIMDE_FLOAT32_C( -923.11) }, UINT8_C(180), { SIMDE_FLOAT32_C( -696.63), SIMDE_FLOAT32_C( 81.84), SIMDE_FLOAT32_C( 875.57), SIMDE_FLOAT32_C( -511.25), SIMDE_FLOAT32_C( 120.47), SIMDE_FLOAT32_C( -667.27), SIMDE_FLOAT32_C( 395.15), SIMDE_FLOAT32_C( -923.99), SIMDE_FLOAT32_C( -250.49), SIMDE_FLOAT32_C( -775.40), SIMDE_FLOAT32_C( 498.46), SIMDE_FLOAT32_C( -662.62), SIMDE_FLOAT32_C( -916.16), SIMDE_FLOAT32_C( 975.92), SIMDE_FLOAT32_C( 787.34), SIMDE_FLOAT32_C( 683.15) }, { SIMDE_FLOAT32_C( 949.68), SIMDE_FLOAT32_C( -121.91), SIMDE_FLOAT32_C( -465.99), SIMDE_FLOAT32_C( 279.22), SIMDE_FLOAT32_C( -291.11), SIMDE_FLOAT32_C( -221.19), SIMDE_FLOAT32_C( -875.60), SIMDE_FLOAT32_C( -952.62), SIMDE_FLOAT32_C( 704.61), SIMDE_FLOAT32_C( -391.08), SIMDE_FLOAT32_C( -226.12), SIMDE_FLOAT32_C( -777.10), SIMDE_FLOAT32_C( 654.35), SIMDE_FLOAT32_C( -149.23), SIMDE_FLOAT32_C( -678.33), SIMDE_FLOAT32_C( 957.72) }, { SIMDE_FLOAT32_C( 449.96), SIMDE_FLOAT32_C( 599.32), SIMDE_FLOAT32_C( 875.57), SIMDE_FLOAT32_C( 90.75), SIMDE_FLOAT32_C( 120.47), SIMDE_FLOAT32_C( -221.19), SIMDE_FLOAT32_C( 830.80), SIMDE_FLOAT32_C( -923.99), SIMDE_FLOAT32_C( -154.81), SIMDE_FLOAT32_C( 338.49), SIMDE_FLOAT32_C( -74.20), SIMDE_FLOAT32_C( -515.48), SIMDE_FLOAT32_C( -273.51), 
SIMDE_FLOAT32_C( -481.70), SIMDE_FLOAT32_C( 45.43), SIMDE_FLOAT32_C( -923.11) } }, { { SIMDE_FLOAT32_C( 932.61), SIMDE_FLOAT32_C( -802.76), SIMDE_FLOAT32_C( -553.53), SIMDE_FLOAT32_C( 53.07), SIMDE_FLOAT32_C( -470.04), SIMDE_FLOAT32_C( 841.61), SIMDE_FLOAT32_C( 129.09), SIMDE_FLOAT32_C( 279.47), SIMDE_FLOAT32_C( -933.78), SIMDE_FLOAT32_C( -372.45), SIMDE_FLOAT32_C( 616.85), SIMDE_FLOAT32_C( -849.95), SIMDE_FLOAT32_C( -396.53), SIMDE_FLOAT32_C( 404.19), SIMDE_FLOAT32_C( 833.21), SIMDE_FLOAT32_C( -446.85) }, UINT8_C( 58), { SIMDE_FLOAT32_C( -632.78), SIMDE_FLOAT32_C( 832.37), SIMDE_FLOAT32_C( -8.82), SIMDE_FLOAT32_C( 146.03), SIMDE_FLOAT32_C( 956.77), SIMDE_FLOAT32_C( 38.57), SIMDE_FLOAT32_C( -149.36), SIMDE_FLOAT32_C( -434.30), SIMDE_FLOAT32_C( 812.44), SIMDE_FLOAT32_C( 73.54), SIMDE_FLOAT32_C( -779.95), SIMDE_FLOAT32_C( -336.78), SIMDE_FLOAT32_C( 395.21), SIMDE_FLOAT32_C( -822.23), SIMDE_FLOAT32_C( -404.17), SIMDE_FLOAT32_C( 592.45) }, { SIMDE_FLOAT32_C( -375.77), SIMDE_FLOAT32_C( 648.90), SIMDE_FLOAT32_C( -877.58), SIMDE_FLOAT32_C( -534.15), SIMDE_FLOAT32_C( -222.01), SIMDE_FLOAT32_C( 401.89), SIMDE_FLOAT32_C( -467.94), SIMDE_FLOAT32_C( 405.54), SIMDE_FLOAT32_C( 18.74), SIMDE_FLOAT32_C( -317.88), SIMDE_FLOAT32_C( -990.99), SIMDE_FLOAT32_C( -577.06), SIMDE_FLOAT32_C( -484.68), SIMDE_FLOAT32_C( -437.84), SIMDE_FLOAT32_C( -294.78), SIMDE_FLOAT32_C( -117.46) }, { SIMDE_FLOAT32_C( 932.61), SIMDE_FLOAT32_C( 832.37), SIMDE_FLOAT32_C( -553.53), SIMDE_FLOAT32_C( 146.03), SIMDE_FLOAT32_C( 956.77), SIMDE_FLOAT32_C( 401.89), SIMDE_FLOAT32_C( 129.09), SIMDE_FLOAT32_C( 279.47), SIMDE_FLOAT32_C( -933.78), SIMDE_FLOAT32_C( -372.45), SIMDE_FLOAT32_C( 616.85), SIMDE_FLOAT32_C( -849.95), SIMDE_FLOAT32_C( -396.53), SIMDE_FLOAT32_C( 404.19), SIMDE_FLOAT32_C( 833.21), SIMDE_FLOAT32_C( -446.85) } }, { { SIMDE_FLOAT32_C( -605.48), SIMDE_FLOAT32_C( 696.41), SIMDE_FLOAT32_C( -971.43), SIMDE_FLOAT32_C( -648.70), SIMDE_FLOAT32_C( -265.03), SIMDE_FLOAT32_C( -120.79), SIMDE_FLOAT32_C( -83.01), 
SIMDE_FLOAT32_C( -452.58), SIMDE_FLOAT32_C( 952.75), SIMDE_FLOAT32_C( 137.04), SIMDE_FLOAT32_C( 210.63), SIMDE_FLOAT32_C( 347.96), SIMDE_FLOAT32_C( 314.80), SIMDE_FLOAT32_C( 806.46), SIMDE_FLOAT32_C( -59.59), SIMDE_FLOAT32_C( 939.04) }, UINT8_C( 95), { SIMDE_FLOAT32_C( 62.83), SIMDE_FLOAT32_C( -595.12), SIMDE_FLOAT32_C( -766.65), SIMDE_FLOAT32_C( -535.28), SIMDE_FLOAT32_C( -63.05), SIMDE_FLOAT32_C( 638.89), SIMDE_FLOAT32_C( 483.46), SIMDE_FLOAT32_C( 619.06), SIMDE_FLOAT32_C( 647.90), SIMDE_FLOAT32_C( 906.39), SIMDE_FLOAT32_C( -865.61), SIMDE_FLOAT32_C( -789.95), SIMDE_FLOAT32_C( -388.38), SIMDE_FLOAT32_C( 16.93), SIMDE_FLOAT32_C( -395.42), SIMDE_FLOAT32_C( -691.98) }, { SIMDE_FLOAT32_C( 45.50), SIMDE_FLOAT32_C( -44.13), SIMDE_FLOAT32_C( 42.99), SIMDE_FLOAT32_C( 924.71), SIMDE_FLOAT32_C( 872.86), SIMDE_FLOAT32_C( 590.41), SIMDE_FLOAT32_C( 877.46), SIMDE_FLOAT32_C( 9.90), SIMDE_FLOAT32_C( -198.96), SIMDE_FLOAT32_C( 225.42), SIMDE_FLOAT32_C( -675.30), SIMDE_FLOAT32_C( -392.50), SIMDE_FLOAT32_C( -834.17), SIMDE_FLOAT32_C( -736.26), SIMDE_FLOAT32_C( -937.14), SIMDE_FLOAT32_C( 228.66) }, { SIMDE_FLOAT32_C( 62.83), SIMDE_FLOAT32_C( -44.13), SIMDE_FLOAT32_C( 42.99), SIMDE_FLOAT32_C( 924.71), SIMDE_FLOAT32_C( 872.86), SIMDE_FLOAT32_C( -120.79), SIMDE_FLOAT32_C( 877.46), SIMDE_FLOAT32_C( -452.58), SIMDE_FLOAT32_C( 952.75), SIMDE_FLOAT32_C( 137.04), SIMDE_FLOAT32_C( 210.63), SIMDE_FLOAT32_C( 347.96), SIMDE_FLOAT32_C( 314.80), SIMDE_FLOAT32_C( 806.46), SIMDE_FLOAT32_C( -59.59), SIMDE_FLOAT32_C( 939.04) } }, { { SIMDE_FLOAT32_C( -331.37), SIMDE_FLOAT32_C( -703.79), SIMDE_FLOAT32_C( 693.38), SIMDE_FLOAT32_C( 605.57), SIMDE_FLOAT32_C( 935.10), SIMDE_FLOAT32_C( 176.83), SIMDE_FLOAT32_C( 224.64), SIMDE_FLOAT32_C( 583.00), SIMDE_FLOAT32_C( 83.23), SIMDE_FLOAT32_C( 359.02), SIMDE_FLOAT32_C( 793.05), SIMDE_FLOAT32_C( 694.84), SIMDE_FLOAT32_C( -624.05), SIMDE_FLOAT32_C( -602.37), SIMDE_FLOAT32_C( -997.13), SIMDE_FLOAT32_C( 421.45) }, UINT8_C( 17), { SIMDE_FLOAT32_C( 45.86), 
SIMDE_FLOAT32_C( 346.16), SIMDE_FLOAT32_C( 226.37), SIMDE_FLOAT32_C( -363.73), SIMDE_FLOAT32_C( 223.61), SIMDE_FLOAT32_C( -763.73), SIMDE_FLOAT32_C( 437.31), SIMDE_FLOAT32_C( -550.97), SIMDE_FLOAT32_C( -439.03), SIMDE_FLOAT32_C( -955.19), SIMDE_FLOAT32_C( -385.13), SIMDE_FLOAT32_C( -175.29), SIMDE_FLOAT32_C( -892.33), SIMDE_FLOAT32_C( 843.53), SIMDE_FLOAT32_C( 493.34), SIMDE_FLOAT32_C( -596.12) }, { SIMDE_FLOAT32_C( 536.91), SIMDE_FLOAT32_C( 98.91), SIMDE_FLOAT32_C( -661.01), SIMDE_FLOAT32_C( -286.26), SIMDE_FLOAT32_C( -676.45), SIMDE_FLOAT32_C( 921.98), SIMDE_FLOAT32_C( 796.97), SIMDE_FLOAT32_C( 682.58), SIMDE_FLOAT32_C( 715.04), SIMDE_FLOAT32_C( 491.81), SIMDE_FLOAT32_C( -941.47), SIMDE_FLOAT32_C( -887.33), SIMDE_FLOAT32_C( 494.68), SIMDE_FLOAT32_C( 479.98), SIMDE_FLOAT32_C( 466.17), SIMDE_FLOAT32_C( -459.46) }, { SIMDE_FLOAT32_C( 536.91), SIMDE_FLOAT32_C( -703.79), SIMDE_FLOAT32_C( 693.38), SIMDE_FLOAT32_C( 605.57), SIMDE_FLOAT32_C( 223.61), SIMDE_FLOAT32_C( 176.83), SIMDE_FLOAT32_C( 224.64), SIMDE_FLOAT32_C( 583.00), SIMDE_FLOAT32_C( 83.23), SIMDE_FLOAT32_C( 359.02), SIMDE_FLOAT32_C( 793.05), SIMDE_FLOAT32_C( 694.84), SIMDE_FLOAT32_C( -624.05), SIMDE_FLOAT32_C( -602.37), SIMDE_FLOAT32_C( -997.13), SIMDE_FLOAT32_C( 421.45) } }, { { SIMDE_FLOAT32_C( -173.87), SIMDE_FLOAT32_C( -307.46), SIMDE_FLOAT32_C( 176.81), SIMDE_FLOAT32_C( -950.25), SIMDE_FLOAT32_C( -71.19), SIMDE_FLOAT32_C( -385.88), SIMDE_FLOAT32_C( -501.22), SIMDE_FLOAT32_C( 489.78), SIMDE_FLOAT32_C( -341.06), SIMDE_FLOAT32_C( 113.65), SIMDE_FLOAT32_C( -685.50), SIMDE_FLOAT32_C( -233.39), SIMDE_FLOAT32_C( -42.82), SIMDE_FLOAT32_C( 807.84), SIMDE_FLOAT32_C( 170.49), SIMDE_FLOAT32_C( -505.92) }, UINT8_C(163), { SIMDE_FLOAT32_C( 509.48), SIMDE_FLOAT32_C( 207.82), SIMDE_FLOAT32_C( 230.31), SIMDE_FLOAT32_C( 431.47), SIMDE_FLOAT32_C( 4.79), SIMDE_FLOAT32_C( -87.12), SIMDE_FLOAT32_C( 146.50), SIMDE_FLOAT32_C( -503.40), SIMDE_FLOAT32_C( -28.59), SIMDE_FLOAT32_C( 259.17), SIMDE_FLOAT32_C( 991.27), SIMDE_FLOAT32_C( 
-548.61), SIMDE_FLOAT32_C( -274.65), SIMDE_FLOAT32_C( -468.19), SIMDE_FLOAT32_C( 277.53), SIMDE_FLOAT32_C( 417.89) }, { SIMDE_FLOAT32_C( 708.62), SIMDE_FLOAT32_C( 327.28), SIMDE_FLOAT32_C( -653.30), SIMDE_FLOAT32_C( -677.26), SIMDE_FLOAT32_C( 826.06), SIMDE_FLOAT32_C( 836.49), SIMDE_FLOAT32_C( -18.32), SIMDE_FLOAT32_C( -60.30), SIMDE_FLOAT32_C( -849.01), SIMDE_FLOAT32_C( 748.29), SIMDE_FLOAT32_C( 896.88), SIMDE_FLOAT32_C( 958.83), SIMDE_FLOAT32_C( -81.22), SIMDE_FLOAT32_C( -609.04), SIMDE_FLOAT32_C( -134.42), SIMDE_FLOAT32_C( -571.74) }, { SIMDE_FLOAT32_C( 708.62), SIMDE_FLOAT32_C( 327.28), SIMDE_FLOAT32_C( 176.81), SIMDE_FLOAT32_C( -950.25), SIMDE_FLOAT32_C( -71.19), SIMDE_FLOAT32_C( 836.49), SIMDE_FLOAT32_C( -501.22), SIMDE_FLOAT32_C( -60.30), SIMDE_FLOAT32_C( -341.06), SIMDE_FLOAT32_C( 113.65), SIMDE_FLOAT32_C( -685.50), SIMDE_FLOAT32_C( -233.39), SIMDE_FLOAT32_C( -42.82), SIMDE_FLOAT32_C( 807.84), SIMDE_FLOAT32_C( 170.49), SIMDE_FLOAT32_C( -505.92) } }, { { SIMDE_FLOAT32_C( 598.79), SIMDE_FLOAT32_C( -904.11), SIMDE_FLOAT32_C( 859.73), SIMDE_FLOAT32_C( -396.42), SIMDE_FLOAT32_C( 8.77), SIMDE_FLOAT32_C( 6.23), SIMDE_FLOAT32_C( 100.17), SIMDE_FLOAT32_C( 980.19), SIMDE_FLOAT32_C( -734.59), SIMDE_FLOAT32_C( 91.45), SIMDE_FLOAT32_C( -568.42), SIMDE_FLOAT32_C( -9.25), SIMDE_FLOAT32_C( 623.26), SIMDE_FLOAT32_C( 709.11), SIMDE_FLOAT32_C( -591.36), SIMDE_FLOAT32_C( 331.88) }, UINT8_C( 89), { SIMDE_FLOAT32_C( -244.65), SIMDE_FLOAT32_C( 654.62), SIMDE_FLOAT32_C( -137.56), SIMDE_FLOAT32_C( -408.16), SIMDE_FLOAT32_C( -363.71), SIMDE_FLOAT32_C( 802.14), SIMDE_FLOAT32_C( -257.18), SIMDE_FLOAT32_C( -615.42), SIMDE_FLOAT32_C( 699.03), SIMDE_FLOAT32_C( -298.35), SIMDE_FLOAT32_C( 303.36), SIMDE_FLOAT32_C( -910.01), SIMDE_FLOAT32_C( 567.23), SIMDE_FLOAT32_C( 731.63), SIMDE_FLOAT32_C( 688.78), SIMDE_FLOAT32_C( 663.12) }, { SIMDE_FLOAT32_C( 591.36), SIMDE_FLOAT32_C( -707.65), SIMDE_FLOAT32_C( -328.11), SIMDE_FLOAT32_C( -402.41), SIMDE_FLOAT32_C( 392.52), SIMDE_FLOAT32_C( -347.92), 
SIMDE_FLOAT32_C( -137.01), SIMDE_FLOAT32_C( -516.03), SIMDE_FLOAT32_C( 83.66), SIMDE_FLOAT32_C( 853.75), SIMDE_FLOAT32_C( -892.77), SIMDE_FLOAT32_C( -207.23), SIMDE_FLOAT32_C( -737.61), SIMDE_FLOAT32_C( 439.10), SIMDE_FLOAT32_C( 829.15), SIMDE_FLOAT32_C( 17.74) }, { SIMDE_FLOAT32_C( 591.36), SIMDE_FLOAT32_C( -904.11), SIMDE_FLOAT32_C( 859.73), SIMDE_FLOAT32_C( -402.41), SIMDE_FLOAT32_C( 392.52), SIMDE_FLOAT32_C( 6.23), SIMDE_FLOAT32_C( -137.01), SIMDE_FLOAT32_C( 980.19), SIMDE_FLOAT32_C( -734.59), SIMDE_FLOAT32_C( 91.45), SIMDE_FLOAT32_C( -568.42), SIMDE_FLOAT32_C( -9.25), SIMDE_FLOAT32_C( 623.26), SIMDE_FLOAT32_C( 709.11), SIMDE_FLOAT32_C( -591.36), SIMDE_FLOAT32_C( 331.88) } }, { { SIMDE_FLOAT32_C( 93.72), SIMDE_FLOAT32_C( -308.41), SIMDE_FLOAT32_C( 609.58), SIMDE_FLOAT32_C( 730.01), SIMDE_FLOAT32_C( -506.26), SIMDE_FLOAT32_C( -647.60), SIMDE_FLOAT32_C( -885.40), SIMDE_FLOAT32_C( -807.24), SIMDE_FLOAT32_C( 54.05), SIMDE_FLOAT32_C( 417.96), SIMDE_FLOAT32_C( -717.25), SIMDE_FLOAT32_C( -378.72), SIMDE_FLOAT32_C( 149.59), SIMDE_FLOAT32_C( 971.53), SIMDE_FLOAT32_C( -715.60), SIMDE_FLOAT32_C( -259.06) }, UINT8_C(249), { SIMDE_FLOAT32_C( -43.71), SIMDE_FLOAT32_C( 338.53), SIMDE_FLOAT32_C( 656.40), SIMDE_FLOAT32_C( 608.37), SIMDE_FLOAT32_C( -798.47), SIMDE_FLOAT32_C( -859.62), SIMDE_FLOAT32_C( -307.97), SIMDE_FLOAT32_C( -944.73), SIMDE_FLOAT32_C( -752.40), SIMDE_FLOAT32_C( 484.80), SIMDE_FLOAT32_C( -682.33), SIMDE_FLOAT32_C( 686.71), SIMDE_FLOAT32_C( 313.95), SIMDE_FLOAT32_C( 335.41), SIMDE_FLOAT32_C( -219.57), SIMDE_FLOAT32_C( -994.46) }, { SIMDE_FLOAT32_C( -55.01), SIMDE_FLOAT32_C( -489.56), SIMDE_FLOAT32_C( -500.73), SIMDE_FLOAT32_C( 297.39), SIMDE_FLOAT32_C( -374.96), SIMDE_FLOAT32_C( -307.96), SIMDE_FLOAT32_C( -648.56), SIMDE_FLOAT32_C( -957.00), SIMDE_FLOAT32_C( -25.21), SIMDE_FLOAT32_C( -27.28), SIMDE_FLOAT32_C( 192.58), SIMDE_FLOAT32_C( -53.69), SIMDE_FLOAT32_C( 257.13), SIMDE_FLOAT32_C( 933.53), SIMDE_FLOAT32_C( 210.19), SIMDE_FLOAT32_C( -786.58) }, { 
SIMDE_FLOAT32_C( -43.71), SIMDE_FLOAT32_C( -308.41), SIMDE_FLOAT32_C( 609.58), SIMDE_FLOAT32_C( 608.37), SIMDE_FLOAT32_C( -374.96), SIMDE_FLOAT32_C( -307.96), SIMDE_FLOAT32_C( -307.97), SIMDE_FLOAT32_C( -944.73), SIMDE_FLOAT32_C( 54.05), SIMDE_FLOAT32_C( 417.96), SIMDE_FLOAT32_C( -717.25), SIMDE_FLOAT32_C( -378.72), SIMDE_FLOAT32_C( 149.59), SIMDE_FLOAT32_C( 971.53), SIMDE_FLOAT32_C( -715.60), SIMDE_FLOAT32_C( -259.06) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_mm512_mask_max_ps(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_maskz_max_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { { UINT16_C(49115), { SIMDE_FLOAT32_C( 433.64), SIMDE_FLOAT32_C( 811.97), SIMDE_FLOAT32_C( -935.59), SIMDE_FLOAT32_C( -291.75), SIMDE_FLOAT32_C( -969.46), SIMDE_FLOAT32_C( 402.84), SIMDE_FLOAT32_C( -536.08), SIMDE_FLOAT32_C( -34.52), SIMDE_FLOAT32_C( 235.92), SIMDE_FLOAT32_C( -199.87), SIMDE_FLOAT32_C( 393.12), SIMDE_FLOAT32_C( -850.22), SIMDE_FLOAT32_C( -499.40), SIMDE_FLOAT32_C( -229.12), SIMDE_FLOAT32_C( 441.37), SIMDE_FLOAT32_C( -903.49) }, { SIMDE_FLOAT32_C( 235.24), SIMDE_FLOAT32_C( -719.15), SIMDE_FLOAT32_C( -316.51), SIMDE_FLOAT32_C( 336.59), SIMDE_FLOAT32_C( -863.10), SIMDE_FLOAT32_C( 919.24), SIMDE_FLOAT32_C( -654.44), SIMDE_FLOAT32_C( 266.97), SIMDE_FLOAT32_C( -701.10), SIMDE_FLOAT32_C( 297.71), SIMDE_FLOAT32_C( 440.40), SIMDE_FLOAT32_C( -385.85), SIMDE_FLOAT32_C( -935.58), SIMDE_FLOAT32_C( 821.31), SIMDE_FLOAT32_C( 136.40), SIMDE_FLOAT32_C( 498.06) }, { SIMDE_FLOAT32_C( 433.64), SIMDE_FLOAT32_C( 811.97), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( 336.59), SIMDE_FLOAT32_C( -863.10), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -536.08), SIMDE_FLOAT32_C( 266.97), SIMDE_FLOAT32_C( 235.92), SIMDE_FLOAT32_C( 297.71), SIMDE_FLOAT32_C( 440.40), SIMDE_FLOAT32_C( -385.85), SIMDE_FLOAT32_C( -499.40), SIMDE_FLOAT32_C( 821.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 498.06) } }, { UINT16_C( 4724), { SIMDE_FLOAT32_C( -793.69), SIMDE_FLOAT32_C( 663.82), SIMDE_FLOAT32_C( -396.36), SIMDE_FLOAT32_C( -329.77), SIMDE_FLOAT32_C( -370.70), SIMDE_FLOAT32_C( 839.57), SIMDE_FLOAT32_C( 470.36), SIMDE_FLOAT32_C( -977.58), SIMDE_FLOAT32_C( 989.34), SIMDE_FLOAT32_C( 970.96), SIMDE_FLOAT32_C( -206.70), SIMDE_FLOAT32_C( 430.71), SIMDE_FLOAT32_C( -932.53), SIMDE_FLOAT32_C( -971.45), SIMDE_FLOAT32_C( 711.56), SIMDE_FLOAT32_C( -249.04) }, { SIMDE_FLOAT32_C( 365.13), SIMDE_FLOAT32_C( 848.47), SIMDE_FLOAT32_C( -329.80), SIMDE_FLOAT32_C( 710.69), SIMDE_FLOAT32_C( 115.44), SIMDE_FLOAT32_C( -30.90), SIMDE_FLOAT32_C( 8.40), SIMDE_FLOAT32_C( -444.16), SIMDE_FLOAT32_C( 583.25), SIMDE_FLOAT32_C( 72.82), SIMDE_FLOAT32_C( -622.85), SIMDE_FLOAT32_C( -280.35), SIMDE_FLOAT32_C( -429.13), SIMDE_FLOAT32_C( -989.57), SIMDE_FLOAT32_C( 920.46), SIMDE_FLOAT32_C( -222.82) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -329.80), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 115.44), SIMDE_FLOAT32_C( 839.57), SIMDE_FLOAT32_C( 470.36), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 970.96), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -429.13), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(54631), { SIMDE_FLOAT32_C( 447.41), SIMDE_FLOAT32_C( -696.45), SIMDE_FLOAT32_C( -636.33), SIMDE_FLOAT32_C( -82.23), SIMDE_FLOAT32_C( -674.04), SIMDE_FLOAT32_C( -646.99), SIMDE_FLOAT32_C( -111.28), SIMDE_FLOAT32_C( 119.27), SIMDE_FLOAT32_C( 783.72), SIMDE_FLOAT32_C( -43.81), SIMDE_FLOAT32_C( 147.81), SIMDE_FLOAT32_C( 495.29), SIMDE_FLOAT32_C( 707.15), SIMDE_FLOAT32_C( 
-487.06), SIMDE_FLOAT32_C( 343.75), SIMDE_FLOAT32_C( -622.65) }, { SIMDE_FLOAT32_C( -776.37), SIMDE_FLOAT32_C( -540.80), SIMDE_FLOAT32_C( 346.45), SIMDE_FLOAT32_C( 232.03), SIMDE_FLOAT32_C( 15.04), SIMDE_FLOAT32_C( -70.30), SIMDE_FLOAT32_C( -695.16), SIMDE_FLOAT32_C( 392.19), SIMDE_FLOAT32_C( 649.35), SIMDE_FLOAT32_C( -124.29), SIMDE_FLOAT32_C( 402.62), SIMDE_FLOAT32_C( 569.81), SIMDE_FLOAT32_C( 652.89), SIMDE_FLOAT32_C( 76.87), SIMDE_FLOAT32_C( -906.09), SIMDE_FLOAT32_C( 100.30) }, { SIMDE_FLOAT32_C( 447.41), SIMDE_FLOAT32_C( -540.80), SIMDE_FLOAT32_C( 346.45), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -70.30), SIMDE_FLOAT32_C( -111.28), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 783.72), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 402.62), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 707.15), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 343.75), SIMDE_FLOAT32_C( 100.30) } }, { UINT16_C(65099), { SIMDE_FLOAT32_C( -981.93), SIMDE_FLOAT32_C( 706.38), SIMDE_FLOAT32_C( -189.41), SIMDE_FLOAT32_C( -93.21), SIMDE_FLOAT32_C( -174.35), SIMDE_FLOAT32_C( -405.68), SIMDE_FLOAT32_C( 862.98), SIMDE_FLOAT32_C( 973.46), SIMDE_FLOAT32_C( -910.40), SIMDE_FLOAT32_C( 570.13), SIMDE_FLOAT32_C( -513.60), SIMDE_FLOAT32_C( 433.36), SIMDE_FLOAT32_C( 947.48), SIMDE_FLOAT32_C( -289.97), SIMDE_FLOAT32_C( 892.56), SIMDE_FLOAT32_C( 293.93) }, { SIMDE_FLOAT32_C( 942.06), SIMDE_FLOAT32_C( -92.40), SIMDE_FLOAT32_C( -776.37), SIMDE_FLOAT32_C( -753.10), SIMDE_FLOAT32_C( -700.21), SIMDE_FLOAT32_C( 872.99), SIMDE_FLOAT32_C( 122.61), SIMDE_FLOAT32_C( 702.41), SIMDE_FLOAT32_C( 442.80), SIMDE_FLOAT32_C( -224.50), SIMDE_FLOAT32_C( -220.72), SIMDE_FLOAT32_C( 536.71), SIMDE_FLOAT32_C( 875.80), SIMDE_FLOAT32_C( -840.31), SIMDE_FLOAT32_C( 994.29), SIMDE_FLOAT32_C( 893.87) }, { SIMDE_FLOAT32_C( 942.06), SIMDE_FLOAT32_C( 706.38), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -93.21), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 862.98), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( 570.13), SIMDE_FLOAT32_C( -220.72), SIMDE_FLOAT32_C( 536.71), SIMDE_FLOAT32_C( 947.48), SIMDE_FLOAT32_C( -289.97), SIMDE_FLOAT32_C( 994.29), SIMDE_FLOAT32_C( 893.87) } }, { UINT16_C(30402), { SIMDE_FLOAT32_C( -199.34), SIMDE_FLOAT32_C( -308.28), SIMDE_FLOAT32_C( 399.20), SIMDE_FLOAT32_C( -336.36), SIMDE_FLOAT32_C( -334.82), SIMDE_FLOAT32_C( 488.81), SIMDE_FLOAT32_C( -766.23), SIMDE_FLOAT32_C( 151.58), SIMDE_FLOAT32_C( -77.83), SIMDE_FLOAT32_C( -818.75), SIMDE_FLOAT32_C( 861.61), SIMDE_FLOAT32_C( -185.28), SIMDE_FLOAT32_C( 475.18), SIMDE_FLOAT32_C( 803.67), SIMDE_FLOAT32_C( 722.32), SIMDE_FLOAT32_C( 698.81) }, { SIMDE_FLOAT32_C( -949.43), SIMDE_FLOAT32_C( -977.90), SIMDE_FLOAT32_C( 571.80), SIMDE_FLOAT32_C( 173.18), SIMDE_FLOAT32_C( 724.51), SIMDE_FLOAT32_C( 14.60), SIMDE_FLOAT32_C( 948.68), SIMDE_FLOAT32_C( -496.21), SIMDE_FLOAT32_C( -448.69), SIMDE_FLOAT32_C( 824.48), SIMDE_FLOAT32_C( -336.52), SIMDE_FLOAT32_C( -454.40), SIMDE_FLOAT32_C( 718.35), SIMDE_FLOAT32_C( -470.45), SIMDE_FLOAT32_C( 350.48), SIMDE_FLOAT32_C( -480.99) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -308.28), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 948.68), SIMDE_FLOAT32_C( 151.58), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 824.48), SIMDE_FLOAT32_C( 861.61), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 718.35), SIMDE_FLOAT32_C( 803.67), SIMDE_FLOAT32_C( 722.32), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(63759), { SIMDE_FLOAT32_C( 182.66), SIMDE_FLOAT32_C( 886.45), SIMDE_FLOAT32_C( -761.51), SIMDE_FLOAT32_C( 416.43), SIMDE_FLOAT32_C( 38.03), SIMDE_FLOAT32_C( 160.66), SIMDE_FLOAT32_C( 597.67), SIMDE_FLOAT32_C( -100.36), SIMDE_FLOAT32_C( 975.38), SIMDE_FLOAT32_C( 72.85), SIMDE_FLOAT32_C( -296.70), SIMDE_FLOAT32_C( 697.70), SIMDE_FLOAT32_C( -228.34), SIMDE_FLOAT32_C( -246.13), SIMDE_FLOAT32_C( 719.81), SIMDE_FLOAT32_C( -656.54) }, { SIMDE_FLOAT32_C( 927.05), SIMDE_FLOAT32_C( 444.32), SIMDE_FLOAT32_C( 358.06), 
SIMDE_FLOAT32_C( 875.72), SIMDE_FLOAT32_C( 948.10), SIMDE_FLOAT32_C( 909.36), SIMDE_FLOAT32_C( 700.21), SIMDE_FLOAT32_C( -388.42), SIMDE_FLOAT32_C( -545.04), SIMDE_FLOAT32_C( 418.56), SIMDE_FLOAT32_C( 141.13), SIMDE_FLOAT32_C( 805.45), SIMDE_FLOAT32_C( 937.57), SIMDE_FLOAT32_C( -637.60), SIMDE_FLOAT32_C( -444.87), SIMDE_FLOAT32_C( 120.23) }, { SIMDE_FLOAT32_C( 927.05), SIMDE_FLOAT32_C( 886.45), SIMDE_FLOAT32_C( 358.06), SIMDE_FLOAT32_C( 875.72), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 975.38), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 805.45), SIMDE_FLOAT32_C( 937.57), SIMDE_FLOAT32_C( -246.13), SIMDE_FLOAT32_C( 719.81), SIMDE_FLOAT32_C( 120.23) } }, { UINT16_C(33342), { SIMDE_FLOAT32_C( -463.34), SIMDE_FLOAT32_C( 286.87), SIMDE_FLOAT32_C( 954.28), SIMDE_FLOAT32_C( -865.67), SIMDE_FLOAT32_C( -813.49), SIMDE_FLOAT32_C( 929.67), SIMDE_FLOAT32_C( 207.18), SIMDE_FLOAT32_C( -110.18), SIMDE_FLOAT32_C( 627.37), SIMDE_FLOAT32_C( 978.84), SIMDE_FLOAT32_C( 643.69), SIMDE_FLOAT32_C( 347.17), SIMDE_FLOAT32_C( -677.70), SIMDE_FLOAT32_C( 570.73), SIMDE_FLOAT32_C( -208.51), SIMDE_FLOAT32_C( 680.36) }, { SIMDE_FLOAT32_C( 446.46), SIMDE_FLOAT32_C( -260.41), SIMDE_FLOAT32_C( 589.73), SIMDE_FLOAT32_C( 146.67), SIMDE_FLOAT32_C( 351.17), SIMDE_FLOAT32_C( -955.31), SIMDE_FLOAT32_C( -434.77), SIMDE_FLOAT32_C( -507.69), SIMDE_FLOAT32_C( 850.13), SIMDE_FLOAT32_C( -497.20), SIMDE_FLOAT32_C( -145.29), SIMDE_FLOAT32_C( -594.74), SIMDE_FLOAT32_C( 623.03), SIMDE_FLOAT32_C( 103.56), SIMDE_FLOAT32_C( 198.89), SIMDE_FLOAT32_C( -840.31) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 286.87), SIMDE_FLOAT32_C( 954.28), SIMDE_FLOAT32_C( 146.67), SIMDE_FLOAT32_C( 351.17), SIMDE_FLOAT32_C( 929.67), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 978.84), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 
0.00), SIMDE_FLOAT32_C( 680.36) } }, { UINT16_C(27663), { SIMDE_FLOAT32_C( -705.98), SIMDE_FLOAT32_C( -423.06), SIMDE_FLOAT32_C( 82.84), SIMDE_FLOAT32_C( 501.20), SIMDE_FLOAT32_C( 466.76), SIMDE_FLOAT32_C( -289.79), SIMDE_FLOAT32_C( 480.05), SIMDE_FLOAT32_C( 110.45), SIMDE_FLOAT32_C( -942.62), SIMDE_FLOAT32_C( 802.35), SIMDE_FLOAT32_C( -318.82), SIMDE_FLOAT32_C( -151.13), SIMDE_FLOAT32_C( 482.71), SIMDE_FLOAT32_C( -872.36), SIMDE_FLOAT32_C( 588.47), SIMDE_FLOAT32_C( 72.44) }, { SIMDE_FLOAT32_C( 274.30), SIMDE_FLOAT32_C( -60.36), SIMDE_FLOAT32_C( 117.12), SIMDE_FLOAT32_C( 839.53), SIMDE_FLOAT32_C( 431.95), SIMDE_FLOAT32_C( -32.74), SIMDE_FLOAT32_C( -657.67), SIMDE_FLOAT32_C( -713.34), SIMDE_FLOAT32_C( 372.52), SIMDE_FLOAT32_C( 965.36), SIMDE_FLOAT32_C( 390.22), SIMDE_FLOAT32_C( -428.59), SIMDE_FLOAT32_C( -874.95), SIMDE_FLOAT32_C( 780.65), SIMDE_FLOAT32_C( 724.58), SIMDE_FLOAT32_C( -580.93) }, { SIMDE_FLOAT32_C( 274.30), SIMDE_FLOAT32_C( -60.36), SIMDE_FLOAT32_C( 117.12), SIMDE_FLOAT32_C( 839.53), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 390.22), SIMDE_FLOAT32_C( -151.13), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 780.65), SIMDE_FLOAT32_C( 724.58), SIMDE_FLOAT32_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_mm512_maskz_max_ps(test_vec[i].k, a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_max_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 117.27), SIMDE_FLOAT64_C( 605.12), SIMDE_FLOAT64_C( -94.57), SIMDE_FLOAT64_C( -717.29), SIMDE_FLOAT64_C( -4.92), SIMDE_FLOAT64_C( -587.44), 
SIMDE_FLOAT64_C( 507.51), SIMDE_FLOAT64_C( 596.61) }, { SIMDE_FLOAT64_C( 821.81), SIMDE_FLOAT64_C( 612.82), SIMDE_FLOAT64_C( 712.15), SIMDE_FLOAT64_C( 612.33), SIMDE_FLOAT64_C( -249.83), SIMDE_FLOAT64_C( -211.57), SIMDE_FLOAT64_C( -312.67), SIMDE_FLOAT64_C( 671.52) }, { SIMDE_FLOAT64_C( 821.81), SIMDE_FLOAT64_C( 612.82), SIMDE_FLOAT64_C( 712.15), SIMDE_FLOAT64_C( 612.33), SIMDE_FLOAT64_C( -4.92), SIMDE_FLOAT64_C( -211.57), SIMDE_FLOAT64_C( 507.51), SIMDE_FLOAT64_C( 671.52) } }, { { SIMDE_FLOAT64_C( 418.12), SIMDE_FLOAT64_C( -94.27), SIMDE_FLOAT64_C( 381.31), SIMDE_FLOAT64_C( 262.88), SIMDE_FLOAT64_C( -485.88), SIMDE_FLOAT64_C( -131.08), SIMDE_FLOAT64_C( -132.09), SIMDE_FLOAT64_C( -583.59) }, { SIMDE_FLOAT64_C( -922.36), SIMDE_FLOAT64_C( 502.23), SIMDE_FLOAT64_C( 540.91), SIMDE_FLOAT64_C( -336.70), SIMDE_FLOAT64_C( -809.27), SIMDE_FLOAT64_C( 810.18), SIMDE_FLOAT64_C( 666.45), SIMDE_FLOAT64_C( 308.00) }, { SIMDE_FLOAT64_C( 418.12), SIMDE_FLOAT64_C( 502.23), SIMDE_FLOAT64_C( 540.91), SIMDE_FLOAT64_C( 262.88), SIMDE_FLOAT64_C( -485.88), SIMDE_FLOAT64_C( 810.18), SIMDE_FLOAT64_C( 666.45), SIMDE_FLOAT64_C( 308.00) } }, { { SIMDE_FLOAT64_C( 415.30), SIMDE_FLOAT64_C( -428.12), SIMDE_FLOAT64_C( 590.71), SIMDE_FLOAT64_C( -589.62), SIMDE_FLOAT64_C( -15.56), SIMDE_FLOAT64_C( 98.21), SIMDE_FLOAT64_C( -993.01), SIMDE_FLOAT64_C( -193.75) }, { SIMDE_FLOAT64_C( -288.97), SIMDE_FLOAT64_C( 719.14), SIMDE_FLOAT64_C( -581.43), SIMDE_FLOAT64_C( 461.20), SIMDE_FLOAT64_C( -492.43), SIMDE_FLOAT64_C( 105.90), SIMDE_FLOAT64_C( 132.72), SIMDE_FLOAT64_C( 925.69) }, { SIMDE_FLOAT64_C( 415.30), SIMDE_FLOAT64_C( 719.14), SIMDE_FLOAT64_C( 590.71), SIMDE_FLOAT64_C( 461.20), SIMDE_FLOAT64_C( -15.56), SIMDE_FLOAT64_C( 105.90), SIMDE_FLOAT64_C( 132.72), SIMDE_FLOAT64_C( 925.69) } }, { { SIMDE_FLOAT64_C( -988.37), SIMDE_FLOAT64_C( -485.97), SIMDE_FLOAT64_C( 188.58), SIMDE_FLOAT64_C( -474.25), SIMDE_FLOAT64_C( 382.95), SIMDE_FLOAT64_C( -943.52), SIMDE_FLOAT64_C( -57.85), SIMDE_FLOAT64_C( 460.59) }, { 
SIMDE_FLOAT64_C( 558.72), SIMDE_FLOAT64_C( -516.94), SIMDE_FLOAT64_C( -876.11), SIMDE_FLOAT64_C( 749.44), SIMDE_FLOAT64_C( -706.75), SIMDE_FLOAT64_C( 790.34), SIMDE_FLOAT64_C( 57.44), SIMDE_FLOAT64_C( 708.55) }, { SIMDE_FLOAT64_C( 558.72), SIMDE_FLOAT64_C( -485.97), SIMDE_FLOAT64_C( 188.58), SIMDE_FLOAT64_C( 749.44), SIMDE_FLOAT64_C( 382.95), SIMDE_FLOAT64_C( 790.34), SIMDE_FLOAT64_C( 57.44), SIMDE_FLOAT64_C( 708.55) } }, { { SIMDE_FLOAT64_C( -637.79), SIMDE_FLOAT64_C( -351.85), SIMDE_FLOAT64_C( -881.08), SIMDE_FLOAT64_C( 346.65), SIMDE_FLOAT64_C( 746.36), SIMDE_FLOAT64_C( -874.09), SIMDE_FLOAT64_C( -847.10), SIMDE_FLOAT64_C( -542.61) }, { SIMDE_FLOAT64_C( 845.05), SIMDE_FLOAT64_C( -428.53), SIMDE_FLOAT64_C( 918.60), SIMDE_FLOAT64_C( -647.38), SIMDE_FLOAT64_C( 677.37), SIMDE_FLOAT64_C( 51.31), SIMDE_FLOAT64_C( -721.68), SIMDE_FLOAT64_C( 689.00) }, { SIMDE_FLOAT64_C( 845.05), SIMDE_FLOAT64_C( -351.85), SIMDE_FLOAT64_C( 918.60), SIMDE_FLOAT64_C( 346.65), SIMDE_FLOAT64_C( 746.36), SIMDE_FLOAT64_C( 51.31), SIMDE_FLOAT64_C( -721.68), SIMDE_FLOAT64_C( 689.00) } }, { { SIMDE_FLOAT64_C( 565.34), SIMDE_FLOAT64_C( 466.89), SIMDE_FLOAT64_C( -785.25), SIMDE_FLOAT64_C( -51.71), SIMDE_FLOAT64_C( 523.38), SIMDE_FLOAT64_C( 156.90), SIMDE_FLOAT64_C( -591.12), SIMDE_FLOAT64_C( 82.09) }, { SIMDE_FLOAT64_C( 639.96), SIMDE_FLOAT64_C( -467.23), SIMDE_FLOAT64_C( -168.46), SIMDE_FLOAT64_C( 933.21), SIMDE_FLOAT64_C( -676.90), SIMDE_FLOAT64_C( 888.98), SIMDE_FLOAT64_C( 641.75), SIMDE_FLOAT64_C( -314.68) }, { SIMDE_FLOAT64_C( 639.96), SIMDE_FLOAT64_C( 466.89), SIMDE_FLOAT64_C( -168.46), SIMDE_FLOAT64_C( 933.21), SIMDE_FLOAT64_C( 523.38), SIMDE_FLOAT64_C( 888.98), SIMDE_FLOAT64_C( 641.75), SIMDE_FLOAT64_C( 82.09) } }, { { SIMDE_FLOAT64_C( -462.87), SIMDE_FLOAT64_C( 760.67), SIMDE_FLOAT64_C( -968.03), SIMDE_FLOAT64_C( -716.51), SIMDE_FLOAT64_C( 886.59), SIMDE_FLOAT64_C( -815.14), SIMDE_FLOAT64_C( -259.11), SIMDE_FLOAT64_C( 731.64) }, { SIMDE_FLOAT64_C( -243.67), SIMDE_FLOAT64_C( -340.52), 
SIMDE_FLOAT64_C( -915.74), SIMDE_FLOAT64_C( -566.30), SIMDE_FLOAT64_C( 710.79), SIMDE_FLOAT64_C( -637.42), SIMDE_FLOAT64_C( -877.29), SIMDE_FLOAT64_C( 276.14) }, { SIMDE_FLOAT64_C( -243.67), SIMDE_FLOAT64_C( 760.67), SIMDE_FLOAT64_C( -915.74), SIMDE_FLOAT64_C( -566.30), SIMDE_FLOAT64_C( 886.59), SIMDE_FLOAT64_C( -637.42), SIMDE_FLOAT64_C( -259.11), SIMDE_FLOAT64_C( 731.64) } }, { { SIMDE_FLOAT64_C( 829.47), SIMDE_FLOAT64_C( -662.55), SIMDE_FLOAT64_C( -775.57), SIMDE_FLOAT64_C( 352.85), SIMDE_FLOAT64_C( 494.35), SIMDE_FLOAT64_C( -366.69), SIMDE_FLOAT64_C( -565.06), SIMDE_FLOAT64_C( 134.31) }, { SIMDE_FLOAT64_C( 166.07), SIMDE_FLOAT64_C( 266.48), SIMDE_FLOAT64_C( 67.52), SIMDE_FLOAT64_C( 489.17), SIMDE_FLOAT64_C( 155.45), SIMDE_FLOAT64_C( -290.73), SIMDE_FLOAT64_C( -825.51), SIMDE_FLOAT64_C( 692.58) }, { SIMDE_FLOAT64_C( 829.47), SIMDE_FLOAT64_C( 266.48), SIMDE_FLOAT64_C( 67.52), SIMDE_FLOAT64_C( 489.17), SIMDE_FLOAT64_C( 494.35), SIMDE_FLOAT64_C( -290.73), SIMDE_FLOAT64_C( -565.06), SIMDE_FLOAT64_C( 692.58) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_mm512_max_pd(a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_max_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 912.18), SIMDE_FLOAT64_C( 251.75), SIMDE_FLOAT64_C( 555.54), SIMDE_FLOAT64_C( -456.14), SIMDE_FLOAT64_C( 118.39), SIMDE_FLOAT64_C( -179.75), SIMDE_FLOAT64_C( -362.75), SIMDE_FLOAT64_C( -441.43) }, UINT8_MAX, { SIMDE_FLOAT64_C( 852.32), SIMDE_FLOAT64_C( -482.61), SIMDE_FLOAT64_C( 790.90), SIMDE_FLOAT64_C( 430.23), SIMDE_FLOAT64_C( 991.14), SIMDE_FLOAT64_C( -742.88), SIMDE_FLOAT64_C( -953.53), 
SIMDE_FLOAT64_C( -242.10) }, { SIMDE_FLOAT64_C( 221.50), SIMDE_FLOAT64_C( -769.53), SIMDE_FLOAT64_C( 584.91), SIMDE_FLOAT64_C( -479.69), SIMDE_FLOAT64_C( 132.60), SIMDE_FLOAT64_C( 485.66), SIMDE_FLOAT64_C( 494.57), SIMDE_FLOAT64_C( 677.48) }, { SIMDE_FLOAT64_C( 852.32), SIMDE_FLOAT64_C( -482.61), SIMDE_FLOAT64_C( 790.90), SIMDE_FLOAT64_C( 430.23), SIMDE_FLOAT64_C( 991.14), SIMDE_FLOAT64_C( 485.66), SIMDE_FLOAT64_C( 494.57), SIMDE_FLOAT64_C( 677.48) } }, { { SIMDE_FLOAT64_C( 714.12), SIMDE_FLOAT64_C( -800.14), SIMDE_FLOAT64_C( -471.78), SIMDE_FLOAT64_C( -757.58), SIMDE_FLOAT64_C( 282.32), SIMDE_FLOAT64_C( 76.37), SIMDE_FLOAT64_C( -845.40), SIMDE_FLOAT64_C( -465.93) }, UINT8_C(187), { SIMDE_FLOAT64_C( -301.54), SIMDE_FLOAT64_C( 652.46), SIMDE_FLOAT64_C( 452.17), SIMDE_FLOAT64_C( 335.71), SIMDE_FLOAT64_C( -788.96), SIMDE_FLOAT64_C( 82.72), SIMDE_FLOAT64_C( 188.03), SIMDE_FLOAT64_C( -271.58) }, { SIMDE_FLOAT64_C( -126.37), SIMDE_FLOAT64_C( -381.74), SIMDE_FLOAT64_C( -280.44), SIMDE_FLOAT64_C( 130.75), SIMDE_FLOAT64_C( -335.28), SIMDE_FLOAT64_C( 477.46), SIMDE_FLOAT64_C( -647.75), SIMDE_FLOAT64_C( -104.81) }, { SIMDE_FLOAT64_C( -126.37), SIMDE_FLOAT64_C( 652.46), SIMDE_FLOAT64_C( -471.78), SIMDE_FLOAT64_C( 335.71), SIMDE_FLOAT64_C( -335.28), SIMDE_FLOAT64_C( 477.46), SIMDE_FLOAT64_C( -845.40), SIMDE_FLOAT64_C( -104.81) } }, { { SIMDE_FLOAT64_C( 62.37), SIMDE_FLOAT64_C( -127.44), SIMDE_FLOAT64_C( -972.21), SIMDE_FLOAT64_C( -451.97), SIMDE_FLOAT64_C( -632.87), SIMDE_FLOAT64_C( 705.27), SIMDE_FLOAT64_C( -737.85), SIMDE_FLOAT64_C( -433.01) }, UINT8_MAX, { SIMDE_FLOAT64_C( -495.44), SIMDE_FLOAT64_C( 849.31), SIMDE_FLOAT64_C( 309.86), SIMDE_FLOAT64_C( -340.84), SIMDE_FLOAT64_C( -616.63), SIMDE_FLOAT64_C( 941.78), SIMDE_FLOAT64_C( 357.62), SIMDE_FLOAT64_C( -964.16) }, { SIMDE_FLOAT64_C( 393.95), SIMDE_FLOAT64_C( -306.67), SIMDE_FLOAT64_C( -753.13), SIMDE_FLOAT64_C( -523.33), SIMDE_FLOAT64_C( 881.36), SIMDE_FLOAT64_C( -24.71), SIMDE_FLOAT64_C( 350.30), SIMDE_FLOAT64_C( -500.38) 
}, { SIMDE_FLOAT64_C( 393.95), SIMDE_FLOAT64_C( 849.31), SIMDE_FLOAT64_C( 309.86), SIMDE_FLOAT64_C( -340.84), SIMDE_FLOAT64_C( 881.36), SIMDE_FLOAT64_C( 941.78), SIMDE_FLOAT64_C( 357.62), SIMDE_FLOAT64_C( -500.38) } }, { { SIMDE_FLOAT64_C( 694.85), SIMDE_FLOAT64_C( -518.96), SIMDE_FLOAT64_C( 164.34), SIMDE_FLOAT64_C( 172.31), SIMDE_FLOAT64_C( -166.71), SIMDE_FLOAT64_C( -940.46), SIMDE_FLOAT64_C( -765.32), SIMDE_FLOAT64_C( 705.85) }, UINT8_C(121), { SIMDE_FLOAT64_C( -217.29), SIMDE_FLOAT64_C( -927.02), SIMDE_FLOAT64_C( 792.60), SIMDE_FLOAT64_C( 44.86), SIMDE_FLOAT64_C( -360.03), SIMDE_FLOAT64_C( -973.91), SIMDE_FLOAT64_C( 549.42), SIMDE_FLOAT64_C( -510.72) }, { SIMDE_FLOAT64_C( 335.95), SIMDE_FLOAT64_C( -791.41), SIMDE_FLOAT64_C( -127.34), SIMDE_FLOAT64_C( 277.73), SIMDE_FLOAT64_C( 566.21), SIMDE_FLOAT64_C( -91.51), SIMDE_FLOAT64_C( -328.32), SIMDE_FLOAT64_C( -740.46) }, { SIMDE_FLOAT64_C( 335.95), SIMDE_FLOAT64_C( -518.96), SIMDE_FLOAT64_C( 164.34), SIMDE_FLOAT64_C( 277.73), SIMDE_FLOAT64_C( 566.21), SIMDE_FLOAT64_C( -91.51), SIMDE_FLOAT64_C( 549.42), SIMDE_FLOAT64_C( 705.85) } }, { { SIMDE_FLOAT64_C( 155.37), SIMDE_FLOAT64_C( 148.35), SIMDE_FLOAT64_C( -859.10), SIMDE_FLOAT64_C( -869.34), SIMDE_FLOAT64_C( -501.36), SIMDE_FLOAT64_C( -359.48), SIMDE_FLOAT64_C( 825.51), SIMDE_FLOAT64_C( -20.31) }, UINT8_C(220), { SIMDE_FLOAT64_C( -2.17), SIMDE_FLOAT64_C( 812.98), SIMDE_FLOAT64_C( 864.40), SIMDE_FLOAT64_C( 232.51), SIMDE_FLOAT64_C( 518.84), SIMDE_FLOAT64_C( 951.72), SIMDE_FLOAT64_C( -984.78), SIMDE_FLOAT64_C( 591.82) }, { SIMDE_FLOAT64_C( 744.32), SIMDE_FLOAT64_C( 60.08), SIMDE_FLOAT64_C( -768.21), SIMDE_FLOAT64_C( 770.41), SIMDE_FLOAT64_C( -390.49), SIMDE_FLOAT64_C( -278.93), SIMDE_FLOAT64_C( 106.36), SIMDE_FLOAT64_C( -181.91) }, { SIMDE_FLOAT64_C( 155.37), SIMDE_FLOAT64_C( 148.35), SIMDE_FLOAT64_C( 864.40), SIMDE_FLOAT64_C( 770.41), SIMDE_FLOAT64_C( 518.84), SIMDE_FLOAT64_C( -359.48), SIMDE_FLOAT64_C( 106.36), SIMDE_FLOAT64_C( 591.82) } }, { { SIMDE_FLOAT64_C( 
593.73), SIMDE_FLOAT64_C( -615.91), SIMDE_FLOAT64_C( -615.70), SIMDE_FLOAT64_C( -497.78), SIMDE_FLOAT64_C( 55.77), SIMDE_FLOAT64_C( -356.16), SIMDE_FLOAT64_C( 657.59), SIMDE_FLOAT64_C( -795.89) }, UINT8_C(145), { SIMDE_FLOAT64_C( 788.25), SIMDE_FLOAT64_C( -297.24), SIMDE_FLOAT64_C( 425.26), SIMDE_FLOAT64_C( 613.76), SIMDE_FLOAT64_C( 682.44), SIMDE_FLOAT64_C( 230.12), SIMDE_FLOAT64_C( -388.41), SIMDE_FLOAT64_C( 495.42) }, { SIMDE_FLOAT64_C( 94.51), SIMDE_FLOAT64_C( 844.10), SIMDE_FLOAT64_C( 14.26), SIMDE_FLOAT64_C( 46.24), SIMDE_FLOAT64_C( 859.32), SIMDE_FLOAT64_C( -393.92), SIMDE_FLOAT64_C( -209.45), SIMDE_FLOAT64_C( -80.60) }, { SIMDE_FLOAT64_C( 788.25), SIMDE_FLOAT64_C( -615.91), SIMDE_FLOAT64_C( -615.70), SIMDE_FLOAT64_C( -497.78), SIMDE_FLOAT64_C( 859.32), SIMDE_FLOAT64_C( -356.16), SIMDE_FLOAT64_C( 657.59), SIMDE_FLOAT64_C( 495.42) } }, { { SIMDE_FLOAT64_C( -162.13), SIMDE_FLOAT64_C( -439.04), SIMDE_FLOAT64_C( 528.91), SIMDE_FLOAT64_C( 558.95), SIMDE_FLOAT64_C( 667.32), SIMDE_FLOAT64_C( -653.00), SIMDE_FLOAT64_C( 152.68), SIMDE_FLOAT64_C( -948.59) }, UINT8_C(192), { SIMDE_FLOAT64_C( 654.90), SIMDE_FLOAT64_C( 107.18), SIMDE_FLOAT64_C( 375.14), SIMDE_FLOAT64_C( 312.49), SIMDE_FLOAT64_C( 311.29), SIMDE_FLOAT64_C( -840.12), SIMDE_FLOAT64_C( 100.74), SIMDE_FLOAT64_C( -985.95) }, { SIMDE_FLOAT64_C( 585.14), SIMDE_FLOAT64_C( -285.51), SIMDE_FLOAT64_C( 696.49), SIMDE_FLOAT64_C( -184.75), SIMDE_FLOAT64_C( 326.08), SIMDE_FLOAT64_C( 191.91), SIMDE_FLOAT64_C( 909.77), SIMDE_FLOAT64_C( 170.18) }, { SIMDE_FLOAT64_C( -162.13), SIMDE_FLOAT64_C( -439.04), SIMDE_FLOAT64_C( 528.91), SIMDE_FLOAT64_C( 558.95), SIMDE_FLOAT64_C( 667.32), SIMDE_FLOAT64_C( -653.00), SIMDE_FLOAT64_C( 909.77), SIMDE_FLOAT64_C( 170.18) } }, { { SIMDE_FLOAT64_C( -793.83), SIMDE_FLOAT64_C( -44.00), SIMDE_FLOAT64_C( 29.50), SIMDE_FLOAT64_C( -187.75), SIMDE_FLOAT64_C( 746.56), SIMDE_FLOAT64_C( 948.90), SIMDE_FLOAT64_C( 650.12), SIMDE_FLOAT64_C( -692.48) }, UINT8_C(200), { SIMDE_FLOAT64_C( 209.07), 
SIMDE_FLOAT64_C( 974.84), SIMDE_FLOAT64_C( 824.81), SIMDE_FLOAT64_C( -638.25), SIMDE_FLOAT64_C( -973.75), SIMDE_FLOAT64_C( -443.88), SIMDE_FLOAT64_C( -983.35), SIMDE_FLOAT64_C( 133.43) }, { SIMDE_FLOAT64_C( 931.26), SIMDE_FLOAT64_C( 329.14), SIMDE_FLOAT64_C( -555.28), SIMDE_FLOAT64_C( -908.86), SIMDE_FLOAT64_C( -570.13), SIMDE_FLOAT64_C( -541.23), SIMDE_FLOAT64_C( 676.28), SIMDE_FLOAT64_C( 144.37) }, { SIMDE_FLOAT64_C( -793.83), SIMDE_FLOAT64_C( -44.00), SIMDE_FLOAT64_C( 29.50), SIMDE_FLOAT64_C( -638.25), SIMDE_FLOAT64_C( 746.56), SIMDE_FLOAT64_C( 948.90), SIMDE_FLOAT64_C( 676.28), SIMDE_FLOAT64_C( 144.37) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_mm512_mask_max_pd(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_maskz_max_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { { UINT8_C(124), { SIMDE_FLOAT64_C( 659.04), SIMDE_FLOAT64_C( -119.01), SIMDE_FLOAT64_C( 237.02), SIMDE_FLOAT64_C( -321.23), SIMDE_FLOAT64_C( -24.75), SIMDE_FLOAT64_C( 582.04), SIMDE_FLOAT64_C( -389.52), SIMDE_FLOAT64_C( 699.41) }, { SIMDE_FLOAT64_C( 180.67), SIMDE_FLOAT64_C( -25.56), SIMDE_FLOAT64_C( -928.91), SIMDE_FLOAT64_C( 898.38), SIMDE_FLOAT64_C( -813.04), SIMDE_FLOAT64_C( -166.50), SIMDE_FLOAT64_C( 96.18), SIMDE_FLOAT64_C( -720.66) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 237.02), SIMDE_FLOAT64_C( 898.38), SIMDE_FLOAT64_C( -24.75), SIMDE_FLOAT64_C( 582.04), SIMDE_FLOAT64_C( 96.18), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C( 43), { SIMDE_FLOAT64_C( -775.97), SIMDE_FLOAT64_C( -789.28), SIMDE_FLOAT64_C( 689.62), SIMDE_FLOAT64_C( 
225.24), SIMDE_FLOAT64_C( 957.81), SIMDE_FLOAT64_C( -143.72), SIMDE_FLOAT64_C( 478.66), SIMDE_FLOAT64_C( 320.21) }, { SIMDE_FLOAT64_C( 845.85), SIMDE_FLOAT64_C( 504.25), SIMDE_FLOAT64_C( 94.13), SIMDE_FLOAT64_C( 696.20), SIMDE_FLOAT64_C( -502.89), SIMDE_FLOAT64_C( -685.24), SIMDE_FLOAT64_C( 355.24), SIMDE_FLOAT64_C( 378.11) }, { SIMDE_FLOAT64_C( 845.85), SIMDE_FLOAT64_C( 504.25), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 696.20), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -143.72), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(238), { SIMDE_FLOAT64_C( -965.99), SIMDE_FLOAT64_C( -646.65), SIMDE_FLOAT64_C( 133.82), SIMDE_FLOAT64_C( -355.50), SIMDE_FLOAT64_C( -947.23), SIMDE_FLOAT64_C( -685.51), SIMDE_FLOAT64_C( 618.94), SIMDE_FLOAT64_C( -876.14) }, { SIMDE_FLOAT64_C( -787.13), SIMDE_FLOAT64_C( 805.90), SIMDE_FLOAT64_C( -42.65), SIMDE_FLOAT64_C( 309.05), SIMDE_FLOAT64_C( -914.76), SIMDE_FLOAT64_C( 958.41), SIMDE_FLOAT64_C( 533.08), SIMDE_FLOAT64_C( -704.04) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 805.90), SIMDE_FLOAT64_C( 133.82), SIMDE_FLOAT64_C( 309.05), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 958.41), SIMDE_FLOAT64_C( 618.94), SIMDE_FLOAT64_C( -704.04) } }, { UINT8_C( 68), { SIMDE_FLOAT64_C( -241.67), SIMDE_FLOAT64_C( -746.23), SIMDE_FLOAT64_C( -495.69), SIMDE_FLOAT64_C( -763.01), SIMDE_FLOAT64_C( 573.99), SIMDE_FLOAT64_C( -649.84), SIMDE_FLOAT64_C( 741.23), SIMDE_FLOAT64_C( -331.89) }, { SIMDE_FLOAT64_C( -953.63), SIMDE_FLOAT64_C( -761.65), SIMDE_FLOAT64_C( -17.12), SIMDE_FLOAT64_C( 401.61), SIMDE_FLOAT64_C( 616.45), SIMDE_FLOAT64_C( -465.34), SIMDE_FLOAT64_C( 435.63), SIMDE_FLOAT64_C( 969.81) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -17.12), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 741.23), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(161), { SIMDE_FLOAT64_C( -919.88), SIMDE_FLOAT64_C( -977.43), SIMDE_FLOAT64_C( 982.97), SIMDE_FLOAT64_C( 699.06), SIMDE_FLOAT64_C( -853.57), 
SIMDE_FLOAT64_C( -804.15), SIMDE_FLOAT64_C( 504.96), SIMDE_FLOAT64_C( 103.79) }, { SIMDE_FLOAT64_C( 504.90), SIMDE_FLOAT64_C( 590.20), SIMDE_FLOAT64_C( 62.20), SIMDE_FLOAT64_C( 37.98), SIMDE_FLOAT64_C( 886.16), SIMDE_FLOAT64_C( -289.77), SIMDE_FLOAT64_C( 796.31), SIMDE_FLOAT64_C( -860.07) }, { SIMDE_FLOAT64_C( 504.90), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -289.77), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 103.79) } }, { UINT8_C( 77), { SIMDE_FLOAT64_C( -966.71), SIMDE_FLOAT64_C( 713.91), SIMDE_FLOAT64_C( 564.71), SIMDE_FLOAT64_C( 774.53), SIMDE_FLOAT64_C( -617.98), SIMDE_FLOAT64_C( 611.07), SIMDE_FLOAT64_C( -987.13), SIMDE_FLOAT64_C( 364.90) }, { SIMDE_FLOAT64_C( 12.69), SIMDE_FLOAT64_C( 629.33), SIMDE_FLOAT64_C( 899.56), SIMDE_FLOAT64_C( -551.68), SIMDE_FLOAT64_C( 599.14), SIMDE_FLOAT64_C( 568.04), SIMDE_FLOAT64_C( -471.56), SIMDE_FLOAT64_C( 621.71) }, { SIMDE_FLOAT64_C( 12.69), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 899.56), SIMDE_FLOAT64_C( 774.53), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -471.56), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(168), { SIMDE_FLOAT64_C( -772.50), SIMDE_FLOAT64_C( 768.14), SIMDE_FLOAT64_C( 746.85), SIMDE_FLOAT64_C( 732.46), SIMDE_FLOAT64_C( -128.07), SIMDE_FLOAT64_C( 251.75), SIMDE_FLOAT64_C( 322.66), SIMDE_FLOAT64_C( 934.13) }, { SIMDE_FLOAT64_C( -710.27), SIMDE_FLOAT64_C( 208.82), SIMDE_FLOAT64_C( -355.64), SIMDE_FLOAT64_C( -913.97), SIMDE_FLOAT64_C( 348.75), SIMDE_FLOAT64_C( 858.91), SIMDE_FLOAT64_C( -880.67), SIMDE_FLOAT64_C( 62.66) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 732.46), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 858.91), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 934.13) } }, { UINT8_C(222), { SIMDE_FLOAT64_C( 893.86), SIMDE_FLOAT64_C( 444.68), SIMDE_FLOAT64_C( 34.69), SIMDE_FLOAT64_C( 906.73), SIMDE_FLOAT64_C( -190.42), SIMDE_FLOAT64_C( -952.63), SIMDE_FLOAT64_C( 536.06), 
SIMDE_FLOAT64_C( -290.86) }, { SIMDE_FLOAT64_C( -504.31), SIMDE_FLOAT64_C( 135.19), SIMDE_FLOAT64_C( -722.83), SIMDE_FLOAT64_C( 24.13), SIMDE_FLOAT64_C( -243.10), SIMDE_FLOAT64_C( 828.18), SIMDE_FLOAT64_C( 251.63), SIMDE_FLOAT64_C( -474.96) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 444.68), SIMDE_FLOAT64_C( 34.69), SIMDE_FLOAT64_C( 906.73), SIMDE_FLOAT64_C( -190.42), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 536.06), SIMDE_FLOAT64_C( -290.86) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_mm512_maskz_max_pd(test_vec[i].k, a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_max_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_max_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_max_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_max_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_max_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_max_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_max_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_max_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_max_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_max_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_max_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_max_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_max_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_max_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_max_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_max_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_max_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_max_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_max_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_max_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_max_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_max_epu64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_max_epu64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_max_epu64) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm512_max_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_max_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_max_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_max_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_max_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_max_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/meson.build000066400000000000000000000017701400333146700174230ustar00rootroot00000000000000simde_test_x86_avx512_sources = [] foreach name : simde_avx512_families foreach lang : ['c', 'cpp'] source_file = name + '.c' if lang == 'cpp' source_file = configure_file(input: name + '.c', output: name + '.cpp', copy: true) endif simde_test_x86_avx512_sources += source_file foreach emul : ['emul', 'native'] extra_flags = ['-DSIMDE_TEST_BARE'] if emul == 'emul' extra_flags += '-DSIMDE_NO_NATIVE' endif x = executable(name + '-' + emul + '-' + lang, source_file, c_args: simde_c_args + simde_c_defs + simde_native_c_flags + extra_flags, cpp_args: simde_c_args + simde_c_defs + simde_native_c_flags + extra_flags, include_directories: simde_include_dir, dependencies: simde_deps) test('x86/avx512/' + name + '/' + emul + '/' + lang, x, protocol: 'tap', # Emscripten tests must be run from builddir workdir: meson.current_build_dir()) endforeach endforeach endforeach simde-0.7.2/test/x86/avx512/min.c000066400000000000000000015437161400333146700162240ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial 
portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN min #include #include #include static int test_simde_mm512_min_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int8_t a[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { { { INT8_C( 99), INT8_C( 57), INT8_C( 67), -INT8_C( 9), -INT8_C( 6), INT8_C( 33), -INT8_C( 124), INT8_C( 36), INT8_C( 33), INT8_C( 54), -INT8_C( 88), -INT8_C( 42), -INT8_C( 2), INT8_C( 100), -INT8_C( 20), -INT8_C( 26), INT8_C( 12), INT8_C( 68), -INT8_C( 19), INT8_C( 5), INT8_C( 93), -INT8_C( 21), INT8_MAX, INT8_C( 103), INT8_C( 108), INT8_C( 29), INT8_C( 35), -INT8_C( 11), INT8_C( 48), INT8_C( 37), -INT8_C( 11), -INT8_C( 108), INT8_C( 94), INT8_C( 56), -INT8_C( 117), INT8_C( 88), INT8_C( 89), INT8_C( 15), INT8_C( 124), INT8_C( 122), INT8_C( 69), INT8_C( 37), INT8_C( 80), INT8_C( 67), -INT8_C( 119), INT8_C( 60), INT8_C( 42), -INT8_C( 107), INT8_MIN, INT8_C( 23), -INT8_C( 102), -INT8_C( 34), INT8_C( 2), INT8_C( 26), INT8_C( 69), INT8_C( 111), INT8_C( 55), INT8_C( 104), INT8_C( 100), INT8_C( 104), -INT8_C( 114), INT8_C( 89), -INT8_C( 4), -INT8_C( 20) }, { -INT8_C( 111), -INT8_C( 121), INT8_C( 69), -INT8_C( 22), -INT8_C( 106), -INT8_C( 63), INT8_C( 101), -INT8_C( 37), -INT8_C( 26), -INT8_C( 75), INT8_C( 31), INT8_C( 111), -INT8_C( 14), INT8_C( 73), INT8_C( 4), INT8_C( 114), INT8_C( 96), -INT8_C( 97), INT8_C( 80), INT8_C( 98), -INT8_C( 71), -INT8_C( 106), -INT8_C( 47), -INT8_C( 16), -INT8_C( 
2), INT8_C( 54), INT8_C( 88), -INT8_C( 116), -INT8_C( 113), INT8_C( 84), INT8_C( 121), INT8_C( 33), -INT8_C( 37), -INT8_C( 66), INT8_C( 11), INT8_C( 113), INT8_MAX, INT8_C( 112), INT8_C( 77), INT8_C( 102), INT8_C( 38), INT8_C( 108), -INT8_C( 43), INT8_C( 24), -INT8_C( 75), -INT8_C( 38), -INT8_C( 118), INT8_C( 21), INT8_C( 121), -INT8_C( 37), INT8_C( 119), INT8_C( 50), INT8_C( 113), INT8_C( 73), INT8_C( 34), INT8_C( 111), INT8_MAX, INT8_C( 123), -INT8_C( 4), INT8_C( 14), -INT8_C( 49), INT8_C( 117), INT8_C( 47), -INT8_C( 85) }, { -INT8_C( 111), -INT8_C( 121), INT8_C( 67), -INT8_C( 22), -INT8_C( 106), -INT8_C( 63), -INT8_C( 124), -INT8_C( 37), -INT8_C( 26), -INT8_C( 75), -INT8_C( 88), -INT8_C( 42), -INT8_C( 14), INT8_C( 73), -INT8_C( 20), -INT8_C( 26), INT8_C( 12), -INT8_C( 97), -INT8_C( 19), INT8_C( 5), -INT8_C( 71), -INT8_C( 106), -INT8_C( 47), -INT8_C( 16), -INT8_C( 2), INT8_C( 29), INT8_C( 35), -INT8_C( 116), -INT8_C( 113), INT8_C( 37), -INT8_C( 11), -INT8_C( 108), -INT8_C( 37), -INT8_C( 66), -INT8_C( 117), INT8_C( 88), INT8_C( 89), INT8_C( 15), INT8_C( 77), INT8_C( 102), INT8_C( 38), INT8_C( 37), -INT8_C( 43), INT8_C( 24), -INT8_C( 119), -INT8_C( 38), -INT8_C( 118), -INT8_C( 107), INT8_MIN, -INT8_C( 37), -INT8_C( 102), -INT8_C( 34), INT8_C( 2), INT8_C( 26), INT8_C( 34), INT8_C( 111), INT8_C( 55), INT8_C( 104), -INT8_C( 4), INT8_C( 14), -INT8_C( 114), INT8_C( 89), -INT8_C( 4), -INT8_C( 85) } }, { { INT8_C( 51), INT8_C( 59), INT8_C( 28), -INT8_C( 78), -INT8_C( 85), INT8_C( 105), INT8_C( 24), -INT8_C( 47), -INT8_C( 43), -INT8_C( 18), -INT8_C( 23), -INT8_C( 118), -INT8_C( 56), INT8_C( 116), -INT8_C( 97), INT8_C( 65), INT8_C( 79), INT8_C( 23), INT8_C( 115), -INT8_C( 64), INT8_C( 96), -INT8_C( 107), INT8_C( 47), -INT8_C( 33), INT8_C( 16), INT8_C( 43), -INT8_C( 19), -INT8_C( 32), -INT8_C( 96), INT8_C( 29), -INT8_C( 117), -INT8_C( 45), INT8_C( 88), -INT8_C( 89), -INT8_C( 122), INT8_C( 3), INT8_C( 17), -INT8_C( 98), -INT8_C( 43), -INT8_C( 26), -INT8_C( 116), -INT8_C( 66), 
INT8_C( 113), INT8_C( 84), INT8_C( 50), INT8_C( 16), -INT8_C( 107), -INT8_C( 127), INT8_C( 39), INT8_C( 8), INT8_C( 65), -INT8_C( 121), -INT8_C( 98), INT8_C( 113), INT8_C( 102), -INT8_C( 82), -INT8_C( 100), INT8_C( 84), -INT8_C( 114), INT8_C( 61), INT8_C( 113), INT8_C( 25), INT8_C( 16), -INT8_C( 55) }, { -INT8_C( 63), -INT8_C( 106), -INT8_C( 52), -INT8_C( 46), INT8_C( 53), -INT8_C( 95), -INT8_C( 72), -INT8_C( 63), INT8_C( 96), INT8_C( 41), INT8_C( 22), -INT8_C( 110), INT8_C( 58), -INT8_C( 85), INT8_C( 20), INT8_C( 97), -INT8_C( 76), INT8_C( 85), -INT8_C( 23), INT8_C( 82), -INT8_C( 58), INT8_C( 79), INT8_C( 0), INT8_C( 99), -INT8_C( 93), -INT8_C( 113), -INT8_C( 96), INT8_C( 20), -INT8_C( 88), -INT8_C( 80), -INT8_C( 35), INT8_C( 105), INT8_C( 71), -INT8_C( 86), INT8_C( 59), INT8_C( 124), INT8_C( 75), -INT8_C( 12), INT8_C( 61), -INT8_C( 85), INT8_C( 29), INT8_C( 83), INT8_C( 62), INT8_C( 87), -INT8_C( 1), INT8_C( 82), -INT8_C( 71), -INT8_C( 77), -INT8_C( 89), -INT8_C( 94), INT8_C( 5), INT8_C( 110), -INT8_C( 15), INT8_C( 5), -INT8_C( 47), -INT8_C( 107), -INT8_C( 108), INT8_C( 113), -INT8_C( 87), INT8_C( 61), INT8_C( 33), -INT8_C( 121), -INT8_C( 90), INT8_C( 104) }, { -INT8_C( 63), -INT8_C( 106), -INT8_C( 52), -INT8_C( 78), -INT8_C( 85), -INT8_C( 95), -INT8_C( 72), -INT8_C( 63), -INT8_C( 43), -INT8_C( 18), -INT8_C( 23), -INT8_C( 118), -INT8_C( 56), -INT8_C( 85), -INT8_C( 97), INT8_C( 65), -INT8_C( 76), INT8_C( 23), -INT8_C( 23), -INT8_C( 64), -INT8_C( 58), -INT8_C( 107), INT8_C( 0), -INT8_C( 33), -INT8_C( 93), -INT8_C( 113), -INT8_C( 96), -INT8_C( 32), -INT8_C( 96), -INT8_C( 80), -INT8_C( 117), -INT8_C( 45), INT8_C( 71), -INT8_C( 89), -INT8_C( 122), INT8_C( 3), INT8_C( 17), -INT8_C( 98), -INT8_C( 43), -INT8_C( 85), -INT8_C( 116), -INT8_C( 66), INT8_C( 62), INT8_C( 84), -INT8_C( 1), INT8_C( 16), -INT8_C( 107), -INT8_C( 127), -INT8_C( 89), -INT8_C( 94), INT8_C( 5), -INT8_C( 121), -INT8_C( 98), INT8_C( 5), -INT8_C( 47), -INT8_C( 107), -INT8_C( 108), INT8_C( 84), -INT8_C( 
114), INT8_C( 61), INT8_C( 33), -INT8_C( 121), -INT8_C( 90), -INT8_C( 55) } }, { { INT8_C( 49), -INT8_C( 30), -INT8_C( 28), INT8_C( 124), -INT8_C( 42), INT8_C( 34), INT8_C( 40), -INT8_C( 13), INT8_C( 117), INT8_C( 102), INT8_C( 75), INT8_C( 116), -INT8_C( 72), INT8_C( 4), INT8_C( 39), INT8_C( 95), -INT8_C( 90), INT8_C( 44), -INT8_C( 51), -INT8_C( 105), INT8_C( 50), -INT8_C( 98), INT8_C( 44), -INT8_C( 58), INT8_C( 15), -INT8_C( 42), INT8_C( 3), INT8_C( 49), INT8_C( 93), -INT8_C( 86), -INT8_C( 103), -INT8_C( 114), -INT8_C( 116), INT8_C( 126), INT8_C( 10), INT8_C( 98), -INT8_C( 96), INT8_C( 50), INT8_C( 85), INT8_C( 21), -INT8_C( 104), -INT8_C( 96), -INT8_C( 118), INT8_C( 80), -INT8_C( 92), -INT8_C( 79), -INT8_C( 80), INT8_C( 74), -INT8_C( 34), INT8_C( 125), -INT8_C( 30), INT8_C( 16), INT8_C( 28), INT8_C( 14), -INT8_C( 42), INT8_C( 43), -INT8_C( 28), -INT8_C( 38), INT8_C( 92), INT8_C( 65), -INT8_C( 124), -INT8_C( 10), -INT8_C( 49), INT8_C( 16) }, { INT8_C( 116), -INT8_C( 38), INT8_C( 114), INT8_C( 20), INT8_C( 12), -INT8_C( 57), INT8_C( 41), -INT8_C( 91), INT8_C( 104), -INT8_C( 77), -INT8_C( 11), INT8_C( 12), INT8_C( 101), -INT8_C( 91), INT8_C( 87), INT8_C( 67), INT8_C( 35), INT8_C( 57), INT8_C( 83), INT8_C( 63), INT8_C( 71), INT8_C( 41), INT8_C( 106), INT8_C( 44), INT8_C( 3), -INT8_C( 57), INT8_C( 109), -INT8_C( 121), -INT8_C( 67), INT8_C( 61), -INT8_C( 105), INT8_C( 49), INT8_C( 23), INT8_C( 9), INT8_C( 69), INT8_C( 35), -INT8_C( 47), INT8_C( 110), -INT8_C( 56), INT8_C( 57), INT8_C( 34), -INT8_C( 66), INT8_C( 69), -INT8_C( 121), INT8_C( 99), -INT8_C( 100), -INT8_C( 54), -INT8_C( 122), -INT8_C( 43), INT8_C( 29), -INT8_C( 59), INT8_C( 29), INT8_C( 70), INT8_C( 48), INT8_C( 73), INT8_C( 74), -INT8_C( 9), -INT8_C( 74), -INT8_C( 47), -INT8_C( 76), -INT8_C( 13), INT8_C( 105), -INT8_C( 27), INT8_C( 10) }, { INT8_C( 49), -INT8_C( 38), -INT8_C( 28), INT8_C( 20), -INT8_C( 42), -INT8_C( 57), INT8_C( 40), -INT8_C( 91), INT8_C( 104), -INT8_C( 77), -INT8_C( 11), INT8_C( 12), 
-INT8_C( 72), -INT8_C( 91), INT8_C( 39), INT8_C( 67), -INT8_C( 90), INT8_C( 44), -INT8_C( 51), -INT8_C( 105), INT8_C( 50), -INT8_C( 98), INT8_C( 44), -INT8_C( 58), INT8_C( 3), -INT8_C( 57), INT8_C( 3), -INT8_C( 121), -INT8_C( 67), -INT8_C( 86), -INT8_C( 105), -INT8_C( 114), -INT8_C( 116), INT8_C( 9), INT8_C( 10), INT8_C( 35), -INT8_C( 96), INT8_C( 50), -INT8_C( 56), INT8_C( 21), -INT8_C( 104), -INT8_C( 96), -INT8_C( 118), -INT8_C( 121), -INT8_C( 92), -INT8_C( 100), -INT8_C( 80), -INT8_C( 122), -INT8_C( 43), INT8_C( 29), -INT8_C( 59), INT8_C( 16), INT8_C( 28), INT8_C( 14), -INT8_C( 42), INT8_C( 43), -INT8_C( 28), -INT8_C( 74), -INT8_C( 47), -INT8_C( 76), -INT8_C( 124), -INT8_C( 10), -INT8_C( 49), INT8_C( 10) } }, { { INT8_C( 114), INT8_C( 42), INT8_C( 46), INT8_C( 67), -INT8_C( 104), -INT8_C( 10), INT8_C( 124), -INT8_C( 70), -INT8_C( 76), -INT8_C( 62), INT8_C( 65), INT8_C( 24), INT8_C( 94), INT8_C( 11), -INT8_C( 98), INT8_C( 52), INT8_C( 40), INT8_C( 100), INT8_C( 81), INT8_C( 111), -INT8_C( 108), -INT8_C( 102), -INT8_C( 71), -INT8_C( 117), INT8_C( 80), -INT8_C( 118), INT8_C( 63), INT8_C( 68), -INT8_C( 13), INT8_C( 36), INT8_C( 78), INT8_C( 102), INT8_C( 78), INT8_C( 124), -INT8_C( 87), -INT8_C( 26), INT8_C( 115), INT8_C( 38), -INT8_C( 95), INT8_C( 39), -INT8_C( 24), -INT8_C( 30), INT8_C( 63), INT8_C( 70), -INT8_C( 18), -INT8_C( 34), INT8_C( 122), INT8_C( 22), INT8_C( 66), -INT8_C( 53), -INT8_C( 123), -INT8_C( 42), INT8_C( 101), INT8_C( 62), INT8_C( 97), -INT8_C( 74), -INT8_C( 55), -INT8_C( 96), -INT8_C( 6), -INT8_C( 68), -INT8_C( 60), INT8_C( 72), INT8_C( 34), INT8_C( 18) }, { -INT8_C( 59), -INT8_C( 52), -INT8_C( 8), INT8_C( 56), -INT8_C( 14), -INT8_C( 103), INT8_C( 95), -INT8_C( 38), INT8_C( 124), -INT8_C( 97), INT8_C( 32), INT8_C( 106), INT8_C( 125), -INT8_C( 101), INT8_MIN, -INT8_C( 65), INT8_C( 102), INT8_C( 6), -INT8_C( 107), -INT8_C( 52), INT8_C( 68), -INT8_C( 10), -INT8_C( 126), INT8_C( 13), -INT8_C( 106), INT8_C( 124), -INT8_C( 54), INT8_C( 90), -INT8_C( 
60), -INT8_C( 20), INT8_C( 108), -INT8_C( 119), -INT8_C( 72), INT8_C( 100), -INT8_C( 63), -INT8_C( 86), -INT8_C( 2), INT8_C( 33), -INT8_C( 124), INT8_C( 122), -INT8_C( 64), -INT8_C( 91), -INT8_C( 28), INT8_C( 61), INT8_C( 64), INT8_C( 100), -INT8_C( 4), -INT8_C( 90), INT8_C( 106), -INT8_C( 111), INT8_C( 114), -INT8_C( 81), -INT8_C( 121), -INT8_C( 12), -INT8_C( 68), INT8_C( 29), INT8_C( 112), -INT8_C( 122), INT8_C( 119), INT8_C( 53), INT8_C( 115), -INT8_C( 29), -INT8_C( 66), INT8_C( 43) }, { -INT8_C( 59), -INT8_C( 52), -INT8_C( 8), INT8_C( 56), -INT8_C( 104), -INT8_C( 103), INT8_C( 95), -INT8_C( 70), -INT8_C( 76), -INT8_C( 97), INT8_C( 32), INT8_C( 24), INT8_C( 94), -INT8_C( 101), INT8_MIN, -INT8_C( 65), INT8_C( 40), INT8_C( 6), -INT8_C( 107), -INT8_C( 52), -INT8_C( 108), -INT8_C( 102), -INT8_C( 126), -INT8_C( 117), -INT8_C( 106), -INT8_C( 118), -INT8_C( 54), INT8_C( 68), -INT8_C( 60), -INT8_C( 20), INT8_C( 78), -INT8_C( 119), -INT8_C( 72), INT8_C( 100), -INT8_C( 87), -INT8_C( 86), -INT8_C( 2), INT8_C( 33), -INT8_C( 124), INT8_C( 39), -INT8_C( 64), -INT8_C( 91), -INT8_C( 28), INT8_C( 61), -INT8_C( 18), -INT8_C( 34), -INT8_C( 4), -INT8_C( 90), INT8_C( 66), -INT8_C( 111), -INT8_C( 123), -INT8_C( 81), -INT8_C( 121), -INT8_C( 12), -INT8_C( 68), -INT8_C( 74), -INT8_C( 55), -INT8_C( 122), -INT8_C( 6), -INT8_C( 68), -INT8_C( 60), -INT8_C( 29), -INT8_C( 66), INT8_C( 18) } }, { { INT8_C( 71), INT8_MIN, -INT8_C( 42), INT8_C( 69), -INT8_C( 95), INT8_C( 90), -INT8_C( 65), INT8_C( 97), -INT8_C( 1), -INT8_C( 93), -INT8_C( 98), INT8_C( 63), INT8_C( 8), -INT8_C( 102), -INT8_C( 26), INT8_C( 114), INT8_C( 43), INT8_C( 88), INT8_C( 33), -INT8_C( 78), INT8_C( 77), -INT8_C( 34), -INT8_C( 49), -INT8_C( 67), INT8_C( 100), INT8_C( 70), -INT8_C( 14), -INT8_C( 41), INT8_C( 41), -INT8_C( 79), INT8_C( 3), INT8_C( 112), INT8_C( 49), -INT8_C( 39), -INT8_C( 74), -INT8_C( 46), INT8_C( 51), INT8_C( 117), INT8_C( 51), INT8_C( 51), INT8_C( 25), -INT8_C( 47), INT8_C( 114), INT8_C( 33), INT8_C( 107), 
INT8_C( 88), -INT8_C( 109), -INT8_C( 106), -INT8_C( 79), -INT8_C( 75), INT8_C( 72), -INT8_C( 2), -INT8_C( 109), INT8_C( 23), -INT8_C( 69), -INT8_C( 9), INT8_C( 93), -INT8_C( 82), -INT8_C( 49), -INT8_C( 122), INT8_C( 95), -INT8_C( 46), -INT8_C( 10), -INT8_C( 112) }, { -INT8_C( 85), -INT8_C( 84), INT8_C( 98), -INT8_C( 34), INT8_C( 34), -INT8_C( 107), INT8_C( 17), INT8_C( 59), INT8_C( 102), -INT8_C( 124), INT8_C( 92), -INT8_C( 47), -INT8_C( 36), -INT8_C( 17), INT8_C( 103), -INT8_C( 115), -INT8_C( 92), -INT8_C( 81), -INT8_C( 117), INT8_C( 55), -INT8_C( 58), INT8_C( 71), INT8_C( 47), INT8_C( 35), -INT8_C( 11), -INT8_C( 2), -INT8_C( 87), INT8_C( 84), -INT8_C( 48), -INT8_C( 97), -INT8_C( 28), INT8_C( 123), INT8_C( 76), INT8_C( 70), INT8_C( 89), INT8_C( 110), -INT8_C( 37), INT8_C( 107), -INT8_C( 87), INT8_C( 65), -INT8_C( 17), INT8_C( 5), INT8_C( 18), -INT8_C( 53), -INT8_C( 12), INT8_C( 121), INT8_C( 89), -INT8_C( 103), INT8_C( 40), -INT8_C( 28), -INT8_C( 48), -INT8_C( 18), INT8_C( 43), -INT8_C( 1), INT8_C( 17), INT8_C( 32), -INT8_C( 3), -INT8_C( 70), INT8_C( 116), -INT8_C( 51), INT8_C( 89), INT8_C( 88), INT8_C( 72), -INT8_C( 91) }, { -INT8_C( 85), INT8_MIN, -INT8_C( 42), -INT8_C( 34), -INT8_C( 95), -INT8_C( 107), -INT8_C( 65), INT8_C( 59), -INT8_C( 1), -INT8_C( 124), -INT8_C( 98), -INT8_C( 47), -INT8_C( 36), -INT8_C( 102), -INT8_C( 26), -INT8_C( 115), -INT8_C( 92), -INT8_C( 81), -INT8_C( 117), -INT8_C( 78), -INT8_C( 58), -INT8_C( 34), -INT8_C( 49), -INT8_C( 67), -INT8_C( 11), -INT8_C( 2), -INT8_C( 87), -INT8_C( 41), -INT8_C( 48), -INT8_C( 97), -INT8_C( 28), INT8_C( 112), INT8_C( 49), -INT8_C( 39), -INT8_C( 74), -INT8_C( 46), -INT8_C( 37), INT8_C( 107), -INT8_C( 87), INT8_C( 51), -INT8_C( 17), -INT8_C( 47), INT8_C( 18), -INT8_C( 53), -INT8_C( 12), INT8_C( 88), -INT8_C( 109), -INT8_C( 106), -INT8_C( 79), -INT8_C( 75), -INT8_C( 48), -INT8_C( 18), -INT8_C( 109), -INT8_C( 1), -INT8_C( 69), -INT8_C( 9), -INT8_C( 3), -INT8_C( 82), -INT8_C( 49), -INT8_C( 122), INT8_C( 89), 
-INT8_C( 46), -INT8_C( 10), -INT8_C( 112) } }, { { -INT8_C( 98), -INT8_C( 94), INT8_C( 19), INT8_C( 121), INT8_C( 13), -INT8_C( 68), -INT8_C( 70), -INT8_C( 4), -INT8_C( 63), -INT8_C( 52), -INT8_C( 57), -INT8_C( 74), INT8_C( 69), INT8_C( 32), INT8_C( 79), INT8_C( 109), INT8_C( 5), INT8_C( 31), INT8_C( 91), INT8_C( 48), INT8_C( 31), INT8_C( 108), INT8_C( 81), INT8_C( 28), INT8_C( 38), -INT8_C( 59), -INT8_C( 22), INT8_MIN, INT8_C( 30), INT8_C( 50), INT8_C( 37), -INT8_C( 68), -INT8_C( 44), INT8_C( 57), INT8_C( 54), -INT8_C( 31), -INT8_C( 11), -INT8_C( 16), -INT8_C( 35), -INT8_C( 73), -INT8_C( 67), -INT8_C( 91), INT8_C( 109), INT8_C( 2), -INT8_C( 59), -INT8_C( 68), INT8_C( 112), -INT8_C( 54), -INT8_C( 37), -INT8_C( 53), -INT8_C( 5), -INT8_C( 6), INT8_C( 56), INT8_C( 76), INT8_C( 23), INT8_C( 94), INT8_C( 17), INT8_C( 1), -INT8_C( 34), INT8_C( 47), INT8_C( 51), INT8_C( 4), -INT8_C( 20), INT8_C( 8) }, { INT8_C( 61), INT8_C( 34), -INT8_C( 23), INT8_C( 50), INT8_C( 18), -INT8_C( 57), -INT8_C( 23), -INT8_C( 49), INT8_C( 108), INT8_C( 86), -INT8_C( 46), INT8_C( 49), INT8_C( 18), INT8_C( 66), -INT8_C( 4), -INT8_C( 18), INT8_C( 13), -INT8_C( 9), -INT8_C( 24), INT8_C( 69), INT8_C( 67), -INT8_C( 1), -INT8_C( 92), INT8_C( 84), INT8_C( 0), -INT8_C( 126), -INT8_C( 124), INT8_C( 52), -INT8_C( 122), INT8_C( 112), INT8_C( 60), -INT8_C( 61), -INT8_C( 110), INT8_C( 37), -INT8_C( 10), -INT8_C( 92), -INT8_C( 20), -INT8_C( 33), INT8_C( 116), INT8_C( 88), INT8_C( 54), INT8_C( 70), -INT8_C( 118), INT8_C( 72), -INT8_C( 120), -INT8_C( 122), INT8_C( 54), -INT8_C( 107), INT8_C( 125), INT8_C( 31), -INT8_C( 37), -INT8_C( 64), INT8_C( 30), INT8_MAX, INT8_C( 20), INT8_C( 31), INT8_C( 1), -INT8_C( 104), INT8_C( 83), -INT8_C( 120), INT8_C( 8), -INT8_C( 113), INT8_C( 75), -INT8_C( 102) }, { -INT8_C( 98), -INT8_C( 94), -INT8_C( 23), INT8_C( 50), INT8_C( 13), -INT8_C( 68), -INT8_C( 70), -INT8_C( 49), -INT8_C( 63), -INT8_C( 52), -INT8_C( 57), -INT8_C( 74), INT8_C( 18), INT8_C( 32), -INT8_C( 4), -INT8_C( 
18), INT8_C( 5), -INT8_C( 9), -INT8_C( 24), INT8_C( 48), INT8_C( 31), -INT8_C( 1), -INT8_C( 92), INT8_C( 28), INT8_C( 0), -INT8_C( 126), -INT8_C( 124), INT8_MIN, -INT8_C( 122), INT8_C( 50), INT8_C( 37), -INT8_C( 68), -INT8_C( 110), INT8_C( 37), -INT8_C( 10), -INT8_C( 92), -INT8_C( 20), -INT8_C( 33), -INT8_C( 35), -INT8_C( 73), -INT8_C( 67), -INT8_C( 91), -INT8_C( 118), INT8_C( 2), -INT8_C( 120), -INT8_C( 122), INT8_C( 54), -INT8_C( 107), -INT8_C( 37), -INT8_C( 53), -INT8_C( 37), -INT8_C( 64), INT8_C( 30), INT8_C( 76), INT8_C( 20), INT8_C( 31), INT8_C( 1), -INT8_C( 104), -INT8_C( 34), -INT8_C( 120), INT8_C( 8), -INT8_C( 113), -INT8_C( 20), -INT8_C( 102) } }, { { -INT8_C( 76), INT8_C( 65), INT8_C( 63), -INT8_C( 95), INT8_C( 33), -INT8_C( 77), -INT8_C( 7), INT8_C( 87), -INT8_C( 7), -INT8_C( 125), -INT8_C( 97), -INT8_C( 127), INT8_C( 9), -INT8_C( 42), INT8_C( 22), -INT8_C( 122), -INT8_C( 11), -INT8_C( 15), INT8_C( 70), INT8_C( 19), INT8_C( 112), INT8_C( 91), INT8_C( 50), INT8_C( 114), -INT8_C( 13), -INT8_C( 123), -INT8_C( 6), -INT8_C( 4), INT8_C( 20), INT8_C( 69), -INT8_C( 106), -INT8_C( 55), -INT8_C( 121), -INT8_C( 43), INT8_C( 106), -INT8_C( 88), -INT8_C( 120), INT8_C( 99), -INT8_C( 1), -INT8_C( 127), -INT8_C( 25), -INT8_C( 98), INT8_C( 2), -INT8_C( 16), INT8_C( 116), INT8_C( 25), INT8_C( 119), INT8_C( 105), INT8_C( 10), -INT8_C( 67), INT8_C( 125), INT8_C( 123), INT8_C( 24), -INT8_C( 81), -INT8_C( 19), INT8_C( 12), INT8_C( 53), -INT8_C( 25), INT8_C( 8), INT8_C( 73), INT8_C( 44), -INT8_C( 98), INT8_C( 18), -INT8_C( 77) }, { INT8_C( 116), INT8_C( 124), INT8_C( 91), -INT8_C( 4), -INT8_C( 32), INT8_C( 90), INT8_C( 126), -INT8_C( 57), -INT8_C( 7), INT8_MIN, -INT8_C( 73), INT8_C( 109), -INT8_C( 103), INT8_C( 46), -INT8_C( 41), -INT8_C( 92), -INT8_C( 20), INT8_C( 84), INT8_C( 31), INT8_C( 4), INT8_C( 3), INT8_C( 12), INT8_C( 16), INT8_C( 56), -INT8_C( 13), INT8_C( 24), -INT8_C( 126), INT8_C( 31), -INT8_C( 73), -INT8_C( 108), -INT8_C( 45), INT8_C( 43), INT8_C( 17), INT8_C( 
46), INT8_C( 39), -INT8_C( 15), -INT8_C( 119), -INT8_C( 91), -INT8_C( 72), -INT8_C( 126), INT8_C( 38), INT8_C( 111), -INT8_C( 17), -INT8_C( 65), -INT8_C( 98), -INT8_C( 58), INT8_C( 99), -INT8_C( 118), INT8_C( 26), -INT8_C( 126), -INT8_C( 114), INT8_C( 30), -INT8_C( 114), -INT8_C( 97), INT8_C( 86), -INT8_C( 127), -INT8_C( 73), -INT8_C( 40), -INT8_C( 95), INT8_C( 110), INT8_C( 109), INT8_C( 116), -INT8_C( 103), INT8_C( 126) }, { -INT8_C( 76), INT8_C( 65), INT8_C( 63), -INT8_C( 95), -INT8_C( 32), -INT8_C( 77), -INT8_C( 7), -INT8_C( 57), -INT8_C( 7), INT8_MIN, -INT8_C( 97), -INT8_C( 127), -INT8_C( 103), -INT8_C( 42), -INT8_C( 41), -INT8_C( 122), -INT8_C( 20), -INT8_C( 15), INT8_C( 31), INT8_C( 4), INT8_C( 3), INT8_C( 12), INT8_C( 16), INT8_C( 56), -INT8_C( 13), -INT8_C( 123), -INT8_C( 126), -INT8_C( 4), -INT8_C( 73), -INT8_C( 108), -INT8_C( 106), -INT8_C( 55), -INT8_C( 121), -INT8_C( 43), INT8_C( 39), -INT8_C( 88), -INT8_C( 120), -INT8_C( 91), -INT8_C( 72), -INT8_C( 127), -INT8_C( 25), -INT8_C( 98), -INT8_C( 17), -INT8_C( 65), -INT8_C( 98), -INT8_C( 58), INT8_C( 99), -INT8_C( 118), INT8_C( 10), -INT8_C( 126), -INT8_C( 114), INT8_C( 30), -INT8_C( 114), -INT8_C( 97), -INT8_C( 19), -INT8_C( 127), -INT8_C( 73), -INT8_C( 40), -INT8_C( 95), INT8_C( 73), INT8_C( 44), -INT8_C( 98), -INT8_C( 103), -INT8_C( 77) } }, { { -INT8_C( 94), -INT8_C( 63), INT8_C( 111), INT8_C( 43), INT8_C( 102), INT8_C( 39), -INT8_C( 83), -INT8_C( 116), -INT8_C( 106), -INT8_C( 99), INT8_C( 76), INT8_C( 52), INT8_C( 99), -INT8_C( 81), -INT8_C( 66), INT8_C( 126), INT8_C( 50), INT8_C( 77), -INT8_C( 100), -INT8_C( 64), -INT8_C( 20), -INT8_C( 14), INT8_C( 66), -INT8_C( 93), -INT8_C( 53), -INT8_C( 29), INT8_C( 18), INT8_C( 56), INT8_C( 87), -INT8_C( 85), -INT8_C( 74), -INT8_C( 7), INT8_C( 108), INT8_C( 37), INT8_C( 37), -INT8_C( 45), INT8_C( 76), -INT8_C( 46), INT8_C( 95), -INT8_C( 30), INT8_C( 111), -INT8_C( 85), INT8_C( 23), -INT8_C( 45), INT8_C( 91), -INT8_C( 43), INT8_C( 81), -INT8_C( 115), INT8_C( 34), 
-INT8_C( 19), INT8_C( 77), INT8_C( 14), -INT8_C( 33), -INT8_C( 113), -INT8_C( 78), -INT8_C( 86), INT8_C( 114), -INT8_C( 60), -INT8_C( 30), -INT8_C( 55), INT8_C( 111), -INT8_C( 104), -INT8_C( 61), -INT8_C( 36) }, { -INT8_C( 67), -INT8_C( 24), -INT8_C( 81), INT8_C( 9), -INT8_C( 70), INT8_C( 14), -INT8_C( 20), INT8_C( 42), -INT8_C( 70), INT8_C( 3), -INT8_C( 3), INT8_C( 21), -INT8_C( 40), INT8_C( 78), -INT8_C( 94), -INT8_C( 5), INT8_C( 59), -INT8_C( 17), INT8_C( 9), INT8_C( 26), INT8_MAX, -INT8_C( 69), -INT8_C( 59), -INT8_C( 15), INT8_MAX, -INT8_C( 89), -INT8_C( 69), -INT8_C( 17), INT8_C( 64), INT8_C( 126), -INT8_C( 53), -INT8_C( 3), INT8_C( 102), INT8_C( 122), INT8_C( 7), INT8_C( 32), -INT8_C( 120), -INT8_C( 13), INT8_C( 74), INT8_C( 66), -INT8_C( 10), INT8_C( 71), INT8_C( 87), -INT8_C( 50), -INT8_C( 107), -INT8_C( 7), -INT8_C( 55), -INT8_C( 48), -INT8_C( 23), -INT8_C( 45), -INT8_C( 21), INT8_C( 104), -INT8_C( 114), -INT8_C( 80), INT8_C( 89), INT8_C( 14), INT8_C( 87), INT8_C( 20), -INT8_C( 3), -INT8_C( 105), -INT8_C( 110), -INT8_C( 56), -INT8_C( 107), -INT8_C( 8) }, { -INT8_C( 94), -INT8_C( 63), -INT8_C( 81), INT8_C( 9), -INT8_C( 70), INT8_C( 14), -INT8_C( 83), -INT8_C( 116), -INT8_C( 106), -INT8_C( 99), -INT8_C( 3), INT8_C( 21), -INT8_C( 40), -INT8_C( 81), -INT8_C( 94), -INT8_C( 5), INT8_C( 50), -INT8_C( 17), -INT8_C( 100), -INT8_C( 64), -INT8_C( 20), -INT8_C( 69), -INT8_C( 59), -INT8_C( 93), -INT8_C( 53), -INT8_C( 89), -INT8_C( 69), -INT8_C( 17), INT8_C( 64), -INT8_C( 85), -INT8_C( 74), -INT8_C( 7), INT8_C( 102), INT8_C( 37), INT8_C( 7), -INT8_C( 45), -INT8_C( 120), -INT8_C( 46), INT8_C( 74), -INT8_C( 30), -INT8_C( 10), -INT8_C( 85), INT8_C( 23), -INT8_C( 50), -INT8_C( 107), -INT8_C( 43), -INT8_C( 55), -INT8_C( 115), -INT8_C( 23), -INT8_C( 45), -INT8_C( 21), INT8_C( 14), -INT8_C( 114), -INT8_C( 113), -INT8_C( 78), -INT8_C( 86), INT8_C( 87), -INT8_C( 60), -INT8_C( 30), -INT8_C( 105), -INT8_C( 110), -INT8_C( 104), -INT8_C( 107), -INT8_C( 36) } } }; for (size_t i = 0 ; 
i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_min_epi8(a, b); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_min_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int8_t src[64]; const simde__mmask64 k; const int8_t a[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { { { INT8_C( 69), INT8_C( 77), INT8_C( 104), INT8_C( 74), -INT8_C( 56), INT8_C( 84), INT8_C( 106), INT8_C( 109), -INT8_C( 29), -INT8_C( 33), INT8_C( 11), INT8_C( 57), INT8_C( 28), -INT8_C( 111), INT8_C( 107), -INT8_C( 70), INT8_C( 59), -INT8_C( 51), INT8_C( 3), -INT8_C( 78), INT8_C( 49), INT8_C( 6), INT8_C( 85), -INT8_C( 50), INT8_C( 24), INT8_C( 48), -INT8_C( 61), -INT8_C( 7), -INT8_C( 125), INT8_C( 2), -INT8_C( 65), -INT8_C( 55), INT8_C( 79), INT8_C( 39), INT8_C( 19), INT8_C( 24), INT8_C( 123), INT8_C( 126), -INT8_C( 123), INT8_C( 94), INT8_C( 93), -INT8_C( 112), -INT8_C( 105), INT8_C( 121), INT8_C( 34), INT8_C( 3), INT8_C( 52), INT8_C( 93), -INT8_C( 48), INT8_C( 55), INT8_C( 15), INT8_C( 2), INT8_C( 61), INT8_C( 100), -INT8_C( 48), INT8_C( 85), -INT8_C( 108), -INT8_C( 108), INT8_C( 79), INT8_C( 24), -INT8_C( 106), INT8_C( 14), -INT8_C( 31), -INT8_C( 27) }, UINT64_C(14920570094114174006), { INT8_C( 19), -INT8_C( 89), INT8_C( 73), INT8_C( 53), -INT8_C( 86), INT8_C( 125), -INT8_C( 110), INT8_C( 123), -INT8_C( 76), -INT8_C( 94), INT8_C( 125), -INT8_C( 15), INT8_C( 6), INT8_C( 77), INT8_C( 71), -INT8_C( 101), -INT8_C( 31), -INT8_C( 106), -INT8_C( 77), INT8_C( 119), -INT8_C( 92), -INT8_C( 108), INT8_C( 93), -INT8_C( 38), -INT8_C( 120), INT8_C( 90), -INT8_C( 116), -INT8_C( 5), -INT8_C( 35), -INT8_C( 100), -INT8_C( 54), -INT8_C( 15), INT8_C( 67), INT8_C( 19), INT8_C( 38), -INT8_C( 18), -INT8_C( 112), -INT8_C( 71), INT8_C( 105), INT8_C( 68), INT8_C( 91), -INT8_C( 
26), INT8_C( 54), INT8_C( 97), INT8_C( 51), INT8_C( 125), -INT8_C( 4), INT8_C( 21), INT8_C( 19), -INT8_C( 81), -INT8_C( 116), -INT8_C( 73), INT8_C( 67), -INT8_C( 23), -INT8_C( 110), -INT8_C( 52), INT8_C( 68), INT8_C( 30), -INT8_C( 57), INT8_C( 33), -INT8_C( 70), -INT8_C( 111), INT8_C( 18), -INT8_C( 3) }, { -INT8_C( 91), INT8_C( 57), -INT8_C( 21), INT8_C( 53), -INT8_C( 14), INT8_C( 84), INT8_C( 122), INT8_C( 77), INT8_C( 58), -INT8_C( 80), -INT8_C( 82), INT8_C( 110), INT8_C( 45), -INT8_C( 85), -INT8_C( 125), INT8_C( 64), INT8_C( 90), INT8_C( 15), -INT8_C( 9), -INT8_C( 98), -INT8_C( 7), -INT8_C( 119), INT8_C( 106), INT8_C( 61), -INT8_C( 89), INT8_C( 49), INT8_C( 94), INT8_C( 97), -INT8_C( 62), INT8_C( 113), INT8_C( 95), INT8_C( 103), -INT8_C( 86), INT8_C( 74), -INT8_C( 99), -INT8_C( 100), -INT8_C( 97), INT8_C( 23), -INT8_C( 23), -INT8_C( 39), -INT8_C( 57), -INT8_C( 105), INT8_C( 71), -INT8_C( 12), INT8_C( 66), -INT8_C( 54), INT8_C( 52), -INT8_C( 99), -INT8_C( 38), INT8_C( 43), INT8_C( 59), -INT8_C( 45), -INT8_C( 75), -INT8_C( 91), INT8_C( 16), INT8_C( 92), -INT8_C( 42), INT8_C( 110), -INT8_C( 66), -INT8_C( 104), -INT8_C( 33), INT8_C( 29), INT8_C( 0), -INT8_C( 119) }, { INT8_C( 69), -INT8_C( 89), -INT8_C( 21), INT8_C( 74), -INT8_C( 86), INT8_C( 84), INT8_C( 106), INT8_C( 109), -INT8_C( 29), -INT8_C( 33), -INT8_C( 82), INT8_C( 57), INT8_C( 6), -INT8_C( 85), -INT8_C( 125), -INT8_C( 101), -INT8_C( 31), -INT8_C( 51), -INT8_C( 77), -INT8_C( 98), -INT8_C( 92), -INT8_C( 119), INT8_C( 93), -INT8_C( 38), -INT8_C( 120), INT8_C( 48), -INT8_C( 61), -INT8_C( 7), -INT8_C( 62), -INT8_C( 100), -INT8_C( 65), -INT8_C( 15), INT8_C( 79), INT8_C( 19), INT8_C( 19), INT8_C( 24), -INT8_C( 112), -INT8_C( 71), -INT8_C( 23), INT8_C( 94), -INT8_C( 57), -INT8_C( 105), -INT8_C( 105), INT8_C( 121), INT8_C( 34), INT8_C( 3), INT8_C( 52), -INT8_C( 99), -INT8_C( 48), INT8_C( 55), INT8_C( 15), INT8_C( 2), -INT8_C( 75), INT8_C( 100), -INT8_C( 48), INT8_C( 85), -INT8_C( 42), INT8_C( 30), -INT8_C( 66), 
-INT8_C( 104), -INT8_C( 106), INT8_C( 14), INT8_C( 0), -INT8_C( 119) } }, { { INT8_C( 103), -INT8_C( 99), INT8_C( 37), INT8_C( 6), -INT8_C( 76), INT8_C( 14), -INT8_C( 32), INT8_C( 123), -INT8_C( 90), INT8_C( 39), INT8_C( 111), -INT8_C( 24), -INT8_C( 14), -INT8_C( 93), -INT8_C( 123), -INT8_C( 52), -INT8_C( 50), -INT8_C( 64), -INT8_C( 97), -INT8_C( 125), INT8_C( 101), -INT8_C( 81), -INT8_C( 32), INT8_C( 59), INT8_C( 29), -INT8_C( 98), -INT8_C( 44), -INT8_C( 3), -INT8_C( 69), -INT8_C( 44), -INT8_C( 122), INT8_C( 34), INT8_C( 113), -INT8_C( 84), INT8_C( 41), INT8_C( 37), -INT8_C( 70), INT8_C( 9), -INT8_C( 96), INT8_C( 96), INT8_C( 48), INT8_C( 15), INT8_C( 73), INT8_C( 34), -INT8_C( 78), -INT8_C( 50), -INT8_C( 18), INT8_MIN, -INT8_C( 113), -INT8_C( 115), INT8_C( 4), -INT8_C( 12), INT8_C( 60), -INT8_C( 28), INT8_C( 48), INT8_C( 90), -INT8_C( 126), INT8_C( 4), INT8_C( 87), INT8_C( 61), -INT8_C( 40), -INT8_C( 35), INT8_C( 95), INT8_C( 73) }, UINT64_C(14025351156619708553), { INT8_C( 29), -INT8_C( 19), -INT8_C( 28), -INT8_C( 49), -INT8_C( 68), -INT8_C( 45), INT8_C( 79), INT8_C( 75), INT8_C( 96), INT8_C( 83), INT8_C( 63), -INT8_C( 99), INT8_C( 55), INT8_C( 111), -INT8_C( 9), -INT8_C( 71), INT8_C( 115), INT8_C( 78), -INT8_C( 10), INT8_C( 75), INT8_C( 43), INT8_C( 86), -INT8_C( 108), -INT8_C( 75), -INT8_C( 34), INT8_C( 2), -INT8_C( 7), INT8_C( 112), INT8_C( 16), -INT8_C( 99), INT8_C( 50), INT8_C( 45), -INT8_C( 117), INT8_C( 22), -INT8_C( 4), INT8_C( 71), -INT8_C( 23), INT8_C( 76), -INT8_C( 110), INT8_C( 74), -INT8_C( 97), -INT8_C( 47), -INT8_C( 25), -INT8_C( 41), INT8_C( 65), -INT8_C( 34), -INT8_C( 112), -INT8_C( 76), INT8_C( 44), -INT8_C( 121), INT8_C( 0), INT8_C( 87), -INT8_C( 35), -INT8_C( 108), INT8_C( 12), -INT8_C( 69), -INT8_C( 105), INT8_C( 5), INT8_C( 43), -INT8_C( 89), -INT8_C( 93), INT8_C( 93), -INT8_C( 43), INT8_C( 46) }, { INT8_C( 116), -INT8_C( 47), INT8_C( 117), INT8_C( 93), INT8_C( 29), INT8_C( 7), -INT8_C( 89), -INT8_C( 67), -INT8_C( 40), -INT8_C( 114), 
-INT8_C( 108), INT8_C( 25), INT8_C( 108), INT8_C( 36), -INT8_C( 50), -INT8_C( 104), -INT8_C( 85), -INT8_C( 50), -INT8_C( 16), -INT8_C( 120), INT8_C( 98), -INT8_C( 4), INT8_C( 68), -INT8_C( 7), INT8_C( 2), INT8_C( 111), -INT8_C( 95), -INT8_C( 91), -INT8_C( 51), INT8_C( 118), -INT8_C( 45), INT8_C( 65), INT8_C( 71), INT8_C( 72), -INT8_C( 98), INT8_C( 101), INT8_C( 79), INT8_C( 70), INT8_C( 34), INT8_C( 39), -INT8_C( 44), -INT8_C( 74), INT8_C( 65), INT8_C( 65), -INT8_C( 38), INT8_C( 15), -INT8_C( 39), -INT8_C( 122), -INT8_C( 35), -INT8_C( 55), INT8_C( 14), INT8_C( 63), -INT8_C( 58), INT8_C( 82), INT8_C( 57), -INT8_C( 56), -INT8_C( 62), -INT8_C( 38), INT8_C( 109), -INT8_C( 113), INT8_C( 80), INT8_C( 64), -INT8_C( 48), -INT8_C( 105) }, { INT8_C( 29), -INT8_C( 99), INT8_C( 37), -INT8_C( 49), -INT8_C( 76), INT8_C( 14), -INT8_C( 32), -INT8_C( 67), -INT8_C( 90), INT8_C( 39), INT8_C( 111), -INT8_C( 99), -INT8_C( 14), -INT8_C( 93), -INT8_C( 123), -INT8_C( 104), -INT8_C( 50), -INT8_C( 50), -INT8_C( 16), -INT8_C( 120), INT8_C( 101), -INT8_C( 4), -INT8_C( 108), INT8_C( 59), INT8_C( 29), -INT8_C( 98), -INT8_C( 95), -INT8_C( 3), -INT8_C( 69), -INT8_C( 44), -INT8_C( 45), INT8_C( 34), -INT8_C( 117), -INT8_C( 84), INT8_C( 41), INT8_C( 37), -INT8_C( 23), INT8_C( 9), -INT8_C( 96), INT8_C( 39), INT8_C( 48), -INT8_C( 74), -INT8_C( 25), -INT8_C( 41), -INT8_C( 78), -INT8_C( 50), -INT8_C( 18), INT8_MIN, -INT8_C( 113), -INT8_C( 115), INT8_C( 0), -INT8_C( 12), INT8_C( 60), -INT8_C( 108), INT8_C( 48), -INT8_C( 69), -INT8_C( 126), -INT8_C( 38), INT8_C( 87), INT8_C( 61), -INT8_C( 40), -INT8_C( 35), -INT8_C( 48), -INT8_C( 105) } }, { { -INT8_C( 120), INT8_C( 110), -INT8_C( 4), -INT8_C( 41), -INT8_C( 76), INT8_C( 30), -INT8_C( 2), -INT8_C( 119), -INT8_C( 44), INT8_C( 63), -INT8_C( 54), -INT8_C( 81), INT8_C( 78), -INT8_C( 93), INT8_C( 53), INT8_C( 43), INT8_C( 109), INT8_C( 67), INT8_C( 107), INT8_C( 51), -INT8_C( 106), -INT8_C( 92), -INT8_C( 5), INT8_C( 88), INT8_C( 126), INT8_C( 104), -INT8_C( 
25), -INT8_C( 50), -INT8_C( 88), -INT8_C( 73), INT8_C( 101), INT8_C( 48), INT8_C( 37), INT8_C( 98), INT8_C( 7), -INT8_C( 38), INT8_MIN, INT8_C( 5), INT8_C( 99), INT8_C( 85), INT8_C( 69), INT8_C( 45), INT8_C( 4), -INT8_C( 109), -INT8_C( 48), INT8_C( 57), -INT8_C( 65), INT8_C( 61), INT8_C( 124), INT8_C( 42), INT8_C( 112), INT8_C( 18), -INT8_C( 50), INT8_C( 107), INT8_C( 106), INT8_C( 76), -INT8_C( 45), INT8_C( 81), INT8_C( 26), INT8_C( 123), INT8_C( 8), INT8_MAX, -INT8_C( 85), INT8_C( 46) }, UINT64_C(18282199651996709601), { -INT8_C( 104), -INT8_C( 69), -INT8_C( 112), INT8_C( 104), -INT8_C( 12), INT8_C( 79), -INT8_C( 90), INT8_C( 112), INT8_C( 121), INT8_C( 22), -INT8_C( 125), INT8_C( 71), -INT8_C( 126), -INT8_C( 19), -INT8_C( 109), INT8_C( 85), INT8_C( 63), -INT8_C( 83), -INT8_C( 47), INT8_C( 71), INT8_C( 45), INT8_C( 124), INT8_C( 117), INT8_C( 14), INT8_C( 47), INT8_C( 125), INT8_C( 112), -INT8_C( 25), -INT8_C( 24), INT8_C( 39), -INT8_C( 28), INT8_MIN, -INT8_C( 30), INT8_C( 116), -INT8_C( 23), -INT8_C( 42), -INT8_C( 60), -INT8_C( 113), INT8_C( 71), INT8_C( 61), -INT8_C( 91), -INT8_C( 54), -INT8_C( 123), INT8_C( 39), -INT8_C( 73), INT8_C( 24), INT8_C( 125), -INT8_C( 10), -INT8_C( 58), INT8_C( 78), INT8_C( 62), -INT8_C( 13), -INT8_C( 54), -INT8_C( 77), INT8_C( 1), -INT8_C( 7), INT8_C( 49), INT8_C( 114), -INT8_C( 32), INT8_C( 25), -INT8_C( 103), -INT8_C( 60), -INT8_C( 102), INT8_C( 124) }, { INT8_C( 57), -INT8_C( 125), INT8_C( 82), -INT8_C( 3), INT8_C( 18), -INT8_C( 103), INT8_C( 58), -INT8_C( 73), INT8_C( 99), -INT8_C( 65), -INT8_C( 33), INT8_C( 27), -INT8_C( 40), INT8_C( 92), INT8_C( 17), -INT8_C( 98), -INT8_C( 86), INT8_C( 79), -INT8_C( 111), INT8_C( 116), INT8_C( 3), -INT8_C( 110), INT8_C( 110), INT8_C( 52), INT8_C( 4), INT8_C( 78), INT8_C( 77), -INT8_C( 98), INT8_C( 19), -INT8_C( 25), INT8_C( 26), INT8_C( 76), INT8_C( 106), INT8_C( 108), INT8_C( 73), INT8_C( 124), INT8_C( 6), -INT8_C( 125), INT8_C( 52), INT8_C( 105), INT8_C( 67), INT8_C( 19), -INT8_C( 124), 
INT8_C( 27), INT8_C( 111), -INT8_C( 106), -INT8_C( 71), INT8_C( 25), -INT8_C( 27), INT8_C( 74), -INT8_C( 115), -INT8_C( 24), -INT8_C( 36), -INT8_C( 5), INT8_C( 28), -INT8_C( 31), INT8_C( 74), INT8_C( 106), INT8_MAX, INT8_C( 93), INT8_C( 81), -INT8_C( 103), -INT8_C( 87), -INT8_C( 68) }, { -INT8_C( 104), INT8_C( 110), -INT8_C( 4), -INT8_C( 41), -INT8_C( 76), -INT8_C( 103), -INT8_C( 90), -INT8_C( 73), -INT8_C( 44), -INT8_C( 65), -INT8_C( 54), -INT8_C( 81), -INT8_C( 126), -INT8_C( 19), INT8_C( 53), -INT8_C( 98), INT8_C( 109), INT8_C( 67), INT8_C( 107), INT8_C( 71), -INT8_C( 106), -INT8_C( 92), -INT8_C( 5), INT8_C( 88), INT8_C( 126), INT8_C( 78), -INT8_C( 25), -INT8_C( 50), -INT8_C( 88), -INT8_C( 25), -INT8_C( 28), INT8_C( 48), INT8_C( 37), INT8_C( 98), INT8_C( 7), -INT8_C( 42), -INT8_C( 60), -INT8_C( 125), INT8_C( 99), INT8_C( 61), -INT8_C( 91), -INT8_C( 54), INT8_C( 4), INT8_C( 27), -INT8_C( 48), -INT8_C( 106), -INT8_C( 71), INT8_C( 61), -INT8_C( 58), INT8_C( 74), -INT8_C( 115), INT8_C( 18), -INT8_C( 54), -INT8_C( 77), INT8_C( 106), -INT8_C( 31), INT8_C( 49), INT8_C( 81), -INT8_C( 32), INT8_C( 25), -INT8_C( 103), -INT8_C( 103), -INT8_C( 102), -INT8_C( 68) } }, { { INT8_C( 5), -INT8_C( 14), INT8_C( 56), INT8_C( 11), INT8_C( 117), INT8_C( 108), INT8_C( 117), -INT8_C( 72), INT8_MAX, -INT8_C( 7), -INT8_C( 45), -INT8_C( 18), -INT8_C( 113), -INT8_C( 116), INT8_C( 7), INT8_C( 117), -INT8_C( 42), -INT8_C( 107), INT8_C( 93), -INT8_C( 77), -INT8_C( 112), INT8_C( 122), -INT8_C( 108), -INT8_C( 38), -INT8_C( 28), INT8_C( 19), INT8_C( 55), INT8_C( 53), -INT8_C( 84), -INT8_C( 32), -INT8_C( 15), -INT8_C( 79), -INT8_C( 46), INT8_C( 42), -INT8_C( 67), INT8_C( 72), -INT8_C( 106), INT8_C( 50), INT8_C( 0), INT8_C( 22), INT8_C( 43), -INT8_C( 44), INT8_C( 4), -INT8_C( 69), INT8_C( 96), INT8_C( 12), INT8_C( 48), INT8_C( 55), -INT8_C( 95), -INT8_C( 115), -INT8_C( 22), INT8_C( 49), INT8_C( 7), INT8_C( 126), INT8_C( 12), -INT8_C( 21), -INT8_C( 111), INT8_C( 67), INT8_C( 33), INT8_C( 61), 
INT8_C( 36), INT8_C( 18), -INT8_C( 18), -INT8_C( 10) }, UINT64_C( 714172237879356220), { INT8_C( 19), -INT8_C( 19), -INT8_C( 60), INT8_C( 115), -INT8_C( 7), -INT8_C( 12), -INT8_C( 86), -INT8_C( 102), -INT8_C( 127), -INT8_C( 108), -INT8_C( 52), -INT8_C( 119), INT8_C( 18), -INT8_C( 40), INT8_C( 116), -INT8_C( 93), INT8_C( 27), -INT8_C( 107), -INT8_C( 32), INT8_C( 63), -INT8_C( 88), -INT8_C( 49), INT8_C( 54), -INT8_C( 28), INT8_C( 122), INT8_C( 116), -INT8_C( 73), INT8_C( 88), -INT8_C( 77), -INT8_C( 96), INT8_C( 97), -INT8_C( 58), -INT8_C( 114), INT8_C( 37), INT8_C( 58), -INT8_C( 121), INT8_C( 25), -INT8_C( 28), INT8_C( 34), -INT8_C( 102), INT8_C( 121), -INT8_C( 18), INT8_C( 35), -INT8_C( 117), -INT8_C( 58), -INT8_C( 104), INT8_C( 47), -INT8_C( 31), INT8_C( 45), INT8_C( 15), INT8_C( 33), -INT8_C( 43), -INT8_C( 34), INT8_C( 87), -INT8_C( 70), INT8_C( 89), -INT8_C( 53), INT8_C( 113), -INT8_C( 79), INT8_MAX, INT8_C( 18), INT8_C( 18), INT8_C( 69), -INT8_C( 96) }, { INT8_C( 55), INT8_MAX, INT8_C( 39), INT8_C( 80), INT8_C( 100), INT8_C( 73), -INT8_C( 22), -INT8_C( 35), INT8_C( 55), INT8_C( 14), INT8_C( 104), -INT8_C( 3), -INT8_C( 90), -INT8_C( 105), -INT8_C( 33), -INT8_C( 45), -INT8_C( 89), INT8_C( 0), -INT8_C( 87), -INT8_C( 123), INT8_C( 87), INT8_C( 99), -INT8_C( 34), INT8_C( 34), -INT8_C( 44), -INT8_C( 113), -INT8_C( 95), -INT8_C( 26), -INT8_C( 95), -INT8_C( 25), -INT8_C( 122), -INT8_C( 40), INT8_C( 102), -INT8_C( 82), INT8_C( 40), -INT8_C( 54), -INT8_C( 9), INT8_C( 19), -INT8_C( 89), INT8_C( 47), INT8_C( 33), INT8_C( 16), INT8_C( 44), -INT8_C( 57), -INT8_C( 89), INT8_C( 11), -INT8_C( 102), INT8_C( 78), INT8_C( 11), INT8_C( 67), -INT8_C( 44), INT8_C( 98), -INT8_C( 90), -INT8_C( 78), -INT8_C( 123), INT8_C( 123), INT8_C( 66), INT8_C( 38), INT8_C( 97), -INT8_C( 29), INT8_C( 13), -INT8_C( 24), -INT8_C( 68), INT8_C( 116) }, { INT8_C( 5), -INT8_C( 14), -INT8_C( 60), INT8_C( 80), -INT8_C( 7), -INT8_C( 12), INT8_C( 117), -INT8_C( 72), -INT8_C( 127), -INT8_C( 108), -INT8_C( 45), 
-INT8_C( 119), -INT8_C( 113), -INT8_C( 105), INT8_C( 7), -INT8_C( 93), -INT8_C( 42), -INT8_C( 107), -INT8_C( 87), -INT8_C( 123), -INT8_C( 88), -INT8_C( 49), -INT8_C( 108), -INT8_C( 38), -INT8_C( 44), -INT8_C( 113), INT8_C( 55), INT8_C( 53), -INT8_C( 95), -INT8_C( 32), -INT8_C( 122), -INT8_C( 58), -INT8_C( 114), INT8_C( 42), INT8_C( 40), -INT8_C( 121), -INT8_C( 9), INT8_C( 50), -INT8_C( 89), -INT8_C( 102), INT8_C( 33), -INT8_C( 18), INT8_C( 35), -INT8_C( 117), -INT8_C( 89), -INT8_C( 104), INT8_C( 48), INT8_C( 55), INT8_C( 11), -INT8_C( 115), -INT8_C( 22), -INT8_C( 43), INT8_C( 7), -INT8_C( 78), -INT8_C( 123), INT8_C( 89), -INT8_C( 53), INT8_C( 67), INT8_C( 33), -INT8_C( 29), INT8_C( 36), INT8_C( 18), -INT8_C( 18), -INT8_C( 10) } }, { { -INT8_C( 106), -INT8_C( 28), INT8_C( 62), -INT8_C( 115), -INT8_C( 9), -INT8_C( 26), -INT8_C( 68), INT8_C( 24), -INT8_C( 10), -INT8_C( 23), -INT8_C( 33), -INT8_C( 99), -INT8_C( 12), INT8_C( 122), -INT8_C( 20), INT8_C( 0), -INT8_C( 67), -INT8_C( 64), INT8_C( 98), INT8_C( 100), INT8_C( 114), -INT8_C( 25), -INT8_C( 33), -INT8_C( 76), INT8_C( 14), INT8_C( 64), -INT8_C( 104), INT8_C( 27), INT8_C( 40), INT8_C( 84), -INT8_C( 113), -INT8_C( 66), INT8_C( 56), -INT8_C( 50), INT8_C( 76), INT8_C( 48), -INT8_C( 76), INT8_C( 8), INT8_C( 72), -INT8_C( 86), -INT8_C( 15), INT8_C( 40), INT8_C( 71), -INT8_C( 26), -INT8_C( 94), INT8_C( 51), -INT8_C( 26), INT8_C( 95), -INT8_C( 13), INT8_C( 72), -INT8_C( 61), INT8_C( 102), INT8_C( 48), -INT8_C( 94), INT8_C( 26), INT8_C( 62), -INT8_C( 29), -INT8_C( 78), INT8_C( 89), INT8_C( 11), INT8_C( 6), -INT8_C( 23), -INT8_C( 54), INT8_C( 63) }, UINT64_C( 1159033820397115063), { -INT8_C( 33), INT8_C( 92), -INT8_C( 10), -INT8_C( 127), -INT8_C( 112), -INT8_C( 36), -INT8_C( 31), -INT8_C( 125), INT8_C( 36), -INT8_C( 92), -INT8_C( 23), INT8_C( 84), INT8_C( 71), INT8_C( 4), -INT8_C( 110), INT8_C( 42), -INT8_C( 74), -INT8_C( 20), INT8_C( 53), -INT8_C( 67), -INT8_C( 43), -INT8_C( 1), -INT8_C( 4), -INT8_C( 116), INT8_C( 21), 
INT8_C( 107), -INT8_C( 9), INT8_C( 52), INT8_C( 34), INT8_C( 12), INT8_C( 68), INT8_C( 2), INT8_C( 104), INT8_C( 58), -INT8_C( 125), -INT8_C( 8), INT8_C( 22), INT8_C( 100), INT8_C( 124), INT8_C( 58), INT8_C( 9), INT8_C( 101), -INT8_C( 113), INT8_C( 80), INT8_C( 105), INT8_C( 33), INT8_C( 122), INT8_C( 32), INT8_C( 13), -INT8_C( 81), -INT8_C( 35), -INT8_C( 30), -INT8_C( 81), -INT8_C( 39), INT8_C( 110), -INT8_C( 60), INT8_C( 68), INT8_C( 101), -INT8_C( 8), INT8_C( 102), INT8_C( 113), INT8_C( 60), INT8_C( 104), -INT8_C( 38) }, { INT8_C( 118), -INT8_C( 20), -INT8_C( 46), -INT8_C( 116), INT8_C( 80), INT8_C( 78), -INT8_C( 57), INT8_C( 89), -INT8_C( 76), INT8_C( 86), -INT8_C( 87), INT8_C( 29), INT8_C( 119), INT8_C( 35), INT8_C( 61), -INT8_C( 123), -INT8_C( 45), INT8_C( 26), INT8_C( 103), -INT8_C( 126), -INT8_C( 13), -INT8_C( 42), INT8_C( 70), INT8_C( 55), INT8_C( 59), INT8_C( 63), -INT8_C( 98), -INT8_C( 83), INT8_C( 123), INT8_C( 6), -INT8_C( 121), -INT8_C( 14), -INT8_C( 14), INT8_C( 89), INT8_C( 126), INT8_C( 67), -INT8_C( 88), INT8_C( 69), -INT8_C( 100), INT8_C( 92), -INT8_C( 101), INT8_C( 70), INT8_C( 121), INT8_C( 19), INT8_C( 105), -INT8_C( 73), -INT8_C( 104), INT8_C( 60), -INT8_C( 47), -INT8_C( 1), -INT8_C( 66), -INT8_C( 59), -INT8_C( 43), INT8_C( 5), -INT8_C( 4), INT8_C( 17), INT8_C( 68), -INT8_C( 102), -INT8_C( 66), -INT8_C( 65), -INT8_C( 95), INT8_C( 69), -INT8_C( 79), -INT8_C( 109) }, { -INT8_C( 33), -INT8_C( 20), -INT8_C( 46), -INT8_C( 115), -INT8_C( 112), -INT8_C( 36), -INT8_C( 68), -INT8_C( 125), -INT8_C( 10), -INT8_C( 92), -INT8_C( 87), -INT8_C( 99), INT8_C( 71), INT8_C( 122), -INT8_C( 20), INT8_C( 0), -INT8_C( 74), -INT8_C( 20), INT8_C( 53), -INT8_C( 126), INT8_C( 114), -INT8_C( 42), -INT8_C( 4), -INT8_C( 76), INT8_C( 21), INT8_C( 63), -INT8_C( 104), -INT8_C( 83), INT8_C( 40), INT8_C( 6), -INT8_C( 121), -INT8_C( 66), INT8_C( 56), INT8_C( 58), -INT8_C( 125), -INT8_C( 8), -INT8_C( 88), INT8_C( 8), INT8_C( 72), -INT8_C( 86), -INT8_C( 101), INT8_C( 70), 
-INT8_C( 113), -INT8_C( 26), INT8_C( 105), -INT8_C( 73), -INT8_C( 26), INT8_C( 32), -INT8_C( 47), INT8_C( 72), -INT8_C( 66), INT8_C( 102), -INT8_C( 81), -INT8_C( 94), INT8_C( 26), INT8_C( 62), -INT8_C( 29), -INT8_C( 78), INT8_C( 89), INT8_C( 11), -INT8_C( 95), -INT8_C( 23), -INT8_C( 54), INT8_C( 63) } }, { { -INT8_C( 98), INT8_C( 48), -INT8_C( 42), INT8_C( 70), INT8_C( 117), INT8_C( 115), -INT8_C( 94), INT8_C( 17), -INT8_C( 71), INT8_C( 28), INT8_C( 36), INT8_C( 34), -INT8_C( 45), -INT8_C( 68), INT8_C( 95), -INT8_C( 92), -INT8_C( 69), INT8_C( 29), INT8_C( 105), -INT8_C( 111), INT8_C( 34), INT8_C( 102), -INT8_C( 94), INT8_C( 102), INT8_C( 0), INT8_C( 96), INT8_C( 38), -INT8_C( 95), -INT8_C( 91), -INT8_C( 41), INT8_C( 53), INT8_C( 67), INT8_C( 7), INT8_C( 11), -INT8_C( 118), INT8_C( 125), INT8_C( 126), INT8_C( 44), -INT8_C( 114), INT8_C( 55), INT8_C( 72), -INT8_C( 78), INT8_C( 90), INT8_C( 27), INT8_C( 110), -INT8_C( 71), -INT8_C( 64), INT8_C( 41), -INT8_C( 42), INT8_C( 41), -INT8_C( 70), -INT8_C( 7), -INT8_C( 113), INT8_C( 92), INT8_C( 95), -INT8_C( 112), -INT8_C( 68), -INT8_C( 123), INT8_C( 49), INT8_C( 97), INT8_C( 93), INT8_C( 102), -INT8_C( 91), INT8_C( 100) }, UINT64_C(11828826861962604402), { INT8_C( 33), -INT8_C( 126), -INT8_C( 65), -INT8_C( 113), INT8_C( 59), INT8_MAX, -INT8_C( 71), INT8_C( 17), -INT8_C( 87), INT8_C( 115), INT8_C( 10), INT8_C( 56), -INT8_C( 48), INT8_C( 106), -INT8_C( 56), -INT8_C( 116), -INT8_C( 17), -INT8_C( 6), -INT8_C( 18), INT8_C( 76), INT8_C( 96), -INT8_C( 109), -INT8_C( 79), -INT8_C( 46), -INT8_C( 62), -INT8_C( 110), -INT8_C( 61), INT8_C( 29), INT8_C( 2), -INT8_C( 21), -INT8_C( 63), INT8_C( 35), INT8_C( 109), -INT8_C( 127), -INT8_C( 77), -INT8_C( 88), INT8_C( 0), INT8_C( 108), -INT8_C( 71), -INT8_C( 87), -INT8_C( 33), -INT8_C( 60), -INT8_C( 30), -INT8_C( 81), INT8_C( 46), -INT8_C( 86), INT8_C( 60), INT8_C( 29), -INT8_C( 92), INT8_C( 42), INT8_C( 106), INT8_C( 5), -INT8_C( 67), INT8_C( 27), -INT8_C( 41), INT8_MAX, -INT8_C( 83), 
-INT8_C( 102), -INT8_C( 100), -INT8_C( 81), -INT8_C( 123), INT8_C( 94), -INT8_C( 45), -INT8_C( 14) }, { -INT8_C( 33), -INT8_C( 122), -INT8_C( 102), -INT8_C( 33), -INT8_C( 14), INT8_C( 84), -INT8_C( 119), -INT8_C( 47), INT8_C( 24), INT8_C( 107), -INT8_C( 127), INT8_C( 70), INT8_C( 21), -INT8_C( 67), INT8_C( 99), -INT8_C( 70), -INT8_C( 25), -INT8_C( 51), -INT8_C( 65), -INT8_C( 92), -INT8_C( 24), -INT8_C( 106), INT8_C( 35), -INT8_C( 106), INT8_C( 49), -INT8_C( 65), INT8_C( 69), -INT8_C( 74), INT8_C( 29), INT8_C( 24), -INT8_C( 87), -INT8_C( 4), -INT8_C( 98), INT8_C( 67), -INT8_C( 36), -INT8_C( 112), -INT8_C( 105), INT8_C( 101), INT8_C( 98), -INT8_C( 81), -INT8_C( 48), -INT8_C( 29), -INT8_C( 11), -INT8_C( 27), -INT8_C( 96), INT8_C( 89), -INT8_C( 97), -INT8_C( 121), INT8_C( 38), INT8_C( 94), INT8_C( 43), INT8_C( 15), -INT8_C( 11), INT8_C( 78), -INT8_C( 91), INT8_C( 38), INT8_C( 13), -INT8_C( 22), -INT8_C( 36), INT8_C( 43), INT8_C( 3), -INT8_C( 123), INT8_C( 39), -INT8_C( 95) }, { -INT8_C( 98), -INT8_C( 126), -INT8_C( 42), INT8_C( 70), -INT8_C( 14), INT8_C( 84), -INT8_C( 119), INT8_C( 17), -INT8_C( 87), INT8_C( 107), -INT8_C( 127), INT8_C( 56), -INT8_C( 45), -INT8_C( 67), INT8_C( 95), -INT8_C( 92), -INT8_C( 25), INT8_C( 29), INT8_C( 105), -INT8_C( 111), INT8_C( 34), -INT8_C( 109), -INT8_C( 79), -INT8_C( 106), INT8_C( 0), INT8_C( 96), INT8_C( 38), -INT8_C( 95), INT8_C( 2), -INT8_C( 21), -INT8_C( 87), -INT8_C( 4), -INT8_C( 98), -INT8_C( 127), -INT8_C( 118), -INT8_C( 112), -INT8_C( 105), INT8_C( 44), -INT8_C( 71), INT8_C( 55), -INT8_C( 48), -INT8_C( 60), -INT8_C( 30), -INT8_C( 81), INT8_C( 110), -INT8_C( 86), -INT8_C( 97), INT8_C( 41), -INT8_C( 42), INT8_C( 41), -INT8_C( 70), INT8_C( 5), -INT8_C( 113), INT8_C( 27), INT8_C( 95), -INT8_C( 112), -INT8_C( 68), -INT8_C( 123), -INT8_C( 100), INT8_C( 97), INT8_C( 93), -INT8_C( 123), -INT8_C( 91), -INT8_C( 95) } }, { { -INT8_C( 55), INT8_C( 3), INT8_C( 50), INT8_C( 96), INT8_C( 104), -INT8_C( 108), INT8_C( 16), INT8_C( 56), INT8_C( 
119), INT8_C( 5), INT8_C( 30), INT8_C( 23), INT8_C( 94), -INT8_C( 67), -INT8_C( 98), -INT8_C( 123), INT8_C( 28), -INT8_C( 55), -INT8_C( 108), INT8_C( 17), INT8_C( 23), INT8_C( 57), INT8_C( 55), INT8_C( 36), INT8_C( 35), INT8_C( 19), INT8_C( 79), INT8_C( 38), -INT8_C( 103), INT8_C( 119), -INT8_C( 56), INT8_C( 98), INT8_C( 122), -INT8_C( 6), -INT8_C( 62), -INT8_C( 29), -INT8_C( 114), -INT8_C( 46), INT8_C( 27), INT8_C( 5), -INT8_C( 40), INT8_C( 57), INT8_C( 28), INT8_C( 54), -INT8_C( 9), -INT8_C( 70), -INT8_C( 69), INT8_C( 19), -INT8_C( 125), INT8_C( 79), INT8_C( 36), -INT8_C( 102), -INT8_C( 120), INT8_C( 91), -INT8_C( 66), -INT8_C( 84), INT8_C( 110), INT8_C( 14), -INT8_C( 46), INT8_C( 7), -INT8_C( 123), -INT8_C( 102), INT8_C( 105), -INT8_C( 1) }, UINT64_C(15431583015668690068), { INT8_C( 55), INT8_C( 67), INT8_C( 13), INT8_C( 46), -INT8_C( 3), -INT8_C( 56), INT8_C( 65), INT8_MIN, INT8_C( 24), INT8_C( 101), INT8_C( 26), -INT8_C( 96), -INT8_C( 64), -INT8_C( 39), INT8_C( 76), INT8_C( 47), -INT8_C( 25), INT8_C( 31), INT8_C( 54), INT8_C( 108), -INT8_C( 71), -INT8_C( 96), INT8_C( 107), INT8_C( 78), -INT8_C( 52), INT8_C( 78), INT8_C( 112), -INT8_C( 54), INT8_C( 76), -INT8_C( 104), -INT8_C( 95), -INT8_C( 125), -INT8_C( 37), -INT8_C( 82), -INT8_C( 78), -INT8_C( 39), INT8_C( 118), -INT8_C( 13), INT8_C( 89), -INT8_C( 114), INT8_C( 89), INT8_C( 116), INT8_C( 47), INT8_C( 25), INT8_C( 77), INT8_C( 123), INT8_C( 72), INT8_C( 52), -INT8_C( 102), INT8_MAX, -INT8_C( 96), INT8_C( 84), INT8_C( 31), INT8_C( 11), -INT8_C( 94), -INT8_C( 21), INT8_C( 89), INT8_C( 18), -INT8_C( 75), -INT8_C( 91), -INT8_C( 86), INT8_C( 86), INT8_C( 41), -INT8_C( 122) }, { INT8_C( 4), -INT8_C( 37), INT8_C( 95), INT8_C( 123), -INT8_C( 50), -INT8_C( 72), INT8_C( 9), INT8_C( 39), INT8_C( 44), INT8_C( 56), INT8_C( 65), INT8_C( 121), -INT8_C( 76), -INT8_C( 119), -INT8_C( 83), INT8_C( 78), INT8_C( 8), INT8_C( 77), -INT8_C( 94), INT8_C( 39), INT8_C( 89), INT8_C( 68), INT8_C( 18), -INT8_C( 78), INT8_C( 87), -INT8_C( 
56), INT8_C( 88), INT8_C( 1), INT8_C( 30), -INT8_C( 127), -INT8_C( 121), INT8_C( 35), INT8_C( 92), -INT8_C( 26), -INT8_C( 98), INT8_C( 42), -INT8_C( 97), -INT8_C( 89), INT8_C( 82), -INT8_C( 53), -INT8_C( 32), -INT8_C( 109), INT8_C( 69), -INT8_C( 108), INT8_C( 28), -INT8_C( 14), -INT8_C( 30), INT8_C( 37), INT8_C( 64), -INT8_C( 123), INT8_C( 76), -INT8_C( 103), -INT8_C( 55), INT8_C( 95), INT8_C( 75), INT8_C( 32), INT8_C( 39), -INT8_C( 93), INT8_C( 34), INT8_C( 69), INT8_C( 36), -INT8_C( 87), INT8_C( 104), INT8_MIN }, { -INT8_C( 55), INT8_C( 3), INT8_C( 13), INT8_C( 96), -INT8_C( 50), -INT8_C( 108), INT8_C( 16), INT8_MIN, INT8_C( 119), INT8_C( 5), INT8_C( 26), -INT8_C( 96), INT8_C( 94), -INT8_C( 119), -INT8_C( 98), -INT8_C( 123), INT8_C( 28), INT8_C( 31), -INT8_C( 108), INT8_C( 17), INT8_C( 23), -INT8_C( 96), INT8_C( 18), -INT8_C( 78), INT8_C( 35), -INT8_C( 56), INT8_C( 79), INT8_C( 38), -INT8_C( 103), -INT8_C( 127), -INT8_C( 56), INT8_C( 98), INT8_C( 122), -INT8_C( 82), -INT8_C( 98), -INT8_C( 39), -INT8_C( 97), -INT8_C( 89), INT8_C( 82), -INT8_C( 114), -INT8_C( 40), -INT8_C( 109), INT8_C( 47), -INT8_C( 108), INT8_C( 28), -INT8_C( 14), -INT8_C( 30), INT8_C( 37), -INT8_C( 102), -INT8_C( 123), -INT8_C( 96), -INT8_C( 102), -INT8_C( 120), INT8_C( 11), -INT8_C( 66), -INT8_C( 84), INT8_C( 110), -INT8_C( 93), -INT8_C( 75), INT8_C( 7), -INT8_C( 86), -INT8_C( 102), INT8_C( 41), INT8_MIN } }, { { -INT8_C( 112), INT8_C( 6), -INT8_C( 85), INT8_C( 47), -INT8_C( 82), -INT8_C( 3), -INT8_C( 6), -INT8_C( 114), -INT8_C( 112), INT8_C( 63), INT8_C( 34), -INT8_C( 84), INT8_C( 50), INT8_C( 4), -INT8_C( 47), INT8_C( 114), -INT8_C( 119), INT8_C( 30), INT8_C( 11), INT8_C( 83), INT8_C( 125), INT8_C( 86), INT8_C( 115), -INT8_C( 92), -INT8_C( 6), -INT8_C( 107), -INT8_C( 23), INT8_C( 30), INT8_C( 63), INT8_C( 82), -INT8_C( 97), -INT8_C( 49), INT8_C( 88), INT8_C( 74), -INT8_C( 2), INT8_C( 6), INT8_C( 71), -INT8_C( 8), -INT8_C( 108), -INT8_C( 41), INT8_C( 56), -INT8_C( 74), -INT8_C( 125), INT8_C( 
106), -INT8_C( 69), INT8_C( 85), -INT8_C( 36), INT8_C( 68), INT8_C( 115), -INT8_C( 25), -INT8_C( 105), -INT8_C( 16), INT8_C( 61), INT8_C( 11), -INT8_C( 108), INT8_C( 55), -INT8_C( 96), INT8_C( 125), INT8_C( 86), -INT8_C( 33), -INT8_C( 49), -INT8_C( 11), -INT8_C( 82), INT8_C( 40) }, UINT64_C(15951120570904390719), { INT8_C( 121), -INT8_C( 32), INT8_C( 71), INT8_C( 52), INT8_C( 53), INT8_C( 35), INT8_C( 121), -INT8_C( 88), INT8_C( 10), INT8_C( 16), -INT8_C( 104), INT8_C( 71), INT8_C( 27), INT8_C( 44), INT8_MAX, -INT8_C( 68), -INT8_C( 86), -INT8_C( 43), -INT8_C( 101), INT8_C( 121), -INT8_C( 54), INT8_C( 74), -INT8_C( 95), INT8_C( 9), -INT8_C( 10), -INT8_C( 48), -INT8_C( 113), -INT8_C( 101), -INT8_C( 109), -INT8_C( 20), INT8_C( 120), INT8_C( 12), -INT8_C( 52), -INT8_C( 65), INT8_C( 65), INT8_C( 2), -INT8_C( 30), -INT8_C( 70), -INT8_C( 86), -INT8_C( 20), -INT8_C( 54), INT8_C( 67), INT8_C( 52), -INT8_C( 26), INT8_C( 111), -INT8_C( 77), -INT8_C( 94), INT8_C( 25), -INT8_C( 120), INT8_C( 61), -INT8_C( 109), INT8_C( 82), -INT8_C( 121), INT8_C( 52), INT8_C( 91), INT8_C( 126), INT8_C( 4), -INT8_C( 22), INT8_C( 25), -INT8_C( 105), -INT8_C( 42), -INT8_C( 110), -INT8_C( 92), -INT8_C( 94) }, { INT8_C( 81), -INT8_C( 27), -INT8_C( 92), INT8_C( 52), -INT8_C( 97), INT8_C( 79), INT8_C( 32), INT8_C( 105), -INT8_C( 110), INT8_C( 84), INT8_C( 79), INT8_C( 1), INT8_C( 7), -INT8_C( 15), INT8_C( 27), -INT8_C( 113), INT8_C( 47), -INT8_C( 82), -INT8_C( 31), -INT8_C( 74), -INT8_C( 30), INT8_C( 60), INT8_C( 52), -INT8_C( 25), INT8_C( 38), INT8_C( 78), INT8_C( 126), -INT8_C( 4), -INT8_C( 32), INT8_C( 34), -INT8_C( 97), INT8_C( 49), INT8_C( 7), INT8_C( 67), INT8_C( 101), -INT8_C( 90), -INT8_C( 110), -INT8_C( 122), INT8_C( 16), INT8_C( 36), -INT8_C( 38), INT8_C( 95), INT8_C( 38), -INT8_C( 30), INT8_C( 81), INT8_C( 65), INT8_C( 113), INT8_MIN, -INT8_C( 17), INT8_C( 83), INT8_C( 54), -INT8_C( 47), -INT8_C( 113), INT8_C( 107), -INT8_C( 72), -INT8_C( 74), -INT8_C( 71), INT8_C( 55), -INT8_C( 78), 
-INT8_C( 103), INT8_C( 89), INT8_C( 81), -INT8_C( 54), INT8_C( 97) }, { INT8_C( 81), -INT8_C( 32), -INT8_C( 92), INT8_C( 52), -INT8_C( 97), INT8_C( 35), -INT8_C( 6), -INT8_C( 114), -INT8_C( 112), INT8_C( 63), -INT8_C( 104), INT8_C( 1), INT8_C( 50), -INT8_C( 15), -INT8_C( 47), -INT8_C( 113), -INT8_C( 119), -INT8_C( 82), -INT8_C( 101), -INT8_C( 74), INT8_C( 125), INT8_C( 60), INT8_C( 115), -INT8_C( 92), -INT8_C( 6), -INT8_C( 48), -INT8_C( 113), INT8_C( 30), INT8_C( 63), INT8_C( 82), -INT8_C( 97), INT8_C( 12), -INT8_C( 52), INT8_C( 74), INT8_C( 65), INT8_C( 6), INT8_C( 71), -INT8_C( 122), -INT8_C( 108), -INT8_C( 20), -INT8_C( 54), INT8_C( 67), -INT8_C( 125), INT8_C( 106), -INT8_C( 69), INT8_C( 85), -INT8_C( 94), INT8_MIN, -INT8_C( 120), -INT8_C( 25), -INT8_C( 109), -INT8_C( 47), -INT8_C( 121), INT8_C( 11), -INT8_C( 72), INT8_C( 55), -INT8_C( 71), INT8_C( 125), -INT8_C( 78), -INT8_C( 105), -INT8_C( 42), -INT8_C( 11), -INT8_C( 92), -INT8_C( 94) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi8(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_mask_min_epi8(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; } /* Test for simde_mm512_maskz_min_epi8. Each test_vec entry holds a mask k, two 64-element int8_t inputs a and b, and the expected 64-element result r. The loop after the table loads a and b with simde_mm512_loadu_epi8, calls simde_mm512_maskz_min_epi8(k, a, b) and asserts that the result equals r. Returns 0 once every entry has been checked. NOTE(review): in the table, r appears to hold min(a, b) in lanes whose bit in k is set and 0 elsewhere (spot-checked on the first lanes of the first entry only) -- confirm against the implementation. */ static int test_simde_mm512_maskz_min_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask64 k; const int8_t a[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { { UINT64_C(14163406931381216590), { INT8_C( 107), INT8_C( 119), -INT8_C( 72), INT8_C( 51), -INT8_C( 83), -INT8_C( 51), INT8_C( 101), INT8_C( 96), -INT8_C( 35), INT8_C( 42), -INT8_C( 114), -INT8_C( 41), -INT8_C( 96), -INT8_C( 15), -INT8_C( 3), INT8_C( 112), -INT8_C( 7), INT8_C( 37), INT8_C( 15), -INT8_C( 120), -INT8_C( 21), INT8_C( 107), INT8_C( 77), INT8_C( 58), -INT8_C( 48), -INT8_C( 107), INT8_C( 100), INT8_C( 97), INT8_C( 29), 
-INT8_C( 14), INT8_C( 37), -INT8_C( 120), INT8_C( 105), -INT8_C( 35), -INT8_C( 68), INT8_C( 22), -INT8_C( 86), INT8_C( 33), INT8_C( 118), -INT8_C( 121), INT8_C( 75), INT8_C( 4), INT8_C( 94), -INT8_C( 21), -INT8_C( 10), INT8_C( 91), INT8_C( 91), -INT8_C( 17), INT8_MIN, INT8_C( 106), INT8_C( 119), INT8_C( 108), -INT8_C( 42), -INT8_C( 60), -INT8_C( 90), -INT8_C( 90), INT8_C( 90), INT8_C( 10), INT8_C( 7), INT8_C( 119), -INT8_C( 3), INT8_C( 44), -INT8_C( 1), INT8_C( 102) }, { INT8_C( 9), -INT8_C( 69), INT8_C( 125), -INT8_C( 77), -INT8_C( 36), -INT8_C( 13), INT8_C( 59), INT8_C( 39), -INT8_C( 8), -INT8_C( 103), INT8_C( 19), -INT8_C( 18), -INT8_C( 11), INT8_C( 110), -INT8_C( 35), INT8_C( 117), -INT8_C( 39), INT8_C( 85), -INT8_C( 31), -INT8_C( 81), INT8_C( 25), -INT8_C( 121), INT8_C( 85), INT8_C( 115), -INT8_C( 110), INT8_C( 93), -INT8_C( 22), -INT8_C( 113), -INT8_C( 119), -INT8_C( 22), -INT8_C( 11), -INT8_C( 109), -INT8_C( 91), INT8_C( 114), INT8_C( 70), -INT8_C( 126), INT8_C( 102), -INT8_C( 127), -INT8_C( 87), INT8_C( 94), INT8_C( 27), -INT8_C( 68), INT8_C( 76), INT8_C( 16), INT8_C( 43), INT8_C( 41), -INT8_C( 123), INT8_C( 4), INT8_C( 126), INT8_C( 103), -INT8_C( 77), -INT8_C( 104), -INT8_C( 18), INT8_C( 8), INT8_C( 11), INT8_MIN, INT8_C( 101), -INT8_C( 10), INT8_C( 15), -INT8_C( 17), -INT8_C( 32), INT8_C( 5), -INT8_C( 126), -INT8_C( 123) }, { INT8_C( 0), -INT8_C( 69), -INT8_C( 72), -INT8_C( 77), INT8_C( 0), INT8_C( 0), INT8_C( 59), INT8_C( 0), -INT8_C( 35), INT8_C( 0), -INT8_C( 114), INT8_C( 0), INT8_C( 0), -INT8_C( 15), -INT8_C( 35), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 120), INT8_C( 0), INT8_C( 0), INT8_C( 77), INT8_C( 0), INT8_C( 0), -INT8_C( 107), INT8_C( 0), -INT8_C( 113), INT8_C( 0), -INT8_C( 22), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 86), INT8_C( 0), INT8_C( 0), -INT8_C( 121), INT8_C( 27), -INT8_C( 68), INT8_C( 76), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 17), INT8_C( 0), INT8_C( 103), 
-INT8_C( 77), -INT8_C( 104), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 7), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 126), -INT8_C( 123) } }, { UINT64_C( 7294618956550621303), { INT8_C( 109), -INT8_C( 121), INT8_C( 117), -INT8_C( 104), -INT8_C( 79), -INT8_C( 6), -INT8_C( 100), INT8_C( 47), INT8_C( 97), INT8_C( 79), -INT8_C( 57), INT8_C( 80), INT8_C( 88), -INT8_C( 45), -INT8_C( 48), -INT8_C( 67), -INT8_C( 55), -INT8_C( 32), -INT8_C( 84), -INT8_C( 87), -INT8_C( 27), INT8_C( 46), INT8_C( 46), INT8_C( 92), -INT8_C( 9), INT8_C( 54), INT8_C( 58), INT8_C( 65), -INT8_C( 25), INT8_C( 117), -INT8_C( 90), INT8_C( 84), -INT8_C( 3), INT8_C( 27), -INT8_C( 19), -INT8_C( 82), INT8_C( 21), -INT8_C( 119), -INT8_C( 35), INT8_C( 119), -INT8_C( 39), -INT8_C( 91), -INT8_C( 57), INT8_C( 49), INT8_C( 120), -INT8_C( 105), -INT8_C( 18), INT8_C( 65), INT8_C( 119), -INT8_C( 101), -INT8_C( 22), INT8_C( 92), -INT8_C( 55), INT8_C( 24), -INT8_C( 71), -INT8_C( 64), INT8_C( 78), -INT8_C( 13), INT8_C( 1), INT8_C( 53), INT8_C( 104), -INT8_C( 89), -INT8_C( 118), INT8_C( 101) }, { -INT8_C( 62), INT8_C( 119), INT8_C( 19), -INT8_C( 40), INT8_C( 0), -INT8_C( 15), INT8_C( 79), -INT8_C( 39), -INT8_C( 106), INT8_C( 22), INT8_C( 10), INT8_C( 14), -INT8_C( 83), -INT8_C( 7), INT8_C( 79), INT8_C( 37), -INT8_C( 108), INT8_C( 57), -INT8_C( 127), INT8_C( 93), INT8_C( 81), INT8_C( 58), INT8_C( 30), -INT8_C( 96), INT8_C( 45), INT8_C( 31), -INT8_C( 43), -INT8_C( 106), -INT8_C( 57), INT8_C( 95), -INT8_C( 5), -INT8_C( 119), -INT8_C( 42), INT8_C( 15), INT8_C( 97), -INT8_C( 41), INT8_C( 0), -INT8_C( 80), -INT8_C( 80), -INT8_C( 106), -INT8_C( 58), -INT8_C( 69), -INT8_C( 92), INT8_C( 116), -INT8_C( 76), -INT8_C( 13), -INT8_C( 103), INT8_C( 72), INT8_C( 44), INT8_C( 26), -INT8_C( 91), INT8_C( 125), INT8_C( 85), -INT8_C( 61), INT8_C( 29), -INT8_C( 126), -INT8_C( 29), -INT8_C( 13), INT8_C( 24), -INT8_C( 86), INT8_C( 82), INT8_C( 20), INT8_C( 51), INT8_C( 41) }, { -INT8_C( 62), -INT8_C( 
121), INT8_C( 19), INT8_C( 0), -INT8_C( 79), -INT8_C( 15), -INT8_C( 100), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 14), INT8_C( 0), INT8_C( 0), -INT8_C( 48), -INT8_C( 67), -INT8_C( 108), -INT8_C( 32), -INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 9), INT8_C( 0), -INT8_C( 43), -INT8_C( 106), -INT8_C( 57), INT8_C( 0), -INT8_C( 90), -INT8_C( 119), INT8_C( 0), INT8_C( 15), INT8_C( 0), -INT8_C( 82), INT8_C( 0), INT8_C( 0), -INT8_C( 80), INT8_C( 0), -INT8_C( 58), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 76), -INT8_C( 105), INT8_C( 0), INT8_C( 65), INT8_C( 44), -INT8_C( 101), INT8_C( 0), INT8_C( 92), -INT8_C( 55), -INT8_C( 61), INT8_C( 0), INT8_C( 0), -INT8_C( 29), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 89), -INT8_C( 118), INT8_C( 0) } }, { UINT64_C( 916957810133079331), { INT8_C( 107), INT8_C( 93), INT8_MIN, INT8_C( 31), INT8_C( 80), INT8_C( 25), INT8_C( 103), INT8_C( 124), INT8_C( 51), INT8_C( 13), -INT8_C( 7), -INT8_C( 120), -INT8_C( 48), INT8_C( 23), INT8_C( 11), -INT8_C( 77), INT8_C( 10), INT8_C( 35), INT8_C( 93), INT8_C( 92), INT8_C( 55), -INT8_C( 111), -INT8_C( 123), INT8_C( 90), INT8_C( 38), -INT8_C( 123), INT8_C( 125), INT8_C( 107), INT8_C( 54), INT8_C( 54), INT8_C( 119), -INT8_C( 95), -INT8_C( 109), -INT8_C( 9), -INT8_C( 63), -INT8_C( 29), INT8_C( 16), INT8_C( 40), INT8_C( 95), INT8_C( 68), INT8_C( 53), INT8_C( 89), -INT8_C( 52), INT8_C( 6), INT8_C( 112), -INT8_C( 41), -INT8_C( 71), INT8_C( 122), -INT8_C( 5), INT8_C( 23), -INT8_C( 42), INT8_C( 50), -INT8_C( 88), INT8_C( 92), -INT8_C( 115), -INT8_C( 50), -INT8_C( 31), INT8_C( 10), INT8_C( 57), INT8_C( 23), INT8_C( 65), -INT8_C( 79), -INT8_C( 71), -INT8_C( 44) }, { -INT8_C( 88), INT8_C( 122), -INT8_C( 72), -INT8_C( 71), -INT8_C( 94), INT8_C( 23), -INT8_C( 3), -INT8_C( 40), INT8_C( 112), -INT8_C( 55), -INT8_C( 34), -INT8_C( 32), -INT8_C( 95), -INT8_C( 105), INT8_C( 90), -INT8_C( 100), -INT8_C( 82), INT8_C( 49), -INT8_C( 50), INT8_C( 86), 
-INT8_C( 115), INT8_C( 91), INT8_C( 36), INT8_C( 110), INT8_C( 102), INT8_C( 94), -INT8_C( 122), -INT8_C( 89), INT8_C( 15), INT8_C( 63), INT8_C( 123), -INT8_C( 73), -INT8_C( 71), INT8_C( 51), INT8_C( 112), INT8_C( 91), INT8_C( 75), INT8_C( 109), INT8_C( 51), -INT8_C( 69), INT8_C( 55), INT8_C( 17), -INT8_C( 100), -INT8_C( 40), -INT8_C( 87), -INT8_C( 10), INT8_C( 116), INT8_C( 87), INT8_C( 39), INT8_C( 66), -INT8_C( 82), -INT8_C( 76), -INT8_C( 98), -INT8_C( 46), INT8_C( 35), INT8_C( 4), INT8_C( 48), -INT8_C( 87), -INT8_C( 85), INT8_C( 63), -INT8_C( 24), INT8_C( 38), -INT8_C( 9), -INT8_C( 95) }, { -INT8_C( 88), INT8_C( 93), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 23), INT8_C( 0), INT8_C( 0), INT8_C( 51), INT8_C( 0), -INT8_C( 34), INT8_C( 0), -INT8_C( 95), INT8_C( 0), INT8_C( 0), -INT8_C( 100), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 38), -INT8_C( 123), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 54), INT8_C( 0), INT8_C( 0), -INT8_C( 109), INT8_C( 0), -INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 51), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 87), -INT8_C( 41), INT8_C( 0), INT8_C( 87), -INT8_C( 5), INT8_C( 0), INT8_C( 0), -INT8_C( 76), -INT8_C( 98), -INT8_C( 46), INT8_C( 0), -INT8_C( 50), INT8_C( 0), INT8_C( 0), -INT8_C( 85), INT8_C( 23), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { UINT64_C( 891766420390307674), { INT8_C( 65), -INT8_C( 4), -INT8_C( 28), -INT8_C( 22), -INT8_C( 13), INT8_C( 88), INT8_C( 66), INT8_C( 26), -INT8_C( 102), -INT8_C( 16), -INT8_C( 49), INT8_C( 56), -INT8_C( 62), -INT8_C( 14), INT8_C( 60), -INT8_C( 13), -INT8_C( 101), -INT8_C( 25), INT8_C( 50), -INT8_C( 125), INT8_C( 14), INT8_C( 41), INT8_C( 36), INT8_C( 104), -INT8_C( 111), INT8_C( 32), INT8_C( 13), INT8_C( 102), INT8_C( 80), INT8_C( 109), INT8_C( 114), -INT8_C( 110), INT8_C( 106), INT8_C( 86), INT8_C( 124), INT8_C( 93), -INT8_C( 82), -INT8_C( 66), INT8_C( 119), INT8_C( 72), 
-INT8_C( 82), INT8_C( 70), -INT8_C( 127), INT8_C( 113), INT8_C( 56), -INT8_C( 67), INT8_C( 100), -INT8_C( 45), -INT8_C( 91), -INT8_C( 106), INT8_C( 86), -INT8_C( 77), -INT8_C( 64), INT8_C( 122), INT8_C( 27), INT8_C( 81), -INT8_C( 101), INT8_C( 40), -INT8_C( 73), -INT8_C( 21), -INT8_C( 107), INT8_C( 41), INT8_C( 125), -INT8_C( 1) }, { INT8_MAX, -INT8_C( 6), INT8_C( 92), INT8_C( 45), -INT8_C( 72), -INT8_C( 44), INT8_C( 117), INT8_C( 103), INT8_C( 26), -INT8_C( 10), -INT8_C( 40), INT8_C( 83), -INT8_C( 76), INT8_C( 60), INT8_C( 38), INT8_C( 89), -INT8_C( 46), INT8_C( 125), INT8_C( 12), -INT8_C( 110), -INT8_C( 9), INT8_C( 39), -INT8_C( 29), -INT8_C( 110), INT8_C( 79), -INT8_C( 102), INT8_C( 126), -INT8_C( 28), -INT8_C( 61), -INT8_C( 5), -INT8_C( 28), INT8_C( 66), -INT8_C( 11), INT8_C( 64), INT8_C( 111), -INT8_C( 82), INT8_C( 20), -INT8_C( 27), INT8_C( 21), INT8_C( 47), -INT8_C( 37), -INT8_C( 19), -INT8_C( 126), -INT8_C( 113), INT8_C( 41), -INT8_C( 88), -INT8_C( 24), -INT8_C( 5), INT8_C( 37), -INT8_C( 12), -INT8_C( 114), INT8_C( 29), INT8_C( 27), INT8_C( 113), -INT8_C( 81), INT8_C( 106), INT8_C( 12), INT8_C( 45), INT8_C( 79), -INT8_C( 49), INT8_C( 41), INT8_C( 51), INT8_C( 18), INT8_C( 30) }, { INT8_C( 0), -INT8_C( 6), INT8_C( 0), -INT8_C( 22), -INT8_C( 72), INT8_C( 0), INT8_C( 66), INT8_C( 0), -INT8_C( 102), -INT8_C( 16), -INT8_C( 49), INT8_C( 0), INT8_C( 0), -INT8_C( 14), INT8_C( 38), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 12), -INT8_C( 125), -INT8_C( 9), INT8_C( 39), -INT8_C( 29), -INT8_C( 110), -INT8_C( 111), INT8_C( 0), INT8_C( 13), INT8_C( 0), INT8_C( 0), -INT8_C( 5), INT8_C( 0), -INT8_C( 110), -INT8_C( 11), INT8_C( 0), INT8_C( 111), INT8_C( 0), -INT8_C( 82), INT8_C( 0), INT8_C( 21), INT8_C( 47), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 41), -INT8_C( 88), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 113), -INT8_C( 81), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 73), -INT8_C( 49), INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { UINT64_C( 4807558957739311475), { -INT8_C( 50), INT8_C( 57), -INT8_C( 47), -INT8_C( 9), -INT8_C( 31), -INT8_C( 70), -INT8_C( 13), INT8_C( 7), -INT8_C( 82), -INT8_C( 127), INT8_C( 36), -INT8_C( 54), -INT8_C( 14), -INT8_C( 45), INT8_C( 52), -INT8_C( 2), INT8_C( 1), -INT8_C( 125), -INT8_C( 50), INT8_C( 42), -INT8_C( 74), -INT8_C( 32), INT8_C( 72), INT8_C( 42), INT8_C( 97), INT8_C( 21), -INT8_C( 78), -INT8_C( 56), -INT8_C( 10), INT8_C( 105), INT8_C( 10), -INT8_C( 59), -INT8_C( 94), -INT8_C( 37), -INT8_C( 68), -INT8_C( 125), -INT8_C( 107), -INT8_C( 81), -INT8_C( 118), INT8_C( 68), INT8_C( 48), -INT8_C( 82), INT8_C( 14), INT8_C( 35), -INT8_C( 126), INT8_C( 66), INT8_C( 33), -INT8_C( 125), -INT8_C( 58), -INT8_C( 17), -INT8_C( 83), INT8_C( 124), -INT8_C( 49), -INT8_C( 11), -INT8_C( 90), INT8_C( 49), INT8_C( 10), INT8_C( 88), -INT8_C( 7), INT8_C( 1), -INT8_C( 63), INT8_C( 3), -INT8_C( 58), INT8_C( 99) }, { -INT8_C( 34), -INT8_C( 126), -INT8_C( 25), INT8_C( 116), INT8_C( 50), INT8_C( 113), -INT8_C( 72), INT8_C( 98), INT8_C( 32), -INT8_C( 58), -INT8_C( 123), -INT8_C( 94), INT8_C( 8), -INT8_C( 89), INT8_C( 37), -INT8_C( 50), -INT8_C( 106), -INT8_C( 46), INT8_C( 75), INT8_C( 102), -INT8_C( 57), -INT8_C( 15), -INT8_C( 105), -INT8_C( 46), INT8_C( 74), -INT8_C( 112), -INT8_C( 45), INT8_C( 11), -INT8_C( 109), -INT8_C( 103), INT8_C( 111), INT8_C( 113), INT8_C( 27), INT8_C( 86), -INT8_C( 27), INT8_C( 77), -INT8_C( 57), -INT8_C( 99), -INT8_C( 80), -INT8_C( 25), INT8_C( 99), INT8_C( 53), -INT8_C( 119), INT8_C( 108), -INT8_C( 36), -INT8_C( 82), INT8_C( 58), INT8_C( 115), INT8_MIN, -INT8_C( 123), -INT8_C( 39), INT8_C( 72), INT8_C( 119), INT8_C( 112), INT8_C( 26), -INT8_C( 63), INT8_C( 0), -INT8_C( 19), -INT8_C( 52), -INT8_C( 109), -INT8_C( 122), INT8_C( 59), INT8_C( 4), -INT8_C( 95) }, { -INT8_C( 50), -INT8_C( 126), INT8_C( 0), INT8_C( 0), -INT8_C( 31), -INT8_C( 70), -INT8_C( 72), INT8_C( 0), -INT8_C( 82), INT8_C( 0), INT8_C( 0), INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 50), INT8_C( 0), INT8_C( 0), -INT8_C( 50), INT8_C( 42), INT8_C( 0), INT8_C( 0), -INT8_C( 105), -INT8_C( 46), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 56), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 59), INT8_C( 0), -INT8_C( 37), -INT8_C( 68), INT8_C( 0), INT8_C( 0), -INT8_C( 99), -INT8_C( 118), INT8_C( 0), INT8_C( 48), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 82), INT8_C( 33), -INT8_C( 125), INT8_MIN, -INT8_C( 123), -INT8_C( 83), INT8_C( 0), -INT8_C( 49), -INT8_C( 11), INT8_C( 0), -INT8_C( 63), INT8_C( 0), -INT8_C( 19), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 58), INT8_C( 0) } }, { UINT64_C(16951724401100843665), { -INT8_C( 44), -INT8_C( 54), INT8_C( 87), -INT8_C( 79), INT8_C( 120), -INT8_C( 111), INT8_C( 36), -INT8_C( 7), INT8_C( 23), -INT8_C( 3), INT8_C( 65), -INT8_C( 114), INT8_C( 109), INT8_C( 91), INT8_C( 79), INT8_C( 109), INT8_C( 72), INT8_C( 27), INT8_C( 0), -INT8_C( 50), INT8_C( 87), INT8_C( 4), INT8_C( 111), -INT8_C( 24), -INT8_C( 18), INT8_C( 94), INT8_C( 65), INT8_C( 118), -INT8_C( 3), -INT8_C( 126), INT8_C( 97), -INT8_C( 46), INT8_C( 76), -INT8_C( 72), -INT8_C( 125), -INT8_C( 60), INT8_C( 73), -INT8_C( 89), -INT8_C( 67), INT8_C( 96), -INT8_C( 92), -INT8_C( 2), -INT8_C( 18), INT8_C( 17), INT8_C( 89), INT8_C( 61), INT8_C( 126), -INT8_C( 95), INT8_C( 89), INT8_C( 126), INT8_C( 111), -INT8_C( 80), -INT8_C( 126), -INT8_C( 33), -INT8_C( 104), INT8_C( 113), INT8_C( 61), -INT8_C( 38), -INT8_C( 25), INT8_C( 59), INT8_C( 92), INT8_C( 72), INT8_C( 13), -INT8_C( 88) }, { INT8_C( 0), -INT8_C( 112), INT8_C( 108), INT8_C( 73), INT8_C( 55), INT8_C( 42), -INT8_C( 86), -INT8_C( 37), INT8_C( 40), -INT8_C( 104), -INT8_C( 20), -INT8_C( 126), -INT8_C( 42), INT8_C( 106), INT8_C( 35), INT8_C( 47), -INT8_C( 24), -INT8_C( 109), -INT8_C( 33), INT8_C( 106), INT8_C( 114), INT8_C( 119), -INT8_C( 37), -INT8_C( 81), INT8_C( 81), -INT8_C( 62), -INT8_C( 22), -INT8_C( 83), INT8_C( 10), -INT8_C( 9), 
INT8_C( 85), INT8_C( 10), -INT8_C( 121), -INT8_C( 62), INT8_C( 84), -INT8_C( 66), -INT8_C( 20), -INT8_C( 2), -INT8_C( 103), INT8_C( 20), -INT8_C( 106), -INT8_C( 123), -INT8_C( 106), INT8_C( 108), -INT8_C( 17), -INT8_C( 70), -INT8_C( 101), -INT8_C( 41), INT8_C( 77), INT8_C( 122), INT8_C( 66), -INT8_C( 65), -INT8_C( 14), INT8_C( 29), INT8_C( 110), INT8_C( 67), -INT8_C( 32), INT8_C( 89), -INT8_C( 15), -INT8_C( 22), INT8_C( 80), INT8_C( 70), -INT8_C( 11), -INT8_C( 40) }, { -INT8_C( 44), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 55), INT8_C( 0), INT8_C( 0), -INT8_C( 37), INT8_C( 0), -INT8_C( 104), INT8_C( 0), -INT8_C( 126), INT8_C( 0), INT8_C( 91), INT8_C( 35), INT8_C( 47), -INT8_C( 24), -INT8_C( 109), -INT8_C( 33), -INT8_C( 50), INT8_C( 0), INT8_C( 4), -INT8_C( 37), -INT8_C( 81), -INT8_C( 18), INT8_C( 0), INT8_C( 0), -INT8_C( 83), -INT8_C( 3), INT8_C( 0), INT8_C( 85), INT8_C( 0), -INT8_C( 121), -INT8_C( 72), -INT8_C( 125), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 20), -INT8_C( 106), -INT8_C( 123), -INT8_C( 106), INT8_C( 17), -INT8_C( 17), INT8_C( 0), INT8_C( 0), -INT8_C( 95), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 104), INT8_C( 0), -INT8_C( 32), -INT8_C( 38), INT8_C( 0), -INT8_C( 22), INT8_C( 0), INT8_C( 70), -INT8_C( 11), -INT8_C( 88) } }, { UINT64_C(15927314642776770824), { -INT8_C( 75), -INT8_C( 97), INT8_C( 74), -INT8_C( 91), INT8_C( 89), -INT8_C( 27), INT8_C( 124), -INT8_C( 90), INT8_C( 96), -INT8_C( 66), INT8_C( 101), INT8_C( 82), -INT8_C( 36), -INT8_C( 44), -INT8_C( 107), -INT8_C( 68), INT8_C( 45), -INT8_C( 122), -INT8_C( 90), INT8_C( 125), -INT8_C( 51), -INT8_C( 101), INT8_C( 85), -INT8_C( 43), -INT8_C( 28), -INT8_C( 20), -INT8_C( 54), INT8_C( 43), INT8_C( 28), -INT8_C( 45), INT8_C( 9), -INT8_C( 47), INT8_C( 114), INT8_C( 83), INT8_C( 118), -INT8_C( 52), INT8_C( 56), -INT8_C( 13), INT8_C( 114), -INT8_C( 104), -INT8_C( 79), -INT8_C( 40), -INT8_C( 22), -INT8_C( 115), -INT8_C( 84), INT8_MIN, INT8_C( 73), 
-INT8_C( 39), INT8_C( 6), -INT8_C( 16), INT8_C( 86), -INT8_C( 45), -INT8_C( 117), -INT8_C( 84), -INT8_C( 87), INT8_C( 112), -INT8_C( 104), INT8_C( 115), -INT8_C( 101), -INT8_C( 76), INT8_C( 70), -INT8_C( 92), -INT8_C( 123), -INT8_C( 72) }, { -INT8_C( 9), -INT8_C( 4), -INT8_C( 124), INT8_C( 48), -INT8_C( 17), -INT8_C( 9), -INT8_C( 56), -INT8_C( 96), -INT8_C( 49), -INT8_C( 77), INT8_C( 46), INT8_C( 123), INT8_C( 51), INT8_C( 119), INT8_C( 84), INT8_C( 57), INT8_C( 103), -INT8_C( 86), INT8_C( 13), -INT8_C( 13), INT8_C( 86), -INT8_C( 74), INT8_C( 99), -INT8_C( 18), INT8_C( 41), -INT8_C( 2), -INT8_C( 94), INT8_C( 111), -INT8_C( 93), INT8_C( 40), INT8_C( 39), -INT8_C( 102), INT8_C( 36), -INT8_C( 84), -INT8_C( 54), INT8_C( 19), -INT8_C( 93), -INT8_C( 109), -INT8_C( 77), INT8_C( 114), INT8_C( 70), -INT8_C( 31), -INT8_C( 19), INT8_C( 121), INT8_C( 89), INT8_C( 65), -INT8_C( 78), -INT8_C( 64), -INT8_C( 21), -INT8_C( 65), -INT8_C( 77), INT8_C( 66), INT8_C( 117), INT8_C( 22), INT8_C( 48), -INT8_C( 98), INT8_C( 21), -INT8_C( 45), INT8_C( 13), -INT8_C( 72), -INT8_C( 5), INT8_C( 53), INT8_C( 82), INT8_C( 31) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 91), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 49), INT8_C( 0), INT8_C( 0), INT8_C( 82), INT8_C( 0), INT8_C( 0), -INT8_C( 107), INT8_C( 0), INT8_C( 0), -INT8_C( 122), -INT8_C( 90), INT8_C( 0), -INT8_C( 51), INT8_C( 0), INT8_C( 0), -INT8_C( 43), INT8_C( 0), INT8_C( 0), -INT8_C( 94), INT8_C( 0), -INT8_C( 93), -INT8_C( 45), INT8_C( 9), -INT8_C( 102), INT8_C( 36), -INT8_C( 84), -INT8_C( 54), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 77), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 84), INT8_MIN, INT8_C( 0), INT8_C( 0), -INT8_C( 21), INT8_C( 0), INT8_C( 0), -INT8_C( 45), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 104), INT8_C( 0), -INT8_C( 101), -INT8_C( 76), -INT8_C( 5), INT8_C( 0), -INT8_C( 123), -INT8_C( 72) } }, { UINT64_C(17795663523895975393), { -INT8_C( 57), -INT8_C( 
29), INT8_C( 111), INT8_C( 32), INT8_C( 36), INT8_C( 33), -INT8_C( 32), INT8_C( 15), -INT8_C( 31), -INT8_C( 108), INT8_C( 81), INT8_C( 86), -INT8_C( 86), -INT8_C( 126), -INT8_C( 11), -INT8_C( 65), INT8_C( 85), INT8_C( 2), INT8_C( 119), INT8_C( 80), INT8_C( 55), -INT8_C( 54), INT8_C( 111), INT8_C( 24), -INT8_C( 25), -INT8_C( 95), -INT8_C( 100), -INT8_C( 105), -INT8_C( 122), -INT8_C( 110), -INT8_C( 115), INT8_C( 77), INT8_C( 117), -INT8_C( 4), INT8_C( 109), -INT8_C( 103), INT8_C( 29), INT8_C( 78), -INT8_C( 87), -INT8_C( 2), -INT8_C( 30), -INT8_C( 6), INT8_C( 85), -INT8_C( 116), INT8_C( 124), INT8_C( 74), INT8_C( 76), -INT8_C( 47), INT8_C( 76), -INT8_C( 61), INT8_C( 33), -INT8_C( 124), -INT8_C( 115), -INT8_C( 112), -INT8_C( 100), INT8_C( 116), INT8_C( 49), INT8_C( 57), INT8_C( 11), -INT8_C( 72), -INT8_C( 53), -INT8_C( 104), INT8_C( 5), INT8_C( 65) }, { -INT8_C( 108), INT8_C( 115), -INT8_C( 38), -INT8_C( 78), -INT8_C( 63), -INT8_C( 125), -INT8_C( 80), -INT8_C( 93), INT8_C( 126), INT8_C( 5), INT8_C( 47), -INT8_C( 6), INT8_C( 79), INT8_C( 123), -INT8_C( 52), -INT8_C( 100), INT8_C( 63), -INT8_C( 19), INT8_C( 32), -INT8_C( 52), INT8_C( 126), -INT8_C( 68), INT8_C( 65), -INT8_C( 81), -INT8_C( 11), INT8_C( 76), INT8_C( 103), -INT8_C( 63), -INT8_C( 27), INT8_C( 109), INT8_C( 2), INT8_C( 121), -INT8_C( 32), -INT8_C( 36), INT8_C( 43), -INT8_C( 95), INT8_C( 96), -INT8_C( 36), INT8_C( 68), -INT8_C( 34), -INT8_C( 31), INT8_C( 115), -INT8_C( 40), INT8_C( 49), -INT8_C( 17), -INT8_C( 92), -INT8_C( 51), INT8_C( 46), -INT8_C( 110), -INT8_C( 19), -INT8_C( 6), INT8_C( 16), -INT8_C( 87), INT8_C( 59), -INT8_C( 65), -INT8_C( 97), -INT8_C( 120), INT8_C( 39), INT8_C( 96), INT8_C( 109), -INT8_C( 108), INT8_C( 98), -INT8_C( 26), INT8_C( 116) }, { -INT8_C( 108), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 125), -INT8_C( 80), -INT8_C( 93), -INT8_C( 31), INT8_C( 0), INT8_C( 47), -INT8_C( 6), -INT8_C( 86), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 19), INT8_C( 0), INT8_C( 
0), INT8_C( 55), -INT8_C( 68), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 100), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 77), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 29), -INT8_C( 36), INT8_C( 0), -INT8_C( 34), -INT8_C( 31), INT8_C( 0), -INT8_C( 40), INT8_C( 0), INT8_C( 0), -INT8_C( 92), -INT8_C( 51), -INT8_C( 47), INT8_C( 0), -INT8_C( 61), -INT8_C( 6), INT8_C( 0), -INT8_C( 115), -INT8_C( 112), -INT8_C( 100), -INT8_C( 97), INT8_C( 0), INT8_C( 39), INT8_C( 11), INT8_C( 0), -INT8_C( 108), -INT8_C( 104), -INT8_C( 26), INT8_C( 65) } } }; /* Run every entry of test_vec; the entry count is computed with sizeof so the loop follows the table size. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_maskz_min_epi8(test_vec[i].k, a, b); /* Check the computed vector against the expected r loaded from the table. */ simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm512_min_epu8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint8_t a[64]; const uint8_t b[64]; const uint8_t r[64]; } test_vec[] = { { { UINT8_C( 32), UINT8_C(217), UINT8_C( 27), UINT8_C(164), UINT8_C(193), UINT8_C(103), UINT8_C(171), UINT8_C(196), UINT8_C(224), UINT8_C(209), UINT8_C( 8), UINT8_C( 68), UINT8_C(152), UINT8_C(243), UINT8_C( 32), UINT8_C( 54), UINT8_C(148), UINT8_C(251), UINT8_C(191), UINT8_C(149), UINT8_C(223), UINT8_C( 23), UINT8_C(177), UINT8_C( 37), UINT8_C(144), UINT8_C(177), UINT8_C( 93), UINT8_C(117), UINT8_C(236), UINT8_C( 93), UINT8_C(156), UINT8_C( 12), UINT8_C( 54), UINT8_C(183), UINT8_C(176), UINT8_C(247), UINT8_C( 31), UINT8_C( 91), UINT8_C(187), UINT8_MAX, UINT8_C( 44), UINT8_C(196), UINT8_C( 67), UINT8_C(197), UINT8_C(183), UINT8_C( 99), UINT8_C(251), UINT8_C( 75), UINT8_C( 94), UINT8_C(186), UINT8_C(224), UINT8_C( 61), UINT8_C(210), UINT8_C(145), UINT8_C( 99), UINT8_C( 98), UINT8_C( 66), UINT8_C(192), UINT8_C(215), UINT8_C( 47), UINT8_C( 29), UINT8_C(115), UINT8_C( 59), UINT8_C( 83) }, { 
UINT8_C( 43), UINT8_C(236), UINT8_C( 75), UINT8_C( 74), UINT8_C( 71), UINT8_C( 6), UINT8_C( 73), UINT8_C(116), UINT8_C(202), UINT8_C(140), UINT8_C( 57), UINT8_C(129), UINT8_C(240), UINT8_C( 52), UINT8_C(204), UINT8_C( 78), UINT8_C(239), UINT8_C(172), UINT8_C(140), UINT8_C(193), UINT8_C( 62), UINT8_C(239), UINT8_C( 35), UINT8_C(128), UINT8_C(175), UINT8_C(250), UINT8_C(175), UINT8_C(204), UINT8_C(110), UINT8_C(235), UINT8_C( 31), UINT8_C(153), UINT8_C(215), UINT8_C(106), UINT8_C(227), UINT8_C( 30), UINT8_C(113), UINT8_C( 44), UINT8_C(146), UINT8_C( 59), UINT8_C(184), UINT8_C(203), UINT8_C(189), UINT8_C(168), UINT8_C( 0), UINT8_C(137), UINT8_C(247), UINT8_C(239), UINT8_C( 54), UINT8_C(131), UINT8_C(176), UINT8_C(116), UINT8_C(114), UINT8_C(211), UINT8_C(244), UINT8_C( 33), UINT8_C(205), UINT8_C(164), UINT8_C(237), UINT8_C( 59), UINT8_C(143), UINT8_C( 12), UINT8_C(212), UINT8_C(102) }, { UINT8_C( 32), UINT8_C(217), UINT8_C( 27), UINT8_C( 74), UINT8_C( 71), UINT8_C( 6), UINT8_C( 73), UINT8_C(116), UINT8_C(202), UINT8_C(140), UINT8_C( 8), UINT8_C( 68), UINT8_C(152), UINT8_C( 52), UINT8_C( 32), UINT8_C( 54), UINT8_C(148), UINT8_C(172), UINT8_C(140), UINT8_C(149), UINT8_C( 62), UINT8_C( 23), UINT8_C( 35), UINT8_C( 37), UINT8_C(144), UINT8_C(177), UINT8_C( 93), UINT8_C(117), UINT8_C(110), UINT8_C( 93), UINT8_C( 31), UINT8_C( 12), UINT8_C( 54), UINT8_C(106), UINT8_C(176), UINT8_C( 30), UINT8_C( 31), UINT8_C( 44), UINT8_C(146), UINT8_C( 59), UINT8_C( 44), UINT8_C(196), UINT8_C( 67), UINT8_C(168), UINT8_C( 0), UINT8_C( 99), UINT8_C(247), UINT8_C( 75), UINT8_C( 54), UINT8_C(131), UINT8_C(176), UINT8_C( 61), UINT8_C(114), UINT8_C(145), UINT8_C( 99), UINT8_C( 33), UINT8_C( 66), UINT8_C(164), UINT8_C(215), UINT8_C( 47), UINT8_C( 29), UINT8_C( 12), UINT8_C( 59), UINT8_C( 83) } }, { { UINT8_C(119), UINT8_C(183), UINT8_C(132), UINT8_C(232), UINT8_C(227), UINT8_C( 23), UINT8_C( 35), UINT8_C(156), UINT8_C(226), UINT8_C(224), UINT8_C( 68), UINT8_C(226), UINT8_C(106), UINT8_C( 59), 
UINT8_C(209), UINT8_C(160), UINT8_C(190), UINT8_C(129), UINT8_C( 20), UINT8_C( 48), UINT8_C( 84), UINT8_C( 8), UINT8_C( 81), UINT8_C( 34), UINT8_C(172), UINT8_C( 62), UINT8_C( 93), UINT8_C( 59), UINT8_C( 75), UINT8_C( 50), UINT8_C(161), UINT8_C(194), UINT8_C(233), UINT8_C( 38), UINT8_C(170), UINT8_C(205), UINT8_C( 61), UINT8_C(205), UINT8_C(105), UINT8_C( 31), UINT8_C(174), UINT8_C(173), UINT8_C( 2), UINT8_C( 24), UINT8_C(233), UINT8_C(211), UINT8_C(184), UINT8_C(167), UINT8_C( 85), UINT8_C(204), UINT8_C(216), UINT8_C(169), UINT8_C(212), UINT8_C( 41), UINT8_C(203), UINT8_C(129), UINT8_C(104), UINT8_C( 41), UINT8_C(188), UINT8_C(179), UINT8_C( 91), UINT8_C( 94), UINT8_C(117), UINT8_C( 68) }, { UINT8_C(132), UINT8_C( 31), UINT8_C( 17), UINT8_C(193), UINT8_C(236), UINT8_C(122), UINT8_C(224), UINT8_C(154), UINT8_C( 40), UINT8_C(226), UINT8_C(178), UINT8_C( 17), UINT8_C(182), UINT8_C(106), UINT8_C(184), UINT8_C( 11), UINT8_C( 54), UINT8_C(144), UINT8_C(180), UINT8_C( 11), UINT8_C(186), UINT8_C(128), UINT8_C(140), UINT8_C( 34), UINT8_C(169), UINT8_C( 72), UINT8_C(213), UINT8_C( 4), UINT8_C(166), UINT8_C( 74), UINT8_C( 72), UINT8_C( 42), UINT8_C(105), UINT8_C( 90), UINT8_C(235), UINT8_C( 85), UINT8_C(212), UINT8_C(204), UINT8_C(240), UINT8_C(252), UINT8_C(174), UINT8_C(162), UINT8_C( 13), UINT8_C(100), UINT8_C( 13), UINT8_C(198), UINT8_C(111), UINT8_C( 67), UINT8_C( 86), UINT8_C( 36), UINT8_C( 78), UINT8_C( 16), UINT8_C(164), UINT8_C(218), UINT8_C( 50), UINT8_C( 77), UINT8_C( 35), UINT8_C( 7), UINT8_C( 81), UINT8_C(201), UINT8_C( 81), UINT8_C(153), UINT8_C(244), UINT8_C(186) }, { UINT8_C(119), UINT8_C( 31), UINT8_C( 17), UINT8_C(193), UINT8_C(227), UINT8_C( 23), UINT8_C( 35), UINT8_C(154), UINT8_C( 40), UINT8_C(224), UINT8_C( 68), UINT8_C( 17), UINT8_C(106), UINT8_C( 59), UINT8_C(184), UINT8_C( 11), UINT8_C( 54), UINT8_C(129), UINT8_C( 20), UINT8_C( 11), UINT8_C( 84), UINT8_C( 8), UINT8_C( 81), UINT8_C( 34), UINT8_C(169), UINT8_C( 62), UINT8_C( 93), UINT8_C( 4), UINT8_C( 
75), UINT8_C( 50), UINT8_C( 72), UINT8_C( 42), UINT8_C(105), UINT8_C( 38), UINT8_C(170), UINT8_C( 85), UINT8_C( 61), UINT8_C(204), UINT8_C(105), UINT8_C( 31), UINT8_C(174), UINT8_C(162), UINT8_C( 2), UINT8_C( 24), UINT8_C( 13), UINT8_C(198), UINT8_C(111), UINT8_C( 67), UINT8_C( 85), UINT8_C( 36), UINT8_C( 78), UINT8_C( 16), UINT8_C(164), UINT8_C( 41), UINT8_C( 50), UINT8_C( 77), UINT8_C( 35), UINT8_C( 7), UINT8_C( 81), UINT8_C(179), UINT8_C( 81), UINT8_C( 94), UINT8_C(117), UINT8_C( 68) } }, { { UINT8_C(243), UINT8_C(223), UINT8_C( 16), UINT8_C(200), UINT8_C(171), UINT8_C( 0), UINT8_C(196), UINT8_C( 90), UINT8_C(162), UINT8_C(210), UINT8_C(190), UINT8_C(175), UINT8_C(152), UINT8_C( 46), UINT8_C(243), UINT8_C(238), UINT8_C( 82), UINT8_C( 65), UINT8_MAX, UINT8_C(246), UINT8_C( 28), UINT8_C( 49), UINT8_C( 67), UINT8_C( 63), UINT8_C( 57), UINT8_C(148), UINT8_C( 8), UINT8_C(138), UINT8_C( 45), UINT8_C(252), UINT8_C( 69), UINT8_C( 33), UINT8_C(220), UINT8_C( 85), UINT8_C(233), UINT8_C(135), UINT8_C( 85), UINT8_C(173), UINT8_C(225), UINT8_C(247), UINT8_C(127), UINT8_C(160), UINT8_C(167), UINT8_C( 23), UINT8_C(206), UINT8_C(154), UINT8_C( 6), UINT8_C( 32), UINT8_C(219), UINT8_C( 5), UINT8_C( 22), UINT8_C(247), UINT8_C( 54), UINT8_C( 89), UINT8_C( 54), UINT8_C(111), UINT8_C(237), UINT8_C( 63), UINT8_C(250), UINT8_C( 26), UINT8_C( 59), UINT8_C( 63), UINT8_C( 59), UINT8_C( 23) }, { UINT8_C(148), UINT8_C( 36), UINT8_C(159), UINT8_C(233), UINT8_C(210), UINT8_C(128), UINT8_C(224), UINT8_C( 81), UINT8_C( 32), UINT8_C(135), UINT8_C(105), UINT8_C(238), UINT8_C( 33), UINT8_C(111), UINT8_C( 14), UINT8_C(253), UINT8_C(116), UINT8_C( 36), UINT8_C(244), UINT8_C(170), UINT8_C(125), UINT8_C( 43), UINT8_C( 26), UINT8_C(106), UINT8_C(106), UINT8_C( 20), UINT8_C(133), UINT8_C(165), UINT8_C( 83), UINT8_C(192), UINT8_C(189), UINT8_C(231), UINT8_C(229), UINT8_C( 92), UINT8_C(208), UINT8_C(183), UINT8_C(220), UINT8_C(176), UINT8_C( 8), UINT8_C(253), UINT8_C( 56), UINT8_C(113), UINT8_C(235), 
UINT8_C( 89), UINT8_C(224), UINT8_C(250), UINT8_C( 86), UINT8_C( 84), UINT8_C( 30), UINT8_C( 75), UINT8_MAX, UINT8_C(156), UINT8_C(118), UINT8_C( 25), UINT8_C( 6), UINT8_C(224), UINT8_C( 45), UINT8_C(139), UINT8_C(133), UINT8_C(128), UINT8_C( 76), UINT8_C( 66), UINT8_C(103), UINT8_C( 49) }, { UINT8_C(148), UINT8_C( 36), UINT8_C( 16), UINT8_C(200), UINT8_C(171), UINT8_C( 0), UINT8_C(196), UINT8_C( 81), UINT8_C( 32), UINT8_C(135), UINT8_C(105), UINT8_C(175), UINT8_C( 33), UINT8_C( 46), UINT8_C( 14), UINT8_C(238), UINT8_C( 82), UINT8_C( 36), UINT8_C(244), UINT8_C(170), UINT8_C( 28), UINT8_C( 43), UINT8_C( 26), UINT8_C( 63), UINT8_C( 57), UINT8_C( 20), UINT8_C( 8), UINT8_C(138), UINT8_C( 45), UINT8_C(192), UINT8_C( 69), UINT8_C( 33), UINT8_C(220), UINT8_C( 85), UINT8_C(208), UINT8_C(135), UINT8_C( 85), UINT8_C(173), UINT8_C( 8), UINT8_C(247), UINT8_C( 56), UINT8_C(113), UINT8_C(167), UINT8_C( 23), UINT8_C(206), UINT8_C(154), UINT8_C( 6), UINT8_C( 32), UINT8_C( 30), UINT8_C( 5), UINT8_C( 22), UINT8_C(156), UINT8_C( 54), UINT8_C( 25), UINT8_C( 6), UINT8_C(111), UINT8_C( 45), UINT8_C( 63), UINT8_C(133), UINT8_C( 26), UINT8_C( 59), UINT8_C( 63), UINT8_C( 59), UINT8_C( 23) } }, { { UINT8_C(158), UINT8_C( 55), UINT8_C(232), UINT8_C(123), UINT8_C(231), UINT8_C(240), UINT8_C(120), UINT8_C( 31), UINT8_C( 98), UINT8_C( 99), UINT8_C(121), UINT8_C( 66), UINT8_C( 93), UINT8_C(207), UINT8_C(151), UINT8_C(124), UINT8_C( 26), UINT8_C(150), UINT8_C( 24), UINT8_C(144), UINT8_C(175), UINT8_C( 30), UINT8_C(112), UINT8_C(220), UINT8_C(170), UINT8_C(246), UINT8_C( 92), UINT8_C(246), UINT8_C( 56), UINT8_C(195), UINT8_C( 39), UINT8_C(215), UINT8_C(250), UINT8_C( 15), UINT8_C( 82), UINT8_C(225), UINT8_MAX, UINT8_C(202), UINT8_C( 1), UINT8_C( 97), UINT8_C( 45), UINT8_C(122), UINT8_C(164), UINT8_C(139), UINT8_C( 73), UINT8_C( 59), UINT8_C( 7), UINT8_C(100), UINT8_C(209), UINT8_C( 31), UINT8_C(244), UINT8_C(128), UINT8_C( 61), UINT8_C(101), UINT8_C( 92), UINT8_C(231), UINT8_C( 91), UINT8_C(184), 
UINT8_C(221), UINT8_C(147), UINT8_C(123), UINT8_C( 4), UINT8_C(106), UINT8_C(117) }, { UINT8_C( 19), UINT8_C(188), UINT8_C( 86), UINT8_C( 19), UINT8_C(134), UINT8_C( 87), UINT8_C(116), UINT8_C(180), UINT8_C(209), UINT8_C( 24), UINT8_C( 63), UINT8_C( 27), UINT8_C( 83), UINT8_C( 70), UINT8_C(127), UINT8_C( 36), UINT8_C(101), UINT8_C(115), UINT8_C(164), UINT8_C(162), UINT8_C(216), UINT8_C( 0), UINT8_C(138), UINT8_C( 51), UINT8_C(184), UINT8_C(103), UINT8_C(199), UINT8_C( 51), UINT8_C(108), UINT8_C( 49), UINT8_C(168), UINT8_C(127), UINT8_C(238), UINT8_MAX, UINT8_C(146), UINT8_C(116), UINT8_C( 86), UINT8_C( 7), UINT8_C( 40), UINT8_C( 40), UINT8_C( 31), UINT8_C(103), UINT8_C( 67), UINT8_C(115), UINT8_C(173), UINT8_C(194), UINT8_C(151), UINT8_C( 18), UINT8_C( 53), UINT8_C( 60), UINT8_C(181), UINT8_C( 14), UINT8_C( 60), UINT8_C( 63), UINT8_C( 65), UINT8_C(245), UINT8_C(166), UINT8_C( 8), UINT8_C( 40), UINT8_C( 18), UINT8_C( 58), UINT8_C(209), UINT8_C(146), UINT8_C( 40) }, { UINT8_C( 19), UINT8_C( 55), UINT8_C( 86), UINT8_C( 19), UINT8_C(134), UINT8_C( 87), UINT8_C(116), UINT8_C( 31), UINT8_C( 98), UINT8_C( 24), UINT8_C( 63), UINT8_C( 27), UINT8_C( 83), UINT8_C( 70), UINT8_C(127), UINT8_C( 36), UINT8_C( 26), UINT8_C(115), UINT8_C( 24), UINT8_C(144), UINT8_C(175), UINT8_C( 0), UINT8_C(112), UINT8_C( 51), UINT8_C(170), UINT8_C(103), UINT8_C( 92), UINT8_C( 51), UINT8_C( 56), UINT8_C( 49), UINT8_C( 39), UINT8_C(127), UINT8_C(238), UINT8_C( 15), UINT8_C( 82), UINT8_C(116), UINT8_C( 86), UINT8_C( 7), UINT8_C( 1), UINT8_C( 40), UINT8_C( 31), UINT8_C(103), UINT8_C( 67), UINT8_C(115), UINT8_C( 73), UINT8_C( 59), UINT8_C( 7), UINT8_C( 18), UINT8_C( 53), UINT8_C( 31), UINT8_C(181), UINT8_C( 14), UINT8_C( 60), UINT8_C( 63), UINT8_C( 65), UINT8_C(231), UINT8_C( 91), UINT8_C( 8), UINT8_C( 40), UINT8_C( 18), UINT8_C( 58), UINT8_C( 4), UINT8_C(106), UINT8_C( 40) } }, { { UINT8_C(208), UINT8_C( 36), UINT8_C(156), UINT8_C( 38), UINT8_C( 43), UINT8_C(197), UINT8_C( 78), UINT8_C( 75), UINT8_C( 
44), UINT8_C(145), UINT8_C(190), UINT8_C(218), UINT8_C( 83), UINT8_C( 85), UINT8_C(236), UINT8_C(137), UINT8_C(145), UINT8_C(161), UINT8_C(151), UINT8_C(206), UINT8_C(224), UINT8_C(216), UINT8_C(195), UINT8_C(135), UINT8_C(225), UINT8_C(235), UINT8_C(153), UINT8_C( 27), UINT8_C(188), UINT8_C( 43), UINT8_C( 67), UINT8_C(140), UINT8_C( 80), UINT8_C(223), UINT8_C(179), UINT8_C(123), UINT8_C(164), UINT8_C( 1), UINT8_C(198), UINT8_C(209), UINT8_C(147), UINT8_C(132), UINT8_C(171), UINT8_C(230), UINT8_C(218), UINT8_C(151), UINT8_C(111), UINT8_C(107), UINT8_C( 57), UINT8_C( 6), UINT8_C( 57), UINT8_C( 25), UINT8_C(223), UINT8_C(252), UINT8_C(160), UINT8_C(192), UINT8_C(232), UINT8_C( 58), UINT8_C(219), UINT8_C(164), UINT8_C(101), UINT8_C( 30), UINT8_C( 49), UINT8_C(181) }, { UINT8_C(253), UINT8_C(228), UINT8_C( 49), UINT8_C(162), UINT8_C(229), UINT8_C(247), UINT8_C(115), UINT8_C(120), UINT8_C(124), UINT8_C( 30), UINT8_C( 95), UINT8_C( 86), UINT8_C(181), UINT8_C(206), UINT8_C(193), UINT8_C(238), UINT8_C(213), UINT8_C(251), UINT8_C( 8), UINT8_C(180), UINT8_C(247), UINT8_C(168), UINT8_C(116), UINT8_C(223), UINT8_C(226), UINT8_C( 79), UINT8_C(132), UINT8_C( 72), UINT8_C(109), UINT8_C(181), UINT8_C(253), UINT8_C(106), UINT8_C(153), UINT8_C( 46), UINT8_C( 12), UINT8_C(126), UINT8_C( 38), UINT8_C(127), UINT8_C(247), UINT8_C(162), UINT8_C(157), UINT8_C( 86), UINT8_C(248), UINT8_C( 83), UINT8_C( 36), UINT8_C(185), UINT8_C( 65), UINT8_C(249), UINT8_C(180), UINT8_C( 73), UINT8_C(173), UINT8_C(172), UINT8_C(242), UINT8_C( 33), UINT8_C(139), UINT8_C(212), UINT8_C(112), UINT8_C( 15), UINT8_C( 28), UINT8_C(221), UINT8_C(196), UINT8_C( 26), UINT8_C( 72), UINT8_C( 93) }, { UINT8_C(208), UINT8_C( 36), UINT8_C( 49), UINT8_C( 38), UINT8_C( 43), UINT8_C(197), UINT8_C( 78), UINT8_C( 75), UINT8_C( 44), UINT8_C( 30), UINT8_C( 95), UINT8_C( 86), UINT8_C( 83), UINT8_C( 85), UINT8_C(193), UINT8_C(137), UINT8_C(145), UINT8_C(161), UINT8_C( 8), UINT8_C(180), UINT8_C(224), UINT8_C(168), UINT8_C(116), 
UINT8_C(135), UINT8_C(225), UINT8_C( 79), UINT8_C(132), UINT8_C( 27), UINT8_C(109), UINT8_C( 43), UINT8_C( 67), UINT8_C(106), UINT8_C( 80), UINT8_C( 46), UINT8_C( 12), UINT8_C(123), UINT8_C( 38), UINT8_C( 1), UINT8_C(198), UINT8_C(162), UINT8_C(147), UINT8_C( 86), UINT8_C(171), UINT8_C( 83), UINT8_C( 36), UINT8_C(151), UINT8_C( 65), UINT8_C(107), UINT8_C( 57), UINT8_C( 6), UINT8_C( 57), UINT8_C( 25), UINT8_C(223), UINT8_C( 33), UINT8_C(139), UINT8_C(192), UINT8_C(112), UINT8_C( 15), UINT8_C( 28), UINT8_C(164), UINT8_C(101), UINT8_C( 26), UINT8_C( 49), UINT8_C( 93) } }, { { UINT8_C( 72), UINT8_C( 84), UINT8_C(220), UINT8_C(110), UINT8_C(212), UINT8_C(211), UINT8_C( 16), UINT8_C(113), UINT8_C( 41), UINT8_C( 8), UINT8_C(196), UINT8_C( 77), UINT8_C(194), UINT8_C( 6), UINT8_C( 71), UINT8_C(118), UINT8_C( 79), UINT8_C(244), UINT8_C( 34), UINT8_C( 65), UINT8_C( 22), UINT8_C(174), UINT8_C( 22), UINT8_C(134), UINT8_C(189), UINT8_C( 50), UINT8_C(100), UINT8_C(130), UINT8_C( 76), UINT8_C(172), UINT8_C(223), UINT8_C(149), UINT8_C( 0), UINT8_C(187), UINT8_C( 3), UINT8_C(212), UINT8_C(142), UINT8_C( 20), UINT8_C( 70), UINT8_C(183), UINT8_C( 28), UINT8_C( 10), UINT8_C( 5), UINT8_C(222), UINT8_C( 16), UINT8_C( 76), UINT8_C( 85), UINT8_C( 96), UINT8_C( 64), UINT8_C(119), UINT8_C(161), UINT8_C( 86), UINT8_C( 37), UINT8_C(183), UINT8_C(221), UINT8_C(227), UINT8_C(234), UINT8_C( 65), UINT8_C(101), UINT8_C( 54), UINT8_C(237), UINT8_C( 68), UINT8_C(203), UINT8_C(237) }, { UINT8_C( 0), UINT8_C(207), UINT8_C(194), UINT8_C(142), UINT8_C(227), UINT8_C( 8), UINT8_C( 70), UINT8_MAX, UINT8_C( 18), UINT8_C( 75), UINT8_C(222), UINT8_C( 35), UINT8_C(151), UINT8_C( 51), UINT8_C(131), UINT8_C(215), UINT8_C(170), UINT8_C( 36), UINT8_C( 46), UINT8_C(208), UINT8_C(220), UINT8_C( 11), UINT8_C(179), UINT8_C(198), UINT8_C( 76), UINT8_C( 24), UINT8_C(252), UINT8_C( 57), UINT8_C( 92), UINT8_C(200), UINT8_C( 38), UINT8_C( 92), UINT8_C(151), UINT8_C(232), UINT8_C(235), UINT8_C(122), UINT8_C(240), UINT8_C( 
49), UINT8_C(121), UINT8_C( 3), UINT8_C(124), UINT8_C( 87), UINT8_C( 38), UINT8_C( 19), UINT8_C(138), UINT8_C(169), UINT8_C(234), UINT8_C( 53), UINT8_C(205), UINT8_C( 24), UINT8_C( 5), UINT8_C(169), UINT8_C( 35), UINT8_C(184), UINT8_C(111), UINT8_C(111), UINT8_C(208), UINT8_C(108), UINT8_C(168), UINT8_C( 44), UINT8_C( 52), UINT8_C(207), UINT8_C(137), UINT8_C(203) }, { UINT8_C( 0), UINT8_C( 84), UINT8_C(194), UINT8_C(110), UINT8_C(212), UINT8_C( 8), UINT8_C( 16), UINT8_C(113), UINT8_C( 18), UINT8_C( 8), UINT8_C(196), UINT8_C( 35), UINT8_C(151), UINT8_C( 6), UINT8_C( 71), UINT8_C(118), UINT8_C( 79), UINT8_C( 36), UINT8_C( 34), UINT8_C( 65), UINT8_C( 22), UINT8_C( 11), UINT8_C( 22), UINT8_C(134), UINT8_C( 76), UINT8_C( 24), UINT8_C(100), UINT8_C( 57), UINT8_C( 76), UINT8_C(172), UINT8_C( 38), UINT8_C( 92), UINT8_C( 0), UINT8_C(187), UINT8_C( 3), UINT8_C(122), UINT8_C(142), UINT8_C( 20), UINT8_C( 70), UINT8_C( 3), UINT8_C( 28), UINT8_C( 10), UINT8_C( 5), UINT8_C( 19), UINT8_C( 16), UINT8_C( 76), UINT8_C( 85), UINT8_C( 53), UINT8_C( 64), UINT8_C( 24), UINT8_C( 5), UINT8_C( 86), UINT8_C( 35), UINT8_C(183), UINT8_C(111), UINT8_C(111), UINT8_C(208), UINT8_C( 65), UINT8_C(101), UINT8_C( 44), UINT8_C( 52), UINT8_C( 68), UINT8_C(137), UINT8_C(203) } }, { { UINT8_C(183), UINT8_C(116), UINT8_C( 69), UINT8_C(168), UINT8_C(165), UINT8_C(190), UINT8_C(171), UINT8_C( 33), UINT8_C( 22), UINT8_C(209), UINT8_C( 52), UINT8_C(160), UINT8_C(122), UINT8_C( 30), UINT8_C(213), UINT8_C( 71), UINT8_C( 55), UINT8_C(218), UINT8_C(241), UINT8_C( 90), UINT8_C(146), UINT8_C( 96), UINT8_C(202), UINT8_C( 98), UINT8_C(204), UINT8_C(114), UINT8_C(143), UINT8_C( 0), UINT8_C( 65), UINT8_C( 24), UINT8_C(203), UINT8_C(249), UINT8_C(140), UINT8_C( 16), UINT8_C(161), UINT8_C( 49), UINT8_C(207), UINT8_C( 76), UINT8_C( 82), UINT8_C(229), UINT8_C( 29), UINT8_C(134), UINT8_C(133), UINT8_C(151), UINT8_C(164), UINT8_C( 91), UINT8_C(222), UINT8_C(219), UINT8_C( 53), UINT8_C(207), UINT8_C( 54), UINT8_C(200), 
UINT8_C( 48), UINT8_C( 0), UINT8_C( 42), UINT8_C(252), UINT8_C(114), UINT8_C(185), UINT8_C(253), UINT8_C(180), UINT8_C(209), UINT8_C(200), UINT8_C(173), UINT8_C( 93) }, { UINT8_C(217), UINT8_C( 78), UINT8_C(142), UINT8_C(168), UINT8_C(154), UINT8_C(224), UINT8_C(141), UINT8_C(183), UINT8_C(102), UINT8_C( 18), UINT8_C( 78), UINT8_C( 11), UINT8_C(109), UINT8_C( 44), UINT8_C(230), UINT8_C(163), UINT8_C(252), UINT8_C( 28), UINT8_C(107), UINT8_C( 44), UINT8_C( 28), UINT8_C(149), UINT8_C( 40), UINT8_C(143), UINT8_C( 79), UINT8_C( 37), UINT8_C( 67), UINT8_C( 32), UINT8_C(238), UINT8_C(240), UINT8_C(126), UINT8_C(199), UINT8_C( 62), UINT8_C( 12), UINT8_C(111), UINT8_C(216), UINT8_C(237), UINT8_C(252), UINT8_C(143), UINT8_C( 83), UINT8_C( 14), UINT8_C(221), UINT8_C( 94), UINT8_C(124), UINT8_C( 9), UINT8_C( 69), UINT8_C( 31), UINT8_C( 5), UINT8_C( 97), UINT8_C(138), UINT8_C( 49), UINT8_C(126), UINT8_C( 31), UINT8_C( 90), UINT8_C( 13), UINT8_C(110), UINT8_C(127), UINT8_C( 80), UINT8_C(143), UINT8_C(109), UINT8_C( 64), UINT8_C( 13), UINT8_C( 52), UINT8_C(126) }, { UINT8_C(183), UINT8_C( 78), UINT8_C( 69), UINT8_C(168), UINT8_C(154), UINT8_C(190), UINT8_C(141), UINT8_C( 33), UINT8_C( 22), UINT8_C( 18), UINT8_C( 52), UINT8_C( 11), UINT8_C(109), UINT8_C( 30), UINT8_C(213), UINT8_C( 71), UINT8_C( 55), UINT8_C( 28), UINT8_C(107), UINT8_C( 44), UINT8_C( 28), UINT8_C( 96), UINT8_C( 40), UINT8_C( 98), UINT8_C( 79), UINT8_C( 37), UINT8_C( 67), UINT8_C( 0), UINT8_C( 65), UINT8_C( 24), UINT8_C(126), UINT8_C(199), UINT8_C( 62), UINT8_C( 12), UINT8_C(111), UINT8_C( 49), UINT8_C(207), UINT8_C( 76), UINT8_C( 82), UINT8_C( 83), UINT8_C( 14), UINT8_C(134), UINT8_C( 94), UINT8_C(124), UINT8_C( 9), UINT8_C( 69), UINT8_C( 31), UINT8_C( 5), UINT8_C( 53), UINT8_C(138), UINT8_C( 49), UINT8_C(126), UINT8_C( 31), UINT8_C( 0), UINT8_C( 13), UINT8_C(110), UINT8_C(114), UINT8_C( 80), UINT8_C(143), UINT8_C(109), UINT8_C( 64), UINT8_C( 13), UINT8_C( 52), UINT8_C( 93) } }, { { UINT8_C( 25), UINT8_C(163), 
UINT8_C( 86), UINT8_C( 6), UINT8_C(159), UINT8_C(229), UINT8_C( 90), UINT8_C(174), UINT8_C(194), UINT8_C(184), UINT8_C( 42), UINT8_C(203), UINT8_C(253), UINT8_C( 73), UINT8_C(209), UINT8_C( 95), UINT8_C(211), UINT8_C( 2), UINT8_C(221), UINT8_C(242), UINT8_C( 92), UINT8_C(234), UINT8_C( 97), UINT8_C(220), UINT8_C( 58), UINT8_C(240), UINT8_C( 73), UINT8_C(122), UINT8_C(253), UINT8_C(126), UINT8_C(248), UINT8_C( 22), UINT8_C( 33), UINT8_C( 78), UINT8_C( 29), UINT8_C(193), UINT8_C( 51), UINT8_C(119), UINT8_C(111), UINT8_C(245), UINT8_C( 47), UINT8_C(153), UINT8_C(192), UINT8_C( 45), UINT8_C(226), UINT8_C(145), UINT8_C(140), UINT8_C(181), UINT8_C(148), UINT8_C(105), UINT8_C(167), UINT8_C(240), UINT8_C( 83), UINT8_C( 8), UINT8_C(204), UINT8_C(141), UINT8_C(248), UINT8_C( 22), UINT8_C( 7), UINT8_C(245), UINT8_C(148), UINT8_MAX, UINT8_C( 12), UINT8_C(181) }, { UINT8_C( 77), UINT8_C( 41), UINT8_C(118), UINT8_C(128), UINT8_C(160), UINT8_C(229), UINT8_C(117), UINT8_C(207), UINT8_C(126), UINT8_C( 53), UINT8_C(252), UINT8_C( 96), UINT8_C(199), UINT8_C(136), UINT8_C( 21), UINT8_C( 91), UINT8_C(241), UINT8_C(189), UINT8_C( 75), UINT8_C( 68), UINT8_C(197), UINT8_C( 24), UINT8_C(209), UINT8_C(190), UINT8_C( 46), UINT8_C(216), UINT8_C(179), UINT8_C(194), UINT8_C(215), UINT8_C(191), UINT8_C(119), UINT8_C( 36), UINT8_C(232), UINT8_C(238), UINT8_C(164), UINT8_C(136), UINT8_C(211), UINT8_C( 25), UINT8_C( 88), UINT8_C( 82), UINT8_C( 79), UINT8_C( 84), UINT8_C(178), UINT8_C( 22), UINT8_C(221), UINT8_C(200), UINT8_C(113), UINT8_C(206), UINT8_C(133), UINT8_C(188), UINT8_C( 19), UINT8_C( 74), UINT8_C(212), UINT8_C(228), UINT8_C( 8), UINT8_C( 2), UINT8_C(189), UINT8_C(188), UINT8_C(196), UINT8_C(148), UINT8_C(123), UINT8_C( 60), UINT8_C(185), UINT8_C(100) }, { UINT8_C( 25), UINT8_C( 41), UINT8_C( 86), UINT8_C( 6), UINT8_C(159), UINT8_C(229), UINT8_C( 90), UINT8_C(174), UINT8_C(126), UINT8_C( 53), UINT8_C( 42), UINT8_C( 96), UINT8_C(199), UINT8_C( 73), UINT8_C( 21), UINT8_C( 91), UINT8_C(211), 
UINT8_C( 2), UINT8_C( 75), UINT8_C( 68), UINT8_C( 92), UINT8_C( 24), UINT8_C( 97), UINT8_C(190), UINT8_C( 46), UINT8_C(216), UINT8_C( 73), UINT8_C(122), UINT8_C(215), UINT8_C(126), UINT8_C(119), UINT8_C( 22), UINT8_C( 33), UINT8_C( 78), UINT8_C( 29), UINT8_C(136), UINT8_C( 51), UINT8_C( 25), UINT8_C( 88), UINT8_C( 82), UINT8_C( 47), UINT8_C( 84), UINT8_C(178), UINT8_C( 22), UINT8_C(221), UINT8_C(145), UINT8_C(113), UINT8_C(181), UINT8_C(133), UINT8_C(105), UINT8_C( 19), UINT8_C( 74), UINT8_C( 83), UINT8_C( 8), UINT8_C( 8), UINT8_C( 2), UINT8_C(189), UINT8_C( 22), UINT8_C( 7), UINT8_C(148), UINT8_C(123), UINT8_C( 60), UINT8_C( 12), UINT8_C(100) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_min_epu8(a, b); simde_test_x86_assert_equal_u8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_min_epu8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int8_t src[64]; const simde__mmask64 k; const uint8_t a[64]; const uint8_t b[64]; const uint8_t r[64]; } test_vec[] = { { { INT8_C( 25), -INT8_C( 115), INT8_C( 79), -INT8_C( 105), -INT8_C( 118), -INT8_C( 48), INT8_C( 60), INT8_C( 124), INT8_C( 36), INT8_C( 36), INT8_C( 79), -INT8_C( 57), INT8_C( 83), INT8_C( 122), -INT8_C( 25), INT8_C( 107), -INT8_C( 54), INT8_C( 79), -INT8_C( 111), INT8_C( 105), INT8_C( 61), -INT8_C( 3), -INT8_C( 33), -INT8_C( 44), -INT8_C( 34), INT8_C( 97), INT8_C( 26), INT8_C( 1), INT8_C( 36), -INT8_C( 49), -INT8_C( 78), INT8_C( 61), INT8_C( 92), INT8_C( 1), -INT8_C( 44), -INT8_C( 26), -INT8_C( 47), INT8_C( 16), INT8_C( 99), -INT8_C( 10), INT8_C( 52), -INT8_C( 78), -INT8_C( 67), -INT8_C( 121), INT8_C( 44), -INT8_C( 92), -INT8_C( 14), -INT8_C( 10), -INT8_C( 13), -INT8_C( 124), INT8_C( 95), INT8_C( 49), -INT8_C( 127), INT8_C( 63), INT8_C( 5), INT8_C( 96), -INT8_C( 96), INT8_C( 31), INT8_C( 
97), -INT8_C( 60), -INT8_C( 18), INT8_C( 19), INT8_C( 2), INT8_C( 74) }, UINT64_C( 2007643257620780565), { UINT8_C( 70), UINT8_C(153), UINT8_C(163), UINT8_C(114), UINT8_C( 61), UINT8_C(149), UINT8_C(104), UINT8_C( 49), UINT8_C( 25), UINT8_C(199), UINT8_C( 98), UINT8_C(155), UINT8_C( 6), UINT8_C(103), UINT8_C(251), UINT8_C(167), UINT8_C(135), UINT8_C( 92), UINT8_C(107), UINT8_C(117), UINT8_C(112), UINT8_C(109), UINT8_C(192), UINT8_C(133), UINT8_C( 68), UINT8_C(241), UINT8_C(107), UINT8_C( 43), UINT8_C(133), UINT8_C( 72), UINT8_C( 70), UINT8_C(203), UINT8_C(225), UINT8_C(233), UINT8_C( 61), UINT8_C( 31), UINT8_C(127), UINT8_C(165), UINT8_C( 80), UINT8_C(152), UINT8_C(108), UINT8_C(178), UINT8_C( 51), UINT8_C(115), UINT8_C( 25), UINT8_C( 46), UINT8_C( 26), UINT8_C(160), UINT8_C(139), UINT8_C(133), UINT8_C( 22), UINT8_C(251), UINT8_C(243), UINT8_C(214), UINT8_C(128), UINT8_C( 55), UINT8_C(199), UINT8_C(235), UINT8_C( 98), UINT8_C( 76), UINT8_C( 51), UINT8_C(168), UINT8_C( 23), UINT8_C( 21) }, { UINT8_C(146), UINT8_C( 84), UINT8_C( 52), UINT8_C( 17), UINT8_C(249), UINT8_C(132), UINT8_C(169), UINT8_C(101), UINT8_C( 54), UINT8_C(221), UINT8_C(216), UINT8_C( 79), UINT8_C( 11), UINT8_C(242), UINT8_C(240), UINT8_C(150), UINT8_C(120), UINT8_C( 6), UINT8_C(145), UINT8_C(107), UINT8_C(220), UINT8_C( 17), UINT8_C(162), UINT8_C(163), UINT8_C(253), UINT8_C( 4), UINT8_C(239), UINT8_C( 48), UINT8_C(172), UINT8_C( 6), UINT8_C( 69), UINT8_C( 62), UINT8_C( 90), UINT8_C(121), UINT8_C( 79), UINT8_C( 83), UINT8_C(253), UINT8_C(249), UINT8_C(184), UINT8_C( 51), UINT8_C(214), UINT8_C(145), UINT8_C(131), UINT8_C(225), UINT8_C(131), UINT8_C(115), UINT8_C(120), UINT8_C(251), UINT8_C(121), UINT8_C( 9), UINT8_C(102), UINT8_C( 85), UINT8_C( 27), UINT8_C( 8), UINT8_C(248), UINT8_C( 24), UINT8_C( 12), UINT8_C(231), UINT8_C( 72), UINT8_C(185), UINT8_C(237), UINT8_C(142), UINT8_C(247), UINT8_C( 71) }, { UINT8_C( 70), UINT8_C(141), UINT8_C( 52), UINT8_C(151), UINT8_C( 61), UINT8_C(208), UINT8_C( 60), 
UINT8_C(124), UINT8_C( 36), UINT8_C(199), UINT8_C( 98), UINT8_C(199), UINT8_C( 6), UINT8_C(122), UINT8_C(240), UINT8_C(150), UINT8_C(120), UINT8_C( 79), UINT8_C(145), UINT8_C(105), UINT8_C(112), UINT8_C( 17), UINT8_C(223), UINT8_C(212), UINT8_C(222), UINT8_C( 4), UINT8_C(107), UINT8_C( 1), UINT8_C( 36), UINT8_C( 6), UINT8_C( 69), UINT8_C( 62), UINT8_C( 90), UINT8_C(121), UINT8_C( 61), UINT8_C(230), UINT8_C(209), UINT8_C(165), UINT8_C( 80), UINT8_C( 51), UINT8_C( 52), UINT8_C(178), UINT8_C( 51), UINT8_C(135), UINT8_C( 25), UINT8_C(164), UINT8_C(242), UINT8_C(160), UINT8_C(243), UINT8_C(132), UINT8_C( 22), UINT8_C( 85), UINT8_C( 27), UINT8_C( 63), UINT8_C(128), UINT8_C( 24), UINT8_C( 12), UINT8_C(231), UINT8_C( 97), UINT8_C( 76), UINT8_C( 51), UINT8_C( 19), UINT8_C( 2), UINT8_C( 74) } }, { { INT8_C( 7), INT8_C( 71), -INT8_C( 102), INT8_C( 5), INT8_C( 64), INT8_C( 82), INT8_C( 56), INT8_C( 22), -INT8_C( 29), -INT8_C( 69), -INT8_C( 9), INT8_C( 103), INT8_C( 46), INT8_C( 111), INT8_C( 98), -INT8_C( 89), INT8_C( 121), -INT8_C( 55), -INT8_C( 4), -INT8_C( 108), -INT8_C( 47), -INT8_C( 12), -INT8_C( 84), -INT8_C( 34), -INT8_C( 37), -INT8_C( 12), -INT8_C( 105), -INT8_C( 56), -INT8_C( 126), -INT8_C( 114), INT8_C( 15), -INT8_C( 118), -INT8_C( 43), -INT8_C( 87), -INT8_C( 113), INT8_C( 21), -INT8_C( 4), -INT8_C( 57), INT8_C( 43), -INT8_C( 33), -INT8_C( 125), INT8_C( 35), INT8_C( 70), -INT8_C( 79), -INT8_C( 110), -INT8_C( 87), INT8_C( 89), INT8_C( 11), INT8_C( 114), INT8_C( 85), -INT8_C( 97), INT8_C( 67), INT8_C( 74), INT8_C( 75), INT8_C( 33), INT8_C( 37), INT8_C( 64), -INT8_C( 72), -INT8_C( 18), -INT8_C( 62), INT8_C( 71), -INT8_C( 3), INT8_C( 76), INT8_C( 28) }, UINT64_C( 2774883277812718503), { UINT8_C(128), UINT8_C(201), UINT8_C(215), UINT8_C( 19), UINT8_C(114), UINT8_C( 48), UINT8_C( 30), UINT8_C(228), UINT8_C(134), UINT8_C(190), UINT8_C( 39), UINT8_C(208), UINT8_C( 9), UINT8_C( 73), UINT8_C(245), UINT8_C( 73), UINT8_C( 1), UINT8_C(227), UINT8_C( 12), UINT8_C( 72), 
UINT8_C(225), UINT8_C( 88), UINT8_C(101), UINT8_C(136), UINT8_C( 52), UINT8_C(151), UINT8_C( 43), UINT8_C(215), UINT8_C(244), UINT8_C(173), UINT8_C(253), UINT8_C(117), UINT8_C(118), UINT8_C(212), UINT8_C(136), UINT8_C(232), UINT8_C( 5), UINT8_C(166), UINT8_C(204), UINT8_C(139), UINT8_C(100), UINT8_C(244), UINT8_C( 91), UINT8_C(110), UINT8_C( 61), UINT8_C( 80), UINT8_C(183), UINT8_C( 62), UINT8_C( 52), UINT8_C(195), UINT8_C(135), UINT8_C( 21), UINT8_C( 28), UINT8_C(236), UINT8_C(157), UINT8_C( 80), UINT8_C(131), UINT8_C(200), UINT8_C( 39), UINT8_C(119), UINT8_C(117), UINT8_C( 36), UINT8_C(236), UINT8_C(236) }, { UINT8_C(248), UINT8_C(116), UINT8_C(212), UINT8_C(253), UINT8_C( 27), UINT8_C(161), UINT8_C(136), UINT8_C(127), UINT8_C(149), UINT8_C(227), UINT8_C(237), UINT8_C(210), UINT8_C( 52), UINT8_C(165), UINT8_C( 16), UINT8_C(104), UINT8_C(104), UINT8_C(151), UINT8_C(125), UINT8_C(132), UINT8_C(131), UINT8_C( 26), UINT8_C(212), UINT8_C( 6), UINT8_C(226), UINT8_C(251), UINT8_C(126), UINT8_C( 87), UINT8_C( 31), UINT8_C(106), UINT8_C( 67), UINT8_C( 24), UINT8_C(223), UINT8_C( 24), UINT8_C( 21), UINT8_C(250), UINT8_C(185), UINT8_C(158), UINT8_C(121), UINT8_C( 78), UINT8_C(129), UINT8_C(103), UINT8_C( 32), UINT8_C(181), UINT8_C( 12), UINT8_C( 48), UINT8_C( 29), UINT8_C(116), UINT8_C(200), UINT8_C(154), UINT8_C(249), UINT8_C( 75), UINT8_C(180), UINT8_C(205), UINT8_C( 82), UINT8_C(150), UINT8_C(201), UINT8_C(208), UINT8_C(238), UINT8_C(232), UINT8_C( 58), UINT8_C( 49), UINT8_C( 0), UINT8_C( 25) }, { UINT8_C(128), UINT8_C(116), UINT8_C(212), UINT8_C( 5), UINT8_C( 64), UINT8_C( 48), UINT8_C( 56), UINT8_C(127), UINT8_C(134), UINT8_C(190), UINT8_C(247), UINT8_C(208), UINT8_C( 9), UINT8_C(111), UINT8_C( 16), UINT8_C( 73), UINT8_C(121), UINT8_C(151), UINT8_C(252), UINT8_C(148), UINT8_C(131), UINT8_C( 26), UINT8_C(172), UINT8_C(222), UINT8_C( 52), UINT8_C(151), UINT8_C(151), UINT8_C(200), UINT8_C(130), UINT8_C(106), UINT8_C( 15), UINT8_C( 24), UINT8_C(118), UINT8_C( 24), 
UINT8_C(143), UINT8_C( 21), UINT8_C(252), UINT8_C(158), UINT8_C( 43), UINT8_C( 78), UINT8_C(100), UINT8_C( 35), UINT8_C( 32), UINT8_C(110), UINT8_C( 12), UINT8_C(169), UINT8_C( 29), UINT8_C( 11), UINT8_C(114), UINT8_C(154), UINT8_C(159), UINT8_C( 67), UINT8_C( 74), UINT8_C( 75), UINT8_C( 33), UINT8_C( 80), UINT8_C( 64), UINT8_C(200), UINT8_C( 39), UINT8_C(194), UINT8_C( 71), UINT8_C( 36), UINT8_C( 76), UINT8_C( 28) } }, { { INT8_C( 73), INT8_C( 22), INT8_C( 19), INT8_C( 2), -INT8_C( 76), -INT8_C( 115), INT8_C( 80), INT8_C( 53), -INT8_C( 12), INT8_C( 112), -INT8_C( 21), INT8_C( 0), -INT8_C( 95), INT8_C( 8), INT8_C( 116), INT8_C( 105), -INT8_C( 93), INT8_C( 109), -INT8_C( 76), INT8_C( 87), INT8_C( 59), INT8_C( 6), -INT8_C( 18), INT8_C( 4), -INT8_C( 42), -INT8_C( 36), -INT8_C( 20), INT8_C( 17), INT8_C( 13), -INT8_C( 19), INT8_C( 42), INT8_C( 87), INT8_C( 3), INT8_C( 62), INT8_C( 89), -INT8_C( 73), -INT8_C( 53), -INT8_C( 86), -INT8_C( 20), -INT8_C( 65), INT8_C( 26), -INT8_C( 41), -INT8_C( 65), -INT8_C( 69), -INT8_C( 32), INT8_C( 51), INT8_C( 36), -INT8_C( 125), -INT8_C( 95), -INT8_C( 39), -INT8_C( 38), -INT8_C( 36), -INT8_C( 33), -INT8_C( 56), -INT8_C( 32), -INT8_C( 74), -INT8_C( 92), -INT8_C( 52), -INT8_C( 57), -INT8_C( 78), -INT8_C( 71), -INT8_C( 15), INT8_C( 9), -INT8_C( 68) }, UINT64_C( 2862424647028400687), { UINT8_C( 55), UINT8_C(120), UINT8_C(226), UINT8_C( 23), UINT8_C(172), UINT8_C( 7), UINT8_C(154), UINT8_C( 77), UINT8_C(224), UINT8_C(117), UINT8_C( 41), UINT8_C(191), UINT8_C( 61), UINT8_C( 9), UINT8_C(117), UINT8_C(226), UINT8_C(213), UINT8_C( 60), UINT8_C(148), UINT8_C(143), UINT8_C( 46), UINT8_C(157), UINT8_C( 75), UINT8_C( 93), UINT8_MAX, UINT8_C(191), UINT8_C( 88), UINT8_C( 12), UINT8_C( 31), UINT8_C( 17), UINT8_C( 51), UINT8_C( 86), UINT8_C(138), UINT8_C( 21), UINT8_C(110), UINT8_C( 54), UINT8_C( 28), UINT8_C( 8), UINT8_C(131), UINT8_C(252), UINT8_C(125), UINT8_C(172), UINT8_C(188), UINT8_C(187), UINT8_C(181), UINT8_C( 49), UINT8_C(157), UINT8_C(138), 
UINT8_C(110), UINT8_C( 49), UINT8_C( 25), UINT8_C(156), UINT8_C(206), UINT8_C(101), UINT8_C(249), UINT8_C(205), UINT8_C( 36), UINT8_C( 81), UINT8_C(217), UINT8_C( 67), UINT8_C( 99), UINT8_C( 12), UINT8_C(153), UINT8_C(237) }, { UINT8_C( 34), UINT8_C( 7), UINT8_C( 35), UINT8_C( 62), UINT8_C( 16), UINT8_C(166), UINT8_C( 59), UINT8_C(141), UINT8_C( 82), UINT8_C(247), UINT8_C( 72), UINT8_C( 7), UINT8_C( 40), UINT8_C(229), UINT8_C(145), UINT8_C(150), UINT8_C( 22), UINT8_C(171), UINT8_C( 50), UINT8_C(228), UINT8_C( 16), UINT8_C( 44), UINT8_C(178), UINT8_C( 52), UINT8_C(125), UINT8_C(139), UINT8_C(119), UINT8_C(224), UINT8_C(152), UINT8_C( 16), UINT8_C(205), UINT8_C(186), UINT8_C( 24), UINT8_C(240), UINT8_C(248), UINT8_C( 40), UINT8_C(150), UINT8_C( 51), UINT8_C(181), UINT8_C(232), UINT8_C( 42), UINT8_C(254), UINT8_C(239), UINT8_C( 83), UINT8_C(227), UINT8_C(129), UINT8_C(233), UINT8_C(250), UINT8_C( 44), UINT8_C( 28), UINT8_C(222), UINT8_C( 60), UINT8_C( 72), UINT8_C(144), UINT8_C(112), UINT8_C(197), UINT8_C( 28), UINT8_C(231), UINT8_C(166), UINT8_C(180), UINT8_C(247), UINT8_C(115), UINT8_C(110), UINT8_C( 15) }, { UINT8_C( 34), UINT8_C( 7), UINT8_C( 35), UINT8_C( 23), UINT8_C(180), UINT8_C( 7), UINT8_C( 80), UINT8_C( 53), UINT8_C(244), UINT8_C(117), UINT8_C(235), UINT8_C( 0), UINT8_C(161), UINT8_C( 9), UINT8_C(117), UINT8_C(105), UINT8_C( 22), UINT8_C( 60), UINT8_C(180), UINT8_C( 87), UINT8_C( 16), UINT8_C( 44), UINT8_C( 75), UINT8_C( 4), UINT8_C(214), UINT8_C(139), UINT8_C(236), UINT8_C( 12), UINT8_C( 31), UINT8_C( 16), UINT8_C( 51), UINT8_C( 86), UINT8_C( 3), UINT8_C( 62), UINT8_C(110), UINT8_C( 40), UINT8_C(203), UINT8_C(170), UINT8_C(236), UINT8_C(191), UINT8_C( 26), UINT8_C(215), UINT8_C(191), UINT8_C(187), UINT8_C(224), UINT8_C( 49), UINT8_C(157), UINT8_C(131), UINT8_C( 44), UINT8_C(217), UINT8_C(218), UINT8_C( 60), UINT8_C( 72), UINT8_C(101), UINT8_C(224), UINT8_C(197), UINT8_C( 28), UINT8_C( 81), UINT8_C(166), UINT8_C(178), UINT8_C(185), UINT8_C( 12), UINT8_C( 
9), UINT8_C(188) } }, { { INT8_C( 100), INT8_C( 102), INT8_C( 55), -INT8_C( 6), -INT8_C( 102), -INT8_C( 19), -INT8_C( 29), -INT8_C( 60), -INT8_C( 21), -INT8_C( 46), INT8_C( 23), -INT8_C( 50), INT8_C( 83), INT8_C( 1), -INT8_C( 56), INT8_MAX, INT8_C( 29), -INT8_C( 89), -INT8_C( 69), INT8_C( 101), INT8_C( 55), INT8_C( 43), INT8_C( 42), INT8_C( 83), INT8_C( 18), -INT8_C( 48), INT8_C( 7), INT8_C( 10), INT8_C( 68), INT8_C( 117), INT8_C( 25), -INT8_C( 88), -INT8_C( 36), INT8_C( 81), -INT8_C( 94), INT8_C( 118), INT8_C( 62), -INT8_C( 123), INT8_C( 58), INT8_C( 41), INT8_C( 88), INT8_C( 82), -INT8_C( 9), -INT8_C( 85), INT8_C( 83), -INT8_C( 64), INT8_C( 43), INT8_C( 112), INT8_C( 103), -INT8_C( 26), -INT8_C( 43), -INT8_C( 98), INT8_C( 18), -INT8_C( 1), -INT8_C( 14), INT8_C( 36), -INT8_C( 48), -INT8_C( 7), INT8_C( 46), INT8_C( 20), INT8_C( 111), INT8_C( 72), -INT8_C( 68), INT8_C( 75) }, UINT64_C( 4323732602566565529), { UINT8_C( 77), UINT8_C(247), UINT8_C(231), UINT8_C(160), UINT8_C(183), UINT8_C( 18), UINT8_C( 16), UINT8_C( 30), UINT8_C(249), UINT8_C(229), UINT8_C(189), UINT8_C( 11), UINT8_C(229), UINT8_C(175), UINT8_C( 47), UINT8_C(181), UINT8_C(168), UINT8_C( 94), UINT8_C(201), UINT8_C( 23), UINT8_C(166), UINT8_C(133), UINT8_C( 98), UINT8_C( 63), UINT8_C(227), UINT8_C( 35), UINT8_C( 22), UINT8_C(199), UINT8_C( 31), UINT8_C( 22), UINT8_C( 3), UINT8_C(108), UINT8_C( 13), UINT8_C(235), UINT8_C( 13), UINT8_C(197), UINT8_C(253), UINT8_C( 29), UINT8_C(227), UINT8_C(246), UINT8_C( 3), UINT8_C(160), UINT8_C( 1), UINT8_C(232), UINT8_C( 79), UINT8_C( 49), UINT8_C(157), UINT8_C(248), UINT8_C(143), UINT8_C(102), UINT8_C( 15), UINT8_C( 53), UINT8_C(235), UINT8_C(114), UINT8_C(116), UINT8_C(206), UINT8_C(149), UINT8_C(138), UINT8_C(150), UINT8_C(180), UINT8_C(160), UINT8_C(153), UINT8_C( 33), UINT8_C(173) }, { UINT8_C(132), UINT8_C( 46), UINT8_C(114), UINT8_C(130), UINT8_C( 75), UINT8_C( 86), UINT8_C(120), UINT8_C( 78), UINT8_C(246), UINT8_C(122), UINT8_C( 54), UINT8_C( 70), 
UINT8_C(171), UINT8_C(211), UINT8_C( 62), UINT8_C( 58), UINT8_C( 57), UINT8_C( 77), UINT8_C(111), UINT8_C( 36), UINT8_C(191), UINT8_C(227), UINT8_C(243), UINT8_C( 85), UINT8_C(109), UINT8_C(137), UINT8_C( 9), UINT8_C( 13), UINT8_C( 34), UINT8_C( 42), UINT8_C(186), UINT8_C(167), UINT8_C( 88), UINT8_C( 45), UINT8_C( 41), UINT8_C(164), UINT8_C(131), UINT8_C(161), UINT8_C(242), UINT8_C(121), UINT8_C( 27), UINT8_C( 41), UINT8_C(191), UINT8_C(198), UINT8_C(252), UINT8_C(253), UINT8_C( 0), UINT8_C( 54), UINT8_C( 75), UINT8_C(111), UINT8_C( 90), UINT8_C( 10), UINT8_C( 82), UINT8_C( 77), UINT8_C( 95), UINT8_C(191), UINT8_C(214), UINT8_C(105), UINT8_C(204), UINT8_C(249), UINT8_C(147), UINT8_C(135), UINT8_C(160), UINT8_C(236) }, { UINT8_C( 77), UINT8_C(102), UINT8_C( 55), UINT8_C(130), UINT8_C( 75), UINT8_C(237), UINT8_C(227), UINT8_C( 30), UINT8_C(235), UINT8_C(122), UINT8_C( 54), UINT8_C( 11), UINT8_C(171), UINT8_C( 1), UINT8_C( 47), UINT8_C(127), UINT8_C( 57), UINT8_C(167), UINT8_C(187), UINT8_C(101), UINT8_C( 55), UINT8_C( 43), UINT8_C( 98), UINT8_C( 63), UINT8_C(109), UINT8_C( 35), UINT8_C( 9), UINT8_C( 10), UINT8_C( 31), UINT8_C(117), UINT8_C( 3), UINT8_C(108), UINT8_C(220), UINT8_C( 81), UINT8_C( 13), UINT8_C(118), UINT8_C( 62), UINT8_C( 29), UINT8_C(227), UINT8_C(121), UINT8_C( 3), UINT8_C( 41), UINT8_C(247), UINT8_C(198), UINT8_C( 79), UINT8_C( 49), UINT8_C( 0), UINT8_C( 54), UINT8_C(103), UINT8_C(230), UINT8_C(213), UINT8_C(158), UINT8_C( 18), UINT8_MAX, UINT8_C(242), UINT8_C( 36), UINT8_C(208), UINT8_C(249), UINT8_C(150), UINT8_C(180), UINT8_C(147), UINT8_C(135), UINT8_C(188), UINT8_C( 75) } }, { { -INT8_C( 76), -INT8_C( 55), -INT8_C( 112), INT8_C( 55), INT8_C( 106), -INT8_C( 126), -INT8_C( 80), -INT8_C( 122), -INT8_C( 85), INT8_C( 112), INT8_C( 76), -INT8_C( 88), INT8_C( 109), INT8_C( 77), -INT8_C( 34), -INT8_C( 72), -INT8_C( 68), INT8_C( 56), -INT8_C( 61), INT8_C( 15), -INT8_C( 122), INT8_C( 34), -INT8_C( 50), INT8_C( 92), -INT8_C( 117), -INT8_C( 101), INT8_C( 
85), INT8_C( 31), INT8_C( 34), -INT8_C( 11), INT8_C( 11), -INT8_C( 42), -INT8_C( 66), -INT8_C( 101), INT8_C( 13), INT8_C( 41), INT8_C( 29), -INT8_C( 67), -INT8_C( 81), -INT8_C( 55), INT8_C( 45), -INT8_C( 5), INT8_C( 113), -INT8_C( 101), INT8_C( 72), INT8_C( 79), INT8_C( 83), INT8_C( 5), -INT8_C( 121), INT8_C( 22), INT8_C( 20), INT8_C( 13), INT8_C( 57), -INT8_C( 30), INT8_C( 106), -INT8_C( 60), INT8_C( 125), -INT8_C( 65), -INT8_C( 29), -INT8_C( 97), -INT8_C( 75), -INT8_C( 18), INT8_C( 117), INT8_C( 115) }, UINT64_C( 7885885688587780745), { UINT8_C( 71), UINT8_C(225), UINT8_C( 8), UINT8_C(143), UINT8_C( 48), UINT8_C( 92), UINT8_C(148), UINT8_C(183), UINT8_C(114), UINT8_C(168), UINT8_C(197), UINT8_C(171), UINT8_C(139), UINT8_C( 47), UINT8_C(112), UINT8_C( 8), UINT8_C(238), UINT8_C( 83), UINT8_C(168), UINT8_C(163), UINT8_C( 66), UINT8_C( 29), UINT8_C( 23), UINT8_C(203), UINT8_C(160), UINT8_C(179), UINT8_C(114), UINT8_C(224), UINT8_MAX, UINT8_C(226), UINT8_C( 77), UINT8_C( 70), UINT8_C(195), UINT8_C( 86), UINT8_C(213), UINT8_C(243), UINT8_C(178), UINT8_C(106), UINT8_C(171), UINT8_C( 36), UINT8_C( 18), UINT8_C(112), UINT8_C(208), UINT8_C(157), UINT8_C(159), UINT8_C( 64), UINT8_C(166), UINT8_C(141), UINT8_C(147), UINT8_C( 78), UINT8_C( 49), UINT8_C(213), UINT8_C(107), UINT8_C( 72), UINT8_C(161), UINT8_C( 11), UINT8_C(251), UINT8_C( 19), UINT8_C(235), UINT8_C(250), UINT8_C(246), UINT8_C( 57), UINT8_C( 64), UINT8_C(185) }, { UINT8_C(143), UINT8_C( 22), UINT8_C(173), UINT8_C( 65), UINT8_C(128), UINT8_C( 88), UINT8_C(101), UINT8_C(146), UINT8_C(200), UINT8_C( 53), UINT8_C( 48), UINT8_C(103), UINT8_C(117), UINT8_C(214), UINT8_C(244), UINT8_C( 9), UINT8_C( 36), UINT8_C( 37), UINT8_C(222), UINT8_C(143), UINT8_C(109), UINT8_C(127), UINT8_C(155), UINT8_C(105), UINT8_C(147), UINT8_C(134), UINT8_C( 99), UINT8_C(137), UINT8_C(191), UINT8_C(164), UINT8_C( 66), UINT8_C( 78), UINT8_C(186), UINT8_C(239), UINT8_C(143), UINT8_C( 58), UINT8_C( 71), UINT8_C(245), UINT8_C(204), UINT8_C( 15), 
UINT8_C( 42), UINT8_C(252), UINT8_C(118), UINT8_C(160), UINT8_C(210), UINT8_C(107), UINT8_C(169), UINT8_C(246), UINT8_C(144), UINT8_C(135), UINT8_C(134), UINT8_C(254), UINT8_C( 7), UINT8_C( 33), UINT8_C(103), UINT8_C(154), UINT8_C(167), UINT8_C(202), UINT8_C( 35), UINT8_C(103), UINT8_C(110), UINT8_C(101), UINT8_C(181), UINT8_C( 40) }, { UINT8_C( 71), UINT8_C(201), UINT8_C(144), UINT8_C( 65), UINT8_C(106), UINT8_C(130), UINT8_C(176), UINT8_C(146), UINT8_C(171), UINT8_C( 53), UINT8_C( 76), UINT8_C(168), UINT8_C(109), UINT8_C( 77), UINT8_C(222), UINT8_C( 8), UINT8_C(188), UINT8_C( 56), UINT8_C(168), UINT8_C(143), UINT8_C( 66), UINT8_C( 34), UINT8_C(206), UINT8_C(105), UINT8_C(147), UINT8_C(134), UINT8_C( 99), UINT8_C( 31), UINT8_C( 34), UINT8_C(164), UINT8_C( 11), UINT8_C( 70), UINT8_C(190), UINT8_C(155), UINT8_C( 13), UINT8_C( 41), UINT8_C( 29), UINT8_C(189), UINT8_C(171), UINT8_C(201), UINT8_C( 18), UINT8_C(112), UINT8_C(113), UINT8_C(157), UINT8_C( 72), UINT8_C( 79), UINT8_C(166), UINT8_C( 5), UINT8_C(135), UINT8_C( 22), UINT8_C( 20), UINT8_C( 13), UINT8_C( 7), UINT8_C( 33), UINT8_C(103), UINT8_C(196), UINT8_C(167), UINT8_C(191), UINT8_C( 35), UINT8_C(103), UINT8_C(181), UINT8_C( 57), UINT8_C( 64), UINT8_C(115) } }, { { INT8_C( 85), INT8_C( 69), INT8_C( 98), -INT8_C( 100), INT8_C( 58), INT8_C( 47), -INT8_C( 84), INT8_C( 100), INT8_C( 43), INT8_C( 34), INT8_C( 4), -INT8_C( 2), -INT8_C( 115), -INT8_C( 83), -INT8_C( 12), INT8_C( 30), INT8_C( 53), INT8_C( 122), INT8_C( 28), INT8_C( 60), -INT8_C( 101), -INT8_C( 125), -INT8_C( 42), INT8_C( 67), INT8_C( 77), -INT8_C( 7), -INT8_C( 86), -INT8_C( 68), INT8_C( 94), INT8_C( 95), -INT8_C( 28), -INT8_C( 77), -INT8_C( 92), INT8_C( 71), INT8_C( 80), -INT8_C( 34), INT8_C( 118), -INT8_C( 4), INT8_C( 67), -INT8_C( 95), INT8_C( 30), INT8_C( 71), -INT8_C( 97), -INT8_C( 84), -INT8_C( 11), -INT8_C( 108), -INT8_C( 54), INT8_C( 42), INT8_C( 14), -INT8_C( 26), INT8_C( 102), -INT8_C( 86), INT8_C( 105), INT8_C( 60), -INT8_C( 19), -INT8_C( 
74), INT8_C( 53), -INT8_C( 105), INT8_C( 114), -INT8_C( 109), -INT8_C( 10), INT8_C( 87), INT8_C( 71), -INT8_C( 101) }, UINT64_C(12805348455387600798), { UINT8_C( 4), UINT8_C( 85), UINT8_C( 93), UINT8_C(249), UINT8_C(233), UINT8_C( 39), UINT8_C( 35), UINT8_C(247), UINT8_C( 13), UINT8_C(137), UINT8_C(161), UINT8_C(118), UINT8_C(197), UINT8_C(142), UINT8_C( 45), UINT8_C(250), UINT8_C( 37), UINT8_C(159), UINT8_C(141), UINT8_C( 28), UINT8_C(246), UINT8_C(212), UINT8_C(183), UINT8_C(148), UINT8_C(107), UINT8_C( 48), UINT8_C(168), UINT8_C(254), UINT8_C(237), UINT8_C( 94), UINT8_C(176), UINT8_C(241), UINT8_C(179), UINT8_C( 13), UINT8_C(234), UINT8_C(156), UINT8_C( 53), UINT8_C( 13), UINT8_C(147), UINT8_C( 66), UINT8_C(150), UINT8_C( 53), UINT8_C(185), UINT8_C( 91), UINT8_C(195), UINT8_C(230), UINT8_C( 85), UINT8_C(233), UINT8_C(133), UINT8_C(226), UINT8_C( 5), UINT8_C(124), UINT8_C(183), UINT8_C(188), UINT8_C( 16), UINT8_C( 34), UINT8_C(236), UINT8_C(185), UINT8_C( 33), UINT8_C(217), UINT8_C( 23), UINT8_C(209), UINT8_C(202), UINT8_C(202) }, { UINT8_C(222), UINT8_C(180), UINT8_C(102), UINT8_C( 19), UINT8_C(193), UINT8_C(249), UINT8_C( 86), UINT8_C( 87), UINT8_C( 46), UINT8_C( 15), UINT8_C(178), UINT8_C(242), UINT8_C(245), UINT8_C( 7), UINT8_C(219), UINT8_C(122), UINT8_C(234), UINT8_C(224), UINT8_C(246), UINT8_C(161), UINT8_C(156), UINT8_C( 7), UINT8_C(195), UINT8_C(136), UINT8_C(192), UINT8_C(228), UINT8_C( 98), UINT8_C(215), UINT8_C(181), UINT8_C( 44), UINT8_C(161), UINT8_C(148), UINT8_C(225), UINT8_C( 7), UINT8_C(167), UINT8_C(162), UINT8_C( 0), UINT8_C(253), UINT8_C(250), UINT8_C( 47), UINT8_C( 12), UINT8_C(172), UINT8_C( 33), UINT8_C( 1), UINT8_C(180), UINT8_C(252), UINT8_C(124), UINT8_C(158), UINT8_C(220), UINT8_C(114), UINT8_C( 63), UINT8_C(120), UINT8_C(121), UINT8_C( 2), UINT8_C( 0), UINT8_C( 57), UINT8_C(231), UINT8_C( 98), UINT8_C( 16), UINT8_C(156), UINT8_C(143), UINT8_C(177), UINT8_C( 48), UINT8_C(112) }, { UINT8_C( 85), UINT8_C( 85), UINT8_C( 93), UINT8_C( 19), 
UINT8_C(193), UINT8_C( 47), UINT8_C(172), UINT8_C( 87), UINT8_C( 13), UINT8_C( 15), UINT8_C(161), UINT8_C(254), UINT8_C(197), UINT8_C(173), UINT8_C(244), UINT8_C(122), UINT8_C( 37), UINT8_C(122), UINT8_C( 28), UINT8_C( 28), UINT8_C(156), UINT8_C( 7), UINT8_C(183), UINT8_C( 67), UINT8_C( 77), UINT8_C(249), UINT8_C( 98), UINT8_C(188), UINT8_C(181), UINT8_C( 95), UINT8_C(228), UINT8_C(179), UINT8_C(179), UINT8_C( 7), UINT8_C( 80), UINT8_C(222), UINT8_C( 0), UINT8_C(252), UINT8_C( 67), UINT8_C( 47), UINT8_C( 30), UINT8_C( 71), UINT8_C( 33), UINT8_C( 1), UINT8_C(180), UINT8_C(230), UINT8_C(202), UINT8_C(158), UINT8_C(133), UINT8_C(230), UINT8_C( 5), UINT8_C(170), UINT8_C(121), UINT8_C( 2), UINT8_C(237), UINT8_C( 34), UINT8_C(231), UINT8_C(151), UINT8_C(114), UINT8_C(147), UINT8_C( 23), UINT8_C(177), UINT8_C( 71), UINT8_C(112) } }, { { -INT8_C( 72), -INT8_C( 40), INT8_C( 18), -INT8_C( 71), -INT8_C( 43), INT8_C( 12), -INT8_C( 24), -INT8_C( 30), -INT8_C( 71), INT8_C( 9), -INT8_C( 29), INT8_C( 109), INT8_C( 5), INT8_C( 95), INT8_C( 11), -INT8_C( 31), -INT8_C( 46), INT8_C( 74), INT8_C( 89), INT8_C( 75), INT8_C( 76), INT8_C( 89), -INT8_C( 123), INT8_C( 51), -INT8_C( 68), -INT8_C( 107), -INT8_C( 48), INT8_C( 75), INT8_C( 71), INT8_C( 0), -INT8_C( 69), -INT8_C( 1), -INT8_C( 40), -INT8_C( 51), -INT8_C( 72), -INT8_C( 82), -INT8_C( 38), -INT8_C( 96), -INT8_C( 112), -INT8_C( 109), -INT8_C( 87), INT8_C( 115), INT8_C( 0), -INT8_C( 82), -INT8_C( 45), INT8_C( 11), -INT8_C( 113), -INT8_C( 91), INT8_C( 85), -INT8_C( 24), -INT8_C( 16), -INT8_C( 95), INT8_C( 66), INT8_C( 117), -INT8_C( 43), -INT8_C( 2), INT8_C( 11), -INT8_C( 91), INT8_C( 73), INT8_C( 82), -INT8_C( 91), INT8_C( 4), INT8_C( 81), INT8_C( 126) }, UINT64_C( 6070496788944259793), { UINT8_C( 47), UINT8_C( 62), UINT8_C( 2), UINT8_C( 2), UINT8_C( 73), UINT8_C(146), UINT8_C(167), UINT8_C(158), UINT8_C(122), UINT8_C(152), UINT8_C( 64), UINT8_C(188), UINT8_C( 13), UINT8_C( 21), UINT8_C(186), UINT8_C( 24), UINT8_C(186), UINT8_C( 3), 
UINT8_C(106), UINT8_C( 95), UINT8_C( 7), UINT8_C(188), UINT8_C(221), UINT8_C(217), UINT8_C(198), UINT8_C( 9), UINT8_C(132), UINT8_C(112), UINT8_C(197), UINT8_C(195), UINT8_C(196), UINT8_C(245), UINT8_C( 1), UINT8_C(199), UINT8_C(247), UINT8_C( 75), UINT8_C( 89), UINT8_C(159), UINT8_C(233), UINT8_C(211), UINT8_C( 55), UINT8_C( 41), UINT8_C(144), UINT8_C( 68), UINT8_C( 62), UINT8_C( 74), UINT8_C( 93), UINT8_C(248), UINT8_C( 78), UINT8_C(199), UINT8_C( 88), UINT8_C( 85), UINT8_C(131), UINT8_C( 53), UINT8_C( 46), UINT8_C( 73), UINT8_C( 63), UINT8_C(179), UINT8_C(186), UINT8_C( 4), UINT8_C(118), UINT8_C(126), UINT8_C(249), UINT8_C(119) }, { UINT8_C( 69), UINT8_C(241), UINT8_C(194), UINT8_C(158), UINT8_C(144), UINT8_C(172), UINT8_C(114), UINT8_C(199), UINT8_C(213), UINT8_C( 2), UINT8_C( 11), UINT8_C( 20), UINT8_C( 76), UINT8_C(104), UINT8_C( 12), UINT8_C(154), UINT8_C( 48), UINT8_C(100), UINT8_C(240), UINT8_C(179), UINT8_C(154), UINT8_C( 30), UINT8_C(253), UINT8_C(217), UINT8_C(209), UINT8_C(183), UINT8_C(221), UINT8_C( 71), UINT8_C( 53), UINT8_C(215), UINT8_C(191), UINT8_C(123), UINT8_C(200), UINT8_C(129), UINT8_C( 25), UINT8_C( 88), UINT8_C( 45), UINT8_C(139), UINT8_C( 31), UINT8_C( 3), UINT8_C(141), UINT8_C( 42), UINT8_C( 23), UINT8_C(218), UINT8_C(147), UINT8_C( 35), UINT8_C(116), UINT8_C(195), UINT8_C(136), UINT8_C(100), UINT8_C(118), UINT8_C( 34), UINT8_C(131), UINT8_C(115), UINT8_C(251), UINT8_C( 84), UINT8_C( 42), UINT8_C(216), UINT8_C(156), UINT8_C( 96), UINT8_C(175), UINT8_C( 91), UINT8_C(219), UINT8_C(119) }, { UINT8_C( 47), UINT8_C(216), UINT8_C( 18), UINT8_C(185), UINT8_C( 73), UINT8_C( 12), UINT8_C(114), UINT8_C(158), UINT8_C(185), UINT8_C( 2), UINT8_C(227), UINT8_C( 20), UINT8_C( 5), UINT8_C( 95), UINT8_C( 11), UINT8_C(225), UINT8_C(210), UINT8_C( 74), UINT8_C(106), UINT8_C( 95), UINT8_C( 76), UINT8_C( 30), UINT8_C(133), UINT8_C( 51), UINT8_C(198), UINT8_C( 9), UINT8_C(208), UINT8_C( 71), UINT8_C( 71), UINT8_C(195), UINT8_C(187), UINT8_C(123), 
UINT8_C(216), UINT8_C(129), UINT8_C(184), UINT8_C( 75), UINT8_C(218), UINT8_C(139), UINT8_C(144), UINT8_C( 3), UINT8_C(169), UINT8_C(115), UINT8_C( 23), UINT8_C( 68), UINT8_C( 62), UINT8_C( 35), UINT8_C(143), UINT8_C(195), UINT8_C( 85), UINT8_C(100), UINT8_C( 88), UINT8_C( 34), UINT8_C(131), UINT8_C( 53), UINT8_C(213), UINT8_C(254), UINT8_C( 11), UINT8_C(165), UINT8_C(156), UINT8_C( 82), UINT8_C(118), UINT8_C( 4), UINT8_C(219), UINT8_C(126) } }, { { -INT8_C( 36), -INT8_C( 12), -INT8_C( 49), INT8_C( 10), INT8_MIN, -INT8_C( 18), INT8_C( 13), INT8_C( 13), INT8_C( 25), INT8_C( 36), -INT8_C( 25), -INT8_C( 84), INT8_C( 71), INT8_C( 92), INT8_C( 111), -INT8_C( 49), -INT8_C( 64), -INT8_C( 27), -INT8_C( 15), INT8_C( 67), INT8_C( 89), -INT8_C( 20), -INT8_C( 104), -INT8_C( 125), -INT8_C( 59), INT8_C( 52), -INT8_C( 29), INT8_C( 116), -INT8_C( 113), -INT8_C( 66), -INT8_C( 20), INT8_C( 107), -INT8_C( 77), -INT8_C( 69), INT8_C( 117), INT8_C( 51), -INT8_C( 86), -INT8_C( 126), INT8_C( 64), -INT8_C( 61), -INT8_C( 90), INT8_C( 40), INT8_C( 111), -INT8_C( 18), -INT8_C( 124), -INT8_C( 34), -INT8_C( 67), INT8_C( 68), -INT8_C( 61), -INT8_C( 81), -INT8_C( 120), INT8_C( 28), -INT8_C( 101), INT8_C( 32), -INT8_C( 96), INT8_C( 96), INT8_C( 84), -INT8_C( 125), -INT8_C( 43), -INT8_C( 29), INT8_C( 66), -INT8_C( 63), INT8_C( 78), -INT8_C( 11) }, UINT64_C(17143348107059709052), { UINT8_C(144), UINT8_C( 88), UINT8_C(219), UINT8_C( 20), UINT8_C( 54), UINT8_C(152), UINT8_C( 89), UINT8_C(250), UINT8_C( 71), UINT8_C(225), UINT8_C( 22), UINT8_C(227), UINT8_C( 1), UINT8_C(182), UINT8_C( 67), UINT8_C( 85), UINT8_C( 58), UINT8_C( 24), UINT8_C( 56), UINT8_C(124), UINT8_C(217), UINT8_C(134), UINT8_C(113), UINT8_C( 86), UINT8_C( 74), UINT8_C(153), UINT8_C(124), UINT8_C(145), UINT8_C( 1), UINT8_C(102), UINT8_C(126), UINT8_C(146), UINT8_C(190), UINT8_C( 89), UINT8_C(166), UINT8_C(245), UINT8_C(241), UINT8_MAX, UINT8_C(239), UINT8_C( 57), UINT8_C(224), UINT8_C( 5), UINT8_C( 28), UINT8_C(225), UINT8_C(188), 
UINT8_C( 95), UINT8_C( 54), UINT8_C(246), UINT8_C(120), UINT8_C(110), UINT8_C(114), UINT8_C( 81), UINT8_C(245), UINT8_C(227), UINT8_C(167), UINT8_C( 63), UINT8_C(124), UINT8_C( 36), UINT8_C(208), UINT8_C(125), UINT8_C(138), UINT8_C( 78), UINT8_C( 15), UINT8_C( 72) }, { UINT8_C(167), UINT8_C(182), UINT8_C( 61), UINT8_C(153), UINT8_C(181), UINT8_C( 44), UINT8_C(210), UINT8_C(150), UINT8_C( 50), UINT8_C(238), UINT8_C(119), UINT8_C(238), UINT8_C( 77), UINT8_C(174), UINT8_C(228), UINT8_C(197), UINT8_C( 28), UINT8_C( 86), UINT8_C( 23), UINT8_C( 17), UINT8_C( 57), UINT8_C(190), UINT8_C( 81), UINT8_C(181), UINT8_C(226), UINT8_C( 33), UINT8_C( 50), UINT8_C(108), UINT8_C(112), UINT8_C( 66), UINT8_C(181), UINT8_C( 23), UINT8_C(248), UINT8_C(242), UINT8_C(176), UINT8_C(173), UINT8_C( 31), UINT8_C(130), UINT8_C( 67), UINT8_C( 81), UINT8_C(112), UINT8_C(187), UINT8_C( 63), UINT8_C(190), UINT8_C(105), UINT8_C( 35), UINT8_C(131), UINT8_C(133), UINT8_C(121), UINT8_C(154), UINT8_C(151), UINT8_C(178), UINT8_C( 89), UINT8_C(232), UINT8_C(103), UINT8_C( 59), UINT8_C( 9), UINT8_C(153), UINT8_C(168), UINT8_C(121), UINT8_C(219), UINT8_C( 93), UINT8_C(145), UINT8_C(211) }, { UINT8_C(220), UINT8_C(244), UINT8_C( 61), UINT8_C( 20), UINT8_C( 54), UINT8_C( 44), UINT8_C( 89), UINT8_C( 13), UINT8_C( 25), UINT8_C( 36), UINT8_C( 22), UINT8_C(172), UINT8_C( 71), UINT8_C( 92), UINT8_C( 67), UINT8_C( 85), UINT8_C(192), UINT8_C(229), UINT8_C(241), UINT8_C( 17), UINT8_C( 89), UINT8_C(134), UINT8_C(152), UINT8_C(131), UINT8_C(197), UINT8_C( 33), UINT8_C( 50), UINT8_C(116), UINT8_C(143), UINT8_C( 66), UINT8_C(236), UINT8_C(107), UINT8_C(179), UINT8_C( 89), UINT8_C(166), UINT8_C( 51), UINT8_C(170), UINT8_C(130), UINT8_C( 67), UINT8_C(195), UINT8_C(166), UINT8_C( 40), UINT8_C(111), UINT8_C(190), UINT8_C(132), UINT8_C( 35), UINT8_C( 54), UINT8_C( 68), UINT8_C(120), UINT8_C(175), UINT8_C(136), UINT8_C( 81), UINT8_C(155), UINT8_C(227), UINT8_C(103), UINT8_C( 59), UINT8_C( 9), UINT8_C(131), UINT8_C(168), 
UINT8_C(121), UINT8_C( 66), UINT8_C( 78), UINT8_C( 15), UINT8_C( 72) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi8(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_mask_min_epu8(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_u8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_min_epu8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask64 k; const uint8_t a[64]; const uint8_t b[64]; const uint8_t r[64]; } test_vec[] = { { UINT64_C(17082286413152396594), { UINT8_C(176), UINT8_C(246), UINT8_C( 65), UINT8_C(226), UINT8_C( 97), UINT8_C(118), UINT8_C( 24), UINT8_C( 47), UINT8_C( 52), UINT8_C(202), UINT8_C( 14), UINT8_C( 44), UINT8_C(237), UINT8_C(159), UINT8_C( 11), UINT8_C(251), UINT8_C(132), UINT8_C(170), UINT8_C( 10), UINT8_C(208), UINT8_C( 36), UINT8_C( 92), UINT8_C( 27), UINT8_C( 86), UINT8_C(129), UINT8_C(137), UINT8_C(253), UINT8_C(125), UINT8_C( 1), UINT8_C( 13), UINT8_C(107), UINT8_C(178), UINT8_C( 3), UINT8_C(172), UINT8_C(148), UINT8_C(101), UINT8_C( 34), UINT8_C(172), UINT8_C(148), UINT8_C( 86), UINT8_C(119), UINT8_C(162), UINT8_C(131), UINT8_C(100), UINT8_C( 66), UINT8_C(142), UINT8_C( 95), UINT8_C(198), UINT8_C( 56), UINT8_C(106), UINT8_C(151), UINT8_C( 92), UINT8_C(198), UINT8_C(178), UINT8_C(178), UINT8_C( 72), UINT8_C( 59), UINT8_C(175), UINT8_C(197), UINT8_C( 61), UINT8_C(189), UINT8_C( 48), UINT8_C(239), UINT8_C(192) }, { UINT8_C(220), UINT8_C(131), UINT8_C( 37), UINT8_C(254), UINT8_C( 48), UINT8_C(185), UINT8_C( 85), UINT8_C(167), UINT8_C( 92), UINT8_C(216), UINT8_C( 11), UINT8_C(158), UINT8_C(102), UINT8_C(107), UINT8_C(100), UINT8_C(158), UINT8_C(213), UINT8_C(251), UINT8_C(251), UINT8_C(155), UINT8_C(173), UINT8_C(173), UINT8_C(227), UINT8_C(233), UINT8_C( 93), UINT8_C(169), UINT8_C( 38), UINT8_C( 
26), UINT8_C(217), UINT8_C( 21), UINT8_C(218), UINT8_C(182), UINT8_C(152), UINT8_C( 0), UINT8_C(180), UINT8_C(200), UINT8_C(185), UINT8_C( 9), UINT8_C(111), UINT8_C( 21), UINT8_C(225), UINT8_C(123), UINT8_C(179), UINT8_C( 71), UINT8_C(230), UINT8_C( 24), UINT8_C(230), UINT8_C(187), UINT8_C( 19), UINT8_C(225), UINT8_C( 86), UINT8_C(193), UINT8_C(142), UINT8_C( 58), UINT8_C(170), UINT8_C(235), UINT8_C(227), UINT8_C(208), UINT8_C( 5), UINT8_C(188), UINT8_C(229), UINT8_C(224), UINT8_C(114), UINT8_C(125) }, { UINT8_C( 0), UINT8_C(131), UINT8_C( 0), UINT8_C( 0), UINT8_C( 48), UINT8_C(118), UINT8_C( 0), UINT8_C( 0), UINT8_C( 52), UINT8_C( 0), UINT8_C( 11), UINT8_C( 0), UINT8_C( 0), UINT8_C(107), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(170), UINT8_C( 10), UINT8_C(155), UINT8_C( 0), UINT8_C( 92), UINT8_C( 27), UINT8_C( 0), UINT8_C( 93), UINT8_C(137), UINT8_C( 38), UINT8_C( 0), UINT8_C( 0), UINT8_C( 13), UINT8_C( 0), UINT8_C(178), UINT8_C( 0), UINT8_C( 0), UINT8_C(148), UINT8_C(101), UINT8_C( 34), UINT8_C( 9), UINT8_C(111), UINT8_C( 21), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 71), UINT8_C( 66), UINT8_C( 24), UINT8_C( 95), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(142), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 59), UINT8_C( 0), UINT8_C( 5), UINT8_C( 61), UINT8_C( 0), UINT8_C( 48), UINT8_C(114), UINT8_C(125) } }, { UINT64_C( 1346494033941637088), { UINT8_C( 48), UINT8_C( 98), UINT8_C( 89), UINT8_C( 22), UINT8_C(122), UINT8_C( 63), UINT8_C(209), UINT8_C(142), UINT8_C( 32), UINT8_C( 40), UINT8_C( 79), UINT8_C(175), UINT8_C( 98), UINT8_C(249), UINT8_C(154), UINT8_C( 69), UINT8_C(201), UINT8_C(160), UINT8_C( 1), UINT8_C(174), UINT8_C(128), UINT8_C(116), UINT8_C( 43), UINT8_C( 96), UINT8_C(155), UINT8_C(113), UINT8_C(249), UINT8_C(203), UINT8_C( 39), UINT8_C(168), UINT8_C(221), UINT8_C( 87), UINT8_C( 11), UINT8_C( 55), UINT8_C(110), UINT8_C(133), UINT8_C(118), UINT8_C( 63), UINT8_C( 19), UINT8_C(151), UINT8_C(103), UINT8_C( 98), 
UINT8_C( 70), UINT8_C(201), UINT8_C( 91), UINT8_C(224), UINT8_C( 14), UINT8_C( 36), UINT8_C(128), UINT8_C( 16), UINT8_C(210), UINT8_C( 0), UINT8_C(132), UINT8_C(254), UINT8_C( 96), UINT8_C( 31), UINT8_C(111), UINT8_C( 90), UINT8_C(234), UINT8_C(150), UINT8_C( 2), UINT8_C(200), UINT8_C(238), UINT8_C( 13) }, { UINT8_MAX, UINT8_C( 92), UINT8_C(147), UINT8_C(117), UINT8_C(155), UINT8_C(166), UINT8_C( 12), UINT8_C( 3), UINT8_C( 9), UINT8_C( 82), UINT8_C(204), UINT8_C(100), UINT8_C( 51), UINT8_C(219), UINT8_C(137), UINT8_C(179), UINT8_C(235), UINT8_C( 91), UINT8_C(180), UINT8_C(111), UINT8_C( 89), UINT8_C( 20), UINT8_C(142), UINT8_C(201), UINT8_C(110), UINT8_C(120), UINT8_C( 95), UINT8_C(113), UINT8_C( 64), UINT8_C( 77), UINT8_C(126), UINT8_C( 63), UINT8_C(169), UINT8_C( 17), UINT8_C(181), UINT8_C( 69), UINT8_C(184), UINT8_C(193), UINT8_C( 72), UINT8_C(193), UINT8_C( 20), UINT8_C( 20), UINT8_C( 37), UINT8_C( 71), UINT8_C(239), UINT8_C(174), UINT8_C(250), UINT8_C(218), UINT8_C( 10), UINT8_C(174), UINT8_C( 73), UINT8_C( 99), UINT8_C(195), UINT8_C(215), UINT8_C( 44), UINT8_C( 49), UINT8_C( 80), UINT8_C(140), UINT8_C(162), UINT8_C(144), UINT8_C(217), UINT8_C( 33), UINT8_C(208), UINT8_C(131) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 63), UINT8_C( 12), UINT8_C( 3), UINT8_C( 9), UINT8_C( 40), UINT8_C( 79), UINT8_C( 0), UINT8_C( 0), UINT8_C(219), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 91), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 43), UINT8_C( 0), UINT8_C(110), UINT8_C( 0), UINT8_C( 0), UINT8_C(113), UINT8_C( 39), UINT8_C( 0), UINT8_C( 0), UINT8_C( 63), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(118), UINT8_C( 63), UINT8_C( 0), UINT8_C( 0), UINT8_C( 20), UINT8_C( 0), UINT8_C( 37), UINT8_C( 0), UINT8_C( 91), UINT8_C(174), UINT8_C( 0), UINT8_C( 36), UINT8_C( 10), UINT8_C( 16), UINT8_C( 73), UINT8_C( 0), UINT8_C( 0), UINT8_C(215), UINT8_C( 0), UINT8_C( 31), UINT8_C( 0), UINT8_C( 90), UINT8_C( 0), 
UINT8_C( 0), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { UINT64_C( 6533333581203801394), { UINT8_C( 36), UINT8_C(209), UINT8_C(161), UINT8_C( 20), UINT8_C(127), UINT8_C(156), UINT8_C(238), UINT8_C(137), UINT8_C( 74), UINT8_C( 56), UINT8_C(237), UINT8_C( 13), UINT8_C( 15), UINT8_C( 25), UINT8_C( 63), UINT8_C( 95), UINT8_C(165), UINT8_C(225), UINT8_C(240), UINT8_C(127), UINT8_C( 2), UINT8_C(192), UINT8_C( 2), UINT8_C( 53), UINT8_C( 69), UINT8_C(202), UINT8_C( 31), UINT8_C(139), UINT8_C(218), UINT8_C(203), UINT8_C(230), UINT8_C(254), UINT8_C(156), UINT8_C(135), UINT8_C( 18), UINT8_C( 27), UINT8_C( 35), UINT8_C( 1), UINT8_C(165), UINT8_C(110), UINT8_C( 57), UINT8_C(146), UINT8_C(123), UINT8_C( 72), UINT8_C(171), UINT8_C(186), UINT8_C(168), UINT8_C( 81), UINT8_C(156), UINT8_C(152), UINT8_C(208), UINT8_C(158), UINT8_C( 88), UINT8_C(210), UINT8_C(211), UINT8_C(157), UINT8_C(156), UINT8_C(243), UINT8_C( 40), UINT8_C(118), UINT8_C(190), UINT8_C( 14), UINT8_C(116), UINT8_C( 90) }, { UINT8_C(150), UINT8_C(135), UINT8_C(117), UINT8_C(185), UINT8_C(136), UINT8_C( 26), UINT8_C( 39), UINT8_C(193), UINT8_C(172), UINT8_C(163), UINT8_C( 9), UINT8_C( 88), UINT8_C( 93), UINT8_C(177), UINT8_C(169), UINT8_C(249), UINT8_C( 73), UINT8_C(121), UINT8_C(152), UINT8_C(161), UINT8_C( 75), UINT8_C(107), UINT8_C( 62), UINT8_C(231), UINT8_C( 94), UINT8_C(103), UINT8_C( 93), UINT8_C( 28), UINT8_C(117), UINT8_C(209), UINT8_C(118), UINT8_C( 11), UINT8_C( 88), UINT8_C(236), UINT8_C(197), UINT8_C(224), UINT8_C( 6), UINT8_C(236), UINT8_C(161), UINT8_C(179), UINT8_C(143), UINT8_C(171), UINT8_C( 11), UINT8_C(237), UINT8_C( 92), UINT8_C(180), UINT8_C(230), UINT8_C(166), UINT8_C( 45), UINT8_C(126), UINT8_C( 71), UINT8_C(120), UINT8_C(234), UINT8_C(134), UINT8_C( 95), UINT8_C( 72), UINT8_C(237), UINT8_C(188), UINT8_C(101), UINT8_C( 98), UINT8_C(141), UINT8_C(219), UINT8_C(110), UINT8_C(230) }, { UINT8_C( 0), UINT8_C(135), UINT8_C( 0), UINT8_C( 0), UINT8_C(127), UINT8_C( 26), UINT8_C( 0), 
UINT8_C( 0), UINT8_C( 74), UINT8_C( 0), UINT8_C( 9), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 95), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(127), UINT8_C( 0), UINT8_C( 0), UINT8_C( 2), UINT8_C( 53), UINT8_C( 0), UINT8_C(103), UINT8_C( 0), UINT8_C( 28), UINT8_C( 0), UINT8_C(203), UINT8_C(118), UINT8_C( 11), UINT8_C( 0), UINT8_C(135), UINT8_C( 18), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(161), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 92), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 45), UINT8_C(126), UINT8_C( 0), UINT8_C(120), UINT8_C( 0), UINT8_C(134), UINT8_C( 0), UINT8_C( 72), UINT8_C( 0), UINT8_C(188), UINT8_C( 0), UINT8_C( 98), UINT8_C(141), UINT8_C( 0), UINT8_C(110), UINT8_C( 0) } }, { UINT64_C(12646503714455434183), { UINT8_C( 19), UINT8_C(140), UINT8_C(156), UINT8_C(111), UINT8_C( 64), UINT8_C(130), UINT8_C( 21), UINT8_C(109), UINT8_C( 1), UINT8_C( 93), UINT8_C(229), UINT8_C(235), UINT8_C(227), UINT8_C( 68), UINT8_C( 51), UINT8_C(208), UINT8_C( 0), UINT8_C(152), UINT8_C( 50), UINT8_C(141), UINT8_C(116), UINT8_C(160), UINT8_C(115), UINT8_C( 59), UINT8_C(211), UINT8_C( 58), UINT8_C( 9), UINT8_C(243), UINT8_C(162), UINT8_C(138), UINT8_C(162), UINT8_C(181), UINT8_C( 22), UINT8_C( 62), UINT8_C( 36), UINT8_C( 86), UINT8_C(192), UINT8_C( 58), UINT8_C(195), UINT8_C(193), UINT8_C(151), UINT8_C(168), UINT8_C(172), UINT8_C(122), UINT8_C(236), UINT8_C(224), UINT8_C( 74), UINT8_C(236), UINT8_C(120), UINT8_C(124), UINT8_C(122), UINT8_C(236), UINT8_C( 29), UINT8_C(237), UINT8_C( 40), UINT8_C(240), UINT8_C( 39), UINT8_C( 49), UINT8_C(227), UINT8_C(201), UINT8_C(188), UINT8_C(133), UINT8_C(126), UINT8_C(210) }, { UINT8_C(195), UINT8_C(163), UINT8_C( 41), UINT8_C(132), UINT8_C(221), UINT8_C(236), UINT8_C( 69), UINT8_C(116), UINT8_C(149), UINT8_C(242), UINT8_C(238), UINT8_C(129), UINT8_C(210), UINT8_C( 56), UINT8_C(110), UINT8_C( 74), UINT8_C(180), UINT8_C(232), UINT8_C( 55), UINT8_C(209), UINT8_C(213), 
UINT8_C( 95), UINT8_C(194), UINT8_C(253), UINT8_C(144), UINT8_C(165), UINT8_C(198), UINT8_C( 76), UINT8_C( 43), UINT8_C( 69), UINT8_C( 31), UINT8_C(238), UINT8_C(232), UINT8_C( 72), UINT8_C(114), UINT8_C(197), UINT8_C( 52), UINT8_C(184), UINT8_C( 57), UINT8_C(201), UINT8_C(170), UINT8_C( 39), UINT8_C( 75), UINT8_C(124), UINT8_C( 95), UINT8_C(185), UINT8_C(198), UINT8_C( 19), UINT8_C(161), UINT8_C(253), UINT8_C(229), UINT8_C(118), UINT8_C( 92), UINT8_C(167), UINT8_C(115), UINT8_C(237), UINT8_C( 76), UINT8_C( 58), UINT8_C( 57), UINT8_C(119), UINT8_C(127), UINT8_C( 88), UINT8_C(102), UINT8_C(103) }, { UINT8_C( 19), UINT8_C(140), UINT8_C( 41), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 21), UINT8_C(109), UINT8_C( 1), UINT8_C( 93), UINT8_C( 0), UINT8_C( 0), UINT8_C(210), UINT8_C( 56), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(152), UINT8_C( 50), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(115), UINT8_C( 59), UINT8_C( 0), UINT8_C( 58), UINT8_C( 9), UINT8_C( 76), UINT8_C( 0), UINT8_C( 0), UINT8_C( 31), UINT8_C(181), UINT8_C( 22), UINT8_C( 62), UINT8_C( 36), UINT8_C( 86), UINT8_C( 52), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(122), UINT8_C( 0), UINT8_C(185), UINT8_C( 74), UINT8_C( 0), UINT8_C(120), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(237), UINT8_C( 39), UINT8_C( 49), UINT8_C( 57), UINT8_C(119), UINT8_C( 0), UINT8_C( 88), UINT8_C( 0), UINT8_C(103) } }, { UINT64_C( 4223925173246285984), { UINT8_C(140), UINT8_C(233), UINT8_C(182), UINT8_C(235), UINT8_C(162), UINT8_C(125), UINT8_C(254), UINT8_C( 67), UINT8_C(122), UINT8_C(227), UINT8_C(186), UINT8_C(215), UINT8_C(138), UINT8_C( 45), UINT8_C(196), UINT8_C(215), UINT8_C(103), UINT8_C(253), UINT8_C( 78), UINT8_C(230), UINT8_C( 86), UINT8_C(180), UINT8_C( 77), UINT8_C(246), UINT8_C(141), UINT8_C(121), UINT8_C(203), UINT8_C( 29), UINT8_C(222), UINT8_C(106), UINT8_C( 88), UINT8_C(106), UINT8_C( 83), UINT8_C( 14), UINT8_C( 85), 
UINT8_C(246), UINT8_C(139), UINT8_C( 84), UINT8_C( 57), UINT8_C( 6), UINT8_C( 55), UINT8_C(243), UINT8_C(221), UINT8_C(194), UINT8_C( 33), UINT8_C(161), UINT8_C(153), UINT8_C(136), UINT8_C(158), UINT8_C(231), UINT8_C(111), UINT8_C(244), UINT8_C(156), UINT8_C(188), UINT8_C(235), UINT8_C( 41), UINT8_C( 54), UINT8_C(182), UINT8_C( 70), UINT8_C( 20), UINT8_C( 32), UINT8_C(158), UINT8_C(127), UINT8_C(116) }, { UINT8_C(173), UINT8_C(212), UINT8_C(106), UINT8_C( 56), UINT8_C( 40), UINT8_C(163), UINT8_C( 62), UINT8_C( 96), UINT8_C(151), UINT8_C( 27), UINT8_C( 34), UINT8_C(184), UINT8_C(188), UINT8_C(187), UINT8_C( 64), UINT8_C( 91), UINT8_C(162), UINT8_C(175), UINT8_C( 79), UINT8_C( 62), UINT8_C(108), UINT8_C( 58), UINT8_C(103), UINT8_C(162), UINT8_C(241), UINT8_C(174), UINT8_C(182), UINT8_C( 17), UINT8_C( 76), UINT8_C( 53), UINT8_C(133), UINT8_C(249), UINT8_C( 10), UINT8_C(239), UINT8_C( 50), UINT8_C( 50), UINT8_C(147), UINT8_C(112), UINT8_C(146), UINT8_C( 42), UINT8_C(140), UINT8_C(180), UINT8_C(226), UINT8_C( 72), UINT8_C(111), UINT8_C( 34), UINT8_C(163), UINT8_C( 18), UINT8_C(210), UINT8_C(243), UINT8_C( 80), UINT8_C( 62), UINT8_C( 45), UINT8_C(184), UINT8_C(224), UINT8_C( 30), UINT8_C(102), UINT8_C(150), UINT8_C( 48), UINT8_C(178), UINT8_C(204), UINT8_C(181), UINT8_C(172), UINT8_C(214) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(125), UINT8_C( 0), UINT8_C( 67), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(184), UINT8_C(138), UINT8_C( 0), UINT8_C( 64), UINT8_C( 91), UINT8_C( 0), UINT8_C( 0), UINT8_C( 78), UINT8_C( 62), UINT8_C( 0), UINT8_C( 58), UINT8_C( 0), UINT8_C( 0), UINT8_C(141), UINT8_C( 0), UINT8_C(182), UINT8_C( 0), UINT8_C( 76), UINT8_C( 0), UINT8_C( 88), UINT8_C(106), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(139), UINT8_C( 0), UINT8_C( 0), UINT8_C( 6), UINT8_C( 55), UINT8_C( 0), UINT8_C(221), UINT8_C( 0), UINT8_C( 0), UINT8_C( 34), UINT8_C(153), UINT8_C( 0), UINT8_C( 0), UINT8_C(231), UINT8_C( 80), 
UINT8_C( 62), UINT8_C( 45), UINT8_C( 0), UINT8_C( 0), UINT8_C( 30), UINT8_C( 0), UINT8_C(150), UINT8_C( 0), UINT8_C( 20), UINT8_C( 32), UINT8_C(158), UINT8_C( 0), UINT8_C( 0) } }, { UINT64_C(15736310808235794085), { UINT8_C( 79), UINT8_C( 68), UINT8_C( 35), UINT8_C(191), UINT8_C(102), UINT8_C(198), UINT8_C(209), UINT8_C( 56), UINT8_C(185), UINT8_C( 33), UINT8_C(118), UINT8_C(231), UINT8_C(217), UINT8_C( 86), UINT8_C( 5), UINT8_C( 63), UINT8_C(237), UINT8_C( 53), UINT8_C(242), UINT8_C(185), UINT8_C(235), UINT8_C(158), UINT8_C(143), UINT8_C(144), UINT8_C(124), UINT8_C(151), UINT8_C(200), UINT8_C(202), UINT8_C( 50), UINT8_C( 42), UINT8_C(165), UINT8_C(130), UINT8_C(110), UINT8_C(200), UINT8_C( 65), UINT8_C(212), UINT8_C(142), UINT8_C( 18), UINT8_C( 13), UINT8_C( 72), UINT8_C( 51), UINT8_C(131), UINT8_C( 47), UINT8_C( 13), UINT8_C(218), UINT8_C( 52), UINT8_C( 76), UINT8_C(199), UINT8_C(106), UINT8_C( 62), UINT8_C(128), UINT8_C( 85), UINT8_C(220), UINT8_C( 15), UINT8_C(229), UINT8_C( 88), UINT8_C(166), UINT8_C(173), UINT8_C( 35), UINT8_C(217), UINT8_C(215), UINT8_C(200), UINT8_C( 91), UINT8_C( 69) }, { UINT8_C(144), UINT8_C(156), UINT8_C( 25), UINT8_C( 30), UINT8_C(174), UINT8_C( 38), UINT8_C(102), UINT8_C(225), UINT8_C(170), UINT8_C(149), UINT8_C(238), UINT8_C(132), UINT8_C(202), UINT8_C( 59), UINT8_C( 75), UINT8_C( 52), UINT8_C(121), UINT8_C(203), UINT8_C(137), UINT8_C( 86), UINT8_C(218), UINT8_C(110), UINT8_C(174), UINT8_C(128), UINT8_C( 27), UINT8_C(209), UINT8_C( 89), UINT8_C(242), UINT8_C(153), UINT8_C(180), UINT8_C( 55), UINT8_C( 41), UINT8_C( 80), UINT8_C( 80), UINT8_C( 72), UINT8_C(254), UINT8_C(119), UINT8_C(174), UINT8_C(224), UINT8_C( 33), UINT8_C( 68), UINT8_C(206), UINT8_C(165), UINT8_C( 14), UINT8_C( 9), UINT8_C(240), UINT8_C( 66), UINT8_C(131), UINT8_C(187), UINT8_C(203), UINT8_C(217), UINT8_C(149), UINT8_C( 57), UINT8_C(135), UINT8_C( 21), UINT8_C( 84), UINT8_C( 89), UINT8_C(111), UINT8_C( 70), UINT8_C(242), UINT8_C( 35), UINT8_C(125), UINT8_C( 28), 
UINT8_C(116) }, { UINT8_C( 79), UINT8_C( 0), UINT8_C( 25), UINT8_C( 0), UINT8_C( 0), UINT8_C( 38), UINT8_C( 0), UINT8_C( 56), UINT8_C( 0), UINT8_C( 33), UINT8_C(118), UINT8_C(132), UINT8_C(202), UINT8_C( 0), UINT8_C( 5), UINT8_C( 52), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 86), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(202), UINT8_C( 50), UINT8_C( 42), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 80), UINT8_C( 65), UINT8_C(212), UINT8_C( 0), UINT8_C( 0), UINT8_C( 13), UINT8_C( 0), UINT8_C( 51), UINT8_C(131), UINT8_C( 0), UINT8_C( 13), UINT8_C( 9), UINT8_C( 0), UINT8_C( 0), UINT8_C(131), UINT8_C( 0), UINT8_C( 62), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 15), UINT8_C( 21), UINT8_C( 0), UINT8_C( 0), UINT8_C(111), UINT8_C( 0), UINT8_C(217), UINT8_C( 35), UINT8_C( 0), UINT8_C( 28), UINT8_C( 69) } }, { UINT64_C( 6225472298320815309), { UINT8_C( 33), UINT8_C( 10), UINT8_C(100), UINT8_C( 42), UINT8_C(250), UINT8_C(166), UINT8_C(173), UINT8_C(181), UINT8_C(113), UINT8_C(134), UINT8_C( 74), UINT8_C(170), UINT8_C( 14), UINT8_C( 96), UINT8_C(254), UINT8_C(103), UINT8_C(207), UINT8_C( 68), UINT8_C( 89), UINT8_C(242), UINT8_C(193), UINT8_C(117), UINT8_C(102), UINT8_C(143), UINT8_C(217), UINT8_C(217), UINT8_C(211), UINT8_C(236), UINT8_C( 43), UINT8_C( 57), UINT8_C( 66), UINT8_C( 76), UINT8_C( 67), UINT8_C(167), UINT8_C(119), UINT8_C( 62), UINT8_C( 77), UINT8_C( 36), UINT8_C(243), UINT8_C(191), UINT8_C(171), UINT8_C( 62), UINT8_C(105), UINT8_C(185), UINT8_C(158), UINT8_C(104), UINT8_C( 32), UINT8_C(109), UINT8_C(172), UINT8_C(121), UINT8_C( 95), UINT8_C(110), UINT8_C(239), UINT8_C(198), UINT8_C(253), UINT8_C(200), UINT8_C(159), UINT8_C(208), UINT8_C(180), UINT8_C(202), UINT8_C( 9), UINT8_C(247), UINT8_C( 23), UINT8_C( 77) }, { UINT8_C(158), UINT8_C(142), UINT8_C(139), UINT8_C(235), UINT8_C(178), UINT8_C(126), UINT8_C(170), UINT8_C( 93), UINT8_C(188), UINT8_C( 20), UINT8_C( 22), UINT8_C( 90), UINT8_C(124), 
UINT8_C( 54), UINT8_C(199), UINT8_C( 40), UINT8_C(176), UINT8_C( 39), UINT8_C(150), UINT8_C(159), UINT8_C(237), UINT8_C(147), UINT8_C(103), UINT8_C(140), UINT8_C(100), UINT8_C( 28), UINT8_C( 86), UINT8_C(109), UINT8_C( 19), UINT8_C(109), UINT8_C(186), UINT8_C(177), UINT8_C(251), UINT8_C( 69), UINT8_C(156), UINT8_C(174), UINT8_C(196), UINT8_C( 71), UINT8_C( 11), UINT8_C(128), UINT8_C( 91), UINT8_C( 34), UINT8_C(219), UINT8_C(215), UINT8_C( 88), UINT8_C(162), UINT8_MAX, UINT8_C( 8), UINT8_C(201), UINT8_C(150), UINT8_C(167), UINT8_C(182), UINT8_C( 41), UINT8_C( 15), UINT8_C( 66), UINT8_C(141), UINT8_C( 43), UINT8_C(153), UINT8_C(251), UINT8_C( 62), UINT8_C( 6), UINT8_C(181), UINT8_C(239), UINT8_C( 2) }, { UINT8_C( 33), UINT8_C( 0), UINT8_C(100), UINT8_C( 42), UINT8_C( 0), UINT8_C( 0), UINT8_C(170), UINT8_C( 93), UINT8_C( 0), UINT8_C( 0), UINT8_C( 22), UINT8_C( 0), UINT8_C( 0), UINT8_C( 54), UINT8_C(199), UINT8_C( 0), UINT8_C( 0), UINT8_C( 39), UINT8_C( 0), UINT8_C( 0), UINT8_C(193), UINT8_C(117), UINT8_C(102), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 86), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 66), UINT8_C( 0), UINT8_C( 0), UINT8_C( 69), UINT8_C( 0), UINT8_C( 0), UINT8_C( 77), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 34), UINT8_C( 0), UINT8_C( 0), UINT8_C( 88), UINT8_C( 0), UINT8_C( 32), UINT8_C( 0), UINT8_C(172), UINT8_C( 0), UINT8_C( 95), UINT8_C( 0), UINT8_C( 0), UINT8_C( 15), UINT8_C( 66), UINT8_C( 0), UINT8_C( 0), UINT8_C(153), UINT8_C(180), UINT8_C( 0), UINT8_C( 6), UINT8_C( 0), UINT8_C( 23), UINT8_C( 0) } }, { UINT64_C( 3260531169073073147), { UINT8_C(221), UINT8_C( 26), UINT8_C( 4), UINT8_C( 54), UINT8_C(189), UINT8_C( 4), UINT8_C( 62), UINT8_C(134), UINT8_C(154), UINT8_C(230), UINT8_C( 61), UINT8_C(195), UINT8_C(245), UINT8_C(127), UINT8_C( 81), UINT8_C( 32), UINT8_C( 24), UINT8_C( 76), UINT8_C( 94), UINT8_C( 31), UINT8_C( 1), UINT8_C( 77), UINT8_C( 33), UINT8_C(252), UINT8_C(216), UINT8_C(209), UINT8_C(187), UINT8_C(171), 
UINT8_C(140), UINT8_C(251), UINT8_C(216), UINT8_C(106), UINT8_C( 21), UINT8_C(221), UINT8_C(160), UINT8_C(210), UINT8_C(225), UINT8_C(222), UINT8_C( 89), UINT8_C(123), UINT8_C(196), UINT8_C(150), UINT8_C( 62), UINT8_C(185), UINT8_C( 21), UINT8_C(143), UINT8_C(217), UINT8_C( 46), UINT8_C(219), UINT8_C( 55), UINT8_C( 77), UINT8_C(221), UINT8_C(132), UINT8_C(110), UINT8_C(217), UINT8_C( 93), UINT8_C( 63), UINT8_C(149), UINT8_C( 8), UINT8_C(203), UINT8_C(144), UINT8_C(224), UINT8_C( 53), UINT8_C(165) }, { UINT8_C(189), UINT8_C(213), UINT8_C(120), UINT8_C(158), UINT8_C(180), UINT8_C(209), UINT8_C( 25), UINT8_C(120), UINT8_C(103), UINT8_C( 88), UINT8_C( 50), UINT8_C(124), UINT8_C(231), UINT8_C( 11), UINT8_C(170), UINT8_C(195), UINT8_C( 67), UINT8_C(247), UINT8_C(160), UINT8_C(199), UINT8_C(101), UINT8_C(121), UINT8_C( 36), UINT8_C(164), UINT8_C( 14), UINT8_C( 44), UINT8_C(112), UINT8_C(158), UINT8_C( 13), UINT8_C(165), UINT8_C( 68), UINT8_C(202), UINT8_C(123), UINT8_C(188), UINT8_C(105), UINT8_C( 47), UINT8_C(141), UINT8_C(130), UINT8_C(167), UINT8_C(244), UINT8_C(218), UINT8_C(217), UINT8_C(112), UINT8_C(194), UINT8_C(229), UINT8_C( 27), UINT8_C(133), UINT8_C( 40), UINT8_C( 18), UINT8_C( 37), UINT8_C(239), UINT8_C(120), UINT8_C(158), UINT8_C( 20), UINT8_C( 28), UINT8_C(173), UINT8_C( 64), UINT8_C(140), UINT8_C( 75), UINT8_C( 77), UINT8_C( 50), UINT8_C(143), UINT8_C( 24), UINT8_C(173) }, { UINT8_C(189), UINT8_C( 26), UINT8_C( 0), UINT8_C( 54), UINT8_C(180), UINT8_C( 4), UINT8_C( 25), UINT8_C(120), UINT8_C(103), UINT8_C( 88), UINT8_C( 0), UINT8_C(124), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 32), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 77), UINT8_C( 0), UINT8_C(164), UINT8_C( 14), UINT8_C( 44), UINT8_C(112), UINT8_C(158), UINT8_C( 13), UINT8_C(165), UINT8_C( 0), UINT8_C(106), UINT8_C( 0), UINT8_C(188), UINT8_C( 0), UINT8_C( 0), UINT8_C(141), UINT8_C( 0), UINT8_C( 89), UINT8_C(123), UINT8_C(196), UINT8_C(150), UINT8_C( 0), 
UINT8_C(185), UINT8_C( 21), UINT8_C( 27), UINT8_C( 0), UINT8_C( 40), UINT8_C( 18), UINT8_C( 37), UINT8_C( 77), UINT8_C(120), UINT8_C(132), UINT8_C( 20), UINT8_C( 0), UINT8_C( 0), UINT8_C( 63), UINT8_C( 0), UINT8_C( 8), UINT8_C( 77), UINT8_C( 0), UINT8_C(143), UINT8_C( 0), UINT8_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_maskz_min_epu8(test_vec[i].k, a, b); simde_test_x86_assert_equal_u8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm512_min_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { INT16_C( 14691), -INT16_C( 2237), INT16_C( 8698), INT16_C( 9348), INT16_C( 13857), -INT16_C( 10584), INT16_C( 25854), -INT16_C( 6420), INT16_C( 17420), INT16_C( 1517), -INT16_C( 5283), INT16_C( 26495), INT16_C( 7532), -INT16_C( 2781), INT16_C( 9520), -INT16_C( 27403), INT16_C( 14430), INT16_C( 22667), INT16_C( 3929), INT16_C( 31356), INT16_C( 9541), INT16_C( 17232), INT16_C( 15497), -INT16_C( 27350), INT16_C( 6016), -INT16_C( 8550), INT16_C( 6658), INT16_C( 28485), INT16_C( 26679), INT16_C( 26724), INT16_C( 22926), -INT16_C( 4868) }, { -INT16_C( 30831), -INT16_C( 5563), -INT16_C( 15978), -INT16_C( 9371), -INT16_C( 18970), INT16_C( 28447), INT16_C( 18930), INT16_C( 29188), -INT16_C( 24736), INT16_C( 25168), -INT16_C( 26951), -INT16_C( 3887), INT16_C( 14078), -INT16_C( 29608), INT16_C( 21647), INT16_C( 8569), -INT16_C( 16677), INT16_C( 28939), INT16_C( 28799), INT16_C( 26189), INT16_C( 27686), INT16_C( 6357), -INT16_C( 9547), INT16_C( 5514), -INT16_C( 9351), INT16_C( 12919), INT16_C( 18801), INT16_C( 28450), INT16_C( 31615), INT16_C( 3836), INT16_C( 30159), -INT16_C( 21713) }, { -INT16_C( 30831), -INT16_C( 5563), -INT16_C( 15978), -INT16_C( 9371), -INT16_C( 18970), -INT16_C( 
10584), INT16_C( 18930), -INT16_C( 6420), -INT16_C( 24736), INT16_C( 1517), -INT16_C( 26951), -INT16_C( 3887), INT16_C( 7532), -INT16_C( 29608), INT16_C( 9520), -INT16_C( 27403), -INT16_C( 16677), INT16_C( 22667), INT16_C( 3929), INT16_C( 26189), INT16_C( 9541), INT16_C( 6357), -INT16_C( 9547), -INT16_C( 27350), -INT16_C( 9351), -INT16_C( 8550), INT16_C( 6658), INT16_C( 28450), INT16_C( 26679), INT16_C( 3836), INT16_C( 22926), -INT16_C( 21713) } }, { { INT16_C( 15155), -INT16_C( 19940), INT16_C( 27051), -INT16_C( 12008), -INT16_C( 4395), -INT16_C( 29975), INT16_C( 29896), INT16_C( 16799), INT16_C( 5967), -INT16_C( 16269), -INT16_C( 27296), -INT16_C( 8401), INT16_C( 11024), -INT16_C( 7955), INT16_C( 7584), -INT16_C( 11381), -INT16_C( 22696), INT16_C( 902), -INT16_C( 25071), -INT16_C( 6443), -INT16_C( 16756), INT16_C( 21617), INT16_C( 4146), -INT16_C( 32363), INT16_C( 2087), -INT16_C( 30911), INT16_C( 29086), -INT16_C( 20890), INT16_C( 21660), INT16_C( 15758), INT16_C( 6513), -INT16_C( 14064) }, { -INT16_C( 26943), -INT16_C( 11572), -INT16_C( 24267), -INT16_C( 15944), INT16_C( 10592), -INT16_C( 28138), -INT16_C( 21702), INT16_C( 24852), INT16_C( 21940), INT16_C( 21225), INT16_C( 20422), INT16_C( 25344), -INT16_C( 28765), INT16_C( 5280), -INT16_C( 20312), INT16_C( 27101), -INT16_C( 21945), INT16_C( 31803), -INT16_C( 2997), -INT16_C( 21699), INT16_C( 21277), INT16_C( 22334), INT16_C( 21247), -INT16_C( 19527), -INT16_C( 23897), INT16_C( 28165), INT16_C( 1521), -INT16_C( 27183), INT16_C( 29076), INT16_C( 15785), -INT16_C( 30943), INT16_C( 26790) }, { -INT16_C( 26943), -INT16_C( 19940), -INT16_C( 24267), -INT16_C( 15944), -INT16_C( 4395), -INT16_C( 29975), -INT16_C( 21702), INT16_C( 16799), INT16_C( 5967), -INT16_C( 16269), -INT16_C( 27296), -INT16_C( 8401), -INT16_C( 28765), -INT16_C( 7955), -INT16_C( 20312), -INT16_C( 11381), -INT16_C( 22696), INT16_C( 902), -INT16_C( 25071), -INT16_C( 21699), -INT16_C( 16756), INT16_C( 21617), INT16_C( 4146), -INT16_C( 32363), 
-INT16_C( 23897), -INT16_C( 30911), INT16_C( 1521), -INT16_C( 27183), INT16_C( 21660), INT16_C( 15758), -INT16_C( 30943), -INT16_C( 14064) } }, { { -INT16_C( 7631), INT16_C( 31972), INT16_C( 8918), -INT16_C( 3288), INT16_C( 26229), INT16_C( 29771), INT16_C( 1208), INT16_C( 24359), INT16_C( 11430), -INT16_C( 26675), -INT16_C( 25038), -INT16_C( 14804), -INT16_C( 10737), INT16_C( 12547), -INT16_C( 21923), -INT16_C( 29031), INT16_C( 32396), INT16_C( 25098), INT16_C( 12960), INT16_C( 5461), -INT16_C( 24424), INT16_C( 20618), -INT16_C( 20060), INT16_C( 19120), INT16_C( 32222), INT16_C( 4322), INT16_C( 3612), INT16_C( 11222), -INT16_C( 9500), INT16_C( 16732), -INT16_C( 2428), INT16_C( 4303) }, { -INT16_C( 9612), INT16_C( 5234), -INT16_C( 14580), -INT16_C( 23255), -INT16_C( 19608), INT16_C( 3317), -INT16_C( 23195), INT16_C( 17239), INT16_C( 14627), INT16_C( 16211), INT16_C( 10567), INT16_C( 11370), -INT16_C( 14589), -INT16_C( 30867), INT16_C( 15805), INT16_C( 12695), INT16_C( 2327), INT16_C( 9029), INT16_C( 28369), INT16_C( 14792), -INT16_C( 16862), -INT16_C( 30907), -INT16_C( 25501), -INT16_C( 31030), INT16_C( 7637), INT16_C( 7621), INT16_C( 12358), INT16_C( 19017), -INT16_C( 18697), -INT16_C( 19247), INT16_C( 27123), INT16_C( 2789) }, { -INT16_C( 9612), INT16_C( 5234), -INT16_C( 14580), -INT16_C( 23255), -INT16_C( 19608), INT16_C( 3317), -INT16_C( 23195), INT16_C( 17239), INT16_C( 11430), -INT16_C( 26675), -INT16_C( 25038), -INT16_C( 14804), -INT16_C( 14589), -INT16_C( 30867), -INT16_C( 21923), -INT16_C( 29031), INT16_C( 2327), INT16_C( 9029), INT16_C( 12960), INT16_C( 5461), -INT16_C( 24424), -INT16_C( 30907), -INT16_C( 25501), -INT16_C( 31030), INT16_C( 7637), INT16_C( 4322), INT16_C( 3612), INT16_C( 11222), -INT16_C( 18697), -INT16_C( 19247), -INT16_C( 2428), INT16_C( 2789) } }, { { INT16_C( 10866), INT16_C( 17198), -INT16_C( 2408), -INT16_C( 17796), -INT16_C( 15692), INT16_C( 6209), INT16_C( 2910), INT16_C( 13470), INT16_C( 25640), INT16_C( 28497), -INT16_C( 25964), 
-INT16_C( 29767), -INT16_C( 30128), INT16_C( 17471), INT16_C( 9459), INT16_C( 26190), INT16_C( 31822), -INT16_C( 6487), INT16_C( 9843), INT16_C( 10145), -INT16_C( 7448), INT16_C( 17983), -INT16_C( 8466), INT16_C( 5754), -INT16_C( 13502), -INT16_C( 10619), INT16_C( 15973), -INT16_C( 18847), -INT16_C( 24375), -INT16_C( 17158), INT16_C( 18628), INT16_C( 4642) }, { -INT16_C( 13115), INT16_C( 14584), -INT16_C( 26126), -INT16_C( 9633), -INT16_C( 24708), INT16_C( 27168), -INT16_C( 25731), -INT16_C( 16512), INT16_C( 1638), -INT16_C( 13163), -INT16_C( 2492), INT16_C( 3458), INT16_C( 31894), INT16_C( 23242), -INT16_C( 4924), -INT16_C( 30356), INT16_C( 25784), -INT16_C( 21823), INT16_C( 8702), INT16_C( 31364), -INT16_C( 23104), INT16_C( 15844), INT16_C( 25664), -INT16_C( 22788), -INT16_C( 28310), -INT16_C( 20622), -INT16_C( 2937), INT16_C( 7612), -INT16_C( 31120), INT16_C( 13687), -INT16_C( 7309), INT16_C( 11198) }, { -INT16_C( 13115), INT16_C( 14584), -INT16_C( 26126), -INT16_C( 17796), -INT16_C( 24708), INT16_C( 6209), -INT16_C( 25731), -INT16_C( 16512), INT16_C( 1638), -INT16_C( 13163), -INT16_C( 25964), -INT16_C( 29767), -INT16_C( 30128), INT16_C( 17471), -INT16_C( 4924), -INT16_C( 30356), INT16_C( 25784), -INT16_C( 21823), INT16_C( 8702), INT16_C( 10145), -INT16_C( 23104), INT16_C( 15844), -INT16_C( 8466), -INT16_C( 22788), -INT16_C( 28310), -INT16_C( 20622), -INT16_C( 2937), -INT16_C( 18847), -INT16_C( 31120), -INT16_C( 17158), -INT16_C( 7309), INT16_C( 4642) } }, { { -INT16_C( 32697), INT16_C( 17878), INT16_C( 23201), INT16_C( 25023), -INT16_C( 23553), INT16_C( 16286), -INT16_C( 26104), INT16_C( 29414), INT16_C( 22571), -INT16_C( 19935), -INT16_C( 8627), -INT16_C( 16945), INT16_C( 18020), -INT16_C( 10254), -INT16_C( 20183), INT16_C( 28675), -INT16_C( 9935), -INT16_C( 11594), INT16_C( 30003), INT16_C( 13107), -INT16_C( 12007), INT16_C( 8562), INT16_C( 22635), -INT16_C( 26989), -INT16_C( 19023), -INT16_C( 440), INT16_C( 6035), -INT16_C( 2117), -INT16_C( 20899), -INT16_C( 
31025), -INT16_C( 11681), -INT16_C( 28426) }, { -INT16_C( 21333), -INT16_C( 8606), -INT16_C( 27358), INT16_C( 15121), -INT16_C( 31642), -INT16_C( 11940), -INT16_C( 4132), -INT16_C( 29337), -INT16_C( 20572), INT16_C( 14219), INT16_C( 18374), INT16_C( 9007), -INT16_C( 267), INT16_C( 21673), -INT16_C( 24624), INT16_C( 31716), INT16_C( 17996), INT16_C( 28249), INT16_C( 27611), INT16_C( 16809), INT16_C( 1519), -INT16_C( 13550), INT16_C( 31220), -INT16_C( 26279), -INT16_C( 7128), -INT16_C( 4400), -INT16_C( 213), INT16_C( 8209), -INT16_C( 17667), -INT16_C( 12940), INT16_C( 22617), -INT16_C( 23224) }, { -INT16_C( 32697), -INT16_C( 8606), -INT16_C( 27358), INT16_C( 15121), -INT16_C( 31642), -INT16_C( 11940), -INT16_C( 26104), -INT16_C( 29337), -INT16_C( 20572), -INT16_C( 19935), -INT16_C( 8627), -INT16_C( 16945), -INT16_C( 267), -INT16_C( 10254), -INT16_C( 24624), INT16_C( 28675), -INT16_C( 9935), -INT16_C( 11594), INT16_C( 27611), INT16_C( 13107), -INT16_C( 12007), -INT16_C( 13550), INT16_C( 22635), -INT16_C( 26989), -INT16_C( 19023), -INT16_C( 4400), -INT16_C( 213), -INT16_C( 2117), -INT16_C( 20899), -INT16_C( 31025), -INT16_C( 11681), -INT16_C( 28426) } }, { { -INT16_C( 23906), INT16_C( 30995), -INT16_C( 17395), -INT16_C( 838), -INT16_C( 13119), -INT16_C( 18745), INT16_C( 8261), INT16_C( 27983), INT16_C( 7941), INT16_C( 12379), INT16_C( 27679), INT16_C( 7249), -INT16_C( 15066), -INT16_C( 32534), INT16_C( 12830), -INT16_C( 17371), INT16_C( 14804), -INT16_C( 7882), -INT16_C( 3851), -INT16_C( 18467), -INT16_C( 23107), INT16_C( 621), -INT16_C( 17211), -INT16_C( 13712), -INT16_C( 13349), -INT16_C( 1285), INT16_C( 19512), INT16_C( 24087), INT16_C( 273), INT16_C( 12254), INT16_C( 1075), INT16_C( 2284) }, { INT16_C( 8765), INT16_C( 13033), -INT16_C( 14574), -INT16_C( 12311), INT16_C( 22124), INT16_C( 12754), INT16_C( 16914), -INT16_C( 4356), -INT16_C( 2291), INT16_C( 17896), -INT16_C( 189), INT16_C( 21668), -INT16_C( 32256), INT16_C( 13444), INT16_C( 28806), -INT16_C( 15556), 
INT16_C( 9618), -INT16_C( 23306), -INT16_C( 8212), INT16_C( 22644), INT16_C( 17974), INT16_C( 18570), -INT16_C( 31096), -INT16_C( 27338), INT16_C( 8061), -INT16_C( 16165), INT16_C( 32542), INT16_C( 7956), -INT16_C( 26623), -INT16_C( 30637), -INT16_C( 28920), -INT16_C( 26037) }, { -INT16_C( 23906), INT16_C( 13033), -INT16_C( 17395), -INT16_C( 12311), -INT16_C( 13119), -INT16_C( 18745), INT16_C( 8261), -INT16_C( 4356), -INT16_C( 2291), INT16_C( 12379), -INT16_C( 189), INT16_C( 7249), -INT16_C( 32256), -INT16_C( 32534), INT16_C( 12830), -INT16_C( 17371), INT16_C( 9618), -INT16_C( 23306), -INT16_C( 8212), -INT16_C( 18467), -INT16_C( 23107), INT16_C( 621), -INT16_C( 31096), -INT16_C( 27338), -INT16_C( 13349), -INT16_C( 16165), INT16_C( 19512), INT16_C( 7956), -INT16_C( 26623), -INT16_C( 30637), -INT16_C( 28920), -INT16_C( 26037) } }, { { INT16_C( 16820), -INT16_C( 24257), -INT16_C( 19679), INT16_C( 22521), -INT16_C( 31751), -INT16_C( 32353), -INT16_C( 10743), -INT16_C( 31210), -INT16_C( 3595), INT16_C( 4934), INT16_C( 23408), INT16_C( 29234), -INT16_C( 31245), -INT16_C( 774), INT16_C( 17684), -INT16_C( 13930), -INT16_C( 10873), -INT16_C( 22422), INT16_C( 25480), -INT16_C( 32257), -INT16_C( 24857), -INT16_C( 4094), INT16_C( 6516), INT16_C( 26999), -INT16_C( 17142), INT16_C( 31613), -INT16_C( 20712), INT16_C( 3309), -INT16_C( 6347), INT16_C( 18696), -INT16_C( 25044), -INT16_C( 19694) }, { INT16_C( 31860), -INT16_C( 933), INT16_C( 23264), -INT16_C( 14466), -INT16_C( 32519), INT16_C( 28087), INT16_C( 11929), -INT16_C( 23337), INT16_C( 21740), INT16_C( 1055), INT16_C( 3075), INT16_C( 14352), INT16_C( 6387), INT16_C( 8066), -INT16_C( 27465), INT16_C( 11219), INT16_C( 11793), -INT16_C( 3801), -INT16_C( 23159), -INT16_C( 32072), INT16_C( 28454), -INT16_C( 16401), -INT16_C( 14690), -INT16_C( 30109), -INT16_C( 32230), INT16_C( 7822), -INT16_C( 24690), -INT16_C( 32426), -INT16_C( 10057), INT16_C( 28321), INT16_C( 29805), INT16_C( 32409) }, { INT16_C( 16820), -INT16_C( 24257), 
-INT16_C( 19679), -INT16_C( 14466), -INT16_C( 32519), -INT16_C( 32353), -INT16_C( 10743), -INT16_C( 31210), -INT16_C( 3595), INT16_C( 1055), INT16_C( 3075), INT16_C( 14352), -INT16_C( 31245), -INT16_C( 774), -INT16_C( 27465), -INT16_C( 13930), -INT16_C( 10873), -INT16_C( 22422), -INT16_C( 23159), -INT16_C( 32257), -INT16_C( 24857), -INT16_C( 16401), -INT16_C( 14690), -INT16_C( 30109), -INT16_C( 32230), INT16_C( 7822), -INT16_C( 24690), -INT16_C( 32426), -INT16_C( 10057), INT16_C( 18696), -INT16_C( 25044), -INT16_C( 19694) } }, { { -INT16_C( 15966), INT16_C( 11119), INT16_C( 10086), -INT16_C( 29523), -INT16_C( 25194), INT16_C( 13388), -INT16_C( 20637), INT16_C( 32446), INT16_C( 19762), -INT16_C( 16228), -INT16_C( 3348), -INT16_C( 23742), -INT16_C( 7221), INT16_C( 14354), -INT16_C( 21673), -INT16_C( 1610), INT16_C( 9580), -INT16_C( 11483), -INT16_C( 11700), -INT16_C( 7585), -INT16_C( 21649), -INT16_C( 11497), -INT16_C( 10917), -INT16_C( 29359), -INT16_C( 4830), INT16_C( 3661), -INT16_C( 28705), -INT16_C( 21838), -INT16_C( 15246), -INT16_C( 13854), -INT16_C( 26513), -INT16_C( 9021) }, { -INT16_C( 5955), INT16_C( 2479), INT16_C( 3770), INT16_C( 10988), INT16_C( 954), INT16_C( 5629), INT16_C( 20184), -INT16_C( 1118), -INT16_C( 4293), INT16_C( 6665), -INT16_C( 17537), -INT16_C( 3643), -INT16_C( 22657), -INT16_C( 4165), INT16_C( 32320), -INT16_C( 565), INT16_C( 31334), INT16_C( 8199), -INT16_C( 3192), INT16_C( 16970), INT16_C( 18422), -INT16_C( 12713), -INT16_C( 1643), -INT16_C( 12087), -INT16_C( 11287), INT16_C( 26859), -INT16_C( 20338), INT16_C( 3673), INT16_C( 5207), -INT16_C( 26627), -INT16_C( 14190), -INT16_C( 1899) }, { -INT16_C( 15966), INT16_C( 2479), INT16_C( 3770), -INT16_C( 29523), -INT16_C( 25194), INT16_C( 5629), -INT16_C( 20637), -INT16_C( 1118), -INT16_C( 4293), -INT16_C( 16228), -INT16_C( 17537), -INT16_C( 23742), -INT16_C( 22657), -INT16_C( 4165), -INT16_C( 21673), -INT16_C( 1610), INT16_C( 9580), -INT16_C( 11483), -INT16_C( 11700), -INT16_C( 7585), 
-INT16_C( 21649), -INT16_C( 12713), -INT16_C( 10917), -INT16_C( 29359), -INT16_C( 11287), INT16_C( 3661), -INT16_C( 28705), -INT16_C( 21838), -INT16_C( 15246), -INT16_C( 26627), -INT16_C( 26513), -INT16_C( 9021) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_min_epi16(a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_min_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t src[32]; const simde__mmask32 k; const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { INT16_C( 9710), INT16_C( 8237), INT16_C( 26211), INT16_C( 1978), INT16_C( 14790), INT16_C( 9775), -INT16_C( 11510), -INT16_C( 20158), INT16_C( 13508), -INT16_C( 18579), INT16_C( 4844), -INT16_C( 4275), -INT16_C( 8222), -INT16_C( 28513), INT16_C( 21293), INT16_C( 7273), -INT16_C( 27015), -INT16_C( 9156), -INT16_C( 2308), -INT16_C( 15389), INT16_C( 4911), INT16_C( 14825), INT16_C( 11238), -INT16_C( 21781), INT16_C( 22623), INT16_C( 19298), -INT16_C( 20629), INT16_C( 19770), -INT16_C( 9586), -INT16_C( 17187), INT16_C( 17965), -INT16_C( 22824) }, UINT32_C(3632403676), { -INT16_C( 31623), INT16_C( 24436), INT16_C( 24495), INT16_C( 3593), INT16_C( 27575), INT16_C( 8794), -INT16_C( 27622), -INT16_C( 22161), INT16_C( 19566), -INT16_C( 25499), INT16_C( 15762), INT16_C( 28226), -INT16_C( 15023), INT16_C( 23623), -INT16_C( 7637), -INT16_C( 23401), INT16_C( 2919), INT16_C( 5635), INT16_C( 3178), INT16_C( 8485), INT16_C( 32632), -INT16_C( 28092), -INT16_C( 19693), -INT16_C( 32197), -INT16_C( 24576), -INT16_C( 28130), INT16_C( 24797), INT16_C( 12033), INT16_C( 18469), INT16_C( 20619), INT16_C( 8746), -INT16_C( 28172) }, { -INT16_C( 2259), -INT16_C( 26712), -INT16_C( 13052), INT16_C( 31929), -INT16_C( 692), INT16_C( 24334), 
INT16_C( 19120), -INT16_C( 20255), -INT16_C( 22), -INT16_C( 14269), INT16_C( 17504), -INT16_C( 31241), -INT16_C( 32116), -INT16_C( 18730), -INT16_C( 13659), -INT16_C( 11704), -INT16_C( 3902), -INT16_C( 14742), INT16_C( 9149), INT16_C( 2370), INT16_C( 20512), -INT16_C( 12184), INT16_C( 19098), -INT16_C( 31359), -INT16_C( 15287), -INT16_C( 22195), INT16_C( 17416), -INT16_C( 27601), INT16_C( 1478), INT16_C( 27466), -INT16_C( 27953), -INT16_C( 28354) }, { INT16_C( 9710), INT16_C( 8237), -INT16_C( 13052), INT16_C( 3593), -INT16_C( 692), INT16_C( 9775), -INT16_C( 27622), -INT16_C( 22161), INT16_C( 13508), -INT16_C( 18579), INT16_C( 15762), -INT16_C( 4275), -INT16_C( 32116), -INT16_C( 28513), INT16_C( 21293), INT16_C( 7273), -INT16_C( 27015), -INT16_C( 14742), -INT16_C( 2308), -INT16_C( 15389), INT16_C( 4911), INT16_C( 14825), INT16_C( 11238), -INT16_C( 32197), INT16_C( 22623), INT16_C( 19298), -INT16_C( 20629), -INT16_C( 27601), INT16_C( 1478), -INT16_C( 17187), -INT16_C( 27953), -INT16_C( 28354) } }, { { -INT16_C( 22398), INT16_C( 16215), -INT16_C( 26165), -INT16_C( 5304), -INT16_C( 19990), -INT16_C( 31557), INT16_C( 15611), INT16_C( 17417), INT16_C( 22016), INT16_C( 2286), INT16_C( 7578), INT16_C( 24988), -INT16_C( 6366), -INT16_C( 3636), INT16_C( 2681), -INT16_C( 893), -INT16_C( 9550), INT16_C( 32059), -INT16_C( 31628), INT16_C( 24168), INT16_C( 9269), INT16_C( 12514), -INT16_C( 5024), INT16_C( 24948), INT16_C( 25154), -INT16_C( 8855), INT16_C( 1663), -INT16_C( 24258), INT16_C( 2797), INT16_C( 26259), INT16_C( 5653), -INT16_C( 14494) }, UINT32_C(1682284272), { -INT16_C( 23087), INT16_C( 12935), -INT16_C( 1135), -INT16_C( 11373), -INT16_C( 930), -INT16_C( 8784), -INT16_C( 4606), -INT16_C( 4225), INT16_C( 4857), INT16_C( 3670), -INT16_C( 18392), INT16_C( 6357), INT16_C( 6742), INT16_C( 30845), INT16_C( 16328), -INT16_C( 26161), INT16_C( 22244), INT16_C( 30155), INT16_C( 24146), -INT16_C( 20407), -INT16_C( 1701), INT16_C( 23949), INT16_C( 3304), -INT16_C( 7859), 
-INT16_C( 23778), INT16_C( 18159), -INT16_C( 15269), -INT16_C( 19873), -INT16_C( 8993), -INT16_C( 22742), -INT16_C( 1509), INT16_C( 64) }, { INT16_C( 3152), -INT16_C( 23947), -INT16_C( 16790), -INT16_C( 15022), -INT16_C( 8008), -INT16_C( 24541), INT16_C( 28908), INT16_C( 2945), INT16_C( 28691), INT16_C( 28241), -INT16_C( 20428), INT16_C( 4896), INT16_C( 19340), -INT16_C( 22342), -INT16_C( 1211), -INT16_C( 27224), INT16_C( 7431), INT16_C( 28984), -INT16_C( 29988), -INT16_C( 27593), INT16_C( 23146), INT16_C( 22324), -INT16_C( 18998), -INT16_C( 8862), -INT16_C( 19675), INT16_C( 22859), INT16_C( 27748), -INT16_C( 3987), INT16_C( 10167), -INT16_C( 872), INT16_C( 16418), INT16_C( 10641) }, { -INT16_C( 22398), INT16_C( 16215), -INT16_C( 26165), -INT16_C( 5304), -INT16_C( 8008), -INT16_C( 24541), -INT16_C( 4606), -INT16_C( 4225), INT16_C( 22016), INT16_C( 3670), -INT16_C( 20428), INT16_C( 4896), INT16_C( 6742), -INT16_C( 3636), INT16_C( 2681), -INT16_C( 27224), INT16_C( 7431), INT16_C( 32059), -INT16_C( 29988), INT16_C( 24168), INT16_C( 9269), INT16_C( 12514), -INT16_C( 18998), INT16_C( 24948), INT16_C( 25154), -INT16_C( 8855), -INT16_C( 15269), -INT16_C( 24258), INT16_C( 2797), -INT16_C( 22742), -INT16_C( 1509), -INT16_C( 14494) } }, { { -INT16_C( 13986), INT16_C( 15003), -INT16_C( 11692), -INT16_C( 16690), INT16_C( 556), -INT16_C( 2539), INT16_C( 30647), -INT16_C( 9005), INT16_C( 7723), -INT16_C( 28875), -INT16_C( 23926), INT16_C( 16767), INT16_C( 6346), -INT16_C( 5059), -INT16_C( 12456), -INT16_C( 18922), -INT16_C( 20072), -INT16_C( 4880), -INT16_C( 16765), -INT16_C( 20565), -INT16_C( 16192), INT16_C( 30629), INT16_C( 30776), INT16_C( 25427), -INT16_C( 30314), INT16_C( 8690), INT16_C( 28971), -INT16_C( 2718), -INT16_C( 24439), -INT16_C( 7454), -INT16_C( 1937), INT16_C( 1944) }, UINT32_C( 754223529), { -INT16_C( 32673), -INT16_C( 26753), -INT16_C( 11272), -INT16_C( 28934), -INT16_C( 5028), -INT16_C( 30801), INT16_C( 4702), -INT16_C( 6275), INT16_C( 24498), INT16_C( 
8649), INT16_C( 25175), INT16_C( 40), INT16_C( 7403), INT16_C( 12844), INT16_C( 1979), INT16_C( 6970), -INT16_C( 17785), INT16_C( 32690), -INT16_C( 21107), -INT16_C( 5875), -INT16_C( 16999), -INT16_C( 2192), -INT16_C( 4657), -INT16_C( 32289), -INT16_C( 22452), -INT16_C( 23646), -INT16_C( 13814), -INT16_C( 2653), -INT16_C( 12313), -INT16_C( 24024), INT16_C( 25302), INT16_C( 23997) }, { INT16_C( 28700), -INT16_C( 22052), -INT16_C( 5603), -INT16_C( 18798), INT16_C( 935), INT16_C( 30382), -INT16_C( 29200), INT16_C( 15863), -INT16_C( 26315), INT16_C( 16608), -INT16_C( 31645), INT16_C( 18997), INT16_C( 23891), INT16_C( 10989), -INT16_C( 21824), -INT16_C( 9081), INT16_C( 25626), INT16_C( 14214), INT16_C( 6222), -INT16_C( 2578), -INT16_C( 25573), INT16_C( 3179), INT16_C( 25129), INT16_C( 24137), INT16_C( 10747), INT16_C( 24222), -INT16_C( 11091), INT16_C( 425), -INT16_C( 27087), -INT16_C( 3797), -INT16_C( 19904), INT16_C( 23502) }, { -INT16_C( 32673), INT16_C( 15003), -INT16_C( 11692), -INT16_C( 28934), INT16_C( 556), -INT16_C( 30801), INT16_C( 30647), -INT16_C( 6275), -INT16_C( 26315), -INT16_C( 28875), -INT16_C( 23926), INT16_C( 40), INT16_C( 6346), -INT16_C( 5059), -INT16_C( 12456), -INT16_C( 9081), -INT16_C( 20072), -INT16_C( 4880), -INT16_C( 21107), -INT16_C( 20565), -INT16_C( 25573), -INT16_C( 2192), -INT16_C( 4657), -INT16_C( 32289), -INT16_C( 30314), INT16_C( 8690), -INT16_C( 13814), -INT16_C( 2653), -INT16_C( 24439), -INT16_C( 24024), -INT16_C( 1937), INT16_C( 1944) } }, { { INT16_C( 21526), INT16_C( 25746), -INT16_C( 32660), -INT16_C( 30631), -INT16_C( 15332), INT16_C( 17812), -INT16_C( 8922), INT16_C( 8612), INT16_C( 16902), -INT16_C( 19328), INT16_C( 10518), INT16_C( 18613), -INT16_C( 8001), -INT16_C( 199), INT16_C( 1938), -INT16_C( 22182), -INT16_C( 4773), -INT16_C( 14323), INT16_C( 26477), -INT16_C( 30128), -INT16_C( 7125), INT16_C( 21199), INT16_C( 29633), -INT16_C( 14477), -INT16_C( 3146), -INT16_C( 13189), INT16_C( 12316), -INT16_C( 9452), INT16_C( 19984), 
-INT16_C( 23589), INT16_C( 13653), -INT16_C( 20148) }, UINT32_C(2423871778), { -INT16_C( 5715), INT16_C( 28222), -INT16_C( 20131), INT16_C( 4917), -INT16_C( 20059), -INT16_C( 15905), -INT16_C( 2847), -INT16_C( 3427), INT16_C( 30786), -INT16_C( 26731), -INT16_C( 7763), -INT16_C( 12216), -INT16_C( 16070), -INT16_C( 1184), INT16_C( 31370), INT16_C( 14311), INT16_C( 9571), -INT16_C( 16219), -INT16_C( 9258), INT16_C( 31699), -INT16_C( 19572), INT16_C( 27965), -INT16_C( 9561), -INT16_C( 5793), -INT16_C( 2990), -INT16_C( 128), -INT16_C( 13867), INT16_C( 4303), INT16_C( 12170), INT16_C( 5387), -INT16_C( 3415), INT16_C( 3404) }, { -INT16_C( 3561), -INT16_C( 4659), -INT16_C( 24115), INT16_C( 22889), -INT16_C( 22956), -INT16_C( 1082), INT16_C( 9856), -INT16_C( 11548), INT16_C( 25626), -INT16_C( 3887), -INT16_C( 24275), -INT16_C( 18432), INT16_C( 3024), INT16_C( 31437), INT16_C( 6653), INT16_C( 5255), INT16_C( 21515), -INT16_C( 10239), INT16_C( 27381), INT16_C( 18737), -INT16_C( 2032), -INT16_C( 28604), INT16_C( 10270), INT16_C( 14434), INT16_C( 13453), -INT16_C( 17880), INT16_C( 10453), -INT16_C( 23182), INT16_C( 16179), INT16_C( 12319), -INT16_C( 22951), INT16_C( 25668) }, { INT16_C( 21526), -INT16_C( 4659), -INT16_C( 32660), -INT16_C( 30631), -INT16_C( 15332), -INT16_C( 15905), -INT16_C( 8922), INT16_C( 8612), INT16_C( 25626), -INT16_C( 19328), INT16_C( 10518), -INT16_C( 18432), -INT16_C( 16070), -INT16_C( 199), INT16_C( 6653), -INT16_C( 22182), INT16_C( 9571), -INT16_C( 14323), INT16_C( 26477), INT16_C( 18737), -INT16_C( 19572), -INT16_C( 28604), -INT16_C( 9561), -INT16_C( 14477), -INT16_C( 3146), -INT16_C( 13189), INT16_C( 12316), -INT16_C( 9452), INT16_C( 12170), -INT16_C( 23589), INT16_C( 13653), INT16_C( 3404) } }, { { INT16_C( 18171), -INT16_C( 4035), INT16_C( 28336), -INT16_C( 16070), INT16_C( 32358), -INT16_C( 31663), -INT16_C( 19289), INT16_C( 13501), -INT16_C( 6680), -INT16_C( 16914), INT16_C( 24846), INT16_C( 16738), -INT16_C( 32096), -INT16_C( 1678), -INT16_C( 
18904), INT16_C( 9054), -INT16_C( 25604), -INT16_C( 21228), INT16_C( 19977), INT16_C( 28782), -INT16_C( 16436), INT16_C( 29684), -INT16_C( 20109), INT16_C( 23463), -INT16_C( 26985), -INT16_C( 23272), INT16_C( 31735), -INT16_C( 26650), INT16_C( 22781), INT16_C( 9617), -INT16_C( 4337), INT16_C( 2889) }, UINT32_C(2478333322), { -INT16_C( 1818), INT16_C( 23019), -INT16_C( 27991), INT16_C( 16565), -INT16_C( 13016), INT16_C( 8165), -INT16_C( 13240), INT16_C( 17847), INT16_C( 18468), INT16_C( 13163), -INT16_C( 19401), -INT16_C( 16065), -INT16_C( 2287), -INT16_C( 17324), INT16_C( 22558), INT16_C( 1075), INT16_C( 7760), -INT16_C( 1699), INT16_C( 4785), -INT16_C( 9926), INT16_C( 8160), INT16_C( 10489), -INT16_C( 20245), INT16_C( 4206), -INT16_C( 9736), INT16_C( 12099), -INT16_C( 32115), -INT16_C( 24848), INT16_C( 17530), -INT16_C( 26534), -INT16_C( 29284), -INT16_C( 4964) }, { -INT16_C( 1620), INT16_C( 24038), INT16_C( 8204), -INT16_C( 5066), INT16_C( 12095), INT16_C( 11028), -INT16_C( 32033), -INT16_C( 10437), INT16_C( 32347), -INT16_C( 6138), -INT16_C( 2559), INT16_C( 31622), -INT16_C( 8133), -INT16_C( 10477), -INT16_C( 20626), INT16_C( 6852), -INT16_C( 21848), -INT16_C( 19337), -INT16_C( 21046), INT16_C( 2464), -INT16_C( 18979), -INT16_C( 17356), INT16_C( 28471), -INT16_C( 27756), -INT16_C( 25874), -INT16_C( 4229), INT16_C( 657), -INT16_C( 13206), INT16_C( 32226), INT16_C( 20643), INT16_C( 26412), -INT16_C( 11158) }, { INT16_C( 18171), INT16_C( 23019), INT16_C( 28336), -INT16_C( 5066), INT16_C( 32358), -INT16_C( 31663), -INT16_C( 19289), -INT16_C( 10437), INT16_C( 18468), -INT16_C( 16914), -INT16_C( 19401), -INT16_C( 16065), -INT16_C( 8133), -INT16_C( 1678), -INT16_C( 20626), INT16_C( 9054), -INT16_C( 25604), -INT16_C( 21228), INT16_C( 19977), -INT16_C( 9926), -INT16_C( 18979), -INT16_C( 17356), -INT16_C( 20109), -INT16_C( 27756), -INT16_C( 25874), -INT16_C( 4229), INT16_C( 31735), -INT16_C( 26650), INT16_C( 17530), INT16_C( 9617), -INT16_C( 4337), -INT16_C( 11158) } }, { 
{ -INT16_C( 7919), -INT16_C( 9335), INT16_C( 10639), INT16_C( 27877), INT16_C( 6622), INT16_C( 5672), -INT16_C( 17271), INT16_C( 30633), INT16_C( 9303), -INT16_C( 6042), -INT16_C( 12250), INT16_C( 2484), INT16_C( 22349), INT16_C( 31065), -INT16_C( 15169), -INT16_C( 12211), -INT16_C( 10587), INT16_C( 13484), -INT16_C( 28416), -INT16_C( 8544), -INT16_C( 13910), INT16_C( 13300), -INT16_C( 25211), -INT16_C( 9046), INT16_C( 4290), -INT16_C( 5948), INT16_C( 30944), INT16_C( 11761), INT16_C( 19408), -INT16_C( 28762), -INT16_C( 3057), -INT16_C( 19361) }, UINT32_C(3404270538), { -INT16_C( 25262), -INT16_C( 10118), INT16_C( 9531), -INT16_C( 588), INT16_C( 31029), INT16_C( 5861), -INT16_C( 10255), -INT16_C( 16061), -INT16_C( 5598), INT16_C( 12624), -INT16_C( 20258), -INT16_C( 22299), -INT16_C( 12613), INT16_C( 22643), INT16_C( 7256), -INT16_C( 21857), INT16_C( 6585), -INT16_C( 2942), INT16_C( 14142), INT16_C( 29937), -INT16_C( 10320), -INT16_C( 24182), -INT16_C( 12882), -INT16_C( 12189), -INT16_C( 19529), -INT16_C( 27391), -INT16_C( 6557), INT16_C( 7998), -INT16_C( 20043), INT16_C( 3447), INT16_C( 5837), -INT16_C( 31049) }, { INT16_C( 14895), INT16_C( 28283), INT16_C( 27761), INT16_C( 8674), INT16_C( 27715), -INT16_C( 3646), INT16_C( 9529), -INT16_C( 3647), -INT16_C( 15655), INT16_C( 15494), -INT16_C( 15191), INT16_C( 24155), -INT16_C( 11659), INT16_C( 17003), INT16_C( 8936), INT16_C( 6345), INT16_C( 17500), -INT16_C( 12922), INT16_C( 26800), -INT16_C( 2834), -INT16_C( 20012), INT16_C( 3557), -INT16_C( 22570), -INT16_C( 20482), -INT16_C( 31383), INT16_C( 4844), INT16_C( 18249), -INT16_C( 16528), -INT16_C( 9446), INT16_C( 513), -INT16_C( 13570), INT16_C( 23066) }, { -INT16_C( 7919), -INT16_C( 10118), INT16_C( 10639), -INT16_C( 588), INT16_C( 6622), INT16_C( 5672), -INT16_C( 10255), -INT16_C( 16061), -INT16_C( 15655), INT16_C( 12624), -INT16_C( 12250), -INT16_C( 22299), INT16_C( 22349), INT16_C( 31065), -INT16_C( 15169), -INT16_C( 12211), INT16_C( 6585), INT16_C( 13484), 
-INT16_C( 28416), -INT16_C( 2834), -INT16_C( 13910), -INT16_C( 24182), -INT16_C( 22570), -INT16_C( 20482), INT16_C( 4290), -INT16_C( 27391), INT16_C( 30944), -INT16_C( 16528), INT16_C( 19408), -INT16_C( 28762), -INT16_C( 13570), -INT16_C( 31049) } }, { { -INT16_C( 24562), -INT16_C( 16600), INT16_C( 5640), -INT16_C( 9037), -INT16_C( 26425), -INT16_C( 24854), -INT16_C( 6081), -INT16_C( 22195), INT16_C( 14701), -INT16_C( 18501), INT16_C( 11393), -INT16_C( 25738), INT16_C( 30471), INT16_C( 1437), -INT16_C( 18366), INT16_C( 20576), -INT16_C( 30632), INT16_C( 24847), -INT16_C( 15714), INT16_C( 26173), INT16_C( 10075), -INT16_C( 26108), INT16_C( 20752), INT16_C( 32067), -INT16_C( 117), INT16_C( 3124), -INT16_C( 21973), INT16_C( 12967), INT16_C( 17442), INT16_C( 25656), -INT16_C( 26372), INT16_C( 21940) }, UINT32_C(3199648800), { INT16_C( 10267), INT16_C( 11132), -INT16_C( 16518), INT16_C( 1448), -INT16_C( 8770), -INT16_C( 5871), -INT16_C( 18297), -INT16_C( 22244), INT16_C( 21756), -INT16_C( 1779), -INT16_C( 15636), INT16_C( 3150), INT16_C( 1158), INT16_C( 3274), -INT16_C( 4105), INT16_C( 4846), INT16_C( 27159), -INT16_C( 28355), -INT16_C( 6615), -INT16_C( 5994), -INT16_C( 22589), INT16_C( 19153), -INT16_C( 4769), INT16_C( 23796), INT16_C( 321), INT16_C( 11605), -INT16_C( 23613), INT16_C( 18745), INT16_C( 1191), -INT16_C( 25002), INT16_C( 17651), INT16_C( 2737) }, { -INT16_C( 4434), -INT16_C( 10340), INT16_C( 13012), -INT16_C( 26689), -INT16_C( 28198), INT16_C( 14818), -INT16_C( 10626), -INT16_C( 16235), -INT16_C( 5417), -INT16_C( 25619), INT16_C( 10125), INT16_C( 13540), INT16_C( 14891), INT16_C( 7891), -INT16_C( 31618), INT16_C( 11304), -INT16_C( 15246), INT16_C( 18180), -INT16_C( 15369), -INT16_C( 11810), -INT16_C( 16300), -INT16_C( 11510), -INT16_C( 24426), INT16_C( 28307), -INT16_C( 32630), INT16_C( 6153), -INT16_C( 4697), -INT16_C( 11700), INT16_C( 7976), -INT16_C( 22800), INT16_C( 6563), INT16_C( 5843) }, { -INT16_C( 24562), -INT16_C( 16600), INT16_C( 5640), 
-INT16_C( 9037), -INT16_C( 26425), -INT16_C( 5871), -INT16_C( 6081), -INT16_C( 22195), INT16_C( 14701), -INT16_C( 18501), -INT16_C( 15636), -INT16_C( 25738), INT16_C( 30471), INT16_C( 1437), -INT16_C( 31618), INT16_C( 4846), -INT16_C( 30632), -INT16_C( 28355), -INT16_C( 15369), INT16_C( 26173), -INT16_C( 22589), -INT16_C( 11510), INT16_C( 20752), INT16_C( 23796), -INT16_C( 117), INT16_C( 6153), -INT16_C( 23613), -INT16_C( 11700), INT16_C( 1191), -INT16_C( 25002), -INT16_C( 26372), INT16_C( 2737) } }, { { -INT16_C( 10275), -INT16_C( 11171), INT16_C( 15258), -INT16_C( 4187), -INT16_C( 20228), -INT16_C( 27966), INT16_C( 21840), -INT16_C( 9728), INT16_C( 2517), INT16_C( 32242), INT16_C( 16375), INT16_C( 8015), INT16_C( 16478), INT16_C( 709), -INT16_C( 26535), INT16_C( 13848), INT16_C( 30063), INT16_C( 2571), -INT16_C( 20304), -INT16_C( 21255), -INT16_C( 17568), -INT16_C( 20417), INT16_C( 16144), -INT16_C( 6773), INT16_C( 32073), INT16_C( 16482), -INT16_C( 19780), INT16_C( 7007), INT16_C( 9458), INT16_C( 19229), INT16_C( 13757), INT16_C( 11393) }, UINT32_C(1513524394), { INT16_C( 18154), -INT16_C( 1458), -INT16_C( 9851), -INT16_C( 12576), INT16_C( 16982), INT16_C( 4878), INT16_C( 28148), -INT16_C( 6610), INT16_C( 19346), INT16_C( 20273), -INT16_C( 19584), INT16_C( 10875), -INT16_C( 19905), INT16_C( 31876), -INT16_C( 29727), -INT16_C( 13286), INT16_C( 26833), INT16_C( 22470), -INT16_C( 22975), -INT16_C( 26843), INT16_C( 13545), -INT16_C( 8790), -INT16_C( 10079), INT16_C( 13252), -INT16_C( 2781), -INT16_C( 23678), -INT16_C( 344), -INT16_C( 5939), INT16_C( 21168), -INT16_C( 28316), INT16_C( 32477), -INT16_C( 20643) }, { INT16_C( 9446), INT16_C( 9990), INT16_C( 11210), -INT16_C( 19521), INT16_C( 26975), INT16_C( 401), INT16_C( 21826), INT16_C( 25908), -INT16_C( 18614), -INT16_C( 3319), -INT16_C( 10571), INT16_C( 26075), INT16_C( 16168), INT16_C( 1782), INT16_C( 21694), -INT16_C( 23371), -INT16_C( 17544), INT16_C( 17100), -INT16_C( 29722), INT16_C( 18166), -INT16_C( 30732), 
INT16_C( 13895), INT16_C( 31708), INT16_C( 9884), -INT16_C( 23246), -INT16_C( 6375), -INT16_C( 2949), -INT16_C( 23476), INT16_C( 17204), -INT16_C( 3414), INT16_C( 24471), INT16_C( 3990) }, { -INT16_C( 10275), -INT16_C( 1458), INT16_C( 15258), -INT16_C( 19521), -INT16_C( 20228), INT16_C( 401), INT16_C( 21840), -INT16_C( 6610), INT16_C( 2517), INT16_C( 32242), -INT16_C( 19584), INT16_C( 10875), INT16_C( 16478), INT16_C( 709), -INT16_C( 26535), -INT16_C( 23371), INT16_C( 30063), INT16_C( 17100), -INT16_C( 29722), -INT16_C( 21255), -INT16_C( 30732), -INT16_C( 8790), INT16_C( 16144), -INT16_C( 6773), INT16_C( 32073), -INT16_C( 23678), -INT16_C( 19780), -INT16_C( 23476), INT16_C( 17204), INT16_C( 19229), INT16_C( 24471), INT16_C( 11393) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi16(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_mask_min_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_min_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask32 k; const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { UINT32_C(2426184294), { INT16_C( 31879), INT16_C( 10074), INT16_C( 3945), INT16_C( 30364), INT16_C( 4967), -INT16_C( 14898), -INT16_C( 23290), -INT16_C( 27628), -INT16_C( 28502), INT16_C( 18211), INT16_C( 31392), INT16_C( 1793), -INT16_C( 25315), -INT16_C( 2409), -INT16_C( 26870), -INT16_C( 28355), -INT16_C( 26861), INT16_C( 31928), INT16_C( 21670), INT16_C( 3570), -INT16_C( 16281), INT16_C( 28114), -INT16_C( 6298), INT16_C( 4098), INT16_C( 9591), INT16_C( 6231), INT16_C( 22943), -INT16_C( 17377), -INT16_C( 18698), INT16_C( 178), -INT16_C( 4275), INT16_C( 24721) }, { INT16_C( 18823), INT16_C( 11740), -INT16_C( 12387), INT16_C( 
1083), INT16_C( 3471), -INT16_C( 2702), INT16_C( 29940), INT16_C( 27653), INT16_C( 23961), INT16_C( 14468), -INT16_C( 23626), -INT16_C( 21259), -INT16_C( 22695), -INT16_C( 22611), INT16_C( 16023), INT16_C( 7687), -INT16_C( 7032), INT16_C( 9547), -INT16_C( 31053), INT16_C( 16938), -INT16_C( 25452), -INT16_C( 30664), INT16_C( 15632), -INT16_C( 22028), INT16_C( 30874), INT16_C( 20705), -INT16_C( 10725), INT16_C( 30205), -INT16_C( 21890), INT16_C( 5404), INT16_C( 9192), INT16_C( 28723) }, { INT16_C( 0), INT16_C( 10074), -INT16_C( 12387), INT16_C( 0), INT16_C( 0), -INT16_C( 14898), -INT16_C( 23290), INT16_C( 0), INT16_C( 0), INT16_C( 14468), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 22611), INT16_C( 0), -INT16_C( 28355), INT16_C( 0), INT16_C( 0), -INT16_C( 31053), INT16_C( 3570), -INT16_C( 25452), INT16_C( 0), INT16_C( 0), -INT16_C( 22028), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 21890), INT16_C( 0), INT16_C( 0), INT16_C( 24721) } }, { UINT32_C(3130424839), { INT16_C( 13660), INT16_C( 27681), INT16_C( 5746), INT16_C( 3349), -INT16_C( 2418), -INT16_C( 21923), INT16_C( 23245), INT16_C( 19231), INT16_C( 15108), -INT16_C( 4768), -INT16_C( 27810), INT16_C( 26205), -INT16_C( 3311), INT16_C( 5664), INT16_C( 7603), INT16_C( 4015), -INT16_C( 11950), -INT16_C( 14981), -INT16_C( 28441), INT16_C( 30162), INT16_C( 12167), INT16_C( 21535), INT16_C( 16010), -INT16_C( 29025), -INT16_C( 135), -INT16_C( 10117), -INT16_C( 9838), -INT16_C( 23746), INT16_C( 24268), -INT16_C( 32582), INT16_C( 27004), -INT16_C( 12657) }, { INT16_C( 2874), INT16_C( 8595), INT16_C( 26011), INT16_C( 8855), -INT16_C( 18795), INT16_C( 8054), INT16_C( 5621), INT16_C( 28333), INT16_C( 10516), -INT16_C( 22970), -INT16_C( 31742), -INT16_C( 12726), INT16_C( 1251), INT16_C( 24398), -INT16_C( 8595), -INT16_C( 22483), -INT16_C( 15895), -INT16_C( 31543), INT16_C( 24614), -INT16_C( 17497), INT16_C( 7447), INT16_C( 3290), -INT16_C( 30669), INT16_C( 18298), -INT16_C( 15951), -INT16_C( 19474), 
INT16_C( 14405), INT16_C( 10369), -INT16_C( 12228), -INT16_C( 22137), -INT16_C( 19026), -INT16_C( 26799) }, { INT16_C( 2874), INT16_C( 8595), INT16_C( 5746), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 22970), -INT16_C( 31742), -INT16_C( 12726), -INT16_C( 3311), INT16_C( 5664), -INT16_C( 8595), INT16_C( 0), INT16_C( 0), -INT16_C( 31543), -INT16_C( 28441), INT16_C( 0), INT16_C( 7447), INT16_C( 0), INT16_C( 0), -INT16_C( 29025), INT16_C( 0), -INT16_C( 19474), INT16_C( 0), -INT16_C( 23746), -INT16_C( 12228), -INT16_C( 32582), INT16_C( 0), -INT16_C( 26799) } }, { UINT32_C(2619022198), { INT16_C( 13024), INT16_C( 5022), INT16_C( 6586), INT16_C( 27482), INT16_C( 18650), INT16_C( 7966), -INT16_C( 24448), -INT16_C( 17336), -INT16_C( 12432), INT16_C( 7782), -INT16_C( 18556), -INT16_C( 1355), -INT16_C( 12078), INT16_C( 20119), -INT16_C( 4205), INT16_C( 29664), INT16_C( 32545), -INT16_C( 9082), -INT16_C( 8040), INT16_C( 29255), INT16_C( 26153), -INT16_C( 22127), -INT16_C( 9978), INT16_C( 30310), -INT16_C( 13143), INT16_C( 11668), INT16_C( 18819), INT16_C( 22056), -INT16_C( 16615), -INT16_C( 21340), -INT16_C( 31570), -INT16_C( 12513) }, { -INT16_C( 23293), -INT16_C( 25685), -INT16_C( 3194), -INT16_C( 20723), -INT16_C( 24743), INT16_C( 24408), -INT16_C( 16776), INT16_C( 8661), INT16_C( 27018), INT16_C( 3663), INT16_C( 30642), -INT16_C( 13468), INT16_C( 2102), -INT16_C( 7048), -INT16_C( 26740), -INT16_C( 28493), INT16_C( 24381), -INT16_C( 15573), INT16_C( 14674), -INT16_C( 21646), -INT16_C( 13608), INT16_C( 20490), -INT16_C( 8311), INT16_C( 4978), -INT16_C( 16056), -INT16_C( 1503), -INT16_C( 31432), INT16_C( 28357), INT16_C( 15757), INT16_C( 6738), INT16_C( 1493), INT16_C( 4778) }, { INT16_C( 0), -INT16_C( 25685), -INT16_C( 3194), INT16_C( 0), -INT16_C( 24743), INT16_C( 7966), -INT16_C( 24448), INT16_C( 0), -INT16_C( 12432), INT16_C( 3663), INT16_C( 0), -INT16_C( 13468), -INT16_C( 12078), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 
24381), -INT16_C( 15573), INT16_C( 0), -INT16_C( 21646), -INT16_C( 13608), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 31432), INT16_C( 22056), -INT16_C( 16615), INT16_C( 0), INT16_C( 0), -INT16_C( 12513) } }, { UINT32_C(3067467108), { INT16_C( 27409), -INT16_C( 26057), -INT16_C( 22198), -INT16_C( 27986), -INT16_C( 12438), -INT16_C( 23924), INT16_C( 21077), -INT16_C( 7664), INT16_C( 25231), INT16_C( 25852), -INT16_C( 22937), -INT16_C( 13194), INT16_C( 19324), -INT16_C( 30078), -INT16_C( 7022), -INT16_C( 23439), -INT16_C( 22449), -INT16_C( 26050), -INT16_C( 5039), -INT16_C( 17620), -INT16_C( 17988), INT16_C( 4445), INT16_C( 27915), -INT16_C( 25869), -INT16_C( 3889), INT16_C( 14079), INT16_C( 30102), INT16_C( 4610), -INT16_C( 31295), INT16_C( 21405), INT16_C( 3689), -INT16_C( 18185) }, { INT16_C( 14006), INT16_C( 1874), INT16_C( 32546), -INT16_C( 8510), INT16_C( 7992), INT16_C( 17391), -INT16_C( 7284), INT16_C( 23517), -INT16_C( 9005), INT16_C( 27025), -INT16_C( 27566), INT16_C( 4988), INT16_C( 6425), -INT16_C( 32154), INT16_C( 24103), -INT16_C( 8902), -INT16_C( 29292), -INT16_C( 18716), -INT16_C( 23028), INT16_C( 17557), -INT16_C( 31547), INT16_C( 20871), INT16_C( 25703), INT16_C( 15020), INT16_C( 15681), -INT16_C( 27740), INT16_C( 8401), -INT16_C( 5466), INT16_C( 3129), INT16_C( 24684), -INT16_C( 22678), -INT16_C( 451) }, { INT16_C( 0), INT16_C( 0), -INT16_C( 22198), INT16_C( 0), INT16_C( 0), -INT16_C( 23924), -INT16_C( 7284), INT16_C( 0), -INT16_C( 9005), INT16_C( 0), -INT16_C( 27566), INT16_C( 0), INT16_C( 6425), INT16_C( 0), -INT16_C( 7022), -INT16_C( 23439), -INT16_C( 29292), INT16_C( 0), -INT16_C( 23028), INT16_C( 0), -INT16_C( 31547), INT16_C( 0), INT16_C( 25703), -INT16_C( 25869), INT16_C( 0), -INT16_C( 27740), INT16_C( 8401), INT16_C( 0), -INT16_C( 31295), INT16_C( 21405), INT16_C( 0), -INT16_C( 18185) } }, { UINT32_C(1085612340), { INT16_C( 3022), INT16_C( 14045), -INT16_C( 30353), -INT16_C( 20368), INT16_C( 5318), -INT16_C( 
26557), -INT16_C( 5836), INT16_C( 28034), -INT16_C( 4106), INT16_C( 24781), INT16_C( 2710), -INT16_C( 13729), INT16_C( 5163), -INT16_C( 3574), -INT16_C( 29090), INT16_C( 11390), INT16_C( 23449), INT16_C( 2146), -INT16_C( 11292), -INT16_C( 21575), -INT16_C( 793), INT16_C( 7235), -INT16_C( 14874), -INT16_C( 9079), INT16_C( 22452), INT16_C( 19004), -INT16_C( 25759), -INT16_C( 29420), INT16_C( 7855), INT16_C( 3455), -INT16_C( 340), INT16_C( 17722) }, { -INT16_C( 25511), INT16_C( 15950), INT16_C( 1903), INT16_C( 22505), INT16_C( 11267), -INT16_C( 5773), -INT16_C( 783), -INT16_C( 22843), INT16_C( 595), -INT16_C( 18960), INT16_C( 1437), INT16_C( 19778), -INT16_C( 16093), -INT16_C( 12198), -INT16_C( 27457), INT16_C( 6421), INT16_C( 25393), -INT16_C( 24489), INT16_C( 16490), INT16_C( 28407), INT16_C( 27244), INT16_C( 23895), INT16_C( 7527), -INT16_C( 17917), -INT16_C( 3041), -INT16_C( 17297), -INT16_C( 19975), INT16_C( 7177), INT16_C( 25715), INT16_C( 13036), INT16_C( 760), INT16_C( 10571) }, { INT16_C( 0), INT16_C( 0), -INT16_C( 30353), INT16_C( 0), INT16_C( 5318), -INT16_C( 26557), INT16_C( 0), INT16_C( 0), -INT16_C( 4106), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 12198), INT16_C( 0), INT16_C( 0), INT16_C( 23449), INT16_C( 0), -INT16_C( 11292), INT16_C( 0), -INT16_C( 793), INT16_C( 7235), INT16_C( 0), -INT16_C( 17917), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 340), INT16_C( 0) } }, { UINT32_C(3502940773), { -INT16_C( 27348), -INT16_C( 27732), -INT16_C( 20558), -INT16_C( 11955), -INT16_C( 16989), -INT16_C( 25458), -INT16_C( 26770), -INT16_C( 7751), -INT16_C( 23045), -INT16_C( 3052), INT16_C( 24487), INT16_C( 3357), -INT16_C( 6398), -INT16_C( 6947), INT16_C( 7081), -INT16_C( 10957), -INT16_C( 8272), INT16_C( 25448), -INT16_C( 19058), INT16_C( 12852), -INT16_C( 15758), -INT16_C( 7730), -INT16_C( 30886), INT16_C( 21954), -INT16_C( 10707), -INT16_C( 11191), INT16_C( 26422), INT16_C( 14561), -INT16_C( 16818), 
-INT16_C( 2276), INT16_C( 20441), -INT16_C( 30004) }, { INT16_C( 13358), -INT16_C( 16915), INT16_C( 8682), INT16_C( 23791), -INT16_C( 16924), INT16_C( 15933), INT16_C( 69), INT16_C( 29331), -INT16_C( 8746), INT16_C( 3142), INT16_C( 10308), -INT16_C( 28092), INT16_C( 25062), -INT16_C( 16246), INT16_C( 22192), -INT16_C( 8374), INT16_C( 14219), INT16_C( 30108), -INT16_C( 29864), INT16_C( 15569), INT16_C( 3912), -INT16_C( 29318), INT16_C( 3599), -INT16_C( 6657), INT16_C( 18155), INT16_C( 12274), INT16_C( 13934), INT16_C( 21697), INT16_C( 19351), INT16_C( 18452), INT16_C( 24226), INT16_C( 11559) }, { -INT16_C( 27348), INT16_C( 0), -INT16_C( 20558), INT16_C( 0), INT16_C( 0), -INT16_C( 25458), -INT16_C( 26770), INT16_C( 0), INT16_C( 0), -INT16_C( 3052), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 16246), INT16_C( 0), -INT16_C( 10957), INT16_C( 0), INT16_C( 25448), INT16_C( 0), INT16_C( 12852), INT16_C( 0), INT16_C( 0), -INT16_C( 30886), -INT16_C( 6657), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 16818), INT16_C( 0), INT16_C( 20441), -INT16_C( 30004) } }, { UINT32_C(4003644309), { -INT16_C( 23166), -INT16_C( 28380), INT16_C( 9139), -INT16_C( 24969), INT16_C( 26985), -INT16_C( 10291), -INT16_C( 29025), INT16_C( 14124), INT16_C( 16602), INT16_C( 31871), -INT16_C( 22881), INT16_C( 13481), INT16_C( 19305), -INT16_C( 18654), INT16_C( 19902), INT16_C( 16717), INT16_C( 29170), -INT16_C( 23086), INT16_C( 18837), -INT16_C( 445), INT16_C( 4274), INT16_C( 21206), INT16_C( 670), INT16_C( 30857), INT16_C( 2114), -INT16_C( 7692), -INT16_C( 25170), INT16_C( 5910), INT16_C( 14568), -INT16_C( 22578), INT16_C( 7045), INT16_C( 30696) }, { -INT16_C( 17779), INT16_C( 8732), INT16_C( 24324), -INT16_C( 18912), -INT16_C( 2449), INT16_C( 3592), -INT16_C( 28168), INT16_C( 15238), INT16_C( 31641), INT16_C( 18204), INT16_C( 12824), INT16_C( 350), INT16_C( 11371), -INT16_C( 3928), -INT16_C( 28600), -INT16_C( 10904), -INT16_C( 31670), INT16_C( 20215), INT16_C( 6116), INT16_C( 
21253), INT16_C( 3342), INT16_C( 1633), -INT16_C( 5985), INT16_C( 14401), INT16_C( 24163), INT16_C( 31616), -INT16_C( 8560), -INT16_C( 1156), INT16_C( 9227), INT16_C( 21484), INT16_C( 21684), -INT16_C( 216) }, { -INT16_C( 23166), INT16_C( 0), INT16_C( 9139), INT16_C( 0), -INT16_C( 2449), INT16_C( 0), INT16_C( 0), INT16_C( 14124), INT16_C( 16602), INT16_C( 18204), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 28600), -INT16_C( 10904), INT16_C( 0), -INT16_C( 23086), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1633), INT16_C( 0), INT16_C( 14401), INT16_C( 0), -INT16_C( 7692), -INT16_C( 25170), -INT16_C( 1156), INT16_C( 0), -INT16_C( 22578), INT16_C( 7045), -INT16_C( 216) } }, { UINT32_C(3159171032), { INT16_C( 29024), -INT16_C( 181), -INT16_C( 29607), -INT16_C( 17353), -INT16_C( 18454), INT16_C( 31544), -INT16_C( 19306), -INT16_C( 24202), INT16_C( 25305), -INT16_C( 29196), INT16_C( 7350), -INT16_C( 28788), -INT16_C( 9669), INT16_C( 29003), INT16_C( 23340), -INT16_C( 29514), INT16_C( 461), INT16_C( 9867), -INT16_C( 15475), INT16_C( 30947), INT16_C( 7034), INT16_C( 4339), INT16_C( 27087), -INT16_C( 22351), -INT16_C( 23092), -INT16_C( 32202), -INT16_C( 15679), -INT16_C( 1007), INT16_C( 23964), -INT16_C( 13970), INT16_C( 9400), -INT16_C( 31403) }, { -INT16_C( 7899), -INT16_C( 19796), -INT16_C( 28764), INT16_C( 7722), INT16_C( 7594), INT16_C( 31023), -INT16_C( 8057), INT16_C( 21282), INT16_C( 22662), INT16_C( 18389), -INT16_C( 6374), -INT16_C( 18620), -INT16_C( 19900), -INT16_C( 896), -INT16_C( 10794), -INT16_C( 1150), INT16_C( 11958), INT16_C( 23213), -INT16_C( 10051), INT16_C( 26489), -INT16_C( 22283), INT16_C( 31968), INT16_C( 648), INT16_C( 3791), -INT16_C( 23206), INT16_C( 30038), -INT16_C( 25972), -INT16_C( 12244), -INT16_C( 21428), INT16_C( 8908), INT16_C( 20097), INT16_C( 14365) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 17353), -INT16_C( 18454), INT16_C( 0), -INT16_C( 19306), -INT16_C( 24202), INT16_C( 22662), -INT16_C( 29196), 
-INT16_C( 6374), -INT16_C( 28788), -INT16_C( 19900), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 461), INT16_C( 0), -INT16_C( 15475), INT16_C( 26489), INT16_C( 0), INT16_C( 0), INT16_C( 648), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 25972), -INT16_C( 12244), -INT16_C( 21428), -INT16_C( 13970), INT16_C( 0), -INT16_C( 31403) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_maskz_min_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; }
/* Test for simde_mm512_min_epu16.
 *
 * Every test_vec entry stores two 32-lane uint16_t inputs (a and b) and the
 * 32-lane result the test expects (r).  In the first entry, for example,
 * a[0]=1431 / b[0]=48748 gives r[0]=1431 and a[1]=60088 / b[1]=42525 gives
 * r[1]=42525, i.e. r carries the smaller of the two lanes compared as
 * unsigned values.
 *
 * For each entry the function loads a and b, calls simde_mm512_min_epu16 and
 * compares the outcome with r through simde_test_x86_assert_equal_u16x32.
 * It returns 0 once the loop has run to completion.
 *
 * NOTE(review): SIMDE_MUNIT_TEST_ARGS and the assert helper are defined
 * outside this chunk; presumably the helper makes the test fail on the first
 * mismatching lane -- confirm against the test harness. */
static int test_simde_mm512_min_epu16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint16_t a[32]; /* first input lanes */ const uint16_t b[32]; /* second input lanes */ const uint16_t r[32]; /* expected result lanes */ } test_vec[] = { { { UINT16_C( 1431), UINT16_C(60088), UINT16_C(59670), UINT16_C(46197), UINT16_C(28207), UINT16_C(42628), UINT16_C(38831), UINT16_C( 7804), UINT16_C(27384), UINT16_C(63519), UINT16_C(45488), UINT16_C(32855), UINT16_C(60315), UINT16_C(36942), UINT16_C(33367), UINT16_C(61079), UINT16_C(20616), UINT16_C(40664), UINT16_C(19769), UINT16_C(26706), UINT16_C(54972), UINT16_C(27406), UINT16_C(35693), UINT16_C(25993), UINT16_C(43253), UINT16_C(42333), UINT16_C(46425), UINT16_C(62502), UINT16_C(29856), UINT16_C(63365), UINT16_C( 7415), UINT16_C(32741) }, { UINT16_C(48748), UINT16_C(42525), UINT16_C(28427), UINT16_C(50958), UINT16_C( 7494), UINT16_C(45875), UINT16_C(48296), UINT16_C(40217), UINT16_C(30309), UINT16_C(48706), UINT16_C(26667), UINT16_C(52403), UINT16_C(14557), UINT16_C(54467), UINT16_C(43348), UINT16_C(49491), UINT16_C(28775), UINT16_C(29287), UINT16_C(30175), UINT16_C( 9530), UINT16_C(28050), UINT16_C(15065), UINT16_C(61993), UINT16_C(36567), UINT16_C( 6760), UINT16_C(37965), UINT16_C( 130), UINT16_C(24416), UINT16_C( 9016), UINT16_C(35891), UINT16_C(34508), UINT16_C(13133) }, { UINT16_C( 1431), UINT16_C(42525), 
UINT16_C(28427), UINT16_C(46197), UINT16_C( 7494), UINT16_C(42628), UINT16_C(38831), UINT16_C( 7804), UINT16_C(27384), UINT16_C(48706), UINT16_C(26667), UINT16_C(32855), UINT16_C(14557), UINT16_C(36942), UINT16_C(33367), UINT16_C(49491), UINT16_C(20616), UINT16_C(29287), UINT16_C(19769), UINT16_C( 9530), UINT16_C(28050), UINT16_C(15065), UINT16_C(35693), UINT16_C(25993), UINT16_C( 6760), UINT16_C(37965), UINT16_C( 130), UINT16_C(24416), UINT16_C( 9016), UINT16_C(35891), UINT16_C( 7415), UINT16_C(13133) } }, { { UINT16_C(46326), UINT16_C(54950), UINT16_C(57386), UINT16_C(48379), UINT16_C(54349), UINT16_C(30455), UINT16_C(52934), UINT16_C(12037), UINT16_C(21224), UINT16_C(27587), UINT16_C( 9042), UINT16_C(35530), UINT16_C(65094), UINT16_C( 4886), UINT16_C(25732), UINT16_C(31558), UINT16_C(60440), UINT16_C(16977), UINT16_C(19660), UINT16_C( 6655), UINT16_C(63009), UINT16_C(59280), UINT16_C(38340), UINT16_C(44310), UINT16_C(55783), UINT16_C(14616), UINT16_C(58108), UINT16_C(17347), UINT16_C(55776), UINT16_C(25942), UINT16_C(39997), UINT16_C(22240) }, { UINT16_C(12681), UINT16_C(21912), UINT16_C(38781), UINT16_C(40559), UINT16_C(65421), UINT16_C(21126), UINT16_C(40084), UINT16_C(31743), UINT16_C( 6006), UINT16_C(29364), UINT16_C(30713), UINT16_C(55989), UINT16_C( 2896), UINT16_C(36415), UINT16_C( 8104), UINT16_C(12772), UINT16_C(31824), UINT16_C(52614), UINT16_C(62740), UINT16_C(41324), UINT16_C(62196), UINT16_C(35059), UINT16_C(62094), UINT16_C( 1027), UINT16_C(46857), UINT16_C( 887), UINT16_C(11310), UINT16_C(32733), UINT16_C( 7224), UINT16_C(57357), UINT16_C(61755), UINT16_C(35601) }, { UINT16_C(12681), UINT16_C(21912), UINT16_C(38781), UINT16_C(40559), UINT16_C(54349), UINT16_C(21126), UINT16_C(40084), UINT16_C(12037), UINT16_C( 6006), UINT16_C(27587), UINT16_C( 9042), UINT16_C(35530), UINT16_C( 2896), UINT16_C( 4886), UINT16_C( 8104), UINT16_C(12772), UINT16_C(31824), UINT16_C(16977), UINT16_C(19660), UINT16_C( 6655), UINT16_C(62196), UINT16_C(35059), 
UINT16_C(38340), UINT16_C( 1027), UINT16_C(46857), UINT16_C( 887), UINT16_C(11310), UINT16_C(17347), UINT16_C( 7224), UINT16_C(25942), UINT16_C(39997), UINT16_C(22240) } }, { { UINT16_C(38765), UINT16_C(33112), UINT16_C(50317), UINT16_C(33059), UINT16_C( 5814), UINT16_C(17674), UINT16_C( 3337), UINT16_C( 4681), UINT16_C(49349), UINT16_C(62229), UINT16_C(62189), UINT16_C( 9586), UINT16_C(32526), UINT16_C(18693), UINT16_C( 5744), UINT16_C(57044), UINT16_C(11693), UINT16_C(14943), UINT16_C(33521), UINT16_C(43196), UINT16_C(50841), UINT16_C(41709), UINT16_C(14035), UINT16_C(39092), UINT16_C(51959), UINT16_C(58508), UINT16_C(65212), UINT16_C(51977), UINT16_C( 3710), UINT16_C(60948), UINT16_C(59684), UINT16_C(53708) }, { UINT16_C(11286), UINT16_C( 1804), UINT16_C(51374), UINT16_C(18351), UINT16_C(40078), UINT16_C(25065), UINT16_C(40659), UINT16_C(51962), UINT16_C(34408), UINT16_C( 9390), UINT16_C(46980), UINT16_C( 751), UINT16_C( 1221), UINT16_C(59889), UINT16_C(48621), UINT16_C( 954), UINT16_C(50921), UINT16_C(38922), UINT16_C(47758), UINT16_C( 7391), UINT16_C(51542), UINT16_C(10622), UINT16_C(30823), UINT16_C(53235), UINT16_C(41470), UINT16_C(33523), UINT16_C(58200), UINT16_C( 7557), UINT16_C(30439), UINT16_C(54278), UINT16_C(49459), UINT16_C( 7639) }, { UINT16_C(11286), UINT16_C( 1804), UINT16_C(50317), UINT16_C(18351), UINT16_C( 5814), UINT16_C(17674), UINT16_C( 3337), UINT16_C( 4681), UINT16_C(34408), UINT16_C( 9390), UINT16_C(46980), UINT16_C( 751), UINT16_C( 1221), UINT16_C(18693), UINT16_C( 5744), UINT16_C( 954), UINT16_C(11693), UINT16_C(14943), UINT16_C(33521), UINT16_C( 7391), UINT16_C(50841), UINT16_C(10622), UINT16_C(14035), UINT16_C(39092), UINT16_C(41470), UINT16_C(33523), UINT16_C(58200), UINT16_C( 7557), UINT16_C( 3710), UINT16_C(54278), UINT16_C(49459), UINT16_C( 7639) } }, { { UINT16_C(57735), UINT16_C( 5813), UINT16_C(38043), UINT16_C(62002), UINT16_C(45149), UINT16_C(50203), UINT16_C( 3880), UINT16_C( 9875), UINT16_C(34736), UINT16_C( 2473), 
UINT16_C(11882), UINT16_C(20774), UINT16_C(11684), UINT16_C(55077), UINT16_C(64750), UINT16_C(30196), UINT16_C(43485), UINT16_C(31115), UINT16_C(48702), UINT16_C(39787), UINT16_C(34414), UINT16_C(38752), UINT16_C(62357), UINT16_C(18109), UINT16_C(26234), UINT16_C(58447), UINT16_C(30100), UINT16_C(14389), UINT16_C(23202), UINT16_C(36880), UINT16_C( 1110), UINT16_C(13318) }, { UINT16_C(37294), UINT16_C(60589), UINT16_C( 6223), UINT16_C(48775), UINT16_C(59294), UINT16_C(13397), UINT16_C( 4827), UINT16_C(21882), UINT16_C(51577), UINT16_C( 3386), UINT16_C(28478), UINT16_C(57670), UINT16_C(22218), UINT16_C( 8305), UINT16_C(30554), UINT16_C( 2132), UINT16_C( 265), UINT16_C(22772), UINT16_C(31769), UINT16_C(47126), UINT16_C(27491), UINT16_C(16108), UINT16_C(26238), UINT16_C(63380), UINT16_C(52783), UINT16_C(27908), UINT16_C(19005), UINT16_C( 1870), UINT16_C(49312), UINT16_C(64296), UINT16_C(31799), UINT16_C(16387) }, { UINT16_C(37294), UINT16_C( 5813), UINT16_C( 6223), UINT16_C(48775), UINT16_C(45149), UINT16_C(13397), UINT16_C( 3880), UINT16_C( 9875), UINT16_C(34736), UINT16_C( 2473), UINT16_C(11882), UINT16_C(20774), UINT16_C(11684), UINT16_C( 8305), UINT16_C(30554), UINT16_C( 2132), UINT16_C( 265), UINT16_C(22772), UINT16_C(31769), UINT16_C(39787), UINT16_C(27491), UINT16_C(16108), UINT16_C(26238), UINT16_C(18109), UINT16_C(26234), UINT16_C(27908), UINT16_C(19005), UINT16_C( 1870), UINT16_C(23202), UINT16_C(36880), UINT16_C( 1110), UINT16_C(13318) } }, { { UINT16_C(63614), UINT16_C(38809), UINT16_C(44916), UINT16_C(55119), UINT16_C(15131), UINT16_C(39190), UINT16_C(43681), UINT16_C(53392), UINT16_C(38008), UINT16_C(46398), UINT16_C(36063), UINT16_C(32701), UINT16_C(58700), UINT16_C(33914), UINT16_C(32353), UINT16_C(57284), UINT16_C(23926), UINT16_C(60023), UINT16_C(50701), UINT16_C(10433), UINT16_C(55042), UINT16_C(41921), UINT16_C(20865), UINT16_C(63860), UINT16_C(45797), UINT16_C(50351), UINT16_C(27710), UINT16_C(35652), UINT16_C(48721), UINT16_C(45583), 
UINT16_C(54076), UINT16_C(45714) }, { UINT16_C( 2353), UINT16_C(16028), UINT16_C(24271), UINT16_C(53606), UINT16_C(10037), UINT16_C(46965), UINT16_C(59768), UINT16_C(23984), UINT16_C(24475), UINT16_C(55586), UINT16_C(26315), UINT16_C( 7268), UINT16_C(29476), UINT16_C(25039), UINT16_C(24903), UINT16_C(30739), UINT16_C(45162), UINT16_C(14774), UINT16_C( 7182), UINT16_C(17163), UINT16_C(32835), UINT16_C(48122), UINT16_C(43881), UINT16_C( 1048), UINT16_C(14858), UINT16_C(55005), UINT16_C(17056), UINT16_C(50674), UINT16_C(49589), UINT16_C(64550), UINT16_C(14626), UINT16_C(35956) }, { UINT16_C( 2353), UINT16_C(16028), UINT16_C(24271), UINT16_C(53606), UINT16_C(10037), UINT16_C(39190), UINT16_C(43681), UINT16_C(23984), UINT16_C(24475), UINT16_C(46398), UINT16_C(26315), UINT16_C( 7268), UINT16_C(29476), UINT16_C(25039), UINT16_C(24903), UINT16_C(30739), UINT16_C(23926), UINT16_C(14774), UINT16_C( 7182), UINT16_C(10433), UINT16_C(32835), UINT16_C(41921), UINT16_C(20865), UINT16_C( 1048), UINT16_C(14858), UINT16_C(50351), UINT16_C(17056), UINT16_C(35652), UINT16_C(48721), UINT16_C(45583), UINT16_C(14626), UINT16_C(35956) } }, { { UINT16_C(10985), UINT16_C(63430), UINT16_C(53574), UINT16_C(35131), UINT16_C(13649), UINT16_C(47684), UINT16_C(24032), UINT16_C(60350), UINT16_C(39831), UINT16_C(14529), UINT16_C(46045), UINT16_C(37885), UINT16_C( 9077), UINT16_C(38799), UINT16_C( 1116), UINT16_C(17956), UINT16_C(59950), UINT16_C(30013), UINT16_C(30907), UINT16_C( 3326), UINT16_C(17326), UINT16_C(36550), UINT16_C(33952), UINT16_C(14201), UINT16_C(14879), UINT16_C(64879), UINT16_C(27886), UINT16_C(25488), UINT16_C( 8079), UINT16_C(60666), UINT16_C( 7715), UINT16_C(21042) }, { UINT16_C(28424), UINT16_C(50119), UINT16_C(50664), UINT16_C(38607), UINT16_C(38152), UINT16_C(43044), UINT16_C(40473), UINT16_C(14816), UINT16_C(20440), UINT16_C(50742), UINT16_C(50876), UINT16_C(19241), UINT16_C( 9445), UINT16_C( 2359), UINT16_C(26946), UINT16_C(19291), UINT16_C( 8921), UINT16_C(49422), 
UINT16_C(57063), UINT16_C(61527), UINT16_C(31603), UINT16_C(36248), UINT16_C(30745), UINT16_C(62150), UINT16_C(64712), UINT16_C(33976), UINT16_C(58050), UINT16_C(42959), UINT16_C( 1798), UINT16_C(18608), UINT16_C( 2928), UINT16_C(18835) }, { UINT16_C(10985), UINT16_C(50119), UINT16_C(50664), UINT16_C(35131), UINT16_C(13649), UINT16_C(43044), UINT16_C(24032), UINT16_C(14816), UINT16_C(20440), UINT16_C(14529), UINT16_C(46045), UINT16_C(19241), UINT16_C( 9077), UINT16_C( 2359), UINT16_C( 1116), UINT16_C(17956), UINT16_C( 8921), UINT16_C(30013), UINT16_C(30907), UINT16_C( 3326), UINT16_C(17326), UINT16_C(36248), UINT16_C(30745), UINT16_C(14201), UINT16_C(14879), UINT16_C(33976), UINT16_C(27886), UINT16_C(25488), UINT16_C( 1798), UINT16_C(18608), UINT16_C( 2928), UINT16_C(18835) } }, { { UINT16_C(41517), UINT16_C( 5386), UINT16_C(24960), UINT16_C(62213), UINT16_C(40413), UINT16_C(63104), UINT16_C(17942), UINT16_C(57064), UINT16_C(41282), UINT16_C( 1122), UINT16_C(12675), UINT16_C(35244), UINT16_C(23608), UINT16_C(43473), UINT16_C(25960), UINT16_C(38386), UINT16_C(64775), UINT16_C(34730), UINT16_C(44894), UINT16_C(15226), UINT16_C(64333), UINT16_C(25394), UINT16_C( 6721), UINT16_C(33857), UINT16_C(41915), UINT16_C(16008), UINT16_C(13524), UINT16_C( 3527), UINT16_C(39313), UINT16_C(63926), UINT16_C(43262), UINT16_C( 1422) }, { UINT16_C(14757), UINT16_C( 1164), UINT16_C( 1768), UINT16_C(13631), UINT16_C(28929), UINT16_C(17304), UINT16_C(55692), UINT16_C(18375), UINT16_C(20348), UINT16_C(20870), UINT16_C(19844), UINT16_C( 5470), UINT16_C( 5350), UINT16_C(58382), UINT16_C(40124), UINT16_C(25321), UINT16_C(30165), UINT16_C(48742), UINT16_C(42364), UINT16_C(32243), UINT16_C(35863), UINT16_C(41920), UINT16_C(34661), UINT16_C(58090), UINT16_C(28887), UINT16_C(23347), UINT16_C(37310), UINT16_C(42096), UINT16_C(32421), UINT16_C(24969), UINT16_C(29210), UINT16_C(61635) }, { UINT16_C(14757), UINT16_C( 1164), UINT16_C( 1768), UINT16_C(13631), UINT16_C(28929), UINT16_C(17304), 
UINT16_C(17942), UINT16_C(18375), UINT16_C(20348), UINT16_C( 1122), UINT16_C(12675), UINT16_C( 5470), UINT16_C( 5350), UINT16_C(43473), UINT16_C(25960), UINT16_C(25321), UINT16_C(30165), UINT16_C(34730), UINT16_C(42364), UINT16_C(15226), UINT16_C(35863), UINT16_C(25394), UINT16_C( 6721), UINT16_C(33857), UINT16_C(28887), UINT16_C(16008), UINT16_C(13524), UINT16_C( 3527), UINT16_C(32421), UINT16_C(24969), UINT16_C(29210), UINT16_C( 1422) } }, { { UINT16_C(10728), UINT16_C(25774), UINT16_C(41423), UINT16_C(59105), UINT16_C(41517), UINT16_C(37769), UINT16_C(29481), UINT16_C( 117), UINT16_C(43236), UINT16_C(41563), UINT16_C(52025), UINT16_C(56902), UINT16_C(53065), UINT16_C(25663), UINT16_C( 834), UINT16_C(10836), UINT16_C( 556), UINT16_C(64398), UINT16_C(28579), UINT16_C(53729), UINT16_C(27153), UINT16_C(15204), UINT16_C(55774), UINT16_C(49723), UINT16_C(38785), UINT16_C(47716), UINT16_C(43618), UINT16_C(44184), UINT16_C(55162), UINT16_C(48144), UINT16_C(25818), UINT16_C( 2022) }, { UINT16_C(29798), UINT16_C( 2306), UINT16_C(58595), UINT16_C(62938), UINT16_C(15950), UINT16_C(11312), UINT16_C(27415), UINT16_C(39150), UINT16_C(20994), UINT16_C(25938), UINT16_C(60157), UINT16_C(30481), UINT16_C( 8642), UINT16_C(39987), UINT16_C( 6533), UINT16_C(60323), UINT16_C(42637), UINT16_C(28916), UINT16_C(53130), UINT16_C(55397), UINT16_C(38157), UINT16_C( 9477), UINT16_C(62209), UINT16_C( 957), UINT16_C( 4166), UINT16_C(17256), UINT16_C(31226), UINT16_C(48314), UINT16_C(60826), UINT16_C( 8025), UINT16_C(64518), UINT16_C(37642) }, { UINT16_C(10728), UINT16_C( 2306), UINT16_C(41423), UINT16_C(59105), UINT16_C(15950), UINT16_C(11312), UINT16_C(27415), UINT16_C( 117), UINT16_C(20994), UINT16_C(25938), UINT16_C(52025), UINT16_C(30481), UINT16_C( 8642), UINT16_C(25663), UINT16_C( 834), UINT16_C(10836), UINT16_C( 556), UINT16_C(28916), UINT16_C(28579), UINT16_C(53729), UINT16_C(27153), UINT16_C( 9477), UINT16_C(55774), UINT16_C( 957), UINT16_C( 4166), UINT16_C(17256), UINT16_C(31226), 
UINT16_C(44184), UINT16_C(55162), UINT16_C( 8025), UINT16_C(25818), UINT16_C( 2022) } } };
/* Walk every test_vec entry: load both operands, run simde_mm512_min_epu16,
 * and compare the produced vector with the stored expected lanes.  The
 * uint16_t arrays are loaded with simde_mm512_loadu_epi16, the same load
 * used for the expected vector passed to the assert helper. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_min_epu16(a, b); simde_test_x86_assert_equal_u16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_min_epu16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t src[32]; const simde__mmask32 k; const uint16_t a[32]; const uint16_t b[32]; const uint16_t r[32]; } test_vec[] = { { { -INT16_C( 3079), INT16_C( 8612), -INT16_C( 25643), INT16_C( 29615), -INT16_C( 23644), INT16_C( 18229), INT16_C( 13600), INT16_C( 16573), INT16_C( 11343), INT16_C( 21901), -INT16_C( 11893), -INT16_C( 6952), -INT16_C( 26016), INT16_C( 21084), INT16_C( 1957), -INT16_C( 25055), -INT16_C( 14854), -INT16_C( 12097), INT16_C( 28256), INT16_C( 1347), INT16_C( 30737), INT16_C( 12620), INT16_C( 2734), -INT16_C( 655), -INT16_C( 458), -INT16_C( 15790), INT16_C( 11215), INT16_C( 12454), INT16_C( 709), INT16_C( 27266), -INT16_C( 23798), INT16_C( 1032) }, UINT32_C(3386165097), { UINT16_C( 7056), UINT16_C(15993), UINT16_C(59941), UINT16_C(23355), UINT16_C(36585), UINT16_C(47133), UINT16_C(50361), UINT16_C(32488), UINT16_C(27590), UINT16_C(53481), UINT16_C(61710), UINT16_C(30677), UINT16_C(43449), UINT16_C(61249), UINT16_C( 4033), UINT16_C(21046), UINT16_C(44842), UINT16_C(20368), UINT16_C(52378), UINT16_C(33707), UINT16_C(51290), UINT16_C( 4923), UINT16_C( 9356), UINT16_C(21393), UINT16_C(31375), UINT16_C(40227), UINT16_C(63596), UINT16_C( 9493), UINT16_C(22178), UINT16_C(25364), UINT16_C(19045), UINT16_C(37045) }, { UINT16_C(18170), UINT16_C(38111), UINT16_C(35346), UINT16_C(27671), UINT16_C(21075), UINT16_C(57215), UINT16_C( 4214), UINT16_C( 1330), UINT16_C(22155), UINT16_C(63395), UINT16_C(47182), UINT16_C(61468), UINT16_C(12302), UINT16_C(29524), 
UINT16_C( 2426), UINT16_C(29699), UINT16_C(58191), UINT16_C(24840), UINT16_C( 8045), UINT16_C(49357), UINT16_C(19570), UINT16_C(59552), UINT16_C(53853), UINT16_C(59630), UINT16_C(37160), UINT16_C(30687), UINT16_C(64329), UINT16_C(22375), UINT16_C(47915), UINT16_C(42442), UINT16_C(52933), UINT16_C( 5146) }, { UINT16_C( 7056), UINT16_C( 8612), UINT16_C(39893), UINT16_C(23355), UINT16_C(41892), UINT16_C(47133), UINT16_C( 4214), UINT16_C(16573), UINT16_C(22155), UINT16_C(53481), UINT16_C(47182), UINT16_C(58584), UINT16_C(39520), UINT16_C(21084), UINT16_C( 2426), UINT16_C(21046), UINT16_C(50682), UINT16_C(53439), UINT16_C( 8045), UINT16_C( 1347), UINT16_C(19570), UINT16_C(12620), UINT16_C( 9356), UINT16_C(21393), UINT16_C(31375), UINT16_C(49746), UINT16_C(11215), UINT16_C( 9493), UINT16_C( 709), UINT16_C(27266), UINT16_C(19045), UINT16_C( 5146) } }, { { INT16_C( 8881), INT16_C( 7798), INT16_C( 17218), -INT16_C( 19233), INT16_C( 32656), -INT16_C( 4708), -INT16_C( 30127), INT16_C( 31445), -INT16_C( 19429), INT16_C( 25841), INT16_C( 22703), -INT16_C( 9541), -INT16_C( 31212), -INT16_C( 9857), -INT16_C( 26284), INT16_C( 1517), INT16_C( 25532), -INT16_C( 477), INT16_C( 679), INT16_C( 14258), INT16_C( 20097), -INT16_C( 11484), -INT16_C( 1575), -INT16_C( 2995), INT16_C( 16045), INT16_C( 23641), INT16_C( 5270), -INT16_C( 21962), -INT16_C( 19046), -INT16_C( 4477), INT16_C( 29007), INT16_C( 3059) }, UINT32_C(2064193492), { UINT16_C(54793), UINT16_C(57966), UINT16_C(48079), UINT16_C(31959), UINT16_C(12537), UINT16_C(36824), UINT16_C( 3652), UINT16_C(57146), UINT16_C(48580), UINT16_C( 5069), UINT16_C(49454), UINT16_C( 798), UINT16_C(10200), UINT16_C(61822), UINT16_C(12770), UINT16_C(60300), UINT16_C(64007), UINT16_C(55246), UINT16_C(42421), UINT16_C(44627), UINT16_C(11477), UINT16_C( 6462), UINT16_C(30778), UINT16_C(65272), UINT16_C(50741), UINT16_C(25617), UINT16_C(12167), UINT16_C(24423), UINT16_C(58710), UINT16_C(14416), UINT16_C(56598), UINT16_C( 7716) }, { UINT16_C(62167), 
UINT16_C(36341), UINT16_C(18583), UINT16_C(27707), UINT16_C(31092), UINT16_C(44933), UINT16_C(32497), UINT16_C(10157), UINT16_C(48964), UINT16_C(52107), UINT16_C(62190), UINT16_C(17706), UINT16_C(31447), UINT16_C(61053), UINT16_C(41303), UINT16_C(12044), UINT16_C( 403), UINT16_C(10940), UINT16_C(63305), UINT16_C(48790), UINT16_C( 7281), UINT16_C(25197), UINT16_C( 6810), UINT16_C(56969), UINT16_C( 5337), UINT16_C(51369), UINT16_C(54022), UINT16_C(56845), UINT16_C(35405), UINT16_C(42444), UINT16_C(55340), UINT16_C(49108) }, { UINT16_C( 8881), UINT16_C( 7798), UINT16_C(18583), UINT16_C(46303), UINT16_C(12537), UINT16_C(60828), UINT16_C( 3652), UINT16_C(10157), UINT16_C(48580), UINT16_C( 5069), UINT16_C(49454), UINT16_C(55995), UINT16_C(10200), UINT16_C(55679), UINT16_C(39252), UINT16_C( 1517), UINT16_C( 403), UINT16_C(65059), UINT16_C( 679), UINT16_C(44627), UINT16_C(20097), UINT16_C(54052), UINT16_C(63961), UINT16_C(62541), UINT16_C( 5337), UINT16_C(25617), UINT16_C( 5270), UINT16_C(24423), UINT16_C(35405), UINT16_C(14416), UINT16_C(55340), UINT16_C( 3059) } }, { { -INT16_C( 28455), INT16_C( 8938), -INT16_C( 32633), -INT16_C( 1824), INT16_C( 19868), INT16_C( 13915), -INT16_C( 7064), INT16_C( 16660), -INT16_C( 16903), -INT16_C( 247), INT16_C( 5776), -INT16_C( 8483), -INT16_C( 22111), -INT16_C( 12925), INT16_C( 22401), INT16_C( 23180), INT16_C( 30439), INT16_C( 28285), INT16_C( 24055), -INT16_C( 27801), -INT16_C( 15701), INT16_C( 5066), -INT16_C( 8538), -INT16_C( 24748), INT16_C( 24220), INT16_C( 11423), INT16_C( 31860), INT16_C( 5386), -INT16_C( 29402), -INT16_C( 22558), INT16_C( 28644), -INT16_C( 13566) }, UINT32_C(3694821349), { UINT16_C(14947), UINT16_C( 2458), UINT16_C(61208), UINT16_C(46249), UINT16_C(18509), UINT16_C(49633), UINT16_C(60356), UINT16_C(60119), UINT16_C(47481), UINT16_C(23954), UINT16_C(37928), UINT16_C( 3625), UINT16_C(25363), UINT16_C(61418), UINT16_C(23044), UINT16_C(26487), UINT16_C( 4500), UINT16_C(44400), UINT16_C( 6400), UINT16_C(19809), 
UINT16_C(16993), UINT16_C( 9743), UINT16_C(58926), UINT16_C(42768), UINT16_C(41631), UINT16_C(51204), UINT16_C(11574), UINT16_C(18902), UINT16_C(49296), UINT16_C(37945), UINT16_C(45083), UINT16_C(45051) }, { UINT16_C(27841), UINT16_C(49756), UINT16_C(48773), UINT16_C(59151), UINT16_C( 7680), UINT16_C(11789), UINT16_C( 7428), UINT16_C(42197), UINT16_C(56000), UINT16_C(63084), UINT16_C(16903), UINT16_C(38976), UINT16_C(30978), UINT16_C( 7468), UINT16_C(10281), UINT16_C(60109), UINT16_C(10644), UINT16_C( 6572), UINT16_C(48359), UINT16_C(59392), UINT16_C( 3546), UINT16_C(57110), UINT16_C(60459), UINT16_C(60291), UINT16_C(61382), UINT16_C(52705), UINT16_C( 8497), UINT16_C(13157), UINT16_C(37530), UINT16_C(50001), UINT16_C( 7866), UINT16_C(20142) }, { UINT16_C(14947), UINT16_C( 8938), UINT16_C(48773), UINT16_C(63712), UINT16_C(19868), UINT16_C(11789), UINT16_C( 7428), UINT16_C(42197), UINT16_C(47481), UINT16_C(23954), UINT16_C(16903), UINT16_C( 3625), UINT16_C(25363), UINT16_C( 7468), UINT16_C(10281), UINT16_C(23180), UINT16_C(30439), UINT16_C( 6572), UINT16_C(24055), UINT16_C(19809), UINT16_C( 3546), UINT16_C( 9743), UINT16_C(56998), UINT16_C(40788), UINT16_C(24220), UINT16_C(11423), UINT16_C( 8497), UINT16_C(13157), UINT16_C(37530), UINT16_C(42978), UINT16_C( 7866), UINT16_C(20142) } }, { { INT16_C( 23111), INT16_C( 12135), INT16_C( 26646), -INT16_C( 3817), INT16_C( 11637), -INT16_C( 24368), INT16_C( 21273), -INT16_C( 8309), INT16_C( 27970), INT16_C( 29613), INT16_C( 4750), INT16_C( 10662), -INT16_C( 2140), INT16_C( 24300), -INT16_C( 26091), INT16_C( 23980), INT16_C( 5365), INT16_C( 2956), -INT16_C( 23684), -INT16_C( 3588), -INT16_C( 13104), -INT16_C( 5486), INT16_C( 7455), INT16_C( 25033), INT16_C( 30346), INT16_C( 6612), INT16_C( 31625), INT16_C( 11586), INT16_C( 11890), -INT16_C( 30580), INT16_C( 14537), -INT16_C( 16667) }, UINT32_C(3368644940), { UINT16_C(19602), UINT16_C(45774), UINT16_C(39017), UINT16_C(62483), UINT16_C(59406), UINT16_C(38669), UINT16_C(20323), 
UINT16_C(54725), UINT16_C(20861), UINT16_C(18013), UINT16_C(17033), UINT16_C(54788), UINT16_C(52915), UINT16_C(51102), UINT16_C(22676), UINT16_C( 9900), UINT16_C(31396), UINT16_C( 3800), UINT16_C(60434), UINT16_C( 8450), UINT16_C( 4052), UINT16_C(14264), UINT16_C(32094), UINT16_C(56076), UINT16_C(27342), UINT16_C(22562), UINT16_C( 9900), UINT16_C(24622), UINT16_C(52468), UINT16_C(34855), UINT16_C(54053), UINT16_C(51631) }, { UINT16_C(34638), UINT16_C(24791), UINT16_C(55667), UINT16_C(18305), UINT16_C(15080), UINT16_C(18046), UINT16_C(35767), UINT16_C(34338), UINT16_C(17653), UINT16_C(41438), UINT16_C( 3178), UINT16_C(24321), UINT16_C(10712), UINT16_C(64999), UINT16_C(38652), UINT16_C(19143), UINT16_C(40478), UINT16_C(37291), UINT16_C(11384), UINT16_C(24793), UINT16_C(22374), UINT16_C( 7847), UINT16_C(51682), UINT16_C(55204), UINT16_C(33293), UINT16_C(30585), UINT16_C(31374), UINT16_C(26326), UINT16_C(48803), UINT16_C(41060), UINT16_C(11092), UINT16_C(29418) }, { UINT16_C(23111), UINT16_C(12135), UINT16_C(39017), UINT16_C(18305), UINT16_C(11637), UINT16_C(41168), UINT16_C(20323), UINT16_C(57227), UINT16_C(17653), UINT16_C(29613), UINT16_C( 4750), UINT16_C(10662), UINT16_C(10712), UINT16_C(51102), UINT16_C(22676), UINT16_C(23980), UINT16_C(31396), UINT16_C( 2956), UINT16_C(41852), UINT16_C( 8450), UINT16_C(52432), UINT16_C(60050), UINT16_C(32094), UINT16_C(55204), UINT16_C(30346), UINT16_C( 6612), UINT16_C(31625), UINT16_C(24622), UINT16_C(11890), UINT16_C(34956), UINT16_C(11092), UINT16_C(29418) } }, { { -INT16_C( 27191), INT16_C( 16644), -INT16_C( 8766), INT16_C( 10402), INT16_C( 18740), INT16_C( 5958), -INT16_C( 5614), INT16_C( 8174), INT16_C( 26476), -INT16_C( 1386), INT16_C( 28130), -INT16_C( 31391), -INT16_C( 15061), INT16_C( 32549), INT16_C( 4336), -INT16_C( 17934), -INT16_C( 2395), INT16_C( 26619), -INT16_C( 25133), INT16_C( 1936), -INT16_C( 10522), -INT16_C( 2018), INT16_C( 3521), INT16_C( 11543), -INT16_C( 21132), INT16_C( 22056), -INT16_C( 30438), INT16_C( 
17884), INT16_C( 334), INT16_C( 16069), -INT16_C( 18671), -INT16_C( 18441) }, UINT32_C(2149511853), { UINT16_C(42629), UINT16_C(18029), UINT16_C(33971), UINT16_C(10099), UINT16_C(39730), UINT16_C(19582), UINT16_C(23076), UINT16_C(29330), UINT16_C(22363), UINT16_C(28080), UINT16_C(43022), UINT16_C(47908), UINT16_C(17050), UINT16_C(10811), UINT16_C(49905), UINT16_C(30367), UINT16_C( 3432), UINT16_C( 7100), UINT16_C(12177), UINT16_C(49987), UINT16_C(49611), UINT16_C(61200), UINT16_C(41499), UINT16_C(30306), UINT16_C( 4857), UINT16_C( 2019), UINT16_C( 1978), UINT16_C(21954), UINT16_C(64842), UINT16_C(15231), UINT16_C( 7871), UINT16_C(10417) }, { UINT16_C(27947), UINT16_C(48451), UINT16_C(34460), UINT16_C(26496), UINT16_C(36935), UINT16_C(25175), UINT16_C(47410), UINT16_C(11225), UINT16_C(48331), UINT16_C(34354), UINT16_C(62660), UINT16_C( 3803), UINT16_C(23281), UINT16_C(45385), UINT16_C(64120), UINT16_C(42201), UINT16_C( 7271), UINT16_C( 865), UINT16_C(57763), UINT16_C(60011), UINT16_C(49778), UINT16_C(42061), UINT16_C( 9851), UINT16_C(18128), UINT16_C( 738), UINT16_C(42700), UINT16_C(42999), UINT16_C(59572), UINT16_C(64769), UINT16_C(31385), UINT16_C(29431), UINT16_C(24094) }, { UINT16_C(27947), UINT16_C(16644), UINT16_C(33971), UINT16_C(10099), UINT16_C(18740), UINT16_C(19582), UINT16_C(59922), UINT16_C(11225), UINT16_C(26476), UINT16_C(28080), UINT16_C(28130), UINT16_C(34145), UINT16_C(17050), UINT16_C(10811), UINT16_C(49905), UINT16_C(30367), UINT16_C(63141), UINT16_C( 865), UINT16_C(12177), UINT16_C(49987), UINT16_C(49611), UINT16_C(63518), UINT16_C( 3521), UINT16_C(11543), UINT16_C(44404), UINT16_C(22056), UINT16_C(35098), UINT16_C(17884), UINT16_C( 334), UINT16_C(16069), UINT16_C(46865), UINT16_C(10417) } }, { { INT16_C( 32655), INT16_C( 12898), -INT16_C( 12960), -INT16_C( 11748), INT16_C( 27023), INT16_C( 2679), INT16_C( 18319), INT16_C( 29264), INT16_C( 7497), INT16_C( 16408), -INT16_C( 12860), -INT16_C( 14807), -INT16_C( 15670), -INT16_C( 15808), INT16_C( 
24117), -INT16_C( 15328), -INT16_C( 32035), INT16_C( 15862), INT16_C( 4687), -INT16_C( 8688), -INT16_C( 30852), INT16_C( 3048), INT16_C( 14798), INT16_C( 6013), -INT16_C( 27050), INT16_C( 6744), -INT16_C( 32413), INT16_C( 11744), INT16_C( 8259), INT16_C( 30959), INT16_C( 4222), INT16_C( 23356) }, UINT32_C(3801690770), { UINT16_C(43312), UINT16_C(65228), UINT16_C(19170), UINT16_C(14357), UINT16_C(28128), UINT16_C(17234), UINT16_C(13294), UINT16_C(12912), UINT16_C(24659), UINT16_C(53930), UINT16_C(59248), UINT16_C( 557), UINT16_C(50713), UINT16_C(24292), UINT16_C(42351), UINT16_C(40735), UINT16_C(60494), UINT16_C(12445), UINT16_C(45878), UINT16_C( 5736), UINT16_C(47648), UINT16_C( 3929), UINT16_C(51693), UINT16_C(16705), UINT16_C(60201), UINT16_C(39187), UINT16_C(16594), UINT16_C(60572), UINT16_C(32775), UINT16_C(30282), UINT16_C(27173), UINT16_C(29462) }, { UINT16_C(45910), UINT16_C(36003), UINT16_C( 2918), UINT16_C(34722), UINT16_C(64454), UINT16_C(45974), UINT16_C(55236), UINT16_C(61172), UINT16_C( 1986), UINT16_C(38279), UINT16_C( 9032), UINT16_C(20353), UINT16_C(52132), UINT16_C(51653), UINT16_C(56117), UINT16_C(35645), UINT16_C(57487), UINT16_C(62743), UINT16_C(47596), UINT16_C(45692), UINT16_C( 4788), UINT16_C(31077), UINT16_C(23273), UINT16_C(44135), UINT16_C(61025), UINT16_C(43329), UINT16_C(49682), UINT16_C(46840), UINT16_C(48781), UINT16_C(50047), UINT16_C(48281), UINT16_C(10318) }, { UINT16_C(32655), UINT16_C(36003), UINT16_C(52576), UINT16_C(53788), UINT16_C(28128), UINT16_C( 2679), UINT16_C(18319), UINT16_C(12912), UINT16_C( 7497), UINT16_C(38279), UINT16_C(52676), UINT16_C(50729), UINT16_C(50713), UINT16_C(24292), UINT16_C(24117), UINT16_C(50208), UINT16_C(57487), UINT16_C(15862), UINT16_C( 4687), UINT16_C( 5736), UINT16_C( 4788), UINT16_C( 3048), UINT16_C(14798), UINT16_C(16705), UINT16_C(38486), UINT16_C(39187), UINT16_C(33123), UINT16_C(11744), UINT16_C( 8259), UINT16_C(30282), UINT16_C(27173), UINT16_C(10318) } }, { { INT16_C( 26269), -INT16_C( 
30434), -INT16_C( 26081), -INT16_C( 11205), -INT16_C( 24403), -INT16_C( 27059), -INT16_C( 19206), INT16_C( 23618), -INT16_C( 31838), -INT16_C( 19451), -INT16_C( 443), -INT16_C( 11414), -INT16_C( 5444), INT16_C( 21910), -INT16_C( 7002), INT16_C( 17278), -INT16_C( 25526), INT16_C( 27340), INT16_C( 1846), -INT16_C( 7362), -INT16_C( 29784), -INT16_C( 23942), -INT16_C( 17345), -INT16_C( 7682), INT16_C( 1088), -INT16_C( 31338), INT16_C( 2), -INT16_C( 16808), -INT16_C( 4374), -INT16_C( 28397), -INT16_C( 28205), INT16_C( 7636) }, UINT32_C(1686610221), { UINT16_C(49488), UINT16_C(36851), UINT16_C(61822), UINT16_C(48753), UINT16_C( 2037), UINT16_C(63299), UINT16_C(39943), UINT16_C(62133), UINT16_C(51594), UINT16_C(23939), UINT16_C(22362), UINT16_C(34939), UINT16_C( 760), UINT16_C(41452), UINT16_C(13256), UINT16_C( 6385), UINT16_C(58613), UINT16_C(29608), UINT16_C( 6614), UINT16_C(52017), UINT16_C(29728), UINT16_C(10179), UINT16_C(30736), UINT16_C(39705), UINT16_C(40001), UINT16_C(40184), UINT16_C(29684), UINT16_C(60452), UINT16_C( 4214), UINT16_C(16013), UINT16_C(32579), UINT16_C(14422) }, { UINT16_C(65123), UINT16_C(14763), UINT16_C(56343), UINT16_C(14085), UINT16_C(51281), UINT16_C(24927), UINT16_C(30784), UINT16_C(33532), UINT16_C(62741), UINT16_C( 2334), UINT16_C(17000), UINT16_C(57077), UINT16_C(33618), UINT16_C(38172), UINT16_C(29442), UINT16_C(26062), UINT16_C(31089), UINT16_C(35231), UINT16_C(42070), UINT16_C(42944), UINT16_C( 8044), UINT16_C(44040), UINT16_C( 1432), UINT16_C(44334), UINT16_C(19706), UINT16_C(25270), UINT16_C(43918), UINT16_C(57409), UINT16_C(23854), UINT16_C(12406), UINT16_C(17616), UINT16_C(17046) }, { UINT16_C(49488), UINT16_C(35102), UINT16_C(56343), UINT16_C(14085), UINT16_C(41133), UINT16_C(24927), UINT16_C(46330), UINT16_C(23618), UINT16_C(51594), UINT16_C(46085), UINT16_C(65093), UINT16_C(54122), UINT16_C(60092), UINT16_C(38172), UINT16_C(58534), UINT16_C( 6385), UINT16_C(31089), UINT16_C(29608), UINT16_C( 6614), UINT16_C(58174), 
UINT16_C(35752), UINT16_C(41594), UINT16_C(48191), UINT16_C(39705), UINT16_C( 1088), UINT16_C(34198), UINT16_C(29684), UINT16_C(48728), UINT16_C(61162), UINT16_C(12406), UINT16_C(17616), UINT16_C( 7636) } }, { { INT16_C( 13757), INT16_C( 5067), -INT16_C( 29735), INT16_C( 17850), -INT16_C( 15445), INT16_C( 17393), INT16_C( 8392), -INT16_C( 15632), -INT16_C( 22932), -INT16_C( 1244), INT16_C( 25937), -INT16_C( 32549), INT16_C( 20931), -INT16_C( 27728), INT16_C( 18069), INT16_C( 21461), -INT16_C( 24453), INT16_C( 21606), INT16_C( 8492), -INT16_C( 10343), -INT16_C( 29724), -INT16_C( 21478), INT16_C( 2731), INT16_C( 5998), -INT16_C( 27984), INT16_C( 274), -INT16_C( 4360), -INT16_C( 17535), INT16_C( 12863), -INT16_C( 10930), INT16_C( 9336), -INT16_C( 3032) }, UINT32_C(4031286980), { UINT16_C(57709), UINT16_C( 6207), UINT16_C(44523), UINT16_C(39727), UINT16_C(16960), UINT16_C(14493), UINT16_C( 7728), UINT16_C(28659), UINT16_C(16720), UINT16_C(51524), UINT16_C(27749), UINT16_C(10941), UINT16_C( 1531), UINT16_C(43546), UINT16_C(58087), UINT16_C(21566), UINT16_C(32195), UINT16_C(44908), UINT16_C(39979), UINT16_C(27466), UINT16_C(59358), UINT16_C( 3747), UINT16_C(38406), UINT16_C(22141), UINT16_C(49879), UINT16_C(15647), UINT16_C(56366), UINT16_C(10599), UINT16_C(33250), UINT16_C(51668), UINT16_C( 4707), UINT16_C(10014) }, { UINT16_C(35471), UINT16_C(47830), UINT16_C( 8230), UINT16_C( 1061), UINT16_C(51208), UINT16_C( 3602), UINT16_C(36958), UINT16_C(13924), UINT16_C(33874), UINT16_C(32883), UINT16_C(55904), UINT16_C(17066), UINT16_C(32347), UINT16_C(48908), UINT16_C(10896), UINT16_C( 8166), UINT16_C(48308), UINT16_C(56282), UINT16_C(65500), UINT16_C(58591), UINT16_C(62152), UINT16_C( 9970), UINT16_C(22402), UINT16_C(54364), UINT16_C(53211), UINT16_C(15188), UINT16_C(65193), UINT16_C( 1406), UINT16_C(35452), UINT16_C( 3268), UINT16_C(43700), UINT16_C(26668) }, { UINT16_C(13757), UINT16_C( 5067), UINT16_C( 8230), UINT16_C(17850), UINT16_C(50091), UINT16_C(17393), UINT16_C( 
7728), UINT16_C(13924), UINT16_C(42604), UINT16_C(32883), UINT16_C(27749), UINT16_C(10941), UINT16_C(20931), UINT16_C(37808), UINT16_C(18069), UINT16_C( 8166), UINT16_C(41083), UINT16_C(21606), UINT16_C( 8492), UINT16_C(27466), UINT16_C(35812), UINT16_C(44058), UINT16_C(22402), UINT16_C( 5998), UINT16_C(37552), UINT16_C( 274), UINT16_C(61176), UINT16_C(48001), UINT16_C(33250), UINT16_C( 3268), UINT16_C( 4707), UINT16_C(10014) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi16(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_mask_min_epu16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_u16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } /* Test for simde_mm512_maskz_min_epu16. Each test_vec entry supplies a 32-bit mask k, two arrays of 32 unsigned 16-bit inputs (a, b) and the expected 32-lane result r. In the expected data, lane n holds the smaller of a[n] and b[n] when bit n of k is set and 0 otherwise (e.g. the first vector's mask has bits 0, 5, 6, 7, 9, 15, 21, 23, 25, 27 and 31 set, matching the non-zero lanes of its r). */ static int test_simde_mm512_maskz_min_epu16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask32 k; /* lane-select mask: bit n controls 16-bit lane n */ const uint16_t a[32]; /* first input vector */ const uint16_t b[32]; /* second input vector */ const uint16_t r[32]; /* expected result */ } test_vec[] = { { UINT32_C(2325775073), { UINT16_C(31861), UINT16_C(36471), UINT16_C( 6552), UINT16_C(33113), UINT16_C(15773), UINT16_C(39996), UINT16_C(39459), UINT16_C( 8945), UINT16_C(22730), UINT16_C( 7867), UINT16_C(31654), UINT16_C(34707), UINT16_C(13565), UINT16_C(10514), UINT16_C(50106), UINT16_C(12078), UINT16_C(42303), UINT16_C(55230), UINT16_C( 6078), UINT16_C(23385), UINT16_C(38228), UINT16_C(30711), UINT16_C(59439), UINT16_C(64154), UINT16_C(21824), UINT16_C(58904), UINT16_C(44241), UINT16_C(52846), UINT16_C(32992), UINT16_C(39672), UINT16_C( 9795), UINT16_C(33481) }, { UINT16_C(34763), UINT16_C(35162), UINT16_C(45982), UINT16_C(62436), UINT16_C(56136), UINT16_C(30570), UINT16_C( 1219), UINT16_C( 1137), UINT16_C(35418), UINT16_C(11242), UINT16_C(22582), UINT16_C( 5881), UINT16_C(61912), UINT16_C( 7088), UINT16_C(30999), UINT16_C(58014), UINT16_C(63489), UINT16_C(40811), UINT16_C(20395), UINT16_C(62354), UINT16_C(64810), UINT16_C(61034), 
UINT16_C(56321), UINT16_C(23538), UINT16_C(56422), UINT16_C(40070), UINT16_C(32821), UINT16_C( 3506), UINT16_C(25201), UINT16_C(35113), UINT16_C(51163), UINT16_C(56427) }, { UINT16_C(31861), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(30570), UINT16_C( 1219), UINT16_C( 1137), UINT16_C( 0), UINT16_C( 7867), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(12078), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(30711), UINT16_C( 0), UINT16_C(23538), UINT16_C( 0), UINT16_C(40070), UINT16_C( 0), UINT16_C( 3506), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(33481) } }, { UINT32_C(1786566591), { UINT16_C(50955), UINT16_C( 3391), UINT16_C(12707), UINT16_C( 2408), UINT16_C(61197), UINT16_C(17061), UINT16_C(22383), UINT16_C(57424), UINT16_C(31161), UINT16_C(38249), UINT16_C(54592), UINT16_C(65393), UINT16_C(60844), UINT16_C(53865), UINT16_C(50940), UINT16_C( 1827), UINT16_C(25229), UINT16_C(12564), UINT16_C(32147), UINT16_C(41274), UINT16_C(57452), UINT16_C(56291), UINT16_C(13111), UINT16_C(61883), UINT16_C( 9644), UINT16_C(60550), UINT16_C(63482), UINT16_C(42731), UINT16_C(21733), UINT16_C(57720), UINT16_C(39962), UINT16_C(43240) }, { UINT16_C(65022), UINT16_C(37593), UINT16_C( 4986), UINT16_C(58931), UINT16_C( 5875), UINT16_C(11201), UINT16_C(31818), UINT16_C(63004), UINT16_C(41633), UINT16_C(39907), UINT16_C(52889), UINT16_C(32321), UINT16_C(47651), UINT16_C(15711), UINT16_C(18518), UINT16_C(21733), UINT16_C(48709), UINT16_C(49126), UINT16_C( 6610), UINT16_C(50597), UINT16_C(26160), UINT16_C(31472), UINT16_C( 3298), UINT16_C(33904), UINT16_C(21422), UINT16_C(18463), UINT16_C(24866), UINT16_C(17862), UINT16_C( 9755), UINT16_C(29058), UINT16_C(26734), UINT16_C(46021) }, { UINT16_C(50955), UINT16_C( 3391), UINT16_C( 4986), UINT16_C( 2408), UINT16_C( 5875), UINT16_C(11201), UINT16_C( 0), UINT16_C(57424), UINT16_C(31161), UINT16_C(38249), UINT16_C(52889), UINT16_C( 0), UINT16_C(47651), 
UINT16_C( 0), UINT16_C(18518), UINT16_C( 1827), UINT16_C( 0), UINT16_C( 0), UINT16_C( 6610), UINT16_C(41274), UINT16_C(26160), UINT16_C(31472), UINT16_C( 3298), UINT16_C( 0), UINT16_C( 0), UINT16_C(18463), UINT16_C( 0), UINT16_C(17862), UINT16_C( 0), UINT16_C(29058), UINT16_C(26734), UINT16_C( 0) } }, { UINT32_C(4168264742), { UINT16_C(44669), UINT16_C(24431), UINT16_C(57531), UINT16_C(27107), UINT16_C( 819), UINT16_C(21937), UINT16_C(30820), UINT16_C(32666), UINT16_C( 7582), UINT16_C( 3312), UINT16_C(46469), UINT16_C(43967), UINT16_C(12641), UINT16_C(10148), UINT16_C(25160), UINT16_C(50460), UINT16_C(35856), UINT16_C(52004), UINT16_C( 2156), UINT16_C(40757), UINT16_C(58891), UINT16_C(28661), UINT16_C(36702), UINT16_C(64750), UINT16_C(57004), UINT16_C(12552), UINT16_C(51091), UINT16_C(62941), UINT16_C(33272), UINT16_C(16412), UINT16_C(14563), UINT16_C(62213) }, { UINT16_C(10948), UINT16_C(12479), UINT16_C(62514), UINT16_C(15824), UINT16_C(50650), UINT16_C(14764), UINT16_C(39508), UINT16_C( 309), UINT16_C(15992), UINT16_C( 2866), UINT16_C( 3845), UINT16_C(65024), UINT16_C( 7312), UINT16_C(29502), UINT16_C(17493), UINT16_C( 6503), UINT16_C( 9838), UINT16_C(41034), UINT16_C( 6682), UINT16_C(62685), UINT16_C(35295), UINT16_C(13101), UINT16_C(25379), UINT16_C(39732), UINT16_C(26529), UINT16_C(42662), UINT16_C(42870), UINT16_C( 1956), UINT16_C(58307), UINT16_C( 6266), UINT16_C(57639), UINT16_C(38194) }, { UINT16_C( 0), UINT16_C(12479), UINT16_C(57531), UINT16_C( 0), UINT16_C( 0), UINT16_C(14764), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 3845), UINT16_C(43967), UINT16_C( 0), UINT16_C(10148), UINT16_C( 0), UINT16_C( 6503), UINT16_C( 0), UINT16_C(41034), UINT16_C( 0), UINT16_C( 0), UINT16_C(35295), UINT16_C(13101), UINT16_C(25379), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1956), UINT16_C(33272), UINT16_C( 6266), UINT16_C(14563), UINT16_C(38194) } }, { UINT32_C( 557153287), { UINT16_C(17307), UINT16_C(48808), UINT16_C(56742), 
UINT16_C(18265), UINT16_C(65348), UINT16_C(47854), UINT16_C(37542), UINT16_C(27329), UINT16_C(15477), UINT16_C(40066), UINT16_C(46109), UINT16_C( 9521), UINT16_C(26160), UINT16_C(50758), UINT16_C(23672), UINT16_C( 4923), UINT16_C(58528), UINT16_C(18129), UINT16_C(10945), UINT16_C( 1422), UINT16_C(31786), UINT16_C(53439), UINT16_C(33038), UINT16_C(33850), UINT16_C(48573), UINT16_C(55840), UINT16_C(21105), UINT16_C(41727), UINT16_C(18104), UINT16_C(12648), UINT16_C(42146), UINT16_C(16964) }, { UINT16_C( 5768), UINT16_C(18825), UINT16_C( 5952), UINT16_C(27214), UINT16_C( 3475), UINT16_C(41275), UINT16_C(30094), UINT16_C(19237), UINT16_C(17970), UINT16_C(42022), UINT16_C( 9624), UINT16_C(20550), UINT16_C(44651), UINT16_C( 3713), UINT16_C(50770), UINT16_C(55888), UINT16_C(55772), UINT16_C( 7203), UINT16_C(29168), UINT16_C(33671), UINT16_C(49791), UINT16_C( 3365), UINT16_C(18999), UINT16_C(27225), UINT16_C(32656), UINT16_C(10254), UINT16_C(21668), UINT16_C( 4217), UINT16_C(64002), UINT16_C(21790), UINT16_C(28352), UINT16_C(39983) }, { UINT16_C( 5768), UINT16_C(18825), UINT16_C( 5952), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 9624), UINT16_C( 9521), UINT16_C(26160), UINT16_C( 3713), UINT16_C(23672), UINT16_C( 0), UINT16_C(55772), UINT16_C( 0), UINT16_C(10945), UINT16_C( 0), UINT16_C(31786), UINT16_C( 3365), UINT16_C( 0), UINT16_C( 0), UINT16_C(32656), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(12648), UINT16_C( 0), UINT16_C( 0) } }, { UINT32_C( 951669576), { UINT16_C(57602), UINT16_C(14673), UINT16_C(43563), UINT16_C(48291), UINT16_C(45353), UINT16_C(52708), UINT16_C(23813), UINT16_C( 2269), UINT16_C(64344), UINT16_C( 6237), UINT16_C(35946), UINT16_C(45749), UINT16_C(28383), UINT16_C(42218), UINT16_C(42670), UINT16_C(45287), UINT16_C(14471), UINT16_C(46057), UINT16_C(36322), UINT16_C( 2927), UINT16_C(21310), UINT16_C(17625), UINT16_C(46769), UINT16_C( 2380), UINT16_C(43442), UINT16_C( 
7201), UINT16_C(54837), UINT16_C( 5582), UINT16_C(47172), UINT16_C(62137), UINT16_C(41055), UINT16_C(59042) }, { UINT16_C(36057), UINT16_C(48025), UINT16_C( 2073), UINT16_C(22471), UINT16_C(41052), UINT16_C( 3483), UINT16_C(59222), UINT16_C( 2070), UINT16_C(14224), UINT16_C(50724), UINT16_C(61966), UINT16_C(21211), UINT16_C(38059), UINT16_C( 2629), UINT16_C(59188), UINT16_C( 3568), UINT16_C(35443), UINT16_C(36041), UINT16_C(37010), UINT16_C(61156), UINT16_C(32560), UINT16_C(34555), UINT16_C( 4455), UINT16_C(63375), UINT16_C(45897), UINT16_C(22461), UINT16_C(39078), UINT16_C(20905), UINT16_C(60972), UINT16_C(24923), UINT16_C(19414), UINT16_C(18798) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(22471), UINT16_C( 0), UINT16_C( 0), UINT16_C(23813), UINT16_C( 0), UINT16_C(14224), UINT16_C( 6237), UINT16_C( 0), UINT16_C( 0), UINT16_C(28383), UINT16_C( 0), UINT16_C(42670), UINT16_C( 0), UINT16_C(14471), UINT16_C( 0), UINT16_C( 0), UINT16_C( 2927), UINT16_C(21310), UINT16_C(17625), UINT16_C( 0), UINT16_C( 2380), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 5582), UINT16_C(47172), UINT16_C(24923), UINT16_C( 0), UINT16_C( 0) } }, { UINT32_C(1758869461), { UINT16_C(21049), UINT16_C(41086), UINT16_C( 3427), UINT16_C(44184), UINT16_C(21952), UINT16_C(26115), UINT16_C(44526), UINT16_C( 6839), UINT16_C( 4763), UINT16_C(29051), UINT16_C(59998), UINT16_C(13243), UINT16_C(37153), UINT16_C(59803), UINT16_C(62027), UINT16_C(34016), UINT16_C(24132), UINT16_C(42789), UINT16_C(48491), UINT16_C(11348), UINT16_C(22290), UINT16_C( 146), UINT16_C(18948), UINT16_C(40987), UINT16_C(38492), UINT16_C(47633), UINT16_C(52352), UINT16_C(41710), UINT16_C(35165), UINT16_C(43147), UINT16_C(27515), UINT16_C(48941) }, { UINT16_C(21194), UINT16_C(13671), UINT16_C(47887), UINT16_C( 8545), UINT16_C(62482), UINT16_C( 5922), UINT16_C(15678), UINT16_C(39607), UINT16_C(51411), UINT16_C(21589), UINT16_C(17301), UINT16_C(62198), UINT16_C(33228), UINT16_C(18587), UINT16_C(51436), UINT16_C(46599), 
UINT16_C(28186), UINT16_C(10732), UINT16_C(19753), UINT16_C(15434), UINT16_C(27713), UINT16_C(32595), UINT16_C( 2729), UINT16_C(32026), UINT16_C(28626), UINT16_C(26577), UINT16_C(51122), UINT16_C(32346), UINT16_C(62792), UINT16_C(13510), UINT16_C(52925), UINT16_C(55275) }, { UINT16_C(21049), UINT16_C( 0), UINT16_C( 3427), UINT16_C( 0), UINT16_C(21952), UINT16_C( 0), UINT16_C(15678), UINT16_C( 6839), UINT16_C( 4763), UINT16_C(21589), UINT16_C(17301), UINT16_C( 0), UINT16_C(33228), UINT16_C(18587), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(10732), UINT16_C(19753), UINT16_C( 0), UINT16_C(22290), UINT16_C( 0), UINT16_C( 2729), UINT16_C(32026), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(32346), UINT16_C( 0), UINT16_C(13510), UINT16_C(27515), UINT16_C( 0) } }, { UINT32_C(1711331132), { UINT16_C(62903), UINT16_C(24805), UINT16_MAX, UINT16_C(53725), UINT16_C(44654), UINT16_C( 8249), UINT16_C(37749), UINT16_C(48543), UINT16_C(25992), UINT16_C(17906), UINT16_C(56627), UINT16_C(28700), UINT16_C( 7348), UINT16_C(55510), UINT16_C(30822), UINT16_C( 7486), UINT16_C( 9325), UINT16_C(27774), UINT16_C(23331), UINT16_C(37437), UINT16_C(30218), UINT16_C(32690), UINT16_C(20745), UINT16_C(37181), UINT16_C(12215), UINT16_C(60118), UINT16_C(61964), UINT16_C(49242), UINT16_C(12302), UINT16_C(30104), UINT16_C(55208), UINT16_C( 5522) }, { UINT16_C( 4347), UINT16_C( 7809), UINT16_C(49004), UINT16_C(30384), UINT16_C(25397), UINT16_C(16373), UINT16_C(12980), UINT16_C(27600), UINT16_C(42849), UINT16_C(27990), UINT16_C(45209), UINT16_C(43053), UINT16_C(50913), UINT16_C(35101), UINT16_C(44957), UINT16_C(39071), UINT16_C( 8384), UINT16_C(11446), UINT16_C(26591), UINT16_C( 5538), UINT16_C(38858), UINT16_C(32340), UINT16_C( 9418), UINT16_C(11242), UINT16_C(16587), UINT16_C(26009), UINT16_C(50928), UINT16_C(53517), UINT16_C(10892), UINT16_C(10587), UINT16_C(64217), UINT16_C(39361) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C(49004), UINT16_C(30384), UINT16_C(25397), UINT16_C( 8249), 
UINT16_C( 0), UINT16_C( 0), UINT16_C(25992), UINT16_C(17906), UINT16_C(45209), UINT16_C( 0), UINT16_C( 7348), UINT16_C( 0), UINT16_C(30822), UINT16_C( 7486), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(26009), UINT16_C(50928), UINT16_C( 0), UINT16_C( 0), UINT16_C(10587), UINT16_C(55208), UINT16_C( 0) } }, { UINT32_C(4207245338), { UINT16_C(25599), UINT16_C(51495), UINT16_C( 4487), UINT16_C(21492), UINT16_C(36177), UINT16_C(17080), UINT16_C(50516), UINT16_C(57363), UINT16_C(28399), UINT16_C(51210), UINT16_C(52072), UINT16_C(33634), UINT16_C(10051), UINT16_C( 8829), UINT16_C(35983), UINT16_C(36555), UINT16_C(62447), UINT16_C(30295), UINT16_C(19204), UINT16_C(22217), UINT16_C(33241), UINT16_C(11672), UINT16_C(43846), UINT16_C(13581), UINT16_C( 5914), UINT16_C(33534), UINT16_C(24803), UINT16_C( 9733), UINT16_C(33415), UINT16_C( 5705), UINT16_C( 5134), UINT16_C(64932) }, { UINT16_C(64263), UINT16_C( 3188), UINT16_C(15687), UINT16_C( 8290), UINT16_C(64191), UINT16_C( 1357), UINT16_C(23205), UINT16_C(48955), UINT16_C(14706), UINT16_C(21826), UINT16_C(18329), UINT16_C( 8315), UINT16_C(50378), UINT16_C(55351), UINT16_C(56281), UINT16_C(57558), UINT16_C(19159), UINT16_C( 7916), UINT16_C(20103), UINT16_C(17982), UINT16_C(35656), UINT16_C(61004), UINT16_C(34789), UINT16_C(22445), UINT16_C(61376), UINT16_C(22956), UINT16_C(10295), UINT16_C( 377), UINT16_C(45292), UINT16_C(50649), UINT16_C(44940), UINT16_C(25510) }, { UINT16_C( 0), UINT16_C( 3188), UINT16_C( 0), UINT16_C( 8290), UINT16_C(36177), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 8315), UINT16_C(10051), UINT16_C( 8829), UINT16_C(35983), UINT16_C( 0), UINT16_C(19159), UINT16_C( 0), UINT16_C(19204), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(34789), UINT16_C(13581), UINT16_C( 0), UINT16_C(22956), UINT16_C( 0), UINT16_C( 377), UINT16_C(33415), UINT16_C( 5705), UINT16_C( 5134), 
UINT16_C(25510) } } }; /* Driver loop of test_simde_mm512_maskz_min_epu16: for every vector, load a and b, compute the zero-masked unsigned 16-bit minimum under mask k, and compare it with the expected r. NOTE(review): simde_test_x86_assert_equal_u16x32 is defined elsewhere; presumably it fails the test on a mismatch -- confirm. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_maskz_min_epu16(test_vec[i].k, a, b); simde_test_x86_assert_equal_u16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } /* Test for simde_mm512_min_epi32. Each test_vec entry supplies two arrays of 16 signed 32-bit inputs (a, b) and the expected result r; in the expected data r[n] is the signed minimum of a[n] and b[n] (e.g. the first vector's r[0] is the negative b[0] rather than the positive a[0]). */ static int test_simde_mm512_min_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[16]; /* first input vector */ const int32_t b[16]; /* second input vector */ const int32_t r[16]; /* expected lane-wise minimum */ } test_vec[] = { { { INT32_C( 851604017), -INT32_C( 751915793), INT32_C( 1683302456), INT32_C( 1978536698), -INT32_C( 1145431871), -INT32_C( 1558774560), INT32_C( 1709346334), -INT32_C( 1299143295), INT32_C( 132469272), INT32_C( 920327166), INT32_C( 899297339), INT32_C( 78285123), -INT32_C( 2118163039), INT32_C( 2133120353), -INT32_C( 438040988), -INT32_C( 1013484373) }, { -INT32_C( 976519994), -INT32_C( 889477489), INT32_C( 419469014), -INT32_C( 1071797729), INT32_C( 1900141839), INT32_C( 384853426), INT32_C( 385602923), INT32_C( 282760009), -INT32_C( 1613388529), INT32_C( 560582731), -INT32_C( 2059703962), INT32_C( 591747092), -INT32_C( 392919499), INT32_C( 1459521003), -INT32_C( 1553073574), -INT32_C( 1665972339) }, { -INT32_C( 976519994), -INT32_C( 889477489), INT32_C( 419469014), -INT32_C( 1071797729), -INT32_C( 1145431871), -INT32_C( 1558774560), INT32_C( 385602923), -INT32_C( 1299143295), -INT32_C( 1613388529), INT32_C( 560582731), -INT32_C( 2059703962), INT32_C( 78285123), -INT32_C( 2118163039), INT32_C( 1459521003), -INT32_C( 1553073574), -INT32_C( 1665972339) } }, { { INT32_C( 926648556), -INT32_C( 1084709543), INT32_C( 574984974), INT32_C( 558271212), -INT32_C( 66463215), -INT32_C( 1185740705), -INT32_C( 1889681406), -INT32_C( 215281657), -INT32_C( 232101991), INT32_C( 447840780), INT32_C( 37615126), -INT32_C( 1843166335), -INT32_C( 1114755747), INT32_C( 930537781), -INT32_C( 1463364703), INT32_C( 2107372004) }, { INT32_C( 1685046616), INT32_C( 
1585389640), -INT32_C( 1738490857), -INT32_C( 1674935489), -INT32_C( 447104848), INT32_C( 991744154), -INT32_C( 2015042909), INT32_C( 738492372), -INT32_C( 1919847611), -INT32_C( 1410658156), INT32_C( 188959692), INT32_C( 2141679054), -INT32_C( 1083965147), INT32_C( 1962639825), INT32_C( 939319139), -INT32_C( 1553727394) }, { INT32_C( 926648556), -INT32_C( 1084709543), -INT32_C( 1738490857), -INT32_C( 1674935489), -INT32_C( 447104848), -INT32_C( 1185740705), -INT32_C( 2015042909), -INT32_C( 215281657), -INT32_C( 1919847611), -INT32_C( 1410658156), INT32_C( 37615126), -INT32_C( 1843166335), -INT32_C( 1114755747), INT32_C( 930537781), -INT32_C( 1463364703), -INT32_C( 1553727394) } }, { { INT32_C( 137426292), -INT32_C( 776791291), INT32_C( 903673446), -INT32_C( 2001435805), INT32_C( 1430788228), -INT32_C( 37076071), -INT32_C( 2144025054), INT32_C( 975476934), -INT32_C( 1841146739), -INT32_C( 698092176), INT32_C( 1309360107), INT32_C( 1205321667), INT32_C( 1906122712), -INT32_C( 2073139358), -INT32_C( 217799891), -INT32_C( 919721925) }, { -INT32_C( 312774531), INT32_C( 1371783014), -INT32_C( 1046425602), INT32_C( 1711896462), -INT32_C( 120019306), INT32_C( 981222925), INT32_C( 623739113), INT32_C( 653155241), INT32_C( 840124876), INT32_C( 126080520), INT32_C( 885531557), INT32_C( 815452570), -INT32_C( 2077724041), -INT32_C( 1564564295), -INT32_C( 825758683), INT32_C( 334804295) }, { -INT32_C( 312774531), -INT32_C( 776791291), -INT32_C( 1046425602), -INT32_C( 2001435805), -INT32_C( 120019306), -INT32_C( 37076071), -INT32_C( 2144025054), INT32_C( 653155241), -INT32_C( 1841146739), -INT32_C( 698092176), INT32_C( 885531557), INT32_C( 815452570), -INT32_C( 2077724041), -INT32_C( 2073139358), -INT32_C( 825758683), -INT32_C( 919721925) } }, { { INT32_C( 121964543), -INT32_C( 2096182819), -INT32_C( 2017994772), INT32_C( 548884904), INT32_C( 2107957444), -INT32_C( 1457560700), -INT32_C( 1770526897), -INT32_C( 1683330148), INT32_C( 1352920946), -INT32_C( 1512853064), INT32_C( 
825002632), -INT32_C( 1622023205), INT32_C( 1209857475), -INT32_C( 1477362600), -INT32_C( 1086428893), INT32_C( 1197205716) }, { -INT32_C( 1869087017), INT32_C( 943024815), -INT32_C( 815177228), INT32_C( 141539908), INT32_C( 139496367), -INT32_C( 357613113), INT32_C( 2141908394), -INT32_C( 1379531307), -INT32_C( 1304601341), -INT32_C( 1142263097), INT32_C( 394941395), -INT32_C( 1121978099), INT32_C( 1288007557), INT32_C( 1530361009), INT32_C( 937091426), -INT32_C( 370892570) }, { -INT32_C( 1869087017), -INT32_C( 2096182819), -INT32_C( 2017994772), INT32_C( 141539908), INT32_C( 139496367), -INT32_C( 1457560700), -INT32_C( 1770526897), -INT32_C( 1683330148), -INT32_C( 1304601341), -INT32_C( 1512853064), INT32_C( 394941395), -INT32_C( 1622023205), INT32_C( 1209857475), -INT32_C( 1477362600), -INT32_C( 1086428893), -INT32_C( 370892570) } }, { { -INT32_C( 996466179), INT32_C( 1719633555), -INT32_C( 411170087), -INT32_C( 2002477821), -INT32_C( 1093310195), INT32_C( 1058606301), -INT32_C( 747113235), -INT32_C( 1833149548), INT32_C( 274093949), -INT32_C( 1216882979), -INT32_C( 476121632), -INT32_C( 1620295022), -INT32_C( 2007154261), INT32_C( 986216269), -INT32_C( 15909013), INT32_C( 395430298) }, { -INT32_C( 30873568), -INT32_C( 1632264258), INT32_C( 646009748), INT32_C( 1086778773), INT32_C( 2076713774), INT32_C( 95785114), INT32_C( 1778762447), -INT32_C( 1400793461), INT32_C( 1017817470), -INT32_C( 589668536), INT32_C( 1191402674), INT32_C( 2022164809), -INT32_C( 2014097428), -INT32_C( 1349735968), -INT32_C( 149319317), -INT32_C( 1499227352) }, { -INT32_C( 996466179), -INT32_C( 1632264258), -INT32_C( 411170087), -INT32_C( 2002477821), -INT32_C( 1093310195), INT32_C( 95785114), -INT32_C( 747113235), -INT32_C( 1833149548), INT32_C( 274093949), -INT32_C( 1216882979), -INT32_C( 476121632), -INT32_C( 1620295022), -INT32_C( 2014097428), -INT32_C( 1349735968), -INT32_C( 149319317), -INT32_C( 1499227352) } }, { { -INT32_C( 1914483388), INT32_C( 1583988140), INT32_C( 
1671785497), INT32_C( 584789045), INT32_C( 1537855099), -INT32_C( 485804681), -INT32_C( 270916409), INT32_C( 76905919), INT32_C( 1989245130), INT32_C( 1339357750), -INT32_C( 1666025113), INT32_C( 549359013), -INT32_C( 746821796), INT32_C( 1689683869), INT32_C( 1800638635), -INT32_C( 680531955) }, { -INT32_C( 1756561311), INT32_C( 1642471930), INT32_C( 1073650074), -INT32_C( 2107589594), -INT32_C( 1051272156), INT32_C( 237309027), -INT32_C( 1434879843), -INT32_C( 1048385440), -INT32_C( 480718872), -INT32_C( 1958461455), -INT32_C( 20233512), INT32_C( 595667967), INT32_C( 1793382151), -INT32_C( 2139616797), -INT32_C( 517213567), -INT32_C( 1012683302) }, { -INT32_C( 1914483388), INT32_C( 1583988140), INT32_C( 1073650074), -INT32_C( 2107589594), -INT32_C( 1051272156), -INT32_C( 485804681), -INT32_C( 1434879843), -INT32_C( 1048385440), -INT32_C( 480718872), -INT32_C( 1958461455), -INT32_C( 1666025113), INT32_C( 549359013), -INT32_C( 746821796), -INT32_C( 2139616797), -INT32_C( 517213567), -INT32_C( 1012683302) } }, { { INT32_C( 1839659900), INT32_C( 318368314), INT32_C( 739361837), -INT32_C( 162557201), INT32_C( 1281373033), -INT32_C( 1110583236), -INT32_C( 1516308278), INT32_C( 560480677), INT32_C( 2005863997), INT32_C( 629836024), INT32_C( 995203916), -INT32_C( 1775132627), INT32_C( 266506707), INT32_C( 885829481), INT32_C( 1289317287), -INT32_C( 378650196) }, { INT32_C( 1197604175), -INT32_C( 781325435), -INT32_C( 1291010426), INT32_C( 843660639), INT32_C( 927083470), -INT32_C( 2106913061), INT32_C( 651117689), -INT32_C( 737198715), -INT32_C( 1088655302), -INT32_C( 510621349), -INT32_C( 1500210105), -INT32_C( 1478894119), -INT32_C( 455206135), -INT32_C( 1553577431), INT32_C( 348730766), -INT32_C( 1394026382) }, { INT32_C( 1197604175), -INT32_C( 781325435), -INT32_C( 1291010426), -INT32_C( 162557201), INT32_C( 927083470), -INT32_C( 2106913061), -INT32_C( 1516308278), -INT32_C( 737198715), -INT32_C( 1088655302), -INT32_C( 510621349), -INT32_C( 1500210105), -INT32_C( 
1775132627), -INT32_C( 455206135), -INT32_C( 1553577431), INT32_C( 348730766), -INT32_C( 1394026382) } }, { { -INT32_C( 1536490423), -INT32_C( 712574067), INT32_C( 1887115927), INT32_C( 18306296), -INT32_C( 1712982417), -INT32_C( 834909376), -INT32_C( 203291263), INT32_C( 648072157), INT32_C( 1573587919), -INT32_C( 1640869625), INT32_C( 1661971819), INT32_C( 1902388738), INT32_C( 1544177948), INT32_C( 371934869), INT32_C( 688459083), -INT32_C( 1471174184) }, { -INT32_C( 1140516171), -INT32_C( 681953429), -INT32_C( 432379420), -INT32_C( 1403478128), INT32_C( 2097767144), -INT32_C( 191679319), INT32_C( 438148417), -INT32_C( 71144122), -INT32_C( 206059641), -INT32_C( 490073346), INT32_C( 197723259), -INT32_C( 1934089821), INT32_C( 738836867), INT32_C( 908172789), -INT32_C( 2142224838), INT32_C( 863769259) }, { -INT32_C( 1536490423), -INT32_C( 712574067), -INT32_C( 432379420), -INT32_C( 1403478128), -INT32_C( 1712982417), -INT32_C( 834909376), -INT32_C( 203291263), -INT32_C( 71144122), -INT32_C( 206059641), -INT32_C( 1640869625), INT32_C( 197723259), -INT32_C( 1934089821), INT32_C( 738836867), INT32_C( 371934869), -INT32_C( 2142224838), -INT32_C( 1471174184) } } }; /* Driver loop of test_simde_mm512_min_epi32: for every vector, load a and b, compute the signed 32-bit minimum, and compare it with the expected r. NOTE(review): simde_test_x86_assert_equal_i32x16 is defined elsewhere; presumably it fails the test on a mismatch -- confirm. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_min_epi32(a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_min_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t src[16]; const simde__mmask16 k; const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { { INT32_C( 122954917), INT32_C( 657547680), -INT32_C( 1965187134), INT32_C( 1812295177), INT32_C( 1035689364), -INT32_C( 1614746190), INT32_C( 1490439621), -INT32_C( 1382961144), INT32_C( 1958077908), INT32_C( 161277511), -INT32_C( 1886160507), INT32_C( 2080086247), 
-INT32_C( 1464224010), INT32_C( 1766357411), -INT32_C( 826204474), -INT32_C( 1552133426) }, UINT16_C(12600), { -INT32_C( 1290305769), INT32_C( 455974024), INT32_C( 649270315), INT32_C( 1239198096), -INT32_C( 1698529198), -INT32_C( 1430746903), -INT32_C( 738425001), INT32_C( 1090794538), INT32_C( 1005919155), -INT32_C( 480894281), -INT32_C( 972486090), INT32_C( 84928179), INT32_C( 1335873894), -INT32_C( 1309058982), INT32_C( 159708639), -INT32_C( 582318038) }, { INT32_C( 1511604131), -INT32_C( 1740804255), INT32_C( 761153401), -INT32_C( 1825411539), -INT32_C( 1713188289), INT32_C( 155900714), -INT32_C( 99430704), -INT32_C( 103261098), -INT32_C( 44764772), -INT32_C( 627732128), INT32_C( 84407512), -INT32_C( 1583859358), INT32_C( 893090315), INT32_C( 641631573), -INT32_C( 1440722860), INT32_C( 1235548333) }, { INT32_C( 122954917), INT32_C( 657547680), -INT32_C( 1965187134), -INT32_C( 1825411539), -INT32_C( 1713188289), -INT32_C( 1430746903), INT32_C( 1490439621), -INT32_C( 1382961144), -INT32_C( 44764772), INT32_C( 161277511), -INT32_C( 1886160507), INT32_C( 2080086247), INT32_C( 893090315), -INT32_C( 1309058982), -INT32_C( 826204474), -INT32_C( 1552133426) } }, { { INT32_C( 1246165225), INT32_C( 1646582921), INT32_C( 845622224), INT32_C( 1892876388), -INT32_C( 794489222), -INT32_C( 386472812), -INT32_C( 510519756), -INT32_C( 131451377), -INT32_C( 1203605202), INT32_C( 488269389), -INT32_C( 162561647), -INT32_C( 60415359), -INT32_C( 976483535), INT32_C( 615367407), -INT32_C( 419086376), -INT32_C( 1528811402) }, UINT16_C( 8865), { INT32_C( 1988750940), INT32_C( 1526209035), INT32_C( 1987868944), INT32_C( 1099083125), INT32_C( 520319346), -INT32_C( 1688216427), -INT32_C( 1546922557), -INT32_C( 691639175), INT32_C( 1699499866), INT32_C( 2059355241), INT32_C( 871381950), INT32_C( 1534423785), INT32_C( 2038135012), INT32_C( 404019796), INT32_C( 683401135), -INT32_C( 1510047413) }, { INT32_C( 973753296), INT32_C( 1303693711), -INT32_C( 293493755), -INT32_C( 79038953), 
-INT32_C( 1015757714), INT32_C( 1306233246), -INT32_C( 1267296664), -INT32_C( 396790760), INT32_C( 1327654079), INT32_C( 849139245), -INT32_C( 1843389061), -INT32_C( 2104661485), -INT32_C( 851115730), -INT32_C( 216391542), -INT32_C( 827879242), -INT32_C( 994639867) }, { INT32_C( 973753296), INT32_C( 1646582921), INT32_C( 845622224), INT32_C( 1892876388), -INT32_C( 794489222), -INT32_C( 1688216427), -INT32_C( 510519756), -INT32_C( 691639175), -INT32_C( 1203605202), INT32_C( 849139245), -INT32_C( 162561647), -INT32_C( 60415359), -INT32_C( 976483535), -INT32_C( 216391542), -INT32_C( 419086376), -INT32_C( 1528811402) } }, { { -INT32_C( 1844192924), INT32_C( 734310576), -INT32_C( 507648563), INT32_C( 2103659087), -INT32_C( 699750325), INT32_C( 2127128008), -INT32_C( 95588107), -INT32_C( 708901776), -INT32_C( 1922575651), INT32_C( 1354247042), INT32_C( 1597076752), INT32_C( 199070911), INT32_C( 81864508), -INT32_C( 2105365876), -INT32_C( 1971532006), -INT32_C( 1319158829) }, UINT16_C(50702), { -INT32_C( 135098306), INT32_C( 292291296), INT32_C( 1067789410), INT32_C( 409395511), INT32_C( 1757606885), -INT32_C( 247997323), -INT32_C( 970126490), -INT32_C( 91472964), -INT32_C( 1393459509), -INT32_C( 474128767), -INT32_C( 1054710902), INT32_C( 718899268), -INT32_C( 258827397), -INT32_C( 572339849), -INT32_C( 1868361772), INT32_C( 361443402) }, { INT32_C( 801209518), INT32_C( 1678933978), INT32_C( 639972578), INT32_C( 944832189), INT32_C( 271180441), -INT32_C( 2131883092), INT32_C( 1678872858), INT32_C( 1887018177), -INT32_C( 207668456), -INT32_C( 1671974214), -INT32_C( 1547534874), INT32_C( 366744443), -INT32_C( 1591409163), INT32_C( 706876176), INT32_C( 1720595365), -INT32_C( 405403697) }, { -INT32_C( 1844192924), INT32_C( 292291296), INT32_C( 639972578), INT32_C( 409395511), -INT32_C( 699750325), INT32_C( 2127128008), -INT32_C( 95588107), -INT32_C( 708901776), -INT32_C( 1922575651), -INT32_C( 1671974214), -INT32_C( 1547534874), INT32_C( 199070911), INT32_C( 81864508), 
-INT32_C( 2105365876), -INT32_C( 1868361772), -INT32_C( 405403697) } }, { { -INT32_C( 52791742), INT32_C( 244855336), INT32_C( 716331951), INT32_C( 1665109614), -INT32_C( 1559927405), INT32_C( 499984248), INT32_C( 696539994), -INT32_C( 1525654942), -INT32_C( 123606064), -INT32_C( 871941603), INT32_C( 66501013), -INT32_C( 630835641), INT32_C( 326986651), -INT32_C( 332313966), INT32_C( 118863269), -INT32_C( 525588977) }, UINT16_C(19985), { -INT32_C( 544723240), -INT32_C( 258466310), -INT32_C( 2044272864), INT32_C( 921878969), INT32_C( 109085909), -INT32_C( 2034555535), -INT32_C( 642987475), -INT32_C( 2094547542), -INT32_C( 446517269), -INT32_C( 321455156), -INT32_C( 1804337958), -INT32_C( 1815452226), INT32_C( 1519995881), -INT32_C( 1646177168), -INT32_C( 914911970), INT32_C( 910990923) }, { INT32_C( 438087246), -INT32_C( 2079853911), -INT32_C( 1407681810), -INT32_C( 935337249), -INT32_C( 1641818067), INT32_C( 1262158892), -INT32_C( 602623343), -INT32_C( 1626120111), -INT32_C( 1195823346), INT32_C( 255639585), INT32_C( 431772730), INT32_C( 1692597046), INT32_C( 132564), -INT32_C( 1706345207), INT32_C( 1098342384), -INT32_C( 824145217) }, { -INT32_C( 544723240), INT32_C( 244855336), INT32_C( 716331951), INT32_C( 1665109614), -INT32_C( 1641818067), INT32_C( 499984248), INT32_C( 696539994), -INT32_C( 1525654942), -INT32_C( 123606064), -INT32_C( 321455156), -INT32_C( 1804337958), -INT32_C( 1815452226), INT32_C( 326986651), -INT32_C( 332313966), -INT32_C( 914911970), -INT32_C( 525588977) } }, { { -INT32_C( 628713031), -INT32_C( 1796619686), INT32_C( 1286513942), INT32_C( 1974505633), -INT32_C( 1636453739), -INT32_C( 533151248), -INT32_C( 534663392), -INT32_C( 223477447), -INT32_C( 187943782), INT32_C( 210351862), -INT32_C( 61327525), INT32_C( 1550911943), -INT32_C( 1409620037), -INT32_C( 930401624), INT32_C( 464039138), INT32_C( 1208899245) }, UINT16_C(55946), { -INT32_C( 980385732), -INT32_C( 436409204), INT32_C( 1525597160), INT32_C( 423733535), -INT32_C( 531830443), 
INT32_C( 1519201969), INT32_C( 1471167049), -INT32_C( 1087227006), INT32_C( 1216660155), -INT32_C( 1758625362), INT32_C( 1693522756), INT32_C( 427635396), -INT32_C( 855979749), INT32_C( 1093044215), INT32_C( 1150867393), -INT32_C( 855389678) }, { INT32_C( 991266701), INT32_C( 1305625096), INT32_C( 582075229), INT32_C( 272314101), -INT32_C( 270715400), INT32_C( 2117075900), -INT32_C( 322778662), INT32_C( 549111187), INT32_C( 1432145740), INT32_C( 1872899602), -INT32_C( 409906190), INT32_C( 2046348673), -INT32_C( 1083583230), -INT32_C( 1287808552), -INT32_C( 157286558), INT32_C( 269900228) }, { -INT32_C( 628713031), -INT32_C( 436409204), INT32_C( 1286513942), INT32_C( 272314101), -INT32_C( 1636453739), -INT32_C( 533151248), -INT32_C( 534663392), -INT32_C( 1087227006), -INT32_C( 187943782), -INT32_C( 1758625362), -INT32_C( 61327525), INT32_C( 427635396), -INT32_C( 1083583230), -INT32_C( 930401624), -INT32_C( 157286558), -INT32_C( 855389678) } }, { { INT32_C( 996504105), -INT32_C( 1817573471), -INT32_C( 595968934), INT32_C( 190149129), INT32_C( 550157895), -INT32_C( 1160575144), -INT32_C( 894406138), -INT32_C( 170145844), -INT32_C( 634372039), -INT32_C( 1569859000), INT32_C( 528410646), -INT32_C( 1574185894), -INT32_C( 356321902), INT32_C( 61183485), -INT32_C( 741452537), INT32_C( 1455991068) }, UINT16_C(63721), { -INT32_C( 1630326480), INT32_C( 1384573396), INT32_C( 874962953), INT32_C( 1143585154), INT32_C( 1222190755), -INT32_C( 123805398), INT32_C( 2107751092), INT32_C( 1098222096), -INT32_C( 1864415044), INT32_C( 1004692786), -INT32_C( 932247227), INT32_C( 1695324354), -INT32_C( 374413633), INT32_C( 2078362823), INT32_C( 402162182), -INT32_C( 933728756) }, { -INT32_C( 396871754), -INT32_C( 517784676), INT32_C( 111776324), -INT32_C( 362039765), INT32_C( 1674779036), INT32_C( 1826534501), INT32_C( 1115936566), -INT32_C( 83109051), -INT32_C( 1360764142), -INT32_C( 493942882), -INT32_C( 974636646), -INT32_C( 1951443729), -INT32_C( 739343763), INT32_C( 1816120374), 
-INT32_C( 391200093), -INT32_C( 1343964771) }, { -INT32_C( 1630326480), -INT32_C( 1817573471), -INT32_C( 595968934), -INT32_C( 362039765), INT32_C( 550157895), -INT32_C( 123805398), INT32_C( 1115936566), -INT32_C( 83109051), -INT32_C( 634372039), -INT32_C( 1569859000), INT32_C( 528410646), -INT32_C( 1951443729), -INT32_C( 739343763), INT32_C( 1816120374), -INT32_C( 391200093), -INT32_C( 1343964771) } }, { { -INT32_C( 1151481827), INT32_C( 1772022991), INT32_C( 338593317), INT32_C( 1218436570), -INT32_C( 1793356449), -INT32_C( 50242982), -INT32_C( 1176063972), -INT32_C( 2039952791), INT32_C( 1631765906), -INT32_C( 674504527), INT32_C( 1105983846), INT32_C( 914983895), INT32_C( 1926013976), INT32_C( 443600382), -INT32_C( 405580163), -INT32_C( 1301464288) }, UINT16_C(44800), { -INT32_C( 561008365), INT32_C( 1960375944), -INT32_C( 1056985289), -INT32_C( 1318840347), -INT32_C( 92314998), INT32_C( 1330707580), INT32_C( 1368027363), INT32_C( 889227810), -INT32_C( 1005285317), -INT32_C( 1120342906), -INT32_C( 2139277413), -INT32_C( 667754162), -INT32_C( 1076711101), -INT32_C( 1861344595), -INT32_C( 1260218222), INT32_C( 1575674402) }, { -INT32_C( 131989902), -INT32_C( 2051712534), -INT32_C( 553307504), INT32_C( 1454847763), -INT32_C( 1776973080), INT32_C( 1059529644), -INT32_C( 571274821), INT32_C( 1580981739), -INT32_C( 984195877), INT32_C( 1179258038), INT32_C( 1378177086), INT32_C( 1890114951), INT32_C( 319209063), -INT32_C( 1655558687), INT32_C( 561661494), -INT32_C( 8407773) }, { -INT32_C( 1151481827), INT32_C( 1772022991), INT32_C( 338593317), INT32_C( 1218436570), -INT32_C( 1793356449), -INT32_C( 50242982), -INT32_C( 1176063972), -INT32_C( 2039952791), -INT32_C( 1005285317), -INT32_C( 1120342906), -INT32_C( 2139277413), -INT32_C( 667754162), INT32_C( 1926013976), -INT32_C( 1861344595), -INT32_C( 405580163), -INT32_C( 8407773) } }, { { -INT32_C( 926624238), INT32_C( 537792482), -INT32_C( 428723105), INT32_C( 2018908945), -INT32_C( 1165271847), -INT32_C( 1084760439), 
INT32_C( 1205981732), -INT32_C( 1723441017), INT32_C( 409013046), INT32_C( 2033807386), -INT32_C( 1252021340), -INT32_C( 1624394042), -INT32_C( 1671776238), -INT32_C( 1168395882), INT32_C( 184695939), -INT32_C( 744208227) }, UINT16_C( 1619), { INT32_C( 628518380), INT32_C( 1188043494), -INT32_C( 50554929), -INT32_C( 1867248074), INT32_C( 105008042), INT32_C( 121816325), INT32_C( 1968234448), -INT32_C( 1619287117), -INT32_C( 138088175), -INT32_C( 633498613), INT32_C( 1658206507), -INT32_C( 218985912), -INT32_C( 604490539), -INT32_C( 891143174), -INT32_C( 851496422), -INT32_C( 412304682) }, { -INT32_C( 1226952533), -INT32_C( 258990907), -INT32_C( 1655544235), -INT32_C( 963623439), INT32_C( 1939966073), -INT32_C( 566328125), -INT32_C( 1934918218), -INT32_C( 478996424), INT32_C( 228217416), -INT32_C( 1006753170), -INT32_C( 2107551599), INT32_C( 256438677), INT32_C( 1031989881), INT32_C( 605798510), INT32_C( 1991362110), INT32_C( 660153566) }, { -INT32_C( 1226952533), -INT32_C( 258990907), -INT32_C( 428723105), INT32_C( 2018908945), INT32_C( 105008042), -INT32_C( 1084760439), -INT32_C( 1934918218), -INT32_C( 1723441017), INT32_C( 409013046), -INT32_C( 1006753170), -INT32_C( 2107551599), -INT32_C( 1624394042), -INT32_C( 1671776238), -INT32_C( 1168395882), INT32_C( 184695939), -INT32_C( 744208227) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi32(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_mask_min_epi32(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_min_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { UINT16_C(20760), { INT32_C( 213830387), -INT32_C( 9123383), INT32_C( 
845115943), INT32_C( 8198242), -INT32_C( 76107379), -INT32_C( 862611029), INT32_C( 1709952286), INT32_C( 1102447438), -INT32_C( 1773243187), INT32_C( 1704313405), -INT32_C( 1768426444), -INT32_C( 1701374707), INT32_C( 1905593798), -INT32_C( 985781337), INT32_C( 1177168376), -INT32_C( 108470228) }, { -INT32_C( 1852844460), -INT32_C( 856283752), INT32_C( 643993113), INT32_C( 1774254499), -INT32_C( 1361422841), INT32_C( 2037586049), INT32_C( 1841274177), -INT32_C( 781826179), -INT32_C( 1251805667), INT32_C( 880892187), -INT32_C( 1973689113), -INT32_C( 453829667), -INT32_C( 225260175), INT32_C( 661325286), INT32_C( 529869730), -INT32_C( 1863255182) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 8198242), -INT32_C( 1361422841), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1773243187), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 225260175), INT32_C( 0), INT32_C( 529869730), INT32_C( 0) } }, { UINT16_C(21489), { -INT32_C( 945025979), -INT32_C( 1666477247), INT32_C( 280463389), -INT32_C( 2283155), -INT32_C( 2029665509), -INT32_C( 2135775253), -INT32_C( 1216666425), -INT32_C( 99979852), -INT32_C( 1161709959), INT32_C( 1716939849), INT32_C( 1635127028), INT32_C( 1382110263), INT32_C( 47801879), -INT32_C( 729642227), INT32_C( 1686961840), -INT32_C( 463563157) }, { -INT32_C( 1767956659), -INT32_C( 2130840181), INT32_C( 987853571), -INT32_C( 544390457), -INT32_C( 1226742104), -INT32_C( 1567988494), -INT32_C( 855239070), -INT32_C( 89037395), INT32_C( 277893252), INT32_C( 1234210118), -INT32_C( 930844415), INT32_C( 1554452916), INT32_C( 1762822519), INT32_C( 1326161389), INT32_C( 1612452531), -INT32_C( 77935241) }, { -INT32_C( 1767956659), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 2029665509), -INT32_C( 2135775253), -INT32_C( 1216666425), -INT32_C( 99979852), -INT32_C( 1161709959), INT32_C( 1234210118), INT32_C( 0), INT32_C( 0), INT32_C( 47801879), INT32_C( 0), INT32_C( 1612452531), INT32_C( 0) } }, { UINT16_C(60190), { -INT32_C( 1686608885), INT32_C( 
822966701), -INT32_C( 381501118), INT32_C( 812825117), INT32_C( 801988387), -INT32_C( 901676882), INT32_C( 999864545), -INT32_C( 1087981901), -INT32_C( 950362342), INT32_C( 1526294296), INT32_C( 1178876712), INT32_C( 427210485), -INT32_C( 1001897194), -INT32_C( 1534096957), -INT32_C( 186636479), -INT32_C( 139262243) }, { -INT32_C( 1094840667), -INT32_C( 1642547339), -INT32_C( 387687181), INT32_C( 687954451), INT32_C( 1626163613), INT32_C( 319126738), -INT32_C( 2146900573), -INT32_C( 1854424085), INT32_C( 1062155977), -INT32_C( 522360851), -INT32_C( 674643516), -INT32_C( 1207907813), -INT32_C( 434574060), INT32_C( 234495338), -INT32_C( 292683262), -INT32_C( 2021718595) }, { INT32_C( 0), -INT32_C( 1642547339), -INT32_C( 387687181), INT32_C( 687954451), INT32_C( 801988387), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 950362342), -INT32_C( 522360851), INT32_C( 0), -INT32_C( 1207907813), INT32_C( 0), -INT32_C( 1534096957), -INT32_C( 292683262), -INT32_C( 2021718595) } }, { UINT16_C(52795), { -INT32_C( 1556796986), -INT32_C( 765134583), -INT32_C( 794984496), INT32_C( 1337897271), -INT32_C( 1855117161), -INT32_C( 1013747915), -INT32_C( 590786211), -INT32_C( 1633024808), INT32_C( 927064109), -INT32_C( 1442208295), INT32_C( 1534764580), -INT32_C( 274057129), -INT32_C( 1769990304), -INT32_C( 463924089), INT32_C( 1036067429), INT32_C( 1423665959) }, { INT32_C( 629873739), -INT32_C( 439380543), -INT32_C( 1824503493), -INT32_C( 494736766), -INT32_C( 1988623870), INT32_C( 2070794774), INT32_C( 465055476), -INT32_C( 445607014), INT32_C( 1879767983), -INT32_C( 866788976), -INT32_C( 1520462557), -INT32_C( 2105024128), -INT32_C( 99942173), -INT32_C( 965379886), INT32_C( 1105342119), INT32_C( 1898336961) }, { -INT32_C( 1556796986), -INT32_C( 765134583), INT32_C( 0), -INT32_C( 494736766), -INT32_C( 1988623870), -INT32_C( 1013747915), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1442208295), -INT32_C( 1520462557), -INT32_C( 2105024128), INT32_C( 0), INT32_C( 0), INT32_C( 
1036067429), INT32_C( 1423665959) } }, { UINT16_C(12621), { INT32_C( 923459297), INT32_C( 164375978), INT32_C( 1525304530), -INT32_C( 648360498), INT32_C( 1028795591), -INT32_C( 731121166), INT32_C( 1613114426), INT32_C( 2140239005), -INT32_C( 55141294), -INT32_C( 1677360439), -INT32_C( 1644761137), -INT32_C( 2072555332), INT32_C( 1858193788), -INT32_C( 62706494), -INT32_C( 161715880), INT32_C( 796258013) }, { INT32_C( 1395338122), INT32_C( 2096050349), -INT32_C( 602217185), -INT32_C( 1319071435), INT32_C( 471867738), -INT32_C( 1525128371), -INT32_C( 1432652596), -INT32_C( 321318814), -INT32_C( 381680325), INT32_C( 1432694581), INT32_C( 1244757781), INT32_C( 1794937104), -INT32_C( 7988046), INT32_C( 1269079679), INT32_C( 1979006995), -INT32_C( 1939681456) }, { INT32_C( 923459297), INT32_C( 0), -INT32_C( 602217185), -INT32_C( 1319071435), INT32_C( 0), INT32_C( 0), -INT32_C( 1432652596), INT32_C( 0), -INT32_C( 381680325), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 7988046), -INT32_C( 62706494), INT32_C( 0), INT32_C( 0) } }, { UINT16_C(41684), { -INT32_C( 623834763), -INT32_C( 1889868194), INT32_C( 757099057), INT32_C( 1531564757), INT32_C( 1996146897), INT32_C( 162925843), -INT32_C( 455604606), INT32_C( 126266514), INT32_C( 350378165), INT32_C( 1872968766), INT32_C( 2073871526), INT32_C( 1758979478), -INT32_C( 1042361939), INT32_C( 1623889118), INT32_C( 759538330), INT32_C( 53791566) }, { INT32_C( 1645680163), -INT32_C( 103695534), INT32_C( 359951999), INT32_C( 24988499), -INT32_C( 87925988), -INT32_C( 1973711633), INT32_C( 2092408878), -INT32_C( 1887442069), INT32_C( 1425118978), -INT32_C( 783433134), -INT32_C( 2065251792), INT32_C( 713384973), -INT32_C( 1356576833), INT32_C( 20545491), -INT32_C( 1954680801), -INT32_C( 585433893) }, { INT32_C( 0), INT32_C( 0), INT32_C( 359951999), INT32_C( 0), -INT32_C( 87925988), INT32_C( 0), -INT32_C( 455604606), -INT32_C( 1887442069), INT32_C( 0), -INT32_C( 783433134), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 
20545491), INT32_C( 0), -INT32_C( 585433893) } }, { UINT16_C( 2963), { INT32_C( 2144199986), -INT32_C( 1656619338), INT32_C( 117526402), -INT32_C( 1655783303), -INT32_C( 1474485905), -INT32_C( 1617413086), -INT32_C( 509905721), -INT32_C( 2081673391), -INT32_C( 888948204), INT32_C( 979911864), INT32_C( 205613459), -INT32_C( 1716875479), -INT32_C( 767440976), -INT32_C( 881731069), -INT32_C( 1616114610), INT32_C( 1344510267) }, { INT32_C( 186328659), -INT32_C( 28998806), INT32_C( 353011436), -INT32_C( 978406379), INT32_C( 2140663931), INT32_C( 407505098), INT32_C( 1354233364), -INT32_C( 492774769), INT32_C( 1810742016), INT32_C( 711537214), -INT32_C( 851479624), -INT32_C( 1550651864), -INT32_C( 1474156066), INT32_C( 1187015729), -INT32_C( 225020061), INT32_C( 1389704786) }, { INT32_C( 186328659), -INT32_C( 1656619338), INT32_C( 0), INT32_C( 0), -INT32_C( 1474485905), INT32_C( 0), INT32_C( 0), -INT32_C( 2081673391), -INT32_C( 888948204), INT32_C( 711537214), INT32_C( 0), -INT32_C( 1716875479), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { UINT16_C(49905), { INT32_C( 653602749), -INT32_C( 1734693543), INT32_C( 226935419), -INT32_C( 2009635739), -INT32_C( 856397812), INT32_C( 1145329582), INT32_C( 528127562), -INT32_C( 1495176216), -INT32_C( 204679526), -INT32_C( 7575932), -INT32_C( 1911811544), -INT32_C( 2095692937), INT32_C( 1515195052), -INT32_C( 1398827934), INT32_C( 315300138), INT32_C( 532196485) }, { INT32_C( 118719875), INT32_C( 335978475), INT32_C( 681710257), INT32_C( 44873814), INT32_C( 610073794), -INT32_C( 1160709232), -INT32_C( 1697866987), -INT32_C( 877034168), -INT32_C( 170734582), INT32_C( 487184491), INT32_C( 1078307818), INT32_C( 641921379), INT32_C( 2102042605), -INT32_C( 1355342950), -INT32_C( 28769098), -INT32_C( 1815542903) }, { INT32_C( 118719875), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 856397812), -INT32_C( 1160709232), -INT32_C( 1697866987), -INT32_C( 1495176216), INT32_C( 0), -INT32_C( 7575932), INT32_C( 0), INT32_C( 0), 
INT32_C( 0), INT32_C( 0), -INT32_C( 28769098), -INT32_C( 1815542903) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_maskz_min_epi32(test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_min_epu32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint32_t a[16]; const uint32_t b[16]; const uint32_t r[16]; } test_vec[] = { { { UINT32_C(1289220019), UINT32_C(2127009800), UINT32_C(3136822521), UINT32_C(1627778699), UINT32_C(2140200231), UINT32_C(3389450485), UINT32_C( 107671816), UINT32_C( 779174523), UINT32_C( 981092401), UINT32_C(3669508833), UINT32_C(3918835806), UINT32_C(3326843295), UINT32_C(1799740278), UINT32_C(3409267651), UINT32_C(3100745788), UINT32_C( 350634722) }, { UINT32_C(1817076107), UINT32_C( 21366435), UINT32_C(1458297527), UINT32_C(3927717492), UINT32_C(3562430737), UINT32_C(3919612844), UINT32_C( 245461291), UINT32_C(1059227572), UINT32_C(2343268584), UINT32_C( 764277110), UINT32_C(1065580747), UINT32_C(3207241646), UINT32_C(2912124928), UINT32_C( 932590347), UINT32_C(1497708453), UINT32_C(2811783102) }, { UINT32_C(1289220019), UINT32_C( 21366435), UINT32_C(1458297527), UINT32_C(1627778699), UINT32_C(2140200231), UINT32_C(3389450485), UINT32_C( 107671816), UINT32_C( 779174523), UINT32_C( 981092401), UINT32_C( 764277110), UINT32_C(1065580747), UINT32_C(3207241646), UINT32_C(1799740278), UINT32_C( 932590347), UINT32_C(1497708453), UINT32_C( 350634722) } }, { { UINT32_C(1295139799), UINT32_C(4286299956), UINT32_C(3846176311), UINT32_C(2661575070), UINT32_C(4098570473), UINT32_C( 271311211), UINT32_C(3614011416), UINT32_C(2927493591), UINT32_C(2029826116), UINT32_C(2809689968), UINT32_C( 328054645), UINT32_C( 162672928), UINT32_C(3590192489), UINT32_C(4158990815), UINT32_C(1909346201), 
UINT32_C(2501856336) }, { UINT32_C(1829575677), UINT32_C( 135562642), UINT32_C(1545314620), UINT32_C(1013304787), UINT32_C(2853266379), UINT32_C( 631371660), UINT32_C(2526441542), UINT32_C(3106649788), UINT32_C(1680226769), UINT32_C(4218174398), UINT32_C(2941749212), UINT32_C( 552385877), UINT32_C(2898984224), UINT32_C( 986803188), UINT32_C(2563860699), UINT32_C(4015127582) }, { UINT32_C(1295139799), UINT32_C( 135562642), UINT32_C(1545314620), UINT32_C(1013304787), UINT32_C(2853266379), UINT32_C( 271311211), UINT32_C(2526441542), UINT32_C(2927493591), UINT32_C(1680226769), UINT32_C(2809689968), UINT32_C( 328054645), UINT32_C( 162672928), UINT32_C(2898984224), UINT32_C( 986803188), UINT32_C(1909346201), UINT32_C(2501856336) } }, { { UINT32_C(4099110965), UINT32_C(2414854067), UINT32_C(2621392455), UINT32_C( 599534339), UINT32_C( 500139560), UINT32_C(3445072369), UINT32_C( 660940809), UINT32_C(1511437861), UINT32_C(3780012590), UINT32_C(1886469417), UINT32_C(2265755780), UINT32_C( 61589723), UINT32_C(1075870286), UINT32_C( 604862491), UINT32_C(3310056096), UINT32_C(1461740072) }, { UINT32_C(4114116300), UINT32_C( 812034476), UINT32_C( 884437593), UINT32_C(2302173755), UINT32_C(4173945053), UINT32_C(1897780944), UINT32_C(1899391048), UINT32_C(2529711818), UINT32_C(1905000645), UINT32_C( 60945066), UINT32_C(2671269988), UINT32_C(2552852667), UINT32_C(2576413384), UINT32_C( 285912521), UINT32_C(3766632470), UINT32_C(1551321751) }, { UINT32_C(4099110965), UINT32_C( 812034476), UINT32_C( 884437593), UINT32_C( 599534339), UINT32_C( 500139560), UINT32_C(1897780944), UINT32_C( 660940809), UINT32_C(1511437861), UINT32_C(1905000645), UINT32_C( 60945066), UINT32_C(2265755780), UINT32_C( 61589723), UINT32_C(1075870286), UINT32_C( 285912521), UINT32_C(3310056096), UINT32_C(1461740072) } }, { { UINT32_C(4123853643), UINT32_C(1509453557), UINT32_C(2180591814), UINT32_C(1763254944), UINT32_C(3707939348), UINT32_C(1844382807), UINT32_C(3813568844), UINT32_C( 121619900), 
UINT32_C(3187412168), UINT32_C(1092023418), UINT32_C(3317829413), UINT32_C(1177476145), UINT32_C(3710070918), UINT32_C(2303398460), UINT32_C(1080859012), UINT32_C( 642231390) }, { UINT32_C( 870532024), UINT32_C(1551169847), UINT32_C( 975320585), UINT32_C(2558545938), UINT32_C(3178669185), UINT32_C( 977715638), UINT32_C(3095049050), UINT32_C( 400474463), UINT32_C(1011532036), UINT32_C(3281567418), UINT32_C( 134134517), UINT32_C(2359328267), UINT32_C(3645445666), UINT32_C( 823365847), UINT32_C(2733215299), UINT32_C(1421461327) }, { UINT32_C( 870532024), UINT32_C(1509453557), UINT32_C( 975320585), UINT32_C(1763254944), UINT32_C(3178669185), UINT32_C( 977715638), UINT32_C(3095049050), UINT32_C( 121619900), UINT32_C(1011532036), UINT32_C(1092023418), UINT32_C( 134134517), UINT32_C(1177476145), UINT32_C(3645445666), UINT32_C( 823365847), UINT32_C(1080859012), UINT32_C( 642231390) } }, { { UINT32_C(1116734600), UINT32_C(3070634178), UINT32_C(4005496035), UINT32_C(2776260482), UINT32_C(1283375989), UINT32_C(2524811603), UINT32_C(1865967135), UINT32_C(3049517613), UINT32_C(3103216630), UINT32_C(1584463227), UINT32_C(2219585281), UINT32_C( 53069454), UINT32_C(3712984970), UINT32_C(1484049464), UINT32_C(1606921266), UINT32_C(2484374174) }, { UINT32_C(1481444317), UINT32_C( 179813641), UINT32_C(2056127468), UINT32_C(1417525194), UINT32_C(2536623198), UINT32_C(3404703128), UINT32_C(4029265490), UINT32_C( 495271232), UINT32_C(1366676040), UINT32_C(2069638287), UINT32_C(4210420272), UINT32_C( 5141154), UINT32_C(3600252734), UINT32_C(2007008805), UINT32_C(2087176508), UINT32_C(1318710278) }, { UINT32_C(1116734600), UINT32_C( 179813641), UINT32_C(2056127468), UINT32_C(1417525194), UINT32_C(1283375989), UINT32_C(2524811603), UINT32_C(1865967135), UINT32_C( 495271232), UINT32_C(1366676040), UINT32_C(1584463227), UINT32_C(2219585281), UINT32_C( 5141154), UINT32_C(3600252734), UINT32_C(1484049464), UINT32_C(1606921266), UINT32_C(1318710278) } }, { { UINT32_C(1302335422), 
UINT32_C(1808333883), UINT32_C(2288369126), UINT32_C(1837740847), UINT32_C(1480794163), UINT32_C(3822052263), UINT32_C(2992649900), UINT32_C(3775002915), UINT32_C(1143972104), UINT32_C(2209347485), UINT32_C(3825997237), UINT32_C(4216493512), UINT32_C(1548981685), UINT32_C( 624960121), UINT32_C(2094571609), UINT32_C(2724059545) }, { UINT32_C( 400985210), UINT32_C( 966432132), UINT32_C(1931323050), UINT32_C(4050546491), UINT32_C(2119025157), UINT32_C(1034128868), UINT32_C(3350821677), UINT32_C(3462993748), UINT32_C( 669339555), UINT32_C(2405466340), UINT32_C(1644330534), UINT32_C(4065554669), UINT32_C( 393257010), UINT32_C(1532236846), UINT32_C(3827437199), UINT32_C(3367144229) }, { UINT32_C( 400985210), UINT32_C( 966432132), UINT32_C(1931323050), UINT32_C(1837740847), UINT32_C(1480794163), UINT32_C(1034128868), UINT32_C(2992649900), UINT32_C(3462993748), UINT32_C( 669339555), UINT32_C(2209347485), UINT32_C(1644330534), UINT32_C(4065554669), UINT32_C( 393257010), UINT32_C( 624960121), UINT32_C(2094571609), UINT32_C(2724059545) } }, { { UINT32_C(3220216026), UINT32_C(1045319704), UINT32_C(3164623054), UINT32_C(4088329152), UINT32_C(3255443348), UINT32_C(3256704563), UINT32_C(2443591788), UINT32_C(2790939083), UINT32_C( 157633265), UINT32_C(1766306714), UINT32_C(3274041347), UINT32_C(1874252763), UINT32_C( 624017650), UINT32_C(2347257631), UINT32_C(1511886479), UINT32_C(3623909351) }, { UINT32_C(1541498305), UINT32_C( 465840408), UINT32_C(3974097169), UINT32_C(2942080445), UINT32_C(1976929622), UINT32_C(1795210716), UINT32_C( 868621643), UINT32_C(1426835092), UINT32_C(1152511276), UINT32_C( 660632854), UINT32_C( 471023455), UINT32_C( 717975508), UINT32_C(3651117309), UINT32_C(2839912541), UINT32_C(1390152637), UINT32_C(4255639505) }, { UINT32_C(1541498305), UINT32_C( 465840408), UINT32_C(3164623054), UINT32_C(2942080445), UINT32_C(1976929622), UINT32_C(1795210716), UINT32_C( 868621643), UINT32_C(1426835092), UINT32_C( 157633265), UINT32_C( 660632854), UINT32_C( 
471023455), UINT32_C( 717975508), UINT32_C( 624017650), UINT32_C(2347257631), UINT32_C(1390152637), UINT32_C(3623909351) } }, { { UINT32_C(3930216660), UINT32_C( 756130510), UINT32_C(3041469921), UINT32_C(2447381652), UINT32_C( 309034933), UINT32_C(3720065055), UINT32_C(2351929275), UINT32_C(1401607807), UINT32_C(4248751151), UINT32_C(1328172910), UINT32_C( 151286644), UINT32_C(1016784007), UINT32_C(2202994020), UINT32_C(1885342389), UINT32_C( 570265506), UINT32_C(2507442022) }, { UINT32_C(3247616595), UINT32_C(1980808194), UINT32_C(3061781551), UINT32_C(1576213241), UINT32_C(3588243999), UINT32_C(3997516108), UINT32_C( 906969808), UINT32_C( 483099849), UINT32_C( 954031414), UINT32_C(1219489049), UINT32_C(4227804674), UINT32_C(1750659656), UINT32_C(2151495732), UINT32_C(1248821881), UINT32_C(2390785733), UINT32_C( 967527426) }, { UINT32_C(3247616595), UINT32_C( 756130510), UINT32_C(3041469921), UINT32_C(1576213241), UINT32_C( 309034933), UINT32_C(3720065055), UINT32_C( 906969808), UINT32_C( 483099849), UINT32_C( 954031414), UINT32_C(1219489049), UINT32_C( 151286644), UINT32_C(1016784007), UINT32_C(2151495732), UINT32_C(1248821881), UINT32_C( 570265506), UINT32_C( 967527426) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_min_epu32(a, b); simde_test_x86_assert_equal_u32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_min_epu32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t src[16]; const simde__mmask16 k; const uint32_t a[16]; const uint32_t b[16]; const uint32_t r[16]; } test_vec[] = { { { INT32_C( 1219780957), -INT32_C( 361567476), INT32_C( 1237692958), INT32_C( 2064988168), -INT32_C( 1941892903), INT32_C( 2002512175), INT32_C( 1267491998), INT32_C( 579990469), INT32_C( 1617642836), INT32_C( 1313594672), -INT32_C( 1734930288), 
INT32_C( 554937672), -INT32_C( 307342147), -INT32_C( 597423810), INT32_C( 841478253), INT32_C( 928364771) }, UINT16_C(49405), { UINT32_C(3818728856), UINT32_C( 317926779), UINT32_C(3653188806), UINT32_C( 187596125), UINT32_C(3457510506), UINT32_C(1874756168), UINT32_C( 170369717), UINT32_C(1909073113), UINT32_C(3428083537), UINT32_C(1541294229), UINT32_C(3778322052), UINT32_C(2246926875), UINT32_C( 374538958), UINT32_C( 965022084), UINT32_C(2370022579), UINT32_C( 570297808) }, { UINT32_C( 183325557), UINT32_C( 526765211), UINT32_C(2231409002), UINT32_C(3389713916), UINT32_C(1960926703), UINT32_C( 581789294), UINT32_C(3819958547), UINT32_C(1929752062), UINT32_C(2608722432), UINT32_C( 683336382), UINT32_C(2024651644), UINT32_C(2554509224), UINT32_C(2198610708), UINT32_C(2644883850), UINT32_C(2843759786), UINT32_C( 35423489) }, { UINT32_C( 183325557), UINT32_C(3933399820), UINT32_C(2231409002), UINT32_C( 187596125), UINT32_C(1960926703), UINT32_C( 581789294), UINT32_C( 170369717), UINT32_C(1909073113), UINT32_C(1617642836), UINT32_C(1313594672), UINT32_C(2560037008), UINT32_C( 554937672), UINT32_C(3987625149), UINT32_C(3697543486), UINT32_C(2370022579), UINT32_C( 35423489) } }, { { INT32_C( 916298360), -INT32_C( 127969156), -INT32_C( 1150284781), -INT32_C( 665603132), INT32_C( 1616601046), -INT32_C( 1006829543), INT32_C( 1449950804), INT32_C( 2069399811), -INT32_C( 1615661789), INT32_C( 1620578637), -INT32_C( 518256611), -INT32_C( 1833275461), -INT32_C( 386787889), INT32_C( 1789653014), INT32_C( 1908414574), -INT32_C( 991094623) }, UINT16_C(40718), { UINT32_C(4222638947), UINT32_C(3624193468), UINT32_C(1766309807), UINT32_C(1149113937), UINT32_C(2855572734), UINT32_C(3217203967), UINT32_C( 14181139), UINT32_C(2325734951), UINT32_C(4253437761), UINT32_C(3436546589), UINT32_C(2603949385), UINT32_C( 853521203), UINT32_C(1205605192), UINT32_C(3355877045), UINT32_C( 684318209), UINT32_C( 112355524) }, { UINT32_C(3573758136), UINT32_C( 211933634), UINT32_C( 698865398), 
UINT32_C(3512436361), UINT32_C(1310209945), UINT32_C(3591774165), UINT32_C(3271483389), UINT32_C( 13152584), UINT32_C(2899692521), UINT32_C(2595780260), UINT32_C(3586416460), UINT32_C(2124881893), UINT32_C( 751615831), UINT32_C(3691242206), UINT32_C( 178127298), UINT32_C(2617927346) }, { UINT32_C( 916298360), UINT32_C( 211933634), UINT32_C( 698865398), UINT32_C(1149113937), UINT32_C(1616601046), UINT32_C(3288137753), UINT32_C(1449950804), UINT32_C(2069399811), UINT32_C(2899692521), UINT32_C(2595780260), UINT32_C(2603949385), UINT32_C( 853521203), UINT32_C( 751615831), UINT32_C(1789653014), UINT32_C(1908414574), UINT32_C( 112355524) } }, { { -INT32_C( 699867343), -INT32_C( 1586495403), INT32_C( 1148597343), -INT32_C( 1413341868), -INT32_C( 1143501091), INT32_C( 848812656), -INT32_C( 1908656676), -INT32_C( 852867429), INT32_C( 2057531941), -INT32_C( 786754702), -INT32_C( 1676307896), -INT32_C( 1941448785), -INT32_C( 699916699), -INT32_C( 720838663), -INT32_C( 1335671531), -INT32_C( 1317171573) }, UINT16_C( 8192), { UINT32_C(1194619691), UINT32_C(1524202564), UINT32_C(1597081624), UINT32_C(1568511765), UINT32_C(1966896749), UINT32_C(2948223307), UINT32_C(2134722050), UINT32_C( 580926967), UINT32_C(4117353648), UINT32_C(1750024784), UINT32_C(3771171019), UINT32_C(2218607639), UINT32_C( 117078459), UINT32_C(3451237579), UINT32_C(4048351994), UINT32_C(3759467568) }, { UINT32_C( 282426816), UINT32_C(2339906752), UINT32_C(3161145253), UINT32_C(1061267588), UINT32_C(3963960097), UINT32_C(3938057199), UINT32_C( 500893421), UINT32_C(3019829234), UINT32_C( 767808365), UINT32_C(2646097144), UINT32_C(4284031867), UINT32_C(3963525835), UINT32_C(3319366869), UINT32_C(1823445631), UINT32_C(2341112472), UINT32_C(3879635066) }, { UINT32_C(3595099953), UINT32_C(2708471893), UINT32_C(1148597343), UINT32_C(2881625428), UINT32_C(3151466205), UINT32_C( 848812656), UINT32_C(2386310620), UINT32_C(3442099867), UINT32_C(2057531941), UINT32_C(3508212594), UINT32_C(2618659400), 
UINT32_C(2353518511), UINT32_C(3595050597), UINT32_C(1823445631), UINT32_C(2959295765), UINT32_C(2977795723) } }, { { INT32_C( 1393819995), -INT32_C( 1175401411), -INT32_C( 1162327313), -INT32_C( 1163462684), -INT32_C( 92307589), -INT32_C( 1436144110), INT32_C( 842395832), -INT32_C( 736529544), -INT32_C( 1306055307), -INT32_C( 395634439), INT32_C( 1185031266), -INT32_C( 1778366181), -INT32_C( 628064312), INT32_C( 1720055469), INT32_C( 1603844839), -INT32_C( 1556893138) }, UINT16_C(23519), { UINT32_C(3245594965), UINT32_C(1692784065), UINT32_C( 481099803), UINT32_C( 647722390), UINT32_C(3575400784), UINT32_C(1200554927), UINT32_C(2532949347), UINT32_C(3069303136), UINT32_C(1937204402), UINT32_C(1440177209), UINT32_C(4067525724), UINT32_C(1243090170), UINT32_C( 69153877), UINT32_C(2605493816), UINT32_C(3425781100), UINT32_C(3498189598) }, { UINT32_C(3242523015), UINT32_C(2971016021), UINT32_C(2594408352), UINT32_C(3924081555), UINT32_C( 686621680), UINT32_C( 499333553), UINT32_C(2649420927), UINT32_C(2674813975), UINT32_C(3143676518), UINT32_C(1835890381), UINT32_C(2416382205), UINT32_C(3162106828), UINT32_C(2699323374), UINT32_C( 532522912), UINT32_C(3015550875), UINT32_C(2052205332) }, { UINT32_C(3242523015), UINT32_C(1692784065), UINT32_C( 481099803), UINT32_C( 647722390), UINT32_C( 686621680), UINT32_C(2858823186), UINT32_C(2532949347), UINT32_C(2674813975), UINT32_C(1937204402), UINT32_C(1440177209), UINT32_C(1185031266), UINT32_C(1243090170), UINT32_C( 69153877), UINT32_C(1720055469), UINT32_C(3015550875), UINT32_C(2738074158) } }, { { -INT32_C( 1439321379), INT32_C( 622371368), INT32_C( 2142576563), -INT32_C( 113561845), INT32_C( 916004758), INT32_C( 1633048518), INT32_C( 303305726), INT32_C( 462186046), INT32_C( 1086702104), INT32_C( 392551780), INT32_C( 144055293), -INT32_C( 536751798), -INT32_C( 1240032272), -INT32_C( 266834702), -INT32_C( 1123865473), -INT32_C( 1411870829) }, UINT16_C(40529), { UINT32_C(1367062252), UINT32_C(1684830413), 
UINT32_C(2184558208), UINT32_C(2904368790), UINT32_C(4095283164), UINT32_C( 35756543), UINT32_C( 798143574), UINT32_C(1271784287), UINT32_C(1738360985), UINT32_C(1103825345), UINT32_C(1455620288), UINT32_C( 50585638), UINT32_C(4025949679), UINT32_C( 217127094), UINT32_C( 742097868), UINT32_C( 7800935) }, { UINT32_C( 308745297), UINT32_C(3729994270), UINT32_C(1496586035), UINT32_C(3881580791), UINT32_C( 198595669), UINT32_C( 957859692), UINT32_C(2992984907), UINT32_C(2897402971), UINT32_C( 264116977), UINT32_C(2146243148), UINT32_C( 551100713), UINT32_C(2919707993), UINT32_C(4139376009), UINT32_C(4029665701), UINT32_C(2141361188), UINT32_C(1630295152) }, { UINT32_C( 308745297), UINT32_C( 622371368), UINT32_C(2142576563), UINT32_C(4181405451), UINT32_C( 198595669), UINT32_C(1633048518), UINT32_C( 798143574), UINT32_C( 462186046), UINT32_C(1086702104), UINT32_C(1103825345), UINT32_C( 551100713), UINT32_C( 50585638), UINT32_C(4025949679), UINT32_C(4028132594), UINT32_C(3171101823), UINT32_C( 7800935) } }, { { -INT32_C( 1150227858), INT32_C( 624582140), -INT32_C( 666496129), -INT32_C( 762884791), -INT32_C( 792182741), INT32_C( 901838609), -INT32_C( 55221621), INT32_C( 626909622), -INT32_C( 924791093), -INT32_C( 1427301845), INT32_C( 2005087022), -INT32_C( 1404499327), INT32_C( 1551635018), -INT32_C( 1785644023), INT32_C( 1418806942), -INT32_C( 210112985) }, UINT16_C(22972), { UINT32_C(2826234043), UINT32_C( 366781074), UINT32_C(1646222617), UINT32_C(2238999049), UINT32_C(1472298694), UINT32_C(2761842451), UINT32_C( 764593587), UINT32_C(1938182072), UINT32_C(3374119479), UINT32_C(3051354268), UINT32_C(1578696277), UINT32_C( 786664552), UINT32_C( 495363082), UINT32_C(3066110979), UINT32_C(2732807401), UINT32_C(3658836643) }, { UINT32_C( 27472228), UINT32_C(2125890089), UINT32_C(3923562113), UINT32_C(1696120667), UINT32_C(1719901795), UINT32_C(2870822082), UINT32_C( 994902168), UINT32_C(3474285418), UINT32_C(3201350036), UINT32_C(3158083131), UINT32_C(2963675477), 
UINT32_C(1008058072), UINT32_C( 513972316), UINT32_C(1976156125), UINT32_C( 716249024), UINT32_C( 251250298) }, { UINT32_C(3144739438), UINT32_C( 624582140), UINT32_C(1646222617), UINT32_C(1696120667), UINT32_C(1472298694), UINT32_C(2761842451), UINT32_C(4239745675), UINT32_C(1938182072), UINT32_C(3201350036), UINT32_C(2867665451), UINT32_C(2005087022), UINT32_C( 786664552), UINT32_C( 495363082), UINT32_C(2509323273), UINT32_C( 716249024), UINT32_C(4084854311) } }, { { -INT32_C( 1144206977), -INT32_C( 1518925488), -INT32_C( 95085278), INT32_C( 926313179), -INT32_C( 531244797), INT32_C( 1481973656), -INT32_C( 1333590474), INT32_C( 1287552205), -INT32_C( 1777890490), -INT32_C( 1237614700), INT32_C( 2024837276), -INT32_C( 38803462), INT32_C( 1490879936), INT32_C( 1521562404), INT32_C( 101332025), -INT32_C( 162281296) }, UINT16_C(23123), { UINT32_C(3352946572), UINT32_C(1314354845), UINT32_C(2637517550), UINT32_C( 765654351), UINT32_C(4267755085), UINT32_C( 707959072), UINT32_C(4092847008), UINT32_C(1716340441), UINT32_C(3408733998), UINT32_C(2333705629), UINT32_C( 640175831), UINT32_C(2438187843), UINT32_C(2995762065), UINT32_C(3990667853), UINT32_C(2128662437), UINT32_C(1155804438) }, { UINT32_C(4044296788), UINT32_C(1853630871), UINT32_C(3147081079), UINT32_C( 21817456), UINT32_C(3904101275), UINT32_C(1121292445), UINT32_C(1975629151), UINT32_C( 934913507), UINT32_C(1311361463), UINT32_C(1773970930), UINT32_C(3122942282), UINT32_C(3569119289), UINT32_C(3921506124), UINT32_C(1596756735), UINT32_C( 735374664), UINT32_C(1247973010) }, { UINT32_C(3352946572), UINT32_C(1314354845), UINT32_C(4199882018), UINT32_C( 926313179), UINT32_C(3904101275), UINT32_C(1481973656), UINT32_C(1975629151), UINT32_C(1287552205), UINT32_C(2517076806), UINT32_C(1773970930), UINT32_C(2024837276), UINT32_C(2438187843), UINT32_C(2995762065), UINT32_C(1521562404), UINT32_C( 735374664), UINT32_C(4132686000) } }, { { INT32_C( 1234733911), INT32_C( 2075284785), -INT32_C( 550053978), -INT32_C( 
1816923577), INT32_C( 1635610721), INT32_C( 1270917379), INT32_C( 678859926), INT32_C( 2037569570), -INT32_C( 1782445212), INT32_C( 101741920), -INT32_C( 1813690804), -INT32_C( 1708681160), INT32_C( 217818121), -INT32_C( 480789683), INT32_C( 1913376079), INT32_C( 166428325) }, UINT16_C(44681), { UINT32_C(2938366366), UINT32_C(3572854767), UINT32_C( 694955522), UINT32_C(3285022152), UINT32_C(3632142977), UINT32_C( 161861117), UINT32_C( 730286911), UINT32_C(4091088980), UINT32_C(3902995705), UINT32_C(1841076075), UINT32_C(2375493829), UINT32_C( 525362334), UINT32_C(2096680575), UINT32_C(3682966940), UINT32_C(1023806696), UINT32_C( 355524380) }, { UINT32_C(1191039707), UINT32_C( 800373097), UINT32_C(2159823842), UINT32_C( 782175663), UINT32_C(2007734235), UINT32_C( 491991093), UINT32_C(1499093309), UINT32_C( 342854201), UINT32_C(3327880284), UINT32_C( 150277926), UINT32_C( 159953242), UINT32_C(2587371454), UINT32_C(4094813119), UINT32_C(1343317011), UINT32_C(4155141310), UINT32_C(1393236470) }, { UINT32_C(1191039707), UINT32_C(2075284785), UINT32_C(3744913318), UINT32_C( 782175663), UINT32_C(1635610721), UINT32_C(1270917379), UINT32_C( 678859926), UINT32_C( 342854201), UINT32_C(2512522084), UINT32_C( 150277926), UINT32_C( 159953242), UINT32_C( 525362334), UINT32_C( 217818121), UINT32_C(1343317011), UINT32_C(1913376079), UINT32_C( 355524380) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi32(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_mask_min_epu32(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_u32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_min_epu32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const uint32_t a[16]; const uint32_t b[16]; const uint32_t r[16]; } test_vec[] = { { UINT16_C(39416), { 
UINT32_C(1465973120), UINT32_C(2116295527), UINT32_C(3331100915), UINT32_C(1680350051), UINT32_C( 383124669), UINT32_C( 7555327), UINT32_C( 691857575), UINT32_C(1187198150), UINT32_C(2493326125), UINT32_C( 823312958), UINT32_C(3069747026), UINT32_C(1998201018), UINT32_C( 596504612), UINT32_C(3760390456), UINT32_C(2953404393), UINT32_C(3237399699) }, { UINT32_C( 777295087), UINT32_C(2824824662), UINT32_C(3227408134), UINT32_C(2604169591), UINT32_C(2730411369), UINT32_C(2961367494), UINT32_C(3546319680), UINT32_C(1200838232), UINT32_C(1081469162), UINT32_C(1441387855), UINT32_C(2752857900), UINT32_C( 708791744), UINT32_C(3654090259), UINT32_C( 545869535), UINT32_C( 838068697), UINT32_C( 712607552) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C(1680350051), UINT32_C( 383124669), UINT32_C( 7555327), UINT32_C( 691857575), UINT32_C(1187198150), UINT32_C(1081469162), UINT32_C( 0), UINT32_C( 0), UINT32_C( 708791744), UINT32_C( 596504612), UINT32_C( 0), UINT32_C( 0), UINT32_C( 712607552) } }, { UINT16_C(61039), { UINT32_C(1422114411), UINT32_C( 681308179), UINT32_C(3547749524), UINT32_C(1389463942), UINT32_C(3969954146), UINT32_C(3302324689), UINT32_C( 608900523), UINT32_C(2870131264), UINT32_C(2348799608), UINT32_C(1521785542), UINT32_C(2083334902), UINT32_C( 365887411), UINT32_C(2164354736), UINT32_C(2470828008), UINT32_C( 750227948), UINT32_C(3302476107) }, { UINT32_C(1699731103), UINT32_C(1740571505), UINT32_C(3773099309), UINT32_C(2633413356), UINT32_C( 152958753), UINT32_C(3147588302), UINT32_C(1072124915), UINT32_C(3154362140), UINT32_C( 102847125), UINT32_C(2205081942), UINT32_C(3127136974), UINT32_C( 626416132), UINT32_C( 539915089), UINT32_C(3386624725), UINT32_C( 973652509), UINT32_C( 402000769) }, { UINT32_C(1422114411), UINT32_C( 681308179), UINT32_C(3547749524), UINT32_C(1389463942), UINT32_C( 0), UINT32_C(3147588302), UINT32_C( 608900523), UINT32_C( 0), UINT32_C( 0), UINT32_C(1521785542), UINT32_C(2083334902), UINT32_C( 365887411), UINT32_C( 0), 
UINT32_C(2470828008), UINT32_C( 750227948), UINT32_C( 402000769) } }, { UINT16_C( 5981), { UINT32_C(2348331805), UINT32_C(2615002679), UINT32_C(3606438528), UINT32_C( 910771719), UINT32_C(1090527078), UINT32_C(4026801896), UINT32_C(1325106520), UINT32_C(3127203996), UINT32_C(1128619532), UINT32_C(2782798628), UINT32_C( 209441541), UINT32_C(2151859481), UINT32_C(3435217892), UINT32_C(3116156257), UINT32_C(3876042571), UINT32_C( 463563791) }, { UINT32_C(4015974346), UINT32_C( 261372938), UINT32_C( 689639183), UINT32_C(3098107604), UINT32_C( 42232481), UINT32_C(2075869232), UINT32_C( 123912951), UINT32_C(4179756078), UINT32_C(4125655531), UINT32_C(3439623357), UINT32_C(1626742667), UINT32_C( 504930173), UINT32_C( 958438665), UINT32_C(3585399773), UINT32_C(3436976029), UINT32_C( 113638939) }, { UINT32_C(2348331805), UINT32_C( 0), UINT32_C( 689639183), UINT32_C( 910771719), UINT32_C( 42232481), UINT32_C( 0), UINT32_C( 123912951), UINT32_C( 0), UINT32_C(1128619532), UINT32_C(2782798628), UINT32_C( 209441541), UINT32_C( 0), UINT32_C( 958438665), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { UINT16_C(44415), { UINT32_C(4280892923), UINT32_C( 2012170), UINT32_C( 765434900), UINT32_C(3687491770), UINT32_C(2528552930), UINT32_C(1487754364), UINT32_C(3847735328), UINT32_C(3381843662), UINT32_C( 499694355), UINT32_C(2216552303), UINT32_C(1035058307), UINT32_C(1192786789), UINT32_C(2682113826), UINT32_C(1140296483), UINT32_C( 573066835), UINT32_C( 954972709) }, { UINT32_C(3830887541), UINT32_C( 510161819), UINT32_C(2505775408), UINT32_C(3084678292), UINT32_C(1716959555), UINT32_C(2561232196), UINT32_C(3250246044), UINT32_C( 16360843), UINT32_C(4108603225), UINT32_C(4078063043), UINT32_C(4220022374), UINT32_C( 632448226), UINT32_C(1653278749), UINT32_C(4059706453), UINT32_C(2427630597), UINT32_C(3012602969) }, { UINT32_C(3830887541), UINT32_C( 2012170), UINT32_C( 765434900), UINT32_C(3084678292), UINT32_C(1716959555), UINT32_C(1487754364), UINT32_C(3250246044), UINT32_C( 0), 
UINT32_C( 499694355), UINT32_C( 0), UINT32_C(1035058307), UINT32_C( 632448226), UINT32_C( 0), UINT32_C(1140296483), UINT32_C( 0), UINT32_C( 954972709) } }, { UINT16_C(29947), { UINT32_C(3133259431), UINT32_C( 958933169), UINT32_C(3583838755), UINT32_C(3135093551), UINT32_C( 401486365), UINT32_C(3603690276), UINT32_C( 327296131), UINT32_C(2139586263), UINT32_C(3996731708), UINT32_C(2485608817), UINT32_C(2590623083), UINT32_C(2639545984), UINT32_C(2629059192), UINT32_C(3094576949), UINT32_C(2076964259), UINT32_C(2969195123) }, { UINT32_C( 211694491), UINT32_C(4288726420), UINT32_C( 177801610), UINT32_C(3366448463), UINT32_C(1684298543), UINT32_C(2115819482), UINT32_C(1090119629), UINT32_C(3589337913), UINT32_C(3135344166), UINT32_C(3736699476), UINT32_C(3689501323), UINT32_C(1856213055), UINT32_C(3335653356), UINT32_C(2890198751), UINT32_C( 250363349), UINT32_C(1457773872) }, { UINT32_C( 211694491), UINT32_C( 958933169), UINT32_C( 0), UINT32_C(3135093551), UINT32_C( 401486365), UINT32_C(2115819482), UINT32_C( 327296131), UINT32_C(2139586263), UINT32_C( 0), UINT32_C( 0), UINT32_C(2590623083), UINT32_C( 0), UINT32_C(2629059192), UINT32_C(2890198751), UINT32_C( 250363349), UINT32_C( 0) } }, { UINT16_C(50539), { UINT32_C(3376922384), UINT32_C(2266747550), UINT32_C(1343707821), UINT32_C(2589459400), UINT32_C( 243808202), UINT32_C(3477888483), UINT32_C(1336704108), UINT32_C(3809745107), UINT32_C(1974295511), UINT32_C(3690776622), UINT32_C(3945534499), UINT32_C(3783689239), UINT32_C(2666532539), UINT32_C(3631037548), UINT32_C(2334595768), UINT32_C( 158284850) }, { UINT32_C(3313441943), UINT32_C( 128023524), UINT32_C(2817772943), UINT32_C( 210270545), UINT32_C(4088035463), UINT32_C(1842026420), UINT32_C(1677259569), UINT32_C(3329058607), UINT32_C(1754066051), UINT32_C(4151258471), UINT32_C(1268671226), UINT32_C(1666655963), UINT32_C(1398145439), UINT32_C(1254105624), UINT32_C(1152235797), UINT32_C(2752125472) }, { UINT32_C(3313441943), UINT32_C( 128023524), UINT32_C( 0), 
UINT32_C( 210270545), UINT32_C( 0), UINT32_C(1842026420), UINT32_C(1336704108), UINT32_C( 0), UINT32_C(1754066051), UINT32_C( 0), UINT32_C(1268671226), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C(1152235797), UINT32_C( 158284850) } }, { UINT16_C(38406), { UINT32_C(2076405260), UINT32_C( 64929125), UINT32_C(1596569864), UINT32_C(1935722524), UINT32_C(3700783388), UINT32_C(1888856771), UINT32_C(4169905902), UINT32_C(1720684890), UINT32_C(1692488447), UINT32_C( 409452304), UINT32_C(2507706745), UINT32_C(1963513945), UINT32_C( 340958545), UINT32_C( 897967943), UINT32_C(4146991261), UINT32_C(2707275169) }, { UINT32_C( 520437519), UINT32_C(2000186878), UINT32_C(1460515070), UINT32_C(3670873480), UINT32_C(4209909683), UINT32_C(2754638598), UINT32_C( 630939267), UINT32_C( 717682971), UINT32_C( 910871352), UINT32_C( 917406264), UINT32_C(3129916210), UINT32_C(2207538128), UINT32_C(2155774842), UINT32_C(2049224438), UINT32_C( 664780812), UINT32_C(4048643513) }, { UINT32_C( 0), UINT32_C( 64929125), UINT32_C(1460515070), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 409452304), UINT32_C(2507706745), UINT32_C( 0), UINT32_C( 340958545), UINT32_C( 0), UINT32_C( 0), UINT32_C(2707275169) } }, { UINT16_C(39728), { UINT32_C(3575474471), UINT32_C( 764432287), UINT32_C(2659737866), UINT32_C(1646330596), UINT32_C(2802849923), UINT32_C( 828841106), UINT32_C(2509643843), UINT32_C( 959497745), UINT32_C(3473821231), UINT32_C(2818351005), UINT32_C(3829826816), UINT32_C( 172451719), UINT32_C( 296900479), UINT32_C(3074562420), UINT32_C(1263327290), UINT32_C(3464789407) }, { UINT32_C(1755157451), UINT32_C( 839948850), UINT32_C(2786481695), UINT32_C(1035034045), UINT32_C( 693003189), UINT32_C(3068170620), UINT32_C( 134360425), UINT32_C(1977058986), UINT32_C(1272804377), UINT32_C( 763227406), UINT32_C( 30708803), UINT32_C(2789115377), UINT32_C(1691323624), UINT32_C(2266738717), UINT32_C(2274303453), UINT32_C(3187435171) }, { UINT32_C( 0), 
UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 693003189), UINT32_C( 828841106), UINT32_C( 0), UINT32_C( 0), UINT32_C(1272804377), UINT32_C( 763227406), UINT32_C( 0), UINT32_C( 172451719), UINT32_C( 296900479), UINT32_C( 0), UINT32_C( 0), UINT32_C(3187435171) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_maskz_min_epu32(test_vec[i].k, a, b); simde_test_x86_assert_equal_u32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_min_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { { INT64_C( 8926487841177630399), -INT64_C( 198981365653089870), -INT64_C( 1142750507646648470), -INT64_C( 317195656128959639), -INT64_C( 3931785308317887090), INT64_C( 4705300458382083023), -INT64_C( 6125974934284591212), -INT64_C( 4229869377263821291) }, { INT64_C( 3832889178177792081), -INT64_C( 5619417979985557013), INT64_C( 5250275990352824560), -INT64_C( 8299037646502903848), -INT64_C( 6887931595367019315), -INT64_C( 5800242970600739152), -INT64_C( 3321196944853811346), INT64_C( 3766050967148003707) }, { INT64_C( 3832889178177792081), -INT64_C( 5619417979985557013), -INT64_C( 1142750507646648470), -INT64_C( 8299037646502903848), -INT64_C( 6887931595367019315), -INT64_C( 5800242970600739152), -INT64_C( 6125974934284591212), -INT64_C( 4229869377263821291) } }, { { INT64_C( 1050976293818480703), -INT64_C( 3806282354404755521), INT64_C( 5073116734339878557), INT64_C( 4138878397256058581), INT64_C( 7630437703371053175), -INT64_C( 4836279826553257542), INT64_C( 8589413019448134947), -INT64_C( 1787327138275053568) }, { -INT64_C( 2590237040503332613), INT64_C( 3260558896362943843), INT64_C( 5421012663790113059), INT64_C( 924494742396661374), -INT64_C( 7577660192562507305), -INT64_C( 
4401609342217303629), -INT64_C( 3350745935121086999), INT64_C( 1283124824431838128) }, { -INT64_C( 2590237040503332613), -INT64_C( 3806282354404755521), INT64_C( 5073116734339878557), INT64_C( 924494742396661374), -INT64_C( 7577660192562507305), -INT64_C( 4836279826553257542), -INT64_C( 3350745935121086999), -INT64_C( 1787327138275053568) } }, { { -INT64_C( 1013906683223900380), INT64_C( 5047296433345988266), INT64_C( 343110664857490078), INT64_C( 4037922458203226557), -INT64_C( 6970307830657051628), INT64_C( 3109722953645421443), INT64_C( 6404083055616369286), INT64_C( 5741061732584957280) }, { INT64_C( 4396047299971726445), INT64_C( 7318427319765232344), -INT64_C( 1451250295857173272), -INT64_C( 986837498692948796), -INT64_C( 5962671533283020984), -INT64_C( 8321347385694200256), -INT64_C( 6285075766588685233), INT64_C( 8831546987744011544) }, { -INT64_C( 1013906683223900380), INT64_C( 5047296433345988266), -INT64_C( 1451250295857173272), -INT64_C( 986837498692948796), -INT64_C( 6970307830657051628), -INT64_C( 8321347385694200256), -INT64_C( 6285075766588685233), INT64_C( 5741061732584957280) } }, { { INT64_C( 8676110158968710116), INT64_C( 7731585570087336219), INT64_C( 6947940732284263648), -INT64_C( 1379073418233834703), INT64_C( 3467786099733453167), -INT64_C( 5472651092515833978), -INT64_C( 8124242631632333928), INT64_C( 4101599252628782583) }, { INT64_C( 2697092806972772647), INT64_C( 3548508411849563575), -INT64_C( 7992110764606245336), -INT64_C( 103727006372329330), INT64_C( 2725144442825305869), -INT64_C( 7651072137327765498), -INT64_C( 4408687360240459099), -INT64_C( 1509082058199506630) }, { INT64_C( 2697092806972772647), INT64_C( 3548508411849563575), -INT64_C( 7992110764606245336), -INT64_C( 1379073418233834703), INT64_C( 2725144442825305869), -INT64_C( 7651072137327765498), -INT64_C( 8124242631632333928), -INT64_C( 1509082058199506630) } }, { { -INT64_C( 527931665442977512), -INT64_C( 1151962406489465856), -INT64_C( 8412442278925230261), INT64_C( 
2101679115640527714), INT64_C( 3088995634827805172), INT64_C( 3019834932107703725), INT64_C( 8834066958588057787), -INT64_C( 3285405759755897787) }, { INT64_C( 790828539241303206), -INT64_C( 1723775649920610036), -INT64_C( 1614948779877418237), -INT64_C( 2634153652428517184), -INT64_C( 3742873095679366489), INT64_C( 21051238396596533), -INT64_C( 8353416673669398652), -INT64_C( 8641390768869915133) }, { -INT64_C( 527931665442977512), -INT64_C( 1723775649920610036), -INT64_C( 8412442278925230261), -INT64_C( 2634153652428517184), -INT64_C( 3742873095679366489), INT64_C( 21051238396596533), -INT64_C( 8353416673669398652), -INT64_C( 8641390768869915133) } }, { { -INT64_C( 7542515202943828282), INT64_C( 6388713222282283692), INT64_C( 8996946829836928643), INT64_C( 7584845323688019673), INT64_C( 1549312393974173318), INT64_C( 4789973744992811597), INT64_C( 7431903165732223533), INT64_C( 2845541178263328882) }, { -INT64_C( 94417599201317582), INT64_C( 219155580128816649), INT64_C( 8757193430941735826), INT64_C( 4570039869208635557), INT64_C( 1524621353927998584), INT64_C( 8274211893155809273), -INT64_C( 1224388340765000318), INT64_C( 1372931147674456002) }, { -INT64_C( 7542515202943828282), INT64_C( 219155580128816649), INT64_C( 8757193430941735826), INT64_C( 4570039869208635557), INT64_C( 1524621353927998584), INT64_C( 4789973744992811597), -INT64_C( 1224388340765000318), INT64_C( 1372931147674456002) } }, { { INT64_C( 7799500575434663965), -INT64_C( 8935688111334352212), INT64_C( 7837686853406593420), -INT64_C( 5239013914309822050), -INT64_C( 7489453278118246352), -INT64_C( 1748202205642208200), INT64_C( 8560079382561802676), INT64_C( 9209292026337429115) }, { INT64_C( 218198956258274690), INT64_C( 198432500651666302), INT64_C( 8867918617604357571), INT64_C( 4323278318117961522), -INT64_C( 3181035208830213620), -INT64_C( 3229805441535174948), -INT64_C( 1412582467337023766), INT64_C( 6932003363654334014) }, { INT64_C( 218198956258274690), -INT64_C( 8935688111334352212), 
INT64_C( 7837686853406593420), -INT64_C( 5239013914309822050), -INT64_C( 7489453278118246352), -INT64_C( 3229805441535174948), -INT64_C( 1412582467337023766), INT64_C( 6932003363654334014) } }, { { INT64_C( 7761410313214745998), -INT64_C( 5040720063136112088), INT64_C( 3961208308217706834), INT64_C( 7040360772965132031), INT64_C( 2682451021070134079), -INT64_C( 1952758411399671972), -INT64_C( 7921855298783835423), -INT64_C( 2340858468243259824) }, { INT64_C( 2208987861044021125), INT64_C( 5560872881523131573), INT64_C( 6331837906581593530), -INT64_C( 2020993227263654797), INT64_C( 4369631314671149253), -INT64_C( 2825532546702053872), -INT64_C( 7481318849734381618), INT64_C( 7641094149959821257) }, { INT64_C( 2208987861044021125), -INT64_C( 5040720063136112088), INT64_C( 3961208308217706834), -INT64_C( 2020993227263654797), INT64_C( 2682451021070134079), -INT64_C( 2825532546702053872), -INT64_C( 7921855298783835423), -INT64_C( 2340858468243259824) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_min_epi64(a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_min_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t src[8]; const simde__mmask8 k; const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { { -INT64_C( 832893060298135842), -INT64_C( 5562894295615803047), -INT64_C( 1796689962277743011), -INT64_C( 8401131206823195650), -INT64_C( 4109912428081810219), -INT64_C( 5014718200183997287), INT64_C( 3666331645962441979), -INT64_C( 4580075145466234056) }, UINT8_C(193), { INT64_C( 8496871624244314792), INT64_C( 6659570441877889100), INT64_C( 256637315434178281), -INT64_C( 8235765515890559834), INT64_C( 2422877041933082993), -INT64_C( 1743458215748603543), -INT64_C( 
4130420842561708445), -INT64_C( 4921469802541114559) }, { -INT64_C( 4303888341886725553), INT64_C( 3952193257319290613), INT64_C( 2614215216499595104), -INT64_C( 4814349726284906198), -INT64_C( 8237322755203066736), INT64_C( 5118158349405250813), -INT64_C( 6883735029549569936), INT64_C( 5781570515681722896) }, { -INT64_C( 4303888341886725553), -INT64_C( 5562894295615803047), -INT64_C( 1796689962277743011), -INT64_C( 8401131206823195650), -INT64_C( 4109912428081810219), -INT64_C( 5014718200183997287), -INT64_C( 6883735029549569936), -INT64_C( 4921469802541114559) } }, { { INT64_C( 4718904091369741911), -INT64_C( 4180438498205827890), -INT64_C( 2010697995710315519), -INT64_C( 4078960058780406701), INT64_C( 6359983766621364157), INT64_C( 8018950981776397615), INT64_C( 5009691578083009229), INT64_C( 5000048401435977771) }, UINT8_C( 92), { -INT64_C( 2270588773879888102), -INT64_C( 5970719450183368372), INT64_C( 3589339532494881957), -INT64_C( 1704613768996492482), -INT64_C( 3913713297744642491), INT64_C( 7038074796686442164), INT64_C( 7963160395127278343), INT64_C( 9148430668855327551) }, { -INT64_C( 7978692037101868033), -INT64_C( 5829843022299240965), INT64_C( 3541944710442524040), INT64_C( 5797879096948003006), -INT64_C( 4769854329868991393), INT64_C( 2580155333917106039), INT64_C( 1210909003179937707), -INT64_C( 6830413334550409837) }, { INT64_C( 4718904091369741911), -INT64_C( 4180438498205827890), INT64_C( 3541944710442524040), -INT64_C( 1704613768996492482), -INT64_C( 4769854329868991393), INT64_C( 8018950981776397615), INT64_C( 1210909003179937707), INT64_C( 5000048401435977771) } }, { { -INT64_C( 8375660580469603773), INT64_C( 8127722742837032972), INT64_C( 4443606607624775495), INT64_C( 8488537840863589097), INT64_C( 7275914123013696346), INT64_C( 7182070542727160693), INT64_C( 9107868980994685310), INT64_C( 4251248379543849049) }, UINT8_C( 34), { INT64_C( 4912828654192142458), -INT64_C( 3728767101341570837), INT64_C( 6250680509905594559), -INT64_C( 
4457595751606367862), INT64_C( 7637444705809767960), -INT64_C( 2382566796882584561), -INT64_C( 4973941905533218603), -INT64_C( 5235751640300310709) }, { -INT64_C( 8762477137892421507), -INT64_C( 3258750163570093623), INT64_C( 6328522899614665448), -INT64_C( 1444740217538179427), -INT64_C( 7346132058166107376), INT64_C( 3867546778731722460), INT64_C( 3365914675463987795), -INT64_C( 1078507789801054033) }, { -INT64_C( 8375660580469603773), -INT64_C( 3728767101341570837), INT64_C( 4443606607624775495), INT64_C( 8488537840863589097), INT64_C( 7275914123013696346), -INT64_C( 2382566796882584561), INT64_C( 9107868980994685310), INT64_C( 4251248379543849049) } }, { { INT64_C( 1597799553933373121), INT64_C( 5386197365871914556), -INT64_C( 2414669172691104321), INT64_C( 563516506348709888), INT64_C( 1027601171459165169), -INT64_C( 8163239626675834252), -INT64_C( 4651554586725818523), INT64_C( 7030526320972950851) }, UINT8_C(190), { INT64_C( 100379707640417578), -INT64_C( 1876770117458473161), INT64_C( 7483966608753592381), INT64_C( 2392319562060621315), -INT64_C( 7379581873131764794), INT64_C( 337520141491791685), -INT64_C( 5113983927075384411), -INT64_C( 2452102142569226528) }, { -INT64_C( 4314156524689552858), INT64_C( 737533043426675056), INT64_C( 3675695217147304338), -INT64_C( 5078930547069537905), INT64_C( 8093357759854147341), -INT64_C( 5756756880866615125), INT64_C( 451762347170186685), INT64_C( 6879584137258028844) }, { INT64_C( 1597799553933373121), -INT64_C( 1876770117458473161), INT64_C( 3675695217147304338), -INT64_C( 5078930547069537905), -INT64_C( 7379581873131764794), -INT64_C( 5756756880866615125), -INT64_C( 4651554586725818523), -INT64_C( 2452102142569226528) } }, { { -INT64_C( 3992093498748978648), -INT64_C( 2095311661344145124), INT64_C( 808177189403223226), INT64_C( 4483408289686348935), INT64_C( 8999598743634715646), INT64_C( 7874723935063358784), -INT64_C( 400022725246174329), -INT64_C( 9219981684985610823) }, UINT8_C( 47), { INT64_C( 
4445909899155577715), -INT64_C( 7138948265198806845), INT64_C( 326522826273019130), -INT64_C( 3019656523962492963), INT64_C( 8457474992241223585), -INT64_C( 6624291842926359276), INT64_C( 642321683028647965), INT64_C( 1624965441752493907) }, { -INT64_C( 1740476525155199312), -INT64_C( 7335961936294792221), -INT64_C( 738476390311090803), INT64_C( 7665754087545942486), INT64_C( 1354075163570510096), INT64_C( 2337034589997835864), INT64_C( 3102303205289684342), INT64_C( 319149691154905673) }, { -INT64_C( 1740476525155199312), -INT64_C( 7335961936294792221), -INT64_C( 738476390311090803), -INT64_C( 3019656523962492963), INT64_C( 8999598743634715646), -INT64_C( 6624291842926359276), -INT64_C( 400022725246174329), -INT64_C( 9219981684985610823) } }, { { INT64_C( 3941689668636457586), -INT64_C( 7454731569466748201), INT64_C( 5036923225950413670), -INT64_C( 6381400331417723784), -INT64_C( 2232102020741310224), -INT64_C( 1150433814732140467), -INT64_C( 1262255333637900230), INT64_C( 7767244237088408814) }, UINT8_C( 35), { INT64_C( 5789954839688264210), -INT64_C( 8473017124609406631), -INT64_C( 2308394859603386506), -INT64_C( 776933732096796343), -INT64_C( 3850194564950105088), -INT64_C( 5944360945412576475), -INT64_C( 4972629915507261181), INT64_C( 2205644646238804164) }, { INT64_C( 2280775221522701930), INT64_C( 450244529351991387), INT64_C( 7152598701790524441), -INT64_C( 9027845778457357702), INT64_C( 7015767115569108292), INT64_C( 2642823581980128419), INT64_C( 7937860376831382054), INT64_C( 2062011840757903875) }, { INT64_C( 2280775221522701930), -INT64_C( 8473017124609406631), INT64_C( 5036923225950413670), -INT64_C( 6381400331417723784), -INT64_C( 2232102020741310224), -INT64_C( 5944360945412576475), -INT64_C( 1262255333637900230), INT64_C( 7767244237088408814) } }, { { -INT64_C( 6757448935706788910), -INT64_C( 8981314151640388821), -INT64_C( 132169012556068408), -INT64_C( 4641320135375048542), -INT64_C( 3219000985198858062), INT64_C( 6618996856079424762), INT64_C( 
3992795749843149935), -INT64_C( 1360570621868539022) }, UINT8_C( 56), { -INT64_C( 8250958517886738763), -INT64_C( 2984625747510962498), -INT64_C( 1565386353147183470), INT64_C( 8778380734301054547), INT64_C( 2751273211206153392), INT64_C( 6546791058494934354), INT64_C( 4120640716419253725), INT64_C( 1610222026580317210) }, { INT64_C( 2660191223321941596), -INT64_C( 984877554862574285), INT64_C( 1326423302095733576), INT64_C( 4699587459626909115), INT64_C( 618945698813308831), INT64_C( 804834549619212069), -INT64_C( 8870786378313866533), -INT64_C( 4754150703106763297) }, { -INT64_C( 6757448935706788910), -INT64_C( 8981314151640388821), -INT64_C( 132169012556068408), INT64_C( 4699587459626909115), INT64_C( 618945698813308831), INT64_C( 804834549619212069), INT64_C( 3992795749843149935), -INT64_C( 1360570621868539022) } }, { { INT64_C( 2281854940981758669), -INT64_C( 2282616902916723323), INT64_C( 1023327771410205926), -INT64_C( 8986131827922839188), -INT64_C( 4180238476611585309), -INT64_C( 4656335156241738628), INT64_C( 1669043992684008683), -INT64_C( 5208995898362430341) }, UINT8_C(103), { -INT64_C( 1934739621314849574), INT64_C( 2144694229274568824), INT64_C( 146613306642718187), -INT64_C( 3902493397346731645), INT64_C( 7104597645489045027), -INT64_C( 4798306323316189631), -INT64_C( 7718235099635240486), INT64_C( 7801320088162056844) }, { -INT64_C( 7951746912189696423), -INT64_C( 5215654503480762610), -INT64_C( 4099547540182472586), INT64_C( 6047074235598315318), -INT64_C( 3450536032744371653), -INT64_C( 5427313369072666341), -INT64_C( 560321285490784713), INT64_C( 4306111947729238901) }, { -INT64_C( 7951746912189696423), -INT64_C( 5215654503480762610), -INT64_C( 4099547540182472586), -INT64_C( 8986131827922839188), -INT64_C( 4180238476611585309), -INT64_C( 5427313369072666341), -INT64_C( 7718235099635240486), -INT64_C( 5208995898362430341) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = 
simde_mm512_loadu_epi64(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_mask_min_epi64(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_min_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { UINT8_C(108), { -INT64_C( 2472476313199327400), -INT64_C( 7262106806340770128), INT64_C( 6834306869630970124), INT64_C( 7105441106113742796), INT64_C( 8800778905699404384), INT64_C( 1890579520473873996), -INT64_C( 4856193287171761574), -INT64_C( 934068582790530660) }, { -INT64_C( 1039388523364876465), INT64_C( 1711595172339518615), INT64_C( 3392599831143005544), -INT64_C( 3211666298667401496), -INT64_C( 7605313139610180130), INT64_C( 4422812822769763980), -INT64_C( 8359901762306067398), -INT64_C( 8214719999122222681) }, { INT64_C( 0), INT64_C( 0), INT64_C( 3392599831143005544), -INT64_C( 3211666298667401496), INT64_C( 0), INT64_C( 1890579520473873996), -INT64_C( 8359901762306067398), INT64_C( 0) } }, { UINT8_C( 24), { -INT64_C( 8433082092495695637), INT64_C( 6061735838320902376), -INT64_C( 5151692566412399823), -INT64_C( 7560710701117362809), -INT64_C( 4749897011430283003), INT64_C( 4624565765711033145), INT64_C( 3336213663237940397), -INT64_C( 5869250613222001591) }, { -INT64_C( 4397961263633100956), INT64_C( 725300940480682688), INT64_C( 1207786892513127405), -INT64_C( 8505451256934438241), INT64_C( 4353778509370568480), -INT64_C( 4775190202030487979), -INT64_C( 2008865721453290936), -INT64_C( 2977967092224219907) }, { INT64_C( 0), INT64_C( 0), INT64_C( 0), -INT64_C( 8505451256934438241), -INT64_C( 4749897011430283003), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C( 81), { -INT64_C( 1117162181707858065), INT64_C( 5078791458224353302), -INT64_C( 
3612327075680384306), INT64_C( 2093990958789231003), -INT64_C( 165851541855019816), -INT64_C( 4816128180187270409), -INT64_C( 4706968442486475722), INT64_C( 7010568091717021822) }, { INT64_C( 5713614752012538215), INT64_C( 6770417838826093384), -INT64_C( 8074695672785431537), INT64_C( 6703052751799872283), INT64_C( 5205867603656583831), -INT64_C( 2669378062331645840), INT64_C( 4083353214102811146), INT64_C( 341117350895773288) }, { -INT64_C( 1117162181707858065), INT64_C( 0), INT64_C( 0), INT64_C( 0), -INT64_C( 165851541855019816), INT64_C( 0), -INT64_C( 4706968442486475722), INT64_C( 0) } }, { UINT8_C( 14), { -INT64_C( 6716461120010390683), -INT64_C( 8622343426074058281), -INT64_C( 8954883270350900651), INT64_C( 4084188523046836155), -INT64_C( 1676644108240503833), -INT64_C( 4063218342201841218), -INT64_C( 2693484496080584194), INT64_C( 7562712012916624873) }, { -INT64_C( 6711703869941774623), -INT64_C( 7232970539122945946), INT64_C( 8326404236480264084), INT64_C( 993926816314885858), -INT64_C( 1283565989249659735), -INT64_C( 3664692903285430805), INT64_C( 1232031570882255389), INT64_C( 1535234252872108052) }, { INT64_C( 0), -INT64_C( 8622343426074058281), -INT64_C( 8954883270350900651), INT64_C( 993926816314885858), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(139), { -INT64_C( 5326347886935170482), -INT64_C( 7184441242637328952), INT64_C( 6007762368993161532), INT64_C( 4403065706787086416), -INT64_C( 505112140204069514), -INT64_C( 1912216021743768551), INT64_C( 7908758677676310882), -INT64_C( 3865752779607998418) }, { INT64_C( 6155028968523284548), INT64_C( 7316280124921319931), INT64_C( 8462988578737273063), -INT64_C( 3949115340514758893), INT64_C( 5493732190204019372), -INT64_C( 386896068955116230), INT64_C( 2011827034937848880), INT64_C( 1598976232500919777) }, { -INT64_C( 5326347886935170482), -INT64_C( 7184441242637328952), INT64_C( 0), -INT64_C( 3949115340514758893), INT64_C( 0), INT64_C( 0), INT64_C( 0), -INT64_C( 3865752779607998418) 
} }, { UINT8_C(246), { -INT64_C( 6246612271875228051), -INT64_C( 6833167700991374205), INT64_C( 723325325721313718), INT64_C( 932679017548771142), INT64_C( 5723355825735612552), -INT64_C( 7580554467111982595), -INT64_C( 9174176340532890346), INT64_C( 3810726624785736246) }, { INT64_C( 8340497693800989860), -INT64_C( 7780413115436496496), -INT64_C( 4496027792861248288), INT64_C( 2659510273166480392), -INT64_C( 2852938791133060738), INT64_C( 5939771209448687196), -INT64_C( 7755754307279084014), -INT64_C( 1243502087413107174) }, { INT64_C( 0), -INT64_C( 7780413115436496496), -INT64_C( 4496027792861248288), INT64_C( 0), -INT64_C( 2852938791133060738), -INT64_C( 7580554467111982595), -INT64_C( 9174176340532890346), -INT64_C( 1243502087413107174) } }, { UINT8_C( 18), { INT64_C( 5837001892527006546), -INT64_C( 1091536548743881862), -INT64_C( 3701657805628016767), INT64_C( 5592673567830195290), -INT64_C( 6509121848692508659), INT64_C( 219067255490440655), -INT64_C( 981455309446150209), -INT64_C( 3049187875246727833) }, { INT64_C( 7341668478055522579), INT64_C( 6326018816358633541), -INT64_C( 1758006394332818417), INT64_C( 5082866555416025324), -INT64_C( 3128763075952134247), -INT64_C( 6150397998391422282), -INT64_C( 3665668545094446407), -INT64_C( 396868304485914363) }, { INT64_C( 0), -INT64_C( 1091536548743881862), INT64_C( 0), INT64_C( 0), -INT64_C( 6509121848692508659), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(194), { -INT64_C( 3450559144571885311), -INT64_C( 6638282687324132415), -INT64_C( 6747991784971560600), INT64_C( 1270012968322699986), -INT64_C( 2831191765649067993), -INT64_C( 7082227944931778558), INT64_C( 6905509768676391929), -INT64_C( 143873471909406498) }, { INT64_C( 1277500638311446987), -INT64_C( 7959026466587100744), -INT64_C( 6340617704767718122), -INT64_C( 1997743254139355791), INT64_C( 8732902719398226192), INT64_C( 5072162797323179321), INT64_C( 3594412088944568175), INT64_C( 2244128586564466728) }, { INT64_C( 0), -INT64_C( 
7959026466587100744), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 3594412088944568175), -INT64_C( 143873471909406498) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_maskz_min_epi64(test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_min_epu64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint64_t a[8]; const uint64_t b[8]; const uint64_t r[8]; } test_vec[] = { { { UINT64_C( 9044969195737560887), UINT64_C(15694047717360971943), UINT64_C( 3921003438290561397), UINT64_C( 1509185112255430061), UINT64_C( 1186214072345967210), UINT64_C(13118825461627079671), UINT64_C( 6081951486148113239), UINT64_C(14942198529738257003) }, { UINT64_C( 6633026429662010772), UINT64_C( 8358859155088518921), UINT64_C(15929527043916891157), UINT64_C( 7562250571942820542), UINT64_C( 171263199686277792), UINT64_C(12299977736181314337), UINT64_C( 2494845092740159312), UINT64_C(16658313952610403826) }, { UINT64_C( 6633026429662010772), UINT64_C( 8358859155088518921), UINT64_C( 3921003438290561397), UINT64_C( 1509185112255430061), UINT64_C( 171263199686277792), UINT64_C(12299977736181314337), UINT64_C( 2494845092740159312), UINT64_C(14942198529738257003) } }, { { UINT64_C( 2877320800269800435), UINT64_C(15600214605897743167), UINT64_C( 5833914546256537199), UINT64_C( 7434955301018025215), UINT64_C(11962478044776700631), UINT64_C(17356021396500277629), UINT64_C(12722029792799782879), UINT64_C(13750093188666139638) }, { UINT64_C(16450416231898096958), UINT64_C(11419375756601050375), UINT64_C(17121957871900966204), UINT64_C(17848655622549509133), UINT64_C(17625936570506563151), UINT64_C( 9658802046534190252), UINT64_C( 6635510132672554546), UINT64_C(16596895574919311013) }, { UINT64_C( 2877320800269800435), 
UINT64_C(11419375756601050375), UINT64_C( 5833914546256537199), UINT64_C( 7434955301018025215), UINT64_C(11962478044776700631), UINT64_C( 9658802046534190252), UINT64_C( 6635510132672554546), UINT64_C(13750093188666139638) } }, { { UINT64_C( 9525738658961682514), UINT64_C(15531782222786834364), UINT64_C(13081574900903247839), UINT64_C( 3603838339067862149), UINT64_C(10715088087348781143), UINT64_C( 8921109267966698066), UINT64_C( 7703748995652963876), UINT64_C(13413279222790093586) }, { UINT64_C(10130456184787408426), UINT64_C( 8671207557433601854), UINT64_C( 5291893621416263712), UINT64_C(12370650962216155025), UINT64_C( 8196062254738544376), UINT64_C(12916219912397734514), UINT64_C( 1306371004577737890), UINT64_C(13208129442792496416) }, { UINT64_C( 9525738658961682514), UINT64_C( 8671207557433601854), UINT64_C( 5291893621416263712), UINT64_C( 3603838339067862149), UINT64_C( 8196062254738544376), UINT64_C( 8921109267966698066), UINT64_C( 1306371004577737890), UINT64_C(13208129442792496416) } }, { { UINT64_C(12604914810315471178), UINT64_C( 8948852409597508073), UINT64_C( 3617927695919606177), UINT64_C( 4056089523943324628), UINT64_C(14124311914971738904), UINT64_C(14858062561793898715), UINT64_C(15177173618446665563), UINT64_C(12360322224545428321) }, { UINT64_C(13510803141578210786), UINT64_C(10265853317895799163), UINT64_C(17292586229111154731), UINT64_C(17387772606307852303), UINT64_C(14744273155009629444), UINT64_C(16046585726442475503), UINT64_C( 6256346815122615381), UINT64_C(15952390729833648738) }, { UINT64_C(12604914810315471178), UINT64_C( 8948852409597508073), UINT64_C( 3617927695919606177), UINT64_C( 4056089523943324628), UINT64_C(14124311914971738904), UINT64_C(14858062561793898715), UINT64_C( 6256346815122615381), UINT64_C(12360322224545428321) } }, { { UINT64_C( 3292236047043268499), UINT64_C( 7421650784503145838), UINT64_C( 7658966040575608492), UINT64_C( 2082138057202156079), UINT64_C( 8338326768641418573), UINT64_C(12920145667649963989), 
UINT64_C(16360997679937263514), UINT64_C( 8347369299134638387) }, { UINT64_C( 6696207814458529653), UINT64_C(12715509021071868348), UINT64_C( 4665271876632911143), UINT64_C(17213834719280277833), UINT64_C( 6021125334691799467), UINT64_C( 9915955935570285271), UINT64_C(11094655412113567658), UINT64_C( 8300958507587990731) }, { UINT64_C( 3292236047043268499), UINT64_C( 7421650784503145838), UINT64_C( 4665271876632911143), UINT64_C( 2082138057202156079), UINT64_C( 6021125334691799467), UINT64_C( 9915955935570285271), UINT64_C(11094655412113567658), UINT64_C( 8300958507587990731) } }, { { UINT64_C(14391592291341278158), UINT64_C( 644384583809552390), UINT64_C( 4767544504217523520), UINT64_C(17215213124685542317), UINT64_C(14414911635327323476), UINT64_C( 5783222324588298461), UINT64_C( 7508075331079635576), UINT64_C(14216673739421890621) }, { UINT64_C( 9934121281375608658), UINT64_C(12121163729190784726), UINT64_C( 6786020984921528073), UINT64_C( 2357924465355721090), UINT64_C(12672024176126968742), UINT64_C(12752449938371551264), UINT64_C(15953052092863910372), UINT64_C(14814462500888715433) }, { UINT64_C( 9934121281375608658), UINT64_C( 644384583809552390), UINT64_C( 4767544504217523520), UINT64_C( 2357924465355721090), UINT64_C(12672024176126968742), UINT64_C( 5783222324588298461), UINT64_C( 7508075331079635576), UINT64_C(14216673739421890621) } }, { { UINT64_C(18363425136224291918), UINT64_C( 2302544724584525213), UINT64_C(12759129887644936409), UINT64_C(13591512307622011817), UINT64_C( 6442888255085264524), UINT64_C(15673531658565171241), UINT64_C(15831312885479221498), UINT64_C( 2966827195079318786) }, { UINT64_C(17734329592381182216), UINT64_C( 5084807300864719542), UINT64_C(13804375898621320837), UINT64_C(10349654056069184987), UINT64_C(16875110073847920236), UINT64_C(17130341031865322025), UINT64_C(16018237363539150288), UINT64_C( 4866974850039053172) }, { UINT64_C(17734329592381182216), UINT64_C( 2302544724584525213), UINT64_C(12759129887644936409), 
UINT64_C(10349654056069184987), UINT64_C( 6442888255085264524), UINT64_C(15673531658565171241), UINT64_C(15831312885479221498), UINT64_C( 2966827195079318786) } }, { { UINT64_C( 8380262446387747885), UINT64_C(17261353027049745719), UINT64_C(15819958463686783402), UINT64_C( 458629218341151043), UINT64_C(13690295832671672637), UINT64_C( 3868847766836668065), UINT64_C( 4061241865194843161), UINT64_C(15000838980395742030) }, { UINT64_C(16846935173581345929), UINT64_C( 3097730047321647164), UINT64_C(11291376720116703366), UINT64_C( 4512308540320450106), UINT64_C( 9994922769949521796), UINT64_C( 4502561380537360193), UINT64_C( 7573204294845409071), UINT64_C( 5793834518460226675) }, { UINT64_C( 8380262446387747885), UINT64_C( 3097730047321647164), UINT64_C(11291376720116703366), UINT64_C( 458629218341151043), UINT64_C( 9994922769949521796), UINT64_C( 3868847766836668065), UINT64_C( 4061241865194843161), UINT64_C( 5793834518460226675) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_min_epu64(a, b); simde_test_x86_assert_equal_u64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_min_epu64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint64_t src[8]; const simde__mmask8 k; const uint64_t a[8]; const uint64_t b[8]; const uint64_t r[8]; } test_vec[] = { { { UINT64_C(13686068365161150177), UINT64_C(17532401932033290988), UINT64_C( 5071801821781528843), UINT64_C(16403007056865297244), UINT64_C( 7120014496599492455), UINT64_C(12424961470197204689), UINT64_C(16793558581555422693), UINT64_C( 6061130335265078328) }, UINT8_C( 60), { UINT64_C( 4233125491838707086), UINT64_C(10328029007246474010), UINT64_C(16700761572999175385), UINT64_C(12360454758603227095), UINT64_C( 7709134865548524429), UINT64_C( 4020638450122872724), UINT64_C(14292889128457857353), 
UINT64_C(16452384284722015882) }, { UINT64_C(15677771670748589271), UINT64_C(16217712503761038202), UINT64_C(16825922508532602316), UINT64_C( 583465947924305118), UINT64_C(15847536385992998607), UINT64_C(12873754930782077742), UINT64_C(10007543373175324056), UINT64_C( 2647227444091646595) }, { UINT64_C(13686068365161150177), UINT64_C(17532401932033290988), UINT64_C(16700761572999175385), UINT64_C( 583465947924305118), UINT64_C( 7709134865548524429), UINT64_C( 4020638450122872724), UINT64_C(16793558581555422693), UINT64_C( 6061130335265078328) } }, { { UINT64_C(15911821866855623830), UINT64_C(16267366106206012712), UINT64_C(11637225703779938120), UINT64_C(12368592210698255917), UINT64_C( 3136884699593629437), UINT64_C(13701353551232117754), UINT64_C(16639167544660590123), UINT64_C( 5706357946148470896) }, UINT8_C(236), { UINT64_C(12628685456267017171), UINT64_C(12068772517308106069), UINT64_C(17237886769549067671), UINT64_C(14098677992813346478), UINT64_C( 557687355998967691), UINT64_C( 6783499416983102418), UINT64_C( 8662780305157358489), UINT64_C(10723595218708691230) }, { UINT64_C(18099509682323512437), UINT64_C(17539437360796042819), UINT64_C( 963616319494393629), UINT64_C(16684126273053365484), UINT64_C(14982416636392123592), UINT64_C( 1805734526670431170), UINT64_C(12307318254061243088), UINT64_C(11334761829393033460) }, { UINT64_C(15911821866855623830), UINT64_C(16267366106206012712), UINT64_C( 963616319494393629), UINT64_C(14098677992813346478), UINT64_C( 3136884699593629437), UINT64_C( 1805734526670431170), UINT64_C( 8662780305157358489), UINT64_C(10723595218708691230) } }, { { UINT64_C( 6462801357531142997), UINT64_C(11165695382941698526), UINT64_C( 5275727633078416591), UINT64_C( 6232978593844025102), UINT64_C( 7442735538344824408), UINT64_C( 1357612375565516658), UINT64_C( 7262009690649735210), UINT64_C(14701656238056765358) }, UINT8_C(212), { UINT64_C(14238345752624348222), UINT64_C(15881130087530681606), UINT64_C( 2468139358930330810), UINT64_C( 
3709105674812583902), UINT64_C(11667404532817254175), UINT64_C(14434109435563340743), UINT64_C(18011190249750430502), UINT64_C( 986877581458312073) }, { UINT64_C(14279540026880756974), UINT64_C( 8207680901140318224), UINT64_C( 7214829116955246861), UINT64_C(14176934525860560579), UINT64_C( 2263193020303077794), UINT64_C( 4125056658046886530), UINT64_C( 9053785262858994278), UINT64_C(13968737911998155071) }, { UINT64_C( 6462801357531142997), UINT64_C(11165695382941698526), UINT64_C( 2468139358930330810), UINT64_C( 6232978593844025102), UINT64_C( 2263193020303077794), UINT64_C( 1357612375565516658), UINT64_C( 9053785262858994278), UINT64_C( 986877581458312073) } }, { { UINT64_C( 2670014054728411358), UINT64_C(15479489228914707309), UINT64_C(17576630924552385204), UINT64_C(16584191199209331269), UINT64_C(13966472048727525643), UINT64_C(12281647096496047403), UINT64_C( 9945361146332153950), UINT64_C(18090139399423462687) }, UINT8_C(158), { UINT64_C( 4678311457047357664), UINT64_C( 9410238406936922125), UINT64_C(13555361740489206266), UINT64_C( 3743519687341524288), UINT64_C(11892564712869913935), UINT64_C( 2081918817811710312), UINT64_C( 4434420541052136223), UINT64_C( 3171808324837586559) }, { UINT64_C( 4731314915132307563), UINT64_C( 2125848550798755906), UINT64_C( 6083366755163151154), UINT64_C(17344816288595574443), UINT64_C(17311165913264073762), UINT64_C(11115399998573174821), UINT64_C( 5591559383428967536), UINT64_C( 840075166449319732) }, { UINT64_C( 2670014054728411358), UINT64_C( 2125848550798755906), UINT64_C( 6083366755163151154), UINT64_C( 3743519687341524288), UINT64_C(11892564712869913935), UINT64_C(12281647096496047403), UINT64_C( 9945361146332153950), UINT64_C( 840075166449319732) } }, { { UINT64_C( 6905219148390308919), UINT64_C( 7937529465821931812), UINT64_C( 6548318686262128880), UINT64_C( 8647398651486975500), UINT64_C( 8138340206561200215), UINT64_C( 2938075631335601242), UINT64_C(15318039516875029012), UINT64_C(13333693271013762897) }, 
UINT8_C(211), { UINT64_C(12750129510458805094), UINT64_C( 9999416211519588748), UINT64_C( 4302951487301156811), UINT64_C(13443058330370918897), UINT64_C(14595395900362829473), UINT64_C( 5847712488547317132), UINT64_C(10452414521711032639), UINT64_C(11801361770630458297) }, { UINT64_C(16686415356729759882), UINT64_C(10041808137218165891), UINT64_C( 4275724205147689764), UINT64_C( 845285298388116503), UINT64_C(14801473021009935398), UINT64_C(13567149509055081841), UINT64_C( 2955443355653350981), UINT64_C( 9225620924617870204) }, { UINT64_C(12750129510458805094), UINT64_C( 9999416211519588748), UINT64_C( 6548318686262128880), UINT64_C( 8647398651486975500), UINT64_C(14595395900362829473), UINT64_C( 2938075631335601242), UINT64_C( 2955443355653350981), UINT64_C( 9225620924617870204) } }, { { UINT64_C(12777110438840228409), UINT64_C(11505772749216688215), UINT64_C( 9338610774410931549), UINT64_C( 1337153306673208244), UINT64_C( 5859438178814300000), UINT64_C(15206665234644320015), UINT64_C(10133624556884291098), UINT64_C( 406494557947699128) }, UINT8_C(184), { UINT64_C(12983882077076366394), UINT64_C( 1102534877555366212), UINT64_C( 3302954976424717377), UINT64_C( 5111540549564917774), UINT64_C(16799719242515063163), UINT64_C(10223283634242735664), UINT64_C( 5336063231971281106), UINT64_C( 3547553062589685737) }, { UINT64_C( 7611019846458974737), UINT64_C(17504958627519838485), UINT64_C( 318030552002370019), UINT64_C( 7901061263188945854), UINT64_C( 2556569104159033559), UINT64_C(12938405019769943419), UINT64_C(18314515981370810379), UINT64_C( 3526064901190787878) }, { UINT64_C(12777110438840228409), UINT64_C(11505772749216688215), UINT64_C( 9338610774410931549), UINT64_C( 5111540549564917774), UINT64_C( 2556569104159033559), UINT64_C(10223283634242735664), UINT64_C(10133624556884291098), UINT64_C( 3526064901190787878) } }, { { UINT64_C( 5734133271646769761), UINT64_C(10805113565693548400), UINT64_C( 2817253359809709529), UINT64_C( 401269673593244425), 
UINT64_C(18360621039424426666), UINT64_C( 810140176142547231), UINT64_C(16726437048221039352), UINT64_C( 968796610631486152) }, UINT8_C(174), { UINT64_C( 1300478550309899237), UINT64_C(17123900690666927481), UINT64_C( 9762314968632237429), UINT64_C( 4362289670808856319), UINT64_C( 8786019296073074050), UINT64_C(17927644309883270122), UINT64_C( 5596606879962404966), UINT64_C( 1053193104358151434) }, { UINT64_C(16712816420545283958), UINT64_C( 4866017203671340044), UINT64_C( 6900659587811520809), UINT64_C( 1531608282320399782), UINT64_C( 1155307462581758756), UINT64_C( 9704736429709446542), UINT64_C(14703058879847928919), UINT64_C( 5576089834784541615) }, { UINT64_C( 5734133271646769761), UINT64_C( 4866017203671340044), UINT64_C( 6900659587811520809), UINT64_C( 1531608282320399782), UINT64_C(18360621039424426666), UINT64_C( 9704736429709446542), UINT64_C(16726437048221039352), UINT64_C( 1053193104358151434) } }, { { UINT64_C(17690269522951335275), UINT64_C(11396093078602260547), UINT64_C(10814645631567144227), UINT64_C(13877919579589776417), UINT64_C(13875784505449514547), UINT64_C( 2501056896776139216), UINT64_C( 8587099319091068846), UINT64_C( 9847781756449656469) }, UINT8_C( 96), { UINT64_C(11843233039555306458), UINT64_C( 2753606095282139903), UINT64_C( 308764815373683506), UINT64_C(13145692697590837940), UINT64_C(16671372443939588868), UINT64_C(17896079815005372430), UINT64_C( 9181701415467997040), UINT64_C(12230082949249090598) }, { UINT64_C(11823845224917039686), UINT64_C(11203606959026177468), UINT64_C( 4708070426801219340), UINT64_C( 2630100940438692657), UINT64_C( 6355538520778661880), UINT64_C( 5671253772179541486), UINT64_C( 6114334836932327038), UINT64_C( 1540417015589248862) }, { UINT64_C(17690269522951335275), UINT64_C(11396093078602260547), UINT64_C(10814645631567144227), UINT64_C(13877919579589776417), UINT64_C(13875784505449514547), UINT64_C( 5671253772179541486), UINT64_C( 6114334836932327038), UINT64_C( 9847781756449656469) } } }; for (size_t i = 
0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi64(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_mask_min_epu64(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_u64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_min_epu64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const uint64_t a[8]; const uint64_t b[8]; const uint64_t r[8]; } test_vec[] = { { UINT8_C(137), { UINT64_C(16503860676597753011), UINT64_C(10173198177510302297), UINT64_C(13867996816985933723), UINT64_C(14223197835479593264), UINT64_C( 6797315046681642903), UINT64_C(10119936847272377100), UINT64_C( 6297465112980759043), UINT64_C(14817004633568920265) }, { UINT64_C( 8610479422572363803), UINT64_C(14666477725613198348), UINT64_C( 895323810527390957), UINT64_C( 5483562667113684247), UINT64_C(17575837300568118792), UINT64_C(18233238772285260918), UINT64_C( 7275141121168275622), UINT64_C( 7897565958014819868) }, { UINT64_C( 8610479422572363803), UINT64_C( 0), UINT64_C( 0), UINT64_C( 5483562667113684247), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 7897565958014819868) } }, { UINT8_C( 32), { UINT64_C( 965453440346480645), UINT64_C( 2597480412835849195), UINT64_C(14683658851831641644), UINT64_C(12785893805021729670), UINT64_C( 6725287602638845822), UINT64_C(13292883976785195688), UINT64_C( 5079543337340562118), UINT64_C(14598988069481131580) }, { UINT64_C(16810643562660248445), UINT64_C(11825809262842285785), UINT64_C(16548430274960523169), UINT64_C(13878519842170879363), UINT64_C(17063569526524652707), UINT64_C( 7100609541574822408), UINT64_C( 4214079208781242862), UINT64_C( 6172927327602362791) }, { UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 7100609541574822408), UINT64_C( 0), UINT64_C( 0) } }, { UINT8_C( 81), { UINT64_C( 
1250682409119398232), UINT64_C(12537797938699714978), UINT64_C( 6487609060712814071), UINT64_C(17626352940028326568), UINT64_C(17704673040212099739), UINT64_C(12708320018572936675), UINT64_C( 506119906825191835), UINT64_C( 1196815613617705739) }, { UINT64_C( 7510708422366651123), UINT64_C(16308506526666408682), UINT64_C( 9843625777144161333), UINT64_C(15217042879377567656), UINT64_C( 3502204316404015277), UINT64_C( 6611327300880544150), UINT64_C(10129030848468504459), UINT64_C( 4120185132418711930) }, { UINT64_C( 1250682409119398232), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 3502204316404015277), UINT64_C( 0), UINT64_C( 506119906825191835), UINT64_C( 0) } }, { UINT8_C(182), { UINT64_C(13200380385403104070), UINT64_C( 2283950427686637567), UINT64_C( 949854131022183208), UINT64_C(18286254640890218808), UINT64_C(17076464433000661310), UINT64_C( 4163794530850822361), UINT64_C(10581800351259544448), UINT64_C( 9608541157854311332) }, { UINT64_C(10520625028717473830), UINT64_C( 6405530274141770652), UINT64_C(10573043374034834126), UINT64_C(16662550890379303794), UINT64_C( 7386460613380802101), UINT64_C(10321679428363730189), UINT64_C(15153331130976395979), UINT64_C(13003238469991610829) }, { UINT64_C( 0), UINT64_C( 2283950427686637567), UINT64_C( 949854131022183208), UINT64_C( 0), UINT64_C( 7386460613380802101), UINT64_C( 4163794530850822361), UINT64_C( 0), UINT64_C( 9608541157854311332) } }, { UINT8_C(207), { UINT64_C(16246454337152852847), UINT64_C(18218803880263834643), UINT64_C( 1452182024010633192), UINT64_C(10975630910976865722), UINT64_C(18370035455526890473), UINT64_C(12352213528684892629), UINT64_C(12780703332646111343), UINT64_C( 2424208970889818594) }, { UINT64_C(17607137338204333680), UINT64_C(12834642397369754288), UINT64_C( 3620713026983568279), UINT64_C( 7540400133595034444), UINT64_C(11716990684039992199), UINT64_C( 7382300077774405502), UINT64_C( 1645842233799503701), UINT64_C( 9009238808538518695) }, { UINT64_C(16246454337152852847), 
UINT64_C(12834642397369754288), UINT64_C( 1452182024010633192), UINT64_C( 7540400133595034444), UINT64_C( 0), UINT64_C( 0), UINT64_C( 1645842233799503701), UINT64_C( 2424208970889818594) } }, { UINT8_C( 18), { UINT64_C(13719273708005369034), UINT64_C( 595023364407174586), UINT64_C(15186957891871631079), UINT64_C( 3662347238212395395), UINT64_C( 4100101387752888169), UINT64_C( 5495067080298623906), UINT64_C( 9357400296842007884), UINT64_C( 5990938598357247114) }, { UINT64_C( 6603230027166989549), UINT64_C( 8829263283251169237), UINT64_C(11703835191775915757), UINT64_C(12844470392711392772), UINT64_C(17513021339003108598), UINT64_C( 7219187153931968391), UINT64_C( 1683707529127768995), UINT64_C(11736457234736280170) }, { UINT64_C( 0), UINT64_C( 595023364407174586), UINT64_C( 0), UINT64_C( 0), UINT64_C( 4100101387752888169), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0) } }, { UINT8_C( 1), { UINT64_C(18033840468099289597), UINT64_C(15895408885510634047), UINT64_C(11694448209877226022), UINT64_C( 6363850544142776842), UINT64_C(13949436408542621505), UINT64_C( 385423765887322196), UINT64_C( 6480453700705208478), UINT64_C( 7953067963763408300) }, { UINT64_C( 2570325922406888552), UINT64_C( 5372225549456400553), UINT64_C( 5974737005137327170), UINT64_C(17609276354712867524), UINT64_C(13050456509037570859), UINT64_C(18005333824593903081), UINT64_C(13994783000903710271), UINT64_C( 8252305664839553031) }, { UINT64_C( 2570325922406888552), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0) } }, { UINT8_C(127), { UINT64_C( 5032281689989802852), UINT64_C(13946228895540603775), UINT64_C( 4033959350094425984), UINT64_C( 8775527843854454324), UINT64_C( 1452478593801540699), UINT64_C(11147790180098647208), UINT64_C( 2825598596790776408), UINT64_C(18421193228111885336) }, { UINT64_C(17381092220628831373), UINT64_C(15412611488663508016), UINT64_C( 1451578536620225035), UINT64_C( 6656855502611388094), UINT64_C( 9236236667495269618), 
UINT64_C(13085963715764425032), UINT64_C( 3905140362312904224), UINT64_C( 5447660485473759854) }, { UINT64_C( 5032281689989802852), UINT64_C(13946228895540603775), UINT64_C( 1451578536620225035), UINT64_C( 6656855502611388094), UINT64_C( 1452478593801540699), UINT64_C(11147790180098647208), UINT64_C( 2825598596790776408), UINT64_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_maskz_min_epu64(test_vec[i].k, a, b); simde_test_x86_assert_equal_u64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_min_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -30.78), SIMDE_FLOAT32_C( 230.02), SIMDE_FLOAT32_C( 650.41), SIMDE_FLOAT32_C( -352.49), SIMDE_FLOAT32_C( -59.64), SIMDE_FLOAT32_C( 790.85), SIMDE_FLOAT32_C( 797.78), SIMDE_FLOAT32_C( -606.64), SIMDE_FLOAT32_C( -87.74), SIMDE_FLOAT32_C( 822.54), SIMDE_FLOAT32_C( -957.59), SIMDE_FLOAT32_C( 103.00), SIMDE_FLOAT32_C( 800.32), SIMDE_FLOAT32_C( -762.75), SIMDE_FLOAT32_C( 593.42), SIMDE_FLOAT32_C( -283.97) }, { SIMDE_FLOAT32_C( -801.78), SIMDE_FLOAT32_C( 192.58), SIMDE_FLOAT32_C( -152.69), SIMDE_FLOAT32_C( -913.41), SIMDE_FLOAT32_C( 31.03), SIMDE_FLOAT32_C( 411.15), SIMDE_FLOAT32_C( -317.27), SIMDE_FLOAT32_C( 57.37), SIMDE_FLOAT32_C( -966.49), SIMDE_FLOAT32_C( 636.65), SIMDE_FLOAT32_C( 28.95), SIMDE_FLOAT32_C( 832.42), SIMDE_FLOAT32_C( -321.67), SIMDE_FLOAT32_C( -832.42), SIMDE_FLOAT32_C( -939.05), SIMDE_FLOAT32_C( 647.55) }, { SIMDE_FLOAT32_C( -801.78), SIMDE_FLOAT32_C( 192.58), SIMDE_FLOAT32_C( -152.69), SIMDE_FLOAT32_C( -913.41), SIMDE_FLOAT32_C( -59.64), SIMDE_FLOAT32_C( 411.15), SIMDE_FLOAT32_C( -317.27), SIMDE_FLOAT32_C( -606.64), SIMDE_FLOAT32_C( -966.49), SIMDE_FLOAT32_C( 636.65), 
SIMDE_FLOAT32_C( -957.59), SIMDE_FLOAT32_C( 103.00), SIMDE_FLOAT32_C( -321.67), SIMDE_FLOAT32_C( -832.42), SIMDE_FLOAT32_C( -939.05), SIMDE_FLOAT32_C( -283.97) } }, { { SIMDE_FLOAT32_C( 397.60), SIMDE_FLOAT32_C( 711.36), SIMDE_FLOAT32_C( -704.94), SIMDE_FLOAT32_C( -662.04), SIMDE_FLOAT32_C( 502.21), SIMDE_FLOAT32_C( -907.16), SIMDE_FLOAT32_C( -268.68), SIMDE_FLOAT32_C( -585.53), SIMDE_FLOAT32_C( 915.38), SIMDE_FLOAT32_C( -226.27), SIMDE_FLOAT32_C( 517.47), SIMDE_FLOAT32_C( 715.70), SIMDE_FLOAT32_C( 10.98), SIMDE_FLOAT32_C( 110.89), SIMDE_FLOAT32_C( -568.27), SIMDE_FLOAT32_C( 209.20) }, { SIMDE_FLOAT32_C( -696.52), SIMDE_FLOAT32_C( 279.04), SIMDE_FLOAT32_C( 295.79), SIMDE_FLOAT32_C( 334.51), SIMDE_FLOAT32_C( -309.81), SIMDE_FLOAT32_C( 978.52), SIMDE_FLOAT32_C( -608.12), SIMDE_FLOAT32_C( -276.30), SIMDE_FLOAT32_C( 615.17), SIMDE_FLOAT32_C( 420.83), SIMDE_FLOAT32_C( -443.88), SIMDE_FLOAT32_C( -706.50), SIMDE_FLOAT32_C( 588.41), SIMDE_FLOAT32_C( -382.93), SIMDE_FLOAT32_C( 941.05), SIMDE_FLOAT32_C( -13.98) }, { SIMDE_FLOAT32_C( -696.52), SIMDE_FLOAT32_C( 279.04), SIMDE_FLOAT32_C( -704.94), SIMDE_FLOAT32_C( -662.04), SIMDE_FLOAT32_C( -309.81), SIMDE_FLOAT32_C( -907.16), SIMDE_FLOAT32_C( -608.12), SIMDE_FLOAT32_C( -585.53), SIMDE_FLOAT32_C( 615.17), SIMDE_FLOAT32_C( -226.27), SIMDE_FLOAT32_C( -443.88), SIMDE_FLOAT32_C( -706.50), SIMDE_FLOAT32_C( 10.98), SIMDE_FLOAT32_C( -382.93), SIMDE_FLOAT32_C( -568.27), SIMDE_FLOAT32_C( -13.98) } }, { { SIMDE_FLOAT32_C( -671.57), SIMDE_FLOAT32_C( -763.89), SIMDE_FLOAT32_C( 323.97), SIMDE_FLOAT32_C( 830.64), SIMDE_FLOAT32_C( -671.05), SIMDE_FLOAT32_C( -944.70), SIMDE_FLOAT32_C( -754.89), SIMDE_FLOAT32_C( -755.67), SIMDE_FLOAT32_C( -170.97), SIMDE_FLOAT32_C( 762.58), SIMDE_FLOAT32_C( 960.04), SIMDE_FLOAT32_C( 840.01), SIMDE_FLOAT32_C( -126.53), SIMDE_FLOAT32_C( -608.23), SIMDE_FLOAT32_C( 49.21), SIMDE_FLOAT32_C( 176.95) }, { SIMDE_FLOAT32_C( 670.81), SIMDE_FLOAT32_C( -655.00), SIMDE_FLOAT32_C( -488.53), SIMDE_FLOAT32_C( -639.00), 
SIMDE_FLOAT32_C( -676.48), SIMDE_FLOAT32_C( -96.65), SIMDE_FLOAT32_C( 84.71), SIMDE_FLOAT32_C( 938.70), SIMDE_FLOAT32_C( -675.82), SIMDE_FLOAT32_C( 640.83), SIMDE_FLOAT32_C( -767.81), SIMDE_FLOAT32_C( 912.60), SIMDE_FLOAT32_C( -742.11), SIMDE_FLOAT32_C( -826.76), SIMDE_FLOAT32_C( -101.39), SIMDE_FLOAT32_C( -413.68) }, { SIMDE_FLOAT32_C( -671.57), SIMDE_FLOAT32_C( -763.89), SIMDE_FLOAT32_C( -488.53), SIMDE_FLOAT32_C( -639.00), SIMDE_FLOAT32_C( -676.48), SIMDE_FLOAT32_C( -944.70), SIMDE_FLOAT32_C( -754.89), SIMDE_FLOAT32_C( -755.67), SIMDE_FLOAT32_C( -675.82), SIMDE_FLOAT32_C( 640.83), SIMDE_FLOAT32_C( -767.81), SIMDE_FLOAT32_C( 840.01), SIMDE_FLOAT32_C( -742.11), SIMDE_FLOAT32_C( -826.76), SIMDE_FLOAT32_C( -101.39), SIMDE_FLOAT32_C( -413.68) } }, { { SIMDE_FLOAT32_C( -590.65), SIMDE_FLOAT32_C( -777.42), SIMDE_FLOAT32_C( -583.04), SIMDE_FLOAT32_C( -261.70), SIMDE_FLOAT32_C( -722.12), SIMDE_FLOAT32_C( -337.93), SIMDE_FLOAT32_C( -17.36), SIMDE_FLOAT32_C( 106.91), SIMDE_FLOAT32_C( -575.35), SIMDE_FLOAT32_C( -57.33), SIMDE_FLOAT32_C( -53.08), SIMDE_FLOAT32_C( 298.13), SIMDE_FLOAT32_C( 334.44), SIMDE_FLOAT32_C( 996.13), SIMDE_FLOAT32_C( -524.92), SIMDE_FLOAT32_C( 5.25) }, { SIMDE_FLOAT32_C( -658.87), SIMDE_FLOAT32_C( -13.45), SIMDE_FLOAT32_C( 366.26), SIMDE_FLOAT32_C( -335.35), SIMDE_FLOAT32_C( 889.90), SIMDE_FLOAT32_C( -549.04), SIMDE_FLOAT32_C( -396.65), SIMDE_FLOAT32_C( -785.92), SIMDE_FLOAT32_C( -908.21), SIMDE_FLOAT32_C( -164.46), SIMDE_FLOAT32_C( -873.33), SIMDE_FLOAT32_C( -650.32), SIMDE_FLOAT32_C( 8.78), SIMDE_FLOAT32_C( 25.28), SIMDE_FLOAT32_C( -63.99), SIMDE_FLOAT32_C( 418.13) }, { SIMDE_FLOAT32_C( -658.87), SIMDE_FLOAT32_C( -777.42), SIMDE_FLOAT32_C( -583.04), SIMDE_FLOAT32_C( -335.35), SIMDE_FLOAT32_C( -722.12), SIMDE_FLOAT32_C( -549.04), SIMDE_FLOAT32_C( -396.65), SIMDE_FLOAT32_C( -785.92), SIMDE_FLOAT32_C( -908.21), SIMDE_FLOAT32_C( -164.46), SIMDE_FLOAT32_C( -873.33), SIMDE_FLOAT32_C( -650.32), SIMDE_FLOAT32_C( 8.78), SIMDE_FLOAT32_C( 25.28), 
SIMDE_FLOAT32_C( -524.92), SIMDE_FLOAT32_C( 5.25) } }, { { SIMDE_FLOAT32_C( 247.87), SIMDE_FLOAT32_C( 352.97), SIMDE_FLOAT32_C( -843.57), SIMDE_FLOAT32_C( 525.75), SIMDE_FLOAT32_C( -984.96), SIMDE_FLOAT32_C( 139.07), SIMDE_FLOAT32_C( -367.35), SIMDE_FLOAT32_C( -560.31), SIMDE_FLOAT32_C( -918.25), SIMDE_FLOAT32_C( 579.57), SIMDE_FLOAT32_C( 737.82), SIMDE_FLOAT32_C( 416.19), SIMDE_FLOAT32_C( 575.70), SIMDE_FLOAT32_C( -787.10), SIMDE_FLOAT32_C( -578.56), SIMDE_FLOAT32_C( 916.82) }, { SIMDE_FLOAT32_C( 199.45), SIMDE_FLOAT32_C( 787.70), SIMDE_FLOAT32_C( -418.53), SIMDE_FLOAT32_C( 89.35), SIMDE_FLOAT32_C( -761.34), SIMDE_FLOAT32_C( 184.82), SIMDE_FLOAT32_C( 303.43), SIMDE_FLOAT32_C( -669.55), SIMDE_FLOAT32_C( -979.64), SIMDE_FLOAT32_C( 430.10), SIMDE_FLOAT32_C( -319.87), SIMDE_FLOAT32_C( 29.14), SIMDE_FLOAT32_C( -544.62), SIMDE_FLOAT32_C( 616.14), SIMDE_FLOAT32_C( -552.73), SIMDE_FLOAT32_C( 703.25) }, { SIMDE_FLOAT32_C( 199.45), SIMDE_FLOAT32_C( 352.97), SIMDE_FLOAT32_C( -843.57), SIMDE_FLOAT32_C( 89.35), SIMDE_FLOAT32_C( -984.96), SIMDE_FLOAT32_C( 139.07), SIMDE_FLOAT32_C( -367.35), SIMDE_FLOAT32_C( -669.55), SIMDE_FLOAT32_C( -979.64), SIMDE_FLOAT32_C( 430.10), SIMDE_FLOAT32_C( -319.87), SIMDE_FLOAT32_C( 29.14), SIMDE_FLOAT32_C( -544.62), SIMDE_FLOAT32_C( -787.10), SIMDE_FLOAT32_C( -578.56), SIMDE_FLOAT32_C( 703.25) } }, { { SIMDE_FLOAT32_C( -30.90), SIMDE_FLOAT32_C( -396.30), SIMDE_FLOAT32_C( 229.00), SIMDE_FLOAT32_C( -15.86), SIMDE_FLOAT32_C( 742.77), SIMDE_FLOAT32_C( 861.65), SIMDE_FLOAT32_C( 423.84), SIMDE_FLOAT32_C( 824.52), SIMDE_FLOAT32_C( 441.22), SIMDE_FLOAT32_C( 161.66), SIMDE_FLOAT32_C( 240.71), SIMDE_FLOAT32_C( 16.92), SIMDE_FLOAT32_C( 374.56), SIMDE_FLOAT32_C( 662.15), SIMDE_FLOAT32_C( -66.25), SIMDE_FLOAT32_C( -425.99) }, { SIMDE_FLOAT32_C( 449.85), SIMDE_FLOAT32_C( 515.22), SIMDE_FLOAT32_C( 663.36), SIMDE_FLOAT32_C( 688.52), SIMDE_FLOAT32_C( -299.96), SIMDE_FLOAT32_C( -33.22), SIMDE_FLOAT32_C( -981.03), SIMDE_FLOAT32_C( -279.61), SIMDE_FLOAT32_C( 
-603.12), SIMDE_FLOAT32_C( -300.90), SIMDE_FLOAT32_C( 749.53), SIMDE_FLOAT32_C( -147.73), SIMDE_FLOAT32_C( -684.77), SIMDE_FLOAT32_C( -803.20), SIMDE_FLOAT32_C( -444.48), SIMDE_FLOAT32_C( 284.34) }, { SIMDE_FLOAT32_C( -30.90), SIMDE_FLOAT32_C( -396.30), SIMDE_FLOAT32_C( 229.00), SIMDE_FLOAT32_C( -15.86), SIMDE_FLOAT32_C( -299.96), SIMDE_FLOAT32_C( -33.22), SIMDE_FLOAT32_C( -981.03), SIMDE_FLOAT32_C( -279.61), SIMDE_FLOAT32_C( -603.12), SIMDE_FLOAT32_C( -300.90), SIMDE_FLOAT32_C( 240.71), SIMDE_FLOAT32_C( -147.73), SIMDE_FLOAT32_C( -684.77), SIMDE_FLOAT32_C( -803.20), SIMDE_FLOAT32_C( -444.48), SIMDE_FLOAT32_C( -425.99) } }, { { SIMDE_FLOAT32_C( -199.50), SIMDE_FLOAT32_C( 784.52), SIMDE_FLOAT32_C( -731.52), SIMDE_FLOAT32_C( -456.72), SIMDE_FLOAT32_C( 646.17), SIMDE_FLOAT32_C( 692.32), SIMDE_FLOAT32_C( -632.20), SIMDE_FLOAT32_C( 87.40), SIMDE_FLOAT32_C( -146.02), SIMDE_FLOAT32_C( 608.51), SIMDE_FLOAT32_C( -895.68), SIMDE_FLOAT32_C( -771.46), SIMDE_FLOAT32_C( 270.66), SIMDE_FLOAT32_C( 38.06), SIMDE_FLOAT32_C( -197.45), SIMDE_FLOAT32_C( -279.49) }, { SIMDE_FLOAT32_C( -446.72), SIMDE_FLOAT32_C( -534.09), SIMDE_FLOAT32_C( -590.97), SIMDE_FLOAT32_C( 253.32), SIMDE_FLOAT32_C( 432.69), SIMDE_FLOAT32_C( -572.00), SIMDE_FLOAT32_C( 973.71), SIMDE_FLOAT32_C( 829.57), SIMDE_FLOAT32_C( 127.09), SIMDE_FLOAT32_C( 723.24), SIMDE_FLOAT32_C( -318.16), SIMDE_FLOAT32_C( 442.33), SIMDE_FLOAT32_C( 920.04), SIMDE_FLOAT32_C( 237.36), SIMDE_FLOAT32_C( -273.33), SIMDE_FLOAT32_C( -279.46) }, { SIMDE_FLOAT32_C( -446.72), SIMDE_FLOAT32_C( -534.09), SIMDE_FLOAT32_C( -731.52), SIMDE_FLOAT32_C( -456.72), SIMDE_FLOAT32_C( 432.69), SIMDE_FLOAT32_C( -572.00), SIMDE_FLOAT32_C( -632.20), SIMDE_FLOAT32_C( 87.40), SIMDE_FLOAT32_C( -146.02), SIMDE_FLOAT32_C( 608.51), SIMDE_FLOAT32_C( -895.68), SIMDE_FLOAT32_C( -771.46), SIMDE_FLOAT32_C( 270.66), SIMDE_FLOAT32_C( 38.06), SIMDE_FLOAT32_C( -273.33), SIMDE_FLOAT32_C( -279.49) } }, { { SIMDE_FLOAT32_C( 21.88), SIMDE_FLOAT32_C( -4.85), SIMDE_FLOAT32_C( 263.82), 
SIMDE_FLOAT32_C( -331.95), SIMDE_FLOAT32_C( -312.53), SIMDE_FLOAT32_C( 631.61), SIMDE_FLOAT32_C( 755.44), SIMDE_FLOAT32_C( 541.44), SIMDE_FLOAT32_C( 240.12), SIMDE_FLOAT32_C( 859.76), SIMDE_FLOAT32_C( 769.98), SIMDE_FLOAT32_C( -489.22), SIMDE_FLOAT32_C( -102.18), SIMDE_FLOAT32_C( -427.47), SIMDE_FLOAT32_C( 231.29), SIMDE_FLOAT32_C( 451.10) }, { SIMDE_FLOAT32_C( 38.43), SIMDE_FLOAT32_C( 640.32), SIMDE_FLOAT32_C( -295.58), SIMDE_FLOAT32_C( -528.88), SIMDE_FLOAT32_C( -931.68), SIMDE_FLOAT32_C( -321.87), SIMDE_FLOAT32_C( -699.30), SIMDE_FLOAT32_C( 195.41), SIMDE_FLOAT32_C( -598.63), SIMDE_FLOAT32_C( -17.46), SIMDE_FLOAT32_C( -362.26), SIMDE_FLOAT32_C( -678.60), SIMDE_FLOAT32_C( -780.10), SIMDE_FLOAT32_C( 364.41), SIMDE_FLOAT32_C( 41.95), SIMDE_FLOAT32_C( 241.77) }, { SIMDE_FLOAT32_C( 21.88), SIMDE_FLOAT32_C( -4.85), SIMDE_FLOAT32_C( -295.58), SIMDE_FLOAT32_C( -528.88), SIMDE_FLOAT32_C( -931.68), SIMDE_FLOAT32_C( -321.87), SIMDE_FLOAT32_C( -699.30), SIMDE_FLOAT32_C( 195.41), SIMDE_FLOAT32_C( -598.63), SIMDE_FLOAT32_C( -17.46), SIMDE_FLOAT32_C( -362.26), SIMDE_FLOAT32_C( -678.60), SIMDE_FLOAT32_C( -780.10), SIMDE_FLOAT32_C( -427.47), SIMDE_FLOAT32_C( 41.95), SIMDE_FLOAT32_C( 241.77) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_mm512_min_ps(a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_min_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -36.57), SIMDE_FLOAT32_C( 69.10), SIMDE_FLOAT32_C( -983.85), SIMDE_FLOAT32_C( -248.71), SIMDE_FLOAT32_C( 274.57), SIMDE_FLOAT32_C( 653.48), SIMDE_FLOAT32_C( 360.57), SIMDE_FLOAT32_C( -54.84), SIMDE_FLOAT32_C( 250.76), 
SIMDE_FLOAT32_C( -841.58), SIMDE_FLOAT32_C( -977.10), SIMDE_FLOAT32_C( -197.41), SIMDE_FLOAT32_C( -411.19), SIMDE_FLOAT32_C( 191.35), SIMDE_FLOAT32_C( -80.14), SIMDE_FLOAT32_C( 640.19) }, UINT8_C( 32), { SIMDE_FLOAT32_C( 655.14), SIMDE_FLOAT32_C( 247.62), SIMDE_FLOAT32_C( -329.20), SIMDE_FLOAT32_C( -310.43), SIMDE_FLOAT32_C( -42.22), SIMDE_FLOAT32_C( -251.97), SIMDE_FLOAT32_C( -986.50), SIMDE_FLOAT32_C( 336.60), SIMDE_FLOAT32_C( -919.96), SIMDE_FLOAT32_C( -555.47), SIMDE_FLOAT32_C( -1.94), SIMDE_FLOAT32_C( -858.96), SIMDE_FLOAT32_C( -877.47), SIMDE_FLOAT32_C( -290.85), SIMDE_FLOAT32_C( 104.46), SIMDE_FLOAT32_C( 191.63) }, { SIMDE_FLOAT32_C( -274.70), SIMDE_FLOAT32_C( 855.75), SIMDE_FLOAT32_C( -533.80), SIMDE_FLOAT32_C( -621.22), SIMDE_FLOAT32_C( 216.32), SIMDE_FLOAT32_C( 411.35), SIMDE_FLOAT32_C( 629.54), SIMDE_FLOAT32_C( 374.74), SIMDE_FLOAT32_C( 434.26), SIMDE_FLOAT32_C( -567.87), SIMDE_FLOAT32_C( 963.55), SIMDE_FLOAT32_C( -374.39), SIMDE_FLOAT32_C( 351.98), SIMDE_FLOAT32_C( 603.74), SIMDE_FLOAT32_C( 320.68), SIMDE_FLOAT32_C( 7.12) }, { SIMDE_FLOAT32_C( -36.57), SIMDE_FLOAT32_C( 69.10), SIMDE_FLOAT32_C( -983.85), SIMDE_FLOAT32_C( -248.71), SIMDE_FLOAT32_C( 274.57), SIMDE_FLOAT32_C( -251.97), SIMDE_FLOAT32_C( 360.57), SIMDE_FLOAT32_C( -54.84), SIMDE_FLOAT32_C( 250.76), SIMDE_FLOAT32_C( -841.58), SIMDE_FLOAT32_C( -977.10), SIMDE_FLOAT32_C( -197.41), SIMDE_FLOAT32_C( -411.19), SIMDE_FLOAT32_C( 191.35), SIMDE_FLOAT32_C( -80.14), SIMDE_FLOAT32_C( 640.19) } }, { { SIMDE_FLOAT32_C( -148.64), SIMDE_FLOAT32_C( 991.49), SIMDE_FLOAT32_C( 696.69), SIMDE_FLOAT32_C( 809.15), SIMDE_FLOAT32_C( -260.48), SIMDE_FLOAT32_C( 710.19), SIMDE_FLOAT32_C( 145.75), SIMDE_FLOAT32_C( -180.44), SIMDE_FLOAT32_C( -845.29), SIMDE_FLOAT32_C( -856.19), SIMDE_FLOAT32_C( -39.40), SIMDE_FLOAT32_C( -722.76), SIMDE_FLOAT32_C( -147.04), SIMDE_FLOAT32_C( -934.94), SIMDE_FLOAT32_C( 468.87), SIMDE_FLOAT32_C( 578.26) }, UINT8_C(246), { SIMDE_FLOAT32_C( 935.06), SIMDE_FLOAT32_C( 957.03), SIMDE_FLOAT32_C( 
137.14), SIMDE_FLOAT32_C( 346.42), SIMDE_FLOAT32_C( 586.58), SIMDE_FLOAT32_C( -488.13), SIMDE_FLOAT32_C( -219.32), SIMDE_FLOAT32_C( -981.30), SIMDE_FLOAT32_C( -524.58), SIMDE_FLOAT32_C( 406.29), SIMDE_FLOAT32_C( 370.69), SIMDE_FLOAT32_C( -920.84), SIMDE_FLOAT32_C( -273.03), SIMDE_FLOAT32_C( -622.19), SIMDE_FLOAT32_C( -69.48), SIMDE_FLOAT32_C( -281.54) }, { SIMDE_FLOAT32_C( -925.50), SIMDE_FLOAT32_C( -260.33), SIMDE_FLOAT32_C( 457.98), SIMDE_FLOAT32_C( 784.68), SIMDE_FLOAT32_C( 885.42), SIMDE_FLOAT32_C( -722.47), SIMDE_FLOAT32_C( 939.40), SIMDE_FLOAT32_C( -970.77), SIMDE_FLOAT32_C( 238.13), SIMDE_FLOAT32_C( -783.36), SIMDE_FLOAT32_C( -117.81), SIMDE_FLOAT32_C( 303.19), SIMDE_FLOAT32_C( 685.51), SIMDE_FLOAT32_C( -539.55), SIMDE_FLOAT32_C( 224.00), SIMDE_FLOAT32_C( 620.57) }, { SIMDE_FLOAT32_C( -148.64), SIMDE_FLOAT32_C( -260.33), SIMDE_FLOAT32_C( 137.14), SIMDE_FLOAT32_C( 809.15), SIMDE_FLOAT32_C( 586.58), SIMDE_FLOAT32_C( -722.47), SIMDE_FLOAT32_C( -219.32), SIMDE_FLOAT32_C( -981.30), SIMDE_FLOAT32_C( -845.29), SIMDE_FLOAT32_C( -856.19), SIMDE_FLOAT32_C( -39.40), SIMDE_FLOAT32_C( -722.76), SIMDE_FLOAT32_C( -147.04), SIMDE_FLOAT32_C( -934.94), SIMDE_FLOAT32_C( 468.87), SIMDE_FLOAT32_C( 578.26) } }, { { SIMDE_FLOAT32_C( -582.52), SIMDE_FLOAT32_C( -638.86), SIMDE_FLOAT32_C( -33.01), SIMDE_FLOAT32_C( -995.94), SIMDE_FLOAT32_C( -126.99), SIMDE_FLOAT32_C( 747.67), SIMDE_FLOAT32_C( -977.24), SIMDE_FLOAT32_C( 348.44), SIMDE_FLOAT32_C( 153.95), SIMDE_FLOAT32_C( 393.45), SIMDE_FLOAT32_C( 427.60), SIMDE_FLOAT32_C( 880.92), SIMDE_FLOAT32_C( 771.26), SIMDE_FLOAT32_C( -641.88), SIMDE_FLOAT32_C( -400.62), SIMDE_FLOAT32_C( 845.75) }, UINT8_MAX, { SIMDE_FLOAT32_C( -942.64), SIMDE_FLOAT32_C( 630.44), SIMDE_FLOAT32_C( -16.79), SIMDE_FLOAT32_C( -665.11), SIMDE_FLOAT32_C( 569.83), SIMDE_FLOAT32_C( 12.44), SIMDE_FLOAT32_C( 573.02), SIMDE_FLOAT32_C( 786.47), SIMDE_FLOAT32_C( 894.64), SIMDE_FLOAT32_C( -123.79), SIMDE_FLOAT32_C( 471.98), SIMDE_FLOAT32_C( -644.91), SIMDE_FLOAT32_C( -899.79), 
SIMDE_FLOAT32_C( 92.56), SIMDE_FLOAT32_C( -227.43), SIMDE_FLOAT32_C( -538.65) }, { SIMDE_FLOAT32_C( -940.45), SIMDE_FLOAT32_C( -223.37), SIMDE_FLOAT32_C( 334.37), SIMDE_FLOAT32_C( 807.22), SIMDE_FLOAT32_C( -200.61), SIMDE_FLOAT32_C( -317.20), SIMDE_FLOAT32_C( -38.83), SIMDE_FLOAT32_C( -807.16), SIMDE_FLOAT32_C( -889.60), SIMDE_FLOAT32_C( -157.90), SIMDE_FLOAT32_C( 964.10), SIMDE_FLOAT32_C( -531.48), SIMDE_FLOAT32_C( 441.48), SIMDE_FLOAT32_C( 809.85), SIMDE_FLOAT32_C( 566.31), SIMDE_FLOAT32_C( 498.84) }, { SIMDE_FLOAT32_C( -942.64), SIMDE_FLOAT32_C( -223.37), SIMDE_FLOAT32_C( -16.79), SIMDE_FLOAT32_C( -665.11), SIMDE_FLOAT32_C( -200.61), SIMDE_FLOAT32_C( -317.20), SIMDE_FLOAT32_C( -38.83), SIMDE_FLOAT32_C( -807.16), SIMDE_FLOAT32_C( 153.95), SIMDE_FLOAT32_C( 393.45), SIMDE_FLOAT32_C( 427.60), SIMDE_FLOAT32_C( 880.92), SIMDE_FLOAT32_C( 771.26), SIMDE_FLOAT32_C( -641.88), SIMDE_FLOAT32_C( -400.62), SIMDE_FLOAT32_C( 845.75) } }, { { SIMDE_FLOAT32_C( 440.29), SIMDE_FLOAT32_C( -450.48), SIMDE_FLOAT32_C( 833.73), SIMDE_FLOAT32_C( 10.12), SIMDE_FLOAT32_C( 561.96), SIMDE_FLOAT32_C( 406.75), SIMDE_FLOAT32_C( -203.40), SIMDE_FLOAT32_C( 456.60), SIMDE_FLOAT32_C( -717.04), SIMDE_FLOAT32_C( -731.42), SIMDE_FLOAT32_C( 811.69), SIMDE_FLOAT32_C( -616.83), SIMDE_FLOAT32_C( 361.14), SIMDE_FLOAT32_C( -415.74), SIMDE_FLOAT32_C( -155.48), SIMDE_FLOAT32_C( 420.69) }, UINT8_C(150), { SIMDE_FLOAT32_C( -821.11), SIMDE_FLOAT32_C( 227.91), SIMDE_FLOAT32_C( -839.72), SIMDE_FLOAT32_C( -138.31), SIMDE_FLOAT32_C( -810.92), SIMDE_FLOAT32_C( -646.88), SIMDE_FLOAT32_C( -27.91), SIMDE_FLOAT32_C( 31.18), SIMDE_FLOAT32_C( -682.77), SIMDE_FLOAT32_C( 440.61), SIMDE_FLOAT32_C( -527.34), SIMDE_FLOAT32_C( -872.92), SIMDE_FLOAT32_C( 6.92), SIMDE_FLOAT32_C( 971.50), SIMDE_FLOAT32_C( 567.37), SIMDE_FLOAT32_C( 556.44) }, { SIMDE_FLOAT32_C( 805.23), SIMDE_FLOAT32_C( -422.50), SIMDE_FLOAT32_C( 118.40), SIMDE_FLOAT32_C( 211.98), SIMDE_FLOAT32_C( 374.09), SIMDE_FLOAT32_C( -425.00), SIMDE_FLOAT32_C( 494.94), 
SIMDE_FLOAT32_C( 642.68), SIMDE_FLOAT32_C( -613.31), SIMDE_FLOAT32_C( 878.11), SIMDE_FLOAT32_C( 3.82), SIMDE_FLOAT32_C( -29.05), SIMDE_FLOAT32_C( -277.36), SIMDE_FLOAT32_C( -575.49), SIMDE_FLOAT32_C( -668.17), SIMDE_FLOAT32_C( -98.47) }, { SIMDE_FLOAT32_C( 440.29), SIMDE_FLOAT32_C( -422.50), SIMDE_FLOAT32_C( -839.72), SIMDE_FLOAT32_C( 10.12), SIMDE_FLOAT32_C( -810.92), SIMDE_FLOAT32_C( 406.75), SIMDE_FLOAT32_C( -203.40), SIMDE_FLOAT32_C( 31.18), SIMDE_FLOAT32_C( -717.04), SIMDE_FLOAT32_C( -731.42), SIMDE_FLOAT32_C( 811.69), SIMDE_FLOAT32_C( -616.83), SIMDE_FLOAT32_C( 361.14), SIMDE_FLOAT32_C( -415.74), SIMDE_FLOAT32_C( -155.48), SIMDE_FLOAT32_C( 420.69) } }, { { SIMDE_FLOAT32_C( 652.42), SIMDE_FLOAT32_C( -507.89), SIMDE_FLOAT32_C( 763.22), SIMDE_FLOAT32_C( 841.50), SIMDE_FLOAT32_C( -154.77), SIMDE_FLOAT32_C( -264.68), SIMDE_FLOAT32_C( -127.32), SIMDE_FLOAT32_C( 162.46), SIMDE_FLOAT32_C( -824.07), SIMDE_FLOAT32_C( 345.34), SIMDE_FLOAT32_C( 289.54), SIMDE_FLOAT32_C( 182.85), SIMDE_FLOAT32_C( 316.84), SIMDE_FLOAT32_C( -143.09), SIMDE_FLOAT32_C( -260.71), SIMDE_FLOAT32_C( 122.07) }, UINT8_C( 26), { SIMDE_FLOAT32_C( 857.69), SIMDE_FLOAT32_C( -665.95), SIMDE_FLOAT32_C( -191.50), SIMDE_FLOAT32_C( -567.30), SIMDE_FLOAT32_C( 828.99), SIMDE_FLOAT32_C( -548.82), SIMDE_FLOAT32_C( -180.61), SIMDE_FLOAT32_C( 707.10), SIMDE_FLOAT32_C( 455.00), SIMDE_FLOAT32_C( 790.33), SIMDE_FLOAT32_C( -570.26), SIMDE_FLOAT32_C( 879.51), SIMDE_FLOAT32_C( -877.84), SIMDE_FLOAT32_C( 331.27), SIMDE_FLOAT32_C( 531.93), SIMDE_FLOAT32_C( -385.73) }, { SIMDE_FLOAT32_C( 94.49), SIMDE_FLOAT32_C( 373.43), SIMDE_FLOAT32_C( 459.51), SIMDE_FLOAT32_C( 829.81), SIMDE_FLOAT32_C( -753.89), SIMDE_FLOAT32_C( -378.03), SIMDE_FLOAT32_C( -994.27), SIMDE_FLOAT32_C( 591.46), SIMDE_FLOAT32_C( 911.50), SIMDE_FLOAT32_C( 188.58), SIMDE_FLOAT32_C( -91.70), SIMDE_FLOAT32_C( -231.58), SIMDE_FLOAT32_C( 927.87), SIMDE_FLOAT32_C( -969.63), SIMDE_FLOAT32_C( -797.18), SIMDE_FLOAT32_C( 785.57) }, { SIMDE_FLOAT32_C( 652.42), 
SIMDE_FLOAT32_C( -665.95), SIMDE_FLOAT32_C( 763.22), SIMDE_FLOAT32_C( -567.30), SIMDE_FLOAT32_C( -753.89), SIMDE_FLOAT32_C( -264.68), SIMDE_FLOAT32_C( -127.32), SIMDE_FLOAT32_C( 162.46), SIMDE_FLOAT32_C( -824.07), SIMDE_FLOAT32_C( 345.34), SIMDE_FLOAT32_C( 289.54), SIMDE_FLOAT32_C( 182.85), SIMDE_FLOAT32_C( 316.84), SIMDE_FLOAT32_C( -143.09), SIMDE_FLOAT32_C( -260.71), SIMDE_FLOAT32_C( 122.07) } }, { { SIMDE_FLOAT32_C( -635.58), SIMDE_FLOAT32_C( 11.32), SIMDE_FLOAT32_C( -781.74), SIMDE_FLOAT32_C( -806.59), SIMDE_FLOAT32_C( 462.50), SIMDE_FLOAT32_C( 37.65), SIMDE_FLOAT32_C( 900.51), SIMDE_FLOAT32_C( -82.50), SIMDE_FLOAT32_C( -172.02), SIMDE_FLOAT32_C( -669.76), SIMDE_FLOAT32_C( -202.99), SIMDE_FLOAT32_C( -49.85), SIMDE_FLOAT32_C( 661.51), SIMDE_FLOAT32_C( -671.06), SIMDE_FLOAT32_C( 564.42), SIMDE_FLOAT32_C( -244.00) }, UINT8_C( 76), { SIMDE_FLOAT32_C( 23.92), SIMDE_FLOAT32_C( -414.19), SIMDE_FLOAT32_C( 948.48), SIMDE_FLOAT32_C( 645.89), SIMDE_FLOAT32_C( -408.46), SIMDE_FLOAT32_C( 539.94), SIMDE_FLOAT32_C( 557.39), SIMDE_FLOAT32_C( 780.12), SIMDE_FLOAT32_C( -551.76), SIMDE_FLOAT32_C( -674.19), SIMDE_FLOAT32_C( 708.00), SIMDE_FLOAT32_C( -521.39), SIMDE_FLOAT32_C( -471.37), SIMDE_FLOAT32_C( 493.56), SIMDE_FLOAT32_C( -156.98), SIMDE_FLOAT32_C( 539.96) }, { SIMDE_FLOAT32_C( 711.83), SIMDE_FLOAT32_C( 36.43), SIMDE_FLOAT32_C( 2.46), SIMDE_FLOAT32_C( -250.52), SIMDE_FLOAT32_C( -63.07), SIMDE_FLOAT32_C( 919.96), SIMDE_FLOAT32_C( 577.46), SIMDE_FLOAT32_C( 267.18), SIMDE_FLOAT32_C( -283.03), SIMDE_FLOAT32_C( -472.40), SIMDE_FLOAT32_C( -71.31), SIMDE_FLOAT32_C( 45.91), SIMDE_FLOAT32_C( -907.98), SIMDE_FLOAT32_C( 684.69), SIMDE_FLOAT32_C( -251.73), SIMDE_FLOAT32_C( 115.95) }, { SIMDE_FLOAT32_C( -635.58), SIMDE_FLOAT32_C( 11.32), SIMDE_FLOAT32_C( 2.46), SIMDE_FLOAT32_C( -250.52), SIMDE_FLOAT32_C( 462.50), SIMDE_FLOAT32_C( 37.65), SIMDE_FLOAT32_C( 557.39), SIMDE_FLOAT32_C( -82.50), SIMDE_FLOAT32_C( -172.02), SIMDE_FLOAT32_C( -669.76), SIMDE_FLOAT32_C( -202.99), SIMDE_FLOAT32_C( 
-49.85), SIMDE_FLOAT32_C( 661.51), SIMDE_FLOAT32_C( -671.06), SIMDE_FLOAT32_C( 564.42), SIMDE_FLOAT32_C( -244.00) } }, { { SIMDE_FLOAT32_C( -729.51), SIMDE_FLOAT32_C( -303.24), SIMDE_FLOAT32_C( -238.16), SIMDE_FLOAT32_C( -137.97), SIMDE_FLOAT32_C( -763.30), SIMDE_FLOAT32_C( -680.77), SIMDE_FLOAT32_C( -357.84), SIMDE_FLOAT32_C( -315.06), SIMDE_FLOAT32_C( -354.96), SIMDE_FLOAT32_C( -649.85), SIMDE_FLOAT32_C( 163.54), SIMDE_FLOAT32_C( 173.67), SIMDE_FLOAT32_C( 843.72), SIMDE_FLOAT32_C( -993.43), SIMDE_FLOAT32_C( -286.37), SIMDE_FLOAT32_C( 555.54) }, UINT8_C(138), { SIMDE_FLOAT32_C( 716.09), SIMDE_FLOAT32_C( -694.98), SIMDE_FLOAT32_C( 979.93), SIMDE_FLOAT32_C( 636.05), SIMDE_FLOAT32_C( 882.48), SIMDE_FLOAT32_C( 247.11), SIMDE_FLOAT32_C( -646.99), SIMDE_FLOAT32_C( -589.92), SIMDE_FLOAT32_C( -824.20), SIMDE_FLOAT32_C( 398.92), SIMDE_FLOAT32_C( -497.90), SIMDE_FLOAT32_C( 860.48), SIMDE_FLOAT32_C( -852.81), SIMDE_FLOAT32_C( 618.05), SIMDE_FLOAT32_C( -869.02), SIMDE_FLOAT32_C( -156.05) }, { SIMDE_FLOAT32_C( -620.12), SIMDE_FLOAT32_C( -6.99), SIMDE_FLOAT32_C( 80.65), SIMDE_FLOAT32_C( -300.88), SIMDE_FLOAT32_C( 635.17), SIMDE_FLOAT32_C( 765.59), SIMDE_FLOAT32_C( 344.16), SIMDE_FLOAT32_C( 985.32), SIMDE_FLOAT32_C( -70.87), SIMDE_FLOAT32_C( -482.17), SIMDE_FLOAT32_C( 829.04), SIMDE_FLOAT32_C( -64.30), SIMDE_FLOAT32_C( 231.46), SIMDE_FLOAT32_C( 384.58), SIMDE_FLOAT32_C( 978.69), SIMDE_FLOAT32_C( -52.45) }, { SIMDE_FLOAT32_C( -729.51), SIMDE_FLOAT32_C( -694.98), SIMDE_FLOAT32_C( -238.16), SIMDE_FLOAT32_C( -300.88), SIMDE_FLOAT32_C( -763.30), SIMDE_FLOAT32_C( -680.77), SIMDE_FLOAT32_C( -357.84), SIMDE_FLOAT32_C( -589.92), SIMDE_FLOAT32_C( -354.96), SIMDE_FLOAT32_C( -649.85), SIMDE_FLOAT32_C( 163.54), SIMDE_FLOAT32_C( 173.67), SIMDE_FLOAT32_C( 843.72), SIMDE_FLOAT32_C( -993.43), SIMDE_FLOAT32_C( -286.37), SIMDE_FLOAT32_C( 555.54) } }, { { SIMDE_FLOAT32_C( 689.60), SIMDE_FLOAT32_C( 958.62), SIMDE_FLOAT32_C( -416.41), SIMDE_FLOAT32_C( 572.08), SIMDE_FLOAT32_C( 205.73), 
SIMDE_FLOAT32_C( -63.40), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( 381.53), SIMDE_FLOAT32_C( -664.48), SIMDE_FLOAT32_C( -515.74), SIMDE_FLOAT32_C( 242.01), SIMDE_FLOAT32_C( -517.29), SIMDE_FLOAT32_C( -897.69), SIMDE_FLOAT32_C( 372.99), SIMDE_FLOAT32_C( 326.67), SIMDE_FLOAT32_C( -517.81) }, UINT8_C(170), { SIMDE_FLOAT32_C( -592.68), SIMDE_FLOAT32_C( 181.31), SIMDE_FLOAT32_C( -998.84), SIMDE_FLOAT32_C( -827.09), SIMDE_FLOAT32_C( -474.53), SIMDE_FLOAT32_C( 986.49), SIMDE_FLOAT32_C( 102.04), SIMDE_FLOAT32_C( 43.30), SIMDE_FLOAT32_C( 815.53), SIMDE_FLOAT32_C( -962.26), SIMDE_FLOAT32_C( -725.24), SIMDE_FLOAT32_C( 200.11), SIMDE_FLOAT32_C( -983.57), SIMDE_FLOAT32_C( 222.30), SIMDE_FLOAT32_C( -110.29), SIMDE_FLOAT32_C( 975.06) }, { SIMDE_FLOAT32_C( 805.89), SIMDE_FLOAT32_C( -538.21), SIMDE_FLOAT32_C( 180.79), SIMDE_FLOAT32_C( -257.50), SIMDE_FLOAT32_C( -556.06), SIMDE_FLOAT32_C( -437.68), SIMDE_FLOAT32_C( 78.02), SIMDE_FLOAT32_C( -71.80), SIMDE_FLOAT32_C( 804.33), SIMDE_FLOAT32_C( 560.73), SIMDE_FLOAT32_C( 30.51), SIMDE_FLOAT32_C( 177.31), SIMDE_FLOAT32_C( -112.60), SIMDE_FLOAT32_C( 512.71), SIMDE_FLOAT32_C( 543.31), SIMDE_FLOAT32_C( 294.71) }, { SIMDE_FLOAT32_C( 689.60), SIMDE_FLOAT32_C( -538.21), SIMDE_FLOAT32_C( -416.41), SIMDE_FLOAT32_C( -827.09), SIMDE_FLOAT32_C( 205.73), SIMDE_FLOAT32_C( -437.68), SIMDE_FLOAT32_C( 982.16), SIMDE_FLOAT32_C( -71.80), SIMDE_FLOAT32_C( -664.48), SIMDE_FLOAT32_C( -515.74), SIMDE_FLOAT32_C( 242.01), SIMDE_FLOAT32_C( -517.29), SIMDE_FLOAT32_C( -897.69), SIMDE_FLOAT32_C( 372.99), SIMDE_FLOAT32_C( 326.67), SIMDE_FLOAT32_C( -517.81) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_mm512_mask_min_ps(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int 
test_simde_mm512_maskz_min_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { { UINT16_C( 9901), { SIMDE_FLOAT32_C( -544.18), SIMDE_FLOAT32_C( 601.49), SIMDE_FLOAT32_C( -304.11), SIMDE_FLOAT32_C( 528.27), SIMDE_FLOAT32_C( -713.39), SIMDE_FLOAT32_C( -743.38), SIMDE_FLOAT32_C( 273.42), SIMDE_FLOAT32_C( -14.77), SIMDE_FLOAT32_C( -697.44), SIMDE_FLOAT32_C( -402.15), SIMDE_FLOAT32_C( -91.86), SIMDE_FLOAT32_C( 77.83), SIMDE_FLOAT32_C( -962.39), SIMDE_FLOAT32_C( 568.28), SIMDE_FLOAT32_C( 531.52), SIMDE_FLOAT32_C( 83.25) }, { SIMDE_FLOAT32_C( 426.04), SIMDE_FLOAT32_C( -179.26), SIMDE_FLOAT32_C( 988.33), SIMDE_FLOAT32_C( 950.60), SIMDE_FLOAT32_C( 437.57), SIMDE_FLOAT32_C( 78.30), SIMDE_FLOAT32_C( -903.72), SIMDE_FLOAT32_C( -100.80), SIMDE_FLOAT32_C( -508.84), SIMDE_FLOAT32_C( -791.31), SIMDE_FLOAT32_C( 900.01), SIMDE_FLOAT32_C( 830.49), SIMDE_FLOAT32_C( -949.05), SIMDE_FLOAT32_C( -690.37), SIMDE_FLOAT32_C( 246.57), SIMDE_FLOAT32_C( -493.23) }, { SIMDE_FLOAT32_C( -544.18), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -304.11), SIMDE_FLOAT32_C( 528.27), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -743.38), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -100.80), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -791.31), SIMDE_FLOAT32_C( -91.86), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -690.37), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C( 3227), { SIMDE_FLOAT32_C( -964.97), SIMDE_FLOAT32_C( -802.27), SIMDE_FLOAT32_C( -800.92), SIMDE_FLOAT32_C( 308.45), SIMDE_FLOAT32_C( 182.97), SIMDE_FLOAT32_C( -498.36), SIMDE_FLOAT32_C( 906.30), SIMDE_FLOAT32_C( -908.89), SIMDE_FLOAT32_C( 579.47), SIMDE_FLOAT32_C( 943.91), SIMDE_FLOAT32_C( 659.39), SIMDE_FLOAT32_C( 111.00), SIMDE_FLOAT32_C( 27.16), SIMDE_FLOAT32_C( 85.43), SIMDE_FLOAT32_C( 931.73), SIMDE_FLOAT32_C( 15.49) }, { SIMDE_FLOAT32_C( 36.03), SIMDE_FLOAT32_C( 369.30), SIMDE_FLOAT32_C( 
-906.21), SIMDE_FLOAT32_C( 132.31), SIMDE_FLOAT32_C( -731.50), SIMDE_FLOAT32_C( -415.05), SIMDE_FLOAT32_C( 341.00), SIMDE_FLOAT32_C( -831.49), SIMDE_FLOAT32_C( -584.56), SIMDE_FLOAT32_C( 391.95), SIMDE_FLOAT32_C( -521.86), SIMDE_FLOAT32_C( 662.01), SIMDE_FLOAT32_C( 898.72), SIMDE_FLOAT32_C( -610.74), SIMDE_FLOAT32_C( 604.47), SIMDE_FLOAT32_C( 933.75) }, { SIMDE_FLOAT32_C( -964.97), SIMDE_FLOAT32_C( -802.27), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 132.31), SIMDE_FLOAT32_C( -731.50), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -908.89), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -521.86), SIMDE_FLOAT32_C( 111.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(42061), { SIMDE_FLOAT32_C( 242.20), SIMDE_FLOAT32_C( 769.96), SIMDE_FLOAT32_C( -694.80), SIMDE_FLOAT32_C( 148.51), SIMDE_FLOAT32_C( 861.07), SIMDE_FLOAT32_C( 884.67), SIMDE_FLOAT32_C( 92.42), SIMDE_FLOAT32_C( 520.46), SIMDE_FLOAT32_C( -4.33), SIMDE_FLOAT32_C( -880.42), SIMDE_FLOAT32_C( -394.11), SIMDE_FLOAT32_C( -72.60), SIMDE_FLOAT32_C( 135.07), SIMDE_FLOAT32_C( 641.92), SIMDE_FLOAT32_C( -703.30), SIMDE_FLOAT32_C( 228.86) }, { SIMDE_FLOAT32_C( -225.77), SIMDE_FLOAT32_C( -434.80), SIMDE_FLOAT32_C( 813.81), SIMDE_FLOAT32_C( -884.77), SIMDE_FLOAT32_C( -266.29), SIMDE_FLOAT32_C( -770.76), SIMDE_FLOAT32_C( 507.18), SIMDE_FLOAT32_C( 211.84), SIMDE_FLOAT32_C( 891.25), SIMDE_FLOAT32_C( 405.90), SIMDE_FLOAT32_C( 601.10), SIMDE_FLOAT32_C( 495.72), SIMDE_FLOAT32_C( 339.65), SIMDE_FLOAT32_C( -811.90), SIMDE_FLOAT32_C( 299.27), SIMDE_FLOAT32_C( -418.15) }, { SIMDE_FLOAT32_C( -225.77), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -694.80), SIMDE_FLOAT32_C( -884.77), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 92.42), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -394.11), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -811.90), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -418.15) } }, { UINT16_C(16182), { SIMDE_FLOAT32_C( 730.36), SIMDE_FLOAT32_C( 819.13), SIMDE_FLOAT32_C( 489.14), SIMDE_FLOAT32_C( -177.22), SIMDE_FLOAT32_C( 339.58), SIMDE_FLOAT32_C( -515.19), SIMDE_FLOAT32_C( -57.64), SIMDE_FLOAT32_C( 945.47), SIMDE_FLOAT32_C( 412.21), SIMDE_FLOAT32_C( -922.57), SIMDE_FLOAT32_C( 587.39), SIMDE_FLOAT32_C( 708.91), SIMDE_FLOAT32_C( 306.29), SIMDE_FLOAT32_C( -638.38), SIMDE_FLOAT32_C( -725.89), SIMDE_FLOAT32_C( 120.10) }, { SIMDE_FLOAT32_C( -523.15), SIMDE_FLOAT32_C( 7.82), SIMDE_FLOAT32_C( 349.34), SIMDE_FLOAT32_C( 984.04), SIMDE_FLOAT32_C( -780.34), SIMDE_FLOAT32_C( 240.59), SIMDE_FLOAT32_C( 389.94), SIMDE_FLOAT32_C( 820.76), SIMDE_FLOAT32_C( -263.69), SIMDE_FLOAT32_C( -270.41), SIMDE_FLOAT32_C( -991.14), SIMDE_FLOAT32_C( -964.41), SIMDE_FLOAT32_C( 311.44), SIMDE_FLOAT32_C( 966.92), SIMDE_FLOAT32_C( 640.06), SIMDE_FLOAT32_C( 41.80) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 7.82), SIMDE_FLOAT32_C( 349.34), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -780.34), SIMDE_FLOAT32_C( -515.19), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -263.69), SIMDE_FLOAT32_C( -922.57), SIMDE_FLOAT32_C( -991.14), SIMDE_FLOAT32_C( -964.41), SIMDE_FLOAT32_C( 306.29), SIMDE_FLOAT32_C( -638.38), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C( 5758), { SIMDE_FLOAT32_C( 864.58), SIMDE_FLOAT32_C( 125.63), SIMDE_FLOAT32_C( 614.01), SIMDE_FLOAT32_C( -193.06), SIMDE_FLOAT32_C( 71.10), SIMDE_FLOAT32_C( 26.23), SIMDE_FLOAT32_C( -115.63), SIMDE_FLOAT32_C( -341.51), SIMDE_FLOAT32_C( -264.86), SIMDE_FLOAT32_C( -809.33), SIMDE_FLOAT32_C( 20.10), SIMDE_FLOAT32_C( 9.25), SIMDE_FLOAT32_C( 310.77), SIMDE_FLOAT32_C( 496.96), SIMDE_FLOAT32_C( -982.93), SIMDE_FLOAT32_C( -339.89) }, { SIMDE_FLOAT32_C( 481.00), SIMDE_FLOAT32_C( -763.28), SIMDE_FLOAT32_C( 900.70), SIMDE_FLOAT32_C( -129.07), SIMDE_FLOAT32_C( -942.51), SIMDE_FLOAT32_C( -362.99), SIMDE_FLOAT32_C( 600.52), SIMDE_FLOAT32_C( -933.65), 
SIMDE_FLOAT32_C( -327.41), SIMDE_FLOAT32_C( -88.04), SIMDE_FLOAT32_C( -966.73), SIMDE_FLOAT32_C( -687.35), SIMDE_FLOAT32_C( 953.77), SIMDE_FLOAT32_C( 819.32), SIMDE_FLOAT32_C( 441.85), SIMDE_FLOAT32_C( 818.35) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -763.28), SIMDE_FLOAT32_C( 614.01), SIMDE_FLOAT32_C( -193.06), SIMDE_FLOAT32_C( -942.51), SIMDE_FLOAT32_C( -362.99), SIMDE_FLOAT32_C( -115.63), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -809.33), SIMDE_FLOAT32_C( -966.73), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 310.77), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C( 3949), { SIMDE_FLOAT32_C( -374.71), SIMDE_FLOAT32_C( -983.95), SIMDE_FLOAT32_C( -917.91), SIMDE_FLOAT32_C( 509.66), SIMDE_FLOAT32_C( -325.47), SIMDE_FLOAT32_C( -182.77), SIMDE_FLOAT32_C( 700.33), SIMDE_FLOAT32_C( 694.64), SIMDE_FLOAT32_C( 826.48), SIMDE_FLOAT32_C( 11.09), SIMDE_FLOAT32_C( 191.60), SIMDE_FLOAT32_C( 843.55), SIMDE_FLOAT32_C( 671.20), SIMDE_FLOAT32_C( -327.41), SIMDE_FLOAT32_C( -919.73), SIMDE_FLOAT32_C( 571.90) }, { SIMDE_FLOAT32_C( 543.53), SIMDE_FLOAT32_C( -862.24), SIMDE_FLOAT32_C( -791.09), SIMDE_FLOAT32_C( 144.05), SIMDE_FLOAT32_C( -795.89), SIMDE_FLOAT32_C( -118.50), SIMDE_FLOAT32_C( -943.99), SIMDE_FLOAT32_C( -762.62), SIMDE_FLOAT32_C( 194.16), SIMDE_FLOAT32_C( -990.23), SIMDE_FLOAT32_C( -943.30), SIMDE_FLOAT32_C( -363.99), SIMDE_FLOAT32_C( 828.12), SIMDE_FLOAT32_C( 1.65), SIMDE_FLOAT32_C( 691.87), SIMDE_FLOAT32_C( -546.59) }, { SIMDE_FLOAT32_C( -374.71), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -917.91), SIMDE_FLOAT32_C( 144.05), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -182.77), SIMDE_FLOAT32_C( -943.99), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 194.16), SIMDE_FLOAT32_C( -990.23), SIMDE_FLOAT32_C( -943.30), SIMDE_FLOAT32_C( -363.99), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C( 9530), { SIMDE_FLOAT32_C( 963.07), SIMDE_FLOAT32_C( 692.24), 
SIMDE_FLOAT32_C( -408.80), SIMDE_FLOAT32_C( 663.40), SIMDE_FLOAT32_C( 386.88), SIMDE_FLOAT32_C( -582.32), SIMDE_FLOAT32_C( -325.51), SIMDE_FLOAT32_C( -421.52), SIMDE_FLOAT32_C( -738.77), SIMDE_FLOAT32_C( -654.30), SIMDE_FLOAT32_C( 251.07), SIMDE_FLOAT32_C( -658.50), SIMDE_FLOAT32_C( 917.60), SIMDE_FLOAT32_C( -205.40), SIMDE_FLOAT32_C( -520.74), SIMDE_FLOAT32_C( -873.49) }, { SIMDE_FLOAT32_C( 938.65), SIMDE_FLOAT32_C( -316.63), SIMDE_FLOAT32_C( 8.01), SIMDE_FLOAT32_C( 994.66), SIMDE_FLOAT32_C( -79.25), SIMDE_FLOAT32_C( -797.83), SIMDE_FLOAT32_C( -995.57), SIMDE_FLOAT32_C( -22.54), SIMDE_FLOAT32_C( -161.82), SIMDE_FLOAT32_C( 832.55), SIMDE_FLOAT32_C( 979.11), SIMDE_FLOAT32_C( -469.95), SIMDE_FLOAT32_C( -714.04), SIMDE_FLOAT32_C( -3.19), SIMDE_FLOAT32_C( -695.98), SIMDE_FLOAT32_C( -750.96) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -316.63), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 663.40), SIMDE_FLOAT32_C( -79.25), SIMDE_FLOAT32_C( -797.83), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -738.77), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 251.07), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -205.40), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(20037), { SIMDE_FLOAT32_C( 912.44), SIMDE_FLOAT32_C( -924.07), SIMDE_FLOAT32_C( 312.90), SIMDE_FLOAT32_C( -413.07), SIMDE_FLOAT32_C( -345.59), SIMDE_FLOAT32_C( 574.13), SIMDE_FLOAT32_C( -67.37), SIMDE_FLOAT32_C( 905.48), SIMDE_FLOAT32_C( 915.63), SIMDE_FLOAT32_C( -149.77), SIMDE_FLOAT32_C( -299.92), SIMDE_FLOAT32_C( -605.11), SIMDE_FLOAT32_C( -23.27), SIMDE_FLOAT32_C( -361.27), SIMDE_FLOAT32_C( 78.26), SIMDE_FLOAT32_C( 984.74) }, { SIMDE_FLOAT32_C( -366.61), SIMDE_FLOAT32_C( 999.01), SIMDE_FLOAT32_C( -813.09), SIMDE_FLOAT32_C( -362.18), SIMDE_FLOAT32_C( -23.53), SIMDE_FLOAT32_C( 25.08), SIMDE_FLOAT32_C( -529.63), SIMDE_FLOAT32_C( -44.42), SIMDE_FLOAT32_C( 555.13), SIMDE_FLOAT32_C( -243.67), SIMDE_FLOAT32_C( 952.39), SIMDE_FLOAT32_C( 859.15), SIMDE_FLOAT32_C( 5.37), 
SIMDE_FLOAT32_C( -358.55), SIMDE_FLOAT32_C( -245.64), SIMDE_FLOAT32_C( -82.19) }, { SIMDE_FLOAT32_C( -366.61), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -813.09), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -529.63), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -243.67), SIMDE_FLOAT32_C( -299.92), SIMDE_FLOAT32_C( -605.11), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -245.64), SIMDE_FLOAT32_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_mm512_maskz_min_ps(test_vec[i].k, a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_min_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 157.06), SIMDE_FLOAT64_C( 547.17), SIMDE_FLOAT64_C( 473.32), SIMDE_FLOAT64_C( -357.52), SIMDE_FLOAT64_C( 296.15), SIMDE_FLOAT64_C( -304.34), SIMDE_FLOAT64_C( 366.32), SIMDE_FLOAT64_C( 139.64) }, { SIMDE_FLOAT64_C( -267.42), SIMDE_FLOAT64_C( -660.77), SIMDE_FLOAT64_C( 238.48), SIMDE_FLOAT64_C( -953.73), SIMDE_FLOAT64_C( 511.02), SIMDE_FLOAT64_C( 236.50), SIMDE_FLOAT64_C( -563.78), SIMDE_FLOAT64_C( -854.07) }, { SIMDE_FLOAT64_C( -267.42), SIMDE_FLOAT64_C( -660.77), SIMDE_FLOAT64_C( 238.48), SIMDE_FLOAT64_C( -953.73), SIMDE_FLOAT64_C( 296.15), SIMDE_FLOAT64_C( -304.34), SIMDE_FLOAT64_C( -563.78), SIMDE_FLOAT64_C( -854.07) } }, { { SIMDE_FLOAT64_C( 95.48), SIMDE_FLOAT64_C( -524.56), SIMDE_FLOAT64_C( -361.69), SIMDE_FLOAT64_C( 32.98), SIMDE_FLOAT64_C( -239.25), SIMDE_FLOAT64_C( 730.39), SIMDE_FLOAT64_C( -362.23), SIMDE_FLOAT64_C( -99.17) }, { SIMDE_FLOAT64_C( -102.36), SIMDE_FLOAT64_C( 530.27), SIMDE_FLOAT64_C( -659.60), SIMDE_FLOAT64_C( -663.13), 
SIMDE_FLOAT64_C( 28.77), SIMDE_FLOAT64_C( -184.41), SIMDE_FLOAT64_C( 436.90), SIMDE_FLOAT64_C( -814.16) }, { SIMDE_FLOAT64_C( -102.36), SIMDE_FLOAT64_C( -524.56), SIMDE_FLOAT64_C( -659.60), SIMDE_FLOAT64_C( -663.13), SIMDE_FLOAT64_C( -239.25), SIMDE_FLOAT64_C( -184.41), SIMDE_FLOAT64_C( -362.23), SIMDE_FLOAT64_C( -814.16) } }, { { SIMDE_FLOAT64_C( -637.24), SIMDE_FLOAT64_C( -89.78), SIMDE_FLOAT64_C( -171.68), SIMDE_FLOAT64_C( 658.92), SIMDE_FLOAT64_C( 605.88), SIMDE_FLOAT64_C( -805.36), SIMDE_FLOAT64_C( -201.45), SIMDE_FLOAT64_C( -661.54) }, { SIMDE_FLOAT64_C( -466.13), SIMDE_FLOAT64_C( -962.97), SIMDE_FLOAT64_C( -615.27), SIMDE_FLOAT64_C( -955.11), SIMDE_FLOAT64_C( 273.54), SIMDE_FLOAT64_C( -179.05), SIMDE_FLOAT64_C( -809.18), SIMDE_FLOAT64_C( -630.99) }, { SIMDE_FLOAT64_C( -637.24), SIMDE_FLOAT64_C( -962.97), SIMDE_FLOAT64_C( -615.27), SIMDE_FLOAT64_C( -955.11), SIMDE_FLOAT64_C( 273.54), SIMDE_FLOAT64_C( -805.36), SIMDE_FLOAT64_C( -809.18), SIMDE_FLOAT64_C( -661.54) } }, { { SIMDE_FLOAT64_C( 296.39), SIMDE_FLOAT64_C( -170.87), SIMDE_FLOAT64_C( 401.99), SIMDE_FLOAT64_C( -942.85), SIMDE_FLOAT64_C( -440.48), SIMDE_FLOAT64_C( -960.24), SIMDE_FLOAT64_C( -42.02), SIMDE_FLOAT64_C( 457.16) }, { SIMDE_FLOAT64_C( 570.04), SIMDE_FLOAT64_C( 298.38), SIMDE_FLOAT64_C( 794.03), SIMDE_FLOAT64_C( -401.19), SIMDE_FLOAT64_C( -886.02), SIMDE_FLOAT64_C( 230.93), SIMDE_FLOAT64_C( -215.35), SIMDE_FLOAT64_C( -523.26) }, { SIMDE_FLOAT64_C( 296.39), SIMDE_FLOAT64_C( -170.87), SIMDE_FLOAT64_C( 401.99), SIMDE_FLOAT64_C( -942.85), SIMDE_FLOAT64_C( -886.02), SIMDE_FLOAT64_C( -960.24), SIMDE_FLOAT64_C( -215.35), SIMDE_FLOAT64_C( -523.26) } }, { { SIMDE_FLOAT64_C( -858.85), SIMDE_FLOAT64_C( 612.96), SIMDE_FLOAT64_C( -864.34), SIMDE_FLOAT64_C( 747.03), SIMDE_FLOAT64_C( 807.61), SIMDE_FLOAT64_C( -65.79), SIMDE_FLOAT64_C( -914.51), SIMDE_FLOAT64_C( -658.53) }, { SIMDE_FLOAT64_C( -28.75), SIMDE_FLOAT64_C( -529.78), SIMDE_FLOAT64_C( -613.63), SIMDE_FLOAT64_C( -755.21), SIMDE_FLOAT64_C( 291.18), 
SIMDE_FLOAT64_C( -422.81), SIMDE_FLOAT64_C( -386.20), SIMDE_FLOAT64_C( -412.43) }, { SIMDE_FLOAT64_C( -858.85), SIMDE_FLOAT64_C( -529.78), SIMDE_FLOAT64_C( -864.34), SIMDE_FLOAT64_C( -755.21), SIMDE_FLOAT64_C( 291.18), SIMDE_FLOAT64_C( -422.81), SIMDE_FLOAT64_C( -914.51), SIMDE_FLOAT64_C( -658.53) } }, { { SIMDE_FLOAT64_C( 406.31), SIMDE_FLOAT64_C( -984.21), SIMDE_FLOAT64_C( -355.28), SIMDE_FLOAT64_C( 965.83), SIMDE_FLOAT64_C( -944.44), SIMDE_FLOAT64_C( 602.70), SIMDE_FLOAT64_C( 422.99), SIMDE_FLOAT64_C( 625.59) }, { SIMDE_FLOAT64_C( -98.92), SIMDE_FLOAT64_C( 217.03), SIMDE_FLOAT64_C( -775.60), SIMDE_FLOAT64_C( 15.06), SIMDE_FLOAT64_C( -552.04), SIMDE_FLOAT64_C( 9.05), SIMDE_FLOAT64_C( 491.80), SIMDE_FLOAT64_C( -410.89) }, { SIMDE_FLOAT64_C( -98.92), SIMDE_FLOAT64_C( -984.21), SIMDE_FLOAT64_C( -775.60), SIMDE_FLOAT64_C( 15.06), SIMDE_FLOAT64_C( -944.44), SIMDE_FLOAT64_C( 9.05), SIMDE_FLOAT64_C( 422.99), SIMDE_FLOAT64_C( -410.89) } }, { { SIMDE_FLOAT64_C( -377.99), SIMDE_FLOAT64_C( 627.46), SIMDE_FLOAT64_C( -663.86), SIMDE_FLOAT64_C( -570.38), SIMDE_FLOAT64_C( -438.33), SIMDE_FLOAT64_C( -578.38), SIMDE_FLOAT64_C( -228.91), SIMDE_FLOAT64_C( 532.92) }, { SIMDE_FLOAT64_C( -108.15), SIMDE_FLOAT64_C( 157.46), SIMDE_FLOAT64_C( 777.71), SIMDE_FLOAT64_C( -816.98), SIMDE_FLOAT64_C( 734.65), SIMDE_FLOAT64_C( -608.49), SIMDE_FLOAT64_C( -229.41), SIMDE_FLOAT64_C( 140.96) }, { SIMDE_FLOAT64_C( -377.99), SIMDE_FLOAT64_C( 157.46), SIMDE_FLOAT64_C( -663.86), SIMDE_FLOAT64_C( -816.98), SIMDE_FLOAT64_C( -438.33), SIMDE_FLOAT64_C( -608.49), SIMDE_FLOAT64_C( -229.41), SIMDE_FLOAT64_C( 140.96) } }, { { SIMDE_FLOAT64_C( -592.70), SIMDE_FLOAT64_C( 415.31), SIMDE_FLOAT64_C( 106.79), SIMDE_FLOAT64_C( -537.14), SIMDE_FLOAT64_C( 18.00), SIMDE_FLOAT64_C( -470.22), SIMDE_FLOAT64_C( -911.55), SIMDE_FLOAT64_C( 919.08) }, { SIMDE_FLOAT64_C( 746.81), SIMDE_FLOAT64_C( -687.15), SIMDE_FLOAT64_C( -65.86), SIMDE_FLOAT64_C( -805.23), SIMDE_FLOAT64_C( 321.90), SIMDE_FLOAT64_C( -574.06), SIMDE_FLOAT64_C( 
-216.12), SIMDE_FLOAT64_C( 943.91) }, { SIMDE_FLOAT64_C( -592.70), SIMDE_FLOAT64_C( -687.15), SIMDE_FLOAT64_C( -65.86), SIMDE_FLOAT64_C( -805.23), SIMDE_FLOAT64_C( 18.00), SIMDE_FLOAT64_C( -574.06), SIMDE_FLOAT64_C( -911.55), SIMDE_FLOAT64_C( 919.08) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_mm512_min_pd(a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_min_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -740.28), SIMDE_FLOAT64_C( 179.12), SIMDE_FLOAT64_C( 703.74), SIMDE_FLOAT64_C( -735.57), SIMDE_FLOAT64_C( 454.00), SIMDE_FLOAT64_C( 876.10), SIMDE_FLOAT64_C( -57.17), SIMDE_FLOAT64_C( 149.70) }, UINT8_C(199), { SIMDE_FLOAT64_C( -721.50), SIMDE_FLOAT64_C( 591.47), SIMDE_FLOAT64_C( 852.88), SIMDE_FLOAT64_C( -826.41), SIMDE_FLOAT64_C( -586.93), SIMDE_FLOAT64_C( 924.98), SIMDE_FLOAT64_C( 87.20), SIMDE_FLOAT64_C( -494.10) }, { SIMDE_FLOAT64_C( 281.40), SIMDE_FLOAT64_C( 142.23), SIMDE_FLOAT64_C( 313.92), SIMDE_FLOAT64_C( 960.39), SIMDE_FLOAT64_C( 379.03), SIMDE_FLOAT64_C( -507.24), SIMDE_FLOAT64_C( 349.76), SIMDE_FLOAT64_C( -502.39) }, { SIMDE_FLOAT64_C( -721.50), SIMDE_FLOAT64_C( 142.23), SIMDE_FLOAT64_C( 313.92), SIMDE_FLOAT64_C( -735.57), SIMDE_FLOAT64_C( 454.00), SIMDE_FLOAT64_C( 876.10), SIMDE_FLOAT64_C( 87.20), SIMDE_FLOAT64_C( -502.39) } }, { { SIMDE_FLOAT64_C( -361.36), SIMDE_FLOAT64_C( 147.20), SIMDE_FLOAT64_C( 968.90), SIMDE_FLOAT64_C( -206.13), SIMDE_FLOAT64_C( -276.91), SIMDE_FLOAT64_C( 774.59), SIMDE_FLOAT64_C( 53.59), SIMDE_FLOAT64_C( 902.22) }, UINT8_C(242), { SIMDE_FLOAT64_C( 318.02), SIMDE_FLOAT64_C( 356.22), SIMDE_FLOAT64_C( 354.43), 
SIMDE_FLOAT64_C( -739.16), SIMDE_FLOAT64_C( -494.09), SIMDE_FLOAT64_C( -704.46), SIMDE_FLOAT64_C( -460.65), SIMDE_FLOAT64_C( -902.62) }, { SIMDE_FLOAT64_C( -851.58), SIMDE_FLOAT64_C( -287.06), SIMDE_FLOAT64_C( -489.54), SIMDE_FLOAT64_C( -926.59), SIMDE_FLOAT64_C( 800.14), SIMDE_FLOAT64_C( 16.35), SIMDE_FLOAT64_C( 354.81), SIMDE_FLOAT64_C( -57.63) }, { SIMDE_FLOAT64_C( -361.36), SIMDE_FLOAT64_C( -287.06), SIMDE_FLOAT64_C( 968.90), SIMDE_FLOAT64_C( -206.13), SIMDE_FLOAT64_C( -494.09), SIMDE_FLOAT64_C( -704.46), SIMDE_FLOAT64_C( -460.65), SIMDE_FLOAT64_C( -902.62) } }, { { SIMDE_FLOAT64_C( -669.73), SIMDE_FLOAT64_C( 315.20), SIMDE_FLOAT64_C( -678.61), SIMDE_FLOAT64_C( -176.97), SIMDE_FLOAT64_C( -335.04), SIMDE_FLOAT64_C( -181.00), SIMDE_FLOAT64_C( 461.67), SIMDE_FLOAT64_C( 812.17) }, UINT8_C(154), { SIMDE_FLOAT64_C( -744.46), SIMDE_FLOAT64_C( -464.74), SIMDE_FLOAT64_C( -437.51), SIMDE_FLOAT64_C( 309.13), SIMDE_FLOAT64_C( -562.52), SIMDE_FLOAT64_C( -959.18), SIMDE_FLOAT64_C( -372.85), SIMDE_FLOAT64_C( 793.70) }, { SIMDE_FLOAT64_C( 395.24), SIMDE_FLOAT64_C( -112.01), SIMDE_FLOAT64_C( -700.39), SIMDE_FLOAT64_C( 690.79), SIMDE_FLOAT64_C( 427.34), SIMDE_FLOAT64_C( -603.01), SIMDE_FLOAT64_C( 839.21), SIMDE_FLOAT64_C( -859.72) }, { SIMDE_FLOAT64_C( -669.73), SIMDE_FLOAT64_C( -464.74), SIMDE_FLOAT64_C( -678.61), SIMDE_FLOAT64_C( 309.13), SIMDE_FLOAT64_C( -562.52), SIMDE_FLOAT64_C( -181.00), SIMDE_FLOAT64_C( 461.67), SIMDE_FLOAT64_C( -859.72) } }, { { SIMDE_FLOAT64_C( -92.55), SIMDE_FLOAT64_C( 912.62), SIMDE_FLOAT64_C( 940.42), SIMDE_FLOAT64_C( 923.80), SIMDE_FLOAT64_C( 267.42), SIMDE_FLOAT64_C( -117.22), SIMDE_FLOAT64_C( -745.93), SIMDE_FLOAT64_C( -417.38) }, UINT8_C(242), { SIMDE_FLOAT64_C( 77.10), SIMDE_FLOAT64_C( 247.59), SIMDE_FLOAT64_C( -976.82), SIMDE_FLOAT64_C( -461.22), SIMDE_FLOAT64_C( 59.76), SIMDE_FLOAT64_C( -188.92), SIMDE_FLOAT64_C( -205.68), SIMDE_FLOAT64_C( 595.02) }, { SIMDE_FLOAT64_C( 373.57), SIMDE_FLOAT64_C( -896.55), SIMDE_FLOAT64_C( -967.50), 
SIMDE_FLOAT64_C( 414.38), SIMDE_FLOAT64_C( -269.40), SIMDE_FLOAT64_C( 826.19), SIMDE_FLOAT64_C( -190.37), SIMDE_FLOAT64_C( 618.59) }, { SIMDE_FLOAT64_C( -92.55), SIMDE_FLOAT64_C( -896.55), SIMDE_FLOAT64_C( 940.42), SIMDE_FLOAT64_C( 923.80), SIMDE_FLOAT64_C( -269.40), SIMDE_FLOAT64_C( -188.92), SIMDE_FLOAT64_C( -205.68), SIMDE_FLOAT64_C( 595.02) } }, { { SIMDE_FLOAT64_C( -874.20), SIMDE_FLOAT64_C( -499.59), SIMDE_FLOAT64_C( 45.93), SIMDE_FLOAT64_C( -477.21), SIMDE_FLOAT64_C( -660.38), SIMDE_FLOAT64_C( 186.20), SIMDE_FLOAT64_C( 430.24), SIMDE_FLOAT64_C( -747.76) }, UINT8_C(234), { SIMDE_FLOAT64_C( 354.05), SIMDE_FLOAT64_C( 519.67), SIMDE_FLOAT64_C( -990.60), SIMDE_FLOAT64_C( 608.12), SIMDE_FLOAT64_C( -897.71), SIMDE_FLOAT64_C( 213.58), SIMDE_FLOAT64_C( -314.78), SIMDE_FLOAT64_C( 349.88) }, { SIMDE_FLOAT64_C( 236.76), SIMDE_FLOAT64_C( 223.99), SIMDE_FLOAT64_C( -590.36), SIMDE_FLOAT64_C( -952.16), SIMDE_FLOAT64_C( -981.69), SIMDE_FLOAT64_C( -995.35), SIMDE_FLOAT64_C( 421.40), SIMDE_FLOAT64_C( -878.24) }, { SIMDE_FLOAT64_C( -874.20), SIMDE_FLOAT64_C( 223.99), SIMDE_FLOAT64_C( 45.93), SIMDE_FLOAT64_C( -952.16), SIMDE_FLOAT64_C( -660.38), SIMDE_FLOAT64_C( -995.35), SIMDE_FLOAT64_C( -314.78), SIMDE_FLOAT64_C( -878.24) } }, { { SIMDE_FLOAT64_C( -962.85), SIMDE_FLOAT64_C( -164.21), SIMDE_FLOAT64_C( -147.64), SIMDE_FLOAT64_C( 863.35), SIMDE_FLOAT64_C( 645.41), SIMDE_FLOAT64_C( -529.05), SIMDE_FLOAT64_C( 989.15), SIMDE_FLOAT64_C( -854.17) }, UINT8_C(104), { SIMDE_FLOAT64_C( -488.06), SIMDE_FLOAT64_C( -514.55), SIMDE_FLOAT64_C( -296.92), SIMDE_FLOAT64_C( 942.19), SIMDE_FLOAT64_C( -262.31), SIMDE_FLOAT64_C( 829.69), SIMDE_FLOAT64_C( 296.23), SIMDE_FLOAT64_C( -742.64) }, { SIMDE_FLOAT64_C( 839.10), SIMDE_FLOAT64_C( -95.65), SIMDE_FLOAT64_C( -640.35), SIMDE_FLOAT64_C( 52.68), SIMDE_FLOAT64_C( 589.57), SIMDE_FLOAT64_C( 709.53), SIMDE_FLOAT64_C( -710.56), SIMDE_FLOAT64_C( -186.44) }, { SIMDE_FLOAT64_C( -962.85), SIMDE_FLOAT64_C( -164.21), SIMDE_FLOAT64_C( -147.64), SIMDE_FLOAT64_C( 
52.68), SIMDE_FLOAT64_C( 645.41), SIMDE_FLOAT64_C( 709.53), SIMDE_FLOAT64_C( -710.56), SIMDE_FLOAT64_C( -854.17) } }, { { SIMDE_FLOAT64_C( -880.84), SIMDE_FLOAT64_C( -662.73), SIMDE_FLOAT64_C( -168.13), SIMDE_FLOAT64_C( -876.18), SIMDE_FLOAT64_C( 758.68), SIMDE_FLOAT64_C( -46.37), SIMDE_FLOAT64_C( -839.03), SIMDE_FLOAT64_C( -405.54) }, UINT8_C(236), { SIMDE_FLOAT64_C( -975.68), SIMDE_FLOAT64_C( -760.13), SIMDE_FLOAT64_C( -723.06), SIMDE_FLOAT64_C( -986.53), SIMDE_FLOAT64_C( -614.30), SIMDE_FLOAT64_C( 793.81), SIMDE_FLOAT64_C( -474.59), SIMDE_FLOAT64_C( -128.85) }, { SIMDE_FLOAT64_C( -503.11), SIMDE_FLOAT64_C( -532.40), SIMDE_FLOAT64_C( 608.84), SIMDE_FLOAT64_C( -673.42), SIMDE_FLOAT64_C( 763.83), SIMDE_FLOAT64_C( 866.20), SIMDE_FLOAT64_C( -834.32), SIMDE_FLOAT64_C( -331.82) }, { SIMDE_FLOAT64_C( -880.84), SIMDE_FLOAT64_C( -662.73), SIMDE_FLOAT64_C( -723.06), SIMDE_FLOAT64_C( -986.53), SIMDE_FLOAT64_C( 758.68), SIMDE_FLOAT64_C( 793.81), SIMDE_FLOAT64_C( -834.32), SIMDE_FLOAT64_C( -331.82) } }, { { SIMDE_FLOAT64_C( -774.15), SIMDE_FLOAT64_C( 218.36), SIMDE_FLOAT64_C( -742.25), SIMDE_FLOAT64_C( 935.38), SIMDE_FLOAT64_C( 507.80), SIMDE_FLOAT64_C( 71.31), SIMDE_FLOAT64_C( -945.46), SIMDE_FLOAT64_C( 845.07) }, UINT8_C(135), { SIMDE_FLOAT64_C( -821.64), SIMDE_FLOAT64_C( 603.75), SIMDE_FLOAT64_C( -143.18), SIMDE_FLOAT64_C( -660.67), SIMDE_FLOAT64_C( -801.79), SIMDE_FLOAT64_C( -337.19), SIMDE_FLOAT64_C( -636.35), SIMDE_FLOAT64_C( -561.92) }, { SIMDE_FLOAT64_C( -60.25), SIMDE_FLOAT64_C( -622.88), SIMDE_FLOAT64_C( -176.22), SIMDE_FLOAT64_C( -266.43), SIMDE_FLOAT64_C( -97.47), SIMDE_FLOAT64_C( 694.93), SIMDE_FLOAT64_C( 230.45), SIMDE_FLOAT64_C( 370.13) }, { SIMDE_FLOAT64_C( -821.64), SIMDE_FLOAT64_C( -622.88), SIMDE_FLOAT64_C( -176.22), SIMDE_FLOAT64_C( 935.38), SIMDE_FLOAT64_C( 507.80), SIMDE_FLOAT64_C( 71.31), SIMDE_FLOAT64_C( -945.46), SIMDE_FLOAT64_C( -561.92) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = 
simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_mm512_mask_min_pd(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_maskz_min_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { { UINT8_C(121), { SIMDE_FLOAT64_C( 840.17), SIMDE_FLOAT64_C( -139.99), SIMDE_FLOAT64_C( -628.50), SIMDE_FLOAT64_C( 530.40), SIMDE_FLOAT64_C( -947.05), SIMDE_FLOAT64_C( -129.73), SIMDE_FLOAT64_C( -962.59), SIMDE_FLOAT64_C( 370.53) }, { SIMDE_FLOAT64_C( -874.72), SIMDE_FLOAT64_C( 38.87), SIMDE_FLOAT64_C( 333.13), SIMDE_FLOAT64_C( 818.57), SIMDE_FLOAT64_C( 354.94), SIMDE_FLOAT64_C( -397.93), SIMDE_FLOAT64_C( -985.56), SIMDE_FLOAT64_C( -200.08) }, { SIMDE_FLOAT64_C( -874.72), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 530.40), SIMDE_FLOAT64_C( -947.05), SIMDE_FLOAT64_C( -397.93), SIMDE_FLOAT64_C( -985.56), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(198), { SIMDE_FLOAT64_C( 177.52), SIMDE_FLOAT64_C( 545.59), SIMDE_FLOAT64_C( -650.55), SIMDE_FLOAT64_C( -557.90), SIMDE_FLOAT64_C( 528.51), SIMDE_FLOAT64_C( 639.67), SIMDE_FLOAT64_C( 580.27), SIMDE_FLOAT64_C( -791.42) }, { SIMDE_FLOAT64_C( 803.04), SIMDE_FLOAT64_C( -947.55), SIMDE_FLOAT64_C( 983.07), SIMDE_FLOAT64_C( -118.88), SIMDE_FLOAT64_C( -1.42), SIMDE_FLOAT64_C( 763.30), SIMDE_FLOAT64_C( -278.71), SIMDE_FLOAT64_C( 858.59) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -947.55), SIMDE_FLOAT64_C( -650.55), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -278.71), SIMDE_FLOAT64_C( -791.42) } }, { UINT8_C(198), { SIMDE_FLOAT64_C( -748.31), SIMDE_FLOAT64_C( 911.54), SIMDE_FLOAT64_C( 5.07), SIMDE_FLOAT64_C( -710.90), SIMDE_FLOAT64_C( 282.08), SIMDE_FLOAT64_C( 
130.34), SIMDE_FLOAT64_C( 327.97), SIMDE_FLOAT64_C( -384.80) }, { SIMDE_FLOAT64_C( -51.09), SIMDE_FLOAT64_C( -317.09), SIMDE_FLOAT64_C( 217.28), SIMDE_FLOAT64_C( -36.65), SIMDE_FLOAT64_C( 482.83), SIMDE_FLOAT64_C( 174.63), SIMDE_FLOAT64_C( -859.13), SIMDE_FLOAT64_C( 28.43) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -317.09), SIMDE_FLOAT64_C( 5.07), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -859.13), SIMDE_FLOAT64_C( -384.80) } }, { UINT8_C(251), { SIMDE_FLOAT64_C( -417.03), SIMDE_FLOAT64_C( -443.06), SIMDE_FLOAT64_C( 163.75), SIMDE_FLOAT64_C( -836.77), SIMDE_FLOAT64_C( -234.48), SIMDE_FLOAT64_C( -33.21), SIMDE_FLOAT64_C( -784.32), SIMDE_FLOAT64_C( -251.41) }, { SIMDE_FLOAT64_C( 847.91), SIMDE_FLOAT64_C( 214.26), SIMDE_FLOAT64_C( -488.11), SIMDE_FLOAT64_C( -430.80), SIMDE_FLOAT64_C( 72.85), SIMDE_FLOAT64_C( -353.31), SIMDE_FLOAT64_C( -179.10), SIMDE_FLOAT64_C( -15.60) }, { SIMDE_FLOAT64_C( -417.03), SIMDE_FLOAT64_C( -443.06), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -836.77), SIMDE_FLOAT64_C( -234.48), SIMDE_FLOAT64_C( -353.31), SIMDE_FLOAT64_C( -784.32), SIMDE_FLOAT64_C( -251.41) } }, { UINT8_C(217), { SIMDE_FLOAT64_C( 110.00), SIMDE_FLOAT64_C( -733.53), SIMDE_FLOAT64_C( -217.90), SIMDE_FLOAT64_C( -562.03), SIMDE_FLOAT64_C( -118.32), SIMDE_FLOAT64_C( 731.01), SIMDE_FLOAT64_C( 120.88), SIMDE_FLOAT64_C( -901.05) }, { SIMDE_FLOAT64_C( -305.64), SIMDE_FLOAT64_C( -396.29), SIMDE_FLOAT64_C( 273.58), SIMDE_FLOAT64_C( -164.78), SIMDE_FLOAT64_C( 632.14), SIMDE_FLOAT64_C( -202.34), SIMDE_FLOAT64_C( 418.19), SIMDE_FLOAT64_C( -810.93) }, { SIMDE_FLOAT64_C( -305.64), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -562.03), SIMDE_FLOAT64_C( -118.32), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 120.88), SIMDE_FLOAT64_C( -901.05) } }, { UINT8_C(130), { SIMDE_FLOAT64_C( 581.42), SIMDE_FLOAT64_C( -45.41), SIMDE_FLOAT64_C( -71.79), SIMDE_FLOAT64_C( 797.11), SIMDE_FLOAT64_C( 703.18), SIMDE_FLOAT64_C( -223.88), 
SIMDE_FLOAT64_C( 11.37), SIMDE_FLOAT64_C( -784.93) }, { SIMDE_FLOAT64_C( 345.32), SIMDE_FLOAT64_C( -915.78), SIMDE_FLOAT64_C( -138.24), SIMDE_FLOAT64_C( -833.78), SIMDE_FLOAT64_C( 68.62), SIMDE_FLOAT64_C( -486.48), SIMDE_FLOAT64_C( 276.21), SIMDE_FLOAT64_C( 335.10) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -915.78), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -784.93) } }, { UINT8_C(163), { SIMDE_FLOAT64_C( 714.18), SIMDE_FLOAT64_C( -783.23), SIMDE_FLOAT64_C( 26.63), SIMDE_FLOAT64_C( -164.94), SIMDE_FLOAT64_C( -684.28), SIMDE_FLOAT64_C( 720.98), SIMDE_FLOAT64_C( 438.77), SIMDE_FLOAT64_C( 589.30) }, { SIMDE_FLOAT64_C( -443.80), SIMDE_FLOAT64_C( 70.90), SIMDE_FLOAT64_C( -613.04), SIMDE_FLOAT64_C( 974.39), SIMDE_FLOAT64_C( 259.98), SIMDE_FLOAT64_C( -651.62), SIMDE_FLOAT64_C( 555.82), SIMDE_FLOAT64_C( -785.43) }, { SIMDE_FLOAT64_C( -443.80), SIMDE_FLOAT64_C( -783.23), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -651.62), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -785.43) } }, { UINT8_C(162), { SIMDE_FLOAT64_C( 352.93), SIMDE_FLOAT64_C( 917.75), SIMDE_FLOAT64_C( -947.30), SIMDE_FLOAT64_C( -635.70), SIMDE_FLOAT64_C( -867.18), SIMDE_FLOAT64_C( 398.02), SIMDE_FLOAT64_C( -551.47), SIMDE_FLOAT64_C( -5.42) }, { SIMDE_FLOAT64_C( 564.24), SIMDE_FLOAT64_C( 517.15), SIMDE_FLOAT64_C( 508.10), SIMDE_FLOAT64_C( -159.55), SIMDE_FLOAT64_C( -147.76), SIMDE_FLOAT64_C( -196.29), SIMDE_FLOAT64_C( -445.36), SIMDE_FLOAT64_C( 69.01) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 517.15), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -196.29), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -5.42) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = 
simde_mm512_maskz_min_pd(test_vec[i].k, a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_min_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_min_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_min_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_min_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_min_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_min_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_min_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_min_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_min_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_min_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_min_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_min_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_min_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_min_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_min_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_min_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_min_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_min_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_min_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_min_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_min_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_min_epu64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_min_epu64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_min_epu64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_min_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_min_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_min_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_min_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_min_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_min_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/mov.c000066400000000000000000010717001400333146700162270ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to 
use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #define SIMDE_TEST_X86_AVX512_INSN mov #include #include static int test_simde_mm_mask_mov_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i src; simde__mmask16 k; simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( -98), INT8_C( -63), INT8_C( -58), INT8_C( 25), INT8_C( 9), INT8_C( 49), INT8_C( -12), INT8_C( -31), INT8_C( -48), INT8_C( 0), INT8_C( -99), INT8_C( -81), INT8_C( -77), INT8_C( 27), INT8_C( -33), INT8_C(-124)), UINT16_C(64699), simde_mm_set_epi8(INT8_C( 79), INT8_C( 100), INT8_C(-123), INT8_C( 95), INT8_C( -79), INT8_C( 48), INT8_C( 112), INT8_C( 8), INT8_C( 69), INT8_C( -78), INT8_C( 54), INT8_C( -48), INT8_C(-119), INT8_C( -5), INT8_C( -97), INT8_C( -44)), simde_mm_set_epi8(INT8_C( 79), INT8_C( 100), INT8_C(-123), INT8_C( 95), INT8_C( -79), INT8_C( 48), INT8_C( -12), INT8_C( -31), INT8_C( 69), INT8_C( 0), INT8_C( 54), INT8_C( -48), INT8_C(-119), INT8_C( 27), INT8_C( -97), INT8_C( -44)) }, { simde_mm_set_epi8(INT8_C( 47), INT8_C( 36), INT8_C( 45), INT8_C( -44), INT8_C( 27), INT8_C( -15), INT8_C( 105), INT8_C( -69), INT8_C( -10), INT8_C( 1), INT8_C( 12), 
INT8_C( -44), INT8_C( -32), INT8_C( 113), INT8_C( 105), INT8_C( -92)), UINT16_C(33046), simde_mm_set_epi8(INT8_C(-118), INT8_C( -61), INT8_C( 108), INT8_C( 4), INT8_C( 56), INT8_C( 96), INT8_C( -73), INT8_C( -39), INT8_C(-112), INT8_C(-115), INT8_C(-113), INT8_C( -74), INT8_C( -79), INT8_C(-116), INT8_C( 117), INT8_C( -3)), simde_mm_set_epi8(INT8_C(-118), INT8_C( 36), INT8_C( 45), INT8_C( -44), INT8_C( 27), INT8_C( -15), INT8_C( 105), INT8_C( -39), INT8_C( -10), INT8_C( 1), INT8_C( 12), INT8_C( -74), INT8_C( -32), INT8_C(-116), INT8_C( 117), INT8_C( -92)) }, { simde_mm_set_epi8(INT8_C( 41), INT8_C(-106), INT8_C( -67), INT8_C(-116), INT8_C( -34), INT8_C( 21), INT8_C( 64), INT8_C( 44), INT8_C( 97), INT8_C( -46), INT8_C( 122), INT8_C( 42), INT8_C( -54), INT8_C( -79), INT8_C( 21), INT8_C( 59)), UINT16_C(27487), simde_mm_set_epi8(INT8_C( 6), INT8_C(-124), INT8_C(-111), INT8_C( -39), INT8_C( 55), INT8_C( -55), INT8_C( -72), INT8_C( 77), INT8_C( 51), INT8_C(-103), INT8_C( -80), INT8_C( 75), INT8_C( -87), INT8_C(-120), INT8_C( -14), INT8_C( 99)), simde_mm_set_epi8(INT8_C( 41), INT8_C(-124), INT8_C(-111), INT8_C(-116), INT8_C( 55), INT8_C( 21), INT8_C( -72), INT8_C( 77), INT8_C( 97), INT8_C(-103), INT8_C( 122), INT8_C( 75), INT8_C( -87), INT8_C(-120), INT8_C( -14), INT8_C( 99)) }, { simde_mm_set_epi8(INT8_C( 31), INT8_C( -90), INT8_C(-127), INT8_C( 105), INT8_C( -89), INT8_C(-121), INT8_C(-110), INT8_C( -58), INT8_C( -95), INT8_C(-101), INT8_C( -56), INT8_C( 22), INT8_C( 18), INT8_C( 2), INT8_C( 46), INT8_C(-125)), UINT16_C(48165), simde_mm_set_epi8(INT8_C( 103), INT8_C( 26), INT8_C( 108), INT8_C( 4), INT8_C( -49), INT8_C( -62), INT8_C(-103), INT8_C( -42), INT8_C( 103), INT8_C( 115), INT8_C( 126), INT8_C(-112), INT8_C( -81), INT8_C( -35), INT8_C(-106), INT8_C( 45)), simde_mm_set_epi8(INT8_C( 103), INT8_C( -90), INT8_C( 108), INT8_C( 4), INT8_C( -49), INT8_C( -62), INT8_C(-110), INT8_C( -58), INT8_C( -95), INT8_C(-101), INT8_C( 126), INT8_C( 22), INT8_C( 18), INT8_C( -35), 
INT8_C( 46), INT8_C( 45)) }, { simde_mm_set_epi8(INT8_C( 106), INT8_C( 23), INT8_C( -78), INT8_C( -57), INT8_C( 24), INT8_C( 56), INT8_C( -46), INT8_C( -15), INT8_C( -33), INT8_C( 28), INT8_C( -40), INT8_C(-116), INT8_C( -34), INT8_C( 92), INT8_C( 109), INT8_C( 33)), UINT16_C(14870), simde_mm_set_epi8(INT8_C( -75), INT8_C( 55), INT8_C(-127), INT8_C( 70), INT8_C( 78), INT8_C( 126), INT8_C( -96), INT8_C( 119), INT8_C( 108), INT8_C( 50), INT8_C( 17), INT8_C( -71), INT8_C( 127), INT8_C( 91), INT8_C( 110), INT8_C( -90)), simde_mm_set_epi8(INT8_C( 106), INT8_C( 23), INT8_C(-127), INT8_C( 70), INT8_C( 78), INT8_C( 56), INT8_C( -96), INT8_C( -15), INT8_C( -33), INT8_C( 28), INT8_C( -40), INT8_C( -71), INT8_C( -34), INT8_C( 91), INT8_C( 110), INT8_C( 33)) }, { simde_mm_set_epi8(INT8_C( -21), INT8_C(-122), INT8_C(-127), INT8_C( 95), INT8_C( -34), INT8_C( -51), INT8_C( 107), INT8_C( 75), INT8_C( 63), INT8_C(-117), INT8_C(-118), INT8_C( 52), INT8_C( 15), INT8_C( 123), INT8_C( -76), INT8_C(-117)), UINT16_C(54314), simde_mm_set_epi8(INT8_C( 124), INT8_C( -12), INT8_C( 0), INT8_C( -14), INT8_C( -54), INT8_C( 92), INT8_C( 73), INT8_C( 69), INT8_C( -47), INT8_C( -62), INT8_C( 113), INT8_C( 100), INT8_C( 31), INT8_C( -98), INT8_C( -86), INT8_C( 19)), simde_mm_set_epi8(INT8_C( 124), INT8_C( -12), INT8_C(-127), INT8_C( -14), INT8_C( -34), INT8_C( 92), INT8_C( 107), INT8_C( 75), INT8_C( 63), INT8_C(-117), INT8_C( 113), INT8_C( 52), INT8_C( 31), INT8_C( 123), INT8_C( -86), INT8_C(-117)) }, { simde_mm_set_epi8(INT8_C( -9), INT8_C( -43), INT8_C( 83), INT8_C( 21), INT8_C( 88), INT8_C( -52), INT8_C(-115), INT8_C( 63), INT8_C( 92), INT8_C( -15), INT8_C( -24), INT8_C( -84), INT8_C(-120), INT8_C( -96), INT8_C( 46), INT8_C( -78)), UINT16_C(44998), simde_mm_set_epi8(INT8_C( -10), INT8_C( 79), INT8_C(-113), INT8_C( -93), INT8_C( 24), INT8_C( 78), INT8_C( 40), INT8_C( 22), INT8_C( 31), INT8_C( -15), INT8_C( -8), INT8_C( 60), INT8_C( 114), INT8_C( -85), INT8_C(-105), INT8_C( -47)), 
simde_mm_set_epi8(INT8_C( -10), INT8_C( -43), INT8_C(-113), INT8_C( 21), INT8_C( 24), INT8_C( 78), INT8_C( 40), INT8_C( 22), INT8_C( 31), INT8_C( -15), INT8_C( -24), INT8_C( -84), INT8_C(-120), INT8_C( -85), INT8_C(-105), INT8_C( -78)) }, { simde_mm_set_epi8(INT8_C( -62), INT8_C( 117), INT8_C(-114), INT8_C( 7), INT8_C( 17), INT8_C( 123), INT8_C( -2), INT8_C( -15), INT8_C(-120), INT8_C( 77), INT8_C( 81), INT8_C( -39), INT8_C(-114), INT8_C( -52), INT8_C(-119), INT8_C( 82)), UINT16_C(48425), simde_mm_set_epi8(INT8_C( 68), INT8_C( -65), INT8_C( 13), INT8_C( -27), INT8_C( 55), INT8_C( 2), INT8_C( -43), INT8_C( 9), INT8_C( -57), INT8_C( 65), INT8_C(-111), INT8_C( -60), INT8_C( 75), INT8_C( 74), INT8_C( 16), INT8_C( 19)), simde_mm_set_epi8(INT8_C( 68), INT8_C( 117), INT8_C( 13), INT8_C( -27), INT8_C( 55), INT8_C( 2), INT8_C( -2), INT8_C( 9), INT8_C(-120), INT8_C( 77), INT8_C(-111), INT8_C( -39), INT8_C( 75), INT8_C( -52), INT8_C(-119), INT8_C( 19)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_mask_mov_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_mask_mov_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i src; simde__mmask8 k; simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-14576), INT16_C( 14205), INT16_C( -2433), INT16_C(-27972), INT16_C( 6192), INT16_C(-29093), INT16_C( 24144), INT16_C(-19045)), UINT8_C(231), simde_mm_set_epi16(INT16_C(-16025), INT16_C( -5226), INT16_C( -6596), INT16_C(-11796), INT16_C(-24692), INT16_C( 20335), INT16_C( 26237), INT16_C( 23499)), simde_mm_set_epi16(INT16_C(-16025), INT16_C( -5226), INT16_C( -6596), INT16_C(-27972), INT16_C( 6192), INT16_C( 20335), INT16_C( 26237), INT16_C( 23499)) }, { simde_mm_set_epi16(INT16_C( -839), INT16_C(-10951), INT16_C( 1310), INT16_C( -6285), INT16_C(-21252), INT16_C( -7582), INT16_C(-12381), INT16_C( 24902)), UINT8_C( 7), 
simde_mm_set_epi16(INT16_C( -3233), INT16_C( 25022), INT16_C(-12043), INT16_C( 17022), INT16_C(-25543), INT16_C(-17145), INT16_C( 8881), INT16_C( 28844)), simde_mm_set_epi16(INT16_C( -839), INT16_C(-10951), INT16_C( 1310), INT16_C( -6285), INT16_C(-21252), INT16_C(-17145), INT16_C( 8881), INT16_C( 28844)) }, { simde_mm_set_epi16(INT16_C( 30807), INT16_C( 12936), INT16_C(-14387), INT16_C(-15179), INT16_C( 23907), INT16_C(-17160), INT16_C( 23916), INT16_C( 14132)), UINT8_C(139), simde_mm_set_epi16(INT16_C( -1315), INT16_C(-31661), INT16_C(-10075), INT16_C(-22609), INT16_C( 9167), INT16_C( 6456), INT16_C( -7329), INT16_C( -8326)), simde_mm_set_epi16(INT16_C( -1315), INT16_C( 12936), INT16_C(-14387), INT16_C(-15179), INT16_C( 9167), INT16_C(-17160), INT16_C( -7329), INT16_C( -8326)) }, { simde_mm_set_epi16(INT16_C( 26421), INT16_C(-12708), INT16_C( 22525), INT16_C(-31426), INT16_C( 15010), INT16_C(-27490), INT16_C(-12766), INT16_C(-25791)), UINT8_C( 65), simde_mm_set_epi16(INT16_C( -1553), INT16_C(-19304), INT16_C( 20094), INT16_C( -2808), INT16_C(-12327), INT16_C( 15252), INT16_C( 25789), INT16_C(-23968)), simde_mm_set_epi16(INT16_C( 26421), INT16_C(-19304), INT16_C( 22525), INT16_C(-31426), INT16_C( 15010), INT16_C(-27490), INT16_C(-12766), INT16_C(-23968)) }, { simde_mm_set_epi16(INT16_C( 7823), INT16_C( 19443), INT16_C( 13219), INT16_C( 17015), INT16_C(-11739), INT16_C(-13030), INT16_C(-14482), INT16_C(-27926)), UINT8_C(249), simde_mm_set_epi16(INT16_C(-25131), INT16_C( 30189), INT16_C(-22900), INT16_C( 28700), INT16_C( 1116), INT16_C( 30184), INT16_C(-12164), INT16_C( -7443)), simde_mm_set_epi16(INT16_C(-25131), INT16_C( 30189), INT16_C(-22900), INT16_C( 28700), INT16_C( 1116), INT16_C(-13030), INT16_C(-14482), INT16_C( -7443)) }, { simde_mm_set_epi16(INT16_C(-26628), INT16_C( 25963), INT16_C(-26322), INT16_C( -8077), INT16_C(-22868), INT16_C( 28633), INT16_C( -4168), INT16_C( 28595)), UINT8_C(112), simde_mm_set_epi16(INT16_C( 14185), INT16_C( -5351), INT16_C( 
-8435), INT16_C(-11233), INT16_C( -8273), INT16_C(-29718), INT16_C( -8221), INT16_C( 18236)), simde_mm_set_epi16(INT16_C(-26628), INT16_C( -5351), INT16_C( -8435), INT16_C(-11233), INT16_C(-22868), INT16_C( 28633), INT16_C( -4168), INT16_C( 28595)) }, { simde_mm_set_epi16(INT16_C(-14557), INT16_C(-28064), INT16_C( 11696), INT16_C(-19213), INT16_C( 15613), INT16_C( 26380), INT16_C( 30063), INT16_C( 26293)), UINT8_C( 24), simde_mm_set_epi16(INT16_C( 23790), INT16_C( 10772), INT16_C( -8418), INT16_C(-27527), INT16_C( -163), INT16_C( 10898), INT16_C(-12995), INT16_C( 287)), simde_mm_set_epi16(INT16_C(-14557), INT16_C(-28064), INT16_C( 11696), INT16_C(-27527), INT16_C( -163), INT16_C( 26380), INT16_C( 30063), INT16_C( 26293)) }, { simde_mm_set_epi16(INT16_C(-14768), INT16_C(-23816), INT16_C(-22775), INT16_C( -4812), INT16_C(-19595), INT16_C(-14349), INT16_C( 11039), INT16_C( 15081)), UINT8_C( 22), simde_mm_set_epi16(INT16_C( 27063), INT16_C( 8226), INT16_C(-13582), INT16_C( 14344), INT16_C(-27643), INT16_C( -1125), INT16_C(-27147), INT16_C( -4132)), simde_mm_set_epi16(INT16_C(-14768), INT16_C(-23816), INT16_C(-22775), INT16_C( 14344), INT16_C(-19595), INT16_C( -1125), INT16_C(-27147), INT16_C( 15081)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_mask_mov_epi16(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_mask_mov_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i src; simde__mmask8 k; simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C(-1311777535), INT32_C( 871351059), INT32_C(-1795529748), INT32_C(-1018886524)), UINT8_C(193), simde_mm_set_epi32(INT32_C(-1402384713), INT32_C( 349677639), INT32_C(-2062419968), INT32_C(-2110667873)), simde_mm_set_epi32(INT32_C(-1311777535), INT32_C( 871351059), INT32_C(-1795529748), INT32_C(-2110667873)) }, { simde_mm_set_epi32(INT32_C( 738895957), 
INT32_C(-2052149671), INT32_C( 1275190943), INT32_C(-1073987906)), UINT8_C(211), simde_mm_set_epi32(INT32_C( 899624021), INT32_C(-1740875066), INT32_C( 196568235), INT32_C( 146964985)), simde_mm_set_epi32(INT32_C( 738895957), INT32_C(-2052149671), INT32_C( 196568235), INT32_C( 146964985)) }, { simde_mm_set_epi32(INT32_C( 692992965), INT32_C( 836600954), INT32_C(-1461227321), INT32_C( -625910795)), UINT8_C(122), simde_mm_set_epi32(INT32_C(-1617549669), INT32_C( 1989374100), INT32_C(-1502577107), INT32_C(-1017994073)), simde_mm_set_epi32(INT32_C(-1617549669), INT32_C( 836600954), INT32_C(-1502577107), INT32_C( -625910795)) }, { simde_mm_set_epi32(INT32_C( 1143677167), INT32_C( 846204550), INT32_C( -804913221), INT32_C( 1445583278)), UINT8_C(231), simde_mm_set_epi32(INT32_C(-1730413187), INT32_C(-1695584840), INT32_C( -227526716), INT32_C( -3425875)), simde_mm_set_epi32(INT32_C( 1143677167), INT32_C(-1695584840), INT32_C( -227526716), INT32_C( -3425875)) }, { simde_mm_set_epi32(INT32_C( 645689114), INT32_C(-2084714818), INT32_C( 1764055823), INT32_C( 52635923)), UINT8_C( 92), simde_mm_set_epi32(INT32_C(-1571852402), INT32_C( 630152776), INT32_C( -128726906), INT32_C( 1269444726)), simde_mm_set_epi32(INT32_C(-1571852402), INT32_C( 630152776), INT32_C( 1764055823), INT32_C( 52635923)) }, { simde_mm_set_epi32(INT32_C( 1563221), INT32_C( -134802286), INT32_C( 714712077), INT32_C(-1827172967)), UINT8_C( 81), simde_mm_set_epi32(INT32_C( 1929131576), INT32_C(-1816110300), INT32_C( 1278219947), INT32_C( 1799312980)), simde_mm_set_epi32(INT32_C( 1563221), INT32_C( -134802286), INT32_C( 714712077), INT32_C( 1799312980)) }, { simde_mm_set_epi32(INT32_C( 398082434), INT32_C(-1574168894), INT32_C( -78364073), INT32_C(-1210427726)), UINT8_C( 81), simde_mm_set_epi32(INT32_C( -743499294), INT32_C(-2007549651), INT32_C( 404949426), INT32_C(-1228263526)), simde_mm_set_epi32(INT32_C( 398082434), INT32_C(-1574168894), INT32_C( -78364073), INT32_C(-1228263526)) }, { 
simde_mm_set_epi32(INT32_C( -588057094), INT32_C(-1885829296), INT32_C( 1969228625), INT32_C( 1326338893)), UINT8_C(219), simde_mm_set_epi32(INT32_C( 1932026039), INT32_C(-1013786585), INT32_C( 1485053584), INT32_C( 1979373999)), simde_mm_set_epi32(INT32_C( 1932026039), INT32_C(-1885829296), INT32_C( 1485053584), INT32_C( 1979373999)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_mask_mov_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_mask_mov_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i src; simde__mmask8 k; simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C( 2277107027088284737), INT64_C( -794576880036979785)), UINT8_C(133), simde_mm_set_epi64x(INT64_C( 8097713530582561529), INT64_C( 1585963766693842069)), simde_mm_set_epi64x(INT64_C( 2277107027088284737), INT64_C( 1585963766693842069)) }, { simde_mm_set_epi64x(INT64_C( 386114209698075166), INT64_C( 5207265957388900927)), UINT8_C(158), simde_mm_set_epi64x(INT64_C( 8803705323655107871), INT64_C(-8422781366242531322)), simde_mm_set_epi64x(INT64_C( 8803705323655107871), INT64_C( 5207265957388900927)) }, { simde_mm_set_epi64x(INT64_C(-2685854854617637911), INT64_C( 5000183764696508529)), UINT8_C(188), simde_mm_set_epi64x(INT64_C( 3366037084418714211), INT64_C(-4379786006937181803)), simde_mm_set_epi64x(INT64_C(-2685854854617637911), INT64_C( 5000183764696508529)) }, { simde_mm_set_epi64x(INT64_C( 5087362917606608352), INT64_C( 7748994405920281726)), UINT8_C( 72), simde_mm_set_epi64x(INT64_C(-3993157906773187111), INT64_C( 5848124444216740966)), simde_mm_set_epi64x(INT64_C( 5087362917606608352), INT64_C( 7748994405920281726)) }, { simde_mm_set_epi64x(INT64_C(-6262495515547444433), INT64_C( 3943684472219148405)), UINT8_C( 56), simde_mm_set_epi64x(INT64_C( 6021985363878171356), INT64_C(-9003751561505293092)), 
simde_mm_set_epi64x(INT64_C(-6262495515547444433), INT64_C( 3943684472219148405)) }, { simde_mm_set_epi64x(INT64_C( 7378184861631570903), INT64_C( 5065745925883054243)), UINT8_C(107), simde_mm_set_epi64x(INT64_C( 3940656342452910480), INT64_C( 3350136105944417294)), simde_mm_set_epi64x(INT64_C( 3940656342452910480), INT64_C( 3350136105944417294)) }, { simde_mm_set_epi64x(INT64_C( 4422823463426654219), INT64_C( 1827699444722609855)), UINT8_C( 23), simde_mm_set_epi64x(INT64_C(-2966751886069965026), INT64_C(-8494473672325004777)), simde_mm_set_epi64x(INT64_C(-2966751886069965026), INT64_C(-8494473672325004777)) }, { simde_mm_set_epi64x(INT64_C(-8917676865649705108), INT64_C( 6229148348133862992)), UINT8_C( 48), simde_mm_set_epi64x(INT64_C(-7968457113297908477), INT64_C(-6793891334661924961)), simde_mm_set_epi64x(INT64_C(-8917676865649705108), INT64_C( 6229148348133862992)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_mask_mov_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_mask_mov_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d src; simde__mmask8 k; simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), UINT8_C(210), simde_mm_set_pd(SIMDE_FLOAT64_C( 55.56), SIMDE_FLOAT64_C( 306.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( 55.56), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), UINT8_C( 7), simde_mm_set_pd(SIMDE_FLOAT64_C( 202.21), SIMDE_FLOAT64_C( -678.71)), simde_mm_set_pd(SIMDE_FLOAT64_C( 202.21), SIMDE_FLOAT64_C( -678.71)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), UINT8_C( 50), simde_mm_set_pd(SIMDE_FLOAT64_C( 680.40), SIMDE_FLOAT64_C( 906.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( 680.40), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), 
UINT8_C(229), simde_mm_set_pd(SIMDE_FLOAT64_C( -422.72), SIMDE_FLOAT64_C( 572.83)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 572.83)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), UINT8_C(117), simde_mm_set_pd(SIMDE_FLOAT64_C( -76.19), SIMDE_FLOAT64_C( -654.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -654.60)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), UINT8_C(130), simde_mm_set_pd(SIMDE_FLOAT64_C( -711.42), SIMDE_FLOAT64_C( -22.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -711.42), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), UINT8_C( 62), simde_mm_set_pd(SIMDE_FLOAT64_C( -413.23), SIMDE_FLOAT64_C( 547.52)), simde_mm_set_pd(SIMDE_FLOAT64_C( -413.23), SIMDE_FLOAT64_C( 0.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), UINT8_C(165), simde_mm_set_pd(SIMDE_FLOAT64_C( 575.41), SIMDE_FLOAT64_C( -702.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -702.01)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_mask_mov_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_mask_mov_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 src; simde__mmask8 k; simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), UINT16_C( 126), simde_mm_set_ps(SIMDE_FLOAT32_C( -678.71), SIMDE_FLOAT32_C( 675.53), SIMDE_FLOAT32_C( 55.56), SIMDE_FLOAT32_C( 306.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( -678.71), SIMDE_FLOAT32_C( 675.53), SIMDE_FLOAT32_C( 55.56), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), UINT16_C( 44), simde_mm_set_ps(SIMDE_FLOAT32_C( 941.87), SIMDE_FLOAT32_C( 680.40), 
SIMDE_FLOAT32_C( 906.67), SIMDE_FLOAT32_C( -364.25)), simde_mm_set_ps(SIMDE_FLOAT32_C( 941.87), SIMDE_FLOAT32_C( 680.40), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), UINT16_C( 117), simde_mm_set_ps(SIMDE_FLOAT32_C( -76.19), SIMDE_FLOAT32_C( -654.60), SIMDE_FLOAT32_C( -721.91), SIMDE_FLOAT32_C( -422.72)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -654.60), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -422.72)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), UINT16_C( 76), simde_mm_set_ps(SIMDE_FLOAT32_C( 547.52), SIMDE_FLOAT32_C( -627.17), SIMDE_FLOAT32_C( -711.42), SIMDE_FLOAT32_C( -22.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 547.52), SIMDE_FLOAT32_C( -627.17), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), UINT16_C( 101), simde_mm_set_ps(SIMDE_FLOAT32_C( -822.97), SIMDE_FLOAT32_C( 575.41), SIMDE_FLOAT32_C( -702.01), SIMDE_FLOAT32_C( -488.76)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 575.41), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -488.76)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), UINT16_C( 149), simde_mm_set_ps(SIMDE_FLOAT32_C( 804.55), SIMDE_FLOAT32_C( -888.85), SIMDE_FLOAT32_C( 750.71), SIMDE_FLOAT32_C( 346.51)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -888.85), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 346.51)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), UINT16_C( 115), simde_mm_set_ps(SIMDE_FLOAT32_C( -17.38), SIMDE_FLOAT32_C( 623.33), SIMDE_FLOAT32_C( 459.80), SIMDE_FLOAT32_C( 837.15)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( 459.80), SIMDE_FLOAT32_C( 837.15)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), UINT16_C( 50), simde_mm_set_ps(SIMDE_FLOAT32_C( 197.69), SIMDE_FLOAT32_C( 233.42), SIMDE_FLOAT32_C( 153.73), SIMDE_FLOAT32_C( 616.58)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 153.73), SIMDE_FLOAT32_C( 0.00)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_mask_mov_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_mask_mov_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i src; simde__mmask32 k; simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( -82), INT8_C( -32), INT8_C( -73), INT8_C( -78), INT8_C( -21), INT8_C( 76), INT8_C( 33), INT8_C( 90), INT8_C( -57), INT8_C( -12), INT8_C(-121), INT8_C( 101), INT8_C( 6), INT8_C( -36), INT8_C( -50), INT8_C( -33), INT8_C( -83), INT8_C( -92), INT8_C( 2), INT8_C( 69), INT8_C( 62), INT8_C( 89), INT8_C( 105), INT8_C( 58), INT8_C( 125), INT8_C( -76), INT8_C( 27), INT8_C( 51), INT8_C( 79), INT8_C( 101), INT8_C( -42), INT8_C( -69)), UINT32_C( 391141390), simde_mm256_set_epi8(INT8_C( -9), INT8_C( -52), INT8_C( 27), INT8_C( -40), INT8_C( 57), INT8_C( -80), INT8_C( -28), INT8_C( 64), INT8_C( 70), INT8_C( -40), INT8_C( 14), INT8_C( -38), INT8_C( -38), INT8_C( -99), INT8_C( -37), INT8_C( -35), INT8_C( -82), INT8_C( -60), INT8_C( -40), INT8_C( -40), INT8_C( -5), INT8_C( 8), INT8_C( 109), INT8_C( 95), INT8_C( 124), INT8_C( 34), INT8_C( 19), INT8_C( -65), INT8_C( -2), INT8_C( -92), INT8_C( 18), INT8_C( -33)), simde_mm256_set_epi8(INT8_C( -82), INT8_C( -32), INT8_C( -73), INT8_C( -40), INT8_C( -21), INT8_C( -80), INT8_C( -28), INT8_C( 64), INT8_C( -57), INT8_C( -40), INT8_C(-121), INT8_C( -38), INT8_C( 6), INT8_C( -36), INT8_C( -50), INT8_C( -33), INT8_C( 
-83), INT8_C( -60), INT8_C( 2), INT8_C( -40), INT8_C( -5), INT8_C( 89), INT8_C( 105), INT8_C( 58), INT8_C( 125), INT8_C( -76), INT8_C( 27), INT8_C( 51), INT8_C( -2), INT8_C( -92), INT8_C( 18), INT8_C( -69)) }, { simde_mm256_set_epi8(INT8_C( -54), INT8_C( -68), INT8_C( 19), INT8_C( 39), INT8_C( 17), INT8_C( -32), INT8_C( -47), INT8_C( -26), INT8_C( -23), INT8_C( 30), INT8_C( 98), INT8_C( 3), INT8_C( -92), INT8_C( -30), INT8_C( -8), INT8_C( -30), INT8_C( 26), INT8_C(-116), INT8_C( 76), INT8_C( -76), INT8_C( -29), INT8_C( -31), INT8_C( -31), INT8_C( 78), INT8_C( 23), INT8_C( 6), INT8_C( 61), INT8_C( 68), INT8_C( -53), INT8_C(-110), INT8_C( 53), INT8_C( -67)), UINT32_C( 757878650), simde_mm256_set_epi8(INT8_C( 36), INT8_C(-115), INT8_C( -95), INT8_C( 4), INT8_C( 50), INT8_C( -54), INT8_C( 94), INT8_C( 54), INT8_C( 109), INT8_C(-103), INT8_C(-124), INT8_C( 34), INT8_C( -16), INT8_C( 97), INT8_C( -7), INT8_C( 98), INT8_C(-125), INT8_C( -49), INT8_C( 3), INT8_C( -91), INT8_C( -99), INT8_C( 85), INT8_C( -25), INT8_C( 6), INT8_C( -42), INT8_C( 44), INT8_C( 70), INT8_C( -24), INT8_C( -86), INT8_C( 112), INT8_C( 116), INT8_C( -61)), simde_mm256_set_epi8(INT8_C( -54), INT8_C( -68), INT8_C( -95), INT8_C( 39), INT8_C( 50), INT8_C( -54), INT8_C( -47), INT8_C( 54), INT8_C( -23), INT8_C( 30), INT8_C(-124), INT8_C( 3), INT8_C( -16), INT8_C( 97), INT8_C( -8), INT8_C( -30), INT8_C( 26), INT8_C( -49), INT8_C( 76), INT8_C( -76), INT8_C( -99), INT8_C( 85), INT8_C( -25), INT8_C( 6), INT8_C( 23), INT8_C( 44), INT8_C( 70), INT8_C( -24), INT8_C( -86), INT8_C(-110), INT8_C( 116), INT8_C( -67)) }, { simde_mm256_set_epi8(INT8_C( 48), INT8_C( -19), INT8_C( -87), INT8_C( 100), INT8_C( -44), INT8_C( -79), INT8_C( -72), INT8_C( 73), INT8_C( -36), INT8_C( 58), INT8_C(-113), INT8_C( -42), INT8_C( -85), INT8_C( 123), INT8_C(-106), INT8_C( -57), INT8_C( -53), INT8_C( 96), INT8_C( 40), INT8_C( -52), INT8_C( -17), INT8_C( -6), INT8_C(-108), INT8_C( 33), INT8_C( -15), INT8_C( 113), INT8_C( 31), INT8_C( 
-14), INT8_C( 124), INT8_C( 15), INT8_C( 90), INT8_C( 1)), UINT32_C(2771863762), simde_mm256_set_epi8(INT8_C( 72), INT8_C( -95), INT8_C( 104), INT8_C( -28), INT8_C( 25), INT8_C( 84), INT8_C( 66), INT8_C( 19), INT8_C( 79), INT8_C( -84), INT8_C( 46), INT8_C( 23), INT8_C( -85), INT8_C( 12), INT8_C( 6), INT8_C( -9), INT8_C(-108), INT8_C( 14), INT8_C( 103), INT8_C( 32), INT8_C( 25), INT8_C(-108), INT8_C( -56), INT8_C(-111), INT8_C( 23), INT8_C( -20), INT8_C( 4), INT8_C( 81), INT8_C( 39), INT8_C( 39), INT8_C( 82), INT8_C( -15)), simde_mm256_set_epi8(INT8_C( 72), INT8_C( -19), INT8_C( 104), INT8_C( 100), INT8_C( -44), INT8_C( 84), INT8_C( -72), INT8_C( 19), INT8_C( -36), INT8_C( 58), INT8_C( 46), INT8_C( 23), INT8_C( -85), INT8_C( 12), INT8_C( 6), INT8_C( -9), INT8_C( -53), INT8_C( 14), INT8_C( 40), INT8_C( -52), INT8_C( 25), INT8_C( -6), INT8_C(-108), INT8_C( 33), INT8_C( 23), INT8_C( -20), INT8_C( 31), INT8_C( 81), INT8_C( 124), INT8_C( 15), INT8_C( 82), INT8_C( 1)) }, { simde_mm256_set_epi8(INT8_C( 57), INT8_C( -52), INT8_C( 127), INT8_C( -70), INT8_C( 97), INT8_C( 95), INT8_C( -96), INT8_C( -99), INT8_C( 22), INT8_C(-112), INT8_C( 66), INT8_C( -76), INT8_C( 79), INT8_C(-100), INT8_C( -47), INT8_C(-114), INT8_C( -72), INT8_C( 67), INT8_C( 3), INT8_C( -9), INT8_C( 88), INT8_C( -5), INT8_C(-111), INT8_C(-100), INT8_C( -94), INT8_C( -72), INT8_C( -45), INT8_C( -95), INT8_C( 119), INT8_C( -81), INT8_C( 38), INT8_C(-111)), UINT32_C(4224621908), simde_mm256_set_epi8(INT8_C(-112), INT8_C( 63), INT8_C( 75), INT8_C( 90), INT8_C( -7), INT8_C( 116), INT8_C(-123), INT8_C( -34), INT8_C( 81), INT8_C( 114), INT8_C( -76), INT8_C( -63), INT8_C( 30), INT8_C( 66), INT8_C( 18), INT8_C(-119), INT8_C( 26), INT8_C( 28), INT8_C( 56), INT8_C( 127), INT8_C( -81), INT8_C( -7), INT8_C( -20), INT8_C( -35), INT8_C( -7), INT8_C( 37), INT8_C( -47), INT8_C( 78), INT8_C( 114), INT8_C( -18), INT8_C( 72), INT8_C( -8)), simde_mm256_set_epi8(INT8_C(-112), INT8_C( 63), INT8_C( 75), INT8_C( 90), INT8_C( -7), 
INT8_C( 95), INT8_C(-123), INT8_C( -34), INT8_C( 81), INT8_C( 114), INT8_C( 66), INT8_C( -76), INT8_C( 30), INT8_C( 66), INT8_C( 18), INT8_C(-114), INT8_C( 26), INT8_C( 67), INT8_C( 3), INT8_C( 127), INT8_C( -81), INT8_C( -7), INT8_C(-111), INT8_C( -35), INT8_C( -94), INT8_C( 37), INT8_C( -45), INT8_C( 78), INT8_C( 119), INT8_C( -18), INT8_C( 38), INT8_C(-111)) }, { simde_mm256_set_epi8(INT8_C( -29), INT8_C(-121), INT8_C( -23), INT8_C( 64), INT8_C( 12), INT8_C( 5), INT8_C( 73), INT8_C( 52), INT8_C( -53), INT8_C( 62), INT8_C( 8), INT8_C(-112), INT8_C( -8), INT8_C( 99), INT8_C( -12), INT8_C(-118), INT8_C( -33), INT8_C( -37), INT8_C( -98), INT8_C( -94), INT8_C(-119), INT8_C( 79), INT8_C( -25), INT8_C( 47), INT8_C( 80), INT8_C( 89), INT8_C( 5), INT8_C( 9), INT8_C( -36), INT8_C( 79), INT8_C( 8), INT8_C( 89)), UINT32_C(1663316267), simde_mm256_set_epi8(INT8_C( 103), INT8_C( -43), INT8_C( 6), INT8_C( 112), INT8_C( -45), INT8_C( 82), INT8_C( 16), INT8_C( 3), INT8_C( 34), INT8_C( -45), INT8_C( 75), INT8_C(-106), INT8_C(-107), INT8_C( -45), INT8_C( -85), INT8_C( -53), INT8_C( 11), INT8_C( 28), INT8_C( 126), INT8_C( 24), INT8_C( -69), INT8_C( 35), INT8_C( -37), INT8_C( 95), INT8_C( 85), INT8_C( 3), INT8_C( -77), INT8_C( -35), INT8_C( -83), INT8_C( -1), INT8_C( -73), INT8_C( -18)), simde_mm256_set_epi8(INT8_C( -29), INT8_C( -43), INT8_C( 6), INT8_C( 64), INT8_C( 12), INT8_C( 5), INT8_C( 16), INT8_C( 3), INT8_C( -53), INT8_C( 62), INT8_C( 75), INT8_C(-112), INT8_C( -8), INT8_C( -45), INT8_C( -12), INT8_C(-118), INT8_C( -33), INT8_C( -37), INT8_C( 126), INT8_C( 24), INT8_C(-119), INT8_C( 79), INT8_C( -25), INT8_C( 95), INT8_C( 80), INT8_C( 89), INT8_C( -77), INT8_C( 9), INT8_C( -83), INT8_C( 79), INT8_C( -73), INT8_C( -18)) }, { simde_mm256_set_epi8(INT8_C( -15), INT8_C( 22), INT8_C( -61), INT8_C( -49), INT8_C( -4), INT8_C( -4), INT8_C( 91), INT8_C( -15), INT8_C( 47), INT8_C( -16), INT8_C(-118), INT8_C( 86), INT8_C( -37), INT8_C( -51), INT8_C( 66), INT8_C( -18), INT8_C( -38), 
INT8_C( -22), INT8_C( 6), INT8_C( 33), INT8_C( 109), INT8_C(-110), INT8_C( -53), INT8_C(-118), INT8_C( 48), INT8_C( -55), INT8_C( 70), INT8_C( -1), INT8_C(-125), INT8_C( -38), INT8_C( 109), INT8_C( -62)), UINT32_C(1252303865), simde_mm256_set_epi8(INT8_C(-103), INT8_C(-118), INT8_C(-127), INT8_C( -69), INT8_C( 28), INT8_C( 82), INT8_C( -48), INT8_C(-119), INT8_C( -31), INT8_C( -65), INT8_C(-127), INT8_C( -41), INT8_C( 86), INT8_C( -70), INT8_C( -6), INT8_C( 33), INT8_C( -51), INT8_C(-122), INT8_C( -14), INT8_C( 119), INT8_C( 75), INT8_C( 63), INT8_C( -36), INT8_C( 31), INT8_C( -76), INT8_C( 48), INT8_C( 50), INT8_C(-113), INT8_C( 15), INT8_C( -75), INT8_C( -26), INT8_C( 94)), simde_mm256_set_epi8(INT8_C( -15), INT8_C(-118), INT8_C( -61), INT8_C( -49), INT8_C( 28), INT8_C( -4), INT8_C( -48), INT8_C( -15), INT8_C( -31), INT8_C( -16), INT8_C(-127), INT8_C( 86), INT8_C( -37), INT8_C( -70), INT8_C( 66), INT8_C( -18), INT8_C( -51), INT8_C( -22), INT8_C( -14), INT8_C( 33), INT8_C( 109), INT8_C(-110), INT8_C( -36), INT8_C( 31), INT8_C( -76), INT8_C( 48), INT8_C( 50), INT8_C(-113), INT8_C( 15), INT8_C( -38), INT8_C( 109), INT8_C( 94)) }, { simde_mm256_set_epi8(INT8_C(-106), INT8_C( 63), INT8_C( -91), INT8_C( -65), INT8_C(-114), INT8_C( -79), INT8_C( 118), INT8_C( 65), INT8_C(-123), INT8_C( 42), INT8_C( -51), INT8_C( 112), INT8_C( -55), INT8_C( 120), INT8_C( 62), INT8_C( -91), INT8_C( -74), INT8_C( 98), INT8_C( -26), INT8_C( -13), INT8_C( -94), INT8_C( 105), INT8_C( -49), INT8_C( -31), INT8_C( 18), INT8_C( 49), INT8_C( -11), INT8_C( 72), INT8_C( -9), INT8_C( -16), INT8_C( 100), INT8_C( -64)), UINT32_C( 648334209), simde_mm256_set_epi8(INT8_C( 1), INT8_C( -60), INT8_C( -73), INT8_C( -13), INT8_C( 63), INT8_C(-117), INT8_C(-106), INT8_C( -9), INT8_C( -71), INT8_C(-116), INT8_C( -20), INT8_C( 61), INT8_C( 48), INT8_C(-114), INT8_C(-114), INT8_C( -45), INT8_C( -77), INT8_C( 123), INT8_C(-120), INT8_C(-126), INT8_C( 112), INT8_C( -73), INT8_C( -89), INT8_C( 6), INT8_C(-118), 
INT8_C( 2), INT8_C( 106), INT8_C( -46), INT8_C( -87), INT8_C( 71), INT8_C( -71), INT8_C( -5)), simde_mm256_set_epi8(INT8_C(-106), INT8_C( 63), INT8_C( -73), INT8_C( -65), INT8_C(-114), INT8_C(-117), INT8_C(-106), INT8_C( 65), INT8_C( -71), INT8_C( 42), INT8_C( -20), INT8_C( 112), INT8_C( -55), INT8_C(-114), INT8_C( 62), INT8_C( -91), INT8_C( -77), INT8_C( 123), INT8_C( -26), INT8_C( -13), INT8_C( 112), INT8_C( 105), INT8_C( -89), INT8_C( 6), INT8_C(-118), INT8_C( 49), INT8_C( -11), INT8_C( 72), INT8_C( -9), INT8_C( -16), INT8_C( 100), INT8_C( -5)) }, { simde_mm256_set_epi8(INT8_C( -48), INT8_C(-113), INT8_C( 21), INT8_C( 68), INT8_C( 115), INT8_C( 93), INT8_C( 99), INT8_C( -68), INT8_C( -9), INT8_C( 34), INT8_C( 15), INT8_C( 118), INT8_C( 54), INT8_C( -58), INT8_C( 11), INT8_C( 91), INT8_C( 122), INT8_C( 59), INT8_C( 108), INT8_C( -59), INT8_C( -39), INT8_C( 74), INT8_C( -25), INT8_C( 1), INT8_C( -26), INT8_C( -59), INT8_C( 91), INT8_C( -81), INT8_C( -8), INT8_C( -5), INT8_C( -55), INT8_C( -59)), UINT32_C(2027822108), simde_mm256_set_epi8(INT8_C( -93), INT8_C( -25), INT8_C( 14), INT8_C( -22), INT8_C( 85), INT8_C( -47), INT8_C( -59), INT8_C( -81), INT8_C( 94), INT8_C( -67), INT8_C( -69), INT8_C( -79), INT8_C( 61), INT8_C( 49), INT8_C( -27), INT8_C( 124), INT8_C( 89), INT8_C( 80), INT8_C( 55), INT8_C( -47), INT8_C( 45), INT8_C(-120), INT8_C( 28), INT8_C( -89), INT8_C( -69), INT8_C(-127), INT8_C( 65), INT8_C( 53), INT8_C( -35), INT8_C( -30), INT8_C( -74), INT8_C( -10)), simde_mm256_set_epi8(INT8_C( -48), INT8_C( -25), INT8_C( 14), INT8_C( -22), INT8_C( 85), INT8_C( 93), INT8_C( 99), INT8_C( -68), INT8_C( 94), INT8_C( -67), INT8_C( 15), INT8_C( -79), INT8_C( 61), INT8_C( 49), INT8_C( -27), INT8_C( 91), INT8_C( 122), INT8_C( 59), INT8_C( 108), INT8_C( -47), INT8_C( 45), INT8_C(-120), INT8_C( -25), INT8_C( 1), INT8_C( -26), INT8_C( -59), INT8_C( 91), INT8_C( 53), INT8_C( -35), INT8_C( -30), INT8_C( -55), INT8_C( -59)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_mask_mov_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_mask_mov_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i src; simde__mmask16 k; simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C(-23030), INT16_C( 6803), INT16_C(-21055), INT16_C( -910), INT16_C( -6009), INT16_C( 10471), INT16_C(-29834), INT16_C(-14111), INT16_C( -2981), INT16_C( 28733), INT16_C( 11699), INT16_C( 7781), INT16_C( 29036), INT16_C( -8103), INT16_C(-21310), INT16_C( 9176)), UINT16_C(32768), simde_mm256_set_epi16(INT16_C( 30563), INT16_C( -5523), INT16_C(-18306), INT16_C( 14754), INT16_C(-23068), INT16_C(-17313), INT16_C( 21598), INT16_C( 12635), INT16_C( 17053), INT16_C( 3377), INT16_C( 28887), INT16_C( 29062), INT16_C( 26146), INT16_C( -4849), INT16_C( 17375), INT16_C(-24515)), simde_mm256_set_epi16(INT16_C( 30563), INT16_C( 6803), INT16_C(-21055), INT16_C( -910), INT16_C( -6009), INT16_C( 10471), INT16_C(-29834), INT16_C(-14111), INT16_C( -2981), INT16_C( 28733), INT16_C( 11699), INT16_C( 7781), INT16_C( 29036), INT16_C( -8103), INT16_C(-21310), INT16_C( 9176)) }, { simde_mm256_set_epi16(INT16_C( 9971), INT16_C( -9002), INT16_C(-22233), INT16_C(-13917), INT16_C(-13732), INT16_C( -199), INT16_C( 9707), INT16_C( 31342), INT16_C(-13386), INT16_C(-15675), INT16_C( 10143), INT16_C( 19953), INT16_C(-25473), INT16_C( 27175), INT16_C(-12968), INT16_C(-11899)), UINT16_C(15492), simde_mm256_set_epi16(INT16_C(-30515), INT16_C(-13927), INT16_C( 24112), INT16_C( 9227), INT16_C(-20054), INT16_C(-11664), INT16_C( -7103), INT16_C(-13246), INT16_C( 4285), INT16_C(-23471), INT16_C( 24470), INT16_C(-13226), INT16_C( 4085), INT16_C( 10000), INT16_C(-17688), INT16_C( 28540)), simde_mm256_set_epi16(INT16_C( 9971), INT16_C( -9002), INT16_C( 24112), INT16_C( 9227), INT16_C(-20054), INT16_C(-11664), INT16_C( 9707), INT16_C( 
31342), INT16_C( 4285), INT16_C(-15675), INT16_C( 10143), INT16_C( 19953), INT16_C(-25473), INT16_C( 10000), INT16_C(-12968), INT16_C(-11899)) }, { simde_mm256_set_epi16(INT16_C(-17362), INT16_C( -1830), INT16_C(-16587), INT16_C(-17056), INT16_C(-14539), INT16_C( 7972), INT16_C(-26491), INT16_C( 20406), INT16_C( 26939), INT16_C( 20968), INT16_C(-31196), INT16_C( 11313), INT16_C(-25947), INT16_C( 19467), INT16_C( 22325), INT16_C( 14960)), UINT16_C(53867), simde_mm256_set_epi16(INT16_C( 15597), INT16_C(-30582), INT16_C(-21551), INT16_C(-25534), INT16_C( 13374), INT16_C( 17137), INT16_C(-27681), INT16_C(-10912), INT16_C(-10124), INT16_C( 1110), INT16_C( 1704), INT16_C(-17853), INT16_C( -7561), INT16_C(-19432), INT16_C( 22127), INT16_C(-30033)), simde_mm256_set_epi16(INT16_C( 15597), INT16_C(-30582), INT16_C(-16587), INT16_C(-25534), INT16_C(-14539), INT16_C( 7972), INT16_C(-27681), INT16_C( 20406), INT16_C( 26939), INT16_C( 1110), INT16_C( 1704), INT16_C( 11313), INT16_C( -7561), INT16_C( 19467), INT16_C( 22127), INT16_C(-30033)) }, { simde_mm256_set_epi16(INT16_C( 14671), INT16_C( 16470), INT16_C( 30174), INT16_C( -7130), INT16_C( 31852), INT16_C( 11282), INT16_C( 29705), INT16_C(-21158), INT16_C( 16917), INT16_C( 10042), INT16_C( 5958), INT16_C( -4695), INT16_C(-20590), INT16_C( 17528), INT16_C( -6738), INT16_C(-26754)), UINT16_C(25018), simde_mm256_set_epi16(INT16_C(-21192), INT16_C( 6104), INT16_C(-12947), INT16_C( 12440), INT16_C( 12048), INT16_C( -8528), INT16_C(-31627), INT16_C( 26711), INT16_C( -4678), INT16_C( 32013), INT16_C( 814), INT16_C( 19873), INT16_C( 32199), INT16_C( -7421), INT16_C( 21197), INT16_C( 25563)), simde_mm256_set_epi16(INT16_C( 14671), INT16_C( 6104), INT16_C(-12947), INT16_C( -7130), INT16_C( 31852), INT16_C( 11282), INT16_C( 29705), INT16_C( 26711), INT16_C( -4678), INT16_C( 10042), INT16_C( 814), INT16_C( 19873), INT16_C( 32199), INT16_C( 17528), INT16_C( 21197), INT16_C(-26754)) }, { simde_mm256_set_epi16(INT16_C( 30594), 
INT16_C(-11819), INT16_C( 16854), INT16_C( 8281), INT16_C( 32229), INT16_C( -2511), INT16_C(-10942), INT16_C(-28733), INT16_C( -8714), INT16_C( -6616), INT16_C( 4922), INT16_C( 1537), INT16_C( -8589), INT16_C( 6229), INT16_C(-12142), INT16_C( 12862)), UINT16_C(62562), simde_mm256_set_epi16(INT16_C( 28902), INT16_C( 31472), INT16_C( -9808), INT16_C(-22935), INT16_C( 4498), INT16_C(-13447), INT16_C(-31030), INT16_C(-31086), INT16_C( 6386), INT16_C(-11676), INT16_C( 9598), INT16_C(-30958), INT16_C(-24145), INT16_C(-18452), INT16_C( -8547), INT16_C(-20619)), simde_mm256_set_epi16(INT16_C( 28902), INT16_C( 31472), INT16_C( -9808), INT16_C(-22935), INT16_C( 32229), INT16_C(-13447), INT16_C(-10942), INT16_C(-28733), INT16_C( -8714), INT16_C(-11676), INT16_C( 9598), INT16_C( 1537), INT16_C( -8589), INT16_C( 6229), INT16_C( -8547), INT16_C( 12862)) }, { simde_mm256_set_epi16(INT16_C( -1185), INT16_C( 28882), INT16_C(-25549), INT16_C(-18169), INT16_C( -7221), INT16_C( 4400), INT16_C(-25724), INT16_C(-28761), INT16_C(-20506), INT16_C(-24341), INT16_C( 5349), INT16_C( -9608), INT16_C(-30698), INT16_C( 7741), INT16_C( 6648), INT16_C( 2085)), UINT16_C(40999), simde_mm256_set_epi16(INT16_C( 17256), INT16_C(-15790), INT16_C( 23704), INT16_C(-17336), INT16_C( -4418), INT16_C( 28004), INT16_C(-27022), INT16_C( 29950), INT16_C(-28093), INT16_C( 901), INT16_C(-13716), INT16_C(-16668), INT16_C(-12954), INT16_C( 4373), INT16_C( 25556), INT16_C(-31530)), simde_mm256_set_epi16(INT16_C( 17256), INT16_C( 28882), INT16_C( 23704), INT16_C(-18169), INT16_C( -7221), INT16_C( 4400), INT16_C(-25724), INT16_C(-28761), INT16_C(-20506), INT16_C(-24341), INT16_C(-13716), INT16_C( -9608), INT16_C(-30698), INT16_C( 4373), INT16_C( 25556), INT16_C(-31530)) }, { simde_mm256_set_epi16(INT16_C( -2894), INT16_C(-32472), INT16_C( 11220), INT16_C( 6669), INT16_C( 23064), INT16_C(-27024), INT16_C(-15827), INT16_C(-11722), INT16_C(-26431), INT16_C( 6527), INT16_C(-14361), INT16_C(-27595), INT16_C(-18051), 
INT16_C( -3890), INT16_C(-26121), INT16_C(-29481)), UINT16_C( 8894), simde_mm256_set_epi16(INT16_C( 18291), INT16_C( 26196), INT16_C(-27505), INT16_C( -8229), INT16_C(-25273), INT16_C( -2374), INT16_C( 25602), INT16_C( 26391), INT16_C( 16833), INT16_C(-18212), INT16_C( 6765), INT16_C( 22695), INT16_C( 31217), INT16_C( 10116), INT16_C( 12733), INT16_C( 11434)), simde_mm256_set_epi16(INT16_C( -2894), INT16_C(-32472), INT16_C(-27505), INT16_C( 6669), INT16_C( 23064), INT16_C(-27024), INT16_C( 25602), INT16_C(-11722), INT16_C( 16833), INT16_C( 6527), INT16_C( 6765), INT16_C( 22695), INT16_C( 31217), INT16_C( 10116), INT16_C( 12733), INT16_C(-29481)) }, { simde_mm256_set_epi16(INT16_C( 31730), INT16_C(-24704), INT16_C( -9707), INT16_C(-27923), INT16_C( 12026), INT16_C( -8313), INT16_C(-30875), INT16_C( -3866), INT16_C( 13477), INT16_C( -8690), INT16_C( 7980), INT16_C( 29046), INT16_C(-16244), INT16_C(-14526), INT16_C( -1470), INT16_C( 9637)), UINT16_C(47578), simde_mm256_set_epi16(INT16_C(-27085), INT16_C(-21439), INT16_C( -6499), INT16_C(-12213), INT16_C( 32648), INT16_C(-16468), INT16_C(-15892), INT16_C( 21695), INT16_C(-24474), INT16_C( -770), INT16_C(-22665), INT16_C(-20908), INT16_C( -267), INT16_C( -8958), INT16_C( -8601), INT16_C( 15369)), simde_mm256_set_epi16(INT16_C(-27085), INT16_C(-24704), INT16_C( -6499), INT16_C(-12213), INT16_C( 32648), INT16_C( -8313), INT16_C(-30875), INT16_C( 21695), INT16_C(-24474), INT16_C( -770), INT16_C( 7980), INT16_C(-20908), INT16_C( -267), INT16_C(-14526), INT16_C( -8601), INT16_C( 9637)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_mask_mov_epi16(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_mask_mov_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i src; simde__mmask8 k; simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C(-2051902106), 
INT32_C(-1489562810), INT32_C( -627115156), INT32_C( 913274595), INT32_C(-1198634499), INT32_C( 139959001), INT32_C(-1600412710), INT32_C( 934654383)), UINT8_C(164), simde_mm256_set_epi32(INT32_C(-2030579644), INT32_C( 386335945), INT32_C( -809785463), INT32_C( 2050465670), INT32_C( 458083110), INT32_C( 911315275), INT32_C( 438060664), INT32_C( 1293777364)), simde_mm256_set_epi32(INT32_C(-2030579644), INT32_C(-1489562810), INT32_C( -809785463), INT32_C( 913274595), INT32_C(-1198634499), INT32_C( 911315275), INT32_C(-1600412710), INT32_C( 934654383)) }, { simde_mm256_set_epi32(INT32_C( 1771679392), INT32_C( 747779042), INT32_C( 1568509396), INT32_C( -126295667), INT32_C( 1160475018), INT32_C( 343988166), INT32_C( 1516295700), INT32_C(-1359069473)), UINT8_C(178), simde_mm256_set_epi32(INT32_C( 1326620113), INT32_C(-1696986714), INT32_C( -201743610), INT32_C( 1745319425), INT32_C(-1761511775), INT32_C( 1270104738), INT32_C( 1013012890), INT32_C( 875163254)), simde_mm256_set_epi32(INT32_C( 1326620113), INT32_C( 747779042), INT32_C( -201743610), INT32_C( 1745319425), INT32_C( 1160475018), INT32_C( 343988166), INT32_C( 1013012890), INT32_C(-1359069473)) }, { simde_mm256_set_epi32(INT32_C( 518286759), INT32_C(-1532979566), INT32_C(-1858515332), INT32_C( 132974279), INT32_C( 761595911), INT32_C(-1701198420), INT32_C( 1222823321), INT32_C( -238072978)), UINT8_C(112), simde_mm256_set_epi32(INT32_C( -801582728), INT32_C( 1471437069), INT32_C( 1970067030), INT32_C( 1007722212), INT32_C( -224938211), INT32_C( -282706876), INT32_C( 1478523622), INT32_C( 630801793)), simde_mm256_set_epi32(INT32_C( 518286759), INT32_C( 1471437069), INT32_C( 1970067030), INT32_C( 1007722212), INT32_C( 761595911), INT32_C(-1701198420), INT32_C( 1222823321), INT32_C( -238072978)) }, { simde_mm256_set_epi32(INT32_C(-1331251138), INT32_C(-1232220609), INT32_C( -83499690), INT32_C(-1933771795), INT32_C( 1431588209), INT32_C( 9145992), INT32_C( 1554181542), INT32_C(-1595697445)), UINT8_C(209), 
simde_mm256_set_epi32(INT32_C(-1567962509), INT32_C(-1474212928), INT32_C(-1912431565), INT32_C( -269915367), INT32_C( -487478944), INT32_C(-1785315433), INT32_C(-1130207739), INT32_C( -388075219)), simde_mm256_set_epi32(INT32_C(-1567962509), INT32_C(-1474212928), INT32_C( -83499690), INT32_C( -269915367), INT32_C( 1431588209), INT32_C( 9145992), INT32_C( 1554181542), INT32_C( -388075219)) }, { simde_mm256_set_epi32(INT32_C( 1834864917), INT32_C( -675288826), INT32_C( 1896194121), INT32_C( 1512557303), INT32_C( -545693873), INT32_C( 513757285), INT32_C( 1710853511), INT32_C( 367108805)), UINT8_C(141), simde_mm256_set_epi32(INT32_C(-1942300637), INT32_C( 1717002604), INT32_C( -236253831), INT32_C( 993211905), INT32_C( 884769165), INT32_C( 1081163766), INT32_C( 1932456000), INT32_C( -153656708)), simde_mm256_set_epi32(INT32_C(-1942300637), INT32_C( -675288826), INT32_C( 1896194121), INT32_C( 1512557303), INT32_C( 884769165), INT32_C( 1081163766), INT32_C( 1710853511), INT32_C( -153656708)) }, { simde_mm256_set_epi32(INT32_C( 1057245798), INT32_C(-1988238659), INT32_C( 464652738), INT32_C(-1394070870), INT32_C( 410687111), INT32_C(-1023380740), INT32_C(-1345956426), INT32_C( 1062641002)), UINT8_C( 23), simde_mm256_set_epi32(INT32_C( 804151705), INT32_C(-1030405330), INT32_C(-1199759874), INT32_C( 1385588241), INT32_C(-1001762620), INT32_C( 1644327590), INT32_C( -999008446), INT32_C( 2086723218)), simde_mm256_set_epi32(INT32_C( 1057245798), INT32_C(-1988238659), INT32_C( 464652738), INT32_C( 1385588241), INT32_C( 410687111), INT32_C( 1644327590), INT32_C( -999008446), INT32_C( 2086723218)) }, { simde_mm256_set_epi32(INT32_C( 1481764690), INT32_C( 749562747), INT32_C( 1739109341), INT32_C( 1504825630), INT32_C(-1715949382), INT32_C( -901153926), INT32_C( -433640108), INT32_C( -201965406)), UINT8_C( 20), simde_mm256_set_epi32(INT32_C( 657000670), INT32_C( 71096321), INT32_C( 324839890), INT32_C( 1620447032), INT32_C( 1126601222), INT32_C(-1962686585), INT32_C( 
174027827), INT32_C( 1092631470)), simde_mm256_set_epi32(INT32_C( 1481764690), INT32_C( 749562747), INT32_C( 1739109341), INT32_C( 1620447032), INT32_C(-1715949382), INT32_C(-1962686585), INT32_C( -433640108), INT32_C( -201965406)) }, { simde_mm256_set_epi32(INT32_C( 1112858374), INT32_C( 1689862137), INT32_C(-1548199384), INT32_C( 560346027), INT32_C(-1831151558), INT32_C( 1961484348), INT32_C( 1845841537), INT32_C(-1490051864)), UINT8_C( 81), simde_mm256_set_epi32(INT32_C( 1782794803), INT32_C(-1212843470), INT32_C( 702145811), INT32_C( 712189474), INT32_C( 1538408527), INT32_C( 1714734347), INT32_C( 509188796), INT32_C( 1218928521)), simde_mm256_set_epi32(INT32_C( 1112858374), INT32_C(-1212843470), INT32_C(-1548199384), INT32_C( 712189474), INT32_C(-1831151558), INT32_C( 1961484348), INT32_C( 1845841537), INT32_C( 1218928521)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_mask_mov_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_mask_mov_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i src; simde__mmask8 k; simde__m256i a; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C( -211287979567135941), INT64_C(-9075367401252635211), INT64_C( 960243121462097108), INT64_C( 2005878706758239899)), UINT8_C( 32), simde_mm256_set_epi64x(INT64_C(-6608524325915548957), INT64_C(-1314544444369440805), INT64_C( 1510534294895689397), INT64_C( 1845655456785432498)), simde_mm256_set_epi64x(INT64_C( -211287979567135941), INT64_C(-9075367401252635211), INT64_C( 960243121462097108), INT64_C( 2005878706758239899)) }, { simde_mm256_set_epi64x(INT64_C(-1723084715644559301), INT64_C(-2080563230649448126), INT64_C( 5959215642275768669), INT64_C( 2475827768754845699)), UINT8_C(214), simde_mm256_set_epi64x(INT64_C( 5190090124690883989), INT64_C(-4089440710650942034), INT64_C(-9158432549634317510), INT64_C( 
8190130421956302558)), simde_mm256_set_epi64x(INT64_C(-1723084715644559301), INT64_C(-4089440710650942034), INT64_C(-9158432549634317510), INT64_C( 2475827768754845699)) }, { simde_mm256_set_epi64x(INT64_C(-3369675545100032670), INT64_C(-5453194687323465101), INT64_C(-7873359915838041141), INT64_C(-2715603020778233064)), UINT8_C(169), simde_mm256_set_epi64x(INT64_C( 6972842025751468465), INT64_C( -70349858703264913), INT64_C( -274794754558770720), INT64_C(-4632650321932570335)), simde_mm256_set_epi64x(INT64_C( 6972842025751468465), INT64_C(-5453194687323465101), INT64_C(-7873359915838041141), INT64_C(-4632650321932570335)) }, { simde_mm256_set_epi64x(INT64_C( 6027108319237370493), INT64_C(-1242194223738253269), INT64_C(-4627845169201021686), INT64_C( 6845596120956145572)), UINT8_C(143), simde_mm256_set_epi64x(INT64_C(-3877996964438243656), INT64_C(-4576357011277680458), INT64_C( 6353148636895875717), INT64_C( 4412973294027016788)), simde_mm256_set_epi64x(INT64_C(-3877996964438243656), INT64_C(-4576357011277680458), INT64_C( 6353148636895875717), INT64_C( 4412973294027016788)) }, { simde_mm256_set_epi64x(INT64_C( 9142894596557299884), INT64_C( 8214900458994780454), INT64_C( 8865669120860669544), INT64_C( 8653034493845742246)), UINT8_C(226), simde_mm256_set_epi64x(INT64_C( 1244643455152445841), INT64_C( 2297609102993095657), INT64_C(-5233775572318758587), INT64_C(-7732116011616278804)), simde_mm256_set_epi64x(INT64_C( 9142894596557299884), INT64_C( 8214900458994780454), INT64_C(-5233775572318758587), INT64_C( 8653034493845742246)) }, { simde_mm256_set_epi64x(INT64_C( 4960786529412164795), INT64_C( 8678743560946050948), INT64_C( 2843182024025655803), INT64_C( -83887347445242653)), UINT8_C( 74), simde_mm256_set_epi64x(INT64_C( 3754067458265850846), INT64_C(-6092043402181917138), INT64_C( 1306971064806148347), INT64_C(-5729735109094765451)), simde_mm256_set_epi64x(INT64_C( 3754067458265850846), INT64_C( 8678743560946050948), INT64_C( 1306971064806148347), INT64_C( 
-83887347445242653)) }, { simde_mm256_set_epi64x(INT64_C( 2112902535792085455), INT64_C(-6619508989181003755), INT64_C(-7221956771732279605), INT64_C( 6287623589682049686)), UINT8_C(191), simde_mm256_set_epi64x(INT64_C( 3797901248692596665), INT64_C( 7828643831964461331), INT64_C( 1067056404383166060), INT64_C(-2361551563160303879)), simde_mm256_set_epi64x(INT64_C( 3797901248692596665), INT64_C( 7828643831964461331), INT64_C( 1067056404383166060), INT64_C(-2361551563160303879)) }, { simde_mm256_set_epi64x(INT64_C( 6637695700610981441), INT64_C( 8064523188707259542), INT64_C(-3039387732265680328), INT64_C( 5125314073625570095)), UINT8_C(100), simde_mm256_set_epi64x(INT64_C( 4453523714429879071), INT64_C(-2274204535440821687), INT64_C(-3167205970195665497), INT64_C( 3325113155733044170)), simde_mm256_set_epi64x(INT64_C( 6637695700610981441), INT64_C(-2274204535440821687), INT64_C(-3039387732265680328), INT64_C( 5125314073625570095)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_mask_mov_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_mask_mov_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d src; simde__mmask8 k; simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 774.53), SIMDE_FLOAT64_C( 377.61), SIMDE_FLOAT64_C( 717.45), SIMDE_FLOAT64_C( 713.04)), UINT8_C( 22), simde_mm256_set_pd(SIMDE_FLOAT64_C( 723.04), SIMDE_FLOAT64_C( 343.93), SIMDE_FLOAT64_C( 199.28), SIMDE_FLOAT64_C( -711.48)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 774.53), SIMDE_FLOAT64_C( 343.93), SIMDE_FLOAT64_C( 199.28), SIMDE_FLOAT64_C( 713.04)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -510.02), SIMDE_FLOAT64_C( 340.82), SIMDE_FLOAT64_C( 576.36), SIMDE_FLOAT64_C( -95.74)), UINT8_C(255), simde_mm256_set_pd(SIMDE_FLOAT64_C( 918.58), SIMDE_FLOAT64_C( 109.09), SIMDE_FLOAT64_C( -879.13), SIMDE_FLOAT64_C( 336.44)), 
simde_mm256_set_pd(SIMDE_FLOAT64_C( 918.58), SIMDE_FLOAT64_C( 109.09), SIMDE_FLOAT64_C( -879.13), SIMDE_FLOAT64_C( 336.44)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 409.63), SIMDE_FLOAT64_C( -297.52), SIMDE_FLOAT64_C( 108.73), SIMDE_FLOAT64_C( 228.30)), UINT8_C(234), simde_mm256_set_pd(SIMDE_FLOAT64_C( -549.30), SIMDE_FLOAT64_C( -400.24), SIMDE_FLOAT64_C( -459.77), SIMDE_FLOAT64_C( -392.32)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -549.30), SIMDE_FLOAT64_C( -297.52), SIMDE_FLOAT64_C( -459.77), SIMDE_FLOAT64_C( 228.30)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -1.67), SIMDE_FLOAT64_C( -827.28), SIMDE_FLOAT64_C( 295.95), SIMDE_FLOAT64_C( 558.58)), UINT8_C(192), simde_mm256_set_pd(SIMDE_FLOAT64_C( 320.94), SIMDE_FLOAT64_C( -669.22), SIMDE_FLOAT64_C( 941.71), SIMDE_FLOAT64_C( -772.39)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -1.67), SIMDE_FLOAT64_C( -827.28), SIMDE_FLOAT64_C( 295.95), SIMDE_FLOAT64_C( 558.58)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -393.08), SIMDE_FLOAT64_C( 83.20), SIMDE_FLOAT64_C( 408.44), SIMDE_FLOAT64_C( 326.57)), UINT8_C( 97), simde_mm256_set_pd(SIMDE_FLOAT64_C( -490.95), SIMDE_FLOAT64_C( 526.06), SIMDE_FLOAT64_C( -564.61), SIMDE_FLOAT64_C( -582.24)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -393.08), SIMDE_FLOAT64_C( 83.20), SIMDE_FLOAT64_C( 408.44), SIMDE_FLOAT64_C( -582.24)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -808.43), SIMDE_FLOAT64_C( 58.34), SIMDE_FLOAT64_C( -379.04), SIMDE_FLOAT64_C( 54.10)), UINT8_C( 14), simde_mm256_set_pd(SIMDE_FLOAT64_C( 450.27), SIMDE_FLOAT64_C( -128.64), SIMDE_FLOAT64_C( -995.13), SIMDE_FLOAT64_C( 479.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 450.27), SIMDE_FLOAT64_C( -128.64), SIMDE_FLOAT64_C( -995.13), SIMDE_FLOAT64_C( 54.10)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 322.73), SIMDE_FLOAT64_C( 175.90), SIMDE_FLOAT64_C( -940.90), SIMDE_FLOAT64_C( -692.98)), UINT8_C(117), simde_mm256_set_pd(SIMDE_FLOAT64_C( -758.62), SIMDE_FLOAT64_C( 71.29), SIMDE_FLOAT64_C( 788.39), SIMDE_FLOAT64_C( -310.18)), 
simde_mm256_set_pd(SIMDE_FLOAT64_C( 322.73), SIMDE_FLOAT64_C( 71.29), SIMDE_FLOAT64_C( -940.90), SIMDE_FLOAT64_C( -310.18)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -378.45), SIMDE_FLOAT64_C( 288.81), SIMDE_FLOAT64_C( 695.49), SIMDE_FLOAT64_C( -580.49)), UINT8_C( 27), simde_mm256_set_pd(SIMDE_FLOAT64_C( 130.47), SIMDE_FLOAT64_C( 632.45), SIMDE_FLOAT64_C( 808.39), SIMDE_FLOAT64_C( 627.49)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 130.47), SIMDE_FLOAT64_C( 288.81), SIMDE_FLOAT64_C( 808.39), SIMDE_FLOAT64_C( 627.49)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_mask_mov_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_mask_mov_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 src; simde__mmask8 k; simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -555.53), SIMDE_FLOAT32_C( 800.80), SIMDE_FLOAT32_C( 174.96), SIMDE_FLOAT32_C( 12.40), SIMDE_FLOAT32_C( -124.14), SIMDE_FLOAT32_C( 378.54), SIMDE_FLOAT32_C( -864.83), SIMDE_FLOAT32_C( 821.24)), UINT8_C(222), simde_mm256_set_ps(SIMDE_FLOAT32_C( 486.82), SIMDE_FLOAT32_C( 716.60), SIMDE_FLOAT32_C( 497.18), SIMDE_FLOAT32_C( -260.12), SIMDE_FLOAT32_C( 283.83), SIMDE_FLOAT32_C( 297.46), SIMDE_FLOAT32_C( 984.87), SIMDE_FLOAT32_C( 59.43)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 486.82), SIMDE_FLOAT32_C( 716.60), SIMDE_FLOAT32_C( 174.96), SIMDE_FLOAT32_C( -260.12), SIMDE_FLOAT32_C( 283.83), SIMDE_FLOAT32_C( 297.46), SIMDE_FLOAT32_C( 984.87), SIMDE_FLOAT32_C( 821.24)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -111.85), SIMDE_FLOAT32_C( -140.93), SIMDE_FLOAT32_C( 91.77), SIMDE_FLOAT32_C( 175.59), SIMDE_FLOAT32_C( -358.15), SIMDE_FLOAT32_C( -375.20), SIMDE_FLOAT32_C( 580.39), SIMDE_FLOAT32_C( 459.07)), UINT8_C(207), simde_mm256_set_ps(SIMDE_FLOAT32_C( 315.52), SIMDE_FLOAT32_C( -581.86), SIMDE_FLOAT32_C( 639.05), SIMDE_FLOAT32_C( 298.85), SIMDE_FLOAT32_C( 
-373.24), SIMDE_FLOAT32_C( -178.13), SIMDE_FLOAT32_C( 98.66), SIMDE_FLOAT32_C( -334.34)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 315.52), SIMDE_FLOAT32_C( -581.86), SIMDE_FLOAT32_C( 91.77), SIMDE_FLOAT32_C( 175.59), SIMDE_FLOAT32_C( -373.24), SIMDE_FLOAT32_C( -178.13), SIMDE_FLOAT32_C( 98.66), SIMDE_FLOAT32_C( -334.34)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 869.24), SIMDE_FLOAT32_C( 558.63), SIMDE_FLOAT32_C( 500.11), SIMDE_FLOAT32_C( 448.62), SIMDE_FLOAT32_C( -66.45), SIMDE_FLOAT32_C( -429.13), SIMDE_FLOAT32_C( -688.99), SIMDE_FLOAT32_C( -828.86)), UINT8_C(106), simde_mm256_set_ps(SIMDE_FLOAT32_C( -209.77), SIMDE_FLOAT32_C( 87.73), SIMDE_FLOAT32_C( 807.71), SIMDE_FLOAT32_C( -161.53), SIMDE_FLOAT32_C( -720.29), SIMDE_FLOAT32_C( -841.34), SIMDE_FLOAT32_C( -679.61), SIMDE_FLOAT32_C( -751.55)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 869.24), SIMDE_FLOAT32_C( 87.73), SIMDE_FLOAT32_C( 807.71), SIMDE_FLOAT32_C( 448.62), SIMDE_FLOAT32_C( -720.29), SIMDE_FLOAT32_C( -429.13), SIMDE_FLOAT32_C( -679.61), SIMDE_FLOAT32_C( -828.86)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 648.52), SIMDE_FLOAT32_C( -621.11), SIMDE_FLOAT32_C( 44.58), SIMDE_FLOAT32_C( 173.55), SIMDE_FLOAT32_C( 227.71), SIMDE_FLOAT32_C( -831.29), SIMDE_FLOAT32_C( 210.07), SIMDE_FLOAT32_C( 469.94)), UINT8_C(209), simde_mm256_set_ps(SIMDE_FLOAT32_C( -457.88), SIMDE_FLOAT32_C( -345.53), SIMDE_FLOAT32_C( -52.29), SIMDE_FLOAT32_C( 652.21), SIMDE_FLOAT32_C( 802.89), SIMDE_FLOAT32_C( 706.42), SIMDE_FLOAT32_C( 63.40), SIMDE_FLOAT32_C( 904.43)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -457.88), SIMDE_FLOAT32_C( -345.53), SIMDE_FLOAT32_C( 44.58), SIMDE_FLOAT32_C( 652.21), SIMDE_FLOAT32_C( 227.71), SIMDE_FLOAT32_C( -831.29), SIMDE_FLOAT32_C( 210.07), SIMDE_FLOAT32_C( 904.43)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 592.24), SIMDE_FLOAT32_C( -735.22), SIMDE_FLOAT32_C( 596.55), SIMDE_FLOAT32_C( -541.18), SIMDE_FLOAT32_C( -342.66), SIMDE_FLOAT32_C( 98.60), SIMDE_FLOAT32_C( 188.58), SIMDE_FLOAT32_C( -720.97)), UINT8_C( 39), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( -832.08), SIMDE_FLOAT32_C( 690.51), SIMDE_FLOAT32_C( 197.88), SIMDE_FLOAT32_C( -345.06), SIMDE_FLOAT32_C( -603.10), SIMDE_FLOAT32_C( 528.02), SIMDE_FLOAT32_C( -679.70), SIMDE_FLOAT32_C( -757.75)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 592.24), SIMDE_FLOAT32_C( -735.22), SIMDE_FLOAT32_C( 197.88), SIMDE_FLOAT32_C( -541.18), SIMDE_FLOAT32_C( -342.66), SIMDE_FLOAT32_C( 528.02), SIMDE_FLOAT32_C( -679.70), SIMDE_FLOAT32_C( -757.75)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 630.75), SIMDE_FLOAT32_C( -765.52), SIMDE_FLOAT32_C( 644.64), SIMDE_FLOAT32_C( -522.11), SIMDE_FLOAT32_C( -647.87), SIMDE_FLOAT32_C( 408.91), SIMDE_FLOAT32_C( -874.53), SIMDE_FLOAT32_C( 777.74)), UINT8_C( 55), simde_mm256_set_ps(SIMDE_FLOAT32_C( -382.40), SIMDE_FLOAT32_C( 204.65), SIMDE_FLOAT32_C( 263.52), SIMDE_FLOAT32_C( 553.68), SIMDE_FLOAT32_C( 482.50), SIMDE_FLOAT32_C( -416.62), SIMDE_FLOAT32_C( 194.15), SIMDE_FLOAT32_C( -653.83)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 630.75), SIMDE_FLOAT32_C( -765.52), SIMDE_FLOAT32_C( 263.52), SIMDE_FLOAT32_C( 553.68), SIMDE_FLOAT32_C( -647.87), SIMDE_FLOAT32_C( -416.62), SIMDE_FLOAT32_C( 194.15), SIMDE_FLOAT32_C( -653.83)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -84.63), SIMDE_FLOAT32_C( 440.56), SIMDE_FLOAT32_C( 471.24), SIMDE_FLOAT32_C( 544.90), SIMDE_FLOAT32_C( -133.99), SIMDE_FLOAT32_C( -169.40), SIMDE_FLOAT32_C( 397.71), SIMDE_FLOAT32_C( 495.33)), UINT8_C(147), simde_mm256_set_ps(SIMDE_FLOAT32_C( -417.11), SIMDE_FLOAT32_C( -321.70), SIMDE_FLOAT32_C( 929.20), SIMDE_FLOAT32_C( -973.32), SIMDE_FLOAT32_C( 120.89), SIMDE_FLOAT32_C( 122.15), SIMDE_FLOAT32_C( 252.56), SIMDE_FLOAT32_C( 335.57)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -417.11), SIMDE_FLOAT32_C( 440.56), SIMDE_FLOAT32_C( 471.24), SIMDE_FLOAT32_C( -973.32), SIMDE_FLOAT32_C( -133.99), SIMDE_FLOAT32_C( -169.40), SIMDE_FLOAT32_C( 252.56), SIMDE_FLOAT32_C( 335.57)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 637.95), SIMDE_FLOAT32_C( 655.37), SIMDE_FLOAT32_C( 156.29), 
SIMDE_FLOAT32_C( -73.51), SIMDE_FLOAT32_C( -940.14), SIMDE_FLOAT32_C( 79.12), SIMDE_FLOAT32_C( -920.60), SIMDE_FLOAT32_C( 773.77)), UINT8_C(111), simde_mm256_set_ps(SIMDE_FLOAT32_C( -286.54), SIMDE_FLOAT32_C( -686.34), SIMDE_FLOAT32_C( 368.35), SIMDE_FLOAT32_C( -817.20), SIMDE_FLOAT32_C( -376.39), SIMDE_FLOAT32_C( 454.17), SIMDE_FLOAT32_C( 819.05), SIMDE_FLOAT32_C( 500.81)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 637.95), SIMDE_FLOAT32_C( -686.34), SIMDE_FLOAT32_C( 368.35), SIMDE_FLOAT32_C( -73.51), SIMDE_FLOAT32_C( -376.39), SIMDE_FLOAT32_C( 454.17), SIMDE_FLOAT32_C( 819.05), SIMDE_FLOAT32_C( 500.81)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_mask_mov_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_mov_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask64 k; simde__m512i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( -56), INT8_C( 10), INT8_C( 103), INT8_C( 84), INT8_C( 93), INT8_C( 24), INT8_C( -78), INT8_C( 35), INT8_C( 125), INT8_C( -63), INT8_C( 19), INT8_C( 4), INT8_C( 3), INT8_C( -87), INT8_C( 98), INT8_C(-113), INT8_C( 23), INT8_C(-124), INT8_C( -87), INT8_C( 63), INT8_C( 108), INT8_C( -18), INT8_C( -27), INT8_C(-127), INT8_C( -60), INT8_C( 60), INT8_C( -56), INT8_C( 3), INT8_C(-128), INT8_C( -62), INT8_C( 52), INT8_C( -74), INT8_C( -87), INT8_C( 32), INT8_C( 46), INT8_C(-128), INT8_C( 54), INT8_C( -19), INT8_C( 12), INT8_C( 22), INT8_C( -94), INT8_C( -84), INT8_C( -58), INT8_C( 92), INT8_C( -70), INT8_C( -25), INT8_C( 91), INT8_C( -45), INT8_C( 5), INT8_C( 109), INT8_C( -46), INT8_C( 37), INT8_C( 7), INT8_C( 44), INT8_C( 41), INT8_C(-106), INT8_C( 82), INT8_C( 48), INT8_C( -21), INT8_C( -90), INT8_C( 105), INT8_C( 117), INT8_C( -21), INT8_C( 39)), UINT64_C(11516165625622400866), simde_mm512_set_epi8(INT8_C( 43), INT8_C( -65), INT8_C( 47), INT8_C( 36), 
INT8_C(-101), INT8_C( 5), INT8_C( -76), INT8_C( -57), INT8_C( 77), INT8_C( 48), INT8_C( -46), INT8_C( -15), INT8_C( 78), INT8_C( 108), INT8_C( 114), INT8_C( 83), INT8_C( -72), INT8_C( 21), INT8_C( 100), INT8_C( 121), INT8_C( 29), INT8_C( -74), INT8_C( 81), INT8_C( -13), INT8_C( -57), INT8_C( -17), INT8_C( 20), INT8_C(-109), INT8_C( -87), INT8_C( 127), INT8_C( 92), INT8_C(-119), INT8_C( 26), INT8_C( 123), INT8_C( -51), INT8_C( 109), INT8_C( 30), INT8_C( -58), INT8_C(-117), INT8_C( 82), INT8_C( 111), INT8_C( -10), INT8_C( -10), INT8_C( -68), INT8_C( -4), INT8_C( -7), INT8_C( 117), INT8_C( 92), INT8_C( 94), INT8_C( -65), INT8_C( 109), INT8_C( 81), INT8_C( -71), INT8_C( -46), INT8_C( 113), INT8_C( 9), INT8_C( 123), INT8_C( -39), INT8_C( 76), INT8_C( 68), INT8_C( -3), INT8_C( 36), INT8_C( 0), INT8_C( 6)), simde_mm512_set_epi8(INT8_C( 43), INT8_C( 10), INT8_C( 103), INT8_C( 36), INT8_C(-101), INT8_C( 5), INT8_C( -76), INT8_C( -57), INT8_C( 77), INT8_C( 48), INT8_C( 19), INT8_C( -15), INT8_C( 3), INT8_C( -87), INT8_C( 98), INT8_C( 83), INT8_C( -72), INT8_C(-124), INT8_C( 100), INT8_C( 63), INT8_C( 108), INT8_C( -18), INT8_C( 81), INT8_C( -13), INT8_C( -57), INT8_C( 60), INT8_C( -56), INT8_C(-109), INT8_C( -87), INT8_C( 127), INT8_C( 92), INT8_C(-119), INT8_C( -87), INT8_C( 32), INT8_C( 46), INT8_C(-128), INT8_C( 30), INT8_C( -19), INT8_C( 12), INT8_C( 82), INT8_C( 111), INT8_C( -10), INT8_C( -58), INT8_C( 92), INT8_C( -4), INT8_C( -25), INT8_C( 91), INT8_C( 92), INT8_C( 94), INT8_C( 109), INT8_C( -46), INT8_C( 37), INT8_C( -71), INT8_C( -46), INT8_C( 113), INT8_C( 9), INT8_C( 82), INT8_C( -39), INT8_C( 76), INT8_C( -90), INT8_C( 105), INT8_C( 117), INT8_C( 0), INT8_C( 39)) }, { simde_mm512_set_epi8(INT8_C( -25), INT8_C(-127), INT8_C( 40), INT8_C( -10), INT8_C( 75), INT8_C(-123), INT8_C( 78), INT8_C( -2), INT8_C( -83), INT8_C( -74), INT8_C( -51), INT8_C( 46), INT8_C( 60), INT8_C( -39), INT8_C( 124), INT8_C(-117), INT8_C( 70), INT8_C( 66), INT8_C( -35), INT8_C( -51), 
INT8_C( -64), INT8_C( -61), INT8_C(-113), INT8_C( 2), INT8_C( -4), INT8_C( -72), INT8_C( 113), INT8_C( -63), INT8_C( -49), INT8_C( 70), INT8_C( -50), INT8_C( 52), INT8_C( 0), INT8_C( 13), INT8_C( 74), INT8_C( -60), INT8_C( 103), INT8_C( -7), INT8_C( -61), INT8_C( -37), INT8_C( -79), INT8_C( -77), INT8_C( -81), INT8_C( -83), INT8_C( 94), INT8_C( 52), INT8_C( -73), INT8_C( 76), INT8_C(-120), INT8_C( 80), INT8_C( -52), INT8_C(-126), INT8_C( -40), INT8_C( 119), INT8_C( -83), INT8_C( 62), INT8_C( 20), INT8_C( 23), INT8_C( 120), INT8_C( -13), INT8_C( 82), INT8_C( 32), INT8_C( -44), INT8_C( -44)), UINT64_C( 5249838983459854712), simde_mm512_set_epi8(INT8_C( -68), INT8_C( 121), INT8_C(-102), INT8_C( -30), INT8_C(-103), INT8_C( -31), INT8_C( 24), INT8_C( -55), INT8_C( -5), INT8_C( 8), INT8_C( -38), INT8_C( 37), INT8_C( 15), INT8_C(-120), INT8_C( 17), INT8_C( -63), INT8_C( 107), INT8_C( -41), INT8_C( -53), INT8_C(-107), INT8_C( 91), INT8_C( -9), INT8_C(-127), INT8_C( -39), INT8_C( 105), INT8_C( -27), INT8_C( 96), INT8_C( -96), INT8_C( 2), INT8_C( 44), INT8_C( 11), INT8_C( -43), INT8_C( -52), INT8_C( 126), INT8_C( 125), INT8_C( 121), INT8_C( 87), INT8_C( -95), INT8_C( 120), INT8_C( -46), INT8_C( 25), INT8_C( 71), INT8_C( 117), INT8_C( 47), INT8_C(-110), INT8_C( -87), INT8_C( -36), INT8_C( 25), INT8_C( 24), INT8_C( -92), INT8_C( 99), INT8_C( 15), INT8_C( 39), INT8_C( 38), INT8_C( 111), INT8_C( -29), INT8_C( 62), INT8_C( 34), INT8_C(-113), INT8_C( 121), INT8_C( -31), INT8_C(-111), INT8_C( 76), INT8_C(-113)), simde_mm512_set_epi8(INT8_C( -25), INT8_C( 121), INT8_C( 40), INT8_C( -10), INT8_C(-103), INT8_C(-123), INT8_C( 78), INT8_C( -2), INT8_C( -5), INT8_C( 8), INT8_C( -51), INT8_C( 37), INT8_C( 15), INT8_C( -39), INT8_C( 17), INT8_C( -63), INT8_C( 70), INT8_C( 66), INT8_C( -53), INT8_C( -51), INT8_C( 91), INT8_C( -9), INT8_C(-113), INT8_C( 2), INT8_C( 105), INT8_C( -72), INT8_C( 96), INT8_C( -96), INT8_C( 2), INT8_C( 44), INT8_C( -50), INT8_C( -43), INT8_C( -52), INT8_C( 13), 
INT8_C( 74), INT8_C( 121), INT8_C( 87), INT8_C( -7), INT8_C( -61), INT8_C( -46), INT8_C( -79), INT8_C( -77), INT8_C( -81), INT8_C( 47), INT8_C(-110), INT8_C( -87), INT8_C( -36), INT8_C( 25), INT8_C(-120), INT8_C( -92), INT8_C( -52), INT8_C( 15), INT8_C( 39), INT8_C( 119), INT8_C( -83), INT8_C( -29), INT8_C( 20), INT8_C( 34), INT8_C(-113), INT8_C( 121), INT8_C( -31), INT8_C( 32), INT8_C( -44), INT8_C( -44)) }, { simde_mm512_set_epi8(INT8_C( 117), INT8_C( 0), INT8_C( -58), INT8_C( -82), INT8_C( -40), INT8_C( -36), INT8_C( -30), INT8_C( -56), INT8_C( -68), INT8_C( -93), INT8_C( 25), INT8_C( -68), INT8_C( 8), INT8_C( 64), INT8_C( -70), INT8_C( -19), INT8_C( -64), INT8_C( -54), INT8_C( 120), INT8_C( 61), INT8_C( -73), INT8_C( 47), INT8_C(-113), INT8_C( 68), INT8_C( -44), INT8_C( -96), INT8_C(-106), INT8_C( -68), INT8_C( 75), INT8_C( -42), INT8_C( 94), INT8_C( -68), INT8_C( -10), INT8_C( 41), INT8_C( -90), INT8_C(-110), INT8_C(-116), INT8_C( -51), INT8_C( -75), INT8_C( 102), INT8_C( 14), INT8_C( 110), INT8_C( 89), INT8_C( 5), INT8_C( -49), INT8_C( 29), INT8_C( 63), INT8_C( -67), INT8_C( -85), INT8_C( 90), INT8_C( 97), INT8_C( -38), INT8_C( -35), INT8_C( 6), INT8_C( 37), INT8_C( 106), INT8_C( 102), INT8_C( 109), INT8_C( 47), INT8_C( 29), INT8_C( -81), INT8_C(-113), INT8_C( -49), INT8_C( 18)), UINT64_C(16853471664498189804), simde_mm512_set_epi8(INT8_C( -53), INT8_C( -59), INT8_C( -6), INT8_C( -57), INT8_C( 97), INT8_C( 68), INT8_C( -67), INT8_C( 117), INT8_C( -92), INT8_C( -3), INT8_C( 2), INT8_C( 59), INT8_C( 53), INT8_C( -13), INT8_C( -31), INT8_C( 47), INT8_C( -33), INT8_C( 67), INT8_C( -43), INT8_C( -53), INT8_C( -52), INT8_C( -3), INT8_C( 85), INT8_C( 48), INT8_C( -45), INT8_C( -72), INT8_C( 96), INT8_C( 85), INT8_C( 81), INT8_C( 28), INT8_C( -50), INT8_C(-107), INT8_C( -56), INT8_C( -85), INT8_C( -83), INT8_C( -25), INT8_C( 78), INT8_C( 13), INT8_C( 41), INT8_C( 86), INT8_C( -28), INT8_C( 90), INT8_C( 29), INT8_C(-115), INT8_C( -97), INT8_C(-121), INT8_C( -51), 
INT8_C( 53), INT8_C( -73), INT8_C( -64), INT8_C( -86), INT8_C( -65), INT8_C( 124), INT8_C(-109), INT8_C( 79), INT8_C(-111), INT8_C( 64), INT8_C( -98), INT8_C( -1), INT8_C( -43), INT8_C( -4), INT8_C( 72), INT8_C( 108), INT8_C( -95)), simde_mm512_set_epi8(INT8_C( -53), INT8_C( -59), INT8_C( -6), INT8_C( -82), INT8_C( 97), INT8_C( -36), INT8_C( -30), INT8_C( 117), INT8_C( -92), INT8_C( -3), INT8_C( 2), INT8_C( -68), INT8_C( 8), INT8_C( 64), INT8_C( -31), INT8_C( 47), INT8_C( -33), INT8_C( -54), INT8_C( 120), INT8_C( 61), INT8_C( -52), INT8_C( -3), INT8_C( 85), INT8_C( 48), INT8_C( -44), INT8_C( -96), INT8_C( 96), INT8_C( -68), INT8_C( 81), INT8_C( 28), INT8_C( -50), INT8_C(-107), INT8_C( -10), INT8_C( -85), INT8_C( -83), INT8_C( -25), INT8_C(-116), INT8_C( -51), INT8_C( 41), INT8_C( 102), INT8_C( -28), INT8_C( 110), INT8_C( 89), INT8_C( 5), INT8_C( -49), INT8_C(-121), INT8_C( -51), INT8_C( -67), INT8_C( -73), INT8_C( 90), INT8_C( -86), INT8_C( -38), INT8_C( -35), INT8_C(-109), INT8_C( 37), INT8_C(-111), INT8_C( 64), INT8_C( -98), INT8_C( -1), INT8_C( 29), INT8_C( -4), INT8_C( 72), INT8_C( -49), INT8_C( 18)) }, { simde_mm512_set_epi8(INT8_C( 37), INT8_C( 104), INT8_C( -81), INT8_C( 113), INT8_C( 31), INT8_C( -10), INT8_C( -32), INT8_C( -91), INT8_C( 51), INT8_C( -51), INT8_C( 60), INT8_C( 38), INT8_C( -1), INT8_C( -38), INT8_C( 2), INT8_C( 110), INT8_C( -61), INT8_C( 91), INT8_C( -50), INT8_C( 89), INT8_C( 27), INT8_C( -13), INT8_C( 111), INT8_C( -20), INT8_C( 51), INT8_C( -66), INT8_C( -26), INT8_C( 66), INT8_C( 45), INT8_C( -59), INT8_C( -45), INT8_C(-102), INT8_C( 84), INT8_C(-102), INT8_C( 103), INT8_C( 111), INT8_C( -47), INT8_C( 74), INT8_C( 111), INT8_C( 62), INT8_C( 41), INT8_C( -4), INT8_C( -19), INT8_C( 26), INT8_C(-127), INT8_C( -41), INT8_C( 14), INT8_C( 10), INT8_C( 63), INT8_C( 99), INT8_C( 51), INT8_C(-115), INT8_C( 118), INT8_C( -85), INT8_C(-111), INT8_C( 19), INT8_C( 43), INT8_C( -97), INT8_C( 107), INT8_C( 127), INT8_C(-100), INT8_C( 45), INT8_C( 
-77), INT8_C( 77)), UINT64_C( 3141946940694640625), simde_mm512_set_epi8(INT8_C( -47), INT8_C( -86), INT8_C( 35), INT8_C(-110), INT8_C( 95), INT8_C( -9), INT8_C( 86), INT8_C( 9), INT8_C( 31), INT8_C( 48), INT8_C( 63), INT8_C( -6), INT8_C( -36), INT8_C( -47), INT8_C( 95), INT8_C( -20), INT8_C( 21), INT8_C( -9), INT8_C( -2), INT8_C( 26), INT8_C( 63), INT8_C( 36), INT8_C( -33), INT8_C( 58), INT8_C( -40), INT8_C( 106), INT8_C( 2), INT8_C( -51), INT8_C( -13), INT8_C( -76), INT8_C( -77), INT8_C( -77), INT8_C( 65), INT8_C( 44), INT8_C( -48), INT8_C( 121), INT8_C(-106), INT8_C( 35), INT8_C( 49), INT8_C( -67), INT8_C( -35), INT8_C( -29), INT8_C( 89), INT8_C( 91), INT8_C( -53), INT8_C( -62), INT8_C( 107), INT8_C( -42), INT8_C(-115), INT8_C( 52), INT8_C( -17), INT8_C( 64), INT8_C(-105), INT8_C(-106), INT8_C( 65), INT8_C( 97), INT8_C( 85), INT8_C( 52), INT8_C( -17), INT8_C( 6), INT8_C( -73), INT8_C( 109), INT8_C( 99), INT8_C( 9)), simde_mm512_set_epi8(INT8_C( 37), INT8_C( 104), INT8_C( 35), INT8_C( 113), INT8_C( 95), INT8_C( -10), INT8_C( 86), INT8_C( 9), INT8_C( 31), INT8_C( -51), INT8_C( 60), INT8_C( -6), INT8_C( -36), INT8_C( -38), INT8_C( 95), INT8_C( 110), INT8_C( -61), INT8_C( -9), INT8_C( -2), INT8_C( 26), INT8_C( 27), INT8_C( -13), INT8_C( 111), INT8_C( -20), INT8_C( 51), INT8_C( -66), INT8_C( -26), INT8_C( -51), INT8_C( -13), INT8_C( -59), INT8_C( -45), INT8_C(-102), INT8_C( 65), INT8_C(-102), INT8_C( 103), INT8_C( 111), INT8_C( -47), INT8_C( 35), INT8_C( 49), INT8_C( -67), INT8_C( 41), INT8_C( -4), INT8_C( 89), INT8_C( 91), INT8_C( -53), INT8_C( -41), INT8_C( 107), INT8_C( 10), INT8_C(-115), INT8_C( 99), INT8_C( -17), INT8_C( 64), INT8_C( 118), INT8_C(-106), INT8_C( 65), INT8_C( 97), INT8_C( 85), INT8_C( 52), INT8_C( -17), INT8_C( 6), INT8_C(-100), INT8_C( 45), INT8_C( -77), INT8_C( 9)) }, { simde_mm512_set_epi8(INT8_C( -3), INT8_C( -87), INT8_C( 8), INT8_C( -39), INT8_C(-122), INT8_C( 94), INT8_C( -13), INT8_C( 31), INT8_C( 125), INT8_C( -74), INT8_C( 5), INT8_C( 
127), INT8_C( 68), INT8_C( 61), INT8_C( 93), INT8_C( 69), INT8_C( 92), INT8_C( -67), INT8_C( 4), INT8_C( -4), INT8_C( 29), INT8_C( -70), INT8_C( 28), INT8_C( -34), INT8_C( -99), INT8_C( 28), INT8_C( -2), INT8_C( 39), INT8_C( -60), INT8_C( 91), INT8_C( 66), INT8_C(-121), INT8_C( 40), INT8_C( -99), INT8_C( 6), INT8_C( 105), INT8_C( 36), INT8_C( -85), INT8_C( 62), INT8_C( 102), INT8_C( 23), INT8_C(-110), INT8_C( -92), INT8_C( 59), INT8_C( 17), INT8_C( -54), INT8_C( 5), INT8_C( 81), INT8_C( -71), INT8_C( 68), INT8_C( 114), INT8_C( -60), INT8_C( 39), INT8_C( -49), INT8_C( -84), INT8_C( 114), INT8_C( -81), INT8_C( 122), INT8_C( 97), INT8_C( -16), INT8_C( 21), INT8_C( -76), INT8_C( -80), INT8_C( -61)), UINT64_C( 7453836348998775155), simde_mm512_set_epi8(INT8_C(-107), INT8_C( 74), INT8_C( -78), INT8_C( -91), INT8_C( 7), INT8_C( 9), INT8_C( 96), INT8_C( -14), INT8_C( 10), INT8_C( 85), INT8_C( 75), INT8_C( -98), INT8_C( -93), INT8_C( 66), INT8_C(-107), INT8_C( -73), INT8_C(-106), INT8_C( -46), INT8_C( 35), INT8_C( 89), INT8_C( -81), INT8_C( -42), INT8_C( -88), INT8_C( 17), INT8_C( 34), INT8_C( 81), INT8_C(-103), INT8_C( 99), INT8_C( -3), INT8_C( 116), INT8_C( -98), INT8_C(-111), INT8_C( -10), INT8_C( 120), INT8_C( 115), INT8_C( 38), INT8_C( -96), INT8_C( -48), INT8_C( -20), INT8_C( 25), INT8_C( 44), INT8_C( -60), INT8_C( -69), INT8_C( 1), INT8_C( 63), INT8_C( 5), INT8_C( -90), INT8_C( -83), INT8_C( -81), INT8_C( 119), INT8_C( -80), INT8_C( 7), INT8_C(-116), INT8_C( 46), INT8_C( -50), INT8_C( -16), INT8_C( -90), INT8_C( 31), INT8_C( 57), INT8_C( -10), INT8_C( 87), INT8_C(-123), INT8_C(-112), INT8_C(-115)), simde_mm512_set_epi8(INT8_C( -3), INT8_C( 74), INT8_C( -78), INT8_C( -39), INT8_C(-122), INT8_C( 9), INT8_C( 96), INT8_C( -14), INT8_C( 125), INT8_C( 85), INT8_C( 75), INT8_C( -98), INT8_C( 68), INT8_C( 61), INT8_C( 93), INT8_C( -73), INT8_C( 92), INT8_C( -46), INT8_C( 4), INT8_C( 89), INT8_C( -81), INT8_C( -70), INT8_C( 28), INT8_C( -34), INT8_C( 34), INT8_C( 28), 
INT8_C(-103), INT8_C( 39), INT8_C( -3), INT8_C( 91), INT8_C( -98), INT8_C(-121), INT8_C( -10), INT8_C( 120), INT8_C( 6), INT8_C( 38), INT8_C( 36), INT8_C( -85), INT8_C( -20), INT8_C( 25), INT8_C( 44), INT8_C( -60), INT8_C( -69), INT8_C( 1), INT8_C( 17), INT8_C( 5), INT8_C( -90), INT8_C( -83), INT8_C( -71), INT8_C( 119), INT8_C( -80), INT8_C( 7), INT8_C( 39), INT8_C( -49), INT8_C( -84), INT8_C( -16), INT8_C( -81), INT8_C( 31), INT8_C( 57), INT8_C( -10), INT8_C( 21), INT8_C( -76), INT8_C(-112), INT8_C(-115)) }, { simde_mm512_set_epi8(INT8_C( 44), INT8_C( 93), INT8_C( 98), INT8_C( 56), INT8_C(-118), INT8_C( -35), INT8_C( -11), INT8_C( 90), INT8_C(-105), INT8_C( 2), INT8_C( 120), INT8_C( -6), INT8_C( 31), INT8_C( 70), INT8_C( 48), INT8_C( 80), INT8_C( -45), INT8_C( 63), INT8_C(-108), INT8_C( -43), INT8_C( -1), INT8_C( 90), INT8_C( -88), INT8_C( -74), INT8_C( 36), INT8_C( 30), INT8_C(-102), INT8_C( 22), INT8_C( 127), INT8_C(-117), INT8_C( 6), INT8_C( -94), INT8_C(-110), INT8_C( -41), INT8_C( 20), INT8_C(-121), INT8_C(-106), INT8_C( 73), INT8_C( 119), INT8_C( -14), INT8_C( 107), INT8_C( 48), INT8_C( 4), INT8_C( 95), INT8_C( 84), INT8_C( -53), INT8_C( -11), INT8_C( -26), INT8_C( 53), INT8_C( 115), INT8_C( -51), INT8_C( -54), INT8_C( -28), INT8_C( 93), INT8_C(-128), INT8_C(-104), INT8_C( 35), INT8_C( 58), INT8_C(-101), INT8_C( 110), INT8_C(-115), INT8_C( -77), INT8_C( -98), INT8_C( 114)), UINT64_C(12105239831388369272), simde_mm512_set_epi8(INT8_C( 23), INT8_C( 124), INT8_C( 68), INT8_C( 41), INT8_C( 105), INT8_C( 81), INT8_C( -85), INT8_C( 1), INT8_C( 93), INT8_C( 15), INT8_C( -8), INT8_C( 44), INT8_C(-105), INT8_C( 88), INT8_C( 99), INT8_C( -39), INT8_C( 119), INT8_C( 69), INT8_C( 127), INT8_C( 121), INT8_C( 78), INT8_C( 25), INT8_C(-125), INT8_C( 52), INT8_C( -5), INT8_C( -83), INT8_C(-101), INT8_C( 76), INT8_C( -86), INT8_C( -10), INT8_C( -96), INT8_C( -15), INT8_C( 51), INT8_C( 115), INT8_C( 24), INT8_C( 5), INT8_C( -93), INT8_C( 76), INT8_C( -76), INT8_C(-120), 
INT8_C( 26), INT8_C( 95), INT8_C( -66), INT8_C(-119), INT8_C( -88), INT8_C( 113), INT8_C( -39), INT8_C( -13), INT8_C( -1), INT8_C( -15), INT8_C( -7), INT8_C(-103), INT8_C( 99), INT8_C( 122), INT8_C(-107), INT8_C( -48), INT8_C(-117), INT8_C( 1), INT8_C( -98), INT8_C( 41), INT8_C(-124), INT8_C( 15), INT8_C( 39), INT8_C(-108)), simde_mm512_set_epi8(INT8_C( 23), INT8_C( 93), INT8_C( 68), INT8_C( 56), INT8_C(-118), INT8_C( 81), INT8_C( -85), INT8_C( 1), INT8_C( 93), INT8_C( 15), INT8_C( -8), INT8_C( 44), INT8_C(-105), INT8_C( 88), INT8_C( 99), INT8_C( 80), INT8_C( -45), INT8_C( 69), INT8_C( 127), INT8_C( 121), INT8_C( -1), INT8_C( 90), INT8_C(-125), INT8_C( 52), INT8_C( 36), INT8_C( -83), INT8_C(-101), INT8_C( 76), INT8_C( -86), INT8_C( -10), INT8_C( 6), INT8_C( -15), INT8_C( 51), INT8_C( -41), INT8_C( 20), INT8_C(-121), INT8_C(-106), INT8_C( 76), INT8_C( -76), INT8_C( -14), INT8_C( 26), INT8_C( 95), INT8_C( 4), INT8_C(-119), INT8_C( 84), INT8_C( -53), INT8_C( -11), INT8_C( -26), INT8_C( 53), INT8_C( 115), INT8_C( -7), INT8_C(-103), INT8_C( -28), INT8_C( 93), INT8_C(-128), INT8_C( -48), INT8_C( 35), INT8_C( 1), INT8_C( -98), INT8_C( 41), INT8_C(-124), INT8_C( -77), INT8_C( -98), INT8_C( 114)) }, { simde_mm512_set_epi8(INT8_C( -95), INT8_C( 85), INT8_C( -91), INT8_C( 56), INT8_C( 91), INT8_C( -49), INT8_C( 106), INT8_C( 16), INT8_C( 15), INT8_C( 10), INT8_C( 30), INT8_C( 12), INT8_C( 22), INT8_C( -73), INT8_C( 68), INT8_C( 83), INT8_C( 121), INT8_C( 56), INT8_C( 108), INT8_C( -49), INT8_C(-107), INT8_C( 73), INT8_C( -10), INT8_C( 107), INT8_C( -99), INT8_C( 105), INT8_C( -46), INT8_C( 26), INT8_C( 20), INT8_C( -18), INT8_C( 82), INT8_C( 37), INT8_C( -80), INT8_C( -81), INT8_C( 99), INT8_C( 24), INT8_C( 88), INT8_C( 86), INT8_C( -71), INT8_C( 54), INT8_C(-121), INT8_C( 30), INT8_C( 98), INT8_C( -68), INT8_C( 1), INT8_C( 93), INT8_C( 79), INT8_C( -44), INT8_C( -93), INT8_C( -75), INT8_C( 53), INT8_C( 21), INT8_C( 44), INT8_C(-111), INT8_C( 104), INT8_C(-101), INT8_C( -63), 
INT8_C(-108), INT8_C( 57), INT8_C( -13), INT8_C( 20), INT8_C( -6), INT8_C( -84), INT8_C( 38)), UINT64_C(14977178912506627906), simde_mm512_set_epi8(INT8_C( 94), INT8_C(-107), INT8_C( 99), INT8_C( 86), INT8_C(-126), INT8_C( 79), INT8_C( 11), INT8_C(-123), INT8_C( 112), INT8_C( 11), INT8_C( 44), INT8_C( -11), INT8_C( -10), INT8_C( 70), INT8_C( -45), INT8_C( 124), INT8_C(-122), INT8_C( 27), INT8_C( 30), INT8_C( 57), INT8_C( -81), INT8_C( -89), INT8_C( 107), INT8_C( -36), INT8_C( 100), INT8_C( -65), INT8_C( -83), INT8_C( -7), INT8_C( 33), INT8_C( -77), INT8_C( -24), INT8_C( 93), INT8_C( -88), INT8_C( 0), INT8_C( 125), INT8_C( -84), INT8_C( 102), INT8_C( 110), INT8_C( 49), INT8_C( -75), INT8_C(-106), INT8_C( 92), INT8_C( 31), INT8_C( 93), INT8_C(-123), INT8_C( -68), INT8_C( 119), INT8_C( -49), INT8_C( -54), INT8_C( 105), INT8_C( 12), INT8_C(-117), INT8_C(-105), INT8_C( 27), INT8_C( 72), INT8_C( -27), INT8_C( 59), INT8_C(-110), INT8_C( 8), INT8_C(-113), INT8_C( -36), INT8_C( -7), INT8_C( -64), INT8_C( 96)), simde_mm512_set_epi8(INT8_C( 94), INT8_C(-107), INT8_C( -91), INT8_C( 56), INT8_C(-126), INT8_C( 79), INT8_C( 11), INT8_C(-123), INT8_C( 112), INT8_C( 11), INT8_C( 30), INT8_C( -11), INT8_C( -10), INT8_C( -73), INT8_C( 68), INT8_C( 124), INT8_C(-122), INT8_C( 56), INT8_C( 30), INT8_C( -49), INT8_C(-107), INT8_C( 73), INT8_C( -10), INT8_C( 107), INT8_C( 100), INT8_C( -65), INT8_C( -46), INT8_C( -7), INT8_C( 33), INT8_C( -77), INT8_C( -24), INT8_C( 37), INT8_C( -80), INT8_C( 0), INT8_C( 99), INT8_C( -84), INT8_C( 88), INT8_C( 86), INT8_C( -71), INT8_C( -75), INT8_C(-121), INT8_C( 92), INT8_C( 31), INT8_C( -68), INT8_C(-123), INT8_C( -68), INT8_C( 79), INT8_C( -49), INT8_C( -54), INT8_C( 105), INT8_C( 53), INT8_C( 21), INT8_C( 44), INT8_C( 27), INT8_C( 72), INT8_C( -27), INT8_C( -63), INT8_C(-110), INT8_C( 57), INT8_C( -13), INT8_C( 20), INT8_C( -6), INT8_C( -64), INT8_C( 38)) }, { simde_mm512_set_epi8(INT8_C( 60), INT8_C( -31), INT8_C( 26), INT8_C( 5), INT8_C( 69), 
INT8_C( -80), INT8_C( 85), INT8_C( 4), INT8_C( -32), INT8_C( 20), INT8_C( 122), INT8_C( -81), INT8_C( -84), INT8_C(-101), INT8_C(-122), INT8_C( 51), INT8_C( 95), INT8_C( 44), INT8_C(-103), INT8_C( 108), INT8_C( 104), INT8_C( 108), INT8_C( 116), INT8_C(-113), INT8_C( -40), INT8_C( 118), INT8_C( 107), INT8_C( 127), INT8_C( 64), INT8_C( -95), INT8_C( 118), INT8_C( 32), INT8_C( -48), INT8_C( 49), INT8_C( 12), INT8_C(-100), INT8_C( -76), INT8_C( 61), INT8_C( 79), INT8_C( 120), INT8_C( 50), INT8_C( -11), INT8_C( -35), INT8_C(-127), INT8_C( 54), INT8_C( -2), INT8_C( 71), INT8_C( 96), INT8_C( 27), INT8_C( -13), INT8_C( -56), INT8_C(-110), INT8_C( 65), INT8_C( -57), INT8_C( 119), INT8_C( 70), INT8_C( 114), INT8_C( -31), INT8_C( 120), INT8_C( 113), INT8_C( 92), INT8_C( 94), INT8_C( -85), INT8_C( 19)), UINT64_C(12789799828226766427), simde_mm512_set_epi8(INT8_C( -16), INT8_C( -99), INT8_C(-121), INT8_C( -68), INT8_C( -23), INT8_C( 79), INT8_C( 48), INT8_C( -98), INT8_C( -95), INT8_C(-117), INT8_C( 13), INT8_C( -11), INT8_C( 79), INT8_C( 21), INT8_C( 127), INT8_C( 88), INT8_C( 9), INT8_C(-119), INT8_C( 68), INT8_C( 72), INT8_C( 35), INT8_C( -56), INT8_C( -74), INT8_C( 10), INT8_C( 101), INT8_C( 124), INT8_C( 29), INT8_C( -55), INT8_C( -78), INT8_C( -56), INT8_C( 124), INT8_C( 35), INT8_C( 11), INT8_C( 106), INT8_C( 41), INT8_C( 59), INT8_C(-108), INT8_C( 82), INT8_C( -41), INT8_C( 100), INT8_C( 43), INT8_C( -34), INT8_C( 124), INT8_C( 15), INT8_C( 113), INT8_C( -20), INT8_C( -83), INT8_C( -87), INT8_C( -60), INT8_C( 22), INT8_C( -71), INT8_C( 86), INT8_C( 73), INT8_C( -38), INT8_C(-106), INT8_C( 112), INT8_C( 98), INT8_C( 51), INT8_C(-116), INT8_C(-126), INT8_C( -96), INT8_C( 1), INT8_C( 23), INT8_C(-109)), simde_mm512_set_epi8(INT8_C( -16), INT8_C( -31), INT8_C(-121), INT8_C( -68), INT8_C( 69), INT8_C( -80), INT8_C( 85), INT8_C( -98), INT8_C( -32), INT8_C(-117), INT8_C( 13), INT8_C( -11), INT8_C( 79), INT8_C( 21), INT8_C( 127), INT8_C( 51), INT8_C( 95), INT8_C(-119), INT8_C( 
68), INT8_C( 72), INT8_C( 35), INT8_C( -56), INT8_C( -74), INT8_C( 10), INT8_C( -40), INT8_C( 118), INT8_C( 29), INT8_C( 127), INT8_C( -78), INT8_C( -56), INT8_C( 124), INT8_C( 32), INT8_C( -48), INT8_C( 49), INT8_C( 41), INT8_C( 59), INT8_C( -76), INT8_C( 82), INT8_C( 79), INT8_C( 100), INT8_C( 43), INT8_C( -34), INT8_C( -35), INT8_C(-127), INT8_C( 113), INT8_C( -20), INT8_C( -83), INT8_C( 96), INT8_C( -60), INT8_C( 22), INT8_C( -71), INT8_C( 86), INT8_C( 65), INT8_C( -38), INT8_C(-106), INT8_C( 70), INT8_C( 114), INT8_C( 51), INT8_C( 120), INT8_C(-126), INT8_C( -96), INT8_C( 94), INT8_C( 23), INT8_C(-109)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_mov_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_mov_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask32 k; simde__m512i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi16(INT16_C( -1573), INT16_C( -6208), INT16_C(-22615), INT16_C( -3799), INT16_C( -8282), INT16_C(-15214), INT16_C(-19149), INT16_C(-11524), INT16_C(-31971), INT16_C( -228), INT16_C(-27669), INT16_C( 30774), INT16_C( 14115), INT16_C(-29587), INT16_C( 15716), INT16_C( -9534), INT16_C( 31897), INT16_C(-25045), INT16_C(-20462), INT16_C( 20289), INT16_C( 31765), INT16_C( 26200), INT16_C( 22392), INT16_C( 19963), INT16_C( -9240), INT16_C( -2240), INT16_C( -8342), INT16_C( 31950), INT16_C(-15053), INT16_C( -6789), INT16_C( -5359), INT16_C( 9700)), UINT32_C( 175873983), simde_mm512_set_epi16(INT16_C( 11048), INT16_C(-23497), INT16_C(-22229), INT16_C( 22523), INT16_C( 32192), INT16_C( 17944), INT16_C( 1999), INT16_C( -512), INT16_C( 22838), INT16_C( 10573), INT16_C( 22536), INT16_C(-21942), INT16_C( -9055), INT16_C( -9938), INT16_C( 8369), INT16_C(-32672), INT16_C( 24766), INT16_C(-31364), INT16_C(-26690), INT16_C( 14381), INT16_C( 18820), INT16_C( -175), 
INT16_C(-17138), INT16_C( 8826), INT16_C( 16551), INT16_C( 18053), INT16_C( -1223), INT16_C(-28643), INT16_C( -5550), INT16_C( 5011), INT16_C( 22761), INT16_C( 728)), simde_mm512_set_epi16(INT16_C( -1573), INT16_C( -6208), INT16_C(-22615), INT16_C( -3799), INT16_C( 32192), INT16_C(-15214), INT16_C( 1999), INT16_C(-11524), INT16_C(-31971), INT16_C( 10573), INT16_C( 22536), INT16_C(-21942), INT16_C( -9055), INT16_C(-29587), INT16_C( 8369), INT16_C(-32672), INT16_C( 24766), INT16_C(-25045), INT16_C(-20462), INT16_C( 14381), INT16_C( 18820), INT16_C( -175), INT16_C(-17138), INT16_C( 8826), INT16_C( 16551), INT16_C( -2240), INT16_C( -1223), INT16_C(-28643), INT16_C( -5550), INT16_C( 5011), INT16_C( 22761), INT16_C( 728)) }, { simde_mm512_set_epi16(INT16_C(-26134), INT16_C(-18760), INT16_C( 11789), INT16_C( 30499), INT16_C(-14297), INT16_C(-24132), INT16_C( 2429), INT16_C( -7785), INT16_C(-19953), INT16_C(-20176), INT16_C( -1917), INT16_C( 18470), INT16_C( 24222), INT16_C( 23067), INT16_C(-11100), INT16_C( 10676), INT16_C(-21685), INT16_C( 31093), INT16_C( -8360), INT16_C( 1808), INT16_C(-12418), INT16_C( 11067), INT16_C(-31728), INT16_C( -5932), INT16_C(-22846), INT16_C( -963), INT16_C(-15933), INT16_C(-24302), INT16_C(-30670), INT16_C( 23129), INT16_C(-13017), INT16_C( 1590)), UINT32_C( 305590317), simde_mm512_set_epi16(INT16_C( -1674), INT16_C( -3241), INT16_C( 14220), INT16_C(-24128), INT16_C( 866), INT16_C(-16676), INT16_C(-25544), INT16_C( 108), INT16_C( 5014), INT16_C(-21407), INT16_C(-24139), INT16_C(-16531), INT16_C( -2292), INT16_C(-22143), INT16_C( -5932), INT16_C(-26498), INT16_C( 23176), INT16_C(-18719), INT16_C( 8259), INT16_C( -216), INT16_C(-21324), INT16_C( 14052), INT16_C( 27040), INT16_C(-18518), INT16_C(-27268), INT16_C( -5574), INT16_C( 30453), INT16_C( 27189), INT16_C( 26223), INT16_C(-14168), INT16_C(-11169), INT16_C( 22360)), simde_mm512_set_epi16(INT16_C(-26134), INT16_C(-18760), INT16_C( 11789), INT16_C(-24128), INT16_C(-14297), INT16_C(-24132), 
INT16_C(-25544), INT16_C( -7785), INT16_C(-19953), INT16_C(-20176), INT16_C(-24139), INT16_C(-16531), INT16_C( 24222), INT16_C(-22143), INT16_C( -5932), INT16_C( 10676), INT16_C( 23176), INT16_C(-18719), INT16_C( 8259), INT16_C( -216), INT16_C(-12418), INT16_C( 11067), INT16_C(-31728), INT16_C( -5932), INT16_C(-22846), INT16_C( -963), INT16_C( 30453), INT16_C(-24302), INT16_C( 26223), INT16_C(-14168), INT16_C(-13017), INT16_C( 22360)) }, { simde_mm512_set_epi16(INT16_C( -2488), INT16_C( 1592), INT16_C( -6444), INT16_C( 30598), INT16_C(-17786), INT16_C( -8406), INT16_C( 4184), INT16_C( 17081), INT16_C(-10288), INT16_C(-12158), INT16_C( -9059), INT16_C(-20947), INT16_C(-17395), INT16_C( 27392), INT16_C( 13857), INT16_C( 24137), INT16_C( 15083), INT16_C( -2381), INT16_C( 6197), INT16_C( 26607), INT16_C( -281), INT16_C( 20513), INT16_C( 11284), INT16_C( -8182), INT16_C( 154), INT16_C( 25062), INT16_C( -9545), INT16_C( -8470), INT16_C( 13769), INT16_C( 3698), INT16_C( 23943), INT16_C( 22626)), UINT32_C(3243316268), simde_mm512_set_epi16(INT16_C( -7817), INT16_C( 19901), INT16_C(-23323), INT16_C( 16418), INT16_C( 24031), INT16_C(-12678), INT16_C( 26071), INT16_C( 6078), INT16_C( -6446), INT16_C( 28656), INT16_C(-20287), INT16_C(-10682), INT16_C( 17023), INT16_C( 12770), INT16_C( 15020), INT16_C( 12339), INT16_C( 22254), INT16_C( -6532), INT16_C( 21585), INT16_C(-29214), INT16_C( -5140), INT16_C(-13775), INT16_C(-14838), INT16_C( 1876), INT16_C(-10206), INT16_C( -7669), INT16_C( 13226), INT16_C( 8231), INT16_C( -5215), INT16_C(-29950), INT16_C(-17119), INT16_C( 7959)), simde_mm512_set_epi16(INT16_C( -7817), INT16_C( 19901), INT16_C( -6444), INT16_C( 30598), INT16_C(-17786), INT16_C( -8406), INT16_C( 4184), INT16_C( 6078), INT16_C(-10288), INT16_C( 28656), INT16_C( -9059), INT16_C(-10682), INT16_C(-17395), INT16_C( 27392), INT16_C( 13857), INT16_C( 12339), INT16_C( 15083), INT16_C( -2381), INT16_C( 6197), INT16_C(-29214), INT16_C( -281), INT16_C(-13775), INT16_C( 11284), 
INT16_C( -8182), INT16_C( 154), INT16_C( 25062), INT16_C( 13226), INT16_C( -8470), INT16_C( -5215), INT16_C(-29950), INT16_C( 23943), INT16_C( 22626)) }, { simde_mm512_set_epi16(INT16_C(-17496), INT16_C(-16278), INT16_C( 28161), INT16_C( -9022), INT16_C( 14893), INT16_C( 20773), INT16_C( 13716), INT16_C(-18494), INT16_C( 22637), INT16_C(-20939), INT16_C(-10174), INT16_C( 12840), INT16_C(-22747), INT16_C(-14668), INT16_C( 4699), INT16_C( 31693), INT16_C( -8682), INT16_C(-21674), INT16_C( -4586), INT16_C( -243), INT16_C(-24920), INT16_C( 12309), INT16_C( 15037), INT16_C( 13960), INT16_C(-29756), INT16_C( -4367), INT16_C( 8434), INT16_C( 16542), INT16_C( 8529), INT16_C(-28527), INT16_C( -2939), INT16_C(-28531)), UINT32_C(3541815971), simde_mm512_set_epi16(INT16_C( 6266), INT16_C( 18547), INT16_C(-26004), INT16_C(-14807), INT16_C( 23049), INT16_C(-28984), INT16_C( 18071), INT16_C(-18277), INT16_C( 31923), INT16_C(-14090), INT16_C( -6209), INT16_C( 12842), INT16_C( 1554), INT16_C(-27194), INT16_C(-25297), INT16_C( 17174), INT16_C( 4338), INT16_C(-25809), INT16_C( 2041), INT16_C(-19046), INT16_C(-17853), INT16_C(-18639), INT16_C( 25727), INT16_C(-30630), INT16_C(-22895), INT16_C( 8885), INT16_C( 29491), INT16_C(-13154), INT16_C( 9738), INT16_C(-20851), INT16_C( 1418), INT16_C( 24102)), simde_mm512_set_epi16(INT16_C( 6266), INT16_C( 18547), INT16_C( 28161), INT16_C(-14807), INT16_C( 14893), INT16_C( 20773), INT16_C( 18071), INT16_C(-18277), INT16_C( 22637), INT16_C(-20939), INT16_C(-10174), INT16_C( 12842), INT16_C( 1554), INT16_C(-14668), INT16_C(-25297), INT16_C( 17174), INT16_C( 4338), INT16_C(-25809), INT16_C( -4586), INT16_C(-19046), INT16_C(-24920), INT16_C( 12309), INT16_C( 25727), INT16_C( 13960), INT16_C(-22895), INT16_C( -4367), INT16_C( 29491), INT16_C( 16542), INT16_C( 8529), INT16_C(-28527), INT16_C( 1418), INT16_C( 24102)) }, { simde_mm512_set_epi16(INT16_C( 3849), INT16_C( 25678), INT16_C( 20058), INT16_C(-14631), INT16_C( 9156), INT16_C( -9469), INT16_C( 
26797), INT16_C( 4095), INT16_C( 10328), INT16_C( -2602), INT16_C( 29484), INT16_C( 23696), INT16_C( 10492), INT16_C( 15123), INT16_C( 12075), INT16_C( -22), INT16_C( -3095), INT16_C(-21257), INT16_C( 4948), INT16_C( 32515), INT16_C(-22489), INT16_C( 12880), INT16_C(-31816), INT16_C( 14894), INT16_C( 17736), INT16_C( 7904), INT16_C(-21771), INT16_C(-28666), INT16_C(-14552), INT16_C(-24798), INT16_C(-10273), INT16_C(-18470)), UINT32_C(3424030392), simde_mm512_set_epi16(INT16_C( -3100), INT16_C(-21068), INT16_C( 28535), INT16_C(-17256), INT16_C(-16628), INT16_C( 1662), INT16_C(-21371), INT16_C( 7545), INT16_C( -2558), INT16_C( 5671), INT16_C(-14288), INT16_C(-27939), INT16_C( 10529), INT16_C(-22955), INT16_C( 1055), INT16_C( 27502), INT16_C( 28704), INT16_C(-22359), INT16_C( 974), INT16_C(-13833), INT16_C(-10322), INT16_C( -9220), INT16_C(-23650), INT16_C( 7138), INT16_C(-26251), INT16_C(-26301), INT16_C(-11538), INT16_C( 7661), INT16_C( 25835), INT16_C( -1591), INT16_C(-31336), INT16_C(-13623)), simde_mm512_set_epi16(INT16_C( -3100), INT16_C(-21068), INT16_C( 20058), INT16_C(-14631), INT16_C(-16628), INT16_C( 1662), INT16_C( 26797), INT16_C( 4095), INT16_C( 10328), INT16_C( -2602), INT16_C( 29484), INT16_C(-27939), INT16_C( 10492), INT16_C(-22955), INT16_C( 1055), INT16_C( -22), INT16_C( 28704), INT16_C(-21257), INT16_C( 4948), INT16_C( 32515), INT16_C(-10322), INT16_C( -9220), INT16_C(-23650), INT16_C( 14894), INT16_C(-26251), INT16_C( 7904), INT16_C(-11538), INT16_C( 7661), INT16_C( 25835), INT16_C(-24798), INT16_C(-10273), INT16_C(-18470)) }, { simde_mm512_set_epi16(INT16_C( -8164), INT16_C(-26845), INT16_C( 11124), INT16_C( 8752), INT16_C( 22766), INT16_C( 8670), INT16_C( 20153), INT16_C( 18240), INT16_C( 9917), INT16_C( -9695), INT16_C( 13965), INT16_C( 22461), INT16_C(-14283), INT16_C(-28547), INT16_C( -3283), INT16_C( 28423), INT16_C( -7094), INT16_C(-23805), INT16_C(-29561), INT16_C( -8833), INT16_C( 19973), INT16_C( 4641), INT16_C( 26375), INT16_C(-24343), 
INT16_C(-25797), INT16_C( 10099), INT16_C( 15606), INT16_C( -3388), INT16_C( 27200), INT16_C( 17184), INT16_C( -8305), INT16_C( -2842)), UINT32_C(3498958446), simde_mm512_set_epi16(INT16_C( -8480), INT16_C( 28422), INT16_C(-27516), INT16_C( 21347), INT16_C(-25796), INT16_C(-16858), INT16_C( 12539), INT16_C(-24081), INT16_C( 21534), INT16_C(-24785), INT16_C( 27018), INT16_C( 5065), INT16_C(-18143), INT16_C( 8109), INT16_C(-17219), INT16_C( 31482), INT16_C( 9138), INT16_C( 22982), INT16_C(-21234), INT16_C( 25459), INT16_C( 6589), INT16_C(-13007), INT16_C( 15857), INT16_C(-20120), INT16_C( -7568), INT16_C(-12198), INT16_C(-11606), INT16_C( 12227), INT16_C(-14277), INT16_C( -5440), INT16_C( 23811), INT16_C( 16734)), simde_mm512_set_epi16(INT16_C( -8480), INT16_C( 28422), INT16_C( 11124), INT16_C( 21347), INT16_C( 22766), INT16_C( 8670), INT16_C( 20153), INT16_C( 18240), INT16_C( 21534), INT16_C( -9695), INT16_C( 13965), INT16_C( 22461), INT16_C(-18143), INT16_C( 8109), INT16_C( -3283), INT16_C( 31482), INT16_C( 9138), INT16_C( 22982), INT16_C(-29561), INT16_C( 25459), INT16_C( 6589), INT16_C(-13007), INT16_C( 15857), INT16_C(-24343), INT16_C(-25797), INT16_C(-12198), INT16_C(-11606), INT16_C( -3388), INT16_C(-14277), INT16_C( -5440), INT16_C( 23811), INT16_C( -2842)) }, { simde_mm512_set_epi16(INT16_C(-10740), INT16_C(-19800), INT16_C( 23089), INT16_C( 21852), INT16_C( 15397), INT16_C(-10864), INT16_C( 6811), INT16_C( 1049), INT16_C(-27986), INT16_C(-13885), INT16_C(-16896), INT16_C( 2159), INT16_C( 21619), INT16_C(-26860), INT16_C(-26036), INT16_C( 8638), INT16_C( -6244), INT16_C( 12305), INT16_C( 12521), INT16_C(-23200), INT16_C( 16405), INT16_C( 1911), INT16_C(-19978), INT16_C(-24716), INT16_C( 18780), INT16_C(-19576), INT16_C( 23239), INT16_C( 3968), INT16_C( 10340), INT16_C(-18924), INT16_C(-27656), INT16_C( 29459)), UINT32_C(1078376780), simde_mm512_set_epi16(INT16_C( 273), INT16_C( -1720), INT16_C( 22076), INT16_C( -2052), INT16_C(-17942), INT16_C( -7577), 
INT16_C(-30883), INT16_C(-19493), INT16_C(-19679), INT16_C( -1198), INT16_C( -2289), INT16_C( 6912), INT16_C(-20982), INT16_C(-18030), INT16_C( 27608), INT16_C( 2367), INT16_C( 1167), INT16_C(-16688), INT16_C(-14772), INT16_C(-28473), INT16_C(-30638), INT16_C(-20143), INT16_C( 18762), INT16_C( 11938), INT16_C( 3849), INT16_C( 10905), INT16_C( 14089), INT16_C(-29438), INT16_C( -8204), INT16_C(-31577), INT16_C( -4765), INT16_C( 1792)), simde_mm512_set_epi16(INT16_C(-10740), INT16_C( -1720), INT16_C( 23089), INT16_C( 21852), INT16_C( 15397), INT16_C(-10864), INT16_C( 6811), INT16_C( 1049), INT16_C(-27986), INT16_C( -1198), INT16_C(-16896), INT16_C( 2159), INT16_C( 21619), INT16_C(-18030), INT16_C( 27608), INT16_C( 8638), INT16_C( 1167), INT16_C( 12305), INT16_C(-14772), INT16_C(-28473), INT16_C(-30638), INT16_C( 1911), INT16_C(-19978), INT16_C( 11938), INT16_C( 18780), INT16_C( 10905), INT16_C( 23239), INT16_C( 3968), INT16_C( -8204), INT16_C(-31577), INT16_C(-27656), INT16_C( 29459)) }, { simde_mm512_set_epi16(INT16_C( 22307), INT16_C(-11389), INT16_C( 9226), INT16_C( 7897), INT16_C( 32155), INT16_C( 2611), INT16_C( 11978), INT16_C( 5179), INT16_C(-24755), INT16_C(-19543), INT16_C(-15643), INT16_C( -2365), INT16_C(-27002), INT16_C( 7884), INT16_C(-20138), INT16_C( -3743), INT16_C(-12844), INT16_C(-25331), INT16_C( -7592), INT16_C( 24295), INT16_C(-17679), INT16_C( -9896), INT16_C( -1721), INT16_C( 1797), INT16_C(-21616), INT16_C( 9515), INT16_C( 22554), INT16_C( 6233), INT16_C( 21958), INT16_C( -6794), INT16_C(-15370), INT16_C( 18181)), UINT32_C(2433004361), simde_mm512_set_epi16(INT16_C(-21831), INT16_C( -7695), INT16_C( 23301), INT16_C( 6159), INT16_C( 1766), INT16_C( 18873), INT16_C( 26864), INT16_C(-27621), INT16_C( -8001), INT16_C( -8493), INT16_C(-29763), INT16_C( 9106), INT16_C( 1393), INT16_C(-12504), INT16_C( 9424), INT16_C( 15142), INT16_C( 3894), INT16_C( -9649), INT16_C( -4354), INT16_C(-12373), INT16_C(-13406), INT16_C( 12423), INT16_C( 26727), 
INT16_C(-11193), INT16_C( 15482), INT16_C(-21938), INT16_C(-28148), INT16_C( -1462), INT16_C( -2537), INT16_C( 7119), INT16_C( -3240), INT16_C(-31830)), simde_mm512_set_epi16(INT16_C(-21831), INT16_C(-11389), INT16_C( 9226), INT16_C( 6159), INT16_C( 32155), INT16_C( 2611), INT16_C( 11978), INT16_C(-27621), INT16_C(-24755), INT16_C(-19543), INT16_C(-15643), INT16_C( -2365), INT16_C(-27002), INT16_C(-12504), INT16_C(-20138), INT16_C( -3743), INT16_C( 3894), INT16_C(-25331), INT16_C( -4354), INT16_C(-12373), INT16_C(-17679), INT16_C( -9896), INT16_C( 26727), INT16_C(-11193), INT16_C(-21616), INT16_C(-21938), INT16_C( 22554), INT16_C( 6233), INT16_C( -2537), INT16_C( -6794), INT16_C(-15370), INT16_C(-31830)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_mov_epi16(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_mov_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask16 k; simde__m512i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C(-1748841636), INT32_C( 600342911), INT32_C( 1346502861), INT32_C(-1119296012), INT32_C( 542725165), INT32_C( 811581991), INT32_C(-1753809264), INT32_C(-2095888677), INT32_C( 21844621), INT32_C( -668859652), INT32_C( 304402382), INT32_C( 1173008100), INT32_C( -727866068), INT32_C( 599757), INT32_C( 1586862788), INT32_C(-1998308703)), UINT16_C(60467), simde_mm512_set_epi32(INT32_C( 646279344), INT32_C(-1381858570), INT32_C( 1528846110), INT32_C( -686931066), INT32_C( 1116827472), INT32_C(-1909789352), INT32_C( 1965198777), INT32_C( 743332806), INT32_C( -485827488), INT32_C(-1346955971), INT32_C( 625279893), INT32_C( 390045432), INT32_C( 1242706406), INT32_C( -451702195), INT32_C( -797642518), INT32_C( 1682577743)), simde_mm512_set_epi32(INT32_C( 646279344), INT32_C(-1381858570), INT32_C( 1528846110), INT32_C(-1119296012), INT32_C( 1116827472), 
INT32_C(-1909789352), INT32_C(-1753809264), INT32_C(-2095888677), INT32_C( 21844621), INT32_C( -668859652), INT32_C( 625279893), INT32_C( 390045432), INT32_C( -727866068), INT32_C( 599757), INT32_C( -797642518), INT32_C( 1682577743)) }, { simde_mm512_set_epi32(INT32_C( 478337815), INT32_C( -537978403), INT32_C(-1351889488), INT32_C( 1090048308), INT32_C( 1261235095), INT32_C(-1289893124), INT32_C( -387446550), INT32_C(-1938729505), INT32_C(-1389958008), INT32_C( 1730413171), INT32_C( 1087827160), INT32_C( 1023459790), INT32_C(-1481706049), INT32_C( 911835427), INT32_C(-1124770978), INT32_C( 1212776438)), UINT16_C(65510), simde_mm512_set_epi32(INT32_C( -876793269), INT32_C( 354193822), INT32_C(-1942817736), INT32_C( 48337666), INT32_C(-1069034730), INT32_C( -258187388), INT32_C(-1735480646), INT32_C( 1239662333), INT32_C(-1087348321), INT32_C( 777072035), INT32_C( -223191004), INT32_C( -671373205), INT32_C( -333775053), INT32_C( 1946636837), INT32_C( 875386084), INT32_C( 41135181)), simde_mm512_set_epi32(INT32_C( -876793269), INT32_C( 354193822), INT32_C(-1942817736), INT32_C( 48337666), INT32_C(-1069034730), INT32_C( -258187388), INT32_C(-1735480646), INT32_C( 1239662333), INT32_C(-1087348321), INT32_C( 777072035), INT32_C( -223191004), INT32_C( 1023459790), INT32_C(-1481706049), INT32_C( 1946636837), INT32_C( 875386084), INT32_C( 1212776438)) }, { simde_mm512_set_epi32(INT32_C( 739047763), INT32_C( 1498945773), INT32_C( 1776295699), INT32_C( 1298376143), INT32_C(-1413206606), INT32_C(-1101195004), INT32_C( 1096357047), INT32_C( 1201409099), INT32_C(-1184934080), INT32_C(-1142871559), INT32_C(-1331799428), INT32_C( 2127606263), INT32_C( 1810587941), INT32_C(-1568035201), INT32_C(-1514801640), INT32_C( 1754146272)), UINT16_C(17782), simde_mm512_set_epi32(INT32_C(-1637684250), INT32_C( 1624419961), INT32_C(-1721698305), INT32_C( 1216991175), INT32_C( 1086797293), INT32_C( -544515074), INT32_C(-1866991972), INT32_C( 1497966040), INT32_C( 183681068), INT32_C( 
1846911046), INT32_C( 396433769), INT32_C( 1567943719), INT32_C( 1544652060), INT32_C( 1999507462), INT32_C( -389522003), INT32_C( 660842170)), simde_mm512_set_epi32(INT32_C( 739047763), INT32_C( 1624419961), INT32_C( 1776295699), INT32_C( 1298376143), INT32_C(-1413206606), INT32_C( -544515074), INT32_C( 1096357047), INT32_C( 1497966040), INT32_C(-1184934080), INT32_C( 1846911046), INT32_C( 396433769), INT32_C( 1567943719), INT32_C( 1810587941), INT32_C( 1999507462), INT32_C( -389522003), INT32_C( 1754146272)) }, { simde_mm512_set_epi32(INT32_C(-1787060903), INT32_C( 1591528199), INT32_C( 1360730903), INT32_C( -392663993), INT32_C( 1833403381), INT32_C( 667948495), INT32_C(-1351186880), INT32_C(-1869951013), INT32_C(-1764668962), INT32_C( 1727501907), INT32_C(-1699520398), INT32_C(-2078068732), INT32_C(-1191187391), INT32_C( 809086335), INT32_C( -915516374), INT32_C( 2044786719)), UINT16_C(19153), simde_mm512_set_epi32(INT32_C(-1124863619), INT32_C( -733840886), INT32_C( 225375619), INT32_C( 2033345748), INT32_C( 62836182), INT32_C(-1797131359), INT32_C( -791707937), INT32_C(-1161020437), INT32_C( 1933148289), INT32_C(-1354039663), INT32_C( 533923030), INT32_C( 457770626), INT32_C(-2130199261), INT32_C( -201626469), INT32_C( 1603256738), INT32_C( 385840376)), simde_mm512_set_epi32(INT32_C(-1787060903), INT32_C( -733840886), INT32_C( 1360730903), INT32_C( -392663993), INT32_C( 62836182), INT32_C( 667948495), INT32_C( -791707937), INT32_C(-1869951013), INT32_C( 1933148289), INT32_C(-1354039663), INT32_C(-1699520398), INT32_C( 457770626), INT32_C(-1191187391), INT32_C( 809086335), INT32_C( -915516374), INT32_C( 385840376)) }, { simde_mm512_set_epi32(INT32_C(-1844996035), INT32_C( -483918772), INT32_C(-1530619556), INT32_C( -447486042), INT32_C( -153016391), INT32_C( 1772993408), INT32_C(-1557466731), INT32_C( 1884729185), INT32_C(-1170473640), INT32_C( -231873321), INT32_C( 1063107119), INT32_C( 1409583343), INT32_C( 131479252), INT32_C(-1464445699), 
INT32_C(-1859507666), INT32_C( 1142318206)), UINT16_C(39686), simde_mm512_set_epi32(INT32_C(-1710909147), INT32_C( 1655743921), INT32_C(-1520991125), INT32_C(-1200934587), INT32_C( -721899112), INT32_C( 1216881740), INT32_C( -481496777), INT32_C( -893026644), INT32_C(-2035526652), INT32_C( -294630589), INT32_C(-1446210787), INT32_C( -547573265), INT32_C( 1911285838), INT32_C(-1067024301), INT32_C(-1545394687), INT32_C( 1507767747)), simde_mm512_set_epi32(INT32_C(-1710909147), INT32_C( -483918772), INT32_C(-1530619556), INT32_C(-1200934587), INT32_C( -721899112), INT32_C( 1772993408), INT32_C( -481496777), INT32_C( -893026644), INT32_C(-1170473640), INT32_C( -231873321), INT32_C( 1063107119), INT32_C( 1409583343), INT32_C( 131479252), INT32_C(-1067024301), INT32_C(-1545394687), INT32_C( 1142318206)) }, { simde_mm512_set_epi32(INT32_C( 2003854537), INT32_C( 316518418), INT32_C(-2128378506), INT32_C( -814023178), INT32_C( 2134095257), INT32_C( -273917753), INT32_C( 269941696), INT32_C(-1761573676), INT32_C( -504711162), INT32_C( 1086943646), INT32_C( -304633534), INT32_C( -905159738), INT32_C(-1025692186), INT32_C(-2082862175), INT32_C(-1626855678), INT32_C(-1231176910)), UINT16_C(13329), simde_mm512_set_epi32(INT32_C( 838273890), INT32_C( 1209103370), INT32_C( 947433971), INT32_C( 91213725), INT32_C( 749577280), INT32_C( 157602752), INT32_C( 2125537515), INT32_C( -782796801), INT32_C( -120430288), INT32_C( -810448185), INT32_C( -659512402), INT32_C( 419195007), INT32_C( -830103963), INT32_C( -756234442), INT32_C( 376291679), INT32_C( -610488282)), simde_mm512_set_epi32(INT32_C( 2003854537), INT32_C( 316518418), INT32_C( 947433971), INT32_C( 91213725), INT32_C( 2134095257), INT32_C( 157602752), INT32_C( 269941696), INT32_C(-1761573676), INT32_C( -504711162), INT32_C( 1086943646), INT32_C( -304633534), INT32_C( 419195007), INT32_C(-1025692186), INT32_C(-2082862175), INT32_C(-1626855678), INT32_C( -610488282)) }, { simde_mm512_set_epi32(INT32_C( -974755823), INT32_C( 
-98121742), INT32_C( 1561555936), INT32_C(-1281058782), INT32_C(-2008886211), INT32_C( 1568326299), INT32_C( 1232828554), INT32_C( 127919997), INT32_C( 1015818460), INT32_C( -681833659), INT32_C( 340145717), INT32_C( 1048452961), INT32_C( 749206991), INT32_C( 1290937767), INT32_C(-1150545818), INT32_C( -48881570)), UINT16_C(55435), simde_mm512_set_epi32(INT32_C( 1177945769), INT32_C(-1878447950), INT32_C( -271391312), INT32_C(-2014500164), INT32_C(-2080120479), INT32_C( 1195569010), INT32_C(-1583493780), INT32_C( 1466155853), INT32_C( -735473338), INT32_C( 1922464741), INT32_C( -224185100), INT32_C( -929578437), INT32_C( 831459587), INT32_C(-1105963780), INT32_C(-1360707796), INT32_C( -211781248)), simde_mm512_set_epi32(INT32_C( 1177945769), INT32_C(-1878447950), INT32_C( 1561555936), INT32_C(-2014500164), INT32_C(-2080120479), INT32_C( 1568326299), INT32_C( 1232828554), INT32_C( 127919997), INT32_C( -735473338), INT32_C( -681833659), INT32_C( 340145717), INT32_C( 1048452961), INT32_C( 831459587), INT32_C( 1290937767), INT32_C(-1360707796), INT32_C( -211781248)) }, { simde_mm512_set_epi32(INT32_C( 1583932216), INT32_C(-1528139164), INT32_C( 665399981), INT32_C( 718332631), INT32_C( -984331868), INT32_C(-1317077859), INT32_C(-1440392153), INT32_C(-1978382578), INT32_C( 828185710), INT32_C( 1905160582), INT32_C( 120938992), INT32_C( 1613459128), INT32_C( -812252493), INT32_C(-1503952372), INT32_C( 231875300), INT32_C( -885498028)), UINT16_C(45743), simde_mm512_set_epi32(INT32_C(-1033540577), INT32_C( -995705628), INT32_C(-2098565905), INT32_C(-1609941379), INT32_C( 451122481), INT32_C( 898911803), INT32_C( -918933314), INT32_C( 1301755496), INT32_C( 654535343), INT32_C( 1915381036), INT32_C( -595265918), INT32_C( -204141630), INT32_C(-1824782722), INT32_C(-1457642917), INT32_C(-1358921472), INT32_C( 1013008616)), simde_mm512_set_epi32(INT32_C(-1033540577), INT32_C(-1528139164), INT32_C(-2098565905), INT32_C(-1609941379), INT32_C( -984331868), INT32_C(-1317077859), 
INT32_C( -918933314), INT32_C(-1978382578), INT32_C( 654535343), INT32_C( 1905160582), INT32_C( -595265918), INT32_C( 1613459128), INT32_C(-1824782722), INT32_C(-1457642917), INT32_C(-1358921472), INT32_C( 1013008616)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_mov_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_mov_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask8 k; simde__m512i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( 8729250599109288206), INT64_C( 925123000700261284), INT64_C( -996462675499144949), INT64_C(-5486361937319788764), INT64_C(-1619246833501834651), INT64_C(-1914665916415518359), INT64_C( 4596079613709719053), INT64_C(-1669293344454375632)), UINT8_C(136), simde_mm512_set_epi64(INT64_C(-2718786087636304341), INT64_C( 6271007050593066413), INT64_C( 7325428114350079264), INT64_C( 8373606416957659495), INT64_C( 8585702140748752091), INT64_C(-6106352141912550191), INT64_C(-7415158757307660945), INT64_C(-4168322686232168747)), simde_mm512_set_epi64(INT64_C(-2718786087636304341), INT64_C( 925123000700261284), INT64_C( -996462675499144949), INT64_C(-5486361937319788764), INT64_C( 8585702140748752091), INT64_C(-1914665916415518359), INT64_C( 4596079613709719053), INT64_C(-1669293344454375632)) }, { simde_mm512_set_epi64(INT64_C( 8240025841211248490), INT64_C( 2437990159450284908), INT64_C( 2201815834941113848), INT64_C( 7879550161691977002), INT64_C( 3825487759520775297), INT64_C( 6674403996216424931), INT64_C(-5802137669857725171), INT64_C( 5686996017309487110)), UINT8_C(227), simde_mm512_set_epi64(INT64_C( 120730317606372397), INT64_C(-1410770079656234556), INT64_C( 4532617684378198659), INT64_C( 9004023903916376139), INT64_C( 7206885247739448460), INT64_C(-6411218032719574536), INT64_C( -962636034832057562), 
INT64_C(-6211267245753502041)), simde_mm512_set_epi64(INT64_C( 120730317606372397), INT64_C(-1410770079656234556), INT64_C( 4532617684378198659), INT64_C( 7879550161691977002), INT64_C( 3825487759520775297), INT64_C( 6674403996216424931), INT64_C( -962636034832057562), INT64_C(-6211267245753502041)) }, { simde_mm512_set_epi64(INT64_C( 4674722797399239366), INT64_C( 2000178744548395677), INT64_C(-3230169679464817239), INT64_C( 6675942378016655726), INT64_C(-4074632284771109640), INT64_C(-1969073951075376054), INT64_C(-7309602967246577272), INT64_C( 6746883208360816464)), UINT8_C(189), simde_mm512_set_epi64(INT64_C( 7111791735729821232), INT64_C(-6377956101145598745), INT64_C(-4955467359912007508), INT64_C( -340840922408165844), INT64_C( 3280430708356940081), INT64_C( 400669322893233577), INT64_C( 6742772793155919855), INT64_C(-1365845768056837484)), simde_mm512_set_epi64(INT64_C( 7111791735729821232), INT64_C( 2000178744548395677), INT64_C(-4955467359912007508), INT64_C( -340840922408165844), INT64_C( 3280430708356940081), INT64_C( 400669322893233577), INT64_C(-7309602967246577272), INT64_C(-1365845768056837484)) }, { simde_mm512_set_epi64(INT64_C(-5185665192936807952), INT64_C( 2873887117219468065), INT64_C( 944218707053685182), INT64_C(-6471325153303919649), INT64_C(-1551809186210791512), INT64_C( 8676397618641344048), INT64_C(-1480083839359048471), INT64_C(-2573286236881012052)), UINT8_C(135), simde_mm512_set_epi64(INT64_C( 4851071406626175825), INT64_C( 2006733877612279017), INT64_C( 9148059701805005067), INT64_C( 3484083856858518164), INT64_C( -542612751996632572), INT64_C( 6154040976669554118), INT64_C( 4310055852136225460), INT64_C( 6666177398356729891)), simde_mm512_set_epi64(INT64_C( 4851071406626175825), INT64_C( 2873887117219468065), INT64_C( 944218707053685182), INT64_C(-6471325153303919649), INT64_C(-1551809186210791512), INT64_C( 6154040976669554118), INT64_C( 4310055852136225460), INT64_C( 6666177398356729891)) }, { 
simde_mm512_set_epi64(INT64_C(-6362423492218583699), INT64_C( 4052676248150053459), INT64_C(-1785632160509127109), INT64_C( 4504790352522402260), INT64_C( 214305831990150369), INT64_C( 4122674741194642780), INT64_C(-9061446978520477770), INT64_C( -925260945734331795)), UINT8_C( 88), simde_mm512_set_epi64(INT64_C( 7816755513219693536), INT64_C(-8078701368125426812), INT64_C( 5999276564615449517), INT64_C(-3747208296317683129), INT64_C(-3767121149493822975), INT64_C( 3269862772677933078), INT64_C(-1274534447611012205), INT64_C( 367478185734650139)), simde_mm512_set_epi64(INT64_C(-6362423492218583699), INT64_C(-8078701368125426812), INT64_C(-1785632160509127109), INT64_C(-3747208296317683129), INT64_C(-3767121149493822975), INT64_C( 4122674741194642780), INT64_C(-9061446978520477770), INT64_C( -925260945734331795)) }, { simde_mm512_set_epi64(INT64_C(-6749425177074609965), INT64_C( 8453995530571484051), INT64_C(-7619559937003101591), INT64_C( 3005943923235484348), INT64_C( 4327678115781969631), INT64_C( 5990841649027118513), INT64_C(-1241607161778990291), INT64_C( -91855491071654622)), UINT8_C( 22), simde_mm512_set_epi64(INT64_C( 4461859928182214174), INT64_C(-5186049742858346871), INT64_C( -636993447067685727), INT64_C( 8339698509359201789), INT64_C( 4598711567911914631), INT64_C( 7428996315725576873), INT64_C( 6513452752711502515), INT64_C(-6603414145042292282)), simde_mm512_set_epi64(INT64_C(-6749425177074609965), INT64_C( 8453995530571484051), INT64_C(-7619559937003101591), INT64_C( 8339698509359201789), INT64_C( 4327678115781969631), INT64_C( 7428996315725576873), INT64_C( 6513452752711502515), INT64_C( -91855491071654622)) }, { simde_mm512_set_epi64(INT64_C(-7023609179598013523), INT64_C(-2166290313032224989), INT64_C(-1367963225958164233), INT64_C(-9082538196892642083), INT64_C(-7482977792619995502), INT64_C( 4800709110944492165), INT64_C( 3082355013095664677), INT64_C(-4286500001112695437)), UINT8_C( 42), simde_mm512_set_epi64(INT64_C( 522664068472938939), 
INT64_C(-5622535385140832229), INT64_C( 4829749372798053845), INT64_C( -330958976268778895), INT64_C(-2657198631452288613), INT64_C(-3805394135151266272), INT64_C( 4429043998616724751), INT64_C( 4131511442627175760)), simde_mm512_set_epi64(INT64_C(-7023609179598013523), INT64_C(-2166290313032224989), INT64_C( 4829749372798053845), INT64_C(-9082538196892642083), INT64_C(-2657198631452288613), INT64_C( 4800709110944492165), INT64_C( 4429043998616724751), INT64_C(-4286500001112695437)) }, { simde_mm512_set_epi64(INT64_C(-5567656428388000347), INT64_C( -971128712423557311), INT64_C( 3761317547504069574), INT64_C( 6096071933426825544), INT64_C( 3108166743366703612), INT64_C(-3435283790563075237), INT64_C( 3598996591046999900), INT64_C( 2520744130071328064)), UINT8_C( 13), simde_mm512_set_epi64(INT64_C( -976144998301952820), INT64_C( 5304141922221069696), INT64_C( 1153833608356774417), INT64_C(-5431879705444140176), INT64_C(-4200442870371425874), INT64_C( 9118970466689378415), INT64_C( 7182201605874776129), INT64_C( 6344954152679193639)), simde_mm512_set_epi64(INT64_C(-5567656428388000347), INT64_C( -971128712423557311), INT64_C( 3761317547504069574), INT64_C( 6096071933426825544), INT64_C(-4200442870371425874), INT64_C( 9118970466689378415), INT64_C( 3598996591046999900), INT64_C( 6344954152679193639)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_mov_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_mov_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -997.43), SIMDE_FLOAT64_C( -24.75), SIMDE_FLOAT64_C( 811.92), SIMDE_FLOAT64_C( 716.01), SIMDE_FLOAT64_C( -286.81), SIMDE_FLOAT64_C( 360.81), SIMDE_FLOAT64_C( -618.94), SIMDE_FLOAT64_C( 103.41)), UINT8_C( 17), simde_mm512_set_pd(SIMDE_FLOAT64_C( 
779.73), SIMDE_FLOAT64_C( -71.34), SIMDE_FLOAT64_C( 74.67), SIMDE_FLOAT64_C( 569.44), SIMDE_FLOAT64_C( 765.94), SIMDE_FLOAT64_C( 114.94), SIMDE_FLOAT64_C( 85.69), SIMDE_FLOAT64_C( 982.40)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -997.43), SIMDE_FLOAT64_C( -24.75), SIMDE_FLOAT64_C( 811.92), SIMDE_FLOAT64_C( 569.44), SIMDE_FLOAT64_C( -286.81), SIMDE_FLOAT64_C( 360.81), SIMDE_FLOAT64_C( -618.94), SIMDE_FLOAT64_C( 982.40)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -989.28), SIMDE_FLOAT64_C( -906.64), SIMDE_FLOAT64_C( -211.36), SIMDE_FLOAT64_C( -108.84), SIMDE_FLOAT64_C( 211.05), SIMDE_FLOAT64_C( -602.13), SIMDE_FLOAT64_C( 19.95), SIMDE_FLOAT64_C( -745.56)), UINT8_C(115), simde_mm512_set_pd(SIMDE_FLOAT64_C( -995.20), SIMDE_FLOAT64_C( 66.82), SIMDE_FLOAT64_C( 747.55), SIMDE_FLOAT64_C( 590.56), SIMDE_FLOAT64_C( 522.53), SIMDE_FLOAT64_C( 340.37), SIMDE_FLOAT64_C( -323.43), SIMDE_FLOAT64_C( -598.33)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -989.28), SIMDE_FLOAT64_C( 66.82), SIMDE_FLOAT64_C( 747.55), SIMDE_FLOAT64_C( 590.56), SIMDE_FLOAT64_C( 211.05), SIMDE_FLOAT64_C( -602.13), SIMDE_FLOAT64_C( -323.43), SIMDE_FLOAT64_C( -598.33)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 435.94), SIMDE_FLOAT64_C( -117.09), SIMDE_FLOAT64_C( -343.63), SIMDE_FLOAT64_C( -686.94), SIMDE_FLOAT64_C( -632.13), SIMDE_FLOAT64_C( 520.11), SIMDE_FLOAT64_C( 584.62), SIMDE_FLOAT64_C( 269.90)), UINT8_C(142), simde_mm512_set_pd(SIMDE_FLOAT64_C( -307.53), SIMDE_FLOAT64_C( 533.35), SIMDE_FLOAT64_C( -283.32), SIMDE_FLOAT64_C( 860.26), SIMDE_FLOAT64_C( -955.05), SIMDE_FLOAT64_C( -767.10), SIMDE_FLOAT64_C( -553.49), SIMDE_FLOAT64_C( 540.17)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -307.53), SIMDE_FLOAT64_C( -117.09), SIMDE_FLOAT64_C( -343.63), SIMDE_FLOAT64_C( -686.94), SIMDE_FLOAT64_C( -955.05), SIMDE_FLOAT64_C( -767.10), SIMDE_FLOAT64_C( -553.49), SIMDE_FLOAT64_C( 269.90)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 591.80), SIMDE_FLOAT64_C( -733.65), SIMDE_FLOAT64_C( 371.96), SIMDE_FLOAT64_C( -998.26), SIMDE_FLOAT64_C( 
61.01), SIMDE_FLOAT64_C( -918.19), SIMDE_FLOAT64_C( -797.48), SIMDE_FLOAT64_C( 81.07)), UINT8_C(155), simde_mm512_set_pd(SIMDE_FLOAT64_C( 378.17), SIMDE_FLOAT64_C( 574.36), SIMDE_FLOAT64_C( 687.12), SIMDE_FLOAT64_C( -618.22), SIMDE_FLOAT64_C( 388.77), SIMDE_FLOAT64_C( -731.92), SIMDE_FLOAT64_C( 958.30), SIMDE_FLOAT64_C( 51.30)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 378.17), SIMDE_FLOAT64_C( -733.65), SIMDE_FLOAT64_C( 371.96), SIMDE_FLOAT64_C( -618.22), SIMDE_FLOAT64_C( 388.77), SIMDE_FLOAT64_C( -918.19), SIMDE_FLOAT64_C( 958.30), SIMDE_FLOAT64_C( 51.30)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 721.16), SIMDE_FLOAT64_C( 21.28), SIMDE_FLOAT64_C( -269.14), SIMDE_FLOAT64_C( -241.41), SIMDE_FLOAT64_C( -307.10), SIMDE_FLOAT64_C( 78.73), SIMDE_FLOAT64_C( 336.91), SIMDE_FLOAT64_C( -793.36)), UINT8_C(174), simde_mm512_set_pd(SIMDE_FLOAT64_C( 944.42), SIMDE_FLOAT64_C( 986.58), SIMDE_FLOAT64_C( -765.43), SIMDE_FLOAT64_C( 392.41), SIMDE_FLOAT64_C( 229.44), SIMDE_FLOAT64_C( 52.87), SIMDE_FLOAT64_C( -238.79), SIMDE_FLOAT64_C( 440.21)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 944.42), SIMDE_FLOAT64_C( 21.28), SIMDE_FLOAT64_C( -765.43), SIMDE_FLOAT64_C( -241.41), SIMDE_FLOAT64_C( 229.44), SIMDE_FLOAT64_C( 52.87), SIMDE_FLOAT64_C( -238.79), SIMDE_FLOAT64_C( -793.36)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 456.66), SIMDE_FLOAT64_C( -366.58), SIMDE_FLOAT64_C( 715.22), SIMDE_FLOAT64_C( -16.79), SIMDE_FLOAT64_C( -320.68), SIMDE_FLOAT64_C( 273.81), SIMDE_FLOAT64_C( -581.56), SIMDE_FLOAT64_C( 277.97)), UINT8_C(205), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.81), SIMDE_FLOAT64_C( 801.66), SIMDE_FLOAT64_C( 310.16), SIMDE_FLOAT64_C( 634.68), SIMDE_FLOAT64_C( -889.89), SIMDE_FLOAT64_C( -998.37), SIMDE_FLOAT64_C( -493.27), SIMDE_FLOAT64_C( 120.40)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.81), SIMDE_FLOAT64_C( 801.66), SIMDE_FLOAT64_C( 715.22), SIMDE_FLOAT64_C( -16.79), SIMDE_FLOAT64_C( -889.89), SIMDE_FLOAT64_C( -998.37), SIMDE_FLOAT64_C( -581.56), SIMDE_FLOAT64_C( 120.40)) }, { 
simde_mm512_set_pd(SIMDE_FLOAT64_C( 44.26), SIMDE_FLOAT64_C( 891.25), SIMDE_FLOAT64_C( 290.62), SIMDE_FLOAT64_C( -70.18), SIMDE_FLOAT64_C( -3.54), SIMDE_FLOAT64_C( 783.54), SIMDE_FLOAT64_C( -718.82), SIMDE_FLOAT64_C( 922.75)), UINT8_C( 72), simde_mm512_set_pd(SIMDE_FLOAT64_C( -286.94), SIMDE_FLOAT64_C( -573.68), SIMDE_FLOAT64_C( -931.52), SIMDE_FLOAT64_C( 249.22), SIMDE_FLOAT64_C( 735.88), SIMDE_FLOAT64_C( 653.72), SIMDE_FLOAT64_C( 732.59), SIMDE_FLOAT64_C( 161.45)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 44.26), SIMDE_FLOAT64_C( -573.68), SIMDE_FLOAT64_C( 290.62), SIMDE_FLOAT64_C( -70.18), SIMDE_FLOAT64_C( 735.88), SIMDE_FLOAT64_C( 783.54), SIMDE_FLOAT64_C( -718.82), SIMDE_FLOAT64_C( 922.75)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 729.70), SIMDE_FLOAT64_C( -950.99), SIMDE_FLOAT64_C( 115.61), SIMDE_FLOAT64_C( -132.19), SIMDE_FLOAT64_C( 834.99), SIMDE_FLOAT64_C( 471.53), SIMDE_FLOAT64_C( 54.12), SIMDE_FLOAT64_C( 238.73)), UINT8_C(209), simde_mm512_set_pd(SIMDE_FLOAT64_C( -345.93), SIMDE_FLOAT64_C( 598.65), SIMDE_FLOAT64_C( 954.89), SIMDE_FLOAT64_C( -441.90), SIMDE_FLOAT64_C( 845.52), SIMDE_FLOAT64_C( -659.44), SIMDE_FLOAT64_C( -844.59), SIMDE_FLOAT64_C( 331.33)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -345.93), SIMDE_FLOAT64_C( 598.65), SIMDE_FLOAT64_C( 115.61), SIMDE_FLOAT64_C( -441.90), SIMDE_FLOAT64_C( 834.99), SIMDE_FLOAT64_C( 471.53), SIMDE_FLOAT64_C( 54.12), SIMDE_FLOAT64_C( 331.33)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_mov_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_mov_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -278.44), SIMDE_FLOAT32_C( 958.04), SIMDE_FLOAT32_C( -686.18), SIMDE_FLOAT32_C( -120.52), SIMDE_FLOAT32_C( 759.91), SIMDE_FLOAT32_C( 470.87), SIMDE_FLOAT32_C( 
-723.57), SIMDE_FLOAT32_C( 170.04), SIMDE_FLOAT32_C( 559.73), SIMDE_FLOAT32_C( 984.13), SIMDE_FLOAT32_C( -84.72), SIMDE_FLOAT32_C( -543.95), SIMDE_FLOAT32_C( 998.02), SIMDE_FLOAT32_C( -559.31), SIMDE_FLOAT32_C( 134.12), SIMDE_FLOAT32_C( -230.64)), UINT16_C( 0), simde_mm512_set_ps(SIMDE_FLOAT32_C( -161.72), SIMDE_FLOAT32_C( 540.27), SIMDE_FLOAT32_C( -745.55), SIMDE_FLOAT32_C( 623.14), SIMDE_FLOAT32_C( -272.95), SIMDE_FLOAT32_C( 176.76), SIMDE_FLOAT32_C( -957.12), SIMDE_FLOAT32_C( -720.97), SIMDE_FLOAT32_C( -491.62), SIMDE_FLOAT32_C( 442.72), SIMDE_FLOAT32_C( 94.42), SIMDE_FLOAT32_C( -425.44), SIMDE_FLOAT32_C( 378.60), SIMDE_FLOAT32_C( -248.93), SIMDE_FLOAT32_C( 638.30), SIMDE_FLOAT32_C( -857.32)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -278.44), SIMDE_FLOAT32_C( 958.04), SIMDE_FLOAT32_C( -686.18), SIMDE_FLOAT32_C( -120.52), SIMDE_FLOAT32_C( 759.91), SIMDE_FLOAT32_C( 470.87), SIMDE_FLOAT32_C( -723.57), SIMDE_FLOAT32_C( 170.04), SIMDE_FLOAT32_C( 559.73), SIMDE_FLOAT32_C( 984.13), SIMDE_FLOAT32_C( -84.72), SIMDE_FLOAT32_C( -543.95), SIMDE_FLOAT32_C( 998.02), SIMDE_FLOAT32_C( -559.31), SIMDE_FLOAT32_C( 134.12), SIMDE_FLOAT32_C( -230.64)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -455.21), SIMDE_FLOAT32_C( -180.02), SIMDE_FLOAT32_C( -110.74), SIMDE_FLOAT32_C( -586.50), SIMDE_FLOAT32_C( -9.89), SIMDE_FLOAT32_C( -597.54), SIMDE_FLOAT32_C( 553.79), SIMDE_FLOAT32_C( 611.64), SIMDE_FLOAT32_C( 717.03), SIMDE_FLOAT32_C( -381.85), SIMDE_FLOAT32_C( 862.32), SIMDE_FLOAT32_C( 302.29), SIMDE_FLOAT32_C( 146.86), SIMDE_FLOAT32_C( -693.40), SIMDE_FLOAT32_C( -247.57), SIMDE_FLOAT32_C( -469.49)), UINT16_C( 0), simde_mm512_set_ps(SIMDE_FLOAT32_C( 842.67), SIMDE_FLOAT32_C( -856.89), SIMDE_FLOAT32_C( -490.76), SIMDE_FLOAT32_C( 922.81), SIMDE_FLOAT32_C( -69.36), SIMDE_FLOAT32_C( 380.23), SIMDE_FLOAT32_C( -846.01), SIMDE_FLOAT32_C( -485.23), SIMDE_FLOAT32_C( -171.14), SIMDE_FLOAT32_C( 602.88), SIMDE_FLOAT32_C( -717.33), SIMDE_FLOAT32_C( 336.05), SIMDE_FLOAT32_C( -432.71), SIMDE_FLOAT32_C( 
-881.01), SIMDE_FLOAT32_C( -255.82), SIMDE_FLOAT32_C( 168.04)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -455.21), SIMDE_FLOAT32_C( -180.02), SIMDE_FLOAT32_C( -110.74), SIMDE_FLOAT32_C( -586.50), SIMDE_FLOAT32_C( -9.89), SIMDE_FLOAT32_C( -597.54), SIMDE_FLOAT32_C( 553.79), SIMDE_FLOAT32_C( 611.64), SIMDE_FLOAT32_C( 717.03), SIMDE_FLOAT32_C( -381.85), SIMDE_FLOAT32_C( 862.32), SIMDE_FLOAT32_C( 302.29), SIMDE_FLOAT32_C( 146.86), SIMDE_FLOAT32_C( -693.40), SIMDE_FLOAT32_C( -247.57), SIMDE_FLOAT32_C( -469.49)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -694.40), SIMDE_FLOAT32_C( -404.01), SIMDE_FLOAT32_C( 766.51), SIMDE_FLOAT32_C( -392.19), SIMDE_FLOAT32_C( -908.15), SIMDE_FLOAT32_C( -690.12), SIMDE_FLOAT32_C( -262.73), SIMDE_FLOAT32_C( -353.25), SIMDE_FLOAT32_C( -451.03), SIMDE_FLOAT32_C( -88.58), SIMDE_FLOAT32_C( 658.99), SIMDE_FLOAT32_C( -961.05), SIMDE_FLOAT32_C( -743.39), SIMDE_FLOAT32_C( 747.85), SIMDE_FLOAT32_C( -989.89), SIMDE_FLOAT32_C( -48.62)), UINT16_C( 0), simde_mm512_set_ps(SIMDE_FLOAT32_C( -585.79), SIMDE_FLOAT32_C( -884.44), SIMDE_FLOAT32_C( -722.53), SIMDE_FLOAT32_C( 296.99), SIMDE_FLOAT32_C( 791.87), SIMDE_FLOAT32_C( 514.23), SIMDE_FLOAT32_C( 110.66), SIMDE_FLOAT32_C( -891.24), SIMDE_FLOAT32_C( -893.87), SIMDE_FLOAT32_C( 597.88), SIMDE_FLOAT32_C( -561.25), SIMDE_FLOAT32_C( -182.63), SIMDE_FLOAT32_C( -91.96), SIMDE_FLOAT32_C( 272.32), SIMDE_FLOAT32_C( -87.60), SIMDE_FLOAT32_C( 34.84)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -694.40), SIMDE_FLOAT32_C( -404.01), SIMDE_FLOAT32_C( 766.51), SIMDE_FLOAT32_C( -392.19), SIMDE_FLOAT32_C( -908.15), SIMDE_FLOAT32_C( -690.12), SIMDE_FLOAT32_C( -262.73), SIMDE_FLOAT32_C( -353.25), SIMDE_FLOAT32_C( -451.03), SIMDE_FLOAT32_C( -88.58), SIMDE_FLOAT32_C( 658.99), SIMDE_FLOAT32_C( -961.05), SIMDE_FLOAT32_C( -743.39), SIMDE_FLOAT32_C( 747.85), SIMDE_FLOAT32_C( -989.89), SIMDE_FLOAT32_C( -48.62)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 706.89), SIMDE_FLOAT32_C( 473.35), SIMDE_FLOAT32_C( 525.10), SIMDE_FLOAT32_C( 58.51), 
SIMDE_FLOAT32_C( -849.29), SIMDE_FLOAT32_C( 830.92), SIMDE_FLOAT32_C( 666.67), SIMDE_FLOAT32_C( 510.60), SIMDE_FLOAT32_C( 494.95), SIMDE_FLOAT32_C( -644.02), SIMDE_FLOAT32_C( 666.48), SIMDE_FLOAT32_C( 728.99), SIMDE_FLOAT32_C( 57.50), SIMDE_FLOAT32_C( -509.99), SIMDE_FLOAT32_C( -86.32), SIMDE_FLOAT32_C( 945.97)), UINT16_C( 0), simde_mm512_set_ps(SIMDE_FLOAT32_C( 396.65), SIMDE_FLOAT32_C( -337.05), SIMDE_FLOAT32_C( 13.39), SIMDE_FLOAT32_C( 374.11), SIMDE_FLOAT32_C( 941.83), SIMDE_FLOAT32_C( -80.39), SIMDE_FLOAT32_C( -533.82), SIMDE_FLOAT32_C( -81.97), SIMDE_FLOAT32_C( -76.37), SIMDE_FLOAT32_C( -466.22), SIMDE_FLOAT32_C( -527.13), SIMDE_FLOAT32_C( 285.31), SIMDE_FLOAT32_C( -159.19), SIMDE_FLOAT32_C( -769.18), SIMDE_FLOAT32_C( 908.64), SIMDE_FLOAT32_C( -647.66)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 706.89), SIMDE_FLOAT32_C( 473.35), SIMDE_FLOAT32_C( 525.10), SIMDE_FLOAT32_C( 58.51), SIMDE_FLOAT32_C( -849.29), SIMDE_FLOAT32_C( 830.92), SIMDE_FLOAT32_C( 666.67), SIMDE_FLOAT32_C( 510.60), SIMDE_FLOAT32_C( 494.95), SIMDE_FLOAT32_C( -644.02), SIMDE_FLOAT32_C( 666.48), SIMDE_FLOAT32_C( 728.99), SIMDE_FLOAT32_C( 57.50), SIMDE_FLOAT32_C( -509.99), SIMDE_FLOAT32_C( -86.32), SIMDE_FLOAT32_C( 945.97)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 305.38), SIMDE_FLOAT32_C( 354.29), SIMDE_FLOAT32_C( 625.78), SIMDE_FLOAT32_C( 840.33), SIMDE_FLOAT32_C( 398.08), SIMDE_FLOAT32_C( -775.15), SIMDE_FLOAT32_C( -749.75), SIMDE_FLOAT32_C( -579.50), SIMDE_FLOAT32_C( 326.67), SIMDE_FLOAT32_C( -369.97), SIMDE_FLOAT32_C( -888.36), SIMDE_FLOAT32_C( -369.43), SIMDE_FLOAT32_C( 587.01), SIMDE_FLOAT32_C( -977.20), SIMDE_FLOAT32_C( -154.58), SIMDE_FLOAT32_C( -264.71)), UINT16_C( 0), simde_mm512_set_ps(SIMDE_FLOAT32_C( 472.46), SIMDE_FLOAT32_C( -814.28), SIMDE_FLOAT32_C( 331.94), SIMDE_FLOAT32_C( -36.35), SIMDE_FLOAT32_C( -98.00), SIMDE_FLOAT32_C( 862.68), SIMDE_FLOAT32_C( -130.24), SIMDE_FLOAT32_C( 65.39), SIMDE_FLOAT32_C( -826.35), SIMDE_FLOAT32_C( 92.38), SIMDE_FLOAT32_C( -698.83), SIMDE_FLOAT32_C( 
457.07), SIMDE_FLOAT32_C( -472.97), SIMDE_FLOAT32_C( -117.57), SIMDE_FLOAT32_C( -498.77), SIMDE_FLOAT32_C( 798.69)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 305.38), SIMDE_FLOAT32_C( 354.29), SIMDE_FLOAT32_C( 625.78), SIMDE_FLOAT32_C( 840.33), SIMDE_FLOAT32_C( 398.08), SIMDE_FLOAT32_C( -775.15), SIMDE_FLOAT32_C( -749.75), SIMDE_FLOAT32_C( -579.50), SIMDE_FLOAT32_C( 326.67), SIMDE_FLOAT32_C( -369.97), SIMDE_FLOAT32_C( -888.36), SIMDE_FLOAT32_C( -369.43), SIMDE_FLOAT32_C( 587.01), SIMDE_FLOAT32_C( -977.20), SIMDE_FLOAT32_C( -154.58), SIMDE_FLOAT32_C( -264.71)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 845.06), SIMDE_FLOAT32_C( -527.19), SIMDE_FLOAT32_C( -753.05), SIMDE_FLOAT32_C( -867.95), SIMDE_FLOAT32_C( -98.38), SIMDE_FLOAT32_C( -90.28), SIMDE_FLOAT32_C( 321.06), SIMDE_FLOAT32_C( -308.74), SIMDE_FLOAT32_C( 969.13), SIMDE_FLOAT32_C( -263.02), SIMDE_FLOAT32_C( -517.54), SIMDE_FLOAT32_C( 566.67), SIMDE_FLOAT32_C( -321.03), SIMDE_FLOAT32_C( -19.45), SIMDE_FLOAT32_C( -773.18), SIMDE_FLOAT32_C( -562.24)), UINT16_C( 0), simde_mm512_set_ps(SIMDE_FLOAT32_C( -313.43), SIMDE_FLOAT32_C( -900.90), SIMDE_FLOAT32_C( -480.72), SIMDE_FLOAT32_C( 288.15), SIMDE_FLOAT32_C( 603.38), SIMDE_FLOAT32_C( 964.29), SIMDE_FLOAT32_C( 140.98), SIMDE_FLOAT32_C( 269.46), SIMDE_FLOAT32_C( 960.77), SIMDE_FLOAT32_C( -220.33), SIMDE_FLOAT32_C( 524.23), SIMDE_FLOAT32_C( -633.14), SIMDE_FLOAT32_C( -680.30), SIMDE_FLOAT32_C( 880.56), SIMDE_FLOAT32_C( 661.76), SIMDE_FLOAT32_C( -794.03)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 845.06), SIMDE_FLOAT32_C( -527.19), SIMDE_FLOAT32_C( -753.05), SIMDE_FLOAT32_C( -867.95), SIMDE_FLOAT32_C( -98.38), SIMDE_FLOAT32_C( -90.28), SIMDE_FLOAT32_C( 321.06), SIMDE_FLOAT32_C( -308.74), SIMDE_FLOAT32_C( 969.13), SIMDE_FLOAT32_C( -263.02), SIMDE_FLOAT32_C( -517.54), SIMDE_FLOAT32_C( 566.67), SIMDE_FLOAT32_C( -321.03), SIMDE_FLOAT32_C( -19.45), SIMDE_FLOAT32_C( -773.18), SIMDE_FLOAT32_C( -562.24)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -595.71), SIMDE_FLOAT32_C( 923.49), 
SIMDE_FLOAT32_C( -968.66), SIMDE_FLOAT32_C( 136.30), SIMDE_FLOAT32_C( 658.04), SIMDE_FLOAT32_C( 31.08), SIMDE_FLOAT32_C( 664.79), SIMDE_FLOAT32_C( 525.95), SIMDE_FLOAT32_C( 643.61), SIMDE_FLOAT32_C( -559.86), SIMDE_FLOAT32_C( -291.18), SIMDE_FLOAT32_C( 35.13), SIMDE_FLOAT32_C( -188.19), SIMDE_FLOAT32_C( 767.03), SIMDE_FLOAT32_C( -828.01), SIMDE_FLOAT32_C( 801.09)), UINT16_C( 0), simde_mm512_set_ps(SIMDE_FLOAT32_C( -750.17), SIMDE_FLOAT32_C( 128.67), SIMDE_FLOAT32_C( 441.75), SIMDE_FLOAT32_C( 625.42), SIMDE_FLOAT32_C( 865.73), SIMDE_FLOAT32_C( -522.43), SIMDE_FLOAT32_C( 871.78), SIMDE_FLOAT32_C( 736.62), SIMDE_FLOAT32_C( -52.49), SIMDE_FLOAT32_C( -188.89), SIMDE_FLOAT32_C( 163.52), SIMDE_FLOAT32_C( 743.65), SIMDE_FLOAT32_C( -912.98), SIMDE_FLOAT32_C( -904.70), SIMDE_FLOAT32_C( 973.06), SIMDE_FLOAT32_C( -214.13)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -595.71), SIMDE_FLOAT32_C( 923.49), SIMDE_FLOAT32_C( -968.66), SIMDE_FLOAT32_C( 136.30), SIMDE_FLOAT32_C( 658.04), SIMDE_FLOAT32_C( 31.08), SIMDE_FLOAT32_C( 664.79), SIMDE_FLOAT32_C( 525.95), SIMDE_FLOAT32_C( 643.61), SIMDE_FLOAT32_C( -559.86), SIMDE_FLOAT32_C( -291.18), SIMDE_FLOAT32_C( 35.13), SIMDE_FLOAT32_C( -188.19), SIMDE_FLOAT32_C( 767.03), SIMDE_FLOAT32_C( -828.01), SIMDE_FLOAT32_C( 801.09)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -351.95), SIMDE_FLOAT32_C( 902.78), SIMDE_FLOAT32_C( -172.20), SIMDE_FLOAT32_C( 540.77), SIMDE_FLOAT32_C( -431.24), SIMDE_FLOAT32_C( 243.87), SIMDE_FLOAT32_C( 216.07), SIMDE_FLOAT32_C( 747.45), SIMDE_FLOAT32_C( -864.81), SIMDE_FLOAT32_C( -982.67), SIMDE_FLOAT32_C( -710.14), SIMDE_FLOAT32_C( -539.39), SIMDE_FLOAT32_C( -100.27), SIMDE_FLOAT32_C( -988.79), SIMDE_FLOAT32_C( -220.83), SIMDE_FLOAT32_C( 489.72)), UINT16_C( 0), simde_mm512_set_ps(SIMDE_FLOAT32_C( 688.70), SIMDE_FLOAT32_C( -942.30), SIMDE_FLOAT32_C( -353.35), SIMDE_FLOAT32_C( -645.42), SIMDE_FLOAT32_C( 206.41), SIMDE_FLOAT32_C( 546.87), SIMDE_FLOAT32_C( -878.90), SIMDE_FLOAT32_C( 614.84), SIMDE_FLOAT32_C( 757.82), 
SIMDE_FLOAT32_C( 388.29), SIMDE_FLOAT32_C( -767.39), SIMDE_FLOAT32_C( 567.68), SIMDE_FLOAT32_C( 464.76), SIMDE_FLOAT32_C( -828.44), SIMDE_FLOAT32_C( 843.54), SIMDE_FLOAT32_C( 504.38)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -351.95), SIMDE_FLOAT32_C( 902.78), SIMDE_FLOAT32_C( -172.20), SIMDE_FLOAT32_C( 540.77), SIMDE_FLOAT32_C( -431.24), SIMDE_FLOAT32_C( 243.87), SIMDE_FLOAT32_C( 216.07), SIMDE_FLOAT32_C( 747.45), SIMDE_FLOAT32_C( -864.81), SIMDE_FLOAT32_C( -982.67), SIMDE_FLOAT32_C( -710.14), SIMDE_FLOAT32_C( -539.39), SIMDE_FLOAT32_C( -100.27), SIMDE_FLOAT32_C( -988.79), SIMDE_FLOAT32_C( -220.83), SIMDE_FLOAT32_C( 489.72)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_mov_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm_maskz_mov_epi8.
 *
 * Each of the 8 vectors holds a 16-bit mask k, an input a and the expected
 * result r.  In the table, r keeps a's 8-bit lane wherever the matching bit
 * of k is set (bit 0 pairs with the last simde_mm_set_epi8 argument) and
 * holds 0 elsewhere.  The loop calls the function once per vector and
 * compares the result with r via simde_assert_m128i_i8; 0 is returned once
 * every vector has been checked. */
static int test_simde_mm_maskz_mov_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m128i a; simde__m128i r; } test_vec[8] = { { UINT16_C(54402), simde_mm_set_epi8(INT8_C( 36), INT8_C( 97), INT8_C(-122), INT8_C( 62), INT8_C( -43), INT8_C( -34), INT8_C( -14), INT8_C(-126), INT8_C( 82), INT8_C( -27), INT8_C(-110), INT8_C( -49), INT8_C( 86), INT8_C( 99), INT8_C( 100), INT8_C( -41)), simde_mm_set_epi8(INT8_C( 36), INT8_C( 97), INT8_C( 0), INT8_C( 62), INT8_C( 0), INT8_C( -34), INT8_C( 0), INT8_C( 0), INT8_C( 82), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 100), INT8_C( 0)) }, { UINT16_C( 9320), simde_mm_set_epi8(INT8_C( 42), INT8_C( -13), INT8_C( 59), INT8_C( -76), INT8_C( 44), INT8_C(-127), INT8_C( -33), INT8_C(-116), INT8_C( 13), INT8_C( 9), INT8_C( -47), INT8_C( 53), INT8_C( -56), INT8_C( 87), INT8_C( -89), INT8_C( 72)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 59), INT8_C( 0), INT8_C( 0), INT8_C(-127), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 9), INT8_C( -47), INT8_C( 0), INT8_C( -56), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { UINT16_C( 7828), simde_mm_set_epi8(INT8_C( 
-41), INT8_C( -58), INT8_C( 78), INT8_C( -99), INT8_C( -79), INT8_C( 93), INT8_C( 74), INT8_C( 5), INT8_C( 40), INT8_C( -62), INT8_C( 109), INT8_C( -74), INT8_C( 1), INT8_C( -60), INT8_C( 94), INT8_C( 12)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -99), INT8_C( -79), INT8_C( 93), INT8_C( 74), INT8_C( 0), INT8_C( 40), INT8_C( 0), INT8_C( 0), INT8_C( -74), INT8_C( 0), INT8_C( -60), INT8_C( 0), INT8_C( 0)) }, { UINT16_C(55181), simde_mm_set_epi8(INT8_C( 37), INT8_C( 84), INT8_C( -36), INT8_C(-122), INT8_C( 25), INT8_C( 108), INT8_C( 27), INT8_C( 95), INT8_C( -44), INT8_C(-128), INT8_C( 110), INT8_C( -66), INT8_C( 74), INT8_C( -16), INT8_C( 122), INT8_C( -30)), simde_mm_set_epi8(INT8_C( 37), INT8_C( 84), INT8_C( 0), INT8_C(-122), INT8_C( 0), INT8_C( 108), INT8_C( 27), INT8_C( 95), INT8_C( -44), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 74), INT8_C( -16), INT8_C( 0), INT8_C( -30)) }, { UINT16_C(57564), simde_mm_set_epi8(INT8_C( -26), INT8_C( -5), INT8_C( 7), INT8_C( -63), INT8_C( 47), INT8_C( 32), INT8_C( 62), INT8_C(-108), INT8_C( 26), INT8_C( 67), INT8_C( -45), INT8_C( 32), INT8_C( -38), INT8_C( 61), INT8_C(-123), INT8_C(-123)), simde_mm_set_epi8(INT8_C( -26), INT8_C( -5), INT8_C( 7), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 26), INT8_C( 67), INT8_C( 0), INT8_C( 32), INT8_C( -38), INT8_C( 61), INT8_C( 0), INT8_C( 0)) }, { UINT16_C(58988), simde_mm_set_epi8(INT8_C( 108), INT8_C( 5), INT8_C(-115), INT8_C( -87), INT8_C( 112), INT8_C( 24), INT8_C( 18), INT8_C( -62), INT8_C( 120), INT8_C( 62), INT8_C( -22), INT8_C( -32), INT8_C( 32), INT8_C( -91), INT8_C( 65), INT8_C( 79)), simde_mm_set_epi8(INT8_C( 108), INT8_C( 5), INT8_C(-115), INT8_C( 0), INT8_C( 0), INT8_C( 24), INT8_C( 18), INT8_C( 0), INT8_C( 0), INT8_C( 62), INT8_C( -22), INT8_C( 0), INT8_C( 32), INT8_C( -91), INT8_C( 0), INT8_C( 0)) }, { UINT16_C(50535), simde_mm_set_epi8(INT8_C(-119), INT8_C( -52), INT8_C(-117), INT8_C( 112), INT8_C( -70), INT8_C(-108), INT8_C( 
-6), INT8_C( 88), INT8_C( 5), INT8_C( -84), INT8_C( 11), INT8_C( -55), INT8_C(-116), INT8_C( 8), INT8_C( 68), INT8_C(-111)), simde_mm_set_epi8(INT8_C(-119), INT8_C( -52), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-108), INT8_C( 0), INT8_C( 88), INT8_C( 0), INT8_C( -84), INT8_C( 11), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( 68), INT8_C(-111)) }, { UINT16_C(21029), simde_mm_set_epi8(INT8_C(-123), INT8_C(-110), INT8_C( 43), INT8_C( -78), INT8_C(-113), INT8_C( -6), INT8_C( -22), INT8_C(-111), INT8_C(-114), INT8_C( 91), INT8_C( 78), INT8_C( 20), INT8_C( 94), INT8_C( 5), INT8_C( 125), INT8_C( 13)), simde_mm_set_epi8(INT8_C( 0), INT8_C(-110), INT8_C( 0), INT8_C( -78), INT8_C( 0), INT8_C( 0), INT8_C( -22), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 78), INT8_C( 0), INT8_C( 0), INT8_C( 5), INT8_C( 0), INT8_C( 13)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_maskz_mov_epi8(test_vec[i].k, test_vec[i].a); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm_maskz_mov_epi16.
 *
 * Each of the 8 vectors holds an 8-bit mask k, an input a and the expected
 * result r.  In the table, r keeps a's 16-bit lane wherever the matching bit
 * of k is set (bit 0 pairs with the last simde_mm_set_epi16 argument) and
 * holds 0 elsewhere.  The loop calls the function once per vector and
 * compares the result with r via simde_assert_m128i_i16; 0 is returned once
 * every vector has been checked. */
static int test_simde_mm_maskz_mov_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m128i a; simde__m128i r; } test_vec[8] = { { UINT8_C(172), simde_mm_set_epi16(INT16_C( 31369), INT16_C( 24471), INT16_C( -2198), INT16_C( 13931), INT16_C( 8708), INT16_C(-30158), INT16_C( 19991), INT16_C(-25642)), simde_mm_set_epi16(INT16_C( 31369), INT16_C( 0), INT16_C( -2198), INT16_C( 0), INT16_C( 8708), INT16_C(-30158), INT16_C( 0), INT16_C( 0)) }, { UINT8_C(174), simde_mm_set_epi16(INT16_C( 15685), INT16_C( 28576), INT16_C( 31286), INT16_C( 30917), INT16_C( 32368), INT16_C( -7767), INT16_C( 5413), INT16_C( -7264)), simde_mm_set_epi16(INT16_C( 15685), INT16_C( 0), INT16_C( 31286), INT16_C( 0), INT16_C( 32368), INT16_C( -7767), INT16_C( 5413), INT16_C( 0)) }, { UINT8_C(204), simde_mm_set_epi16(INT16_C(-32746), INT16_C( 32574), INT16_C( 12624), INT16_C( 27372), INT16_C(-30923), INT16_C( 29148), INT16_C(-21083), INT16_C( 14295)), 
simde_mm_set_epi16(INT16_C(-32746), INT16_C( 32574), INT16_C( 0), INT16_C( 0), INT16_C(-30923), INT16_C( 29148), INT16_C( 0), INT16_C( 0)) }, { UINT8_C( 95), simde_mm_set_epi16(INT16_C(-30267), INT16_C(-15896), INT16_C( 22574), INT16_C( 2859), INT16_C( 2365), INT16_C( -901), INT16_C( 18813), INT16_C( 18335)), simde_mm_set_epi16(INT16_C( 0), INT16_C(-15896), INT16_C( 0), INT16_C( 2859), INT16_C( 2365), INT16_C( -901), INT16_C( 18813), INT16_C( 18335)) }, { UINT8_C( 67), simde_mm_set_epi16(INT16_C( 16076), INT16_C( 28949), INT16_C( 18472), INT16_C( 18435), INT16_C(-29130), INT16_C(-15163), INT16_C(-12433), INT16_C( -3463)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 28949), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-12433), INT16_C( -3463)) }, { UINT8_C( 73), simde_mm_set_epi16(INT16_C(-30899), INT16_C(-31361), INT16_C(-22956), INT16_C(-14855), INT16_C( -601), INT16_C( 2058), INT16_C( 17396), INT16_C(-31263)), simde_mm_set_epi16(INT16_C( 0), INT16_C(-31361), INT16_C( 0), INT16_C( 0), INT16_C( -601), INT16_C( 0), INT16_C( 0), INT16_C(-31263)) }, { UINT8_C( 1), simde_mm_set_epi16(INT16_C( 5707), INT16_C(-20763), INT16_C( 8635), INT16_C( -4245), INT16_C( 27666), INT16_C(-18424), INT16_C(-22687), INT16_C( 15686)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 15686)) }, { UINT8_C(172), simde_mm_set_epi16(INT16_C( 8809), INT16_C( 29917), INT16_C( 520), INT16_C(-12425), INT16_C( 13592), INT16_C(-10913), INT16_C(-21871), INT16_C( 6317)), simde_mm_set_epi16(INT16_C( 8809), INT16_C( 0), INT16_C( 520), INT16_C( 0), INT16_C( 13592), INT16_C(-10913), INT16_C( 0), INT16_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_maskz_mov_epi16(test_vec[i].k, test_vec[i].a); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm_maskz_mov_epi32.
 *
 * Each of the 8 vectors holds an 8-bit mask k, an input a and the expected
 * result r.  The vector has four 32-bit lanes, and in the table only the low
 * four bits of k affect r (e.g. k = 192 expects an all-zero result): r keeps
 * a's lane where the matching bit is set and holds 0 elsewhere.  The loop
 * calls the function once per vector and compares the result with r via
 * simde_assert_m128i_i32; 0 is returned once every vector has been checked. */
static int test_simde_mm_maskz_mov_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m128i 
a; simde__m128i r; } test_vec[8] = { { UINT8_C(192), simde_mm_set_epi32(INT32_C( 656441296), INT32_C(-1852032257), INT32_C( 299494207), INT32_C(-1616873206)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { UINT8_C(138), simde_mm_set_epi32(INT32_C( 707152322), INT32_C(-1311270924), INT32_C(-1503159730), INT32_C(-2099401846)), simde_mm_set_epi32(INT32_C( 707152322), INT32_C( 0), INT32_C(-1503159730), INT32_C( 0)) }, { UINT8_C(202), simde_mm_set_epi32(INT32_C(-1455100666), INT32_C(-2025285461), INT32_C( -179772388), INT32_C( 1367812127)), simde_mm_set_epi32(INT32_C(-1455100666), INT32_C( 0), INT32_C( -179772388), INT32_C( 0)) }, { UINT8_C(144), simde_mm_set_epi32(INT32_C(-1107178304), INT32_C(-1037282057), INT32_C( 779093870), INT32_C( 1250766721)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { UINT8_C(121), simde_mm_set_epi32(INT32_C( -756555400), INT32_C( 1672370881), INT32_C( -263709411), INT32_C( 606108964)), simde_mm_set_epi32(INT32_C( -756555400), INT32_C( 0), INT32_C( 0), INT32_C( 606108964)) }, { UINT8_C( 11), simde_mm_set_epi32(INT32_C( 291215521), INT32_C( 371049029), INT32_C( 324114641), INT32_C( -986925670)), simde_mm_set_epi32(INT32_C( 291215521), INT32_C( 0), INT32_C( 324114641), INT32_C( -986925670)) }, { UINT8_C(200), simde_mm_set_epi32(INT32_C(-1248714533), INT32_C( 110176831), INT32_C(-1962006925), INT32_C( -973547490)), simde_mm_set_epi32(INT32_C(-1248714533), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { UINT8_C(138), simde_mm_set_epi32(INT32_C( -971622476), INT32_C( -95064376), INT32_C( -736538751), INT32_C( 7991884)), simde_mm_set_epi32(INT32_C( -971622476), INT32_C( 0), INT32_C( -736538751), INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_maskz_mov_epi32(test_vec[i].k, test_vec[i].a); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm_maskz_mov_epi64.
 *
 * Each of the 8 vectors holds an 8-bit mask k, an input a and the expected
 * result r.  The vector has two 64-bit lanes, and in the table only the low
 * two bits of k affect r (e.g. k = 140 expects an all-zero result): r keeps
 * a's lane where the matching bit is set and holds 0 elsewhere.  The loop
 * calls the function once per vector and compares the result with r via
 * simde_assert_m128i_i64; 0 is returned once every vector has been checked. */
static int test_simde_mm_maskz_mov_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct 
{ simde__mmask8 k; simde__m128i a; simde__m128i r; } test_vec[8] = { { UINT8_C(140), simde_mm_set_epi64x(INT64_C( 3798083087260184318), INT64_C( 5657333801282264243)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) }, { UINT8_C( 59), simde_mm_set_epi64x(INT64_C( 6150838870455976373), INT64_C(-1888156961938500809)), simde_mm_set_epi64x(INT64_C( 6150838870455976373), INT64_C(-1888156961938500809)) }, { UINT8_C( 85), simde_mm_set_epi64x(INT64_C(-2963288110518582462), INT64_C( 4379558933354650160)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 4379558933354650160)) }, { UINT8_C(190), simde_mm_set_epi64x(INT64_C( 1293362407707663546), INT64_C( 3921888525347819158)), simde_mm_set_epi64x(INT64_C( 1293362407707663546), INT64_C( 0)) }, { UINT8_C(114), simde_mm_set_epi64x(INT64_C(-7166753234573077348), INT64_C( 1514796214136072870)), simde_mm_set_epi64x(INT64_C(-7166753234573077348), INT64_C( 0)) }, { UINT8_C( 57), simde_mm_set_epi64x(INT64_C(-5321356301108453394), INT64_C(-2450051547146928613)), simde_mm_set_epi64x(INT64_C( 0), INT64_C(-2450051547146928613)) }, { UINT8_C( 72), simde_mm_set_epi64x(INT64_C(-3635596340953309068), INT64_C(-4947516809045744754)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) }, { UINT8_C( 27), simde_mm_set_epi64x(INT64_C(-4723518328184072824), INT64_C(-6365694246941149609)), simde_mm_set_epi64x(INT64_C(-4723518328184072824), INT64_C(-6365694246941149609)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_maskz_mov_epi64(test_vec[i].k, test_vec[i].a); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm_maskz_mov_pd.
 *
 * Each of the 8 vectors holds an 8-bit mask k, an input a and the expected
 * result r.  The vector has two double lanes, and in the table only the low
 * two bits of k affect r: r keeps a's lane where the matching bit is set and
 * holds 0.00 elsewhere.  The loop calls the function once per vector and
 * compares the result with r via simde_assert_m128d_close(..., 1); the last
 * argument is presumably a precision setting (confirm in the test helpers).
 * 0 is returned once every vector has been checked. */
static int test_simde_mm_maskz_mov_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m128d a; simde__m128d r; } test_vec[8] = { { UINT8_C(210), simde_mm_set_pd(SIMDE_FLOAT64_C( 55.56), SIMDE_FLOAT64_C( 306.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( 55.56), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C( 7), simde_mm_set_pd(SIMDE_FLOAT64_C( 202.21), SIMDE_FLOAT64_C( -678.71)), 
simde_mm_set_pd(SIMDE_FLOAT64_C( 202.21), SIMDE_FLOAT64_C( -678.71)) }, { UINT8_C( 50), simde_mm_set_pd(SIMDE_FLOAT64_C( 680.40), SIMDE_FLOAT64_C( 906.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( 680.40), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(229), simde_mm_set_pd(SIMDE_FLOAT64_C( -422.72), SIMDE_FLOAT64_C( 572.83)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 572.83)) }, { UINT8_C(117), simde_mm_set_pd(SIMDE_FLOAT64_C( -76.19), SIMDE_FLOAT64_C( -654.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -654.60)) }, { UINT8_C(130), simde_mm_set_pd(SIMDE_FLOAT64_C( -711.42), SIMDE_FLOAT64_C( -22.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -711.42), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C( 62), simde_mm_set_pd(SIMDE_FLOAT64_C( -413.23), SIMDE_FLOAT64_C( 547.52)), simde_mm_set_pd(SIMDE_FLOAT64_C( -413.23), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(165), simde_mm_set_pd(SIMDE_FLOAT64_C( 575.41), SIMDE_FLOAT64_C( -702.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -702.01)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_maskz_mov_pd(test_vec[i].k, test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm_maskz_mov_ps.
 *
 * Each of the 8 vectors holds an 8-bit mask k, an input a and the expected
 * result r.  The vector has four float lanes, and in the table only the low
 * four bits of k affect r: r keeps a's lane where the matching bit is set
 * (bit 0 pairs with the last simde_mm_set_ps argument) and holds 0.00
 * elsewhere.  The loop calls the function once per vector and compares the
 * result with r via simde_assert_m128_close(..., 1); the last argument is
 * presumably a precision setting (confirm in the test helpers).  0 is
 * returned once every vector has been checked.
 *
 * The mask literals use UINT8_C so that they match the simde__mmask8 type
 * of the k field, as the other 8-bit-mask tests in this file do. */
static int test_simde_mm_maskz_mov_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__mmask8 k;
    simde__m128 a;
    simde__m128 r;
  } test_vec[8] = {
    { UINT8_C(126), simde_mm_set_ps(SIMDE_FLOAT32_C( -678.71), SIMDE_FLOAT32_C( 675.53), SIMDE_FLOAT32_C( 55.56), SIMDE_FLOAT32_C( 306.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( -678.71), SIMDE_FLOAT32_C( 675.53), SIMDE_FLOAT32_C( 55.56), SIMDE_FLOAT32_C( 0.00)) },
    { UINT8_C( 44), simde_mm_set_ps(SIMDE_FLOAT32_C( 941.87), SIMDE_FLOAT32_C( 680.40), SIMDE_FLOAT32_C( 906.67), SIMDE_FLOAT32_C( -364.25)), simde_mm_set_ps(SIMDE_FLOAT32_C( 941.87), SIMDE_FLOAT32_C( 680.40), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) },
    { UINT8_C(117), simde_mm_set_ps(SIMDE_FLOAT32_C( -76.19), SIMDE_FLOAT32_C( -654.60), SIMDE_FLOAT32_C( -721.91), SIMDE_FLOAT32_C( -422.72)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -654.60), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -422.72)) },
    { UINT8_C( 76), simde_mm_set_ps(SIMDE_FLOAT32_C( 547.52), SIMDE_FLOAT32_C( -627.17), SIMDE_FLOAT32_C( -711.42), SIMDE_FLOAT32_C( -22.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 547.52), SIMDE_FLOAT32_C( -627.17), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) },
    { UINT8_C(101), simde_mm_set_ps(SIMDE_FLOAT32_C( -822.97), SIMDE_FLOAT32_C( 575.41), SIMDE_FLOAT32_C( -702.01), SIMDE_FLOAT32_C( -488.76)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 575.41), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -488.76)) },
    { UINT8_C(149), simde_mm_set_ps(SIMDE_FLOAT32_C( 804.55), SIMDE_FLOAT32_C( -888.85), SIMDE_FLOAT32_C( 750.71), SIMDE_FLOAT32_C( 346.51)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -888.85), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 346.51)) },
    { UINT8_C(115), simde_mm_set_ps(SIMDE_FLOAT32_C( -17.38), SIMDE_FLOAT32_C( 623.33), SIMDE_FLOAT32_C( 459.80), SIMDE_FLOAT32_C( 837.15)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 459.80), SIMDE_FLOAT32_C( 837.15)) },
    { UINT8_C( 50), simde_mm_set_ps(SIMDE_FLOAT32_C( 197.69), SIMDE_FLOAT32_C( 233.42), SIMDE_FLOAT32_C( 153.73), SIMDE_FLOAT32_C( 616.58)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 153.73), SIMDE_FLOAT32_C( 0.00)) },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m128 r = simde_mm_maskz_mov_ps(test_vec[i].k, test_vec[i].a);
    simde_assert_m128_close(r, test_vec[i].r, 1);
  }

  return 0;
}
/* Table-driven test for simde_mm256_maskz_mov_epi8.
 *
 * Each of the 8 vectors holds a 32-bit mask k, an input a and the expected
 * result r.  In the table, r keeps a's 8-bit lane wherever the matching bit
 * of k is set (bit 0 pairs with the last simde_mm256_set_epi8 argument) and
 * holds 0 elsewhere.  The loop calls the function once per vector and
 * compares the result with r via simde_assert_m256i_i8; 0 is returned once
 * every vector has been checked. */
static int test_simde_mm256_maskz_mov_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask32 k; simde__m256i a; simde__m256i r; } test_vec[8] = { { UINT32_C(1332074171), simde_mm256_set_epi8(INT8_C( 121), INT8_C( 75), INT8_C( 39), INT8_C(-100), INT8_C( 23), INT8_C( 80), INT8_C( 88), INT8_C( 14), INT8_C( -82), INT8_C( -32), INT8_C( -73), 
INT8_C( -78), INT8_C( -21), INT8_C( 76), INT8_C( 33), INT8_C( 90), INT8_C( -57), INT8_C( -12), INT8_C(-121), INT8_C( 101), INT8_C( 6), INT8_C( -36), INT8_C( -50), INT8_C( -33), INT8_C( -83), INT8_C( -92), INT8_C( 2), INT8_C( 69), INT8_C( 62), INT8_C( 89), INT8_C( 105), INT8_C( 58)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 75), INT8_C( 0), INT8_C( 0), INT8_C( 23), INT8_C( 80), INT8_C( 88), INT8_C( 14), INT8_C( 0), INT8_C( -32), INT8_C( -73), INT8_C( 0), INT8_C( 0), INT8_C( 76), INT8_C( 0), INT8_C( 90), INT8_C( -57), INT8_C( -12), INT8_C( 0), INT8_C( 101), INT8_C( 0), INT8_C( -36), INT8_C( -50), INT8_C( 0), INT8_C( -83), INT8_C( 0), INT8_C( 2), INT8_C( 69), INT8_C( 62), INT8_C( 0), INT8_C( 105), INT8_C( 58)) }, { UINT32_C(4272165599), simde_mm256_set_epi8(INT8_C( 23), INT8_C( 6), INT8_C( 61), INT8_C( 68), INT8_C( -53), INT8_C(-110), INT8_C( 53), INT8_C( -67), INT8_C( -9), INT8_C( -52), INT8_C( 27), INT8_C( -40), INT8_C( 57), INT8_C( -80), INT8_C( -28), INT8_C( 64), INT8_C( 70), INT8_C( -40), INT8_C( 14), INT8_C( -38), INT8_C( -38), INT8_C( -99), INT8_C( -37), INT8_C( -35), INT8_C( -82), INT8_C( -60), INT8_C( -40), INT8_C( -40), INT8_C( -5), INT8_C( 8), INT8_C( 109), INT8_C( 95)), simde_mm256_set_epi8(INT8_C( 23), INT8_C( 6), INT8_C( 61), INT8_C( 68), INT8_C( -53), INT8_C(-110), INT8_C( 53), INT8_C( 0), INT8_C( -9), INT8_C( 0), INT8_C( 27), INT8_C( 0), INT8_C( 0), INT8_C( -80), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -38), INT8_C( 0), INT8_C( 0), INT8_C( -37), INT8_C( 0), INT8_C( -82), INT8_C( -60), INT8_C( 0), INT8_C( -40), INT8_C( -5), INT8_C( 8), INT8_C( 109), INT8_C( 95)) }, { UINT32_C(3823231310), simde_mm256_set_epi8(INT8_C( -42), INT8_C( 44), INT8_C( 70), INT8_C( -24), INT8_C( -86), INT8_C( 112), INT8_C( 116), INT8_C( -61), INT8_C( 94), INT8_C( -56), INT8_C( -83), INT8_C( 37), INT8_C( 45), INT8_C( 44), INT8_C( 79), INT8_C( 122), INT8_C( -54), INT8_C( -68), INT8_C( 19), INT8_C( 39), INT8_C( 17), INT8_C( -32), INT8_C( -47), INT8_C( 
-26), INT8_C( -23), INT8_C( 30), INT8_C( 98), INT8_C( 3), INT8_C( -92), INT8_C( -30), INT8_C( -8), INT8_C( -30)), simde_mm256_set_epi8(INT8_C( -42), INT8_C( 44), INT8_C( 70), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 116), INT8_C( -61), INT8_C( 94), INT8_C( -56), INT8_C( -83), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 122), INT8_C( -54), INT8_C( -68), INT8_C( 19), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -26), INT8_C( 0), INT8_C( 30), INT8_C( 0), INT8_C( 0), INT8_C( -92), INT8_C( -30), INT8_C( -8), INT8_C( 0)) }, { UINT32_C(2639652614), simde_mm256_set_epi8(INT8_C( -53), INT8_C( 96), INT8_C( 40), INT8_C( -52), INT8_C( -17), INT8_C( -6), INT8_C(-108), INT8_C( 33), INT8_C( -15), INT8_C( 113), INT8_C( 31), INT8_C( -14), INT8_C( 124), INT8_C( 15), INT8_C( 90), INT8_C( 1), INT8_C( 36), INT8_C(-115), INT8_C( -95), INT8_C( 4), INT8_C( 50), INT8_C( -54), INT8_C( 94), INT8_C( 54), INT8_C( 109), INT8_C(-103), INT8_C(-124), INT8_C( 34), INT8_C( -16), INT8_C( 97), INT8_C( -7), INT8_C( 98)), simde_mm256_set_epi8(INT8_C( -53), INT8_C( 0), INT8_C( 0), INT8_C( -52), INT8_C( -17), INT8_C( -6), INT8_C( 0), INT8_C( 33), INT8_C( 0), INT8_C( 113), INT8_C( 0), INT8_C( -14), INT8_C( 0), INT8_C( 15), INT8_C( 0), INT8_C( 1), INT8_C( 36), INT8_C(-115), INT8_C( -95), INT8_C( 0), INT8_C( 0), INT8_C( -54), INT8_C( 94), INT8_C( 54), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 97), INT8_C( -7), INT8_C( 0)) }, { UINT32_C(2877003463), simde_mm256_set_epi8(INT8_C(-108), INT8_C( 14), INT8_C( 103), INT8_C( 32), INT8_C( 25), INT8_C(-108), INT8_C( -56), INT8_C(-111), INT8_C( 23), INT8_C( -20), INT8_C( 4), INT8_C( 81), INT8_C( 39), INT8_C( 39), INT8_C( 82), INT8_C( -15), INT8_C( -87), INT8_C( 90), INT8_C( -91), INT8_C( 3), INT8_C( -91), INT8_C( 55), INT8_C( 72), INT8_C( -46), INT8_C( 48), INT8_C( -19), INT8_C( -87), INT8_C( 100), INT8_C( -44), INT8_C( -79), INT8_C( -72), INT8_C( 73)), simde_mm256_set_epi8(INT8_C(-108), INT8_C( 0), INT8_C( 103), 
INT8_C( 0), INT8_C( 25), INT8_C( 0), INT8_C( -56), INT8_C(-111), INT8_C( 0), INT8_C( -20), INT8_C( 4), INT8_C( 81), INT8_C( 39), INT8_C( 0), INT8_C( 82), INT8_C( -15), INT8_C( -87), INT8_C( 0), INT8_C( 0), INT8_C( 3), INT8_C( 0), INT8_C( 55), INT8_C( 72), INT8_C( 0), INT8_C( 48), INT8_C( -19), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -79), INT8_C( -72), INT8_C( 73)) }, { UINT32_C(2869692151), simde_mm256_set_epi8(INT8_C( 22), INT8_C(-112), INT8_C( 66), INT8_C( -76), INT8_C( 79), INT8_C(-100), INT8_C( -47), INT8_C(-114), INT8_C( -72), INT8_C( 67), INT8_C( 3), INT8_C( -9), INT8_C( 88), INT8_C( -5), INT8_C(-111), INT8_C(-100), INT8_C( -94), INT8_C( -72), INT8_C( -45), INT8_C( -95), INT8_C( 119), INT8_C( -81), INT8_C( 38), INT8_C(-111), INT8_C( 72), INT8_C( -95), INT8_C( 104), INT8_C( -28), INT8_C( 25), INT8_C( 84), INT8_C( 66), INT8_C( 19)), simde_mm256_set_epi8(INT8_C( 22), INT8_C( 0), INT8_C( 66), INT8_C( 0), INT8_C( 79), INT8_C( 0), INT8_C( -47), INT8_C(-114), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 88), INT8_C( -5), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -81), INT8_C( 38), INT8_C( 0), INT8_C( 72), INT8_C( -95), INT8_C( 104), INT8_C( -28), INT8_C( 0), INT8_C( 84), INT8_C( 66), INT8_C( 19)) }, { UINT32_C(1633656989), simde_mm256_set_epi8(INT8_C( 81), INT8_C( 114), INT8_C( -76), INT8_C( -63), INT8_C( 30), INT8_C( 66), INT8_C( 18), INT8_C(-119), INT8_C( 26), INT8_C( 28), INT8_C( 56), INT8_C( 127), INT8_C( -81), INT8_C( -7), INT8_C( -20), INT8_C( -35), INT8_C( -7), INT8_C( 37), INT8_C( -47), INT8_C( 78), INT8_C( 114), INT8_C( -18), INT8_C( 72), INT8_C( -8), INT8_C(-101), INT8_C( -13), INT8_C( 76), INT8_C( -5), INT8_C( -5), INT8_C( -50), INT8_C( -99), INT8_C( 84)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 114), INT8_C( -76), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-119), INT8_C( 0), INT8_C( 28), INT8_C( 0), INT8_C( 127), INT8_C( -81), INT8_C( -7), INT8_C( -20), INT8_C( -35), INT8_C( 
-7), INT8_C( 0), INT8_C( -47), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-101), INT8_C( 0), INT8_C( 0), INT8_C( -5), INT8_C( -5), INT8_C( -50), INT8_C( 0), INT8_C( 84)) }, { UINT32_C(4185163230), simde_mm256_set_epi8(INT8_C( -29), INT8_C(-121), INT8_C( -23), INT8_C( 64), INT8_C( 12), INT8_C( 5), INT8_C( 73), INT8_C( 52), INT8_C( -53), INT8_C( 62), INT8_C( 8), INT8_C(-112), INT8_C( -8), INT8_C( 99), INT8_C( -12), INT8_C(-118), INT8_C( -33), INT8_C( -37), INT8_C( -98), INT8_C( -94), INT8_C(-119), INT8_C( 79), INT8_C( -25), INT8_C( 47), INT8_C( 80), INT8_C( 89), INT8_C( 5), INT8_C( 9), INT8_C( -36), INT8_C( 79), INT8_C( 8), INT8_C( 89)), simde_mm256_set_epi8(INT8_C( -29), INT8_C(-121), INT8_C( -23), INT8_C( 64), INT8_C( 12), INT8_C( 0), INT8_C( 0), INT8_C( 52), INT8_C( 0), INT8_C( 62), INT8_C( 8), INT8_C(-112), INT8_C( 0), INT8_C( 99), INT8_C( 0), INT8_C( 0), INT8_C( -33), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 79), INT8_C( 0), INT8_C( 47), INT8_C( 80), INT8_C( 89), INT8_C( 0), INT8_C( 9), INT8_C( -36), INT8_C( 79), INT8_C( 8), INT8_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_maskz_mov_epi8(test_vec[i].k, test_vec[i].a); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm256_maskz_mov_epi16.
 *
 * Each of the 8 vectors holds a 16-bit mask k, an input a and the expected
 * result r.  In the table, r keeps a's 16-bit lane wherever the matching bit
 * of k is set (bit 0 pairs with the last simde_mm256_set_epi16 argument) and
 * holds 0 elsewhere.  The loop calls the function once per vector and
 * compares the result with r via simde_assert_m256i_i16; 0 is returned once
 * every vector has been checked. */
static int test_simde_mm256_maskz_mov_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m256i a; simde__m256i r; } test_vec[8] = { { UINT16_C(41021), simde_mm256_set_epi16(INT16_C(-23030), INT16_C( 6803), INT16_C(-21055), INT16_C( -910), INT16_C( -6009), INT16_C( 10471), INT16_C(-29834), INT16_C(-14111), INT16_C( -2981), INT16_C( 28733), INT16_C( 11699), INT16_C( 7781), INT16_C( 29036), INT16_C( -8103), INT16_C(-21310), INT16_C( 9176)), simde_mm256_set_epi16(INT16_C(-23030), INT16_C( 0), INT16_C(-21055), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 11699), INT16_C( 7781), INT16_C( 29036), INT16_C( -8103), 
INT16_C( 0), INT16_C( 9176)) }, { UINT16_C(53637), simde_mm256_set_epi16(INT16_C(-17353), INT16_C(-24912), INT16_C(-16017), INT16_C(-32768), INT16_C( 30563), INT16_C( -5523), INT16_C(-18306), INT16_C( 14754), INT16_C(-23068), INT16_C(-17313), INT16_C( 21598), INT16_C( 12635), INT16_C( 17053), INT16_C( 3377), INT16_C( 28887), INT16_C( 29062)), simde_mm256_set_epi16(INT16_C(-17353), INT16_C(-24912), INT16_C( 0), INT16_C(-32768), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 14754), INT16_C(-23068), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 3377), INT16_C( 0), INT16_C( 29062)) }, { UINT16_C(52310), simde_mm256_set_epi16(INT16_C( 4085), INT16_C( 10000), INT16_C(-17688), INT16_C( 28540), INT16_C( 9971), INT16_C( -9002), INT16_C(-22233), INT16_C(-13917), INT16_C(-13732), INT16_C( -199), INT16_C( 9707), INT16_C( 31342), INT16_C(-13386), INT16_C(-15675), INT16_C( 10143), INT16_C( 19953)), simde_mm256_set_epi16(INT16_C( 4085), INT16_C( 10000), INT16_C( 0), INT16_C( 0), INT16_C( 9971), INT16_C( -9002), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -199), INT16_C( 0), INT16_C( 31342), INT16_C( 0), INT16_C(-15675), INT16_C( 10143), INT16_C( 0)) }, { UINT16_C(11313), simde_mm256_set_epi16(INT16_C(-25947), INT16_C( 19467), INT16_C( 22325), INT16_C( 14960), INT16_C( 16296), INT16_C(-12892), INT16_C( 9434), INT16_C( 15492), INT16_C(-30515), INT16_C(-13927), INT16_C( 24112), INT16_C( 9227), INT16_C(-20054), INT16_C(-11664), INT16_C( -7103), INT16_C(-13246)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 22325), INT16_C( 0), INT16_C( 16296), INT16_C(-12892), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 24112), INT16_C( 9227), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-13246)) }, { UINT16_C(54624), simde_mm256_set_epi16(INT16_C(-10124), INT16_C( 1110), INT16_C( 1704), INT16_C(-17853), INT16_C( -7561), INT16_C(-19432), INT16_C( 22127), INT16_C(-30033), INT16_C(-17362), INT16_C( -1830), INT16_C(-16587), INT16_C(-17056), 
INT16_C(-14539), INT16_C( 7972), INT16_C(-26491), INT16_C( 20406)), simde_mm256_set_epi16(INT16_C(-10124), INT16_C( 1110), INT16_C( 0), INT16_C(-17853), INT16_C( 0), INT16_C(-19432), INT16_C( 0), INT16_C(-30033), INT16_C( 0), INT16_C( -1830), INT16_C(-16587), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { UINT16_C(44378), simde_mm256_set_epi16(INT16_C( 16917), INT16_C( 10042), INT16_C( 5958), INT16_C( -4695), INT16_C(-20590), INT16_C( 17528), INT16_C( -6738), INT16_C(-26754), INT16_C( 30496), INT16_C( 8574), INT16_C( 3335), INT16_C(-11669), INT16_C( 15597), INT16_C(-30582), INT16_C(-21551), INT16_C(-25534)), simde_mm256_set_epi16(INT16_C( 16917), INT16_C( 0), INT16_C( 5958), INT16_C( 0), INT16_C(-20590), INT16_C( 17528), INT16_C( 0), INT16_C(-26754), INT16_C( 0), INT16_C( 8574), INT16_C( 0), INT16_C(-11669), INT16_C( 15597), INT16_C( 0), INT16_C(-21551), INT16_C( 0)) }, { UINT16_C(12440), simde_mm256_set_epi16(INT16_C( 12048), INT16_C( -8528), INT16_C(-31627), INT16_C( 26711), INT16_C( -4678), INT16_C( 32013), INT16_C( 814), INT16_C( 19873), INT16_C( 32199), INT16_C( -7421), INT16_C( 21197), INT16_C( 25563), INT16_C( 14671), INT16_C( 16470), INT16_C( 30174), INT16_C( -7130)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C(-31627), INT16_C( 26711), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 32199), INT16_C( 0), INT16_C( 0), INT16_C( 25563), INT16_C( 14671), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { UINT16_C( 8281), simde_mm256_set_epi16(INT16_C( 32229), INT16_C( -2511), INT16_C(-10942), INT16_C(-28733), INT16_C( -8714), INT16_C( -6616), INT16_C( 4922), INT16_C( 1537), INT16_C( -8589), INT16_C( 6229), INT16_C(-12142), INT16_C( 12862), INT16_C(-16969), INT16_C( 25143), INT16_C(-29570), INT16_C( 25018)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C(-10942), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 6229), INT16_C( 0), INT16_C( 12862), INT16_C(-16969), INT16_C( 
0), INT16_C( 0), INT16_C( 25018)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_maskz_mov_epi16(test_vec[i].k, test_vec[i].a); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm256_maskz_mov_epi32.
 *
 * Each of the 8 vectors holds an 8-bit mask k, an input a and the expected
 * result r.  In the table, r keeps a's 32-bit lane wherever the matching bit
 * of k is set (bit 0 pairs with the last simde_mm256_set_epi32 argument) and
 * holds 0 elsewhere.  The loop calls the function once per vector and
 * compares the result with r via simde_assert_m256i_i32; 0 is returned once
 * every vector has been checked. */
static int test_simde_mm256_maskz_mov_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m256i a; simde__m256i r; } test_vec[8] = { { UINT8_C(205), simde_mm256_set_epi32(INT32_C( -433311806), INT32_C( 408583050), INT32_C( -306453652), INT32_C( -661693879), INT32_C( 1329919822), INT32_C( -49396337), INT32_C( -975523137), INT32_C( 228489302)), simde_mm256_set_epi32(INT32_C( -433311806), INT32_C( 408583050), INT32_C( 0), INT32_C( 0), INT32_C( 1329919822), INT32_C( -49396337), INT32_C( 0), INT32_C( 228489302)) }, { UINT8_C( 99), simde_mm256_set_epi32(INT32_C( 1010695071), INT32_C( 737167817), INT32_C( 1850343310), INT32_C( 1216609214), INT32_C(-1976576002), INT32_C( 1498708626), INT32_C( -621595293), INT32_C(-2111598997)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 737167817), INT32_C( 1850343310), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -621595293), INT32_C(-2111598997)) }, { UINT8_C(174), simde_mm256_set_epi32(INT32_C( 2023987434), INT32_C( 1558325646), INT32_C( 2137381681), INT32_C(-1489350015), INT32_C(-2044242394), INT32_C( 856733879), INT32_C( 1335704151), INT32_C(-1346912573)), simde_mm256_set_epi32(INT32_C( 2023987434), INT32_C( 0), INT32_C( 2137381681), INT32_C( 0), INT32_C(-2044242394), INT32_C( 856733879), INT32_C( 1335704151), INT32_C( 0)) }, { UINT8_C(179), simde_mm256_set_epi32(INT32_C( 1148504404), INT32_C( -491209584), INT32_C( -163352510), INT32_C( 998745259), INT32_C(-1986870978), INT32_C( -69159531), INT32_C(-1702010863), INT32_C( -273027352)), simde_mm256_set_epi32(INT32_C( 1148504404), INT32_C( 0), INT32_C( -163352510), INT32_C( 998745259), INT32_C( 0), INT32_C( 0), INT32_C(-1702010863), INT32_C( -273027352)) }, { UINT8_C(187), simde_mm256_set_epi32(INT32_C( -272101695), 
INT32_C(-1695498890), INT32_C( 700753329), INT32_C(-1444122689), INT32_C( 460626918), INT32_C( 1352716216), INT32_C( -651553055), INT32_C(-1336685992)), simde_mm256_set_epi32(INT32_C( -272101695), INT32_C( 0), INT32_C( 700753329), INT32_C(-1444122689), INT32_C( 460626918), INT32_C( 0), INT32_C( -651553055), INT32_C(-1336685992)) }, { UINT8_C(119), simde_mm256_set_epi32(INT32_C(-1143505851), INT32_C( 669916850), INT32_C( -262251672), INT32_C( 470970928), INT32_C( 1041120150), INT32_C(-1070284133), INT32_C( 347280872), INT32_C( -305201154)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 669916850), INT32_C( -262251672), INT32_C( 470970928), INT32_C( 0), INT32_C(-1070284133), INT32_C( 347280872), INT32_C( -305201154)) }, { UINT8_C( 36), simde_mm256_set_epi32(INT32_C( 1927265424), INT32_C(-1184012473), INT32_C( 1473357439), INT32_C( 1217146407), INT32_C( 1884345776), INT32_C( -662443681), INT32_C( -457310112), INT32_C(-2074706314)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 1473357439), INT32_C( 0), INT32_C( 0), INT32_C( -662443681), INT32_C( 0), INT32_C( 0)) }, { UINT8_C(161), simde_mm256_set_epi32(INT32_C( 454256305), INT32_C( -89518858), INT32_C( 575434377), INT32_C( -363661293), INT32_C( -271203820), INT32_C( -624953581), INT32_C( 1626853978), INT32_C(-1012779406)), simde_mm256_set_epi32(INT32_C( 454256305), INT32_C( 0), INT32_C( 575434377), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1012779406)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_maskz_mov_epi32(test_vec[i].k, test_vec[i].a); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_maskz_mov_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m256i a; simde__m256i r; } test_vec[8] = { { UINT8_C(109), simde_mm256_set_epi64x(INT64_C( 7572002691338055356), INT64_C(-6931202421771137023), INT64_C(-6376895216110561530), INT64_C( 101010879856088318)), 
simde_mm256_set_epi64x(INT64_C( 7572002691338055356), INT64_C(-6931202421771137023), INT64_C( 0), INT64_C( 101010879856088318)) }, { UINT8_C( 84), simde_mm256_set_epi64x(INT64_C( 4863930517396634884), INT64_C( 1339559436234782312), INT64_C(-4687477333083103994), INT64_C( 2317514132307922590)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 1339559436234782312), INT64_C( 0), INT64_C( 0)) }, { UINT8_C( 4), simde_mm256_set_epi64x(INT64_C(-4280812707612271736), INT64_C( 1352195411881071619), INT64_C( 4401292390121558915), INT64_C( 1447000045443016421)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 1352195411881071619), INT64_C( 0), INT64_C( 0)) }, { UINT8_C(243), simde_mm256_set_epi64x(INT64_C(-1554191220639548558), INT64_C(-1009828379214636119), INT64_C( 87598411827204486), INT64_C( 8494576712865778531)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 0), INT64_C( 87598411827204486), INT64_C( 8494576712865778531)) }, { UINT8_C(102), simde_mm256_set_epi64x(INT64_C(-3199853677394167840), INT64_C(-8026951806327199947), INT64_C( 4533073424512347513), INT64_C( -348644671563309757)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C(-8026951806327199947), INT64_C( 4533073424512347513), INT64_C( 0)) }, { UINT8_C( 38), simde_mm256_set_epi64x(INT64_C(-8077475266882793195), INT64_C(-1380937485015239307), INT64_C(-3426685195142795196), INT64_C( 4855530362388048180)), simde_mm256_set_epi64x(INT64_C( 0), INT64_C(-1380937485015239307), INT64_C(-3426685195142795196), INT64_C( 0)) }, { UINT8_C(232), simde_mm256_set_epi64x(INT64_C(-4833519388014243665), INT64_C( 2573974298093740422), INT64_C( 3628954985408843732), INT64_C(-4157981558961121913)), simde_mm256_set_epi64x(INT64_C(-4833519388014243665), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C(158), simde_mm256_set_epi64x(INT64_C( 8860262502878217231), INT64_C(-7256652440967705311), INT64_C( 8973660985157671450), INT64_C(-1395962117275720873)), simde_mm256_set_epi64x(INT64_C( 8860262502878217231), INT64_C(-7256652440967705311), INT64_C( 
8973660985157671450), INT64_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_maskz_mov_epi64(test_vec[i].k, test_vec[i].a); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_maskz_mov_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m256d a; simde__m256d r; } test_vec[8] = { { UINT8_C(156), simde_mm256_set_pd(SIMDE_FLOAT64_C( -797.63), SIMDE_FLOAT64_C( 550.96), SIMDE_FLOAT64_C( 215.70), SIMDE_FLOAT64_C( -51.73)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -797.63), SIMDE_FLOAT64_C( 550.96), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(232), simde_mm256_set_pd(SIMDE_FLOAT64_C( 603.95), SIMDE_FLOAT64_C( 89.69), SIMDE_FLOAT64_C( 726.92), SIMDE_FLOAT64_C( 286.27)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 603.95), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C( 7), simde_mm256_set_pd(SIMDE_FLOAT64_C( -753.25), SIMDE_FLOAT64_C( 973.27), SIMDE_FLOAT64_C( 154.94), SIMDE_FLOAT64_C( 621.42)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 973.27), SIMDE_FLOAT64_C( 154.94), SIMDE_FLOAT64_C( 621.42)) }, { UINT8_C( 98), simde_mm256_set_pd(SIMDE_FLOAT64_C( -339.86), SIMDE_FLOAT64_C( -506.40), SIMDE_FLOAT64_C( 409.52), SIMDE_FLOAT64_C( 202.83)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 409.52), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C( 85), simde_mm256_set_pd(SIMDE_FLOAT64_C( 205.42), SIMDE_FLOAT64_C( -996.69), SIMDE_FLOAT64_C( -560.92), SIMDE_FLOAT64_C( 347.34)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -996.69), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 347.34)) }, { UINT8_C(149), simde_mm256_set_pd(SIMDE_FLOAT64_C( 226.47), SIMDE_FLOAT64_C( 459.36), SIMDE_FLOAT64_C( 864.34), SIMDE_FLOAT64_C( -365.19)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 459.36), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -365.19)) }, { UINT8_C( 67), 
simde_mm256_set_pd(SIMDE_FLOAT64_C( -158.13), SIMDE_FLOAT64_C( -903.74), SIMDE_FLOAT64_C( 370.86), SIMDE_FLOAT64_C( -800.55)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 370.86), SIMDE_FLOAT64_C( -800.55)) }, { UINT8_C(168), simde_mm256_set_pd(SIMDE_FLOAT64_C( -868.95), SIMDE_FLOAT64_C( 674.80), SIMDE_FLOAT64_C( -866.19), SIMDE_FLOAT64_C( -917.43)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -868.95), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_maskz_mov_pd(test_vec[i].k, test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_maskz_mov_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m256 a; simde__m256 r; } test_vec[8] = { { UINT8_C(230), simde_mm256_set_ps(SIMDE_FLOAT32_C( -916.16), SIMDE_FLOAT32_C( -17.54), SIMDE_FLOAT32_C( 72.07), SIMDE_FLOAT32_C( 358.38), SIMDE_FLOAT32_C( -323.81), SIMDE_FLOAT32_C( -500.50), SIMDE_FLOAT32_C( -957.58), SIMDE_FLOAT32_C( 95.32)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -916.16), SIMDE_FLOAT32_C( -17.54), SIMDE_FLOAT32_C( 72.07), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -500.50), SIMDE_FLOAT32_C( -957.58), SIMDE_FLOAT32_C( 0.00)) }, { UINT8_C(248), simde_mm256_set_ps(SIMDE_FLOAT32_C( 820.20), SIMDE_FLOAT32_C( -882.62), SIMDE_FLOAT32_C( 245.98), SIMDE_FLOAT32_C( 520.70), SIMDE_FLOAT32_C( 947.17), SIMDE_FLOAT32_C( -801.95), SIMDE_FLOAT32_C( 523.33), SIMDE_FLOAT32_C( 88.74)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 820.20), SIMDE_FLOAT32_C( -882.62), SIMDE_FLOAT32_C( 245.98), SIMDE_FLOAT32_C( 520.70), SIMDE_FLOAT32_C( 947.17), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT8_C( 91), simde_mm256_set_ps(SIMDE_FLOAT32_C( 382.59), SIMDE_FLOAT32_C( -104.90), SIMDE_FLOAT32_C( 437.21), SIMDE_FLOAT32_C( 669.80), SIMDE_FLOAT32_C( 475.78), SIMDE_FLOAT32_C( 
291.58), SIMDE_FLOAT32_C( 932.63), SIMDE_FLOAT32_C( 75.72)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -104.90), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 669.80), SIMDE_FLOAT32_C( 475.78), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 932.63), SIMDE_FLOAT32_C( 75.72)) }, { UINT8_C( 28), simde_mm256_set_ps(SIMDE_FLOAT32_C( 325.29), SIMDE_FLOAT32_C( 66.25), SIMDE_FLOAT32_C( 309.27), SIMDE_FLOAT32_C( 48.25), SIMDE_FLOAT32_C( -685.79), SIMDE_FLOAT32_C( 793.84), SIMDE_FLOAT32_C( -42.51), SIMDE_FLOAT32_C( -431.02)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 48.25), SIMDE_FLOAT32_C( -685.79), SIMDE_FLOAT32_C( 793.84), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT8_C( 95), simde_mm256_set_ps(SIMDE_FLOAT32_C( -193.68), SIMDE_FLOAT32_C( -614.23), SIMDE_FLOAT32_C( 420.74), SIMDE_FLOAT32_C( 824.23), SIMDE_FLOAT32_C( 818.32), SIMDE_FLOAT32_C( -457.30), SIMDE_FLOAT32_C( -144.19), SIMDE_FLOAT32_C( 78.38)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -614.23), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 824.23), SIMDE_FLOAT32_C( 818.32), SIMDE_FLOAT32_C( -457.30), SIMDE_FLOAT32_C( -144.19), SIMDE_FLOAT32_C( 78.38)) }, { UINT8_C(213), simde_mm256_set_ps(SIMDE_FLOAT32_C( -960.32), SIMDE_FLOAT32_C( -433.91), SIMDE_FLOAT32_C( 640.12), SIMDE_FLOAT32_C( 816.31), SIMDE_FLOAT32_C( -667.16), SIMDE_FLOAT32_C( -891.50), SIMDE_FLOAT32_C( 639.25), SIMDE_FLOAT32_C( 310.94)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -960.32), SIMDE_FLOAT32_C( -433.91), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 816.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -891.50), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 310.94)) }, { UINT8_C(210), simde_mm256_set_ps(SIMDE_FLOAT32_C( 20.36), SIMDE_FLOAT32_C( -24.88), SIMDE_FLOAT32_C( 118.89), SIMDE_FLOAT32_C( 166.69), SIMDE_FLOAT32_C( 470.98), SIMDE_FLOAT32_C( -195.06), SIMDE_FLOAT32_C( -643.26), SIMDE_FLOAT32_C( -611.78)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 20.36), 
SIMDE_FLOAT32_C( -24.88), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 166.69), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -643.26), SIMDE_FLOAT32_C( 0.00)) }, { UINT8_C(247), simde_mm256_set_ps(SIMDE_FLOAT32_C( 3.65), SIMDE_FLOAT32_C( -38.51), SIMDE_FLOAT32_C( -896.47), SIMDE_FLOAT32_C( 773.97), SIMDE_FLOAT32_C( -241.05), SIMDE_FLOAT32_C( -597.57), SIMDE_FLOAT32_C( 632.97), SIMDE_FLOAT32_C( -804.93)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 3.65), SIMDE_FLOAT32_C( -38.51), SIMDE_FLOAT32_C( -896.47), SIMDE_FLOAT32_C( 773.97), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -597.57), SIMDE_FLOAT32_C( 632.97), SIMDE_FLOAT32_C( -804.93)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_maskz_mov_ps(test_vec[i].k, test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_maskz_mov_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask64 k; simde__m512i a; simde__m512i r; } test_vec[8] = { { UINT64_C( 5922492609958636327), simde_mm512_set_epi8(INT8_C( -97), INT8_C( -47), INT8_C( -93), INT8_C( -97), INT8_C( 9), INT8_C( -55), INT8_C(-113), INT8_C( 98), INT8_C( -56), INT8_C( 10), INT8_C( 103), INT8_C( 84), INT8_C( 93), INT8_C( 24), INT8_C( -78), INT8_C( 35), INT8_C( 125), INT8_C( -63), INT8_C( 19), INT8_C( 4), INT8_C( 3), INT8_C( -87), INT8_C( 98), INT8_C(-113), INT8_C( 23), INT8_C(-124), INT8_C( -87), INT8_C( 63), INT8_C( 108), INT8_C( -18), INT8_C( -27), INT8_C(-127), INT8_C( -60), INT8_C( 60), INT8_C( -56), INT8_C( 3), INT8_C(-128), INT8_C( -62), INT8_C( 52), INT8_C( -74), INT8_C( -87), INT8_C( 32), INT8_C( 46), INT8_C(-128), INT8_C( 54), INT8_C( -19), INT8_C( 12), INT8_C( 22), INT8_C( -94), INT8_C( -84), INT8_C( -58), INT8_C( 92), INT8_C( -70), INT8_C( -25), INT8_C( 91), INT8_C( -45), INT8_C( 5), INT8_C( 109), INT8_C( -46), INT8_C( 37), INT8_C( 7), INT8_C( 44), INT8_C( 41), INT8_C(-106)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( -47), INT8_C( 0), INT8_C( 
-97), INT8_C( 0), INT8_C( 0), INT8_C(-113), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 103), INT8_C( 84), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 125), INT8_C( -63), INT8_C( 19), INT8_C( 0), INT8_C( 3), INT8_C( 0), INT8_C( 98), INT8_C(-113), INT8_C( 23), INT8_C( 0), INT8_C( -87), INT8_C( 0), INT8_C( 0), INT8_C( -18), INT8_C( -27), INT8_C( 0), INT8_C( 0), INT8_C( 60), INT8_C( -56), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( -74), INT8_C( 0), INT8_C( 32), INT8_C( 46), INT8_C(-128), INT8_C( 0), INT8_C( -19), INT8_C( 0), INT8_C( 22), INT8_C( -94), INT8_C( -84), INT8_C( -58), INT8_C( 0), INT8_C( -70), INT8_C( 0), INT8_C( 91), INT8_C( -45), INT8_C( 0), INT8_C( 0), INT8_C( -46), INT8_C( 0), INT8_C( 0), INT8_C( 44), INT8_C( 41), INT8_C(-106)) }, { UINT64_C( 8924247995799830534), simde_mm512_set_epi8(INT8_C( 20), INT8_C( 23), INT8_C( 120), INT8_C( -13), INT8_C( 82), INT8_C( 32), INT8_C( -44), INT8_C( -44), INT8_C( 43), INT8_C( -65), INT8_C( 47), INT8_C( 36), INT8_C(-101), INT8_C( 5), INT8_C( -76), INT8_C( -57), INT8_C( 77), INT8_C( 48), INT8_C( -46), INT8_C( -15), INT8_C( 78), INT8_C( 108), INT8_C( 114), INT8_C( 83), INT8_C( -72), INT8_C( 21), INT8_C( 100), INT8_C( 121), INT8_C( 29), INT8_C( -74), INT8_C( 81), INT8_C( -13), INT8_C( -57), INT8_C( -17), INT8_C( 20), INT8_C(-109), INT8_C( -87), INT8_C( 127), INT8_C( 92), INT8_C(-119), INT8_C( 26), INT8_C( 123), INT8_C( -51), INT8_C( 109), INT8_C( 30), INT8_C( -58), INT8_C(-117), INT8_C( 82), INT8_C( 111), INT8_C( -10), INT8_C( -10), INT8_C( -68), INT8_C( -4), INT8_C( -7), INT8_C( 117), INT8_C( 92), INT8_C( 94), INT8_C( -65), INT8_C( 109), INT8_C( 81), INT8_C( -71), INT8_C( -46), INT8_C( 113), INT8_C( 9)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 23), INT8_C( 120), INT8_C( -13), INT8_C( 82), INT8_C( 0), INT8_C( -44), INT8_C( -44), INT8_C( 43), INT8_C( -65), INT8_C( 0), INT8_C( 36), INT8_C(-101), INT8_C( 0), INT8_C( 0), INT8_C( -57), INT8_C( 0), INT8_C( 48), INT8_C( 0), INT8_C( 0), INT8_C( 78), 
INT8_C( 108), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 21), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -74), INT8_C( 0), INT8_C( 0), INT8_C( -57), INT8_C( -17), INT8_C( 20), INT8_C(-109), INT8_C( -87), INT8_C( 127), INT8_C( 0), INT8_C(-119), INT8_C( 0), INT8_C( 0), INT8_C( -51), INT8_C( 0), INT8_C( 0), INT8_C( -58), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -46), INT8_C( 113), INT8_C( 0)) }, { UINT64_C( 9822575649644588350), simde_mm512_set_epi8(INT8_C( 62), INT8_C( 34), INT8_C(-113), INT8_C( 121), INT8_C( -31), INT8_C(-111), INT8_C( 76), INT8_C(-113), INT8_C( 72), INT8_C( -37), INT8_C( 44), INT8_C( -67), INT8_C(-103), INT8_C( 31), INT8_C( 89), INT8_C( 120), INT8_C( -25), INT8_C(-127), INT8_C( 40), INT8_C( -10), INT8_C( 75), INT8_C(-123), INT8_C( 78), INT8_C( -2), INT8_C( -83), INT8_C( -74), INT8_C( -51), INT8_C( 46), INT8_C( 60), INT8_C( -39), INT8_C( 124), INT8_C(-117), INT8_C( 70), INT8_C( 66), INT8_C( -35), INT8_C( -51), INT8_C( -64), INT8_C( -61), INT8_C(-113), INT8_C( 2), INT8_C( -4), INT8_C( -72), INT8_C( 113), INT8_C( -63), INT8_C( -49), INT8_C( 70), INT8_C( -50), INT8_C( 52), INT8_C( 0), INT8_C( 13), INT8_C( 74), INT8_C( -60), INT8_C( 103), INT8_C( -7), INT8_C( -61), INT8_C( -37), INT8_C( -79), INT8_C( -77), INT8_C( -81), INT8_C( -83), INT8_C( 94), INT8_C( 52), INT8_C( -73), INT8_C( 76)), simde_mm512_set_epi8(INT8_C( 62), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -31), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -37), INT8_C( 0), INT8_C( -67), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -25), INT8_C(-127), INT8_C( 0), INT8_C( 0), INT8_C( 75), INT8_C(-123), INT8_C( 0), INT8_C( 0), INT8_C( -83), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 124), INT8_C( 0), INT8_C( 70), INT8_C( 66), INT8_C( 0), INT8_C( -51), INT8_C( -64), INT8_C( 0), INT8_C( 0), INT8_C( 0), 
INT8_C( 0), INT8_C( -72), INT8_C( 113), INT8_C( -63), INT8_C( 0), INT8_C( 70), INT8_C( -50), INT8_C( 52), INT8_C( 0), INT8_C( 0), INT8_C( 74), INT8_C( 0), INT8_C( 103), INT8_C( -7), INT8_C( 0), INT8_C( -37), INT8_C( 0), INT8_C( 0), INT8_C( -81), INT8_C( -83), INT8_C( 94), INT8_C( 52), INT8_C( -73), INT8_C( 0)) }, { UINT64_C( 1775653069823307747), simde_mm512_set_epi8(INT8_C( -85), INT8_C( 90), INT8_C( 97), INT8_C( -38), INT8_C( -35), INT8_C( 6), INT8_C( 37), INT8_C( 106), INT8_C( 102), INT8_C( 109), INT8_C( 47), INT8_C( 29), INT8_C( -81), INT8_C(-113), INT8_C( -49), INT8_C( 18), INT8_C( -68), INT8_C( 121), INT8_C(-102), INT8_C( -30), INT8_C(-103), INT8_C( -31), INT8_C( 24), INT8_C( -55), INT8_C( -5), INT8_C( 8), INT8_C( -38), INT8_C( 37), INT8_C( 15), INT8_C(-120), INT8_C( 17), INT8_C( -63), INT8_C( 107), INT8_C( -41), INT8_C( -53), INT8_C(-107), INT8_C( 91), INT8_C( -9), INT8_C(-127), INT8_C( -39), INT8_C( 105), INT8_C( -27), INT8_C( 96), INT8_C( -96), INT8_C( 2), INT8_C( 44), INT8_C( 11), INT8_C( -43), INT8_C( -52), INT8_C( 126), INT8_C( 125), INT8_C( 121), INT8_C( 87), INT8_C( -95), INT8_C( 120), INT8_C( -46), INT8_C( 25), INT8_C( 71), INT8_C( 117), INT8_C( 47), INT8_C(-110), INT8_C( -87), INT8_C( -36), INT8_C( 25)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -38), INT8_C( -35), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 102), INT8_C( 0), INT8_C( 47), INT8_C( 0), INT8_C( 0), INT8_C(-113), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 121), INT8_C(-102), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 24), INT8_C( -55), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 15), INT8_C(-120), INT8_C( 17), INT8_C( -63), INT8_C( 0), INT8_C( 0), INT8_C( -53), INT8_C( 0), INT8_C( 0), INT8_C( -9), INT8_C(-127), INT8_C( -39), INT8_C( 0), INT8_C( 0), INT8_C( 96), INT8_C( 0), INT8_C( 0), INT8_C( 44), INT8_C( 11), INT8_C( 0), INT8_C( 0), INT8_C( 126), INT8_C( 125), INT8_C( 0), INT8_C( 87), INT8_C( -95), INT8_C( 120), INT8_C( -46), INT8_C( 25), INT8_C( 
71), INT8_C( 117), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -36), INT8_C( 25)) }, { UINT64_C( 1039866445453672381), simde_mm512_set_epi8(INT8_C( -73), INT8_C( -64), INT8_C( -86), INT8_C( -65), INT8_C( 124), INT8_C(-109), INT8_C( 79), INT8_C(-111), INT8_C( 64), INT8_C( -98), INT8_C( -1), INT8_C( -43), INT8_C( -4), INT8_C( 72), INT8_C( 108), INT8_C( -95), INT8_C( -23), INT8_C( -29), INT8_C(-113), INT8_C( 47), INT8_C( 114), INT8_C(-122), INT8_C( -91), INT8_C( -20), INT8_C( 117), INT8_C( 0), INT8_C( -58), INT8_C( -82), INT8_C( -40), INT8_C( -36), INT8_C( -30), INT8_C( -56), INT8_C( -68), INT8_C( -93), INT8_C( 25), INT8_C( -68), INT8_C( 8), INT8_C( 64), INT8_C( -70), INT8_C( -19), INT8_C( -64), INT8_C( -54), INT8_C( 120), INT8_C( 61), INT8_C( -73), INT8_C( 47), INT8_C(-113), INT8_C( 68), INT8_C( -44), INT8_C( -96), INT8_C(-106), INT8_C( -68), INT8_C( 75), INT8_C( -42), INT8_C( 94), INT8_C( -68), INT8_C( -10), INT8_C( 41), INT8_C( -90), INT8_C(-110), INT8_C(-116), INT8_C( -51), INT8_C( -75), INT8_C( 102)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 124), INT8_C(-109), INT8_C( 79), INT8_C( 0), INT8_C( 0), INT8_C( -98), INT8_C( -1), INT8_C( 0), INT8_C( -4), INT8_C( 72), INT8_C( 108), INT8_C( 0), INT8_C( 0), INT8_C( -29), INT8_C( 0), INT8_C( 47), INT8_C( 114), INT8_C( 0), INT8_C( 0), INT8_C( -20), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -36), INT8_C( 0), INT8_C( -56), INT8_C( -68), INT8_C( -93), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( 64), INT8_C( -70), INT8_C( -19), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 61), INT8_C( -73), INT8_C( 47), INT8_C( 0), INT8_C( 68), INT8_C( 0), INT8_C( 0), INT8_C(-106), INT8_C( -68), INT8_C( 75), INT8_C( -42), INT8_C( 94), INT8_C( -68), INT8_C( -10), INT8_C( 0), INT8_C( -90), INT8_C(-110), INT8_C(-116), INT8_C( -51), INT8_C( 0), INT8_C( 102)) }, { UINT64_C(16454496682655599925), simde_mm512_set_epi8(INT8_C( 41), INT8_C( -4), INT8_C( -19), INT8_C( 26), INT8_C(-127), INT8_C( 
-41), INT8_C( 14), INT8_C( 10), INT8_C( 63), INT8_C( 99), INT8_C( 51), INT8_C(-115), INT8_C( 118), INT8_C( -85), INT8_C(-111), INT8_C( 19), INT8_C( 43), INT8_C( -97), INT8_C( 107), INT8_C( 127), INT8_C(-100), INT8_C( 45), INT8_C( -77), INT8_C( 77), INT8_C( -53), INT8_C( -59), INT8_C( -6), INT8_C( -57), INT8_C( 97), INT8_C( 68), INT8_C( -67), INT8_C( 117), INT8_C( -92), INT8_C( -3), INT8_C( 2), INT8_C( 59), INT8_C( 53), INT8_C( -13), INT8_C( -31), INT8_C( 47), INT8_C( -33), INT8_C( 67), INT8_C( -43), INT8_C( -53), INT8_C( -52), INT8_C( -3), INT8_C( 85), INT8_C( 48), INT8_C( -45), INT8_C( -72), INT8_C( 96), INT8_C( 85), INT8_C( 81), INT8_C( 28), INT8_C( -50), INT8_C(-107), INT8_C( -56), INT8_C( -85), INT8_C( -83), INT8_C( -25), INT8_C( 78), INT8_C( 13), INT8_C( 41), INT8_C( 86)), simde_mm512_set_epi8(INT8_C( 41), INT8_C( -4), INT8_C( -19), INT8_C( 0), INT8_C( 0), INT8_C( -41), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 99), INT8_C( 0), INT8_C(-115), INT8_C( 118), INT8_C( 0), INT8_C(-111), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-100), INT8_C( 45), INT8_C( 0), INT8_C( 77), INT8_C( -53), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 97), INT8_C( 68), INT8_C( 0), INT8_C( 117), INT8_C( -92), INT8_C( 0), INT8_C( 0), INT8_C( 59), INT8_C( 53), INT8_C( -13), INT8_C( -31), INT8_C( 47), INT8_C( -33), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -3), INT8_C( 85), INT8_C( 48), INT8_C( -45), INT8_C( -72), INT8_C( 0), INT8_C( 0), INT8_C( 81), INT8_C( 28), INT8_C( 0), INT8_C(-107), INT8_C( 0), INT8_C( 0), INT8_C( -83), INT8_C( -25), INT8_C( 0), INT8_C( 13), INT8_C( 0), INT8_C( 86)) }, { UINT64_C( 6096298775549734718), simde_mm512_set_epi8(INT8_C( -35), INT8_C( -29), INT8_C( 89), INT8_C( 91), INT8_C( -53), INT8_C( -62), INT8_C( 107), INT8_C( -42), INT8_C(-115), INT8_C( 52), INT8_C( -17), INT8_C( 64), INT8_C(-105), INT8_C(-106), INT8_C( 65), INT8_C( 97), INT8_C( 85), INT8_C( 52), INT8_C( -17), INT8_C( 6), INT8_C( -73), INT8_C( 109), INT8_C( 99), 
INT8_C( 9), INT8_C( 43), INT8_C(-102), INT8_C( 112), INT8_C( 24), INT8_C(-121), INT8_C( 58), INT8_C( -73), INT8_C( -15), INT8_C( 37), INT8_C( 104), INT8_C( -81), INT8_C( 113), INT8_C( 31), INT8_C( -10), INT8_C( -32), INT8_C( -91), INT8_C( 51), INT8_C( -51), INT8_C( 60), INT8_C( 38), INT8_C( -1), INT8_C( -38), INT8_C( 2), INT8_C( 110), INT8_C( -61), INT8_C( 91), INT8_C( -50), INT8_C( 89), INT8_C( 27), INT8_C( -13), INT8_C( 111), INT8_C( -20), INT8_C( 51), INT8_C( -66), INT8_C( -26), INT8_C( 66), INT8_C( 45), INT8_C( -59), INT8_C( -45), INT8_C(-102)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( -29), INT8_C( 0), INT8_C( 91), INT8_C( 0), INT8_C( -62), INT8_C( 0), INT8_C( 0), INT8_C(-115), INT8_C( 0), INT8_C( 0), INT8_C( 64), INT8_C(-105), INT8_C( 0), INT8_C( 65), INT8_C( 0), INT8_C( 0), INT8_C( 52), INT8_C( -17), INT8_C( 0), INT8_C( 0), INT8_C( 109), INT8_C( 99), INT8_C( 9), INT8_C( 0), INT8_C(-102), INT8_C( 112), INT8_C( 0), INT8_C(-121), INT8_C( 58), INT8_C( -73), INT8_C( -15), INT8_C( 37), INT8_C( 104), INT8_C( 0), INT8_C( 113), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -91), INT8_C( 0), INT8_C( -51), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 91), INT8_C( -50), INT8_C( 0), INT8_C( 27), INT8_C( -13), INT8_C( 111), INT8_C( -20), INT8_C( 0), INT8_C( 0), INT8_C( -26), INT8_C( 66), INT8_C( 45), INT8_C( -59), INT8_C( -45), INT8_C( 0)) }, { UINT64_C( 4696357732069093821), simde_mm512_set_epi8(INT8_C( 40), INT8_C( -99), INT8_C( 6), INT8_C( 105), INT8_C( 36), INT8_C( -85), INT8_C( 62), INT8_C( 102), INT8_C( 23), INT8_C(-110), INT8_C( -92), INT8_C( 59), INT8_C( 17), INT8_C( -54), INT8_C( 5), INT8_C( 81), INT8_C( -71), INT8_C( 68), INT8_C( 114), INT8_C( -60), INT8_C( 39), INT8_C( -49), INT8_C( -84), INT8_C( 114), INT8_C( -81), INT8_C( 122), INT8_C( 97), INT8_C( -16), INT8_C( 21), INT8_C( -76), INT8_C( -80), INT8_C( -61), INT8_C( -47), INT8_C( -86), INT8_C( 35), INT8_C(-110), INT8_C( 95), INT8_C( -9), INT8_C( 86), INT8_C( 9), 
INT8_C( 31), INT8_C( 48), INT8_C( 63), INT8_C( -6), INT8_C( -36), INT8_C( -47), INT8_C( 95), INT8_C( -20), INT8_C( 21), INT8_C( -9), INT8_C( -2), INT8_C( 26), INT8_C( 63), INT8_C( 36), INT8_C( -33), INT8_C( 58), INT8_C( -40), INT8_C( 106), INT8_C( 2), INT8_C( -51), INT8_C( -13), INT8_C( -76), INT8_C( -77), INT8_C( -77)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( -99), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 102), INT8_C( 0), INT8_C( 0), INT8_C( -92), INT8_C( 0), INT8_C( 17), INT8_C( -54), INT8_C( 0), INT8_C( 0), INT8_C( -71), INT8_C( 68), INT8_C( 0), INT8_C( -60), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 122), INT8_C( 97), INT8_C( -16), INT8_C( 21), INT8_C( 0), INT8_C( 0), INT8_C( -61), INT8_C( -47), INT8_C( 0), INT8_C( 0), INT8_C(-110), INT8_C( 0), INT8_C( -9), INT8_C( 86), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 95), INT8_C( -20), INT8_C( 0), INT8_C( 0), INT8_C( -2), INT8_C( 26), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 58), INT8_C( -40), INT8_C( 0), INT8_C( 2), INT8_C( -51), INT8_C( -13), INT8_C( -76), INT8_C( 0), INT8_C( -77)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_mov_epi8(test_vec[i].k, test_vec[i].a); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_mov_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask32 k; simde__m512i a; simde__m512i r; } test_vec[8] = { { UINT32_C(4000530422), simde_mm512_set_epi16(INT16_C( -5942), INT16_C( 25831), INT16_C(-28539), INT16_C( -1873), INT16_C(-13655), INT16_C( 26989), INT16_C( 16263), INT16_C( 13938), INT16_C( 10041), INT16_C( 23778), INT16_C(-21283), INT16_C( 18765), INT16_C(-14856), INT16_C( 31462), INT16_C(-19403), INT16_C( 32735), INT16_C( 7009), INT16_C( 10124), INT16_C(-25355), INT16_C(-21077), INT16_C( 1261), INT16_C( -9315), INT16_C(-20637), INT16_C( 25513), INT16_C( 5169), INT16_C( 
28434), INT16_C(-28809), INT16_C( -8631), INT16_C(-18627), INT16_C( 25166), INT16_C( -8628), INT16_C( 28868)), simde_mm512_set_epi16(INT16_C( -5942), INT16_C( 25831), INT16_C(-28539), INT16_C( 0), INT16_C(-13655), INT16_C( 26989), INT16_C( 16263), INT16_C( 0), INT16_C( 0), INT16_C( 23778), INT16_C(-21283), INT16_C( 18765), INT16_C( 0), INT16_C( 0), INT16_C(-19403), INT16_C( 32735), INT16_C( 0), INT16_C( 0), INT16_C(-25355), INT16_C(-21077), INT16_C( 1261), INT16_C( -9315), INT16_C(-20637), INT16_C( 25513), INT16_C( 5169), INT16_C( 28434), INT16_C(-28809), INT16_C( -8631), INT16_C( 0), INT16_C( 25166), INT16_C( -8628), INT16_C( 0)) }, { UINT32_C(4070875154), simde_mm512_set_epi16(INT16_C(-12225), INT16_C( 21369), INT16_C( -119), INT16_C(-28694), INT16_C(-23457), INT16_C(-22727), INT16_C(-11767), INT16_C(-23853), INT16_C(-22479), INT16_C( 23784), INT16_C( -5275), INT16_C(-13228), INT16_C(-17789), INT16_C(-22944), INT16_C(-14595), INT16_C(-10966), INT16_C( -2247), INT16_C(-10276), INT16_C( 27089), INT16_C(-12303), INT16_C( 28587), INT16_C(-26891), INT16_C( 24467), INT16_C( 22569), INT16_C( 14745), INT16_C(-19983), INT16_C( 19001), INT16_C( 25844), INT16_C(-17171), INT16_C( -2706), INT16_C( -6907), INT16_C( 24391)), simde_mm512_set_epi16(INT16_C(-12225), INT16_C( 21369), INT16_C( -119), INT16_C(-28694), INT16_C( 0), INT16_C( 0), INT16_C(-11767), INT16_C( 0), INT16_C(-22479), INT16_C( 0), INT16_C( -5275), INT16_C( 0), INT16_C( 0), INT16_C(-22944), INT16_C( 0), INT16_C( 0), INT16_C( -2247), INT16_C( 0), INT16_C( 27089), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 25844), INT16_C( 0), INT16_C( 0), INT16_C( -6907), INT16_C( 0)) }, { UINT32_C(3446806878), simde_mm512_set_epi16(INT16_C( -8234), INT16_C( 29915), INT16_C(-15715), INT16_C( 6824), INT16_C( 15576), INT16_C( 19574), INT16_C( 28649), INT16_C( 3361), INT16_C(-14218), INT16_C( 3388), INT16_C( -7950), INT16_C( 14208), INT16_C(-11822), 
INT16_C(-15586), INT16_C( 22828), INT16_C(-12231), INT16_C( 1557), INT16_C( 15030), INT16_C(-21739), INT16_C( 9138), INT16_C(-18261), INT16_C( 26404), INT16_C(-17358), INT16_C( 811), INT16_C( -9806), INT16_C(-30299), INT16_C( 28809), INT16_C( 31831), INT16_C(-23257), INT16_C( 4576), INT16_C( -7556), INT16_C( 7253)), simde_mm512_set_epi16(INT16_C( -8234), INT16_C( 29915), INT16_C( 0), INT16_C( 0), INT16_C( 15576), INT16_C( 19574), INT16_C( 0), INT16_C( 3361), INT16_C( 0), INT16_C( 3388), INT16_C( -7950), INT16_C( 14208), INT16_C( 0), INT16_C( 0), INT16_C( 22828), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 9138), INT16_C(-18261), INT16_C( 0), INT16_C( 0), INT16_C( 811), INT16_C( 0), INT16_C(-30299), INT16_C( 0), INT16_C( 31831), INT16_C(-23257), INT16_C( 4576), INT16_C( -7556), INT16_C( 0)) }, { UINT32_C( 343900451), simde_mm512_set_epi16(INT16_C(-31095), INT16_C(-23377), INT16_C( -4662), INT16_C(-21413), INT16_C( 30429), INT16_C( 8769), INT16_C(-28068), INT16_C( 27084), INT16_C( 27030), INT16_C(-23477), INT16_C(-21313), INT16_C(-17124), INT16_C(-18222), INT16_C( 32522), INT16_C( 29282), INT16_C( 28924), INT16_C( 27441), INT16_C(-21554), INT16_C( -5444), INT16_C( 30253), INT16_C(-29783), INT16_C(-26663), INT16_C(-11174), INT16_C(-25779), INT16_C(-27773), INT16_C( 24626), INT16_C( -6955), INT16_C(-12302), INT16_C( 26319), INT16_C(-26837), INT16_C(-16192), INT16_C( 18933)), simde_mm512_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-21413), INT16_C( 0), INT16_C( 8769), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-23477), INT16_C(-21313), INT16_C(-17124), INT16_C(-18222), INT16_C( 32522), INT16_C( 29282), INT16_C( 28924), INT16_C( 27441), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-25779), INT16_C( 0), INT16_C( 0), INT16_C( -6955), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-16192), INT16_C( 18933)) }, { UINT32_C( 610198307), simde_mm512_set_epi16(INT16_C( 28784), INT16_C(-14670), 
INT16_C(-17599), INT16_C(-27901), INT16_C( 10638), INT16_C(-12065), INT16_C( 21050), INT16_C( 10287), INT16_C( 11470), INT16_C( -4598), INT16_C( -40), INT16_C( 28251), INT16_C(-10212), INT16_C(-29606), INT16_C( -6193), INT16_C( 2935), INT16_C(-16438), INT16_C( 971), INT16_C( 3225), INT16_C( 17346), INT16_C( 28916), INT16_C( 25171), INT16_C( 10807), INT16_C( 1473), INT16_C( 15813), INT16_C( 32427), INT16_C(-23468), INT16_C(-21533), INT16_C( 13263), INT16_C(-22199), INT16_C( 13682), INT16_C( 226)), simde_mm512_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C(-17599), INT16_C( 0), INT16_C( 0), INT16_C(-12065), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -4598), INT16_C( 0), INT16_C( 28251), INT16_C(-10212), INT16_C(-29606), INT16_C( -6193), INT16_C( 0), INT16_C(-16438), INT16_C( 971), INT16_C( 3225), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 10807), INT16_C( 1473), INT16_C( 0), INT16_C( 0), INT16_C(-23468), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 13682), INT16_C( 226)) }, { UINT32_C(1063632252), simde_mm512_set_epi16(INT16_C( -3533), INT16_C(-15311), INT16_C( -9164), INT16_C(-27075), INT16_C( 30377), INT16_C( 29218), INT16_C( -8851), INT16_C(-29072), INT16_C( 28941), INT16_C( -5458), INT16_C( 29621), INT16_C( 18538), INT16_C(-22601), INT16_C( 13017), INT16_C( 26323), INT16_C( 2952), INT16_C(-17536), INT16_C( 11831), INT16_C( 27487), INT16_C( 29413), INT16_C( 5506), INT16_C( -8406), INT16_C( 23534), INT16_C( 31484), INT16_C( 17532), INT16_C( 11364), INT16_C( 26550), INT16_C(-26724), INT16_C( 23828), INT16_C(-27226), INT16_C(-30955), INT16_C( 28791)), simde_mm512_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -9164), INT16_C(-27075), INT16_C( 30377), INT16_C( 29218), INT16_C( -8851), INT16_C(-29072), INT16_C( 0), INT16_C( -5458), INT16_C( 29621), INT16_C( 0), INT16_C( 0), INT16_C( 13017), INT16_C( 0), INT16_C( 2952), INT16_C(-17536), INT16_C( 0), INT16_C( 27487), INT16_C( 29413), INT16_C( 5506), INT16_C( -8406), INT16_C( 0), INT16_C( 31484), INT16_C( 0), 
INT16_C( 11364), INT16_C( 26550), INT16_C(-26724), INT16_C( 23828), INT16_C(-27226), INT16_C( 0), INT16_C( 0)) }, { UINT32_C(2981066031), simde_mm512_set_epi16(INT16_C(-15776), INT16_C( 14598), INT16_C( -3252), INT16_C( 10125), INT16_C( 14481), INT16_C( 12166), INT16_C( 2171), INT16_C( 29452), INT16_C(-31285), INT16_C( 18516), INT16_C( 27776), INT16_C( 10973), INT16_C(-32618), INT16_C( -356), INT16_C( 12910), INT16_C( 2992), INT16_C( -3498), INT16_C( -2944), INT16_C(-21668), INT16_C( 2835), INT16_C(-13850), INT16_C(-21988), INT16_C( 9656), INT16_C( 32264), INT16_C( 12816), INT16_C( 21193), INT16_C(-25247), INT16_C( -7370), INT16_C( 5319), INT16_C( 5949), INT16_C( 9112), INT16_C( -1168)), simde_mm512_set_epi16(INT16_C(-15776), INT16_C( 0), INT16_C( -3252), INT16_C( 10125), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 29452), INT16_C(-31285), INT16_C( 0), INT16_C( 27776), INT16_C( 0), INT16_C(-32618), INT16_C( -356), INT16_C( 12910), INT16_C( 2992), INT16_C( 0), INT16_C( -2944), INT16_C(-21668), INT16_C( 2835), INT16_C( 0), INT16_C(-21988), INT16_C( 0), INT16_C( 32264), INT16_C( 0), INT16_C( 0), INT16_C(-25247), INT16_C( 0), INT16_C( 5319), INT16_C( 5949), INT16_C( 9112), INT16_C( -1168)) }, { UINT32_C( 623103106), simde_mm512_set_epi16(INT16_C( 26221), INT16_C( 1202), INT16_C(-22573), INT16_C( 25677), INT16_C( -9440), INT16_C( -3817), INT16_C(-15802), INT16_C( 26698), INT16_C( 26873), INT16_C( 4596), INT16_C(-15991), INT16_C( 14118), INT16_C( -7802), INT16_C( 10352), INT16_C( 27984), INT16_C( 1876), INT16_C( 14808), INT16_C(-10243), INT16_C( 2806), INT16_C( 5765), INT16_C(-26054), INT16_C( 23235), INT16_C(-10396), INT16_C(-11996), INT16_C(-32195), INT16_C(-16209), INT16_C(-27816), INT16_C(-28484), INT16_C(-29121), INT16_C( 7946), INT16_C( -1915), INT16_C( 9449)), simde_mm512_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C(-22573), INT16_C( 0), INT16_C( 0), INT16_C( -3817), INT16_C( 0), INT16_C( 26698), INT16_C( 0), INT16_C( 0), INT16_C(-15991), INT16_C( 0), INT16_C( 
0), INT16_C( 0), INT16_C( 27984), INT16_C( 1876), INT16_C( 14808), INT16_C(-10243), INT16_C( 0), INT16_C( 0), INT16_C(-26054), INT16_C( 23235), INT16_C( 0), INT16_C( 0), INT16_C(-32195), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1915), INT16_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_mov_epi16(test_vec[i].k, test_vec[i].a); simde_assert_m512i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_mov_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512i a; simde__m512i r; } test_vec[8] = { { UINT16_C(23562), simde_mm512_set_epi32(INT32_C( 413218138), INT32_C(-2056039012), INT32_C( 359898417), INT32_C( 503742711), INT32_C( -964140572), INT32_C( 1845540628), INT32_C( 1555270769), INT32_C( 276306907), INT32_C( 923961977), INT32_C( 2070870327), INT32_C( -106769082), INT32_C( 21505510), INT32_C(-1894191102), INT32_C( -61868066), INT32_C(-1022555483), INT32_C( 842262872)), simde_mm512_set_epi32(INT32_C( 0), INT32_C(-2056039012), INT32_C( 0), INT32_C( 503742711), INT32_C( -964140572), INT32_C( 1845540628), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1894191102), INT32_C( 0), INT32_C(-1022555483), INT32_C( 0)) }, { UINT16_C(36203), simde_mm512_set_epi32(INT32_C(-1836353351), INT32_C(-1955161161), INT32_C( 1387065895), INT32_C( 829477081), INT32_C( 1194773762), INT32_C( 1305535140), INT32_C( 692999175), INT32_C(-1162293370), INT32_C( 1281198604), INT32_C( -270591140), INT32_C( 23870431), INT32_C(-1469107120), INT32_C( 1859513610), INT32_C(-1425966851), INT32_C( 381161214), INT32_C( 706499700)), simde_mm512_set_epi32(INT32_C(-1836353351), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1194773762), INT32_C( 1305535140), INT32_C( 0), INT32_C(-1162293370), INT32_C( 0), INT32_C( -270591140), INT32_C( 23870431), INT32_C( 0), INT32_C( 1859513610), INT32_C( 0), INT32_C( 381161214), 
INT32_C( 706499700)) }, { UINT16_C(61846), simde_mm512_set_epi32(INT32_C(-1731705333), INT32_C( 1975072423), INT32_C( -536413935), INT32_C( 1477835290), INT32_C( 1453154713), INT32_C( -133697355), INT32_C( 1038848393), INT32_C( 897042603), INT32_C( 1474696001), INT32_C( 1692444627), INT32_C(-1157569404), INT32_C(-1969459150), INT32_C( 120064093), INT32_C(-1121934893), INT32_C( 1895180026), INT32_C( 1628067999)), simde_mm512_set_epi32(INT32_C(-1731705333), INT32_C( 1975072423), INT32_C( -536413935), INT32_C( 1477835290), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 897042603), INT32_C( 1474696001), INT32_C( 0), INT32_C( 0), INT32_C(-1969459150), INT32_C( 0), INT32_C(-1121934893), INT32_C( 1895180026), INT32_C( 0)) }, { UINT16_C( 9005), simde_mm512_set_epi32(INT32_C( 317112464), INT32_C( 741023218), INT32_C(-1717304973), INT32_C( 1768422162), INT32_C(-1938535542), INT32_C( -593182598), INT32_C( -560734377), INT32_C(-1833964883), INT32_C(-2069017846), INT32_C( 1509337971), INT32_C(-1663080670), INT32_C( -363349477), INT32_C( -761414190), INT32_C( 1575734613), INT32_C( 758160476), INT32_C( 434110055)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C(-1717304973), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -560734377), INT32_C(-1833964883), INT32_C( 0), INT32_C( 0), INT32_C(-1663080670), INT32_C( 0), INT32_C( -761414190), INT32_C( 1575734613), INT32_C( 0), INT32_C( 434110055)) }, { UINT16_C(16381), simde_mm512_set_epi32(INT32_C(-1241873035), INT32_C(-1720080742), INT32_C( 1575508697), INT32_C( 644418481), INT32_C( -191348066), INT32_C( 1363259829), INT32_C( -969945370), INT32_C(-1662256156), INT32_C( -483657475), INT32_C( 1693775573), INT32_C( -588936550), INT32_C( -831491481), INT32_C(-1533494499), INT32_C( 690127328), INT32_C( 1408818770), INT32_C( 1154640340)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 1575508697), INT32_C( 644418481), INT32_C( -191348066), INT32_C( 1363259829), INT32_C( -969945370), INT32_C(-1662256156), INT32_C( 
-483657475), INT32_C( 1693775573), INT32_C( -588936550), INT32_C( -831491481), INT32_C(-1533494499), INT32_C( 690127328), INT32_C( 0), INT32_C( 1154640340)) }, { UINT16_C(37447), simde_mm512_set_epi32(INT32_C(-2077483324), INT32_C( -857673646), INT32_C( 754202712), INT32_C( 120435698), INT32_C(-1765652094), INT32_C( -229167588), INT32_C(-1388415734), INT32_C( -902383521), INT32_C(-1071136130), INT32_C( 575343777), INT32_C( 2007077268), INT32_C( -686416210), INT32_C( -979195146), INT32_C( -793664277), INT32_C( 1970531286), INT32_C( -266532300)), simde_mm512_set_epi32(INT32_C(-2077483324), INT32_C( 0), INT32_C( 0), INT32_C( 120435698), INT32_C( 0), INT32_C( 0), INT32_C(-1388415734), INT32_C( 0), INT32_C( 0), INT32_C( 575343777), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -793664277), INT32_C( 1970531286), INT32_C( -266532300)) }, { UINT16_C(31625), simde_mm512_set_epi32(INT32_C(-1558092593), INT32_C( 725220263), INT32_C( 2072028486), INT32_C(-1343089166), INT32_C( 151067474), INT32_C( 1411237194), INT32_C(-1069461255), INT32_C( 79796340), INT32_C( -81868792), INT32_C( -238630197), INT32_C(-1945013502), INT32_C( -908401887), INT32_C( 1836974186), INT32_C(-1548825981), INT32_C( 1873806111), INT32_C(-2038561806)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 725220263), INT32_C( 2072028486), INT32_C(-1343089166), INT32_C( 151067474), INT32_C( 0), INT32_C(-1069461255), INT32_C( 79796340), INT32_C( -81868792), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1836974186), INT32_C( 0), INT32_C( 0), INT32_C(-2038561806)) }, { UINT16_C(29945), simde_mm512_set_epi32(INT32_C( 1874437031), INT32_C( 1791346696), INT32_C( 1351362877), INT32_C( 1434624201), INT32_C( 1114612735), INT32_C(-2006574951), INT32_C( 1470768291), INT32_C( 1700301025), INT32_C( 677818674), INT32_C( -624147248), INT32_C( 795562156), INT32_C(-1625864242), INT32_C( -323693444), INT32_C( 242932397), INT32_C( 1315868789), INT32_C(-1134215759)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 1791346696), INT32_C( 
1351362877), INT32_C( 1434624201), INT32_C( 0), INT32_C(-2006574951), INT32_C( 0), INT32_C( 0), INT32_C( 677818674), INT32_C( -624147248), INT32_C( 795562156), INT32_C(-1625864242), INT32_C( -323693444), INT32_C( 0), INT32_C( 0), INT32_C(-1134215759)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_mov_epi32(test_vec[i].k, test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_mov_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; simde__m512i r; } test_vec[8] = { { UINT8_C(248), simde_mm512_set_epi64(INT64_C( 2197185227781835820), INT64_C( 15935016481556146), INT64_C(-7676897351944758395), INT64_C( -396609189869225788), INT64_C( 2033032872247713203), INT64_C( 196856286260699291), INT64_C(-5445071775966286746), INT64_C( 4145146436042188996)), simde_mm512_set_epi64(INT64_C( 2197185227781835820), INT64_C( 15935016481556146), INT64_C(-7676897351944758395), INT64_C( -396609189869225788), INT64_C( 2033032872247713203), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C( 50), simde_mm512_set_epi64(INT64_C(-5159763787063667600), INT64_C( -806315631695634460), INT64_C( 8295852346035342936), INT64_C(-3045923053405968902), INT64_C( 8238548627246121972), INT64_C( 6711306137119169451), INT64_C( 8909631005256860390), INT64_C(-3863575863957815519)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 8295852346035342936), INT64_C(-3045923053405968902), INT64_C( 0), INT64_C( 0), INT64_C( 8909631005256860390), INT64_C( 0)) }, { UINT8_C(205), simde_mm512_set_epi64(INT64_C( -364876834429138531), INT64_C(-8701118401174655403), INT64_C(-4225146583063624142), INT64_C( 1748175868453859972), INT64_C(-2146322958238101234), INT64_C( 5260281165058225920), INT64_C( 6150323032540537551), INT64_C( 7787131310828538951)), simde_mm512_set_epi64(INT64_C( -364876834429138531), INT64_C(-8701118401174655403), INT64_C( 0), INT64_C( 0), 
INT64_C(-2146322958238101234), INT64_C( 5260281165058225920), INT64_C( 0), INT64_C( 7787131310828538951)) }, { UINT8_C( 37), simde_mm512_set_epi64(INT64_C( 5789616018161199708), INT64_C( -189925922123546982), INT64_C(-5486743783379366456), INT64_C( 576479268129213490), INT64_C( 6799755442903924910), INT64_C( 8415809909668152758), INT64_C(-5111257061290341882), INT64_C( -197393302827860380)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C(-5486743783379366456), INT64_C( 0), INT64_C( 0), INT64_C( 8415809909668152758), INT64_C( 0), INT64_C( -197393302827860380)) }, { UINT8_C( 88), simde_mm512_set_epi64(INT64_C(-1554832219963971622), INT64_C(-7375448098764531208), INT64_C( 8161779997769921522), INT64_C( -561105360908971667), INT64_C(-3236710360814756666), INT64_C( 5084844885557932166), INT64_C(-5492461044876086653), INT64_C(-7792360489043648145)), simde_mm512_set_epi64(INT64_C( 0), INT64_C(-7375448098764531208), INT64_C( 0), INT64_C( -561105360908971667), INT64_C(-3236710360814756666), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C( 37), simde_mm512_set_epi64(INT64_C( 6519579675176262812), INT64_C(-7940748567058253670), INT64_C( 6289445638826848684), INT64_C( 1300334437315413424), INT64_C(-2416059830887765317), INT64_C(-5031784341515283026), INT64_C( 8433369475758597766), INT64_C( 5881534320792150012)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 6289445638826848684), INT64_C( 0), INT64_C( 0), INT64_C(-5031784341515283026), INT64_C( 0), INT64_C( 5881534320792150012)) }, { UINT8_C( 16), simde_mm512_set_epi64(INT64_C(-5091799924273173479), INT64_C( 326582266571623592), INT64_C(-3763964521506166714), INT64_C( 4584033432636860229), INT64_C(-1921935435734596553), INT64_C( 3382451871995350760), INT64_C( 531323325001255908), INT64_C(-5053470530677804405)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 4584033432636860229), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C( 33), 
simde_mm512_set_epi64(INT64_C(-6532064902097701697), INT64_C(-2430912179372724686), INT64_C( 3177343060104491288), INT64_C(-7094318047719451166), INT64_C(-3484792886859817284), INT64_C( -117759466073012358), INT64_C( 8855057132598654557), INT64_C( -457984409854209760)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 3177343060104491288), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( -457984409854209760)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_mov_epi64(test_vec[i].k, test_vec[i].a); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_mov_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { UINT8_C(198), simde_mm512_set_pd(SIMDE_FLOAT64_C( -717.73), SIMDE_FLOAT64_C( -238.83), SIMDE_FLOAT64_C( -181.88), SIMDE_FLOAT64_C( -183.39), SIMDE_FLOAT64_C( 840.23), SIMDE_FLOAT64_C( 345.87), SIMDE_FLOAT64_C( 630.37), SIMDE_FLOAT64_C( 306.75)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -717.73), SIMDE_FLOAT64_C( -238.83), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 345.87), SIMDE_FLOAT64_C( 630.37), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(246), simde_mm512_set_pd(SIMDE_FLOAT64_C( -518.56), SIMDE_FLOAT64_C( -830.99), SIMDE_FLOAT64_C( 129.34), SIMDE_FLOAT64_C( 771.89), SIMDE_FLOAT64_C( -815.64), SIMDE_FLOAT64_C( -128.60), SIMDE_FLOAT64_C( -244.79), SIMDE_FLOAT64_C( -568.94)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -518.56), SIMDE_FLOAT64_C( -830.99), SIMDE_FLOAT64_C( 129.34), SIMDE_FLOAT64_C( 771.89), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -128.60), SIMDE_FLOAT64_C( -244.79), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(141), simde_mm512_set_pd(SIMDE_FLOAT64_C( 637.67), SIMDE_FLOAT64_C( 322.55), SIMDE_FLOAT64_C( 578.22), SIMDE_FLOAT64_C( -961.29), SIMDE_FLOAT64_C( 737.15), SIMDE_FLOAT64_C( 475.09), SIMDE_FLOAT64_C( 178.14), SIMDE_FLOAT64_C( -60.04)), 
simde_mm512_set_pd(SIMDE_FLOAT64_C( 637.67), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 737.15), SIMDE_FLOAT64_C( 475.09), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -60.04)) }, { UINT8_C(231), simde_mm512_set_pd(SIMDE_FLOAT64_C( 304.19), SIMDE_FLOAT64_C( 154.72), SIMDE_FLOAT64_C( 74.11), SIMDE_FLOAT64_C( -64.46), SIMDE_FLOAT64_C( 202.28), SIMDE_FLOAT64_C( -444.38), SIMDE_FLOAT64_C( 774.34), SIMDE_FLOAT64_C( 215.79)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 304.19), SIMDE_FLOAT64_C( 154.72), SIMDE_FLOAT64_C( 74.11), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -444.38), SIMDE_FLOAT64_C( 774.34), SIMDE_FLOAT64_C( 215.79)) }, { UINT8_C( 62), simde_mm512_set_pd(SIMDE_FLOAT64_C( -983.34), SIMDE_FLOAT64_C( 259.69), SIMDE_FLOAT64_C( 303.29), SIMDE_FLOAT64_C( -160.70), SIMDE_FLOAT64_C( -787.06), SIMDE_FLOAT64_C( 198.77), SIMDE_FLOAT64_C( -144.49), SIMDE_FLOAT64_C( 944.24)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 303.29), SIMDE_FLOAT64_C( -160.70), SIMDE_FLOAT64_C( -787.06), SIMDE_FLOAT64_C( 198.77), SIMDE_FLOAT64_C( -144.49), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(229), simde_mm512_set_pd(SIMDE_FLOAT64_C( -172.38), SIMDE_FLOAT64_C( 210.60), SIMDE_FLOAT64_C( 840.69), SIMDE_FLOAT64_C( 875.33), SIMDE_FLOAT64_C( 702.20), SIMDE_FLOAT64_C( -408.83), SIMDE_FLOAT64_C( 172.51), SIMDE_FLOAT64_C( 896.66)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -172.38), SIMDE_FLOAT64_C( 210.60), SIMDE_FLOAT64_C( 840.69), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -408.83), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 896.66)) }, { UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( -853.39), SIMDE_FLOAT64_C( 281.51), SIMDE_FLOAT64_C( -719.72), SIMDE_FLOAT64_C( 342.79), SIMDE_FLOAT64_C( -679.92), SIMDE_FLOAT64_C( -623.46), SIMDE_FLOAT64_C( 756.10), SIMDE_FLOAT64_C( -762.35)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 281.51), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 
342.79), SIMDE_FLOAT64_C( -679.92), SIMDE_FLOAT64_C( -623.46), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -762.35)) }, { UINT8_C(156), simde_mm512_set_pd(SIMDE_FLOAT64_C( -853.45), SIMDE_FLOAT64_C( 527.42), SIMDE_FLOAT64_C( -111.28), SIMDE_FLOAT64_C( 996.35), SIMDE_FLOAT64_C( 374.30), SIMDE_FLOAT64_C( 314.59), SIMDE_FLOAT64_C( -739.54), SIMDE_FLOAT64_C( 477.55)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -853.45), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 996.35), SIMDE_FLOAT64_C( 374.30), SIMDE_FLOAT64_C( 314.59), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_maskz_mov_pd(test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_maskz_mov_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { UINT16_C(42363), simde_mm512_set_ps(SIMDE_FLOAT32_C( 27.87), SIMDE_FLOAT32_C( -816.11), SIMDE_FLOAT32_C( 100.70), SIMDE_FLOAT32_C( -687.21), SIMDE_FLOAT32_C( 641.77), SIMDE_FLOAT32_C( 431.46), SIMDE_FLOAT32_C( -432.41), SIMDE_FLOAT32_C( 128.97), SIMDE_FLOAT32_C( 877.42), SIMDE_FLOAT32_C( 723.11), SIMDE_FLOAT32_C( 773.77), SIMDE_FLOAT32_C( 562.67), SIMDE_FLOAT32_C( -364.27), SIMDE_FLOAT32_C( 912.16), SIMDE_FLOAT32_C( -872.01), SIMDE_FLOAT32_C( -172.46)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 27.87), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 100.70), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 431.46), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 128.97), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 723.11), SIMDE_FLOAT32_C( 773.77), SIMDE_FLOAT32_C( 562.67), SIMDE_FLOAT32_C( -364.27), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -872.01), SIMDE_FLOAT32_C( -172.46)) }, { UINT16_C(38549), simde_mm512_set_ps(SIMDE_FLOAT32_C( 618.21), SIMDE_FLOAT32_C( 498.90), SIMDE_FLOAT32_C( -849.91), SIMDE_FLOAT32_C( -52.66), SIMDE_FLOAT32_C( 545.34), 
SIMDE_FLOAT32_C( 794.02), SIMDE_FLOAT32_C( -461.31), SIMDE_FLOAT32_C( 114.20), SIMDE_FLOAT32_C( 86.28), SIMDE_FLOAT32_C( -885.12), SIMDE_FLOAT32_C( 172.95), SIMDE_FLOAT32_C( 554.47), SIMDE_FLOAT32_C( -747.12), SIMDE_FLOAT32_C( -745.25), SIMDE_FLOAT32_C( -281.94), SIMDE_FLOAT32_C( 206.58)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 618.21), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -52.66), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 794.02), SIMDE_FLOAT32_C( -461.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 86.28), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 554.47), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -745.25), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 206.58)) }, { UINT16_C(52704), simde_mm512_set_ps(SIMDE_FLOAT32_C( 502.41), SIMDE_FLOAT32_C( 880.11), SIMDE_FLOAT32_C( -557.95), SIMDE_FLOAT32_C( -268.94), SIMDE_FLOAT32_C( 733.29), SIMDE_FLOAT32_C( 706.04), SIMDE_FLOAT32_C( -93.63), SIMDE_FLOAT32_C( -582.14), SIMDE_FLOAT32_C( -836.38), SIMDE_FLOAT32_C( 744.38), SIMDE_FLOAT32_C( -45.29), SIMDE_FLOAT32_C( -703.39), SIMDE_FLOAT32_C( -540.13), SIMDE_FLOAT32_C( 467.24), SIMDE_FLOAT32_C( -527.36), SIMDE_FLOAT32_C( 198.48)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 502.41), SIMDE_FLOAT32_C( 880.11), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 733.29), SIMDE_FLOAT32_C( 706.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -582.14), SIMDE_FLOAT32_C( -836.38), SIMDE_FLOAT32_C( 744.38), SIMDE_FLOAT32_C( -45.29), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(22254), simde_mm512_set_ps(SIMDE_FLOAT32_C( -71.03), SIMDE_FLOAT32_C( 476.98), SIMDE_FLOAT32_C( 846.87), SIMDE_FLOAT32_C( 538.39), SIMDE_FLOAT32_C( 819.31), SIMDE_FLOAT32_C( -703.74), SIMDE_FLOAT32_C( 35.79), SIMDE_FLOAT32_C( -913.43), SIMDE_FLOAT32_C( 774.49), SIMDE_FLOAT32_C( -248.35), SIMDE_FLOAT32_C( -966.82), SIMDE_FLOAT32_C( -517.72), SIMDE_FLOAT32_C( -427.16), SIMDE_FLOAT32_C( 
-808.81), SIMDE_FLOAT32_C( 888.05), SIMDE_FLOAT32_C( -556.04)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 476.98), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 538.39), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -703.74), SIMDE_FLOAT32_C( 35.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 774.49), SIMDE_FLOAT32_C( -248.35), SIMDE_FLOAT32_C( -966.82), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -427.16), SIMDE_FLOAT32_C( -808.81), SIMDE_FLOAT32_C( 888.05), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(52364), simde_mm512_set_ps(SIMDE_FLOAT32_C( 995.54), SIMDE_FLOAT32_C( 221.44), SIMDE_FLOAT32_C( 899.46), SIMDE_FLOAT32_C( 449.06), SIMDE_FLOAT32_C( -950.30), SIMDE_FLOAT32_C( -151.76), SIMDE_FLOAT32_C( -841.60), SIMDE_FLOAT32_C( 17.37), SIMDE_FLOAT32_C( -167.30), SIMDE_FLOAT32_C( -256.21), SIMDE_FLOAT32_C( -735.57), SIMDE_FLOAT32_C( -164.68), SIMDE_FLOAT32_C( 752.38), SIMDE_FLOAT32_C( 507.77), SIMDE_FLOAT32_C( -277.52), SIMDE_FLOAT32_C( 4.35)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 995.54), SIMDE_FLOAT32_C( 221.44), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -950.30), SIMDE_FLOAT32_C( -151.76), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -167.30), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 752.38), SIMDE_FLOAT32_C( 507.77), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C( 1779), simde_mm512_set_ps(SIMDE_FLOAT32_C( -523.28), SIMDE_FLOAT32_C( -985.42), SIMDE_FLOAT32_C( 56.90), SIMDE_FLOAT32_C( 872.34), SIMDE_FLOAT32_C( -127.19), SIMDE_FLOAT32_C( 894.80), SIMDE_FLOAT32_C( 377.19), SIMDE_FLOAT32_C( -135.98), SIMDE_FLOAT32_C( 185.79), SIMDE_FLOAT32_C( 425.67), SIMDE_FLOAT32_C( -947.39), SIMDE_FLOAT32_C( -417.93), SIMDE_FLOAT32_C( 872.23), SIMDE_FLOAT32_C( 491.12), SIMDE_FLOAT32_C( 994.51), SIMDE_FLOAT32_C( 86.62)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 
894.80), SIMDE_FLOAT32_C( 377.19), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 185.79), SIMDE_FLOAT32_C( 425.67), SIMDE_FLOAT32_C( -947.39), SIMDE_FLOAT32_C( -417.93), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 994.51), SIMDE_FLOAT32_C( 86.62)) }, { UINT16_C(13470), simde_mm512_set_ps(SIMDE_FLOAT32_C( 900.57), SIMDE_FLOAT32_C( 485.77), SIMDE_FLOAT32_C( 272.94), SIMDE_FLOAT32_C( -275.02), SIMDE_FLOAT32_C( -912.01), SIMDE_FLOAT32_C( -611.34), SIMDE_FLOAT32_C( 325.35), SIMDE_FLOAT32_C( -148.93), SIMDE_FLOAT32_C( -884.16), SIMDE_FLOAT32_C( 545.87), SIMDE_FLOAT32_C( -690.64), SIMDE_FLOAT32_C( 883.50), SIMDE_FLOAT32_C( -329.16), SIMDE_FLOAT32_C( -369.50), SIMDE_FLOAT32_C( 429.82), SIMDE_FLOAT32_C( 530.37)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 272.94), SIMDE_FLOAT32_C( -275.02), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -611.34), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -884.16), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 883.50), SIMDE_FLOAT32_C( -329.16), SIMDE_FLOAT32_C( -369.50), SIMDE_FLOAT32_C( 429.82), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(25684), simde_mm512_set_ps(SIMDE_FLOAT32_C( 446.23), SIMDE_FLOAT32_C( -618.66), SIMDE_FLOAT32_C( -992.21), SIMDE_FLOAT32_C( -692.36), SIMDE_FLOAT32_C( -952.61), SIMDE_FLOAT32_C( 923.35), SIMDE_FLOAT32_C( -322.87), SIMDE_FLOAT32_C( 288.88), SIMDE_FLOAT32_C( 653.23), SIMDE_FLOAT32_C( -162.04), SIMDE_FLOAT32_C( 847.98), SIMDE_FLOAT32_C( -826.91), SIMDE_FLOAT32_C( -738.77), SIMDE_FLOAT32_C( 279.48), SIMDE_FLOAT32_C( 397.18), SIMDE_FLOAT32_C( 127.10)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -618.66), SIMDE_FLOAT32_C( -992.21), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 923.35), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -162.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -826.91), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 279.48), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_maskz_mov_ps(test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_mov_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_mov_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_mov_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_mov_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_mov_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_mov_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_mov_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_mov_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_mov_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_mov_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_mov_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_mov_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_mov_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_mov_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_mov_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_mov_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_mov_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_mov_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_mov_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_mov_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_mov_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_mov_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_mov_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_mov_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_mov_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_mov_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_mov_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_mov_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_mov_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_mov_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_mov_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_mov_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_mov_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_mov_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_mov_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_mov_ps) 
SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/mov_mask.c000066400000000000000000001401471400333146700172430ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #define SIMDE_TEST_X86_AVX512_INSN mov_mask #include #include static int test_simde_mm_movepi8_mask (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[16]; const simde__mmask16 r; } test_vec[] = { { { INT8_C( 83), -INT8_C( 56), INT8_C( 2), -INT8_C( 120), -INT8_C( 72), -INT8_C( 102), INT8_C( 37), INT8_C( 73), -INT8_C( 17), -INT8_C( 44), INT8_C( 52), -INT8_C( 102), -INT8_C( 118), INT8_C( 122), -INT8_C( 126), INT8_C( 106) }, UINT16_C(23354) }, { { INT8_C( 41), -INT8_C( 40), INT8_C( 58), INT8_C( 91), INT8_C( 1), -INT8_C( 24), -INT8_C( 78), INT8_C( 57), -INT8_C( 57), -INT8_C( 64), -INT8_C( 68), INT8_C( 88), INT8_C( 44), INT8_C( 115), -INT8_C( 97), INT8_MAX }, UINT16_C(18274) }, { { INT8_C( 59), -INT8_C( 94), INT8_C( 7), -INT8_C( 12), INT8_C( 60), INT8_C( 44), INT8_C( 61), INT8_C( 43), INT8_C( 0), INT8_C( 113), -INT8_C( 59), -INT8_C( 118), -INT8_C( 21), INT8_C( 72), -INT8_C( 11), INT8_C( 20) }, UINT16_C(23562) }, { { INT8_C( 32), INT8_C( 47), INT8_C( 111), INT8_C( 33), INT8_C( 24), INT8_C( 34), INT8_C( 90), -INT8_C( 33), -INT8_C( 30), INT8_C( 22), INT8_C( 55), INT8_C( 14), -INT8_C( 119), -INT8_C( 42), -INT8_C( 114), -INT8_C( 60) }, UINT16_C(61824) }, { { INT8_C( 120), -INT8_C( 107), -INT8_C( 72), -INT8_C( 76), -INT8_C( 62), -INT8_C( 11), -INT8_C( 33), -INT8_C( 62), INT8_C( 103), -INT8_C( 91), INT8_C( 77), INT8_C( 82), -INT8_C( 19), INT8_C( 66), INT8_C( 103), INT8_C( 13) }, UINT16_C( 4862) }, { { INT8_C( 113), -INT8_C( 42), INT8_C( 46), -INT8_C( 119), -INT8_C( 8), -INT8_C( 120), INT8_C( 104), -INT8_C( 38), -INT8_C( 98), -INT8_C( 97), -INT8_C( 23), INT8_C( 39), INT8_C( 118), INT8_C( 119), -INT8_C( 21), -INT8_C( 18) }, UINT16_C(51130) }, { { INT8_C( 12), -INT8_C( 92), -INT8_C( 93), -INT8_C( 50), -INT8_C( 103), -INT8_C( 126), -INT8_C( 111), INT8_C( 0), INT8_C( 39), -INT8_C( 34), INT8_C( 83), INT8_C( 20), INT8_C( 32), -INT8_C( 70), INT8_C( 33), -INT8_C( 111) }, UINT16_C(41598) }, { { -INT8_C( 112), INT8_C( 79), INT8_C( 
27), -INT8_C( 119), -INT8_C( 41), -INT8_C( 125), INT8_C( 99), INT8_C( 117), INT8_C( 35), INT8_C( 76), -INT8_C( 100), -INT8_C( 103), -INT8_C( 61), -INT8_C( 120), -INT8_C( 121), -INT8_C( 48) }, UINT16_C(64569) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__mmask16 r = simde_mm_movepi8_mask(a); simde_assert_equal_mmask16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__mmask16 r = simde_mm_movepi8_mask(a); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_movepi16_mask (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const simde__mmask8 r; } test_vec[] = { { { -INT16_C( 5581), -INT16_C( 3782), INT16_C( 30703), INT16_C( 26376), INT16_C( 15403), INT16_C( 10089), -INT16_C( 29155), -INT16_C( 5741) }, UINT8_C(195) }, { { INT16_C( 28768), -INT16_C( 25349), -INT16_C( 19427), INT16_C( 32429), INT16_C( 31304), -INT16_C( 15469), -INT16_C( 487), INT16_C( 19546) }, UINT8_C(102) }, { { -INT16_C( 27416), -INT16_C( 10435), INT16_C( 17932), INT16_C( 14143), -INT16_C( 22398), -INT16_C( 24481), -INT16_C( 3530), -INT16_C( 26743) }, UINT8_C(243) }, { { -INT16_C( 31646), -INT16_C( 32717), -INT16_C( 8135), -INT16_C( 32258), -INT16_C( 28326), INT16_C( 29765), -INT16_C( 24689), INT16_C( 30656) }, UINT8_C( 95) }, { { -INT16_C( 460), INT16_C( 16463), -INT16_C( 29116), -INT16_C( 14729), -INT16_C( 10698), INT16_C( 27750), -INT16_C( 3896), INT16_C( 11011) }, UINT8_C( 93) }, { { INT16_C( 13940), -INT16_C( 21077), -INT16_C( 22250), INT16_C( 28975), INT16_C( 29754), -INT16_C( 13595), -INT16_C( 23277), INT16_C( 18241) }, UINT8_C(102) }, { { -INT16_C( 28509), -INT16_C( 6265), -INT16_C( 226), INT16_C( 21678), INT16_C( 5333), -INT16_C( 24895), -INT16_C( 15356), INT16_C( 31177) }, 
UINT8_C(103) }, { { INT16_C( 29947), INT16_C( 4390), INT16_C( 21789), INT16_C( 22402), INT16_C( 26569), -INT16_C( 8927), INT16_C( 25357), -INT16_C( 20444) }, UINT8_C(160) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__mmask8 r = simde_mm_movepi16_mask(a); simde_assert_equal_mmask8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__mmask8 r = simde_mm_movepi16_mask(a); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_movepi32_mask (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const simde__mmask8 r; } test_vec[] = { { { -INT32_C( 782608639), -INT32_C( 887178557), -INT32_C( 2094607728), -INT32_C( 1600281805) }, UINT8_C( 15) }, { { INT32_C( 1082937331), INT32_C( 699699582), -INT32_C( 1301359037), -INT32_C( 925751866) }, UINT8_C( 12) }, { { INT32_C( 1083780221), INT32_C( 1980479462), -INT32_C( 1124519543), -INT32_C( 950233132) }, UINT8_C( 12) }, { { INT32_C( 1678305766), -INT32_C( 1098007430), INT32_C( 1500576659), -INT32_C( 1641987551) }, UINT8_C( 10) }, { { INT32_C( 1423882862), -INT32_C( 70588046), -INT32_C( 273103846), INT32_C( 1102451803) }, UINT8_C( 6) }, { { INT32_C( 2024193789), INT32_C( 238433147), INT32_C( 1348970031), INT32_C( 1458473448) }, UINT8_C( 0) }, { { -INT32_C( 1247097789), -INT32_C( 810453835), -INT32_C( 1816237768), INT32_C( 2077586813) }, UINT8_C( 7) }, { { -INT32_C( 1359774925), -INT32_C( 574871122), -INT32_C( 1221778225), -INT32_C( 267576403) }, UINT8_C( 15) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__mmask8 r = simde_mm_movepi32_mask(a); simde_assert_equal_mmask8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for 
(int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__mmask8 r = simde_mm_movepi32_mask(a); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_movepi64_mask (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[2]; const simde__mmask8 r; } test_vec[] = { { { -INT64_C( 8985098879671146388), -INT64_C( 1774927821684719271) }, UINT8_C( 3) }, { { -INT64_C( 6374873338066020493), -INT64_C( 2178519056744984205) }, UINT8_C( 3) }, { { INT64_C( 1364798191684170817), INT64_C( 1905382897975129854) }, UINT8_C( 0) }, { { INT64_C( 4809595702362958239), -INT64_C( 6609924829744198930) }, UINT8_C( 2) }, { { INT64_C( 7913952024705741577), -INT64_C( 6613420582078202505) }, UINT8_C( 2) }, { { -INT64_C( 2710186300933353902), INT64_C( 5876496338450740358) }, UINT8_C( 1) }, { { INT64_C( 1590589390765399605), -INT64_C( 4157266161983812955) }, UINT8_C( 2) }, { { INT64_C( 864774168005511738), -INT64_C( 839496098544009793) }, UINT8_C( 2) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__mmask8 r = simde_mm_movepi64_mask(a); simde_assert_equal_mmask8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i64x2(); simde__mmask8 r = simde_mm_movepi64_mask(a); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_movepi8_mask (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[32]; const simde__mmask32 r; } test_vec[] = { { { -INT8_C( 88), INT8_C( 21), INT8_C( 41), -INT8_C( 119), -INT8_C( 58), -INT8_C( 90), INT8_MIN, -INT8_C( 29), INT8_C( 74), -INT8_C( 83), -INT8_C( 42), INT8_C( 27), INT8_C( 59), -INT8_C( 53), -INT8_C( 76), -INT8_C( 12), INT8_C( 36), 
-INT8_C( 27), INT8_C( 54), INT8_C( 119), -INT8_C( 63), -INT8_C( 79), -INT8_C( 72), -INT8_C( 30), INT8_C( 35), -INT8_C( 62), INT8_C( 69), -INT8_C( 29), INT8_C( 72), INT8_C( 50), -INT8_C( 80), -INT8_C( 16) }, UINT32_C(3404916473) }, { { INT8_C( 71), -INT8_C( 39), INT8_C( 121), INT8_C( 13), INT8_MIN, -INT8_C( 7), -INT8_C( 16), -INT8_C( 54), -INT8_C( 90), -INT8_C( 57), -INT8_C( 26), -INT8_C( 30), -INT8_C( 110), -INT8_C( 102), -INT8_C( 42), -INT8_C( 74), INT8_MAX, INT8_C( 12), INT8_C( 46), INT8_C( 64), -INT8_C( 67), -INT8_C( 26), INT8_C( 34), -INT8_C( 31), -INT8_C( 88), INT8_C( 104), -INT8_C( 60), -INT8_C( 16), -INT8_C( 102), INT8_C( 116), -INT8_C( 32), -INT8_C( 31) }, UINT32_C(3719364594) }, { { INT8_C( 77), INT8_C( 89), -INT8_C( 17), -INT8_C( 51), INT8_C( 82), -INT8_C( 33), -INT8_C( 104), -INT8_C( 8), -INT8_C( 90), INT8_C( 126), -INT8_C( 38), INT8_C( 57), INT8_C( 24), -INT8_C( 80), -INT8_C( 17), -INT8_C( 105), -INT8_C( 68), INT8_C( 29), -INT8_C( 40), INT8_C( 122), INT8_C( 3), -INT8_C( 6), INT8_C( 91), -INT8_C( 85), INT8_C( 98), INT8_C( 31), -INT8_C( 101), -INT8_C( 4), -INT8_C( 109), INT8_C( 123), -INT8_C( 34), -INT8_C( 32) }, UINT32_C(3701859820) }, { { -INT8_C( 44), -INT8_C( 51), -INT8_C( 82), INT8_C( 38), -INT8_C( 84), INT8_C( 70), INT8_C( 31), INT8_C( 83), -INT8_C( 60), -INT8_C( 7), -INT8_C( 116), -INT8_C( 36), -INT8_C( 86), INT8_C( 123), INT8_C( 115), INT8_C( 102), -INT8_C( 103), INT8_C( 75), -INT8_C( 32), -INT8_C( 100), INT8_C( 70), INT8_C( 59), INT8_C( 72), -INT8_C( 88), INT8_C( 90), -INT8_C( 29), -INT8_C( 91), -INT8_C( 19), INT8_C( 95), -INT8_C( 125), -INT8_C( 50), INT8_C( 51) }, UINT32_C(1854742295) }, { { INT8_C( 80), INT8_C( 124), INT8_C( 90), -INT8_C( 4), -INT8_C( 62), INT8_C( 121), INT8_C( 79), -INT8_C( 122), INT8_C( 114), -INT8_C( 37), INT8_C( 98), INT8_C( 28), INT8_C( 87), -INT8_C( 43), -INT8_C( 125), -INT8_C( 16), INT8_C( 33), INT8_C( 99), -INT8_C( 116), INT8_C( 103), -INT8_C( 97), -INT8_C( 44), INT8_C( 15), -INT8_C( 7), -INT8_C( 72), -INT8_C( 76), 
-INT8_C( 25), INT8_C( 23), INT8_C( 55), -INT8_C( 75), INT8_C( 74), -INT8_C( 121) }, UINT32_C(2813649560) }, { { INT8_C( 49), -INT8_C( 92), -INT8_C( 124), -INT8_C( 13), INT8_C( 29), -INT8_C( 45), INT8_C( 121), -INT8_C( 112), -INT8_C( 81), -INT8_C( 37), -INT8_C( 84), INT8_C( 6), -INT8_C( 80), INT8_C( 47), -INT8_C( 10), -INT8_C( 47), -INT8_C( 109), -INT8_C( 126), INT8_C( 56), INT8_C( 50), INT8_C( 87), INT8_C( 72), INT8_C( 43), INT8_C( 15), -INT8_C( 4), INT8_C( 18), INT8_C( 38), INT8_C( 52), -INT8_C( 57), INT8_C( 112), -INT8_C( 69), -INT8_C( 8) }, UINT32_C(3506689966) }, { { INT8_C( 21), INT8_C( 63), -INT8_C( 21), INT8_C( 50), INT8_C( 19), INT8_C( 100), -INT8_C( 62), -INT8_C( 62), INT8_C( 63), INT8_C( 111), -INT8_C( 56), -INT8_C( 16), -INT8_C( 98), -INT8_C( 66), -INT8_C( 63), INT8_C( 49), INT8_C( 64), -INT8_C( 6), INT8_C( 99), -INT8_C( 105), INT8_C( 66), -INT8_C( 113), -INT8_C( 90), INT8_C( 62), -INT8_C( 95), -INT8_C( 52), INT8_C( 114), INT8_C( 105), INT8_C( 61), INT8_C( 46), INT8_C( 97), INT8_C( 82) }, UINT32_C( 57310404) }, { { INT8_C( 109), INT8_C( 77), -INT8_C( 124), INT8_MIN, -INT8_C( 79), INT8_C( 71), INT8_C( 66), -INT8_C( 15), -INT8_C( 74), INT8_C( 10), -INT8_C( 31), INT8_C( 84), -INT8_C( 56), -INT8_C( 94), -INT8_C( 122), INT8_C( 9), -INT8_C( 100), -INT8_C( 23), -INT8_C( 96), -INT8_C( 34), INT8_C( 120), INT8_C( 71), INT8_C( 29), INT8_C( 26), INT8_C( 19), -INT8_C( 113), -INT8_C( 125), INT8_C( 80), -INT8_C( 67), -INT8_C( 28), -INT8_C( 94), INT8_C( 43) }, UINT32_C(1980724636) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_mm256_loadu_epi8(test_vec[i].a); simde__mmask32 r = simde_mm256_movepi8_mask(a); simde_assert_equal_mmask32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i8x32(); simde__mmask32 r = simde_mm256_movepi8_mask(a); simde_test_x86_write_i8x32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask32(2, r, 
SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_movepi16_mask (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[16]; const simde__mmask16 r; } test_vec[] = { { { INT16_C( 23942), -INT16_C( 13964), -INT16_C( 15829), -INT16_C( 4752), -INT16_C( 32593), -INT16_C( 16422), INT16_C( 6984), INT16_C( 21147), INT16_C( 6830), -INT16_C( 8372), INT16_C( 7384), INT16_C( 13776), -INT16_C( 28383), -INT16_C( 1156), INT16_C( 6875), INT16_C( 25327) }, UINT16_C(12862) }, { { INT16_C( 25463), -INT16_C( 24021), -INT16_C( 25818), -INT16_C( 10864), INT16_C( 27164), INT16_C( 25748), INT16_C( 12421), INT16_C( 13238), INT16_C( 586), INT16_C( 8722), -INT16_C( 7394), INT16_C( 16216), -INT16_C( 11148), INT16_C( 20283), INT16_C( 10990), INT16_C( 26289) }, UINT16_C( 5134) }, { { -INT16_C( 9075), -INT16_C( 19704), -INT16_C( 26504), -INT16_C( 27511), INT16_C( 7427), -INT16_C( 30472), -INT16_C( 20915), -INT16_C( 26692), -INT16_C( 12624), -INT16_C( 12614), INT16_C( 4785), INT16_C( 9485), INT16_C( 18662), -INT16_C( 11147), INT16_C( 9842), INT16_C( 58) }, UINT16_C( 9199) }, { { INT16_C( 17155), INT16_C( 31667), INT16_C( 15579), -INT16_C( 8689), INT16_C( 1882), -INT16_C( 22681), INT16_C( 9141), INT16_C( 25919), -INT16_C( 1551), -INT16_C( 23757), INT16_C( 16395), -INT16_C( 3640), INT16_C( 15753), -INT16_C( 1083), INT16_C( 100), INT16_C( 26619) }, UINT16_C(11048) }, { { -INT16_C( 20669), INT16_C( 7906), -INT16_C( 3605), INT16_C( 17917), INT16_C( 25848), -INT16_C( 21011), INT16_C( 11399), INT16_C( 30738), INT16_C( 17701), INT16_C( 12315), -INT16_C( 7035), INT16_C( 3617), -INT16_C( 6623), -INT16_C( 31478), INT16_C( 1510), INT16_C( 10732) }, UINT16_C(13349) }, { { -INT16_C( 12620), -INT16_C( 24504), INT16_C( 17855), -INT16_C( 18459), -INT16_C( 11607), INT16_C( 12388), INT16_C( 30462), INT16_C( 9128), -INT16_C( 15173), INT16_C( 16723), INT16_C( 29864), -INT16_C( 14001), INT16_C( 22875), INT16_C( 16719), INT16_C( 15199), INT16_C( 4971) }, UINT16_C( 2331) }, { { 
-INT16_C( 19702), -INT16_C( 13901), -INT16_C( 26120), -INT16_C( 24191), -INT16_C( 6805), INT16_C( 27345), INT16_C( 31068), INT16_C( 6029), -INT16_C( 7875), -INT16_C( 6824), -INT16_C( 22443), -INT16_C( 20305), -INT16_C( 511), INT16_C( 24818), INT16_C( 23865), INT16_C( 17268) }, UINT16_C( 7967) }, { { INT16_C( 10000), INT16_C( 2061), -INT16_C( 28992), INT16_C( 11433), INT16_C( 31347), -INT16_C( 12394), INT16_C( 9203), INT16_C( 12775), INT16_C( 16132), INT16_C( 23062), -INT16_C( 14873), -INT16_C( 5878), -INT16_C( 829), -INT16_C( 695), -INT16_C( 17063), INT16_C( 26944) }, UINT16_C(31780) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_mm256_loadu_epi16(test_vec[i].a); simde__mmask16 r = simde_mm256_movepi16_mask(a); simde_assert_equal_mmask16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i16x16(); simde__mmask16 r = simde_mm256_movepi16_mask(a); simde_test_x86_write_i16x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_movepi32_mask (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[8]; const simde__mmask8 r; } test_vec[] = { { { -INT32_C( 2142769029), -INT32_C( 1306321235), INT32_C( 307118215), -INT32_C( 1222795281), INT32_C( 1900491781), INT32_C( 966748555), -INT32_C( 47661312), -INT32_C( 591974047) }, UINT8_C(203) }, { { -INT32_C( 832766431), -INT32_C( 1535082723), -INT32_C( 1346908224), INT32_C( 1952896110), -INT32_C( 1813664504), INT32_C( 399279127), -INT32_C( 1542130621), INT32_C( 1199623205) }, UINT8_C( 87) }, { { -INT32_C( 417997366), INT32_C( 478975580), -INT32_C( 741653659), INT32_C( 524759319), -INT32_C( 156095009), -INT32_C( 183664975), -INT32_C( 1751571854), -INT32_C( 1193403666) }, UINT8_C(245) }, { { INT32_C( 1403057399), -INT32_C( 277861238), -INT32_C( 2034091153), INT32_C( 1285884525), -INT32_C( 
398305481), INT32_C( 1205686229), INT32_C( 1608415089), -INT32_C( 2011644527) }, UINT8_C(150) }, { { INT32_C( 1004255409), INT32_C( 1395280868), -INT32_C( 187044729), INT32_C( 759201526), -INT32_C( 1441365291), INT32_C( 1139930065), -INT32_C( 73216150), INT32_C( 1032043148) }, UINT8_C( 84) }, { { INT32_C( 1450729330), INT32_C( 833200810), -INT32_C( 2061139313), -INT32_C( 709663488), -INT32_C( 1182807577), INT32_C( 670855356), -INT32_C( 870146496), -INT32_C( 888494503) }, UINT8_C(220) }, { { -INT32_C( 1356692987), -INT32_C( 1260270811), INT32_C( 1312360014), INT32_C( 1394928748), INT32_C( 1913430965), INT32_C( 1419315220), INT32_C( 2145191), INT32_C( 1724590689) }, UINT8_C( 3) }, { { -INT32_C( 770249299), INT32_C( 126285753), INT32_C( 1767227389), INT32_C( 1639807404), INT32_C( 835963165), INT32_C( 2038787282), -INT32_C( 1988516568), INT32_C( 2112898256) }, UINT8_C( 65) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_mm256_loadu_epi32(test_vec[i].a); simde__mmask8 r = simde_mm256_movepi32_mask(a); simde_assert_equal_mmask8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i32x8(); simde__mmask8 r = simde_mm256_movepi32_mask(a); simde_test_x86_write_i32x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_movepi64_mask (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[4]; const simde__mmask8 r; } test_vec[] = { { { -INT64_C( 1621470838541238162), -INT64_C( 2020267518521858509), -INT64_C( 140451368858056884), -INT64_C( 2778531396810572735) }, UINT8_C( 15) }, { { INT64_C( 2067811840525109484), -INT64_C( 3583349620661817091), -INT64_C( 8493721512905259799), -INT64_C( 8179256404884720071) }, UINT8_C( 14) }, { { -INT64_C( 6197223924085336582), -INT64_C( 5383681940865127687), -INT64_C( 1123093167729051242), -INT64_C( 
3017955061275530514) }, UINT8_C( 15) }, { { INT64_C( 8699749956815779174), -INT64_C( 5790437138794035422), INT64_C( 7087530517990374581), INT64_C( 7355240209651527521) }, UINT8_C( 2) }, { { -INT64_C( 6378165230648111994), INT64_C( 6972481154558881787), -INT64_C( 6157503399613148746), INT64_C( 1548951913861971479) }, UINT8_C( 5) }, { { INT64_C( 2806752101191513799), INT64_C( 8796091663676770565), INT64_C( 2036243881630424399), -INT64_C( 1417594688471094865) }, UINT8_C( 8) }, { { INT64_C( 646313841939475880), -INT64_C( 9155313154145045580), INT64_C( 8559472380443399848), INT64_C( 6238620930062810500) }, UINT8_C( 2) }, { { -INT64_C( 6088547874471555979), -INT64_C( 5907011847308756396), -INT64_C( 150829383993844054), INT64_C( 1956597046960299298) }, UINT8_C( 7) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_mm256_loadu_epi64(test_vec[i].a); simde__mmask8 r = simde_mm256_movepi64_mask(a); simde_assert_equal_mmask8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i64x4(); simde__mmask8 r = simde_mm256_movepi64_mask(a); simde_test_x86_write_i64x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_movepi8_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__mmask64 r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( 75), INT8_C( 84), INT8_C( -79), INT8_C( 113), INT8_C( -44), INT8_C( 119), INT8_C( -99), INT8_C( -4), INT8_C( 89), INT8_C(-108), INT8_C( -20), INT8_C( 38), INT8_C( 62), INT8_C(-115), INT8_C( -29), INT8_C( -6), INT8_C( 109), INT8_C( 49), INT8_C( 126), INT8_C( 76), INT8_C( 52), INT8_C( -99), INT8_C( 65), INT8_C( -6), INT8_C( -62), INT8_C( -30), INT8_C( 123), INT8_C(-120), INT8_C( 125), INT8_C( -67), INT8_C( 19), INT8_C( -96), INT8_C( -23), INT8_C(-114), INT8_C( -85), INT8_C( 24), INT8_C( -96), INT8_C( 27), INT8_C( 107), INT8_C( 
-48), INT8_C( 68), INT8_C( 79), INT8_C(-118), INT8_C( 69), INT8_C(-100), INT8_C( 45), INT8_C(-127), INT8_C( -25), INT8_C( 9), INT8_C( -99), INT8_C( -49), INT8_C( 57), INT8_C( -33), INT8_C( -74), INT8_C( 36), INT8_C( -16), INT8_C( 9), INT8_C(-119), INT8_C( -47), INT8_C( -40), INT8_C( 1), INT8_C(-104), INT8_C( -83), INT8_C( -86)), UINT64_C( 3127474882530209143) }, { simde_mm512_set_epi8(INT8_C( -83), INT8_C( 52), INT8_C(-122), INT8_C( -1), INT8_C( -37), INT8_C( -46), INT8_C( -20), INT8_C( -41), INT8_C( -29), INT8_C( -98), INT8_C( -38), INT8_C( 119), INT8_C( -30), INT8_C( -12), INT8_C( 38), INT8_C( 8), INT8_C( -90), INT8_C( -98), INT8_C( 69), INT8_C( 44), INT8_C( -98), INT8_C( 64), INT8_C(-113), INT8_C( 32), INT8_C( -82), INT8_C( 31), INT8_C( -61), INT8_C( 74), INT8_C( 55), INT8_C( 19), INT8_C( -15), INT8_C( -10), INT8_C( 24), INT8_C( 71), INT8_C( -97), INT8_C( -89), INT8_C(-113), INT8_C( -22), INT8_C( 7), INT8_C( -79), INT8_C( 106), INT8_C( -5), INT8_C( 74), INT8_C( -28), INT8_C(-121), INT8_C( -90), INT8_C( -60), INT8_C( -53), INT8_C( 0), INT8_C(-112), INT8_C( -95), INT8_C( 115), INT8_C(-101), INT8_C( 14), INT8_C(-105), INT8_C( -35), INT8_C(-121), INT8_C( -9), INT8_C( 105), INT8_C( 14), INT8_C( 36), INT8_C( 103), INT8_C( -76), INT8_C( 42)), UINT64_C(13829651358206094274) }, { simde_mm512_set_epi8(INT8_C( -71), INT8_C( -56), INT8_C( -76), INT8_C( 52), INT8_C( 20), INT8_C(-117), INT8_C( -18), INT8_C(-112), INT8_C( 87), INT8_C(-104), INT8_C( 127), INT8_C( 76), INT8_C( 77), INT8_C( 75), INT8_C( -19), INT8_C( -45), INT8_C( -26), INT8_C( -13), INT8_C( 75), INT8_C( -95), INT8_C( -9), INT8_C( 77), INT8_C( -48), INT8_C( -15), INT8_C( -67), INT8_C( 74), INT8_C(-118), INT8_C( -27), INT8_C( 55), INT8_C( -43), INT8_C( 66), INT8_C( 14), INT8_C( -1), INT8_C(-106), INT8_C(-103), INT8_C( 31), INT8_C( 121), INT8_C( -29), INT8_C(-102), INT8_C( -49), INT8_C( 83), INT8_C( 107), INT8_C( 108), INT8_C( 3), INT8_C( 22), INT8_C( 3), INT8_C( -29), INT8_C( -27), INT8_C( 120), INT8_C( -58), 
INT8_C( 125), INT8_C( 0), INT8_C( -9), INT8_C( -3), INT8_C( 15), INT8_C( -66), INT8_C( -51), INT8_C( 101), INT8_C( 34), INT8_C( 126), INT8_C(-112), INT8_C(-122), INT8_C( 95), INT8_C( 48)), UINT64_C(16664404616217316748) }, { simde_mm512_set_epi8(INT8_C(-116), INT8_C(-100), INT8_C( -34), INT8_C(-123), INT8_C( -47), INT8_C( -44), INT8_C( 113), INT8_C( -91), INT8_C( 58), INT8_C( -81), INT8_C( -77), INT8_C( -24), INT8_C( 21), INT8_C( -62), INT8_C(-107), INT8_C( 106), INT8_C(-106), INT8_C( 105), INT8_C( 13), INT8_C(-125), INT8_C( -31), INT8_C( 28), INT8_C( -32), INT8_C( -10), INT8_C( 15), INT8_C( 23), INT8_C( 39), INT8_C( 16), INT8_C( 122), INT8_C( 67), INT8_C( -7), INT8_C( 68), INT8_C( 116), INT8_C( -71), INT8_C( -82), INT8_C( -91), INT8_C( -59), INT8_C( -18), INT8_C(-128), INT8_C( 16), INT8_C( -63), INT8_C( 127), INT8_C( -98), INT8_C( 94), INT8_C( -2), INT8_C( -80), INT8_C( -35), INT8_C( -93), INT8_C( -5), INT8_C( 3), INT8_C( 126), INT8_C( 13), INT8_C( -63), INT8_C( 43), INT8_C( 49), INT8_C( 77), INT8_C( 30), INT8_C( 66), INT8_C( 35), INT8_C(-127), INT8_C( 78), INT8_C( 40), INT8_C( 5), INT8_C( 66)), UINT64_C(18263955773865297936) }, { simde_mm512_set_epi8(INT8_C( -92), INT8_C( -13), INT8_C( 48), INT8_C( 12), INT8_C( 95), INT8_C( 98), INT8_C( 122), INT8_C( 111), INT8_C( 27), INT8_C( -60), INT8_C( -86), INT8_C( -36), INT8_C( 67), INT8_C( 45), INT8_C( 28), INT8_C( 89), INT8_C( 36), INT8_C( -13), INT8_C( -96), INT8_C( 48), INT8_C( 88), INT8_C( 9), INT8_C( -73), INT8_C( 80), INT8_C( -52), INT8_C(-107), INT8_C( -78), INT8_C( 89), INT8_C(-100), INT8_C( -11), INT8_C( 124), INT8_C( 50), INT8_C( 52), INT8_C( 14), INT8_C( 84), INT8_C( 75), INT8_C( -26), INT8_C( 56), INT8_C( -88), INT8_C( 97), INT8_C( 48), INT8_C( 37), INT8_C( 28), INT8_C( 73), INT8_C( 20), INT8_C( 113), INT8_C( -11), INT8_C(-118), INT8_C( 118), INT8_C( -13), INT8_C( 119), INT8_C( 10), INT8_C( 113), INT8_C( 86), INT8_C( -63), INT8_C( 67), INT8_C( 52), INT8_C( 18), INT8_C( 22), INT8_C( 21), INT8_C( 82), INT8_C( 
-77), INT8_C( 13), INT8_C( -3)), UINT64_C(13866692018593546757) }, { simde_mm512_set_epi8(INT8_C( -39), INT8_C( -32), INT8_C( -11), INT8_C( 52), INT8_C( 61), INT8_C( 14), INT8_C( 35), INT8_C( -8), INT8_C(-119), INT8_C(-116), INT8_C( -41), INT8_C( -89), INT8_C( -55), INT8_C( -86), INT8_C( 77), INT8_C( 46), INT8_C(-121), INT8_C( -53), INT8_C( -15), INT8_C( 39), INT8_C( 82), INT8_C( 120), INT8_C( -49), INT8_C( 93), INT8_C( 100), INT8_C( 121), INT8_C( 31), INT8_C( -32), INT8_C( 22), INT8_C( -32), INT8_C( 88), INT8_C( -52), INT8_C(-102), INT8_C( -24), INT8_C( 9), INT8_C( 67), INT8_C(-114), INT8_C( 4), INT8_C( 106), INT8_C(-121), INT8_C( -75), INT8_C( 30), INT8_C( 16), INT8_C( 48), INT8_C( 127), INT8_C( 71), INT8_C( -26), INT8_C( -22), INT8_C( -63), INT8_C( 35), INT8_C( -80), INT8_C( -51), INT8_C( 20), INT8_C( 120), INT8_C( 123), INT8_C( 28), INT8_C( 25), INT8_C( 66), INT8_C( -14), INT8_C( -79), INT8_C( 44), INT8_C( 96), INT8_C( 31), INT8_C( -24)), UINT64_C(16284138935867912241) }, { simde_mm512_set_epi8(INT8_C( -8), INT8_C(-106), INT8_C( -32), INT8_C(-125), INT8_C( -8), INT8_C( 59), INT8_C( 113), INT8_C( 106), INT8_C( 124), INT8_C( 123), INT8_C( -45), INT8_C( -6), INT8_C( 110), INT8_C( -44), INT8_C( 116), INT8_C( 65), INT8_C(-109), INT8_C( 54), INT8_C( -45), INT8_C(-108), INT8_C( 113), INT8_C( -80), INT8_C( -25), INT8_C( -81), INT8_C( 66), INT8_C( 5), INT8_C( -16), INT8_C( 69), INT8_C(-120), INT8_C( 101), INT8_C(-120), INT8_C(-105), INT8_C( 93), INT8_C( 26), INT8_C( 126), INT8_C( -64), INT8_C( 107), INT8_C( 115), INT8_C( -9), INT8_C( 38), INT8_C( 67), INT8_C( -62), INT8_C( 22), INT8_C( 68), INT8_C( -28), INT8_C( -64), INT8_C( 52), INT8_C( 39), INT8_C( 38), INT8_C( -87), INT8_C( -5), INT8_C( 99), INT8_C( 33), INT8_C( 4), INT8_C(-101), INT8_C(-121), INT8_C(-105), INT8_C( 71), INT8_C( -11), INT8_C( -20), INT8_C( 0), INT8_C( 19), INT8_C( 16), INT8_C(-121)), UINT64_C(17885121415813555121) }, { simde_mm512_set_epi8(INT8_C(-113), INT8_C( 2), INT8_C( 81), INT8_C( -78), INT8_C( 
-3), INT8_C(-106), INT8_C( -17), INT8_C(-117), INT8_C( 2), INT8_C( 20), INT8_C( 53), INT8_C( -65), INT8_C( 30), INT8_C( 26), INT8_C( 22), INT8_C( 99), INT8_C( -8), INT8_C( -85), INT8_C( 90), INT8_C( -79), INT8_C( -92), INT8_C( -73), INT8_C( -12), INT8_C( -69), INT8_C( 75), INT8_C(-113), INT8_C( 90), INT8_C( 28), INT8_C(-109), INT8_C(-116), INT8_C( 79), INT8_C( -43), INT8_C( -42), INT8_C( -70), INT8_C( 36), INT8_C( -40), INT8_C(-104), INT8_C( 116), INT8_C( -91), INT8_C( -94), INT8_C( 104), INT8_C( -96), INT8_C( 80), INT8_C( -49), INT8_C( 7), INT8_C( 18), INT8_C( 11), INT8_C( 65), INT8_C( -67), INT8_C( 98), INT8_C( 11), INT8_C( -90), INT8_C( 109), INT8_C(-116), INT8_C( 101), INT8_C( 94), INT8_C( 56), INT8_C( -34), INT8_C( 49), INT8_C( 75), INT8_C( 63), INT8_C(-104), INT8_C(-118), INT8_C( -65)), UINT64_C(11461906577142879303) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask64 r = simde_mm512_movepi8_mask(test_vec[i].a); simde_assert_equal_mmask64(r, test_vec[i].r); } return 0; } static int test_simde_mm512_movepi16_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__mmask32 r; } test_vec[8] = { { simde_mm512_set_epi16(INT16_C(-17047), INT16_C(-27489), INT16_C(-15227), INT16_C( 31130), INT16_C( 7900), INT16_C( 4229), INT16_C( 19494), INT16_C( -313), INT16_C( -7407), INT16_C( 6880), INT16_C( 31932), INT16_C(-13004), INT16_C(-16593), INT16_C( 30037), INT16_C( 23370), INT16_C(-27033), INT16_C( -6275), INT16_C(-24391), INT16_C( 19922), INT16_C( 9007), INT16_C(-32282), INT16_C( 30675), INT16_C(-23274), INT16_C(-21419), INT16_C( 30850), INT16_C(-11557), INT16_C(-13740), INT16_C( 3686), INT16_C( 25062), INT16_C( -6498), INT16_C(-22187), INT16_C( 3349)), UINT32_C(3784952678) }, { simde_mm512_set_epi16(INT16_C( 13923), INT16_C( 27595), INT16_C(-30758), INT16_C( 6795), INT16_C( 23230), INT16_C( 13435), INT16_C( 30013), INT16_C( -139), INT16_C(-20677), INT16_C( 11858), INT16_C( 8955), INT16_C( 19017), INT16_C( 604), 
INT16_C(-27923), INT16_C( -6277), INT16_C(-19699), INT16_C( 25596), INT16_C( -5717), INT16_C(-26964), INT16_C( 16652), INT16_C( 24642), INT16_C( 18959), INT16_C(-25136), INT16_C( -8754), INT16_C( -5203), INT16_C(-23370), INT16_C(-26802), INT16_C(-30926), INT16_C( 20471), INT16_C(-28596), INT16_C(-19925), INT16_C( 29562)), UINT32_C(562521078) }, { simde_mm512_set_epi16(INT16_C( 1548), INT16_C( -4457), INT16_C( 28166), INT16_C(-19674), INT16_C( -618), INT16_C(-20513), INT16_C(-19054), INT16_C( -9253), INT16_C(-13318), INT16_C( -7485), INT16_C( 29771), INT16_C( 24344), INT16_C( 7181), INT16_C(-19395), INT16_C(-25613), INT16_C(-15382), INT16_C( 11288), INT16_C( 15682), INT16_C( 21669), INT16_C( 20625), INT16_C( 29057), INT16_C( -8121), INT16_C( -5440), INT16_C( 3057), INT16_C( -7315), INT16_C(-18718), INT16_C(-26828), INT16_C( -6341), INT16_C( -6106), INT16_C( -2348), INT16_C( 30905), INT16_C(-10037)), UINT32_C(1606878973) }, { simde_mm512_set_epi16(INT16_C( 14148), INT16_C( 31053), INT16_C(-19743), INT16_C(-19340), INT16_C(-12275), INT16_C( 27369), INT16_C(-24803), INT16_C( 5116), INT16_C(-16383), INT16_C( 28006), INT16_C(-24294), INT16_C(-24984), INT16_C( 12919), INT16_C(-17951), INT16_C( 31664), INT16_C( 7912), INT16_C( 12196), INT16_C(-27188), INT16_C( 23535), INT16_C(-31330), INT16_C(-27343), INT16_C( 14195), INT16_C( 5995), INT16_C( 10459), INT16_C( 12832), INT16_C( 13655), INT16_C(-20140), INT16_C(-26343), INT16_C(-13772), INT16_C( -9475), INT16_C(-28427), INT16_C(-11691)), UINT32_C(984897599) }, { simde_mm512_set_epi16(INT16_C(-22300), INT16_C(-10421), INT16_C( -5153), INT16_C( 5119), INT16_C( 12821), INT16_C( 17382), INT16_C( -4532), INT16_C(-11640), INT16_C( -1430), INT16_C( 18534), INT16_C( -8310), INT16_C(-13530), INT16_C( 24972), INT16_C(-18125), INT16_C( 24561), INT16_C( -2389), INT16_C(-27012), INT16_C(-15171), INT16_C( 25076), INT16_C( 17208), INT16_C(-10726), INT16_C(-14428), INT16_C( 18609), INT16_C( 16453), INT16_C( 17590), INT16_C( 596), INT16_C( 
5139), INT16_C(-28922), INT16_C(-14748), INT16_C(-14808), INT16_C( 29503), INT16_C( 18271)), UINT32_C(3820342300) }, { simde_mm512_set_epi16(INT16_C( -1922), INT16_C( 28045), INT16_C( 7072), INT16_C( -6655), INT16_C(-10335), INT16_C( 3620), INT16_C(-24526), INT16_C( 22370), INT16_C( 21531), INT16_C( 30935), INT16_C( 8939), INT16_C(-11772), INT16_C( 17436), INT16_C( 5814), INT16_C(-22069), INT16_C( 15046), INT16_C( 4673), INT16_C( 28770), INT16_C( 154), INT16_C( 3548), INT16_C(-13962), INT16_C(-26257), INT16_C( -315), INT16_C( 21241), INT16_C( -5135), INT16_C( 2068), INT16_C( 4376), INT16_C(-32178), INT16_C( 14170), INT16_C( -3876), INT16_C( 30653), INT16_C( 15729)), UINT32_C(2584874644) }, { simde_mm512_set_epi16(INT16_C( 10719), INT16_C( 25050), INT16_C(-15358), INT16_C( -1679), INT16_C(-13098), INT16_C(-28834), INT16_C( -9407), INT16_C( 1027), INT16_C( -8964), INT16_C( 27458), INT16_C( 16082), INT16_C( 12178), INT16_C( 1218), INT16_C(-18769), INT16_C(-25942), INT16_C(-28951), INT16_C( 25249), INT16_C(-28972), INT16_C(-23822), INT16_C( 15695), INT16_C( 24838), INT16_C( -7902), INT16_C( 17882), INT16_C( -8203), INT16_C(-28681), INT16_C( -7333), INT16_C( 30548), INT16_C(-23603), INT16_C( 15565), INT16_C(-21742), INT16_C( 12497), INT16_C( -3539)), UINT32_C(1049060821) }, { simde_mm512_set_epi16(INT16_C( 27841), INT16_C( 5697), INT16_C( 21581), INT16_C(-21663), INT16_C(-31814), INT16_C( 15339), INT16_C(-24806), INT16_C( 28625), INT16_C( -2580), INT16_C(-27637), INT16_C( 20611), INT16_C( 26105), INT16_C( 20780), INT16_C( 32638), INT16_C( 17822), INT16_C( 21576), INT16_C( 16865), INT16_C(-18111), INT16_C( -8939), INT16_C(-14679), INT16_C(-25963), INT16_C( -8164), INT16_C( -7364), INT16_C( 13563), INT16_C( 4030), INT16_C(-20513), INT16_C(-30065), INT16_C(-30263), INT16_C( 23452), INT16_C( 9713), INT16_C(-23717), INT16_C( -7690)), UINT32_C(448822899) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask32 r = 
simde_mm512_movepi16_mask(test_vec[i].a); simde_assert_equal_mmask32(r, test_vec[i].r); } return 0; } static int test_simde_mm512_movepi32_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__mmask16 r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 949963630), INT32_C( 1064968775), INT32_C(-1905189849), INT32_C(-1560216302), INT32_C(-1667094448), INT32_C( 1432096084), INT32_C( 614973119), INT32_C( -938109633), INT32_C( -573293838), INT32_C(-1613148160), INT32_C( 914563081), INT32_C( -664378047), INT32_C( 580894207), INT32_C( -5278152), INT32_C(-1290389695), INT32_C( 1268875821)), UINT16_C(14806) }, { simde_mm512_set_epi32(INT32_C( 1744063513), INT32_C( 199551019), INT32_C( 2145427118), INT32_C( 2117462748), INT32_C(-1526152394), INT32_C(-1859380110), INT32_C(-2006621963), INT32_C( 1533584129), INT32_C( 86408218), INT32_C( 1744524288), INT32_C( 1245749395), INT32_C( 2117755472), INT32_C( 1319846799), INT32_C( 684013019), INT32_C( -958932880), INT32_C(-1652765619)), UINT16_C(3587) }, { simde_mm512_set_epi32(INT32_C( -357876242), INT32_C(-1625926758), INT32_C(-2123740483), INT32_C( -660511291), INT32_C(-1776054627), INT32_C( 141209285), INT32_C(-1246421832), INT32_C( 1688546782), INT32_C( 466911744), INT32_C( 1735300956), INT32_C(-1996452777), INT32_C( 2033250973), INT32_C( 895403481), INT32_C( 890095351), INT32_C(-1050174441), INT32_C( 300665935)), UINT16_C(64034) }, { simde_mm512_set_epi32(INT32_C( 1350930633), INT32_C(-2104240005), INT32_C( -73808375), INT32_C( -223326096), INT32_C(-1133084350), INT32_C(-2121661437), INT32_C( -195344417), INT32_C( -410489209), INT32_C( 1846491503), INT32_C( 2122554983), INT32_C( 194022155), INT32_C(-1349250510), INT32_C( 1589005094), INT32_C( -533309401), INT32_C(-1277337937), INT32_C(-1321119452)), UINT16_C(32535) }, { simde_mm512_set_epi32(INT32_C( 918165817), INT32_C( 1913225422), INT32_C(-1721286233), INT32_C( 1673520973), INT32_C( 1260386883), INT32_C( -845723997), INT32_C( 607206184), INT32_C( 655955271), 
INT32_C( 1232293371), INT32_C( -686952046), INT32_C( -38269764), INT32_C( 1897508883), INT32_C( 232912531), INT32_C( 655019124), INT32_C( -684909810), INT32_C( 175412708)), UINT16_C(9314) }, { simde_mm512_set_epi32(INT32_C( -167347570), INT32_C( 2106669082), INT32_C( 414695275), INT32_C( -187234329), INT32_C( 687995662), INT32_C(-1870754825), INT32_C( -333242798), INT32_C( 41400550), INT32_C( 956450496), INT32_C( -690912424), INT32_C(-1689027692), INT32_C( -582951840), INT32_C( -348307758), INT32_C(-2091243549), INT32_C(-1450595733), INT32_C( 1635199293)), UINT16_C(38526) }, { simde_mm512_set_epi32(INT32_C( 747298364), INT32_C( 1882006238), INT32_C( -996575344), INT32_C( 1843963633), INT32_C( 705136482), INT32_C( 1503368131), INT32_C(-1576593524), INT32_C( 1439140790), INT32_C( -402748742), INT32_C( 664771884), INT32_C( 747175899), INT32_C( 405982929), INT32_C( 2131890450), INT32_C(-1594643895), INT32_C(-1014095286), INT32_C( 1590577530)), UINT16_C(8838) }, { simde_mm512_set_epi32(INT32_C( 919103752), INT32_C( 840271230), INT32_C( 580240564), INT32_C(-1788774714), INT32_C( 400169555), INT32_C(-1853738938), INT32_C(-1112922052), INT32_C(-1623445030), INT32_C( 1270846818), INT32_C(-2065143748), INT32_C( 2058102073), INT32_C( 986096322), INT32_C( 1628330811), INT32_C( 1467380999), INT32_C( -786808342), INT32_C(-1903665992)), UINT16_C(5955) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask16 r = simde_mm512_movepi32_mask(test_vec[i].a); simde_assert_equal_mmask16(r, test_vec[i].r); } return 0; } static int test_simde_mm512_movepi64_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__mmask8 r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( 4315835916621638572), INT64_C(-6674257362849511307), INT64_C(-1762049467229420749), INT64_C(-8132359750587133268), INT64_C( 6148678802780014396), INT64_C( 4664933343968897426), INT64_C( 1014284963407527334), INT64_C( 3422825313800233729)), UINT8_C(112) }, { 
simde_mm512_set_epi64(INT64_C( 574478659121970685), INT64_C(-6605171674412584848), INT64_C( 4638820310774021553), INT64_C( 7174745169641818539), INT64_C( 7314921907136775571), INT64_C(-2641502309573857208), INT64_C( 6620802922844884113), INT64_C(-7711278827686512216)), UINT8_C(69) }, { simde_mm512_set_epi64(INT64_C(-6147235395367117233), INT64_C( 6542122732864503940), INT64_C(-4893279094642154374), INT64_C( -710603276995935312), INT64_C(-6985907794246737252), INT64_C(-2323934421260723283), INT64_C( 2081892758864471809), INT64_C(-1783091129455251103)), UINT8_C(189) }, { simde_mm512_set_epi64(INT64_C(-7008686659697006374), INT64_C(-2302187120743568504), INT64_C(-7431666373478318054), INT64_C(-1666370852874022248), INT64_C(-5526045067730894819), INT64_C(-5366397760872850523), INT64_C(-8148401067962473275), INT64_C( 6379593722474677948)), UINT8_C(254) }, { simde_mm512_set_epi64(INT64_C( 5834225915365902233), INT64_C( 5212224477281615403), INT64_C(-9044565203383400851), INT64_C(-7453762113383947542), INT64_C( 7219309658228626013), INT64_C(-5724922725847615845), INT64_C( 4784587912595387278), INT64_C( 2886700108880873396)), UINT8_C(52) }, { simde_mm512_set_epi64(INT64_C( 1408273437008202704), INT64_C(-3431263432981659490), INT64_C( -308854217089784545), INT64_C(-4251857914359021805), INT64_C( 5524011446548464963), INT64_C( 7842350355250917332), INT64_C( 1150922164682347155), INT64_C(-6938335843482793568)), UINT8_C(113) }, { simde_mm512_set_epi64(INT64_C( 3466369033061057719), INT64_C( -853883159898808651), INT64_C(-8837301080476701753), INT64_C( 1624970283957331834), INT64_C( 8454378025299498630), INT64_C(-4025270946591081596), INT64_C( 1760420628646065087), INT64_C(-7311372421099691274)), UINT8_C(101) }, { simde_mm512_set_epi64(INT64_C( 6360525706956998040), INT64_C(-9084014240271266814), INT64_C(-5488573213012393938), INT64_C( 9146279599643928313), INT64_C(-2309198113695918692), INT64_C( 7647503988947974660), INT64_C( -340783369849195617), 
INT64_C(-7174097298174507843)), UINT8_C(107) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask8 r = simde_mm512_movepi64_mask(test_vec[i].a); simde_assert_equal_mmask8(r, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_movepi8_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm_movepi16_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm_movepi32_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm_movepi64_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_movepi8_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_movepi16_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_movepi32_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_movepi64_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_movepi8_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_movepi16_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_movepi32_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_movepi64_mask) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/movm.c000066400000000000000000001405051400333146700164030ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #define SIMDE_TEST_X86_AVX512_INSN movm #include #include static int test_simde_mm_movm_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m128i r; } test_vec[8] = { { UINT16_C(62934), simde_mm_set_epi8(INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0)) }, { UINT16_C( 3839), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1)) }, { UINT16_C(60519), simde_mm_set_epi8(INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1)) }, { UINT16_C(28066), simde_mm_set_epi8(INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0)) }, { UINT16_C( 8975), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1)) }, { UINT16_C(35700), simde_mm_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0)) }, { UINT16_C(45525), 
simde_mm_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1)) }, { UINT16_C( 9017), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_movm_epi8(test_vec[i].k); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_movm_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask32 k; simde__m256i r; } test_vec[8] = { { UINT32_C(3131962838), simde_mm256_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0)) }, { UINT32_C(1926696703), simde_mm256_set_epi8(INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1)) }, { UINT32_C(2248141927), simde_mm256_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), 
INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1)) }, { UINT32_C(1480879522), simde_mm256_set_epi8(INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0)) }, { UINT32_C(1377641231), simde_mm256_set_epi8(INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1)) }, { UINT32_C( 395086708), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0)) }, { UINT32_C(1313583573), simde_mm256_set_epi8(INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1)) }, { UINT32_C(2432705337), simde_mm256_set_epi8(INT8_C( -1), INT8_C( 0), 
INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_movm_epi8(test_vec[i].k); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_movm_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask64 k; simde__m512i r; } test_vec[8] = { { UINT64_C( 4739015484227475748), simde_mm512_set_epi8(INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0)) }, { UINT64_C( 9729215686767344119), simde_mm512_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 
0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1)) }, { UINT64_C(13732001478625865871), simde_mm512_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1)) }, { UINT64_C( 1583258323140482986), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 
-1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0)) }, { UINT64_C(11672091627232461942), simde_mm512_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0)) }, { UINT64_C( 2094101018860790606), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0)) }, { UINT64_C( 4680871035071032016), simde_mm512_set_epi8(INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 
0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { UINT64_C( 4209047041590863189), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_movm_epi8(test_vec[i].k); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_movm_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m128i r; } test_vec[8] = { { UINT8_C(216), simde_mm_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), 
INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { UINT8_C( 89), simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1)) }, { UINT8_C(101), simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1)) }, { UINT8_C( 61), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1)) }, { UINT8_C(225), simde_mm_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1)) }, { UINT8_C(231), simde_mm_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1)) }, { UINT8_C(114), simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0)) }, { UINT8_C(147), simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_movm_epi16(test_vec[i].k); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_movm_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m256i r; } test_vec[8] = { { UINT16_C( 9176), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { UINT16_C( 7781), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1)) }, { UINT16_C(51425), simde_mm256_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 
0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1)) }, { UINT16_C(64626), simde_mm256_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0)) }, { UINT16_C(41021), simde_mm256_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1)) }, { UINT16_C(29062), simde_mm256_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)) }, { UINT16_C(12635), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1)) }, { UINT16_C(14754), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_movm_epi16(test_vec[i].k); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_movm_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask32 k; simde__m512i r; } test_vec[8] = { { UINT32_C(2805036472), simde_mm512_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), 
INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { UINT32_C(2266796856), simde_mm512_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { UINT32_C(3598176466), simde_mm512_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0)) }, { UINT32_C( 689971098), simde_mm512_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0)) }, { UINT32_C(2581729150), simde_mm512_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 
0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0)) }, { UINT32_C(1365267719), simde_mm512_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1)) }, { UINT32_C(4094538289), simde_mm512_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1)) }, { UINT32_C(3608627761), simde_mm512_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_movm_epi16(test_vec[i].k); simde_assert_m512i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_movm_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m128i r; } test_vec[8] = { { UINT8_C( 8), simde_mm_set_epi32(INT32_C( 
-1), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { UINT8_C( 9), simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1)) }, { UINT8_C( 5), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { UINT8_C( 13), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { UINT8_C( 1), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1)) }, { UINT8_C( 7), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1)) }, { UINT8_C( 2), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0)) }, { UINT8_C( 3), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_movm_epi32(test_vec[i].k); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_movm_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m256i r; } test_vec[8] = { { UINT8_C(216), simde_mm256_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { UINT8_C( 89), simde_mm256_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1)) }, { UINT8_C(101), simde_mm256_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { UINT8_C( 61), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { UINT8_C(225), simde_mm256_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1)) }, { UINT8_C(231), simde_mm256_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1)) }, { UINT8_C(114), simde_mm256_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1), 
INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0)) }, { UINT8_C(147), simde_mm256_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_movm_epi32(test_vec[i].k); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_movm_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512i r; } test_vec[8] = { { UINT16_C(30136), simde_mm512_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(37688), simde_mm512_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(53458), simde_mm512_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0)) }, { UINT16_C( 8090), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0)) }, { UINT16_C( 3966), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( 0)) }, { UINT16_C(21767), simde_mm512_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), 
INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1)) }, { UINT16_C(45617), simde_mm512_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1)) }, { UINT16_C(18993), simde_mm512_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_movm_epi32(test_vec[i].k); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_movm_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m128i r; } test_vec[8] = { { UINT8_C(184), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) }, { UINT8_C( 56), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) }, { UINT8_C(210), simde_mm_set_epi64x(INT64_C( -1), INT64_C( 0)) }, { UINT8_C(154), simde_mm_set_epi64x(INT64_C( -1), INT64_C( 0)) }, { UINT8_C(126), simde_mm_set_epi64x(INT64_C( -1), INT64_C( 0)) }, { UINT8_C( 7), simde_mm_set_epi64x(INT64_C( -1), INT64_C( -1)) }, { UINT8_C( 49), simde_mm_set_epi64x(INT64_C( 0), INT64_C( -1)) }, { UINT8_C( 49), simde_mm_set_epi64x(INT64_C( 0), INT64_C( -1)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_movm_epi64(test_vec[i].k); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_movm_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m256i r; } test_vec[8] = { { UINT8_C(184), simde_mm256_set_epi64x(INT64_C( -1), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C( 56), simde_mm256_set_epi64x(INT64_C( -1), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C(210), 
simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 0), INT64_C( -1), INT64_C( 0)) }, { UINT8_C(154), simde_mm256_set_epi64x(INT64_C( -1), INT64_C( 0), INT64_C( -1), INT64_C( 0)) }, { UINT8_C(126), simde_mm256_set_epi64x(INT64_C( -1), INT64_C( -1), INT64_C( -1), INT64_C( 0)) }, { UINT8_C( 7), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( -1), INT64_C( -1), INT64_C( -1)) }, { UINT8_C( 49), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( -1)) }, { UINT8_C( 49), simde_mm256_set_epi64x(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( -1)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_movm_epi64(test_vec[i].k); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_movm_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i r; } test_vec[8] = { { UINT8_C(184), simde_mm512_set_epi64(INT64_C( -1), INT64_C( 0), INT64_C( -1), INT64_C( -1), INT64_C( -1), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C( 56), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( -1), INT64_C( -1), INT64_C( -1), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C(210), simde_mm512_set_epi64(INT64_C( -1), INT64_C( -1), INT64_C( 0), INT64_C( -1), INT64_C( 0), INT64_C( 0), INT64_C( -1), INT64_C( 0)) }, { UINT8_C(154), simde_mm512_set_epi64(INT64_C( -1), INT64_C( 0), INT64_C( 0), INT64_C( -1), INT64_C( -1), INT64_C( 0), INT64_C( -1), INT64_C( 0)) }, { UINT8_C(126), simde_mm512_set_epi64(INT64_C( 0), INT64_C( -1), INT64_C( -1), INT64_C( -1), INT64_C( -1), INT64_C( -1), INT64_C( -1), INT64_C( 0)) }, { UINT8_C( 7), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( -1), INT64_C( -1), INT64_C( -1)) }, { UINT8_C( 49), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( -1), INT64_C( -1), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( -1)) }, { UINT8_C( 49), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( -1), 
INT64_C( -1), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( -1)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_movm_epi64(test_vec[i].k); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_movm_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_movm_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_movm_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_movm_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_movm_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_movm_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_movm_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_movm_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_movm_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_movm_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_movm_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_movm_epi64) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/mul.c000066400000000000000000004337101400333146700162250ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN mul #include #include #include static int test_simde_mm512_mul_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[16]; const int32_t b[16]; const int64_t r[8]; } test_vec[] = { { { INT32_C( 1303646110), -INT32_C( 1991094019), -INT32_C( 60179606), -INT32_C( 1143961245), INT32_C( 53151249), INT32_C( 1139066569), -INT32_C( 1576434937), -INT32_C( 2053958169), INT32_C( 584206116), INT32_C( 44770456), INT32_C( 1123947743), -INT32_C( 1342320995), -INT32_C( 1632491307), -INT32_C( 169699602), INT32_C( 9956121), INT32_C( 612706816) }, { INT32_C( 172382066), INT32_C( 1527574908), -INT32_C( 1549923834), -INT32_C( 1387095080), -INT32_C( 1303640636), -INT32_C( 1247269221), INT32_C( 431308569), -INT32_C( 599966870), INT32_C( 249988242), INT32_C( 2070606453), -INT32_C( 736163588), INT32_C( 1736602019), INT32_C( 303746678), INT32_C( 365413116), INT32_C( 1798208513), INT32_C( 1246194615) }, { INT64_C( 224725209774663260), INT64_C( 93273805660129404), -INT64_C( 69290128050554364), -INT64_C( 679929896799075153), INT64_C( 146044659904488072), -INT64_C( 827409403211381884), -INT64_C( 495863811365128146), INT64_C( 17903181538658073) } }, { { INT32_C( 1700277743), INT32_C( 467714591), INT32_C( 1861287882), -INT32_C( 405441935), INT32_C( 1023012672), -INT32_C( 1286487887), -INT32_C( 199327939), -INT32_C( 633444630), -INT32_C( 1287678061), INT32_C( 617488217), -INT32_C( 1869431265), INT32_C( 1886873392), INT32_C( 145518935), INT32_C( 1857813809), INT32_C( 1734597244), -INT32_C( 750673600) }, { -INT32_C( 1836679112), -INT32_C( 1095346785), INT32_C( 1145980947), INT32_C( 112510639), INT32_C( 
1745838391), -INT32_C( 606614946), -INT32_C( 465421660), INT32_C( 347636699), -INT32_C( 1566163453), -INT32_C( 1486791533), INT32_C( 1407954852), -INT32_C( 1403347083), INT32_C( 1595238656), -INT32_C( 667227085), INT32_C( 29129766), INT32_C( 34960639) }, { -INT64_C( 3122864615166604216), INT64_C( 2133000449653984254), INT64_C( 1786014797257090752), INT64_C( 92771540253758740), INT64_C( 2016714318368104633), -INT64_C( 2632074820037247780), INT64_C( 232137430291951360), INT64_C( 50528411821964904) } }, { { INT32_C( 1185201075), -INT32_C( 1142094569), INT32_C( 705681589), INT32_C( 2027383160), INT32_C( 98036946), -INT32_C( 19066408), -INT32_C( 1929405811), -INT32_C( 1047653106), -INT32_C( 402115632), -INT32_C( 308021960), INT32_C( 1176023758), -INT32_C( 306254053), -INT32_C( 1275881765), INT32_C( 900845735), INT32_C( 2042736746), -INT32_C( 1774563131) }, { -INT32_C( 1149353341), INT32_C( 111747384), -INT32_C( 280182316), -INT32_C( 1948448080), INT32_C( 1212076192), INT32_C( 192802720), INT32_C( 1703165599), INT32_C( 301710990), INT32_C( 969767169), INT32_C( 1849652890), -INT32_C( 413234377), INT32_C( 913456021), -INT32_C( 1417760757), INT32_C( 1052179359), -INT32_C( 928826823), -INT32_C( 86401287) }, { -INT64_C( 1362214815308041575), -INT64_C( 197719501964580124), INT64_C( 118828248182989632), -INT64_C( 3286097603805895789), -INT64_C( 389958538055285808), -INT64_C( 485973444974328766), INT64_C( 1808895096988896105), -INT64_C( 1897348682012537958) } }, { { -INT32_C( 1305237993), INT32_C( 1394635292), -INT32_C( 1841660163), -INT32_C( 993481543), -INT32_C( 76528036), INT32_C( 2067408449), INT32_C( 1514397025), -INT32_C( 1823204228), -INT32_C( 549091389), -INT32_C( 164403463), -INT32_C( 1635226140), INT32_C( 1986154778), -INT32_C( 646786409), INT32_C( 1515498745), INT32_C( 95721353), INT32_C( 1989740723) }, { -INT32_C( 2007572849), INT32_C( 696158532), INT32_C( 281478902), -INT32_C( 276354729), -INT32_C( 171378180), INT32_C( 776936613), INT32_C( 1714684851), -INT32_C( 
1696740085), -INT32_C( 266194005), -INT32_C( 1306943300), -INT32_C( 20717402), INT32_C( 99437065), -INT32_C( 386222781), -INT32_C( 2078914095), INT32_C( 1474972236), -INT32_C( 1007565033) }, { INT64_C( 2620360356230052057), -INT64_C( 518388480538381026), INT64_C( 13115235528654480), INT64_C( 2596713637166968275), INT64_C( 146164835948922945), INT64_C( 33877637303288280), INT64_C( 249803645596983429), INT64_C( 141186338067355308) } }, { { -INT32_C( 1229777926), INT32_C( 1516883123), -INT32_C( 1252512596), -INT32_C( 1178909322), -INT32_C( 878594566), INT32_C( 1263515647), INT32_C( 430127362), -INT32_C( 69430271), INT32_C( 1538428840), INT32_C( 129309531), -INT32_C( 1111683769), INT32_C( 1282832466), INT32_C( 739710765), -INT32_C( 797415730), -INT32_C( 1578493024), INT32_C( 1469892271) }, { -INT32_C( 1313649066), -INT32_C( 1330026391), -INT32_C( 932350346), INT32_C( 454419438), -INT32_C( 918016774), INT32_C( 865714323), -INT32_C( 1965784101), -INT32_C( 1595772854), INT32_C( 676435391), INT32_C( 1943603965), INT32_C( 1849443968), INT32_C( 613044522), INT32_C( 284021373), INT32_C( 1833142162), INT32_C( 1425479434), INT32_C( 1207228808) }, { INT64_C( 1615496623877317116), INT64_C( 1167780552249958216), INT64_C( 806564549133250084), -INT64_C( 845537529624671562), INT64_C( 1040647713911076440), -INT64_C( 2055996840900555392), INT64_C( 210093667098180345), -INT64_C( 2250109342424468416) } }, { { INT32_C( 1819231854), -INT32_C( 773896112), -INT32_C( 1187046513), -INT32_C( 354563732), INT32_C( 771410843), INT32_C( 1553612370), -INT32_C( 575565227), -INT32_C( 635132565), INT32_C( 1011258603), INT32_C( 1796023772), -INT32_C( 1390130111), -INT32_C( 1315503594), INT32_C( 534745805), INT32_C( 628849104), INT32_C( 1996696587), -INT32_C( 1118754862) }, { -INT32_C( 1728473157), -INT32_C( 50133316), INT32_C( 1772824659), -INT32_C( 149274070), -INT32_C( 1541998124), INT32_C( 2127204723), -INT32_C( 1862939202), -INT32_C( 1337112844), -INT32_C( 1706539043), -INT32_C( 1600697523), 
-INT32_C( 1660337549), INT32_C( 1502880901), -INT32_C( 1862358499), -INT32_C( 99628996), -INT32_C( 1987443563), INT32_C( 674879307) }, { -INT64_C( 3144493425998343078), -INT64_C( 2104425329626364067), -INT64_C( 1189514072739258532), INT64_C( 1072243024686328854), -INT64_C( 1725752288589136929), INT64_C( 2308085221288837939), -INT64_C( 995888394746346695), -INT64_C( 3968321779097219481) } }, { { INT32_C( 1824686366), INT32_C( 1074551501), INT32_C( 568202908), INT32_C( 1467707962), INT32_C( 1508407581), -INT32_C( 699140287), INT32_C( 1180687867), -INT32_C( 747660876), -INT32_C( 415289062), INT32_C( 673729419), -INT32_C( 1689713055), -INT32_C( 1779186568), INT32_C( 2129582909), -INT32_C( 850116142), -INT32_C( 753617890), INT32_C( 1738965837) }, { INT32_C( 1078977972), -INT32_C( 1838647504), -INT32_C( 181554819), -INT32_C( 1282727818), -INT32_C( 852329989), -INT32_C( 644118853), -INT32_C( 2018726086), -INT32_C( 420523470), INT32_C( 1747336759), INT32_C( 855281333), -INT32_C( 1238948032), INT32_C( 1131000392), -INT32_C( 418276564), INT32_C( 1556130850), -INT32_C( 1914409637), -INT32_C( 143404097) }, { INT64_C( 1968796394722729752), -INT64_C( 103159976117213652), -INT64_C( 1285661016921246609), -INT64_C( 2383485396536598562), -INT64_C( 725649843643230058), INT64_C( 2093466664136957760), -INT64_C( 890754621929644676), INT64_C( 1442733351231605930) } }, { { -INT32_C( 966813167), INT32_C( 1761106216), -INT32_C( 937549952), -INT32_C( 32732974), INT32_C( 1172643107), INT32_C( 614639049), -INT32_C( 760117742), INT32_C( 1791566937), -INT32_C( 416274242), INT32_C( 21964929), INT32_C( 432696903), INT32_C( 420992758), -INT32_C( 1134560013), -INT32_C( 1260387934), -INT32_C( 528051833), INT32_C( 1951027125) }, { -INT32_C( 128222601), -INT32_C( 369448286), INT32_C( 235127832), -INT32_C( 1926751590), -INT32_C( 1186363625), INT32_C( 258812296), INT32_C( 1877996730), -INT32_C( 1142736573), INT32_C( 1437810355), INT32_C( 20884969), INT32_C( 185614705), INT32_C( 1939355740), INT32_C( 
1194123711), -INT32_C( 1000957686), -INT32_C( 785103475), INT32_C( 831264638) }, { INT64_C( 123967298953787367), -INT64_C( 220444087605464064), -INT64_C( 1391181127251782875), -INT64_C( 1427498633890983660), -INT64_C( 598523415667375910), INT64_C( 80314908004758615), -INT64_C( 1354805013075768243), INT64_C( 414575329068419675) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_mul_epi32(a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_mul_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask8 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( 8259215308803572895), INT64_C( 5002417564910761422), INT64_C( 4825945910792190995), INT64_C(-3854692997504557014), INT64_C(-5029859126276555558), INT64_C(-6821356987634887986), INT64_C( 4779218217009886481), INT64_C( 4803075487209946977)), UINT8_C(116), simde_mm512_set_epi32(INT32_C(-1946312318), INT32_C( 344802157), INT32_C( 1813783552), INT32_C( 136328242), INT32_C( -821129857), INT32_C( -162465728), INT32_C(-2000203088), INT32_C( 1070574927), INT32_C( 1169396458), INT32_C(-1183467128), INT32_C(-1936534523), INT32_C( 1077760263), INT32_C( 1655422492), INT32_C( 623663138), INT32_C( 29163868), INT32_C(-2106787203)), simde_mm512_set_epi32(INT32_C( 1815416844), INT32_C( 1421058674), INT32_C( -653559880), INT32_C(-1986451398), INT32_C( 72292265), INT32_C( -878074326), INT32_C( 9737947), INT32_C( 1206062441), INT32_C(-1794166530), INT32_C( 629576453), INT32_C(-1534533514), INT32_C( 511428134), INT32_C( 206078130), INT32_C( 438975617), INT32_C( 828494683), INT32_C( -995405524)), simde_mm512_set_epi64(INT64_C( 8259215308803572895), INT64_C( -270809426907782316), INT64_C( 142656984611699328), INT64_C( 
1291180209731016807), INT64_C(-5029859126276555558), INT64_C( 551196920205439242), INT64_C( 4779218217009886481), INT64_C( 4803075487209946977)) }, { simde_mm512_set_epi64(INT64_C(-2096589957716905410), INT64_C( 5843054744461330548), INT64_C( 6974024204978419548), INT64_C( 3764508487718737373), INT64_C( 3631623951055726390), INT64_C( 3085354128371369606), INT64_C(-8195484891056484583), INT64_C( -289672705472788318)), UINT8_C(192), simde_mm512_set_epi32(INT32_C( 750022106), INT32_C( -164409985), INT32_C( 1508401218), INT32_C( 326736787), INT32_C( 1376534770), INT32_C( 433267140), INT32_C(-1811276142), INT32_C( -423374585), INT32_C( 316128811), INT32_C( 1066215241), INT32_C( -742321807), INT32_C( 771493183), INT32_C( 1158801588), INT32_C( 1819952183), INT32_C( 1670004358), INT32_C(-1282166628)), simde_mm512_set_epi32(INT32_C(-1440467958), INT32_C( 673155116), INT32_C( 1160373089), INT32_C(-1560331288), INT32_C( 1954920850), INT32_C( 443628207), INT32_C(-1358199964), INT32_C( 1999231031), INT32_C( -922196474), INT32_C(-1304354759), INT32_C( 1973769080), INT32_C(-1007908450), INT32_C(-1145320026), INT32_C( 1345168267), INT32_C(-1647348591), INT32_C(-2009668702)), simde_mm512_set_epi64(INT64_C( -110673422524233260), INT64_C( -509817631696691656), INT64_C( 6974024204978419548), INT64_C( 3764508487718737373), INT64_C( 3631623951055726390), INT64_C( 3085354128371369606), INT64_C(-8195484891056484583), INT64_C( -289672705472788318)) }, { simde_mm512_set_epi64(INT64_C(-5821818953001636176), INT64_C( 1997894375206593641), INT64_C(-1188496888106000468), INT64_C( 4574447963200493304), INT64_C( 3346200385521264609), INT64_C(-5642979348732921527), INT64_C( -695384029725146025), INT64_C(-7115212454065332556)), UINT8_C(247), simde_mm512_set_epi32(INT32_C( 133614784), INT32_C( 1104524722), INT32_C( -405494742), INT32_C( 33506731), INT32_C( 1866794314), INT32_C(-1942831246), INT32_C(-2066004046), INT32_C(-1057119888), INT32_C(-1508387315), INT32_C( 2140894614), INT32_C(-1227481958), 
INT32_C( -545548506), INT32_C( 2041568161), INT32_C(-1734631316), INT32_C( -578665178), INT32_C( 976865378)), simde_mm512_set_epi32(INT32_C( 1938874012), INT32_C( -315470352), INT32_C( 25227789), INT32_C( 348972975), INT32_C( -119098852), INT32_C( -117531009), INT32_C(-1256068989), INT32_C( -330670492), INT32_C(-1342579595), INT32_C( 1663139463), INT32_C( 1519977261), INT32_C(-2010660089), INT32_C( 811843811), INT32_C( 1950445467), INT32_C( -632407557), INT32_C( 132507618)), simde_mm512_set_epi64(INT64_C( -348444802842042144), INT64_C( 11692943599594725), INT64_C( 228342916659107214), INT64_C( 349558353467944896), INT64_C( 3346200385521264609), INT64_C( 1096912607627777034), INT64_C(-3383303787208444572), INT64_C( 129442104345449604)) }, { simde_mm512_set_epi64(INT64_C(-6399381415989804252), INT64_C(-4072646889620133673), INT64_C(-3499367054553152785), INT64_C(-2596410489019354993), INT64_C( 3709612225265967420), INT64_C( 1617021521015256349), INT64_C( 8518903223542129770), INT64_C(-7495998104551122449)), UINT8_C(125), simde_mm512_set_epi32(INT32_C( 1369528234), INT32_C(-2013461915), INT32_C( 979595496), INT32_C(-1220154251), INT32_C( 305231144), INT32_C( 243633364), INT32_C( 911946112), INT32_C( 158189864), INT32_C( 126572094), INT32_C( -5395242), INT32_C(-1561205257), INT32_C( 515958610), INT32_C( 1545815628), INT32_C( 1042892620), INT32_C( -956025439), INT32_C( -181963588)), simde_mm512_set_epi32(INT32_C( 1859688708), INT32_C( 1308950804), INT32_C(-1932687023), INT32_C( 2111441590), INT32_C( 1137586884), INT32_C( 1521953186), INT32_C( 1589240826), INT32_C( 1097366673), INT32_C( -573799426), INT32_C( 1360758617), INT32_C(-1894824063), INT32_C( -305567235), INT32_C(-1488593619), INT32_C( 1052029738), INT32_C(-1777311621), INT32_C( -525756513)), simde_mm512_set_epi64(INT64_C(-6399381415989804252), INT64_C(-2576284431776699090), INT64_C( 370798574555697704), INT64_C( 173592284760002472), INT64_C( -7341622042300314), INT64_C( -157660045832143350), INT64_C( 
8518903223542129770), INT64_C( 95668541519848644)) }, { simde_mm512_set_epi64(INT64_C( 6860124546956220466), INT64_C(-1265261131078623514), INT64_C( 5737379338676836508), INT64_C(-3711065605003334500), INT64_C(-8479853253989282483), INT64_C( 7964407686671565496), INT64_C( 7785652122788440203), INT64_C(-3096894189429138445)), UINT8_C(214), simde_mm512_set_epi32(INT32_C( -814208176), INT32_C( 1449013393), INT32_C( 623550410), INT32_C( -805020885), INT32_C(-1088320756), INT32_C( 2022589200), INT32_C( 839176386), INT32_C( 1343270967), INT32_C( 111940457), INT32_C( 1537061703), INT32_C(-1460061235), INT32_C( 1515709350), INT32_C( 1650058892), INT32_C( 69963651), INT32_C( 758490839), INT32_C( 180779892)), simde_mm512_set_epi32(INT32_C(-1893053059), INT32_C( -525508532), INT32_C( 1089028030), INT32_C( 641037603), INT32_C( 776284580), INT32_C( 143220066), INT32_C( 609964739), INT32_C( 739061585), INT32_C( 1296320934), INT32_C( 1641387359), INT32_C( 450216201), INT32_C( -102009462), INT32_C( 184231048), INT32_C( 68801332), INT32_C( 1909515723), INT32_C(-1558553543)), simde_mm512_set_epi64(INT64_C( -761468901003769076), INT64_C( -516048658485338655), INT64_C( 5737379338676836508), INT64_C( 992759969955502695), INT64_C(-8479853253989282483), INT64_C( -154616695341869700), INT64_C( 4813592380383132), INT64_C(-3096894189429138445)) }, { simde_mm512_set_epi64(INT64_C(-2621488480535608616), INT64_C(-6848868720227948061), INT64_C( 6279616399573024356), INT64_C( 745095038278958047), INT64_C(-1323215695156753279), INT64_C( -383012613214998281), INT64_C( 1460565887768366290), INT64_C(-5348367197220594908)), UINT8_C( 92), simde_mm512_set_epi32(INT32_C(-1537831012), INT32_C(-1136146129), INT32_C( 928255499), INT32_C( 1369020603), INT32_C( 1021713905), INT32_C(-1374572733), INT32_C( 981266194), INT32_C( -209600569), INT32_C( -856684622), INT32_C( 1444842251), INT32_C(-1223337348), INT32_C(-1314813402), INT32_C( 630708065), INT32_C( 1782361994), INT32_C( 982404882), INT32_C( 968278192)), 
simde_mm512_set_epi32(INT32_C( -560531037), INT32_C( 2016874130), INT32_C( 1909033660), INT32_C( -288062633), INT32_C( 1926487797), INT32_C(-1384808965), INT32_C( 650303852), INT32_C( 1591608188), INT32_C( 1071082983), INT32_C( 1207794171), INT32_C(-2085192565), INT32_C( 656256578), INT32_C(-1465520335), INT32_C( 2093271192), INT32_C( 315880197), INT32_C( 1596114493)), simde_mm512_set_epi64(INT64_C(-2621488480535608616), INT64_C( -394363679531427699), INT64_C( 6279616399573024356), INT64_C( -333601981829858972), INT64_C( 1745072048772318921), INT64_C( -862854943905058356), INT64_C( 1460565887768366290), INT64_C(-5348367197220594908)) }, { simde_mm512_set_epi64(INT64_C( 1319224608096301911), INT64_C(-6587132379427165760), INT64_C(-1318415648940904266), INT64_C( 5083686936283500523), INT64_C( 2916706726526170303), INT64_C( 1232072806289907439), INT64_C(-4244069429267903156), INT64_C( 1868613955189624367)), UINT8_C( 45), simde_mm512_set_epi32(INT32_C( 1044553244), INT32_C( 448636134), INT32_C( 422274875), INT32_C(-1037497281), INT32_C( 533714637), INT32_C(-1738371545), INT32_C( -17938559), INT32_C(-1389744139), INT32_C( 827695522), INT32_C(-1482919408), INT32_C( 1233158285), INT32_C( 343037625), INT32_C(-1483824200), INT32_C( -901390751), INT32_C( -727066099), INT32_C( -648215186)), simde_mm512_set_epi32(INT32_C( 1981159106), INT32_C( 410835312), INT32_C( 2072880481), INT32_C( 105988514), INT32_C( 751462668), INT32_C( 1834849576), INT32_C( -217803098), INT32_C(-1411746849), INT32_C(-1237635210), INT32_C( -311304150), INT32_C( -986441771), INT32_C( 1680967167), INT32_C( 746636010), INT32_C(-2078030023), INT32_C( 843084787), INT32_C( 759454903)), simde_mm512_set_epi64(INT64_C( 1319224608096301911), INT64_C(-6587132379427165760), INT64_C(-3189650292273714920), INT64_C( 5083686936283500523), INT64_C( 461638965825943200), INT64_C( 576634984670658375), INT64_C(-4244069429267903156), INT64_C( -492290201206756958)) }, { simde_mm512_set_epi64(INT64_C(-1017619325410469279), 
INT64_C( 7670597165848860921), INT64_C(-5135734722746288063), INT64_C( 8555281953176040262), INT64_C( 2622398452638226743), INT64_C( 2072647407054444460), INT64_C( 5884644356355100584), INT64_C(-3677310731734481669)), UINT8_C(226), simde_mm512_set_epi32(INT32_C( 390006051), INT32_C( 789765807), INT32_C( -514015364), INT32_C( -970761836), INT32_C( -378978470), INT32_C( -73123202), INT32_C(-1325609418), INT32_C( 1232280698), INT32_C( 1916265121), INT32_C( 1820507576), INT32_C( -792248141), INT32_C( -262685644), INT32_C( 1624847858), INT32_C( -403255584), INT32_C( 1568995237), INT32_C( 1227106212)), simde_mm512_set_epi32(INT32_C( -769652371), INT32_C( -261880602), INT32_C( 85687930), INT32_C( 432371064), INT32_C( 1626214727), INT32_C( 1845517289), INT32_C(-2002810442), INT32_C(-2069468881), INT32_C(-1294326872), INT32_C(-1409401131), INT32_C(-1446683671), INT32_C( 2011451607), INT32_C( 1570003547), INT32_C(-1564123603), INT32_C( -200447069), INT32_C( -676297563)), simde_mm512_set_epi64(INT64_C( -206824344976175814), INT64_C( -419729327921913504), INT64_C( -134950133518039378), INT64_C( 8555281953176040262), INT64_C( 2622398452638226743), INT64_C( 2072647407054444460), INT64_C( 630741576975949152), INT64_C(-3677310731734481669)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_mul_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_mul_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT8_C(138), simde_mm512_set_epi32(INT32_C( 1098716707), INT32_C(-1080185167), INT32_C( 796032668), INT32_C( 1756455873), INT32_C(-1031023150), INT32_C( 313996055), INT32_C(-1552434635), INT32_C( 82580470), INT32_C( -868810524), INT32_C(-1501290792), INT32_C( -628539172), INT32_C( 286404385), INT32_C(-2116183242), INT32_C( 925268541), 
INT32_C( 1423169798), INT32_C( 472979926)), simde_mm512_set_epi32(INT32_C(-1589762727), INT32_C( 1342398972), INT32_C( -162164967), INT32_C( 1184007139), INT32_C( 1973410894), INT32_C( 837116435), INT32_C(-1912965227), INT32_C( -221173809), INT32_C(-1524627531), INT32_C( 505638542), INT32_C( 1789154769), INT32_C( 1707140994), INT32_C( 111719139), INT32_C( 1287257616), INT32_C(-1103747425), INT32_C( 1951299418)), simde_mm512_set_epi64(INT64_C(-1450039457750448324), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( -759110487184905264), INT64_C( 0), INT64_C( 1191058976247458256), INT64_C( 0)) }, { UINT8_C(226), simde_mm512_set_epi32(INT32_C( 1851172912), INT32_C( 432012768), INT32_C(-1336678725), INT32_C( 141506650), INT32_C( 576471669), INT32_C(-2021849973), INT32_C( 610549751), INT32_C( 470887358), INT32_C( 1210740282), INT32_C( -720782218), INT32_C( 967227355), INT32_C(-1907082749), INT32_C( -376079371), INT32_C( 615957162), INT32_C( 189423181), INT32_C( 750118943)), simde_mm512_set_epi32(INT32_C(-1194827437), INT32_C( 1644918495), INT32_C(-1387747393), INT32_C(-1434123267), INT32_C( 1354817839), INT32_C( 1324343890), INT32_C( -595811004), INT32_C(-1790143018), INT32_C( -914188665), INT32_C( -647124032), INT32_C( 792952903), INT32_C( 2106780254), INT32_C( -65103351), INT32_C( -572558150), INT32_C( -801231269), INT32_C( -650481868)), simde_mm512_set_epi64(INT64_C( 710625792159344160), INT64_C( -202937979200225550), INT64_C(-2677624658239214970), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( -352671293153970300), INT64_C( 0)) }, { UINT8_C(205), simde_mm512_set_epi32(INT32_C( 1389849027), INT32_C( -968733779), INT32_C( -903985535), INT32_C( -505052458), INT32_C( 849997016), INT32_C( -665823569), INT32_C( 843681453), INT32_C( 1241052856), INT32_C( 1218361488), INT32_C( -511802096), INT32_C( 2056154947), INT32_C( -475453332), INT32_C( 1793883682), INT32_C( 1281268084), INT32_C(-1443305318), INT32_C(-2002775301)), simde_mm512_set_epi32(INT32_C( -480306273), 
INT32_C(-1942698584), INT32_C( 1404753532), INT32_C( 237623409), INT32_C( 1343658265), INT32_C(-1947335016), INT32_C( 1618135889), INT32_C( 726476998), INT32_C( 1324737144), INT32_C( 1048817456), INT32_C( 319312471), INT32_C( 1894816689), INT32_C( 1848939745), INT32_C(-1295730322), INT32_C( 1089929027), INT32_C( -785534579)), simde_mm512_set_epi64(INT64_C( 1881957740736268936), INT64_C( -120012286793789322), INT64_C( 0), INT64_C( 0), INT64_C( -536786972302187776), INT64_C( -900896908314257748), INT64_C( 0), INT64_C( 1573249252902633279)) }, { UINT8_C(206), simde_mm512_set_epi32(INT32_C( 163723168), INT32_C( 94537413), INT32_C( 1298848275), INT32_C( -99870655), INT32_C( 1537532032), INT32_C(-1949556986), INT32_C( -894015664), INT32_C(-1324496729), INT32_C( 850348293), INT32_C( 906352618), INT32_C(-1965873722), INT32_C(-2107953605), INT32_C( 559881293), INT32_C( -5815681), INT32_C(-1173896203), INT32_C( 1760080316)), simde_mm512_set_epi32(INT32_C( 1202706763), INT32_C(-1110213669), INT32_C( 1229627598), INT32_C( -147072860), INT32_C( 1883759514), INT32_C(-1191387298), INT32_C( 1673499534), INT32_C( 640453183), INT32_C(-1171836364), INT32_C( -982522972), INT32_C(-1480196612), INT32_C(-2077854762), INT32_C( -872251595), INT32_C( 1154127488), INT32_C( 896971913), INT32_C(-1585180342)), simde_mm512_set_epi64(INT64_C( -104956728144498297), INT64_C( 14688262860923300), INT64_C( 0), INT64_C( 0), INT64_C( -890512267917340696), INT64_C( 4380021436224317010), INT64_C( -6712037303539328), INT64_C( 0)) }, { UINT8_C(197), simde_mm512_set_epi32(INT32_C( -650041052), INT32_C( -647184441), INT32_C(-1880618021), INT32_C(-1812158288), INT32_C( 264100196), INT32_C( -263091932), INT32_C(-1182134909), INT32_C(-1890025577), INT32_C( 1421768266), INT32_C( 936126513), INT32_C( -213174057), INT32_C( -588951079), INT32_C( 217188364), INT32_C( 1950574682), INT32_C( -530860484), INT32_C( 713083418)), simde_mm512_set_epi32(INT32_C(-1419088193), INT32_C( 155768054), INT32_C( 575537364), 
INT32_C(-1651547513), INT32_C(-1310582959), INT32_C( 1366625247), INT32_C( 375333442), INT32_C( 516971366), INT32_C( 1204467496), INT32_C(-1684524880), INT32_C( 2029390656), INT32_C( 1244178650), INT32_C(-1765716319), INT32_C( -62663523), INT32_C( 233795696), INT32_C( 614711137)), simde_mm512_set_epi64(INT64_C( -100810660953647814), INT64_C( 2992865513708737744), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( -732760358386263350), INT64_C( 0), INT64_C( 438340318654626266)) }, { UINT8_C(221), simde_mm512_set_epi32(INT32_C(-1439155961), INT32_C( 680420491), INT32_C( 1277792130), INT32_C(-1440597466), INT32_C( -965757835), INT32_C( 2017786190), INT32_C(-1072056911), INT32_C( -897882665), INT32_C( 1375688), INT32_C( 1420052414), INT32_C( -331914389), INT32_C(-1649119241), INT32_C( -706010264), INT32_C( 713383150), INT32_C( 896627462), INT32_C( 1020243588)), simde_mm512_set_epi32(INT32_C( -359263092), INT32_C(-1196237833), INT32_C( 1322331949), INT32_C( 43567177), INT32_C( 414081468), INT32_C( 1462500900), INT32_C( 1902422273), INT32_C( -902219192), INT32_C(-1637598569), INT32_C( -626716515), INT32_C( 1485910176), INT32_C(-1246912099), INT32_C(-1032397276), INT32_C(-1436895618), INT32_C(-1408100359), INT32_C( -678052935)), simde_mm512_set_epi64(INT64_C( -813944733682636003), INT64_C( -62762764786973482), INT64_C( 0), INT64_C( 810086972527106680), INT64_C( -889970300019417210), INT64_C( 2056306734296596859), INT64_C( 0), INT64_C( -691779159258330780)) }, { UINT8_C(176), simde_mm512_set_epi32(INT32_C( 806025559), INT32_C( 277323133), INT32_C(-2040845209), INT32_C( 1514048177), INT32_C( 1299301232), INT32_C( 1804349866), INT32_C( 505045603), INT32_C(-1270991510), INT32_C(-1365476185), INT32_C( -470279784), INT32_C( 1957249393), INT32_C( 966280187), INT32_C( 550173580), INT32_C( 1419279519), INT32_C( 120074737), INT32_C( -623354205)), simde_mm512_set_epi32(INT32_C( 182628708), INT32_C( -711074484), INT32_C( 700640568), INT32_C( -182451726), INT32_C( 1928956599), INT32_C( 
1423054326), INT32_C( 1016030809), INT32_C(-1086945734), INT32_C( 1392670038), INT32_C( 3796661), INT32_C( -232044152), INT32_C( 1236628648), INT32_C( 19789106), INT32_C(-2055501126), INT32_C( 1118019036), INT32_C( -365745616)), simde_mm512_set_epi64(INT64_C( -197197403699238372), INT64_C( 0), INT64_C( 2567687882428820316), INT64_C( 1381498799744718340), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C(158), simde_mm512_set_epi32(INT32_C( 1841667236), INT32_C( 1484771017), INT32_C( -884031658), INT32_C(-1839125718), INT32_C(-1569039961), INT32_C( 1041098150), INT32_C( 1388925681), INT32_C( 863701002), INT32_C( 128435058), INT32_C( -263295419), INT32_C(-1184146866), INT32_C( 1159115917), INT32_C( 866281726), INT32_C(-1295662984), INT32_C( -351675537), INT32_C( -710944336)), simde_mm512_set_epi32(INT32_C(-1968953227), INT32_C( 227585281), INT32_C( -737334168), INT32_C( 1230090038), INT32_C(-1805794302), INT32_C( 1379277168), INT32_C( 744356262), INT32_C(-1333512317), INT32_C( -486348180), INT32_C( 1206532716), INT32_C( 1803086042), INT32_C(-1291499422), INT32_C( 1358104641), INT32_C(-2056773451), INT32_C(-1326911147), INT32_C( 1579123656)), simde_mm512_set_epi64(INT64_C( 337912029124600777), INT64_C( 0), INT64_C( 0), INT64_C(-1151755924372241634), INT64_C( -317674536996428004), INT64_C(-1496997536836499974), INT64_C( 2664885226934637784), INT64_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_mul_epi32(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_mul_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT8_C(166), simde_x_mm512_set_epu32(UINT32_C(4120514587), UINT32_C(1586964835), UINT32_C(1689003642), UINT32_C(2702971618), UINT32_C(2798377561), UINT32_C( 356472812), UINT32_C(2899999566), UINT32_C(3229978818), 
UINT32_C(1853356574), UINT32_C( 66069374), UINT32_C(1309687627), UINT32_C(1439779852), UINT32_C( 51916795), UINT32_C(2259276195), UINT32_C( 994758469), UINT32_C(3337558808)), simde_x_mm512_set_epu32(UINT32_C(2258908256), UINT32_C(3395137062), UINT32_C(4215249296), UINT32_C(2133148800), UINT32_C(2933274444), UINT32_C(1851927716), UINT32_C(4190919751), UINT32_C(1746820983), UINT32_C( 828520274), UINT32_C( 635229603), UINT32_C(1544367040), UINT32_C(1918376547), UINT32_C( 892222123), UINT32_C(3243361756), UINT32_C( 803359976), UINT32_C( 163739728)), simde_x_mm512_set_epu64(UINT64_C( 5387963127399214770), UINT64_C( 0), UINT64_C( 660161880543257392), UINT64_C( 0), UINT64_C( 0), UINT64_C( 2762039900919931044), UINT64_C( 7327650007104198420), UINT64_C( 0)) }, { UINT8_C(219), simde_x_mm512_set_epu32(UINT32_C(1410010955), UINT32_C( 503921354), UINT32_C(1418189156), UINT32_C( 444221777), UINT32_C( 148285537), UINT32_C(2288722231), UINT32_C( 344338098), UINT32_C(3454728003), UINT32_C(1478480780), UINT32_C(1743148264), UINT32_C(3822764711), UINT32_C(1636469832), UINT32_C(3280064546), UINT32_C( 933016241), UINT32_C(1726799481), UINT32_C(3824577952)), simde_x_mm512_set_epu32(UINT32_C(1037963842), UINT32_C(2512709916), UINT32_C( 937723538), UINT32_C(2708935661), UINT32_C(3914488889), UINT32_C(2550814880), UINT32_C(2900652427), UINT32_C(2176349091), UINT32_C(3028439158), UINT32_C(2773320535), UINT32_C( 444719300), UINT32_C(3233757255), UINT32_C(1004532908), UINT32_C( 43390785), UINT32_C( 738451500), UINT32_C(3536836475)), simde_x_mm512_set_epu64(UINT64_C( 1266208183079946264), UINT64_C( 1203368213108089597), UINT64_C( 0), UINT64_C( 7518694148981295273), UINT64_C( 4834308876100801240), UINT64_C( 0), UINT64_C( 40484307114739185), UINT64_C(13526906802114399200)) }, { UINT8_C(194), simde_x_mm512_set_epu32(UINT32_C(1176316177), UINT32_C(1751826934), UINT32_C(3378345958), UINT32_C( 543404964), UINT32_C(2579785136), UINT32_C(2416322328), UINT32_C( 75139728), UINT32_C(2416880998), 
UINT32_C(4234686409), UINT32_C(2660004756), UINT32_C(2106185379), UINT32_C( 797059438), UINT32_C(2372191392), UINT32_C( 269555244), UINT32_C(1767269404), UINT32_C(1625455101)), simde_x_mm512_set_epu32(UINT32_C(1851434759), UINT32_C(3604871411), UINT32_C(1327258228), UINT32_C(1673018657), UINT32_C( 305970996), UINT32_C(2819644549), UINT32_C(3721065063), UINT32_C(1020891301), UINT32_C(4202682046), UINT32_C(4200645833), UINT32_C( 117038385), UINT32_C(3020070100), UINT32_C(1353160823), UINT32_C( 440057333), UINT32_C(3056423613), UINT32_C(1896622192)), simde_x_mm512_set_epu64(UINT64_C( 6315110831396383874), UINT64_C( 909126643078413348), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 118619761770804252), UINT64_C( 0)) }, { UINT8_C( 67), simde_x_mm512_set_epu32(UINT32_C(2693954212), UINT32_C( 277998850), UINT32_C(4169077113), UINT32_C(3680111513), UINT32_C(4156583103), UINT32_C(4105987148), UINT32_C( 11818996), UINT32_C( 514873926), UINT32_C(1191268288), UINT32_C(3638344486), UINT32_C(2361786195), UINT32_C( 500533201), UINT32_C(3058957194), UINT32_C( 190737734), UINT32_C(3837187385), UINT32_C(4003123598)), simde_x_mm512_set_epu32(UINT32_C(4122956852), UINT32_C(3007076678), UINT32_C(1011742851), UINT32_C( 831857768), UINT32_C(2217989187), UINT32_C(3681606305), UINT32_C(3147415754), UINT32_C( 236426985), UINT32_C( 216160186), UINT32_C(3279967715), UINT32_C(3874145825), UINT32_C(2203854710), UINT32_C(3676418261), UINT32_C( 401038296), UINT32_C(3825112812), UINT32_C(2929607534)), simde_x_mm512_set_epu64(UINT64_C( 0), UINT64_C( 3061329349195282984), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 76493135826261264), UINT64_C(11727581052233987332)) }, { UINT8_C(169), simde_x_mm512_set_epu32(UINT32_C( 428651380), UINT32_C(1050238262), UINT32_C(4167120113), UINT32_C( 669121916), UINT32_C(1457539263), UINT32_C(3520615042), UINT32_C(1174118849), UINT32_C(1102257957), UINT32_C(1414101989), UINT32_C(3097425534), UINT32_C(1024087984), 
UINT32_C(1792583521), UINT32_C(3354797839), UINT32_C( 580554502), UINT32_C(1472515666), UINT32_C(3870057603)), simde_x_mm512_set_epu32(UINT32_C(2083050486), UINT32_C(1796942025), UINT32_C( 755961532), UINT32_C(2214717680), UINT32_C(3706324798), UINT32_C(1039769945), UINT32_C(3555811997), UINT32_C( 761202637), UINT32_C(1863011574), UINT32_C(1454498620), UINT32_C( 958628441), UINT32_C(2380256526), UINT32_C(4218133731), UINT32_C(3449338768), UINT32_C(3115502206), UINT32_C(2279816507)), simde_x_mm512_set_epu64(UINT64_C( 1887217269250760550), UINT64_C( 0), UINT64_C( 3660629708586512690), UINT64_C( 0), UINT64_C( 4505201164755763080), UINT64_C( 0), UINT64_C( 0), UINT64_C( 8823021206360252721)) }, { UINT8_C(203), simde_x_mm512_set_epu32(UINT32_C( 968785729), UINT32_C(3446816529), UINT32_C(1989948608), UINT32_C(3935090572), UINT32_C(2260595137), UINT32_C(3809743538), UINT32_C(1768049062), UINT32_C(1253090843), UINT32_C(4000901225), UINT32_C(2487234584), UINT32_C( 840765913), UINT32_C(1202598978), UINT32_C(2886819484), UINT32_C(2063363126), UINT32_C(2370412425), UINT32_C(1978444200)), simde_x_mm512_set_epu32(UINT32_C(1758358159), UINT32_C( 259726788), UINT32_C(1062244813), UINT32_C(1397736159), UINT32_C(1484315275), UINT32_C(2101001099), UINT32_C(2659688367), UINT32_C(1816554597), UINT32_C(1863116741), UINT32_C(3211066307), UINT32_C( 496281550), UINT32_C(3010953410), UINT32_C(1914417911), UINT32_C(1058492483), UINT32_C(1785378717), UINT32_C(1261746977)), simde_x_mm512_set_epu64(UINT64_C( 895230585902478852), UINT64_C( 5500218381424392948), UINT64_C( 0), UINT64_C( 0), UINT64_C( 7986675170287561288), UINT64_C( 0), UINT64_C( 2184054358570381858), UINT64_C( 2496295988513183400)) }, { UINT8_C( 89), simde_x_mm512_set_epu32(UINT32_C( 244202415), UINT32_C(1696418382), UINT32_C(4253734840), UINT32_C(1521382913), UINT32_C(2523120367), UINT32_C( 719365215), UINT32_C( 746887847), UINT32_C( 329869757), UINT32_C(2935442647), UINT32_C(3965449572), UINT32_C( 2046702), UINT32_C(3055578856), 
UINT32_C(2614828885), UINT32_C(2261447742), UINT32_C( 379053160), UINT32_C(1474182998)), simde_x_mm512_set_epu32(UINT32_C( 180314942), UINT32_C(3784268734), UINT32_C(2189933725), UINT32_C(1759707651), UINT32_C(4017470040), UINT32_C( 528482752), UINT32_C(2637497058), UINT32_C(3574995683), UINT32_C(2110412704), UINT32_C( 661885013), UINT32_C(3935066909), UINT32_C( 163101530), UINT32_C(3963037657), UINT32_C( 399559486), UINT32_C( 875430591), UINT32_C(1854318955)), simde_x_mm512_set_epu64(UINT64_C( 0), UINT64_C( 2677189152106767363), UINT64_C( 0), UINT64_C( 1179282957227259031), UINT64_C( 2624671641514064436), UINT64_C( 0), UINT64_C( 0), UINT64_C( 2733605476330127090)) }, { UINT8_C( 27), simde_x_mm512_set_epu32(UINT32_C(3300721541), UINT32_C(3440866090), UINT32_C(3838602911), UINT32_C(1016597887), UINT32_C( 287068752), UINT32_C(1521867279), UINT32_C(2420112012), UINT32_C(2417142414), UINT32_C( 344709524), UINT32_C(1803316517), UINT32_C( 467213234), UINT32_C( 327864893), UINT32_C(2661940215), UINT32_C(4228328219), UINT32_C(1396080639), UINT32_C(4001917131)), simde_x_mm512_set_epu32(UINT32_C(4155157678), UINT32_C(1774567103), UINT32_C(1949309963), UINT32_C( 729844445), UINT32_C(2587732272), UINT32_C( 138621029), UINT32_C(2703994882), UINT32_C(1904478113), UINT32_C(2402800240), UINT32_C( 959065024), UINT32_C(2558227042), UINT32_C(3067418732), UINT32_C(1398342314), UINT32_C(3263383247), UINT32_C(3963437622), UINT32_C(1585677583)), simde_x_mm512_set_epu64(UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 4603394823466984782), UINT64_C( 1729497798656201408), UINT64_C( 0), UINT64_C(13798655472701947093), UINT64_C( 6345750283650374373)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_mul_epu32(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_u64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mul_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; 
simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu32(UINT32_C( 768255153), UINT32_C(3116504916), UINT32_C(2849349603), UINT32_C(3380602699), UINT32_C(3667150171), UINT32_C(2606748140), UINT32_C( 256440763), UINT32_C(4236376754), UINT32_C( 137611130), UINT32_C(3608004165), UINT32_C( 23379469), UINT32_C( 634104346), UINT32_C( 752813209), UINT32_C( 304980816), UINT32_C( 873141861), UINT32_C( 42552677)), simde_x_mm512_set_epu32(UINT32_C(3537218346), UINT32_C( 554508152), UINT32_C(2540529856), UINT32_C(1492162297), UINT32_C(4027600118), UINT32_C( 897760677), UINT32_C( 832131936), UINT32_C(1226979254), UINT32_C(2823494192), UINT32_C(1370605687), UINT32_C(3043623483), UINT32_C( 310819447), UINT32_C(3262156609), UINT32_C(4133822383), UINT32_C( 153673388), UINT32_C( 507486240)), simde_x_mm512_set_epu64(UINT64_C( 1728127381670075232), UINT64_C( 5044407888584239603), UINT64_C( 2340235974934890780), UINT64_C( 5197946389285861516), UINT64_C( 4945151027268686355), UINT64_C( 197091962164016662), UINT64_C( 1260736523566404528), UINT64_C( 21594898052664480)) }, { simde_x_mm512_set_epu32(UINT32_C(3038228522), UINT32_C(2177263565), UINT32_C(1580156717), UINT32_C(3293644153), UINT32_C(3877520946), UINT32_C(1914222601), UINT32_C(2300352870), UINT32_C(3239916612), UINT32_C(2045429998), UINT32_C(2834457902), UINT32_C(2502406118), UINT32_C(3874567768), UINT32_C(1624909929), UINT32_C( 552025498), UINT32_C(3781080866), UINT32_C( 445279347)), simde_x_mm512_set_epu32(UINT32_C(3686614578), UINT32_C(2012284249), UINT32_C(4065027833), UINT32_C(2391570441), UINT32_C(2365238876), UINT32_C(1345195249), UINT32_C(3876854758), UINT32_C(2935715346), UINT32_C( 298066676), UINT32_C( 616522972), UINT32_C(2787002250), UINT32_C(2890879290), UINT32_C(2445030057), UINT32_C(1418356119), UINT32_C( 876914337), UINT32_C(2884175418)), simde_x_mm512_set_epu64(UINT64_C( 4381273177771087685), UINT64_C( 7876981999487281473), UINT64_C( 2575003148393622649), UINT64_C( 9511472917608727752), UINT64_C( 
1747508409749924744), UINT64_C(11200907718212724720), UINT64_C( 782968742932322262), UINT64_C( 1284263746760492046)) }, { simde_x_mm512_set_epu32(UINT32_C(1327609198), UINT32_C(1911763444), UINT32_C(3596479631), UINT32_C(3933061513), UINT32_C(3463498323), UINT32_C( 687111330), UINT32_C( 635542403), UINT32_C(1153148129), UINT32_C(3815988413), UINT32_C(2677217701), UINT32_C( 271158343), UINT32_C(1601359912), UINT32_C(4276983578), UINT32_C(2555948345), UINT32_C(1846034446), UINT32_C(1127199678)), simde_x_mm512_set_epu32(UINT32_C( 657487984), UINT32_C(2570668084), UINT32_C( 753507331), UINT32_C(1705841966), UINT32_C(3937986766), UINT32_C(3019564702), UINT32_C(2409588030), UINT32_C( 467930148), UINT32_C(4115653696), UINT32_C(3587871831), UINT32_C(1753201197), UINT32_C( 778129766), UINT32_C(1742436576), UINT32_C(2505253535), UINT32_C(2950628411), UINT32_C(2064165361)), simde_x_mm512_set_epu64(UINT64_C( 4914509269648721296), UINT64_C( 6709181383734854558), UINT64_C( 2074777118412273660), UINT64_C( 539592774668893092), UINT64_C( 9605513974872480531), UINT64_C( 1246065813606340592), UINT64_C( 6403298626588649575), UINT64_C( 2326726530257953758)) }, { simde_x_mm512_set_epu32(UINT32_C(2919345837), UINT32_C(4236345846), UINT32_C(3784567990), UINT32_C(4027374119), UINT32_C(3288320277), UINT32_C( 390224653), UINT32_C(3910835486), UINT32_C( 865013699), UINT32_C(2748043226), UINT32_C( 449666617), UINT32_C( 232557914), UINT32_C( 838031623), UINT32_C(3874792609), UINT32_C( 112848728), UINT32_C( 173110782), UINT32_C(1917463852)), simde_x_mm512_set_epu32(UINT32_C(1885981625), UINT32_C( 167105269), UINT32_C(1642818873), UINT32_C( 795737603), UINT32_C( 583404702), UINT32_C(1590901338), UINT32_C(2535047020), UINT32_C(1665933152), UINT32_C( 271178891), UINT32_C( 492170774), UINT32_C(4067923128), UINT32_C(2457242362), UINT32_C(4206213856), UINT32_C(1232486026), UINT32_C(2718472187), UINT32_C(4168006676)), simde_x_mm512_set_epu64(UINT64_C( 707915712172862574), UINT64_C( 
3204733027837296757), UINT64_C( 620808922578285714), UINT64_C( 1441054998098249248), UINT64_C( 221312766930851558), UINT64_C( 2059246804731213526), UINT64_C( 139084480311874928), UINT64_C( 7992002136124675952)) }, { simde_x_mm512_set_epu32(UINT32_C( 27104904), UINT32_C(4150065749), UINT32_C(3990632930), UINT32_C( 634032004), UINT32_C(2048919564), UINT32_C(1865014244), UINT32_C( 549754386), UINT32_C(2522098959), UINT32_C(2696620961), UINT32_C( 891563523), UINT32_C(2188909902), UINT32_C(2179241133), UINT32_C(1743310130), UINT32_C( 377093787), UINT32_C(2755680804), UINT32_C(3712100521)), simde_x_mm512_set_epu32(UINT32_C( 672807047), UINT32_C(2773804867), UINT32_C(4088841569), UINT32_C( 619049193), UINT32_C( 593052350), UINT32_C( 730103388), UINT32_C(1414198306), UINT32_C(4002179273), UINT32_C(1269785901), UINT32_C( 747703241), UINT32_C( 347333415), UINT32_C( 968811996), UINT32_C(4236163540), UINT32_C(2123412997), UINT32_C( 33837735), UINT32_C(1851248845)), simde_x_mm512_set_epu64(UINT64_C(11511472572946200383), UINT64_C( 392497000412372772), UINT64_C( 1361653218212658672), UINT64_C(10093892178164676807), UINT64_C( 666624935704478043), UINT64_C( 2111274951827031468), UINT64_C( 800725848403749639), UINT64_C( 6872021802025148245)) }, { simde_x_mm512_set_epu32(UINT32_C(3744995587), UINT32_C(2704878999), UINT32_C(2216207729), UINT32_C(3174220609), UINT32_C(2276590134), UINT32_C( 284809778), UINT32_C(2003404586), UINT32_C(1707085270), UINT32_C(2713648433), UINT32_C(2786430472), UINT32_C( 397019195), UINT32_C( 630796576), UINT32_C(1959866953), UINT32_C( 629006272), UINT32_C(2429347726), UINT32_C(3247824799)), simde_x_mm512_set_epu32(UINT32_C(3020299794), UINT32_C(2488516068), UINT32_C(3326847413), UINT32_C(1426347053), UINT32_C(3015511399), UINT32_C( 258677619), UINT32_C(3923020384), UINT32_C( 835454201), UINT32_C(3228303109), UINT32_C( 994730831), UINT32_C(2437482082), UINT32_C(1004732602), UINT32_C(3078918689), UINT32_C(1633253517), UINT32_C(1920589043), 
UINT32_C(3888518352)), simde_x_mm512_set_epu64(UINT64_C( 6731134851007255932), UINT64_C( 4527540211219015277), UINT64_C( 73673915240958582), UINT64_C( 1426191560286719270), UINT64_C( 2771748298936282232), UINT64_C( 633781885137170752), UINT64_C( 1027326705959058624), UINT64_C(12629226334992211248)) }, { simde_x_mm512_set_epu32(UINT32_C( 237961802), UINT32_C(1124052031), UINT32_C(3408632402), UINT32_C(1936321731), UINT32_C(3188356992), UINT32_C( 413227284), UINT32_C(1767960975), UINT32_C(2214647351), UINT32_C(4011124733), UINT32_C(3189426671), UINT32_C(3040561164), UINT32_C(3376223700), UINT32_C(2268266209), UINT32_C( 155837480), UINT32_C(1377610501), UINT32_C(1504228568)), simde_x_mm512_set_epu32(UINT32_C(1573768507), UINT32_C( 476780671), UINT32_C(2153500842), UINT32_C(1201914669), UINT32_C(1130822801), UINT32_C(3370243267), UINT32_C(1286308912), UINT32_C(2062398363), UINT32_C(1095401713), UINT32_C(4089334856), UINT32_C(2597794703), UINT32_C(2139321595), UINT32_C(2505322640), UINT32_C(2764790171), UINT32_C(3415336749), UINT32_C( 59419438)), simde_x_mm512_set_epu64(UINT64_C( 535926281579092801), UINT64_C( 2327293492392372039), UINT64_C( 1392676471641696828), UINT64_C( 4567485071324686413), UINT64_C(13042633656376344376), UINT64_C( 7222828270960801500), UINT64_C( 430857932977409080), UINT64_C( 89380416134104784)) }, { simde_x_mm512_set_epu32(UINT32_C( 493235400), UINT32_C( 189383962), UINT32_C(2622533649), UINT32_C( 943550019), UINT32_C( 227224723), UINT32_C(1724057992), UINT32_C(4133039778), UINT32_C(3416450213), UINT32_C(1064097074), UINT32_C(1615527431), UINT32_C( 106890087), UINT32_C(3131878508), UINT32_C(4228916541), UINT32_C(2298347901), UINT32_C(2681451816), UINT32_C( 956711717)), simde_x_mm512_set_epu32(UINT32_C(1959431707), UINT32_C(3425635109), UINT32_C(3493232750), UINT32_C(2950665544), UINT32_C(1223627161), UINT32_C(3625235337), UINT32_C( 456501342), UINT32_C(2245318318), UINT32_C(3915087897), UINT32_C(4086538960), UINT32_C(2510683850), 
UINT32_C(3689243003), UINT32_C(2734380582), UINT32_C(3715382302), UINT32_C(3353219492), UINT32_C(1828163673)), simde_x_mm512_set_epu64(UINT64_C( 648760349308721858), UINT64_C( 2784100530103845336), UINT64_C( 6250115955635663304), UINT64_C( 7671018245783901734), UINT64_C( 6601915787730211760), UINT64_C(11554260871885079524), UINT64_C( 8539241115214248102), UINT64_C( 1749025606552856541)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mul_epu32(test_vec[i].a, test_vec[i].b); simde_assert_m512i_u64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_mul_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask8 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu64(UINT64_C(11617731129322750966), UINT64_C( 2428231924375211538), UINT64_C(14175135673172244792), UINT64_C( 5480651963328574733), UINT64_C(12032129819668007160), UINT64_C( 4424822542185790875), UINT64_C(14867552050688999361), UINT64_C(18178319731812771647)), UINT8_C(193), simde_x_mm512_set_epu32(UINT32_C(3263550415), UINT32_C(3722438839), UINT32_C(2250736680), UINT32_C(1691034658), UINT32_C(2554266733), UINT32_C(3217520562), UINT32_C(1461265118), UINT32_C( 788514619), UINT32_C(2189225773), UINT32_C(1631863219), UINT32_C(1750226365), UINT32_C( 568487836), UINT32_C(2400146531), UINT32_C(3640095823), UINT32_C( 206125598), UINT32_C( 929630688)), simde_x_mm512_set_epu32(UINT32_C(2599352978), UINT32_C(2499798093), UINT32_C(1296552276), UINT32_C( 187401912), UINT32_C(2689124664), UINT32_C(3631388300), UINT32_C( 697343737), UINT32_C(2062287739), UINT32_C(2446945975), UINT32_C( 568195668), UINT32_C( 631862630), UINT32_C( 893101483), UINT32_C( 859175541), UINT32_C( 104178488), UINT32_C(4045255037), UINT32_C(4203900757)), simde_x_mm512_set_epu64(UINT64_C( 9305345511041334027), UINT64_C( 316903128167466096), UINT64_C(14175135673172244792), UINT64_C( 5480651963328574733), 
UINT64_C(12032129819668007160), UINT64_C( 4424822542185790875), UINT64_C(14867552050688999361), UINT64_C( 3908075153013630816)) }, { simde_x_mm512_set_epu64(UINT64_C(13472523368263323530), UINT64_C( 9759174632444686247), UINT64_C(13193200280295594145), UINT64_C( 61830035715779390), UINT64_C( 3749566472430999385), UINT64_C( 4387274564618060685), UINT64_C( 6292382073951294857), UINT64_C( 2998314933539498774)), UINT8_C( 51), simde_x_mm512_set_epu32(UINT32_C(2328158325), UINT32_C(4044751844), UINT32_C(3983880162), UINT32_C(1865776280), UINT32_C( 663966634), UINT32_C(3835216878), UINT32_C(1416309334), UINT32_C(3569688362), UINT32_C( 671765842), UINT32_C(4201434406), UINT32_C(2145277385), UINT32_C( 232005336), UINT32_C(3364267605), UINT32_C(1204199577), UINT32_C(1905702483), UINT32_C(1405245895)), simde_x_mm512_set_epu32(UINT32_C(1978530737), UINT32_C(3532128238), UINT32_C(2586558058), UINT32_C(2555375701), UINT32_C(1328514887), UINT32_C(2070014178), UINT32_C(2738790052), UINT32_C(1824660691), UINT32_C(3424488035), UINT32_C(3798301173), UINT32_C( 335648721), UINT32_C( 829536855), UINT32_C( 101359129), UINT32_C( 500535839), UINT32_C(3768468917), UINT32_C(3000753624)), simde_x_mm512_set_epu64(UINT64_C(13472523368263323530), UINT64_C( 9759174632444686247), UINT64_C( 7938953313164896284), UINT64_C( 6513470033261578142), UINT64_C( 3749566472430999385), UINT64_C( 4387274564618060685), UINT64_C( 602745045597140103), UINT64_C( 4216796712032373480)) }, { simde_x_mm512_set_epu64(UINT64_C( 9490244949648135949), UINT64_C( 3952247228721925392), UINT64_C( 4800241040971682796), UINT64_C( 9619996883527725324), UINT64_C(15935750477416943804), UINT64_C( 545362928884482916), UINT64_C(13559318363578452842), UINT64_C( 7722701545450284407)), UINT8_C(134), simde_x_mm512_set_epu32(UINT32_C( 500898194), UINT32_C(4078085990), UINT32_C(2494049110), UINT32_C(1592224201), UINT32_C( 111635698), UINT32_C( 186713), UINT32_C(1765622469), UINT32_C(4017148467), UINT32_C(2543052619), UINT32_C(1161807732), 
UINT32_C(1925351794), UINT32_C(2298119068), UINT32_C( 457010151), UINT32_C(2589010019), UINT32_C( 502276479), UINT32_C(1967748710)), simde_x_mm512_set_epu32(UINT32_C(1919012105), UINT32_C(2908857333), UINT32_C(1122604656), UINT32_C(3433647442), UINT32_C(2386428500), UINT32_C( 463161035), UINT32_C( 504317420), UINT32_C(3353921428), UINT32_C(1582348389), UINT32_C(1180932658), UINT32_C(1476554796), UINT32_C(2783736621), UINT32_C( 330646602), UINT32_C(1769150036), UINT32_C(2020624655), UINT32_C(3683994282)), simde_x_mm512_set_epu64(UINT64_C(11862570336616064670), UINT64_C( 3952247228721925392), UINT64_C( 4800241040971682796), UINT64_C( 9619996883527725324), UINT64_C(15935750477416943804), UINT64_C( 6397358209009989228), UINT64_C( 4580347168318210684), UINT64_C( 7722701545450284407)) }, { simde_x_mm512_set_epu64(UINT64_C( 8956593975554634232), UINT64_C( 9593792923362730078), UINT64_C( 751700862087837721), UINT64_C( 7205298436209283097), UINT64_C( 7151721520472513082), UINT64_C( 8910303953543094872), UINT64_C( 533657364826431938), UINT64_C(13265804505255182490)), UINT8_C(251), simde_x_mm512_set_epu32(UINT32_C(1468179080), UINT32_C(3172744829), UINT32_C(1457928522), UINT32_C(1192418034), UINT32_C( 105193191), UINT32_C( 430546192), UINT32_C(1509518002), UINT32_C( 354607881), UINT32_C(3139371107), UINT32_C(2393204313), UINT32_C(1496510794), UINT32_C(3916080313), UINT32_C(3933358732), UINT32_C(2965437178), UINT32_C(2440098689), UINT32_C( 675981365)), simde_x_mm512_set_epu32(UINT32_C(3471092536), UINT32_C(4213288110), UINT32_C(3288478343), UINT32_C( 269318758), UINT32_C(2757016548), UINT32_C( 404238758), UINT32_C(3038240298), UINT32_C(3153052129), UINT32_C(1906833283), UINT32_C(1593207408), UINT32_C( 59630942), UINT32_C(3403525194), UINT32_C(2850644791), UINT32_C(1343686045), UINT32_C(1484433553), UINT32_C(2500647723)), simde_x_mm512_set_epu64(UINT64_C(13367688064089683190), UINT64_C( 321140543933681772), UINT64_C( 174043457915709536), UINT64_C( 1118097134147228649), 
UINT64_C( 3812870840329150704), UINT64_C( 8910303953543094872), UINT64_C( 3984616553402781010), UINT64_C( 1690391261177681895)) }, { simde_x_mm512_set_epu64(UINT64_C( 9350173910558210368), UINT64_C( 8451791018593404629), UINT64_C( 5111327021160397113), UINT64_C( 8067526547900849939), UINT64_C( 4397190784689926414), UINT64_C(15730677711069966608), UINT64_C(15155555711952095903), UINT64_C( 4601095961680188139)), UINT8_C( 77), simde_x_mm512_set_epu32(UINT32_C( 289816884), UINT32_C(3139170300), UINT32_C(2611086568), UINT32_C( 981312265), UINT32_C(2310975133), UINT32_C(1423285786), UINT32_C(4145860146), UINT32_C( 849682935), UINT32_C(3098522529), UINT32_C(1778767618), UINT32_C(2469498326), UINT32_C(3407697658), UINT32_C(4257688348), UINT32_C(1327333484), UINT32_C(4206795397), UINT32_C(1498113253)), simde_x_mm512_set_epu32(UINT32_C(3119696014), UINT32_C( 934565143), UINT32_C( 659452226), UINT32_C(3987623713), UINT32_C(3941044651), UINT32_C(3075534691), UINT32_C( 348385654), UINT32_C(3299605274), UINT32_C( 734145932), UINT32_C(2544261168), UINT32_C(1332327027), UINT32_C(3348556299), UINT32_C(3524888946), UINT32_C(2026143937), UINT32_C(3684996090), UINT32_C(3613250397)), simde_x_mm512_set_epu64(UINT64_C( 9350173910558210368), UINT64_C( 3913104057771739945), UINT64_C( 5111327021160397113), UINT64_C( 8067526547900849939), UINT64_C( 4525649377373257824), UINT64_C(11410867457783447742), UINT64_C(15155555711952095903), UINT64_C( 5413058306153211441)) }, { simde_x_mm512_set_epu64(UINT64_C(16773967285187515106), UINT64_C( 3477992427783883408), UINT64_C( 1002604261497217766), UINT64_C( 1352136840172993944), UINT64_C(10899831745595212891), UINT64_C( 2236619794744991665), UINT64_C( 4130838651210953091), UINT64_C(12223797258734177268)), UINT8_C(188), simde_x_mm512_set_epu32(UINT32_C(3198361131), UINT32_C(1105007823), UINT32_C(3912476736), UINT32_C(2446995251), UINT32_C(2582844574), UINT32_C(2764726563), UINT32_C(1724064135), UINT32_C( 994964469), UINT32_C( 257756540), UINT32_C( 
980676724), UINT32_C(2274290616), UINT32_C(4142129112), UINT32_C(1407509141), UINT32_C(1593753754), UINT32_C(3346961920), UINT32_C(3859603033)), simde_x_mm512_set_epu32(UINT32_C(1883521655), UINT32_C( 997816392), UINT32_C( 186891147), UINT32_C(2813182686), UINT32_C(3818488413), UINT32_C(1262292349), UINT32_C(2615667202), UINT32_C(3433123518), UINT32_C(2317895719), UINT32_C(1795398992), UINT32_C(1050555729), UINT32_C(1774700454), UINT32_C( 350094657), UINT32_C(2222937199), UINT32_C( 539823167), UINT32_C( 663093254)), simde_x_mm512_set_epu64(UINT64_C( 1102594919077634616), UINT64_C( 3477992427783883408), UINT64_C( 3489893187551966487), UINT64_C( 3415835918098281942), UINT64_C( 1760706001747462208), UINT64_C( 7351038415593016848), UINT64_C( 4130838651210953091), UINT64_C(12223797258734177268)) }, { simde_x_mm512_set_epu64(UINT64_C( 9161315007163903385), UINT64_C( 901926328951971839), UINT64_C(11374042021460658344), UINT64_C(14235844241233139061), UINT64_C(16689996302050367513), UINT64_C(17811135944692719319), UINT64_C( 7952138000462838282), UINT64_C(15106420877923679668)), UINT8_C(197), simde_x_mm512_set_epu32(UINT32_C(1215894565), UINT32_C( 325247992), UINT32_C(3808486726), UINT32_C(3829410744), UINT32_C(1276796092), UINT32_C( 483034698), UINT32_C(3265794508), UINT32_C( 145210622), UINT32_C(4212031611), UINT32_C(3325547336), UINT32_C(1445017193), UINT32_C(2689093900), UINT32_C(4273435877), UINT32_C( 524026689), UINT32_C(3618756570), UINT32_C(3961201514)), simde_x_mm512_set_epu32(UINT32_C(3460615822), UINT32_C(2842020471), UINT32_C(1351189519), UINT32_C(2329879373), UINT32_C(3974357402), UINT32_C(2816300347), UINT32_C( 773721318), UINT32_C(3997442937), UINT32_C(2436503902), UINT32_C(3242344117), UINT32_C(1149812233), UINT32_C( 907108945), UINT32_C(1385675283), UINT32_C(3399903430), UINT32_C(2550192792), UINT32_C(3214774192)), simde_x_mm512_set_epu64(UINT64_C( 924361451415644232), UINT64_C( 8922065103190183512), UINT64_C(11374042021460658344), 
UINT64_C(14235844241233139061), UINT64_C(16689996302050367513), UINT64_C( 2439301130634935500), UINT64_C( 7952138000462838282), UINT64_C(12734368396518526688)) }, { simde_x_mm512_set_epu64(UINT64_C(10381435592908454864), UINT64_C( 7683972863259161915), UINT64_C( 312335983814548083), UINT64_C( 3934167861393427795), UINT64_C(15803008790257017530), UINT64_C(12384685209313245301), UINT64_C(17881738201070197485), UINT64_C(14224003016858721277)), UINT8_C( 76), simde_x_mm512_set_epu32(UINT32_C(3028673683), UINT32_C(2581675996), UINT32_C(3969199228), UINT32_C(1709618805), UINT32_C(3286547215), UINT32_C(2496179327), UINT32_C(2647114121), UINT32_C(2818621113), UINT32_C( 879830851), UINT32_C(3024057012), UINT32_C( 247658746), UINT32_C(1778653183), UINT32_C( 608002580), UINT32_C(2912110970), UINT32_C(2119947745), UINT32_C( 102275654)), simde_x_mm512_set_epu32(UINT32_C(3762799031), UINT32_C(1035026982), UINT32_C( 282468805), UINT32_C( 635023104), UINT32_C(1863059331), UINT32_C(4265385561), UINT32_C( 804673998), UINT32_C(2920963576), UINT32_C(1218801842), UINT32_C(1010987093), UINT32_C(3172703974), UINT32_C(1792395250), UINT32_C(3430253324), UINT32_C( 714780216), UINT32_C(4029344470), UINT32_C(2590869425)), simde_x_mm512_set_epu64(UINT64_C(10381435592908454864), UINT64_C( 1085647440207870720), UINT64_C( 312335983814548083), UINT64_C( 3934167861393427795), UINT64_C( 3057282607628146116), UINT64_C( 3188049516606580750), UINT64_C(17881738201070197485), UINT64_C(14224003016858721277)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_mul_epu32(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_u64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mul_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 b; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -775.40), SIMDE_FLOAT32_C( -210.92), SIMDE_FLOAT32_C( 987.42), SIMDE_FLOAT32_C( 
542.45), SIMDE_FLOAT32_C( -745.60), SIMDE_FLOAT32_C( -50.38), SIMDE_FLOAT32_C( 163.82), SIMDE_FLOAT32_C( -164.62), SIMDE_FLOAT32_C( -736.65), SIMDE_FLOAT32_C( -764.30), SIMDE_FLOAT32_C( 675.25), SIMDE_FLOAT32_C( -182.15), SIMDE_FLOAT32_C( -748.44), SIMDE_FLOAT32_C( 82.10), SIMDE_FLOAT32_C( 684.52), SIMDE_FLOAT32_C( -343.09)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 263.91), SIMDE_FLOAT32_C( -350.89), SIMDE_FLOAT32_C( -318.01), SIMDE_FLOAT32_C( -980.00), SIMDE_FLOAT32_C( 872.18), SIMDE_FLOAT32_C( 80.96), SIMDE_FLOAT32_C( 145.89), SIMDE_FLOAT32_C( 832.89), SIMDE_FLOAT32_C( -267.96), SIMDE_FLOAT32_C( -536.57), SIMDE_FLOAT32_C( -934.00), SIMDE_FLOAT32_C( 653.62), SIMDE_FLOAT32_C( 984.11), SIMDE_FLOAT32_C( 140.30), SIMDE_FLOAT32_C( -580.05), SIMDE_FLOAT32_C( -915.75)), simde_mm512_set_ps(SIMDE_FLOAT32_C(-204635.83), SIMDE_FLOAT32_C( 74009.72), SIMDE_FLOAT32_C(-314009.44), SIMDE_FLOAT32_C(-531601.00), SIMDE_FLOAT32_C(-650297.38), SIMDE_FLOAT32_C( -4078.76), SIMDE_FLOAT32_C( 23899.70), SIMDE_FLOAT32_C(-137110.34), SIMDE_FLOAT32_C(197392.73), SIMDE_FLOAT32_C(410100.44), SIMDE_FLOAT32_C(-630683.50), SIMDE_FLOAT32_C(-119056.88), SIMDE_FLOAT32_C(-736547.25), SIMDE_FLOAT32_C( 11518.63), SIMDE_FLOAT32_C(-397055.84), SIMDE_FLOAT32_C(314184.66)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -910.74), SIMDE_FLOAT32_C( -302.10), SIMDE_FLOAT32_C( 937.08), SIMDE_FLOAT32_C( 618.13), SIMDE_FLOAT32_C( 85.12), SIMDE_FLOAT32_C( 3.50), SIMDE_FLOAT32_C( -122.84), SIMDE_FLOAT32_C( 290.22), SIMDE_FLOAT32_C( 606.76), SIMDE_FLOAT32_C( -664.92), SIMDE_FLOAT32_C( 454.81), SIMDE_FLOAT32_C( 299.40), SIMDE_FLOAT32_C( -524.63), SIMDE_FLOAT32_C( 40.68), SIMDE_FLOAT32_C( 218.77), SIMDE_FLOAT32_C( 35.82)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 392.21), SIMDE_FLOAT32_C( 139.00), SIMDE_FLOAT32_C( -878.97), SIMDE_FLOAT32_C( 778.57), SIMDE_FLOAT32_C( -810.83), SIMDE_FLOAT32_C( 413.49), SIMDE_FLOAT32_C( 505.44), SIMDE_FLOAT32_C( 291.58), SIMDE_FLOAT32_C( -757.25), SIMDE_FLOAT32_C( 594.07), SIMDE_FLOAT32_C( 304.96), 
SIMDE_FLOAT32_C( -155.47), SIMDE_FLOAT32_C( 635.03), SIMDE_FLOAT32_C( 654.85), SIMDE_FLOAT32_C( 777.61), SIMDE_FLOAT32_C( -598.19)), simde_mm512_set_ps(SIMDE_FLOAT32_C(-357201.31), SIMDE_FLOAT32_C(-41991.90), SIMDE_FLOAT32_C(-823665.19), SIMDE_FLOAT32_C(481257.47), SIMDE_FLOAT32_C(-69017.85), SIMDE_FLOAT32_C( 1447.21), SIMDE_FLOAT32_C(-62088.25), SIMDE_FLOAT32_C( 84622.34), SIMDE_FLOAT32_C(-459469.03), SIMDE_FLOAT32_C(-395009.03), SIMDE_FLOAT32_C(138698.86), SIMDE_FLOAT32_C(-46547.72), SIMDE_FLOAT32_C(-333155.81), SIMDE_FLOAT32_C( 26639.30), SIMDE_FLOAT32_C(170117.73), SIMDE_FLOAT32_C(-21427.17)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 202.90), SIMDE_FLOAT32_C( -396.66), SIMDE_FLOAT32_C( -364.01), SIMDE_FLOAT32_C( 56.81), SIMDE_FLOAT32_C( -881.59), SIMDE_FLOAT32_C( 212.81), SIMDE_FLOAT32_C( -968.64), SIMDE_FLOAT32_C( -657.19), SIMDE_FLOAT32_C( 232.02), SIMDE_FLOAT32_C( 984.70), SIMDE_FLOAT32_C( -800.83), SIMDE_FLOAT32_C( -826.63), SIMDE_FLOAT32_C( 822.26), SIMDE_FLOAT32_C( -892.21), SIMDE_FLOAT32_C( -651.70), SIMDE_FLOAT32_C( -380.50)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -221.35), SIMDE_FLOAT32_C( -305.38), SIMDE_FLOAT32_C( 546.45), SIMDE_FLOAT32_C( -697.03), SIMDE_FLOAT32_C( 93.97), SIMDE_FLOAT32_C( 975.92), SIMDE_FLOAT32_C( 876.47), SIMDE_FLOAT32_C( 762.37), SIMDE_FLOAT32_C( 880.83), SIMDE_FLOAT32_C( -763.06), SIMDE_FLOAT32_C( -540.57), SIMDE_FLOAT32_C( -512.55), SIMDE_FLOAT32_C( -32.98), SIMDE_FLOAT32_C( 700.87), SIMDE_FLOAT32_C( -425.19), SIMDE_FLOAT32_C( -849.48)), simde_mm512_set_ps(SIMDE_FLOAT32_C(-44911.91), SIMDE_FLOAT32_C(121132.03), SIMDE_FLOAT32_C(-198913.28), SIMDE_FLOAT32_C(-39598.28), SIMDE_FLOAT32_C(-82843.02), SIMDE_FLOAT32_C(207685.53), SIMDE_FLOAT32_C(-848983.88), SIMDE_FLOAT32_C(-501021.94), SIMDE_FLOAT32_C(204370.19), SIMDE_FLOAT32_C(-751385.19), SIMDE_FLOAT32_C(432904.69), SIMDE_FLOAT32_C(423689.19), SIMDE_FLOAT32_C(-27118.13), SIMDE_FLOAT32_C(-625323.25), SIMDE_FLOAT32_C(277096.34), SIMDE_FLOAT32_C(323227.12)) }, { 
simde_mm512_set_ps(SIMDE_FLOAT32_C( -25.40), SIMDE_FLOAT32_C( -267.80), SIMDE_FLOAT32_C( 353.79), SIMDE_FLOAT32_C( -35.72), SIMDE_FLOAT32_C( 125.21), SIMDE_FLOAT32_C( 137.22), SIMDE_FLOAT32_C( 310.88), SIMDE_FLOAT32_C( -724.55), SIMDE_FLOAT32_C( -538.86), SIMDE_FLOAT32_C( 39.65), SIMDE_FLOAT32_C( -229.28), SIMDE_FLOAT32_C( -842.78), SIMDE_FLOAT32_C( -14.75), SIMDE_FLOAT32_C( -859.98), SIMDE_FLOAT32_C( 215.44), SIMDE_FLOAT32_C( 762.83)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -551.49), SIMDE_FLOAT32_C( -42.33), SIMDE_FLOAT32_C( -926.18), SIMDE_FLOAT32_C( 36.96), SIMDE_FLOAT32_C( 954.39), SIMDE_FLOAT32_C( 874.71), SIMDE_FLOAT32_C( -375.00), SIMDE_FLOAT32_C( 949.07), SIMDE_FLOAT32_C( -16.18), SIMDE_FLOAT32_C( -931.82), SIMDE_FLOAT32_C( -687.15), SIMDE_FLOAT32_C( -416.23), SIMDE_FLOAT32_C( -313.36), SIMDE_FLOAT32_C( 905.90), SIMDE_FLOAT32_C( 1.93), SIMDE_FLOAT32_C( -464.98)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 14007.85), SIMDE_FLOAT32_C( 11335.97), SIMDE_FLOAT32_C(-327673.22), SIMDE_FLOAT32_C( -1320.21), SIMDE_FLOAT32_C(119499.17), SIMDE_FLOAT32_C(120027.71), SIMDE_FLOAT32_C(-116580.00), SIMDE_FLOAT32_C(-687648.69), SIMDE_FLOAT32_C( 8718.75), SIMDE_FLOAT32_C(-36946.66), SIMDE_FLOAT32_C(157549.75), SIMDE_FLOAT32_C(350790.34), SIMDE_FLOAT32_C( 4622.06), SIMDE_FLOAT32_C(-779055.88), SIMDE_FLOAT32_C( 415.80), SIMDE_FLOAT32_C(-354700.72)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -861.86), SIMDE_FLOAT32_C( 437.62), SIMDE_FLOAT32_C( -236.27), SIMDE_FLOAT32_C( 439.08), SIMDE_FLOAT32_C( 476.60), SIMDE_FLOAT32_C( -725.80), SIMDE_FLOAT32_C( 626.57), SIMDE_FLOAT32_C( -848.67), SIMDE_FLOAT32_C( -961.54), SIMDE_FLOAT32_C( -999.94), SIMDE_FLOAT32_C( 788.38), SIMDE_FLOAT32_C( -928.14), SIMDE_FLOAT32_C( 779.51), SIMDE_FLOAT32_C( 846.68), SIMDE_FLOAT32_C( -858.45), SIMDE_FLOAT32_C( 292.21)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -182.57), SIMDE_FLOAT32_C( -580.78), SIMDE_FLOAT32_C( 704.32), SIMDE_FLOAT32_C( -124.94), SIMDE_FLOAT32_C( -725.23), SIMDE_FLOAT32_C( -635.58), SIMDE_FLOAT32_C( 
196.48), SIMDE_FLOAT32_C( -485.66), SIMDE_FLOAT32_C( -906.19), SIMDE_FLOAT32_C( -43.74), SIMDE_FLOAT32_C( 899.84), SIMDE_FLOAT32_C( -720.16), SIMDE_FLOAT32_C( 576.76), SIMDE_FLOAT32_C( 994.06), SIMDE_FLOAT32_C( -108.56), SIMDE_FLOAT32_C( 212.62)), simde_mm512_set_ps(SIMDE_FLOAT32_C(157349.78), SIMDE_FLOAT32_C(-254160.95), SIMDE_FLOAT32_C(-166409.69), SIMDE_FLOAT32_C(-54858.66), SIMDE_FLOAT32_C(-345644.62), SIMDE_FLOAT32_C(461303.97), SIMDE_FLOAT32_C(123108.47), SIMDE_FLOAT32_C(412165.06), SIMDE_FLOAT32_C(871337.94), SIMDE_FLOAT32_C( 43737.38), SIMDE_FLOAT32_C(709415.88), SIMDE_FLOAT32_C(668409.31), SIMDE_FLOAT32_C(449590.19), SIMDE_FLOAT32_C(841650.69), SIMDE_FLOAT32_C( 93193.33), SIMDE_FLOAT32_C( 62129.69)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 593.71), SIMDE_FLOAT32_C( -601.77), SIMDE_FLOAT32_C( -110.10), SIMDE_FLOAT32_C( 145.40), SIMDE_FLOAT32_C( 740.85), SIMDE_FLOAT32_C( 970.19), SIMDE_FLOAT32_C( -854.26), SIMDE_FLOAT32_C( -208.21), SIMDE_FLOAT32_C( 769.57), SIMDE_FLOAT32_C( -297.46), SIMDE_FLOAT32_C( -845.75), SIMDE_FLOAT32_C( -517.72), SIMDE_FLOAT32_C( -240.19), SIMDE_FLOAT32_C( -763.89), SIMDE_FLOAT32_C( -197.03), SIMDE_FLOAT32_C( -33.35)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -533.43), SIMDE_FLOAT32_C( 541.52), SIMDE_FLOAT32_C( -293.53), SIMDE_FLOAT32_C( 215.59), SIMDE_FLOAT32_C( -844.97), SIMDE_FLOAT32_C( -755.33), SIMDE_FLOAT32_C( 468.59), SIMDE_FLOAT32_C( -772.73), SIMDE_FLOAT32_C( 919.17), SIMDE_FLOAT32_C( 772.56), SIMDE_FLOAT32_C( -506.06), SIMDE_FLOAT32_C( 848.47), SIMDE_FLOAT32_C( 289.91), SIMDE_FLOAT32_C( 20.43), SIMDE_FLOAT32_C( -64.43), SIMDE_FLOAT32_C( -706.80)), simde_mm512_set_ps(SIMDE_FLOAT32_C(-316702.72), SIMDE_FLOAT32_C(-325870.50), SIMDE_FLOAT32_C( 32317.65), SIMDE_FLOAT32_C( 31346.78), SIMDE_FLOAT32_C(-625996.00), SIMDE_FLOAT32_C(-732813.62), SIMDE_FLOAT32_C(-400297.69), SIMDE_FLOAT32_C(160890.11), SIMDE_FLOAT32_C(707365.62), SIMDE_FLOAT32_C(-229805.69), SIMDE_FLOAT32_C(428000.25), SIMDE_FLOAT32_C(-439269.84), 
SIMDE_FLOAT32_C(-69633.48), SIMDE_FLOAT32_C(-15606.27), SIMDE_FLOAT32_C( 12694.64), SIMDE_FLOAT32_C( 23571.78)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -180.32), SIMDE_FLOAT32_C( -914.18), SIMDE_FLOAT32_C( -674.78), SIMDE_FLOAT32_C( 230.92), SIMDE_FLOAT32_C( 619.73), SIMDE_FLOAT32_C( -630.60), SIMDE_FLOAT32_C( -418.47), SIMDE_FLOAT32_C( -865.96), SIMDE_FLOAT32_C( -670.71), SIMDE_FLOAT32_C( 17.47), SIMDE_FLOAT32_C( 61.90), SIMDE_FLOAT32_C( 647.63), SIMDE_FLOAT32_C( -455.42), SIMDE_FLOAT32_C( -850.08), SIMDE_FLOAT32_C( 132.45), SIMDE_FLOAT32_C( -354.79)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -729.37), SIMDE_FLOAT32_C( -945.53), SIMDE_FLOAT32_C( 198.36), SIMDE_FLOAT32_C( 643.43), SIMDE_FLOAT32_C( -994.87), SIMDE_FLOAT32_C( -154.38), SIMDE_FLOAT32_C( -536.68), SIMDE_FLOAT32_C( -548.49), SIMDE_FLOAT32_C( -292.06), SIMDE_FLOAT32_C( -771.61), SIMDE_FLOAT32_C( -487.89), SIMDE_FLOAT32_C( -482.82), SIMDE_FLOAT32_C( 131.08), SIMDE_FLOAT32_C( 366.17), SIMDE_FLOAT32_C( 127.55), SIMDE_FLOAT32_C( -936.85)), simde_mm512_set_ps(SIMDE_FLOAT32_C(131520.00), SIMDE_FLOAT32_C(864384.62), SIMDE_FLOAT32_C(-133849.36), SIMDE_FLOAT32_C(148580.86), SIMDE_FLOAT32_C(-616550.75), SIMDE_FLOAT32_C( 97352.02), SIMDE_FLOAT32_C(224584.48), SIMDE_FLOAT32_C(474970.41), SIMDE_FLOAT32_C(195887.56), SIMDE_FLOAT32_C(-13480.03), SIMDE_FLOAT32_C(-30200.39), SIMDE_FLOAT32_C(-312688.72), SIMDE_FLOAT32_C(-59696.46), SIMDE_FLOAT32_C(-311273.81), SIMDE_FLOAT32_C( 16894.00), SIMDE_FLOAT32_C(332385.00)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 405.10), SIMDE_FLOAT32_C( 457.94), SIMDE_FLOAT32_C( 120.94), SIMDE_FLOAT32_C( 944.02), SIMDE_FLOAT32_C( -205.34), SIMDE_FLOAT32_C( 155.90), SIMDE_FLOAT32_C( -913.86), SIMDE_FLOAT32_C( 170.83), SIMDE_FLOAT32_C( -194.64), SIMDE_FLOAT32_C( 505.24), SIMDE_FLOAT32_C( 874.71), SIMDE_FLOAT32_C( -847.65), SIMDE_FLOAT32_C( -72.00), SIMDE_FLOAT32_C( 772.81), SIMDE_FLOAT32_C( -151.00), SIMDE_FLOAT32_C( -489.53)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 923.98), SIMDE_FLOAT32_C( 597.38), 
SIMDE_FLOAT32_C( -237.17), SIMDE_FLOAT32_C( -159.78), SIMDE_FLOAT32_C( -45.00), SIMDE_FLOAT32_C( -167.53), SIMDE_FLOAT32_C( 681.28), SIMDE_FLOAT32_C( -654.80), SIMDE_FLOAT32_C( 504.91), SIMDE_FLOAT32_C( -353.27), SIMDE_FLOAT32_C( -789.06), SIMDE_FLOAT32_C( -566.71), SIMDE_FLOAT32_C( -516.77), SIMDE_FLOAT32_C( 957.42), SIMDE_FLOAT32_C( -465.35), SIMDE_FLOAT32_C( 491.11)), simde_mm512_set_ps(SIMDE_FLOAT32_C(374304.28), SIMDE_FLOAT32_C(273564.19), SIMDE_FLOAT32_C(-28683.34), SIMDE_FLOAT32_C(-150835.52), SIMDE_FLOAT32_C( 9240.30), SIMDE_FLOAT32_C(-26117.93), SIMDE_FLOAT32_C(-622594.56), SIMDE_FLOAT32_C(-111859.48), SIMDE_FLOAT32_C(-98275.68), SIMDE_FLOAT32_C(-178486.12), SIMDE_FLOAT32_C(-690198.69), SIMDE_FLOAT32_C(480371.75), SIMDE_FLOAT32_C( 37207.44), SIMDE_FLOAT32_C(739903.75), SIMDE_FLOAT32_C( 70267.85), SIMDE_FLOAT32_C(-240413.08)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mul_ps(test_vec[i].a, test_vec[i].b); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_mul_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 b; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( 229.27), SIMDE_FLOAT32_C( -114.91), SIMDE_FLOAT32_C( 520.43), SIMDE_FLOAT32_C( -755.19), SIMDE_FLOAT32_C( -68.64), SIMDE_FLOAT32_C( 632.30), SIMDE_FLOAT32_C( 98.14), SIMDE_FLOAT32_C( 455.87), SIMDE_FLOAT32_C( -873.22), SIMDE_FLOAT32_C( -223.86), SIMDE_FLOAT32_C( 181.32), SIMDE_FLOAT32_C( 364.92), SIMDE_FLOAT32_C( 946.51), SIMDE_FLOAT32_C( 22.05), SIMDE_FLOAT32_C( 444.47), SIMDE_FLOAT32_C( -746.17)), UINT16_C( 6152), simde_mm512_set_ps(SIMDE_FLOAT32_C( 702.34), SIMDE_FLOAT32_C( 368.95), SIMDE_FLOAT32_C( 161.91), SIMDE_FLOAT32_C( 459.04), SIMDE_FLOAT32_C( -828.47), SIMDE_FLOAT32_C( 943.39), SIMDE_FLOAT32_C( 202.26), SIMDE_FLOAT32_C( 112.87), SIMDE_FLOAT32_C( 382.91), SIMDE_FLOAT32_C( 124.14), SIMDE_FLOAT32_C( 954.24), 
SIMDE_FLOAT32_C( -214.34), SIMDE_FLOAT32_C( -998.93), SIMDE_FLOAT32_C( -255.92), SIMDE_FLOAT32_C( 57.01), SIMDE_FLOAT32_C( -391.73)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -206.12), SIMDE_FLOAT32_C( -322.43), SIMDE_FLOAT32_C( -456.42), SIMDE_FLOAT32_C( 258.98), SIMDE_FLOAT32_C( 291.55), SIMDE_FLOAT32_C( -459.80), SIMDE_FLOAT32_C( 286.61), SIMDE_FLOAT32_C( 15.13), SIMDE_FLOAT32_C( -772.68), SIMDE_FLOAT32_C( -503.52), SIMDE_FLOAT32_C( -599.88), SIMDE_FLOAT32_C( 107.93), SIMDE_FLOAT32_C( -3.35), SIMDE_FLOAT32_C( -993.69), SIMDE_FLOAT32_C( -325.33), SIMDE_FLOAT32_C( 755.40)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 229.27), SIMDE_FLOAT32_C( -114.91), SIMDE_FLOAT32_C( 520.43), SIMDE_FLOAT32_C(118882.19), SIMDE_FLOAT32_C(-241540.41), SIMDE_FLOAT32_C( 632.30), SIMDE_FLOAT32_C( 98.14), SIMDE_FLOAT32_C( 455.87), SIMDE_FLOAT32_C( -873.22), SIMDE_FLOAT32_C( -223.86), SIMDE_FLOAT32_C( 181.32), SIMDE_FLOAT32_C( 364.92), SIMDE_FLOAT32_C( 3346.42), SIMDE_FLOAT32_C( 22.05), SIMDE_FLOAT32_C( 444.47), SIMDE_FLOAT32_C( -746.17)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 974.52), SIMDE_FLOAT32_C( 42.80), SIMDE_FLOAT32_C( -282.69), SIMDE_FLOAT32_C( -590.27), SIMDE_FLOAT32_C( 398.09), SIMDE_FLOAT32_C( 498.04), SIMDE_FLOAT32_C( 449.97), SIMDE_FLOAT32_C( -357.92), SIMDE_FLOAT32_C( -441.74), SIMDE_FLOAT32_C( -180.77), SIMDE_FLOAT32_C( -289.47), SIMDE_FLOAT32_C( -620.49), SIMDE_FLOAT32_C( 763.75), SIMDE_FLOAT32_C( -763.91), SIMDE_FLOAT32_C( -576.44), SIMDE_FLOAT32_C( 698.61)), UINT16_C(15973), simde_mm512_set_ps(SIMDE_FLOAT32_C( -512.47), SIMDE_FLOAT32_C( -526.12), SIMDE_FLOAT32_C( 488.92), SIMDE_FLOAT32_C( -99.48), SIMDE_FLOAT32_C( 579.58), SIMDE_FLOAT32_C( 601.36), SIMDE_FLOAT32_C( 900.16), SIMDE_FLOAT32_C( 871.84), SIMDE_FLOAT32_C( 797.21), SIMDE_FLOAT32_C( 523.84), SIMDE_FLOAT32_C( -923.94), SIMDE_FLOAT32_C( -14.85), SIMDE_FLOAT32_C( -320.00), SIMDE_FLOAT32_C( -463.51), SIMDE_FLOAT32_C( -980.83), SIMDE_FLOAT32_C( -194.63)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 558.10), SIMDE_FLOAT32_C( 
-796.63), SIMDE_FLOAT32_C( -676.04), SIMDE_FLOAT32_C( 908.46), SIMDE_FLOAT32_C( -870.82), SIMDE_FLOAT32_C( 691.21), SIMDE_FLOAT32_C( -550.67), SIMDE_FLOAT32_C( 268.52), SIMDE_FLOAT32_C( 837.19), SIMDE_FLOAT32_C( -677.60), SIMDE_FLOAT32_C( -171.06), SIMDE_FLOAT32_C( -56.18), SIMDE_FLOAT32_C( 490.37), SIMDE_FLOAT32_C( -61.61), SIMDE_FLOAT32_C( -109.46), SIMDE_FLOAT32_C( -710.13)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 974.52), SIMDE_FLOAT32_C( 42.80), SIMDE_FLOAT32_C(-330529.47), SIMDE_FLOAT32_C(-90373.61), SIMDE_FLOAT32_C(-504709.88), SIMDE_FLOAT32_C(415666.06), SIMDE_FLOAT32_C(-495691.06), SIMDE_FLOAT32_C( -357.92), SIMDE_FLOAT32_C( -441.74), SIMDE_FLOAT32_C(-354954.00), SIMDE_FLOAT32_C(158049.17), SIMDE_FLOAT32_C( -620.49), SIMDE_FLOAT32_C( 763.75), SIMDE_FLOAT32_C( 28556.85), SIMDE_FLOAT32_C( -576.44), SIMDE_FLOAT32_C(138212.61)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -360.39), SIMDE_FLOAT32_C( 643.16), SIMDE_FLOAT32_C( -441.22), SIMDE_FLOAT32_C( 409.21), SIMDE_FLOAT32_C( 666.49), SIMDE_FLOAT32_C( 893.19), SIMDE_FLOAT32_C( -859.97), SIMDE_FLOAT32_C( -253.09), SIMDE_FLOAT32_C( -516.49), SIMDE_FLOAT32_C( -209.00), SIMDE_FLOAT32_C( -119.77), SIMDE_FLOAT32_C( -6.76), SIMDE_FLOAT32_C( 978.44), SIMDE_FLOAT32_C( 847.98), SIMDE_FLOAT32_C( 812.41), SIMDE_FLOAT32_C( -887.11)), UINT16_C(51212), simde_mm512_set_ps(SIMDE_FLOAT32_C( 923.25), SIMDE_FLOAT32_C( 251.46), SIMDE_FLOAT32_C( -49.04), SIMDE_FLOAT32_C( -876.73), SIMDE_FLOAT32_C( -239.21), SIMDE_FLOAT32_C( 952.17), SIMDE_FLOAT32_C( -247.44), SIMDE_FLOAT32_C( -278.60), SIMDE_FLOAT32_C( 877.29), SIMDE_FLOAT32_C( -266.07), SIMDE_FLOAT32_C( -839.50), SIMDE_FLOAT32_C( -281.99), SIMDE_FLOAT32_C( -652.15), SIMDE_FLOAT32_C( -877.11), SIMDE_FLOAT32_C( 527.90), SIMDE_FLOAT32_C( -842.26)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -453.51), SIMDE_FLOAT32_C( 863.74), SIMDE_FLOAT32_C( 571.67), SIMDE_FLOAT32_C( 552.19), SIMDE_FLOAT32_C( -903.81), SIMDE_FLOAT32_C( -274.48), SIMDE_FLOAT32_C( 891.44), SIMDE_FLOAT32_C( 28.40), SIMDE_FLOAT32_C( 
753.09), SIMDE_FLOAT32_C( 415.38), SIMDE_FLOAT32_C( -974.66), SIMDE_FLOAT32_C( -864.92), SIMDE_FLOAT32_C( -696.24), SIMDE_FLOAT32_C( -279.21), SIMDE_FLOAT32_C( -548.00), SIMDE_FLOAT32_C( 3.33)), simde_mm512_set_ps(SIMDE_FLOAT32_C(-418703.12), SIMDE_FLOAT32_C(217196.06), SIMDE_FLOAT32_C( -441.22), SIMDE_FLOAT32_C( 409.21), SIMDE_FLOAT32_C(216200.39), SIMDE_FLOAT32_C( 893.19), SIMDE_FLOAT32_C( -859.97), SIMDE_FLOAT32_C( -253.09), SIMDE_FLOAT32_C( -516.49), SIMDE_FLOAT32_C( -209.00), SIMDE_FLOAT32_C( -119.77), SIMDE_FLOAT32_C( -6.76), SIMDE_FLOAT32_C(454052.94), SIMDE_FLOAT32_C(244897.88), SIMDE_FLOAT32_C( 812.41), SIMDE_FLOAT32_C( -887.11)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 325.22), SIMDE_FLOAT32_C( 570.14), SIMDE_FLOAT32_C( 680.58), SIMDE_FLOAT32_C( -511.81), SIMDE_FLOAT32_C( -117.17), SIMDE_FLOAT32_C( -613.97), SIMDE_FLOAT32_C( 182.50), SIMDE_FLOAT32_C( 480.12), SIMDE_FLOAT32_C( -750.83), SIMDE_FLOAT32_C( 220.35), SIMDE_FLOAT32_C( 724.25), SIMDE_FLOAT32_C( 984.66), SIMDE_FLOAT32_C( 871.75), SIMDE_FLOAT32_C( -772.37), SIMDE_FLOAT32_C( 130.52), SIMDE_FLOAT32_C( 736.76)), UINT16_C(42108), simde_mm512_set_ps(SIMDE_FLOAT32_C( 960.66), SIMDE_FLOAT32_C( -509.07), SIMDE_FLOAT32_C( 477.59), SIMDE_FLOAT32_C( -132.21), SIMDE_FLOAT32_C( 254.98), SIMDE_FLOAT32_C( 600.06), SIMDE_FLOAT32_C( 43.49), SIMDE_FLOAT32_C( 466.19), SIMDE_FLOAT32_C( 22.31), SIMDE_FLOAT32_C( -551.17), SIMDE_FLOAT32_C( -167.87), SIMDE_FLOAT32_C( 278.33), SIMDE_FLOAT32_C( -232.38), SIMDE_FLOAT32_C( 650.45), SIMDE_FLOAT32_C( -297.78), SIMDE_FLOAT32_C( -280.35)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 562.20), SIMDE_FLOAT32_C( -287.04), SIMDE_FLOAT32_C( 876.78), SIMDE_FLOAT32_C( 669.33), SIMDE_FLOAT32_C( 940.57), SIMDE_FLOAT32_C( -280.66), SIMDE_FLOAT32_C( 24.42), SIMDE_FLOAT32_C( -147.12), SIMDE_FLOAT32_C( -57.84), SIMDE_FLOAT32_C( 841.25), SIMDE_FLOAT32_C( -446.10), SIMDE_FLOAT32_C( -973.24), SIMDE_FLOAT32_C( 869.66), SIMDE_FLOAT32_C( 982.80), SIMDE_FLOAT32_C( -763.04), SIMDE_FLOAT32_C( -245.47)), 
simde_mm512_set_ps(SIMDE_FLOAT32_C(540083.06), SIMDE_FLOAT32_C( 570.14), SIMDE_FLOAT32_C(418741.38), SIMDE_FLOAT32_C( -511.81), SIMDE_FLOAT32_C( -117.17), SIMDE_FLOAT32_C(-168412.84), SIMDE_FLOAT32_C( 182.50), SIMDE_FLOAT32_C( 480.12), SIMDE_FLOAT32_C( -750.83), SIMDE_FLOAT32_C(-463671.75), SIMDE_FLOAT32_C( 74886.80), SIMDE_FLOAT32_C(-270881.88), SIMDE_FLOAT32_C(-202091.59), SIMDE_FLOAT32_C(639262.25), SIMDE_FLOAT32_C( 130.52), SIMDE_FLOAT32_C( 736.76)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -770.00), SIMDE_FLOAT32_C( 327.27), SIMDE_FLOAT32_C( -642.48), SIMDE_FLOAT32_C( 864.18), SIMDE_FLOAT32_C( -775.21), SIMDE_FLOAT32_C( -92.35), SIMDE_FLOAT32_C( -864.16), SIMDE_FLOAT32_C( 80.78), SIMDE_FLOAT32_C( -974.40), SIMDE_FLOAT32_C( -299.06), SIMDE_FLOAT32_C( -754.35), SIMDE_FLOAT32_C( -147.65), SIMDE_FLOAT32_C( -797.65), SIMDE_FLOAT32_C( 829.71), SIMDE_FLOAT32_C( 269.35), SIMDE_FLOAT32_C( 372.83)), UINT16_C(61342), simde_mm512_set_ps(SIMDE_FLOAT32_C( -220.25), SIMDE_FLOAT32_C( 802.77), SIMDE_FLOAT32_C( -755.69), SIMDE_FLOAT32_C( -58.33), SIMDE_FLOAT32_C( 587.03), SIMDE_FLOAT32_C( 375.88), SIMDE_FLOAT32_C( 775.50), SIMDE_FLOAT32_C( -179.11), SIMDE_FLOAT32_C( 184.41), SIMDE_FLOAT32_C( -603.91), SIMDE_FLOAT32_C( -170.90), SIMDE_FLOAT32_C( -781.45), SIMDE_FLOAT32_C( -860.97), SIMDE_FLOAT32_C( -616.84), SIMDE_FLOAT32_C( 704.72), SIMDE_FLOAT32_C( -251.07)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -62.78), SIMDE_FLOAT32_C( -149.86), SIMDE_FLOAT32_C( 102.32), SIMDE_FLOAT32_C( -271.05), SIMDE_FLOAT32_C( -465.52), SIMDE_FLOAT32_C( 979.82), SIMDE_FLOAT32_C( 499.92), SIMDE_FLOAT32_C( 32.84), SIMDE_FLOAT32_C( 792.53), SIMDE_FLOAT32_C( 466.38), SIMDE_FLOAT32_C( -301.08), SIMDE_FLOAT32_C( -381.33), SIMDE_FLOAT32_C( -752.23), SIMDE_FLOAT32_C( 18.86), SIMDE_FLOAT32_C( -462.80), SIMDE_FLOAT32_C( -168.70)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 13827.29), SIMDE_FLOAT32_C(-120303.12), SIMDE_FLOAT32_C(-77322.20), SIMDE_FLOAT32_C( 864.18), SIMDE_FLOAT32_C(-273274.22), SIMDE_FLOAT32_C(368294.75), 
SIMDE_FLOAT32_C(387687.97), SIMDE_FLOAT32_C( -5881.97), SIMDE_FLOAT32_C(146150.47), SIMDE_FLOAT32_C( -299.06), SIMDE_FLOAT32_C( -754.35), SIMDE_FLOAT32_C(297990.31), SIMDE_FLOAT32_C(647647.44), SIMDE_FLOAT32_C(-11633.60), SIMDE_FLOAT32_C(-326144.41), SIMDE_FLOAT32_C( 372.83)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -62.71), SIMDE_FLOAT32_C( 768.74), SIMDE_FLOAT32_C( 172.84), SIMDE_FLOAT32_C( -481.12), SIMDE_FLOAT32_C( 290.59), SIMDE_FLOAT32_C( -598.30), SIMDE_FLOAT32_C( -212.50), SIMDE_FLOAT32_C( 657.51), SIMDE_FLOAT32_C( -400.85), SIMDE_FLOAT32_C( 353.00), SIMDE_FLOAT32_C( -898.98), SIMDE_FLOAT32_C( -461.75), SIMDE_FLOAT32_C( -690.46), SIMDE_FLOAT32_C( -171.93), SIMDE_FLOAT32_C( 135.84), SIMDE_FLOAT32_C( -604.52)), UINT16_C(61129), simde_mm512_set_ps(SIMDE_FLOAT32_C( -267.02), SIMDE_FLOAT32_C( -31.24), SIMDE_FLOAT32_C( -859.84), SIMDE_FLOAT32_C( -104.89), SIMDE_FLOAT32_C( -39.96), SIMDE_FLOAT32_C( 179.68), SIMDE_FLOAT32_C( -71.01), SIMDE_FLOAT32_C( 557.26), SIMDE_FLOAT32_C( 127.39), SIMDE_FLOAT32_C( 271.58), SIMDE_FLOAT32_C( -162.76), SIMDE_FLOAT32_C( 248.01), SIMDE_FLOAT32_C( 856.68), SIMDE_FLOAT32_C( 762.32), SIMDE_FLOAT32_C( 432.07), SIMDE_FLOAT32_C( 743.06)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -846.50), SIMDE_FLOAT32_C( 988.92), SIMDE_FLOAT32_C( 696.05), SIMDE_FLOAT32_C( 647.58), SIMDE_FLOAT32_C( 780.95), SIMDE_FLOAT32_C( 46.01), SIMDE_FLOAT32_C( -77.15), SIMDE_FLOAT32_C( -747.70), SIMDE_FLOAT32_C( -416.84), SIMDE_FLOAT32_C( 679.81), SIMDE_FLOAT32_C( -124.78), SIMDE_FLOAT32_C( -976.50), SIMDE_FLOAT32_C( -745.93), SIMDE_FLOAT32_C( 116.64), SIMDE_FLOAT32_C( -479.84), SIMDE_FLOAT32_C( 919.24)), simde_mm512_set_ps(SIMDE_FLOAT32_C(226032.42), SIMDE_FLOAT32_C(-30893.86), SIMDE_FLOAT32_C(-598491.62), SIMDE_FLOAT32_C( -481.12), SIMDE_FLOAT32_C(-31206.76), SIMDE_FLOAT32_C( 8267.08), SIMDE_FLOAT32_C( 5478.42), SIMDE_FLOAT32_C( 657.51), SIMDE_FLOAT32_C(-53101.25), SIMDE_FLOAT32_C(184622.80), SIMDE_FLOAT32_C( -898.98), SIMDE_FLOAT32_C( -461.75), 
SIMDE_FLOAT32_C(-639023.31), SIMDE_FLOAT32_C( -171.93), SIMDE_FLOAT32_C( 135.84), SIMDE_FLOAT32_C(683050.44)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 722.50), SIMDE_FLOAT32_C( 1.66), SIMDE_FLOAT32_C( -227.96), SIMDE_FLOAT32_C( -417.42), SIMDE_FLOAT32_C( 237.94), SIMDE_FLOAT32_C( 860.12), SIMDE_FLOAT32_C( -692.46), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( -979.01), SIMDE_FLOAT32_C( 20.38), SIMDE_FLOAT32_C( 85.42), SIMDE_FLOAT32_C( -156.50), SIMDE_FLOAT32_C( 23.29), SIMDE_FLOAT32_C( -569.89), SIMDE_FLOAT32_C( 24.40), SIMDE_FLOAT32_C( 257.32)), UINT16_C(53230), simde_mm512_set_ps(SIMDE_FLOAT32_C( -804.04), SIMDE_FLOAT32_C( -689.70), SIMDE_FLOAT32_C( 435.74), SIMDE_FLOAT32_C( 49.95), SIMDE_FLOAT32_C( 554.19), SIMDE_FLOAT32_C( 936.14), SIMDE_FLOAT32_C( 554.63), SIMDE_FLOAT32_C( -242.02), SIMDE_FLOAT32_C( -909.08), SIMDE_FLOAT32_C( -184.42), SIMDE_FLOAT32_C( -668.15), SIMDE_FLOAT32_C( 202.23), SIMDE_FLOAT32_C( 620.00), SIMDE_FLOAT32_C( -11.65), SIMDE_FLOAT32_C( -295.73), SIMDE_FLOAT32_C( -637.18)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -700.26), SIMDE_FLOAT32_C( -48.09), SIMDE_FLOAT32_C( -661.22), SIMDE_FLOAT32_C( -536.85), SIMDE_FLOAT32_C( -172.68), SIMDE_FLOAT32_C( 263.32), SIMDE_FLOAT32_C( -189.80), SIMDE_FLOAT32_C( -595.66), SIMDE_FLOAT32_C( 244.26), SIMDE_FLOAT32_C( -637.08), SIMDE_FLOAT32_C( -871.35), SIMDE_FLOAT32_C( -417.36), SIMDE_FLOAT32_C( -313.14), SIMDE_FLOAT32_C( -902.95), SIMDE_FLOAT32_C( -801.13), SIMDE_FLOAT32_C( -357.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C(563037.06), SIMDE_FLOAT32_C( 33167.67), SIMDE_FLOAT32_C( -227.96), SIMDE_FLOAT32_C( -417.42), SIMDE_FLOAT32_C(-95697.52), SIMDE_FLOAT32_C(246504.39), SIMDE_FLOAT32_C(-105268.77), SIMDE_FLOAT32_C(144161.62), SIMDE_FLOAT32_C(-222051.88), SIMDE_FLOAT32_C(117490.30), SIMDE_FLOAT32_C(582192.50), SIMDE_FLOAT32_C( -156.50), SIMDE_FLOAT32_C(-194146.81), SIMDE_FLOAT32_C( 10519.37), SIMDE_FLOAT32_C(236918.19), SIMDE_FLOAT32_C( 257.32)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 113.45), SIMDE_FLOAT32_C( -47.38), 
SIMDE_FLOAT32_C( 434.74), SIMDE_FLOAT32_C( 133.40), SIMDE_FLOAT32_C( 37.09), SIMDE_FLOAT32_C( -195.77), SIMDE_FLOAT32_C( -878.67), SIMDE_FLOAT32_C( 758.19), SIMDE_FLOAT32_C( -87.72), SIMDE_FLOAT32_C( -903.51), SIMDE_FLOAT32_C( -821.22), SIMDE_FLOAT32_C( -102.72), SIMDE_FLOAT32_C( 329.70), SIMDE_FLOAT32_C( 752.97), SIMDE_FLOAT32_C( -341.79), SIMDE_FLOAT32_C( -130.85)), UINT16_C(62361), simde_mm512_set_ps(SIMDE_FLOAT32_C( -994.03), SIMDE_FLOAT32_C( -716.03), SIMDE_FLOAT32_C( -435.51), SIMDE_FLOAT32_C( -960.04), SIMDE_FLOAT32_C( 32.26), SIMDE_FLOAT32_C( -474.76), SIMDE_FLOAT32_C( -182.77), SIMDE_FLOAT32_C( -229.72), SIMDE_FLOAT32_C( -949.63), SIMDE_FLOAT32_C( -938.60), SIMDE_FLOAT32_C( -855.41), SIMDE_FLOAT32_C( -231.99), SIMDE_FLOAT32_C( 115.21), SIMDE_FLOAT32_C( 716.21), SIMDE_FLOAT32_C( -407.80), SIMDE_FLOAT32_C( 373.68)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 186.12), SIMDE_FLOAT32_C( -224.01), SIMDE_FLOAT32_C( 199.06), SIMDE_FLOAT32_C( -162.12), SIMDE_FLOAT32_C( -212.56), SIMDE_FLOAT32_C( -205.93), SIMDE_FLOAT32_C( -577.22), SIMDE_FLOAT32_C( -567.47), SIMDE_FLOAT32_C( -916.44), SIMDE_FLOAT32_C( 780.43), SIMDE_FLOAT32_C( -604.79), SIMDE_FLOAT32_C( 540.03), SIMDE_FLOAT32_C( -974.56), SIMDE_FLOAT32_C( -517.05), SIMDE_FLOAT32_C( -241.22), SIMDE_FLOAT32_C( 102.85)), simde_mm512_set_ps(SIMDE_FLOAT32_C(-185008.86), SIMDE_FLOAT32_C(160397.89), SIMDE_FLOAT32_C(-86692.62), SIMDE_FLOAT32_C(155641.67), SIMDE_FLOAT32_C( 37.09), SIMDE_FLOAT32_C( -195.77), SIMDE_FLOAT32_C(105498.50), SIMDE_FLOAT32_C(130359.20), SIMDE_FLOAT32_C(870278.94), SIMDE_FLOAT32_C( -903.51), SIMDE_FLOAT32_C( -821.22), SIMDE_FLOAT32_C(-125281.57), SIMDE_FLOAT32_C(-112279.05), SIMDE_FLOAT32_C( 752.97), SIMDE_FLOAT32_C( -341.79), SIMDE_FLOAT32_C( 38432.99)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_mul_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int 
test_simde_mm512_maskz_mul_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512 a; simde__m512 b; simde__m512 r; } test_vec[8] = { { UINT16_C(47289), simde_mm512_set_ps(SIMDE_FLOAT32_C( -658.59), SIMDE_FLOAT32_C( -110.05), SIMDE_FLOAT32_C( -529.45), SIMDE_FLOAT32_C( 46.72), SIMDE_FLOAT32_C( -62.14), SIMDE_FLOAT32_C( 483.09), SIMDE_FLOAT32_C( 301.22), SIMDE_FLOAT32_C( -113.80), SIMDE_FLOAT32_C( -597.24), SIMDE_FLOAT32_C( 55.35), SIMDE_FLOAT32_C( 938.56), SIMDE_FLOAT32_C( -50.24), SIMDE_FLOAT32_C( 49.65), SIMDE_FLOAT32_C( -991.96), SIMDE_FLOAT32_C( 606.92), SIMDE_FLOAT32_C( 149.59)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 375.27), SIMDE_FLOAT32_C( -498.85), SIMDE_FLOAT32_C( -342.99), SIMDE_FLOAT32_C( 861.28), SIMDE_FLOAT32_C( 526.60), SIMDE_FLOAT32_C( -759.33), SIMDE_FLOAT32_C( 328.64), SIMDE_FLOAT32_C( 698.74), SIMDE_FLOAT32_C( 615.23), SIMDE_FLOAT32_C( 873.23), SIMDE_FLOAT32_C( 127.27), SIMDE_FLOAT32_C( 719.43), SIMDE_FLOAT32_C( -625.99), SIMDE_FLOAT32_C( -942.07), SIMDE_FLOAT32_C( 458.53), SIMDE_FLOAT32_C( 322.40)), simde_mm512_set_ps(SIMDE_FLOAT32_C(-247149.08), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(181596.05), SIMDE_FLOAT32_C( 40239.00), SIMDE_FLOAT32_C(-32722.92), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-367439.94), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(119450.53), SIMDE_FLOAT32_C(-36144.16), SIMDE_FLOAT32_C(-31080.40), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 48227.81)) }, { UINT16_C(37892), simde_mm512_set_ps(SIMDE_FLOAT32_C( -265.18), SIMDE_FLOAT32_C( 394.19), SIMDE_FLOAT32_C( 565.39), SIMDE_FLOAT32_C( -167.00), SIMDE_FLOAT32_C( 350.77), SIMDE_FLOAT32_C( 863.35), SIMDE_FLOAT32_C( -537.11), SIMDE_FLOAT32_C( -601.68), SIMDE_FLOAT32_C( -980.35), SIMDE_FLOAT32_C( -851.86), SIMDE_FLOAT32_C( -959.52), SIMDE_FLOAT32_C( -856.72), SIMDE_FLOAT32_C( 393.09), SIMDE_FLOAT32_C( -263.92), SIMDE_FLOAT32_C( 261.53), SIMDE_FLOAT32_C( 409.88)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 
-261.28), SIMDE_FLOAT32_C( -762.17), SIMDE_FLOAT32_C( 850.55), SIMDE_FLOAT32_C( -684.91), SIMDE_FLOAT32_C( 69.61), SIMDE_FLOAT32_C( 771.73), SIMDE_FLOAT32_C( -506.14), SIMDE_FLOAT32_C( -578.92), SIMDE_FLOAT32_C( 322.24), SIMDE_FLOAT32_C( 192.10), SIMDE_FLOAT32_C( -768.24), SIMDE_FLOAT32_C( -528.40), SIMDE_FLOAT32_C( -871.80), SIMDE_FLOAT32_C( -55.77), SIMDE_FLOAT32_C( 401.18), SIMDE_FLOAT32_C( -914.96)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 69286.23), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(114379.97), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(666273.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 14718.82), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(21270), simde_mm512_set_ps(SIMDE_FLOAT32_C( 124.49), SIMDE_FLOAT32_C( 701.35), SIMDE_FLOAT32_C( 498.79), SIMDE_FLOAT32_C( 832.83), SIMDE_FLOAT32_C( -974.32), SIMDE_FLOAT32_C( -582.20), SIMDE_FLOAT32_C( -288.73), SIMDE_FLOAT32_C( 146.91), SIMDE_FLOAT32_C( 866.64), SIMDE_FLOAT32_C( 902.02), SIMDE_FLOAT32_C( -35.40), SIMDE_FLOAT32_C( -390.90), SIMDE_FLOAT32_C( -670.61), SIMDE_FLOAT32_C( -294.26), SIMDE_FLOAT32_C( 904.08), SIMDE_FLOAT32_C( -920.18)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 282.01), SIMDE_FLOAT32_C( 688.91), SIMDE_FLOAT32_C( 333.36), SIMDE_FLOAT32_C( 260.07), SIMDE_FLOAT32_C( 576.07), SIMDE_FLOAT32_C( 133.85), SIMDE_FLOAT32_C( 534.76), SIMDE_FLOAT32_C( -643.54), SIMDE_FLOAT32_C( -999.40), SIMDE_FLOAT32_C( 257.62), SIMDE_FLOAT32_C( 420.35), SIMDE_FLOAT32_C( -394.28), SIMDE_FLOAT32_C( 211.89), SIMDE_FLOAT32_C( 496.82), SIMDE_FLOAT32_C( -993.25), SIMDE_FLOAT32_C( -590.67)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(483167.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(216594.11), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-154401.27), SIMDE_FLOAT32_C(-94542.46), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(154124.05), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-146194.27), SIMDE_FLOAT32_C(-897977.50), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(31632), simde_mm512_set_ps(SIMDE_FLOAT32_C( 50.34), SIMDE_FLOAT32_C( -97.56), SIMDE_FLOAT32_C( 263.08), SIMDE_FLOAT32_C( -308.40), SIMDE_FLOAT32_C( 354.47), SIMDE_FLOAT32_C( -70.93), SIMDE_FLOAT32_C( 486.01), SIMDE_FLOAT32_C( -938.29), SIMDE_FLOAT32_C( -47.71), SIMDE_FLOAT32_C( -345.27), SIMDE_FLOAT32_C( 12.62), SIMDE_FLOAT32_C( 733.96), SIMDE_FLOAT32_C( 753.32), SIMDE_FLOAT32_C( -397.23), SIMDE_FLOAT32_C( 708.66), SIMDE_FLOAT32_C( 404.50)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 875.93), SIMDE_FLOAT32_C( -911.86), SIMDE_FLOAT32_C( 784.71), SIMDE_FLOAT32_C( 64.13), SIMDE_FLOAT32_C( -731.87), SIMDE_FLOAT32_C( -647.62), SIMDE_FLOAT32_C( 107.77), SIMDE_FLOAT32_C( 557.50), SIMDE_FLOAT32_C( -491.55), SIMDE_FLOAT32_C( 414.15), SIMDE_FLOAT32_C( -504.43), SIMDE_FLOAT32_C( -27.13), SIMDE_FLOAT32_C( -947.21), SIMDE_FLOAT32_C( -164.39), SIMDE_FLOAT32_C( 287.82), SIMDE_FLOAT32_C( 414.18)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 88961.05), SIMDE_FLOAT32_C(206441.50), SIMDE_FLOAT32_C(-19777.69), SIMDE_FLOAT32_C(-259425.95), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 52377.30), SIMDE_FLOAT32_C(-523096.66), SIMDE_FLOAT32_C( 23451.85), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-19912.33), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(47299), simde_mm512_set_ps(SIMDE_FLOAT32_C( -544.18), SIMDE_FLOAT32_C( -903.28), SIMDE_FLOAT32_C( 559.95), SIMDE_FLOAT32_C( -483.39), SIMDE_FLOAT32_C( -994.67), SIMDE_FLOAT32_C( -750.48), SIMDE_FLOAT32_C( 312.50), SIMDE_FLOAT32_C( 110.85), SIMDE_FLOAT32_C( -430.65), SIMDE_FLOAT32_C( 39.80), SIMDE_FLOAT32_C( -26.24), SIMDE_FLOAT32_C( 378.89), SIMDE_FLOAT32_C( -139.95), SIMDE_FLOAT32_C( -775.11), SIMDE_FLOAT32_C( -758.69), SIMDE_FLOAT32_C( 318.51)), 
simde_mm512_set_ps(SIMDE_FLOAT32_C( 548.46), SIMDE_FLOAT32_C( 682.86), SIMDE_FLOAT32_C( -635.50), SIMDE_FLOAT32_C( 737.24), SIMDE_FLOAT32_C( 707.78), SIMDE_FLOAT32_C( -907.72), SIMDE_FLOAT32_C( -791.08), SIMDE_FLOAT32_C( 176.45), SIMDE_FLOAT32_C( 64.55), SIMDE_FLOAT32_C( 55.56), SIMDE_FLOAT32_C( -108.86), SIMDE_FLOAT32_C( 505.77), SIMDE_FLOAT32_C( 224.25), SIMDE_FLOAT32_C( 639.22), SIMDE_FLOAT32_C( 369.92), SIMDE_FLOAT32_C( -708.31)), simde_mm512_set_ps(SIMDE_FLOAT32_C(-298460.97), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-355848.22), SIMDE_FLOAT32_C(-356374.44), SIMDE_FLOAT32_C(-704007.56), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-27798.46), SIMDE_FLOAT32_C( 2211.29), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-280654.62), SIMDE_FLOAT32_C(-225603.83)) }, { UINT16_C(40773), simde_mm512_set_ps(SIMDE_FLOAT32_C( -86.71), SIMDE_FLOAT32_C( -432.63), SIMDE_FLOAT32_C( -433.37), SIMDE_FLOAT32_C( -539.66), SIMDE_FLOAT32_C( 559.54), SIMDE_FLOAT32_C( -287.88), SIMDE_FLOAT32_C( -991.42), SIMDE_FLOAT32_C( -690.07), SIMDE_FLOAT32_C( 345.70), SIMDE_FLOAT32_C( 616.00), SIMDE_FLOAT32_C( 341.79), SIMDE_FLOAT32_C( -307.10), SIMDE_FLOAT32_C( 709.24), SIMDE_FLOAT32_C( -920.15), SIMDE_FLOAT32_C( 404.20), SIMDE_FLOAT32_C( 52.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 815.79), SIMDE_FLOAT32_C( -788.62), SIMDE_FLOAT32_C( -370.04), SIMDE_FLOAT32_C( -939.88), SIMDE_FLOAT32_C( -591.16), SIMDE_FLOAT32_C( -819.13), SIMDE_FLOAT32_C( 932.20), SIMDE_FLOAT32_C( -243.11), SIMDE_FLOAT32_C( -87.62), SIMDE_FLOAT32_C( 725.93), SIMDE_FLOAT32_C( -374.67), SIMDE_FLOAT32_C( 301.09), SIMDE_FLOAT32_C( -174.47), SIMDE_FLOAT32_C( -898.14), SIMDE_FLOAT32_C( -924.02), SIMDE_FLOAT32_C( -333.66)), simde_mm512_set_ps(SIMDE_FLOAT32_C(-70737.15), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(507215.62), SIMDE_FLOAT32_C(-330777.62), SIMDE_FLOAT32_C(235811.16), SIMDE_FLOAT32_C(-924201.75), 
SIMDE_FLOAT32_C(167762.92), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(447172.88), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(826423.56), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-17520.49)) }, { UINT16_C(61172), simde_mm512_set_ps(SIMDE_FLOAT32_C( -690.89), SIMDE_FLOAT32_C( -270.55), SIMDE_FLOAT32_C( 476.48), SIMDE_FLOAT32_C( 219.51), SIMDE_FLOAT32_C( 642.18), SIMDE_FLOAT32_C( -588.29), SIMDE_FLOAT32_C( -762.74), SIMDE_FLOAT32_C( -33.80), SIMDE_FLOAT32_C( -786.44), SIMDE_FLOAT32_C( -855.21), SIMDE_FLOAT32_C( 145.12), SIMDE_FLOAT32_C( 50.96), SIMDE_FLOAT32_C( 710.85), SIMDE_FLOAT32_C( 234.05), SIMDE_FLOAT32_C( 345.96), SIMDE_FLOAT32_C( 118.24)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -143.72), SIMDE_FLOAT32_C( -461.06), SIMDE_FLOAT32_C( 866.17), SIMDE_FLOAT32_C( -706.49), SIMDE_FLOAT32_C( 594.76), SIMDE_FLOAT32_C( 424.92), SIMDE_FLOAT32_C( 166.20), SIMDE_FLOAT32_C( 776.85), SIMDE_FLOAT32_C( -191.32), SIMDE_FLOAT32_C( -329.15), SIMDE_FLOAT32_C( -651.62), SIMDE_FLOAT32_C( -22.33), SIMDE_FLOAT32_C( -429.53), SIMDE_FLOAT32_C( 758.36), SIMDE_FLOAT32_C( 926.10), SIMDE_FLOAT32_C( 17.27)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 99294.71), SIMDE_FLOAT32_C(124739.77), SIMDE_FLOAT32_C(412712.69), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(381942.97), SIMDE_FLOAT32_C(-249976.19), SIMDE_FLOAT32_C(-126767.38), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(150461.70), SIMDE_FLOAT32_C(281492.38), SIMDE_FLOAT32_C(-94563.09), SIMDE_FLOAT32_C( -1137.94), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(177494.16), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(31704), simde_mm512_set_ps(SIMDE_FLOAT32_C( 761.96), SIMDE_FLOAT32_C( 858.35), SIMDE_FLOAT32_C( 360.42), SIMDE_FLOAT32_C( 321.87), SIMDE_FLOAT32_C( -444.03), SIMDE_FLOAT32_C( -177.67), SIMDE_FLOAT32_C( -802.25), SIMDE_FLOAT32_C( 408.95), SIMDE_FLOAT32_C( -337.63), SIMDE_FLOAT32_C( 948.17), SIMDE_FLOAT32_C( 248.80), SIMDE_FLOAT32_C( 170.02), SIMDE_FLOAT32_C( 939.41), SIMDE_FLOAT32_C( -580.14), 
SIMDE_FLOAT32_C( 237.93), SIMDE_FLOAT32_C( -698.11)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 786.48), SIMDE_FLOAT32_C( -475.23), SIMDE_FLOAT32_C( 258.84), SIMDE_FLOAT32_C( 824.39), SIMDE_FLOAT32_C( -282.56), SIMDE_FLOAT32_C( 765.76), SIMDE_FLOAT32_C( -709.23), SIMDE_FLOAT32_C( 701.93), SIMDE_FLOAT32_C( -367.75), SIMDE_FLOAT32_C( 404.32), SIMDE_FLOAT32_C( -447.00), SIMDE_FLOAT32_C( 864.94), SIMDE_FLOAT32_C( 954.31), SIMDE_FLOAT32_C( 410.35), SIMDE_FLOAT32_C( -565.19), SIMDE_FLOAT32_C( -545.67)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(-407913.66), SIMDE_FLOAT32_C( 93291.12), SIMDE_FLOAT32_C(265346.41), SIMDE_FLOAT32_C(125465.12), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(568979.75), SIMDE_FLOAT32_C(287054.28), SIMDE_FLOAT32_C(124163.44), SIMDE_FLOAT32_C(383364.09), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C(147057.11), SIMDE_FLOAT32_C(896488.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_maskz_mul_ps(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mul_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d b; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -736.65), SIMDE_FLOAT64_C( -764.30), SIMDE_FLOAT64_C( 675.25), SIMDE_FLOAT64_C( -182.15), SIMDE_FLOAT64_C( -748.44), SIMDE_FLOAT64_C( 82.10), SIMDE_FLOAT64_C( 684.52), SIMDE_FLOAT64_C( -343.09)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -267.96), SIMDE_FLOAT64_C( -536.57), SIMDE_FLOAT64_C( -934.00), SIMDE_FLOAT64_C( 653.62), SIMDE_FLOAT64_C( 984.11), SIMDE_FLOAT64_C( 140.30), SIMDE_FLOAT64_C( -580.05), SIMDE_FLOAT64_C( -915.75)), simde_mm512_set_pd(SIMDE_FLOAT64_C(197392.73), SIMDE_FLOAT64_C(410100.45), SIMDE_FLOAT64_C(-630683.50), SIMDE_FLOAT64_C(-119056.88), SIMDE_FLOAT64_C(-736547.29), SIMDE_FLOAT64_C(11518.63), SIMDE_FLOAT64_C(-397055.83), 
SIMDE_FLOAT64_C(314184.67)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -775.40), SIMDE_FLOAT64_C( -210.92), SIMDE_FLOAT64_C( 987.42), SIMDE_FLOAT64_C( 542.45), SIMDE_FLOAT64_C( -745.60), SIMDE_FLOAT64_C( -50.38), SIMDE_FLOAT64_C( 163.82), SIMDE_FLOAT64_C( -164.62)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 263.91), SIMDE_FLOAT64_C( -350.89), SIMDE_FLOAT64_C( -318.01), SIMDE_FLOAT64_C( -980.00), SIMDE_FLOAT64_C( 872.18), SIMDE_FLOAT64_C( 80.96), SIMDE_FLOAT64_C( 145.89), SIMDE_FLOAT64_C( 832.89)), simde_mm512_set_pd(SIMDE_FLOAT64_C(-204635.81), SIMDE_FLOAT64_C(74009.72), SIMDE_FLOAT64_C(-314009.43), SIMDE_FLOAT64_C(-531601.00), SIMDE_FLOAT64_C(-650297.41), SIMDE_FLOAT64_C(-4078.76), SIMDE_FLOAT64_C(23899.70), SIMDE_FLOAT64_C(-137110.35)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 606.76), SIMDE_FLOAT64_C( -664.92), SIMDE_FLOAT64_C( 454.81), SIMDE_FLOAT64_C( 299.40), SIMDE_FLOAT64_C( -524.63), SIMDE_FLOAT64_C( 40.68), SIMDE_FLOAT64_C( 218.77), SIMDE_FLOAT64_C( 35.82)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -757.25), SIMDE_FLOAT64_C( 594.07), SIMDE_FLOAT64_C( 304.96), SIMDE_FLOAT64_C( -155.47), SIMDE_FLOAT64_C( 635.03), SIMDE_FLOAT64_C( 654.85), SIMDE_FLOAT64_C( 777.61), SIMDE_FLOAT64_C( -598.19)), simde_mm512_set_pd(SIMDE_FLOAT64_C(-459469.01), SIMDE_FLOAT64_C(-395009.02), SIMDE_FLOAT64_C(138698.86), SIMDE_FLOAT64_C(-46547.72), SIMDE_FLOAT64_C(-333155.79), SIMDE_FLOAT64_C(26639.30), SIMDE_FLOAT64_C(170117.74), SIMDE_FLOAT64_C(-21427.17)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -910.74), SIMDE_FLOAT64_C( -302.10), SIMDE_FLOAT64_C( 937.08), SIMDE_FLOAT64_C( 618.13), SIMDE_FLOAT64_C( 85.12), SIMDE_FLOAT64_C( 3.50), SIMDE_FLOAT64_C( -122.84), SIMDE_FLOAT64_C( 290.22)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 392.21), SIMDE_FLOAT64_C( 139.00), SIMDE_FLOAT64_C( -878.97), SIMDE_FLOAT64_C( 778.57), SIMDE_FLOAT64_C( -810.83), SIMDE_FLOAT64_C( 413.49), SIMDE_FLOAT64_C( 505.44), SIMDE_FLOAT64_C( 291.58)), simde_mm512_set_pd(SIMDE_FLOAT64_C(-357201.34), SIMDE_FLOAT64_C(-41991.90), 
SIMDE_FLOAT64_C(-823665.21), SIMDE_FLOAT64_C(481257.47), SIMDE_FLOAT64_C(-69017.85), SIMDE_FLOAT64_C( 1447.22), SIMDE_FLOAT64_C(-62088.25), SIMDE_FLOAT64_C(84622.35)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 232.02), SIMDE_FLOAT64_C( 984.70), SIMDE_FLOAT64_C( -800.83), SIMDE_FLOAT64_C( -826.63), SIMDE_FLOAT64_C( 822.26), SIMDE_FLOAT64_C( -892.21), SIMDE_FLOAT64_C( -651.70), SIMDE_FLOAT64_C( -380.50)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 880.83), SIMDE_FLOAT64_C( -763.06), SIMDE_FLOAT64_C( -540.57), SIMDE_FLOAT64_C( -512.55), SIMDE_FLOAT64_C( -32.98), SIMDE_FLOAT64_C( 700.87), SIMDE_FLOAT64_C( -425.19), SIMDE_FLOAT64_C( -849.48)), simde_mm512_set_pd(SIMDE_FLOAT64_C(204370.18), SIMDE_FLOAT64_C(-751385.18), SIMDE_FLOAT64_C(432904.67), SIMDE_FLOAT64_C(423689.21), SIMDE_FLOAT64_C(-27118.13), SIMDE_FLOAT64_C(-625323.22), SIMDE_FLOAT64_C(277096.32), SIMDE_FLOAT64_C(323227.14)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 202.90), SIMDE_FLOAT64_C( -396.66), SIMDE_FLOAT64_C( -364.01), SIMDE_FLOAT64_C( 56.81), SIMDE_FLOAT64_C( -881.59), SIMDE_FLOAT64_C( 212.81), SIMDE_FLOAT64_C( -968.64), SIMDE_FLOAT64_C( -657.19)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -221.35), SIMDE_FLOAT64_C( -305.38), SIMDE_FLOAT64_C( 546.45), SIMDE_FLOAT64_C( -697.03), SIMDE_FLOAT64_C( 93.97), SIMDE_FLOAT64_C( 975.92), SIMDE_FLOAT64_C( 876.47), SIMDE_FLOAT64_C( 762.37)), simde_mm512_set_pd(SIMDE_FLOAT64_C(-44911.92), SIMDE_FLOAT64_C(121132.03), SIMDE_FLOAT64_C(-198913.26), SIMDE_FLOAT64_C(-39598.27), SIMDE_FLOAT64_C(-82843.01), SIMDE_FLOAT64_C(207685.54), SIMDE_FLOAT64_C(-848983.90), SIMDE_FLOAT64_C(-501021.94)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -538.86), SIMDE_FLOAT64_C( 39.65), SIMDE_FLOAT64_C( -229.28), SIMDE_FLOAT64_C( -842.78), SIMDE_FLOAT64_C( -14.75), SIMDE_FLOAT64_C( -859.98), SIMDE_FLOAT64_C( 215.44), SIMDE_FLOAT64_C( 762.83)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -16.18), SIMDE_FLOAT64_C( -931.82), SIMDE_FLOAT64_C( -687.15), SIMDE_FLOAT64_C( -416.23), SIMDE_FLOAT64_C( -313.36), 
SIMDE_FLOAT64_C( 905.90), SIMDE_FLOAT64_C( 1.93), SIMDE_FLOAT64_C( -464.98)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 8718.75), SIMDE_FLOAT64_C(-36946.66), SIMDE_FLOAT64_C(157549.75), SIMDE_FLOAT64_C(350790.32), SIMDE_FLOAT64_C( 4622.06), SIMDE_FLOAT64_C(-779055.88), SIMDE_FLOAT64_C( 415.80), SIMDE_FLOAT64_C(-354700.69)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -25.40), SIMDE_FLOAT64_C( -267.80), SIMDE_FLOAT64_C( 353.79), SIMDE_FLOAT64_C( -35.72), SIMDE_FLOAT64_C( 125.21), SIMDE_FLOAT64_C( 137.22), SIMDE_FLOAT64_C( 310.88), SIMDE_FLOAT64_C( -724.55)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -551.49), SIMDE_FLOAT64_C( -42.33), SIMDE_FLOAT64_C( -926.18), SIMDE_FLOAT64_C( 36.96), SIMDE_FLOAT64_C( 954.39), SIMDE_FLOAT64_C( 874.71), SIMDE_FLOAT64_C( -375.00), SIMDE_FLOAT64_C( 949.07)), simde_mm512_set_pd(SIMDE_FLOAT64_C(14007.85), SIMDE_FLOAT64_C(11335.97), SIMDE_FLOAT64_C(-327673.22), SIMDE_FLOAT64_C(-1320.21), SIMDE_FLOAT64_C(119499.17), SIMDE_FLOAT64_C(120027.71), SIMDE_FLOAT64_C(-116580.00), SIMDE_FLOAT64_C(-687648.67)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mul_pd(test_vec[i].a, test_vec[i].b); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_mul_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d b; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -821.30), SIMDE_FLOAT64_C( -768.64), SIMDE_FLOAT64_C( -18.18), SIMDE_FLOAT64_C( -679.16), SIMDE_FLOAT64_C( -992.98), SIMDE_FLOAT64_C( -764.30), SIMDE_FLOAT64_C( 419.74), SIMDE_FLOAT64_C( 970.61)), UINT8_C( 76), simde_mm512_set_pd(SIMDE_FLOAT64_C( -167.78), SIMDE_FLOAT64_C( -432.98), SIMDE_FLOAT64_C( -407.63), SIMDE_FLOAT64_C( -78.73), SIMDE_FLOAT64_C( -377.24), SIMDE_FLOAT64_C( -338.63), SIMDE_FLOAT64_C( -681.32), SIMDE_FLOAT64_C( -483.94)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -931.82), SIMDE_FLOAT64_C( -180.10), SIMDE_FLOAT64_C( -213.80), 
SIMDE_FLOAT64_C( -618.07), SIMDE_FLOAT64_C( 922.09), SIMDE_FLOAT64_C( -681.84), SIMDE_FLOAT64_C( -317.54), SIMDE_FLOAT64_C( 448.08)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -821.30), SIMDE_FLOAT64_C(77979.70), SIMDE_FLOAT64_C( -18.18), SIMDE_FLOAT64_C( -679.16), SIMDE_FLOAT64_C(-347849.23), SIMDE_FLOAT64_C(230891.48), SIMDE_FLOAT64_C( 419.74), SIMDE_FLOAT64_C( 970.61)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -706.27), SIMDE_FLOAT64_C( -736.90), SIMDE_FLOAT64_C( 388.85), SIMDE_FLOAT64_C( -452.26), SIMDE_FLOAT64_C( -983.38), SIMDE_FLOAT64_C( -800.62), SIMDE_FLOAT64_C( 310.59), SIMDE_FLOAT64_C( 810.60)), UINT8_C( 87), simde_mm512_set_pd(SIMDE_FLOAT64_C( -613.25), SIMDE_FLOAT64_C( 846.16), SIMDE_FLOAT64_C( 824.90), SIMDE_FLOAT64_C( -554.53), SIMDE_FLOAT64_C( -163.66), SIMDE_FLOAT64_C( 923.31), SIMDE_FLOAT64_C( -996.35), SIMDE_FLOAT64_C( -303.21)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -11.46), SIMDE_FLOAT64_C( -598.19), SIMDE_FLOAT64_C( 495.52), SIMDE_FLOAT64_C( 117.93), SIMDE_FLOAT64_C( 291.55), SIMDE_FLOAT64_C( 189.90), SIMDE_FLOAT64_C( -859.41), SIMDE_FLOAT64_C( 9.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -706.27), SIMDE_FLOAT64_C(-506164.45), SIMDE_FLOAT64_C( 388.85), SIMDE_FLOAT64_C(-65395.72), SIMDE_FLOAT64_C( -983.38), SIMDE_FLOAT64_C(175336.57), SIMDE_FLOAT64_C(856273.15), SIMDE_FLOAT64_C(-2959.33)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -563.02), SIMDE_FLOAT64_C( -883.18), SIMDE_FLOAT64_C( 852.82), SIMDE_FLOAT64_C( -331.20), SIMDE_FLOAT64_C( -286.53), SIMDE_FLOAT64_C( -422.71), SIMDE_FLOAT64_C( -717.56), SIMDE_FLOAT64_C( -209.20)), UINT8_C( 30), simde_mm512_set_pd(SIMDE_FLOAT64_C( -241.93), SIMDE_FLOAT64_C( -343.53), SIMDE_FLOAT64_C( 736.91), SIMDE_FLOAT64_C( -835.83), SIMDE_FLOAT64_C( -444.99), SIMDE_FLOAT64_C( 943.16), SIMDE_FLOAT64_C( 17.49), SIMDE_FLOAT64_C( -26.72)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -614.80), SIMDE_FLOAT64_C( -251.11), SIMDE_FLOAT64_C( 421.22), SIMDE_FLOAT64_C( -961.92), SIMDE_FLOAT64_C( 971.24), SIMDE_FLOAT64_C( -348.19), 
SIMDE_FLOAT64_C( -171.56), SIMDE_FLOAT64_C( -420.89)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -563.02), SIMDE_FLOAT64_C( -883.18), SIMDE_FLOAT64_C( 852.82), SIMDE_FLOAT64_C(804001.59), SIMDE_FLOAT64_C(-432192.09), SIMDE_FLOAT64_C(-328398.88), SIMDE_FLOAT64_C(-3000.58), SIMDE_FLOAT64_C( -209.20)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 442.66), SIMDE_FLOAT64_C( -69.80), SIMDE_FLOAT64_C( 151.84), SIMDE_FLOAT64_C( 278.19), SIMDE_FLOAT64_C( -105.37), SIMDE_FLOAT64_C( -898.05), SIMDE_FLOAT64_C( 104.61), SIMDE_FLOAT64_C( 131.40)), UINT8_C( 92), simde_mm512_set_pd(SIMDE_FLOAT64_C( -598.49), SIMDE_FLOAT64_C( 226.31), SIMDE_FLOAT64_C( -6.29), SIMDE_FLOAT64_C( 443.90), SIMDE_FLOAT64_C( -544.30), SIMDE_FLOAT64_C( -925.04), SIMDE_FLOAT64_C( 484.35), SIMDE_FLOAT64_C( -740.68)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 72.46), SIMDE_FLOAT64_C( -87.05), SIMDE_FLOAT64_C( -714.68), SIMDE_FLOAT64_C( 393.49), SIMDE_FLOAT64_C( 651.31), SIMDE_FLOAT64_C( 480.47), SIMDE_FLOAT64_C( 373.84), SIMDE_FLOAT64_C( 843.89)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 442.66), SIMDE_FLOAT64_C(-19700.29), SIMDE_FLOAT64_C( 151.84), SIMDE_FLOAT64_C(174670.21), SIMDE_FLOAT64_C(-354508.03), SIMDE_FLOAT64_C(-444453.97), SIMDE_FLOAT64_C( 104.61), SIMDE_FLOAT64_C( 131.40)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 988.68), SIMDE_FLOAT64_C( 353.35), SIMDE_FLOAT64_C( -309.14), SIMDE_FLOAT64_C( -266.17), SIMDE_FLOAT64_C( 819.45), SIMDE_FLOAT64_C( 592.47), SIMDE_FLOAT64_C( 382.11), SIMDE_FLOAT64_C( 516.02)), UINT8_C( 87), simde_mm512_set_pd(SIMDE_FLOAT64_C( 51.49), SIMDE_FLOAT64_C( -696.81), SIMDE_FLOAT64_C( 178.38), SIMDE_FLOAT64_C( 907.89), SIMDE_FLOAT64_C( 646.15), SIMDE_FLOAT64_C( 281.27), SIMDE_FLOAT64_C( 226.71), SIMDE_FLOAT64_C( -906.47)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -823.13), SIMDE_FLOAT64_C( -506.07), SIMDE_FLOAT64_C( -848.31), SIMDE_FLOAT64_C( -467.13), SIMDE_FLOAT64_C( 559.51), SIMDE_FLOAT64_C( -498.81), SIMDE_FLOAT64_C( 598.24), SIMDE_FLOAT64_C( 523.97)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 988.68), 
SIMDE_FLOAT64_C(352634.64), SIMDE_FLOAT64_C( -309.14), SIMDE_FLOAT64_C(-424102.66), SIMDE_FLOAT64_C( 819.45), SIMDE_FLOAT64_C(-140300.29), SIMDE_FLOAT64_C(135626.99), SIMDE_FLOAT64_C(-474963.09)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -811.79), SIMDE_FLOAT64_C( 522.04), SIMDE_FLOAT64_C( 594.85), SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( -855.43), SIMDE_FLOAT64_C( 660.82), SIMDE_FLOAT64_C( -308.44), SIMDE_FLOAT64_C( 882.56)), UINT8_C( 62), simde_mm512_set_pd(SIMDE_FLOAT64_C( -252.73), SIMDE_FLOAT64_C( -915.63), SIMDE_FLOAT64_C( -935.95), SIMDE_FLOAT64_C( -722.20), SIMDE_FLOAT64_C( -497.29), SIMDE_FLOAT64_C( -166.63), SIMDE_FLOAT64_C( 516.64), SIMDE_FLOAT64_C( -317.86)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -925.15), SIMDE_FLOAT64_C( 466.19), SIMDE_FLOAT64_C( 263.72), SIMDE_FLOAT64_C( 424.85), SIMDE_FLOAT64_C( 205.96), SIMDE_FLOAT64_C( 401.84), SIMDE_FLOAT64_C( 361.23), SIMDE_FLOAT64_C( 807.53)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -811.79), SIMDE_FLOAT64_C( 522.04), SIMDE_FLOAT64_C(-246828.73), SIMDE_FLOAT64_C(-306826.67), SIMDE_FLOAT64_C(-102421.85), SIMDE_FLOAT64_C(-66958.60), SIMDE_FLOAT64_C(186625.87), SIMDE_FLOAT64_C( 882.56)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 915.95), SIMDE_FLOAT64_C( -721.40), SIMDE_FLOAT64_C( -153.01), SIMDE_FLOAT64_C( 377.63), SIMDE_FLOAT64_C( 983.59), SIMDE_FLOAT64_C( -647.06), SIMDE_FLOAT64_C( 224.30), SIMDE_FLOAT64_C( -39.06)), UINT8_C( 70), simde_mm512_set_pd(SIMDE_FLOAT64_C( 724.37), SIMDE_FLOAT64_C( -108.80), SIMDE_FLOAT64_C( -716.02), SIMDE_FLOAT64_C( -552.47), SIMDE_FLOAT64_C( 411.46), SIMDE_FLOAT64_C( -439.29), SIMDE_FLOAT64_C( 397.99), SIMDE_FLOAT64_C( -31.94)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -763.99), SIMDE_FLOAT64_C( 279.59), SIMDE_FLOAT64_C( 318.18), SIMDE_FLOAT64_C( 57.40), SIMDE_FLOAT64_C( 13.78), SIMDE_FLOAT64_C( -535.45), SIMDE_FLOAT64_C( 52.16), SIMDE_FLOAT64_C( -903.47)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 915.95), SIMDE_FLOAT64_C(-30419.39), SIMDE_FLOAT64_C( -153.01), SIMDE_FLOAT64_C( 377.63), 
SIMDE_FLOAT64_C( 983.59), SIMDE_FLOAT64_C(235217.83), SIMDE_FLOAT64_C(20759.16), SIMDE_FLOAT64_C( -39.06)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -219.27), SIMDE_FLOAT64_C( 499.44), SIMDE_FLOAT64_C( -493.92), SIMDE_FLOAT64_C( 481.91), SIMDE_FLOAT64_C( 270.70), SIMDE_FLOAT64_C( 857.18), SIMDE_FLOAT64_C( -745.19), SIMDE_FLOAT64_C( -960.45)), UINT8_C(113), simde_mm512_set_pd(SIMDE_FLOAT64_C( -135.86), SIMDE_FLOAT64_C( -159.92), SIMDE_FLOAT64_C( 756.29), SIMDE_FLOAT64_C( -526.68), SIMDE_FLOAT64_C( 5.30), SIMDE_FLOAT64_C( 278.11), SIMDE_FLOAT64_C( 884.85), SIMDE_FLOAT64_C( 638.85)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 726.26), SIMDE_FLOAT64_C( 110.13), SIMDE_FLOAT64_C( -961.42), SIMDE_FLOAT64_C( 96.39), SIMDE_FLOAT64_C( 930.93), SIMDE_FLOAT64_C( -241.35), SIMDE_FLOAT64_C( -108.47), SIMDE_FLOAT64_C( -69.81)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -219.27), SIMDE_FLOAT64_C(-17611.99), SIMDE_FLOAT64_C(-727112.33), SIMDE_FLOAT64_C(-50766.69), SIMDE_FLOAT64_C( 270.70), SIMDE_FLOAT64_C( 857.18), SIMDE_FLOAT64_C( -745.19), SIMDE_FLOAT64_C(-44598.12)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_mul_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_maskz_mul_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512d a; simde__m512d b; simde__m512d r; } test_vec[8] = { { UINT8_C( 4), simde_mm512_set_pd(SIMDE_FLOAT64_C( 232.34), SIMDE_FLOAT64_C( 716.29), SIMDE_FLOAT64_C( 520.56), SIMDE_FLOAT64_C( -458.82), SIMDE_FLOAT64_C( 550.79), SIMDE_FLOAT64_C( 687.92), SIMDE_FLOAT64_C( -593.10), SIMDE_FLOAT64_C( -620.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -156.55), SIMDE_FLOAT64_C( -441.38), SIMDE_FLOAT64_C( 554.99), SIMDE_FLOAT64_C( 294.84), SIMDE_FLOAT64_C( -270.30), SIMDE_FLOAT64_C( -228.66), SIMDE_FLOAT64_C( 910.49), SIMDE_FLOAT64_C( -483.54)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), 
SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-157299.79), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(165), simde_mm512_set_pd(SIMDE_FLOAT64_C( -526.05), SIMDE_FLOAT64_C( 453.25), SIMDE_FLOAT64_C( 821.16), SIMDE_FLOAT64_C( -906.31), SIMDE_FLOAT64_C( -873.91), SIMDE_FLOAT64_C( -472.79), SIMDE_FLOAT64_C( -675.37), SIMDE_FLOAT64_C( -955.90)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 305.84), SIMDE_FLOAT64_C( -400.58), SIMDE_FLOAT64_C( -475.09), SIMDE_FLOAT64_C( -582.28), SIMDE_FLOAT64_C( -849.06), SIMDE_FLOAT64_C( -392.73), SIMDE_FLOAT64_C( -370.73), SIMDE_FLOAT64_C( -928.94)), simde_mm512_set_pd(SIMDE_FLOAT64_C(-160887.13), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-390124.90), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(185678.82), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(887973.75)) }, { UINT8_C(175), simde_mm512_set_pd(SIMDE_FLOAT64_C( 871.20), SIMDE_FLOAT64_C( -761.38), SIMDE_FLOAT64_C( -106.42), SIMDE_FLOAT64_C( -228.29), SIMDE_FLOAT64_C( -864.78), SIMDE_FLOAT64_C( -773.10), SIMDE_FLOAT64_C( 984.91), SIMDE_FLOAT64_C( -982.29)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -488.22), SIMDE_FLOAT64_C( 361.25), SIMDE_FLOAT64_C( -346.47), SIMDE_FLOAT64_C( 411.25), SIMDE_FLOAT64_C( 117.68), SIMDE_FLOAT64_C( 448.38), SIMDE_FLOAT64_C( -319.67), SIMDE_FLOAT64_C( -97.98)), simde_mm512_set_pd(SIMDE_FLOAT64_C(-425337.26), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(36871.34), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-101767.31), SIMDE_FLOAT64_C(-346642.58), SIMDE_FLOAT64_C(-314846.18), SIMDE_FLOAT64_C(96244.77)) }, { UINT8_C(195), simde_mm512_set_pd(SIMDE_FLOAT64_C( -43.54), SIMDE_FLOAT64_C( 387.84), SIMDE_FLOAT64_C( -190.98), SIMDE_FLOAT64_C( 468.25), SIMDE_FLOAT64_C( -832.05), SIMDE_FLOAT64_C( -600.33), SIMDE_FLOAT64_C( -246.00), SIMDE_FLOAT64_C( 160.40)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -279.42), SIMDE_FLOAT64_C( 980.35), SIMDE_FLOAT64_C( 897.98), SIMDE_FLOAT64_C( -354.38), 
SIMDE_FLOAT64_C( 689.03), SIMDE_FLOAT64_C( 555.84), SIMDE_FLOAT64_C( 823.79), SIMDE_FLOAT64_C( -979.93)), simde_mm512_set_pd(SIMDE_FLOAT64_C(12165.95), SIMDE_FLOAT64_C(380218.94), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-202652.34), SIMDE_FLOAT64_C(-157180.77)) }, { UINT8_C(236), simde_mm512_set_pd(SIMDE_FLOAT64_C( 821.55), SIMDE_FLOAT64_C( 740.38), SIMDE_FLOAT64_C( -934.60), SIMDE_FLOAT64_C( 694.91), SIMDE_FLOAT64_C( 432.52), SIMDE_FLOAT64_C( 380.89), SIMDE_FLOAT64_C( -22.14), SIMDE_FLOAT64_C( 683.08)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 735.17), SIMDE_FLOAT64_C( 313.88), SIMDE_FLOAT64_C( -529.80), SIMDE_FLOAT64_C( -869.79), SIMDE_FLOAT64_C( 294.43), SIMDE_FLOAT64_C( 958.02), SIMDE_FLOAT64_C( 383.81), SIMDE_FLOAT64_C( 520.19)), simde_mm512_set_pd(SIMDE_FLOAT64_C(603978.91), SIMDE_FLOAT64_C(232390.47), SIMDE_FLOAT64_C(495151.08), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(127346.86), SIMDE_FLOAT64_C(364900.24), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(144), simde_mm512_set_pd(SIMDE_FLOAT64_C( -704.15), SIMDE_FLOAT64_C( 418.80), SIMDE_FLOAT64_C( -562.82), SIMDE_FLOAT64_C( 910.01), SIMDE_FLOAT64_C( 513.17), SIMDE_FLOAT64_C( 314.44), SIMDE_FLOAT64_C( 866.48), SIMDE_FLOAT64_C( 466.83)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 663.36), SIMDE_FLOAT64_C( 883.11), SIMDE_FLOAT64_C( 475.36), SIMDE_FLOAT64_C( 451.49), SIMDE_FLOAT64_C( 246.05), SIMDE_FLOAT64_C( -122.55), SIMDE_FLOAT64_C( 401.83), SIMDE_FLOAT64_C( 557.78)), simde_mm512_set_pd(SIMDE_FLOAT64_C(-467104.94), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(410860.41), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(181), simde_mm512_set_pd(SIMDE_FLOAT64_C( -758.51), SIMDE_FLOAT64_C( -164.55), SIMDE_FLOAT64_C( 334.89), SIMDE_FLOAT64_C( -549.60), SIMDE_FLOAT64_C( 344.01), SIMDE_FLOAT64_C( -985.45), SIMDE_FLOAT64_C( -235.88), SIMDE_FLOAT64_C( 450.77)), 
simde_mm512_set_pd(SIMDE_FLOAT64_C( -408.01), SIMDE_FLOAT64_C( 723.62), SIMDE_FLOAT64_C( -159.29), SIMDE_FLOAT64_C( 720.82), SIMDE_FLOAT64_C( -893.97), SIMDE_FLOAT64_C( 826.45), SIMDE_FLOAT64_C( -3.06), SIMDE_FLOAT64_C( 902.05)), simde_mm512_set_pd(SIMDE_FLOAT64_C(309479.67), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-53344.63), SIMDE_FLOAT64_C(-396162.67), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-814425.15), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(406617.08)) }, { UINT8_C(211), simde_mm512_set_pd(SIMDE_FLOAT64_C( 311.61), SIMDE_FLOAT64_C( -930.67), SIMDE_FLOAT64_C( 465.44), SIMDE_FLOAT64_C( -366.35), SIMDE_FLOAT64_C( 205.36), SIMDE_FLOAT64_C( 276.19), SIMDE_FLOAT64_C( 975.10), SIMDE_FLOAT64_C( -338.46)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 817.02), SIMDE_FLOAT64_C( -834.50), SIMDE_FLOAT64_C( -648.42), SIMDE_FLOAT64_C( 761.90), SIMDE_FLOAT64_C( 24.27), SIMDE_FLOAT64_C( 838.31), SIMDE_FLOAT64_C( -854.11), SIMDE_FLOAT64_C( 403.52)), simde_mm512_set_pd(SIMDE_FLOAT64_C(254591.60), SIMDE_FLOAT64_C(776644.11), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-279122.07), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-832842.66), SIMDE_FLOAT64_C(-136575.38)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_maskz_mul_pd(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mul_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_mul_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_mul_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mul_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_mul_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_mul_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mul_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_mul_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_mul_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mul_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_mul_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_mul_pd) 
SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/mulhi.c000066400000000000000000000404731400333146700165460ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN mulhi #include #include static int test_simde_mm512_mulhi_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { -INT16_C( 16752), INT16_C( 3096), INT16_C( 21789), INT16_C( 27232), -INT16_C( 17038), INT16_C( 13798), -INT16_C( 6599), -INT16_C( 28056), -INT16_C( 14632), INT16_C( 22925), -INT16_C( 11459), INT16_C( 11763), INT16_C( 13417), INT16_C( 15127), INT16_C( 4707), -INT16_C( 3144), -INT16_C( 12079), -INT16_C( 4353), INT16_C( 24613), -INT16_C( 26792), INT16_C( 15901), INT16_C( 22476), INT16_C( 13349), -INT16_C( 535), INT16_C( 30459), INT16_C( 14422), INT16_C( 18761), -INT16_C( 19867), INT16_C( 31870), -INT16_C( 7698), -INT16_C( 22898), INT16_C( 24533) }, { -INT16_C( 11145), -INT16_C( 25523), -INT16_C( 22988), INT16_C( 21044), INT16_C( 228), INT16_C( 2473), -INT16_C( 28107), INT16_C( 12294), INT16_C( 23560), INT16_C( 20840), -INT16_C( 12890), INT16_C( 9220), -INT16_C( 3511), -INT16_C( 10491), -INT16_C( 9576), INT16_C( 3895), -INT16_C( 31569), -INT16_C( 7252), -INT16_C( 8150), INT16_C( 3893), -INT16_C( 8480), INT16_C( 5400), INT16_C( 8048), INT16_C( 30789), -INT16_C( 21125), INT16_C( 8650), -INT16_C( 12678), -INT16_C( 15547), INT16_C( 19392), INT16_C( 22683), -INT16_C( 11739), -INT16_C( 11160) }, { INT16_C( 2848), -INT16_C( 1206), -INT16_C( 7643), INT16_C( 8744), -INT16_C( 60), INT16_C( 520), INT16_C( 2830), -INT16_C( 5264), -INT16_C( 5261), INT16_C( 7289), INT16_C( 2253), INT16_C( 1654), -INT16_C( 719), -INT16_C( 2422), -INT16_C( 688), -INT16_C( 187), INT16_C( 5818), INT16_C( 481), -INT16_C( 3061), -INT16_C( 1592), -INT16_C( 2058), INT16_C( 1851), INT16_C( 1639), -INT16_C( 252), -INT16_C( 9819), INT16_C( 1903), -INT16_C( 3630), INT16_C( 4713), INT16_C( 9430), -INT16_C( 2665), INT16_C( 4101), -INT16_C( 4178) } }, { { INT16_C( 5206), -INT16_C( 32328), -INT16_C( 4620), -INT16_C( 11120), 
-INT16_C( 22324), INT16_C( 15594), INT16_C( 12231), INT16_C( 17333), INT16_C( 32733), INT16_C( 22372), -INT16_C( 21939), INT16_C( 3355), -INT16_C( 18699), INT16_C( 6757), -INT16_C( 12920), -INT16_C( 8465), -INT16_C( 22559), -INT16_C( 10913), -INT16_C( 4204), INT16_C( 24746), -INT16_C( 27496), INT16_C( 24477), INT16_C( 21187), -INT16_C( 24414), INT16_C( 2001), INT16_C( 7928), INT16_C( 5041), -INT16_C( 22997), -INT16_C( 28471), INT16_C( 20928), -INT16_C( 20642), INT16_C( 16175) }, { -INT16_C( 28842), -INT16_C( 5355), -INT16_C( 16514), INT16_C( 5707), -INT16_C( 6061), INT16_C( 5750), INT16_C( 6202), INT16_C( 2999), -INT16_C( 20705), -INT16_C( 12247), INT16_C( 21698), -INT16_C( 29834), INT16_C( 14309), INT16_C( 17372), INT16_C( 3046), INT16_C( 15746), -INT16_C( 26726), INT16_C( 6440), INT16_C( 29526), -INT16_C( 22225), -INT16_C( 23204), -INT16_C( 26944), INT16_C( 30654), -INT16_C( 8798), -INT16_C( 13530), -INT16_C( 5970), INT16_C( 9248), INT16_C( 1395), INT16_C( 20315), INT16_C( 16968), -INT16_C( 13734), -INT16_C( 2689) }, { -INT16_C( 2292), INT16_C( 2641), INT16_C( 1164), -INT16_C( 969), INT16_C( 2064), INT16_C( 1368), INT16_C( 1157), INT16_C( 793), -INT16_C( 10342), -INT16_C( 4181), -INT16_C( 7264), -INT16_C( 1528), -INT16_C( 4083), INT16_C( 1791), -INT16_C( 601), -INT16_C( 2034), INT16_C( 9199), -INT16_C( 1073), -INT16_C( 1895), -INT16_C( 8393), INT16_C( 9735), -INT16_C( 10064), INT16_C( 9910), INT16_C( 3277), -INT16_C( 414), -INT16_C( 723), INT16_C( 711), -INT16_C( 490), -INT16_C( 8826), INT16_C( 5418), INT16_C( 4325), -INT16_C( 664) } }, { { -INT16_C( 22686), -INT16_C( 18418), INT16_C( 15642), INT16_C( 30306), INT16_C( 8931), -INT16_C( 24307), -INT16_C( 20583), -INT16_C( 16514), INT16_C( 11386), -INT16_C( 25945), INT16_C( 6737), -INT16_C( 21345), -INT16_C( 6295), -INT16_C( 15378), INT16_C( 28082), INT16_C( 5304), -INT16_C( 14828), INT16_C( 12236), INT16_C( 11780), -INT16_C( 6235), -INT16_C( 19888), -INT16_C( 5752), INT16_C( 1633), -INT16_C( 9048), INT16_C( 20275), 
-INT16_C( 31626), INT16_C( 5737), -INT16_C( 11728), INT16_C( 8189), -INT16_C( 20586), INT16_C( 20108), -INT16_C( 24125) }, { -INT16_C( 28651), INT16_C( 6608), INT16_C( 30142), INT16_C( 3840), -INT16_C( 30680), -INT16_C( 30216), -INT16_C( 24178), -INT16_C( 16027), -INT16_C( 8976), INT16_C( 23109), INT16_C( 30450), -INT16_C( 4308), -INT16_C( 15723), INT16_C( 8607), INT16_C( 25105), INT16_C( 9922), -INT16_C( 27918), -INT16_C( 20161), INT16_C( 16136), INT16_C( 12480), -INT16_C( 18233), INT16_C( 21945), INT16_C( 8025), INT16_C( 18967), INT16_C( 23803), -INT16_C( 4700), -INT16_C( 12078), INT16_C( 26588), INT16_C( 31635), -INT16_C( 23415), INT16_C( 19422), -INT16_C( 12086) }, { INT16_C( 9917), -INT16_C( 1858), INT16_C( 7194), INT16_C( 1775), -INT16_C( 4181), INT16_C( 11206), INT16_C( 7593), INT16_C( 4038), -INT16_C( 1560), -INT16_C( 9149), INT16_C( 3130), INT16_C( 1403), INT16_C( 1510), -INT16_C( 2020), INT16_C( 10757), INT16_C( 803), INT16_C( 6316), -INT16_C( 3765), INT16_C( 2900), -INT16_C( 1188), INT16_C( 5533), -INT16_C( 1927), INT16_C( 199), -INT16_C( 2619), INT16_C( 7363), INT16_C( 2268), -INT16_C( 1058), -INT16_C( 4759), INT16_C( 3952), INT16_C( 7355), INT16_C( 5959), INT16_C( 4449) } }, { { INT16_C( 2526), -INT16_C( 6527), INT16_C( 16712), INT16_C( 3862), -INT16_C( 12294), INT16_C( 21348), INT16_C( 31726), -INT16_C( 5731), INT16_C( 16856), -INT16_C( 21802), -INT16_C( 19694), -INT16_C( 23278), -INT16_C( 25810), INT16_C( 3145), INT16_C( 5094), -INT16_C( 15139), INT16_C( 24092), INT16_C( 25770), -INT16_C( 16224), -INT16_C( 25997), -INT16_C( 10352), INT16_C( 32493), -INT16_C( 29869), INT16_C( 11112), INT16_C( 16076), -INT16_C( 8491), -INT16_C( 6159), INT16_C( 8323), -INT16_C( 13182), INT16_C( 26924), INT16_C( 2527), -INT16_C( 1235) }, { -INT16_C( 10136), INT16_C( 2143), -INT16_C( 11624), INT16_C( 10402), -INT16_C( 28758), -INT16_C( 601), INT16_C( 3866), -INT16_C( 6360), -INT16_C( 691), INT16_C( 16325), INT16_C( 18917), INT16_C( 26463), -INT16_C( 29931), -INT16_C( 
2608), -INT16_C( 363), -INT16_C( 528), INT16_C( 20694), INT16_C( 28165), -INT16_C( 22750), -INT16_C( 13161), INT16_C( 15926), INT16_C( 20937), -INT16_C( 3763), -INT16_C( 26056), -INT16_C( 529), -INT16_C( 11047), INT16_C( 14406), INT16_C( 23611), INT16_C( 3268), INT16_C( 22865), INT16_C( 16650), -INT16_C( 8106) }, { -INT16_C( 391), -INT16_C( 214), -INT16_C( 2965), INT16_C( 612), INT16_C( 5394), -INT16_C( 196), INT16_C( 1871), INT16_C( 556), -INT16_C( 178), -INT16_C( 5431), -INT16_C( 5685), -INT16_C( 9400), INT16_C( 11787), -INT16_C( 126), -INT16_C( 29), INT16_C( 121), INT16_C( 7607), INT16_C( 11075), INT16_C( 5631), INT16_C( 5220), -INT16_C( 2516), INT16_C( 10380), INT16_C( 1715), -INT16_C( 4418), -INT16_C( 130), INT16_C( 1431), -INT16_C( 1354), INT16_C( 2998), -INT16_C( 658), INT16_C( 9393), INT16_C( 642), INT16_C( 152) } }, { { INT16_C( 23441), -INT16_C( 19378), -INT16_C( 6910), INT16_C( 14464), INT16_C( 18979), INT16_C( 28809), -INT16_C( 16069), INT16_C( 10763), -INT16_C( 6977), INT16_C( 1534), INT16_C( 14877), -INT16_C( 7839), -INT16_C( 19898), INT16_C( 20538), -INT16_C( 28428), -INT16_C( 31440), INT16_C( 32491), -INT16_C( 4807), -INT16_C( 17820), -INT16_C( 30939), -INT16_C( 20732), INT16_C( 16376), INT16_C( 880), INT16_C( 12138), INT16_C( 26855), INT16_C( 1077), -INT16_C( 26974), -INT16_C( 5915), INT16_C( 8009), INT16_C( 15672), INT16_C( 26799), -INT16_C( 25918) }, { -INT16_C( 793), INT16_C( 19335), -INT16_C( 21066), -INT16_C( 17710), -INT16_C( 13732), -INT16_C( 13063), INT16_C( 25549), -INT16_C( 18948), INT16_C( 12748), INT16_C( 28345), -INT16_C( 24633), INT16_C( 4183), -INT16_C( 28738), INT16_C( 28237), INT16_C( 4344), -INT16_C( 8440), -INT16_C( 28660), -INT16_C( 15830), -INT16_C( 963), -INT16_C( 26244), INT16_C( 30151), -INT16_C( 27547), INT16_C( 25049), -INT16_C( 23223), INT16_C( 914), INT16_C( 23059), INT16_C( 27298), INT16_C( 24682), -INT16_C( 18182), -INT16_C( 3378), -INT16_C( 10296), -INT16_C( 11055) }, { -INT16_C( 284), -INT16_C( 5718), INT16_C( 2221), 
-INT16_C( 3909), -INT16_C( 3977), -INT16_C( 5743), -INT16_C( 6265), -INT16_C( 3112), -INT16_C( 1358), INT16_C( 663), -INT16_C( 5592), -INT16_C( 501), INT16_C( 8725), INT16_C( 8849), -INT16_C( 1885), INT16_C( 4048), -INT16_C( 14209), INT16_C( 1161), INT16_C( 261), INT16_C( 12389), -INT16_C( 9539), -INT16_C( 6884), INT16_C( 336), -INT16_C( 4302), INT16_C( 374), INT16_C( 378), -INT16_C( 11236), -INT16_C( 2228), -INT16_C( 2222), -INT16_C( 808), -INT16_C( 4211), INT16_C( 4372) } }, { { -INT16_C( 1177), -INT16_C( 23402), INT16_C( 4855), -INT16_C( 16835), -INT16_C( 23929), INT16_C( 24659), -INT16_C( 25596), -INT16_C( 27131), INT16_C( 6559), INT16_C( 16880), INT16_C( 23427), INT16_C( 32162), INT16_C( 28691), -INT16_C( 9361), INT16_C( 16455), -INT16_C( 20817), INT16_C( 17723), INT16_C( 13138), -INT16_C( 28841), -INT16_C( 8463), INT16_C( 17458), INT16_C( 13887), INT16_C( 17633), -INT16_C( 32564), -INT16_C( 17059), -INT16_C( 7742), INT16_C( 25624), INT16_C( 11102), -INT16_C( 12588), INT16_C( 7174), -INT16_C( 19186), INT16_C( 19146) }, { INT16_C( 7674), INT16_C( 20861), INT16_C( 28332), -INT16_C( 8657), INT16_C( 28339), -INT16_C( 27628), -INT16_C( 7757), INT16_C( 4116), -INT16_C( 10594), -INT16_C( 18703), INT16_C( 20538), INT16_C( 4065), -INT16_C( 6370), INT16_C( 11307), -INT16_C( 2660), -INT16_C( 27018), -INT16_C( 3310), -INT16_C( 16409), INT16_C( 5730), INT16_C( 5533), -INT16_C( 19835), INT16_C( 14505), -INT16_C( 17005), INT16_C( 12616), INT16_C( 14996), -INT16_C( 12569), -INT16_C( 14198), -INT16_C( 22307), INT16_C( 2223), INT16_C( 19412), INT16_C( 19454), INT16_C( 4321) }, { -INT16_C( 138), -INT16_C( 7450), INT16_C( 2098), INT16_C( 2223), -INT16_C( 10348), -INT16_C( 10396), INT16_C( 3029), -INT16_C( 1704), -INT16_C( 1061), -INT16_C( 4818), INT16_C( 7341), INT16_C( 1994), -INT16_C( 2789), -INT16_C( 1616), -INT16_C( 668), INT16_C( 8582), -INT16_C( 896), -INT16_C( 3290), -INT16_C( 2522), -INT16_C( 715), -INT16_C( 5284), INT16_C( 3073), -INT16_C( 4576), -INT16_C( 6269), 
-INT16_C( 3904), INT16_C( 1484), -INT16_C( 5552), -INT16_C( 3779), -INT16_C( 427), INT16_C( 2124), -INT16_C( 5696), INT16_C( 1262) } }, { { -INT16_C( 14274), -INT16_C( 24369), INT16_C( 28126), INT16_C( 25525), INT16_C( 24095), -INT16_C( 19813), -INT16_C( 7140), -INT16_C( 20253), -INT16_C( 13794), -INT16_C( 22402), INT16_C( 23698), INT16_C( 16720), INT16_C( 9316), INT16_C( 25228), INT16_C( 28015), -INT16_C( 20877), INT16_C( 16949), INT16_C( 4942), INT16_C( 1199), -INT16_C( 12681), INT16_C( 4706), INT16_C( 32384), INT16_C( 25590), INT16_C( 5166), -INT16_C( 21203), -INT16_C( 16452), INT16_C( 3081), INT16_C( 27904), -INT16_C( 29647), -INT16_C( 24368), INT16_C( 17401), INT16_C( 11854) }, { -INT16_C( 25211), INT16_C( 13634), -INT16_C( 18015), INT16_C( 771), -INT16_C( 31541), -INT16_C( 15742), -INT16_C( 20249), INT16_C( 5590), -INT16_C( 27811), INT16_C( 26324), -INT16_C( 10849), -INT16_C( 12076), -INT16_C( 23455), INT16_C( 23409), -INT16_C( 16409), INT16_C( 27785), -INT16_C( 13476), -INT16_C( 607), -INT16_C( 23164), INT16_C( 20481), -INT16_C( 31959), INT16_C( 4114), -INT16_C( 6093), -INT16_C( 28379), -INT16_C( 1413), INT16_C( 7159), -INT16_C( 13361), INT16_C( 12523), INT16_C( 23663), INT16_C( 22155), INT16_C( 5404), INT16_C( 30915) }, { INT16_C( 5491), -INT16_C( 5070), -INT16_C( 7732), INT16_C( 300), -INT16_C( 11597), INT16_C( 4759), INT16_C( 2206), -INT16_C( 1728), INT16_C( 5853), -INT16_C( 8999), -INT16_C( 3924), -INT16_C( 3081), -INT16_C( 3335), INT16_C( 9011), -INT16_C( 7015), -INT16_C( 8852), -INT16_C( 3486), -INT16_C( 46), -INT16_C( 424), -INT16_C( 3964), -INT16_C( 2295), INT16_C( 2032), -INT16_C( 2380), -INT16_C( 2238), INT16_C( 457), -INT16_C( 1798), -INT16_C( 629), INT16_C( 5332), -INT16_C( 10705), -INT16_C( 8238), INT16_C( 1434), INT16_C( 5591) } }, { { INT16_C( 25824), INT16_C( 25974), INT16_C( 30473), INT16_C( 12981), -INT16_C( 14342), INT16_C( 11587), INT16_C( 26799), INT16_C( 11198), -INT16_C( 18846), INT16_C( 12614), INT16_C( 12673), -INT16_C( 3742), 
-INT16_C( 4722), -INT16_C( 21945), INT16_C( 2562), -INT16_C( 7390), -INT16_C( 26513), INT16_C( 30792), -INT16_C( 753), INT16_C( 2475), -INT16_C( 4412), INT16_C( 29495), -INT16_C( 2730), -INT16_C( 18018), -INT16_C( 6997), INT16_C( 11754), INT16_C( 19478), -INT16_C( 23522), INT16_C( 25914), INT16_C( 15438), INT16_C( 28784), -INT16_C( 8417) }, { INT16_C( 26377), INT16_C( 6231), INT16_C( 612), INT16_C( 10274), INT16_C( 23024), INT16_C( 18332), INT16_C( 14926), -INT16_C( 1536), -INT16_C( 5601), INT16_C( 13607), INT16_C( 17719), INT16_C( 29145), INT16_C( 10154), INT16_C( 6829), -INT16_C( 12905), -INT16_C( 24327), INT16_C( 20788), -INT16_C( 26183), -INT16_C( 9389), INT16_C( 17601), INT16_C( 23860), -INT16_C( 32117), -INT16_C( 29800), -INT16_C( 18564), -INT16_C( 23691), -INT16_C( 21268), -INT16_C( 14872), -INT16_C( 27875), -INT16_C( 13332), -INT16_C( 31827), -INT16_C( 22632), -INT16_C( 13276) }, { INT16_C( 10393), INT16_C( 2469), INT16_C( 284), INT16_C( 2035), -INT16_C( 5039), INT16_C( 3241), INT16_C( 6103), -INT16_C( 263), INT16_C( 1610), INT16_C( 2618), INT16_C( 3426), -INT16_C( 1665), -INT16_C( 732), -INT16_C( 2287), -INT16_C( 505), INT16_C( 2743), -INT16_C( 8410), -INT16_C( 12303), INT16_C( 107), INT16_C( 664), -INT16_C( 1607), -INT16_C( 14455), INT16_C( 1241), INT16_C( 5103), INT16_C( 2529), -INT16_C( 3815), -INT16_C( 4421), INT16_C( 10004), -INT16_C( 5272), -INT16_C( 7498), -INT16_C( 9941), INT16_C( 1705) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_mulhi_epi16(a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mulhi_epi16) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/mulhrs.c000066400000000000000000000405421400333146700167370ustar00rootroot00000000000000/* 
SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN mulhrs #include #include #include static int test_simde_mm512_mulhrs_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { INT16_C( 2012), -INT16_C( 16441), -INT16_C( 5959), INT16_C( 17687), INT16_C( 27737), -INT16_C( 25181), -INT16_C( 3282), INT16_C( 22705), INT16_C( 13185), INT16_C( 4437), INT16_C( 13761), -INT16_C( 8944), INT16_C( 19921), -INT16_C( 21079), -INT16_C( 20071), INT16_C( 30226), -INT16_C( 9544), INT16_C( 29237), INT16_C( 19650), INT16_C( 7095), INT16_C( 23224), -INT16_C( 6471), INT16_C( 27214), -INT16_C( 12481), -INT16_C( 27491), INT16_C( 24544), -INT16_C( 3895), -INT16_C( 25796), -INT16_C( 6851), -INT16_C( 10424), INT16_C( 23446), INT16_C( 20301) }, { -INT16_C( 32203), -INT16_C( 2111), INT16_C( 30926), -INT16_C( 31214), -INT16_C( 13358), INT16_C( 8300), -INT16_C( 21707), -INT16_C( 11281), -INT16_C( 12481), INT16_C( 2354), INT16_C( 28352), -INT16_C( 604), -INT16_C( 5036), -INT16_C( 5420), INT16_C( 8519), INT16_C( 31801), -INT16_C( 1373), INT16_C( 29043), -INT16_C( 31118), INT16_C( 17911), INT16_C( 25681), -INT16_C( 30875), INT16_C( 21775), INT16_C( 20314), -INT16_C( 29660), -INT16_C( 7080), -INT16_C( 774), INT16_C( 20194), -INT16_C( 18712), INT16_C( 12345), INT16_C( 29400), INT16_C( 31660) }, { -INT16_C( 1977), INT16_C( 1059), -INT16_C( 5624), -INT16_C( 16848), -INT16_C( 11307), -INT16_C( 6378), INT16_C( 2174), -INT16_C( 7817), -INT16_C( 5022), INT16_C( 319), INT16_C( 11906), INT16_C( 165), -INT16_C( 3062), INT16_C( 3487), -INT16_C( 5218), INT16_C( 29334), INT16_C( 400), INT16_C( 25913), -INT16_C( 18661), INT16_C( 3878), INT16_C( 18201), INT16_C( 6097), INT16_C( 18084), -INT16_C( 7737), INT16_C( 24884), -INT16_C( 5303), INT16_C( 92), -INT16_C( 15897), INT16_C( 3912), -INT16_C( 3927), INT16_C( 21036), INT16_C( 19615) } }, { { INT16_C( 8301), -INT16_C( 8211), -INT16_C( 7002), 
-INT16_C( 2268), -INT16_C( 30136), INT16_C( 22654), -INT16_C( 10017), INT16_C( 935), -INT16_C( 156), INT16_C( 24552), -INT16_C( 13573), -INT16_C( 7251), -INT16_C( 6528), INT16_C( 22547), -INT16_C( 16295), -INT16_C( 14636), -INT16_C( 15904), -INT16_C( 31067), -INT16_C( 13659), -INT16_C( 4483), -INT16_C( 940), INT16_C( 13126), -INT16_C( 4652), INT16_C( 14646), INT16_C( 7916), -INT16_C( 6248), INT16_C( 17896), INT16_C( 27082), -INT16_C( 8660), -INT16_C( 31295), -INT16_C( 27234), INT16_C( 32331) }, { -INT16_C( 4010), -INT16_C( 1020), -INT16_C( 32326), INT16_C( 3818), INT16_C( 12413), INT16_C( 21057), INT16_C( 30749), INT16_C( 2443), INT16_C( 9110), INT16_C( 32752), -INT16_C( 17816), -INT16_C( 27416), -INT16_C( 22120), INT16_C( 13849), INT16_C( 25663), -INT16_C( 27212), -INT16_C( 18347), INT16_C( 3985), INT16_C( 31546), -INT16_C( 18658), INT16_C( 24491), -INT16_C( 14327), -INT16_C( 27433), INT16_C( 28369), -INT16_C( 15945), INT16_C( 8429), -INT16_C( 10884), INT16_C( 5300), -INT16_C( 12674), -INT16_C( 17077), -INT16_C( 206), -INT16_C( 30893) }, { -INT16_C( 1016), INT16_C( 256), INT16_C( 6908), -INT16_C( 264), -INT16_C( 11416), INT16_C( 14558), -INT16_C( 9400), INT16_C( 70), -INT16_C( 43), INT16_C( 24540), INT16_C( 7380), INT16_C( 6067), INT16_C( 4407), INT16_C( 9529), -INT16_C( 12762), INT16_C( 12154), INT16_C( 8905), -INT16_C( 3778), -INT16_C( 13150), INT16_C( 2553), -INT16_C( 703), -INT16_C( 5739), INT16_C( 3895), INT16_C( 12680), -INT16_C( 3852), -INT16_C( 1607), -INT16_C( 5944), INT16_C( 4380), INT16_C( 3350), INT16_C( 16309), INT16_C( 171), -INT16_C( 30481) } }, { { -INT16_C( 6984), -INT16_C( 3433), -INT16_C( 19104), INT16_C( 2985), -INT16_C( 19692), -INT16_C( 4908), -INT16_C( 23225), -INT16_C( 166), INT16_C( 18279), -INT16_C( 7393), -INT16_C( 11492), -INT16_C( 25865), INT16_C( 17057), -INT16_C( 11176), -INT16_C( 21694), -INT16_C( 1445), -INT16_C( 3441), -INT16_C( 4116), -INT16_C( 27225), -INT16_C( 17157), -INT16_C( 12472), -INT16_C( 28504), INT16_C( 628), -INT16_C( 
9329), -INT16_C( 20919), INT16_C( 26046), -INT16_C( 18815), INT16_C( 9215), INT16_C( 22520), INT16_C( 15095), INT16_C( 20994), -INT16_C( 28108) }, { INT16_C( 8261), -INT16_C( 4991), INT16_C( 31926), -INT16_C( 344), INT16_C( 20555), -INT16_C( 16242), INT16_C( 7506), -INT16_C( 25701), INT16_C( 23243), INT16_C( 19712), INT16_C( 16), INT16_C( 2160), INT16_C( 26455), INT16_C( 23107), INT16_C( 30649), -INT16_C( 276), INT16_C( 28056), INT16_C( 20203), -INT16_C( 27670), INT16_C( 13644), -INT16_C( 9244), INT16_C( 14069), -INT16_C( 28168), -INT16_C( 15150), -INT16_C( 11541), -INT16_C( 1263), -INT16_C( 32302), INT16_C( 10755), INT16_C( 18152), -INT16_C( 24188), INT16_C( 28862), INT16_C( 22176) }, { -INT16_C( 1761), INT16_C( 523), -INT16_C( 18613), -INT16_C( 31), -INT16_C( 12353), INT16_C( 2433), -INT16_C( 5320), INT16_C( 130), INT16_C( 12966), -INT16_C( 4447), -INT16_C( 6), -INT16_C( 1705), INT16_C( 13771), -INT16_C( 7881), -INT16_C( 20291), INT16_C( 12), -INT16_C( 2946), -INT16_C( 2538), INT16_C( 22989), -INT16_C( 7144), INT16_C( 3518), -INT16_C( 12238), -INT16_C( 540), INT16_C( 4313), INT16_C( 7368), -INT16_C( 1004), INT16_C( 18547), INT16_C( 3025), INT16_C( 12475), -INT16_C( 11143), INT16_C( 18491), -INT16_C( 19022) } }, { { -INT16_C( 29731), -INT16_C( 14428), -INT16_C( 4066), INT16_C( 765), -INT16_C( 3381), -INT16_C( 15303), INT16_C( 2947), INT16_C( 28296), -INT16_C( 26147), -INT16_C( 20375), INT16_C( 27930), INT16_C( 730), INT16_C( 24243), INT16_C( 29091), INT16_C( 17358), -INT16_C( 21561), INT16_C( 27598), -INT16_C( 4749), INT16_C( 28764), INT16_C( 10223), INT16_C( 10338), -INT16_C( 6421), INT16_C( 29491), INT16_C( 4436), -INT16_C( 16884), INT16_C( 9921), -INT16_C( 25813), -INT16_C( 8664), -INT16_C( 13063), -INT16_C( 14512), INT16_C( 5903), -INT16_C( 8590) }, { -INT16_C( 6781), -INT16_C( 8245), -INT16_C( 17835), -INT16_C( 18426), -INT16_C( 3357), INT16_C( 5790), -INT16_C( 3483), INT16_C( 29223), -INT16_C( 5968), -INT16_C( 9320), -INT16_C( 15997), INT16_C( 31930), 
INT16_C( 2701), -INT16_C( 25533), -INT16_C( 18911), -INT16_C( 23430), INT16_C( 17819), -INT16_C( 3709), -INT16_C( 30208), -INT16_C( 7255), INT16_C( 18300), -INT16_C( 7687), INT16_C( 8505), -INT16_C( 5549), -INT16_C( 5111), -INT16_C( 29243), INT16_C( 32685), INT16_C( 14857), INT16_C( 19849), -INT16_C( 21546), INT16_C( 20739), -INT16_C( 25009) }, { INT16_C( 6153), INT16_C( 3630), INT16_C( 2213), -INT16_C( 430), INT16_C( 346), -INT16_C( 2704), -INT16_C( 313), INT16_C( 25235), INT16_C( 4762), INT16_C( 5795), -INT16_C( 13635), INT16_C( 711), INT16_C( 1998), -INT16_C( 22668), -INT16_C( 10018), INT16_C( 15417), INT16_C( 15008), INT16_C( 538), -INT16_C( 26517), -INT16_C( 2263), INT16_C( 5773), INT16_C( 1506), INT16_C( 7654), -INT16_C( 751), INT16_C( 2633), -INT16_C( 8854), -INT16_C( 25748), -INT16_C( 3928), -INT16_C( 7913), INT16_C( 9542), INT16_C( 3736), INT16_C( 6556) } }, { { -INT16_C( 11370), -INT16_C( 26993), INT16_C( 14429), -INT16_C( 9863), INT16_C( 29567), -INT16_C( 17990), INT16_C( 3732), -INT16_C( 25181), INT16_C( 26874), -INT16_C( 22742), INT16_C( 13544), INT16_C( 29153), -INT16_C( 18559), -INT16_C( 31716), INT16_C( 27656), -INT16_C( 24798), -INT16_C( 19905), -INT16_C( 25547), -INT16_C( 20502), INT16_C( 27253), INT16_C( 12066), -INT16_C( 18909), -INT16_C( 14787), INT16_C( 14163), INT16_C( 32302), INT16_C( 5854), -INT16_C( 16462), INT16_C( 13192), -INT16_C( 23433), INT16_C( 32695), -INT16_C( 9968), INT16_C( 20254) }, { INT16_C( 21643), INT16_C( 30443), INT16_C( 24579), INT16_C( 9696), INT16_C( 912), -INT16_C( 12837), INT16_C( 11977), -INT16_C( 2299), -INT16_C( 7252), INT16_C( 24078), -INT16_C( 26973), INT16_C( 6801), INT16_C( 18490), INT16_C( 19353), -INT16_C( 18398), -INT16_C( 21094), -INT16_C( 31220), INT16_C( 3875), INT16_C( 998), INT16_C( 30260), INT16_C( 3846), -INT16_C( 12476), INT16_C( 18749), -INT16_C( 5433), -INT16_C( 10964), -INT16_C( 12472), -INT16_C( 9621), -INT16_C( 23063), -INT16_C( 31966), INT16_C( 17648), -INT16_C( 29893), INT16_C( 18418) }, { 
-INT16_C( 7510), -INT16_C( 25078), INT16_C( 10823), -INT16_C( 2918), INT16_C( 823), INT16_C( 7048), INT16_C( 1364), INT16_C( 1767), -INT16_C( 5948), -INT16_C( 16711), -INT16_C( 11149), INT16_C( 6051), -INT16_C( 10472), -INT16_C( 18732), -INT16_C( 15528), INT16_C( 15963), INT16_C( 18965), -INT16_C( 3021), -INT16_C( 624), INT16_C( 25167), INT16_C( 1416), INT16_C( 7199), -INT16_C( 8461), -INT16_C( 2348), -INT16_C( 10808), -INT16_C( 2228), INT16_C( 4833), -INT16_C( 9285), INT16_C( 22859), INT16_C( 17609), INT16_C( 9093), INT16_C( 11384) } }, { { INT16_C( 5393), -INT16_C( 2218), -INT16_C( 30183), INT16_C( 8046), -INT16_C( 19815), -INT16_C( 10513), -INT16_C( 18693), INT16_C( 10176), INT16_C( 2443), -INT16_C( 2313), -INT16_C( 7965), INT16_C( 1435), -INT16_C( 29597), -INT16_C( 25014), INT16_C( 15383), INT16_C( 10469), INT16_C( 15185), INT16_C( 27167), -INT16_C( 29243), INT16_C( 24202), INT16_C( 31039), INT16_C( 14901), -INT16_C( 2769), -INT16_C( 17822), INT16_C( 23038), -INT16_C( 7760), INT16_C( 19257), -INT16_C( 25113), INT16_C( 12759), -INT16_C( 4549), INT16_C( 8557), -INT16_C( 16874) }, { INT16_C( 13916), INT16_C( 8745), -INT16_C( 19517), INT16_C( 896), -INT16_C( 19156), INT16_C( 23357), -INT16_C( 24661), -INT16_C( 22251), -INT16_C( 14856), INT16_C( 12939), INT16_C( 29200), -INT16_C( 5937), INT16_C( 2723), INT16_C( 4310), -INT16_C( 4821), -INT16_C( 30514), -INT16_C( 2269), -INT16_C( 6486), INT16_C( 10922), -INT16_C( 10519), INT16_C( 10208), -INT16_C( 29903), INT16_C( 18118), -INT16_C( 16588), -INT16_C( 16629), INT16_C( 7409), -INT16_C( 16335), -INT16_C( 11260), -INT16_C( 9526), -INT16_C( 2332), -INT16_C( 19513), -INT16_C( 5506) }, { INT16_C( 2290), -INT16_C( 592), INT16_C( 17977), INT16_C( 220), INT16_C( 11584), -INT16_C( 7494), INT16_C( 14068), -INT16_C( 6910), -INT16_C( 1108), -INT16_C( 913), -INT16_C( 7098), -INT16_C( 260), -INT16_C( 2459), -INT16_C( 3290), -INT16_C( 2263), -INT16_C( 9749), -INT16_C( 1051), -INT16_C( 5377), -INT16_C( 9747), -INT16_C( 7769), INT16_C( 
9669), -INT16_C( 13598), -INT16_C( 1531), INT16_C( 9022), -INT16_C( 11691), -INT16_C( 1755), -INT16_C( 9600), INT16_C( 8630), -INT16_C( 3709), INT16_C( 324), -INT16_C( 5096), INT16_C( 2835) } }, { { INT16_C( 10410), INT16_C( 21969), -INT16_C( 17838), INT16_C( 12843), INT16_C( 24033), -INT16_C( 22339), -INT16_C( 3421), -INT16_C( 20633), INT16_C( 22705), -INT16_C( 7221), -INT16_C( 12520), -INT16_C( 7497), -INT16_C( 25431), INT16_C( 29144), INT16_C( 22095), -INT16_C( 1701), INT16_C( 11390), -INT16_C( 11954), INT16_C( 31463), -INT16_C( 14333), -INT16_C( 15913), INT16_C( 31344), -INT16_C( 10317), INT16_C( 25641), -INT16_C( 3025), INT16_C( 18247), -INT16_C( 61), INT16_C( 27946), INT16_C( 667), -INT16_C( 5410), INT16_C( 14681), -INT16_C( 10269) }, { INT16_C( 12902), INT16_C( 19880), -INT16_C( 21332), -INT16_C( 31979), -INT16_C( 31123), INT16_C( 8445), INT16_C( 10077), -INT16_C( 29308), -INT16_C( 13285), -INT16_C( 8236), -INT16_C( 309), INT16_C( 26188), INT16_C( 10753), INT16_C( 23120), INT16_C( 13155), -INT16_C( 14031), -INT16_C( 9627), INT16_C( 4374), INT16_C( 11398), -INT16_C( 3180), -INT16_C( 27982), INT16_C( 3859), -INT16_C( 26695), -INT16_C( 11108), INT16_C( 29027), INT16_C( 11955), -INT16_C( 145), INT16_C( 28820), -INT16_C( 7127), -INT16_C( 29238), -INT16_C( 1000), INT16_C( 32086) }, { INT16_C( 4099), INT16_C( 13328), INT16_C( 11613), -INT16_C( 12534), -INT16_C( 22827), -INT16_C( 5757), -INT16_C( 1052), INT16_C( 18454), -INT16_C( 9205), INT16_C( 1815), INT16_C( 118), -INT16_C( 5992), -INT16_C( 8345), INT16_C( 20563), INT16_C( 8870), INT16_C( 728), -INT16_C( 3346), -INT16_C( 1596), INT16_C( 10944), INT16_C( 1391), INT16_C( 13589), INT16_C( 3691), INT16_C( 8405), -INT16_C( 8692), -INT16_C( 2680), INT16_C( 6657), INT16_C( 0), INT16_C( 24579), -INT16_C( 145), INT16_C( 4827), -INT16_C( 448), -INT16_C( 10055) } }, { { INT16_C( 28118), INT16_C( 23695), INT16_C( 9113), INT16_C( 19279), INT16_C( 25269), INT16_C( 28250), -INT16_C( 2055), INT16_C( 23875), -INT16_C( 2456), 
-INT16_C( 10357), INT16_C( 8438), INT16_C( 8008), INT16_C( 4612), INT16_C( 7340), INT16_C( 782), -INT16_C( 7014), INT16_C( 10608), INT16_C( 2368), -INT16_C( 28852), INT16_C( 596), -INT16_C( 20751), -INT16_C( 5264), -INT16_C( 19547), INT16_C( 3400), -INT16_C( 11350), -INT16_C( 24347), INT16_C( 11763), -INT16_C( 1857), INT16_C( 27711), INT16_C( 19988), -INT16_C( 20881), -INT16_C( 8398) }, { INT16_C( 29655), INT16_C( 9448), INT16_C( 15362), -INT16_C( 3034), -INT16_C( 26902), -INT16_C( 28449), INT16_C( 10058), -INT16_C( 2915), -INT16_C( 32006), -INT16_C( 4460), INT16_C( 21423), -INT16_C( 4122), -INT16_C( 1345), INT16_C( 11837), INT16_C( 28585), -INT16_C( 32755), -INT16_C( 2590), -INT16_C( 6748), -INT16_C( 13775), INT16_C( 7385), -INT16_C( 18335), -INT16_C( 21588), INT16_C( 18911), -INT16_C( 9825), INT16_C( 13260), INT16_C( 31687), -INT16_C( 21114), INT16_C( 18026), -INT16_C( 22616), INT16_C( 20852), -INT16_C( 32233), -INT16_C( 1583) }, { INT16_C( 25447), INT16_C( 6832), INT16_C( 4272), -INT16_C( 1785), -INT16_C( 20745), -INT16_C( 24526), -INT16_C( 631), -INT16_C( 2124), INT16_C( 2399), INT16_C( 1410), INT16_C( 5517), -INT16_C( 1007), -INT16_C( 189), INT16_C( 2651), INT16_C( 682), INT16_C( 7011), -INT16_C( 838), -INT16_C( 488), INT16_C( 12129), INT16_C( 134), INT16_C( 11611), INT16_C( 3468), -INT16_C( 11281), -INT16_C( 1019), -INT16_C( 4593), -INT16_C( 23544), -INT16_C( 7579), -INT16_C( 1022), -INT16_C( 19126), INT16_C( 12719), INT16_C( 20540), INT16_C( 406) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_mulhrs_epi16(a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mulhrs_epi16) SIMDE_TEST_FUNC_LIST_END #include 
simde-0.7.2/test/x86/avx512/mullo.c000066400000000000000000001440761400333146700165640ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN mullo #include #include static int test_simde_mm512_mullo_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { -INT16_C( 31159), -INT16_C( 12097), INT16_C( 29918), INT16_C( 1377), INT16_C( 32398), INT16_C( 6544), -INT16_C( 30801), INT16_C( 30357), INT16_C( 14887), INT16_C( 9940), INT16_C( 3636), -INT16_C( 23516), INT16_C( 28807), -INT16_C( 9340), -INT16_C( 22852), INT16_C( 1558), -INT16_C( 10708), INT16_C( 2774), INT16_C( 14154), -INT16_C( 10224), -INT16_C( 24394), INT16_C( 26098), -INT16_C( 30937), INT16_C( 20444), -INT16_C( 20287), -INT16_C( 2699), -INT16_C( 26177), INT16_C( 18073), INT16_C( 7434), -INT16_C( 14815), INT16_C( 14275), -INT16_C( 3892) }, { -INT16_C( 23795), INT16_C( 22778), INT16_C( 2778), -INT16_C( 28624), INT16_C( 8875), -INT16_C( 11530), -INT16_C( 11607), INT16_C( 27169), -INT16_C( 26750), INT16_C( 16736), -INT16_C( 1744), INT16_C( 14983), -INT16_C( 22505), -INT16_C( 9727), -INT16_C( 12832), -INT16_C( 4662), -INT16_C( 14992), INT16_C( 19269), INT16_C( 30415), INT16_C( 31451), -INT16_C( 11880), INT16_C( 16973), INT16_C( 28323), INT16_C( 9900), INT16_C( 3077), INT16_C( 13927), -INT16_C( 4346), INT16_C( 7536), INT16_C( 29079), INT16_C( 30711), -INT16_C( 15809), -INT16_C( 20635) }, { INT16_C( 19637), -INT16_C( 32122), INT16_C( 12556), -INT16_C( 28112), INT16_C( 25818), -INT16_C( 20384), INT16_C( 8327), -INT16_C( 1227), -INT16_C( 30514), INT16_C( 25472), INT16_C( 15808), -INT16_C( 18692), -INT16_C( 19423), INT16_C( 17284), INT16_C( 28800), INT16_C( 11100), -INT16_C( 28864), -INT16_C( 25170), -INT16_C( 12074), INT16_C( 30128), INT16_C( 528), INT16_C( 3530), -INT16_C( 12331), INT16_C( 20432), INT16_C( 32709), INT16_C( 28691), -INT16_C( 5254), INT16_C( 14320), -INT16_C( 29978), -INT16_C( 32553), INT16_C( 32509), INT16_C( 29820) } }, { { -INT16_C( 21881), INT16_C( 22266), 
-INT16_C( 10720), -INT16_C( 17967), INT16_C( 7847), INT16_C( 19451), -INT16_C( 22644), -INT16_C( 28047), -INT16_C( 10060), -INT16_C( 17720), INT16_C( 14535), INT16_C( 24535), -INT16_C( 12630), -INT16_C( 5674), INT16_C( 15248), INT16_C( 6040), -INT16_C( 27674), INT16_C( 1646), INT16_C( 16233), INT16_C( 4287), -INT16_C( 17827), -INT16_C( 5797), -INT16_C( 13214), INT16_C( 5755), INT16_C( 17317), INT16_C( 27856), -INT16_C( 22660), INT16_C( 9931), -INT16_C( 23947), INT16_C( 1551), -INT16_C( 22563), -INT16_C( 15587) }, { -INT16_C( 29894), -INT16_C( 23606), -INT16_C( 30262), INT16_C( 10164), INT16_C( 3908), -INT16_C( 23023), -INT16_C( 29476), -INT16_C( 32324), -INT16_C( 29488), INT16_C( 19693), -INT16_C( 18125), -INT16_C( 22414), -INT16_C( 32421), INT16_C( 14510), -INT16_C( 13272), INT16_C( 25596), -INT16_C( 14761), INT16_C( 8710), -INT16_C( 17841), -INT16_C( 27831), INT16_C( 23242), -INT16_C( 22983), -INT16_C( 2585), -INT16_C( 18649), INT16_C( 5249), -INT16_C( 19453), INT16_C( 30157), INT16_C( 10333), INT16_C( 3062), INT16_C( 7777), INT16_C( 24023), INT16_C( 12161) }, { -INT16_C( 4202), -INT16_C( 12476), INT16_C( 5440), INT16_C( 32244), -INT16_C( 4772), -INT16_C( 12885), -INT16_C( 29616), INT16_C( 31740), -INT16_C( 32192), INT16_C( 19240), INT16_C( 7845), -INT16_C( 14914), INT16_C( 8302), -INT16_C( 16524), INT16_C( 3712), INT16_C( 416), INT16_C( 10026), -INT16_C( 15724), -INT16_C( 9369), INT16_C( 29559), -INT16_C( 16542), -INT16_C( 2237), INT16_C( 13934), INT16_C( 22973), -INT16_C( 1499), -INT16_C( 31120), -INT16_C( 13748), -INT16_C( 12353), INT16_C( 9070), INT16_C( 3503), INT16_C( 17307), -INT16_C( 23395) } }, { { -INT16_C( 30685), INT16_C( 29265), -INT16_C( 26046), INT16_C( 3078), INT16_C( 16373), -INT16_C( 9038), -INT16_C( 9931), -INT16_C( 18797), -INT16_C( 26898), -INT16_C( 17557), -INT16_C( 14325), INT16_C( 484), INT16_C( 17875), -INT16_C( 21729), -INT16_C( 24158), -INT16_C( 14886), INT16_C( 11049), INT16_C( 27447), INT16_C( 15813), -INT16_C( 17800), INT16_C( 
10877), -INT16_C( 19818), INT16_C( 10500), -INT16_C( 3480), -INT16_C( 11329), -INT16_C( 13651), -INT16_C( 28261), INT16_C( 28619), -INT16_C( 5162), INT16_C( 30746), -INT16_C( 2932), -INT16_C( 19139) }, { INT16_C( 29983), -INT16_C( 7136), -INT16_C( 26446), INT16_C( 12191), INT16_C( 13763), -INT16_C( 14367), INT16_C( 19039), INT16_C( 7865), INT16_C( 26141), -INT16_C( 17943), -INT16_C( 19208), -INT16_C( 12760), INT16_C( 17055), INT16_C( 11079), -INT16_C( 31690), INT16_C( 21984), INT16_C( 505), -INT16_C( 21447), -INT16_C( 10087), INT16_C( 23771), -INT16_C( 17138), INT16_C( 27939), -INT16_C( 9209), INT16_C( 9355), INT16_C( 29763), INT16_C( 15325), INT16_C( 1321), -INT16_C( 14327), INT16_C( 20551), INT16_C( 32244), -INT16_C( 11051), -INT16_C( 12590) }, { INT16_C( 31549), INT16_C( 28192), INT16_C( 29156), -INT16_C( 28230), INT16_C( 28831), INT16_C( 22130), -INT16_C( 4949), INT16_C( 10811), -INT16_C( 4874), -INT16_C( 6301), -INT16_C( 31064), -INT16_C( 15456), -INT16_C( 15347), -INT16_C( 21863), -INT16_C( 24532), -INT16_C( 32576), INT16_C( 9185), -INT16_C( 11457), INT16_C( 8893), -INT16_C( 23384), -INT16_C( 25642), INT16_C( 18562), -INT16_C( 28900), INT16_C( 15992), -INT16_C( 2307), -INT16_C( 10663), INT16_C( 22739), -INT16_C( 31197), INT16_C( 18522), INT16_C( 10952), INT16_C( 26748), -INT16_C( 15862) } }, { { INT16_C( 3285), INT16_C( 28538), INT16_C( 22244), -INT16_C( 3381), -INT16_C( 4333), INT16_C( 6751), -INT16_C( 5173), INT16_C( 3646), INT16_C( 7263), -INT16_C( 30647), INT16_C( 21281), INT16_C( 26961), INT16_C( 17827), INT16_C( 30950), -INT16_C( 18151), -INT16_C( 4281), -INT16_C( 15931), -INT16_C( 22178), INT16_C( 10519), INT16_C( 10908), -INT16_C( 1256), -INT16_C( 7100), -INT16_C( 31770), INT16_C( 18162), INT16_C( 15519), -INT16_C( 16178), INT16_C( 8079), INT16_C( 12841), INT16_C( 4196), INT16_C( 32427), -INT16_C( 3383), -INT16_C( 29075) }, { -INT16_C( 13389), -INT16_C( 13513), -INT16_C( 11276), INT16_C( 3573), INT16_C( 15055), -INT16_C( 18959), -INT16_C( 7235), 
INT16_C( 23803), -INT16_C( 13793), -INT16_C( 20964), INT16_C( 18153), INT16_C( 20193), -INT16_C( 29610), INT16_C( 8140), INT16_C( 14718), INT16_C( 12717), -INT16_C( 7164), -INT16_C( 1796), -INT16_C( 3400), -INT16_C( 30971), -INT16_C( 2516), -INT16_C( 5828), INT16_C( 14554), -INT16_C( 1723), INT16_C( 24834), -INT16_C( 5208), -INT16_C( 30297), -INT16_C( 711), INT16_C( 1301), -INT16_C( 27876), -INT16_C( 14018), INT16_C( 17092) }, { -INT16_C( 8209), -INT16_C( 20170), -INT16_C( 17072), -INT16_C( 21689), -INT16_C( 24995), -INT16_C( 401), INT16_C( 5599), INT16_C( 16074), INT16_C( 25985), -INT16_C( 31236), -INT16_C( 20727), INT16_C( 15921), -INT16_C( 30526), INT16_C( 12616), -INT16_C( 21682), INT16_C( 18939), INT16_C( 31508), -INT16_C( 14200), INT16_C( 18056), INT16_C( 6412), INT16_C( 14368), INT16_C( 25584), -INT16_C( 24100), -INT16_C( 32454), -INT16_C( 18370), -INT16_C( 24272), INT16_C( 7497), -INT16_C( 20447), INT16_C( 19508), INT16_C( 2996), -INT16_C( 25170), INT16_C( 9588) } }, { { -INT16_C( 15954), INT16_C( 26171), INT16_C( 16563), -INT16_C( 8211), INT16_C( 10551), INT16_C( 4552), INT16_C( 3425), INT16_C( 25354), -INT16_C( 19858), INT16_C( 5711), -INT16_C( 30661), INT16_C( 20499), INT16_C( 12430), -INT16_C( 13085), -INT16_C( 22279), -INT16_C( 22769), INT16_C( 19049), INT16_C( 7181), -INT16_C( 1398), -INT16_C( 15877), -INT16_C( 15580), -INT16_C( 31278), -INT16_C( 8752), INT16_C( 16105), INT16_C( 14479), -INT16_C( 13484), INT16_C( 26816), INT16_C( 19995), -INT16_C( 104), -INT16_C( 28389), INT16_C( 10919), INT16_C( 4153) }, { INT16_C( 18036), -INT16_C( 468), INT16_C( 10049), INT16_C( 26048), -INT16_C( 27926), -INT16_C( 17686), -INT16_C( 11409), -INT16_C( 8), INT16_C( 19723), -INT16_C( 13110), -INT16_C( 6731), INT16_C( 19738), INT16_C( 13796), -INT16_C( 29730), INT16_C( 5983), -INT16_C( 11365), -INT16_C( 14498), -INT16_C( 24622), -INT16_C( 27922), -INT16_C( 10236), -INT16_C( 4572), -INT16_C( 27502), -INT16_C( 29758), -INT16_C( 12909), INT16_C( 24024), -INT16_C( 29287), 
-INT16_C( 19390), INT16_C( 10202), -INT16_C( 18199), INT16_C( 18866), INT16_C( 20176), INT16_C( 11804) }, { INT16_C( 22232), INT16_C( 7204), -INT16_C( 19853), INT16_C( 29376), INT16_C( 2630), -INT16_C( 28464), -INT16_C( 16369), -INT16_C( 6224), -INT16_C( 16198), -INT16_C( 29098), INT16_C( 6327), -INT16_C( 10002), -INT16_C( 23432), -INT16_C( 4646), INT16_C( 4967), -INT16_C( 31979), -INT16_C( 3698), INT16_C( 5546), -INT16_C( 24500), -INT16_C( 12308), -INT16_C( 5872), -INT16_C( 17980), INT16_C( 1952), -INT16_C( 19253), -INT16_C( 21592), -INT16_C( 14028), INT16_C( 384), -INT16_C( 24578), -INT16_C( 7848), -INT16_C( 26682), -INT16_C( 30288), INT16_C( 1084) } }, { { -INT16_C( 4587), INT16_C( 1229), -INT16_C( 11904), -INT16_C( 23076), INT16_C( 28607), -INT16_C( 32455), -INT16_C( 13062), -INT16_C( 11697), -INT16_C( 6103), INT16_C( 27487), INT16_C( 14748), -INT16_C( 31086), INT16_C( 17905), -INT16_C( 15921), -INT16_C( 5229), -INT16_C( 22289), -INT16_C( 17190), INT16_C( 23212), -INT16_C( 30323), INT16_C( 19967), INT16_C( 14584), -INT16_C( 3378), INT16_C( 7428), INT16_C( 11716), INT16_C( 8966), -INT16_C( 23911), INT16_C( 11100), INT16_C( 19752), -INT16_C( 2192), INT16_C( 783), -INT16_C( 285), -INT16_C( 16980) }, { INT16_C( 22715), INT16_C( 18455), INT16_C( 6113), -INT16_C( 9835), INT16_C( 25679), INT16_C( 21707), -INT16_C( 28799), -INT16_C( 30847), INT16_C( 6834), INT16_C( 3626), INT16_C( 21062), -INT16_C( 18852), INT16_C( 27466), INT16_C( 11706), INT16_C( 26217), INT16_C( 9450), INT16_C( 446), -INT16_C( 24467), INT16_C( 536), INT16_C( 26745), INT16_C( 17766), -INT16_C( 5956), INT16_C( 15828), -INT16_C( 30865), -INT16_C( 26280), -INT16_C( 24939), -INT16_C( 3604), INT16_C( 13908), INT16_C( 3676), -INT16_C( 14749), INT16_C( 19828), INT16_C( 13290) }, { INT16_C( 8535), INT16_C( 5739), -INT16_C( 24192), INT16_C( 1292), INT16_C( 6129), INT16_C( 11315), -INT16_C( 4102), -INT16_C( 23857), -INT16_C( 27006), -INT16_C( 12394), -INT16_C( 18264), INT16_C( 10360), -INT16_C( 3414), INT16_C( 
13158), INT16_C( 12619), INT16_C( 1654), INT16_C( 972), INT16_C( 6972), -INT16_C( 200), INT16_C( 30087), -INT16_C( 30000), -INT16_C( 184), -INT16_C( 1200), INT16_C( 13308), -INT16_C( 24560), INT16_C( 4365), -INT16_C( 27440), -INT16_C( 16096), INT16_C( 3136), -INT16_C( 14131), -INT16_C( 14884), -INT16_C( 23752) } }, { { INT16_C( 22350), INT16_C( 26579), INT16_C( 19546), -INT16_C( 16177), -INT16_C( 29807), INT16_C( 26280), INT16_C( 6344), INT16_C( 8429), -INT16_C( 32079), -INT16_C( 25154), INT16_C( 4980), -INT16_C( 12077), INT16_C( 13857), -INT16_C( 26986), -INT16_C( 32381), -INT16_C( 11575), -INT16_C( 25384), INT16_C( 12857), INT16_C( 2280), INT16_C( 31475), -INT16_C( 25709), INT16_C( 23520), -INT16_C( 12877), INT16_C( 25980), INT16_C( 14927), -INT16_C( 15614), -INT16_C( 10675), INT16_C( 28564), INT16_C( 10764), -INT16_C( 28667), -INT16_C( 12629), -INT16_C( 31646) }, { -INT16_C( 25750), INT16_C( 21174), -INT16_C( 22109), INT16_C( 14028), -INT16_C( 21435), -INT16_C( 1903), INT16_C( 3449), -INT16_C( 13987), INT16_C( 24648), -INT16_C( 27252), INT16_C( 8246), INT16_C( 16900), INT16_C( 2379), -INT16_C( 2350), INT16_C( 13527), INT16_C( 16762), INT16_C( 12751), INT16_C( 29332), INT16_C( 24794), INT16_C( 8104), INT16_C( 14861), -INT16_C( 31208), INT16_C( 30023), -INT16_C( 28849), -INT16_C( 9003), INT16_C( 2853), INT16_C( 10748), INT16_C( 18254), INT16_C( 8243), INT16_C( 2622), -INT16_C( 18347), INT16_C( 9292) }, { INT16_C( 24652), INT16_C( 26114), INT16_C( 1870), INT16_C( 20212), INT16_C( 2581), -INT16_C( 6872), -INT16_C( 8568), INT16_C( 2841), INT16_C( 8648), -INT16_C( 9752), -INT16_C( 25992), -INT16_C( 22196), INT16_C( 1195), -INT16_C( 21748), INT16_C( 24837), INT16_C( 31946), INT16_C( 10920), INT16_C( 27380), -INT16_C( 27248), INT16_C( 7288), INT16_C( 13431), -INT16_C( 8960), -INT16_C( 9307), -INT16_C( 27324), INT16_C( 26555), INT16_C( 17738), INT16_C( 18636), INT16_C( 2840), -INT16_C( 8092), INT16_C( 4918), -INT16_C( 31033), INT16_C( 5400) } }, { { -INT16_C( 7959), 
-INT16_C( 15209), INT16_C( 16192), INT16_C( 19939), -INT16_C( 1159), -INT16_C( 15916), INT16_C( 9073), INT16_C( 18000), INT16_C( 30207), -INT16_C( 942), -INT16_C( 24417), -INT16_C( 11709), -INT16_C( 32320), INT16_C( 5596), INT16_C( 10298), INT16_C( 9018), -INT16_C( 12024), INT16_C( 18919), -INT16_C( 13552), -INT16_C( 30058), INT16_C( 27334), INT16_C( 14155), -INT16_C( 25714), -INT16_C( 29314), -INT16_C( 12271), -INT16_C( 20343), -INT16_C( 12944), INT16_C( 12418), INT16_C( 24142), -INT16_C( 30650), -INT16_C( 32633), -INT16_C( 28756) }, { -INT16_C( 27823), INT16_C( 25048), INT16_C( 28510), INT16_C( 9707), INT16_C( 14041), INT16_C( 26460), -INT16_C( 9518), -INT16_C( 7179), INT16_C( 32426), INT16_C( 6803), INT16_C( 5451), -INT16_C( 26037), -INT16_C( 28301), -INT16_C( 1502), -INT16_C( 12783), INT16_C( 25226), INT16_C( 25186), -INT16_C( 16189), -INT16_C( 20527), -INT16_C( 21531), INT16_C( 17125), -INT16_C( 18670), INT16_C( 1820), -INT16_C( 14438), INT16_C( 11654), -INT16_C( 11807), INT16_C( 11330), -INT16_C( 18837), -INT16_C( 28995), -INT16_C( 12624), INT16_C( 14940), -INT16_C( 16848) }, { -INT16_C( 2887), INT16_C( 5736), -INT16_C( 1664), INT16_C( 20065), -INT16_C( 20591), -INT16_C( 3024), INT16_C( 19634), INT16_C( 14992), -INT16_C( 8874), INT16_C( 14102), INT16_C( 6549), -INT16_C( 6239), INT16_C( 2368), -INT16_C( 16584), INT16_C( 22490), INT16_C( 12612), INT16_C( 5392), -INT16_C( 29963), -INT16_C( 18416), INT16_C( 10798), -INT16_C( 28898), -INT16_C( 32698), -INT16_C( 6776), INT16_C( 4044), -INT16_C( 6682), INT16_C( 361), INT16_C( 14048), -INT16_C( 19882), -INT16_C( 7274), INT16_C( 1056), -INT16_C( 14716), -INT16_C( 26560) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_mullo_epi16(a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int 
test_simde_mm512_mullo_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { { -INT32_C( 355975199), -INT32_C( 1700022260), INT32_C( 1271212220), INT32_C( 1338643536), INT32_C( 295713745), INT32_C( 236686063), INT32_C( 1041828026), INT32_C( 2021440918), INT32_C( 140657916), -INT32_C( 1197273604), -INT32_C( 2063308747), INT32_C( 1305857660), -INT32_C( 111250166), -INT32_C( 1190692353), INT32_C( 418914690), -INT32_C( 628067106) }, { -INT32_C( 1243417927), INT32_C( 896435711), INT32_C( 1773826797), INT32_C( 1253478208), INT32_C( 54727684), INT32_C( 280841102), INT32_C( 1244181868), -INT32_C( 500909783), -INT32_C( 1416100181), INT32_C( 2044724876), -INT32_C( 1193043336), INT32_C( 771987754), INT32_C( 1009927854), -INT32_C( 45289583), -INT32_C( 850955100), -INT32_C( 642749651) }, { INT32_C( 1891294105), -INT32_C( 657289228), -INT32_C( 1505595892), INT32_C( 1690158080), -INT32_C( 438287548), INT32_C( 1735267730), INT32_C( 113979512), -INT32_C( 2045525242), INT32_C( 820116564), INT32_C( 603950544), INT32_C( 2143271640), INT32_C( 1089469528), -INT32_C( 2044180276), INT32_C( 47524463), -INT32_C( 751673528), INT32_C( 1390236934) } }, { { -INT32_C( 24885134), -INT32_C( 965254066), INT32_C( 679369470), -INT32_C( 1571323404), INT32_C( 1507756488), INT32_C( 475474552), -INT32_C( 857104738), INT32_C( 2091227402), INT32_C( 796535265), -INT32_C( 1946816115), INT32_C( 1085568076), -INT32_C( 1109259275), INT32_C( 202883220), -INT32_C( 2010616086), INT32_C( 374608140), -INT32_C( 1936524885) }, { -INT32_C( 1346630622), INT32_C( 1245426174), INT32_C( 462090021), -INT32_C( 1898418950), INT32_C( 379252524), INT32_C( 1788789341), INT32_C( 2122380243), INT32_C( 252384236), INT32_C( 499042079), -INT32_C( 1637353096), -INT32_C( 491130392), -INT32_C( 1989111459), -INT32_C( 560002431), -INT32_C( 1605878068), INT32_C( 488556849), -INT32_C( 80991780) }, { INT32_C( 581891876), INT32_C( 550073188), -INT32_C( 
1474148170), INT32_C( 1915213896), INT32_C( 1194419808), INT32_C( 1072258456), -INT32_C( 1189235142), INT32_C( 1551029048), INT32_C( 1213659455), INT32_C( 150363928), -INT32_C( 1477381920), INT32_C( 1544822017), -INT32_C( 110505324), -INT32_C( 1125839240), -INT32_C( 1562267828), -INT32_C( 357633548) } }, { { INT32_C( 1763306480), -INT32_C( 855146268), -INT32_C( 810565518), -INT32_C( 765910959), -INT32_C( 139331542), INT32_C( 1738012982), -INT32_C( 1635404350), -INT32_C( 811945505), -INT32_C( 2126990436), -INT32_C( 1521598669), INT32_C( 1366687231), INT32_C( 1210306077), -INT32_C( 46148410), -INT32_C( 1872439602), INT32_C( 1781524875), INT32_C( 926599579) }, { -INT32_C( 1346866564), -INT32_C( 1319827790), INT32_C( 553830916), INT32_C( 1583949464), -INT32_C( 933517062), INT32_C( 173654142), INT32_C( 1148487849), -INT32_C( 847466927), -INT32_C( 746834911), INT32_C( 1065668923), INT32_C( 861964187), -INT32_C( 1483552083), -INT32_C( 244257422), INT32_C( 1476118957), -INT32_C( 1566871727), INT32_C( 1064245022) }, { INT32_C( 404695104), INT32_C( 737212040), INT32_C( 528283080), INT32_C( 519058968), -INT32_C( 1248466684), -INT32_C( 1216665452), -INT32_C( 1833169646), -INT32_C( 1881417329), -INT32_C( 1067788516), INT32_C( 502084545), -INT32_C( 1744682395), INT32_C( 2019503001), INT32_C( 1527254572), INT32_C( 1590093622), -INT32_C( 1131075077), INT32_C( 1345686826) } }, { { -INT32_C( 2028803252), INT32_C( 1489409725), -INT32_C( 896784867), INT32_C( 1668423408), -INT32_C( 1185619445), -INT32_C( 66039893), -INT32_C( 593581122), INT32_C( 253431235), -INT32_C( 1248449032), -INT32_C( 519152444), INT32_C( 1940691586), -INT32_C( 1009377608), -INT32_C( 1417926144), INT32_C( 933727353), -INT32_C( 82557640), INT32_C( 1242181458) }, { INT32_C( 553689181), INT32_C( 2114064124), INT32_C( 1626451624), -INT32_C( 870070324), INT32_C( 1786224881), INT32_C( 1688346156), -INT32_C( 1252018589), INT32_C( 1107323365), INT32_C( 90374153), -INT32_C( 1232837106), -INT32_C( 518621932), INT32_C( 
783104317), INT32_C( 110699993), -INT32_C( 1486210237), -INT32_C( 698561807), INT32_C( 1025072179) }, { -INT32_C( 1326515556), INT32_C( 1932834828), -INT32_C( 282216184), INT32_C( 1875720000), INT32_C( 659336283), INT32_C( 1918483300), INT32_C( 821526138), -INT32_C( 505918865), INT32_C( 19897784), INT32_C( 885992120), INT32_C( 1967488040), INT32_C( 691292632), INT32_C( 1750178304), INT32_C( 1196664491), INT32_C( 215177656), -INT32_C( 1844779690) } }, { { INT32_C( 1782742108), -INT32_C( 148846878), INT32_C( 2044212796), INT32_C( 1235715440), -INT32_C( 296795990), INT32_C( 1821751931), -INT32_C( 1220284028), -INT32_C( 1426826162), -INT32_C( 1156237352), INT32_C( 967980541), -INT32_C( 592278932), -INT32_C( 1171957233), INT32_C( 380138906), -INT32_C( 1283310289), INT32_C( 2087372078), -INT32_C( 98083039) }, { -INT32_C( 1816839018), -INT32_C( 573741199), INT32_C( 12156913), INT32_C( 1958404057), -INT32_C( 2088082860), -INT32_C( 835318625), -INT32_C( 213212974), -INT32_C( 1779600897), INT32_C( 522822317), -INT32_C( 84085239), INT32_C( 1341896309), -INT32_C( 373049963), -INT32_C( 1217639144), INT32_C( 730178137), INT32_C( 1109381186), -INT32_C( 271119295) }, { -INT32_C( 1725534744), -INT32_C( 1569278014), INT32_C( 684642940), INT32_C( 261925872), INT32_C( 1650871240), -INT32_C( 978413979), -INT32_C( 388587960), INT32_C( 6053810), -INT32_C( 196776712), INT32_C( 2117141477), -INT32_C( 672789668), INT32_C( 14091707), INT32_C( 2116180080), -INT32_C( 2123758761), -INT32_C( 1873922596), INT32_C( 271299425) } }, { { -INT32_C( 1207041873), INT32_C( 1823673078), INT32_C( 1438363328), INT32_C( 2067693155), INT32_C( 607365835), -INT32_C( 1890535348), -INT32_C( 892244088), INT32_C( 716810363), -INT32_C( 1612462167), -INT32_C( 1844734255), -INT32_C( 1477982652), INT32_C( 253961796), INT32_C( 489969360), -INT32_C( 1750301682), INT32_C( 1851882995), -INT32_C( 828827099) }, { -INT32_C( 1301381919), INT32_C( 1447328018), -INT32_C( 2063782848), INT32_C( 580132946), -INT32_C( 2059417482), 
INT32_C( 1058859852), -INT32_C( 1901232792), INT32_C( 2019313303), -INT32_C( 735393086), -INT32_C( 2077593788), -INT32_C( 318232421), -INT32_C( 1089495992), -INT32_C( 1337700508), -INT32_C( 1544593350), INT32_C( 1949408733), -INT32_C( 1494446621) }, { -INT32_C( 2049614385), -INT32_C( 769848500), INT32_C( 1689563136), -INT32_C( 2129863754), -INT32_C( 1942728302), -INT32_C( 2123072880), INT32_C( 2004006720), INT32_C( 734528141), -INT32_C( 1679644654), INT32_C( 1405267588), INT32_C( 107981612), -INT32_C( 1490013408), -INT32_C( 1636034240), INT32_C( 1101867820), INT32_C( 1273676231), -INT32_C( 1656451121) } }, { { -INT32_C( 1669720488), INT32_C( 539010437), INT32_C( 353183949), INT32_C( 701767109), -INT32_C( 1495656340), INT32_C( 1430899064), INT32_C( 1254718054), INT32_C( 1626387720), INT32_C( 1375496908), -INT32_C( 596501489), INT32_C( 166887236), INT32_C( 137610908), INT32_C( 1471090143), INT32_C( 1034811606), INT32_C( 2072475251), -INT32_C( 119834836) }, { -INT32_C( 246818847), INT32_C( 936229875), -INT32_C( 683557061), INT32_C( 1709208710), INT32_C( 1471975297), -INT32_C( 90936953), INT32_C( 209001440), INT32_C( 1946439826), INT32_C( 442846503), INT32_C( 1146237449), INT32_C( 2015073266), -INT32_C( 1998718201), INT32_C( 316643722), -INT32_C( 485723133), INT32_C( 586121871), -INT32_C( 90770478) }, { INT32_C( 1193904984), INT32_C( 1986413631), INT32_C( 847665727), -INT32_C( 941303522), -INT32_C( 1965383060), -INT32_C( 420776376), INT32_C( 1758583616), -INT32_C( 168182128), INT32_C( 1141874964), INT32_C( 38529671), -INT32_C( 226731448), INT32_C( 1372676676), -INT32_C( 1494695626), -INT32_C( 103619966), -INT32_C( 1506897859), INT32_C( 1106150936) } }, { { INT32_C( 1226111808), INT32_C( 529360429), INT32_C( 9939449), INT32_C( 797471908), INT32_C( 289499150), INT32_C( 1811172828), -INT32_C( 1567759409), INT32_C( 379331542), INT32_C( 1281404958), INT32_C( 275508503), INT32_C( 990970774), -INT32_C( 2056611465), -INT32_C( 577328383), -INT32_C( 934704392), INT32_C( 
1131075181), INT32_C( 408553210) }, { -INT32_C( 849036618), INT32_C( 1054789799), INT32_C( 1249505235), -INT32_C( 1966021752), -INT32_C( 2040043890), INT32_C( 1582215409), -INT32_C( 2120042361), INT32_C( 1973025982), INT32_C( 1564671670), -INT32_C( 1583669042), -INT32_C( 1729424369), -INT32_C( 2061321225), INT32_C( 319588642), -INT32_C( 1049535942), -INT32_C( 784198893), -INT32_C( 985212145) }, { -INT32_C( 1737023616), -INT32_C( 112707749), INT32_C( 1700892475), INT32_C( 1831559968), -INT32_C( 748729404), INT32_C( 395086876), INT32_C( 747498025), -INT32_C( 1274068780), -INT32_C( 1992885932), -INT32_C( 1201100670), INT32_C( 620293578), -INT32_C( 859701551), INT32_C( 647118626), -INT32_C( 1963152336), INT32_C( 871498007), -INT32_C( 1147517274) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_mullo_epi32(a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_mullo_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t src[16]; const simde__mmask16 k; const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { { -INT32_C( 466803588), -INT32_C( 1704804295), -INT32_C( 294739476), -INT32_C( 39638017), -INT32_C( 523648680), -INT32_C( 1903034581), -INT32_C( 1147760279), -INT32_C( 982530232), INT32_C( 631872747), INT32_C( 1220480092), -INT32_C( 1405735507), -INT32_C( 1314268583), -INT32_C( 946769253), -INT32_C( 615177614), -INT32_C( 57218124), -INT32_C( 1648294478) }, UINT16_C(27554), { -INT32_C( 2106065214), INT32_C( 2108630087), INT32_C( 2052524241), INT32_C( 1273885626), INT32_C( 258891706), -INT32_C( 788913606), -INT32_C( 522802146), INT32_C( 206272585), -INT32_C( 1114717578), -INT32_C( 1220854298), INT32_C( 3314246), INT32_C( 1061887877), -INT32_C( 1202800258), -INT32_C( 91731749), 
INT32_C( 1054498548), INT32_C( 1296704982) }, { -INT32_C( 854927129), INT32_C( 1535460629), INT32_C( 1566291928), INT32_C( 1419618262), INT32_C( 1007479648), INT32_C( 674665524), -INT32_C( 916057869), INT32_C( 471248949), -INT32_C( 1628888696), INT32_C( 1056534118), -INT32_C( 73640667), INT32_C( 1548696060), INT32_C( 1486379812), -INT32_C( 478032144), INT32_C( 330098654), INT32_C( 540001176) }, { -INT32_C( 466803588), -INT32_C( 1710744365), -INT32_C( 294739476), -INT32_C( 39638017), -INT32_C( 523648680), INT32_C( 1456765896), -INT32_C( 1147760279), -INT32_C( 1063774435), -INT32_C( 330398544), INT32_C( 1592360356), -INT32_C( 1405735507), -INT32_C( 1912429588), -INT32_C( 946769253), INT32_C( 1908377424), INT32_C( 1962705816), -INT32_C( 1648294478) } }, { { INT32_C( 1253972452), -INT32_C( 1383483257), INT32_C( 162014477), -INT32_C( 2107246498), INT32_C( 1138490963), -INT32_C( 1423549236), -INT32_C( 608251069), INT32_C( 2063396502), -INT32_C( 1899709945), INT32_C( 2134592882), -INT32_C( 796334990), INT32_C( 794029788), -INT32_C( 1200410900), -INT32_C( 849110646), INT32_C( 44638828), INT32_C( 394044688) }, UINT16_C(16735), { -INT32_C( 510733659), -INT32_C( 624623279), INT32_C( 617128401), INT32_C( 1129493712), -INT32_C( 790766484), INT32_C( 1408387498), -INT32_C( 923270580), -INT32_C( 1106684135), INT32_C( 2074056745), INT32_C( 1783981209), -INT32_C( 695263995), INT32_C( 1025106385), INT32_C( 1745745598), -INT32_C( 1933836480), INT32_C( 441758465), INT32_C( 886660618) }, { -INT32_C( 1884325642), -INT32_C( 486996771), -INT32_C( 222787551), INT32_C( 674222698), INT32_C( 143670728), INT32_C( 1049905980), INT32_C( 140044798), INT32_C( 1027354951), -INT32_C( 2016613462), INT32_C( 275367407), -INT32_C( 1207819698), -INT32_C( 1126157580), -INT32_C( 1413123985), -INT32_C( 1175889477), -INT32_C( 1983823294), INT32_C( 499580531) }, { -INT32_C( 607731058), -INT32_C( 1176475923), INT32_C( 873194481), INT32_C( 1791065632), INT32_C( 1738141792), -INT32_C( 1423549236), INT32_C( 
1246327656), INT32_C( 2063396502), INT32_C( 2018815546), INT32_C( 2134592882), -INT32_C( 796334990), INT32_C( 794029788), -INT32_C( 1200410900), -INT32_C( 849110646), INT32_C( 482175042), INT32_C( 394044688) } }, { { -INT32_C( 643525911), -INT32_C( 1494675880), INT32_C( 610200624), -INT32_C( 1914683874), INT32_C( 1765320110), INT32_C( 1092756223), -INT32_C( 674503836), -INT32_C( 873098783), INT32_C( 2091161892), -INT32_C( 685601369), -INT32_C( 1745125255), INT32_C( 1814355134), -INT32_C( 2133500543), -INT32_C( 473761921), -INT32_C( 1128624678), INT32_C( 1116188446) }, UINT16_C(11081), { -INT32_C( 524750658), -INT32_C( 1017105720), -INT32_C( 291561783), INT32_C( 1598759306), INT32_C( 1666632353), INT32_C( 1743794605), INT32_C( 1947668461), INT32_C( 278880337), INT32_C( 418404176), INT32_C( 1406947721), -INT32_C( 96371857), INT32_C( 1046056092), INT32_C( 60928086), -INT32_C( 831876383), -INT32_C( 230522463), INT32_C( 839049697) }, { -INT32_C( 1035275464), -INT32_C( 1290459580), INT32_C( 1051547298), INT32_C( 947652578), -INT32_C( 1740955977), INT32_C( 1365681584), INT32_C( 138651687), -INT32_C( 1053145463), INT32_C( 2105771321), INT32_C( 1295030443), -INT32_C( 796074258), -INT32_C( 1677195035), -INT32_C( 701218010), INT32_C( 271031017), -INT32_C( 887592126), -INT32_C( 359902287) }, { INT32_C( 1206190992), -INT32_C( 1494675880), INT32_C( 610200624), -INT32_C( 1659477548), INT32_C( 1765320110), INT32_C( 1092756223), INT32_C( 1694364955), -INT32_C( 873098783), INT32_C( 684720336), INT32_C( 1396952707), -INT32_C( 1745125255), INT32_C( 123709324), -INT32_C( 2133500543), INT32_C( 1805781193), -INT32_C( 1128624678), INT32_C( 1116188446) } }, { { -INT32_C( 2090397480), -INT32_C( 1747871832), INT32_C( 1516723573), -INT32_C( 1930006427), -INT32_C( 1671288141), INT32_C( 111971012), -INT32_C( 1496201739), -INT32_C( 258974184), INT32_C( 376698734), INT32_C( 78464142), INT32_C( 123606433), INT32_C( 949179781), INT32_C( 1154872703), INT32_C( 1951039871), INT32_C( 1578769478), 
-INT32_C( 397497734) }, UINT16_C(49825), { -INT32_C( 1408880386), -INT32_C( 1832802252), INT32_C( 1122453167), INT32_C( 1396139902), -INT32_C( 170543189), INT32_C( 1175526187), -INT32_C( 923759750), INT32_C( 1921684083), -INT32_C( 165768766), -INT32_C( 393683143), -INT32_C( 1557499867), -INT32_C( 2097716777), INT32_C( 1148701720), INT32_C( 1636469223), -INT32_C( 2010482156), -INT32_C( 822430708) }, { INT32_C( 2126780485), INT32_C( 476466679), -INT32_C( 1799384899), INT32_C( 169260786), INT32_C( 1783532930), INT32_C( 734779414), INT32_C( 1605629267), -INT32_C( 282153558), -INT32_C( 1133579579), -INT32_C( 52898753), INT32_C( 1469093733), -INT32_C( 798906802), INT32_C( 1245360180), -INT32_C( 613087608), -INT32_C( 1522849541), -INT32_C( 1701484075) }, { INT32_C( 2025522294), -INT32_C( 1747871832), INT32_C( 1516723573), -INT32_C( 1930006427), -INT32_C( 1671288141), INT32_C( 947756466), -INT32_C( 1496201739), INT32_C( 1655109470), INT32_C( 376698734), INT32_C( 111289095), INT32_C( 123606433), INT32_C( 949179781), INT32_C( 1154872703), INT32_C( 1951039871), INT32_C( 726978972), INT32_C( 225229308) } }, { { -INT32_C( 1688861861), INT32_C( 1016540887), INT32_C( 345188550), INT32_C( 48559566), INT32_C( 760029093), INT32_C( 537510437), -INT32_C( 1060748053), INT32_C( 140204973), INT32_C( 899920222), -INT32_C( 1502463008), INT32_C( 834274659), -INT32_C( 1623941382), -INT32_C( 489848387), INT32_C( 772003395), -INT32_C( 940586726), -INT32_C( 2100344284) }, UINT16_C(29691), { INT32_C( 699325367), INT32_C( 992940417), INT32_C( 1994008898), -INT32_C( 1762158647), INT32_C( 2104245114), INT32_C( 1481016937), INT32_C( 480406093), -INT32_C( 1550868756), -INT32_C( 70435463), -INT32_C( 1858667442), -INT32_C( 301527003), INT32_C( 579141544), -INT32_C( 1549799366), -INT32_C( 1006836362), INT32_C( 1004576335), -INT32_C( 1226936516) }, { -INT32_C( 72242259), -INT32_C( 846403673), -INT32_C( 1598319368), -INT32_C( 842907501), -INT32_C( 1485807312), -INT32_C( 1788187578), INT32_C( 1204832779), 
INT32_C( 1744678586), INT32_C( 23244378), -INT32_C( 1899040874), INT32_C( 372148867), -INT32_C( 85724982), -INT32_C( 1751035055), -INT32_C( 886305600), INT32_C( 269679702), INT32_C( 74911914) }, { INT32_C( 1316272043), -INT32_C( 200551897), INT32_C( 345188550), -INT32_C( 1854904213), INT32_C( 573858016), -INT32_C( 1468592970), -INT32_C( 414919857), INT32_C( 2006572920), INT32_C( 121756298), -INT32_C( 1232558156), INT32_C( 834274659), -INT32_C( 1623941382), -INT32_C( 352858022), -INT32_C( 625631104), INT32_C( 1255731850), -INT32_C( 2100344284) } }, { { INT32_C( 1409735358), INT32_C( 1289934025), INT32_C( 677515358), INT32_C( 1361265920), INT32_C( 1491649688), INT32_C( 656610512), -INT32_C( 1154009584), INT32_C( 79671110), INT32_C( 1381614985), -INT32_C( 123847782), INT32_C( 1277231180), -INT32_C( 576830395), -INT32_C( 650738168), -INT32_C( 1426040421), -INT32_C( 714721393), INT32_C( 1876567782) }, UINT16_C(13291), { INT32_C( 1601078721), -INT32_C( 1621116290), -INT32_C( 1511807993), -INT32_C( 1205081214), -INT32_C( 1005467964), -INT32_C( 654532238), INT32_C( 1358881398), INT32_C( 327412306), -INT32_C( 311168401), -INT32_C( 1282616660), -INT32_C( 111644809), INT32_C( 531727451), -INT32_C( 1125858742), -INT32_C( 627711901), INT32_C( 338334658), -INT32_C( 316100995) }, { INT32_C( 1289395104), -INT32_C( 469735571), INT32_C( 836589782), INT32_C( 659656412), -INT32_C( 1260178095), -INT32_C( 678529003), -INT32_C( 1980974836), INT32_C( 125178983), INT32_C( 475222447), -INT32_C( 1912515656), -INT32_C( 2000626004), -INT32_C( 1095822995), INT32_C( 1534235462), INT32_C( 389153035), INT32_C( 564207290), -INT32_C( 517466318) }, { -INT32_C( 1033276512), -INT32_C( 1748297306), INT32_C( 677515358), -INT32_C( 192774216), INT32_C( 1491649688), -INT32_C( 2086490534), -INT32_C( 396237944), INT32_C( 1747824382), -INT32_C( 1074472735), -INT32_C( 265212000), INT32_C( 1277231180), -INT32_C( 576830395), -INT32_C( 720265668), -INT32_C( 1297676479), -INT32_C( 714721393), INT32_C( 1876567782) } 
}, { { INT32_C( 553548648), INT32_C( 2075131855), INT32_C( 1241739229), -INT32_C( 972508288), INT32_C( 1361148742), INT32_C( 912872316), -INT32_C( 1537799566), -INT32_C( 1970897119), -INT32_C( 911571718), INT32_C( 1615092099), INT32_C( 1219184840), INT32_C( 1091482619), -INT32_C( 1433260242), -INT32_C( 169804925), INT32_C( 664352517), -INT32_C( 1313792074) }, UINT16_C(23459), { -INT32_C( 1078647174), INT32_C( 839351687), -INT32_C( 723189050), -INT32_C( 721152957), -INT32_C( 1613658178), INT32_C( 366401148), -INT32_C( 1372221955), -INT32_C( 1207248834), -INT32_C( 2038972417), INT32_C( 45645372), -INT32_C( 1009279616), INT32_C( 1889131441), -INT32_C( 569415070), INT32_C( 1056171328), -INT32_C( 1326700430), INT32_C( 6944257) }, { -INT32_C( 226041675), -INT32_C( 537641377), -INT32_C( 1918645285), INT32_C( 150813862), -INT32_C( 421131098), INT32_C( 1680136945), INT32_C( 51646722), -INT32_C( 1123844857), -INT32_C( 1112569506), -INT32_C( 1550015545), INT32_C( 355483503), INT32_C( 555625851), INT32_C( 705168441), -INT32_C( 510776098), INT32_C( 1172611901), INT32_C( 2114119712) }, { -INT32_C( 2070219710), -INT32_C( 1629862119), INT32_C( 1241739229), -INT32_C( 972508288), INT32_C( 1361148742), INT32_C( 458588860), -INT32_C( 1537799566), -INT32_C( 1566822478), -INT32_C( 133630302), -INT32_C( 1235594076), INT32_C( 1219184840), -INT32_C( 1253528821), INT32_C( 371734482), -INT32_C( 169804925), INT32_C( 1166948650), -INT32_C( 1313792074) } }, { { INT32_C( 960213361), -INT32_C( 1008936876), -INT32_C( 1814492137), INT32_C( 1924462393), -INT32_C( 627262213), INT32_C( 649800681), -INT32_C( 294936626), -INT32_C( 110269049), INT32_C( 1932699678), -INT32_C( 1741287808), INT32_C( 1395330842), INT32_C( 46522118), -INT32_C( 2049154660), INT32_C( 1521194892), -INT32_C( 1102506186), -INT32_C( 1548241276) }, UINT16_C(59742), { INT32_C( 1291312918), -INT32_C( 1571024521), INT32_C( 696345188), INT32_C( 1082793316), INT32_C( 1322719138), -INT32_C( 1167782287), -INT32_C( 2089752116), -INT32_C( 
1569927284), INT32_C( 636445614), -INT32_C( 658027660), INT32_C( 302074029), INT32_C( 139627366), INT32_C( 341191330), INT32_C( 80657208), INT32_C( 830947237), -INT32_C( 1126894834) }, { -INT32_C( 891174058), -INT32_C( 1146968050), INT32_C( 1456317424), -INT32_C( 782294994), -INT32_C( 2098875062), INT32_C( 377926513), INT32_C( 4656626), INT32_C( 1455168256), -INT32_C( 333406754), INT32_C( 917029445), -INT32_C( 1819511451), -INT32_C( 547034219), INT32_C( 308365729), -INT32_C( 282531843), -INT32_C( 185569292), INT32_C( 1766501515) }, { INT32_C( 960213361), INT32_C( 1187843202), -INT32_C( 1392121408), INT32_C( 1754908664), INT32_C( 1171021524), INT32_C( 649800681), -INT32_C( 735027496), -INT32_C( 110269049), -INT32_C( 1820711196), -INT32_C( 1741287808), INT32_C( 1395330842), -INT32_C( 338049954), -INT32_C( 2049154660), -INT32_C( 738292136), -INT32_C( 936299452), -INT32_C( 1694932838) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi32(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_mask_mullo_epi32(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_mullo_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { UINT16_C(24017), { -INT32_C( 266698699), INT32_C( 176354454), INT32_C( 403676777), INT32_C( 2072272096), INT32_C( 1766446988), INT32_C( 2109727987), INT32_C( 675115709), -INT32_C( 1366946183), INT32_C( 614375566), -INT32_C( 30531180), -INT32_C( 1625932353), INT32_C( 639277722), INT32_C( 1703896177), INT32_C( 115494472), INT32_C( 976101569), -INT32_C( 1108822994) }, { -INT32_C( 371095723), INT32_C( 1743196328), -INT32_C( 418972083), INT32_C( 168632472), -INT32_C( 848323196), 
-INT32_C( 1395437077), -INT32_C( 1595539087), INT32_C( 190697398), -INT32_C( 17547690), -INT32_C( 1671046066), INT32_C( 1921215450), INT32_C( 276599179), INT32_C( 433974062), -INT32_C( 1362710467), INT32_C( 1733209265), -INT32_C( 781014149) }, { -INT32_C( 1644329831), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 620232752), INT32_C( 0), INT32_C( 912286317), -INT32_C( 302571258), -INT32_C( 1551608908), INT32_C( 0), INT32_C( 1803315622), INT32_C( 1151019934), -INT32_C( 1618688178), INT32_C( 0), -INT32_C( 642494095), INT32_C( 0) } }, { UINT16_C(26602), { INT32_C( 876820687), INT32_C( 1486822868), INT32_C( 216607631), INT32_C( 418846523), -INT32_C( 154651600), INT32_C( 832797155), INT32_C( 1407000289), -INT32_C( 1078278160), -INT32_C( 722141697), -INT32_C( 1439919334), -INT32_C( 105507394), INT32_C( 1544662316), -INT32_C( 984360478), INT32_C( 888600147), INT32_C( 76010260), -INT32_C( 1698413926) }, { INT32_C( 1517205567), INT32_C( 168073803), -INT32_C( 620512593), INT32_C( 1228346727), INT32_C( 1091472110), -INT32_C( 1787492992), INT32_C( 1939471832), INT32_C( 2114805055), INT32_C( 1641577237), -INT32_C( 999564267), -INT32_C( 23105898), INT32_C( 1917310595), -INT32_C( 508340639), INT32_C( 863381851), INT32_C( 1705381926), -INT32_C( 2082229395) }, { INT32_C( 0), INT32_C( 1843348764), INT32_C( 0), INT32_C( 553594813), INT32_C( 0), -INT32_C( 690134912), -INT32_C( 779130152), INT32_C( 1355375632), INT32_C( 474634987), -INT32_C( 1477416670), -INT32_C( 1828036268), INT32_C( 0), INT32_C( 0), INT32_C( 1954470529), -INT32_C( 330132232), INT32_C( 0) } }, { UINT16_C(48174), { INT32_C( 1335378916), -INT32_C( 1497551097), INT32_C( 1954365741), INT32_C( 1724571315), INT32_C( 915350975), -INT32_C( 12143271), -INT32_C( 21777638), INT32_C( 448454966), INT32_C( 745099813), -INT32_C( 1361893503), INT32_C( 455299176), -INT32_C( 310252242), INT32_C( 1814237459), -INT32_C( 513054266), INT32_C( 1407131165), INT32_C( 628005120) }, { INT32_C( 1834080235), INT32_C( 1713054974), -INT32_C( 
1568588172), INT32_C( 1066402604), -INT32_C( 626281708), INT32_C( 985339421), INT32_C( 898472501), INT32_C( 526056243), -INT32_C( 779310125), INT32_C( 1144563664), INT32_C( 317176294), -INT32_C( 799967300), INT32_C( 1202453546), INT32_C( 1199662610), INT32_C( 880545537), -INT32_C( 581708278) }, { INT32_C( 0), -INT32_C( 905009934), INT32_C( 1882680932), INT32_C( 1563058116), INT32_C( 0), -INT32_C( 1600442091), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 995269264), INT32_C( 1010339784), -INT32_C( 280985314), -INT32_C( 596093972), INT32_C( 0), -INT32_C( 2059929088) } }, { UINT16_C(57218), { -INT32_C( 410561873), INT32_C( 2107665814), -INT32_C( 789291649), -INT32_C( 657711315), -INT32_C( 398467482), -INT32_C( 1560854490), -INT32_C( 931593868), -INT32_C( 1901593633), -INT32_C( 478859699), INT32_C( 442570139), -INT32_C( 1595255438), -INT32_C( 612845964), -INT32_C( 1144801387), INT32_C( 1818082039), -INT32_C( 1707813189), INT32_C( 505994193) }, { -INT32_C( 1526555382), INT32_C( 633365427), INT32_C( 767929016), -INT32_C( 167231903), -INT32_C( 307115019), INT32_C( 1079578245), -INT32_C( 1227125275), INT32_C( 1926562664), INT32_C( 1410914209), -INT32_C( 226895814), -INT32_C( 484491390), INT32_C( 1926834045), INT32_C( 2002750194), INT32_C( 2126035097), -INT32_C( 1372286139), INT32_C( 924846486) }, { INT32_C( 0), -INT32_C( 1072848414), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 893587560), INT32_C( 298633581), -INT32_C( 1602441954), -INT32_C( 791383580), -INT32_C( 569326940), INT32_C( 1665457370), INT32_C( 0), -INT32_C( 1618525849), INT32_C( 2093688182) } }, { UINT16_C(14560), { INT32_C( 84941451), INT32_C( 742691597), INT32_C( 1347731830), -INT32_C( 1814411725), INT32_C( 1984656318), INT32_C( 638161393), INT32_C( 1596956479), INT32_C( 1654132951), INT32_C( 929540138), -INT32_C( 1302025413), -INT32_C( 1610434452), -INT32_C( 1137451778), INT32_C( 1093828176), INT32_C( 1315388175), -INT32_C( 1297180709), -INT32_C( 787200345) }, 
{ INT32_C( 705264878), -INT32_C( 1797493465), INT32_C( 590667301), INT32_C( 182478778), -INT32_C( 179563803), INT32_C( 692302670), -INT32_C( 237244086), INT32_C( 650374967), -INT32_C( 1823421333), INT32_C( 1579625529), -INT32_C( 998155510), -INT32_C( 1479581246), -INT32_C( 1029956748), INT32_C( 434888910), INT32_C( 151701201), INT32_C( 556780981) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 319355502), -INT32_C( 493660362), -INT32_C( 467052751), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1452558724), INT32_C( 1995680832), INT32_C( 411280914), INT32_C( 0), INT32_C( 0) } }, { UINT16_C(32665), { -INT32_C( 609496396), -INT32_C( 1305037520), INT32_C( 1209268345), INT32_C( 1013155743), INT32_C( 874263113), INT32_C( 1442508107), INT32_C( 623096054), -INT32_C( 2052801327), -INT32_C( 1084207217), INT32_C( 2121373188), INT32_C( 767985038), INT32_C( 1449732620), -INT32_C( 1484094116), INT32_C( 1778156915), INT32_C( 110042933), INT32_C( 1804284892) }, { -INT32_C( 2027230333), INT32_C( 252025985), INT32_C( 758959137), INT32_C( 1384359670), -INT32_C( 1627845077), -INT32_C( 938936941), -INT32_C( 238119147), INT32_C( 1297897930), -INT32_C( 975862204), INT32_C( 1138022946), -INT32_C( 1670311770), -INT32_C( 487656266), -INT32_C( 1786714366), -INT32_C( 228751139), -INT32_C( 354211041), -INT32_C( 935903356) }, { -INT32_C( 1595929060), INT32_C( 0), INT32_C( 0), -INT32_C( 437097270), INT32_C( 1115516995), INT32_C( 0), INT32_C( 0), INT32_C( 1908254186), INT32_C( 2083847164), -INT32_C( 869968760), -INT32_C( 1832814060), INT32_C( 1509905544), -INT32_C( 2023027016), -INT32_C( 693416889), -INT32_C( 208556437), INT32_C( 0) } }, { UINT16_C( 3269), { INT32_C( 1642588301), -INT32_C( 1670214357), INT32_C( 412166186), -INT32_C( 1962896630), -INT32_C( 2045518551), -INT32_C( 1296944177), INT32_C( 1425094173), -INT32_C( 1939753217), -INT32_C( 890353506), -INT32_C( 10067755), -INT32_C( 1827080312), -INT32_C( 1273096053), -INT32_C( 1002819083), -INT32_C( 
2122912668), INT32_C( 584476451), -INT32_C( 1129433315) }, { INT32_C( 1418108031), -INT32_C( 2058031876), INT32_C( 1863871716), INT32_C( 2032350852), -INT32_C( 868393625), INT32_C( 1833808714), INT32_C( 932127514), -INT32_C( 638370470), -INT32_C( 701597222), INT32_C( 1247511142), INT32_C( 1924756462), INT32_C( 300670121), -INT32_C( 2082658247), -INT32_C( 168809765), -INT32_C( 1473478834), -INT32_C( 1753079619) }, { -INT32_C( 510637581), INT32_C( 0), INT32_C( 405372264), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 270038514), INT32_C( 2114066598), INT32_C( 0), INT32_C( 0), INT32_C( 1773156464), -INT32_C( 2020694077), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { UINT16_C(45209), { -INT32_C( 919404691), INT32_C( 37494857), INT32_C( 2111760018), -INT32_C( 727377673), -INT32_C( 1929477989), -INT32_C( 1593095051), INT32_C( 2009188597), -INT32_C( 853058721), -INT32_C( 1533650598), INT32_C( 195482233), -INT32_C( 1350007368), INT32_C( 948120989), INT32_C( 583303853), -INT32_C( 1010577202), -INT32_C( 130382440), INT32_C( 986014176) }, { INT32_C( 870210490), -INT32_C( 432110291), -INT32_C( 1483356662), -INT32_C( 1595991565), INT32_C( 1774363803), INT32_C( 204310132), -INT32_C( 352033013), -INT32_C( 2094675511), INT32_C( 1387725860), -INT32_C( 1824983671), -INT32_C( 1355100740), -INT32_C( 2125456922), INT32_C( 837489341), -INT32_C( 1556211560), INT32_C( 1217282687), INT32_C( 818721804) }, { -INT32_C( 2023795662), INT32_C( 0), INT32_C( 0), INT32_C( 391106933), INT32_C( 1894853081), INT32_C( 0), INT32_C( 0), -INT32_C( 866167657), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1911315527), INT32_C( 1404783184), INT32_C( 0), -INT32_C( 2003691904) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_maskz_mullo_epi32(test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, 
simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mullo_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mullo_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_mullo_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_mullo_epi32) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/negate.c000066400000000000000000000347661400333146700167030ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #define SIMDE_TEST_X86_AVX512_INSN negate #include #include static int test_simde_x_mm512_negate_ps (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 447.12), SIMDE_FLOAT32_C( -523.97), SIMDE_FLOAT32_C( -986.50), SIMDE_FLOAT32_C( 215.70), SIMDE_FLOAT32_C( -620.53), SIMDE_FLOAT32_C( -551.80), SIMDE_FLOAT32_C( 527.48), SIMDE_FLOAT32_C( -998.22), SIMDE_FLOAT32_C( 113.44), SIMDE_FLOAT32_C( -366.28), SIMDE_FLOAT32_C( 857.01), SIMDE_FLOAT32_C( -561.75), SIMDE_FLOAT32_C( -685.47), SIMDE_FLOAT32_C( 121.91), SIMDE_FLOAT32_C( -767.64), SIMDE_FLOAT32_C( 638.60) }, { SIMDE_FLOAT32_C( -447.12), SIMDE_FLOAT32_C( 523.97), SIMDE_FLOAT32_C( 986.50), SIMDE_FLOAT32_C( -215.70), SIMDE_FLOAT32_C( 620.53), SIMDE_FLOAT32_C( 551.80), SIMDE_FLOAT32_C( -527.48), SIMDE_FLOAT32_C( 998.22), SIMDE_FLOAT32_C( -113.44), SIMDE_FLOAT32_C( 366.28), SIMDE_FLOAT32_C( -857.01), SIMDE_FLOAT32_C( 561.75), SIMDE_FLOAT32_C( 685.47), SIMDE_FLOAT32_C( -121.91), SIMDE_FLOAT32_C( 767.64), SIMDE_FLOAT32_C( -638.60) } }, { { SIMDE_FLOAT32_C( -616.86), SIMDE_FLOAT32_C( -616.09), SIMDE_FLOAT32_C( 561.55), SIMDE_FLOAT32_C( 555.86), SIMDE_FLOAT32_C( -361.77), SIMDE_FLOAT32_C( 375.95), SIMDE_FLOAT32_C( 150.28), SIMDE_FLOAT32_C( 812.25), SIMDE_FLOAT32_C( -808.22), SIMDE_FLOAT32_C( -841.08), SIMDE_FLOAT32_C( 575.36), SIMDE_FLOAT32_C( -396.66), SIMDE_FLOAT32_C( 938.55), SIMDE_FLOAT32_C( -369.15), SIMDE_FLOAT32_C( -777.21), SIMDE_FLOAT32_C( 385.67) }, { SIMDE_FLOAT32_C( 616.86), SIMDE_FLOAT32_C( 616.09), SIMDE_FLOAT32_C( -561.55), SIMDE_FLOAT32_C( -555.86), SIMDE_FLOAT32_C( 361.77), SIMDE_FLOAT32_C( -375.95), SIMDE_FLOAT32_C( -150.28), SIMDE_FLOAT32_C( -812.25), SIMDE_FLOAT32_C( 808.22), SIMDE_FLOAT32_C( 841.08), SIMDE_FLOAT32_C( -575.36), SIMDE_FLOAT32_C( 396.66), SIMDE_FLOAT32_C( -938.55), SIMDE_FLOAT32_C( 369.15), SIMDE_FLOAT32_C( 777.21), SIMDE_FLOAT32_C( 
-385.67) } }, { { SIMDE_FLOAT32_C( 106.88), SIMDE_FLOAT32_C( -763.71), SIMDE_FLOAT32_C( -398.63), SIMDE_FLOAT32_C( 486.35), SIMDE_FLOAT32_C( -315.51), SIMDE_FLOAT32_C( -871.16), SIMDE_FLOAT32_C( 488.13), SIMDE_FLOAT32_C( 797.93), SIMDE_FLOAT32_C( -237.43), SIMDE_FLOAT32_C( 345.14), SIMDE_FLOAT32_C( -763.82), SIMDE_FLOAT32_C( 77.10), SIMDE_FLOAT32_C( -532.95), SIMDE_FLOAT32_C( -531.46), SIMDE_FLOAT32_C( -284.30), SIMDE_FLOAT32_C( -149.81) }, { SIMDE_FLOAT32_C( -106.88), SIMDE_FLOAT32_C( 763.71), SIMDE_FLOAT32_C( 398.63), SIMDE_FLOAT32_C( -486.35), SIMDE_FLOAT32_C( 315.51), SIMDE_FLOAT32_C( 871.16), SIMDE_FLOAT32_C( -488.13), SIMDE_FLOAT32_C( -797.93), SIMDE_FLOAT32_C( 237.43), SIMDE_FLOAT32_C( -345.14), SIMDE_FLOAT32_C( 763.82), SIMDE_FLOAT32_C( -77.10), SIMDE_FLOAT32_C( 532.95), SIMDE_FLOAT32_C( 531.46), SIMDE_FLOAT32_C( 284.30), SIMDE_FLOAT32_C( 149.81) } }, { { SIMDE_FLOAT32_C( -147.55), SIMDE_FLOAT32_C( -722.75), SIMDE_FLOAT32_C( -593.95), SIMDE_FLOAT32_C( 490.68), SIMDE_FLOAT32_C( 653.20), SIMDE_FLOAT32_C( 556.33), SIMDE_FLOAT32_C( 302.93), SIMDE_FLOAT32_C( 844.97), SIMDE_FLOAT32_C( 715.24), SIMDE_FLOAT32_C( -121.71), SIMDE_FLOAT32_C( -551.69), SIMDE_FLOAT32_C( 653.79), SIMDE_FLOAT32_C( 509.14), SIMDE_FLOAT32_C( -328.89), SIMDE_FLOAT32_C( 39.46), SIMDE_FLOAT32_C( -383.98) }, { SIMDE_FLOAT32_C( 147.55), SIMDE_FLOAT32_C( 722.75), SIMDE_FLOAT32_C( 593.95), SIMDE_FLOAT32_C( -490.68), SIMDE_FLOAT32_C( -653.20), SIMDE_FLOAT32_C( -556.33), SIMDE_FLOAT32_C( -302.93), SIMDE_FLOAT32_C( -844.97), SIMDE_FLOAT32_C( -715.24), SIMDE_FLOAT32_C( 121.71), SIMDE_FLOAT32_C( 551.69), SIMDE_FLOAT32_C( -653.79), SIMDE_FLOAT32_C( -509.14), SIMDE_FLOAT32_C( 328.89), SIMDE_FLOAT32_C( -39.46), SIMDE_FLOAT32_C( 383.98) } }, { { SIMDE_FLOAT32_C( -92.60), SIMDE_FLOAT32_C( 640.83), SIMDE_FLOAT32_C( -897.63), SIMDE_FLOAT32_C( 591.90), SIMDE_FLOAT32_C( 769.67), SIMDE_FLOAT32_C( 590.51), SIMDE_FLOAT32_C( 389.83), SIMDE_FLOAT32_C( -467.76), SIMDE_FLOAT32_C( -64.35), SIMDE_FLOAT32_C( 626.00), 
SIMDE_FLOAT32_C( 609.34), SIMDE_FLOAT32_C( 402.70), SIMDE_FLOAT32_C( -905.45), SIMDE_FLOAT32_C( -674.96), SIMDE_FLOAT32_C( -747.11), SIMDE_FLOAT32_C( -53.00) }, { SIMDE_FLOAT32_C( 92.60), SIMDE_FLOAT32_C( -640.83), SIMDE_FLOAT32_C( 897.63), SIMDE_FLOAT32_C( -591.90), SIMDE_FLOAT32_C( -769.67), SIMDE_FLOAT32_C( -590.51), SIMDE_FLOAT32_C( -389.83), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 64.35), SIMDE_FLOAT32_C( -626.00), SIMDE_FLOAT32_C( -609.34), SIMDE_FLOAT32_C( -402.70), SIMDE_FLOAT32_C( 905.45), SIMDE_FLOAT32_C( 674.96), SIMDE_FLOAT32_C( 747.11), SIMDE_FLOAT32_C( 53.00) } }, { { SIMDE_FLOAT32_C( -397.72), SIMDE_FLOAT32_C( -341.07), SIMDE_FLOAT32_C( -562.32), SIMDE_FLOAT32_C( -744.52), SIMDE_FLOAT32_C( -784.74), SIMDE_FLOAT32_C( 740.61), SIMDE_FLOAT32_C( -899.55), SIMDE_FLOAT32_C( 930.50), SIMDE_FLOAT32_C( -381.10), SIMDE_FLOAT32_C( -451.23), SIMDE_FLOAT32_C( 584.30), SIMDE_FLOAT32_C( -871.96), SIMDE_FLOAT32_C( 219.87), SIMDE_FLOAT32_C( -376.24), SIMDE_FLOAT32_C( -255.94), SIMDE_FLOAT32_C( -872.72) }, { SIMDE_FLOAT32_C( 397.72), SIMDE_FLOAT32_C( 341.07), SIMDE_FLOAT32_C( 562.32), SIMDE_FLOAT32_C( 744.52), SIMDE_FLOAT32_C( 784.74), SIMDE_FLOAT32_C( -740.61), SIMDE_FLOAT32_C( 899.55), SIMDE_FLOAT32_C( -930.50), SIMDE_FLOAT32_C( 381.10), SIMDE_FLOAT32_C( 451.23), SIMDE_FLOAT32_C( -584.30), SIMDE_FLOAT32_C( 871.96), SIMDE_FLOAT32_C( -219.87), SIMDE_FLOAT32_C( 376.24), SIMDE_FLOAT32_C( 255.94), SIMDE_FLOAT32_C( 872.72) } }, { { SIMDE_FLOAT32_C( -735.42), SIMDE_FLOAT32_C( -153.57), SIMDE_FLOAT32_C( 719.17), SIMDE_FLOAT32_C( -965.75), SIMDE_FLOAT32_C( -563.06), SIMDE_FLOAT32_C( 109.00), SIMDE_FLOAT32_C( -433.51), SIMDE_FLOAT32_C( 372.59), SIMDE_FLOAT32_C( -265.00), SIMDE_FLOAT32_C( -824.17), SIMDE_FLOAT32_C( -224.72), SIMDE_FLOAT32_C( -170.45), SIMDE_FLOAT32_C( -499.13), SIMDE_FLOAT32_C( 28.17), SIMDE_FLOAT32_C( 776.55), SIMDE_FLOAT32_C( 103.15) }, { SIMDE_FLOAT32_C( 735.42), SIMDE_FLOAT32_C( 153.57), SIMDE_FLOAT32_C( -719.17), SIMDE_FLOAT32_C( 965.75), 
SIMDE_FLOAT32_C( 563.06), SIMDE_FLOAT32_C( -109.00), SIMDE_FLOAT32_C( 433.51), SIMDE_FLOAT32_C( -372.59), SIMDE_FLOAT32_C( 265.00), SIMDE_FLOAT32_C( 824.17), SIMDE_FLOAT32_C( 224.72), SIMDE_FLOAT32_C( 170.45), SIMDE_FLOAT32_C( 499.13), SIMDE_FLOAT32_C( -28.17), SIMDE_FLOAT32_C( -776.55), SIMDE_FLOAT32_C( -103.15) } }, { { SIMDE_FLOAT32_C( 687.10), SIMDE_FLOAT32_C( -785.77), SIMDE_FLOAT32_C( 358.63), SIMDE_FLOAT32_C( 902.36), SIMDE_FLOAT32_C( 954.84), SIMDE_FLOAT32_C( 459.09), SIMDE_FLOAT32_C( 832.87), SIMDE_FLOAT32_C( -426.26), SIMDE_FLOAT32_C( -992.15), SIMDE_FLOAT32_C( 417.17), SIMDE_FLOAT32_C( -298.23), SIMDE_FLOAT32_C( 227.73), SIMDE_FLOAT32_C( -959.08), SIMDE_FLOAT32_C( 445.83), SIMDE_FLOAT32_C( 355.00), SIMDE_FLOAT32_C( -694.49) }, { SIMDE_FLOAT32_C( -687.10), SIMDE_FLOAT32_C( 785.77), SIMDE_FLOAT32_C( -358.63), SIMDE_FLOAT32_C( -902.36), SIMDE_FLOAT32_C( -954.84), SIMDE_FLOAT32_C( -459.09), SIMDE_FLOAT32_C( -832.87), SIMDE_FLOAT32_C( 426.26), SIMDE_FLOAT32_C( 992.15), SIMDE_FLOAT32_C( -417.17), SIMDE_FLOAT32_C( 298.23), SIMDE_FLOAT32_C( -227.73), SIMDE_FLOAT32_C( 959.08), SIMDE_FLOAT32_C( -445.83), SIMDE_FLOAT32_C( -355.00), SIMDE_FLOAT32_C( 694.49) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_x_mm512_negate_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512 a = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 r = simde_x_mm512_negate_ps(a); simde_test_x86_write_f32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_x_mm512_negate_pd (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -707.74), SIMDE_FLOAT64_C( 
74.18), SIMDE_FLOAT64_C( -660.24), SIMDE_FLOAT64_C( -270.80), SIMDE_FLOAT64_C( -816.83), SIMDE_FLOAT64_C( -93.75), SIMDE_FLOAT64_C( -898.21), SIMDE_FLOAT64_C( -81.83) }, { SIMDE_FLOAT64_C( 707.74), SIMDE_FLOAT64_C( -74.18), SIMDE_FLOAT64_C( 660.24), SIMDE_FLOAT64_C( 270.80), SIMDE_FLOAT64_C( 816.83), SIMDE_FLOAT64_C( 93.75), SIMDE_FLOAT64_C( 898.21), SIMDE_FLOAT64_C( 81.83) } }, { { SIMDE_FLOAT64_C( 82.08), SIMDE_FLOAT64_C( -122.93), SIMDE_FLOAT64_C( 747.72), SIMDE_FLOAT64_C( 582.95), SIMDE_FLOAT64_C( 905.24), SIMDE_FLOAT64_C( 524.27), SIMDE_FLOAT64_C( -313.90), SIMDE_FLOAT64_C( 592.34) }, { SIMDE_FLOAT64_C( -82.08), SIMDE_FLOAT64_C( 122.93), SIMDE_FLOAT64_C( -747.72), SIMDE_FLOAT64_C( -582.95), SIMDE_FLOAT64_C( -905.24), SIMDE_FLOAT64_C( -524.27), SIMDE_FLOAT64_C( 313.90), SIMDE_FLOAT64_C( -592.34) } }, { { SIMDE_FLOAT64_C( 738.50), SIMDE_FLOAT64_C( -955.26), SIMDE_FLOAT64_C( 494.71), SIMDE_FLOAT64_C( 693.34), SIMDE_FLOAT64_C( 503.82), SIMDE_FLOAT64_C( 327.58), SIMDE_FLOAT64_C( -732.92), SIMDE_FLOAT64_C( 511.68) }, { SIMDE_FLOAT64_C( -738.50), SIMDE_FLOAT64_C( 955.26), SIMDE_FLOAT64_C( -494.71), SIMDE_FLOAT64_C( -693.34), SIMDE_FLOAT64_C( -503.82), SIMDE_FLOAT64_C( -327.58), SIMDE_FLOAT64_C( 732.92), SIMDE_FLOAT64_C( -511.68) } }, { { SIMDE_FLOAT64_C( -255.26), SIMDE_FLOAT64_C( -31.15), SIMDE_FLOAT64_C( -260.59), SIMDE_FLOAT64_C( -214.33), SIMDE_FLOAT64_C( -585.32), SIMDE_FLOAT64_C( -905.59), SIMDE_FLOAT64_C( 91.17), SIMDE_FLOAT64_C( -293.05) }, { SIMDE_FLOAT64_C( 255.26), SIMDE_FLOAT64_C( 31.15), SIMDE_FLOAT64_C( 260.59), SIMDE_FLOAT64_C( 214.33), SIMDE_FLOAT64_C( 585.32), SIMDE_FLOAT64_C( 905.59), SIMDE_FLOAT64_C( -91.17), SIMDE_FLOAT64_C( 293.05) } }, { { SIMDE_FLOAT64_C( 168.59), SIMDE_FLOAT64_C( 430.93), SIMDE_FLOAT64_C( 436.15), SIMDE_FLOAT64_C( 351.76), SIMDE_FLOAT64_C( -662.82), SIMDE_FLOAT64_C( 537.93), SIMDE_FLOAT64_C( -730.06), SIMDE_FLOAT64_C( 419.27) }, { SIMDE_FLOAT64_C( -168.59), SIMDE_FLOAT64_C( -430.93), SIMDE_FLOAT64_C( -436.15), SIMDE_FLOAT64_C( 
-351.76), SIMDE_FLOAT64_C( 662.82), SIMDE_FLOAT64_C( -537.93), SIMDE_FLOAT64_C( 730.06), SIMDE_FLOAT64_C( -419.27) } }, { { SIMDE_FLOAT64_C( -585.00), SIMDE_FLOAT64_C( -982.34), SIMDE_FLOAT64_C( 2.22), SIMDE_FLOAT64_C( -679.76), SIMDE_FLOAT64_C( 541.93), SIMDE_FLOAT64_C( 688.32), SIMDE_FLOAT64_C( 912.59), SIMDE_FLOAT64_C( 280.44) }, { SIMDE_FLOAT64_C( 585.00), SIMDE_FLOAT64_C( 982.34), SIMDE_FLOAT64_C( -2.22), SIMDE_FLOAT64_C( 679.76), SIMDE_FLOAT64_C( -541.93), SIMDE_FLOAT64_C( -688.32), SIMDE_FLOAT64_C( -912.59), SIMDE_FLOAT64_C( -280.44) } }, { { SIMDE_FLOAT64_C( 733.06), SIMDE_FLOAT64_C( 407.30), SIMDE_FLOAT64_C( -26.22), SIMDE_FLOAT64_C( 236.89), SIMDE_FLOAT64_C( -265.13), SIMDE_FLOAT64_C( 240.86), SIMDE_FLOAT64_C( -251.44), SIMDE_FLOAT64_C( 479.62) }, { SIMDE_FLOAT64_C( -733.06), SIMDE_FLOAT64_C( -407.30), SIMDE_FLOAT64_C( 26.22), SIMDE_FLOAT64_C( -236.89), SIMDE_FLOAT64_C( 265.13), SIMDE_FLOAT64_C( -240.86), SIMDE_FLOAT64_C( 251.44), SIMDE_FLOAT64_C( -479.62) } }, { { SIMDE_FLOAT64_C( -790.29), SIMDE_FLOAT64_C( 487.97), SIMDE_FLOAT64_C( -734.72), SIMDE_FLOAT64_C( -375.60), SIMDE_FLOAT64_C( 582.38), SIMDE_FLOAT64_C( 356.45), SIMDE_FLOAT64_C( 331.34), SIMDE_FLOAT64_C( -249.03) }, { SIMDE_FLOAT64_C( 790.29), SIMDE_FLOAT64_C( -487.97), SIMDE_FLOAT64_C( 734.72), SIMDE_FLOAT64_C( 375.60), SIMDE_FLOAT64_C( -582.38), SIMDE_FLOAT64_C( -356.45), SIMDE_FLOAT64_C( -331.34), SIMDE_FLOAT64_C( 249.03) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_x_mm512_negate_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512d a = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m512d r = simde_x_mm512_negate_pd(a); simde_test_x86_write_f64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x8(2, r, 
SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(x_mm512_negate_ps) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm512_negate_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/or.c000066400000000000000000002606171400333146700160540ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN or #include #include #include static int test_simde_mm512_or_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 480.60), SIMDE_FLOAT32_C( -511.13), SIMDE_FLOAT32_C( -479.78), SIMDE_FLOAT32_C( 269.24), SIMDE_FLOAT32_C( -874.76), SIMDE_FLOAT32_C( -72.46), SIMDE_FLOAT32_C( 197.37), SIMDE_FLOAT32_C( -811.08), SIMDE_FLOAT32_C( 97.54), SIMDE_FLOAT32_C( -611.27), SIMDE_FLOAT32_C( 407.81), SIMDE_FLOAT32_C( -9.58), SIMDE_FLOAT32_C( -941.56), SIMDE_FLOAT32_C( -785.37), SIMDE_FLOAT32_C( 331.51), SIMDE_FLOAT32_C( -275.33) }, { SIMDE_FLOAT32_C( 603.88), SIMDE_FLOAT32_C( -554.12), SIMDE_FLOAT32_C( 900.59), SIMDE_FLOAT32_C( 137.08), SIMDE_FLOAT32_C( -120.48), SIMDE_FLOAT32_C( -863.13), SIMDE_FLOAT32_C( -707.03), SIMDE_FLOAT32_C( 972.38), SIMDE_FLOAT32_C( 820.00), SIMDE_FLOAT32_C( -330.32), SIMDE_FLOAT32_C( 241.94), SIMDE_FLOAT32_C( 338.15), SIMDE_FLOAT32_C( -659.11), SIMDE_FLOAT32_C( 398.68), SIMDE_FLOAT32_C( 573.47), SIMDE_FLOAT32_C( 358.72) }, { SIMDE_FLOAT32_C(126457.74), SIMDE_FLOAT32_C(-130863.36), SIMDE_FLOAT32_C(-122831.68), SIMDE_FLOAT32_C( 287.24), SIMDE_FLOAT32_C(-32122.95), SIMDE_FLOAT32_C(-27637.92), SIMDE_FLOAT32_C(-62943.98), SIMDE_FLOAT32_C( -1007.46), SIMDE_FLOAT32_C( 26506.24), SIMDE_FLOAT32_C(-97267.98), SIMDE_FLOAT32_C( 503.93), SIMDE_FLOAT32_C( -370.69), SIMDE_FLOAT32_C( -959.62), SIMDE_FLOAT32_C(-102063.36), SIMDE_FLOAT32_C( 90046.72), SIMDE_FLOAT32_C( -375.99) } }, { { SIMDE_FLOAT32_C( 993.93), SIMDE_FLOAT32_C( -985.03), SIMDE_FLOAT32_C( 659.60), SIMDE_FLOAT32_C( -173.26), SIMDE_FLOAT32_C( 63.41), SIMDE_FLOAT32_C( -232.36), SIMDE_FLOAT32_C( 790.92), SIMDE_FLOAT32_C( 958.34), SIMDE_FLOAT32_C( 817.72), SIMDE_FLOAT32_C( -499.64), SIMDE_FLOAT32_C( 607.64), SIMDE_FLOAT32_C( 603.33), SIMDE_FLOAT32_C( 226.40), SIMDE_FLOAT32_C( 
-876.92), SIMDE_FLOAT32_C( -443.68), SIMDE_FLOAT32_C( -893.18) }, { SIMDE_FLOAT32_C( 265.66), SIMDE_FLOAT32_C( 933.56), SIMDE_FLOAT32_C( 320.39), SIMDE_FLOAT32_C( 825.80), SIMDE_FLOAT32_C( -854.99), SIMDE_FLOAT32_C( 141.14), SIMDE_FLOAT32_C( 123.87), SIMDE_FLOAT32_C( -184.24), SIMDE_FLOAT32_C( -440.97), SIMDE_FLOAT32_C( 558.13), SIMDE_FLOAT32_C( 445.71), SIMDE_FLOAT32_C( -893.22), SIMDE_FLOAT32_C( -301.77), SIMDE_FLOAT32_C( 180.84), SIMDE_FLOAT32_C( -576.68), SIMDE_FLOAT32_C( -226.07) }, { SIMDE_FLOAT32_C(129535.99), SIMDE_FLOAT32_C( -1021.56), SIMDE_FLOAT32_C( 84463.86), SIMDE_FLOAT32_C(-61299.75), SIMDE_FLOAT32_C(-16239.97), SIMDE_FLOAT32_C( -237.50), SIMDE_FLOAT32_C( 31711.97), SIMDE_FLOAT32_C(-65469.95), SIMDE_FLOAT32_C(-112892.48), SIMDE_FLOAT32_C(-128947.98), SIMDE_FLOAT32_C(114677.93), SIMDE_FLOAT32_C( -895.49), SIMDE_FLOAT32_C( -493.80), SIMDE_FLOAT32_C(-65535.92), SIMDE_FLOAT32_C(-113663.12), SIMDE_FLOAT32_C(-65371.93) } }, { { SIMDE_FLOAT32_C( 454.76), SIMDE_FLOAT32_C( -837.79), SIMDE_FLOAT32_C( -704.90), SIMDE_FLOAT32_C( 766.06), SIMDE_FLOAT32_C( -788.99), SIMDE_FLOAT32_C( -546.21), SIMDE_FLOAT32_C( -221.91), SIMDE_FLOAT32_C( 380.98), SIMDE_FLOAT32_C( -951.29), SIMDE_FLOAT32_C( -366.31), SIMDE_FLOAT32_C( -652.14), SIMDE_FLOAT32_C( -549.49), SIMDE_FLOAT32_C( 782.12), SIMDE_FLOAT32_C( -971.68), SIMDE_FLOAT32_C( 893.59), SIMDE_FLOAT32_C( 570.16) }, { SIMDE_FLOAT32_C( 642.89), SIMDE_FLOAT32_C( -941.41), SIMDE_FLOAT32_C( -206.47), SIMDE_FLOAT32_C( -125.77), SIMDE_FLOAT32_C( 256.81), SIMDE_FLOAT32_C( 243.45), SIMDE_FLOAT32_C( -290.65), SIMDE_FLOAT32_C( -982.09), SIMDE_FLOAT32_C( -3.25), SIMDE_FLOAT32_C( -692.16), SIMDE_FLOAT32_C( 812.74), SIMDE_FLOAT32_C( -312.12), SIMDE_FLOAT32_C( 275.94), SIMDE_FLOAT32_C( -213.58), SIMDE_FLOAT32_C( 384.73), SIMDE_FLOAT32_C( 236.37) }, { SIMDE_FLOAT32_C(116723.98), SIMDE_FLOAT32_C( -1005.92), SIMDE_FLOAT32_C(-65145.85), SIMDE_FLOAT32_C(-32710.00), SIMDE_FLOAT32_C(-101119.98), SIMDE_FLOAT32_C(-64511.45), SIMDE_FLOAT32_C( 
-443.97), SIMDE_FLOAT32_C(-131067.90), SIMDE_FLOAT32_C( -1015.29), SIMDE_FLOAT32_C(-97887.49), SIMDE_FLOAT32_C( -940.75), SIMDE_FLOAT32_C(-80574.72), SIMDE_FLOAT32_C(104447.98), SIMDE_FLOAT32_C(-63488.00), SIMDE_FLOAT32_C(114427.90), SIMDE_FLOAT32_C( 61150.74) } }, { { SIMDE_FLOAT32_C( 695.36), SIMDE_FLOAT32_C( 970.40), SIMDE_FLOAT32_C( -483.18), SIMDE_FLOAT32_C( -766.40), SIMDE_FLOAT32_C( -816.54), SIMDE_FLOAT32_C( 446.99), SIMDE_FLOAT32_C( 488.36), SIMDE_FLOAT32_C( -116.86), SIMDE_FLOAT32_C( -123.76), SIMDE_FLOAT32_C( -85.46), SIMDE_FLOAT32_C( -395.60), SIMDE_FLOAT32_C( -799.86), SIMDE_FLOAT32_C( 677.35), SIMDE_FLOAT32_C( 270.67), SIMDE_FLOAT32_C( -887.56), SIMDE_FLOAT32_C( 725.18) }, { SIMDE_FLOAT32_C( 61.47), SIMDE_FLOAT32_C( -848.75), SIMDE_FLOAT32_C( -941.17), SIMDE_FLOAT32_C( -221.91), SIMDE_FLOAT32_C( 392.93), SIMDE_FLOAT32_C( 38.45), SIMDE_FLOAT32_C( 198.62), SIMDE_FLOAT32_C( 165.26), SIMDE_FLOAT32_C( -481.15), SIMDE_FLOAT32_C( -648.25), SIMDE_FLOAT32_C( 912.49), SIMDE_FLOAT32_C( 198.88), SIMDE_FLOAT32_C( 535.07), SIMDE_FLOAT32_C( 853.64), SIMDE_FLOAT32_C( -950.23), SIMDE_FLOAT32_C( -538.25) }, { SIMDE_FLOAT32_C( 16253.82), SIMDE_FLOAT32_C( -986.90), SIMDE_FLOAT32_C(-128959.84), SIMDE_FLOAT32_C(-65529.99), SIMDE_FLOAT32_C(-104687.12), SIMDE_FLOAT32_C( 447.99), SIMDE_FLOAT32_C( 493.49), SIMDE_FLOAT32_C( -475.96), SIMDE_FLOAT32_C( -495.18), SIMDE_FLOAT32_C(-21885.76), SIMDE_FLOAT32_C(-117695.73), SIMDE_FLOAT32_C(-51191.29), SIMDE_FLOAT32_C( 695.35), SIMDE_FLOAT32_C(110331.93), SIMDE_FLOAT32_C( -1015.75), SIMDE_FLOAT32_C( -735.43) } }, { { SIMDE_FLOAT32_C( -410.72), SIMDE_FLOAT32_C( -917.00), SIMDE_FLOAT32_C( -67.17), SIMDE_FLOAT32_C( -908.76), SIMDE_FLOAT32_C( 534.17), SIMDE_FLOAT32_C( -240.43), SIMDE_FLOAT32_C( -833.05), SIMDE_FLOAT32_C( 947.68), SIMDE_FLOAT32_C( -393.55), SIMDE_FLOAT32_C( -335.35), SIMDE_FLOAT32_C( -257.27), SIMDE_FLOAT32_C( 91.70), SIMDE_FLOAT32_C( -820.62), SIMDE_FLOAT32_C( -157.46), SIMDE_FLOAT32_C( -507.25), SIMDE_FLOAT32_C( 705.00) }, 
{ SIMDE_FLOAT32_C( 880.62), SIMDE_FLOAT32_C( 602.69), SIMDE_FLOAT32_C( -582.79), SIMDE_FLOAT32_C( -873.03), SIMDE_FLOAT32_C( 911.31), SIMDE_FLOAT32_C( 402.01), SIMDE_FLOAT32_C( 647.66), SIMDE_FLOAT32_C( -853.70), SIMDE_FLOAT32_C( -128.13), SIMDE_FLOAT32_C( 472.58), SIMDE_FLOAT32_C( 575.79), SIMDE_FLOAT32_C( -650.07), SIMDE_FLOAT32_C( 200.59), SIMDE_FLOAT32_C( -601.69), SIMDE_FLOAT32_C( -623.66), SIMDE_FLOAT32_C( 186.55) }, { SIMDE_FLOAT32_C(-113407.37), SIMDE_FLOAT32_C( -991.69), SIMDE_FLOAT32_C(-19451.78), SIMDE_FLOAT32_C( -1005.78), SIMDE_FLOAT32_C( 927.44), SIMDE_FLOAT32_C( -498.87), SIMDE_FLOAT32_C( -967.68), SIMDE_FLOAT32_C( -1015.75), SIMDE_FLOAT32_C( -393.81), SIMDE_FLOAT32_C( -479.87), SIMDE_FLOAT32_C(-73701.12), SIMDE_FLOAT32_C(-23539.25), SIMDE_FLOAT32_C(-52663.68), SIMDE_FLOAT32_C(-40829.92), SIMDE_FLOAT32_C(-131028.48), SIMDE_FLOAT32_C( 47820.80) } }, { { SIMDE_FLOAT32_C( -701.53), SIMDE_FLOAT32_C( -543.04), SIMDE_FLOAT32_C( -823.70), SIMDE_FLOAT32_C( 31.11), SIMDE_FLOAT32_C( 866.09), SIMDE_FLOAT32_C( 355.48), SIMDE_FLOAT32_C( 555.77), SIMDE_FLOAT32_C( 886.63), SIMDE_FLOAT32_C( -481.79), SIMDE_FLOAT32_C( 592.72), SIMDE_FLOAT32_C( -189.48), SIMDE_FLOAT32_C( -527.23), SIMDE_FLOAT32_C( 860.31), SIMDE_FLOAT32_C( -791.91), SIMDE_FLOAT32_C( 352.98), SIMDE_FLOAT32_C( 475.12) }, { SIMDE_FLOAT32_C( -598.83), SIMDE_FLOAT32_C( -554.85), SIMDE_FLOAT32_C( 653.69), SIMDE_FLOAT32_C( 649.29), SIMDE_FLOAT32_C( 536.82), SIMDE_FLOAT32_C( 402.83), SIMDE_FLOAT32_C( 333.46), SIMDE_FLOAT32_C( 382.59), SIMDE_FLOAT32_C( 275.47), SIMDE_FLOAT32_C( -132.03), SIMDE_FLOAT32_C( 922.98), SIMDE_FLOAT32_C( 461.96), SIMDE_FLOAT32_C( 85.99), SIMDE_FLOAT32_C( -806.33), SIMDE_FLOAT32_C( 56.71), SIMDE_FLOAT32_C( -909.56) }, { SIMDE_FLOAT32_C( -767.84), SIMDE_FLOAT32_C( -575.86), SIMDE_FLOAT32_C( -959.70), SIMDE_FLOAT32_C( 8030.48), SIMDE_FLOAT32_C( 890.84), SIMDE_FLOAT32_C( 500.00), SIMDE_FLOAT32_C( 89591.82), SIMDE_FLOAT32_C(131031.68), SIMDE_FLOAT32_C( -499.98), SIMDE_FLOAT32_C(-37935.74), 
SIMDE_FLOAT32_C(-65534.97), SIMDE_FLOAT32_C(-118781.95), SIMDE_FLOAT32_C( 32765.98), SIMDE_FLOAT32_C( -823.99), SIMDE_FLOAT32_C( 486.00), SIMDE_FLOAT32_C(-122847.74) } }, { { SIMDE_FLOAT32_C( 944.34), SIMDE_FLOAT32_C( -560.36), SIMDE_FLOAT32_C( -4.19), SIMDE_FLOAT32_C( -479.80), SIMDE_FLOAT32_C( 51.14), SIMDE_FLOAT32_C( -569.84), SIMDE_FLOAT32_C( 718.42), SIMDE_FLOAT32_C( 535.49), SIMDE_FLOAT32_C( 31.54), SIMDE_FLOAT32_C( 142.94), SIMDE_FLOAT32_C( 349.37), SIMDE_FLOAT32_C( -194.12), SIMDE_FLOAT32_C( -641.81), SIMDE_FLOAT32_C( -963.18), SIMDE_FLOAT32_C( -221.26), SIMDE_FLOAT32_C( -763.94) }, { SIMDE_FLOAT32_C( 382.41), SIMDE_FLOAT32_C( -851.69), SIMDE_FLOAT32_C( -422.97), SIMDE_FLOAT32_C( -784.40), SIMDE_FLOAT32_C( 546.04), SIMDE_FLOAT32_C( -653.53), SIMDE_FLOAT32_C( 109.51), SIMDE_FLOAT32_C( 533.50), SIMDE_FLOAT32_C( 473.25), SIMDE_FLOAT32_C( -161.95), SIMDE_FLOAT32_C( 68.59), SIMDE_FLOAT32_C( 837.71), SIMDE_FLOAT32_C( -640.75), SIMDE_FLOAT32_C( 889.72), SIMDE_FLOAT32_C( -478.90), SIMDE_FLOAT32_C( -840.17) }, { SIMDE_FLOAT32_C(130667.96), SIMDE_FLOAT32_C( -883.99), SIMDE_FLOAT32_C( -430.97), SIMDE_FLOAT32_C(-122879.98), SIMDE_FLOAT32_C( 13091.97), SIMDE_FLOAT32_C( -701.84), SIMDE_FLOAT32_C( 32208.00), SIMDE_FLOAT32_C( 535.99), SIMDE_FLOAT32_C( 505.89), SIMDE_FLOAT32_C( -175.95), SIMDE_FLOAT32_C( 351.37), SIMDE_FLOAT32_C(-54143.97), SIMDE_FLOAT32_C( -641.81), SIMDE_FLOAT32_C( -1019.74), SIMDE_FLOAT32_C( -510.90), SIMDE_FLOAT32_C( -1019.98) } }, { { SIMDE_FLOAT32_C( -711.36), SIMDE_FLOAT32_C( 214.45), SIMDE_FLOAT32_C( 194.07), SIMDE_FLOAT32_C( -275.37), SIMDE_FLOAT32_C( -213.28), SIMDE_FLOAT32_C( -677.66), SIMDE_FLOAT32_C( 637.68), SIMDE_FLOAT32_C( -440.50), SIMDE_FLOAT32_C( 586.73), SIMDE_FLOAT32_C( -829.70), SIMDE_FLOAT32_C( -729.50), SIMDE_FLOAT32_C( -650.44), SIMDE_FLOAT32_C( 123.67), SIMDE_FLOAT32_C( 515.02), SIMDE_FLOAT32_C( 23.03), SIMDE_FLOAT32_C( -964.09) }, { SIMDE_FLOAT32_C( 587.18), SIMDE_FLOAT32_C( 655.47), SIMDE_FLOAT32_C( 537.23), SIMDE_FLOAT32_C( 
-697.87), SIMDE_FLOAT32_C( 944.44), SIMDE_FLOAT32_C( -768.05), SIMDE_FLOAT32_C( -535.33), SIMDE_FLOAT32_C( 695.04), SIMDE_FLOAT32_C( -482.15), SIMDE_FLOAT32_C( 455.20), SIMDE_FLOAT32_C( 561.68), SIMDE_FLOAT32_C( 223.05), SIMDE_FLOAT32_C( 840.35), SIMDE_FLOAT32_C( -187.16), SIMDE_FLOAT32_C( 369.47), SIMDE_FLOAT32_C( -383.36) }, { SIMDE_FLOAT32_C( -719.49), SIMDE_FLOAT32_C( 63487.21), SIMDE_FLOAT32_C( 50783.98), SIMDE_FLOAT32_C(-90111.98), SIMDE_FLOAT32_C(-64863.68), SIMDE_FLOAT32_C( -933.68), SIMDE_FLOAT32_C( -639.99), SIMDE_FLOAT32_C(-129925.12), SIMDE_FLOAT32_C(-124799.46), SIMDE_FLOAT32_C(-122875.74), SIMDE_FLOAT32_C( -761.68), SIMDE_FLOAT32_C(-65436.93), SIMDE_FLOAT32_C( 31659.71), SIMDE_FLOAT32_C(-48105.99), SIMDE_FLOAT32_C( 369.48), SIMDE_FLOAT32_C(-130911.68) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_mm512_or_ps(a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_or_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -40.48), SIMDE_FLOAT64_C( -322.78), SIMDE_FLOAT64_C( -915.31), SIMDE_FLOAT64_C( 424.37), SIMDE_FLOAT64_C( 358.24), SIMDE_FLOAT64_C( 951.77), SIMDE_FLOAT64_C( 466.94), SIMDE_FLOAT64_C( -212.54) }, { SIMDE_FLOAT64_C( -206.62), SIMDE_FLOAT64_C( -632.27), SIMDE_FLOAT64_C( -561.37), SIMDE_FLOAT64_C( -939.45), SIMDE_FLOAT64_C( 583.50), SIMDE_FLOAT64_C( 851.39), SIMDE_FLOAT64_C( -748.10), SIMDE_FLOAT64_C( 610.87) }, { SIMDE_FLOAT64_C( -240.00), SIMDE_FLOAT64_C(-98023.69), SIMDE_FLOAT64_C( -947.37), SIMDE_FLOAT64_C(-130559.73), SIMDE_FLOAT64_C( 92157.44), SIMDE_FLOAT64_C( 1015.91), SIMDE_FLOAT64_C(-128764.94), SIMDE_FLOAT64_C(-56511.75) } }, { { SIMDE_FLOAT64_C( 883.05), SIMDE_FLOAT64_C( -496.23), 
SIMDE_FLOAT64_C( -209.68), SIMDE_FLOAT64_C( -122.92), SIMDE_FLOAT64_C( 19.75), SIMDE_FLOAT64_C( -49.24), SIMDE_FLOAT64_C( 492.96), SIMDE_FLOAT64_C( -866.69) }, { SIMDE_FLOAT64_C( -385.90), SIMDE_FLOAT64_C( 247.26), SIMDE_FLOAT64_C( -268.45), SIMDE_FLOAT64_C( 217.01), SIMDE_FLOAT64_C( -674.00), SIMDE_FLOAT64_C( 155.01), SIMDE_FLOAT64_C( -699.71), SIMDE_FLOAT64_C( -101.01) }, { SIMDE_FLOAT64_C(-113126.40), SIMDE_FLOAT64_C( -510.75), SIMDE_FLOAT64_C( -431.50), SIMDE_FLOAT64_C( -507.68), SIMDE_FLOAT64_C( -6096.00), SIMDE_FLOAT64_C( -223.97), SIMDE_FLOAT64_C(-130559.89), SIMDE_FLOAT64_C(-27990.62) } }, { { SIMDE_FLOAT64_C( 290.86), SIMDE_FLOAT64_C( 407.11), SIMDE_FLOAT64_C( 359.33), SIMDE_FLOAT64_C( -773.11), SIMDE_FLOAT64_C( 409.90), SIMDE_FLOAT64_C( -892.84), SIMDE_FLOAT64_C( -43.07), SIMDE_FLOAT64_C( 160.79) }, { SIMDE_FLOAT64_C( 465.85), SIMDE_FLOAT64_C( -467.47), SIMDE_FLOAT64_C( -782.08), SIMDE_FLOAT64_C( -490.24), SIMDE_FLOAT64_C( 592.42), SIMDE_FLOAT64_C( 806.95), SIMDE_FLOAT64_C( 470.98), SIMDE_FLOAT64_C( -245.49) }, { SIMDE_FLOAT64_C( 499.87), SIMDE_FLOAT64_C( -471.49), SIMDE_FLOAT64_C(-124766.50), SIMDE_FLOAT64_C(-125631.46), SIMDE_FLOAT64_C(113143.90), SIMDE_FLOAT64_C( -894.97), SIMDE_FLOAT64_C( -479.00), SIMDE_FLOAT64_C( -246.00) } }, { { SIMDE_FLOAT64_C( 88.45), SIMDE_FLOAT64_C( 537.22), SIMDE_FLOAT64_C( -949.80), SIMDE_FLOAT64_C( 743.04), SIMDE_FLOAT64_C( 296.63), SIMDE_FLOAT64_C( -483.08), SIMDE_FLOAT64_C( 917.67), SIMDE_FLOAT64_C( -778.36) }, { SIMDE_FLOAT64_C( 89.00), SIMDE_FLOAT64_C( -93.55), SIMDE_FLOAT64_C( -328.25), SIMDE_FLOAT64_C( -923.97), SIMDE_FLOAT64_C( 880.96), SIMDE_FLOAT64_C( 356.45), SIMDE_FLOAT64_C( 842.15), SIMDE_FLOAT64_C( 25.32) }, { SIMDE_FLOAT64_C( 89.45), SIMDE_FLOAT64_C(-24495.81), SIMDE_FLOAT64_C(-121574.40), SIMDE_FLOAT64_C( -1023.98), SIMDE_FLOAT64_C(112891.91), SIMDE_FLOAT64_C( -487.47), SIMDE_FLOAT64_C( 991.69), SIMDE_FLOAT64_C( -6483.92) } }, { { SIMDE_FLOAT64_C( -519.30), SIMDE_FLOAT64_C( 88.55), SIMDE_FLOAT64_C( 186.42), 
SIMDE_FLOAT64_C( -780.93), SIMDE_FLOAT64_C( 918.96), SIMDE_FLOAT64_C( 887.03), SIMDE_FLOAT64_C( 360.26), SIMDE_FLOAT64_C( 873.25) }, { SIMDE_FLOAT64_C( 651.35), SIMDE_FLOAT64_C( -822.40), SIMDE_FLOAT64_C( -37.73), SIMDE_FLOAT64_C( -102.45), SIMDE_FLOAT64_C( 167.81), SIMDE_FLOAT64_C( 376.55), SIMDE_FLOAT64_C( 698.04), SIMDE_FLOAT64_C( 15.57) }, { SIMDE_FLOAT64_C( -655.37), SIMDE_FLOAT64_C(-32460.80), SIMDE_FLOAT64_C( -190.92), SIMDE_FLOAT64_C(-26623.95), SIMDE_FLOAT64_C( 59391.49), SIMDE_FLOAT64_C(129935.87), SIMDE_FLOAT64_C( 97607.62), SIMDE_FLOAT64_C( 4021.92) } }, { { SIMDE_FLOAT64_C( -325.10), SIMDE_FLOAT64_C( 24.24), SIMDE_FLOAT64_C( -628.64), SIMDE_FLOAT64_C( 379.01), SIMDE_FLOAT64_C( 98.66), SIMDE_FLOAT64_C( 182.61), SIMDE_FLOAT64_C( -798.49), SIMDE_FLOAT64_C( -146.70) }, { SIMDE_FLOAT64_C( 387.34), SIMDE_FLOAT64_C( -48.74), SIMDE_FLOAT64_C( 849.44), SIMDE_FLOAT64_C( 502.00), SIMDE_FLOAT64_C( -892.81), SIMDE_FLOAT64_C( 587.22), SIMDE_FLOAT64_C( -574.98), SIMDE_FLOAT64_C( 384.87) }, { SIMDE_FLOAT64_C( -455.37), SIMDE_FLOAT64_C( -392.00), SIMDE_FLOAT64_C( -885.95), SIMDE_FLOAT64_C( 511.01), SIMDE_FLOAT64_C(-28602.00), SIMDE_FLOAT64_C( 46814.24), SIMDE_FLOAT64_C( -831.00), SIMDE_FLOAT64_C( -422.00) } }, { { SIMDE_FLOAT64_C( 462.25), SIMDE_FLOAT64_C( -905.39), SIMDE_FLOAT64_C( -831.12), SIMDE_FLOAT64_C( -716.46), SIMDE_FLOAT64_C( 498.06), SIMDE_FLOAT64_C( 927.73), SIMDE_FLOAT64_C( 312.19), SIMDE_FLOAT64_C( -955.24) }, { SIMDE_FLOAT64_C( -784.67), SIMDE_FLOAT64_C( 13.39), SIMDE_FLOAT64_C( 973.39), SIMDE_FLOAT64_C( -224.05), SIMDE_FLOAT64_C( 392.29), SIMDE_FLOAT64_C( -737.84), SIMDE_FLOAT64_C( -812.63), SIMDE_FLOAT64_C( 85.02) }, { SIMDE_FLOAT64_C(-118357.76), SIMDE_FLOAT64_C( -3943.87), SIMDE_FLOAT64_C( -1023.50), SIMDE_FLOAT64_C(-62237.99), SIMDE_FLOAT64_C( 506.31), SIMDE_FLOAT64_C( -1024.00), SIMDE_FLOAT64_C(-114288.64), SIMDE_FLOAT64_C(-30567.75) } }, { { SIMDE_FLOAT64_C( 979.20), SIMDE_FLOAT64_C( -512.78), SIMDE_FLOAT64_C( -370.35), SIMDE_FLOAT64_C( -474.08), 
SIMDE_FLOAT64_C( 129.12), SIMDE_FLOAT64_C( 46.51), SIMDE_FLOAT64_C( -292.51), SIMDE_FLOAT64_C( -218.79) }, { SIMDE_FLOAT64_C( 798.26), SIMDE_FLOAT64_C( 278.63), SIMDE_FLOAT64_C( 902.40), SIMDE_FLOAT64_C( 696.72), SIMDE_FLOAT64_C( -256.04), SIMDE_FLOAT64_C( 749.65), SIMDE_FLOAT64_C( -619.61), SIMDE_FLOAT64_C( 269.84) }, { SIMDE_FLOAT64_C( 991.45), SIMDE_FLOAT64_C(-71395.84), SIMDE_FLOAT64_C(-127867.73), SIMDE_FLOAT64_C(-122460.48), SIMDE_FLOAT64_C( -258.25), SIMDE_FLOAT64_C( 11994.94), SIMDE_FLOAT64_C(-79310.62), SIMDE_FLOAT64_C( -445.84) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_mm512_or_pd(a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_or_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { { -INT32_C( 768012835), INT32_C( 1529621558), -INT32_C( 1724343561), -INT32_C( 1025486607), INT32_C( 1861598911), INT32_C( 336886298), -INT32_C( 635461259), INT32_C( 1653142148), INT32_C( 892650239), -INT32_C( 443522578), -INT32_C( 310457861), -INT32_C( 659595751), INT32_C( 927441949), -INT32_C( 1823778018), -INT32_C( 2140312580), INT32_C( 1575155293) }, { -INT32_C( 1500375112), INT32_C( 1938498424), INT32_C( 90180076), -INT32_C( 2049044632), -INT32_C( 759421517), INT32_C( 2086996096), -INT32_C( 788737165), -INT32_C( 2127700023), INT32_C( 1848099062), -INT32_C( 807292189), INT32_C( 601113275), INT32_C( 94941777), INT32_C( 1473734103), -INT32_C( 523027347), -INT32_C( 659501041), -INT32_C( 1520836945) }, { -INT32_C( 155508739), INT32_C( 2075078526), -INT32_C( 1653040129), -INT32_C( 939630599), -INT32_C( 16924737), INT32_C( 2088073882), -INT32_C( 620760201), -INT32_C( 475136051), INT32_C( 2134360831), -INT32_C( 269356049), -INT32_C( 268514309), -INT32_C( 
575668647), INT32_C( 2010637791), -INT32_C( 203718785), -INT32_C( 654443521), -INT32_C( 33816833) } }, { { -INT32_C( 2129428322), -INT32_C( 296684494), -INT32_C( 2012142538), -INT32_C( 1366443305), -INT32_C( 1945738209), -INT32_C( 1335043680), INT32_C( 1485315241), -INT32_C( 1728191750), -INT32_C( 1793519518), INT32_C( 998467845), INT32_C( 1707316366), INT32_C( 1829982286), INT32_C( 1442388404), -INT32_C( 1677367821), INT32_C( 2096467330), -INT32_C( 770313617) }, { INT32_C( 140979715), INT32_C( 641985176), -INT32_C( 846526594), INT32_C( 205168215), -INT32_C( 1419692872), INT32_C( 457664153), INT32_C( 1670921459), INT32_C( 842378543), INT32_C( 1933221083), INT32_C( 77168006), -INT32_C( 590273404), INT32_C( 2078805187), -INT32_C( 651802304), -INT32_C( 1560973905), -INT32_C( 653947478), INT32_C( 353057338) }, { -INT32_C( 1988645217), -INT32_C( 296485190), -INT32_C( 845469826), -INT32_C( 1363165481), -INT32_C( 1352174401), -INT32_C( 1150287943), INT32_C( 2073574651), -INT32_C( 1157763073), -INT32_C( 147088133), INT32_C( 1067154823), -INT32_C( 36457330), INT32_C( 2147179727), -INT32_C( 570467852), -INT32_C( 1091211777), -INT32_C( 34239062), -INT32_C( 685770113) } }, { { INT32_C( 1552500182), INT32_C( 1197548226), INT32_C( 170078791), INT32_C( 2122648382), INT32_C( 56142676), -INT32_C( 1029354216), INT32_C( 345746394), -INT32_C( 1138120987), -INT32_C( 1374113045), INT32_C( 485849557), -INT32_C( 366602068), INT32_C( 2003413795), INT32_C( 1853538646), -INT32_C( 399433970), -INT32_C( 1325609782), INT32_C( 1584145779) }, { -INT32_C( 1391688488), -INT32_C( 1429667330), INT32_C( 1016393497), -INT32_C( 256639590), -INT32_C( 866243138), INT32_C( 397709133), -INT32_C( 808996772), -INT32_C( 1372769322), -INT32_C( 1235535176), INT32_C( 1432364091), -INT32_C( 1382943725), -INT32_C( 1331870222), -INT32_C( 1082262670), -INT32_C( 405327477), -INT32_C( 1195991326), -INT32_C( 1972968494) }, { -INT32_C( 41040418), -INT32_C( 269933570), INT32_C( 1052245855), -INT32_C( 21495874), -INT32_C( 
815878146), -INT32_C( 675950755), -INT32_C( 538985506), -INT32_C( 1104300041), -INT32_C( 1101284613), INT32_C( 1576369663), -INT32_C( 273154881), -INT32_C( 134352909), -INT32_C( 8389770), -INT32_C( 269009009), -INT32_C( 1191256342), -INT32_C( 563157005) } }, { { INT32_C( 1497415965), -INT32_C( 122773275), -INT32_C( 2035990636), -INT32_C( 147373436), -INT32_C( 927550403), -INT32_C( 944796187), -INT32_C( 25205460), INT32_C( 1720247625), -INT32_C( 1950365530), -INT32_C( 58495640), INT32_C( 830679213), -INT32_C( 1473725846), INT32_C( 1399906158), -INT32_C( 1743052692), -INT32_C( 828990843), INT32_C( 641015424) }, { INT32_C( 1320350950), INT32_C( 256587106), -INT32_C( 935276962), -INT32_C( 160405112), -INT32_C( 1270226616), -INT32_C( 2041748479), INT32_C( 2136335359), -INT32_C( 391804414), -INT32_C( 533244034), -INT32_C( 336625011), -INT32_C( 659345328), -INT32_C( 506583911), INT32_C( 110434053), INT32_C( 2072830588), -INT32_C( 923082298), -INT32_C( 357523605) }, { INT32_C( 1609758207), -INT32_C( 1067545), -INT32_C( 823787554), -INT32_C( 143168628), -INT32_C( 50334339), -INT32_C( 940577307), -INT32_C( 8394753), -INT32_C( 290590901), -INT32_C( 339746818), -INT32_C( 1052691), -INT32_C( 105694979), -INT32_C( 370229509), INT32_C( 1475731311), -INT32_C( 73407876), -INT32_C( 822150201), -INT32_C( 290086933) } }, { { -INT32_C( 2067077129), -INT32_C( 1183860119), -INT32_C( 2087640342), INT32_C( 1264869190), -INT32_C( 212731529), -INT32_C( 1569792292), INT32_C( 728393919), -INT32_C( 32171513), INT32_C( 1803738882), -INT32_C( 2094665319), INT32_C( 1510389268), -INT32_C( 1918473706), INT32_C( 1082193764), -INT32_C( 1780289835), INT32_C( 1572883542), INT32_C( 1750914406) }, { INT32_C( 1322573492), -INT32_C( 456001072), -INT32_C( 985737041), -INT32_C( 1504517054), -INT32_C( 1310272804), INT32_C( 373737664), INT32_C( 2071201300), -INT32_C( 1864118053), INT32_C( 2128525230), INT32_C( 1617080240), -INT32_C( 903503481), INT32_C( 1634760837), INT32_C( 185816906), INT32_C( 874600735), 
INT32_C( 1001362784), INT32_C( 348885605) }, { -INT32_C( 824246281), -INT32_C( 33555463), -INT32_C( 943719697), -INT32_C( 277414074), -INT32_C( 201851905), -INT32_C( 1234247972), INT32_C( 2071883455), -INT32_C( 17311009), INT32_C( 2145320878), -INT32_C( 479723591), -INT32_C( 634931305), -INT32_C( 302613865), INT32_C( 1267988334), -INT32_C( 1243349025), INT32_C( 2146426230), INT32_C( 2095044455) } }, { { -INT32_C( 91051702), -INT32_C( 530909863), INT32_C( 464158870), INT32_C( 1115495416), -INT32_C( 1857187726), INT32_C( 1237676009), INT32_C( 1787130884), INT32_C( 1367232519), INT32_C( 1397428474), -INT32_C( 1691113979), INT32_C( 515366438), INT32_C( 1801467129), -INT32_C( 1375949116), INT32_C( 569885213), INT32_C( 1032551478), -INT32_C( 963769908) }, { INT32_C( 521787930), -INT32_C( 1497674368), INT32_C( 616854059), INT32_C( 1787765926), -INT32_C( 283669550), -INT32_C( 2079257011), INT32_C( 1472306314), -INT32_C( 1071820634), -INT32_C( 1428211926), -INT32_C( 1353672060), -INT32_C( 1294789620), INT32_C( 186409528), INT32_C( 1006253293), -INT32_C( 843117758), INT32_C( 1311015080), -INT32_C( 99728944) }, { -INT32_C( 6554790), -INT32_C( 419693095), INT32_C( 1072624319), INT32_C( 1795112958), -INT32_C( 10511374), -INT32_C( 841650195), INT32_C( 2143681678), -INT32_C( 780185433), -INT32_C( 69257222), -INT32_C( 1082933627), -INT32_C( 1091051986), INT32_C( 1803319033), -INT32_C( 1073824019), -INT32_C( 302003361), INT32_C( 2142239934), -INT32_C( 24229412) } }, { { -INT32_C( 56300168), -INT32_C( 1800670072), INT32_C( 1095204360), -INT32_C( 833854496), -INT32_C( 636926313), -INT32_C( 89667502), INT32_C( 424201032), -INT32_C( 2062330356), -INT32_C( 880691133), -INT32_C( 1251988052), -INT32_C( 1929992277), -INT32_C( 1571143158), -INT32_C( 629382264), INT32_C( 1943282475), -INT32_C( 91480850), -INT32_C( 1233149838) }, { INT32_C( 75563352), -INT32_C( 642129618), -INT32_C( 1822052472), INT32_C( 2050342897), INT32_C( 1297396002), -INT32_C( 1027528236), -INT32_C( 1195619003), INT32_C( 
1164852461), INT32_C( 1800073021), INT32_C( 1497629649), -INT32_C( 1527993677), -INT32_C( 1944182423), -INT32_C( 1495698478), -INT32_C( 513238372), -INT32_C( 711383576), -INT32_C( 1642395807) }, { -INT32_C( 56234632), -INT32_C( 574884434), -INT32_C( 748159096), -INT32_C( 25296911), -INT32_C( 547489865), -INT32_C( 85464618), -INT32_C( 1174614195), -INT32_C( 981500179), -INT32_C( 338952321), -INT32_C( 43765763), -INT32_C( 1392595013), -INT32_C( 1369545877), -INT32_C( 16944166), -INT32_C( 201540673), -INT32_C( 6472210), -INT32_C( 1098930317) } }, { { -INT32_C( 938908169), INT32_C( 455167336), INT32_C( 1639976695), INT32_C( 49143343), -INT32_C( 307706287), INT32_C( 1238307169), -INT32_C( 1759614922), INT32_C( 1731541360), INT32_C( 120536734), -INT32_C( 2094903157), -INT32_C( 1897602466), INT32_C( 311480769), -INT32_C( 134203241), INT32_C( 2135019337), -INT32_C( 1491705801), INT32_C( 940460953) }, { INT32_C( 356466057), -INT32_C( 325557874), INT32_C( 91913284), -INT32_C( 468252083), -INT32_C( 1931667645), INT32_C( 504110822), INT32_C( 415572606), -INT32_C( 145697682), -INT32_C( 1626566895), INT32_C( 881632496), INT32_C( 1832453664), INT32_C( 1414680849), INT32_C( 1340092264), -INT32_C( 865211315), INT32_C( 2112107023), INT32_C( 410268678) }, { -INT32_C( 583041537), -INT32_C( 4624914), INT32_C( 1710915319), -INT32_C( 419438993), -INT32_C( 302131373), INT32_C( 1607409639), -INT32_C( 1612748162), -INT32_C( 143262338), -INT32_C( 1624260705), -INT32_C( 1213270789), -INT32_C( 268573058), INT32_C( 1456656849), -INT32_C( 2080769), -INT32_C( 9572531), -INT32_C( 626113), INT32_C( 947814303) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_or_epi32(a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_or_epi32 
(SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t src[16]; const simde__mmask16 k; const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { { INT32_C( 709045204), INT32_C( 1359355365), INT32_C( 2138292658), -INT32_C( 698886100), -INT32_C( 376778208), INT32_C( 958261001), -INT32_C( 1063656842), -INT32_C( 299971303), -INT32_C( 568827144), INT32_C( 841948799), INT32_C( 212968160), -INT32_C( 1696462726), -INT32_C( 461083174), -INT32_C( 904027820), -INT32_C( 1668630909), INT32_C( 2089462148) }, UINT16_C(41739), { -INT32_C( 1983804838), INT32_C( 1831575996), -INT32_C( 1871338066), -INT32_C( 990031808), INT32_C( 1365594420), -INT32_C( 1509431269), INT32_C( 256871300), INT32_C( 1655855624), -INT32_C( 1578339356), -INT32_C( 1022486763), -INT32_C( 44858179), INT32_C( 146887124), -INT32_C( 1101453662), -INT32_C( 1838915570), -INT32_C( 190729236), -INT32_C( 246000883) }, { -INT32_C( 594394425), INT32_C( 396337498), -INT32_C( 116067803), -INT32_C( 436087485), INT32_C( 161766139), -INT32_C( 1499789126), -INT32_C( 929415749), INT32_C( 1471803792), -INT32_C( 1926017997), INT32_C( 312791789), INT32_C( 134985925), -INT32_C( 1997665139), INT32_C( 546411110), INT32_C( 1439116698), -INT32_C( 98737814), -INT32_C( 2041391278) }, { -INT32_C( 573322529), INT32_C( 2143265278), INT32_C( 2138292658), -INT32_C( 419572413), -INT32_C( 376778208), INT32_C( 958261001), -INT32_C( 1063656842), -INT32_C( 299971303), -INT32_C( 1375764489), -INT32_C( 743516163), INT32_C( 212968160), -INT32_C( 1696462726), -INT32_C( 461083174), -INT32_C( 672698978), -INT32_C( 1668630909), -INT32_C( 145303713) } }, { { INT32_C( 269714723), INT32_C( 488814680), -INT32_C( 31117968), -INT32_C( 1618602951), INT32_C( 1103042726), -INT32_C( 1349089723), INT32_C( 984200424), -INT32_C( 1363084405), -INT32_C( 641805183), -INT32_C( 50929524), INT32_C( 1190796045), -INT32_C( 689602257), -INT32_C( 568875879), INT32_C( 328052011), -INT32_C( 330484127), -INT32_C( 1281749454) }, UINT16_C(22754), { 
-INT32_C( 2110230900), INT32_C( 1704805738), INT32_C( 1894174091), -INT32_C( 1172996189), -INT32_C( 362332067), -INT32_C( 1608464045), INT32_C( 1353601717), -INT32_C( 1851224059), INT32_C( 1762910718), -INT32_C( 1295077082), INT32_C( 555922558), -INT32_C( 1864681677), -INT32_C( 897957257), -INT32_C( 1049912821), -INT32_C( 233760275), -INT32_C( 1467696726) }, { -INT32_C( 1055811686), -INT32_C( 965484728), -INT32_C( 957901421), INT32_C( 1146536653), INT32_C( 269471749), INT32_C( 1490123371), INT32_C( 1028317843), INT32_C( 921095836), -INT32_C( 1359480986), INT32_C( 1786079959), -INT32_C( 852403200), INT32_C( 605128479), -INT32_C( 1019994024), INT32_C( 773523098), -INT32_C( 2073336344), -INT32_C( 1699000012) }, { INT32_C( 269714723), -INT32_C( 402661526), -INT32_C( 31117968), -INT32_C( 1618602951), INT32_C( 1103042726), -INT32_C( 118359173), INT32_C( 2112811703), -INT32_C( 1209082211), -INT32_C( 641805183), -INT32_C( 50929524), INT32_C( 1190796045), -INT32_C( 1260669121), -INT32_C( 880909697), INT32_C( 328052011), -INT32_C( 159678995), -INT32_C( 1281749454) } }, { { INT32_C( 541635145), INT32_C( 495631645), INT32_C( 971684634), -INT32_C( 1688339645), -INT32_C( 1235316452), -INT32_C( 2132510313), INT32_C( 302272734), -INT32_C( 357777503), -INT32_C( 1895107214), -INT32_C( 861104718), -INT32_C( 1828350384), -INT32_C( 1389469039), -INT32_C( 1956344588), -INT32_C( 469022715), INT32_C( 989204632), INT32_C( 1092920271) }, UINT16_C(12184), { INT32_C( 2093238992), INT32_C( 470946839), -INT32_C( 696277848), -INT32_C( 1251773615), INT32_C( 184445182), INT32_C( 1125815884), -INT32_C( 169416240), -INT32_C( 81494485), -INT32_C( 545724216), -INT32_C( 1526953220), -INT32_C( 2139391186), -INT32_C( 332013842), -INT32_C( 1812581562), -INT32_C( 1697247030), INT32_C( 630177018), INT32_C( 35697466) }, { -INT32_C( 1729980261), INT32_C( 1362943523), INT32_C( 1204926297), -INT32_C( 600635499), INT32_C( 74394170), INT32_C( 882853178), INT32_C( 995700225), INT32_C( 2101181154), INT32_C( 
890576658), INT32_C( 1451709181), -INT32_C( 1617078263), -INT32_C( 1703161505), INT32_C( 899607291), INT32_C( 812268847), INT32_C( 1315685228), INT32_C( 1355524157) }, { INT32_C( 541635145), INT32_C( 495631645), INT32_C( 971684634), -INT32_C( 42764331), INT32_C( 251620094), -INT32_C( 2132510313), INT32_C( 302272734), -INT32_C( 12747029), -INT32_C( 8519718), -INT32_C( 151004419), -INT32_C( 1610646737), -INT32_C( 25174017), -INT32_C( 1956344588), -INT32_C( 1157743121), INT32_C( 989204632), INT32_C( 1092920271) } }, { { -INT32_C( 981081656), INT32_C( 1008471091), -INT32_C( 992233115), -INT32_C( 2057415030), INT32_C( 1874591040), -INT32_C( 1499519686), INT32_C( 653527784), INT32_C( 2071380147), -INT32_C( 733938783), INT32_C( 1829788680), -INT32_C( 1607341034), -INT32_C( 2094624702), -INT32_C( 940383859), -INT32_C( 294743546), INT32_C( 1326736028), -INT32_C( 1010136542) }, UINT16_C( 2950), { -INT32_C( 1469608297), INT32_C( 747929083), INT32_C( 1153226270), INT32_C( 1277512025), INT32_C( 2145266449), INT32_C( 786528793), -INT32_C( 1799879478), INT32_C( 1604271815), -INT32_C( 955840564), -INT32_C( 1561027708), -INT32_C( 874073999), INT32_C( 186059770), INT32_C( 1351284022), INT32_C( 964586351), INT32_C( 936261231), INT32_C( 1100377461) }, { -INT32_C( 116810380), -INT32_C( 1449394888), -INT32_C( 1485536595), -INT32_C( 994931827), -INT32_C( 267109248), INT32_C( 388600488), INT32_C( 1028585416), -INT32_C( 645929883), -INT32_C( 1177384831), INT32_C( 845377157), INT32_C( 2111494128), -INT32_C( 482243485), INT32_C( 1909675465), -INT32_C( 1333199385), INT32_C( 1508759540), INT32_C( 1026780347) }, { -INT32_C( 981081656), -INT32_C( 1382023685), -INT32_C( 402860353), -INT32_C( 2057415030), INT32_C( 1874591040), -INT32_C( 1499519686), INT32_C( 653527784), -INT32_C( 536871193), -INT32_C( 2650163), -INT32_C( 1292370043), -INT32_C( 1607341034), -INT32_C( 346583045), -INT32_C( 940383859), -INT32_C( 294743546), INT32_C( 1326736028), -INT32_C( 1010136542) } }, { { INT32_C( 2046166516), 
INT32_C( 1672239475), -INT32_C( 1797159375), -INT32_C( 612949486), INT32_C( 1598835576), INT32_C( 1007670600), INT32_C( 1737948588), INT32_C( 1587857769), INT32_C( 1121426382), INT32_C( 631604212), INT32_C( 481920521), INT32_C( 569848233), -INT32_C( 998226820), -INT32_C( 989818855), -INT32_C( 164850035), INT32_C( 777310559) }, UINT16_C(11372), { INT32_C( 363815024), INT32_C( 1067235718), -INT32_C( 865057323), -INT32_C( 435098522), -INT32_C( 1317656144), INT32_C( 457638894), INT32_C( 1324132346), INT32_C( 1165646037), INT32_C( 1046096568), -INT32_C( 1199638813), -INT32_C( 1585123781), -INT32_C( 1953983013), -INT32_C( 1388511554), -INT32_C( 53967870), INT32_C( 4895787), -INT32_C( 1002060532) }, { -INT32_C( 771514385), -INT32_C( 779452010), INT32_C( 1249054319), INT32_C( 1658256291), -INT32_C( 82898183), -INT32_C( 1040722026), -INT32_C( 1748942197), -INT32_C( 161806841), INT32_C( 1019764390), INT32_C( 1326273247), INT32_C( 77168736), INT32_C( 1969647483), INT32_C( 410023298), -INT32_C( 673552308), -INT32_C( 1318151254), INT32_C( 1218955682) }, { INT32_C( 2046166516), INT32_C( 1672239475), -INT32_C( 831303681), -INT32_C( 422117401), INT32_C( 1598835576), -INT32_C( 604514306), -INT32_C( 538056709), INT32_C( 1587857769), INT32_C( 1121426382), INT32_C( 631604212), -INT32_C( 1516376453), -INT32_C( 1114117), -INT32_C( 998226820), -INT32_C( 2429874), -INT32_C( 164850035), INT32_C( 777310559) } }, { { INT32_C( 126119720), INT32_C( 559321537), -INT32_C( 1926893550), -INT32_C( 519926945), INT32_C( 1291481856), -INT32_C( 2061249317), INT32_C( 288788847), -INT32_C( 2091262629), INT32_C( 227204428), -INT32_C( 2127634065), INT32_C( 806245585), -INT32_C( 535686689), INT32_C( 1596722308), INT32_C( 1340362976), INT32_C( 1012996833), INT32_C( 1136638711) }, UINT16_C(19096), { INT32_C( 2116749136), -INT32_C( 1764557688), INT32_C( 1051177516), -INT32_C( 1102435438), INT32_C( 1846422154), -INT32_C( 611782534), -INT32_C( 325746900), INT32_C( 305540546), -INT32_C( 1131323084), -INT32_C( 
1991023779), -INT32_C( 1480001003), -INT32_C( 1335487962), -INT32_C( 1222675395), -INT32_C( 1852659868), -INT32_C( 377673945), -INT32_C( 1979927723) }, { INT32_C( 1900449044), INT32_C( 117086704), -INT32_C( 1179794797), INT32_C( 308941781), -INT32_C( 322336376), INT32_C( 1467833136), -INT32_C( 683541886), -INT32_C( 1050591827), -INT32_C( 1171085366), -INT32_C( 742380224), -INT32_C( 997363985), INT32_C( 165083009), -INT32_C( 1326014592), INT32_C( 2097705978), INT32_C( 458508654), INT32_C( 1356707206) }, { INT32_C( 126119720), INT32_C( 559321537), -INT32_C( 1926893550), -INT32_C( 1100334121), -INT32_C( 288380022), -INT32_C( 2061249317), INT32_C( 288788847), -INT32_C( 747160081), INT32_C( 227204428), -INT32_C( 606900387), INT32_C( 806245585), -INT32_C( 1174997081), INT32_C( 1596722308), INT32_C( 1340362976), -INT32_C( 75665553), INT32_C( 1136638711) } }, { { -INT32_C( 1677062308), INT32_C( 745589564), -INT32_C( 1158611655), INT32_C( 1959053300), INT32_C( 1629796966), -INT32_C( 1679938515), -INT32_C( 71880075), INT32_C( 1145804008), -INT32_C( 522103133), INT32_C( 1510756641), INT32_C( 1108671566), INT32_C( 716626115), -INT32_C( 1064576366), INT32_C( 2086365447), -INT32_C( 2089282917), INT32_C( 1254671270) }, UINT16_C(43289), { INT32_C( 922368554), -INT32_C( 1456322412), INT32_C( 1082259082), -INT32_C( 1424354272), INT32_C( 789848532), INT32_C( 373469086), -INT32_C( 69605325), INT32_C( 1554314034), -INT32_C( 1030578643), INT32_C( 1886111206), -INT32_C( 609162053), -INT32_C( 729363968), -INT32_C( 1979409684), INT32_C( 2090878537), INT32_C( 1635219758), -INT32_C( 1698882708) }, { -INT32_C( 1621339975), -INT32_C( 804272363), -INT32_C( 1263812685), INT32_C( 1988637066), INT32_C( 335580363), INT32_C( 26321106), -INT32_C( 2040395751), -INT32_C( 585097436), -INT32_C( 2072150929), -INT32_C( 162231229), -INT32_C( 693436596), -INT32_C( 78892240), -INT32_C( 1844425537), INT32_C( 93561324), -INT32_C( 846465623), -INT32_C( 2069124332) }, { -INT32_C( 1073841477), INT32_C( 745589564), 
-INT32_C( 1158611655), -INT32_C( 6670934), INT32_C( 1058319839), -INT32_C( 1679938515), -INT32_C( 71880075), INT32_C( 1145804008), -INT32_C( 956301713), INT32_C( 1510756641), INT32_C( 1108671566), -INT32_C( 3212496), -INT32_C( 1064576366), INT32_C( 2108942317), -INT32_C( 2089282917), -INT32_C( 1631601796) } }, { { INT32_C( 1778919463), INT32_C( 6380980), -INT32_C( 1915352228), -INT32_C( 24632770), INT32_C( 1519425646), -INT32_C( 480238791), INT32_C( 749792024), -INT32_C( 1095738473), INT32_C( 942258563), INT32_C( 1916307990), -INT32_C( 721481835), -INT32_C( 1596815311), INT32_C( 1526358560), -INT32_C( 1656923515), -INT32_C( 573969082), -INT32_C( 862225847) }, UINT16_C(50227), { INT32_C( 1028540676), -INT32_C( 1152588613), -INT32_C( 1975288392), INT32_C( 418145053), -INT32_C( 93163075), -INT32_C( 672614386), INT32_C( 827404693), INT32_C( 49644797), -INT32_C( 1992342323), -INT32_C( 532378840), INT32_C( 644515592), -INT32_C( 1472309526), -INT32_C( 710758201), -INT32_C( 22246807), -INT32_C( 1188036933), INT32_C( 1354442114) }, { -INT32_C( 1847985559), -INT32_C( 1921966715), -INT32_C( 1900815452), -INT32_C( 130617039), INT32_C( 198039970), INT32_C( 520714595), -INT32_C( 86492809), -INT32_C( 951413922), INT32_C( 307766157), -INT32_C( 459290304), -INT32_C( 713927772), -INT32_C( 422729404), -INT32_C( 437151102), -INT32_C( 1962607853), -INT32_C( 1836721100), -INT32_C( 61223057) }, { -INT32_C( 1109394579), -INT32_C( 1082262081), -INT32_C( 1915352228), -INT32_C( 24632770), -INT32_C( 67110465), -INT32_C( 538314385), INT32_C( 749792024), -INT32_C( 1095738473), INT32_C( 942258563), INT32_C( 1916307990), -INT32_C( 142944340), -INT32_C( 1596815311), INT32_C( 1526358560), -INT32_C( 1656923515), -INT32_C( 1146093889), -INT32_C( 50597905) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi32(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = 
simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_mask_or_epi32(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_or_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { UINT16_C(10069), { -INT32_C( 754578698), -INT32_C( 1968268126), -INT32_C( 940325655), -INT32_C( 1979375336), -INT32_C( 1770342166), -INT32_C( 2085670185), -INT32_C( 977031579), INT32_C( 653006895), -INT32_C( 973475038), INT32_C( 2035263631), -INT32_C( 2076163220), INT32_C( 1309558116), -INT32_C( 723220227), INT32_C( 542610363), INT32_C( 1910905410), INT32_C( 1436013106) }, { INT32_C( 1427804613), -INT32_C( 1513199303), INT32_C( 271126188), INT32_C( 1348417619), INT32_C( 2065974208), INT32_C( 429685975), -INT32_C( 913669482), INT32_C( 438182484), -INT32_C( 328255309), INT32_C( 1318141345), -INT32_C( 1637959093), -INT32_C( 1276199438), -INT32_C( 668069375), INT32_C( 636603022), -INT32_C( 1578206388), INT32_C( 1354435741) }, { -INT32_C( 685859849), INT32_C( 0), -INT32_C( 671363347), INT32_C( 0), -INT32_C( 8455190), INT32_C( 0), -INT32_C( 842289417), INT32_C( 0), -INT32_C( 301990989), INT32_C( 2145369519), -INT32_C( 1637942417), INT32_C( 0), INT32_C( 0), INT32_C( 637000639), INT32_C( 0), INT32_C( 0) } }, { UINT16_C(10820), { -INT32_C( 848829124), -INT32_C( 1836535245), INT32_C( 1045396048), INT32_C( 1548767277), INT32_C( 421977896), -INT32_C( 225086716), -INT32_C( 805424620), -INT32_C( 1074183549), INT32_C( 1535991847), INT32_C( 1659704594), -INT32_C( 1113572209), -INT32_C( 1256591219), -INT32_C( 741392433), -INT32_C( 960142158), INT32_C( 429245334), INT32_C( 769232389) }, { INT32_C( 8939246), INT32_C( 174224763), INT32_C( 1053229745), -INT32_C( 1024139021), -INT32_C( 761871584), -INT32_C( 1114023129), INT32_C( 618081823), -INT32_C( 1420709699), -INT32_C( 1850943210), INT32_C( 
10161742), INT32_C( 71197457), INT32_C( 1673933379), INT32_C( 473324789), -INT32_C( 673525832), -INT32_C( 1157910019), INT32_C( 2003193185) }, { INT32_C( 0), INT32_C( 0), INT32_C( 1053784817), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 268485089), INT32_C( 0), INT32_C( 0), INT32_C( 1660886878), INT32_C( 0), -INT32_C( 136318257), INT32_C( 0), -INT32_C( 673189958), INT32_C( 0), INT32_C( 0) } }, { UINT16_C( 4646), { -INT32_C( 1558153976), -INT32_C( 1291439755), -INT32_C( 52016587), -INT32_C( 480715859), -INT32_C( 760082184), -INT32_C( 477908761), -INT32_C( 802102166), INT32_C( 1675777627), INT32_C( 1074136011), INT32_C( 1794379060), INT32_C( 73849431), -INT32_C( 1377255755), -INT32_C( 1233151281), -INT32_C( 1248263606), INT32_C( 1099287014), -INT32_C( 341546976) }, { -INT32_C( 1624462485), INT32_C( 252256440), -INT32_C( 1357680646), -INT32_C( 44172498), -INT32_C( 525083498), -INT32_C( 980070945), INT32_C( 906435350), -INT32_C( 299717757), INT32_C( 244141654), INT32_C( 1746769774), INT32_C( 873935110), -INT32_C( 1036946388), INT32_C( 816047441), INT32_C( 1224030258), -INT32_C( 696255405), -INT32_C( 20667992) }, { INT32_C( 0), -INT32_C( 1089523203), -INT32_C( 561153), INT32_C( 0), INT32_C( 0), -INT32_C( 409469441), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1795006334), INT32_C( 0), INT32_C( 0), -INT32_C( 1224736801), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { UINT16_C(21231), { INT32_C( 719936781), -INT32_C( 597954619), INT32_C( 1431406628), -INT32_C( 314924470), -INT32_C( 953783086), INT32_C( 885291445), INT32_C( 349531216), INT32_C( 2003223658), -INT32_C( 425570527), INT32_C( 1673723199), -INT32_C( 793242747), -INT32_C( 2017594443), INT32_C( 340779871), -INT32_C( 1387785379), -INT32_C( 373219969), INT32_C( 39921889) }, { -INT32_C( 1226308745), -INT32_C( 2045138176), INT32_C( 1951847102), INT32_C( 620434373), INT32_C( 1413040887), -INT32_C( 603881123), -INT32_C( 2134457697), INT32_C( 1652762602), INT32_C( 706243370), -INT32_C( 709873130), -INT32_C( 
917961212), INT32_C( 300762138), -INT32_C( 328915313), INT32_C( 1204381352), INT32_C( 331845416), -INT32_C( 529184074) }, { -INT32_C( 1092001921), -INT32_C( 564400187), INT32_C( 1968691902), -INT32_C( 302271537), INT32_C( 0), -INT32_C( 54134275), -INT32_C( 1797787937), INT32_C( 2011686890), INT32_C( 0), -INT32_C( 135069889), INT32_C( 0), INT32_C( 0), -INT32_C( 59775009), INT32_C( 0), -INT32_C( 70803585), INT32_C( 0) } }, { UINT16_C(36278), { -INT32_C( 1145058294), -INT32_C( 356400223), INT32_C( 2049956748), INT32_C( 1369489132), INT32_C( 1941391530), INT32_C( 1459806351), INT32_C( 1755429107), -INT32_C( 1544202344), -INT32_C( 983648988), INT32_C( 95428472), -INT32_C( 411049989), INT32_C( 1194925981), -INT32_C( 138678168), -INT32_C( 1018249776), INT32_C( 237760630), INT32_C( 1806770503) }, { INT32_C( 1311838166), INT32_C( 710140207), INT32_C( 1561449152), INT32_C( 1520716530), INT32_C( 189882171), -INT32_C( 1815109603), -INT32_C( 677250416), -INT32_C( 247246053), -INT32_C( 1858046878), INT32_C( 364614485), INT32_C( 1483918694), INT32_C( 1387402775), -INT32_C( 1822555274), INT32_C( 858140067), INT32_C( 1124845351), INT32_C( 2100579866) }, { INT32_C( 0), -INT32_C( 355212369), INT32_C( 2134891468), INT32_C( 0), INT32_C( 2079809467), -INT32_C( 674242401), INT32_C( 0), -INT32_C( 201893989), -INT32_C( 715197082), INT32_C( 0), -INT32_C( 8396801), INT32_C( 1471881119), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 2142596959) } }, { UINT16_C(29890), { -INT32_C( 905439474), -INT32_C( 1634243028), INT32_C( 2025107142), INT32_C( 1601907201), -INT32_C( 477356099), INT32_C( 1571533650), INT32_C( 732677622), INT32_C( 1369402690), -INT32_C( 1340364924), -INT32_C( 615599595), INT32_C( 1632830304), -INT32_C( 339685587), INT32_C( 1087261934), -INT32_C( 157386496), -INT32_C( 2128524993), INT32_C( 1003667895) }, { INT32_C( 2129456488), INT32_C( 5847711), INT32_C( 1801563453), INT32_C( 1784029820), INT32_C( 1856644206), -INT32_C( 580564834), INT32_C( 1230931602), -INT32_C( 
1333513913), -INT32_C( 1104252898), -INT32_C( 390166613), -INT32_C( 1336729804), -INT32_C( 1340430015), INT32_C( 1813956046), -INT32_C( 1622572275), INT32_C( 1374201610), -INT32_C( 150901288) }, { INT32_C( 0), -INT32_C( 1629520193), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1811924982), -INT32_C( 241205945), INT32_C( 0), INT32_C( 0), -INT32_C( 246210700), INT32_C( 0), INT32_C( 1826541038), -INT32_C( 2098419), -INT32_C( 773198017), INT32_C( 0) } }, { UINT16_C(12253), { INT32_C( 1941407925), -INT32_C( 997004431), -INT32_C( 1234250596), -INT32_C( 1568981884), -INT32_C( 249132632), INT32_C( 295186472), -INT32_C( 2088865407), INT32_C( 498228072), INT32_C( 1435527652), -INT32_C( 250011051), INT32_C( 2057799670), INT32_C( 1797071811), -INT32_C( 732150868), -INT32_C( 186256013), -INT32_C( 831036570), -INT32_C( 1544869185) }, { -INT32_C( 386368621), -INT32_C( 1797713507), INT32_C( 1544454553), INT32_C( 1355295652), -INT32_C( 517725074), INT32_C( 2144733721), INT32_C( 743263597), INT32_C( 198129783), INT32_C( 1374930867), INT32_C( 1927663065), -INT32_C( 221318322), -INT32_C( 1925016033), -INT32_C( 730896709), -INT32_C( 564902544), INT32_C( 168468882), -INT32_C( 1927947558) }, { -INT32_C( 67109961), INT32_C( 0), -INT32_C( 26290787), -INT32_C( 218415196), -INT32_C( 249123346), INT32_C( 0), -INT32_C( 1350599187), INT32_C( 536836991), INT32_C( 1442050039), -INT32_C( 201461795), -INT32_C( 84936706), -INT32_C( 278939681), INT32_C( 0), -INT32_C( 17435277), INT32_C( 0), INT32_C( 0) } }, { UINT16_C( 2209), { -INT32_C( 1009419554), -INT32_C( 1145625620), INT32_C( 1464981013), -INT32_C( 759296925), INT32_C( 890711777), INT32_C( 399944204), -INT32_C( 906907468), INT32_C( 483496510), -INT32_C( 102717683), -INT32_C( 525035574), -INT32_C( 801700243), -INT32_C( 207358702), INT32_C( 807975460), INT32_C( 407371620), -INT32_C( 304006993), -INT32_C( 670387253) }, { INT32_C( 617736794), -INT32_C( 301693311), -INT32_C( 1631699828), INT32_C( 1435656497), -INT32_C( 2138654180), 
INT32_C( 1771687353), -INT32_C( 782861818), -INT32_C( 2018942931), -INT32_C( 877888950), -INT32_C( 1933987840), INT32_C( 506165484), -INT32_C( 176964135), INT32_C( 829880695), -INT32_C( 845541433), -INT32_C( 1214320503), -INT32_C( 1690417327) }, { -INT32_C( 405407010), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 2145382333), INT32_C( 0), -INT32_C( 1611009473), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 135004709), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_maskz_or_epi32(test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_or_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { { -INT64_C( 6429398815427941624), -INT64_C( 668819823809793933), -INT64_C( 1864586420094752608), -INT64_C( 6964511610407502025), INT64_C( 1602969904378442689), -INT64_C( 3335834506442223148), -INT64_C( 3507572279834119964), -INT64_C( 4859897487314131624) }, { INT64_C( 7302747944067050655), -INT64_C( 79420915697099292), INT64_C( 5850102220228026311), -INT64_C( 3577600625560452446), INT64_C( 2553937083690429820), INT64_C( 3737175614931113157), -INT64_C( 6501014216955967619), INT64_C( 6107615400781023429) }, { -INT64_C( 1738741473842712673), -INT64_C( 74344604006875657), -INT64_C( 630577615488516121), -INT64_C( 2352603316211171401), INT64_C( 3999175301830475773), -INT64_C( 865327213124397611), -INT64_C( 1164220517755127811), -INT64_C( 230035441899541027) } }, { { INT64_C( 120420968188400456), INT64_C( 9080872039193043814), INT64_C( 2282686581501630468), -INT64_C( 3174801842188849212), -INT64_C( 8510388074405211991), -INT64_C( 8761365484661914134), -INT64_C( 
2489862664037179912), INT64_C( 326908290568652074) }, { -INT64_C( 2997762464526244679), INT64_C( 1773621245074587796), -INT64_C( 8034742351829521528), INT64_C( 1928331604867348322), INT64_C( 969781872701123507), -INT64_C( 1561692022419212773), -INT64_C( 224605373488972308), -INT64_C( 6089856293668313087) }, { -INT64_C( 2886843662796720135), INT64_C( 9123658572182447094), -INT64_C( 6917529583168341108), -INT64_C( 2597767831452385306), -INT64_C( 8217556958887019589), -INT64_C( 1262166901253998597), -INT64_C( 147990299458667012), -INT64_C( 5765315608657154773) } }, { { -INT64_C( 1680476196060892634), INT64_C( 9097683093164380661), -INT64_C( 2825529556040889293), INT64_C( 2701798032050652123), INT64_C( 8997473558825650216), -INT64_C( 6245062807636475836), -INT64_C( 5436054925677746719), -INT64_C( 6348099103472760381) }, { INT64_C( 7120324182048287949), -INT64_C( 7378709589531188103), INT64_C( 1459412915824169139), -INT64_C( 7362895231201130582), INT64_C( 6983602974306954601), INT64_C( 7256400371483943777), -INT64_C( 8116277192105655047), INT64_C( 6439173234401752055) }, { -INT64_C( 1513772562375573777), -INT64_C( 10697444992223747), -INT64_C( 2537228811144930125), -INT64_C( 4755871579609577477), INT64_C( 9007199151526374761), -INT64_C( 1299290726435970203), -INT64_C( 4620850456435415559), -INT64_C( 125345336394761) } }, { { INT64_C( 7511664463165082103), INT64_C( 8071354844583202542), INT64_C( 2066614722928243770), -INT64_C( 1759433076923996341), INT64_C( 3455672968657860229), -INT64_C( 1694811768610719854), -INT64_C( 7564158082662058811), -INT64_C( 6344051108417968867) }, { INT64_C( 5858383487294487261), -INT64_C( 5605338946010932320), INT64_C( 1219810240101104906), INT64_C( 310768335838887816), INT64_C( 5538425528129914061), INT64_C( 1669655743652575116), -INT64_C( 1004484776281913332), INT64_C( 884372658259242265) }, { INT64_C( 8754975409943346175), -INT64_C( 993052514467471378), INT64_C( 2085087425586986298), -INT64_C( 1741416447178965045), INT64_C( 
8069734670988211917), -INT64_C( 37190235702003810), -INT64_C( 644016469005304627), -INT64_C( 5767440692609618659) } }, { { INT64_C( 7235442747905697904), -INT64_C( 2319343003134786644), -INT64_C( 5751397340793586223), INT64_C( 2839298246934907419), INT64_C( 2367676744825372574), -INT64_C( 7898282672855074639), INT64_C( 5489320810328769254), -INT64_C( 962909039784157498) }, { INT64_C( 5089705937810876174), INT64_C( 743143574359145468), -INT64_C( 2238934313749504400), -INT64_C( 5613173280512043565), -INT64_C( 4268330770017975320), INT64_C( 3063242394876153171), -INT64_C( 7859703023775577615), INT64_C( 3577217418109012302) }, { INT64_C( 7416149706616205182), -INT64_C( 2319303265996538884), -INT64_C( 1085367923656146959), -INT64_C( 5224396885334302757), -INT64_C( 1955688174701903874), -INT64_C( 4979859703327900173), -INT64_C( 2382969628884251657), -INT64_C( 889496806583476274) } }, { { INT64_C( 352250201419733336), INT64_C( 3018534595340232190), -INT64_C( 5307738001475588345), INT64_C( 4531831678781886825), INT64_C( 277887291062326054), INT64_C( 6559386569151370918), -INT64_C( 4148808817127179186), -INT64_C( 8670171034554919799) }, { -INT64_C( 938937519609869328), -INT64_C( 6755724536193108849), INT64_C( 6933227680470425364), -INT64_C( 89653021795897127), -INT64_C( 1673109969782497400), -INT64_C( 8821414289885914977), -INT64_C( 8385652262546510487), INT64_C( 8568247751283208413) }, { -INT64_C( 649791589396325384), -INT64_C( 6052840665212258817), -INT64_C( 686827530652680425), -INT64_C( 79518949804822023), -INT64_C( 1450181770996637778), -INT64_C( 2335222556925108545), -INT64_C( 3464255479336206993), -INT64_C( 581562555244251939) } }, { { INT64_C( 7342858363346869159), INT64_C( 2783203391673250730), -INT64_C( 6983105264582486567), INT64_C( 854402329842736503), -INT64_C( 2127121551310772658), -INT64_C( 7455536841593335131), INT64_C( 7823943280686788146), -INT64_C( 2411769338864263657) }, { INT64_C( 3093530412248699603), -INT64_C( 5074125731344291009), INT64_C( 
821136122698447108), -INT64_C( 5346037864804698528), INT64_C( 7949045959357481633), -INT64_C( 586873182692319846), INT64_C( 3150362272630606266), -INT64_C( 1214869851901050997) }, { INT64_C( 8065805954613763063), -INT64_C( 4638710090095002689), -INT64_C( 6956013085732717091), -INT64_C( 4620835162231406729), -INT64_C( 1262416098556647697), -INT64_C( 10240853918359617), INT64_C( 8051450971285614522), -INT64_C( 24788633565577313) } }, { { INT64_C( 3263251090716877996), INT64_C( 8083162768771822915), -INT64_C( 168649198812717646), INT64_C( 6580477785673483285), INT64_C( 1166855396492994417), INT64_C( 6655040048003973173), INT64_C( 7277678794420877456), INT64_C( 7516255955370600774) }, { INT64_C( 4278030445971342977), -INT64_C( 6402850831071796194), -INT64_C( 4202799087610102279), INT64_C( 1112681084295475986), -INT64_C( 7219062955112014193), INT64_C( 8151382797754562144), -INT64_C( 8950229773360445051), INT64_C( 2122882682639385816) }, { INT64_C( 4566649574931687085), -INT64_C( 635665364676594337), -INT64_C( 167479262595645957), INT64_C( 6877996840495281943), -INT64_C( 7209773731122511873), INT64_C( 9034212056335119989), -INT64_C( 1729409199123072619), INT64_C( 9043226952094182878) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_or_epi64(a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_or_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t src[8]; const simde__mmask8 k; const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { { INT64_C( 3661319176986321035), -INT64_C( 1528367236969856416), -INT64_C( 5104464832910522267), -INT64_C( 726225875027442177), -INT64_C( 9014696675506224050), -INT64_C( 9051454475470047648), -INT64_C( 7795981007606036274), INT64_C( 6143071551276653025) }, 
UINT8_C( 23), { -INT64_C( 2050057107331807281), -INT64_C( 5815130737346673481), INT64_C( 5364304135242080628), -INT64_C( 3735552285056320386), -INT64_C( 3207261001712377427), INT64_C( 8259492860388211849), INT64_C( 3997073007981093630), INT64_C( 2160162594097776945) }, { INT64_C( 126158479260381905), -INT64_C( 6115446643842019152), -INT64_C( 3935210835850746765), -INT64_C( 1393598984056493285), INT64_C( 4853923356578766820), INT64_C( 5909307511037644458), INT64_C( 5303518722588138295), INT64_C( 7392232739012120718) }, { -INT64_C( 2032038225924589601), -INT64_C( 5805823884632582985), -INT64_C( 3786446772757633673), -INT64_C( 726225875027442177), -INT64_C( 3207234049702002707), -INT64_C( 9051454475470047648), -INT64_C( 7795981007606036274), INT64_C( 6143071551276653025) } }, { { -INT64_C( 5498580644180955173), -INT64_C( 1324888738294488212), -INT64_C( 8260069012969546521), -INT64_C( 8516644795866271002), INT64_C( 1943488705661997116), -INT64_C( 725077280813678004), -INT64_C( 1609078079256613601), -INT64_C( 1526402865792097894) }, UINT8_C(176), { INT64_C( 3007692236954415048), INT64_C( 6701123904443236317), -INT64_C( 6252604006634414793), INT64_C( 2048083762722865819), INT64_C( 2359967858908056176), INT64_C( 6563714040126830346), INT64_C( 6453153310558549276), -INT64_C( 7554927845201025609) }, { INT64_C( 698040348146906443), -INT64_C( 3790102690545815577), INT64_C( 5505156404407722062), INT64_C( 5251899737511525155), -INT64_C( 1784405116429397709), INT64_C( 2859601976980864821), -INT64_C( 4496111277563420743), -INT64_C( 6675762436323413738) }, { -INT64_C( 5498580644180955173), -INT64_C( 1324888738294488212), -INT64_C( 8260069012969546521), -INT64_C( 8516644795866271002), -INT64_C( 1730280267062453389), INT64_C( 9205357625425985343), -INT64_C( 1609078079256613601), -INT64_C( 5224195981274253897) } }, { { -INT64_C( 1448449652967460354), -INT64_C( 1580342141860718318), -INT64_C( 703174105201359394), INT64_C( 3807339619423897795), -INT64_C( 3443535460487156588), INT64_C( 
7138148802329511411), -INT64_C( 2456682694661159150), -INT64_C( 1742921284027617180) }, UINT8_C(146), { INT64_C( 7858536605820203522), -INT64_C( 8450066526728211236), -INT64_C( 8812274993042510540), INT64_C( 2309150471083347858), INT64_C( 6541562564319206642), INT64_C( 4850022823630191232), INT64_C( 542199368696197739), INT64_C( 1357143831010631574) }, { -INT64_C( 2323799827673247000), -INT64_C( 1278757848781503250), -INT64_C( 5665963620397296669), INT64_C( 5910195416529821395), -INT64_C( 7743243952069250739), INT64_C( 48926196180856124), -INT64_C( 2057381896675353690), INT64_C( 6940569379697776789) }, { -INT64_C( 1448449652967460354), -INT64_C( 1226116251400342274), -INT64_C( 703174105201359394), INT64_C( 3807339619423897795), -INT64_C( 2392962817406960129), INT64_C( 7138148802329511411), -INT64_C( 2456682694661159150), INT64_C( 8274760820845895575) } }, { { -INT64_C( 4717636599774705331), -INT64_C( 7862956277528454717), -INT64_C( 8810793557811840571), -INT64_C( 5182845562633547874), INT64_C( 8655129809156513502), -INT64_C( 8445873536746546612), -INT64_C( 9157761006655562380), INT64_C( 2363057480066559400) }, UINT8_C( 98), { INT64_C( 4032004374184492918), INT64_C( 1411463924934257895), INT64_C( 5811463090775456485), -INT64_C( 410194232069670488), -INT64_C( 2664275056143473359), INT64_C( 7912142335821808323), -INT64_C( 2107665682917506163), INT64_C( 3839350586672632522) }, { INT64_C( 4994785017172650882), INT64_C( 7926843187899878924), INT64_C( 5309073803146325392), INT64_C( 2029400058945305840), INT64_C( 4869203973805059531), -INT64_C( 1188711299264572879), -INT64_C( 1437378370731352506), INT64_C( 5807402188424652259) }, { -INT64_C( 4717636599774705331), INT64_C( 9194045827134222063), -INT64_C( 8810793557811840571), -INT64_C( 5182845562633547874), INT64_C( 8655129809156513502), -INT64_C( 1167034974457589005), -INT64_C( 1239202385541472305), INT64_C( 2363057480066559400) } }, { { INT64_C( 4517725110942902839), -INT64_C( 6336335555842766399), INT64_C( 
1447478681830017582), INT64_C( 4031892649141138975), -INT64_C( 9034405839403380397), -INT64_C( 1158353365791149855), -INT64_C( 882088891294066307), -INT64_C( 3504490474468060634) }, UINT8_C( 29), { -INT64_C( 4667029045628751869), -INT64_C( 4308696435055886588), -INT64_C( 657654580496571768), INT64_C( 7407085908525188386), INT64_C( 1081141035830127217), INT64_C( 8622980344830132347), -INT64_C( 1263805274432454180), -INT64_C( 7287555194217502700) }, { INT64_C( 1525554353750646289), -INT64_C( 4052775759662812837), INT64_C( 1558454381270528486), -INT64_C( 5374365949207233370), INT64_C( 8914872304015656200), INT64_C( 4441260993943024367), INT64_C( 5660620830631061880), -INT64_C( 8347834756869776691) }, { -INT64_C( 4666859652051386861), -INT64_C( 6336335555842766399), -INT64_C( 576532612561505298), -INT64_C( 582253082654655066), INT64_C( 9203102728485793657), -INT64_C( 1158353365791149855), -INT64_C( 882088891294066307), -INT64_C( 3504490474468060634) } }, { { -INT64_C( 1915465842771953458), INT64_C( 3399199257075330967), INT64_C( 8737043393532140619), -INT64_C( 5376996792679637424), INT64_C( 1000939817634336777), INT64_C( 5774220578764071997), INT64_C( 5866364029680994883), INT64_C( 7961950062473794786) }, UINT8_C( 23), { -INT64_C( 690006965118734815), INT64_C( 5476461271299415957), -INT64_C( 5839587232107859825), INT64_C( 6729364364179259076), INT64_C( 2249821286577271023), INT64_C( 1163861103514235992), -INT64_C( 3038495911641386035), INT64_C( 6444796657872791766) }, { INT64_C( 5556831668248771700), -INT64_C( 6585865916961523192), -INT64_C( 6005372159066849855), -INT64_C( 4517902839317542518), -INT64_C( 4220474570919909077), -INT64_C( 6678018137657469738), INT64_C( 5271777547077885606), INT64_C( 4824634032580847846) }, { -INT64_C( 36628065222132107), -INT64_C( 1397719145344139363), -INT64_C( 5837298578245157425), -INT64_C( 5376996792679637424), -INT64_C( 2342438332237088273), INT64_C( 5774220578764071997), INT64_C( 5866364029680994883), INT64_C( 7961950062473794786) } 
}, { { INT64_C( 7419346753861956468), -INT64_C( 3032499050133568155), INT64_C( 7173524459889975172), -INT64_C( 3910097177235961246), INT64_C( 6068022085240364863), INT64_C( 3691409832953989430), INT64_C( 7549444775824821221), INT64_C( 2877130456848030926) }, UINT8_C(189), { INT64_C( 8434961914935441128), INT64_C( 4181408283957163164), -INT64_C( 8385236112120976945), -INT64_C( 7213189133895481974), INT64_C( 3400362389193134736), INT64_C( 8998289558329185784), INT64_C( 6015666477394239681), INT64_C( 1730615395330354870) }, { -INT64_C( 885514798386415373), INT64_C( 2414626903070213977), INT64_C( 8626518373486940571), -INT64_C( 8639341606507367558), INT64_C( 5425647820748090972), -INT64_C( 338100511992322612), INT64_C( 7412105766290546596), -INT64_C( 114978529918468790) }, { -INT64_C( 594750046436589829), -INT64_C( 3032499050133568155), -INT64_C( 20357526653371937), -INT64_C( 7205760816116402182), INT64_C( 8033234211055861468), -INT64_C( 4824735702452740), INT64_C( 7549444775824821221), -INT64_C( 114872941904265218) } }, { { INT64_C( 625615774204458609), INT64_C( 8566553283545857927), -INT64_C( 6758573349697469982), -INT64_C( 7919606190865015434), -INT64_C( 7520268040824001185), INT64_C( 4318214654586181540), -INT64_C( 532481923674945118), INT64_C( 7318111929964240347) }, UINT8_C(149), { INT64_C( 8776737915996077957), -INT64_C( 378849714569591622), INT64_C( 8228485904037885975), INT64_C( 4702846238338854277), -INT64_C( 2380090673657550627), -INT64_C( 1105711942724826500), INT64_C( 2105563224312456048), INT64_C( 7755777512957522084) }, { -INT64_C( 4397516249695121930), INT64_C( 1978513632449786540), -INT64_C( 3030895946949936582), -INT64_C( 4628900207231910967), INT64_C( 4245998957238004964), -INT64_C( 5942160567249708092), INT64_C( 5473403526820358895), -INT64_C( 1823284935291426262) }, { -INT64_C( 288794531985358857), INT64_C( 8566553283545857927), -INT64_C( 580546031377401281), -INT64_C( 7919606190865015434), -INT64_C( 72908966077928195), INT64_C( 
4318214654586181540), -INT64_C( 532481923674945118), -INT64_C( 1174754992533554514) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi64(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_mask_or_epi64(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_or_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { UINT8_C(243), { INT64_C( 6607773143298311001), INT64_C( 747251710714923559), -INT64_C( 651814038446241381), INT64_C( 8581770847649122825), INT64_C( 7934284021902262550), INT64_C( 8289319149355930503), INT64_C( 3001449180940391964), INT64_C( 4515533899237856276) }, { -INT64_C( 513943133449407448), -INT64_C( 4505286668915895762), INT64_C( 3395742826455355607), -INT64_C( 282304193843007825), INT64_C( 6821627629316871233), INT64_C( 8918217582631560028), INT64_C( 70529624180930224), -INT64_C( 6065807279509775104) }, { -INT64_C( 288335948595349639), -INT64_C( 3783368943172363729), INT64_C( 0), INT64_C( 0), INT64_C( 9133158688141268311), INT64_C( 8920469764648055775), INT64_C( 3026377657701299900), -INT64_C( 4612831250118353644) } }, { UINT8_C( 1), { -INT64_C( 5916729309565788611), -INT64_C( 2340802351535626765), -INT64_C( 6498641173007020482), -INT64_C( 5551324850384034809), -INT64_C( 6354546414697503463), INT64_C( 3137880762619666690), -INT64_C( 920480431848309941), INT64_C( 2132696337155049840) }, { -INT64_C( 1054667020777172734), INT64_C( 2399336497810298931), -INT64_C( 8952331340402571293), -INT64_C( 8621859435101771209), -INT64_C( 2399964864933094702), INT64_C( 6370987589993713268), INT64_C( 4983490336306700178), INT64_C( 7179822547708514708) }, { -INT64_C( 144220815423840449), 
INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(116), { INT64_C( 2590131453595428065), -INT64_C( 4189770811124925443), INT64_C( 4149812691542966297), INT64_C( 3956181737923129753), INT64_C( 165407252905396050), -INT64_C( 1204109446590307104), -INT64_C( 3492953594650288732), -INT64_C( 3259152023684951750) }, { -INT64_C( 200333092127606735), INT64_C( 5471818189430293034), -INT64_C( 6489652522350222869), -INT64_C( 8087346319675083920), INT64_C( 3354335113734700401), -INT64_C( 4002821708591526015), -INT64_C( 7273356092679582465), -INT64_C( 5910324440049368307) }, { INT64_C( 0), INT64_C( 0), -INT64_C( 4758299390809218565), INT64_C( 0), INT64_C( 3373195054405894003), -INT64_C( 1190288546101006367), -INT64_C( 2337410815097102849), INT64_C( 0) } }, { UINT8_C( 12), { -INT64_C( 2864220326792774464), -INT64_C( 1762457922283673492), INT64_C( 5065896484844794982), INT64_C( 8328615529749329978), INT64_C( 5768393017907127660), -INT64_C( 6368824068940315686), -INT64_C( 7520672327153504424), -INT64_C( 4678758067609787412) }, { -INT64_C( 8102255326642551973), INT64_C( 2786851643915069892), -INT64_C( 5510677752417447023), -INT64_C( 5162986244131412153), INT64_C( 1435388577372917158), -INT64_C( 6564906772021628688), -INT64_C( 3327850568640928169), INT64_C( 3230985858688164544) }, { INT64_C( 0), INT64_C( 0), -INT64_C( 590059752909506569), -INT64_C( 297958856714895489), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C( 17), { -INT64_C( 6610032513410692915), INT64_C( 5594978057287238282), -INT64_C( 5204005490663111699), -INT64_C( 8581493933975214829), INT64_C( 4902204179320948720), INT64_C( 456614092410699298), -INT64_C( 1709162420342280708), -INT64_C( 2450617988190075718) }, { INT64_C( 2156461137493875438), -INT64_C( 5465262492535586809), -INT64_C( 4366648816288058199), -INT64_C( 4982609783682489072), INT64_C( 7721228476751223568), INT64_C( 8231933372063829456), INT64_C( 2284545533172286183), INT64_C( 
3675770877265112443) }, { -INT64_C( 4761012908859598097), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 8011746500201635824), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C( 17), { -INT64_C( 5582809435615183515), -INT64_C( 3744762563012063892), -INT64_C( 7371835497483863841), INT64_C( 6853898065162336278), -INT64_C( 2358591587479154131), -INT64_C( 8161079221511339865), -INT64_C( 6250023680286156291), INT64_C( 2073858079106334829) }, { INT64_C( 2068827291285117796), -INT64_C( 5192426373283728205), -INT64_C( 7664946069928561564), -INT64_C( 8380983232254093916), INT64_C( 373824410368499615), INT64_C( 7943320640690863819), -INT64_C( 1816632622552081140), INT64_C( 3102463971999532891) }, { -INT64_C( 4704586547721142427), INT64_C( 0), INT64_C( 0), INT64_C( 0), -INT64_C( 2345074191453724737), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(137), { -INT64_C( 5885112414594449552), -INT64_C( 5643532512505898935), INT64_C( 8497405936442532747), -INT64_C( 1295018545582674093), INT64_C( 903476142813930091), -INT64_C( 1051488916752940894), -INT64_C( 8691544632263025857), -INT64_C( 555617802753056034) }, { INT64_C( 3296173732352213486), INT64_C( 573168574036995229), -INT64_C( 4439328891215776088), INT64_C( 7223045329763854775), INT64_C( 784422852770533674), -INT64_C( 4895130134412146879), INT64_C( 1305760606038066551), -INT64_C( 3440305195935384217) }, { -INT64_C( 5764609740397346818), INT64_C( 0), INT64_C( 0), -INT64_C( 1279185573845766153), INT64_C( 0), INT64_C( 0), INT64_C( 0), -INT64_C( 555177849903415297) } }, { UINT8_C(165), { INT64_C( 3345080952170972503), -INT64_C( 8312238056190872021), INT64_C( 6629008511800311960), INT64_C( 8325415308185688911), -INT64_C( 1879114047830451114), -INT64_C( 4137482948338176127), -INT64_C( 4454240953403868491), INT64_C( 6468629226322276662) }, { -INT64_C( 1194857502905769012), -INT64_C( 1718251684327097038), INT64_C( 1530075843475047430), -INT64_C( 2416132743736335844), -INT64_C( 4184459149005451377), INT64_C( 
8532153540123419981), INT64_C( 3225750579480306896), -INT64_C( 1730011902885811443) }, { -INT64_C( 1193700811690481697), INT64_C( 0), INT64_C( 6917524492155520158), INT64_C( 0), INT64_C( 0), -INT64_C( 649365526355251251), INT64_C( 0), -INT64_C( 576868332913857) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_maskz_or_epi64(test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_or_si512(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 1982508443), INT32_C( -368650443), INT32_C( -190462634), INT32_C( 1539812062), INT32_C( 1356046477), INT32_C( 862189546), INT32_C( 1762309251), INT32_C(-1019483096), INT32_C( 1873631110), INT32_C( -15642982), INT32_C( 1155728159), INT32_C( -93367878), INT32_C( -146552290), INT32_C(-1970782191), INT32_C(-1003501987), INT32_C( 202140132)), simde_mm512_set_epi32(INT32_C(-1200042690), INT32_C( 641010033), INT32_C( 1397158609), INT32_C(-1526446074), INT32_C( 334871021), INT32_C(-1650252171), INT32_C( 346015053), INT32_C( -55637966), INT32_C( 208214931), INT32_C(-1364363811), INT32_C(-1538776181), INT32_C( 1682811579), INT32_C( 1143917073), INT32_C( 1704546357), INT32_C( -526995661), INT32_C( 1822747438)), simde_mm512_set_epi32(INT32_C( -25498177), INT32_C( -298328203), INT32_C( -135792681), INT32_C( -3675426), INT32_C( 1408743917), INT32_C(-1075578881), INT32_C( 2107635151), INT32_C( -4199366), INT32_C( 1877827479), INT32_C( -4358177), INT32_C( -454351969), INT32_C( -26215493), INT32_C( -143668705), INT32_C( -275165131), INT32_C( -457183361), INT32_C( 1823271918)) }, { simde_mm512_set_epi32(INT32_C( 1700241441), INT32_C( 1263470492), INT32_C( 1650149592), INT32_C(-1970638798), INT32_C( 
779737204), INT32_C( 613390512), INT32_C( 1903573583), INT32_C( 1579794471), INT32_C( -751717923), INT32_C( 1508394249), INT32_C( 1822398709), INT32_C(-1695423756), INT32_C( -107711426), INT32_C( -896105060), INT32_C( 529237571), INT32_C( 178579675)), simde_mm512_set_epi32(INT32_C( -875424608), INT32_C( 1367415512), INT32_C( 131368493), INT32_C( 402371418), INT32_C( 1616362823), INT32_C(-2005702634), INT32_C( -384303225), INT32_C( 1749032691), INT32_C(-1558748500), INT32_C( 1913654477), INT32_C(-1008865960), INT32_C(-1123849687), INT32_C( 83161050), INT32_C( 1602030576), INT32_C( -421382217), INT32_C( 749511091)), simde_mm512_set_epi32(INT32_C( -271076191), INT32_C( 1540300764), INT32_C( 1742720765), INT32_C(-1610878086), INT32_C( 1853881719), INT32_C(-1392510794), INT32_C( -109576241), INT32_C( 2120867575), INT32_C( -214436867), INT32_C( 2079870413), INT32_C( -270533123), INT32_C(-1074533635), INT32_C( -34275330), INT32_C( -536899588), INT32_C( -1329161), INT32_C( 783084027)) }, { simde_mm512_set_epi32(INT32_C( -198115845), INT32_C( 1907998628), INT32_C(-1258846188), INT32_C( 680092843), INT32_C( 1806004257), INT32_C( 809421067), INT32_C( 530040867), INT32_C(-1316120429), INT32_C( 457386668), INT32_C( 815983260), INT32_C( 1763745819), INT32_C( 2000730006), INT32_C(-1234863927), INT32_C( 1084046116), INT32_C( 472797794), INT32_C(-1156282262)), simde_mm512_set_epi32(INT32_C( 1507578237), INT32_C( 1923983420), INT32_C( 1994590915), INT32_C( 1646522822), INT32_C(-2017657183), INT32_C(-1653054803), INT32_C(-1634459065), INT32_C( -572700558), INT32_C( 1977566390), INT32_C( -646523450), INT32_C( -234450626), INT32_C( 330831665), INT32_C( 1706081529), INT32_C(-1640342739), INT32_C( -694582053), INT32_C( 598851851)), simde_mm512_set_epi32(INT32_C( -33817089), INT32_C( 1941815228), INT32_C( -151549225), INT32_C( 1789654511), INT32_C( -272761183), INT32_C(-1115755601), INT32_C(-1617434009), INT32_C( -35792653), INT32_C( 2145349310), INT32_C( -101253154), INT32_C( -81350337), 
INT32_C( 2012788663), INT32_C( -134873863), INT32_C( -557876435), INT32_C( -557852933), INT32_C(-1145714325)) }, { simde_mm512_set_epi32(INT32_C( 568896963), INT32_C( -561959153), INT32_C( 769261839), INT32_C( 619550472), INT32_C( 1265145937), INT32_C(-1898129853), INT32_C(-1844756744), INT32_C( 253926616), INT32_C(-1200681430), INT32_C( 757779385), INT32_C(-1090889117), INT32_C( 2001359420), INT32_C( -628410960), INT32_C(-1884853401), INT32_C( 464697363), INT32_C( -267213390)), simde_mm512_set_epi32(INT32_C( 1305596604), INT32_C( 1367027235), INT32_C( 1022068839), INT32_C(-1304299428), INT32_C(-1551155443), INT32_C(-1757021038), INT32_C( -634643752), INT32_C( 417623958), INT32_C( 1338218088), INT32_C( 1144004768), INT32_C( -119591543), INT32_C( 343634162), INT32_C(-1756432337), INT32_C( -336536481), INT32_C( 155367900), INT32_C( 27211228)), simde_mm512_set_epi32(INT32_C( 1845100543), INT32_C( -537186513), INT32_C( 1039894895), INT32_C(-1225916580), INT32_C( -336871587), INT32_C(-1612841261), INT32_C( -634437896), INT32_C( 535232478), INT32_C( -1073558), INT32_C( 1832577977), INT32_C( -16810005), INT32_C( 2004579070), INT32_C( -540017217), INT32_C( -268959873), INT32_C( 468892127), INT32_C( -241191426)) }, { simde_mm512_set_epi32(INT32_C(-1566019929), INT32_C( 1771648205), INT32_C( 293391222), INT32_C( -190388911), INT32_C(-1413267332), INT32_C( -491216745), INT32_C(-2017086754), INT32_C( -505487315), INT32_C(-1311872315), INT32_C( 1730833859), INT32_C( 1507236184), INT32_C( 127469321), INT32_C(-1954223251), INT32_C(-1913468253), INT32_C( 390805157), INT32_C( 1427395916)), simde_mm512_set_epi32(INT32_C( -290198315), INT32_C( -186963818), INT32_C( 337890960), INT32_C( -133116402), INT32_C( -567590842), INT32_C( 1356957734), INT32_C( -411285842), INT32_C( 212429154), INT32_C( 561941682), INT32_C( 1263368380), INT32_C( 33943343), INT32_C( 477355785), INT32_C( 464038301), INT32_C( 283034157), INT32_C( 882337256), INT32_C( 1854097219)), simde_mm512_set_epi32(INT32_C( 
-289673481), INT32_C( -35963681), INT32_C( 360697846), INT32_C( -55120033), INT32_C( -1359746), INT32_C( -218519369), INT32_C( -402786562), INT32_C( -301995665), INT32_C(-1308692745), INT32_C( 1869577727), INT32_C( 1540882303), INT32_C( 536600329), INT32_C(-1683166211), INT32_C(-1644246353), INT32_C( 937392109), INT32_C( 2140624719)) }, { simde_mm512_set_epi32(INT32_C( 1586789989), INT32_C( 1873262060), INT32_C( -1228101), INT32_C( 1094551912), INT32_C( 1242820965), INT32_C( -129127728), INT32_C( 916155808), INT32_C( 1457274373), INT32_C( -162664167), INT32_C( -307612047), INT32_C(-2058619353), INT32_C( 1041657370), INT32_C(-1303652034), INT32_C( 1318052527), INT32_C( 343091765), INT32_C(-1843970146)), simde_mm512_set_epi32(INT32_C( -418596097), INT32_C( 1359591501), INT32_C( 1365241616), INT32_C( 975187949), INT32_C( 2075206187), INT32_C( 49913508), INT32_C( 982225383), INT32_C( 2039004600), INT32_C( -658027813), INT32_C( 1363761789), INT32_C( -596362918), INT32_C( -188756489), INT32_C( 2075405229), INT32_C( -261325870), INT32_C( 1149275923), INT32_C( 1906788899)), simde_mm512_set_epi32(INT32_C( -6488321), INT32_C( 2142223853), INT32_C( -4165), INT32_C( 2067640301), INT32_C( 2075390831), INT32_C( -84033804), INT32_C( 1050410471), INT32_C( 2145173437), INT32_C( -19924005), INT32_C( -34898307), INT32_C( -578814081), INT32_C( -20975617), INT32_C( -67109953), INT32_C( -17826817), INT32_C( 1425256247), INT32_C( -206078017)) }, { simde_mm512_set_epi32(INT32_C(-2074326161), INT32_C(-2000089664), INT32_C( -95906603), INT32_C(-2144457962), INT32_C( -460603570), INT32_C( -616108121), INT32_C(-1801036003), INT32_C( 192023719), INT32_C( 1229400941), INT32_C( 53109497), INT32_C( 1637729546), INT32_C( -377510882), INT32_C( 959365464), INT32_C( -183985269), INT32_C( 446964672), INT32_C( -984185866)), simde_mm512_set_epi32(INT32_C(-1212943296), INT32_C( 40655504), INT32_C( 1783466062), INT32_C(-1105776557), INT32_C( 2093068641), INT32_C( 923055475), INT32_C(-2145339184), INT32_C( 
312550463), INT32_C( -600919225), INT32_C(-1156369187), INT32_C( -442421904), INT32_C( -479777830), INT32_C( 786467717), INT32_C(-1353894968), INT32_C(-2102502413), INT32_C( 630995848)), simde_mm512_set_epi32(INT32_C(-1207959697), INT32_C(-1964154928), INT32_C( -95576865), INT32_C(-1103152297), INT32_C( -53756049), INT32_C( -12124169), INT32_C(-1800994851), INT32_C( 468921535), INT32_C( -579880081), INT32_C(-1153730819), INT32_C( -440537734), INT32_C( -343953442), INT32_C( 1072614365), INT32_C( -11682869), INT32_C(-1699841037), INT32_C( -438387714)) }, { simde_mm512_set_epi32(INT32_C(-2063919183), INT32_C( 261182590), INT32_C( 1716894204), INT32_C( 315016729), INT32_C(-1244972332), INT32_C( 1333991353), INT32_C( 1246104528), INT32_C(-1234716491), INT32_C( -852837622), INT32_C( 266496100), INT32_C(-2090175093), INT32_C( 1822414148), INT32_C(-1888096784), INT32_C(-1814389856), INT32_C( 716652272), INT32_C(-1702112633)), simde_mm512_set_epi32(INT32_C( -775162340), INT32_C( -717192300), INT32_C( 657226535), INT32_C( -646565165), INT32_C( 1464387491), INT32_C(-1521859395), INT32_C( -74746289), INT32_C( -342854144), INT32_C( 1370164421), INT32_C( 1847323166), INT32_C( -31713278), INT32_C( 2054986117), INT32_C(-1330721270), INT32_C( 155186332), INT32_C( 1062642768), INT32_C(-1225803976)), simde_mm512_set_epi32(INT32_C( -704907331), INT32_C( -539895810), INT32_C( 1736309759), INT32_C( -604571941), INT32_C( -137625609), INT32_C( -271886403), INT32_C( -70287393), INT32_C( -524619), INT32_C( -575946801), INT32_C( 1878780542), INT32_C( -8487541), INT32_C( 2130697669), INT32_C(-1073745926), INT32_C(-1677721668), INT32_C( 1073200880), INT32_C(-1091569729)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_or_si512(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_or_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_or_pd) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm512_or_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_or_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_or_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_or_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_or_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_or_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_or_si512) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/packs.c000066400000000000000000000760021400333146700165260ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN packs #include #include static int test_simde_mm512_packs_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[32]; const int16_t b[32]; const int8_t r[64]; } test_vec[] = { { { INT16_C( 148), INT16_C( 89), INT16_C( 44), INT16_C( 120), INT16_C( 172), INT16_C( 95), INT16_C( 152), INT16_C( 63), INT16_C( 158), INT16_C( 87), INT16_C( 102), INT16_C( 236), INT16_C( 153), INT16_C( 222), INT16_C( 143), INT16_C( 196), INT16_C( 171), INT16_C( 232), INT16_C( 34), INT16_C( 217), INT16_C( 125), INT16_C( 165), INT16_C( 230), INT16_C( 5), INT16_C( 46), INT16_C( 252), INT16_C( 228), INT16_C( 53), INT16_C( 41), INT16_C( 126), INT16_C( 57), INT16_C( 220) }, { INT16_C( 25061), -INT16_C( 16956), INT16_C( 9603), INT16_C( 21142), -INT16_C( 12382), -INT16_C( 18441), -INT16_C( 9035), INT16_C( 14780), INT16_C( 6155), -INT16_C( 24779), INT16_C( 7677), INT16_C( 31444), -INT16_C( 6074), -INT16_C( 7), INT16_C( 15393), INT16_C( 1755), -INT16_C( 24419), INT16_C( 8387), INT16_C( 23237), INT16_C( 26482), INT16_C( 27177), -INT16_C( 8674), -INT16_C( 9658), INT16_C( 20759), INT16_C( 19954), -INT16_C( 4111), -INT16_C( 14998), -INT16_C( 20118), INT16_C( 25517), -INT16_C( 12368), -INT16_C( 29793), INT16_C( 15573) }, { INT8_MAX, INT8_C( 89), INT8_C( 44), INT8_C( 120), INT8_MAX, INT8_C( 95), INT8_MAX, INT8_C( 63), INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_C( 87), INT8_C( 102), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, -INT8_C( 7), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 34), INT8_MAX, INT8_C( 125), INT8_MAX, INT8_MAX, INT8_C( 5), INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_C( 46), INT8_MAX, INT8_MAX, INT8_C( 53), INT8_C( 41), INT8_C( 126), INT8_C( 57), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX 
} }, { { INT16_C( 153), INT16_C( 240), INT16_C( 207), INT16_C( 28), INT16_C( 117), INT16_C( 127), INT16_C( 17), INT16_C( 66), INT16_C( 194), INT16_C( 201), INT16_C( 155), INT16_C( 53), INT16_C( 42), INT16_C( 157), INT16_C( 217), INT16_C( 225), INT16_C( 54), INT16_C( 101), INT16_C( 41), INT16_C( 62), INT16_C( 123), INT16_C( 238), INT16_C( 142), INT16_C( 235), INT16_C( 97), INT16_C( 216), INT16_C( 46), INT16_C( 251), INT16_C( 17), INT16_C( 14), INT16_C( 114), INT16_C( 93) }, { -INT16_C( 16216), -INT16_C( 21054), INT16_C( 17641), -INT16_C( 30485), -INT16_C( 22081), INT16_C( 19574), -INT16_C( 22985), -INT16_C( 30664), -INT16_C( 5113), INT16_C( 1120), INT16_C( 27931), INT16_C( 29440), -INT16_C( 26242), INT16_C( 26753), INT16_C( 28683), -INT16_C( 19259), -INT16_C( 30671), INT16_C( 6753), INT16_C( 19916), -INT16_C( 29790), INT16_C( 6390), INT16_C( 11736), INT16_C( 4286), -INT16_C( 14667), INT16_C( 5628), INT16_C( 6090), -INT16_C( 13694), INT16_C( 139), INT16_C( 3171), INT16_C( 28521), INT16_C( 11901), -INT16_C( 20957) }, { INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 28), INT8_C( 117), INT8_MAX, INT8_C( 17), INT8_C( 66), INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 53), INT8_C( 42), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_C( 54), INT8_C( 101), INT8_C( 41), INT8_C( 62), INT8_C( 123), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_C( 97), INT8_MAX, INT8_C( 46), INT8_MAX, INT8_C( 17), INT8_C( 14), INT8_C( 114), INT8_C( 93), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN } }, { { INT16_C( 132), INT16_C( 130), INT16_C( 107), INT16_C( 199), INT16_C( 230), INT16_C( 66), INT16_C( 170), INT16_C( 242), INT16_C( 210), INT16_C( 66), INT16_C( 149), INT16_C( 0), INT16_C( 172), INT16_C( 30), INT16_C( 146), INT16_C( 145), INT16_C( 149), INT16_C( 232), 
INT16_C( 33), INT16_C( 131), INT16_C( 165), INT16_C( 253), INT16_C( 205), INT16_C( 15), INT16_C( 250), INT16_C( 61), INT16_C( 149), INT16_C( 48), INT16_C( 173), INT16_C( 27), INT16_C( 27), INT16_C( 86) }, { -INT16_C( 16208), -INT16_C( 20417), -INT16_C( 4127), -INT16_C( 5836), -INT16_C( 1644), -INT16_C( 7194), -INT16_C( 10553), INT16_C( 26611), INT16_C( 17872), INT16_C( 24484), -INT16_C( 7718), INT16_C( 7056), -INT16_C( 8306), -INT16_C( 12746), -INT16_C( 7174), -INT16_C( 21724), INT16_C( 25507), -INT16_C( 31653), -INT16_C( 28846), -INT16_C( 6547), INT16_C( 21641), INT16_C( 20682), -INT16_C( 17110), -INT16_C( 1097), INT16_C( 23298), -INT16_C( 9126), -INT16_C( 5572), -INT16_C( 13321), INT16_C( 11721), -INT16_C( 15207), -INT16_C( 17136), -INT16_C( 19601) }, { INT8_MAX, INT8_MAX, INT8_C( 107), INT8_MAX, INT8_MAX, INT8_C( 66), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_C( 66), INT8_MAX, INT8_C( 0), INT8_MAX, INT8_C( 30), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_C( 33), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 15), INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_C( 61), INT8_MAX, INT8_C( 48), INT8_MAX, INT8_C( 27), INT8_C( 27), INT8_C( 86), INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN } }, { { INT16_C( 202), INT16_C( 115), INT16_C( 165), INT16_C( 227), INT16_C( 36), INT16_C( 36), INT16_C( 234), INT16_C( 227), INT16_C( 121), INT16_C( 129), INT16_C( 182), INT16_C( 45), INT16_C( 229), INT16_C( 244), INT16_C( 96), INT16_C( 196), INT16_C( 223), INT16_C( 133), INT16_C( 145), INT16_C( 126), INT16_C( 155), INT16_C( 150), INT16_C( 193), INT16_C( 202), INT16_C( 56), INT16_C( 159), INT16_C( 152), INT16_C( 210), INT16_C( 190), INT16_C( 32), INT16_C( 109), INT16_C( 73) }, { INT16_C( 7245), -INT16_C( 11570), INT16_C( 13997), INT16_C( 25424), 
-INT16_C( 3119), INT16_C( 22265), INT16_C( 29620), -INT16_C( 4320), INT16_C( 27819), -INT16_C( 25970), INT16_C( 23300), -INT16_C( 32404), INT16_C( 12825), INT16_C( 14242), -INT16_C( 31073), -INT16_C( 4991), INT16_C( 20386), INT16_C( 20670), INT16_C( 3974), INT16_C( 22451), -INT16_C( 21502), -INT16_C( 18770), -INT16_C( 12769), -INT16_C( 13402), INT16_C( 13370), INT16_C( 15973), -INT16_C( 11889), -INT16_C( 22336), INT16_C( 25091), -INT16_C( 23840), INT16_C( 25064), -INT16_C( 29809) }, { INT8_MAX, INT8_C( 115), INT8_MAX, INT8_MAX, INT8_C( 36), INT8_C( 36), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_C( 121), INT8_MAX, INT8_MAX, INT8_C( 45), INT8_MAX, INT8_MAX, INT8_C( 96), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 126), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_C( 56), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 32), INT8_C( 109), INT8_C( 73), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN } }, { { INT16_C( 77), INT16_C( 54), INT16_C( 142), INT16_C( 94), INT16_C( 60), INT16_C( 90), INT16_C( 187), INT16_C( 69), INT16_C( 138), INT16_C( 127), INT16_C( 67), INT16_C( 94), INT16_C( 7), INT16_C( 142), INT16_C( 143), INT16_C( 25), INT16_C( 244), INT16_C( 57), INT16_C( 221), INT16_C( 188), INT16_C( 173), INT16_C( 36), INT16_C( 59), INT16_C( 87), INT16_C( 236), INT16_C( 32), INT16_C( 254), INT16_C( 213), INT16_C( 127), INT16_C( 110), INT16_C( 124), INT16_C( 235) }, { -INT16_C( 10640), -INT16_C( 3547), -INT16_C( 16972), -INT16_C( 12881), -INT16_C( 14998), -INT16_C( 11535), INT16_C( 23041), -INT16_C( 14807), INT16_C( 71), INT16_C( 30695), INT16_C( 26110), INT16_C( 844), -INT16_C( 20252), -INT16_C( 3215), -INT16_C( 2004), -INT16_C( 25122), INT16_C( 975), -INT16_C( 31857), INT16_C( 16064), INT16_C( 10832), INT16_C( 
16900), INT16_C( 1532), INT16_C( 9884), -INT16_C( 7221), -INT16_C( 19930), INT16_C( 9306), -INT16_C( 22760), -INT16_C( 985), -INT16_C( 26281), -INT16_C( 31761), -INT16_C( 12655), INT16_C( 24608) }, { INT8_C( 77), INT8_C( 54), INT8_MAX, INT8_C( 94), INT8_C( 60), INT8_C( 90), INT8_MAX, INT8_C( 69), INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_C( 67), INT8_C( 94), INT8_C( 7), INT8_MAX, INT8_MAX, INT8_C( 25), INT8_C( 71), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_C( 57), INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 36), INT8_C( 59), INT8_C( 87), INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_C( 32), INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 110), INT8_C( 124), INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX } }, { { INT16_C( 176), INT16_C( 146), INT16_C( 52), INT16_C( 242), INT16_C( 185), INT16_C( 18), INT16_C( 195), INT16_C( 5), INT16_C( 80), INT16_C( 141), INT16_C( 80), INT16_C( 78), INT16_C( 121), INT16_C( 123), INT16_C( 242), INT16_C( 25), INT16_C( 191), INT16_C( 145), INT16_C( 103), INT16_C( 105), INT16_C( 123), INT16_C( 255), INT16_C( 113), INT16_C( 179), INT16_C( 45), INT16_C( 185), INT16_C( 203), INT16_C( 103), INT16_C( 218), INT16_C( 140), INT16_C( 190), INT16_C( 111) }, { INT16_C( 20605), INT16_C( 28672), -INT16_C( 31817), -INT16_C( 10023), INT16_C( 21758), INT16_C( 15575), -INT16_C( 9018), -INT16_C( 30480), INT16_C( 12553), -INT16_C( 30911), INT16_C( 18940), INT16_C( 16623), -INT16_C( 11997), -INT16_C( 3892), INT16_C( 29071), INT16_C( 3167), INT16_C( 24513), INT16_C( 31100), INT16_C( 21986), -INT16_C( 7855), INT16_C( 10410), INT16_C( 28701), INT16_C( 3332), INT16_C( 3832), INT16_C( 14654), INT16_C( 14997), -INT16_C( 31613), -INT16_C( 22917), INT16_C( 18262), -INT16_C( 6762), -INT16_C( 2631), INT16_C( 31474) }, { INT8_MAX, INT8_MAX, INT8_C( 52), INT8_MAX, INT8_MAX, INT8_C( 18), 
INT8_MAX, INT8_C( 5), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_C( 80), INT8_MAX, INT8_C( 80), INT8_C( 78), INT8_C( 121), INT8_C( 123), INT8_MAX, INT8_C( 25), INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 103), INT8_C( 105), INT8_C( 123), INT8_MAX, INT8_C( 113), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 45), INT8_MAX, INT8_MAX, INT8_C( 103), INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 111), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX } }, { { INT16_C( 110), INT16_C( 55), INT16_C( 68), INT16_C( 110), INT16_C( 53), INT16_C( 113), INT16_C( 214), INT16_C( 129), INT16_C( 21), INT16_C( 146), INT16_C( 55), INT16_C( 239), INT16_C( 207), INT16_C( 55), INT16_C( 199), INT16_C( 25), INT16_C( 165), INT16_C( 249), INT16_C( 104), INT16_C( 87), INT16_C( 69), INT16_C( 225), INT16_C( 72), INT16_C( 43), INT16_C( 30), INT16_C( 246), INT16_C( 246), INT16_C( 212), INT16_C( 187), INT16_C( 139), INT16_C( 189), INT16_C( 183) }, { -INT16_C( 2717), INT16_C( 19889), INT16_C( 6237), -INT16_C( 1116), INT16_C( 27742), INT16_C( 31196), INT16_C( 16308), INT16_C( 4516), INT16_C( 25181), -INT16_C( 19704), -INT16_C( 4520), INT16_C( 7815), -INT16_C( 27991), INT16_C( 11177), INT16_C( 20048), -INT16_C( 19486), -INT16_C( 27837), -INT16_C( 24576), -INT16_C( 23380), INT16_C( 2716), INT16_C( 30736), -INT16_C( 14973), INT16_C( 10423), INT16_C( 5590), -INT16_C( 8566), -INT16_C( 7480), INT16_C( 20428), INT16_C( 29953), -INT16_C( 21791), INT16_C( 12704), -INT16_C( 31752), INT16_C( 15332) }, { INT8_C( 110), INT8_C( 55), INT8_C( 68), INT8_C( 110), INT8_C( 53), INT8_C( 113), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 21), INT8_MAX, INT8_C( 55), INT8_MAX, INT8_MAX, INT8_C( 55), INT8_MAX, INT8_C( 25), INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX, 
INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_C( 104), INT8_C( 87), INT8_C( 69), INT8_MAX, INT8_C( 72), INT8_C( 43), INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_C( 30), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX } }, { { INT16_C( 228), INT16_C( 194), INT16_C( 120), INT16_C( 153), INT16_C( 80), INT16_C( 168), INT16_C( 52), INT16_C( 2), INT16_C( 133), INT16_C( 223), INT16_C( 229), INT16_C( 181), INT16_C( 245), INT16_C( 136), INT16_C( 203), INT16_C( 143), INT16_C( 160), INT16_C( 56), INT16_C( 30), INT16_C( 8), INT16_C( 47), INT16_C( 230), INT16_C( 109), INT16_C( 119), INT16_C( 204), INT16_C( 198), INT16_C( 171), INT16_C( 66), INT16_C( 99), INT16_C( 25), INT16_C( 142), INT16_C( 222) }, { -INT16_C( 1490), INT16_C( 17943), -INT16_C( 6120), -INT16_C( 31153), -INT16_C( 232), INT16_C( 31852), INT16_C( 21613), INT16_C( 24563), INT16_C( 18720), -INT16_C( 11738), -INT16_C( 23819), -INT16_C( 27116), -INT16_C( 8443), INT16_C( 13231), INT16_C( 22637), -INT16_C( 25582), INT16_C( 10578), INT16_C( 27362), INT16_C( 12561), INT16_C( 10736), INT16_C( 23601), -INT16_C( 24923), -INT16_C( 26448), -INT16_C( 12035), INT16_C( 9186), -INT16_C( 10333), -INT16_C( 18491), -INT16_C( 13715), INT16_C( 7318), INT16_C( 1278), INT16_C( 4212), -INT16_C( 14688) }, { INT8_MAX, INT8_MAX, INT8_C( 120), INT8_MAX, INT8_C( 80), INT8_MAX, INT8_C( 52), INT8_C( 2), INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_C( 56), INT8_C( 30), INT8_C( 8), INT8_C( 47), INT8_MAX, INT8_C( 109), INT8_C( 119), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 66), INT8_C( 99), INT8_C( 25), INT8_MAX, INT8_MAX, 
INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_packs_epi16(a, b); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_packs_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[16]; const int32_t b[16]; const int16_t r[32]; } test_vec[] = { { { INT32_C( 1800617241), -INT32_C( 686819306), INT32_C( 140214962), INT32_C( 1970280150), -INT32_C( 1837652367), -INT32_C( 601751898), -INT32_C( 689735000), -INT32_C( 924571217), INT32_C( 2083731302), -INT32_C( 497859792), -INT32_C( 1679118651), INT32_C( 907041733), -INT32_C( 1463253247), INT32_C( 780462469), INT32_C( 319055716), -INT32_C( 153359984) }, { INT32_C( 20594), INT32_C( 5683), INT32_C( 14769), INT32_C( 31344), INT32_C( 53026), INT32_C( 34557), INT32_C( 40345), INT32_C( 2963), INT32_C( 54363), INT32_C( 16618), INT32_C( 9337), INT32_C( 42910), INT32_C( 17526), INT32_C( 29642), INT32_C( 24336), INT32_C( 22890) }, { INT16_MAX, INT16_MIN, INT16_MAX, INT16_MAX, INT16_C( 20594), INT16_C( 5683), INT16_C( 14769), INT16_C( 31344), INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 2963), INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, INT16_C( 16618), INT16_C( 9337), INT16_MAX, INT16_MIN, INT16_MAX, INT16_MAX, INT16_MIN, INT16_C( 17526), INT16_C( 29642), INT16_C( 24336), INT16_C( 22890) } }, { { INT32_C( 1949157033), INT32_C( 11802708), INT32_C( 304426676), INT32_C( 968475415), -INT32_C( 360894332), INT32_C( 408831907), -INT32_C( 2122813782), -INT32_C( 1143217646), -INT32_C( 47249240), -INT32_C( 721558496), INT32_C( 702947858), INT32_C( 1784914150), INT32_C( 1934942416), -INT32_C( 762531288), INT32_C( 861144097), -INT32_C( 1880215578) }, { INT32_C( 
22668), INT32_C( 4908), INT32_C( 37437), INT32_C( 33788), INT32_C( 43254), INT32_C( 9339), INT32_C( 27991), INT32_C( 13820), INT32_C( 25741), INT32_C( 48503), INT32_C( 32847), INT32_C( 54531), INT32_C( 40829), INT32_C( 707), INT32_C( 50543), INT32_C( 49659) }, { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 22668), INT16_C( 4908), INT16_MAX, INT16_MAX, INT16_MIN, INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX, INT16_C( 9339), INT16_C( 27991), INT16_C( 13820), INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, INT16_C( 25741), INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX, INT16_C( 707), INT16_MAX, INT16_MAX } }, { { INT32_C( 86345971), INT32_C( 264412482), -INT32_C( 1500573103), -INT32_C( 109342115), INT32_C( 144242828), -INT32_C( 1207280655), -INT32_C( 729908619), -INT32_C( 644449819), INT32_C( 1155447553), -INT32_C( 1437360040), INT32_C( 273736626), INT32_C( 17419125), -INT32_C( 1274436925), INT32_C( 1936528637), INT32_C( 1934093198), INT32_C( 1699536228) }, { INT32_C( 61865), INT32_C( 32155), INT32_C( 21901), INT32_C( 31319), INT32_C( 13870), INT32_C( 681), INT32_C( 60022), INT32_C( 26448), INT32_C( 47193), INT32_C( 54837), INT32_C( 38444), INT32_C( 37648), INT32_C( 22729), INT32_C( 24922), INT32_C( 12875), INT32_C( 32922) }, { INT16_MAX, INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX, INT16_C( 32155), INT16_C( 21901), INT16_C( 31319), INT16_MAX, INT16_MIN, INT16_MIN, INT16_MIN, INT16_C( 13870), INT16_C( 681), INT16_MAX, INT16_C( 26448), INT16_MAX, INT16_MIN, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MIN, INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 22729), INT16_C( 24922), INT16_C( 12875), INT16_MAX } }, { { -INT32_C( 1959201899), -INT32_C( 949850649), -INT32_C( 1973514704), -INT32_C( 199397871), -INT32_C( 2008225875), -INT32_C( 1091983526), INT32_C( 183514231), INT32_C( 1703578320), INT32_C( 1710277245), INT32_C( 1613517360), -INT32_C( 236221728), -INT32_C( 1494873863), INT32_C( 1227764463), INT32_C( 
1359419353), INT32_C( 475789388), INT32_C( 8513154) }, { INT32_C( 55397), INT32_C( 42041), INT32_C( 5526), INT32_C( 7355), INT32_C( 34917), INT32_C( 19929), INT32_C( 59241), INT32_C( 50151), INT32_C( 8347), INT32_C( 64196), INT32_C( 9487), INT32_C( 34113), INT32_C( 46605), INT32_C( 30723), INT32_C( 13664), INT32_C( 46072) }, { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, INT16_C( 5526), INT16_C( 7355), INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 19929), INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MIN, INT16_MIN, INT16_C( 8347), INT16_MAX, INT16_C( 9487), INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 30723), INT16_C( 13664), INT16_MAX } }, { { INT32_C( 1926468500), INT32_C( 1617729640), -INT32_C( 913998862), -INT32_C( 95500731), -INT32_C( 2135925907), INT32_C( 1543091009), INT32_C( 2022725920), INT32_C( 875268256), -INT32_C( 2069430500), -INT32_C( 1981541737), -INT32_C( 749573491), -INT32_C( 1647468496), INT32_C( 1008631291), INT32_C( 1368921904), INT32_C( 281618544), -INT32_C( 851053391) }, { INT32_C( 35409), INT32_C( 35604), INT32_C( 53342), INT32_C( 621), INT32_C( 55615), INT32_C( 4650), INT32_C( 45091), INT32_C( 56189), INT32_C( 20837), INT32_C( 41949), INT32_C( 59251), INT32_C( 4073), INT32_C( 4072), INT32_C( 65313), INT32_C( 60847), INT32_C( 200) }, { INT16_MAX, INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 621), INT16_MIN, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 4650), INT16_MAX, INT16_MAX, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_C( 20837), INT16_MAX, INT16_MAX, INT16_C( 4073), INT16_MAX, INT16_MAX, INT16_MAX, INT16_MIN, INT16_C( 4072), INT16_MAX, INT16_MAX, INT16_C( 200) } }, { { -INT32_C( 11457029), -INT32_C( 2019348825), -INT32_C( 781314454), INT32_C( 1692424183), INT32_C( 2138294656), -INT32_C( 511798053), -INT32_C( 2050085159), INT32_C( 1451595355), INT32_C( 1784076227), INT32_C( 1878128901), INT32_C( 121659151), INT32_C( 
929767863), -INT32_C( 977871126), INT32_C( 1269183858), -INT32_C( 1093569437), -INT32_C( 837528054) }, { INT32_C( 13112), INT32_C( 29602), INT32_C( 16506), INT32_C( 61047), INT32_C( 13747), INT32_C( 50817), INT32_C( 55684), INT32_C( 54951), INT32_C( 26121), INT32_C( 37849), INT32_C( 37587), INT32_C( 64384), INT32_C( 56369), INT32_C( 23714), INT32_C( 44085), INT32_C( 49538) }, { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MAX, INT16_C( 13112), INT16_C( 29602), INT16_C( 16506), INT16_MAX, INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX, INT16_C( 13747), INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 26121), INT16_MAX, INT16_MAX, INT16_MAX, INT16_MIN, INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX, INT16_C( 23714), INT16_MAX, INT16_MAX } }, { { -INT32_C( 987198532), -INT32_C( 984088265), -INT32_C( 1923601323), -INT32_C( 259401609), -INT32_C( 1697859060), INT32_C( 1895263852), INT32_C( 1377578132), -INT32_C( 988504311), INT32_C( 1636449322), -INT32_C( 1842879683), -INT32_C( 2044690673), INT32_C( 1685498199), -INT32_C( 805420445), INT32_C( 1145042352), INT32_C( 731274018), INT32_C( 636529402) }, { INT32_C( 9350), INT32_C( 27830), INT32_C( 34034), INT32_C( 58088), INT32_C( 23217), INT32_C( 65182), INT32_C( 17961), INT32_C( 50795), INT32_C( 61930), INT32_C( 52317), INT32_C( 63056), INT32_C( 25561), INT32_C( 64189), INT32_C( 51192), INT32_C( 28685), INT32_C( 52790) }, { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_C( 9350), INT16_C( 27830), INT16_MAX, INT16_MAX, INT16_MIN, INT16_MAX, INT16_MAX, INT16_MIN, INT16_C( 23217), INT16_MAX, INT16_C( 17961), INT16_MAX, INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 25561), INT16_MIN, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 28685), INT16_MAX } }, { { INT32_C( 180297835), INT32_C( 953556161), INT32_C( 623781484), INT32_C( 2106066782), INT32_C( 225920402), INT32_C( 852783265), -INT32_C( 861675119), -INT32_C( 979707558), -INT32_C( 1143973382), 
-INT32_C( 487348619), INT32_C( 721887693), -INT32_C( 1146581207), INT32_C( 1992827092), INT32_C( 564698256), -INT32_C( 655537283), INT32_C( 530417445) }, { INT32_C( 22234), INT32_C( 57656), INT32_C( 5900), INT32_C( 41682), INT32_C( 25880), INT32_C( 46214), INT32_C( 12684), INT32_C( 56400), INT32_C( 43826), INT32_C( 59020), INT32_C( 30717), INT32_C( 8729), INT32_C( 41351), INT32_C( 65365), INT32_C( 32049), INT32_C( 41305) }, { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 22234), INT16_MAX, INT16_C( 5900), INT16_MAX, INT16_MAX, INT16_MAX, INT16_MIN, INT16_MIN, INT16_C( 25880), INT16_MAX, INT16_C( 12684), INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX, INT16_MAX, INT16_C( 30717), INT16_C( 8729), INT16_MAX, INT16_MAX, INT16_MIN, INT16_MAX, INT16_MAX, INT16_MAX, INT16_C( 32049), INT16_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_packs_epi32(a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_packs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_packs_epi32) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/packus.c000066400000000000000000000747211400333146700167210ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies 
or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN packus #include #include static int test_simde_mm512_packus_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[32]; const int16_t b[32]; const uint8_t r[64]; } test_vec[] = { { { -INT16_C( 11809), INT16_C( 3301), INT16_C( 4381), -INT16_C( 29201), -INT16_C( 11622), -INT16_C( 1564), INT16_C( 3475), -INT16_C( 8537), INT16_C( 4169), -INT16_C( 23067), INT16_C( 13975), INT16_C( 16305), -INT16_C( 18418), INT16_C( 12904), -INT16_C( 19774), -INT16_C( 24123), -INT16_C( 21629), -INT16_C( 24403), -INT16_C( 25412), INT16_C( 22062), INT16_C( 4719), INT16_C( 591), -INT16_C( 2528), INT16_C( 27104), -INT16_C( 15098), -INT16_C( 25330), -INT16_C( 16389), INT16_C( 2780), INT16_C( 17527), INT16_C( 14652), INT16_C( 758), INT16_C( 31195) }, { INT16_C( 136), INT16_C( 105), INT16_C( 72), INT16_C( 148), INT16_C( 14), INT16_C( 122), INT16_C( 119), INT16_C( 10), INT16_C( 241), INT16_C( 56), INT16_C( 132), INT16_C( 39), INT16_C( 126), INT16_C( 191), INT16_C( 60), INT16_C( 45), INT16_C( 83), INT16_C( 233), INT16_C( 85), INT16_C( 245), INT16_C( 20), INT16_C( 103), INT16_C( 83), INT16_C( 199), INT16_C( 26), INT16_C( 245), INT16_C( 65), INT16_C( 103), INT16_C( 126), INT16_C( 64), INT16_C( 96), INT16_C( 126) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C(136), UINT8_C(105), UINT8_C( 72), UINT8_C(148), UINT8_C( 14), 
UINT8_C(122), UINT8_C(119), UINT8_C( 10), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C(241), UINT8_C( 56), UINT8_C(132), UINT8_C( 39), UINT8_C(126), UINT8_C(191), UINT8_C( 60), UINT8_C( 45), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 83), UINT8_C(233), UINT8_C( 85), UINT8_C(245), UINT8_C( 20), UINT8_C(103), UINT8_C( 83), UINT8_C(199), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 26), UINT8_C(245), UINT8_C( 65), UINT8_C(103), UINT8_C(126), UINT8_C( 64), UINT8_C( 96), UINT8_C(126) } }, { { INT16_C( 1203), INT16_C( 20072), -INT16_C( 6822), -INT16_C( 17085), -INT16_C( 19463), -INT16_C( 31707), -INT16_C( 26873), INT16_C( 19532), INT16_C( 19377), INT16_C( 20289), INT16_C( 24205), INT16_C( 19895), -INT16_C( 8484), -INT16_C( 26995), -INT16_C( 1218), -INT16_C( 3819), INT16_C( 32000), INT16_C( 23103), -INT16_C( 32158), INT16_C( 23575), INT16_C( 15414), INT16_C( 15840), INT16_C( 11475), -INT16_C( 31607), -INT16_C( 13704), INT16_C( 1492), -INT16_C( 29911), INT16_C( 1362), -INT16_C( 8343), -INT16_C( 22628), -INT16_C( 20005), -INT16_C( 9320) }, { INT16_C( 215), INT16_C( 144), INT16_C( 76), INT16_C( 143), INT16_C( 205), INT16_C( 92), INT16_C( 85), INT16_C( 113), INT16_C( 181), INT16_C( 73), INT16_C( 200), INT16_C( 169), INT16_C( 234), INT16_C( 131), INT16_C( 232), INT16_C( 201), INT16_C( 147), INT16_C( 24), INT16_C( 70), INT16_C( 104), INT16_C( 116), INT16_C( 13), INT16_C( 166), INT16_C( 234), INT16_C( 245), INT16_C( 155), INT16_C( 129), INT16_C( 101), INT16_C( 148), INT16_C( 7), INT16_C( 70), INT16_C( 59) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C(215), UINT8_C(144), UINT8_C( 76), UINT8_C(143), UINT8_C(205), UINT8_C( 92), UINT8_C( 85), UINT8_C(113), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), 
UINT8_C( 0), UINT8_C(181), UINT8_C( 73), UINT8_C(200), UINT8_C(169), UINT8_C(234), UINT8_C(131), UINT8_C(232), UINT8_C(201), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C(147), UINT8_C( 24), UINT8_C( 70), UINT8_C(104), UINT8_C(116), UINT8_C( 13), UINT8_C(166), UINT8_C(234), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(245), UINT8_C(155), UINT8_C(129), UINT8_C(101), UINT8_C(148), UINT8_C( 7), UINT8_C( 70), UINT8_C( 59) } }, { { INT16_C( 11225), -INT16_C( 18093), -INT16_C( 1167), -INT16_C( 31455), -INT16_C( 6544), INT16_C( 14994), INT16_C( 4236), -INT16_C( 6364), INT16_C( 22277), -INT16_C( 15486), -INT16_C( 14632), INT16_C( 17448), INT16_C( 4442), -INT16_C( 10676), INT16_C( 7511), INT16_C( 12561), INT16_C( 25928), -INT16_C( 17942), INT16_C( 2912), -INT16_C( 12226), -INT16_C( 12046), INT16_C( 32266), INT16_C( 12001), -INT16_C( 6554), -INT16_C( 6011), INT16_C( 24233), -INT16_C( 11601), INT16_C( 2466), -INT16_C( 4381), INT16_C( 15072), -INT16_C( 3829), INT16_C( 21355) }, { INT16_C( 85), INT16_C( 183), INT16_C( 75), INT16_C( 83), INT16_C( 146), INT16_C( 253), INT16_C( 55), INT16_C( 70), INT16_C( 141), INT16_C( 207), INT16_C( 70), INT16_C( 66), INT16_C( 184), INT16_C( 64), INT16_C( 232), INT16_C( 0), INT16_C( 161), INT16_C( 158), INT16_C( 63), INT16_C( 8), INT16_C( 195), INT16_C( 145), INT16_C( 233), INT16_C( 26), INT16_C( 123), INT16_C( 213), INT16_C( 194), INT16_C( 247), INT16_C( 147), INT16_C( 36), INT16_C( 203), INT16_C( 185) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 85), UINT8_C(183), UINT8_C( 75), UINT8_C( 83), UINT8_C(146), UINT8_C(253), UINT8_C( 55), UINT8_C( 70), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C(141), UINT8_C(207), UINT8_C( 70), UINT8_C( 66), UINT8_C(184), UINT8_C( 64), UINT8_C(232), UINT8_C( 0), UINT8_MAX, 
UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C(161), UINT8_C(158), UINT8_C( 63), UINT8_C( 8), UINT8_C(195), UINT8_C(145), UINT8_C(233), UINT8_C( 26), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C(123), UINT8_C(213), UINT8_C(194), UINT8_C(247), UINT8_C(147), UINT8_C( 36), UINT8_C(203), UINT8_C(185) } }, { { -INT16_C( 9108), INT16_C( 22871), INT16_C( 18715), -INT16_C( 5023), INT16_C( 26380), INT16_C( 1662), INT16_C( 21840), -INT16_C( 14815), INT16_C( 2769), -INT16_C( 27749), -INT16_C( 19764), INT16_C( 18314), -INT16_C( 16059), -INT16_C( 16021), -INT16_C( 28531), -INT16_C( 1670), -INT16_C( 11923), -INT16_C( 30638), -INT16_C( 19430), INT16_C( 9845), -INT16_C( 3301), INT16_C( 27437), INT16_C( 20040), INT16_C( 6449), -INT16_C( 13224), INT16_C( 9644), INT16_C( 13950), -INT16_C( 15508), -INT16_C( 10248), -INT16_C( 31356), -INT16_C( 408), -INT16_C( 10882) }, { INT16_C( 209), INT16_C( 234), INT16_C( 210), INT16_C( 160), INT16_C( 62), INT16_C( 14), INT16_C( 60), INT16_C( 228), INT16_C( 212), INT16_C( 134), INT16_C( 117), INT16_C( 2), INT16_C( 206), INT16_C( 181), INT16_C( 6), INT16_C( 156), INT16_C( 231), INT16_C( 92), INT16_C( 152), INT16_C( 127), INT16_C( 7), INT16_C( 98), INT16_C( 181), INT16_C( 75), INT16_C( 80), INT16_C( 147), INT16_C( 26), INT16_C( 18), INT16_C( 29), INT16_C( 181), INT16_C( 81), INT16_C( 250) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C(209), UINT8_C(234), UINT8_C(210), UINT8_C(160), UINT8_C( 62), UINT8_C( 14), UINT8_C( 60), UINT8_C(228), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(212), UINT8_C(134), UINT8_C(117), UINT8_C( 2), UINT8_C(206), UINT8_C(181), UINT8_C( 6), UINT8_C(156), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C(231), UINT8_C( 92), UINT8_C(152), 
UINT8_C(127), UINT8_C( 7), UINT8_C( 98), UINT8_C(181), UINT8_C( 75), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 80), UINT8_C(147), UINT8_C( 26), UINT8_C( 18), UINT8_C( 29), UINT8_C(181), UINT8_C( 81), UINT8_C(250) } }, { { -INT16_C( 10183), -INT16_C( 3242), INT16_C( 21104), INT16_C( 18034), INT16_C( 89), -INT16_C( 25432), -INT16_C( 4171), INT16_C( 16103), -INT16_C( 18369), INT16_C( 1233), INT16_C( 26579), -INT16_C( 17641), -INT16_C( 8571), -INT16_C( 22416), -INT16_C( 15824), INT16_C( 27043), -INT16_C( 1638), INT16_C( 2908), -INT16_C( 12724), -INT16_C( 23215), -INT16_C( 1330), -INT16_C( 31934), INT16_C( 10729), INT16_C( 10433), -INT16_C( 27678), -INT16_C( 19156), INT16_C( 17402), INT16_C( 32624), -INT16_C( 7902), INT16_C( 21032), -INT16_C( 13405), INT16_C( 15803) }, { INT16_C( 23), INT16_C( 16), INT16_C( 154), INT16_C( 180), INT16_C( 248), INT16_C( 125), INT16_C( 249), INT16_C( 3), INT16_C( 209), INT16_C( 134), INT16_C( 41), INT16_C( 55), INT16_C( 46), INT16_C( 173), INT16_C( 68), INT16_C( 189), INT16_C( 51), INT16_C( 64), INT16_C( 132), INT16_C( 97), INT16_C( 44), INT16_C( 157), INT16_C( 131), INT16_C( 177), INT16_C( 89), INT16_C( 105), INT16_C( 61), INT16_C( 140), INT16_C( 41), INT16_C( 100), INT16_C( 36), INT16_C( 200) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 89), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 23), UINT8_C( 16), UINT8_C(154), UINT8_C(180), UINT8_C(248), UINT8_C(125), UINT8_C(249), UINT8_C( 3), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C(209), UINT8_C(134), UINT8_C( 41), UINT8_C( 55), UINT8_C( 46), UINT8_C(173), UINT8_C( 68), UINT8_C(189), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 51), UINT8_C( 64), UINT8_C(132), UINT8_C( 97), UINT8_C( 44), UINT8_C(157), UINT8_C(131), UINT8_C(177), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 
0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 89), UINT8_C(105), UINT8_C( 61), UINT8_C(140), UINT8_C( 41), UINT8_C(100), UINT8_C( 36), UINT8_C(200) } }, { { -INT16_C( 4009), INT16_C( 9225), -INT16_C( 652), -INT16_C( 3963), INT16_C( 25385), INT16_C( 20109), INT16_C( 12006), INT16_C( 15103), INT16_C( 14216), INT16_C( 2724), INT16_C( 17524), -INT16_C( 8041), -INT16_C( 12178), -INT16_C( 9404), INT16_C( 26356), INT16_C( 19364), -INT16_C( 21162), -INT16_C( 13713), -INT16_C( 2902), -INT16_C( 11078), INT16_C( 18519), INT16_C( 15650), INT16_C( 8822), -INT16_C( 392), INT16_C( 7257), -INT16_C( 13047), -INT16_C( 24480), -INT16_C( 12627), -INT16_C( 3472), INT16_C( 26026), INT16_C( 20056), -INT16_C( 20560) }, { INT16_C( 32), INT16_C( 165), INT16_C( 52), INT16_C( 108), INT16_C( 156), INT16_C( 242), INT16_C( 33), INT16_C( 23), INT16_C( 250), INT16_C( 158), INT16_C( 146), INT16_C( 10), INT16_C( 22), INT16_C( 220), INT16_C( 32), INT16_C( 95), INT16_C( 5), INT16_C( 84), INT16_C( 126), INT16_C( 181), INT16_C( 106), INT16_C( 216), INT16_C( 152), INT16_C( 201), INT16_C( 212), INT16_C( 44), INT16_C( 211), INT16_C( 234), INT16_C( 166), INT16_C( 78), INT16_C( 82), INT16_C( 6) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 32), UINT8_C(165), UINT8_C( 52), UINT8_C(108), UINT8_C(156), UINT8_C(242), UINT8_C( 33), UINT8_C( 23), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C(250), UINT8_C(158), UINT8_C(146), UINT8_C( 10), UINT8_C( 22), UINT8_C(220), UINT8_C( 32), UINT8_C( 95), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 5), UINT8_C( 84), UINT8_C(126), UINT8_C(181), UINT8_C(106), UINT8_C(216), UINT8_C(152), UINT8_C(201), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C(212), UINT8_C( 44), UINT8_C(211), UINT8_C(234), UINT8_C(166), UINT8_C( 78), UINT8_C( 82), 
UINT8_C( 6) } }, { { -INT16_C( 19625), -INT16_C( 28581), INT16_C( 6961), INT16_C( 19525), -INT16_C( 4987), INT16_C( 4388), INT16_C( 5253), INT16_C( 6106), INT16_C( 16872), INT16_C( 20036), INT16_C( 31508), -INT16_C( 456), -INT16_C( 479), -INT16_C( 6067), -INT16_C( 1200), -INT16_C( 22546), INT16_C( 18862), -INT16_C( 8393), INT16_C( 31845), -INT16_C( 5589), INT16_C( 20585), -INT16_C( 4357), -INT16_C( 10908), INT16_C( 19461), INT16_C( 18710), INT16_C( 11162), -INT16_C( 11580), -INT16_C( 6615), INT16_C( 30416), INT16_C( 8654), -INT16_C( 17295), INT16_C( 8136) }, { INT16_C( 0), INT16_C( 107), INT16_C( 42), INT16_C( 229), INT16_C( 81), INT16_C( 222), INT16_C( 217), INT16_C( 61), INT16_C( 196), INT16_C( 231), INT16_C( 145), INT16_C( 103), INT16_C( 155), INT16_C( 121), INT16_C( 80), INT16_C( 93), INT16_C( 152), INT16_C( 205), INT16_C( 30), INT16_C( 61), INT16_C( 134), INT16_C( 149), INT16_C( 70), INT16_C( 129), INT16_C( 58), INT16_C( 161), INT16_C( 53), INT16_C( 212), INT16_C( 144), INT16_C( 40), INT16_C( 230), INT16_C( 49) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C(107), UINT8_C( 42), UINT8_C(229), UINT8_C( 81), UINT8_C(222), UINT8_C(217), UINT8_C( 61), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(196), UINT8_C(231), UINT8_C(145), UINT8_C(103), UINT8_C(155), UINT8_C(121), UINT8_C( 80), UINT8_C( 93), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C(152), UINT8_C(205), UINT8_C( 30), UINT8_C( 61), UINT8_C(134), UINT8_C(149), UINT8_C( 70), UINT8_C(129), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 58), UINT8_C(161), UINT8_C( 53), UINT8_C(212), UINT8_C(144), UINT8_C( 40), UINT8_C(230), UINT8_C( 49) } }, { { INT16_C( 20094), INT16_C( 16894), -INT16_C( 20372), -INT16_C( 9346), -INT16_C( 26314), -INT16_C( 27280), INT16_C( 17375), 
-INT16_C( 5609), INT16_C( 32637), INT16_C( 18827), -INT16_C( 27723), -INT16_C( 31459), INT16_C( 27427), INT16_C( 941), INT16_C( 13137), -INT16_C( 12236), INT16_C( 12929), -INT16_C( 4847), -INT16_C( 28701), INT16_C( 6600), INT16_C( 14376), INT16_C( 2223), -INT16_C( 14725), -INT16_C( 1550), INT16_C( 32069), -INT16_C( 1470), INT16_C( 24592), INT16_C( 13184), INT16_C( 11723), INT16_C( 7222), INT16_C( 27488), -INT16_C( 7700) }, { INT16_C( 253), INT16_C( 128), INT16_C( 150), INT16_C( 181), INT16_C( 73), INT16_C( 74), INT16_C( 175), INT16_C( 84), INT16_C( 134), INT16_C( 60), INT16_C( 207), INT16_C( 177), INT16_C( 165), INT16_C( 93), INT16_C( 186), INT16_C( 174), INT16_C( 13), INT16_C( 68), INT16_C( 200), INT16_C( 114), INT16_C( 182), INT16_C( 32), INT16_C( 0), INT16_C( 145), INT16_C( 196), INT16_C( 108), INT16_C( 60), INT16_C( 143), INT16_C( 235), INT16_C( 242), INT16_C( 43), INT16_C( 92) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C(253), UINT8_C(128), UINT8_C(150), UINT8_C(181), UINT8_C( 73), UINT8_C( 74), UINT8_C(175), UINT8_C( 84), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C(134), UINT8_C( 60), UINT8_C(207), UINT8_C(177), UINT8_C(165), UINT8_C( 93), UINT8_C(186), UINT8_C(174), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 13), UINT8_C( 68), UINT8_C(200), UINT8_C(114), UINT8_C(182), UINT8_C( 32), UINT8_C( 0), UINT8_C(145), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C(196), UINT8_C(108), UINT8_C( 60), UINT8_C(143), UINT8_C(235), UINT8_C(242), UINT8_C( 43), UINT8_C( 92) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_packus_epi16(a, b); 
simde_test_x86_assert_equal_u8x64(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_packus_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[16]; const int32_t b[16]; const uint16_t r[32]; } test_vec[] = { { { INT32_C( 32838), INT32_C( 707), INT32_C( 18249), INT32_C( 43411), INT32_C( 33031), INT32_C( 48266), INT32_C( 46389), INT32_C( 30506), INT32_C( 19447), INT32_C( 16717), INT32_C( 9608), INT32_C( 32719), INT32_C( 16128), INT32_C( 507), INT32_C( 9398), INT32_C( 24219) }, { -INT32_C( 374762927), -INT32_C( 768936372), INT32_C( 1090040461), -INT32_C( 926955570), INT32_C( 1560788893), -INT32_C( 1621228982), -INT32_C( 1144842958), INT32_C( 1192845046), INT32_C( 1009828848), INT32_C( 1175411385), -INT32_C( 611907827), INT32_C( 1805862606), INT32_C( 1355393542), -INT32_C( 554752084), INT32_C( 848933692), INT32_C( 41595665) }, { UINT16_C(32838), UINT16_C( 707), UINT16_C(18249), UINT16_C(43411), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C(33031), UINT16_C(48266), UINT16_C(46389), UINT16_C(30506), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C(19447), UINT16_C(16717), UINT16_C( 9608), UINT16_C(32719), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C(16128), UINT16_C( 507), UINT16_C( 9398), UINT16_C(24219), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { INT32_C( 12094), INT32_C( 4726), INT32_C( 8941), INT32_C( 18830), INT32_C( 59545), INT32_C( 48070), INT32_C( 19949), INT32_C( 35151), INT32_C( 6072), INT32_C( 12329), INT32_C( 28498), INT32_C( 58296), INT32_C( 46795), INT32_C( 6001), INT32_C( 1124), INT32_C( 55437) }, { INT32_C( 502220354), -INT32_C( 1605560204), -INT32_C( 703619026), -INT32_C( 1195784320), -INT32_C( 194083815), INT32_C( 118218517), INT32_C( 51081277), INT32_C( 1725667620), INT32_C( 1401146079), INT32_C( 301191650), -INT32_C( 236518799), -INT32_C( 475422518), INT32_C( 970463012), INT32_C( 876667894), INT32_C( 2000112723), -INT32_C( 992144411) }, { 
UINT16_C(12094), UINT16_C( 4726), UINT16_C( 8941), UINT16_C(18830), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(59545), UINT16_C(48070), UINT16_C(19949), UINT16_C(35151), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 6072), UINT16_C(12329), UINT16_C(28498), UINT16_C(58296), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C(46795), UINT16_C( 6001), UINT16_C( 1124), UINT16_C(55437), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { INT32_C( 49175), INT32_C( 41937), INT32_C( 55188), INT32_C( 31931), INT32_C( 19637), INT32_C( 51840), INT32_C( 10049), INT32_C( 43243), INT32_C( 45672), INT32_C( 6997), INT32_C( 18930), INT32_C( 32197), INT32_C( 47049), INT32_C( 45697), INT32_C( 52185), INT32_C( 24947) }, { -INT32_C( 736896057), INT32_C( 99575828), INT32_C( 2035212882), -INT32_C( 789179505), -INT32_C( 24658035), INT32_C( 162531336), -INT32_C( 1395356982), INT32_C( 353191758), INT32_C( 921313570), -INT32_C( 616834679), INT32_C( 1263897019), INT32_C( 689654684), INT32_C( 321364491), INT32_C( 1948047530), -INT32_C( 1340018590), INT32_C( 1506160183) }, { UINT16_C(49175), UINT16_C(41937), UINT16_C(55188), UINT16_C(31931), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C(19637), UINT16_C(51840), UINT16_C(10049), UINT16_C(43243), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C(45672), UINT16_C( 6997), UINT16_C(18930), UINT16_C(32197), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C(47049), UINT16_C(45697), UINT16_C(52185), UINT16_C(24947), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { INT32_C( 55439), INT32_C( 17844), INT32_C( 61328), INT32_C( 24345), INT32_C( 63347), INT32_C( 31339), INT32_C( 46891), INT32_C( 2321), INT32_C( 10977), INT32_C( 48751), INT32_C( 62382), INT32_C( 63314), INT32_C( 8430), INT32_C( 54682), INT32_C( 41100), INT32_C( 22441) }, { -INT32_C( 1451062722), -INT32_C( 1100484320), -INT32_C( 1682893327), -INT32_C( 460127012), INT32_C( 503611849), -INT32_C( 
1040998693), INT32_C( 442597476), INT32_C( 1534200349), -INT32_C( 1257966443), -INT32_C( 697078555), INT32_C( 1584539009), -INT32_C( 230554327), INT32_C( 1645299334), INT32_C( 1210254564), -INT32_C( 1570536060), INT32_C( 620615055) }, { UINT16_C(55439), UINT16_C(17844), UINT16_C(61328), UINT16_C(24345), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(63347), UINT16_C(31339), UINT16_C(46891), UINT16_C( 2321), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C(10977), UINT16_C(48751), UINT16_C(62382), UINT16_C(63314), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 8430), UINT16_C(54682), UINT16_C(41100), UINT16_C(22441), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { INT32_C( 44761), INT32_C( 61317), INT32_C( 39757), INT32_C( 33421), INT32_C( 47844), INT32_C( 9986), INT32_C( 7369), INT32_C( 833), INT32_C( 14258), INT32_C( 55590), INT32_C( 10868), INT32_C( 55724), INT32_C( 17299), INT32_C( 9835), INT32_C( 13634), INT32_C( 50233) }, { INT32_C( 100395934), INT32_C( 1356800546), -INT32_C( 1720036458), -INT32_C( 160291243), INT32_C( 1345914295), -INT32_C( 1770609509), -INT32_C( 724846119), -INT32_C( 627506116), INT32_C( 299930863), INT32_C( 1281474486), INT32_C( 1759959826), -INT32_C( 1184999422), -INT32_C( 116746402), INT32_C( 361726012), INT32_C( 1995004473), INT32_C( 1313899103) }, { UINT16_C(44761), UINT16_C(61317), UINT16_C(39757), UINT16_C(33421), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C(47844), UINT16_C( 9986), UINT16_C( 7369), UINT16_C( 833), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(14258), UINT16_C(55590), UINT16_C(10868), UINT16_C(55724), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C(17299), UINT16_C( 9835), UINT16_C(13634), UINT16_C(50233), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { INT32_C( 52576), INT32_C( 538), INT32_C( 40810), INT32_C( 46680), INT32_C( 39855), INT32_C( 7344), INT32_C( 63634), INT32_C( 13126), INT32_C( 769), INT32_C( 
1285), INT32_C( 29604), INT32_C( 38442), INT32_C( 16946), INT32_C( 45406), INT32_C( 39337), INT32_C( 59340) }, { -INT32_C( 18166378), INT32_C( 50589672), -INT32_C( 1787320482), INT32_C( 36479395), -INT32_C( 1841013126), -INT32_C( 1119640768), INT32_C( 1750527124), INT32_C( 1917788892), -INT32_C( 663733520), -INT32_C( 1998818519), -INT32_C( 1122151654), INT32_C( 1858095604), -INT32_C( 402586457), INT32_C( 1000686759), INT32_C( 228850481), INT32_C( 226489117) }, { UINT16_C(52576), UINT16_C( 538), UINT16_C(40810), UINT16_C(46680), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C(39855), UINT16_C( 7344), UINT16_C(63634), UINT16_C(13126), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 769), UINT16_C( 1285), UINT16_C(29604), UINT16_C(38442), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C(16946), UINT16_C(45406), UINT16_C(39337), UINT16_C(59340), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { INT32_C( 22246), INT32_C( 31966), INT32_C( 2361), INT32_C( 60791), INT32_C( 42453), INT32_C( 61153), INT32_C( 37627), INT32_C( 42144), INT32_C( 52219), INT32_C( 23879), INT32_C( 7014), INT32_C( 30728), INT32_C( 4893), INT32_C( 52225), INT32_C( 64094), INT32_C( 57247) }, { -INT32_C( 861234556), INT32_C( 1227485555), -INT32_C( 345731215), -INT32_C( 1016894355), -INT32_C( 1596554935), INT32_C( 40687487), INT32_C( 1241369299), INT32_C( 1294507209), -INT32_C( 1457860042), INT32_C( 888292291), INT32_C( 1075861203), INT32_C( 184779714), -INT32_C( 2069112572), -INT32_C( 2088364112), -INT32_C( 1412660254), INT32_C( 1442378783) }, { UINT16_C(22246), UINT16_C(31966), UINT16_C( 2361), UINT16_C(60791), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C(42453), UINT16_C(61153), UINT16_C(37627), UINT16_C(42144), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C(52219), UINT16_C(23879), UINT16_C( 7014), UINT16_C(30728), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 4893), UINT16_C(52225), UINT16_C(64094), 
UINT16_C(57247), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, { { INT32_C( 35327), INT32_C( 10685), INT32_C( 2665), INT32_C( 25878), INT32_C( 62953), INT32_C( 47992), INT32_C( 4966), INT32_C( 65128), INT32_C( 51079), INT32_C( 41456), INT32_C( 33707), INT32_C( 2792), INT32_C( 23807), INT32_C( 13591), INT32_C( 62280), INT32_C( 19697) }, { INT32_C( 1897101336), -INT32_C( 569244740), INT32_C( 560053852), INT32_C( 36391551), INT32_C( 1583229468), INT32_C( 1553167777), -INT32_C( 833626894), -INT32_C( 1525006195), INT32_C( 1964453560), -INT32_C( 1907152591), INT32_C( 1739568615), INT32_C( 459922431), -INT32_C( 1485191163), INT32_C( 805506109), INT32_C( 1979601896), INT32_C( 1276844179) }, { UINT16_C(35327), UINT16_C(10685), UINT16_C( 2665), UINT16_C(25878), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C(62953), UINT16_C(47992), UINT16_C( 4966), UINT16_C(65128), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C(51079), UINT16_C(41456), UINT16_C(33707), UINT16_C( 2792), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C(23807), UINT16_C(13591), UINT16_C(62280), UINT16_C(19697), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_packus_epi32(a, b); simde_test_x86_assert_equal_u16x32(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_packus_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_packus_epi32) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/permutex2var.c000066400000000000000000031015421400333146700200720ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the 
Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #define SIMDE_TEST_X86_AVX512_INSN permutex2var #include #include #include static int test_simde_mm_permutex2var_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[16]; const int16_t idx[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { { INT16_C( 10061), -INT16_C( 7066), INT16_C( 16449), INT16_C( 17680), -INT16_C( 6998), INT16_C( 23576), -INT16_C( 4794), INT16_C( 22165) }, { INT16_C( 27840), -INT16_C( 18743), INT16_C( 13054), -INT16_C( 26623), INT16_C( 14386), -INT16_C( 31790), INT16_C( 31964), INT16_C( 10654) }, { INT16_C( 1188), -INT16_C( 6899), INT16_C( 7492), -INT16_C( 4310), INT16_C( 17153), INT16_C( 18507), -INT16_C( 8144), -INT16_C( 3938) }, { INT16_C( 10061), -INT16_C( 6899), -INT16_C( 8144), -INT16_C( 7066), INT16_C( 16449), INT16_C( 16449), INT16_C( 17153), -INT16_C( 8144) } }, { { INT16_C( 26700), INT16_C( 19366), -INT16_C( 22374), -INT16_C( 13085), -INT16_C( 18976), -INT16_C( 17328), -INT16_C( 4559), -INT16_C( 10779) }, { -INT16_C( 3086), INT16_C( 14010), -INT16_C( 6896), 
INT16_C( 4645), INT16_C( 28968), INT16_C( 22618), -INT16_C( 1967), -INT16_C( 25016) }, { -INT16_C( 4512), -INT16_C( 1303), -INT16_C( 13162), INT16_C( 30407), INT16_C( 6017), -INT16_C( 19918), INT16_C( 6149), -INT16_C( 2168) }, { -INT16_C( 22374), -INT16_C( 13162), INT16_C( 26700), -INT16_C( 17328), -INT16_C( 4512), -INT16_C( 13162), INT16_C( 19366), -INT16_C( 4512) } }, { { INT16_C( 16907), INT16_C( 6957), INT16_C( 21287), INT16_C( 20269), -INT16_C( 30780), INT16_C( 5543), -INT16_C( 4224), -INT16_C( 8013) }, { -INT16_C( 25378), INT16_C( 29915), -INT16_C( 23960), -INT16_C( 5653), INT16_C( 7609), -INT16_C( 16740), INT16_C( 9269), INT16_C( 16565) }, { -INT16_C( 7578), -INT16_C( 29092), -INT16_C( 30411), -INT16_C( 1571), -INT16_C( 31471), -INT16_C( 28401), -INT16_C( 15756), INT16_C( 21105) }, { -INT16_C( 15756), -INT16_C( 1571), -INT16_C( 7578), -INT16_C( 1571), -INT16_C( 29092), -INT16_C( 31471), INT16_C( 5543), INT16_C( 5543) } }, { { INT16_C( 19551), -INT16_C( 14393), -INT16_C( 19730), -INT16_C( 22607), INT16_C( 19919), INT16_C( 1381), INT16_C( 6769), -INT16_C( 10427) }, { -INT16_C( 24067), INT16_C( 12901), INT16_C( 17195), INT16_C( 15404), INT16_C( 15304), INT16_C( 15565), INT16_C( 16125), INT16_C( 23695) }, { INT16_C( 22155), INT16_C( 31012), -INT16_C( 11000), -INT16_C( 10463), -INT16_C( 31198), -INT16_C( 27684), INT16_C( 8865), -INT16_C( 24982) }, { -INT16_C( 27684), INT16_C( 1381), -INT16_C( 10463), -INT16_C( 31198), INT16_C( 22155), -INT16_C( 27684), -INT16_C( 27684), -INT16_C( 24982) } }, { { -INT16_C( 12093), -INT16_C( 4400), -INT16_C( 1005), -INT16_C( 9430), -INT16_C( 2249), INT16_C( 13591), -INT16_C( 22986), -INT16_C( 15983) }, { -INT16_C( 18948), INT16_C( 1082), INT16_C( 23434), -INT16_C( 21284), -INT16_C( 18206), -INT16_C( 31937), -INT16_C( 21798), -INT16_C( 25055) }, { -INT16_C( 3718), -INT16_C( 29300), -INT16_C( 18450), INT16_C( 9576), INT16_C( 32686), -INT16_C( 7078), -INT16_C( 5082), INT16_C( 8869) }, { INT16_C( 32686), -INT16_C( 18450), -INT16_C( 
18450), INT16_C( 32686), -INT16_C( 1005), INT16_C( 8869), -INT16_C( 18450), -INT16_C( 4400) } }, { { -INT16_C( 8031), INT16_C( 11303), INT16_C( 827), INT16_C( 7640), INT16_C( 6331), -INT16_C( 26976), -INT16_C( 15934), INT16_C( 15412) }, { -INT16_C( 16205), -INT16_C( 24119), INT16_C( 12663), INT16_C( 9926), INT16_C( 8624), -INT16_C( 10742), -INT16_C( 20467), -INT16_C( 20743) }, { INT16_C( 8336), -INT16_C( 13350), -INT16_C( 19677), -INT16_C( 8471), -INT16_C( 30261), -INT16_C( 29324), -INT16_C( 22453), -INT16_C( 311) }, { INT16_C( 7640), -INT16_C( 13350), INT16_C( 15412), -INT16_C( 15934), -INT16_C( 8031), -INT16_C( 19677), -INT16_C( 29324), -INT16_C( 13350) } }, { { -INT16_C( 28055), -INT16_C( 8033), INT16_C( 26051), INT16_C( 29446), INT16_C( 4486), -INT16_C( 27830), INT16_C( 17345), INT16_C( 20802) }, { INT16_C( 7267), -INT16_C( 31204), INT16_C( 1487), -INT16_C( 26012), -INT16_C( 9841), -INT16_C( 9689), -INT16_C( 3967), -INT16_C( 5416) }, { INT16_C( 30594), INT16_C( 17867), -INT16_C( 11812), INT16_C( 25529), INT16_C( 994), -INT16_C( 23562), INT16_C( 14406), -INT16_C( 22028) }, { INT16_C( 29446), INT16_C( 994), -INT16_C( 22028), INT16_C( 4486), -INT16_C( 22028), INT16_C( 20802), -INT16_C( 8033), INT16_C( 30594) } }, { { INT16_C( 4437), INT16_C( 9263), -INT16_C( 27882), -INT16_C( 23105), -INT16_C( 6548), -INT16_C( 4481), INT16_C( 22487), INT16_C( 23000) }, { -INT16_C( 23602), -INT16_C( 21601), INT16_C( 22645), INT16_C( 22286), INT16_C( 1115), -INT16_C( 24069), -INT16_C( 4291), -INT16_C( 28086) }, { INT16_C( 30976), INT16_C( 6070), INT16_C( 29964), INT16_C( 31164), INT16_C( 15452), INT16_C( 13159), INT16_C( 16275), INT16_C( 25228) }, { INT16_C( 16275), INT16_C( 25228), -INT16_C( 4481), INT16_C( 16275), INT16_C( 31164), INT16_C( 31164), INT16_C( 13159), INT16_C( 29964) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi16(test_vec[i].idx); 
simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_permutex2var_epi16(a, idx, b); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i idx = simde_test_x86_random_i16x8(); simde__m128i b = simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_permutex2var_epi16(a, idx, b); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Table-driven test for simde_mm_mask_permutex2var_epi16(a, k, idx, b): each test_vec entry holds the inputs a, k, idx, b and the expected vector r. In the entries below, result lanes whose bit in k is clear equal the same lane of a. The enabled branch returns 0 after the loop; the disabled branch prints random cases and returns 1. */ static int test_simde_mm_mask_permutex2var_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { /* NOTE(review): arrays are sized 16 but each initializer lists 8 values, so the remaining elements are implicitly zero; presumably only 8 lanes are ever loaded - confirm. */ const int16_t a[16]; const simde__mmask8 k; const int16_t idx[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { { INT16_C( 11235), INT16_C( 22541), INT16_C( 7043), -INT16_C( 8529), -INT16_C( 21985), INT16_C( 23679), -INT16_C( 13926), -INT16_C( 25874) }, UINT8_C( 66), { -INT16_C( 20059), INT16_C( 6735), -INT16_C( 14226), -INT16_C( 21898), -INT16_C( 22225), INT16_C( 28221), -INT16_C( 24778), INT16_C( 24913) }, { -INT16_C( 22100), -INT16_C( 14363), -INT16_C( 15527), INT16_C( 999), INT16_C( 17219), INT16_C( 3229), INT16_C( 14386), -INT16_C( 10417) }, { INT16_C( 11235), -INT16_C( 10417), INT16_C( 7043), -INT16_C( 8529), -INT16_C( 21985), INT16_C( 23679), -INT16_C( 13926), -INT16_C( 25874) } }, { { -INT16_C( 24855), INT16_C( 22513), INT16_C( 26726), -INT16_C( 27391), INT16_C( 16145), INT16_C( 18179), INT16_C( 21982), -INT16_C( 29783) }, UINT8_C(254), { INT16_C( 21134), INT16_C( 20823), INT16_C( 23353), INT16_C( 32148), -INT16_C( 24072), INT16_C( 12463), -INT16_C( 30992), -INT16_C( 29158) }, { INT16_C( 29047), -INT16_C( 8204), -INT16_C( 30349), -INT16_C( 19727), INT16_C( 14476), -INT16_C( 7792), INT16_C( 7137), INT16_C( 28640) }, { -INT16_C( 
24855), -INT16_C( 29783), -INT16_C( 8204), INT16_C( 16145), INT16_C( 29047), INT16_C( 28640), -INT16_C( 24855), -INT16_C( 30349) } }, { { INT16_C( 14190), -INT16_C( 22591), INT16_C( 21906), -INT16_C( 29916), -INT16_C( 11274), -INT16_C( 6469), -INT16_C( 10919), -INT16_C( 11916) }, UINT8_C( 71), { -INT16_C( 20376), -INT16_C( 3654), INT16_C( 27809), -INT16_C( 9602), INT16_C( 24572), INT16_C( 6331), INT16_C( 11071), INT16_C( 30598) }, { INT16_C( 11756), INT16_C( 16649), -INT16_C( 27566), INT16_C( 9528), INT16_C( 7760), INT16_C( 9599), INT16_C( 20627), -INT16_C( 1172) }, { INT16_C( 11756), -INT16_C( 27566), -INT16_C( 22591), -INT16_C( 29916), -INT16_C( 11274), -INT16_C( 6469), -INT16_C( 1172), -INT16_C( 11916) } }, { { INT16_C( 9728), -INT16_C( 23827), INT16_C( 27538), -INT16_C( 28804), INT16_C( 14282), INT16_C( 2727), INT16_C( 11618), INT16_C( 20097) }, UINT8_C( 90), { -INT16_C( 28534), INT16_C( 8108), -INT16_C( 11576), -INT16_C( 6545), -INT16_C( 27567), -INT16_C( 24199), INT16_C( 29953), INT16_C( 10145) }, { INT16_C( 17250), -INT16_C( 12870), INT16_C( 18879), -INT16_C( 2153), -INT16_C( 24080), INT16_C( 7513), -INT16_C( 22494), -INT16_C( 21129) }, { INT16_C( 9728), -INT16_C( 24080), INT16_C( 27538), -INT16_C( 21129), -INT16_C( 23827), INT16_C( 2727), -INT16_C( 23827), INT16_C( 20097) } }, { { INT16_C( 9272), INT16_C( 204), INT16_C( 15350), INT16_C( 18406), INT16_C( 24783), -INT16_C( 12056), -INT16_C( 30251), INT16_C( 14328) }, UINT8_C(205), { INT16_C( 1202), -INT16_C( 1140), -INT16_C( 31845), INT16_C( 15851), INT16_C( 2269), -INT16_C( 31393), INT16_C( 3199), -INT16_C( 23619) }, { -INT16_C( 16936), INT16_C( 5017), -INT16_C( 8029), INT16_C( 995), -INT16_C( 19512), INT16_C( 21208), INT16_C( 4011), INT16_C( 23839) }, { INT16_C( 15350), INT16_C( 204), INT16_C( 995), INT16_C( 995), INT16_C( 24783), -INT16_C( 12056), INT16_C( 23839), INT16_C( 21208) } }, { { -INT16_C( 21741), -INT16_C( 20648), INT16_C( 17199), INT16_C( 3308), INT16_C( 19275), -INT16_C( 13423), INT16_C( 
20056), INT16_C( 12398) }, UINT8_C( 11), { INT16_C( 17416), -INT16_C( 5970), -INT16_C( 19929), -INT16_C( 9551), INT16_C( 906), -INT16_C( 25978), -INT16_C( 7390), -INT16_C( 12883) }, { INT16_C( 23612), INT16_C( 32764), INT16_C( 2120), -INT16_C( 27445), -INT16_C( 26983), -INT16_C( 6164), INT16_C( 7172), INT16_C( 3314) }, { INT16_C( 23612), INT16_C( 7172), INT16_C( 17199), -INT16_C( 20648), INT16_C( 19275), -INT16_C( 13423), INT16_C( 20056), INT16_C( 12398) } }, { { -INT16_C( 24224), -INT16_C( 30731), -INT16_C( 22957), -INT16_C( 8862), -INT16_C( 5975), -INT16_C( 13449), INT16_C( 9675), INT16_C( 1944) }, UINT8_C(129), { -INT16_C( 30827), -INT16_C( 25142), INT16_C( 24146), -INT16_C( 6089), INT16_C( 7754), INT16_C( 26348), -INT16_C( 1775), -INT16_C( 19769) }, { INT16_C( 20206), -INT16_C( 27643), -INT16_C( 7504), -INT16_C( 26563), INT16_C( 2138), INT16_C( 32612), INT16_C( 27552), INT16_C( 13568) }, { -INT16_C( 13449), -INT16_C( 30731), -INT16_C( 22957), -INT16_C( 8862), -INT16_C( 5975), -INT16_C( 13449), INT16_C( 9675), INT16_C( 1944) } }, { { -INT16_C( 13582), INT16_C( 17619), INT16_C( 2600), INT16_C( 29228), INT16_C( 6440), INT16_C( 14809), -INT16_C( 24558), INT16_C( 235) }, UINT8_C(238), { -INT16_C( 27408), -INT16_C( 11361), INT16_C( 14289), -INT16_C( 9939), -INT16_C( 21349), INT16_C( 1913), -INT16_C( 20564), INT16_C( 30713) }, { INT16_C( 16002), -INT16_C( 29537), INT16_C( 4714), -INT16_C( 31820), -INT16_C( 4373), -INT16_C( 29803), -INT16_C( 27175), -INT16_C( 13703) }, { -INT16_C( 13582), -INT16_C( 13703), INT16_C( 17619), -INT16_C( 29803), INT16_C( 6440), -INT16_C( 29537), -INT16_C( 4373), -INT16_C( 29537) } }, }; /* Load a, idx and b from the current entry, apply the masked permute with that entry's k, and compare against the expected r. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi16(test_vec[i].idx); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_mask_permutex2var_epi16(a, test_vec[i].k, idx, b); simde_test_x86_assert_equal_i16x8(r, 
simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); /* Disabled alternative: builds 8 random inputs and prints inputs and result through the simde_test_x86_write_* helpers (presumably to regenerate the table above - confirm). */ for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i idx = simde_test_x86_random_i16x8(); simde__m128i b = simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_mask_permutex2var_epi16(a, k, idx, b); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Table-driven test for simde_mm_mask2_permutex2var_epi16(a, idx, k, b): each test_vec entry holds the inputs a, idx, k, b and the expected vector r. In the entries of this table, result lanes whose bit in k is clear equal the same lane of idx. The enabled branch returns 0 after its loop; the disabled branch prints random cases and returns 1. */ static int test_simde_mm_mask2_permutex2var_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { /* NOTE(review): arrays are sized 16 but each initializer lists 8 values, so the remaining elements are implicitly zero; presumably only 8 lanes are ever loaded - confirm. */ const int16_t a[16]; const int16_t idx[16]; const simde__mmask8 k; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { { INT16_C( 6185), -INT16_C( 1379), -INT16_C( 13744), -INT16_C( 5165), INT16_C( 19830), INT16_C( 8946), -INT16_C( 4868), INT16_C( 32409) }, { INT16_C( 14634), -INT16_C( 27638), -INT16_C( 16821), INT16_C( 13848), -INT16_C( 21076), -INT16_C( 31039), INT16_C( 14915), INT16_C( 27728) }, UINT8_C( 83), { INT16_C( 26605), -INT16_C( 18525), -INT16_C( 29126), -INT16_C( 30931), INT16_C( 20353), INT16_C( 28035), INT16_C( 489), INT16_C( 8855) }, { -INT16_C( 29126), -INT16_C( 29126), -INT16_C( 16821), INT16_C( 13848), INT16_C( 20353), -INT16_C( 31039), -INT16_C( 5165), INT16_C( 27728) } }, { { INT16_C( 11019), -INT16_C( 13715), -INT16_C( 23741), -INT16_C( 3722), -INT16_C( 924), -INT16_C( 25036), -INT16_C( 24500), INT16_C( 14833) }, { -INT16_C( 27641), INT16_C( 17136), INT16_C( 7459), -INT16_C( 23351), INT16_C( 19821), INT16_C( 22033), -INT16_C( 22450), INT16_C( 23160) }, UINT8_C(211), { INT16_C( 9445), -INT16_C( 30697), INT16_C( 2202), -INT16_C( 26644), -INT16_C( 30148), -INT16_C( 8989), INT16_C( 7548), INT16_C( 4324) }, { INT16_C( 14833), INT16_C( 11019), INT16_C( 
7459), -INT16_C( 23351), -INT16_C( 8989), INT16_C( 22033), INT16_C( 7548), INT16_C( 9445) } }, { { INT16_C( 9741), INT16_C( 11059), -INT16_C( 10257), INT16_C( 15512), -INT16_C( 4376), -INT16_C( 28533), -INT16_C( 6810), INT16_C( 19300) }, { INT16_C( 31497), -INT16_C( 23597), -INT16_C( 16509), -INT16_C( 16582), INT16_C( 7753), -INT16_C( 14949), INT16_C( 32571), INT16_C( 18646) }, UINT8_C(165), { INT16_C( 29449), -INT16_C( 7787), -INT16_C( 12021), -INT16_C( 1591), INT16_C( 23132), INT16_C( 16735), -INT16_C( 21826), INT16_C( 14666) }, { -INT16_C( 7787), -INT16_C( 23597), INT16_C( 15512), -INT16_C( 16582), INT16_C( 7753), -INT16_C( 1591), INT16_C( 32571), -INT16_C( 6810) } }, { { -INT16_C( 4483), INT16_C( 15548), INT16_C( 31528), INT16_C( 18054), INT16_C( 19222), -INT16_C( 27007), -INT16_C( 13791), INT16_C( 11067) }, { -INT16_C( 12227), INT16_C( 18700), -INT16_C( 10846), -INT16_C( 446), -INT16_C( 24017), -INT16_C( 4800), -INT16_C( 30132), -INT16_C( 13786) }, UINT8_C(120), { INT16_C( 1762), INT16_C( 23969), -INT16_C( 6260), -INT16_C( 10124), INT16_C( 2665), INT16_C( 13305), INT16_C( 9285), INT16_C( 5744) }, { -INT16_C( 12227), INT16_C( 18700), -INT16_C( 10846), INT16_C( 31528), INT16_C( 5744), -INT16_C( 4483), INT16_C( 2665), -INT16_C( 13786) } }, { { -INT16_C( 18128), INT16_C( 1720), -INT16_C( 18692), -INT16_C( 25035), INT16_C( 9206), -INT16_C( 32278), -INT16_C( 19383), INT16_C( 11513) }, { -INT16_C( 25925), INT16_C( 18313), -INT16_C( 638), -INT16_C( 5345), INT16_C( 6407), INT16_C( 19742), -INT16_C( 29123), INT16_C( 28259) }, UINT8_C( 72), { INT16_C( 29723), -INT16_C( 11964), -INT16_C( 7511), -INT16_C( 13112), INT16_C( 18892), -INT16_C( 32490), INT16_C( 16962), -INT16_C( 8900) }, { -INT16_C( 25925), INT16_C( 18313), -INT16_C( 638), -INT16_C( 8900), INT16_C( 6407), INT16_C( 19742), -INT16_C( 32490), INT16_C( 28259) } }, { { -INT16_C( 31797), -INT16_C( 13985), INT16_C( 19107), -INT16_C( 17200), INT16_C( 7528), -INT16_C( 2311), INT16_C( 26496), -INT16_C( 25794) }, { 
-INT16_C( 32037), -INT16_C( 31379), INT16_C( 13668), INT16_C( 12625), INT16_C( 26494), -INT16_C( 16206), -INT16_C( 4439), INT16_C( 30109) }, UINT8_C(113), { INT16_C( 16124), INT16_C( 17940), -INT16_C( 12274), INT16_C( 11438), -INT16_C( 23094), INT16_C( 12716), INT16_C( 18659), INT16_C( 26125) }, { INT16_C( 11438), -INT16_C( 31379), INT16_C( 13668), INT16_C( 12625), INT16_C( 18659), INT16_C( 19107), INT16_C( 17940), INT16_C( 30109) } }, { { -INT16_C( 27979), -INT16_C( 5430), -INT16_C( 1053), INT16_C( 19304), INT16_C( 10413), -INT16_C( 25612), INT16_C( 27078), -INT16_C( 15859) }, { INT16_C( 8615), -INT16_C( 18935), -INT16_C( 18446), -INT16_C( 17182), -INT16_C( 29092), INT16_C( 16621), -INT16_C( 1322), -INT16_C( 29786) }, UINT8_C(140), { INT16_C( 30064), INT16_C( 27760), -INT16_C( 17443), INT16_C( 1561), -INT16_C( 19025), INT16_C( 6604), -INT16_C( 28990), -INT16_C( 7232) }, { INT16_C( 8615), -INT16_C( 18935), -INT16_C( 1053), -INT16_C( 1053), -INT16_C( 29092), INT16_C( 16621), -INT16_C( 1322), INT16_C( 27078) } }, { { INT16_C( 30359), INT16_C( 20437), -INT16_C( 28328), -INT16_C( 6229), -INT16_C( 5249), INT16_C( 31165), INT16_C( 18833), INT16_C( 518) }, { INT16_C( 30398), -INT16_C( 25490), -INT16_C( 30927), -INT16_C( 8030), INT16_C( 28220), -INT16_C( 263), -INT16_C( 17668), -INT16_C( 27422) }, UINT8_C( 48), { -INT16_C( 7241), INT16_C( 18825), INT16_C( 28814), INT16_C( 31432), INT16_C( 16685), INT16_C( 30219), INT16_C( 3399), -INT16_C( 17099) }, { INT16_C( 30398), -INT16_C( 25490), -INT16_C( 30927), -INT16_C( 8030), INT16_C( 16685), INT16_C( 18825), -INT16_C( 17668), -INT16_C( 27422) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi16(test_vec[i].idx); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_mask2_permutex2var_epi16(a, idx, test_vec[i].k, b); simde_test_x86_assert_equal_i16x8(r, 
simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); /* Disabled alternative for the mask2 epi16 test: builds 8 random inputs and prints inputs and result through the simde_test_x86_write_* helpers (presumably to regenerate the table - confirm). */ for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i idx = simde_test_x86_random_i16x8(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i b = simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_mask2_permutex2var_epi16(a, idx, k, b); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Table-driven test for simde_mm_maskz_permutex2var_epi16(k, a, idx, b): each test_vec entry holds the inputs k, a, idx, b and the expected vector r. In the entries below, result lanes whose bit in k is clear are 0. The enabled branch returns 0 after the loop; the disabled branch prints random cases and returns 1. */ static int test_simde_mm_maskz_permutex2var_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { /* NOTE(review): arrays are sized 16 but each initializer lists 8 values, so the remaining elements are implicitly zero; presumably only 8 lanes are ever loaded - confirm. */ const simde__mmask8 k; const int16_t a[16]; const int16_t idx[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { UINT8_C(123), { -INT16_C( 4399), INT16_C( 29443), INT16_C( 16335), -INT16_C( 14111), -INT16_C( 8898), INT16_C( 8322), -INT16_C( 19599), INT16_C( 21719) }, { INT16_C( 8252), -INT16_C( 21277), INT16_C( 24040), INT16_C( 10969), INT16_C( 20584), INT16_C( 30321), INT16_C( 12165), INT16_C( 22257) }, { -INT16_C( 3043), -INT16_C( 4919), -INT16_C( 21964), INT16_C( 29365), INT16_C( 14215), -INT16_C( 1646), INT16_C( 27114), INT16_C( 9805) }, { INT16_C( 14215), -INT16_C( 14111), INT16_C( 0), -INT16_C( 4919), -INT16_C( 3043), INT16_C( 29443), INT16_C( 8322), INT16_C( 0) } }, { UINT8_C(138), { -INT16_C( 11728), -INT16_C( 29326), -INT16_C( 25428), -INT16_C( 778), INT16_C( 27662), INT16_C( 15745), -INT16_C( 10403), INT16_C( 21082) }, { INT16_C( 18336), INT16_C( 19078), -INT16_C( 1796), INT16_C( 13265), -INT16_C( 13686), -INT16_C( 3298), INT16_C( 17432), INT16_C( 18557) }, { -INT16_C( 4073), -INT16_C( 15402), -INT16_C( 13172), -INT16_C( 25921), INT16_C( 16440), -INT16_C( 27177), INT16_C( 12823), -INT16_C( 18457) }, { INT16_C( 0), -INT16_C( 10403), INT16_C( 0), -INT16_C( 
29326), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 27177) } }, { UINT8_C(121), { INT16_C( 365), INT16_C( 25973), -INT16_C( 22318), -INT16_C( 25105), -INT16_C( 7226), INT16_C( 2997), -INT16_C( 672), INT16_C( 20514) }, { -INT16_C( 6701), -INT16_C( 24611), INT16_C( 30628), -INT16_C( 6953), INT16_C( 27983), -INT16_C( 32261), -INT16_C( 19884), -INT16_C( 15622) }, { INT16_C( 28595), -INT16_C( 31449), INT16_C( 5911), -INT16_C( 8670), -INT16_C( 10246), INT16_C( 23273), INT16_C( 3029), -INT16_C( 22357) }, { -INT16_C( 25105), INT16_C( 0), INT16_C( 0), INT16_C( 20514), -INT16_C( 22357), -INT16_C( 8670), -INT16_C( 7226), INT16_C( 0) } }, { UINT8_C(240), { INT16_C( 18568), -INT16_C( 108), INT16_C( 30751), -INT16_C( 29618), -INT16_C( 12429), INT16_C( 9697), -INT16_C( 23607), INT16_C( 14552) }, { INT16_C( 24010), -INT16_C( 7856), INT16_C( 11904), INT16_C( 22491), INT16_C( 13847), INT16_C( 8748), -INT16_C( 10783), INT16_C( 26898) }, { -INT16_C( 23011), INT16_C( 15464), -INT16_C( 18658), -INT16_C( 28215), -INT16_C( 21882), INT16_C( 20662), -INT16_C( 29107), INT16_C( 6024) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 14552), -INT16_C( 21882), -INT16_C( 108), INT16_C( 30751) } }, { UINT8_C(235), { -INT16_C( 1576), INT16_C( 1643), -INT16_C( 15404), INT16_C( 2589), INT16_C( 16367), -INT16_C( 15125), INT16_C( 21585), -INT16_C( 2079) }, { INT16_C( 7869), INT16_C( 29717), -INT16_C( 22809), -INT16_C( 28166), INT16_C( 19036), -INT16_C( 5410), -INT16_C( 2605), -INT16_C( 21546) }, { INT16_C( 16878), -INT16_C( 15438), -INT16_C( 12540), -INT16_C( 2867), -INT16_C( 18161), INT16_C( 24760), -INT16_C( 26099), -INT16_C( 13736) }, { INT16_C( 24760), -INT16_C( 15125), INT16_C( 0), -INT16_C( 12540), INT16_C( 0), -INT16_C( 26099), INT16_C( 2589), INT16_C( 21585) } }, { UINT8_C(184), { INT16_C( 15981), INT16_C( 5279), INT16_C( 12345), -INT16_C( 31888), INT16_C( 23310), INT16_C( 854), INT16_C( 561), INT16_C( 29426) }, { -INT16_C( 19020), -INT16_C( 31881), INT16_C( 27522), 
INT16_C( 15250), -INT16_C( 3293), -INT16_C( 17079), INT16_C( 4939), -INT16_C( 18315) }, { INT16_C( 5202), -INT16_C( 29748), INT16_C( 15684), INT16_C( 21006), INT16_C( 26008), -INT16_C( 13994), INT16_C( 18535), INT16_C( 6971) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 12345), -INT16_C( 31888), -INT16_C( 29748), INT16_C( 0), INT16_C( 854) } }, { UINT8_C(253), { -INT16_C( 24910), INT16_C( 7551), -INT16_C( 17615), INT16_C( 9281), -INT16_C( 508), INT16_C( 5999), INT16_C( 10100), -INT16_C( 30615) }, { -INT16_C( 2828), INT16_C( 12749), INT16_C( 7939), INT16_C( 26825), -INT16_C( 28043), -INT16_C( 16945), -INT16_C( 5427), -INT16_C( 32582) }, { INT16_C( 14984), -INT16_C( 18019), -INT16_C( 8459), -INT16_C( 1571), INT16_C( 19677), INT16_C( 20752), INT16_C( 31348), INT16_C( 26841) }, { -INT16_C( 508), INT16_C( 0), INT16_C( 9281), -INT16_C( 18019), INT16_C( 5999), INT16_C( 26841), INT16_C( 20752), -INT16_C( 8459) } }, { UINT8_C(110), { -INT16_C( 26202), -INT16_C( 14735), -INT16_C( 9886), -INT16_C( 3013), -INT16_C( 1624), -INT16_C( 27967), INT16_C( 16819), -INT16_C( 4837) }, { -INT16_C( 11041), -INT16_C( 16926), -INT16_C( 9294), -INT16_C( 358), -INT16_C( 5140), INT16_C( 26226), -INT16_C( 9531), INT16_C( 27604) }, { INT16_C( 18035), -INT16_C( 10959), INT16_C( 27935), -INT16_C( 14135), -INT16_C( 29850), INT16_C( 6490), INT16_C( 30156), -INT16_C( 21753) }, { INT16_C( 0), -INT16_C( 9886), -INT16_C( 9886), INT16_C( 27935), INT16_C( 0), -INT16_C( 9886), -INT16_C( 27967), INT16_C( 0) } }, }; /* Load a, idx and b from the current entry, apply the zero-masked permute with that entry's k, and compare against the expected r. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi16(test_vec[i].idx); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_maskz_permutex2var_epi16(test_vec[i].k, a, idx, b); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); /* Disabled alternative: builds 8 random inputs and prints inputs and result through the simde_test_x86_write_* helpers (presumably to regenerate the table above - confirm). */ for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = 
simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i idx = simde_test_x86_random_i16x8(); simde__m128i b = simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_maskz_permutex2var_epi16(k, a, idx, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Table-driven test for simde_mm_permutex2var_epi32(a, idx, b): each test_vec entry holds the inputs a, idx, b and the expected vector r. The enabled branch returns 0 after its loop; the disabled branch prints random cases and returns 1. */ static int test_simde_mm_permutex2var_epi32(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { /* NOTE(review): arrays are sized 8 but each initializer lists 4 values, so the remaining elements are implicitly zero; presumably only 4 lanes are ever loaded - confirm. */ const int32_t a[8]; const int32_t idx[8]; const int32_t b[8]; const int32_t r[8]; } test_vec[] = { { { -INT32_C( 60167862), -INT32_C( 1309015099), -INT32_C( 1273532945), -INT32_C( 1155536057) }, { INT32_C( 1368412465), INT32_C( 605641406), -INT32_C( 1321372699), INT32_C( 861750505) }, { -INT32_C( 214972882), -INT32_C( 1197200951), -INT32_C( 563299434), -INT32_C( 661025626) }, { -INT32_C( 1309015099), -INT32_C( 563299434), -INT32_C( 1197200951), -INT32_C( 1309015099) } }, { { -INT32_C( 1691801123), INT32_C( 1757364867), -INT32_C( 1642398539), INT32_C( 1875998529) }, { INT32_C( 107085885), -INT32_C( 1061222870), INT32_C( 1738419137), -INT32_C( 1807796297) }, { -INT32_C( 466655135), INT32_C( 1615720106), INT32_C( 754870251), INT32_C( 463196382) }, { INT32_C( 1615720106), -INT32_C( 1642398539), INT32_C( 1757364867), INT32_C( 463196382) } }, { { -INT32_C( 98435632), -INT32_C( 994320381), -INT32_C( 1020569332), -INT32_C( 229086319) }, { INT32_C( 2127988691), INT32_C( 1641948022), INT32_C( 1754127498), INT32_C( 2105747628) }, { INT32_C( 678929701), -INT32_C( 1846791547), INT32_C( 492115852), INT32_C( 1443868034) }, { -INT32_C( 229086319), INT32_C( 492115852), -INT32_C( 1020569332), INT32_C( 678929701) } }, { { -INT32_C( 1428888268), -INT32_C( 1827950071), INT32_C( 1006409870), -INT32_C( 440893504) }, { -INT32_C( 1441976540), 
-INT32_C( 298059422), -INT32_C( 1827958767), INT32_C( 1927879229) }, { INT32_C( 136101375), -INT32_C( 23320465), -INT32_C( 2126931776), INT32_C( 1013379351) }, { INT32_C( 136101375), INT32_C( 1006409870), -INT32_C( 1827950071), -INT32_C( 23320465) } }, { { -INT32_C( 2098826208), INT32_C( 2121277805), -INT32_C( 284001358), -INT32_C( 1788740715) }, { INT32_C( 681410233), INT32_C( 1713781158), -INT32_C( 370712623), INT32_C( 1898270288) }, { INT32_C( 804457410), -INT32_C( 558996436), INT32_C( 1976484063), INT32_C( 1946824635) }, { INT32_C( 2121277805), INT32_C( 1976484063), INT32_C( 2121277805), -INT32_C( 2098826208) } }, { { INT32_C( 1402841005), -INT32_C( 1296383007), INT32_C( 1939579171), -INT32_C( 1310408465) }, { -INT32_C( 119416629), INT32_C( 467046204), INT32_C( 177251407), -INT32_C( 2122343724) }, { INT32_C( 601168962), INT32_C( 47550431), INT32_C( 544633136), -INT32_C( 36611535) }, { -INT32_C( 1310408465), INT32_C( 601168962), -INT32_C( 36611535), INT32_C( 601168962) } }, { { INT32_C( 1861595698), -INT32_C( 1869952191), INT32_C( 1151015536), -INT32_C( 138077515) }, { INT32_C( 354064950), INT32_C( 1511583529), -INT32_C( 1837461920), INT32_C( 462375912) }, { INT32_C( 1065977086), -INT32_C( 1076882609), -INT32_C( 486315218), -INT32_C( 1143289467) }, { -INT32_C( 486315218), -INT32_C( 1869952191), INT32_C( 1861595698), INT32_C( 1861595698) } }, { { -INT32_C( 1915685789), INT32_C( 1156049123), INT32_C( 1607885174), -INT32_C( 1434819156) }, { INT32_C( 954860521), INT32_C( 1173928471), -INT32_C( 1440154843), INT32_C( 677708484) }, { -INT32_C( 642435594), -INT32_C( 1809998818), -INT32_C( 1443630083), INT32_C( 1096052056) }, { INT32_C( 1156049123), INT32_C( 1096052056), -INT32_C( 1809998818), -INT32_C( 642435594) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi32(test_vec[i].idx); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r 
= simde_mm_permutex2var_epi32(a, idx, b); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); /* Disabled alternative for the unmasked epi32 test: builds 8 random inputs and prints inputs and result through the simde_test_x86_write_* helpers (presumably to regenerate the table - confirm). */ for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i idx = simde_test_x86_random_i32x4(); simde__m128i b = simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_permutex2var_epi32(a, idx, b); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Table-driven test for simde_mm_mask_permutex2var_epi32(a, k, idx, b): each test_vec entry holds the inputs a, k, idx, b and the expected vector r. In the entries below, result lanes whose bit in the low four bits of k is clear equal the same lane of a. The enabled branch returns 0 after the loop; the disabled branch prints random cases and returns 1. */ static int test_simde_mm_mask_permutex2var_epi32(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { /* NOTE(review): arrays are sized 8 but each initializer lists 4 values, so the remaining elements are implicitly zero; presumably only 4 lanes are ever loaded - confirm. The table is also not declared static. */ const int32_t a[8]; const simde__mmask8 k; const int32_t idx[8]; const int32_t b[8]; const int32_t r[8]; } test_vec[] = { { { -INT32_C( 2005254543), INT32_C( 500003576), INT32_C( 851965293), -INT32_C( 312857353) }, UINT8_C( 97), { -INT32_C( 1417689585), -INT32_C( 676850460), INT32_C( 1966100743), -INT32_C( 471436891) }, { INT32_C( 1574661867), INT32_C( 818673723), -INT32_C( 349700673), INT32_C( 1716327511) }, { INT32_C( 1716327511), INT32_C( 500003576), INT32_C( 851965293), -INT32_C( 312857353) } }, { { -INT32_C( 1106129702), -INT32_C( 392775200), -INT32_C( 1336031734), INT32_C( 596853559) }, UINT8_C(177), { INT32_C( 1743552623), INT32_C( 1210522699), INT32_C( 1470042691), INT32_C( 741410143) }, { -INT32_C( 821235690), INT32_C( 1289417862), -INT32_C( 1803318703), -INT32_C( 1941592547) }, { -INT32_C( 1941592547), -INT32_C( 392775200), -INT32_C( 1336031734), INT32_C( 596853559) } }, { { INT32_C( 1928605991), -INT32_C( 1866785971), -INT32_C( 1914217939), INT32_C( 1991842144) }, UINT8_C( 9), { -INT32_C( 1165015355), -INT32_C( 1442063584), INT32_C( 96968798), INT32_C( 388781285) }, { INT32_C( 1667538504), -INT32_C( 1265568423), -INT32_C( 183231012), -INT32_C( 1661039914) }, { -INT32_C( 1265568423), -INT32_C( 1866785971), -INT32_C( 
1914217939), -INT32_C( 1265568423) } }, { { -INT32_C( 262763056), -INT32_C( 946183832), -INT32_C( 406035967), -INT32_C( 16844618) }, UINT8_C(150), { INT32_C( 1475371362), INT32_C( 255108337), INT32_C( 1122314680), -INT32_C( 1257012952) }, { INT32_C( 958268375), INT32_C( 3859870), -INT32_C( 1447681615), -INT32_C( 2109754337) }, { -INT32_C( 262763056), -INT32_C( 946183832), -INT32_C( 262763056), -INT32_C( 16844618) } }, { { INT32_C( 114896661), -INT32_C( 1961554477), INT32_C( 1607400247), INT32_C( 1410654588) }, UINT8_C(228), { INT32_C( 394431794), -INT32_C( 372735289), -INT32_C( 334991048), -INT32_C( 536769871) }, { INT32_C( 1907624036), INT32_C( 413679389), -INT32_C( 292223219), INT32_C( 1322510620) }, { INT32_C( 114896661), -INT32_C( 1961554477), INT32_C( 114896661), INT32_C( 1410654588) } }, { { INT32_C( 1030116726), INT32_C( 270937816), INT32_C( 1375547296), INT32_C( 489815737) }, UINT8_C( 6), { INT32_C( 623153125), INT32_C( 1060256823), -INT32_C( 1168432687), INT32_C( 1227926004) }, { INT32_C( 1042378255), -INT32_C( 1008848236), INT32_C( 763178799), INT32_C( 1177852512) }, { INT32_C( 1030116726), INT32_C( 1177852512), INT32_C( 270937816), INT32_C( 489815737) } }, { { INT32_C( 1634424873), INT32_C( 1721802132), -INT32_C( 1306461250), -INT32_C( 1258532444) }, UINT8_C(191), { INT32_C( 1330901533), -INT32_C( 8513584), INT32_C( 778022036), INT32_C( 945333728) }, { -INT32_C( 1362314992), INT32_C( 1416376921), -INT32_C( 1527242925), INT32_C( 946056219) }, { INT32_C( 1416376921), INT32_C( 1634424873), -INT32_C( 1362314992), INT32_C( 1634424873) } }, { { INT32_C( 1854453406), INT32_C( 1634535117), -INT32_C( 1836003918), -INT32_C( 2083854222) }, UINT8_C(161), { -INT32_C( 906350185), -INT32_C( 1138995555), -INT32_C( 203964346), -INT32_C( 628027357) }, { -INT32_C( 1633222504), INT32_C( 978389357), -INT32_C( 2119310439), INT32_C( 1159868590) }, { INT32_C( 1159868590), INT32_C( 1634535117), -INT32_C( 1836003918), -INT32_C( 2083854222) } }, }; /* Load a, idx and b from the current entry, apply the masked permute with that entry's k, and compare against the expected r. */ for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi32(test_vec[i].idx); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_mask_permutex2var_epi32(a, test_vec[i].k, idx, b); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); /* Disabled alternative: builds 8 random inputs and prints inputs and result through the simde_test_x86_write_* helpers (presumably to regenerate the table above - confirm). */ for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i idx = simde_test_x86_random_i32x4(); simde__m128i b = simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_mask_permutex2var_epi32(a, k, idx, b); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Table-driven test for simde_mm_mask2_permutex2var_epi32(a, idx, k, b): each test_vec entry holds the inputs a, idx, k, b and the expected vector r. In the entries of this table, result lanes whose bit in the low four bits of k is clear equal the same lane of idx. The enabled branch returns 0 after its loop; the disabled branch prints random cases and returns 1. */ static int test_simde_mm_mask2_permutex2var_epi32(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { /* NOTE(review): arrays are sized 8 but each initializer lists 4 values, so the remaining elements are implicitly zero; presumably only 4 lanes are ever loaded - confirm. */ const int32_t a[8]; const int32_t idx[8]; const simde__mmask8 k; const int32_t b[8]; const int32_t r[8]; } test_vec[] = { { { -INT32_C( 32629663), -INT32_C( 1329910934), INT32_C( 262378219), INT32_C( 988362146) }, { -INT32_C( 1562800075), INT32_C( 853355161), -INT32_C( 1145861619), INT32_C( 453039546) }, UINT8_C(241), { INT32_C( 979048719), INT32_C( 1730481364), -INT32_C( 469158737), -INT32_C( 1374075875) }, { INT32_C( 1730481364), INT32_C( 853355161), -INT32_C( 1145861619), INT32_C( 453039546) } }, { { INT32_C( 1195883549), INT32_C( 592738969), INT32_C( 64819245), INT32_C( 536147984) }, { -INT32_C( 430354415), INT32_C( 189628252), -INT32_C( 772778317), -INT32_C( 1216411238) }, UINT8_C(197), { INT32_C( 1079967430), INT32_C( 1668186451), INT32_C( 1450406238), -INT32_C( 1251437723) }, { INT32_C( 592738969), INT32_C( 189628252), INT32_C( 536147984), -INT32_C( 
1216411238) } }, { { INT32_C( 1796296428), -INT32_C( 249619045), INT32_C( 378335245), INT32_C( 903627631) }, { -INT32_C( 1787413950), INT32_C( 452519100), -INT32_C( 1166972075), -INT32_C( 344925698) }, UINT8_C( 39), { -INT32_C( 1631431039), INT32_C( 1705751413), -INT32_C( 2083208641), -INT32_C( 658175330) }, { INT32_C( 378335245), -INT32_C( 1631431039), INT32_C( 1705751413), -INT32_C( 344925698) } }, { { INT32_C( 1687444096), -INT32_C( 1111904430), -INT32_C( 105090272), INT32_C( 1696638947) }, { INT32_C( 1929634557), -INT32_C( 707219563), INT32_C( 257469809), INT32_C( 937893303) }, UINT8_C(119), { INT32_C( 734632828), -INT32_C( 951351724), -INT32_C( 374651838), INT32_C( 1189548132) }, { -INT32_C( 951351724), -INT32_C( 951351724), -INT32_C( 1111904430), INT32_C( 937893303) } }, { { -INT32_C( 1025811949), -INT32_C( 550260686), INT32_C( 630604296), -INT32_C( 1499673302) }, { -INT32_C( 1127127704), INT32_C( 780410092), -INT32_C( 1005047968), INT32_C( 1376452415) }, UINT8_C( 89), { -INT32_C( 1769269787), -INT32_C( 1952552120), -INT32_C( 810171391), -INT32_C( 969450656) }, { -INT32_C( 1025811949), INT32_C( 780410092), -INT32_C( 1005047968), -INT32_C( 969450656) } }, { { INT32_C( 1219687468), -INT32_C( 1482104712), -INT32_C( 135893768), INT32_C( 1531983990) }, { -INT32_C( 1762534323), INT32_C( 1210159175), -INT32_C( 1273506220), INT32_C( 1568296753) }, UINT8_C( 67), { INT32_C( 230401324), -INT32_C( 1190829491), -INT32_C( 2144338616), INT32_C( 701401677) }, { -INT32_C( 1190829491), INT32_C( 701401677), -INT32_C( 1273506220), INT32_C( 1568296753) } }, { { INT32_C( 208692348), INT32_C( 1533065605), INT32_C( 529274064), -INT32_C( 1151145585) }, { -INT32_C( 607642226), -INT32_C( 946549121), INT32_C( 407421899), -INT32_C( 918415795) }, UINT8_C(122), { INT32_C( 1811928498), INT32_C( 1245469237), -INT32_C( 807838746), -INT32_C( 614624066) }, { -INT32_C( 607642226), -INT32_C( 614624066), INT32_C( 407421899), INT32_C( 1245469237) } }, { { INT32_C( 727398493), -INT32_C( 1879694644), 
-INT32_C( 2133062038), INT32_C( 66758224) }, { -INT32_C( 1318127237), INT32_C( 972794451), -INT32_C( 1006054394), -INT32_C( 979409560) }, UINT8_C(157), { INT32_C( 493482234), -INT32_C( 175638298), INT32_C( 2068121813), -INT32_C( 50968574) }, { INT32_C( 66758224), INT32_C( 972794451), INT32_C( 2068121813), INT32_C( 727398493) } }, }; /* mask2 epi32 test loop: load a, idx and b from the current entry, apply the masked permute with that entry's k, and compare against the expected r. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi32(test_vec[i].idx); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_mask2_permutex2var_epi32(a, idx, test_vec[i].k, b); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); /* Disabled alternative: builds 8 random inputs and prints inputs and result through the simde_test_x86_write_* helpers (presumably to regenerate the table - confirm). */ for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i idx = simde_test_x86_random_i32x4(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i b = simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_mask2_permutex2var_epi32(a, idx, k, b); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Table-driven test for simde_mm_maskz_permutex2var_epi32(k, a, idx, b): each test_vec entry holds the inputs k, a, idx, b and the expected vector r. In the entries below, result lanes whose bit in the low four bits of k is clear are 0. The enabled branch returns 0 after the loop; the disabled branch prints random cases and returns 1. */ static int test_simde_mm_maskz_permutex2var_epi32(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { /* NOTE(review): arrays are sized 8 but each initializer lists 4 values, so the remaining elements are implicitly zero; presumably only 4 lanes are ever loaded - confirm. The table is also not declared static. */ const simde__mmask8 k; const int32_t a[8]; const int32_t idx[8]; const int32_t b[8]; const int32_t r[8]; } test_vec[] = { { UINT8_C(183), { -INT32_C( 1570680921), -INT32_C( 1854511223), -INT32_C( 872947925), -INT32_C( 1782213468) }, { -INT32_C( 159653122), INT32_C( 1925935210), -INT32_C( 25934154), -INT32_C( 457871299) }, { INT32_C( 1233590208), -INT32_C( 1428488834), -INT32_C( 2122984996), INT32_C( 1679178342) }, { -INT32_C( 2122984996), -INT32_C( 872947925), -INT32_C( 2122984996), INT32_C( 0) } }, { UINT8_C( 
29), { INT32_C( 42424977), INT32_C( 1824061734), -INT32_C( 559303059), -INT32_C( 2086760852) }, { INT32_C( 302114581), -INT32_C( 1813009471), INT32_C( 1543073824), INT32_C( 393764230) }, { -INT32_C( 585498697), INT32_C( 1699402232), -INT32_C( 196873336), -INT32_C( 1770528383) }, { INT32_C( 1699402232), INT32_C( 0), INT32_C( 42424977), -INT32_C( 196873336) } }, { UINT8_C(200), { INT32_C( 613067128), INT32_C( 138681752), INT32_C( 1938726678), INT32_C( 371893527) }, { -INT32_C( 1878128449), INT32_C( 1159230546), -INT32_C( 1715008329), -INT32_C( 60727933) }, { -INT32_C( 1642009850), INT32_C( 514221064), INT32_C( 462500868), -INT32_C( 1724728102) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 371893527) } }, { UINT8_C(196), { -INT32_C( 1273616064), INT32_C( 1315724098), -INT32_C( 2133719774), INT32_C( 1384566374) }, { INT32_C( 1381639662), INT32_C( 5667275), -INT32_C( 958762486), -INT32_C( 460688476) }, { -INT32_C( 560357220), INT32_C( 506267132), INT32_C( 1889468170), -INT32_C( 1144904243) }, { INT32_C( 0), INT32_C( 0), -INT32_C( 2133719774), INT32_C( 0) } }, { UINT8_C( 74), { -INT32_C( 1777005283), -INT32_C( 694151836), INT32_C( 1652189167), -INT32_C( 1828757775) }, { -INT32_C( 57745929), INT32_C( 151432202), INT32_C( 1893103435), INT32_C( 1455067449) }, { INT32_C( 65851295), -INT32_C( 723939867), -INT32_C( 449358860), -INT32_C( 1451805007) }, { INT32_C( 0), INT32_C( 1652189167), INT32_C( 0), -INT32_C( 694151836) } }, { UINT8_C( 19), { -INT32_C( 1306614523), INT32_C( 603793324), -INT32_C( 1889767938), -INT32_C( 164711641) }, { INT32_C( 752562591), INT32_C( 1562423306), INT32_C( 487524071), -INT32_C( 2093959043) }, { INT32_C( 154488413), INT32_C( 1949053558), -INT32_C( 972846689), -INT32_C( 608423364) }, { -INT32_C( 608423364), -INT32_C( 1889767938), INT32_C( 0), INT32_C( 0) } }, { UINT8_C( 99), { INT32_C( 1215170712), INT32_C( 774884136), -INT32_C( 1834201894), -INT32_C( 873451652) }, { -INT32_C( 1774061212), -INT32_C( 1355434714), -INT32_C( 353633352), INT32_C( 
1330562743) }, { -INT32_C( 141050674), INT32_C( 1646642823), -INT32_C( 1879781102), INT32_C( 1683678208) }, { -INT32_C( 141050674), -INT32_C( 1879781102), INT32_C( 0), INT32_C( 0) } }, { UINT8_C(222), { INT32_C( 1342503579), INT32_C( 705213231), INT32_C( 1692595102), -INT32_C( 46976703) }, { -INT32_C( 1887163703), INT32_C( 564323919), -INT32_C( 1088343589), INT32_C( 647857803) }, { -INT32_C( 1334402688), -INT32_C( 220561836), -INT32_C( 1302872975), -INT32_C( 1213232402) }, { INT32_C( 0), -INT32_C( 1213232402), -INT32_C( 46976703), -INT32_C( 46976703) } }, }; /* Load a, idx and b from the current entry, apply the zero-masked permute with that entry's k, and compare against the expected r. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi32(test_vec[i].idx); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_maskz_permutex2var_epi32(test_vec[i].k, a, idx, b); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); /* Disabled alternative: builds 8 random inputs and prints inputs and result through the simde_test_x86_write_* helpers (presumably to regenerate the table above - confirm). */ for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i idx = simde_test_x86_random_i32x4(); simde__m128i b = simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_maskz_permutex2var_epi32(k, a, idx, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_permutex2var_epi64(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const int64_t a[4]; const int64_t idx[4]; const int64_t b[4]; const int64_t r[4]; } test_vec[] = { { { -INT64_C( 782245236833897292), INT64_C( 5533326131068225049) }, { INT64_C( 3475608754009686260), -INT64_C( 3442841263988829549) }, { -INT64_C( 9090805672746713146), -INT64_C( 
3290177641303930050) }, { -INT64_C( 782245236833897292), -INT64_C( 3290177641303930050) } }, { { -INT64_C( 4818757958397963472), INT64_C( 6688821353463612391) }, { -INT64_C( 2459225296319174666), INT64_C( 2688435622857082015) }, { -INT64_C( 6451348468142937712), INT64_C( 2677809093420955447) }, { -INT64_C( 6451348468142937712), INT64_C( 2677809093420955447) } }, { { INT64_C( 1774526528294708772), INT64_C( 6161175826539054950) }, { INT64_C( 50534417920201806), -INT64_C( 30690825137726145) }, { -INT64_C( 2382857786477046436), -INT64_C( 5330650724638817045) }, { -INT64_C( 2382857786477046436), -INT64_C( 5330650724638817045) } }, { { -INT64_C( 2587467944325606067), INT64_C( 4012668629107270868) }, { -INT64_C( 4591423301560208654), -INT64_C( 8674084366207385472) }, { -INT64_C( 5907116491427454325), INT64_C( 2209873427225726873) }, { -INT64_C( 5907116491427454325), -INT64_C( 2587467944325606067) } }, { { INT64_C( 3824910361258604926), -INT64_C( 8786315084476611249) }, { -INT64_C( 313363687371985633), -INT64_C( 8120524421268044796) }, { -INT64_C( 811220067518523693), -INT64_C( 5260611633037845636) }, { -INT64_C( 5260611633037845636), INT64_C( 3824910361258604926) } }, { { INT64_C( 7464810264407289147), -INT64_C( 722293234019661991) }, { INT64_C( 5788598675114417340), -INT64_C( 7249452481136569578) }, { -INT64_C( 4450453183076494477), INT64_C( 8569828625907242199) }, { INT64_C( 7464810264407289147), -INT64_C( 4450453183076494477) } }, { { -INT64_C( 4889734200370435327), INT64_C( 2664950865741933823) }, { -INT64_C( 2590917768572685087), INT64_C( 6717896597383231242) }, { INT64_C( 4436421935801438510), -INT64_C( 6190028595149276350) }, { INT64_C( 2664950865741933823), INT64_C( 4436421935801438510) } }, { { INT64_C( 6029756168639091405), INT64_C( 8426606836560986679) }, { -INT64_C( 4836907258824023196), -INT64_C( 2068621079329341994) }, { INT64_C( 8392284445376528091), INT64_C( 5088779378738368484) }, { INT64_C( 6029756168639091405), INT64_C( 8392284445376528091) } }, }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi64(test_vec[i].idx); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_permutex2var_epi64(a, idx, b); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i idx = simde_test_x86_random_i64x2(); simde__m128i b = simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_permutex2var_epi64(a, idx, b); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_mask_permutex2var_epi64(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const int64_t a[4]; const simde__mmask8 k; const int64_t idx[4]; const int64_t b[4]; const int64_t r[4]; } test_vec[] = { { { -INT64_C( 7543028266611185892), -INT64_C( 6539928377775161020) }, UINT8_C(191), { -INT64_C( 3442877394007045646), -INT64_C( 1761197723447303559) }, { INT64_C( 8569295065047442400), INT64_C( 8554558376476952183) }, { INT64_C( 8569295065047442400), -INT64_C( 6539928377775161020) } }, { { -INT64_C( 7007880133205117778), INT64_C( 5282052113479790949) }, UINT8_C(193), { INT64_C( 2571183598589112566), INT64_C( 1333379257793488621) }, { -INT64_C( 6895339577704746777), -INT64_C( 629287881478130459) }, { -INT64_C( 6895339577704746777), INT64_C( 5282052113479790949) } }, { { INT64_C( 1532027788627639107), -INT64_C( 1300860764580943486) }, UINT8_C(218), { -INT64_C( 2746164267613235415), INT64_C( 2686909180892833460) }, { -INT64_C( 7684157274447844865), INT64_C( 6309693911654415435) }, { INT64_C( 1532027788627639107), INT64_C( 1532027788627639107) } }, { { -INT64_C( 
549658762569999776), INT64_C( 751617401140754846) }, UINT8_C(177), { INT64_C( 9037301963785364668), INT64_C( 5365480060888031373) }, { -INT64_C( 7768560529860264871), -INT64_C( 6665140635439236730) }, { -INT64_C( 549658762569999776), INT64_C( 751617401140754846) } }, { { INT64_C( 6650741120286514574), INT64_C( 6145137489363597389) }, UINT8_C(229), { -INT64_C( 8078660489535494443), INT64_C( 1057248810792104308) }, { -INT64_C( 4587134168409998371), INT64_C( 8105091314329867436) }, { INT64_C( 6145137489363597389), INT64_C( 6145137489363597389) } }, { { INT64_C( 404672579086695671), INT64_C( 909673452957537376) }, UINT8_C( 74), { -INT64_C( 4511812508023908740), INT64_C( 3501210561589082254) }, { INT64_C( 6244740346333471897), INT64_C( 6270296949807987690) }, { INT64_C( 404672579086695671), INT64_C( 6244740346333471897) } }, { { INT64_C( 8562873309074364071), -INT64_C( 7562480563159486104) }, UINT8_C( 80), { -INT64_C( 4215957994269908787), INT64_C( 5883942175734970222) }, { -INT64_C( 4405734771213616381), -INT64_C( 8016591215279425596) }, { INT64_C( 8562873309074364071), -INT64_C( 7562480563159486104) } }, { { -INT64_C( 6302891879744450248), INT64_C( 2406133987713427416) }, UINT8_C( 10), { INT64_C( 2742794905676649629), -INT64_C( 8021340572815360226) }, { INT64_C( 2239822800200823196), INT64_C( 2986553460037330970) }, { -INT64_C( 6302891879744450248), INT64_C( 2239822800200823196) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi64(test_vec[i].idx); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_mask_permutex2var_epi64(a, test_vec[i].k, idx, b); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i64x2(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i idx = 
simde_test_x86_random_i64x2(); simde__m128i b = simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_mask_permutex2var_epi64(a, k, idx, b); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_mask2_permutex2var_epi64(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const int64_t a[4]; const int64_t idx[4]; const simde__mmask8 k; const int64_t b[4]; const int64_t r[4]; } test_vec[] = { { { -INT64_C( 2801679457820638586), INT64_C( 203457781086025645) }, { -INT64_C( 8465992442779047437), INT64_C( 4024974182712644857) }, UINT8_C( 22), { -INT64_C( 3238562174574543118), INT64_C( 7602529172184444603) }, { -INT64_C( 8465992442779047437), INT64_C( 203457781086025645) } }, { { -INT64_C( 551499208236670305), -INT64_C( 3815426458027164062) }, { -INT64_C( 8192545992161328225), INT64_C( 2581580203438172265) }, UINT8_C(127), { -INT64_C( 2808033006703115529), -INT64_C( 8285077360635282043) }, { -INT64_C( 8285077360635282043), -INT64_C( 3815426458027164062) } }, { { -INT64_C( 5237985640399732565), -INT64_C( 9067893895777765365) }, { INT64_C( 7896269098771221728), INT64_C( 8197072242104092715) }, UINT8_C( 90), { INT64_C( 6709484544437344034), -INT64_C( 407176718820260581) }, { INT64_C( 7896269098771221728), -INT64_C( 407176718820260581) } }, { { -INT64_C( 942853577198398063), INT64_C( 168650656867426083) }, { INT64_C( 8900291953104748673), INT64_C( 7556281339435314421) }, UINT8_C( 98), { INT64_C( 3545175210209323199), INT64_C( 459952681479450551) }, { INT64_C( 8900291953104748673), INT64_C( 168650656867426083) } }, { { INT64_C( 8740889894934972456), -INT64_C( 3565241159132368427) }, { INT64_C( 5126239391095558880), INT64_C( 8344196267645613631) }, UINT8_C( 19), { INT64_C( 
5325955263526587186), INT64_C( 2630426068575063272) }, { INT64_C( 8740889894934972456), INT64_C( 2630426068575063272) } }, { { -INT64_C( 3507744900370893821), INT64_C( 1567971629313169188) }, { INT64_C( 7114019977990627672), INT64_C( 4311498696811549215) }, UINT8_C(249), { -INT64_C( 5284302775468798326), -INT64_C( 188395703719339652) }, { -INT64_C( 3507744900370893821), INT64_C( 4311498696811549215) } }, { { -INT64_C( 6914193595038182440), INT64_C( 2043249474898869469) }, { -INT64_C( 3695119587732165022), -INT64_C( 2756703242194365786) }, UINT8_C( 12), { -INT64_C( 8292187049094888139), -INT64_C( 7526857854022693356) }, { -INT64_C( 3695119587732165022), -INT64_C( 2756703242194365786) } }, { { -INT64_C( 7507583844654543396), -INT64_C( 8876469178637709174) }, { -INT64_C( 5074572008342984137), -INT64_C( 3959106160021032243) }, UINT8_C(250), { INT64_C( 5851539042646094582), INT64_C( 3609676646046562415) }, { -INT64_C( 5074572008342984137), -INT64_C( 8876469178637709174) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi64(test_vec[i].idx); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_mask2_permutex2var_epi64(a, idx, test_vec[i].k, b); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i idx = simde_test_x86_random_i64x2(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i b = simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_mask2_permutex2var_epi64(a, idx, k, b); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, 
SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_maskz_permutex2var_epi64(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde__mmask8 k; const int64_t a[4]; const int64_t idx[4]; const int64_t b[4]; const int64_t r[4]; } test_vec[] = { { UINT8_C(184), { INT64_C( 8315058410344732703), INT64_C( 901241208001548668) }, { INT64_C( 6296066776003062554), INT64_C( 8835308594298569407) }, { INT64_C( 6282401473346000170), INT64_C( 291154986868543459) }, { INT64_C( 0), INT64_C( 0) } }, { UINT8_C(142), { -INT64_C( 7514742692686989864), INT64_C( 999918108119571300) }, { -INT64_C( 2734178881431429991), -INT64_C( 1219974020720227296) }, { -INT64_C( 6293045713091231811), -INT64_C( 6810499231030207433) }, { INT64_C( 0), -INT64_C( 7514742692686989864) } }, { UINT8_C(219), { -INT64_C( 1746409450162959260), INT64_C( 6326411064036443717) }, { INT64_C( 2713211558092857043), -INT64_C( 595091341681223774) }, { INT64_C( 3052980837539275932), INT64_C( 2184813489316978083) }, { INT64_C( 2184813489316978083), INT64_C( 3052980837539275932) } }, { UINT8_C(226), { INT64_C( 5535928024616898285), -INT64_C( 2942302515214895681) }, { INT64_C( 6733697284015169905), -INT64_C( 5979155501241897697) }, { -INT64_C( 4479478530377354925), -INT64_C( 94159367526435481) }, { INT64_C( 0), -INT64_C( 94159367526435481) } }, { UINT8_C( 27), { -INT64_C( 6020336063474840211), INT64_C( 880764458998994464) }, { INT64_C( 5553729540801348344), INT64_C( 843537205922819103) }, { INT64_C( 7535034862078513317), INT64_C( 1157573637160500584) }, { -INT64_C( 6020336063474840211), INT64_C( 1157573637160500584) } }, { UINT8_C( 73), { -INT64_C( 5112379643791361505), INT64_C( 3136020862278804999) }, { -INT64_C( 505004610110133566), INT64_C( 3529168518355353898) }, { INT64_C( 1107933924008385242), -INT64_C( 2640950955453990957) }, { INT64_C( 1107933924008385242), INT64_C( 0) } }, { UINT8_C(223), { INT64_C( 7985676457707466956), INT64_C( 6403468296150025186) }, { -INT64_C( 7219710123367693105), INT64_C( 
2500500916436008031) }, { INT64_C( 4817617259592322304), -INT64_C( 2740966993719698912) }, { -INT64_C( 2740966993719698912), -INT64_C( 2740966993719698912) } }, { UINT8_C(165), { -INT64_C( 6907355642667084458), INT64_C( 4064806163663157465) }, { INT64_C( 837628511598991074), -INT64_C( 8510132917449114338) }, { INT64_C( 7784599032857325508), -INT64_C( 625932087889363604) }, { INT64_C( 7784599032857325508), INT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi64(test_vec[i].idx); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_maskz_permutex2var_epi64(test_vec[i].k, a, idx, b); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i idx = simde_test_x86_random_i64x2(); simde__m128i b = simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_maskz_permutex2var_epi64(k, a, idx, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_permutex2var_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[32]; const int8_t idx[32]; const int8_t b[32]; const int8_t r[32]; } test_vec[] = { { { INT8_C( 20), -INT8_C( 24), INT8_C( 118), -INT8_C( 59), -INT8_C( 62), INT8_C( 21), -INT8_C( 47), -INT8_C( 32), INT8_C( 83), -INT8_C( 25), INT8_C( 32), -INT8_C( 122), -INT8_C( 42), INT8_C( 6), INT8_C( 16), -INT8_C( 101) }, { INT8_C( 117), INT8_C( 95), INT8_C( 3), INT8_C( 8), -INT8_C( 46), INT8_C( 11), INT8_C( 117), 
INT8_C( 62), INT8_C( 8), INT8_C( 74), INT8_C( 86), INT8_C( 29), -INT8_C( 119), -INT8_C( 90), INT8_C( 20), -INT8_C( 99) }, { -INT8_C( 114), -INT8_C( 118), INT8_C( 99), INT8_C( 80), -INT8_C( 96), INT8_C( 52), INT8_C( 48), -INT8_C( 13), INT8_C( 27), INT8_C( 81), INT8_C( 121), -INT8_C( 15), INT8_C( 87), -INT8_C( 119), -INT8_C( 116), -INT8_C( 52) }, { INT8_C( 52), -INT8_C( 52), -INT8_C( 59), INT8_C( 83), INT8_C( 99), -INT8_C( 122), INT8_C( 52), -INT8_C( 116), INT8_C( 83), INT8_C( 32), INT8_C( 48), -INT8_C( 119), -INT8_C( 25), -INT8_C( 47), -INT8_C( 96), -INT8_C( 119) } }, { { -INT8_C( 24), -INT8_C( 113), -INT8_C( 44), -INT8_C( 69), -INT8_C( 102), INT8_C( 73), -INT8_C( 7), -INT8_C( 93), -INT8_C( 108), INT8_C( 79), -INT8_C( 64), INT8_C( 29), -INT8_C( 11), -INT8_C( 43), -INT8_C( 70), -INT8_C( 125) }, { INT8_C( 95), INT8_C( 29), -INT8_C( 45), -INT8_C( 1), INT8_C( 81), INT8_C( 4), -INT8_C( 14), INT8_C( 108), INT8_C( 85), INT8_C( 108), INT8_C( 94), -INT8_C( 84), -INT8_C( 11), -INT8_C( 22), INT8_C( 120), -INT8_C( 34) }, { INT8_C( 122), INT8_C( 76), -INT8_C( 103), INT8_C( 20), -INT8_C( 106), -INT8_C( 110), -INT8_C( 73), INT8_C( 42), -INT8_C( 30), INT8_C( 120), INT8_C( 71), -INT8_C( 41), INT8_C( 77), INT8_C( 1), INT8_C( 91), -INT8_C( 84) }, { -INT8_C( 84), INT8_C( 1), INT8_C( 20), -INT8_C( 84), INT8_C( 76), -INT8_C( 102), -INT8_C( 103), -INT8_C( 11), -INT8_C( 110), -INT8_C( 11), INT8_C( 91), -INT8_C( 11), -INT8_C( 110), -INT8_C( 64), -INT8_C( 30), INT8_C( 91) } }, { { INT8_C( 31), INT8_C( 46), -INT8_C( 84), INT8_C( 112), INT8_C( 50), -INT8_C( 98), -INT8_C( 35), -INT8_C( 121), INT8_C( 10), INT8_C( 59), INT8_C( 51), INT8_C( 0), INT8_C( 37), -INT8_C( 85), -INT8_C( 34), -INT8_C( 97) }, { -INT8_C( 8), INT8_C( 119), -INT8_C( 76), -INT8_C( 114), INT8_C( 9), INT8_C( 107), -INT8_C( 72), -INT8_C( 21), -INT8_C( 29), -INT8_C( 1), -INT8_C( 61), INT8_C( 48), INT8_C( 0), INT8_C( 30), -INT8_C( 35), INT8_C( 31) }, { INT8_C( 76), -INT8_C( 119), -INT8_C( 112), INT8_MAX, INT8_C( 39), INT8_C( 109), 
INT8_C( 6), INT8_C( 50), -INT8_C( 88), INT8_C( 58), INT8_C( 50), -INT8_C( 51), -INT8_C( 27), INT8_C( 16), INT8_C( 109), -INT8_C( 35) }, { -INT8_C( 88), INT8_C( 50), INT8_C( 39), -INT8_C( 34), INT8_C( 59), INT8_C( 0), -INT8_C( 88), INT8_C( 0), INT8_C( 112), -INT8_C( 35), INT8_C( 112), INT8_C( 76), INT8_C( 31), INT8_C( 109), INT8_C( 16), -INT8_C( 35) } }, { { -INT8_C( 121), INT8_C( 33), INT8_C( 107), -INT8_C( 112), -INT8_C( 116), INT8_C( 35), INT8_C( 124), INT8_C( 112), INT8_C( 34), INT8_C( 63), -INT8_C( 96), INT8_C( 35), INT8_C( 93), INT8_C( 125), INT8_C( 66), -INT8_C( 87) }, { INT8_C( 6), -INT8_C( 46), INT8_C( 40), INT8_C( 46), INT8_C( 63), INT8_C( 47), INT8_C( 96), -INT8_C( 25), INT8_C( 105), -INT8_C( 110), -INT8_C( 75), INT8_C( 78), -INT8_C( 94), INT8_C( 34), INT8_C( 44), INT8_C( 41) }, { INT8_C( 67), -INT8_C( 105), -INT8_C( 71), -INT8_C( 49), -INT8_C( 69), INT8_C( 53), INT8_C( 63), -INT8_C( 35), INT8_C( 116), -INT8_C( 32), INT8_C( 0), -INT8_C( 47), INT8_C( 93), INT8_C( 67), INT8_C( 123), INT8_C( 100) }, { INT8_C( 124), -INT8_C( 71), INT8_C( 34), INT8_C( 66), INT8_C( 100), -INT8_C( 87), -INT8_C( 121), INT8_C( 112), INT8_C( 63), -INT8_C( 71), INT8_C( 53), INT8_C( 66), INT8_C( 107), INT8_C( 107), INT8_C( 93), INT8_C( 63) } }, { { INT8_C( 21), -INT8_C( 93), -INT8_C( 110), INT8_C( 85), -INT8_C( 46), -INT8_C( 14), INT8_C( 60), INT8_C( 59), -INT8_C( 124), -INT8_C( 15), -INT8_C( 118), INT8_C( 38), INT8_C( 19), -INT8_C( 74), INT8_C( 79), INT8_C( 86) }, { INT8_C( 77), INT8_C( 8), INT8_C( 38), INT8_C( 8), INT8_C( 62), INT8_C( 101), -INT8_C( 26), -INT8_C( 78), INT8_C( 69), -INT8_C( 26), -INT8_C( 124), -INT8_C( 93), INT8_C( 41), -INT8_C( 1), INT8_C( 7), INT8_C( 63) }, { -INT8_C( 94), -INT8_C( 103), -INT8_C( 108), INT8_C( 117), -INT8_C( 117), -INT8_C( 48), -INT8_C( 80), INT8_C( 15), -INT8_C( 62), INT8_C( 58), INT8_C( 53), -INT8_C( 43), -INT8_C( 16), -INT8_C( 124), INT8_C( 44), INT8_C( 62) }, { -INT8_C( 74), -INT8_C( 124), INT8_C( 60), -INT8_C( 124), INT8_C( 44), -INT8_C( 14), 
INT8_C( 60), -INT8_C( 108), -INT8_C( 14), INT8_C( 60), -INT8_C( 46), INT8_C( 85), -INT8_C( 15), INT8_C( 62), INT8_C( 59), INT8_C( 62) } }, { { -INT8_C( 116), INT8_C( 82), INT8_C( 70), -INT8_C( 54), -INT8_C( 73), INT8_C( 44), INT8_C( 125), -INT8_C( 3), INT8_C( 19), INT8_C( 1), -INT8_C( 96), INT8_C( 60), INT8_C( 0), -INT8_C( 89), INT8_C( 123), -INT8_C( 94) }, { INT8_C( 64), INT8_C( 15), INT8_C( 23), -INT8_C( 53), -INT8_C( 32), -INT8_C( 56), -INT8_C( 38), -INT8_C( 94), INT8_C( 2), INT8_C( 15), INT8_C( 119), -INT8_C( 13), -INT8_C( 109), -INT8_C( 93), INT8_C( 49), INT8_C( 31) }, { -INT8_C( 11), INT8_C( 119), -INT8_C( 22), -INT8_C( 83), -INT8_C( 92), INT8_C( 103), -INT8_C( 86), -INT8_C( 73), INT8_C( 104), INT8_C( 74), -INT8_C( 13), INT8_C( 104), -INT8_C( 15), INT8_C( 111), INT8_C( 10), INT8_C( 49) }, { -INT8_C( 116), -INT8_C( 94), -INT8_C( 73), INT8_C( 60), -INT8_C( 116), INT8_C( 19), -INT8_C( 13), INT8_C( 70), INT8_C( 70), -INT8_C( 94), -INT8_C( 73), -INT8_C( 83), -INT8_C( 83), -INT8_C( 54), INT8_C( 119), INT8_C( 49) } }, { { INT8_C( 126), INT8_C( 34), -INT8_C( 4), INT8_C( 94), -INT8_C( 22), -INT8_C( 42), INT8_C( 0), -INT8_C( 20), -INT8_C( 27), INT8_C( 120), -INT8_C( 33), INT8_C( 120), INT8_C( 27), INT8_C( 16), -INT8_C( 105), INT8_C( 17) }, { -INT8_C( 120), -INT8_C( 127), -INT8_C( 66), INT8_C( 44), -INT8_C( 24), INT8_C( 104), -INT8_C( 29), INT8_C( 80), -INT8_C( 78), -INT8_C( 42), -INT8_C( 72), -INT8_C( 93), INT8_C( 69), -INT8_C( 61), -INT8_C( 44), -INT8_C( 60) }, { -INT8_C( 27), -INT8_C( 48), INT8_C( 34), -INT8_C( 49), -INT8_C( 90), INT8_C( 35), -INT8_C( 69), -INT8_C( 117), -INT8_C( 101), -INT8_C( 101), INT8_C( 3), -INT8_C( 74), -INT8_C( 85), -INT8_C( 102), -INT8_C( 57), INT8_C( 51) }, { -INT8_C( 27), INT8_C( 34), -INT8_C( 57), INT8_C( 27), -INT8_C( 27), -INT8_C( 27), INT8_C( 94), -INT8_C( 27), INT8_C( 34), -INT8_C( 69), -INT8_C( 101), INT8_C( 94), -INT8_C( 42), INT8_C( 94), -INT8_C( 90), -INT8_C( 22) } }, { { INT8_C( 28), -INT8_C( 123), INT8_C( 95), INT8_C( 4), 
-INT8_C( 19), INT8_C( 66), INT8_C( 85), -INT8_C( 97), INT8_C( 25), INT8_C( 13), INT8_C( 66), INT8_C( 94), -INT8_C( 48), INT8_C( 22), INT8_C( 34), -INT8_C( 75) }, { -INT8_C( 26), INT8_C( 69), -INT8_C( 124), -INT8_C( 116), INT8_C( 104), INT8_C( 64), INT8_C( 23), INT8_C( 3), -INT8_C( 37), INT8_C( 26), -INT8_C( 71), -INT8_C( 122), -INT8_C( 75), -INT8_C( 127), -INT8_C( 70), -INT8_C( 47) }, { INT8_C( 6), INT8_C( 25), -INT8_C( 43), -INT8_C( 12), INT8_C( 92), INT8_C( 42), -INT8_C( 109), INT8_C( 117), INT8_C( 56), -INT8_C( 42), -INT8_C( 45), INT8_C( 8), -INT8_C( 20), -INT8_C( 10), -INT8_C( 66), -INT8_C( 45) }, { INT8_C( 85), INT8_C( 66), -INT8_C( 19), -INT8_C( 48), INT8_C( 25), INT8_C( 28), INT8_C( 117), INT8_C( 4), INT8_C( 8), -INT8_C( 45), -INT8_C( 42), INT8_C( 85), INT8_C( 42), -INT8_C( 123), -INT8_C( 45), INT8_C( 25) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi8(test_vec[i].idx); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_permutex2var_epi8(a, idx, b); simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i idx = simde_test_x86_random_i8x16(); simde__m128i b = simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_permutex2var_epi8(a, idx, b); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_mask_permutex2var_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[32]; const simde__mmask16 k; const int8_t idx[32]; const int8_t b[32]; const int8_t r[32]; } test_vec[] = { { { INT8_C( 59), INT8_C( 66), INT8_C( 
95), -INT8_C( 93), -INT8_C( 126), INT8_C( 119), -INT8_C( 90), INT8_C( 93), -INT8_C( 111), INT8_C( 95), -INT8_C( 28), INT8_C( 70), -INT8_C( 32), -INT8_C( 98), INT8_C( 23), -INT8_C( 25) }, UINT16_C(60855), { -INT8_C( 37), INT8_C( 19), INT8_C( 23), INT8_C( 110), -INT8_C( 120), INT8_C( 79), INT8_C( 68), INT8_C( 92), INT8_C( 88), INT8_C( 49), INT8_C( 82), INT8_C( 22), INT8_C( 4), -INT8_C( 115), INT8_C( 88), INT8_C( 99) }, { INT8_C( 48), -INT8_C( 37), -INT8_C( 38), -INT8_C( 42), INT8_C( 56), INT8_C( 108), INT8_C( 53), INT8_C( 28), -INT8_C( 78), INT8_C( 22), -INT8_C( 70), -INT8_C( 54), -INT8_C( 3), INT8_C( 114), -INT8_C( 73), -INT8_C( 40) }, { -INT8_C( 54), -INT8_C( 42), INT8_C( 28), -INT8_C( 93), -INT8_C( 111), -INT8_C( 25), -INT8_C( 90), -INT8_C( 3), -INT8_C( 78), INT8_C( 95), -INT8_C( 38), INT8_C( 53), -INT8_C( 32), -INT8_C( 98), -INT8_C( 78), -INT8_C( 93) } }, { { -INT8_C( 123), -INT8_C( 50), INT8_C( 70), INT8_C( 14), INT8_C( 30), -INT8_C( 117), INT8_C( 106), INT8_C( 118), -INT8_C( 68), -INT8_C( 68), -INT8_C( 116), -INT8_C( 64), INT8_C( 73), -INT8_C( 28), INT8_C( 35), INT8_C( 121) }, UINT16_C(65215), { INT8_C( 79), -INT8_C( 8), INT8_C( 106), -INT8_C( 124), INT8_C( 20), INT8_C( 28), -INT8_C( 102), -INT8_C( 49), -INT8_C( 26), -INT8_C( 105), INT8_C( 65), -INT8_C( 99), INT8_C( 111), -INT8_C( 58), INT8_C( 108), -INT8_C( 74) }, { -INT8_C( 44), -INT8_C( 118), INT8_C( 65), INT8_C( 62), INT8_C( 0), -INT8_C( 3), -INT8_C( 6), -INT8_C( 116), -INT8_C( 67), INT8_C( 67), INT8_C( 112), -INT8_C( 32), -INT8_C( 68), INT8_C( 48), -INT8_C( 34), INT8_C( 11) }, { INT8_C( 121), -INT8_C( 67), -INT8_C( 116), INT8_C( 30), INT8_C( 0), -INT8_C( 68), INT8_C( 106), INT8_C( 121), -INT8_C( 68), -INT8_C( 116), -INT8_C( 50), INT8_C( 48), INT8_C( 121), INT8_C( 106), INT8_C( 73), -INT8_C( 6) } }, { { INT8_C( 40), INT8_C( 72), -INT8_C( 112), INT8_C( 60), INT8_C( 101), INT8_C( 42), INT8_C( 11), INT8_C( 75), -INT8_C( 62), INT8_C( 76), -INT8_C( 23), INT8_C( 49), INT8_C( 19), INT8_C( 85), -INT8_C( 25), 
-INT8_C( 25) }, UINT16_C(10463), { INT8_C( 38), -INT8_C( 33), INT8_C( 37), INT8_C( 32), INT8_C( 107), -INT8_C( 30), INT8_C( 100), -INT8_C( 37), -INT8_C( 61), INT8_C( 32), INT8_C( 11), -INT8_C( 95), INT8_C( 44), INT8_C( 51), -INT8_C( 22), -INT8_C( 68) }, { INT8_C( 112), INT8_C( 79), -INT8_C( 26), INT8_C( 123), -INT8_C( 102), -INT8_C( 88), -INT8_C( 56), -INT8_C( 125), -INT8_C( 38), -INT8_C( 37), -INT8_C( 40), -INT8_C( 63), -INT8_C( 62), -INT8_C( 73), -INT8_C( 22), -INT8_C( 24) }, { INT8_C( 11), -INT8_C( 24), INT8_C( 42), INT8_C( 40), INT8_C( 49), INT8_C( 42), INT8_C( 101), -INT8_C( 63), -INT8_C( 62), INT8_C( 76), -INT8_C( 23), INT8_C( 72), INT8_C( 19), INT8_C( 123), -INT8_C( 25), -INT8_C( 25) } }, { { -INT8_C( 106), INT8_C( 15), INT8_C( 9), INT8_C( 1), -INT8_C( 14), INT8_C( 109), -INT8_C( 35), -INT8_C( 75), -INT8_C( 115), -INT8_C( 24), INT8_C( 86), -INT8_C( 71), INT8_C( 28), INT8_C( 64), INT8_C( 117), -INT8_C( 116) }, UINT16_C(23695), { INT8_C( 7), INT8_C( 42), INT8_C( 4), -INT8_C( 49), -INT8_C( 83), -INT8_C( 34), -INT8_C( 86), -INT8_C( 122), -INT8_C( 96), INT8_C( 109), INT8_C( 61), -INT8_C( 118), INT8_C( 85), -INT8_C( 44), -INT8_C( 103), INT8_C( 94) }, { -INT8_C( 43), -INT8_C( 117), -INT8_C( 53), -INT8_C( 78), INT8_C( 64), INT8_C( 89), -INT8_C( 101), -INT8_C( 105), INT8_C( 18), -INT8_C( 73), -INT8_C( 41), -INT8_C( 120), INT8_C( 67), INT8_C( 103), -INT8_C( 28), INT8_C( 74) }, { -INT8_C( 75), INT8_C( 86), -INT8_C( 14), -INT8_C( 116), -INT8_C( 14), INT8_C( 109), -INT8_C( 35), -INT8_C( 35), -INT8_C( 115), -INT8_C( 24), INT8_C( 103), INT8_C( 86), INT8_C( 89), INT8_C( 64), -INT8_C( 73), -INT8_C( 116) } }, { { -INT8_C( 111), -INT8_C( 24), INT8_C( 26), INT8_C( 62), -INT8_C( 57), -INT8_C( 60), -INT8_C( 60), INT8_C( 103), INT8_C( 49), INT8_C( 2), -INT8_C( 15), -INT8_C( 121), -INT8_C( 42), -INT8_C( 118), -INT8_C( 27), -INT8_C( 85) }, UINT16_C(45334), { INT8_C( 94), INT8_C( 86), INT8_C( 10), -INT8_C( 7), -INT8_C( 19), INT8_C( 28), -INT8_C( 80), -INT8_C( 59), -INT8_C( 92), 
-INT8_C( 13), INT8_C( 44), -INT8_C( 120), INT8_C( 61), -INT8_C( 67), INT8_C( 113), INT8_C( 87) }, { -INT8_C( 5), INT8_C( 56), INT8_C( 28), -INT8_C( 64), -INT8_C( 97), INT8_C( 77), -INT8_C( 62), -INT8_C( 112), -INT8_C( 44), -INT8_C( 104), INT8_C( 26), -INT8_C( 70), INT8_C( 67), INT8_C( 48), INT8_C( 107), -INT8_C( 95) }, { -INT8_C( 111), -INT8_C( 62), -INT8_C( 15), INT8_C( 62), -INT8_C( 118), -INT8_C( 60), -INT8_C( 60), INT8_C( 103), -INT8_C( 57), INT8_C( 2), -INT8_C( 15), -INT8_C( 121), INT8_C( 48), INT8_C( 48), -INT8_C( 27), -INT8_C( 112) } }, { { -INT8_C( 121), INT8_C( 117), -INT8_C( 102), INT8_C( 116), -INT8_C( 111), INT8_C( 74), INT8_C( 57), INT8_C( 54), INT8_C( 61), INT8_C( 101), -INT8_C( 66), INT8_C( 123), INT8_C( 34), INT8_C( 47), -INT8_C( 46), INT8_C( 30) }, UINT16_C(61031), { -INT8_C( 34), INT8_C( 6), INT8_C( 60), -INT8_C( 96), -INT8_C( 106), INT8_C( 16), INT8_C( 56), -INT8_C( 79), -INT8_C( 54), INT8_C( 123), -INT8_C( 31), INT8_C( 53), INT8_C( 29), INT8_C( 104), -INT8_C( 86), -INT8_C( 73) }, { -INT8_C( 35), INT8_C( 60), INT8_C( 2), INT8_C( 22), INT8_C( 114), INT8_C( 63), INT8_C( 124), INT8_C( 48), -INT8_C( 70), -INT8_C( 98), INT8_C( 96), -INT8_C( 115), -INT8_C( 68), -INT8_C( 57), INT8_C( 123), -INT8_C( 102) }, { INT8_C( 123), INT8_C( 57), -INT8_C( 68), INT8_C( 116), -INT8_C( 111), -INT8_C( 35), -INT8_C( 70), INT8_C( 54), INT8_C( 61), -INT8_C( 115), INT8_C( 117), INT8_C( 63), INT8_C( 34), INT8_C( 61), -INT8_C( 66), INT8_C( 48) } }, { { -INT8_C( 50), -INT8_C( 73), INT8_C( 58), INT8_C( 100), -INT8_C( 56), INT8_C( 114), INT8_C( 21), -INT8_C( 110), -INT8_C( 18), -INT8_C( 9), -INT8_C( 56), INT8_C( 11), INT8_C( 95), INT8_C( 114), -INT8_C( 62), INT8_C( 60) }, UINT16_C(50350), { INT8_C( 83), INT8_C( 32), INT8_C( 4), -INT8_C( 49), INT8_C( 81), -INT8_C( 66), INT8_C( 109), -INT8_C( 79), INT8_C( 75), INT8_C( 42), INT8_C( 120), -INT8_C( 57), -INT8_C( 60), INT8_C( 70), INT8_C( 126), -INT8_C( 1) }, { -INT8_C( 85), INT8_C( 70), INT8_C( 113), -INT8_C( 64), -INT8_C( 39), 
INT8_C( 95), -INT8_C( 73), -INT8_C( 95), INT8_C( 106), INT8_C( 23), INT8_C( 19), INT8_C( 45), INT8_C( 83), -INT8_C( 62), -INT8_C( 15), -INT8_C( 90) }, { -INT8_C( 50), -INT8_C( 50), -INT8_C( 56), INT8_C( 60), -INT8_C( 56), -INT8_C( 15), INT8_C( 21), INT8_C( 70), -INT8_C( 18), -INT8_C( 9), INT8_C( 106), INT8_C( 11), INT8_C( 95), INT8_C( 114), -INT8_C( 15), -INT8_C( 90) } }, { { -INT8_C( 30), -INT8_C( 11), INT8_C( 117), INT8_C( 51), -INT8_C( 76), -INT8_C( 29), -INT8_C( 28), -INT8_C( 1), INT8_C( 13), INT8_C( 93), -INT8_C( 58), -INT8_C( 47), -INT8_C( 93), INT8_C( 69), -INT8_C( 48), INT8_C( 78) }, UINT16_C(17035), { INT8_C( 15), INT8_C( 100), -INT8_C( 95), -INT8_C( 58), INT8_C( 5), INT8_C( 12), -INT8_C( 35), INT8_C( 25), INT8_C( 57), INT8_C( 49), -INT8_C( 37), INT8_C( 42), -INT8_C( 41), -INT8_C( 67), INT8_C( 32), INT8_C( 77) }, { -INT8_C( 15), -INT8_C( 44), INT8_C( 48), -INT8_C( 43), -INT8_C( 45), INT8_C( 61), INT8_C( 50), -INT8_C( 102), INT8_C( 14), -INT8_C( 42), -INT8_C( 33), -INT8_C( 33), INT8_C( 36), INT8_C( 106), INT8_C( 33), INT8_C( 51) }, { INT8_C( 78), -INT8_C( 76), INT8_C( 117), -INT8_C( 28), -INT8_C( 76), -INT8_C( 29), -INT8_C( 28), -INT8_C( 42), INT8_C( 13), -INT8_C( 44), -INT8_C( 58), -INT8_C( 47), -INT8_C( 93), INT8_C( 69), -INT8_C( 30), INT8_C( 78) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi8(test_vec[i].idx); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_mask_permutex2var_epi8(a, test_vec[i].k, idx, b); simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m128i idx = simde_test_x86_random_i8x16(); simde__m128i b = simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_mask_permutex2var_epi8(a, k, 
idx, b); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_mask2_permutex2var_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[32]; const int8_t idx[32]; const simde__mmask16 k; const int8_t b[32]; const int8_t r[32]; } test_vec[] = { { { -INT8_C( 49), -INT8_C( 62), -INT8_C( 6), -INT8_C( 44), -INT8_C( 50), -INT8_C( 41), -INT8_C( 19), INT8_C( 7), INT8_C( 8), -INT8_C( 56), INT8_C( 50), -INT8_C( 32), -INT8_C( 122), INT8_C( 82), INT8_C( 45), INT8_C( 119) }, { INT8_C( 38), INT8_C( 93), INT8_C( 76), -INT8_C( 7), -INT8_C( 102), INT8_MAX, -INT8_C( 109), -INT8_C( 88), INT8_C( 85), INT8_C( 114), -INT8_C( 121), INT8_C( 121), -INT8_C( 35), -INT8_C( 88), -INT8_C( 83), -INT8_C( 84) }, UINT16_C(42859), { INT8_MIN, INT8_C( 57), INT8_C( 126), INT8_C( 110), INT8_C( 65), -INT8_C( 121), INT8_C( 54), INT8_C( 115), INT8_C( 103), -INT8_C( 68), -INT8_C( 59), -INT8_C( 108), INT8_C( 51), -INT8_C( 21), -INT8_C( 15), INT8_MIN }, { -INT8_C( 19), -INT8_C( 21), INT8_C( 76), -INT8_C( 68), -INT8_C( 102), INT8_MIN, INT8_C( 110), -INT8_C( 88), -INT8_C( 121), INT8_C( 126), INT8_C( 7), INT8_C( 121), -INT8_C( 35), INT8_C( 8), -INT8_C( 83), -INT8_C( 122) } }, { { -INT8_C( 28), -INT8_C( 117), -INT8_C( 1), INT8_C( 120), INT8_C( 51), INT8_C( 84), -INT8_C( 22), -INT8_C( 69), -INT8_C( 51), -INT8_C( 57), INT8_C( 99), INT8_C( 122), INT8_C( 115), -INT8_C( 50), INT8_C( 33), -INT8_C( 12) }, { INT8_C( 8), -INT8_C( 96), INT8_C( 98), INT8_C( 73), INT8_C( 39), -INT8_C( 104), -INT8_C( 68), -INT8_C( 114), INT8_C( 85), -INT8_C( 127), INT8_C( 34), -INT8_C( 120), INT8_C( 108), INT8_C( 19), INT8_C( 8), INT8_C( 80) }, UINT16_C( 1950), { -INT8_C( 56), -INT8_C( 47), INT8_C( 91), -INT8_C( 77), -INT8_C( 
116), INT8_C( 41), INT8_C( 122), -INT8_C( 16), -INT8_C( 93), -INT8_C( 18), -INT8_C( 66), -INT8_C( 59), -INT8_C( 30), -INT8_C( 58), INT8_C( 101), INT8_C( 68) }, { INT8_C( 8), -INT8_C( 28), -INT8_C( 1), -INT8_C( 57), -INT8_C( 69), -INT8_C( 104), -INT8_C( 68), INT8_C( 33), INT8_C( 41), -INT8_C( 117), -INT8_C( 1), -INT8_C( 120), INT8_C( 108), INT8_C( 19), INT8_C( 8), INT8_C( 80) } }, { { INT8_C( 15), -INT8_C( 116), -INT8_C( 36), -INT8_C( 53), INT8_C( 26), INT8_C( 49), INT8_C( 76), INT8_C( 60), -INT8_C( 70), -INT8_C( 72), INT8_C( 79), -INT8_C( 62), INT8_C( 9), -INT8_C( 19), -INT8_C( 54), -INT8_C( 47) }, { -INT8_C( 66), INT8_C( 37), -INT8_C( 124), INT8_C( 75), INT8_C( 78), -INT8_C( 1), INT8_C( 59), -INT8_C( 14), -INT8_C( 19), -INT8_C( 7), -INT8_C( 73), -INT8_C( 49), -INT8_C( 64), INT8_C( 28), INT8_C( 19), -INT8_C( 49) }, UINT16_C(61352), { -INT8_C( 101), -INT8_C( 62), INT8_C( 33), -INT8_C( 25), -INT8_C( 2), -INT8_C( 37), -INT8_C( 96), INT8_C( 77), -INT8_C( 99), -INT8_C( 87), INT8_C( 58), INT8_C( 103), INT8_C( 122), -INT8_C( 8), -INT8_C( 115), -INT8_C( 1) }, { -INT8_C( 66), INT8_C( 37), -INT8_C( 124), -INT8_C( 62), INT8_C( 78), -INT8_C( 1), INT8_C( 59), INT8_C( 33), -INT8_C( 19), -INT8_C( 87), INT8_C( 77), -INT8_C( 47), -INT8_C( 64), INT8_C( 122), -INT8_C( 25), -INT8_C( 47) } }, { { INT8_C( 67), -INT8_C( 37), -INT8_C( 2), INT8_C( 126), -INT8_C( 51), -INT8_C( 21), INT8_C( 120), -INT8_C( 124), -INT8_C( 70), INT8_C( 56), -INT8_C( 96), -INT8_C( 51), INT8_C( 7), INT8_C( 72), -INT8_C( 68), -INT8_C( 94) }, { INT8_C( 10), -INT8_C( 35), -INT8_C( 118), INT8_C( 8), -INT8_C( 72), INT8_C( 42), INT8_C( 85), INT8_C( 86), -INT8_C( 45), -INT8_C( 113), -INT8_C( 67), INT8_C( 77), -INT8_C( 120), INT8_C( 74), INT8_C( 76), -INT8_C( 53) }, UINT16_C(18982), { INT8_C( 74), -INT8_C( 13), INT8_C( 53), -INT8_C( 62), INT8_C( 120), -INT8_C( 17), -INT8_C( 6), INT8_C( 24), -INT8_C( 68), INT8_C( 1), INT8_C( 97), INT8_C( 121), -INT8_C( 92), INT8_C( 107), INT8_C( 86), INT8_C( 46) }, { INT8_C( 10), INT8_C( 
107), -INT8_C( 96), INT8_C( 8), -INT8_C( 72), -INT8_C( 96), INT8_C( 85), INT8_C( 86), -INT8_C( 45), -INT8_C( 94), -INT8_C( 67), INT8_C( 72), -INT8_C( 120), INT8_C( 74), INT8_C( 7), -INT8_C( 53) } }, { { INT8_C( 116), INT8_C( 15), INT8_C( 88), -INT8_C( 55), INT8_C( 101), INT8_C( 43), INT8_C( 89), INT8_C( 34), INT8_C( 120), -INT8_C( 31), INT8_C( 109), -INT8_C( 59), -INT8_C( 84), -INT8_C( 109), INT8_C( 15), -INT8_C( 10) }, { -INT8_C( 122), INT8_C( 69), -INT8_C( 72), -INT8_C( 2), INT8_C( 52), -INT8_C( 78), INT8_C( 23), -INT8_C( 15), -INT8_C( 76), INT8_C( 120), INT8_C( 106), INT8_C( 88), -INT8_C( 29), -INT8_C( 64), -INT8_C( 122), INT8_C( 87) }, UINT16_C(57039), { INT8_C( 33), INT8_C( 52), INT8_C( 9), INT8_C( 122), INT8_C( 87), -INT8_C( 127), INT8_C( 91), -INT8_C( 60), INT8_C( 70), INT8_C( 7), INT8_C( 87), INT8_C( 86), -INT8_C( 2), -INT8_C( 35), -INT8_C( 101), -INT8_C( 74) }, { INT8_C( 89), INT8_C( 43), INT8_C( 70), -INT8_C( 101), INT8_C( 52), -INT8_C( 78), -INT8_C( 60), INT8_C( 52), -INT8_C( 76), INT8_C( 70), INT8_C( 109), INT8_C( 70), -INT8_C( 55), -INT8_C( 64), INT8_C( 89), -INT8_C( 60) } }, { { -INT8_C( 36), -INT8_C( 49), INT8_C( 105), -INT8_C( 13), -INT8_C( 64), INT8_C( 29), INT8_C( 107), INT8_C( 42), INT8_C( 117), INT8_C( 78), -INT8_C( 21), -INT8_C( 5), -INT8_C( 90), -INT8_C( 70), -INT8_C( 39), -INT8_C( 57) }, { -INT8_C( 17), -INT8_C( 30), INT8_C( 65), INT8_C( 70), INT8_C( 99), -INT8_C( 100), INT8_C( 10), -INT8_C( 86), -INT8_C( 93), INT8_C( 97), INT8_C( 0), -INT8_C( 95), INT8_C( 62), -INT8_C( 101), INT8_C( 88), INT8_C( 26) }, UINT16_C(49514), { INT8_C( 13), INT8_C( 43), -INT8_C( 34), INT8_C( 120), INT8_C( 85), INT8_C( 83), -INT8_C( 57), INT8_C( 64), INT8_C( 78), INT8_C( 109), -INT8_C( 5), INT8_C( 39), INT8_C( 52), -INT8_C( 22), INT8_C( 9), INT8_C( 117) }, { -INT8_C( 17), INT8_C( 105), INT8_C( 65), INT8_C( 107), INT8_C( 99), INT8_C( 52), -INT8_C( 21), -INT8_C( 86), -INT8_C( 13), INT8_C( 97), INT8_C( 0), -INT8_C( 95), INT8_C( 62), -INT8_C( 101), INT8_C( 78), -INT8_C( 
5) } }, { { INT8_C( 48), INT8_C( 108), INT8_C( 17), INT8_C( 58), INT8_C( 22), -INT8_C( 76), -INT8_C( 101), INT8_C( 22), INT8_C( 86), -INT8_C( 39), -INT8_C( 79), -INT8_C( 82), -INT8_C( 12), INT8_C( 28), INT8_C( 111), INT8_C( 1) }, { INT8_C( 71), INT8_C( 77), INT8_C( 122), -INT8_C( 100), -INT8_C( 96), INT8_C( 65), -INT8_C( 35), -INT8_C( 18), -INT8_C( 82), -INT8_C( 40), INT8_C( 21), -INT8_C( 30), -INT8_C( 62), INT8_C( 30), INT8_C( 87), -INT8_C( 14) }, UINT16_C(26762), { INT8_C( 44), -INT8_C( 95), INT8_C( 28), -INT8_C( 57), -INT8_C( 73), INT8_C( 114), -INT8_C( 96), INT8_C( 105), INT8_C( 32), -INT8_C( 108), -INT8_C( 123), -INT8_C( 113), -INT8_C( 106), -INT8_C( 52), -INT8_C( 36), INT8_C( 16) }, { INT8_C( 71), INT8_C( 28), INT8_C( 122), -INT8_C( 106), -INT8_C( 96), INT8_C( 65), -INT8_C( 35), INT8_C( 111), -INT8_C( 82), -INT8_C( 40), INT8_C( 21), INT8_C( 17), -INT8_C( 62), -INT8_C( 36), INT8_C( 105), -INT8_C( 14) } }, { { INT8_C( 104), INT8_C( 124), INT8_C( 81), INT8_C( 69), INT8_C( 106), -INT8_C( 1), INT8_C( 29), INT8_MAX, -INT8_C( 31), -INT8_C( 33), -INT8_C( 99), INT8_C( 56), -INT8_C( 47), INT8_C( 40), -INT8_C( 96), -INT8_C( 3) }, { -INT8_C( 55), -INT8_C( 68), -INT8_C( 60), INT8_MIN, INT8_C( 47), INT8_C( 101), -INT8_C( 23), INT8_C( 79), -INT8_C( 7), INT8_C( 110), -INT8_C( 33), -INT8_C( 113), INT8_C( 58), -INT8_C( 69), -INT8_C( 97), -INT8_C( 93) }, UINT16_C(61496), { -INT8_C( 24), -INT8_C( 94), -INT8_C( 17), INT8_C( 6), INT8_C( 34), -INT8_C( 48), -INT8_C( 27), -INT8_C( 65), INT8_C( 8), -INT8_C( 73), -INT8_C( 25), -INT8_C( 88), -INT8_C( 76), -INT8_C( 80), INT8_C( 101), INT8_C( 121) }, { -INT8_C( 55), -INT8_C( 68), -INT8_C( 60), INT8_C( 104), -INT8_C( 3), -INT8_C( 1), -INT8_C( 23), INT8_C( 79), -INT8_C( 7), INT8_C( 110), -INT8_C( 33), -INT8_C( 113), -INT8_C( 25), -INT8_C( 88), INT8_C( 121), INT8_C( 69) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i idx = 
simde_mm_loadu_epi8(test_vec[i].idx); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_mask2_permutex2var_epi8(a, idx, test_vec[i].k, b); simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i idx = simde_test_x86_random_i8x16(); simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m128i b = simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_mask2_permutex2var_epi8(a, idx, k, b); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_maskz_permutex2var_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask16 k; const int8_t a[32]; const int8_t idx[32]; const int8_t b[32]; const int8_t r[32]; } test_vec[] = { { UINT16_C(37937), { -INT8_C( 34), INT8_C( 26), -INT8_C( 29), -INT8_C( 41), -INT8_C( 119), -INT8_C( 62), INT8_C( 103), -INT8_C( 61), INT8_C( 126), INT8_C( 6), INT8_C( 102), -INT8_C( 74), -INT8_C( 9), INT8_C( 79), INT8_C( 88), -INT8_C( 26) }, { INT8_C( 85), INT8_C( 122), -INT8_C( 73), INT8_C( 58), INT8_C( 58), -INT8_C( 65), -INT8_C( 15), INT8_C( 33), INT8_C( 104), -INT8_C( 90), -INT8_C( 46), -INT8_C( 51), INT8_C( 31), INT8_C( 3), INT8_C( 97), -INT8_C( 3) }, { INT8_C( 29), INT8_C( 68), -INT8_C( 44), -INT8_C( 90), INT8_C( 7), INT8_C( 59), INT8_C( 106), -INT8_C( 123), INT8_C( 66), -INT8_C( 48), INT8_C( 59), INT8_C( 57), INT8_C( 31), -INT8_C( 109), INT8_C( 31), INT8_C( 116) }, { INT8_C( 59), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 59), INT8_C( 116), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 44), INT8_C( 0), INT8_C( 116), INT8_C( 0), INT8_C( 0), 
-INT8_C( 109) } }, { UINT16_C(54798), { -INT8_C( 81), INT8_C( 72), -INT8_C( 106), -INT8_C( 96), INT8_C( 105), -INT8_C( 2), INT8_C( 70), INT8_C( 59), -INT8_C( 53), INT8_C( 101), INT8_C( 62), INT8_C( 44), INT8_C( 98), INT8_C( 92), INT8_C( 112), INT8_C( 55) }, { INT8_C( 2), INT8_C( 119), INT8_C( 114), INT8_C( 108), -INT8_C( 4), -INT8_C( 76), INT8_C( 61), INT8_C( 55), -INT8_C( 19), INT8_C( 92), -INT8_C( 53), INT8_C( 13), -INT8_C( 47), -INT8_C( 39), -INT8_C( 29), INT8_MIN }, { INT8_C( 33), INT8_C( 121), INT8_C( 32), -INT8_C( 118), INT8_C( 119), INT8_C( 103), -INT8_C( 58), INT8_C( 66), -INT8_C( 52), INT8_C( 4), INT8_C( 110), INT8_C( 47), INT8_C( 96), -INT8_C( 33), INT8_C( 102), INT8_C( 99) }, { INT8_C( 0), INT8_C( 66), INT8_C( 32), INT8_C( 98), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 96), INT8_C( 44), INT8_C( 0), INT8_C( 121), INT8_C( 0), -INT8_C( 96), -INT8_C( 81) } }, { UINT16_C(55382), { -INT8_C( 49), INT8_C( 83), -INT8_C( 115), INT8_C( 12), -INT8_C( 118), INT8_C( 122), INT8_C( 105), INT8_C( 85), -INT8_C( 121), INT8_C( 58), INT8_C( 46), INT8_C( 107), -INT8_C( 70), INT8_C( 79), -INT8_C( 28), -INT8_C( 38) }, { -INT8_C( 38), INT8_C( 92), INT8_C( 65), -INT8_C( 96), -INT8_C( 98), INT8_C( 14), -INT8_C( 92), INT8_C( 13), INT8_C( 61), INT8_C( 5), -INT8_C( 20), -INT8_C( 93), INT8_C( 104), INT8_C( 66), INT8_C( 123), INT8_C( 55) }, { -INT8_C( 107), INT8_C( 8), INT8_C( 68), INT8_C( 32), -INT8_C( 125), -INT8_C( 83), INT8_C( 117), INT8_C( 10), -INT8_C( 25), -INT8_C( 92), INT8_C( 117), -INT8_C( 95), -INT8_C( 13), INT8_C( 90), INT8_C( 123), -INT8_C( 51) }, { INT8_C( 0), -INT8_C( 13), INT8_C( 83), INT8_C( 0), INT8_C( 123), INT8_C( 0), -INT8_C( 118), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 12), -INT8_C( 121), INT8_C( 0), -INT8_C( 95), INT8_C( 10) } }, { UINT16_C(48566), { INT8_C( 109), INT8_C( 84), -INT8_C( 53), INT8_C( 18), INT8_C( 97), INT8_C( 8), INT8_C( 23), INT8_C( 77), -INT8_C( 85), INT8_MAX, -INT8_C( 112), INT8_C( 38), -INT8_C( 74), 
INT8_C( 37), INT8_C( 47), -INT8_C( 6) }, { INT8_C( 69), -INT8_C( 78), -INT8_C( 89), -INT8_C( 69), -INT8_C( 68), -INT8_C( 114), INT8_C( 95), INT8_C( 50), INT8_C( 47), INT8_C( 82), -INT8_C( 116), -INT8_C( 85), INT8_C( 32), INT8_C( 66), INT8_C( 104), -INT8_C( 115) }, { -INT8_C( 106), INT8_C( 51), -INT8_C( 97), -INT8_C( 8), INT8_C( 59), -INT8_C( 74), INT8_C( 69), -INT8_C( 26), INT8_C( 53), -INT8_C( 43), INT8_C( 12), -INT8_C( 20), -INT8_C( 5), INT8_C( 59), -INT8_C( 26), INT8_C( 64) }, { INT8_C( 0), -INT8_C( 97), INT8_C( 77), INT8_C( 0), -INT8_C( 5), INT8_C( 47), INT8_C( 0), -INT8_C( 97), -INT8_C( 6), INT8_C( 0), -INT8_C( 74), INT8_C( 38), INT8_C( 109), -INT8_C( 53), INT8_C( 0), INT8_C( 37) } }, { UINT16_C(36589), { -INT8_C( 5), -INT8_C( 86), INT8_C( 28), INT8_C( 90), -INT8_C( 36), INT8_C( 76), -INT8_C( 83), INT8_C( 104), -INT8_C( 9), -INT8_C( 51), -INT8_C( 86), INT8_C( 95), INT8_C( 90), INT8_C( 64), -INT8_C( 110), -INT8_C( 6) }, { INT8_C( 56), -INT8_C( 51), -INT8_C( 80), INT8_C( 126), -INT8_C( 77), -INT8_C( 26), INT8_C( 83), -INT8_C( 65), -INT8_C( 46), INT8_C( 78), -INT8_C( 5), -INT8_C( 72), -INT8_C( 113), -INT8_C( 24), INT8_C( 70), -INT8_C( 118) }, { -INT8_C( 110), INT8_C( 99), -INT8_C( 27), INT8_C( 110), -INT8_C( 81), -INT8_C( 110), -INT8_C( 42), -INT8_C( 90), INT8_C( 95), INT8_MIN, INT8_C( 5), -INT8_C( 71), -INT8_C( 63), -INT8_C( 105), -INT8_C( 77), -INT8_C( 7) }, { INT8_C( 95), INT8_C( 0), -INT8_C( 110), -INT8_C( 77), INT8_C( 0), -INT8_C( 83), INT8_C( 110), -INT8_C( 7), INT8_C( 0), -INT8_C( 110), -INT8_C( 71), INT8_C( 95), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 86) } }, { UINT16_C(25700), { INT8_C( 119), INT8_C( 23), INT8_C( 74), -INT8_C( 53), -INT8_C( 42), INT8_C( 28), INT8_C( 25), -INT8_C( 47), -INT8_C( 44), -INT8_C( 88), -INT8_C( 70), INT8_C( 27), INT8_C( 51), INT8_C( 76), INT8_C( 126), INT8_C( 24) }, { -INT8_C( 69), INT8_C( 45), -INT8_C( 86), -INT8_C( 111), -INT8_C( 45), INT8_C( 9), INT8_C( 18), -INT8_C( 40), -INT8_C( 62), -INT8_C( 45), INT8_C( 111), 
INT8_C( 118), -INT8_C( 52), -INT8_C( 45), -INT8_C( 38), INT8_C( 68) }, { -INT8_C( 22), INT8_C( 36), INT8_C( 15), -INT8_C( 64), INT8_C( 64), INT8_C( 40), -INT8_C( 110), INT8_C( 20), -INT8_C( 47), INT8_C( 76), INT8_C( 47), INT8_C( 4), -INT8_C( 104), -INT8_C( 83), INT8_C( 28), INT8_C( 83) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 70), INT8_C( 0), INT8_C( 0), -INT8_C( 88), INT8_C( 15), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 24), INT8_C( 0), INT8_C( 0), -INT8_C( 64), INT8_C( 47), INT8_C( 0) } }, { UINT16_C(50906), { -INT8_C( 27), -INT8_C( 83), -INT8_C( 49), -INT8_C( 9), -INT8_C( 123), -INT8_C( 111), -INT8_C( 54), -INT8_C( 12), INT8_C( 7), -INT8_C( 106), -INT8_C( 57), -INT8_C( 31), -INT8_C( 38), -INT8_C( 79), INT8_C( 5), -INT8_C( 23) }, { INT8_C( 114), INT8_C( 69), INT8_C( 18), INT8_C( 4), INT8_C( 90), -INT8_C( 29), INT8_C( 80), -INT8_C( 119), -INT8_C( 25), -INT8_C( 24), INT8_C( 55), INT8_C( 3), INT8_C( 60), INT8_C( 17), -INT8_C( 55), INT8_C( 33) }, { -INT8_C( 65), -INT8_C( 104), INT8_C( 24), INT8_C( 68), INT8_C( 41), -INT8_C( 30), INT8_C( 57), INT8_C( 49), INT8_C( 120), INT8_C( 0), INT8_C( 18), INT8_C( 83), -INT8_C( 78), INT8_C( 24), INT8_C( 60), INT8_C( 36) }, { INT8_C( 0), -INT8_C( 111), INT8_C( 0), -INT8_C( 123), INT8_C( 18), INT8_C( 0), -INT8_C( 65), -INT8_C( 106), INT8_C( 0), INT8_C( 7), INT8_C( 49), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 106), -INT8_C( 83) } }, { UINT16_C(20061), { INT8_C( 40), -INT8_C( 73), INT8_C( 49), INT8_C( 120), INT8_C( 65), INT8_C( 24), INT8_C( 96), INT8_C( 120), INT8_C( 27), -INT8_C( 100), -INT8_C( 119), -INT8_C( 28), -INT8_C( 67), INT8_C( 72), INT8_C( 124), -INT8_C( 43) }, { -INT8_C( 115), -INT8_C( 90), -INT8_C( 73), -INT8_C( 58), -INT8_C( 41), INT8_C( 48), -INT8_C( 58), -INT8_C( 23), -INT8_C( 125), INT8_C( 120), INT8_C( 1), -INT8_C( 65), -INT8_C( 100), INT8_C( 95), INT8_C( 14), -INT8_C( 60) }, { INT8_C( 22), INT8_C( 63), INT8_C( 60), INT8_C( 87), INT8_C( 88), -INT8_C( 99), -INT8_C( 49), INT8_C( 115), INT8_C( 57), INT8_C( 89), 
INT8_C( 88), -INT8_C( 9), -INT8_C( 95), -INT8_C( 44), -INT8_C( 52), INT8_C( 46) }, { INT8_C( 72), INT8_C( 0), INT8_C( 115), INT8_C( 96), INT8_C( 115), INT8_C( 0), INT8_C( 96), INT8_C( 0), INT8_C( 0), INT8_C( 57), -INT8_C( 73), INT8_C( 46), INT8_C( 0), INT8_C( 0), INT8_C( 124), INT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi8(test_vec[i].idx); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_maskz_permutex2var_epi8(test_vec[i].k, a, idx, b); simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i idx = simde_test_x86_random_i8x16(); simde__m128i b = simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_maskz_permutex2var_epi8(k, a, idx, b); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_permutex2var_pd(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde_float64 a[4]; const int64_t idx[4]; const simde_float64 b[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 266.06), SIMDE_FLOAT64_C( 744.58) }, { INT64_C( 2866518622857575137), -INT64_C( 70274485513066767) }, { SIMDE_FLOAT64_C( -564.11), SIMDE_FLOAT64_C( 228.84) }, { SIMDE_FLOAT64_C( 744.58), SIMDE_FLOAT64_C( 744.58) } }, { { SIMDE_FLOAT64_C( -667.55), SIMDE_FLOAT64_C( -62.50) }, { INT64_C( 2359109570743171007), INT64_C( 1888561416568168530) }, { SIMDE_FLOAT64_C( 32.27), SIMDE_FLOAT64_C( -339.12) }, { SIMDE_FLOAT64_C( -339.12), 
SIMDE_FLOAT64_C( 32.27) } }, { { SIMDE_FLOAT64_C( 368.54), SIMDE_FLOAT64_C( 616.08) }, { INT64_C( 3682737025744534114), -INT64_C( 3543321188468379579) }, { SIMDE_FLOAT64_C( 916.80), SIMDE_FLOAT64_C( -773.58) }, { SIMDE_FLOAT64_C( 916.80), SIMDE_FLOAT64_C( 616.08) } }, { { SIMDE_FLOAT64_C( 267.08), SIMDE_FLOAT64_C( -489.27) }, { -INT64_C( 1214123070397285263), -INT64_C( 5616835484101678963) }, { SIMDE_FLOAT64_C( 601.50), SIMDE_FLOAT64_C( 563.87) }, { SIMDE_FLOAT64_C( -489.27), SIMDE_FLOAT64_C( -489.27) } }, { { SIMDE_FLOAT64_C( 53.28), SIMDE_FLOAT64_C( -121.26) }, { -INT64_C( 8120324682462519208), -INT64_C( 5054143542780805793) }, { SIMDE_FLOAT64_C( -86.54), SIMDE_FLOAT64_C( -771.73) }, { SIMDE_FLOAT64_C( 53.28), SIMDE_FLOAT64_C( -771.73) } }, { { SIMDE_FLOAT64_C( 696.37), SIMDE_FLOAT64_C( -449.91) }, { -INT64_C( 6341298119235143830), -INT64_C( 4742401433874209450) }, { SIMDE_FLOAT64_C( -228.37), SIMDE_FLOAT64_C( -420.09) }, { SIMDE_FLOAT64_C( -228.37), SIMDE_FLOAT64_C( -228.37) } }, { { SIMDE_FLOAT64_C( 68.99), SIMDE_FLOAT64_C( 66.57) }, { -INT64_C( 4778752305038490181), INT64_C( 4840825679982319224) }, { SIMDE_FLOAT64_C( -375.97), SIMDE_FLOAT64_C( -736.00) }, { SIMDE_FLOAT64_C( -736.00), SIMDE_FLOAT64_C( 68.99) } }, { { SIMDE_FLOAT64_C( -140.34), SIMDE_FLOAT64_C( -974.54) }, { -INT64_C( 8154202376932849390), INT64_C( 4729327344514770161) }, { SIMDE_FLOAT64_C( -658.78), SIMDE_FLOAT64_C( 358.05) }, { SIMDE_FLOAT64_C( -658.78), SIMDE_FLOAT64_C( -974.54) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi64(test_vec[i].idx); simde__m128d b = simde_mm_loadu_pd(test_vec[i].b); simde__m128d r = simde_mm_permutex2var_pd(a, idx, b); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128d a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), 
SIMDE_FLOAT64_C(1000.0)); simde__m128i idx = simde_test_x86_random_i64x2(); simde__m128d b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128d r = simde_mm_permutex2var_pd(a, idx, b); simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_mask_permutex2var_pd(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde_float64 a[4]; const simde__mmask8 k; const int64_t idx[4]; const simde_float64 b[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( -885.68), SIMDE_FLOAT64_C( 794.73) }, UINT8_C(165), { INT64_C( 2179239678391337709), -INT64_C( 8228278719115897823) }, { SIMDE_FLOAT64_C( -869.89), SIMDE_FLOAT64_C( -18.96) }, { SIMDE_FLOAT64_C( 794.73), SIMDE_FLOAT64_C( 794.73) } }, { { SIMDE_FLOAT64_C( -460.41), SIMDE_FLOAT64_C( -989.41) }, UINT8_C(181), { -INT64_C( 6045220835677475797), -INT64_C( 1539500220766109457) }, { SIMDE_FLOAT64_C( 457.21), SIMDE_FLOAT64_C( 356.40) }, { SIMDE_FLOAT64_C( 356.40), SIMDE_FLOAT64_C( -989.41) } }, { { SIMDE_FLOAT64_C( -388.55), SIMDE_FLOAT64_C( -109.59) }, UINT8_C(240), { -INT64_C( 3539169254304005194), INT64_C( 5743432299311634587) }, { SIMDE_FLOAT64_C( -644.32), SIMDE_FLOAT64_C( -714.64) }, { SIMDE_FLOAT64_C( -388.55), SIMDE_FLOAT64_C( -109.59) } }, { { SIMDE_FLOAT64_C( 410.34), SIMDE_FLOAT64_C( 625.69) }, UINT8_C(250), { INT64_C( 5427515123186161136), -INT64_C( 3232029291155733998) }, { SIMDE_FLOAT64_C( 741.50), SIMDE_FLOAT64_C( -336.20) }, { SIMDE_FLOAT64_C( 410.34), SIMDE_FLOAT64_C( 741.50) } }, { { SIMDE_FLOAT64_C( -542.46), SIMDE_FLOAT64_C( 978.14) }, UINT8_C(237), { -INT64_C( 3635931506042322827), INT64_C( 9135638802339824692) }, { SIMDE_FLOAT64_C( -632.49), SIMDE_FLOAT64_C( -921.53) }, { SIMDE_FLOAT64_C( 978.14), SIMDE_FLOAT64_C( 978.14) 
} }, { { SIMDE_FLOAT64_C( 780.08), SIMDE_FLOAT64_C( -493.39) }, UINT8_C( 20), { -INT64_C( 6810002066634023067), -INT64_C( 5705426183989531901) }, { SIMDE_FLOAT64_C( -300.37), SIMDE_FLOAT64_C( 736.42) }, { SIMDE_FLOAT64_C( 780.08), SIMDE_FLOAT64_C( -493.39) } }, { { SIMDE_FLOAT64_C( 269.97), SIMDE_FLOAT64_C( 873.11) }, UINT8_C( 93), { INT64_C( 8451372922109257325), -INT64_C( 304725281487495630) }, { SIMDE_FLOAT64_C( 286.92), SIMDE_FLOAT64_C( -246.02) }, { SIMDE_FLOAT64_C( 873.11), SIMDE_FLOAT64_C( 873.11) } }, { { SIMDE_FLOAT64_C( -82.47), SIMDE_FLOAT64_C( -217.25) }, UINT8_C(125), { INT64_C( 123156418051075138), INT64_C( 1019402031995949786) }, { SIMDE_FLOAT64_C( -85.30), SIMDE_FLOAT64_C( 529.62) }, { SIMDE_FLOAT64_C( -85.30), SIMDE_FLOAT64_C( -217.25) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi64(test_vec[i].idx); simde__m128d b = simde_mm_loadu_pd(test_vec[i].b); simde__m128d r = simde_mm_mask_permutex2var_pd(a, test_vec[i].k, idx, b); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128d a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i idx = simde_test_x86_random_i64x2(); simde__m128d b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128d r = simde_mm_mask_permutex2var_pd(a, k, idx, b); simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_mask2_permutex2var_pd(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { 
const simde_float64 a[4]; const int64_t idx[4]; const simde__mmask8 k; const simde_float64 b[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 830.45), SIMDE_FLOAT64_C( -172.42) }, { INT64_C( 4650192850772568310), -INT64_C( 4573139603663607562) }, UINT8_C( 68), { SIMDE_FLOAT64_C( -762.66), SIMDE_FLOAT64_C( 535.67) }, { SIMDE_FLOAT64_C( 793.72), SIMDE_FLOAT64_C( -798.22) } }, { { SIMDE_FLOAT64_C( 938.33), SIMDE_FLOAT64_C( 524.27) }, { -INT64_C( 4573912516357468979), -INT64_C( 4584096544897651180) }, UINT8_C( 13), { SIMDE_FLOAT64_C( -216.11), SIMDE_FLOAT64_C( 121.15) }, { SIMDE_FLOAT64_C( 524.27), SIMDE_FLOAT64_C( -144.14) } }, { { SIMDE_FLOAT64_C( 731.29), SIMDE_FLOAT64_C( -644.61) }, { -INT64_C( 4573329423351026811), INT64_C( 4643958531882148168) }, UINT8_C(196), { SIMDE_FLOAT64_C( 62.00), SIMDE_FLOAT64_C( -864.37) }, { SIMDE_FLOAT64_C( -776.64), SIMDE_FLOAT64_C( 298.48) } }, { { SIMDE_FLOAT64_C( -229.25), SIMDE_FLOAT64_C( 143.95) }, { -INT64_C( 4575088026228956856), INT64_C( 4648156115433276047) }, UINT8_C( 29), { SIMDE_FLOAT64_C( 985.24), SIMDE_FLOAT64_C( -167.35) }, { SIMDE_FLOAT64_C( -229.25), SIMDE_FLOAT64_C( 562.17) } }, { { SIMDE_FLOAT64_C( -817.77), SIMDE_FLOAT64_C( -100.06) }, { -INT64_C( 4574551288632741724), -INT64_C( 4571476262473108029) }, UINT8_C(228), { SIMDE_FLOAT64_C( -844.00), SIMDE_FLOAT64_C( -785.54) }, { SIMDE_FLOAT64_C( -637.73), SIMDE_FLOAT64_C( -987.32) } }, { { SIMDE_FLOAT64_C( 649.13), SIMDE_FLOAT64_C( -606.66) }, { INT64_C( 4649809429077730263), INT64_C( 4648378568625807688) }, UINT8_C(175), { SIMDE_FLOAT64_C( -960.22), SIMDE_FLOAT64_C( -556.68) }, { SIMDE_FLOAT64_C( -556.68), SIMDE_FLOAT64_C( 649.13) } }, { { SIMDE_FLOAT64_C( -775.38), SIMDE_FLOAT64_C( -176.33) }, { INT64_C( 4648176258486296904), INT64_C( 4651619577060770447) }, UINT8_C(137), { SIMDE_FLOAT64_C( 787.83), SIMDE_FLOAT64_C( 254.39) }, { SIMDE_FLOAT64_C( -775.38), SIMDE_FLOAT64_C( 955.92) } }, { { SIMDE_FLOAT64_C( 805.57), SIMDE_FLOAT64_C( -150.17) }, { 
INT64_C( 4645568920592654008), -INT64_C( 4577210963279866757) }, UINT8_C(105), { SIMDE_FLOAT64_C( 813.31), SIMDE_FLOAT64_C( -861.51) }, { SIMDE_FLOAT64_C( 805.57), SIMDE_FLOAT64_C( -423.68) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi64(test_vec[i].idx); simde__m128d b = simde_mm_loadu_pd(test_vec[i].b); simde__m128d r = simde_mm_mask2_permutex2var_pd(a, idx, test_vec[i].k, b); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128d a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128i idx = simde_mm_castpd_si128(simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0))); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128d b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128d r = simde_mm_mask2_permutex2var_pd(a, idx, k, b); simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_maskz_permutex2var_pd(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde__mmask8 k; const simde_float64 a[4]; const int64_t idx[4]; const simde_float64 b[4]; const simde_float64 r[4]; } test_vec[] = { { UINT8_C(134), { SIMDE_FLOAT64_C( 798.55), SIMDE_FLOAT64_C( -28.86) }, { -INT64_C( 5234165115708920910), INT64_C( 6259940836224545394) }, { SIMDE_FLOAT64_C( 670.39), SIMDE_FLOAT64_C( 980.89) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 670.39) } }, { UINT8_C(223), { SIMDE_FLOAT64_C( 458.22), SIMDE_FLOAT64_C( 235.28) }, { -INT64_C( 6133148272475915475), 
-INT64_C( 6918802135733980991) }, { SIMDE_FLOAT64_C( 976.22), SIMDE_FLOAT64_C( 436.82) }, { SIMDE_FLOAT64_C( 235.28), SIMDE_FLOAT64_C( 235.28) } }, { UINT8_C( 18), { SIMDE_FLOAT64_C( -142.32), SIMDE_FLOAT64_C( 99.43) }, { INT64_C( 4983386258217242676), -INT64_C( 136643658833065413) }, { SIMDE_FLOAT64_C( 927.46), SIMDE_FLOAT64_C( 967.68) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 967.68) } }, { UINT8_C(192), { SIMDE_FLOAT64_C( -657.07), SIMDE_FLOAT64_C( 895.69) }, { -INT64_C( 1307324981216330964), -INT64_C( 3174417301149471108) }, { SIMDE_FLOAT64_C( 246.17), SIMDE_FLOAT64_C( 324.96) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C( 14), { SIMDE_FLOAT64_C( 408.71), SIMDE_FLOAT64_C( -497.75) }, { INT64_C( 8494812220930504452), INT64_C( 1357776601162581184) }, { SIMDE_FLOAT64_C( -755.26), SIMDE_FLOAT64_C( 128.82) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 408.71) } }, { UINT8_C(142), { SIMDE_FLOAT64_C( -458.03), SIMDE_FLOAT64_C( -498.64) }, { INT64_C( 9026187197985430298), -INT64_C( 4230674304573706788) }, { SIMDE_FLOAT64_C( 53.78), SIMDE_FLOAT64_C( 981.38) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -458.03) } }, { UINT8_C(133), { SIMDE_FLOAT64_C( 215.38), SIMDE_FLOAT64_C( 267.34) }, { -INT64_C( 4099173715614750847), -INT64_C( 8057523459168599774) }, { SIMDE_FLOAT64_C( 649.79), SIMDE_FLOAT64_C( -296.26) }, { SIMDE_FLOAT64_C( 267.34), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(108), { SIMDE_FLOAT64_C( -98.98), SIMDE_FLOAT64_C( 193.37) }, { -INT64_C( 8428680193957347328), INT64_C( 7077518496431368408) }, { SIMDE_FLOAT64_C( -565.11), SIMDE_FLOAT64_C( 34.47) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi64(test_vec[i].idx); simde__m128d b = simde_mm_loadu_pd(test_vec[i].b); simde__m128d r = simde_mm_maskz_permutex2var_pd(test_vec[i].k, a, idx, b); 
simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128d a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128i idx = simde_test_x86_random_i64x2(); simde__m128d b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128d r = simde_mm_maskz_permutex2var_pd(k, a, idx, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_permutex2var_ps(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde_float32 a[8]; const int32_t idx[8]; const simde_float32 b[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( -588.11), SIMDE_FLOAT32_C( -504.46), SIMDE_FLOAT32_C( -488.73), SIMDE_FLOAT32_C( 557.45) }, { INT32_C( 762301853), INT32_C( 463084285), INT32_C( 324835910), -INT32_C( 362520093) }, { SIMDE_FLOAT32_C( -920.04), SIMDE_FLOAT32_C( -710.31), SIMDE_FLOAT32_C( 637.33), SIMDE_FLOAT32_C( -474.92) }, { SIMDE_FLOAT32_C( -710.31), SIMDE_FLOAT32_C( -710.31), SIMDE_FLOAT32_C( 637.33), SIMDE_FLOAT32_C( 557.45) } }, { { SIMDE_FLOAT32_C( -342.35), SIMDE_FLOAT32_C( -610.76), SIMDE_FLOAT32_C( 400.68), SIMDE_FLOAT32_C( -237.86) }, { -INT32_C( 881664185), INT32_C( 938594), INT32_C( 472743199), -INT32_C( 566704233) }, { SIMDE_FLOAT32_C( 618.15), SIMDE_FLOAT32_C( 845.27), SIMDE_FLOAT32_C( -347.60), SIMDE_FLOAT32_C( 647.55) }, { SIMDE_FLOAT32_C( 647.55), SIMDE_FLOAT32_C( 400.68), SIMDE_FLOAT32_C( 647.55), SIMDE_FLOAT32_C( 647.55) } }, { { SIMDE_FLOAT32_C( 232.91), SIMDE_FLOAT32_C( -308.31), SIMDE_FLOAT32_C( -152.75), SIMDE_FLOAT32_C( 312.87) }, { 
-INT32_C( 858029679), -INT32_C( 1388365466), INT32_C( 2071572760), INT32_C( 578520835) }, { SIMDE_FLOAT32_C( 150.94), SIMDE_FLOAT32_C( 144.27), SIMDE_FLOAT32_C( 7.48), SIMDE_FLOAT32_C( 776.03) }, { SIMDE_FLOAT32_C( -308.31), SIMDE_FLOAT32_C( 7.48), SIMDE_FLOAT32_C( 232.91), SIMDE_FLOAT32_C( 312.87) } }, { { SIMDE_FLOAT32_C( 921.94), SIMDE_FLOAT32_C( 752.21), SIMDE_FLOAT32_C( 424.16), SIMDE_FLOAT32_C( 540.08) }, { INT32_C( 1469195), INT32_C( 1390560705), INT32_C( 757054918), INT32_C( 265969398) }, { SIMDE_FLOAT32_C( 987.63), SIMDE_FLOAT32_C( -346.72), SIMDE_FLOAT32_C( 699.38), SIMDE_FLOAT32_C( -506.70) }, { SIMDE_FLOAT32_C( 540.08), SIMDE_FLOAT32_C( 752.21), SIMDE_FLOAT32_C( 699.38), SIMDE_FLOAT32_C( 699.38) } }, { { SIMDE_FLOAT32_C( 8.80), SIMDE_FLOAT32_C( -187.71), SIMDE_FLOAT32_C( -281.06), SIMDE_FLOAT32_C( -840.26) }, { INT32_C( 494630317), -INT32_C( 135268884), INT32_C( 569836896), INT32_C( 276093514) }, { SIMDE_FLOAT32_C( -969.03), SIMDE_FLOAT32_C( 343.07), SIMDE_FLOAT32_C( -446.19), SIMDE_FLOAT32_C( 250.81) }, { SIMDE_FLOAT32_C( 343.07), SIMDE_FLOAT32_C( -969.03), SIMDE_FLOAT32_C( 8.80), SIMDE_FLOAT32_C( -281.06) } }, { { SIMDE_FLOAT32_C( 132.92), SIMDE_FLOAT32_C( -478.94), SIMDE_FLOAT32_C( 544.87), SIMDE_FLOAT32_C( 120.55) }, { INT32_C( 1175791467), -INT32_C( 651867348), -INT32_C( 1393057600), -INT32_C( 173807979) }, { SIMDE_FLOAT32_C( 916.37), SIMDE_FLOAT32_C( -479.54), SIMDE_FLOAT32_C( 279.87), SIMDE_FLOAT32_C( -442.55) }, { SIMDE_FLOAT32_C( 120.55), SIMDE_FLOAT32_C( 916.37), SIMDE_FLOAT32_C( 132.92), SIMDE_FLOAT32_C( -479.54) } }, { { SIMDE_FLOAT32_C( -429.78), SIMDE_FLOAT32_C( 234.27), SIMDE_FLOAT32_C( 717.98), SIMDE_FLOAT32_C( -398.81) }, { INT32_C( 261784350), INT32_C( 101791643), -INT32_C( 1974720930), INT32_C( 828600689) }, { SIMDE_FLOAT32_C( -228.13), SIMDE_FLOAT32_C( -397.66), SIMDE_FLOAT32_C( -625.50), SIMDE_FLOAT32_C( -589.53) }, { SIMDE_FLOAT32_C( -625.50), SIMDE_FLOAT32_C( -398.81), SIMDE_FLOAT32_C( -625.50), SIMDE_FLOAT32_C( 234.27) } }, { { 
SIMDE_FLOAT32_C( 912.50), SIMDE_FLOAT32_C( -11.54), SIMDE_FLOAT32_C( 485.57), SIMDE_FLOAT32_C( 828.88) }, { -INT32_C( 1889488102), INT32_C( 1553704510), -INT32_C( 999606999), -INT32_C( 892699540) }, { SIMDE_FLOAT32_C( 562.77), SIMDE_FLOAT32_C( 286.45), SIMDE_FLOAT32_C( 267.97), SIMDE_FLOAT32_C( -659.79) }, { SIMDE_FLOAT32_C( 485.57), SIMDE_FLOAT32_C( 267.97), SIMDE_FLOAT32_C( -11.54), SIMDE_FLOAT32_C( 562.77) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi32(test_vec[i].idx); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_mm_permutex2var_ps(a, idx, b); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128 a = simde_test_x86_random_f32x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128i idx = simde_test_x86_random_i32x4(); simde__m128 b = simde_test_x86_random_f32x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128 r = simde_mm_permutex2var_ps(a, idx, b); simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_mask_permutex2var_ps(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde_float32 a[8]; const simde__mmask8 k; const int32_t idx[8]; const simde_float32 b[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 779.66), SIMDE_FLOAT32_C( 978.72), SIMDE_FLOAT32_C( 176.87), SIMDE_FLOAT32_C( -448.47) }, UINT8_C( 18), { -INT32_C( 1588445151), -INT32_C( 1883472164), -INT32_C( 2066920482), INT32_C( 464333286) }, { SIMDE_FLOAT32_C( -922.94), SIMDE_FLOAT32_C( -641.88), SIMDE_FLOAT32_C( 269.76), SIMDE_FLOAT32_C( 195.17) }, { SIMDE_FLOAT32_C( 779.66), 
SIMDE_FLOAT32_C( -922.94), SIMDE_FLOAT32_C( 176.87), SIMDE_FLOAT32_C( -448.47) } }, { { SIMDE_FLOAT32_C( 494.25), SIMDE_FLOAT32_C( 840.08), SIMDE_FLOAT32_C( -242.06), SIMDE_FLOAT32_C( -219.30) }, UINT8_C(165), { INT32_C( 207412167), -INT32_C( 1238536587), INT32_C( 1066586049), -INT32_C( 702733685) }, { SIMDE_FLOAT32_C( -52.87), SIMDE_FLOAT32_C( -994.31), SIMDE_FLOAT32_C( 351.07), SIMDE_FLOAT32_C( -263.12) }, { SIMDE_FLOAT32_C( -263.12), SIMDE_FLOAT32_C( 840.08), SIMDE_FLOAT32_C( 840.08), SIMDE_FLOAT32_C( -219.30) } }, { { SIMDE_FLOAT32_C( 194.86), SIMDE_FLOAT32_C( -308.38), SIMDE_FLOAT32_C( -186.07), SIMDE_FLOAT32_C( 552.98) }, UINT8_C( 94), { INT32_C( 1940913599), INT32_C( 725308496), INT32_C( 530663344), INT32_C( 1138775924) }, { SIMDE_FLOAT32_C( -904.51), SIMDE_FLOAT32_C( -690.64), SIMDE_FLOAT32_C( -252.21), SIMDE_FLOAT32_C( 829.92) }, { SIMDE_FLOAT32_C( 194.86), SIMDE_FLOAT32_C( 194.86), SIMDE_FLOAT32_C( 194.86), SIMDE_FLOAT32_C( -904.51) } }, { { SIMDE_FLOAT32_C( 501.97), SIMDE_FLOAT32_C( 290.64), SIMDE_FLOAT32_C( -222.95), SIMDE_FLOAT32_C( 507.66) }, UINT8_C( 96), { -INT32_C( 1036571116), -INT32_C( 327051793), -INT32_C( 1740835516), INT32_C( 2001299248) }, { SIMDE_FLOAT32_C( 19.16), SIMDE_FLOAT32_C( 666.10), SIMDE_FLOAT32_C( 76.00), SIMDE_FLOAT32_C( -406.79) }, { SIMDE_FLOAT32_C( 501.97), SIMDE_FLOAT32_C( 290.64), SIMDE_FLOAT32_C( -222.95), SIMDE_FLOAT32_C( 507.66) } }, { { SIMDE_FLOAT32_C( 491.62), SIMDE_FLOAT32_C( 408.83), SIMDE_FLOAT32_C( -311.30), SIMDE_FLOAT32_C( 800.98) }, UINT8_C(253), { INT32_C( 1336058708), -INT32_C( 1335688574), -INT32_C( 811653830), -INT32_C( 1676375129) }, { SIMDE_FLOAT32_C( -797.91), SIMDE_FLOAT32_C( 818.85), SIMDE_FLOAT32_C( 547.59), SIMDE_FLOAT32_C( 721.06) }, { SIMDE_FLOAT32_C( -797.91), SIMDE_FLOAT32_C( 408.83), SIMDE_FLOAT32_C( -311.30), SIMDE_FLOAT32_C( 721.06) } }, { { SIMDE_FLOAT32_C( 820.82), SIMDE_FLOAT32_C( -343.01), SIMDE_FLOAT32_C( -259.78), SIMDE_FLOAT32_C( 486.92) }, UINT8_C( 46), { -INT32_C( 547510890), -INT32_C( 
1338746100), -INT32_C( 13466627), INT32_C( 205120231) }, { SIMDE_FLOAT32_C( -825.08), SIMDE_FLOAT32_C( -26.06), SIMDE_FLOAT32_C( 257.90), SIMDE_FLOAT32_C( 246.32) }, { SIMDE_FLOAT32_C( 820.82), SIMDE_FLOAT32_C( -825.08), SIMDE_FLOAT32_C( -26.06), SIMDE_FLOAT32_C( 246.32) } }, { { SIMDE_FLOAT32_C( -212.89), SIMDE_FLOAT32_C( 836.31), SIMDE_FLOAT32_C( 448.41), SIMDE_FLOAT32_C( -394.05) }, UINT8_C( 27), { INT32_C( 945733377), -INT32_C( 1026585316), INT32_C( 1171173098), INT32_C( 1715633890) }, { SIMDE_FLOAT32_C( -352.66), SIMDE_FLOAT32_C( 794.50), SIMDE_FLOAT32_C( -711.52), SIMDE_FLOAT32_C( 53.59) }, { SIMDE_FLOAT32_C( 836.31), SIMDE_FLOAT32_C( -352.66), SIMDE_FLOAT32_C( 448.41), SIMDE_FLOAT32_C( 448.41) } }, { { SIMDE_FLOAT32_C( -948.56), SIMDE_FLOAT32_C( -928.04), SIMDE_FLOAT32_C( 228.50), SIMDE_FLOAT32_C( 25.38) }, UINT8_C( 13), { -INT32_C( 228810465), INT32_C( 720664683), INT32_C( 1665543382), -INT32_C( 1437726469) }, { SIMDE_FLOAT32_C( 322.90), SIMDE_FLOAT32_C( 494.70), SIMDE_FLOAT32_C( 316.80), SIMDE_FLOAT32_C( 77.13) }, { SIMDE_FLOAT32_C( 77.13), SIMDE_FLOAT32_C( -928.04), SIMDE_FLOAT32_C( 316.80), SIMDE_FLOAT32_C( 25.38) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi32(test_vec[i].idx); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_mm_mask_permutex2var_ps(a, test_vec[i].k, idx, b); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128 a = simde_test_x86_random_f32x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i idx = simde_test_x86_random_i32x4(); simde__m128 b = simde_test_x86_random_f32x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128 r = simde_mm_mask_permutex2var_ps(a, k, idx, b); simde_test_x86_write_f32x4(2, a, 
SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_mask2_permutex2var_ps(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde_float32 a[8]; const int32_t idx[8]; const simde__mmask8 k; const simde_float32 b[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( -545.46), SIMDE_FLOAT32_C( -40.75), SIMDE_FLOAT32_C( 724.47), SIMDE_FLOAT32_C( -750.96) }, { INT32_C( 1131919442), -INT32_C( 1017253724), -INT32_C( 1003560632), INT32_C( 1134549074) }, UINT8_C(172), { SIMDE_FLOAT32_C( 325.85), SIMDE_FLOAT32_C( -350.45), SIMDE_FLOAT32_C( -518.61), SIMDE_FLOAT32_C( 138.34) }, { SIMDE_FLOAT32_C( 247.72), SIMDE_FLOAT32_C( -221.94), SIMDE_FLOAT32_C( -545.46), SIMDE_FLOAT32_C( 724.47) } }, { { SIMDE_FLOAT32_C( 815.72), SIMDE_FLOAT32_C( -595.37), SIMDE_FLOAT32_C( 556.77), SIMDE_FLOAT32_C( -634.22) }, { INT32_C( 1140370309), -INT32_C( 1005223772), -INT32_C( 1037235651), -INT32_C( 1007017984) }, UINT8_C(149), { SIMDE_FLOAT32_C( 321.58), SIMDE_FLOAT32_C( -254.72), SIMDE_FLOAT32_C( -187.96), SIMDE_FLOAT32_C( -397.87) }, { SIMDE_FLOAT32_C( -254.72), SIMDE_FLOAT32_C( -598.01), SIMDE_FLOAT32_C( -254.72), SIMDE_FLOAT32_C( -500.25) } }, { { SIMDE_FLOAT32_C( -743.94), SIMDE_FLOAT32_C( -865.07), SIMDE_FLOAT32_C( -903.17), SIMDE_FLOAT32_C( 572.86) }, { INT32_C( 1129582428), -INT32_C( 1008709796), -INT32_C( 1008078356), -INT32_C( 1031937720) }, UINT8_C(132), { SIMDE_FLOAT32_C( 779.83), SIMDE_FLOAT32_C( 714.59), SIMDE_FLOAT32_C( 100.89), SIMDE_FLOAT32_C( 99.52) }, { SIMDE_FLOAT32_C( 212.06), SIMDE_FLOAT32_C( -448.62), SIMDE_FLOAT32_C( 779.83), SIMDE_FLOAT32_C( -63.47) } }, { { SIMDE_FLOAT32_C( 721.15), SIMDE_FLOAT32_C( -573.25), SIMDE_FLOAT32_C( 749.07), SIMDE_FLOAT32_C( -797.46) }, { INT32_C( 1141720351), INT32_C( 
1141715599), -INT32_C( 1010537595), INT32_C( 1123268690) }, UINT8_C(254), { SIMDE_FLOAT32_C( -895.49), SIMDE_FLOAT32_C( 523.85), SIMDE_FLOAT32_C( -112.69), SIMDE_FLOAT32_C( -395.75) }, { SIMDE_FLOAT32_C( 565.08), SIMDE_FLOAT32_C( -395.75), SIMDE_FLOAT32_C( 523.85), SIMDE_FLOAT32_C( 749.07) } }, { { SIMDE_FLOAT32_C( -315.26), SIMDE_FLOAT32_C( -791.11), SIMDE_FLOAT32_C( 349.53), SIMDE_FLOAT32_C( 496.78) }, { -INT32_C( 1019414446), INT32_C( 1142384067), INT32_C( 1142812180), -INT32_C( 1028110418) }, UINT8_C(157), { SIMDE_FLOAT32_C( -156.22), SIMDE_FLOAT32_C( 459.24), SIMDE_FLOAT32_C( 710.56), SIMDE_FLOAT32_C( 780.31) }, { SIMDE_FLOAT32_C( 349.53), SIMDE_FLOAT32_C( 605.59), SIMDE_FLOAT32_C( -156.22), SIMDE_FLOAT32_C( 710.56) } }, { { SIMDE_FLOAT32_C( -740.35), SIMDE_FLOAT32_C( 490.39), SIMDE_FLOAT32_C( 494.90), SIMDE_FLOAT32_C( 360.54) }, { -INT32_C( 1009972347), INT32_C( 1129843917), INT32_C( 1145361039), -INT32_C( 1004191416) }, UINT8_C(250), { SIMDE_FLOAT32_C( 352.37), SIMDE_FLOAT32_C( 903.77), SIMDE_FLOAT32_C( -974.25), SIMDE_FLOAT32_C( -525.77) }, { SIMDE_FLOAT32_C( -410.09), SIMDE_FLOAT32_C( 903.77), SIMDE_FLOAT32_C( 787.29), SIMDE_FLOAT32_C( -740.35) } }, { { SIMDE_FLOAT32_C( 834.34), SIMDE_FLOAT32_C( -869.75), SIMDE_FLOAT32_C( 998.08), SIMDE_FLOAT32_C( -278.35) }, { -INT32_C( 1014710600), -INT32_C( 1013016822), -INT32_C( 1031083131), -INT32_C( 1000014479) }, UINT8_C(165), { SIMDE_FLOAT32_C( 741.57), SIMDE_FLOAT32_C( 689.62), SIMDE_FLOAT32_C( 811.33), SIMDE_FLOAT32_C( -350.56) }, { SIMDE_FLOAT32_C( 834.34), SIMDE_FLOAT32_C( -317.18), SIMDE_FLOAT32_C( 689.62), SIMDE_FLOAT32_C( -915.96) } }, { { SIMDE_FLOAT32_C( -131.93), SIMDE_FLOAT32_C( -344.89), SIMDE_FLOAT32_C( -891.33), SIMDE_FLOAT32_C( -421.37) }, { -INT32_C( 1005771489), -INT32_C( 1004672287), -INT32_C( 999768392), INT32_C( 1147704443) }, UINT8_C(241), { SIMDE_FLOAT32_C( -341.07), SIMDE_FLOAT32_C( 146.37), SIMDE_FLOAT32_C( 516.16), SIMDE_FLOAT32_C( -2.09) }, { SIMDE_FLOAT32_C( -2.09), SIMDE_FLOAT32_C( 
-631.67), SIMDE_FLOAT32_C( -930.98), SIMDE_FLOAT32_C( 930.32) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi32(test_vec[i].idx); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_mm_mask2_permutex2var_ps(a, idx, test_vec[i].k, b); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128 a = simde_test_x86_random_f32x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128i idx = simde_mm_castps_si128(simde_test_x86_random_f32x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0))); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128 b = simde_test_x86_random_f32x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128 r = simde_mm_mask2_permutex2var_ps(a, idx, k, b); simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_maskz_permutex2var_ps(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde__mmask8 k; const simde_float32 a[8]; const int32_t idx[8]; const simde_float32 b[8]; const simde_float32 r[8]; } test_vec[] = { { UINT8_C( 44), { SIMDE_FLOAT32_C( -131.47), SIMDE_FLOAT32_C( -98.32), SIMDE_FLOAT32_C( -409.30), SIMDE_FLOAT32_C( 342.76) }, { INT32_C( 1601285198), -INT32_C( 512390974), -INT32_C( 695365330), INT32_C( 172807497) }, { SIMDE_FLOAT32_C( -324.18), SIMDE_FLOAT32_C( 105.12), SIMDE_FLOAT32_C( 256.22), SIMDE_FLOAT32_C( -255.16) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 256.22), SIMDE_FLOAT32_C( -98.32) } }, { UINT8_C(118), { SIMDE_FLOAT32_C( -14.91), 
SIMDE_FLOAT32_C( 403.77), SIMDE_FLOAT32_C( -818.19), SIMDE_FLOAT32_C( -498.75) }, { INT32_C( 1720767706), INT32_C( 1152703384), -INT32_C( 720956339), INT32_C( 436463753) }, { SIMDE_FLOAT32_C( -717.52), SIMDE_FLOAT32_C( -705.08), SIMDE_FLOAT32_C( -205.73), SIMDE_FLOAT32_C( -620.33) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -14.91), SIMDE_FLOAT32_C( -705.08), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C( 38), { SIMDE_FLOAT32_C( 682.17), SIMDE_FLOAT32_C( 55.49), SIMDE_FLOAT32_C( -930.26), SIMDE_FLOAT32_C( -61.61) }, { -INT32_C( 1542456430), INT32_C( 2021531299), -INT32_C( 1995381330), -INT32_C( 1378462311) }, { SIMDE_FLOAT32_C( -56.50), SIMDE_FLOAT32_C( -362.49), SIMDE_FLOAT32_C( -669.14), SIMDE_FLOAT32_C( 996.86) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -61.61), SIMDE_FLOAT32_C( -669.14), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C(175), { SIMDE_FLOAT32_C( -654.09), SIMDE_FLOAT32_C( -720.66), SIMDE_FLOAT32_C( 212.74), SIMDE_FLOAT32_C( 140.18) }, { INT32_C( 237154053), INT32_C( 799074356), INT32_C( 366101751), -INT32_C( 1480373566) }, { SIMDE_FLOAT32_C( -955.72), SIMDE_FLOAT32_C( -589.43), SIMDE_FLOAT32_C( -817.55), SIMDE_FLOAT32_C( 473.91) }, { SIMDE_FLOAT32_C( -589.43), SIMDE_FLOAT32_C( -955.72), SIMDE_FLOAT32_C( 473.91), SIMDE_FLOAT32_C( 212.74) } }, { UINT8_C( 35), { SIMDE_FLOAT32_C( -371.95), SIMDE_FLOAT32_C( -582.60), SIMDE_FLOAT32_C( 608.54), SIMDE_FLOAT32_C( -41.09) }, { INT32_C( 175472464), -INT32_C( 1206966008), INT32_C( 871112011), INT32_C( 19602365) }, { SIMDE_FLOAT32_C( -844.84), SIMDE_FLOAT32_C( -178.68), SIMDE_FLOAT32_C( 625.63), SIMDE_FLOAT32_C( -171.78) }, { SIMDE_FLOAT32_C( -371.95), SIMDE_FLOAT32_C( -371.95), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C( 4), { SIMDE_FLOAT32_C( -140.46), SIMDE_FLOAT32_C( -127.51), SIMDE_FLOAT32_C( 135.39), SIMDE_FLOAT32_C( 41.99) }, { INT32_C( 1335587907), -INT32_C( 1012940732), INT32_C( 1607182901), -INT32_C( 676691015) }, { SIMDE_FLOAT32_C( -939.53), SIMDE_FLOAT32_C( -325.78), SIMDE_FLOAT32_C( 
-126.70), SIMDE_FLOAT32_C( 576.34) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -325.78), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C( 9), { SIMDE_FLOAT32_C( -825.82), SIMDE_FLOAT32_C( 731.50), SIMDE_FLOAT32_C( -792.13), SIMDE_FLOAT32_C( 799.81) }, { INT32_C( 1137003952), INT32_C( 327578270), -INT32_C( 849881843), INT32_C( 503454324) }, { SIMDE_FLOAT32_C( -888.78), SIMDE_FLOAT32_C( -687.15), SIMDE_FLOAT32_C( -183.62), SIMDE_FLOAT32_C( -991.89) }, { SIMDE_FLOAT32_C( -825.82), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -888.78) } }, { UINT8_C( 12), { SIMDE_FLOAT32_C( 107.74), SIMDE_FLOAT32_C( -931.41), SIMDE_FLOAT32_C( -366.82), SIMDE_FLOAT32_C( 981.04) }, { -INT32_C( 606473373), -INT32_C( 1970588868), INT32_C( 1797836537), INT32_C( 695745364) }, { SIMDE_FLOAT32_C( 292.34), SIMDE_FLOAT32_C( 687.44), SIMDE_FLOAT32_C( 409.85), SIMDE_FLOAT32_C( 423.13) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -931.41), SIMDE_FLOAT32_C( 292.34) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128i idx = simde_mm_loadu_epi32(test_vec[i].idx); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_mm_maskz_permutex2var_ps(test_vec[i].k, a, idx, b); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128 a = simde_test_x86_random_f32x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128i idx = simde_test_x86_random_i32x4(); simde__m128 b = simde_test_x86_random_f32x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128 r = simde_mm_maskz_permutex2var_ps(k, a, idx, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, idx, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_permutex2var_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[16]; const int16_t idx[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { { -INT16_C( 32651), -INT16_C( 5922), -INT16_C( 29589), INT16_C( 29469), INT16_C( 29393), INT16_C( 32711), INT16_C( 5048), -INT16_C( 29164), INT16_C( 14884), -INT16_C( 14540), INT16_C( 19202), -INT16_C( 31858), INT16_C( 30405), -INT16_C( 327), -INT16_C( 4310), -INT16_C( 24631) }, { -INT16_C( 22672), -INT16_C( 9337), -INT16_C( 23245), INT16_C( 1102), INT16_C( 5399), -INT16_C( 12413), -INT16_C( 26584), INT16_C( 19549), -INT16_C( 28206), -INT16_C( 10988), -INT16_C( 23844), -INT16_C( 24232), INT16_C( 4376), INT16_C( 17055), INT16_C( 26625), INT16_C( 29154) }, { INT16_C( 26895), INT16_C( 16972), -INT16_C( 26098), INT16_C( 9542), -INT16_C( 13648), -INT16_C( 9995), INT16_C( 21090), INT16_C( 13349), INT16_C( 14820), -INT16_C( 16375), INT16_C( 25307), -INT16_C( 3230), INT16_C( 371), INT16_C( 29749), INT16_C( 5994), INT16_C( 31205) }, { INT16_C( 26895), -INT16_C( 29164), INT16_C( 9542), -INT16_C( 4310), INT16_C( 13349), INT16_C( 29469), INT16_C( 14884), INT16_C( 29749), -INT16_C( 26098), -INT16_C( 13648), INT16_C( 371), INT16_C( 14820), INT16_C( 14820), INT16_C( 31205), -INT16_C( 5922), -INT16_C( 29589) } }, { { INT16_C( 12673), -INT16_C( 28740), INT16_C( 716), INT16_C( 31925), -INT16_C( 21812), INT16_C( 11860), INT16_C( 31228), -INT16_C( 8093), INT16_C( 27826), -INT16_C( 29279), INT16_C( 974), INT16_C( 17024), -INT16_C( 18940), INT16_C( 28342), -INT16_C( 25395), INT16_C( 20200) }, { -INT16_C( 23347), -INT16_C( 26146), -INT16_C( 27738), INT16_C( 29461), INT16_C( 27197), INT16_C( 14753), INT16_C( 1251), -INT16_C( 27110), -INT16_C( 17551), INT16_C( 16163), -INT16_C( 23362), -INT16_C( 15743), INT16_C( 14426), 
INT16_C( 10033), INT16_C( 6612), -INT16_C( 24202) }, { INT16_C( 21693), INT16_C( 25403), INT16_C( 20711), INT16_C( 9430), INT16_C( 30906), -INT16_C( 24995), INT16_C( 30588), -INT16_C( 4812), INT16_C( 22322), -INT16_C( 4051), -INT16_C( 20741), INT16_C( 21939), -INT16_C( 6938), -INT16_C( 17795), -INT16_C( 3075), -INT16_C( 17828) }, { INT16_C( 28342), -INT16_C( 3075), INT16_C( 31228), -INT16_C( 24995), -INT16_C( 17795), -INT16_C( 28740), INT16_C( 31925), -INT16_C( 20741), INT16_C( 25403), INT16_C( 31925), -INT16_C( 3075), -INT16_C( 28740), -INT16_C( 20741), INT16_C( 25403), INT16_C( 30906), INT16_C( 30588) } }, { { -INT16_C( 26809), INT16_C( 11805), -INT16_C( 2841), -INT16_C( 23982), -INT16_C( 20628), -INT16_C( 6080), INT16_C( 29735), INT16_C( 22998), INT16_C( 971), -INT16_C( 14518), -INT16_C( 591), -INT16_C( 26596), -INT16_C( 26143), -INT16_C( 8622), -INT16_C( 20852), -INT16_C( 11368) }, { -INT16_C( 19131), INT16_C( 11521), INT16_C( 21417), INT16_C( 5583), INT16_C( 3843), INT16_C( 11006), -INT16_C( 11133), INT16_C( 20099), -INT16_C( 12841), -INT16_C( 30699), INT16_C( 13002), -INT16_C( 21728), INT16_C( 29643), INT16_C( 22665), INT16_C( 8481), INT16_C( 26411) }, { INT16_C( 11735), -INT16_C( 32620), INT16_C( 25472), -INT16_C( 31850), -INT16_C( 27534), -INT16_C( 2643), INT16_C( 12648), INT16_C( 16195), INT16_C( 23038), -INT16_C( 13881), -INT16_C( 6005), INT16_C( 22132), -INT16_C( 421), INT16_C( 31918), -INT16_C( 9697), -INT16_C( 2333) }, { -INT16_C( 6080), INT16_C( 11805), -INT16_C( 14518), -INT16_C( 11368), -INT16_C( 23982), -INT16_C( 9697), -INT16_C( 23982), -INT16_C( 23982), INT16_C( 16195), -INT16_C( 2643), -INT16_C( 591), -INT16_C( 26809), -INT16_C( 26596), -INT16_C( 14518), INT16_C( 11805), -INT16_C( 26596) } }, { { INT16_C( 30471), -INT16_C( 30857), INT16_C( 3546), INT16_C( 19467), -INT16_C( 18271), INT16_C( 2369), -INT16_C( 31255), -INT16_C( 6072), INT16_C( 4062), INT16_C( 27057), INT16_C( 9719), INT16_C( 21183), INT16_C( 28195), INT16_C( 17359), -INT16_C( 
19896), INT16_C( 20281) }, { -INT16_C( 20438), INT16_C( 1238), -INT16_C( 7747), INT16_C( 24145), -INT16_C( 28006), -INT16_C( 31897), -INT16_C( 20713), -INT16_C( 2709), INT16_C( 7359), -INT16_C( 18850), INT16_C( 7746), INT16_C( 25865), -INT16_C( 10100), -INT16_C( 11096), -INT16_C( 7542), -INT16_C( 19421) }, { -INT16_C( 1646), INT16_C( 20665), INT16_C( 2779), INT16_C( 30126), INT16_C( 5788), -INT16_C( 19208), INT16_C( 25797), -INT16_C( 31575), INT16_C( 2176), -INT16_C( 15813), INT16_C( 17446), -INT16_C( 19928), -INT16_C( 12260), -INT16_C( 22906), -INT16_C( 22094), INT16_C( 17755) }, { INT16_C( 9719), INT16_C( 25797), -INT16_C( 22906), INT16_C( 20665), INT16_C( 17446), -INT16_C( 6072), -INT16_C( 31575), INT16_C( 21183), INT16_C( 17755), -INT16_C( 22094), INT16_C( 3546), INT16_C( 27057), INT16_C( 28195), INT16_C( 4062), INT16_C( 9719), INT16_C( 19467) } }, { { INT16_C( 5282), INT16_C( 32149), INT16_C( 17182), -INT16_C( 17678), -INT16_C( 5287), INT16_C( 8046), INT16_C( 6223), -INT16_C( 12381), -INT16_C( 8672), INT16_C( 18066), -INT16_C( 17886), INT16_C( 16120), INT16_C( 32394), INT16_C( 15845), INT16_C( 16423), -INT16_C( 13950) }, { INT16_C( 5972), INT16_C( 29255), INT16_C( 14682), -INT16_C( 19412), -INT16_C( 25820), INT16_C( 29651), INT16_C( 30387), -INT16_C( 11453), -INT16_C( 10923), INT16_C( 30489), INT16_C( 4495), INT16_C( 6582), -INT16_C( 25713), -INT16_C( 18858), -INT16_C( 10021), INT16_C( 12159) }, { -INT16_C( 14609), INT16_C( 19105), -INT16_C( 13056), INT16_C( 9470), -INT16_C( 11928), INT16_C( 7064), -INT16_C( 9401), -INT16_C( 25362), INT16_C( 1968), INT16_C( 16148), -INT16_C( 13800), -INT16_C( 22696), -INT16_C( 20635), INT16_C( 16477), -INT16_C( 8825), INT16_C( 30575) }, { -INT16_C( 11928), -INT16_C( 12381), -INT16_C( 13800), INT16_C( 32394), -INT16_C( 5287), INT16_C( 9470), INT16_C( 9470), -INT16_C( 17678), INT16_C( 7064), INT16_C( 16148), -INT16_C( 13950), -INT16_C( 9401), -INT16_C( 13950), -INT16_C( 9401), -INT16_C( 22696), INT16_C( 30575) } }, { { INT16_C( 
4259), -INT16_C( 23615), -INT16_C( 16419), INT16_C( 18120), INT16_C( 24720), -INT16_C( 10399), INT16_C( 20539), -INT16_C( 5260), -INT16_C( 30633), INT16_C( 28714), -INT16_C( 32174), -INT16_C( 18665), INT16_C( 30001), -INT16_C( 17929), INT16_C( 26194), -INT16_C( 2768) }, { -INT16_C( 3722), INT16_C( 21401), INT16_C( 25008), INT16_C( 16537), -INT16_C( 1087), -INT16_C( 1001), -INT16_C( 29877), -INT16_C( 23833), INT16_C( 4371), INT16_C( 25874), INT16_C( 10899), -INT16_C( 15076), INT16_C( 5023), -INT16_C( 3714), -INT16_C( 20871), -INT16_C( 4122) }, { INT16_C( 32671), INT16_C( 20291), -INT16_C( 8992), -INT16_C( 24177), -INT16_C( 22825), INT16_C( 8861), -INT16_C( 31694), INT16_C( 17861), -INT16_C( 10347), INT16_C( 10667), -INT16_C( 14591), -INT16_C( 24338), INT16_C( 27867), INT16_C( 21649), INT16_C( 30746), -INT16_C( 18108) }, { -INT16_C( 31694), INT16_C( 10667), INT16_C( 32671), INT16_C( 10667), -INT16_C( 23615), INT16_C( 17861), -INT16_C( 18665), -INT16_C( 5260), -INT16_C( 24177), -INT16_C( 8992), -INT16_C( 24177), INT16_C( 27867), -INT16_C( 18108), INT16_C( 30746), INT16_C( 10667), INT16_C( 20539) } }, { { -INT16_C( 30729), -INT16_C( 10232), -INT16_C( 26781), INT16_C( 15225), INT16_C( 5949), INT16_C( 28509), INT16_C( 8859), INT16_C( 12725), INT16_C( 24826), -INT16_C( 1190), INT16_C( 18471), INT16_C( 668), INT16_C( 11700), -INT16_C( 12713), -INT16_C( 25691), -INT16_C( 25209) }, { -INT16_C( 28894), -INT16_C( 31371), -INT16_C( 4570), INT16_C( 25536), INT16_C( 7685), -INT16_C( 24109), -INT16_C( 30656), INT16_C( 15058), INT16_C( 11496), INT16_C( 3894), -INT16_C( 11660), INT16_C( 10258), INT16_C( 27135), -INT16_C( 23050), INT16_C( 32004), INT16_C( 9794) }, { -INT16_C( 18676), INT16_C( 12971), INT16_C( 27813), -INT16_C( 21611), INT16_C( 26762), -INT16_C( 13748), INT16_C( 7920), -INT16_C( 10235), INT16_C( 15178), -INT16_C( 16664), -INT16_C( 1523), INT16_C( 3302), -INT16_C( 9117), INT16_C( 26545), -INT16_C( 3239), INT16_C( 25997) }, { -INT16_C( 26781), -INT16_C( 13748), INT16_C( 
8859), -INT16_C( 30729), INT16_C( 28509), -INT16_C( 21611), -INT16_C( 30729), INT16_C( 27813), INT16_C( 24826), INT16_C( 7920), INT16_C( 26762), INT16_C( 27813), INT16_C( 25997), INT16_C( 7920), INT16_C( 5949), -INT16_C( 26781) } }, { { INT16_C( 14506), INT16_C( 20631), INT16_C( 11428), INT16_C( 12027), INT16_C( 18325), -INT16_C( 31239), -INT16_C( 411), -INT16_C( 20642), INT16_C( 17977), INT16_C( 18029), INT16_C( 21312), -INT16_C( 23726), INT16_C( 1071), -INT16_C( 30710), -INT16_C( 26633), -INT16_C( 23827) }, { -INT16_C( 31537), INT16_C( 29938), -INT16_C( 4688), INT16_C( 17826), -INT16_C( 25804), -INT16_C( 26165), INT16_C( 10649), -INT16_C( 11704), -INT16_C( 19089), -INT16_C( 20712), INT16_C( 27400), INT16_C( 14162), INT16_C( 23663), INT16_C( 26303), -INT16_C( 21261), -INT16_C( 15864) }, { -INT16_C( 1488), -INT16_C( 8138), -INT16_C( 9753), INT16_C( 6950), -INT16_C( 3724), INT16_C( 3764), -INT16_C( 998), -INT16_C( 30240), -INT16_C( 1615), -INT16_C( 18120), -INT16_C( 30108), -INT16_C( 11280), -INT16_C( 20506), -INT16_C( 9927), INT16_C( 16987), -INT16_C( 29797) }, { -INT16_C( 23827), -INT16_C( 9753), -INT16_C( 1488), INT16_C( 11428), -INT16_C( 3724), -INT16_C( 23726), -INT16_C( 18120), INT16_C( 17977), -INT16_C( 23827), -INT16_C( 1615), INT16_C( 17977), -INT16_C( 9753), -INT16_C( 23827), -INT16_C( 29797), INT16_C( 6950), INT16_C( 17977) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_mm256_loadu_epi16(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi16(test_vec[i].idx); simde__m256i b = simde_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_permutex2var_epi16(a, idx, b); simde_test_x86_assert_equal_i16x16(r, simde_mm256_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i16x16(); simde__m256i idx = simde_test_x86_random_i16x16(); simde__m256i b = simde_test_x86_random_i16x16(); simde__m256i r = 
simde_mm256_permutex2var_epi16(a, idx, b); simde_test_x86_write_i16x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask_permutex2var_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[16]; const simde__mmask16 k; const int16_t idx[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { { -INT16_C( 11716), INT16_C( 9324), -INT16_C( 27989), INT16_C( 7999), -INT16_C( 2941), -INT16_C( 25299), INT16_C( 3824), -INT16_C( 24026), INT16_C( 24071), INT16_C( 27483), INT16_C( 19688), -INT16_C( 12738), INT16_C( 30715), INT16_C( 22439), INT16_C( 17081), -INT16_C( 2334) }, UINT16_C(19988), { -INT16_C( 16614), INT16_C( 23008), INT16_C( 25567), INT16_C( 3149), INT16_C( 15872), INT16_C( 9754), INT16_C( 8672), INT16_C( 15236), INT16_C( 27788), -INT16_C( 13689), -INT16_C( 31942), -INT16_C( 7870), -INT16_C( 1062), -INT16_C( 17372), INT16_C( 14577), INT16_C( 2827) }, { -INT16_C( 5128), -INT16_C( 10395), -INT16_C( 19889), INT16_C( 20451), -INT16_C( 272), -INT16_C( 12170), -INT16_C( 1505), -INT16_C( 21492), -INT16_C( 27801), -INT16_C( 24202), -INT16_C( 18410), -INT16_C( 3965), -INT16_C( 22604), -INT16_C( 23123), -INT16_C( 18209), -INT16_C( 10319) }, { -INT16_C( 11716), INT16_C( 9324), -INT16_C( 10319), INT16_C( 7999), -INT16_C( 11716), -INT16_C( 25299), INT16_C( 3824), -INT16_C( 24026), INT16_C( 24071), -INT16_C( 24026), -INT16_C( 18410), -INT16_C( 27989), INT16_C( 30715), INT16_C( 22439), -INT16_C( 10395), -INT16_C( 2334) } }, { { INT16_C( 5795), -INT16_C( 3410), -INT16_C( 27960), -INT16_C( 18110), -INT16_C( 18288), -INT16_C( 20599), -INT16_C( 27214), INT16_C( 6491), -INT16_C( 11735), INT16_C( 16315), INT16_C( 16010), INT16_C( 15920), -INT16_C( 8731), -INT16_C( 15132), -INT16_C( 27243), INT16_C( 14492) }, UINT16_C(19115), { INT16_C( 
29483), INT16_C( 28124), INT16_C( 27692), -INT16_C( 18907), -INT16_C( 10468), INT16_C( 30539), INT16_C( 29937), -INT16_C( 21431), -INT16_C( 11084), -INT16_C( 6934), -INT16_C( 12526), -INT16_C( 2367), INT16_C( 22163), INT16_C( 12171), INT16_C( 13966), -INT16_C( 18054) }, { INT16_C( 22186), -INT16_C( 10714), INT16_C( 19395), -INT16_C( 8308), -INT16_C( 10205), INT16_C( 5206), -INT16_C( 24500), INT16_C( 192), -INT16_C( 21900), -INT16_C( 31004), -INT16_C( 23175), INT16_C( 3197), INT16_C( 2299), -INT16_C( 30148), -INT16_C( 18881), -INT16_C( 5821) }, { INT16_C( 15920), INT16_C( 2299), -INT16_C( 27960), -INT16_C( 20599), -INT16_C( 18288), INT16_C( 15920), -INT16_C( 27214), INT16_C( 16315), -INT16_C( 11735), INT16_C( 16010), INT16_C( 16010), -INT16_C( 3410), -INT16_C( 8731), -INT16_C( 15132), -INT16_C( 27243), INT16_C( 14492) } }, { { INT16_C( 27148), -INT16_C( 12353), INT16_C( 19637), -INT16_C( 10066), INT16_C( 1316), INT16_C( 28908), -INT16_C( 21339), INT16_C( 6513), INT16_C( 21846), -INT16_C( 12385), INT16_C( 7419), -INT16_C( 2340), INT16_C( 6181), INT16_C( 25728), -INT16_C( 15154), -INT16_C( 9651) }, UINT16_C( 3118), { -INT16_C( 7254), INT16_C( 22616), INT16_C( 31932), -INT16_C( 22435), INT16_C( 749), INT16_C( 24149), -INT16_C( 21733), -INT16_C( 17485), -INT16_C( 20869), INT16_C( 22487), -INT16_C( 859), INT16_C( 9583), INT16_C( 15712), -INT16_C( 21015), INT16_C( 5911), -INT16_C( 15942) }, { INT16_C( 4859), -INT16_C( 18662), INT16_C( 30607), INT16_C( 31839), -INT16_C( 19334), -INT16_C( 27174), -INT16_C( 29344), -INT16_C( 9392), INT16_C( 10300), -INT16_C( 7886), -INT16_C( 24284), -INT16_C( 31482), -INT16_C( 3874), -INT16_C( 2766), -INT16_C( 5113), INT16_C( 695) }, { INT16_C( 27148), INT16_C( 10300), -INT16_C( 3874), -INT16_C( 2766), INT16_C( 1316), -INT16_C( 27174), -INT16_C( 21339), INT16_C( 6513), INT16_C( 21846), -INT16_C( 12385), INT16_C( 28908), -INT16_C( 9651), INT16_C( 6181), INT16_C( 25728), -INT16_C( 15154), -INT16_C( 9651) } }, { { -INT16_C( 11777), -INT16_C( 
28999), INT16_C( 6472), -INT16_C( 15862), -INT16_C( 6963), INT16_C( 11608), -INT16_C( 22415), -INT16_C( 21240), INT16_C( 15056), -INT16_C( 2674), -INT16_C( 27173), -INT16_C( 18054), -INT16_C( 21371), -INT16_C( 29521), INT16_C( 26265), -INT16_C( 26481) }, UINT16_C(18487), { INT16_C( 32550), INT16_C( 12385), INT16_C( 12098), -INT16_C( 26092), -INT16_C( 31396), INT16_C( 25922), INT16_C( 4915), -INT16_C( 15969), INT16_C( 31496), -INT16_C( 32170), -INT16_C( 9420), -INT16_C( 7378), -INT16_C( 14488), -INT16_C( 2231), -INT16_C( 32673), -INT16_C( 31425) }, { -INT16_C( 24320), INT16_C( 17077), -INT16_C( 13872), INT16_C( 11484), INT16_C( 7759), -INT16_C( 32111), INT16_C( 12593), INT16_C( 14659), -INT16_C( 25940), -INT16_C( 8005), -INT16_C( 5515), -INT16_C( 8764), INT16_C( 3505), INT16_C( 4564), INT16_C( 5262), -INT16_C( 29034) }, { -INT16_C( 22415), -INT16_C( 28999), INT16_C( 6472), -INT16_C( 15862), INT16_C( 3505), INT16_C( 6472), -INT16_C( 22415), -INT16_C( 21240), INT16_C( 15056), -INT16_C( 2674), -INT16_C( 27173), INT16_C( 26265), -INT16_C( 21371), -INT16_C( 29521), -INT16_C( 29034), -INT16_C( 26481) } }, { { INT16_C( 19637), -INT16_C( 31280), -INT16_C( 21483), INT16_C( 25777), INT16_C( 17354), -INT16_C( 794), INT16_C( 10868), INT16_C( 8245), -INT16_C( 3644), INT16_C( 14592), -INT16_C( 15141), -INT16_C( 29673), -INT16_C( 5166), INT16_C( 24733), INT16_C( 13567), -INT16_C( 19218) }, UINT16_C(48768), { -INT16_C( 27335), -INT16_C( 5270), INT16_C( 13562), -INT16_C( 8146), -INT16_C( 24016), INT16_C( 26122), -INT16_C( 12606), -INT16_C( 15785), INT16_C( 12808), INT16_C( 8071), INT16_C( 22974), INT16_C( 23562), INT16_C( 2745), -INT16_C( 22640), INT16_C( 4286), -INT16_C( 1947) }, { -INT16_C( 12379), -INT16_C( 24605), INT16_C( 4355), INT16_C( 13440), -INT16_C( 30029), INT16_C( 30106), -INT16_C( 3751), INT16_C( 24887), -INT16_C( 16861), -INT16_C( 7808), -INT16_C( 30185), -INT16_C( 12227), -INT16_C( 12908), INT16_C( 21367), -INT16_C( 8995), -INT16_C( 31925) }, { INT16_C( 19637), 
-INT16_C( 31280), -INT16_C( 21483), INT16_C( 25777), INT16_C( 17354), -INT16_C( 794), INT16_C( 10868), INT16_C( 24887), -INT16_C( 3644), INT16_C( 8245), -INT16_C( 8995), -INT16_C( 15141), -INT16_C( 7808), -INT16_C( 12379), INT16_C( 13567), -INT16_C( 794) } }, { { INT16_C( 11947), -INT16_C( 20702), -INT16_C( 24001), -INT16_C( 3357), INT16_C( 32045), -INT16_C( 31129), -INT16_C( 24978), -INT16_C( 28185), INT16_C( 26461), INT16_C( 29810), -INT16_C( 20239), -INT16_C( 31163), -INT16_C( 17283), INT16_C( 23513), INT16_C( 9369), INT16_C( 17630) }, UINT16_C( 82), { -INT16_C( 28173), -INT16_C( 10589), -INT16_C( 12157), -INT16_C( 5549), -INT16_C( 16042), INT16_C( 15752), -INT16_C( 6830), -INT16_C( 14940), -INT16_C( 27302), -INT16_C( 24715), -INT16_C( 3557), -INT16_C( 2981), -INT16_C( 2995), INT16_C( 11032), INT16_C( 27193), INT16_C( 11308) }, { -INT16_C( 12293), INT16_C( 32259), INT16_C( 22175), -INT16_C( 2712), -INT16_C( 3816), INT16_C( 27186), -INT16_C( 10538), INT16_C( 12335), -INT16_C( 23445), -INT16_C( 30769), INT16_C( 11159), -INT16_C( 7045), -INT16_C( 27617), INT16_C( 22544), INT16_C( 15614), -INT16_C( 1403) }, { INT16_C( 11947), -INT16_C( 3357), -INT16_C( 24001), -INT16_C( 3357), -INT16_C( 10538), -INT16_C( 31129), INT16_C( 22175), -INT16_C( 28185), INT16_C( 26461), INT16_C( 29810), -INT16_C( 20239), -INT16_C( 31163), -INT16_C( 17283), INT16_C( 23513), INT16_C( 9369), INT16_C( 17630) } }, { { -INT16_C( 30709), -INT16_C( 21896), -INT16_C( 7714), -INT16_C( 2401), -INT16_C( 11822), -INT16_C( 22431), -INT16_C( 28505), INT16_C( 4825), -INT16_C( 22475), -INT16_C( 13159), INT16_C( 5587), -INT16_C( 3152), -INT16_C( 16215), -INT16_C( 22709), -INT16_C( 12036), INT16_C( 1953) }, UINT16_C( 6744), { INT16_C( 14257), INT16_C( 20731), -INT16_C( 13011), -INT16_C( 29151), -INT16_C( 14219), INT16_C( 19999), INT16_C( 21723), INT16_C( 29943), -INT16_C( 13792), -INT16_C( 12151), INT16_C( 12989), INT16_C( 2449), -INT16_C( 29222), INT16_C( 31705), INT16_C( 12949), INT16_C( 18069) }, { 
-INT16_C( 28567), -INT16_C( 26985), -INT16_C( 18339), -INT16_C( 11483), INT16_C( 17537), INT16_C( 23585), INT16_C( 6296), -INT16_C( 18224), INT16_C( 23267), -INT16_C( 24440), INT16_C( 6540), INT16_C( 26281), -INT16_C( 31833), INT16_C( 15586), INT16_C( 30645), INT16_C( 7810) }, { -INT16_C( 30709), -INT16_C( 21896), -INT16_C( 7714), -INT16_C( 21896), INT16_C( 23585), -INT16_C( 22431), INT16_C( 26281), INT16_C( 4825), -INT16_C( 22475), -INT16_C( 13159), INT16_C( 5587), -INT16_C( 26985), INT16_C( 6540), -INT16_C( 22709), -INT16_C( 12036), INT16_C( 1953) } }, { { INT16_C( 6408), INT16_C( 26036), -INT16_C( 9774), INT16_C( 21304), INT16_C( 23069), -INT16_C( 19025), INT16_C( 32626), INT16_C( 21869), -INT16_C( 2343), INT16_C( 26358), -INT16_C( 24817), -INT16_C( 18740), -INT16_C( 20958), -INT16_C( 10254), INT16_C( 29990), INT16_C( 12021) }, UINT16_C(43662), { INT16_C( 24723), -INT16_C( 13181), -INT16_C( 24141), INT16_C( 25126), -INT16_C( 26538), -INT16_C( 15134), -INT16_C( 17426), -INT16_C( 6982), -INT16_C( 14047), -INT16_C( 4477), -INT16_C( 22912), INT16_C( 29340), -INT16_C( 15747), INT16_C( 29671), INT16_C( 30448), -INT16_C( 31715) }, { -INT16_C( 24362), -INT16_C( 30128), INT16_C( 30273), -INT16_C( 26388), -INT16_C( 12786), -INT16_C( 932), INT16_C( 5770), -INT16_C( 21536), INT16_C( 25823), INT16_C( 24473), INT16_C( 13834), -INT16_C( 30766), -INT16_C( 17928), -INT16_C( 5638), INT16_C( 5935), INT16_C( 1645) }, { INT16_C( 6408), INT16_C( 21304), -INT16_C( 26388), INT16_C( 32626), INT16_C( 23069), -INT16_C( 19025), INT16_C( 32626), INT16_C( 13834), -INT16_C( 2343), INT16_C( 21304), -INT16_C( 24817), -INT16_C( 17928), -INT16_C( 20958), INT16_C( 21869), INT16_C( 29990), -INT16_C( 5638) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_mm256_loadu_epi16(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi16(test_vec[i].idx); simde__m256i b = simde_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = 
simde_mm256_mask_permutex2var_epi16(a, test_vec[i].k, idx, b); simde_test_x86_assert_equal_i16x16(r, simde_mm256_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i16x16(); simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m256i idx = simde_test_x86_random_i16x16(); simde__m256i b = simde_test_x86_random_i16x16(); simde__m256i r = simde_mm256_mask_permutex2var_epi16(a, k, idx, b); simde_test_x86_write_i16x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask2_permutex2var_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[16]; const int16_t idx[16]; const simde__mmask16 k; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { { -INT16_C( 16968), -INT16_C( 1648), INT16_C( 31795), INT16_C( 16785), -INT16_C( 4789), -INT16_C( 10946), INT16_C( 7683), -INT16_C( 7296), INT16_C( 6786), -INT16_C( 29630), INT16_C( 5200), INT16_C( 18452), INT16_C( 3790), -INT16_C( 719), -INT16_C( 25050), -INT16_C( 8701) }, { -INT16_C( 27813), -INT16_C( 28969), INT16_C( 26896), INT16_C( 23504), INT16_C( 3670), INT16_C( 23088), -INT16_C( 20436), -INT16_C( 20675), INT16_C( 32714), INT16_C( 6715), INT16_C( 20372), INT16_C( 25187), -INT16_C( 27554), -INT16_C( 31649), INT16_C( 25395), -INT16_C( 29086) }, UINT16_C(14838), { INT16_C( 1565), -INT16_C( 4702), -INT16_C( 1695), -INT16_C( 28165), INT16_C( 10067), -INT16_C( 28606), INT16_C( 3286), INT16_C( 4623), -INT16_C( 23769), -INT16_C( 30111), -INT16_C( 16635), INT16_C( 25886), INT16_C( 20803), -INT16_C( 23096), -INT16_C( 16672), -INT16_C( 545) }, { -INT16_C( 27813), INT16_C( 4623), INT16_C( 1565), INT16_C( 23504), INT16_C( 3286), INT16_C( 
1565), INT16_C( 3790), -INT16_C( 23096), INT16_C( 5200), INT16_C( 6715), INT16_C( 20372), INT16_C( 16785), -INT16_C( 16672), -INT16_C( 545), INT16_C( 25395), -INT16_C( 29086) } }, { { -INT16_C( 32315), INT16_C( 9962), -INT16_C( 6790), -INT16_C( 12872), -INT16_C( 1524), -INT16_C( 7331), INT16_C( 27910), INT16_C( 11765), INT16_C( 22032), INT16_C( 5815), -INT16_C( 10730), INT16_C( 22907), INT16_C( 17191), INT16_C( 2047), -INT16_C( 8703), -INT16_C( 14844) }, { -INT16_C( 4513), -INT16_C( 9491), -INT16_C( 23085), -INT16_C( 8025), INT16_C( 1439), -INT16_C( 23101), -INT16_C( 18318), -INT16_C( 32045), -INT16_C( 30194), INT16_C( 9368), INT16_C( 4960), -INT16_C( 30594), INT16_C( 32086), INT16_C( 22671), -INT16_C( 27557), -INT16_C( 17890) }, UINT16_C( 2946), { INT16_C( 22164), INT16_C( 15536), INT16_C( 20278), -INT16_C( 1727), -INT16_C( 19467), -INT16_C( 14159), -INT16_C( 16587), -INT16_C( 12718), -INT16_C( 19484), INT16_C( 25313), INT16_C( 14395), -INT16_C( 13601), INT16_C( 14992), -INT16_C( 20898), -INT16_C( 7692), -INT16_C( 30278) }, { -INT16_C( 4513), INT16_C( 2047), -INT16_C( 23085), -INT16_C( 8025), INT16_C( 1439), -INT16_C( 23101), -INT16_C( 18318), -INT16_C( 1727), -INT16_C( 8703), -INT16_C( 19484), INT16_C( 4960), -INT16_C( 7692), INT16_C( 32086), INT16_C( 22671), -INT16_C( 27557), -INT16_C( 17890) } }, { { INT16_C( 27191), INT16_C( 28101), INT16_C( 1722), -INT16_C( 20634), INT16_C( 6073), -INT16_C( 4489), -INT16_C( 13866), -INT16_C( 17732), -INT16_C( 24964), -INT16_C( 18660), -INT16_C( 1066), INT16_C( 26242), -INT16_C( 8139), INT16_C( 10772), -INT16_C( 12607), -INT16_C( 1869) }, { INT16_C( 30777), -INT16_C( 3227), -INT16_C( 13442), INT16_C( 14242), INT16_C( 6626), -INT16_C( 18139), -INT16_C( 7454), INT16_C( 24435), -INT16_C( 28544), INT16_C( 22038), -INT16_C( 26485), -INT16_C( 15940), -INT16_C( 12167), INT16_C( 15083), -INT16_C( 24929), -INT16_C( 10189) }, UINT16_C(38934), { -INT16_C( 27445), INT16_C( 28004), INT16_C( 18123), -INT16_C( 3962), INT16_C( 26879), 
INT16_C( 29650), INT16_C( 21191), -INT16_C( 8701), -INT16_C( 29016), INT16_C( 25718), -INT16_C( 4273), INT16_C( 14901), -INT16_C( 11222), INT16_C( 24024), -INT16_C( 4436), INT16_C( 30709) }, { INT16_C( 30777), -INT16_C( 4489), -INT16_C( 4436), INT16_C( 14242), INT16_C( 1722), -INT16_C( 18139), -INT16_C( 7454), INT16_C( 24435), -INT16_C( 28544), INT16_C( 22038), -INT16_C( 26485), -INT16_C( 11222), INT16_C( 25718), INT16_C( 15083), -INT16_C( 24929), -INT16_C( 3962) } }, { { INT16_C( 22914), INT16_C( 19940), INT16_C( 27296), -INT16_C( 24770), INT16_C( 4306), -INT16_C( 26094), INT16_C( 5475), INT16_C( 2936), -INT16_C( 4444), -INT16_C( 3216), -INT16_C( 23074), INT16_C( 2094), INT16_C( 1657), INT16_C( 9573), INT16_C( 23285), INT16_C( 30620) }, { -INT16_C( 32588), INT16_C( 21701), INT16_C( 1002), -INT16_C( 17165), INT16_C( 1555), INT16_C( 30294), -INT16_C( 12773), -INT16_C( 16510), -INT16_C( 3395), -INT16_C( 25677), -INT16_C( 7785), INT16_C( 4259), INT16_C( 2279), -INT16_C( 9163), -INT16_C( 11934), INT16_C( 5716) }, UINT16_C( 6481), { INT16_C( 15210), INT16_C( 24092), INT16_C( 12279), INT16_C( 20068), INT16_C( 32678), INT16_C( 10268), -INT16_C( 9921), -INT16_C( 3558), -INT16_C( 20108), INT16_C( 6099), -INT16_C( 17727), -INT16_C( 2529), -INT16_C( 32105), -INT16_C( 5177), INT16_C( 6296), INT16_C( 772) }, { INT16_C( 32678), INT16_C( 21701), INT16_C( 1002), -INT16_C( 17165), INT16_C( 20068), INT16_C( 30294), -INT16_C( 2529), -INT16_C( 16510), -INT16_C( 5177), -INT16_C( 25677), -INT16_C( 7785), -INT16_C( 24770), INT16_C( 2936), -INT16_C( 9163), -INT16_C( 11934), INT16_C( 5716) } }, { { INT16_C( 8275), INT16_C( 19041), -INT16_C( 15025), -INT16_C( 2664), -INT16_C( 19132), -INT16_C( 31971), INT16_C( 14222), INT16_C( 885), INT16_C( 18664), -INT16_C( 22246), INT16_C( 14851), -INT16_C( 25953), INT16_C( 26300), INT16_C( 21637), -INT16_C( 30338), -INT16_C( 11945) }, { -INT16_C( 18263), -INT16_C( 2020), -INT16_C( 19331), -INT16_C( 15634), INT16_C( 2921), -INT16_C( 1979), -INT16_C( 
17597), INT16_C( 11259), INT16_C( 5379), INT16_C( 1749), INT16_C( 29775), INT16_C( 2976), INT16_C( 9691), INT16_C( 22880), -INT16_C( 18514), INT16_C( 22315) }, UINT16_C(18288), { -INT16_C( 4784), INT16_C( 16123), INT16_C( 26031), -INT16_C( 2743), -INT16_C( 29603), INT16_C( 22704), -INT16_C( 19528), -INT16_C( 29331), -INT16_C( 16966), INT16_C( 23041), -INT16_C( 9016), INT16_C( 10368), INT16_C( 11830), INT16_C( 25056), INT16_C( 20614), -INT16_C( 10584) }, { -INT16_C( 18263), -INT16_C( 2020), -INT16_C( 19331), -INT16_C( 15634), -INT16_C( 22246), -INT16_C( 31971), -INT16_C( 2664), INT16_C( 11259), -INT16_C( 2664), INT16_C( 22704), -INT16_C( 11945), INT16_C( 2976), INT16_C( 9691), INT16_C( 22880), -INT16_C( 30338), INT16_C( 22315) } }, { { -INT16_C( 23747), -INT16_C( 4844), INT16_C( 23816), INT16_C( 26082), -INT16_C( 27926), -INT16_C( 23875), INT16_C( 11077), -INT16_C( 209), INT16_C( 12520), -INT16_C( 20390), -INT16_C( 9715), INT16_C( 17369), -INT16_C( 18168), -INT16_C( 29020), INT16_C( 19465), INT16_C( 18020) }, { INT16_C( 30959), -INT16_C( 1997), INT16_C( 5590), -INT16_C( 16291), INT16_C( 7079), -INT16_C( 4766), -INT16_C( 28346), INT16_C( 12012), INT16_C( 18113), -INT16_C( 12578), -INT16_C( 18656), INT16_C( 10513), -INT16_C( 19088), INT16_C( 31159), INT16_C( 7169), -INT16_C( 3648) }, UINT16_C(62356), { INT16_C( 27369), INT16_C( 17929), -INT16_C( 20438), -INT16_C( 29599), -INT16_C( 22627), -INT16_C( 30179), -INT16_C( 8235), -INT16_C( 19248), -INT16_C( 3667), -INT16_C( 16533), -INT16_C( 9190), -INT16_C( 11916), INT16_C( 30293), INT16_C( 5613), -INT16_C( 32153), INT16_C( 20489) }, { INT16_C( 30959), -INT16_C( 1997), -INT16_C( 8235), -INT16_C( 16291), -INT16_C( 209), -INT16_C( 4766), -INT16_C( 28346), -INT16_C( 18168), -INT16_C( 4844), -INT16_C( 32153), -INT16_C( 18656), INT16_C( 10513), INT16_C( 27369), -INT16_C( 19248), -INT16_C( 4844), -INT16_C( 23747) } }, { { INT16_C( 4844), INT16_C( 6038), -INT16_C( 1854), INT16_C( 24739), -INT16_C( 15969), INT16_C( 30186), 
-INT16_C( 17760), INT16_C( 19753), -INT16_C( 27477), -INT16_C( 15092), -INT16_C( 32400), -INT16_C( 14697), -INT16_C( 31497), INT16_C( 24283), -INT16_C( 7162), -INT16_C( 3154) }, { INT16_C( 17654), -INT16_C( 18166), -INT16_C( 21188), -INT16_C( 9191), INT16_C( 878), INT16_C( 3665), INT16_C( 31421), INT16_C( 26972), INT16_C( 26638), INT16_C( 32558), -INT16_C( 14871), -INT16_C( 8123), INT16_C( 8266), INT16_C( 20542), -INT16_C( 5115), -INT16_C( 1213) }, UINT16_C(19761), { INT16_C( 28084), -INT16_C( 12805), INT16_C( 26953), -INT16_C( 25904), -INT16_C( 29064), -INT16_C( 11244), INT16_C( 9207), INT16_C( 9532), INT16_C( 9890), -INT16_C( 6165), INT16_C( 13574), INT16_C( 17671), INT16_C( 3205), -INT16_C( 14031), INT16_C( 25096), -INT16_C( 17386) }, { INT16_C( 9207), -INT16_C( 18166), -INT16_C( 21188), -INT16_C( 9191), -INT16_C( 7162), -INT16_C( 12805), INT16_C( 31421), INT16_C( 26972), -INT16_C( 7162), INT16_C( 32558), -INT16_C( 15092), INT16_C( 30186), INT16_C( 8266), INT16_C( 20542), INT16_C( 30186), -INT16_C( 1213) } }, { { INT16_C( 4560), INT16_C( 6538), INT16_C( 23163), -INT16_C( 3148), -INT16_C( 14104), -INT16_C( 8249), INT16_C( 1003), -INT16_C( 29435), -INT16_C( 4055), INT16_C( 12404), INT16_C( 31781), -INT16_C( 21899), -INT16_C( 22904), -INT16_C( 28557), -INT16_C( 30199), -INT16_C( 9907) }, { -INT16_C( 10341), INT16_C( 5874), -INT16_C( 22991), INT16_C( 6665), -INT16_C( 12177), INT16_C( 23289), -INT16_C( 300), -INT16_C( 536), INT16_C( 23790), INT16_C( 4909), -INT16_C( 23848), INT16_C( 25022), INT16_C( 12617), INT16_C( 21233), INT16_C( 16059), INT16_C( 22315) }, UINT16_C( 7445), { INT16_C( 18285), INT16_C( 30660), INT16_C( 13153), INT16_C( 23111), INT16_C( 7053), INT16_C( 30041), INT16_C( 18201), INT16_C( 18130), -INT16_C( 21925), INT16_C( 6633), INT16_C( 12811), -INT16_C( 694), INT16_C( 1668), -INT16_C( 20677), INT16_C( 20829), -INT16_C( 13620) }, { -INT16_C( 694), INT16_C( 5874), INT16_C( 30660), INT16_C( 6665), -INT16_C( 9907), INT16_C( 23289), -INT16_C( 300), 
-INT16_C( 536), -INT16_C( 30199), INT16_C( 4909), -INT16_C( 21925), INT16_C( 20829), INT16_C( 12404), INT16_C( 21233), INT16_C( 16059), INT16_C( 22315) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_mm256_loadu_epi16(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi16(test_vec[i].idx); simde__m256i b = simde_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_mask2_permutex2var_epi16(a, idx, test_vec[i].k, b); simde_test_x86_assert_equal_i16x16(r, simde_mm256_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i16x16(); simde__m256i idx = simde_test_x86_random_i16x16(); simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m256i b = simde_test_x86_random_i16x16(); simde__m256i r = simde_mm256_mask2_permutex2var_epi16(a, idx, k, b); simde_test_x86_write_i16x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_maskz_permutex2var_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask16 k; const int16_t a[16]; const int16_t idx[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { UINT16_C(37016), { -INT16_C( 1727), -INT16_C( 30269), INT16_C( 20819), -INT16_C( 21340), -INT16_C( 16954), -INT16_C( 26380), INT16_C( 20228), -INT16_C( 4797), INT16_C( 20072), -INT16_C( 19937), -INT16_C( 23733), -INT16_C( 30792), INT16_C( 5458), INT16_C( 7896), INT16_C( 28896), INT16_C( 8623) }, { INT16_C( 29289), -INT16_C( 17238), INT16_C( 20419), -INT16_C( 30103), INT16_C( 23820), INT16_C( 4130), INT16_C( 26028), INT16_C( 5373), INT16_C( 7348), -INT16_C( 58), INT16_C( 32703), INT16_C( 4486), INT16_C( 24212), 
INT16_C( 29744), -INT16_C( 8242), INT16_C( 14230) }, { INT16_C( 16465), INT16_C( 5620), INT16_C( 23951), -INT16_C( 25441), -INT16_C( 15942), INT16_C( 26284), -INT16_C( 21977), -INT16_C( 9350), INT16_C( 16582), -INT16_C( 31014), INT16_C( 25023), INT16_C( 21655), -INT16_C( 14401), -INT16_C( 28984), INT16_C( 24230), -INT16_C( 1851) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 19937), INT16_C( 5458), INT16_C( 0), INT16_C( 0), -INT16_C( 28984), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 15942), INT16_C( 0), INT16_C( 0), -INT16_C( 21977) } }, { UINT16_C(47519), { INT16_C( 11789), -INT16_C( 21482), -INT16_C( 12086), INT16_C( 30573), -INT16_C( 27594), -INT16_C( 20447), -INT16_C( 6289), INT16_C( 19185), -INT16_C( 20371), INT16_C( 1451), INT16_C( 27140), -INT16_C( 12852), INT16_C( 29688), -INT16_C( 16853), -INT16_C( 13717), INT16_C( 30839) }, { -INT16_C( 28935), -INT16_C( 15580), -INT16_C( 28322), -INT16_C( 27334), INT16_C( 23334), -INT16_C( 27323), INT16_C( 13891), -INT16_C( 20257), -INT16_C( 29977), -INT16_C( 5195), -INT16_C( 32011), -INT16_C( 4680), -INT16_C( 6923), INT16_C( 24747), INT16_C( 9134), -INT16_C( 22568) }, { -INT16_C( 847), INT16_C( 3947), -INT16_C( 23155), -INT16_C( 19548), -INT16_C( 5631), INT16_C( 17481), INT16_C( 10272), INT16_C( 2036), -INT16_C( 21837), -INT16_C( 22285), -INT16_C( 21716), INT16_C( 8597), INT16_C( 16783), INT16_C( 16001), INT16_C( 22884), INT16_C( 5605) }, { -INT16_C( 22285), -INT16_C( 27594), INT16_C( 22884), -INT16_C( 21716), -INT16_C( 6289), INT16_C( 0), INT16_C( 0), INT16_C( 5605), INT16_C( 19185), INT16_C( 0), INT16_C( 0), -INT16_C( 21837), INT16_C( 17481), -INT16_C( 12852), INT16_C( 0), -INT16_C( 21837) } }, { UINT16_C(20565), { -INT16_C( 7644), -INT16_C( 13834), -INT16_C( 2154), -INT16_C( 8269), -INT16_C( 11461), INT16_C( 12039), -INT16_C( 17701), -INT16_C( 12583), INT16_C( 1378), -INT16_C( 1927), INT16_C( 2342), -INT16_C( 22727), -INT16_C( 25273), INT16_C( 11264), INT16_C( 21938), -INT16_C( 10627) }, { 
INT16_C( 29496), -INT16_C( 12641), INT16_C( 21098), -INT16_C( 23123), -INT16_C( 19418), INT16_C( 468), -INT16_C( 20881), -INT16_C( 11825), INT16_C( 18611), -INT16_C( 9527), INT16_C( 593), -INT16_C( 26495), -INT16_C( 32097), INT16_C( 20933), INT16_C( 17111), INT16_C( 3880) }, { -INT16_C( 14411), INT16_C( 8157), -INT16_C( 30182), INT16_C( 16580), -INT16_C( 26561), -INT16_C( 20927), INT16_C( 4166), -INT16_C( 1409), INT16_C( 18776), -INT16_C( 21804), INT16_C( 21835), -INT16_C( 5310), INT16_C( 2007), -INT16_C( 20676), INT16_C( 25673), -INT16_C( 322) }, { INT16_C( 18776), INT16_C( 0), INT16_C( 2342), INT16_C( 0), -INT16_C( 17701), INT16_C( 0), -INT16_C( 10627), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 322), INT16_C( 0), -INT16_C( 1409), INT16_C( 0) } }, { UINT16_C(39980), { INT16_C( 17949), -INT16_C( 7898), INT16_C( 25990), -INT16_C( 14470), -INT16_C( 16365), -INT16_C( 27689), INT16_C( 12218), -INT16_C( 28964), INT16_C( 10201), INT16_C( 7396), -INT16_C( 17646), INT16_C( 20259), INT16_C( 28010), INT16_C( 10675), -INT16_C( 8341), -INT16_C( 30267) }, { -INT16_C( 5339), -INT16_C( 21654), -INT16_C( 7087), INT16_C( 25714), INT16_C( 18853), INT16_C( 24567), -INT16_C( 11399), INT16_C( 21230), -INT16_C( 11525), INT16_C( 3438), -INT16_C( 28019), -INT16_C( 1956), INT16_C( 4351), INT16_C( 27169), -INT16_C( 6417), INT16_C( 5619) }, { INT16_C( 24273), INT16_C( 8896), INT16_C( 13122), -INT16_C( 6265), INT16_C( 32380), -INT16_C( 2745), INT16_C( 13650), INT16_C( 19784), -INT16_C( 18937), -INT16_C( 27558), -INT16_C( 18616), INT16_C( 18316), -INT16_C( 21049), -INT16_C( 18766), -INT16_C( 23149), INT16_C( 26059) }, { INT16_C( 0), INT16_C( 0), INT16_C( 8896), INT16_C( 13122), INT16_C( 0), INT16_C( 19784), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 10675), -INT16_C( 21049), INT16_C( 26059), INT16_C( 0), INT16_C( 0), -INT16_C( 6265) } }, { UINT16_C(35843), { INT16_C( 18055), INT16_C( 3775), INT16_C( 15149), INT16_C( 29837), -INT16_C( 8399), 
INT16_C( 31145), -INT16_C( 20436), -INT16_C( 31185), INT16_C( 30789), -INT16_C( 11971), INT16_C( 1215), INT16_C( 29055), INT16_C( 4795), -INT16_C( 31209), INT16_C( 6775), -INT16_C( 238) }, { -INT16_C( 11936), -INT16_C( 29171), -INT16_C( 26099), INT16_C( 15874), -INT16_C( 21383), -INT16_C( 23113), -INT16_C( 6564), -INT16_C( 24276), INT16_C( 26974), INT16_C( 7795), -INT16_C( 3474), INT16_C( 10639), -INT16_C( 23036), INT16_C( 31919), -INT16_C( 15679), INT16_C( 8571) }, { -INT16_C( 30573), -INT16_C( 24401), -INT16_C( 19933), -INT16_C( 25378), -INT16_C( 27298), -INT16_C( 17854), INT16_C( 28284), -INT16_C( 9636), -INT16_C( 12329), INT16_C( 17912), -INT16_C( 30527), -INT16_C( 14994), INT16_C( 7726), -INT16_C( 4287), -INT16_C( 17184), INT16_C( 29457) }, { INT16_C( 18055), -INT16_C( 31209), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 6775), -INT16_C( 238), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 14994) } }, { UINT16_C(49221), { INT16_C( 26644), -INT16_C( 3470), -INT16_C( 12284), INT16_C( 18056), INT16_C( 1163), -INT16_C( 6220), -INT16_C( 29474), -INT16_C( 10314), INT16_C( 30673), INT16_C( 16479), -INT16_C( 29380), INT16_C( 32350), INT16_C( 15997), -INT16_C( 29126), INT16_C( 32689), -INT16_C( 15026) }, { -INT16_C( 15897), -INT16_C( 4936), INT16_C( 16529), INT16_C( 7218), -INT16_C( 6332), INT16_C( 8707), -INT16_C( 18061), INT16_C( 17657), INT16_C( 22576), INT16_C( 28036), -INT16_C( 7450), INT16_C( 25579), INT16_C( 9504), -INT16_C( 11535), INT16_C( 16293), -INT16_C( 29545) }, { INT16_C( 20224), -INT16_C( 28040), -INT16_C( 21617), -INT16_C( 11346), -INT16_C( 19822), INT16_C( 1526), -INT16_C( 4245), -INT16_C( 25527), -INT16_C( 12728), INT16_C( 11785), -INT16_C( 2896), -INT16_C( 11887), -INT16_C( 32231), -INT16_C( 16733), INT16_C( 15041), -INT16_C( 15797) }, { -INT16_C( 10314), INT16_C( 0), -INT16_C( 28040), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 11346), INT16_C( 0), INT16_C( 0), INT16_C( 0), 
INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 6220), -INT16_C( 25527) } }, { UINT16_C(50058), { INT16_C( 6484), INT16_C( 622), INT16_C( 237), -INT16_C( 7244), INT16_C( 8197), INT16_C( 20434), INT16_C( 6844), -INT16_C( 15075), -INT16_C( 12984), -INT16_C( 9799), -INT16_C( 11618), INT16_C( 16731), INT16_C( 7569), -INT16_C( 9092), INT16_C( 1759), INT16_C( 13215) }, { INT16_C( 3615), INT16_C( 3125), -INT16_C( 5618), INT16_C( 5359), -INT16_C( 15862), -INT16_C( 14749), -INT16_C( 32548), INT16_C( 9611), INT16_C( 17485), -INT16_C( 4866), INT16_C( 23062), -INT16_C( 22739), -INT16_C( 22153), INT16_C( 22147), INT16_C( 9135), -INT16_C( 12407) }, { -INT16_C( 16847), INT16_C( 16347), -INT16_C( 13400), -INT16_C( 19885), -INT16_C( 18803), INT16_C( 27000), INT16_C( 822), -INT16_C( 31602), -INT16_C( 29369), INT16_C( 24176), -INT16_C( 25113), INT16_C( 24069), -INT16_C( 30393), -INT16_C( 2380), INT16_C( 15788), -INT16_C( 8763) }, { INT16_C( 0), INT16_C( 27000), INT16_C( 0), INT16_C( 13215), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 16731), -INT16_C( 9092), INT16_C( 15788), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 13215), -INT16_C( 9799) } }, { UINT16_C(41467), { -INT16_C( 23524), INT16_C( 28780), -INT16_C( 1706), -INT16_C( 12506), INT16_C( 23906), -INT16_C( 3630), INT16_C( 6881), INT16_C( 20862), INT16_C( 25976), INT16_C( 32238), INT16_C( 13763), INT16_C( 30470), -INT16_C( 19924), -INT16_C( 3660), -INT16_C( 20593), -INT16_C( 21358) }, { -INT16_C( 429), -INT16_C( 21988), INT16_C( 17143), INT16_C( 23161), INT16_C( 19359), -INT16_C( 32693), -INT16_C( 13979), -INT16_C( 8751), -INT16_C( 16338), -INT16_C( 3749), INT16_C( 25077), INT16_C( 8552), INT16_C( 7188), -INT16_C( 23789), -INT16_C( 23093), INT16_C( 8015) }, { INT16_C( 27556), -INT16_C( 25655), INT16_C( 17070), INT16_C( 19957), INT16_C( 16525), -INT16_C( 3122), -INT16_C( 24823), INT16_C( 14288), INT16_C( 11103), INT16_C( 21800), -INT16_C( 28531), -INT16_C( 24202), -INT16_C( 30292), INT16_C( 
30788), -INT16_C( 27601), -INT16_C( 11369) }, { INT16_C( 19957), -INT16_C( 30292), INT16_C( 0), INT16_C( 21800), -INT16_C( 11369), INT16_C( 30470), -INT16_C( 3630), -INT16_C( 25655), -INT16_C( 20593), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 19957), INT16_C( 0), -INT16_C( 21358) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_mm256_loadu_epi16(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi16(test_vec[i].idx); simde__m256i b = simde_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_maskz_permutex2var_epi16(test_vec[i].k, a, idx, b); simde_test_x86_assert_equal_i16x16(r, simde_mm256_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m256i a = simde_test_x86_random_i16x16(); simde__m256i idx = simde_test_x86_random_i16x16(); simde__m256i b = simde_test_x86_random_i16x16(); simde__m256i r = simde_mm256_maskz_permutex2var_epi16(k, a, idx, b); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_permutex2var_epi32(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const int32_t a[8]; const int32_t idx[8]; const int32_t b[8]; const int32_t r[8]; } test_vec[] = { { { -INT32_C( 1385275137), INT32_C( 805004450), -INT32_C( 1373451868), -INT32_C( 924454040), -INT32_C( 1424159202), INT32_C( 1263309726), INT32_C( 1287885085), INT32_C( 606034469) }, { INT32_C( 1557302714), -INT32_C( 1769222671), -INT32_C( 29053290), -INT32_C( 1077532255), -INT32_C( 697572553), -INT32_C( 1809729673), INT32_C( 1843455048), -INT32_C( 124584130) }, { INT32_C( 2119459980), -INT32_C( 954933455), 
INT32_C( 784685197), -INT32_C( 1175548799), -INT32_C( 426813073), INT32_C( 1501212688), -INT32_C( 758752620), -INT32_C( 422946727) }, { INT32_C( 784685197), INT32_C( 805004450), INT32_C( 1287885085), INT32_C( 805004450), INT32_C( 606034469), INT32_C( 606034469), INT32_C( 2119459980), -INT32_C( 758752620) } }, { { -INT32_C( 312205636), -INT32_C( 1951106819), INT32_C( 1371110096), INT32_C( 1963632390), INT32_C( 291281152), -INT32_C( 580200887), -INT32_C( 1968230351), INT32_C( 1164999049) }, { -INT32_C( 1791765353), INT32_C( 471918412), INT32_C( 1735317857), -INT32_C( 2116192127), INT32_C( 1536309522), INT32_C( 1077541903), -INT32_C( 1228150739), -INT32_C( 100975774) }, { INT32_C( 1552821776), INT32_C( 2004463126), INT32_C( 165668744), INT32_C( 1904917599), INT32_C( 80551413), INT32_C( 1178928665), INT32_C( 1358696686), INT32_C( 1531639883) }, { INT32_C( 1164999049), INT32_C( 80551413), -INT32_C( 1951106819), -INT32_C( 1951106819), INT32_C( 1371110096), INT32_C( 1531639883), INT32_C( 1178928665), INT32_C( 1371110096) } }, { { INT32_C( 1018746918), INT32_C( 263467399), INT32_C( 1998099224), INT32_C( 1156162127), -INT32_C( 649546049), -INT32_C( 1440772676), -INT32_C( 369419107), INT32_C( 977552660) }, { -INT32_C( 1535640547), INT32_C( 1169369901), INT32_C( 230542270), INT32_C( 760325742), INT32_C( 403085660), -INT32_C( 993909209), INT32_C( 1454226754), INT32_C( 546370050) }, { INT32_C( 482609134), -INT32_C( 262047694), -INT32_C( 1308811709), INT32_C( 551505604), INT32_C( 255387112), INT32_C( 1305737995), -INT32_C( 1146912328), INT32_C( 1641755763) }, { INT32_C( 1305737995), INT32_C( 1305737995), -INT32_C( 1146912328), -INT32_C( 1146912328), INT32_C( 255387112), INT32_C( 977552660), INT32_C( 1998099224), INT32_C( 1998099224) } }, { { INT32_C( 1853726523), INT32_C( 1532944151), -INT32_C( 1039377155), -INT32_C( 1830622294), -INT32_C( 593421359), -INT32_C( 836078314), INT32_C( 1770638838), INT32_C( 1036674049) }, { INT32_C( 464209924), INT32_C( 611715367), INT32_C( 
283542373), INT32_C( 1067633006), -INT32_C( 98810652), -INT32_C( 1345829191), INT32_C( 353915411), -INT32_C( 1168973130) }, { INT32_C( 1373044010), INT32_C( 1819692038), INT32_C( 1048337615), INT32_C( 159194661), INT32_C( 453220706), -INT32_C( 204813089), -INT32_C( 737615330), -INT32_C( 275817787) }, { -INT32_C( 593421359), INT32_C( 1036674049), -INT32_C( 836078314), -INT32_C( 737615330), -INT32_C( 593421359), INT32_C( 1819692038), -INT32_C( 1830622294), INT32_C( 1770638838) } }, { { INT32_C( 1564566871), -INT32_C( 2117486671), INT32_C( 968836371), -INT32_C( 968737692), -INT32_C( 1243461930), INT32_C( 816360466), INT32_C( 1409593487), INT32_C( 1631818506) }, { -INT32_C( 1430289160), INT32_C( 1328252987), INT32_C( 847833806), -INT32_C( 50804186), INT32_C( 582146576), INT32_C( 374495879), INT32_C( 359290635), -INT32_C( 478695958) }, { INT32_C( 1837970994), -INT32_C( 1933788994), -INT32_C( 927054686), INT32_C( 533051151), INT32_C( 407009169), -INT32_C( 600927023), -INT32_C( 688744213), INT32_C( 2025417030) }, { INT32_C( 1837970994), INT32_C( 533051151), -INT32_C( 688744213), INT32_C( 1409593487), INT32_C( 1564566871), INT32_C( 1631818506), INT32_C( 533051151), -INT32_C( 927054686) } }, { { INT32_C( 1575306911), -INT32_C( 1595235586), -INT32_C( 177690394), -INT32_C( 250270369), INT32_C( 1980323748), -INT32_C( 682477333), INT32_C( 380454096), INT32_C( 1284400813) }, { -INT32_C( 1431669588), -INT32_C( 62221290), -INT32_C( 1661816260), -INT32_C( 2071132192), INT32_C( 1241159262), -INT32_C( 1625272882), INT32_C( 1068879249), -INT32_C( 544521165) }, { -INT32_C( 829868616), INT32_C( 113955785), INT32_C( 1721941126), INT32_C( 568995779), -INT32_C( 1804868155), -INT32_C( 1020032206), -INT32_C( 1945966503), -INT32_C( 462647764) }, { -INT32_C( 1804868155), INT32_C( 380454096), -INT32_C( 1804868155), INT32_C( 1575306911), -INT32_C( 1945966503), -INT32_C( 1945966503), -INT32_C( 1595235586), -INT32_C( 250270369) } }, { { -INT32_C( 1917651517), INT32_C( 1335066057), -INT32_C( 
38456007), INT32_C( 689872740), -INT32_C( 1229092476), INT32_C( 1853550613), INT32_C( 100302041), -INT32_C( 823499254) }, { INT32_C( 626760796), INT32_C( 1400172057), -INT32_C( 2024789725), INT32_C( 1303408328), INT32_C( 218328824), INT32_C( 930839902), INT32_C( 71136762), INT32_C( 953296860) }, { -INT32_C( 581095997), INT32_C( 1043386651), -INT32_C( 1027243782), -INT32_C( 435194130), INT32_C( 1123226596), -INT32_C( 1971687792), -INT32_C( 1064323100), -INT32_C( 1577557538) }, { INT32_C( 1123226596), INT32_C( 1043386651), INT32_C( 689872740), -INT32_C( 581095997), -INT32_C( 581095997), -INT32_C( 1064323100), -INT32_C( 1027243782), INT32_C( 1123226596) } }, { { -INT32_C( 1434561137), INT32_C( 552185382), INT32_C( 501394990), INT32_C( 134476324), -INT32_C( 1790183675), INT32_C( 1226884453), INT32_C( 1510584188), -INT32_C( 1610940144) }, { INT32_C( 2085255766), INT32_C( 1469854504), INT32_C( 108298209), INT32_C( 1980659569), -INT32_C( 737453714), -INT32_C( 1709364450), -INT32_C( 336320550), INT32_C( 2139811880) }, { -INT32_C( 1828989846), -INT32_C( 370567161), -INT32_C( 1997578985), INT32_C( 1140784597), INT32_C( 1964443991), INT32_C( 252720437), -INT32_C( 2063989668), INT32_C( 1577289204) }, { INT32_C( 1510584188), -INT32_C( 1828989846), INT32_C( 552185382), INT32_C( 552185382), -INT32_C( 2063989668), -INT32_C( 2063989668), -INT32_C( 1997578985), -INT32_C( 1828989846) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i a = simde_mm256_loadu_epi32(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi32(test_vec[i].idx); simde__m256i b = simde_mm256_loadu_epi32(test_vec[i].b); simde__m256i r = simde_mm256_permutex2var_epi32(a, idx, b); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i32x8(); simde__m256i idx = simde_test_x86_random_i32x8(); simde__m256i b = simde_test_x86_random_i32x8(); 
simde__m256i r = simde_mm256_permutex2var_epi32(a, idx, b); simde_test_x86_write_i32x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask_permutex2var_epi32(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const int32_t a[8]; const simde__mmask8 k; const int32_t idx[8]; const int32_t b[8]; const int32_t r[8]; } test_vec[] = { { { INT32_C( 1643249497), -INT32_C( 1370826089), INT32_C( 221657400), -INT32_C( 1924123594), INT32_C( 1912825917), -INT32_C( 108915811), INT32_C( 209550359), INT32_C( 1533706497) }, UINT8_C(128), { INT32_C( 907525211), INT32_C( 1064224006), INT32_C( 796228603), INT32_C( 862716875), INT32_C( 433118982), -INT32_C( 584005279), -INT32_C( 924894137), INT32_C( 38288039) }, { -INT32_C( 63414282), INT32_C( 524002852), -INT32_C( 313610207), -INT32_C( 1189037389), -INT32_C( 86838887), INT32_C( 30933946), -INT32_C( 422987969), -INT32_C( 404155919) }, { INT32_C( 1643249497), -INT32_C( 1370826089), INT32_C( 221657400), -INT32_C( 1924123594), INT32_C( 1912825917), -INT32_C( 108915811), INT32_C( 209550359), INT32_C( 1533706497) } }, { { -INT32_C( 1797054096), -INT32_C( 374137144), -INT32_C( 2099904050), INT32_C( 1429993404), -INT32_C( 1571811608), INT32_C( 1352935441), -INT32_C( 801673761), -INT32_C( 273211265) }, UINT8_C( 65), { -INT32_C( 1207335782), INT32_C( 965145399), -INT32_C( 1057683255), INT32_C( 1386760772), -INT32_C( 1033679974), INT32_C( 1554101231), INT32_C( 182153706), -INT32_C( 1035154648) }, { -INT32_C( 2038803121), INT32_C( 314507592), INT32_C( 1305654281), -INT32_C( 1717601281), -INT32_C( 1252261178), -INT32_C( 1609433674), -INT32_C( 1750340241), INT32_C( 123402168) }, { INT32_C( 1305654281), -INT32_C( 374137144), -INT32_C( 2099904050), INT32_C( 1429993404), -INT32_C( 1571811608), INT32_C( 1352935441), INT32_C( 1305654281), 
-INT32_C( 273211265) } }, { { -INT32_C( 1785867188), -INT32_C( 542683691), INT32_C( 2914561), -INT32_C( 1164260108), -INT32_C( 2073037106), INT32_C( 1646625267), INT32_C( 653971566), INT32_C( 321737927) }, UINT8_C( 40), { INT32_C( 134129850), -INT32_C( 922100401), -INT32_C( 709031671), -INT32_C( 1717274461), INT32_C( 1754081511), INT32_C( 500625229), INT32_C( 1038417129), -INT32_C( 463013847) }, { -INT32_C( 253008736), INT32_C( 1253700673), -INT32_C( 1591707906), -INT32_C( 700726034), INT32_C( 977193196), -INT32_C( 1588128329), INT32_C( 1004420113), -INT32_C( 736148428) }, { -INT32_C( 1785867188), -INT32_C( 542683691), INT32_C( 2914561), -INT32_C( 1164260108), -INT32_C( 2073037106), -INT32_C( 1588128329), INT32_C( 653971566), INT32_C( 321737927) } }, { { -INT32_C( 373028184), -INT32_C( 46891521), -INT32_C( 492940044), INT32_C( 79223064), INT32_C( 1497298849), INT32_C( 502961676), INT32_C( 106485970), -INT32_C( 975538403) }, UINT8_C(130), { INT32_C( 478261151), INT32_C( 923827939), -INT32_C( 162532580), -INT32_C( 1567140949), INT32_C( 682553490), -INT32_C( 1006973718), -INT32_C( 1663041500), INT32_C( 2032051674) }, { INT32_C( 932618068), INT32_C( 963552797), INT32_C( 1160756633), -INT32_C( 1545091311), -INT32_C( 1563715913), -INT32_C( 2040150686), -INT32_C( 1625143867), INT32_C( 1058619626) }, { -INT32_C( 373028184), INT32_C( 79223064), -INT32_C( 492940044), INT32_C( 79223064), INT32_C( 1497298849), INT32_C( 502961676), INT32_C( 106485970), INT32_C( 1160756633) } }, { { -INT32_C( 59330593), -INT32_C( 281615019), -INT32_C( 1288411742), -INT32_C( 464118996), INT32_C( 327557553), -INT32_C( 1415910426), INT32_C( 441170992), -INT32_C( 598121219) }, UINT8_C( 19), { -INT32_C( 1251419696), INT32_C( 1951880975), -INT32_C( 1482617973), -INT32_C( 2091350687), -INT32_C( 160863221), -INT32_C( 1037691642), -INT32_C( 994099104), INT32_C( 1792515226) }, { -INT32_C( 2078326923), INT32_C( 586708631), -INT32_C( 473327231), INT32_C( 694559262), -INT32_C( 1809854578), INT32_C( 
1146504676), INT32_C( 537400966), INT32_C( 663412658) }, { -INT32_C( 59330593), INT32_C( 663412658), -INT32_C( 1288411742), -INT32_C( 464118996), INT32_C( 694559262), -INT32_C( 1415910426), INT32_C( 441170992), -INT32_C( 598121219) } }, { { -INT32_C( 1230264033), -INT32_C( 1579637985), INT32_C( 1535418941), INT32_C( 1384508100), -INT32_C( 1629051719), INT32_C( 1893874922), INT32_C( 76606290), -INT32_C( 383051062) }, UINT8_C(195), { INT32_C( 2061672406), INT32_C( 448234360), -INT32_C( 237104633), INT32_C( 1017852055), INT32_C( 1378240790), INT32_C( 379950635), INT32_C( 1088530726), -INT32_C( 1425814827) }, { -INT32_C( 500832918), INT32_C( 1895619689), -INT32_C( 2040341777), INT32_C( 549588234), -INT32_C( 2123175850), -INT32_C( 1533536386), -INT32_C( 1763411519), -INT32_C( 1388189885) }, { INT32_C( 76606290), -INT32_C( 500832918), INT32_C( 1535418941), INT32_C( 1384508100), -INT32_C( 1629051719), INT32_C( 1893874922), INT32_C( 76606290), INT32_C( 1893874922) } }, { { INT32_C( 915367885), INT32_C( 849775427), INT32_C( 1874397285), INT32_C( 1804565269), -INT32_C( 487783836), -INT32_C( 628652775), INT32_C( 1097886718), INT32_C( 552579667) }, UINT8_C( 25), { INT32_C( 173823614), INT32_C( 91197436), -INT32_C( 1021649080), INT32_C( 1881638510), -INT32_C( 125171085), -INT32_C( 50961263), -INT32_C( 2024851499), -INT32_C( 1516212442) }, { -INT32_C( 1028653883), -INT32_C( 741925237), INT32_C( 1821893117), -INT32_C( 623067545), INT32_C( 1506961096), -INT32_C( 1604925237), INT32_C( 640132352), -INT32_C( 624179435) }, { INT32_C( 640132352), INT32_C( 849775427), INT32_C( 1874397285), INT32_C( 640132352), INT32_C( 1804565269), -INT32_C( 628652775), INT32_C( 1097886718), INT32_C( 552579667) } }, { { INT32_C( 1335655107), -INT32_C( 1776131175), -INT32_C( 1409107643), INT32_C( 1082646392), INT32_C( 278550853), INT32_C( 582021154), -INT32_C( 1438066795), INT32_C( 1652888734) }, UINT8_C(142), { -INT32_C( 2060996319), -INT32_C( 1916092717), -INT32_C( 1627031872), INT32_C( 1474643453), 
-INT32_C( 814090785), INT32_C( 2103810982), -INT32_C( 149155869), -INT32_C( 1232699756) }, { INT32_C( 37465391), INT32_C( 747570539), INT32_C( 2043384955), -INT32_C( 1177505574), INT32_C( 1284065702), -INT32_C( 943067420), -INT32_C( 1832917507), -INT32_C( 1840757405) }, { INT32_C( 1335655107), INT32_C( 1082646392), INT32_C( 1335655107), -INT32_C( 943067420), INT32_C( 278550853), INT32_C( 582021154), -INT32_C( 1438066795), INT32_C( 278550853) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i a = simde_mm256_loadu_epi32(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi32(test_vec[i].idx); simde__m256i b = simde_mm256_loadu_epi32(test_vec[i].b); simde__m256i r = simde_mm256_mask_permutex2var_epi32(a, test_vec[i].k, idx, b); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i32x8(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i idx = simde_test_x86_random_i32x8(); simde__m256i b = simde_test_x86_random_i32x8(); simde__m256i r = simde_mm256_mask_permutex2var_epi32(a, k, idx, b); simde_test_x86_write_i32x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask2_permutex2var_epi32(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const int32_t a[8]; const int32_t idx[8]; const simde__mmask8 k; const int32_t b[8]; const int32_t r[8]; } test_vec[] = { { { INT32_C( 1586856946), INT32_C( 59384968), -INT32_C( 1820568135), -INT32_C( 1404285946), INT32_C( 2046350741), -INT32_C( 1052655165), INT32_C( 173211815), INT32_C( 933075781) }, { -INT32_C( 1500171746), INT32_C( 262741846), INT32_C( 2074224245), INT32_C( 
136834930), -INT32_C( 2004803644), -INT32_C( 2025209120), INT32_C( 127048898), INT32_C( 1430204215) }, UINT8_C( 97), { -INT32_C( 189269036), -INT32_C( 899037276), INT32_C( 1480451177), INT32_C( 706561291), -INT32_C( 1995790906), -INT32_C( 1974758674), INT32_C( 1405178660), INT32_C( 1723078290) }, { INT32_C( 1405178660), INT32_C( 262741846), INT32_C( 2074224245), INT32_C( 136834930), -INT32_C( 2004803644), INT32_C( 1586856946), -INT32_C( 1820568135), INT32_C( 1430204215) } }, { { -INT32_C( 1252365551), -INT32_C( 1669283022), -INT32_C( 1292583513), -INT32_C( 925101822), -INT32_C( 1538136138), -INT32_C( 1657889415), -INT32_C( 2098139152), INT32_C( 384345093) }, { INT32_C( 1120682767), -INT32_C( 1361163258), INT32_C( 190894601), -INT32_C( 1697432092), -INT32_C( 1656806108), -INT32_C( 1304793662), INT32_C( 1647585885), -INT32_C( 579265330) }, UINT8_C( 95), { -INT32_C( 1855578299), -INT32_C( 795208451), -INT32_C( 1313561228), -INT32_C( 1646965128), -INT32_C( 77630835), -INT32_C( 698871380), INT32_C( 1654962757), INT32_C( 2025947699) }, { INT32_C( 2025947699), -INT32_C( 2098139152), -INT32_C( 795208451), -INT32_C( 1538136138), -INT32_C( 1538136138), -INT32_C( 1304793662), -INT32_C( 698871380), -INT32_C( 579265330) } }, { { -INT32_C( 1626789983), -INT32_C( 1334860997), -INT32_C( 1050598584), -INT32_C( 10602639), INT32_C( 1459273385), INT32_C( 355226319), INT32_C( 1081594124), -INT32_C( 189253549) }, { -INT32_C( 1684815264), -INT32_C( 1370815899), -INT32_C( 1754288859), -INT32_C( 1919496732), INT32_C( 1541640331), -INT32_C( 294645534), INT32_C( 875489249), INT32_C( 2133387039) }, UINT8_C(169), { -INT32_C( 1106371652), INT32_C( 333757542), -INT32_C( 101221589), -INT32_C( 1585150703), INT32_C( 2021908328), INT32_C( 911831375), -INT32_C( 2024370784), INT32_C( 1915803062) }, { -INT32_C( 1626789983), -INT32_C( 1370815899), -INT32_C( 1754288859), INT32_C( 1459273385), INT32_C( 1541640331), -INT32_C( 1050598584), INT32_C( 875489249), INT32_C( 1915803062) } }, { { INT32_C( 
1462779632), INT32_C( 644486395), -INT32_C( 1608555889), INT32_C( 1329702119), -INT32_C( 741817213), -INT32_C( 704044746), INT32_C( 1700618159), INT32_C( 634883381) }, { -INT32_C( 964950069), -INT32_C( 1410472164), INT32_C( 810290249), INT32_C( 880774576), -INT32_C( 2012788910), INT32_C( 408817769), -INT32_C( 1518486672), INT32_C( 332026952) }, UINT8_C( 91), { INT32_C( 779606599), -INT32_C( 747166777), -INT32_C( 58415249), INT32_C( 1850652710), -INT32_C( 807938369), -INT32_C( 281022668), -INT32_C( 1070078868), -INT32_C( 165983313) }, { INT32_C( 1850652710), -INT32_C( 807938369), INT32_C( 810290249), INT32_C( 1462779632), -INT32_C( 1608555889), INT32_C( 408817769), INT32_C( 1462779632), INT32_C( 332026952) } }, { { -INT32_C( 333147355), INT32_C( 633314230), INT32_C( 1747010370), -INT32_C( 1160351493), INT32_C( 2072685894), INT32_C( 141216156), INT32_C( 1573429934), INT32_C( 307487981) }, { INT32_C( 771651447), INT32_C( 1414708498), -INT32_C( 54758655), INT32_C( 716674019), -INT32_C( 576372416), -INT32_C( 1192947958), -INT32_C( 1642746191), INT32_C( 162556050) }, UINT8_C(223), { INT32_C( 1827747502), -INT32_C( 76724856), -INT32_C( 1780520702), INT32_C( 1641351456), -INT32_C( 1116949842), INT32_C( 1181688984), -INT32_C( 1562899142), INT32_C( 1820516798) }, { INT32_C( 307487981), INT32_C( 1747010370), INT32_C( 633314230), -INT32_C( 1160351493), -INT32_C( 333147355), -INT32_C( 1192947958), INT32_C( 633314230), INT32_C( 1747010370) } }, { { -INT32_C( 1613204713), -INT32_C( 1147517511), -INT32_C( 816809553), INT32_C( 808527234), INT32_C( 1894686168), -INT32_C( 71934527), INT32_C( 681479786), -INT32_C( 2037047185) }, { INT32_C( 1277521299), INT32_C( 1644674995), -INT32_C( 1154394311), INT32_C( 1441555069), -INT32_C( 1043997953), -INT32_C( 1581483209), INT32_C( 2026527241), INT32_C( 251551610) }, UINT8_C(204), { -INT32_C( 494970333), -INT32_C( 1189355166), INT32_C( 1966528275), -INT32_C( 1653240893), -INT32_C( 875284912), INT32_C( 1305769458), -INT32_C( 1631105985), 
INT32_C( 1835783498) }, { INT32_C( 1277521299), INT32_C( 1644674995), -INT32_C( 1189355166), INT32_C( 1305769458), -INT32_C( 1043997953), -INT32_C( 1581483209), -INT32_C( 1189355166), INT32_C( 1966528275) } }, { { -INT32_C( 1840190928), -INT32_C( 548705332), INT32_C( 89424450), INT32_C( 1587726605), -INT32_C( 232163585), INT32_C( 725614316), -INT32_C( 1781922230), INT32_C( 201471452) }, { -INT32_C( 341945825), INT32_C( 13298110), INT32_C( 2030444395), -INT32_C( 388519704), INT32_C( 198836255), INT32_C( 1228282366), -INT32_C( 69337057), INT32_C( 1426579509) }, UINT8_C( 51), { -INT32_C( 1880014683), INT32_C( 721088779), -INT32_C( 1609403401), INT32_C( 1270872650), -INT32_C( 313931052), INT32_C( 17666560), INT32_C( 1362495600), -INT32_C( 1249604848) }, { -INT32_C( 1249604848), INT32_C( 1362495600), INT32_C( 2030444395), -INT32_C( 388519704), -INT32_C( 1249604848), INT32_C( 1362495600), -INT32_C( 69337057), INT32_C( 1426579509) } }, { { -INT32_C( 683379252), INT32_C( 1577140070), -INT32_C( 33680462), -INT32_C( 498549490), -INT32_C( 2016374393), -INT32_C( 1802969820), -INT32_C( 169492507), INT32_C( 380332362) }, { INT32_C( 1173221342), -INT32_C( 509350354), INT32_C( 283025666), -INT32_C( 437115298), -INT32_C( 613629257), -INT32_C( 2056194657), -INT32_C( 25537100), -INT32_C( 1659558465) }, UINT8_C( 21), { -INT32_C( 247209470), INT32_C( 653468805), INT32_C( 679740162), -INT32_C( 1193252363), -INT32_C( 883442730), -INT32_C( 2122326997), INT32_C( 2084601175), -INT32_C( 1785602670) }, { INT32_C( 2084601175), -INT32_C( 509350354), -INT32_C( 33680462), -INT32_C( 437115298), INT32_C( 380332362), -INT32_C( 2056194657), -INT32_C( 25537100), -INT32_C( 1659558465) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i a = simde_mm256_loadu_epi32(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi32(test_vec[i].idx); simde__m256i b = simde_mm256_loadu_epi32(test_vec[i].b); simde__m256i r = simde_mm256_mask2_permutex2var_epi32(a, idx, 
test_vec[i].k, b); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i32x8(); simde__m256i idx = simde_test_x86_random_i32x8(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i b = simde_test_x86_random_i32x8(); simde__m256i r = simde_mm256_mask2_permutex2var_epi32(a, idx, k, b); simde_test_x86_write_i32x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_maskz_permutex2var_epi32(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde__mmask8 k; const int32_t a[8]; const int32_t idx[8]; const int32_t b[8]; const int32_t r[8]; } test_vec[] = { { UINT8_C(192), { -INT32_C( 112884011), INT32_C( 2096917625), INT32_C( 1517364464), -INT32_C( 1070585596), INT32_C( 1575746433), -INT32_C( 139170694), -INT32_C( 1970654804), -INT32_C( 1756749886) }, { INT32_C( 512855973), -INT32_C( 325415429), -INT32_C( 1236923471), -INT32_C( 1233750475), -INT32_C( 351051407), INT32_C( 2061682894), -INT32_C( 1157337863), INT32_C( 810765963) }, { -INT32_C( 649141026), INT32_C( 583395441), INT32_C( 702024692), -INT32_C( 220246399), INT32_C( 2128474800), -INT32_C( 1275543366), -INT32_C( 1200620244), INT32_C( 703119947) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 583395441), -INT32_C( 220246399) } }, { UINT8_C(166), { INT32_C( 521601847), -INT32_C( 736937528), INT32_C( 1616198674), INT32_C( 235948059), -INT32_C( 439841243), -INT32_C( 2079163257), -INT32_C( 1378891029), -INT32_C( 380372558) }, { -INT32_C( 1006015748), -INT32_C( 1248322397), INT32_C( 1947659865), INT32_C( 1535256118), INT32_C( 1010912181), -INT32_C( 1296018489), 
-INT32_C( 815821027), -INT32_C( 2068204920) }, { -INT32_C( 1085685220), INT32_C( 930472414), INT32_C( 95194063), INT32_C( 1717644977), INT32_C( 1084400249), INT32_C( 317874933), INT32_C( 2061586930), INT32_C( 536844803) }, { INT32_C( 0), INT32_C( 235948059), INT32_C( 930472414), INT32_C( 0), INT32_C( 0), -INT32_C( 380372558), INT32_C( 0), -INT32_C( 1085685220) } }, { UINT8_C( 92), { INT32_C( 691789640), -INT32_C( 537300396), INT32_C( 1301347870), INT32_C( 29816671), -INT32_C( 50985063), INT32_C( 1273891065), -INT32_C( 2075236118), -INT32_C( 1344180633) }, { -INT32_C( 1579606963), -INT32_C( 1384066418), INT32_C( 821694672), -INT32_C( 1590575097), -INT32_C( 1046665017), INT32_C( 437029680), INT32_C( 1537170163), INT32_C( 353009864) }, { INT32_C( 716628892), -INT32_C( 2032716107), INT32_C( 1320603975), INT32_C( 1508894610), INT32_C( 1075481615), INT32_C( 190457367), INT32_C( 1231485313), INT32_C( 358576249) }, { INT32_C( 0), INT32_C( 0), INT32_C( 691789640), -INT32_C( 1344180633), -INT32_C( 1344180633), INT32_C( 0), INT32_C( 29816671), INT32_C( 0) } }, { UINT8_C( 84), { INT32_C( 1275674645), -INT32_C( 376205545), INT32_C( 763093317), INT32_C( 1564267729), INT32_C( 360021231), -INT32_C( 795442985), INT32_C( 1447747814), INT32_C( 1420451647) }, { -INT32_C( 1214204769), -INT32_C( 1985989821), -INT32_C( 424273131), -INT32_C( 565972241), INT32_C( 1190443119), INT32_C( 504793656), -INT32_C( 1451925398), INT32_C( 1610489792) }, { INT32_C( 387358419), -INT32_C( 408897838), -INT32_C( 1043507503), -INT32_C( 1214312376), INT32_C( 33395657), -INT32_C( 2011163618), INT32_C( 875730292), -INT32_C( 2003554124) }, { INT32_C( 0), INT32_C( 0), -INT32_C( 795442985), INT32_C( 0), -INT32_C( 2003554124), INT32_C( 0), -INT32_C( 1043507503), INT32_C( 0) } }, { UINT8_C(206), { INT32_C( 1637916586), -INT32_C( 1791850689), INT32_C( 1709044564), INT32_C( 640587155), -INT32_C( 1505480814), -INT32_C( 467940017), INT32_C( 798576639), -INT32_C( 1896013341) }, { -INT32_C( 1073472), INT32_C( 
2056528165), -INT32_C( 1461751531), -INT32_C( 1714549498), -INT32_C( 1942023364), -INT32_C( 546284832), -INT32_C( 1928459862), -INT32_C( 367326422) }, { -INT32_C( 806810967), INT32_C( 1095335212), -INT32_C( 169269009), INT32_C( 1905178421), -INT32_C( 1426207030), -INT32_C( 762745304), -INT32_C( 1570728073), INT32_C( 1284275107) }, { INT32_C( 0), -INT32_C( 467940017), -INT32_C( 467940017), INT32_C( 798576639), INT32_C( 0), INT32_C( 0), -INT32_C( 169269009), -INT32_C( 169269009) } }, { UINT8_C(134), { -INT32_C( 206431370), -INT32_C( 1931283612), -INT32_C( 1816012580), INT32_C( 878588774), -INT32_C( 1638070224), INT32_C( 689319826), INT32_C( 197965967), -INT32_C( 1164895932) }, { -INT32_C( 1716632780), INT32_C( 304451638), -INT32_C( 811210904), INT32_C( 1241711386), -INT32_C( 1628872692), INT32_C( 533200784), -INT32_C( 81095497), -INT32_C( 508118099) }, { INT32_C( 880436478), INT32_C( 1564909812), -INT32_C( 1590891385), -INT32_C( 68472849), INT32_C( 546952336), -INT32_C( 1975557677), -INT32_C( 1568249099), INT32_C( 612580389) }, { INT32_C( 0), INT32_C( 197965967), INT32_C( 880436478), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1975557677) } }, { UINT8_C(160), { -INT32_C( 1634445058), -INT32_C( 1960447585), INT32_C( 1299891741), -INT32_C( 2032306511), INT32_C( 1884945679), -INT32_C( 1503271876), -INT32_C( 1496578198), -INT32_C( 1991839605) }, { -INT32_C( 416818616), -INT32_C( 378385204), -INT32_C( 1019810542), INT32_C( 1917391715), INT32_C( 1306764048), -INT32_C( 235714425), -INT32_C( 610812080), -INT32_C( 127607377) }, { -INT32_C( 2082501705), -INT32_C( 378711337), -INT32_C( 1565744321), -INT32_C( 954927433), INT32_C( 538245017), -INT32_C( 1877931968), INT32_C( 2003544263), INT32_C( 1013960581) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1991839605), INT32_C( 0), INT32_C( 1013960581) } }, { UINT8_C( 90), { -INT32_C( 1607352242), -INT32_C( 790685139), -INT32_C( 1131970106), -INT32_C( 1923788906), INT32_C( 
1808627043), INT32_C( 775118214), -INT32_C( 1749833016), INT32_C( 1743908889) }, { -INT32_C( 586669392), INT32_C( 61728572), -INT32_C( 4246167), -INT32_C( 410249852), INT32_C( 290675338), -INT32_C( 2143320393), INT32_C( 1226306352), -INT32_C( 1817179677) }, { INT32_C( 1752217643), INT32_C( 158014880), -INT32_C( 704107950), -INT32_C( 893479873), -INT32_C( 1495592466), -INT32_C( 953804137), -INT32_C( 267371251), INT32_C( 1921237062) }, { INT32_C( 0), -INT32_C( 1495592466), INT32_C( 0), INT32_C( 1808627043), -INT32_C( 704107950), INT32_C( 0), -INT32_C( 1607352242), INT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i a = simde_mm256_loadu_epi32(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi32(test_vec[i].idx); simde__m256i b = simde_mm256_loadu_epi32(test_vec[i].b); simde__m256i r = simde_mm256_maskz_permutex2var_epi32(test_vec[i].k, a, idx, b); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i a = simde_test_x86_random_i32x8(); simde__m256i idx = simde_test_x86_random_i32x8(); simde__m256i b = simde_test_x86_random_i32x8(); simde__m256i r = simde_mm256_maskz_permutex2var_epi32(k, a, idx, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_permutex2var_epi64(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const int64_t a[4]; const int64_t idx[4]; const int64_t b[4]; const int64_t r[4]; } test_vec[] = { { { -INT64_C( 9105563135040303212), INT64_C( 5126126908757411805), INT64_C( 4131066676902922608), -INT64_C( 6896356776144442388) }, { -INT64_C( 
5036235093458267641), INT64_C( 4883165139464366394), INT64_C( 8034689480396833797), INT64_C( 5551719280306818399) }, { INT64_C( 546344913464597502), INT64_C( 8016486956781595946), INT64_C( 3548333835470430028), INT64_C( 5248284879331565123) }, { INT64_C( 5248284879331565123), INT64_C( 4131066676902922608), INT64_C( 8016486956781595946), INT64_C( 5248284879331565123) } }, { { -INT64_C( 1686676411120370494), -INT64_C( 5657042970237133659), -INT64_C( 5978318039748168700), INT64_C( 1076091259815568477) }, { INT64_C( 8100910073168925383), INT64_C( 8820297081538171218), INT64_C( 308595150875125809), INT64_C( 5717490736600743421) }, { INT64_C( 9144629603125753518), -INT64_C( 7673117386660812910), INT64_C( 3040061883868566660), INT64_C( 2724339638200491254) }, { INT64_C( 2724339638200491254), -INT64_C( 5978318039748168700), -INT64_C( 5657042970237133659), -INT64_C( 7673117386660812910) } }, { { INT64_C( 8389509990101983938), -INT64_C( 4330210769749826834), -INT64_C( 8706830746927556396), INT64_C( 3357665980632637435) }, { -INT64_C( 6139379650476034443), INT64_C( 2103151654529222957), INT64_C( 4041217868872687357), INT64_C( 7242707316660444137) }, { INT64_C( 6332210790954475641), -INT64_C( 4667431523777782318), -INT64_C( 8392629836110211311), -INT64_C( 1238099066544442754) }, { -INT64_C( 4667431523777782318), -INT64_C( 4667431523777782318), -INT64_C( 4667431523777782318), -INT64_C( 4330210769749826834) } }, { { -INT64_C( 1763999190047486717), INT64_C( 5576085515473733857), -INT64_C( 7872253578937847661), INT64_C( 484928624621756502) }, { -INT64_C( 4369977205674746275), INT64_C( 3281374363985009921), -INT64_C( 6980823198101002887), INT64_C( 5605614773420564418) }, { -INT64_C( 5141111072500941310), INT64_C( 5710140835441675640), -INT64_C( 8895739824324173661), INT64_C( 2114180652148061722) }, { INT64_C( 5710140835441675640), INT64_C( 5576085515473733857), INT64_C( 5576085515473733857), -INT64_C( 7872253578937847661) } }, { { -INT64_C( 6232232903019784429), -INT64_C( 
7432071931316439370), -INT64_C( 3611587063637035110), INT64_C( 5925943533358230022) }, { -INT64_C( 7052462295194255547), INT64_C( 8043717921512086292), INT64_C( 4269038842336191934), -INT64_C( 8473354181185628457) }, { INT64_C( 3917886633969550552), INT64_C( 5639035121051863805), -INT64_C( 8036674938494579706), INT64_C( 5035603191860220692) }, { INT64_C( 5639035121051863805), INT64_C( 3917886633969550552), -INT64_C( 8036674938494579706), INT64_C( 5035603191860220692) } }, { { INT64_C( 210090308284048274), INT64_C( 1210647009670998766), -INT64_C( 6959535268717359839), -INT64_C( 5744382977254158186) }, { -INT64_C( 6862272307316634179), -INT64_C( 3488903581331908487), INT64_C( 4153185971756195961), INT64_C( 3671422660899478308) }, { INT64_C( 2315335812536162512), -INT64_C( 6427138947509174190), -INT64_C( 5696122737359357929), -INT64_C( 9038188223872684055) }, { -INT64_C( 6427138947509174190), INT64_C( 1210647009670998766), INT64_C( 1210647009670998766), INT64_C( 2315335812536162512) } }, { { -INT64_C( 7996663131977578945), -INT64_C( 560311256199616155), INT64_C( 2023424867947504855), INT64_C( 3245138125600196565) }, { -INT64_C( 7538435120218042991), INT64_C( 3070438256847549379), -INT64_C( 3776933532995268793), -INT64_C( 5925228461146566487) }, { -INT64_C( 5389716996436862615), INT64_C( 527473936533380564), INT64_C( 3100471612772769605), INT64_C( 2372984370039577126) }, { -INT64_C( 560311256199616155), INT64_C( 3245138125600196565), INT64_C( 2372984370039577126), -INT64_C( 560311256199616155) } }, { { INT64_C( 2380695893943479393), -INT64_C( 7056409058415713461), -INT64_C( 3520159413545823367), -INT64_C( 3309583386299799493) }, { INT64_C( 565536811018276203), INT64_C( 3785879925808046483), -INT64_C( 9209768581012757320), -INT64_C( 6088722386688786372) }, { INT64_C( 7571463388080088656), INT64_C( 166035370139788638), INT64_C( 867668402106498116), INT64_C( 5818151540614659284) }, { -INT64_C( 3309583386299799493), -INT64_C( 3309583386299799493), INT64_C( 
2380695893943479393), INT64_C( 7571463388080088656) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i a = simde_mm256_loadu_epi64(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi64(test_vec[i].idx); simde__m256i b = simde_mm256_loadu_epi64(test_vec[i].b); simde__m256i r = simde_mm256_permutex2var_epi64(a, idx, b); simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i64x4(); simde__m256i idx = simde_test_x86_random_i64x4(); simde__m256i b = simde_test_x86_random_i64x4(); simde__m256i r = simde_mm256_permutex2var_epi64(a, idx, b); simde_test_x86_write_i64x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask_permutex2var_epi64(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const int64_t a[4]; const simde__mmask8 k; const int64_t idx[4]; const int64_t b[4]; const int64_t r[4]; } test_vec[] = { { { -INT64_C( 4818729144397585696), -INT64_C( 7286221291590814059), -INT64_C( 5945543322123352672), INT64_C( 5907736937392781227) }, UINT8_C(161), { -INT64_C( 6963201461698164648), INT64_C( 7230968038851689620), INT64_C( 9119483889947888308), -INT64_C( 8157637173652276566) }, { INT64_C( 2022595106281278025), INT64_C( 5292344829359228656), INT64_C( 4953602907035185145), INT64_C( 3943245540026592757) }, { -INT64_C( 4818729144397585696), -INT64_C( 7286221291590814059), -INT64_C( 5945543322123352672), INT64_C( 5907736937392781227) } }, { { -INT64_C( 5501224170313740286), -INT64_C( 6584767694686225217), INT64_C( 8917587435691756516), INT64_C( 7598521893324730191) }, UINT8_C(111), { INT64_C( 1699517991373501468), INT64_C( 6035218813186992808), INT64_C( 3404250731322621516), INT64_C( 
777433809885232122) }, { -INT64_C( 8383884036088464141), INT64_C( 2034265633900226911), INT64_C( 7176285248304125032), INT64_C( 2359190405859061188) }, { -INT64_C( 8383884036088464141), -INT64_C( 5501224170313740286), -INT64_C( 8383884036088464141), INT64_C( 8917587435691756516) } }, { { INT64_C( 8210717508658699651), -INT64_C( 1393138937184625987), -INT64_C( 4434660256608111192), INT64_C( 4351658911245611544) }, UINT8_C(207), { -INT64_C( 1214473722860035309), INT64_C( 1433938385448448938), -INT64_C( 7082239805314959437), INT64_C( 8235556906900847433) }, { INT64_C( 2862881428903439158), INT64_C( 4693396830894239410), INT64_C( 5857940704363230274), INT64_C( 7738053005681664657) }, { INT64_C( 4351658911245611544), -INT64_C( 4434660256608111192), INT64_C( 4351658911245611544), -INT64_C( 1393138937184625987) } }, { { -INT64_C( 1232314779336032959), -INT64_C( 2743500766479299810), -INT64_C( 887822231621088172), -INT64_C( 3453056331930109580) }, UINT8_C(132), { -INT64_C( 4730558123629048246), -INT64_C( 4674966267858975317), -INT64_C( 6774516475662074955), INT64_C( 343400271527847417) }, { -INT64_C( 2467804155733900146), -INT64_C( 553722561634429538), INT64_C( 9155392009278615876), -INT64_C( 3611742590858215903) }, { -INT64_C( 1232314779336032959), -INT64_C( 2743500766479299810), -INT64_C( 553722561634429538), -INT64_C( 3453056331930109580) } }, { { INT64_C( 6275643726682332935), -INT64_C( 3683600859521302363), -INT64_C( 22720457791151703), -INT64_C( 391044665780310139) }, UINT8_C(218), { INT64_C( 763335169952330593), -INT64_C( 4345024345105268527), INT64_C( 100662019851608984), INT64_C( 2093873364588571002) }, { INT64_C( 6662694712678195489), INT64_C( 92913583836694821), INT64_C( 328558858737937147), -INT64_C( 7833933000428977297) }, { INT64_C( 6275643726682332935), -INT64_C( 3683600859521302363), -INT64_C( 22720457791151703), -INT64_C( 22720457791151703) } }, { { INT64_C( 2436014175575561960), INT64_C( 5253996852439168127), -INT64_C( 9193143093210723619), -INT64_C( 
3247671568395164695) }, UINT8_C( 73), { INT64_C( 5179534379403527660), INT64_C( 9070210831653062927), INT64_C( 6052852953441453746), INT64_C( 3888032857112665016) }, { -INT64_C( 5642596223445157290), -INT64_C( 3210905859794975423), -INT64_C( 4260991237631508776), -INT64_C( 8879862414130078650) }, { -INT64_C( 5642596223445157290), INT64_C( 5253996852439168127), -INT64_C( 9193143093210723619), INT64_C( 2436014175575561960) } }, { { INT64_C( 1509164179333733279), INT64_C( 1070300034895422693), -INT64_C( 628967167704447626), -INT64_C( 5168924997866572399) }, UINT8_C(165), { -INT64_C( 8090561790389716686), INT64_C( 6882296934384910228), -INT64_C( 112019026857688213), -INT64_C( 6305373223716966002) }, { INT64_C( 1675957552568530633), INT64_C( 5182136687888894193), -INT64_C( 8075066273720370520), -INT64_C( 6851691545877805109) }, { -INT64_C( 628967167704447626), INT64_C( 1070300034895422693), -INT64_C( 5168924997866572399), -INT64_C( 5168924997866572399) } }, { { -INT64_C( 8685589258748861476), -INT64_C( 1746837835176830562), INT64_C( 5730769519993481718), -INT64_C( 5600655128059121713) }, UINT8_C(220), { INT64_C( 2435878436295807385), -INT64_C( 936952093332191476), INT64_C( 7977780943068264223), INT64_C( 4371577567422209109) }, { INT64_C( 2571899635558456583), INT64_C( 8609805346395656576), INT64_C( 869500081506611747), INT64_C( 5931386167254209906) }, { -INT64_C( 8685589258748861476), -INT64_C( 1746837835176830562), INT64_C( 5931386167254209906), INT64_C( 8609805346395656576) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i a = simde_mm256_loadu_epi64(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi64(test_vec[i].idx); simde__m256i b = simde_mm256_loadu_epi64(test_vec[i].b); simde__m256i r = simde_mm256_mask_permutex2var_epi64(a, test_vec[i].k, idx, b); simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = 
simde_test_x86_random_i64x4(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i idx = simde_test_x86_random_i64x4(); simde__m256i b = simde_test_x86_random_i64x4(); simde__m256i r = simde_mm256_mask_permutex2var_epi64(a, k, idx, b); simde_test_x86_write_i64x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask2_permutex2var_epi64(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const int64_t a[4]; const int64_t idx[4]; const simde__mmask8 k; const int64_t b[4]; const int64_t r[4]; } test_vec[] = { { { INT64_C( 8492825680892050593), INT64_C( 6373573365172471972), INT64_C( 6498707353976361581), INT64_C( 3530469777387424352) }, { INT64_C( 1721998432633675899), -INT64_C( 3721795874446908211), -INT64_C( 4568021803536984999), INT64_C( 4893558780939929898) }, UINT8_C( 73), { INT64_C( 9035297934047753169), INT64_C( 1352955373423518950), -INT64_C( 6060383148574701670), INT64_C( 6363788064031284893) }, { INT64_C( 3530469777387424352), -INT64_C( 3721795874446908211), -INT64_C( 4568021803536984999), INT64_C( 6498707353976361581) } }, { { -INT64_C( 4142940159606911893), -INT64_C( 6735422377981419850), -INT64_C( 4157313758942923008), -INT64_C( 1963819604768931105) }, { -INT64_C( 5345936589050041047), -INT64_C( 183891134409800760), -INT64_C( 1110721076618342490), -INT64_C( 6409443063074757969) }, UINT8_C(195), { -INT64_C( 8995808862409674506), INT64_C( 4248438623831062944), -INT64_C( 4847778265998572260), -INT64_C( 1915469172561943919) }, { -INT64_C( 6735422377981419850), -INT64_C( 4142940159606911893), -INT64_C( 1110721076618342490), -INT64_C( 6409443063074757969) } }, { { INT64_C( 4870201693729795219), -INT64_C( 5904914471745088153), INT64_C( 2622063187536074828), INT64_C( 
1452188234137968210) }, { INT64_C( 4775994678214880609), -INT64_C( 5975677053392594478), -INT64_C( 8272628481048393095), -INT64_C( 7787860432377611178) }, UINT8_C( 99), { -INT64_C( 5070348531722708974), -INT64_C( 2075940514974529899), INT64_C( 7650114305729051676), INT64_C( 59443916928975481) }, { -INT64_C( 5904914471745088153), INT64_C( 2622063187536074828), -INT64_C( 8272628481048393095), -INT64_C( 7787860432377611178) } }, { { INT64_C( 2837392639937942215), INT64_C( 6734023017608673912), -INT64_C( 5394647987778383542), -INT64_C( 5382934132027965858) }, { INT64_C( 343888258685213451), -INT64_C( 300323142330113226), -INT64_C( 4572044901185550693), -INT64_C( 5316965915948174701) }, UINT8_C(114), { -INT64_C( 4934815888751766638), -INT64_C( 2937076136688157799), INT64_C( 6018210575517553691), -INT64_C( 3616192333705509626) }, { INT64_C( 343888258685213451), INT64_C( 6018210575517553691), -INT64_C( 4572044901185550693), -INT64_C( 5316965915948174701) } }, { { INT64_C( 7771981306788564743), -INT64_C( 3800391914464851813), INT64_C( 8958857533837835360), INT64_C( 959171091083383366) }, { -INT64_C( 6506002671874512008), INT64_C( 8127962755699426385), INT64_C( 1207463647651137999), -INT64_C( 4815788069337523433) }, UINT8_C( 77), { -INT64_C( 8956817714767787944), INT64_C( 590256871795045134), -INT64_C( 3219823902942136716), -INT64_C( 1178583455997923979) }, { INT64_C( 7771981306788564743), INT64_C( 8127962755699426385), -INT64_C( 1178583455997923979), -INT64_C( 1178583455997923979) } }, { { INT64_C( 512850678642703580), INT64_C( 2899642734365684149), INT64_C( 7501785262765182992), -INT64_C( 2043116871665831394) }, { -INT64_C( 5953586116916480672), INT64_C( 8226141799679445325), INT64_C( 6452485341117581872), INT64_C( 1221237117230729992) }, UINT8_C(169), { -INT64_C( 7828575798022804879), INT64_C( 1189346702712527921), INT64_C( 8863265696407921699), INT64_C( 8291755844977758132) }, { INT64_C( 512850678642703580), INT64_C( 8226141799679445325), INT64_C( 6452485341117581872), 
INT64_C( 512850678642703580) } }, { { INT64_C( 3748361593402995622), -INT64_C( 8805092100778100168), INT64_C( 8383308598154762923), INT64_C( 7206867430476186254) }, { -INT64_C( 7504728299208866910), INT64_C( 1277653836883123923), INT64_C( 5004136593497012542), -INT64_C( 2167963982574039098) }, UINT8_C( 92), { -INT64_C( 4797931882488822082), -INT64_C( 7851968385777426521), -INT64_C( 4023244466054035936), INT64_C( 6168498375915712102) }, { -INT64_C( 7504728299208866910), INT64_C( 1277653836883123923), -INT64_C( 4023244466054035936), -INT64_C( 4023244466054035936) } }, { { INT64_C( 5659431160458794350), INT64_C( 208328734361103862), INT64_C( 2695845004678934312), -INT64_C( 4166957372693778370) }, { INT64_C( 7226622014085339220), -INT64_C( 6003410983132503093), -INT64_C( 2040773134561503800), INT64_C( 7153656912871493927) }, UINT8_C(253), { INT64_C( 919808177349550042), INT64_C( 3811801338481766533), INT64_C( 3128239336657087882), INT64_C( 673462070523193718) }, { INT64_C( 919808177349550042), -INT64_C( 6003410983132503093), INT64_C( 5659431160458794350), INT64_C( 673462070523193718) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i a = simde_mm256_loadu_epi64(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi64(test_vec[i].idx); simde__m256i b = simde_mm256_loadu_epi64(test_vec[i].b); simde__m256i r = simde_mm256_mask2_permutex2var_epi64(a, idx, test_vec[i].k, b); simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i64x4(); simde__m256i idx = simde_test_x86_random_i64x4(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i b = simde_test_x86_random_i64x4(); simde__m256i r = simde_mm256_mask2_permutex2var_epi64(a, idx, k, b); simde_test_x86_write_i64x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_maskz_permutex2var_epi64(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde__mmask8 k; const int64_t a[4]; const int64_t idx[4]; const int64_t b[4]; const int64_t r[4]; } test_vec[] = { { UINT8_C(253), { INT64_C( 3826285920100155844), -INT64_C( 8499830586908264637), -INT64_C( 7019193492854977437), INT64_C( 1056049353334845963) }, { INT64_C( 4887455631946174168), INT64_C( 1484726287040981234), INT64_C( 1976459080246771482), -INT64_C( 1578955798946170179) }, { INT64_C( 7421468693362096314), -INT64_C( 6176668677397101950), INT64_C( 394599640868683087), INT64_C( 7285736415853035040) }, { INT64_C( 3826285920100155844), INT64_C( 0), -INT64_C( 7019193492854977437), -INT64_C( 6176668677397101950) } }, { UINT8_C(219), { -INT64_C( 2197557469141589853), INT64_C( 8829900793851261995), -INT64_C( 6801905061409396332), -INT64_C( 8299657024144356141) }, { INT64_C( 131179253143838930), -INT64_C( 993387458089696321), -INT64_C( 7875943905623213904), -INT64_C( 8987115911504134189) }, { -INT64_C( 7255221087654063751), INT64_C( 277408243563072649), INT64_C( 2222184791255958917), INT64_C( 3674129947223023561) }, { -INT64_C( 6801905061409396332), INT64_C( 3674129947223023561), INT64_C( 0), -INT64_C( 8299657024144356141) } }, { UINT8_C(243), { INT64_C( 6719488093001318769), INT64_C( 5031563122116208577), INT64_C( 5502747754651195338), INT64_C( 5016226946485704585) }, { -INT64_C( 7501241237203293000), INT64_C( 1632757503154820963), INT64_C( 8072992498939214031), -INT64_C( 146445853385879842) }, { -INT64_C( 7884838469744790747), INT64_C( 29164757738420228), -INT64_C( 635351584041266730), INT64_C( 2745120999013501397) }, { INT64_C( 6719488093001318769), INT64_C( 5016226946485704585), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(149), { INT64_C( 
3247191766449005215), -INT64_C( 2756097655420591796), -INT64_C( 812360233779036900), -INT64_C( 8373848756298787692) }, { INT64_C( 3612354086010588990), -INT64_C( 312194374599672055), INT64_C( 2016090389376572762), INT64_C( 5468244112393171051) }, { INT64_C( 3225244889499204522), INT64_C( 1565185295299502047), -INT64_C( 7285112056504936636), -INT64_C( 262100129829210363) }, { -INT64_C( 7285112056504936636), INT64_C( 0), -INT64_C( 812360233779036900), INT64_C( 0) } }, { UINT8_C( 80), { INT64_C( 3303846963211111579), -INT64_C( 1834937231741446244), INT64_C( 8157056014514213913), INT64_C( 4379679862844245409) }, { INT64_C( 8558828147917142603), -INT64_C( 3972944873216364377), INT64_C( 4774706904443646615), INT64_C( 1246794260040713839) }, { INT64_C( 4326678953301083322), -INT64_C( 6945939270704839389), INT64_C( 3235753579611525635), -INT64_C( 2624883760608468806) }, { INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C( 15), { INT64_C( 4671605531044245309), -INT64_C( 3231077366142294508), INT64_C( 6181116499391991129), INT64_C( 1698510008422385913) }, { INT64_C( 3133269589367469760), INT64_C( 312851377180164930), -INT64_C( 7183002107340789946), -INT64_C( 2877041395869104607) }, { -INT64_C( 5103829115235738903), INT64_C( 4107728110785231697), INT64_C( 989813267296949116), INT64_C( 8591489912988556363) }, { INT64_C( 4671605531044245309), INT64_C( 6181116499391991129), INT64_C( 989813267296949116), -INT64_C( 3231077366142294508) } }, { UINT8_C(245), { INT64_C( 2102376882953221966), INT64_C( 4571798128701811297), INT64_C( 2976659782148788607), INT64_C( 4273687267873698368) }, { INT64_C( 4712812360372829095), INT64_C( 4745291810681092207), INT64_C( 9057184417130546490), INT64_C( 4643254754805836103) }, { INT64_C( 4324564833728563426), INT64_C( 2120266213347572328), -INT64_C( 8182100131426487624), INT64_C( 6168018830397492160) }, { INT64_C( 6168018830397492160), INT64_C( 0), INT64_C( 2976659782148788607), INT64_C( 0) } }, { UINT8_C(188), { INT64_C( 
6087957512085640995), -INT64_C( 4249053806497126499), -INT64_C( 4967018007314498054), INT64_C( 2832345271318244190) }, { INT64_C( 4181794141975074714), -INT64_C( 8270981028679893154), INT64_C( 4448695368223179664), INT64_C( 542435735229164123) }, { -INT64_C( 2896444058409123083), INT64_C( 8801018742315702018), INT64_C( 5538925003136434627), -INT64_C( 4497913174193033696) }, { INT64_C( 0), INT64_C( 0), INT64_C( 6087957512085640995), INT64_C( 2832345271318244190) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i a = simde_mm256_loadu_epi64(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi64(test_vec[i].idx); simde__m256i b = simde_mm256_loadu_epi64(test_vec[i].b); simde__m256i r = simde_mm256_maskz_permutex2var_epi64(test_vec[i].k, a, idx, b); simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i a = simde_test_x86_random_i64x4(); simde__m256i idx = simde_test_x86_random_i64x4(); simde__m256i b = simde_test_x86_random_i64x4(); simde__m256i r = simde_mm256_maskz_permutex2var_epi64(k, a, idx, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_permutex2var_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[32]; const int8_t idx[32]; const int8_t b[32]; const int8_t r[32]; } test_vec[] = { { { INT8_C( 52), INT8_C( 20), INT8_C( 74), INT8_C( 119), INT8_C( 101), INT8_C( 73), INT8_C( 52), INT8_C( 74), INT8_C( 78), INT8_C( 78), -INT8_C( 28), -INT8_C( 65), -INT8_C( 125), -INT8_C( 4), INT8_C( 43), INT8_C( 80), INT8_C( 68), -INT8_C( 17), -INT8_C( 
46), INT8_C( 34), -INT8_C( 97), INT8_C( 38), -INT8_C( 48), -INT8_C( 123), -INT8_C( 88), INT8_C( 83), INT8_C( 108), INT8_C( 60), INT8_C( 79), -INT8_C( 125), -INT8_C( 93), -INT8_C( 125) }, { -INT8_C( 105), -INT8_C( 19), -INT8_C( 6), -INT8_C( 3), INT8_C( 55), INT8_C( 46), INT8_C( 71), -INT8_C( 123), INT8_C( 124), INT8_C( 44), INT8_C( 68), INT8_C( 0), INT8_C( 40), INT8_C( 111), INT8_C( 80), INT8_C( 108), INT8_C( 94), INT8_C( 34), -INT8_C( 114), -INT8_C( 2), INT8_C( 72), INT8_C( 94), -INT8_C( 125), -INT8_C( 16), -INT8_C( 79), -INT8_C( 17), INT8_C( 44), INT8_C( 0), INT8_C( 115), -INT8_C( 48), -INT8_C( 125), INT8_C( 10) }, { -INT8_C( 67), INT8_C( 126), INT8_C( 7), -INT8_C( 12), -INT8_C( 84), INT8_C( 79), INT8_C( 121), INT8_C( 41), INT8_C( 123), -INT8_C( 66), INT8_C( 41), -INT8_C( 93), INT8_C( 45), INT8_C( 121), INT8_C( 15), -INT8_C( 116), -INT8_C( 101), -INT8_C( 98), -INT8_C( 118), -INT8_C( 29), -INT8_C( 4), INT8_C( 13), -INT8_C( 45), -INT8_C( 82), -INT8_C( 3), -INT8_C( 1), -INT8_C( 82), INT8_C( 112), -INT8_C( 49), INT8_C( 50), INT8_C( 122), -INT8_C( 115) }, { -INT8_C( 123), INT8_C( 121), -INT8_C( 82), INT8_C( 50), -INT8_C( 82), INT8_C( 15), INT8_C( 74), INT8_C( 73), -INT8_C( 49), INT8_C( 45), INT8_C( 101), INT8_C( 52), INT8_C( 123), -INT8_C( 116), INT8_C( 68), INT8_C( 45), -INT8_C( 93), INT8_C( 7), INT8_C( 43), INT8_C( 122), INT8_C( 78), -INT8_C( 93), INT8_C( 119), -INT8_C( 101), -INT8_C( 98), -INT8_C( 116), INT8_C( 45), INT8_C( 52), -INT8_C( 29), INT8_C( 68), INT8_C( 119), -INT8_C( 28) } }, { { -INT8_C( 80), -INT8_C( 126), -INT8_C( 127), INT8_C( 92), -INT8_C( 47), -INT8_C( 5), -INT8_C( 123), INT8_C( 76), -INT8_C( 71), -INT8_C( 82), -INT8_C( 17), -INT8_C( 26), INT8_C( 39), -INT8_C( 2), INT8_C( 114), -INT8_C( 62), -INT8_C( 100), -INT8_C( 4), -INT8_C( 91), -INT8_C( 103), INT8_C( 10), INT8_C( 120), INT8_C( 71), INT8_C( 7), INT8_C( 120), -INT8_C( 11), INT8_C( 119), INT8_C( 71), INT8_C( 39), -INT8_C( 15), -INT8_C( 44), -INT8_C( 41) }, { INT8_C( 115), INT8_C( 86), INT8_C( 52), 
INT8_C( 68), INT8_C( 81), -INT8_C( 71), -INT8_C( 112), INT8_C( 10), INT8_C( 104), INT8_MAX, -INT8_C( 16), -INT8_C( 113), INT8_C( 126), INT8_C( 99), INT8_C( 82), INT8_C( 26), INT8_C( 95), -INT8_C( 9), -INT8_C( 77), INT8_C( 105), INT8_C( 112), -INT8_C( 6), INT8_C( 112), -INT8_C( 24), -INT8_C( 16), -INT8_C( 25), INT8_C( 47), INT8_C( 23), -INT8_C( 39), INT8_C( 4), -INT8_C( 17), INT8_C( 76) }, { INT8_C( 90), INT8_C( 35), -INT8_C( 111), -INT8_C( 85), -INT8_C( 36), INT8_C( 33), -INT8_C( 75), INT8_C( 68), -INT8_C( 95), -INT8_C( 91), -INT8_C( 44), INT8_C( 31), INT8_C( 8), INT8_C( 38), INT8_C( 57), INT8_C( 104), INT8_C( 29), -INT8_C( 19), -INT8_C( 47), -INT8_C( 115), -INT8_C( 25), INT8_C( 66), INT8_C( 117), -INT8_C( 41), INT8_C( 41), -INT8_C( 91), -INT8_C( 17), INT8_C( 2), -INT8_C( 87), -INT8_C( 34), INT8_C( 79), INT8_C( 3) }, { -INT8_C( 115), INT8_C( 71), -INT8_C( 25), -INT8_C( 47), -INT8_C( 4), -INT8_C( 91), -INT8_C( 100), -INT8_C( 17), -INT8_C( 95), INT8_C( 3), INT8_C( 29), -INT8_C( 62), INT8_C( 79), -INT8_C( 85), -INT8_C( 91), INT8_C( 119), -INT8_C( 41), -INT8_C( 41), -INT8_C( 115), -INT8_C( 91), INT8_C( 29), -INT8_C( 17), INT8_C( 29), -INT8_C( 95), INT8_C( 29), INT8_C( 68), INT8_C( 104), INT8_C( 7), -INT8_C( 11), -INT8_C( 47), INT8_C( 104), INT8_C( 39) } }, { { INT8_C( 1), -INT8_C( 32), -INT8_C( 82), -INT8_C( 35), INT8_C( 1), INT8_C( 99), INT8_C( 34), -INT8_C( 94), INT8_C( 8), -INT8_C( 10), -INT8_C( 63), INT8_C( 17), INT8_C( 28), -INT8_C( 5), INT8_C( 121), INT8_C( 57), -INT8_C( 24), INT8_C( 74), -INT8_C( 57), -INT8_C( 49), -INT8_C( 116), INT8_C( 60), -INT8_C( 89), -INT8_C( 74), -INT8_C( 31), -INT8_C( 106), -INT8_C( 72), -INT8_C( 118), INT8_C( 116), INT8_C( 7), -INT8_C( 115), INT8_C( 117) }, { -INT8_C( 25), INT8_C( 59), INT8_C( 82), -INT8_C( 23), -INT8_C( 98), INT8_C( 116), -INT8_C( 117), -INT8_C( 89), INT8_C( 106), INT8_C( 77), -INT8_C( 72), -INT8_C( 122), INT8_C( 72), INT8_C( 49), -INT8_C( 64), INT8_C( 48), INT8_C( 123), -INT8_C( 121), -INT8_C( 1), INT8_C( 8), -INT8_C( 
61), -INT8_C( 90), -INT8_C( 66), -INT8_C( 91), INT8_C( 60), INT8_C( 118), INT8_C( 47), -INT8_C( 80), INT8_C( 126), -INT8_C( 67), INT8_C( 37), INT8_C( 101) }, { -INT8_C( 8), INT8_C( 120), INT8_C( 78), -INT8_C( 105), -INT8_C( 20), -INT8_C( 38), INT8_C( 62), INT8_C( 87), INT8_C( 39), -INT8_C( 10), -INT8_C( 35), INT8_C( 111), INT8_C( 39), -INT8_C( 99), -INT8_C( 97), -INT8_C( 94), INT8_C( 36), -INT8_C( 98), -INT8_C( 86), -INT8_C( 24), INT8_C( 69), INT8_C( 104), -INT8_C( 115), -INT8_C( 127), -INT8_C( 33), -INT8_C( 68), INT8_C( 50), INT8_C( 93), INT8_C( 121), INT8_C( 87), -INT8_C( 62), INT8_C( 114) }, { INT8_C( 87), INT8_C( 93), -INT8_C( 57), -INT8_C( 10), -INT8_C( 115), INT8_C( 69), INT8_C( 17), INT8_C( 87), -INT8_C( 35), -INT8_C( 5), -INT8_C( 33), INT8_C( 34), INT8_C( 8), -INT8_C( 98), INT8_C( 1), INT8_C( 36), INT8_C( 93), -INT8_C( 94), INT8_C( 114), INT8_C( 8), -INT8_C( 35), INT8_C( 62), -INT8_C( 62), -INT8_C( 38), INT8_C( 121), -INT8_C( 115), -INT8_C( 94), INT8_C( 36), -INT8_C( 62), INT8_C( 87), -INT8_C( 38), -INT8_C( 38) } }, { { -INT8_C( 49), INT8_C( 17), INT8_C( 9), -INT8_C( 68), -INT8_C( 21), INT8_C( 71), INT8_C( 19), INT8_C( 18), INT8_C( 61), -INT8_C( 16), -INT8_C( 127), INT8_C( 100), -INT8_C( 114), INT8_C( 32), INT8_C( 6), -INT8_C( 78), -INT8_C( 66), -INT8_C( 79), -INT8_C( 102), INT8_C( 3), INT8_C( 25), INT8_C( 39), -INT8_C( 123), -INT8_C( 8), -INT8_C( 28), -INT8_C( 73), INT8_C( 85), INT8_C( 93), INT8_C( 14), INT8_C( 24), -INT8_C( 49), -INT8_C( 34) }, { INT8_C( 41), -INT8_C( 40), -INT8_C( 102), INT8_C( 20), INT8_C( 31), -INT8_C( 83), INT8_C( 38), INT8_C( 92), -INT8_C( 99), -INT8_C( 89), -INT8_C( 64), INT8_C( 43), -INT8_C( 57), -INT8_C( 57), -INT8_C( 34), -INT8_C( 123), INT8_C( 120), INT8_C( 120), -INT8_C( 119), -INT8_C( 111), -INT8_C( 96), INT8_C( 14), -INT8_C( 118), -INT8_C( 124), -INT8_C( 59), -INT8_C( 33), -INT8_C( 31), -INT8_C( 45), -INT8_C( 9), -INT8_C( 79), -INT8_C( 79), INT8_C( 32) }, { -INT8_C( 119), INT8_C( 75), INT8_C( 52), -INT8_C( 87), -INT8_C( 8), 
INT8_C( 90), INT8_C( 5), -INT8_C( 106), INT8_C( 1), -INT8_C( 58), -INT8_C( 63), -INT8_C( 56), -INT8_C( 115), -INT8_C( 97), INT8_C( 78), INT8_C( 5), INT8_C( 24), -INT8_C( 41), -INT8_C( 106), -INT8_C( 72), -INT8_C( 27), INT8_C( 32), INT8_C( 60), -INT8_C( 86), INT8_C( 0), INT8_C( 29), INT8_C( 125), -INT8_C( 9), -INT8_C( 50), INT8_C( 47), INT8_C( 24), INT8_C( 88) }, { -INT8_C( 58), -INT8_C( 28), INT8_C( 85), INT8_C( 25), -INT8_C( 34), -INT8_C( 97), INT8_C( 5), INT8_C( 14), INT8_C( 24), -INT8_C( 106), -INT8_C( 49), -INT8_C( 56), INT8_C( 18), INT8_C( 18), -INT8_C( 49), INT8_C( 71), INT8_C( 0), INT8_C( 0), -INT8_C( 16), -INT8_C( 79), -INT8_C( 119), INT8_C( 6), -INT8_C( 127), -INT8_C( 21), INT8_C( 71), -INT8_C( 34), INT8_C( 75), INT8_C( 3), -INT8_C( 86), -INT8_C( 41), -INT8_C( 41), -INT8_C( 119) } }, { { INT8_C( 122), INT8_C( 76), INT8_C( 1), INT8_C( 115), -INT8_C( 89), INT8_C( 6), INT8_C( 9), -INT8_C( 88), -INT8_C( 52), -INT8_C( 54), INT8_C( 113), INT8_C( 89), INT8_C( 106), -INT8_C( 65), INT8_C( 94), -INT8_C( 126), -INT8_C( 106), -INT8_C( 11), INT8_C( 58), INT8_C( 123), INT8_C( 21), INT8_C( 118), INT8_C( 37), INT8_C( 21), -INT8_C( 109), -INT8_C( 94), INT8_C( 13), INT8_C( 98), -INT8_C( 47), INT8_C( 37), -INT8_C( 70), INT8_C( 76) }, { INT8_C( 113), -INT8_C( 69), -INT8_C( 65), INT8_C( 24), -INT8_C( 63), -INT8_C( 56), -INT8_C( 63), -INT8_C( 114), -INT8_C( 110), INT8_C( 50), -INT8_C( 25), -INT8_C( 4), -INT8_C( 15), INT8_C( 70), INT8_C( 126), -INT8_C( 121), INT8_C( 59), -INT8_C( 72), INT8_C( 2), INT8_C( 80), INT8_C( 46), INT8_C( 39), INT8_C( 102), -INT8_C( 62), -INT8_C( 55), INT8_C( 115), INT8_C( 36), -INT8_C( 101), -INT8_C( 104), -INT8_C( 34), -INT8_C( 25), INT8_C( 9) }, { -INT8_C( 103), -INT8_C( 90), INT8_C( 34), INT8_C( 90), INT8_C( 110), -INT8_C( 29), -INT8_C( 24), INT8_C( 0), INT8_C( 21), -INT8_C( 48), -INT8_C( 3), INT8_C( 6), INT8_C( 22), INT8_C( 123), -INT8_C( 115), INT8_C( 81), INT8_C( 52), -INT8_C( 113), -INT8_C( 95), INT8_C( 98), -INT8_C( 74), INT8_C( 7), INT8_C( 36), 
INT8_MAX, INT8_C( 122), INT8_C( 72), INT8_C( 26), INT8_C( 18), INT8_C( 38), INT8_C( 1), INT8_C( 28), -INT8_C( 65) }, { -INT8_C( 113), INT8_C( 18), -INT8_C( 65), -INT8_C( 109), INT8_C( 76), -INT8_C( 52), INT8_C( 76), INT8_C( 94), INT8_C( 58), -INT8_C( 95), INT8_C( 0), INT8_C( 38), -INT8_C( 113), INT8_C( 9), INT8_C( 28), -INT8_C( 88), INT8_C( 18), INT8_C( 122), INT8_C( 1), -INT8_C( 106), -INT8_C( 115), INT8_C( 0), -INT8_C( 24), INT8_C( 1), -INT8_C( 54), INT8_C( 98), INT8_C( 110), INT8_C( 98), -INT8_C( 109), -INT8_C( 70), INT8_C( 0), -INT8_C( 54) } }, { { -INT8_C( 89), INT8_C( 62), INT8_C( 26), INT8_C( 21), INT8_C( 33), INT8_C( 2), INT8_C( 22), INT8_C( 54), -INT8_C( 46), INT8_C( 19), INT8_C( 60), -INT8_C( 24), -INT8_C( 114), -INT8_C( 55), INT8_C( 57), -INT8_C( 62), INT8_C( 88), -INT8_C( 37), INT8_C( 37), INT8_C( 14), -INT8_C( 30), INT8_C( 73), -INT8_C( 115), INT8_C( 93), -INT8_C( 110), -INT8_C( 88), INT8_C( 111), -INT8_C( 72), -INT8_C( 87), -INT8_C( 117), INT8_C( 120), INT8_C( 81) }, { -INT8_C( 55), -INT8_C( 110), INT8_C( 102), -INT8_C( 22), -INT8_C( 108), INT8_C( 124), INT8_C( 32), INT8_C( 103), -INT8_C( 113), INT8_C( 92), INT8_C( 79), INT8_C( 30), INT8_C( 37), -INT8_C( 119), -INT8_C( 32), INT8_C( 125), INT8_C( 100), INT8_C( 5), -INT8_C( 117), INT8_C( 70), INT8_C( 79), INT8_C( 25), -INT8_C( 93), -INT8_C( 31), -INT8_C( 63), INT8_C( 19), -INT8_C( 103), INT8_C( 106), -INT8_C( 98), INT8_C( 17), -INT8_C( 69), INT8_C( 104) }, { -INT8_C( 93), INT8_C( 34), INT8_C( 82), INT8_C( 56), -INT8_C( 98), INT8_C( 115), -INT8_C( 97), INT8_C( 46), -INT8_C( 49), -INT8_C( 18), INT8_C( 76), -INT8_C( 11), INT8_C( 119), INT8_C( 44), INT8_C( 114), -INT8_C( 37), INT8_C( 50), -INT8_C( 2), INT8_C( 34), -INT8_C( 127), INT8_C( 23), -INT8_C( 59), INT8_C( 98), -INT8_C( 40), -INT8_C( 40), -INT8_C( 5), INT8_C( 66), INT8_C( 119), INT8_C( 13), -INT8_C( 2), -INT8_C( 33), -INT8_C( 80) }, { INT8_C( 19), INT8_C( 37), -INT8_C( 97), INT8_C( 76), -INT8_C( 30), INT8_C( 13), -INT8_C( 93), INT8_C( 46), -INT8_C( 
62), -INT8_C( 87), -INT8_C( 62), INT8_C( 120), INT8_C( 115), INT8_C( 19), -INT8_C( 93), -INT8_C( 2), -INT8_C( 98), INT8_C( 2), -INT8_C( 24), INT8_C( 22), -INT8_C( 62), -INT8_C( 88), INT8_C( 56), INT8_C( 34), INT8_C( 62), INT8_C( 14), -INT8_C( 88), INT8_C( 76), INT8_C( 120), -INT8_C( 37), INT8_C( 119), -INT8_C( 49) } }, { { INT8_C( 32), INT8_C( 49), -INT8_C( 24), -INT8_C( 66), -INT8_C( 92), -INT8_C( 121), -INT8_C( 20), INT8_C( 116), INT8_C( 118), INT8_C( 56), INT8_C( 105), -INT8_C( 19), INT8_C( 101), -INT8_C( 37), -INT8_C( 55), -INT8_C( 105), -INT8_C( 39), -INT8_C( 21), INT8_C( 24), -INT8_C( 16), -INT8_C( 80), INT8_C( 122), -INT8_C( 56), -INT8_C( 119), INT8_C( 117), INT8_C( 11), INT8_C( 0), -INT8_C( 126), INT8_C( 9), -INT8_C( 33), INT8_C( 51), INT8_C( 41) }, { INT8_C( 16), INT8_C( 27), -INT8_C( 25), -INT8_C( 75), -INT8_C( 93), -INT8_C( 44), INT8_C( 41), INT8_C( 25), INT8_C( 12), -INT8_C( 110), INT8_C( 6), INT8_C( 113), INT8_C( 109), -INT8_C( 49), INT8_C( 8), INT8_C( 71), -INT8_C( 70), INT8_C( 32), INT8_C( 55), INT8_C( 107), -INT8_C( 102), INT8_C( 0), -INT8_C( 12), INT8_C( 16), INT8_C( 11), -INT8_C( 12), -INT8_C( 110), INT8_C( 20), -INT8_C( 45), -INT8_C( 59), INT8_C( 61), -INT8_C( 29) }, { -INT8_C( 31), INT8_C( 36), -INT8_C( 104), -INT8_C( 124), -INT8_C( 8), -INT8_C( 63), -INT8_C( 99), INT8_C( 5), INT8_C( 83), -INT8_C( 93), INT8_C( 118), -INT8_C( 63), INT8_C( 115), INT8_MAX, INT8_C( 8), INT8_C( 45), -INT8_C( 97), INT8_C( 63), -INT8_C( 104), INT8_C( 58), INT8_C( 63), -INT8_C( 116), INT8_C( 74), INT8_C( 74), INT8_MIN, -INT8_C( 36), INT8_C( 94), INT8_C( 83), -INT8_C( 94), -INT8_C( 101), INT8_C( 55), -INT8_C( 125) }, { -INT8_C( 39), -INT8_C( 126), INT8_C( 5), -INT8_C( 116), -INT8_C( 124), -INT8_C( 80), -INT8_C( 93), INT8_C( 11), INT8_C( 101), INT8_C( 24), -INT8_C( 20), INT8_C( 63), INT8_MAX, -INT8_C( 105), INT8_C( 118), INT8_C( 116), INT8_C( 94), -INT8_C( 31), INT8_C( 74), -INT8_C( 63), INT8_C( 0), INT8_C( 32), INT8_C( 63), -INT8_C( 39), -INT8_C( 19), INT8_C( 63), 
INT8_C( 24), -INT8_C( 80), -INT8_C( 16), -INT8_C( 121), -INT8_C( 101), -INT8_C( 124) } }, { { -INT8_C( 64), -INT8_C( 49), INT8_C( 7), -INT8_C( 72), -INT8_C( 111), -INT8_C( 92), -INT8_C( 67), -INT8_C( 28), INT8_C( 71), INT8_C( 52), -INT8_C( 91), -INT8_C( 70), -INT8_C( 77), -INT8_C( 83), -INT8_C( 24), INT8_C( 82), -INT8_C( 19), INT8_MIN, -INT8_C( 116), INT8_C( 44), INT8_C( 13), -INT8_C( 42), INT8_C( 119), -INT8_C( 115), -INT8_C( 77), -INT8_C( 43), -INT8_C( 31), INT8_C( 85), INT8_C( 113), INT8_C( 24), -INT8_C( 40), INT8_C( 49) }, { -INT8_C( 25), -INT8_C( 33), -INT8_C( 23), INT8_C( 120), -INT8_C( 125), -INT8_C( 89), INT8_C( 93), -INT8_C( 54), -INT8_C( 37), INT8_C( 2), -INT8_C( 123), -INT8_C( 114), -INT8_C( 80), INT8_C( 109), -INT8_C( 32), -INT8_C( 99), -INT8_C( 19), INT8_C( 109), -INT8_C( 55), -INT8_C( 6), INT8_C( 67), INT8_C( 64), -INT8_C( 120), -INT8_C( 10), INT8_C( 22), INT8_C( 105), INT8_C( 75), -INT8_C( 121), -INT8_C( 127), INT8_C( 35), -INT8_C( 72), INT8_C( 104) }, { INT8_C( 2), -INT8_C( 95), -INT8_C( 31), -INT8_C( 123), INT8_C( 72), INT8_C( 62), INT8_C( 80), INT8_C( 35), INT8_C( 64), -INT8_C( 43), -INT8_C( 79), -INT8_C( 16), INT8_C( 66), -INT8_C( 110), -INT8_C( 115), INT8_C( 47), -INT8_C( 1), INT8_C( 87), INT8_C( 42), INT8_C( 66), -INT8_C( 105), -INT8_C( 78), INT8_C( 57), -INT8_C( 83), INT8_C( 27), -INT8_C( 124), INT8_C( 52), -INT8_C( 100), -INT8_C( 88), -INT8_C( 20), INT8_C( 4), -INT8_C( 86) }, { INT8_C( 35), INT8_C( 49), -INT8_C( 43), INT8_C( 27), -INT8_C( 72), INT8_C( 35), INT8_C( 24), -INT8_C( 91), INT8_C( 85), INT8_C( 7), -INT8_C( 92), -INT8_C( 24), -INT8_C( 1), -INT8_C( 110), INT8_C( 2), INT8_C( 24), -INT8_C( 110), -INT8_C( 110), INT8_C( 52), INT8_C( 52), -INT8_C( 72), -INT8_C( 64), INT8_C( 71), INT8_C( 57), INT8_C( 119), -INT8_C( 43), -INT8_C( 70), -INT8_C( 28), -INT8_C( 49), -INT8_C( 123), INT8_C( 27), INT8_C( 64) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_mm256_loadu_epi8(test_vec[i].a); 
simde__m256i idx = simde_mm256_loadu_epi8(test_vec[i].idx); simde__m256i b = simde_mm256_loadu_epi8(test_vec[i].b); simde__m256i r = simde_mm256_permutex2var_epi8(a, idx, b); simde_test_x86_assert_equal_i8x32(r, simde_mm256_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i8x32(); simde__m256i idx = simde_test_x86_random_i8x32(); simde__m256i b = simde_test_x86_random_i8x32(); simde__m256i r = simde_mm256_permutex2var_epi8(a, idx, b); simde_test_x86_write_i8x32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x32(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask_permutex2var_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[32]; const simde__mmask32 k; const int8_t idx[32]; const int8_t b[32]; const int8_t r[32]; } test_vec[] = { { { -INT8_C( 114), -INT8_C( 27), INT8_C( 48), -INT8_C( 42), INT8_C( 35), INT8_MIN, -INT8_C( 6), INT8_C( 100), INT8_C( 85), -INT8_C( 85), INT8_C( 84), -INT8_C( 105), INT8_C( 61), -INT8_C( 30), -INT8_C( 58), INT8_C( 60), INT8_C( 57), -INT8_C( 16), INT8_MAX, -INT8_C( 48), -INT8_C( 94), -INT8_C( 72), INT8_C( 126), -INT8_C( 67), INT8_C( 60), -INT8_C( 78), INT8_C( 89), -INT8_C( 28), -INT8_C( 97), INT8_C( 94), -INT8_C( 113), INT8_C( 45) }, UINT32_C(1728298819), { INT8_C( 63), -INT8_C( 3), -INT8_C( 53), -INT8_C( 108), -INT8_C( 87), INT8_C( 31), INT8_C( 43), -INT8_C( 26), INT8_C( 1), -INT8_C( 15), INT8_C( 35), INT8_C( 58), -INT8_C( 30), -INT8_C( 94), INT8_C( 11), -INT8_C( 124), INT8_C( 90), -INT8_C( 119), INT8_C( 66), -INT8_C( 106), INT8_C( 59), -INT8_C( 101), INT8_C( 123), -INT8_C( 38), -INT8_C( 7), INT8_C( 10), INT8_C( 7), INT8_C( 61), -INT8_C( 55), INT8_C( 11), -INT8_C( 92), INT8_C( 8) }, { INT8_C( 8), INT8_C( 111), -INT8_C( 100), -INT8_C( 79), -INT8_C( 114), -INT8_C( 
57), -INT8_C( 104), -INT8_C( 112), -INT8_C( 72), -INT8_C( 69), -INT8_C( 54), -INT8_C( 102), INT8_C( 93), -INT8_C( 43), INT8_C( 31), -INT8_C( 73), INT8_C( 94), INT8_C( 97), INT8_C( 77), -INT8_C( 102), -INT8_C( 4), -INT8_C( 56), INT8_C( 116), -INT8_C( 10), -INT8_C( 46), INT8_C( 124), INT8_C( 51), -INT8_C( 101), -INT8_C( 121), -INT8_C( 41), -INT8_C( 93), -INT8_C( 113) }, { -INT8_C( 113), -INT8_C( 41), INT8_C( 48), -INT8_C( 42), INT8_C( 35), INT8_MIN, -INT8_C( 102), INT8_C( 100), -INT8_C( 27), INT8_C( 97), -INT8_C( 79), INT8_C( 51), -INT8_C( 100), -INT8_C( 100), -INT8_C( 58), INT8_C( 35), INT8_C( 89), -INT8_C( 85), INT8_MAX, -INT8_C( 48), -INT8_C( 94), -INT8_C( 72), INT8_C( 126), -INT8_C( 67), INT8_C( 124), INT8_C( 84), INT8_C( 100), -INT8_C( 28), -INT8_C( 97), -INT8_C( 105), -INT8_C( 114), INT8_C( 45) } }, { { INT8_C( 70), INT8_C( 63), INT8_C( 65), -INT8_C( 44), INT8_C( 6), -INT8_C( 39), INT8_C( 100), -INT8_C( 65), -INT8_C( 108), INT8_C( 47), INT8_C( 89), -INT8_C( 15), INT8_C( 4), INT8_C( 120), -INT8_C( 88), INT8_C( 99), -INT8_C( 39), -INT8_C( 11), -INT8_C( 3), -INT8_C( 42), -INT8_C( 66), INT8_C( 113), -INT8_C( 52), -INT8_C( 112), -INT8_C( 19), -INT8_C( 1), INT8_C( 44), INT8_C( 116), -INT8_C( 42), -INT8_C( 49), INT8_C( 4), INT8_C( 28) }, UINT32_C( 368067855), { INT8_C( 30), INT8_C( 85), -INT8_C( 44), -INT8_C( 78), -INT8_C( 124), INT8_C( 46), -INT8_C( 93), -INT8_C( 120), -INT8_C( 90), INT8_C( 75), -INT8_C( 21), INT8_MIN, INT8_C( 64), -INT8_C( 24), INT8_C( 86), -INT8_C( 2), INT8_C( 90), INT8_C( 34), -INT8_C( 113), INT8_C( 71), INT8_C( 33), -INT8_C( 69), -INT8_C( 68), -INT8_C( 9), -INT8_C( 118), -INT8_C( 64), INT8_C( 19), -INT8_C( 103), INT8_C( 5), INT8_C( 3), -INT8_C( 81), INT8_C( 35) }, { INT8_C( 88), -INT8_C( 125), -INT8_C( 43), -INT8_C( 36), -INT8_C( 79), INT8_C( 120), INT8_C( 101), INT8_C( 88), -INT8_C( 61), INT8_C( 80), -INT8_C( 40), INT8_C( 3), INT8_C( 57), INT8_C( 46), INT8_C( 2), -INT8_C( 109), INT8_C( 80), -INT8_C( 111), -INT8_C( 38), INT8_C( 113), INT8_C( 76), 
-INT8_C( 106), INT8_C( 104), -INT8_C( 42), INT8_C( 86), INT8_C( 123), INT8_C( 112), INT8_C( 91), INT8_C( 126), INT8_C( 31), INT8_C( 126), -INT8_C( 41) }, { INT8_C( 4), INT8_C( 113), -INT8_C( 66), -INT8_C( 38), INT8_C( 6), -INT8_C( 39), INT8_C( 100), -INT8_C( 65), INT8_C( 101), INT8_C( 47), INT8_C( 3), -INT8_C( 15), INT8_C( 4), INT8_C( 120), -INT8_C( 52), INT8_C( 99), -INT8_C( 39), -INT8_C( 11), -INT8_C( 3), -INT8_C( 42), -INT8_C( 125), INT8_C( 91), INT8_C( 126), -INT8_C( 42), INT8_C( 89), -INT8_C( 1), -INT8_C( 42), INT8_C( 116), -INT8_C( 39), -INT8_C( 49), INT8_C( 4), INT8_C( 28) } }, { { -INT8_C( 94), INT8_C( 83), -INT8_C( 77), INT8_C( 84), -INT8_C( 53), INT8_C( 24), -INT8_C( 84), -INT8_C( 114), INT8_C( 105), -INT8_C( 124), -INT8_C( 110), -INT8_C( 94), -INT8_C( 78), -INT8_C( 108), INT8_C( 53), INT8_C( 2), INT8_C( 37), INT8_C( 15), INT8_C( 115), INT8_C( 113), -INT8_C( 90), -INT8_C( 37), INT8_C( 71), -INT8_C( 4), INT8_C( 86), -INT8_C( 73), INT8_C( 88), -INT8_C( 44), -INT8_C( 42), -INT8_C( 42), -INT8_C( 85), INT8_C( 121) }, UINT32_C(4123877162), { INT8_C( 119), INT8_C( 121), -INT8_C( 124), -INT8_C( 32), -INT8_C( 3), INT8_C( 22), -INT8_C( 126), -INT8_C( 81), -INT8_C( 86), -INT8_C( 73), -INT8_C( 79), -INT8_C( 49), -INT8_C( 57), INT8_C( 36), INT8_C( 64), INT8_C( 109), -INT8_C( 1), -INT8_C( 121), INT8_C( 105), INT8_C( 85), INT8_C( 63), -INT8_C( 63), INT8_C( 41), INT8_C( 21), -INT8_C( 104), -INT8_C( 43), -INT8_C( 114), -INT8_C( 62), INT8_C( 52), INT8_C( 91), -INT8_C( 73), -INT8_C( 85) }, { -INT8_C( 44), INT8_C( 59), -INT8_C( 116), -INT8_C( 47), INT8_C( 81), INT8_C( 14), INT8_MIN, -INT8_C( 5), -INT8_C( 58), INT8_C( 49), -INT8_C( 54), -INT8_C( 115), INT8_C( 85), INT8_C( 10), -INT8_C( 6), INT8_C( 84), -INT8_C( 110), INT8_C( 99), -INT8_C( 87), -INT8_C( 47), INT8_C( 37), -INT8_C( 45), -INT8_C( 26), -INT8_C( 67), -INT8_C( 88), INT8_C( 117), INT8_MAX, -INT8_C( 36), -INT8_C( 48), INT8_C( 54), -INT8_C( 121), -INT8_C( 91) }, { -INT8_C( 94), INT8_C( 117), -INT8_C( 77), -INT8_C( 44), 
-INT8_C( 53), INT8_C( 71), -INT8_C( 84), -INT8_C( 114), -INT8_C( 54), -INT8_C( 67), INT8_C( 99), INT8_C( 2), -INT8_C( 114), -INT8_C( 108), -INT8_C( 94), INT8_C( 2), -INT8_C( 91), INT8_C( 15), INT8_C( 49), -INT8_C( 37), -INT8_C( 90), -INT8_C( 37), INT8_C( 49), -INT8_C( 37), INT8_C( 86), -INT8_C( 73), INT8_C( 53), -INT8_C( 44), INT8_C( 37), -INT8_C( 44), -INT8_C( 67), -INT8_C( 115) } }, { { INT8_C( 114), INT8_C( 19), INT8_C( 118), -INT8_C( 61), INT8_C( 34), -INT8_C( 9), -INT8_C( 65), -INT8_C( 24), INT8_C( 40), -INT8_C( 119), INT8_C( 117), INT8_C( 126), -INT8_C( 108), INT8_C( 111), -INT8_C( 46), INT8_C( 38), -INT8_C( 46), INT8_C( 124), -INT8_C( 9), -INT8_C( 9), INT8_C( 79), -INT8_C( 35), -INT8_C( 76), -INT8_C( 9), INT8_C( 82), INT8_C( 51), -INT8_C( 45), INT8_C( 35), INT8_C( 106), INT8_C( 90), -INT8_C( 56), -INT8_C( 36) }, UINT32_C(2426355310), { INT8_C( 53), INT8_C( 94), INT8_C( 120), INT8_C( 94), -INT8_C( 24), -INT8_C( 19), -INT8_C( 36), INT8_C( 124), INT8_C( 92), -INT8_C( 82), -INT8_C( 94), INT8_C( 46), INT8_C( 42), -INT8_C( 103), INT8_C( 38), INT8_C( 121), INT8_C( 118), -INT8_C( 38), INT8_C( 112), -INT8_C( 55), INT8_C( 14), INT8_C( 67), -INT8_C( 20), INT8_C( 120), -INT8_C( 98), -INT8_C( 76), INT8_C( 84), INT8_C( 12), -INT8_C( 14), -INT8_C( 13), -INT8_C( 100), INT8_C( 40) }, { INT8_C( 82), INT8_C( 20), -INT8_C( 122), INT8_C( 58), INT8_C( 1), INT8_C( 98), -INT8_C( 74), INT8_C( 93), INT8_C( 16), INT8_C( 88), -INT8_C( 117), INT8_C( 59), -INT8_C( 15), -INT8_C( 79), -INT8_C( 76), INT8_C( 103), -INT8_C( 116), INT8_C( 37), INT8_C( 48), -INT8_C( 102), INT8_C( 104), INT8_C( 28), INT8_C( 18), INT8_C( 6), -INT8_C( 48), INT8_C( 102), INT8_C( 18), -INT8_C( 61), INT8_C( 89), -INT8_C( 82), -INT8_C( 21), -INT8_C( 85) }, { INT8_C( 114), -INT8_C( 56), -INT8_C( 48), -INT8_C( 56), INT8_C( 34), -INT8_C( 79), INT8_C( 106), -INT8_C( 24), INT8_C( 40), -INT8_C( 76), -INT8_C( 122), -INT8_C( 76), -INT8_C( 117), INT8_C( 51), -INT8_C( 46), INT8_C( 38), INT8_C( 18), -INT8_C( 45), -INT8_C( 116), 
-INT8_C( 119), -INT8_C( 46), -INT8_C( 35), -INT8_C( 76), -INT8_C( 48), INT8_C( 82), INT8_C( 51), -INT8_C( 45), INT8_C( 35), INT8_C( 48), INT8_C( 90), -INT8_C( 56), INT8_C( 16) } }, { { -INT8_C( 62), INT8_C( 113), -INT8_C( 27), -INT8_C( 61), -INT8_C( 45), -INT8_C( 101), INT8_C( 32), -INT8_C( 29), -INT8_C( 13), -INT8_C( 84), INT8_C( 30), -INT8_C( 28), INT8_C( 93), -INT8_C( 45), INT8_C( 76), -INT8_C( 23), -INT8_C( 8), INT8_C( 124), -INT8_C( 125), INT8_C( 96), -INT8_C( 103), -INT8_C( 107), INT8_C( 103), INT8_C( 105), -INT8_C( 5), INT8_C( 121), INT8_C( 44), INT8_C( 85), INT8_C( 40), INT8_C( 23), INT8_C( 0), -INT8_C( 22) }, UINT32_C(1538188936), { -INT8_C( 127), -INT8_C( 50), INT8_C( 63), INT8_C( 117), INT8_C( 122), INT8_C( 93), INT8_C( 89), -INT8_C( 40), INT8_C( 48), -INT8_C( 91), -INT8_C( 63), INT8_C( 40), INT8_C( 34), INT8_C( 69), -INT8_C( 119), -INT8_C( 69), -INT8_C( 38), -INT8_C( 16), INT8_C( 36), -INT8_C( 42), INT8_C( 105), INT8_C( 81), INT8_C( 43), -INT8_C( 111), INT8_C( 104), INT8_C( 43), INT8_C( 124), -INT8_C( 15), INT8_C( 17), INT8_C( 42), INT8_C( 76), -INT8_C( 109) }, { -INT8_C( 8), -INT8_C( 117), INT8_C( 8), INT8_C( 115), -INT8_C( 23), INT8_C( 97), INT8_C( 75), INT8_C( 25), INT8_C( 7), INT8_C( 12), INT8_C( 66), INT8_C( 41), INT8_C( 81), -INT8_C( 53), -INT8_C( 28), INT8_C( 44), -INT8_C( 69), INT8_C( 8), INT8_C( 2), INT8_C( 36), INT8_C( 89), INT8_C( 45), -INT8_C( 74), -INT8_C( 62), INT8_C( 88), INT8_C( 50), -INT8_C( 77), INT8_C( 106), INT8_C( 92), -INT8_C( 1), -INT8_C( 3), INT8_C( 84) }, { -INT8_C( 62), INT8_C( 113), -INT8_C( 27), INT8_C( 45), -INT8_C( 45), -INT8_C( 101), INT8_C( 32), -INT8_C( 5), -INT8_C( 13), INT8_C( 97), INT8_C( 113), -INT8_C( 28), INT8_C( 93), -INT8_C( 101), -INT8_C( 84), INT8_C( 106), -INT8_C( 8), -INT8_C( 69), -INT8_C( 23), INT8_C( 103), -INT8_C( 103), INT8_C( 124), INT8_C( 103), INT8_C( 124), INT8_C( 7), INT8_C( 41), INT8_C( 44), INT8_C( 8), INT8_C( 124), INT8_C( 23), INT8_C( 93), -INT8_C( 22) } }, { { -INT8_C( 117), INT8_C( 5), -INT8_C( 
57), INT8_C( 116), INT8_C( 102), INT8_C( 18), -INT8_C( 115), INT8_C( 109), INT8_C( 31), -INT8_C( 49), -INT8_C( 106), INT8_C( 112), -INT8_C( 102), INT8_C( 122), -INT8_C( 100), INT8_C( 85), -INT8_C( 125), -INT8_C( 98), INT8_C( 122), -INT8_C( 36), -INT8_C( 53), INT8_C( 48), -INT8_C( 98), INT8_C( 36), INT8_C( 98), INT8_C( 81), -INT8_C( 114), -INT8_C( 66), INT8_C( 81), -INT8_C( 117), INT8_C( 18), -INT8_C( 36) }, UINT32_C(4132493968), { -INT8_C( 20), -INT8_C( 35), INT8_C( 100), INT8_C( 11), -INT8_C( 83), -INT8_C( 6), INT8_C( 124), INT8_C( 71), INT8_C( 117), INT8_C( 24), -INT8_C( 99), -INT8_C( 8), -INT8_C( 73), INT8_C( 23), -INT8_C( 44), -INT8_C( 126), INT8_C( 71), INT8_C( 115), -INT8_C( 90), -INT8_C( 87), -INT8_C( 60), INT8_C( 52), INT8_C( 103), INT8_C( 21), -INT8_C( 65), INT8_C( 121), -INT8_C( 15), INT8_C( 79), INT8_C( 83), INT8_C( 65), INT8_C( 70), INT8_C( 64) }, { INT8_C( 31), -INT8_C( 86), INT8_C( 75), -INT8_C( 52), -INT8_C( 92), -INT8_C( 57), INT8_C( 19), INT8_C( 25), -INT8_C( 32), -INT8_C( 80), INT8_C( 17), -INT8_C( 105), -INT8_C( 57), -INT8_C( 26), INT8_C( 25), INT8_C( 14), INT8_C( 89), -INT8_C( 64), -INT8_C( 73), INT8_C( 29), -INT8_C( 12), INT8_C( 30), INT8_C( 51), -INT8_C( 76), -INT8_C( 104), INT8_C( 36), INT8_C( 3), -INT8_C( 21), INT8_C( 102), INT8_C( 73), INT8_C( 43), -INT8_C( 123) }, { -INT8_C( 117), INT8_C( 5), -INT8_C( 57), INT8_C( 116), -INT8_C( 26), INT8_C( 18), -INT8_C( 115), INT8_C( 109), INT8_C( 31), INT8_C( 98), -INT8_C( 106), -INT8_C( 104), -INT8_C( 76), INT8_C( 122), -INT8_C( 53), -INT8_C( 57), -INT8_C( 125), -INT8_C( 98), INT8_C( 122), -INT8_C( 36), INT8_C( 102), INT8_C( 48), INT8_C( 25), INT8_C( 36), INT8_C( 98), INT8_C( 36), -INT8_C( 64), -INT8_C( 66), -INT8_C( 36), INT8_C( 5), -INT8_C( 115), -INT8_C( 117) } }, { { -INT8_C( 13), INT8_C( 119), INT8_C( 81), -INT8_C( 104), INT8_C( 62), INT8_C( 100), -INT8_C( 79), INT8_C( 30), INT8_C( 21), -INT8_C( 61), -INT8_C( 75), -INT8_C( 36), -INT8_C( 87), -INT8_C( 49), -INT8_C( 21), INT8_C( 2), -INT8_C( 113), 
-INT8_C( 94), INT8_C( 31), -INT8_C( 125), -INT8_C( 63), INT8_C( 82), INT8_C( 55), INT8_C( 89), INT8_C( 119), INT8_C( 59), INT8_C( 68), -INT8_C( 35), -INT8_C( 124), INT8_C( 112), INT8_C( 98), INT8_C( 120) }, UINT32_C( 621851623), { INT8_C( 23), -INT8_C( 63), INT8_C( 68), INT8_C( 44), -INT8_C( 124), -INT8_C( 7), INT8_C( 9), INT8_C( 45), -INT8_C( 56), -INT8_C( 12), INT8_C( 47), INT8_C( 87), -INT8_C( 106), INT8_C( 79), -INT8_C( 37), INT8_C( 87), -INT8_C( 95), INT8_C( 18), -INT8_C( 80), INT8_C( 24), INT8_C( 77), -INT8_C( 11), -INT8_C( 11), -INT8_C( 46), INT8_C( 101), INT8_C( 87), INT8_C( 74), INT8_C( 76), INT8_C( 10), INT8_C( 90), INT8_C( 113), INT8_C( 34) }, { INT8_C( 27), -INT8_C( 75), INT8_C( 78), -INT8_C( 96), -INT8_C( 81), INT8_C( 87), -INT8_C( 51), INT8_C( 119), INT8_C( 75), -INT8_C( 3), -INT8_C( 49), -INT8_C( 30), INT8_C( 76), -INT8_C( 86), INT8_C( 57), -INT8_C( 19), -INT8_C( 68), -INT8_C( 22), INT8_C( 6), INT8_C( 10), -INT8_C( 33), -INT8_C( 5), -INT8_C( 36), INT8_C( 68), INT8_C( 83), INT8_C( 38), -INT8_C( 112), INT8_C( 93), INT8_MIN, INT8_C( 1), INT8_MAX, -INT8_C( 101) }, { INT8_C( 89), INT8_C( 119), INT8_C( 62), -INT8_C( 104), INT8_C( 62), INT8_C( 38), -INT8_C( 61), -INT8_C( 86), INT8_C( 21), -INT8_C( 33), -INT8_C( 75), -INT8_C( 36), INT8_C( 55), INT8_C( 2), -INT8_C( 21), INT8_C( 89), -INT8_C( 113), -INT8_C( 94), INT8_C( 31), -INT8_C( 125), -INT8_C( 49), INT8_C( 82), INT8_C( 55), INT8_C( 89), INT8_C( 87), INT8_C( 59), -INT8_C( 75), -INT8_C( 35), -INT8_C( 124), INT8_C( 68), INT8_C( 98), INT8_C( 120) } }, { { -INT8_C( 73), -INT8_C( 50), INT8_C( 59), INT8_C( 102), INT8_C( 37), INT8_C( 9), -INT8_C( 35), INT8_C( 113), INT8_C( 6), -INT8_C( 84), INT8_C( 83), INT8_C( 82), INT8_C( 86), -INT8_C( 116), INT8_C( 63), INT8_C( 19), INT8_C( 118), INT8_C( 69), INT8_C( 29), INT8_C( 85), INT8_C( 65), -INT8_C( 7), -INT8_C( 103), -INT8_C( 108), INT8_C( 31), INT8_C( 41), -INT8_C( 15), -INT8_C( 97), INT8_C( 43), INT8_C( 113), INT8_C( 58), -INT8_C( 30) }, UINT32_C(1682470463), { 
INT8_MAX, INT8_C( 37), -INT8_C( 43), -INT8_C( 123), -INT8_C( 46), INT8_C( 40), -INT8_C( 41), INT8_C( 40), -INT8_C( 75), INT8_C( 22), INT8_C( 59), INT8_C( 43), INT8_C( 92), INT8_C( 88), -INT8_C( 127), -INT8_C( 99), INT8_C( 81), INT8_C( 26), INT8_C( 49), INT8_C( 112), INT8_C( 68), INT8_C( 34), INT8_C( 15), INT8_C( 111), -INT8_C( 109), INT8_C( 74), INT8_C( 81), -INT8_C( 46), -INT8_C( 64), -INT8_C( 103), INT8_C( 55), INT8_C( 63) }, { -INT8_C( 66), INT8_C( 12), -INT8_C( 60), -INT8_C( 112), INT8_C( 53), -INT8_C( 101), -INT8_C( 71), -INT8_C( 22), -INT8_C( 79), -INT8_C( 12), INT8_C( 21), INT8_C( 13), INT8_C( 77), -INT8_C( 106), -INT8_C( 86), -INT8_C( 98), -INT8_C( 79), -INT8_C( 37), INT8_C( 15), -INT8_C( 11), -INT8_C( 2), INT8_C( 30), INT8_C( 100), -INT8_C( 111), INT8_C( 104), -INT8_C( 75), INT8_C( 100), INT8_C( 40), INT8_C( 78), -INT8_C( 101), INT8_C( 103), INT8_C( 12) }, { INT8_C( 12), -INT8_C( 101), -INT8_C( 7), INT8_C( 9), INT8_C( 29), -INT8_C( 79), -INT8_C( 35), INT8_C( 113), INT8_C( 6), -INT8_C( 103), INT8_C( 40), INT8_C( 82), INT8_C( 43), INT8_C( 31), -INT8_C( 50), INT8_C( 19), INT8_C( 118), INT8_C( 69), INT8_C( 29), -INT8_C( 79), INT8_C( 65), -INT8_C( 7), INT8_C( 19), -INT8_C( 108), INT8_C( 31), INT8_C( 41), INT8_C( 69), -INT8_C( 97), INT8_C( 43), INT8_C( 41), -INT8_C( 111), -INT8_C( 30) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_mm256_loadu_epi8(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi8(test_vec[i].idx); simde__m256i b = simde_mm256_loadu_epi8(test_vec[i].b); simde__m256i r = simde_mm256_mask_permutex2var_epi8(a, test_vec[i].k, idx, b); simde_test_x86_assert_equal_i8x32(r, simde_mm256_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i8x32(); simde__mmask32 k = simde_test_x86_random_mmask32(); simde__m256i idx = simde_test_x86_random_i8x32(); simde__m256i b = simde_test_x86_random_i8x32(); 
simde__m256i r = simde_mm256_mask_permutex2var_epi8(a, k, idx, b); simde_test_x86_write_i8x32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask2_permutex2var_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[32]; const int8_t idx[32]; const simde__mmask32 k; const int8_t b[32]; const int8_t r[32]; } test_vec[] = { { { -INT8_C( 89), INT8_C( 43), -INT8_C( 99), -INT8_C( 36), -INT8_C( 58), INT8_C( 86), -INT8_C( 58), INT8_C( 120), INT8_C( 74), -INT8_C( 36), -INT8_C( 123), -INT8_C( 105), INT8_C( 114), INT8_C( 48), INT8_C( 54), INT8_C( 35), INT8_C( 11), INT8_C( 69), INT8_C( 24), INT8_C( 9), INT8_C( 99), INT8_C( 124), -INT8_C( 101), -INT8_C( 52), INT8_C( 49), -INT8_C( 1), -INT8_C( 12), INT8_MAX, -INT8_C( 102), INT8_C( 92), -INT8_C( 116), INT8_C( 65) }, { -INT8_C( 121), INT8_C( 41), INT8_C( 30), INT8_C( 78), INT8_MAX, -INT8_C( 28), -INT8_C( 58), -INT8_C( 55), -INT8_C( 64), INT8_C( 75), INT8_C( 97), INT8_C( 51), INT8_C( 123), -INT8_C( 105), INT8_C( 86), -INT8_C( 121), -INT8_C( 36), INT8_C( 111), -INT8_C( 112), INT8_C( 63), -INT8_C( 21), INT8_C( 43), INT8_C( 11), INT8_C( 29), INT8_C( 42), INT8_C( 0), -INT8_C( 100), -INT8_C( 60), INT8_C( 92), INT8_C( 40), INT8_C( 6), -INT8_C( 29) }, UINT32_C(3492881489), { INT8_C( 8), -INT8_C( 9), -INT8_C( 102), -INT8_C( 55), INT8_C( 67), -INT8_C( 5), -INT8_C( 4), -INT8_C( 66), -INT8_C( 110), INT8_C( 82), INT8_C( 69), INT8_C( 110), -INT8_C( 63), -INT8_C( 42), -INT8_C( 83), -INT8_C( 83), INT8_C( 1), -INT8_C( 71), -INT8_C( 54), INT8_C( 44), -INT8_C( 71), INT8_C( 102), -INT8_C( 16), INT8_C( 21), -INT8_C( 113), -INT8_C( 10), -INT8_C( 8), -INT8_C( 32), INT8_C( 26), INT8_C( 42), -INT8_C( 79), INT8_C( 35) }, { INT8_C( 120), INT8_C( 41), INT8_C( 30), 
INT8_C( 78), INT8_C( 35), -INT8_C( 28), -INT8_C( 58), -INT8_C( 55), -INT8_C( 64), INT8_C( 75), -INT8_C( 9), INT8_C( 51), INT8_C( 123), -INT8_C( 52), INT8_C( 86), -INT8_C( 121), -INT8_C( 102), INT8_C( 111), -INT8_C( 112), INT8_C( 63), INT8_C( 110), INT8_C( 110), INT8_C( 11), INT8_C( 29), INT8_C( 42), INT8_C( 0), -INT8_C( 100), -INT8_C( 60), -INT8_C( 102), INT8_C( 40), -INT8_C( 58), -INT8_C( 55) } }, { { INT8_C( 33), INT8_C( 75), -INT8_C( 20), INT8_C( 100), INT8_C( 70), -INT8_C( 24), INT8_C( 35), -INT8_C( 40), INT8_C( 58), INT8_C( 104), INT8_C( 70), -INT8_C( 4), INT8_C( 62), -INT8_C( 13), -INT8_C( 87), INT8_C( 64), -INT8_C( 84), INT8_C( 115), INT8_C( 108), INT8_C( 101), -INT8_C( 39), INT8_C( 92), INT8_C( 122), INT8_C( 104), INT8_C( 83), INT8_C( 115), INT8_C( 73), INT8_C( 109), -INT8_C( 99), -INT8_C( 6), -INT8_C( 112), -INT8_C( 66) }, { INT8_C( 69), INT8_C( 124), INT8_C( 35), -INT8_C( 117), INT8_C( 100), INT8_C( 70), INT8_C( 99), -INT8_C( 97), -INT8_C( 82), -INT8_C( 87), -INT8_C( 101), -INT8_C( 19), -INT8_C( 100), INT8_C( 68), INT8_C( 45), INT8_C( 73), -INT8_C( 73), -INT8_C( 103), -INT8_C( 82), -INT8_C( 112), -INT8_C( 11), INT8_C( 41), -INT8_C( 7), INT8_C( 72), -INT8_C( 100), INT8_C( 66), -INT8_C( 74), INT8_C( 57), INT8_C( 60), INT8_C( 70), -INT8_C( 9), -INT8_C( 127) }, UINT32_C( 655104707), { INT8_C( 96), INT8_C( 111), -INT8_C( 58), INT8_C( 15), INT8_C( 24), INT8_C( 97), -INT8_C( 4), -INT8_C( 76), -INT8_C( 91), INT8_C( 41), -INT8_C( 3), INT8_C( 92), -INT8_C( 62), -INT8_C( 84), -INT8_C( 19), -INT8_C( 73), -INT8_C( 43), -INT8_C( 26), INT8_C( 0), INT8_C( 113), INT8_C( 40), -INT8_C( 74), -INT8_C( 86), INT8_C( 100), -INT8_C( 4), -INT8_C( 95), -INT8_C( 27), -INT8_C( 65), -INT8_C( 68), -INT8_C( 15), -INT8_C( 25), INT8_C( 28) }, { -INT8_C( 24), -INT8_C( 68), INT8_C( 35), -INT8_C( 117), INT8_C( 100), INT8_C( 70), INT8_C( 15), -INT8_C( 66), -INT8_C( 82), INT8_C( 41), -INT8_C( 101), -INT8_C( 84), -INT8_C( 99), INT8_C( 68), INT8_C( 45), INT8_C( 73), -INT8_C( 73), -INT8_C( 103), 
-INT8_C( 19), -INT8_C( 84), -INT8_C( 11), INT8_C( 41), -INT8_C( 7), INT8_C( 72), -INT8_C( 99), -INT8_C( 20), -INT8_C( 86), INT8_C( 57), INT8_C( 60), INT8_C( 35), -INT8_C( 9), -INT8_C( 127) } }, { { INT8_C( 96), -INT8_C( 83), INT8_C( 43), INT8_C( 120), INT8_C( 15), INT8_C( 39), INT8_C( 44), -INT8_C( 76), INT8_C( 80), INT8_C( 42), INT8_C( 17), INT8_C( 18), -INT8_C( 42), -INT8_C( 2), -INT8_C( 54), -INT8_C( 85), -INT8_C( 28), -INT8_C( 54), INT8_C( 28), INT8_C( 12), INT8_MIN, -INT8_C( 58), INT8_C( 112), INT8_C( 124), INT8_C( 103), INT8_C( 85), INT8_C( 60), INT8_C( 35), INT8_C( 70), INT8_C( 35), INT8_C( 64), -INT8_C( 90) }, { -INT8_C( 48), INT8_C( 107), INT8_C( 30), -INT8_C( 33), -INT8_C( 109), INT8_C( 74), -INT8_C( 108), -INT8_C( 29), INT8_C( 116), -INT8_C( 91), -INT8_C( 10), INT8_C( 74), -INT8_C( 93), -INT8_C( 64), -INT8_C( 11), -INT8_C( 121), -INT8_C( 118), INT8_C( 17), -INT8_C( 109), INT8_C( 10), -INT8_C( 41), INT8_C( 3), -INT8_C( 122), INT8_C( 63), INT8_C( 88), -INT8_C( 62), INT8_C( 98), -INT8_C( 98), -INT8_C( 27), -INT8_C( 94), INT8_C( 68), -INT8_C( 74) }, UINT32_C(2710921742), { -INT8_C( 84), INT8_C( 41), -INT8_C( 124), INT8_C( 33), -INT8_C( 50), INT8_C( 122), INT8_C( 107), INT8_C( 113), INT8_C( 58), INT8_C( 97), -INT8_C( 8), -INT8_C( 60), INT8_C( 114), -INT8_C( 117), -INT8_C( 50), INT8_C( 74), -INT8_C( 114), INT8_C( 85), -INT8_C( 119), -INT8_C( 26), INT8_C( 23), -INT8_C( 21), -INT8_C( 124), -INT8_C( 3), -INT8_C( 114), -INT8_C( 56), -INT8_C( 77), -INT8_C( 100), INT8_C( 42), INT8_C( 72), INT8_C( 61), -INT8_C( 41) }, { -INT8_C( 48), -INT8_C( 60), INT8_C( 64), -INT8_C( 90), -INT8_C( 109), INT8_C( 74), -INT8_C( 108), -INT8_C( 29), INT8_C( 116), INT8_C( 122), -INT8_C( 10), INT8_C( 74), -INT8_C( 93), INT8_C( 96), -INT8_C( 21), -INT8_C( 121), INT8_C( 17), INT8_C( 17), INT8_C( 12), INT8_C( 10), INT8_C( 124), INT8_C( 3), -INT8_C( 122), -INT8_C( 41), INT8_C( 103), -INT8_C( 62), INT8_C( 98), -INT8_C( 98), -INT8_C( 27), -INT8_C( 124), INT8_C( 68), -INT8_C( 124) } }, { { 
INT8_C( 114), -INT8_C( 63), -INT8_C( 8), INT8_C( 64), INT8_C( 60), INT8_C( 99), -INT8_C( 78), INT8_C( 118), -INT8_C( 60), -INT8_C( 86), INT8_C( 59), INT8_C( 55), INT8_C( 54), INT8_C( 9), -INT8_C( 127), -INT8_C( 60), INT8_C( 94), INT8_C( 10), -INT8_C( 85), INT8_C( 118), -INT8_C( 11), INT8_C( 47), INT8_C( 115), -INT8_C( 125), -INT8_C( 8), INT8_C( 38), INT8_C( 31), INT8_C( 34), INT8_C( 110), INT8_C( 92), -INT8_C( 7), -INT8_C( 32) }, { INT8_C( 30), -INT8_C( 15), INT8_C( 33), INT8_C( 90), INT8_C( 85), -INT8_C( 45), -INT8_C( 48), INT8_C( 25), INT8_C( 125), INT8_C( 11), INT8_C( 80), -INT8_C( 77), INT8_C( 21), -INT8_C( 47), INT8_C( 120), INT8_C( 115), -INT8_C( 37), INT8_C( 35), -INT8_C( 23), -INT8_C( 47), INT8_C( 82), INT8_C( 92), INT8_C( 84), INT8_C( 74), -INT8_C( 126), INT8_C( 116), INT8_C( 109), -INT8_C( 15), -INT8_C( 48), INT8_C( 102), -INT8_C( 47), -INT8_C( 18) }, UINT32_C(2907239000), { -INT8_C( 59), INT8_C( 25), -INT8_C( 58), INT8_C( 67), INT8_C( 36), INT8_C( 23), -INT8_C( 10), INT8_C( 57), -INT8_C( 24), INT8_C( 110), -INT8_C( 83), -INT8_C( 60), -INT8_C( 111), -INT8_C( 106), -INT8_C( 107), -INT8_C( 28), -INT8_C( 13), -INT8_C( 23), INT8_C( 46), INT8_C( 117), INT8_C( 93), -INT8_C( 101), INT8_C( 102), INT8_C( 46), INT8_C( 2), INT8_C( 56), INT8_C( 28), INT8_C( 90), INT8_C( 42), INT8_C( 101), INT8_C( 7), -INT8_C( 16) }, { INT8_C( 30), -INT8_C( 15), INT8_C( 33), INT8_C( 31), INT8_C( 47), -INT8_C( 45), INT8_C( 94), INT8_C( 25), INT8_C( 125), INT8_C( 55), INT8_C( 80), -INT8_C( 77), INT8_C( 47), INT8_C( 10), INT8_C( 2), INT8_C( 117), -INT8_C( 37), INT8_C( 35), -INT8_C( 23), INT8_C( 10), INT8_C( 82), INT8_C( 92), -INT8_C( 11), INT8_C( 74), -INT8_C( 8), INT8_C( 116), -INT8_C( 106), -INT8_C( 23), -INT8_C( 48), -INT8_C( 10), -INT8_C( 47), -INT8_C( 107) } }, { { INT8_C( 126), -INT8_C( 51), INT8_C( 51), -INT8_C( 94), -INT8_C( 28), INT8_C( 41), -INT8_C( 36), -INT8_C( 51), -INT8_C( 104), -INT8_C( 119), -INT8_C( 111), INT8_C( 41), INT8_C( 31), INT8_C( 38), INT8_C( 13), INT8_C( 18), 
INT8_C( 15), INT8_C( 60), -INT8_C( 120), INT8_C( 109), -INT8_C( 41), -INT8_C( 18), -INT8_C( 101), -INT8_C( 39), INT8_C( 38), -INT8_C( 73), INT8_C( 51), INT8_C( 81), INT8_C( 28), INT8_C( 58), INT8_C( 65), -INT8_C( 102) }, { INT8_C( 8), INT8_C( 116), INT8_C( 61), -INT8_C( 20), -INT8_C( 99), INT8_C( 25), -INT8_C( 71), INT8_C( 53), -INT8_C( 94), INT8_C( 74), INT8_C( 95), -INT8_C( 63), INT8_C( 112), INT8_C( 108), -INT8_C( 44), INT8_MIN, -INT8_C( 88), INT8_C( 92), -INT8_C( 19), INT8_MIN, INT8_C( 74), -INT8_C( 120), INT8_C( 89), INT8_C( 113), INT8_C( 63), -INT8_C( 115), -INT8_C( 62), INT8_C( 92), -INT8_C( 57), INT8_C( 3), -INT8_C( 10), -INT8_C( 49) }, UINT32_C( 347878263), { INT8_C( 76), INT8_C( 117), INT8_C( 74), -INT8_C( 18), -INT8_C( 64), -INT8_C( 87), -INT8_C( 80), INT8_C( 48), INT8_C( 21), -INT8_C( 124), -INT8_C( 80), -INT8_C( 66), -INT8_C( 32), -INT8_C( 99), INT8_C( 62), INT8_C( 42), INT8_C( 37), -INT8_C( 105), -INT8_C( 101), INT8_C( 101), INT8_C( 36), INT8_C( 93), -INT8_C( 63), -INT8_C( 20), INT8_C( 96), -INT8_C( 73), -INT8_C( 69), -INT8_C( 41), -INT8_C( 21), INT8_C( 119), -INT8_C( 20), INT8_C( 55) }, { -INT8_C( 104), INT8_C( 36), INT8_C( 119), -INT8_C( 20), INT8_C( 58), -INT8_C( 73), -INT8_C( 73), INT8_C( 53), INT8_C( 74), -INT8_C( 111), INT8_C( 95), -INT8_C( 63), INT8_C( 37), -INT8_C( 32), -INT8_C( 44), INT8_MIN, -INT8_C( 88), INT8_C( 92), -INT8_C( 99), INT8_C( 126), -INT8_C( 111), -INT8_C( 104), INT8_C( 89), -INT8_C( 105), INT8_C( 63), -INT8_C( 115), INT8_C( 51), INT8_C( 92), -INT8_C( 51), INT8_C( 3), -INT8_C( 10), -INT8_C( 49) } }, { { -INT8_C( 19), INT8_C( 54), INT8_C( 38), -INT8_C( 83), -INT8_C( 33), -INT8_C( 42), -INT8_C( 35), -INT8_C( 12), INT8_C( 90), -INT8_C( 114), -INT8_C( 78), INT8_C( 58), INT8_C( 43), -INT8_C( 16), INT8_C( 100), INT8_C( 81), -INT8_C( 120), INT8_C( 0), -INT8_C( 74), -INT8_C( 84), INT8_C( 93), INT8_C( 119), -INT8_C( 104), -INT8_C( 66), INT8_C( 46), INT8_C( 84), -INT8_C( 107), INT8_C( 25), -INT8_C( 53), -INT8_C( 127), INT8_C( 81), 
-INT8_C( 72) }, { -INT8_C( 73), INT8_C( 119), INT8_C( 101), -INT8_C( 106), INT8_C( 77), INT8_C( 67), -INT8_C( 117), -INT8_C( 89), -INT8_C( 47), INT8_C( 61), -INT8_C( 31), -INT8_C( 4), INT8_C( 46), INT8_C( 69), INT8_C( 77), -INT8_C( 74), INT8_C( 69), INT8_C( 3), INT8_C( 98), -INT8_C( 93), INT8_C( 122), -INT8_C( 5), INT8_C( 97), -INT8_C( 87), INT8_C( 79), -INT8_C( 10), -INT8_C( 62), INT8_C( 26), INT8_C( 120), INT8_C( 19), -INT8_C( 45), INT8_C( 47) }, UINT32_C(3620092042), { INT8_C( 123), INT8_C( 81), INT8_C( 126), INT8_C( 76), -INT8_C( 114), INT8_C( 95), INT8_C( 73), -INT8_C( 68), -INT8_C( 91), -INT8_C( 106), INT8_C( 114), -INT8_C( 22), -INT8_C( 102), -INT8_C( 43), -INT8_C( 115), INT8_C( 20), -INT8_C( 48), -INT8_C( 18), -INT8_C( 67), INT8_C( 31), -INT8_C( 27), INT8_MIN, INT8_C( 57), INT8_C( 93), -INT8_C( 109), INT8_C( 12), -INT8_C( 116), INT8_C( 30), INT8_C( 69), INT8_C( 82), -INT8_C( 11), -INT8_C( 64) }, { -INT8_C( 73), INT8_C( 93), INT8_C( 101), -INT8_C( 104), INT8_C( 77), INT8_C( 67), -INT8_C( 117), -INT8_C( 68), -INT8_C( 47), INT8_C( 61), -INT8_C( 31), INT8_C( 69), -INT8_C( 115), -INT8_C( 42), INT8_C( 77), -INT8_C( 74), INT8_C( 69), -INT8_C( 83), INT8_C( 126), -INT8_C( 93), INT8_C( 122), -INT8_C( 5), INT8_C( 81), -INT8_C( 106), INT8_C( 81), INT8_C( 57), INT8_C( 38), INT8_C( 26), -INT8_C( 109), INT8_C( 19), -INT8_C( 84), INT8_C( 20) } }, { { -INT8_C( 93), INT8_C( 116), INT8_C( 13), INT8_C( 50), -INT8_C( 45), INT8_C( 86), -INT8_C( 18), INT8_C( 120), -INT8_C( 20), INT8_C( 97), INT8_C( 99), -INT8_C( 122), INT8_C( 54), -INT8_C( 16), -INT8_C( 101), INT8_C( 6), -INT8_C( 33), INT8_C( 88), INT8_C( 37), -INT8_C( 60), -INT8_C( 40), INT8_C( 94), INT8_C( 33), INT8_C( 108), INT8_C( 107), -INT8_C( 83), -INT8_C( 118), -INT8_C( 80), INT8_C( 0), INT8_MAX, INT8_C( 112), -INT8_C( 93) }, { -INT8_C( 13), INT8_C( 125), -INT8_C( 43), -INT8_C( 57), -INT8_C( 45), -INT8_C( 60), INT8_C( 63), -INT8_C( 64), INT8_C( 37), -INT8_C( 94), INT8_C( 70), INT8_C( 91), -INT8_C( 109), -INT8_C( 31), 
INT8_C( 97), INT8_C( 114), INT8_C( 58), -INT8_C( 122), INT8_C( 54), INT8_C( 18), -INT8_C( 28), INT8_C( 87), INT8_C( 126), INT8_C( 79), INT8_C( 4), INT8_C( 8), -INT8_C( 1), INT8_C( 4), -INT8_C( 120), INT8_C( 112), -INT8_C( 88), INT8_C( 123) }, UINT32_C(3242360301), { INT8_C( 65), -INT8_C( 126), -INT8_C( 127), INT8_C( 102), INT8_C( 36), -INT8_C( 57), -INT8_C( 63), -INT8_C( 73), -INT8_C( 87), INT8_C( 34), INT8_C( 41), -INT8_C( 29), -INT8_C( 88), INT8_C( 95), -INT8_C( 11), -INT8_C( 115), -INT8_C( 74), INT8_C( 116), -INT8_C( 36), -INT8_C( 69), INT8_C( 124), -INT8_C( 36), -INT8_C( 65), INT8_C( 4), INT8_C( 76), INT8_C( 103), INT8_MIN, INT8_C( 57), -INT8_C( 27), -INT8_C( 62), -INT8_C( 6), INT8_C( 38) }, { -INT8_C( 69), INT8_C( 125), INT8_C( 94), INT8_C( 120), -INT8_C( 45), -INT8_C( 45), INT8_C( 38), -INT8_C( 93), -INT8_C( 57), -INT8_C( 94), -INT8_C( 18), -INT8_C( 80), -INT8_C( 60), -INT8_C( 126), -INT8_C( 126), INT8_C( 114), INT8_C( 58), -INT8_C( 18), INT8_C( 54), INT8_C( 18), -INT8_C( 28), INT8_C( 87), -INT8_C( 6), INT8_C( 79), -INT8_C( 45), INT8_C( 8), -INT8_C( 1), INT8_C( 4), -INT8_C( 120), INT8_C( 112), -INT8_C( 87), INT8_C( 57) } }, { { INT8_C( 68), INT8_C( 123), -INT8_C( 115), INT8_C( 105), INT8_C( 67), INT8_C( 78), INT8_C( 32), -INT8_C( 20), INT8_C( 113), INT8_C( 74), -INT8_C( 49), INT8_C( 25), -INT8_C( 87), -INT8_C( 60), -INT8_C( 90), INT8_C( 96), INT8_C( 56), -INT8_C( 125), INT8_C( 27), -INT8_C( 75), INT8_C( 95), -INT8_C( 38), -INT8_C( 71), -INT8_C( 85), INT8_C( 66), INT8_C( 57), -INT8_C( 28), INT8_C( 39), -INT8_C( 4), -INT8_C( 33), INT8_C( 77), INT8_C( 64) }, { INT8_C( 90), -INT8_C( 38), -INT8_C( 87), -INT8_C( 99), INT8_C( 41), -INT8_C( 54), -INT8_C( 119), -INT8_C( 102), INT8_C( 20), INT8_C( 88), -INT8_C( 77), -INT8_C( 67), INT8_C( 29), INT8_C( 90), INT8_C( 29), INT8_C( 85), -INT8_C( 35), INT8_C( 56), INT8_C( 10), INT8_C( 60), INT8_C( 19), -INT8_C( 60), -INT8_C( 25), INT8_C( 85), -INT8_C( 3), -INT8_C( 53), INT8_C( 124), -INT8_C( 7), -INT8_C( 86), -INT8_C( 55), 
INT8_C( 58), INT8_C( 5) }, UINT32_C(3450004388), { -INT8_C( 83), INT8_C( 44), INT8_C( 103), -INT8_C( 63), -INT8_C( 124), INT8_C( 26), INT8_MAX, -INT8_C( 95), INT8_C( 116), -INT8_C( 100), -INT8_C( 9), INT8_C( 81), -INT8_C( 43), INT8_C( 1), -INT8_C( 115), -INT8_C( 24), -INT8_C( 59), INT8_C( 116), INT8_C( 61), -INT8_C( 61), INT8_C( 64), -INT8_C( 71), -INT8_C( 68), -INT8_C( 22), -INT8_C( 126), -INT8_C( 10), -INT8_C( 17), INT8_C( 38), -INT8_C( 38), -INT8_C( 110), -INT8_C( 13), -INT8_C( 121) }, { INT8_C( 90), -INT8_C( 38), -INT8_C( 100), -INT8_C( 99), INT8_C( 41), -INT8_C( 49), -INT8_C( 119), -INT8_C( 28), INT8_C( 95), INT8_C( 66), -INT8_C( 77), -INT8_C( 67), INT8_C( 29), -INT8_C( 28), -INT8_C( 33), -INT8_C( 38), -INT8_C( 35), -INT8_C( 126), INT8_C( 10), INT8_C( 60), INT8_C( 19), INT8_C( 67), -INT8_C( 25), -INT8_C( 38), -INT8_C( 110), -INT8_C( 53), -INT8_C( 38), -INT8_C( 10), -INT8_C( 86), -INT8_C( 55), -INT8_C( 17), INT8_C( 78) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_mm256_loadu_epi8(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi8(test_vec[i].idx); simde__m256i b = simde_mm256_loadu_epi8(test_vec[i].b); simde__m256i r = simde_mm256_mask2_permutex2var_epi8(a, idx, test_vec[i].k, b); simde_test_x86_assert_equal_i8x32(r, simde_mm256_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i8x32(); simde__m256i idx = simde_test_x86_random_i8x32(); simde__mmask32 k = simde_test_x86_random_mmask32(); simde__m256i b = simde_test_x86_random_i8x32(); simde__m256i r = simde_mm256_mask2_permutex2var_epi8(a, idx, k, b); simde_test_x86_write_i8x32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x32(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 
1; #endif } static int test_simde_mm256_maskz_permutex2var_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask32 k; const int8_t a[32]; const int8_t idx[32]; const int8_t b[32]; const int8_t r[32]; } test_vec[] = { { UINT32_C(1112103614), { INT8_C( 117), -INT8_C( 56), -INT8_C( 28), -INT8_C( 23), INT8_C( 100), -INT8_C( 37), INT8_C( 59), INT8_C( 57), -INT8_C( 36), -INT8_C( 56), INT8_C( 33), -INT8_C( 94), INT8_C( 61), INT8_C( 94), INT8_C( 101), INT8_C( 125), INT8_C( 23), INT8_C( 33), INT8_C( 103), -INT8_C( 102), INT8_C( 24), INT8_C( 87), -INT8_C( 64), -INT8_C( 14), -INT8_C( 23), -INT8_C( 76), INT8_C( 121), -INT8_C( 89), INT8_C( 14), -INT8_C( 62), -INT8_C( 23), -INT8_C( 125) }, { -INT8_C( 118), -INT8_C( 51), INT8_C( 109), -INT8_C( 17), -INT8_C( 88), -INT8_C( 88), INT8_C( 40), -INT8_C( 123), INT8_C( 112), INT8_C( 74), INT8_C( 39), -INT8_C( 83), -INT8_C( 88), -INT8_C( 116), INT8_C( 42), -INT8_C( 64), -INT8_C( 83), -INT8_C( 110), INT8_C( 90), -INT8_C( 59), -INT8_C( 23), INT8_C( 26), -INT8_C( 73), -INT8_C( 46), -INT8_C( 50), INT8_C( 49), INT8_C( 121), -INT8_C( 35), -INT8_C( 13), INT8_C( 98), INT8_C( 96), INT8_C( 126) }, { INT8_C( 48), -INT8_C( 51), INT8_C( 109), -INT8_C( 40), INT8_C( 117), -INT8_C( 107), INT8_C( 93), -INT8_C( 26), -INT8_C( 33), -INT8_C( 124), -INT8_C( 109), -INT8_C( 120), INT8_C( 16), -INT8_C( 66), INT8_C( 72), -INT8_C( 66), INT8_C( 80), -INT8_C( 94), -INT8_C( 125), INT8_C( 57), -INT8_C( 68), INT8_C( 59), INT8_C( 11), -INT8_C( 117), INT8_C( 108), -INT8_C( 124), INT8_C( 104), INT8_C( 95), -INT8_C( 26), -INT8_C( 56), -INT8_C( 35), INT8_C( 22) }, { INT8_C( 0), INT8_C( 94), -INT8_C( 66), -INT8_C( 66), -INT8_C( 33), -INT8_C( 33), INT8_C( 0), -INT8_C( 37), INT8_C( 0), INT8_C( 33), INT8_C( 0), -INT8_C( 66), -INT8_C( 33), INT8_C( 0), -INT8_C( 109), INT8_C( 0), -INT8_C( 66), INT8_C( 0), INT8_C( 0), -INT8_C( 37), INT8_C( 0), INT8_C( 0), -INT8_C( 117), INT8_C( 0), INT8_C( 0), -INT8_C( 94), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), 
INT8_C( 48), INT8_C( 0) } }, { UINT32_C( 200231574), { -INT8_C( 32), INT8_C( 76), -INT8_C( 15), -INT8_C( 65), -INT8_C( 47), -INT8_C( 123), INT8_C( 71), -INT8_C( 31), INT8_C( 67), -INT8_C( 113), -INT8_C( 97), -INT8_C( 109), INT8_C( 49), INT8_C( 35), -INT8_C( 52), -INT8_C( 18), INT8_C( 94), -INT8_C( 41), INT8_C( 121), -INT8_C( 54), INT8_C( 91), -INT8_C( 31), INT8_C( 41), INT8_C( 65), -INT8_C( 87), INT8_C( 7), INT8_C( 88), INT8_C( 63), INT8_C( 81), INT8_C( 71), INT8_C( 75), INT8_C( 49) }, { -INT8_C( 109), INT8_C( 60), -INT8_C( 15), INT8_C( 100), -INT8_C( 63), INT8_C( 56), INT8_C( 70), INT8_C( 4), -INT8_C( 56), -INT8_C( 27), -INT8_C( 105), -INT8_C( 7), INT8_C( 8), INT8_C( 99), -INT8_C( 25), INT8_C( 102), INT8_C( 58), INT8_C( 96), INT8_C( 48), -INT8_C( 107), INT8_C( 65), INT8_C( 90), -INT8_C( 41), -INT8_C( 21), INT8_C( 97), INT8_C( 47), INT8_C( 42), -INT8_C( 78), INT8_C( 118), INT8_C( 117), -INT8_C( 28), INT8_C( 9) }, { -INT8_C( 78), -INT8_C( 43), INT8_C( 110), INT8_C( 115), INT8_C( 13), -INT8_C( 76), INT8_C( 120), -INT8_C( 43), -INT8_C( 103), INT8_C( 15), -INT8_C( 49), -INT8_C( 94), INT8_C( 115), -INT8_C( 74), INT8_C( 8), -INT8_C( 83), INT8_C( 23), INT8_C( 57), INT8_C( 67), INT8_C( 88), -INT8_C( 109), INT8_C( 26), INT8_C( 67), -INT8_C( 12), INT8_C( 73), INT8_C( 110), -INT8_C( 90), -INT8_C( 65), -INT8_C( 29), -INT8_C( 118), -INT8_C( 56), -INT8_C( 107) }, { INT8_C( 0), -INT8_C( 29), INT8_C( 57), INT8_C( 0), INT8_C( 76), INT8_C( 0), INT8_C( 0), -INT8_C( 47), INT8_C( 0), -INT8_C( 76), INT8_C( 0), INT8_C( 110), INT8_C( 0), INT8_C( 0), -INT8_C( 43), INT8_C( 0), -INT8_C( 90), -INT8_C( 78), INT8_C( 23), -INT8_C( 31), INT8_C( 0), INT8_C( 88), INT8_C( 65), -INT8_C( 94), -INT8_C( 43), -INT8_C( 83), INT8_C( 0), INT8_C( 67), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { UINT32_C(1829320287), { -INT8_C( 22), -INT8_C( 127), INT8_C( 66), -INT8_C( 124), -INT8_C( 112), INT8_C( 17), INT8_C( 38), INT8_C( 3), -INT8_C( 56), INT8_C( 46), -INT8_C( 79), -INT8_C( 33), INT8_C( 103), 
-INT8_C( 12), INT8_C( 55), -INT8_C( 6), INT8_C( 14), INT8_C( 123), -INT8_C( 18), INT8_C( 87), -INT8_C( 23), -INT8_C( 107), INT8_C( 22), -INT8_C( 52), INT8_C( 31), -INT8_C( 34), INT8_C( 98), INT8_MAX, INT8_C( 21), INT8_C( 107), -INT8_C( 20), -INT8_C( 1) }, { -INT8_C( 20), INT8_C( 46), -INT8_C( 125), INT8_C( 124), INT8_C( 64), -INT8_C( 87), INT8_MIN, INT8_C( 8), -INT8_C( 40), INT8_C( 49), -INT8_C( 25), INT8_C( 63), INT8_C( 37), INT8_C( 30), INT8_C( 58), INT8_C( 51), -INT8_C( 103), INT8_C( 40), -INT8_C( 118), -INT8_C( 126), -INT8_C( 67), -INT8_C( 96), INT8_C( 79), -INT8_C( 35), INT8_C( 126), -INT8_C( 79), INT8_C( 92), -INT8_C( 109), INT8_C( 28), INT8_C( 72), -INT8_C( 109), INT8_C( 8) }, { INT8_C( 118), INT8_C( 22), -INT8_C( 124), -INT8_C( 74), -INT8_C( 64), INT8_C( 4), -INT8_C( 66), -INT8_C( 104), INT8_C( 53), -INT8_C( 91), -INT8_C( 41), INT8_C( 90), -INT8_C( 60), INT8_C( 17), -INT8_C( 115), INT8_C( 93), INT8_C( 58), INT8_C( 23), -INT8_C( 32), -INT8_C( 9), -INT8_C( 73), INT8_C( 47), -INT8_C( 44), INT8_C( 54), -INT8_C( 32), INT8_C( 48), -INT8_C( 55), -INT8_C( 4), INT8_C( 120), INT8_C( 92), INT8_C( 4), -INT8_C( 17) }, { -INT8_C( 60), -INT8_C( 115), -INT8_C( 124), INT8_C( 120), -INT8_C( 22), INT8_C( 0), -INT8_C( 22), INT8_C( 0), INT8_C( 0), INT8_C( 23), -INT8_C( 104), INT8_C( 0), INT8_C( 4), -INT8_C( 20), INT8_C( 0), INT8_C( 0), -INT8_C( 34), INT8_C( 0), INT8_C( 0), INT8_C( 66), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 4), INT8_C( 0), INT8_C( 21), INT8_C( 87), INT8_C( 0), -INT8_C( 56), INT8_C( 87), INT8_C( 0) } }, { UINT32_C( 866486387), { -INT8_C( 115), INT8_C( 100), -INT8_C( 53), -INT8_C( 62), INT8_C( 9), -INT8_C( 94), INT8_C( 29), -INT8_C( 51), -INT8_C( 76), -INT8_C( 86), INT8_C( 43), -INT8_C( 18), -INT8_C( 62), INT8_C( 11), -INT8_C( 27), INT8_C( 121), INT8_C( 58), -INT8_C( 70), -INT8_C( 81), INT8_C( 26), -INT8_C( 22), INT8_C( 121), INT8_C( 22), INT8_C( 99), -INT8_C( 43), INT8_C( 26), INT8_C( 82), INT8_C( 72), -INT8_C( 94), -INT8_C( 9), INT8_C( 123), 
INT8_C( 47) }, { INT8_C( 91), INT8_C( 70), -INT8_C( 14), INT8_C( 101), -INT8_C( 23), INT8_C( 15), INT8_C( 50), -INT8_C( 99), -INT8_C( 71), INT8_C( 93), -INT8_C( 117), INT8_C( 123), INT8_C( 104), INT8_C( 112), -INT8_C( 11), -INT8_C( 94), INT8_C( 42), -INT8_C( 92), -INT8_C( 68), INT8_C( 21), INT8_C( 29), -INT8_C( 46), INT8_C( 120), -INT8_C( 13), -INT8_C( 20), -INT8_C( 54), INT8_C( 59), -INT8_C( 113), -INT8_C( 63), -INT8_C( 73), -INT8_C( 66), INT8_C( 29) }, { -INT8_C( 3), -INT8_C( 80), -INT8_C( 126), -INT8_C( 26), -INT8_C( 65), -INT8_C( 76), -INT8_C( 125), INT8_C( 121), INT8_C( 18), INT8_C( 14), -INT8_C( 12), INT8_C( 122), INT8_MAX, -INT8_C( 23), INT8_C( 29), -INT8_C( 87), -INT8_C( 114), -INT8_C( 39), -INT8_C( 66), -INT8_C( 85), -INT8_C( 84), INT8_C( 54), -INT8_C( 98), -INT8_C( 104), INT8_C( 0), -INT8_C( 38), INT8_C( 39), -INT8_C( 62), -INT8_C( 111), -INT8_C( 26), -INT8_C( 33), -INT8_C( 114) }, { INT8_C( 72), INT8_C( 29), INT8_C( 0), INT8_C( 0), INT8_C( 14), INT8_C( 121), -INT8_C( 66), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 62), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 126), -INT8_C( 12), INT8_C( 0), -INT8_C( 111), INT8_C( 0), INT8_C( 0), -INT8_C( 81), INT8_C( 0), -INT8_C( 85), INT8_MAX, INT8_C( 43), INT8_C( 0), INT8_C( 0), INT8_C( 100), -INT8_C( 104), INT8_C( 0), INT8_C( 0) } }, { UINT32_C(1450533270), { INT8_C( 21), -INT8_C( 8), -INT8_C( 49), INT8_C( 39), INT8_C( 7), -INT8_C( 61), -INT8_C( 94), -INT8_C( 122), -INT8_C( 83), -INT8_C( 65), INT8_C( 47), INT8_C( 59), -INT8_C( 104), -INT8_C( 18), -INT8_C( 26), INT8_C( 68), INT8_C( 36), -INT8_C( 123), -INT8_C( 35), INT8_C( 37), INT8_C( 95), INT8_C( 4), -INT8_C( 25), -INT8_C( 16), -INT8_C( 22), -INT8_C( 58), INT8_C( 126), -INT8_C( 127), INT8_C( 39), -INT8_C( 13), -INT8_C( 41), INT8_C( 60) }, { -INT8_C( 20), -INT8_C( 90), INT8_C( 100), -INT8_C( 13), INT8_C( 105), INT8_C( 6), INT8_C( 121), INT8_C( 22), -INT8_C( 59), -INT8_C( 88), INT8_C( 81), INT8_C( 93), -INT8_C( 106), INT8_C( 56), -INT8_C( 94), 
-INT8_C( 69), -INT8_C( 67), INT8_MAX, -INT8_C( 32), INT8_C( 28), -INT8_C( 125), -INT8_C( 57), INT8_C( 12), INT8_C( 110), -INT8_C( 115), -INT8_C( 118), -INT8_C( 17), -INT8_C( 76), INT8_C( 126), -INT8_C( 58), -INT8_C( 16), INT8_C( 106) }, { INT8_C( 108), INT8_C( 84), INT8_C( 93), -INT8_C( 43), INT8_C( 90), -INT8_C( 42), -INT8_C( 20), INT8_C( 31), INT8_C( 126), INT8_C( 61), INT8_C( 125), INT8_C( 21), INT8_C( 117), INT8_C( 31), -INT8_C( 48), INT8_C( 50), -INT8_C( 98), -INT8_C( 80), INT8_C( 78), INT8_C( 33), INT8_C( 119), INT8_C( 90), -INT8_C( 113), INT8_C( 4), -INT8_C( 27), INT8_C( 126), -INT8_C( 72), INT8_C( 99), INT8_C( 68), -INT8_C( 88), -INT8_C( 51), -INT8_C( 80) }, { INT8_C( 0), -INT8_C( 20), INT8_C( 90), INT8_C( 0), INT8_C( 61), INT8_C( 0), INT8_C( 0), -INT8_C( 25), -INT8_C( 61), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 27), INT8_C( 93), INT8_C( 0), -INT8_C( 88), INT8_C( 0), INT8_C( 108), INT8_C( 0), INT8_C( 39), -INT8_C( 122), -INT8_C( 104), INT8_C( 0), INT8_C( 0), INT8_C( 47), INT8_C( 50), INT8_C( 0), -INT8_C( 51), INT8_C( 0), -INT8_C( 98), INT8_C( 0) } }, { UINT32_C(1468410621), { INT8_C( 0), INT8_C( 114), INT8_C( 119), INT8_C( 126), -INT8_C( 81), -INT8_C( 12), -INT8_C( 109), INT8_C( 37), INT8_C( 19), INT8_C( 99), INT8_C( 87), -INT8_C( 79), INT8_C( 19), -INT8_C( 90), -INT8_C( 46), -INT8_C( 118), INT8_C( 0), INT8_C( 98), -INT8_C( 114), -INT8_C( 27), -INT8_C( 32), INT8_C( 70), INT8_C( 72), INT8_C( 37), -INT8_C( 17), INT8_C( 21), -INT8_C( 43), -INT8_C( 20), INT8_C( 63), INT8_C( 91), INT8_C( 67), INT8_C( 63) }, { -INT8_C( 51), -INT8_C( 70), -INT8_C( 66), INT8_C( 125), -INT8_C( 82), INT8_C( 81), -INT8_C( 94), -INT8_C( 63), -INT8_C( 75), -INT8_C( 7), INT8_C( 114), -INT8_C( 56), -INT8_C( 97), INT8_C( 69), INT8_C( 83), -INT8_C( 96), -INT8_C( 89), -INT8_C( 31), -INT8_C( 123), -INT8_C( 121), INT8_C( 40), -INT8_C( 50), -INT8_C( 84), INT8_C( 23), -INT8_C( 29), -INT8_C( 126), INT8_C( 3), INT8_C( 35), -INT8_C( 35), INT8_C( 70), INT8_C( 98), -INT8_C( 85) }, { 
INT8_C( 1), INT8_C( 32), INT8_C( 40), -INT8_C( 81), INT8_C( 114), -INT8_C( 54), INT8_C( 113), INT8_C( 39), -INT8_C( 61), -INT8_C( 29), -INT8_C( 17), INT8_C( 99), INT8_C( 40), INT8_C( 66), INT8_C( 3), -INT8_C( 49), INT8_C( 36), -INT8_C( 120), INT8_C( 87), INT8_C( 76), INT8_C( 86), INT8_C( 3), INT8_C( 99), INT8_C( 58), -INT8_C( 123), INT8_C( 102), INT8_C( 93), INT8_C( 99), -INT8_C( 84), -INT8_C( 65), INT8_C( 14), -INT8_C( 83) }, { -INT8_C( 90), INT8_C( 0), INT8_C( 14), -INT8_C( 65), INT8_C( 3), INT8_C( 98), INT8_C( 40), INT8_C( 114), INT8_C( 0), INT8_C( 102), INT8_C( 0), INT8_C( 19), INT8_C( 0), -INT8_C( 12), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 32), -INT8_C( 12), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 37), -INT8_C( 81), INT8_C( 119), INT8_C( 126), INT8_C( 0), INT8_C( 91), INT8_C( 0), INT8_C( 40), INT8_C( 0) } }, { UINT32_C(1381840608), { INT8_C( 0), -INT8_C( 50), INT8_C( 121), -INT8_C( 61), -INT8_C( 79), INT8_C( 104), INT8_C( 38), -INT8_C( 38), -INT8_C( 85), INT8_C( 41), -INT8_C( 87), -INT8_C( 49), -INT8_C( 78), INT8_C( 0), INT8_C( 27), INT8_C( 8), INT8_C( 4), INT8_C( 126), INT8_C( 66), -INT8_C( 119), -INT8_C( 28), -INT8_C( 97), -INT8_C( 20), -INT8_C( 112), INT8_C( 95), -INT8_C( 6), INT8_C( 62), INT8_C( 63), INT8_C( 48), -INT8_C( 101), -INT8_C( 111), INT8_C( 48) }, { INT8_C( 105), INT8_C( 10), -INT8_C( 12), INT8_C( 26), INT8_C( 114), INT8_C( 26), -INT8_C( 12), INT8_C( 29), INT8_C( 68), -INT8_C( 98), -INT8_C( 20), -INT8_C( 10), -INT8_C( 98), INT8_C( 7), -INT8_C( 2), -INT8_C( 94), -INT8_C( 123), INT8_C( 65), INT8_C( 44), INT8_C( 105), -INT8_C( 32), INT8_C( 24), -INT8_C( 6), INT8_C( 63), INT8_C( 19), INT8_C( 56), INT8_C( 126), INT8_C( 67), -INT8_C( 45), INT8_C( 15), INT8_C( 116), INT8_C( 60) }, { INT8_C( 25), INT8_C( 104), INT8_C( 86), -INT8_C( 116), -INT8_C( 126), INT8_C( 75), -INT8_C( 87), -INT8_C( 58), -INT8_C( 23), -INT8_C( 106), -INT8_C( 68), -INT8_C( 121), -INT8_C( 99), -INT8_C( 69), INT8_C( 42), INT8_C( 35), -INT8_C( 4), INT8_C( 86), 
-INT8_C( 116), -INT8_C( 36), INT8_C( 110), -INT8_C( 122), INT8_C( 28), -INT8_C( 127), -INT8_C( 66), -INT8_C( 102), -INT8_C( 59), -INT8_C( 111), -INT8_C( 86), INT8_C( 57), -INT8_C( 51), -INT8_C( 61) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 62), INT8_C( 110), -INT8_C( 101), INT8_C( 0), -INT8_C( 111), -INT8_C( 99), INT8_C( 0), -INT8_C( 111), -INT8_C( 38), INT8_C( 0), INT8_C( 0), INT8_C( 104), INT8_C( 0), -INT8_C( 99), -INT8_C( 106), INT8_C( 25), INT8_C( 0), -INT8_C( 59), INT8_C( 0), INT8_C( 0), -INT8_C( 66), INT8_C( 0), INT8_C( 0), -INT8_C( 119), INT8_C( 0), INT8_C( 110), INT8_C( 0) } }, { UINT32_C( 592389281), { INT8_C( 111), -INT8_C( 7), -INT8_C( 22), INT8_C( 88), -INT8_C( 113), -INT8_C( 90), -INT8_C( 33), INT8_C( 44), INT8_C( 97), INT8_C( 9), INT8_C( 79), INT8_C( 93), INT8_C( 95), -INT8_C( 36), INT8_C( 58), -INT8_C( 50), INT8_C( 98), INT8_C( 86), INT8_C( 79), INT8_C( 33), -INT8_C( 16), INT8_C( 20), -INT8_C( 78), -INT8_C( 102), INT8_C( 77), INT8_MIN, INT8_C( 94), -INT8_C( 18), -INT8_C( 92), -INT8_C( 83), INT8_C( 18), INT8_C( 19) }, { -INT8_C( 90), -INT8_C( 4), INT8_C( 107), INT8_C( 53), -INT8_C( 94), INT8_C( 74), INT8_C( 98), INT8_C( 4), INT8_C( 84), -INT8_C( 79), INT8_C( 97), -INT8_C( 77), -INT8_C( 115), -INT8_C( 101), -INT8_C( 127), -INT8_C( 16), -INT8_C( 15), -INT8_C( 47), INT8_C( 17), -INT8_C( 30), -INT8_C( 27), -INT8_C( 61), INT8_C( 124), INT8_C( 51), INT8_C( 67), -INT8_C( 38), INT8_C( 33), -INT8_C( 25), -INT8_C( 120), INT8_C( 51), -INT8_C( 6), INT8_C( 46) }, { INT8_C( 47), INT8_C( 101), INT8_C( 100), -INT8_C( 46), -INT8_C( 80), -INT8_C( 58), -INT8_C( 42), INT8_C( 4), INT8_C( 119), INT8_C( 55), -INT8_C( 73), INT8_C( 5), -INT8_C( 45), INT8_C( 57), -INT8_C( 11), -INT8_C( 60), INT8_C( 10), INT8_C( 6), -INT8_C( 90), -INT8_C( 17), -INT8_C( 55), INT8_C( 35), INT8_C( 34), INT8_C( 13), -INT8_C( 3), INT8_C( 68), -INT8_C( 12), -INT8_C( 123), INT8_C( 119), -INT8_C( 17), -INT8_C( 76), -INT8_C( 89) }, { -INT8_C( 42), INT8_C( 0), INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( 79), INT8_C( 0), -INT8_C( 113), INT8_C( 0), INT8_C( 0), INT8_C( 101), INT8_C( 0), INT8_C( 0), -INT8_C( 18), INT8_C( 0), INT8_C( 0), INT8_C( 6), INT8_C( 86), INT8_C( 86), INT8_C( 100), INT8_C( 0), INT8_C( 0), INT8_C( 119), INT8_C( 0), INT8_C( 88), INT8_C( 94), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 17), INT8_C( 0), INT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_mm256_loadu_epi8(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi8(test_vec[i].idx); simde__m256i b = simde_mm256_loadu_epi8(test_vec[i].b); simde__m256i r = simde_mm256_maskz_permutex2var_epi8(test_vec[i].k, a, idx, b); simde_test_x86_assert_equal_i8x32(r, simde_mm256_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask32 k = simde_test_x86_random_mmask32(); simde__m256i a = simde_test_x86_random_i8x32(); simde__m256i idx = simde_test_x86_random_i8x32(); simde__m256i b = simde_test_x86_random_i8x32(); simde__m256i r = simde_mm256_maskz_permutex2var_epi8(k, a, idx, b); simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_permutex2var_pd(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde_float64 a[4]; const int64_t idx[4]; const simde_float64 b[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 653.52), SIMDE_FLOAT64_C( 37.34), SIMDE_FLOAT64_C( -869.77), SIMDE_FLOAT64_C( 249.43) }, { -INT64_C( 1249660299835300958), -INT64_C( 6783122132230687666), INT64_C( 8762372594894733093), INT64_C( 2397211869628714028) }, { SIMDE_FLOAT64_C( -550.23), SIMDE_FLOAT64_C( 836.05), SIMDE_FLOAT64_C( -350.30), SIMDE_FLOAT64_C( -131.37) }, { 
SIMDE_FLOAT64_C( -869.77), SIMDE_FLOAT64_C( -350.30), SIMDE_FLOAT64_C( 836.05), SIMDE_FLOAT64_C( -550.23) } }, { { SIMDE_FLOAT64_C( -919.01), SIMDE_FLOAT64_C( -940.91), SIMDE_FLOAT64_C( 697.78), SIMDE_FLOAT64_C( -496.83) }, { -INT64_C( 5439186647591482232), INT64_C( 3786545457428747679), -INT64_C( 653289433062695601), INT64_C( 8574723873881791160) }, { SIMDE_FLOAT64_C( 431.03), SIMDE_FLOAT64_C( -983.72), SIMDE_FLOAT64_C( -53.49), SIMDE_FLOAT64_C( -124.98) }, { SIMDE_FLOAT64_C( -919.01), SIMDE_FLOAT64_C( -124.98), SIMDE_FLOAT64_C( -124.98), SIMDE_FLOAT64_C( -919.01) } }, { { SIMDE_FLOAT64_C( 487.29), SIMDE_FLOAT64_C( -601.59), SIMDE_FLOAT64_C( 255.35), SIMDE_FLOAT64_C( 815.21) }, { -INT64_C( 7715051790792277326), INT64_C( 8965630858134468542), INT64_C( 6183777361766462828), -INT64_C( 4983423165255767437) }, { SIMDE_FLOAT64_C( -811.29), SIMDE_FLOAT64_C( 642.31), SIMDE_FLOAT64_C( 802.34), SIMDE_FLOAT64_C( 362.56) }, { SIMDE_FLOAT64_C( 255.35), SIMDE_FLOAT64_C( 802.34), SIMDE_FLOAT64_C( -811.29), SIMDE_FLOAT64_C( 815.21) } }, { { SIMDE_FLOAT64_C( -225.44), SIMDE_FLOAT64_C( 766.27), SIMDE_FLOAT64_C( -341.10), SIMDE_FLOAT64_C( -245.01) }, { INT64_C( 1106247939824636186), INT64_C( 3348209423383890787), INT64_C( 6496402078733496677), INT64_C( 1918995237581462728) }, { SIMDE_FLOAT64_C( -669.64), SIMDE_FLOAT64_C( -902.85), SIMDE_FLOAT64_C( -323.30), SIMDE_FLOAT64_C( 455.88) }, { SIMDE_FLOAT64_C( -341.10), SIMDE_FLOAT64_C( -245.01), SIMDE_FLOAT64_C( -902.85), SIMDE_FLOAT64_C( -225.44) } }, { { SIMDE_FLOAT64_C( 416.46), SIMDE_FLOAT64_C( 73.37), SIMDE_FLOAT64_C( -22.74), SIMDE_FLOAT64_C( -361.93) }, { -INT64_C( 5708437547326762531), INT64_C( 8007195249340173183), INT64_C( 7925321014123484385), INT64_C( 2615656005526549704) }, { SIMDE_FLOAT64_C( 543.78), SIMDE_FLOAT64_C( -89.88), SIMDE_FLOAT64_C( 339.30), SIMDE_FLOAT64_C( -527.65) }, { SIMDE_FLOAT64_C( -89.88), SIMDE_FLOAT64_C( -527.65), SIMDE_FLOAT64_C( 73.37), SIMDE_FLOAT64_C( 416.46) } }, { { SIMDE_FLOAT64_C( -668.42), 
SIMDE_FLOAT64_C( 629.47), SIMDE_FLOAT64_C( 346.35), SIMDE_FLOAT64_C( 620.84) }, { -INT64_C( 115312252773083056), INT64_C( 6344969963767343912), INT64_C( 9168481947187483600), -INT64_C( 3476140653052036768) }, { SIMDE_FLOAT64_C( -79.85), SIMDE_FLOAT64_C( -613.21), SIMDE_FLOAT64_C( 554.92), SIMDE_FLOAT64_C( 759.78) }, { SIMDE_FLOAT64_C( -668.42), SIMDE_FLOAT64_C( -668.42), SIMDE_FLOAT64_C( -668.42), SIMDE_FLOAT64_C( -668.42) } }, { { SIMDE_FLOAT64_C( 971.65), SIMDE_FLOAT64_C( 672.93), SIMDE_FLOAT64_C( -151.29), SIMDE_FLOAT64_C( 782.54) }, { -INT64_C( 4305123536846776488), INT64_C( 5797284939722125876), INT64_C( 6799981624904777233), -INT64_C( 6189970833459960486) }, { SIMDE_FLOAT64_C( -861.21), SIMDE_FLOAT64_C( -653.94), SIMDE_FLOAT64_C( 293.52), SIMDE_FLOAT64_C( -423.18) }, { SIMDE_FLOAT64_C( 971.65), SIMDE_FLOAT64_C( -861.21), SIMDE_FLOAT64_C( 672.93), SIMDE_FLOAT64_C( -151.29) } }, { { SIMDE_FLOAT64_C( -413.93), SIMDE_FLOAT64_C( 60.42), SIMDE_FLOAT64_C( -250.99), SIMDE_FLOAT64_C( 258.92) }, { -INT64_C( 6843807250990663227), INT64_C( 7243915799452296935), -INT64_C( 3193013734401761612), -INT64_C( 2586673575026212018) }, { SIMDE_FLOAT64_C( 961.25), SIMDE_FLOAT64_C( 165.72), SIMDE_FLOAT64_C( -922.46), SIMDE_FLOAT64_C( 131.70) }, { SIMDE_FLOAT64_C( 165.72), SIMDE_FLOAT64_C( 131.70), SIMDE_FLOAT64_C( 961.25), SIMDE_FLOAT64_C( -922.46) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi64(test_vec[i].idx); simde__m256d b = simde_mm256_loadu_pd(test_vec[i].b); simde__m256d r = simde_mm256_permutex2var_pd(a, idx, b); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256d a = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256i idx = simde_test_x86_random_i64x4(); simde__m256d b = 
simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256d r = simde_mm256_permutex2var_pd(a, idx, b); simde_test_x86_write_f64x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask_permutex2var_pd(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde_float64 a[4]; const simde__mmask8 k; const int64_t idx[4]; const simde_float64 b[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 408.46), SIMDE_FLOAT64_C( -602.21), SIMDE_FLOAT64_C( -10.41), SIMDE_FLOAT64_C( 978.30) }, UINT8_C( 60), { INT64_C( 3416731359858034982), INT64_C( 7216436906553012405), INT64_C( 7541654544883302300), INT64_C( 1709751513995410663) }, { SIMDE_FLOAT64_C( -397.99), SIMDE_FLOAT64_C( -628.51), SIMDE_FLOAT64_C( 977.27), SIMDE_FLOAT64_C( 803.25) }, { SIMDE_FLOAT64_C( 408.46), SIMDE_FLOAT64_C( -602.21), SIMDE_FLOAT64_C( -397.99), SIMDE_FLOAT64_C( 803.25) } }, { { SIMDE_FLOAT64_C( 542.89), SIMDE_FLOAT64_C( 82.07), SIMDE_FLOAT64_C( -662.48), SIMDE_FLOAT64_C( 808.21) }, UINT8_C(167), { INT64_C( 2140103026273695783), INT64_C( 4532133341351549351), INT64_C( 2110859038105297472), INT64_C( 6481569584712787732) }, { SIMDE_FLOAT64_C( -276.74), SIMDE_FLOAT64_C( -885.51), SIMDE_FLOAT64_C( 201.17), SIMDE_FLOAT64_C( -557.48) }, { SIMDE_FLOAT64_C( -557.48), SIMDE_FLOAT64_C( -557.48), SIMDE_FLOAT64_C( 542.89), SIMDE_FLOAT64_C( 808.21) } }, { { SIMDE_FLOAT64_C( 249.98), SIMDE_FLOAT64_C( 630.88), SIMDE_FLOAT64_C( -260.35), SIMDE_FLOAT64_C( 653.16) }, UINT8_C(188), { -INT64_C( 3138659908921160494), INT64_C( 7262350432791491181), -INT64_C( 4368227721405917994), -INT64_C( 3728915643820728736) }, { SIMDE_FLOAT64_C( 90.23), SIMDE_FLOAT64_C( 801.44), SIMDE_FLOAT64_C( 328.73), SIMDE_FLOAT64_C( -572.13) }, { SIMDE_FLOAT64_C( 249.98), SIMDE_FLOAT64_C( 
630.88), SIMDE_FLOAT64_C( 328.73), SIMDE_FLOAT64_C( 249.98) } }, { { SIMDE_FLOAT64_C( -158.58), SIMDE_FLOAT64_C( -452.42), SIMDE_FLOAT64_C( 522.14), SIMDE_FLOAT64_C( -149.14) }, UINT8_C( 96), { INT64_C( 8926137098736344572), INT64_C( 6767016054276415384), -INT64_C( 1373277184724603408), INT64_C( 3597902442779912480) }, { SIMDE_FLOAT64_C( 404.05), SIMDE_FLOAT64_C( 374.29), SIMDE_FLOAT64_C( 776.88), SIMDE_FLOAT64_C( -254.86) }, { SIMDE_FLOAT64_C( -158.58), SIMDE_FLOAT64_C( -452.42), SIMDE_FLOAT64_C( 522.14), SIMDE_FLOAT64_C( -149.14) } }, { { SIMDE_FLOAT64_C( 453.36), SIMDE_FLOAT64_C( -266.79), SIMDE_FLOAT64_C( 626.35), SIMDE_FLOAT64_C( -321.54) }, UINT8_C(250), { INT64_C( 399016723643739987), -INT64_C( 3660688020061637650), INT64_C( 4271722174278189319), -INT64_C( 4501927894904627857) }, { SIMDE_FLOAT64_C( 510.82), SIMDE_FLOAT64_C( -911.97), SIMDE_FLOAT64_C( -156.49), SIMDE_FLOAT64_C( 3.35) }, { SIMDE_FLOAT64_C( 453.36), SIMDE_FLOAT64_C( -156.49), SIMDE_FLOAT64_C( 626.35), SIMDE_FLOAT64_C( 3.35) } }, { { SIMDE_FLOAT64_C( -104.93), SIMDE_FLOAT64_C( 203.31), SIMDE_FLOAT64_C( -83.08), SIMDE_FLOAT64_C( -818.74) }, UINT8_C(150), { INT64_C( 5374211117209507566), -INT64_C( 3193556578658030070), INT64_C( 9046890245521488382), -INT64_C( 1165280014248983460) }, { SIMDE_FLOAT64_C( -241.97), SIMDE_FLOAT64_C( 998.95), SIMDE_FLOAT64_C( 954.06), SIMDE_FLOAT64_C( -258.08) }, { SIMDE_FLOAT64_C( -104.93), SIMDE_FLOAT64_C( -83.08), SIMDE_FLOAT64_C( 954.06), SIMDE_FLOAT64_C( -818.74) } }, { { SIMDE_FLOAT64_C( -767.52), SIMDE_FLOAT64_C( -830.63), SIMDE_FLOAT64_C( 118.56), SIMDE_FLOAT64_C( 309.23) }, UINT8_MAX, { INT64_C( 5147061381747793979), -INT64_C( 5788950803670278838), -INT64_C( 3756671982009063594), INT64_C( 5849053409824658459) }, { SIMDE_FLOAT64_C( 669.56), SIMDE_FLOAT64_C( -178.67), SIMDE_FLOAT64_C( 767.90), SIMDE_FLOAT64_C( 281.87) }, { SIMDE_FLOAT64_C( 309.23), SIMDE_FLOAT64_C( 118.56), SIMDE_FLOAT64_C( 767.90), SIMDE_FLOAT64_C( 309.23) } }, { { SIMDE_FLOAT64_C( -48.74), 
SIMDE_FLOAT64_C( 874.09), SIMDE_FLOAT64_C( -369.38), SIMDE_FLOAT64_C( 530.18) }, UINT8_C(140), { -INT64_C( 7883790105709182274), -INT64_C( 5219924978399574315), INT64_C( 5287822894264335355), INT64_C( 5376005530964972767) }, { SIMDE_FLOAT64_C( -103.84), SIMDE_FLOAT64_C( -761.48), SIMDE_FLOAT64_C( 439.85), SIMDE_FLOAT64_C( -949.21) }, { SIMDE_FLOAT64_C( -48.74), SIMDE_FLOAT64_C( 874.09), SIMDE_FLOAT64_C( 530.18), SIMDE_FLOAT64_C( -949.21) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi64(test_vec[i].idx); simde__m256d b = simde_mm256_loadu_pd(test_vec[i].b); simde__m256d r = simde_mm256_mask_permutex2var_pd(a, test_vec[i].k, idx, b); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256d a = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i idx = simde_test_x86_random_i64x4(); simde__m256d b = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256d r = simde_mm256_mask_permutex2var_pd(a, k, idx, b); simde_test_x86_write_f64x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask2_permutex2var_pd(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde_float64 a[4]; const int64_t idx[4]; const simde__mmask8 k; const simde_float64 b[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 601.13), SIMDE_FLOAT64_C( -769.86), SIMDE_FLOAT64_C( -702.22), SIMDE_FLOAT64_C( 615.42) }, { INT64_C( 4642678524425556460), INT64_C( 
4647447238296616305), INT64_C( 4629981891913580544), INT64_C( 4642297477675834409) }, UINT8_C( 16), { SIMDE_FLOAT64_C( 810.92), SIMDE_FLOAT64_C( -60.42), SIMDE_FLOAT64_C( -19.80), SIMDE_FLOAT64_C( -884.45) }, { SIMDE_FLOAT64_C( 240.86), SIMDE_FLOAT64_C( 496.79), SIMDE_FLOAT64_C( 34.00), SIMDE_FLOAT64_C( 230.03) } }, { { SIMDE_FLOAT64_C( 488.07), SIMDE_FLOAT64_C( -193.68), SIMDE_FLOAT64_C( 58.80), SIMDE_FLOAT64_C( 417.59) }, { -INT64_C( 4574021060145363026), -INT64_C( 4582953756492205916), INT64_C( 4637794933579626578), INT64_C( 4635296843161319506) }, UINT8_C(176), { SIMDE_FLOAT64_C( -313.45), SIMDE_FLOAT64_C( -512.31), SIMDE_FLOAT64_C( 785.24), SIMDE_FLOAT64_C( -489.28) }, { SIMDE_FLOAT64_C( -698.01), SIMDE_FLOAT64_C( -176.62), SIMDE_FLOAT64_C( 115.03), SIMDE_FLOAT64_C( 79.53) } }, { { SIMDE_FLOAT64_C( -965.74), SIMDE_FLOAT64_C( -318.60), SIMDE_FLOAT64_C( -250.76), SIMDE_FLOAT64_C( 474.11) }, { -INT64_C( 4579953057318609879), -INT64_C( 4574446615125777449), INT64_C( 4649405864329871360), INT64_C( 4629129022734147256) }, UINT8_C(146), { SIMDE_FLOAT64_C( -54.89), SIMDE_FLOAT64_C( -473.24), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -824.86) }, { SIMDE_FLOAT64_C( -267.81), SIMDE_FLOAT64_C( -824.86), SIMDE_FLOAT64_C( 704.25), SIMDE_FLOAT64_C( 29.97) } }, { { SIMDE_FLOAT64_C( -826.47), SIMDE_FLOAT64_C( -189.29), SIMDE_FLOAT64_C( 114.72), SIMDE_FLOAT64_C( 153.74) }, { -INT64_C( 4588482628722244977), -INT64_C( 4577676452522602004), INT64_C( 4651655904924952166), INT64_C( 4651875895211437588) }, UINT8_MAX, { SIMDE_FLOAT64_C( -737.96), SIMDE_FLOAT64_C( -191.56), SIMDE_FLOAT64_C( 135.40), SIMDE_FLOAT64_C( 341.57) }, { SIMDE_FLOAT64_C( 341.57), SIMDE_FLOAT64_C( -737.96), SIMDE_FLOAT64_C( 135.40), SIMDE_FLOAT64_C( -737.96) } }, { { SIMDE_FLOAT64_C( -652.34), SIMDE_FLOAT64_C( 821.95), SIMDE_FLOAT64_C( 829.26), SIMDE_FLOAT64_C( -867.09) }, { -INT64_C( 4574290924279284367), INT64_C( 4650806818065518428), -INT64_C( 4582634634237360210), INT64_C( 4635464320772462346) }, 
UINT8_C(148), { SIMDE_FLOAT64_C( 546.50), SIMDE_FLOAT64_C( 432.28), SIMDE_FLOAT64_C( 41.87), SIMDE_FLOAT64_C( -423.53) }, { SIMDE_FLOAT64_C( -667.33), SIMDE_FLOAT64_C( 863.52), SIMDE_FLOAT64_C( 41.87), SIMDE_FLOAT64_C( 81.91) } }, { { SIMDE_FLOAT64_C( 398.07), SIMDE_FLOAT64_C( 986.98), SIMDE_FLOAT64_C( 103.23), SIMDE_FLOAT64_C( -602.14) }, { -INT64_C( 4572790662653416571), INT64_C( 4643576429601263452), INT64_C( 4641542421050808074), INT64_C( 4643577661054286561) }, UINT8_C(156), { SIMDE_FLOAT64_C( -865.17), SIMDE_FLOAT64_C( 879.61), SIMDE_FLOAT64_C( -609.45), SIMDE_FLOAT64_C( -880.11) }, { SIMDE_FLOAT64_C( -837.89), SIMDE_FLOAT64_C( 276.76), SIMDE_FLOAT64_C( 103.23), SIMDE_FLOAT64_C( 986.98) } }, { { SIMDE_FLOAT64_C( 899.99), SIMDE_FLOAT64_C( -347.41), SIMDE_FLOAT64_C( -71.66), SIMDE_FLOAT64_C( 35.39) }, { INT64_C( 4651955939657939681), INT64_C( 4643563059539869696), -INT64_C( 4584148969612063539), INT64_C( 4650454094735327887) }, UINT8_C(254), { SIMDE_FLOAT64_C( 190.02), SIMDE_FLOAT64_C( 686.93), SIMDE_FLOAT64_C( -776.79), SIMDE_FLOAT64_C( -728.07) }, { SIMDE_FLOAT64_C( 994.16), SIMDE_FLOAT64_C( 899.99), SIMDE_FLOAT64_C( 686.93), SIMDE_FLOAT64_C( -728.07) } }, { { SIMDE_FLOAT64_C( 24.56), SIMDE_FLOAT64_C( 769.71), SIMDE_FLOAT64_C( 704.21), SIMDE_FLOAT64_C( -933.57) }, { -INT64_C( 4574409759496014397), INT64_C( 4636897028403919585), -INT64_C( 4571834527341902561), INT64_C( 4646613896443692319) }, UINT8_C( 4), { SIMDE_FLOAT64_C( -784.48), SIMDE_FLOAT64_C( -273.82), SIMDE_FLOAT64_C( -291.30), SIMDE_FLOAT64_C( 492.35) }, { SIMDE_FLOAT64_C( -653.82), SIMDE_FLOAT64_C( 102.27), SIMDE_FLOAT64_C( 492.35), SIMDE_FLOAT64_C( 449.42) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi64(test_vec[i].idx); simde__m256d b = simde_mm256_loadu_pd(test_vec[i].b); simde__m256d r = simde_mm256_mask2_permutex2var_pd(a, idx, test_vec[i].k, b); 
simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256d a = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256i idx = simde_mm256_castpd_si256(simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0))); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256d b = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256d r = simde_mm256_mask2_permutex2var_pd(a, idx, k, b); simde_test_x86_write_f64x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_maskz_permutex2var_pd(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde__mmask8 k; const simde_float64 a[4]; const int64_t idx[4]; const simde_float64 b[4]; const simde_float64 r[4]; } test_vec[] = { { UINT8_C( 39), { SIMDE_FLOAT64_C( -156.47), SIMDE_FLOAT64_C( 371.97), SIMDE_FLOAT64_C( 547.23), SIMDE_FLOAT64_C( -36.58) }, { INT64_C( 6753824616870176200), -INT64_C( 5347421650059644827), -INT64_C( 2766651298058335315), -INT64_C( 829751754638706200) }, { SIMDE_FLOAT64_C( 995.09), SIMDE_FLOAT64_C( 340.65), SIMDE_FLOAT64_C( 507.87), SIMDE_FLOAT64_C( -810.93) }, { SIMDE_FLOAT64_C( -156.47), SIMDE_FLOAT64_C( 340.65), SIMDE_FLOAT64_C( 340.65), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(185), { SIMDE_FLOAT64_C( -327.43), SIMDE_FLOAT64_C( -793.54), SIMDE_FLOAT64_C( 85.07), SIMDE_FLOAT64_C( 27.28) }, { INT64_C( 5107248257967419662), -INT64_C( 5387029748258741901), -INT64_C( 1110745756586663315), INT64_C( 8528066622377250933) }, { SIMDE_FLOAT64_C( -253.49), SIMDE_FLOAT64_C( 895.28), SIMDE_FLOAT64_C( 811.24), SIMDE_FLOAT64_C( 201.04) }, { 
SIMDE_FLOAT64_C( 811.24), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 895.28) } }, { UINT8_C(210), { SIMDE_FLOAT64_C( 246.22), SIMDE_FLOAT64_C( -428.67), SIMDE_FLOAT64_C( 297.94), SIMDE_FLOAT64_C( 529.93) }, { -INT64_C( 493534658799625651), -INT64_C( 7293148268721621634), INT64_C( 3256516990520695089), -INT64_C( 6082316498808888030) }, { SIMDE_FLOAT64_C( -631.46), SIMDE_FLOAT64_C( -104.08), SIMDE_FLOAT64_C( 913.70), SIMDE_FLOAT64_C( -989.30) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 913.70), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C( 27), { SIMDE_FLOAT64_C( -877.75), SIMDE_FLOAT64_C( -868.59), SIMDE_FLOAT64_C( -706.07), SIMDE_FLOAT64_C( 200.96) }, { INT64_C( 5764647894897061190), -INT64_C( 3476374108990408410), -INT64_C( 8965583818938487441), INT64_C( 4542505815359047998) }, { SIMDE_FLOAT64_C( -607.72), SIMDE_FLOAT64_C( -219.18), SIMDE_FLOAT64_C( 53.42), SIMDE_FLOAT64_C( -473.84) }, { SIMDE_FLOAT64_C( 53.42), SIMDE_FLOAT64_C( 53.42), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 53.42) } }, { UINT8_C(185), { SIMDE_FLOAT64_C( 642.03), SIMDE_FLOAT64_C( -935.14), SIMDE_FLOAT64_C( 326.35), SIMDE_FLOAT64_C( -815.01) }, { -INT64_C( 5958543503365406865), -INT64_C( 2286080838041795794), -INT64_C( 5750507269739966252), -INT64_C( 6036713302604050651) }, { SIMDE_FLOAT64_C( 166.43), SIMDE_FLOAT64_C( -281.36), SIMDE_FLOAT64_C( -331.66), SIMDE_FLOAT64_C( 400.53) }, { SIMDE_FLOAT64_C( 400.53), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -281.36) } }, { UINT8_C(107), { SIMDE_FLOAT64_C( -207.66), SIMDE_FLOAT64_C( 197.53), SIMDE_FLOAT64_C( 486.60), SIMDE_FLOAT64_C( 783.69) }, { -INT64_C( 544841097366101657), -INT64_C( 4725186751456117714), INT64_C( 4845776272224919341), INT64_C( 7271985175501621042) }, { SIMDE_FLOAT64_C( -819.08), SIMDE_FLOAT64_C( -481.47), SIMDE_FLOAT64_C( -484.90), SIMDE_FLOAT64_C( 165.06) }, { SIMDE_FLOAT64_C( 165.06), SIMDE_FLOAT64_C( -484.90), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 486.60) } }, { 
UINT8_C(181), { SIMDE_FLOAT64_C( -44.60), SIMDE_FLOAT64_C( 489.25), SIMDE_FLOAT64_C( -660.13), SIMDE_FLOAT64_C( 854.87) }, { INT64_C( 3876854457221982941), -INT64_C( 3878072601808059307), -INT64_C( 8777749705325593601), -INT64_C( 6374809217983053781) }, { SIMDE_FLOAT64_C( 679.12), SIMDE_FLOAT64_C( 77.15), SIMDE_FLOAT64_C( -278.01), SIMDE_FLOAT64_C( -26.22) }, { SIMDE_FLOAT64_C( 77.15), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -26.22), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(125), { SIMDE_FLOAT64_C( -694.07), SIMDE_FLOAT64_C( 582.10), SIMDE_FLOAT64_C( 116.74), SIMDE_FLOAT64_C( 634.71) }, { INT64_C( 4068059970956742180), -INT64_C( 1457506108626191506), INT64_C( 904199777799200801), -INT64_C( 6336181479927091805) }, { SIMDE_FLOAT64_C( -926.60), SIMDE_FLOAT64_C( 256.82), SIMDE_FLOAT64_C( -405.58), SIMDE_FLOAT64_C( -741.28) }, { SIMDE_FLOAT64_C( -926.60), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 582.10), SIMDE_FLOAT64_C( 634.71) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi64(test_vec[i].idx); simde__m256d b = simde_mm256_loadu_pd(test_vec[i].b); simde__m256d r = simde_mm256_maskz_permutex2var_pd(test_vec[i].k, a, idx, b); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256d a = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256i idx = simde_test_x86_random_i64x4(); simde__m256d b = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256d r = simde_mm256_maskz_permutex2var_pd(k, a, idx, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x4(2, b, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_permutex2var_ps(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde_float32 a[8]; const int32_t idx[8]; const simde_float32 b[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( -687.18), SIMDE_FLOAT32_C( -361.35), SIMDE_FLOAT32_C( 960.68), SIMDE_FLOAT32_C( -862.21), SIMDE_FLOAT32_C( 644.38), SIMDE_FLOAT32_C( 382.40), SIMDE_FLOAT32_C( -700.96), SIMDE_FLOAT32_C( 517.36) }, { INT32_C( 808957085), INT32_C( 35976047), -INT32_C( 1090028281), INT32_C( 389976670), -INT32_C( 1879169113), INT32_C( 1012962718), -INT32_C( 622028337), -INT32_C( 1727159556) }, { SIMDE_FLOAT32_C( -464.67), SIMDE_FLOAT32_C( 179.59), SIMDE_FLOAT32_C( -237.40), SIMDE_FLOAT32_C( -125.05), SIMDE_FLOAT32_C( -58.30), SIMDE_FLOAT32_C( 453.65), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 962.17) }, { SIMDE_FLOAT32_C( 453.65), SIMDE_FLOAT32_C( 962.17), SIMDE_FLOAT32_C( 517.36), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 517.36), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 962.17), SIMDE_FLOAT32_C( -58.30) } }, { { SIMDE_FLOAT32_C( 246.84), SIMDE_FLOAT32_C( -174.75), SIMDE_FLOAT32_C( 263.04), SIMDE_FLOAT32_C( -474.89), SIMDE_FLOAT32_C( 715.74), SIMDE_FLOAT32_C( 512.37), SIMDE_FLOAT32_C( 563.60), SIMDE_FLOAT32_C( -502.42) }, { INT32_C( 26795619), INT32_C( 1128135028), -INT32_C( 1910691182), INT32_C( 589769420), -INT32_C( 1494617746), INT32_C( 1239724510), -INT32_C( 502078848), -INT32_C( 2132020707) }, { SIMDE_FLOAT32_C( 104.27), SIMDE_FLOAT32_C( -73.64), SIMDE_FLOAT32_C( -115.24), SIMDE_FLOAT32_C( 385.75), SIMDE_FLOAT32_C( 409.86), SIMDE_FLOAT32_C( 398.20), SIMDE_FLOAT32_C( -391.56), SIMDE_FLOAT32_C( 420.42) }, { SIMDE_FLOAT32_C( -474.89), SIMDE_FLOAT32_C( 715.74), SIMDE_FLOAT32_C( 263.04), SIMDE_FLOAT32_C( 409.86), SIMDE_FLOAT32_C( -391.56), SIMDE_FLOAT32_C( -391.56), SIMDE_FLOAT32_C( 246.84), SIMDE_FLOAT32_C( 398.20) } }, { { SIMDE_FLOAT32_C( -542.23), 
SIMDE_FLOAT32_C( -583.29), SIMDE_FLOAT32_C( -659.54), SIMDE_FLOAT32_C( -637.49), SIMDE_FLOAT32_C( -672.07), SIMDE_FLOAT32_C( 914.01), SIMDE_FLOAT32_C( -197.42), SIMDE_FLOAT32_C( 418.73) }, { -INT32_C( 1796881994), -INT32_C( 203503246), -INT32_C( 908726101), -INT32_C( 1270234909), -INT32_C( 1057371325), INT32_C( 1942978954), -INT32_C( 131568601), -INT32_C( 415825615) }, { SIMDE_FLOAT32_C( -723.20), SIMDE_FLOAT32_C( -523.25), SIMDE_FLOAT32_C( -293.61), SIMDE_FLOAT32_C( 365.09), SIMDE_FLOAT32_C( 922.67), SIMDE_FLOAT32_C( 782.98), SIMDE_FLOAT32_C( -750.69), SIMDE_FLOAT32_C( -373.69) }, { SIMDE_FLOAT32_C( -197.42), SIMDE_FLOAT32_C( -659.54), SIMDE_FLOAT32_C( 365.09), SIMDE_FLOAT32_C( -637.49), SIMDE_FLOAT32_C( -637.49), SIMDE_FLOAT32_C( -293.61), SIMDE_FLOAT32_C( 418.73), SIMDE_FLOAT32_C( -583.29) } }, { { SIMDE_FLOAT32_C( -578.69), SIMDE_FLOAT32_C( -354.78), SIMDE_FLOAT32_C( -899.97), SIMDE_FLOAT32_C( -815.01), SIMDE_FLOAT32_C( 50.38), SIMDE_FLOAT32_C( -646.57), SIMDE_FLOAT32_C( 52.93), SIMDE_FLOAT32_C( -618.09) }, { -INT32_C( 121578898), -INT32_C( 2090102948), INT32_C( 763073788), INT32_C( 1461039765), -INT32_C( 1265856050), INT32_C( 893760747), INT32_C( 1852022705), -INT32_C( 1318108094) }, { SIMDE_FLOAT32_C( -674.67), SIMDE_FLOAT32_C( 629.91), SIMDE_FLOAT32_C( 78.57), SIMDE_FLOAT32_C( -505.91), SIMDE_FLOAT32_C( -84.98), SIMDE_FLOAT32_C( -899.37), SIMDE_FLOAT32_C( 941.69), SIMDE_FLOAT32_C( -297.60) }, { SIMDE_FLOAT32_C( 941.69), SIMDE_FLOAT32_C( -84.98), SIMDE_FLOAT32_C( -84.98), SIMDE_FLOAT32_C( -646.57), SIMDE_FLOAT32_C( 941.69), SIMDE_FLOAT32_C( -505.91), SIMDE_FLOAT32_C( -354.78), SIMDE_FLOAT32_C( -899.97) } }, { { SIMDE_FLOAT32_C( -520.68), SIMDE_FLOAT32_C( -3.94), SIMDE_FLOAT32_C( -93.93), SIMDE_FLOAT32_C( 17.21), SIMDE_FLOAT32_C( 75.93), SIMDE_FLOAT32_C( 776.17), SIMDE_FLOAT32_C( 831.91), SIMDE_FLOAT32_C( 632.54) }, { INT32_C( 2041520782), -INT32_C( 2001734442), -INT32_C( 705293677), INT32_C( 1971742038), INT32_C( 1425027221), -INT32_C( 300945595), -INT32_C( 
1792215191), -INT32_C( 2087665163) }, { SIMDE_FLOAT32_C( 552.07), SIMDE_FLOAT32_C( -945.13), SIMDE_FLOAT32_C( -859.22), SIMDE_FLOAT32_C( 992.87), SIMDE_FLOAT32_C( 272.29), SIMDE_FLOAT32_C( -366.30), SIMDE_FLOAT32_C( 507.98), SIMDE_FLOAT32_C( 695.32) }, { SIMDE_FLOAT32_C( 507.98), SIMDE_FLOAT32_C( 831.91), SIMDE_FLOAT32_C( 17.21), SIMDE_FLOAT32_C( 831.91), SIMDE_FLOAT32_C( 776.17), SIMDE_FLOAT32_C( 776.17), SIMDE_FLOAT32_C( -945.13), SIMDE_FLOAT32_C( 776.17) } }, { { SIMDE_FLOAT32_C( 92.39), SIMDE_FLOAT32_C( 147.79), SIMDE_FLOAT32_C( 677.41), SIMDE_FLOAT32_C( -430.22), SIMDE_FLOAT32_C( 294.21), SIMDE_FLOAT32_C( -289.87), SIMDE_FLOAT32_C( -627.49), SIMDE_FLOAT32_C( 70.54) }, { -INT32_C( 1752729006), -INT32_C( 762997143), -INT32_C( 2107068019), INT32_C( 1443297395), INT32_C( 1796211512), INT32_C( 1832014511), INT32_C( 696372875), INT32_C( 1130105840) }, { SIMDE_FLOAT32_C( -561.40), SIMDE_FLOAT32_C( 300.74), SIMDE_FLOAT32_C( 660.73), SIMDE_FLOAT32_C( 346.43), SIMDE_FLOAT32_C( -543.60), SIMDE_FLOAT32_C( 385.30), SIMDE_FLOAT32_C( 488.84), SIMDE_FLOAT32_C( 911.87) }, { SIMDE_FLOAT32_C( 677.41), SIMDE_FLOAT32_C( 300.74), SIMDE_FLOAT32_C( 385.30), SIMDE_FLOAT32_C( -430.22), SIMDE_FLOAT32_C( -561.40), SIMDE_FLOAT32_C( 911.87), SIMDE_FLOAT32_C( 346.43), SIMDE_FLOAT32_C( 92.39) } }, { { SIMDE_FLOAT32_C( -181.68), SIMDE_FLOAT32_C( 941.75), SIMDE_FLOAT32_C( 443.33), SIMDE_FLOAT32_C( 791.70), SIMDE_FLOAT32_C( -662.30), SIMDE_FLOAT32_C( -596.55), SIMDE_FLOAT32_C( -682.85), SIMDE_FLOAT32_C( -211.48) }, { INT32_C( 1103555474), -INT32_C( 911280066), -INT32_C( 1225642042), -INT32_C( 1040626117), -INT32_C( 1431251919), INT32_C( 1169191476), -INT32_C( 1077266020), INT32_C( 1511630280) }, { SIMDE_FLOAT32_C( -732.06), SIMDE_FLOAT32_C( -914.56), SIMDE_FLOAT32_C( -697.91), SIMDE_FLOAT32_C( 129.71), SIMDE_FLOAT32_C( -984.49), SIMDE_FLOAT32_C( -631.99), SIMDE_FLOAT32_C( 165.58), SIMDE_FLOAT32_C( 41.42) }, { SIMDE_FLOAT32_C( 443.33), SIMDE_FLOAT32_C( 165.58), SIMDE_FLOAT32_C( -682.85), 
SIMDE_FLOAT32_C( 129.71), SIMDE_FLOAT32_C( 941.75), SIMDE_FLOAT32_C( -662.30), SIMDE_FLOAT32_C( -984.49), SIMDE_FLOAT32_C( -732.06) } }, { { SIMDE_FLOAT32_C( 830.69), SIMDE_FLOAT32_C( 669.75), SIMDE_FLOAT32_C( 873.30), SIMDE_FLOAT32_C( 62.65), SIMDE_FLOAT32_C( 712.76), SIMDE_FLOAT32_C( 332.57), SIMDE_FLOAT32_C( -266.79), SIMDE_FLOAT32_C( 56.52) }, { INT32_C( 1437738273), INT32_C( 882598808), INT32_C( 1710450076), -INT32_C( 1698755318), -INT32_C( 999728148), INT32_C( 492896420), INT32_C( 1641133450), INT32_C( 610944515) }, { SIMDE_FLOAT32_C( -607.11), SIMDE_FLOAT32_C( -311.81), SIMDE_FLOAT32_C( -178.60), SIMDE_FLOAT32_C( -351.06), SIMDE_FLOAT32_C( 256.90), SIMDE_FLOAT32_C( 102.87), SIMDE_FLOAT32_C( 352.56), SIMDE_FLOAT32_C( -19.03) }, { SIMDE_FLOAT32_C( 669.75), SIMDE_FLOAT32_C( -607.11), SIMDE_FLOAT32_C( 256.90), SIMDE_FLOAT32_C( -178.60), SIMDE_FLOAT32_C( 256.90), SIMDE_FLOAT32_C( 712.76), SIMDE_FLOAT32_C( -178.60), SIMDE_FLOAT32_C( 62.65) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi32(test_vec[i].idx); simde__m256 b = simde_mm256_loadu_ps(test_vec[i].b); simde__m256 r = simde_mm256_permutex2var_ps(a, idx, b); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256 a = simde_test_x86_random_f32x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256i idx = simde_test_x86_random_i32x8(); simde__m256 b = simde_test_x86_random_f32x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256 r = simde_mm256_permutex2var_ps(a, idx, b); simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int 
test_simde_mm256_mask_permutex2var_ps(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde_float32 a[8]; const simde__mmask8 k; const int32_t idx[8]; const simde_float32 b[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( -500.35), SIMDE_FLOAT32_C( -158.53), SIMDE_FLOAT32_C( -736.15), SIMDE_FLOAT32_C( 328.90), SIMDE_FLOAT32_C( 904.19), SIMDE_FLOAT32_C( -390.12), SIMDE_FLOAT32_C( -573.91), SIMDE_FLOAT32_C( 52.35) }, UINT8_C(155), { -INT32_C( 2009011833), INT32_C( 1203531), INT32_C( 1963160367), -INT32_C( 85907235), -INT32_C( 1216748126), INT32_C( 1244763422), INT32_C( 1468249366), INT32_C( 1542629587) }, { SIMDE_FLOAT32_C( -918.50), SIMDE_FLOAT32_C( 46.12), SIMDE_FLOAT32_C( -348.62), SIMDE_FLOAT32_C( -497.40), SIMDE_FLOAT32_C( -832.95), SIMDE_FLOAT32_C( 759.08), SIMDE_FLOAT32_C( 2.10), SIMDE_FLOAT32_C( 197.46) }, { SIMDE_FLOAT32_C( 52.35), SIMDE_FLOAT32_C( -497.40), SIMDE_FLOAT32_C( -736.15), SIMDE_FLOAT32_C( 759.08), SIMDE_FLOAT32_C( -736.15), SIMDE_FLOAT32_C( -390.12), SIMDE_FLOAT32_C( -573.91), SIMDE_FLOAT32_C( 328.90) } }, { { SIMDE_FLOAT32_C( 641.76), SIMDE_FLOAT32_C( -87.43), SIMDE_FLOAT32_C( -129.45), SIMDE_FLOAT32_C( 355.96), SIMDE_FLOAT32_C( -313.43), SIMDE_FLOAT32_C( -63.46), SIMDE_FLOAT32_C( 758.35), SIMDE_FLOAT32_C( 281.97) }, UINT8_C(251), { INT32_C( 1327063737), INT32_C( 1214604179), -INT32_C( 1894007578), -INT32_C( 517900625), INT32_C( 1316093529), -INT32_C( 1128845093), INT32_C( 2076573029), -INT32_C( 160010179) }, { SIMDE_FLOAT32_C( -245.94), SIMDE_FLOAT32_C( -163.82), SIMDE_FLOAT32_C( 905.91), SIMDE_FLOAT32_C( 932.22), SIMDE_FLOAT32_C( 464.07), SIMDE_FLOAT32_C( -132.05), SIMDE_FLOAT32_C( -362.79), SIMDE_FLOAT32_C( -94.89) }, { SIMDE_FLOAT32_C( -163.82), SIMDE_FLOAT32_C( 355.96), SIMDE_FLOAT32_C( -129.45), SIMDE_FLOAT32_C( -94.89), SIMDE_FLOAT32_C( -163.82), SIMDE_FLOAT32_C( 932.22), SIMDE_FLOAT32_C( -63.46), SIMDE_FLOAT32_C( -132.05) } }, { { SIMDE_FLOAT32_C( 343.99), SIMDE_FLOAT32_C( 295.12), SIMDE_FLOAT32_C( -788.38), 
SIMDE_FLOAT32_C( -407.97), SIMDE_FLOAT32_C( -441.06), SIMDE_FLOAT32_C( 599.63), SIMDE_FLOAT32_C( -113.82), SIMDE_FLOAT32_C( -525.86) }, UINT8_C(136), { -INT32_C( 1704728727), -INT32_C( 1677778786), INT32_C( 1457027813), -INT32_C( 2144808976), -INT32_C( 1082880492), INT32_C( 69684795), INT32_C( 1094991287), -INT32_C( 1614160587) }, { SIMDE_FLOAT32_C( -745.97), SIMDE_FLOAT32_C( 8.13), SIMDE_FLOAT32_C( -635.67), SIMDE_FLOAT32_C( 718.50), SIMDE_FLOAT32_C( 196.38), SIMDE_FLOAT32_C( 899.62), SIMDE_FLOAT32_C( 854.97), SIMDE_FLOAT32_C( 876.49) }, { SIMDE_FLOAT32_C( 343.99), SIMDE_FLOAT32_C( 295.12), SIMDE_FLOAT32_C( -788.38), SIMDE_FLOAT32_C( 343.99), SIMDE_FLOAT32_C( -441.06), SIMDE_FLOAT32_C( 599.63), SIMDE_FLOAT32_C( -113.82), SIMDE_FLOAT32_C( 599.63) } }, { { SIMDE_FLOAT32_C( -99.32), SIMDE_FLOAT32_C( -578.21), SIMDE_FLOAT32_C( -669.58), SIMDE_FLOAT32_C( 18.98), SIMDE_FLOAT32_C( -163.39), SIMDE_FLOAT32_C( 538.40), SIMDE_FLOAT32_C( 973.44), SIMDE_FLOAT32_C( 157.34) }, UINT8_C( 62), { -INT32_C( 444958057), -INT32_C( 560169311), -INT32_C( 1626022462), -INT32_C( 765152346), INT32_C( 606062572), -INT32_C( 1663675963), -INT32_C( 1992132391), INT32_C( 852032923) }, { SIMDE_FLOAT32_C( 854.86), SIMDE_FLOAT32_C( -904.59), SIMDE_FLOAT32_C( 637.20), SIMDE_FLOAT32_C( -266.46), SIMDE_FLOAT32_C( 623.95), SIMDE_FLOAT32_C( -111.90), SIMDE_FLOAT32_C( -263.09), SIMDE_FLOAT32_C( -216.04) }, { SIMDE_FLOAT32_C( -99.32), SIMDE_FLOAT32_C( -578.21), SIMDE_FLOAT32_C( -669.58), SIMDE_FLOAT32_C( 973.44), SIMDE_FLOAT32_C( 623.95), SIMDE_FLOAT32_C( 538.40), SIMDE_FLOAT32_C( 973.44), SIMDE_FLOAT32_C( 157.34) } }, { { SIMDE_FLOAT32_C( 273.91), SIMDE_FLOAT32_C( -248.84), SIMDE_FLOAT32_C( -950.43), SIMDE_FLOAT32_C( -715.12), SIMDE_FLOAT32_C( -876.33), SIMDE_FLOAT32_C( 569.21), SIMDE_FLOAT32_C( -696.03), SIMDE_FLOAT32_C( 860.52) }, UINT8_C( 77), { INT32_C( 2031340840), INT32_C( 475180963), -INT32_C( 273163278), -INT32_C( 446240093), -INT32_C( 1247379967), INT32_C( 2135239028), -INT32_C( 767787957), 
-INT32_C( 1390418299) }, { SIMDE_FLOAT32_C( -166.66), SIMDE_FLOAT32_C( 463.92), SIMDE_FLOAT32_C( 560.35), SIMDE_FLOAT32_C( -132.52), SIMDE_FLOAT32_C( -120.73), SIMDE_FLOAT32_C( 594.17), SIMDE_FLOAT32_C( -542.24), SIMDE_FLOAT32_C( -353.88) }, { SIMDE_FLOAT32_C( -166.66), SIMDE_FLOAT32_C( -248.84), SIMDE_FLOAT32_C( -950.43), SIMDE_FLOAT32_C( -715.12), SIMDE_FLOAT32_C( -876.33), SIMDE_FLOAT32_C( 569.21), SIMDE_FLOAT32_C( -132.52), SIMDE_FLOAT32_C( 860.52) } }, { { SIMDE_FLOAT32_C( -411.22), SIMDE_FLOAT32_C( 577.61), SIMDE_FLOAT32_C( 921.81), SIMDE_FLOAT32_C( 222.79), SIMDE_FLOAT32_C( -380.98), SIMDE_FLOAT32_C( 52.37), SIMDE_FLOAT32_C( 952.21), SIMDE_FLOAT32_C( 297.63) }, UINT8_C(167), { -INT32_C( 1424241790), -INT32_C( 1929930224), -INT32_C( 1122907689), INT32_C( 460373736), INT32_C( 1560135652), INT32_C( 1706086714), -INT32_C( 1116100716), INT32_C( 107253635) }, { SIMDE_FLOAT32_C( -801.01), SIMDE_FLOAT32_C( 346.78), SIMDE_FLOAT32_C( 875.66), SIMDE_FLOAT32_C( 903.96), SIMDE_FLOAT32_C( -783.60), SIMDE_FLOAT32_C( -494.54), SIMDE_FLOAT32_C( -265.57), SIMDE_FLOAT32_C( -438.13) }, { SIMDE_FLOAT32_C( 921.81), SIMDE_FLOAT32_C( -411.22), SIMDE_FLOAT32_C( 297.63), SIMDE_FLOAT32_C( 222.79), SIMDE_FLOAT32_C( -380.98), SIMDE_FLOAT32_C( 875.66), SIMDE_FLOAT32_C( 952.21), SIMDE_FLOAT32_C( 222.79) } }, { { SIMDE_FLOAT32_C( 891.37), SIMDE_FLOAT32_C( -5.63), SIMDE_FLOAT32_C( -756.32), SIMDE_FLOAT32_C( -159.26), SIMDE_FLOAT32_C( -796.75), SIMDE_FLOAT32_C( -241.17), SIMDE_FLOAT32_C( 254.03), SIMDE_FLOAT32_C( -27.53) }, UINT8_C(230), { INT32_C( 1126172786), INT32_C( 1607959992), -INT32_C( 1897687809), INT32_C( 2011818488), INT32_C( 1133597850), INT32_C( 1421115979), INT32_C( 1327238704), -INT32_C( 181024637) }, { SIMDE_FLOAT32_C( 114.82), SIMDE_FLOAT32_C( -661.23), SIMDE_FLOAT32_C( -577.95), SIMDE_FLOAT32_C( 402.87), SIMDE_FLOAT32_C( 738.60), SIMDE_FLOAT32_C( -232.21), SIMDE_FLOAT32_C( 865.88), SIMDE_FLOAT32_C( 175.45) }, { SIMDE_FLOAT32_C( 891.37), SIMDE_FLOAT32_C( 114.82), 
SIMDE_FLOAT32_C( 175.45), SIMDE_FLOAT32_C( -159.26), SIMDE_FLOAT32_C( -796.75), SIMDE_FLOAT32_C( 402.87), SIMDE_FLOAT32_C( 891.37), SIMDE_FLOAT32_C( -159.26) } }, { { SIMDE_FLOAT32_C( -973.61), SIMDE_FLOAT32_C( -543.93), SIMDE_FLOAT32_C( 946.95), SIMDE_FLOAT32_C( 114.30), SIMDE_FLOAT32_C( 479.02), SIMDE_FLOAT32_C( -82.57), SIMDE_FLOAT32_C( 548.99), SIMDE_FLOAT32_C( -622.38) }, UINT8_C(165), { INT32_C( 670143140), INT32_C( 1431782726), INT32_C( 702064225), INT32_C( 838520539), INT32_C( 369918726), INT32_C( 934930027), -INT32_C( 1595189938), INT32_C( 205863528) }, { SIMDE_FLOAT32_C( 67.38), SIMDE_FLOAT32_C( -248.12), SIMDE_FLOAT32_C( -967.07), SIMDE_FLOAT32_C( -855.49), SIMDE_FLOAT32_C( -244.14), SIMDE_FLOAT32_C( -97.40), SIMDE_FLOAT32_C( 357.81), SIMDE_FLOAT32_C( 566.31) }, { SIMDE_FLOAT32_C( 479.02), SIMDE_FLOAT32_C( -543.93), SIMDE_FLOAT32_C( -543.93), SIMDE_FLOAT32_C( 114.30), SIMDE_FLOAT32_C( 479.02), SIMDE_FLOAT32_C( -855.49), SIMDE_FLOAT32_C( 548.99), SIMDE_FLOAT32_C( 67.38) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi32(test_vec[i].idx); simde__m256 b = simde_mm256_loadu_ps(test_vec[i].b); simde__m256 r = simde_mm256_mask_permutex2var_ps(a, test_vec[i].k, idx, b); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256 a = simde_test_x86_random_f32x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i idx = simde_test_x86_random_i32x8(); simde__m256 b = simde_test_x86_random_f32x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256 r = simde_mm256_mask_permutex2var_ps(a, k, idx, b); simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, idx, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask2_permutex2var_ps(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde_float32 a[8]; const int32_t idx[8]; const simde__mmask8 k; const simde_float32 b[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 987.74), SIMDE_FLOAT32_C( 961.33), SIMDE_FLOAT32_C( -844.27), SIMDE_FLOAT32_C( 429.80), SIMDE_FLOAT32_C( -498.37), SIMDE_FLOAT32_C( 859.96), SIMDE_FLOAT32_C( 210.63), SIMDE_FLOAT32_C( 463.98) }, { -INT32_C( 1039940977), -INT32_C( 1014564454), INT32_C( 1128406712), INT32_C( 1147863859), INT32_C( 1137997906), -INT32_C( 1035639194), INT32_C( 1136168141), -INT32_C( 999731528) }, UINT8_C( 57), { SIMDE_FLOAT32_C( -722.82), SIMDE_FLOAT32_C( 626.03), SIMDE_FLOAT32_C( -758.72), SIMDE_FLOAT32_C( 562.87), SIMDE_FLOAT32_C( -22.83), SIMDE_FLOAT32_C( -138.83), SIMDE_FLOAT32_C( -369.75), SIMDE_FLOAT32_C( 729.06) }, { SIMDE_FLOAT32_C( 729.06), SIMDE_FLOAT32_C( -269.95), SIMDE_FLOAT32_C( 194.12), SIMDE_FLOAT32_C( 429.80), SIMDE_FLOAT32_C( -844.27), SIMDE_FLOAT32_C( 210.63), SIMDE_FLOAT32_C( 369.10), SIMDE_FLOAT32_C( -933.23) } }, { { SIMDE_FLOAT32_C( -105.90), SIMDE_FLOAT32_C( -225.25), SIMDE_FLOAT32_C( -515.09), SIMDE_FLOAT32_C( 796.70), SIMDE_FLOAT32_C( -867.43), SIMDE_FLOAT32_C( -948.77), SIMDE_FLOAT32_C( 784.44), SIMDE_FLOAT32_C( -906.11) }, { -INT32_C( 1002028237), INT32_C( 1129725297), -INT32_C( 1010156175), -INT32_C( 999733821), -INT32_C( 1005598638), -INT32_C( 999612416), INT32_C( 1107815301), INT32_C( 1125837701) }, UINT8_C( 35), { SIMDE_FLOAT32_C( -25.98), SIMDE_FLOAT32_C( -420.14), SIMDE_FLOAT32_C( -795.74), SIMDE_FLOAT32_C( -656.87), SIMDE_FLOAT32_C( -353.37), SIMDE_FLOAT32_C( -974.27), SIMDE_FLOAT32_C( -379.69), SIMDE_FLOAT32_C( -727.34) }, { SIMDE_FLOAT32_C( 796.70), SIMDE_FLOAT32_C( -225.25), SIMDE_FLOAT32_C( -404.48), SIMDE_FLOAT32_C( -933.09), 
SIMDE_FLOAT32_C( -575.13), SIMDE_FLOAT32_C( -105.90), SIMDE_FLOAT32_C( 33.98), SIMDE_FLOAT32_C( 154.92) } }, { { SIMDE_FLOAT32_C( -732.99), SIMDE_FLOAT32_C( -816.83), SIMDE_FLOAT32_C( 249.84), SIMDE_FLOAT32_C( 128.18), SIMDE_FLOAT32_C( -186.58), SIMDE_FLOAT32_C( -21.11), SIMDE_FLOAT32_C( -977.72), SIMDE_FLOAT32_C( 588.18) }, { INT32_C( 1139271598), INT32_C( 1145880248), INT32_C( 1144270684), INT32_C( 1140900332), INT32_C( 1142348513), INT32_C( 1145808978), INT32_C( 1144291000), -INT32_C( 1019849605) }, UINT8_C(133), { SIMDE_FLOAT32_C( 788.90), SIMDE_FLOAT32_C( 242.54), SIMDE_FLOAT32_C( -530.35), SIMDE_FLOAT32_C( -177.13), SIMDE_FLOAT32_C( -602.54), SIMDE_FLOAT32_C( 723.27), SIMDE_FLOAT32_C( 796.90), SIMDE_FLOAT32_C( -22.68) }, { SIMDE_FLOAT32_C( 796.90), SIMDE_FLOAT32_C( 818.98), SIMDE_FLOAT32_C( -602.54), SIMDE_FLOAT32_C( 515.03), SIMDE_FLOAT32_C( 603.42), SIMDE_FLOAT32_C( 814.63), SIMDE_FLOAT32_C( 721.98), SIMDE_FLOAT32_C( -177.13) } }, { { SIMDE_FLOAT32_C( 927.53), SIMDE_FLOAT32_C( -859.97), SIMDE_FLOAT32_C( 623.95), SIMDE_FLOAT32_C( 953.27), SIMDE_FLOAT32_C( -239.67), SIMDE_FLOAT32_C( 896.61), SIMDE_FLOAT32_C( -779.72), SIMDE_FLOAT32_C( -56.50) }, { INT32_C( 1125282611), INT32_C( 1135491809), INT32_C( 1144863621), -INT32_C( 1000691139), INT32_C( 1136221880), INT32_C( 1135381709), INT32_C( 1142114714), INT32_C( 1128118354) }, UINT8_C( 54), { SIMDE_FLOAT32_C( 104.18), SIMDE_FLOAT32_C( -206.85), SIMDE_FLOAT32_C( -119.52), SIMDE_FLOAT32_C( -173.84), SIMDE_FLOAT32_C( 610.81), SIMDE_FLOAT32_C( 290.63), SIMDE_FLOAT32_C( -384.94), SIMDE_FLOAT32_C( -146.65) }, { SIMDE_FLOAT32_C( 146.45), SIMDE_FLOAT32_C( -859.97), SIMDE_FLOAT32_C( 896.61), SIMDE_FLOAT32_C( -874.66), SIMDE_FLOAT32_C( 104.18), SIMDE_FLOAT32_C( 290.63), SIMDE_FLOAT32_C( 589.15), SIMDE_FLOAT32_C( 189.72) } }, { { SIMDE_FLOAT32_C( 760.28), SIMDE_FLOAT32_C( 437.93), SIMDE_FLOAT32_C( 250.81), SIMDE_FLOAT32_C( 483.55), SIMDE_FLOAT32_C( 234.83), SIMDE_FLOAT32_C( -771.87), SIMDE_FLOAT32_C( 411.08), 
SIMDE_FLOAT32_C( 374.86) }, { INT32_C( 1146422559), INT32_C( 1136012165), -INT32_C( 1000852521), INT32_C( 1144728617), INT32_C( 1142040494), INT32_C( 1117610312), -INT32_C( 1026443182), -INT32_C( 1031417364) }, UINT8_C(128), { SIMDE_FLOAT32_C( 20.49), SIMDE_FLOAT32_C( -696.17), SIMDE_FLOAT32_C( -819.28), SIMDE_FLOAT32_C( -390.37), SIMDE_FLOAT32_C( 493.55), SIMDE_FLOAT32_C( 246.56), SIMDE_FLOAT32_C( 713.81), SIMDE_FLOAT32_C( -713.30) }, { SIMDE_FLOAT32_C( 852.08), SIMDE_FLOAT32_C( 364.34), SIMDE_FLOAT32_C( -864.81), SIMDE_FLOAT32_C( 748.69), SIMDE_FLOAT32_C( 584.62), SIMDE_FLOAT32_C( 78.69), SIMDE_FLOAT32_C( -104.86), SIMDE_FLOAT32_C( 493.55) } }, { { SIMDE_FLOAT32_C( -872.96), SIMDE_FLOAT32_C( -460.02), SIMDE_FLOAT32_C( 897.52), SIMDE_FLOAT32_C( 417.67), SIMDE_FLOAT32_C( 155.04), SIMDE_FLOAT32_C( -249.13), SIMDE_FLOAT32_C( 177.95), SIMDE_FLOAT32_C( -407.03) }, { -INT32_C( 998665093), -INT32_C( 1012317880), INT32_C( 1146024755), -INT32_C( 1002402775), -INT32_C( 999826555), INT32_C( 1128966390), -INT32_C( 999979254), INT32_C( 1138390467) }, UINT8_C(139), { SIMDE_FLOAT32_C( 830.58), SIMDE_FLOAT32_C( 21.54), SIMDE_FLOAT32_C( -583.47), SIMDE_FLOAT32_C( -274.27), SIMDE_FLOAT32_C( 954.63), SIMDE_FLOAT32_C( 252.15), SIMDE_FLOAT32_C( 746.21), SIMDE_FLOAT32_C( -741.54) }, { SIMDE_FLOAT32_C( -274.27), SIMDE_FLOAT32_C( 830.58), SIMDE_FLOAT32_C( 827.80), SIMDE_FLOAT32_C( 21.54), SIMDE_FLOAT32_C( -927.43), SIMDE_FLOAT32_C( 202.66), SIMDE_FLOAT32_C( -918.11), SIMDE_FLOAT32_C( 417.67) } }, { { SIMDE_FLOAT32_C( 432.87), SIMDE_FLOAT32_C( -644.16), SIMDE_FLOAT32_C( 752.01), SIMDE_FLOAT32_C( -320.56), SIMDE_FLOAT32_C( -930.34), SIMDE_FLOAT32_C( -961.29), SIMDE_FLOAT32_C( -193.52), SIMDE_FLOAT32_C( -390.37) }, { INT32_C( 1147801272), -INT32_C( 1002309878), INT32_C( 1144990433), -INT32_C( 1013156741), INT32_C( 1137249157), -INT32_C( 1004497142), -INT32_C( 1013211791), -INT32_C( 999679263) }, UINT8_C( 80), { SIMDE_FLOAT32_C( -81.42), SIMDE_FLOAT32_C( -863.85), SIMDE_FLOAT32_C( 388.10), 
SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 573.07), SIMDE_FLOAT32_C( -274.06), SIMDE_FLOAT32_C( -168.95), SIMDE_FLOAT32_C( -405.39) }, { SIMDE_FLOAT32_C( 936.23), SIMDE_FLOAT32_C( -775.86), SIMDE_FLOAT32_C( 764.67), SIMDE_FLOAT32_C( -312.91), SIMDE_FLOAT32_C( -961.29), SIMDE_FLOAT32_C( -642.36), SIMDE_FLOAT32_C( -644.16), SIMDE_FLOAT32_C( -936.42) } }, { { SIMDE_FLOAT32_C( 142.47), SIMDE_FLOAT32_C( 556.78), SIMDE_FLOAT32_C( -450.77), SIMDE_FLOAT32_C( -605.38), SIMDE_FLOAT32_C( 302.99), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( 827.49), SIMDE_FLOAT32_C( 658.83) }, { -INT32_C( 1008982426), -INT32_C( 1007253258), INT32_C( 1144397660), -INT32_C( 1010250875), INT32_C( 1134343291), -INT32_C( 1004177326), -INT32_C( 1008161260), INT32_C( 1141269299) }, UINT8_C( 29), { SIMDE_FLOAT32_C( 221.73), SIMDE_FLOAT32_C( -60.36), SIMDE_FLOAT32_C( -539.57), SIMDE_FLOAT32_C( 910.50), SIMDE_FLOAT32_C( 3.22), SIMDE_FLOAT32_C( -354.13), SIMDE_FLOAT32_C( -170.93), SIMDE_FLOAT32_C( 139.37) }, { SIMDE_FLOAT32_C( 827.49), SIMDE_FLOAT32_C( -493.07), SIMDE_FLOAT32_C( 3.22), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( 910.50), SIMDE_FLOAT32_C( -661.88), SIMDE_FLOAT32_C( -465.36), SIMDE_FLOAT32_C( 537.55) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi32(test_vec[i].idx); simde__m256 b = simde_mm256_loadu_ps(test_vec[i].b); simde__m256 r = simde_mm256_mask2_permutex2var_ps(a, idx, test_vec[i].k, b); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256 a = simde_test_x86_random_f32x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256i idx = simde_mm256_castps_si256(simde_test_x86_random_f32x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0))); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256 b = 
simde_test_x86_random_f32x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256 r = simde_mm256_mask2_permutex2var_ps(a, idx, k, b); simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_maskz_permutex2var_ps(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde__mmask8 k; const simde_float32 a[8]; const int32_t idx[8]; const simde_float32 b[8]; const simde_float32 r[8]; } test_vec[] = { { UINT8_C(154), { SIMDE_FLOAT32_C( -270.29), SIMDE_FLOAT32_C( -118.45), SIMDE_FLOAT32_C( 527.46), SIMDE_FLOAT32_C( -793.46), SIMDE_FLOAT32_C( -217.24), SIMDE_FLOAT32_C( 139.69), SIMDE_FLOAT32_C( 175.32), SIMDE_FLOAT32_C( -568.86) }, { -INT32_C( 307649416), -INT32_C( 956032053), -INT32_C( 1754201414), INT32_C( 494816139), -INT32_C( 807894669), -INT32_C( 211153711), -INT32_C( 1251366856), INT32_C( 551614120) }, { SIMDE_FLOAT32_C( 280.16), SIMDE_FLOAT32_C( -576.05), SIMDE_FLOAT32_C( 981.41), SIMDE_FLOAT32_C( 341.99), SIMDE_FLOAT32_C( -28.10), SIMDE_FLOAT32_C( 523.12), SIMDE_FLOAT32_C( 828.32), SIMDE_FLOAT32_C( -577.67) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 341.99), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 341.99), SIMDE_FLOAT32_C( -793.46), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 280.16) } }, { UINT8_C( 19), { SIMDE_FLOAT32_C( 603.04), SIMDE_FLOAT32_C( 131.16), SIMDE_FLOAT32_C( 49.79), SIMDE_FLOAT32_C( 387.47), SIMDE_FLOAT32_C( 705.16), SIMDE_FLOAT32_C( -554.95), SIMDE_FLOAT32_C( 975.16), SIMDE_FLOAT32_C( -444.72) }, { -INT32_C( 1580847979), INT32_C( 1054522006), INT32_C( 451317539), -INT32_C( 105642385), INT32_C( 631052819), -INT32_C( 432473017), INT32_C( 1724897266), INT32_C( 693906580) }, { SIMDE_FLOAT32_C( 650.48), SIMDE_FLOAT32_C( 
262.11), SIMDE_FLOAT32_C( -891.18), SIMDE_FLOAT32_C( 678.55), SIMDE_FLOAT32_C( -182.42), SIMDE_FLOAT32_C( 821.73), SIMDE_FLOAT32_C( 901.56), SIMDE_FLOAT32_C( -555.48) }, { SIMDE_FLOAT32_C( -554.95), SIMDE_FLOAT32_C( 975.16), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 387.47), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C( 52), { SIMDE_FLOAT32_C( 459.21), SIMDE_FLOAT32_C( 318.28), SIMDE_FLOAT32_C( -994.67), SIMDE_FLOAT32_C( 210.17), SIMDE_FLOAT32_C( 472.19), SIMDE_FLOAT32_C( -208.90), SIMDE_FLOAT32_C( -57.46), SIMDE_FLOAT32_C( -31.91) }, { INT32_C( 909135930), INT32_C( 1613305481), INT32_C( 318017510), INT32_C( 241835499), -INT32_C( 1930798872), -INT32_C( 1765741948), -INT32_C( 743545852), INT32_C( 985454848) }, { SIMDE_FLOAT32_C( -135.29), SIMDE_FLOAT32_C( 202.89), SIMDE_FLOAT32_C( -32.45), SIMDE_FLOAT32_C( 347.68), SIMDE_FLOAT32_C( -577.55), SIMDE_FLOAT32_C( 7.46), SIMDE_FLOAT32_C( 165.40), SIMDE_FLOAT32_C( 956.73) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -57.46), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -135.29), SIMDE_FLOAT32_C( 472.19), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C( 41), { SIMDE_FLOAT32_C( -237.09), SIMDE_FLOAT32_C( 780.91), SIMDE_FLOAT32_C( -308.51), SIMDE_FLOAT32_C( -917.56), SIMDE_FLOAT32_C( 255.56), SIMDE_FLOAT32_C( -418.58), SIMDE_FLOAT32_C( 510.78), SIMDE_FLOAT32_C( -591.24) }, { -INT32_C( 129918451), -INT32_C( 1090726054), INT32_C( 1589628805), INT32_C( 2022242699), INT32_C( 75185002), INT32_C( 1462658127), -INT32_C( 998489249), INT32_C( 1937869414) }, { SIMDE_FLOAT32_C( -136.89), SIMDE_FLOAT32_C( -744.82), SIMDE_FLOAT32_C( 523.71), SIMDE_FLOAT32_C( -482.42), SIMDE_FLOAT32_C( -767.32), SIMDE_FLOAT32_C( -565.02), SIMDE_FLOAT32_C( 70.98), SIMDE_FLOAT32_C( 934.33) }, { SIMDE_FLOAT32_C( -565.02), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -482.42), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 934.33), SIMDE_FLOAT32_C( 
0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C( 55), { SIMDE_FLOAT32_C( 314.41), SIMDE_FLOAT32_C( -697.33), SIMDE_FLOAT32_C( 53.09), SIMDE_FLOAT32_C( 796.64), SIMDE_FLOAT32_C( 535.71), SIMDE_FLOAT32_C( -979.67), SIMDE_FLOAT32_C( 246.41), SIMDE_FLOAT32_C( -883.56) }, { INT32_C( 445605046), -INT32_C( 713431406), -INT32_C( 314884765), -INT32_C( 2146324546), -INT32_C( 2112254182), -INT32_C( 927320116), -INT32_C( 1195475533), INT32_C( 1861820088) }, { SIMDE_FLOAT32_C( 578.08), SIMDE_FLOAT32_C( 297.87), SIMDE_FLOAT32_C( -199.20), SIMDE_FLOAT32_C( 802.62), SIMDE_FLOAT32_C( 669.47), SIMDE_FLOAT32_C( -315.45), SIMDE_FLOAT32_C( 239.56), SIMDE_FLOAT32_C( -367.47) }, { SIMDE_FLOAT32_C( 246.41), SIMDE_FLOAT32_C( 53.09), SIMDE_FLOAT32_C( 796.64), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -199.20), SIMDE_FLOAT32_C( 669.47), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C( 62), { SIMDE_FLOAT32_C( -529.68), SIMDE_FLOAT32_C( 697.84), SIMDE_FLOAT32_C( -990.51), SIMDE_FLOAT32_C( -216.18), SIMDE_FLOAT32_C( 626.27), SIMDE_FLOAT32_C( 322.97), SIMDE_FLOAT32_C( -378.98), SIMDE_FLOAT32_C( -927.72) }, { -INT32_C( 869502315), -INT32_C( 1451232212), -INT32_C( 798934862), -INT32_C( 1185689807), INT32_C( 1479043159), INT32_C( 2023160402), -INT32_C( 1991404871), -INT32_C( 1544995570) }, { SIMDE_FLOAT32_C( -688.25), SIMDE_FLOAT32_C( 472.47), SIMDE_FLOAT32_C( -265.39), SIMDE_FLOAT32_C( -234.11), SIMDE_FLOAT32_C( 893.13), SIMDE_FLOAT32_C( -132.62), SIMDE_FLOAT32_C( 834.04), SIMDE_FLOAT32_C( -58.19) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 893.13), SIMDE_FLOAT32_C( -990.51), SIMDE_FLOAT32_C( 697.84), SIMDE_FLOAT32_C( -927.72), SIMDE_FLOAT32_C( -990.51), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C( 38), { SIMDE_FLOAT32_C( 536.54), SIMDE_FLOAT32_C( 864.94), SIMDE_FLOAT32_C( 205.98), SIMDE_FLOAT32_C( 283.41), SIMDE_FLOAT32_C( -633.86), SIMDE_FLOAT32_C( 390.80), SIMDE_FLOAT32_C( 831.07), SIMDE_FLOAT32_C( -330.03) }, { INT32_C( 882533177), -INT32_C( 1947397383), -INT32_C( 
1650886816), INT32_C( 1967930464), -INT32_C( 1719785557), -INT32_C( 1866450261), -INT32_C( 1505486905), INT32_C( 1659785000) }, { SIMDE_FLOAT32_C( 69.15), SIMDE_FLOAT32_C( 32.54), SIMDE_FLOAT32_C( -164.74), SIMDE_FLOAT32_C( -684.15), SIMDE_FLOAT32_C( 259.61), SIMDE_FLOAT32_C( -248.18), SIMDE_FLOAT32_C( 755.46), SIMDE_FLOAT32_C( -715.79) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 32.54), SIMDE_FLOAT32_C( 536.54), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -684.15), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C(250), { SIMDE_FLOAT32_C( -289.06), SIMDE_FLOAT32_C( -272.65), SIMDE_FLOAT32_C( -909.98), SIMDE_FLOAT32_C( -948.59), SIMDE_FLOAT32_C( 482.26), SIMDE_FLOAT32_C( 173.20), SIMDE_FLOAT32_C( -873.51), SIMDE_FLOAT32_C( 3.04) }, { -INT32_C( 2001383602), INT32_C( 659506447), -INT32_C( 665848183), INT32_C( 1821028836), -INT32_C( 888785593), INT32_C( 985989483), INT32_C( 2084905112), INT32_C( 1032256495) }, { SIMDE_FLOAT32_C( -471.86), SIMDE_FLOAT32_C( -34.05), SIMDE_FLOAT32_C( 306.29), SIMDE_FLOAT32_C( -141.42), SIMDE_FLOAT32_C( -784.81), SIMDE_FLOAT32_C( 156.97), SIMDE_FLOAT32_C( 656.49), SIMDE_FLOAT32_C( 639.22) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 639.22), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 482.26), SIMDE_FLOAT32_C( 3.04), SIMDE_FLOAT32_C( -141.42), SIMDE_FLOAT32_C( -471.86), SIMDE_FLOAT32_C( 639.22) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256i idx = simde_mm256_loadu_epi32(test_vec[i].idx); simde__m256 b = simde_mm256_loadu_ps(test_vec[i].b); simde__m256 r = simde_mm256_maskz_permutex2var_ps(test_vec[i].k, a, idx, b); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256 a = simde_test_x86_random_f32x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); 
simde__m256i idx = simde_test_x86_random_i32x8(); simde__m256 b = simde_test_x86_random_f32x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256 r = simde_mm256_maskz_permutex2var_ps(k, a, idx, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_permutex2var_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[32]; const int16_t idx[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { -INT16_C( 18651), INT16_C( 19875), -INT16_C( 4478), INT16_C( 31873), INT16_C( 26710), -INT16_C( 10451), INT16_C( 2377), INT16_C( 28991), INT16_C( 20239), INT16_C( 31981), INT16_C( 18651), -INT16_C( 9085), INT16_C( 11710), -INT16_C( 26943), INT16_C( 2876), INT16_C( 25170), -INT16_C( 2622), INT16_C( 17839), INT16_C( 12515), INT16_C( 14785), -INT16_C( 4456), -INT16_C( 7920), INT16_C( 20471), INT16_C( 1618), INT16_C( 16287), INT16_C( 31362), INT16_C( 1671), INT16_C( 17750), INT16_C( 5939), INT16_C( 28636), INT16_C( 11810), -INT16_C( 6703) }, { -INT16_C( 32733), INT16_C( 1578), -INT16_C( 5200), INT16_C( 18496), INT16_C( 20697), -INT16_C( 12247), INT16_C( 31648), INT16_C( 16342), INT16_C( 22971), INT16_C( 17081), INT16_C( 3935), -INT16_C( 28024), INT16_C( 25639), INT16_C( 18689), -INT16_C( 11374), -INT16_C( 19154), INT16_C( 22611), INT16_C( 1211), -INT16_C( 1213), INT16_C( 7244), INT16_C( 30284), -INT16_C( 4884), -INT16_C( 15375), -INT16_C( 21461), -INT16_C( 7140), INT16_C( 31727), INT16_C( 30707), INT16_C( 6669), INT16_C( 3803), INT16_C( 28004), -INT16_C( 27935), INT16_C( 13602) }, { -INT16_C( 8725), INT16_C( 11833), -INT16_C( 31271), INT16_C( 9547), INT16_C( 14331), -INT16_C( 4847), INT16_C( 15610), INT16_C( 5785), -INT16_C( 30688), 
INT16_C( 5009), -INT16_C( 24833), -INT16_C( 9682), -INT16_C( 27987), -INT16_C( 29113), INT16_C( 26916), INT16_C( 4035), -INT16_C( 953), INT16_C( 8254), -INT16_C( 30334), INT16_C( 32069), INT16_C( 22208), -INT16_C( 17558), INT16_C( 1170), -INT16_C( 19759), INT16_C( 25484), -INT16_C( 29499), -INT16_C( 3327), -INT16_C( 20890), -INT16_C( 20859), -INT16_C( 21955), INT16_C( 23), INT16_C( 24249) }, { INT16_C( 9547), -INT16_C( 24833), -INT16_C( 953), -INT16_C( 18651), INT16_C( 31362), INT16_C( 5009), -INT16_C( 8725), INT16_C( 20471), -INT16_C( 20890), -INT16_C( 29499), -INT16_C( 6703), INT16_C( 20239), INT16_C( 5785), INT16_C( 19875), INT16_C( 12515), INT16_C( 26916), INT16_C( 14785), -INT16_C( 20890), INT16_C( 31873), INT16_C( 11710), INT16_C( 11710), -INT16_C( 27987), INT16_C( 8254), -INT16_C( 9682), INT16_C( 5939), INT16_C( 4035), INT16_C( 32069), -INT16_C( 26943), INT16_C( 17750), INT16_C( 14331), INT16_C( 11833), -INT16_C( 31271) } }, { { -INT16_C( 2051), INT16_C( 32638), -INT16_C( 15488), INT16_C( 16892), INT16_C( 26393), -INT16_C( 21508), -INT16_C( 12949), -INT16_C( 2211), INT16_C( 9008), INT16_C( 12931), -INT16_C( 5610), -INT16_C( 25376), INT16_C( 7576), -INT16_C( 20666), -INT16_C( 226), INT16_C( 6926), -INT16_C( 29449), INT16_C( 30618), -INT16_C( 27056), INT16_C( 27064), -INT16_C( 19203), INT16_C( 26645), INT16_C( 29314), -INT16_C( 19872), -INT16_C( 7275), -INT16_C( 21276), -INT16_C( 14899), INT16_C( 25928), -INT16_C( 28958), INT16_C( 21), INT16_C( 9101), -INT16_C( 31717) }, { -INT16_C( 19025), -INT16_C( 4), -INT16_C( 19380), INT16_C( 18793), INT16_C( 32361), -INT16_C( 5198), INT16_C( 4848), -INT16_C( 31075), -INT16_C( 32011), -INT16_C( 15566), INT16_C( 31303), INT16_C( 10536), INT16_C( 15624), -INT16_C( 27350), INT16_C( 17760), INT16_C( 4122), INT16_C( 5883), INT16_C( 18191), INT16_C( 30922), INT16_C( 13200), INT16_C( 17142), -INT16_C( 6370), -INT16_C( 17324), INT16_C( 19053), -INT16_C( 24770), -INT16_C( 31475), INT16_C( 13593), INT16_C( 8622), -INT16_C( 10125), 
-INT16_C( 11338), -INT16_C( 12258), INT16_C( 6627) }, { -INT16_C( 3098), -INT16_C( 20128), -INT16_C( 3989), INT16_C( 25316), INT16_C( 819), -INT16_C( 30903), -INT16_C( 18753), -INT16_C( 559), -INT16_C( 8619), INT16_C( 28290), INT16_C( 12308), -INT16_C( 30833), INT16_C( 17673), INT16_C( 10074), INT16_C( 15894), -INT16_C( 960), -INT16_C( 24527), -INT16_C( 25427), -INT16_C( 28016), -INT16_C( 15362), INT16_C( 18325), INT16_C( 21579), INT16_C( 7421), INT16_C( 21073), -INT16_C( 11269), INT16_C( 4032), INT16_C( 20227), INT16_C( 3222), -INT16_C( 3947), -INT16_C( 21709), INT16_C( 29486), INT16_C( 24487) }, { -INT16_C( 960), -INT16_C( 3947), INT16_C( 7576), INT16_C( 28290), INT16_C( 28290), -INT16_C( 28016), -INT16_C( 24527), INT16_C( 21), INT16_C( 21579), -INT16_C( 28016), -INT16_C( 2211), -INT16_C( 8619), INT16_C( 9008), INT16_C( 12308), -INT16_C( 3098), -INT16_C( 14899), INT16_C( 3222), INT16_C( 6926), -INT16_C( 5610), -INT16_C( 29449), INT16_C( 7421), INT16_C( 9101), -INT16_C( 19203), INT16_C( 10074), INT16_C( 29486), -INT16_C( 20666), -INT16_C( 21276), INT16_C( 15894), -INT16_C( 15362), INT16_C( 7421), INT16_C( 9101), INT16_C( 25316) } }, { { INT16_C( 21779), -INT16_C( 23300), -INT16_C( 1305), INT16_C( 31847), -INT16_C( 19902), INT16_C( 16336), INT16_C( 8655), -INT16_C( 13678), INT16_C( 21236), -INT16_C( 2087), INT16_C( 28578), INT16_C( 14084), INT16_C( 14175), -INT16_C( 28958), -INT16_C( 30293), -INT16_C( 16659), -INT16_C( 5666), -INT16_C( 15006), -INT16_C( 13596), INT16_C( 9793), INT16_C( 4476), INT16_C( 19301), -INT16_C( 2254), INT16_C( 9749), -INT16_C( 4534), -INT16_C( 5090), INT16_C( 8797), -INT16_C( 17117), INT16_C( 1369), INT16_C( 1099), INT16_C( 14478), INT16_C( 28099) }, { INT16_C( 9506), INT16_C( 1586), INT16_C( 29935), INT16_C( 27692), -INT16_C( 28283), -INT16_C( 18249), -INT16_C( 12919), -INT16_C( 11298), -INT16_C( 837), INT16_C( 6591), -INT16_C( 7650), INT16_C( 30934), INT16_C( 8679), INT16_C( 30076), INT16_C( 16217), INT16_C( 31714), INT16_C( 5477), 
INT16_C( 21633), -INT16_C( 21111), INT16_C( 3776), INT16_C( 30783), -INT16_C( 14138), -INT16_C( 23227), INT16_C( 155), INT16_C( 23201), -INT16_C( 16359), -INT16_C( 4292), INT16_C( 9016), -INT16_C( 19440), INT16_C( 27288), INT16_C( 31732), INT16_C( 23013) }, { INT16_C( 26512), INT16_C( 6573), INT16_C( 28180), INT16_C( 21287), -INT16_C( 4378), INT16_C( 11035), -INT16_C( 18797), INT16_C( 13355), INT16_C( 17680), INT16_C( 19700), INT16_C( 11316), INT16_C( 17775), INT16_C( 2273), -INT16_C( 10833), -INT16_C( 27517), INT16_C( 4910), -INT16_C( 9221), INT16_C( 4140), INT16_C( 21321), INT16_C( 12131), INT16_C( 32577), -INT16_C( 11174), -INT16_C( 31179), INT16_C( 17929), -INT16_C( 565), -INT16_C( 110), INT16_C( 554), INT16_C( 2884), -INT16_C( 3318), -INT16_C( 29216), INT16_C( 3720), -INT16_C( 31840) }, { INT16_C( 28180), INT16_C( 21321), INT16_C( 4910), INT16_C( 2273), INT16_C( 16336), INT16_C( 17929), -INT16_C( 2087), INT16_C( 14478), INT16_C( 2884), -INT16_C( 31840), INT16_C( 14478), -INT16_C( 2254), INT16_C( 13355), -INT16_C( 3318), -INT16_C( 5090), INT16_C( 28180), INT16_C( 11035), -INT16_C( 23300), -INT16_C( 2087), INT16_C( 21779), -INT16_C( 31840), INT16_C( 8655), INT16_C( 16336), -INT16_C( 17117), INT16_C( 6573), -INT16_C( 5090), -INT16_C( 3318), -INT16_C( 565), -INT16_C( 5666), -INT16_C( 4534), INT16_C( 32577), INT16_C( 11035) } }, { { -INT16_C( 13079), INT16_C( 13203), -INT16_C( 2273), INT16_C( 24930), -INT16_C( 17034), -INT16_C( 21707), INT16_C( 15939), INT16_C( 3825), -INT16_C( 31684), INT16_C( 26125), INT16_C( 21126), -INT16_C( 28559), INT16_C( 20805), -INT16_C( 13027), -INT16_C( 17057), INT16_C( 18513), -INT16_C( 7031), -INT16_C( 22405), -INT16_C( 8485), INT16_C( 20745), INT16_C( 16283), -INT16_C( 8451), -INT16_C( 4483), -INT16_C( 17940), -INT16_C( 1678), -INT16_C( 2017), -INT16_C( 28597), -INT16_C( 28280), -INT16_C( 23071), INT16_C( 16478), -INT16_C( 20638), -INT16_C( 5239) }, { INT16_C( 1172), INT16_C( 28564), -INT16_C( 25118), INT16_C( 32193), -INT16_C( 
16676), INT16_C( 23131), INT16_C( 18348), INT16_C( 7955), INT16_C( 13121), -INT16_C( 29673), -INT16_C( 24381), -INT16_C( 23267), INT16_C( 31813), -INT16_C( 22299), INT16_C( 28203), -INT16_C( 16493), INT16_C( 10099), INT16_C( 21807), -INT16_C( 3899), -INT16_C( 24109), INT16_C( 11950), INT16_C( 23291), INT16_C( 3958), -INT16_C( 18567), -INT16_C( 28350), INT16_C( 1347), INT16_C( 24881), INT16_C( 30378), -INT16_C( 28451), INT16_C( 2078), -INT16_C( 19714), INT16_C( 29128) }, { -INT16_C( 2087), -INT16_C( 24889), -INT16_C( 25881), -INT16_C( 27328), INT16_C( 15304), INT16_C( 16111), INT16_C( 26954), -INT16_C( 29451), INT16_C( 14842), INT16_C( 11154), INT16_C( 15514), INT16_C( 30625), -INT16_C( 16180), -INT16_C( 13441), INT16_C( 18290), INT16_C( 19260), INT16_C( 830), INT16_C( 9706), INT16_C( 10909), INT16_C( 26298), -INT16_C( 21915), -INT16_C( 20316), -INT16_C( 26093), INT16_C( 3388), -INT16_C( 12589), INT16_C( 27960), -INT16_C( 9973), -INT16_C( 10268), INT16_C( 25497), INT16_C( 2978), -INT16_C( 8277), -INT16_C( 5801) }, { INT16_C( 16283), INT16_C( 16283), -INT16_C( 25881), INT16_C( 13203), -INT16_C( 23071), -INT16_C( 28280), -INT16_C( 16180), INT16_C( 20745), INT16_C( 13203), -INT16_C( 17940), INT16_C( 24930), INT16_C( 16478), -INT16_C( 21707), INT16_C( 16111), INT16_C( 30625), INT16_C( 20745), INT16_C( 26298), INT16_C( 19260), -INT16_C( 21707), INT16_C( 20745), INT16_C( 18290), -INT16_C( 10268), -INT16_C( 26093), INT16_C( 27960), -INT16_C( 2273), INT16_C( 24930), INT16_C( 9706), INT16_C( 15514), INT16_C( 16478), -INT16_C( 20638), -INT16_C( 8277), -INT16_C( 31684) } }, { { INT16_C( 16866), -INT16_C( 32753), -INT16_C( 13973), -INT16_C( 12058), -INT16_C( 30093), -INT16_C( 31104), -INT16_C( 17116), -INT16_C( 2157), -INT16_C( 13429), -INT16_C( 27036), INT16_C( 18597), INT16_C( 15982), INT16_C( 4268), INT16_C( 22346), -INT16_C( 24081), -INT16_C( 11712), INT16_C( 20450), INT16_C( 19794), INT16_C( 14361), -INT16_C( 29667), -INT16_C( 24894), -INT16_C( 6381), -INT16_C( 22949), 
-INT16_C( 6434), INT16_C( 17266), INT16_C( 6013), -INT16_C( 5237), INT16_C( 14165), -INT16_C( 24581), -INT16_C( 5234), -INT16_C( 12480), INT16_C( 8893) }, { INT16_C( 3870), INT16_C( 14191), -INT16_C( 29369), INT16_C( 2500), -INT16_C( 10453), -INT16_C( 30992), -INT16_C( 12419), -INT16_C( 4244), -INT16_C( 5870), -INT16_C( 25338), INT16_C( 23764), -INT16_C( 12075), INT16_C( 25595), INT16_C( 15547), INT16_C( 30770), INT16_C( 20830), -INT16_C( 12665), -INT16_C( 12664), INT16_C( 19547), -INT16_C( 31017), -INT16_C( 14301), -INT16_C( 24308), INT16_C( 30871), -INT16_C( 22128), -INT16_C( 26782), INT16_C( 13894), INT16_C( 7155), -INT16_C( 4602), -INT16_C( 16001), -INT16_C( 20182), -INT16_C( 30407), -INT16_C( 16382) }, { -INT16_C( 29865), -INT16_C( 19826), INT16_C( 26327), -INT16_C( 1224), INT16_C( 17454), -INT16_C( 14948), INT16_C( 11452), INT16_C( 7790), -INT16_C( 19261), -INT16_C( 18859), INT16_C( 23504), INT16_C( 20389), -INT16_C( 12515), INT16_C( 22016), INT16_C( 856), -INT16_C( 20713), -INT16_C( 23154), INT16_C( 25953), -INT16_C( 26357), INT16_C( 14688), -INT16_C( 803), -INT16_C( 25858), INT16_C( 27689), -INT16_C( 4936), INT16_C( 3361), -INT16_C( 3677), INT16_C( 18537), -INT16_C( 31168), INT16_C( 16407), INT16_C( 28892), -INT16_C( 3261), -INT16_C( 12001) }, { -INT16_C( 12480), -INT16_C( 20713), -INT16_C( 2157), -INT16_C( 30093), INT16_C( 20389), -INT16_C( 23154), INT16_C( 28892), -INT16_C( 12515), INT16_C( 14361), -INT16_C( 17116), -INT16_C( 24894), -INT16_C( 6381), -INT16_C( 31168), -INT16_C( 31168), -INT16_C( 26357), -INT16_C( 12480), -INT16_C( 2157), -INT16_C( 13429), INT16_C( 14165), -INT16_C( 6434), -INT16_C( 1224), INT16_C( 4268), -INT16_C( 6434), INT16_C( 20450), INT16_C( 26327), -INT16_C( 17116), INT16_C( 14688), -INT16_C( 17116), -INT16_C( 12001), INT16_C( 23504), -INT16_C( 3677), -INT16_C( 13973) } }, { { -INT16_C( 32359), -INT16_C( 23497), -INT16_C( 26854), -INT16_C( 1826), -INT16_C( 9068), -INT16_C( 17006), INT16_C( 19017), INT16_C( 27305), INT16_C( 19544), 
-INT16_C( 16037), -INT16_C( 25708), -INT16_C( 21433), INT16_C( 9179), INT16_C( 7964), INT16_C( 15127), -INT16_C( 20240), INT16_C( 10172), -INT16_C( 10412), INT16_C( 12991), INT16_C( 21455), INT16_C( 24847), INT16_C( 22544), -INT16_C( 18005), INT16_C( 962), INT16_C( 7430), -INT16_C( 25916), INT16_C( 3000), -INT16_C( 27834), INT16_C( 25135), INT16_C( 18098), -INT16_C( 23650), INT16_C( 23286) }, { INT16_C( 19146), -INT16_C( 30415), INT16_C( 125), -INT16_C( 29476), -INT16_C( 5023), INT16_C( 3556), -INT16_C( 22874), -INT16_C( 21488), -INT16_C( 10813), INT16_C( 31558), -INT16_C( 29216), INT16_C( 3854), -INT16_C( 15889), -INT16_C( 29355), INT16_C( 19300), INT16_C( 12008), INT16_C( 6550), INT16_C( 5048), -INT16_C( 27622), INT16_C( 31647), -INT16_C( 31871), INT16_C( 10120), -INT16_C( 26327), -INT16_C( 4909), INT16_C( 6510), INT16_C( 20071), INT16_C( 30118), -INT16_C( 27042), -INT16_C( 19658), -INT16_C( 26077), INT16_C( 3071), -INT16_C( 27191) }, { -INT16_C( 32475), INT16_C( 16296), INT16_C( 18197), -INT16_C( 26950), INT16_C( 17354), -INT16_C( 3139), -INT16_C( 28452), INT16_C( 19167), INT16_C( 18090), INT16_C( 20632), -INT16_C( 2373), -INT16_C( 3354), INT16_C( 2730), -INT16_C( 22132), INT16_C( 21781), INT16_C( 14910), -INT16_C( 6442), -INT16_C( 4999), INT16_C( 13357), -INT16_C( 2174), INT16_C( 16503), INT16_C( 21482), -INT16_C( 13872), INT16_C( 31389), INT16_C( 13583), -INT16_C( 13621), -INT16_C( 20180), -INT16_C( 10564), INT16_C( 18875), -INT16_C( 11905), -INT16_C( 16994), INT16_C( 29963) }, { -INT16_C( 25708), -INT16_C( 4999), -INT16_C( 11905), INT16_C( 25135), INT16_C( 16296), INT16_C( 17354), -INT16_C( 28452), INT16_C( 10172), -INT16_C( 1826), INT16_C( 19017), -INT16_C( 32475), INT16_C( 15127), INT16_C( 14910), INT16_C( 22544), INT16_C( 17354), INT16_C( 18090), -INT16_C( 18005), INT16_C( 13583), INT16_C( 3000), INT16_C( 23286), -INT16_C( 23497), INT16_C( 19544), INT16_C( 20632), INT16_C( 21455), INT16_C( 21781), INT16_C( 19167), -INT16_C( 28452), -INT16_C( 23650), 
-INT16_C( 13872), -INT16_C( 26950), INT16_C( 29963), -INT16_C( 16037) } }, { { -INT16_C( 31325), -INT16_C( 12191), -INT16_C( 7239), INT16_C( 12487), -INT16_C( 20189), -INT16_C( 2941), INT16_C( 8314), -INT16_C( 30354), INT16_C( 14677), -INT16_C( 32429), INT16_C( 4331), -INT16_C( 22953), -INT16_C( 10663), -INT16_C( 2185), -INT16_C( 31853), INT16_C( 13932), -INT16_C( 13048), -INT16_C( 16122), -INT16_C( 12879), -INT16_C( 11023), INT16_C( 29822), -INT16_C( 1848), INT16_C( 14228), -INT16_C( 5759), -INT16_C( 10896), INT16_C( 23403), -INT16_C( 15643), INT16_C( 15874), INT16_C( 31129), INT16_C( 11317), -INT16_C( 23812), INT16_C( 1123) }, { INT16_C( 26991), INT16_C( 8389), -INT16_C( 18889), -INT16_C( 18955), -INT16_C( 17110), -INT16_C( 16722), INT16_C( 12276), INT16_C( 26024), INT16_C( 4868), -INT16_C( 5696), -INT16_C( 15659), INT16_C( 28199), INT16_C( 23868), INT16_C( 14491), -INT16_C( 257), INT16_C( 28221), INT16_C( 615), -INT16_C( 24945), -INT16_C( 31559), -INT16_C( 7340), INT16_C( 577), INT16_C( 13986), INT16_C( 18993), INT16_C( 13979), INT16_C( 23389), INT16_C( 12831), INT16_C( 18206), INT16_C( 23201), INT16_C( 15524), -INT16_C( 23662), -INT16_C( 12486), -INT16_C( 24303) }, { -INT16_C( 24366), -INT16_C( 29888), -INT16_C( 27612), INT16_C( 26222), INT16_C( 4246), -INT16_C( 14436), INT16_C( 14170), -INT16_C( 18435), INT16_C( 7570), -INT16_C( 20246), -INT16_C( 29852), INT16_C( 2058), -INT16_C( 25145), INT16_C( 427), -INT16_C( 17300), INT16_C( 16034), -INT16_C( 7587), -INT16_C( 32311), INT16_C( 14454), INT16_C( 3303), -INT16_C( 31928), -INT16_C( 23596), -INT16_C( 11846), INT16_C( 19802), INT16_C( 17646), INT16_C( 21245), INT16_C( 2255), -INT16_C( 27046), INT16_C( 1445), INT16_C( 4503), INT16_C( 15042), INT16_C( 8016) }, { INT16_C( 16034), -INT16_C( 2941), INT16_C( 19802), -INT16_C( 23596), -INT16_C( 29852), -INT16_C( 17300), -INT16_C( 31928), INT16_C( 7570), -INT16_C( 20189), -INT16_C( 31325), -INT16_C( 1848), -INT16_C( 18435), INT16_C( 1445), INT16_C( 15874), INT16_C( 
8016), INT16_C( 4503), -INT16_C( 18435), INT16_C( 13932), INT16_C( 21245), INT16_C( 29822), -INT16_C( 12191), -INT16_C( 27612), -INT16_C( 32311), INT16_C( 15874), INT16_C( 11317), INT16_C( 1123), -INT16_C( 23812), -INT16_C( 29888), INT16_C( 4246), -INT16_C( 12879), INT16_C( 2255), -INT16_C( 16122) } }, { { INT16_C( 6428), -INT16_C( 27744), -INT16_C( 30639), -INT16_C( 25953), INT16_C( 29451), -INT16_C( 14787), -INT16_C( 26811), INT16_C( 13075), INT16_C( 4316), -INT16_C( 21626), -INT16_C( 8168), -INT16_C( 17086), -INT16_C( 9754), -INT16_C( 22321), INT16_C( 7955), INT16_C( 12487), INT16_C( 26424), -INT16_C( 30013), INT16_C( 25327), -INT16_C( 1244), INT16_C( 25046), INT16_C( 7105), -INT16_C( 11016), -INT16_C( 11186), -INT16_C( 11036), -INT16_C( 640), -INT16_C( 15691), -INT16_C( 25670), -INT16_C( 30309), -INT16_C( 20669), INT16_C( 2728), -INT16_C( 7713) }, { -INT16_C( 23951), INT16_C( 24939), -INT16_C( 28924), -INT16_C( 9636), INT16_C( 7664), -INT16_C( 5899), INT16_C( 17649), -INT16_C( 10819), INT16_C( 15640), -INT16_C( 12846), -INT16_C( 29185), -INT16_C( 26008), -INT16_C( 21738), -INT16_C( 16567), INT16_C( 10421), INT16_C( 10144), INT16_C( 3018), -INT16_C( 12408), -INT16_C( 7014), -INT16_C( 30039), -INT16_C( 24831), -INT16_C( 3470), INT16_C( 12259), -INT16_C( 1081), -INT16_C( 26004), INT16_C( 27593), INT16_C( 12583), INT16_C( 15622), INT16_C( 20445), -INT16_C( 27908), -INT16_C( 25480), INT16_C( 17081) }, { INT16_C( 16807), INT16_C( 16657), -INT16_C( 17627), INT16_C( 9931), INT16_C( 15962), INT16_C( 15640), -INT16_C( 8083), -INT16_C( 9672), INT16_C( 378), -INT16_C( 24251), INT16_C( 19251), INT16_C( 4318), -INT16_C( 9317), INT16_C( 5026), INT16_C( 23671), INT16_C( 8021), INT16_C( 26525), -INT16_C( 15520), INT16_C( 11298), INT16_C( 31977), INT16_C( 618), -INT16_C( 10311), -INT16_C( 3614), INT16_C( 23729), -INT16_C( 2061), INT16_C( 9981), -INT16_C( 9406), -INT16_C( 8906), -INT16_C( 10058), INT16_C( 12016), INT16_C( 17972), -INT16_C( 11699) }, { -INT16_C( 15520), INT16_C( 
4318), INT16_C( 29451), -INT16_C( 30309), INT16_C( 26525), -INT16_C( 10311), -INT16_C( 15520), INT16_C( 12016), -INT16_C( 11036), INT16_C( 25327), -INT16_C( 11699), INT16_C( 378), -INT16_C( 11016), -INT16_C( 21626), -INT16_C( 10311), INT16_C( 16807), -INT16_C( 8168), INT16_C( 4316), -INT16_C( 15691), -INT16_C( 24251), -INT16_C( 27744), INT16_C( 11298), INT16_C( 9931), INT16_C( 13075), -INT16_C( 9317), -INT16_C( 21626), -INT16_C( 9672), -INT16_C( 26811), -INT16_C( 20669), -INT16_C( 10058), -INT16_C( 2061), INT16_C( 9981) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i idx = simde_mm512_loadu_epi16(test_vec[i].idx); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_permutex2var_epi16(a, idx, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i16x32(); simde__m512i idx = simde_test_x86_random_i16x32(); simde__m512i b = simde_test_x86_random_i16x32(); simde__m512i r = simde_mm512_permutex2var_epi16(a, idx, b); simde_test_x86_write_i16x32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x32(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_permutex2var_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[32]; const simde__mmask32 k; const int16_t idx[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { -INT16_C( 21075), -INT16_C( 12395), INT16_C( 32473), INT16_C( 17227), INT16_C( 1152), INT16_C( 25115), -INT16_C( 13067), -INT16_C( 5954), -INT16_C( 17469), INT16_C( 1550), INT16_C( 17559), INT16_C( 19939), -INT16_C( 11235), INT16_C( 20859), -INT16_C( 14310), -INT16_C( 14557), -INT16_C( 
18314), INT16_C( 20374), -INT16_C( 7881), -INT16_C( 18541), -INT16_C( 20763), -INT16_C( 9702), -INT16_C( 10118), INT16_C( 16067), -INT16_C( 11884), INT16_C( 11076), INT16_C( 10006), INT16_C( 13176), -INT16_C( 2821), INT16_C( 5508), -INT16_C( 22340), INT16_C( 13020) }, UINT32_C(2541908576), { INT16_C( 5459), INT16_C( 14415), INT16_C( 27075), INT16_C( 15635), -INT16_C( 10687), -INT16_C( 10885), -INT16_C( 16473), -INT16_C( 17152), INT16_C( 31207), -INT16_C( 7440), INT16_C( 30061), INT16_C( 10744), -INT16_C( 11235), INT16_C( 32092), -INT16_C( 8633), -INT16_C( 26091), INT16_C( 25843), -INT16_C( 18733), -INT16_C( 6451), INT16_C( 3827), INT16_C( 28604), INT16_C( 25572), -INT16_C( 7122), INT16_C( 5409), INT16_C( 4445), -INT16_C( 13576), -INT16_C( 3962), -INT16_C( 23564), INT16_C( 20676), INT16_C( 2849), INT16_C( 13870), INT16_C( 8614) }, { INT16_C( 31130), INT16_C( 26583), -INT16_C( 13729), INT16_C( 7029), INT16_C( 22841), INT16_C( 26750), -INT16_C( 24770), -INT16_C( 25731), INT16_C( 30129), INT16_C( 14182), INT16_C( 23141), INT16_C( 10971), -INT16_C( 854), -INT16_C( 10187), -INT16_C( 9422), -INT16_C( 13063), -INT16_C( 12204), -INT16_C( 19661), -INT16_C( 22374), -INT16_C( 11058), INT16_C( 19714), INT16_C( 16444), -INT16_C( 17940), -INT16_C( 25125), INT16_C( 16687), -INT16_C( 27435), -INT16_C( 20325), INT16_C( 17854), -INT16_C( 2900), -INT16_C( 8675), INT16_C( 5839), INT16_C( 9386) }, { -INT16_C( 21075), -INT16_C( 12395), INT16_C( 32473), INT16_C( 17227), INT16_C( 1152), INT16_C( 17854), -INT16_C( 25731), -INT16_C( 5954), -INT16_C( 17469), -INT16_C( 12204), INT16_C( 17559), INT16_C( 19939), INT16_C( 5508), -INT16_C( 2821), -INT16_C( 5954), -INT16_C( 14557), -INT16_C( 18314), -INT16_C( 18541), -INT16_C( 7881), -INT16_C( 18541), -INT16_C( 20763), -INT16_C( 9702), -INT16_C( 10118), INT16_C( 26583), INT16_C( 5508), INT16_C( 16687), -INT16_C( 13067), INT16_C( 13176), INT16_C( 1152), INT16_C( 5508), -INT16_C( 22340), -INT16_C( 24770) } }, { { -INT16_C( 8730), -INT16_C( 32297), 
-INT16_C( 22907), -INT16_C( 30891), -INT16_C( 28173), -INT16_C( 8249), -INT16_C( 23734), INT16_C( 31101), INT16_C( 21220), -INT16_C( 32754), -INT16_C( 13310), -INT16_C( 20795), -INT16_C( 7232), -INT16_C( 28532), INT16_C( 14073), -INT16_C( 8012), -INT16_C( 29933), -INT16_C( 26527), -INT16_C( 18895), INT16_C( 9248), -INT16_C( 6329), -INT16_C( 28412), -INT16_C( 32374), INT16_C( 28427), INT16_C( 6611), -INT16_C( 10769), -INT16_C( 19227), -INT16_C( 22909), INT16_C( 3991), -INT16_C( 28362), -INT16_C( 5563), INT16_C( 22641) }, UINT32_C(2817577589), { INT16_C( 4232), -INT16_C( 12341), -INT16_C( 12296), -INT16_C( 32160), INT16_C( 27472), INT16_C( 9201), -INT16_C( 8060), INT16_C( 27384), INT16_C( 31637), INT16_C( 11280), INT16_C( 18058), -INT16_C( 12355), INT16_C( 11824), -INT16_C( 23257), INT16_C( 6144), -INT16_C( 30644), INT16_C( 6184), INT16_C( 8279), -INT16_C( 18201), INT16_C( 14499), -INT16_C( 27613), -INT16_C( 22437), INT16_C( 21621), INT16_C( 2578), INT16_C( 8911), INT16_C( 23094), -INT16_C( 2968), -INT16_C( 26583), INT16_C( 20770), INT16_C( 9021), -INT16_C( 30103), -INT16_C( 28245) }, { INT16_C( 930), -INT16_C( 30286), INT16_C( 21947), -INT16_C( 8511), INT16_C( 7657), INT16_C( 24198), -INT16_C( 26511), INT16_C( 16488), -INT16_C( 24646), INT16_C( 8858), -INT16_C( 15213), -INT16_C( 19014), -INT16_C( 2027), INT16_C( 32472), -INT16_C( 31614), INT16_C( 9231), -INT16_C( 15993), INT16_C( 17069), INT16_C( 28438), INT16_C( 32), -INT16_C( 22644), -INT16_C( 674), -INT16_C( 14529), -INT16_C( 1475), -INT16_C( 10138), -INT16_C( 1764), -INT16_C( 10340), -INT16_C( 20050), -INT16_C( 30769), INT16_C( 20783), INT16_C( 15883), -INT16_C( 28043) }, { INT16_C( 21220), -INT16_C( 32297), -INT16_C( 10138), -INT16_C( 30891), -INT16_C( 29933), INT16_C( 17069), -INT16_C( 28173), INT16_C( 31101), INT16_C( 21220), -INT16_C( 29933), -INT16_C( 13310), -INT16_C( 20795), -INT16_C( 15993), -INT16_C( 28532), -INT16_C( 8730), -INT16_C( 7232), -INT16_C( 29933), -INT16_C( 26527), -INT16_C( 18895), INT16_C( 
9248), -INT16_C( 8511), -INT16_C( 22909), -INT16_C( 674), -INT16_C( 18895), -INT16_C( 8012), -INT16_C( 14529), -INT16_C( 24646), -INT16_C( 22909), INT16_C( 3991), INT16_C( 20783), -INT16_C( 5563), -INT16_C( 19014) } }, { { INT16_C( 8704), INT16_C( 5844), -INT16_C( 2927), INT16_C( 7446), INT16_C( 30107), -INT16_C( 9446), INT16_C( 22588), -INT16_C( 23851), -INT16_C( 3792), -INT16_C( 13157), INT16_C( 18888), -INT16_C( 26755), -INT16_C( 21296), -INT16_C( 9240), INT16_C( 24042), -INT16_C( 5523), INT16_C( 16768), INT16_C( 4353), INT16_C( 5942), -INT16_C( 11985), INT16_C( 18828), -INT16_C( 14164), -INT16_C( 32351), -INT16_C( 11926), INT16_C( 1395), INT16_C( 15261), INT16_C( 6735), INT16_C( 8147), -INT16_C( 17466), -INT16_C( 19973), INT16_C( 26649), -INT16_C( 26213) }, UINT32_C(3769277610), { -INT16_C( 9804), INT16_C( 16561), INT16_C( 24099), -INT16_C( 15351), INT16_C( 29663), INT16_C( 21142), INT16_C( 13177), -INT16_C( 14194), INT16_C( 24910), INT16_C( 5351), -INT16_C( 7652), INT16_C( 13765), INT16_C( 24907), -INT16_C( 2610), INT16_C( 31229), -INT16_C( 20011), -INT16_C( 31150), INT16_C( 30194), -INT16_C( 1052), -INT16_C( 15302), -INT16_C( 12178), -INT16_C( 6378), -INT16_C( 23549), INT16_C( 20911), -INT16_C( 26875), INT16_C( 8806), INT16_C( 11129), -INT16_C( 15273), INT16_C( 9868), -INT16_C( 30023), -INT16_C( 29025), -INT16_C( 3781) }, { INT16_C( 11541), -INT16_C( 1689), -INT16_C( 24280), -INT16_C( 26691), -INT16_C( 11151), INT16_C( 29822), INT16_C( 11896), INT16_C( 32454), INT16_C( 11461), INT16_C( 16032), -INT16_C( 2217), -INT16_C( 7165), -INT16_C( 17379), -INT16_C( 17298), -INT16_C( 22197), INT16_C( 24750), INT16_C( 5591), -INT16_C( 167), INT16_C( 6070), INT16_C( 10134), INT16_C( 5611), INT16_C( 25499), INT16_C( 24899), INT16_C( 2273), -INT16_C( 32371), -INT16_C( 6842), INT16_C( 18809), -INT16_C( 26935), INT16_C( 14086), INT16_C( 20819), INT16_C( 480), -INT16_C( 18511) }, { INT16_C( 8704), -INT16_C( 167), -INT16_C( 2927), -INT16_C( 13157), INT16_C( 30107), -INT16_C( 
32351), INT16_C( 22588), INT16_C( 24042), -INT16_C( 3792), -INT16_C( 13157), -INT16_C( 17466), -INT16_C( 9446), -INT16_C( 26755), -INT16_C( 9240), INT16_C( 24042), -INT16_C( 14164), INT16_C( 16768), INT16_C( 6070), INT16_C( 5942), INT16_C( 18809), INT16_C( 18828), -INT16_C( 32351), -INT16_C( 32351), INT16_C( 24750), INT16_C( 1395), INT16_C( 15261), INT16_C( 6735), INT16_C( 8147), -INT16_C( 17466), -INT16_C( 6842), -INT16_C( 26213), -INT16_C( 26935) } }, { { INT16_C( 2582), -INT16_C( 13129), INT16_C( 19745), INT16_C( 3315), -INT16_C( 29086), -INT16_C( 23184), INT16_C( 20976), INT16_C( 32173), -INT16_C( 2861), INT16_C( 19554), INT16_C( 11069), INT16_C( 17378), INT16_C( 13666), INT16_C( 17300), INT16_C( 17718), INT16_C( 19706), -INT16_C( 20144), INT16_C( 28952), INT16_C( 3071), INT16_C( 24958), -INT16_C( 4454), -INT16_C( 30201), -INT16_C( 19393), INT16_C( 4615), INT16_C( 27304), -INT16_C( 6562), INT16_C( 16789), -INT16_C( 2007), -INT16_C( 16778), -INT16_C( 21189), INT16_C( 13571), INT16_C( 21497) }, UINT32_C(3871675111), { INT16_C( 17181), -INT16_C( 18617), INT16_C( 20017), INT16_C( 28737), INT16_C( 18691), -INT16_C( 21629), -INT16_C( 7757), INT16_C( 18577), -INT16_C( 17630), -INT16_C( 26304), INT16_C( 31609), INT16_C( 31814), INT16_C( 16305), -INT16_C( 26416), -INT16_C( 27311), INT16_C( 28542), -INT16_C( 14888), INT16_C( 2342), INT16_C( 26644), INT16_C( 6009), -INT16_C( 847), INT16_C( 25794), INT16_C( 21726), INT16_C( 172), -INT16_C( 4849), -INT16_C( 30567), -INT16_C( 8344), INT16_C( 6404), -INT16_C( 11233), INT16_C( 28849), INT16_C( 12137), INT16_C( 16863) }, { INT16_C( 1781), INT16_C( 2378), -INT16_C( 15250), INT16_C( 7968), -INT16_C( 7488), -INT16_C( 24957), INT16_C( 12086), INT16_C( 17823), INT16_C( 14364), -INT16_C( 31283), -INT16_C( 11752), INT16_C( 14238), INT16_C( 20646), INT16_C( 4263), -INT16_C( 30849), INT16_C( 29777), -INT16_C( 25459), -INT16_C( 1155), -INT16_C( 25248), INT16_C( 8218), -INT16_C( 25216), -INT16_C( 18753), INT16_C( 24268), -INT16_C( 5636), 
-INT16_C( 13930), -INT16_C( 20882), INT16_C( 3227), INT16_C( 17125), -INT16_C( 29348), -INT16_C( 9134), -INT16_C( 23788), -INT16_C( 24240) }, { -INT16_C( 21189), INT16_C( 32173), -INT16_C( 1155), INT16_C( 3315), -INT16_C( 29086), INT16_C( 3315), INT16_C( 8218), INT16_C( 28952), -INT16_C( 2861), INT16_C( 2582), INT16_C( 11069), INT16_C( 17378), -INT16_C( 1155), INT16_C( 17300), INT16_C( 17718), INT16_C( 19706), INT16_C( 27304), INT16_C( 28952), -INT16_C( 4454), INT16_C( 24958), -INT16_C( 4454), -INT16_C( 30201), INT16_C( 13571), INT16_C( 20646), INT16_C( 27304), -INT16_C( 6562), INT16_C( 14364), -INT16_C( 2007), -INT16_C( 16778), -INT16_C( 1155), -INT16_C( 31283), INT16_C( 21497) } }, { { -INT16_C( 12737), -INT16_C( 24676), -INT16_C( 18837), -INT16_C( 5184), INT16_C( 32595), INT16_C( 8098), -INT16_C( 24867), INT16_C( 29448), INT16_C( 30311), INT16_C( 802), INT16_C( 1923), -INT16_C( 8379), -INT16_C( 26732), -INT16_C( 22341), INT16_C( 3130), INT16_C( 31305), -INT16_C( 6694), INT16_C( 17689), -INT16_C( 9829), -INT16_C( 4559), -INT16_C( 11432), INT16_C( 13582), INT16_C( 5745), -INT16_C( 10071), -INT16_C( 13427), INT16_C( 4315), INT16_C( 8402), INT16_C( 26607), -INT16_C( 21577), -INT16_C( 3569), INT16_C( 22967), -INT16_C( 28308) }, UINT32_C(3671491902), { INT16_C( 1887), -INT16_C( 18488), -INT16_C( 10534), INT16_C( 19437), -INT16_C( 26899), INT16_C( 31268), -INT16_C( 159), INT16_C( 13194), INT16_C( 31008), -INT16_C( 10342), -INT16_C( 21980), -INT16_C( 9271), INT16_C( 13571), INT16_C( 16748), INT16_C( 17339), INT16_C( 6683), -INT16_C( 7094), INT16_C( 9681), -INT16_C( 16710), -INT16_C( 22672), -INT16_C( 27564), -INT16_C( 19167), -INT16_C( 21612), -INT16_C( 19223), -INT16_C( 31963), INT16_C( 18827), INT16_C( 21805), INT16_C( 12325), -INT16_C( 28278), INT16_C( 17778), -INT16_C( 29228), INT16_C( 8031) }, { INT16_C( 12657), INT16_C( 11332), -INT16_C( 19217), INT16_C( 17619), -INT16_C( 2743), -INT16_C( 8711), -INT16_C( 7520), -INT16_C( 14959), INT16_C( 7270), -INT16_C( 27889), 
INT16_C( 13425), -INT16_C( 828), INT16_C( 14021), -INT16_C( 26047), -INT16_C( 24125), INT16_C( 13753), -INT16_C( 558), -INT16_C( 16031), INT16_C( 13489), -INT16_C( 1531), -INT16_C( 215), -INT16_C( 13609), INT16_C( 26849), INT16_C( 18319), -INT16_C( 24955), -INT16_C( 2341), -INT16_C( 24622), -INT16_C( 26382), INT16_C( 13525), -INT16_C( 26574), -INT16_C( 5163), -INT16_C( 22579) }, { -INT16_C( 12737), INT16_C( 30311), INT16_C( 8402), -INT16_C( 26047), -INT16_C( 26047), -INT16_C( 2743), -INT16_C( 24867), INT16_C( 29448), INT16_C( 12657), INT16_C( 802), -INT16_C( 2743), -INT16_C( 8379), -INT16_C( 26732), -INT16_C( 22341), INT16_C( 3130), INT16_C( 26607), -INT16_C( 6694), INT16_C( 17689), -INT16_C( 24622), -INT16_C( 4559), -INT16_C( 11432), INT16_C( 13582), -INT16_C( 11432), -INT16_C( 27889), -INT16_C( 13427), -INT16_C( 8379), INT16_C( 8402), -INT16_C( 8711), INT16_C( 1923), -INT16_C( 3569), -INT16_C( 11432), -INT16_C( 28308) } }, { { INT16_C( 12008), -INT16_C( 26264), INT16_C( 28259), -INT16_C( 29548), INT16_C( 27501), INT16_C( 20054), -INT16_C( 6444), INT16_C( 22934), INT16_C( 29060), INT16_C( 22351), INT16_C( 16912), -INT16_C( 6673), INT16_C( 8566), INT16_C( 19325), INT16_C( 19212), -INT16_C( 2830), INT16_C( 23161), -INT16_C( 9075), INT16_C( 8648), INT16_C( 13673), -INT16_C( 16499), INT16_C( 24964), INT16_C( 6821), INT16_C( 10938), INT16_C( 2443), -INT16_C( 25727), INT16_C( 28747), -INT16_C( 16000), -INT16_C( 623), -INT16_C( 25332), -INT16_C( 440), -INT16_C( 15727) }, UINT32_C( 564010585), { INT16_C( 1856), -INT16_C( 12969), -INT16_C( 9273), INT16_C( 27694), -INT16_C( 5899), -INT16_C( 32618), INT16_C( 6129), INT16_C( 15643), -INT16_C( 25721), INT16_C( 6398), INT16_C( 2968), -INT16_C( 7755), INT16_C( 17929), INT16_C( 25251), INT16_C( 16741), -INT16_C( 23164), -INT16_C( 9399), INT16_C( 4210), -INT16_C( 24394), -INT16_C( 21636), INT16_C( 5000), INT16_C( 31019), INT16_C( 17962), -INT16_C( 19786), -INT16_C( 18975), INT16_C( 31178), -INT16_C( 32576), -INT16_C( 13990), 
-INT16_C( 570), INT16_C( 11052), -INT16_C( 20417), -INT16_C( 30512) }, { INT16_C( 17035), INT16_C( 16792), INT16_C( 5346), INT16_C( 27372), INT16_C( 5927), INT16_C( 21220), -INT16_C( 26019), INT16_C( 15876), -INT16_C( 12721), INT16_C( 4023), INT16_C( 4686), INT16_C( 5593), INT16_C( 1295), INT16_C( 20032), INT16_C( 4533), INT16_C( 16598), INT16_C( 28243), INT16_C( 13953), INT16_C( 28035), -INT16_C( 21856), -INT16_C( 31612), -INT16_C( 7684), INT16_C( 31), INT16_C( 28191), -INT16_C( 10545), INT16_C( 7550), INT16_C( 22504), -INT16_C( 1998), INT16_C( 29532), INT16_C( 4422), INT16_C( 7556), -INT16_C( 10415) }, { INT16_C( 12008), -INT16_C( 26264), INT16_C( 28259), INT16_C( 4533), -INT16_C( 7684), INT16_C( 20054), INT16_C( 13953), INT16_C( 22934), INT16_C( 29060), INT16_C( 7556), INT16_C( 2443), -INT16_C( 7684), INT16_C( 22351), INT16_C( 19325), INT16_C( 19212), -INT16_C( 2830), INT16_C( 23161), INT16_C( 28035), INT16_C( 31), INT16_C( 29532), INT16_C( 29060), INT16_C( 24964), INT16_C( 6821), INT16_C( 31), INT16_C( 16792), -INT16_C( 25727), INT16_C( 28747), -INT16_C( 16000), -INT16_C( 623), INT16_C( 1295), -INT16_C( 440), -INT16_C( 15727) } }, { { -INT16_C( 11637), INT16_C( 3597), -INT16_C( 20929), -INT16_C( 15431), -INT16_C( 19150), INT16_C( 20900), -INT16_C( 15434), -INT16_C( 31296), INT16_C( 16025), -INT16_C( 32094), -INT16_C( 10859), -INT16_C( 3718), -INT16_C( 16312), -INT16_C( 13310), INT16_C( 21469), INT16_C( 27043), -INT16_C( 20187), INT16_C( 25719), INT16_C( 12383), -INT16_C( 28377), -INT16_C( 13338), -INT16_C( 25373), -INT16_C( 23666), INT16_C( 10017), -INT16_C( 15391), INT16_C( 30377), INT16_C( 9112), -INT16_C( 8089), INT16_C( 27108), -INT16_C( 15956), INT16_C( 20668), -INT16_C( 7894) }, UINT32_C(1615176193), { INT16_C( 27858), -INT16_C( 18191), -INT16_C( 11209), -INT16_C( 15020), INT16_C( 30071), INT16_C( 22764), -INT16_C( 27079), -INT16_C( 11826), INT16_C( 13753), -INT16_C( 25166), INT16_C( 24222), INT16_C( 23135), -INT16_C( 30290), -INT16_C( 20677), -INT16_C( 
32725), -INT16_C( 497), INT16_C( 492), INT16_C( 9142), INT16_C( 3029), INT16_C( 19944), -INT16_C( 10880), -INT16_C( 18011), INT16_C( 29803), INT16_C( 9355), INT16_C( 15785), INT16_C( 18626), INT16_C( 8603), INT16_C( 19106), -INT16_C( 8534), -INT16_C( 10503), INT16_C( 2398), INT16_C( 19412) }, { -INT16_C( 30198), -INT16_C( 8338), INT16_C( 22421), INT16_C( 5676), -INT16_C( 11732), -INT16_C( 26673), INT16_C( 23110), -INT16_C( 4165), INT16_C( 32151), INT16_C( 13111), -INT16_C( 9570), INT16_C( 18813), INT16_C( 30392), INT16_C( 5663), -INT16_C( 3201), -INT16_C( 30367), -INT16_C( 12163), INT16_C( 4969), -INT16_C( 27353), INT16_C( 21289), -INT16_C( 1945), -INT16_C( 21014), -INT16_C( 23213), -INT16_C( 5475), -INT16_C( 11229), -INT16_C( 16099), -INT16_C( 25938), INT16_C( 26122), INT16_C( 10513), -INT16_C( 28547), -INT16_C( 8676), -INT16_C( 26086) }, { INT16_C( 12383), INT16_C( 3597), -INT16_C( 20929), -INT16_C( 15431), -INT16_C( 19150), INT16_C( 20900), -INT16_C( 15434), -INT16_C( 31296), INT16_C( 16025), -INT16_C( 27353), -INT16_C( 10859), -INT16_C( 3718), -INT16_C( 16312), INT16_C( 26122), INT16_C( 21469), INT16_C( 27043), INT16_C( 30392), INT16_C( 25719), -INT16_C( 25373), -INT16_C( 28377), -INT16_C( 13338), -INT16_C( 25373), INT16_C( 18813), INT16_C( 10017), -INT16_C( 15391), INT16_C( 30377), INT16_C( 9112), -INT16_C( 8089), INT16_C( 27108), -INT16_C( 16099), INT16_C( 20668), -INT16_C( 7894) } }, { { -INT16_C( 31826), -INT16_C( 10835), -INT16_C( 10728), -INT16_C( 32728), INT16_C( 4814), INT16_C( 8493), -INT16_C( 13640), -INT16_C( 9460), INT16_C( 10655), INT16_C( 19868), -INT16_C( 22588), -INT16_C( 10828), INT16_C( 12752), -INT16_C( 4763), INT16_C( 32527), -INT16_C( 16761), INT16_C( 13314), INT16_C( 7059), -INT16_C( 17398), -INT16_C( 10085), -INT16_C( 14130), -INT16_C( 30982), INT16_C( 1683), INT16_C( 12897), -INT16_C( 465), -INT16_C( 3201), INT16_C( 13221), INT16_C( 30152), INT16_C( 11876), INT16_C( 29794), -INT16_C( 5715), -INT16_C( 20430) }, UINT32_C( 667665693), { 
INT16_C( 26241), INT16_C( 20480), -INT16_C( 1490), -INT16_C( 15914), INT16_C( 14336), INT16_C( 12275), INT16_C( 29494), -INT16_C( 9437), -INT16_C( 5210), INT16_C( 2896), -INT16_C( 19687), -INT16_C( 14465), -INT16_C( 20068), -INT16_C( 17801), INT16_C( 17014), -INT16_C( 1823), -INT16_C( 7768), -INT16_C( 10680), INT16_C( 7899), -INT16_C( 9320), -INT16_C( 29866), -INT16_C( 29685), INT16_C( 12030), -INT16_C( 23193), -INT16_C( 18407), INT16_C( 13232), INT16_C( 12139), INT16_C( 2042), INT16_C( 29152), INT16_C( 22209), -INT16_C( 23629), INT16_C( 23374) }, { -INT16_C( 27004), INT16_C( 24625), -INT16_C( 13899), INT16_C( 2875), INT16_C( 18005), INT16_C( 21400), -INT16_C( 140), -INT16_C( 28936), -INT16_C( 22345), INT16_C( 8897), -INT16_C( 17449), -INT16_C( 18646), -INT16_C( 5332), -INT16_C( 8434), INT16_C( 23694), INT16_C( 4922), INT16_C( 27635), -INT16_C( 22413), -INT16_C( 20939), -INT16_C( 30029), INT16_C( 19445), INT16_C( 27101), -INT16_C( 10677), INT16_C( 759), -INT16_C( 18306), INT16_C( 22053), INT16_C( 20339), -INT16_C( 24819), INT16_C( 6970), -INT16_C( 13954), -INT16_C( 18312), INT16_C( 27612) }, { -INT16_C( 10835), -INT16_C( 10835), INT16_C( 23694), INT16_C( 1683), -INT16_C( 31826), INT16_C( 8493), -INT16_C( 13640), -INT16_C( 9460), -INT16_C( 140), INT16_C( 19868), -INT16_C( 3201), -INT16_C( 10828), INT16_C( 12752), -INT16_C( 4763), -INT16_C( 10677), INT16_C( 24625), -INT16_C( 22345), INT16_C( 10655), -INT16_C( 17398), -INT16_C( 465), -INT16_C( 14130), -INT16_C( 30982), -INT16_C( 18312), -INT16_C( 28936), -INT16_C( 3201), INT16_C( 27635), -INT16_C( 18646), INT16_C( 30152), INT16_C( 11876), -INT16_C( 10835), -INT16_C( 5715), -INT16_C( 20430) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i idx = simde_mm512_loadu_epi16(test_vec[i].idx); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_mask_permutex2var_epi16(a, test_vec[i].k, idx, 
b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i16x32(); simde__mmask32 k = simde_test_x86_random_mmask32(); simde__m512i idx = simde_test_x86_random_i16x32(); simde__m512i b = simde_test_x86_random_i16x32(); simde__m512i r = simde_mm512_mask_permutex2var_epi16(a, k, idx, b); simde_test_x86_write_i16x32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask2_permutex2var_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[32]; const int16_t idx[32]; const simde__mmask32 k; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { INT16_C( 20260), INT16_C( 22803), -INT16_C( 14595), -INT16_C( 3357), -INT16_C( 16366), INT16_C( 23900), INT16_C( 21398), INT16_C( 5471), -INT16_C( 31732), INT16_C( 32619), INT16_C( 30931), INT16_C( 3615), -INT16_C( 25196), INT16_C( 3287), -INT16_C( 19626), INT16_C( 31351), -INT16_C( 30206), -INT16_C( 45), -INT16_C( 18864), INT16_C( 25330), INT16_C( 20086), INT16_C( 3519), INT16_C( 8097), -INT16_C( 21214), -INT16_C( 29277), INT16_C( 30509), INT16_C( 19461), -INT16_C( 26235), INT16_C( 23785), INT16_C( 16293), INT16_C( 7183), INT16_C( 4537) }, { -INT16_C( 29530), -INT16_C( 2288), INT16_C( 578), -INT16_C( 18087), INT16_C( 6480), -INT16_C( 3386), -INT16_C( 6088), -INT16_C( 9313), -INT16_C( 13195), INT16_C( 31314), -INT16_C( 10472), INT16_C( 532), -INT16_C( 18125), INT16_C( 16961), -INT16_C( 1066), INT16_C( 31827), INT16_C( 25735), -INT16_C( 13709), -INT16_C( 12954), -INT16_C( 18557), INT16_C( 18918), INT16_C( 7849), INT16_C( 18481), -INT16_C( 22791), INT16_C( 19477), INT16_C( 11552), INT16_C( 
13347), INT16_C( 22319), INT16_C( 29166), -INT16_C( 15207), -INT16_C( 4756), -INT16_C( 3264) }, UINT32_C(3082662993), { INT16_C( 16513), INT16_C( 26478), INT16_C( 6025), -INT16_C( 17787), INT16_C( 32352), INT16_C( 30048), -INT16_C( 32310), -INT16_C( 4446), -INT16_C( 11595), -INT16_C( 23739), -INT16_C( 8637), -INT16_C( 20633), -INT16_C( 22325), INT16_C( 7330), INT16_C( 24668), -INT16_C( 8748), INT16_C( 17056), INT16_C( 10820), -INT16_C( 13990), -INT16_C( 17692), INT16_C( 17735), INT16_C( 4655), -INT16_C( 11834), INT16_C( 31488), INT16_C( 17827), -INT16_C( 6625), -INT16_C( 31197), -INT16_C( 4203), INT16_C( 14382), -INT16_C( 30197), -INT16_C( 8296), INT16_C( 14439) }, { -INT16_C( 32310), -INT16_C( 2288), INT16_C( 578), -INT16_C( 18087), -INT16_C( 30206), -INT16_C( 3386), INT16_C( 17827), -INT16_C( 9313), -INT16_C( 13195), INT16_C( 31314), -INT16_C( 29277), INT16_C( 532), -INT16_C( 17692), INT16_C( 22803), -INT16_C( 1066), INT16_C( 25330), INT16_C( 5471), -INT16_C( 13709), -INT16_C( 32310), -INT16_C( 3357), -INT16_C( 32310), -INT16_C( 23739), INT16_C( 18481), -INT16_C( 6625), INT16_C( 3519), INT16_C( 16513), -INT16_C( 17787), INT16_C( 22319), INT16_C( 24668), INT16_C( 30509), -INT16_C( 4756), INT16_C( 20260) } }, { { -INT16_C( 21726), INT16_C( 31842), INT16_C( 18292), -INT16_C( 17354), INT16_C( 25996), INT16_C( 21198), -INT16_C( 12746), -INT16_C( 9523), -INT16_C( 5101), INT16_C( 14016), INT16_C( 22131), -INT16_C( 24283), INT16_C( 12686), INT16_C( 9772), -INT16_C( 27888), INT16_C( 12894), -INT16_C( 16065), -INT16_C( 19538), -INT16_C( 7160), -INT16_C( 27537), INT16_C( 15689), -INT16_C( 32538), -INT16_C( 19701), INT16_C( 7770), INT16_C( 6816), INT16_C( 4949), INT16_C( 31344), -INT16_C( 332), -INT16_C( 8021), -INT16_C( 17372), -INT16_C( 31884), -INT16_C( 19474) }, { -INT16_C( 25276), INT16_C( 19558), -INT16_C( 10623), -INT16_C( 13344), -INT16_C( 14829), INT16_C( 8011), -INT16_C( 23175), INT16_C( 6461), -INT16_C( 27969), INT16_C( 12332), -INT16_C( 7923), -INT16_C( 18386), 
INT16_C( 21441), INT16_C( 13684), INT16_C( 25558), INT16_C( 6888), INT16_C( 20224), -INT16_C( 32410), INT16_C( 17957), INT16_C( 14412), -INT16_C( 26868), -INT16_C( 31401), -INT16_C( 27332), -INT16_C( 865), -INT16_C( 13529), INT16_C( 13356), INT16_C( 23212), INT16_C( 28397), INT16_C( 25005), -INT16_C( 31837), -INT16_C( 29500), -INT16_C( 15203) }, UINT32_C( 4588507), { -INT16_C( 28087), INT16_C( 21816), -INT16_C( 28630), INT16_C( 26331), INT16_C( 31269), INT16_C( 19554), -INT16_C( 29115), -INT16_C( 3455), INT16_C( 28393), -INT16_C( 27040), INT16_C( 975), -INT16_C( 27622), -INT16_C( 18545), INT16_C( 27224), -INT16_C( 24901), INT16_C( 1130), -INT16_C( 23759), INT16_C( 23386), INT16_C( 13619), INT16_C( 22721), INT16_C( 9391), -INT16_C( 2908), INT16_C( 9650), -INT16_C( 25626), INT16_C( 18067), INT16_C( 25394), INT16_C( 19530), -INT16_C( 9737), INT16_C( 20227), -INT16_C( 16828), -INT16_C( 20754), INT16_C( 8131) }, { INT16_C( 25996), -INT16_C( 29115), -INT16_C( 10623), -INT16_C( 28087), -INT16_C( 27537), INT16_C( 8011), INT16_C( 25394), -INT16_C( 16828), INT16_C( 8131), -INT16_C( 18545), -INT16_C( 7923), -INT16_C( 18386), INT16_C( 21441), INT16_C( 13684), INT16_C( 25558), INT16_C( 6888), INT16_C( 20224), -INT16_C( 29115), INT16_C( 19554), INT16_C( 14412), -INT16_C( 26868), -INT16_C( 31401), INT16_C( 20227), -INT16_C( 865), -INT16_C( 13529), INT16_C( 13356), INT16_C( 23212), INT16_C( 28397), INT16_C( 25005), -INT16_C( 31837), -INT16_C( 29500), -INT16_C( 15203) } }, { { INT16_C( 7505), -INT16_C( 31622), INT16_C( 15186), INT16_C( 476), -INT16_C( 32417), INT16_C( 4853), -INT16_C( 9050), INT16_C( 15021), -INT16_C( 8414), INT16_C( 27805), -INT16_C( 27605), INT16_C( 12102), -INT16_C( 29981), -INT16_C( 11795), -INT16_C( 20424), -INT16_C( 29968), INT16_C( 27341), INT16_C( 7950), -INT16_C( 5210), INT16_C( 1312), INT16_C( 5740), INT16_C( 4631), -INT16_C( 14862), INT16_C( 5196), -INT16_C( 5724), -INT16_C( 12159), -INT16_C( 14467), INT16_C( 25087), -INT16_C( 5039), -INT16_C( 30414), 
INT16_C( 9117), INT16_C( 27155) }, { INT16_C( 8845), INT16_C( 13194), -INT16_C( 22003), INT16_C( 31033), INT16_C( 20672), -INT16_C( 19829), -INT16_C( 10219), -INT16_C( 17721), INT16_C( 18625), INT16_C( 16266), -INT16_C( 30449), INT16_C( 24736), -INT16_C( 11659), INT16_C( 4841), -INT16_C( 523), -INT16_C( 31875), INT16_C( 1823), INT16_C( 11446), -INT16_C( 4175), INT16_C( 29349), INT16_C( 12352), INT16_C( 21796), -INT16_C( 5368), -INT16_C( 13809), -INT16_C( 26317), INT16_C( 16905), -INT16_C( 22238), -INT16_C( 26462), -INT16_C( 29573), INT16_C( 29098), INT16_C( 10121), -INT16_C( 22284) }, UINT32_C(3772033582), { INT16_C( 31130), -INT16_C( 9646), INT16_C( 30377), -INT16_C( 19921), INT16_C( 16226), -INT16_C( 27268), -INT16_C( 31272), -INT16_C( 1064), INT16_C( 31278), -INT16_C( 22125), INT16_C( 15622), -INT16_C( 28902), INT16_C( 3685), -INT16_C( 27849), INT16_C( 3001), INT16_C( 21363), -INT16_C( 14972), INT16_C( 11821), INT16_C( 23612), -INT16_C( 24864), INT16_C( 23707), INT16_C( 29747), INT16_C( 3041), INT16_C( 3951), INT16_C( 646), -INT16_C( 29512), -INT16_C( 11457), -INT16_C( 23524), INT16_C( 21473), -INT16_C( 26056), -INT16_C( 21665), -INT16_C( 7187) }, { INT16_C( 8845), -INT16_C( 27605), -INT16_C( 11795), -INT16_C( 29512), INT16_C( 20672), INT16_C( 12102), -INT16_C( 10219), -INT16_C( 17721), INT16_C( 18625), -INT16_C( 27605), -INT16_C( 30449), INT16_C( 31130), -INT16_C( 11659), -INT16_C( 22125), -INT16_C( 523), -INT16_C( 26056), INT16_C( 1823), INT16_C( 11446), INT16_C( 11821), INT16_C( 29349), INT16_C( 7505), INT16_C( 21796), -INT16_C( 8414), -INT16_C( 29968), -INT16_C( 26317), INT16_C( 16905), -INT16_C( 22238), -INT16_C( 26462), -INT16_C( 29573), INT16_C( 15622), INT16_C( 27805), INT16_C( 23707) } }, { { INT16_C( 6769), -INT16_C( 21231), -INT16_C( 3721), INT16_C( 4683), INT16_C( 32333), INT16_C( 11910), -INT16_C( 2678), INT16_C( 4157), -INT16_C( 2313), INT16_C( 14236), -INT16_C( 18231), -INT16_C( 21797), INT16_C( 4876), INT16_C( 27461), INT16_C( 12991), INT16_C( 
12366), INT16_C( 24653), -INT16_C( 15139), INT16_C( 10321), -INT16_C( 24618), INT16_C( 23974), INT16_C( 12493), INT16_C( 2898), INT16_C( 19008), -INT16_C( 8959), -INT16_C( 13695), INT16_C( 23701), -INT16_C( 24204), -INT16_C( 18064), INT16_C( 12044), INT16_C( 23532), INT16_C( 14687) }, { INT16_C( 15547), INT16_C( 3325), -INT16_C( 11420), INT16_C( 2731), INT16_C( 31024), -INT16_C( 31941), INT16_C( 31620), -INT16_C( 31283), INT16_C( 20056), -INT16_C( 4529), -INT16_C( 15446), INT16_C( 6799), -INT16_C( 25475), INT16_C( 26953), -INT16_C( 22281), -INT16_C( 19806), -INT16_C( 24604), INT16_C( 18622), INT16_C( 27250), -INT16_C( 23725), -INT16_C( 28957), INT16_C( 26406), -INT16_C( 3319), INT16_C( 25324), INT16_C( 15169), -INT16_C( 5296), -INT16_C( 8194), INT16_C( 31494), INT16_C( 20347), INT16_C( 29412), -INT16_C( 30984), -INT16_C( 9180) }, UINT32_C(2552619813), { INT16_C( 30797), INT16_C( 12347), INT16_C( 24838), INT16_C( 3991), -INT16_C( 31916), -INT16_C( 27279), -INT16_C( 15938), -INT16_C( 17280), -INT16_C( 31071), INT16_C( 7224), INT16_C( 7382), -INT16_C( 12657), -INT16_C( 19549), -INT16_C( 14166), -INT16_C( 12394), -INT16_C( 7328), -INT16_C( 25785), INT16_C( 19731), -INT16_C( 21764), INT16_C( 20573), -INT16_C( 12755), -INT16_C( 5147), INT16_C( 26256), INT16_C( 12712), -INT16_C( 7956), -INT16_C( 15795), -INT16_C( 8964), -INT16_C( 24688), INT16_C( 15248), INT16_C( 9832), -INT16_C( 14326), INT16_C( 21002) }, { -INT16_C( 24688), INT16_C( 3325), -INT16_C( 31916), INT16_C( 2731), INT16_C( 31024), -INT16_C( 24688), INT16_C( 31620), -INT16_C( 31283), -INT16_C( 8959), INT16_C( 12366), -INT16_C( 15446), INT16_C( 6799), -INT16_C( 25475), INT16_C( 14236), INT16_C( 12712), INT16_C( 24838), -INT16_C( 31916), INT16_C( 18622), -INT16_C( 21764), -INT16_C( 23725), -INT16_C( 28957), -INT16_C( 15938), -INT16_C( 3319), INT16_C( 25324), INT16_C( 15169), -INT16_C( 5296), -INT16_C( 8194), -INT16_C( 2678), -INT16_C( 24688), INT16_C( 29412), -INT16_C( 30984), -INT16_C( 31916) } }, { { INT16_C( 
7524), INT16_C( 24735), -INT16_C( 824), -INT16_C( 2639), -INT16_C( 26933), INT16_C( 23521), -INT16_C( 30212), -INT16_C( 5748), -INT16_C( 9879), INT16_C( 26027), INT16_C( 15542), INT16_C( 17925), INT16_C( 28023), -INT16_C( 32404), INT16_C( 30261), -INT16_C( 26157), INT16_C( 29588), INT16_C( 23802), -INT16_C( 21649), INT16_C( 14929), INT16_C( 12865), INT16_C( 16021), INT16_C( 8635), INT16_C( 9255), -INT16_C( 11525), -INT16_C( 20086), -INT16_C( 28914), -INT16_C( 31241), INT16_C( 25596), INT16_C( 12551), -INT16_C( 9510), INT16_C( 28363) }, { -INT16_C( 15027), -INT16_C( 16950), INT16_C( 7024), -INT16_C( 19977), -INT16_C( 29362), INT16_C( 2543), INT16_C( 5806), -INT16_C( 22226), -INT16_C( 18199), -INT16_C( 2214), INT16_C( 20807), INT16_C( 17277), -INT16_C( 31563), -INT16_C( 28812), INT16_C( 16222), -INT16_C( 21251), -INT16_C( 14588), INT16_C( 29801), INT16_C( 24802), INT16_C( 12326), INT16_C( 5613), -INT16_C( 25542), INT16_C( 26668), INT16_C( 5445), -INT16_C( 24544), INT16_C( 26380), -INT16_C( 30223), -INT16_C( 22870), INT16_C( 7693), INT16_C( 27701), INT16_C( 12894), INT16_C( 25112) }, UINT32_C(3705111033), { -INT16_C( 543), -INT16_C( 12532), INT16_C( 17938), INT16_C( 15979), -INT16_C( 20306), -INT16_C( 12717), INT16_C( 24656), INT16_C( 16949), -INT16_C( 8215), -INT16_C( 2072), INT16_C( 7934), INT16_C( 23651), INT16_C( 31568), INT16_C( 19134), -INT16_C( 27140), -INT16_C( 8922), INT16_C( 12946), -INT16_C( 23124), INT16_C( 6009), INT16_C( 10211), INT16_C( 14280), INT16_C( 6390), INT16_C( 11159), -INT16_C( 32678), INT16_C( 17163), INT16_C( 2423), -INT16_C( 9631), -INT16_C( 20123), INT16_C( 9045), INT16_C( 20987), INT16_C( 8633), INT16_C( 19247) }, { -INT16_C( 32404), -INT16_C( 16950), INT16_C( 7024), -INT16_C( 32678), INT16_C( 30261), -INT16_C( 8922), -INT16_C( 27140), -INT16_C( 27140), -INT16_C( 2072), -INT16_C( 2214), INT16_C( 20807), INT16_C( 17277), -INT16_C( 31563), -INT16_C( 28812), INT16_C( 16222), INT16_C( 20987), -INT16_C( 26933), -INT16_C( 2072), INT16_C( 17938), 
INT16_C( 12326), INT16_C( 19134), -INT16_C( 25542), INT16_C( 31568), INT16_C( 23521), -INT16_C( 24544), INT16_C( 26380), -INT16_C( 23124), INT16_C( 7934), -INT16_C( 32404), INT16_C( 27701), -INT16_C( 9510), -INT16_C( 11525) } }, { { -INT16_C( 9388), -INT16_C( 12816), -INT16_C( 11021), -INT16_C( 17420), -INT16_C( 5621), -INT16_C( 23853), INT16_C( 11798), INT16_C( 8482), -INT16_C( 25999), -INT16_C( 11734), -INT16_C( 28812), -INT16_C( 13693), INT16_C( 32690), INT16_C( 27419), INT16_C( 19104), -INT16_C( 2889), -INT16_C( 22746), INT16_C( 6593), -INT16_C( 18821), -INT16_C( 31020), -INT16_C( 22624), -INT16_C( 18904), INT16_C( 19413), INT16_C( 18135), INT16_C( 485), INT16_C( 22808), -INT16_C( 25456), INT16_C( 17187), INT16_C( 16155), -INT16_C( 17490), INT16_C( 25993), -INT16_C( 20560) }, { INT16_C( 28941), -INT16_C( 30520), -INT16_C( 25561), -INT16_C( 14321), INT16_C( 14148), INT16_C( 6526), INT16_C( 22146), INT16_C( 26464), INT16_C( 30807), -INT16_C( 5951), -INT16_C( 7148), INT16_C( 12075), -INT16_C( 9949), -INT16_C( 21013), -INT16_C( 25793), INT16_C( 19548), INT16_C( 9484), INT16_C( 13524), -INT16_C( 7231), INT16_C( 1532), INT16_C( 31259), -INT16_C( 25313), INT16_C( 32720), INT16_C( 10245), -INT16_C( 14601), INT16_C( 3088), INT16_C( 15274), -INT16_C( 12741), INT16_C( 9748), INT16_C( 21371), -INT16_C( 10303), -INT16_C( 12641) }, UINT32_C(3187832060), { -INT16_C( 425), INT16_C( 29379), -INT16_C( 7560), INT16_C( 18704), INT16_C( 5473), INT16_C( 22897), -INT16_C( 32293), -INT16_C( 31387), -INT16_C( 24388), -INT16_C( 12205), -INT16_C( 12601), -INT16_C( 30684), -INT16_C( 15450), -INT16_C( 23978), INT16_C( 22583), -INT16_C( 28832), INT16_C( 9302), -INT16_C( 12543), INT16_C( 4358), INT16_C( 26648), -INT16_C( 30426), INT16_C( 449), INT16_C( 9738), -INT16_C( 14713), -INT16_C( 9530), -INT16_C( 29290), -INT16_C( 17751), INT16_C( 20246), INT16_C( 27774), -INT16_C( 18959), INT16_C( 21189), INT16_C( 6980) }, { INT16_C( 28941), -INT16_C( 30520), -INT16_C( 31387), -INT16_C( 2889), 
-INT16_C( 5621), INT16_C( 21189), -INT16_C( 11021), -INT16_C( 425), INT16_C( 30807), -INT16_C( 5951), -INT16_C( 22624), INT16_C( 12075), INT16_C( 18704), -INT16_C( 30684), INT16_C( 6980), INT16_C( 19548), INT16_C( 9484), -INT16_C( 22624), -INT16_C( 7231), INT16_C( 1532), INT16_C( 31259), -INT16_C( 25313), INT16_C( 32720), INT16_C( 10245), -INT16_C( 14601), -INT16_C( 22746), -INT16_C( 12601), INT16_C( 20246), -INT16_C( 22624), INT16_C( 20246), -INT16_C( 10303), -INT16_C( 20560) } }, { { INT16_C( 18038), INT16_C( 31978), INT16_C( 599), INT16_C( 32484), -INT16_C( 23157), -INT16_C( 27265), INT16_C( 1739), -INT16_C( 28069), -INT16_C( 3359), -INT16_C( 30177), INT16_C( 13740), INT16_C( 10969), -INT16_C( 13662), INT16_C( 26592), INT16_C( 9244), -INT16_C( 28030), INT16_C( 28010), -INT16_C( 15857), -INT16_C( 3217), -INT16_C( 1216), -INT16_C( 16487), INT16_C( 25744), -INT16_C( 4922), -INT16_C( 22538), INT16_C( 5854), -INT16_C( 30159), INT16_C( 2635), -INT16_C( 4683), -INT16_C( 27180), -INT16_C( 3756), -INT16_C( 10311), INT16_C( 9347) }, { -INT16_C( 28092), -INT16_C( 19482), INT16_C( 9862), INT16_C( 8110), INT16_C( 16357), -INT16_C( 21629), INT16_C( 31275), INT16_C( 2386), -INT16_C( 31856), -INT16_C( 9325), INT16_C( 18573), INT16_C( 25289), INT16_C( 7645), -INT16_C( 26797), -INT16_C( 10508), INT16_C( 14523), -INT16_C( 24215), -INT16_C( 4116), -INT16_C( 25913), -INT16_C( 21490), -INT16_C( 28199), INT16_C( 1112), -INT16_C( 22005), -INT16_C( 25843), -INT16_C( 24274), -INT16_C( 17545), INT16_C( 16617), -INT16_C( 14563), INT16_C( 28765), INT16_C( 21086), INT16_C( 6471), -INT16_C( 20342) }, UINT32_C(2174711482), { -INT16_C( 21231), -INT16_C( 5587), -INT16_C( 31426), INT16_C( 19183), -INT16_C( 976), INT16_C( 24293), INT16_C( 23709), -INT16_C( 30951), INT16_C( 14236), -INT16_C( 1458), -INT16_C( 21337), -INT16_C( 4532), -INT16_C( 10555), INT16_C( 32670), INT16_C( 15693), INT16_C( 24064), INT16_C( 11754), INT16_C( 10568), INT16_C( 14259), -INT16_C( 7309), INT16_C( 22580), -INT16_C( 
11967), INT16_C( 23221), INT16_C( 20824), -INT16_C( 22895), INT16_C( 14667), -INT16_C( 26798), INT16_C( 5927), -INT16_C( 14738), -INT16_C( 17514), -INT16_C( 27133), -INT16_C( 4583) }, { -INT16_C( 28092), INT16_C( 23709), INT16_C( 9862), INT16_C( 15693), INT16_C( 24293), INT16_C( 32484), INT16_C( 31275), -INT16_C( 3217), -INT16_C( 31856), -INT16_C( 1216), INT16_C( 26592), INT16_C( 25289), -INT16_C( 3756), -INT16_C( 1216), INT16_C( 22580), INT16_C( 14523), -INT16_C( 1458), -INT16_C( 10555), -INT16_C( 28069), INT16_C( 9244), -INT16_C( 30159), INT16_C( 1112), -INT16_C( 22005), INT16_C( 26592), INT16_C( 15693), -INT16_C( 17545), INT16_C( 16617), -INT16_C( 14563), INT16_C( 28765), INT16_C( 21086), INT16_C( 6471), INT16_C( 13740) } }, { { INT16_C( 25028), INT16_C( 30487), -INT16_C( 30055), -INT16_C( 12966), -INT16_C( 25630), -INT16_C( 26722), -INT16_C( 2059), -INT16_C( 30743), INT16_C( 13469), -INT16_C( 3904), -INT16_C( 6196), INT16_C( 14855), -INT16_C( 24915), -INT16_C( 19979), INT16_C( 3636), -INT16_C( 1889), -INT16_C( 18833), INT16_C( 2159), -INT16_C( 14016), INT16_C( 8917), INT16_C( 29796), INT16_C( 23226), -INT16_C( 23701), INT16_C( 2273), -INT16_C( 24105), -INT16_C( 23560), INT16_C( 136), INT16_C( 14045), -INT16_C( 11618), -INT16_C( 11545), -INT16_C( 31008), INT16_C( 20683) }, { INT16_C( 14908), INT16_C( 31832), INT16_C( 11780), INT16_C( 26782), INT16_C( 22690), INT16_C( 3522), -INT16_C( 23557), -INT16_C( 11499), INT16_C( 3652), -INT16_C( 12938), INT16_C( 21518), -INT16_C( 21501), -INT16_C( 5594), INT16_C( 1918), INT16_C( 18800), -INT16_C( 21417), -INT16_C( 20604), -INT16_C( 30680), -INT16_C( 14627), INT16_C( 32752), -INT16_C( 19681), INT16_C( 6796), -INT16_C( 23978), -INT16_C( 25619), INT16_C( 25776), -INT16_C( 16792), INT16_C( 27576), -INT16_C( 8598), -INT16_C( 6059), -INT16_C( 14875), INT16_C( 15410), -INT16_C( 18831) }, UINT32_C(3376323052), { INT16_C( 11871), INT16_C( 32329), -INT16_C( 10783), INT16_C( 14489), -INT16_C( 31113), INT16_C( 10195), INT16_C( 15338), 
-INT16_C( 23835), INT16_C( 20390), -INT16_C( 1151), INT16_C( 26168), INT16_C( 27328), INT16_C( 12707), -INT16_C( 28896), INT16_C( 24266), INT16_C( 10584), -INT16_C( 24180), INT16_C( 28328), INT16_C( 16759), -INT16_C( 4442), INT16_C( 31175), -INT16_C( 19946), -INT16_C( 1100), INT16_C( 23124), -INT16_C( 10933), -INT16_C( 31915), INT16_C( 5436), -INT16_C( 8211), INT16_C( 3398), INT16_C( 4206), -INT16_C( 14741), -INT16_C( 2247) }, { INT16_C( 14908), INT16_C( 31832), -INT16_C( 25630), -INT16_C( 31008), INT16_C( 22690), -INT16_C( 30055), -INT16_C( 8211), INT16_C( 23226), -INT16_C( 25630), -INT16_C( 12938), INT16_C( 21518), -INT16_C( 12966), INT16_C( 15338), INT16_C( 1918), INT16_C( 18800), INT16_C( 2273), -INT16_C( 20604), INT16_C( 20390), -INT16_C( 11545), -INT16_C( 24180), INT16_C( 20683), -INT16_C( 24915), -INT16_C( 23978), -INT16_C( 25619), -INT16_C( 24180), -INT16_C( 16792), INT16_C( 27576), INT16_C( 26168), -INT16_C( 6059), -INT16_C( 14875), INT16_C( 16759), INT16_C( 28328) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i idx = simde_mm512_loadu_epi16(test_vec[i].idx); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_mask2_permutex2var_epi16(a, idx, test_vec[i].k, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i16x32(); simde__m512i idx = simde_test_x86_random_i16x32(); simde__mmask32 k = simde_test_x86_random_mmask32(); simde__m512i b = simde_test_x86_random_i16x32(); simde__m512i r = simde_mm512_mask2_permutex2var_epi16(a, idx, k, b); simde_test_x86_write_i16x32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x32(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_x86_write_i16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_maskz_permutex2var_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask32 k; const int16_t a[32]; const int16_t idx[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { UINT32_C(4126477146), { INT16_C( 7449), INT16_C( 15175), -INT16_C( 25698), -INT16_C( 6637), INT16_C( 26912), -INT16_C( 32056), INT16_C( 14295), -INT16_C( 9209), -INT16_C( 32337), INT16_C( 31179), -INT16_C( 15345), -INT16_C( 8630), INT16_C( 32637), -INT16_C( 10444), INT16_C( 10634), -INT16_C( 23348), INT16_C( 4934), -INT16_C( 6945), -INT16_C( 3410), -INT16_C( 12598), -INT16_C( 27812), INT16_C( 13136), INT16_C( 22730), INT16_C( 30991), -INT16_C( 9255), -INT16_C( 5646), INT16_C( 15519), INT16_C( 7367), -INT16_C( 1092), INT16_C( 18163), -INT16_C( 16348), INT16_C( 27626) }, { -INT16_C( 13613), -INT16_C( 32177), INT16_C( 6844), INT16_C( 6224), -INT16_C( 24147), INT16_C( 30539), INT16_C( 23545), -INT16_C( 11536), -INT16_C( 7626), -INT16_C( 10821), -INT16_C( 31970), -INT16_C( 9486), -INT16_C( 6786), -INT16_C( 23775), INT16_C( 2981), INT16_C( 30990), INT16_C( 24021), -INT16_C( 27909), INT16_C( 19319), INT16_C( 9386), -INT16_C( 2324), -INT16_C( 6757), -INT16_C( 29871), -INT16_C( 30792), INT16_C( 29549), -INT16_C( 29604), INT16_C( 20214), INT16_C( 30054), -INT16_C( 30924), -INT16_C( 9960), INT16_C( 9875), INT16_C( 26706) }, { INT16_C( 19843), -INT16_C( 1030), -INT16_C( 23143), -INT16_C( 31457), -INT16_C( 17509), -INT16_C( 5013), INT16_C( 9030), -INT16_C( 19341), -INT16_C( 12394), -INT16_C( 29376), -INT16_C( 23010), INT16_C( 20994), INT16_C( 6702), -INT16_C( 16085), INT16_C( 32320), -INT16_C( 15575), INT16_C( 9419), INT16_C( 25790), -INT16_C( 8503), INT16_C( 25834), INT16_C( 21913), -INT16_C( 8368), -INT16_C( 15496), INT16_C( 3731), -INT16_C( 11374), -INT16_C( 20325), -INT16_C( 25222), -INT16_C( 22526), INT16_C( 11959), -INT16_C( 2199), -INT16_C( 27988), INT16_C( 
30651) }, { INT16_C( 0), -INT16_C( 23348), INT16_C( 0), INT16_C( 4934), -INT16_C( 16085), INT16_C( 0), -INT16_C( 20325), INT16_C( 0), -INT16_C( 15496), -INT16_C( 22526), INT16_C( 0), -INT16_C( 8503), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 13136), INT16_C( 0), INT16_C( 3731), INT16_C( 0), INT16_C( 6702), INT16_C( 7367), -INT16_C( 6945), -INT16_C( 11374), -INT16_C( 16085), INT16_C( 0), -INT16_C( 15496), INT16_C( 0), INT16_C( 21913), -INT16_C( 9255), -INT16_C( 12598), -INT16_C( 3410) } }, { UINT32_C(2145155510), { -INT16_C( 14761), -INT16_C( 3869), INT16_C( 13083), -INT16_C( 27696), INT16_C( 25590), -INT16_C( 30303), INT16_C( 15671), -INT16_C( 20167), INT16_C( 15578), -INT16_C( 28071), -INT16_C( 15766), INT16_C( 5769), INT16_C( 17492), INT16_C( 2957), INT16_C( 27070), INT16_C( 5514), INT16_C( 28207), INT16_C( 18950), -INT16_C( 10591), -INT16_C( 26403), INT16_C( 32569), INT16_C( 28705), INT16_C( 23228), -INT16_C( 27103), INT16_C( 31382), INT16_C( 40), -INT16_C( 19908), -INT16_C( 28394), -INT16_C( 23306), -INT16_C( 19300), INT16_C( 9741), INT16_C( 15818) }, { -INT16_C( 12140), INT16_C( 13959), INT16_C( 26022), -INT16_C( 8242), -INT16_C( 4124), -INT16_C( 24496), INT16_C( 29001), -INT16_C( 8138), INT16_C( 24556), INT16_C( 10464), -INT16_C( 2287), INT16_C( 1977), INT16_C( 21915), -INT16_C( 22340), -INT16_C( 31108), INT16_C( 4325), INT16_C( 27990), -INT16_C( 954), INT16_C( 5330), -INT16_C( 18725), INT16_C( 11011), INT16_C( 19798), -INT16_C( 29539), -INT16_C( 30419), INT16_C( 3563), -INT16_C( 847), INT16_C( 27396), -INT16_C( 24828), -INT16_C( 16192), INT16_C( 15432), INT16_C( 11590), -INT16_C( 25523) }, { -INT16_C( 27750), INT16_C( 27800), INT16_C( 29608), -INT16_C( 21726), INT16_C( 30879), INT16_C( 15608), INT16_C( 9477), -INT16_C( 3899), INT16_C( 30259), INT16_C( 14317), -INT16_C( 3615), -INT16_C( 23849), INT16_C( 8113), -INT16_C( 2082), INT16_C( 11084), -INT16_C( 6253), INT16_C( 11199), INT16_C( 26451), INT16_C( 30366), INT16_C( 15634), INT16_C( 
3054), -INT16_C( 3207), INT16_C( 15920), INT16_C( 25572), -INT16_C( 11851), -INT16_C( 26981), INT16_C( 29378), INT16_C( 29496), INT16_C( 6033), -INT16_C( 8854), -INT16_C( 702), INT16_C( 452) }, { INT16_C( 0), -INT16_C( 20167), INT16_C( 9477), INT16_C( 0), INT16_C( 30879), INT16_C( 28207), INT16_C( 0), INT16_C( 15920), INT16_C( 8113), INT16_C( 0), INT16_C( 0), -INT16_C( 26981), -INT16_C( 28394), INT16_C( 6033), INT16_C( 6033), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 10591), -INT16_C( 28394), -INT16_C( 27696), INT16_C( 0), -INT16_C( 19300), -INT16_C( 2082), -INT16_C( 23849), INT16_C( 26451), INT16_C( 25590), INT16_C( 25590), -INT16_C( 14761), INT16_C( 15578), INT16_C( 15671), INT16_C( 0) } }, { UINT32_C(3328710696), { INT16_C( 31630), INT16_C( 31748), INT16_C( 32134), -INT16_C( 18832), INT16_C( 21692), INT16_C( 28954), -INT16_C( 19163), -INT16_C( 6393), INT16_C( 16423), -INT16_C( 18342), -INT16_C( 15273), -INT16_C( 26219), INT16_C( 23233), -INT16_C( 5733), INT16_C( 882), INT16_C( 175), -INT16_C( 19586), INT16_C( 1148), -INT16_C( 5071), -INT16_C( 4677), -INT16_C( 10944), INT16_C( 25950), INT16_C( 25994), -INT16_C( 20148), -INT16_C( 22875), -INT16_C( 919), -INT16_C( 406), INT16_C( 11158), INT16_C( 12632), -INT16_C( 13804), -INT16_C( 15308), -INT16_C( 19510) }, { INT16_C( 18295), -INT16_C( 22345), INT16_C( 29235), INT16_C( 29845), -INT16_C( 3257), -INT16_C( 11815), INT16_C( 9817), -INT16_C( 382), -INT16_C( 5172), INT16_C( 14331), -INT16_C( 28182), INT16_C( 16994), INT16_C( 30658), -INT16_C( 2547), -INT16_C( 10437), -INT16_C( 19799), INT16_C( 24862), INT16_C( 21083), -INT16_C( 3885), INT16_C( 7110), -INT16_C( 24604), INT16_C( 15852), INT16_C( 28613), -INT16_C( 28101), INT16_C( 13914), INT16_C( 17609), INT16_C( 11207), -INT16_C( 30329), -INT16_C( 27486), -INT16_C( 8832), INT16_C( 10603), -INT16_C( 30064) }, { -INT16_C( 5238), INT16_C( 24284), -INT16_C( 23845), -INT16_C( 16519), INT16_C( 25921), INT16_C( 2044), INT16_C( 14548), INT16_C( 12185), INT16_C( 25198), 
INT16_C( 13939), -INT16_C( 1395), INT16_C( 12479), INT16_C( 16270), -INT16_C( 1523), -INT16_C( 25239), -INT16_C( 3196), INT16_C( 24712), INT16_C( 25681), -INT16_C( 13822), INT16_C( 17187), INT16_C( 8240), INT16_C( 1098), -INT16_C( 7336), -INT16_C( 14797), -INT16_C( 22715), -INT16_C( 11268), -INT16_C( 17247), INT16_C( 12291), INT16_C( 4347), INT16_C( 25642), -INT16_C( 20818), INT16_C( 13912) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 25950), INT16_C( 0), -INT16_C( 919), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 23845), INT16_C( 32134), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 19163), INT16_C( 0), INT16_C( 16270), INT16_C( 28954), INT16_C( 0), INT16_C( 0), -INT16_C( 18342), -INT16_C( 6393), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 12479), -INT16_C( 19586) } }, { UINT32_C( 278571278), { -INT16_C( 16780), -INT16_C( 23469), -INT16_C( 24866), INT16_C( 13992), -INT16_C( 9087), -INT16_C( 14340), -INT16_C( 1661), INT16_C( 9370), -INT16_C( 25163), -INT16_C( 20396), INT16_C( 32429), INT16_C( 23317), INT16_C( 27948), INT16_C( 14994), INT16_C( 11286), -INT16_C( 30134), -INT16_C( 24854), -INT16_C( 14290), -INT16_C( 10436), -INT16_C( 16898), -INT16_C( 1101), INT16_C( 13956), INT16_C( 7924), -INT16_C( 22182), -INT16_C( 20549), INT16_C( 26969), INT16_C( 28205), INT16_C( 23236), INT16_C( 22235), -INT16_C( 3436), -INT16_C( 8317), INT16_C( 28028) }, { -INT16_C( 21635), -INT16_C( 18122), INT16_C( 13442), INT16_C( 13686), -INT16_C( 1233), INT16_C( 9067), -INT16_C( 15079), -INT16_C( 10804), INT16_C( 9844), -INT16_C( 24002), INT16_C( 660), INT16_C( 28924), -INT16_C( 28583), -INT16_C( 9118), -INT16_C( 8593), -INT16_C( 5047), INT16_C( 32649), INT16_C( 2981), INT16_C( 7348), -INT16_C( 7360), -INT16_C( 21737), INT16_C( 12295), -INT16_C( 11407), -INT16_C( 6907), INT16_C( 17401), -INT16_C( 29049), -INT16_C( 31930), -INT16_C( 24578), INT16_C( 24596), -INT16_C( 31877), -INT16_C( 15298), -INT16_C( 
14224) }, { INT16_C( 5444), -INT16_C( 1837), INT16_C( 5169), INT16_C( 18651), -INT16_C( 7489), INT16_C( 12409), INT16_C( 32438), -INT16_C( 20714), -INT16_C( 25150), INT16_C( 2109), INT16_C( 15137), INT16_C( 13735), INT16_C( 8859), -INT16_C( 9544), INT16_C( 10470), INT16_C( 10914), INT16_C( 30014), INT16_C( 28450), -INT16_C( 375), INT16_C( 18872), INT16_C( 12768), -INT16_C( 27015), -INT16_C( 28753), INT16_C( 28998), -INT16_C( 31955), INT16_C( 20089), INT16_C( 8383), INT16_C( 23171), INT16_C( 15170), INT16_C( 10548), -INT16_C( 10652), -INT16_C( 23981) }, { INT16_C( 0), -INT16_C( 28753), -INT16_C( 24866), -INT16_C( 28753), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 12768), INT16_C( 0), INT16_C( 0), INT16_C( 15170), INT16_C( 0), INT16_C( 5169), INT16_C( 0), -INT16_C( 20396), INT16_C( 0), INT16_C( 12409), INT16_C( 0), -INT16_C( 16780), -INT16_C( 22182), INT16_C( 0), INT16_C( 0), -INT16_C( 14340), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1101), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT32_C(3574691404), { -INT16_C( 13964), INT16_C( 21534), -INT16_C( 26374), -INT16_C( 21781), INT16_C( 12583), INT16_C( 21531), -INT16_C( 27212), INT16_C( 29602), INT16_C( 9653), -INT16_C( 1842), INT16_C( 609), -INT16_C( 15071), INT16_C( 29913), INT16_C( 9575), INT16_C( 30954), INT16_C( 24314), INT16_C( 6466), INT16_C( 15539), -INT16_C( 24911), -INT16_C( 10010), INT16_C( 719), -INT16_C( 31955), -INT16_C( 12393), INT16_C( 19703), -INT16_C( 14859), INT16_C( 22084), INT16_C( 26055), -INT16_C( 24549), -INT16_C( 32038), -INT16_C( 15163), -INT16_C( 16134), INT16_C( 15395) }, { -INT16_C( 10535), -INT16_C( 30087), INT16_C( 24436), INT16_C( 17250), -INT16_C( 28831), -INT16_C( 1850), -INT16_C( 17057), INT16_C( 21573), -INT16_C( 30334), INT16_C( 19114), -INT16_C( 14865), -INT16_C( 13846), -INT16_C( 20409), INT16_C( 16781), -INT16_C( 20368), INT16_C( 18814), -INT16_C( 2170), -INT16_C( 1325), INT16_C( 13654), -INT16_C( 18371), INT16_C( 1221), INT16_C( 9392), 
-INT16_C( 2623), INT16_C( 17528), INT16_C( 8831), INT16_C( 28302), INT16_C( 30951), INT16_C( 11831), -INT16_C( 15320), -INT16_C( 26513), -INT16_C( 4747), -INT16_C( 1055) }, { -INT16_C( 19228), INT16_C( 15350), INT16_C( 13290), -INT16_C( 20493), -INT16_C( 23753), -INT16_C( 1581), INT16_C( 19353), INT16_C( 6205), -INT16_C( 13459), INT16_C( 21638), -INT16_C( 17085), INT16_C( 27778), -INT16_C( 3711), -INT16_C( 2556), -INT16_C( 6433), -INT16_C( 15374), -INT16_C( 5990), -INT16_C( 31490), -INT16_C( 3813), INT16_C( 21299), INT16_C( 1685), INT16_C( 11852), -INT16_C( 30383), -INT16_C( 16826), -INT16_C( 13228), -INT16_C( 26862), -INT16_C( 27511), INT16_C( 2563), INT16_C( 2182), INT16_C( 25857), -INT16_C( 3090), -INT16_C( 30680) }, { INT16_C( 0), INT16_C( 0), INT16_C( 1685), INT16_C( 13290), INT16_C( 0), INT16_C( 0), INT16_C( 15395), INT16_C( 0), INT16_C( 0), -INT16_C( 17085), -INT16_C( 15374), INT16_C( 0), INT16_C( 29602), INT16_C( 9575), -INT16_C( 5990), INT16_C( 0), -INT16_C( 27212), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 21531), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 30680), INT16_C( 0), INT16_C( 6205), INT16_C( 0), -INT16_C( 13459), INT16_C( 0), INT16_C( 11852), INT16_C( 15350) } }, { UINT32_C(4128057307), { INT16_C( 16408), -INT16_C( 21175), -INT16_C( 27321), -INT16_C( 26405), INT16_C( 8478), INT16_C( 29271), INT16_C( 27117), INT16_C( 30218), INT16_C( 3582), -INT16_C( 31615), -INT16_C( 32235), INT16_C( 1001), INT16_C( 4469), INT16_C( 20620), -INT16_C( 26312), INT16_C( 20806), -INT16_C( 28455), INT16_C( 8446), -INT16_C( 9691), INT16_C( 17593), INT16_C( 4347), -INT16_C( 5706), -INT16_C( 16263), INT16_C( 30559), -INT16_C( 7986), -INT16_C( 7173), -INT16_C( 7070), -INT16_C( 10265), INT16_C( 29686), INT16_C( 11815), INT16_C( 28172), -INT16_C( 6785) }, { INT16_C( 32510), INT16_C( 8966), -INT16_C( 16552), INT16_C( 21351), INT16_C( 7887), INT16_C( 18492), -INT16_C( 25378), -INT16_C( 21312), -INT16_C( 17540), -INT16_C( 8304), INT16_C( 30624), -INT16_C( 26954), 
-INT16_C( 8470), -INT16_C( 2364), INT16_C( 17484), INT16_C( 19163), -INT16_C( 7742), INT16_C( 6765), -INT16_C( 10848), INT16_C( 28525), -INT16_C( 21773), -INT16_C( 11848), INT16_C( 30790), -INT16_C( 15746), INT16_C( 3635), -INT16_C( 11359), INT16_C( 22661), INT16_C( 28521), INT16_C( 11830), -INT16_C( 32155), INT16_C( 16498), INT16_C( 13516) }, { INT16_C( 14626), -INT16_C( 15794), -INT16_C( 17650), INT16_C( 306), -INT16_C( 5531), -INT16_C( 21549), INT16_C( 20834), -INT16_C( 27282), INT16_C( 3935), -INT16_C( 7063), -INT16_C( 11673), -INT16_C( 25261), -INT16_C( 18432), INT16_C( 29215), -INT16_C( 5128), INT16_C( 6822), -INT16_C( 3035), INT16_C( 13277), INT16_C( 4016), INT16_C( 5429), INT16_C( 2297), INT16_C( 23489), INT16_C( 12121), -INT16_C( 18192), INT16_C( 22846), -INT16_C( 22884), -INT16_C( 4308), INT16_C( 11331), INT16_C( 25511), -INT16_C( 24673), INT16_C( 17742), INT16_C( 29626) }, { INT16_C( 17742), INT16_C( 27117), INT16_C( 0), -INT16_C( 27282), INT16_C( 20806), INT16_C( 0), INT16_C( 28172), INT16_C( 16408), INT16_C( 25511), -INT16_C( 28455), INT16_C( 14626), INT16_C( 0), INT16_C( 0), INT16_C( 8478), INT16_C( 0), INT16_C( 0), -INT16_C( 27321), INT16_C( 0), INT16_C( 14626), INT16_C( 29215), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 15794), INT16_C( 29271), INT16_C( 0), INT16_C( 12121), -INT16_C( 21549), INT16_C( 4016), INT16_C( 4469) } }, { UINT32_C(3936851770), { -INT16_C( 9050), -INT16_C( 24577), -INT16_C( 16156), INT16_C( 15866), -INT16_C( 5393), INT16_C( 12021), -INT16_C( 28348), INT16_C( 28884), INT16_C( 6016), INT16_C( 10140), INT16_C( 15226), -INT16_C( 13882), -INT16_C( 32639), -INT16_C( 17604), -INT16_C( 7401), -INT16_C( 16987), -INT16_C( 23361), -INT16_C( 23716), INT16_C( 22117), INT16_C( 21728), -INT16_C( 10943), -INT16_C( 31358), INT16_C( 22118), -INT16_C( 6411), -INT16_C( 28306), -INT16_C( 6131), -INT16_C( 11059), INT16_C( 20145), -INT16_C( 4524), INT16_C( 27657), -INT16_C( 20783), -INT16_C( 28375) }, { -INT16_C( 
31150), -INT16_C( 18636), INT16_C( 5596), INT16_C( 7436), -INT16_C( 28950), INT16_C( 20898), -INT16_C( 26651), INT16_C( 21303), INT16_C( 17705), -INT16_C( 2501), -INT16_C( 4839), INT16_C( 27972), INT16_C( 19931), -INT16_C( 21287), INT16_C( 1019), INT16_C( 19773), INT16_C( 29321), INT16_C( 25861), INT16_C( 4487), INT16_C( 29059), INT16_C( 9631), -INT16_C( 31550), -INT16_C( 1347), -INT16_C( 6441), INT16_C( 4927), INT16_C( 22748), INT16_C( 8192), -INT16_C( 9275), -INT16_C( 24723), INT16_C( 26759), -INT16_C( 14942), INT16_C( 11189) }, { -INT16_C( 17865), -INT16_C( 16752), INT16_C( 5067), INT16_C( 27439), -INT16_C( 3527), -INT16_C( 2321), -INT16_C( 14356), INT16_C( 11228), -INT16_C( 18214), -INT16_C( 9597), INT16_C( 18648), INT16_C( 17845), INT16_C( 15591), -INT16_C( 30291), INT16_C( 25089), INT16_C( 14516), INT16_C( 17693), -INT16_C( 5898), INT16_C( 9816), -INT16_C( 28333), INT16_C( 17176), INT16_C( 1159), INT16_C( 25354), -INT16_C( 7121), -INT16_C( 19941), -INT16_C( 3138), INT16_C( 29690), -INT16_C( 7624), -INT16_C( 6737), -INT16_C( 20117), INT16_C( 8264), INT16_C( 26089) }, { INT16_C( 0), INT16_C( 17176), INT16_C( 0), -INT16_C( 32639), INT16_C( 18648), INT16_C( 5067), INT16_C( 0), INT16_C( 0), -INT16_C( 9597), -INT16_C( 7624), -INT16_C( 6131), INT16_C( 0), INT16_C( 20145), INT16_C( 0), INT16_C( 0), -INT16_C( 20117), INT16_C( 10140), INT16_C( 12021), INT16_C( 28884), INT16_C( 0), INT16_C( 0), -INT16_C( 16156), INT16_C( 0), -INT16_C( 6411), INT16_C( 0), -INT16_C( 4524), INT16_C( 0), INT16_C( 12021), INT16_C( 0), INT16_C( 28884), INT16_C( 5067), INT16_C( 1159) } }, { UINT32_C(3175997541), { -INT16_C( 24314), INT16_C( 7759), -INT16_C( 10524), -INT16_C( 4574), INT16_C( 20794), INT16_C( 21970), -INT16_C( 28669), -INT16_C( 695), -INT16_C( 32509), -INT16_C( 19745), INT16_C( 19303), -INT16_C( 20637), INT16_C( 19819), -INT16_C( 12268), INT16_C( 24877), INT16_C( 13197), -INT16_C( 9214), -INT16_C( 6575), INT16_C( 29619), -INT16_C( 4652), -INT16_C( 22844), -INT16_C( 14526), 
-INT16_C( 29898), INT16_C( 14788), -INT16_C( 23539), INT16_C( 29932), INT16_C( 20463), INT16_C( 23075), INT16_C( 14236), -INT16_C( 14038), -INT16_C( 18536), -INT16_C( 25604) }, { INT16_C( 19860), INT16_C( 18305), INT16_C( 22208), -INT16_C( 31692), INT16_C( 30460), INT16_C( 13131), INT16_C( 4098), INT16_C( 3948), INT16_C( 22708), -INT16_C( 23677), -INT16_C( 22872), INT16_C( 17661), INT16_C( 10205), INT16_C( 29966), INT16_C( 2782), INT16_C( 29200), -INT16_C( 28072), INT16_C( 6329), -INT16_C( 4632), -INT16_C( 7011), -INT16_C( 6044), INT16_C( 26135), -INT16_C( 31496), -INT16_C( 21387), -INT16_C( 1828), -INT16_C( 31665), INT16_C( 19614), INT16_C( 31689), -INT16_C( 10381), INT16_C( 21232), INT16_C( 481), INT16_C( 14788) }, { INT16_C( 32403), INT16_C( 31570), -INT16_C( 4245), -INT16_C( 12449), INT16_C( 30679), -INT16_C( 12235), -INT16_C( 21765), -INT16_C( 10372), -INT16_C( 13150), INT16_C( 16476), INT16_C( 9496), -INT16_C( 29509), -INT16_C( 21252), -INT16_C( 8738), -INT16_C( 23891), INT16_C( 16407), INT16_C( 26912), -INT16_C( 29509), INT16_C( 6744), INT16_C( 12123), -INT16_C( 28271), -INT16_C( 29441), INT16_C( 31803), -INT16_C( 8604), -INT16_C( 16312), INT16_C( 24606), -INT16_C( 9499), -INT16_C( 7700), -INT16_C( 13690), INT16_C( 13246), -INT16_C( 10899), -INT16_C( 29325) }, { -INT16_C( 22844), INT16_C( 0), -INT16_C( 24314), INT16_C( 0), INT16_C( 0), -INT16_C( 20637), -INT16_C( 10524), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 24877), -INT16_C( 18536), -INT16_C( 9214), -INT16_C( 23539), INT16_C( 0), -INT16_C( 13150), -INT16_C( 14038), INT16_C( 0), INT16_C( 0), -INT16_C( 16312), INT16_C( 0), INT16_C( 14236), INT16_C( 0), -INT16_C( 18536), -INT16_C( 19745), INT16_C( 12123), INT16_C( 26912), INT16_C( 0), INT16_C( 20794) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i idx = simde_mm512_loadu_epi16(test_vec[i].idx); simde__m512i b 
= simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_maskz_permutex2var_epi16(test_vec[i].k, a, idx, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask32 k = simde_test_x86_random_mmask32(); simde__m512i a = simde_test_x86_random_i16x32(); simde__m512i idx = simde_test_x86_random_i16x32(); simde__m512i b = simde_test_x86_random_i16x32(); simde__m512i r = simde_mm512_maskz_permutex2var_epi16(k, a, idx, b); simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_permutex2var_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i idx; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 1996766677), INT32_C( 914731069), INT32_C( 1945861252), INT32_C( 879354074), INT32_C( -643998219), INT32_C( -855842922), INT32_C( 1434025670), INT32_C( -672258087), INT32_C( 1290251647), INT32_C(-1094826982), INT32_C( 238338636), INT32_C( -252228416), INT32_C( 122123135), INT32_C( 750368606), INT32_C( -381589944), INT32_C( 924273218)), simde_mm512_set_epi32(INT32_C( -879327216), INT32_C( 1748554959), INT32_C(-1487719285), INT32_C( 1548675310), INT32_C( -557015483), INT32_C(-2047402472), INT32_C( -322319998), INT32_C( 1785739054), INT32_C( 318403170), INT32_C(-1131639819), INT32_C(-1187670028), INT32_C( 146529946), INT32_C(-1037158278), INT32_C( 297634970), INT32_C( 1928669860), INT32_C( -102995392)), simde_mm512_set_epi32(INT32_C( 1106394199), INT32_C( 1786608232), INT32_C( 477078085), INT32_C(-1779632362), INT32_C( -966979640), INT32_C( -998912797), INT32_C( 1045049163), INT32_C(-1292333722), INT32_C( 
1183794740), INT32_C( -731066686), INT32_C( 511551854), INT32_C( 1167565783), INT32_C( 809184406), INT32_C(-1484085267), INT32_C( -106978631), INT32_C(-1748122897)), simde_mm512_set_epi32(INT32_C(-1748122897), INT32_C( 1996766677), INT32_C( -643998219), INT32_C( 914731069), INT32_C( 238338636), INT32_C(-1292333722), INT32_C( 750368606), INT32_C( 914731069), INT32_C( 750368606), INT32_C( 511551854), INT32_C( 1167565783), INT32_C( -998912797), INT32_C( -998912797), INT32_C( -998912797), INT32_C( -252228416), INT32_C( 924273218)) }, { simde_mm512_set_epi32(INT32_C( -572206162), INT32_C(-1229665005), INT32_C(-2082924696), INT32_C( -649416979), INT32_C( 2112092702), INT32_C( -909261280), INT32_C(-1418185146), INT32_C(-1714733997), INT32_C( 1030181563), INT32_C( -291680665), INT32_C(-1617776213), INT32_C( 2112788110), INT32_C( 207483507), INT32_C( 1144876369), INT32_C( 1177485782), INT32_C( 1952824319)), simde_mm512_set_epi32(INT32_C( -736462739), INT32_C( 1787530508), INT32_C( 1887757958), INT32_C(-2099529631), INT32_C( 262676022), INT32_C( 53886530), INT32_C( 1760597059), INT32_C( 1505905484), INT32_C( 774167251), INT32_C( 967143468), INT32_C( 868941550), INT32_C(-1213290419), INT32_C( 1972608228), INT32_C( 11142680), INT32_C(-1446276724), INT32_C(-1200659658)), simde_mm512_set_epi32(INT32_C(-1351086746), INT32_C(-1617825510), INT32_C( -53781400), INT32_C( -324253949), INT32_C( 277157108), INT32_C(-1362182408), INT32_C( -600988389), INT32_C(-1661173616), INT32_C( 988661733), INT32_C(-1651241157), INT32_C( 751941929), INT32_C( -37019801), INT32_C( 498171562), INT32_C( 2017003975), INT32_C(-1441476496), INT32_C(-1365008121)), simde_mm512_set_epi32(INT32_C(-2082924696), INT32_C( -649416979), INT32_C( -291680665), INT32_C( 1177485782), INT32_C(-1651241157), INT32_C( 1144876369), INT32_C( 207483507), INT32_C( -649416979), INT32_C( 498171562), INT32_C( -649416979), INT32_C(-1229665005), INT32_C(-2082924696), INT32_C( 2112788110), INT32_C(-1661173616), INT32_C( -649416979), 
INT32_C(-1651241157)) }, { simde_mm512_set_epi32(INT32_C(-2111484726), INT32_C(-1666865657), INT32_C( 905297213), INT32_C( 873020550), INT32_C(-1639789891), INT32_C( 587061870), INT32_C(-2122729525), INT32_C( 1912650916), INT32_C( -12276084), INT32_C(-1060302964), INT32_C( 650767651), INT32_C( -997283137), INT32_C( 2144598253), INT32_C( 298450302), INT32_C( 1083198112), INT32_C( -506043419)), simde_mm512_set_epi32(INT32_C( -134982351), INT32_C(-1321868808), INT32_C( -899996596), INT32_C(-1289649119), INT32_C( -206201920), INT32_C( 1680053368), INT32_C(-1305508907), INT32_C(-2067500681), INT32_C(-1529397706), INT32_C( 1803067419), INT32_C( 1059939268), INT32_C( 399509097), INT32_C( -338549599), INT32_C( -31125095), INT32_C(-1633544688), INT32_C(-1398964227)), simde_mm512_set_epi32(INT32_C(-1660298113), INT32_C( 39258193), INT32_C( 1764960191), INT32_C( 1032976421), INT32_C(-1057643771), INT32_C( 2071456034), INT32_C(-1085712390), INT32_C( 1506090338), INT32_C( 367006319), INT32_C(-1863539807), INT32_C(-1111523249), INT32_C( 2019983201), INT32_C( 566503151), INT32_C( 503973368), INT32_C( 315043487), INT32_C( 230467816)), simde_mm512_set_epi32(INT32_C( 315043487), INT32_C( 1506090338), INT32_C( 873020550), INT32_C( 1083198112), INT32_C( -506043419), INT32_C( 1506090338), INT32_C(-1111523249), INT32_C( 367006319), INT32_C(-1863539807), INT32_C(-1057643771), INT32_C( -997283137), INT32_C(-2122729525), INT32_C( 1083198112), INT32_C(-1085712390), INT32_C( 230467816), INT32_C( 1764960191)) }, { simde_mm512_set_epi32(INT32_C( 506925802), INT32_C( 939863413), INT32_C( -95691606), INT32_C( -63021650), INT32_C( 1460121332), INT32_C(-1770546120), INT32_C( 691111167), INT32_C( 1916845809), INT32_C( -722416713), INT32_C( 1952309936), INT32_C( -635326036), INT32_C(-2071561697), INT32_C(-1349311592), INT32_C(-1434371918), INT32_C( -908787181), INT32_C( 1027111311)), simde_mm512_set_epi32(INT32_C( -476274551), INT32_C(-1268946349), INT32_C( 1871428230), INT32_C( 1084413756), 
INT32_C( 1221272409), INT32_C( 1604619894), INT32_C( 245162423), INT32_C( -879398063), INT32_C(-1252847595), INT32_C(-1083417294), INT32_C( -85733631), INT32_C( -568206760), INT32_C( -433087356), INT32_C( 591211590), INT32_C( 216351398), INT32_C( 907563034)), simde_mm512_set_epi32(INT32_C( 553126451), INT32_C( 1865131028), INT32_C( -563146647), INT32_C( 1055479165), INT32_C(-2116199302), INT32_C( 548546839), INT32_C(-1021995369), INT32_C(-1397392070), INT32_C( 1911717085), INT32_C(-1893150233), INT32_C(-1174808283), INT32_C( 1158232544), INT32_C( -280806326), INT32_C( 520223268), INT32_C( 603498036), INT32_C( 115278412)), simde_mm512_set_epi32(INT32_C( 691111167), INT32_C( -280806326), INT32_C( 1952309936), INT32_C( 1055479165), INT32_C(-1021995369), INT32_C(-1893150233), INT32_C( 1911717085), INT32_C( 603498036), INT32_C(-1174808283), INT32_C( 520223268), INT32_C( -908787181), INT32_C(-1397392070), INT32_C(-2071561697), INT32_C( 1952309936), INT32_C( 1952309936), INT32_C( 548546839)) }, { simde_mm512_set_epi32(INT32_C( -195854029), INT32_C( 1958024187), INT32_C( 1003184214), INT32_C( 1306010047), INT32_C(-1204461456), INT32_C(-1689382831), INT32_C(-1474151310), INT32_C( 1613100007), INT32_C( 1060086708), INT32_C(-1444530803), INT32_C( 1862849170), INT32_C( -928808218), INT32_C( -146523132), INT32_C(-1235988391), INT32_C( -964779679), INT32_C( 57349444)), simde_mm512_set_epi32(INT32_C(-2134067081), INT32_C( -882392805), INT32_C( 185789257), INT32_C(-2111603035), INT32_C( 407162836), INT32_C(-1236510605), INT32_C(-1567423785), INT32_C( 780522762), INT32_C( -23120724), INT32_C( 92199108), INT32_C( 24805933), INT32_C( 1301060633), INT32_C(-1139480237), INT32_C( 217822558), INT32_C(-1429367443), INT32_C( 465891853)), simde_mm512_set_epi32(INT32_C( 158027570), INT32_C(-1346235067), INT32_C( 1682432767), INT32_C( 1810962335), INT32_C( 1067516501), INT32_C(-1082135268), INT32_C( -56869560), INT32_C(-1195320775), INT32_C( -393383799), INT32_C( -554410804), INT32_C( 
1644924567), INT32_C(-1134386712), INT32_C(-1030153866), INT32_C( 436830495), INT32_C(-1226346340), INT32_C( -700345341)), simde_mm512_set_epi32(INT32_C( -393383799), INT32_C( 1067516501), INT32_C(-1474151310), INT32_C( 1862849170), INT32_C(-1134386712), INT32_C(-1030153866), INT32_C( -393383799), INT32_C(-1689382831), INT32_C( 1306010047), INT32_C( -928808218), INT32_C( 1003184214), INT32_C( -56869560), INT32_C(-1030153866), INT32_C(-1346235067), INT32_C( 1003184214), INT32_C( 1003184214)) }, { simde_mm512_set_epi32(INT32_C( -756862268), INT32_C( 1500443430), INT32_C( 463718589), INT32_C( 262081082), INT32_C( 990406393), INT32_C( 114071142), INT32_C(-1625480036), INT32_C( -478582396), INT32_C( -548991920), INT32_C( 1883825214), INT32_C( -16488776), INT32_C( -956015081), INT32_C(-1165588144), INT32_C( -540410051), INT32_C( 285110286), INT32_C( 1179668936)), simde_mm512_set_epi32(INT32_C( 947326740), INT32_C( 1951776493), INT32_C( 1846700305), INT32_C(-1461724216), INT32_C( 885687075), INT32_C(-2137900171), INT32_C( -224346810), INT32_C( -527126852), INT32_C( 1999299634), INT32_C( -946177834), INT32_C( 996138255), INT32_C( 230639176), INT32_C( 644992332), INT32_C( 587140266), INT32_C(-1686393253), INT32_C( 834603009)), simde_mm512_set_epi32(INT32_C( -565016612), INT32_C(-1511643506), INT32_C(-1182806527), INT32_C( 2039339451), INT32_C( 1136224105), INT32_C(-1359466004), INT32_C( -867898302), INT32_C( 1835028244), INT32_C( 225198660), INT32_C(-1493291743), INT32_C( 1406350672), INT32_C( 420428866), INT32_C( -890727392), INT32_C( 555626838), INT32_C( 641882471), INT32_C(-1439329545)), simde_mm512_set_epi32(INT32_C( 420428866), INT32_C( 463718589), INT32_C( 641882471), INT32_C( -478582396), INT32_C(-1165588144), INT32_C( 1406350672), INT32_C( 1883825214), INT32_C( 2039339451), INT32_C( 555626838), INT32_C(-1493291743), INT32_C( -756862268), INT32_C( -478582396), INT32_C( 262081082), INT32_C( 114071142), INT32_C( 1136224105), INT32_C( 285110286)) }, { 
simde_mm512_set_epi32(INT32_C(-1531947751), INT32_C( -721959716), INT32_C( 537876613), INT32_C( 2069004024), INT32_C(-1968092173), INT32_C( 857387582), INT32_C(-1694386201), INT32_C( 75306944), INT32_C(-1259042358), INT32_C( 909781307), INT32_C( 1903022709), INT32_C( 1362794737), INT32_C(-1814837932), INT32_C( -304499521), INT32_C(-1173731613), INT32_C(-1043888074)), simde_mm512_set_epi32(INT32_C( 1190479058), INT32_C( 648305778), INT32_C(-2081630262), INT32_C( 656381823), INT32_C(-1582102069), INT32_C( 1943788808), INT32_C( -48840894), INT32_C( 1268696790), INT32_C( 678074994), INT32_C(-1453607052), INT32_C( 1809911494), INT32_C( -679308165), INT32_C( 1323625235), INT32_C( -182774520), INT32_C(-1281289570), INT32_C(-1278212722)), simde_mm512_set_epi32(INT32_C( -829128497), INT32_C( 553616479), INT32_C( 2081281560), INT32_C( 1621929085), INT32_C( 502983658), INT32_C( -152394738), INT32_C(-1678301674), INT32_C( 1357386696), INT32_C(-1943547354), INT32_C( -79733129), INT32_C(-2108182551), INT32_C( -331487526), INT32_C( -808116279), INT32_C( -531514769), INT32_C( 375648558), INT32_C(-1499104175)), simde_mm512_set_epi32(INT32_C( -531514769), INT32_C( -531514769), INT32_C( 857387582), INT32_C( -829128497), INT32_C(-1968092173), INT32_C( 75306944), INT32_C( -304499521), INT32_C( -79733129), INT32_C( -531514769), INT32_C( -331487526), INT32_C( 909781307), INT32_C( 502983658), INT32_C( -808116279), INT32_C( 75306944), INT32_C( 553616479), INT32_C( -721959716)) }, { simde_mm512_set_epi32(INT32_C( -868402874), INT32_C( 1365551587), INT32_C( -286078647), INT32_C( 535680374), INT32_C( -347356302), INT32_C( 423616503), INT32_C( 2101835176), INT32_C( 135041412), INT32_C(-1857111698), INT32_C( 1860240848), INT32_C( 582285428), INT32_C(-1871232648), INT32_C(-1985705707), INT32_C( 949080599), INT32_C( 1320997888), INT32_C( 1175065355)), simde_mm512_set_epi32(INT32_C( 1519347951), INT32_C( 1690235053), INT32_C( 992814047), INT32_C(-2023351849), INT32_C( -939539212), INT32_C( 
-83602250), INT32_C( 1391682826), INT32_C( 2110848055), INT32_C( 1264988325), INT32_C( 82278637), INT32_C(-2138890389), INT32_C(-1738286108), INT32_C( 841364839), INT32_C(-1629840866), INT32_C( -232831502), INT32_C( 1761703668)), simde_mm512_set_epi32(INT32_C( 511852650), INT32_C( 1504302381), INT32_C( -226523906), INT32_C(-1237330962), INT32_C( 1280120077), INT32_C( 865002127), INT32_C( -203493997), INT32_C( 956618200), INT32_C( 33546873), INT32_C( -965377912), INT32_C( 1970918143), INT32_C( 528719680), INT32_C( 12592404), INT32_C( 1306075002), INT32_C(-2040787420), INT32_C(-1677403893)), simde_mm512_set_epi32(INT32_C( -868402874), INT32_C( -286078647), INT32_C( 511852650), INT32_C( 33546873), INT32_C( 528719680), INT32_C( -965377912), INT32_C( 423616503), INT32_C( 33546873), INT32_C( 582285428), INT32_C( -286078647), INT32_C( -347356302), INT32_C(-1871232648), INT32_C(-1857111698), INT32_C( 1504302381), INT32_C( 1306075002), INT32_C( 528719680)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_permutex2var_epi32(test_vec[i].a, test_vec[i].idx, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_permutex2var_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__mmask16 k; simde__m512i idx; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C(-1026363374), INT32_C(-1801776439), INT32_C( 145438126), INT32_C(-1306064352), INT32_C( -858736392), INT32_C( 923442479), INT32_C( 1092805562), INT32_C( 1443901717), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1570116932), INT32_C(-1302383354), INT32_C( 1993455974), INT32_C(-2068684593), INT32_C(-1936012201), INT32_C( 1856459607)), UINT16_C(27455), simde_mm512_set_epi32(INT32_C(-1110068455), INT32_C( -207240031), INT32_C(-1649179267), INT32_C( 2054398444), INT32_C( -483586503), INT32_C(-1481960002), INT32_C( 861125508), INT32_C( -330381203), 
INT32_C(-1999224530), INT32_C( 1042470181), INT32_C( 1827473477), INT32_C( 298546792), INT32_C(-1630396605), INT32_C( 1545554432), INT32_C( 344023940), INT32_C(-1871515754)), simde_mm512_set_epi32(INT32_C( 1711460779), INT32_C( -919570191), INT32_C( 1974152373), INT32_C( -695949043), INT32_C( -790242624), INT32_C(-1094331335), INT32_C(-1166320093), INT32_C(-2045280751), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 564965997), INT32_C( 169645898), INT32_C(-1539616610), INT32_C( 1134735685), INT32_C( 1430356381)), simde_mm512_set_epi32(INT32_C(-1026363374), INT32_C(-1936012201), INT32_C( 1974152373), INT32_C(-1306064352), INT32_C(-1166320093), INT32_C( 923442479), INT32_C(-1302383354), INT32_C( 145438126), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1570116932), INT32_C( 1443901717), INT32_C( 1993455974), INT32_C( 1856459607), INT32_C(-1302383354), INT32_C( 223952317)) }, { simde_mm512_set_epi32(INT32_C(-1423023772), INT32_C(-1185448755), INT32_C( 1549802795), INT32_C( 159583350), INT32_C( 548883180), INT32_C( -605945909), INT32_C(-2063050181), INT32_C( 1095467003), INT32_C(-2083755741), INT32_C( 2066979701), INT32_C( 1094609712), INT32_C( 1345059025), INT32_C( -340318359), INT32_C( 1519671047), INT32_C(-1017461983), INT32_C( 353198331)), UINT16_C( 8253), simde_mm512_set_epi32(INT32_C( -888441293), INT32_C( -707551350), INT32_C( 513515868), INT32_C(-1825967755), INT32_C( 822222164), INT32_C(-1689559027), INT32_C( 533478787), INT32_C( 907615417), INT32_C( -199229058), INT32_C( -91537812), INT32_C( 1375258232), INT32_C( 139748399), INT32_C( 1688468565), INT32_C( 736544549), INT32_C(-1282057552), INT32_C( 795925067)), simde_mm512_set_epi32(INT32_C( 1701079465), INT32_C( -195770682), INT32_C( 503748315), INT32_C( 1469355417), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C( -646247370), INT32_C( 1258747662), INT32_C( 1838830023), INT32_C( -532007659), INT32_C( -622852205), INT32_C( -839037220), INT32_C( 499633910), INT32_C( 
-260167255), INT32_C( 884163960), INT32_C( -329275629)), simde_mm512_set_epi32(INT32_C(-1423023772), INT32_C(-1185448755), INT32_C( 1469355417), INT32_C( 159583350), INT32_C( 548883180), INT32_C( -605945909), INT32_C(-2063050181), INT32_C( 1095467003), INT32_C(-2083755741), INT32_C( 2066979701), INT32_C( 1258747662), INT32_C(-1423023772), INT32_C( -622852205), INT32_C( 1094609712), INT32_C(-1017461983), INT32_C( 548883180)) }, { simde_mm512_set_epi32(INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494), INT32_C( -22119232), INT32_C( 1216907749), INT32_C( 654527510), INT32_C(-2043358500), INT32_C( 459072440), INT32_C( -430427651), INT32_C( -272088075), INT32_C( 386072301), INT32_C(-1628984154), INT32_C( 87817524), INT32_C( 1219490517), INT32_C(-1569831145), INT32_C( 338985942)), UINT16_C(47186), simde_mm512_set_epi32(INT32_C( 730787370), INT32_C(-2034110695), INT32_C(-1088138491), INT32_C( -353174912), INT32_C( -362301616), INT32_C( 617951303), INT32_C( 817116152), INT32_C(-1034835761), INT32_C( -102069057), INT32_C( 1774242298), INT32_C( 1089620040), INT32_C(-1101477862), INT32_C( 2001101785), INT32_C(-1759250988), INT32_C( -606254738), INT32_C( 1526367108)), simde_mm512_set_epi32(INT32_C( 204417556), INT32_C(-1329665093), INT32_C(-2039025377), INT32_C( 1639231015), INT32_C( 1541217841), INT32_C( 1692413538), INT32_C( 738521275), INT32_C( 159429100), INT32_C( 451955897), INT32_C( 181201098), INT32_C( 450627934), INT32_C( 2082954477), INT32_C( 1254960767), INT32_C( 1995459397), INT32_C( -11572946), INT32_C(-1087388220)), simde_mm512_set_epi32(INT32_C( 654527510), INT32_C(-1762469023), INT32_C( 386072301), INT32_C( 338985942), INT32_C(-1087388220), INT32_C( 654527510), INT32_C(-2043358500), INT32_C( 459072440), INT32_C( -430427651), INT32_C( 1692413538), INT32_C( 386072301), INT32_C( 1692413538), INT32_C( 87817524), INT32_C( 1219490517), INT32_C(-1762469023), INT32_C( 338985942)) }, { simde_mm512_set_epi32(INT32_C( 1928049651), INT32_C( 765730488), 
INT32_C( -85899231), INT32_C( 1435935141), INT32_C(-2098236580), INT32_C(-1991433794), INT32_C( 1298943776), INT32_C( 277470244), INT32_C(-1834748849), INT32_C( 596054477), INT32_C( 1827419510), INT32_C(-1010527612), INT32_C(-1687118128), INT32_C( 107945377), INT32_C( 1174128677), INT32_C(-1544325740)), UINT16_C(47807), simde_mm512_set_epi32(INT32_C( 1463729035), INT32_C( 2031968571), INT32_C( 333434400), INT32_C( -637142874), INT32_C( -520435756), INT32_C( -148623413), INT32_C( -692754616), INT32_C(-1908406411), INT32_C( 1391053429), INT32_C( 1767908668), INT32_C( 1117151413), INT32_C( 1466854108), INT32_C( -852914371), INT32_C( -773785464), INT32_C(-2142007253), INT32_C( 466013192)), simde_mm512_set_epi32(INT32_C( -811849174), INT32_C(-1510825074), INT32_C( 1897985966), INT32_C( 1445172644), INT32_C( -193622280), INT32_C( 2097959091), INT32_C( -652080500), INT32_C( 1943026961), INT32_C( 763848022), INT32_C(-2124387583), INT32_C(-1102663841), INT32_C( 712044568), INT32_C( 1641785760), INT32_C( 1696516135), INT32_C(-1123374630), INT32_C( -181070601)), simde_mm512_set_epi32(INT32_C(-2098236580), INT32_C( 765730488), INT32_C(-1544325740), INT32_C( 596054477), INT32_C( 712044568), INT32_C(-1991433794), INT32_C( 277470244), INT32_C( 277470244), INT32_C(-1102663841), INT32_C( 596054477), INT32_C(-1102663841), INT32_C( 1445172644), INT32_C( 1897985966), INT32_C( 277470244), INT32_C(-2098236580), INT32_C( 277470244)) }, { simde_mm512_set_epi32(INT32_C( 729621709), INT32_C(-1241407128), INT32_C( 696721321), INT32_C( -603523965), INT32_C( 1730687689), INT32_C( 290786615), INT32_C(-1827031380), INT32_C( 1429317129), INT32_C(-1800615955), INT32_C( -728999228), INT32_C( -788606428), INT32_C( -539592973), INT32_C(-1402526875), INT32_C( -8263463), INT32_C( 478788156), INT32_C( 842200487)), UINT16_C(43569), simde_mm512_set_epi32(INT32_C(-1668086905), INT32_C( -770469750), INT32_C( 1013231130), INT32_C( 543156562), INT32_C( -399740514), INT32_C( 509655415), INT32_C( -160537509), 
INT32_C( -549528402), INT32_C( -323547130), INT32_C(-1395624565), INT32_C(-1905505546), INT32_C(-1268587914), INT32_C( 1939823644), INT32_C(-1112752789), INT32_C( 2052878307), INT32_C( -856056848)), simde_mm512_set_epi32(INT32_C( 1969107101), INT32_C(-2063427243), INT32_C( -670405092), INT32_C(-1879729053), INT32_C( 1035482990), INT32_C( 1183910939), INT32_C( 1515345934), INT32_C(-1884003639), INT32_C( -638430290), INT32_C(-2007622482), INT32_C( 171336877), INT32_C( 59553613), INT32_C( 165266600), INT32_C( -798384264), INT32_C( 1607584815), INT32_C(-1324336584)), simde_mm512_set_epi32(INT32_C(-1800615955), INT32_C(-1241407128), INT32_C( 1183910939), INT32_C( -603523965), INT32_C(-2063427243), INT32_C( 290786615), INT32_C( 1035482990), INT32_C( 1429317129), INT32_C(-1800615955), INT32_C( -728999228), INT32_C(-2007622482), INT32_C(-2007622482), INT32_C(-1402526875), INT32_C( -8263463), INT32_C( 478788156), INT32_C(-1324336584)) }, { simde_mm512_set_epi32(INT32_C(-1349190316), INT32_C(-1403674818), INT32_C( -521443925), INT32_C(-1464291783), INT32_C(-1686112999), INT32_C(-1290233716), INT32_C( -364563113), INT32_C( 1520783126), INT32_C( -207159885), INT32_C( -104006691), INT32_C( 362759403), INT32_C(-1562242573), INT32_C( -397133039), INT32_C( 568974515), INT32_C(-1726442446), INT32_C(-2134949944)), UINT16_C(54613), simde_mm512_set_epi32(INT32_C(-1929379353), INT32_C( -560036292), INT32_C(-1693642327), INT32_C(-1383827159), INT32_C( 1804875719), INT32_C( 1179452315), INT32_C(-1509656190), INT32_C(-1409992701), INT32_C(-1830359468), INT32_C( 635753031), INT32_C( 310246197), INT32_C(-1783943034), INT32_C(-1307643183), INT32_C( -144888334), INT32_C( 621611179), INT32_C( 743650285)), simde_mm512_set_epi32(INT32_C(-2094713086), INT32_C( 197529411), INT32_C( 1055036471), INT32_C( 351897115), INT32_C( 1594003471), INT32_C(-1709813294), INT32_C( -133653364), INT32_C( -51462036), INT32_C( 46796230), INT32_C( 989301899), INT32_C( -691937914), INT32_C( 1667629581), INT32_C( 
-496700661), INT32_C(-1318801755), INT32_C( 1076515270), INT32_C(-1757573505)), simde_mm512_set_epi32(INT32_C( -207159885), INT32_C( 351897115), INT32_C( -521443925), INT32_C( -364563113), INT32_C(-1686112999), INT32_C( 1594003471), INT32_C( -364563113), INT32_C( -397133039), INT32_C( -207159885), INT32_C( -207159885), INT32_C( 362759403), INT32_C( -104006691), INT32_C( -397133039), INT32_C(-1318801755), INT32_C(-1726442446), INT32_C( -521443925)) }, { simde_mm512_set_epi32(INT32_C( -662725541), INT32_C( 790228415), INT32_C( -26753919), INT32_C( -495897274), INT32_C( 1526994051), INT32_C( 1569343894), INT32_C(-1059990980), INT32_C( -490626870), INT32_C( 1463745126), INT32_C( -957352131), INT32_C( 1122208393), INT32_C(-1814919780), INT32_C(-1891682702), INT32_C( -176064246), INT32_C(-1293286075), INT32_C(-1398303881)), UINT16_C(12582), simde_mm512_set_epi32(INT32_C(-1664789378), INT32_C( -204514420), INT32_C( 1112369408), INT32_C( 2113109396), INT32_C(-1679339682), INT32_C( 2128430529), INT32_C(-1262713143), INT32_C( 1026756660), INT32_C(-1268083621), INT32_C( 337660693), INT32_C( 1524090799), INT32_C( -275653210), INT32_C(-1422519849), INT32_C( -495051500), INT32_C( 1851182812), INT32_C( 5027269)), simde_mm512_set_epi32(INT32_C( -501257427), INT32_C(-1329431510), INT32_C( 1005777948), INT32_C( 616430734), INT32_C( 1581162255), INT32_C( 1497456456), INT32_C(-1170808415), INT32_C(-1014503666), INT32_C(-1157750165), INT32_C( 1691363299), INT32_C( 1100655145), INT32_C( 673265711), INT32_C( 1544659928), INT32_C(-1956803094), INT32_C( 1970109422), INT32_C(-1197844366)), simde_mm512_set_epi32(INT32_C( -662725541), INT32_C( 790228415), INT32_C(-1398303881), INT32_C( 673265711), INT32_C( 1526994051), INT32_C( 1569343894), INT32_C(-1059990980), INT32_C( 673265711), INT32_C( 1463745126), INT32_C( -957352131), INT32_C( -662725541), INT32_C(-1814919780), INT32_C(-1891682702), INT32_C( 673265711), INT32_C( 616430734), INT32_C(-1398303881)) }, { 
simde_mm512_set_epi32(INT32_C(-1875196295), INT32_C( -129416454), INT32_C( 246939492), INT32_C(-1135915662), INT32_C( -539014135), INT32_C( 1554320066), INT32_C( 1611314079), INT32_C(-1013292897), INT32_C( 1135188428), INT32_C(-1903831246), INT32_C( 1207366326), INT32_C( -366505666), INT32_C( 1379479886), INT32_C( 1577255779), INT32_C( -488475560), INT32_C( 143370041)), UINT16_C(29031), simde_mm512_set_epi32(INT32_C( -7690721), INT32_C( -632623581), INT32_C( 612963145), INT32_C( 1824881051), INT32_C( 582494706), INT32_C(-1899955415), INT32_C( 618457733), INT32_C( 1216635147), INT32_C( 1404062051), INT32_C( 1905825928), INT32_C(-1635796069), INT32_C( 528096299), INT32_C( 259240399), INT32_C( 271372102), INT32_C( 1305969598), INT32_C( 974818283)), simde_mm512_set_epi32(INT32_C( -675362282), INT32_C(-1218762696), INT32_C(-1400182216), INT32_C(-2088680370), INT32_C(-1895497877), INT32_C( 1563893931), INT32_C( 1105770515), INT32_C(-1745770541), INT32_C(-1255255240), INT32_C( 1238532704), INT32_C( 995946229), INT32_C( 119517601), INT32_C( 1389614040), INT32_C(-2032996348), INT32_C( 1996749952), INT32_C( 450477794)), simde_mm512_set_epi32(INT32_C(-1875196295), INT32_C( 1379479886), INT32_C( 1611314079), INT32_C(-1895497877), INT32_C( -539014135), INT32_C( 1554320066), INT32_C( 1611314079), INT32_C( -539014135), INT32_C( 1135188428), INT32_C(-1013292897), INT32_C(-1895497877), INT32_C( -366505666), INT32_C( 1379479886), INT32_C(-1903831246), INT32_C(-1218762696), INT32_C( -539014135)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_permutex2var_epi32(test_vec[i].a, test_vec[i].k, test_vec[i].idx, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask2_permutex2var_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i idx; simde__mmask16 k; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C(-1026363374), 
INT32_C(-1801776439), INT32_C( 145438126), INT32_C(-1306064352), INT32_C( -858736392), INT32_C( 923442479), INT32_C( 1092805562), INT32_C( 1443901717), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1570116932), INT32_C(-1302383354), INT32_C( 1993455974), INT32_C(-2068684593), INT32_C(-1936012201), INT32_C( 1856459607)), simde_mm512_set_epi32(INT32_C( -207240031), INT32_C(-1649179267), INT32_C( 2054398444), INT32_C( -483586503), INT32_C(-1481960002), INT32_C( 861125508), INT32_C( -330381203), INT32_C(-1999224530), INT32_C( 1042470181), INT32_C( 1827473477), INT32_C( 298546792), INT32_C(-1630396605), INT32_C( 1545554432), INT32_C( 344023940), INT32_C(-1871515754), INT32_C( 951544639)), UINT16_C(45849), simde_mm512_set_epi32(INT32_C( 1711460779), INT32_C( -919570191), INT32_C( 1974152373), INT32_C( -695949043), INT32_C( -790242624), INT32_C(-1094331335), INT32_C(-1166320093), INT32_C(-2045280751), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 564965997), INT32_C( 169645898), INT32_C(-1539616610), INT32_C( 1134735685), INT32_C( 1430356381)), simde_mm512_set_epi32(INT32_C(-1936012201), INT32_C(-1649179267), INT32_C(-1306064352), INT32_C(-1166320093), INT32_C(-1481960002), INT32_C( 861125508), INT32_C( 145438126), INT32_C(-1801776439), INT32_C( 1042470181), INT32_C( 1827473477), INT32_C( 298546792), INT32_C( 1993455974), INT32_C( 1856459607), INT32_C( 344023940), INT32_C(-1871515754), INT32_C( 1711460779)) }, { simde_mm512_set_epi32(INT32_C(-1423023772), INT32_C(-1185448755), INT32_C( 1549802795), INT32_C( 159583350), INT32_C( 548883180), INT32_C( -605945909), INT32_C(-2063050181), INT32_C( 1095467003), INT32_C(-2083755741), INT32_C( 2066979701), INT32_C( 1094609712), INT32_C( 1345059025), INT32_C( -340318359), INT32_C( 1519671047), INT32_C(-1017461983), INT32_C( 353198331)), simde_mm512_set_epi32(INT32_C( -707551350), INT32_C( 513515868), INT32_C(-1825967755), INT32_C( 822222164), INT32_C(-1689559027), INT32_C( 533478787), INT32_C( 
907615417), INT32_C( -199229058), INT32_C( -91537812), INT32_C( 1375258232), INT32_C( 139748399), INT32_C( 1688468565), INT32_C( 736544549), INT32_C(-1282057552), INT32_C( 795925067), INT32_C( 1720852541)), UINT16_C(30259), simde_mm512_set_epi32(INT32_C( 1701079465), INT32_C( -195770682), INT32_C( 503748315), INT32_C( 1469355417), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C( -646247370), INT32_C( 1258747662), INT32_C( 1838830023), INT32_C( -532007659), INT32_C( -622852205), INT32_C( -839037220), INT32_C( 499633910), INT32_C( -260167255), INT32_C( 884163960), INT32_C( -329275629)), simde_mm512_set_epi32(INT32_C( -707551350), INT32_C( 1469355417), INT32_C( -622852205), INT32_C( -839037220), INT32_C(-1689559027), INT32_C( -340318359), INT32_C( -646247370), INT32_C( -199229058), INT32_C( -91537812), INT32_C( 1375258232), INT32_C(-1423023772), INT32_C( -622852205), INT32_C( 736544549), INT32_C(-1282057552), INT32_C( 548883180), INT32_C( 503748315)) }, { simde_mm512_set_epi32(INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494), INT32_C( -22119232), INT32_C( 1216907749), INT32_C( 654527510), INT32_C(-2043358500), INT32_C( 459072440), INT32_C( -430427651), INT32_C( -272088075), INT32_C( 386072301), INT32_C(-1628984154), INT32_C( 87817524), INT32_C( 1219490517), INT32_C(-1569831145), INT32_C( 338985942)), simde_mm512_set_epi32(INT32_C(-2034110695), INT32_C(-1088138491), INT32_C( -353174912), INT32_C( -362301616), INT32_C( 617951303), INT32_C( 817116152), INT32_C(-1034835761), INT32_C( -102069057), INT32_C( 1774242298), INT32_C( 1089620040), INT32_C(-1101477862), INT32_C( 2001101785), INT32_C(-1759250988), INT32_C( -606254738), INT32_C( 1526367108), INT32_C( 722122834)), UINT16_C(60970), simde_mm512_set_epi32(INT32_C( 204417556), INT32_C(-1329665093), INT32_C(-2039025377), INT32_C( 1639231015), INT32_C( 1541217841), INT32_C( 1692413538), INT32_C( 738521275), INT32_C( 159429100), INT32_C( 451955897), INT32_C( 181201098), INT32_C( 450627934), INT32_C( 
2082954477), INT32_C( 1254960767), INT32_C( 1995459397), INT32_C( -11572946), INT32_C(-1087388220)), simde_mm512_set_epi32(INT32_C( 738521275), INT32_C( 386072301), INT32_C( 338985942), INT32_C( -362301616), INT32_C( -430427651), INT32_C( 159429100), INT32_C( -174985661), INT32_C( -102069057), INT32_C( 1774242298), INT32_C( 1089620040), INT32_C( 1692413538), INT32_C( 2001101785), INT32_C( 2082954477), INT32_C( -606254738), INT32_C(-1628984154), INT32_C( 722122834)) }, { simde_mm512_set_epi32(INT32_C( 1928049651), INT32_C( 765730488), INT32_C( -85899231), INT32_C( 1435935141), INT32_C(-2098236580), INT32_C(-1991433794), INT32_C( 1298943776), INT32_C( 277470244), INT32_C(-1834748849), INT32_C( 596054477), INT32_C( 1827419510), INT32_C(-1010527612), INT32_C(-1687118128), INT32_C( 107945377), INT32_C( 1174128677), INT32_C(-1544325740)), simde_mm512_set_epi32(INT32_C( 2031968571), INT32_C( 333434400), INT32_C( -637142874), INT32_C( -520435756), INT32_C( -148623413), INT32_C( -692754616), INT32_C(-1908406411), INT32_C( 1391053429), INT32_C( 1767908668), INT32_C( 1117151413), INT32_C( 1466854108), INT32_C( -852914371), INT32_C( -773785464), INT32_C(-2142007253), INT32_C( 466013192), INT32_C( 1313258175)), UINT16_C(48011), simde_mm512_set_epi32(INT32_C( -811849174), INT32_C(-1510825074), INT32_C( 1897985966), INT32_C( 1445172644), INT32_C( -193622280), INT32_C( 2097959091), INT32_C( -652080500), INT32_C( 1943026961), INT32_C( 763848022), INT32_C(-2124387583), INT32_C(-1102663841), INT32_C( 712044568), INT32_C( 1641785760), INT32_C( 1696516135), INT32_C(-1123374630), INT32_C( -181070601)), simde_mm512_set_epi32(INT32_C( -193622280), INT32_C( 333434400), INT32_C( 596054477), INT32_C( 712044568), INT32_C(-2098236580), INT32_C( -692754616), INT32_C(-1102663841), INT32_C(-1102663841), INT32_C( 1445172644), INT32_C( 1117151413), INT32_C( 1466854108), INT32_C( -852914371), INT32_C( 277470244), INT32_C(-2142007253), INT32_C( 277470244), INT32_C( -811849174)) }, { 
simde_mm512_set_epi32(INT32_C( 729621709), INT32_C(-1241407128), INT32_C( 696721321), INT32_C( -603523965), INT32_C( 1730687689), INT32_C( 290786615), INT32_C(-1827031380), INT32_C( 1429317129), INT32_C(-1800615955), INT32_C( -728999228), INT32_C( -788606428), INT32_C( -539592973), INT32_C(-1402526875), INT32_C( -8263463), INT32_C( 478788156), INT32_C( 842200487)), simde_mm512_set_epi32(INT32_C( -770469750), INT32_C( 1013231130), INT32_C( 543156562), INT32_C( -399740514), INT32_C( 509655415), INT32_C( -160537509), INT32_C( -549528402), INT32_C( -323547130), INT32_C(-1395624565), INT32_C(-1905505546), INT32_C(-1268587914), INT32_C( 1939823644), INT32_C(-1112752789), INT32_C( 2052878307), INT32_C( -856056848), INT32_C(-1218860495)), UINT16_C( 903), simde_mm512_set_epi32(INT32_C( 1969107101), INT32_C(-2063427243), INT32_C( -670405092), INT32_C(-1879729053), INT32_C( 1035482990), INT32_C( 1183910939), INT32_C( 1515345934), INT32_C(-1884003639), INT32_C( -638430290), INT32_C(-2007622482), INT32_C( 171336877), INT32_C( 59553613), INT32_C( 165266600), INT32_C( -798384264), INT32_C( 1607584815), INT32_C(-1324336584)), simde_mm512_set_epi32(INT32_C( -770469750), INT32_C( 1013231130), INT32_C( 543156562), INT32_C( -399740514), INT32_C( 509655415), INT32_C( -160537509), INT32_C(-1241407128), INT32_C( -728999228), INT32_C( 1730687689), INT32_C(-1905505546), INT32_C(-1268587914), INT32_C( 1939823644), INT32_C(-1112752789), INT32_C(-1402526875), INT32_C(-1324336584), INT32_C( 1607584815)) }, { simde_mm512_set_epi32(INT32_C(-1349190316), INT32_C(-1403674818), INT32_C( -521443925), INT32_C(-1464291783), INT32_C(-1686112999), INT32_C(-1290233716), INT32_C( -364563113), INT32_C( 1520783126), INT32_C( -207159885), INT32_C( -104006691), INT32_C( 362759403), INT32_C(-1562242573), INT32_C( -397133039), INT32_C( 568974515), INT32_C(-1726442446), INT32_C(-2134949944)), simde_mm512_set_epi32(INT32_C( -560036292), INT32_C(-1693642327), INT32_C(-1383827159), INT32_C( 1804875719), INT32_C( 
1179452315), INT32_C(-1509656190), INT32_C(-1409992701), INT32_C(-1830359468), INT32_C( 635753031), INT32_C( 310246197), INT32_C(-1783943034), INT32_C(-1307643183), INT32_C( -144888334), INT32_C( 621611179), INT32_C( 743650285), INT32_C( 1845744981)), UINT16_C( 487), simde_mm512_set_epi32(INT32_C(-2094713086), INT32_C( 197529411), INT32_C( 1055036471), INT32_C( 351897115), INT32_C( 1594003471), INT32_C(-1709813294), INT32_C( -133653364), INT32_C( -51462036), INT32_C( 46796230), INT32_C( 989301899), INT32_C( -691937914), INT32_C( 1667629581), INT32_C( -496700661), INT32_C(-1318801755), INT32_C( 1076515270), INT32_C(-1757573505)), simde_mm512_set_epi32(INT32_C( -560036292), INT32_C(-1693642327), INT32_C(-1383827159), INT32_C( 1804875719), INT32_C( 1179452315), INT32_C(-1509656190), INT32_C(-1409992701), INT32_C( 1667629581), INT32_C( -207159885), INT32_C( -691937914), INT32_C( -104006691), INT32_C(-1307643183), INT32_C( -144888334), INT32_C(-1686112999), INT32_C( -521443925), INT32_C( -691937914)) }, { simde_mm512_set_epi32(INT32_C( -662725541), INT32_C( 790228415), INT32_C( -26753919), INT32_C( -495897274), INT32_C( 1526994051), INT32_C( 1569343894), INT32_C(-1059990980), INT32_C( -490626870), INT32_C( 1463745126), INT32_C( -957352131), INT32_C( 1122208393), INT32_C(-1814919780), INT32_C(-1891682702), INT32_C( -176064246), INT32_C(-1293286075), INT32_C(-1398303881)), simde_mm512_set_epi32(INT32_C( -204514420), INT32_C( 1112369408), INT32_C( 2113109396), INT32_C(-1679339682), INT32_C( 2128430529), INT32_C(-1262713143), INT32_C( 1026756660), INT32_C(-1268083621), INT32_C( 337660693), INT32_C( 1524090799), INT32_C( -275653210), INT32_C(-1422519849), INT32_C( -495051500), INT32_C( 1851182812), INT32_C( 5027269), INT32_C( -594726618)), UINT16_C(21630), simde_mm512_set_epi32(INT32_C( -501257427), INT32_C(-1329431510), INT32_C( 1005777948), INT32_C( 616430734), INT32_C( 1581162255), INT32_C( 1497456456), INT32_C(-1170808415), INT32_C(-1014503666), INT32_C(-1157750165), 
INT32_C( 1691363299), INT32_C( 1100655145), INT32_C( 673265711), INT32_C( 1544659928), INT32_C(-1956803094), INT32_C( 1970109422), INT32_C(-1197844366)), simde_mm512_set_epi32(INT32_C( -204514420), INT32_C(-1398303881), INT32_C( 2113109396), INT32_C(-1329431510), INT32_C( 2128430529), INT32_C(-1059990980), INT32_C( 1026756660), INT32_C(-1268083621), INT32_C( 337660693), INT32_C( -662725541), INT32_C( -957352131), INT32_C(-1157750165), INT32_C( 673265711), INT32_C( 616430734), INT32_C( 1122208393), INT32_C( -594726618)) }, { simde_mm512_set_epi32(INT32_C(-1875196295), INT32_C( -129416454), INT32_C( 246939492), INT32_C(-1135915662), INT32_C( -539014135), INT32_C( 1554320066), INT32_C( 1611314079), INT32_C(-1013292897), INT32_C( 1135188428), INT32_C(-1903831246), INT32_C( 1207366326), INT32_C( -366505666), INT32_C( 1379479886), INT32_C( 1577255779), INT32_C( -488475560), INT32_C( 143370041)), simde_mm512_set_epi32(INT32_C( -632623581), INT32_C( 612963145), INT32_C( 1824881051), INT32_C( 582494706), INT32_C(-1899955415), INT32_C( 618457733), INT32_C( 1216635147), INT32_C( 1404062051), INT32_C( 1905825928), INT32_C(-1635796069), INT32_C( 528096299), INT32_C( 259240399), INT32_C( 271372102), INT32_C( 1305969598), INT32_C( 974818283), INT32_C( 377057639)), UINT16_C(42527), simde_mm512_set_epi32(INT32_C( -675362282), INT32_C(-1218762696), INT32_C(-1400182216), INT32_C(-2088680370), INT32_C(-1895497877), INT32_C( 1563893931), INT32_C( 1105770515), INT32_C(-1745770541), INT32_C(-1255255240), INT32_C( 1238532704), INT32_C( 995946229), INT32_C( 119517601), INT32_C( 1389614040), INT32_C(-2032996348), INT32_C( 1996749952), INT32_C( 450477794)), simde_mm512_set_epi32(INT32_C( 1379479886), INT32_C( 612963145), INT32_C(-1895497877), INT32_C( 582494706), INT32_C(-1899955415), INT32_C( 1207366326), INT32_C( -539014135), INT32_C( 1404062051), INT32_C( 1905825928), INT32_C(-1635796069), INT32_C( 528096299), INT32_C(-1875196295), INT32_C(-1903831246), INT32_C(-1218762696), INT32_C( 
-539014135), INT32_C( 1135188428)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask2_permutex2var_epi32(test_vec[i].a, test_vec[i].idx, test_vec[i].k, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_permutex2var_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512i a; simde__m512i idx; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT16_C(21335), simde_mm512_set_epi32(INT32_C( 951544639), INT32_C(-1026363374), INT32_C(-1801776439), INT32_C( 145438126), INT32_C(-1306064352), INT32_C( -858736392), INT32_C( 923442479), INT32_C( 1092805562), INT32_C( 1443901717), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1570116932), INT32_C(-1302383354), INT32_C( 1993455974), INT32_C(-2068684593), INT32_C(-1936012201)), simde_mm512_set_epi32(INT32_C(-1110068455), INT32_C( -207240031), INT32_C(-1649179267), INT32_C( 2054398444), INT32_C( -483586503), INT32_C(-1481960002), INT32_C( 861125508), INT32_C( -330381203), INT32_C(-1999224530), INT32_C( 1042470181), INT32_C( 1827473477), INT32_C( 298546792), INT32_C(-1630396605), INT32_C( 1545554432), INT32_C( 344023940), INT32_C(-1871515754)), simde_mm512_set_epi32(INT32_C( 1711460779), INT32_C( -919570191), INT32_C( 1974152373), INT32_C( -695949043), INT32_C( -790242624), INT32_C(-1094331335), INT32_C(-1166320093), INT32_C(-2045280751), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 564965997), INT32_C( 169645898), INT32_C(-1539616610), INT32_C( 1134735685), INT32_C( 1430356381)), simde_mm512_set_epi32(INT32_C( 0), INT32_C(-2068684593), INT32_C( 0), INT32_C( 145438126), INT32_C( 0), INT32_C( 0), INT32_C( 1570116932), INT32_C(-1801776439), INT32_C( 0), INT32_C( 1777333881), INT32_C( 0), INT32_C( 1092805562), INT32_C( 0), INT32_C(-1936012201), INT32_C( 1570116932), INT32_C( 223952317)) }, { UINT16_C(24827), simde_mm512_set_epi32(INT32_C( 1720852541), 
INT32_C(-1423023772), INT32_C(-1185448755), INT32_C( 1549802795), INT32_C( 159583350), INT32_C( 548883180), INT32_C( -605945909), INT32_C(-2063050181), INT32_C( 1095467003), INT32_C(-2083755741), INT32_C( 2066979701), INT32_C( 1094609712), INT32_C( 1345059025), INT32_C( -340318359), INT32_C( 1519671047), INT32_C(-1017461983)), simde_mm512_set_epi32(INT32_C( -888441293), INT32_C( -707551350), INT32_C( 513515868), INT32_C(-1825967755), INT32_C( 822222164), INT32_C(-1689559027), INT32_C( 533478787), INT32_C( 907615417), INT32_C( -199229058), INT32_C( -91537812), INT32_C( 1375258232), INT32_C( 139748399), INT32_C( 1688468565), INT32_C( 736544549), INT32_C(-1282057552), INT32_C( 795925067)), simde_mm512_set_epi32(INT32_C( 1701079465), INT32_C( -195770682), INT32_C( 503748315), INT32_C( 1469355417), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C( -646247370), INT32_C( 1258747662), INT32_C( 1838830023), INT32_C( -532007659), INT32_C( -622852205), INT32_C( -839037220), INT32_C( 499633910), INT32_C( -260167255), INT32_C( 884163960), INT32_C( -329275629)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 548883180), INT32_C( 1469355417), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -195770682), INT32_C( 1549802795), INT32_C( 1258747662), INT32_C( 1720852541), INT32_C( -622852205), INT32_C( 0), INT32_C( -329275629), INT32_C( 159583350)) }, { UINT16_C(33750), simde_mm512_set_epi32(INT32_C( 722122834), INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494), INT32_C( -22119232), INT32_C( 1216907749), INT32_C( 654527510), INT32_C(-2043358500), INT32_C( 459072440), INT32_C( -430427651), INT32_C( -272088075), INT32_C( 386072301), INT32_C(-1628984154), INT32_C( 87817524), INT32_C( 1219490517), INT32_C(-1569831145)), simde_mm512_set_epi32(INT32_C( 730787370), INT32_C(-2034110695), INT32_C(-1088138491), INT32_C( -353174912), INT32_C( -362301616), INT32_C( 617951303), INT32_C( 817116152), INT32_C(-1034835761), INT32_C( -102069057), INT32_C( 
1774242298), INT32_C( 1089620040), INT32_C(-1101477862), INT32_C( 2001101785), INT32_C(-1759250988), INT32_C( -606254738), INT32_C( 1526367108)), simde_mm512_set_epi32(INT32_C( 204417556), INT32_C(-1329665093), INT32_C(-2039025377), INT32_C( 1639231015), INT32_C( 1541217841), INT32_C( 1692413538), INT32_C( 738521275), INT32_C( 159429100), INT32_C( 451955897), INT32_C( 181201098), INT32_C( 450627934), INT32_C( 2082954477), INT32_C( 1254960767), INT32_C( 1995459397), INT32_C( -11572946), INT32_C(-1087388220)), simde_mm512_set_epi32(INT32_C( 1216907749), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 159429100), INT32_C( 722122834), INT32_C( 204417556), INT32_C( 1692413538), INT32_C( 0), INT32_C( 1692413538), INT32_C( 0), INT32_C( 2082954477), INT32_C( -174985661), INT32_C( 0)) }, { UINT16_C(30100), simde_mm512_set_epi32(INT32_C( 1313258175), INT32_C( 1928049651), INT32_C( 765730488), INT32_C( -85899231), INT32_C( 1435935141), INT32_C(-2098236580), INT32_C(-1991433794), INT32_C( 1298943776), INT32_C( 277470244), INT32_C(-1834748849), INT32_C( 596054477), INT32_C( 1827419510), INT32_C(-1010527612), INT32_C(-1687118128), INT32_C( 107945377), INT32_C( 1174128677)), simde_mm512_set_epi32(INT32_C( 1463729035), INT32_C( 2031968571), INT32_C( 333434400), INT32_C( -637142874), INT32_C( -520435756), INT32_C( -148623413), INT32_C( -692754616), INT32_C(-1908406411), INT32_C( 1391053429), INT32_C( 1767908668), INT32_C( 1117151413), INT32_C( 1466854108), INT32_C( -852914371), INT32_C( -773785464), INT32_C(-2142007253), INT32_C( 466013192)), simde_mm512_set_epi32(INT32_C( -811849174), INT32_C(-1510825074), INT32_C( 1897985966), INT32_C( 1445172644), INT32_C( -193622280), INT32_C( 2097959091), INT32_C( -652080500), INT32_C( 1943026961), INT32_C( 763848022), INT32_C(-2124387583), INT32_C(-1102663841), INT32_C( 712044568), INT32_C( 1641785760), INT32_C( 1696516135), INT32_C(-1123374630), INT32_C( -181070601)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 
-193622280), INT32_C( 1174128677), INT32_C(-1834748849), INT32_C( 0), INT32_C( 1435935141), INT32_C( 0), INT32_C(-1102663841), INT32_C(-1102663841), INT32_C( 0), INT32_C( 0), INT32_C( 1445172644), INT32_C( 0), INT32_C( 1298943776), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(62887), simde_mm512_set_epi32(INT32_C(-1218860495), INT32_C( 729621709), INT32_C(-1241407128), INT32_C( 696721321), INT32_C( -603523965), INT32_C( 1730687689), INT32_C( 290786615), INT32_C(-1827031380), INT32_C( 1429317129), INT32_C(-1800615955), INT32_C( -728999228), INT32_C( -788606428), INT32_C( -539592973), INT32_C(-1402526875), INT32_C( -8263463), INT32_C( 478788156)), simde_mm512_set_epi32(INT32_C(-1668086905), INT32_C( -770469750), INT32_C( 1013231130), INT32_C( 543156562), INT32_C( -399740514), INT32_C( 509655415), INT32_C( -160537509), INT32_C( -549528402), INT32_C( -323547130), INT32_C(-1395624565), INT32_C(-1905505546), INT32_C(-1268587914), INT32_C( 1939823644), INT32_C(-1112752789), INT32_C( 2052878307), INT32_C( -856056848)), simde_mm512_set_epi32(INT32_C( 1969107101), INT32_C(-2063427243), INT32_C( -670405092), INT32_C(-1879729053), INT32_C( 1035482990), INT32_C( 1183910939), INT32_C( 1515345934), INT32_C(-1884003639), INT32_C( -638430290), INT32_C(-2007622482), INT32_C( 171336877), INT32_C( 59553613), INT32_C( 165266600), INT32_C( -798384264), INT32_C( 1607584815), INT32_C(-1324336584)), simde_mm512_set_epi32(INT32_C( 1429317129), INT32_C( 1730687689), INT32_C( 1183910939), INT32_C( -798384264), INT32_C( 0), INT32_C( -638430290), INT32_C( 0), INT32_C( 729621709), INT32_C(-1800615955), INT32_C( 0), INT32_C(-2007622482), INT32_C( 0), INT32_C( 0), INT32_C( -603523965), INT32_C( -539592973), INT32_C(-1324336584)) }, { UINT16_C(16328), simde_mm512_set_epi32(INT32_C( 1845744981), INT32_C(-1349190316), INT32_C(-1403674818), INT32_C( -521443925), INT32_C(-1464291783), INT32_C(-1686112999), INT32_C(-1290233716), INT32_C( -364563113), INT32_C( 1520783126), INT32_C( -207159885), INT32_C( 
-104006691), INT32_C( 362759403), INT32_C(-1562242573), INT32_C( -397133039), INT32_C( 568974515), INT32_C(-1726442446)), simde_mm512_set_epi32(INT32_C(-1929379353), INT32_C( -560036292), INT32_C(-1693642327), INT32_C(-1383827159), INT32_C( 1804875719), INT32_C( 1179452315), INT32_C(-1509656190), INT32_C(-1409992701), INT32_C(-1830359468), INT32_C( 635753031), INT32_C( 310246197), INT32_C(-1783943034), INT32_C(-1307643183), INT32_C( -144888334), INT32_C( 621611179), INT32_C( 743650285)), simde_mm512_set_epi32(INT32_C(-2094713086), INT32_C( 197529411), INT32_C( 1055036471), INT32_C( 351897115), INT32_C( 1594003471), INT32_C(-1709813294), INT32_C( -133653364), INT32_C( -51462036), INT32_C( 46796230), INT32_C( 989301899), INT32_C( -691937914), INT32_C( 1667629581), INT32_C( -496700661), INT32_C(-1318801755), INT32_C( 1076515270), INT32_C(-1757573505)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C(-1290233716), INT32_C(-1290233716), INT32_C( 1520783126), INT32_C( 1594003471), INT32_C( -397133039), INT32_C(-1562242573), INT32_C( 1667629581), INT32_C( 1520783126), INT32_C( 0), INT32_C( 0), INT32_C( 1076515270), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(37751), simde_mm512_set_epi32(INT32_C( -594726618), INT32_C( -662725541), INT32_C( 790228415), INT32_C( -26753919), INT32_C( -495897274), INT32_C( 1526994051), INT32_C( 1569343894), INT32_C(-1059990980), INT32_C( -490626870), INT32_C( 1463745126), INT32_C( -957352131), INT32_C( 1122208393), INT32_C(-1814919780), INT32_C(-1891682702), INT32_C( -176064246), INT32_C(-1293286075)), simde_mm512_set_epi32(INT32_C(-1664789378), INT32_C( -204514420), INT32_C( 1112369408), INT32_C( 2113109396), INT32_C(-1679339682), INT32_C( 2128430529), INT32_C(-1262713143), INT32_C( 1026756660), INT32_C(-1268083621), INT32_C( 337660693), INT32_C( 1524090799), INT32_C( -275653210), INT32_C(-1422519849), INT32_C( -495051500), INT32_C( 1851182812), INT32_C( 5027269)), simde_mm512_set_epi32(INT32_C( -501257427), 
INT32_C(-1329431510), INT32_C( 1005777948), INT32_C( 616430734), INT32_C( 1581162255), INT32_C( 1497456456), INT32_C(-1170808415), INT32_C(-1014503666), INT32_C(-1157750165), INT32_C( 1691363299), INT32_C( 1100655145), INT32_C( 673265711), INT32_C( 1544659928), INT32_C(-1956803094), INT32_C( 1970109422), INT32_C(-1197844366)), simde_mm512_set_epi32(INT32_C(-1329431510), INT32_C( 0), INT32_C( 0), INT32_C( 673265711), INT32_C( 0), INT32_C( 0), INT32_C( 1569343894), INT32_C( 673265711), INT32_C( 0), INT32_C( 1100655145), INT32_C( -594726618), INT32_C( 1463745126), INT32_C( 0), INT32_C( 673265711), INT32_C( 616430734), INT32_C( -957352131)) }, { UINT16_C(42809), simde_mm512_set_epi32(INT32_C( 377057639), INT32_C(-1875196295), INT32_C( -129416454), INT32_C( 246939492), INT32_C(-1135915662), INT32_C( -539014135), INT32_C( 1554320066), INT32_C( 1611314079), INT32_C(-1013292897), INT32_C( 1135188428), INT32_C(-1903831246), INT32_C( 1207366326), INT32_C( -366505666), INT32_C( 1379479886), INT32_C( 1577255779), INT32_C( -488475560)), simde_mm512_set_epi32(INT32_C( -7690721), INT32_C( -632623581), INT32_C( 612963145), INT32_C( 1824881051), INT32_C( 582494706), INT32_C(-1899955415), INT32_C( 618457733), INT32_C( 1216635147), INT32_C( 1404062051), INT32_C( 1905825928), INT32_C(-1635796069), INT32_C( 528096299), INT32_C( 259240399), INT32_C( 271372102), INT32_C( 1305969598), INT32_C( 974818283)), simde_mm512_set_epi32(INT32_C( -675362282), INT32_C(-1218762696), INT32_C(-1400182216), INT32_C(-2088680370), INT32_C(-1895497877), INT32_C( 1563893931), INT32_C( 1105770515), INT32_C(-1745770541), INT32_C(-1255255240), INT32_C( 1238532704), INT32_C( 995946229), INT32_C( 119517601), INT32_C( 1389614040), INT32_C(-2032996348), INT32_C( 1996749952), INT32_C( 450477794)), simde_mm512_set_epi32(INT32_C( -675362282), INT32_C( 0), INT32_C( 1554320066), INT32_C( 0), INT32_C( 0), INT32_C( 1554320066), INT32_C(-1903831246), INT32_C(-1135915662), INT32_C( 0), INT32_C( 0), INT32_C(-1895497877), 
INT32_C(-1135915662), INT32_C( 377057639), INT32_C( 0), INT32_C( 0), INT32_C(-1135915662)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_permutex2var_epi32(test_vec[i].k, test_vec[i].a, test_vec[i].idx, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_permutex2var_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i idx; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( 4022993628330696330), INT64_C( -564047204985781920), INT64_C( 4934063986128071877), INT64_C( 8258886799903261224), INT64_C( 5245738308211416456), INT64_C( 8690736315259258337), INT64_C(-5183161890921602420), INT64_C(-2495336383094170141)), simde_mm512_set_epi64(INT64_C(-8115609027568940125), INT64_C( 3504612124823893047), INT64_C(-7514888466798804666), INT64_C( 9113506312589344178), INT64_C(-5948029684411535130), INT64_C(-4862378680423071053), INT64_C( 7355766231574189317), INT64_C( -310150959079746096)), simde_mm512_set_epi64(INT64_C(-7443148953768886026), INT64_C( 8448077846545567514), INT64_C(-7304267332935478206), INT64_C(-1528489088828046422), INT64_C( 1295774678670654457), INT64_C(-8851107363323835123), INT64_C( 2369486750103851747), INT64_C(-5139586436110975467)), simde_mm512_set_epi64(INT64_C( 5245738308211416456), INT64_C( 4022993628330696330), INT64_C( -564047204985781920), INT64_C( 8690736315259258337), INT64_C( -564047204985781920), INT64_C( 5245738308211416456), INT64_C( 4934063986128071877), INT64_C(-2495336383094170141)) }, { simde_mm512_set_epi64(INT64_C(-3094219001013742557), INT64_C(-3379016320921474793), INT64_C( 7772273849745001049), INT64_C(-4229480058937372017), INT64_C( 8318730560275653847), INT64_C(-2664412856586094061), INT64_C( 2083707536546841162), INT64_C( 5404230241318444880)), simde_mm512_set_epi64(INT64_C(-7922172549839933132), INT64_C( 90826243433254935), INT64_C( 1037923706586637130), 
INT64_C( 5568688997300093349), INT64_C( 9199350188047982974), INT64_C(-7693432910203882071), INT64_C(-5762072963977532348), INT64_C( 4491924425059371454)), simde_mm512_set_epi64(INT64_C( 7563354526679147255), INT64_C(-6952412028107066884), INT64_C(-3077616107881632928), INT64_C(-2220298267656761827), INT64_C( 2522518958303333112), INT64_C(-1668307566098600867), INT64_C( 8306832211054389426), INT64_C(-4135341282024622606)), simde_mm512_set_epi64(INT64_C(-4229480058937372017), INT64_C(-3094219001013742557), INT64_C(-1668307566098600867), INT64_C( 7772273849745001049), INT64_C(-6952412028107066884), INT64_C( 8306832211054389426), INT64_C(-4229480058937372017), INT64_C(-6952412028107066884)) }, { simde_mm512_set_epi64(INT64_C(-6876215301736363293), INT64_C(-2253243373865166954), INT64_C( 5866706473820467911), INT64_C(-1945184283153250111), INT64_C(-6043663531296462836), INT64_C(-3201199251206898425), INT64_C(-7517867743898200758), INT64_C( 5023666877462679332)), simde_mm512_set_epi64(INT64_C( 135432210503006619), INT64_C(-7059566968128636366), INT64_C(-1295026765047609725), INT64_C( 5447800525707046939), INT64_C( 1419500527032411112), INT64_C( 5424087511148175828), INT64_C(-4780701435803039630), INT64_C( 6069825193561024149)), simde_mm512_set_epi64(INT64_C(-7404082530836275478), INT64_C(-6011864495242619751), INT64_C(-2639903919112693390), INT64_C( 3391502071027493622), INT64_C( 5698026186558744964), INT64_C( 6269499859520580584), INT64_C( 7385991043015762011), INT64_C( 2173549174497415259)), simde_mm512_set_epi64(INT64_C( 5698026186558744964), INT64_C(-3201199251206898425), INT64_C(-6043663531296462836), INT64_C( 5698026186558744964), INT64_C( 2173549174497415259), INT64_C(-1945184283153250111), INT64_C(-3201199251206898425), INT64_C( 5866706473820467911)) }, { simde_mm512_set_epi64(INT64_C(-7778482448656032654), INT64_C(-7388935565641111344), INT64_C( 2154583157079273400), INT64_C( 4649728279138736034), INT64_C( 1896125478609903946), INT64_C( 6795120210135498653), 
INT64_C(-8532964392806396349), INT64_C(-8044512602622188161)), simde_mm512_set_epi64(INT64_C( 8618855955534148826), INT64_C(-1358620153905394442), INT64_C(-6300735262609682931), INT64_C(-6423460006708841158), INT64_C(-7828848640852632692), INT64_C(-9058711782958006347), INT64_C(-6631984369075385878), INT64_C( 312385656423386943)), simde_mm512_set_epi64(INT64_C( 2330675318709913935), INT64_C( 8512876982035459145), INT64_C( -754385814369639096), INT64_C(-5873166547629617678), INT64_C(-8313572030703954107), INT64_C( 8197482817575228316), INT64_C( 7734796813438689885), INT64_C(-7308014241195865956)), simde_mm512_set_epi64(INT64_C( 8197482817575228316), INT64_C(-7388935565641111344), INT64_C( -754385814369639096), INT64_C( 8197482817575228316), INT64_C(-5873166547629617678), INT64_C( 2154583157079273400), INT64_C( 8197482817575228316), INT64_C( 2330675318709913935)) }, { simde_mm512_set_epi64(INT64_C(-8488192066941669485), INT64_C( 2196446486128224002), INT64_C(-2769327913695750796), INT64_C( 8356403410464371881), INT64_C(-1359912063489860660), INT64_C( 3839494741156011818), INT64_C(-8628380254870238699), INT64_C(-3703720426743693096)), simde_mm512_set_epi64(INT64_C(-5841170484671274964), INT64_C(-3543508984518726324), INT64_C( 3010243785522476560), INT64_C(-1544427551515059156), INT64_C(-6824560769606762223), INT64_C( 5419183893290153987), INT64_C(-6963147146169039734), INT64_C( 796151810828840161)), simde_mm512_set_epi64(INT64_C( 7773392645197032708), INT64_C(-2378134723719136690), INT64_C( 3098859216623929918), INT64_C( 2705178435890570578), INT64_C( 8034863847415814381), INT64_C( 2143618657886453602), INT64_C( 1932054708809515818), INT64_C( -431037512959796468)), simde_mm512_set_epi64(INT64_C( 2705178435890570578), INT64_C( 2705178435890570578), INT64_C(-3703720426743693096), INT64_C( 2705178435890570578), INT64_C(-8628380254870238699), INT64_C(-1359912063489860660), INT64_C( 2143618657886453602), INT64_C(-8628380254870238699)) }, { simde_mm512_set_epi64(INT64_C( 
7017505342718424326), INT64_C( 7336144214089986511), INT64_C(-7372811814600044225), INT64_C( 3369678464815021090), INT64_C(-3624005186939786961), INT64_C( 3053727983599056138), INT64_C( 1044677486690381786), INT64_C( 7246985488966277586)), simde_mm512_set_epi64(INT64_C(-7724828277596580344), INT64_C(-6390405785894387099), INT64_C( 1370770444668698151), INT64_C( 397783669977726534), INT64_C( 618616089164357310), INT64_C(-2236191700094566243), INT64_C( 606989345560642948), INT64_C(-5888219196601577993)), simde_mm512_set_epi64(INT64_C( 6890380242518530506), INT64_C( 7812910065385095491), INT64_C( 6573936547803745655), INT64_C( 5624455476374850044), INT64_C(-8202159596651692767), INT64_C(-5134690790763765861), INT64_C( 6771966217713905101), INT64_C( 5500799798135763834)), simde_mm512_set_epi64(INT64_C( 5500799798135763834), INT64_C(-7372811814600044225), INT64_C( 7017505342718424326), INT64_C( 7336144214089986511), INT64_C( 7812910065385095491), INT64_C( 6573936547803745655), INT64_C( 3369678464815021090), INT64_C( 7017505342718424326)) }, { simde_mm512_set_epi64(INT64_C( 9087942297620858697), INT64_C(-5062502778264465278), INT64_C( 4940486863427147430), INT64_C(-8466656670106113805), INT64_C( 8779594976999061301), INT64_C(-6871795698650725249), INT64_C( 7737606421984431976), INT64_C( 4256585739398952106)), simde_mm512_set_epi64(INT64_C( 3159639098800136017), INT64_C( 7547309067228210654), INT64_C( 986058858743442499), INT64_C( -131493493066513451), INT64_C( 4714195534976331711), INT64_C(-2919200212626154268), INT64_C(-6480751279152247349), INT64_C(-8406130515310490191)), simde_mm512_set_epi64(INT64_C(-3303730405560337432), INT64_C( 1832719708746018971), INT64_C( 7156376354960526422), INT64_C( 2595303813823857983), INT64_C( 86688370885605369), INT64_C( 4396380977508135743), INT64_C( 7341255277548419375), INT64_C( 7383808788818698409)), simde_mm512_set_epi64(INT64_C( 7737606421984431976), INT64_C( 1832719708746018971), INT64_C( 8779594976999061301), INT64_C( 
4940486863427147430), INT64_C(-3303730405560337432), INT64_C(-8466656670106113805), INT64_C( 86688370885605369), INT64_C( 7737606421984431976)) }, { simde_mm512_set_epi64(INT64_C( 5459696121657899945), INT64_C( 2994903846606128408), INT64_C( 5904359179218034313), INT64_C(-2066889829135543468), INT64_C( 6803831723224696242), INT64_C(-1121730792444760624), INT64_C(-7265999827078260931), INT64_C( 1814191055697415083)), simde_mm512_set_epi64(INT64_C( 199279425141616266), INT64_C( 3221796949435356069), INT64_C(-1875768603448185105), INT64_C( 830129211732438864), INT64_C( 3257319649738390908), INT64_C( 4257114197692508298), INT64_C( 8024289238579469643), INT64_C( 2840131019362087365)), simde_mm512_set_epi64(INT64_C( 2502509311262690711), INT64_C( 5723836460071094920), INT64_C( 7759612686570582771), INT64_C( 8703685710852381059), INT64_C( 6587534258249310175), INT64_C( 2710240672169765174), INT64_C( 8721065329514012238), INT64_C( 5835087168664292514)), simde_mm512_set_epi64(INT64_C( 2710240672169765174), INT64_C( 5904359179218034313), INT64_C( 2502509311262690711), INT64_C( 1814191055697415083), INT64_C( 8703685710852381059), INT64_C( 2710240672169765174), INT64_C( 6587534258249310175), INT64_C( 5904359179218034313)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_permutex2var_epi64(test_vec[i].a, test_vec[i].idx, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_permutex2var_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__mmask8 k; simde__m512i idx; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C(-4408197122649025847), INT64_C( 624651997750430240), INT64_C(-3688244718601593553), INT64_C( 4693564151120802069), INT64_C( 7940316924786767481), INT64_C( 6743600876828439814), INT64_C( 8561828216572109007), INT64_C(-8315109086095518889)), UINT8_C( 63), simde_mm512_set_epi64(INT64_C(-4767707706458520415), 
INT64_C(-7083171014951853588), INT64_C(-2076988212358998594), INT64_C( 3698505898575972461), INT64_C(-8586603972668500699), INT64_C( 7848938818320954984), INT64_C(-7002500096438875648), INT64_C( 1477571573764517782)), simde_mm512_set_epi64(INT64_C( 7350668077567080689), INT64_C( 8478919882954811661), INT64_C(-3394066222784588743), INT64_C(-5009306653852991983), INT64_C(-8749971605870264899), INT64_C( 1212032624670585453), INT64_C( 728623586565902494), INT64_C( 4873652658109514141)), simde_mm512_set_epi64(INT64_C(-4408197122649025847), INT64_C( 624651997750430240), INT64_C( 8478919882954811661), INT64_C(-3394066222784588743), INT64_C(-3688244718601593553), INT64_C( 4873652658109514141), INT64_C(-8315109086095518889), INT64_C( 624651997750430240)) }, { simde_mm512_set_epi64(INT64_C(-6111840559061041971), INT64_C( 6656352319933975670), INT64_C( 2357435311113502667), INT64_C(-8860733056306413573), INT64_C(-8949662758380266635), INT64_C( 4701312916269037777), INT64_C(-1461656220613716217), INT64_C(-4369965941555109637)), UINT8_C( 61), simde_mm512_set_epi64(INT64_C(-3815826294263537782), INT64_C( 2205533861506052469), INT64_C( 3531417307031756813), INT64_C( 2291273944182365369), INT64_C( -855682284319457684), INT64_C( 5906689130134529071), INT64_C( 7251917267735594789), INT64_C(-5506395256633894325)), simde_mm512_set_epi64(INT64_C( 7306080674171373254), INT64_C( 2163582539809461657), INT64_C(-7942896186346970451), INT64_C(-2775611318017263858), INT64_C( 7897714815450887445), INT64_C(-2675129847260557604), INT64_C( 2145911307457407401), INT64_C( 3797455296467543827)), simde_mm512_set_epi64(INT64_C(-6111840559061041971), INT64_C( 6656352319933975670), INT64_C(-7942896186346970451), INT64_C( 2145911307457407401), INT64_C(-2775611318017263858), INT64_C( 7306080674171373254), INT64_C(-1461656220613716217), INT64_C( 7897714815450887445)) }, { simde_mm512_set_epi64(INT64_C( -751557688731444383), INT64_C( 5324069355912068288), INT64_C( 5226578984858504214), 
INT64_C(-8776157931044543560), INT64_C(-1848672680316222475), INT64_C( 1658167909352451238), INT64_C( 377173394815185621), INT64_C(-6742373427678247978)), UINT8_C( 82), simde_mm512_set_epi64(INT64_C( 3138707856740708121), INT64_C(-4673519228421997952), INT64_C(-1556073591389999033), INT64_C( 3509487153133496527), INT64_C( -438383259974317574), INT64_C( 4679882440059701274), INT64_C( 8594666725077939668), INT64_C(-2603844271228681340)), simde_mm512_set_epi64(INT64_C( 877966720713550779), INT64_C(-8757547308289839577), INT64_C( 6619480224799141474), INT64_C( 3171924723684651500), INT64_C( 1941135797030545610), INT64_C( 1935432241277000941), INT64_C( 5390015454023535429), INT64_C( -49705421380794940)), simde_mm512_set_epi64(INT64_C( -751557688731444383), INT64_C(-6742373427678247978), INT64_C( 5226578984858504214), INT64_C( 877966720713550779), INT64_C(-1848672680316222475), INT64_C( 1658167909352451238), INT64_C(-8776157931044543560), INT64_C(-6742373427678247978)) }, { simde_mm512_set_epi64(INT64_C( 8280910196874944184), INT64_C( -368934386460614235), INT64_C(-9011857488067354178), INT64_C( 5578921037540219940), INT64_C(-7880186302232587827), INT64_C( 7848707034806784644), INT64_C(-7246117184140796511), INT64_C( 5042844271761388948)), UINT8_C(191), simde_mm512_set_epi64(INT64_C( 6286668337562607931), INT64_C( 1432089847019206822), INT64_C(-2235254547542691893), INT64_C(-2975358417486477451), INT64_C( 5974528986311566652), INT64_C( 4798128784982043356), INT64_C(-3663239326212228984), INT64_C(-9199851098963784696)), simde_mm512_set_epi64(INT64_C(-3486865648830471282), INT64_C( 8151787653682140580), INT64_C( -831601358278995789), INT64_C(-2800664419916301039), INT64_C( 3280702275774868225), INT64_C(-4735905134864699368), INT64_C( 7051416147935021095), INT64_C(-4824857292892203785)), simde_mm512_set_epi64(INT64_C( 3280702275774868225), INT64_C( -368934386460614235), INT64_C( 3280702275774868225), INT64_C(-9011857488067354178), INT64_C(-2800664419916301039), 
INT64_C(-2800664419916301039), INT64_C(-4824857292892203785), INT64_C(-4824857292892203785)) }, { simde_mm512_set_epi64(INT64_C( 3133701381660189032), INT64_C( 2992395291812361347), INT64_C( 7433247024135605559), INT64_C(-7847040024436431351), INT64_C(-7733586635814839612), INT64_C(-3387038813920004365), INT64_C(-6023807055599376167), INT64_C( 2056379472574346663)), UINT8_C( 49), simde_mm512_set_epi64(INT64_C(-7164378700336361334), INT64_C( 4351794567182281042), INT64_C(-1716872434006574729), INT64_C( -689503347190866770), INT64_C(-1389624339165317749), INT64_C(-8184083999390244234), INT64_C( 8331479114169761131), INT64_C( 8817045194671758320)), simde_mm512_set_epi64(INT64_C( 8457250603347908949), INT64_C(-2879367942796632989), INT64_C( 4447365578798205979), INT64_C( 6508361231067538121), INT64_C(-2742037214038451026), INT64_C( 735886283373328205), INT64_C( 709814645617696632), INT64_C( 6904524208941840952)), simde_mm512_set_epi64(INT64_C( 3133701381660189032), INT64_C( 2992395291812361347), INT64_C( 3133701381660189032), INT64_C(-2879367942796632989), INT64_C(-7733586635814839612), INT64_C(-3387038813920004365), INT64_C(-6023807055599376167), INT64_C( 2056379472574346663)) }, { simde_mm512_set_epi64(INT64_C(-5794728280408613058), INT64_C(-2239584601742201287), INT64_C(-7241800185060747124), INT64_C(-1565786646142169322), INT64_C( -889744926927160355), INT64_C( 1558039774934209011), INT64_C(-1705673414097118029), INT64_C(-7415013841836228664)), UINT8_C( 85), simde_mm512_set_epi64(INT64_C(-8286621218977708484), INT64_C(-7274138402675197655), INT64_C( 7751882187628938139), INT64_C(-6483923961368987645), INT64_C(-7861334054348205497), INT64_C( 1332497272334397574), INT64_C(-5616284701672264206), INT64_C( 2669799685376652269)), simde_mm512_set_epi64(INT64_C(-8996724198675706045), INT64_C( 4531347139384149531), INT64_C( 6846192780240638418), INT64_C( -574036823136878484), INT64_C( 200988278415395979), INT64_C(-2971850709824830963), INT64_C(-2133313091920417115), 
INT64_C( 4623597880832003711)), simde_mm512_set_epi64(INT64_C(-5794728280408613058), INT64_C(-2133313091920417115), INT64_C(-7241800185060747124), INT64_C( -889744926927160355), INT64_C( -889744926927160355), INT64_C(-2239584601742201287), INT64_C(-1705673414097118029), INT64_C( 6846192780240638418)) }, { simde_mm512_set_epi64(INT64_C(-2846384524028678721), INT64_C( -114907203345763002), INT64_C( 6558389511800899990), INT64_C(-4552626589350649654), INT64_C( 6286737449187014461), INT64_C( 4819848349711762844), INT64_C(-8124715335380010742), INT64_C(-5554621393600539785)), UINT8_C( 38), simde_mm512_set_epi64(INT64_C(-7150215929147729012), INT64_C( 4777590230543990164), INT64_C(-7212709010936609343), INT64_C(-5423311652387614668), INT64_C(-5446377680450598123), INT64_C( 6545920141858823590), INT64_C(-6109676225565942508), INT64_C( 7950769636462343621)), simde_mm512_set_epi64(INT64_C(-2152884252876571606), INT64_C( 4319783394314419342), INT64_C( 6791040176392068936), INT64_C(-5028583849026132210), INT64_C(-4972499093922240541), INT64_C( 4727277852622403631), INT64_C( 6634263876539878890), INT64_C( 8461555540128585842)), simde_mm512_set_epi64(INT64_C(-2846384524028678721), INT64_C( -114907203345763002), INT64_C(-8124715335380010742), INT64_C(-4552626589350649654), INT64_C( 6286737449187014461), INT64_C( -114907203345763002), INT64_C(-4552626589350649654), INT64_C(-5554621393600539785)) }, { simde_mm512_set_epi64(INT64_C(-8053906756439817478), INT64_C( 1060597045389905266), INT64_C(-2315048080352408894), INT64_C( 6920541276171034783), INT64_C( 4875597175448786738), INT64_C( 5185598888390136126), INT64_C( 5924820997437064035), INT64_C(-2097986554951915719)), UINT8_C(103), simde_mm512_set_epi64(INT64_C( -33031391515316701), INT64_C( 2632656663253186971), INT64_C( 2501795714758146857), INT64_C( 2656255738409935115), INT64_C( 6030400592505510024), INT64_C(-7025690618752263125), INT64_C( 1113429035778363206), INT64_C( 5609096713955085291)), 
simde_mm512_set_epi64(INT64_C(-2900658911065724872), INT64_C(-6013736823954521010), INT64_C(-8141101389788536661), INT64_C( 4749248201355274195), INT64_C(-5391280202694098336), INT64_C( 4277556482249044385), INT64_C( 5968346858124406788), INT64_C( 8575975742580047586)), simde_mm512_set_epi64(INT64_C(-8053906756439817478), INT64_C(-5391280202694098336), INT64_C( 5968346858124406788), INT64_C( 6920541276171034783), INT64_C( 4875597175448786738), INT64_C(-5391280202694098336), INT64_C( 1060597045389905266), INT64_C(-5391280202694098336)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_permutex2var_epi64(test_vec[i].a, test_vec[i].k, test_vec[i].idx, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask2_permutex2var_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i idx; simde__mmask8 k; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C(-4408197122649025847), INT64_C( 624651997750430240), INT64_C(-3688244718601593553), INT64_C( 4693564151120802069), INT64_C( 7940316924786767481), INT64_C( 6743600876828439814), INT64_C( 8561828216572109007), INT64_C(-8315109086095518889)), simde_mm512_set_epi64(INT64_C( -890089152921238147), INT64_C( 8823574133744668217), INT64_C(-6364969741708969084), INT64_C(-1418976459802394322), INT64_C( 4477375336277674053), INT64_C( 1282248710630285123), INT64_C( 6638105739971879812), INT64_C(-8038098956427236545)), UINT8_C( 25), simde_mm512_set_epi64(INT64_C( 7350668077567080689), INT64_C( 8478919882954811661), INT64_C(-3394066222784588743), INT64_C(-5009306653852991983), INT64_C(-8749971605870264899), INT64_C( 1212032624670585453), INT64_C( 728623586565902494), INT64_C( 4873652658109514141)), simde_mm512_set_epi64(INT64_C( -890089152921238147), INT64_C( 8823574133744668217), INT64_C(-6364969741708969084), INT64_C( 8478919882954811661), INT64_C(-3688244718601593553), 
INT64_C( 1282248710630285123), INT64_C( 6638105739971879812), INT64_C( 7350668077567080689)) }, { simde_mm512_set_epi64(INT64_C(-6111840559061041971), INT64_C( 6656352319933975670), INT64_C( 2357435311113502667), INT64_C(-8860733056306413573), INT64_C(-8949662758380266635), INT64_C( 4701312916269037777), INT64_C(-1461656220613716217), INT64_C(-4369965941555109637)), simde_mm512_set_epi64(INT64_C(-3038909907977133732), INT64_C(-7842471790453318316), INT64_C(-7256600765093102205), INT64_C( 3898178537456140670), INT64_C( -393151907512138120), INT64_C( 600214805061827669), INT64_C( 3163434753014979248), INT64_C( 3418472134552461373)), UINT8_C( 51), simde_mm512_set_epi64(INT64_C( 7306080674171373254), INT64_C( 2163582539809461657), INT64_C(-7942896186346970451), INT64_C(-2775611318017263858), INT64_C( 7897714815450887445), INT64_C(-2675129847260557604), INT64_C( 2145911307457407401), INT64_C( 3797455296467543827)), simde_mm512_set_epi64(INT64_C(-3038909907977133732), INT64_C(-7842471790453318316), INT64_C(-8949662758380266635), INT64_C( 2163582539809461657), INT64_C( -393151907512138120), INT64_C( 600214805061827669), INT64_C(-4369965941555109637), INT64_C(-7942896186346970451)) }, { simde_mm512_set_epi64(INT64_C( -751557688731444383), INT64_C( 5324069355912068288), INT64_C( 5226578984858504214), INT64_C(-8776157931044543560), INT64_C(-1848672680316222475), INT64_C( 1658167909352451238), INT64_C( 377173394815185621), INT64_C(-6742373427678247978)), simde_mm512_set_epi64(INT64_C(-8736438908262001915), INT64_C(-1516874692875012272), INT64_C( 2654080637722702840), INT64_C(-4444585746033374017), INT64_C( 7620312646179506248), INT64_C(-4730811392556899367), INT64_C(-7555925455226975890), INT64_C( 6555696811272222802)), UINT8_C( 42), simde_mm512_set_epi64(INT64_C( 877966720713550779), INT64_C(-8757547308289839577), INT64_C( 6619480224799141474), INT64_C( 3171924723684651500), INT64_C( 1941135797030545610), INT64_C( 1935432241277000941), INT64_C( 5390015454023535429), INT64_C( 
-49705421380794940)), simde_mm512_set_epi64(INT64_C(-8736438908262001915), INT64_C(-1516874692875012272), INT64_C( -49705421380794940), INT64_C(-4444585746033374017), INT64_C( -49705421380794940), INT64_C(-4730811392556899367), INT64_C(-8757547308289839577), INT64_C( 6555696811272222802)) }, { simde_mm512_set_epi64(INT64_C( 8280910196874944184), INT64_C( -368934386460614235), INT64_C(-9011857488067354178), INT64_C( 5578921037540219940), INT64_C(-7880186302232587827), INT64_C( 7848707034806784644), INT64_C(-7246117184140796511), INT64_C( 5042844271761388948)), simde_mm512_set_epi64(INT64_C( 8727238559278288416), INT64_C(-2736507802934917164), INT64_C( -638332694652688568), INT64_C(-8196543121330681227), INT64_C( 7593109912492073141), INT64_C( 6300090425305304893), INT64_C(-3323383259847225301), INT64_C( 2001511420457827007)), UINT8_C(139), simde_mm512_set_epi64(INT64_C(-3486865648830471282), INT64_C( 8151787653682140580), INT64_C( -831601358278995789), INT64_C(-2800664419916301039), INT64_C( 3280702275774868225), INT64_C(-4735905134864699368), INT64_C( 7051416147935021095), INT64_C(-4824857292892203785)), simde_mm512_set_epi64(INT64_C( 5042844271761388948), INT64_C(-2736507802934917164), INT64_C( -638332694652688568), INT64_C(-8196543121330681227), INT64_C(-9011857488067354178), INT64_C( 6300090425305304893), INT64_C( 3280702275774868225), INT64_C(-3486865648830471282)) }, { simde_mm512_set_epi64(INT64_C( 3133701381660189032), INT64_C( 2992395291812361347), INT64_C( 7433247024135605559), INT64_C(-7847040024436431351), INT64_C(-7733586635814839612), INT64_C(-3387038813920004365), INT64_C(-6023807055599376167), INT64_C( 2056379472574346663)), simde_mm512_set_epi64(INT64_C(-3309142377794064870), INT64_C( 2332839674293023134), INT64_C( 2188953343788737627), INT64_C(-2360206510841720826), INT64_C(-5994161861779764490), INT64_C(-5448543600791036900), INT64_C(-4779236835234910237), INT64_C(-3676736162600736207)), UINT8_C(135), simde_mm512_set_epi64(INT64_C( 
8457250603347908949), INT64_C(-2879367942796632989), INT64_C( 4447365578798205979), INT64_C( 6508361231067538121), INT64_C(-2742037214038451026), INT64_C( 735886283373328205), INT64_C( 709814645617696632), INT64_C( 6904524208941840952)), simde_mm512_set_epi64(INT64_C( 735886283373328205), INT64_C( 2332839674293023134), INT64_C( 2188953343788737627), INT64_C(-2360206510841720826), INT64_C(-5994161861779764490), INT64_C( 6508361231067538121), INT64_C(-7733586635814839612), INT64_C(-6023807055599376167)) }, { simde_mm512_set_epi64(INT64_C(-5794728280408613058), INT64_C(-2239584601742201287), INT64_C(-7241800185060747124), INT64_C(-1565786646142169322), INT64_C( -889744926927160355), INT64_C( 1558039774934209011), INT64_C(-1705673414097118029), INT64_C(-7415013841836228664)), simde_mm512_set_epi64(INT64_C(-2405337556111781463), INT64_C(-5943492389416716345), INT64_C( 5065709122901801346), INT64_C(-6055872535929098668), INT64_C( 2730538476788120373), INT64_C(-7661976985969691951), INT64_C( -622290655480313685), INT64_C( 3193953655581824341)), UINT8_C(231), simde_mm512_set_epi64(INT64_C(-8996724198675706045), INT64_C( 4531347139384149531), INT64_C( 6846192780240638418), INT64_C( -574036823136878484), INT64_C( 200988278415395979), INT64_C(-2971850709824830963), INT64_C(-2133313091920417115), INT64_C( 4623597880832003711)), simde_mm512_set_epi64(INT64_C(-2133313091920417115), INT64_C(-5794728280408613058), INT64_C( 1558039774934209011), INT64_C(-6055872535929098668), INT64_C( 2730538476788120373), INT64_C(-1705673414097118029), INT64_C( 200988278415395979), INT64_C(-7241800185060747124)) }, { simde_mm512_set_epi64(INT64_C(-2846384524028678721), INT64_C( -114907203345763002), INT64_C( 6558389511800899990), INT64_C(-4552626589350649654), INT64_C( 6286737449187014461), INT64_C( 4819848349711762844), INT64_C(-8124715335380010742), INT64_C(-5554621393600539785)), simde_mm512_set_epi64(INT64_C( -878382744348038912), INT64_C( 9075735751305940830), INT64_C( 9141539516895233737), 
INT64_C( 4409886278677075035), INT64_C( 1450241635103786927), INT64_C(-1183921519114972713), INT64_C(-2126230000484561188), INT64_C( 21591959643435302)), UINT8_C(126), simde_mm512_set_epi64(INT64_C(-2152884252876571606), INT64_C( 4319783394314419342), INT64_C( 6791040176392068936), INT64_C(-5028583849026132210), INT64_C(-4972499093922240541), INT64_C( 4727277852622403631), INT64_C( 6634263876539878890), INT64_C( 8461555540128585842)), simde_mm512_set_epi64(INT64_C( -878382744348038912), INT64_C( 4319783394314419342), INT64_C( 6634263876539878890), INT64_C(-4972499093922240541), INT64_C(-2152884252876571606), INT64_C(-2846384524028678721), INT64_C(-5028583849026132210), INT64_C( 21591959643435302)) }, { simde_mm512_set_epi64(INT64_C(-8053906756439817478), INT64_C( 1060597045389905266), INT64_C(-2315048080352408894), INT64_C( 6920541276171034783), INT64_C( 4875597175448786738), INT64_C( 5185598888390136126), INT64_C( 5924820997437064035), INT64_C(-2097986554951915719)), simde_mm512_set_epi64(INT64_C(-2717097590460443831), INT64_C( 7837804433717602802), INT64_C(-8160246370664650107), INT64_C( 5225408168933214563), INT64_C( 8185460035288021915), INT64_C( 2268156333602877903), INT64_C( 1165534304442745790), INT64_C( 4186812645404930407)), UINT8_C( 31), simde_mm512_set_epi64(INT64_C(-2900658911065724872), INT64_C(-6013736823954521010), INT64_C(-8141101389788536661), INT64_C( 4749248201355274195), INT64_C(-5391280202694098336), INT64_C( 4277556482249044385), INT64_C( 5968346858124406788), INT64_C( 8575975742580047586)), simde_mm512_set_epi64(INT64_C(-2717097590460443831), INT64_C( 7837804433717602802), INT64_C(-8160246370664650107), INT64_C( 4875597175448786738), INT64_C(-5391280202694098336), INT64_C(-2900658911065724872), INT64_C(-6013736823954521010), INT64_C(-8053906756439817478)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask2_permutex2var_epi64(test_vec[i].a, test_vec[i].idx, test_vec[i].k, 
test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_permutex2var_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; simde__m512i idx; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT8_C( 87), simde_mm512_set_epi64(INT64_C( 4086853108457730066), INT64_C(-7738570880062900818), INT64_C(-5609503674875201288), INT64_C( 3966155248134972346), INT64_C( 6201510655001996332), INT64_C( 7633590894537872708), INT64_C(-5593693910291334810), INT64_C(-8884932670315115433)), simde_mm512_set_epi64(INT64_C(-4767707706458520415), INT64_C(-7083171014951853588), INT64_C(-2076988212358998594), INT64_C( 3698505898575972461), INT64_C(-8586603972668500699), INT64_C( 7848938818320954984), INT64_C(-7002500096438875648), INT64_C( 1477571573764517782)), simde_mm512_set_epi64(INT64_C( 7350668077567080689), INT64_C( 8478919882954811661), INT64_C(-3394066222784588743), INT64_C(-5009306653852991983), INT64_C(-8749971605870264899), INT64_C( 1212032624670585453), INT64_C( 728623586565902494), INT64_C( 4873652658109514141)), simde_mm512_set_epi64(INT64_C( 0), INT64_C(-5009306653852991983), INT64_C( 0), INT64_C(-3394066222784588743), INT64_C( 0), INT64_C( 4873652658109514141), INT64_C(-8884932670315115433), INT64_C(-7738570880062900818)) }, { UINT8_C(251), simde_mm512_set_epi64(INT64_C( 7391005387705442660), INT64_C(-5091463632259113685), INT64_C( 685405269785004780), INT64_C(-2602517860068074949), INT64_C( 4704994953943345443), INT64_C( 8877610218385468208), INT64_C( 5776984527519295337), INT64_C( 6526937450820584225)), simde_mm512_set_epi64(INT64_C(-3815826294263537782), INT64_C( 2205533861506052469), INT64_C( 3531417307031756813), INT64_C( 2291273944182365369), INT64_C( -855682284319457684), INT64_C( 5906689130134529071), INT64_C( 7251917267735594789), INT64_C(-5506395256633894325)), simde_mm512_set_epi64(INT64_C( 7306080674171373254), INT64_C( 2163582539809461657), INT64_C(-7942896186346970451), 
INT64_C(-2775611318017263858), INT64_C( 7897714815450887445), INT64_C(-2675129847260557604), INT64_C( 2145911307457407401), INT64_C( 3797455296467543827)), simde_mm512_set_epi64(INT64_C(-2675129847260557604), INT64_C( 685405269785004780), INT64_C(-7942896186346970451), INT64_C( 2145911307457407401), INT64_C(-2775611318017263858), INT64_C( 0), INT64_C( 685405269785004780), INT64_C( 7897714815450887445)) }, { UINT8_C(214), simde_mm512_set_epi64(INT64_C( 3101493959844818499), INT64_C(-7569746812758465314), INT64_C( -95001376835728923), INT64_C( 2811174252033921756), INT64_C( 1971701120159461885), INT64_C(-1168609383370522899), INT64_C(-6996433667044410060), INT64_C( 5237671891022268183)), simde_mm512_set_epi64(INT64_C( 3138707856740708121), INT64_C(-4673519228421997952), INT64_C(-1556073591389999033), INT64_C( 3509487153133496527), INT64_C( -438383259974317574), INT64_C( 4679882440059701274), INT64_C( 8594666725077939668), INT64_C(-2603844271228681340)), simde_mm512_set_epi64(INT64_C( 877966720713550779), INT64_C(-8757547308289839577), INT64_C( 6619480224799141474), INT64_C( 3171924723684651500), INT64_C( 1941135797030545610), INT64_C( 1935432241277000941), INT64_C( 5390015454023535429), INT64_C( -49705421380794940)), simde_mm512_set_epi64(INT64_C( 5390015454023535429), INT64_C( 5237671891022268183), INT64_C( 0), INT64_C( 877966720713550779), INT64_C( 0), INT64_C( 1935432241277000941), INT64_C( 2811174252033921756), INT64_C( 0)) }, { UINT8_C(148), simde_mm512_set_epi64(INT64_C( 5640400914757694451), INT64_C( 3288787407719188513), INT64_C( 6167294471968879452), INT64_C(-8553143016080257248), INT64_C( 1191725626053358671), INT64_C( 2560034487176803702), INT64_C(-4340183042637127984), INT64_C( 463621865143519269)), simde_mm512_set_epi64(INT64_C( 6286668337562607931), INT64_C( 1432089847019206822), INT64_C(-2235254547542691893), INT64_C(-2975358417486477451), INT64_C( 5974528986311566652), INT64_C( 4798128784982043356), INT64_C(-3663239326212228984), 
INT64_C(-9199851098963784696)), simde_mm512_set_epi64(INT64_C(-3486865648830471282), INT64_C( 8151787653682140580), INT64_C( -831601358278995789), INT64_C(-2800664419916301039), INT64_C( 3280702275774868225), INT64_C(-4735905134864699368), INT64_C( 7051416147935021095), INT64_C(-4824857292892203785)), simde_mm512_set_epi64(INT64_C( 3280702275774868225), INT64_C( 0), INT64_C( 0), INT64_C( 6167294471968879452), INT64_C( 0), INT64_C(-2800664419916301039), INT64_C( 0), INT64_C( 0)) }, { UINT8_C(167), simde_mm512_set_epi64(INT64_C(-5234965963681749811), INT64_C(-5331803015084564567), INT64_C(-2592115690296560951), INT64_C( 1248919004007478956), INT64_C( 6138870327161964525), INT64_C(-3131027839562886620), INT64_C(-2317534169293970587), INT64_C( -35491302857917892)), simde_mm512_set_epi64(INT64_C(-7164378700336361334), INT64_C( 4351794567182281042), INT64_C(-1716872434006574729), INT64_C( -689503347190866770), INT64_C(-1389624339165317749), INT64_C(-8184083999390244234), INT64_C( 8331479114169761131), INT64_C( 8817045194671758320)), simde_mm512_set_epi64(INT64_C( 8457250603347908949), INT64_C(-2879367942796632989), INT64_C( 4447365578798205979), INT64_C( 6508361231067538121), INT64_C(-2742037214038451026), INT64_C( 735886283373328205), INT64_C( 709814645617696632), INT64_C( 6904524208941840952)), simde_mm512_set_epi64(INT64_C( 735886283373328205), INT64_C( 0), INT64_C(-5234965963681749811), INT64_C( 0), INT64_C( 0), INT64_C(-5331803015084564567), INT64_C(-2742037214038451026), INT64_C( -35491302857917892)) }, { UINT8_C(200), simde_mm512_set_epi64(INT64_C( 7927414333096918356), INT64_C(-6028737433755228757), INT64_C(-6289085317177674471), INT64_C(-5541511610486147753), INT64_C( 6531713794566454707), INT64_C( -446705336047418133), INT64_C(-6709780755556058351), INT64_C( 2443726936750986290)), simde_mm512_set_epi64(INT64_C(-8286621218977708484), INT64_C(-7274138402675197655), INT64_C( 7751882187628938139), INT64_C(-6483923961368987645), INT64_C(-7861334054348205497), 
INT64_C( 1332497272334397574), INT64_C(-5616284701672264206), INT64_C( 2669799685376652269)), simde_mm512_set_epi64(INT64_C(-8996724198675706045), INT64_C( 4531347139384149531), INT64_C( 6846192780240638418), INT64_C( -574036823136878484), INT64_C( 200988278415395979), INT64_C(-2971850709824830963), INT64_C(-2133313091920417115), INT64_C( 4623597880832003711)), simde_mm512_set_epi64(INT64_C( -574036823136878484), INT64_C(-2133313091920417115), INT64_C( 0), INT64_C( 0), INT64_C( 7927414333096918356), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C(119), simde_mm512_set_epi64(INT64_C(-2554331370738443173), INT64_C( 3394005203063129217), INT64_C(-2129862572478557053), INT64_C( 6740280704142266940), INT64_C(-2107226359725098394), INT64_C(-4111796092278699383), INT64_C(-7795021097560230286), INT64_C( -756190175563217595)), simde_mm512_set_epi64(INT64_C(-7150215929147729012), INT64_C( 4777590230543990164), INT64_C(-7212709010936609343), INT64_C(-5423311652387614668), INT64_C(-5446377680450598123), INT64_C( 6545920141858823590), INT64_C(-6109676225565942508), INT64_C( 7950769636462343621)), simde_mm512_set_epi64(INT64_C(-2152884252876571606), INT64_C( 4319783394314419342), INT64_C( 6791040176392068936), INT64_C(-5028583849026132210), INT64_C(-4972499093922240541), INT64_C( 4727277852622403631), INT64_C( 6634263876539878890), INT64_C( 8461555540128585842)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 6740280704142266940), INT64_C(-7795021097560230286), INT64_C( 6740280704142266940), INT64_C( 0), INT64_C( 3394005203063129217), INT64_C( 6740280704142266940), INT64_C(-2129862572478557053)) }, { UINT8_C( 57), simde_mm512_set_epi64(INT64_C( 1619450230631745145), INT64_C( -555839437247348892), INT64_C(-4878720615548236791), INT64_C( 6675753852597875615), INT64_C(-4352059852748908084), INT64_C(-8176892937465564490), INT64_C(-1574129847889219250), INT64_C( 6774261992038495320)), simde_mm512_set_epi64(INT64_C( -33031391515316701), INT64_C( 2632656663253186971), INT64_C( 
2501795714758146857), INT64_C( 2656255738409935115), INT64_C( 6030400592505510024), INT64_C(-7025690618752263125), INT64_C( 1113429035778363206), INT64_C( 5609096713955085291)), simde_mm512_set_epi64(INT64_C(-2900658911065724872), INT64_C(-6013736823954521010), INT64_C(-8141101389788536661), INT64_C( 4749248201355274195), INT64_C(-5391280202694098336), INT64_C( 4277556482249044385), INT64_C( 5968346858124406788), INT64_C( 8575975742580047586)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 5968346858124406788), INT64_C(-5391280202694098336), INT64_C( 8575975742580047586), INT64_C( 0), INT64_C( 0), INT64_C(-5391280202694098336)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_permutex2var_epi64(test_vec[i].k, test_vec[i].a, test_vec[i].idx, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_permutex2var_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[64]; const int8_t idx[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { { { INT8_C( 94), -INT8_C( 25), INT8_C( 8), -INT8_C( 65), INT8_C( 59), -INT8_C( 67), -INT8_C( 63), INT8_C( 4), INT8_C( 65), INT8_C( 103), -INT8_C( 120), INT8_C( 124), -INT8_C( 25), INT8_C( 12), INT8_C( 58), INT8_C( 2), INT8_C( 8), -INT8_C( 112), INT8_C( 93), -INT8_C( 118), INT8_C( 116), INT8_C( 122), INT8_C( 121), -INT8_C( 110), -INT8_C( 13), INT8_C( 108), INT8_C( 80), INT8_C( 119), -INT8_C( 53), INT8_C( 104), INT8_C( 114), INT8_C( 41), INT8_C( 79), INT8_C( 122), -INT8_C( 24), -INT8_C( 118), INT8_C( 55), -INT8_C( 87), -INT8_C( 114), INT8_C( 120), INT8_C( 16), INT8_C( 22), -INT8_C( 11), -INT8_C( 9), INT8_C( 34), INT8_C( 47), -INT8_C( 7), INT8_C( 43), -INT8_C( 65), INT8_C( 86), -INT8_C( 75), INT8_C( 51), -INT8_C( 47), INT8_C( 46), -INT8_C( 58), -INT8_C( 60), -INT8_C( 102), INT8_C( 22), INT8_C( 59), INT8_C( 101), INT8_MAX, -INT8_C( 83), -INT8_C( 114), -INT8_C( 50) }, { INT8_C( 39), 
INT8_C( 118), INT8_C( 89), INT8_C( 95), INT8_C( 31), -INT8_C( 25), -INT8_C( 41), INT8_C( 47), -INT8_C( 2), -INT8_C( 52), INT8_C( 39), INT8_C( 32), -INT8_C( 5), INT8_C( 32), INT8_C( 75), -INT8_C( 70), INT8_C( 119), INT8_C( 1), -INT8_C( 18), INT8_C( 72), INT8_C( 47), -INT8_C( 76), INT8_C( 12), -INT8_C( 54), -INT8_C( 54), INT8_C( 72), INT8_C( 47), INT8_C( 73), -INT8_C( 11), -INT8_C( 66), INT8_C( 24), INT8_C( 29), INT8_C( 52), INT8_C( 113), INT8_C( 124), INT8_C( 84), INT8_C( 88), INT8_C( 83), -INT8_C( 125), INT8_C( 86), INT8_C( 32), -INT8_C( 86), INT8_C( 119), INT8_C( 27), -INT8_C( 53), -INT8_C( 62), -INT8_C( 42), INT8_C( 66), -INT8_C( 61), -INT8_C( 60), -INT8_C( 118), -INT8_C( 13), INT8_C( 120), -INT8_C( 106), -INT8_C( 67), INT8_C( 66), -INT8_C( 34), -INT8_C( 20), -INT8_C( 116), -INT8_C( 44), -INT8_C( 86), -INT8_C( 92), -INT8_C( 15), -INT8_C( 33) }, { INT8_C( 21), INT8_C( 109), INT8_C( 51), INT8_C( 109), -INT8_C( 64), -INT8_C( 74), -INT8_C( 60), -INT8_C( 32), INT8_C( 97), INT8_C( 59), -INT8_C( 4), INT8_C( 44), -INT8_C( 3), -INT8_C( 46), INT8_C( 110), -INT8_C( 63), -INT8_C( 106), -INT8_C( 8), -INT8_C( 76), INT8_C( 14), -INT8_C( 114), INT8_C( 113), INT8_C( 80), INT8_C( 109), INT8_C( 93), -INT8_C( 36), INT8_C( 65), INT8_C( 8), INT8_MIN, INT8_C( 50), -INT8_C( 25), -INT8_C( 107), -INT8_C( 97), INT8_C( 26), INT8_C( 3), INT8_C( 95), -INT8_C( 48), -INT8_C( 57), INT8_C( 64), INT8_C( 49), INT8_C( 2), INT8_C( 60), INT8_C( 93), -INT8_C( 1), INT8_C( 14), -INT8_C( 53), -INT8_C( 64), -INT8_C( 92), -INT8_C( 61), INT8_C( 116), -INT8_C( 78), INT8_C( 82), -INT8_C( 27), INT8_C( 2), -INT8_C( 65), INT8_C( 67), -INT8_C( 33), INT8_C( 0), INT8_C( 75), INT8_C( 95), INT8_C( 50), INT8_C( 50), -INT8_C( 11), -INT8_C( 47) }, { INT8_C( 120), -INT8_C( 65), -INT8_C( 36), -INT8_C( 107), INT8_C( 41), INT8_C( 49), INT8_C( 109), INT8_C( 43), -INT8_C( 11), -INT8_C( 3), INT8_C( 120), INT8_C( 79), INT8_C( 95), INT8_C( 79), INT8_C( 44), INT8_C( 59), INT8_C( 67), -INT8_C( 25), -INT8_C( 64), INT8_C( 97), 
INT8_C( 43), -INT8_C( 47), -INT8_C( 25), -INT8_C( 4), -INT8_C( 4), INT8_C( 97), INT8_C( 43), INT8_C( 59), INT8_C( 2), -INT8_C( 114), -INT8_C( 13), INT8_C( 104), -INT8_C( 47), INT8_C( 116), INT8_C( 50), -INT8_C( 114), INT8_C( 93), INT8_C( 14), -INT8_C( 65), INT8_C( 80), INT8_C( 79), -INT8_C( 11), INT8_C( 67), INT8_C( 119), INT8_C( 44), INT8_C( 51), INT8_C( 80), INT8_C( 51), INT8_C( 109), -INT8_C( 64), -INT8_C( 120), INT8_C( 82), -INT8_C( 33), INT8_C( 121), -INT8_C( 83), INT8_C( 51), -INT8_C( 25), INT8_C( 14), -INT8_C( 25), -INT8_C( 114), -INT8_C( 11), INT8_C( 55), INT8_C( 116), -INT8_C( 107) } }, { { INT8_C( 76), -INT8_C( 8), INT8_C( 48), INT8_C( 28), -INT8_C( 65), INT8_C( 112), INT8_C( 78), -INT8_C( 63), -INT8_C( 84), -INT8_C( 85), -INT8_C( 64), -INT8_C( 70), INT8_C( 119), -INT8_C( 127), INT8_C( 94), INT8_C( 58), -INT8_C( 11), INT8_C( 16), -INT8_C( 116), -INT8_C( 37), INT8_C( 19), INT8_C( 75), INT8_C( 30), -INT8_C( 14), INT8_C( 75), INT8_C( 105), INT8_C( 81), INT8_C( 125), -INT8_C( 101), INT8_C( 70), INT8_C( 78), -INT8_C( 25), INT8_C( 62), INT8_MAX, INT8_C( 3), -INT8_C( 3), -INT8_C( 17), INT8_C( 81), -INT8_C( 66), -INT8_C( 100), -INT8_C( 3), INT8_MAX, INT8_C( 86), INT8_C( 116), INT8_C( 0), -INT8_C( 75), -INT8_C( 82), -INT8_C( 11), -INT8_C( 59), INT8_C( 59), -INT8_C( 48), -INT8_C( 40), -INT8_C( 122), -INT8_C( 18), -INT8_C( 54), -INT8_C( 46), INT8_C( 87), INT8_C( 28), INT8_C( 79), -INT8_C( 14), INT8_C( 98), -INT8_C( 98), -INT8_C( 39), -INT8_C( 95) }, { INT8_C( 29), -INT8_C( 35), -INT8_C( 98), INT8_C( 12), INT8_C( 46), INT8_C( 93), -INT8_C( 88), INT8_C( 43), -INT8_C( 36), -INT8_C( 1), -INT8_C( 97), -INT8_C( 36), -INT8_C( 76), INT8_C( 78), -INT8_C( 47), INT8_C( 121), -INT8_C( 119), -INT8_C( 94), INT8_C( 82), INT8_C( 15), -INT8_C( 112), INT8_C( 28), -INT8_C( 31), -INT8_C( 24), INT8_C( 56), INT8_C( 49), -INT8_C( 38), -INT8_C( 101), -INT8_C( 49), -INT8_C( 76), INT8_C( 60), -INT8_C( 20), -INT8_C( 111), -INT8_C( 38), -INT8_C( 8), -INT8_C( 65), INT8_C( 55), -INT8_C( 95), 
-INT8_C( 21), INT8_C( 19), -INT8_C( 96), -INT8_C( 118), -INT8_C( 17), INT8_C( 84), -INT8_C( 40), -INT8_C( 63), -INT8_C( 51), INT8_C( 97), INT8_C( 99), INT8_C( 31), INT8_C( 113), -INT8_C( 13), INT8_C( 60), INT8_C( 82), -INT8_C( 37), INT8_C( 116), -INT8_C( 125), -INT8_C( 74), INT8_C( 15), INT8_C( 82), INT8_C( 106), INT8_C( 75), INT8_C( 62), -INT8_C( 5) }, { INT8_C( 38), INT8_C( 55), -INT8_C( 70), INT8_C( 93), -INT8_C( 40), -INT8_C( 91), INT8_C( 113), INT8_C( 120), INT8_C( 48), INT8_C( 96), -INT8_C( 52), INT8_C( 8), INT8_C( 33), -INT8_C( 103), INT8_C( 106), -INT8_C( 124), -INT8_C( 71), -INT8_C( 37), INT8_C( 120), -INT8_C( 11), INT8_C( 45), INT8_C( 83), INT8_C( 105), -INT8_C( 79), INT8_C( 9), INT8_C( 121), INT8_C( 3), INT8_C( 115), -INT8_C( 60), INT8_C( 66), INT8_C( 110), -INT8_C( 22), INT8_C( 121), INT8_C( 41), INT8_C( 72), INT8_C( 81), -INT8_C( 50), -INT8_C( 71), -INT8_C( 55), -INT8_C( 2), INT8_C( 25), -INT8_C( 107), INT8_C( 7), INT8_C( 59), INT8_C( 46), INT8_C( 113), -INT8_C( 65), -INT8_C( 25), INT8_C( 76), INT8_C( 55), -INT8_C( 36), INT8_C( 121), -INT8_C( 117), INT8_C( 70), INT8_C( 42), -INT8_C( 108), -INT8_C( 65), INT8_C( 46), INT8_C( 8), -INT8_C( 125), INT8_C( 112), INT8_C( 118), INT8_C( 110), -INT8_C( 23) }, { INT8_C( 70), INT8_C( 66), INT8_C( 78), INT8_C( 119), -INT8_C( 82), INT8_C( 66), -INT8_C( 3), INT8_C( 116), -INT8_C( 60), -INT8_C( 23), -INT8_C( 25), -INT8_C( 60), -INT8_C( 122), INT8_C( 106), -INT8_C( 37), INT8_C( 46), -INT8_C( 85), INT8_C( 3), INT8_C( 120), INT8_C( 58), -INT8_C( 11), -INT8_C( 101), INT8_C( 41), INT8_C( 25), INT8_C( 87), INT8_C( 59), INT8_C( 3), INT8_C( 125), -INT8_C( 124), -INT8_C( 122), INT8_C( 98), INT8_C( 46), INT8_C( 16), INT8_C( 3), -INT8_C( 65), -INT8_C( 95), -INT8_C( 46), INT8_MAX, INT8_C( 59), -INT8_C( 37), INT8_C( 62), -INT8_C( 64), -INT8_C( 25), INT8_C( 45), INT8_C( 9), INT8_C( 55), -INT8_C( 103), INT8_C( 41), INT8_C( 81), -INT8_C( 25), INT8_C( 55), INT8_C( 121), INT8_C( 98), INT8_C( 120), INT8_C( 115), -INT8_C( 117), INT8_C( 
28), -INT8_C( 54), INT8_C( 58), INT8_C( 120), INT8_C( 7), INT8_C( 8), -INT8_C( 39), -INT8_C( 125) } }, { { -INT8_C( 97), -INT8_C( 74), INT8_C( 58), INT8_C( 110), INT8_C( 111), INT8_C( 3), INT8_C( 108), -INT8_C( 120), -INT8_C( 104), INT8_C( 115), -INT8_C( 61), -INT8_C( 58), -INT8_C( 28), -INT8_C( 125), -INT8_C( 82), INT8_C( 48), -INT8_C( 70), -INT8_C( 118), -INT8_C( 86), INT8_C( 69), -INT8_C( 48), -INT8_C( 44), -INT8_C( 38), -INT8_C( 113), INT8_C( 2), -INT8_C( 30), INT8_C( 19), INT8_C( 114), INT8_C( 88), -INT8_C( 127), INT8_C( 91), -INT8_C( 8), INT8_C( 55), -INT8_C( 107), INT8_C( 102), -INT8_C( 90), -INT8_C( 104), -INT8_C( 46), INT8_C( 46), INT8_C( 48), INT8_C( 70), -INT8_C( 14), -INT8_C( 9), INT8_C( 42), INT8_C( 117), -INT8_C( 91), INT8_C( 91), INT8_C( 47), INT8_C( 47), INT8_C( 5), INT8_C( 117), INT8_C( 0), -INT8_C( 39), INT8_C( 79), -INT8_C( 113), -INT8_C( 36), INT8_C( 49), -INT8_C( 94), INT8_C( 78), -INT8_C( 119), INT8_C( 35), -INT8_C( 86), -INT8_C( 127), INT8_C( 90) }, { INT8_C( 63), -INT8_C( 25), INT8_C( 0), -INT8_C( 40), -INT8_C( 70), INT8_C( 47), INT8_C( 8), INT8_C( 0), INT8_C( 33), -INT8_C( 1), INT8_C( 42), -INT8_C( 106), -INT8_C( 92), -INT8_C( 123), -INT8_C( 59), -INT8_C( 44), -INT8_C( 118), INT8_C( 58), -INT8_C( 44), INT8_C( 100), -INT8_C( 119), INT8_C( 99), INT8_C( 64), -INT8_C( 70), INT8_C( 6), -INT8_C( 114), INT8_C( 68), INT8_C( 41), INT8_C( 56), -INT8_C( 59), -INT8_C( 124), INT8_C( 120), -INT8_C( 83), -INT8_C( 124), INT8_C( 80), INT8_C( 103), -INT8_C( 77), INT8_C( 88), INT8_C( 103), -INT8_C( 44), INT8_C( 88), -INT8_C( 111), INT8_C( 106), -INT8_C( 4), INT8_C( 23), INT8_C( 48), -INT8_C( 48), -INT8_C( 95), INT8_C( 106), -INT8_C( 92), INT8_C( 5), -INT8_C( 12), INT8_C( 8), INT8_C( 69), -INT8_C( 82), INT8_C( 14), -INT8_C( 44), -INT8_C( 14), INT8_C( 55), INT8_C( 12), -INT8_C( 72), -INT8_C( 69), -INT8_C( 124), INT8_C( 101) }, { INT8_C( 64), -INT8_C( 44), -INT8_C( 52), -INT8_C( 13), INT8_C( 45), INT8_C( 51), -INT8_C( 56), -INT8_C( 123), -INT8_C( 60), INT8_C( 
50), -INT8_C( 127), -INT8_C( 37), INT8_C( 98), INT8_C( 82), INT8_C( 125), -INT8_C( 51), -INT8_C( 10), -INT8_C( 126), -INT8_C( 63), -INT8_C( 2), -INT8_C( 56), INT8_C( 111), INT8_C( 12), -INT8_C( 100), INT8_C( 98), INT8_C( 68), -INT8_C( 88), INT8_C( 26), -INT8_C( 1), INT8_C( 45), INT8_MAX, INT8_C( 63), INT8_C( 1), INT8_C( 75), INT8_C( 51), INT8_C( 46), INT8_C( 126), -INT8_C( 5), -INT8_C( 77), INT8_C( 66), INT8_C( 45), INT8_C( 53), INT8_C( 30), -INT8_C( 112), -INT8_C( 121), -INT8_C( 101), INT8_C( 93), INT8_C( 125), INT8_C( 29), INT8_C( 30), INT8_C( 124), -INT8_C( 27), -INT8_C( 115), -INT8_C( 120), -INT8_C( 127), -INT8_C( 17), -INT8_C( 52), INT8_C( 42), INT8_C( 9), -INT8_C( 52), INT8_C( 87), -INT8_C( 120), INT8_C( 11), INT8_C( 88) }, { INT8_C( 90), INT8_C( 66), -INT8_C( 97), INT8_C( 98), INT8_C( 78), INT8_C( 47), -INT8_C( 104), -INT8_C( 97), -INT8_C( 107), INT8_C( 88), -INT8_C( 9), -INT8_C( 38), -INT8_C( 104), INT8_C( 3), INT8_C( 51), -INT8_C( 56), -INT8_C( 61), INT8_C( 78), -INT8_C( 56), INT8_C( 126), INT8_C( 115), INT8_C( 46), INT8_C( 64), INT8_C( 78), INT8_C( 108), -INT8_C( 82), INT8_C( 45), -INT8_C( 14), INT8_C( 49), INT8_C( 51), INT8_C( 111), -INT8_C( 52), -INT8_C( 91), INT8_C( 111), -INT8_C( 10), INT8_C( 66), INT8_C( 0), INT8_C( 98), INT8_C( 66), -INT8_C( 56), INT8_C( 98), -INT8_C( 118), INT8_C( 30), INT8_C( 87), -INT8_C( 113), INT8_C( 47), -INT8_C( 10), -INT8_C( 107), INT8_C( 30), -INT8_C( 104), INT8_C( 3), -INT8_C( 115), -INT8_C( 104), INT8_C( 51), INT8_C( 91), -INT8_C( 82), -INT8_C( 56), INT8_C( 124), -INT8_C( 36), -INT8_C( 28), INT8_C( 49), -INT8_C( 119), INT8_C( 111), -INT8_C( 5) } }, { { -INT8_C( 45), INT8_C( 62), -INT8_C( 121), INT8_C( 81), INT8_C( 57), INT8_C( 58), -INT8_C( 108), INT8_C( 103), INT8_C( 111), -INT8_C( 78), -INT8_C( 9), -INT8_C( 10), INT8_C( 77), INT8_C( 84), INT8_C( 116), INT8_C( 106), INT8_C( 114), -INT8_C( 16), INT8_C( 80), -INT8_C( 1), INT8_C( 120), -INT8_C( 47), -INT8_C( 17), INT8_C( 69), -INT8_C( 5), -INT8_C( 8), INT8_C( 17), INT8_C( 
82), -INT8_C( 127), INT8_C( 28), -INT8_C( 85), INT8_C( 84), INT8_C( 91), INT8_C( 50), -INT8_C( 90), -INT8_C( 108), INT8_C( 108), INT8_C( 58), -INT8_C( 5), -INT8_C( 36), -INT8_C( 20), -INT8_C( 14), -INT8_C( 46), INT8_C( 57), INT8_C( 70), INT8_C( 70), -INT8_C( 93), -INT8_C( 72), INT8_C( 54), -INT8_C( 13), -INT8_C( 72), -INT8_C( 81), -INT8_C( 59), -INT8_C( 89), -INT8_C( 12), -INT8_C( 64), -INT8_C( 97), INT8_C( 5), INT8_C( 19), INT8_C( 32), INT8_C( 33), -INT8_C( 66), INT8_C( 117), INT8_C( 124) }, { -INT8_C( 16), INT8_C( 27), INT8_C( 17), INT8_C( 92), INT8_C( 85), INT8_C( 12), INT8_C( 56), INT8_C( 65), -INT8_C( 1), INT8_C( 11), INT8_C( 122), INT8_C( 69), INT8_C( 81), INT8_C( 29), -INT8_C( 2), -INT8_C( 120), INT8_C( 17), -INT8_C( 74), INT8_C( 55), -INT8_C( 42), INT8_C( 93), INT8_C( 43), -INT8_C( 106), -INT8_C( 4), INT8_C( 48), -INT8_C( 87), INT8_C( 29), INT8_C( 81), INT8_C( 103), -INT8_C( 110), -INT8_C( 50), INT8_C( 87), -INT8_C( 83), -INT8_C( 33), -INT8_C( 76), INT8_C( 2), -INT8_C( 21), -INT8_C( 20), INT8_C( 67), -INT8_C( 22), -INT8_C( 9), -INT8_C( 67), INT8_C( 48), INT8_C( 73), -INT8_C( 38), INT8_C( 46), -INT8_C( 47), -INT8_C( 21), -INT8_C( 28), INT8_C( 8), -INT8_C( 63), INT8_C( 65), INT8_C( 51), INT8_C( 88), INT8_C( 61), INT8_C( 99), INT8_C( 1), INT8_C( 90), -INT8_C( 76), INT8_C( 105), -INT8_C( 20), -INT8_C( 126), -INT8_C( 64), -INT8_C( 103) }, { INT8_C( 97), INT8_C( 116), -INT8_C( 101), INT8_C( 77), INT8_C( 97), -INT8_C( 34), INT8_C( 55), INT8_C( 88), -INT8_C( 101), INT8_C( 103), -INT8_C( 95), INT8_C( 118), -INT8_C( 107), INT8_C( 114), INT8_C( 97), INT8_C( 121), INT8_C( 122), INT8_C( 35), -INT8_C( 70), -INT8_C( 83), INT8_C( 123), -INT8_C( 8), INT8_C( 16), INT8_C( 124), INT8_C( 82), -INT8_C( 59), -INT8_C( 27), INT8_C( 63), INT8_C( 71), -INT8_C( 90), -INT8_C( 40), -INT8_C( 87), INT8_C( 26), INT8_C( 116), -INT8_C( 10), INT8_C( 123), INT8_C( 82), INT8_C( 45), -INT8_C( 44), -INT8_C( 18), -INT8_C( 107), INT8_C( 117), INT8_C( 100), INT8_C( 42), -INT8_C( 24), -INT8_C( 59), 
-INT8_C( 92), INT8_C( 98), -INT8_C( 24), INT8_C( 94), INT8_C( 16), INT8_C( 99), INT8_C( 86), INT8_C( 32), -INT8_C( 32), -INT8_C( 87), -INT8_C( 27), -INT8_C( 59), -INT8_C( 24), INT8_C( 45), INT8_C( 107), -INT8_C( 64), -INT8_C( 42), -INT8_C( 122) }, { -INT8_C( 24), INT8_C( 82), -INT8_C( 16), INT8_C( 71), -INT8_C( 8), INT8_C( 77), -INT8_C( 97), INT8_C( 116), -INT8_C( 122), -INT8_C( 10), -INT8_C( 24), -INT8_C( 34), INT8_C( 35), INT8_C( 28), -INT8_C( 42), INT8_C( 111), -INT8_C( 16), -INT8_C( 12), -INT8_C( 64), INT8_C( 16), -INT8_C( 90), INT8_C( 57), -INT8_C( 17), INT8_C( 107), INT8_C( 54), -INT8_C( 14), INT8_C( 28), INT8_C( 35), -INT8_C( 18), INT8_C( 80), INT8_C( 97), INT8_C( 124), INT8_C( 70), -INT8_C( 87), -INT8_C( 59), -INT8_C( 121), INT8_C( 42), -INT8_C( 24), INT8_C( 77), INT8_C( 100), -INT8_C( 87), -INT8_C( 66), INT8_C( 54), INT8_C( 103), -INT8_C( 27), -INT8_C( 93), INT8_C( 35), INT8_C( 42), INT8_C( 82), INT8_C( 111), INT8_C( 116), INT8_C( 116), -INT8_C( 81), INT8_C( 82), -INT8_C( 66), INT8_C( 123), INT8_C( 62), -INT8_C( 27), -INT8_C( 59), INT8_C( 117), -INT8_C( 24), -INT8_C( 121), INT8_C( 97), -INT8_C( 8) } }, { { INT8_C( 52), -INT8_C( 52), INT8_C( 1), -INT8_C( 121), -INT8_C( 7), -INT8_C( 43), INT8_C( 117), -INT8_C( 114), INT8_C( 75), -INT8_C( 39), -INT8_C( 71), INT8_C( 51), -INT8_C( 98), INT8_C( 93), -INT8_C( 107), -INT8_C( 121), -INT8_C( 69), -INT8_C( 91), -INT8_C( 22), INT8_C( 18), -INT8_C( 58), -INT8_C( 54), -INT8_C( 69), -INT8_C( 85), -INT8_C( 112), -INT8_C( 93), -INT8_C( 40), -INT8_C( 5), INT8_C( 99), -INT8_C( 82), -INT8_C( 127), -INT8_C( 104), INT8_C( 122), -INT8_C( 125), INT8_C( 31), INT8_C( 116), INT8_C( 88), -INT8_C( 108), INT8_C( 2), -INT8_C( 93), INT8_C( 109), -INT8_C( 69), -INT8_C( 42), INT8_C( 11), INT8_C( 24), INT8_C( 108), -INT8_C( 110), -INT8_C( 44), INT8_C( 17), INT8_C( 125), -INT8_C( 26), -INT8_C( 41), INT8_C( 71), -INT8_C( 95), -INT8_C( 125), -INT8_C( 41), INT8_C( 68), INT8_C( 91), -INT8_C( 45), -INT8_C( 89), INT8_C( 10), INT8_C( 84), INT8_C( 
63), -INT8_C( 124) }, { -INT8_C( 41), INT8_C( 94), -INT8_C( 8), INT8_C( 48), -INT8_C( 14), -INT8_C( 5), -INT8_C( 45), INT8_C( 95), -INT8_C( 74), -INT8_C( 86), INT8_C( 107), -INT8_C( 49), INT8_C( 22), -INT8_C( 3), -INT8_C( 93), INT8_C( 39), INT8_C( 122), -INT8_C( 119), -INT8_C( 1), -INT8_C( 62), INT8_C( 42), -INT8_C( 126), -INT8_C( 103), INT8_C( 110), -INT8_C( 35), INT8_C( 108), INT8_C( 21), -INT8_C( 25), -INT8_C( 63), INT8_C( 85), INT8_C( 108), -INT8_C( 104), -INT8_C( 77), INT8_C( 100), -INT8_C( 56), -INT8_C( 90), INT8_C( 95), -INT8_C( 100), INT8_C( 5), INT8_C( 22), INT8_C( 70), INT8_C( 112), -INT8_C( 27), INT8_C( 92), INT8_C( 110), -INT8_C( 120), -INT8_C( 125), -INT8_C( 24), INT8_C( 17), -INT8_C( 126), -INT8_C( 86), INT8_C( 59), INT8_C( 4), INT8_C( 68), -INT8_C( 87), -INT8_C( 30), -INT8_C( 80), -INT8_C( 66), -INT8_C( 55), INT8_C( 113), INT8_C( 19), INT8_C( 53), INT8_C( 10), -INT8_C( 57) }, { -INT8_C( 102), -INT8_C( 46), INT8_C( 109), -INT8_C( 7), INT8_C( 110), INT8_C( 114), INT8_C( 15), -INT8_C( 76), -INT8_C( 29), -INT8_C( 12), INT8_C( 16), INT8_C( 81), INT8_C( 124), -INT8_C( 108), INT8_C( 57), -INT8_C( 115), INT8_C( 22), -INT8_C( 28), -INT8_C( 56), INT8_C( 27), INT8_C( 40), INT8_C( 113), -INT8_C( 3), -INT8_C( 40), INT8_C( 48), -INT8_C( 58), INT8_C( 74), INT8_C( 67), -INT8_C( 4), INT8_C( 84), INT8_C( 10), -INT8_C( 106), INT8_C( 38), INT8_C( 119), -INT8_C( 113), -INT8_C( 107), -INT8_C( 22), -INT8_C( 97), INT8_C( 73), -INT8_C( 51), -INT8_C( 109), INT8_C( 90), INT8_C( 30), INT8_C( 16), -INT8_C( 18), INT8_C( 87), -INT8_C( 99), INT8_C( 4), INT8_C( 59), INT8_C( 102), INT8_C( 31), INT8_C( 99), -INT8_C( 41), INT8_C( 28), INT8_C( 60), INT8_C( 7), -INT8_C( 29), -INT8_C( 122), INT8_C( 75), -INT8_C( 33), -INT8_C( 38), INT8_C( 85), INT8_C( 117), INT8_C( 0) }, { -INT8_C( 40), INT8_C( 10), -INT8_C( 29), INT8_C( 17), INT8_C( 31), -INT8_C( 33), INT8_C( 27), -INT8_C( 106), -INT8_C( 125), -INT8_C( 42), INT8_C( 16), -INT8_C( 115), -INT8_C( 69), INT8_C( 85), INT8_C( 116), -INT8_C( 
93), INT8_C( 75), -INT8_C( 39), INT8_C( 0), INT8_C( 109), -INT8_C( 42), INT8_C( 1), -INT8_C( 93), -INT8_C( 99), INT8_C( 84), -INT8_C( 18), -INT8_C( 54), -INT8_C( 51), -INT8_C( 46), INT8_C( 113), -INT8_C( 18), -INT8_C( 112), -INT8_C( 41), -INT8_C( 22), -INT8_C( 29), INT8_C( 2), -INT8_C( 106), INT8_C( 99), -INT8_C( 43), -INT8_C( 69), INT8_C( 15), INT8_C( 59), -INT8_C( 97), -INT8_C( 4), -INT8_C( 99), INT8_C( 75), -INT8_C( 121), -INT8_C( 109), -INT8_C( 91), INT8_C( 1), -INT8_C( 42), -INT8_C( 89), -INT8_C( 7), INT8_C( 110), -INT8_C( 69), -INT8_C( 113), INT8_C( 17), INT8_C( 63), -INT8_C( 12), INT8_C( 102), INT8_C( 18), -INT8_C( 95), -INT8_C( 71), -INT8_C( 76) } }, { { -INT8_C( 51), INT8_C( 4), -INT8_C( 107), -INT8_C( 73), -INT8_C( 93), -INT8_C( 33), -INT8_C( 124), INT8_C( 55), INT8_C( 57), -INT8_C( 94), INT8_C( 71), INT8_C( 39), -INT8_C( 7), -INT8_C( 28), INT8_C( 43), INT8_C( 53), INT8_C( 74), INT8_C( 75), -INT8_C( 104), INT8_C( 34), INT8_C( 103), -INT8_C( 44), INT8_C( 41), INT8_C( 74), INT8_C( 90), INT8_C( 116), INT8_C( 41), INT8_C( 52), -INT8_C( 54), -INT8_C( 98), INT8_C( 53), -INT8_C( 105), -INT8_C( 93), -INT8_C( 54), INT8_C( 78), INT8_C( 70), -INT8_C( 87), -INT8_C( 46), INT8_C( 125), -INT8_C( 30), INT8_C( 116), -INT8_C( 60), INT8_C( 9), INT8_C( 109), -INT8_C( 87), INT8_C( 53), -INT8_C( 94), -INT8_C( 13), INT8_MIN, INT8_C( 59), INT8_C( 21), -INT8_C( 25), INT8_C( 15), INT8_C( 63), INT8_C( 50), INT8_C( 106), -INT8_C( 77), INT8_C( 91), -INT8_C( 98), INT8_C( 125), -INT8_C( 6), -INT8_C( 45), INT8_C( 20), -INT8_C( 99) }, { -INT8_C( 98), INT8_C( 98), -INT8_C( 29), INT8_C( 71), INT8_C( 52), INT8_C( 97), INT8_C( 42), -INT8_C( 88), INT8_C( 37), INT8_C( 51), INT8_C( 22), -INT8_C( 50), INT8_C( 104), -INT8_C( 72), -INT8_C( 62), -INT8_C( 24), -INT8_C( 13), -INT8_C( 41), -INT8_C( 48), INT8_C( 3), INT8_C( 22), INT8_C( 2), INT8_C( 109), -INT8_C( 54), INT8_C( 93), INT8_C( 11), INT8_C( 71), INT8_C( 87), -INT8_C( 33), INT8_C( 92), -INT8_C( 12), INT8_C( 125), -INT8_C( 66), -INT8_C( 40), 
-INT8_C( 60), -INT8_C( 13), INT8_C( 57), -INT8_C( 18), -INT8_C( 101), INT8_C( 94), INT8_C( 34), -INT8_C( 79), INT8_C( 45), -INT8_C( 118), INT8_C( 106), -INT8_C( 17), INT8_C( 115), INT8_C( 93), -INT8_C( 58), INT8_C( 67), INT8_C( 96), -INT8_C( 35), INT8_C( 69), -INT8_C( 51), -INT8_C( 89), -INT8_C( 94), -INT8_C( 39), -INT8_C( 18), -INT8_C( 6), -INT8_C( 72), INT8_C( 74), -INT8_C( 18), INT8_C( 53), INT8_C( 9) }, { -INT8_C( 58), -INT8_C( 7), -INT8_C( 4), -INT8_C( 1), -INT8_C( 24), -INT8_C( 105), INT8_C( 94), INT8_C( 10), INT8_C( 73), -INT8_C( 117), -INT8_C( 108), -INT8_C( 77), INT8_C( 122), INT8_C( 7), INT8_C( 16), INT8_C( 64), INT8_C( 74), INT8_C( 113), INT8_C( 29), -INT8_C( 113), INT8_C( 62), -INT8_C( 60), INT8_C( 50), INT8_C( 23), -INT8_C( 77), INT8_C( 44), -INT8_C( 49), -INT8_C( 3), INT8_C( 26), INT8_C( 4), INT8_C( 6), -INT8_C( 31), -INT8_C( 2), INT8_C( 2), -INT8_C( 32), -INT8_C( 26), -INT8_C( 102), INT8_C( 62), -INT8_C( 16), -INT8_C( 29), -INT8_C( 55), -INT8_C( 124), -INT8_C( 106), INT8_C( 67), -INT8_C( 116), -INT8_C( 90), -INT8_C( 124), -INT8_C( 42), INT8_C( 23), -INT8_C( 95), INT8_C( 102), INT8_C( 86), INT8_C( 102), -INT8_C( 104), INT8_C( 109), INT8_C( 25), -INT8_C( 60), INT8_C( 61), INT8_C( 22), -INT8_C( 34), INT8_C( 65), INT8_C( 29), -INT8_C( 65), INT8_C( 63) }, { INT8_C( 53), -INT8_C( 32), -INT8_C( 26), INT8_C( 10), INT8_C( 15), INT8_C( 2), INT8_C( 9), INT8_C( 116), -INT8_C( 46), -INT8_C( 25), INT8_C( 41), INT8_C( 16), -INT8_C( 55), -INT8_C( 77), -INT8_C( 4), -INT8_C( 55), INT8_C( 86), INT8_C( 23), INT8_C( 74), -INT8_C( 73), INT8_C( 41), -INT8_C( 107), -INT8_C( 90), -INT8_C( 108), INT8_C( 4), INT8_C( 39), INT8_C( 10), INT8_C( 23), -INT8_C( 31), INT8_C( 26), INT8_C( 102), INT8_C( 29), INT8_C( 20), -INT8_C( 77), -INT8_C( 24), INT8_C( 86), INT8_C( 91), -INT8_C( 124), INT8_C( 52), INT8_C( 6), INT8_C( 78), INT8_C( 59), INT8_C( 53), INT8_C( 71), -INT8_C( 106), -INT8_C( 42), INT8_C( 86), INT8_C( 4), INT8_C( 94), -INT8_C( 1), -INT8_C( 2), INT8_C( 4), -INT8_C( 105), 
INT8_C( 7), -INT8_C( 30), INT8_C( 78), INT8_C( 44), -INT8_C( 124), INT8_C( 22), -INT8_C( 77), -INT8_C( 108), -INT8_C( 124), INT8_C( 63), -INT8_C( 94) } }, { { INT8_C( 31), -INT8_C( 96), INT8_C( 37), -INT8_C( 71), -INT8_C( 34), INT8_C( 21), -INT8_C( 100), -INT8_C( 88), -INT8_C( 102), INT8_C( 50), -INT8_C( 21), INT8_C( 38), -INT8_C( 39), INT8_C( 111), -INT8_C( 4), -INT8_C( 16), INT8_C( 17), INT8_C( 98), INT8_C( 70), INT8_C( 119), -INT8_C( 6), -INT8_C( 76), -INT8_C( 112), -INT8_C( 66), -INT8_C( 15), -INT8_C( 90), -INT8_C( 99), INT8_C( 50), -INT8_C( 61), INT8_C( 92), INT8_C( 114), -INT8_C( 29), -INT8_C( 4), -INT8_C( 105), -INT8_C( 100), -INT8_C( 37), -INT8_C( 83), INT8_C( 57), -INT8_C( 125), INT8_C( 71), INT8_C( 107), INT8_C( 110), INT8_C( 109), INT8_C( 68), -INT8_C( 34), INT8_C( 105), INT8_C( 53), -INT8_C( 17), -INT8_C( 52), INT8_C( 123), INT8_C( 102), -INT8_C( 58), INT8_C( 47), -INT8_C( 10), -INT8_C( 123), INT8_C( 32), -INT8_C( 100), INT8_C( 34), INT8_C( 83), INT8_C( 96), INT8_C( 126), -INT8_C( 59), INT8_C( 67), INT8_C( 123) }, { INT8_C( 92), -INT8_C( 33), INT8_C( 86), INT8_C( 9), INT8_C( 24), -INT8_C( 39), INT8_C( 80), -INT8_C( 124), INT8_C( 71), -INT8_C( 67), -INT8_C( 56), INT8_C( 37), INT8_C( 39), -INT8_C( 3), INT8_C( 20), -INT8_C( 13), INT8_C( 121), INT8_C( 122), -INT8_C( 71), -INT8_C( 88), INT8_C( 112), INT8_C( 62), -INT8_C( 55), INT8_C( 13), INT8_C( 96), INT8_C( 28), INT8_C( 109), -INT8_C( 33), -INT8_C( 31), -INT8_C( 80), INT8_C( 90), INT8_C( 61), -INT8_C( 113), -INT8_C( 80), INT8_C( 71), -INT8_C( 88), -INT8_C( 119), -INT8_C( 105), INT8_C( 44), -INT8_C( 48), INT8_C( 85), -INT8_C( 12), -INT8_C( 10), INT8_C( 124), -INT8_C( 14), INT8_C( 10), INT8_C( 111), INT8_C( 107), -INT8_C( 123), INT8_C( 40), INT8_C( 19), -INT8_C( 11), INT8_C( 103), -INT8_C( 36), INT8_C( 2), -INT8_C( 57), -INT8_C( 8), INT8_C( 111), -INT8_C( 90), -INT8_C( 39), INT8_C( 31), INT8_C( 0), INT8_C( 23), -INT8_C( 81) }, { -INT8_C( 80), INT8_C( 94), INT8_C( 87), INT8_C( 57), -INT8_C( 11), -INT8_C( 
125), INT8_C( 10), INT8_C( 74), INT8_C( 119), INT8_C( 0), -INT8_C( 58), INT8_C( 105), INT8_C( 10), INT8_C( 53), -INT8_C( 44), -INT8_C( 113), INT8_C( 94), -INT8_C( 24), -INT8_C( 123), -INT8_C( 59), -INT8_C( 60), -INT8_C( 121), -INT8_C( 116), -INT8_C( 67), -INT8_C( 9), INT8_C( 51), -INT8_C( 106), INT8_C( 22), INT8_C( 51), -INT8_C( 83), -INT8_C( 59), -INT8_C( 28), INT8_C( 11), INT8_C( 28), INT8_C( 29), INT8_C( 1), -INT8_C( 97), INT8_C( 39), INT8_C( 75), INT8_C( 23), INT8_C( 39), INT8_C( 18), INT8_MIN, INT8_C( 50), INT8_C( 71), INT8_C( 85), -INT8_C( 63), -INT8_C( 91), INT8_C( 61), INT8_C( 70), INT8_C( 106), INT8_C( 1), -INT8_C( 50), -INT8_C( 9), -INT8_C( 66), -INT8_C( 59), INT8_C( 42), INT8_C( 85), -INT8_C( 37), INT8_C( 93), INT8_C( 2), -INT8_C( 95), INT8_C( 65), INT8_C( 14) }, { INT8_C( 51), -INT8_C( 28), -INT8_C( 116), INT8_C( 50), -INT8_C( 15), INT8_C( 51), INT8_C( 94), -INT8_C( 34), INT8_C( 74), -INT8_C( 59), INT8_C( 119), INT8_C( 57), INT8_C( 71), -INT8_C( 95), -INT8_C( 6), INT8_C( 1), INT8_C( 85), -INT8_C( 37), INT8_C( 34), INT8_C( 107), INT8_C( 61), INT8_C( 67), INT8_C( 0), INT8_C( 111), INT8_C( 11), -INT8_C( 61), INT8_C( 85), -INT8_C( 28), INT8_C( 28), -INT8_C( 52), -INT8_C( 106), -INT8_C( 59), -INT8_C( 16), -INT8_C( 52), INT8_C( 74), INT8_C( 107), INT8_C( 50), -INT8_C( 66), -INT8_C( 34), INT8_C( 94), -INT8_C( 121), -INT8_C( 50), -INT8_C( 66), INT8_C( 2), INT8_C( 106), -INT8_C( 21), -INT8_C( 91), INT8_C( 50), INT8_C( 21), INT8_C( 107), INT8_C( 119), -INT8_C( 9), INT8_C( 23), INT8_C( 51), INT8_C( 37), INT8_C( 74), INT8_C( 42), -INT8_C( 91), -INT8_C( 125), INT8_C( 51), -INT8_C( 29), INT8_C( 31), -INT8_C( 66), -INT8_C( 17) } }, { { -INT8_C( 67), INT8_C( 95), INT8_C( 15), INT8_C( 93), -INT8_C( 122), INT8_C( 90), INT8_C( 116), -INT8_C( 82), INT8_C( 108), -INT8_C( 12), -INT8_C( 32), -INT8_C( 76), INT8_C( 73), -INT8_C( 95), INT8_C( 89), -INT8_C( 122), -INT8_C( 24), -INT8_C( 60), -INT8_C( 120), -INT8_C( 74), -INT8_C( 69), INT8_C( 70), INT8_C( 123), -INT8_C( 27), 
-INT8_C( 101), INT8_C( 86), INT8_C( 66), -INT8_C( 98), -INT8_C( 9), -INT8_C( 124), -INT8_C( 84), -INT8_C( 75), -INT8_C( 29), -INT8_C( 69), INT8_C( 18), INT8_C( 105), INT8_C( 21), -INT8_C( 122), INT8_C( 23), -INT8_C( 126), INT8_C( 122), -INT8_C( 9), INT8_C( 54), -INT8_C( 60), -INT8_C( 103), -INT8_C( 113), INT8_C( 74), -INT8_C( 127), INT8_C( 83), -INT8_C( 46), INT8_C( 55), INT8_C( 14), INT8_C( 25), -INT8_C( 78), -INT8_C( 13), -INT8_C( 76), INT8_C( 8), INT8_C( 54), INT8_C( 82), INT8_C( 0), -INT8_C( 70), -INT8_C( 2), -INT8_C( 75), -INT8_C( 99) }, { -INT8_C( 71), -INT8_C( 57), INT8_C( 6), -INT8_C( 49), INT8_C( 77), INT8_C( 30), INT8_C( 81), -INT8_C( 57), INT8_C( 21), -INT8_C( 121), -INT8_C( 117), -INT8_C( 82), INT8_C( 22), -INT8_C( 42), INT8_C( 47), INT8_C( 106), -INT8_C( 88), INT8_C( 102), INT8_C( 120), -INT8_C( 63), INT8_C( 24), INT8_C( 108), INT8_C( 118), INT8_C( 33), -INT8_C( 94), -INT8_C( 56), INT8_C( 33), INT8_C( 92), -INT8_C( 57), -INT8_C( 42), -INT8_C( 7), INT8_MIN, -INT8_C( 99), -INT8_C( 1), INT8_C( 79), -INT8_C( 22), INT8_C( 29), -INT8_C( 96), -INT8_C( 79), INT8_C( 51), INT8_C( 39), INT8_C( 61), -INT8_C( 31), INT8_C( 62), INT8_C( 19), INT8_C( 17), -INT8_C( 88), -INT8_C( 69), INT8_C( 119), INT8_C( 32), INT8_C( 125), -INT8_C( 112), -INT8_C( 116), -INT8_C( 13), -INT8_C( 79), INT8_C( 46), -INT8_C( 69), -INT8_C( 46), -INT8_C( 118), -INT8_C( 126), -INT8_C( 88), -INT8_C( 125), INT8_C( 3), INT8_C( 69) }, { -INT8_C( 125), INT8_C( 82), INT8_C( 47), -INT8_C( 96), -INT8_C( 13), -INT8_C( 32), -INT8_C( 45), INT8_C( 26), INT8_C( 29), -INT8_C( 75), INT8_C( 88), INT8_C( 48), -INT8_C( 58), INT8_C( 0), -INT8_C( 20), INT8_C( 61), INT8_C( 33), INT8_C( 105), -INT8_C( 51), -INT8_C( 83), INT8_C( 92), INT8_C( 126), -INT8_C( 36), INT8_C( 23), INT8_C( 80), INT8_C( 102), -INT8_C( 102), -INT8_C( 8), -INT8_C( 22), -INT8_C( 99), INT8_C( 61), INT8_C( 109), -INT8_C( 17), INT8_C( 108), INT8_C( 13), -INT8_C( 30), INT8_C( 77), -INT8_C( 31), -INT8_C( 3), INT8_C( 106), -INT8_C( 106), INT8_C( 85), 
-INT8_C( 101), INT8_C( 92), INT8_C( 86), -INT8_C( 121), -INT8_C( 103), INT8_C( 119), -INT8_C( 16), INT8_C( 103), INT8_C( 36), INT8_C( 76), -INT8_C( 27), INT8_C( 0), INT8_C( 99), INT8_C( 54), INT8_C( 103), -INT8_C( 3), INT8_C( 46), INT8_C( 81), -INT8_C( 102), INT8_C( 108), -INT8_C( 66), -INT8_C( 118) }, { INT8_C( 54), INT8_C( 26), INT8_C( 116), INT8_C( 61), INT8_C( 0), -INT8_C( 84), INT8_C( 105), INT8_C( 26), INT8_C( 70), -INT8_C( 82), -INT8_C( 76), INT8_C( 74), INT8_C( 123), -INT8_C( 36), -INT8_C( 127), -INT8_C( 101), INT8_C( 122), -INT8_C( 3), INT8_C( 103), INT8_C( 82), -INT8_C( 101), INT8_C( 86), INT8_C( 99), -INT8_C( 69), INT8_C( 18), INT8_C( 29), -INT8_C( 69), -INT8_C( 22), INT8_C( 26), -INT8_C( 36), -INT8_C( 3), -INT8_C( 67), -INT8_C( 124), -INT8_C( 118), INT8_C( 61), -INT8_C( 101), -INT8_C( 124), -INT8_C( 29), -INT8_C( 46), INT8_C( 14), -INT8_C( 126), -INT8_C( 2), INT8_C( 108), -INT8_C( 75), -INT8_C( 74), -INT8_C( 60), INT8_C( 122), INT8_C( 0), INT8_C( 54), -INT8_C( 29), INT8_C( 108), -INT8_C( 24), INT8_C( 73), INT8_C( 76), -INT8_C( 46), INT8_C( 74), INT8_C( 0), -INT8_C( 51), -INT8_C( 32), INT8_C( 15), INT8_C( 122), INT8_C( 93), INT8_C( 93), -INT8_C( 32) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i idx = simde_mm512_loadu_epi8(test_vec[i].idx); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_permutex2var_epi8(a, idx, b); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i8x64(); simde__m512i idx = simde_test_x86_random_i8x64(); simde__m512i b = simde_test_x86_random_i8x64(); simde__m512i r = simde_mm512_permutex2var_epi8(a, idx, b); simde_test_x86_write_i8x64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x64(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, 
b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_permutex2var_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[64]; const simde__mmask64 k; const int8_t idx[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { { { INT8_C( 82), INT8_C( 85), INT8_C( 49), INT8_C( 4), -INT8_C( 40), INT8_C( 25), INT8_C( 35), -INT8_C( 64), -INT8_C( 90), INT8_C( 61), INT8_C( 110), INT8_C( 91), INT8_C( 116), INT8_C( 61), INT8_C( 86), INT8_C( 91), -INT8_C( 53), INT8_C( 122), -INT8_C( 75), -INT8_C( 28), INT8_C( 85), -INT8_C( 102), -INT8_C( 83), -INT8_C( 28), -INT8_C( 21), INT8_C( 105), INT8_C( 58), -INT8_C( 118), INT8_C( 34), -INT8_C( 34), INT8_C( 13), INT8_C( 116), INT8_C( 51), INT8_C( 62), INT8_C( 120), INT8_C( 12), INT8_C( 87), -INT8_C( 101), -INT8_C( 52), -INT8_C( 2), -INT8_C( 40), INT8_C( 59), INT8_C( 89), INT8_C( 76), INT8_C( 120), -INT8_C( 80), -INT8_C( 89), INT8_C( 67), INT8_C( 42), INT8_C( 93), INT8_C( 39), INT8_MAX, -INT8_C( 9), -INT8_C( 44), INT8_C( 99), -INT8_C( 29), INT8_C( 61), -INT8_C( 98), INT8_C( 109), INT8_C( 95), INT8_C( 124), INT8_C( 122), -INT8_C( 45), -INT8_C( 80) }, UINT64_C(13839149230955711673), { -INT8_C( 61), INT8_C( 104), INT8_C( 12), INT8_C( 59), INT8_C( 24), -INT8_C( 76), INT8_C( 126), INT8_C( 66), INT8_C( 17), -INT8_C( 91), -INT8_C( 63), INT8_C( 8), INT8_C( 121), INT8_C( 37), -INT8_C( 21), -INT8_C( 74), -INT8_C( 61), INT8_C( 89), INT8_C( 21), INT8_C( 63), -INT8_C( 45), -INT8_C( 23), -INT8_C( 17), -INT8_C( 116), INT8_C( 53), -INT8_C( 85), -INT8_C( 99), INT8_C( 28), INT8_C( 52), -INT8_C( 85), -INT8_C( 36), -INT8_C( 9), INT8_C( 19), -INT8_C( 23), INT8_C( 51), INT8_C( 43), -INT8_C( 99), -INT8_C( 79), INT8_C( 109), -INT8_C( 82), INT8_C( 87), INT8_C( 47), -INT8_C( 74), -INT8_C( 48), INT8_C( 84), -INT8_C( 94), -INT8_C( 121), INT8_C( 23), -INT8_C( 5), -INT8_C( 100), INT8_C( 86), -INT8_C( 50), -INT8_C( 123), INT8_C( 70), INT8_C( 91), -INT8_C( 
70), -INT8_C( 15), -INT8_C( 8), -INT8_C( 41), INT8_C( 37), -INT8_C( 93), -INT8_C( 77), INT8_C( 29), -INT8_C( 73) }, { -INT8_C( 100), INT8_C( 80), -INT8_C( 30), INT8_C( 57), INT8_C( 1), INT8_C( 80), -INT8_C( 25), INT8_C( 88), INT8_MAX, -INT8_C( 98), INT8_C( 41), -INT8_C( 45), INT8_C( 64), -INT8_C( 80), -INT8_C( 22), INT8_C( 59), INT8_C( 76), INT8_C( 64), INT8_C( 9), -INT8_C( 46), -INT8_C( 122), INT8_C( 100), -INT8_C( 116), INT8_C( 120), INT8_C( 92), INT8_C( 99), -INT8_C( 99), INT8_C( 0), INT8_C( 23), -INT8_C( 70), -INT8_C( 73), -INT8_C( 77), INT8_C( 10), -INT8_C( 103), -INT8_C( 19), INT8_C( 12), -INT8_C( 23), -INT8_C( 44), INT8_C( 100), INT8_C( 104), INT8_C( 114), -INT8_C( 115), INT8_C( 59), -INT8_C( 78), INT8_C( 61), INT8_C( 37), -INT8_C( 19), -INT8_C( 118), INT8_C( 102), -INT8_C( 9), INT8_C( 92), -INT8_C( 20), INT8_C( 91), -INT8_C( 24), INT8_C( 100), -INT8_C( 72), INT8_C( 76), INT8_C( 2), -INT8_C( 72), INT8_C( 99), -INT8_C( 68), INT8_C( 111), INT8_C( 22), -INT8_C( 57) }, { INT8_C( 57), INT8_C( 85), INT8_C( 49), INT8_C( 95), -INT8_C( 21), -INT8_C( 9), INT8_C( 35), -INT8_C( 30), -INT8_C( 90), INT8_C( 61), INT8_C( 80), -INT8_C( 90), INT8_C( 116), INT8_C( 61), -INT8_C( 78), INT8_C( 91), -INT8_C( 53), INT8_C( 122), -INT8_C( 102), -INT8_C( 80), -INT8_C( 46), -INT8_C( 115), -INT8_C( 83), INT8_C( 116), -INT8_C( 21), INT8_C( 105), INT8_C( 58), -INT8_C( 118), -INT8_C( 9), -INT8_C( 34), INT8_C( 13), INT8_C( 116), -INT8_C( 28), -INT8_C( 115), INT8_MAX, INT8_C( 12), INT8_C( 87), INT8_C( 93), INT8_C( 37), -INT8_C( 89), -INT8_C( 40), INT8_C( 59), INT8_C( 89), INT8_C( 76), INT8_C( 120), -INT8_C( 80), -INT8_C( 89), -INT8_C( 28), INT8_C( 42), INT8_C( 34), -INT8_C( 116), -INT8_C( 22), -INT8_C( 9), -INT8_C( 44), INT8_C( 99), -INT8_C( 29), INT8_C( 61), -INT8_C( 98), INT8_C( 109), INT8_C( 95), INT8_C( 124), INT8_C( 122), -INT8_C( 34), -INT8_C( 29) } }, { { INT8_C( 8), INT8_C( 3), -INT8_C( 45), -INT8_C( 14), -INT8_C( 40), INT8_C( 55), INT8_C( 90), INT8_C( 74), -INT8_C( 59), -INT8_C( 
106), -INT8_C( 3), INT8_C( 2), -INT8_C( 69), -INT8_C( 22), -INT8_C( 116), INT8_C( 33), -INT8_C( 31), -INT8_C( 24), INT8_C( 14), INT8_C( 61), -INT8_C( 47), INT8_C( 114), -INT8_C( 11), INT8_C( 29), INT8_C( 116), -INT8_C( 83), INT8_MIN, INT8_C( 49), INT8_C( 28), -INT8_C( 106), -INT8_C( 8), INT8_C( 36), -INT8_C( 102), -INT8_C( 53), INT8_C( 22), INT8_C( 114), INT8_C( 2), INT8_C( 113), -INT8_C( 68), -INT8_C( 57), INT8_C( 7), -INT8_C( 71), -INT8_C( 54), -INT8_C( 62), -INT8_C( 92), INT8_C( 86), -INT8_C( 28), -INT8_C( 123), INT8_C( 63), -INT8_C( 14), -INT8_C( 62), INT8_C( 16), INT8_C( 100), -INT8_C( 73), INT8_C( 45), -INT8_C( 39), INT8_C( 100), -INT8_C( 83), INT8_C( 10), INT8_MIN, INT8_C( 67), INT8_C( 2), -INT8_C( 91), -INT8_C( 35) }, UINT64_C( 3717453404013837261), { -INT8_C( 59), INT8_C( 97), -INT8_C( 10), INT8_C( 105), -INT8_C( 73), -INT8_C( 38), -INT8_C( 17), -INT8_C( 10), -INT8_C( 52), -INT8_C( 79), INT8_C( 6), INT8_C( 48), INT8_C( 105), INT8_C( 51), INT8_C( 9), -INT8_C( 51), -INT8_C( 32), INT8_C( 19), INT8_C( 78), INT8_C( 36), INT8_C( 21), -INT8_C( 13), INT8_C( 1), -INT8_C( 30), -INT8_C( 82), INT8_C( 81), -INT8_C( 78), -INT8_C( 37), INT8_C( 93), INT8_C( 73), INT8_C( 14), INT8_C( 34), -INT8_C( 86), INT8_C( 4), -INT8_C( 116), INT8_C( 97), -INT8_C( 34), INT8_C( 123), INT8_C( 88), -INT8_C( 86), INT8_C( 44), INT8_C( 94), -INT8_C( 37), -INT8_C( 107), -INT8_C( 110), -INT8_C( 28), INT8_C( 99), INT8_C( 114), -INT8_C( 8), -INT8_C( 79), -INT8_C( 106), INT8_C( 13), -INT8_C( 92), -INT8_C( 104), -INT8_C( 16), INT8_C( 82), -INT8_C( 23), -INT8_C( 94), INT8_C( 45), INT8_C( 70), -INT8_C( 21), INT8_C( 60), INT8_C( 104), -INT8_C( 107) }, { INT8_C( 64), -INT8_C( 12), -INT8_C( 10), INT8_C( 31), INT8_C( 111), INT8_C( 78), -INT8_C( 55), -INT8_C( 100), -INT8_C( 83), -INT8_C( 92), INT8_C( 49), INT8_C( 63), -INT8_C( 119), -INT8_C( 108), -INT8_C( 79), -INT8_C( 127), INT8_C( 69), INT8_C( 72), -INT8_C( 114), -INT8_C( 23), -INT8_C( 32), INT8_C( 126), INT8_C( 60), -INT8_C( 55), INT8_C( 32), INT8_C( 
105), INT8_C( 15), INT8_C( 11), -INT8_C( 91), INT8_C( 119), -INT8_C( 96), -INT8_C( 26), INT8_C( 108), -INT8_C( 105), INT8_C( 5), -INT8_C( 37), -INT8_C( 27), -INT8_C( 50), INT8_C( 119), -INT8_C( 110), INT8_C( 115), -INT8_C( 87), -INT8_C( 47), -INT8_C( 4), INT8_C( 61), -INT8_C( 125), INT8_C( 125), -INT8_C( 125), -INT8_C( 53), INT8_C( 11), INT8_C( 108), -INT8_C( 85), -INT8_C( 118), -INT8_C( 88), INT8_C( 116), -INT8_C( 86), INT8_C( 18), -INT8_C( 125), -INT8_C( 74), -INT8_C( 73), -INT8_C( 6), INT8_C( 86), -INT8_C( 99), INT8_C( 102) }, { INT8_C( 78), INT8_C( 3), INT8_C( 116), -INT8_C( 87), -INT8_C( 40), INT8_C( 55), -INT8_C( 125), INT8_C( 116), -INT8_C( 119), -INT8_C( 14), -INT8_C( 3), INT8_C( 63), -INT8_C( 87), INT8_C( 16), -INT8_C( 116), -INT8_C( 108), INT8_C( 108), INT8_C( 61), -INT8_C( 79), INT8_C( 2), -INT8_C( 47), INT8_C( 114), INT8_C( 3), INT8_C( 29), -INT8_C( 28), INT8_C( 72), -INT8_C( 62), INT8_C( 11), INT8_C( 28), -INT8_C( 106), -INT8_C( 116), INT8_C( 22), -INT8_C( 102), -INT8_C( 53), -INT8_C( 69), -INT8_C( 105), INT8_C( 2), -INT8_C( 73), -INT8_C( 68), -INT8_C( 57), INT8_C( 7), -INT8_C( 71), INT8_C( 11), INT8_C( 114), -INT8_C( 92), INT8_C( 86), -INT8_C( 28), -INT8_C( 123), INT8_C( 18), -INT8_C( 14), -INT8_C( 11), INT8_C( 16), INT8_C( 2), -INT8_C( 73), INT8_C( 45), -INT8_C( 114), -INT8_C( 87), INT8_C( 22), INT8_C( 10), INT8_MIN, -INT8_C( 4), INT8_C( 67), -INT8_C( 91), -INT8_C( 35) } }, { { -INT8_C( 19), -INT8_C( 94), INT8_C( 66), -INT8_C( 45), INT8_C( 113), -INT8_C( 71), INT8_C( 101), -INT8_C( 28), INT8_C( 98), INT8_C( 55), -INT8_C( 32), -INT8_C( 96), -INT8_C( 70), INT8_C( 93), INT8_C( 35), -INT8_C( 123), INT8_C( 104), -INT8_C( 113), INT8_C( 48), -INT8_C( 14), INT8_C( 56), -INT8_C( 92), -INT8_C( 99), INT8_C( 74), INT8_C( 39), INT8_C( 83), INT8_C( 1), INT8_C( 33), -INT8_C( 87), -INT8_C( 97), -INT8_C( 120), -INT8_C( 105), INT8_C( 65), -INT8_C( 54), INT8_C( 106), -INT8_C( 78), -INT8_C( 125), -INT8_C( 49), -INT8_C( 106), -INT8_C( 26), INT8_C( 6), INT8_C( 118), 
-INT8_C( 122), -INT8_C( 64), -INT8_C( 45), -INT8_C( 87), INT8_C( 69), INT8_C( 60), INT8_C( 56), INT8_C( 117), INT8_C( 46), INT8_C( 112), INT8_C( 25), -INT8_C( 53), -INT8_C( 70), INT8_C( 64), INT8_C( 30), -INT8_C( 68), INT8_C( 98), -INT8_C( 56), INT8_C( 91), -INT8_C( 22), INT8_C( 95), -INT8_C( 100) }, UINT64_C(11465572668280916404), { INT8_C( 92), -INT8_C( 93), INT8_C( 95), INT8_C( 47), INT8_C( 76), -INT8_C( 91), INT8_C( 107), -INT8_C( 123), INT8_C( 26), -INT8_C( 102), -INT8_C( 11), INT8_C( 52), INT8_C( 101), -INT8_C( 80), INT8_C( 116), -INT8_C( 124), INT8_C( 108), -INT8_C( 42), INT8_C( 76), -INT8_C( 57), -INT8_C( 64), -INT8_C( 85), INT8_C( 99), INT8_C( 116), INT8_C( 116), -INT8_C( 78), -INT8_C( 84), INT8_C( 12), -INT8_C( 104), -INT8_C( 55), -INT8_C( 85), -INT8_C( 12), INT8_C( 109), INT8_C( 11), INT8_C( 35), -INT8_C( 71), -INT8_C( 80), -INT8_C( 113), INT8_C( 62), -INT8_C( 54), INT8_C( 41), INT8_C( 52), -INT8_C( 2), -INT8_C( 114), -INT8_C( 28), INT8_C( 115), INT8_C( 18), INT8_C( 80), INT8_C( 73), INT8_C( 94), INT8_C( 23), INT8_C( 10), INT8_C( 9), INT8_C( 122), INT8_C( 126), INT8_C( 125), INT8_C( 45), INT8_C( 42), -INT8_C( 118), -INT8_C( 59), -INT8_C( 12), INT8_C( 53), -INT8_C( 71), INT8_C( 97) }, { INT8_C( 64), -INT8_C( 36), INT8_C( 26), -INT8_C( 16), INT8_C( 107), INT8_C( 89), -INT8_C( 69), -INT8_C( 108), -INT8_C( 115), -INT8_C( 71), INT8_C( 35), INT8_C( 113), INT8_C( 44), INT8_C( 53), -INT8_C( 63), INT8_C( 118), -INT8_C( 108), -INT8_C( 40), INT8_MIN, -INT8_C( 99), INT8_C( 82), -INT8_C( 2), INT8_C( 27), INT8_MAX, INT8_C( 41), -INT8_C( 91), INT8_C( 68), INT8_C( 29), -INT8_C( 38), -INT8_C( 3), INT8_C( 126), INT8_C( 27), -INT8_C( 38), -INT8_C( 104), INT8_C( 11), INT8_C( 69), -INT8_C( 15), -INT8_C( 58), -INT8_C( 38), INT8_C( 126), INT8_MIN, -INT8_C( 3), -INT8_C( 17), -INT8_C( 84), INT8_C( 50), -INT8_C( 80), INT8_C( 34), -INT8_C( 58), -INT8_C( 120), -INT8_C( 94), INT8_C( 100), -INT8_C( 37), -INT8_C( 95), INT8_MAX, INT8_C( 90), -INT8_C( 54), INT8_C( 36), -INT8_C( 97), 
-INT8_C( 25), -INT8_C( 2), -INT8_C( 100), INT8_C( 101), INT8_C( 25), INT8_C( 118) }, { -INT8_C( 19), -INT8_C( 94), INT8_C( 27), -INT8_C( 45), INT8_C( 44), -INT8_C( 49), INT8_C( 101), -INT8_C( 71), INT8_C( 1), INT8_C( 55), -INT8_C( 32), INT8_C( 25), -INT8_C( 70), INT8_C( 93), -INT8_C( 95), INT8_C( 113), INT8_C( 50), INT8_C( 27), INT8_C( 44), -INT8_C( 108), INT8_C( 56), -INT8_C( 92), INT8_C( 69), INT8_C( 74), -INT8_C( 95), INT8_C( 46), -INT8_C( 45), INT8_C( 33), INT8_C( 39), -INT8_C( 71), -INT8_C( 120), -INT8_C( 105), INT8_C( 65), -INT8_C( 54), INT8_C( 106), -INT8_C( 68), INT8_C( 56), -INT8_C( 49), -INT8_C( 106), INT8_C( 35), INT8_C( 118), INT8_C( 118), INT8_C( 25), -INT8_C( 64), -INT8_C( 45), -INT8_C( 37), INT8_C( 48), -INT8_C( 108), -INT8_C( 71), INT8_C( 117), INT8_C( 74), -INT8_C( 32), INT8_C( 55), -INT8_C( 53), -INT8_C( 70), INT8_C( 64), -INT8_C( 87), -INT8_C( 122), -INT8_C( 32), INT8_C( 89), -INT8_C( 95), -INT8_C( 22), INT8_C( 95), -INT8_C( 104) } }, { { -INT8_C( 3), INT8_C( 37), -INT8_C( 68), -INT8_C( 17), -INT8_C( 21), -INT8_C( 106), INT8_C( 109), INT8_C( 107), -INT8_C( 109), INT8_C( 93), INT8_C( 24), -INT8_C( 59), INT8_C( 13), INT8_C( 58), -INT8_C( 116), -INT8_C( 106), -INT8_C( 35), -INT8_C( 16), INT8_C( 113), INT8_C( 126), INT8_C( 111), -INT8_C( 53), INT8_C( 72), -INT8_C( 109), INT8_C( 106), INT8_C( 47), -INT8_C( 111), INT8_C( 7), -INT8_C( 108), -INT8_C( 85), INT8_C( 125), -INT8_C( 111), -INT8_C( 48), INT8_C( 57), INT8_MIN, -INT8_C( 69), -INT8_C( 49), -INT8_C( 18), INT8_C( 39), INT8_C( 98), INT8_C( 75), INT8_C( 63), INT8_C( 40), INT8_C( 88), INT8_C( 121), -INT8_C( 76), -INT8_C( 18), INT8_C( 86), -INT8_C( 92), INT8_C( 95), -INT8_C( 44), INT8_C( 19), INT8_C( 43), INT8_C( 28), -INT8_C( 90), -INT8_C( 107), INT8_C( 75), INT8_C( 55), -INT8_C( 100), -INT8_C( 33), -INT8_C( 30), INT8_C( 26), INT8_C( 113), -INT8_C( 78) }, UINT64_C( 3064019907100209491), { -INT8_C( 44), -INT8_C( 83), -INT8_C( 125), INT8_C( 77), INT8_C( 97), INT8_C( 113), -INT8_C( 92), INT8_C( 5), 
-INT8_C( 47), INT8_C( 120), INT8_C( 24), -INT8_C( 4), -INT8_C( 107), -INT8_C( 66), -INT8_C( 111), -INT8_C( 32), -INT8_C( 10), INT8_C( 46), -INT8_C( 64), -INT8_C( 40), INT8_C( 72), INT8_C( 49), -INT8_C( 117), -INT8_C( 101), INT8_C( 34), -INT8_C( 7), -INT8_C( 66), INT8_C( 2), -INT8_C( 114), INT8_C( 68), INT8_C( 44), INT8_C( 98), -INT8_C( 15), -INT8_C( 81), -INT8_C( 81), INT8_C( 83), INT8_C( 33), INT8_C( 83), INT8_C( 88), -INT8_C( 14), -INT8_C( 52), INT8_C( 113), -INT8_C( 18), INT8_C( 97), INT8_C( 47), INT8_MAX, INT8_C( 65), INT8_C( 37), -INT8_C( 83), INT8_C( 1), -INT8_C( 2), -INT8_C( 11), INT8_C( 50), -INT8_C( 119), -INT8_C( 111), INT8_C( 85), -INT8_C( 126), INT8_C( 79), INT8_C( 87), INT8_C( 16), -INT8_C( 109), -INT8_C( 125), INT8_C( 114), -INT8_C( 123) }, { INT8_C( 51), INT8_C( 33), -INT8_C( 40), INT8_C( 84), INT8_C( 117), INT8_C( 48), INT8_C( 70), INT8_C( 65), -INT8_C( 95), INT8_C( 52), -INT8_C( 94), -INT8_C( 47), -INT8_C( 77), -INT8_C( 29), -INT8_C( 10), INT8_C( 97), -INT8_C( 27), -INT8_C( 12), INT8_C( 86), INT8_C( 23), INT8_C( 125), -INT8_C( 25), INT8_C( 108), -INT8_C( 1), INT8_C( 55), -INT8_C( 61), INT8_C( 15), -INT8_C( 54), INT8_C( 71), -INT8_C( 127), INT8_C( 79), INT8_C( 122), -INT8_C( 93), INT8_C( 39), -INT8_C( 50), INT8_C( 24), INT8_C( 88), INT8_C( 20), INT8_C( 89), -INT8_C( 7), INT8_C( 72), -INT8_C( 5), -INT8_C( 54), -INT8_C( 5), -INT8_C( 34), -INT8_C( 63), INT8_C( 92), -INT8_C( 61), -INT8_C( 75), -INT8_C( 77), -INT8_C( 37), INT8_C( 51), -INT8_C( 102), INT8_C( 71), INT8_C( 50), -INT8_C( 47), INT8_C( 11), INT8_C( 66), -INT8_C( 100), INT8_C( 82), -INT8_C( 61), -INT8_C( 21), -INT8_C( 52), INT8_C( 102) }, { INT8_C( 125), -INT8_C( 76), -INT8_C( 68), -INT8_C( 17), INT8_C( 39), -INT8_C( 106), -INT8_C( 49), INT8_C( 107), -INT8_C( 12), INT8_C( 93), INT8_C( 24), -INT8_C( 59), -INT8_C( 53), INT8_C( 113), -INT8_C( 16), -INT8_C( 93), -INT8_C( 35), -INT8_C( 18), INT8_C( 51), INT8_C( 55), INT8_C( 111), INT8_C( 95), -INT8_C( 59), -INT8_C( 109), INT8_MIN, INT8_C( 66), 
-INT8_C( 111), INT8_C( 7), -INT8_C( 108), INT8_C( 117), INT8_C( 125), -INT8_C( 111), -INT8_C( 77), INT8_C( 86), INT8_C( 86), INT8_C( 23), INT8_C( 57), -INT8_C( 18), INT8_C( 55), -INT8_C( 37), -INT8_C( 77), INT8_C( 63), INT8_C( 92), INT8_C( 88), INT8_C( 86), -INT8_C( 76), -INT8_C( 18), -INT8_C( 18), -INT8_C( 76), INT8_C( 95), -INT8_C( 52), INT8_C( 19), INT8_C( 43), INT8_C( 28), -INT8_C( 90), -INT8_C( 25), INT8_C( 75), INT8_C( 97), -INT8_C( 100), -INT8_C( 35), -INT8_C( 30), -INT8_C( 17), INT8_C( 113), -INT8_C( 78) } }, { { INT8_C( 19), -INT8_C( 102), INT8_C( 126), INT8_C( 107), -INT8_C( 82), -INT8_C( 41), INT8_C( 100), -INT8_C( 10), -INT8_C( 46), INT8_C( 47), -INT8_C( 15), -INT8_C( 79), -INT8_C( 16), INT8_C( 78), INT8_C( 116), -INT8_C( 91), INT8_C( 1), INT8_C( 79), -INT8_C( 40), -INT8_C( 101), -INT8_C( 105), INT8_C( 11), INT8_C( 109), -INT8_C( 94), INT8_C( 77), INT8_C( 9), -INT8_C( 12), INT8_C( 16), -INT8_C( 12), -INT8_C( 64), INT8_C( 119), INT8_C( 7), INT8_C( 90), -INT8_C( 11), INT8_C( 114), INT8_C( 8), -INT8_C( 51), -INT8_C( 41), -INT8_C( 2), -INT8_C( 97), INT8_C( 6), -INT8_C( 17), INT8_C( 80), -INT8_C( 10), INT8_C( 61), -INT8_C( 59), -INT8_C( 101), INT8_C( 62), INT8_C( 20), INT8_C( 116), -INT8_C( 38), -INT8_C( 85), INT8_MAX, INT8_C( 71), INT8_C( 77), -INT8_C( 52), INT8_C( 80), INT8_C( 65), -INT8_C( 36), INT8_C( 68), INT8_C( 1), INT8_C( 83), INT8_C( 76), INT8_C( 91) }, UINT64_C(11219981339617115721), { INT8_C( 81), INT8_C( 6), -INT8_C( 111), -INT8_C( 114), -INT8_C( 53), INT8_C( 45), -INT8_C( 51), -INT8_C( 33), -INT8_C( 95), -INT8_C( 89), -INT8_C( 117), INT8_C( 32), -INT8_C( 18), -INT8_C( 40), -INT8_C( 20), INT8_C( 62), INT8_C( 26), -INT8_C( 56), -INT8_C( 126), INT8_C( 27), INT8_C( 28), -INT8_C( 50), INT8_C( 119), INT8_C( 101), -INT8_C( 115), -INT8_C( 38), INT8_C( 123), INT8_C( 34), INT8_C( 60), INT8_C( 48), -INT8_C( 66), -INT8_C( 115), INT8_C( 54), INT8_C( 79), INT8_C( 27), INT8_C( 1), INT8_C( 124), -INT8_C( 24), -INT8_C( 31), INT8_C( 29), -INT8_C( 113), INT8_C( 
108), INT8_C( 61), INT8_C( 125), INT8_C( 68), INT8_C( 41), -INT8_C( 69), INT8_C( 94), -INT8_C( 14), INT8_C( 62), INT8_C( 122), INT8_C( 14), INT8_C( 12), -INT8_C( 15), INT8_C( 115), -INT8_C( 103), -INT8_C( 53), -INT8_C( 18), -INT8_C( 68), INT8_C( 7), INT8_C( 30), INT8_C( 122), -INT8_C( 108), INT8_C( 85) }, { -INT8_C( 55), -INT8_C( 80), INT8_C( 86), INT8_C( 70), -INT8_C( 104), INT8_C( 55), INT8_C( 99), INT8_C( 40), -INT8_C( 93), -INT8_C( 95), -INT8_C( 91), -INT8_C( 24), -INT8_C( 54), INT8_C( 97), INT8_C( 70), -INT8_C( 68), -INT8_C( 97), -INT8_C( 64), -INT8_C( 54), -INT8_C( 85), -INT8_C( 79), INT8_C( 61), INT8_C( 69), INT8_C( 125), INT8_C( 43), INT8_C( 1), -INT8_C( 124), INT8_C( 74), INT8_C( 123), INT8_C( 25), -INT8_C( 97), INT8_C( 68), -INT8_C( 55), -INT8_C( 11), -INT8_C( 118), INT8_C( 97), INT8_C( 45), -INT8_C( 18), -INT8_C( 119), -INT8_C( 48), -INT8_C( 113), INT8_C( 47), -INT8_C( 72), INT8_C( 89), -INT8_C( 112), -INT8_C( 1), INT8_C( 22), INT8_C( 47), -INT8_C( 65), -INT8_C( 32), -INT8_C( 38), INT8_C( 113), INT8_C( 30), INT8_C( 31), -INT8_C( 18), INT8_C( 73), INT8_C( 32), INT8_C( 114), -INT8_C( 109), -INT8_C( 101), -INT8_C( 117), INT8_C( 50), -INT8_C( 32), INT8_C( 84) }, { -INT8_C( 64), -INT8_C( 102), INT8_C( 126), INT8_C( 116), -INT8_C( 82), -INT8_C( 41), INT8_C( 97), -INT8_C( 10), -INT8_C( 46), -INT8_C( 97), -INT8_C( 79), INT8_C( 90), INT8_C( 22), INT8_C( 43), INT8_C( 116), INT8_C( 76), -INT8_C( 12), -INT8_C( 93), -INT8_C( 40), -INT8_C( 101), -INT8_C( 105), INT8_C( 70), INT8_C( 73), -INT8_C( 94), INT8_C( 77), -INT8_C( 124), -INT8_C( 101), INT8_C( 16), INT8_C( 1), -INT8_C( 64), INT8_C( 119), INT8_C( 7), INT8_C( 77), -INT8_C( 11), INT8_C( 16), INT8_C( 8), -INT8_C( 117), -INT8_C( 41), -INT8_C( 2), -INT8_C( 64), -INT8_C( 91), -INT8_C( 17), INT8_C( 80), -INT8_C( 10), INT8_C( 61), -INT8_C( 17), INT8_C( 68), INT8_C( 62), -INT8_C( 38), INT8_C( 116), -INT8_C( 109), -INT8_C( 85), -INT8_C( 16), -INT8_C( 32), INT8_C( 77), INT8_C( 9), -INT8_C( 24), INT8_C( 22), -INT8_C( 36), 
-INT8_C( 10), INT8_C( 119), INT8_C( 83), INT8_C( 76), INT8_C( 61) } }, { { INT8_C( 40), INT8_C( 106), -INT8_C( 74), INT8_C( 85), INT8_C( 88), INT8_C( 63), INT8_C( 37), -INT8_C( 25), INT8_C( 110), -INT8_C( 34), INT8_C( 65), -INT8_C( 2), -INT8_C( 35), INT8_C( 87), INT8_C( 45), -INT8_C( 100), INT8_C( 55), INT8_C( 8), INT8_C( 13), INT8_C( 85), INT8_C( 39), -INT8_C( 5), -INT8_C( 97), INT8_C( 72), INT8_C( 110), INT8_C( 50), -INT8_C( 29), -INT8_C( 7), INT8_C( 101), -INT8_C( 61), INT8_C( 78), -INT8_C( 115), INT8_C( 46), INT8_C( 4), -INT8_C( 30), -INT8_C( 122), INT8_C( 67), INT8_C( 7), INT8_C( 110), -INT8_C( 78), -INT8_C( 27), -INT8_C( 81), -INT8_C( 80), -INT8_C( 62), INT8_C( 6), -INT8_C( 34), INT8_C( 95), INT8_C( 61), -INT8_C( 26), INT8_C( 108), -INT8_C( 109), INT8_C( 13), INT8_C( 104), INT8_C( 50), INT8_C( 85), -INT8_C( 42), INT8_C( 100), INT8_C( 57), -INT8_C( 49), -INT8_C( 55), -INT8_C( 4), INT8_C( 29), INT8_C( 86), INT8_C( 42) }, UINT64_C( 2672639265312159777), { -INT8_C( 50), -INT8_C( 57), -INT8_C( 24), -INT8_C( 44), -INT8_C( 91), INT8_C( 71), INT8_C( 17), -INT8_C( 117), -INT8_C( 77), -INT8_C( 92), -INT8_C( 103), INT8_C( 27), -INT8_C( 42), -INT8_C( 18), -INT8_C( 15), INT8_C( 59), INT8_C( 39), -INT8_C( 63), INT8_C( 4), INT8_C( 36), -INT8_C( 34), INT8_C( 91), INT8_C( 78), INT8_C( 0), -INT8_C( 109), -INT8_C( 1), INT8_C( 101), -INT8_C( 45), INT8_C( 30), INT8_C( 124), -INT8_C( 7), -INT8_C( 20), INT8_C( 67), -INT8_C( 31), -INT8_C( 64), -INT8_C( 23), INT8_C( 40), -INT8_C( 46), INT8_C( 116), -INT8_C( 37), INT8_C( 118), INT8_C( 13), -INT8_C( 9), INT8_C( 77), -INT8_C( 4), -INT8_C( 24), -INT8_C( 120), INT8_C( 35), -INT8_C( 87), -INT8_C( 116), INT8_C( 71), -INT8_C( 120), -INT8_C( 25), -INT8_C( 106), -INT8_C( 120), INT8_C( 123), -INT8_C( 107), -INT8_C( 19), INT8_C( 78), -INT8_C( 76), INT8_C( 105), INT8_C( 71), -INT8_C( 96), -INT8_C( 84) }, { INT8_C( 40), INT8_C( 97), -INT8_C( 107), INT8_C( 80), INT8_C( 51), INT8_C( 10), INT8_C( 44), -INT8_C( 87), INT8_C( 23), INT8_C( 35), -INT8_C( 
10), INT8_C( 19), INT8_C( 11), INT8_C( 126), INT8_C( 55), -INT8_C( 75), INT8_C( 11), INT8_C( 126), INT8_C( 61), -INT8_C( 14), INT8_C( 20), -INT8_C( 59), INT8_C( 109), -INT8_C( 86), -INT8_C( 78), -INT8_C( 68), INT8_C( 94), INT8_C( 27), INT8_C( 3), -INT8_C( 2), -INT8_C( 57), INT8_C( 44), INT8_C( 95), INT8_C( 93), INT8_C( 124), -INT8_C( 110), INT8_C( 103), -INT8_C( 88), INT8_C( 60), INT8_C( 126), -INT8_C( 53), INT8_C( 50), -INT8_C( 110), -INT8_C( 41), -INT8_C( 79), -INT8_C( 55), -INT8_C( 116), -INT8_C( 68), INT8_C( 71), -INT8_C( 55), -INT8_C( 82), INT8_C( 92), -INT8_C( 114), INT8_C( 28), INT8_C( 6), INT8_C( 64), -INT8_C( 40), INT8_C( 100), INT8_C( 91), -INT8_C( 37), INT8_C( 98), INT8_C( 34), INT8_C( 7), -INT8_C( 62) }, { INT8_C( 55), INT8_C( 106), -INT8_C( 74), INT8_C( 85), INT8_C( 88), -INT8_C( 87), INT8_C( 37), -INT8_C( 25), INT8_C( 110), -INT8_C( 34), INT8_C( 65), -INT8_C( 7), INT8_C( 109), -INT8_C( 116), INT8_C( 45), -INT8_C( 100), -INT8_C( 78), INT8_C( 8), INT8_C( 13), INT8_C( 85), -INT8_C( 57), INT8_C( 27), -INT8_C( 97), INT8_C( 40), INT8_C( 85), INT8_C( 50), -INT8_C( 88), -INT8_C( 7), INT8_C( 101), INT8_C( 98), INT8_C( 100), -INT8_C( 115), INT8_C( 46), INT8_C( 4), -INT8_C( 30), -INT8_C( 122), INT8_C( 67), INT8_C( 7), -INT8_C( 114), -INT8_C( 78), INT8_C( 6), INT8_C( 87), INT8_C( 64), INT8_C( 126), INT8_C( 98), -INT8_C( 34), INT8_C( 95), INT8_C( 61), -INT8_C( 81), -INT8_C( 35), -INT8_C( 87), INT8_C( 13), INT8_C( 126), INT8_C( 50), INT8_C( 85), -INT8_C( 42), -INT8_C( 5), INT8_C( 57), INT8_C( 55), -INT8_C( 55), -INT8_C( 4), -INT8_C( 87), INT8_C( 86), INT8_C( 42) } }, { { INT8_MAX, -INT8_C( 124), INT8_C( 84), -INT8_C( 26), INT8_C( 44), -INT8_C( 112), INT8_C( 101), -INT8_C( 8), -INT8_C( 61), -INT8_C( 9), -INT8_C( 49), INT8_C( 116), -INT8_C( 64), INT8_C( 91), INT8_C( 48), INT8_C( 7), INT8_C( 36), -INT8_C( 34), INT8_C( 99), -INT8_C( 78), -INT8_C( 6), INT8_C( 105), -INT8_C( 14), -INT8_C( 46), -INT8_C( 51), INT8_C( 77), -INT8_C( 82), INT8_C( 48), INT8_C( 111), -INT8_C( 
75), -INT8_C( 14), -INT8_C( 17), INT8_C( 57), INT8_C( 70), -INT8_C( 43), INT8_C( 102), -INT8_C( 41), INT8_C( 58), INT8_C( 94), -INT8_C( 102), INT8_C( 49), INT8_C( 45), INT8_C( 14), -INT8_C( 15), -INT8_C( 120), INT8_C( 62), -INT8_C( 7), -INT8_C( 84), INT8_C( 28), INT8_C( 92), INT8_C( 94), INT8_C( 23), -INT8_C( 58), INT8_C( 80), -INT8_C( 23), -INT8_C( 109), -INT8_C( 99), -INT8_C( 105), -INT8_C( 61), INT8_C( 12), INT8_C( 77), -INT8_C( 75), -INT8_C( 5), -INT8_C( 122) }, UINT64_C( 4426275423435674108), { INT8_C( 119), INT8_C( 123), INT8_C( 46), -INT8_C( 1), -INT8_C( 71), INT8_C( 39), -INT8_C( 85), -INT8_C( 43), -INT8_C( 124), INT8_C( 9), -INT8_C( 20), INT8_C( 74), INT8_C( 89), -INT8_C( 42), -INT8_C( 35), -INT8_C( 10), INT8_C( 109), -INT8_C( 95), INT8_C( 3), -INT8_C( 70), INT8_C( 86), -INT8_C( 2), INT8_C( 65), INT8_C( 82), -INT8_C( 49), INT8_C( 45), INT8_C( 37), -INT8_C( 37), INT8_C( 120), -INT8_C( 110), INT8_C( 24), -INT8_C( 17), INT8_C( 13), INT8_C( 70), -INT8_C( 17), -INT8_C( 58), INT8_C( 110), -INT8_C( 102), -INT8_C( 100), -INT8_C( 14), -INT8_C( 92), -INT8_C( 120), INT8_C( 60), -INT8_C( 3), INT8_C( 94), INT8_C( 25), -INT8_C( 12), -INT8_C( 52), -INT8_C( 70), -INT8_C( 9), -INT8_C( 122), INT8_C( 17), -INT8_C( 11), -INT8_C( 57), INT8_C( 99), -INT8_C( 59), -INT8_C( 11), -INT8_C( 119), -INT8_C( 96), INT8_C( 109), INT8_C( 27), -INT8_C( 72), INT8_C( 92), INT8_C( 41) }, { -INT8_C( 2), INT8_C( 75), -INT8_C( 17), INT8_C( 108), -INT8_C( 26), -INT8_C( 117), INT8_C( 94), -INT8_C( 118), INT8_C( 20), -INT8_C( 102), -INT8_C( 121), INT8_C( 114), -INT8_C( 76), INT8_C( 123), INT8_C( 62), INT8_C( 110), INT8_C( 114), -INT8_C( 59), INT8_MAX, INT8_C( 104), -INT8_C( 116), -INT8_C( 29), INT8_C( 45), -INT8_C( 127), INT8_C( 108), -INT8_C( 51), -INT8_C( 18), -INT8_C( 121), -INT8_C( 123), INT8_C( 75), -INT8_C( 80), -INT8_C( 125), -INT8_C( 106), -INT8_C( 96), -INT8_C( 16), INT8_C( 124), INT8_C( 43), INT8_C( 78), INT8_C( 6), INT8_C( 63), -INT8_C( 23), -INT8_C( 114), -INT8_C( 78), -INT8_C( 99), 
INT8_C( 9), -INT8_C( 16), INT8_C( 11), INT8_C( 124), -INT8_C( 75), -INT8_C( 117), -INT8_C( 28), INT8_C( 66), INT8_C( 110), INT8_C( 17), -INT8_C( 61), -INT8_C( 38), -INT8_C( 34), -INT8_C( 78), INT8_C( 97), INT8_C( 99), -INT8_C( 3), INT8_C( 18), -INT8_C( 26), -INT8_C( 109) }, { INT8_MAX, -INT8_C( 124), -INT8_C( 7), -INT8_C( 109), -INT8_C( 105), -INT8_C( 102), -INT8_C( 15), -INT8_C( 29), INT8_C( 44), -INT8_C( 9), -INT8_C( 49), INT8_C( 116), -INT8_C( 51), INT8_C( 91), INT8_C( 75), -INT8_C( 61), INT8_C( 36), -INT8_C( 34), -INT8_C( 26), -INT8_C( 61), -INT8_C( 6), -INT8_C( 26), INT8_C( 75), INT8_MAX, INT8_C( 110), INT8_C( 62), -INT8_C( 82), INT8_C( 48), -INT8_C( 34), -INT8_C( 75), -INT8_C( 51), INT8_C( 124), INT8_C( 91), INT8_C( 94), -INT8_C( 43), INT8_C( 94), -INT8_C( 41), INT8_C( 58), INT8_C( 94), -INT8_C( 102), INT8_C( 49), -INT8_C( 61), INT8_C( 14), INT8_C( 18), -INT8_C( 120), INT8_C( 62), INT8_C( 110), -INT8_C( 84), -INT8_C( 61), INT8_C( 92), INT8_C( 101), -INT8_C( 34), -INT8_C( 58), -INT8_C( 118), INT8_C( 124), -INT8_C( 109), INT8_C( 17), -INT8_C( 105), INT8_C( 57), -INT8_C( 16), INT8_C( 48), -INT8_C( 99), -INT8_C( 5), -INT8_C( 122) } }, { { -INT8_C( 78), -INT8_C( 42), INT8_C( 16), -INT8_C( 35), INT8_C( 37), INT8_C( 22), INT8_C( 29), INT8_C( 14), -INT8_C( 92), -INT8_C( 49), -INT8_C( 85), -INT8_C( 82), -INT8_C( 65), -INT8_C( 74), INT8_C( 42), INT8_C( 117), INT8_C( 65), INT8_C( 14), -INT8_C( 73), -INT8_C( 81), INT8_C( 31), INT8_C( 122), -INT8_C( 119), -INT8_C( 3), INT8_C( 44), -INT8_C( 21), INT8_C( 96), INT8_C( 41), -INT8_C( 3), INT8_C( 70), -INT8_C( 67), -INT8_C( 81), INT8_C( 29), -INT8_C( 51), -INT8_C( 116), INT8_C( 66), -INT8_C( 29), -INT8_C( 87), INT8_C( 80), -INT8_C( 120), INT8_C( 120), -INT8_C( 5), INT8_C( 54), INT8_C( 56), -INT8_C( 79), INT8_C( 96), -INT8_C( 83), -INT8_C( 13), INT8_C( 110), INT8_C( 100), -INT8_C( 94), -INT8_C( 115), -INT8_C( 34), INT8_C( 44), -INT8_C( 118), INT8_C( 11), INT8_C( 23), -INT8_C( 22), INT8_C( 52), INT8_C( 20), INT8_C( 48), -INT8_C( 
15), -INT8_C( 61), INT8_C( 77) }, UINT64_C( 8154576338574856126), { -INT8_C( 38), INT8_C( 96), -INT8_C( 87), -INT8_C( 116), -INT8_C( 64), INT8_C( 86), INT8_MAX, INT8_C( 46), -INT8_C( 70), INT8_C( 33), -INT8_C( 69), -INT8_C( 103), INT8_C( 77), INT8_C( 69), -INT8_C( 92), INT8_C( 100), INT8_C( 47), -INT8_C( 40), INT8_C( 120), INT8_C( 95), -INT8_C( 54), INT8_C( 59), -INT8_C( 83), -INT8_C( 120), -INT8_C( 117), INT8_C( 60), INT8_C( 42), -INT8_C( 124), INT8_C( 28), INT8_C( 84), -INT8_C( 11), -INT8_C( 10), -INT8_C( 76), -INT8_C( 97), -INT8_C( 126), INT8_C( 116), -INT8_C( 11), INT8_C( 1), -INT8_C( 94), -INT8_C( 80), INT8_C( 35), INT8_C( 93), INT8_C( 73), INT8_C( 112), -INT8_C( 94), -INT8_C( 19), -INT8_C( 43), -INT8_C( 47), -INT8_C( 59), INT8_C( 77), INT8_C( 49), -INT8_C( 113), -INT8_C( 119), -INT8_C( 34), INT8_C( 24), INT8_C( 20), INT8_C( 26), INT8_C( 66), -INT8_C( 104), INT8_C( 54), -INT8_C( 105), -INT8_C( 115), INT8_C( 45), INT8_C( 75) }, { INT8_C( 44), -INT8_C( 81), -INT8_C( 64), INT8_C( 34), -INT8_C( 79), INT8_C( 98), -INT8_C( 46), -INT8_C( 44), -INT8_C( 64), INT8_C( 27), INT8_C( 68), INT8_C( 98), INT8_C( 8), INT8_C( 25), INT8_C( 52), -INT8_C( 51), INT8_C( 103), INT8_C( 101), INT8_C( 93), -INT8_C( 16), INT8_C( 67), INT8_C( 117), INT8_C( 4), INT8_C( 93), -INT8_C( 73), -INT8_C( 100), -INT8_C( 108), INT8_C( 78), INT8_C( 41), -INT8_C( 63), -INT8_C( 102), INT8_C( 86), INT8_C( 112), INT8_C( 90), INT8_C( 120), INT8_C( 33), -INT8_C( 68), INT8_C( 74), -INT8_C( 11), INT8_C( 124), INT8_C( 101), INT8_C( 58), -INT8_C( 33), INT8_C( 109), INT8_C( 83), INT8_C( 19), INT8_C( 58), -INT8_C( 70), INT8_C( 120), -INT8_C( 105), -INT8_C( 86), -INT8_C( 69), INT8_C( 12), -INT8_C( 82), INT8_C( 24), -INT8_C( 60), INT8_C( 74), -INT8_C( 84), INT8_C( 18), INT8_C( 116), INT8_C( 109), -INT8_C( 84), -INT8_C( 54), -INT8_C( 34) }, { -INT8_C( 78), INT8_C( 112), -INT8_C( 5), -INT8_C( 65), INT8_C( 44), INT8_C( 4), INT8_C( 29), -INT8_C( 83), INT8_C( 52), -INT8_C( 51), INT8_C( 20), -INT8_C( 21), -INT8_C( 65), 
-INT8_C( 74), -INT8_C( 29), INT8_C( 117), -INT8_C( 13), -INT8_C( 73), INT8_C( 74), INT8_C( 86), INT8_C( 31), INT8_C( 122), -INT8_C( 119), -INT8_C( 92), INT8_C( 44), INT8_C( 48), INT8_C( 96), INT8_C( 41), -INT8_C( 3), INT8_C( 67), -INT8_C( 67), INT8_C( 24), -INT8_C( 34), -INT8_C( 51), -INT8_C( 116), INT8_C( 12), -INT8_C( 82), -INT8_C( 42), -INT8_C( 116), INT8_C( 110), INT8_C( 66), -INT8_C( 63), INT8_C( 27), INT8_C( 120), -INT8_C( 116), INT8_C( 96), INT8_C( 117), INT8_C( 101), INT8_C( 110), INT8_C( 25), -INT8_C( 94), INT8_C( 117), -INT8_C( 34), -INT8_C( 102), -INT8_C( 118), INT8_C( 11), INT8_C( 96), -INT8_C( 22), INT8_C( 52), INT8_C( 20), -INT8_C( 3), -INT8_C( 74), INT8_C( 96), INT8_C( 77) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i idx = simde_mm512_loadu_epi8(test_vec[i].idx); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_mask_permutex2var_epi8(a, test_vec[i].k, idx, b); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i8x64(); simde__mmask64 k = simde_test_x86_random_mmask64(); simde__m512i idx = simde_test_x86_random_i8x64(); simde__m512i b = simde_test_x86_random_i8x64(); simde__m512i r = simde_mm512_mask_permutex2var_epi8(a, k, idx, b); simde_test_x86_write_i8x64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask64(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif }
/* Test for simde_mm512_mask2_permutex2var_epi8.
 *
 * Each test_vec entry stores, in this order: the 64 int8_t lanes of `a`, the
 * 64 int8_t lanes of `idx`, the 64-bit mask `k`, the 64 int8_t lanes of `b`,
 * and the 64 int8_t lanes of the expected result `r`.  The field order mirrors
 * the argument order of the call under test: (a, idx, k, b).
 *
 * The enabled branch (#if 1) checks every stored vector and returns 0.
 * The #else branch is compiled out; it prints 8 randomly generated
 * input/result sets through the simde_test_x86_write_* helpers and returns 1.
 * NOTE(review): the stored vectors look like output captured from that
 * generator branch -- confirm against the test tooling. */
static int test_simde_mm512_mask2_permutex2var_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[64]; const int8_t idx[64]; const simde__mmask64 k; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { 
{ { INT8_C( 5), INT8_MIN, -INT8_C( 25), INT8_C( 34), -INT8_C( 7), -INT8_C( 104), INT8_C( 19), -INT8_C( 70), INT8_C( 61), INT8_C( 43), INT8_C( 11), INT8_C( 24), INT8_C( 118), INT8_C( 77), -INT8_C( 57), -INT8_C( 122), -INT8_C( 36), INT8_C( 87), INT8_C( 24), -INT8_C( 115), -INT8_C( 15), -INT8_C( 25), INT8_C( 107), INT8_C( 86), INT8_C( 9), INT8_C( 42), -INT8_C( 104), INT8_C( 70), -INT8_C( 1), -INT8_C( 20), -INT8_C( 114), INT8_C( 5), INT8_C( 108), INT8_C( 118), INT8_C( 39), INT8_C( 101), INT8_C( 14), INT8_C( 58), INT8_C( 31), INT8_C( 75), INT8_C( 101), INT8_C( 43), INT8_C( 99), -INT8_C( 37), INT8_C( 120), INT8_C( 43), INT8_C( 98), INT8_C( 84), -INT8_C( 126), INT8_C( 122), -INT8_C( 30), INT8_C( 115), INT8_C( 97), INT8_C( 77), -INT8_C( 55), INT8_C( 106), INT8_C( 119), INT8_C( 97), -INT8_C( 80), INT8_C( 119), INT8_C( 77), INT8_C( 63), INT8_C( 124), -INT8_C( 70) }, { -INT8_C( 75), -INT8_C( 93), INT8_C( 31), -INT8_C( 61), -INT8_C( 35), INT8_C( 63), INT8_C( 14), INT8_C( 66), INT8_C( 106), INT8_C( 114), INT8_C( 30), -INT8_C( 30), -INT8_C( 99), INT8_MIN, INT8_C( 55), INT8_C( 31), -INT8_C( 6), INT8_C( 25), -INT8_C( 110), INT8_C( 92), INT8_C( 102), INT8_C( 91), -INT8_C( 58), -INT8_C( 35), -INT8_C( 68), INT8_C( 119), INT8_C( 84), INT8_C( 9), -INT8_C( 74), -INT8_C( 48), -INT8_C( 61), INT8_C( 107), INT8_C( 115), -INT8_C( 29), INT8_C( 46), INT8_C( 80), INT8_C( 34), INT8_C( 60), -INT8_C( 109), -INT8_C( 116), -INT8_C( 82), -INT8_C( 79), INT8_C( 110), INT8_C( 75), INT8_C( 49), -INT8_C( 91), INT8_C( 106), INT8_C( 43), -INT8_C( 66), -INT8_C( 4), -INT8_C( 121), INT8_C( 36), INT8_C( 87), INT8_C( 78), INT8_C( 2), INT8_C( 19), -INT8_C( 59), INT8_C( 86), INT8_C( 29), INT8_C( 123), INT8_C( 39), -INT8_C( 32), -INT8_C( 26), -INT8_C( 102) }, UINT64_C(18406632037959800003), { INT8_C( 47), -INT8_C( 32), INT8_C( 74), INT8_C( 96), -INT8_C( 123), -INT8_C( 75), -INT8_C( 117), INT8_C( 68), -INT8_C( 79), INT8_C( 19), INT8_C( 104), INT8_C( 9), INT8_C( 97), INT8_C( 106), INT8_C( 28), INT8_C( 38), -INT8_C( 
63), INT8_C( 57), -INT8_C( 95), -INT8_C( 24), INT8_C( 26), -INT8_C( 121), -INT8_C( 126), -INT8_C( 35), -INT8_C( 101), INT8_C( 109), -INT8_C( 61), -INT8_C( 21), -INT8_C( 21), INT8_C( 52), -INT8_C( 22), INT8_C( 26), INT8_C( 20), INT8_C( 53), INT8_C( 122), -INT8_C( 102), -INT8_C( 22), INT8_C( 6), -INT8_C( 34), -INT8_C( 101), INT8_C( 25), INT8_C( 70), -INT8_C( 92), INT8_C( 122), -INT8_C( 79), -INT8_C( 63), -INT8_C( 96), INT8_C( 114), -INT8_C( 6), INT8_C( 65), INT8_C( 90), INT8_C( 20), -INT8_C( 56), -INT8_C( 36), -INT8_C( 14), INT8_C( 99), INT8_C( 74), -INT8_C( 75), INT8_C( 78), INT8_C( 53), -INT8_C( 23), INT8_C( 57), INT8_C( 80), -INT8_C( 2) }, { INT8_C( 77), INT8_C( 101), INT8_C( 31), -INT8_C( 61), -INT8_C( 35), INT8_C( 63), -INT8_C( 57), INT8_C( 74), INT8_C( 106), INT8_C( 114), -INT8_C( 114), -INT8_C( 30), -INT8_C( 20), INT8_MIN, INT8_C( 55), INT8_C( 31), INT8_C( 78), INT8_C( 42), -INT8_C( 110), -INT8_C( 21), INT8_C( 102), -INT8_C( 21), -INT8_C( 117), INT8_C( 52), INT8_C( 77), INT8_C( 119), INT8_C( 26), INT8_C( 9), -INT8_C( 74), -INT8_C( 63), INT8_C( 96), INT8_C( 122), INT8_C( 115), -INT8_C( 29), INT8_C( 46), INT8_C( 80), INT8_C( 39), INT8_C( 60), -INT8_C( 115), -INT8_C( 116), -INT8_C( 82), INT8_C( 122), -INT8_C( 96), INT8_C( 9), INT8_C( 122), INT8_C( 58), -INT8_C( 92), INT8_C( 43), INT8_C( 124), -INT8_C( 4), -INT8_C( 121), INT8_C( 36), -INT8_C( 35), INT8_C( 28), -INT8_C( 25), INT8_C( 19), -INT8_C( 75), -INT8_C( 126), -INT8_C( 20), INT8_C( 53), INT8_C( 75), INT8_C( 20), -INT8_C( 34), -INT8_C( 104) } }, { { INT8_C( 110), -INT8_C( 54), -INT8_C( 104), INT8_C( 88), -INT8_C( 48), INT8_C( 118), -INT8_C( 13), -INT8_C( 23), -INT8_C( 68), -INT8_C( 104), INT8_C( 99), INT8_C( 109), INT8_C( 89), INT8_C( 3), -INT8_C( 33), INT8_C( 83), INT8_C( 68), INT8_C( 57), INT8_C( 104), INT8_C( 12), INT8_C( 22), INT8_C( 90), INT8_C( 111), INT8_C( 96), INT8_C( 15), -INT8_C( 66), -INT8_C( 107), -INT8_C( 8), -INT8_C( 9), -INT8_C( 27), -INT8_C( 10), INT8_C( 101), -INT8_C( 80), -INT8_C( 114), 
-INT8_C( 67), INT8_MIN, INT8_C( 4), -INT8_C( 80), INT8_C( 106), -INT8_C( 63), INT8_C( 72), -INT8_C( 51), INT8_C( 46), -INT8_C( 95), -INT8_C( 47), INT8_C( 14), -INT8_C( 11), INT8_C( 21), INT8_C( 71), INT8_C( 93), INT8_C( 34), INT8_C( 93), -INT8_C( 73), -INT8_C( 111), -INT8_C( 67), -INT8_C( 58), INT8_C( 79), INT8_C( 83), -INT8_C( 66), INT8_C( 70), INT8_C( 56), -INT8_C( 75), -INT8_C( 85), -INT8_C( 24) }, { INT8_C( 67), INT8_C( 104), INT8_C( 105), INT8_C( 72), INT8_C( 25), -INT8_C( 45), INT8_C( 9), INT8_C( 97), -INT8_C( 96), INT8_C( 55), INT8_C( 3), INT8_C( 113), INT8_C( 69), -INT8_C( 8), -INT8_C( 121), -INT8_C( 115), INT8_C( 85), -INT8_C( 87), -INT8_C( 22), INT8_C( 12), INT8_C( 58), -INT8_C( 88), -INT8_C( 46), -INT8_C( 118), -INT8_C( 5), -INT8_C( 112), -INT8_C( 48), INT8_C( 51), INT8_C( 69), INT8_C( 124), INT8_C( 28), -INT8_C( 119), -INT8_C( 28), -INT8_C( 123), -INT8_C( 47), -INT8_C( 3), INT8_C( 88), -INT8_C( 38), INT8_C( 95), -INT8_C( 8), INT8_C( 17), INT8_C( 98), INT8_C( 106), INT8_C( 87), INT8_C( 90), -INT8_C( 15), -INT8_C( 28), -INT8_C( 81), -INT8_C( 102), -INT8_C( 50), -INT8_C( 69), -INT8_C( 44), INT8_C( 118), -INT8_C( 115), INT8_C( 94), INT8_C( 113), INT8_C( 29), INT8_C( 47), -INT8_C( 91), INT8_C( 99), -INT8_C( 85), -INT8_C( 63), -INT8_C( 20), -INT8_C( 113) }, UINT64_C(12148157180822797638), { INT8_C( 78), INT8_C( 0), -INT8_C( 1), -INT8_C( 88), -INT8_C( 15), -INT8_C( 29), INT8_C( 87), -INT8_C( 117), -INT8_C( 78), INT8_C( 18), INT8_C( 96), INT8_C( 40), -INT8_C( 97), -INT8_C( 66), -INT8_C( 102), -INT8_C( 68), -INT8_C( 19), INT8_C( 63), INT8_C( 31), -INT8_C( 104), INT8_C( 0), INT8_C( 11), INT8_C( 40), INT8_C( 70), -INT8_C( 56), -INT8_C( 75), -INT8_C( 28), INT8_C( 95), -INT8_C( 95), INT8_C( 122), INT8_C( 8), -INT8_C( 17), INT8_C( 123), INT8_C( 7), -INT8_C( 105), INT8_C( 108), -INT8_C( 21), -INT8_C( 18), -INT8_C( 8), -INT8_C( 99), INT8_C( 0), INT8_C( 88), -INT8_C( 59), -INT8_C( 97), INT8_C( 22), INT8_C( 95), INT8_C( 91), INT8_C( 4), -INT8_C( 98), INT8_C( 123), 
-INT8_C( 100), -INT8_C( 98), -INT8_C( 122), -INT8_C( 60), -INT8_C( 28), INT8_C( 79), INT8_C( 121), -INT8_C( 56), -INT8_C( 82), INT8_C( 26), INT8_C( 67), -INT8_C( 74), INT8_C( 9), -INT8_C( 66) }, { INT8_C( 67), INT8_C( 0), INT8_C( 88), INT8_C( 72), INT8_C( 25), -INT8_C( 45), -INT8_C( 104), INT8_C( 97), -INT8_C( 80), INT8_C( 55), INT8_C( 88), INT8_C( 123), -INT8_C( 29), INT8_C( 121), -INT8_C( 121), INT8_C( 3), INT8_C( 11), -INT8_C( 87), -INT8_C( 59), INT8_C( 89), INT8_C( 58), -INT8_C( 88), -INT8_C( 46), INT8_C( 99), -INT8_C( 5), INT8_C( 68), -INT8_C( 19), INT8_C( 93), -INT8_C( 29), INT8_C( 124), INT8_C( 28), -INT8_C( 104), -INT8_C( 21), INT8_C( 118), INT8_C( 63), -INT8_C( 3), -INT8_C( 56), -INT8_C( 38), INT8_C( 95), INT8_C( 121), INT8_C( 17), INT8_C( 98), -INT8_C( 59), INT8_C( 70), INT8_C( 90), INT8_C( 123), -INT8_C( 21), INT8_C( 21), -INT8_C( 102), -INT8_C( 102), INT8_C( 70), -INT8_C( 44), -INT8_C( 28), -INT8_C( 115), INT8_C( 94), INT8_C( 123), INT8_C( 29), INT8_C( 47), -INT8_C( 91), INT8_C( 108), -INT8_C( 85), INT8_C( 0), -INT8_C( 20), INT8_C( 83) } }, { { -INT8_C( 66), -INT8_C( 96), INT8_C( 42), -INT8_C( 87), -INT8_C( 114), INT8_C( 34), INT8_C( 70), -INT8_C( 114), INT8_C( 122), INT8_C( 11), INT8_C( 45), -INT8_C( 111), INT8_C( 107), -INT8_C( 119), -INT8_C( 107), INT8_C( 9), INT8_C( 4), INT8_C( 49), -INT8_C( 88), -INT8_C( 118), -INT8_C( 10), -INT8_C( 116), -INT8_C( 39), INT8_C( 111), INT8_C( 85), -INT8_C( 120), -INT8_C( 118), -INT8_C( 104), INT8_C( 62), -INT8_C( 109), INT8_C( 86), -INT8_C( 4), INT8_C( 52), INT8_MIN, -INT8_C( 91), -INT8_C( 62), -INT8_C( 93), -INT8_C( 21), INT8_C( 81), INT8_C( 29), -INT8_C( 9), INT8_C( 126), -INT8_C( 82), INT8_C( 98), INT8_C( 7), INT8_C( 67), INT8_C( 107), INT8_C( 11), INT8_C( 117), INT8_C( 19), -INT8_C( 106), INT8_C( 107), -INT8_C( 96), INT8_C( 111), -INT8_C( 38), -INT8_C( 11), -INT8_C( 9), INT8_C( 100), -INT8_C( 115), INT8_C( 54), -INT8_C( 8), -INT8_C( 29), INT8_C( 50), INT8_C( 44) }, { INT8_C( 99), -INT8_C( 40), -INT8_C( 18), 
INT8_C( 6), -INT8_C( 61), INT8_C( 63), INT8_C( 36), -INT8_C( 70), -INT8_C( 66), -INT8_C( 46), INT8_C( 28), -INT8_C( 59), INT8_C( 22), -INT8_C( 120), -INT8_C( 47), -INT8_C( 117), -INT8_C( 101), INT8_C( 103), -INT8_C( 10), INT8_C( 59), -INT8_C( 42), -INT8_C( 48), INT8_C( 48), -INT8_C( 50), INT8_C( 53), -INT8_C( 67), INT8_C( 4), INT8_C( 45), -INT8_C( 96), INT8_C( 54), INT8_C( 89), INT8_C( 4), INT8_C( 14), INT8_C( 71), INT8_C( 10), -INT8_C( 46), -INT8_C( 121), INT8_C( 46), -INT8_C( 116), INT8_C( 69), INT8_C( 1), -INT8_C( 87), INT8_C( 10), INT8_C( 23), INT8_C( 49), -INT8_C( 37), -INT8_C( 94), -INT8_C( 52), INT8_C( 66), -INT8_C( 104), INT8_C( 8), INT8_C( 25), INT8_C( 104), INT8_C( 56), -INT8_C( 25), -INT8_C( 99), -INT8_C( 10), -INT8_C( 21), -INT8_C( 54), -INT8_C( 106), INT8_C( 33), INT8_C( 35), -INT8_C( 102), INT8_C( 48) }, UINT64_C(15291848100709311851), { INT8_C( 55), INT8_C( 65), -INT8_C( 21), INT8_C( 104), INT8_C( 29), -INT8_C( 115), INT8_C( 53), INT8_C( 95), INT8_C( 37), INT8_C( 61), INT8_C( 120), -INT8_C( 114), INT8_C( 117), INT8_C( 95), INT8_C( 43), INT8_C( 107), INT8_C( 74), -INT8_C( 10), INT8_C( 2), INT8_C( 108), INT8_C( 25), -INT8_C( 100), -INT8_C( 100), -INT8_C( 124), INT8_C( 65), -INT8_C( 98), INT8_C( 118), INT8_C( 21), INT8_C( 44), -INT8_C( 83), -INT8_C( 23), INT8_C( 100), -INT8_C( 17), -INT8_C( 43), -INT8_C( 52), INT8_C( 12), INT8_C( 98), INT8_C( 1), INT8_C( 107), -INT8_C( 120), INT8_C( 62), -INT8_C( 28), INT8_C( 22), -INT8_C( 76), INT8_C( 67), INT8_C( 65), INT8_C( 31), -INT8_C( 114), INT8_C( 55), INT8_C( 33), -INT8_C( 6), INT8_C( 81), -INT8_C( 66), -INT8_C( 106), -INT8_C( 43), -INT8_C( 1), INT8_C( 52), INT8_C( 76), INT8_C( 20), INT8_C( 96), -INT8_C( 7), -INT8_C( 2), -INT8_C( 60), -INT8_C( 24) }, { INT8_C( 12), INT8_C( 65), -INT8_C( 18), INT8_C( 70), -INT8_C( 61), INT8_C( 44), -INT8_C( 93), -INT8_C( 70), INT8_C( 50), -INT8_C( 46), INT8_C( 62), -INT8_C( 59), INT8_C( 22), INT8_C( 122), -INT8_C( 47), -INT8_C( 111), -INT8_C( 101), -INT8_C( 120), -INT8_C( 10), 
INT8_C( 59), -INT8_C( 42), -INT8_C( 48), INT8_C( 48), -INT8_C( 50), INT8_C( 53), -INT8_C( 29), INT8_C( 4), INT8_C( 45), INT8_C( 52), -INT8_C( 38), -INT8_C( 98), -INT8_C( 114), -INT8_C( 107), INT8_C( 95), INT8_C( 10), -INT8_C( 46), -INT8_C( 114), INT8_C( 46), INT8_C( 107), -INT8_C( 115), INT8_C( 1), INT8_C( 126), INT8_C( 45), INT8_C( 111), INT8_C( 49), -INT8_C( 37), -INT8_C( 94), INT8_C( 117), -INT8_C( 21), INT8_C( 85), INT8_C( 122), INT8_C( 25), INT8_C( 62), -INT8_C( 9), -INT8_C( 25), -INT8_C( 99), -INT8_C( 10), -INT8_C( 21), INT8_C( 120), -INT8_C( 106), INT8_MIN, INT8_C( 35), -INT8_C( 118), INT8_C( 117) } }, { { -INT8_C( 45), -INT8_C( 111), -INT8_C( 12), INT8_C( 53), -INT8_C( 110), INT8_C( 96), -INT8_C( 67), -INT8_C( 47), INT8_C( 68), -INT8_C( 45), -INT8_C( 123), -INT8_C( 121), INT8_C( 21), -INT8_C( 92), INT8_C( 21), INT8_C( 76), -INT8_C( 58), INT8_C( 15), -INT8_C( 99), -INT8_C( 124), -INT8_C( 91), INT8_C( 115), -INT8_C( 125), -INT8_C( 39), -INT8_C( 65), -INT8_C( 104), INT8_C( 58), -INT8_C( 72), -INT8_C( 106), -INT8_C( 2), -INT8_C( 95), INT8_C( 105), -INT8_C( 113), -INT8_C( 107), -INT8_C( 98), INT8_C( 34), -INT8_C( 11), INT8_C( 92), -INT8_C( 13), INT8_C( 57), INT8_C( 47), INT8_C( 120), -INT8_C( 63), INT8_C( 68), INT8_C( 28), -INT8_C( 42), -INT8_C( 111), -INT8_C( 30), -INT8_C( 26), INT8_C( 46), INT8_C( 102), -INT8_C( 117), -INT8_C( 95), -INT8_C( 22), INT8_C( 101), INT8_C( 96), -INT8_C( 126), -INT8_C( 97), INT8_C( 25), INT8_C( 24), -INT8_C( 99), -INT8_C( 70), -INT8_C( 127), INT8_C( 45) }, { INT8_C( 79), INT8_C( 31), INT8_C( 79), INT8_C( 69), INT8_C( 123), INT8_C( 66), INT8_C( 126), -INT8_C( 85), -INT8_C( 70), INT8_C( 63), -INT8_C( 17), -INT8_C( 42), INT8_C( 22), INT8_MIN, -INT8_C( 71), -INT8_C( 4), -INT8_C( 81), INT8_C( 31), -INT8_C( 121), INT8_C( 80), INT8_C( 9), -INT8_C( 20), -INT8_C( 79), -INT8_C( 117), -INT8_C( 117), -INT8_C( 54), -INT8_C( 93), INT8_C( 41), -INT8_C( 124), INT8_C( 36), INT8_C( 86), -INT8_C( 45), INT8_C( 68), -INT8_C( 91), INT8_C( 24), -INT8_C( 
65), -INT8_C( 25), -INT8_C( 105), INT8_C( 106), -INT8_C( 95), -INT8_C( 42), INT8_C( 90), INT8_C( 119), -INT8_C( 20), -INT8_C( 38), INT8_C( 48), -INT8_C( 24), -INT8_C( 119), INT8_C( 80), INT8_C( 112), -INT8_C( 38), INT8_C( 89), INT8_C( 92), -INT8_C( 117), -INT8_C( 27), -INT8_C( 24), INT8_C( 85), -INT8_C( 120), INT8_C( 17), -INT8_C( 39), -INT8_C( 83), INT8_C( 103), -INT8_C( 84), -INT8_C( 15) }, UINT64_C( 3644568084488963340), { INT8_C( 117), INT8_C( 11), INT8_C( 31), INT8_C( 79), INT8_C( 60), INT8_C( 7), -INT8_C( 39), -INT8_C( 116), INT8_C( 119), -INT8_C( 77), -INT8_C( 27), -INT8_C( 44), INT8_C( 62), -INT8_C( 54), -INT8_C( 68), -INT8_C( 109), INT8_C( 83), -INT8_C( 51), INT8_C( 108), INT8_C( 0), INT8_C( 52), INT8_C( 24), -INT8_C( 15), INT8_C( 64), -INT8_C( 35), -INT8_C( 95), INT8_C( 51), INT8_C( 57), -INT8_C( 68), -INT8_C( 57), INT8_C( 108), INT8_C( 49), -INT8_C( 46), -INT8_C( 117), -INT8_C( 127), INT8_C( 14), -INT8_C( 110), INT8_C( 90), -INT8_C( 102), INT8_C( 10), INT8_C( 13), INT8_MIN, -INT8_C( 34), INT8_C( 75), INT8_C( 74), -INT8_C( 102), -INT8_C( 34), -INT8_C( 99), INT8_C( 103), INT8_C( 74), -INT8_C( 99), -INT8_C( 101), INT8_C( 98), -INT8_C( 114), -INT8_C( 37), INT8_C( 64), INT8_C( 48), INT8_C( 14), INT8_C( 121), -INT8_C( 20), -INT8_C( 43), -INT8_C( 27), INT8_C( 30), -INT8_C( 89) }, { INT8_C( 79), INT8_C( 31), -INT8_C( 109), INT8_C( 7), INT8_C( 123), INT8_C( 66), INT8_C( 126), -INT8_C( 85), INT8_C( 25), INT8_C( 63), -INT8_C( 99), -INT8_C( 42), INT8_C( 22), INT8_MIN, -INT8_C( 97), -INT8_C( 43), -INT8_C( 81), INT8_C( 31), -INT8_C( 121), INT8_C( 80), -INT8_C( 45), INT8_C( 74), -INT8_C( 79), -INT8_C( 121), -INT8_C( 121), -INT8_C( 27), -INT8_C( 93), INT8_C( 41), -INT8_C( 110), -INT8_C( 11), -INT8_C( 15), INT8_C( 0), INT8_C( 68), -INT8_C( 91), -INT8_C( 65), INT8_C( 45), INT8_C( 10), -INT8_C( 105), -INT8_C( 34), -INT8_C( 95), -INT8_C( 15), INT8_C( 51), INT8_C( 119), INT8_C( 74), INT8_C( 51), INT8_C( 48), -INT8_C( 24), -INT8_C( 119), INT8_C( 80), INT8_C( 112), INT8_C( 
51), INT8_C( 89), -INT8_C( 68), -INT8_C( 117), -INT8_C( 27), INT8_C( 13), INT8_C( 85), INT8_C( 68), INT8_C( 17), -INT8_C( 39), -INT8_C( 42), INT8_C( 10), -INT8_C( 84), -INT8_C( 15) } }, { { INT8_C( 112), -INT8_C( 97), -INT8_C( 74), INT8_C( 3), -INT8_C( 7), INT8_C( 80), INT8_C( 13), INT8_C( 6), -INT8_C( 48), -INT8_C( 21), INT8_C( 81), INT8_C( 27), -INT8_C( 123), INT8_C( 47), -INT8_C( 72), -INT8_C( 20), INT8_C( 121), INT8_C( 86), -INT8_C( 121), -INT8_C( 37), -INT8_C( 28), INT8_C( 98), INT8_C( 27), INT8_C( 20), INT8_C( 112), -INT8_C( 107), INT8_C( 1), INT8_C( 69), INT8_C( 122), INT8_C( 31), -INT8_C( 20), -INT8_C( 21), -INT8_C( 66), -INT8_C( 94), -INT8_C( 18), -INT8_C( 73), -INT8_C( 13), -INT8_C( 5), -INT8_C( 67), -INT8_C( 61), -INT8_C( 26), INT8_C( 14), -INT8_C( 34), INT8_C( 107), INT8_C( 61), -INT8_C( 105), INT8_C( 87), -INT8_C( 74), -INT8_C( 19), -INT8_C( 34), -INT8_C( 111), -INT8_C( 47), INT8_C( 64), -INT8_C( 83), -INT8_C( 26), -INT8_C( 80), INT8_C( 66), -INT8_C( 25), -INT8_C( 11), -INT8_C( 68), INT8_C( 6), -INT8_C( 31), -INT8_C( 89), -INT8_C( 60) }, { -INT8_C( 124), -INT8_C( 107), INT8_C( 123), INT8_C( 119), -INT8_C( 112), INT8_C( 56), INT8_C( 58), INT8_C( 118), INT8_C( 70), INT8_C( 25), -INT8_C( 31), -INT8_C( 125), -INT8_C( 80), INT8_C( 56), INT8_C( 57), -INT8_C( 99), INT8_C( 22), -INT8_C( 54), INT8_C( 110), INT8_C( 86), INT8_C( 119), INT8_C( 84), INT8_C( 6), -INT8_C( 71), INT8_C( 59), -INT8_C( 5), INT8_C( 118), INT8_C( 65), -INT8_C( 35), INT8_C( 29), INT8_C( 5), INT8_C( 97), -INT8_C( 77), INT8_MIN, -INT8_C( 40), INT8_C( 67), -INT8_C( 72), INT8_C( 18), -INT8_C( 70), -INT8_C( 2), INT8_C( 43), -INT8_C( 101), -INT8_C( 127), -INT8_C( 37), -INT8_C( 44), -INT8_C( 70), INT8_C( 120), -INT8_C( 22), -INT8_C( 123), -INT8_C( 25), INT8_C( 65), -INT8_C( 4), INT8_C( 59), INT8_C( 71), -INT8_C( 74), INT8_C( 119), INT8_C( 67), INT8_C( 44), -INT8_C( 72), INT8_C( 32), INT8_C( 73), -INT8_C( 66), -INT8_C( 127), -INT8_C( 4) }, UINT64_C(10949933410637863230), { -INT8_C( 107), INT8_C( 
119), INT8_C( 114), INT8_C( 105), INT8_C( 49), -INT8_C( 21), INT8_C( 84), -INT8_C( 74), -INT8_C( 46), -INT8_C( 107), -INT8_C( 77), INT8_C( 13), -INT8_C( 36), INT8_C( 105), -INT8_C( 124), INT8_C( 31), -INT8_C( 107), INT8_C( 61), INT8_C( 63), -INT8_C( 34), -INT8_C( 5), -INT8_C( 64), -INT8_C( 37), INT8_C( 57), INT8_C( 25), INT8_C( 27), INT8_C( 48), -INT8_C( 123), INT8_C( 21), INT8_C( 38), INT8_C( 28), -INT8_C( 86), -INT8_C( 99), -INT8_C( 114), INT8_C( 20), -INT8_C( 50), INT8_C( 121), INT8_C( 104), -INT8_C( 123), INT8_C( 75), -INT8_C( 3), INT8_C( 56), INT8_C( 89), -INT8_C( 39), -INT8_C( 95), -INT8_C( 35), -INT8_C( 7), INT8_C( 54), INT8_C( 26), INT8_C( 56), INT8_C( 20), INT8_C( 21), -INT8_C( 7), -INT8_C( 17), INT8_C( 79), INT8_C( 18), INT8_C( 10), INT8_MAX, -INT8_C( 105), INT8_C( 31), -INT8_C( 91), -INT8_C( 77), -INT8_C( 54), INT8_C( 66) }, { -INT8_C( 124), INT8_C( 98), INT8_C( 31), INT8_C( 18), INT8_C( 121), INT8_C( 66), INT8_C( 58), INT8_C( 118), INT8_C( 84), INT8_C( 25), -INT8_C( 31), INT8_C( 3), -INT8_C( 19), INT8_C( 56), -INT8_C( 25), -INT8_C( 99), INT8_C( 22), -INT8_C( 54), INT8_C( 110), INT8_C( 86), INT8_C( 119), INT8_C( 84), INT8_C( 13), -INT8_C( 71), -INT8_C( 68), INT8_C( 31), INT8_C( 79), INT8_C( 65), INT8_C( 38), INT8_C( 31), INT8_C( 80), -INT8_C( 114), -INT8_C( 47), INT8_C( 112), -INT8_C( 40), INT8_C( 105), -INT8_C( 72), -INT8_C( 121), -INT8_C( 11), -INT8_C( 2), INT8_C( 43), INT8_C( 69), -INT8_C( 127), -INT8_C( 123), -INT8_C( 5), -INT8_C( 11), INT8_C( 10), INT8_C( 89), INT8_C( 80), -INT8_C( 25), INT8_C( 119), -INT8_C( 4), -INT8_C( 68), -INT8_C( 74), -INT8_C( 26), INT8_C( 18), INT8_C( 105), INT8_C( 61), INT8_C( 66), INT8_C( 32), -INT8_C( 107), -INT8_C( 66), -INT8_C( 127), -INT8_C( 91) } }, { { INT8_C( 66), -INT8_C( 34), INT8_C( 17), -INT8_C( 69), INT8_C( 70), -INT8_C( 106), INT8_C( 7), INT8_C( 67), -INT8_C( 50), INT8_C( 96), INT8_C( 28), INT8_C( 111), INT8_C( 61), INT8_C( 21), -INT8_C( 91), INT8_C( 88), INT8_C( 78), -INT8_C( 71), INT8_C( 109), INT8_C( 71), 
-INT8_C( 87), -INT8_C( 68), INT8_C( 89), -INT8_C( 77), INT8_C( 60), -INT8_C( 15), -INT8_C( 45), -INT8_C( 31), -INT8_C( 92), -INT8_C( 99), INT8_C( 36), -INT8_C( 26), INT8_C( 123), INT8_C( 53), -INT8_C( 94), -INT8_C( 63), -INT8_C( 53), -INT8_C( 87), INT8_C( 4), -INT8_C( 103), INT8_C( 9), INT8_C( 32), INT8_C( 8), INT8_C( 70), INT8_C( 54), -INT8_C( 83), -INT8_C( 98), -INT8_C( 124), INT8_C( 102), INT8_C( 12), -INT8_C( 53), INT8_C( 15), -INT8_C( 56), INT8_C( 36), -INT8_C( 61), INT8_C( 4), INT8_C( 21), -INT8_C( 106), -INT8_C( 26), -INT8_C( 70), INT8_C( 51), INT8_C( 10), -INT8_C( 96), -INT8_C( 82) }, { INT8_C( 63), INT8_C( 66), INT8_C( 111), INT8_C( 10), -INT8_C( 21), INT8_C( 115), -INT8_C( 93), -INT8_C( 12), -INT8_C( 109), -INT8_C( 85), INT8_C( 59), -INT8_C( 55), INT8_C( 88), -INT8_C( 39), INT8_C( 77), -INT8_C( 66), -INT8_C( 27), INT8_C( 24), -INT8_C( 50), -INT8_C( 82), INT8_C( 61), -INT8_C( 111), -INT8_C( 78), INT8_C( 82), INT8_C( 39), -INT8_C( 104), INT8_C( 12), INT8_C( 90), -INT8_C( 94), -INT8_C( 83), INT8_C( 8), -INT8_C( 31), -INT8_C( 17), INT8_C( 119), -INT8_C( 21), -INT8_C( 37), -INT8_C( 22), -INT8_C( 114), -INT8_C( 49), INT8_C( 125), INT8_C( 57), INT8_C( 10), INT8_C( 71), -INT8_C( 111), -INT8_C( 28), -INT8_C( 108), INT8_C( 80), -INT8_C( 55), -INT8_C( 83), INT8_C( 30), INT8_C( 119), -INT8_C( 22), -INT8_C( 81), INT8_C( 42), INT8_C( 60), -INT8_C( 42), -INT8_C( 62), INT8_C( 73), INT8_C( 48), INT8_C( 101), -INT8_C( 10), INT8_C( 56), INT8_C( 70), -INT8_C( 27) }, UINT64_C(18020750115857248943), { -INT8_C( 102), INT8_C( 93), -INT8_C( 117), INT8_C( 126), -INT8_C( 14), -INT8_C( 37), INT8_C( 72), -INT8_C( 97), -INT8_C( 7), -INT8_C( 65), -INT8_C( 119), -INT8_C( 88), -INT8_C( 23), -INT8_C( 59), INT8_C( 126), -INT8_C( 84), INT8_C( 14), -INT8_C( 82), INT8_C( 17), INT8_C( 4), -INT8_C( 26), INT8_C( 87), -INT8_C( 22), -INT8_C( 107), -INT8_C( 119), -INT8_C( 86), INT8_C( 46), INT8_C( 74), INT8_C( 58), INT8_C( 69), INT8_C( 68), -INT8_C( 43), -INT8_C( 94), -INT8_C( 49), INT8_C( 83), 
-INT8_C( 108), -INT8_C( 85), -INT8_C( 101), INT8_C( 51), -INT8_C( 92), INT8_C( 91), -INT8_C( 68), INT8_C( 77), INT8_C( 68), -INT8_C( 126), -INT8_C( 53), -INT8_C( 16), -INT8_C( 112), INT8_C( 122), INT8_C( 1), -INT8_C( 107), INT8_C( 96), INT8_C( 89), INT8_MAX, -INT8_C( 10), -INT8_C( 30), INT8_C( 41), INT8_C( 36), INT8_C( 44), INT8_C( 100), INT8_C( 105), INT8_C( 112), INT8_C( 57), INT8_C( 12) }, { -INT8_C( 82), -INT8_C( 117), -INT8_C( 112), INT8_C( 28), -INT8_C( 21), INT8_C( 96), -INT8_C( 93), INT8_C( 89), -INT8_C( 109), INT8_C( 70), INT8_C( 59), -INT8_C( 55), -INT8_C( 119), -INT8_C( 86), INT8_C( 77), -INT8_C( 66), -INT8_C( 27), INT8_C( 24), -INT8_C( 50), -INT8_C( 82), INT8_C( 61), -INT8_C( 111), -INT8_C( 53), INT8_C( 17), -INT8_C( 103), -INT8_C( 104), INT8_C( 12), INT8_C( 46), -INT8_C( 94), -INT8_C( 83), INT8_C( 8), -INT8_C( 49), -INT8_C( 17), INT8_C( 119), -INT8_C( 21), -INT8_C( 37), -INT8_C( 22), -INT8_C( 114), -INT8_C( 84), INT8_C( 112), INT8_C( 57), INT8_C( 10), INT8_C( 71), -INT8_C( 111), -INT8_C( 85), -INT8_C( 108), INT8_C( 80), -INT8_C( 65), -INT8_C( 83), INT8_C( 36), -INT8_C( 30), -INT8_C( 22), -INT8_C( 124), INT8_C( 42), INT8_C( 60), -INT8_C( 42), -INT8_C( 62), -INT8_C( 65), INT8_C( 48), -INT8_C( 101), -INT8_C( 10), INT8_C( 21), INT8_C( 72), -INT8_C( 101) } }, { { INT8_C( 64), -INT8_C( 116), -INT8_C( 96), -INT8_C( 21), INT8_C( 40), -INT8_C( 44), -INT8_C( 113), -INT8_C( 125), -INT8_C( 112), -INT8_C( 36), -INT8_C( 57), INT8_C( 18), -INT8_C( 88), -INT8_C( 72), -INT8_C( 93), INT8_C( 34), -INT8_C( 71), INT8_C( 56), -INT8_C( 126), INT8_C( 18), -INT8_C( 73), INT8_C( 120), -INT8_C( 11), -INT8_C( 32), -INT8_C( 99), INT8_C( 33), INT8_C( 68), INT8_C( 6), -INT8_C( 110), INT8_C( 125), INT8_C( 18), -INT8_C( 46), INT8_C( 10), -INT8_C( 77), -INT8_C( 67), INT8_C( 50), -INT8_C( 121), INT8_C( 76), -INT8_C( 75), INT8_C( 23), INT8_C( 41), INT8_C( 124), INT8_C( 42), -INT8_C( 47), INT8_C( 52), -INT8_C( 51), -INT8_C( 13), -INT8_C( 18), INT8_C( 5), INT8_C( 117), INT8_C( 0), -INT8_C( 
68), -INT8_C( 18), -INT8_C( 11), -INT8_C( 100), -INT8_C( 117), INT8_C( 23), -INT8_C( 31), -INT8_C( 111), -INT8_C( 87), INT8_C( 94), -INT8_C( 92), INT8_C( 123), INT8_C( 104) }, { INT8_C( 87), INT8_C( 56), -INT8_C( 102), -INT8_C( 34), -INT8_C( 124), INT8_C( 79), -INT8_C( 11), -INT8_C( 83), -INT8_C( 52), INT8_C( 31), INT8_C( 126), INT8_C( 0), -INT8_C( 20), INT8_C( 113), -INT8_C( 18), -INT8_C( 15), -INT8_C( 25), -INT8_C( 17), -INT8_C( 83), -INT8_C( 43), -INT8_C( 28), INT8_C( 74), INT8_C( 96), -INT8_C( 5), INT8_C( 43), -INT8_C( 15), -INT8_C( 92), -INT8_C( 119), -INT8_C( 107), INT8_C( 31), -INT8_C( 14), -INT8_C( 20), INT8_C( 87), -INT8_C( 116), -INT8_C( 54), -INT8_C( 36), -INT8_C( 36), -INT8_C( 64), -INT8_C( 119), -INT8_C( 88), -INT8_C( 33), INT8_C( 8), -INT8_C( 88), -INT8_C( 52), INT8_C( 121), -INT8_C( 105), -INT8_C( 67), INT8_C( 96), -INT8_C( 122), INT8_C( 107), INT8_C( 53), INT8_C( 106), -INT8_C( 75), -INT8_C( 107), INT8_C( 102), -INT8_C( 32), -INT8_C( 121), INT8_C( 10), INT8_C( 105), INT8_C( 28), INT8_C( 42), INT8_C( 91), INT8_C( 9), -INT8_C( 127) }, UINT64_C( 8317277233687155688), { -INT8_C( 17), INT8_C( 20), INT8_C( 63), INT8_C( 104), -INT8_C( 85), -INT8_C( 4), -INT8_C( 55), INT8_C( 49), INT8_C( 103), -INT8_C( 2), -INT8_C( 100), INT8_C( 28), -INT8_C( 108), INT8_C( 2), -INT8_C( 4), INT8_C( 27), INT8_C( 12), INT8_C( 102), INT8_C( 55), INT8_C( 54), -INT8_C( 63), INT8_C( 64), -INT8_C( 72), -INT8_C( 87), INT8_C( 20), INT8_C( 21), INT8_C( 109), -INT8_C( 89), -INT8_C( 4), -INT8_C( 39), INT8_C( 26), -INT8_C( 21), -INT8_C( 18), INT8_C( 89), INT8_C( 84), -INT8_C( 103), INT8_C( 86), INT8_C( 29), -INT8_C( 53), -INT8_C( 67), INT8_C( 27), INT8_C( 103), -INT8_C( 38), -INT8_C( 81), INT8_C( 105), -INT8_C( 42), -INT8_C( 54), INT8_C( 117), INT8_C( 60), INT8_C( 2), -INT8_C( 84), -INT8_C( 2), INT8_C( 66), INT8_C( 100), -INT8_C( 89), INT8_C( 86), INT8_C( 121), INT8_C( 21), -INT8_C( 2), INT8_C( 118), -INT8_C( 18), INT8_C( 24), INT8_C( 97), -INT8_C( 36) }, { INT8_C( 87), INT8_C( 56), 
-INT8_C( 102), INT8_C( 26), -INT8_C( 124), INT8_C( 27), INT8_C( 100), -INT8_C( 51), -INT8_C( 108), -INT8_C( 46), INT8_C( 126), INT8_C( 0), INT8_C( 105), INT8_C( 113), -INT8_C( 54), INT8_C( 2), -INT8_C( 67), -INT8_C( 17), -INT8_C( 51), INT8_C( 64), INT8_C( 86), INT8_C( 74), -INT8_C( 18), -INT8_C( 5), INT8_C( 43), -INT8_C( 15), -INT8_C( 121), -INT8_C( 119), -INT8_C( 107), INT8_C( 31), -INT8_C( 84), INT8_C( 105), -INT8_C( 87), -INT8_C( 88), -INT8_C( 54), -INT8_C( 36), -INT8_C( 4), -INT8_C( 64), -INT8_C( 119), INT8_C( 41), -INT8_C( 21), -INT8_C( 112), INT8_C( 41), -INT8_C( 52), INT8_C( 121), -INT8_C( 32), -INT8_C( 92), -INT8_C( 18), -INT8_C( 122), INT8_C( 107), -INT8_C( 11), -INT8_C( 38), -INT8_C( 75), INT8_C( 120), -INT8_C( 53), -INT8_C( 32), -INT8_C( 125), -INT8_C( 57), INT8_C( 105), INT8_C( 28), INT8_C( 42), -INT8_C( 89), -INT8_C( 36), -INT8_C( 127) } }, { { INT8_C( 114), -INT8_C( 75), INT8_C( 118), -INT8_C( 56), -INT8_C( 46), INT8_C( 65), -INT8_C( 123), -INT8_C( 18), -INT8_C( 88), INT8_C( 95), -INT8_C( 99), INT8_C( 17), INT8_C( 54), INT8_C( 104), -INT8_C( 122), INT8_C( 114), INT8_C( 106), INT8_C( 50), INT8_C( 112), -INT8_C( 84), -INT8_C( 106), INT8_C( 24), INT8_C( 3), INT8_C( 16), INT8_C( 45), INT8_C( 1), -INT8_C( 122), INT8_C( 27), INT8_C( 25), -INT8_C( 25), -INT8_C( 8), -INT8_C( 117), -INT8_C( 99), INT8_C( 110), INT8_C( 83), INT8_C( 111), -INT8_C( 81), -INT8_C( 39), INT8_C( 93), INT8_C( 87), INT8_C( 56), -INT8_C( 5), INT8_C( 104), INT8_C( 110), INT8_C( 99), -INT8_C( 18), -INT8_C( 31), -INT8_C( 51), INT8_C( 33), INT8_C( 81), INT8_C( 121), -INT8_C( 73), INT8_C( 105), INT8_C( 124), -INT8_C( 57), -INT8_C( 106), INT8_C( 125), INT8_C( 77), -INT8_C( 78), -INT8_C( 105), INT8_C( 53), -INT8_C( 86), INT8_C( 34), -INT8_C( 46) }, { INT8_C( 24), INT8_C( 118), INT8_C( 65), -INT8_C( 57), INT8_C( 79), -INT8_C( 97), INT8_C( 30), -INT8_C( 121), -INT8_C( 102), -INT8_C( 122), -INT8_C( 10), -INT8_C( 3), INT8_C( 116), -INT8_C( 41), -INT8_C( 54), -INT8_C( 107), INT8_C( 40), INT8_C( 67), 
INT8_C( 77), -INT8_C( 110), -INT8_C( 64), INT8_C( 20), INT8_C( 40), INT8_C( 61), INT8_C( 98), -INT8_C( 38), -INT8_C( 44), -INT8_C( 105), -INT8_C( 124), -INT8_C( 9), INT8_C( 105), -INT8_C( 100), INT8_C( 109), -INT8_C( 86), INT8_C( 99), -INT8_C( 68), INT8_C( 73), -INT8_C( 127), INT8_C( 67), -INT8_C( 29), INT8_C( 7), INT8_C( 57), -INT8_C( 32), INT8_C( 124), INT8_C( 16), -INT8_C( 86), INT8_C( 17), INT8_C( 57), -INT8_C( 18), INT8_C( 94), -INT8_C( 53), -INT8_C( 82), INT8_C( 115), -INT8_C( 13), -INT8_C( 21), -INT8_C( 43), -INT8_C( 50), -INT8_C( 64), INT8_C( 108), INT8_C( 82), -INT8_C( 73), -INT8_C( 43), -INT8_C( 17), INT8_C( 36) }, UINT64_C(15829066187127411327), { INT8_C( 93), -INT8_C( 115), INT8_C( 87), INT8_C( 109), INT8_C( 55), INT8_C( 105), -INT8_C( 90), INT8_C( 37), -INT8_C( 57), INT8_C( 113), -INT8_C( 45), INT8_C( 58), INT8_C( 101), -INT8_C( 65), INT8_C( 15), INT8_C( 51), INT8_MAX, INT8_C( 123), -INT8_C( 123), INT8_C( 54), INT8_C( 80), INT8_C( 116), INT8_C( 90), -INT8_C( 48), -INT8_C( 57), INT8_C( 58), -INT8_C( 103), -INT8_C( 101), INT8_C( 93), INT8_C( 69), INT8_C( 118), -INT8_C( 70), -INT8_C( 46), -INT8_C( 50), INT8_C( 40), INT8_C( 10), INT8_C( 55), -INT8_C( 50), INT8_C( 47), -INT8_C( 2), INT8_C( 64), INT8_C( 3), INT8_C( 57), -INT8_C( 91), -INT8_C( 62), INT8_C( 72), -INT8_C( 40), INT8_C( 65), -INT8_C( 60), INT8_C( 93), INT8_C( 119), INT8_C( 20), -INT8_C( 46), -INT8_C( 47), -INT8_C( 28), -INT8_C( 103), INT8_C( 11), INT8_C( 125), INT8_C( 52), INT8_C( 104), -INT8_C( 61), -INT8_C( 86), INT8_C( 35), -INT8_C( 107) }, { INT8_C( 45), -INT8_C( 28), -INT8_C( 115), INT8_C( 37), INT8_C( 51), -INT8_C( 117), -INT8_C( 8), -INT8_C( 121), -INT8_C( 102), -INT8_C( 123), -INT8_C( 10), -INT8_C( 3), -INT8_C( 46), -INT8_C( 41), -INT8_C( 45), -INT8_C( 107), INT8_C( 40), INT8_C( 67), INT8_C( 77), -INT8_C( 110), -INT8_C( 64), -INT8_C( 106), INT8_C( 56), -INT8_C( 86), INT8_C( 40), -INT8_C( 38), -INT8_C( 44), INT8_C( 16), -INT8_C( 124), -INT8_C( 9), INT8_C( 3), INT8_C( 25), INT8_C( 109), 
-INT8_C( 86), INT8_C( 10), -INT8_C( 68), INT8_C( 113), -INT8_C( 127), INT8_C( 109), INT8_C( 10), -INT8_C( 18), INT8_C( 77), -INT8_C( 32), INT8_C( 124), INT8_C( 16), INT8_C( 104), INT8_C( 17), INT8_C( 57), -INT8_C( 18), INT8_C( 94), INT8_C( 58), -INT8_C( 31), INT8_C( 115), INT8_C( 20), -INT8_C( 21), INT8_C( 116), INT8_C( 15), INT8_C( 93), INT8_C( 108), -INT8_C( 123), -INT8_C( 106), -INT8_C( 43), INT8_C( 65), -INT8_C( 81) } }, };
/* Verification pass: for each stored vector, load a, idx and b as 512-bit
 * values, run the intrinsic with the stored mask k, and assert that all 64
 * int8 lanes of the result equal the stored r. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i idx = simde_mm512_loadu_epi8(test_vec[i].idx); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_mask2_permutex2var_epi8(a, idx, test_vec[i].k, b); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; #else
/* Compiled-out generator: builds 8 random (a, idx, k, b) sets, computes r with
 * the intrinsic, and writes a, idx, k, b, r in the same order as the test_vec
 * struct fields; returns 1 rather than 0. */
fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i8x64(); simde__m512i idx = simde_test_x86_random_i8x64(); simde__mmask64 k = simde_test_x86_random_mmask64(); simde__m512i b = simde_test_x86_random_i8x64(); simde__m512i r = simde_mm512_mask2_permutex2var_epi8(a, idx, k, b); simde_test_x86_write_i8x64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x64(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask64(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_maskz_permutex2var_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask64 k; const int8_t a[64]; const int8_t idx[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { { UINT64_C( 9543929041240957228), { -INT8_C( 56), INT8_C( 90), -INT8_C( 17), -INT8_C( 59), INT8_C( 52), -INT8_C( 102), -INT8_C( 59), -INT8_C( 11), INT8_C( 59), -INT8_C( 90), -INT8_C( 38), -INT8_C( 2), -INT8_C( 89), -INT8_C( 71), -INT8_C( 100), INT8_C( 28), INT8_C( 76), 
INT8_C( 122), -INT8_C( 36), INT8_C( 7), INT8_C( 4), INT8_C( 31), INT8_C( 102), INT8_C( 48), -INT8_C( 64), INT8_C( 25), -INT8_C( 40), -INT8_C( 90), -INT8_C( 15), INT8_C( 74), INT8_C( 42), -INT8_C( 71), -INT8_C( 92), INT8_C( 25), INT8_MAX, -INT8_C( 40), -INT8_C( 77), INT8_C( 68), -INT8_C( 50), -INT8_C( 18), -INT8_C( 21), -INT8_C( 88), -INT8_C( 20), -INT8_C( 110), INT8_C( 98), -INT8_C( 119), -INT8_C( 82), -INT8_C( 82), INT8_C( 3), -INT8_C( 118), -INT8_C( 75), INT8_C( 7), -INT8_C( 86), INT8_C( 27), INT8_C( 55), INT8_C( 106), INT8_C( 53), INT8_C( 15), INT8_C( 16), INT8_C( 38), INT8_C( 89), INT8_C( 59), -INT8_C( 32), -INT8_C( 3) }, { INT8_C( 84), INT8_C( 95), -INT8_C( 42), INT8_C( 8), -INT8_C( 93), -INT8_C( 92), -INT8_C( 10), -INT8_C( 114), INT8_C( 76), -INT8_C( 29), INT8_C( 33), -INT8_C( 82), INT8_C( 108), -INT8_C( 49), INT8_C( 93), INT8_C( 111), INT8_C( 90), INT8_C( 18), INT8_C( 118), INT8_C( 4), INT8_C( 46), -INT8_C( 83), INT8_C( 110), INT8_C( 99), -INT8_C( 68), INT8_MAX, -INT8_C( 119), INT8_C( 21), -INT8_C( 70), INT8_C( 105), INT8_C( 18), INT8_C( 14), -INT8_C( 56), -INT8_C( 24), INT8_C( 22), INT8_C( 108), -INT8_C( 116), INT8_C( 13), -INT8_C( 6), -INT8_C( 39), -INT8_C( 16), INT8_C( 27), -INT8_C( 121), INT8_C( 92), -INT8_C( 21), -INT8_C( 28), -INT8_C( 53), INT8_C( 69), -INT8_C( 9), INT8_C( 65), INT8_C( 73), INT8_C( 37), -INT8_C( 18), -INT8_C( 73), -INT8_C( 120), -INT8_C( 86), INT8_C( 54), INT8_C( 17), -INT8_C( 65), -INT8_C( 16), INT8_C( 123), -INT8_C( 47), -INT8_C( 1), INT8_C( 67) }, { -INT8_C( 70), INT8_C( 21), -INT8_C( 81), INT8_C( 70), INT8_C( 34), -INT8_C( 86), INT8_C( 31), INT8_C( 18), -INT8_C( 59), -INT8_C( 89), INT8_C( 110), -INT8_C( 80), -INT8_C( 117), INT8_C( 57), -INT8_C( 11), -INT8_C( 126), INT8_C( 122), INT8_C( 62), -INT8_C( 89), INT8_C( 104), -INT8_C( 10), INT8_C( 47), INT8_C( 18), INT8_C( 44), INT8_C( 65), -INT8_C( 47), INT8_C( 29), -INT8_C( 68), -INT8_C( 93), INT8_C( 28), -INT8_C( 1), INT8_C( 93), INT8_C( 49), -INT8_C( 81), -INT8_C( 93), INT8_C( 84), 
INT8_C( 89), -INT8_C( 61), INT8_C( 102), INT8_C( 30), INT8_C( 106), -INT8_C( 43), -INT8_C( 49), -INT8_C( 11), INT8_C( 14), -INT8_C( 60), INT8_C( 120), -INT8_C( 119), INT8_C( 3), INT8_C( 31), -INT8_C( 15), -INT8_C( 7), INT8_C( 79), INT8_C( 4), INT8_C( 37), -INT8_C( 112), -INT8_C( 43), INT8_C( 66), INT8_C( 76), INT8_C( 120), INT8_C( 94), INT8_C( 75), -INT8_C( 43), -INT8_C( 112) }, { INT8_C( 0), INT8_C( 0), INT8_C( 18), INT8_C( 59), INT8_C( 0), -INT8_C( 77), INT8_C( 0), INT8_C( 0), -INT8_C( 117), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 126), INT8_C( 0), -INT8_C( 119), INT8_C( 29), -INT8_C( 36), INT8_C( 0), INT8_C( 0), -INT8_C( 82), -INT8_C( 119), INT8_C( 0), INT8_C( 84), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 31), INT8_C( 0), -INT8_C( 43), INT8_C( 0), -INT8_C( 100), -INT8_C( 59), INT8_C( 0), INT8_C( 102), INT8_C( 0), INT8_C( 0), -INT8_C( 71), INT8_C( 76), -INT8_C( 47), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 93), -INT8_C( 11), INT8_C( 0), -INT8_C( 80), -INT8_C( 86), INT8_C( 0), INT8_C( 21), INT8_C( 0), INT8_C( 0), INT8_C( 120), INT8_C( 106), INT8_C( 59), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 3), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 70) } }, { UINT64_C(11993730380607027706), { INT8_C( 31), INT8_C( 65), -INT8_C( 101), INT8_C( 46), INT8_C( 5), INT8_C( 19), -INT8_C( 73), INT8_C( 8), INT8_C( 51), -INT8_C( 88), INT8_C( 1), -INT8_C( 126), -INT8_C( 84), INT8_C( 39), INT8_C( 18), -INT8_C( 126), INT8_C( 105), INT8_C( 94), -INT8_C( 6), -INT8_C( 56), -INT8_C( 87), -INT8_C( 48), INT8_C( 88), -INT8_C( 92), INT8_C( 73), INT8_C( 60), -INT8_C( 9), -INT8_C( 123), -INT8_C( 122), INT8_C( 105), INT8_C( 43), -INT8_C( 90), -INT8_C( 86), -INT8_C( 58), -INT8_C( 44), -INT8_C( 80), -INT8_C( 38), -INT8_C( 117), -INT8_C( 72), INT8_C( 13), INT8_C( 51), -INT8_C( 70), -INT8_C( 113), -INT8_C( 32), -INT8_C( 31), -INT8_C( 95), INT8_C( 98), INT8_C( 74), -INT8_C( 1), INT8_C( 92), INT8_C( 18), -INT8_C( 88), INT8_C( 44), INT8_C( 106), INT8_C( 76), 
INT8_C( 117), -INT8_C( 90), INT8_C( 68), -INT8_C( 6), INT8_C( 45), -INT8_C( 83), INT8_C( 37), -INT8_C( 45), INT8_C( 88) }, { -INT8_C( 20), -INT8_C( 89), INT8_C( 8), -INT8_C( 58), INT8_C( 50), -INT8_C( 64), -INT8_C( 45), INT8_C( 101), INT8_C( 122), INT8_C( 98), INT8_C( 69), INT8_C( 91), INT8_C( 3), -INT8_C( 89), -INT8_C( 90), INT8_C( 2), INT8_C( 4), -INT8_C( 72), -INT8_C( 86), INT8_C( 48), INT8_C( 35), -INT8_C( 9), -INT8_C( 90), -INT8_C( 55), INT8_C( 59), -INT8_C( 96), -INT8_C( 10), -INT8_C( 24), -INT8_C( 58), -INT8_C( 55), INT8_C( 64), -INT8_C( 78), INT8_C( 112), INT8_C( 72), INT8_C( 120), -INT8_C( 94), INT8_C( 9), INT8_C( 75), INT8_C( 8), -INT8_C( 125), -INT8_C( 83), INT8_C( 77), -INT8_C( 33), -INT8_C( 80), -INT8_C( 11), -INT8_C( 123), -INT8_C( 78), -INT8_C( 7), INT8_C( 61), INT8_C( 92), INT8_C( 41), INT8_C( 96), INT8_C( 83), -INT8_C( 49), INT8_C( 42), -INT8_C( 114), INT8_C( 112), INT8_C( 32), INT8_C( 119), INT8_C( 54), -INT8_C( 22), -INT8_C( 73), -INT8_C( 24), INT8_C( 90) }, { INT8_C( 0), INT8_C( 96), -INT8_C( 3), INT8_C( 9), -INT8_C( 85), INT8_C( 5), -INT8_C( 116), INT8_C( 88), INT8_C( 82), INT8_C( 107), INT8_C( 8), INT8_C( 71), -INT8_C( 16), -INT8_C( 70), INT8_C( 64), INT8_C( 46), INT8_C( 22), INT8_C( 106), -INT8_C( 114), INT8_C( 106), INT8_C( 57), -INT8_C( 72), -INT8_C( 8), -INT8_C( 87), -INT8_C( 39), INT8_C( 111), -INT8_C( 33), -INT8_C( 61), INT8_C( 39), -INT8_C( 57), INT8_C( 29), INT8_C( 39), INT8_C( 39), INT8_C( 26), INT8_C( 48), -INT8_C( 46), INT8_C( 31), -INT8_C( 68), INT8_C( 42), INT8_C( 114), INT8_C( 40), INT8_C( 50), -INT8_C( 71), INT8_C( 24), -INT8_C( 20), -INT8_C( 6), INT8_C( 70), INT8_C( 3), INT8_C( 100), -INT8_C( 43), INT8_C( 109), -INT8_C( 99), -INT8_C( 115), INT8_C( 101), INT8_C( 71), INT8_C( 102), -INT8_C( 43), INT8_C( 38), INT8_C( 41), -INT8_C( 4), -INT8_C( 18), INT8_C( 71), INT8_C( 35), INT8_C( 21) }, { INT8_C( 0), INT8_C( 13), INT8_C( 0), -INT8_C( 116), INT8_C( 18), INT8_C( 0), INT8_C( 106), -INT8_C( 68), INT8_C( 41), INT8_C( 0), INT8_C( 0), 
-INT8_C( 61), INT8_C( 46), INT8_C( 13), -INT8_C( 72), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 113), INT8_C( 0), INT8_C( 0), INT8_C( 102), -INT8_C( 72), INT8_C( 107), INT8_C( 45), -INT8_C( 86), INT8_C( 0), INT8_C( 0), -INT8_C( 116), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 43), -INT8_C( 44), -INT8_C( 88), INT8_C( 71), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 70), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 18), INT8_C( 0), INT8_C( 0), INT8_C( 39), INT8_C( 0), INT8_C( 0), INT8_C( 106), INT8_C( 46), -INT8_C( 113), INT8_C( 0), INT8_C( 0), -INT8_C( 86), INT8_C( 102), INT8_C( 0), INT8_C( 0), INT8_C( 117), INT8_C( 0), -INT8_C( 33) } }, { UINT64_C( 4031585949240021857), { INT8_C( 69), -INT8_C( 84), INT8_C( 80), INT8_C( 49), -INT8_C( 90), -INT8_C( 106), INT8_C( 52), INT8_C( 10), INT8_C( 107), -INT8_C( 95), -INT8_C( 88), -INT8_C( 7), INT8_C( 7), -INT8_C( 17), INT8_C( 95), -INT8_C( 36), INT8_C( 21), -INT8_C( 119), -INT8_C( 40), INT8_C( 3), -INT8_C( 48), -INT8_C( 5), INT8_C( 25), INT8_C( 49), INT8_C( 78), INT8_C( 1), -INT8_C( 78), INT8_C( 93), INT8_C( 19), -INT8_C( 91), -INT8_C( 107), INT8_C( 88), INT8_C( 82), -INT8_C( 27), -INT8_C( 118), -INT8_C( 8), INT8_C( 123), -INT8_C( 66), INT8_C( 3), -INT8_C( 25), INT8_C( 96), -INT8_C( 85), -INT8_C( 32), INT8_C( 103), -INT8_C( 102), INT8_C( 63), INT8_C( 67), -INT8_C( 81), -INT8_C( 56), INT8_C( 27), -INT8_C( 77), -INT8_C( 104), INT8_C( 22), -INT8_C( 52), -INT8_C( 54), INT8_C( 100), -INT8_C( 51), INT8_C( 124), -INT8_C( 63), -INT8_C( 32), INT8_C( 34), INT8_C( 86), INT8_C( 57), INT8_C( 116) }, { INT8_C( 59), -INT8_C( 61), INT8_C( 108), -INT8_C( 73), -INT8_C( 127), INT8_C( 111), -INT8_C( 98), -INT8_C( 31), INT8_C( 26), INT8_C( 126), INT8_C( 72), -INT8_C( 76), -INT8_C( 67), -INT8_C( 117), INT8_C( 100), -INT8_C( 122), -INT8_C( 90), INT8_C( 23), INT8_C( 30), -INT8_C( 68), -INT8_C( 29), -INT8_C( 24), INT8_C( 32), -INT8_C( 80), INT8_C( 101), -INT8_C( 30), -INT8_C( 112), -INT8_C( 121), INT8_C( 56), 
-INT8_C( 55), -INT8_C( 5), INT8_C( 116), -INT8_C( 116), INT8_C( 103), INT8_C( 43), INT8_C( 14), -INT8_C( 41), -INT8_C( 55), -INT8_C( 17), -INT8_C( 15), INT8_C( 71), INT8_C( 56), -INT8_C( 90), INT8_C( 4), -INT8_C( 61), INT8_C( 10), -INT8_C( 118), INT8_C( 106), INT8_C( 33), -INT8_C( 87), INT8_C( 38), INT8_C( 4), -INT8_C( 111), INT8_C( 71), -INT8_C( 76), -INT8_C( 10), INT8_C( 41), INT8_C( 68), INT8_C( 125), INT8_C( 97), INT8_C( 14), INT8_C( 120), -INT8_C( 43), -INT8_C( 102) }, { -INT8_C( 32), INT8_C( 0), -INT8_C( 88), -INT8_C( 73), -INT8_C( 55), -INT8_C( 104), -INT8_C( 88), INT8_C( 16), -INT8_C( 48), INT8_C( 78), INT8_C( 21), -INT8_C( 109), INT8_C( 88), -INT8_C( 97), -INT8_C( 3), INT8_C( 121), INT8_C( 72), INT8_C( 36), INT8_C( 125), -INT8_C( 38), INT8_C( 107), INT8_C( 49), -INT8_C( 48), -INT8_C( 108), INT8_C( 118), INT8_C( 78), -INT8_C( 11), -INT8_C( 124), -INT8_C( 58), -INT8_C( 53), INT8_C( 30), -INT8_C( 90), -INT8_C( 53), -INT8_C( 57), INT8_C( 93), -INT8_C( 107), INT8_C( 95), INT8_C( 6), -INT8_C( 91), INT8_C( 47), INT8_C( 84), -INT8_C( 70), -INT8_C( 62), -INT8_C( 83), INT8_C( 90), -INT8_C( 64), INT8_C( 38), -INT8_C( 94), -INT8_C( 28), -INT8_C( 92), INT8_C( 124), INT8_C( 79), -INT8_C( 43), INT8_C( 77), -INT8_C( 29), INT8_C( 75), -INT8_C( 101), -INT8_C( 40), -INT8_C( 49), INT8_C( 97), -INT8_C( 93), -INT8_C( 18), INT8_C( 8), INT8_C( 111) }, { -INT8_C( 32), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 94), -INT8_C( 107), INT8_C( 0), -INT8_C( 78), INT8_C( 8), INT8_C( 0), INT8_C( 0), INT8_C( 86), INT8_C( 0), INT8_C( 95), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 34), INT8_C( 0), INT8_C( 84), INT8_C( 82), -INT8_C( 56), INT8_C( 6), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 43), INT8_C( 7), INT8_C( 47), INT8_C( 103), INT8_C( 95), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 51), INT8_C( 0), INT8_C( 0), -INT8_C( 73), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 27), -INT8_C( 85), INT8_C( 
0), INT8_C( 0), -INT8_C( 119), INT8_C( 16), INT8_C( 22), -INT8_C( 29), -INT8_C( 85), -INT8_C( 55), -INT8_C( 18), INT8_C( 0), INT8_C( 95), -INT8_C( 101), INT8_C( 0), INT8_C( 0) } }, { UINT64_C(13854103156084204981), { INT8_C( 100), INT8_C( 5), INT8_C( 109), -INT8_C( 66), -INT8_C( 59), -INT8_C( 109), INT8_C( 96), -INT8_C( 87), INT8_C( 55), -INT8_C( 35), -INT8_C( 8), INT8_C( 13), INT8_C( 42), -INT8_C( 37), INT8_C( 88), -INT8_C( 59), -INT8_C( 76), INT8_C( 40), INT8_C( 38), INT8_C( 87), INT8_C( 22), INT8_C( 46), -INT8_C( 58), -INT8_C( 53), -INT8_C( 108), -INT8_C( 54), -INT8_C( 33), -INT8_C( 1), INT8_C( 116), INT8_C( 34), -INT8_C( 65), -INT8_C( 40), INT8_C( 39), INT8_C( 44), -INT8_C( 106), -INT8_C( 19), -INT8_C( 64), -INT8_C( 10), -INT8_C( 106), -INT8_C( 9), -INT8_C( 45), -INT8_C( 113), INT8_C( 4), -INT8_C( 3), INT8_C( 106), INT8_C( 93), -INT8_C( 62), INT8_C( 30), -INT8_C( 123), -INT8_C( 23), INT8_C( 118), -INT8_C( 101), INT8_C( 23), INT8_C( 60), INT8_C( 102), -INT8_C( 85), INT8_C( 7), INT8_C( 69), -INT8_C( 85), INT8_C( 123), INT8_C( 103), INT8_C( 106), INT8_C( 83), -INT8_C( 114) }, { -INT8_C( 105), -INT8_C( 23), INT8_C( 123), INT8_C( 87), -INT8_C( 33), INT8_C( 18), INT8_C( 78), -INT8_C( 77), -INT8_C( 95), INT8_C( 83), -INT8_C( 80), INT8_C( 11), -INT8_C( 80), INT8_C( 115), INT8_C( 42), INT8_C( 53), INT8_C( 92), -INT8_C( 96), -INT8_C( 48), INT8_C( 115), -INT8_C( 36), INT8_C( 54), INT8_C( 31), -INT8_C( 29), INT8_C( 123), -INT8_C( 54), INT8_C( 94), -INT8_C( 30), INT8_C( 52), -INT8_C( 79), INT8_C( 112), -INT8_C( 53), -INT8_C( 102), -INT8_C( 20), INT8_C( 34), INT8_C( 122), -INT8_C( 2), INT8_C( 113), INT8_C( 45), -INT8_C( 97), -INT8_C( 60), -INT8_C( 35), -INT8_C( 86), INT8_C( 116), INT8_C( 80), -INT8_C( 44), -INT8_C( 87), -INT8_C( 84), INT8_C( 116), INT8_C( 121), INT8_C( 32), INT8_C( 81), -INT8_C( 81), INT8_C( 63), INT8_C( 52), INT8_C( 42), INT8_C( 9), -INT8_C( 109), INT8_C( 12), INT8_C( 61), INT8_C( 68), INT8_C( 124), INT8_C( 9), -INT8_C( 33) }, { INT8_C( 104), INT8_C( 43), 
INT8_C( 89), INT8_C( 102), -INT8_C( 100), -INT8_C( 122), INT8_C( 5), INT8_C( 96), INT8_C( 99), -INT8_C( 80), -INT8_C( 44), -INT8_C( 76), -INT8_C( 124), INT8_C( 125), INT8_C( 96), -INT8_C( 7), -INT8_C( 10), INT8_MIN, INT8_C( 74), -INT8_C( 91), -INT8_C( 65), INT8_C( 126), -INT8_C( 49), -INT8_C( 56), INT8_C( 17), -INT8_C( 37), INT8_C( 6), INT8_C( 86), INT8_C( 88), INT8_C( 15), INT8_C( 53), -INT8_C( 64), INT8_C( 58), -INT8_C( 114), INT8_C( 39), -INT8_C( 41), INT8_C( 20), INT8_C( 44), INT8_C( 55), INT8_C( 119), -INT8_C( 36), INT8_C( 12), INT8_C( 43), INT8_C( 97), -INT8_C( 119), -INT8_C( 116), INT8_C( 90), INT8_MIN, INT8_C( 12), -INT8_C( 92), INT8_C( 37), -INT8_C( 52), INT8_C( 34), -INT8_C( 11), -INT8_C( 108), INT8_C( 52), -INT8_C( 48), -INT8_C( 102), -INT8_C( 118), INT8_C( 40), -INT8_C( 87), -INT8_C( 65), -INT8_C( 23), -INT8_C( 28) }, { -INT8_C( 53), INT8_C( 0), INT8_C( 40), INT8_C( 0), -INT8_C( 64), INT8_C( 38), INT8_C( 0), -INT8_C( 101), INT8_C( 44), INT8_C( 0), -INT8_C( 123), INT8_C( 0), INT8_C( 0), -INT8_C( 52), INT8_C( 4), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 10), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 53), INT8_C( 0), INT8_C( 23), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 33), -INT8_C( 119), INT8_C( 0), -INT8_C( 118), INT8_C( 0), -INT8_C( 92), INT8_C( 93), INT8_C( 0), -INT8_C( 100), INT8_C( 0), INT8_C( 0), INT8_C( 34), INT8_C( 0), -INT8_C( 65), INT8_C( 0), INT8_C( 106), INT8_C( 34), -INT8_C( 102), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 23), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 35), -INT8_C( 64) } }, { UINT64_C( 1862504787069309005), { -INT8_C( 2), INT8_C( 4), INT8_C( 122), -INT8_C( 120), -INT8_C( 112), -INT8_C( 44), INT8_C( 8), -INT8_C( 100), INT8_C( 120), INT8_C( 45), INT8_C( 104), -INT8_C( 102), INT8_C( 34), -INT8_C( 3), -INT8_C( 50), -INT8_C( 13), -INT8_C( 105), INT8_C( 88), INT8_C( 27), INT8_C( 65), INT8_C( 23), INT8_C( 4), 
INT8_C( 37), INT8_C( 100), INT8_C( 20), -INT8_C( 32), -INT8_C( 59), INT8_C( 81), -INT8_C( 46), -INT8_C( 98), INT8_C( 106), -INT8_C( 47), -INT8_C( 94), -INT8_C( 28), INT8_C( 89), INT8_C( 50), -INT8_C( 72), INT8_C( 97), -INT8_C( 50), INT8_C( 48), -INT8_C( 114), INT8_C( 55), -INT8_C( 54), -INT8_C( 79), INT8_C( 52), -INT8_C( 103), -INT8_C( 92), -INT8_C( 53), -INT8_C( 15), -INT8_C( 65), INT8_C( 12), INT8_C( 9), -INT8_C( 60), INT8_C( 49), INT8_C( 109), -INT8_C( 40), INT8_C( 17), INT8_C( 51), INT8_C( 41), -INT8_C( 28), -INT8_C( 47), -INT8_C( 109), -INT8_C( 75), INT8_C( 115) }, { INT8_C( 119), INT8_C( 14), -INT8_C( 91), INT8_C( 47), INT8_C( 111), INT8_C( 115), INT8_C( 95), -INT8_C( 3), -INT8_C( 86), INT8_C( 42), -INT8_C( 82), -INT8_C( 34), -INT8_C( 61), INT8_C( 82), -INT8_C( 86), -INT8_C( 76), INT8_C( 18), -INT8_C( 74), -INT8_C( 67), -INT8_C( 42), -INT8_C( 24), INT8_C( 43), -INT8_C( 82), -INT8_C( 7), INT8_C( 94), -INT8_C( 40), -INT8_C( 35), INT8_C( 47), INT8_C( 107), -INT8_C( 110), -INT8_C( 94), -INT8_C( 29), -INT8_C( 96), INT8_C( 71), INT8_C( 18), INT8_C( 15), -INT8_C( 70), INT8_C( 114), INT8_C( 13), INT8_C( 101), -INT8_C( 100), -INT8_C( 69), INT8_C( 67), INT8_C( 95), INT8_C( 14), -INT8_C( 19), INT8_C( 19), INT8_C( 32), -INT8_C( 92), -INT8_C( 47), -INT8_C( 10), -INT8_C( 116), -INT8_C( 4), -INT8_C( 92), -INT8_C( 123), INT8_C( 90), INT8_C( 124), INT8_C( 99), -INT8_C( 119), -INT8_C( 24), -INT8_C( 11), INT8_C( 43), -INT8_C( 53), -INT8_C( 106) }, { INT8_C( 114), -INT8_C( 35), -INT8_C( 91), INT8_C( 44), INT8_C( 79), -INT8_C( 78), -INT8_C( 111), -INT8_C( 21), INT8_C( 110), -INT8_C( 43), INT8_C( 74), INT8_C( 124), -INT8_C( 62), INT8_C( 94), -INT8_C( 100), INT8_C( 102), INT8_C( 47), -INT8_C( 110), -INT8_C( 14), INT8_C( 43), INT8_C( 54), INT8_C( 120), -INT8_C( 123), -INT8_C( 77), -INT8_C( 37), INT8_C( 14), -INT8_C( 101), -INT8_C( 48), INT8_C( 57), INT8_C( 102), INT8_C( 102), -INT8_C( 85), INT8_C( 67), INT8_C( 12), -INT8_C( 41), -INT8_C( 109), -INT8_C( 66), INT8_C( 105), INT8_C( 
126), INT8_C( 44), INT8_C( 62), -INT8_C( 55), -INT8_C( 88), INT8_C( 0), INT8_C( 39), INT8_C( 68), INT8_C( 103), INT8_C( 86), -INT8_C( 42), INT8_C( 89), -INT8_C( 127), INT8_C( 13), -INT8_C( 47), INT8_C( 6), -INT8_C( 64), -INT8_C( 84), INT8_C( 20), INT8_C( 91), INT8_C( 125), INT8_C( 77), -INT8_C( 63), -INT8_C( 29), -INT8_C( 8), INT8_C( 4) }, { -INT8_C( 84), INT8_C( 0), INT8_C( 97), -INT8_C( 53), INT8_C( 0), INT8_C( 0), -INT8_C( 85), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 44), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 27), INT8_C( 109), INT8_C( 0), -INT8_C( 123), INT8_C( 62), -INT8_C( 79), INT8_C( 0), INT8_C( 91), INT8_C( 102), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 27), INT8_C( 89), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 27), -INT8_C( 13), INT8_C( 41), -INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 28), INT8_C( 0), INT8_C( 0), -INT8_C( 50), INT8_C( 68), INT8_C( 65), -INT8_C( 94), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 34), -INT8_C( 63), INT8_C( 0), -INT8_C( 44), -INT8_C( 101), -INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 62), INT8_C( 6), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { UINT64_C( 8564182074101583855), { -INT8_C( 33), -INT8_C( 125), INT8_C( 119), INT8_C( 6), -INT8_C( 57), -INT8_C( 34), INT8_C( 92), -INT8_C( 98), INT8_C( 55), -INT8_C( 35), -INT8_C( 85), INT8_C( 9), -INT8_C( 29), INT8_C( 107), -INT8_C( 75), -INT8_C( 9), -INT8_C( 58), INT8_C( 50), INT8_C( 68), -INT8_C( 121), INT8_C( 22), INT8_C( 60), -INT8_C( 117), INT8_C( 5), INT8_C( 11), INT8_C( 35), -INT8_C( 77), INT8_C( 68), INT8_C( 57), -INT8_C( 114), -INT8_C( 70), INT8_C( 24), INT8_C( 17), INT8_C( 49), INT8_C( 30), -INT8_C( 40), INT8_C( 15), INT8_C( 122), INT8_C( 118), INT8_C( 71), INT8_C( 87), INT8_C( 33), INT8_C( 80), INT8_C( 58), -INT8_C( 116), INT8_C( 5), INT8_C( 49), INT8_C( 82), INT8_C( 56), INT8_C( 117), -INT8_C( 39), INT8_C( 78), -INT8_C( 79), INT8_C( 101), INT8_C( 83), -INT8_C( 68), -INT8_C( 120), INT8_C( 7), INT8_C( 0), 
-INT8_C( 63), -INT8_C( 107), -INT8_C( 69), -INT8_C( 39), -INT8_C( 90) }, { -INT8_C( 20), -INT8_C( 9), INT8_C( 126), -INT8_C( 4), INT8_C( 113), -INT8_C( 11), INT8_C( 67), -INT8_C( 56), INT8_C( 22), -INT8_C( 109), INT8_C( 2), -INT8_C( 93), -INT8_C( 104), INT8_C( 51), -INT8_C( 11), -INT8_C( 48), -INT8_C( 88), -INT8_C( 49), INT8_C( 30), INT8_C( 89), INT8_C( 52), INT8_C( 114), INT8_C( 21), -INT8_C( 68), INT8_C( 121), INT8_C( 22), INT8_C( 125), INT8_C( 14), -INT8_C( 47), INT8_C( 86), -INT8_C( 76), -INT8_C( 67), INT8_C( 77), INT8_C( 50), -INT8_C( 71), -INT8_C( 66), INT8_C( 39), -INT8_C( 4), -INT8_C( 122), INT8_C( 62), -INT8_C( 113), -INT8_C( 120), -INT8_C( 31), INT8_C( 40), -INT8_C( 69), -INT8_C( 42), -INT8_C( 8), INT8_C( 99), -INT8_C( 91), INT8_C( 23), -INT8_C( 68), -INT8_C( 39), -INT8_C( 119), -INT8_C( 47), -INT8_C( 107), INT8_C( 2), -INT8_C( 25), INT8_C( 18), INT8_C( 16), -INT8_C( 72), INT8_C( 104), -INT8_C( 60), INT8_C( 118), -INT8_C( 75) }, { -INT8_C( 10), INT8_C( 47), INT8_C( 115), INT8_C( 30), INT8_C( 44), -INT8_C( 7), INT8_C( 92), -INT8_C( 69), -INT8_C( 127), INT8_C( 61), -INT8_C( 29), INT8_C( 60), INT8_C( 19), -INT8_C( 36), -INT8_C( 97), -INT8_C( 71), -INT8_C( 13), INT8_C( 91), -INT8_C( 110), INT8_C( 124), INT8_C( 45), INT8_C( 40), INT8_C( 126), INT8_C( 20), INT8_C( 58), -INT8_C( 114), -INT8_C( 51), -INT8_C( 93), INT8_C( 82), INT8_C( 67), INT8_C( 88), INT8_C( 72), INT8_C( 114), -INT8_C( 52), INT8_C( 102), -INT8_C( 98), -INT8_C( 59), -INT8_C( 62), INT8_C( 90), INT8_C( 71), -INT8_C( 1), INT8_C( 61), -INT8_C( 125), INT8_C( 19), INT8_C( 25), INT8_C( 35), -INT8_C( 52), INT8_C( 12), INT8_C( 126), INT8_C( 94), -INT8_C( 120), -INT8_C( 85), -INT8_C( 122), INT8_C( 6), -INT8_C( 64), -INT8_C( 63), -INT8_C( 108), -INT8_C( 115), INT8_C( 100), -INT8_C( 26), -INT8_C( 48), -INT8_C( 68), INT8_C( 47), INT8_C( 66) }, { INT8_C( 25), -INT8_C( 63), INT8_C( 47), -INT8_C( 48), INT8_C( 0), INT8_C( 6), INT8_C( 30), -INT8_C( 127), -INT8_C( 117), -INT8_C( 121), INT8_C( 119), -INT8_C( 40), 
INT8_C( 0), INT8_C( 0), INT8_C( 6), -INT8_C( 13), INT8_C( 87), -INT8_C( 71), -INT8_C( 70), INT8_C( 0), -INT8_C( 79), INT8_C( 0), INT8_C( 0), -INT8_C( 107), INT8_C( 0), -INT8_C( 117), -INT8_C( 68), -INT8_C( 75), INT8_C( 0), INT8_C( 126), INT8_C( 0), -INT8_C( 69), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 39), INT8_C( 71), -INT8_C( 48), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 55), -INT8_C( 52), INT8_C( 0), -INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 5), INT8_C( 0), -INT8_C( 114), -INT8_C( 35), INT8_C( 0), INT8_C( 60), INT8_C( 119), INT8_C( 0), INT8_C( 68), -INT8_C( 58), INT8_C( 0), -INT8_C( 1), INT8_C( 44), -INT8_C( 64), INT8_C( 0) } }, { UINT64_C( 6311015703296185736), { INT8_C( 120), INT8_C( 24), INT8_C( 106), -INT8_C( 110), INT8_C( 59), INT8_C( 54), -INT8_C( 98), -INT8_C( 70), -INT8_C( 107), INT8_C( 39), INT8_C( 101), INT8_C( 27), INT8_C( 45), INT8_C( 37), -INT8_C( 36), -INT8_C( 62), -INT8_C( 78), INT8_C( 64), -INT8_C( 88), -INT8_C( 126), -INT8_C( 3), -INT8_C( 41), -INT8_C( 59), -INT8_C( 123), INT8_C( 109), -INT8_C( 90), -INT8_C( 45), -INT8_C( 59), -INT8_C( 31), INT8_C( 104), INT8_C( 28), INT8_C( 89), -INT8_C( 127), -INT8_C( 121), -INT8_C( 21), -INT8_C( 68), -INT8_C( 67), -INT8_C( 118), INT8_C( 118), INT8_C( 82), -INT8_C( 79), -INT8_C( 36), INT8_C( 110), -INT8_C( 34), INT8_C( 1), INT8_C( 74), -INT8_C( 96), -INT8_C( 76), -INT8_C( 117), INT8_C( 73), INT8_C( 54), -INT8_C( 120), INT8_C( 32), -INT8_C( 5), INT8_C( 13), -INT8_C( 115), -INT8_C( 95), -INT8_C( 31), INT8_C( 82), -INT8_C( 126), INT8_C( 73), INT8_C( 111), -INT8_C( 36), -INT8_C( 54) }, { -INT8_C( 10), -INT8_C( 57), -INT8_C( 121), -INT8_C( 77), INT8_C( 81), -INT8_C( 3), INT8_C( 6), INT8_C( 2), -INT8_C( 39), INT8_C( 116), -INT8_C( 31), -INT8_C( 37), -INT8_C( 66), -INT8_C( 127), -INT8_C( 113), INT8_C( 73), -INT8_C( 54), -INT8_C( 59), -INT8_C( 47), -INT8_C( 21), -INT8_C( 63), -INT8_C( 33), INT8_C( 120), INT8_C( 98), -INT8_C( 64), -INT8_C( 53), -INT8_C( 27), INT8_C( 9), INT8_C( 58), 
-INT8_C( 63), -INT8_C( 44), INT8_C( 48), -INT8_C( 120), INT8_C( 91), -INT8_C( 29), -INT8_C( 38), INT8_C( 88), -INT8_C( 23), -INT8_C( 36), INT8_C( 50), INT8_C( 93), -INT8_C( 67), INT8_C( 13), INT8_C( 28), INT8_C( 63), -INT8_C( 100), INT8_C( 101), INT8_C( 9), INT8_C( 97), INT8_C( 55), -INT8_C( 12), INT8_C( 34), INT8_C( 22), INT8_C( 109), -INT8_C( 123), -INT8_C( 42), INT8_C( 56), INT8_C( 106), -INT8_C( 33), INT8_C( 114), INT8_C( 43), -INT8_C( 77), -INT8_C( 94), -INT8_C( 77) }, { INT8_C( 14), -INT8_C( 123), -INT8_C( 115), INT8_C( 103), INT8_C( 111), INT8_C( 106), -INT8_C( 103), -INT8_C( 52), INT8_C( 39), -INT8_C( 90), -INT8_C( 24), INT8_C( 102), INT8_C( 66), INT8_C( 78), INT8_C( 112), -INT8_C( 93), -INT8_C( 123), INT8_C( 100), -INT8_C( 58), -INT8_C( 101), -INT8_C( 47), INT8_C( 75), INT8_C( 113), INT8_C( 9), -INT8_C( 75), INT8_C( 80), INT8_C( 123), -INT8_C( 32), INT8_C( 4), INT8_C( 29), -INT8_C( 109), INT8_C( 18), -INT8_C( 93), INT8_C( 33), INT8_C( 121), INT8_C( 18), -INT8_C( 117), INT8_C( 18), -INT8_C( 34), -INT8_C( 78), -INT8_C( 72), -INT8_C( 57), INT8_C( 25), -INT8_C( 6), INT8_C( 21), -INT8_C( 119), -INT8_C( 98), -INT8_C( 102), -INT8_C( 19), INT8_C( 100), INT8_C( 53), -INT8_C( 65), -INT8_C( 81), -INT8_C( 90), -INT8_C( 56), INT8_C( 100), -INT8_C( 10), INT8_C( 68), INT8_C( 68), -INT8_C( 6), INT8_C( 97), -INT8_C( 41), INT8_C( 13), INT8_C( 4) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 120), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 106), INT8_C( 80), INT8_C( 0), INT8_C( 33), INT8_C( 0), -INT8_C( 36), INT8_C( 0), INT8_C( 0), -INT8_C( 90), -INT8_C( 24), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 18), -INT8_C( 10), INT8_C( 121), INT8_C( 0), INT8_C( 102), INT8_C( 18), INT8_C( 39), INT8_C( 0), INT8_C( 0), -INT8_C( 47), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 123), -INT8_C( 75), INT8_C( 0), INT8_C( 4), INT8_C( 0), INT8_C( 29), INT8_C( 111), INT8_C( 0), -INT8_C( 31), -INT8_C( 54), -INT8_C( 31), INT8_C( 0), INT8_C( 0), INT8_C( 33), INT8_C( 
0), -INT8_C( 81), INT8_C( 0), -INT8_C( 59), INT8_C( 0), INT8_C( 0), INT8_C( 113), -INT8_C( 95), INT8_C( 25), INT8_C( 18), INT8_C( 0), -INT8_C( 34), INT8_C( 0), -INT8_C( 21), INT8_C( 0) } }, { UINT64_C( 5852134805492631288), { -INT8_C( 68), INT8_C( 79), INT8_C( 76), -INT8_C( 47), -INT8_C( 40), -INT8_C( 22), INT8_C( 107), -INT8_C( 59), INT8_C( 78), -INT8_C( 96), -INT8_C( 124), -INT8_C( 3), INT8_C( 70), INT8_C( 77), INT8_C( 97), INT8_C( 60), -INT8_C( 111), -INT8_C( 91), INT8_C( 55), -INT8_C( 14), INT8_C( 124), INT8_C( 68), -INT8_C( 9), INT8_C( 117), -INT8_C( 54), INT8_C( 13), -INT8_C( 8), INT8_C( 99), INT8_C( 2), INT8_C( 46), -INT8_C( 75), -INT8_C( 66), INT8_C( 125), INT8_C( 1), -INT8_C( 113), INT8_C( 85), -INT8_C( 21), -INT8_C( 6), INT8_C( 27), INT8_C( 57), -INT8_C( 102), -INT8_C( 97), INT8_C( 54), -INT8_C( 32), -INT8_C( 20), -INT8_C( 105), INT8_C( 29), INT8_C( 125), INT8_C( 60), INT8_C( 84), INT8_C( 112), -INT8_C( 72), -INT8_C( 104), INT8_C( 103), INT8_C( 45), INT8_C( 98), INT8_C( 116), INT8_C( 38), -INT8_C( 58), INT8_C( 119), INT8_C( 84), INT8_C( 123), INT8_C( 53), -INT8_C( 46) }, { INT8_C( 124), -INT8_C( 59), INT8_C( 39), INT8_C( 103), -INT8_C( 65), INT8_C( 66), -INT8_C( 96), INT8_C( 90), -INT8_C( 30), -INT8_C( 42), INT8_C( 58), -INT8_C( 50), INT8_C( 109), INT8_C( 87), INT8_C( 76), -INT8_C( 87), -INT8_C( 85), -INT8_C( 68), INT8_C( 97), INT8_C( 67), INT8_C( 35), -INT8_C( 113), -INT8_C( 90), -INT8_C( 105), -INT8_C( 75), INT8_C( 108), INT8_C( 14), INT8_C( 9), -INT8_C( 25), INT8_C( 68), -INT8_C( 37), INT8_C( 99), INT8_C( 9), INT8_C( 3), -INT8_C( 54), -INT8_C( 56), INT8_C( 69), INT8_C( 106), INT8_C( 34), INT8_C( 39), INT8_C( 64), INT8_C( 93), -INT8_C( 10), -INT8_C( 83), -INT8_C( 76), INT8_C( 66), INT8_C( 86), INT8_C( 96), -INT8_C( 2), -INT8_C( 73), -INT8_C( 93), INT8_C( 33), INT8_C( 70), INT8_C( 73), -INT8_C( 72), -INT8_C( 5), -INT8_C( 75), -INT8_C( 57), INT8_C( 5), -INT8_C( 100), INT8_C( 11), -INT8_C( 32), -INT8_C( 1), INT8_C( 20) }, { -INT8_C( 29), -INT8_C( 55), 
-INT8_C( 36), INT8_C( 41), INT8_C( 51), -INT8_C( 1), INT8_C( 80), INT8_C( 115), INT8_C( 92), INT8_C( 70), INT8_C( 32), INT8_C( 16), -INT8_C( 120), INT8_C( 118), INT8_C( 112), -INT8_C( 122), INT8_C( 46), INT8_C( 20), -INT8_C( 89), INT8_C( 116), INT8_C( 93), INT8_C( 96), INT8_C( 112), INT8_C( 19), INT8_C( 39), INT8_C( 117), -INT8_C( 81), INT8_C( 50), INT8_C( 85), -INT8_C( 81), INT8_C( 70), INT8_C( 57), INT8_C( 120), INT8_C( 34), INT8_C( 98), -INT8_C( 84), INT8_C( 33), -INT8_C( 78), INT8_C( 31), INT8_C( 125), -INT8_C( 7), INT8_C( 64), -INT8_C( 114), -INT8_C( 127), -INT8_C( 74), -INT8_C( 2), INT8_C( 8), -INT8_C( 28), INT8_C( 18), -INT8_C( 81), INT8_C( 89), INT8_C( 112), INT8_C( 15), -INT8_C( 55), -INT8_C( 125), INT8_C( 54), INT8_C( 62), INT8_C( 50), INT8_C( 104), -INT8_C( 109), -INT8_C( 31), -INT8_C( 82), -INT8_C( 52), INT8_C( 90) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 125), -INT8_C( 46), -INT8_C( 36), INT8_C( 125), -INT8_C( 81), INT8_C( 0), INT8_C( 112), -INT8_C( 58), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 97), INT8_C( 0), INT8_C( 84), INT8_C( 34), INT8_C( 0), INT8_C( 85), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 103), -INT8_C( 74), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 84), -INT8_C( 96), INT8_C( 0), INT8_C( 0), INT8_C( 92), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 57), -INT8_C( 29), INT8_C( 0), -INT8_C( 125), INT8_C( 0), -INT8_C( 104), -INT8_C( 36), INT8_C( 112), INT8_C( 120), INT8_C( 0), INT8_C( 98), INT8_C( 85), INT8_C( 0), INT8_C( 80), INT8_C( 70), INT8_C( 0), INT8_C( 0), INT8_C( 103), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 3), INT8_C( 0), INT8_C( 90), INT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i idx = simde_mm512_loadu_epi8(test_vec[i].idx); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_maskz_permutex2var_epi8(test_vec[i].k, a, idx, b); 
simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask64 k = simde_test_x86_random_mmask64(); simde__m512i a = simde_test_x86_random_i8x64(); simde__m512i idx = simde_test_x86_random_i8x64(); simde__m512i b = simde_test_x86_random_i8x64(); simde__m512i r = simde_mm512_maskz_permutex2var_epi8(k, a, idx, b); simde_test_x86_write_mmask64(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x64(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_permutex2var_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512i idx; simde__m512d b; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 799.30), SIMDE_FLOAT64_C( -576.01), SIMDE_FLOAT64_C( 439.15), SIMDE_FLOAT64_C( -28.15), SIMDE_FLOAT64_C( 481.25), SIMDE_FLOAT64_C( -784.26), SIMDE_FLOAT64_C( 549.03), SIMDE_FLOAT64_C( 582.53)), simde_mm512_set_epi64(INT64_C(-8115609027568940125), INT64_C( 3504612124823893047), INT64_C(-7514888466798804666), INT64_C( 9113506312589344178), INT64_C(-5948029684411535130), INT64_C(-4862378680423071053), INT64_C( 7355766231574189317), INT64_C( -310150959079746096)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -563.83), SIMDE_FLOAT64_C( 938.85), SIMDE_FLOAT64_C( -465.05), SIMDE_FLOAT64_C( -104.57), SIMDE_FLOAT64_C( -431.26), SIMDE_FLOAT64_C( -57.75), SIMDE_FLOAT64_C( 438.04), SIMDE_FLOAT64_C( 729.46)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 481.25), SIMDE_FLOAT64_C( 799.30), SIMDE_FLOAT64_C( -576.01), SIMDE_FLOAT64_C( -784.26), SIMDE_FLOAT64_C( -576.01), SIMDE_FLOAT64_C( 481.25), SIMDE_FLOAT64_C( 439.15), SIMDE_FLOAT64_C( 582.53)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -435.27), SIMDE_FLOAT64_C( 298.40), SIMDE_FLOAT64_C( -94.60), SIMDE_FLOAT64_C( 
260.50), SIMDE_FLOAT64_C( -69.45), SIMDE_FLOAT64_C( 95.61), SIMDE_FLOAT64_C( -688.01), SIMDE_FLOAT64_C( 931.17)), simde_mm512_set_epi64(INT64_C(-3094219001013742557), INT64_C(-3379016320921474793), INT64_C( 7772273849745001049), INT64_C(-4229480058937372017), INT64_C( 8318730560275653847), INT64_C(-2664412856586094061), INT64_C( 2083707536546841162), INT64_C( 5404230241318444880)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 193.01), SIMDE_FLOAT64_C( -84.06), SIMDE_FLOAT64_C( 208.07), SIMDE_FLOAT64_C( 834.28), SIMDE_FLOAT64_C( -859.51), SIMDE_FLOAT64_C( 40.36), SIMDE_FLOAT64_C( -743.10), SIMDE_FLOAT64_C( 442.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -69.45), SIMDE_FLOAT64_C( -435.27), SIMDE_FLOAT64_C( -743.10), SIMDE_FLOAT64_C( 193.01), SIMDE_FLOAT64_C( -435.27), SIMDE_FLOAT64_C( -69.45), SIMDE_FLOAT64_C( 40.36), SIMDE_FLOAT64_C( 931.17)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -832.50), SIMDE_FLOAT64_C( 438.46), SIMDE_FLOAT64_C( 336.35), SIMDE_FLOAT64_C( 99.21), SIMDE_FLOAT64_C( -38.88), SIMDE_FLOAT64_C( 218.73), SIMDE_FLOAT64_C( -966.90), SIMDE_FLOAT64_C( -737.78)), simde_mm512_set_epi64(INT64_C( 7563354526679147255), INT64_C(-6952412028107066884), INT64_C(-3077616107881632928), INT64_C(-2220298267656761827), INT64_C( 2522518958303333112), INT64_C(-1668307566098600867), INT64_C( 8306832211054389426), INT64_C(-4135341282024622606)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 141.08), SIMDE_FLOAT64_C( -990.15), SIMDE_FLOAT64_C( -887.47), SIMDE_FLOAT64_C( -396.24), SIMDE_FLOAT64_C( -2.60), SIMDE_FLOAT64_C( 165.88), SIMDE_FLOAT64_C( 375.27), SIMDE_FLOAT64_C( -512.98)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -832.50), SIMDE_FLOAT64_C( -396.24), SIMDE_FLOAT64_C( -737.78), SIMDE_FLOAT64_C( -887.47), SIMDE_FLOAT64_C( -512.98), SIMDE_FLOAT64_C( -887.47), SIMDE_FLOAT64_C( 218.73), SIMDE_FLOAT64_C( 218.73)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -211.73), SIMDE_FLOAT64_C( 908.31), SIMDE_FLOAT64_C( -144.11), SIMDE_FLOAT64_C( -343.92), SIMDE_FLOAT64_C( 961.65), SIMDE_FLOAT64_C( 754.42), 
SIMDE_FLOAT64_C( -432.97), SIMDE_FLOAT64_C( 164.52)), simde_mm512_set_epi64(INT64_C( 135432210503006619), INT64_C(-7059566968128636366), INT64_C(-1295026765047609725), INT64_C( 5447800525707046939), INT64_C( 1419500527032411112), INT64_C( 5424087511148175828), INT64_C(-4780701435803039630), INT64_C( 6069825193561024149)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 254.48), SIMDE_FLOAT64_C( 755.70), SIMDE_FLOAT64_C( -363.93), SIMDE_FLOAT64_C( 789.10), SIMDE_FLOAT64_C( 344.74), SIMDE_FLOAT64_C( 652.93), SIMDE_FLOAT64_C( 184.91), SIMDE_FLOAT64_C( -455.33)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 344.74), SIMDE_FLOAT64_C( 754.42), SIMDE_FLOAT64_C( 961.65), SIMDE_FLOAT64_C( 344.74), SIMDE_FLOAT64_C( -455.33), SIMDE_FLOAT64_C( -343.92), SIMDE_FLOAT64_C( 754.42), SIMDE_FLOAT64_C( -144.11)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 358.04), SIMDE_FLOAT64_C( -974.37), SIMDE_FLOAT64_C( -342.76), SIMDE_FLOAT64_C( -698.47), SIMDE_FLOAT64_C( 146.15), SIMDE_FLOAT64_C( 360.04), SIMDE_FLOAT64_C( 134.15), SIMDE_FLOAT64_C( -376.86)), simde_mm512_set_epi64(INT64_C(-7778482448656032654), INT64_C(-7388935565641111344), INT64_C( 2154583157079273400), INT64_C( 4649728279138736034), INT64_C( 1896125478609903946), INT64_C( 6795120210135498653), INT64_C(-8532964392806396349), INT64_C(-8044512602622188161)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 197.25), SIMDE_FLOAT64_C( 348.19), SIMDE_FLOAT64_C( 713.78), SIMDE_FLOAT64_C( -632.29), SIMDE_FLOAT64_C( -382.22), SIMDE_FLOAT64_C( -320.26), SIMDE_FLOAT64_C( -199.21), SIMDE_FLOAT64_C( -764.34)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 360.04), SIMDE_FLOAT64_C( -376.86), SIMDE_FLOAT64_C( -764.34), SIMDE_FLOAT64_C( 360.04), SIMDE_FLOAT64_C( -320.26), SIMDE_FLOAT64_C( 713.78), SIMDE_FLOAT64_C( 146.15), SIMDE_FLOAT64_C( 197.25)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 374.01), SIMDE_FLOAT64_C( -829.82), SIMDE_FLOAT64_C( -372.09), SIMDE_FLOAT64_C( -693.82), SIMDE_FLOAT64_C( 763.12), SIMDE_FLOAT64_C( 797.99), SIMDE_FLOAT64_C( 291.55), SIMDE_FLOAT64_C( -93.41)), 
simde_mm512_set_epi64(INT64_C( 2330675318709913935), INT64_C( 8512876982035459145), INT64_C( -754385814369639096), INT64_C(-5873166547629617678), INT64_C(-8313572030703954107), INT64_C( 8197482817575228316), INT64_C( 7734796813438689885), INT64_C(-7308014241195865956)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -65.54), SIMDE_FLOAT64_C( 852.70), SIMDE_FLOAT64_C( 316.87), SIMDE_FLOAT64_C( 303.57), SIMDE_FLOAT64_C( 151.19), SIMDE_FLOAT64_C( 17.85), SIMDE_FLOAT64_C( 280.96), SIMDE_FLOAT64_C( -966.13)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -65.54), SIMDE_FLOAT64_C( 280.96), SIMDE_FLOAT64_C( -966.13), SIMDE_FLOAT64_C( 797.99), SIMDE_FLOAT64_C( -372.09), SIMDE_FLOAT64_C( 303.57), SIMDE_FLOAT64_C( 316.87), SIMDE_FLOAT64_C( 303.57)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 192.60), SIMDE_FLOAT64_C( -766.97), SIMDE_FLOAT64_C( -268.82), SIMDE_FLOAT64_C( 379.62), SIMDE_FLOAT64_C( 889.99), SIMDE_FLOAT64_C( 778.00), SIMDE_FLOAT64_C( 136.45), SIMDE_FLOAT64_C( -745.44)), simde_mm512_set_epi64(INT64_C(-5841170484671274964), INT64_C(-3543508984518726324), INT64_C( 3010243785522476560), INT64_C(-1544427551515059156), INT64_C(-6824560769606762223), INT64_C( 5419183893290153987), INT64_C(-6963147146169039734), INT64_C( 796151810828840161)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 79.71), SIMDE_FLOAT64_C( -761.86), SIMDE_FLOAT64_C( 699.75), SIMDE_FLOAT64_C( -94.00), SIMDE_FLOAT64_C( 852.56), SIMDE_FLOAT64_C( -583.72), SIMDE_FLOAT64_C( 64.51), SIMDE_FLOAT64_C( 598.44)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -94.00), SIMDE_FLOAT64_C( -94.00), SIMDE_FLOAT64_C( -745.44), SIMDE_FLOAT64_C( -94.00), SIMDE_FLOAT64_C( 136.45), SIMDE_FLOAT64_C( 889.99), SIMDE_FLOAT64_C( -583.72), SIMDE_FLOAT64_C( 136.45)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 910.96), SIMDE_FLOAT64_C( 161.71), SIMDE_FLOAT64_C( 998.55), SIMDE_FLOAT64_C( -146.06), SIMDE_FLOAT64_C( -469.94), SIMDE_FLOAT64_C( 728.29), SIMDE_FLOAT64_C( -146.78), SIMDE_FLOAT64_C( -162.79)), simde_mm512_set_epi64(INT64_C( 7017505342718424326), INT64_C( 
7336144214089986511), INT64_C(-7372811814600044225), INT64_C( 3369678464815021090), INT64_C(-3624005186939786961), INT64_C( 3053727983599056138), INT64_C( 1044677486690381786), INT64_C( 7246985488966277586)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -157.21), SIMDE_FLOAT64_C( 742.16), SIMDE_FLOAT64_C( -664.02), SIMDE_FLOAT64_C( -706.70), SIMDE_FLOAT64_C( -128.86), SIMDE_FLOAT64_C( -767.59), SIMDE_FLOAT64_C( -790.53), SIMDE_FLOAT64_C( 953.27)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 161.71), SIMDE_FLOAT64_C( -157.21), SIMDE_FLOAT64_C( -157.21), SIMDE_FLOAT64_C( 728.29), SIMDE_FLOAT64_C( -157.21), SIMDE_FLOAT64_C( -767.59), SIMDE_FLOAT64_C( -767.59), SIMDE_FLOAT64_C( 728.29)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_permutex2var_pd(test_vec[i].a, test_vec[i].idx, test_vec[i].b); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_permutex2var_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__mmask8 k; simde__m512i idx; simde__m512d b; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 160.98), SIMDE_FLOAT64_C( 391.82), SIMDE_FLOAT64_C( -569.99), SIMDE_FLOAT64_C( -327.63), SIMDE_FLOAT64_C( -172.36), SIMDE_FLOAT64_C( 393.53), SIMDE_FLOAT64_C( 36.69), SIMDE_FLOAT64_C( -135.52)), UINT8_C( 63), simde_mm512_set_epi64(INT64_C(-4767707706458520415), INT64_C(-7083171014951853588), INT64_C(-2076988212358998594), INT64_C( 3698505898575972461), INT64_C(-8586603972668500699), INT64_C( 7848938818320954984), INT64_C(-7002500096438875648), INT64_C( 1477571573764517782)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 522.06), SIMDE_FLOAT64_C( -932.28), SIMDE_FLOAT64_C( 600.12), SIMDE_FLOAT64_C( -491.12), SIMDE_FLOAT64_C( -139.11), SIMDE_FLOAT64_C( -268.86), SIMDE_FLOAT64_C( -71.72), SIMDE_FLOAT64_C( 98.47)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 160.98), SIMDE_FLOAT64_C( 391.82), SIMDE_FLOAT64_C( -932.28), SIMDE_FLOAT64_C( 600.12), SIMDE_FLOAT64_C( -569.99), 
SIMDE_FLOAT64_C( 98.47), SIMDE_FLOAT64_C( -135.52), SIMDE_FLOAT64_C( 391.82)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 571.79), SIMDE_FLOAT64_C( 675.92), SIMDE_FLOAT64_C( 490.41), SIMDE_FLOAT64_C( 47.59), SIMDE_FLOAT64_C( -895.71), SIMDE_FLOAT64_C( -736.92), SIMDE_FLOAT64_C( 283.06), SIMDE_FLOAT64_C( -333.94)), UINT8_C(251), simde_mm512_set_epi64(INT64_C( 7391005387705442660), INT64_C(-5091463632259113685), INT64_C( 685405269785004780), INT64_C(-2602517860068074949), INT64_C( 4704994953943345443), INT64_C( 8877610218385468208), INT64_C( 5776984527519295337), INT64_C( 6526937450820584225)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -203.04), SIMDE_FLOAT64_C( -80.71), SIMDE_FLOAT64_C( 632.01), SIMDE_FLOAT64_C( 456.89), SIMDE_FLOAT64_C( 51.33), SIMDE_FLOAT64_C( -868.59), SIMDE_FLOAT64_C( -921.00), SIMDE_FLOAT64_C( -471.60)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 47.59), SIMDE_FLOAT64_C( 51.33), SIMDE_FLOAT64_C( 456.89), SIMDE_FLOAT64_C( 51.33), SIMDE_FLOAT64_C( -895.71), SIMDE_FLOAT64_C( -736.92), SIMDE_FLOAT64_C( -921.00), SIMDE_FLOAT64_C( 283.06)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 670.52), SIMDE_FLOAT64_C( 149.72), SIMDE_FLOAT64_C( 213.24), SIMDE_FLOAT64_C( -577.36), SIMDE_FLOAT64_C( 957.37), SIMDE_FLOAT64_C( -934.92), SIMDE_FLOAT64_C( -657.02), SIMDE_FLOAT64_C( -629.37)), UINT8_C( 19), simde_mm512_set_epi64(INT64_C( 1455933536394832297), INT64_C( -840828676201867557), INT64_C( 6310833464661060096), INT64_C( 8429580363859954742), INT64_C( 5406280044045291975), INT64_C(-2284955492954404973), INT64_C(-3603637419527123210), INT64_C(-1117409850830928520)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 586.29), SIMDE_FLOAT64_C( -760.88), SIMDE_FLOAT64_C( -617.12), SIMDE_FLOAT64_C( -751.58), SIMDE_FLOAT64_C( 907.23), SIMDE_FLOAT64_C( -359.60), SIMDE_FLOAT64_C( -213.75), SIMDE_FLOAT64_C( 403.00)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 670.52), SIMDE_FLOAT64_C( 149.72), SIMDE_FLOAT64_C( 213.24), SIMDE_FLOAT64_C( 149.72), SIMDE_FLOAT64_C( 957.37), SIMDE_FLOAT64_C( -934.92), 
SIMDE_FLOAT64_C( 149.72), SIMDE_FLOAT64_C( 403.00)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 918.52), SIMDE_FLOAT64_C( -422.76), SIMDE_FLOAT64_C( -433.33), SIMDE_FLOAT64_C( 48.49), SIMDE_FLOAT64_C( 799.57), SIMDE_FLOAT64_C( -820.22), SIMDE_FLOAT64_C( -959.11), SIMDE_FLOAT64_C( 268.99)), UINT8_C(132), simde_mm512_set_epi64(INT64_C(-4670296842224865750), INT64_C(-8736438908262001915), INT64_C(-1516874692875012272), INT64_C( 2654080637722702840), INT64_C(-4444585746033374017), INT64_C( 7620312646179506248), INT64_C(-4730811392556899367), INT64_C(-7555925455226975890)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -663.74), SIMDE_FLOAT64_C( 179.29), SIMDE_FLOAT64_C( 989.70), SIMDE_FLOAT64_C( -695.21), SIMDE_FLOAT64_C( -786.23), SIMDE_FLOAT64_C( 873.30), SIMDE_FLOAT64_C( 241.45), SIMDE_FLOAT64_C( -432.13)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 873.30), SIMDE_FLOAT64_C( -422.76), SIMDE_FLOAT64_C( -433.33), SIMDE_FLOAT64_C( 48.49), SIMDE_FLOAT64_C( 799.57), SIMDE_FLOAT64_C( -432.13), SIMDE_FLOAT64_C( -959.11), SIMDE_FLOAT64_C( 268.99)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -904.81), SIMDE_FLOAT64_C( 50.50), SIMDE_FLOAT64_C( -282.31), SIMDE_FLOAT64_C( -656.10), SIMDE_FLOAT64_C( -789.54), SIMDE_FLOAT64_C( -790.16), SIMDE_FLOAT64_C( -415.61), SIMDE_FLOAT64_C( 994.61)), UINT8_C( 37), simde_mm512_set_epi64(INT64_C( 2001511420457827007), INT64_C( 8280910196874944184), INT64_C( -368934386460614235), INT64_C(-9011857488067354178), INT64_C( 5578921037540219940), INT64_C(-7880186302232587827), INT64_C( 7848707034806784644), INT64_C(-7246117184140796511)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 280.87), SIMDE_FLOAT64_C( 380.83), SIMDE_FLOAT64_C( -236.67), SIMDE_FLOAT64_C( -211.91), SIMDE_FLOAT64_C( -925.76), SIMDE_FLOAT64_C( -915.62), SIMDE_FLOAT64_C( -30.05), SIMDE_FLOAT64_C( -70.79)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -904.81), SIMDE_FLOAT64_C( 50.50), SIMDE_FLOAT64_C( -282.31), SIMDE_FLOAT64_C( -656.10), SIMDE_FLOAT64_C( -789.54), SIMDE_FLOAT64_C( -236.67), SIMDE_FLOAT64_C( -415.61), 
SIMDE_FLOAT64_C( -415.61)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -318.40), SIMDE_FLOAT64_C( -844.73), SIMDE_FLOAT64_C( 757.65), SIMDE_FLOAT64_C( 677.41), SIMDE_FLOAT64_C( -352.24), SIMDE_FLOAT64_C( -479.79), SIMDE_FLOAT64_C( 602.83), SIMDE_FLOAT64_C( 2.55)), UINT8_C(218), simde_mm512_set_epi64(INT64_C( 2056379472574346663), INT64_C(-3486865648830471282), INT64_C( 8151787653682140580), INT64_C( -831601358278995789), INT64_C(-2800664419916301039), INT64_C( 3280702275774868225), INT64_C(-4735905134864699368), INT64_C( 7051416147935021095)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 915.68), SIMDE_FLOAT64_C( -53.79), SIMDE_FLOAT64_C( 703.31), SIMDE_FLOAT64_C( 930.79), SIMDE_FLOAT64_C( 111.33), SIMDE_FLOAT64_C( -176.75), SIMDE_FLOAT64_C( -316.94), SIMDE_FLOAT64_C( 639.68)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -318.40), SIMDE_FLOAT64_C( -53.79), SIMDE_FLOAT64_C( 757.65), SIMDE_FLOAT64_C( -352.24), SIMDE_FLOAT64_C( 602.83), SIMDE_FLOAT64_C( -479.79), SIMDE_FLOAT64_C( 639.68), SIMDE_FLOAT64_C( 2.55)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 432.42), SIMDE_FLOAT64_C( 421.92), SIMDE_FLOAT64_C( 718.96), SIMDE_FLOAT64_C( -864.59), SIMDE_FLOAT64_C( -334.42), SIMDE_FLOAT64_C( 660.53), SIMDE_FLOAT64_C( 748.73), SIMDE_FLOAT64_C( 996.15)), UINT8_C(227), simde_mm512_set_epi64(INT64_C( 6904524208941840952), INT64_C(-7164378700336361334), INT64_C( 4351794567182281042), INT64_C(-1716872434006574729), INT64_C( -689503347190866770), INT64_C(-1389624339165317749), INT64_C(-8184083999390244234), INT64_C( 8331479114169761131)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 601.37), SIMDE_FLOAT64_C( -660.24), SIMDE_FLOAT64_C( -675.56), SIMDE_FLOAT64_C( -194.09), SIMDE_FLOAT64_C( 149.22), SIMDE_FLOAT64_C( 161.52), SIMDE_FLOAT64_C( 632.78), SIMDE_FLOAT64_C( 346.90)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 346.90), SIMDE_FLOAT64_C( 161.52), SIMDE_FLOAT64_C( 660.53), SIMDE_FLOAT64_C( -864.59), SIMDE_FLOAT64_C( -334.42), SIMDE_FLOAT64_C( 660.53), SIMDE_FLOAT64_C( 421.92), SIMDE_FLOAT64_C( 149.22)) }, { 
simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.84), SIMDE_FLOAT64_C( 39.14), SIMDE_FLOAT64_C( 124.68), SIMDE_FLOAT64_C( -448.70), SIMDE_FLOAT64_C( 122.69), SIMDE_FLOAT64_C( 65.13), SIMDE_FLOAT64_C( -972.27), SIMDE_FLOAT64_C( 628.22)), UINT8_C(179), simde_mm512_set_epi64(INT64_C( 2669799685376652269), INT64_C( 7927414333096918356), INT64_C(-6028737433755228757), INT64_C(-6289085317177674471), INT64_C(-5541511610486147753), INT64_C( 6531713794566454707), INT64_C( -446705336047418133), INT64_C(-6709780755556058351)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 196.06), SIMDE_FLOAT64_C( -83.06), SIMDE_FLOAT64_C( 687.82), SIMDE_FLOAT64_C( -517.82), SIMDE_FLOAT64_C( -294.36), SIMDE_FLOAT64_C( 702.71), SIMDE_FLOAT64_C( -920.22), SIMDE_FLOAT64_C( -923.04)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 687.82), SIMDE_FLOAT64_C( 39.14), SIMDE_FLOAT64_C( -294.36), SIMDE_FLOAT64_C( -920.22), SIMDE_FLOAT64_C( 122.69), SIMDE_FLOAT64_C( 65.13), SIMDE_FLOAT64_C( -294.36), SIMDE_FLOAT64_C( -972.27)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_permutex2var_pd(test_vec[i].a, test_vec[i].k, test_vec[i].idx, test_vec[i].b); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask2_permutex2var_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512i idx; simde__mmask8 k; simde__m512d b; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 824.77), SIMDE_FLOAT64_C( 172.30), SIMDE_FLOAT64_C( -660.39), SIMDE_FLOAT64_C( -605.88), SIMDE_FLOAT64_C( -689.22), SIMDE_FLOAT64_C( -25.12), SIMDE_FLOAT64_C( 921.58), SIMDE_FLOAT64_C( 433.31)), simde_mm512_set_epi64(INT64_C( 4651624766755653550), INT64_C( 4649285006011746222), INT64_C( 4650293302154881925), INT64_C( 4621571419609466143), INT64_C(-4573265035950104248), INT64_C( 4648839571861101609), INT64_C(-4574838041265265705), INT64_C(-4573884896625379246)), UINT8_C(198), simde_mm512_set_pd(SIMDE_FLOAT64_C( -867.63), SIMDE_FLOAT64_C( 
-65.75), SIMDE_FLOAT64_C( 137.16), SIMDE_FLOAT64_C( -73.36), SIMDE_FLOAT64_C( -396.53), SIMDE_FLOAT64_C( -409.15), SIMDE_FLOAT64_C( 672.29), SIMDE_FLOAT64_C( 992.21)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -65.75), SIMDE_FLOAT64_C( -65.75), SIMDE_FLOAT64_C( 805.14), SIMDE_FLOAT64_C( 9.56), SIMDE_FLOAT64_C( -783.96), SIMDE_FLOAT64_C( 672.29), SIMDE_FLOAT64_C( 824.77), SIMDE_FLOAT64_C( -713.49)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 784.21), SIMDE_FLOAT64_C( 732.58), SIMDE_FLOAT64_C( -862.95), SIMDE_FLOAT64_C( -241.44), SIMDE_FLOAT64_C( -318.72), SIMDE_FLOAT64_C( 630.11), SIMDE_FLOAT64_C( -645.10), SIMDE_FLOAT64_C( -230.29)), simde_mm512_set_epi64(INT64_C( 4646939703729234903), INT64_C( 4639186123652018995), INT64_C( 4627437358124116214), INT64_C(-4583138122601951396), INT64_C(-4586200570348563333), INT64_C(-4573672822822613811), INT64_C(-4584207727513451889), INT64_C(-4573688391907263119)), UINT8_C(222), simde_mm512_set_pd(SIMDE_FLOAT64_C( -947.87), SIMDE_FLOAT64_C( 877.96), SIMDE_FLOAT64_C( 184.43), SIMDE_FLOAT64_C( 375.34), SIMDE_FLOAT64_C( 389.76), SIMDE_FLOAT64_C( 437.16), SIMDE_FLOAT64_C( -638.93), SIMDE_FLOAT64_C( 773.45)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 784.21), SIMDE_FLOAT64_C( -318.72), SIMDE_FLOAT64_C( 23.96), SIMDE_FLOAT64_C( 375.34), SIMDE_FLOAT64_C( 389.76), SIMDE_FLOAT64_C( 184.43), SIMDE_FLOAT64_C( -947.87), SIMDE_FLOAT64_C( -735.83)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 853.65), SIMDE_FLOAT64_C( 495.16), SIMDE_FLOAT64_C( -991.61), SIMDE_FLOAT64_C( 120.75), SIMDE_FLOAT64_C( -451.25), SIMDE_FLOAT64_C( -719.73), SIMDE_FLOAT64_C( -295.20), SIMDE_FLOAT64_C( 405.97)), simde_mm512_set_epi64(INT64_C(-4571849568660970537), INT64_C(-4573624268389131223), INT64_C( 4649833882216332001), INT64_C(-4576057267719073956), INT64_C( 4639950680057509315), INT64_C( 4648192795141178655), INT64_C(-4582991755614061855), INT64_C(-4572918030080378143)), UINT8_C( 61), simde_mm512_set_pd(SIMDE_FLOAT64_C( -475.08), SIMDE_FLOAT64_C( 601.08), SIMDE_FLOAT64_C( -590.62), 
SIMDE_FLOAT64_C( 446.98), SIMDE_FLOAT64_C( 51.90), SIMDE_FLOAT64_C( 597.18), SIMDE_FLOAT64_C( -19.20), SIMDE_FLOAT64_C( -536.51)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -944.88), SIMDE_FLOAT64_C( -743.12), SIMDE_FLOAT64_C( -295.20), SIMDE_FLOAT64_C( 446.98), SIMDE_FLOAT64_C( -451.25), SIMDE_FLOAT64_C( -475.08), SIMDE_FLOAT64_C( -175.54), SIMDE_FLOAT64_C( -295.20)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 985.03), SIMDE_FLOAT64_C( 291.60), SIMDE_FLOAT64_C( -732.66), SIMDE_FLOAT64_C( 106.36), SIMDE_FLOAT64_C( 883.40), SIMDE_FLOAT64_C( 8.63), SIMDE_FLOAT64_C( 178.77), SIMDE_FLOAT64_C( -118.91)), simde_mm512_set_epi64(INT64_C(-4574823439750848840), INT64_C(-4582104757593702400), INT64_C(-4583336210616811520), INT64_C(-4580375093861815419), INT64_C(-4577720432987713044), INT64_C( 4649427062914054881), INT64_C(-4573213578805924332), INT64_C(-4581494660581682053)), UINT8_C(193), simde_mm512_set_pd(SIMDE_FLOAT64_C( 557.43), SIMDE_FLOAT64_C( 547.40), SIMDE_FLOAT64_C( -994.90), SIMDE_FLOAT64_C( -527.55), SIMDE_FLOAT64_C( 290.76), SIMDE_FLOAT64_C( -839.12), SIMDE_FLOAT64_C( 702.81), SIMDE_FLOAT64_C( -129.54)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -129.54), SIMDE_FLOAT64_C( -118.91), SIMDE_FLOAT64_C( -165.75), SIMDE_FLOAT64_C( -249.91), SIMDE_FLOAT64_C( -394.72), SIMDE_FLOAT64_C( 706.66), SIMDE_FLOAT64_C( -789.81), SIMDE_FLOAT64_C( 290.76)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -733.70), SIMDE_FLOAT64_C( -80.62), SIMDE_FLOAT64_C( 200.37), SIMDE_FLOAT64_C( -613.00), SIMDE_FLOAT64_C( -530.37), SIMDE_FLOAT64_C( 940.29), SIMDE_FLOAT64_C( -837.65), SIMDE_FLOAT64_C( -127.38)), simde_mm512_set_epi64(INT64_C( 4651252164255232819), INT64_C( 4644570915878354289), INT64_C( 4649897126125161677), INT64_C( 4645692593660546253), INT64_C( 4644974304704352748), INT64_C(-4606788353832622490), INT64_C( 4645887339160057938), INT64_C( 4635970272043099750)), UINT8_C( 81), simde_mm512_set_pd(SIMDE_FLOAT64_C( -742.43), SIMDE_FLOAT64_C( 171.75), SIMDE_FLOAT64_C( 896.83), SIMDE_FLOAT64_C( 381.31), 
SIMDE_FLOAT64_C( -468.20), SIMDE_FLOAT64_C( -182.38), SIMDE_FLOAT64_C( -642.39), SIMDE_FLOAT64_C( -912.51)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 914.15), SIMDE_FLOAT64_C( -837.65), SIMDE_FLOAT64_C( 760.10), SIMDE_FLOAT64_C( 896.83), SIMDE_FLOAT64_C( 356.22), SIMDE_FLOAT64_C( -4.35), SIMDE_FLOAT64_C( 408.12), SIMDE_FLOAT64_C( -80.62)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 566.99), SIMDE_FLOAT64_C( -242.78), SIMDE_FLOAT64_C( 250.83), SIMDE_FLOAT64_C( -818.85), SIMDE_FLOAT64_C( -959.57), SIMDE_FLOAT64_C( 925.03), SIMDE_FLOAT64_C( -644.70), SIMDE_FLOAT64_C( -800.67)), simde_mm512_set_epi64(INT64_C(-4573614680647737016), INT64_C(-4572547362720422298), INT64_C(-4579978038222792950), INT64_C(-4573175139879417283), INT64_C(-4572733488048772219), INT64_C(-4575215833460569539), INT64_C(-4582412972693200568), INT64_C(-4572092340828383478)), UINT8_C(145), simde_mm512_set_pd(SIMDE_FLOAT64_C( 519.90), SIMDE_FLOAT64_C( -645.40), SIMDE_FLOAT64_C( -442.29), SIMDE_FLOAT64_C( -336.85), SIMDE_FLOAT64_C( -152.85), SIMDE_FLOAT64_C( -363.79), SIMDE_FLOAT64_C( -614.03), SIMDE_FLOAT64_C( -923.13)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -923.13), SIMDE_FLOAT64_C( -865.55), SIMDE_FLOAT64_C( -266.39), SIMDE_FLOAT64_C( -442.29), SIMDE_FLOAT64_C( -844.39), SIMDE_FLOAT64_C( -562.18), SIMDE_FLOAT64_C( -191.99), SIMDE_FLOAT64_C( -363.79)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -748.43), SIMDE_FLOAT64_C( -53.25), SIMDE_FLOAT64_C( -132.01), SIMDE_FLOAT64_C( -767.78), SIMDE_FLOAT64_C( -9.36), SIMDE_FLOAT64_C( -226.72), SIMDE_FLOAT64_C( -661.68), SIMDE_FLOAT64_C( -655.84)), simde_mm512_set_epi64(INT64_C( 4636324226826313400), INT64_C(-4582681781295959245), INT64_C(-4588763400011513856), INT64_C(-4578054508600696504), INT64_C( 4643285630765949256), INT64_C(-4577793440559797371), INT64_C( 4641318648444323103), INT64_C(-4573035194039433953)), UINT8_C( 44), simde_mm512_set_pd(SIMDE_FLOAT64_C( 845.75), SIMDE_FLOAT64_C( 43.86), SIMDE_FLOAT64_C( -170.77), SIMDE_FLOAT64_C( 428.74), SIMDE_FLOAT64_C( 
695.01), SIMDE_FLOAT64_C( -702.60), SIMDE_FLOAT64_C( -231.45), SIMDE_FLOAT64_C( -416.99)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 94.13), SIMDE_FLOAT64_C( -184.35), SIMDE_FLOAT64_C( -655.84), SIMDE_FLOAT64_C( -375.73), SIMDE_FLOAT64_C( -416.99), SIMDE_FLOAT64_C( -132.01), SIMDE_FLOAT64_C( 202.21), SIMDE_FLOAT64_C( -810.09)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -797.03), SIMDE_FLOAT64_C( -987.00), SIMDE_FLOAT64_C( 679.83), SIMDE_FLOAT64_C( -872.18), SIMDE_FLOAT64_C( 58.63), SIMDE_FLOAT64_C( -398.86), SIMDE_FLOAT64_C( -816.03), SIMDE_FLOAT64_C( 667.01)), simde_mm512_set_epi64(INT64_C(-4573879706930496143), INT64_C(-4575599870881919140), INT64_C( 4646298820391636828), INT64_C( 4636687329546270147), INT64_C( 4651451571684046275), INT64_C(-4586695966307574088), INT64_C(-4575178010260574044), INT64_C( 4649362675513132319)), UINT8_C( 86), simde_mm512_set_pd(SIMDE_FLOAT64_C( 312.31), SIMDE_FLOAT64_C( 578.29), SIMDE_FLOAT64_C( 419.61), SIMDE_FLOAT64_C( -753.42), SIMDE_FLOAT64_C( 687.05), SIMDE_FLOAT64_C( 241.87), SIMDE_FLOAT64_C( -80.26), SIMDE_FLOAT64_C( -667.68)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -714.08), SIMDE_FLOAT64_C( -753.42), SIMDE_FLOAT64_C( 431.51), SIMDE_FLOAT64_C( 58.63), SIMDE_FLOAT64_C( 936.82), SIMDE_FLOAT64_C( -667.68), SIMDE_FLOAT64_C( -872.18), SIMDE_FLOAT64_C( 699.34)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask2_permutex2var_pd(test_vec[i].a, test_vec[i].idx, test_vec[i].k, test_vec[i].b); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_maskz_permutex2var_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512d a; simde__m512i idx; simde__m512d b; simde__m512d r; } test_vec[8] = { { UINT8_C( 63), simde_mm512_set_pd(SIMDE_FLOAT64_C( 160.98), SIMDE_FLOAT64_C( 391.82), SIMDE_FLOAT64_C( -569.99), SIMDE_FLOAT64_C( -327.63), SIMDE_FLOAT64_C( -172.36), SIMDE_FLOAT64_C( 393.53), SIMDE_FLOAT64_C( 36.69), SIMDE_FLOAT64_C( -135.52)), 
simde_mm512_set_epi64(INT64_C(-4767707706458520415), INT64_C(-7083171014951853588), INT64_C(-2076988212358998594), INT64_C( 3698505898575972461), INT64_C(-8586603972668500699), INT64_C( 7848938818320954984), INT64_C(-7002500096438875648), INT64_C( 1477571573764517782)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 522.06), SIMDE_FLOAT64_C( -932.28), SIMDE_FLOAT64_C( 600.12), SIMDE_FLOAT64_C( -491.12), SIMDE_FLOAT64_C( -139.11), SIMDE_FLOAT64_C( -268.86), SIMDE_FLOAT64_C( -71.72), SIMDE_FLOAT64_C( 98.47)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -932.28), SIMDE_FLOAT64_C( 600.12), SIMDE_FLOAT64_C( -569.99), SIMDE_FLOAT64_C( 98.47), SIMDE_FLOAT64_C( -135.52), SIMDE_FLOAT64_C( 391.82)) }, { UINT8_C(251), simde_mm512_set_pd(SIMDE_FLOAT64_C( 571.79), SIMDE_FLOAT64_C( 675.92), SIMDE_FLOAT64_C( 490.41), SIMDE_FLOAT64_C( 47.59), SIMDE_FLOAT64_C( -895.71), SIMDE_FLOAT64_C( -736.92), SIMDE_FLOAT64_C( 283.06), SIMDE_FLOAT64_C( -333.94)), simde_mm512_set_epi64(INT64_C( 7391005387705442660), INT64_C(-5091463632259113685), INT64_C( 685405269785004780), INT64_C(-2602517860068074949), INT64_C( 4704994953943345443), INT64_C( 8877610218385468208), INT64_C( 5776984527519295337), INT64_C( 6526937450820584225)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -203.04), SIMDE_FLOAT64_C( -80.71), SIMDE_FLOAT64_C( 632.01), SIMDE_FLOAT64_C( 456.89), SIMDE_FLOAT64_C( 51.33), SIMDE_FLOAT64_C( -868.59), SIMDE_FLOAT64_C( -921.00), SIMDE_FLOAT64_C( -471.60)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 47.59), SIMDE_FLOAT64_C( 51.33), SIMDE_FLOAT64_C( 456.89), SIMDE_FLOAT64_C( 51.33), SIMDE_FLOAT64_C( -895.71), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -921.00), SIMDE_FLOAT64_C( 283.06)) }, { UINT8_C( 19), simde_mm512_set_pd(SIMDE_FLOAT64_C( 670.52), SIMDE_FLOAT64_C( 149.72), SIMDE_FLOAT64_C( 213.24), SIMDE_FLOAT64_C( -577.36), SIMDE_FLOAT64_C( 957.37), SIMDE_FLOAT64_C( -934.92), SIMDE_FLOAT64_C( -657.02), SIMDE_FLOAT64_C( -629.37)), simde_mm512_set_epi64(INT64_C( 
1455933536394832297), INT64_C( -840828676201867557), INT64_C( 6310833464661060096), INT64_C( 8429580363859954742), INT64_C( 5406280044045291975), INT64_C(-2284955492954404973), INT64_C(-3603637419527123210), INT64_C(-1117409850830928520)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 586.29), SIMDE_FLOAT64_C( -760.88), SIMDE_FLOAT64_C( -617.12), SIMDE_FLOAT64_C( -751.58), SIMDE_FLOAT64_C( 907.23), SIMDE_FLOAT64_C( -359.60), SIMDE_FLOAT64_C( -213.75), SIMDE_FLOAT64_C( 403.00)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 149.72), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 149.72), SIMDE_FLOAT64_C( 403.00)) }, { UINT8_C(132), simde_mm512_set_pd(SIMDE_FLOAT64_C( 918.52), SIMDE_FLOAT64_C( -422.76), SIMDE_FLOAT64_C( -433.33), SIMDE_FLOAT64_C( 48.49), SIMDE_FLOAT64_C( 799.57), SIMDE_FLOAT64_C( -820.22), SIMDE_FLOAT64_C( -959.11), SIMDE_FLOAT64_C( 268.99)), simde_mm512_set_epi64(INT64_C(-4670296842224865750), INT64_C(-8736438908262001915), INT64_C(-1516874692875012272), INT64_C( 2654080637722702840), INT64_C(-4444585746033374017), INT64_C( 7620312646179506248), INT64_C(-4730811392556899367), INT64_C(-7555925455226975890)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -663.74), SIMDE_FLOAT64_C( 179.29), SIMDE_FLOAT64_C( 989.70), SIMDE_FLOAT64_C( -695.21), SIMDE_FLOAT64_C( -786.23), SIMDE_FLOAT64_C( 873.30), SIMDE_FLOAT64_C( 241.45), SIMDE_FLOAT64_C( -432.13)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 873.30), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -432.13), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C( 37), simde_mm512_set_pd(SIMDE_FLOAT64_C( -904.81), SIMDE_FLOAT64_C( 50.50), SIMDE_FLOAT64_C( -282.31), SIMDE_FLOAT64_C( -656.10), SIMDE_FLOAT64_C( -789.54), SIMDE_FLOAT64_C( -790.16), SIMDE_FLOAT64_C( -415.61), SIMDE_FLOAT64_C( 994.61)), simde_mm512_set_epi64(INT64_C( 2001511420457827007), INT64_C( 8280910196874944184), 
INT64_C( -368934386460614235), INT64_C(-9011857488067354178), INT64_C( 5578921037540219940), INT64_C(-7880186302232587827), INT64_C( 7848707034806784644), INT64_C(-7246117184140796511)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 280.87), SIMDE_FLOAT64_C( 380.83), SIMDE_FLOAT64_C( -236.67), SIMDE_FLOAT64_C( -211.91), SIMDE_FLOAT64_C( -925.76), SIMDE_FLOAT64_C( -915.62), SIMDE_FLOAT64_C( -30.05), SIMDE_FLOAT64_C( -70.79)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -282.31), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -236.67), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -415.61)) }, { UINT8_C(218), simde_mm512_set_pd(SIMDE_FLOAT64_C( -318.40), SIMDE_FLOAT64_C( -844.73), SIMDE_FLOAT64_C( 757.65), SIMDE_FLOAT64_C( 677.41), SIMDE_FLOAT64_C( -352.24), SIMDE_FLOAT64_C( -479.79), SIMDE_FLOAT64_C( 602.83), SIMDE_FLOAT64_C( 2.55)), simde_mm512_set_epi64(INT64_C( 2056379472574346663), INT64_C(-3486865648830471282), INT64_C( 8151787653682140580), INT64_C( -831601358278995789), INT64_C(-2800664419916301039), INT64_C( 3280702275774868225), INT64_C(-4735905134864699368), INT64_C( 7051416147935021095)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 915.68), SIMDE_FLOAT64_C( -53.79), SIMDE_FLOAT64_C( 703.31), SIMDE_FLOAT64_C( 930.79), SIMDE_FLOAT64_C( 111.33), SIMDE_FLOAT64_C( -176.75), SIMDE_FLOAT64_C( -316.94), SIMDE_FLOAT64_C( 639.68)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -318.40), SIMDE_FLOAT64_C( -53.79), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -352.24), SIMDE_FLOAT64_C( 602.83), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 639.68), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(227), simde_mm512_set_pd(SIMDE_FLOAT64_C( 432.42), SIMDE_FLOAT64_C( 421.92), SIMDE_FLOAT64_C( 718.96), SIMDE_FLOAT64_C( -864.59), SIMDE_FLOAT64_C( -334.42), SIMDE_FLOAT64_C( 660.53), SIMDE_FLOAT64_C( 748.73), SIMDE_FLOAT64_C( 996.15)), simde_mm512_set_epi64(INT64_C( 6904524208941840952), INT64_C(-7164378700336361334), INT64_C( 4351794567182281042), 
INT64_C(-1716872434006574729), INT64_C( -689503347190866770), INT64_C(-1389624339165317749), INT64_C(-8184083999390244234), INT64_C( 8331479114169761131)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 601.37), SIMDE_FLOAT64_C( -660.24), SIMDE_FLOAT64_C( -675.56), SIMDE_FLOAT64_C( -194.09), SIMDE_FLOAT64_C( 149.22), SIMDE_FLOAT64_C( 161.52), SIMDE_FLOAT64_C( 632.78), SIMDE_FLOAT64_C( 346.90)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 346.90), SIMDE_FLOAT64_C( 161.52), SIMDE_FLOAT64_C( 660.53), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 421.92), SIMDE_FLOAT64_C( 149.22)) }, { UINT8_C(179), simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.84), SIMDE_FLOAT64_C( 39.14), SIMDE_FLOAT64_C( 124.68), SIMDE_FLOAT64_C( -448.70), SIMDE_FLOAT64_C( 122.69), SIMDE_FLOAT64_C( 65.13), SIMDE_FLOAT64_C( -972.27), SIMDE_FLOAT64_C( 628.22)), simde_mm512_set_epi64(INT64_C( 2669799685376652269), INT64_C( 7927414333096918356), INT64_C(-6028737433755228757), INT64_C(-6289085317177674471), INT64_C(-5541511610486147753), INT64_C( 6531713794566454707), INT64_C( -446705336047418133), INT64_C(-6709780755556058351)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 196.06), SIMDE_FLOAT64_C( -83.06), SIMDE_FLOAT64_C( 687.82), SIMDE_FLOAT64_C( -517.82), SIMDE_FLOAT64_C( -294.36), SIMDE_FLOAT64_C( 702.71), SIMDE_FLOAT64_C( -920.22), SIMDE_FLOAT64_C( -923.04)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 687.82), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -294.36), SIMDE_FLOAT64_C( -920.22), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -294.36), SIMDE_FLOAT64_C( -972.27)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_maskz_permutex2var_pd(test_vec[i].k, test_vec[i].a, test_vec[i].idx, test_vec[i].b); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_permutex2var_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512i idx; simde__m512 b; simde__m512 r; } test_vec[8] = { 
{ simde_mm512_set_ps(SIMDE_FLOAT32_C( -64.06), SIMDE_FLOAT32_C( 559.81), SIMDE_FLOAT32_C( -423.61), SIMDE_FLOAT32_C( 407.56), SIMDE_FLOAT32_C( -787.72), SIMDE_FLOAT32_C( -703.51), SIMDE_FLOAT32_C( -470.36), SIMDE_FLOAT32_C( 135.20), SIMDE_FLOAT32_C( 799.30), SIMDE_FLOAT32_C( -576.01), SIMDE_FLOAT32_C( 439.15), SIMDE_FLOAT32_C( -28.15), SIMDE_FLOAT32_C( 481.25), SIMDE_FLOAT32_C( -784.26), SIMDE_FLOAT32_C( 549.03), SIMDE_FLOAT32_C( 582.53)), simde_mm512_set_epi32(INT32_C(-1732993162), INT32_C( 1212743926), INT32_C( 1966971402), INT32_C(-1506668774), INT32_C(-1700657265), INT32_C( 1944327234), INT32_C( -355879099), INT32_C(-1588067414), INT32_C( 301696052), INT32_C( 1998339065), INT32_C(-2060809025), INT32_C(-1942156019), INT32_C( 551689125), INT32_C( 669995747), INT32_C(-1196653219), INT32_C( -147816939)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 120.10), SIMDE_FLOAT32_C( -620.03), SIMDE_FLOAT32_C( 185.23), SIMDE_FLOAT32_C( -11.91), SIMDE_FLOAT32_C( 355.11), SIMDE_FLOAT32_C( 472.82), SIMDE_FLOAT32_C( -202.49), SIMDE_FLOAT32_C( 966.37), SIMDE_FLOAT32_C( -563.83), SIMDE_FLOAT32_C( 938.85), SIMDE_FLOAT32_C( -465.05), SIMDE_FLOAT32_C( -104.57), SIMDE_FLOAT32_C( -431.26), SIMDE_FLOAT32_C( -57.75), SIMDE_FLOAT32_C( 438.04), SIMDE_FLOAT32_C( 729.46)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 938.85), SIMDE_FLOAT32_C( 938.85), SIMDE_FLOAT32_C( -703.51), SIMDE_FLOAT32_C( 472.82), SIMDE_FLOAT32_C( -64.06), SIMDE_FLOAT32_C( -784.26), SIMDE_FLOAT32_C( 439.15), SIMDE_FLOAT32_C( -703.51), SIMDE_FLOAT32_C( -104.57), SIMDE_FLOAT32_C( -202.49), SIMDE_FLOAT32_C( 120.10), SIMDE_FLOAT32_C( -423.61), SIMDE_FLOAT32_C( 439.15), SIMDE_FLOAT32_C( 481.25), SIMDE_FLOAT32_C( 185.23), SIMDE_FLOAT32_C( -465.05)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -832.50), SIMDE_FLOAT32_C( 438.46), SIMDE_FLOAT32_C( 336.35), SIMDE_FLOAT32_C( 99.21), SIMDE_FLOAT32_C( -38.88), SIMDE_FLOAT32_C( 218.73), SIMDE_FLOAT32_C( -966.90), SIMDE_FLOAT32_C( -737.78), SIMDE_FLOAT32_C( -224.13), SIMDE_FLOAT32_C( -834.15), 
SIMDE_FLOAT32_C( -819.46), SIMDE_FLOAT32_C( 112.81), SIMDE_FLOAT32_C( 464.19), SIMDE_FLOAT32_C( 282.83), SIMDE_FLOAT32_C( 841.24), SIMDE_FLOAT32_C( 79.76)), simde_mm512_set_epi32(INT32_C( 1760980702), INT32_C(-1592941833), INT32_C(-1618734568), INT32_C(-1937346052), INT32_C( -716563340), INT32_C(-1364071584), INT32_C( -516953475), INT32_C( 1021791773), INT32_C( 587319712), INT32_C(-1327772936), INT32_C( -388433125), INT32_C(-1835488163), INT32_C( 1934085090), INT32_C( 1823172786), INT32_C( -962834173), INT32_C(-1813383694)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 141.08), SIMDE_FLOAT32_C( -990.15), SIMDE_FLOAT32_C( -887.47), SIMDE_FLOAT32_C( -396.24), SIMDE_FLOAT32_C( -2.60), SIMDE_FLOAT32_C( 165.88), SIMDE_FLOAT32_C( 375.27), SIMDE_FLOAT32_C( -512.98), SIMDE_FLOAT32_C( 664.52), SIMDE_FLOAT32_C( 633.65), SIMDE_FLOAT32_C( -157.33), SIMDE_FLOAT32_C( 541.44), SIMDE_FLOAT32_C( -98.08), SIMDE_FLOAT32_C( 711.12), SIMDE_FLOAT32_C( -774.08), SIMDE_FLOAT32_C( -414.07)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -990.15), SIMDE_FLOAT32_C( 664.52), SIMDE_FLOAT32_C( -512.98), SIMDE_FLOAT32_C( -396.24), SIMDE_FLOAT32_C( 541.44), SIMDE_FLOAT32_C( 79.76), SIMDE_FLOAT32_C( -887.47), SIMDE_FLOAT32_C( -887.47), SIMDE_FLOAT32_C( 79.76), SIMDE_FLOAT32_C( -512.98), SIMDE_FLOAT32_C( -2.60), SIMDE_FLOAT32_C( -887.47), SIMDE_FLOAT32_C( 282.83), SIMDE_FLOAT32_C( 711.12), SIMDE_FLOAT32_C( 464.19), SIMDE_FLOAT32_C( 711.12)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 485.76), SIMDE_FLOAT32_C( 786.03), SIMDE_FLOAT32_C( 489.95), SIMDE_FLOAT32_C( 796.52), SIMDE_FLOAT32_C( -248.07), SIMDE_FLOAT32_C( -88.91), SIMDE_FLOAT32_C( 170.00), SIMDE_FLOAT32_C( 366.57), SIMDE_FLOAT32_C( -211.73), SIMDE_FLOAT32_C( 908.31), SIMDE_FLOAT32_C( -144.11), SIMDE_FLOAT32_C( -343.92), SIMDE_FLOAT32_C( 961.65), SIMDE_FLOAT32_C( 754.42), SIMDE_FLOAT32_C( -432.97), SIMDE_FLOAT32_C( 164.52)), simde_mm512_set_epi32(INT32_C(-1723897302), INT32_C(-1378607382), INT32_C(-1399746280), INT32_C( 55039129), INT32_C( -614650529), INT32_C( 
1411406194), INT32_C( 789645610), INT32_C( 647523062), INT32_C( 1326675104), INT32_C(-1833621116), INT32_C( 1459731687), INT32_C(-1374295064), INT32_C( 1719685048), INT32_C(-1859395493), INT32_C( 506068853), INT32_C( 1338183771)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -985.32), SIMDE_FLOAT32_C( 234.60), SIMDE_FLOAT32_C( 859.59), SIMDE_FLOAT32_C( -409.35), SIMDE_FLOAT32_C( -846.10), SIMDE_FLOAT32_C( -411.92), SIMDE_FLOAT32_C( 481.68), SIMDE_FLOAT32_C( -341.91), SIMDE_FLOAT32_C( 254.48), SIMDE_FLOAT32_C( 755.70), SIMDE_FLOAT32_C( -363.93), SIMDE_FLOAT32_C( 789.10), SIMDE_FLOAT32_C( 344.74), SIMDE_FLOAT32_C( 652.93), SIMDE_FLOAT32_C( 184.91), SIMDE_FLOAT32_C( -455.33)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -88.91), SIMDE_FLOAT32_C( -88.91), SIMDE_FLOAT32_C( -341.91), SIMDE_FLOAT32_C( 481.68), SIMDE_FLOAT32_C( -985.32), SIMDE_FLOAT32_C( 652.93), SIMDE_FLOAT32_C( -88.91), SIMDE_FLOAT32_C( 755.70), SIMDE_FLOAT32_C( 164.52), SIMDE_FLOAT32_C( -343.92), SIMDE_FLOAT32_C( -211.73), SIMDE_FLOAT32_C( 366.57), SIMDE_FLOAT32_C( -341.91), SIMDE_FLOAT32_C( -846.10), SIMDE_FLOAT32_C( -363.93), SIMDE_FLOAT32_C( -846.10)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 374.01), SIMDE_FLOAT32_C( -829.82), SIMDE_FLOAT32_C( -372.09), SIMDE_FLOAT32_C( -693.82), SIMDE_FLOAT32_C( 763.12), SIMDE_FLOAT32_C( 797.99), SIMDE_FLOAT32_C( 291.55), SIMDE_FLOAT32_C( -93.41), SIMDE_FLOAT32_C( 658.18), SIMDE_FLOAT32_C( -330.38), SIMDE_FLOAT32_C( -462.89), SIMDE_FLOAT32_C( -905.06), SIMDE_FLOAT32_C( 266.49), SIMDE_FLOAT32_C( -537.47), SIMDE_FLOAT32_C( -705.33), SIMDE_FLOAT32_C( 699.62)), simde_mm512_set_epi32(INT32_C( 542652634), INT32_C(-1703311025), INT32_C( 1982058627), INT32_C( 315796553), INT32_C( -175644135), INT32_C( 1189569864), INT32_C(-1367453148), INT32_C( 1842630130), INT32_C(-1935654327), INT32_C( 121935685), INT32_C( 1908625200), INT32_C(-1041198180), INT32_C( 1800897720), INT32_C(-1697242531), INT32_C(-1701529660), INT32_C( 1678133404)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -65.54), SIMDE_FLOAT32_C( 
852.70), SIMDE_FLOAT32_C( 316.87), SIMDE_FLOAT32_C( 303.57), SIMDE_FLOAT32_C( 151.19), SIMDE_FLOAT32_C( 17.85), SIMDE_FLOAT32_C( 280.96), SIMDE_FLOAT32_C( -966.13), SIMDE_FLOAT32_C( 156.66), SIMDE_FLOAT32_C( 198.89), SIMDE_FLOAT32_C( -766.40), SIMDE_FLOAT32_C( -495.88), SIMDE_FLOAT32_C( -794.42), SIMDE_FLOAT32_C( -263.27), SIMDE_FLOAT32_C( 74.85), SIMDE_FLOAT32_C( 127.81)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 17.85), SIMDE_FLOAT32_C( 374.01), SIMDE_FLOAT32_C( 266.49), SIMDE_FLOAT32_C( 291.55), SIMDE_FLOAT32_C( 280.96), SIMDE_FLOAT32_C( -93.41), SIMDE_FLOAT32_C( -905.06), SIMDE_FLOAT32_C( -263.27), SIMDE_FLOAT32_C( 291.55), SIMDE_FLOAT32_C( -462.89), SIMDE_FLOAT32_C( 127.81), SIMDE_FLOAT32_C( 303.57), SIMDE_FLOAT32_C( -966.13), SIMDE_FLOAT32_C( 316.87), SIMDE_FLOAT32_C( -905.06), SIMDE_FLOAT32_C( 303.57)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -276.00), SIMDE_FLOAT32_C( -317.53), SIMDE_FLOAT32_C( -723.48), SIMDE_FLOAT32_C( -792.04), SIMDE_FLOAT32_C( -284.98), SIMDE_FLOAT32_C( 906.86), SIMDE_FLOAT32_C( -798.29), SIMDE_FLOAT32_C( 122.95), SIMDE_FLOAT32_C( 192.60), SIMDE_FLOAT32_C( -766.97), SIMDE_FLOAT32_C( -268.82), SIMDE_FLOAT32_C( 379.62), SIMDE_FLOAT32_C( 889.99), SIMDE_FLOAT32_C( 778.00), SIMDE_FLOAT32_C( 136.45), SIMDE_FLOAT32_C( -745.44)), simde_mm512_set_epi32(INT32_C( 1809884012), INT32_C( -191206140), INT32_C( -553702639), INT32_C(-1800209842), INT32_C( 721509385), INT32_C( -3110338), INT32_C( 629848436), INT32_C( 1833821522), INT32_C( 1870762521), INT32_C( 1138301165), INT32_C( 499100111), INT32_C( -583483550), INT32_C( 449841541), INT32_C( 1832272682), INT32_C( -100358742), INT32_C( 1797905164)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 366.70), SIMDE_FLOAT32_C( 615.81), SIMDE_FLOAT32_C( -673.63), SIMDE_FLOAT32_C( 832.55), SIMDE_FLOAT32_C( 260.08), SIMDE_FLOAT32_C( -412.45), SIMDE_FLOAT32_C( 245.05), SIMDE_FLOAT32_C( -913.68), SIMDE_FLOAT32_C( 79.71), SIMDE_FLOAT32_C( -761.86), SIMDE_FLOAT32_C( 699.75), SIMDE_FLOAT32_C( -94.00), SIMDE_FLOAT32_C( 852.56), 
SIMDE_FLOAT32_C( -583.72), SIMDE_FLOAT32_C( 64.51), SIMDE_FLOAT32_C( 598.44)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -792.04), SIMDE_FLOAT32_C( 379.62), SIMDE_FLOAT32_C( 64.51), SIMDE_FLOAT32_C( -317.53), SIMDE_FLOAT32_C( -798.29), SIMDE_FLOAT32_C( 615.81), SIMDE_FLOAT32_C( -94.00), SIMDE_FLOAT32_C( -583.72), SIMDE_FLOAT32_C( 245.05), SIMDE_FLOAT32_C( -723.48), SIMDE_FLOAT32_C( -276.00), SIMDE_FLOAT32_C( 778.00), SIMDE_FLOAT32_C( -268.82), SIMDE_FLOAT32_C( 906.86), SIMDE_FLOAT32_C( 906.86), SIMDE_FLOAT32_C( -792.04)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -976.85), SIMDE_FLOAT32_C( 321.59), SIMDE_FLOAT32_C( 867.83), SIMDE_FLOAT32_C( 831.83), SIMDE_FLOAT32_C( -570.45), SIMDE_FLOAT32_C( -732.92), SIMDE_FLOAT32_C( -964.15), SIMDE_FLOAT32_C( -39.63), SIMDE_FLOAT32_C( 579.20), SIMDE_FLOAT32_C( -391.63), SIMDE_FLOAT32_C( 335.63), SIMDE_FLOAT32_C( 738.93), SIMDE_FLOAT32_C( 439.69), SIMDE_FLOAT32_C( 930.40), SIMDE_FLOAT32_C( 273.66), SIMDE_FLOAT32_C( 892.54)), simde_mm512_set_epi32(INT32_C( 1604291666), INT32_C( -491791926), INT32_C( 1819084879), INT32_C( 1431978307), INT32_C( 1530613877), INT32_C(-1009988233), INT32_C( 1309545588), INT32_C(-1001207300), INT32_C(-1909714099), INT32_C(-1031553759), INT32_C(-1195513362), INT32_C( 957243291), INT32_C( 1576721253), INT32_C( 1170763213), INT32_C( 1280754757), INT32_C(-1670630534)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 162.47), SIMDE_FLOAT32_C( 307.15), SIMDE_FLOAT32_C( -851.38), SIMDE_FLOAT32_C( -956.87), SIMDE_FLOAT32_C( -932.93), SIMDE_FLOAT32_C( 757.55), SIMDE_FLOAT32_C( -934.19), SIMDE_FLOAT32_C( 361.60), SIMDE_FLOAT32_C( -239.16), SIMDE_FLOAT32_C( -204.61), SIMDE_FLOAT32_C( 200.64), SIMDE_FLOAT32_C( -634.66), SIMDE_FLOAT32_C( 607.08), SIMDE_FLOAT32_C( -668.91), SIMDE_FLOAT32_C( -886.74), SIMDE_FLOAT32_C( -214.28)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -668.91), SIMDE_FLOAT32_C( -732.92), SIMDE_FLOAT32_C( -976.85), SIMDE_FLOAT32_C( 439.69), SIMDE_FLOAT32_C( 200.64), SIMDE_FLOAT32_C( -239.16), SIMDE_FLOAT32_C( -634.66), 
SIMDE_FLOAT32_C( -956.87), SIMDE_FLOAT32_C( 867.83), SIMDE_FLOAT32_C( 273.66), SIMDE_FLOAT32_C( 321.59), SIMDE_FLOAT32_C( -932.93), SIMDE_FLOAT32_C( 335.63), SIMDE_FLOAT32_C( 867.83), SIMDE_FLOAT32_C( 335.63), SIMDE_FLOAT32_C( 757.55)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -824.86), SIMDE_FLOAT32_C( 954.86), SIMDE_FLOAT32_C( 523.67), SIMDE_FLOAT32_C( -253.94), SIMDE_FLOAT32_C( 437.48), SIMDE_FLOAT32_C( 79.48), SIMDE_FLOAT32_C( -371.31), SIMDE_FLOAT32_C( -36.35), SIMDE_FLOAT32_C( 288.58), SIMDE_FLOAT32_C( 274.36), SIMDE_FLOAT32_C( -249.54), SIMDE_FLOAT32_C( 805.73), SIMDE_FLOAT32_C( 29.94), SIMDE_FLOAT32_C( -149.48), SIMDE_FLOAT32_C( 141.90), SIMDE_FLOAT32_C( -55.72)), simde_mm512_set_epi32(INT32_C( -769209677), INT32_C( 921385960), INT32_C( 426713309), INT32_C( 1823076507), INT32_C( 1666223712), INT32_C( -194163626), INT32_C( 604266257), INT32_C( 1932526911), INT32_C( 20183709), INT32_C( 818624505), INT32_C( 1023612212), INT32_C(-1113050305), INT32_C( 1709269191), INT32_C( 2143041839), INT32_C( 1719176952), INT32_C( -353230679)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -657.43), SIMDE_FLOAT32_C( -181.72), SIMDE_FLOAT32_C( -893.09), SIMDE_FLOAT32_C( 985.74), SIMDE_FLOAT32_C( -488.89), SIMDE_FLOAT32_C( 683.50), SIMDE_FLOAT32_C( 297.36), SIMDE_FLOAT32_C( 88.61), SIMDE_FLOAT32_C( -14.68), SIMDE_FLOAT32_C( 451.12), SIMDE_FLOAT32_C( -464.35), SIMDE_FLOAT32_C( 82.04), SIMDE_FLOAT32_C( -48.11), SIMDE_FLOAT32_C( 254.96), SIMDE_FLOAT32_C( -161.09), SIMDE_FLOAT32_C( -538.50)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -48.11), SIMDE_FLOAT32_C( -36.35), SIMDE_FLOAT32_C( -893.09), SIMDE_FLOAT32_C( -488.89), SIMDE_FLOAT32_C( -55.72), SIMDE_FLOAT32_C( 451.12), SIMDE_FLOAT32_C( -161.09), SIMDE_FLOAT32_C( -657.43), SIMDE_FLOAT32_C( -893.09), SIMDE_FLOAT32_C( 297.36), SIMDE_FLOAT32_C( 82.04), SIMDE_FLOAT32_C( -657.43), SIMDE_FLOAT32_C( 288.58), SIMDE_FLOAT32_C( -824.86), SIMDE_FLOAT32_C( 88.61), SIMDE_FLOAT32_C( -371.31)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 645.39), SIMDE_FLOAT32_C( 
-81.78), SIMDE_FLOAT32_C( 426.88), SIMDE_FLOAT32_C( 367.09), SIMDE_FLOAT32_C( 269.97), SIMDE_FLOAT32_C( 513.55), SIMDE_FLOAT32_C( -433.45), SIMDE_FLOAT32_C( -401.21), SIMDE_FLOAT32_C( -679.71), SIMDE_FLOAT32_C( -426.49), SIMDE_FLOAT32_C( -878.06), SIMDE_FLOAT32_C( 441.47), SIMDE_FLOAT32_C( -137.95), SIMDE_FLOAT32_C( 177.44), SIMDE_FLOAT32_C( -55.07), SIMDE_FLOAT32_C( -581.76)), simde_mm512_set_epi32(INT32_C( 582660853), INT32_C(-1326740073), INT32_C( 1332684527), INT32_C( 720865928), INT32_C( 1806675616), INT32_C( 1369928435), INT32_C( 2026484746), INT32_C( 939514243), INT32_C( 1533779841), INT32_C( 1890230239), INT32_C( 631027080), INT32_C( 679389494), INT32_C( 2030531254), INT32_C( 78143054), INT32_C( 1358587101), INT32_C( 1101843618)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -978.39), SIMDE_FLOAT32_C( -650.69), SIMDE_FLOAT32_C( 796.63), SIMDE_FLOAT32_C( -910.00), SIMDE_FLOAT32_C( -646.84), SIMDE_FLOAT32_C( -538.44), SIMDE_FLOAT32_C( -130.00), SIMDE_FLOAT32_C( -692.07), SIMDE_FLOAT32_C( -408.06), SIMDE_FLOAT32_C( -675.29), SIMDE_FLOAT32_C( -359.85), SIMDE_FLOAT32_C( 775.91), SIMDE_FLOAT32_C( -262.33), SIMDE_FLOAT32_C( 878.38), SIMDE_FLOAT32_C( 212.22), SIMDE_FLOAT32_C( -803.31)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -359.85), SIMDE_FLOAT32_C( -408.06), SIMDE_FLOAT32_C( 645.39), SIMDE_FLOAT32_C( -401.21), SIMDE_FLOAT32_C( -581.76), SIMDE_FLOAT32_C( -262.33), SIMDE_FLOAT32_C( 513.55), SIMDE_FLOAT32_C( -137.95), SIMDE_FLOAT32_C( -55.07), SIMDE_FLOAT32_C( -978.39), SIMDE_FLOAT32_C( -401.21), SIMDE_FLOAT32_C( -675.29), SIMDE_FLOAT32_C( -675.29), SIMDE_FLOAT32_C( -81.78), SIMDE_FLOAT32_C( 796.63), SIMDE_FLOAT32_C( 177.44)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_permutex2var_ps(test_vec[i].a, test_vec[i].idx, test_vec[i].b); #if defined(__EMSCRIPTEN__) (void) r; #else simde_assert_m512_close(r, test_vec[i].r, 1); #endif } return 0; }

/* Table-driven test for simde_mm512_mask_permutex2var_ps.
 *
 * Each of the eight cases supplies the call arguments (a, k, idx, b) and the
 * vector r the call is expected to produce.  Every case is evaluated in turn
 * and compared against r with simde_assert_m512_close (third argument 1).
 * Returns 0 once every case has been evaluated. */
static int
test_simde_mm512_mask_permutex2var_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m512 a;     /* first argument of the call */
    simde__mmask16 k;  /* 16-bit mask argument */
    simde__m512i idx;  /* index vector argument */
    simde__m512 b;     /* last argument of the call */
    simde__m512 r;     /* expected result */
  } test_vec[8] = {
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( 232.04), SIMDE_FLOAT32_C( 774.81), SIMDE_FLOAT32_C( -599.01), SIMDE_FLOAT32_C( 69.04), SIMDE_FLOAT32_C( -149.02), SIMDE_FLOAT32_C( 240.79), SIMDE_FLOAT32_C( -839.80), SIMDE_FLOAT32_C( -556.90), SIMDE_FLOAT32_C( 160.98), SIMDE_FLOAT32_C( 391.82), SIMDE_FLOAT32_C( -569.99), SIMDE_FLOAT32_C( -327.63), SIMDE_FLOAT32_C( -172.36), SIMDE_FLOAT32_C( 393.53), SIMDE_FLOAT32_C( 36.69), SIMDE_FLOAT32_C( -135.52)),
      UINT16_C(45849),
      simde_mm512_set_epi32(INT32_C( 1711460779), INT32_C( -919570191), INT32_C( 1974152373), INT32_C( -695949043), INT32_C( -790242624), INT32_C(-1094331335), INT32_C(-1166320093), INT32_C(-2045280751), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 564965997), INT32_C( 169645898), INT32_C(-1539616610), INT32_C( 1134735685), INT32_C( 1430356381)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( 903.50), SIMDE_FLOAT32_C( -43.35), SIMDE_FLOAT32_C( 309.91), SIMDE_FLOAT32_C( 846.15), SIMDE_FLOAT32_C( -514.56), SIMDE_FLOAT32_C( -860.98), SIMDE_FLOAT32_C( -280.30), SIMDE_FLOAT32_C( 128.51), SIMDE_FLOAT32_C( 522.06), SIMDE_FLOAT32_C( -932.28), SIMDE_FLOAT32_C( 600.12), SIMDE_FLOAT32_C( -491.12), SIMDE_FLOAT32_C( -139.11), SIMDE_FLOAT32_C( -268.86), SIMDE_FLOAT32_C( -71.72), SIMDE_FLOAT32_C( 98.47)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( -149.02), SIMDE_FLOAT32_C( 774.81), SIMDE_FLOAT32_C( 600.12), SIMDE_FLOAT32_C( -599.01), SIMDE_FLOAT32_C( -149.02), SIMDE_FLOAT32_C( 240.79), SIMDE_FLOAT32_C( -172.36), SIMDE_FLOAT32_C( -71.72), SIMDE_FLOAT32_C( 160.98), SIMDE_FLOAT32_C( 391.82), SIMDE_FLOAT32_C( -569.99), SIMDE_FLOAT32_C( -599.01), SIMDE_FLOAT32_C( 240.79), SIMDE_FLOAT32_C( 393.53), SIMDE_FLOAT32_C( 36.69), SIMDE_FLOAT32_C( 309.91)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( -760.88), SIMDE_FLOAT32_C( -617.12), SIMDE_FLOAT32_C( -751.58), SIMDE_FLOAT32_C( 907.23), SIMDE_FLOAT32_C( -359.60), SIMDE_FLOAT32_C( -213.75), SIMDE_FLOAT32_C( 403.00), SIMDE_FLOAT32_C( -198.67), SIMDE_FLOAT32_C( 447.98), SIMDE_FLOAT32_C( -925.69), SIMDE_FLOAT32_C( 717.83), SIMDE_FLOAT32_C( -489.88), SIMDE_FLOAT32_C( -37.49), SIMDE_FLOAT32_C( -373.66), SIMDE_FLOAT32_C( -292.35), SIMDE_FLOAT32_C( -835.53)),
      UINT16_C(30259),
      simde_mm512_set_epi32(INT32_C( 1701079465), INT32_C( -195770682), INT32_C( 503748315), INT32_C( 1469355417), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C( -646247370), INT32_C( 1258747662), INT32_C( 1838830023), INT32_C( -532007659), INT32_C( -622852205), INT32_C( -839037220), INT32_C( 499633910), INT32_C( -260167255), INT32_C( 884163960), INT32_C( -329275629)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( 670.52), SIMDE_FLOAT32_C( 149.72), SIMDE_FLOAT32_C( 213.24), SIMDE_FLOAT32_C( -577.36), SIMDE_FLOAT32_C( 957.37), SIMDE_FLOAT32_C( -934.92), SIMDE_FLOAT32_C( -657.02), SIMDE_FLOAT32_C( -629.37), SIMDE_FLOAT32_C( 337.35), SIMDE_FLOAT32_C( -278.32), SIMDE_FLOAT32_C( -744.41), SIMDE_FLOAT32_C( 39.32), SIMDE_FLOAT32_C( 29.68), SIMDE_FLOAT32_C( -490.28), SIMDE_FLOAT32_C( 841.53), SIMDE_FLOAT32_C( 526.21)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( -760.88), SIMDE_FLOAT32_C( -925.69), SIMDE_FLOAT32_C( 957.37), SIMDE_FLOAT32_C( -657.02), SIMDE_FLOAT32_C( -359.60), SIMDE_FLOAT32_C( -751.58), SIMDE_FLOAT32_C( -278.32), SIMDE_FLOAT32_C( -198.67), SIMDE_FLOAT32_C( 447.98), SIMDE_FLOAT32_C( -925.69), SIMDE_FLOAT32_C( 29.68), SIMDE_FLOAT32_C( -577.36), SIMDE_FLOAT32_C( -37.49), SIMDE_FLOAT32_C( -373.66), SIMDE_FLOAT32_C( -629.37), SIMDE_FLOAT32_C( 29.68)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( 493.30), SIMDE_FLOAT32_C( 831.29), SIMDE_FLOAT32_C( -619.50), SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( -492.61), SIMDE_FLOAT32_C( -68.16), SIMDE_FLOAT32_C( 717.69), SIMDE_FLOAT32_C( -663.74), SIMDE_FLOAT32_C( 179.29), SIMDE_FLOAT32_C( 989.70), SIMDE_FLOAT32_C( -695.21), SIMDE_FLOAT32_C( -786.23), SIMDE_FLOAT32_C( 873.30), SIMDE_FLOAT32_C( 241.45), SIMDE_FLOAT32_C( -432.13), SIMDE_FLOAT32_C( -842.15)),
      UINT16_C(60970),
      simde_mm512_set_epi32(INT32_C( 204417556), INT32_C(-1329665093), INT32_C(-2039025377), INT32_C( 1639231015), INT32_C( 1541217841), INT32_C( 1692413538), INT32_C( 738521275), INT32_C( 159429100), INT32_C( 451955897), INT32_C( 181201098), INT32_C( 450627934), INT32_C( 2082954477), INT32_C( 1254960767), INT32_C( 1995459397), INT32_C( -11572946), INT32_C(-1087388220)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( 52.79), SIMDE_FLOAT32_C( 835.54), SIMDE_FLOAT32_C( -712.24), SIMDE_FLOAT32_C( 518.12), SIMDE_FLOAT32_C( -173.80), SIMDE_FLOAT32_C( 487.08), SIMDE_FLOAT32_C( 180.78), SIMDE_FLOAT32_C( -289.23), SIMDE_FLOAT32_C( 918.52), SIMDE_FLOAT32_C( -422.76), SIMDE_FLOAT32_C( -433.33), SIMDE_FLOAT32_C( 48.49), SIMDE_FLOAT32_C( 799.57), SIMDE_FLOAT32_C( -820.22), SIMDE_FLOAT32_C( -959.11), SIMDE_FLOAT32_C( 268.99)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( 48.49), SIMDE_FLOAT32_C( -173.80), SIMDE_FLOAT32_C( 52.79), SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( -959.11), SIMDE_FLOAT32_C( 241.45), SIMDE_FLOAT32_C( -173.80), SIMDE_FLOAT32_C( -663.74), SIMDE_FLOAT32_C( 179.29), SIMDE_FLOAT32_C( 989.70), SIMDE_FLOAT32_C( 835.54), SIMDE_FLOAT32_C( -786.23), SIMDE_FLOAT32_C( 52.79), SIMDE_FLOAT32_C( 241.45), SIMDE_FLOAT32_C( 831.29), SIMDE_FLOAT32_C( -842.15)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( -844.73), SIMDE_FLOAT32_C( 757.65), SIMDE_FLOAT32_C( 677.41), SIMDE_FLOAT32_C( -352.24), SIMDE_FLOAT32_C( -479.79), SIMDE_FLOAT32_C( 602.83), SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( -388.47), SIMDE_FLOAT32_C( -643.43), SIMDE_FLOAT32_C( -331.34), SIMDE_FLOAT32_C( 72.67), SIMDE_FLOAT32_C( -870.79), SIMDE_FLOAT32_C( -722.44), SIMDE_FLOAT32_C( 529.44), SIMDE_FLOAT32_C( -949.73), SIMDE_FLOAT32_C( 280.87)),
      UINT16_C(48011),
      simde_mm512_set_epi32(INT32_C( -811849174), INT32_C(-1510825074), INT32_C( 1897985966), INT32_C( 1445172644), INT32_C( -193622280), INT32_C( 2097959091), INT32_C( -652080500), INT32_C( 1943026961), INT32_C( 763848022), INT32_C(-2124387583), INT32_C(-1102663841), INT32_C( 712044568), INT32_C( 1641785760), INT32_C( 1696516135), INT32_C(-1123374630), INT32_C( -181070601)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( -53.79), SIMDE_FLOAT32_C( 703.31), SIMDE_FLOAT32_C( 930.79), SIMDE_FLOAT32_C( 111.33), SIMDE_FLOAT32_C( -176.75), SIMDE_FLOAT32_C( -316.94), SIMDE_FLOAT32_C( 639.68), SIMDE_FLOAT32_C( -783.00), SIMDE_FLOAT32_C( -102.18), SIMDE_FLOAT32_C( 960.00), SIMDE_FLOAT32_C( 22.93), SIMDE_FLOAT32_C( -395.13), SIMDE_FLOAT32_C( 145.63), SIMDE_FLOAT32_C( -149.04), SIMDE_FLOAT32_C( 214.37), SIMDE_FLOAT32_C( -453.25)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( 602.83), SIMDE_FLOAT32_C( 757.65), SIMDE_FLOAT32_C( 757.65), SIMDE_FLOAT32_C( -870.79), SIMDE_FLOAT32_C( -783.00), SIMDE_FLOAT32_C( 602.83), SIMDE_FLOAT32_C( -352.24), SIMDE_FLOAT32_C( 214.37), SIMDE_FLOAT32_C( 960.00), SIMDE_FLOAT32_C( -331.34), SIMDE_FLOAT32_C( 72.67), SIMDE_FLOAT32_C( -870.79), SIMDE_FLOAT32_C( 280.87), SIMDE_FLOAT32_C( 529.44), SIMDE_FLOAT32_C( -316.94), SIMDE_FLOAT32_C( -102.18)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( -528.18), SIMDE_FLOAT32_C( 813.86), SIMDE_FLOAT32_C( 925.24), SIMDE_FLOAT32_C( 849.34), SIMDE_FLOAT32_C( 112.68), SIMDE_FLOAT32_C( -96.70), SIMDE_FLOAT32_C( -44.05), SIMDE_FLOAT32_C( 432.42), SIMDE_FLOAT32_C( 421.92), SIMDE_FLOAT32_C( 718.96), SIMDE_FLOAT32_C( -864.59), SIMDE_FLOAT32_C( -334.42), SIMDE_FLOAT32_C( 660.53), SIMDE_FLOAT32_C( 748.73), SIMDE_FLOAT32_C( 996.15), SIMDE_FLOAT32_C( -607.82)),
      UINT16_C( 903),
      simde_mm512_set_epi32(INT32_C( 1969107101), INT32_C(-2063427243), INT32_C( -670405092), INT32_C(-1879729053), INT32_C( 1035482990), INT32_C( 1183910939), INT32_C( 1515345934), INT32_C(-1884003639), INT32_C( -638430290), INT32_C(-2007622482), INT32_C( 171336877), INT32_C( 59553613), INT32_C( 165266600), INT32_C( -798384264), INT32_C( 1607584815), INT32_C(-1324336584)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( 641.22), SIMDE_FLOAT32_C( -747.07), SIMDE_FLOAT32_C( -762.67), SIMDE_FLOAT32_C( 744.11), SIMDE_FLOAT32_C( 350.11), SIMDE_FLOAT32_C( 409.27), SIMDE_FLOAT32_C( 481.83), SIMDE_FLOAT32_C( 601.37), SIMDE_FLOAT32_C( -660.24), SIMDE_FLOAT32_C( -675.56), SIMDE_FLOAT32_C( -194.09), SIMDE_FLOAT32_C( 149.22), SIMDE_FLOAT32_C( 161.52), SIMDE_FLOAT32_C( 632.78), SIMDE_FLOAT32_C( 346.90), SIMDE_FLOAT32_C( -777.05)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( -528.18), SIMDE_FLOAT32_C( 813.86), SIMDE_FLOAT32_C( 925.24), SIMDE_FLOAT32_C( 849.34), SIMDE_FLOAT32_C( 112.68), SIMDE_FLOAT32_C( -96.70), SIMDE_FLOAT32_C( 813.86), SIMDE_FLOAT32_C( -44.05), SIMDE_FLOAT32_C( 813.86), SIMDE_FLOAT32_C( 718.96), SIMDE_FLOAT32_C( -864.59), SIMDE_FLOAT32_C( -334.42), SIMDE_FLOAT32_C( 660.53), SIMDE_FLOAT32_C( 601.37), SIMDE_FLOAT32_C( -528.18), SIMDE_FLOAT32_C( 601.37)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( 211.34), SIMDE_FLOAT32_C( -159.54), SIMDE_FLOAT32_C( 297.01), SIMDE_FLOAT32_C( 147.67), SIMDE_FLOAT32_C( -855.53), SIMDE_FLOAT32_C( 391.08), SIMDE_FLOAT32_C( -710.54), SIMDE_FLOAT32_C( -140.51), SIMDE_FLOAT32_C( 346.36), SIMDE_FLOAT32_C( 318.14), SIMDE_FLOAT32_C( 399.19), SIMDE_FLOAT32_C( -291.83), SIMDE_FLOAT32_C( 951.57), SIMDE_FLOAT32_C( 272.52), SIMDE_FLOAT32_C( -735.05), SIMDE_FLOAT32_C( 5.84)),
      UINT16_C( 487),
      simde_mm512_set_epi32(INT32_C(-2094713086), INT32_C( 197529411), INT32_C( 1055036471), INT32_C( 351897115), INT32_C( 1594003471), INT32_C(-1709813294), INT32_C( -133653364), INT32_C( -51462036), INT32_C( 46796230), INT32_C( 989301899), INT32_C( -691937914), INT32_C( 1667629581), INT32_C( -496700661), INT32_C(-1318801755), INT32_C( 1076515270), INT32_C(-1757573505)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( 739.21), SIMDE_FLOAT32_C( 355.61), SIMDE_FLOAT32_C( -450.77), SIMDE_FLOAT32_C( 343.42), SIMDE_FLOAT32_C( -703.95), SIMDE_FLOAT32_C( 169.29), SIMDE_FLOAT32_C( 932.53), SIMDE_FLOAT32_C( -653.71), SIMDE_FLOAT32_C( 371.73), SIMDE_FLOAT32_C( 757.18), SIMDE_FLOAT32_C( 214.84), SIMDE_FLOAT32_C( 830.24), SIMDE_FLOAT32_C( 903.53), SIMDE_FLOAT32_C( -831.08), SIMDE_FLOAT32_C( 815.07), SIMDE_FLOAT32_C( 196.06)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( 211.34), SIMDE_FLOAT32_C( -159.54), SIMDE_FLOAT32_C( 297.01), SIMDE_FLOAT32_C( 147.67), SIMDE_FLOAT32_C( -855.53), SIMDE_FLOAT32_C( 391.08), SIMDE_FLOAT32_C( -710.54), SIMDE_FLOAT32_C( 147.67), SIMDE_FLOAT32_C( 318.14), SIMDE_FLOAT32_C( -855.53), SIMDE_FLOAT32_C( 318.14), SIMDE_FLOAT32_C( -291.83), SIMDE_FLOAT32_C( 951.57), SIMDE_FLOAT32_C( 399.19), SIMDE_FLOAT32_C( 318.14), SIMDE_FLOAT32_C( 739.21)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( -482.01), SIMDE_FLOAT32_C( 218.00), SIMDE_FLOAT32_C( 412.00), SIMDE_FLOAT32_C( 409.50), SIMDE_FLOAT32_C( -290.29), SIMDE_FLOAT32_C( 337.59), SIMDE_FLOAT32_C( -137.98), SIMDE_FLOAT32_C( 723.06), SIMDE_FLOAT32_C( -632.02), SIMDE_FLOAT32_C( 769.08), SIMDE_FLOAT32_C( -269.22), SIMDE_FLOAT32_C( 771.53), SIMDE_FLOAT32_C( 554.20), SIMDE_FLOAT32_C( 154.86), SIMDE_FLOAT32_C( 918.01), SIMDE_FLOAT32_C( 348.86)),
      UINT16_C(21630),
      simde_mm512_set_epi32(INT32_C( -501257427), INT32_C(-1329431510), INT32_C( 1005777948), INT32_C( 616430734), INT32_C( 1581162255), INT32_C( 1497456456), INT32_C(-1170808415), INT32_C(-1014503666), INT32_C(-1157750165), INT32_C( 1691363299), INT32_C( 1100655145), INT32_C( 673265711), INT32_C( 1544659928), INT32_C(-1956803094), INT32_C( 1970109422), INT32_C(-1197844366)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( 904.77), SIMDE_FLOAT32_C( -16.01), SIMDE_FLOAT32_C( -8.87), SIMDE_FLOAT32_C( -521.88), SIMDE_FLOAT32_C( -842.76), SIMDE_FLOAT32_C( 871.64), SIMDE_FLOAT32_C( 769.47), SIMDE_FLOAT32_C( -997.66), SIMDE_FLOAT32_C( 691.39), SIMDE_FLOAT32_C( 987.54), SIMDE_FLOAT32_C( -288.94), SIMDE_FLOAT32_C( 506.40), SIMDE_FLOAT32_C( -318.39), SIMDE_FLOAT32_C( -477.43), SIMDE_FLOAT32_C( 119.12), SIMDE_FLOAT32_C( 397.77)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( -482.01), SIMDE_FLOAT32_C( 337.59), SIMDE_FLOAT32_C( 412.00), SIMDE_FLOAT32_C( 218.00), SIMDE_FLOAT32_C( -290.29), SIMDE_FLOAT32_C( 723.06), SIMDE_FLOAT32_C( -137.98), SIMDE_FLOAT32_C( 723.06), SIMDE_FLOAT32_C( -632.02), SIMDE_FLOAT32_C( 554.20), SIMDE_FLOAT32_C( -137.98), SIMDE_FLOAT32_C( -482.01), SIMDE_FLOAT32_C( -997.66), SIMDE_FLOAT32_C( 337.59), SIMDE_FLOAT32_C( 218.00), SIMDE_FLOAT32_C( 348.86)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( -714.57), SIMDE_FLOAT32_C( -728.75), SIMDE_FLOAT32_C( -712.01), SIMDE_FLOAT32_C( -346.18), SIMDE_FLOAT32_C( 238.27), SIMDE_FLOAT32_C( -879.28), SIMDE_FLOAT32_C( -391.86), SIMDE_FLOAT32_C( -824.42), SIMDE_FLOAT32_C( 939.74), SIMDE_FLOAT32_C( 471.05), SIMDE_FLOAT32_C( -276.21), SIMDE_FLOAT32_C( 528.15), SIMDE_FLOAT32_C( 113.46), SIMDE_FLOAT32_C( 829.33), SIMDE_FLOAT32_C( -265.53), SIMDE_FLOAT32_C( -933.24)),
      UINT16_C(42527),
      simde_mm512_set_epi32(INT32_C( -675362282), INT32_C(-1218762696), INT32_C(-1400182216), INT32_C(-2088680370), INT32_C(-1895497877), INT32_C( 1563893931), INT32_C( 1105770515), INT32_C(-1745770541), INT32_C(-1255255240), INT32_C( 1238532704), INT32_C( 995946229), INT32_C( 119517601), INT32_C( 1389614040), INT32_C(-2032996348), INT32_C( 1996749952), INT32_C( 450477794)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( 705.41), SIMDE_FLOAT32_C( -150.22), SIMDE_FLOAT32_C( 115.26), SIMDE_FLOAT32_C( -433.46), SIMDE_FLOAT32_C( -112.53), SIMDE_FLOAT32_C( -754.09), SIMDE_FLOAT32_C( -873.63), SIMDE_FLOAT32_C( -546.06), SIMDE_FLOAT32_C( 126.79), SIMDE_FLOAT32_C( -885.01), SIMDE_FLOAT32_C( 749.00), SIMDE_FLOAT32_C( -249.67), SIMDE_FLOAT32_C( -471.39), SIMDE_FLOAT32_C( -437.78), SIMDE_FLOAT32_C( -357.63), SIMDE_FLOAT32_C( 772.54)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( -885.01), SIMDE_FLOAT32_C( -728.75), SIMDE_FLOAT32_C( -546.06), SIMDE_FLOAT32_C( -346.18), SIMDE_FLOAT32_C( 238.27), SIMDE_FLOAT32_C( 238.27), SIMDE_FLOAT32_C( -471.39), SIMDE_FLOAT32_C( -824.42), SIMDE_FLOAT32_C( 939.74), SIMDE_FLOAT32_C( 471.05), SIMDE_FLOAT32_C( -276.21), SIMDE_FLOAT32_C( -265.53), SIMDE_FLOAT32_C( -546.06), SIMDE_FLOAT32_C( 528.15), SIMDE_FLOAT32_C( -933.24), SIMDE_FLOAT32_C( 829.33)) },
  };

  /* Number of cases, derived from the table so it cannot drift from it. */
  const size_t case_count = sizeof(test_vec) / sizeof(test_vec[0]);

  /* NOTE(review): `i` and `r` are deliberately left unrenamed; the assertion
   * macro receives the expressions `r` and `test_vec[i].r` and may embed them
   * in its failure output -- confirm before renaming. */
  for (size_t i = 0 ; i < case_count ; ++i) {
    simde__m512 r =
      simde_mm512_mask_permutex2var_ps(
        test_vec[i].a,
        test_vec[i].k,
        test_vec[i].idx,
        test_vec[i].b);

    simde_assert_m512_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Table-driven test for simde_mm512_mask2_permutex2var_ps.
 *
 * Each of the eight cases supplies the call arguments (a, idx, k, b) -- note
 * that here the index vector precedes the mask -- together with the vector r
 * the call is expected to produce.  Every case is evaluated in turn and
 * compared against r with simde_assert_m512_close (third argument 1).
 * Returns 0 once every case has been evaluated. */
static int
test_simde_mm512_mask2_permutex2var_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m512 a;     /* first argument of the call */
    simde__m512i idx;  /* index vector argument */
    simde__mmask16 k;  /* 16-bit mask argument */
    simde__m512 b;     /* last argument of the call */
    simde__m512 r;     /* expected result */
  } test_vec[8] = {
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( 877.96), SIMDE_FLOAT32_C( 184.43), SIMDE_FLOAT32_C( 375.34), SIMDE_FLOAT32_C( 389.76), SIMDE_FLOAT32_C( 437.16), SIMDE_FLOAT32_C( -638.93), SIMDE_FLOAT32_C( 773.45), SIMDE_FLOAT32_C( 109.31), SIMDE_FLOAT32_C( 824.77), SIMDE_FLOAT32_C( 172.30), SIMDE_FLOAT32_C( -660.39), SIMDE_FLOAT32_C( -605.88), SIMDE_FLOAT32_C( -689.22), SIMDE_FLOAT32_C( -25.12), SIMDE_FLOAT32_C( 921.58), SIMDE_FLOAT32_C( 433.31)),
      simde_mm512_set_epi32(INT32_C( 1145310577), INT32_C( 1144464671), INT32_C(-1000882995), INT32_C(-1015975772), INT32_C(-1012966359), INT32_C( 1142785802), INT32_C(-1004452250), INT32_C(-1016706499), INT32_C( 1148133540), INT32_C( 1143775396), INT32_C( 1145653494), INT32_C( 1092154819), INT32_C(-1002177167), INT32_C( 1142945710), INT32_C(-1005107118), INT32_C(-1003331748)),
      UINT16_C( 7750),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( 467.94), SIMDE_FLOAT32_C( 141.60), SIMDE_FLOAT32_C( 23.96), SIMDE_FLOAT32_C( -171.38), SIMDE_FLOAT32_C( -106.17), SIMDE_FLOAT32_C( -737.60), SIMDE_FLOAT32_C( -140.98), SIMDE_FLOAT32_C( -735.83), SIMDE_FLOAT32_C( -867.63), SIMDE_FLOAT32_C( -65.75), SIMDE_FLOAT32_C( 137.16), SIMDE_FLOAT32_C( -73.36), SIMDE_FLOAT32_C( -396.53), SIMDE_FLOAT32_C( -409.15), SIMDE_FLOAT32_C( 672.29), SIMDE_FLOAT32_C( 992.21)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( 784.21), SIMDE_FLOAT32_C( 732.58), SIMDE_FLOAT32_C( -862.95), SIMDE_FLOAT32_C( -605.88), SIMDE_FLOAT32_C( 773.45), SIMDE_FLOAT32_C( -638.93), SIMDE_FLOAT32_C( 172.30), SIMDE_FLOAT32_C( -230.29), SIMDE_FLOAT32_C( 956.51), SIMDE_FLOAT32_C( -605.88), SIMDE_FLOAT32_C( 805.14), SIMDE_FLOAT32_C( 9.56), SIMDE_FLOAT32_C( -783.96), SIMDE_FLOAT32_C( 184.43), SIMDE_FLOAT32_C( -409.15), SIMDE_FLOAT32_C( -713.49)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( -200.75), SIMDE_FLOAT32_C( -165.75), SIMDE_FLOAT32_C( -249.91), SIMDE_FLOAT32_C( -394.72), SIMDE_FLOAT32_C( 706.66), SIMDE_FLOAT32_C( -789.81), SIMDE_FLOAT32_C( -218.09), SIMDE_FLOAT32_C( -475.08), SIMDE_FLOAT32_C( 601.08), SIMDE_FLOAT32_C( -590.62), SIMDE_FLOAT32_C( 446.98), SIMDE_FLOAT32_C( 51.90), SIMDE_FLOAT32_C( 597.18), SIMDE_FLOAT32_C( -19.20), SIMDE_FLOAT32_C( -536.51), SIMDE_FLOAT32_C( 510.31)),
      simde_mm512_set_epi32(INT32_C( 1141430682), INT32_C( -998721126), INT32_C(-1006378189), INT32_C( 1133601096), INT32_C(-1001273426), INT32_C( 1143976919), INT32_C(-1023309251), INT32_C( 1140917043), INT32_C( 1146448282), INT32_C( 1140298875), INT32_C( -998775030), INT32_C( 1123123200), INT32_C(-1008623616), INT32_C(-1003229512), INT32_C(-1013737062), INT32_C( 1137376297)),
      UINT16_C(36885),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( 985.03), SIMDE_FLOAT32_C( 291.60), SIMDE_FLOAT32_C( -732.66), SIMDE_FLOAT32_C( 106.36), SIMDE_FLOAT32_C( 883.40), SIMDE_FLOAT32_C( 8.63), SIMDE_FLOAT32_C( 178.77), SIMDE_FLOAT32_C( -118.91), SIMDE_FLOAT32_C( -944.88), SIMDE_FLOAT32_C( -743.12), SIMDE_FLOAT32_C( 752.91), SIMDE_FLOAT32_C( -489.26), SIMDE_FLOAT32_C( 163.33), SIMDE_FLOAT32_C( 566.34), SIMDE_FLOAT32_C( -175.54), SIMDE_FLOAT32_C( -823.41)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( 8.63), SIMDE_FLOAT32_C( -994.90), SIMDE_FLOAT32_C( -527.55), SIMDE_FLOAT32_C( -475.08), SIMDE_FLOAT32_C( -839.12), SIMDE_FLOAT32_C( 702.81), SIMDE_FLOAT32_C( -129.54), SIMDE_FLOAT32_C( 516.05), SIMDE_FLOAT32_C( 853.65), SIMDE_FLOAT32_C( 495.16), SIMDE_FLOAT32_C( -991.61), SIMDE_FLOAT32_C( 510.31), SIMDE_FLOAT32_C( -451.25), SIMDE_FLOAT32_C( -118.91), SIMDE_FLOAT32_C( -295.20), SIMDE_FLOAT32_C( -218.09)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( -242.78), SIMDE_FLOAT32_C( 250.83), SIMDE_FLOAT32_C( -818.85), SIMDE_FLOAT32_C( -959.57), SIMDE_FLOAT32_C( 925.03), SIMDE_FLOAT32_C( -644.70), SIMDE_FLOAT32_C( -800.67), SIMDE_FLOAT32_C( 914.15), SIMDE_FLOAT32_C( 333.29), SIMDE_FLOAT32_C( 760.10), SIMDE_FLOAT32_C( 397.05), SIMDE_FLOAT32_C( 356.22), SIMDE_FLOAT32_C( -4.35), SIMDE_FLOAT32_C( 408.12), SIMDE_FLOAT32_C( 89.10), SIMDE_FLOAT32_C( 557.43)),
      simde_mm512_set_epi32(INT32_C(-1000840397), INT32_C(-1014681108), INT32_C(-1002009723), INT32_C(-1001187082), INT32_C(-1005810811), INT32_C(-1019216527), INT32_C( -999992852), INT32_C(-1002857595), INT32_C( 1126940672), INT32_C( 1147155743), INT32_C( 1136568238), INT32_C(-1008068198), INT32_C(-1019846328), INT32_C(-1004496650), INT32_C(-1000071004), INT32_C(-1004023153)),
      UINT16_C( 3523),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( -645.40), SIMDE_FLOAT32_C( -442.29), SIMDE_FLOAT32_C( -336.85), SIMDE_FLOAT32_C( -152.85), SIMDE_FLOAT32_C( -363.79), SIMDE_FLOAT32_C( -614.03), SIMDE_FLOAT32_C( -923.13), SIMDE_FLOAT32_C( 985.40), SIMDE_FLOAT32_C( -733.70), SIMDE_FLOAT32_C( -80.62), SIMDE_FLOAT32_C( 200.37), SIMDE_FLOAT32_C( -613.00), SIMDE_FLOAT32_C( -530.37), SIMDE_FLOAT32_C( 940.29), SIMDE_FLOAT32_C( -837.65), SIMDE_FLOAT32_C( -127.38)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( -865.55), SIMDE_FLOAT32_C( -266.39), SIMDE_FLOAT32_C( -794.18), SIMDE_FLOAT32_C( -844.39), SIMDE_FLOAT32_C( 397.05), SIMDE_FLOAT32_C( -837.65), SIMDE_FLOAT32_C( -917.28), SIMDE_FLOAT32_C( 397.05), SIMDE_FLOAT32_C( 557.43), SIMDE_FLOAT32_C( -645.40), SIMDE_FLOAT32_C( 381.31), SIMDE_FLOAT32_C( -468.20), SIMDE_FLOAT32_C( -182.38), SIMDE_FLOAT32_C( -642.39), SIMDE_FLOAT32_C( 356.22), SIMDE_FLOAT32_C( -242.78)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( 419.61), SIMDE_FLOAT32_C( -753.42), SIMDE_FLOAT32_C( 687.05), SIMDE_FLOAT32_C( 241.87), SIMDE_FLOAT32_C( -80.26), SIMDE_FLOAT32_C( -667.68), SIMDE_FLOAT32_C( 776.25), SIMDE_FLOAT32_C( -748.43), SIMDE_FLOAT32_C( -53.25), SIMDE_FLOAT32_C( -132.01), SIMDE_FLOAT32_C( -767.78), SIMDE_FLOAT32_C( -9.36), SIMDE_FLOAT32_C( -226.72), SIMDE_FLOAT32_C( -661.68), SIMDE_FLOAT32_C( -655.84), SIMDE_FLOAT32_C( -744.21)),
      simde_mm512_set_epi32(INT32_C( -998850560), INT32_C( 1143600415), INT32_C(-1000731771), INT32_C( 1114277151), INT32_C(-1010340332), INT32_C(-1001651732), INT32_C( 1143390372), INT32_C( 1119634063), INT32_C(-1019717222), INT32_C(-1031045120), INT32_C(-1011098255), INT32_C( 1132600689), INT32_C(-1010611978), INT32_C( 1128936899), INT32_C(-1001749053), INT32_C( 1140980122)),
      UINT16_C(37204),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( -518.52), SIMDE_FLOAT32_C( 431.51), SIMDE_FLOAT32_C( 99.29), SIMDE_FLOAT32_C( 936.82), SIMDE_FLOAT32_C( -99.13), SIMDE_FLOAT32_C( -566.48), SIMDE_FLOAT32_C( 699.34), SIMDE_FLOAT32_C( 845.75), SIMDE_FLOAT32_C( 43.86), SIMDE_FLOAT32_C( -170.77), SIMDE_FLOAT32_C( 428.74), SIMDE_FLOAT32_C( 695.01), SIMDE_FLOAT32_C( -702.60), SIMDE_FLOAT32_C( -231.45), SIMDE_FLOAT32_C( -416.99), SIMDE_FLOAT32_C( 838.71)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( -744.21), SIMDE_FLOAT32_C( 679.83), SIMDE_FLOAT32_C( -872.18), SIMDE_FLOAT32_C( -518.52), SIMDE_FLOAT32_C( -398.86), SIMDE_FLOAT32_C( -816.03), SIMDE_FLOAT32_C( 667.01), SIMDE_FLOAT32_C( 419.61), SIMDE_FLOAT32_C( -184.35), SIMDE_FLOAT32_C( -744.21), SIMDE_FLOAT32_C( -375.73), SIMDE_FLOAT32_C( -416.99), SIMDE_FLOAT32_C( -390.57), SIMDE_FLOAT32_C( -226.72), SIMDE_FLOAT32_C( -810.09), SIMDE_FLOAT32_C( 519.90)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( 924.35), SIMDE_FLOAT32_C( -647.29), SIMDE_FLOAT32_C( -651.10), SIMDE_FLOAT32_C( 678.72), SIMDE_FLOAT32_C( 318.50), SIMDE_FLOAT32_C( 607.02), SIMDE_FLOAT32_C( 959.46), SIMDE_FLOAT32_C( 708.40), SIMDE_FLOAT32_C( 684.78), SIMDE_FLOAT32_C( 938.65), SIMDE_FLOAT32_C( 79.72), SIMDE_FLOAT32_C( -367.19), SIMDE_FLOAT32_C( -848.84), SIMDE_FLOAT32_C( -319.22), SIMDE_FLOAT32_C( 807.26), SIMDE_FLOAT32_C( -797.03)),
      simde_mm512_set_epi32(INT32_C( 1144478269), INT32_C(-1014431089), INT32_C( 1145887785), INT32_C(-1000062648), INT32_C(-1004164383), INT32_C(-1038973665), INT32_C(-1013049590), INT32_C( 1139362365), INT32_C( 1143090545), INT32_C( 1142120612), INT32_C(-1031333478), INT32_C( 1146798899), INT32_C( 1148490383), INT32_C( 1146840351), INT32_C(-1034768876), INT32_C(-1003322081)),
      UINT16_C(41858),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( 538.68), SIMDE_FLOAT32_C( -878.56), SIMDE_FLOAT32_C( 48.75), SIMDE_FLOAT32_C( -299.81), SIMDE_FLOAT32_C( -177.03), SIMDE_FLOAT32_C( -530.47), SIMDE_FLOAT32_C( 848.40), SIMDE_FLOAT32_C( -491.55), SIMDE_FLOAT32_C( 978.68), SIMDE_FLOAT32_C( 838.38), SIMDE_FLOAT32_C( 198.53), SIMDE_FLOAT32_C( -129.15), SIMDE_FLOAT32_C( -588.91), SIMDE_FLOAT32_C( 399.94), SIMDE_FLOAT32_C( 44.96), SIMDE_FLOAT32_C( 312.31)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( 48.75), SIMDE_FLOAT32_C( -274.02), SIMDE_FLOAT32_C( 959.46), SIMDE_FLOAT32_C( -913.02), SIMDE_FLOAT32_C( -662.67), SIMDE_FLOAT32_C( -36.63), SIMDE_FLOAT32_C( 607.02), SIMDE_FLOAT32_C( 48.75), SIMDE_FLOAT32_C( 44.96), SIMDE_FLOAT32_C( 589.51), SIMDE_FLOAT32_C( -67.55), SIMDE_FLOAT32_C( 875.05), SIMDE_FLOAT32_C( 978.29), SIMDE_FLOAT32_C( 877.58), SIMDE_FLOAT32_C( -129.15), SIMDE_FLOAT32_C( -714.08)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( -944.36), SIMDE_FLOAT32_C( -838.63), SIMDE_FLOAT32_C( 38.26), SIMDE_FLOAT32_C( -152.44), SIMDE_FLOAT32_C( 946.95), SIMDE_FLOAT32_C( -423.59), SIMDE_FLOAT32_C( -297.05), SIMDE_FLOAT32_C( -205.88), SIMDE_FLOAT32_C( -253.64), SIMDE_FLOAT32_C( -501.33), SIMDE_FLOAT32_C( -37.12), SIMDE_FLOAT32_C( -369.82), SIMDE_FLOAT32_C( -496.81), SIMDE_FLOAT32_C( -47.73), SIMDE_FLOAT32_C( -743.25), SIMDE_FLOAT32_C( -913.65)),
      simde_mm512_set_epi32(INT32_C( 1148280996), INT32_C( 1141096448), INT32_C( 1147594179), INT32_C( 1133745603), INT32_C( 1142890004), INT32_C(-1013119386), INT32_C( 1142369157), INT32_C( 1137598792), INT32_C( -999746929), INT32_C(-1011464274), INT32_C( 1136544645), INT32_C( 1147671675), INT32_C( 1148562964), INT32_C( 1137554883), INT32_C(-1006904934), INT32_C(-1018696172)),
      UINT16_C(58593),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( -84.19), SIMDE_FLOAT32_C( 399.10), SIMDE_FLOAT32_C( -610.29), SIMDE_FLOAT32_C( 149.87), SIMDE_FLOAT32_C( 820.28), SIMDE_FLOAT32_C( -278.06), SIMDE_FLOAT32_C( 765.67), SIMDE_FLOAT32_C( 664.38), SIMDE_FLOAT32_C( -117.33), SIMDE_FLOAT32_C( 617.04), SIMDE_FLOAT32_C( 690.47), SIMDE_FLOAT32_C( 983.05), SIMDE_FLOAT32_C( 980.92), SIMDE_FLOAT32_C( -996.65), SIMDE_FLOAT32_C( 209.60), SIMDE_FLOAT32_C( -480.53)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( -369.82), SIMDE_FLOAT32_C( -913.65), SIMDE_FLOAT32_C( -496.81), SIMDE_FLOAT32_C( 295.17), SIMDE_FLOAT32_C( 636.47), SIMDE_FLOAT32_C( -501.33), SIMDE_FLOAT32_C( 604.68), SIMDE_FLOAT32_C( 412.76), SIMDE_FLOAT32_C( -944.36), SIMDE_FLOAT32_C( -838.63), SIMDE_FLOAT32_C( -37.12), SIMDE_FLOAT32_C( 928.32), SIMDE_FLOAT32_C( 982.72), SIMDE_FLOAT32_C( 411.42), SIMDE_FLOAT32_C( -503.70), SIMDE_FLOAT32_C( 983.05)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( -918.50), SIMDE_FLOAT32_C( -515.44), SIMDE_FLOAT32_C( -137.15), SIMDE_FLOAT32_C( 561.32), SIMDE_FLOAT32_C( 501.72), SIMDE_FLOAT32_C( -843.67), SIMDE_FLOAT32_C( -326.79), SIMDE_FLOAT32_C( 106.60), SIMDE_FLOAT32_C( 380.78), SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( -699.45), SIMDE_FLOAT32_C( 503.17), SIMDE_FLOAT32_C( -803.82), SIMDE_FLOAT32_C( 78.52), SIMDE_FLOAT32_C( 647.26), SIMDE_FLOAT32_C( -393.01)),
      simde_mm512_set_epi32(INT32_C( -999928627), INT32_C( 1129960571), INT32_C( 1099662623), INT32_C(-1017545359), INT32_C( -999118602), INT32_C( 1144669635), INT32_C(-1000925266), INT32_C(-1002151281), INT32_C(-1020877210), INT32_C(-1027625452), INT32_C(-1004458476), INT32_C(-1004674089), INT32_C(-1004804342), INT32_C( 1143535534), INT32_C( 1125517230), INT32_C( 1132990300)),
      UINT16_C(14071),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( -663.05), SIMDE_FLOAT32_C( -657.03), SIMDE_FLOAT32_C( -368.45), SIMDE_FLOAT32_C( 822.35), SIMDE_FLOAT32_C( 469.97), SIMDE_FLOAT32_C( 973.10), SIMDE_FLOAT32_C( 419.44), SIMDE_FLOAT32_C( -510.97), SIMDE_FLOAT32_C( -448.70), SIMDE_FLOAT32_C( -780.36), SIMDE_FLOAT32_C( 722.25), SIMDE_FLOAT32_C( 745.47), SIMDE_FLOAT32_C( 381.61), SIMDE_FLOAT32_C( 670.39), SIMDE_FLOAT32_C( 71.28), SIMDE_FLOAT32_C( -178.06)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( -921.20), SIMDE_FLOAT32_C( 217.83), SIMDE_FLOAT32_C( -663.05), SIMDE_FLOAT32_C( 71.28), SIMDE_FLOAT32_C( -970.64), SIMDE_FLOAT32_C( -803.82), SIMDE_FLOAT32_C( -515.44), SIMDE_FLOAT32_C( -785.54), SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( 745.47), SIMDE_FLOAT32_C( 745.47), SIMDE_FLOAT32_C( -448.70), SIMDE_FLOAT32_C( -623.61), SIMDE_FLOAT32_C( -515.44), SIMDE_FLOAT32_C( -515.44), SIMDE_FLOAT32_C( 822.35)) },
    { simde_mm512_set_ps(SIMDE_FLOAT32_C( -199.95), SIMDE_FLOAT32_C( -694.07), SIMDE_FLOAT32_C( 741.25), SIMDE_FLOAT32_C( -612.71), SIMDE_FLOAT32_C( 316.26), SIMDE_FLOAT32_C( -621.79), SIMDE_FLOAT32_C( 163.37), SIMDE_FLOAT32_C( 389.35), SIMDE_FLOAT32_C( 146.51), SIMDE_FLOAT32_C( 723.78), SIMDE_FLOAT32_C( 687.77), SIMDE_FLOAT32_C( 582.14), SIMDE_FLOAT32_C( 891.09), SIMDE_FLOAT32_C( -226.06), SIMDE_FLOAT32_C( 206.62), SIMDE_FLOAT32_C( 608.43)),
      simde_mm512_set_epi32(INT32_C( 1143844536), INT32_C(-1020415181), INT32_C(-1053032448), INT32_C( 1138942607), INT32_C( 1134596915), INT32_C( 1146536264), INT32_C( 1145270436), INT32_C( 1147929395), INT32_C( 1143444111), INT32_C( 1126843679), INT32_C(-1004511724), INT32_C( 1141078753), INT32_C(-1010149949), INT32_C(-1005574062), INT32_C(-1016484332), INT32_C(-1003633050)),
      UINT16_C(28924),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( 725.47), SIMDE_FLOAT32_C( 989.71), SIMDE_FLOAT32_C( 233.61), SIMDE_FLOAT32_C( 876.86), SIMDE_FLOAT32_C( -135.15), SIMDE_FLOAT32_C( 476.19), SIMDE_FLOAT32_C( 231.31), SIMDE_FLOAT32_C( -79.03), SIMDE_FLOAT32_C( -501.37), SIMDE_FLOAT32_C( 310.62), SIMDE_FLOAT32_C( 652.20), SIMDE_FLOAT32_C( -426.66), SIMDE_FLOAT32_C( -323.40), SIMDE_FLOAT32_C( -305.08), SIMDE_FLOAT32_C( -4.94), SIMDE_FLOAT32_C( -30.71)),
      simde_mm512_set_ps(SIMDE_FLOAT32_C( 694.73), SIMDE_FLOAT32_C( -323.40), SIMDE_FLOAT32_C( 608.43), SIMDE_FLOAT32_C( -199.95), SIMDE_FLOAT32_C( 321.15), SIMDE_FLOAT32_C( 859.02), SIMDE_FLOAT32_C( 781.76), SIMDE_FLOAT32_C( 944.05), SIMDE_FLOAT32_C( -199.95), SIMDE_FLOAT32_C( 725.47), SIMDE_FLOAT32_C( -426.66), SIMDE_FLOAT32_C( 206.62), SIMDE_FLOAT32_C( 891.09), SIMDE_FLOAT32_C( -305.08), SIMDE_FLOAT32_C( -233.68), SIMDE_FLOAT32_C( -695.10)) },
  };

  /* Number of cases, derived from the table so it cannot drift from it. */
  const size_t case_count = sizeof(test_vec) / sizeof(test_vec[0]);

  /* NOTE(review): `i` and `r` are deliberately left unrenamed; the assertion
   * macro receives the expressions `r` and `test_vec[i].r` and may embed them
   * in its failure output -- confirm before renaming. */
  for (size_t i = 0 ; i < case_count ; ++i) {
    simde__m512 r =
      simde_mm512_mask2_permutex2var_ps(
        test_vec[i].a,
        test_vec[i].idx,
        test_vec[i].k,
        test_vec[i].b);

    simde_assert_m512_close(r, test_vec[i].r, 1);
  }

  return 0;
}

static int test_simde_mm512_maskz_permutex2var_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512 a; simde__m512i idx; simde__m512 b; simde__m512 r; } test_vec[8] = { { UINT16_C(45849), simde_mm512_set_ps(SIMDE_FLOAT32_C( 232.04), SIMDE_FLOAT32_C( 774.81), SIMDE_FLOAT32_C( -599.01), SIMDE_FLOAT32_C( 69.04), SIMDE_FLOAT32_C( -149.02), SIMDE_FLOAT32_C( 240.79), SIMDE_FLOAT32_C( -839.80), SIMDE_FLOAT32_C( -556.90), SIMDE_FLOAT32_C( 160.98), SIMDE_FLOAT32_C( 391.82), SIMDE_FLOAT32_C( -569.99), SIMDE_FLOAT32_C( -327.63), SIMDE_FLOAT32_C( -172.36), SIMDE_FLOAT32_C( 393.53), SIMDE_FLOAT32_C( 36.69), SIMDE_FLOAT32_C( -135.52)), simde_mm512_set_epi32(INT32_C( 1711460779), INT32_C( -919570191), INT32_C( 1974152373), INT32_C( -695949043), INT32_C( -790242624), INT32_C(-1094331335), INT32_C(-1166320093), INT32_C(-2045280751), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 564965997), INT32_C( 169645898), INT32_C(-1539616610), INT32_C( 1134735685), INT32_C( 1430356381)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 903.50), SIMDE_FLOAT32_C( -43.35), SIMDE_FLOAT32_C( 309.91), SIMDE_FLOAT32_C( 846.15), SIMDE_FLOAT32_C( -514.56), SIMDE_FLOAT32_C( -860.98), SIMDE_FLOAT32_C( -280.30), SIMDE_FLOAT32_C( 128.51), SIMDE_FLOAT32_C( 522.06), SIMDE_FLOAT32_C( -932.28), SIMDE_FLOAT32_C( 600.12), SIMDE_FLOAT32_C( -491.12), SIMDE_FLOAT32_C( -139.11), SIMDE_FLOAT32_C( -268.86), SIMDE_FLOAT32_C( -71.72), SIMDE_FLOAT32_C( 98.47)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -149.02), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 600.12), SIMDE_FLOAT32_C(
-599.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -172.36), SIMDE_FLOAT32_C( -71.72), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -599.01), SIMDE_FLOAT32_C( 240.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 309.91)) }, { UINT16_C(30259), simde_mm512_set_ps(SIMDE_FLOAT32_C( -760.88), SIMDE_FLOAT32_C( -617.12), SIMDE_FLOAT32_C( -751.58), SIMDE_FLOAT32_C( 907.23), SIMDE_FLOAT32_C( -359.60), SIMDE_FLOAT32_C( -213.75), SIMDE_FLOAT32_C( 403.00), SIMDE_FLOAT32_C( -198.67), SIMDE_FLOAT32_C( 447.98), SIMDE_FLOAT32_C( -925.69), SIMDE_FLOAT32_C( 717.83), SIMDE_FLOAT32_C( -489.88), SIMDE_FLOAT32_C( -37.49), SIMDE_FLOAT32_C( -373.66), SIMDE_FLOAT32_C( -292.35), SIMDE_FLOAT32_C( -835.53)), simde_mm512_set_epi32(INT32_C( 1701079465), INT32_C( -195770682), INT32_C( 503748315), INT32_C( 1469355417), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C( -646247370), INT32_C( 1258747662), INT32_C( 1838830023), INT32_C( -532007659), INT32_C( -622852205), INT32_C( -839037220), INT32_C( 499633910), INT32_C( -260167255), INT32_C( 884163960), INT32_C( -329275629)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 670.52), SIMDE_FLOAT32_C( 149.72), SIMDE_FLOAT32_C( 213.24), SIMDE_FLOAT32_C( -577.36), SIMDE_FLOAT32_C( 957.37), SIMDE_FLOAT32_C( -934.92), SIMDE_FLOAT32_C( -657.02), SIMDE_FLOAT32_C( -629.37), SIMDE_FLOAT32_C( 337.35), SIMDE_FLOAT32_C( -278.32), SIMDE_FLOAT32_C( -744.41), SIMDE_FLOAT32_C( 39.32), SIMDE_FLOAT32_C( 29.68), SIMDE_FLOAT32_C( -490.28), SIMDE_FLOAT32_C( 841.53), SIMDE_FLOAT32_C( 526.21)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -925.69), SIMDE_FLOAT32_C( 957.37), SIMDE_FLOAT32_C( -657.02), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -751.58), SIMDE_FLOAT32_C( -278.32), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 29.68), SIMDE_FLOAT32_C( -577.36), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -629.37), 
SIMDE_FLOAT32_C( 29.68)) }, { UINT16_C(60970), simde_mm512_set_ps(SIMDE_FLOAT32_C( 493.30), SIMDE_FLOAT32_C( 831.29), SIMDE_FLOAT32_C( -619.50), SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( -492.61), SIMDE_FLOAT32_C( -68.16), SIMDE_FLOAT32_C( 717.69), SIMDE_FLOAT32_C( -663.74), SIMDE_FLOAT32_C( 179.29), SIMDE_FLOAT32_C( 989.70), SIMDE_FLOAT32_C( -695.21), SIMDE_FLOAT32_C( -786.23), SIMDE_FLOAT32_C( 873.30), SIMDE_FLOAT32_C( 241.45), SIMDE_FLOAT32_C( -432.13), SIMDE_FLOAT32_C( -842.15)), simde_mm512_set_epi32(INT32_C( 204417556), INT32_C(-1329665093), INT32_C(-2039025377), INT32_C( 1639231015), INT32_C( 1541217841), INT32_C( 1692413538), INT32_C( 738521275), INT32_C( 159429100), INT32_C( 451955897), INT32_C( 181201098), INT32_C( 450627934), INT32_C( 2082954477), INT32_C( 1254960767), INT32_C( 1995459397), INT32_C( -11572946), INT32_C(-1087388220)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 52.79), SIMDE_FLOAT32_C( 835.54), SIMDE_FLOAT32_C( -712.24), SIMDE_FLOAT32_C( 518.12), SIMDE_FLOAT32_C( -173.80), SIMDE_FLOAT32_C( 487.08), SIMDE_FLOAT32_C( 180.78), SIMDE_FLOAT32_C( -289.23), SIMDE_FLOAT32_C( 918.52), SIMDE_FLOAT32_C( -422.76), SIMDE_FLOAT32_C( -433.33), SIMDE_FLOAT32_C( 48.49), SIMDE_FLOAT32_C( 799.57), SIMDE_FLOAT32_C( -820.22), SIMDE_FLOAT32_C( -959.11), SIMDE_FLOAT32_C( 268.99)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 48.49), SIMDE_FLOAT32_C( -173.80), SIMDE_FLOAT32_C( 52.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -959.11), SIMDE_FLOAT32_C( 241.45), SIMDE_FLOAT32_C( -173.80), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 835.54), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 52.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 831.29), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(48011), simde_mm512_set_ps(SIMDE_FLOAT32_C( -844.73), SIMDE_FLOAT32_C( 757.65), SIMDE_FLOAT32_C( 677.41), SIMDE_FLOAT32_C( -352.24), SIMDE_FLOAT32_C( -479.79), SIMDE_FLOAT32_C( 602.83), SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( -388.47), SIMDE_FLOAT32_C( -643.43), 
SIMDE_FLOAT32_C( -331.34), SIMDE_FLOAT32_C( 72.67), SIMDE_FLOAT32_C( -870.79), SIMDE_FLOAT32_C( -722.44), SIMDE_FLOAT32_C( 529.44), SIMDE_FLOAT32_C( -949.73), SIMDE_FLOAT32_C( 280.87)), simde_mm512_set_epi32(INT32_C( -811849174), INT32_C(-1510825074), INT32_C( 1897985966), INT32_C( 1445172644), INT32_C( -193622280), INT32_C( 2097959091), INT32_C( -652080500), INT32_C( 1943026961), INT32_C( 763848022), INT32_C(-2124387583), INT32_C(-1102663841), INT32_C( 712044568), INT32_C( 1641785760), INT32_C( 1696516135), INT32_C(-1123374630), INT32_C( -181070601)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -53.79), SIMDE_FLOAT32_C( 703.31), SIMDE_FLOAT32_C( 930.79), SIMDE_FLOAT32_C( 111.33), SIMDE_FLOAT32_C( -176.75), SIMDE_FLOAT32_C( -316.94), SIMDE_FLOAT32_C( 639.68), SIMDE_FLOAT32_C( -783.00), SIMDE_FLOAT32_C( -102.18), SIMDE_FLOAT32_C( 960.00), SIMDE_FLOAT32_C( 22.93), SIMDE_FLOAT32_C( -395.13), SIMDE_FLOAT32_C( 145.63), SIMDE_FLOAT32_C( -149.04), SIMDE_FLOAT32_C( 214.37), SIMDE_FLOAT32_C( -453.25)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 602.83), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 757.65), SIMDE_FLOAT32_C( -870.79), SIMDE_FLOAT32_C( -783.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -352.24), SIMDE_FLOAT32_C( 214.37), SIMDE_FLOAT32_C( 960.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 280.87), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -316.94), SIMDE_FLOAT32_C( -102.18)) }, { UINT16_C( 903), simde_mm512_set_ps(SIMDE_FLOAT32_C( -528.18), SIMDE_FLOAT32_C( 813.86), SIMDE_FLOAT32_C( 925.24), SIMDE_FLOAT32_C( 849.34), SIMDE_FLOAT32_C( 112.68), SIMDE_FLOAT32_C( -96.70), SIMDE_FLOAT32_C( -44.05), SIMDE_FLOAT32_C( 432.42), SIMDE_FLOAT32_C( 421.92), SIMDE_FLOAT32_C( 718.96), SIMDE_FLOAT32_C( -864.59), SIMDE_FLOAT32_C( -334.42), SIMDE_FLOAT32_C( 660.53), SIMDE_FLOAT32_C( 748.73), SIMDE_FLOAT32_C( 996.15), SIMDE_FLOAT32_C( -607.82)), simde_mm512_set_epi32(INT32_C( 1969107101), INT32_C(-2063427243), INT32_C( -670405092), INT32_C(-1879729053), 
INT32_C( 1035482990), INT32_C( 1183910939), INT32_C( 1515345934), INT32_C(-1884003639), INT32_C( -638430290), INT32_C(-2007622482), INT32_C( 171336877), INT32_C( 59553613), INT32_C( 165266600), INT32_C( -798384264), INT32_C( 1607584815), INT32_C(-1324336584)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 641.22), SIMDE_FLOAT32_C( -747.07), SIMDE_FLOAT32_C( -762.67), SIMDE_FLOAT32_C( 744.11), SIMDE_FLOAT32_C( 350.11), SIMDE_FLOAT32_C( 409.27), SIMDE_FLOAT32_C( 481.83), SIMDE_FLOAT32_C( 601.37), SIMDE_FLOAT32_C( -660.24), SIMDE_FLOAT32_C( -675.56), SIMDE_FLOAT32_C( -194.09), SIMDE_FLOAT32_C( 149.22), SIMDE_FLOAT32_C( 161.52), SIMDE_FLOAT32_C( 632.78), SIMDE_FLOAT32_C( 346.90), SIMDE_FLOAT32_C( -777.05)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 813.86), SIMDE_FLOAT32_C( -44.05), SIMDE_FLOAT32_C( 813.86), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 601.37), SIMDE_FLOAT32_C( -528.18), SIMDE_FLOAT32_C( 601.37)) }, { UINT16_C( 487), simde_mm512_set_ps(SIMDE_FLOAT32_C( 211.34), SIMDE_FLOAT32_C( -159.54), SIMDE_FLOAT32_C( 297.01), SIMDE_FLOAT32_C( 147.67), SIMDE_FLOAT32_C( -855.53), SIMDE_FLOAT32_C( 391.08), SIMDE_FLOAT32_C( -710.54), SIMDE_FLOAT32_C( -140.51), SIMDE_FLOAT32_C( 346.36), SIMDE_FLOAT32_C( 318.14), SIMDE_FLOAT32_C( 399.19), SIMDE_FLOAT32_C( -291.83), SIMDE_FLOAT32_C( 951.57), SIMDE_FLOAT32_C( 272.52), SIMDE_FLOAT32_C( -735.05), SIMDE_FLOAT32_C( 5.84)), simde_mm512_set_epi32(INT32_C(-2094713086), INT32_C( 197529411), INT32_C( 1055036471), INT32_C( 351897115), INT32_C( 1594003471), INT32_C(-1709813294), INT32_C( -133653364), INT32_C( -51462036), INT32_C( 46796230), INT32_C( 989301899), INT32_C( -691937914), INT32_C( 1667629581), INT32_C( -496700661), INT32_C(-1318801755), INT32_C( 1076515270), INT32_C(-1757573505)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 739.21), 
SIMDE_FLOAT32_C( 355.61), SIMDE_FLOAT32_C( -450.77), SIMDE_FLOAT32_C( 343.42), SIMDE_FLOAT32_C( -703.95), SIMDE_FLOAT32_C( 169.29), SIMDE_FLOAT32_C( 932.53), SIMDE_FLOAT32_C( -653.71), SIMDE_FLOAT32_C( 371.73), SIMDE_FLOAT32_C( 757.18), SIMDE_FLOAT32_C( 214.84), SIMDE_FLOAT32_C( 830.24), SIMDE_FLOAT32_C( 903.53), SIMDE_FLOAT32_C( -831.08), SIMDE_FLOAT32_C( 815.07), SIMDE_FLOAT32_C( 196.06)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 147.67), SIMDE_FLOAT32_C( 318.14), SIMDE_FLOAT32_C( -855.53), SIMDE_FLOAT32_C( 318.14), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 399.19), SIMDE_FLOAT32_C( 318.14), SIMDE_FLOAT32_C( 739.21)) }, { UINT16_C(21630), simde_mm512_set_ps(SIMDE_FLOAT32_C( -482.01), SIMDE_FLOAT32_C( 218.00), SIMDE_FLOAT32_C( 412.00), SIMDE_FLOAT32_C( 409.50), SIMDE_FLOAT32_C( -290.29), SIMDE_FLOAT32_C( 337.59), SIMDE_FLOAT32_C( -137.98), SIMDE_FLOAT32_C( 723.06), SIMDE_FLOAT32_C( -632.02), SIMDE_FLOAT32_C( 769.08), SIMDE_FLOAT32_C( -269.22), SIMDE_FLOAT32_C( 771.53), SIMDE_FLOAT32_C( 554.20), SIMDE_FLOAT32_C( 154.86), SIMDE_FLOAT32_C( 918.01), SIMDE_FLOAT32_C( 348.86)), simde_mm512_set_epi32(INT32_C( -501257427), INT32_C(-1329431510), INT32_C( 1005777948), INT32_C( 616430734), INT32_C( 1581162255), INT32_C( 1497456456), INT32_C(-1170808415), INT32_C(-1014503666), INT32_C(-1157750165), INT32_C( 1691363299), INT32_C( 1100655145), INT32_C( 673265711), INT32_C( 1544659928), INT32_C(-1956803094), INT32_C( 1970109422), INT32_C(-1197844366)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 904.77), SIMDE_FLOAT32_C( -16.01), SIMDE_FLOAT32_C( -8.87), SIMDE_FLOAT32_C( -521.88), SIMDE_FLOAT32_C( -842.76), SIMDE_FLOAT32_C( 871.64), SIMDE_FLOAT32_C( 769.47), SIMDE_FLOAT32_C( -997.66), SIMDE_FLOAT32_C( 691.39), SIMDE_FLOAT32_C( 987.54), SIMDE_FLOAT32_C( -288.94), SIMDE_FLOAT32_C( 506.40), SIMDE_FLOAT32_C( 
-318.39), SIMDE_FLOAT32_C( -477.43), SIMDE_FLOAT32_C( 119.12), SIMDE_FLOAT32_C( 397.77)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 337.59), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 218.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 723.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 554.20), SIMDE_FLOAT32_C( -137.98), SIMDE_FLOAT32_C( -482.01), SIMDE_FLOAT32_C( -997.66), SIMDE_FLOAT32_C( 337.59), SIMDE_FLOAT32_C( 218.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(42527), simde_mm512_set_ps(SIMDE_FLOAT32_C( -714.57), SIMDE_FLOAT32_C( -728.75), SIMDE_FLOAT32_C( -712.01), SIMDE_FLOAT32_C( -346.18), SIMDE_FLOAT32_C( 238.27), SIMDE_FLOAT32_C( -879.28), SIMDE_FLOAT32_C( -391.86), SIMDE_FLOAT32_C( -824.42), SIMDE_FLOAT32_C( 939.74), SIMDE_FLOAT32_C( 471.05), SIMDE_FLOAT32_C( -276.21), SIMDE_FLOAT32_C( 528.15), SIMDE_FLOAT32_C( 113.46), SIMDE_FLOAT32_C( 829.33), SIMDE_FLOAT32_C( -265.53), SIMDE_FLOAT32_C( -933.24)), simde_mm512_set_epi32(INT32_C( -675362282), INT32_C(-1218762696), INT32_C(-1400182216), INT32_C(-2088680370), INT32_C(-1895497877), INT32_C( 1563893931), INT32_C( 1105770515), INT32_C(-1745770541), INT32_C(-1255255240), INT32_C( 1238532704), INT32_C( 995946229), INT32_C( 119517601), INT32_C( 1389614040), INT32_C(-2032996348), INT32_C( 1996749952), INT32_C( 450477794)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 705.41), SIMDE_FLOAT32_C( -150.22), SIMDE_FLOAT32_C( 115.26), SIMDE_FLOAT32_C( -433.46), SIMDE_FLOAT32_C( -112.53), SIMDE_FLOAT32_C( -754.09), SIMDE_FLOAT32_C( -873.63), SIMDE_FLOAT32_C( -546.06), SIMDE_FLOAT32_C( 126.79), SIMDE_FLOAT32_C( -885.01), SIMDE_FLOAT32_C( 749.00), SIMDE_FLOAT32_C( -249.67), SIMDE_FLOAT32_C( -471.39), SIMDE_FLOAT32_C( -437.78), SIMDE_FLOAT32_C( -357.63), SIMDE_FLOAT32_C( 772.54)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -885.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -546.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 238.27), SIMDE_FLOAT32_C( -471.39), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -265.53), SIMDE_FLOAT32_C( -546.06), SIMDE_FLOAT32_C( 528.15), SIMDE_FLOAT32_C( -933.24), SIMDE_FLOAT32_C( 829.33)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_maskz_permutex2var_ps(test_vec[i].k, test_vec[i].a, test_vec[i].idx, test_vec[i].b); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_permutex2var_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_permutex2var_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask2_permutex2var_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_permutex2var_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_permutex2var_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_permutex2var_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask2_permutex2var_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_permutex2var_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_permutex2var_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_permutex2var_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask2_permutex2var_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_permutex2var_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_permutex2var_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_permutex2var_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask2_permutex2var_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_permutex2var_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_permutex2var_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_permutex2var_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask2_permutex2var_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_permutex2var_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_permutex2var_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_permutex2var_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask2_permutex2var_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_permutex2var_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permutex2var_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_permutex2var_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask2_permutex2var_epi16) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_permutex2var_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permutex2var_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_permutex2var_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask2_permutex2var_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_permutex2var_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permutex2var_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_permutex2var_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask2_permutex2var_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_permutex2var_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permutex2var_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_permutex2var_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask2_permutex2var_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_permutex2var_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permutex2var_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_permutex2var_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask2_permutex2var_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_permutex2var_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permutex2var_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_permutex2var_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask2_permutex2var_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_permutex2var_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_permutex2var_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_permutex2var_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask2_permutex2var_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_permutex2var_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_permutex2var_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_permutex2var_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask2_permutex2var_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_permutex2var_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_permutex2var_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_permutex2var_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask2_permutex2var_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_permutex2var_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_permutex2var_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_permutex2var_epi8) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask2_permutex2var_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_permutex2var_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_permutex2var_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_permutex2var_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask2_permutex2var_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_permutex2var_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_permutex2var_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_permutex2var_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask2_permutex2var_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_permutex2var_ps) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/permutexvar.c000066400000000000000000016301651400333146700200160ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #define SIMDE_TEST_X86_AVX512_INSN permutexvar #include #include #include static int test_simde_mm_permutexvar_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t idx[8]; const int16_t a[8]; const int16_t r[8]; } test_vec[] = { { { INT16_C( 6502), -INT16_C( 30001), -INT16_C( 24228), INT16_C( 3581), -INT16_C( 616), -INT16_C( 1803), INT16_C( 7078), INT16_C( 16289) }, { -INT16_C( 9505), INT16_C( 1150), INT16_C( 11629), -INT16_C( 6820), INT16_C( 5125), -INT16_C( 892), INT16_C( 21083), -INT16_C( 15937) }, { INT16_C( 21083), -INT16_C( 15937), INT16_C( 5125), -INT16_C( 892), -INT16_C( 9505), -INT16_C( 892), INT16_C( 21083), INT16_C( 1150) } }, { { -INT16_C( 29077), -INT16_C( 14517), INT16_C( 18480), -INT16_C( 14124), -INT16_C( 14011), -INT16_C( 5184), INT16_C( 25060), -INT16_C( 15574) }, { -INT16_C( 22213), -INT16_C( 22328), INT16_C( 9430), -INT16_C( 9075), INT16_C( 4665), -INT16_C( 27432), -INT16_C( 26524), -INT16_C( 12459) }, { -INT16_C( 9075), -INT16_C( 9075), -INT16_C( 22213), INT16_C( 4665), -INT16_C( 27432), -INT16_C( 22213), INT16_C( 4665), INT16_C( 9430) } }, { { -INT16_C( 24538), INT16_C( 22166), INT16_C( 27368), INT16_C( 11550), -INT16_C( 8653), INT16_C( 5912), INT16_C( 17215), INT16_C( 31450) }, { -INT16_C( 23828), -INT16_C( 15838), -INT16_C( 20281), INT16_C( 158), INT16_C( 30658), INT16_C( 9876), -INT16_C( 5873), INT16_C( 13813) }, { -INT16_C( 5873), -INT16_C( 5873), -INT16_C( 23828), -INT16_C( 5873), INT16_C( 158), -INT16_C( 23828), INT16_C( 13813), -INT16_C( 20281) } }, { { -INT16_C( 29815), INT16_C( 29068), -INT16_C( 21771), INT16_C( 10398), -INT16_C( 18807), -INT16_C( 14273), INT16_C( 6649), -INT16_C( 6845) }, { INT16_C( 26044), -INT16_C( 31832), INT16_C( 17941), -INT16_C( 10365), INT16_C( 6077), -INT16_C( 13059), -INT16_C( 3584), -INT16_C( 30462) }, { -INT16_C( 31832), INT16_C( 6077), -INT16_C( 13059), -INT16_C( 3584), -INT16_C( 31832), -INT16_C( 30462), -INT16_C( 
31832), -INT16_C( 10365) } }, { { -INT16_C( 29059), INT16_C( 29434), -INT16_C( 26568), -INT16_C( 15974), -INT16_C( 9906), INT16_C( 18570), -INT16_C( 12813), -INT16_C( 20691) }, { -INT16_C( 10958), INT16_C( 18482), -INT16_C( 19172), -INT16_C( 9953), INT16_C( 7628), -INT16_C( 13146), -INT16_C( 22513), -INT16_C( 29355) }, { -INT16_C( 13146), -INT16_C( 19172), -INT16_C( 10958), -INT16_C( 19172), -INT16_C( 22513), -INT16_C( 19172), -INT16_C( 9953), -INT16_C( 13146) } }, { { INT16_C( 20278), INT16_C( 28415), -INT16_C( 25881), INT16_C( 13616), -INT16_C( 17805), INT16_C( 26237), -INT16_C( 21625), -INT16_C( 18155) }, { INT16_C( 18304), -INT16_C( 25599), INT16_C( 8700), -INT16_C( 14218), INT16_C( 7230), INT16_C( 19860), -INT16_C( 5692), -INT16_C( 1318) }, { -INT16_C( 5692), -INT16_C( 1318), -INT16_C( 1318), INT16_C( 18304), -INT16_C( 14218), INT16_C( 19860), -INT16_C( 1318), INT16_C( 19860) } }, { { -INT16_C( 9672), INT16_C( 8040), -INT16_C( 26508), -INT16_C( 6315), -INT16_C( 11694), -INT16_C( 9906), INT16_C( 25469), -INT16_C( 365) }, { -INT16_C( 27477), -INT16_C( 22630), INT16_C( 4277), -INT16_C( 3216), INT16_C( 1068), -INT16_C( 4031), INT16_C( 7150), INT16_C( 9962) }, { -INT16_C( 27477), -INT16_C( 27477), INT16_C( 1068), -INT16_C( 4031), INT16_C( 4277), INT16_C( 7150), -INT16_C( 4031), -INT16_C( 3216) } }, { { INT16_C( 21493), INT16_C( 26950), -INT16_C( 25621), INT16_C( 15953), -INT16_C( 24723), -INT16_C( 5353), -INT16_C( 22014), -INT16_C( 21015) }, { -INT16_C( 31937), -INT16_C( 2987), -INT16_C( 14956), -INT16_C( 16152), INT16_C( 10697), -INT16_C( 18511), -INT16_C( 25788), INT16_C( 15070) }, { -INT16_C( 18511), -INT16_C( 25788), -INT16_C( 16152), -INT16_C( 2987), -INT16_C( 18511), INT16_C( 15070), -INT16_C( 14956), -INT16_C( 2987) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i idx = simde_mm_loadu_epi16(test_vec[i].idx); simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i r = simde_mm_permutexvar_epi16(idx, a); 
simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i idx = simde_test_x86_random_i16x8(); simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_permutexvar_epi16(idx, a); simde_test_x86_write_i16x8(2, idx, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_mask_permutexvar_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t src[8]; const simde__mmask8 k; const int16_t idx[8]; const int16_t a[8]; const int16_t r[8]; } test_vec[] = { { { INT16_C( 9454), -INT16_C( 9565), -INT16_C( 2881), INT16_C( 11288), INT16_C( 12179), -INT16_C( 27113), INT16_C( 218), INT16_C( 6467) }, UINT8_C(132), { INT16_C( 3480), INT16_C( 23832), -INT16_C( 9995), INT16_C( 7719), -INT16_C( 8567), INT16_C( 9571), -INT16_C( 25156), -INT16_C( 8173) }, { -INT16_C( 4800), INT16_C( 13727), -INT16_C( 13307), INT16_C( 13768), INT16_C( 24291), -INT16_C( 7153), INT16_C( 10402), INT16_C( 14952) }, { INT16_C( 9454), -INT16_C( 9565), -INT16_C( 7153), INT16_C( 11288), INT16_C( 12179), -INT16_C( 27113), INT16_C( 218), INT16_C( 13768) } }, { { -INT16_C( 32715), INT16_C( 11160), -INT16_C( 16552), -INT16_C( 7607), -INT16_C( 21347), INT16_C( 23047), INT16_C( 6729), -INT16_C( 30150) }, UINT8_C( 8), { -INT16_C( 16422), -INT16_C( 23027), INT16_C( 17031), -INT16_C( 6519), INT16_C( 27985), INT16_C( 31112), -INT16_C( 15659), INT16_C( 21935) }, { -INT16_C( 9638), INT16_C( 6574), -INT16_C( 28637), -INT16_C( 12105), INT16_C( 4503), -INT16_C( 20199), -INT16_C( 23733), INT16_C( 9657) }, { -INT16_C( 32715), INT16_C( 11160), -INT16_C( 16552), INT16_C( 6574), -INT16_C( 21347), INT16_C( 23047), INT16_C( 6729), -INT16_C( 30150) } }, { { -INT16_C( 14494), -INT16_C( 5429), INT16_C( 21769), INT16_C( 23504), INT16_C( 22722), -INT16_C( 26412), 
-INT16_C( 31974), INT16_C( 30189) }, UINT8_C( 93), { -INT16_C( 29029), INT16_C( 11137), INT16_C( 20805), INT16_C( 22210), INT16_C( 29802), INT16_C( 3746), -INT16_C( 14547), -INT16_C( 2960) }, { INT16_C( 23187), -INT16_C( 5890), INT16_C( 22826), -INT16_C( 32086), INT16_C( 16941), -INT16_C( 20067), INT16_C( 4656), -INT16_C( 13554) }, { -INT16_C( 32086), -INT16_C( 5429), -INT16_C( 20067), INT16_C( 22826), INT16_C( 22826), -INT16_C( 26412), -INT16_C( 20067), INT16_C( 30189) } }, { { -INT16_C( 28768), -INT16_C( 6409), -INT16_C( 17952), INT16_C( 19260), -INT16_C( 8659), INT16_C( 23385), -INT16_C( 13914), INT16_C( 14671) }, UINT8_C( 36), { INT16_C( 8525), -INT16_C( 22962), -INT16_C( 11829), INT16_C( 3796), -INT16_C( 31378), -INT16_C( 32706), INT16_C( 2451), INT16_C( 8992) }, { INT16_C( 1536), -INT16_C( 17917), INT16_C( 20035), INT16_C( 8679), INT16_C( 17063), INT16_C( 29127), INT16_C( 146), -INT16_C( 8299) }, { -INT16_C( 28768), -INT16_C( 6409), INT16_C( 8679), INT16_C( 19260), -INT16_C( 8659), INT16_C( 146), -INT16_C( 13914), INT16_C( 14671) } }, { { -INT16_C( 7391), -INT16_C( 4730), INT16_C( 23220), INT16_C( 8955), INT16_C( 14815), INT16_C( 29346), -INT16_C( 15550), INT16_C( 17301) }, UINT8_C(201), { -INT16_C( 615), -INT16_C( 6388), INT16_C( 12004), INT16_C( 10127), INT16_C( 245), -INT16_C( 2375), -INT16_C( 26475), INT16_C( 30743) }, { INT16_C( 1054), INT16_C( 30765), INT16_C( 20479), INT16_C( 14423), -INT16_C( 13582), -INT16_C( 19077), -INT16_C( 16801), -INT16_C( 1922) }, { INT16_C( 30765), -INT16_C( 4730), INT16_C( 23220), -INT16_C( 1922), INT16_C( 14815), INT16_C( 29346), -INT16_C( 19077), -INT16_C( 1922) } }, { { -INT16_C( 29765), -INT16_C( 24608), INT16_C( 28601), -INT16_C( 20794), INT16_C( 32623), INT16_C( 1188), -INT16_C( 17384), INT16_C( 13948) }, UINT8_C(192), { -INT16_C( 20567), -INT16_C( 1600), -INT16_C( 2042), -INT16_C( 12053), -INT16_C( 24461), INT16_C( 12592), INT16_C( 10270), -INT16_C( 22036) }, { -INT16_C( 29688), INT16_C( 30562), INT16_C( 4434), 
-INT16_C( 11546), -INT16_C( 5451), INT16_C( 29162), INT16_C( 8295), INT16_C( 4146) }, { -INT16_C( 29765), -INT16_C( 24608), INT16_C( 28601), -INT16_C( 20794), INT16_C( 32623), INT16_C( 1188), INT16_C( 8295), -INT16_C( 5451) } }, { { -INT16_C( 3377), -INT16_C( 10743), -INT16_C( 2838), INT16_C( 24230), -INT16_C( 10604), -INT16_C( 19569), INT16_C( 31999), INT16_C( 1884) }, UINT8_C( 8), { INT16_C( 32703), -INT16_C( 12198), INT16_C( 11365), INT16_C( 20613), -INT16_C( 2282), INT16_C( 14263), -INT16_C( 14551), INT16_C( 6918) }, { -INT16_C( 9007), -INT16_C( 15099), INT16_C( 25475), INT16_C( 22874), INT16_C( 3571), INT16_C( 28504), INT16_C( 24681), INT16_C( 10359) }, { -INT16_C( 3377), -INT16_C( 10743), -INT16_C( 2838), INT16_C( 28504), -INT16_C( 10604), -INT16_C( 19569), INT16_C( 31999), INT16_C( 1884) } }, { { -INT16_C( 11809), INT16_C( 17656), INT16_C( 32510), INT16_C( 5268), INT16_C( 19317), -INT16_C( 25013), INT16_C( 21011), -INT16_C( 6983) }, UINT8_C( 46), { -INT16_C( 22082), INT16_C( 8881), INT16_C( 2819), INT16_C( 4117), -INT16_C( 31645), -INT16_C( 15494), -INT16_C( 23813), -INT16_C( 13150) }, { -INT16_C( 6245), INT16_C( 6602), -INT16_C( 8325), -INT16_C( 14450), INT16_C( 11306), INT16_C( 31962), -INT16_C( 16667), -INT16_C( 23637) }, { -INT16_C( 11809), INT16_C( 6602), -INT16_C( 14450), INT16_C( 31962), INT16_C( 19317), -INT16_C( 8325), INT16_C( 21011), -INT16_C( 6983) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_mm_loadu_epi16(test_vec[i].src); simde__m128i idx = simde_mm_loadu_epi16(test_vec[i].idx); simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i r = simde_mm_mask_permutexvar_epi16(src, test_vec[i].k, idx, a); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i src = simde_test_x86_random_i16x8(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i idx = 
simde_test_x86_random_i16x8(); simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_mask_permutexvar_epi16(src, k, idx, a); simde_test_x86_write_i16x8(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_maskz_permutexvar_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask8 k; const int16_t idx[8]; const int16_t a[8]; const int16_t r[8]; } test_vec[] = { { UINT8_C(103), { -INT16_C( 15012), INT16_C( 26475), INT16_C( 31706), INT16_C( 24267), -INT16_C( 28939), -INT16_C( 26535), INT16_C( 9777), INT16_C( 6195) }, { INT16_C( 19696), -INT16_C( 12397), INT16_C( 23258), INT16_C( 1786), INT16_C( 30260), -INT16_C( 3349), -INT16_C( 29151), INT16_C( 32346) }, { INT16_C( 30260), INT16_C( 1786), INT16_C( 23258), INT16_C( 0), INT16_C( 0), -INT16_C( 12397), -INT16_C( 12397), INT16_C( 0) } }, { UINT8_C( 84), { -INT16_C( 6715), INT16_C( 16430), -INT16_C( 29264), INT16_C( 16182), -INT16_C( 12570), INT16_C( 3184), -INT16_C( 30719), INT16_C( 19965) }, { -INT16_C( 13285), INT16_C( 30247), INT16_C( 11718), INT16_C( 15786), -INT16_C( 25320), -INT16_C( 22946), -INT16_C( 8969), -INT16_C( 17158) }, { INT16_C( 0), INT16_C( 0), -INT16_C( 13285), INT16_C( 0), -INT16_C( 8969), INT16_C( 0), INT16_C( 30247), INT16_C( 0) } }, { UINT8_C(194), { -INT16_C( 983), -INT16_C( 18830), -INT16_C( 20174), INT16_C( 156), -INT16_C( 22239), -INT16_C( 22271), INT16_C( 20134), INT16_C( 29381) }, { INT16_C( 15221), -INT16_C( 24007), INT16_C( 30437), -INT16_C( 32070), INT16_C( 25044), -INT16_C( 20103), INT16_C( 13659), -INT16_C( 31629) }, { INT16_C( 0), INT16_C( 30437), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 13659), -INT16_C( 20103) } }, { UINT8_C( 50), { INT16_C( 15077), 
-INT16_C( 26780), INT16_C( 26071), -INT16_C( 32584), INT16_C( 25190), -INT16_C( 19162), -INT16_C( 26585), INT16_C( 25130) }, { -INT16_C( 12847), INT16_C( 18247), -INT16_C( 13689), -INT16_C( 6116), -INT16_C( 12989), INT16_C( 31044), -INT16_C( 14272), INT16_C( 9643) }, { INT16_C( 0), -INT16_C( 12989), INT16_C( 0), INT16_C( 0), -INT16_C( 14272), -INT16_C( 14272), INT16_C( 0), INT16_C( 0) } }, { UINT8_C( 3), { -INT16_C( 17393), INT16_C( 29914), INT16_C( 23157), -INT16_C( 10277), -INT16_C( 28544), INT16_C( 6398), INT16_C( 24762), -INT16_C( 30742) }, { INT16_C( 12711), INT16_C( 28943), -INT16_C( 2227), INT16_C( 6837), INT16_C( 11835), INT16_C( 1114), -INT16_C( 32551), -INT16_C( 6137) }, { -INT16_C( 6137), -INT16_C( 2227), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT8_C( 60), { INT16_C( 24033), INT16_C( 15281), -INT16_C( 30664), -INT16_C( 14149), -INT16_C( 11386), -INT16_C( 6526), INT16_C( 2749), -INT16_C( 4210) }, { -INT16_C( 231), INT16_C( 4156), INT16_C( 22452), -INT16_C( 7604), INT16_C( 20657), INT16_C( 12731), -INT16_C( 23465), INT16_C( 14446) }, { INT16_C( 0), INT16_C( 0), -INT16_C( 231), -INT16_C( 7604), -INT16_C( 23465), INT16_C( 22452), INT16_C( 0), INT16_C( 0) } }, { UINT8_C( 1), { INT16_C( 29471), -INT16_C( 22471), INT16_C( 302), INT16_C( 302), INT16_C( 5507), -INT16_C( 29249), -INT16_C( 20829), -INT16_C( 23898) }, { -INT16_C( 18454), INT16_C( 16727), INT16_C( 14595), INT16_C( 21491), INT16_C( 9461), -INT16_C( 26198), -INT16_C( 7534), -INT16_C( 19814) }, { -INT16_C( 19814), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT8_C( 85), { INT16_C( 23251), -INT16_C( 11133), -INT16_C( 31608), -INT16_C( 25257), -INT16_C( 6845), -INT16_C( 3776), -INT16_C( 7285), INT16_C( 17116) }, { INT16_C( 7482), INT16_C( 29509), -INT16_C( 26608), INT16_C( 13672), INT16_C( 322), INT16_C( 9415), INT16_C( 31131), INT16_C( 28281) }, { INT16_C( 13672), INT16_C( 0), INT16_C( 7482), INT16_C( 0), 
INT16_C( 13672), INT16_C( 0), INT16_C( 13672), INT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i idx = simde_mm_loadu_epi16(test_vec[i].idx); simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i r = simde_mm_maskz_permutexvar_epi16(test_vec[i].k, idx, a); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i idx = simde_test_x86_random_i16x8(); simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_maskz_permutexvar_epi16(k, idx, a); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_permutexvar_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t idx[16]; const int8_t a[16]; const int8_t r[16]; } test_vec[] = { { { -INT8_C( 89), INT8_C( 112), INT8_C( 118), INT8_C( 112), INT8_C( 27), -INT8_C( 75), -INT8_C( 48), -INT8_C( 116), INT8_C( 48), -INT8_C( 117), -INT8_C( 112), -INT8_C( 23), -INT8_C( 119), -INT8_C( 91), INT8_C( 15), INT8_C( 59) }, { -INT8_C( 71), INT8_C( 114), INT8_C( 108), INT8_C( 13), INT8_C( 18), -INT8_C( 6), INT8_C( 114), INT8_C( 113), -INT8_C( 41), -INT8_C( 3), INT8_C( 44), INT8_C( 29), INT8_C( 73), -INT8_C( 122), INT8_C( 24), -INT8_C( 15) }, { INT8_C( 113), -INT8_C( 71), INT8_C( 114), -INT8_C( 71), INT8_C( 29), -INT8_C( 6), -INT8_C( 71), INT8_C( 73), -INT8_C( 71), INT8_C( 29), -INT8_C( 71), -INT8_C( 3), -INT8_C( 3), -INT8_C( 6), -INT8_C( 15), INT8_C( 29) } }, { { -INT8_C( 10), -INT8_C( 114), INT8_C( 97), INT8_C( 18), INT8_C( 67), INT8_C( 49), -INT8_C( 98), INT8_C( 115), -INT8_C( 67), INT8_C( 47), INT8_C( 92), INT8_C( 70), -INT8_C( 44), INT8_C( 108), -INT8_C( 
127), -INT8_C( 114) }, { -INT8_C( 34), -INT8_C( 19), -INT8_C( 101), -INT8_C( 15), -INT8_C( 24), INT8_C( 13), INT8_C( 98), -INT8_C( 65), INT8_C( 10), -INT8_C( 113), -INT8_C( 36), INT8_C( 83), INT8_C( 21), -INT8_C( 12), INT8_C( 68), INT8_C( 11) }, { INT8_C( 98), INT8_C( 68), -INT8_C( 19), -INT8_C( 101), -INT8_C( 15), -INT8_C( 19), INT8_C( 68), -INT8_C( 15), -INT8_C( 12), INT8_C( 11), INT8_C( 21), INT8_C( 98), -INT8_C( 24), INT8_C( 21), -INT8_C( 19), INT8_C( 68) } }, { { -INT8_C( 126), -INT8_C( 91), INT8_C( 29), -INT8_C( 59), -INT8_C( 41), -INT8_C( 68), INT8_C( 56), -INT8_C( 108), -INT8_C( 21), -INT8_C( 107), -INT8_C( 38), -INT8_C( 65), INT8_C( 1), INT8_C( 92), INT8_C( 77), -INT8_C( 33) }, { INT8_C( 73), -INT8_C( 24), -INT8_C( 48), INT8_C( 49), -INT8_C( 11), INT8_C( 51), -INT8_C( 16), -INT8_C( 1), -INT8_C( 62), -INT8_C( 51), INT8_C( 83), -INT8_C( 41), -INT8_C( 63), -INT8_C( 105), -INT8_C( 30), INT8_C( 68) }, { -INT8_C( 48), INT8_C( 51), -INT8_C( 105), INT8_C( 51), -INT8_C( 1), -INT8_C( 63), -INT8_C( 62), -INT8_C( 11), -INT8_C( 41), INT8_C( 51), INT8_C( 83), INT8_C( 68), -INT8_C( 24), -INT8_C( 63), -INT8_C( 105), INT8_C( 68) } }, { { INT8_C( 61), INT8_C( 0), INT8_C( 9), INT8_C( 20), -INT8_C( 68), INT8_C( 66), -INT8_C( 88), -INT8_C( 89), -INT8_C( 41), -INT8_C( 126), INT8_C( 102), -INT8_C( 40), -INT8_C( 34), -INT8_C( 76), -INT8_C( 73), INT8_C( 40) }, { -INT8_C( 100), -INT8_C( 120), INT8_C( 89), -INT8_C( 110), -INT8_C( 69), INT8_C( 74), -INT8_C( 111), INT8_C( 125), INT8_C( 23), -INT8_C( 28), INT8_C( 84), -INT8_C( 40), INT8_C( 124), INT8_C( 54), INT8_C( 28), -INT8_C( 71) }, { INT8_C( 54), -INT8_C( 100), -INT8_C( 28), -INT8_C( 69), INT8_C( 124), INT8_C( 89), INT8_C( 23), INT8_C( 125), INT8_C( 125), INT8_C( 89), -INT8_C( 111), INT8_C( 23), INT8_C( 28), -INT8_C( 69), INT8_C( 125), INT8_C( 23) } }, { { INT8_C( 54), INT8_C( 38), -INT8_C( 51), -INT8_C( 14), INT8_C( 104), INT8_C( 117), -INT8_C( 103), INT8_C( 63), -INT8_C( 9), INT8_C( 0), INT8_C( 23), -INT8_C( 42), -INT8_C( 76), 
-INT8_C( 50), -INT8_C( 2), INT8_C( 80) }, { INT8_C( 86), INT8_C( 87), -INT8_C( 30), INT8_C( 17), -INT8_C( 95), INT8_C( 116), -INT8_C( 114), -INT8_C( 72), INT8_C( 88), -INT8_C( 30), -INT8_C( 111), -INT8_C( 44), INT8_C( 25), -INT8_C( 83), -INT8_C( 115), INT8_C( 79) }, { -INT8_C( 114), -INT8_C( 114), -INT8_C( 83), -INT8_C( 30), INT8_C( 88), INT8_C( 116), -INT8_C( 30), INT8_C( 79), -INT8_C( 72), INT8_C( 86), -INT8_C( 72), -INT8_C( 114), -INT8_C( 95), -INT8_C( 115), -INT8_C( 115), INT8_C( 86) } }, { { -INT8_C( 45), INT8_C( 90), INT8_C( 66), INT8_C( 59), -INT8_C( 49), -INT8_C( 37), INT8_C( 122), -INT8_C( 57), -INT8_C( 37), -INT8_C( 111), -INT8_C( 99), -INT8_C( 113), INT8_C( 96), -INT8_C( 101), -INT8_C( 32), -INT8_C( 74) }, { -INT8_C( 14), -INT8_C( 62), -INT8_C( 56), -INT8_C( 108), INT8_C( 54), INT8_C( 86), INT8_C( 76), -INT8_C( 113), INT8_C( 57), -INT8_C( 35), INT8_C( 99), INT8_C( 82), -INT8_C( 117), -INT8_C( 15), -INT8_C( 95), INT8_C( 94) }, { -INT8_C( 108), INT8_C( 99), -INT8_C( 56), INT8_C( 82), INT8_C( 94), INT8_C( 82), INT8_C( 99), -INT8_C( 113), INT8_C( 82), -INT8_C( 62), -INT8_C( 15), INT8_C( 94), -INT8_C( 14), INT8_C( 82), -INT8_C( 14), INT8_C( 76) } }, { { INT8_C( 75), -INT8_C( 29), -INT8_C( 102), INT8_C( 27), -INT8_C( 65), INT8_C( 20), -INT8_C( 30), -INT8_C( 102), -INT8_C( 90), INT8_MAX, INT8_C( 42), INT8_C( 6), INT8_C( 26), INT8_C( 10), -INT8_C( 68), INT8_C( 12) }, { -INT8_C( 52), -INT8_C( 124), -INT8_C( 96), INT8_C( 3), -INT8_C( 37), -INT8_C( 19), -INT8_C( 110), INT8_C( 20), -INT8_C( 54), -INT8_C( 11), INT8_C( 102), INT8_C( 85), -INT8_C( 26), INT8_C( 7), -INT8_C( 76), INT8_C( 50) }, { INT8_C( 85), INT8_C( 3), INT8_C( 102), INT8_C( 85), INT8_C( 50), -INT8_C( 37), -INT8_C( 96), INT8_C( 102), -INT8_C( 110), INT8_C( 50), INT8_C( 102), -INT8_C( 110), INT8_C( 102), INT8_C( 102), -INT8_C( 26), -INT8_C( 26) } }, { { -INT8_C( 21), INT8_C( 78), INT8_C( 77), -INT8_C( 86), INT8_C( 98), INT8_C( 47), INT8_C( 68), INT8_C( 8), -INT8_C( 82), INT8_C( 110), INT8_C( 14), 
-INT8_C( 56), INT8_C( 120), -INT8_C( 53), -INT8_C( 44), INT8_C( 69) }, { INT8_C( 79), INT8_C( 117), INT8_C( 72), INT8_C( 42), INT8_C( 98), -INT8_C( 38), INT8_C( 62), INT8_C( 44), -INT8_C( 49), -INT8_C( 92), -INT8_C( 126), -INT8_C( 74), -INT8_C( 84), INT8_C( 54), -INT8_C( 24), -INT8_C( 105) }, { -INT8_C( 74), -INT8_C( 24), INT8_C( 54), -INT8_C( 126), INT8_C( 72), -INT8_C( 105), INT8_C( 98), -INT8_C( 49), -INT8_C( 24), -INT8_C( 24), -INT8_C( 24), -INT8_C( 49), -INT8_C( 49), -INT8_C( 74), INT8_C( 98), -INT8_C( 38) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i idx = simde_mm_loadu_epi8(test_vec[i].idx); simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i r = simde_mm_permutexvar_epi8(idx, a); simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i idx = simde_test_x86_random_i8x16(); simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_permutexvar_epi8(idx, a); simde_test_x86_write_i8x16(2, idx, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_mask_permutexvar_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t src[16]; const simde__mmask16 k; const int8_t idx[16]; const int8_t a[16]; const int8_t r[16]; } test_vec[] = { { { -INT8_C( 124), INT8_C( 53), INT8_C( 65), -INT8_C( 26), INT8_C( 100), -INT8_C( 123), -INT8_C( 17), INT8_C( 18), -INT8_C( 12), -INT8_C( 3), -INT8_C( 38), INT8_C( 108), -INT8_C( 56), -INT8_C( 82), -INT8_C( 79), INT8_C( 24) }, UINT16_C(63779), { INT8_C( 66), -INT8_C( 123), -INT8_C( 45), -INT8_C( 127), -INT8_C( 78), -INT8_C( 93), INT8_C( 37), INT8_C( 52), INT8_C( 89), -INT8_C( 47), INT8_C( 106), INT8_C( 65), INT8_C( 104), -INT8_C( 18), INT8_C( 118), -INT8_C( 87) }, { -INT8_C( 44), -INT8_C( 38), INT8_C( 47), 
-INT8_C( 61), -INT8_C( 20), INT8_C( 35), -INT8_C( 63), -INT8_C( 58), -INT8_C( 113), -INT8_C( 119), INT8_C( 116), INT8_C( 65), -INT8_C( 95), -INT8_C( 104), INT8_C( 58), -INT8_C( 28) }, { INT8_C( 47), INT8_C( 35), INT8_C( 65), -INT8_C( 26), INT8_C( 100), -INT8_C( 61), -INT8_C( 17), INT8_C( 18), -INT8_C( 119), -INT8_C( 3), -INT8_C( 38), -INT8_C( 38), -INT8_C( 113), INT8_C( 58), -INT8_C( 63), -INT8_C( 119) } }, { { INT8_C( 29), INT8_C( 14), INT8_C( 101), -INT8_C( 49), -INT8_C( 79), -INT8_C( 118), INT8_C( 3), INT8_C( 10), INT8_C( 92), INT8_C( 109), INT8_C( 75), -INT8_C( 60), INT8_C( 91), -INT8_C( 63), INT8_C( 110), INT8_C( 48) }, UINT16_C(40347), { -INT8_C( 13), -INT8_C( 121), -INT8_C( 64), -INT8_C( 76), INT8_C( 77), INT8_C( 79), INT8_C( 62), -INT8_C( 63), -INT8_C( 112), -INT8_C( 33), INT8_C( 89), -INT8_C( 53), -INT8_C( 61), INT8_C( 119), -INT8_C( 39), INT8_C( 40) }, { INT8_C( 70), -INT8_C( 118), -INT8_C( 77), INT8_C( 74), -INT8_C( 108), INT8_C( 15), -INT8_C( 73), -INT8_C( 33), -INT8_C( 45), INT8_C( 19), -INT8_C( 96), INT8_C( 65), INT8_C( 67), INT8_C( 59), -INT8_C( 34), INT8_C( 54) }, { INT8_C( 74), -INT8_C( 33), INT8_C( 101), -INT8_C( 108), INT8_C( 59), -INT8_C( 118), INT8_C( 3), -INT8_C( 118), INT8_C( 70), INT8_C( 109), INT8_C( 19), INT8_C( 65), INT8_C( 74), -INT8_C( 63), INT8_C( 110), -INT8_C( 45) } }, { { -INT8_C( 62), -INT8_C( 98), -INT8_C( 21), INT8_C( 15), -INT8_C( 18), INT8_C( 41), -INT8_C( 48), INT8_C( 126), INT8_C( 8), INT8_C( 42), INT8_C( 73), -INT8_C( 52), -INT8_C( 95), INT8_C( 34), -INT8_C( 12), -INT8_C( 25) }, UINT16_C(42924), { INT8_C( 49), INT8_C( 64), -INT8_C( 74), -INT8_C( 23), INT8_C( 31), -INT8_C( 118), -INT8_C( 4), -INT8_C( 65), -INT8_C( 53), INT8_C( 63), -INT8_C( 6), -INT8_C( 86), INT8_C( 117), -INT8_C( 68), INT8_C( 72), INT8_C( 96) }, { -INT8_C( 53), INT8_C( 54), -INT8_C( 119), -INT8_C( 100), -INT8_C( 75), -INT8_C( 110), -INT8_C( 58), -INT8_C( 2), INT8_C( 94), INT8_C( 103), INT8_C( 33), INT8_C( 82), INT8_C( 78), -INT8_C( 51), -INT8_C( 6), INT8_MIN 
}, { -INT8_C( 62), -INT8_C( 98), -INT8_C( 58), INT8_C( 103), -INT8_C( 18), INT8_C( 33), -INT8_C( 48), INT8_MIN, INT8_C( 82), INT8_MIN, INT8_C( 33), -INT8_C( 52), -INT8_C( 95), INT8_C( 78), -INT8_C( 12), -INT8_C( 53) } }, { { INT8_C( 14), -INT8_C( 80), INT8_C( 105), INT8_C( 45), INT8_C( 58), INT8_C( 101), -INT8_C( 19), INT8_C( 6), -INT8_C( 92), -INT8_C( 25), -INT8_C( 80), INT8_C( 25), -INT8_C( 92), -INT8_C( 8), INT8_C( 122), INT8_C( 111) }, UINT16_C( 815), { INT8_C( 11), -INT8_C( 28), -INT8_C( 107), -INT8_C( 47), -INT8_C( 30), -INT8_C( 13), INT8_C( 56), INT8_C( 3), INT8_C( 70), -INT8_C( 121), -INT8_C( 47), INT8_C( 64), INT8_C( 7), -INT8_C( 33), -INT8_C( 16), INT8_C( 112) }, { INT8_C( 12), INT8_C( 43), -INT8_C( 43), -INT8_C( 7), INT8_C( 49), INT8_C( 121), -INT8_C( 31), -INT8_C( 31), -INT8_C( 110), -INT8_C( 123), -INT8_C( 39), INT8_C( 12), -INT8_C( 12), INT8_C( 8), INT8_C( 16), INT8_C( 0) }, { INT8_C( 12), INT8_C( 49), INT8_C( 121), INT8_C( 43), INT8_C( 58), -INT8_C( 7), -INT8_C( 19), INT8_C( 6), -INT8_C( 31), -INT8_C( 31), -INT8_C( 80), INT8_C( 25), -INT8_C( 92), -INT8_C( 8), INT8_C( 122), INT8_C( 111) } }, { { -INT8_C( 20), -INT8_C( 91), -INT8_C( 47), -INT8_C( 49), -INT8_C( 103), INT8_C( 10), -INT8_C( 46), -INT8_C( 33), -INT8_C( 111), -INT8_C( 93), INT8_C( 31), -INT8_C( 104), -INT8_C( 126), INT8_C( 15), INT8_C( 8), -INT8_C( 113) }, UINT16_C(56634), { -INT8_C( 120), INT8_C( 107), INT8_C( 86), INT8_C( 105), INT8_C( 76), -INT8_C( 24), -INT8_C( 18), INT8_C( 38), -INT8_C( 11), -INT8_C( 29), INT8_C( 46), INT8_C( 5), -INT8_C( 29), INT8_C( 27), -INT8_C( 86), -INT8_C( 76) }, { -INT8_C( 22), INT8_C( 67), -INT8_C( 66), -INT8_C( 68), INT8_C( 34), INT8_C( 79), INT8_C( 96), INT8_C( 65), -INT8_C( 25), -INT8_C( 30), INT8_C( 81), -INT8_C( 17), INT8_C( 113), -INT8_C( 117), -INT8_C( 52), -INT8_C( 6) }, { -INT8_C( 20), -INT8_C( 17), -INT8_C( 47), -INT8_C( 30), INT8_C( 113), -INT8_C( 25), -INT8_C( 46), -INT8_C( 33), INT8_C( 79), -INT8_C( 93), -INT8_C( 52), INT8_C( 79), -INT8_C( 68), 
INT8_C( 15), INT8_C( 81), INT8_C( 34) } }, { { -INT8_C( 9), INT8_C( 34), INT8_C( 99), INT8_C( 67), INT8_C( 11), INT8_C( 82), INT8_C( 105), INT8_C( 0), INT8_C( 53), -INT8_C( 104), INT8_C( 5), INT8_C( 24), -INT8_C( 77), -INT8_C( 81), -INT8_C( 52), -INT8_C( 99) }, UINT16_C(35827), { INT8_C( 89), INT8_C( 21), -INT8_C( 38), -INT8_C( 71), INT8_C( 87), -INT8_C( 62), -INT8_C( 100), -INT8_C( 88), -INT8_C( 79), INT8_C( 13), INT8_C( 51), INT8_C( 126), INT8_C( 7), INT8_C( 42), -INT8_C( 96), INT8_C( 107) }, { INT8_C( 110), -INT8_C( 85), -INT8_C( 67), -INT8_C( 41), -INT8_C( 85), -INT8_C( 14), INT8_C( 111), -INT8_C( 80), INT8_C( 10), INT8_C( 34), INT8_C( 96), -INT8_C( 42), -INT8_C( 65), INT8_C( 83), INT8_C( 97), INT8_C( 25) }, { INT8_C( 34), -INT8_C( 14), INT8_C( 99), INT8_C( 67), -INT8_C( 80), -INT8_C( 67), -INT8_C( 65), INT8_C( 10), -INT8_C( 85), INT8_C( 83), INT8_C( 5), INT8_C( 97), -INT8_C( 77), -INT8_C( 81), -INT8_C( 52), -INT8_C( 42) } }, { { INT8_C( 104), INT8_C( 60), -INT8_C( 46), -INT8_C( 65), -INT8_C( 2), INT8_C( 110), INT8_C( 103), -INT8_C( 81), INT8_C( 124), -INT8_C( 101), INT8_C( 45), -INT8_C( 125), -INT8_C( 59), -INT8_C( 50), -INT8_C( 18), INT8_C( 51) }, UINT16_C(43897), { INT8_C( 11), INT8_C( 37), -INT8_C( 99), INT8_C( 122), -INT8_C( 43), -INT8_C( 89), -INT8_C( 99), INT8_C( 53), INT8_C( 126), INT8_C( 92), -INT8_C( 120), -INT8_C( 33), INT8_C( 117), -INT8_C( 15), INT8_C( 27), INT8_C( 72) }, { -INT8_C( 80), INT8_C( 25), -INT8_C( 74), INT8_C( 24), -INT8_C( 55), INT8_C( 50), -INT8_C( 77), -INT8_C( 10), -INT8_C( 74), INT8_C( 120), -INT8_C( 60), -INT8_C( 92), -INT8_C( 84), INT8_C( 62), INT8_C( 80), -INT8_C( 73) }, { -INT8_C( 92), INT8_C( 60), -INT8_C( 46), -INT8_C( 60), INT8_C( 50), -INT8_C( 10), INT8_C( 62), -INT8_C( 81), INT8_C( 80), -INT8_C( 84), INT8_C( 45), -INT8_C( 73), -INT8_C( 59), INT8_C( 25), -INT8_C( 18), -INT8_C( 74) } }, { { INT8_C( 99), -INT8_C( 19), INT8_C( 49), INT8_C( 56), -INT8_C( 107), -INT8_C( 50), INT8_C( 110), INT8_C( 19), INT8_C( 43), -INT8_C( 10), 
-INT8_C( 14), -INT8_C( 96), -INT8_C( 25), INT8_C( 14), -INT8_C( 24), -INT8_C( 104) }, UINT16_C(40743), { -INT8_C( 80), -INT8_C( 16), -INT8_C( 47), INT8_C( 99), -INT8_C( 25), -INT8_C( 121), -INT8_C( 37), -INT8_C( 85), INT8_C( 44), -INT8_C( 121), -INT8_C( 23), INT8_C( 124), INT8_C( 62), INT8_C( 76), INT8_C( 105), INT8_C( 112) }, { -INT8_C( 123), -INT8_C( 2), INT8_C( 62), -INT8_C( 13), INT8_C( 17), INT8_C( 105), -INT8_C( 23), INT8_C( 4), INT8_C( 10), -INT8_C( 47), INT8_C( 18), -INT8_C( 14), INT8_C( 105), INT8_C( 57), -INT8_C( 111), INT8_C( 25) }, { -INT8_C( 123), -INT8_C( 123), -INT8_C( 2), INT8_C( 56), -INT8_C( 107), INT8_C( 4), INT8_C( 110), INT8_C( 19), INT8_C( 105), INT8_C( 4), -INT8_C( 47), INT8_C( 105), -INT8_C( 111), INT8_C( 14), -INT8_C( 24), -INT8_C( 123) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_mm_loadu_epi8(test_vec[i].src); simde__m128i idx = simde_mm_loadu_epi8(test_vec[i].idx); simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i r = simde_mm_mask_permutexvar_epi8(src, test_vec[i].k, idx, a); simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i src = simde_test_x86_random_i8x16(); simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m128i idx = simde_test_x86_random_i8x16(); simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_mask_permutexvar_epi8(src, k, idx, a); simde_test_x86_write_i8x16(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_maskz_permutexvar_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask16 k; const int8_t idx[16]; const int8_t 
a[16]; const int8_t r[16]; } test_vec[] = { { UINT16_C(25386), { INT8_C( 124), INT8_C( 17), -INT8_C( 22), INT8_C( 87), -INT8_C( 68), INT8_C( 22), -INT8_C( 33), -INT8_C( 90), -INT8_C( 110), INT8_C( 29), -INT8_C( 14), -INT8_C( 4), -INT8_C( 115), INT8_C( 119), -INT8_C( 6), -INT8_C( 52) }, { INT8_C( 106), INT8_C( 12), INT8_C( 53), INT8_C( 84), INT8_C( 16), INT8_C( 63), INT8_C( 37), INT8_C( 34), INT8_C( 50), -INT8_C( 114), INT8_C( 91), -INT8_C( 61), -INT8_C( 89), -INT8_C( 123), INT8_C( 38), INT8_C( 35) }, { INT8_C( 0), INT8_C( 12), INT8_C( 0), INT8_C( 34), INT8_C( 0), INT8_C( 37), INT8_C( 0), INT8_C( 0), INT8_C( 53), -INT8_C( 123), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 34), INT8_C( 91), INT8_C( 0) } }, { UINT16_C( 4502), { INT8_C( 122), INT8_C( 83), INT8_C( 39), INT8_C( 89), -INT8_C( 7), -INT8_C( 70), INT8_C( 119), -INT8_C( 21), -INT8_C( 74), INT8_C( 4), INT8_C( 99), -INT8_C( 80), -INT8_C( 48), -INT8_C( 51), -INT8_C( 68), INT8_C( 6) }, { INT8_C( 33), -INT8_C( 52), INT8_C( 69), INT8_C( 70), -INT8_C( 18), INT8_C( 119), -INT8_C( 44), INT8_C( 74), INT8_C( 59), INT8_C( 123), -INT8_C( 49), INT8_C( 97), -INT8_C( 98), INT8_C( 102), INT8_C( 114), INT8_C( 25) }, { INT8_C( 0), INT8_C( 70), INT8_C( 74), INT8_C( 0), INT8_C( 123), INT8_C( 0), INT8_C( 0), INT8_C( 97), -INT8_C( 44), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 33), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { UINT16_C(39609), { INT8_C( 114), -INT8_C( 78), INT8_C( 84), -INT8_C( 23), -INT8_C( 99), INT8_C( 10), -INT8_C( 18), INT8_C( 0), -INT8_C( 70), -INT8_C( 66), -INT8_C( 50), INT8_C( 119), -INT8_C( 60), -INT8_C( 17), INT8_C( 67), INT8_C( 10) }, { INT8_C( 54), INT8_C( 50), -INT8_C( 127), INT8_C( 10), INT8_C( 124), -INT8_C( 68), -INT8_C( 122), INT8_C( 75), INT8_C( 30), INT8_C( 36), -INT8_C( 79), -INT8_C( 112), INT8_C( 61), INT8_C( 106), INT8_C( 42), -INT8_C( 80) }, { -INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 36), INT8_C( 106), -INT8_C( 79), INT8_C( 0), INT8_C( 54), INT8_C( 0), INT8_C( 42), INT8_C( 0), INT8_C( 75), 
INT8_C( 124), INT8_C( 0), INT8_C( 0), -INT8_C( 79) } }, { UINT16_C(32284), { -INT8_C( 103), -INT8_C( 70), -INT8_C( 120), -INT8_C( 121), -INT8_C( 70), INT8_C( 67), INT8_C( 70), -INT8_C( 120), -INT8_C( 70), INT8_C( 10), INT8_C( 120), -INT8_C( 3), INT8_C( 20), -INT8_C( 82), INT8_C( 47), -INT8_C( 106) }, { -INT8_C( 72), -INT8_C( 85), INT8_C( 82), INT8_C( 62), -INT8_C( 9), INT8_C( 112), INT8_C( 99), -INT8_C( 88), INT8_C( 1), -INT8_C( 96), INT8_C( 19), INT8_C( 43), INT8_C( 80), INT8_C( 47), -INT8_C( 86), -INT8_C( 22) }, { INT8_C( 0), INT8_C( 0), INT8_C( 1), -INT8_C( 88), INT8_C( 19), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 19), INT8_C( 1), INT8_C( 47), -INT8_C( 9), -INT8_C( 86), -INT8_C( 22), INT8_C( 0) } }, { UINT16_C(13033), { INT8_C( 113), -INT8_C( 92), INT8_C( 117), -INT8_C( 73), INT8_C( 44), INT8_C( 47), -INT8_C( 62), -INT8_C( 92), INT8_C( 45), -INT8_C( 42), INT8_C( 82), INT8_C( 92), INT8_C( 108), INT8_C( 11), INT8_C( 8), -INT8_C( 65) }, { INT8_C( 73), -INT8_C( 1), INT8_C( 47), -INT8_C( 84), -INT8_C( 89), INT8_C( 48), INT8_C( 77), -INT8_C( 70), INT8_C( 92), -INT8_C( 99), -INT8_C( 22), INT8_C( 6), -INT8_C( 121), -INT8_C( 45), INT8_C( 56), -INT8_C( 7) }, { -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 70), INT8_C( 0), -INT8_C( 7), INT8_C( 47), -INT8_C( 89), INT8_C( 0), INT8_C( 77), INT8_C( 0), INT8_C( 0), -INT8_C( 121), INT8_C( 6), INT8_C( 0), INT8_C( 0) } }, { UINT16_C(44663), { -INT8_C( 80), -INT8_C( 92), -INT8_C( 35), INT8_C( 114), INT8_C( 72), INT8_C( 10), INT8_C( 73), -INT8_C( 101), INT8_C( 103), -INT8_C( 75), -INT8_C( 90), INT8_C( 111), INT8_C( 116), -INT8_C( 17), INT8_C( 110), -INT8_C( 92) }, { -INT8_C( 100), INT8_C( 21), -INT8_C( 44), -INT8_C( 23), -INT8_C( 48), INT8_C( 48), -INT8_C( 122), -INT8_C( 70), INT8_C( 54), INT8_C( 14), -INT8_C( 115), INT8_C( 111), INT8_C( 7), INT8_C( 5), INT8_C( 29), -INT8_C( 73) }, { -INT8_C( 100), -INT8_C( 48), INT8_C( 5), INT8_C( 0), INT8_C( 54), -INT8_C( 115), INT8_C( 14), INT8_C( 0), INT8_C( 0), INT8_C( 48), 
-INT8_C( 122), -INT8_C( 73), INT8_C( 0), -INT8_C( 73), INT8_C( 0), -INT8_C( 48) } }, { UINT16_C(64169), { INT8_C( 42), -INT8_C( 15), INT8_C( 5), INT8_C( 115), -INT8_C( 116), INT8_C( 108), INT8_C( 40), INT8_C( 50), -INT8_C( 37), -INT8_C( 99), INT8_C( 34), INT8_C( 73), INT8_C( 65), -INT8_C( 66), INT8_C( 94), INT8_C( 21) }, { -INT8_C( 89), INT8_C( 46), INT8_C( 70), INT8_C( 45), -INT8_C( 24), INT8_C( 124), INT8_C( 59), INT8_C( 118), -INT8_C( 21), INT8_C( 66), INT8_C( 123), INT8_C( 8), -INT8_C( 6), INT8_C( 36), INT8_C( 3), INT8_C( 36) }, { INT8_C( 123), INT8_C( 0), INT8_C( 0), INT8_C( 45), INT8_C( 0), -INT8_C( 6), INT8_C( 0), INT8_C( 70), INT8_C( 0), INT8_C( 36), INT8_C( 0), INT8_C( 66), INT8_C( 46), INT8_C( 3), INT8_C( 3), INT8_C( 124) } }, { UINT16_C( 2069), { -INT8_C( 105), -INT8_C( 94), INT8_C( 116), -INT8_C( 65), -INT8_C( 44), INT8_C( 79), INT8_C( 92), -INT8_C( 10), -INT8_C( 104), -INT8_C( 99), -INT8_C( 76), -INT8_C( 10), -INT8_C( 77), INT8_C( 91), INT8_C( 37), -INT8_C( 7) }, { -INT8_C( 119), INT8_C( 13), INT8_C( 117), -INT8_C( 60), -INT8_C( 125), INT8_C( 97), INT8_C( 7), -INT8_C( 2), INT8_C( 105), INT8_C( 1), INT8_C( 34), INT8_C( 108), INT8_C( 37), INT8_C( 56), INT8_C( 116), -INT8_C( 68) }, { -INT8_C( 2), INT8_C( 0), -INT8_C( 125), INT8_C( 0), -INT8_C( 125), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 7), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i idx = simde_mm_loadu_epi8(test_vec[i].idx); simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i r = simde_mm_maskz_permutexvar_epi8(test_vec[i].k, idx, a); simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m128i idx = simde_test_x86_random_i8x16(); simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i r = 
simde_mm_maskz_permutexvar_epi8(k, idx, a); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_permutexvar_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t idx[16]; const int16_t a[16]; const int16_t r[16]; } test_vec[] = { { { INT16_C( 5893), -INT16_C( 32362), INT16_C( 15791), -INT16_C( 31311), -INT16_C( 29409), -INT16_C( 15393), INT16_C( 6807), -INT16_C( 24848), -INT16_C( 12998), INT16_C( 1199), INT16_C( 9359), INT16_C( 30035), INT16_C( 14670), -INT16_C( 18355), -INT16_C( 18338), INT16_C( 25575) }, { INT16_C( 32208), INT16_C( 32740), -INT16_C( 27206), -INT16_C( 9980), -INT16_C( 7390), -INT16_C( 18019), -INT16_C( 29187), INT16_C( 14423), INT16_C( 1626), -INT16_C( 5828), -INT16_C( 28886), INT16_C( 31071), -INT16_C( 21304), INT16_C( 9777), INT16_C( 6244), INT16_C( 13449) }, { -INT16_C( 18019), -INT16_C( 29187), INT16_C( 13449), INT16_C( 32740), INT16_C( 13449), INT16_C( 13449), INT16_C( 14423), INT16_C( 32208), -INT16_C( 28886), INT16_C( 13449), INT16_C( 13449), -INT16_C( 9980), INT16_C( 6244), INT16_C( 9777), INT16_C( 6244), INT16_C( 14423) } }, { { INT16_C( 28053), INT16_C( 20659), -INT16_C( 18430), INT16_C( 9257), -INT16_C( 14693), -INT16_C( 26147), INT16_C( 13395), -INT16_C( 20783), INT16_C( 3387), INT16_C( 26007), -INT16_C( 2403), INT16_C( 26078), INT16_C( 4002), INT16_C( 1932), INT16_C( 5415), -INT16_C( 17093) }, { -INT16_C( 4221), -INT16_C( 31475), INT16_C( 13991), INT16_C( 17066), -INT16_C( 30723), INT16_C( 20699), -INT16_C( 21316), -INT16_C( 2050), -INT16_C( 26950), INT16_C( 22364), INT16_C( 15244), INT16_C( 12220), INT16_C( 18506), INT16_C( 29238), INT16_C( 29022), -INT16_C( 7889) }, { INT16_C( 20699), INT16_C( 17066), INT16_C( 13991), INT16_C( 22364), INT16_C( 12220), INT16_C( 29238), 
INT16_C( 17066), -INT16_C( 31475), INT16_C( 12220), -INT16_C( 2050), INT16_C( 29238), INT16_C( 29022), INT16_C( 13991), INT16_C( 18506), -INT16_C( 2050), INT16_C( 12220) } }, { { INT16_C( 15456), INT16_C( 1894), INT16_C( 4210), INT16_C( 28490), INT16_C( 9624), INT16_C( 21696), -INT16_C( 16686), -INT16_C( 29621), -INT16_C( 22700), -INT16_C( 7709), -INT16_C( 24606), INT16_C( 11536), INT16_C( 18152), INT16_C( 18079), -INT16_C( 12617), INT16_C( 6183) }, { -INT16_C( 29430), INT16_C( 31775), INT16_C( 27038), INT16_C( 14060), -INT16_C( 21361), INT16_C( 24970), -INT16_C( 10902), -INT16_C( 16403), -INT16_C( 12164), INT16_C( 24480), -INT16_C( 20369), INT16_C( 22412), INT16_C( 11254), -INT16_C( 21091), -INT16_C( 15111), INT16_C( 965) }, { -INT16_C( 29430), -INT16_C( 10902), INT16_C( 27038), -INT16_C( 20369), -INT16_C( 12164), -INT16_C( 29430), INT16_C( 27038), INT16_C( 22412), -INT16_C( 21361), INT16_C( 14060), INT16_C( 27038), -INT16_C( 29430), -INT16_C( 12164), INT16_C( 965), -INT16_C( 16403), -INT16_C( 16403) } }, { { -INT16_C( 6830), -INT16_C( 3969), INT16_C( 27470), -INT16_C( 8922), -INT16_C( 20457), -INT16_C( 32194), INT16_C( 11141), INT16_C( 321), -INT16_C( 7685), INT16_C( 27488), -INT16_C( 4975), -INT16_C( 30782), INT16_C( 24599), INT16_C( 4148), -INT16_C( 1500), INT16_C( 30227) }, { -INT16_C( 27681), INT16_C( 11622), -INT16_C( 29442), INT16_C( 5643), INT16_C( 18748), -INT16_C( 15976), -INT16_C( 9867), INT16_C( 28867), INT16_C( 9146), INT16_C( 19419), -INT16_C( 25072), INT16_C( 10194), INT16_C( 1790), INT16_C( 8760), INT16_C( 19200), -INT16_C( 8295) }, { -INT16_C( 29442), -INT16_C( 8295), INT16_C( 19200), -INT16_C( 9867), INT16_C( 28867), INT16_C( 19200), -INT16_C( 15976), INT16_C( 11622), INT16_C( 10194), -INT16_C( 27681), INT16_C( 11622), -INT16_C( 29442), INT16_C( 28867), INT16_C( 18748), INT16_C( 18748), INT16_C( 5643) } }, { { -INT16_C( 34), -INT16_C( 8947), INT16_C( 6284), -INT16_C( 14093), -INT16_C( 29855), -INT16_C( 10614), INT16_C( 19812), INT16_C( 7751), 
INT16_C( 8816), -INT16_C( 32663), INT16_C( 15296), -INT16_C( 16728), -INT16_C( 8127), INT16_C( 17121), INT16_C( 31275), INT16_C( 2593) }, { INT16_C( 11897), INT16_C( 1511), -INT16_C( 9658), -INT16_C( 22322), INT16_C( 22629), -INT16_C( 13954), -INT16_C( 14939), INT16_C( 5607), INT16_C( 20712), -INT16_C( 22378), INT16_C( 16011), -INT16_C( 13209), INT16_C( 18462), INT16_C( 18702), INT16_C( 12482), INT16_C( 15187) }, { INT16_C( 12482), INT16_C( 18702), INT16_C( 18462), -INT16_C( 22322), INT16_C( 1511), INT16_C( 16011), INT16_C( 22629), INT16_C( 5607), INT16_C( 11897), -INT16_C( 22378), INT16_C( 11897), INT16_C( 20712), INT16_C( 1511), INT16_C( 1511), -INT16_C( 13209), INT16_C( 1511) } }, { { INT16_C( 14942), -INT16_C( 23231), INT16_C( 3860), INT16_C( 31053), -INT16_C( 13465), INT16_C( 3138), INT16_C( 10641), INT16_C( 31009), -INT16_C( 18567), INT16_C( 1057), -INT16_C( 30475), INT16_C( 5073), -INT16_C( 8240), -INT16_C( 28067), -INT16_C( 20465), INT16_C( 28366) }, { INT16_C( 4075), -INT16_C( 237), INT16_C( 24606), -INT16_C( 31367), -INT16_C( 17621), -INT16_C( 17263), -INT16_C( 19739), INT16_C( 24117), INT16_C( 22378), INT16_C( 24419), INT16_C( 13535), -INT16_C( 20365), -INT16_C( 12269), INT16_C( 9026), INT16_C( 4224), INT16_C( 27537) }, { INT16_C( 4224), -INT16_C( 237), -INT16_C( 17621), INT16_C( 9026), INT16_C( 24117), INT16_C( 24606), -INT16_C( 237), -INT16_C( 237), INT16_C( 24419), -INT16_C( 237), -INT16_C( 17263), -INT16_C( 237), INT16_C( 4075), INT16_C( 9026), INT16_C( 27537), INT16_C( 4224) } }, { { -INT16_C( 23521), INT16_C( 15723), -INT16_C( 7164), INT16_C( 12226), INT16_C( 21407), -INT16_C( 31508), INT16_C( 8454), INT16_C( 28899), INT16_C( 18040), INT16_C( 22735), INT16_C( 17018), -INT16_C( 29432), INT16_C( 18962), -INT16_C( 27728), INT16_C( 16731), INT16_C( 31486) }, { INT16_C( 27109), -INT16_C( 5704), INT16_C( 31309), -INT16_C( 4839), INT16_C( 1486), -INT16_C( 11151), INT16_C( 21542), -INT16_C( 24764), INT16_C( 5018), INT16_C( 5367), -INT16_C( 170), INT16_C( 
26786), INT16_C( 21065), -INT16_C( 23301), -INT16_C( 1388), INT16_C( 31007) }, { INT16_C( 31007), INT16_C( 26786), INT16_C( 1486), INT16_C( 31309), INT16_C( 31007), INT16_C( 21065), INT16_C( 21542), -INT16_C( 4839), INT16_C( 5018), INT16_C( 31007), -INT16_C( 170), INT16_C( 5018), INT16_C( 31309), INT16_C( 27109), INT16_C( 26786), -INT16_C( 1388) } }, { { -INT16_C( 10397), -INT16_C( 20125), INT16_C( 31825), INT16_C( 8094), INT16_C( 3969), -INT16_C( 22541), INT16_C( 14180), -INT16_C( 442), INT16_C( 15691), -INT16_C( 24301), -INT16_C( 19140), -INT16_C( 31223), INT16_C( 1287), -INT16_C( 25814), INT16_C( 18943), INT16_C( 25109) }, { INT16_C( 30752), INT16_C( 29203), -INT16_C( 19980), INT16_C( 30097), -INT16_C( 31295), INT16_C( 9500), INT16_C( 25532), INT16_C( 1827), INT16_C( 13984), -INT16_C( 8792), -INT16_C( 19733), -INT16_C( 3229), -INT16_C( 29257), -INT16_C( 18802), -INT16_C( 23593), -INT16_C( 2280) }, { INT16_C( 30097), INT16_C( 30097), INT16_C( 29203), -INT16_C( 23593), INT16_C( 29203), INT16_C( 30097), -INT16_C( 31295), INT16_C( 25532), -INT16_C( 3229), INT16_C( 30097), -INT16_C( 29257), -INT16_C( 8792), INT16_C( 1827), -INT16_C( 19733), -INT16_C( 2280), INT16_C( 9500) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i idx = simde_mm256_loadu_epi16(test_vec[i].idx); simde__m256i a = simde_mm256_loadu_epi16(test_vec[i].a); simde__m256i r = simde_mm256_permutexvar_epi16(idx, a); simde_test_x86_assert_equal_i16x16(r, simde_mm256_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i idx = simde_test_x86_random_i16x16(); simde__m256i a = simde_test_x86_random_i16x16(); simde__m256i r = simde_mm256_permutexvar_epi16(idx, a); simde_test_x86_write_i16x16(2, idx, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int 
test_simde_mm256_mask_permutexvar_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t src[16]; const simde__mmask16 k; const int16_t idx[16]; const int16_t a[16]; const int16_t r[16]; } test_vec[] = { { { INT16_C( 11291), INT16_C( 3945), -INT16_C( 1059), -INT16_C( 24956), -INT16_C( 24192), INT16_C( 15555), -INT16_C( 6396), -INT16_C( 23484), -INT16_C( 5091), INT16_C( 2433), -INT16_C( 7010), INT16_C( 22012), -INT16_C( 30094), INT16_C( 18699), INT16_C( 9262), INT16_C( 18752) }, UINT16_C(43600), { INT16_C( 11609), -INT16_C( 8795), INT16_C( 9676), -INT16_C( 28802), -INT16_C( 32159), -INT16_C( 23178), -INT16_C( 27609), -INT16_C( 22382), INT16_C( 12445), -INT16_C( 26227), -INT16_C( 122), -INT16_C( 28381), INT16_C( 20808), -INT16_C( 30539), INT16_C( 1435), -INT16_C( 3022) }, { -INT16_C( 10445), -INT16_C( 47), INT16_C( 20732), INT16_C( 24206), INT16_C( 1490), -INT16_C( 1789), -INT16_C( 27239), INT16_C( 13986), INT16_C( 12230), INT16_C( 19663), -INT16_C( 3538), INT16_C( 30429), -INT16_C( 27836), -INT16_C( 8194), INT16_C( 12696), -INT16_C( 13357) }, { INT16_C( 11291), INT16_C( 3945), -INT16_C( 1059), -INT16_C( 24956), -INT16_C( 47), INT16_C( 15555), INT16_C( 13986), -INT16_C( 23484), -INT16_C( 5091), -INT16_C( 8194), -INT16_C( 7010), INT16_C( 24206), -INT16_C( 30094), -INT16_C( 1789), INT16_C( 9262), INT16_C( 20732) } }, { { -INT16_C( 23544), INT16_C( 1482), INT16_C( 23028), -INT16_C( 14493), INT16_C( 26206), -INT16_C( 2112), INT16_C( 25340), -INT16_C( 15827), -INT16_C( 879), -INT16_C( 16626), -INT16_C( 5138), INT16_C( 12853), INT16_C( 13438), INT16_C( 5905), -INT16_C( 7067), INT16_C( 28130) }, UINT16_C(44425), { INT16_C( 32114), -INT16_C( 11002), INT16_C( 25668), INT16_C( 1340), INT16_C( 14427), -INT16_C( 30617), -INT16_C( 1542), INT16_C( 2180), INT16_C( 29368), -INT16_C( 4365), INT16_C( 29349), -INT16_C( 18910), -INT16_C( 30839), INT16_C( 27547), INT16_C( 9460), INT16_C( 26392) }, { INT16_C( 7841), -INT16_C( 6596), INT16_C( 30850), -INT16_C( 8725), 
INT16_C( 21168), -INT16_C( 21915), -INT16_C( 5813), INT16_C( 1202), -INT16_C( 22948), INT16_C( 498), INT16_C( 5144), -INT16_C( 24137), INT16_C( 21147), -INT16_C( 28916), INT16_C( 9590), INT16_C( 6390) }, { INT16_C( 30850), INT16_C( 1482), INT16_C( 23028), INT16_C( 21147), INT16_C( 26206), -INT16_C( 2112), INT16_C( 25340), INT16_C( 21168), -INT16_C( 22948), -INT16_C( 16626), -INT16_C( 21915), INT16_C( 30850), INT16_C( 13438), -INT16_C( 24137), -INT16_C( 7067), -INT16_C( 22948) } }, { { INT16_C( 13123), -INT16_C( 14594), -INT16_C( 5717), INT16_C( 23715), INT16_C( 2363), -INT16_C( 30970), -INT16_C( 17934), INT16_C( 20107), INT16_C( 32095), INT16_C( 30543), INT16_C( 1937), INT16_C( 11288), INT16_C( 9305), -INT16_C( 12101), -INT16_C( 19895), -INT16_C( 29208) }, UINT16_C(59109), { -INT16_C( 28589), -INT16_C( 2353), INT16_C( 2796), -INT16_C( 3073), -INT16_C( 3439), INT16_C( 7340), INT16_C( 2880), -INT16_C( 28519), INT16_C( 10882), -INT16_C( 25961), -INT16_C( 4010), INT16_C( 4798), INT16_C( 2240), -INT16_C( 22332), -INT16_C( 22123), -INT16_C( 6002) }, { INT16_C( 23865), INT16_C( 9950), -INT16_C( 8600), -INT16_C( 1767), -INT16_C( 14896), INT16_C( 4118), -INT16_C( 20528), INT16_C( 21152), INT16_C( 14298), INT16_C( 12524), -INT16_C( 21976), -INT16_C( 6078), INT16_C( 1714), INT16_C( 18321), INT16_C( 8111), -INT16_C( 5841) }, { -INT16_C( 1767), -INT16_C( 14594), INT16_C( 1714), INT16_C( 23715), INT16_C( 2363), INT16_C( 1714), INT16_C( 23865), INT16_C( 12524), INT16_C( 32095), INT16_C( 21152), -INT16_C( 20528), INT16_C( 11288), INT16_C( 9305), -INT16_C( 14896), INT16_C( 4118), INT16_C( 8111) } }, { { INT16_C( 3709), -INT16_C( 6897), INT16_C( 10476), -INT16_C( 17186), -INT16_C( 2835), -INT16_C( 16948), INT16_C( 28068), INT16_C( 32271), -INT16_C( 1116), -INT16_C( 13138), -INT16_C( 3675), INT16_C( 22709), INT16_C( 18167), -INT16_C( 22625), -INT16_C( 12443), -INT16_C( 7536) }, UINT16_C(40925), { -INT16_C( 13881), -INT16_C( 22841), -INT16_C( 19323), INT16_C( 20890), INT16_C( 15985), 
-INT16_C( 32578), INT16_C( 25532), INT16_C( 27515), INT16_C( 8239), -INT16_C( 7076), INT16_C( 21368), INT16_C( 6186), -INT16_C( 28422), -INT16_C( 29977), -INT16_C( 15246), INT16_C( 14889) }, { -INT16_C( 3955), INT16_C( 4832), INT16_C( 31396), INT16_C( 5475), INT16_C( 8889), INT16_C( 30101), INT16_C( 4229), -INT16_C( 19232), INT16_C( 15409), -INT16_C( 22119), -INT16_C( 15472), -INT16_C( 30015), -INT16_C( 22445), -INT16_C( 14827), INT16_C( 15980), -INT16_C( 1792) }, { -INT16_C( 19232), -INT16_C( 6897), INT16_C( 30101), -INT16_C( 15472), INT16_C( 4832), -INT16_C( 16948), -INT16_C( 22445), -INT16_C( 30015), -INT16_C( 1792), -INT16_C( 22445), INT16_C( 15409), -INT16_C( 15472), -INT16_C( 15472), -INT16_C( 22625), -INT16_C( 12443), -INT16_C( 22119) } }, { { -INT16_C( 8145), -INT16_C( 11509), INT16_C( 28506), INT16_C( 5097), INT16_C( 32401), INT16_C( 5769), INT16_C( 27023), -INT16_C( 16182), INT16_C( 25510), INT16_C( 13929), INT16_C( 11047), INT16_C( 31424), -INT16_C( 10797), INT16_C( 16448), INT16_C( 16404), INT16_C( 17209) }, UINT16_C(17696), { INT16_C( 31510), -INT16_C( 76), INT16_C( 17806), INT16_C( 6014), INT16_C( 3419), INT16_C( 9601), INT16_C( 10189), INT16_C( 13961), -INT16_C( 20387), INT16_C( 7521), INT16_C( 13610), INT16_C( 27635), INT16_C( 1909), -INT16_C( 20821), -INT16_C( 13238), INT16_C( 24819) }, { -INT16_C( 22713), -INT16_C( 10912), -INT16_C( 8468), INT16_C( 18413), INT16_C( 28395), -INT16_C( 18323), -INT16_C( 2411), -INT16_C( 3346), INT16_C( 20646), -INT16_C( 12273), INT16_C( 645), -INT16_C( 1477), -INT16_C( 6391), INT16_C( 21416), -INT16_C( 25421), -INT16_C( 1356) }, { -INT16_C( 8145), -INT16_C( 11509), INT16_C( 28506), INT16_C( 5097), INT16_C( 32401), -INT16_C( 10912), INT16_C( 27023), -INT16_C( 16182), INT16_C( 21416), INT16_C( 13929), INT16_C( 645), INT16_C( 31424), -INT16_C( 10797), INT16_C( 16448), INT16_C( 645), INT16_C( 17209) } }, { { INT16_C( 5187), INT16_C( 12495), -INT16_C( 17166), -INT16_C( 8841), -INT16_C( 7126), -INT16_C( 16491), -INT16_C( 
31782), -INT16_C( 32591), -INT16_C( 15917), INT16_C( 22609), -INT16_C( 29501), -INT16_C( 12974), -INT16_C( 1165), INT16_C( 9760), -INT16_C( 11113), -INT16_C( 9696) }, UINT16_C(61672), { -INT16_C( 9718), -INT16_C( 32084), -INT16_C( 10313), INT16_C( 19558), INT16_C( 16790), INT16_C( 18640), -INT16_C( 23615), INT16_C( 4617), -INT16_C( 13060), INT16_C( 20127), INT16_C( 4761), -INT16_C( 17847), -INT16_C( 8135), INT16_C( 22926), INT16_C( 30651), -INT16_C( 15031) }, { -INT16_C( 2479), INT16_C( 2375), -INT16_C( 20787), INT16_C( 25429), INT16_C( 9711), -INT16_C( 20309), -INT16_C( 19255), -INT16_C( 14909), INT16_C( 25217), INT16_C( 6675), INT16_C( 23924), -INT16_C( 21036), INT16_C( 25405), -INT16_C( 2041), INT16_C( 20698), INT16_C( 11198) }, { INT16_C( 5187), INT16_C( 12495), -INT16_C( 17166), -INT16_C( 19255), -INT16_C( 7126), -INT16_C( 2479), INT16_C( 2375), INT16_C( 6675), -INT16_C( 15917), INT16_C( 22609), -INT16_C( 29501), -INT16_C( 12974), INT16_C( 6675), INT16_C( 20698), -INT16_C( 21036), INT16_C( 6675) } }, { { INT16_C( 1350), INT16_C( 4916), -INT16_C( 30029), -INT16_C( 23945), INT16_C( 8879), INT16_C( 30803), INT16_C( 5847), INT16_C( 22589), INT16_C( 20856), -INT16_C( 5006), INT16_C( 18350), -INT16_C( 5222), -INT16_C( 24150), -INT16_C( 31516), -INT16_C( 23823), INT16_C( 14511) }, UINT16_C(58535), { INT16_C( 23371), -INT16_C( 15762), INT16_C( 7677), INT16_C( 20709), -INT16_C( 17258), -INT16_C( 11418), -INT16_C( 8684), -INT16_C( 31196), -INT16_C( 11573), INT16_C( 26061), INT16_C( 30654), -INT16_C( 24058), -INT16_C( 2053), -INT16_C( 21692), -INT16_C( 5329), INT16_C( 31631) }, { -INT16_C( 698), INT16_C( 17469), INT16_C( 8730), -INT16_C( 20332), -INT16_C( 1058), -INT16_C( 3452), -INT16_C( 22311), -INT16_C( 23431), INT16_C( 18043), INT16_C( 14601), INT16_C( 4030), -INT16_C( 17957), INT16_C( 7943), INT16_C( 13924), -INT16_C( 3318), INT16_C( 20913) }, { -INT16_C( 17957), -INT16_C( 3318), INT16_C( 13924), -INT16_C( 23945), INT16_C( 8879), -INT16_C( 22311), INT16_C( 5847), 
-INT16_C( 1058), INT16_C( 20856), -INT16_C( 5006), -INT16_C( 3318), -INT16_C( 5222), -INT16_C( 24150), -INT16_C( 1058), INT16_C( 20913), INT16_C( 20913) } }, { { -INT16_C( 4112), INT16_C( 2965), INT16_C( 10513), -INT16_C( 3909), INT16_C( 16164), -INT16_C( 286), INT16_C( 23528), INT16_C( 25506), -INT16_C( 21342), INT16_C( 24732), INT16_C( 30651), -INT16_C( 15847), INT16_C( 32406), -INT16_C( 24327), -INT16_C( 21903), INT16_C( 25329) }, UINT16_C(34457), { -INT16_C( 21651), INT16_C( 10416), -INT16_C( 11109), INT16_C( 32104), INT16_C( 20690), INT16_C( 30169), INT16_C( 31667), INT16_C( 20257), -INT16_C( 8997), -INT16_C( 2874), INT16_C( 23711), -INT16_C( 26510), -INT16_C( 6916), -INT16_C( 4542), -INT16_C( 9146), -INT16_C( 19596) }, { INT16_C( 9351), INT16_C( 8923), INT16_C( 17401), -INT16_C( 13409), INT16_C( 30867), INT16_C( 17984), INT16_C( 25075), -INT16_C( 12651), INT16_C( 23358), -INT16_C( 8765), INT16_C( 13751), -INT16_C( 19339), -INT16_C( 18663), INT16_C( 24482), INT16_C( 5779), INT16_C( 6674) }, { INT16_C( 24482), INT16_C( 2965), INT16_C( 10513), INT16_C( 23358), INT16_C( 17401), -INT16_C( 286), INT16_C( 23528), INT16_C( 8923), -INT16_C( 21342), INT16_C( 25075), INT16_C( 6674), -INT16_C( 15847), INT16_C( 32406), -INT16_C( 24327), -INT16_C( 21903), INT16_C( 30867) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i src = simde_mm256_loadu_epi16(test_vec[i].src); simde__m256i idx = simde_mm256_loadu_epi16(test_vec[i].idx); simde__m256i a = simde_mm256_loadu_epi16(test_vec[i].a); simde__m256i r = simde_mm256_mask_permutexvar_epi16(src, test_vec[i].k, idx, a); simde_test_x86_assert_equal_i16x16(r, simde_mm256_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i src = simde_test_x86_random_i16x16(); simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m256i idx = simde_test_x86_random_i16x16(); simde__m256i a = simde_test_x86_random_i16x16(); simde__m256i r = 
simde_mm256_mask_permutexvar_epi16(src, k, idx, a); simde_test_x86_write_i16x16(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_maskz_permutexvar_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask16 k; const int16_t idx[16]; const int16_t a[16]; const int16_t r[16]; } test_vec[] = { { UINT16_C(60987), { INT16_C( 13372), -INT16_C( 9167), -INT16_C( 14849), INT16_C( 16468), INT16_C( 18443), -INT16_C( 24159), -INT16_C( 8426), -INT16_C( 9732), -INT16_C( 19268), INT16_C( 12559), INT16_C( 10344), INT16_C( 2793), INT16_C( 31880), -INT16_C( 26080), INT16_C( 23447), -INT16_C( 11384) }, { -INT16_C( 17777), -INT16_C( 28753), INT16_C( 1151), -INT16_C( 30001), INT16_C( 28748), INT16_C( 25131), INT16_C( 10320), INT16_C( 3132), INT16_C( 19420), INT16_C( 17470), INT16_C( 10099), -INT16_C( 1202), INT16_C( 28323), INT16_C( 14998), INT16_C( 7882), INT16_C( 22798) }, { INT16_C( 28323), -INT16_C( 28753), INT16_C( 0), INT16_C( 28748), -INT16_C( 1202), -INT16_C( 28753), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 22798), INT16_C( 19420), INT16_C( 17470), INT16_C( 0), -INT16_C( 17777), INT16_C( 3132), INT16_C( 19420) } }, { UINT16_C(48600), { INT16_C( 22504), -INT16_C( 18495), INT16_C( 3554), INT16_C( 3368), INT16_C( 30832), -INT16_C( 21451), INT16_C( 4484), -INT16_C( 15625), INT16_C( 27221), -INT16_C( 23575), -INT16_C( 29338), -INT16_C( 1006), -INT16_C( 9017), -INT16_C( 10982), -INT16_C( 3275), INT16_C( 7827) }, { INT16_C( 21578), INT16_C( 11477), -INT16_C( 670), -INT16_C( 11718), INT16_C( 28533), -INT16_C( 1410), INT16_C( 30081), -INT16_C( 10564), -INT16_C( 22817), INT16_C( 17786), -INT16_C( 29645), -INT16_C( 1471), INT16_C( 23656), -INT16_C( 25136), INT16_C( 25423), -INT16_C( 
26181) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 22817), INT16_C( 21578), INT16_C( 0), INT16_C( 28533), -INT16_C( 10564), -INT16_C( 1410), INT16_C( 0), INT16_C( 30081), -INT16_C( 670), -INT16_C( 10564), -INT16_C( 29645), INT16_C( 0), -INT16_C( 11718) } }, { UINT16_C(37303), { INT16_C( 6598), INT16_C( 142), INT16_C( 1259), INT16_C( 26991), -INT16_C( 3842), -INT16_C( 17698), -INT16_C( 16697), INT16_C( 16736), -INT16_C( 27901), INT16_C( 17869), INT16_C( 13710), INT16_C( 24225), -INT16_C( 3886), -INT16_C( 28991), INT16_C( 30857), INT16_C( 20255) }, { -INT16_C( 21102), INT16_C( 32079), -INT16_C( 16463), -INT16_C( 20505), -INT16_C( 14929), INT16_C( 30314), -INT16_C( 13693), -INT16_C( 30793), -INT16_C( 31650), -INT16_C( 4916), INT16_C( 28089), -INT16_C( 29622), INT16_C( 2909), -INT16_C( 6630), INT16_C( 14723), INT16_C( 5430) }, { -INT16_C( 13693), INT16_C( 14723), -INT16_C( 29622), INT16_C( 0), INT16_C( 14723), INT16_C( 14723), INT16_C( 0), -INT16_C( 21102), -INT16_C( 20505), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 16463), INT16_C( 0), INT16_C( 0), INT16_C( 5430) } }, { UINT16_C(34278), { -INT16_C( 26477), INT16_C( 31300), -INT16_C( 3001), -INT16_C( 20161), -INT16_C( 15510), INT16_C( 8828), -INT16_C( 9654), INT16_C( 5798), INT16_C( 24774), INT16_C( 4227), -INT16_C( 7956), INT16_C( 1563), -INT16_C( 24890), -INT16_C( 961), INT16_C( 9652), INT16_C( 18306) }, { -INT16_C( 14659), INT16_C( 1473), INT16_C( 186), INT16_C( 9654), INT16_C( 12995), INT16_C( 3399), -INT16_C( 4852), -INT16_C( 11741), -INT16_C( 22963), INT16_C( 14818), -INT16_C( 634), INT16_C( 19775), INT16_C( 32412), INT16_C( 20553), -INT16_C( 13404), INT16_C( 24983) }, { INT16_C( 0), INT16_C( 12995), -INT16_C( 11741), INT16_C( 0), INT16_C( 0), INT16_C( 32412), -INT16_C( 634), -INT16_C( 4852), -INT16_C( 4852), INT16_C( 0), INT16_C( 32412), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 186) } }, { UINT16_C(22674), { INT16_C( 19558), INT16_C( 7512), INT16_C( 7281), -INT16_C( 18353), 
INT16_C( 23593), INT16_C( 19878), -INT16_C( 3282), INT16_C( 4595), INT16_C( 31277), INT16_C( 27662), -INT16_C( 21817), INT16_C( 4331), -INT16_C( 28678), -INT16_C( 28196), INT16_C( 28400), INT16_C( 22505) }, { INT16_C( 17082), INT16_C( 11380), -INT16_C( 15522), -INT16_C( 30748), -INT16_C( 30177), INT16_C( 20180), -INT16_C( 14210), -INT16_C( 21665), INT16_C( 27970), INT16_C( 2327), INT16_C( 536), INT16_C( 4633), -INT16_C( 2671), -INT16_C( 32092), -INT16_C( 29341), INT16_C( 7897) }, { INT16_C( 0), INT16_C( 27970), INT16_C( 0), INT16_C( 0), INT16_C( 2327), INT16_C( 0), INT16_C( 0), -INT16_C( 30748), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 4633), INT16_C( 536), INT16_C( 0), INT16_C( 17082), INT16_C( 0) } }, { UINT16_C(19919), { INT16_C( 11594), INT16_C( 11792), INT16_C( 12469), -INT16_C( 30279), INT16_C( 14206), -INT16_C( 8879), -INT16_C( 27678), -INT16_C( 1718), INT16_C( 25244), -INT16_C( 18692), -INT16_C( 29323), INT16_C( 6571), INT16_C( 3855), -INT16_C( 5978), INT16_C( 30253), INT16_C( 30517) }, { INT16_C( 18083), INT16_C( 22693), INT16_C( 24182), -INT16_C( 2846), INT16_C( 13205), INT16_C( 30673), INT16_C( 7111), INT16_C( 25457), INT16_C( 28030), -INT16_C( 3303), -INT16_C( 14854), INT16_C( 2572), -INT16_C( 19756), INT16_C( 498), INT16_C( 10280), -INT16_C( 13192) }, { -INT16_C( 14854), INT16_C( 18083), INT16_C( 30673), -INT16_C( 3303), INT16_C( 0), INT16_C( 0), INT16_C( 24182), -INT16_C( 14854), -INT16_C( 19756), INT16_C( 0), INT16_C( 30673), INT16_C( 2572), INT16_C( 0), INT16_C( 0), INT16_C( 498), INT16_C( 0) } }, { UINT16_C( 7534), { -INT16_C( 7132), INT16_C( 1660), INT16_C( 4568), -INT16_C( 22214), INT16_C( 393), -INT16_C( 1340), INT16_C( 16996), INT16_C( 32359), INT16_C( 24885), INT16_C( 16707), INT16_C( 5995), INT16_C( 24308), INT16_C( 7192), -INT16_C( 28538), -INT16_C( 2840), INT16_C( 3501) }, { INT16_C( 10712), -INT16_C( 20461), INT16_C( 19771), -INT16_C( 15271), INT16_C( 7502), -INT16_C( 19522), INT16_C( 9568), -INT16_C( 27343), INT16_C( 29830), 
-INT16_C( 3369), -INT16_C( 13429), -INT16_C( 23728), -INT16_C( 10521), -INT16_C( 12237), -INT16_C( 7990), -INT16_C( 23843) }, { INT16_C( 0), -INT16_C( 10521), INT16_C( 29830), -INT16_C( 13429), INT16_C( 0), INT16_C( 7502), INT16_C( 7502), INT16_C( 0), -INT16_C( 19522), INT16_C( 0), -INT16_C( 23728), INT16_C( 7502), INT16_C( 29830), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT16_C(61450), { INT16_C( 17746), -INT16_C( 21698), -INT16_C( 29687), -INT16_C( 14392), INT16_C( 10303), INT16_C( 28908), INT16_C( 29374), -INT16_C( 27164), INT16_C( 28516), -INT16_C( 19360), INT16_C( 18194), INT16_C( 17802), INT16_C( 21527), -INT16_C( 3034), INT16_C( 12534), INT16_C( 18661) }, { INT16_C( 9077), INT16_C( 32499), -INT16_C( 17233), -INT16_C( 4283), INT16_C( 12772), -INT16_C( 23969), INT16_C( 17571), INT16_C( 2103), -INT16_C( 26701), -INT16_C( 14660), INT16_C( 18399), -INT16_C( 2549), INT16_C( 12699), -INT16_C( 27925), -INT16_C( 12191), -INT16_C( 10534) }, { INT16_C( 0), -INT16_C( 12191), INT16_C( 0), -INT16_C( 26701), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 2103), INT16_C( 17571), INT16_C( 17571), -INT16_C( 23969) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i idx = simde_mm256_loadu_epi16(test_vec[i].idx); simde__m256i a = simde_mm256_loadu_epi16(test_vec[i].a); simde__m256i r = simde_mm256_maskz_permutexvar_epi16(test_vec[i].k, idx, a); simde_test_x86_assert_equal_i16x16(r, simde_mm256_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m256i idx = simde_test_x86_random_i16x16(); simde__m256i a = simde_test_x86_random_i16x16(); simde__m256i r = simde_mm256_maskz_permutexvar_epi16(k, idx, a); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x16(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, a, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_permutexvar_epi32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t idx[8]; const int32_t a[8]; const int32_t r[8]; } test_vec[] = { { { -INT32_C( 1307444235), -INT32_C( 1706430584), -INT32_C( 604486448), -INT32_C( 1741113544), -INT32_C( 524345514), INT32_C( 1817309466), -INT32_C( 1187810749), INT32_C( 1039629383) }, { -INT32_C( 1111689), -INT32_C( 929482504), -INT32_C( 1264348804), -INT32_C( 1622287543), INT32_C( 343870714), INT32_C( 1082183933), -INT32_C( 1963281597), -INT32_C( 1496846033) }, { INT32_C( 1082183933), -INT32_C( 1111689), -INT32_C( 1111689), -INT32_C( 1111689), -INT32_C( 1963281597), -INT32_C( 1264348804), -INT32_C( 1622287543), -INT32_C( 1496846033) } }, { { -INT32_C( 223954950), INT32_C( 1824145391), INT32_C( 438328785), INT32_C( 867790137), INT32_C( 2001222010), INT32_C( 1287178249), -INT32_C( 1411927428), -INT32_C( 1638752605) }, { INT32_C( 1167128661), INT32_C( 145836855), -INT32_C( 517811800), -INT32_C( 1189749697), INT32_C( 506486037), -INT32_C( 1586763739), INT32_C( 1045250714), INT32_C( 920428512) }, { -INT32_C( 517811800), INT32_C( 920428512), INT32_C( 145836855), INT32_C( 145836855), -INT32_C( 517811800), INT32_C( 145836855), INT32_C( 506486037), -INT32_C( 1189749697) } }, { { -INT32_C( 830772073), INT32_C( 1624714423), INT32_C( 1010956797), -INT32_C( 353020203), -INT32_C( 653711693), -INT32_C( 1451592690), -INT32_C( 1763194954), -INT32_C( 36912282) }, { -INT32_C( 406042833), INT32_C( 1900520307), INT32_C( 1923975324), -INT32_C( 1839422497), -INT32_C( 680827447), -INT32_C( 1887377703), INT32_C( 338061229), INT32_C( 1511125546) }, { INT32_C( 1511125546), INT32_C( 1511125546), -INT32_C( 1887377703), -INT32_C( 1887377703), -INT32_C( 1839422497), INT32_C( 338061229), INT32_C( 338061229), INT32_C( 338061229) } }, { { -INT32_C( 1388192454), INT32_C( 488540288), -INT32_C( 275788784), INT32_C( 
948104047), INT32_C( 705686865), -INT32_C( 2118479661), INT32_C( 580247799), INT32_C( 209495762) }, { INT32_C( 79347076), INT32_C( 1428281413), INT32_C( 323334308), -INT32_C( 313800804), -INT32_C( 2011735116), -INT32_C( 502672917), -INT32_C( 2080072015), -INT32_C( 930054076) }, { INT32_C( 323334308), INT32_C( 79347076), INT32_C( 79347076), -INT32_C( 930054076), INT32_C( 1428281413), -INT32_C( 313800804), -INT32_C( 930054076), INT32_C( 323334308) } }, { { -INT32_C( 2100475331), -INT32_C( 942084573), INT32_C( 1004215711), -INT32_C( 1742199068), INT32_C( 1814052737), -INT32_C( 1034999535), INT32_C( 205935559), INT32_C( 299161556) }, { INT32_C( 1150591265), INT32_C( 789343376), INT32_C( 1835722633), -INT32_C( 1895460339), -INT32_C( 486857007), INT32_C( 396708431), INT32_C( 1914956702), -INT32_C( 461113406) }, { INT32_C( 396708431), -INT32_C( 1895460339), -INT32_C( 461113406), -INT32_C( 486857007), INT32_C( 789343376), INT32_C( 789343376), -INT32_C( 461113406), -INT32_C( 486857007) } }, { { INT32_C( 690493337), INT32_C( 207107203), INT32_C( 695845403), INT32_C( 632848212), -INT32_C( 200756315), -INT32_C( 1676956162), INT32_C( 1527656088), -INT32_C( 1086353114) }, { INT32_C( 753428393), -INT32_C( 1220984676), INT32_C( 1457566210), -INT32_C( 696543183), INT32_C( 1254851404), -INT32_C( 907618768), INT32_C( 723842053), INT32_C( 787112837) }, { -INT32_C( 1220984676), -INT32_C( 696543183), -INT32_C( 696543183), INT32_C( 1254851404), -INT32_C( 907618768), INT32_C( 723842053), INT32_C( 753428393), INT32_C( 723842053) } }, { { INT32_C( 1717293770), INT32_C( 337548306), INT32_C( 2020277830), -INT32_C( 481368681), -INT32_C( 1725097623), -INT32_C( 178121744), -INT32_C( 1944025593), -INT32_C( 1262810391) }, { -INT32_C( 300280100), -INT32_C( 268289878), -INT32_C( 832017353), -INT32_C( 1162758319), -INT32_C( 1068179760), -INT32_C( 122308879), INT32_C( 646239549), -INT32_C( 1143324705) }, { -INT32_C( 832017353), -INT32_C( 832017353), INT32_C( 646239549), -INT32_C( 1143324705), 
-INT32_C( 268289878), -INT32_C( 300280100), -INT32_C( 1143324705), -INT32_C( 268289878) } }, { { -INT32_C( 5638827), INT32_C( 1693494061), INT32_C( 1764907031), -INT32_C( 534519025), -INT32_C( 1298106431), INT32_C( 1806325294), INT32_C( 194064171), -INT32_C( 993629074) }, { -INT32_C( 1899794335), INT32_C( 871609115), INT32_C( 463217932), -INT32_C( 889471223), INT32_C( 1702730807), INT32_C( 500180978), -INT32_C( 987209386), INT32_C( 780791757) }, { INT32_C( 500180978), INT32_C( 500180978), INT32_C( 780791757), INT32_C( 780791757), INT32_C( 871609115), -INT32_C( 987209386), -INT32_C( 889471223), -INT32_C( 987209386) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i idx = simde_mm256_loadu_epi32(test_vec[i].idx); simde__m256i a = simde_mm256_loadu_epi32(test_vec[i].a); simde__m256i r = simde_mm256_permutexvar_epi32(idx, a); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i idx = simde_test_x86_random_i32x8(); simde__m256i a = simde_test_x86_random_i32x8(); simde__m256i r = simde_mm256_permutexvar_epi32(idx, a); simde_test_x86_write_i32x8(2, idx, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask_permutexvar_epi32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t src[8]; const simde__mmask8 k; const int32_t idx[8]; const int32_t a[8]; const int32_t r[8]; } test_vec[] = { { { INT32_C( 2059226207), INT32_C( 212709376), -INT32_C( 567850539), INT32_C( 1084826377), -INT32_C( 1314576705), -INT32_C( 1529973427), -INT32_C( 1536559146), INT32_C( 1171452646) }, UINT8_C( 62), { INT32_C( 1061076879), -INT32_C( 1240118675), -INT32_C( 1782582414), -INT32_C( 1034616932), INT32_C( 420480420), -INT32_C( 890194733), INT32_C( 263230493), -INT32_C( 162663066) }, { 
INT32_C( 573934773), INT32_C( 1222134486), -INT32_C( 623011778), INT32_C( 1016934807), INT32_C( 173387062), INT32_C( 2127840609), INT32_C( 1083016665), INT32_C( 792124026) }, { INT32_C( 2059226207), INT32_C( 2127840609), -INT32_C( 623011778), INT32_C( 173387062), INT32_C( 173387062), INT32_C( 1016934807), -INT32_C( 1536559146), INT32_C( 1171452646) } }, { { INT32_C( 1028746086), -INT32_C( 192599370), INT32_C( 1506698178), -INT32_C( 879400044), INT32_C( 2044062488), INT32_C( 184002864), -INT32_C( 1454734290), -INT32_C( 975667105) }, UINT8_C(235), { INT32_C( 1419837994), -INT32_C( 350841464), -INT32_C( 813731996), -INT32_C( 253212155), -INT32_C( 920624865), -INT32_C( 571004327), -INT32_C( 197353356), -INT32_C( 1545600391) }, { -INT32_C( 1929936636), INT32_C( 2071399703), -INT32_C( 2125793412), INT32_C( 1634808385), -INT32_C( 315977068), INT32_C( 835330492), INT32_C( 992282562), INT32_C( 232654089) }, { -INT32_C( 2125793412), -INT32_C( 1929936636), INT32_C( 1506698178), INT32_C( 835330492), INT32_C( 2044062488), INT32_C( 2071399703), -INT32_C( 315977068), INT32_C( 2071399703) } }, { { -INT32_C( 1634019961), INT32_C( 1578701282), INT32_C( 1256219400), INT32_C( 715870614), -INT32_C( 1609050653), -INT32_C( 1194204682), -INT32_C( 235669784), -INT32_C( 2080386564) }, UINT8_C(166), { -INT32_C( 1433919079), -INT32_C( 1649154246), INT32_C( 406060487), INT32_C( 2113691048), INT32_C( 1450417268), INT32_C( 1665018989), -INT32_C( 245420001), -INT32_C( 929570257) }, { INT32_C( 1030889475), -INT32_C( 824498937), -INT32_C( 890892766), -INT32_C( 532159892), -INT32_C( 348734594), INT32_C( 105870823), -INT32_C( 721965403), -INT32_C( 1801678959) }, { -INT32_C( 1634019961), -INT32_C( 890892766), -INT32_C( 1801678959), INT32_C( 715870614), -INT32_C( 1609050653), INT32_C( 105870823), -INT32_C( 235669784), -INT32_C( 1801678959) } }, { { -INT32_C( 1227747665), INT32_C( 1451535668), INT32_C( 656501435), -INT32_C( 888641459), INT32_C( 179781155), INT32_C( 1477510835), INT32_C( 1160513716), 
INT32_C( 1188743319) }, UINT8_C(215), { INT32_C( 1493957804), -INT32_C( 350985856), -INT32_C( 348636030), -INT32_C( 2113010876), -INT32_C( 1070196294), INT32_C( 846564906), -INT32_C( 2083865925), INT32_C( 1079644308) }, { -INT32_C( 1919326963), INT32_C( 1232645831), INT32_C( 775205098), INT32_C( 1857045428), -INT32_C( 2043681188), INT32_C( 817407093), -INT32_C( 206339490), -INT32_C( 1607266669) }, { -INT32_C( 2043681188), -INT32_C( 1919326963), INT32_C( 775205098), -INT32_C( 888641459), INT32_C( 775205098), INT32_C( 1477510835), INT32_C( 1857045428), -INT32_C( 2043681188) } }, { { INT32_C( 976080243), INT32_C( 1703192187), INT32_C( 177453142), INT32_C( 1467565051), -INT32_C( 1612863446), -INT32_C( 1429236148), -INT32_C( 1415740904), INT32_C( 72077712) }, UINT8_C(158), { INT32_C( 521748089), INT32_C( 2071297730), INT32_C( 1417052177), -INT32_C( 1585524999), INT32_C( 1106058667), INT32_C( 1868208108), INT32_C( 117376309), -INT32_C( 895220911) }, { INT32_C( 82427202), INT32_C( 1283415611), -INT32_C( 677316898), INT32_C( 1870143428), INT32_C( 699491644), INT32_C( 848825341), INT32_C( 1631098640), -INT32_C( 584328037) }, { INT32_C( 976080243), -INT32_C( 677316898), INT32_C( 1283415611), INT32_C( 1283415611), INT32_C( 1870143428), -INT32_C( 1429236148), -INT32_C( 1415740904), INT32_C( 1283415611) } }, { { -INT32_C( 723446631), INT32_C( 1361076595), INT32_C( 455721047), INT32_C( 478847455), INT32_C( 71645959), INT32_C( 1463213382), INT32_C( 263745140), -INT32_C( 471014326) }, UINT8_C(248), { INT32_C( 795588558), -INT32_C( 1752777257), -INT32_C( 2022202906), INT32_C( 1737397035), -INT32_C( 1246915880), INT32_C( 908657864), -INT32_C( 1585432131), -INT32_C( 208051419) }, { -INT32_C( 249428710), -INT32_C( 1467373374), INT32_C( 1966014537), INT32_C( 1809628819), INT32_C( 421562704), INT32_C( 1263487374), -INT32_C( 1460875134), INT32_C( 1318815027) }, { -INT32_C( 723446631), INT32_C( 1361076595), INT32_C( 455721047), INT32_C( 1809628819), -INT32_C( 249428710), -INT32_C( 
249428710), INT32_C( 1263487374), INT32_C( 1263487374) } }, { { INT32_C( 1279245962), -INT32_C( 1326135194), INT32_C( 1529160904), INT32_C( 851837410), INT32_C( 407627402), -INT32_C( 1302095056), -INT32_C( 1638248597), INT32_C( 1609365205) }, UINT8_C(180), { -INT32_C( 199578580), -INT32_C( 994194784), -INT32_C( 257550097), -INT32_C( 981739041), -INT32_C( 1074425052), INT32_C( 1177200887), -INT32_C( 132396798), -INT32_C( 508789835) }, { -INT32_C( 942225625), -INT32_C( 2121493615), -INT32_C( 1972292949), INT32_C( 810609675), INT32_C( 2012169600), -INT32_C( 256042259), -INT32_C( 1729570333), INT32_C( 2071631188) }, { INT32_C( 1279245962), -INT32_C( 1326135194), INT32_C( 2071631188), INT32_C( 851837410), INT32_C( 2012169600), INT32_C( 2071631188), -INT32_C( 1638248597), -INT32_C( 256042259) } }, { { -INT32_C( 314421156), -INT32_C( 1905340701), INT32_C( 203022337), INT32_C( 1279027660), -INT32_C( 1664930642), INT32_C( 697073990), -INT32_C( 1363053478), INT32_C( 1697200905) }, UINT8_C(139), { INT32_C( 980308843), -INT32_C( 1589903935), INT32_C( 2137933590), -INT32_C( 1339180412), -INT32_C( 603778), -INT32_C( 900128939), INT32_C( 483592160), -INT32_C( 1683539664) }, { INT32_C( 1305810572), INT32_C( 703533075), -INT32_C( 592945832), -INT32_C( 1785932521), -INT32_C( 191593825), -INT32_C( 2118193759), INT32_C( 614306548), INT32_C( 1472218571) }, { -INT32_C( 1785932521), INT32_C( 703533075), INT32_C( 203022337), -INT32_C( 191593825), -INT32_C( 1664930642), INT32_C( 697073990), -INT32_C( 1363053478), INT32_C( 1305810572) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i src = simde_mm256_loadu_epi32(test_vec[i].src); simde__m256i idx = simde_mm256_loadu_epi32(test_vec[i].idx); simde__m256i a = simde_mm256_loadu_epi32(test_vec[i].a); simde__m256i r = simde_mm256_mask_permutexvar_epi32(src, test_vec[i].k, idx, a); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for 
(int i = 0 ; i < 8 ; i++) { simde__m256i src = simde_test_x86_random_i32x8(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i idx = simde_test_x86_random_i32x8(); simde__m256i a = simde_test_x86_random_i32x8(); simde__m256i r = simde_mm256_mask_permutexvar_epi32(src, k, idx, a); simde_test_x86_write_i32x8(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_maskz_permutexvar_epi32(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask8 k; const int32_t idx[8]; const int32_t a[8]; const int32_t r[8]; } test_vec[] = { { UINT8_C( 91), { -INT32_C( 1502698347), -INT32_C( 251750509), INT32_C( 336124479), -INT32_C( 390881946), INT32_C( 529115186), -INT32_C( 132904346), -INT32_C( 305973080), -INT32_C( 1907877128) }, { INT32_C( 1379186367), -INT32_C( 1941753267), INT32_C( 1923107596), INT32_C( 442127592), INT32_C( 1648026620), -INT32_C( 1789178131), INT32_C( 2139233926), -INT32_C( 133313991) }, { -INT32_C( 1789178131), INT32_C( 442127592), INT32_C( 0), INT32_C( 2139233926), INT32_C( 1923107596), INT32_C( 0), INT32_C( 1379186367), INT32_C( 0) } }, { UINT8_C(128), { INT32_C( 1942833729), -INT32_C( 662742643), INT32_C( 1321333242), INT32_C( 776657739), INT32_C( 1662757909), INT32_C( 652914951), -INT32_C( 27301581), -INT32_C( 1216456842) }, { INT32_C( 791301281), -INT32_C( 1626887771), -INT32_C( 437401446), -INT32_C( 1189922908), -INT32_C( 350408988), INT32_C( 319883231), -INT32_C( 435064720), INT32_C( 1771933639) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 435064720) } }, { UINT8_C(219), { INT32_C( 1887541447), INT32_C( 1745494175), INT32_C( 1158475534), INT32_C( 808043778), -INT32_C( 384821790), -INT32_C( 
1755765978), -INT32_C( 1017233612), -INT32_C( 1549809700) }, { -INT32_C( 15523745), INT32_C( 1315380544), INT32_C( 244609804), INT32_C( 457162040), -INT32_C( 117157934), -INT32_C( 1500488078), INT32_C( 2053762717), INT32_C( 354224310) }, { INT32_C( 354224310), INT32_C( 354224310), INT32_C( 0), INT32_C( 244609804), INT32_C( 244609804), INT32_C( 0), -INT32_C( 117157934), -INT32_C( 117157934) } }, { UINT8_C( 40), { INT32_C( 1315509297), -INT32_C( 295979141), INT32_C( 153577803), -INT32_C( 136625496), -INT32_C( 1536568250), INT32_C( 1396838244), -INT32_C( 2130068359), INT32_C( 178921177) }, { -INT32_C( 1369894093), INT32_C( 379434186), -INT32_C( 970996707), INT32_C( 1287518726), INT32_C( 871442383), -INT32_C( 1350159562), -INT32_C( 936275985), -INT32_C( 506209362) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1369894093), INT32_C( 0), INT32_C( 871442383), INT32_C( 0), INT32_C( 0) } }, { UINT8_C(238), { -INT32_C( 524775636), -INT32_C( 251802068), -INT32_C( 386481171), -INT32_C( 1464384639), INT32_C( 1742662196), INT32_C( 5672561), -INT32_C( 1699864897), INT32_C( 495489009) }, { INT32_C( 1274888223), -INT32_C( 63112433), INT32_C( 1071919806), -INT32_C( 1444439179), -INT32_C( 149895546), INT32_C( 334980692), INT32_C( 1991091845), INT32_C( 1418999093) }, { INT32_C( 0), -INT32_C( 149895546), INT32_C( 334980692), -INT32_C( 63112433), INT32_C( 0), -INT32_C( 63112433), INT32_C( 1418999093), -INT32_C( 63112433) } }, { UINT8_C(117), { -INT32_C( 1937465199), INT32_C( 239829468), INT32_C( 25463397), INT32_C( 948383090), -INT32_C( 1534296514), INT32_C( 455712629), -INT32_C( 2125356980), -INT32_C( 973691596) }, { INT32_C( 559053637), INT32_C( 1630576124), -INT32_C( 1721584601), INT32_C( 533850593), -INT32_C( 574333593), INT32_C( 1224273404), -INT32_C( 1043773043), INT32_C( 881311983) }, { INT32_C( 1630576124), INT32_C( 0), INT32_C( 1224273404), INT32_C( 0), -INT32_C( 1043773043), INT32_C( 1224273404), -INT32_C( 574333593), INT32_C( 0) } }, { UINT8_C( 59), { INT32_C( 
1983338201), INT32_C( 983406726), -INT32_C( 467978245), INT32_C( 1716271880), -INT32_C( 329111041), INT32_C( 1803201313), INT32_C( 878328692), -INT32_C( 1687187774) }, { INT32_C( 1779541732), INT32_C( 983871295), -INT32_C( 299974426), -INT32_C( 95130885), -INT32_C( 1243170925), -INT32_C( 702521246), INT32_C( 1577810588), -INT32_C( 302351864) }, { INT32_C( 983871295), INT32_C( 1577810588), INT32_C( 0), INT32_C( 1779541732), -INT32_C( 302351864), INT32_C( 983871295), INT32_C( 0), INT32_C( 0) } }, { UINT8_C( 33), { -INT32_C( 1168091381), -INT32_C( 1130325252), INT32_C( 599232440), -INT32_C( 1699302941), -INT32_C( 117675113), INT32_C( 93639563), INT32_C( 1477374686), -INT32_C( 126223380) }, { INT32_C( 1320343890), INT32_C( 738874227), -INT32_C( 984628766), INT32_C( 174065010), -INT32_C( 66954127), INT32_C( 218273327), INT32_C( 1969623176), INT32_C( 1567481611) }, { INT32_C( 174065010), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 174065010), INT32_C( 0), INT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i idx = simde_mm256_loadu_epi32(test_vec[i].idx); simde__m256i a = simde_mm256_loadu_epi32(test_vec[i].a); simde__m256i r = simde_mm256_maskz_permutexvar_epi32(test_vec[i].k, idx, a); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i idx = simde_test_x86_random_i32x8(); simde__m256i a = simde_test_x86_random_i32x8(); simde__m256i r = simde_mm256_maskz_permutexvar_epi32(k, idx, a); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_permutexvar_epi64(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const 
int64_t idx[4]; const int64_t a[4]; const int64_t r[4]; } test_vec[8] = { { { INT64_C( 3465447535646586345), INT64_C( 3112574604386585021), -INT64_C( 3298717360923514682), INT64_C( 5323359623116614530) }, { INT64_C( 5940711443930208020), -INT64_C( 6109672874813763858), INT64_C( 1105777252857738108), INT64_C( 5770626510813846893) }, { -INT64_C( 6109672874813763858), -INT64_C( 6109672874813763858), INT64_C( 1105777252857738108), INT64_C( 1105777252857738108) } }, { { INT64_C( 3900401264022889273), -INT64_C( 7755349089616591920), -INT64_C( 7492176496441054870), INT64_C( 2727909151360780622) }, { INT64_C( 4511785965612985374), -INT64_C( 4628483991183106535), -INT64_C( 7007675447010468101), -INT64_C( 4803647970284657595) }, { -INT64_C( 4628483991183106535), INT64_C( 4511785965612985374), -INT64_C( 7007675447010468101), -INT64_C( 7007675447010468101) } }, { { INT64_C( 8654722626887043695), INT64_C( 6607463866478169758), INT64_C( 7654221770983106635), -INT64_C( 3115662575998901914) }, { INT64_C( 4977548128634988091), INT64_C( 8473499790535605467), -INT64_C( 4403595353433040557), -INT64_C( 6060566984611365205) }, { -INT64_C( 6060566984611365205), -INT64_C( 4403595353433040557), -INT64_C( 6060566984611365205), -INT64_C( 4403595353433040557) } }, { { INT64_C( 4565342033187083888), -INT64_C( 3263400542231358442), -INT64_C( 4951375666792905173), INT64_C( 777064012164305068) }, { INT64_C( 9217034901363166790), -INT64_C( 6745700139769306096), -INT64_C( 8716872267551101636), -INT64_C( 4096186034266254718) }, { INT64_C( 9217034901363166790), -INT64_C( 8716872267551101636), -INT64_C( 4096186034266254718), INT64_C( 9217034901363166790) } }, { { -INT64_C( 3070791090430719878), -INT64_C( 3181573684470668865), -INT64_C( 732355612710600332), INT64_C( 7007947256713345784) }, { -INT64_C( 3666591945649245566), -INT64_C( 4813825418525965625), INT64_C( 7945724640348623068), INT64_C( 1415301886787475293) }, { INT64_C( 7945724640348623068), INT64_C( 1415301886787475293), -INT64_C( 
3666591945649245566), -INT64_C( 3666591945649245566) } }, { { INT64_C( 6248314088218945755), INT64_C( 8070704056713394682), INT64_C( 5411923700867861558), INT64_C( 648293139388236634) }, { -INT64_C( 3746810366719337956), -INT64_C( 3358542259622529696), -INT64_C( 9127219581637745893), INT64_C( 3734164790968116381) }, { INT64_C( 3734164790968116381), -INT64_C( 9127219581637745893), -INT64_C( 9127219581637745893), -INT64_C( 9127219581637745893) } }, { { -INT64_C( 3862999505957895227), -INT64_C( 7205721465562434925), -INT64_C( 5265557781999092155), -INT64_C( 8490883141550974497) }, { INT64_C( 8811721370948929557), INT64_C( 7932431214771177463), -INT64_C( 7640982706706597081), -INT64_C( 2563191643271564001) }, { INT64_C( 7932431214771177463), -INT64_C( 2563191643271564001), INT64_C( 7932431214771177463), -INT64_C( 2563191643271564001) } }, { { INT64_C( 801258192282292158), -INT64_C( 7619357814459938901), INT64_C( 117599237026749525), -INT64_C( 6780922528876188351) }, { -INT64_C( 8070339593422618399), INT64_C( 8372656936451409195), INT64_C( 1487915019302602691), -INT64_C( 4696126461883675825) }, { INT64_C( 1487915019302602691), -INT64_C( 4696126461883675825), INT64_C( 8372656936451409195), INT64_C( 8372656936451409195) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i idx = simde_mm256_loadu_epi64(test_vec[i].idx); simde__m256i a = simde_mm256_loadu_epi64(test_vec[i].a); simde__m256i r = simde_mm256_permutexvar_epi64(idx, a); simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i idx = simde_test_x86_random_i64x4(); simde__m256i a = simde_test_x86_random_i64x4(); simde__m256i r = simde_mm256_permutexvar_epi64(idx, a); simde_test_x86_write_i64x4(2, idx, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } 
static int test_simde_mm256_mask_permutexvar_epi64(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const int64_t src[4]; const simde__mmask8 k; const int64_t idx[4]; const int64_t a[4]; const int64_t r[4]; } test_vec[8] = { { { -INT64_C( 6145245807815484862), -INT64_C( 8661416298178864072), -INT64_C( 4727139763093681026), -INT64_C( 4181093611599723938) }, UINT8_C(224), { -INT64_C( 4568891220371182420), INT64_C( 595615055643734095), INT64_C( 9037376429611259449), INT64_C( 5533734961594607142) }, { -INT64_C( 9000649239165130061), -INT64_C( 2163542389232657355), -INT64_C( 3775044921414815709), INT64_C( 1723614651087298047) }, { -INT64_C( 6145245807815484862), -INT64_C( 8661416298178864072), -INT64_C( 4727139763093681026), -INT64_C( 4181093611599723938) } }, { { INT64_C( 1325331378370156717), INT64_C( 6821541915016801569), -INT64_C( 1162463134921942931), -INT64_C( 4309247653774977609) }, UINT8_C( 18), { -INT64_C( 5312276096657414752), INT64_C( 6011548742202757124), -INT64_C( 892782143459852084), -INT64_C( 8552951516681816127) }, { INT64_C( 4698651055233121396), INT64_C( 4617508433955005702), INT64_C( 5512619848525830191), -INT64_C( 3535935548222305414) }, { INT64_C( 1325331378370156717), INT64_C( 4698651055233121396), -INT64_C( 1162463134921942931), -INT64_C( 4309247653774977609) } }, { { -INT64_C( 7453033442925831150), -INT64_C( 5594756606993968218), -INT64_C( 4711116760529534522), INT64_C( 969134818784781899) }, UINT8_C(170), { -INT64_C( 4474372485495890496), -INT64_C( 2349483318866410961), INT64_C( 8737231027810055477), INT64_C( 1782442867293545731) }, { -INT64_C( 248188577289471777), -INT64_C( 251639970792811965), -INT64_C( 8538770441765804877), INT64_C( 6307104878424159510) }, { -INT64_C( 7453033442925831150), INT64_C( 6307104878424159510), -INT64_C( 4711116760529534522), INT64_C( 6307104878424159510) } }, { { INT64_C( 7285050831330992463), INT64_C( 7226457372175610757), -INT64_C( 8990332279521068244), INT64_C( 8913038368937531105) }, UINT8_C( 90), { INT64_C( 
7210230780587186390), -INT64_C( 2261517686396330775), INT64_C( 7039918466464043237), -INT64_C( 7812045795274912574) }, { -INT64_C( 2470739017216552287), INT64_C( 3030337906147249031), -INT64_C( 1429421007458051524), INT64_C( 7064387224332711865) }, { INT64_C( 7285050831330992463), INT64_C( 3030337906147249031), -INT64_C( 8990332279521068244), -INT64_C( 1429421007458051524) } }, { { -INT64_C( 2764529222298622256), INT64_C( 1847905064078209334), INT64_C( 541849563344969838), INT64_C( 4315441763754880448) }, UINT8_C(173), { -INT64_C( 7765937887782174569), -INT64_C( 405453318237604642), INT64_C( 3041144148134000746), INT64_C( 3459838291440154062) }, { -INT64_C( 5185776384920649007), INT64_C( 7886938757980059936), -INT64_C( 2618818936096834303), -INT64_C( 2408243768127898165) }, { -INT64_C( 2408243768127898165), INT64_C( 1847905064078209334), -INT64_C( 2618818936096834303), -INT64_C( 2618818936096834303) } }, { { INT64_C( 5964835402527458869), INT64_C( 8882295261415371219), INT64_C( 569038202891478525), INT64_C( 9208453081059759838) }, UINT8_C(197), { -INT64_C( 4086373076156576228), -INT64_C( 8479226529983684331), INT64_C( 6427988257266884277), INT64_C( 503315643414814520) }, { INT64_C( 6038701740289553584), INT64_C( 9212390983602867944), -INT64_C( 6631821568448815526), INT64_C( 7861269252833712681) }, { INT64_C( 6038701740289553584), INT64_C( 8882295261415371219), INT64_C( 9212390983602867944), INT64_C( 9208453081059759838) } }, { { INT64_C( 4997660079327253962), INT64_C( 6600424377078187087), -INT64_C( 6838733108989563127), -INT64_C( 7875444692090556021) }, UINT8_C( 98), { -INT64_C( 452606310463071704), -INT64_C( 2662941931776388457), -INT64_C( 8662122382630879411), -INT64_C( 1308328928534916951) }, { INT64_C( 3632450635218458235), INT64_C( 299560531238675169), INT64_C( 1863939872889404352), -INT64_C( 8516757631211197156) }, { INT64_C( 4997660079327253962), -INT64_C( 8516757631211197156), -INT64_C( 6838733108989563127), -INT64_C( 7875444692090556021) } }, { { INT64_C( 
1426060201470534909), INT64_C( 36178607548202948), -INT64_C( 1545239560821751292), -INT64_C( 9153397452835185311) }, UINT8_C(189), { -INT64_C( 2154968146796144399), -INT64_C( 2958827486221003933), INT64_C( 1085243208166040697), INT64_C( 7543269306760658521) }, { INT64_C( 428357227795750444), -INT64_C( 5813618019834719129), -INT64_C( 2573969351176751803), INT64_C( 1930493853078313706) }, { -INT64_C( 5813618019834719129), INT64_C( 36178607548202948), -INT64_C( 5813618019834719129), -INT64_C( 5813618019834719129) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i src = simde_mm256_loadu_epi64(test_vec[i].src); simde__m256i idx = simde_mm256_loadu_epi64(test_vec[i].idx); simde__m256i a = simde_mm256_loadu_epi64(test_vec[i].a); simde__m256i r = simde_mm256_mask_permutexvar_epi64(src, test_vec[i].k, idx, a); simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i src = simde_test_x86_random_i64x4(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i idx = simde_test_x86_random_i64x4(); simde__m256i a = simde_test_x86_random_i64x4(); simde__m256i r = simde_mm256_mask_permutexvar_epi64(src, k, idx, a); simde_test_x86_write_i64x4(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_maskz_permutexvar_epi64(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde__mmask8 k; const int64_t idx[4]; const int64_t a[4]; const int64_t r[4]; } test_vec[8] = { { UINT8_C( 27), { INT64_C( 3369187414697126278), -INT64_C( 9018154180520100685), INT64_C( 3731480067352891761), INT64_C( 8739300992515910063) }, { INT64_C( 8839886327673718116), -INT64_C( 
7430403753900109103), INT64_C( 8684416486551775225), INT64_C( 5176022328699823975) }, { INT64_C( 8684416486551775225), INT64_C( 5176022328699823975), INT64_C( 0), INT64_C( 5176022328699823975) } }, { UINT8_C(241), { -INT64_C( 8787030352415111512), INT64_C( 3900474958566747456), -INT64_C( 6962152737219906629), INT64_C( 4105534155763399707) }, { INT64_C( 2632500097026732431), INT64_C( 3924444501427107313), INT64_C( 466283003580020192), INT64_C( 5914923698406488653) }, { INT64_C( 2632500097026732431), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(184), { INT64_C( 1369588916891178379), INT64_C( 3268422251213373709), -INT64_C( 6353981294143589132), INT64_C( 5903301870556524589) }, { INT64_C( 6269963180229888274), INT64_C( 5370593499786940835), -INT64_C( 1317405420199894686), -INT64_C( 979236192142679470) }, { INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 5370593499786940835) } }, { UINT8_C(150), { INT64_C( 8561405448582914762), INT64_C( 6168547978405161738), -INT64_C( 8946116566893352219), INT64_C( 2722238345616027939) }, { -INT64_C( 1544134480098121929), INT64_C( 6365483424972692813), -INT64_C( 8048574177363399832), INT64_C( 4334863096951648425) }, { INT64_C( 0), -INT64_C( 8048574177363399832), INT64_C( 6365483424972692813), INT64_C( 0) } }, { UINT8_C( 47), { INT64_C( 5181665057797199994), -INT64_C( 6178918623323952869), INT64_C( 527668101296622321), -INT64_C( 567936792988461464) }, { INT64_C( 4670650789240516000), -INT64_C( 8047833996201456538), INT64_C( 8629332360195303267), -INT64_C( 5379121953059189278) }, { INT64_C( 8629332360195303267), -INT64_C( 5379121953059189278), -INT64_C( 8047833996201456538), INT64_C( 4670650789240516000) } }, { UINT8_C(178), { -INT64_C( 7075410026506534931), INT64_C( 3237749231414113585), INT64_C( 3995733465180645381), INT64_C( 1823960239742767897) }, { INT64_C( 8138094958596007746), -INT64_C( 235900256028775481), -INT64_C( 4597330073135221615), -INT64_C( 4316226894482734893) }, { INT64_C( 0), -INT64_C( 235900256028775481), 
INT64_C( 0), INT64_C( 0) } }, { UINT8_C(213), { INT64_C( 3726312010818066332), -INT64_C( 1082228419914480768), -INT64_C( 6231197197802146125), INT64_C( 4339764287597479605) }, { INT64_C( 6071664643769323016), INT64_C( 4976962000991906853), -INT64_C( 5295530570217379409), -INT64_C( 6421479277348671995) }, { INT64_C( 6071664643769323016), INT64_C( 0), -INT64_C( 6421479277348671995), INT64_C( 0) } }, { UINT8_C(160), { INT64_C( 7283667468859782669), -INT64_C( 1086144558918060667), -INT64_C( 740096361840458443), INT64_C( 5742998000986513344) }, { INT64_C( 8380728246458525680), -INT64_C( 5134209665999944312), INT64_C( 1630908168763388005), INT64_C( 6996021448102459014) }, { INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i idx = simde_mm256_loadu_epi64(test_vec[i].idx); simde__m256i a = simde_mm256_loadu_epi64(test_vec[i].a); simde__m256i r = simde_mm256_maskz_permutexvar_epi64(test_vec[i].k, idx, a); simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i idx = simde_test_x86_random_i64x4(); simde__m256i a = simde_test_x86_random_i64x4(); simde__m256i r = simde_mm256_maskz_permutexvar_epi64(k, idx, a); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_permutexvar_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t idx[32]; const int8_t a[32]; const int8_t r[32]; } test_vec[] = { { { INT8_C( 68), -INT8_C( 19), INT8_C( 106), INT8_C( 42), INT8_C( 79), INT8_C( 38), INT8_C( 16), -INT8_C( 110), INT8_C( 30), -INT8_C( 123), -INT8_C( 25), -INT8_C( 91), -INT8_C( 106), 
INT8_C( 84), -INT8_C( 99), INT8_C( 86), INT8_C( 104), -INT8_C( 2), INT8_C( 83), INT8_C( 91), INT8_C( 103), INT8_C( 0), INT8_C( 29), -INT8_C( 11), -INT8_C( 51), -INT8_C( 11), INT8_C( 85), INT8_C( 85), INT8_C( 102), -INT8_C( 55), -INT8_C( 18), -INT8_C( 86) }, { -INT8_C( 74), INT8_C( 88), -INT8_C( 44), INT8_C( 5), INT8_C( 126), -INT8_C( 27), -INT8_C( 105), -INT8_C( 100), INT8_C( 106), INT8_MAX, INT8_C( 65), INT8_C( 0), -INT8_C( 45), -INT8_C( 33), INT8_C( 87), INT8_C( 59), -INT8_C( 35), -INT8_C( 86), -INT8_C( 106), INT8_C( 68), -INT8_C( 86), -INT8_C( 77), INT8_C( 57), INT8_C( 120), -INT8_C( 88), -INT8_C( 113), -INT8_C( 51), INT8_C( 14), INT8_C( 88), -INT8_C( 69), -INT8_C( 71), INT8_C( 14) }, { INT8_C( 126), -INT8_C( 33), INT8_C( 65), INT8_C( 65), INT8_C( 59), -INT8_C( 105), -INT8_C( 35), -INT8_C( 106), -INT8_C( 71), -INT8_C( 27), -INT8_C( 100), -INT8_C( 27), INT8_C( 57), -INT8_C( 86), -INT8_C( 69), INT8_C( 57), INT8_C( 106), -INT8_C( 71), INT8_C( 68), INT8_C( 14), -INT8_C( 100), -INT8_C( 74), -INT8_C( 69), -INT8_C( 77), -INT8_C( 33), -INT8_C( 77), -INT8_C( 77), -INT8_C( 77), -INT8_C( 105), INT8_MAX, INT8_C( 87), INT8_C( 65) } }, { { INT8_C( 19), -INT8_C( 115), INT8_C( 19), -INT8_C( 111), INT8_C( 114), -INT8_C( 86), INT8_C( 45), -INT8_C( 36), INT8_C( 41), INT8_C( 110), -INT8_C( 35), -INT8_C( 4), INT8_C( 77), INT8_C( 52), INT8_C( 55), INT8_C( 42), -INT8_C( 34), -INT8_C( 51), INT8_C( 111), -INT8_C( 120), INT8_MIN, -INT8_C( 88), INT8_C( 0), INT8_C( 40), INT8_C( 55), -INT8_C( 51), INT8_C( 55), -INT8_C( 113), -INT8_C( 120), -INT8_C( 16), -INT8_C( 99), -INT8_C( 101) }, { INT8_C( 125), -INT8_C( 80), INT8_C( 44), -INT8_C( 16), INT8_C( 91), INT8_C( 89), -INT8_C( 52), -INT8_C( 124), -INT8_C( 56), -INT8_C( 87), -INT8_C( 127), INT8_C( 21), -INT8_C( 35), -INT8_C( 72), INT8_C( 64), -INT8_C( 69), -INT8_C( 122), -INT8_C( 81), INT8_C( 68), INT8_C( 6), INT8_C( 87), INT8_C( 68), INT8_C( 47), -INT8_C( 113), INT8_C( 18), INT8_C( 102), INT8_C( 30), -INT8_C( 102), INT8_C( 86), -INT8_C( 68), 
INT8_C( 54), -INT8_C( 45) }, { INT8_C( 6), -INT8_C( 72), INT8_C( 6), -INT8_C( 81), INT8_C( 68), -INT8_C( 127), -INT8_C( 72), INT8_C( 86), -INT8_C( 87), INT8_C( 64), -INT8_C( 68), INT8_C( 86), -INT8_C( 72), INT8_C( 87), -INT8_C( 113), -INT8_C( 127), INT8_C( 54), -INT8_C( 72), -INT8_C( 69), -INT8_C( 56), INT8_C( 125), -INT8_C( 56), INT8_C( 125), -INT8_C( 56), -INT8_C( 113), -INT8_C( 72), -INT8_C( 113), -INT8_C( 69), -INT8_C( 56), -INT8_C( 122), -INT8_C( 68), -INT8_C( 102) } }, { { INT8_C( 108), INT8_C( 98), -INT8_C( 61), -INT8_C( 57), -INT8_C( 68), -INT8_C( 112), INT8_C( 76), -INT8_C( 124), INT8_C( 57), -INT8_C( 51), -INT8_C( 103), INT8_C( 23), -INT8_C( 123), -INT8_C( 39), -INT8_C( 46), INT8_C( 11), -INT8_C( 120), INT8_C( 22), INT8_C( 18), -INT8_C( 32), INT8_C( 91), INT8_C( 65), INT8_C( 111), INT8_C( 109), -INT8_C( 89), -INT8_C( 115), INT8_C( 7), -INT8_C( 3), INT8_C( 73), INT8_C( 61), -INT8_C( 48), -INT8_C( 74) }, { -INT8_C( 96), -INT8_C( 108), INT8_C( 125), INT8_C( 92), INT8_C( 36), -INT8_C( 55), -INT8_C( 32), INT8_C( 93), -INT8_C( 106), INT8_C( 121), INT8_C( 116), INT8_C( 28), INT8_C( 83), INT8_C( 71), INT8_C( 39), -INT8_C( 37), INT8_C( 93), INT8_C( 57), -INT8_C( 69), -INT8_C( 72), INT8_C( 122), INT8_C( 42), INT8_C( 37), INT8_C( 33), -INT8_C( 72), INT8_C( 45), INT8_C( 30), INT8_C( 1), INT8_C( 106), -INT8_C( 17), -INT8_C( 73), INT8_C( 10) }, { INT8_C( 83), INT8_C( 125), INT8_C( 92), INT8_C( 93), INT8_C( 106), INT8_C( 93), INT8_C( 83), INT8_C( 36), INT8_C( 45), INT8_C( 71), INT8_C( 45), INT8_C( 33), -INT8_C( 55), INT8_C( 45), -INT8_C( 69), INT8_C( 28), -INT8_C( 106), INT8_C( 37), -INT8_C( 69), -INT8_C( 96), INT8_C( 1), -INT8_C( 108), -INT8_C( 37), INT8_C( 71), INT8_C( 93), INT8_C( 71), INT8_C( 93), -INT8_C( 17), INT8_C( 121), -INT8_C( 17), INT8_C( 93), INT8_C( 37) } }, { { -INT8_C( 125), INT8_C( 53), INT8_C( 102), -INT8_C( 89), -INT8_C( 2), INT8_C( 70), INT8_C( 4), -INT8_C( 107), -INT8_C( 64), INT8_C( 121), -INT8_C( 79), INT8_C( 19), -INT8_C( 64), -INT8_C( 40), 
-INT8_C( 18), INT8_C( 29), INT8_C( 18), -INT8_C( 86), -INT8_C( 42), -INT8_C( 116), -INT8_C( 44), -INT8_C( 5), -INT8_C( 82), -INT8_C( 116), INT8_C( 40), -INT8_C( 52), -INT8_C( 114), -INT8_C( 109), -INT8_C( 69), INT8_C( 69), -INT8_C( 99), INT8_C( 62) }, { INT8_C( 122), INT8_C( 4), -INT8_C( 27), INT8_C( 121), INT8_C( 74), -INT8_C( 22), INT8_C( 14), INT8_C( 10), INT8_C( 99), -INT8_C( 65), INT8_C( 29), INT8_C( 35), -INT8_C( 105), INT8_C( 12), INT8_C( 64), -INT8_C( 87), -INT8_C( 74), INT8_C( 22), INT8_C( 54), -INT8_C( 118), INT8_C( 18), -INT8_C( 28), INT8_C( 23), INT8_C( 58), -INT8_C( 80), -INT8_C( 91), -INT8_C( 51), INT8_C( 108), -INT8_C( 22), INT8_C( 107), -INT8_C( 86), INT8_C( 101) }, { INT8_C( 121), -INT8_C( 28), INT8_C( 14), INT8_C( 10), -INT8_C( 86), INT8_C( 14), INT8_C( 74), -INT8_C( 28), INT8_C( 122), -INT8_C( 91), INT8_C( 22), -INT8_C( 118), INT8_C( 122), -INT8_C( 80), INT8_C( 64), INT8_C( 107), INT8_C( 54), INT8_C( 29), INT8_C( 23), -INT8_C( 105), INT8_C( 18), INT8_C( 108), INT8_C( 64), -INT8_C( 105), INT8_C( 99), -INT8_C( 105), INT8_C( 64), -INT8_C( 118), INT8_C( 108), -INT8_C( 22), INT8_C( 107), -INT8_C( 86) } }, { { INT8_C( 111), -INT8_C( 112), -INT8_C( 34), -INT8_C( 71), INT8_C( 122), -INT8_C( 20), -INT8_C( 60), -INT8_C( 35), -INT8_C( 85), -INT8_C( 31), INT8_C( 0), INT8_C( 66), -INT8_C( 19), INT8_C( 64), -INT8_C( 20), -INT8_C( 93), INT8_C( 87), INT8_C( 34), INT8_C( 46), INT8_C( 105), INT8_C( 6), INT8_C( 69), -INT8_C( 93), -INT8_C( 74), -INT8_C( 22), INT8_C( 113), INT8_C( 34), -INT8_C( 44), -INT8_C( 36), -INT8_C( 51), INT8_C( 57), INT8_C( 75) }, { INT8_C( 93), INT8_C( 23), INT8_C( 4), -INT8_C( 41), INT8_C( 3), -INT8_C( 56), -INT8_C( 76), -INT8_C( 82), -INT8_C( 86), -INT8_C( 76), -INT8_C( 15), -INT8_C( 105), -INT8_C( 12), -INT8_C( 35), INT8_C( 59), INT8_C( 75), -INT8_C( 1), INT8_C( 105), -INT8_C( 76), INT8_C( 5), -INT8_C( 82), INT8_C( 88), -INT8_C( 69), -INT8_C( 104), -INT8_C( 55), -INT8_C( 34), INT8_C( 108), -INT8_C( 91), -INT8_C( 85), -INT8_C( 90), -INT8_C( 
16), INT8_C( 8) }, { INT8_C( 75), -INT8_C( 1), -INT8_C( 16), -INT8_C( 34), INT8_C( 108), -INT8_C( 12), INT8_C( 3), -INT8_C( 90), -INT8_C( 105), INT8_C( 23), INT8_C( 93), INT8_C( 4), -INT8_C( 35), INT8_C( 93), -INT8_C( 12), -INT8_C( 41), -INT8_C( 104), INT8_C( 4), INT8_C( 59), -INT8_C( 76), -INT8_C( 76), -INT8_C( 56), -INT8_C( 41), -INT8_C( 69), -INT8_C( 15), INT8_C( 105), INT8_C( 4), -INT8_C( 82), -INT8_C( 85), -INT8_C( 35), -INT8_C( 34), -INT8_C( 105) } }, { { -INT8_C( 67), -INT8_C( 12), -INT8_C( 33), -INT8_C( 63), -INT8_C( 67), -INT8_C( 109), INT8_C( 111), INT8_C( 103), INT8_C( 71), INT8_C( 96), -INT8_C( 2), INT8_C( 59), INT8_C( 61), INT8_C( 57), -INT8_C( 121), INT8_C( 60), -INT8_C( 94), INT8_C( 59), INT8_C( 65), INT8_C( 80), -INT8_C( 109), -INT8_C( 3), -INT8_C( 24), INT8_C( 92), -INT8_C( 37), INT8_C( 85), INT8_C( 1), -INT8_C( 122), -INT8_C( 5), -INT8_C( 15), -INT8_C( 114), -INT8_C( 72) }, { -INT8_C( 26), INT8_C( 109), INT8_C( 121), -INT8_C( 93), INT8_C( 0), -INT8_C( 23), INT8_C( 10), INT8_C( 71), INT8_C( 73), INT8_C( 8), -INT8_C( 126), -INT8_C( 121), INT8_C( 66), INT8_C( 9), -INT8_C( 61), -INT8_C( 28), INT8_C( 69), INT8_C( 5), INT8_C( 53), -INT8_C( 40), INT8_C( 2), INT8_C( 29), INT8_C( 53), -INT8_C( 35), INT8_C( 114), INT8_C( 54), INT8_C( 99), INT8_C( 109), INT8_C( 40), -INT8_C( 15), INT8_C( 38), INT8_C( 14) }, { -INT8_C( 15), INT8_C( 2), INT8_C( 14), INT8_C( 109), -INT8_C( 15), -INT8_C( 40), -INT8_C( 28), INT8_C( 71), INT8_C( 71), -INT8_C( 26), INT8_C( 38), INT8_C( 109), -INT8_C( 15), INT8_C( 54), INT8_C( 71), INT8_C( 40), INT8_C( 121), INT8_C( 109), INT8_C( 109), INT8_C( 69), -INT8_C( 40), -INT8_C( 15), INT8_C( 73), INT8_C( 40), INT8_C( 109), INT8_C( 29), INT8_C( 109), INT8_C( 10), INT8_C( 109), INT8_C( 5), -INT8_C( 61), INT8_C( 114) } }, { { INT8_C( 94), -INT8_C( 97), -INT8_C( 79), INT8_C( 94), -INT8_C( 120), -INT8_C( 69), -INT8_C( 91), -INT8_C( 46), -INT8_C( 61), INT8_C( 39), INT8_C( 89), INT8_C( 5), INT8_C( 49), INT8_C( 28), -INT8_C( 22), INT8_C( 118), 
INT8_C( 33), INT8_C( 31), INT8_C( 78), INT8_C( 35), INT8_C( 60), -INT8_C( 125), INT8_C( 0), -INT8_C( 81), -INT8_C( 70), INT8_C( 99), INT8_C( 28), -INT8_C( 30), INT8_C( 84), INT8_C( 66), -INT8_C( 16), -INT8_C( 78) }, { -INT8_C( 30), -INT8_C( 95), INT8_C( 16), INT8_C( 106), INT8_C( 92), -INT8_C( 75), INT8_C( 60), INT8_C( 31), -INT8_C( 35), -INT8_C( 107), INT8_C( 37), INT8_C( 14), -INT8_C( 78), INT8_C( 15), -INT8_C( 124), -INT8_C( 45), INT8_C( 46), -INT8_C( 46), -INT8_C( 9), INT8_C( 106), INT8_C( 86), -INT8_C( 9), INT8_C( 25), INT8_C( 16), INT8_C( 91), INT8_C( 54), -INT8_C( 14), -INT8_C( 81), INT8_C( 120), -INT8_C( 30), INT8_C( 98), INT8_C( 90) }, { INT8_C( 98), INT8_C( 90), -INT8_C( 46), INT8_C( 98), -INT8_C( 35), -INT8_C( 81), -INT8_C( 75), -INT8_C( 9), INT8_C( 106), INT8_C( 31), INT8_C( 54), -INT8_C( 75), -INT8_C( 46), INT8_C( 120), INT8_C( 37), INT8_C( 25), -INT8_C( 95), INT8_C( 90), -INT8_C( 124), INT8_C( 106), INT8_C( 120), INT8_C( 106), -INT8_C( 30), -INT8_C( 45), -INT8_C( 14), INT8_C( 106), INT8_C( 120), INT8_C( 16), INT8_C( 86), INT8_C( 16), INT8_C( 46), -INT8_C( 9) } }, { { -INT8_C( 125), INT8_C( 114), -INT8_C( 59), -INT8_C( 33), INT8_C( 40), INT8_C( 1), -INT8_C( 2), INT8_C( 5), -INT8_C( 105), INT8_C( 35), INT8_C( 19), INT8_C( 73), INT8_C( 50), -INT8_C( 105), INT8_C( 28), INT8_C( 96), INT8_C( 105), INT8_C( 19), -INT8_C( 53), -INT8_C( 65), INT8_C( 11), -INT8_C( 28), -INT8_C( 49), INT8_C( 102), INT8_C( 26), -INT8_C( 63), INT8_C( 21), -INT8_C( 109), -INT8_C( 93), INT8_C( 119), -INT8_C( 19), INT8_C( 38) }, { -INT8_C( 22), -INT8_C( 78), INT8_C( 5), INT8_C( 18), -INT8_C( 76), INT8_C( 4), INT8_C( 23), INT8_C( 75), INT8_C( 39), INT8_C( 42), -INT8_C( 108), INT8_C( 90), -INT8_C( 63), -INT8_C( 80), -INT8_C( 70), INT8_C( 42), -INT8_C( 60), -INT8_C( 123), -INT8_C( 22), -INT8_C( 49), INT8_C( 106), -INT8_C( 71), INT8_C( 53), -INT8_C( 124), INT8_C( 123), INT8_C( 74), INT8_C( 23), INT8_C( 30), -INT8_C( 62), INT8_C( 5), INT8_C( 69), -INT8_C( 84) }, { INT8_C( 18), -INT8_C( 
22), INT8_C( 4), -INT8_C( 84), INT8_C( 39), -INT8_C( 78), INT8_C( 69), INT8_C( 4), -INT8_C( 124), INT8_C( 18), -INT8_C( 49), INT8_C( 42), -INT8_C( 22), -INT8_C( 124), -INT8_C( 62), -INT8_C( 22), INT8_C( 42), -INT8_C( 49), INT8_C( 90), -INT8_C( 84), INT8_C( 90), -INT8_C( 76), INT8_C( 42), INT8_C( 23), INT8_C( 23), -INT8_C( 78), -INT8_C( 71), -INT8_C( 49), INT8_C( 18), -INT8_C( 124), -INT8_C( 80), INT8_C( 23) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i idx = simde_mm256_loadu_epi8(test_vec[i].idx); simde__m256i a = simde_mm256_loadu_epi8(test_vec[i].a); simde__m256i r = simde_mm256_permutexvar_epi8(idx, a); simde_test_x86_assert_equal_i8x32(r, simde_mm256_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i idx = simde_test_x86_random_i8x32(); simde__m256i a = simde_test_x86_random_i8x32(); simde__m256i r = simde_mm256_permutexvar_epi8(idx, a); simde_test_x86_write_i8x32(2, idx, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask_permutexvar_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t src[32]; const simde__mmask32 k; const int8_t idx[32]; const int8_t a[32]; const int8_t r[32]; } test_vec[] = { { { -INT8_C( 73), INT8_C( 74), -INT8_C( 66), INT8_C( 107), INT8_C( 78), -INT8_C( 43), -INT8_C( 74), INT8_C( 118), -INT8_C( 1), INT8_C( 74), -INT8_C( 48), -INT8_C( 64), -INT8_C( 5), -INT8_C( 118), -INT8_C( 22), -INT8_C( 65), INT8_C( 16), -INT8_C( 44), -INT8_C( 114), INT8_C( 122), -INT8_C( 114), -INT8_C( 61), -INT8_C( 2), INT8_C( 9), INT8_C( 13), INT8_C( 22), INT8_C( 39), -INT8_C( 49), INT8_C( 27), INT8_C( 108), INT8_C( 123), -INT8_C( 46) }, UINT32_C( 87964087), { INT8_C( 14), -INT8_C( 12), INT8_C( 123), INT8_C( 13), INT8_C( 63), INT8_C( 75), -INT8_C( 51), INT8_C( 58), -INT8_C( 42), -INT8_C( 72), 
-INT8_C( 7), -INT8_C( 26), -INT8_C( 116), -INT8_C( 121), INT8_C( 96), INT8_C( 26), INT8_C( 74), INT8_C( 94), INT8_C( 35), INT8_C( 87), INT8_C( 116), INT8_C( 75), INT8_C( 39), -INT8_C( 113), -INT8_C( 73), -INT8_C( 94), INT8_C( 98), INT8_C( 110), -INT8_C( 36), -INT8_C( 96), INT8_C( 116), -INT8_C( 22) }, { -INT8_C( 108), -INT8_C( 17), -INT8_C( 8), -INT8_C( 45), INT8_C( 59), -INT8_C( 59), INT8_C( 13), INT8_C( 17), INT8_C( 125), INT8_C( 6), -INT8_C( 9), INT8_C( 10), -INT8_C( 115), INT8_C( 87), INT8_C( 36), -INT8_C( 41), -INT8_C( 75), INT8_C( 72), INT8_C( 47), INT8_C( 42), -INT8_C( 109), INT8_C( 86), -INT8_C( 71), INT8_C( 74), -INT8_C( 8), INT8_C( 27), -INT8_C( 71), -INT8_C( 44), -INT8_C( 69), INT8_C( 45), -INT8_C( 65), INT8_C( 80) }, { INT8_C( 36), -INT8_C( 109), -INT8_C( 44), INT8_C( 107), INT8_C( 80), INT8_C( 10), -INT8_C( 74), -INT8_C( 71), -INT8_C( 71), INT8_C( 74), -INT8_C( 48), INT8_C( 13), -INT8_C( 115), INT8_C( 17), -INT8_C( 22), -INT8_C( 65), INT8_C( 16), -INT8_C( 65), -INT8_C( 45), INT8_C( 74), -INT8_C( 109), INT8_C( 10), -INT8_C( 2), INT8_C( 9), INT8_C( 74), INT8_C( 22), -INT8_C( 8), -INT8_C( 49), INT8_C( 27), INT8_C( 108), INT8_C( 123), -INT8_C( 46) } }, { { INT8_C( 28), -INT8_C( 73), INT8_C( 35), INT8_C( 87), INT8_C( 124), INT8_C( 49), INT8_C( 104), -INT8_C( 6), INT8_C( 55), INT8_C( 95), INT8_C( 4), -INT8_C( 59), -INT8_C( 74), INT8_C( 40), -INT8_C( 100), INT8_C( 108), INT8_C( 112), -INT8_C( 53), -INT8_C( 106), INT8_C( 3), INT8_C( 33), INT8_C( 79), INT8_C( 78), INT8_C( 26), INT8_C( 107), INT8_C( 7), -INT8_C( 18), INT8_C( 38), INT8_C( 52), -INT8_C( 83), INT8_C( 118), INT8_C( 80) }, UINT32_C(3785923172), { -INT8_C( 53), INT8_C( 16), -INT8_C( 37), INT8_C( 2), INT8_C( 112), -INT8_C( 33), -INT8_C( 57), INT8_C( 38), INT8_C( 7), INT8_C( 100), -INT8_C( 110), INT8_C( 120), INT8_C( 47), INT8_C( 40), INT8_C( 123), INT8_C( 81), INT8_C( 120), -INT8_C( 55), INT8_C( 107), -INT8_C( 29), -INT8_C( 48), INT8_C( 89), INT8_C( 9), INT8_C( 4), INT8_C( 7), INT8_MIN, INT8_C( 85), 
INT8_C( 107), INT8_C( 26), -INT8_C( 3), INT8_C( 76), -INT8_C( 27) }, { INT8_C( 13), INT8_C( 39), -INT8_C( 25), INT8_C( 125), INT8_C( 6), -INT8_C( 81), -INT8_C( 92), INT8_C( 14), INT8_C( 19), INT8_C( 54), -INT8_C( 122), INT8_C( 66), INT8_C( 95), INT8_C( 1), -INT8_C( 109), -INT8_C( 41), -INT8_C( 53), -INT8_C( 2), -INT8_C( 70), -INT8_C( 101), INT8_C( 88), -INT8_C( 61), -INT8_C( 96), INT8_C( 95), INT8_C( 67), -INT8_C( 11), -INT8_C( 54), INT8_C( 93), -INT8_C( 14), INT8_C( 23), INT8_C( 66), -INT8_C( 1) }, { INT8_C( 28), -INT8_C( 73), INT8_C( 93), INT8_C( 87), INT8_C( 124), -INT8_C( 1), INT8_C( 14), -INT8_C( 6), INT8_C( 55), INT8_C( 6), INT8_C( 4), INT8_C( 67), -INT8_C( 41), INT8_C( 40), -INT8_C( 100), -INT8_C( 2), INT8_C( 112), -INT8_C( 53), -INT8_C( 106), INT8_C( 125), INT8_C( 33), -INT8_C( 11), INT8_C( 78), INT8_C( 6), INT8_C( 14), INT8_C( 7), -INT8_C( 18), INT8_C( 38), INT8_C( 52), INT8_C( 23), INT8_C( 95), -INT8_C( 81) } }, { { INT8_C( 62), INT8_C( 42), INT8_C( 125), INT8_C( 69), -INT8_C( 39), INT8_C( 33), INT8_C( 83), -INT8_C( 20), INT8_C( 87), -INT8_C( 39), INT8_C( 46), -INT8_C( 74), -INT8_C( 38), -INT8_C( 62), -INT8_C( 115), -INT8_C( 91), -INT8_C( 64), INT8_C( 71), INT8_C( 65), INT8_C( 24), INT8_C( 11), -INT8_C( 31), INT8_C( 119), INT8_C( 78), -INT8_C( 42), INT8_C( 66), -INT8_C( 84), -INT8_C( 56), INT8_C( 89), -INT8_C( 18), -INT8_C( 57), -INT8_C( 105) }, UINT32_C(4057744408), { INT8_C( 101), INT8_C( 47), -INT8_C( 35), -INT8_C( 67), INT8_C( 8), INT8_C( 12), INT8_C( 115), -INT8_C( 29), -INT8_C( 50), INT8_C( 1), -INT8_C( 120), -INT8_C( 114), INT8_C( 72), -INT8_C( 55), -INT8_C( 89), INT8_C( 83), -INT8_C( 86), INT8_C( 30), -INT8_C( 94), INT8_MIN, INT8_C( 96), INT8_C( 78), INT8_C( 72), -INT8_C( 71), INT8_C( 60), INT8_C( 16), INT8_C( 81), INT8_C( 85), INT8_C( 84), INT8_C( 45), INT8_C( 70), -INT8_C( 70) }, { INT8_C( 93), INT8_C( 36), INT8_C( 119), INT8_C( 101), INT8_C( 48), -INT8_C( 22), INT8_C( 72), -INT8_C( 2), -INT8_C( 21), -INT8_C( 47), -INT8_C( 116), INT8_C( 52), 
-INT8_C( 102), INT8_C( 51), -INT8_C( 121), INT8_C( 69), INT8_C( 82), INT8_C( 41), -INT8_C( 59), -INT8_C( 78), INT8_C( 119), INT8_C( 14), INT8_C( 108), -INT8_C( 76), INT8_C( 30), -INT8_C( 67), INT8_C( 9), INT8_C( 114), -INT8_C( 22), INT8_C( 79), INT8_C( 44), INT8_C( 71) }, { INT8_C( 62), INT8_C( 42), INT8_C( 125), INT8_C( 79), -INT8_C( 21), INT8_C( 33), INT8_C( 83), -INT8_C( 20), INT8_C( 87), -INT8_C( 39), -INT8_C( 21), -INT8_C( 74), -INT8_C( 38), -INT8_C( 62), -INT8_C( 2), -INT8_C( 91), -INT8_C( 64), INT8_C( 71), INT8_C( 119), INT8_C( 93), INT8_C( 93), -INT8_C( 31), -INT8_C( 21), -INT8_C( 67), -INT8_C( 22), INT8_C( 66), -INT8_C( 84), -INT8_C( 56), INT8_C( 119), INT8_C( 51), INT8_C( 72), INT8_C( 9) } }, { { INT8_C( 115), -INT8_C( 93), -INT8_C( 83), -INT8_C( 93), -INT8_C( 114), -INT8_C( 11), -INT8_C( 95), INT8_C( 121), -INT8_C( 58), INT8_C( 46), -INT8_C( 83), INT8_C( 97), INT8_C( 97), INT8_C( 53), -INT8_C( 90), -INT8_C( 77), INT8_C( 94), INT8_C( 107), INT8_C( 102), -INT8_C( 42), INT8_C( 121), -INT8_C( 46), -INT8_C( 118), -INT8_C( 105), -INT8_C( 113), -INT8_C( 109), INT8_C( 10), INT8_C( 121), -INT8_C( 30), INT8_C( 54), -INT8_C( 63), INT8_C( 86) }, UINT32_C(1761177306), { INT8_C( 99), -INT8_C( 101), -INT8_C( 31), INT8_C( 42), -INT8_C( 55), -INT8_C( 113), -INT8_C( 117), INT8_C( 42), -INT8_C( 60), INT8_C( 49), -INT8_C( 34), INT8_C( 34), -INT8_C( 100), INT8_C( 68), -INT8_C( 8), INT8_C( 22), INT8_C( 22), -INT8_C( 126), -INT8_C( 83), -INT8_C( 91), INT8_C( 21), -INT8_C( 73), INT8_C( 30), -INT8_C( 8), -INT8_C( 18), -INT8_C( 33), INT8_C( 78), -INT8_C( 56), INT8_C( 77), INT8_C( 71), INT8_C( 48), -INT8_C( 79) }, { -INT8_C( 30), INT8_C( 17), -INT8_C( 37), -INT8_C( 85), -INT8_C( 96), INT8_C( 102), -INT8_C( 42), INT8_C( 100), -INT8_C( 105), -INT8_C( 76), -INT8_C( 121), INT8_C( 51), -INT8_C( 8), INT8_MAX, INT8_C( 73), INT8_C( 14), INT8_C( 2), -INT8_C( 9), -INT8_C( 77), INT8_C( 23), -INT8_C( 82), -INT8_C( 47), INT8_C( 15), -INT8_C( 100), -INT8_C( 79), INT8_C( 93), INT8_C( 100), 
-INT8_C( 2), -INT8_C( 91), -INT8_C( 108), -INT8_C( 81), -INT8_C( 121) }, { INT8_C( 115), -INT8_C( 2), -INT8_C( 83), -INT8_C( 121), -INT8_C( 76), -INT8_C( 11), INT8_C( 51), -INT8_C( 121), -INT8_C( 58), -INT8_C( 9), -INT8_C( 81), -INT8_C( 37), INT8_C( 97), -INT8_C( 96), -INT8_C( 79), -INT8_C( 77), INT8_C( 15), INT8_C( 107), INT8_C( 102), INT8_C( 102), -INT8_C( 47), -INT8_C( 100), -INT8_C( 81), -INT8_C( 79), -INT8_C( 113), -INT8_C( 109), INT8_C( 10), -INT8_C( 105), -INT8_C( 30), INT8_C( 100), INT8_C( 2), INT8_C( 86) } }, { { -INT8_C( 90), -INT8_C( 118), INT8_C( 51), INT8_C( 70), -INT8_C( 16), INT8_C( 9), -INT8_C( 85), -INT8_C( 121), -INT8_C( 67), INT8_C( 50), -INT8_C( 69), -INT8_C( 75), -INT8_C( 79), INT8_C( 4), -INT8_C( 61), -INT8_C( 77), -INT8_C( 5), INT8_C( 118), -INT8_C( 53), -INT8_C( 86), INT8_C( 71), -INT8_C( 38), INT8_C( 70), -INT8_C( 8), INT8_C( 56), -INT8_C( 85), -INT8_C( 9), -INT8_C( 35), INT8_C( 63), -INT8_C( 90), INT8_C( 100), -INT8_C( 27) }, UINT32_C( 556570417), { -INT8_C( 96), -INT8_C( 41), -INT8_C( 87), INT8_C( 93), INT8_C( 9), INT8_C( 100), INT8_C( 18), -INT8_C( 70), INT8_C( 104), -INT8_C( 43), INT8_C( 110), INT8_C( 100), INT8_C( 75), INT8_C( 57), INT8_C( 14), -INT8_C( 109), INT8_C( 19), INT8_C( 84), -INT8_C( 117), INT8_C( 75), -INT8_C( 1), -INT8_C( 126), INT8_C( 40), INT8_C( 63), INT8_C( 41), -INT8_C( 115), INT8_C( 36), INT8_C( 90), INT8_C( 36), INT8_C( 80), INT8_C( 123), -INT8_C( 59) }, { INT8_C( 39), INT8_C( 36), INT8_C( 34), INT8_C( 48), -INT8_C( 120), INT8_C( 53), -INT8_C( 21), -INT8_C( 15), INT8_C( 10), INT8_C( 89), INT8_C( 85), INT8_C( 86), -INT8_C( 110), INT8_C( 99), -INT8_C( 23), -INT8_C( 91), -INT8_C( 73), INT8_C( 116), -INT8_C( 15), -INT8_C( 73), -INT8_C( 9), INT8_C( 25), -INT8_C( 10), INT8_C( 32), -INT8_C( 90), INT8_C( 26), INT8_C( 122), -INT8_C( 53), INT8_C( 107), -INT8_C( 11), -INT8_C( 112), -INT8_C( 110) }, { INT8_C( 39), -INT8_C( 118), INT8_C( 51), INT8_C( 70), INT8_C( 89), -INT8_C( 120), -INT8_C( 85), -INT8_C( 121), INT8_C( 10), 
INT8_C( 25), -INT8_C( 23), -INT8_C( 75), INT8_C( 86), INT8_C( 4), -INT8_C( 61), -INT8_C( 73), -INT8_C( 5), INT8_C( 118), INT8_C( 86), INT8_C( 86), INT8_C( 71), INT8_C( 34), INT8_C( 70), -INT8_C( 8), INT8_C( 89), -INT8_C( 85), -INT8_C( 9), -INT8_C( 35), INT8_C( 63), -INT8_C( 73), INT8_C( 100), -INT8_C( 27) } }, { { INT8_C( 26), -INT8_C( 78), -INT8_C( 61), -INT8_C( 94), -INT8_C( 25), -INT8_C( 82), -INT8_C( 109), -INT8_C( 14), INT8_C( 7), -INT8_C( 24), INT8_C( 72), -INT8_C( 103), INT8_C( 75), INT8_C( 49), INT8_C( 62), INT8_C( 3), -INT8_C( 91), INT8_C( 47), -INT8_C( 70), -INT8_C( 100), INT8_C( 73), -INT8_C( 80), -INT8_C( 68), -INT8_C( 17), -INT8_C( 54), INT8_C( 54), -INT8_C( 70), INT8_C( 53), INT8_C( 44), INT8_C( 74), -INT8_C( 56), INT8_C( 70) }, UINT32_C(3840445437), { INT8_C( 57), INT8_C( 124), -INT8_C( 42), INT8_C( 64), INT8_C( 100), INT8_C( 30), -INT8_C( 39), -INT8_C( 80), INT8_C( 79), INT8_C( 23), -INT8_C( 77), -INT8_C( 11), INT8_C( 71), INT8_C( 109), -INT8_C( 111), -INT8_C( 112), INT8_C( 29), INT8_C( 78), INT8_MAX, -INT8_C( 25), -INT8_C( 124), INT8_C( 58), INT8_C( 29), -INT8_C( 80), -INT8_C( 124), -INT8_C( 27), -INT8_C( 10), -INT8_C( 127), INT8_C( 112), -INT8_C( 33), INT8_C( 102), -INT8_C( 87) }, { INT8_C( 91), INT8_C( 60), -INT8_C( 23), -INT8_C( 65), INT8_C( 91), -INT8_C( 62), INT8_C( 111), -INT8_C( 86), -INT8_C( 39), INT8_C( 34), -INT8_C( 97), INT8_C( 32), -INT8_C( 113), INT8_C( 49), -INT8_C( 80), -INT8_C( 84), INT8_MAX, INT8_C( 48), -INT8_C( 108), INT8_C( 3), INT8_C( 106), -INT8_C( 79), -INT8_C( 76), -INT8_C( 18), -INT8_C( 106), -INT8_C( 86), INT8_C( 112), INT8_C( 6), -INT8_C( 119), -INT8_C( 42), -INT8_C( 81), -INT8_C( 28) }, { -INT8_C( 86), -INT8_C( 78), -INT8_C( 76), INT8_C( 91), INT8_C( 91), -INT8_C( 81), -INT8_C( 86), INT8_MAX, -INT8_C( 84), -INT8_C( 18), INT8_C( 72), -INT8_C( 79), INT8_C( 75), INT8_C( 49), INT8_C( 62), INT8_MAX, -INT8_C( 91), INT8_C( 47), -INT8_C( 70), -INT8_C( 86), INT8_C( 73), INT8_C( 112), -INT8_C( 42), INT8_MAX, -INT8_C( 54), INT8_C( 
54), -INT8_C( 76), INT8_C( 53), INT8_C( 44), -INT8_C( 28), INT8_C( 111), INT8_C( 34) } }, { { INT8_C( 18), -INT8_C( 104), -INT8_C( 92), INT8_C( 109), INT8_C( 90), INT8_C( 19), INT8_C( 24), INT8_C( 51), INT8_C( 54), -INT8_C( 73), INT8_C( 84), -INT8_C( 59), -INT8_C( 24), INT8_C( 4), INT8_C( 114), INT8_C( 103), INT8_C( 52), INT8_C( 6), INT8_C( 107), -INT8_C( 98), -INT8_C( 73), INT8_C( 31), -INT8_C( 115), INT8_C( 77), -INT8_C( 55), -INT8_C( 3), INT8_C( 83), INT8_C( 83), -INT8_C( 45), INT8_C( 2), INT8_C( 55), -INT8_C( 27) }, UINT32_C(4099136410), { -INT8_C( 17), INT8_C( 107), INT8_C( 39), INT8_C( 37), INT8_C( 34), INT8_C( 123), -INT8_C( 22), INT8_C( 11), INT8_MIN, INT8_C( 92), INT8_C( 114), -INT8_C( 76), INT8_C( 98), -INT8_C( 35), INT8_C( 83), INT8_C( 25), -INT8_C( 4), -INT8_C( 32), INT8_C( 102), -INT8_C( 58), -INT8_C( 35), -INT8_C( 71), INT8_C( 25), -INT8_C( 80), -INT8_C( 69), INT8_C( 80), -INT8_C( 107), INT8_C( 85), INT8_C( 44), -INT8_C( 24), INT8_C( 73), INT8_C( 27) }, { INT8_C( 83), INT8_C( 113), INT8_C( 64), INT8_C( 118), -INT8_C( 20), INT8_C( 42), -INT8_C( 127), INT8_C( 108), -INT8_C( 121), -INT8_C( 13), INT8_C( 33), -INT8_C( 23), -INT8_C( 47), INT8_C( 116), INT8_C( 3), -INT8_C( 51), INT8_C( 84), INT8_C( 105), -INT8_C( 109), INT8_C( 49), INT8_C( 35), -INT8_C( 84), -INT8_C( 31), -INT8_C( 34), -INT8_C( 3), INT8_C( 118), INT8_C( 52), INT8_C( 41), INT8_C( 95), INT8_C( 125), INT8_C( 68), -INT8_C( 78) }, { INT8_C( 18), -INT8_C( 23), -INT8_C( 92), INT8_C( 42), INT8_C( 64), INT8_C( 19), INT8_C( 24), -INT8_C( 23), INT8_C( 83), INT8_C( 95), INT8_C( 84), INT8_C( 35), INT8_C( 64), INT8_C( 4), INT8_C( 49), INT8_C( 118), INT8_C( 95), INT8_C( 83), INT8_C( 107), -INT8_C( 98), INT8_C( 125), INT8_C( 31), INT8_C( 118), INT8_C( 77), -INT8_C( 55), -INT8_C( 3), -INT8_C( 84), INT8_C( 83), -INT8_C( 47), -INT8_C( 121), -INT8_C( 13), INT8_C( 41) } }, { { -INT8_C( 18), -INT8_C( 124), INT8_C( 40), -INT8_C( 37), -INT8_C( 82), -INT8_C( 87), INT8_C( 71), INT8_C( 53), -INT8_C( 99), INT8_C( 104), 
INT8_C( 31), INT8_C( 110), -INT8_C( 36), INT8_C( 34), INT8_C( 59), INT8_C( 48), -INT8_C( 117), -INT8_C( 49), INT8_C( 97), -INT8_C( 82), INT8_C( 123), INT8_C( 66), -INT8_C( 115), INT8_C( 120), -INT8_C( 71), -INT8_C( 63), -INT8_C( 95), INT8_C( 24), INT8_C( 62), -INT8_C( 27), -INT8_C( 54), INT8_C( 45) }, UINT32_C( 403239785), { -INT8_C( 100), INT8_C( 79), INT8_C( 77), INT8_C( 57), -INT8_C( 72), INT8_C( 108), -INT8_C( 89), -INT8_C( 108), -INT8_C( 114), -INT8_C( 29), -INT8_C( 59), INT8_C( 26), -INT8_C( 78), INT8_C( 38), -INT8_C( 56), INT8_C( 45), INT8_C( 105), INT8_C( 85), -INT8_C( 90), INT8_C( 34), INT8_C( 22), INT8_C( 71), INT8_C( 58), INT8_C( 85), INT8_C( 45), INT8_C( 4), -INT8_C( 126), -INT8_C( 106), -INT8_C( 9), -INT8_C( 118), -INT8_C( 82), -INT8_C( 108) }, { -INT8_C( 39), -INT8_C( 4), -INT8_C( 51), -INT8_C( 111), INT8_C( 104), INT8_C( 117), INT8_C( 38), -INT8_C( 9), INT8_C( 88), -INT8_C( 21), INT8_C( 17), INT8_C( 10), INT8_C( 17), -INT8_C( 39), INT8_C( 55), INT8_C( 122), INT8_C( 47), -INT8_C( 35), -INT8_C( 100), INT8_C( 69), INT8_C( 37), -INT8_C( 42), -INT8_C( 102), INT8_C( 82), -INT8_C( 37), INT8_C( 28), -INT8_C( 24), -INT8_C( 46), -INT8_C( 90), -INT8_C( 105), INT8_C( 102), INT8_MIN }, { -INT8_C( 90), -INT8_C( 124), INT8_C( 40), INT8_C( 28), -INT8_C( 82), INT8_C( 17), -INT8_C( 9), INT8_C( 53), INT8_C( 55), -INT8_C( 111), INT8_C( 31), INT8_C( 110), -INT8_C( 100), INT8_C( 38), INT8_C( 88), -INT8_C( 39), -INT8_C( 117), -INT8_C( 49), INT8_C( 97), -INT8_C( 51), INT8_C( 123), INT8_C( 66), -INT8_C( 115), INT8_C( 120), -INT8_C( 71), -INT8_C( 63), -INT8_C( 95), -INT8_C( 102), INT8_C( 82), -INT8_C( 27), -INT8_C( 54), INT8_C( 45) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i src = simde_mm256_loadu_epi8(test_vec[i].src); simde__m256i idx = simde_mm256_loadu_epi8(test_vec[i].idx); simde__m256i a = simde_mm256_loadu_epi8(test_vec[i].a); simde__m256i r = simde_mm256_mask_permutexvar_epi8(src, test_vec[i].k, idx, a); 
simde_test_x86_assert_equal_i8x32(r, simde_mm256_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i src = simde_test_x86_random_i8x32(); simde__mmask32 k = simde_test_x86_random_mmask32(); simde__m256i idx = simde_test_x86_random_i8x32(); simde__m256i a = simde_test_x86_random_i8x32(); simde__m256i r = simde_mm256_mask_permutexvar_epi8(src, k, idx, a); simde_test_x86_write_i8x32(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_maskz_permutexvar_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask32 k; const int8_t idx[32]; const int8_t a[32]; const int8_t r[32]; } test_vec[] = { { UINT32_C(4212208787), { -INT8_C( 87), INT8_C( 55), -INT8_C( 14), INT8_C( 1), INT8_C( 34), INT8_C( 3), INT8_C( 11), INT8_C( 52), -INT8_C( 35), INT8_C( 66), -INT8_C( 82), INT8_C( 12), INT8_C( 32), INT8_C( 75), INT8_C( 81), INT8_C( 69), INT8_C( 33), -INT8_C( 20), -INT8_C( 105), -INT8_C( 4), INT8_C( 8), INT8_MAX, -INT8_C( 49), -INT8_C( 81), INT8_C( 22), INT8_C( 53), INT8_C( 47), -INT8_C( 87), INT8_C( 105), INT8_C( 64), -INT8_C( 91), INT8_C( 18) }, { INT8_C( 120), -INT8_C( 105), INT8_C( 19), -INT8_C( 102), -INT8_C( 101), INT8_C( 30), -INT8_C( 50), INT8_C( 120), INT8_C( 97), INT8_C( 125), -INT8_C( 124), -INT8_C( 127), -INT8_C( 56), -INT8_C( 43), -INT8_C( 58), -INT8_C( 23), -INT8_C( 63), INT8_C( 93), -INT8_C( 26), -INT8_C( 54), -INT8_C( 36), -INT8_C( 75), INT8_C( 121), -INT8_C( 13), -INT8_C( 22), -INT8_C( 88), -INT8_C( 100), INT8_C( 84), -INT8_C( 24), INT8_C( 65), INT8_C( 102), INT8_C( 96) }, { INT8_C( 125), -INT8_C( 13), INT8_C( 0), INT8_C( 0), INT8_C( 19), INT8_C( 0), INT8_C( 0), -INT8_C( 36), INT8_C( 0), INT8_C( 0), -INT8_C( 58), 
INT8_C( 0), INT8_C( 120), -INT8_C( 127), INT8_C( 0), INT8_C( 0), -INT8_C( 105), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 97), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 121), -INT8_C( 75), INT8_C( 0), INT8_C( 125), INT8_C( 125), INT8_C( 120), INT8_C( 30), -INT8_C( 26) } }, { UINT32_C(1962638041), { -INT8_C( 104), -INT8_C( 55), -INT8_C( 20), -INT8_C( 7), INT8_C( 70), INT8_C( 112), INT8_C( 122), INT8_C( 14), INT8_C( 69), INT8_C( 64), -INT8_C( 8), INT8_C( 7), -INT8_C( 99), -INT8_C( 34), -INT8_C( 47), INT8_C( 122), -INT8_C( 109), INT8_C( 74), INT8_C( 109), INT8_C( 125), -INT8_C( 14), INT8_C( 9), -INT8_C( 47), -INT8_C( 38), INT8_C( 75), INT8_C( 56), INT8_C( 59), INT8_C( 36), -INT8_C( 78), INT8_C( 54), -INT8_C( 104), INT8_C( 74) }, { -INT8_C( 1), -INT8_C( 124), INT8_C( 68), INT8_C( 70), -INT8_C( 12), -INT8_C( 66), INT8_C( 84), INT8_C( 57), -INT8_C( 1), INT8_C( 76), INT8_C( 64), -INT8_C( 100), INT8_C( 42), INT8_C( 17), INT8_C( 22), -INT8_C( 67), INT8_C( 91), -INT8_C( 125), INT8_C( 59), INT8_C( 77), -INT8_C( 115), INT8_C( 12), INT8_C( 40), -INT8_C( 40), INT8_C( 68), INT8_C( 99), -INT8_C( 4), -INT8_C( 10), -INT8_C( 103), -INT8_C( 108), INT8_C( 65), -INT8_C( 104) }, { INT8_C( 68), INT8_C( 0), INT8_C( 0), INT8_C( 99), INT8_C( 84), INT8_C( 0), -INT8_C( 4), INT8_C( 22), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 57), -INT8_C( 108), INT8_C( 65), -INT8_C( 125), INT8_C( 0), INT8_C( 77), INT8_C( 64), INT8_C( 0), -INT8_C( 108), INT8_C( 59), INT8_C( 76), -INT8_C( 125), -INT8_C( 4), INT8_C( 0), INT8_C( 0), -INT8_C( 10), INT8_C( 0), INT8_C( 59), INT8_C( 40), INT8_C( 68), INT8_C( 0) } }, { UINT32_C( 215909656), { INT8_C( 67), INT8_C( 51), INT8_C( 69), INT8_C( 66), INT8_MAX, -INT8_C( 122), -INT8_C( 33), -INT8_C( 86), -INT8_C( 105), -INT8_C( 11), INT8_C( 103), -INT8_C( 13), INT8_C( 121), -INT8_C( 94), INT8_C( 64), INT8_C( 6), -INT8_C( 81), INT8_C( 104), -INT8_C( 34), -INT8_C( 13), -INT8_C( 53), -INT8_C( 38), -INT8_C( 22), INT8_C( 100), INT8_C( 110), INT8_C( 43), -INT8_C( 3), 
-INT8_C( 122), -INT8_C( 80), -INT8_C( 37), -INT8_C( 110), -INT8_C( 13) }, { INT8_C( 14), -INT8_C( 41), INT8_C( 54), -INT8_C( 114), INT8_C( 93), INT8_C( 21), INT8_C( 56), -INT8_C( 11), INT8_C( 10), -INT8_C( 97), -INT8_C( 24), -INT8_C( 125), INT8_C( 66), INT8_C( 40), -INT8_C( 119), -INT8_C( 15), -INT8_C( 111), INT8_C( 103), -INT8_C( 28), INT8_C( 92), INT8_C( 65), -INT8_C( 50), -INT8_C( 63), -INT8_C( 81), -INT8_C( 7), -INT8_C( 66), INT8_C( 53), -INT8_C( 87), -INT8_C( 103), -INT8_C( 57), -INT8_C( 99), -INT8_C( 88) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 54), -INT8_C( 88), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 81), INT8_C( 0), -INT8_C( 11), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 56), INT8_C( 0), INT8_C( 10), -INT8_C( 99), INT8_C( 92), -INT8_C( 125), INT8_C( 0), -INT8_C( 24), INT8_C( 93), INT8_C( 0), INT8_C( 0), -INT8_C( 57), INT8_C( 56), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { UINT32_C(4231451551), { -INT8_C( 24), INT8_C( 110), -INT8_C( 15), -INT8_C( 14), INT8_C( 13), -INT8_C( 39), INT8_C( 118), INT8_C( 79), INT8_C( 2), -INT8_C( 1), INT8_C( 64), -INT8_C( 109), INT8_C( 103), INT8_C( 37), -INT8_C( 17), -INT8_C( 88), -INT8_C( 13), -INT8_C( 80), INT8_C( 88), -INT8_C( 19), INT8_C( 110), -INT8_C( 115), -INT8_C( 106), INT8_C( 8), INT8_C( 85), INT8_C( 51), -INT8_C( 80), -INT8_C( 12), INT8_C( 6), -INT8_C( 26), -INT8_C( 16), -INT8_C( 18) }, { INT8_C( 84), -INT8_C( 30), -INT8_C( 31), INT8_C( 97), -INT8_C( 69), INT8_C( 87), -INT8_C( 79), -INT8_C( 67), INT8_C( 86), -INT8_C( 15), INT8_C( 80), -INT8_C( 67), INT8_C( 22), INT8_C( 64), INT8_C( 102), INT8_C( 10), -INT8_C( 16), -INT8_C( 66), -INT8_C( 9), INT8_C( 95), INT8_C( 75), -INT8_C( 115), INT8_C( 103), -INT8_C( 96), -INT8_C( 63), INT8_C( 23), -INT8_C( 108), -INT8_C( 57), -INT8_C( 3), -INT8_C( 123), -INT8_C( 74), INT8_C( 81) }, { INT8_C( 86), INT8_C( 102), -INT8_C( 66), -INT8_C( 9), INT8_C( 64), INT8_C( 0), INT8_C( 0), INT8_C( 10), -INT8_C( 31), INT8_C( 81), INT8_C( 0), INT8_C( 0), 
-INT8_C( 67), INT8_C( 0), INT8_C( 10), INT8_C( 86), INT8_C( 0), -INT8_C( 16), -INT8_C( 63), INT8_C( 0), INT8_C( 102), INT8_C( 64), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 16), INT8_C( 75), -INT8_C( 79), -INT8_C( 79), -INT8_C( 16), INT8_C( 102) } }, { UINT32_C( 582129511), { -INT8_C( 18), INT8_C( 99), -INT8_C( 32), INT8_C( 68), INT8_C( 85), INT8_C( 48), INT8_C( 2), INT8_C( 107), INT8_C( 112), INT8_C( 104), INT8_C( 117), INT8_C( 97), INT8_C( 38), INT8_C( 108), -INT8_C( 64), INT8_C( 113), -INT8_C( 6), INT8_C( 39), INT8_C( 18), -INT8_C( 69), INT8_C( 62), -INT8_C( 90), -INT8_C( 126), INT8_C( 59), INT8_C( 43), INT8_C( 56), -INT8_C( 116), -INT8_C( 110), -INT8_C( 49), INT8_C( 62), -INT8_C( 75), -INT8_C( 67) }, { -INT8_C( 94), -INT8_C( 107), INT8_C( 2), -INT8_C( 9), -INT8_C( 59), INT8_C( 4), INT8_C( 98), INT8_C( 54), INT8_C( 108), -INT8_C( 40), -INT8_C( 105), -INT8_C( 110), INT8_C( 68), INT8_C( 87), INT8_C( 3), INT8_C( 62), INT8_C( 126), INT8_C( 21), -INT8_C( 7), -INT8_C( 68), -INT8_C( 68), INT8_C( 124), -INT8_C( 9), -INT8_C( 25), -INT8_C( 76), -INT8_C( 125), INT8_C( 122), -INT8_C( 124), -INT8_C( 63), INT8_C( 47), INT8_C( 65), INT8_C( 99) }, { INT8_C( 3), -INT8_C( 9), -INT8_C( 94), INT8_C( 0), INT8_C( 0), INT8_C( 126), INT8_C( 2), INT8_C( 0), INT8_C( 126), INT8_C( 108), INT8_C( 124), INT8_C( 0), INT8_C( 98), INT8_C( 0), INT8_C( 0), INT8_C( 21), INT8_C( 0), INT8_C( 54), INT8_C( 0), INT8_C( 0), INT8_C( 65), INT8_C( 98), INT8_C( 0), -INT8_C( 124), INT8_C( 0), -INT8_C( 76), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 65), INT8_C( 0), INT8_C( 0) } }, { UINT32_C(2304394180), { INT8_C( 71), -INT8_C( 67), -INT8_C( 65), -INT8_C( 77), -INT8_C( 107), INT8_C( 86), INT8_C( 69), -INT8_C( 39), -INT8_C( 83), INT8_C( 73), INT8_C( 24), INT8_C( 43), INT8_C( 94), INT8_C( 17), -INT8_C( 25), INT8_C( 26), -INT8_C( 115), -INT8_C( 34), INT8_C( 2), INT8_C( 66), INT8_C( 97), INT8_C( 124), -INT8_C( 58), INT8_C( 35), -INT8_C( 85), INT8_C( 7), -INT8_C( 122), INT8_C( 111), INT8_C( 75), 
-INT8_C( 31), -INT8_C( 8), -INT8_C( 110) }, { -INT8_C( 98), -INT8_C( 72), INT8_C( 70), INT8_C( 51), INT8_C( 14), -INT8_C( 117), INT8_C( 12), -INT8_C( 68), -INT8_C( 44), INT8_C( 36), -INT8_C( 25), INT8_C( 51), INT8_C( 54), -INT8_C( 49), INT8_C( 77), -INT8_C( 61), -INT8_C( 83), INT8_C( 79), INT8_C( 5), INT8_C( 15), -INT8_C( 53), -INT8_C( 53), INT8_C( 50), INT8_C( 118), -INT8_C( 45), -INT8_C( 72), -INT8_C( 27), INT8_C( 30), -INT8_C( 103), -INT8_C( 34), -INT8_C( 80), INT8_C( 55) }, { INT8_C( 0), INT8_C( 0), INT8_C( 55), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 117), -INT8_C( 72), -INT8_C( 49), INT8_C( 36), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 68), INT8_C( 0), INT8_C( 0), -INT8_C( 80), INT8_C( 0), INT8_C( 70), -INT8_C( 72), INT8_C( 0), INT8_C( 12), INT8_C( 0), INT8_C( 51), INT8_C( 0), INT8_C( 0), -INT8_C( 61), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 5) } }, { UINT32_C(2758473366), { -INT8_C( 126), INT8_C( 119), INT8_C( 96), INT8_C( 86), -INT8_C( 101), INT8_C( 72), -INT8_C( 119), -INT8_C( 47), INT8_C( 23), -INT8_C( 41), -INT8_C( 107), -INT8_C( 60), INT8_C( 38), -INT8_C( 102), -INT8_C( 45), -INT8_C( 14), INT8_C( 102), INT8_C( 5), INT8_C( 104), INT8_C( 57), -INT8_C( 66), INT8_C( 78), INT8_C( 87), INT8_C( 87), INT8_C( 44), INT8_C( 7), -INT8_C( 113), -INT8_C( 62), -INT8_C( 2), -INT8_C( 7), INT8_C( 102), INT8_MIN }, { INT8_C( 112), -INT8_C( 57), -INT8_C( 42), INT8_C( 12), INT8_C( 15), INT8_C( 96), -INT8_C( 35), INT8_C( 38), INT8_C( 55), INT8_C( 114), -INT8_C( 22), INT8_C( 93), INT8_C( 13), -INT8_C( 66), INT8_C( 79), INT8_C( 115), -INT8_C( 61), -INT8_C( 72), -INT8_C( 84), -INT8_C( 127), INT8_C( 6), INT8_C( 3), -INT8_C( 39), INT8_C( 50), INT8_C( 10), INT8_C( 104), -INT8_C( 12), INT8_C( 8), INT8_C( 97), INT8_C( 90), -INT8_C( 120), -INT8_C( 46) }, { INT8_C( 0), INT8_C( 50), INT8_C( 112), INT8_C( 0), INT8_C( 8), INT8_C( 0), INT8_C( 0), -INT8_C( 72), INT8_C( 0), INT8_C( 50), INT8_C( 3), INT8_C( 0), -INT8_C( 35), -INT8_C( 12), -INT8_C( 127), -INT8_C( 84), 
INT8_C( 0), INT8_C( 96), INT8_C( 0), INT8_C( 104), INT8_C( 0), INT8_C( 79), INT8_C( 50), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 115), INT8_C( 0), INT8_C( 0), INT8_C( 104), INT8_C( 0), INT8_C( 112) } }, { UINT32_C( 819879713), { -INT8_C( 65), -INT8_C( 69), INT8_C( 86), -INT8_C( 10), INT8_C( 46), INT8_C( 65), INT8_C( 83), INT8_C( 59), -INT8_C( 1), -INT8_C( 93), -INT8_C( 82), -INT8_C( 62), INT8_C( 91), INT8_C( 90), INT8_C( 68), INT8_C( 97), INT8_C( 93), INT8_C( 29), -INT8_C( 109), INT8_C( 103), -INT8_C( 123), -INT8_C( 121), INT8_C( 112), -INT8_C( 26), -INT8_C( 31), -INT8_C( 8), -INT8_C( 72), INT8_C( 3), INT8_C( 87), -INT8_C( 106), INT8_C( 51), INT8_C( 22) }, { INT8_C( 82), -INT8_C( 118), INT8_C( 12), INT8_MIN, -INT8_C( 53), INT8_C( 96), -INT8_C( 69), -INT8_C( 54), INT8_C( 3), INT8_C( 105), -INT8_C( 116), INT8_C( 94), -INT8_C( 61), -INT8_C( 48), -INT8_C( 65), INT8_C( 32), -INT8_C( 19), INT8_C( 82), -INT8_C( 121), INT8_C( 114), -INT8_C( 39), -INT8_C( 9), INT8_C( 89), -INT8_C( 70), -INT8_C( 16), INT8_C( 17), -INT8_C( 67), INT8_C( 71), -INT8_C( 88), -INT8_C( 15), INT8_C( 94), -INT8_C( 6) }, { -INT8_C( 6), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 118), INT8_C( 0), INT8_C( 0), -INT8_C( 6), INT8_MIN, -INT8_C( 65), INT8_C( 12), INT8_C( 71), INT8_C( 0), -INT8_C( 53), INT8_C( 0), INT8_C( 0), -INT8_C( 15), INT8_C( 114), -INT8_C( 54), INT8_C( 96), INT8_C( 0), -INT8_C( 19), -INT8_C( 69), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 70), INT8_C( 89), INT8_C( 0), INT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i idx = simde_mm256_loadu_epi8(test_vec[i].idx); simde__m256i a = simde_mm256_loadu_epi8(test_vec[i].a); simde__m256i r = simde_mm256_maskz_permutexvar_epi8(test_vec[i].k, idx, a); simde_test_x86_assert_equal_i8x32(r, simde_mm256_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask32 k = simde_test_x86_random_mmask32(); 
simde__m256i idx = simde_test_x86_random_i8x32(); simde__m256i a = simde_test_x86_random_i8x32(); simde__m256i r = simde_mm256_maskz_permutexvar_epi8(k, idx, a); simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x32(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_permutexvar_pd(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const int64_t idx[4]; const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[8] = { { { -INT64_C( 6723175484270992638), -INT64_C( 4563869859770635283), INT64_C( 651974606270245935), -INT64_C( 8302155520550445577) }, { SIMDE_FLOAT64_C( -770.65), SIMDE_FLOAT64_C( 51.51), SIMDE_FLOAT64_C( -565.74), SIMDE_FLOAT64_C( -26.48) }, { SIMDE_FLOAT64_C( -565.74), SIMDE_FLOAT64_C( 51.51), SIMDE_FLOAT64_C( -26.48), SIMDE_FLOAT64_C( -26.48) } }, { { -INT64_C( 3631628342071142227), -INT64_C( 2150787199714573635), -INT64_C( 7382396981396950491), INT64_C( 3653346760470123590) }, { SIMDE_FLOAT64_C( 723.79), SIMDE_FLOAT64_C( 211.57), SIMDE_FLOAT64_C( -703.04), SIMDE_FLOAT64_C( 319.79) }, { SIMDE_FLOAT64_C( 211.57), SIMDE_FLOAT64_C( 211.57), SIMDE_FLOAT64_C( 211.57), SIMDE_FLOAT64_C( -703.04) } }, { { INT64_C( 1789830837432444459), INT64_C( 4531154689147468959), INT64_C( 5070206288598101338), -INT64_C( 8515948049036931256) }, { SIMDE_FLOAT64_C( 745.53), SIMDE_FLOAT64_C( -121.69), SIMDE_FLOAT64_C( 436.39), SIMDE_FLOAT64_C( -375.64) }, { SIMDE_FLOAT64_C( -375.64), SIMDE_FLOAT64_C( -375.64), SIMDE_FLOAT64_C( 436.39), SIMDE_FLOAT64_C( 745.53) } }, { { INT64_C( 8298558806840658118), INT64_C( 4127699110275910476), INT64_C( 1241015148471100549), -INT64_C( 3152505381425837762) }, { SIMDE_FLOAT64_C( 290.53), SIMDE_FLOAT64_C( 233.57), SIMDE_FLOAT64_C( -508.59), SIMDE_FLOAT64_C( -457.88) }, { SIMDE_FLOAT64_C( -508.59), SIMDE_FLOAT64_C( 290.53), SIMDE_FLOAT64_C( 233.57), 
SIMDE_FLOAT64_C( -508.59) } }, { { -INT64_C( 1870754788895005931), -INT64_C( 6297545250883808953), -INT64_C( 1530847991345524073), -INT64_C( 4227763291079966366) }, { SIMDE_FLOAT64_C( 157.81), SIMDE_FLOAT64_C( -884.15), SIMDE_FLOAT64_C( 58.01), SIMDE_FLOAT64_C( -253.60) }, { SIMDE_FLOAT64_C( -884.15), SIMDE_FLOAT64_C( -253.60), SIMDE_FLOAT64_C( -253.60), SIMDE_FLOAT64_C( 58.01) } }, { { INT64_C( 8517097999991582751), INT64_C( 8925246739055561838), INT64_C( 5640797697215929645), INT64_C( 1963352818527214362) }, { SIMDE_FLOAT64_C( 338.84), SIMDE_FLOAT64_C( -454.50), SIMDE_FLOAT64_C( -481.79), SIMDE_FLOAT64_C( 868.84) }, { SIMDE_FLOAT64_C( 868.84), SIMDE_FLOAT64_C( -481.79), SIMDE_FLOAT64_C( -454.50), SIMDE_FLOAT64_C( -481.79) } }, { { INT64_C( 2248280827481928638), INT64_C( 5338454049758515032), -INT64_C( 742341901610885598), -INT64_C( 7045158274482507291) }, { SIMDE_FLOAT64_C( 669.60), SIMDE_FLOAT64_C( -19.61), SIMDE_FLOAT64_C( 253.69), SIMDE_FLOAT64_C( 272.69) }, { SIMDE_FLOAT64_C( 253.69), SIMDE_FLOAT64_C( 669.60), SIMDE_FLOAT64_C( 253.69), SIMDE_FLOAT64_C( -19.61) } }, { { -INT64_C( 5046144786224579045), INT64_C( 7339632008068599421), INT64_C( 6808716401325083976), INT64_C( 7928525928940812285) }, { SIMDE_FLOAT64_C( -743.17), SIMDE_FLOAT64_C( 904.38), SIMDE_FLOAT64_C( 600.61), SIMDE_FLOAT64_C( -731.72) }, { SIMDE_FLOAT64_C( -731.72), SIMDE_FLOAT64_C( 904.38), SIMDE_FLOAT64_C( -743.17), SIMDE_FLOAT64_C( 904.38) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i idx = simde_mm256_loadu_epi64(test_vec[i].idx); simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_permutexvar_pd(idx, a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i idx = simde_test_x86_random_i64x4(); simde__m256d a = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256d r = 
simde_mm256_permutexvar_pd(idx, a); simde_test_x86_write_i64x4(2, idx, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif }

/* Regression test for simde_mm256_mask_permutexvar_pd.
 *
 * Each recorded case holds the pass-through vector `src`, a write mask `k`,
 * four 64-bit indices `idx`, the source vector `a` and the expected result
 * `r`.  In the recorded data a lane whose mask bit (bits 0..3 of `k`) is
 * clear keeps the `src` value; the other lanes are consistent with taking
 * a[idx & 3].
 *
 * The table is `static` with an inferred length so it is not rebuilt on the
 * stack at every call and cannot be silently zero-padded; the loop already
 * derives the case count with sizeof.
 *
 * Returns 0 when every case matches.  The `#else` branch is the (disabled)
 * generator that prints fresh random cases and returns 1. */
static int
test_simde_mm256_mask_permutexvar_pd(SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    const simde_float64 src[4];
    const simde__mmask8 k;
    const int64_t idx[4];
    const simde_float64 a[4];
    const simde_float64 r[4];
  } test_vec[] = {
    { { SIMDE_FLOAT64_C( -749.78), SIMDE_FLOAT64_C( 320.61), SIMDE_FLOAT64_C( -674.06), SIMDE_FLOAT64_C( -589.15) },
      UINT8_C(231),
      { -INT64_C( 1263618822685620846), -INT64_C( 2847177519672810476), -INT64_C( 5739950126254014776), INT64_C( 5493265346248387706) },
      { SIMDE_FLOAT64_C( -516.97), SIMDE_FLOAT64_C( -674.45), SIMDE_FLOAT64_C( -986.75), SIMDE_FLOAT64_C( -900.24) },
      { SIMDE_FLOAT64_C( -986.75), SIMDE_FLOAT64_C( -516.97), SIMDE_FLOAT64_C( -516.97), SIMDE_FLOAT64_C( -589.15) } },
    { { SIMDE_FLOAT64_C( -673.90), SIMDE_FLOAT64_C( -543.83), SIMDE_FLOAT64_C( -527.91), SIMDE_FLOAT64_C( 493.44) },
      UINT8_C(149),
      { INT64_C( 1644533322731674431), -INT64_C( 1815358867884973126), -INT64_C( 1429070044781862234), INT64_C( 2444769474548862823) },
      { SIMDE_FLOAT64_C( -140.49), SIMDE_FLOAT64_C( -448.35), SIMDE_FLOAT64_C( 133.99), SIMDE_FLOAT64_C( 549.40) },
      { SIMDE_FLOAT64_C( 549.40), SIMDE_FLOAT64_C( -543.83), SIMDE_FLOAT64_C( 133.99), SIMDE_FLOAT64_C( 493.44) } },
    { { SIMDE_FLOAT64_C( -678.26), SIMDE_FLOAT64_C( 381.11), SIMDE_FLOAT64_C( 667.04), SIMDE_FLOAT64_C( 575.51) },
      UINT8_C( 97),
      { INT64_C( 3452292845339951030), -INT64_C( 4037906182301774173), -INT64_C( 2401883995933946195), -INT64_C( 47292980296388873) },
      { SIMDE_FLOAT64_C( 948.31), SIMDE_FLOAT64_C( 488.58), SIMDE_FLOAT64_C( 153.16), SIMDE_FLOAT64_C( 778.73) },
      { SIMDE_FLOAT64_C( 153.16), SIMDE_FLOAT64_C( 381.11), SIMDE_FLOAT64_C( 667.04), SIMDE_FLOAT64_C( 575.51) } },
    { { SIMDE_FLOAT64_C( -963.94), SIMDE_FLOAT64_C( 996.32), SIMDE_FLOAT64_C( 531.49), SIMDE_FLOAT64_C( -945.52) },
      UINT8_C(185),
      { -INT64_C( 1357490459819988989), INT64_C( 9197765586491828195), INT64_C( 7227652815215696407), INT64_C( 1075782015956718601) },
      { SIMDE_FLOAT64_C( 437.45), SIMDE_FLOAT64_C( -523.92), SIMDE_FLOAT64_C( -270.27), SIMDE_FLOAT64_C( -213.44) },
      { SIMDE_FLOAT64_C( -213.44), SIMDE_FLOAT64_C( 996.32), SIMDE_FLOAT64_C( 531.49), SIMDE_FLOAT64_C( -523.92) } },
    { { SIMDE_FLOAT64_C( -557.45), SIMDE_FLOAT64_C( -121.42), SIMDE_FLOAT64_C( 881.29), SIMDE_FLOAT64_C( 967.43) },
      UINT8_C( 27),
      { INT64_C( 7596636677954349646), INT64_C( 6936880209947492212), -INT64_C( 4240568522068232039), -INT64_C( 3111184124717670653) },
      { SIMDE_FLOAT64_C( -412.42), SIMDE_FLOAT64_C( 611.71), SIMDE_FLOAT64_C( 321.78), SIMDE_FLOAT64_C( 605.43) },
      { SIMDE_FLOAT64_C( 321.78), SIMDE_FLOAT64_C( -412.42), SIMDE_FLOAT64_C( 881.29), SIMDE_FLOAT64_C( 605.43) } },
    { { SIMDE_FLOAT64_C( -459.52), SIMDE_FLOAT64_C( -808.78), SIMDE_FLOAT64_C( -336.07), SIMDE_FLOAT64_C( 279.61) },
      UINT8_C( 21),
      { INT64_C( 1580835613247101335), INT64_C( 156008206567177389), -INT64_C( 5657545105993047650), INT64_C( 2535661328087666565) },
      { SIMDE_FLOAT64_C( 650.97), SIMDE_FLOAT64_C( 966.14), SIMDE_FLOAT64_C( 607.62), SIMDE_FLOAT64_C( -257.27) },
      { SIMDE_FLOAT64_C( -257.27), SIMDE_FLOAT64_C( -808.78), SIMDE_FLOAT64_C( 607.62), SIMDE_FLOAT64_C( 279.61) } },
    { { SIMDE_FLOAT64_C( 110.41), SIMDE_FLOAT64_C( -827.61), SIMDE_FLOAT64_C( -943.00), SIMDE_FLOAT64_C( -461.29) },
      UINT8_C(231),
      { INT64_C( 3594026778983278082), INT64_C( 2591424474769545236), INT64_C( 5999024786878894578), INT64_C( 4295772386389045835) },
      { SIMDE_FLOAT64_C( 829.81), SIMDE_FLOAT64_C( 151.31), SIMDE_FLOAT64_C( -948.92), SIMDE_FLOAT64_C( 823.99) },
      { SIMDE_FLOAT64_C( -948.92), SIMDE_FLOAT64_C( 829.81), SIMDE_FLOAT64_C( -948.92), SIMDE_FLOAT64_C( -461.29) } },
    { { SIMDE_FLOAT64_C( -83.93), SIMDE_FLOAT64_C( 443.69), SIMDE_FLOAT64_C( 453.64), SIMDE_FLOAT64_C( -121.53) },
      UINT8_C(181),
      { INT64_C( 4664511156021966899), INT64_C( 1907227610614736986), -INT64_C( 1013434392324657796), -INT64_C( 8134225108301181685) },
      { SIMDE_FLOAT64_C( -243.67), SIMDE_FLOAT64_C( 980.91), SIMDE_FLOAT64_C( -312.16), SIMDE_FLOAT64_C( 981.49) },
      { SIMDE_FLOAT64_C( 981.49), SIMDE_FLOAT64_C( 443.69), SIMDE_FLOAT64_C( -243.67), SIMDE_FLOAT64_C( -121.53) } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde__m256d src = simde_mm256_loadu_pd(test_vec[i].src);
    simde__m256i idx = simde_mm256_loadu_epi64(test_vec[i].idx);
    simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a);
    simde__m256d r = simde_mm256_mask_permutexvar_pd(src, test_vec[i].k, idx, a);
    /* Third argument is the comparison precision passed to the assert helper. */
    simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1);
  }

  return 0;
#else
  /* Generator: emits eight random cases in the same field order as the
   * struct above (src, k, idx, a, r). */
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde__m256d src = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
    simde__mmask8 k = simde_test_x86_random_mmask8();
    simde__m256i idx = simde_test_x86_random_i64x4();
    simde__m256d a = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
    simde__m256d r = simde_mm256_mask_permutexvar_pd(src, k, idx, a);

    simde_test_x86_write_f64x4(2, src, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_i64x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_f64x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

static int test_simde_mm256_maskz_permutexvar_pd(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde__mmask8 k; const int64_t idx[4]; const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[8] = { { UINT8_C( 82), { INT64_C( 7756845172501845012), -INT64_C( 6825194156430213296), INT64_C( 5503985742234073849), INT64_C( 6858073262148153907) }, { SIMDE_FLOAT64_C( 348.35), SIMDE_FLOAT64_C( 624.29), SIMDE_FLOAT64_C( -664.85), SIMDE_FLOAT64_C( 199.21) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 348.35),
SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(178), { -INT64_C( 3741505294702552255), INT64_C( 8638691756592248265), -INT64_C( 1915010472295649745), -INT64_C( 6324410229484237904) }, { SIMDE_FLOAT64_C( -819.48), SIMDE_FLOAT64_C( -45.29), SIMDE_FLOAT64_C( -250.63), SIMDE_FLOAT64_C( -204.86) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -45.29), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(190), { INT64_C( 3085962307915192508), INT64_C( 8771176944555871084), -INT64_C( 8423505512529027354), INT64_C( 2875934750950081261) }, { SIMDE_FLOAT64_C( 493.35), SIMDE_FLOAT64_C( -846.35), SIMDE_FLOAT64_C( -448.39), SIMDE_FLOAT64_C( -858.20) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 493.35), SIMDE_FLOAT64_C( -448.39), SIMDE_FLOAT64_C( -846.35) } }, { UINT8_C(250), { INT64_C( 6101534005241741420), INT64_C( 195321988682229433), INT64_C( 3689323120464773126), INT64_C( 6948633058193660420) }, { SIMDE_FLOAT64_C( -979.67), SIMDE_FLOAT64_C( -4.00), SIMDE_FLOAT64_C( 936.36), SIMDE_FLOAT64_C( -772.83) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -4.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -979.67) } }, { UINT8_C(212), { INT64_C( 6141754755641061060), INT64_C( 5237162096522378650), -INT64_C( 8240250253156228522), -INT64_C( 2245492898981346258) }, { SIMDE_FLOAT64_C( 244.04), SIMDE_FLOAT64_C( 477.58), SIMDE_FLOAT64_C( -396.44), SIMDE_FLOAT64_C( 196.16) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -396.44), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C( 73), { -INT64_C( 3354690591741684986), -INT64_C( 4153207234733033107), -INT64_C( 2333999889224701572), INT64_C( 5511126413861026766) }, { SIMDE_FLOAT64_C( 527.73), SIMDE_FLOAT64_C( -488.23), SIMDE_FLOAT64_C( -9.01), SIMDE_FLOAT64_C( -402.49) }, { SIMDE_FLOAT64_C( -9.01), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -9.01) } }, { UINT8_C( 30), { INT64_C( 5170372545627244289), INT64_C( 8329200236232935222), INT64_C( 6962692482878502622), INT64_C( 9109829307771383279) 
}, { SIMDE_FLOAT64_C( -972.26), SIMDE_FLOAT64_C( 204.27), SIMDE_FLOAT64_C( 393.26), SIMDE_FLOAT64_C( 414.79) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 393.26), SIMDE_FLOAT64_C( 393.26), SIMDE_FLOAT64_C( 414.79) } }, { UINT8_C(209), { -INT64_C( 6774496452643677191), INT64_C( 2603319939733352382), INT64_C( 5036258758730416250), -INT64_C( 7811635825436106034) }, { SIMDE_FLOAT64_C( -877.39), SIMDE_FLOAT64_C( 290.97), SIMDE_FLOAT64_C( 735.30), SIMDE_FLOAT64_C( -604.30) }, { SIMDE_FLOAT64_C( 290.97), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i idx = simde_mm256_loadu_epi64(test_vec[i].idx); simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_maskz_permutexvar_pd(test_vec[i].k, idx, a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i idx = simde_test_x86_random_i64x4(); simde__m256d a = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256d r = simde_mm256_maskz_permutexvar_pd(k, idx, a); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x4(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif }

/* Regression test for simde_mm256_permutexvar_ps.
 *
 * Each recorded case holds eight 32-bit indices `idx`, the source vector `a`
 * and the expected result `r`.  The indices are full-range random values;
 * the recorded results are consistent with lane i taking a[idx[i] & 7].
 *
 * The table is `static` with an inferred length so it is not rebuilt on the
 * stack at every call and cannot be silently zero-padded; the loop already
 * derives the case count with sizeof.
 *
 * Returns 0 when every case matches.  The `#else` branch is the (disabled)
 * generator that prints fresh random cases and returns 1. */
static int
test_simde_mm256_permutexvar_ps(SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    const int32_t idx[8];
    const simde_float32 a[8];
    const simde_float32 r[8];
  } test_vec[] = {
    { { -INT32_C( 1003465647), -INT32_C( 514585522), INT32_C( 1936858576), -INT32_C( 2075224162), INT32_C( 1915012941), INT32_C( 1612493475), INT32_C( 2023952200), -INT32_C( 1918079684) },
      { SIMDE_FLOAT32_C( -948.78), SIMDE_FLOAT32_C( -410.03), SIMDE_FLOAT32_C( -879.71), SIMDE_FLOAT32_C( 201.10), SIMDE_FLOAT32_C( 452.67), SIMDE_FLOAT32_C( 820.53), SIMDE_FLOAT32_C( 280.22), SIMDE_FLOAT32_C( 700.21) },
      { SIMDE_FLOAT32_C( -410.03), SIMDE_FLOAT32_C( 280.22), SIMDE_FLOAT32_C( -948.78), SIMDE_FLOAT32_C( 280.22), SIMDE_FLOAT32_C( 820.53), SIMDE_FLOAT32_C( 201.10), SIMDE_FLOAT32_C( -948.78), SIMDE_FLOAT32_C( 452.67) } },
    { { INT32_C( 1630364355), INT32_C( 1206221817), -INT32_C( 440858046), INT32_C( 71751100), INT32_C( 561834469), INT32_C( 414067030), -INT32_C( 282656762), INT32_C( 1772689574) },
      { SIMDE_FLOAT32_C( 591.13), SIMDE_FLOAT32_C( -529.69), SIMDE_FLOAT32_C( -654.92), SIMDE_FLOAT32_C( 391.52), SIMDE_FLOAT32_C( -8.88), SIMDE_FLOAT32_C( -562.27), SIMDE_FLOAT32_C( -53.46), SIMDE_FLOAT32_C( 344.19) },
      { SIMDE_FLOAT32_C( 391.52), SIMDE_FLOAT32_C( -529.69), SIMDE_FLOAT32_C( -654.92), SIMDE_FLOAT32_C( -8.88), SIMDE_FLOAT32_C( -562.27), SIMDE_FLOAT32_C( -53.46), SIMDE_FLOAT32_C( -53.46), SIMDE_FLOAT32_C( -53.46) } },
    { { INT32_C( 1987736762), INT32_C( 796573770), INT32_C( 5306281), INT32_C( 639172128), -INT32_C( 1525334017), -INT32_C( 770719913), -INT32_C( 398009706), INT32_C( 1165820298) },
      { SIMDE_FLOAT32_C( 502.30), SIMDE_FLOAT32_C( -922.61), SIMDE_FLOAT32_C( -408.74), SIMDE_FLOAT32_C( 423.84), SIMDE_FLOAT32_C( -781.34), SIMDE_FLOAT32_C( 645.58), SIMDE_FLOAT32_C( -578.41), SIMDE_FLOAT32_C( -100.81) },
      { SIMDE_FLOAT32_C( -408.74), SIMDE_FLOAT32_C( -408.74), SIMDE_FLOAT32_C( -922.61), SIMDE_FLOAT32_C( 502.30), SIMDE_FLOAT32_C( -100.81), SIMDE_FLOAT32_C( -100.81), SIMDE_FLOAT32_C( -578.41), SIMDE_FLOAT32_C( -408.74) } },
    { { INT32_C( 1298284077), INT32_C( 1014200893), INT32_C( 299993273), -INT32_C( 572264377), INT32_C( 1439050186), -INT32_C( 1617280214), -INT32_C( 195078853), -INT32_C( 1202303605) },
      { SIMDE_FLOAT32_C( 114.38), SIMDE_FLOAT32_C( -407.95), SIMDE_FLOAT32_C( -190.09), SIMDE_FLOAT32_C( -913.09), SIMDE_FLOAT32_C( -594.00), SIMDE_FLOAT32_C( -805.95), SIMDE_FLOAT32_C( 896.02), SIMDE_FLOAT32_C( 706.50) },
      { SIMDE_FLOAT32_C( -805.95), SIMDE_FLOAT32_C( -805.95), SIMDE_FLOAT32_C( -407.95), SIMDE_FLOAT32_C( 706.50), SIMDE_FLOAT32_C( -190.09), SIMDE_FLOAT32_C( -190.09), SIMDE_FLOAT32_C( -913.09), SIMDE_FLOAT32_C( -913.09) } },
    { { INT32_C( 1224533505), -INT32_C( 1591287849), INT32_C( 855043080), INT32_C( 1808961583), INT32_C( 1902064102), INT32_C( 170571134), -INT32_C( 1613549715), -INT32_C( 1450518360) },
      { SIMDE_FLOAT32_C( 408.50), SIMDE_FLOAT32_C( -14.37), SIMDE_FLOAT32_C( -881.14), SIMDE_FLOAT32_C( -158.53), SIMDE_FLOAT32_C( -555.96), SIMDE_FLOAT32_C( -372.25), SIMDE_FLOAT32_C( 52.25), SIMDE_FLOAT32_C( 336.52) },
      { SIMDE_FLOAT32_C( -14.37), SIMDE_FLOAT32_C( 336.52), SIMDE_FLOAT32_C( 408.50), SIMDE_FLOAT32_C( 336.52), SIMDE_FLOAT32_C( 52.25), SIMDE_FLOAT32_C( 52.25), SIMDE_FLOAT32_C( -372.25), SIMDE_FLOAT32_C( 408.50) } },
    { { INT32_C( 866201091), -INT32_C( 1533119554), INT32_C( 588709284), INT32_C( 523059378), INT32_C( 398328175), -INT32_C( 1732228903), INT32_C( 909030095), -INT32_C( 844798263) },
      { SIMDE_FLOAT32_C( -302.01), SIMDE_FLOAT32_C( -152.55), SIMDE_FLOAT32_C( -912.35), SIMDE_FLOAT32_C( -238.55), SIMDE_FLOAT32_C( 120.82), SIMDE_FLOAT32_C( -497.21), SIMDE_FLOAT32_C( -204.10), SIMDE_FLOAT32_C( -635.30) },
      { SIMDE_FLOAT32_C( -238.55), SIMDE_FLOAT32_C( -204.10), SIMDE_FLOAT32_C( 120.82), SIMDE_FLOAT32_C( -912.35), SIMDE_FLOAT32_C( -635.30), SIMDE_FLOAT32_C( -152.55), SIMDE_FLOAT32_C( -635.30), SIMDE_FLOAT32_C( -152.55) } },
    { { INT32_C( 1300303259), -INT32_C( 1133727923), -INT32_C( 1982584144), INT32_C( 1109562482), INT32_C( 276320326), INT32_C( 1256005046), INT32_C( 496885091), INT32_C( 377263227) },
      { SIMDE_FLOAT32_C( -910.92), SIMDE_FLOAT32_C( 41.77), SIMDE_FLOAT32_C( -614.35), SIMDE_FLOAT32_C( -51.13), SIMDE_FLOAT32_C( 612.93), SIMDE_FLOAT32_C( 716.33), SIMDE_FLOAT32_C( -728.68), SIMDE_FLOAT32_C( -652.05) },
      { SIMDE_FLOAT32_C( -51.13), SIMDE_FLOAT32_C( 716.33), SIMDE_FLOAT32_C( -910.92), SIMDE_FLOAT32_C( -614.35), SIMDE_FLOAT32_C( -728.68), SIMDE_FLOAT32_C( -728.68), SIMDE_FLOAT32_C( -51.13), SIMDE_FLOAT32_C( -51.13) } },
    { { INT32_C( 1810267897), INT32_C( 1504512019), INT32_C( 258549080), -INT32_C( 1504098750), -INT32_C( 1631324637), INT32_C( 733233034), -INT32_C( 384231619), -INT32_C( 549010202) },
      { SIMDE_FLOAT32_C( -556.70), SIMDE_FLOAT32_C( -412.20), SIMDE_FLOAT32_C( -513.11), SIMDE_FLOAT32_C( 461.90), SIMDE_FLOAT32_C( -797.86), SIMDE_FLOAT32_C( -380.85), SIMDE_FLOAT32_C( -514.89), SIMDE_FLOAT32_C( -402.18) },
      { SIMDE_FLOAT32_C( -412.20), SIMDE_FLOAT32_C( 461.90), SIMDE_FLOAT32_C( -556.70), SIMDE_FLOAT32_C( -513.11), SIMDE_FLOAT32_C( 461.90), SIMDE_FLOAT32_C( -513.11), SIMDE_FLOAT32_C( -380.85), SIMDE_FLOAT32_C( -514.89) } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde__m256i idx = simde_mm256_loadu_epi32(test_vec[i].idx);
    simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a);
    simde__m256 r = simde_mm256_permutexvar_ps(idx, a);
    /* Third argument is the comparison precision passed to the assert helper. */
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1);
  }

  return 0;
#else
  /* Generator: emits eight random cases in the same field order as the
   * struct above (idx, a, r). */
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde__m256i idx = simde_test_x86_random_i32x8();
    /* Single-precision bounds for the f32x8 helper (previously written with
     * the float64 literal macro).  NOTE(review): assumes the helper takes
     * simde_float32 bounds - confirm against its declaration. */
    simde__m256 a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
    simde__m256 r = simde_mm256_permutexvar_ps(idx, a);

    simde_test_x86_write_i32x8(2, idx, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

static int test_simde_mm256_mask_permutexvar_ps(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 src[8]; const simde__mmask8 k; const int32_t idx[8]; const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( -2.13), SIMDE_FLOAT32_C( -740.45), SIMDE_FLOAT32_C( -30.00), SIMDE_FLOAT32_C( 497.57), SIMDE_FLOAT32_C( 218.90), SIMDE_FLOAT32_C( -632.82), SIMDE_FLOAT32_C( -495.65), SIMDE_FLOAT32_C( 553.04) }, UINT8_C( 9), { -INT32_C( 806795204), INT32_C( 982691045), -INT32_C( 417966830), -INT32_C(
75699523), INT32_C( 241326701), INT32_C( 1998051949), -INT32_C( 2006706479), -INT32_C( 795763308) }, { SIMDE_FLOAT32_C( -328.16), SIMDE_FLOAT32_C( -360.93), SIMDE_FLOAT32_C( 935.10), SIMDE_FLOAT32_C( -329.17), SIMDE_FLOAT32_C( -862.40), SIMDE_FLOAT32_C( 392.03), SIMDE_FLOAT32_C( 803.30), SIMDE_FLOAT32_C( 62.01) }, { SIMDE_FLOAT32_C( -862.40), SIMDE_FLOAT32_C( -740.45), SIMDE_FLOAT32_C( -30.00), SIMDE_FLOAT32_C( 392.03), SIMDE_FLOAT32_C( 218.90), SIMDE_FLOAT32_C( -632.82), SIMDE_FLOAT32_C( -495.65), SIMDE_FLOAT32_C( 553.04) } }, { { SIMDE_FLOAT32_C( 71.98), SIMDE_FLOAT32_C( 655.74), SIMDE_FLOAT32_C( -706.91), SIMDE_FLOAT32_C( -789.78), SIMDE_FLOAT32_C( -487.45), SIMDE_FLOAT32_C( 980.26), SIMDE_FLOAT32_C( 767.91), SIMDE_FLOAT32_C( -611.76) }, UINT8_C(247), { INT32_C( 2070184353), -INT32_C( 1370694504), -INT32_C( 565979840), -INT32_C( 507243674), -INT32_C( 452423757), -INT32_C( 1335082604), -INT32_C( 21581727), -INT32_C( 1862981137) }, { SIMDE_FLOAT32_C( 667.85), SIMDE_FLOAT32_C( 476.90), SIMDE_FLOAT32_C( -690.52), SIMDE_FLOAT32_C( -472.40), SIMDE_FLOAT32_C( -496.30), SIMDE_FLOAT32_C( 3.74), SIMDE_FLOAT32_C( 619.63), SIMDE_FLOAT32_C( -839.28) }, { SIMDE_FLOAT32_C( 476.90), SIMDE_FLOAT32_C( 667.85), SIMDE_FLOAT32_C( 667.85), SIMDE_FLOAT32_C( -789.78), SIMDE_FLOAT32_C( -472.40), SIMDE_FLOAT32_C( -496.30), SIMDE_FLOAT32_C( 476.90), SIMDE_FLOAT32_C( -839.28) } }, { { SIMDE_FLOAT32_C( 681.67), SIMDE_FLOAT32_C( 448.27), SIMDE_FLOAT32_C( -999.01), SIMDE_FLOAT32_C( -32.65), SIMDE_FLOAT32_C( 871.55), SIMDE_FLOAT32_C( -486.89), SIMDE_FLOAT32_C( -107.91), SIMDE_FLOAT32_C( -770.07) }, UINT8_C(167), { -INT32_C( 1103371907), -INT32_C( 31462579), -INT32_C( 890430047), INT32_C( 1819639314), -INT32_C( 492717942), -INT32_C( 569370711), -INT32_C( 2094619541), -INT32_C( 1775623911) }, { SIMDE_FLOAT32_C( 366.81), SIMDE_FLOAT32_C( 789.20), SIMDE_FLOAT32_C( 755.17), SIMDE_FLOAT32_C( -830.47), SIMDE_FLOAT32_C( 113.51), SIMDE_FLOAT32_C( 251.44), SIMDE_FLOAT32_C( -995.14), SIMDE_FLOAT32_C( 
592.37) }, { SIMDE_FLOAT32_C( 251.44), SIMDE_FLOAT32_C( 251.44), SIMDE_FLOAT32_C( 789.20), SIMDE_FLOAT32_C( -32.65), SIMDE_FLOAT32_C( 871.55), SIMDE_FLOAT32_C( 789.20), SIMDE_FLOAT32_C( -107.91), SIMDE_FLOAT32_C( 789.20) } }, { { SIMDE_FLOAT32_C( -854.58), SIMDE_FLOAT32_C( -359.30), SIMDE_FLOAT32_C( 614.00), SIMDE_FLOAT32_C( -99.17), SIMDE_FLOAT32_C( -491.72), SIMDE_FLOAT32_C( 303.49), SIMDE_FLOAT32_C( 133.14), SIMDE_FLOAT32_C( -314.65) }, UINT8_C(234), { -INT32_C( 929814863), -INT32_C( 634097098), -INT32_C( 1175210088), INT32_C( 1220315618), INT32_C( 1352207581), -INT32_C( 1780380582), INT32_C( 2085586250), INT32_C( 1164415636) }, { SIMDE_FLOAT32_C( -573.04), SIMDE_FLOAT32_C( -528.28), SIMDE_FLOAT32_C( -222.24), SIMDE_FLOAT32_C( 155.58), SIMDE_FLOAT32_C( 114.00), SIMDE_FLOAT32_C( -558.66), SIMDE_FLOAT32_C( 714.03), SIMDE_FLOAT32_C( -87.19) }, { SIMDE_FLOAT32_C( -854.58), SIMDE_FLOAT32_C( 714.03), SIMDE_FLOAT32_C( 614.00), SIMDE_FLOAT32_C( -222.24), SIMDE_FLOAT32_C( -491.72), SIMDE_FLOAT32_C( -222.24), SIMDE_FLOAT32_C( -222.24), SIMDE_FLOAT32_C( 114.00) } }, { { SIMDE_FLOAT32_C( 515.75), SIMDE_FLOAT32_C( -5.60), SIMDE_FLOAT32_C( -901.90), SIMDE_FLOAT32_C( 697.05), SIMDE_FLOAT32_C( 951.14), SIMDE_FLOAT32_C( -349.81), SIMDE_FLOAT32_C( 667.55), SIMDE_FLOAT32_C( -336.95) }, UINT8_C(134), { INT32_C( 1239446205), -INT32_C( 661424557), INT32_C( 1466765509), INT32_C( 1913696887), INT32_C( 48234176), -INT32_C( 2097355577), INT32_C( 499636130), -INT32_C( 1197217541) }, { SIMDE_FLOAT32_C( 433.29), SIMDE_FLOAT32_C( 143.53), SIMDE_FLOAT32_C( -821.67), SIMDE_FLOAT32_C( -831.55), SIMDE_FLOAT32_C( -391.00), SIMDE_FLOAT32_C( 896.63), SIMDE_FLOAT32_C( 913.22), SIMDE_FLOAT32_C( 949.92) }, { SIMDE_FLOAT32_C( 515.75), SIMDE_FLOAT32_C( -831.55), SIMDE_FLOAT32_C( 896.63), SIMDE_FLOAT32_C( 697.05), SIMDE_FLOAT32_C( 951.14), SIMDE_FLOAT32_C( -349.81), SIMDE_FLOAT32_C( 667.55), SIMDE_FLOAT32_C( -831.55) } }, { { SIMDE_FLOAT32_C( 208.80), SIMDE_FLOAT32_C( 800.73), SIMDE_FLOAT32_C( 851.65), 
SIMDE_FLOAT32_C( 635.40), SIMDE_FLOAT32_C( 700.13), SIMDE_FLOAT32_C( -819.67), SIMDE_FLOAT32_C( -466.27), SIMDE_FLOAT32_C( -622.72) }, UINT8_C( 37), { INT32_C( 1374447981), INT32_C( 1022586469), INT32_C( 506925109), INT32_C( 947449780), -INT32_C( 2043533583), -INT32_C( 1607732819), INT32_C( 776357639), INT32_C( 1112780245) }, { SIMDE_FLOAT32_C( -104.78), SIMDE_FLOAT32_C( 475.03), SIMDE_FLOAT32_C( -319.30), SIMDE_FLOAT32_C( 25.24), SIMDE_FLOAT32_C( -469.36), SIMDE_FLOAT32_C( 175.13), SIMDE_FLOAT32_C( 486.55), SIMDE_FLOAT32_C( 730.10) }, { SIMDE_FLOAT32_C( 175.13), SIMDE_FLOAT32_C( 800.73), SIMDE_FLOAT32_C( 175.13), SIMDE_FLOAT32_C( 635.40), SIMDE_FLOAT32_C( 700.13), SIMDE_FLOAT32_C( 175.13), SIMDE_FLOAT32_C( -466.27), SIMDE_FLOAT32_C( -622.72) } }, { { SIMDE_FLOAT32_C( 342.99), SIMDE_FLOAT32_C( 946.71), SIMDE_FLOAT32_C( 736.44), SIMDE_FLOAT32_C( -443.90), SIMDE_FLOAT32_C( 157.26), SIMDE_FLOAT32_C( 176.08), SIMDE_FLOAT32_C( -87.27), SIMDE_FLOAT32_C( -453.86) }, UINT8_C(160), { -INT32_C( 1504881227), -INT32_C( 944902784), -INT32_C( 509813964), INT32_C( 1844960814), -INT32_C( 132418959), INT32_C( 412089897), INT32_C( 2029509375), INT32_C( 303613533) }, { SIMDE_FLOAT32_C( -62.11), SIMDE_FLOAT32_C( -141.29), SIMDE_FLOAT32_C( 668.54), SIMDE_FLOAT32_C( 263.85), SIMDE_FLOAT32_C( -513.77), SIMDE_FLOAT32_C( 229.65), SIMDE_FLOAT32_C( 719.52), SIMDE_FLOAT32_C( 135.92) }, { SIMDE_FLOAT32_C( 342.99), SIMDE_FLOAT32_C( 946.71), SIMDE_FLOAT32_C( 736.44), SIMDE_FLOAT32_C( -443.90), SIMDE_FLOAT32_C( 157.26), SIMDE_FLOAT32_C( -141.29), SIMDE_FLOAT32_C( -87.27), SIMDE_FLOAT32_C( 229.65) } }, { { SIMDE_FLOAT32_C( 738.29), SIMDE_FLOAT32_C( -161.36), SIMDE_FLOAT32_C( 185.05), SIMDE_FLOAT32_C( -140.93), SIMDE_FLOAT32_C( 167.26), SIMDE_FLOAT32_C( -870.60), SIMDE_FLOAT32_C( 454.87), SIMDE_FLOAT32_C( -823.43) }, UINT8_C(209), { -INT32_C( 151369480), -INT32_C( 1309338665), -INT32_C( 804360950), -INT32_C( 336846714), INT32_C( 1044285912), -INT32_C( 310393363), -INT32_C( 1932726739), -INT32_C( 
1017307701) }, { SIMDE_FLOAT32_C( 89.09), SIMDE_FLOAT32_C( 802.57), SIMDE_FLOAT32_C( -187.53), SIMDE_FLOAT32_C( -22.46), SIMDE_FLOAT32_C( -239.46), SIMDE_FLOAT32_C( -156.70), SIMDE_FLOAT32_C( -50.47), SIMDE_FLOAT32_C( -721.38) }, { SIMDE_FLOAT32_C( 89.09), SIMDE_FLOAT32_C( -161.36), SIMDE_FLOAT32_C( 185.05), SIMDE_FLOAT32_C( -140.93), SIMDE_FLOAT32_C( 89.09), SIMDE_FLOAT32_C( -870.60), SIMDE_FLOAT32_C( -156.70), SIMDE_FLOAT32_C( -22.46) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 src = simde_mm256_loadu_ps(test_vec[i].src); simde__m256i idx = simde_mm256_loadu_epi32(test_vec[i].idx); simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_mask_permutexvar_ps(src, test_vec[i].k, idx, a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256 src = simde_test_x86_random_f32x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i idx = simde_test_x86_random_i32x8(); simde__m256 a = simde_test_x86_random_f32x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256 r = simde_mm256_mask_permutexvar_ps(src, k, idx, a); simde_test_x86_write_f32x8(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_maskz_permutexvar_ps(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde__mmask8 k; const int32_t idx[8]; const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[8] = { { UINT8_C( 70), { -INT32_C( 451449221), INT32_C( 638291178), -INT32_C( 1683155451), INT32_C( 1763935515), -INT32_C( 1057453855), INT32_C( 204563644), INT32_C( 641593323), INT32_C( 
1265392848) }, { SIMDE_FLOAT32_C( -564.32), SIMDE_FLOAT32_C( -573.78), SIMDE_FLOAT32_C( 632.73), SIMDE_FLOAT32_C( 965.26), SIMDE_FLOAT32_C( 840.88), SIMDE_FLOAT32_C( -46.96), SIMDE_FLOAT32_C( 249.88), SIMDE_FLOAT32_C( 458.77) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 632.73), SIMDE_FLOAT32_C( -46.96), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 965.26), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C( 81), { INT32_C( 225292424), INT32_C( 1542379219), INT32_C( 840478414), -INT32_C( 836950817), -INT32_C( 1113701279), INT32_C( 881322416), INT32_C( 1413692441), -INT32_C( 1616488425) }, { SIMDE_FLOAT32_C( -724.50), SIMDE_FLOAT32_C( 39.62), SIMDE_FLOAT32_C( 808.79), SIMDE_FLOAT32_C( -136.37), SIMDE_FLOAT32_C( 834.12), SIMDE_FLOAT32_C( 972.60), SIMDE_FLOAT32_C( -952.76), SIMDE_FLOAT32_C( -309.86) }, { SIMDE_FLOAT32_C( -724.50), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 39.62), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 39.62), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C( 73), { INT32_C( 1932061263), INT32_C( 1272248071), INT32_C( 2147258773), INT32_C( 1436037145), -INT32_C( 865211021), -INT32_C( 1496052589), INT32_C( 1401925817), INT32_C( 932988648) }, { SIMDE_FLOAT32_C( 220.62), SIMDE_FLOAT32_C( -582.61), SIMDE_FLOAT32_C( -37.83), SIMDE_FLOAT32_C( 328.48), SIMDE_FLOAT32_C( -771.08), SIMDE_FLOAT32_C( -354.91), SIMDE_FLOAT32_C( -96.61), SIMDE_FLOAT32_C( -734.93) }, { SIMDE_FLOAT32_C( -734.93), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -582.61), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -582.61), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C( 17), { -INT32_C( 1356148609), INT32_C( 1445166953), -INT32_C( 102109204), INT32_C( 1957859267), INT32_C( 1683752222), INT32_C( 1737855906), -INT32_C( 1104949954), -INT32_C( 791710640) }, { SIMDE_FLOAT32_C( 550.64), SIMDE_FLOAT32_C( 877.66), SIMDE_FLOAT32_C( 194.12), SIMDE_FLOAT32_C( 80.38), SIMDE_FLOAT32_C( 546.98), 
SIMDE_FLOAT32_C( -364.43), SIMDE_FLOAT32_C( -575.59), SIMDE_FLOAT32_C( 188.77) }, { SIMDE_FLOAT32_C( 188.77), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -575.59), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C(145), { INT32_C( 2085969645), INT32_C( 362465552), -INT32_C( 1195901403), INT32_C( 1626807955), -INT32_C( 1229867711), INT32_C( 2113634692), -INT32_C( 1544181504), -INT32_C( 1707844947) }, { SIMDE_FLOAT32_C( 669.10), SIMDE_FLOAT32_C( 62.35), SIMDE_FLOAT32_C( 322.64), SIMDE_FLOAT32_C( 659.77), SIMDE_FLOAT32_C( -547.87), SIMDE_FLOAT32_C( -17.71), SIMDE_FLOAT32_C( -408.07), SIMDE_FLOAT32_C( 710.86) }, { SIMDE_FLOAT32_C( -17.71), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 62.35), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -17.71) } }, { UINT8_C(174), { -INT32_C( 1270730603), -INT32_C( 587881945), -INT32_C( 731862189), INT32_C( 1339415974), INT32_C( 771520722), INT32_C( 921015980), INT32_C( 1552461484), INT32_C( 554369164) }, { SIMDE_FLOAT32_C( -536.25), SIMDE_FLOAT32_C( 201.89), SIMDE_FLOAT32_C( 308.61), SIMDE_FLOAT32_C( -264.78), SIMDE_FLOAT32_C( -918.21), SIMDE_FLOAT32_C( 87.10), SIMDE_FLOAT32_C( 915.83), SIMDE_FLOAT32_C( 129.09) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 129.09), SIMDE_FLOAT32_C( -264.78), SIMDE_FLOAT32_C( 915.83), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -918.21), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -918.21) } }, { UINT8_C(118), { INT32_C( 1880954771), INT32_C( 1682074860), -INT32_C( 15634583), INT32_C( 61622101), -INT32_C( 812709681), INT32_C( 1627369491), INT32_C( 1364219526), INT32_C( 482841481) }, { SIMDE_FLOAT32_C( -554.85), SIMDE_FLOAT32_C( 513.70), SIMDE_FLOAT32_C( -876.79), SIMDE_FLOAT32_C( 468.50), SIMDE_FLOAT32_C( -252.10), SIMDE_FLOAT32_C( -307.53), SIMDE_FLOAT32_C( 168.50), SIMDE_FLOAT32_C( -616.11) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -252.10), SIMDE_FLOAT32_C( 
513.70), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -616.11), SIMDE_FLOAT32_C( 468.50), SIMDE_FLOAT32_C( 168.50), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C( 62), { INT32_C( 1402190092), INT32_C( 1847826021), -INT32_C( 696126939), INT32_C( 1214046962), -INT32_C( 976048846), INT32_C( 1500507764), INT32_C( 1252656763), INT32_C( 243819522) }, { SIMDE_FLOAT32_C( 673.48), SIMDE_FLOAT32_C( -869.39), SIMDE_FLOAT32_C( -10.67), SIMDE_FLOAT32_C( -6.27), SIMDE_FLOAT32_C( 502.41), SIMDE_FLOAT32_C( -652.01), SIMDE_FLOAT32_C( 919.28), SIMDE_FLOAT32_C( -434.77) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -652.01), SIMDE_FLOAT32_C( -652.01), SIMDE_FLOAT32_C( -10.67), SIMDE_FLOAT32_C( -10.67), SIMDE_FLOAT32_C( 502.41), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i idx = simde_mm256_loadu_epi32(test_vec[i].idx); simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_maskz_permutexvar_ps(test_vec[i].k, idx, a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i idx = simde_test_x86_random_i32x8(); simde__m256 a = simde_test_x86_random_f32x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256 r = simde_mm256_maskz_permutexvar_ps(k, idx, a); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x8(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_permutexvar_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t idx[32]; const int16_t a[32]; const int16_t r[32]; } test_vec[] = { { { -INT16_C( 6047), INT16_C( 25507), INT16_C( 23210), INT16_C( 31289), -INT16_C( 8917), INT16_C( 16161), INT16_C( 31147), 
-INT16_C( 31366), -INT16_C( 14183), -INT16_C( 15429), -INT16_C( 20721), INT16_C( 11147), INT16_C( 6367), -INT16_C( 21463), -INT16_C( 4483), -INT16_C( 8577), INT16_C( 8918), -INT16_C( 32702), INT16_C( 31613), -INT16_C( 22278), INT16_C( 7256), INT16_C( 999), INT16_C( 24981), INT16_C( 11912), INT16_C( 17450), INT16_C( 14833), INT16_C( 31987), -INT16_C( 11676), -INT16_C( 29036), INT16_C( 4734), -INT16_C( 644), INT16_C( 21232) }, { INT16_C( 12832), -INT16_C( 25134), -INT16_C( 12883), INT16_C( 1349), INT16_C( 11497), INT16_C( 32264), -INT16_C( 28275), -INT16_C( 18516), -INT16_C( 25131), -INT16_C( 14095), INT16_C( 21785), -INT16_C( 21094), INT16_C( 6371), INT16_C( 24511), -INT16_C( 20459), INT16_C( 13745), -INT16_C( 31518), -INT16_C( 28462), INT16_C( 5969), INT16_C( 14997), -INT16_C( 25021), -INT16_C( 11848), INT16_C( 25647), INT16_C( 1160), INT16_C( 30977), INT16_C( 6860), INT16_C( 26319), -INT16_C( 19769), -INT16_C( 30850), -INT16_C( 27886), -INT16_C( 15561), INT16_C( 6601) }, { -INT16_C( 25134), INT16_C( 1349), INT16_C( 21785), INT16_C( 6860), -INT16_C( 21094), -INT16_C( 25134), -INT16_C( 21094), INT16_C( 26319), INT16_C( 6860), -INT16_C( 19769), INT16_C( 13745), -INT16_C( 21094), INT16_C( 6601), -INT16_C( 14095), -INT16_C( 27886), INT16_C( 6601), INT16_C( 25647), -INT16_C( 12883), -INT16_C( 27886), INT16_C( 26319), INT16_C( 30977), -INT16_C( 18516), -INT16_C( 11848), -INT16_C( 25131), INT16_C( 21785), -INT16_C( 28462), INT16_C( 14997), INT16_C( 11497), -INT16_C( 25021), -INT16_C( 15561), -INT16_C( 30850), -INT16_C( 31518) } }, { { -INT16_C( 25785), -INT16_C( 26455), INT16_C( 16307), -INT16_C( 2350), -INT16_C( 29987), INT16_C( 3271), INT16_C( 20718), -INT16_C( 4336), -INT16_C( 9015), -INT16_C( 26615), -INT16_C( 11966), -INT16_C( 16309), INT16_C( 23896), -INT16_C( 28845), INT16_C( 7200), INT16_C( 26792), INT16_C( 21176), INT16_C( 27392), -INT16_C( 11375), INT16_C( 28257), INT16_C( 10589), INT16_C( 19578), -INT16_C( 30087), INT16_C( 16955), INT16_C( 17766), -INT16_C( 
22309), INT16_C( 9750), INT16_C( 28264), -INT16_C( 17533), -INT16_C( 23555), -INT16_C( 23080), -INT16_C( 28661) }, { INT16_C( 3319), -INT16_C( 30469), INT16_C( 23775), INT16_C( 15606), INT16_C( 28805), -INT16_C( 376), -INT16_C( 15110), INT16_C( 24641), INT16_C( 7177), INT16_C( 7944), INT16_C( 28738), -INT16_C( 14963), -INT16_C( 30164), INT16_C( 1128), INT16_C( 29743), INT16_C( 10132), -INT16_C( 28800), INT16_C( 24495), -INT16_C( 22805), INT16_C( 29083), INT16_C( 9238), INT16_C( 4463), -INT16_C( 20248), -INT16_C( 3727), INT16_C( 31436), INT16_C( 3600), -INT16_C( 25110), INT16_C( 5843), INT16_C( 15399), INT16_C( 22042), -INT16_C( 20816), INT16_C( 12413) }, { INT16_C( 24641), INT16_C( 7944), INT16_C( 29083), -INT16_C( 22805), INT16_C( 22042), INT16_C( 24641), INT16_C( 29743), -INT16_C( 28800), INT16_C( 7944), INT16_C( 7944), INT16_C( 23775), -INT16_C( 14963), INT16_C( 31436), INT16_C( 29083), INT16_C( 3319), INT16_C( 7177), INT16_C( 31436), INT16_C( 3319), INT16_C( 24495), -INT16_C( 30469), INT16_C( 22042), -INT16_C( 25110), INT16_C( 3600), INT16_C( 5843), -INT16_C( 15110), INT16_C( 5843), -INT16_C( 20248), INT16_C( 7177), INT16_C( 15606), INT16_C( 22042), INT16_C( 31436), -INT16_C( 14963) } }, { { INT16_C( 11581), INT16_C( 10639), INT16_C( 10963), -INT16_C( 5734), INT16_C( 2382), INT16_C( 14074), INT16_C( 27834), -INT16_C( 31193), INT16_C( 14310), -INT16_C( 12139), INT16_C( 26836), -INT16_C( 1049), INT16_C( 420), INT16_C( 21586), -INT16_C( 12368), -INT16_C( 4732), INT16_C( 5116), -INT16_C( 12522), -INT16_C( 20418), -INT16_C( 29511), -INT16_C( 19526), INT16_C( 29891), -INT16_C( 5601), INT16_C( 1530), -INT16_C( 28894), -INT16_C( 2346), -INT16_C( 16904), -INT16_C( 25358), INT16_C( 17598), INT16_C( 28401), INT16_C( 29971), INT16_C( 4188) }, { INT16_C( 29321), -INT16_C( 14369), -INT16_C( 26589), -INT16_C( 8877), INT16_C( 5708), INT16_C( 27473), INT16_C( 19201), INT16_C( 9073), INT16_C( 18395), -INT16_C( 11495), INT16_C( 2820), -INT16_C( 15761), INT16_C( 24655), INT16_C( 
25393), -INT16_C( 29226), INT16_C( 24435), INT16_C( 21247), INT16_C( 8742), INT16_C( 31211), INT16_C( 14335), INT16_C( 20624), -INT16_C( 28254), INT16_C( 5020), INT16_C( 30644), -INT16_C( 12966), INT16_C( 24138), -INT16_C( 17959), INT16_C( 10273), INT16_C( 21018), -INT16_C( 3957), -INT16_C( 289), -INT16_C( 8625) }, { -INT16_C( 3957), INT16_C( 24435), INT16_C( 14335), -INT16_C( 17959), -INT16_C( 29226), -INT16_C( 17959), -INT16_C( 17959), INT16_C( 9073), INT16_C( 19201), -INT16_C( 28254), INT16_C( 20624), INT16_C( 9073), INT16_C( 5708), INT16_C( 31211), INT16_C( 21247), INT16_C( 5708), INT16_C( 21018), INT16_C( 5020), -INT16_C( 289), INT16_C( 24138), -INT16_C( 17959), -INT16_C( 8877), -INT16_C( 8625), -INT16_C( 17959), -INT16_C( 26589), INT16_C( 5020), -INT16_C( 12966), INT16_C( 31211), -INT16_C( 289), INT16_C( 8742), INT16_C( 14335), INT16_C( 21018) } }, { { INT16_C( 30033), INT16_C( 15361), INT16_C( 238), INT16_C( 32371), INT16_C( 5457), -INT16_C( 4849), -INT16_C( 15575), -INT16_C( 31900), -INT16_C( 20847), INT16_C( 27362), INT16_C( 871), -INT16_C( 32366), INT16_C( 7765), INT16_C( 13425), -INT16_C( 16356), INT16_C( 27922), INT16_C( 4917), INT16_C( 9385), INT16_C( 7188), INT16_C( 26018), -INT16_C( 19918), INT16_C( 23378), -INT16_C( 18827), INT16_C( 1758), -INT16_C( 16284), -INT16_C( 13456), INT16_C( 963), INT16_C( 6221), -INT16_C( 16863), INT16_C( 15692), INT16_C( 24447), -INT16_C( 19285) }, { INT16_C( 21618), -INT16_C( 31016), INT16_C( 31601), -INT16_C( 23573), INT16_C( 15661), -INT16_C( 23810), -INT16_C( 8973), INT16_C( 22441), INT16_C( 6557), INT16_C( 24611), INT16_C( 28700), INT16_C( 15737), -INT16_C( 15058), -INT16_C( 21125), INT16_C( 9764), -INT16_C( 26782), INT16_C( 14970), -INT16_C( 5347), INT16_C( 2485), -INT16_C( 7538), -INT16_C( 29626), INT16_C( 14981), INT16_C( 11881), INT16_C( 1681), -INT16_C( 19385), INT16_C( 25702), -INT16_C( 8412), INT16_C( 21409), INT16_C( 7333), -INT16_C( 14080), INT16_C( 25154), -INT16_C( 17056) }, { -INT16_C( 5347), -INT16_C( 
31016), INT16_C( 9764), -INT16_C( 7538), -INT16_C( 5347), -INT16_C( 26782), INT16_C( 24611), INT16_C( 15661), -INT16_C( 5347), INT16_C( 31601), INT16_C( 22441), INT16_C( 2485), INT16_C( 14981), -INT16_C( 5347), INT16_C( 7333), INT16_C( 2485), INT16_C( 14981), INT16_C( 24611), -INT16_C( 29626), INT16_C( 31601), INT16_C( 2485), INT16_C( 2485), INT16_C( 14981), INT16_C( 25154), INT16_C( 15661), INT16_C( 14970), -INT16_C( 23573), -INT16_C( 21125), -INT16_C( 31016), -INT16_C( 15058), -INT16_C( 17056), INT16_C( 15737) } }, { { INT16_C( 32413), INT16_C( 21160), INT16_C( 14215), -INT16_C( 13003), -INT16_C( 17725), INT16_C( 11271), -INT16_C( 26136), INT16_C( 12082), -INT16_C( 26291), INT16_C( 29331), INT16_C( 13688), INT16_C( 7621), -INT16_C( 15023), -INT16_C( 27417), INT16_C( 18216), -INT16_C( 15023), -INT16_C( 1595), INT16_C( 19479), INT16_C( 19504), -INT16_C( 3046), INT16_C( 8454), -INT16_C( 4576), INT16_C( 21434), INT16_C( 2078), -INT16_C( 19988), INT16_C( 25722), INT16_C( 16358), INT16_C( 14466), INT16_C( 26884), INT16_C( 11468), INT16_C( 7600), INT16_C( 30449) }, { INT16_C( 2326), INT16_C( 18370), -INT16_C( 9131), INT16_C( 23611), INT16_C( 23550), -INT16_C( 18358), INT16_C( 26798), -INT16_C( 25920), INT16_C( 14874), INT16_C( 255), -INT16_C( 32391), INT16_C( 32312), INT16_C( 1258), -INT16_C( 25942), -INT16_C( 25567), INT16_C( 14352), -INT16_C( 11355), -INT16_C( 1409), -INT16_C( 17745), -INT16_C( 21162), -INT16_C( 24299), -INT16_C( 15258), INT16_C( 9737), INT16_C( 9054), INT16_C( 23905), -INT16_C( 9692), INT16_C( 23774), -INT16_C( 14248), INT16_C( 865), -INT16_C( 32157), INT16_C( 29599), INT16_C( 17594) }, { -INT16_C( 32157), INT16_C( 14874), -INT16_C( 25920), -INT16_C( 15258), INT16_C( 23611), -INT16_C( 25920), INT16_C( 14874), -INT16_C( 17745), -INT16_C( 25942), -INT16_C( 21162), INT16_C( 23905), -INT16_C( 18358), -INT16_C( 1409), -INT16_C( 25920), INT16_C( 14874), -INT16_C( 1409), -INT16_C( 18358), INT16_C( 9054), -INT16_C( 11355), INT16_C( 23774), INT16_C( 26798), 
INT16_C( 2326), INT16_C( 23774), INT16_C( 29599), INT16_C( 1258), INT16_C( 23774), INT16_C( 26798), -INT16_C( 9131), INT16_C( 23550), INT16_C( 1258), -INT16_C( 11355), -INT16_C( 1409) } }, { { INT16_C( 14662), -INT16_C( 2498), -INT16_C( 27149), INT16_C( 2467), INT16_C( 2358), INT16_C( 16333), INT16_C( 11056), -INT16_C( 28317), -INT16_C( 30839), INT16_C( 26475), -INT16_C( 15133), INT16_C( 17456), -INT16_C( 27705), INT16_C( 26311), -INT16_C( 32506), INT16_C( 19882), -INT16_C( 5957), -INT16_C( 20925), -INT16_C( 6531), -INT16_C( 19529), -INT16_C( 31504), INT16_C( 8435), INT16_C( 22192), INT16_C( 14769), INT16_C( 7389), -INT16_C( 16224), -INT16_C( 12064), -INT16_C( 22779), -INT16_C( 13213), INT16_C( 27149), -INT16_C( 18611), INT16_C( 2231) }, { -INT16_C( 1376), INT16_C( 7607), INT16_C( 28384), -INT16_C( 12079), -INT16_C( 15117), -INT16_C( 23568), -INT16_C( 24294), -INT16_C( 2084), INT16_C( 31934), -INT16_C( 24905), -INT16_C( 17331), -INT16_C( 20410), INT16_C( 21384), -INT16_C( 10726), -INT16_C( 12021), -INT16_C( 21538), -INT16_C( 27189), -INT16_C( 21304), -INT16_C( 26364), -INT16_C( 2180), INT16_C( 27997), INT16_C( 30618), INT16_C( 30222), -INT16_C( 13202), INT16_C( 9970), INT16_C( 16235), -INT16_C( 19998), INT16_C( 27632), INT16_C( 2564), INT16_C( 3905), INT16_C( 8156), -INT16_C( 22598) }, { -INT16_C( 24294), INT16_C( 8156), -INT16_C( 2180), -INT16_C( 12079), INT16_C( 30222), -INT16_C( 10726), -INT16_C( 27189), -INT16_C( 12079), -INT16_C( 24905), -INT16_C( 20410), -INT16_C( 12079), -INT16_C( 27189), -INT16_C( 2084), -INT16_C( 2084), -INT16_C( 24294), -INT16_C( 17331), INT16_C( 27632), -INT16_C( 12079), INT16_C( 3905), -INT16_C( 13202), -INT16_C( 27189), -INT16_C( 2180), -INT16_C( 27189), -INT16_C( 21304), INT16_C( 3905), -INT16_C( 1376), -INT16_C( 1376), -INT16_C( 23568), -INT16_C( 12079), -INT16_C( 10726), -INT16_C( 10726), -INT16_C( 13202) } }, { { -INT16_C( 31819), -INT16_C( 18093), -INT16_C( 12260), INT16_C( 31408), INT16_C( 19005), INT16_C( 19441), INT16_C( 
24768), -INT16_C( 19944), -INT16_C( 31866), INT16_C( 26866), -INT16_C( 7628), INT16_C( 14547), INT16_C( 5356), -INT16_C( 14264), INT16_C( 564), -INT16_C( 5776), -INT16_C( 15483), -INT16_C( 23902), INT16_C( 21139), -INT16_C( 12260), INT16_C( 3484), INT16_C( 23580), INT16_C( 13421), -INT16_C( 3314), INT16_C( 183), -INT16_C( 5284), INT16_C( 12258), -INT16_C( 12509), INT16_C( 27460), INT16_C( 30871), INT16_C( 1902), -INT16_C( 3231) }, { INT16_C( 971), INT16_C( 24213), -INT16_C( 20139), -INT16_C( 3793), INT16_C( 19391), INT16_C( 11341), INT16_C( 23423), INT16_C( 13856), INT16_C( 31836), INT16_C( 15905), INT16_C( 17579), -INT16_C( 4339), -INT16_C( 23120), INT16_C( 7783), -INT16_C( 14164), INT16_C( 30481), -INT16_C( 22581), INT16_C( 8406), INT16_C( 1368), INT16_C( 5905), INT16_C( 24144), -INT16_C( 12476), INT16_C( 25786), INT16_C( 5637), INT16_C( 9952), -INT16_C( 29868), INT16_C( 25194), INT16_C( 6779), -INT16_C( 7673), -INT16_C( 19656), INT16_C( 19115), INT16_C( 30251) }, { -INT16_C( 12476), INT16_C( 5905), -INT16_C( 7673), -INT16_C( 22581), -INT16_C( 19656), INT16_C( 8406), INT16_C( 971), INT16_C( 9952), INT16_C( 23423), INT16_C( 1368), INT16_C( 24144), INT16_C( 5905), -INT16_C( 23120), INT16_C( 31836), INT16_C( 24144), -INT16_C( 22581), INT16_C( 11341), -INT16_C( 20139), INT16_C( 5905), -INT16_C( 7673), -INT16_C( 7673), -INT16_C( 7673), INT16_C( 7783), -INT16_C( 14164), INT16_C( 5637), -INT16_C( 7673), -INT16_C( 20139), -INT16_C( 3793), INT16_C( 19391), INT16_C( 5637), -INT16_C( 14164), INT16_C( 24213) } }, { { INT16_C( 497), INT16_C( 18839), -INT16_C( 22522), INT16_C( 22113), -INT16_C( 23289), -INT16_C( 16091), INT16_C( 10761), -INT16_C( 5673), INT16_C( 11088), -INT16_C( 17804), -INT16_C( 4211), -INT16_C( 27435), INT16_C( 3538), INT16_C( 32072), INT16_C( 29527), INT16_C( 18675), -INT16_C( 30092), INT16_C( 31378), -INT16_C( 3277), INT16_C( 15056), -INT16_C( 2664), -INT16_C( 24069), -INT16_C( 11745), INT16_C( 28554), -INT16_C( 259), -INT16_C( 29911), -INT16_C( 274), 
-INT16_C( 16353), INT16_C( 26380), INT16_C( 25405), INT16_C( 12506), INT16_C( 20140) }, { INT16_C( 16059), -INT16_C( 4408), -INT16_C( 26575), -INT16_C( 14040), INT16_C( 9101), -INT16_C( 21398), -INT16_C( 2827), -INT16_C( 3557), INT16_C( 17906), -INT16_C( 8067), -INT16_C( 25277), INT16_C( 20384), -INT16_C( 8956), -INT16_C( 8269), INT16_C( 24334), -INT16_C( 14035), -INT16_C( 2403), -INT16_C( 12617), -INT16_C( 8306), INT16_C( 7319), INT16_C( 258), -INT16_C( 2104), -INT16_C( 6923), -INT16_C( 6167), INT16_C( 26409), INT16_C( 27848), INT16_C( 26628), INT16_C( 2236), INT16_C( 28486), INT16_C( 21735), INT16_C( 5582), INT16_C( 27421) }, { -INT16_C( 12617), -INT16_C( 6167), -INT16_C( 2827), -INT16_C( 4408), -INT16_C( 3557), -INT16_C( 21398), -INT16_C( 8067), -INT16_C( 6167), -INT16_C( 2403), INT16_C( 258), -INT16_C( 8269), -INT16_C( 2104), -INT16_C( 8306), INT16_C( 17906), -INT16_C( 6167), INT16_C( 7319), INT16_C( 258), -INT16_C( 8306), INT16_C( 7319), -INT16_C( 2403), INT16_C( 26409), INT16_C( 2236), INT16_C( 27421), -INT16_C( 25277), INT16_C( 21735), -INT16_C( 8067), INT16_C( 24334), INT16_C( 27421), -INT16_C( 8956), INT16_C( 21735), INT16_C( 26628), -INT16_C( 8956) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i idx = simde_mm512_loadu_epi16(test_vec[i].idx); simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i r = simde_mm512_permutexvar_epi16(idx, a); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i idx = simde_test_x86_random_i16x32(); simde__m512i a = simde_test_x86_random_i16x32(); simde__m512i r = simde_mm512_permutexvar_epi16(idx, a); simde_test_x86_write_i16x32(2, idx, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_permutexvar_epi16 
(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t src[32]; const simde__mmask32 k; const int16_t idx[32]; const int16_t a[32]; const int16_t r[32]; } test_vec[] = { { { -INT16_C( 11253), -INT16_C( 26311), -INT16_C( 12109), -INT16_C( 19019), INT16_C( 32465), -INT16_C( 14676), -INT16_C( 27294), -INT16_C( 29779), INT16_C( 30204), INT16_C( 247), -INT16_C( 19490), INT16_C( 9225), -INT16_C( 4062), -INT16_C( 3976), -INT16_C( 27387), INT16_C( 4187), -INT16_C( 27543), INT16_C( 7338), INT16_C( 24420), INT16_C( 13777), INT16_C( 32221), INT16_C( 16379), -INT16_C( 22254), INT16_C( 4042), -INT16_C( 15842), -INT16_C( 1009), INT16_C( 6261), -INT16_C( 26592), -INT16_C( 26615), INT16_C( 3720), -INT16_C( 7123), -INT16_C( 27105) }, UINT32_C(3719481720), { -INT16_C( 31960), INT16_C( 1554), INT16_C( 3584), INT16_C( 4933), INT16_C( 4279), -INT16_C( 10974), INT16_C( 12754), INT16_C( 18386), -INT16_C( 3510), INT16_C( 21471), INT16_C( 26763), -INT16_C( 18335), -INT16_C( 32692), -INT16_C( 15281), INT16_C( 329), INT16_C( 29345), -INT16_C( 19323), -INT16_C( 31368), -INT16_C( 16958), INT16_C( 31128), -INT16_C( 17715), -INT16_C( 24754), INT16_C( 8428), INT16_C( 14055), -INT16_C( 14829), -INT16_C( 24951), -INT16_C( 5586), INT16_C( 31318), -INT16_C( 23189), -INT16_C( 19393), -INT16_C( 8025), INT16_C( 11302) }, { -INT16_C( 24940), INT16_C( 22193), INT16_C( 19036), INT16_C( 10703), INT16_C( 7684), -INT16_C( 3895), -INT16_C( 20418), INT16_C( 20774), -INT16_C( 20618), -INT16_C( 23057), INT16_C( 18074), INT16_C( 1311), INT16_C( 24299), -INT16_C( 27975), -INT16_C( 8129), -INT16_C( 11330), INT16_C( 28798), -INT16_C( 9686), -INT16_C( 1606), -INT16_C( 16892), -INT16_C( 13033), INT16_C( 22191), -INT16_C( 10883), -INT16_C( 3161), -INT16_C( 26747), INT16_C( 8088), -INT16_C( 18211), -INT16_C( 14300), -INT16_C( 8938), INT16_C( 21851), INT16_C( 6589), INT16_C( 15401) }, { -INT16_C( 11253), -INT16_C( 26311), -INT16_C( 12109), -INT16_C( 3895), -INT16_C( 3161), INT16_C( 19036), -INT16_C( 1606), 
-INT16_C( 29779), INT16_C( 18074), INT16_C( 247), -INT16_C( 19490), INT16_C( 22193), -INT16_C( 4062), -INT16_C( 3976), -INT16_C( 23057), INT16_C( 22193), -INT16_C( 27543), -INT16_C( 26747), INT16_C( 24420), INT16_C( 13777), -INT16_C( 27975), -INT16_C( 8129), -INT16_C( 22254), INT16_C( 20774), -INT16_C( 16892), -INT16_C( 1009), -INT16_C( 8129), -INT16_C( 10883), INT16_C( 1311), INT16_C( 3720), INT16_C( 20774), -INT16_C( 20418) } }, { { INT16_C( 21385), INT16_C( 17174), INT16_C( 6732), INT16_C( 25602), -INT16_C( 19993), INT16_C( 25786), INT16_C( 24966), INT16_C( 2904), -INT16_C( 3848), -INT16_C( 10966), INT16_C( 20136), -INT16_C( 16482), -INT16_C( 1748), -INT16_C( 5868), INT16_C( 15634), -INT16_C( 25563), INT16_C( 15504), -INT16_C( 8737), -INT16_C( 7850), INT16_C( 15937), -INT16_C( 1134), INT16_C( 6562), -INT16_C( 1444), INT16_C( 21796), INT16_C( 20459), -INT16_C( 27862), -INT16_C( 14179), -INT16_C( 13998), INT16_C( 26561), -INT16_C( 11085), -INT16_C( 10076), INT16_C( 13680) }, UINT32_C(1796362004), { INT16_C( 21297), -INT16_C( 15447), INT16_C( 19278), -INT16_C( 21796), INT16_C( 326), INT16_C( 12799), INT16_C( 10832), -INT16_C( 4668), INT16_C( 6130), -INT16_C( 19273), INT16_C( 27262), INT16_C( 8840), -INT16_C( 1982), INT16_C( 22359), INT16_C( 26951), INT16_C( 30914), INT16_C( 27580), INT16_C( 2620), INT16_C( 6326), -INT16_C( 843), -INT16_C( 19431), INT16_C( 26925), -INT16_C( 3362), -INT16_C( 11945), INT16_C( 3593), -INT16_C( 30843), INT16_C( 3448), -INT16_C( 17751), INT16_C( 261), INT16_C( 19473), -INT16_C( 11414), INT16_C( 10181) }, { INT16_C( 318), -INT16_C( 2767), -INT16_C( 6631), INT16_C( 13297), INT16_C( 8091), INT16_C( 31132), -INT16_C( 3311), INT16_C( 6730), -INT16_C( 12543), INT16_C( 31137), INT16_C( 19164), -INT16_C( 7884), INT16_C( 17739), -INT16_C( 18898), -INT16_C( 3303), INT16_C( 22493), INT16_C( 3828), INT16_C( 3404), INT16_C( 16117), -INT16_C( 28608), -INT16_C( 8867), INT16_C( 28169), INT16_C( 21712), -INT16_C( 11640), INT16_C( 10531), INT16_C( 75), 
INT16_C( 32627), -INT16_C( 16415), INT16_C( 4037), -INT16_C( 8587), INT16_C( 20994), -INT16_C( 2507) }, { INT16_C( 21385), INT16_C( 17174), -INT16_C( 3303), INT16_C( 25602), -INT16_C( 3311), INT16_C( 25786), INT16_C( 24966), INT16_C( 2904), INT16_C( 16117), -INT16_C( 11640), INT16_C( 20994), -INT16_C( 12543), -INT16_C( 1748), -INT16_C( 5868), INT16_C( 6730), -INT16_C( 25563), INT16_C( 15504), INT16_C( 4037), -INT16_C( 7850), INT16_C( 15937), INT16_C( 75), INT16_C( 6562), -INT16_C( 1444), INT16_C( 21796), INT16_C( 31137), INT16_C( 31132), -INT16_C( 14179), INT16_C( 31137), INT16_C( 26561), INT16_C( 3404), INT16_C( 19164), INT16_C( 13680) } }, { { -INT16_C( 32160), INT16_C( 21764), INT16_C( 17600), INT16_C( 7653), -INT16_C( 4319), -INT16_C( 3445), INT16_C( 4931), INT16_C( 26308), INT16_C( 3900), -INT16_C( 20634), INT16_C( 18575), INT16_C( 21614), -INT16_C( 7337), INT16_C( 23090), INT16_C( 26421), -INT16_C( 27056), INT16_C( 21737), -INT16_C( 22037), -INT16_C( 11879), -INT16_C( 17722), INT16_C( 20928), INT16_C( 940), INT16_C( 28772), -INT16_C( 24471), -INT16_C( 12160), INT16_C( 3920), -INT16_C( 16872), INT16_C( 28515), -INT16_C( 27230), -INT16_C( 10295), INT16_C( 6908), -INT16_C( 6547) }, UINT32_C( 126835054), { INT16_C( 22058), -INT16_C( 5438), INT16_C( 28327), INT16_C( 3309), INT16_C( 22239), INT16_C( 24492), -INT16_C( 986), INT16_C( 15982), -INT16_C( 11845), INT16_C( 23982), INT16_C( 30566), INT16_C( 25140), -INT16_C( 23919), INT16_C( 72), -INT16_C( 9989), INT16_C( 9479), -INT16_C( 14034), -INT16_C( 10993), -INT16_C( 968), INT16_C( 6113), -INT16_C( 29102), INT16_C( 31094), -INT16_C( 7030), INT16_C( 17847), INT16_C( 26037), INT16_C( 7074), -INT16_C( 10275), INT16_C( 28285), -INT16_C( 14727), INT16_C( 29806), INT16_C( 30366), -INT16_C( 13159) }, { -INT16_C( 22465), INT16_C( 30625), -INT16_C( 31836), -INT16_C( 2418), INT16_C( 1041), -INT16_C( 25745), INT16_C( 10216), -INT16_C( 25119), -INT16_C( 31860), INT16_C( 27064), INT16_C( 13914), -INT16_C( 11304), INT16_C( 
18172), -INT16_C( 26041), -INT16_C( 8004), -INT16_C( 922), INT16_C( 1928), INT16_C( 11379), INT16_C( 650), -INT16_C( 25821), -INT16_C( 28154), -INT16_C( 4297), INT16_C( 6329), INT16_C( 18060), INT16_C( 17819), -INT16_C( 2385), -INT16_C( 30853), INT16_C( 30665), INT16_C( 4558), -INT16_C( 30191), INT16_C( 30705), INT16_C( 31366) }, { -INT16_C( 32160), -INT16_C( 31836), -INT16_C( 25119), -INT16_C( 26041), -INT16_C( 4319), INT16_C( 18172), INT16_C( 10216), INT16_C( 26308), INT16_C( 30665), -INT16_C( 20634), INT16_C( 18575), -INT16_C( 28154), INT16_C( 11379), INT16_C( 23090), INT16_C( 30665), -INT16_C( 27056), -INT16_C( 8004), -INT16_C( 922), INT16_C( 17819), INT16_C( 30625), INT16_C( 20928), INT16_C( 940), INT16_C( 28772), INT16_C( 18060), -INT16_C( 4297), -INT16_C( 31836), -INT16_C( 30191), INT16_C( 28515), -INT16_C( 27230), -INT16_C( 10295), INT16_C( 6908), -INT16_C( 6547) } }, { { -INT16_C( 1410), INT16_C( 2470), -INT16_C( 13828), INT16_C( 676), -INT16_C( 9380), INT16_C( 5617), INT16_C( 32499), -INT16_C( 28837), INT16_C( 3011), INT16_C( 16005), INT16_C( 20114), INT16_C( 24757), -INT16_C( 14753), INT16_C( 20971), INT16_C( 28989), -INT16_C( 17461), INT16_C( 29035), INT16_C( 26564), INT16_C( 26939), -INT16_C( 26774), INT16_C( 23364), INT16_C( 14508), INT16_C( 2265), -INT16_C( 25401), INT16_C( 19475), -INT16_C( 23078), -INT16_C( 28774), -INT16_C( 1530), -INT16_C( 3755), -INT16_C( 28085), INT16_C( 5730), -INT16_C( 12722) }, UINT32_C(3258258055), { -INT16_C( 24709), -INT16_C( 16295), INT16_C( 1787), -INT16_C( 11016), -INT16_C( 16626), INT16_C( 8561), INT16_C( 19211), -INT16_C( 23098), -INT16_C( 13093), INT16_C( 12447), -INT16_C( 5443), INT16_C( 8387), INT16_C( 4352), -INT16_C( 30482), INT16_C( 8995), -INT16_C( 24758), -INT16_C( 23357), -INT16_C( 16801), INT16_C( 22442), -INT16_C( 18286), INT16_C( 790), INT16_C( 8665), -INT16_C( 24753), INT16_C( 10950), INT16_C( 26220), INT16_C( 10586), INT16_C( 7504), INT16_C( 20809), INT16_C( 14126), INT16_C( 21209), INT16_C( 9051), 
INT16_C( 7921) }, { INT16_C( 20679), INT16_C( 29148), INT16_C( 28327), -INT16_C( 17111), INT16_C( 626), -INT16_C( 15906), -INT16_C( 23390), INT16_C( 3819), INT16_C( 17674), INT16_C( 23351), -INT16_C( 32413), -INT16_C( 28244), -INT16_C( 31304), INT16_C( 5091), -INT16_C( 11096), INT16_C( 28721), INT16_C( 3364), -INT16_C( 13343), INT16_C( 2940), -INT16_C( 4472), INT16_C( 26125), -INT16_C( 20561), -INT16_C( 26101), INT16_C( 5565), -INT16_C( 2593), INT16_C( 17008), INT16_C( 7286), INT16_C( 11988), -INT16_C( 18527), INT16_C( 19010), INT16_C( 29580), -INT16_C( 20294) }, { INT16_C( 11988), INT16_C( 17008), INT16_C( 11988), INT16_C( 676), -INT16_C( 9380), INT16_C( 5617), INT16_C( 32499), -INT16_C( 23390), INT16_C( 3011), -INT16_C( 20294), INT16_C( 20114), INT16_C( 24757), INT16_C( 20679), INT16_C( 20971), INT16_C( 28989), -INT16_C( 17461), -INT16_C( 17111), INT16_C( 26564), -INT16_C( 32413), -INT16_C( 26774), -INT16_C( 26101), INT16_C( 17008), INT16_C( 2265), -INT16_C( 25401), INT16_C( 19475), INT16_C( 7286), -INT16_C( 28774), -INT16_C( 1530), -INT16_C( 3755), -INT16_C( 28085), INT16_C( 11988), -INT16_C( 13343) } }, { { -INT16_C( 25727), -INT16_C( 644), INT16_C( 1190), -INT16_C( 19221), -INT16_C( 26005), INT16_C( 30307), INT16_C( 8500), INT16_C( 5003), -INT16_C( 1002), -INT16_C( 29610), INT16_C( 10776), -INT16_C( 17734), -INT16_C( 799), INT16_C( 27908), -INT16_C( 16784), -INT16_C( 3810), -INT16_C( 26023), INT16_C( 238), -INT16_C( 9826), INT16_C( 2484), INT16_C( 6003), -INT16_C( 22657), INT16_C( 2872), INT16_C( 20154), INT16_C( 4103), INT16_C( 8154), -INT16_C( 27334), INT16_C( 7385), -INT16_C( 8815), INT16_C( 393), -INT16_C( 22629), -INT16_C( 2574) }, UINT32_C(3774210113), { -INT16_C( 22087), INT16_C( 11497), INT16_C( 27072), -INT16_C( 1581), -INT16_C( 29068), INT16_C( 31559), INT16_C( 8862), -INT16_C( 9830), INT16_C( 29879), INT16_C( 18677), INT16_C( 32337), -INT16_C( 4790), INT16_C( 15398), INT16_C( 26594), -INT16_C( 10467), -INT16_C( 10681), INT16_C( 12672), INT16_C( 
16387), -INT16_C( 10598), INT16_C( 3641), -INT16_C( 32412), INT16_C( 905), INT16_C( 9123), INT16_C( 23260), -INT16_C( 11881), -INT16_C( 5726), -INT16_C( 5041), INT16_C( 30166), -INT16_C( 18391), INT16_C( 18141), INT16_C( 9359), INT16_C( 3868) }, { INT16_C( 8021), -INT16_C( 4273), -INT16_C( 30218), INT16_C( 23293), -INT16_C( 31222), -INT16_C( 21155), INT16_C( 14762), INT16_C( 16647), -INT16_C( 22262), INT16_C( 23082), INT16_C( 150), -INT16_C( 16433), -INT16_C( 21320), INT16_C( 18181), INT16_C( 8657), INT16_C( 9814), -INT16_C( 22975), INT16_C( 14102), INT16_C( 4911), INT16_C( 14737), -INT16_C( 4198), INT16_C( 17638), -INT16_C( 4824), INT16_C( 13189), -INT16_C( 20330), INT16_C( 11405), INT16_C( 23728), INT16_C( 27115), -INT16_C( 4087), -INT16_C( 9552), INT16_C( 1810), INT16_C( 21248) }, { INT16_C( 11405), -INT16_C( 644), INT16_C( 1190), -INT16_C( 19221), -INT16_C( 26005), INT16_C( 30307), INT16_C( 1810), INT16_C( 5003), -INT16_C( 1002), -INT16_C( 29610), INT16_C( 10776), -INT16_C( 17734), -INT16_C( 799), -INT16_C( 30218), -INT16_C( 9552), INT16_C( 16647), INT16_C( 8021), INT16_C( 238), INT16_C( 23728), INT16_C( 2484), -INT16_C( 31222), INT16_C( 23082), INT16_C( 23293), -INT16_C( 4087), INT16_C( 4103), INT16_C( 8154), -INT16_C( 27334), INT16_C( 7385), -INT16_C( 8815), -INT16_C( 9552), INT16_C( 9814), -INT16_C( 4087) } }, { { INT16_C( 5805), -INT16_C( 9078), INT16_C( 6954), -INT16_C( 15339), -INT16_C( 1270), INT16_C( 13064), -INT16_C( 29208), INT16_C( 32358), -INT16_C( 3267), -INT16_C( 4437), -INT16_C( 27057), INT16_C( 22615), INT16_C( 1927), -INT16_C( 26318), INT16_C( 13070), -INT16_C( 17428), INT16_C( 30281), INT16_C( 29591), -INT16_C( 21359), -INT16_C( 25545), INT16_C( 16295), -INT16_C( 28721), INT16_C( 13773), INT16_C( 2574), -INT16_C( 18136), INT16_C( 30712), INT16_C( 20303), -INT16_C( 10544), INT16_C( 599), INT16_C( 25967), INT16_C( 23349), INT16_C( 32545) }, UINT32_C(1676851409), { INT16_C( 10853), INT16_C( 3327), -INT16_C( 12695), INT16_C( 13980), -INT16_C( 
22013), INT16_C( 11073), INT16_C( 14691), -INT16_C( 19806), INT16_C( 29321), -INT16_C( 8055), -INT16_C( 1931), -INT16_C( 21947), INT16_C( 26196), INT16_C( 9513), INT16_C( 7199), -INT16_C( 31608), -INT16_C( 30906), -INT16_C( 20592), INT16_C( 11349), INT16_C( 22758), INT16_C( 10198), INT16_C( 14723), INT16_C( 9824), -INT16_C( 5652), INT16_C( 30104), INT16_C( 3529), INT16_C( 3949), -INT16_C( 15944), -INT16_C( 7819), -INT16_C( 27417), INT16_C( 28669), INT16_C( 17176) }, { -INT16_C( 22025), INT16_C( 19699), -INT16_C( 9771), -INT16_C( 21339), INT16_C( 10240), INT16_C( 24805), -INT16_C( 11954), -INT16_C( 6326), INT16_C( 4934), -INT16_C( 19212), -INT16_C( 21470), -INT16_C( 26507), INT16_C( 23694), -INT16_C( 29908), INT16_C( 17868), -INT16_C( 15409), -INT16_C( 15634), -INT16_C( 15601), -INT16_C( 19301), -INT16_C( 25745), INT16_C( 21981), INT16_C( 11259), INT16_C( 17702), INT16_C( 27922), INT16_C( 1881), INT16_C( 31521), -INT16_C( 26957), INT16_C( 16659), INT16_C( 16627), -INT16_C( 16435), -INT16_C( 25467), INT16_C( 29570) }, { INT16_C( 24805), -INT16_C( 9078), INT16_C( 6954), -INT16_C( 15339), -INT16_C( 21339), INT16_C( 13064), -INT16_C( 21339), -INT16_C( 9771), -INT16_C( 3267), -INT16_C( 4437), -INT16_C( 27057), INT16_C( 24805), INT16_C( 21981), -INT16_C( 19212), INT16_C( 13070), INT16_C( 4934), INT16_C( 30281), -INT16_C( 15634), -INT16_C( 21359), -INT16_C( 25545), INT16_C( 17702), -INT16_C( 21339), -INT16_C( 22025), INT16_C( 23694), INT16_C( 1881), -INT16_C( 19212), INT16_C( 20303), -INT16_C( 10544), INT16_C( 599), -INT16_C( 6326), -INT16_C( 16435), INT16_C( 32545) } }, { { -INT16_C( 28322), -INT16_C( 1738), -INT16_C( 22970), INT16_C( 9108), -INT16_C( 28677), INT16_C( 8526), INT16_C( 25045), INT16_C( 11918), -INT16_C( 20632), INT16_C( 7081), -INT16_C( 17082), INT16_C( 14685), INT16_C( 11005), -INT16_C( 32008), INT16_C( 31430), INT16_C( 9461), INT16_C( 11019), INT16_C( 20765), -INT16_C( 20015), -INT16_C( 13196), -INT16_C( 15552), INT16_C( 5614), INT16_C( 31780), -INT16_C( 
29629), -INT16_C( 4820), INT16_C( 29351), INT16_C( 1194), -INT16_C( 22613), -INT16_C( 23762), -INT16_C( 3031), INT16_C( 7709), INT16_C( 10264) }, UINT32_C( 460993865), { -INT16_C( 4378), INT16_C( 10215), -INT16_C( 10831), -INT16_C( 10948), -INT16_C( 32686), INT16_C( 32353), INT16_C( 2413), INT16_C( 6128), -INT16_C( 25843), INT16_C( 15550), -INT16_C( 6338), INT16_C( 23344), INT16_C( 18693), INT16_C( 20099), -INT16_C( 642), INT16_C( 25961), INT16_C( 20972), -INT16_C( 25204), -INT16_C( 14298), INT16_C( 30835), -INT16_C( 11192), -INT16_C( 18954), -INT16_C( 6435), -INT16_C( 5172), -INT16_C( 30079), -INT16_C( 16601), INT16_C( 22385), INT16_C( 30234), -INT16_C( 24928), INT16_C( 8133), INT16_C( 11931), -INT16_C( 30844) }, { INT16_C( 4223), -INT16_C( 23003), -INT16_C( 26408), INT16_C( 8478), INT16_C( 5484), INT16_C( 19158), -INT16_C( 23557), INT16_C( 32053), INT16_C( 23597), -INT16_C( 24772), INT16_C( 22451), INT16_C( 21525), -INT16_C( 9483), -INT16_C( 28557), -INT16_C( 2295), -INT16_C( 30696), INT16_C( 15623), -INT16_C( 8402), INT16_C( 19925), INT16_C( 16640), -INT16_C( 10398), INT16_C( 23947), -INT16_C( 16262), -INT16_C( 22566), INT16_C( 5916), -INT16_C( 12218), INT16_C( 23662), INT16_C( 25380), -INT16_C( 26826), INT16_C( 16371), INT16_C( 2958), -INT16_C( 27192) }, { -INT16_C( 23557), -INT16_C( 1738), -INT16_C( 22970), -INT16_C( 26826), -INT16_C( 28677), INT16_C( 8526), -INT16_C( 28557), INT16_C( 11918), -INT16_C( 28557), INT16_C( 7081), INT16_C( 2958), INT16_C( 14685), INT16_C( 19158), INT16_C( 8478), INT16_C( 31430), INT16_C( 9461), INT16_C( 11019), -INT16_C( 9483), -INT16_C( 20015), INT16_C( 16640), INT16_C( 23597), -INT16_C( 16262), INT16_C( 16371), -INT16_C( 29629), -INT16_C( 23003), INT16_C( 32053), INT16_C( 1194), INT16_C( 23662), INT16_C( 4223), -INT16_C( 3031), INT16_C( 7709), INT16_C( 10264) } }, { { -INT16_C( 2488), INT16_C( 7540), INT16_C( 30019), -INT16_C( 23201), -INT16_C( 5556), -INT16_C( 14845), -INT16_C( 8789), -INT16_C( 14483), -INT16_C( 19212), INT16_C( 
25239), -INT16_C( 17648), INT16_C( 18117), -INT16_C( 18094), -INT16_C( 8058), INT16_C( 20164), INT16_C( 3445), -INT16_C( 5564), -INT16_C( 30678), -INT16_C( 30369), -INT16_C( 21715), INT16_C( 12404), INT16_C( 8049), -INT16_C( 8690), INT16_C( 742), INT16_C( 32402), -INT16_C( 23963), INT16_C( 10809), -INT16_C( 29463), INT16_C( 28643), -INT16_C( 22420), -INT16_C( 7491), INT16_C( 437) }, UINT32_C( 730456012), { -INT16_C( 18583), -INT16_C( 8746), INT16_C( 18407), -INT16_C( 2564), -INT16_C( 7643), -INT16_C( 18184), INT16_C( 23904), -INT16_C( 26022), INT16_C( 17287), INT16_C( 27430), -INT16_C( 27982), INT16_C( 28435), -INT16_C( 14220), INT16_C( 16497), -INT16_C( 1369), INT16_C( 4203), INT16_C( 16817), -INT16_C( 26131), -INT16_C( 5752), -INT16_C( 20850), -INT16_C( 31028), INT16_C( 11366), -INT16_C( 16157), INT16_C( 27590), -INT16_C( 5116), -INT16_C( 18730), -INT16_C( 5761), -INT16_C( 3290), -INT16_C( 26703), INT16_C( 22580), -INT16_C( 24687), INT16_C( 17257) }, { INT16_C( 22241), INT16_C( 27100), INT16_C( 27200), INT16_C( 3095), INT16_C( 32241), -INT16_C( 11208), -INT16_C( 194), INT16_C( 16959), INT16_C( 5611), INT16_C( 27384), INT16_C( 7934), -INT16_C( 20642), -INT16_C( 27979), INT16_C( 18184), INT16_C( 28977), INT16_C( 4746), INT16_C( 26311), INT16_C( 1916), -INT16_C( 27696), -INT16_C( 16109), INT16_C( 19473), INT16_C( 20374), -INT16_C( 10933), INT16_C( 13969), -INT16_C( 30229), -INT16_C( 5727), -INT16_C( 88), INT16_C( 23961), -INT16_C( 24175), -INT16_C( 15708), INT16_C( 11794), -INT16_C( 9771) }, { -INT16_C( 2488), INT16_C( 7540), INT16_C( 16959), -INT16_C( 24175), -INT16_C( 5556), -INT16_C( 14845), INT16_C( 22241), -INT16_C( 88), INT16_C( 16959), -INT16_C( 194), -INT16_C( 27696), -INT16_C( 16109), INT16_C( 19473), -INT16_C( 8058), INT16_C( 16959), -INT16_C( 20642), INT16_C( 1916), -INT16_C( 30678), -INT16_C( 30369), INT16_C( 28977), INT16_C( 12404), INT16_C( 8049), -INT16_C( 8690), -INT16_C( 194), INT16_C( 32241), -INT16_C( 10933), INT16_C( 10809), -INT16_C( 194), 
INT16_C( 28643), INT16_C( 19473), -INT16_C( 7491), INT16_C( 437) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi16(test_vec[i].src); simde__m512i idx = simde_mm512_loadu_epi16(test_vec[i].idx); simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i r = simde_mm512_mask_permutexvar_epi16(src, test_vec[i].k, idx, a); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i src = simde_test_x86_random_i16x32(); simde__mmask32 k = simde_test_x86_random_mmask32(); simde__m512i idx = simde_test_x86_random_i16x32(); simde__m512i a = simde_test_x86_random_i16x32(); simde__m512i r = simde_mm512_mask_permutexvar_epi16(src, k, idx, a); simde_test_x86_write_i16x32(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Test for simde_mm512_maskz_permutexvar_epi16. With the "#if 1" branch active, the eight test_vec entries below each supply a mask (k), 32 index lanes (idx), 32 source lanes (a) and 32 expected result lanes (r). The loop after the table loads idx and a with simde_mm512_loadu_epi16, calls simde_mm512_maskz_permutexvar_epi16(k, idx, a), asserts the result equals r, and the function returns 0. The "#else" branch is compiled out; it would run the function on 8 random inputs, write the inputs and result out, and return 1. */ static int test_simde_mm512_maskz_permutexvar_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask32 k; /* mask handed to the call as its first argument */ const int16_t idx[32]; /* 32 index lanes */ const int16_t a[32]; /* 32 source lanes */ const int16_t r[32]; /* 32 expected result lanes */ } test_vec[] = { /* Spot check of the first entry: bits 0 and 1 of k are clear and r[0], r[1] are 0; bit 2 is set, idx[2] = -17344 has low five bits 0, and r[2] equals a[0] (14383). The remaining entries presumably follow the same rule - not re-derived here. */ { UINT32_C(1709265300), { -INT16_C( 2844), -INT16_C( 2778), -INT16_C( 17344), -INT16_C( 29884), -INT16_C( 10862), INT16_C( 32194), INT16_C( 25439), INT16_C( 1894), -INT16_C( 158), -INT16_C( 3228), INT16_C( 2464), -INT16_C( 19787), -INT16_C( 30153), -INT16_C( 13172), INT16_C( 28123), -INT16_C( 16335), INT16_C( 22369), -INT16_C( 23883), -INT16_C( 1516), -INT16_C( 22995), -INT16_C( 4145), INT16_C( 11811), -INT16_C( 30382), -INT16_C( 19403), -INT16_C( 25975), INT16_C( 10663), INT16_C( 23971), -INT16_C( 9508), INT16_C( 26855), -INT16_C( 15450), -INT16_C( 10283), INT16_C( 13955) }, { INT16_C( 14383), INT16_C( 17368), 
INT16_C( 1586), INT16_C( 745), INT16_C( 3317), INT16_C( 18480), INT16_C( 26261), INT16_C( 7932), -INT16_C( 23552), -INT16_C( 23736), INT16_C( 9217), -INT16_C( 6019), INT16_C( 9356), INT16_C( 25003), INT16_C( 12027), INT16_C( 10903), INT16_C( 28775), -INT16_C( 26259), INT16_C( 22134), INT16_C( 27547), -INT16_C( 13214), -INT16_C( 1869), -INT16_C( 20430), INT16_C( 12822), INT16_C( 24148), INT16_C( 21973), INT16_C( 21122), INT16_C( 3645), -INT16_C( 5770), INT16_C( 29295), INT16_C( 1815), INT16_C( 32412) }, { INT16_C( 0), INT16_C( 0), INT16_C( 14383), INT16_C( 0), INT16_C( 22134), INT16_C( 0), INT16_C( 0), INT16_C( 26261), INT16_C( 1586), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 12822), INT16_C( 0), INT16_C( 3645), INT16_C( 0), INT16_C( 17368), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 745), INT16_C( 22134), -INT16_C( 1869), -INT16_C( 23736), INT16_C( 0), INT16_C( 745), INT16_C( 0), INT16_C( 0), INT16_C( 26261), -INT16_C( 1869), INT16_C( 0) } }, { UINT32_C(3977775735), { -INT16_C( 19616), -INT16_C( 15528), INT16_C( 3199), -INT16_C( 20037), -INT16_C( 11844), INT16_C( 4323), -INT16_C( 18384), -INT16_C( 19867), -INT16_C( 24053), -INT16_C( 32319), INT16_C( 12427), -INT16_C( 23565), -INT16_C( 28617), -INT16_C( 20959), INT16_C( 14746), -INT16_C( 1381), -INT16_C( 2835), INT16_C( 27837), INT16_C( 30720), -INT16_C( 17378), INT16_C( 330), INT16_C( 31436), INT16_C( 12730), -INT16_C( 15060), -INT16_C( 4653), INT16_C( 24390), INT16_C( 14878), INT16_C( 21762), INT16_C( 9162), INT16_C( 25604), -INT16_C( 24739), INT16_C( 19038) }, { INT16_C( 7315), -INT16_C( 27722), -INT16_C( 11116), -INT16_C( 8625), INT16_C( 7126), -INT16_C( 28584), -INT16_C( 31412), INT16_C( 8277), -INT16_C( 25742), -INT16_C( 28545), -INT16_C( 32299), -INT16_C( 24602), -INT16_C( 5468), INT16_C( 259), INT16_C( 25225), INT16_C( 7499), INT16_C( 638), INT16_C( 4784), INT16_C( 214), -INT16_C( 21263), INT16_C( 18715), INT16_C( 26684), -INT16_C( 28210), INT16_C( 16776), INT16_C( 1837), INT16_C( 
721), -INT16_C( 18552), INT16_C( 11426), -INT16_C( 23135), INT16_C( 11054), INT16_C( 30983), -INT16_C( 31416) }, { INT16_C( 7315), INT16_C( 1837), -INT16_C( 31416), INT16_C( 0), -INT16_C( 23135), -INT16_C( 8625), INT16_C( 638), INT16_C( 0), INT16_C( 0), -INT16_C( 27722), INT16_C( 0), -INT16_C( 21263), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 30983), -INT16_C( 32299), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 21263), INT16_C( 0), INT16_C( 30983), -INT16_C( 11116), INT16_C( 0), INT16_C( 7126), INT16_C( 11054), INT16_C( 30983) } }, { UINT32_C(1385756795), { -INT16_C( 30216), INT16_C( 5374), INT16_C( 15314), -INT16_C( 24196), INT16_C( 1228), -INT16_C( 1566), -INT16_C( 19701), -INT16_C( 27652), -INT16_C( 24981), INT16_C( 3263), -INT16_C( 4797), INT16_C( 19255), INT16_C( 32615), -INT16_C( 7472), INT16_C( 26744), INT16_C( 28724), INT16_C( 13297), -INT16_C( 15228), INT16_C( 110), INT16_C( 14949), INT16_C( 18180), INT16_C( 3892), INT16_C( 12538), INT16_C( 26018), INT16_C( 25294), INT16_C( 4466), -INT16_C( 22193), -INT16_C( 18852), INT16_C( 11561), -INT16_C( 24167), -INT16_C( 12907), -INT16_C( 30959) }, { -INT16_C( 27136), INT16_C( 28235), -INT16_C( 20330), -INT16_C( 25687), -INT16_C( 8713), -INT16_C( 3670), INT16_C( 19725), -INT16_C( 9385), -INT16_C( 13905), -INT16_C( 276), INT16_C( 18802), -INT16_C( 25675), INT16_C( 20086), INT16_C( 2876), INT16_C( 19995), INT16_C( 7314), -INT16_C( 8732), INT16_C( 31370), INT16_C( 13197), -INT16_C( 31723), -INT16_C( 16368), INT16_C( 7542), -INT16_C( 13043), -INT16_C( 17160), -INT16_C( 6762), INT16_C( 2234), INT16_C( 28462), -INT16_C( 23388), -INT16_C( 8003), -INT16_C( 9809), INT16_C( 16942), INT16_C( 4853) }, { -INT16_C( 6762), INT16_C( 16942), INT16_C( 0), -INT16_C( 8003), INT16_C( 20086), -INT16_C( 20330), -INT16_C( 25675), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 17160), -INT16_C( 9385), -INT16_C( 8732), -INT16_C( 6762), -INT16_C( 16368), INT16_C( 0), 
INT16_C( 0), INT16_C( 0), -INT16_C( 3670), -INT16_C( 8713), INT16_C( 0), INT16_C( 0), -INT16_C( 20330), INT16_C( 0), INT16_C( 13197), INT16_C( 0), INT16_C( 0), -INT16_C( 276), INT16_C( 0), INT16_C( 7542), INT16_C( 0) } }, { UINT32_C(2911731487), { -INT16_C( 23885), -INT16_C( 15567), -INT16_C( 22686), INT16_C( 28641), -INT16_C( 9868), INT16_C( 2603), -INT16_C( 6466), -INT16_C( 5101), -INT16_C( 18603), INT16_C( 5008), INT16_C( 16535), -INT16_C( 14612), -INT16_C( 7806), -INT16_C( 24104), INT16_C( 25952), INT16_C( 4942), -INT16_C( 32760), INT16_C( 27351), -INT16_C( 18393), -INT16_C( 25382), INT16_C( 1425), INT16_C( 20646), -INT16_C( 17941), INT16_C( 16700), -INT16_C( 12944), INT16_C( 2132), INT16_C( 16397), -INT16_C( 28722), -INT16_C( 23007), -INT16_C( 32464), INT16_C( 32524), INT16_C( 5269) }, { INT16_C( 27903), INT16_C( 9854), INT16_C( 22564), -INT16_C( 19006), INT16_C( 26974), INT16_C( 18693), INT16_C( 16930), -INT16_C( 27766), -INT16_C( 8689), INT16_C( 7323), INT16_C( 26910), INT16_C( 16299), -INT16_C( 9457), INT16_C( 7105), INT16_C( 22106), INT16_C( 22831), -INT16_C( 20798), -INT16_C( 6528), INT16_C( 16902), INT16_C( 25755), -INT16_C( 24149), -INT16_C( 12626), INT16_C( 14563), -INT16_C( 3487), -INT16_C( 1001), INT16_C( 13582), -INT16_C( 18075), INT16_C( 29813), INT16_C( 13972), -INT16_C( 4208), -INT16_C( 16500), INT16_C( 20040) }, { INT16_C( 25755), -INT16_C( 6528), INT16_C( 22564), INT16_C( 9854), -INT16_C( 24149), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 12626), -INT16_C( 20798), -INT16_C( 3487), -INT16_C( 9457), INT16_C( 22564), -INT16_C( 1001), INT16_C( 27903), INT16_C( 0), -INT16_C( 8689), INT16_C( 0), -INT16_C( 27766), -INT16_C( 18075), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 13972), -INT16_C( 20798), INT16_C( 0), INT16_C( 7105), INT16_C( 22106), INT16_C( 0), -INT16_C( 20798), INT16_C( 0), -INT16_C( 12626) } }, { UINT32_C(1949616237), { -INT16_C( 12533), -INT16_C( 18728), -INT16_C( 31120), INT16_C( 21380), -INT16_C( 6721), -INT16_C( 10683), 
INT16_C( 21473), INT16_C( 17931), -INT16_C( 32756), -INT16_C( 24133), INT16_C( 19382), INT16_C( 17040), -INT16_C( 10230), INT16_C( 30864), -INT16_C( 15199), -INT16_C( 21268), -INT16_C( 15212), INT16_C( 1122), -INT16_C( 6325), INT16_C( 2648), -INT16_C( 25140), -INT16_C( 20768), -INT16_C( 5135), -INT16_C( 524), -INT16_C( 20628), INT16_C( 8862), INT16_C( 12026), INT16_C( 1381), -INT16_C( 2809), -INT16_C( 22403), INT16_C( 27066), INT16_C( 20052) }, { -INT16_C( 18899), INT16_C( 30802), -INT16_C( 21859), INT16_C( 27266), INT16_C( 25160), INT16_C( 14616), INT16_C( 3150), -INT16_C( 17866), -INT16_C( 10820), -INT16_C( 18724), INT16_C( 16643), INT16_C( 2747), INT16_C( 14391), -INT16_C( 3662), INT16_C( 1697), -INT16_C( 12481), -INT16_C( 28227), INT16_C( 23111), -INT16_C( 13764), -INT16_C( 31548), -INT16_C( 9172), INT16_C( 31421), -INT16_C( 3095), -INT16_C( 23244), INT16_C( 4552), -INT16_C( 13221), INT16_C( 5970), -INT16_C( 30250), -INT16_C( 30385), -INT16_C( 3718), -INT16_C( 18033), INT16_C( 19648) }, { INT16_C( 2747), INT16_C( 0), -INT16_C( 28227), INT16_C( 25160), INT16_C( 0), INT16_C( 14616), INT16_C( 30802), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 28227), INT16_C( 0), INT16_C( 0), INT16_C( 30802), INT16_C( 14391), INT16_C( 0), INT16_C( 0), INT16_C( 2747), INT16_C( 0), INT16_C( 14391), -INT16_C( 18899), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 5970), INT16_C( 0), -INT16_C( 17866), -INT16_C( 3718), INT16_C( 5970), INT16_C( 0) } }, { UINT32_C(2275870539), { INT16_C( 27601), -INT16_C( 501), -INT16_C( 14264), INT16_C( 12664), -INT16_C( 21061), -INT16_C( 31530), INT16_C( 12734), INT16_C( 4176), INT16_C( 9800), -INT16_C( 26470), INT16_C( 5295), INT16_C( 16265), INT16_C( 18894), INT16_C( 6539), INT16_C( 12880), INT16_C( 8864), -INT16_C( 21602), -INT16_C( 6624), -INT16_C( 26509), INT16_C( 11799), -INT16_C( 4795), INT16_C( 946), INT16_C( 542), INT16_C( 26388), -INT16_C( 20951), -INT16_C( 9985), -INT16_C( 30526), -INT16_C( 28649), -INT16_C( 
23599), INT16_C( 8617), INT16_C( 18901), INT16_C( 29507) }, { INT16_C( 25588), INT16_C( 26457), INT16_C( 28924), INT16_C( 16790), INT16_C( 18525), INT16_C( 31813), INT16_C( 22859), INT16_C( 29923), -INT16_C( 7673), -INT16_C( 14004), INT16_C( 25706), INT16_C( 15194), INT16_C( 775), -INT16_C( 9124), -INT16_C( 24499), INT16_C( 16720), -INT16_C( 22269), -INT16_C( 87), INT16_C( 16154), INT16_C( 30529), -INT16_C( 31097), -INT16_C( 11533), -INT16_C( 10529), -INT16_C( 6586), -INT16_C( 27720), INT16_C( 8879), INT16_C( 2551), -INT16_C( 419), -INT16_C( 17907), INT16_C( 23258), INT16_C( 10842), INT16_C( 23963) }, { -INT16_C( 87), INT16_C( 15194), INT16_C( 0), -INT16_C( 27720), INT16_C( 0), INT16_C( 0), INT16_C( 10842), INT16_C( 0), -INT16_C( 7673), INT16_C( 2551), INT16_C( 16720), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 10842), INT16_C( 25588), INT16_C( 30529), INT16_C( 0), INT16_C( 0), INT16_C( 16154), INT16_C( 0), -INT16_C( 31097), -INT16_C( 14004), INT16_C( 23963), INT16_C( 28924), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 16790) } }, { UINT32_C(3999089876), { -INT16_C( 24957), INT16_C( 2917), INT16_C( 22820), INT16_C( 989), INT16_C( 9263), -INT16_C( 5911), -INT16_C( 26441), -INT16_C( 20982), INT16_C( 26786), -INT16_C( 20564), -INT16_C( 31198), INT16_C( 31753), -INT16_C( 23375), -INT16_C( 31271), INT16_C( 14057), INT16_C( 27763), -INT16_C( 10028), -INT16_C( 1929), INT16_C( 21809), INT16_C( 25083), -INT16_C( 7047), INT16_C( 12361), INT16_C( 21373), INT16_C( 8158), -INT16_C( 30021), -INT16_C( 8754), -INT16_C( 10480), -INT16_C( 16039), INT16_C( 13179), INT16_C( 25670), -INT16_C( 18071), INT16_C( 16081) }, { INT16_C( 18578), -INT16_C( 15562), INT16_C( 12957), INT16_C( 5668), INT16_C( 27926), -INT16_C( 27834), INT16_C( 9409), INT16_C( 31922), -INT16_C( 32594), -INT16_C( 16550), -INT16_C( 19625), -INT16_C( 11392), -INT16_C( 14362), INT16_C( 20535), INT16_C( 2176), INT16_C( 4750), -INT16_C( 15279), -INT16_C( 4394), -INT16_C( 
1290), INT16_C( 3333), INT16_C( 19304), INT16_C( 10656), INT16_C( 21360), INT16_C( 7845), -INT16_C( 45), INT16_C( 11229), INT16_C( 24243), -INT16_C( 26114), INT16_C( 13605), -INT16_C( 23063), INT16_C( 30526), -INT16_C( 28744) }, { INT16_C( 0), INT16_C( 0), INT16_C( 27926), INT16_C( 0), INT16_C( 4750), INT16_C( 0), INT16_C( 7845), -INT16_C( 19625), INT16_C( 0), INT16_C( 0), INT16_C( 12957), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 16550), INT16_C( 0), INT16_C( 19304), INT16_C( 0), -INT16_C( 4394), -INT16_C( 26114), INT16_C( 11229), INT16_C( 0), -INT16_C( 23063), INT16_C( 0), INT16_C( 0), INT16_C( 2176), -INT16_C( 15279), INT16_C( 11229), INT16_C( 0), INT16_C( 9409), -INT16_C( 16550), -INT16_C( 4394) } }, { UINT32_C( 847089212), { -INT16_C( 32120), -INT16_C( 4033), -INT16_C( 7986), INT16_C( 15897), -INT16_C( 16589), INT16_C( 1628), INT16_C( 15038), INT16_C( 28977), INT16_C( 12184), -INT16_C( 17141), -INT16_C( 2971), -INT16_C( 23710), INT16_C( 6764), -INT16_C( 22478), -INT16_C( 20568), INT16_C( 12762), INT16_C( 6706), INT16_C( 33), INT16_C( 15354), INT16_C( 11582), -INT16_C( 25862), -INT16_C( 18381), INT16_C( 26068), INT16_C( 27690), INT16_C( 13716), -INT16_C( 1751), -INT16_C( 29655), -INT16_C( 27236), -INT16_C( 12634), INT16_C( 20285), INT16_C( 6270), -INT16_C( 20352) }, { -INT16_C( 24270), INT16_C( 11440), -INT16_C( 4388), -INT16_C( 10663), -INT16_C( 29560), INT16_C( 23951), -INT16_C( 17935), -INT16_C( 31031), -INT16_C( 3090), INT16_C( 6015), INT16_C( 7295), INT16_C( 9645), -INT16_C( 5398), INT16_C( 26740), -INT16_C( 3070), INT16_C( 13336), -INT16_C( 14186), INT16_C( 29280), -INT16_C( 17994), INT16_C( 16201), -INT16_C( 10170), INT16_C( 14236), INT16_C( 26001), INT16_C( 32701), INT16_C( 15704), -INT16_C( 10346), INT16_C( 17241), INT16_C( 17405), INT16_C( 28974), INT16_C( 12460), -INT16_C( 15258), -INT16_C( 923) }, { INT16_C( 0), INT16_C( 0), -INT16_C( 3070), -INT16_C( 10346), INT16_C( 16201), INT16_C( 28974), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 
9645), INT16_C( 23951), -INT16_C( 4388), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 17241), -INT16_C( 17994), INT16_C( 0), INT16_C( 17241), -INT16_C( 15258), INT16_C( 17241), INT16_C( 16201), -INT16_C( 10170), INT16_C( 0), INT16_C( 0), INT16_C( 6015), INT16_C( 0), INT16_C( 0), -INT16_C( 17935), INT16_C( 12460), INT16_C( 0), INT16_C( 0) } }, }; /* Replay every stored case through simde_mm512_maskz_permutexvar_epi16 and compare the result with the expected lanes. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i idx = simde_mm512_loadu_epi16(test_vec[i].idx); simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i r = simde_mm512_maskz_permutexvar_epi16(test_vec[i].k, idx, a); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else /* Compiled-out branch: runs the function on 8 random inputs, writes mask, idx, a and the computed r, then returns 1. */ fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask32 k = simde_test_x86_random_mmask32(); simde__m512i idx = simde_test_x86_random_i16x32(); simde__m512i a = simde_test_x86_random_i16x32(); simde__m512i r = simde_mm512_maskz_permutexvar_epi16(k, idx, a); simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x32(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Test for simde_mm512_permutexvar_epi32. The eight test_vec entries each hold an index vector (idx), a source vector (a) and the expected result (r), all built with simde_mm512_set_epi32. The loop calls simde_mm512_permutexvar_epi32(idx, a), checks the result against r with simde_assert_m512i_i32, and the function returns 0. Spot check of the first entry, assuming simde_mm512_set_epi32 lists lanes highest-first (TODO confirm): the last idx argument, 924273218, has low four bits 2, and the last r argument, 297634970, equals the third-from-last a argument. */ static int test_simde_mm512_permutexvar_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i idx; /* index vector, first argument of the call */ simde__m512i a; /* source vector, second argument of the call */ simde__m512i r; /* expected result */ } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 1996766677), INT32_C( 914731069), INT32_C( 1945861252), INT32_C( 879354074), INT32_C( -643998219), INT32_C( -855842922), INT32_C( 1434025670), INT32_C( -672258087), INT32_C( 1290251647), INT32_C(-1094826982), INT32_C( 238338636), INT32_C( -252228416), INT32_C( 122123135), INT32_C( 750368606), INT32_C( -381589944), INT32_C( 924273218)), simde_mm512_set_epi32(INT32_C( -879327216), INT32_C( 1748554959), INT32_C(-1487719285), INT32_C( 1548675310), INT32_C( -557015483), INT32_C(-2047402472), INT32_C( -322319998), INT32_C( 1785739054), INT32_C( 318403170), 
INT32_C(-1131639819), INT32_C(-1187670028), INT32_C( 146529946), INT32_C(-1037158278), INT32_C( 297634970), INT32_C( 1928669860), INT32_C( -102995392)), simde_mm512_set_epi32(INT32_C(-1187670028), INT32_C(-1487719285), INT32_C( 146529946), INT32_C(-2047402472), INT32_C(-1187670028), INT32_C(-1131639819), INT32_C(-1131639819), INT32_C( -322319998), INT32_C( -879327216), INT32_C(-2047402472), INT32_C( 1548675310), INT32_C( -102995392), INT32_C( -879327216), INT32_C( 1748554959), INT32_C( 1785739054), INT32_C( 297634970)) }, { simde_mm512_set_epi32(INT32_C( 1106394199), INT32_C( 1786608232), INT32_C( 477078085), INT32_C(-1779632362), INT32_C( -966979640), INT32_C( -998912797), INT32_C( 1045049163), INT32_C(-1292333722), INT32_C( 1183794740), INT32_C( -731066686), INT32_C( 511551854), INT32_C( 1167565783), INT32_C( 809184406), INT32_C(-1484085267), INT32_C( -106978631), INT32_C(-1748122897)), simde_mm512_set_epi32(INT32_C( -572206162), INT32_C(-1229665005), INT32_C(-2082924696), INT32_C( -649416979), INT32_C( 2112092702), INT32_C( -909261280), INT32_C(-1418185146), INT32_C(-1714733997), INT32_C( 1030181563), INT32_C( -291680665), INT32_C(-1617776213), INT32_C( 2112788110), INT32_C( 207483507), INT32_C( 1144876369), INT32_C( 1177485782), INT32_C( 1952824319)), simde_mm512_set_epi32(INT32_C( 1030181563), INT32_C(-1714733997), INT32_C(-1617776213), INT32_C( -291680665), INT32_C(-1714733997), INT32_C( 207483507), INT32_C( 2112092702), INT32_C( -291680665), INT32_C( 2112788110), INT32_C( 1144876369), INT32_C(-1229665005), INT32_C( 1030181563), INT32_C( -291680665), INT32_C(-2082924696), INT32_C(-1418185146), INT32_C( -572206162)) }, { simde_mm512_set_epi32(INT32_C( -736462739), INT32_C( 1787530508), INT32_C( 1887757958), INT32_C(-2099529631), INT32_C( 262676022), INT32_C( 53886530), INT32_C( 1760597059), INT32_C( 1505905484), INT32_C( 774167251), INT32_C( 967143468), INT32_C( 868941550), INT32_C(-1213290419), INT32_C( 1972608228), INT32_C( 11142680), INT32_C(-1446276724), 
INT32_C(-1200659658)), simde_mm512_set_epi32(INT32_C(-1351086746), INT32_C(-1617825510), INT32_C( -53781400), INT32_C( -324253949), INT32_C( 277157108), INT32_C(-1362182408), INT32_C( -600988389), INT32_C(-1661173616), INT32_C( 988661733), INT32_C(-1651241157), INT32_C( 751941929), INT32_C( -37019801), INT32_C( 498171562), INT32_C( 2017003975), INT32_C(-1441476496), INT32_C(-1365008121)), simde_mm512_set_epi32(INT32_C( -53781400), INT32_C( -324253949), INT32_C(-1651241157), INT32_C(-1441476496), INT32_C(-1651241157), INT32_C( 2017003975), INT32_C( 498171562), INT32_C( -324253949), INT32_C( 498171562), INT32_C( -324253949), INT32_C(-1617825510), INT32_C( -53781400), INT32_C( -37019801), INT32_C(-1661173616), INT32_C( -324253949), INT32_C(-1651241157)) }, { simde_mm512_set_epi32(INT32_C(-2111484726), INT32_C(-1666865657), INT32_C( 905297213), INT32_C( 873020550), INT32_C(-1639789891), INT32_C( 587061870), INT32_C(-2122729525), INT32_C( 1912650916), INT32_C( -12276084), INT32_C(-1060302964), INT32_C( 650767651), INT32_C( -997283137), INT32_C( 2144598253), INT32_C( 298450302), INT32_C( 1083198112), INT32_C( -506043419)), simde_mm512_set_epi32(INT32_C( -134982351), INT32_C(-1321868808), INT32_C( -899996596), INT32_C(-1289649119), INT32_C( -206201920), INT32_C( 1680053368), INT32_C(-1305508907), INT32_C(-2067500681), INT32_C(-1529397706), INT32_C( 1803067419), INT32_C( 1059939268), INT32_C( 399509097), INT32_C( -338549599), INT32_C( -31125095), INT32_C(-1633544688), INT32_C(-1398964227)), simde_mm512_set_epi32(INT32_C( 1680053368), INT32_C(-1529397706), INT32_C( -899996596), INT32_C( 1803067419), INT32_C( -899996596), INT32_C(-1321868808), INT32_C( -206201920), INT32_C( 399509097), INT32_C(-1289649119), INT32_C(-1289649119), INT32_C( -338549599), INT32_C( -134982351), INT32_C( -899996596), INT32_C(-1321868808), INT32_C(-1398964227), INT32_C( 1059939268)) }, { simde_mm512_set_epi32(INT32_C(-1660298113), INT32_C( 39258193), INT32_C( 1764960191), INT32_C( 1032976421), 
INT32_C(-1057643771), INT32_C( 2071456034), INT32_C(-1085712390), INT32_C( 1506090338), INT32_C( 367006319), INT32_C(-1863539807), INT32_C(-1111523249), INT32_C( 2019983201), INT32_C( 566503151), INT32_C( 503973368), INT32_C( 315043487), INT32_C( 230467816)), simde_mm512_set_epi32(INT32_C( 506925802), INT32_C( 939863413), INT32_C( -95691606), INT32_C( -63021650), INT32_C( 1460121332), INT32_C(-1770546120), INT32_C( 691111167), INT32_C( 1916845809), INT32_C( -722416713), INT32_C( 1952309936), INT32_C( -635326036), INT32_C(-2071561697), INT32_C(-1349311592), INT32_C(-1434371918), INT32_C( -908787181), INT32_C( 1027111311)), simde_mm512_set_epi32(INT32_C( 506925802), INT32_C( -908787181), INT32_C( 506925802), INT32_C( -635326036), INT32_C( -635326036), INT32_C(-1434371918), INT32_C(-1770546120), INT32_C(-1434371918), INT32_C( 506925802), INT32_C( -908787181), INT32_C( 506925802), INT32_C( -908787181), INT32_C( 506925802), INT32_C( 1916845809), INT32_C( 506925802), INT32_C( 1916845809)) }, { simde_mm512_set_epi32(INT32_C( -476274551), INT32_C(-1268946349), INT32_C( 1871428230), INT32_C( 1084413756), INT32_C( 1221272409), INT32_C( 1604619894), INT32_C( 245162423), INT32_C( -879398063), INT32_C(-1252847595), INT32_C(-1083417294), INT32_C( -85733631), INT32_C( -568206760), INT32_C( -433087356), INT32_C( 591211590), INT32_C( 216351398), INT32_C( 907563034)), simde_mm512_set_epi32(INT32_C( 553126451), INT32_C( 1865131028), INT32_C( -563146647), INT32_C( 1055479165), INT32_C(-2116199302), INT32_C( 548546839), INT32_C(-1021995369), INT32_C(-1397392070), INT32_C( 1911717085), INT32_C(-1893150233), INT32_C(-1174808283), INT32_C( 1158232544), INT32_C( -280806326), INT32_C( 520223268), INT32_C( 603498036), INT32_C( 115278412)), simde_mm512_set_epi32(INT32_C(-1021995369), INT32_C( -280806326), INT32_C(-1893150233), INT32_C( 1055479165), INT32_C(-1021995369), INT32_C(-1893150233), INT32_C( 1911717085), INT32_C( 603498036), INT32_C(-1174808283), INT32_C( 520223268), INT32_C( 
603498036), INT32_C(-1397392070), INT32_C( 1158232544), INT32_C(-1893150233), INT32_C(-1893150233), INT32_C( 548546839)) }, { simde_mm512_set_epi32(INT32_C( -195854029), INT32_C( 1958024187), INT32_C( 1003184214), INT32_C( 1306010047), INT32_C(-1204461456), INT32_C(-1689382831), INT32_C(-1474151310), INT32_C( 1613100007), INT32_C( 1060086708), INT32_C(-1444530803), INT32_C( 1862849170), INT32_C( -928808218), INT32_C( -146523132), INT32_C(-1235988391), INT32_C( -964779679), INT32_C( 57349444)), simde_mm512_set_epi32(INT32_C(-2134067081), INT32_C( -882392805), INT32_C( 185789257), INT32_C(-2111603035), INT32_C( 407162836), INT32_C(-1236510605), INT32_C(-1567423785), INT32_C( 780522762), INT32_C( -23120724), INT32_C( 92199108), INT32_C( 24805933), INT32_C( 1301060633), INT32_C(-1139480237), INT32_C( 217822558), INT32_C(-1429367443), INT32_C( 465891853)), simde_mm512_set_epi32(INT32_C(-1139480237), INT32_C( 407162836), INT32_C( 92199108), INT32_C(-2134067081), INT32_C( 465891853), INT32_C(-1429367443), INT32_C( 217822558), INT32_C( -23120724), INT32_C( 1301060633), INT32_C( 185789257), INT32_C( 217822558), INT32_C( 92199108), INT32_C( 1301060633), INT32_C(-1567423785), INT32_C(-1429367443), INT32_C( 1301060633)) }, { simde_mm512_set_epi32(INT32_C( 158027570), INT32_C(-1346235067), INT32_C( 1682432767), INT32_C( 1810962335), INT32_C( 1067516501), INT32_C(-1082135268), INT32_C( -56869560), INT32_C(-1195320775), INT32_C( -393383799), INT32_C( -554410804), INT32_C( 1644924567), INT32_C(-1134386712), INT32_C(-1030153866), INT32_C( 436830495), INT32_C(-1226346340), INT32_C( -700345341)), simde_mm512_set_epi32(INT32_C( -756862268), INT32_C( 1500443430), INT32_C( 463718589), INT32_C( 262081082), INT32_C( 990406393), INT32_C( 114071142), INT32_C(-1625480036), INT32_C( -478582396), INT32_C( -548991920), INT32_C( 1883825214), INT32_C( -16488776), INT32_C( -956015081), INT32_C(-1165588144), INT32_C( -540410051), INT32_C( 285110286), INT32_C( 1179668936)), 
simde_mm512_set_epi32(INT32_C( -540410051), INT32_C( -16488776), INT32_C( -756862268), INT32_C( -756862268), INT32_C( -16488776), INT32_C( 262081082), INT32_C( -478582396), INT32_C(-1625480036), INT32_C(-1625480036), INT32_C( 262081082), INT32_C( -548991920), INT32_C( -478582396), INT32_C( 1883825214), INT32_C( -756862268), INT32_C( 262081082), INT32_C(-1165588144)) }, }; /* Run every stored case and require an exact lane-wise match with the expected vector. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_permutexvar_epi32(test_vec[i].idx, test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_permutexvar_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask16 k; simde__m512i idx; simde__m512i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C(-1026363374), INT32_C(-1801776439), INT32_C( 145438126), INT32_C(-1306064352), INT32_C( -858736392), INT32_C( 923442479), INT32_C( 1092805562), INT32_C( 1443901717), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1570116932), INT32_C(-1302383354), INT32_C( 1993455974), INT32_C(-2068684593), INT32_C(-1936012201), INT32_C( 1856459607)), UINT16_C(27455), simde_mm512_set_epi32(INT32_C(-1110068455), INT32_C( -207240031), INT32_C(-1649179267), INT32_C( 2054398444), INT32_C( -483586503), INT32_C(-1481960002), INT32_C( 861125508), INT32_C( -330381203), INT32_C(-1999224530), INT32_C( 1042470181), INT32_C( 1827473477), INT32_C( 298546792), INT32_C(-1630396605), INT32_C( 1545554432), INT32_C( 344023940), INT32_C(-1871515754)), simde_mm512_set_epi32(INT32_C( 1711460779), INT32_C( -919570191), INT32_C( 1974152373), INT32_C( -695949043), INT32_C( -790242624), INT32_C(-1094331335), INT32_C(-1166320093), INT32_C(-2045280751), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 564965997), INT32_C( 169645898), INT32_C(-1539616610), INT32_C( 1134735685), INT32_C( 1430356381)), simde_mm512_set_epi32(INT32_C(-1026363374), INT32_C( 1134735685), INT32_C( 
1974152373), INT32_C(-1306064352), INT32_C(-1166320093), INT32_C( 923442479), INT32_C( 564965997), INT32_C( 1974152373), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 282198336), INT32_C(-2045280751), INT32_C( 169645898), INT32_C( 1430356381), INT32_C( 564965997), INT32_C( 223952317)) }, { simde_mm512_set_epi32(INT32_C(-1423023772), INT32_C(-1185448755), INT32_C( 1549802795), INT32_C( 159583350), INT32_C( 548883180), INT32_C( -605945909), INT32_C(-2063050181), INT32_C( 1095467003), INT32_C(-2083755741), INT32_C( 2066979701), INT32_C( 1094609712), INT32_C( 1345059025), INT32_C( -340318359), INT32_C( 1519671047), INT32_C(-1017461983), INT32_C( 353198331)), UINT16_C( 8253), simde_mm512_set_epi32(INT32_C( -888441293), INT32_C( -707551350), INT32_C( 513515868), INT32_C(-1825967755), INT32_C( 822222164), INT32_C(-1689559027), INT32_C( 533478787), INT32_C( 907615417), INT32_C( -199229058), INT32_C( -91537812), INT32_C( 1375258232), INT32_C( 139748399), INT32_C( 1688468565), INT32_C( 736544549), INT32_C(-1282057552), INT32_C( 795925067)), simde_mm512_set_epi32(INT32_C( 1701079465), INT32_C( -195770682), INT32_C( 503748315), INT32_C( 1469355417), INT32_C(-1849349632), INT32_C( 1962664621), INT32_C( -646247370), INT32_C( 1258747662), INT32_C( 1838830023), INT32_C( -532007659), INT32_C( -622852205), INT32_C( -839037220), INT32_C( 499633910), INT32_C( -260167255), INT32_C( 884163960), INT32_C( -329275629)), simde_mm512_set_epi32(INT32_C(-1423023772), INT32_C(-1185448755), INT32_C( 1469355417), INT32_C( 159583350), INT32_C( 548883180), INT32_C( -605945909), INT32_C(-2063050181), INT32_C( 1095467003), INT32_C(-2083755741), INT32_C( 2066979701), INT32_C( 1258747662), INT32_C( 1701079465), INT32_C( -622852205), INT32_C( -622852205), INT32_C(-1017461983), INT32_C(-1849349632)) }, { simde_mm512_set_epi32(INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494), INT32_C( -22119232), INT32_C( 1216907749), INT32_C( 654527510), INT32_C(-2043358500), INT32_C( 459072440), 
INT32_C( -430427651), INT32_C( -272088075), INT32_C( 386072301), INT32_C(-1628984154), INT32_C( 87817524), INT32_C( 1219490517), INT32_C(-1569831145), INT32_C( 338985942)), UINT16_C(47186), simde_mm512_set_epi32(INT32_C( 730787370), INT32_C(-2034110695), INT32_C(-1088138491), INT32_C( -353174912), INT32_C( -362301616), INT32_C( 617951303), INT32_C( 817116152), INT32_C(-1034835761), INT32_C( -102069057), INT32_C( 1774242298), INT32_C( 1089620040), INT32_C(-1101477862), INT32_C( 2001101785), INT32_C(-1759250988), INT32_C( -606254738), INT32_C( 1526367108)), simde_mm512_set_epi32(INT32_C( 204417556), INT32_C(-1329665093), INT32_C(-2039025377), INT32_C( 1639231015), INT32_C( 1541217841), INT32_C( 1692413538), INT32_C( 738521275), INT32_C( 159429100), INT32_C( 451955897), INT32_C( 181201098), INT32_C( 450627934), INT32_C( 2082954477), INT32_C( 1254960767), INT32_C( 1995459397), INT32_C( -11572946), INT32_C(-1087388220)), simde_mm512_set_epi32(INT32_C( 1692413538), INT32_C(-1762469023), INT32_C( 450627934), INT32_C(-1087388220), INT32_C(-1087388220), INT32_C( 654527510), INT32_C(-2043358500), INT32_C( 459072440), INT32_C( -430427651), INT32_C( 1692413538), INT32_C( 386072301), INT32_C( 1692413538), INT32_C( 87817524), INT32_C( 1219490517), INT32_C(-1329665093), INT32_C( 338985942)) }, { simde_mm512_set_epi32(INT32_C( 1928049651), INT32_C( 765730488), INT32_C( -85899231), INT32_C( 1435935141), INT32_C(-2098236580), INT32_C(-1991433794), INT32_C( 1298943776), INT32_C( 277470244), INT32_C(-1834748849), INT32_C( 596054477), INT32_C( 1827419510), INT32_C(-1010527612), INT32_C(-1687118128), INT32_C( 107945377), INT32_C( 1174128677), INT32_C(-1544325740)), UINT16_C(47807), simde_mm512_set_epi32(INT32_C( 1463729035), INT32_C( 2031968571), INT32_C( 333434400), INT32_C( -637142874), INT32_C( -520435756), INT32_C( -148623413), INT32_C( -692754616), INT32_C(-1908406411), INT32_C( 1391053429), INT32_C( 1767908668), INT32_C( 1117151413), INT32_C( 1466854108), INT32_C( -852914371), 
INT32_C( -773785464), INT32_C(-2142007253), INT32_C( 466013192)), simde_mm512_set_epi32(INT32_C( -811849174), INT32_C(-1510825074), INT32_C( 1897985966), INT32_C( 1445172644), INT32_C( -193622280), INT32_C( 2097959091), INT32_C( -652080500), INT32_C( 1943026961), INT32_C( 763848022), INT32_C(-2124387583), INT32_C(-1102663841), INT32_C( 712044568), INT32_C( 1641785760), INT32_C( 1696516135), INT32_C(-1123374630), INT32_C( -181070601)), simde_mm512_set_epi32(INT32_C( -193622280), INT32_C( 765730488), INT32_C( -181070601), INT32_C(-2124387583), INT32_C( 712044568), INT32_C(-1991433794), INT32_C( 1943026961), INT32_C( 277470244), INT32_C(-1102663841), INT32_C( 596054477), INT32_C(-1102663841), INT32_C( 1445172644), INT32_C( 1897985966), INT32_C( 1943026961), INT32_C( -193622280), INT32_C( 1943026961)) }, { simde_mm512_set_epi32(INT32_C( 729621709), INT32_C(-1241407128), INT32_C( 696721321), INT32_C( -603523965), INT32_C( 1730687689), INT32_C( 290786615), INT32_C(-1827031380), INT32_C( 1429317129), INT32_C(-1800615955), INT32_C( -728999228), INT32_C( -788606428), INT32_C( -539592973), INT32_C(-1402526875), INT32_C( -8263463), INT32_C( 478788156), INT32_C( 842200487)), UINT16_C(43569), simde_mm512_set_epi32(INT32_C(-1668086905), INT32_C( -770469750), INT32_C( 1013231130), INT32_C( 543156562), INT32_C( -399740514), INT32_C( 509655415), INT32_C( -160537509), INT32_C( -549528402), INT32_C( -323547130), INT32_C(-1395624565), INT32_C(-1905505546), INT32_C(-1268587914), INT32_C( 1939823644), INT32_C(-1112752789), INT32_C( 2052878307), INT32_C( -856056848)), simde_mm512_set_epi32(INT32_C( 1969107101), INT32_C(-2063427243), INT32_C( -670405092), INT32_C(-1879729053), INT32_C( 1035482990), INT32_C( 1183910939), INT32_C( 1515345934), INT32_C(-1884003639), INT32_C( -638430290), INT32_C(-2007622482), INT32_C( 171336877), INT32_C( 59553613), INT32_C( 165266600), INT32_C( -798384264), INT32_C( 1607584815), INT32_C(-1324336584)), simde_mm512_set_epi32(INT32_C( -638430290), 
INT32_C(-1241407128), INT32_C( 1183910939), INT32_C( -603523965), INT32_C(-2063427243), INT32_C( 290786615), INT32_C( 1035482990), INT32_C( 1429317129), INT32_C(-1800615955), INT32_C( -728999228), INT32_C(-2007622482), INT32_C(-2007622482), INT32_C(-1402526875), INT32_C( -8263463), INT32_C( 478788156), INT32_C(-1324336584)) }, { simde_mm512_set_epi32(INT32_C(-1349190316), INT32_C(-1403674818), INT32_C( -521443925), INT32_C(-1464291783), INT32_C(-1686112999), INT32_C(-1290233716), INT32_C( -364563113), INT32_C( 1520783126), INT32_C( -207159885), INT32_C( -104006691), INT32_C( 362759403), INT32_C(-1562242573), INT32_C( -397133039), INT32_C( 568974515), INT32_C(-1726442446), INT32_C(-2134949944)), UINT16_C(54613), simde_mm512_set_epi32(INT32_C(-1929379353), INT32_C( -560036292), INT32_C(-1693642327), INT32_C(-1383827159), INT32_C( 1804875719), INT32_C( 1179452315), INT32_C(-1509656190), INT32_C(-1409992701), INT32_C(-1830359468), INT32_C( 635753031), INT32_C( 310246197), INT32_C(-1783943034), INT32_C(-1307643183), INT32_C( -144888334), INT32_C( 621611179), INT32_C( 743650285)), simde_mm512_set_epi32(INT32_C(-2094713086), INT32_C( 197529411), INT32_C( 1055036471), INT32_C( 351897115), INT32_C( 1594003471), INT32_C(-1709813294), INT32_C( -133653364), INT32_C( -51462036), INT32_C( 46796230), INT32_C( 989301899), INT32_C( -691937914), INT32_C( 1667629581), INT32_C( -496700661), INT32_C(-1318801755), INT32_C( 1076515270), INT32_C(-1757573505)), simde_mm512_set_epi32(INT32_C( 46796230), INT32_C( 351897115), INT32_C( -521443925), INT32_C( -133653364), INT32_C(-1686112999), INT32_C( 1594003471), INT32_C( -364563113), INT32_C( -496700661), INT32_C( -207159885), INT32_C( 46796230), INT32_C( 362759403), INT32_C( 989301899), INT32_C( -397133039), INT32_C(-1318801755), INT32_C(-1726442446), INT32_C( 1055036471)) }, { simde_mm512_set_epi32(INT32_C( -662725541), INT32_C( 790228415), INT32_C( -26753919), INT32_C( -495897274), INT32_C( 1526994051), INT32_C( 1569343894), 
INT32_C(-1059990980), INT32_C( -490626870), INT32_C( 1463745126), INT32_C( -957352131), INT32_C( 1122208393), INT32_C(-1814919780), INT32_C(-1891682702), INT32_C( -176064246), INT32_C(-1293286075), INT32_C(-1398303881)), UINT16_C(12582), simde_mm512_set_epi32(INT32_C(-1664789378), INT32_C( -204514420), INT32_C( 1112369408), INT32_C( 2113109396), INT32_C(-1679339682), INT32_C( 2128430529), INT32_C(-1262713143), INT32_C( 1026756660), INT32_C(-1268083621), INT32_C( 337660693), INT32_C( 1524090799), INT32_C( -275653210), INT32_C(-1422519849), INT32_C( -495051500), INT32_C( 1851182812), INT32_C( 5027269)), simde_mm512_set_epi32(INT32_C( -501257427), INT32_C(-1329431510), INT32_C( 1005777948), INT32_C( 616430734), INT32_C( 1581162255), INT32_C( 1497456456), INT32_C(-1170808415), INT32_C(-1014503666), INT32_C(-1157750165), INT32_C( 1691363299), INT32_C( 1100655145), INT32_C( 673265711), INT32_C( 1544659928), INT32_C(-1956803094), INT32_C( 1970109422), INT32_C(-1197844366)), simde_mm512_set_epi32(INT32_C( -662725541), INT32_C( 790228415), INT32_C(-1197844366), INT32_C( 673265711), INT32_C( 1526994051), INT32_C( 1569343894), INT32_C(-1059990980), INT32_C( 673265711), INT32_C( 1463745126), INT32_C( -957352131), INT32_C( -501257427), INT32_C(-1814919780), INT32_C(-1891682702), INT32_C( 673265711), INT32_C( 616430734), INT32_C(-1398303881)) }, { simde_mm512_set_epi32(INT32_C(-1875196295), INT32_C( -129416454), INT32_C( 246939492), INT32_C(-1135915662), INT32_C( -539014135), INT32_C( 1554320066), INT32_C( 1611314079), INT32_C(-1013292897), INT32_C( 1135188428), INT32_C(-1903831246), INT32_C( 1207366326), INT32_C( -366505666), INT32_C( 1379479886), INT32_C( 1577255779), INT32_C( -488475560), INT32_C( 143370041)), UINT16_C(29031), simde_mm512_set_epi32(INT32_C( -7690721), INT32_C( -632623581), INT32_C( 612963145), INT32_C( 1824881051), INT32_C( 582494706), INT32_C(-1899955415), INT32_C( 618457733), INT32_C( 1216635147), INT32_C( 1404062051), INT32_C( 1905825928), 
INT32_C(-1635796069), INT32_C( 528096299), INT32_C( 259240399), INT32_C( 271372102), INT32_C( 1305969598), INT32_C( 974818283)), simde_mm512_set_epi32(INT32_C( -675362282), INT32_C(-1218762696), INT32_C(-1400182216), INT32_C(-2088680370), INT32_C(-1895497877), INT32_C( 1563893931), INT32_C( 1105770515), INT32_C(-1745770541), INT32_C(-1255255240), INT32_C( 1238532704), INT32_C( 995946229), INT32_C( 119517601), INT32_C( 1389614040), INT32_C(-2032996348), INT32_C( 1996749952), INT32_C( 450477794)), simde_mm512_set_epi32(INT32_C(-1875196295), INT32_C( 1389614040), INT32_C( 1105770515), INT32_C(-1895497877), INT32_C( -539014135), INT32_C( 1554320066), INT32_C( 1611314079), INT32_C(-1895497877), INT32_C( 1135188428), INT32_C(-1745770541), INT32_C(-1895497877), INT32_C( -366505666), INT32_C( 1379479886), INT32_C( 1238532704), INT32_C(-1218762696), INT32_C(-1895497877)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_permutexvar_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].idx, test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_permutexvar_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512i idx; simde__m512i a; simde__m512i r; } test_vec[8] = { { UINT16_C(21335), simde_mm512_set_epi32(INT32_C( 951544639), INT32_C(-1026363374), INT32_C(-1801776439), INT32_C( 145438126), INT32_C(-1306064352), INT32_C( -858736392), INT32_C( 923442479), INT32_C( 1092805562), INT32_C( 1443901717), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1570116932), INT32_C(-1302383354), INT32_C( 1993455974), INT32_C(-2068684593), INT32_C(-1936012201)), simde_mm512_set_epi32(INT32_C(-1110068455), INT32_C( -207240031), INT32_C(-1649179267), INT32_C( 2054398444), INT32_C( -483586503), INT32_C(-1481960002), INT32_C( 861125508), INT32_C( -330381203), INT32_C(-1999224530), INT32_C( 1042470181), INT32_C( 1827473477), INT32_C( 298546792), 
INT32_C(-1630396605), INT32_C( 1545554432), INT32_C( 344023940), INT32_C(-1871515754)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 1545554432), INT32_C( 0), INT32_C( -207240031), INT32_C( 0), INT32_C( 0), INT32_C(-1110068455), INT32_C(-1481960002), INT32_C( 0), INT32_C( 2054398444), INT32_C( 0), INT32_C( 298546792), INT32_C( 0), INT32_C( 1042470181), INT32_C(-1110068455), INT32_C(-1999224530)) }, { UINT16_C(33181), simde_mm512_set_epi32(INT32_C( 353198331), INT32_C( 1711460779), INT32_C( -919570191), INT32_C( 1974152373), INT32_C( -695949043), INT32_C( -790242624), INT32_C(-1094331335), INT32_C(-1166320093), INT32_C(-2045280751), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 564965997), INT32_C( 169645898), INT32_C(-1539616610), INT32_C( 1134735685)), simde_mm512_set_epi32(INT32_C( 1720852541), INT32_C(-1423023772), INT32_C(-1185448755), INT32_C( 1549802795), INT32_C( 159583350), INT32_C( 548883180), INT32_C( -605945909), INT32_C(-2063050181), INT32_C( 1095467003), INT32_C(-2083755741), INT32_C( 2066979701), INT32_C( 1094609712), INT32_C( 1345059025), INT32_C( -340318359), INT32_C( 1519671047), INT32_C(-1017461983)), simde_mm512_set_epi32(INT32_C( 159583350), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1345059025), INT32_C( 1519671047), INT32_C( 0), INT32_C( 0), INT32_C(-1017461983), INT32_C(-1185448755), INT32_C( 548883180), INT32_C( 0), INT32_C( 2066979701)) }, { UINT16_C(55883), simde_mm512_set_epi32(INT32_C( -329275629), INT32_C( -888441293), INT32_C( -707551350), INT32_C( 513515868), INT32_C(-1825967755), INT32_C( 822222164), INT32_C(-1689559027), INT32_C( 533478787), INT32_C( 907615417), INT32_C( -199229058), INT32_C( -91537812), INT32_C( 1375258232), INT32_C( 139748399), INT32_C( 1688468565), INT32_C( 736544549), INT32_C(-1282057552)), simde_mm512_set_epi32(INT32_C( 338985942), INT32_C( 1701079465), INT32_C( -195770682), INT32_C( 503748315), INT32_C( 1469355417), INT32_C(-1849349632), 
INT32_C( 1962664621), INT32_C( -646247370), INT32_C( 1258747662), INT32_C( 1838830023), INT32_C( -532007659), INT32_C( -622852205), INT32_C( -839037220), INT32_C( 499633910), INT32_C( -260167255), INT32_C( 884163960)), simde_mm512_set_epi32(INT32_C( -839037220), INT32_C( -839037220), INT32_C( 0), INT32_C( 503748315), INT32_C( -532007659), INT32_C( 0), INT32_C( -195770682), INT32_C( 0), INT32_C( 0), INT32_C( 1701079465), INT32_C( 0), INT32_C( 0), INT32_C( 338985942), INT32_C( 0), INT32_C( -532007659), INT32_C( 884163960)) }, { UINT16_C(18199), simde_mm512_set_epi32(INT32_C( 1526367108), INT32_C( 722122834), INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494), INT32_C( -22119232), INT32_C( 1216907749), INT32_C( 654527510), INT32_C(-2043358500), INT32_C( 459072440), INT32_C( -430427651), INT32_C( -272088075), INT32_C( 386072301), INT32_C(-1628984154), INT32_C( 87817524), INT32_C( 1219490517)), simde_mm512_set_epi32(INT32_C(-1087388220), INT32_C( 730787370), INT32_C(-2034110695), INT32_C(-1088138491), INT32_C( -353174912), INT32_C( -362301616), INT32_C( 617951303), INT32_C( 817116152), INT32_C(-1034835761), INT32_C( -102069057), INT32_C( 1774242298), INT32_C( 1089620040), INT32_C(-1101477862), INT32_C( 2001101785), INT32_C(-1759250988), INT32_C( -606254738)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 2001101785), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -606254738), INT32_C( 1774242298), INT32_C( -102069057), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1774242298), INT32_C( 0), INT32_C( -102069057), INT32_C( 1089620040), INT32_C( 1774242298)) }, { UINT16_C(26926), simde_mm512_set_epi32(INT32_C( 1174128677), INT32_C(-1544325740), INT32_C( 204417556), INT32_C(-1329665093), INT32_C(-2039025377), INT32_C( 1639231015), INT32_C( 1541217841), INT32_C( 1692413538), INT32_C( 738521275), INT32_C( 159429100), INT32_C( 451955897), INT32_C( 181201098), INT32_C( 450627934), INT32_C( 2082954477), INT32_C( 1254960767), INT32_C( 1995459397)), 
simde_mm512_set_epi32(INT32_C( 466013192), INT32_C( 1313258175), INT32_C( 1928049651), INT32_C( 765730488), INT32_C( -85899231), INT32_C( 1435935141), INT32_C(-2098236580), INT32_C(-1991433794), INT32_C( 1298943776), INT32_C( 277470244), INT32_C(-1834748849), INT32_C( 596054477), INT32_C( 1827419510), INT32_C(-1010527612), INT32_C(-1687118128), INT32_C( 107945377)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 596054477), INT32_C( 596054477), INT32_C( 0), INT32_C( 466013192), INT32_C( 0), INT32_C( 0), INT32_C(-1010527612), INT32_C( 0), INT32_C( 0), INT32_C(-2098236580), INT32_C( 0), INT32_C( 1313258175), INT32_C( 1928049651), INT32_C( 466013192), INT32_C( 0)) }, { UINT16_C(36907), simde_mm512_set_epi32(INT32_C(-1123374630), INT32_C( -181070601), INT32_C( 1463729035), INT32_C( 2031968571), INT32_C( 333434400), INT32_C( -637142874), INT32_C( -520435756), INT32_C( -148623413), INT32_C( -692754616), INT32_C(-1908406411), INT32_C( 1391053429), INT32_C( 1767908668), INT32_C( 1117151413), INT32_C( 1466854108), INT32_C( -852914371), INT32_C( -773785464)), simde_mm512_set_epi32(INT32_C( 478788156), INT32_C( 842200487), INT32_C( -811849174), INT32_C(-1510825074), INT32_C( 1897985966), INT32_C( 1445172644), INT32_C( -193622280), INT32_C( 2097959091), INT32_C( -652080500), INT32_C( 1943026961), INT32_C( 763848022), INT32_C(-2124387583), INT32_C(-1102663841), INT32_C( 712044568), INT32_C( 1641785760), INT32_C( 1696516135)), simde_mm512_set_epi32(INT32_C( 1445172644), INT32_C( 0), INT32_C( 0), INT32_C( 1897985966), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 763848022), INT32_C( 0), INT32_C( 763848022), INT32_C( 0), INT32_C( -811849174), INT32_C( 2097959091)) }, { UINT16_C(59609), simde_mm512_set_epi32(INT32_C( 2052878307), INT32_C( -856056848), INT32_C(-1218860495), INT32_C( 729621709), INT32_C(-1241407128), INT32_C( 696721321), INT32_C( -603523965), INT32_C( 1730687689), INT32_C( 290786615), INT32_C(-1827031380), INT32_C( 1429317129), 
INT32_C(-1800615955), INT32_C( -728999228), INT32_C( -788606428), INT32_C( -539592973), INT32_C(-1402526875)), simde_mm512_set_epi32(INT32_C( 1607584815), INT32_C(-1324336584), INT32_C(-1668086905), INT32_C( -770469750), INT32_C( 1013231130), INT32_C( 543156562), INT32_C( -399740514), INT32_C( 509655415), INT32_C( -160537509), INT32_C( -549528402), INT32_C( -323547130), INT32_C(-1395624565), INT32_C(-1905505546), INT32_C(-1268587914), INT32_C( 1939823644), INT32_C(-1112752789)), simde_mm512_set_epi32(INT32_C(-1905505546), INT32_C(-1112752789), INT32_C( 1939823644), INT32_C( 0), INT32_C( 509655415), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -160537509), INT32_C( -770469750), INT32_C( 0), INT32_C(-1668086905), INT32_C(-1395624565), INT32_C( 0), INT32_C( 0), INT32_C( -323547130)) }, { UINT16_C(40824), simde_mm512_set_epi32(INT32_C( 568974515), INT32_C(-1726442446), INT32_C(-2134949944), INT32_C( 1969107101), INT32_C(-2063427243), INT32_C( -670405092), INT32_C(-1879729053), INT32_C( 1035482990), INT32_C( 1183910939), INT32_C( 1515345934), INT32_C(-1884003639), INT32_C( -638430290), INT32_C(-2007622482), INT32_C( 171336877), INT32_C( 59553613), INT32_C( 165266600)), simde_mm512_set_epi32(INT32_C( 621611179), INT32_C( 743650285), INT32_C( 1845744981), INT32_C(-1349190316), INT32_C(-1403674818), INT32_C( -521443925), INT32_C(-1464291783), INT32_C(-1686112999), INT32_C(-1290233716), INT32_C( -364563113), INT32_C( 1520783126), INT32_C( -207159885), INT32_C( -104006691), INT32_C( 362759403), INT32_C(-1562242573), INT32_C( -397133039)), simde_mm512_set_epi32(INT32_C( -104006691), INT32_C( 0), INT32_C( 0), INT32_C( 1845744981), INT32_C( 1520783126), INT32_C(-1349190316), INT32_C( -104006691), INT32_C( 743650285), INT32_C( 0), INT32_C( 743650285), INT32_C(-1464291783), INT32_C( 743650285), INT32_C( 743650285), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = 
simde_mm512_maskz_permutexvar_epi32(test_vec[i].k, test_vec[i].idx, test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_permutexvar_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i idx; simde__m512i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C(-2671339936797273634), INT64_C(-6569724575104779065), INT64_C( 6280671509806293744), INT64_C(-6564012982845614940), INT64_C(-8218226210083292891), INT64_C(-1970878168494992951), INT64_C( 623983056867500923), INT64_C( 8381435109868166325)), simde_mm512_set_epi64(INT64_C(-2238152008698912054), INT64_C(-6011307368237149167), INT64_C( 7021013141387677756), INT64_C( 209431633592419129), INT64_C(-2042251418947820180), INT64_C(-1637501592002869374), INT64_C(-7968886780493171295), INT64_C( 7201832131237889915)), simde_mm512_set_epi64(INT64_C(-6011307368237149167), INT64_C(-2238152008698912054), INT64_C( 7201832131237889915), INT64_C( 209431633592419129), INT64_C( 7021013141387677756), INT64_C(-7968886780493171295), INT64_C(-2042251418947820180), INT64_C( 7021013141387677756)) }, { simde_mm512_set_epi64(INT64_C( 9078632813866954216), INT64_C( 5306066193622774721), INT64_C( 6328033075457178183), INT64_C(-7841620825407880030), INT64_C( 8494413761555835343), INT64_C(-6629030689949253425), INT64_C(-3415199944519615656), INT64_C( 9203934858949759250)), simde_mm512_set_epi64(INT64_C( 2479803165234466245), INT64_C( 2661396061418991136), INT64_C(-5715036345641082343), INT64_C( 1298703295811073042), INT64_C( 5852540304039009511), INT64_C( 6078752793077415028), INT64_C( 4402609795769289287), INT64_C( 6115701178487842322)), simde_mm512_set_epi64(INT64_C( 6115701178487842322), INT64_C( 4402609795769289287), INT64_C( 2479803165234466245), INT64_C( 6078752793077415028), INT64_C( 2479803165234466245), INT64_C( 2479803165234466245), INT64_C( 6115701178487842322), INT64_C( 6078752793077415028)) }, { simde_mm512_set_epi64(INT64_C(-4996537380140624007), INT64_C( 
3942559966214732809), INT64_C(-2671134702774502774), INT64_C( 8456382047897336120), INT64_C( 6407530222969271406), INT64_C( 407141370872746761), INT64_C( 8529772400902424249), INT64_C(-4246571771485464983)), simde_mm512_set_epi64(INT64_C( 834150414269389391), INT64_C( 2027953688477105836), INT64_C( 4657551970593349995), INT64_C(-7093404930502337621), INT64_C(-1111425946814561024), INT64_C(-6887525210431388893), INT64_C( 7494759347392082910), INT64_C(-5042062261697149458)), simde_mm512_set_epi64(INT64_C( 7494759347392082910), INT64_C( 7494759347392082910), INT64_C(-6887525210431388893), INT64_C(-5042062261697149458), INT64_C( 2027953688477105836), INT64_C( 7494759347392082910), INT64_C( 7494759347392082910), INT64_C( 7494759347392082910)) }, { simde_mm512_set_epi64(INT64_C( 2040395228897941830), INT64_C( 2765121423290447296), INT64_C( 2050656439718793919), INT64_C(-7162012369641923245), INT64_C( -776125111083479372), INT64_C( 6485544818721995013), INT64_C( 8129692823118117691), INT64_C(-8572576708578902442)), simde_mm512_set_epi64(INT64_C( 2289013539283677845), INT64_C(-5799540082455789735), INT64_C( 2163632088234152524), INT64_C(-2186518714684028349), INT64_C(-8831520715491995762), INT64_C( 3869803765755785917), INT64_C( 8013865163596309228), INT64_C(-3584939450023784210)), simde_mm512_set_epi64(INT64_C(-5799540082455789735), INT64_C(-3584939450023784210), INT64_C( 2289013539283677845), INT64_C(-8831520715491995762), INT64_C(-2186518714684028349), INT64_C( 2163632088234152524), INT64_C(-8831520715491995762), INT64_C(-5799540082455789735)) }, { simde_mm512_set_epi64(INT64_C( 3677820960269215734), INT64_C(-6343206756900565591), INT64_C( 8255445898463112459), INT64_C( 4852161483621128051), INT64_C( 8099305670443807898), INT64_C(-8684281278489558074), INT64_C( 3137344644687539915), INT64_C( 5523372352513099931)), simde_mm512_set_epi64(INT64_C( 4283186432542966059), INT64_C( 1170657112776090420), INT64_C( 2518002084319668884), INT64_C( 4471327755299130682), INT64_C( 
2261821509900268313), INT64_C( 5692030531386538246), INT64_C(-1483628367950291610), INT64_C( 6533622553555897345)), simde_mm512_set_epi64(INT64_C( 1170657112776090420), INT64_C(-1483628367950291610), INT64_C( 2261821509900268313), INT64_C( 2261821509900268313), INT64_C( 5692030531386538246), INT64_C( 1170657112776090420), INT64_C( 2261821509900268313), INT64_C( 2261821509900268313)) }, { simde_mm512_set_epi64(INT64_C( 2347171147608692871), INT64_C(-6783728424977598502), INT64_C(-2041006483186382974), INT64_C( 8642864406765137587), INT64_C(-1636657579614487452), INT64_C( 9207381314159514814), INT64_C(-3687478572965330724), INT64_C(-9006129120063771595)), simde_mm512_set_epi64(INT64_C( 4701446694745358904), INT64_C( 5644218643369465634), INT64_C( 3403557732929675228), INT64_C( 5349859821272859600), INT64_C(-1653042609039941611), INT64_C(-4665914487571652764), INT64_C( 3371940136328092430), INT64_C(-2406499600794803976)), simde_mm512_set_epi64(INT64_C( 4701446694745358904), INT64_C(-4665914487571652764), INT64_C(-4665914487571652764), INT64_C(-1653042609039941611), INT64_C( 5349859821272859600), INT64_C( 5644218643369465634), INT64_C( 5349859821272859600), INT64_C( 3403557732929675228)) }, { simde_mm512_set_epi64(INT64_C( 4782376392228385514), INT64_C(-1906708615590323735), INT64_C(-3545169353098873151), INT64_C( 3265944245325513127), INT64_C( 4131290265645096409), INT64_C(-4397904511930284448), INT64_C(-2647251656339754605), INT64_C( 3315710643141776922)), simde_mm512_set_epi64(INT64_C(-6136398457404459673), INT64_C( -648409073018341138), INT64_C( 958458472131577038), INT64_C( 368136538694245601), INT64_C( 7267809600451931130), INT64_C( 7002668171940425391), INT64_C(-7238180738987124127), INT64_C(-3577079750371726624)), simde_mm512_set_epi64(INT64_C( 7002668171940425391), INT64_C(-7238180738987124127), INT64_C(-7238180738987124127), INT64_C(-6136398457404459673), INT64_C(-7238180738987124127), INT64_C(-3577079750371726624), INT64_C( 7267809600451931130), INT64_C( 
7002668171940425391)) }, { simde_mm512_set_epi64(INT64_C( 7177491025608125879), INT64_C( 2298304526936591870), INT64_C( 4104659984995714948), INT64_C(-3124913163496554810), INT64_C( 7421100574262603376), INT64_C( 5351476444327610551), INT64_C( 110626773490384787), INT64_C( 8483452379448679247)), simde_mm512_set_epi64(INT64_C( 7070883828009365305), INT64_C(-8674554469467912700), INT64_C( 4454911305585732940), INT64_C(-2880418777083904328), INT64_C( 6809950466209546729), INT64_C( 2297778791052038555), INT64_C( 3156218119380498044), INT64_C(-2342831745602989292)), simde_mm512_set_epi64(INT64_C( 7070883828009365305), INT64_C(-8674554469467912700), INT64_C(-2880418777083904328), INT64_C(-8674554469467912700), INT64_C(-2342831745602989292), INT64_C( 7070883828009365305), INT64_C( 6809950466209546729), INT64_C( 7070883828009365305)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_permutexvar_epi64(test_vec[i].idx, test_vec[i].a); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_permutexvar_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask8 k; simde__m512i idx; simde__m512i a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C(-4408197122649025847), INT64_C( 624651997750430240), INT64_C(-3688244718601593553), INT64_C( 4693564151120802069), INT64_C( 7940316924786767481), INT64_C( 6743600876828439814), INT64_C( 8561828216572109007), INT64_C(-8315109086095518889)), UINT8_C( 63), simde_mm512_set_epi64(INT64_C(-4767707706458520415), INT64_C(-7083171014951853588), INT64_C(-2076988212358998594), INT64_C( 3698505898575972461), INT64_C(-8586603972668500699), INT64_C( 7848938818320954984), INT64_C(-7002500096438875648), INT64_C( 1477571573764517782)), simde_mm512_set_epi64(INT64_C( 7350668077567080689), INT64_C( 8478919882954811661), INT64_C(-3394066222784588743), INT64_C(-5009306653852991983), INT64_C(-8749971605870264899), INT64_C( 
1212032624670585453), INT64_C( 728623586565902494), INT64_C( 4873652658109514141)), simde_mm512_set_epi64(INT64_C(-4408197122649025847), INT64_C( 624651997750430240), INT64_C( 8478919882954811661), INT64_C(-3394066222784588743), INT64_C(-3394066222784588743), INT64_C( 4873652658109514141), INT64_C( 4873652658109514141), INT64_C( 8478919882954811661)) }, { simde_mm512_set_epi64(INT64_C(-6111840559061041971), INT64_C( 6656352319933975670), INT64_C( 2357435311113502667), INT64_C(-8860733056306413573), INT64_C(-8949662758380266635), INT64_C( 4701312916269037777), INT64_C(-1461656220613716217), INT64_C(-4369965941555109637)), UINT8_C( 61), simde_mm512_set_epi64(INT64_C(-3815826294263537782), INT64_C( 2205533861506052469), INT64_C( 3531417307031756813), INT64_C( 2291273944182365369), INT64_C( -855682284319457684), INT64_C( 5906689130134529071), INT64_C( 7251917267735594789), INT64_C(-5506395256633894325)), simde_mm512_set_epi64(INT64_C( 7306080674171373254), INT64_C( 2163582539809461657), INT64_C(-7942896186346970451), INT64_C(-2775611318017263858), INT64_C( 7897714815450887445), INT64_C(-2675129847260557604), INT64_C( 2145911307457407401), INT64_C( 3797455296467543827)), simde_mm512_set_epi64(INT64_C(-6111840559061041971), INT64_C( 6656352319933975670), INT64_C(-7942896186346970451), INT64_C( 2145911307457407401), INT64_C(-2775611318017263858), INT64_C( 7306080674171373254), INT64_C(-1461656220613716217), INT64_C( 7897714815450887445)) }, { simde_mm512_set_epi64(INT64_C( -751557688731444383), INT64_C( 5324069355912068288), INT64_C( 5226578984858504214), INT64_C(-8776157931044543560), INT64_C(-1848672680316222475), INT64_C( 1658167909352451238), INT64_C( 377173394815185621), INT64_C(-6742373427678247978)), UINT8_C( 82), simde_mm512_set_epi64(INT64_C( 3138707856740708121), INT64_C(-4673519228421997952), INT64_C(-1556073591389999033), INT64_C( 3509487153133496527), INT64_C( -438383259974317574), INT64_C( 4679882440059701274), INT64_C( 8594666725077939668), 
INT64_C(-2603844271228681340)), simde_mm512_set_epi64(INT64_C( 877966720713550779), INT64_C(-8757547308289839577), INT64_C( 6619480224799141474), INT64_C( 3171924723684651500), INT64_C( 1941135797030545610), INT64_C( 1935432241277000941), INT64_C( 5390015454023535429), INT64_C( -49705421380794940)), simde_mm512_set_epi64(INT64_C( -751557688731444383), INT64_C( -49705421380794940), INT64_C( 5226578984858504214), INT64_C( 877966720713550779), INT64_C(-1848672680316222475), INT64_C( 1658167909352451238), INT64_C( 3171924723684651500), INT64_C(-6742373427678247978)) }, { simde_mm512_set_epi64(INT64_C( 8280910196874944184), INT64_C( -368934386460614235), INT64_C(-9011857488067354178), INT64_C( 5578921037540219940), INT64_C(-7880186302232587827), INT64_C( 7848707034806784644), INT64_C(-7246117184140796511), INT64_C( 5042844271761388948)), UINT8_C(191), simde_mm512_set_epi64(INT64_C( 6286668337562607931), INT64_C( 1432089847019206822), INT64_C(-2235254547542691893), INT64_C(-2975358417486477451), INT64_C( 5974528986311566652), INT64_C( 4798128784982043356), INT64_C(-3663239326212228984), INT64_C(-9199851098963784696)), simde_mm512_set_epi64(INT64_C(-3486865648830471282), INT64_C( 8151787653682140580), INT64_C( -831601358278995789), INT64_C(-2800664419916301039), INT64_C( 3280702275774868225), INT64_C(-4735905134864699368), INT64_C( 7051416147935021095), INT64_C(-4824857292892203785)), simde_mm512_set_epi64(INT64_C( 3280702275774868225), INT64_C( -368934386460614235), INT64_C( 3280702275774868225), INT64_C( -831601358278995789), INT64_C(-2800664419916301039), INT64_C(-2800664419916301039), INT64_C(-4824857292892203785), INT64_C(-4824857292892203785)) }, { simde_mm512_set_epi64(INT64_C( 3133701381660189032), INT64_C( 2992395291812361347), INT64_C( 7433247024135605559), INT64_C(-7847040024436431351), INT64_C(-7733586635814839612), INT64_C(-3387038813920004365), INT64_C(-6023807055599376167), INT64_C( 2056379472574346663)), UINT8_C( 49), 
simde_mm512_set_epi64(INT64_C(-7164378700336361334), INT64_C( 4351794567182281042), INT64_C(-1716872434006574729), INT64_C( -689503347190866770), INT64_C(-1389624339165317749), INT64_C(-8184083999390244234), INT64_C( 8331479114169761131), INT64_C( 8817045194671758320)), simde_mm512_set_epi64(INT64_C( 8457250603347908949), INT64_C(-2879367942796632989), INT64_C( 4447365578798205979), INT64_C( 6508361231067538121), INT64_C(-2742037214038451026), INT64_C( 735886283373328205), INT64_C( 709814645617696632), INT64_C( 6904524208941840952)), simde_mm512_set_epi64(INT64_C( 3133701381660189032), INT64_C( 2992395291812361347), INT64_C( 8457250603347908949), INT64_C(-2879367942796632989), INT64_C(-7733586635814839612), INT64_C(-3387038813920004365), INT64_C(-6023807055599376167), INT64_C( 6904524208941840952)) }, { simde_mm512_set_epi64(INT64_C(-5794728280408613058), INT64_C(-2239584601742201287), INT64_C(-7241800185060747124), INT64_C(-1565786646142169322), INT64_C( -889744926927160355), INT64_C( 1558039774934209011), INT64_C(-1705673414097118029), INT64_C(-7415013841836228664)), UINT8_C( 85), simde_mm512_set_epi64(INT64_C(-8286621218977708484), INT64_C(-7274138402675197655), INT64_C( 7751882187628938139), INT64_C(-6483923961368987645), INT64_C(-7861334054348205497), INT64_C( 1332497272334397574), INT64_C(-5616284701672264206), INT64_C( 2669799685376652269)), simde_mm512_set_epi64(INT64_C(-8996724198675706045), INT64_C( 4531347139384149531), INT64_C( 6846192780240638418), INT64_C( -574036823136878484), INT64_C( 200988278415395979), INT64_C(-2971850709824830963), INT64_C(-2133313091920417115), INT64_C( 4623597880832003711)), simde_mm512_set_epi64(INT64_C(-5794728280408613058), INT64_C(-2133313091920417115), INT64_C(-7241800185060747124), INT64_C( 200988278415395979), INT64_C( -889744926927160355), INT64_C( 4531347139384149531), INT64_C(-1705673414097118029), INT64_C( 6846192780240638418)) }, { simde_mm512_set_epi64(INT64_C(-2846384524028678721), INT64_C( -114907203345763002), 
INT64_C( 6558389511800899990), INT64_C(-4552626589350649654), INT64_C( 6286737449187014461), INT64_C( 4819848349711762844), INT64_C(-8124715335380010742), INT64_C(-5554621393600539785)), UINT8_C( 38), simde_mm512_set_epi64(INT64_C(-7150215929147729012), INT64_C( 4777590230543990164), INT64_C(-7212709010936609343), INT64_C(-5423311652387614668), INT64_C(-5446377680450598123), INT64_C( 6545920141858823590), INT64_C(-6109676225565942508), INT64_C( 7950769636462343621)), simde_mm512_set_epi64(INT64_C(-2152884252876571606), INT64_C( 4319783394314419342), INT64_C( 6791040176392068936), INT64_C(-5028583849026132210), INT64_C(-4972499093922240541), INT64_C( 4727277852622403631), INT64_C( 6634263876539878890), INT64_C( 8461555540128585842)), simde_mm512_set_epi64(INT64_C(-2846384524028678721), INT64_C( -114907203345763002), INT64_C( 6634263876539878890), INT64_C(-4552626589350649654), INT64_C( 6286737449187014461), INT64_C( 4319783394314419342), INT64_C(-5028583849026132210), INT64_C(-5554621393600539785)) }, { simde_mm512_set_epi64(INT64_C(-8053906756439817478), INT64_C( 1060597045389905266), INT64_C(-2315048080352408894), INT64_C( 6920541276171034783), INT64_C( 4875597175448786738), INT64_C( 5185598888390136126), INT64_C( 5924820997437064035), INT64_C(-2097986554951915719)), UINT8_C(103), simde_mm512_set_epi64(INT64_C( -33031391515316701), INT64_C( 2632656663253186971), INT64_C( 2501795714758146857), INT64_C( 2656255738409935115), INT64_C( 6030400592505510024), INT64_C(-7025690618752263125), INT64_C( 1113429035778363206), INT64_C( 5609096713955085291)), simde_mm512_set_epi64(INT64_C(-2900658911065724872), INT64_C(-6013736823954521010), INT64_C(-8141101389788536661), INT64_C( 4749248201355274195), INT64_C(-5391280202694098336), INT64_C( 4277556482249044385), INT64_C( 5968346858124406788), INT64_C( 8575975742580047586)), simde_mm512_set_epi64(INT64_C(-8053906756439817478), INT64_C(-5391280202694098336), INT64_C( 5968346858124406788), INT64_C( 6920541276171034783), INT64_C( 
4875597175448786738), INT64_C(-5391280202694098336), INT64_C(-6013736823954521010), INT64_C(-5391280202694098336)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_permutexvar_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].idx, test_vec[i].a); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_permutexvar_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i idx; simde__m512i a; simde__m512i r; } test_vec[8] = { { UINT8_C( 87), simde_mm512_set_epi64(INT64_C( 4086853108457730066), INT64_C(-7738570880062900818), INT64_C(-5609503674875201288), INT64_C( 3966155248134972346), INT64_C( 6201510655001996332), INT64_C( 7633590894537872708), INT64_C(-5593693910291334810), INT64_C(-8884932670315115433)), simde_mm512_set_epi64(INT64_C(-4767707706458520415), INT64_C(-7083171014951853588), INT64_C(-2076988212358998594), INT64_C( 3698505898575972461), INT64_C(-8586603972668500699), INT64_C( 7848938818320954984), INT64_C(-7002500096438875648), INT64_C( 1477571573764517782)), simde_mm512_set_epi64(INT64_C( 0), INT64_C(-7083171014951853588), INT64_C( 0), INT64_C( 7848938818320954984), INT64_C( 0), INT64_C( 3698505898575972461), INT64_C(-7083171014951853588), INT64_C(-4767707706458520415)) }, { UINT8_C(157), simde_mm512_set_epi64(INT64_C( 1516975282358243755), INT64_C(-3949523894747321163), INT64_C(-2989078375862773056), INT64_C(-4700117291684372957), INT64_C(-8784413934425613521), INT64_C( 961867877660623168), INT64_C( 2426510480636680010), INT64_C(-6612602987193650875)), simde_mm512_set_epi64(INT64_C( 7391005387705442660), INT64_C(-5091463632259113685), INT64_C( 685405269785004780), INT64_C(-2602517860068074949), INT64_C( 4704994953943345443), INT64_C( 8877610218385468208), INT64_C( 5776984527519295337), INT64_C( 6526937450820584225)), simde_mm512_set_epi64(INT64_C( 4704994953943345443), INT64_C( 0), INT64_C( 0), INT64_C( 4704994953943345443), INT64_C( 
7391005387705442660), INT64_C( 6526937450820584225), INT64_C( 0), INT64_C( 685405269785004780)) }, { UINT8_C( 75), simde_mm512_set_epi64(INT64_C(-1414228054518303181), INT64_C(-3038909907977133732), INT64_C(-7842471790453318316), INT64_C(-7256600765093102205), INT64_C( 3898178537456140670), INT64_C( -393151907512138120), INT64_C( 600214805061827669), INT64_C( 3163434753014979248)), simde_mm512_set_epi64(INT64_C( 1455933536394832297), INT64_C( -840828676201867557), INT64_C( 6310833464661060096), INT64_C( 8429580363859954742), INT64_C( 5406280044045291975), INT64_C(-2284955492954404973), INT64_C(-3603637419527123210), INT64_C(-1117409850830928520)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 8429580363859954742), INT64_C( 0), INT64_C( 0), INT64_C( -840828676201867557), INT64_C( 0), INT64_C( 6310833464661060096), INT64_C(-1117409850830928520)) }, { UINT8_C( 23), simde_mm512_set_epi64(INT64_C( 6555696811272222802), INT64_C( -751557688731444383), INT64_C( 5324069355912068288), INT64_C( 5226578984858504214), INT64_C(-8776157931044543560), INT64_C(-1848672680316222475), INT64_C( 1658167909352451238), INT64_C( 377173394815185621)), simde_mm512_set_epi64(INT64_C(-4670296842224865750), INT64_C(-8736438908262001915), INT64_C(-1516874692875012272), INT64_C( 2654080637722702840), INT64_C(-4444585746033374017), INT64_C( 7620312646179506248), INT64_C(-4730811392556899367), INT64_C(-7555925455226975890)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C(-8736438908262001915), INT64_C( 0), INT64_C(-1516874692875012272), INT64_C(-8736438908262001915), INT64_C(-1516874692875012272)) }, { UINT8_C( 46), simde_mm512_set_epi64(INT64_C( 5042844271761388948), INT64_C( 877966720713550779), INT64_C(-8757547308289839577), INT64_C( 6619480224799141474), INT64_C( 3171924723684651500), INT64_C( 1941135797030545610), INT64_C( 1935432241277000941), INT64_C( 5390015454023535429)), simde_mm512_set_epi64(INT64_C( 2001511420457827007), INT64_C( 8280910196874944184), INT64_C( 
-368934386460614235), INT64_C(-9011857488067354178), INT64_C( 5578921037540219940), INT64_C(-7880186302232587827), INT64_C( 7848707034806784644), INT64_C(-7246117184140796511)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 2001511420457827007), INT64_C( 0), INT64_C(-9011857488067354178), INT64_C(-7880186302232587827), INT64_C( -368934386460614235), INT64_C( 0)) }, { UINT8_C( 43), simde_mm512_set_epi64(INT64_C(-4824857292892203785), INT64_C( 6286668337562607931), INT64_C( 1432089847019206822), INT64_C(-2235254547542691893), INT64_C(-2975358417486477451), INT64_C( 5974528986311566652), INT64_C( 4798128784982043356), INT64_C(-3663239326212228984)), simde_mm512_set_epi64(INT64_C( 2056379472574346663), INT64_C(-3486865648830471282), INT64_C( 8151787653682140580), INT64_C( -831601358278995789), INT64_C(-2800664419916301039), INT64_C( 3280702275774868225), INT64_C(-4735905134864699368), INT64_C( 7051416147935021095)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C(-3486865648830471282), INT64_C( 0), INT64_C( 8151787653682140580), INT64_C( 0), INT64_C( -831601358278995789), INT64_C( 7051416147935021095)) }, { UINT8_C(217), simde_mm512_set_epi64(INT64_C( 8817045194671758320), INT64_C(-5234965963681749811), INT64_C(-5331803015084564567), INT64_C(-2592115690296560951), INT64_C( 1248919004007478956), INT64_C( 6138870327161964525), INT64_C(-3131027839562886620), INT64_C(-2317534169293970587)), simde_mm512_set_epi64(INT64_C( 6904524208941840952), INT64_C(-7164378700336361334), INT64_C( 4351794567182281042), INT64_C(-1716872434006574729), INT64_C( -689503347190866770), INT64_C(-1389624339165317749), INT64_C(-8184083999390244234), INT64_C( 8331479114169761131)), simde_mm512_set_epi64(INT64_C( 8331479114169761131), INT64_C( 4351794567182281042), INT64_C( 0), INT64_C(-8184083999390244234), INT64_C(-1716872434006574729), INT64_C( 0), INT64_C( 0), INT64_C( 4351794567182281042)) }, { UINT8_C(120), simde_mm512_set_epi64(INT64_C( 2443726936750986290), 
INT64_C(-9169540186107924323), INT64_C(-8862352522735882724), INT64_C(-8073374806940567698), INT64_C( 5084858765896996878), INT64_C(-8091734011393453138), INT64_C(-8622672902733011795), INT64_C( 255780820358907048)), simde_mm512_set_epi64(INT64_C( 2669799685376652269), INT64_C( 7927414333096918356), INT64_C(-6028737433755228757), INT64_C(-6289085317177674471), INT64_C(-5541511610486147753), INT64_C( 6531713794566454707), INT64_C( -446705336047418133), INT64_C(-6709780755556058351)), simde_mm512_set_epi64(INT64_C( 0), INT64_C(-6028737433755228757), INT64_C(-6289085317177674471), INT64_C( 7927414333096918356), INT64_C( 7927414333096918356), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_permutexvar_epi64(test_vec[i].k, test_vec[i].idx, test_vec[i].a); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_permutexvar_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t idx[64]; const int8_t a[64]; const int8_t r[64]; } test_vec[] = { { { INT8_C( 22), INT8_C( 85), -INT8_C( 31), INT8_C( 117), -INT8_C( 32), -INT8_C( 86), -INT8_C( 120), -INT8_C( 43), INT8_C( 4), -INT8_C( 3), -INT8_C( 53), -INT8_C( 80), -INT8_C( 109), INT8_C( 15), INT8_C( 125), -INT8_C( 41), -INT8_C( 115), INT8_C( 37), -INT8_C( 14), -INT8_C( 111), -INT8_C( 68), -INT8_C( 101), INT8_C( 66), INT8_C( 26), -INT8_C( 110), -INT8_C( 106), INT8_C( 19), INT8_C( 91), -INT8_C( 27), -INT8_C( 92), -INT8_C( 115), -INT8_C( 5), -INT8_C( 7), INT8_C( 111), INT8_C( 112), -INT8_C( 39), INT8_C( 25), -INT8_C( 8), -INT8_C( 81), INT8_C( 29), -INT8_C( 11), INT8_C( 122), -INT8_C( 50), -INT8_C( 119), -INT8_C( 119), INT8_C( 75), INT8_C( 96), INT8_C( 22), INT8_C( 112), INT8_C( 82), -INT8_C( 89), INT8_C( 44), -INT8_C( 19), -INT8_C( 22), INT8_C( 71), INT8_MIN, INT8_MIN, INT8_C( 90), -INT8_C( 37), INT8_C( 101), -INT8_C( 2), INT8_C( 104), INT8_C( 96), -INT8_C( 8) }, { -INT8_C( 41), 
-INT8_C( 48), -INT8_C( 47), -INT8_C( 15), -INT8_C( 56), INT8_MIN, INT8_C( 14), -INT8_C( 66), -INT8_C( 6), -INT8_C( 36), INT8_C( 71), -INT8_C( 124), INT8_C( 39), -INT8_C( 89), -INT8_C( 102), -INT8_C( 105), -INT8_C( 7), INT8_C( 66), -INT8_C( 60), -INT8_C( 25), INT8_C( 44), INT8_C( 11), INT8_C( 103), -INT8_C( 84), INT8_C( 101), INT8_C( 66), INT8_C( 17), INT8_C( 100), -INT8_C( 86), INT8_C( 113), INT8_C( 92), -INT8_C( 126), INT8_C( 65), INT8_C( 45), INT8_C( 115), INT8_C( 9), -INT8_C( 82), -INT8_C( 127), -INT8_C( 57), -INT8_C( 88), INT8_C( 94), INT8_C( 14), INT8_C( 44), -INT8_C( 123), -INT8_C( 75), -INT8_C( 57), INT8_C( 29), -INT8_C( 81), INT8_C( 9), -INT8_C( 31), -INT8_C( 106), INT8_C( 53), -INT8_C( 20), -INT8_C( 3), -INT8_C( 31), INT8_C( 81), INT8_C( 63), -INT8_C( 14), -INT8_C( 75), -INT8_C( 23), INT8_C( 99), INT8_C( 17), INT8_C( 107), -INT8_C( 92) }, { INT8_C( 103), INT8_C( 11), INT8_C( 45), -INT8_C( 3), INT8_C( 65), INT8_C( 44), -INT8_C( 6), INT8_C( 11), -INT8_C( 56), INT8_C( 17), -INT8_C( 124), INT8_C( 9), -INT8_C( 25), -INT8_C( 105), INT8_C( 17), -INT8_C( 84), -INT8_C( 89), -INT8_C( 127), -INT8_C( 106), INT8_C( 66), INT8_C( 99), INT8_C( 100), -INT8_C( 47), INT8_C( 17), -INT8_C( 60), INT8_C( 103), -INT8_C( 25), INT8_C( 100), -INT8_C( 127), -INT8_C( 82), -INT8_C( 89), -INT8_C( 23), -INT8_C( 14), -INT8_C( 81), INT8_C( 9), INT8_C( 66), INT8_C( 66), INT8_C( 63), -INT8_C( 81), INT8_C( 113), -INT8_C( 3), -INT8_C( 75), -INT8_C( 102), -INT8_C( 36), -INT8_C( 36), -INT8_C( 124), INT8_C( 65), INT8_C( 103), INT8_C( 9), -INT8_C( 60), -INT8_C( 88), -INT8_C( 75), -INT8_C( 57), INT8_C( 44), -INT8_C( 66), -INT8_C( 41), -INT8_C( 41), INT8_C( 17), INT8_C( 100), -INT8_C( 127), INT8_C( 107), INT8_C( 94), INT8_C( 65), INT8_C( 63) } }, { { INT8_C( 63), -INT8_C( 34), -INT8_C( 83), -INT8_C( 19), INT8_C( 96), INT8_C( 117), -INT8_C( 107), -INT8_C( 66), -INT8_C( 125), -INT8_C( 62), INT8_C( 67), INT8_C( 57), -INT8_C( 119), INT8_C( 96), -INT8_C( 24), -INT8_C( 110), INT8_C( 65), INT8_C( 126), 
-INT8_C( 57), INT8_C( 45), INT8_C( 123), -INT8_C( 88), INT8_MAX, -INT8_C( 70), -INT8_C( 102), INT8_C( 52), -INT8_C( 93), -INT8_C( 3), INT8_C( 70), INT8_C( 15), -INT8_C( 95), -INT8_C( 123), -INT8_C( 19), INT8_C( 78), INT8_C( 114), INT8_C( 77), -INT8_C( 61), INT8_C( 7), INT8_C( 11), INT8_C( 71), -INT8_C( 55), INT8_C( 79), INT8_MIN, INT8_C( 82), -INT8_C( 81), INT8_C( 104), -INT8_C( 28), -INT8_C( 15), -INT8_C( 26), -INT8_C( 85), INT8_C( 30), INT8_C( 97), INT8_C( 83), -INT8_C( 99), INT8_C( 27), -INT8_C( 19), -INT8_C( 46), -INT8_C( 66), -INT8_C( 22), INT8_C( 24), -INT8_C( 51), -INT8_C( 117), -INT8_C( 99), -INT8_C( 69) }, { -INT8_C( 38), INT8_C( 15), INT8_C( 8), -INT8_C( 99), INT8_C( 22), INT8_C( 20), -INT8_C( 28), -INT8_C( 32), INT8_C( 99), INT8_C( 100), INT8_C( 50), INT8_C( 18), -INT8_C( 52), INT8_C( 23), INT8_C( 3), -INT8_C( 78), -INT8_C( 62), INT8_C( 34), INT8_C( 19), INT8_C( 22), -INT8_C( 65), INT8_C( 46), INT8_C( 3), -INT8_C( 111), -INT8_C( 19), -INT8_C( 18), -INT8_C( 87), -INT8_C( 70), INT8_C( 121), INT8_C( 70), INT8_C( 117), INT8_C( 83), INT8_C( 85), INT8_C( 126), -INT8_C( 15), INT8_C( 108), -INT8_C( 110), -INT8_C( 43), INT8_C( 76), -INT8_C( 11), INT8_C( 58), INT8_C( 126), INT8_C( 7), INT8_C( 6), -INT8_C( 107), INT8_C( 11), -INT8_C( 71), INT8_C( 88), INT8_C( 45), -INT8_C( 52), INT8_C( 110), -INT8_C( 20), -INT8_C( 5), INT8_C( 113), INT8_C( 126), -INT8_C( 24), INT8_C( 95), INT8_C( 39), -INT8_C( 94), -INT8_C( 39), INT8_C( 110), INT8_C( 24), INT8_C( 44), -INT8_C( 61) }, { -INT8_C( 61), INT8_C( 117), INT8_C( 11), INT8_C( 11), INT8_C( 85), INT8_C( 113), INT8_C( 46), INT8_C( 44), -INT8_C( 99), INT8_C( 8), -INT8_C( 99), INT8_C( 39), INT8_C( 100), INT8_C( 85), INT8_C( 58), INT8_C( 19), INT8_C( 15), INT8_C( 44), -INT8_C( 32), INT8_C( 11), -INT8_C( 39), INT8_C( 58), -INT8_C( 61), -INT8_C( 94), -INT8_C( 87), -INT8_C( 5), INT8_C( 108), INT8_C( 24), -INT8_C( 28), -INT8_C( 78), INT8_C( 126), INT8_C( 20), INT8_C( 11), INT8_C( 3), INT8_C( 110), INT8_C( 23), -INT8_C( 99), -INT8_C( 
32), INT8_C( 18), -INT8_C( 32), INT8_C( 100), -INT8_C( 78), -INT8_C( 38), INT8_C( 19), INT8_C( 88), INT8_C( 58), -INT8_C( 110), -INT8_C( 52), INT8_C( 76), INT8_C( 6), INT8_C( 117), INT8_C( 126), INT8_C( 22), INT8_C( 70), -INT8_C( 70), INT8_C( 11), INT8_C( 19), INT8_C( 44), INT8_C( 7), -INT8_C( 19), INT8_C( 23), INT8_C( 18), INT8_C( 70), -INT8_C( 39) } }, { { -INT8_C( 106), INT8_C( 29), INT8_C( 47), INT8_C( 40), -INT8_C( 13), INT8_C( 123), INT8_C( 29), INT8_C( 45), -INT8_C( 6), INT8_C( 36), INT8_C( 51), -INT8_C( 113), INT8_C( 47), -INT8_C( 20), -INT8_C( 25), INT8_C( 92), -INT8_C( 71), INT8_C( 85), INT8_C( 73), -INT8_C( 76), -INT8_C( 57), -INT8_C( 57), -INT8_C( 100), INT8_C( 38), -INT8_C( 18), INT8_C( 62), -INT8_C( 1), INT8_C( 92), INT8_C( 86), INT8_C( 44), INT8_C( 32), -INT8_C( 20), INT8_C( 73), INT8_C( 79), INT8_C( 20), INT8_C( 60), -INT8_C( 53), INT8_C( 49), INT8_C( 105), -INT8_C( 59), INT8_C( 86), -INT8_C( 99), INT8_C( 84), -INT8_C( 123), -INT8_C( 119), INT8_C( 60), -INT8_C( 30), INT8_C( 66), -INT8_C( 111), INT8_C( 43), -INT8_C( 10), INT8_C( 88), -INT8_C( 14), -INT8_C( 110), INT8_MAX, -INT8_C( 32), -INT8_C( 47), INT8_C( 126), INT8_C( 61), INT8_C( 39), -INT8_C( 86), INT8_C( 93), INT8_C( 20), -INT8_C( 12) }, { -INT8_C( 84), INT8_C( 40), INT8_C( 48), INT8_C( 119), INT8_C( 90), -INT8_C( 102), INT8_C( 60), -INT8_C( 80), INT8_C( 55), -INT8_C( 111), INT8_C( 53), -INT8_C( 64), -INT8_C( 51), INT8_C( 23), INT8_C( 3), INT8_C( 94), INT8_C( 66), -INT8_C( 7), -INT8_C( 73), INT8_C( 52), -INT8_C( 116), INT8_C( 54), INT8_C( 21), INT8_C( 93), -INT8_C( 76), INT8_C( 82), -INT8_C( 124), INT8_C( 95), -INT8_C( 81), -INT8_C( 104), INT8_C( 83), INT8_C( 91), -INT8_C( 63), -INT8_C( 125), -INT8_C( 45), INT8_C( 27), INT8_C( 29), INT8_C( 15), -INT8_C( 53), INT8_C( 84), -INT8_C( 96), INT8_C( 0), INT8_C( 21), INT8_C( 109), INT8_C( 24), INT8_C( 24), -INT8_C( 52), INT8_C( 90), INT8_C( 17), -INT8_C( 125), -INT8_C( 113), -INT8_C( 99), -INT8_C( 71), -INT8_C( 92), -INT8_C( 6), INT8_C( 109), -INT8_C( 
10), INT8_MAX, -INT8_C( 52), -INT8_C( 91), INT8_C( 23), INT8_C( 31), INT8_C( 0), -INT8_C( 40) }, { INT8_C( 21), -INT8_C( 104), INT8_C( 90), -INT8_C( 96), -INT8_C( 99), -INT8_C( 91), -INT8_C( 104), INT8_C( 24), -INT8_C( 52), INT8_C( 29), -INT8_C( 99), INT8_C( 94), INT8_C( 90), INT8_C( 24), INT8_C( 84), -INT8_C( 81), INT8_MAX, INT8_C( 54), -INT8_C( 111), -INT8_C( 71), -INT8_C( 80), -INT8_C( 80), -INT8_C( 81), -INT8_C( 53), -INT8_C( 52), INT8_C( 0), -INT8_C( 40), -INT8_C( 81), INT8_C( 21), INT8_C( 24), -INT8_C( 63), INT8_C( 24), -INT8_C( 111), INT8_C( 94), -INT8_C( 116), INT8_C( 23), -INT8_C( 64), -INT8_C( 125), INT8_C( 0), -INT8_C( 102), INT8_C( 21), -INT8_C( 104), -INT8_C( 116), -INT8_C( 102), -INT8_C( 111), INT8_C( 23), -INT8_C( 45), INT8_C( 48), -INT8_C( 7), INT8_C( 109), -INT8_C( 6), -INT8_C( 76), -INT8_C( 113), -INT8_C( 73), -INT8_C( 40), -INT8_C( 63), -INT8_C( 7), INT8_C( 0), INT8_C( 31), INT8_C( 84), INT8_C( 21), -INT8_C( 104), -INT8_C( 116), -INT8_C( 71) } }, { { -INT8_C( 93), -INT8_C( 45), -INT8_C( 13), -INT8_C( 64), -INT8_C( 29), -INT8_C( 66), INT8_C( 21), -INT8_C( 125), -INT8_C( 65), INT8_C( 42), -INT8_C( 15), -INT8_C( 41), INT8_C( 66), -INT8_C( 67), INT8_C( 49), INT8_C( 83), INT8_C( 64), -INT8_C( 64), -INT8_C( 15), -INT8_C( 7), INT8_C( 100), -INT8_C( 21), INT8_C( 102), INT8_C( 90), INT8_C( 106), INT8_C( 51), -INT8_C( 1), -INT8_C( 126), INT8_C( 82), INT8_C( 0), INT8_C( 90), -INT8_C( 11), -INT8_C( 45), INT8_C( 78), -INT8_C( 74), -INT8_C( 74), INT8_C( 12), -INT8_C( 53), INT8_C( 58), -INT8_C( 53), -INT8_C( 11), INT8_C( 43), -INT8_C( 94), INT8_C( 55), -INT8_C( 24), -INT8_C( 44), -INT8_C( 118), INT8_C( 40), -INT8_C( 108), INT8_C( 123), INT8_C( 33), -INT8_C( 7), INT8_C( 103), -INT8_C( 121), INT8_C( 83), -INT8_C( 47), -INT8_C( 70), INT8_C( 83), INT8_C( 83), INT8_C( 13), INT8_C( 83), -INT8_C( 82), INT8_C( 2), INT8_C( 38) }, { -INT8_C( 4), -INT8_C( 72), -INT8_C( 35), INT8_C( 8), -INT8_C( 125), INT8_C( 23), -INT8_C( 44), INT8_C( 120), INT8_C( 66), INT8_C( 118), 
-INT8_C( 81), INT8_C( 42), INT8_C( 74), INT8_C( 58), INT8_C( 82), -INT8_C( 33), -INT8_C( 75), INT8_C( 115), -INT8_C( 40), INT8_C( 28), -INT8_C( 6), INT8_C( 43), -INT8_C( 18), -INT8_C( 75), INT8_C( 126), INT8_C( 65), -INT8_C( 62), -INT8_C( 47), -INT8_C( 17), -INT8_C( 60), -INT8_C( 8), -INT8_C( 21), INT8_C( 125), -INT8_C( 43), -INT8_C( 12), INT8_C( 0), -INT8_C( 20), -INT8_C( 56), INT8_C( 121), INT8_C( 46), INT8_C( 62), INT8_C( 40), INT8_C( 88), -INT8_C( 119), INT8_C( 98), -INT8_C( 86), INT8_C( 104), INT8_C( 24), INT8_C( 29), INT8_C( 64), INT8_C( 52), INT8_C( 23), INT8_C( 107), INT8_C( 34), -INT8_C( 52), -INT8_C( 22), INT8_C( 100), -INT8_C( 114), -INT8_C( 69), INT8_C( 83), INT8_C( 83), -INT8_C( 77), INT8_C( 63), -INT8_C( 48) }, { INT8_C( 0), INT8_C( 28), INT8_C( 23), -INT8_C( 4), INT8_C( 0), INT8_C( 63), INT8_C( 43), INT8_C( 8), -INT8_C( 48), INT8_C( 88), INT8_C( 64), -INT8_C( 75), -INT8_C( 35), -INT8_C( 77), INT8_C( 64), INT8_C( 28), -INT8_C( 4), -INT8_C( 4), INT8_C( 64), -INT8_C( 114), -INT8_C( 20), -INT8_C( 119), INT8_C( 121), -INT8_C( 62), INT8_C( 88), INT8_C( 23), -INT8_C( 48), -INT8_C( 35), -INT8_C( 40), -INT8_C( 4), -INT8_C( 62), INT8_C( 34), INT8_C( 28), INT8_C( 82), -INT8_C( 52), -INT8_C( 52), INT8_C( 74), INT8_C( 42), -INT8_C( 69), INT8_C( 42), INT8_C( 34), -INT8_C( 119), -INT8_C( 12), -INT8_C( 22), INT8_C( 62), -INT8_C( 6), -INT8_C( 81), INT8_C( 62), -INT8_C( 6), INT8_C( 83), -INT8_C( 43), -INT8_C( 114), INT8_C( 46), INT8_C( 120), INT8_C( 28), INT8_C( 115), -INT8_C( 69), INT8_C( 28), INT8_C( 28), INT8_C( 58), INT8_C( 28), INT8_C( 104), -INT8_C( 35), INT8_C( 121) } }, { { -INT8_C( 120), INT8_C( 51), -INT8_C( 48), INT8_C( 116), -INT8_C( 5), INT8_C( 73), -INT8_C( 94), INT8_C( 57), INT8_C( 114), -INT8_C( 6), -INT8_C( 62), -INT8_C( 44), -INT8_C( 92), INT8_C( 42), -INT8_C( 20), -INT8_C( 63), INT8_C( 106), INT8_C( 33), -INT8_C( 39), -INT8_C( 42), INT8_C( 67), -INT8_C( 91), -INT8_C( 64), -INT8_C( 89), INT8_C( 52), INT8_C( 123), -INT8_C( 5), -INT8_C( 121), INT8_C( 
47), INT8_C( 58), INT8_C( 87), -INT8_C( 73), INT8_C( 109), INT8_C( 39), INT8_C( 44), INT8_C( 104), INT8_C( 113), -INT8_C( 50), -INT8_C( 95), -INT8_C( 29), -INT8_C( 55), INT8_C( 100), -INT8_C( 73), INT8_C( 109), -INT8_C( 114), -INT8_C( 92), INT8_C( 47), -INT8_C( 7), -INT8_C( 59), INT8_C( 8), -INT8_C( 49), INT8_C( 8), -INT8_C( 83), -INT8_C( 113), -INT8_C( 80), -INT8_C( 31), INT8_C( 10), -INT8_C( 85), INT8_C( 104), INT8_C( 57), -INT8_C( 27), -INT8_C( 65), -INT8_C( 15), INT8_C( 82) }, { -INT8_C( 25), INT8_C( 29), -INT8_C( 70), INT8_C( 88), -INT8_C( 21), INT8_C( 91), INT8_C( 59), -INT8_C( 76), -INT8_C( 65), -INT8_C( 14), INT8_C( 34), INT8_C( 78), -INT8_C( 106), INT8_C( 81), INT8_C( 71), INT8_C( 91), INT8_C( 89), INT8_C( 22), INT8_C( 100), INT8_C( 6), -INT8_C( 91), INT8_C( 20), -INT8_C( 24), -INT8_C( 81), -INT8_C( 65), INT8_C( 80), -INT8_C( 23), -INT8_C( 92), INT8_C( 16), -INT8_C( 38), -INT8_C( 10), -INT8_C( 9), -INT8_C( 9), -INT8_C( 80), INT8_C( 79), -INT8_C( 30), INT8_C( 11), -INT8_C( 118), -INT8_C( 105), -INT8_C( 53), INT8_C( 124), -INT8_C( 71), INT8_C( 25), INT8_C( 19), INT8_C( 10), INT8_C( 96), INT8_C( 110), INT8_C( 99), INT8_C( 118), -INT8_C( 46), INT8_C( 105), INT8_C( 27), -INT8_C( 26), INT8_C( 81), -INT8_C( 54), -INT8_C( 91), -INT8_C( 94), -INT8_C( 77), INT8_C( 73), -INT8_C( 78), -INT8_C( 115), INT8_C( 63), -INT8_C( 87), -INT8_C( 124) }, { -INT8_C( 65), INT8_C( 27), INT8_C( 89), -INT8_C( 26), -INT8_C( 78), -INT8_C( 14), INT8_C( 79), -INT8_C( 77), INT8_C( 105), INT8_C( 73), -INT8_C( 70), -INT8_C( 91), INT8_C( 11), INT8_C( 25), INT8_C( 10), INT8_C( 29), INT8_C( 25), -INT8_C( 80), INT8_C( 80), -INT8_C( 24), INT8_C( 88), -INT8_C( 118), -INT8_C( 25), -INT8_C( 53), -INT8_C( 26), -INT8_C( 78), -INT8_C( 78), -INT8_C( 76), INT8_C( 99), INT8_C( 73), -INT8_C( 81), -INT8_C( 91), INT8_C( 96), -INT8_C( 53), INT8_C( 10), INT8_C( 124), -INT8_C( 46), INT8_C( 71), -INT8_C( 80), -INT8_C( 30), -INT8_C( 14), INT8_C( 11), -INT8_C( 91), INT8_C( 96), INT8_C( 71), INT8_C( 11), INT8_C( 
99), -INT8_C( 77), INT8_C( 91), -INT8_C( 65), INT8_C( 91), -INT8_C( 65), INT8_C( 96), INT8_C( 91), INT8_C( 118), -INT8_C( 80), INT8_C( 34), INT8_C( 19), INT8_C( 124), -INT8_C( 77), -INT8_C( 118), -INT8_C( 124), -INT8_C( 46), INT8_C( 100) } }, { { -INT8_C( 17), -INT8_C( 8), INT8_C( 103), -INT8_C( 5), -INT8_C( 126), -INT8_C( 2), -INT8_C( 58), -INT8_C( 2), -INT8_C( 73), -INT8_C( 33), INT8_C( 17), -INT8_C( 63), INT8_C( 63), INT8_MIN, INT8_C( 36), -INT8_C( 75), INT8_C( 82), -INT8_C( 115), -INT8_C( 48), INT8_C( 57), -INT8_C( 33), -INT8_C( 102), -INT8_C( 34), -INT8_C( 127), INT8_C( 78), INT8_C( 40), INT8_C( 51), -INT8_C( 37), INT8_C( 103), -INT8_C( 36), INT8_C( 96), INT8_C( 87), -INT8_C( 44), -INT8_C( 57), INT8_C( 82), INT8_C( 86), -INT8_C( 59), INT8_C( 24), INT8_C( 84), INT8_C( 124), -INT8_C( 9), INT8_C( 102), INT8_C( 61), INT8_C( 54), -INT8_C( 26), INT8_C( 97), -INT8_C( 21), INT8_C( 56), -INT8_C( 18), -INT8_C( 69), INT8_C( 113), -INT8_C( 51), INT8_C( 85), INT8_C( 80), INT8_C( 78), -INT8_C( 93), INT8_C( 120), -INT8_C( 127), INT8_MAX, -INT8_C( 33), INT8_C( 93), -INT8_C( 33), INT8_C( 54), INT8_C( 49) }, { -INT8_C( 90), -INT8_C( 120), -INT8_C( 121), INT8_C( 107), -INT8_C( 96), -INT8_C( 36), -INT8_C( 25), -INT8_C( 105), INT8_C( 66), INT8_C( 36), -INT8_C( 51), INT8_C( 40), -INT8_C( 123), -INT8_C( 72), INT8_C( 96), INT8_C( 115), INT8_C( 115), -INT8_C( 46), INT8_C( 65), -INT8_C( 55), INT8_C( 34), -INT8_C( 113), INT8_C( 108), -INT8_C( 102), INT8_C( 17), -INT8_C( 21), INT8_C( 121), INT8_C( 110), -INT8_C( 54), -INT8_C( 80), -INT8_C( 96), INT8_C( 112), INT8_C( 56), INT8_C( 39), -INT8_C( 37), -INT8_C( 39), INT8_C( 3), -INT8_C( 62), INT8_C( 112), INT8_C( 69), -INT8_C( 26), INT8_C( 62), INT8_C( 109), INT8_C( 107), -INT8_C( 10), -INT8_C( 50), -INT8_C( 33), INT8_C( 106), -INT8_C( 96), INT8_C( 32), INT8_C( 51), -INT8_C( 62), -INT8_C( 81), -INT8_C( 97), INT8_C( 92), -INT8_C( 64), -INT8_C( 117), -INT8_C( 43), INT8_C( 47), INT8_C( 85), -INT8_C( 123), -INT8_C( 49), -INT8_C( 58), -INT8_C( 66) 
}, { INT8_C( 106), -INT8_C( 117), INT8_C( 69), INT8_C( 85), -INT8_C( 121), -INT8_C( 58), -INT8_C( 25), -INT8_C( 58), -INT8_C( 64), INT8_C( 112), -INT8_C( 46), -INT8_C( 120), -INT8_C( 66), -INT8_C( 90), INT8_C( 3), -INT8_C( 97), INT8_C( 65), -INT8_C( 72), INT8_C( 115), -INT8_C( 43), INT8_C( 112), INT8_C( 121), -INT8_C( 96), -INT8_C( 120), INT8_C( 96), -INT8_C( 26), -INT8_C( 62), INT8_C( 110), INT8_C( 69), -INT8_C( 54), INT8_C( 56), -INT8_C( 102), INT8_C( 34), -INT8_C( 105), INT8_C( 65), INT8_C( 108), -INT8_C( 36), INT8_C( 17), INT8_C( 34), -INT8_C( 123), -INT8_C( 64), INT8_C( 112), -INT8_C( 49), INT8_C( 92), INT8_C( 112), INT8_C( 39), INT8_C( 107), -INT8_C( 117), -INT8_C( 33), INT8_C( 85), INT8_C( 32), -INT8_C( 72), -INT8_C( 113), INT8_C( 115), INT8_C( 96), -INT8_C( 39), -INT8_C( 117), -INT8_C( 120), -INT8_C( 66), INT8_C( 112), -INT8_C( 80), INT8_C( 112), INT8_C( 92), INT8_C( 32) } }, { { -INT8_C( 10), -INT8_C( 95), -INT8_C( 105), -INT8_C( 6), INT8_C( 100), INT8_C( 7), INT8_C( 63), INT8_C( 74), INT8_C( 69), -INT8_C( 83), -INT8_C( 74), INT8_C( 60), INT8_C( 123), -INT8_C( 107), -INT8_C( 90), INT8_C( 27), -INT8_C( 75), -INT8_C( 39), -INT8_C( 35), INT8_C( 100), INT8_C( 120), INT8_C( 57), INT8_C( 37), INT8_C( 3), INT8_C( 14), INT8_C( 84), INT8_C( 89), -INT8_C( 108), INT8_C( 35), INT8_C( 31), INT8_C( 82), INT8_C( 25), -INT8_C( 64), -INT8_C( 23), INT8_C( 19), INT8_C( 36), -INT8_C( 16), INT8_C( 83), INT8_C( 111), INT8_C( 54), INT8_C( 0), INT8_C( 37), INT8_C( 114), INT8_C( 123), -INT8_C( 70), INT8_C( 24), -INT8_C( 106), INT8_C( 111), -INT8_C( 15), INT8_C( 115), -INT8_C( 45), INT8_C( 105), -INT8_C( 84), -INT8_C( 8), INT8_C( 109), -INT8_C( 70), INT8_C( 76), -INT8_C( 58), INT8_C( 78), INT8_C( 111), -INT8_C( 27), -INT8_C( 96), -INT8_C( 119), -INT8_C( 91) }, { -INT8_C( 119), -INT8_C( 100), -INT8_C( 54), INT8_C( 122), -INT8_C( 17), INT8_C( 57), -INT8_C( 80), -INT8_C( 17), INT8_C( 94), INT8_C( 34), INT8_C( 106), INT8_C( 24), INT8_C( 58), INT8_C( 0), -INT8_C( 121), INT8_C( 43), 
INT8_C( 115), INT8_C( 90), -INT8_C( 108), INT8_C( 31), INT8_C( 83), INT8_C( 1), -INT8_C( 38), -INT8_C( 97), -INT8_C( 57), INT8_C( 40), INT8_C( 15), -INT8_C( 84), -INT8_C( 55), -INT8_C( 104), INT8_C( 82), INT8_C( 82), INT8_C( 52), INT8_C( 28), -INT8_C( 52), INT8_C( 36), INT8_C( 85), INT8_C( 124), INT8_C( 19), -INT8_C( 77), -INT8_C( 98), INT8_C( 126), -INT8_C( 53), -INT8_C( 40), INT8_C( 126), INT8_C( 82), INT8_C( 3), -INT8_C( 14), -INT8_C( 84), -INT8_C( 104), INT8_C( 17), -INT8_C( 1), -INT8_C( 103), -INT8_C( 21), -INT8_C( 97), INT8_C( 97), INT8_C( 20), -INT8_C( 82), INT8_C( 13), -INT8_C( 35), INT8_C( 70), INT8_C( 95), INT8_C( 47), INT8_C( 122) }, { -INT8_C( 97), INT8_C( 28), -INT8_C( 97), INT8_C( 13), INT8_C( 85), -INT8_C( 17), INT8_C( 122), INT8_C( 106), INT8_C( 57), INT8_C( 82), -INT8_C( 97), INT8_C( 70), -INT8_C( 35), INT8_C( 1), INT8_C( 19), -INT8_C( 84), -INT8_C( 21), INT8_C( 40), -INT8_C( 104), INT8_C( 85), INT8_C( 20), -INT8_C( 82), INT8_C( 124), INT8_C( 122), -INT8_C( 121), INT8_C( 83), INT8_C( 40), INT8_C( 83), INT8_C( 36), INT8_C( 82), -INT8_C( 108), INT8_C( 40), -INT8_C( 119), INT8_C( 126), INT8_C( 31), INT8_C( 85), -INT8_C( 84), INT8_C( 31), -INT8_C( 14), -INT8_C( 97), -INT8_C( 119), INT8_C( 124), INT8_C( 17), -INT8_C( 35), INT8_C( 13), -INT8_C( 57), -INT8_C( 38), -INT8_C( 14), -INT8_C( 104), -INT8_C( 1), INT8_C( 31), INT8_C( 126), INT8_C( 126), INT8_C( 20), INT8_C( 82), INT8_C( 13), INT8_C( 58), -INT8_C( 80), -INT8_C( 121), -INT8_C( 14), INT8_C( 124), INT8_C( 52), INT8_C( 34), INT8_C( 124) } }, { { INT8_C( 123), -INT8_C( 4), -INT8_C( 98), -INT8_C( 48), INT8_C( 120), -INT8_C( 78), -INT8_C( 125), INT8_C( 23), INT8_C( 48), INT8_C( 78), -INT8_C( 17), -INT8_C( 82), -INT8_C( 96), -INT8_C( 13), -INT8_C( 96), INT8_C( 77), -INT8_C( 117), -INT8_C( 78), INT8_C( 76), INT8_C( 36), -INT8_C( 99), -INT8_C( 21), -INT8_C( 123), -INT8_C( 79), -INT8_C( 103), -INT8_C( 109), -INT8_C( 114), -INT8_C( 33), -INT8_C( 14), -INT8_C( 66), INT8_C( 90), INT8_C( 110), -INT8_C( 70), 
-INT8_C( 8), INT8_C( 62), INT8_C( 50), -INT8_C( 86), -INT8_C( 62), INT8_C( 73), -INT8_C( 38), INT8_C( 16), INT8_C( 57), -INT8_C( 119), -INT8_C( 79), INT8_C( 44), INT8_C( 41), -INT8_C( 2), -INT8_C( 73), -INT8_C( 37), INT8_C( 74), -INT8_C( 37), INT8_C( 121), INT8_C( 54), INT8_C( 97), INT8_C( 42), -INT8_C( 49), -INT8_C( 12), -INT8_C( 71), -INT8_C( 81), -INT8_C( 26), INT8_C( 119), INT8_C( 9), INT8_C( 84), INT8_C( 49) }, { INT8_C( 1), -INT8_C( 109), INT8_C( 99), -INT8_C( 84), INT8_C( 85), -INT8_C( 83), -INT8_C( 122), INT8_C( 101), -INT8_C( 26), INT8_C( 15), INT8_C( 22), INT8_C( 18), INT8_C( 57), INT8_C( 20), -INT8_C( 55), INT8_C( 20), INT8_C( 95), -INT8_C( 92), -INT8_C( 115), -INT8_C( 107), INT8_C( 5), -INT8_C( 72), INT8_C( 100), -INT8_C( 7), INT8_C( 113), INT8_C( 19), -INT8_C( 32), -INT8_C( 24), INT8_C( 28), INT8_C( 52), INT8_C( 25), INT8_C( 30), -INT8_C( 57), INT8_C( 124), -INT8_C( 54), INT8_C( 28), INT8_C( 41), INT8_C( 80), -INT8_C( 126), INT8_C( 15), INT8_C( 96), -INT8_C( 104), INT8_C( 33), -INT8_C( 103), -INT8_C( 83), -INT8_C( 22), -INT8_C( 83), INT8_C( 12), -INT8_C( 113), INT8_C( 59), -INT8_C( 95), -INT8_C( 108), -INT8_C( 13), INT8_C( 5), -INT8_C( 114), INT8_C( 100), INT8_C( 25), INT8_C( 110), INT8_C( 76), INT8_C( 53), -INT8_C( 94), INT8_C( 101), INT8_C( 83), INT8_C( 106) }, { INT8_C( 53), -INT8_C( 94), INT8_C( 25), INT8_C( 95), INT8_C( 25), -INT8_C( 95), -INT8_C( 84), -INT8_C( 7), -INT8_C( 113), -INT8_C( 55), INT8_C( 12), -INT8_C( 83), -INT8_C( 57), -INT8_C( 108), -INT8_C( 57), INT8_C( 20), INT8_C( 18), -INT8_C( 95), INT8_C( 57), INT8_C( 41), INT8_C( 52), -INT8_C( 103), -INT8_C( 83), INT8_C( 59), INT8_C( 19), -INT8_C( 107), -INT8_C( 55), INT8_C( 30), -INT8_C( 95), INT8_C( 83), -INT8_C( 32), -INT8_C( 83), INT8_C( 76), INT8_C( 25), INT8_C( 83), -INT8_C( 95), INT8_C( 33), INT8_C( 99), INT8_C( 15), -INT8_C( 32), INT8_C( 95), INT8_C( 110), INT8_C( 15), INT8_C( 59), -INT8_C( 83), -INT8_C( 104), INT8_C( 83), INT8_C( 100), -INT8_C( 24), INT8_C( 22), -INT8_C( 24), INT8_C( 
110), -INT8_C( 114), INT8_C( 124), INT8_C( 33), INT8_C( 20), -INT8_C( 13), INT8_C( 110), INT8_C( 12), -INT8_C( 126), INT8_C( 100), INT8_C( 15), INT8_C( 5), INT8_C( 59) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i idx = simde_mm512_loadu_epi8(test_vec[i].idx); simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i r = simde_mm512_permutexvar_epi8(idx, a); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i idx = simde_test_x86_random_i8x64(); simde__m512i a = simde_test_x86_random_i8x64(); simde__m512i r = simde_mm512_permutexvar_epi8(idx, a); simde_test_x86_write_i8x64(2, idx, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x64(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif }
/* Test for simde_mm512_mask_permutexvar_epi8.
 *
 * With the `#if 1` branch active, every test_vec entry supplies a source vector (src), a 64-bit
 * mask (k), an index vector (idx) and a data vector (a). The three byte arrays are loaded with
 * simde_mm512_loadu_epi8, passed to simde_mm512_mask_permutexvar_epi8(src, k, idx, a), and the
 * result is handed to simde_test_x86_assert_equal_i8x64 together with the stored r.
 * Returns 0 once the loop has gone through all entries.
 *
 * The `#else` branch is compiled only when the `#if 1` is changed to 0: it draws 8 random
 * src/k/idx/a sets, computes r, emits the five values through the simde_test_x86_write_*
 * helpers, and returns 1.
 *
 * NOTE(review): the stored vectors look consistent with
 *   r[i] = (bit i of k is set) ? a[idx[i] & 63] : src[i]
 * -- spot-checked on a handful of lanes only; confirm against the intrinsic's reference
 * documentation before relying on it. */
static int test_simde_mm512_mask_permutexvar_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 /* 1 = replay the stored vectors below; 0 = compile the generator in the #else branch */ static const struct { const int8_t src[64]; const simde__mmask64 k; const int8_t idx[64]; const int8_t a[64]; const int8_t r[64]; } test_vec[] = { { { INT8_C( 31), -INT8_C( 83), -INT8_C( 33), -INT8_C( 110), -INT8_C( 67), INT8_C( 2), -INT8_C( 29), INT8_C( 6), INT8_C( 42), -INT8_C( 8), INT8_MIN, INT8_C( 3), INT8_C( 107), INT8_C( 77), -INT8_C( 49), -INT8_C( 47), -INT8_C( 17), INT8_C( 121), -INT8_C( 107), INT8_C( 106), -INT8_C( 11), INT8_C( 45), INT8_C( 39), -INT8_C( 113), INT8_C( 27), -INT8_C( 96), INT8_C( 77), -INT8_C( 91), -INT8_C( 69), -INT8_C( 74), -INT8_C( 12), -INT8_C( 38), INT8_C( 99), -INT8_C( 44), INT8_C( 109), INT8_C( 32), -INT8_C( 42), INT8_C( 80), INT8_C( 39), INT8_C( 1), INT8_C( 72), -INT8_C( 89), INT8_C( 4), -INT8_C( 77), -INT8_C( 12), -INT8_C( 44), -INT8_C( 124), -INT8_C( 29), INT8_C( 77), INT8_C( 25), INT8_C( 77), INT8_C( 66), INT8_C( 71), INT8_C( 116), -INT8_C( 47), INT8_C( 98), INT8_C( 20), INT8_C( 31), INT8_C( 7), -INT8_C( 49), -INT8_C( 43), -INT8_C( 5),
-INT8_C( 86), INT8_C( 56) }, UINT64_C(12729284054446839759), { INT8_C( 39), -INT8_C( 85), INT8_C( 99), INT8_C( 27), INT8_MAX, -INT8_C( 24), -INT8_C( 2), -INT8_C( 52), INT8_C( 1), INT8_C( 75), INT8_C( 14), INT8_C( 72), -INT8_C( 64), -INT8_C( 32), -INT8_C( 86), -INT8_C( 44), -INT8_C( 1), -INT8_C( 79), -INT8_C( 92), -INT8_C( 44), -INT8_C( 83), INT8_C( 78), INT8_C( 12), INT8_C( 124), INT8_C( 101), INT8_C( 101), INT8_C( 34), -INT8_C( 52), -INT8_C( 27), -INT8_C( 55), INT8_C( 124), INT8_C( 12), INT8_C( 117), -INT8_C( 32), INT8_C( 39), -INT8_C( 12), -INT8_C( 56), INT8_C( 37), -INT8_C( 63), -INT8_C( 55), INT8_C( 113), -INT8_C( 49), INT8_C( 18), INT8_C( 49), -INT8_C( 81), -INT8_C( 68), INT8_C( 5), -INT8_C( 82), INT8_C( 110), -INT8_C( 87), -INT8_C( 126), INT8_C( 27), -INT8_C( 9), -INT8_C( 113), -INT8_C( 105), INT8_C( 92), -INT8_C( 12), -INT8_C( 70), INT8_C( 41), -INT8_C( 38), -INT8_C( 125), -INT8_C( 91), -INT8_C( 26), -INT8_C( 8) }, { -INT8_C( 123), INT8_C( 14), -INT8_C( 19), INT8_C( 77), INT8_C( 51), -INT8_C( 82), INT8_C( 23), -INT8_C( 92), INT8_C( 125), INT8_C( 41), -INT8_C( 43), INT8_C( 45), -INT8_C( 27), -INT8_C( 37), -INT8_C( 37), INT8_C( 83), -INT8_C( 124), INT8_C( 94), INT8_C( 110), INT8_C( 124), -INT8_C( 19), INT8_C( 6), -INT8_C( 40), -INT8_C( 31), -INT8_C( 64), INT8_C( 1), -INT8_C( 69), INT8_C( 67), -INT8_C( 89), -INT8_C( 94), INT8_C( 60), INT8_C( 44), -INT8_C( 80), INT8_C( 41), INT8_C( 122), -INT8_C( 29), -INT8_C( 41), -INT8_C( 111), -INT8_C( 120), INT8_C( 84), -INT8_C( 70), INT8_C( 93), -INT8_C( 127), -INT8_C( 97), INT8_C( 56), INT8_C( 93), -INT8_C( 13), -INT8_C( 67), -INT8_C( 69), INT8_C( 97), INT8_C( 57), -INT8_C( 88), INT8_C( 103), INT8_C( 17), -INT8_C( 119), INT8_C( 39), INT8_C( 19), INT8_C( 69), INT8_C( 107), -INT8_C( 70), -INT8_C( 25), -INT8_C( 89), -INT8_C( 26), -INT8_C( 105) }, { INT8_C( 84), -INT8_C( 97), -INT8_C( 29), INT8_C( 67), -INT8_C( 67), INT8_C( 2), -INT8_C( 26), -INT8_C( 27), INT8_C( 14), INT8_C( 45), -INT8_C( 37), INT8_C( 3), -INT8_C( 123),
INT8_C( 77), -INT8_C( 49), -INT8_C( 47), -INT8_C( 105), INT8_C( 121), -INT8_C( 107), -INT8_C( 19), INT8_C( 93), INT8_C( 45), -INT8_C( 27), -INT8_C( 113), INT8_C( 27), -INT8_C( 111), INT8_C( 122), -INT8_C( 91), -INT8_C( 69), INT8_C( 41), -INT8_C( 12), -INT8_C( 27), INT8_C( 17), -INT8_C( 80), INT8_C( 84), INT8_C( 32), -INT8_C( 42), -INT8_C( 111), INT8_C( 14), INT8_C( 1), INT8_C( 72), -INT8_C( 89), INT8_C( 4), -INT8_C( 77), -INT8_C( 12), -INT8_C( 44), -INT8_C( 124), -INT8_C( 13), -INT8_C( 13), INT8_C( 93), -INT8_C( 19), INT8_C( 66), INT8_C( 71), INT8_C( 83), -INT8_C( 47), -INT8_C( 89), INT8_C( 20), INT8_C( 31), INT8_C( 7), -INT8_C( 49), INT8_C( 77), -INT8_C( 111), -INT8_C( 86), INT8_C( 19) } }, { { -INT8_C( 48), INT8_C( 96), INT8_C( 122), -INT8_C( 89), -INT8_C( 15), INT8_C( 2), -INT8_C( 5), -INT8_C( 85), INT8_C( 96), INT8_C( 125), INT8_C( 75), -INT8_C( 104), -INT8_C( 38), INT8_C( 62), INT8_C( 85), -INT8_C( 107), -INT8_C( 97), -INT8_C( 114), INT8_C( 61), INT8_C( 7), -INT8_C( 96), -INT8_C( 58), INT8_C( 46), -INT8_C( 77), INT8_C( 11), -INT8_C( 103), INT8_C( 109), -INT8_C( 14), INT8_C( 64), INT8_C( 83), -INT8_C( 119), INT8_C( 16), -INT8_C( 76), INT8_C( 4), -INT8_C( 73), -INT8_C( 91), INT8_C( 6), -INT8_C( 77), INT8_C( 81), INT8_C( 102), INT8_C( 48), -INT8_C( 100), -INT8_C( 1), INT8_C( 10), -INT8_C( 38), INT8_C( 84), -INT8_C( 97), INT8_C( 121), -INT8_C( 29), -INT8_C( 36), INT8_MIN, -INT8_C( 125), -INT8_C( 94), -INT8_C( 81), INT8_C( 54), -INT8_C( 82), INT8_C( 72), -INT8_C( 93), -INT8_C( 96), -INT8_C( 119), -INT8_C( 10), INT8_C( 42), -INT8_C( 103), -INT8_C( 86) }, UINT64_C( 3790800550792024366), { INT8_C( 61), -INT8_C( 102), INT8_C( 62), INT8_C( 23), -INT8_C( 18), -INT8_C( 35), -INT8_C( 112), -INT8_C( 47), -INT8_C( 71), INT8_C( 17), INT8_C( 84), INT8_C( 91), -INT8_C( 64), -INT8_C( 118), INT8_C( 9), INT8_C( 8), INT8_C( 45), -INT8_C( 86), -INT8_C( 111), INT8_C( 36), -INT8_C( 44), INT8_C( 43), -INT8_C( 50), INT8_C( 2), INT8_C( 124), INT8_C( 30), INT8_C( 54), INT8_MIN, -INT8_C(
65), -INT8_C( 47), -INT8_C( 76), -INT8_C( 4), INT8_C( 107), -INT8_C( 14), INT8_C( 19), INT8_C( 90), -INT8_C( 49), -INT8_C( 92), INT8_C( 43), -INT8_C( 120), -INT8_C( 75), INT8_MIN, -INT8_C( 29), INT8_C( 117), INT8_C( 10), -INT8_C( 19), INT8_C( 125), INT8_C( 56), -INT8_C( 105), INT8_C( 15), INT8_C( 92), INT8_C( 107), INT8_C( 58), INT8_C( 42), INT8_C( 109), -INT8_C( 74), INT8_C( 73), -INT8_C( 93), INT8_C( 54), INT8_C( 8), INT8_C( 117), -INT8_C( 22), INT8_C( 5), -INT8_C( 32) }, { -INT8_C( 36), INT8_C( 24), INT8_C( 58), -INT8_C( 85), -INT8_C( 68), INT8_C( 102), INT8_C( 51), INT8_C( 113), -INT8_C( 26), INT8_C( 22), -INT8_C( 26), -INT8_C( 16), INT8_C( 3), INT8_C( 100), INT8_C( 40), -INT8_C( 102), INT8_C( 115), -INT8_C( 124), INT8_C( 5), -INT8_C( 83), -INT8_C( 81), INT8_C( 114), INT8_C( 99), -INT8_C( 8), INT8_C( 22), -INT8_C( 103), INT8_C( 0), -INT8_C( 117), -INT8_C( 125), INT8_C( 5), INT8_C( 107), INT8_C( 95), INT8_C( 30), -INT8_C( 90), INT8_C( 10), -INT8_C( 38), INT8_C( 12), INT8_C( 61), INT8_C( 76), -INT8_C( 14), INT8_C( 83), INT8_C( 50), -INT8_C( 30), INT8_C( 87), -INT8_C( 106), INT8_C( 11), -INT8_C( 15), INT8_C( 9), -INT8_C( 113), -INT8_C( 9), -INT8_C( 74), INT8_C( 62), INT8_C( 105), INT8_C( 25), INT8_C( 54), INT8_MAX, -INT8_C( 78), INT8_C( 55), INT8_C( 10), INT8_C( 53), INT8_C( 60), INT8_C( 118), -INT8_C( 108), INT8_C( 90) }, { -INT8_C( 48), INT8_C( 0), -INT8_C( 108), -INT8_C( 8), -INT8_C( 15), INT8_C( 5), -INT8_C( 5), -INT8_C( 85), INT8_C( 55), INT8_C( 125), INT8_C( 75), -INT8_C( 104), -INT8_C( 36), INT8_C( 62), INT8_C( 22), -INT8_C( 107), -INT8_C( 97), -INT8_C( 114), INT8_C( 61), INT8_C( 7), -INT8_C( 81), -INT8_C( 58), INT8_C( 40), -INT8_C( 77), INT8_C( 11), -INT8_C( 103), INT8_C( 54), -INT8_C( 14), INT8_C( 90), -INT8_C( 124), -INT8_C( 119), INT8_C( 16), -INT8_C( 76), INT8_C( 4), -INT8_C( 83), -INT8_C( 91), INT8_C( 6), -INT8_C( 77), INT8_C( 81), INT8_C( 102), INT8_C( 25), -INT8_C( 100), -INT8_C( 1), INT8_C( 10), -INT8_C( 38), INT8_C( 11), -INT8_C( 97), -INT8_C(
78), -INT8_C( 8), -INT8_C( 102), INT8_MIN, INT8_C( 87), INT8_C( 10), -INT8_C( 81), INT8_C( 54), INT8_C( 54), INT8_C( 72), -INT8_C( 93), INT8_C( 54), -INT8_C( 119), INT8_C( 25), -INT8_C( 30), -INT8_C( 103), -INT8_C( 86) } }, { { INT8_C( 28), -INT8_C( 98), INT8_C( 53), INT8_C( 40), -INT8_C( 37), -INT8_C( 127), INT8_C( 26), INT8_C( 47), -INT8_C( 77), -INT8_C( 4), -INT8_C( 122), INT8_C( 74), INT8_C( 7), INT8_C( 119), INT8_C( 83), -INT8_C( 105), INT8_C( 110), INT8_C( 10), -INT8_C( 43), -INT8_C( 40), INT8_C( 35), INT8_C( 12), INT8_C( 87), -INT8_C( 42), INT8_C( 67), INT8_C( 98), INT8_C( 11), INT8_MAX, -INT8_C( 40), -INT8_C( 96), -INT8_C( 38), -INT8_C( 12), INT8_C( 62), INT8_C( 15), INT8_C( 28), INT8_C( 26), -INT8_C( 112), INT8_C( 54), INT8_C( 73), INT8_C( 67), INT8_C( 50), -INT8_C( 49), -INT8_C( 115), INT8_C( 58), INT8_C( 70), -INT8_C( 31), -INT8_C( 47), -INT8_C( 75), -INT8_C( 21), -INT8_C( 90), -INT8_C( 115), INT8_C( 14), -INT8_C( 78), -INT8_C( 28), -INT8_C( 28), -INT8_C( 11), INT8_C( 70), -INT8_C( 16), INT8_C( 117), INT8_C( 30), -INT8_C( 112), INT8_C( 79), INT8_C( 18), -INT8_C( 50) }, UINT64_C(10894543283160624734), { INT8_C( 0), -INT8_C( 65), -INT8_C( 47), INT8_C( 71), -INT8_C( 96), -INT8_C( 94), -INT8_C( 4), -INT8_C( 117), INT8_C( 72), -INT8_C( 119), -INT8_C( 103), -INT8_C( 5), INT8_C( 109), INT8_C( 126), -INT8_C( 16), -INT8_C( 76), INT8_C( 110), INT8_C( 101), -INT8_C( 46), -INT8_C( 2), -INT8_C( 76), -INT8_C( 27), -INT8_C( 52), INT8_C( 18), INT8_C( 19), -INT8_C( 75), INT8_C( 0), INT8_C( 120), -INT8_C( 26), INT8_C( 50), INT8_C( 15), -INT8_C( 25), -INT8_C( 15), -INT8_C( 32), INT8_C( 46), -INT8_C( 111), -INT8_C( 126), INT8_C( 42), INT8_C( 28), -INT8_C( 54), -INT8_C( 77), -INT8_C( 75), -INT8_C( 59), INT8_C( 32), INT8_C( 51), -INT8_C( 74), -INT8_C( 44), -INT8_C( 95), INT8_C( 27), -INT8_C( 89), -INT8_C( 97), -INT8_C( 48), -INT8_C( 116), INT8_C( 108), -INT8_C( 30), -INT8_C( 97), INT8_C( 33), -INT8_C( 29), INT8_C( 23), INT8_C( 7), INT8_C( 21), INT8_C( 38), -INT8_C( 18),
INT8_C( 6) }, { INT8_C( 6), INT8_C( 28), -INT8_C( 105), -INT8_C( 120), INT8_C( 70), -INT8_C( 77), INT8_C( 83), -INT8_C( 7), INT8_C( 104), INT8_C( 24), INT8_C( 26), -INT8_C( 100), -INT8_C( 50), -INT8_C( 18), INT8_C( 61), -INT8_C( 22), -INT8_C( 107), -INT8_C( 35), -INT8_C( 70), INT8_C( 33), INT8_C( 73), -INT8_C( 100), -INT8_C( 63), INT8_C( 106), INT8_MAX, -INT8_C( 40), INT8_C( 113), -INT8_C( 108), -INT8_C( 1), INT8_C( 96), -INT8_C( 102), INT8_C( 5), INT8_C( 124), INT8_C( 49), -INT8_C( 114), -INT8_C( 61), -INT8_C( 28), -INT8_C( 31), -INT8_C( 68), INT8_C( 77), -INT8_C( 7), -INT8_C( 42), -INT8_C( 23), -INT8_C( 56), -INT8_C( 59), INT8_C( 38), -INT8_C( 78), INT8_C( 90), INT8_C( 3), INT8_C( 108), INT8_C( 124), INT8_C( 76), INT8_C( 8), INT8_C( 61), -INT8_C( 74), -INT8_C( 120), INT8_C( 21), INT8_C( 40), INT8_C( 28), INT8_C( 20), -INT8_C( 120), -INT8_C( 73), INT8_C( 26), INT8_C( 4) }, { INT8_C( 28), INT8_C( 4), -INT8_C( 35), -INT8_C( 7), INT8_C( 124), -INT8_C( 127), -INT8_C( 120), INT8_C( 47), -INT8_C( 77), INT8_C( 24), -INT8_C( 40), INT8_C( 20), INT8_C( 7), INT8_C( 26), INT8_C( 83), -INT8_C( 105), INT8_C( 110), INT8_C( 10), -INT8_C( 43), INT8_C( 26), INT8_C( 35), -INT8_C( 31), -INT8_C( 50), -INT8_C( 70), INT8_C( 67), INT8_C( 61), INT8_C( 6), INT8_C( 21), -INT8_C( 40), INT8_C( 124), -INT8_C( 22), INT8_C( 77), INT8_C( 62), INT8_C( 15), -INT8_C( 78), INT8_C( 26), -INT8_C( 112), -INT8_C( 23), -INT8_C( 1), INT8_C( 67), INT8_C( 76), -INT8_C( 49), -INT8_C( 115), INT8_C( 58), INT8_C( 76), -INT8_C( 74), -INT8_C( 47), -INT8_C( 75), -INT8_C( 108), -INT8_C( 90), -INT8_C( 115), INT8_C( 14), -INT8_C( 50), -INT8_C( 59), -INT8_C( 28), -INT8_C( 11), INT8_C( 49), -INT8_C( 61), INT8_C( 106), INT8_C( 30), -INT8_C( 100), INT8_C( 79), INT8_C( 18), INT8_C( 83) } }, { { -INT8_C( 24), -INT8_C( 88), -INT8_C( 57), -INT8_C( 51), -INT8_C( 119), -INT8_C( 124), INT8_C( 26), -INT8_C( 126), INT8_C( 90), INT8_C( 3), INT8_C( 74), INT8_C( 31), INT8_C( 41), -INT8_C( 4), INT8_C( 122), INT8_C( 45), INT8_C( 104),
-INT8_C( 10), INT8_C( 121), INT8_C( 113), INT8_C( 51), INT8_C( 48), -INT8_C( 7), INT8_C( 72), INT8_C( 88), INT8_C( 21), INT8_C( 93), -INT8_C( 32), -INT8_C( 52), INT8_C( 119), -INT8_C( 28), -INT8_C( 75), INT8_C( 31), -INT8_C( 84), -INT8_C( 126), -INT8_C( 88), INT8_C( 48), -INT8_C( 100), INT8_C( 42), -INT8_C( 118), -INT8_C( 97), INT8_C( 117), -INT8_C( 86), -INT8_C( 56), INT8_C( 113), INT8_C( 36), -INT8_C( 11), -INT8_C( 38), INT8_C( 26), INT8_C( 111), INT8_C( 75), INT8_C( 77), -INT8_C( 97), INT8_C( 68), -INT8_C( 107), -INT8_C( 9), INT8_C( 89), -INT8_C( 14), -INT8_C( 41), INT8_C( 38), INT8_C( 105), -INT8_C( 69), -INT8_C( 37), -INT8_C( 120) }, UINT64_C(10962425570514722151), { -INT8_C( 48), -INT8_C( 52), INT8_C( 96), INT8_C( 65), -INT8_C( 16), INT8_C( 86), INT8_C( 27), INT8_C( 10), -INT8_C( 59), INT8_C( 102), INT8_C( 87), INT8_C( 100), -INT8_C( 86), -INT8_C( 20), INT8_C( 91), INT8_C( 4), -INT8_C( 33), INT8_C( 50), INT8_C( 42), INT8_C( 72), -INT8_C( 19), INT8_C( 5), -INT8_C( 47), INT8_C( 85), INT8_C( 98), INT8_C( 1), -INT8_C( 20), INT8_C( 91), INT8_C( 92), INT8_C( 14), -INT8_C( 13), INT8_C( 44), -INT8_C( 38), INT8_C( 83), INT8_C( 110), -INT8_C( 54), -INT8_C( 87), -INT8_C( 119), -INT8_C( 44), INT8_C( 110), -INT8_C( 16), INT8_C( 43), -INT8_C( 46), -INT8_C( 102), INT8_C( 24), INT8_C( 45), -INT8_C( 98), -INT8_C( 9), INT8_C( 95), -INT8_C( 56), INT8_C( 63), INT8_C( 77), -INT8_C( 51), INT8_C( 16), -INT8_C( 94), INT8_C( 47), INT8_C( 18), -INT8_C( 114), -INT8_C( 118), INT8_C( 110), -INT8_C( 99), INT8_C( 125), -INT8_C( 101), INT8_C( 119) }, { -INT8_C( 47), INT8_C( 9), INT8_C( 66), INT8_C( 122), -INT8_C( 110), INT8_C( 22), -INT8_C( 23), -INT8_C( 126), INT8_C( 66), -INT8_C( 69), INT8_C( 29), INT8_C( 90), -INT8_C( 23), -INT8_C( 69), INT8_C( 81), INT8_C( 72), -INT8_C( 124), -INT8_C( 112), -INT8_C( 107), INT8_C( 81), -INT8_C( 95), INT8_C( 55), -INT8_C( 127), -INT8_C( 77), -INT8_C( 58), INT8_C( 11), INT8_C( 33), INT8_C( 99), -INT8_C( 119), -INT8_C( 68), -INT8_C( 38), INT8_C( 90),
-INT8_C( 59), INT8_C( 28), -INT8_C( 44), INT8_C( 88), INT8_C( 51), -INT8_C( 67), -INT8_C( 38), INT8_C( 117), INT8_C( 121), -INT8_C( 9), -INT8_C( 49), INT8_C( 98), -INT8_C( 77), INT8_C( 32), -INT8_C( 86), INT8_C( 55), -INT8_C( 80), INT8_C( 64), -INT8_C( 120), INT8_C( 81), INT8_C( 119), INT8_C( 9), INT8_C( 4), INT8_C( 61), INT8_C( 21), INT8_C( 38), -INT8_C( 96), -INT8_C( 98), -INT8_C( 30), INT8_C( 123), -INT8_C( 8), -INT8_C( 88) }, { -INT8_C( 124), -INT8_C( 23), -INT8_C( 59), -INT8_C( 51), -INT8_C( 119), -INT8_C( 127), INT8_C( 99), -INT8_C( 126), INT8_C( 22), INT8_C( 3), -INT8_C( 77), INT8_C( 51), -INT8_C( 49), -INT8_C( 4), INT8_C( 99), INT8_C( 45), INT8_C( 104), -INT8_C( 10), INT8_C( 121), INT8_C( 113), INT8_C( 32), INT8_C( 22), -INT8_C( 7), INT8_C( 72), -INT8_C( 44), INT8_C( 9), -INT8_C( 77), -INT8_C( 32), -INT8_C( 119), INT8_C( 119), -INT8_C( 28), -INT8_C( 77), INT8_C( 33), -INT8_C( 84), -INT8_C( 126), INT8_C( 29), -INT8_C( 9), -INT8_C( 69), -INT8_C( 95), -INT8_C( 86), -INT8_C( 80), INT8_C( 98), -INT8_C( 86), INT8_C( 33), -INT8_C( 58), INT8_C( 36), -INT8_C( 38), -INT8_C( 38), INT8_C( 26), INT8_C( 66), INT8_C( 75), INT8_C( 77), -INT8_C( 97), -INT8_C( 124), -INT8_C( 107), -INT8_C( 9), INT8_C( 89), -INT8_C( 14), -INT8_C( 41), -INT8_C( 86), -INT8_C( 68), -INT8_C( 69), -INT8_C( 37), INT8_C( 61) } }, { { -INT8_C( 105), -INT8_C( 52), INT8_C( 0), -INT8_C( 54), -INT8_C( 118), -INT8_C( 38), INT8_C( 63), INT8_C( 3), -INT8_C( 46), INT8_C( 14), INT8_C( 101), -INT8_C( 123), INT8_C( 46), INT8_C( 15), -INT8_C( 68), -INT8_C( 33), INT8_C( 79), INT8_C( 68), INT8_C( 48), -INT8_C( 57), INT8_C( 78), INT8_C( 53), INT8_C( 4), INT8_C( 99), INT8_C( 91), -INT8_C( 91), INT8_C( 1), INT8_C( 61), INT8_C( 32), -INT8_C( 7), -INT8_C( 27), -INT8_C( 73), -INT8_C( 59), -INT8_C( 27), -INT8_C( 126), INT8_C( 79), -INT8_C( 64), -INT8_C( 63), INT8_C( 82), -INT8_C( 110), -INT8_C( 48), -INT8_C( 73), INT8_C( 23), -INT8_C( 2), -INT8_C( 57), -INT8_C( 45), -INT8_C( 35), INT8_C( 22), INT8_C( 23), INT8_C( 14),
-INT8_C( 35), INT8_C( 101), INT8_C( 67), -INT8_C( 30), -INT8_C( 56), -INT8_C( 98), -INT8_C( 121), -INT8_C( 55), -INT8_C( 37), -INT8_C( 89), -INT8_C( 62), -INT8_C( 63), INT8_C( 94), -INT8_C( 120) }, UINT64_C( 8284418391546519718), { -INT8_C( 31), INT8_C( 15), INT8_C( 112), -INT8_C( 88), -INT8_C( 30), INT8_C( 78), -INT8_C( 65), -INT8_C( 6), INT8_C( 92), -INT8_C( 100), INT8_C( 95), -INT8_C( 97), INT8_C( 126), INT8_C( 40), INT8_C( 61), INT8_C( 5), -INT8_C( 15), INT8_C( 24), -INT8_C( 84), -INT8_C( 76), -INT8_C( 39), INT8_C( 11), INT8_C( 60), INT8_MIN, -INT8_C( 21), INT8_C( 19), -INT8_C( 26), -INT8_C( 115), INT8_C( 61), -INT8_C( 33), -INT8_C( 1), INT8_C( 31), -INT8_C( 18), INT8_C( 112), -INT8_C( 57), -INT8_C( 47), -INT8_C( 66), -INT8_C( 122), -INT8_C( 53), INT8_C( 26), INT8_C( 35), INT8_C( 42), -INT8_C( 71), -INT8_C( 95), INT8_C( 82), -INT8_C( 10), -INT8_C( 89), INT8_C( 68), INT8_C( 14), INT8_C( 83), -INT8_C( 8), -INT8_C( 24), INT8_C( 94), INT8_C( 52), INT8_C( 104), INT8_C( 74), INT8_C( 71), INT8_C( 78), -INT8_C( 41), -INT8_C( 123), INT8_C( 45), -INT8_C( 41), -INT8_C( 92), INT8_C( 28) }, { INT8_C( 71), INT8_C( 107), -INT8_C( 19), INT8_C( 5), -INT8_C( 14), -INT8_C( 72), INT8_C( 31), INT8_C( 21), -INT8_C( 30), -INT8_C( 40), -INT8_C( 74), INT8_C( 53), -INT8_C( 50), INT8_C( 93), INT8_C( 121), -INT8_C( 36), -INT8_C( 79), INT8_C( 113), -INT8_C( 60), INT8_C( 15), -INT8_C( 91), INT8_C( 44), INT8_C( 89), -INT8_C( 20), INT8_C( 123), INT8_C( 49), INT8_C( 113), -INT8_C( 88), INT8_C( 8), INT8_C( 21), -INT8_C( 60), INT8_C( 79), -INT8_C( 127), -INT8_C( 79), INT8_C( 84), INT8_C( 115), INT8_C( 105), INT8_C( 115), -INT8_C( 120), INT8_C( 76), INT8_C( 75), INT8_C( 62), -INT8_C( 127), INT8_C( 25), -INT8_C( 100), -INT8_C( 6), -INT8_C( 11), INT8_C( 77), INT8_C( 107), -INT8_C( 70), INT8_C( 92), INT8_C( 16), -INT8_C( 26), -INT8_C( 74), -INT8_C( 4), INT8_C( 97), -INT8_C( 25), INT8_C( 110), INT8_C( 10), -INT8_C( 17), -INT8_C( 125), -INT8_C( 50), INT8_C( 62), INT8_C( 4) }, { -INT8_C( 105), -INT8_C(
36), INT8_C( 107), -INT8_C( 54), -INT8_C( 118), INT8_C( 121), INT8_C( 63), INT8_C( 10), -INT8_C( 46), INT8_C( 14), INT8_C( 101), -INT8_C( 123), INT8_C( 46), INT8_C( 75), -INT8_C( 50), -INT8_C( 72), -INT8_C( 70), INT8_C( 123), -INT8_C( 100), -INT8_C( 57), INT8_C( 49), INT8_C( 53), -INT8_C( 125), INT8_C( 71), INT8_C( 91), INT8_C( 15), -INT8_C( 120), INT8_C( 61), INT8_C( 32), INT8_C( 79), INT8_C( 4), -INT8_C( 73), -INT8_C( 59), INT8_C( 107), -INT8_C( 126), INT8_C( 79), -INT8_C( 64), INT8_C( 31), INT8_C( 82), INT8_C( 113), -INT8_C( 48), -INT8_C( 127), INT8_C( 23), -INT8_C( 79), -INT8_C( 57), -INT8_C( 4), -INT8_C( 35), INT8_C( 22), INT8_C( 23), INT8_C( 14), -INT8_C( 35), INT8_C( 75), -INT8_C( 60), -INT8_C( 26), INT8_C( 75), -INT8_C( 74), -INT8_C( 121), INT8_C( 121), -INT8_C( 37), -INT8_C( 89), -INT8_C( 6), -INT8_C( 20), INT8_C( 105), -INT8_C( 120) } }, { { INT8_MIN, -INT8_C( 110), INT8_C( 119), -INT8_C( 23), INT8_C( 5), -INT8_C( 1), INT8_C( 53), INT8_C( 80), INT8_C( 62), -INT8_C( 74), INT8_C( 105), -INT8_C( 38), -INT8_C( 80), INT8_C( 94), INT8_C( 39), INT8_C( 27), INT8_C( 24), -INT8_C( 125), INT8_C( 43), -INT8_C( 1), INT8_C( 57), INT8_C( 40), INT8_C( 96), INT8_C( 32), -INT8_C( 106), INT8_C( 106), INT8_C( 15), INT8_C( 25), INT8_C( 57), INT8_C( 77), INT8_C( 30), -INT8_C( 71), -INT8_C( 33), -INT8_C( 107), -INT8_C( 94), -INT8_C( 28), -INT8_C( 107), -INT8_C( 40), INT8_C( 52), -INT8_C( 45), -INT8_C( 114), -INT8_C( 99), -INT8_C( 83), INT8_C( 63), -INT8_C( 4), -INT8_C( 44), INT8_C( 90), INT8_C( 20), INT8_C( 87), -INT8_C( 122), INT8_C( 19), -INT8_C( 111), -INT8_C( 82), INT8_C( 116), -INT8_C( 79), INT8_C( 68), -INT8_C( 34), -INT8_C( 63), INT8_C( 93), INT8_C( 23), INT8_C( 14), INT8_C( 123), -INT8_C( 48), -INT8_C( 18) }, UINT64_C(15670564399915102993), { -INT8_C( 92), INT8_C( 38), INT8_C( 24), -INT8_C( 96), -INT8_C( 6), INT8_C( 115), -INT8_C( 75), INT8_C( 81), -INT8_C( 7), -INT8_C( 56), -INT8_C( 30), -INT8_C( 89), INT8_C( 60), -INT8_C( 108), -INT8_C( 21), INT8_C( 27), INT8_C( 85),
INT8_C( 72), INT8_C( 50), INT8_C( 99), -INT8_C( 60), INT8_C( 3), INT8_C( 81), -INT8_C( 43), INT8_C( 118), INT8_C( 36), INT8_C( 123), -INT8_C( 63), INT8_C( 43), -INT8_C( 12), -INT8_C( 102), -INT8_C( 49), INT8_C( 26), -INT8_C( 77), INT8_C( 112), INT8_C( 20), INT8_C( 38), INT8_C( 37), INT8_C( 101), INT8_C( 31), -INT8_C( 19), INT8_C( 72), -INT8_C( 58), INT8_C( 42), -INT8_C( 36), -INT8_C( 79), INT8_C( 69), INT8_C( 49), -INT8_C( 7), INT8_C( 119), -INT8_C( 108), -INT8_C( 67), INT8_C( 122), -INT8_C( 26), -INT8_C( 110), -INT8_C( 16), INT8_C( 10), INT8_C( 13), -INT8_C( 79), INT8_C( 53), INT8_C( 1), INT8_C( 76), INT8_C( 4), INT8_C( 27) }, { -INT8_C( 1), INT8_C( 116), INT8_C( 47), INT8_C( 37), -INT8_C( 103), -INT8_C( 107), INT8_C( 68), -INT8_C( 121), -INT8_C( 35), INT8_C( 10), -INT8_C( 79), -INT8_C( 71), -INT8_C( 69), -INT8_C( 10), -INT8_C( 22), -INT8_C( 76), INT8_C( 109), INT8_C( 126), INT8_C( 114), -INT8_C( 24), INT8_C( 100), INT8_C( 4), -INT8_C( 40), INT8_C( 110), INT8_C( 18), -INT8_C( 118), -INT8_C( 93), INT8_C( 19), -INT8_C( 42), -INT8_C( 88), INT8_C( 47), -INT8_C( 43), INT8_C( 28), INT8_C( 94), -INT8_C( 6), -INT8_C( 74), -INT8_C( 13), INT8_C( 62), INT8_C( 61), -INT8_C( 48), INT8_C( 72), -INT8_C( 18), -INT8_C( 119), INT8_C( 3), -INT8_C( 28), INT8_C( 115), -INT8_C( 73), INT8_C( 81), -INT8_C( 14), INT8_C( 41), INT8_C( 57), INT8_C( 86), INT8_C( 46), INT8_C( 18), -INT8_C( 59), INT8_C( 64), -INT8_C( 100), INT8_C( 104), INT8_C( 83), INT8_C( 114), INT8_C( 16), -INT8_C( 126), INT8_C( 71), INT8_C( 45) }, { -INT8_C( 13), -INT8_C( 110), INT8_C( 119), -INT8_C( 23), INT8_C( 83), -INT8_C( 1), INT8_C( 53), INT8_C( 80), INT8_C( 104), -INT8_C( 35), INT8_C( 105), -INT8_C( 38), INT8_C( 16), INT8_C( 100), INT8_C( 3), INT8_C( 27), INT8_C( 24), -INT8_C( 35), INT8_C( 43), -INT8_C( 1), -INT8_C( 103), INT8_C( 40), INT8_C( 126), INT8_C( 4), -INT8_C( 106), -INT8_C( 13), INT8_C( 114), INT8_C( 25), INT8_C( 57), INT8_C( 46), INT8_C( 30), -INT8_C( 76), -INT8_C( 93), INT8_C( 86), -INT8_C( 94), INT8_C(
100), -INT8_C( 107), -INT8_C( 40), INT8_C( 62), -INT8_C( 45), INT8_C( 115), -INT8_C( 35), INT8_C( 68), INT8_C( 63), -INT8_C( 4), -INT8_C( 44), INT8_C( 90), INT8_C( 20), INT8_C( 104), -INT8_C( 122), INT8_C( 19), -INT8_C( 126), INT8_C( 83), INT8_C( 61), INT8_C( 114), INT8_C( 68), -INT8_C( 79), -INT8_C( 63), INT8_C( 93), INT8_C( 18), INT8_C( 116), INT8_C( 123), -INT8_C( 103), INT8_C( 19) } }, { { -INT8_C( 31), INT8_C( 65), -INT8_C( 29), -INT8_C( 44), INT8_MAX, INT8_C( 32), -INT8_C( 91), -INT8_C( 57), INT8_C( 14), INT8_C( 46), -INT8_C( 54), -INT8_C( 14), -INT8_C( 94), -INT8_C( 127), INT8_C( 67), -INT8_C( 108), -INT8_C( 85), INT8_C( 125), -INT8_C( 22), -INT8_C( 39), -INT8_C( 113), -INT8_C( 81), INT8_C( 25), INT8_C( 43), INT8_C( 24), INT8_C( 108), -INT8_C( 99), INT8_C( 40), -INT8_C( 17), -INT8_C( 28), INT8_C( 85), -INT8_C( 48), INT8_C( 37), INT8_C( 56), -INT8_C( 92), -INT8_C( 92), INT8_C( 88), INT8_C( 73), INT8_C( 107), INT8_C( 102), INT8_C( 120), INT8_C( 53), INT8_C( 88), INT8_C( 26), -INT8_C( 74), -INT8_C( 100), -INT8_C( 82), INT8_C( 97), INT8_C( 25), -INT8_C( 104), INT8_C( 58), -INT8_C( 88), INT8_C( 72), INT8_C( 83), -INT8_C( 45), INT8_C( 96), -INT8_C( 64), INT8_C( 112), -INT8_C( 120), -INT8_C( 81), INT8_C( 84), -INT8_C( 34), INT8_MAX, INT8_C( 121) }, UINT64_C(16561293208321860374), { -INT8_C( 67), INT8_C( 46), -INT8_C( 1), INT8_C( 115), -INT8_C( 54), -INT8_C( 83), -INT8_C( 43), -INT8_C( 29), INT8_C( 69), INT8_C( 15), -INT8_C( 117), -INT8_C( 115), INT8_C( 99), INT8_C( 94), -INT8_C( 19), INT8_C( 35), -INT8_C( 50), INT8_C( 118), -INT8_C( 46), INT8_C( 34), INT8_C( 84), INT8_C( 81), -INT8_C( 101), INT8_C( 106), INT8_C( 116), -INT8_C( 72), -INT8_C( 39), -INT8_C( 31), INT8_C( 64), -INT8_C( 81), -INT8_C( 58), -INT8_C( 3), -INT8_C( 35), -INT8_C( 59), INT8_C( 112), -INT8_C( 89), INT8_C( 114), INT8_C( 69), -INT8_C( 118), -INT8_C( 72), INT8_C( 85), INT8_C( 21), INT8_C( 69), -INT8_C( 72), INT8_C( 115), INT8_C( 51), -INT8_C( 37), INT8_C( 65), -INT8_C( 87), -INT8_C( 83), INT8_C(
99), -INT8_C( 3), -INT8_C( 2), -INT8_C( 2), INT8_C( 103), INT8_C( 114), -INT8_C( 74), INT8_C( 65), INT8_C( 84), -INT8_C( 10), -INT8_C( 16), INT8_C( 26), -INT8_C( 13), -INT8_C( 51) }, { -INT8_C( 32), INT8_C( 99), INT8_C( 116), INT8_C( 82), -INT8_C( 87), -INT8_C( 2), INT8_C( 10), -INT8_C( 2), INT8_C( 19), INT8_C( 80), -INT8_C( 74), -INT8_C( 122), -INT8_C( 125), -INT8_C( 111), -INT8_C( 57), INT8_C( 44), INT8_C( 62), INT8_C( 42), INT8_C( 41), INT8_C( 60), INT8_C( 40), -INT8_C( 112), -INT8_C( 82), -INT8_C( 34), -INT8_C( 47), INT8_C( 2), -INT8_C( 44), -INT8_C( 63), INT8_C( 29), -INT8_C( 57), -INT8_C( 114), -INT8_C( 3), INT8_C( 42), INT8_C( 2), INT8_C( 79), -INT8_C( 45), INT8_C( 0), INT8_C( 90), -INT8_C( 47), INT8_C( 19), -INT8_C( 86), -INT8_C( 121), -INT8_C( 103), INT8_C( 45), INT8_C( 24), INT8_C( 96), INT8_C( 89), INT8_C( 86), -INT8_C( 118), -INT8_C( 126), -INT8_C( 110), -INT8_C( 78), INT8_C( 18), INT8_C( 65), -INT8_C( 112), -INT8_C( 28), INT8_C( 67), INT8_C( 100), -INT8_C( 91), INT8_C( 96), INT8_C( 43), INT8_C( 52), INT8_C( 93), INT8_C( 86) }, { -INT8_C( 31), INT8_C( 89), INT8_C( 86), -INT8_C( 44), -INT8_C( 74), INT8_C( 32), -INT8_C( 91), -INT8_C( 57), -INT8_C( 2), INT8_C( 44), -INT8_C( 54), -INT8_C( 14), -INT8_C( 94), -INT8_C( 114), INT8_C( 67), -INT8_C( 108), -INT8_C( 57), INT8_C( 125), INT8_C( 41), INT8_C( 79), INT8_C( 40), -INT8_C( 81), INT8_C( 25), INT8_C( 43), INT8_C( 18), INT8_C( 67), INT8_C( 2), INT8_C( 2), -INT8_C( 17), INT8_C( 86), INT8_C( 10), -INT8_C( 48), -INT8_C( 57), INT8_C( 56), -INT8_C( 118), INT8_C( 19), INT8_C( 88), -INT8_C( 2), -INT8_C( 74), INT8_C( 102), INT8_C( 120), INT8_C( 53), INT8_C( 88), INT8_C( 67), -INT8_C( 74), -INT8_C( 100), -INT8_C( 82), INT8_C( 99), -INT8_C( 121), -INT8_C( 104), -INT8_C( 45), -INT8_C( 88), INT8_C( 93), INT8_C( 83), INT8_C( 19), -INT8_C( 110), -INT8_C( 112), INT8_C( 112), INT8_C( 40), -INT8_C( 81), INT8_C( 84), -INT8_C( 44), -INT8_C( 78), -INT8_C( 111) } }, { { INT8_C( 54), -INT8_C( 83), INT8_C( 41), INT8_C( 55), INT8_C(
7), -INT8_C( 5), INT8_C( 74), -INT8_C( 79), -INT8_C( 126), -INT8_C( 28), -INT8_C( 34), -INT8_C( 101), INT8_C( 68), INT8_C( 55), -INT8_C( 15), -INT8_C( 49), -INT8_C( 71), -INT8_C( 124), -INT8_C( 127), -INT8_C( 53), -INT8_C( 59), INT8_C( 18), -INT8_C( 81), INT8_C( 8), INT8_C( 118), INT8_C( 85), INT8_C( 105), -INT8_C( 94), -INT8_C( 119), -INT8_C( 58), -INT8_C( 8), -INT8_C( 65), INT8_C( 115), INT8_C( 33), -INT8_C( 10), INT8_C( 122), INT8_C( 28), INT8_C( 65), INT8_C( 43), -INT8_C( 97), INT8_C( 37), INT8_C( 9), INT8_C( 58), INT8_C( 105), INT8_C( 64), INT8_C( 43), INT8_C( 56), -INT8_C( 7), -INT8_C( 81), -INT8_C( 70), -INT8_C( 59), INT8_C( 116), -INT8_C( 52), INT8_C( 116), INT8_C( 125), INT8_C( 66), -INT8_C( 55), -INT8_C( 26), -INT8_C( 28), INT8_C( 82), -INT8_C( 84), -INT8_C( 36), INT8_C( 18), INT8_C( 32) }, UINT64_C( 7978626232144759038), { -INT8_C( 49), -INT8_C( 13), -INT8_C( 40), INT8_C( 16), INT8_C( 31), INT8_C( 16), INT8_C( 9), -INT8_C( 50), -INT8_C( 54), -INT8_C( 50), INT8_C( 67), -INT8_C( 106), INT8_C( 67), -INT8_C( 64), -INT8_C( 39), INT8_C( 12), -INT8_C( 90), -INT8_C( 67), INT8_C( 95), INT8_C( 82), -INT8_C( 102), INT8_C( 113), INT8_C( 114), -INT8_C( 104), INT8_C( 121), INT8_C( 13), -INT8_C( 78), -INT8_C( 61), -INT8_C( 45), INT8_C( 108), INT8_C( 49), -INT8_C( 94), INT8_C( 95), INT8_C( 9), -INT8_C( 78), INT8_C( 126), INT8_C( 26), -INT8_C( 68), INT8_C( 77), -INT8_C( 28), -INT8_C( 118), -INT8_C( 112), INT8_C( 123), -INT8_C( 51), INT8_C( 80), INT8_C( 84), -INT8_C( 38), -INT8_C( 10), INT8_C( 17), INT8_C( 57), INT8_C( 72), -INT8_C( 85), -INT8_C( 86), -INT8_C( 69), INT8_C( 67), INT8_C( 35), -INT8_C( 56), -INT8_C( 10), -INT8_C( 26), -INT8_C( 101), INT8_C( 98), INT8_C( 24), INT8_C( 61), -INT8_C( 63) }, { INT8_C( 33), -INT8_C( 16), INT8_C( 64), INT8_C( 59), -INT8_C( 84), -INT8_C( 115), INT8_C( 32), INT8_C( 54), INT8_C( 29), -INT8_C( 101), INT8_C( 4), INT8_C( 109), -INT8_C( 17), -INT8_C( 34), INT8_C( 99), INT8_C( 0), INT8_C( 23), -INT8_C( 85), -INT8_C( 84), -INT8_C( 63),
INT8_C( 102), -INT8_C( 17), -INT8_C( 28), INT8_C( 46), -INT8_C( 27), -INT8_C( 53), -INT8_C( 55), INT8_C( 71), -INT8_C( 29), INT8_C( 7), INT8_C( 9), INT8_C( 4), -INT8_C( 9), INT8_C( 73), INT8_C( 64), -INT8_C( 93), -INT8_C( 42), INT8_C( 96), -INT8_C( 39), -INT8_C( 13), -INT8_C( 5), -INT8_C( 35), INT8_C( 96), -INT8_C( 22), -INT8_C( 69), -INT8_C( 61), -INT8_C( 22), -INT8_C( 46), INT8_C( 110), -INT8_C( 106), -INT8_C( 109), -INT8_C( 43), -INT8_C( 122), INT8_C( 120), INT8_C( 3), INT8_C( 107), INT8_C( 67), -INT8_C( 51), -INT8_C( 77), INT8_C( 38), -INT8_C( 44), -INT8_C( 68), INT8_C( 42), -INT8_C( 53) }, { INT8_C( 54), -INT8_C( 43), -INT8_C( 27), INT8_C( 23), INT8_C( 4), INT8_C( 23), -INT8_C( 101), INT8_C( 99), -INT8_C( 126), -INT8_C( 28), -INT8_C( 34), -INT8_C( 28), INT8_C( 68), INT8_C( 55), -INT8_C( 15), -INT8_C( 49), -INT8_C( 71), -INT8_C( 68), -INT8_C( 127), -INT8_C( 84), -INT8_C( 55), INT8_C( 18), -INT8_C( 81), -INT8_C( 27), INT8_C( 118), -INT8_C( 34), INT8_C( 105), INT8_C( 59), -INT8_C( 63), -INT8_C( 58), -INT8_C( 8), -INT8_C( 65), INT8_C( 4), INT8_C( 33), -INT8_C( 10), INT8_C( 42), INT8_C( 28), INT8_C( 65), -INT8_C( 34), -INT8_C( 97), INT8_C( 37), INT8_C( 23), INT8_C( 38), INT8_C( 105), INT8_C( 64), INT8_C( 43), -INT8_C( 55), INT8_C( 3), -INT8_C( 85), -INT8_C( 70), -INT8_C( 59), -INT8_C( 22), INT8_C( 96), INT8_C( 38), INT8_C( 125), -INT8_C( 93), -INT8_C( 55), INT8_C( 3), -INT8_C( 39), INT8_C( 71), -INT8_C( 84), -INT8_C( 27), -INT8_C( 68), INT8_C( 32) } }, }; /* Replay each stored case: load src/idx/a, apply the masked permute with the stored k, and compare against the stored r. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi8(test_vec[i].src); simde__m512i idx = simde_mm512_loadu_epi8(test_vec[i].idx); simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i r = simde_mm512_mask_permutexvar_epi8(src, test_vec[i].k, idx, a); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; #else /* Generator path: 8 random src/k/idx/a sets are run through the function and written out field by field in struct order (src, k, idx, a, r). */ fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i src =
simde_test_x86_random_i8x64(); simde__mmask64 k = simde_test_x86_random_mmask64(); simde__m512i idx = simde_test_x86_random_i8x64(); simde__m512i a = simde_test_x86_random_i8x64(); simde__m512i r = simde_mm512_mask_permutexvar_epi8(src, k, idx, a); simde_test_x86_write_i8x64(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask64(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, r, SIMDE_TEST_VEC_POS_LAST); } /* NOTE(review): non-zero return here, unlike the 0 of the checking path -- presumably so a generator build is not reported as a pass; confirm with the test harness. */ return 1; #endif } static int test_simde_mm512_maskz_permutexvar_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask64 k; const int8_t idx[64]; const int8_t a[64]; const int8_t r[64]; } test_vec[] = { { UINT64_C( 9857402488456260496), { -INT8_C( 87), -INT8_C( 18), -INT8_C( 21), -INT8_C( 97), INT8_C( 108), -INT8_C( 78), INT8_C( 96), INT8_C( 73), -INT8_C( 16), INT8_C( 37), INT8_C( 82), INT8_C( 112), INT8_C( 4), INT8_C( 125), INT8_C( 95), -INT8_C( 19), INT8_C( 22), INT8_C( 2), INT8_C( 53), -INT8_C( 110), INT8_C( 121), INT8_C( 74), -INT8_C( 71), INT8_C( 9), -INT8_C( 10), INT8_C( 53), INT8_C( 45), INT8_C( 76), -INT8_C( 68), -INT8_C( 7), -INT8_C( 44), INT8_C( 102), -INT8_C( 24), -INT8_C( 64), INT8_C( 5), INT8_C( 84), INT8_C( 114), INT8_C( 101), -INT8_C( 99), INT8_C( 98), -INT8_C( 117), -INT8_C( 17), -INT8_C( 46), -INT8_C( 113), INT8_C( 108), INT8_C( 49), INT8_C( 125), -INT8_C( 126), INT8_C( 52), -INT8_C( 78), INT8_C( 20), -INT8_C( 83), -INT8_C( 4), -INT8_C( 51), -INT8_C( 74), -INT8_C( 14), INT8_C( 3), -INT8_C( 28), INT8_C( 62), -INT8_C( 65), -INT8_C( 35), INT8_C( 19), INT8_C( 37), -INT8_C( 59) }, { -INT8_C( 45), INT8_C( 42), INT8_C( 25), INT8_C( 69), -INT8_C( 112), -INT8_C( 74), -INT8_C( 89), INT8_C( 27), -INT8_C( 91), INT8_C( 121), -INT8_C( 86), INT8_C( 17), -INT8_C( 86), INT8_C( 39), -INT8_C( 109), -INT8_C( 34), -INT8_C( 39), -INT8_C( 89), -INT8_C( 117), -INT8_C( 42), INT8_C( 117), INT8_C( 66),
-INT8_C( 56), INT8_C( 120), INT8_C( 38), INT8_C( 7), INT8_C( 55), INT8_C( 3), INT8_C( 26), INT8_C( 93), -INT8_C( 55), -INT8_C( 19), -INT8_C( 121), -INT8_C( 30), INT8_C( 50), INT8_C( 23), -INT8_C( 103), -INT8_C( 39), INT8_C( 50), INT8_C( 62), INT8_C( 82), -INT8_C( 35), INT8_C( 80), -INT8_C( 4), INT8_C( 4), -INT8_C( 29), -INT8_C( 37), -INT8_C( 34), -INT8_C( 117), INT8_C( 102), -INT8_C( 76), INT8_C( 0), -INT8_C( 88), INT8_C( 124), INT8_C( 120), -INT8_C( 50), -INT8_C( 125), -INT8_C( 81), -INT8_C( 46), -INT8_C( 99), INT8_C( 12), -INT8_C( 101), -INT8_C( 118), -INT8_C( 108) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 4), INT8_C( 0), INT8_C( 0), INT8_C( 121), -INT8_C( 117), -INT8_C( 39), INT8_C( 0), -INT8_C( 117), INT8_C( 0), -INT8_C( 101), INT8_C( 0), -INT8_C( 29), INT8_C( 0), INT8_C( 0), INT8_C( 124), -INT8_C( 117), -INT8_C( 81), -INT8_C( 86), -INT8_C( 81), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 29), INT8_C( 0), INT8_C( 0), -INT8_C( 81), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 45), -INT8_C( 74), INT8_C( 0), -INT8_C( 76), INT8_C( 0), INT8_C( 93), INT8_C( 0), INT8_C( 17), -INT8_C( 34), -INT8_C( 117), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 25), INT8_C( 0), INT8_C( 0), INT8_C( 117), -INT8_C( 29), INT8_C( 0), INT8_C( 0), INT8_C( 120), -INT8_C( 76), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 108), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 74) } }, { UINT64_C(16669474327693606013), { -INT8_C( 69), -INT8_C( 91), -INT8_C( 28), -INT8_C( 65), -INT8_C( 120), -INT8_C( 65), -INT8_C( 99), INT8_C( 19), INT8_C( 37), INT8_C( 81), INT8_C( 19), -INT8_C( 50), -INT8_C( 50), -INT8_C( 117), -INT8_C( 100), INT8_C( 81), INT8_C( 59), INT8_C( 110), -INT8_C( 17), INT8_C( 71), INT8_C( 9), INT8_C( 121), -INT8_C( 37), -INT8_C( 121), INT8_C( 54), -INT8_C( 121), -INT8_C( 99), -INT8_C( 53), INT8_C( 101), -INT8_C( 14), -INT8_C( 77), INT8_C( 32), -INT8_C( 105), -INT8_C( 105), -INT8_C( 33), INT8_C( 32), INT8_C( 86), INT8_C( 125), INT8_C( 51), INT8_C( 123), 
-INT8_C( 50), INT8_C( 71), INT8_C( 73), -INT8_C( 100), -INT8_C( 46), -INT8_C( 26), -INT8_C( 18), INT8_C( 13), INT8_C( 84), -INT8_C( 35), INT8_C( 85), INT8_C( 94), INT8_C( 86), INT8_C( 48), -INT8_C( 27), -INT8_C( 116), -INT8_C( 73), -INT8_C( 126), INT8_C( 88), INT8_C( 28), INT8_C( 117), INT8_C( 11), INT8_C( 60), INT8_C( 12) }, { -INT8_C( 94), INT8_C( 28), INT8_C( 44), -INT8_C( 8), -INT8_C( 103), INT8_C( 96), INT8_C( 115), INT8_C( 103), -INT8_C( 89), -INT8_C( 67), INT8_C( 4), INT8_C( 121), -INT8_C( 93), -INT8_C( 14), -INT8_C( 121), -INT8_C( 9), -INT8_C( 49), -INT8_C( 36), INT8_C( 85), INT8_C( 37), INT8_C( 12), INT8_C( 58), -INT8_C( 78), -INT8_C( 60), -INT8_C( 67), INT8_C( 10), -INT8_C( 32), INT8_C( 50), INT8_C( 21), INT8_C( 29), INT8_C( 62), -INT8_C( 73), INT8_C( 57), INT8_C( 107), -INT8_C( 81), -INT8_C( 46), -INT8_C( 53), INT8_C( 34), INT8_C( 57), INT8_C( 114), -INT8_C( 33), INT8_C( 61), -INT8_C( 21), -INT8_C( 126), INT8_C( 47), INT8_C( 114), INT8_C( 122), -INT8_C( 2), INT8_C( 78), -INT8_C( 49), INT8_C( 36), INT8_C( 91), INT8_C( 10), -INT8_C( 42), INT8_C( 31), -INT8_C( 57), -INT8_C( 32), -INT8_C( 1), -INT8_C( 7), -INT8_C( 11), INT8_C( 28), INT8_C( 55), -INT8_C( 84), INT8_C( 85) }, { -INT8_C( 11), INT8_C( 0), -INT8_C( 53), INT8_C( 85), -INT8_C( 89), INT8_C( 85), INT8_C( 29), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 37), -INT8_C( 121), -INT8_C( 121), INT8_C( 121), INT8_C( 0), -INT8_C( 36), -INT8_C( 11), INT8_C( 122), INT8_C( 0), INT8_C( 103), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 103), INT8_C( 0), INT8_C( 103), INT8_C( 29), INT8_C( 0), INT8_C( 34), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 60), INT8_C( 0), -INT8_C( 73), INT8_C( 0), -INT8_C( 78), INT8_C( 0), INT8_C( 0), -INT8_C( 11), INT8_C( 0), INT8_C( 103), -INT8_C( 67), INT8_C( 21), INT8_C( 85), INT8_C( 0), INT8_C( 122), -INT8_C( 14), INT8_C( 12), INT8_C( 0), INT8_C( 58), INT8_C( 0), -INT8_C( 78), INT8_C( 0), INT8_C( 34), INT8_C( 0), -INT8_C( 57), INT8_C( 44), -INT8_C( 67), INT8_C( 0), INT8_C( 0), 
INT8_C( 121), INT8_C( 28), -INT8_C( 93) } }, { UINT64_C( 6764232356663876514), { -INT8_C( 98), -INT8_C( 53), -INT8_C( 33), -INT8_C( 50), INT8_C( 61), INT8_C( 89), -INT8_C( 52), -INT8_C( 116), INT8_C( 41), -INT8_C( 16), -INT8_C( 25), INT8_C( 51), -INT8_C( 58), INT8_C( 6), -INT8_C( 6), -INT8_C( 90), INT8_C( 5), -INT8_C( 13), -INT8_C( 101), INT8_C( 34), INT8_C( 42), INT8_C( 71), INT8_C( 119), -INT8_C( 51), -INT8_C( 94), -INT8_C( 97), INT8_C( 58), INT8_C( 32), INT8_C( 0), INT8_C( 26), INT8_C( 125), -INT8_C( 98), -INT8_C( 27), INT8_C( 92), INT8_C( 108), INT8_C( 34), -INT8_C( 74), INT8_C( 57), -INT8_C( 82), -INT8_C( 33), INT8_C( 41), -INT8_C( 107), INT8_C( 18), -INT8_C( 16), -INT8_C( 101), INT8_C( 12), -INT8_C( 106), -INT8_C( 95), -INT8_C( 1), INT8_C( 50), -INT8_C( 61), INT8_C( 41), INT8_C( 121), INT8_C( 58), -INT8_C( 10), INT8_C( 28), -INT8_C( 39), INT8_C( 49), INT8_C( 60), -INT8_C( 39), INT8_C( 75), -INT8_C( 71), INT8_C( 120), INT8_C( 48) }, { INT8_C( 21), -INT8_C( 28), INT8_C( 82), -INT8_C( 53), INT8_C( 29), INT8_C( 1), -INT8_C( 86), INT8_C( 71), -INT8_C( 106), -INT8_C( 68), INT8_C( 55), INT8_C( 50), -INT8_C( 56), -INT8_C( 51), -INT8_C( 45), -INT8_C( 57), -INT8_C( 1), -INT8_C( 106), -INT8_C( 15), INT8_C( 121), -INT8_C( 48), -INT8_C( 25), -INT8_C( 107), -INT8_C( 86), INT8_C( 24), -INT8_C( 47), -INT8_C( 125), INT8_C( 99), -INT8_C( 118), -INT8_C( 5), -INT8_C( 109), -INT8_C( 97), -INT8_C( 32), -INT8_C( 26), INT8_C( 107), -INT8_C( 3), -INT8_C( 25), INT8_C( 21), INT8_C( 68), INT8_C( 125), -INT8_C( 46), INT8_C( 123), -INT8_C( 81), -INT8_C( 102), INT8_C( 73), -INT8_C( 126), INT8_C( 98), INT8_C( 72), INT8_C( 24), INT8_C( 83), -INT8_C( 63), -INT8_C( 23), INT8_C( 58), INT8_C( 86), -INT8_C( 109), INT8_C( 83), INT8_C( 39), INT8_C( 22), -INT8_C( 74), -INT8_C( 79), INT8_C( 18), INT8_C( 74), INT8_C( 81), -INT8_C( 14) }, { INT8_C( 0), INT8_C( 50), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 47), INT8_C( 0), -INT8_C( 56), INT8_C( 123), INT8_C( 24), INT8_C( 0), -INT8_C( 23), -INT8_C( 
86), INT8_C( 0), -INT8_C( 74), INT8_C( 0), INT8_C( 1), -INT8_C( 23), INT8_C( 99), INT8_C( 0), INT8_C( 0), INT8_C( 71), INT8_C( 0), INT8_C( 0), INT8_C( 107), INT8_C( 0), -INT8_C( 74), -INT8_C( 32), INT8_C( 0), -INT8_C( 125), INT8_C( 74), INT8_C( 0), INT8_C( 21), INT8_C( 0), INT8_C( 73), INT8_C( 107), -INT8_C( 109), INT8_C( 22), INT8_C( 98), INT8_C( 0), INT8_C( 123), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 56), -INT8_C( 107), INT8_C( 0), -INT8_C( 14), -INT8_C( 63), -INT8_C( 53), INT8_C( 123), INT8_C( 22), INT8_C( 0), -INT8_C( 109), -INT8_C( 118), -INT8_C( 47), INT8_C( 0), INT8_C( 18), -INT8_C( 47), INT8_C( 50), INT8_C( 0), INT8_C( 39), INT8_C( 0) } }, { UINT64_C(11787104197389827120), { -INT8_C( 81), INT8_C( 68), INT8_C( 62), -INT8_C( 8), -INT8_C( 58), -INT8_C( 96), INT8_C( 65), -INT8_C( 33), -INT8_C( 13), INT8_C( 2), -INT8_C( 56), INT8_C( 45), INT8_C( 89), INT8_C( 91), INT8_MIN, INT8_MIN, INT8_C( 113), INT8_C( 55), INT8_C( 50), -INT8_C( 125), -INT8_C( 127), -INT8_C( 125), INT8_C( 117), -INT8_C( 79), INT8_C( 63), INT8_C( 101), -INT8_C( 56), INT8_C( 16), -INT8_C( 103), INT8_C( 92), -INT8_C( 76), INT8_C( 72), -INT8_C( 96), -INT8_C( 14), INT8_C( 65), INT8_C( 103), -INT8_C( 110), -INT8_C( 126), INT8_C( 70), -INT8_C( 123), -INT8_C( 124), INT8_C( 14), -INT8_C( 78), -INT8_C( 35), INT8_C( 105), INT8_C( 51), INT8_C( 94), -INT8_C( 38), INT8_C( 106), -INT8_C( 112), INT8_C( 94), -INT8_C( 21), INT8_C( 19), -INT8_C( 45), -INT8_C( 100), INT8_C( 82), INT8_C( 56), INT8_C( 100), INT8_C( 98), -INT8_C( 47), -INT8_C( 64), INT8_C( 22), INT8_C( 26), INT8_C( 97) }, { INT8_C( 8), INT8_C( 91), -INT8_C( 56), -INT8_C( 102), -INT8_C( 35), INT8_C( 14), INT8_C( 31), INT8_C( 97), INT8_C( 28), -INT8_C( 46), INT8_C( 63), -INT8_C( 123), INT8_C( 5), -INT8_C( 99), INT8_C( 95), INT8_C( 111), INT8_C( 45), -INT8_C( 67), INT8_C( 90), INT8_C( 64), -INT8_C( 111), -INT8_C( 10), -INT8_C( 110), -INT8_C( 55), INT8_C( 90), -INT8_C( 12), -INT8_C( 101), INT8_C( 26), INT8_C( 11), -INT8_C( 75), 
INT8_C( 123), INT8_C( 19), INT8_C( 16), INT8_C( 67), -INT8_C( 82), -INT8_C( 19), INT8_C( 81), -INT8_C( 51), INT8_C( 78), INT8_C( 109), -INT8_C( 97), -INT8_C( 115), -INT8_C( 14), -INT8_C( 92), INT8_C( 42), INT8_C( 82), INT8_C( 19), INT8_C( 87), INT8_C( 15), INT8_C( 109), -INT8_C( 105), -INT8_C( 96), INT8_C( 99), INT8_C( 41), INT8_C( 106), -INT8_C( 67), INT8_C( 30), INT8_C( 5), -INT8_C( 40), INT8_C( 41), -INT8_C( 70), INT8_C( 83), INT8_C( 60), -INT8_C( 54) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 31), INT8_C( 16), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 28), INT8_C( 82), -INT8_C( 12), INT8_C( 26), INT8_C( 0), INT8_C( 8), INT8_C( 109), -INT8_C( 67), -INT8_C( 105), -INT8_C( 102), INT8_C( 0), -INT8_C( 102), INT8_C( 41), INT8_C( 109), -INT8_C( 54), -INT8_C( 51), INT8_C( 28), INT8_C( 0), -INT8_C( 12), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 16), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 90), INT8_C( 0), INT8_C( 31), INT8_C( 14), INT8_C( 0), INT8_C( 0), -INT8_C( 105), INT8_C( 0), -INT8_C( 115), -INT8_C( 96), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 123), INT8_C( 0), INT8_C( 64), INT8_C( 0), INT8_C( 0), INT8_C( 90), INT8_C( 30), INT8_C( 81), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 110), INT8_C( 0), INT8_C( 67) } }, { UINT64_C( 6293223821033335447), { -INT8_C( 109), INT8_C( 72), -INT8_C( 4), -INT8_C( 67), -INT8_C( 102), INT8_C( 15), INT8_C( 21), -INT8_C( 86), INT8_C( 125), -INT8_C( 84), INT8_C( 74), -INT8_C( 32), -INT8_C( 42), -INT8_C( 76), -INT8_C( 98), -INT8_C( 12), -INT8_C( 71), INT8_C( 118), INT8_C( 29), INT8_C( 115), -INT8_C( 55), INT8_C( 89), INT8_C( 61), INT8_C( 96), INT8_C( 68), -INT8_C( 12), INT8_C( 73), -INT8_C( 4), -INT8_C( 6), -INT8_C( 97), INT8_C( 83), -INT8_C( 115), -INT8_C( 25), INT8_C( 79), INT8_C( 74), -INT8_C( 126), INT8_C( 95), INT8_C( 95), INT8_C( 44), -INT8_C( 36), INT8_C( 12), INT8_C( 118), -INT8_C( 68), -INT8_C( 30), INT8_C( 43), INT8_C( 90), -INT8_C( 42), -INT8_C( 28), -INT8_C( 48), 
-INT8_C( 13), INT8_C( 88), -INT8_C( 102), INT8_C( 76), -INT8_C( 107), -INT8_C( 6), -INT8_C( 112), -INT8_C( 118), INT8_C( 67), -INT8_C( 116), -INT8_C( 124), -INT8_C( 30), -INT8_C( 32), INT8_C( 17), -INT8_C( 54) }, { INT8_C( 47), INT8_C( 91), INT8_C( 76), -INT8_C( 114), -INT8_C( 69), INT8_C( 120), INT8_C( 106), -INT8_C( 57), -INT8_C( 18), INT8_C( 39), -INT8_C( 87), INT8_C( 25), -INT8_C( 127), INT8_MAX, -INT8_C( 2), INT8_C( 82), INT8_C( 114), INT8_C( 86), -INT8_C( 20), -INT8_C( 66), -INT8_C( 21), -INT8_C( 26), INT8_C( 79), INT8_C( 117), INT8_C( 42), -INT8_C( 37), -INT8_C( 7), INT8_C( 12), -INT8_C( 69), INT8_C( 10), -INT8_C( 42), -INT8_C( 21), INT8_C( 102), INT8_C( 34), INT8_C( 121), INT8_C( 33), -INT8_C( 102), -INT8_C( 28), -INT8_C( 24), -INT8_C( 119), INT8_C( 11), -INT8_C( 111), -INT8_C( 94), -INT8_C( 116), INT8_C( 16), -INT8_C( 96), -INT8_C( 34), -INT8_C( 126), -INT8_C( 10), -INT8_C( 54), INT8_C( 64), -INT8_C( 30), -INT8_C( 79), -INT8_C( 113), INT8_C( 87), -INT8_C( 37), INT8_C( 107), INT8_C( 81), -INT8_C( 25), INT8_C( 38), INT8_C( 91), -INT8_C( 66), INT8_C( 17), -INT8_C( 63) }, { -INT8_C( 66), -INT8_C( 18), INT8_C( 91), INT8_C( 0), -INT8_C( 7), INT8_C( 0), INT8_C( 0), -INT8_C( 94), INT8_C( 0), INT8_C( 16), INT8_C( 0), INT8_C( 102), INT8_C( 0), -INT8_C( 79), -INT8_C( 42), -INT8_C( 79), INT8_C( 81), INT8_C( 87), INT8_C( 10), INT8_C( 0), INT8_C( 39), -INT8_C( 37), INT8_C( 0), INT8_C( 102), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 91), INT8_C( 0), -INT8_C( 21), -INT8_C( 66), INT8_MAX, INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 76), -INT8_C( 21), -INT8_C( 21), INT8_C( 0), -INT8_C( 69), -INT8_C( 127), INT8_C( 0), INT8_C( 91), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 30), INT8_C( 42), INT8_C( 0), -INT8_C( 127), INT8_C( 0), -INT8_C( 25), INT8_C( 0), -INT8_C( 87), -INT8_C( 114), -INT8_C( 127), INT8_C( 0), INT8_C( 121), INT8_C( 0), INT8_C( 86), INT8_C( 0) } }, { UINT64_C( 8792374952702675936), { INT8_C( 91), -INT8_C( 90), INT8_C( 
6), INT8_C( 107), INT8_C( 71), -INT8_C( 27), -INT8_C( 19), INT8_C( 61), -INT8_C( 81), INT8_C( 46), INT8_C( 31), INT8_C( 96), -INT8_C( 67), INT8_C( 119), INT8_C( 59), INT8_C( 40), -INT8_C( 56), INT8_C( 35), INT8_C( 79), INT8_C( 35), -INT8_C( 31), INT8_C( 96), -INT8_C( 27), -INT8_C( 63), -INT8_C( 21), -INT8_C( 57), INT8_C( 60), INT8_C( 90), -INT8_C( 110), INT8_C( 64), -INT8_C( 44), -INT8_C( 19), -INT8_C( 25), -INT8_C( 37), INT8_C( 89), INT8_C( 46), -INT8_C( 64), INT8_C( 70), INT8_C( 107), INT8_C( 111), INT8_C( 116), -INT8_C( 117), -INT8_C( 48), INT8_C( 50), INT8_C( 2), INT8_C( 11), INT8_C( 90), -INT8_C( 54), INT8_C( 46), -INT8_C( 87), -INT8_C( 19), INT8_C( 15), INT8_C( 10), -INT8_C( 46), -INT8_C( 47), -INT8_C( 11), -INT8_C( 102), INT8_C( 13), INT8_C( 80), INT8_C( 44), INT8_C( 78), INT8_C( 36), INT8_C( 25), INT8_C( 53) }, { -INT8_C( 1), INT8_C( 114), INT8_C( 99), -INT8_C( 65), -INT8_C( 71), -INT8_C( 50), INT8_C( 47), INT8_C( 45), INT8_C( 89), -INT8_C( 1), INT8_C( 95), INT8_C( 91), INT8_C( 10), -INT8_C( 70), INT8_C( 37), INT8_C( 57), INT8_C( 99), INT8_C( 19), INT8_C( 72), INT8_C( 109), -INT8_C( 27), INT8_C( 25), INT8_C( 99), INT8_MAX, INT8_C( 39), -INT8_C( 77), -INT8_C( 85), INT8_C( 117), -INT8_C( 41), -INT8_C( 59), -INT8_C( 86), -INT8_C( 41), INT8_C( 55), INT8_C( 13), -INT8_C( 106), -INT8_C( 16), -INT8_C( 37), -INT8_C( 59), INT8_C( 30), INT8_C( 53), -INT8_C( 60), INT8_C( 125), -INT8_C( 112), -INT8_C( 49), INT8_C( 55), -INT8_C( 74), INT8_C( 8), -INT8_C( 101), -INT8_C( 55), INT8_C( 80), INT8_C( 8), -INT8_C( 82), INT8_C( 106), INT8_C( 107), INT8_C( 46), -INT8_C( 111), INT8_C( 30), -INT8_C( 39), INT8_C( 6), -INT8_C( 10), -INT8_C( 98), -INT8_C( 80), -INT8_C( 51), -INT8_C( 42) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 59), -INT8_C( 74), -INT8_C( 80), -INT8_C( 101), INT8_C( 8), INT8_C( 0), INT8_C( 55), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 60), INT8_C( 0), -INT8_C( 16), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 55), -INT8_C( 59), 
INT8_C( 114), -INT8_C( 49), INT8_C( 45), INT8_C( 0), -INT8_C( 85), INT8_C( 72), -INT8_C( 1), -INT8_C( 27), INT8_C( 0), INT8_C( 53), INT8_C( 117), -INT8_C( 77), INT8_C( 8), INT8_C( 0), INT8_C( 47), -INT8_C( 49), INT8_C( 0), INT8_C( 0), INT8_C( 91), INT8_C( 0), INT8_C( 8), INT8_C( 0), INT8_C( 0), -INT8_C( 85), INT8_C( 95), INT8_C( 0), INT8_C( 0), -INT8_C( 74), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 70), INT8_C( 0), INT8_C( 55), INT8_C( 37), -INT8_C( 37), -INT8_C( 77), INT8_C( 0) } }, { UINT64_C(17135603024522535869), { INT8_C( 98), INT8_C( 94), -INT8_C( 68), -INT8_C( 103), INT8_C( 20), -INT8_C( 60), INT8_C( 52), -INT8_C( 35), INT8_C( 21), INT8_C( 61), -INT8_C( 117), INT8_MAX, -INT8_C( 88), -INT8_C( 71), INT8_C( 16), -INT8_C( 57), -INT8_C( 109), INT8_C( 22), -INT8_C( 67), INT8_C( 49), -INT8_C( 58), -INT8_C( 118), INT8_C( 7), -INT8_C( 125), -INT8_C( 19), -INT8_C( 50), INT8_C( 27), INT8_C( 22), -INT8_C( 78), -INT8_C( 23), INT8_C( 4), INT8_C( 20), INT8_C( 71), -INT8_C( 64), -INT8_C( 82), INT8_C( 91), -INT8_C( 123), -INT8_C( 30), INT8_C( 56), -INT8_C( 102), INT8_C( 31), -INT8_C( 61), INT8_C( 25), -INT8_C( 56), INT8_C( 125), INT8_C( 41), -INT8_C( 113), INT8_C( 16), INT8_C( 63), INT8_C( 76), INT8_C( 65), INT8_C( 5), -INT8_C( 42), INT8_C( 73), -INT8_C( 120), -INT8_C( 61), INT8_C( 23), -INT8_C( 93), -INT8_C( 38), -INT8_C( 55), -INT8_C( 116), -INT8_C( 34), -INT8_C( 34), -INT8_C( 45) }, { -INT8_C( 98), -INT8_C( 116), INT8_C( 46), INT8_C( 35), INT8_C( 110), INT8_C( 102), -INT8_C( 67), -INT8_C( 114), INT8_C( 42), -INT8_C( 42), INT8_C( 86), -INT8_C( 89), -INT8_C( 1), -INT8_C( 27), -INT8_C( 73), INT8_C( 62), INT8_C( 49), -INT8_C( 8), INT8_C( 67), INT8_C( 7), INT8_C( 65), -INT8_C( 53), -INT8_C( 54), INT8_C( 88), INT8_C( 111), -INT8_C( 92), INT8_C( 34), -INT8_C( 5), -INT8_C( 126), INT8_C( 0), -INT8_C( 49), INT8_C( 33), -INT8_C( 116), -INT8_C( 3), INT8_C( 68), -INT8_C( 6), INT8_C( 100), INT8_C( 2), -INT8_C( 120), -INT8_C( 114), -INT8_C( 40), 
-INT8_C( 34), INT8_C( 53), -INT8_C( 40), -INT8_C( 61), -INT8_C( 20), INT8_C( 22), -INT8_C( 12), -INT8_C( 28), INT8_C( 90), -INT8_C( 5), INT8_C( 38), INT8_C( 37), -INT8_C( 58), INT8_C( 126), -INT8_C( 108), INT8_C( 106), -INT8_C( 96), -INT8_C( 112), -INT8_C( 19), -INT8_C( 96), INT8_C( 95), INT8_C( 14), INT8_C( 44) }, { INT8_C( 68), INT8_C( 0), -INT8_C( 96), -INT8_C( 92), INT8_C( 65), INT8_C( 110), INT8_C( 0), INT8_C( 0), -INT8_C( 53), INT8_C( 95), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 96), INT8_C( 49), INT8_C( 0), INT8_C( 0), -INT8_C( 54), INT8_C( 95), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 114), INT8_C( 35), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 54), -INT8_C( 5), INT8_C( 0), INT8_C( 0), INT8_C( 65), -INT8_C( 114), INT8_C( 0), INT8_C( 0), -INT8_C( 5), INT8_C( 0), INT8_C( 68), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 92), INT8_C( 0), INT8_C( 0), -INT8_C( 34), INT8_C( 62), INT8_C( 49), INT8_C( 44), INT8_C( 0), -INT8_C( 116), INT8_C( 102), INT8_C( 0), INT8_C( 0), INT8_C( 42), INT8_C( 35), INT8_C( 88), INT8_C( 0), INT8_C( 34), -INT8_C( 42), INT8_C( 0), -INT8_C( 49), -INT8_C( 49), INT8_C( 7) } }, { UINT64_C( 3264739558426104412), { -INT8_C( 114), -INT8_C( 125), INT8_C( 5), INT8_C( 81), INT8_C( 111), INT8_C( 27), INT8_C( 70), INT8_C( 84), INT8_C( 117), INT8_C( 65), INT8_C( 122), -INT8_C( 101), INT8_C( 7), -INT8_C( 8), INT8_C( 47), INT8_C( 114), -INT8_C( 103), -INT8_C( 65), INT8_C( 95), INT8_C( 57), INT8_C( 30), INT8_C( 109), INT8_C( 102), INT8_C( 123), -INT8_C( 65), -INT8_C( 115), INT8_C( 59), INT8_C( 20), INT8_C( 60), -INT8_C( 118), INT8_C( 65), -INT8_C( 54), INT8_C( 13), INT8_C( 70), INT8_C( 28), INT8_C( 125), INT8_C( 97), INT8_C( 98), -INT8_C( 47), -INT8_C( 41), -INT8_C( 93), INT8_C( 75), INT8_C( 114), -INT8_C( 85), INT8_C( 67), -INT8_C( 95), INT8_C( 29), -INT8_C( 36), INT8_C( 97), INT8_C( 124), INT8_C( 22), INT8_MAX, -INT8_C( 23), INT8_C( 124), -INT8_C( 6), -INT8_C( 88), INT8_C( 9), INT8_C( 54), -INT8_C( 68), INT8_C( 69), -INT8_C( 
64), -INT8_C( 3), INT8_C( 16), -INT8_C( 51) }, { INT8_C( 67), INT8_C( 44), INT8_C( 74), -INT8_C( 91), -INT8_C( 114), INT8_C( 27), INT8_C( 124), INT8_C( 49), INT8_C( 102), -INT8_C( 18), -INT8_C( 36), -INT8_C( 86), -INT8_C( 113), -INT8_C( 7), -INT8_C( 122), -INT8_C( 16), INT8_C( 117), -INT8_C( 100), INT8_C( 112), INT8_C( 94), INT8_C( 24), INT8_C( 106), INT8_C( 7), INT8_C( 33), -INT8_C( 96), -INT8_C( 61), INT8_C( 103), INT8_C( 96), -INT8_C( 63), INT8_C( 119), INT8_C( 46), INT8_C( 4), -INT8_C( 93), INT8_C( 120), -INT8_C( 87), INT8_C( 49), -INT8_C( 108), INT8_C( 37), INT8_C( 98), -INT8_C( 6), INT8_C( 19), INT8_C( 63), -INT8_C( 92), -INT8_C( 93), INT8_C( 56), INT8_C( 43), -INT8_C( 109), -INT8_C( 82), -INT8_C( 57), INT8_C( 3), INT8_C( 12), -INT8_C( 32), INT8_C( 110), INT8_C( 19), INT8_C( 1), INT8_C( 14), -INT8_C( 41), INT8_C( 104), INT8_C( 111), -INT8_C( 104), -INT8_C( 33), -INT8_C( 99), -INT8_C( 100), -INT8_C( 126) }, { INT8_C( 0), INT8_C( 0), INT8_C( 27), -INT8_C( 100), -INT8_C( 82), INT8_C( 0), INT8_C( 124), INT8_C( 0), INT8_C( 0), INT8_C( 44), INT8_C( 0), INT8_C( 0), INT8_C( 49), INT8_C( 0), -INT8_C( 82), INT8_C( 0), -INT8_C( 61), -INT8_C( 126), INT8_C( 4), INT8_C( 0), INT8_C( 0), INT8_C( 43), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 44), -INT8_C( 36), INT8_C( 0), INT8_C( 0), -INT8_C( 63), INT8_C( 0), INT8_C( 120), INT8_C( 0), -INT8_C( 100), INT8_C( 0), INT8_C( 49), -INT8_C( 86), INT8_C( 12), -INT8_C( 93), INT8_C( 0), INT8_C( 120), INT8_C( 0), -INT8_C( 63), INT8_C( 0), -INT8_C( 33), INT8_C( 7), -INT8_C( 126), INT8_C( 0), INT8_C( 0), INT8_C( 111), INT8_C( 0), -INT8_C( 18), INT8_C( 0), -INT8_C( 33), INT8_C( 27), INT8_C( 0), -INT8_C( 99), INT8_C( 0), INT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i idx = simde_mm512_loadu_epi8(test_vec[i].idx); simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i r = 
simde_mm512_maskz_permutexvar_epi8(test_vec[i].k, idx, a); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask64 k = simde_test_x86_random_mmask64(); simde__m512i idx = simde_test_x86_random_i8x64(); simde__m512i a = simde_test_x86_random_i8x64(); simde__m512i r = simde_mm512_maskz_permutexvar_epi8(k, idx, a); simde_test_x86_write_mmask64(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x64(2, idx, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_permutexvar_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i idx; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( 4022993628330696330), INT64_C( -564047204985781920), INT64_C( 4934063986128071877), INT64_C( 8258886799903261224), INT64_C( 5245738308211416456), INT64_C( 8690736315259258337), INT64_C(-5183161890921602420), INT64_C(-2495336383094170141)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 355.11), SIMDE_FLOAT64_C( -787.72), SIMDE_FLOAT64_C( 472.82), SIMDE_FLOAT64_C( -703.51), SIMDE_FLOAT64_C( -202.49), SIMDE_FLOAT64_C( -470.36), SIMDE_FLOAT64_C( 966.37), SIMDE_FLOAT64_C( 135.20)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -470.36), SIMDE_FLOAT64_C( 135.20), SIMDE_FLOAT64_C( 472.82), SIMDE_FLOAT64_C( 135.20), SIMDE_FLOAT64_C( 135.20), SIMDE_FLOAT64_C( 966.37), SIMDE_FLOAT64_C( -703.51), SIMDE_FLOAT64_C( -202.49)) }, { simde_mm512_set_epi64(INT64_C( 1295774678670654457), INT64_C(-8851107363323835123), INT64_C( 2369486750103851747), INT64_C(-5139586436110975467), INT64_C(-8115609027568940125), INT64_C( 3504612124823893047), INT64_C(-7514888466798804666), INT64_C( 9113506312589344178)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 193.01), SIMDE_FLOAT64_C( -435.27), SIMDE_FLOAT64_C( -84.06), SIMDE_FLOAT64_C( 298.40), SIMDE_FLOAT64_C( 208.07), 
SIMDE_FLOAT64_C( -94.60), SIMDE_FLOAT64_C( 834.28), SIMDE_FLOAT64_C( 260.50)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 834.28), SIMDE_FLOAT64_C( -84.06), SIMDE_FLOAT64_C( 208.07), SIMDE_FLOAT64_C( -84.06), SIMDE_FLOAT64_C( 208.07), SIMDE_FLOAT64_C( 193.01), SIMDE_FLOAT64_C( -435.27), SIMDE_FLOAT64_C( -94.60)) }, { simde_mm512_set_epi64(INT64_C(-3094219001013742557), INT64_C(-3379016320921474793), INT64_C( 7772273849745001049), INT64_C(-4229480058937372017), INT64_C( 8318730560275653847), INT64_C(-2664412856586094061), INT64_C( 2083707536546841162), INT64_C( 5404230241318444880)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -2.60), SIMDE_FLOAT64_C( -38.88), SIMDE_FLOAT64_C( 165.88), SIMDE_FLOAT64_C( 218.73), SIMDE_FLOAT64_C( 375.27), SIMDE_FLOAT64_C( -966.90), SIMDE_FLOAT64_C( -512.98), SIMDE_FLOAT64_C( -737.78)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 375.27), SIMDE_FLOAT64_C( -2.60), SIMDE_FLOAT64_C( -512.98), SIMDE_FLOAT64_C( -2.60), SIMDE_FLOAT64_C( -2.60), SIMDE_FLOAT64_C( 375.27), SIMDE_FLOAT64_C( -966.90), SIMDE_FLOAT64_C( -737.78)) }, { simde_mm512_set_epi64(INT64_C( 2522518958303333112), INT64_C(-1668307566098600867), INT64_C( 8306832211054389426), INT64_C(-4135341282024622606), INT64_C(-7922172549839933132), INT64_C( 90826243433254935), INT64_C( 1037923706586637130), INT64_C( 5568688997300093349)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -179.98), SIMDE_FLOAT64_C( 258.23), SIMDE_FLOAT64_C( 246.22), SIMDE_FLOAT64_C( 97.85), SIMDE_FLOAT64_C( 666.32), SIMDE_FLOAT64_C( 364.80), SIMDE_FLOAT64_C( 759.27), SIMDE_FLOAT64_C( -524.19)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -524.19), SIMDE_FLOAT64_C( 246.22), SIMDE_FLOAT64_C( 364.80), SIMDE_FLOAT64_C( 364.80), SIMDE_FLOAT64_C( 97.85), SIMDE_FLOAT64_C( -179.98), SIMDE_FLOAT64_C( 364.80), SIMDE_FLOAT64_C( 246.22)) }, { simde_mm512_set_epi64(INT64_C(-6876215301736363293), INT64_C(-2253243373865166954), INT64_C( 5866706473820467911), INT64_C(-1945184283153250111), INT64_C(-6043663531296462836), INT64_C(-3201199251206898425), 
INT64_C(-7517867743898200758), INT64_C( 5023666877462679332)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -846.10), SIMDE_FLOAT64_C( -248.07), SIMDE_FLOAT64_C( -411.92), SIMDE_FLOAT64_C( -88.91), SIMDE_FLOAT64_C( 481.68), SIMDE_FLOAT64_C( 170.00), SIMDE_FLOAT64_C( -341.91), SIMDE_FLOAT64_C( 366.57)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 481.68), SIMDE_FLOAT64_C( -248.07), SIMDE_FLOAT64_C( -846.10), SIMDE_FLOAT64_C( -341.91), SIMDE_FLOAT64_C( -88.91), SIMDE_FLOAT64_C( -846.10), SIMDE_FLOAT64_C( 170.00), SIMDE_FLOAT64_C( -88.91)) }, { simde_mm512_set_epi64(INT64_C( 5698026186558744964), INT64_C( 6269499859520580584), INT64_C( 7385991043015762011), INT64_C( 2173549174497415259), INT64_C( 135432210503006619), INT64_C(-7059566968128636366), INT64_C(-1295026765047609725), INT64_C( 5447800525707046939)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 197.25), SIMDE_FLOAT64_C( 358.04), SIMDE_FLOAT64_C( 348.19), SIMDE_FLOAT64_C( -974.37), SIMDE_FLOAT64_C( 713.78), SIMDE_FLOAT64_C( -342.76), SIMDE_FLOAT64_C( -632.29), SIMDE_FLOAT64_C( -698.47)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -974.37), SIMDE_FLOAT64_C( -698.47), SIMDE_FLOAT64_C( 713.78), SIMDE_FLOAT64_C( 713.78), SIMDE_FLOAT64_C( 713.78), SIMDE_FLOAT64_C( -342.76), SIMDE_FLOAT64_C( 713.78), SIMDE_FLOAT64_C( 713.78)) }, { simde_mm512_set_epi64(INT64_C(-7778482448656032654), INT64_C(-7388935565641111344), INT64_C( 2154583157079273400), INT64_C( 4649728279138736034), INT64_C( 1896125478609903946), INT64_C( 6795120210135498653), INT64_C(-8532964392806396349), INT64_C(-8044512602622188161)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 151.19), SIMDE_FLOAT64_C( 763.12), SIMDE_FLOAT64_C( 17.85), SIMDE_FLOAT64_C( 797.99), SIMDE_FLOAT64_C( 280.96), SIMDE_FLOAT64_C( 291.55), SIMDE_FLOAT64_C( -966.13), SIMDE_FLOAT64_C( -93.41)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 291.55), SIMDE_FLOAT64_C( -93.41), SIMDE_FLOAT64_C( -93.41), SIMDE_FLOAT64_C( 291.55), SIMDE_FLOAT64_C( 291.55), SIMDE_FLOAT64_C( 17.85), SIMDE_FLOAT64_C( 280.96), SIMDE_FLOAT64_C( 151.19)) }, { 
simde_mm512_set_epi64(INT64_C(-8313572030703954107), INT64_C( 8197482817575228316), INT64_C( 7734796813438689885), INT64_C(-7308014241195865956), INT64_C( 8618855955534148826), INT64_C(-1358620153905394442), INT64_C(-6300735262609682931), INT64_C(-6423460006708841158)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -747.31), SIMDE_FLOAT64_C( 206.83), SIMDE_FLOAT64_C( -77.03), SIMDE_FLOAT64_C( -852.95), SIMDE_FLOAT64_C( 918.21), SIMDE_FLOAT64_C( -446.06), SIMDE_FLOAT64_C( 363.23), SIMDE_FLOAT64_C( -141.96)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -77.03), SIMDE_FLOAT64_C( -852.95), SIMDE_FLOAT64_C( -77.03), SIMDE_FLOAT64_C( -852.95), SIMDE_FLOAT64_C( -446.06), SIMDE_FLOAT64_C( 206.83), SIMDE_FLOAT64_C( -77.03), SIMDE_FLOAT64_C( -446.06)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_permutexvar_pd(test_vec[i].idx, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_permutexvar_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512i idx; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 903.50), SIMDE_FLOAT64_C( -43.35), SIMDE_FLOAT64_C( 309.91), SIMDE_FLOAT64_C( 846.15), SIMDE_FLOAT64_C( -514.56), SIMDE_FLOAT64_C( -860.98), SIMDE_FLOAT64_C( -280.30), SIMDE_FLOAT64_C( 128.51)), UINT8_C( 87), simde_mm512_set_epi64(INT64_C( 4086853108457730066), INT64_C(-7738570880062900818), INT64_C(-5609503674875201288), INT64_C( 3966155248134972346), INT64_C( 6201510655001996332), INT64_C( 7633590894537872708), INT64_C(-5593693910291334810), INT64_C(-8884932670315115433)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 483.08), SIMDE_FLOAT64_C( 232.04), SIMDE_FLOAT64_C( 774.81), SIMDE_FLOAT64_C( -599.01), SIMDE_FLOAT64_C( 69.04), SIMDE_FLOAT64_C( -149.02), SIMDE_FLOAT64_C( 240.79), SIMDE_FLOAT64_C( -839.80)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 903.50), SIMDE_FLOAT64_C( 232.04), SIMDE_FLOAT64_C( 309.91), 
SIMDE_FLOAT64_C( -149.02), SIMDE_FLOAT64_C( -514.56), SIMDE_FLOAT64_C( -599.01), SIMDE_FLOAT64_C( 232.04), SIMDE_FLOAT64_C( 483.08)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 337.35), SIMDE_FLOAT64_C( -278.32), SIMDE_FLOAT64_C( -744.41), SIMDE_FLOAT64_C( 39.32), SIMDE_FLOAT64_C( 29.68), SIMDE_FLOAT64_C( -490.28), SIMDE_FLOAT64_C( 841.53), SIMDE_FLOAT64_C( 526.21)), UINT8_C(157), simde_mm512_set_epi64(INT64_C( 1516975282358243755), INT64_C(-3949523894747321163), INT64_C(-2989078375862773056), INT64_C(-4700117291684372957), INT64_C(-8784413934425613521), INT64_C( 961867877660623168), INT64_C( 2426510480636680010), INT64_C(-6612602987193650875)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -198.67), SIMDE_FLOAT64_C( 447.98), SIMDE_FLOAT64_C( -925.69), SIMDE_FLOAT64_C( 717.83), SIMDE_FLOAT64_C( -489.88), SIMDE_FLOAT64_C( -37.49), SIMDE_FLOAT64_C( -373.66), SIMDE_FLOAT64_C( -292.35)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -489.88), SIMDE_FLOAT64_C( -278.32), SIMDE_FLOAT64_C( -744.41), SIMDE_FLOAT64_C( -489.88), SIMDE_FLOAT64_C( -198.67), SIMDE_FLOAT64_C( -292.35), SIMDE_FLOAT64_C( 841.53), SIMDE_FLOAT64_C( -925.69)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -207.87), SIMDE_FLOAT64_C( -765.42), SIMDE_FLOAT64_C( 138.83), SIMDE_FLOAT64_C( 699.07), SIMDE_FLOAT64_C( -143.73), SIMDE_FLOAT64_C( 709.96), SIMDE_FLOAT64_C( -767.34), SIMDE_FLOAT64_C( -588.28)), UINT8_C( 75), simde_mm512_set_epi64(INT64_C(-1414228054518303181), INT64_C(-3038909907977133732), INT64_C(-7842471790453318316), INT64_C(-7256600765093102205), INT64_C( 3898178537456140670), INT64_C( -393151907512138120), INT64_C( 600214805061827669), INT64_C( 3163434753014979248)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -842.15), SIMDE_FLOAT64_C( 908.84), SIMDE_FLOAT64_C( -315.78), SIMDE_FLOAT64_C( -86.06), SIMDE_FLOAT64_C( -413.85), SIMDE_FLOAT64_C( 752.26), SIMDE_FLOAT64_C( 609.29), SIMDE_FLOAT64_C( 878.85)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -207.87), SIMDE_FLOAT64_C( -86.06), SIMDE_FLOAT64_C( 138.83), SIMDE_FLOAT64_C( 699.07), 
SIMDE_FLOAT64_C( 908.84), SIMDE_FLOAT64_C( 709.96), SIMDE_FLOAT64_C( -315.78), SIMDE_FLOAT64_C( 878.85)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -659.70), SIMDE_FLOAT64_C( 493.30), SIMDE_FLOAT64_C( 831.29), SIMDE_FLOAT64_C( -619.50), SIMDE_FLOAT64_C( 952.47), SIMDE_FLOAT64_C( -492.61), SIMDE_FLOAT64_C( -68.16), SIMDE_FLOAT64_C( 717.69)), UINT8_C( 23), simde_mm512_set_epi64(INT64_C( 6555696811272222802), INT64_C( -751557688731444383), INT64_C( 5324069355912068288), INT64_C( 5226578984858504214), INT64_C(-8776157931044543560), INT64_C(-1848672680316222475), INT64_C( 1658167909352451238), INT64_C( 377173394815185621)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 493.65), SIMDE_FLOAT64_C( 52.79), SIMDE_FLOAT64_C( 835.54), SIMDE_FLOAT64_C( -712.24), SIMDE_FLOAT64_C( 518.12), SIMDE_FLOAT64_C( -173.80), SIMDE_FLOAT64_C( 487.08), SIMDE_FLOAT64_C( 180.78)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -659.70), SIMDE_FLOAT64_C( 493.30), SIMDE_FLOAT64_C( 831.29), SIMDE_FLOAT64_C( 52.79), SIMDE_FLOAT64_C( 952.47), SIMDE_FLOAT64_C( 835.54), SIMDE_FLOAT64_C( 52.79), SIMDE_FLOAT64_C( 835.54)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -388.47), SIMDE_FLOAT64_C( -643.43), SIMDE_FLOAT64_C( -331.34), SIMDE_FLOAT64_C( 72.67), SIMDE_FLOAT64_C( -870.79), SIMDE_FLOAT64_C( -722.44), SIMDE_FLOAT64_C( 529.44), SIMDE_FLOAT64_C( -949.73)), UINT8_C( 46), simde_mm512_set_epi64(INT64_C( 5042844271761388948), INT64_C( 877966720713550779), INT64_C(-8757547308289839577), INT64_C( 6619480224799141474), INT64_C( 3171924723684651500), INT64_C( 1941135797030545610), INT64_C( 1935432241277000941), INT64_C( 5390015454023535429)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -783.00), SIMDE_FLOAT64_C( -102.18), SIMDE_FLOAT64_C( 960.00), SIMDE_FLOAT64_C( 22.93), SIMDE_FLOAT64_C( -395.13), SIMDE_FLOAT64_C( 145.63), SIMDE_FLOAT64_C( -149.04), SIMDE_FLOAT64_C( 214.37)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -388.47), SIMDE_FLOAT64_C( -643.43), SIMDE_FLOAT64_C( -783.00), SIMDE_FLOAT64_C( 72.67), SIMDE_FLOAT64_C( 22.93), SIMDE_FLOAT64_C( 
145.63), SIMDE_FLOAT64_C( 960.00), SIMDE_FLOAT64_C( -949.73)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -607.82), SIMDE_FLOAT64_C( 296.47), SIMDE_FLOAT64_C( -327.04), SIMDE_FLOAT64_C( -23.06), SIMDE_FLOAT64_C( -95.21), SIMDE_FLOAT64_C( 10.75), SIMDE_FLOAT64_C( -668.43), SIMDE_FLOAT64_C( -210.00)), UINT8_C( 43), simde_mm512_set_epi64(INT64_C(-4824857292892203785), INT64_C( 6286668337562607931), INT64_C( 1432089847019206822), INT64_C(-2235254547542691893), INT64_C(-2975358417486477451), INT64_C( 5974528986311566652), INT64_C( 4798128784982043356), INT64_C(-3663239326212228984)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -777.05), SIMDE_FLOAT64_C( 621.95), SIMDE_FLOAT64_C( -116.18), SIMDE_FLOAT64_C( 909.84), SIMDE_FLOAT64_C( 696.35), SIMDE_FLOAT64_C( -644.31), SIMDE_FLOAT64_C( 486.53), SIMDE_FLOAT64_C( -235.48)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -607.82), SIMDE_FLOAT64_C( 296.47), SIMDE_FLOAT64_C( 621.95), SIMDE_FLOAT64_C( -23.06), SIMDE_FLOAT64_C( -116.18), SIMDE_FLOAT64_C( 10.75), SIMDE_FLOAT64_C( 909.84), SIMDE_FLOAT64_C( -235.48)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 383.31), SIMDE_FLOAT64_C( 641.22), SIMDE_FLOAT64_C( -747.07), SIMDE_FLOAT64_C( -762.67), SIMDE_FLOAT64_C( 744.11), SIMDE_FLOAT64_C( 350.11), SIMDE_FLOAT64_C( 409.27), SIMDE_FLOAT64_C( 481.83)), UINT8_C(217), simde_mm512_set_epi64(INT64_C( 8817045194671758320), INT64_C(-5234965963681749811), INT64_C(-5331803015084564567), INT64_C(-2592115690296560951), INT64_C( 1248919004007478956), INT64_C( 6138870327161964525), INT64_C(-3131027839562886620), INT64_C(-2317534169293970587)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -251.41), SIMDE_FLOAT64_C( 223.24), SIMDE_FLOAT64_C( -528.18), SIMDE_FLOAT64_C( 813.86), SIMDE_FLOAT64_C( 925.24), SIMDE_FLOAT64_C( 849.34), SIMDE_FLOAT64_C( 112.68), SIMDE_FLOAT64_C( -96.70)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -96.70), SIMDE_FLOAT64_C( -528.18), SIMDE_FLOAT64_C( -747.07), SIMDE_FLOAT64_C( 112.68), SIMDE_FLOAT64_C( 813.86), SIMDE_FLOAT64_C( 350.11), SIMDE_FLOAT64_C( 409.27), 
SIMDE_FLOAT64_C( -528.18)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -653.71), SIMDE_FLOAT64_C( 371.73), SIMDE_FLOAT64_C( 757.18), SIMDE_FLOAT64_C( 214.84), SIMDE_FLOAT64_C( 830.24), SIMDE_FLOAT64_C( 903.53), SIMDE_FLOAT64_C( -831.08), SIMDE_FLOAT64_C( 815.07)), UINT8_C(120), simde_mm512_set_epi64(INT64_C( 2443726936750986290), INT64_C(-9169540186107924323), INT64_C(-8862352522735882724), INT64_C(-8073374806940567698), INT64_C( 5084858765896996878), INT64_C(-8091734011393453138), INT64_C(-8622672902733011795), INT64_C( 255780820358907048)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -710.54), SIMDE_FLOAT64_C( -140.51), SIMDE_FLOAT64_C( 346.36), SIMDE_FLOAT64_C( 318.14), SIMDE_FLOAT64_C( 399.19), SIMDE_FLOAT64_C( -291.83), SIMDE_FLOAT64_C( 951.57), SIMDE_FLOAT64_C( 272.52)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -653.71), SIMDE_FLOAT64_C( 346.36), SIMDE_FLOAT64_C( 318.14), SIMDE_FLOAT64_C( -140.51), SIMDE_FLOAT64_C( -140.51), SIMDE_FLOAT64_C( 903.53), SIMDE_FLOAT64_C( -831.08), SIMDE_FLOAT64_C( 815.07)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_permutexvar_pd(test_vec[i].src, test_vec[i].k, test_vec[i].idx, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_maskz_permutexvar_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i idx; simde__m512d a; simde__m512d r; } test_vec[8] = { { UINT8_C( 87), simde_mm512_set_epi64(INT64_C( 4086853108457730066), INT64_C(-7738570880062900818), INT64_C(-5609503674875201288), INT64_C( 3966155248134972346), INT64_C( 6201510655001996332), INT64_C( 7633590894537872708), INT64_C(-5593693910291334810), INT64_C(-8884932670315115433)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 69.04), SIMDE_FLOAT64_C( -514.56), SIMDE_FLOAT64_C( -149.02), SIMDE_FLOAT64_C( -860.98), SIMDE_FLOAT64_C( 240.79), SIMDE_FLOAT64_C( -280.30), SIMDE_FLOAT64_C( -839.80), SIMDE_FLOAT64_C( 128.51)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), 
SIMDE_FLOAT64_C( -514.56), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -280.30), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -860.98), SIMDE_FLOAT64_C( -514.56), SIMDE_FLOAT64_C( 69.04)) }, { UINT8_C(109), simde_mm512_set_epi64(INT64_C(-8784413934425613521), INT64_C( 961867877660623168), INT64_C( 2426510480636680010), INT64_C(-6612602987193650875), INT64_C( 6143333881204814617), INT64_C( -890089152921238147), INT64_C( 8823574133744668217), INT64_C(-6364969741708969084)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -835.53), SIMDE_FLOAT64_C( -203.04), SIMDE_FLOAT64_C( 571.79), SIMDE_FLOAT64_C( -80.71), SIMDE_FLOAT64_C( 675.92), SIMDE_FLOAT64_C( 632.01), SIMDE_FLOAT64_C( 490.41), SIMDE_FLOAT64_C( 456.89)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 456.89), SIMDE_FLOAT64_C( 632.01), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 490.41), SIMDE_FLOAT64_C( 571.79), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -80.71)) }, { UINT8_C( 33), simde_mm512_set_epi64(INT64_C( 3418472134552461373), INT64_C(-6111840559061041971), INT64_C( 6656352319933975670), INT64_C( 2357435311113502667), INT64_C(-8860733056306413573), INT64_C(-8949662758380266635), INT64_C( 4701312916269037777), INT64_C(-1461656220613716217)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -577.36), SIMDE_FLOAT64_C( 907.23), SIMDE_FLOAT64_C( 957.37), SIMDE_FLOAT64_C( -359.60), SIMDE_FLOAT64_C( -934.92), SIMDE_FLOAT64_C( -213.75), SIMDE_FLOAT64_C( -657.02), SIMDE_FLOAT64_C( 403.00)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 907.23), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -577.36)) }, { UINT8_C(131), simde_mm512_set_epi64(INT64_C(-2775611318017263858), INT64_C( 7897714815450887445), INT64_C(-2675129847260557604), INT64_C( 2145911307457407401), INT64_C( 3797455296467543827), INT64_C(-3815826294263537782), INT64_C( 2205533861506052469), INT64_C( 3531417307031756813)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 268.99), 
SIMDE_FLOAT64_C( -842.15), SIMDE_FLOAT64_C( -207.87), SIMDE_FLOAT64_C( 908.84), SIMDE_FLOAT64_C( -765.42), SIMDE_FLOAT64_C( -315.78), SIMDE_FLOAT64_C( 138.83), SIMDE_FLOAT64_C( -86.06)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -842.15), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -207.87), SIMDE_FLOAT64_C( -207.87)) }, { UINT8_C(213), simde_mm512_set_epi64(INT64_C(-2603844271228681340), INT64_C( 3101493959844818499), INT64_C(-7569746812758465314), INT64_C( -95001376835728923), INT64_C( 2811174252033921756), INT64_C( 1971701120159461885), INT64_C(-1168609383370522899), INT64_C(-6996433667044410060)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -619.50), SIMDE_FLOAT64_C( 518.12), SIMDE_FLOAT64_C( 952.47), SIMDE_FLOAT64_C( -173.80), SIMDE_FLOAT64_C( -492.61), SIMDE_FLOAT64_C( 487.08), SIMDE_FLOAT64_C( -68.16), SIMDE_FLOAT64_C( 180.78)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -173.80), SIMDE_FLOAT64_C( -492.61), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 952.47), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 952.47), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -173.80)) }, { UINT8_C( 71), simde_mm512_set_epi64(INT64_C( 7268860797756174523), INT64_C( 684742770982669497), INT64_C( 778252790359918942), INT64_C( 8946221359026744959), INT64_C( 8570432854894274862), INT64_C(-4670296842224865750), INT64_C(-8736438908262001915), INT64_C(-1516874692875012272)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -949.73), SIMDE_FLOAT64_C( -453.25), SIMDE_FLOAT64_C( 280.87), SIMDE_FLOAT64_C( -904.81), SIMDE_FLOAT64_C( 380.83), SIMDE_FLOAT64_C( 50.50), SIMDE_FLOAT64_C( -236.67), SIMDE_FLOAT64_C( -282.31)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -236.67), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 50.50), SIMDE_FLOAT64_C( 280.87), SIMDE_FLOAT64_C( -282.31)) }, { UINT8_C(208), simde_mm512_set_epi64(INT64_C(-3323383259847225301), INT64_C( 2001511420457827007), 
INT64_C( 8280910196874944184), INT64_C( -368934386460614235), INT64_C(-9011857488067354178), INT64_C( 5578921037540219940), INT64_C(-7880186302232587827), INT64_C( 7848707034806784644)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 930.79), SIMDE_FLOAT64_C( 677.41), SIMDE_FLOAT64_C( 111.33), SIMDE_FLOAT64_C( -352.24), SIMDE_FLOAT64_C( -176.75), SIMDE_FLOAT64_C( -479.79), SIMDE_FLOAT64_C( -316.94), SIMDE_FLOAT64_C( 602.83)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -176.75), SIMDE_FLOAT64_C( 930.79), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 111.33), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(212), simde_mm512_set_epi64(INT64_C( -831601358278995789), INT64_C(-2800664419916301039), INT64_C( 3280702275774868225), INT64_C(-4735905134864699368), INT64_C( 7051416147935021095), INT64_C(-4824857292892203785), INT64_C( 6286668337562607931), INT64_C( 1432089847019206822)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 346.90), SIMDE_FLOAT64_C( 996.15), SIMDE_FLOAT64_C( -777.05), SIMDE_FLOAT64_C( -607.82), SIMDE_FLOAT64_C( 621.95), SIMDE_FLOAT64_C( 296.47), SIMDE_FLOAT64_C( -116.18), SIMDE_FLOAT64_C( -327.04)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 621.95), SIMDE_FLOAT64_C( -116.18), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -327.04), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 346.90), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_maskz_permutexvar_pd(test_vec[i].k, test_vec[i].idx, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_permutexvar_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i idx; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 936676195), INT32_C( -430989686), INT32_C( -131327474), INT32_C( 910508384), INT32_C( 1148801293), INT32_C(-1204409147), INT32_C( 1922921929), INT32_C( 2087027240), INT32_C( 1221368626), 
INT32_C(-1114006136), INT32_C( 2023469730), INT32_C( 463308257), INT32_C(-1206798920), INT32_C( -968449396), INT32_C( -580990777), INT32_C( -896508445)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 120.10), SIMDE_FLOAT32_C( -64.06), SIMDE_FLOAT32_C( -620.03), SIMDE_FLOAT32_C( 559.81), SIMDE_FLOAT32_C( 185.23), SIMDE_FLOAT32_C( -423.61), SIMDE_FLOAT32_C( -11.91), SIMDE_FLOAT32_C( 407.56), SIMDE_FLOAT32_C( 355.11), SIMDE_FLOAT32_C( -787.72), SIMDE_FLOAT32_C( 472.82), SIMDE_FLOAT32_C( -703.51), SIMDE_FLOAT32_C( -202.49), SIMDE_FLOAT32_C( -470.36), SIMDE_FLOAT32_C( 966.37), SIMDE_FLOAT32_C( 135.20)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -202.49), SIMDE_FLOAT32_C( -423.61), SIMDE_FLOAT32_C( -64.06), SIMDE_FLOAT32_C( 135.20), SIMDE_FLOAT32_C( -620.03), SIMDE_FLOAT32_C( 472.82), SIMDE_FLOAT32_C( -11.91), SIMDE_FLOAT32_C( 407.56), SIMDE_FLOAT32_C( -470.36), SIMDE_FLOAT32_C( 407.56), SIMDE_FLOAT32_C( -470.36), SIMDE_FLOAT32_C( 966.37), SIMDE_FLOAT32_C( 407.56), SIMDE_FLOAT32_C( 559.81), SIMDE_FLOAT32_C( 355.11), SIMDE_FLOAT32_C( -202.49)) }, { simde_mm512_set_epi32(INT32_C(-1732993162), INT32_C( 1212743926), INT32_C( 1966971402), INT32_C(-1506668774), INT32_C(-1700657265), INT32_C( 1944327234), INT32_C( -355879099), INT32_C(-1588067414), INT32_C( 301696052), INT32_C( 1998339065), INT32_C(-2060809025), INT32_C(-1942156019), INT32_C( 551689125), INT32_C( 669995747), INT32_C(-1196653219), INT32_C( -147816939)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 664.52), SIMDE_FLOAT32_C( -224.13), SIMDE_FLOAT32_C( 633.65), SIMDE_FLOAT32_C( -834.15), SIMDE_FLOAT32_C( -157.33), SIMDE_FLOAT32_C( -819.46), SIMDE_FLOAT32_C( 541.44), SIMDE_FLOAT32_C( 112.81), SIMDE_FLOAT32_C( -98.08), SIMDE_FLOAT32_C( 464.19), SIMDE_FLOAT32_C( 711.12), SIMDE_FLOAT32_C( 282.83), SIMDE_FLOAT32_C( -774.08), SIMDE_FLOAT32_C( 841.24), SIMDE_FLOAT32_C( -414.07), SIMDE_FLOAT32_C( 79.76)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 464.19), SIMDE_FLOAT32_C( 464.19), SIMDE_FLOAT32_C( -819.46), SIMDE_FLOAT32_C( -819.46), SIMDE_FLOAT32_C( 
664.52), SIMDE_FLOAT32_C( 841.24), SIMDE_FLOAT32_C( 711.12), SIMDE_FLOAT32_C( -819.46), SIMDE_FLOAT32_C( 282.83), SIMDE_FLOAT32_C( 541.44), SIMDE_FLOAT32_C( 664.52), SIMDE_FLOAT32_C( 633.65), SIMDE_FLOAT32_C( 711.12), SIMDE_FLOAT32_C( -774.08), SIMDE_FLOAT32_C( 633.65), SIMDE_FLOAT32_C( 711.12)) }, { simde_mm512_set_epi32(INT32_C(-1844524534), INT32_C( 359706932), INT32_C( 21147132), INT32_C(-1205907433), INT32_C( 241660444), INT32_C(-1425169590), INT32_C( 1296561443), INT32_C(-1934442075), INT32_C( 2141890625), INT32_C( 2063982974), INT32_C(-1791266937), INT32_C(-1677757015), INT32_C(-1341587157), INT32_C( 71085124), INT32_C( 1045857655), INT32_C( 563120574)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -179.98), SIMDE_FLOAT32_C( 258.23), SIMDE_FLOAT32_C( 246.22), SIMDE_FLOAT32_C( 97.85), SIMDE_FLOAT32_C( 666.32), SIMDE_FLOAT32_C( 364.80), SIMDE_FLOAT32_C( 759.27), SIMDE_FLOAT32_C( -524.19), SIMDE_FLOAT32_C( -726.51), SIMDE_FLOAT32_C( 381.71), SIMDE_FLOAT32_C( 819.12), SIMDE_FLOAT32_C( 145.28), SIMDE_FLOAT32_C( -99.37), SIMDE_FLOAT32_C( -151.02), SIMDE_FLOAT32_C( 551.65), SIMDE_FLOAT32_C( 155.58)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 364.80), SIMDE_FLOAT32_C( 145.28), SIMDE_FLOAT32_C( 97.85), SIMDE_FLOAT32_C( -726.51), SIMDE_FLOAT32_C( 97.85), SIMDE_FLOAT32_C( 364.80), SIMDE_FLOAT32_C( -99.37), SIMDE_FLOAT32_C( 819.12), SIMDE_FLOAT32_C( 551.65), SIMDE_FLOAT32_C( 258.23), SIMDE_FLOAT32_C( -726.51), SIMDE_FLOAT32_C( 759.27), SIMDE_FLOAT32_C( 666.32), SIMDE_FLOAT32_C( 145.28), SIMDE_FLOAT32_C( -726.51), SIMDE_FLOAT32_C( 258.23)) }, { simde_mm512_set_epi32(INT32_C(-1600993635), INT32_C( 1692797667), INT32_C( -524624106), INT32_C( -196896874), INT32_C( 1365949044), INT32_C( 1838002887), INT32_C( -452898509), INT32_C( 1408911553), INT32_C(-1407150071), INT32_C( -82352116), INT32_C( -745337283), INT32_C( -527368953), INT32_C(-1750389986), INT32_C( 1217697098), INT32_C( 1169663592), INT32_C(-1794175196)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -985.32), SIMDE_FLOAT32_C( 485.76), 
SIMDE_FLOAT32_C( 234.60), SIMDE_FLOAT32_C( 786.03), SIMDE_FLOAT32_C( 859.59), SIMDE_FLOAT32_C( 489.95), SIMDE_FLOAT32_C( -409.35), SIMDE_FLOAT32_C( 796.52), SIMDE_FLOAT32_C( -846.10), SIMDE_FLOAT32_C( -248.07), SIMDE_FLOAT32_C( -411.92), SIMDE_FLOAT32_C( -88.91), SIMDE_FLOAT32_C( 481.68), SIMDE_FLOAT32_C( 170.00), SIMDE_FLOAT32_C( -341.91), SIMDE_FLOAT32_C( 366.57)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 234.60), SIMDE_FLOAT32_C( 481.68), SIMDE_FLOAT32_C( -248.07), SIMDE_FLOAT32_C( -248.07), SIMDE_FLOAT32_C( -88.91), SIMDE_FLOAT32_C( -846.10), SIMDE_FLOAT32_C( 481.68), SIMDE_FLOAT32_C( -341.91), SIMDE_FLOAT32_C( -409.35), SIMDE_FLOAT32_C( 786.03), SIMDE_FLOAT32_C( 234.60), SIMDE_FLOAT32_C( -846.10), SIMDE_FLOAT32_C( 485.76), SIMDE_FLOAT32_C( 489.95), SIMDE_FLOAT32_C( 796.52), SIMDE_FLOAT32_C( -88.91)) }, { simde_mm512_set_epi32(INT32_C(-1723897302), INT32_C(-1378607382), INT32_C(-1399746280), INT32_C( 55039129), INT32_C( -614650529), INT32_C( 1411406194), INT32_C( 789645610), INT32_C( 647523062), INT32_C( 1326675104), INT32_C(-1833621116), INT32_C( 1459731687), INT32_C(-1374295064), INT32_C( 1719685048), INT32_C(-1859395493), INT32_C( 506068853), INT32_C( 1338183771)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 156.66), SIMDE_FLOAT32_C( 658.18), SIMDE_FLOAT32_C( 198.89), SIMDE_FLOAT32_C( -330.38), SIMDE_FLOAT32_C( -766.40), SIMDE_FLOAT32_C( -462.89), SIMDE_FLOAT32_C( -495.88), SIMDE_FLOAT32_C( -905.06), SIMDE_FLOAT32_C( -794.42), SIMDE_FLOAT32_C( 266.49), SIMDE_FLOAT32_C( -263.27), SIMDE_FLOAT32_C( -537.47), SIMDE_FLOAT32_C( 74.85), SIMDE_FLOAT32_C( -705.33), SIMDE_FLOAT32_C( 127.81), SIMDE_FLOAT32_C( 699.62)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -462.89), SIMDE_FLOAT32_C( -462.89), SIMDE_FLOAT32_C( -905.06), SIMDE_FLOAT32_C( -495.88), SIMDE_FLOAT32_C( 156.66), SIMDE_FLOAT32_C( -705.33), SIMDE_FLOAT32_C( -462.89), SIMDE_FLOAT32_C( 266.49), SIMDE_FLOAT32_C( 699.62), SIMDE_FLOAT32_C( -537.47), SIMDE_FLOAT32_C( -794.42), SIMDE_FLOAT32_C( -905.06), SIMDE_FLOAT32_C( -905.06), 
SIMDE_FLOAT32_C( -766.40), SIMDE_FLOAT32_C( -263.27), SIMDE_FLOAT32_C( -766.40)) }, { simde_mm512_set_epi32(INT32_C( 2006733779), INT32_C(-1344310054), INT32_C( -316328405), INT32_C( 365448438), INT32_C(-1467004247), INT32_C( 1348423181), INT32_C(-1495578328), INT32_C( 657519930), INT32_C(-1822795869), INT32_C( -508699764), INT32_C(-2109145696), INT32_C( -433815627), INT32_C(-1544129190), INT32_C(-1521382934), INT32_C( 72732953), INT32_C( 1946881855)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -747.31), SIMDE_FLOAT32_C( 206.83), SIMDE_FLOAT32_C( -77.03), SIMDE_FLOAT32_C( -852.95), SIMDE_FLOAT32_C( 918.21), SIMDE_FLOAT32_C( -446.06), SIMDE_FLOAT32_C( 363.23), SIMDE_FLOAT32_C( -141.96), SIMDE_FLOAT32_C( 98.64), SIMDE_FLOAT32_C( -943.22), SIMDE_FLOAT32_C( -111.23), SIMDE_FLOAT32_C( 515.15), SIMDE_FLOAT32_C( -161.39), SIMDE_FLOAT32_C( 209.66), SIMDE_FLOAT32_C( 207.66), SIMDE_FLOAT32_C( -218.56)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -161.39), SIMDE_FLOAT32_C( -446.06), SIMDE_FLOAT32_C( 918.21), SIMDE_FLOAT32_C( -943.22), SIMDE_FLOAT32_C( 363.23), SIMDE_FLOAT32_C( -77.03), SIMDE_FLOAT32_C( -141.96), SIMDE_FLOAT32_C( -446.06), SIMDE_FLOAT32_C( -161.39), SIMDE_FLOAT32_C( -852.95), SIMDE_FLOAT32_C( -218.56), SIMDE_FLOAT32_C( -111.23), SIMDE_FLOAT32_C( -446.06), SIMDE_FLOAT32_C( -446.06), SIMDE_FLOAT32_C( 363.23), SIMDE_FLOAT32_C( -747.31)) }, { simde_mm512_set_epi32(INT32_C(-1976311223), INT32_C(-1733873773), INT32_C( 511400049), INT32_C( 500426498), INT32_C( -644784401), INT32_C( 1570198900), INT32_C( 1945626784), INT32_C(-1332251479), INT32_C( -316629202), INT32_C( -236250164), INT32_C( 893952031), INT32_C( -476733654), INT32_C(-2008951329), INT32_C(-1854469611), INT32_C( -862339611), INT32_C( 546668760)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 366.70), SIMDE_FLOAT32_C( -276.00), SIMDE_FLOAT32_C( 615.81), SIMDE_FLOAT32_C( -317.53), SIMDE_FLOAT32_C( -673.63), SIMDE_FLOAT32_C( -723.48), SIMDE_FLOAT32_C( 832.55), SIMDE_FLOAT32_C( -792.04), SIMDE_FLOAT32_C( 260.08), SIMDE_FLOAT32_C( 
-284.98), SIMDE_FLOAT32_C( -412.45), SIMDE_FLOAT32_C( 906.86), SIMDE_FLOAT32_C( 245.05), SIMDE_FLOAT32_C( -798.29), SIMDE_FLOAT32_C( -913.68), SIMDE_FLOAT32_C( 122.95)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 832.55), SIMDE_FLOAT32_C( 245.05), SIMDE_FLOAT32_C( -913.68), SIMDE_FLOAT32_C( -798.29), SIMDE_FLOAT32_C( 366.70), SIMDE_FLOAT32_C( 906.86), SIMDE_FLOAT32_C( 122.95), SIMDE_FLOAT32_C( 832.55), SIMDE_FLOAT32_C( -276.00), SIMDE_FLOAT32_C( -317.53), SIMDE_FLOAT32_C( 366.70), SIMDE_FLOAT32_C( -723.48), SIMDE_FLOAT32_C( 366.70), SIMDE_FLOAT32_C( -412.45), SIMDE_FLOAT32_C( -412.45), SIMDE_FLOAT32_C( -792.04)) }, { simde_mm512_set_epi32(INT32_C( 1809884012), INT32_C( -191206140), INT32_C( -553702639), INT32_C(-1800209842), INT32_C( 721509385), INT32_C( -3110338), INT32_C( 629848436), INT32_C( 1833821522), INT32_C( 1870762521), INT32_C( 1138301165), INT32_C( 499100111), INT32_C( -583483550), INT32_C( 449841541), INT32_C( 1832272682), INT32_C( -100358742), INT32_C( 1797905164)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -239.16), SIMDE_FLOAT32_C( 579.20), SIMDE_FLOAT32_C( -204.61), SIMDE_FLOAT32_C( -391.63), SIMDE_FLOAT32_C( 200.64), SIMDE_FLOAT32_C( 335.63), SIMDE_FLOAT32_C( -634.66), SIMDE_FLOAT32_C( 738.93), SIMDE_FLOAT32_C( 607.08), SIMDE_FLOAT32_C( 439.69), SIMDE_FLOAT32_C( -668.91), SIMDE_FLOAT32_C( 930.40), SIMDE_FLOAT32_C( -886.74), SIMDE_FLOAT32_C( 273.66), SIMDE_FLOAT32_C( -214.28), SIMDE_FLOAT32_C( 892.54)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -391.63), SIMDE_FLOAT32_C( 930.40), SIMDE_FLOAT32_C( -214.28), SIMDE_FLOAT32_C( 579.20), SIMDE_FLOAT32_C( -634.66), SIMDE_FLOAT32_C( 579.20), SIMDE_FLOAT32_C( 930.40), SIMDE_FLOAT32_C( 273.66), SIMDE_FLOAT32_C( -634.66), SIMDE_FLOAT32_C( -204.61), SIMDE_FLOAT32_C( -239.16), SIMDE_FLOAT32_C( 273.66), SIMDE_FLOAT32_C( -668.91), SIMDE_FLOAT32_C( 335.63), SIMDE_FLOAT32_C( 335.63), SIMDE_FLOAT32_C( -391.63)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = 
simde_mm512_permutexvar_ps(test_vec[i].idx, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_permutexvar_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512i idx; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -126.54), SIMDE_FLOAT32_C( 486.54), SIMDE_FLOAT32_C( 115.89), SIMDE_FLOAT32_C( 387.13), SIMDE_FLOAT32_C( -862.52), SIMDE_FLOAT32_C( -954.44), SIMDE_FLOAT32_C( -62.35), SIMDE_FLOAT32_C( -928.05), SIMDE_FLOAT32_C( 553.71), SIMDE_FLOAT32_C( 241.48), SIMDE_FLOAT32_C( -508.93), SIMDE_FLOAT32_C( 882.19), SIMDE_FLOAT32_C( -62.25), SIMDE_FLOAT32_C( 746.09), SIMDE_FLOAT32_C( 695.90), SIMDE_FLOAT32_C( 211.82)), UINT16_C(16928), simde_mm512_set_epi32(INT32_C(-1969848041), INT32_C( 986227924), INT32_C( 1136615869), INT32_C(-1528801648), INT32_C( -812889932), INT32_C(-1855240790), INT32_C( 1844683884), INT32_C( -586741017), INT32_C( 1867360941), INT32_C( -991976637), INT32_C( 1849718349), INT32_C( 1716834208), INT32_C(-1194054110), INT32_C( 2134562344), INT32_C(-1402914588), INT32_C( -545264918)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -642.79), SIMDE_FLOAT32_C( 80.95), SIMDE_FLOAT32_C( 263.76), SIMDE_FLOAT32_C( -849.20), SIMDE_FLOAT32_C( 701.20), SIMDE_FLOAT32_C( -187.10), SIMDE_FLOAT32_C( -193.72), SIMDE_FLOAT32_C( -482.55), SIMDE_FLOAT32_C( 133.18), SIMDE_FLOAT32_C( -425.61), SIMDE_FLOAT32_C( 934.88), SIMDE_FLOAT32_C( 29.19), SIMDE_FLOAT32_C( -215.85), SIMDE_FLOAT32_C( -948.93), SIMDE_FLOAT32_C( -669.86), SIMDE_FLOAT32_C( -774.43)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -126.54), SIMDE_FLOAT32_C( 29.19), SIMDE_FLOAT32_C( 115.89), SIMDE_FLOAT32_C( 387.13), SIMDE_FLOAT32_C( -862.52), SIMDE_FLOAT32_C( -954.44), SIMDE_FLOAT32_C( -849.20), SIMDE_FLOAT32_C( -928.05), SIMDE_FLOAT32_C( 553.71), SIMDE_FLOAT32_C( 241.48), SIMDE_FLOAT32_C( 263.76), SIMDE_FLOAT32_C( 882.19), SIMDE_FLOAT32_C( -62.25), SIMDE_FLOAT32_C( 746.09), SIMDE_FLOAT32_C( 695.90), 
SIMDE_FLOAT32_C( 211.82)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 151.66), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( -269.77), SIMDE_FLOAT32_C( 614.76), SIMDE_FLOAT32_C( -133.17), SIMDE_FLOAT32_C( -180.83), SIMDE_FLOAT32_C( 194.81), SIMDE_FLOAT32_C( 42.31), SIMDE_FLOAT32_C( 374.34), SIMDE_FLOAT32_C( -656.44), SIMDE_FLOAT32_C( 548.30), SIMDE_FLOAT32_C( 222.71), SIMDE_FLOAT32_C( -984.87), SIMDE_FLOAT32_C( 443.22), SIMDE_FLOAT32_C( 673.00), SIMDE_FLOAT32_C( -157.35)), UINT16_C(35439), simde_mm512_set_epi32(INT32_C( 1227892331), INT32_C(-1027676412), INT32_C( 359651495), INT32_C( 1118790944), INT32_C( 91682217), INT32_C(-2133769799), INT32_C(-1769360502), INT32_C( -926475819), INT32_C( 1573948348), INT32_C(-1072703852), INT32_C( -613174427), INT32_C( 2078566978), INT32_C(-1129401521), INT32_C(-1989448771), INT32_C(-2043876227), INT32_C( 1692040296)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 56.76), SIMDE_FLOAT32_C( 152.42), SIMDE_FLOAT32_C( 481.26), SIMDE_FLOAT32_C( 801.28), SIMDE_FLOAT32_C( 636.86), SIMDE_FLOAT32_C( 909.80), SIMDE_FLOAT32_C( 703.56), SIMDE_FLOAT32_C( 542.79), SIMDE_FLOAT32_C( -247.58), SIMDE_FLOAT32_C( -687.87), SIMDE_FLOAT32_C( -260.04), SIMDE_FLOAT32_C( -901.34), SIMDE_FLOAT32_C( -209.58), SIMDE_FLOAT32_C( 661.31), SIMDE_FLOAT32_C( 996.67), SIMDE_FLOAT32_C( -590.89)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 636.86), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( -269.77), SIMDE_FLOAT32_C( 614.76), SIMDE_FLOAT32_C( 703.56), SIMDE_FLOAT32_C( -180.83), SIMDE_FLOAT32_C( 909.80), SIMDE_FLOAT32_C( 42.31), SIMDE_FLOAT32_C( 374.34), SIMDE_FLOAT32_C( -901.34), SIMDE_FLOAT32_C( -260.04), SIMDE_FLOAT32_C( 222.71), SIMDE_FLOAT32_C( 56.76), SIMDE_FLOAT32_C( 481.26), SIMDE_FLOAT32_C( 481.26), SIMDE_FLOAT32_C( 542.79)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -750.66), SIMDE_FLOAT32_C( -458.79), SIMDE_FLOAT32_C( 673.71), SIMDE_FLOAT32_C( 151.27), SIMDE_FLOAT32_C( 301.16), SIMDE_FLOAT32_C( -17.04), SIMDE_FLOAT32_C( -751.47), SIMDE_FLOAT32_C( -39.79), SIMDE_FLOAT32_C( 
-478.79), SIMDE_FLOAT32_C( -874.74), SIMDE_FLOAT32_C( 82.59), SIMDE_FLOAT32_C( 758.78), SIMDE_FLOAT32_C( -838.81), SIMDE_FLOAT32_C( -454.98), SIMDE_FLOAT32_C( 5.20), SIMDE_FLOAT32_C( 523.36)), UINT16_C(36222), simde_mm512_set_epi32(INT32_C( 2006445721), INT32_C( 1432323599), INT32_C( 475511895), INT32_C( -589436639), INT32_C(-2145227731), INT32_C( 339016017), INT32_C( 1108425812), INT32_C(-1667703848), INT32_C( 1002175747), INT32_C( -280079864), INT32_C( 92142797), INT32_C( 1809366615), INT32_C(-1928845693), INT32_C(-1066128967), INT32_C(-1068022325), INT32_C( -129380427)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.31), SIMDE_FLOAT32_C( 116.19), SIMDE_FLOAT32_C( 258.15), SIMDE_FLOAT32_C( -933.38), SIMDE_FLOAT32_C( -512.69), SIMDE_FLOAT32_C( -147.18), SIMDE_FLOAT32_C( 595.37), SIMDE_FLOAT32_C( 114.40), SIMDE_FLOAT32_C( 621.76), SIMDE_FLOAT32_C( -918.69), SIMDE_FLOAT32_C( -162.22), SIMDE_FLOAT32_C( -310.44), SIMDE_FLOAT32_C( -814.01), SIMDE_FLOAT32_C( -464.51), SIMDE_FLOAT32_C( -491.30), SIMDE_FLOAT32_C( -261.98)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 595.37), SIMDE_FLOAT32_C( -458.79), SIMDE_FLOAT32_C( 673.71), SIMDE_FLOAT32_C( 151.27), SIMDE_FLOAT32_C( 258.15), SIMDE_FLOAT32_C( -491.30), SIMDE_FLOAT32_C( -751.47), SIMDE_FLOAT32_C( 114.40), SIMDE_FLOAT32_C( -478.79), SIMDE_FLOAT32_C( 114.40), SIMDE_FLOAT32_C( 258.15), SIMDE_FLOAT32_C( 621.76), SIMDE_FLOAT32_C( -814.01), SIMDE_FLOAT32_C( 595.37), SIMDE_FLOAT32_C( -512.69), SIMDE_FLOAT32_C( 523.36)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -625.36), SIMDE_FLOAT32_C( 576.25), SIMDE_FLOAT32_C( -491.19), SIMDE_FLOAT32_C( 36.67), SIMDE_FLOAT32_C( 144.75), SIMDE_FLOAT32_C( -337.24), SIMDE_FLOAT32_C( 923.33), SIMDE_FLOAT32_C( -171.10), SIMDE_FLOAT32_C( -499.67), SIMDE_FLOAT32_C( -489.63), SIMDE_FLOAT32_C( -565.60), SIMDE_FLOAT32_C( 521.68), SIMDE_FLOAT32_C( -663.69), SIMDE_FLOAT32_C( 977.71), SIMDE_FLOAT32_C( -68.19), SIMDE_FLOAT32_C( 621.30)), UINT16_C(37636), simde_mm512_set_epi32(INT32_C(-1855337133), INT32_C( -145365176), 
INT32_C( -898457281), INT32_C(-1771294387), INT32_C( 1891153968), INT32_C( 1228490057), INT32_C( 1633049538), INT32_C(-1958946575), INT32_C( 1185839426), INT32_C( -155257689), INT32_C( -468686279), INT32_C( 158817819), INT32_C(-1241996695), INT32_C( 1151388076), INT32_C( 1858464278), INT32_C( 1759636985)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -890.16), SIMDE_FLOAT32_C( 541.77), SIMDE_FLOAT32_C( -229.54), SIMDE_FLOAT32_C( -351.71), SIMDE_FLOAT32_C( -174.86), SIMDE_FLOAT32_C( -589.39), SIMDE_FLOAT32_C( 568.96), SIMDE_FLOAT32_C( 728.23), SIMDE_FLOAT32_C( 762.97), SIMDE_FLOAT32_C( -567.80), SIMDE_FLOAT32_C( -772.48), SIMDE_FLOAT32_C( 342.73), SIMDE_FLOAT32_C( 146.72), SIMDE_FLOAT32_C( -732.03), SIMDE_FLOAT32_C( 502.49), SIMDE_FLOAT32_C( 705.12)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 146.72), SIMDE_FLOAT32_C( 576.25), SIMDE_FLOAT32_C( -491.19), SIMDE_FLOAT32_C( -229.54), SIMDE_FLOAT32_C( 144.75), SIMDE_FLOAT32_C( -337.24), SIMDE_FLOAT32_C( -732.03), SIMDE_FLOAT32_C( 502.49), SIMDE_FLOAT32_C( -499.67), SIMDE_FLOAT32_C( -489.63), SIMDE_FLOAT32_C( -565.60), SIMDE_FLOAT32_C( 521.68), SIMDE_FLOAT32_C( -663.69), SIMDE_FLOAT32_C( -351.71), SIMDE_FLOAT32_C( -68.19), SIMDE_FLOAT32_C( 621.30)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 338.68), SIMDE_FLOAT32_C( -26.37), SIMDE_FLOAT32_C( 688.52), SIMDE_FLOAT32_C( 895.19), SIMDE_FLOAT32_C( -756.53), SIMDE_FLOAT32_C( -324.15), SIMDE_FLOAT32_C( -353.65), SIMDE_FLOAT32_C( 29.09), SIMDE_FLOAT32_C( 167.64), SIMDE_FLOAT32_C( -10.77), SIMDE_FLOAT32_C( -916.15), SIMDE_FLOAT32_C( -222.63), SIMDE_FLOAT32_C( 671.28), SIMDE_FLOAT32_C( 830.16), SIMDE_FLOAT32_C( 152.94), SIMDE_FLOAT32_C( 442.97)), UINT16_C( 7327), simde_mm512_set_epi32(INT32_C(-1635146322), INT32_C(-1549527285), INT32_C( -767681217), INT32_C( 200060997), INT32_C( 1897816859), INT32_C( -33590828), INT32_C( 1228456059), INT32_C( 363372158), INT32_C(-1880249713), INT32_C( 563834696), INT32_C( 298711594), INT32_C( 675952191), INT32_C( 93229580), INT32_C(-1903862955), INT32_C( 
927507330), INT32_C( 735163498)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -225.51), SIMDE_FLOAT32_C( -481.64), SIMDE_FLOAT32_C( -505.22), SIMDE_FLOAT32_C( -701.32), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( -981.96), SIMDE_FLOAT32_C( 509.57), SIMDE_FLOAT32_C( -514.74), SIMDE_FLOAT32_C( -731.84), SIMDE_FLOAT32_C( -363.17), SIMDE_FLOAT32_C( 622.22), SIMDE_FLOAT32_C( 459.46), SIMDE_FLOAT32_C( -421.94), SIMDE_FLOAT32_C( -833.19), SIMDE_FLOAT32_C( 52.45), SIMDE_FLOAT32_C( -445.99)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 338.68), SIMDE_FLOAT32_C( -26.37), SIMDE_FLOAT32_C( 688.52), SIMDE_FLOAT32_C( 622.22), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( 459.46), SIMDE_FLOAT32_C( -353.65), SIMDE_FLOAT32_C( 29.09), SIMDE_FLOAT32_C( -225.51), SIMDE_FLOAT32_C( -10.77), SIMDE_FLOAT32_C( -916.15), SIMDE_FLOAT32_C( -225.51), SIMDE_FLOAT32_C( -701.32), SIMDE_FLOAT32_C( 622.22), SIMDE_FLOAT32_C( -833.19), SIMDE_FLOAT32_C( -981.96)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 548.33), SIMDE_FLOAT32_C( 187.27), SIMDE_FLOAT32_C( 96.63), SIMDE_FLOAT32_C( 273.45), SIMDE_FLOAT32_C( -676.75), SIMDE_FLOAT32_C( 630.01), SIMDE_FLOAT32_C( -273.70), SIMDE_FLOAT32_C( 595.19), SIMDE_FLOAT32_C( -827.09), SIMDE_FLOAT32_C( -9.74), SIMDE_FLOAT32_C( 348.50), SIMDE_FLOAT32_C( -287.28), SIMDE_FLOAT32_C( -214.23), SIMDE_FLOAT32_C( 252.31), SIMDE_FLOAT32_C( -162.09), SIMDE_FLOAT32_C( 125.35)), UINT16_C(47269), simde_mm512_set_epi32(INT32_C(-1058548029), INT32_C( 1236526607), INT32_C( -254770193), INT32_C( 93180276), INT32_C( 1350898601), INT32_C( 1804650502), INT32_C( 425106003), INT32_C( 1132369672), INT32_C(-1843449231), INT32_C( 1221695547), INT32_C( -627121059), INT32_C( 837413942), INT32_C( 535626003), INT32_C(-2017749884), INT32_C(-1177177868), INT32_C(-1219767649)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 707.49), SIMDE_FLOAT32_C( 251.70), SIMDE_FLOAT32_C( -353.77), SIMDE_FLOAT32_C( 201.00), SIMDE_FLOAT32_C( 254.86), SIMDE_FLOAT32_C( -348.78), SIMDE_FLOAT32_C( 550.78), SIMDE_FLOAT32_C( -277.90), SIMDE_FLOAT32_C( 
-324.13), SIMDE_FLOAT32_C( -996.73), SIMDE_FLOAT32_C( -839.66), SIMDE_FLOAT32_C( -878.81), SIMDE_FLOAT32_C( 809.40), SIMDE_FLOAT32_C( -567.81), SIMDE_FLOAT32_C( 341.97), SIMDE_FLOAT32_C( 173.53)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 809.40), SIMDE_FLOAT32_C( 187.27), SIMDE_FLOAT32_C( 707.49), SIMDE_FLOAT32_C( -878.81), SIMDE_FLOAT32_C( 550.78), SIMDE_FLOAT32_C( 630.01), SIMDE_FLOAT32_C( -273.70), SIMDE_FLOAT32_C( 595.19), SIMDE_FLOAT32_C( 341.97), SIMDE_FLOAT32_C( -9.74), SIMDE_FLOAT32_C( -353.77), SIMDE_FLOAT32_C( -287.28), SIMDE_FLOAT32_C( -214.23), SIMDE_FLOAT32_C( -878.81), SIMDE_FLOAT32_C( -162.09), SIMDE_FLOAT32_C( 707.49)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -667.18), SIMDE_FLOAT32_C( 47.15), SIMDE_FLOAT32_C( 840.15), SIMDE_FLOAT32_C( -252.88), SIMDE_FLOAT32_C( -918.58), SIMDE_FLOAT32_C( -332.91), SIMDE_FLOAT32_C( -82.80), SIMDE_FLOAT32_C( 317.13), SIMDE_FLOAT32_C( -364.09), SIMDE_FLOAT32_C( -815.93), SIMDE_FLOAT32_C( -526.38), SIMDE_FLOAT32_C( 780.77), SIMDE_FLOAT32_C( 785.63), SIMDE_FLOAT32_C( -266.51), SIMDE_FLOAT32_C( 617.69), SIMDE_FLOAT32_C( -648.38)), UINT16_C( 5176), simde_mm512_set_epi32(INT32_C( -812272178), INT32_C( 1938172163), INT32_C(-1629887879), INT32_C( 1699159582), INT32_C( 1760153584), INT32_C( 448749626), INT32_C( 1866295090), INT32_C( 1060092543), INT32_C( 607878771), INT32_C( 1797306818), INT32_C( 547150787), INT32_C( -708809478), INT32_C( 1892985133), INT32_C( 766652099), INT32_C( 2132386901), INT32_C( -87942209)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -617.36), SIMDE_FLOAT32_C( 242.43), SIMDE_FLOAT32_C( -957.92), SIMDE_FLOAT32_C( -328.75), SIMDE_FLOAT32_C( -790.98), SIMDE_FLOAT32_C( -291.92), SIMDE_FLOAT32_C( 890.91), SIMDE_FLOAT32_C( 431.01), SIMDE_FLOAT32_C( -726.89), SIMDE_FLOAT32_C( 279.78), SIMDE_FLOAT32_C( -463.41), SIMDE_FLOAT32_C( -367.48), SIMDE_FLOAT32_C( -111.74), SIMDE_FLOAT32_C( -359.87), SIMDE_FLOAT32_C( -442.32), SIMDE_FLOAT32_C( -340.55)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -667.18), SIMDE_FLOAT32_C( 47.15), 
SIMDE_FLOAT32_C( 840.15), SIMDE_FLOAT32_C( 242.43), SIMDE_FLOAT32_C( -918.58), SIMDE_FLOAT32_C( -291.92), SIMDE_FLOAT32_C( -82.80), SIMDE_FLOAT32_C( 317.13), SIMDE_FLOAT32_C( -364.09), SIMDE_FLOAT32_C( -815.93), SIMDE_FLOAT32_C( -111.74), SIMDE_FLOAT32_C( -291.92), SIMDE_FLOAT32_C( -957.92), SIMDE_FLOAT32_C( -266.51), SIMDE_FLOAT32_C( 617.69), SIMDE_FLOAT32_C( -648.38)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 284.58), SIMDE_FLOAT32_C( 746.63), SIMDE_FLOAT32_C( 125.59), SIMDE_FLOAT32_C( 4.12), SIMDE_FLOAT32_C( -328.28), SIMDE_FLOAT32_C( 935.21), SIMDE_FLOAT32_C( -572.72), SIMDE_FLOAT32_C( -282.68), SIMDE_FLOAT32_C( -535.09), SIMDE_FLOAT32_C( -455.20), SIMDE_FLOAT32_C( 284.53), SIMDE_FLOAT32_C( -643.06), SIMDE_FLOAT32_C( -951.05), SIMDE_FLOAT32_C( -691.81), SIMDE_FLOAT32_C( 546.43), SIMDE_FLOAT32_C( -886.61)), UINT16_C( 9849), simde_mm512_set_epi32(INT32_C( 659357217), INT32_C( 516445967), INT32_C(-1793934445), INT32_C( -751346082), INT32_C( 1615640988), INT32_C(-2047699786), INT32_C( 407764025), INT32_C( -531318026), INT32_C( 2098775830), INT32_C(-1132806848), INT32_C( 515968910), INT32_C( 502727224), INT32_C( -356346062), INT32_C( 26000082), INT32_C( -122568662), INT32_C( -797533991)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -159.63), SIMDE_FLOAT32_C( -112.82), SIMDE_FLOAT32_C( 609.39), SIMDE_FLOAT32_C( 941.33), SIMDE_FLOAT32_C( -858.59), SIMDE_FLOAT32_C( -584.34), SIMDE_FLOAT32_C( 842.06), SIMDE_FLOAT32_C( -503.14), SIMDE_FLOAT32_C( -897.65), SIMDE_FLOAT32_C( -249.26), SIMDE_FLOAT32_C( 159.94), SIMDE_FLOAT32_C( -741.72), SIMDE_FLOAT32_C( 417.02), SIMDE_FLOAT32_C( -743.17), SIMDE_FLOAT32_C( -364.29), SIMDE_FLOAT32_C( 495.38)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 284.58), SIMDE_FLOAT32_C( 746.63), SIMDE_FLOAT32_C( 417.02), SIMDE_FLOAT32_C( 4.12), SIMDE_FLOAT32_C( -328.28), SIMDE_FLOAT32_C( -249.26), SIMDE_FLOAT32_C( 842.06), SIMDE_FLOAT32_C( -282.68), SIMDE_FLOAT32_C( -535.09), SIMDE_FLOAT32_C( 495.38), SIMDE_FLOAT32_C( -112.82), SIMDE_FLOAT32_C( -503.14), 
SIMDE_FLOAT32_C( -743.17), SIMDE_FLOAT32_C( -691.81), SIMDE_FLOAT32_C( 546.43), SIMDE_FLOAT32_C( 842.06)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_permutexvar_ps(test_vec[i].src, test_vec[i].k, test_vec[i].idx, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_maskz_permutexvar_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512i idx; simde__m512 a; simde__m512 r; } test_vec[8] = { { UINT16_C(21335), simde_mm512_set_epi32(INT32_C( 951544639), INT32_C(-1026363374), INT32_C(-1801776439), INT32_C( 145438126), INT32_C(-1306064352), INT32_C( -858736392), INT32_C( 923442479), INT32_C( 1092805562), INT32_C( 1443901717), INT32_C( 1848749100), INT32_C( 1777333881), INT32_C( 1570116932), INT32_C(-1302383354), INT32_C( 1993455974), INT32_C(-2068684593), INT32_C(-1936012201)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 483.08), SIMDE_FLOAT32_C( 903.50), SIMDE_FLOAT32_C( 232.04), SIMDE_FLOAT32_C( -43.35), SIMDE_FLOAT32_C( 774.81), SIMDE_FLOAT32_C( 309.91), SIMDE_FLOAT32_C( -599.01), SIMDE_FLOAT32_C( 846.15), SIMDE_FLOAT32_C( 69.04), SIMDE_FLOAT32_C( -514.56), SIMDE_FLOAT32_C( -149.02), SIMDE_FLOAT32_C( -860.98), SIMDE_FLOAT32_C( 240.79), SIMDE_FLOAT32_C( -280.30), SIMDE_FLOAT32_C( -839.80), SIMDE_FLOAT32_C( 128.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -280.30), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 903.50), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 483.08), SIMDE_FLOAT32_C( 309.91), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -43.35), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -860.98), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -514.56), SIMDE_FLOAT32_C( 483.08), SIMDE_FLOAT32_C( 69.04)) }, { UINT16_C(33181), simde_mm512_set_epi32(INT32_C( 353198331), INT32_C( 1711460779), INT32_C( -919570191), INT32_C( 1974152373), INT32_C( -695949043), INT32_C( -790242624), INT32_C(-1094331335), 
INT32_C(-1166320093), INT32_C(-2045280751), INT32_C(-2037261521), INT32_C( 223952317), INT32_C( 282198336), INT32_C( 564965997), INT32_C( 169645898), INT32_C(-1539616610), INT32_C( 1134735685)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -198.67), SIMDE_FLOAT32_C( 337.35), SIMDE_FLOAT32_C( 447.98), SIMDE_FLOAT32_C( -278.32), SIMDE_FLOAT32_C( -925.69), SIMDE_FLOAT32_C( -744.41), SIMDE_FLOAT32_C( 717.83), SIMDE_FLOAT32_C( 39.32), SIMDE_FLOAT32_C( -489.88), SIMDE_FLOAT32_C( 29.68), SIMDE_FLOAT32_C( -37.49), SIMDE_FLOAT32_C( -490.28), SIMDE_FLOAT32_C( -373.66), SIMDE_FLOAT32_C( 841.53), SIMDE_FLOAT32_C( -292.35), SIMDE_FLOAT32_C( 526.21)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -925.69), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -373.66), SIMDE_FLOAT32_C( -292.35), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 526.21), SIMDE_FLOAT32_C( 447.98), SIMDE_FLOAT32_C( -744.41), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -37.49)) }, { UINT16_C(55883), simde_mm512_set_epi32(INT32_C( -329275629), INT32_C( -888441293), INT32_C( -707551350), INT32_C( 513515868), INT32_C(-1825967755), INT32_C( 822222164), INT32_C(-1689559027), INT32_C( 533478787), INT32_C( 907615417), INT32_C( -199229058), INT32_C( -91537812), INT32_C( 1375258232), INT32_C( 139748399), INT32_C( 1688468565), INT32_C( 736544549), INT32_C(-1282057552)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -842.15), SIMDE_FLOAT32_C( -207.87), SIMDE_FLOAT32_C( 908.84), SIMDE_FLOAT32_C( -765.42), SIMDE_FLOAT32_C( -315.78), SIMDE_FLOAT32_C( 138.83), SIMDE_FLOAT32_C( -86.06), SIMDE_FLOAT32_C( 699.07), SIMDE_FLOAT32_C( -413.85), SIMDE_FLOAT32_C( -143.73), SIMDE_FLOAT32_C( 752.26), SIMDE_FLOAT32_C( 709.96), SIMDE_FLOAT32_C( 609.29), SIMDE_FLOAT32_C( -767.34), SIMDE_FLOAT32_C( 878.85), SIMDE_FLOAT32_C( -588.28)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 609.29), SIMDE_FLOAT32_C( 609.29), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( -765.42), SIMDE_FLOAT32_C( 752.26), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 908.84), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -207.87), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -842.15), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 752.26), SIMDE_FLOAT32_C( -588.28)) }, { UINT16_C(18199), simde_mm512_set_epi32(INT32_C( 1526367108), INT32_C( 722122834), INT32_C( -174985661), INT32_C(-1762469023), INT32_C( 1239606494), INT32_C( -22119232), INT32_C( 1216907749), INT32_C( 654527510), INT32_C(-2043358500), INT32_C( 459072440), INT32_C( -430427651), INT32_C( -272088075), INT32_C( 386072301), INT32_C(-1628984154), INT32_C( 87817524), INT32_C( 1219490517)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 493.65), SIMDE_FLOAT32_C( -659.70), SIMDE_FLOAT32_C( 52.79), SIMDE_FLOAT32_C( 493.30), SIMDE_FLOAT32_C( 835.54), SIMDE_FLOAT32_C( 831.29), SIMDE_FLOAT32_C( -712.24), SIMDE_FLOAT32_C( -619.50), SIMDE_FLOAT32_C( 518.12), SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( -173.80), SIMDE_FLOAT32_C( -492.61), SIMDE_FLOAT32_C( 487.08), SIMDE_FLOAT32_C( -68.16), SIMDE_FLOAT32_C( 180.78), SIMDE_FLOAT32_C( 717.69)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -68.16), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 717.69), SIMDE_FLOAT32_C( -173.80), SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -173.80), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 952.47), SIMDE_FLOAT32_C( -492.61), SIMDE_FLOAT32_C( -173.80)) }, { UINT16_C(26926), simde_mm512_set_epi32(INT32_C( 1174128677), INT32_C(-1544325740), INT32_C( 204417556), INT32_C(-1329665093), INT32_C(-2039025377), INT32_C( 1639231015), INT32_C( 1541217841), INT32_C( 1692413538), INT32_C( 738521275), INT32_C( 159429100), INT32_C( 451955897), INT32_C( 181201098), INT32_C( 450627934), INT32_C( 2082954477), INT32_C( 1254960767), INT32_C( 1995459397)), 
simde_mm512_set_ps(SIMDE_FLOAT32_C( -783.00), SIMDE_FLOAT32_C( -388.47), SIMDE_FLOAT32_C( -102.18), SIMDE_FLOAT32_C( -643.43), SIMDE_FLOAT32_C( 960.00), SIMDE_FLOAT32_C( -331.34), SIMDE_FLOAT32_C( 22.93), SIMDE_FLOAT32_C( 72.67), SIMDE_FLOAT32_C( -395.13), SIMDE_FLOAT32_C( -870.79), SIMDE_FLOAT32_C( 145.63), SIMDE_FLOAT32_C( -722.44), SIMDE_FLOAT32_C( -149.04), SIMDE_FLOAT32_C( 529.44), SIMDE_FLOAT32_C( 214.37), SIMDE_FLOAT32_C( -949.73)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -722.44), SIMDE_FLOAT32_C( -722.44), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -783.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 529.44), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 22.93), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -388.47), SIMDE_FLOAT32_C( -102.18), SIMDE_FLOAT32_C( -783.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(36907), simde_mm512_set_epi32(INT32_C(-1123374630), INT32_C( -181070601), INT32_C( 1463729035), INT32_C( 2031968571), INT32_C( 333434400), INT32_C( -637142874), INT32_C( -520435756), INT32_C( -148623413), INT32_C( -692754616), INT32_C(-1908406411), INT32_C( 1391053429), INT32_C( 1767908668), INT32_C( 1117151413), INT32_C( 1466854108), INT32_C( -852914371), INT32_C( -773785464)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -777.05), SIMDE_FLOAT32_C( -607.82), SIMDE_FLOAT32_C( 621.95), SIMDE_FLOAT32_C( 296.47), SIMDE_FLOAT32_C( -116.18), SIMDE_FLOAT32_C( -327.04), SIMDE_FLOAT32_C( 909.84), SIMDE_FLOAT32_C( -23.06), SIMDE_FLOAT32_C( 696.35), SIMDE_FLOAT32_C( -95.21), SIMDE_FLOAT32_C( -644.31), SIMDE_FLOAT32_C( 10.75), SIMDE_FLOAT32_C( 486.53), SIMDE_FLOAT32_C( -668.43), SIMDE_FLOAT32_C( -235.48), SIMDE_FLOAT32_C( -210.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -327.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -116.18), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -644.31), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -644.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 621.95), SIMDE_FLOAT32_C( -23.06)) }, { UINT16_C(59609), simde_mm512_set_epi32(INT32_C( 2052878307), INT32_C( -856056848), INT32_C(-1218860495), INT32_C( 729621709), INT32_C(-1241407128), INT32_C( 696721321), INT32_C( -603523965), INT32_C( 1730687689), INT32_C( 290786615), INT32_C(-1827031380), INT32_C( 1429317129), INT32_C(-1800615955), INT32_C( -728999228), INT32_C( -788606428), INT32_C( -539592973), INT32_C(-1402526875)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -251.41), SIMDE_FLOAT32_C( 383.31), SIMDE_FLOAT32_C( 223.24), SIMDE_FLOAT32_C( 641.22), SIMDE_FLOAT32_C( -528.18), SIMDE_FLOAT32_C( -747.07), SIMDE_FLOAT32_C( 813.86), SIMDE_FLOAT32_C( -762.67), SIMDE_FLOAT32_C( 925.24), SIMDE_FLOAT32_C( 744.11), SIMDE_FLOAT32_C( 849.34), SIMDE_FLOAT32_C( 350.11), SIMDE_FLOAT32_C( 112.68), SIMDE_FLOAT32_C( 409.27), SIMDE_FLOAT32_C( -96.70), SIMDE_FLOAT32_C( 481.83)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 112.68), SIMDE_FLOAT32_C( 481.83), SIMDE_FLOAT32_C( -96.70), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -762.67), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 925.24), SIMDE_FLOAT32_C( 641.22), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 223.24), SIMDE_FLOAT32_C( 350.11), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 849.34)) }, { UINT16_C(40824), simde_mm512_set_epi32(INT32_C( 568974515), INT32_C(-1726442446), INT32_C(-2134949944), INT32_C( 1969107101), INT32_C(-2063427243), INT32_C( -670405092), INT32_C(-1879729053), INT32_C( 1035482990), INT32_C( 1183910939), INT32_C( 1515345934), INT32_C(-1884003639), INT32_C( -638430290), INT32_C(-2007622482), INT32_C( 171336877), INT32_C( 59553613), INT32_C( 165266600)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -710.54), SIMDE_FLOAT32_C( -653.71), SIMDE_FLOAT32_C( -140.51), SIMDE_FLOAT32_C( 371.73), SIMDE_FLOAT32_C( 346.36), SIMDE_FLOAT32_C( 757.18), SIMDE_FLOAT32_C( 318.14), SIMDE_FLOAT32_C( 
214.84), SIMDE_FLOAT32_C( 399.19), SIMDE_FLOAT32_C( 830.24), SIMDE_FLOAT32_C( -291.83), SIMDE_FLOAT32_C( 903.53), SIMDE_FLOAT32_C( 951.57), SIMDE_FLOAT32_C( -831.08), SIMDE_FLOAT32_C( 272.52), SIMDE_FLOAT32_C( 815.07)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 951.57), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -140.51), SIMDE_FLOAT32_C( -291.83), SIMDE_FLOAT32_C( 371.73), SIMDE_FLOAT32_C( 951.57), SIMDE_FLOAT32_C( -653.71), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -653.71), SIMDE_FLOAT32_C( 318.14), SIMDE_FLOAT32_C( -653.71), SIMDE_FLOAT32_C( -653.71), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_maskz_permutexvar_ps(test_vec[i].k, test_vec[i].idx, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_permutexvar_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_permutexvar_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_permutexvar_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_permutexvar_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_permutexvar_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_permutexvar_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permutexvar_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_permutexvar_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_permutexvar_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permutexvar_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_permutexvar_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_permutexvar_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permutexvar_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_permutexvar_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_permutexvar_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permutexvar_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_permutexvar_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_permutexvar_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permutexvar_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_permutexvar_pd) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_permutexvar_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permutexvar_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_permutexvar_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_permutexvar_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_permutexvar_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_permutexvar_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_permutexvar_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_permutexvar_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_permutexvar_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_permutexvar_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_permutexvar_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_permutexvar_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_permutexvar_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_permutexvar_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_permutexvar_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_permutexvar_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_permutexvar_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_permutexvar_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_permutexvar_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_permutexvar_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_permutexvar_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_permutexvar_ps) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/run-tests.c000066400000000000000000000021141400333146700173620ustar00rootroot00000000000000#include "test-avx512.h" #include "run-tests.h" static MunitSuite suites[] = { #define SIMDE_TEST_DECLARE_SUITE(name) \ { NULL, NULL, NULL, 1, MUNIT_SUITE_OPTION_NONE }, \ { NULL, NULL, NULL, 1, MUNIT_SUITE_OPTION_NONE }, \ { NULL, NULL, NULL, 1, MUNIT_SUITE_OPTION_NONE }, \ { NULL, NULL, NULL, 1, MUNIT_SUITE_OPTION_NONE }, #include "declare-suites.h" #undef SIMDE_TEST_DECLARE_SUITE { NULL, NULL, NULL, 0, MUNIT_SUITE_OPTION_NONE } }; static MunitSuite suite = { "/avx512", NULL, suites, 1, MUNIT_SUITE_OPTION_NONE }; MunitSuite* simde_tests_x86_avx512_get_suite(void) { int i = 0; #define SIMDE_TEST_DECLARE_SUITE(name) \ suites[i++] = 
*HEDLEY_CONCAT3(simde_test_x86_avx512_get_suite_, name, _native_c)(); \ suites[i++] = *HEDLEY_CONCAT3(simde_test_x86_avx512_get_suite_, name, _native_cpp)(); \ suites[i++] = *HEDLEY_CONCAT3(simde_test_x86_avx512_get_suite_, name, _emul_c)(); \ suites[i++] = *HEDLEY_CONCAT3(simde_test_x86_avx512_get_suite_, name, _emul_cpp)(); #include "declare-suites.h" #undef SIMDE_TEST_DECLARE_SUITE return &suite; } simde-0.7.2/test/x86/avx512/run-tests.h000066400000000000000000000003271400333146700173730ustar00rootroot00000000000000#if defined(SIMDE_TESTS_X86_AVX512_RUN_TESTS_H) #error File already included. #endif #define SIMDE_TESTS_X86_AVX512_RUN_TESTS_H #include "../../munit/munit.h" MunitSuite* simde_tests_x86_avx512_get_suite(void); simde-0.7.2/test/x86/avx512/sad.c000066400000000000000000000462541400333146700162020ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #define SIMDE_TEST_X86_AVX512_INSN sad #include #include static int test_simde_mm512_sad_epu8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const uint8_t a[64]; const uint8_t b[64]; const uint64_t r[8]; } test_vec[] = { { { UINT8_C( 88), UINT8_C(111), UINT8_C( 54), UINT8_C( 92), UINT8_C( 49), UINT8_C(210), UINT8_C( 24), UINT8_C( 5), UINT8_C( 48), UINT8_C( 13), UINT8_C(211), UINT8_C( 11), UINT8_C(190), UINT8_C( 85), UINT8_C(117), UINT8_C( 12), UINT8_C(184), UINT8_C( 19), UINT8_C(180), UINT8_C(185), UINT8_C( 42), UINT8_C( 26), UINT8_C(118), UINT8_C(165), UINT8_C(131), UINT8_C( 96), UINT8_C( 71), UINT8_C(113), UINT8_C(208), UINT8_C(125), UINT8_C( 93), UINT8_C( 40), UINT8_C(236), UINT8_C(147), UINT8_C(132), UINT8_C( 29), UINT8_C(101), UINT8_C(157), UINT8_C( 34), UINT8_C(149), UINT8_C(170), UINT8_C(245), UINT8_C(161), UINT8_C(105), UINT8_C( 75), UINT8_C( 22), UINT8_C(117), UINT8_C( 3), UINT8_C( 41), UINT8_C( 42), UINT8_C(188), UINT8_C( 83), UINT8_C( 68), UINT8_C( 51), UINT8_C(249), UINT8_C(200), UINT8_C(147), UINT8_C( 64), UINT8_C( 57), UINT8_C( 99), UINT8_C(189), UINT8_C(150), UINT8_C(139), UINT8_C(169) }, { UINT8_C( 42), UINT8_C( 16), UINT8_C(198), UINT8_C(143), UINT8_C(173), UINT8_C(233), UINT8_C( 37), UINT8_C( 87), UINT8_C(222), UINT8_C(198), UINT8_C(192), UINT8_C( 41), UINT8_C(220), UINT8_C( 54), UINT8_C( 44), UINT8_C( 5), UINT8_C( 96), UINT8_C(233), UINT8_C( 89), UINT8_C(164), UINT8_C( 28), UINT8_C( 82), UINT8_C(108), UINT8_C(175), UINT8_C(146), UINT8_C(165), UINT8_C( 19), UINT8_C( 79), UINT8_C( 60), UINT8_C(158), UINT8_C(248), UINT8_C(102), UINT8_C(174), UINT8_C(190), UINT8_C(245), UINT8_C( 91), UINT8_C(167), UINT8_C( 26), UINT8_C(179), UINT8_C(134), UINT8_C(224), UINT8_C(115), UINT8_C(175), UINT8_C(188), UINT8_C(169), UINT8_C(220), UINT8_C(194), UINT8_C( 9), UINT8_C(197), UINT8_C( 27), UINT8_C(174), UINT8_C(225), UINT8_C(109), UINT8_C( 26), UINT8_C(144), UINT8_MAX, UINT8_C(192), UINT8_C(163), 
UINT8_C( 78), UINT8_C(252), UINT8_C( 66), UINT8_C( 70), UINT8_C( 98), UINT8_C(240) }, { UINT64_C( 578), UINT64_C( 549), UINT64_C( 504), UINT64_C( 568), UINT64_C( 637), UINT64_C( 656), UINT64_C( 553), UINT64_C( 633) } }, { { UINT8_C( 4), UINT8_C( 87), UINT8_C( 76), UINT8_C(172), UINT8_C(114), UINT8_MAX, UINT8_C( 50), UINT8_C( 82), UINT8_C(114), UINT8_C(225), UINT8_C( 15), UINT8_C( 28), UINT8_C(189), UINT8_C(209), UINT8_C( 37), UINT8_C(130), UINT8_C(236), UINT8_C(211), UINT8_C( 99), UINT8_C( 89), UINT8_C(238), UINT8_C(244), UINT8_C( 88), UINT8_C(174), UINT8_C(151), UINT8_C(166), UINT8_C(170), UINT8_C(217), UINT8_C(236), UINT8_C( 12), UINT8_C(202), UINT8_C(240), UINT8_C( 99), UINT8_C( 22), UINT8_C(156), UINT8_C(213), UINT8_C( 21), UINT8_C(206), UINT8_C( 40), UINT8_C(135), UINT8_C(176), UINT8_C( 55), UINT8_C(163), UINT8_C(109), UINT8_C( 8), UINT8_C(201), UINT8_C(240), UINT8_C(244), UINT8_C(156), UINT8_C( 83), UINT8_C( 77), UINT8_C(138), UINT8_C( 71), UINT8_C(165), UINT8_C( 56), UINT8_C(223), UINT8_C( 75), UINT8_C(226), UINT8_C(184), UINT8_C( 55), UINT8_C(238), UINT8_C(130), UINT8_C( 39), UINT8_C( 82) }, { UINT8_C(152), UINT8_C(196), UINT8_C( 39), UINT8_C(173), UINT8_C(146), UINT8_C( 79), UINT8_C( 53), UINT8_C( 66), UINT8_C(134), UINT8_C(216), UINT8_C(176), UINT8_C(142), UINT8_C(161), UINT8_C(160), UINT8_C(130), UINT8_C( 62), UINT8_C(243), UINT8_C(207), UINT8_C(200), UINT8_C( 59), UINT8_C(116), UINT8_C( 1), UINT8_C( 26), UINT8_C(191), UINT8_C(227), UINT8_C(210), UINT8_C(246), UINT8_C(210), UINT8_C( 85), UINT8_C( 30), UINT8_C( 36), UINT8_C(237), UINT8_C(226), UINT8_C( 75), UINT8_C(155), UINT8_C(116), UINT8_C(155), UINT8_C(208), UINT8_C(183), UINT8_C( 33), UINT8_C(168), UINT8_C(103), UINT8_C(176), UINT8_C( 74), UINT8_C( 7), UINT8_C( 50), UINT8_C(136), UINT8_C(250), UINT8_C( 2), UINT8_C( 80), UINT8_C( 53), UINT8_C(118), UINT8_C( 81), UINT8_C( 79), UINT8_C( 54), UINT8_C( 53), UINT8_C( 34), UINT8_C( 44), UINT8_C( 7), UINT8_C(119), UINT8_C( 74), UINT8_C( 43), UINT8_C(100), 
UINT8_C( 44) }, { UINT64_C( 522), UINT64_C( 542), UINT64_C( 586), UINT64_C( 541), UINT64_C( 659), UINT64_C( 366), UINT64_C( 469), UINT64_C( 814) } }, { { UINT8_C(118), UINT8_MAX, UINT8_C(161), UINT8_C( 17), UINT8_C(207), UINT8_C( 88), UINT8_C( 51), UINT8_C(120), UINT8_C(191), UINT8_C(227), UINT8_C(194), UINT8_C(198), UINT8_C( 21), UINT8_C( 74), UINT8_C(192), UINT8_C( 23), UINT8_C(154), UINT8_C(246), UINT8_C(142), UINT8_C(236), UINT8_C( 69), UINT8_C(196), UINT8_C( 33), UINT8_C(103), UINT8_C(240), UINT8_C( 40), UINT8_C(222), UINT8_C( 59), UINT8_C( 83), UINT8_C( 67), UINT8_C(103), UINT8_C(201), UINT8_C( 66), UINT8_C( 8), UINT8_C(219), UINT8_C( 18), UINT8_C( 96), UINT8_C( 14), UINT8_C(138), UINT8_C( 31), UINT8_C(241), UINT8_C( 76), UINT8_C(229), UINT8_C( 6), UINT8_C(150), UINT8_C(166), UINT8_C( 30), UINT8_C( 48), UINT8_C(156), UINT8_C(172), UINT8_C( 28), UINT8_C(225), UINT8_C(112), UINT8_C( 61), UINT8_C( 73), UINT8_C( 96), UINT8_C(101), UINT8_C( 39), UINT8_C(155), UINT8_C(184), UINT8_C(106), UINT8_C( 3), UINT8_C(130), UINT8_C(173) }, { UINT8_C( 11), UINT8_C( 93), UINT8_C(191), UINT8_C(108), UINT8_C(107), UINT8_C( 73), UINT8_C(139), UINT8_C( 92), UINT8_C(149), UINT8_C(113), UINT8_C( 98), UINT8_C( 43), UINT8_C( 23), UINT8_C(128), UINT8_C( 91), UINT8_C(179), UINT8_C( 44), UINT8_C(120), UINT8_C(148), UINT8_C(156), UINT8_C(181), UINT8_C(221), UINT8_C(253), UINT8_C( 27), UINT8_C( 5), UINT8_C(152), UINT8_C(211), UINT8_C(111), UINT8_C(155), UINT8_C( 85), UINT8_C( 28), UINT8_C(167), UINT8_C(178), UINT8_C(219), UINT8_C( 19), UINT8_C( 29), UINT8_C( 36), UINT8_C(158), UINT8_C(121), UINT8_C(185), UINT8_C( 15), UINT8_C(220), UINT8_C(228), UINT8_C( 38), UINT8_C( 92), UINT8_C( 64), UINT8_C(217), UINT8_C(137), UINT8_C(184), UINT8_C(110), UINT8_C( 37), UINT8_C(109), UINT8_C( 75), UINT8_C( 34), UINT8_C(136), UINT8_C( 80), UINT8_C(187), UINT8_C( 92), UINT8_C(192), UINT8_C( 86), UINT8_C(177), UINT8_C(220), UINT8_C(253), UINT8_C(100) }, { UINT64_C( 621), UINT64_C( 720), UINT64_C( 755), 
UINT64_C( 609), UINT64_C( 909), UINT64_C( 839), UINT64_C( 358), UINT64_C( 758) } }, { { UINT8_C(184), UINT8_C( 16), UINT8_C(129), UINT8_C(220), UINT8_C(175), UINT8_C(251), UINT8_C(150), UINT8_C(190), UINT8_C(215), UINT8_C(122), UINT8_C(229), UINT8_C( 51), UINT8_C(186), UINT8_C(190), UINT8_C(188), UINT8_C(114), UINT8_C( 44), UINT8_C(226), UINT8_C(224), UINT8_C(120), UINT8_C( 4), UINT8_C(104), UINT8_C(200), UINT8_C(191), UINT8_C(196), UINT8_C(136), UINT8_C( 22), UINT8_C(118), UINT8_C(101), UINT8_C( 19), UINT8_C(218), UINT8_C( 29), UINT8_C( 36), UINT8_C( 91), UINT8_C(249), UINT8_C(211), UINT8_C( 86), UINT8_C(143), UINT8_C(145), UINT8_C( 45), UINT8_C( 10), UINT8_C(118), UINT8_C( 97), UINT8_C(196), UINT8_C( 53), UINT8_C( 29), UINT8_C( 55), UINT8_C( 97), UINT8_MAX, UINT8_C( 23), UINT8_C(217), UINT8_C( 4), UINT8_C(127), UINT8_C(162), UINT8_C(195), UINT8_C( 68), UINT8_C( 42), UINT8_C(217), UINT8_C(186), UINT8_C(143), UINT8_C(237), UINT8_C(148), UINT8_C(172), UINT8_C( 17) }, { UINT8_C(239), UINT8_C(166), UINT8_C(228), UINT8_C( 70), UINT8_C( 53), UINT8_C(117), UINT8_C(115), UINT8_C( 63), UINT8_C(236), UINT8_C(212), UINT8_C( 4), UINT8_C( 33), UINT8_C(242), UINT8_C( 59), UINT8_C(130), UINT8_C(241), UINT8_C( 82), UINT8_C( 92), UINT8_C(245), UINT8_C(209), UINT8_C(254), UINT8_C(185), UINT8_C( 21), UINT8_C( 40), UINT8_C(146), UINT8_C(207), UINT8_C(184), UINT8_C(127), UINT8_C( 99), UINT8_C(100), UINT8_C(144), UINT8_C( 83), UINT8_C( 10), UINT8_C(116), UINT8_C(153), UINT8_C( 64), UINT8_C(234), UINT8_C( 12), UINT8_C(127), UINT8_C(214), UINT8_C(225), UINT8_C(131), UINT8_C(247), UINT8_C(211), UINT8_C(190), UINT8_C(121), UINT8_C(196), UINT8_C( 16), UINT8_C(213), UINT8_C(186), UINT8_C(226), UINT8_C(211), UINT8_C(115), UINT8_C(247), UINT8_C(252), UINT8_C( 5), UINT8_C(199), UINT8_C(180), UINT8_C(133), UINT8_C( 42), UINT8_C( 24), UINT8_C( 21), UINT8_C(125), UINT8_C( 35) }, { UINT64_C( 872), UINT64_C( 726), UINT64_C( 943), UINT64_C( 503), UINT64_C( 760), UINT64_C( 844), UINT64_C( 638), 
UINT64_C( 753) } }, { { UINT8_C(138), UINT8_C( 22), UINT8_C( 99), UINT8_C(116), UINT8_C( 35), UINT8_C(226), UINT8_C( 74), UINT8_C( 4), UINT8_C(102), UINT8_C( 65), UINT8_C(215), UINT8_C( 36), UINT8_C(186), UINT8_C(155), UINT8_C( 53), UINT8_C(144), UINT8_C( 85), UINT8_C( 23), UINT8_C( 99), UINT8_C(200), UINT8_C( 14), UINT8_C( 95), UINT8_C(206), UINT8_C(213), UINT8_C( 19), UINT8_C( 83), UINT8_C( 0), UINT8_C( 44), UINT8_C(104), UINT8_C(125), UINT8_C( 79), UINT8_C(242), UINT8_C(148), UINT8_C(178), UINT8_C(102), UINT8_C(183), UINT8_C(148), UINT8_C(176), UINT8_C(187), UINT8_C(250), UINT8_C(241), UINT8_C(146), UINT8_C( 31), UINT8_C(172), UINT8_C( 45), UINT8_C( 84), UINT8_C( 60), UINT8_C(131), UINT8_C(107), UINT8_C(159), UINT8_C( 75), UINT8_C(121), UINT8_MAX, UINT8_C( 25), UINT8_C( 79), UINT8_C( 18), UINT8_C(108), UINT8_C( 79), UINT8_C( 62), UINT8_C(213), UINT8_C(204), UINT8_C(141), UINT8_C(199), UINT8_C( 96) }, { UINT8_C( 63), UINT8_C( 46), UINT8_C( 23), UINT8_C(212), UINT8_C(222), UINT8_C(210), UINT8_C(206), UINT8_C(208), UINT8_C(100), UINT8_C(237), UINT8_C(124), UINT8_C(146), UINT8_C( 65), UINT8_C(184), UINT8_C( 21), UINT8_C(172), UINT8_C( 87), UINT8_C( 96), UINT8_C( 38), UINT8_C( 86), UINT8_C(122), UINT8_C(117), UINT8_C(105), UINT8_C(230), UINT8_C(196), UINT8_C(167), UINT8_C(187), UINT8_C(144), UINT8_C( 53), UINT8_C(131), UINT8_C(241), UINT8_C(116), UINT8_C(177), UINT8_C( 8), UINT8_C( 72), UINT8_C(143), UINT8_C(219), UINT8_C( 23), UINT8_C( 95), UINT8_C( 63), UINT8_C( 4), UINT8_C(219), UINT8_C(209), UINT8_C( 70), UINT8_C(147), UINT8_C(230), UINT8_C(242), UINT8_C(235), UINT8_C( 71), UINT8_C( 24), UINT8_C( 65), UINT8_C(193), UINT8_C(141), UINT8_C(170), UINT8_C(167), UINT8_C( 81), UINT8_C( 82), UINT8_C( 99), UINT8_C(226), UINT8_C(135), UINT8_C(230), UINT8_C(211), UINT8_C(251), UINT8_C(151) }, { UINT64_C( 810), UINT64_C( 585), UINT64_C( 498), UINT64_C( 893), UINT64_C( 772), UINT64_C( 1124), UINT64_C( 663), UINT64_C( 491) } }, { { UINT8_C(219), UINT8_C( 68), UINT8_C( 38), 
UINT8_C(182), UINT8_C( 91), UINT8_C(134), UINT8_C(246), UINT8_C( 95), UINT8_C( 97), UINT8_C(199), UINT8_C(165), UINT8_C(245), UINT8_C(174), UINT8_C(152), UINT8_C(224), UINT8_C(245), UINT8_C(176), UINT8_C( 33), UINT8_C(182), UINT8_C( 62), UINT8_C(204), UINT8_C( 93), UINT8_C(143), UINT8_C( 30), UINT8_C(192), UINT8_C(113), UINT8_C(165), UINT8_C(166), UINT8_C( 68), UINT8_C(160), UINT8_C( 61), UINT8_C( 32), UINT8_C(228), UINT8_C(100), UINT8_C(214), UINT8_C( 63), UINT8_C(234), UINT8_C(204), UINT8_C(159), UINT8_C( 75), UINT8_C(148), UINT8_C( 68), UINT8_C( 64), UINT8_C( 66), UINT8_C(220), UINT8_C( 32), UINT8_C( 55), UINT8_C(141), UINT8_C( 66), UINT8_C(237), UINT8_C(203), UINT8_C( 14), UINT8_C( 74), UINT8_C( 90), UINT8_C( 44), UINT8_C( 11), UINT8_C(204), UINT8_C(209), UINT8_C(177), UINT8_C( 16), UINT8_C(113), UINT8_C(239), UINT8_C( 48), UINT8_C( 86) }, { UINT8_C( 83), UINT8_C( 7), UINT8_C(149), UINT8_C( 61), UINT8_C(211), UINT8_C( 52), UINT8_C(136), UINT8_C(103), UINT8_C(121), UINT8_C(201), UINT8_C(169), UINT8_C( 85), UINT8_C(233), UINT8_C(224), UINT8_C(226), UINT8_C( 43), UINT8_C(205), UINT8_C(173), UINT8_C( 57), UINT8_C( 24), UINT8_C( 8), UINT8_C(101), UINT8_C( 35), UINT8_C(212), UINT8_C( 54), UINT8_C(212), UINT8_C(228), UINT8_C(168), UINT8_C(195), UINT8_C( 21), UINT8_C(254), UINT8_C( 22), UINT8_C( 28), UINT8_C(147), UINT8_C( 83), UINT8_C(239), UINT8_C(200), UINT8_C(220), UINT8_C( 87), UINT8_C( 65), UINT8_C(165), UINT8_C( 0), UINT8_C(150), UINT8_C(142), UINT8_C(225), UINT8_C(121), UINT8_C(186), UINT8_C(174), UINT8_C( 38), UINT8_C(243), UINT8_C(198), UINT8_C( 46), UINT8_C( 89), UINT8_C(233), UINT8_C( 2), UINT8_C(143), UINT8_C(190), UINT8_C(231), UINT8_C( 55), UINT8_C(129), UINT8_C(252), UINT8_C( 53), UINT8_C(152), UINT8_C( 24) }, { UINT64_C( 749), UINT64_C( 525), UINT64_C( 826), UINT64_C( 771), UINT64_C( 686), UINT64_C( 505), UINT64_C( 403), UINT64_C( 762) } }, { { UINT8_C(201), UINT8_C(235), UINT8_C( 7), UINT8_C(145), UINT8_C(199), UINT8_C( 94), UINT8_C(210), 
UINT8_C(108), UINT8_C( 95), UINT8_C(104), UINT8_C(251), UINT8_C( 64), UINT8_C(225), UINT8_C(181), UINT8_C(238), UINT8_C( 8), UINT8_C(168), UINT8_C(181), UINT8_C( 54), UINT8_C( 1), UINT8_C(158), UINT8_C( 57), UINT8_C(145), UINT8_C( 92), UINT8_C( 32), UINT8_C(200), UINT8_C(222), UINT8_C( 28), UINT8_C(254), UINT8_C(118), UINT8_C( 52), UINT8_C(199), UINT8_C( 97), UINT8_C( 59), UINT8_C( 88), UINT8_C( 41), UINT8_C(154), UINT8_C( 42), UINT8_C(149), UINT8_C(249), UINT8_C(146), UINT8_C(144), UINT8_C( 57), UINT8_C(116), UINT8_C( 69), UINT8_C( 39), UINT8_C(124), UINT8_C(238), UINT8_C(220), UINT8_C(178), UINT8_C(239), UINT8_C(123), UINT8_C(235), UINT8_C(128), UINT8_C(215), UINT8_C( 11), UINT8_C( 73), UINT8_C(181), UINT8_C( 39), UINT8_C( 71), UINT8_C( 43), UINT8_C( 91), UINT8_C( 14), UINT8_C(141) }, { UINT8_C(151), UINT8_C(102), UINT8_C(182), UINT8_C( 49), UINT8_C(144), UINT8_C( 75), UINT8_C( 42), UINT8_C( 34), UINT8_C(220), UINT8_C( 99), UINT8_C(150), UINT8_C( 33), UINT8_C(138), UINT8_C( 18), UINT8_C( 15), UINT8_C(103), UINT8_C(197), UINT8_MAX, UINT8_C(226), UINT8_C(176), UINT8_C(127), UINT8_C(185), UINT8_C(188), UINT8_C(200), UINT8_C(111), UINT8_C(227), UINT8_C( 15), UINT8_C(154), UINT8_C( 63), UINT8_C( 29), UINT8_C( 39), UINT8_C(214), UINT8_C(131), UINT8_C(221), UINT8_C( 7), UINT8_C( 19), UINT8_C( 41), UINT8_C( 49), UINT8_C( 54), UINT8_C( 5), UINT8_C(148), UINT8_C(204), UINT8_C( 38), UINT8_C( 30), UINT8_C(223), UINT8_C( 54), UINT8_C(133), UINT8_C(164), UINT8_C( 53), UINT8_C(103), UINT8_C( 84), UINT8_C(180), UINT8_C( 33), UINT8_C( 16), UINT8_C(125), UINT8_C(144), UINT8_C(244), UINT8_C(140), UINT8_C( 42), UINT8_C( 51), UINT8_C(170), UINT8_C( 82), UINT8_C( 9), UINT8_C( 45) }, { UINT64_C( 770), UINT64_C( 830), UINT64_C( 760), UINT64_C( 747), UINT64_C( 758), UINT64_C( 419), UINT64_C( 991), UINT64_C( 472) } }, { { UINT8_C( 47), UINT8_C( 16), UINT8_C( 65), UINT8_C( 88), UINT8_C( 65), UINT8_C(119), UINT8_C( 93), UINT8_C(213), UINT8_C( 67), UINT8_C(132), UINT8_C(243), UINT8_C( 34), 
UINT8_C(186), UINT8_C(121), UINT8_C(198), UINT8_C(239), UINT8_C(224), UINT8_C( 27), UINT8_C(163), UINT8_C( 1), UINT8_C( 43), UINT8_C( 32), UINT8_C(145), UINT8_C( 31), UINT8_C(173), UINT8_C(188), UINT8_C( 82), UINT8_C( 87), UINT8_C( 14), UINT8_C( 91), UINT8_C(132), UINT8_C( 61), UINT8_C(107), UINT8_C(197), UINT8_C(150), UINT8_C(172), UINT8_C( 60), UINT8_C(243), UINT8_C(129), UINT8_C(128), UINT8_C(119), UINT8_C(117), UINT8_C(162), UINT8_C( 49), UINT8_C(238), UINT8_C(105), UINT8_C( 32), UINT8_C(206), UINT8_C(132), UINT8_C(196), UINT8_C(208), UINT8_C(175), UINT8_C(228), UINT8_C( 97), UINT8_C(207), UINT8_C(145), UINT8_C( 29), UINT8_C( 33), UINT8_C(232), UINT8_C( 43), UINT8_C(125), UINT8_C(109), UINT8_C(105), UINT8_C(232) }, { UINT8_C( 50), UINT8_MAX, UINT8_C(149), UINT8_C(111), UINT8_C(242), UINT8_C( 22), UINT8_C(239), UINT8_C(106), UINT8_C(139), UINT8_C(145), UINT8_C(155), UINT8_C(121), UINT8_C(250), UINT8_C(188), UINT8_C( 72), UINT8_C(126), UINT8_C(128), UINT8_C( 24), UINT8_C( 46), UINT8_C(100), UINT8_C(121), UINT8_C(253), UINT8_C(246), UINT8_C(151), UINT8_C( 30), UINT8_C(222), UINT8_C(194), UINT8_C(155), UINT8_C( 75), UINT8_C( 43), UINT8_C(132), UINT8_C(126), UINT8_C( 42), UINT8_C( 25), UINT8_C(237), UINT8_C( 29), UINT8_C( 47), UINT8_C(220), UINT8_C(135), UINT8_C(187), UINT8_C(109), UINT8_C( 34), UINT8_C( 52), UINT8_C(104), UINT8_C(222), UINT8_C(124), UINT8_C(230), UINT8_C( 94), UINT8_C(148), UINT8_C( 20), UINT8_C(195), UINT8_C( 14), UINT8_C( 17), UINT8_C(185), UINT8_C(165), UINT8_C( 48), UINT8_C(151), UINT8_C(103), UINT8_C(203), UINT8_C(227), UINT8_C(147), UINT8_C( 79), UINT8_C( 97), UINT8_C(189) }, { UINT64_C( 876), UINT64_C( 630), UINT64_C( 835), UINT64_C( 531), UINT64_C( 568), UINT64_C( 603), UINT64_C( 804), UINT64_C( 508) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_sad_epu8(a, b); 
simde_test_x86_assert_equal_u64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sad_epu8) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/set.c000066400000000000000000001503231400333146700162170ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #define SIMDE_TEST_X86_AVX512_INSN set #include #include static int test_simde_mm512_set_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[64]; const int8_t r[64]; } test_vec[] = { { { INT8_C( 83), INT8_C( 58), INT8_C( 10), INT8_C( 67), -INT8_C( 56), -INT8_C( 81), -INT8_C( 100), INT8_C( 82), -INT8_C( 7), INT8_C( 93), -INT8_C( 108), -INT8_C( 84), -INT8_C( 80), -INT8_C( 17), -INT8_C( 12), INT8_C( 15), INT8_MIN, INT8_C( 43), -INT8_C( 48), -INT8_C( 20), INT8_C( 126), -INT8_C( 118), INT8_C( 4), INT8_C( 113), INT8_C( 4), INT8_C( 115), INT8_C( 93), INT8_C( 20), -INT8_C( 25), INT8_C( 42), -INT8_C( 91), INT8_C( 58), INT8_C( 100), -INT8_C( 81), INT8_C( 126), INT8_C( 44), INT8_C( 95), INT8_C( 26), INT8_C( 126), INT8_C( 88), INT8_C( 119), INT8_C( 19), INT8_C( 4), INT8_C( 39), INT8_C( 2), -INT8_C( 8), INT8_C( 54), -INT8_C( 126), INT8_C( 36), INT8_C( 7), INT8_C( 110), -INT8_C( 94), -INT8_C( 111), INT8_C( 114), INT8_C( 19), -INT8_C( 107), -INT8_C( 27), INT8_C( 112), -INT8_C( 86), -INT8_C( 52), -INT8_C( 102), INT8_C( 79), INT8_C( 7), -INT8_C( 2) }, { INT8_C( 83), INT8_C( 58), INT8_C( 10), INT8_C( 67), -INT8_C( 56), -INT8_C( 81), -INT8_C( 100), INT8_C( 82), -INT8_C( 7), INT8_C( 93), -INT8_C( 108), -INT8_C( 84), -INT8_C( 80), -INT8_C( 17), -INT8_C( 12), INT8_C( 15), INT8_MIN, INT8_C( 43), -INT8_C( 48), -INT8_C( 20), INT8_C( 126), -INT8_C( 118), INT8_C( 4), INT8_C( 113), INT8_C( 4), INT8_C( 115), INT8_C( 93), INT8_C( 20), -INT8_C( 25), INT8_C( 42), -INT8_C( 91), INT8_C( 58), INT8_C( 100), -INT8_C( 81), INT8_C( 126), INT8_C( 44), INT8_C( 95), INT8_C( 26), INT8_C( 126), INT8_C( 88), INT8_C( 119), INT8_C( 19), INT8_C( 4), INT8_C( 39), INT8_C( 2), -INT8_C( 8), INT8_C( 54), -INT8_C( 126), INT8_C( 36), INT8_C( 7), INT8_C( 110), -INT8_C( 94), -INT8_C( 111), INT8_C( 114), INT8_C( 19), -INT8_C( 107), -INT8_C( 27), INT8_C( 112), -INT8_C( 86), -INT8_C( 52), -INT8_C( 102), INT8_C( 79), INT8_C( 7), -INT8_C( 2) } }, { { -INT8_C( 1), 
-INT8_C( 123), INT8_C( 42), INT8_C( 94), -INT8_C( 97), -INT8_C( 87), -INT8_C( 74), INT8_C( 22), -INT8_C( 68), -INT8_C( 70), INT8_C( 61), -INT8_C( 66), -INT8_C( 78), INT8_C( 115), INT8_C( 64), -INT8_C( 42), INT8_C( 122), -INT8_C( 82), INT8_C( 120), INT8_C( 12), INT8_C( 32), -INT8_C( 116), -INT8_C( 95), INT8_C( 6), -INT8_C( 4), INT8_C( 75), -INT8_C( 46), -INT8_C( 105), -INT8_C( 101), -INT8_C( 39), -INT8_C( 107), -INT8_C( 102), INT8_C( 94), -INT8_C( 64), -INT8_C( 8), -INT8_C( 3), INT8_C( 105), -INT8_C( 82), INT8_C( 19), INT8_C( 37), INT8_C( 104), INT8_C( 80), -INT8_C( 29), INT8_C( 26), -INT8_C( 60), INT8_C( 35), -INT8_C( 15), INT8_C( 62), -INT8_C( 47), INT8_C( 105), INT8_C( 74), -INT8_C( 15), -INT8_C( 11), -INT8_C( 20), -INT8_C( 9), -INT8_C( 14), INT8_C( 55), -INT8_C( 54), -INT8_C( 119), -INT8_C( 46), -INT8_C( 93), INT8_C( 30), INT8_C( 108), INT8_C( 2) }, { -INT8_C( 1), -INT8_C( 123), INT8_C( 42), INT8_C( 94), -INT8_C( 97), -INT8_C( 87), -INT8_C( 74), INT8_C( 22), -INT8_C( 68), -INT8_C( 70), INT8_C( 61), -INT8_C( 66), -INT8_C( 78), INT8_C( 115), INT8_C( 64), -INT8_C( 42), INT8_C( 122), -INT8_C( 82), INT8_C( 120), INT8_C( 12), INT8_C( 32), -INT8_C( 116), -INT8_C( 95), INT8_C( 6), -INT8_C( 4), INT8_C( 75), -INT8_C( 46), -INT8_C( 105), -INT8_C( 101), -INT8_C( 39), -INT8_C( 107), -INT8_C( 102), INT8_C( 94), -INT8_C( 64), -INT8_C( 8), -INT8_C( 3), INT8_C( 105), -INT8_C( 82), INT8_C( 19), INT8_C( 37), INT8_C( 104), INT8_C( 80), -INT8_C( 29), INT8_C( 26), -INT8_C( 60), INT8_C( 35), -INT8_C( 15), INT8_C( 62), -INT8_C( 47), INT8_C( 105), INT8_C( 74), -INT8_C( 15), -INT8_C( 11), -INT8_C( 20), -INT8_C( 9), -INT8_C( 14), INT8_C( 55), -INT8_C( 54), -INT8_C( 119), -INT8_C( 46), -INT8_C( 93), INT8_C( 30), INT8_C( 108), INT8_C( 2) } }, { { -INT8_C( 34), INT8_C( 100), -INT8_C( 1), INT8_C( 71), INT8_C( 18), INT8_C( 19), INT8_C( 108), INT8_C( 122), INT8_C( 99), INT8_C( 79), -INT8_C( 107), INT8_C( 39), INT8_C( 114), -INT8_C( 122), INT8_C( 102), INT8_C( 67), -INT8_C( 17), -INT8_C( 80), 
INT8_C( 53), -INT8_C( 27), -INT8_C( 100), INT8_C( 44), -INT8_C( 41), -INT8_C( 44), -INT8_C( 10), INT8_C( 96), -INT8_C( 90), -INT8_C( 102), INT8_C( 126), INT8_C( 19), -INT8_C( 100), INT8_C( 93), INT8_C( 119), -INT8_C( 101), -INT8_C( 92), -INT8_C( 118), -INT8_C( 82), INT8_C( 17), INT8_C( 4), INT8_C( 18), INT8_C( 96), -INT8_C( 103), INT8_C( 57), -INT8_C( 45), INT8_C( 31), -INT8_C( 97), INT8_C( 22), INT8_C( 15), INT8_C( 80), INT8_C( 75), -INT8_C( 12), -INT8_C( 20), INT8_C( 120), -INT8_C( 53), -INT8_C( 64), INT8_C( 110), INT8_C( 43), INT8_C( 103), INT8_C( 8), -INT8_C( 87), INT8_C( 122), -INT8_C( 92), INT8_C( 6), -INT8_C( 15) }, { -INT8_C( 34), INT8_C( 100), -INT8_C( 1), INT8_C( 71), INT8_C( 18), INT8_C( 19), INT8_C( 108), INT8_C( 122), INT8_C( 99), INT8_C( 79), -INT8_C( 107), INT8_C( 39), INT8_C( 114), -INT8_C( 122), INT8_C( 102), INT8_C( 67), -INT8_C( 17), -INT8_C( 80), INT8_C( 53), -INT8_C( 27), -INT8_C( 100), INT8_C( 44), -INT8_C( 41), -INT8_C( 44), -INT8_C( 10), INT8_C( 96), -INT8_C( 90), -INT8_C( 102), INT8_C( 126), INT8_C( 19), -INT8_C( 100), INT8_C( 93), INT8_C( 119), -INT8_C( 101), -INT8_C( 92), -INT8_C( 118), -INT8_C( 82), INT8_C( 17), INT8_C( 4), INT8_C( 18), INT8_C( 96), -INT8_C( 103), INT8_C( 57), -INT8_C( 45), INT8_C( 31), -INT8_C( 97), INT8_C( 22), INT8_C( 15), INT8_C( 80), INT8_C( 75), -INT8_C( 12), -INT8_C( 20), INT8_C( 120), -INT8_C( 53), -INT8_C( 64), INT8_C( 110), INT8_C( 43), INT8_C( 103), INT8_C( 8), -INT8_C( 87), INT8_C( 122), -INT8_C( 92), INT8_C( 6), -INT8_C( 15) } }, { { INT8_C( 64), -INT8_C( 85), INT8_C( 123), -INT8_C( 18), -INT8_C( 68), INT8_MIN, INT8_C( 0), INT8_C( 28), INT8_C( 25), INT8_C( 58), -INT8_C( 17), INT8_C( 57), -INT8_C( 39), INT8_C( 6), INT8_C( 72), INT8_C( 41), INT8_C( 81), INT8_C( 60), INT8_C( 22), -INT8_C( 55), INT8_C( 7), -INT8_C( 42), INT8_C( 56), INT8_C( 50), INT8_C( 61), INT8_C( 64), -INT8_C( 37), -INT8_C( 73), -INT8_C( 27), -INT8_C( 30), -INT8_C( 87), INT8_C( 37), -INT8_C( 115), INT8_C( 36), INT8_C( 19), INT8_C( 73), 
-INT8_C( 92), INT8_C( 20), INT8_C( 101), -INT8_C( 66), INT8_C( 78), INT8_C( 85), -INT8_C( 9), INT8_C( 39), INT8_C( 91), INT8_C( 63), INT8_C( 81), -INT8_C( 84), INT8_C( 123), INT8_C( 103), INT8_C( 118), -INT8_C( 126), INT8_C( 61), -INT8_C( 82), -INT8_C( 76), INT8_C( 123), -INT8_C( 18), -INT8_C( 113), INT8_C( 50), -INT8_C( 45), INT8_C( 113), -INT8_C( 37), -INT8_C( 8), -INT8_C( 2) }, { INT8_C( 64), -INT8_C( 85), INT8_C( 123), -INT8_C( 18), -INT8_C( 68), INT8_MIN, INT8_C( 0), INT8_C( 28), INT8_C( 25), INT8_C( 58), -INT8_C( 17), INT8_C( 57), -INT8_C( 39), INT8_C( 6), INT8_C( 72), INT8_C( 41), INT8_C( 81), INT8_C( 60), INT8_C( 22), -INT8_C( 55), INT8_C( 7), -INT8_C( 42), INT8_C( 56), INT8_C( 50), INT8_C( 61), INT8_C( 64), -INT8_C( 37), -INT8_C( 73), -INT8_C( 27), -INT8_C( 30), -INT8_C( 87), INT8_C( 37), -INT8_C( 115), INT8_C( 36), INT8_C( 19), INT8_C( 73), -INT8_C( 92), INT8_C( 20), INT8_C( 101), -INT8_C( 66), INT8_C( 78), INT8_C( 85), -INT8_C( 9), INT8_C( 39), INT8_C( 91), INT8_C( 63), INT8_C( 81), -INT8_C( 84), INT8_C( 123), INT8_C( 103), INT8_C( 118), -INT8_C( 126), INT8_C( 61), -INT8_C( 82), -INT8_C( 76), INT8_C( 123), -INT8_C( 18), -INT8_C( 113), INT8_C( 50), -INT8_C( 45), INT8_C( 113), -INT8_C( 37), -INT8_C( 8), -INT8_C( 2) } }, { { INT8_C( 0), INT8_C( 12), INT8_C( 71), -INT8_C( 92), INT8_C( 32), -INT8_C( 83), INT8_C( 98), INT8_C( 110), INT8_C( 2), INT8_C( 89), -INT8_C( 107), INT8_C( 93), -INT8_C( 104), -INT8_C( 26), INT8_C( 9), INT8_C( 19), INT8_C( 77), INT8_MAX, -INT8_C( 107), -INT8_C( 117), INT8_C( 45), INT8_C( 73), INT8_C( 6), INT8_C( 28), -INT8_C( 39), INT8_C( 56), -INT8_C( 17), INT8_C( 74), INT8_C( 20), -INT8_C( 24), INT8_C( 73), INT8_C( 20), -INT8_C( 12), -INT8_C( 112), -INT8_C( 72), INT8_C( 20), INT8_C( 61), INT8_C( 27), -INT8_C( 126), INT8_C( 63), INT8_C( 116), INT8_C( 23), -INT8_C( 100), INT8_C( 13), -INT8_C( 2), -INT8_C( 90), INT8_C( 32), INT8_C( 75), INT8_C( 37), -INT8_C( 74), -INT8_C( 42), INT8_C( 83), -INT8_C( 1), -INT8_C( 36), INT8_C( 111), -INT8_C( 
40), INT8_C( 21), INT8_C( 94), INT8_C( 35), INT8_C( 41), INT8_C( 70), INT8_C( 108), INT8_C( 61), INT8_C( 58) }, { INT8_C( 0), INT8_C( 12), INT8_C( 71), -INT8_C( 92), INT8_C( 32), -INT8_C( 83), INT8_C( 98), INT8_C( 110), INT8_C( 2), INT8_C( 89), -INT8_C( 107), INT8_C( 93), -INT8_C( 104), -INT8_C( 26), INT8_C( 9), INT8_C( 19), INT8_C( 77), INT8_MAX, -INT8_C( 107), -INT8_C( 117), INT8_C( 45), INT8_C( 73), INT8_C( 6), INT8_C( 28), -INT8_C( 39), INT8_C( 56), -INT8_C( 17), INT8_C( 74), INT8_C( 20), -INT8_C( 24), INT8_C( 73), INT8_C( 20), -INT8_C( 12), -INT8_C( 112), -INT8_C( 72), INT8_C( 20), INT8_C( 61), INT8_C( 27), -INT8_C( 126), INT8_C( 63), INT8_C( 116), INT8_C( 23), -INT8_C( 100), INT8_C( 13), -INT8_C( 2), -INT8_C( 90), INT8_C( 32), INT8_C( 75), INT8_C( 37), -INT8_C( 74), -INT8_C( 42), INT8_C( 83), -INT8_C( 1), -INT8_C( 36), INT8_C( 111), -INT8_C( 40), INT8_C( 21), INT8_C( 94), INT8_C( 35), INT8_C( 41), INT8_C( 70), INT8_C( 108), INT8_C( 61), INT8_C( 58) } }, { { -INT8_C( 4), -INT8_C( 11), INT8_C( 78), INT8_C( 58), INT8_C( 16), -INT8_C( 48), INT8_C( 121), -INT8_C( 123), -INT8_C( 24), INT8_C( 22), -INT8_C( 110), -INT8_C( 26), -INT8_C( 68), -INT8_C( 78), INT8_C( 49), -INT8_C( 31), INT8_C( 104), INT8_C( 8), INT8_C( 52), INT8_C( 104), -INT8_C( 28), -INT8_C( 93), INT8_C( 64), -INT8_C( 7), INT8_C( 2), INT8_C( 99), INT8_C( 34), INT8_C( 72), -INT8_C( 49), INT8_C( 95), -INT8_C( 125), -INT8_C( 52), INT8_C( 85), -INT8_C( 47), INT8_C( 6), INT8_C( 101), -INT8_C( 94), INT8_MAX, -INT8_C( 22), -INT8_C( 118), -INT8_C( 107), INT8_C( 124), INT8_C( 112), INT8_C( 81), INT8_C( 47), -INT8_C( 95), INT8_C( 51), -INT8_C( 105), -INT8_C( 87), INT8_C( 103), -INT8_C( 1), -INT8_C( 114), INT8_C( 11), INT8_C( 64), -INT8_C( 121), INT8_C( 13), -INT8_C( 93), -INT8_C( 86), INT8_C( 85), INT8_C( 115), INT8_C( 9), -INT8_C( 40), INT8_C( 63), INT8_C( 94) }, { -INT8_C( 4), -INT8_C( 11), INT8_C( 78), INT8_C( 58), INT8_C( 16), -INT8_C( 48), INT8_C( 121), -INT8_C( 123), -INT8_C( 24), INT8_C( 22), -INT8_C( 
110), -INT8_C( 26), -INT8_C( 68), -INT8_C( 78), INT8_C( 49), -INT8_C( 31), INT8_C( 104), INT8_C( 8), INT8_C( 52), INT8_C( 104), -INT8_C( 28), -INT8_C( 93), INT8_C( 64), -INT8_C( 7), INT8_C( 2), INT8_C( 99), INT8_C( 34), INT8_C( 72), -INT8_C( 49), INT8_C( 95), -INT8_C( 125), -INT8_C( 52), INT8_C( 85), -INT8_C( 47), INT8_C( 6), INT8_C( 101), -INT8_C( 94), INT8_MAX, -INT8_C( 22), -INT8_C( 118), -INT8_C( 107), INT8_C( 124), INT8_C( 112), INT8_C( 81), INT8_C( 47), -INT8_C( 95), INT8_C( 51), -INT8_C( 105), -INT8_C( 87), INT8_C( 103), -INT8_C( 1), -INT8_C( 114), INT8_C( 11), INT8_C( 64), -INT8_C( 121), INT8_C( 13), -INT8_C( 93), -INT8_C( 86), INT8_C( 85), INT8_C( 115), INT8_C( 9), -INT8_C( 40), INT8_C( 63), INT8_C( 94) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i r = simde_mm512_set_epi8( test_vec[i].a[63], test_vec[i].a[62], test_vec[i].a[61], test_vec[i].a[60], test_vec[i].a[59], test_vec[i].a[58], test_vec[i].a[57], test_vec[i].a[56], test_vec[i].a[55], test_vec[i].a[54], test_vec[i].a[53], test_vec[i].a[52], test_vec[i].a[51], test_vec[i].a[50], test_vec[i].a[49], test_vec[i].a[48], test_vec[i].a[47], test_vec[i].a[46], test_vec[i].a[45], test_vec[i].a[44], test_vec[i].a[43], test_vec[i].a[42], test_vec[i].a[41], test_vec[i].a[40], test_vec[i].a[39], test_vec[i].a[38], test_vec[i].a[37], test_vec[i].a[36], test_vec[i].a[35], test_vec[i].a[34], test_vec[i].a[33], test_vec[i].a[32], test_vec[i].a[31], test_vec[i].a[30], test_vec[i].a[29], test_vec[i].a[28], test_vec[i].a[27], test_vec[i].a[26], test_vec[i].a[25], test_vec[i].a[24], test_vec[i].a[23], test_vec[i].a[22], test_vec[i].a[21], test_vec[i].a[20], test_vec[i].a[19], test_vec[i].a[18], test_vec[i].a[17], test_vec[i].a[16], test_vec[i].a[15], test_vec[i].a[14], test_vec[i].a[13], test_vec[i].a[12], test_vec[i].a[11], test_vec[i].a[10], test_vec[i].a[ 9], test_vec[i].a[ 8], test_vec[i].a[ 7], test_vec[i].a[ 6], test_vec[i].a[ 5], test_vec[i].a[ 4], test_vec[i].a[ 
3], test_vec[i].a[ 2], test_vec[i].a[ 1], test_vec[i].a[ 0]); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i8x64(); simde__m512i r = a; simde_test_x86_write_i8x64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_set_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[32]; const int16_t r[32]; } test_vec[] = { { { INT16_C( 28208), INT16_C( 11568), -INT16_C( 18520), INT16_C( 32673), -INT16_C( 5618), -INT16_C( 32516), -INT16_C( 29675), -INT16_C( 30471), -INT16_C( 22650), -INT16_C( 27648), -INT16_C( 32156), -INT16_C( 14845), -INT16_C( 31076), -INT16_C( 2753), -INT16_C( 2052), INT16_C( 11282), INT16_C( 17253), INT16_C( 3418), -INT16_C( 1030), INT16_C( 2188), -INT16_C( 30235), -INT16_C( 1144), -INT16_C( 32235), -INT16_C( 25469), -INT16_C( 31959), -INT16_C( 29392), INT16_C( 13062), -INT16_C( 23981), -INT16_C( 27974), -INT16_C( 18793), -INT16_C( 21879), -INT16_C( 4125) }, { INT16_C( 28208), INT16_C( 11568), -INT16_C( 18520), INT16_C( 32673), -INT16_C( 5618), -INT16_C( 32516), -INT16_C( 29675), -INT16_C( 30471), -INT16_C( 22650), -INT16_C( 27648), -INT16_C( 32156), -INT16_C( 14845), -INT16_C( 31076), -INT16_C( 2753), -INT16_C( 2052), INT16_C( 11282), INT16_C( 17253), INT16_C( 3418), -INT16_C( 1030), INT16_C( 2188), -INT16_C( 30235), -INT16_C( 1144), -INT16_C( 32235), -INT16_C( 25469), -INT16_C( 31959), -INT16_C( 29392), INT16_C( 13062), -INT16_C( 23981), -INT16_C( 27974), -INT16_C( 18793), -INT16_C( 21879), -INT16_C( 4125) } }, { { INT16_C( 15853), -INT16_C( 6148), -INT16_C( 30408), INT16_C( 7919), INT16_C( 30482), INT16_C( 10009), -INT16_C( 25351), INT16_C( 8899), -INT16_C( 3296), INT16_C( 9903), INT16_C( 807), -INT16_C( 7736), INT16_C( 24725), INT16_C( 8087), INT16_C( 31242), -INT16_C( 2290), INT16_C( 2743), -INT16_C( 
3874), -INT16_C( 12909), -INT16_C( 23282), INT16_C( 10052), INT16_C( 16077), -INT16_C( 28477), -INT16_C( 7328), INT16_C( 4228), -INT16_C( 21751), -INT16_C( 11757), -INT16_C( 22388), INT16_C( 9010), INT16_C( 15559), -INT16_C( 10850), INT16_C( 21811) }, { INT16_C( 15853), -INT16_C( 6148), -INT16_C( 30408), INT16_C( 7919), INT16_C( 30482), INT16_C( 10009), -INT16_C( 25351), INT16_C( 8899), -INT16_C( 3296), INT16_C( 9903), INT16_C( 807), -INT16_C( 7736), INT16_C( 24725), INT16_C( 8087), INT16_C( 31242), -INT16_C( 2290), INT16_C( 2743), -INT16_C( 3874), -INT16_C( 12909), -INT16_C( 23282), INT16_C( 10052), INT16_C( 16077), -INT16_C( 28477), -INT16_C( 7328), INT16_C( 4228), -INT16_C( 21751), -INT16_C( 11757), -INT16_C( 22388), INT16_C( 9010), INT16_C( 15559), -INT16_C( 10850), INT16_C( 21811) } }, { { INT16_C( 4576), INT16_C( 29509), INT16_C( 21470), INT16_C( 8729), -INT16_C( 6534), INT16_C( 15968), -INT16_C( 16010), -INT16_C( 1503), INT16_C( 11217), -INT16_C( 7003), INT16_C( 12797), INT16_C( 12172), INT16_C( 21589), -INT16_C( 3221), -INT16_C( 25047), INT16_C( 2376), -INT16_C( 29009), -INT16_C( 29315), -INT16_C( 26911), INT16_C( 23727), INT16_C( 4220), -INT16_C( 3430), -INT16_C( 17455), -INT16_C( 23827), -INT16_C( 27930), -INT16_C( 7290), INT16_C( 4804), INT16_C( 6418), INT16_C( 32102), -INT16_C( 28660), INT16_C( 21531), -INT16_C( 13671) }, { INT16_C( 4576), INT16_C( 29509), INT16_C( 21470), INT16_C( 8729), -INT16_C( 6534), INT16_C( 15968), -INT16_C( 16010), -INT16_C( 1503), INT16_C( 11217), -INT16_C( 7003), INT16_C( 12797), INT16_C( 12172), INT16_C( 21589), -INT16_C( 3221), -INT16_C( 25047), INT16_C( 2376), -INT16_C( 29009), -INT16_C( 29315), -INT16_C( 26911), INT16_C( 23727), INT16_C( 4220), -INT16_C( 3430), -INT16_C( 17455), -INT16_C( 23827), -INT16_C( 27930), -INT16_C( 7290), INT16_C( 4804), INT16_C( 6418), INT16_C( 32102), -INT16_C( 28660), INT16_C( 21531), -INT16_C( 13671) } }, { { INT16_C( 5858), -INT16_C( 15273), INT16_C( 1964), INT16_C( 10272), -INT16_C( 17897), 
-INT16_C( 6117), INT16_C( 2165), INT16_C( 23690), INT16_C( 4250), INT16_C( 24127), INT16_C( 21026), -INT16_C( 30345), -INT16_C( 31793), -INT16_C( 5351), -INT16_C( 19752), -INT16_C( 17739), INT16_C( 3529), INT16_C( 30078), -INT16_C( 25068), INT16_C( 11166), -INT16_C( 18088), -INT16_C( 12781), -INT16_C( 25151), INT16_C( 23338), INT16_C( 27053), -INT16_C( 12358), INT16_C( 12731), -INT16_C( 29864), INT16_C( 29109), -INT16_C( 29322), INT16_C( 11044), -INT16_C( 4793) }, { INT16_C( 5858), -INT16_C( 15273), INT16_C( 1964), INT16_C( 10272), -INT16_C( 17897), -INT16_C( 6117), INT16_C( 2165), INT16_C( 23690), INT16_C( 4250), INT16_C( 24127), INT16_C( 21026), -INT16_C( 30345), -INT16_C( 31793), -INT16_C( 5351), -INT16_C( 19752), -INT16_C( 17739), INT16_C( 3529), INT16_C( 30078), -INT16_C( 25068), INT16_C( 11166), -INT16_C( 18088), -INT16_C( 12781), -INT16_C( 25151), INT16_C( 23338), INT16_C( 27053), -INT16_C( 12358), INT16_C( 12731), -INT16_C( 29864), INT16_C( 29109), -INT16_C( 29322), INT16_C( 11044), -INT16_C( 4793) } }, { { -INT16_C( 14792), INT16_C( 19554), INT16_C( 100), -INT16_C( 17033), -INT16_C( 30023), INT16_C( 31371), -INT16_C( 19161), -INT16_C( 11050), -INT16_C( 28642), -INT16_C( 9564), -INT16_C( 831), INT16_C( 30309), -INT16_C( 9362), -INT16_C( 28157), INT16_C( 19206), INT16_C( 16255), -INT16_C( 7919), INT16_C( 30091), INT16_C( 994), -INT16_C( 25806), -INT16_C( 17011), -INT16_C( 19178), -INT16_C( 5006), -INT16_C( 28279), INT16_C( 11644), INT16_C( 15723), -INT16_C( 12246), -INT16_C( 26444), -INT16_C( 18517), -INT16_C( 20182), -INT16_C( 22270), INT16_C( 5104) }, { -INT16_C( 14792), INT16_C( 19554), INT16_C( 100), -INT16_C( 17033), -INT16_C( 30023), INT16_C( 31371), -INT16_C( 19161), -INT16_C( 11050), -INT16_C( 28642), -INT16_C( 9564), -INT16_C( 831), INT16_C( 30309), -INT16_C( 9362), -INT16_C( 28157), INT16_C( 19206), INT16_C( 16255), -INT16_C( 7919), INT16_C( 30091), INT16_C( 994), -INT16_C( 25806), -INT16_C( 17011), -INT16_C( 19178), -INT16_C( 5006), -INT16_C( 
28279), INT16_C( 11644), INT16_C( 15723), -INT16_C( 12246), -INT16_C( 26444), -INT16_C( 18517), -INT16_C( 20182), -INT16_C( 22270), INT16_C( 5104) } }, { { INT16_C( 31882), INT16_C( 27785), -INT16_C( 17537), INT16_C( 3080), INT16_C( 7801), -INT16_C( 5183), INT16_C( 19210), -INT16_C( 31108), -INT16_C( 6280), -INT16_C( 23869), INT16_C( 30647), INT16_C( 25146), INT16_C( 25647), INT16_C( 12564), INT16_C( 1037), -INT16_C( 26555), -INT16_C( 12672), -INT16_C( 252), INT16_C( 3209), INT16_C( 524), -INT16_C( 13014), INT16_C( 13550), INT16_C( 27160), -INT16_C( 28230), INT16_C( 32338), INT16_C( 2355), INT16_C( 28405), INT16_C( 9324), -INT16_C( 32558), -INT16_C( 8106), -INT16_C( 25724), INT16_C( 1400) }, { INT16_C( 31882), INT16_C( 27785), -INT16_C( 17537), INT16_C( 3080), INT16_C( 7801), -INT16_C( 5183), INT16_C( 19210), -INT16_C( 31108), -INT16_C( 6280), -INT16_C( 23869), INT16_C( 30647), INT16_C( 25146), INT16_C( 25647), INT16_C( 12564), INT16_C( 1037), -INT16_C( 26555), -INT16_C( 12672), -INT16_C( 252), INT16_C( 3209), INT16_C( 524), -INT16_C( 13014), INT16_C( 13550), INT16_C( 27160), -INT16_C( 28230), INT16_C( 32338), INT16_C( 2355), INT16_C( 28405), INT16_C( 9324), -INT16_C( 32558), -INT16_C( 8106), -INT16_C( 25724), INT16_C( 1400) } }, { { INT16_C( 31849), -INT16_C( 3580), INT16_C( 4233), -INT16_C( 19467), -INT16_C( 7202), -INT16_C( 2328), -INT16_C( 23987), -INT16_C( 24697), -INT16_C( 17632), INT16_C( 5801), INT16_C( 5417), -INT16_C( 1222), -INT16_C( 28523), INT16_C( 6619), INT16_C( 21291), -INT16_C( 27618), INT16_C( 9168), INT16_C( 22919), INT16_C( 31795), INT16_C( 4364), -INT16_C( 2977), -INT16_C( 21496), -INT16_C( 28777), -INT16_C( 18612), -INT16_C( 2742), INT16_C( 29645), INT16_C( 2058), -INT16_C( 24721), INT16_C( 19096), -INT16_C( 15176), -INT16_C( 10338), INT16_C( 28248) }, { INT16_C( 31849), -INT16_C( 3580), INT16_C( 4233), -INT16_C( 19467), -INT16_C( 7202), -INT16_C( 2328), -INT16_C( 23987), -INT16_C( 24697), -INT16_C( 17632), INT16_C( 5801), INT16_C( 5417), 
-INT16_C( 1222), -INT16_C( 28523), INT16_C( 6619), INT16_C( 21291), -INT16_C( 27618), INT16_C( 9168), INT16_C( 22919), INT16_C( 31795), INT16_C( 4364), -INT16_C( 2977), -INT16_C( 21496), -INT16_C( 28777), -INT16_C( 18612), -INT16_C( 2742), INT16_C( 29645), INT16_C( 2058), -INT16_C( 24721), INT16_C( 19096), -INT16_C( 15176), -INT16_C( 10338), INT16_C( 28248) } }, { { -INT16_C( 8198), INT16_C( 11719), -INT16_C( 11429), -INT16_C( 17857), INT16_C( 18376), INT16_C( 24423), -INT16_C( 19498), INT16_C( 8470), -INT16_C( 7000), -INT16_C( 19820), INT16_C( 1004), -INT16_C( 31663), INT16_C( 2382), -INT16_C( 5048), -INT16_C( 24096), -INT16_C( 9638), INT16_C( 8576), -INT16_C( 9208), INT16_C( 18420), -INT16_C( 17258), -INT16_C( 626), INT16_C( 25627), INT16_C( 12976), INT16_C( 22661), INT16_C( 6678), INT16_C( 522), INT16_C( 23325), INT16_C( 27526), -INT16_C( 12443), INT16_C( 17751), -INT16_C( 20112), -INT16_C( 4064) }, { -INT16_C( 8198), INT16_C( 11719), -INT16_C( 11429), -INT16_C( 17857), INT16_C( 18376), INT16_C( 24423), -INT16_C( 19498), INT16_C( 8470), -INT16_C( 7000), -INT16_C( 19820), INT16_C( 1004), -INT16_C( 31663), INT16_C( 2382), -INT16_C( 5048), -INT16_C( 24096), -INT16_C( 9638), INT16_C( 8576), -INT16_C( 9208), INT16_C( 18420), -INT16_C( 17258), -INT16_C( 626), INT16_C( 25627), INT16_C( 12976), INT16_C( 22661), INT16_C( 6678), INT16_C( 522), INT16_C( 23325), INT16_C( 27526), -INT16_C( 12443), INT16_C( 17751), -INT16_C( 20112), -INT16_C( 4064) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i r = simde_mm512_set_epi16( test_vec[i].a[31], test_vec[i].a[30], test_vec[i].a[29], test_vec[i].a[28], test_vec[i].a[27], test_vec[i].a[26], test_vec[i].a[25], test_vec[i].a[24], test_vec[i].a[23], test_vec[i].a[22], test_vec[i].a[21], test_vec[i].a[20], test_vec[i].a[19], test_vec[i].a[18], test_vec[i].a[17], test_vec[i].a[16], test_vec[i].a[15], test_vec[i].a[14], test_vec[i].a[13], test_vec[i].a[12], test_vec[i].a[11], 
test_vec[i].a[10], test_vec[i].a[ 9], test_vec[i].a[ 8], test_vec[i].a[ 7], test_vec[i].a[ 6], test_vec[i].a[ 5], test_vec[i].a[ 4], test_vec[i].a[ 3], test_vec[i].a[ 2], test_vec[i].a[ 1], test_vec[i].a[ 0]); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i16x32(); simde__m512i r = a; simde_test_x86_write_i16x32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_set_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[16]; const int32_t r[16]; } test_vec[] = { { { -INT32_C( 718084918), -INT32_C( 1447421420), INT32_C( 1848496256), INT32_C( 279896686), -INT32_C( 1716299902), INT32_C( 757651794), -INT32_C( 612291898), -INT32_C( 2021453635), INT32_C( 140293876), INT32_C( 1353783248), INT32_C( 1438572519), INT32_C( 1130785985), INT32_C( 366746051), -INT32_C( 1186789901), -INT32_C( 325794769), -INT32_C( 931981100) }, { -INT32_C( 718084918), -INT32_C( 1447421420), INT32_C( 1848496256), INT32_C( 279896686), -INT32_C( 1716299902), INT32_C( 757651794), -INT32_C( 612291898), -INT32_C( 2021453635), INT32_C( 140293876), INT32_C( 1353783248), INT32_C( 1438572519), INT32_C( 1130785985), INT32_C( 366746051), -INT32_C( 1186789901), -INT32_C( 325794769), -INT32_C( 931981100) } }, { { -INT32_C( 1630416690), -INT32_C( 823229721), INT32_C( 589605985), -INT32_C( 614036968), -INT32_C( 1762638941), INT32_C( 2001679176), -INT32_C( 865868552), -INT32_C( 896149764), -INT32_C( 1922537818), INT32_C( 1247565544), INT32_C( 443383810), -INT32_C( 1376398582), INT32_C( 1581573398), INT32_C( 299209497), INT32_C( 1943877751), -INT32_C( 1254264305) }, { -INT32_C( 1630416690), -INT32_C( 823229721), INT32_C( 589605985), -INT32_C( 614036968), -INT32_C( 1762638941), INT32_C( 2001679176), -INT32_C( 865868552), -INT32_C( 896149764), -INT32_C( 1922537818), 
INT32_C( 1247565544), INT32_C( 443383810), -INT32_C( 1376398582), INT32_C( 1581573398), INT32_C( 299209497), INT32_C( 1943877751), -INT32_C( 1254264305) } }, { { -INT32_C( 1052531240), -INT32_C( 49569797), INT32_C( 689403935), INT32_C( 1658195019), INT32_C( 197139186), INT32_C( 622630574), -INT32_C( 577111602), INT32_C( 1150539372), INT32_C( 1996871292), -INT32_C( 1804267403), -INT32_C( 725775224), -INT32_C( 1959357543), INT32_C( 1553397422), INT32_C( 1535226508), INT32_C( 389552811), INT32_C( 1834798065) }, { -INT32_C( 1052531240), -INT32_C( 49569797), INT32_C( 689403935), INT32_C( 1658195019), INT32_C( 197139186), INT32_C( 622630574), -INT32_C( 577111602), INT32_C( 1150539372), INT32_C( 1996871292), -INT32_C( 1804267403), -INT32_C( 725775224), -INT32_C( 1959357543), INT32_C( 1553397422), INT32_C( 1535226508), INT32_C( 389552811), INT32_C( 1834798065) } }, { { INT32_C( 384065953), -INT32_C( 89499278), INT32_C( 2144233446), -INT32_C( 1458961157), -INT32_C( 2029674245), -INT32_C( 35486126), -INT32_C( 1844110431), -INT32_C( 1996525082), INT32_C( 1151263698), INT32_C( 591350077), -INT32_C( 1415443024), INT32_C( 223652882), -INT32_C( 1634444980), -INT32_C( 2120517664), INT32_C( 2014556306), -INT32_C( 201321951) }, { INT32_C( 384065953), -INT32_C( 89499278), INT32_C( 2144233446), -INT32_C( 1458961157), -INT32_C( 2029674245), -INT32_C( 35486126), -INT32_C( 1844110431), -INT32_C( 1996525082), INT32_C( 1151263698), INT32_C( 591350077), -INT32_C( 1415443024), INT32_C( 223652882), -INT32_C( 1634444980), -INT32_C( 2120517664), INT32_C( 2014556306), -INT32_C( 201321951) } }, { { INT32_C( 842571765), -INT32_C( 1739229208), -INT32_C( 1757087867), -INT32_C( 274425693), -INT32_C( 762496782), INT32_C( 1095969199), -INT32_C( 71670055), INT32_C( 1844427384), INT32_C( 1101014873), INT32_C( 618329503), -INT32_C( 1866785043), -INT32_C( 1467981898), INT32_C( 1199181207), INT32_C( 277400886), -INT32_C( 1425325517), INT32_C( 1444543229) }, { INT32_C( 842571765), -INT32_C( 1739229208), 
-INT32_C( 1757087867), -INT32_C( 274425693), -INT32_C( 762496782), INT32_C( 1095969199), -INT32_C( 71670055), INT32_C( 1844427384), INT32_C( 1101014873), INT32_C( 618329503), -INT32_C( 1866785043), -INT32_C( 1467981898), INT32_C( 1199181207), INT32_C( 277400886), -INT32_C( 1425325517), INT32_C( 1444543229) } }, { { -INT32_C( 1063732959), -INT32_C( 1679527250), INT32_C( 1177329552), -INT32_C( 1762677506), -INT32_C( 253924935), INT32_C( 1778410807), -INT32_C( 1525281880), INT32_C( 653995781), -INT32_C( 1763208216), -INT32_C( 1791833339), INT32_C( 1776049771), -INT32_C( 1006646518), INT32_C( 1806949428), -INT32_C( 355028158), -INT32_C( 1013977922), INT32_C( 65702427) }, { -INT32_C( 1063732959), -INT32_C( 1679527250), INT32_C( 1177329552), -INT32_C( 1762677506), -INT32_C( 253924935), INT32_C( 1778410807), -INT32_C( 1525281880), INT32_C( 653995781), -INT32_C( 1763208216), -INT32_C( 1791833339), INT32_C( 1776049771), -INT32_C( 1006646518), INT32_C( 1806949428), -INT32_C( 355028158), -INT32_C( 1013977922), INT32_C( 65702427) } }, { { INT32_C( 597283102), INT32_C( 129616796), INT32_C( 863081769), -INT32_C( 1795723168), -INT32_C( 1895781811), INT32_C( 477746782), -INT32_C( 572520254), -INT32_C( 1327445358), INT32_C( 936671643), INT32_C( 1849658693), -INT32_C( 2103267294), INT32_C( 1830197536), -INT32_C( 1577314749), -INT32_C( 1363249684), INT32_C( 277585533), INT32_C( 62942056) }, { INT32_C( 597283102), INT32_C( 129616796), INT32_C( 863081769), -INT32_C( 1795723168), -INT32_C( 1895781811), INT32_C( 477746782), -INT32_C( 572520254), -INT32_C( 1327445358), INT32_C( 936671643), INT32_C( 1849658693), -INT32_C( 2103267294), INT32_C( 1830197536), -INT32_C( 1577314749), -INT32_C( 1363249684), INT32_C( 277585533), INT32_C( 62942056) } }, { { INT32_C( 708482277), INT32_C( 1134065953), INT32_C( 1254439465), INT32_C( 397925587), -INT32_C( 541543182), -INT32_C( 1483901399), INT32_C( 2092374292), INT32_C( 1769961348), INT32_C( 764656140), INT32_C( 1567697971), INT32_C( 984036966), 
INT32_C( 89218578), INT32_C( 1021577490), -INT32_C( 1797033600), INT32_C( 252811914), INT32_C( 494440465) }, { INT32_C( 708482277), INT32_C( 1134065953), INT32_C( 1254439465), INT32_C( 397925587), -INT32_C( 541543182), -INT32_C( 1483901399), INT32_C( 2092374292), INT32_C( 1769961348), INT32_C( 764656140), INT32_C( 1567697971), INT32_C( 984036966), INT32_C( 89218578), INT32_C( 1021577490), -INT32_C( 1797033600), INT32_C( 252811914), INT32_C( 494440465) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i r = simde_mm512_set_epi32( test_vec[i].a[15], test_vec[i].a[14], test_vec[i].a[13], test_vec[i].a[12], test_vec[i].a[11], test_vec[i].a[10], test_vec[i].a[ 9], test_vec[i].a[ 8], test_vec[i].a[ 7], test_vec[i].a[ 6], test_vec[i].a[ 5], test_vec[i].a[ 4], test_vec[i].a[ 3], test_vec[i].a[ 2], test_vec[i].a[ 1], test_vec[i].a[ 0]); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i32x16(); simde__m512i r = a; simde_test_x86_write_i32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_set_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[8]; const int64_t r[8]; } test_vec[] = { { { INT64_C( 7416271910326683854), -INT64_C( 4897747652392899789), INT64_C( 6856628829737554158), -INT64_C( 2150993203686264898), -INT64_C( 552480653107076628), -INT64_C( 7597675204674063523), INT64_C( 2749860335145960499), INT64_C( 4150965691438738086) }, { INT64_C( 7416271910326683854), -INT64_C( 4897747652392899789), INT64_C( 6856628829737554158), -INT64_C( 2150993203686264898), -INT64_C( 552480653107076628), -INT64_C( 7597675204674063523), INT64_C( 2749860335145960499), INT64_C( 4150965691438738086) } }, { { INT64_C( 8129134851571376658), INT64_C( 4732951216391416688), INT64_C( 
1883688221230759318), INT64_C( 8207088352664759902), -INT64_C( 300305717954802351), -INT64_C( 9091710009513666428), INT64_C( 8164361002356301502), -INT64_C( 2116124952781577775) }, { INT64_C( 8129134851571376658), INT64_C( 4732951216391416688), INT64_C( 1883688221230759318), INT64_C( 8207088352664759902), -INT64_C( 300305717954802351), -INT64_C( 9091710009513666428), INT64_C( 8164361002356301502), -INT64_C( 2116124952781577775) } }, { { -INT64_C( 8532600538384569584), INT64_C( 2927884471851788597), -INT64_C( 7758952549258157875), -INT64_C( 8989632965809705014), -INT64_C( 2818773282401122989), -INT64_C( 5048647419307688867), -INT64_C( 4751038171580904519), -INT64_C( 7975777913320843685) }, { -INT64_C( 8532600538384569584), INT64_C( 2927884471851788597), -INT64_C( 7758952549258157875), -INT64_C( 8989632965809705014), -INT64_C( 2818773282401122989), -INT64_C( 5048647419307688867), -INT64_C( 4751038171580904519), -INT64_C( 7975777913320843685) } }, { { INT64_C( 9176306490307866333), -INT64_C( 6743235639710953273), INT64_C( 5418726386948769153), -INT64_C( 8947410401178858236), INT64_C( 5726990520023236056), INT64_C( 8590174245068325035), -INT64_C( 8669956488264133518), INT64_C( 618854828626003821) }, { INT64_C( 9176306490307866333), -INT64_C( 6743235639710953273), INT64_C( 5418726386948769153), -INT64_C( 8947410401178858236), INT64_C( 5726990520023236056), INT64_C( 8590174245068325035), -INT64_C( 8669956488264133518), INT64_C( 618854828626003821) } }, { { -INT64_C( 6975374382109320869), INT64_C( 1345285050491679549), INT64_C( 2721902299037669565), -INT64_C( 8636832967257714454), -INT64_C( 2235300940097051033), INT64_C( 6018146754448600529), INT64_C( 447361715065321816), -INT64_C( 6584470204827275543) }, { -INT64_C( 6975374382109320869), INT64_C( 1345285050491679549), INT64_C( 2721902299037669565), -INT64_C( 8636832967257714454), -INT64_C( 2235300940097051033), INT64_C( 6018146754448600529), INT64_C( 447361715065321816), -INT64_C( 6584470204827275543) } }, { { INT64_C( 
8785954047749196649), INT64_C( 1806347803474723111), -INT64_C( 1762490757418474392), INT64_C( 6317880597477763789), -INT64_C( 3545650502557192791), -INT64_C( 8273783058313988526), -INT64_C( 8995769048611334334), INT64_C( 7893467386249750085) }, { INT64_C( 8785954047749196649), INT64_C( 1806347803474723111), -INT64_C( 1762490757418474392), INT64_C( 6317880597477763789), -INT64_C( 3545650502557192791), -INT64_C( 8273783058313988526), -INT64_C( 8995769048611334334), INT64_C( 7893467386249750085) } }, { { -INT64_C( 7900287636437209113), -INT64_C( 3894817049730946752), -INT64_C( 2061467071467093205), INT64_C( 1264000947191279704), INT64_C( 1397238296100161937), INT64_C( 663554023642106597), -INT64_C( 3823751311176089747), INT64_C( 4141747775465784894) }, { -INT64_C( 7900287636437209113), -INT64_C( 3894817049730946752), -INT64_C( 2061467071467093205), INT64_C( 1264000947191279704), INT64_C( 1397238296100161937), INT64_C( 663554023642106597), -INT64_C( 3823751311176089747), INT64_C( 4141747775465784894) } }, { { -INT64_C( 314569080322778437), -INT64_C( 982175572242785673), -INT64_C( 3091741389357168420), INT64_C( 3158352032474005235), -INT64_C( 4331518035015115054), -INT64_C( 4328671828599354762), -INT64_C( 3806904150389625921), -INT64_C( 810302542018352858) }, { -INT64_C( 314569080322778437), -INT64_C( 982175572242785673), -INT64_C( 3091741389357168420), INT64_C( 3158352032474005235), -INT64_C( 4331518035015115054), -INT64_C( 4328671828599354762), -INT64_C( 3806904150389625921), -INT64_C( 810302542018352858) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i r = simde_mm512_set_epi64( test_vec[i].a[ 7], test_vec[i].a[ 6], test_vec[i].a[ 5], test_vec[i].a[ 4], test_vec[i].a[ 3], test_vec[i].a[ 2], test_vec[i].a[ 1], test_vec[i].a[ 0]); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i64x8(); 
simde__m512i r = a; simde_test_x86_write_i64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_set_pd (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 670.56), SIMDE_FLOAT64_C( 769.83), SIMDE_FLOAT64_C( 452.75), SIMDE_FLOAT64_C( -321.12), SIMDE_FLOAT64_C( 540.94), SIMDE_FLOAT64_C( -744.39), SIMDE_FLOAT64_C( -392.02), SIMDE_FLOAT64_C( -608.21) }, { SIMDE_FLOAT64_C( 670.56), SIMDE_FLOAT64_C( 769.83), SIMDE_FLOAT64_C( 452.75), SIMDE_FLOAT64_C( -321.12), SIMDE_FLOAT64_C( 540.94), SIMDE_FLOAT64_C( -744.39), SIMDE_FLOAT64_C( -392.02), SIMDE_FLOAT64_C( -608.21) } }, { { SIMDE_FLOAT64_C( -346.44), SIMDE_FLOAT64_C( -787.69), SIMDE_FLOAT64_C( 30.52), SIMDE_FLOAT64_C( -179.08), SIMDE_FLOAT64_C( 272.54), SIMDE_FLOAT64_C( -636.81), SIMDE_FLOAT64_C( 543.83), SIMDE_FLOAT64_C( 519.44) }, { SIMDE_FLOAT64_C( -346.44), SIMDE_FLOAT64_C( -787.69), SIMDE_FLOAT64_C( 30.52), SIMDE_FLOAT64_C( -179.08), SIMDE_FLOAT64_C( 272.54), SIMDE_FLOAT64_C( -636.81), SIMDE_FLOAT64_C( 543.83), SIMDE_FLOAT64_C( 519.44) } }, { { SIMDE_FLOAT64_C( -541.32), SIMDE_FLOAT64_C( 265.86), SIMDE_FLOAT64_C( 389.70), SIMDE_FLOAT64_C( 719.33), SIMDE_FLOAT64_C( 489.83), SIMDE_FLOAT64_C( 260.51), SIMDE_FLOAT64_C( 233.90), SIMDE_FLOAT64_C( -892.98) }, { SIMDE_FLOAT64_C( -541.32), SIMDE_FLOAT64_C( 265.86), SIMDE_FLOAT64_C( 389.70), SIMDE_FLOAT64_C( 719.33), SIMDE_FLOAT64_C( 489.83), SIMDE_FLOAT64_C( 260.51), SIMDE_FLOAT64_C( 233.90), SIMDE_FLOAT64_C( -892.98) } }, { { SIMDE_FLOAT64_C( -45.07), SIMDE_FLOAT64_C( 174.26), SIMDE_FLOAT64_C( 220.48), SIMDE_FLOAT64_C( 233.05), SIMDE_FLOAT64_C( -537.78), SIMDE_FLOAT64_C( -609.38), SIMDE_FLOAT64_C( -208.27), SIMDE_FLOAT64_C( -867.22) }, { SIMDE_FLOAT64_C( -45.07), SIMDE_FLOAT64_C( 174.26), SIMDE_FLOAT64_C( 220.48), SIMDE_FLOAT64_C( 233.05), SIMDE_FLOAT64_C( -537.78), SIMDE_FLOAT64_C( -609.38), 
SIMDE_FLOAT64_C( -208.27), SIMDE_FLOAT64_C( -867.22) } }, { { SIMDE_FLOAT64_C( -839.55), SIMDE_FLOAT64_C( -755.53), SIMDE_FLOAT64_C( -188.34), SIMDE_FLOAT64_C( 701.40), SIMDE_FLOAT64_C( -499.92), SIMDE_FLOAT64_C( 419.64), SIMDE_FLOAT64_C( -906.81), SIMDE_FLOAT64_C( 153.65) }, { SIMDE_FLOAT64_C( -839.55), SIMDE_FLOAT64_C( -755.53), SIMDE_FLOAT64_C( -188.34), SIMDE_FLOAT64_C( 701.40), SIMDE_FLOAT64_C( -499.92), SIMDE_FLOAT64_C( 419.64), SIMDE_FLOAT64_C( -906.81), SIMDE_FLOAT64_C( 153.65) } }, { { SIMDE_FLOAT64_C( 631.95), SIMDE_FLOAT64_C( 123.71), SIMDE_FLOAT64_C( 974.57), SIMDE_FLOAT64_C( -95.51), SIMDE_FLOAT64_C( 486.89), SIMDE_FLOAT64_C( 518.40), SIMDE_FLOAT64_C( -576.07), SIMDE_FLOAT64_C( 945.58) }, { SIMDE_FLOAT64_C( 631.95), SIMDE_FLOAT64_C( 123.71), SIMDE_FLOAT64_C( 974.57), SIMDE_FLOAT64_C( -95.51), SIMDE_FLOAT64_C( 486.89), SIMDE_FLOAT64_C( 518.40), SIMDE_FLOAT64_C( -576.07), SIMDE_FLOAT64_C( 945.58) } }, { { SIMDE_FLOAT64_C( -215.73), SIMDE_FLOAT64_C( 813.63), SIMDE_FLOAT64_C( 664.91), SIMDE_FLOAT64_C( -725.90), SIMDE_FLOAT64_C( 74.14), SIMDE_FLOAT64_C( -101.19), SIMDE_FLOAT64_C( -618.88), SIMDE_FLOAT64_C( -970.94) }, { SIMDE_FLOAT64_C( -215.73), SIMDE_FLOAT64_C( 813.63), SIMDE_FLOAT64_C( 664.91), SIMDE_FLOAT64_C( -725.90), SIMDE_FLOAT64_C( 74.14), SIMDE_FLOAT64_C( -101.19), SIMDE_FLOAT64_C( -618.88), SIMDE_FLOAT64_C( -970.94) } }, { { SIMDE_FLOAT64_C( -926.93), SIMDE_FLOAT64_C( 601.60), SIMDE_FLOAT64_C( 262.11), SIMDE_FLOAT64_C( -464.71), SIMDE_FLOAT64_C( 992.22), SIMDE_FLOAT64_C( -946.16), SIMDE_FLOAT64_C( -331.93), SIMDE_FLOAT64_C( -847.32) }, { SIMDE_FLOAT64_C( -926.93), SIMDE_FLOAT64_C( 601.60), SIMDE_FLOAT64_C( 262.11), SIMDE_FLOAT64_C( -464.71), SIMDE_FLOAT64_C( 992.22), SIMDE_FLOAT64_C( -946.16), SIMDE_FLOAT64_C( -331.93), SIMDE_FLOAT64_C( -847.32) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d r = simde_mm512_set_pd( test_vec[i].a[ 7], test_vec[i].a[ 6], test_vec[i].a[ 5], test_vec[i].a[ 4], 
test_vec[i].a[ 3], test_vec[i].a[ 2], test_vec[i].a[ 1], test_vec[i].a[ 0]); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512d a = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m512d r = a; simde_test_x86_write_f64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_set_ps (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -967.45), SIMDE_FLOAT32_C( 555.14), SIMDE_FLOAT32_C( 395.62), SIMDE_FLOAT32_C( 646.95), SIMDE_FLOAT32_C( -7.99), SIMDE_FLOAT32_C( -159.39), SIMDE_FLOAT32_C( -204.24), SIMDE_FLOAT32_C( 19.62), SIMDE_FLOAT32_C( 980.65), SIMDE_FLOAT32_C( -174.06), SIMDE_FLOAT32_C( -131.07), SIMDE_FLOAT32_C( -28.25), SIMDE_FLOAT32_C( -768.63), SIMDE_FLOAT32_C( 553.54), SIMDE_FLOAT32_C( 497.97), SIMDE_FLOAT32_C( -142.92) }, { SIMDE_FLOAT32_C( -967.45), SIMDE_FLOAT32_C( 555.14), SIMDE_FLOAT32_C( 395.62), SIMDE_FLOAT32_C( 646.95), SIMDE_FLOAT32_C( -7.99), SIMDE_FLOAT32_C( -159.39), SIMDE_FLOAT32_C( -204.24), SIMDE_FLOAT32_C( 19.62), SIMDE_FLOAT32_C( 980.65), SIMDE_FLOAT32_C( -174.06), SIMDE_FLOAT32_C( -131.07), SIMDE_FLOAT32_C( -28.25), SIMDE_FLOAT32_C( -768.63), SIMDE_FLOAT32_C( 553.54), SIMDE_FLOAT32_C( 497.97), SIMDE_FLOAT32_C( -142.92) } }, { { SIMDE_FLOAT32_C( -51.14), SIMDE_FLOAT32_C( 28.70), SIMDE_FLOAT32_C( -598.25), SIMDE_FLOAT32_C( 911.70), SIMDE_FLOAT32_C( -905.59), SIMDE_FLOAT32_C( 815.98), SIMDE_FLOAT32_C( -798.54), SIMDE_FLOAT32_C( -677.94), SIMDE_FLOAT32_C( 697.56), SIMDE_FLOAT32_C( -111.92), SIMDE_FLOAT32_C( -783.66), SIMDE_FLOAT32_C( 226.91), SIMDE_FLOAT32_C( -846.01), SIMDE_FLOAT32_C( 293.51), SIMDE_FLOAT32_C( 37.49), SIMDE_FLOAT32_C( -813.46) }, { SIMDE_FLOAT32_C( -51.14), SIMDE_FLOAT32_C( 28.70), SIMDE_FLOAT32_C( 
-598.25), SIMDE_FLOAT32_C( 911.70), SIMDE_FLOAT32_C( -905.59), SIMDE_FLOAT32_C( 815.98), SIMDE_FLOAT32_C( -798.54), SIMDE_FLOAT32_C( -677.94), SIMDE_FLOAT32_C( 697.56), SIMDE_FLOAT32_C( -111.92), SIMDE_FLOAT32_C( -783.66), SIMDE_FLOAT32_C( 226.91), SIMDE_FLOAT32_C( -846.01), SIMDE_FLOAT32_C( 293.51), SIMDE_FLOAT32_C( 37.49), SIMDE_FLOAT32_C( -813.46) } }, { { SIMDE_FLOAT32_C( -151.35), SIMDE_FLOAT32_C( -566.88), SIMDE_FLOAT32_C( 833.49), SIMDE_FLOAT32_C( 840.66), SIMDE_FLOAT32_C( 273.73), SIMDE_FLOAT32_C( -370.75), SIMDE_FLOAT32_C( -139.72), SIMDE_FLOAT32_C( 254.38), SIMDE_FLOAT32_C( 455.19), SIMDE_FLOAT32_C( 729.21), SIMDE_FLOAT32_C( -773.88), SIMDE_FLOAT32_C( 686.56), SIMDE_FLOAT32_C( 282.75), SIMDE_FLOAT32_C( 724.10), SIMDE_FLOAT32_C( -456.37), SIMDE_FLOAT32_C( -768.39) }, { SIMDE_FLOAT32_C( -151.35), SIMDE_FLOAT32_C( -566.88), SIMDE_FLOAT32_C( 833.49), SIMDE_FLOAT32_C( 840.66), SIMDE_FLOAT32_C( 273.73), SIMDE_FLOAT32_C( -370.75), SIMDE_FLOAT32_C( -139.72), SIMDE_FLOAT32_C( 254.38), SIMDE_FLOAT32_C( 455.19), SIMDE_FLOAT32_C( 729.21), SIMDE_FLOAT32_C( -773.88), SIMDE_FLOAT32_C( 686.56), SIMDE_FLOAT32_C( 282.75), SIMDE_FLOAT32_C( 724.10), SIMDE_FLOAT32_C( -456.37), SIMDE_FLOAT32_C( -768.39) } }, { { SIMDE_FLOAT32_C( -247.20), SIMDE_FLOAT32_C( -54.62), SIMDE_FLOAT32_C( -856.69), SIMDE_FLOAT32_C( -152.80), SIMDE_FLOAT32_C( -238.64), SIMDE_FLOAT32_C( -655.23), SIMDE_FLOAT32_C( 169.26), SIMDE_FLOAT32_C( -541.09), SIMDE_FLOAT32_C( 232.84), SIMDE_FLOAT32_C( 385.60), SIMDE_FLOAT32_C( 685.83), SIMDE_FLOAT32_C( 386.84), SIMDE_FLOAT32_C( -320.88), SIMDE_FLOAT32_C( -276.68), SIMDE_FLOAT32_C( 573.38), SIMDE_FLOAT32_C( 527.77) }, { SIMDE_FLOAT32_C( -247.20), SIMDE_FLOAT32_C( -54.62), SIMDE_FLOAT32_C( -856.69), SIMDE_FLOAT32_C( -152.80), SIMDE_FLOAT32_C( -238.64), SIMDE_FLOAT32_C( -655.23), SIMDE_FLOAT32_C( 169.26), SIMDE_FLOAT32_C( -541.09), SIMDE_FLOAT32_C( 232.84), SIMDE_FLOAT32_C( 385.60), SIMDE_FLOAT32_C( 685.83), SIMDE_FLOAT32_C( 386.84), SIMDE_FLOAT32_C( -320.88), 
SIMDE_FLOAT32_C( -276.68), SIMDE_FLOAT32_C( 573.38), SIMDE_FLOAT32_C( 527.77) } }, { { SIMDE_FLOAT32_C( 156.44), SIMDE_FLOAT32_C( 406.87), SIMDE_FLOAT32_C( 368.42), SIMDE_FLOAT32_C( -569.84), SIMDE_FLOAT32_C( -963.88), SIMDE_FLOAT32_C( -771.30), SIMDE_FLOAT32_C( 684.54), SIMDE_FLOAT32_C( 491.30), SIMDE_FLOAT32_C( 957.91), SIMDE_FLOAT32_C( 910.66), SIMDE_FLOAT32_C( 177.86), SIMDE_FLOAT32_C( 240.66), SIMDE_FLOAT32_C( 634.76), SIMDE_FLOAT32_C( 721.49), SIMDE_FLOAT32_C( 472.26), SIMDE_FLOAT32_C( -612.44) }, { SIMDE_FLOAT32_C( 156.44), SIMDE_FLOAT32_C( 406.87), SIMDE_FLOAT32_C( 368.42), SIMDE_FLOAT32_C( -569.84), SIMDE_FLOAT32_C( -963.88), SIMDE_FLOAT32_C( -771.30), SIMDE_FLOAT32_C( 684.54), SIMDE_FLOAT32_C( 491.30), SIMDE_FLOAT32_C( 957.91), SIMDE_FLOAT32_C( 910.66), SIMDE_FLOAT32_C( 177.86), SIMDE_FLOAT32_C( 240.66), SIMDE_FLOAT32_C( 634.76), SIMDE_FLOAT32_C( 721.49), SIMDE_FLOAT32_C( 472.26), SIMDE_FLOAT32_C( -612.44) } }, { { SIMDE_FLOAT32_C( -333.13), SIMDE_FLOAT32_C( 615.57), SIMDE_FLOAT32_C( 234.76), SIMDE_FLOAT32_C( 428.23), SIMDE_FLOAT32_C( 960.34), SIMDE_FLOAT32_C( -595.97), SIMDE_FLOAT32_C( 887.14), SIMDE_FLOAT32_C( 193.19), SIMDE_FLOAT32_C( 789.63), SIMDE_FLOAT32_C( 572.97), SIMDE_FLOAT32_C( -419.98), SIMDE_FLOAT32_C( -531.26), SIMDE_FLOAT32_C( -703.71), SIMDE_FLOAT32_C( -846.60), SIMDE_FLOAT32_C( 996.51), SIMDE_FLOAT32_C( 452.72) }, { SIMDE_FLOAT32_C( -333.13), SIMDE_FLOAT32_C( 615.57), SIMDE_FLOAT32_C( 234.76), SIMDE_FLOAT32_C( 428.23), SIMDE_FLOAT32_C( 960.34), SIMDE_FLOAT32_C( -595.97), SIMDE_FLOAT32_C( 887.14), SIMDE_FLOAT32_C( 193.19), SIMDE_FLOAT32_C( 789.63), SIMDE_FLOAT32_C( 572.97), SIMDE_FLOAT32_C( -419.98), SIMDE_FLOAT32_C( -531.26), SIMDE_FLOAT32_C( -703.71), SIMDE_FLOAT32_C( -846.60), SIMDE_FLOAT32_C( 996.51), SIMDE_FLOAT32_C( 452.72) } }, { { SIMDE_FLOAT32_C( 560.27), SIMDE_FLOAT32_C( 364.93), SIMDE_FLOAT32_C( 882.89), SIMDE_FLOAT32_C( 596.38), SIMDE_FLOAT32_C( 593.63), SIMDE_FLOAT32_C( 567.43), SIMDE_FLOAT32_C( 87.69), SIMDE_FLOAT32_C( 
551.54), SIMDE_FLOAT32_C( 478.09), SIMDE_FLOAT32_C( -734.45), SIMDE_FLOAT32_C( -207.80), SIMDE_FLOAT32_C( 112.86), SIMDE_FLOAT32_C( 987.04), SIMDE_FLOAT32_C( -735.54), SIMDE_FLOAT32_C( 500.42), SIMDE_FLOAT32_C( -346.09) }, { SIMDE_FLOAT32_C( 560.27), SIMDE_FLOAT32_C( 364.93), SIMDE_FLOAT32_C( 882.89), SIMDE_FLOAT32_C( 596.38), SIMDE_FLOAT32_C( 593.63), SIMDE_FLOAT32_C( 567.43), SIMDE_FLOAT32_C( 87.69), SIMDE_FLOAT32_C( 551.54), SIMDE_FLOAT32_C( 478.09), SIMDE_FLOAT32_C( -734.45), SIMDE_FLOAT32_C( -207.80), SIMDE_FLOAT32_C( 112.86), SIMDE_FLOAT32_C( 987.04), SIMDE_FLOAT32_C( -735.54), SIMDE_FLOAT32_C( 500.42), SIMDE_FLOAT32_C( -346.09) } }, { { SIMDE_FLOAT32_C( 880.04), SIMDE_FLOAT32_C( -264.82), SIMDE_FLOAT32_C( -917.86), SIMDE_FLOAT32_C( 840.38), SIMDE_FLOAT32_C( 139.21), SIMDE_FLOAT32_C( 969.28), SIMDE_FLOAT32_C( 33.57), SIMDE_FLOAT32_C( -71.16), SIMDE_FLOAT32_C( 542.24), SIMDE_FLOAT32_C( 613.59), SIMDE_FLOAT32_C( 397.58), SIMDE_FLOAT32_C( 838.53), SIMDE_FLOAT32_C( 766.99), SIMDE_FLOAT32_C( 394.09), SIMDE_FLOAT32_C( 291.25), SIMDE_FLOAT32_C( 327.26) }, { SIMDE_FLOAT32_C( 880.04), SIMDE_FLOAT32_C( -264.82), SIMDE_FLOAT32_C( -917.86), SIMDE_FLOAT32_C( 840.38), SIMDE_FLOAT32_C( 139.21), SIMDE_FLOAT32_C( 969.28), SIMDE_FLOAT32_C( 33.57), SIMDE_FLOAT32_C( -71.16), SIMDE_FLOAT32_C( 542.24), SIMDE_FLOAT32_C( 613.59), SIMDE_FLOAT32_C( 397.58), SIMDE_FLOAT32_C( 838.53), SIMDE_FLOAT32_C( 766.99), SIMDE_FLOAT32_C( 394.09), SIMDE_FLOAT32_C( 291.25), SIMDE_FLOAT32_C( 327.26) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 r = simde_mm512_set_ps( test_vec[i].a[15], test_vec[i].a[14], test_vec[i].a[13], test_vec[i].a[12], test_vec[i].a[11], test_vec[i].a[10], test_vec[i].a[ 9], test_vec[i].a[ 8], test_vec[i].a[ 7], test_vec[i].a[ 6], test_vec[i].a[ 5], test_vec[i].a[ 4], test_vec[i].a[ 3], test_vec[i].a[ 2], test_vec[i].a[ 1], test_vec[i].a[ 0]); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 
0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512 a = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 r = a; simde_test_x86_write_f32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_set_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_set_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_set_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_set_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_set_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_set_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/set1.c000066400000000000000000003334751400333146700163130ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #define SIMDE_TEST_X86_AVX512_INSN set1 #include #include static int test_simde_mm512_set1_ps (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 a; const simde_float32 r[16]; } test_vec[] = { { SIMDE_FLOAT32_C( -130.28), { SIMDE_FLOAT32_C( -130.28), SIMDE_FLOAT32_C( -130.28), SIMDE_FLOAT32_C( -130.28), SIMDE_FLOAT32_C( -130.28), SIMDE_FLOAT32_C( -130.28), SIMDE_FLOAT32_C( -130.28), SIMDE_FLOAT32_C( -130.28), SIMDE_FLOAT32_C( -130.28), SIMDE_FLOAT32_C( -130.28), SIMDE_FLOAT32_C( -130.28), SIMDE_FLOAT32_C( -130.28), SIMDE_FLOAT32_C( -130.28), SIMDE_FLOAT32_C( -130.28), SIMDE_FLOAT32_C( -130.28), SIMDE_FLOAT32_C( -130.28), SIMDE_FLOAT32_C( -130.28) } }, { SIMDE_FLOAT32_C( 996.56), { SIMDE_FLOAT32_C( 996.56), SIMDE_FLOAT32_C( 996.56), SIMDE_FLOAT32_C( 996.56), SIMDE_FLOAT32_C( 996.56), SIMDE_FLOAT32_C( 996.56), SIMDE_FLOAT32_C( 996.56), SIMDE_FLOAT32_C( 996.56), SIMDE_FLOAT32_C( 996.56), SIMDE_FLOAT32_C( 996.56), SIMDE_FLOAT32_C( 996.56), SIMDE_FLOAT32_C( 996.56), SIMDE_FLOAT32_C( 996.56), SIMDE_FLOAT32_C( 996.56), SIMDE_FLOAT32_C( 996.56), SIMDE_FLOAT32_C( 996.56), SIMDE_FLOAT32_C( 996.56) } }, { SIMDE_FLOAT32_C( -437.56), { SIMDE_FLOAT32_C( -437.56), SIMDE_FLOAT32_C( -437.56), SIMDE_FLOAT32_C( -437.56), SIMDE_FLOAT32_C( -437.56), SIMDE_FLOAT32_C( -437.56), SIMDE_FLOAT32_C( -437.56), SIMDE_FLOAT32_C( -437.56), SIMDE_FLOAT32_C( -437.56), SIMDE_FLOAT32_C( -437.56), SIMDE_FLOAT32_C( -437.56), SIMDE_FLOAT32_C( -437.56), SIMDE_FLOAT32_C( -437.56), SIMDE_FLOAT32_C( -437.56), SIMDE_FLOAT32_C( -437.56), SIMDE_FLOAT32_C( -437.56), SIMDE_FLOAT32_C( -437.56) } }, { SIMDE_FLOAT32_C( -653.34), { SIMDE_FLOAT32_C( -653.34), SIMDE_FLOAT32_C( -653.34), SIMDE_FLOAT32_C( -653.34), SIMDE_FLOAT32_C( -653.34), SIMDE_FLOAT32_C( -653.34), SIMDE_FLOAT32_C( -653.34), SIMDE_FLOAT32_C( -653.34), SIMDE_FLOAT32_C( -653.34), SIMDE_FLOAT32_C( -653.34), SIMDE_FLOAT32_C( -653.34), SIMDE_FLOAT32_C( -653.34), 
SIMDE_FLOAT32_C( -653.34), SIMDE_FLOAT32_C( -653.34), SIMDE_FLOAT32_C( -653.34), SIMDE_FLOAT32_C( -653.34), SIMDE_FLOAT32_C( -653.34) } }, { SIMDE_FLOAT32_C( -547.09), { SIMDE_FLOAT32_C( -547.09), SIMDE_FLOAT32_C( -547.09), SIMDE_FLOAT32_C( -547.09), SIMDE_FLOAT32_C( -547.09), SIMDE_FLOAT32_C( -547.09), SIMDE_FLOAT32_C( -547.09), SIMDE_FLOAT32_C( -547.09), SIMDE_FLOAT32_C( -547.09), SIMDE_FLOAT32_C( -547.09), SIMDE_FLOAT32_C( -547.09), SIMDE_FLOAT32_C( -547.09), SIMDE_FLOAT32_C( -547.09), SIMDE_FLOAT32_C( -547.09), SIMDE_FLOAT32_C( -547.09), SIMDE_FLOAT32_C( -547.09), SIMDE_FLOAT32_C( -547.09) } }, { SIMDE_FLOAT32_C( -670.08), { SIMDE_FLOAT32_C( -670.08), SIMDE_FLOAT32_C( -670.08), SIMDE_FLOAT32_C( -670.08), SIMDE_FLOAT32_C( -670.08), SIMDE_FLOAT32_C( -670.08), SIMDE_FLOAT32_C( -670.08), SIMDE_FLOAT32_C( -670.08), SIMDE_FLOAT32_C( -670.08), SIMDE_FLOAT32_C( -670.08), SIMDE_FLOAT32_C( -670.08), SIMDE_FLOAT32_C( -670.08), SIMDE_FLOAT32_C( -670.08), SIMDE_FLOAT32_C( -670.08), SIMDE_FLOAT32_C( -670.08), SIMDE_FLOAT32_C( -670.08), SIMDE_FLOAT32_C( -670.08) } }, { SIMDE_FLOAT32_C( -380.10), { SIMDE_FLOAT32_C( -380.10), SIMDE_FLOAT32_C( -380.10), SIMDE_FLOAT32_C( -380.10), SIMDE_FLOAT32_C( -380.10), SIMDE_FLOAT32_C( -380.10), SIMDE_FLOAT32_C( -380.10), SIMDE_FLOAT32_C( -380.10), SIMDE_FLOAT32_C( -380.10), SIMDE_FLOAT32_C( -380.10), SIMDE_FLOAT32_C( -380.10), SIMDE_FLOAT32_C( -380.10), SIMDE_FLOAT32_C( -380.10), SIMDE_FLOAT32_C( -380.10), SIMDE_FLOAT32_C( -380.10), SIMDE_FLOAT32_C( -380.10), SIMDE_FLOAT32_C( -380.10) } }, { SIMDE_FLOAT32_C( -89.44), { SIMDE_FLOAT32_C( -89.44), SIMDE_FLOAT32_C( -89.44), SIMDE_FLOAT32_C( -89.44), SIMDE_FLOAT32_C( -89.44), SIMDE_FLOAT32_C( -89.44), SIMDE_FLOAT32_C( -89.44), SIMDE_FLOAT32_C( -89.44), SIMDE_FLOAT32_C( -89.44), SIMDE_FLOAT32_C( -89.44), SIMDE_FLOAT32_C( -89.44), SIMDE_FLOAT32_C( -89.44), SIMDE_FLOAT32_C( -89.44), SIMDE_FLOAT32_C( -89.44), SIMDE_FLOAT32_C( -89.44), SIMDE_FLOAT32_C( -89.44), SIMDE_FLOAT32_C( -89.44) } } }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 r = simde_mm512_set1_ps(test_vec[i].a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32 a = simde_test_codegen_random_f32(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 r = simde_mm512_set1_ps(a); simde_test_codegen_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_set1_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float64 a; simde__m512d r; } test_vec[8] = { { SIMDE_FLOAT64_C( -426.34), simde_mm512_set_pd(SIMDE_FLOAT64_C( -426.34), SIMDE_FLOAT64_C( -426.34), SIMDE_FLOAT64_C( -426.34), SIMDE_FLOAT64_C( -426.34), SIMDE_FLOAT64_C( -426.34), SIMDE_FLOAT64_C( -426.34), SIMDE_FLOAT64_C( -426.34), SIMDE_FLOAT64_C( -426.34)) }, { SIMDE_FLOAT64_C( 122.65), simde_mm512_set_pd(SIMDE_FLOAT64_C( 122.65), SIMDE_FLOAT64_C( 122.65), SIMDE_FLOAT64_C( 122.65), SIMDE_FLOAT64_C( 122.65), SIMDE_FLOAT64_C( 122.65), SIMDE_FLOAT64_C( 122.65), SIMDE_FLOAT64_C( 122.65), SIMDE_FLOAT64_C( 122.65)) }, { SIMDE_FLOAT64_C( 879.85), simde_mm512_set_pd(SIMDE_FLOAT64_C( 879.85), SIMDE_FLOAT64_C( 879.85), SIMDE_FLOAT64_C( 879.85), SIMDE_FLOAT64_C( 879.85), SIMDE_FLOAT64_C( 879.85), SIMDE_FLOAT64_C( 879.85), SIMDE_FLOAT64_C( 879.85), SIMDE_FLOAT64_C( 879.85)) }, { SIMDE_FLOAT64_C( 301.17), simde_mm512_set_pd(SIMDE_FLOAT64_C( 301.17), SIMDE_FLOAT64_C( 301.17), SIMDE_FLOAT64_C( 301.17), SIMDE_FLOAT64_C( 301.17), SIMDE_FLOAT64_C( 301.17), SIMDE_FLOAT64_C( 301.17), SIMDE_FLOAT64_C( 301.17), SIMDE_FLOAT64_C( 301.17)) }, { SIMDE_FLOAT64_C( -341.96), simde_mm512_set_pd(SIMDE_FLOAT64_C( -341.96), SIMDE_FLOAT64_C( -341.96), SIMDE_FLOAT64_C( -341.96), SIMDE_FLOAT64_C( -341.96), SIMDE_FLOAT64_C( -341.96), SIMDE_FLOAT64_C( -341.96), SIMDE_FLOAT64_C( -341.96), SIMDE_FLOAT64_C( -341.96)) }, { 
SIMDE_FLOAT64_C( -854.60), simde_mm512_set_pd(SIMDE_FLOAT64_C( -854.60), SIMDE_FLOAT64_C( -854.60), SIMDE_FLOAT64_C( -854.60), SIMDE_FLOAT64_C( -854.60), SIMDE_FLOAT64_C( -854.60), SIMDE_FLOAT64_C( -854.60), SIMDE_FLOAT64_C( -854.60), SIMDE_FLOAT64_C( -854.60)) }, { SIMDE_FLOAT64_C( 711.48), simde_mm512_set_pd(SIMDE_FLOAT64_C( 711.48), SIMDE_FLOAT64_C( 711.48), SIMDE_FLOAT64_C( 711.48), SIMDE_FLOAT64_C( 711.48), SIMDE_FLOAT64_C( 711.48), SIMDE_FLOAT64_C( 711.48), SIMDE_FLOAT64_C( 711.48), SIMDE_FLOAT64_C( 711.48)) }, { SIMDE_FLOAT64_C( -146.85), simde_mm512_set_pd(SIMDE_FLOAT64_C( -146.85), SIMDE_FLOAT64_C( -146.85), SIMDE_FLOAT64_C( -146.85), SIMDE_FLOAT64_C( -146.85), SIMDE_FLOAT64_C( -146.85), SIMDE_FLOAT64_C( -146.85), SIMDE_FLOAT64_C( -146.85), SIMDE_FLOAT64_C( -146.85)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_set1_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_set1_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { int8_t a; simde__m512i r; } test_vec[8] = { { 15, simde_mm512_set_epi8(INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15), INT8_C( 15)) }, { 124, simde_mm512_set_epi8(INT8_C( 
124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124), INT8_C( 124)) }, { -93, simde_mm512_set_epi8(INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93), INT8_C( -93)) }, { 121, simde_mm512_set_epi8(INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), 
INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121), INT8_C( 121)) }, { 117, simde_mm512_set_epi8(INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117), INT8_C( 117)) }, { 93, simde_mm512_set_epi8(INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), 
INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93), INT8_C( 93)) }, { 88, simde_mm512_set_epi8(INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88), INT8_C( 88)) }, { -73, simde_mm512_set_epi8(INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), 
INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_set1_epi8(test_vec[i].a); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_set1_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask64 k; int8_t a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( 80), INT8_C( 13), INT8_C( -86), INT8_C( 103), INT8_C( 30), INT8_C( 88), INT8_C( -63), INT8_C( -16), INT8_C( -68), INT8_C( -20), INT8_C( 48), INT8_C( -36), INT8_C( -97), INT8_C(-103), INT8_C(-104), INT8_C( -61), INT8_C(-122), INT8_C( -83), INT8_C( -3), INT8_C(-115), INT8_C( 29), INT8_C(-112), INT8_C( 118), INT8_C( 53), INT8_C(-107), INT8_C(-126), INT8_C( 41), INT8_C(-117), INT8_C( -4), INT8_C( -72), INT8_C( -9), INT8_C( 2), INT8_C( 10), INT8_C( -61), INT8_C( 116), INT8_C( 1), INT8_C( 35), INT8_C( -78), INT8_C( 17), INT8_C( -82), INT8_C( -14), INT8_C( 120), INT8_C( 120), INT8_C( 33), INT8_C( 97), INT8_C( 4), INT8_C(-104), INT8_C( 67), INT8_C( -86), INT8_C( -90), INT8_C( -95), INT8_C( 51), INT8_C( -83), INT8_C(-120), INT8_C( 123), INT8_C( -4), INT8_C( 51), INT8_C( -66), INT8_C( -91), INT8_C( 51), INT8_C( -1), INT8_C( 32), INT8_C( 30), INT8_C( 92)), UINT64_C(12701675613368776088), INT8_C( -94), simde_mm512_set_epi8(INT8_C( -94), INT8_C( 13), INT8_C( -94), INT8_C( -94), INT8_C( 30), INT8_C( 88), INT8_C( -63), INT8_C( -16), INT8_C( -68), INT8_C( -94), INT8_C( 48), INT8_C( -36), INT8_C( -97), INT8_C( -94), INT8_C(-104), INT8_C( -94), INT8_C(-122), INT8_C( -94), INT8_C( -94), INT8_C(-115), INT8_C( -94), INT8_C(-112), INT8_C( -94), INT8_C( 53), INT8_C( -94), INT8_C(-126), INT8_C( 
-94), INT8_C(-117), INT8_C( -94), INT8_C( -94), INT8_C( -9), INT8_C( 2), INT8_C( -94), INT8_C( -61), INT8_C( 116), INT8_C( 1), INT8_C( -94), INT8_C( -78), INT8_C( -94), INT8_C( -94), INT8_C( -14), INT8_C( 120), INT8_C( 120), INT8_C( -94), INT8_C( 97), INT8_C( -94), INT8_C( -94), INT8_C( 67), INT8_C( -86), INT8_C( -94), INT8_C( -94), INT8_C( -94), INT8_C( -83), INT8_C( -94), INT8_C( 123), INT8_C( -94), INT8_C( -94), INT8_C( -66), INT8_C( -91), INT8_C( -94), INT8_C( -94), INT8_C( 32), INT8_C( 30), INT8_C( 92)) }, { simde_mm512_set_epi8(INT8_C( -64), INT8_C( -80), INT8_C( 33), INT8_C( -9), INT8_C( 3), INT8_C( 93), INT8_C( 13), INT8_C( -28), INT8_C( 79), INT8_C( 10), INT8_C( -42), INT8_C(-127), INT8_C( 114), INT8_C( 78), INT8_C( 61), INT8_C( 67), INT8_C( 95), INT8_C( 14), INT8_C( 28), INT8_C( 56), INT8_C( 43), INT8_C( -20), INT8_C( -77), INT8_C( 83), INT8_C( -68), INT8_C( 87), INT8_C( -96), INT8_C( 13), INT8_C( 40), INT8_C( 107), INT8_C( -63), INT8_C( -1), INT8_C( 77), INT8_C( 21), INT8_C( -46), INT8_C( -12), INT8_C( 42), INT8_C( 69), INT8_C( 51), INT8_C( 11), INT8_C(-120), INT8_C( 65), INT8_C( -70), INT8_C( -19), INT8_C( -95), INT8_C( 43), INT8_C( -2), INT8_C( -62), INT8_C( -16), INT8_C( 28), INT8_C( 29), INT8_C( -11), INT8_C( 17), INT8_C( -18), INT8_C( 105), INT8_C(-119), INT8_C( 60), INT8_C( 120), INT8_C( 38), INT8_C( -41), INT8_C( 20), INT8_C( -30), INT8_C( 15), INT8_C( 112)), UINT64_C(15052494645983188959), INT8_C( -73), simde_mm512_set_epi8(INT8_C( -73), INT8_C( -73), INT8_C( 33), INT8_C( -73), INT8_C( 3), INT8_C( 93), INT8_C( 13), INT8_C( -28), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C(-127), INT8_C( 114), INT8_C( -73), INT8_C( 61), INT8_C( -73), INT8_C( 95), INT8_C( 14), INT8_C( -73), INT8_C( -73), INT8_C( 43), INT8_C( -73), INT8_C( -77), INT8_C( 83), INT8_C( -68), INT8_C( 87), INT8_C( -73), INT8_C( 13), INT8_C( 40), INT8_C( 107), INT8_C( -63), INT8_C( -73), INT8_C( 77), INT8_C( 21), INT8_C( -46), INT8_C( -12), INT8_C( 42), INT8_C( -73), INT8_C( 51), 
INT8_C( -73), INT8_C(-120), INT8_C( -73), INT8_C( -70), INT8_C( -19), INT8_C( -73), INT8_C( -73), INT8_C( -2), INT8_C( -62), INT8_C( -16), INT8_C( 28), INT8_C( 29), INT8_C( -73), INT8_C( 17), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( 38), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73), INT8_C( -73)) }, { simde_mm512_set_epi8(INT8_C( 107), INT8_C( 126), INT8_C( -33), INT8_C( 83), INT8_C( 46), INT8_C( 62), INT8_C( -81), INT8_C( 33), INT8_C( -68), INT8_C(-126), INT8_C( -41), INT8_C( 125), INT8_C( -96), INT8_C( -20), INT8_C( 62), INT8_C( -19), INT8_C( 29), INT8_C( -96), INT8_C( 68), INT8_C( 119), INT8_C( -36), INT8_C( -62), INT8_C( -27), INT8_C(-112), INT8_C(-123), INT8_C( 55), INT8_C(-119), INT8_C( -4), INT8_C( 58), INT8_C( 28), INT8_C( -84), INT8_C( -38), INT8_C( 1), INT8_C( -25), INT8_C( 107), INT8_C( -63), INT8_C( -86), INT8_C( 88), INT8_C( 36), INT8_C( 53), INT8_C( 109), INT8_C( -36), INT8_C( -70), INT8_C(-125), INT8_C( -3), INT8_C(-109), INT8_C( 121), INT8_C( -63), INT8_C( 113), INT8_C( -92), INT8_C( -4), INT8_C(-105), INT8_C( -65), INT8_C( 26), INT8_C( -36), INT8_C( 87), INT8_C(-101), INT8_C( -70), INT8_C( -3), INT8_C( 26), INT8_C( -88), INT8_C( -51), INT8_C(-123), INT8_C( 93)), UINT64_C( 2985661334514035835), INT8_C( 111), simde_mm512_set_epi8(INT8_C( 107), INT8_C( 126), INT8_C( 111), INT8_C( 83), INT8_C( 111), INT8_C( 62), INT8_C( -81), INT8_C( 111), INT8_C( -68), INT8_C( 111), INT8_C( 111), INT8_C( 125), INT8_C( 111), INT8_C( 111), INT8_C( 111), INT8_C( 111), INT8_C( 29), INT8_C( -96), INT8_C( 111), INT8_C( 111), INT8_C( -36), INT8_C( -62), INT8_C( 111), INT8_C( 111), INT8_C(-123), INT8_C( 55), INT8_C( 111), INT8_C( -4), INT8_C( 111), INT8_C( 28), INT8_C( 111), INT8_C( -38), INT8_C( 1), INT8_C( -25), INT8_C( 111), INT8_C( 111), INT8_C( 111), INT8_C( 111), INT8_C( 111), INT8_C( 111), INT8_C( 109), INT8_C( 111), INT8_C( -70), INT8_C( 111), INT8_C( -3), INT8_C(-109), INT8_C( 111), INT8_C( 111), INT8_C( 111), INT8_C( 
111), INT8_C( 111), INT8_C(-105), INT8_C( 111), INT8_C( 111), INT8_C( -36), INT8_C( 87), INT8_C(-101), INT8_C( 111), INT8_C( 111), INT8_C( 111), INT8_C( 111), INT8_C( -51), INT8_C( 111), INT8_C( 111)) }, { simde_mm512_set_epi8(INT8_C( -63), INT8_C( 92), INT8_C( -41), INT8_C( -80), INT8_C(-101), INT8_C( 86), INT8_C( 45), INT8_C( 45), INT8_C( -41), INT8_C(-113), INT8_C( -17), INT8_C(-101), INT8_C(-113), INT8_C( -69), INT8_C( 73), INT8_C(-124), INT8_C( 90), INT8_C(-118), INT8_C( 31), INT8_C(-124), INT8_C( -88), INT8_C(-116), INT8_C( 8), INT8_C( -37), INT8_C( -41), INT8_C( 93), INT8_C( -86), INT8_C( 61), INT8_C( -70), INT8_C( -88), INT8_C( 44), INT8_C( -34), INT8_C( -21), INT8_C(-121), INT8_C(-124), INT8_C(-114), INT8_C( 73), INT8_C( 92), INT8_C( -92), INT8_C(-115), INT8_C( 6), INT8_C(-120), INT8_C( 89), INT8_C(-102), INT8_C( -43), INT8_C( 33), INT8_C( 15), INT8_C( -6), INT8_C(-105), INT8_C( 66), INT8_C( -60), INT8_C( 54), INT8_C( -95), INT8_C( 49), INT8_C( 1), INT8_C( 118), INT8_C( -33), INT8_C( -35), INT8_C( -34), INT8_C( -10), INT8_C( -70), INT8_C( 74), INT8_C( -10), INT8_C( 97)), UINT64_C(12556192675989742329), INT8_C(-120), simde_mm512_set_epi8(INT8_C(-120), INT8_C( 92), INT8_C(-120), INT8_C( -80), INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C( 45), INT8_C( -41), INT8_C(-120), INT8_C( -17), INT8_C(-101), INT8_C(-113), INT8_C( -69), INT8_C( 73), INT8_C(-124), INT8_C(-120), INT8_C(-118), INT8_C( 31), INT8_C(-124), INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C( -37), INT8_C(-120), INT8_C( 93), INT8_C(-120), INT8_C(-120), INT8_C( -70), INT8_C(-120), INT8_C(-120), INT8_C( -34), INT8_C(-120), INT8_C(-121), INT8_C(-124), INT8_C(-120), INT8_C( 73), INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C( 33), INT8_C(-120), INT8_C( -6), INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C( 54), INT8_C( -95), INT8_C( 49), INT8_C(-120), INT8_C( 118), INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C(-120), 
INT8_C(-120), INT8_C( 74), INT8_C( -10), INT8_C(-120)) }, { simde_mm512_set_epi8(INT8_C( 21), INT8_C( 17), INT8_C( 22), INT8_C(-115), INT8_C( 101), INT8_C( -2), INT8_C( -32), INT8_C( -27), INT8_C( -14), INT8_C( 47), INT8_C( 110), INT8_C( -88), INT8_C( 23), INT8_C( -87), INT8_C( -20), INT8_C( 115), INT8_C( 108), INT8_C( -54), INT8_C(-105), INT8_C( -94), INT8_C( 96), INT8_C(-110), INT8_C( -87), INT8_C( 119), INT8_C( 110), INT8_C( -13), INT8_C( 53), INT8_C( -27), INT8_C( -59), INT8_C( 57), INT8_C( -46), INT8_C( -24), INT8_C( 35), INT8_C( 26), INT8_C( 124), INT8_C( -28), INT8_C( -68), INT8_C( -57), INT8_C( 75), INT8_C( -25), INT8_C(-112), INT8_C( 112), INT8_C( 123), INT8_C(-108), INT8_C( 115), INT8_C( -6), INT8_C( 43), INT8_C( 52), INT8_C( -91), INT8_C( -17), INT8_C( 93), INT8_C( -2), INT8_C( 116), INT8_C( -51), INT8_C( 70), INT8_C( 98), INT8_C( 104), INT8_C( -69), INT8_C(-102), INT8_C( 77), INT8_C( 82), INT8_C( 125), INT8_C( 42), INT8_C( 83)), UINT64_C(12090133344763257330), INT8_C( 55), simde_mm512_set_epi8(INT8_C( 55), INT8_C( 17), INT8_C( 55), INT8_C(-115), INT8_C( 101), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 110), INT8_C( -88), INT8_C( 55), INT8_C( -87), INT8_C( -20), INT8_C( 115), INT8_C( 55), INT8_C( 55), INT8_C(-105), INT8_C( -94), INT8_C( 55), INT8_C(-110), INT8_C( -87), INT8_C( 119), INT8_C( 110), INT8_C( -13), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 57), INT8_C( -46), INT8_C( -24), INT8_C( 55), INT8_C( 26), INT8_C( 124), INT8_C( -28), INT8_C( -68), INT8_C( 55), INT8_C( 75), INT8_C( 55), INT8_C( 55), INT8_C( 112), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( -6), INT8_C( 43), INT8_C( 52), INT8_C( -91), INT8_C( -17), INT8_C( 55), INT8_C( -2), INT8_C( 116), INT8_C( -51), INT8_C( 70), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 55), INT8_C( 82), INT8_C( 125), INT8_C( 55), INT8_C( 83)) }, { simde_mm512_set_epi8(INT8_C(-124), INT8_C( -37), INT8_C( -61), INT8_C( -35), INT8_C( -22), INT8_C( -85), 
INT8_C(-117), INT8_C(-105), INT8_C( 99), INT8_C( -62), INT8_C( 102), INT8_C( -31), INT8_C( 82), INT8_C( 39), INT8_C( 49), INT8_C( 43), INT8_C( 21), INT8_C( 16), INT8_C( 12), INT8_C(-125), INT8_C( 2), INT8_C(-106), INT8_C( -4), INT8_C( 100), INT8_C( -12), INT8_C( 30), INT8_C( -39), INT8_C( -37), INT8_C( 92), INT8_C( -43), INT8_C( 33), INT8_C(-124), INT8_C( 48), INT8_C( 4), INT8_C( 31), INT8_C( 78), INT8_C(-113), INT8_C( 115), INT8_C( 116), INT8_C( -62), INT8_C(-109), INT8_C( -66), INT8_C( 43), INT8_C(-118), INT8_C(-105), INT8_C( -11), INT8_C( 100), INT8_C( 41), INT8_C(-104), INT8_C(-114), INT8_C(-105), INT8_C( 88), INT8_C( -33), INT8_C( -8), INT8_C( 41), INT8_C( 16), INT8_C( 4), INT8_C( 89), INT8_C( 66), INT8_C( 27), INT8_C( -63), INT8_C( 30), INT8_C( -95), INT8_C( 33)), UINT64_C(13436704833767296949), INT8_C( 18), simde_mm512_set_epi8(INT8_C( 18), INT8_C( -37), INT8_C( 18), INT8_C( 18), INT8_C( 18), INT8_C( -85), INT8_C( 18), INT8_C(-105), INT8_C( 99), INT8_C( 18), INT8_C( 18), INT8_C( 18), INT8_C( 18), INT8_C( 39), INT8_C( 49), INT8_C( 43), INT8_C( 18), INT8_C( 18), INT8_C( 12), INT8_C(-125), INT8_C( 2), INT8_C(-106), INT8_C( 18), INT8_C( 18), INT8_C( 18), INT8_C( 18), INT8_C( -39), INT8_C( 18), INT8_C( 18), INT8_C( -43), INT8_C( 18), INT8_C( 18), INT8_C( 48), INT8_C( 4), INT8_C( 31), INT8_C( 78), INT8_C( 18), INT8_C( 115), INT8_C( 116), INT8_C( -62), INT8_C(-109), INT8_C( 18), INT8_C( 18), INT8_C( 18), INT8_C( 18), INT8_C( -11), INT8_C( 100), INT8_C( 18), INT8_C(-104), INT8_C( 18), INT8_C(-105), INT8_C( 88), INT8_C( 18), INT8_C( 18), INT8_C( 18), INT8_C( 18), INT8_C( 18), INT8_C( 89), INT8_C( 18), INT8_C( 18), INT8_C( -63), INT8_C( 18), INT8_C( -95), INT8_C( 18)) }, { simde_mm512_set_epi8(INT8_C( -30), INT8_C( 101), INT8_C( 64), INT8_C( 107), INT8_C( -34), INT8_C( -67), INT8_C( -96), INT8_C( 35), INT8_C( 117), INT8_C( 76), INT8_C( 106), INT8_C( -82), INT8_C( -48), INT8_C( 63), INT8_C( 11), INT8_C( 22), INT8_C( 41), INT8_C( 95), INT8_C(-123), INT8_C( -90), INT8_C( 
67), INT8_C( -76), INT8_C(-105), INT8_C( -7), INT8_C( 115), INT8_C( 121), INT8_C( -52), INT8_C( -95), INT8_C(-101), INT8_C( 64), INT8_C( -67), INT8_C( 107), INT8_C(-104), INT8_C( 56), INT8_C( 89), INT8_C( -95), INT8_C( 21), INT8_C( -42), INT8_C( -75), INT8_C( 45), INT8_C( -86), INT8_C( 32), INT8_C( 27), INT8_C(-119), INT8_C( -68), INT8_C( 5), INT8_C( -78), INT8_C( -36), INT8_C( 125), INT8_C( 117), INT8_C( -63), INT8_C( -68), INT8_C( -45), INT8_C( -77), INT8_C( 6), INT8_C( 68), INT8_C( 79), INT8_C( -92), INT8_C( 67), INT8_C( 61), INT8_C( 42), INT8_C( 26), INT8_C(-117), INT8_C( -55)), UINT64_C(14020412538477965079), INT8_C( -46), simde_mm512_set_epi8(INT8_C( -46), INT8_C( -46), INT8_C( 64), INT8_C( 107), INT8_C( -34), INT8_C( -67), INT8_C( -46), INT8_C( 35), INT8_C( -46), INT8_C( 76), INT8_C( 106), INT8_C( -46), INT8_C( -48), INT8_C( 63), INT8_C( -46), INT8_C( 22), INT8_C( -46), INT8_C( 95), INT8_C(-123), INT8_C( -90), INT8_C( 67), INT8_C( -76), INT8_C( -46), INT8_C( -7), INT8_C( -46), INT8_C( -46), INT8_C( -46), INT8_C( -95), INT8_C( -46), INT8_C( 64), INT8_C( -46), INT8_C( -46), INT8_C( -46), INT8_C( 56), INT8_C( -46), INT8_C( -95), INT8_C( 21), INT8_C( -42), INT8_C( -75), INT8_C( 45), INT8_C( -86), INT8_C( -46), INT8_C( -46), INT8_C(-119), INT8_C( -46), INT8_C( 5), INT8_C( -78), INT8_C( -46), INT8_C( 125), INT8_C( -46), INT8_C( -46), INT8_C( -68), INT8_C( -46), INT8_C( -46), INT8_C( -46), INT8_C( -46), INT8_C( 79), INT8_C( -92), INT8_C( 67), INT8_C( -46), INT8_C( 42), INT8_C( -46), INT8_C( -46), INT8_C( -46)) }, { simde_mm512_set_epi8(INT8_C( -83), INT8_C( -73), INT8_C( -22), INT8_C( 98), INT8_C( 126), INT8_C( 41), INT8_C( -28), INT8_C( 126), INT8_C( -75), INT8_C( 91), INT8_C( -33), INT8_C( 103), INT8_C( -63), INT8_C( 62), INT8_C( 83), INT8_C( 4), INT8_C( 65), INT8_C( -22), INT8_C( 107), INT8_C( 8), INT8_C( 31), INT8_C(-111), INT8_C(-114), INT8_C(-118), INT8_C( 2), INT8_C( 76), INT8_C( 19), INT8_C( 127), INT8_C( -37), INT8_C( -41), INT8_C( 91), INT8_C( -64), 
INT8_C(-105), INT8_C( 127), INT8_C(-121), INT8_C( 84), INT8_C( 124), INT8_C( 50), INT8_C( -86), INT8_C(-101), INT8_C( -82), INT8_C( 121), INT8_C( 18), INT8_C( -17), INT8_C( -55), INT8_C(-102), INT8_C( -81), INT8_C( -54), INT8_C( -56), INT8_C( -2), INT8_C( -68), INT8_C( 105), INT8_C( -48), INT8_C( -90), INT8_C( -46), INT8_C( 63), INT8_C( 126), INT8_C( -93), INT8_C( 46), INT8_C(-114), INT8_C( 58), INT8_C( 110), INT8_C( 102), INT8_C( -93)), UINT64_C(14839809536761107867), INT8_C( 106), simde_mm512_set_epi8(INT8_C( 106), INT8_C( 106), INT8_C( -22), INT8_C( 98), INT8_C( 106), INT8_C( 106), INT8_C( -28), INT8_C( 106), INT8_C( 106), INT8_C( 106), INT8_C( 106), INT8_C( 106), INT8_C( -63), INT8_C( 62), INT8_C( 83), INT8_C( 106), INT8_C( 106), INT8_C( -22), INT8_C( 107), INT8_C( 106), INT8_C( 106), INT8_C(-111), INT8_C(-114), INT8_C(-118), INT8_C( 2), INT8_C( 76), INT8_C( 106), INT8_C( 127), INT8_C( -37), INT8_C( 106), INT8_C( 106), INT8_C( -64), INT8_C(-105), INT8_C( 127), INT8_C( 106), INT8_C( 84), INT8_C( 124), INT8_C( 106), INT8_C( -86), INT8_C( 106), INT8_C( -82), INT8_C( 121), INT8_C( 18), INT8_C( 106), INT8_C( 106), INT8_C(-102), INT8_C( -81), INT8_C( 106), INT8_C( 106), INT8_C( -2), INT8_C( -68), INT8_C( 106), INT8_C( -48), INT8_C( 106), INT8_C( -46), INT8_C( 106), INT8_C( 106), INT8_C( -93), INT8_C( 46), INT8_C( 106), INT8_C( 106), INT8_C( 110), INT8_C( 106), INT8_C( 106)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_set1_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_set1_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask64 k; int8_t a; simde__m512i r; } test_vec[8] = { { UINT64_C( 2901368310709582274), INT8_C( -37), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( -37), INT8_C( 0), INT8_C( -37), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -37), INT8_C( 0), INT8_C( 0), INT8_C( 
0), INT8_C( 0), INT8_C( -37), INT8_C( -37), INT8_C( -37), INT8_C( 0), INT8_C( -37), INT8_C( -37), INT8_C( -37), INT8_C( 0), INT8_C( -37), INT8_C( -37), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -37), INT8_C( -37), INT8_C( 0), INT8_C( -37), INT8_C( -37), INT8_C( 0), INT8_C( -37), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -37), INT8_C( 0), INT8_C( 0), INT8_C( -37), INT8_C( -37), INT8_C( 0), INT8_C( -37), INT8_C( 0), INT8_C( -37), INT8_C( -37), INT8_C( 0), INT8_C( -37), INT8_C( -37), INT8_C( -37), INT8_C( -37), INT8_C( 0), INT8_C( 0), INT8_C( -37), INT8_C( -37), INT8_C( -37), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -37), INT8_C( 0)) }, { UINT64_C(15800639674747260058), INT8_C( 63), simde_mm512_set_epi8(INT8_C( 63), INT8_C( 63), INT8_C( 0), INT8_C( 63), INT8_C( 63), INT8_C( 0), INT8_C( 63), INT8_C( 63), INT8_C( 0), INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 63), INT8_C( 63), INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 63), INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 63), INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 63), INT8_C( 63), INT8_C( 63), INT8_C( 0), INT8_C( 63), INT8_C( 0), INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 63), INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 63), INT8_C( 63), INT8_C( 0), INT8_C( 63), INT8_C( 0)) }, { UINT64_C(12860739080443979541), INT8_C( 53), simde_mm512_set_epi8(INT8_C( 53), INT8_C( 0), INT8_C( 53), INT8_C( 53), INT8_C( 0), INT8_C( 0), INT8_C( 53), INT8_C( 0), INT8_C( 0), INT8_C( 53), INT8_C( 53), INT8_C( 53), INT8_C( 53), INT8_C( 0), INT8_C( 53), INT8_C( 0), INT8_C( 53), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 53), INT8_C( 53), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 53), INT8_C( 53), INT8_C( 0), 
INT8_C( 53), INT8_C( 53), INT8_C( 53), INT8_C( 53), INT8_C( 53), INT8_C( 0), INT8_C( 53), INT8_C( 53), INT8_C( 0), INT8_C( 53), INT8_C( 53), INT8_C( 0), INT8_C( 53), INT8_C( 53), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 53), INT8_C( 53), INT8_C( 53), INT8_C( 0), INT8_C( 0), INT8_C( 53), INT8_C( 53), INT8_C( 53), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 53), INT8_C( 0), INT8_C( 53), INT8_C( 0), INT8_C( 53)) }, { UINT64_C( 2595884503750725802), INT8_C( 78), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 78), INT8_C( 0), INT8_C( 0), INT8_C( 78), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 78), INT8_C( 78), INT8_C( 0), INT8_C( 0), INT8_C( 78), INT8_C( 78), INT8_C( 0), INT8_C( 78), INT8_C( 78), INT8_C( 78), INT8_C( 78), INT8_C( 0), INT8_C( 0), INT8_C( 78), INT8_C( 78), INT8_C( 0), INT8_C( 0), INT8_C( 78), INT8_C( 78), INT8_C( 78), INT8_C( 78), INT8_C( 0), INT8_C( 78), INT8_C( 78), INT8_C( 78), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 78), INT8_C( 78), INT8_C( 0), INT8_C( 78), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 78), INT8_C( 0), INT8_C( 78), INT8_C( 0), INT8_C( 78), INT8_C( 0), INT8_C( 78), INT8_C( 0)) }, { UINT64_C(13286373173549182748), INT8_C( -67), simde_mm512_set_epi8(INT8_C( -67), INT8_C( 0), INT8_C( -67), INT8_C( -67), INT8_C( -67), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -67), INT8_C( -67), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -67), INT8_C( 0), INT8_C( -67), INT8_C( 0), INT8_C( -67), INT8_C( 0), INT8_C( -67), INT8_C( -67), INT8_C( -67), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -67), INT8_C( -67), INT8_C( -67), INT8_C( 0), INT8_C( -67), INT8_C( 0), INT8_C( 0), INT8_C( -67), INT8_C( 0), INT8_C( -67), INT8_C( 0), INT8_C( -67), INT8_C( 0), INT8_C( -67), INT8_C( -67), INT8_C( 0), INT8_C( -67), INT8_C( -67), INT8_C( 0), 
INT8_C( -67), INT8_C( 0), INT8_C( -67), INT8_C( -67), INT8_C( -67), INT8_C( -67), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -67), INT8_C( -67), INT8_C( -67), INT8_C( 0), INT8_C( 0)) }, { UINT64_C(16804997844821669286), INT8_C( -98), simde_mm512_set_epi8(INT8_C( -98), INT8_C( -98), INT8_C( -98), INT8_C( 0), INT8_C( -98), INT8_C( 0), INT8_C( 0), INT8_C( -98), INT8_C( 0), INT8_C( 0), INT8_C( -98), INT8_C( -98), INT8_C( 0), INT8_C( -98), INT8_C( -98), INT8_C( -98), INT8_C( 0), INT8_C( -98), INT8_C( 0), INT8_C( -98), INT8_C( -98), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -98), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -98), INT8_C( -98), INT8_C( 0), INT8_C( 0), INT8_C( -98), INT8_C( -98), INT8_C( -98), INT8_C( -98), INT8_C( -98), INT8_C( -98), INT8_C( 0), INT8_C( 0), INT8_C( -98), INT8_C( -98), INT8_C( 0), INT8_C( 0), INT8_C( -98), INT8_C( -98), INT8_C( -98), INT8_C( -98), INT8_C( 0), INT8_C( -98), INT8_C( 0), INT8_C( -98), INT8_C( -98), INT8_C( 0), INT8_C( -98), INT8_C( 0), INT8_C( 0), INT8_C( -98), INT8_C( -98), INT8_C( 0)) }, { UINT64_C(14388383136321922859), INT8_C( -31), simde_mm512_set_epi8(INT8_C( -31), INT8_C( -31), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -31), INT8_C( -31), INT8_C( -31), INT8_C( -31), INT8_C( 0), INT8_C( -31), INT8_C( 0), INT8_C( -31), INT8_C( -31), INT8_C( 0), INT8_C( -31), INT8_C( -31), INT8_C( -31), INT8_C( 0), INT8_C( 0), INT8_C( -31), INT8_C( -31), INT8_C( -31), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -31), INT8_C( -31), INT8_C( -31), INT8_C( 0), INT8_C( -31), INT8_C( 0), INT8_C( -31), INT8_C( -31), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -31), INT8_C( 0), INT8_C( 0), INT8_C( -31), INT8_C( 0), INT8_C( -31), INT8_C( -31), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -31), INT8_C( -31), INT8_C( 0), INT8_C( 0), INT8_C( -31), INT8_C( -31), INT8_C( -31), INT8_C( 0), INT8_C( 0), INT8_C( -31), INT8_C( 0), INT8_C( -31), INT8_C( 0), INT8_C( -31), INT8_C( -31)) 
}, { UINT64_C( 9693935732927043828), INT8_C( 57), simde_mm512_set_epi8(INT8_C( 57), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 57), INT8_C( 57), INT8_C( 0), INT8_C( 57), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 57), INT8_C( 57), INT8_C( 57), INT8_C( 57), INT8_C( 57), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 57), INT8_C( 57), INT8_C( 57), INT8_C( 0), INT8_C( 0), INT8_C( 57), INT8_C( 57), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 57), INT8_C( 57), INT8_C( 57), INT8_C( 57), INT8_C( 0), INT8_C( 0), INT8_C( 57), INT8_C( 0), INT8_C( 0), INT8_C( 57), INT8_C( 0), INT8_C( 57), INT8_C( 0), INT8_C( 57), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 57), INT8_C( 57), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 57), INT8_C( 57), INT8_C( 57), INT8_C( 57), INT8_C( 0), INT8_C( 57), INT8_C( 0), INT8_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_set1_epi8(test_vec[i].k, test_vec[i].a); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_set1_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { int16_t a; simde__m512i r; } test_vec[8] = { { -334, simde_mm512_set_epi16(INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334), INT16_C( -334)) }, { 27900, simde_mm512_set_epi16(INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 
27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900), INT16_C( 27900)) }, { 9352, simde_mm512_set_epi16(INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352), INT16_C( 9352)) }, { -21903, simde_mm512_set_epi16(INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903), INT16_C(-21903)) }, { 32371, simde_mm512_set_epi16(INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), INT16_C( 32371), 
INT16_C( 32371), INT16_C( 32371)) }, { -49, simde_mm512_set_epi16(INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49), INT16_C( -49)) }, { 18491, simde_mm512_set_epi16(INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491), INT16_C( 18491)) }, { 25038, simde_mm512_set_epi16(INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038), INT16_C( 25038)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_set1_epi16(test_vec[i].a); simde_assert_m512i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_set1_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; 
simde__mmask32 k; int16_t a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi16(INT16_C( 874), INT16_C( 15357), INT16_C( 3602), INT16_C( 11090), INT16_C( 31475), INT16_C( 20808), INT16_C(-26328), INT16_C(-21794), INT16_C(-24829), INT16_C(-15530), INT16_C( -9785), INT16_C( 22806), INT16_C( -6385), INT16_C(-26604), INT16_C(-15351), INT16_C(-18936), INT16_C( 28985), INT16_C( 24045), INT16_C(-25535), INT16_C(-25436), INT16_C( 749), INT16_C( 15517), INT16_C(-24369), INT16_C(-27864), INT16_C( 6864), INT16_C( 16553), INT16_C( -407), INT16_C(-28514), INT16_C( -9423), INT16_C(-29018), INT16_C(-11420), INT16_C(-22112)), UINT32_C(1121120418), INT16_C(-24380), simde_mm512_set_epi16(INT16_C( 874), INT16_C(-24380), INT16_C( 3602), INT16_C( 11090), INT16_C( 31475), INT16_C( 20808), INT16_C(-24380), INT16_C(-21794), INT16_C(-24380), INT16_C(-24380), INT16_C( -9785), INT16_C(-24380), INT16_C( -6385), INT16_C(-26604), INT16_C(-24380), INT16_C(-18936), INT16_C(-24380), INT16_C(-24380), INT16_C(-24380), INT16_C(-24380), INT16_C( 749), INT16_C( 15517), INT16_C(-24369), INT16_C(-27864), INT16_C(-24380), INT16_C( 16553), INT16_C(-24380), INT16_C(-28514), INT16_C( -9423), INT16_C(-29018), INT16_C(-24380), INT16_C(-22112)) }, { simde_mm512_set_epi16(INT16_C( 21630), INT16_C( 53), INT16_C(-30787), INT16_C( 11298), INT16_C( 13120), INT16_C(-15891), INT16_C( 20834), INT16_C( 5170), INT16_C( 11237), INT16_C(-32025), INT16_C( 2036), INT16_C(-32146), INT16_C( 6820), INT16_C( 29964), INT16_C(-20673), INT16_C( -6255), INT16_C( 12677), INT16_C( 5934), INT16_C( 18392), INT16_C(-16008), INT16_C( -6967), INT16_C(-23263), INT16_C( 28759), INT16_C( 4932), INT16_C(-20928), INT16_C(-12287), INT16_C(-21100), INT16_C(-15604), INT16_C(-25734), INT16_C(-27889), INT16_C( 22154), INT16_C( 16749)), UINT32_C( 442706120), INT16_C(-18045), simde_mm512_set_epi16(INT16_C( 21630), INT16_C( 53), INT16_C(-30787), INT16_C(-18045), INT16_C(-18045), INT16_C(-15891), INT16_C(-18045), INT16_C( 5170), INT16_C( 11237), 
INT16_C(-18045), INT16_C(-18045), INT16_C(-32146), INT16_C( 6820), INT16_C( 29964), INT16_C(-18045), INT16_C(-18045), INT16_C( 12677), INT16_C( 5934), INT16_C(-18045), INT16_C(-16008), INT16_C(-18045), INT16_C(-23263), INT16_C( 28759), INT16_C( 4932), INT16_C(-18045), INT16_C(-18045), INT16_C(-21100), INT16_C(-15604), INT16_C(-18045), INT16_C(-27889), INT16_C( 22154), INT16_C( 16749)) }, { simde_mm512_set_epi16(INT16_C(-12675), INT16_C(-13885), INT16_C( -4000), INT16_C( 31908), INT16_C( 16178), INT16_C( -8662), INT16_C(-27877), INT16_C(-11427), INT16_C(-10847), INT16_C( 7965), INT16_C(-13767), INT16_C( 14192), INT16_C( -3024), INT16_C(-20651), INT16_C( 1677), INT16_C(-14378), INT16_C( 13823), INT16_C(-21716), INT16_C(-14569), INT16_C( 19205), INT16_C(-19335), INT16_C( 31769), INT16_C(-13133), INT16_C(-12032), INT16_C(-27851), INT16_C(-12954), INT16_C(-30941), INT16_C( 26210), INT16_C( 10250), INT16_C(-12883), INT16_C(-31618), INT16_C( -328)), UINT32_C(3083705480), INT16_C( 4440), simde_mm512_set_epi16(INT16_C( 4440), INT16_C(-13885), INT16_C( 4440), INT16_C( 4440), INT16_C( 16178), INT16_C( 4440), INT16_C( 4440), INT16_C( 4440), INT16_C( 4440), INT16_C( 4440), INT16_C(-13767), INT16_C( 14192), INT16_C( 4440), INT16_C( 4440), INT16_C( 1677), INT16_C( 4440), INT16_C( 4440), INT16_C(-21716), INT16_C(-14569), INT16_C( 4440), INT16_C( 4440), INT16_C( 4440), INT16_C(-13133), INT16_C(-12032), INT16_C( 4440), INT16_C(-12954), INT16_C(-30941), INT16_C( 26210), INT16_C( 4440), INT16_C(-12883), INT16_C(-31618), INT16_C( -328)) }, { simde_mm512_set_epi16(INT16_C(-23201), INT16_C( 4909), INT16_C(-10596), INT16_C( 25003), INT16_C( 25193), INT16_C(-28193), INT16_C( 7484), INT16_C( 22842), INT16_C( 12827), INT16_C(-21490), INT16_C(-19021), INT16_C( 17939), INT16_C( 14187), INT16_C( 31294), INT16_C(-22999), INT16_C( 25206), INT16_C(-22002), INT16_C( 23505), INT16_C(-20713), INT16_C( 22238), INT16_C( 29284), INT16_C( 28054), INT16_C(-21727), INT16_C( 30369), INT16_C( 19358), 
INT16_C( -623), INT16_C( 2386), INT16_C( 9395), INT16_C(-11819), INT16_C( 28599), INT16_C(-11863), INT16_C( -4500)), UINT32_C(1729799485), INT16_C( -51), simde_mm512_set_epi16(INT16_C(-23201), INT16_C( -51), INT16_C( -51), INT16_C( 25003), INT16_C( 25193), INT16_C( -51), INT16_C( -51), INT16_C( -51), INT16_C( 12827), INT16_C(-21490), INT16_C(-19021), INT16_C( -51), INT16_C( -51), INT16_C( 31294), INT16_C( -51), INT16_C( 25206), INT16_C( -51), INT16_C( 23505), INT16_C( -51), INT16_C( 22238), INT16_C( 29284), INT16_C( -51), INT16_C(-21727), INT16_C( -51), INT16_C( 19358), INT16_C( -623), INT16_C( -51), INT16_C( -51), INT16_C( -51), INT16_C( -51), INT16_C(-11863), INT16_C( -51)) }, { simde_mm512_set_epi16(INT16_C(-12929), INT16_C( -9559), INT16_C( -1255), INT16_C(-25300), INT16_C( 24130), INT16_C( 22555), INT16_C(-26496), INT16_C( 4179), INT16_C( 25227), INT16_C( 31028), INT16_C( 12492), INT16_C(-27096), INT16_C( 22382), INT16_C( -5113), INT16_C(-30455), INT16_C( 15691), INT16_C(-18605), INT16_C( -4278), INT16_C( 11441), INT16_C(-26478), INT16_C( 11388), INT16_C(-27754), INT16_C( 607), INT16_C( -1601), INT16_C(-14454), INT16_C( 1251), INT16_C( 27178), INT16_C( 11399), INT16_C( -184), INT16_C( 17990), INT16_C(-12132), INT16_C(-20400)), UINT32_C(3701546889), INT16_C( 26765), simde_mm512_set_epi16(INT16_C( 26765), INT16_C( 26765), INT16_C( -1255), INT16_C( 26765), INT16_C( 26765), INT16_C( 26765), INT16_C(-26496), INT16_C( 4179), INT16_C( 26765), INT16_C( 31028), INT16_C( 26765), INT16_C(-27096), INT16_C( 22382), INT16_C( -5113), INT16_C(-30455), INT16_C( 26765), INT16_C(-18605), INT16_C( -4278), INT16_C( 11441), INT16_C( 26765), INT16_C( 26765), INT16_C( 26765), INT16_C( 26765), INT16_C( 26765), INT16_C( 26765), INT16_C( 1251), INT16_C( 27178), INT16_C( 11399), INT16_C( 26765), INT16_C( 17990), INT16_C(-12132), INT16_C( 26765)) }, { simde_mm512_set_epi16(INT16_C( 23556), INT16_C( 11192), INT16_C(-13439), INT16_C( -2357), INT16_C( 858), INT16_C( 27575), INT16_C( 20368), 
INT16_C(-20256), INT16_C(-11019), INT16_C( -7073), INT16_C(-32385), INT16_C( 27749), INT16_C( 17332), INT16_C(-28131), INT16_C( 22510), INT16_C( -872), INT16_C( 20986), INT16_C(-25896), INT16_C( 7561), INT16_C(-22951), INT16_C( -9997), INT16_C( 18542), INT16_C( -1921), INT16_C(-16319), INT16_C(-24759), INT16_C( 10467), INT16_C( 8453), INT16_C( 5278), INT16_C(-22217), INT16_C( 17080), INT16_C( 16797), INT16_C( -9777)), UINT32_C(3298748633), INT16_C( -5240), simde_mm512_set_epi16(INT16_C( -5240), INT16_C( -5240), INT16_C(-13439), INT16_C( -2357), INT16_C( 858), INT16_C( -5240), INT16_C( 20368), INT16_C(-20256), INT16_C( -5240), INT16_C( -7073), INT16_C(-32385), INT16_C( -5240), INT16_C( -5240), INT16_C( -5240), INT16_C( -5240), INT16_C( -872), INT16_C( -5240), INT16_C( -5240), INT16_C( -5240), INT16_C(-22951), INT16_C( -5240), INT16_C( 18542), INT16_C( -1921), INT16_C(-16319), INT16_C( -5240), INT16_C( -5240), INT16_C( 8453), INT16_C( -5240), INT16_C( -5240), INT16_C( 17080), INT16_C( 16797), INT16_C( -5240)) }, { simde_mm512_set_epi16(INT16_C( -894), INT16_C( 15324), INT16_C(-23364), INT16_C( 25648), INT16_C( -512), INT16_C( 12172), INT16_C(-27706), INT16_C(-10514), INT16_C( 1026), INT16_C( 20384), INT16_C(-25471), INT16_C( -3464), INT16_C( 14827), INT16_C( 18045), INT16_C(-25826), INT16_C( 12664), INT16_C(-16682), INT16_C( 16498), INT16_C( 29333), INT16_C( -511), INT16_C( 15382), INT16_C(-19710), INT16_C(-14139), INT16_C( 14459), INT16_C( 16092), INT16_C(-12889), INT16_C( -337), INT16_C( 29893), INT16_C(-29467), INT16_C( -8274), INT16_C( 30322), INT16_C(-19138)), UINT32_C(3605268017), INT16_C(-14523), simde_mm512_set_epi16(INT16_C(-14523), INT16_C(-14523), INT16_C(-23364), INT16_C(-14523), INT16_C( -512), INT16_C(-14523), INT16_C(-14523), INT16_C(-10514), INT16_C(-14523), INT16_C(-14523), INT16_C(-14523), INT16_C( -3464), INT16_C( 14827), INT16_C(-14523), INT16_C(-25826), INT16_C( 12664), INT16_C(-16682), INT16_C( 16498), INT16_C( 29333), INT16_C( -511), INT16_C( 
15382), INT16_C(-14523), INT16_C(-14523), INT16_C( 14459), INT16_C( 16092), INT16_C(-12889), INT16_C(-14523), INT16_C(-14523), INT16_C(-29467), INT16_C( -8274), INT16_C( 30322), INT16_C(-14523)) }, { simde_mm512_set_epi16(INT16_C( -6967), INT16_C(-20070), INT16_C( -8289), INT16_C( -479), INT16_C(-18969), INT16_C( -6012), INT16_C( 11721), INT16_C( 13564), INT16_C( 19765), INT16_C( 23581), INT16_C(-21527), INT16_C( -2847), INT16_C( 23178), INT16_C(-14967), INT16_C( 17682), INT16_C( 28255), INT16_C( 8882), INT16_C( 14691), INT16_C(-27903), INT16_C( 28973), INT16_C( 619), INT16_C(-10329), INT16_C( 25572), INT16_C(-13439), INT16_C( -3930), INT16_C( 5659), INT16_C( -675), INT16_C(-18004), INT16_C(-26191), INT16_C( 5303), INT16_C(-13369), INT16_C( 21695)), UINT32_C( 349570055), INT16_C( 24210), simde_mm512_set_epi16(INT16_C( -6967), INT16_C(-20070), INT16_C( -8289), INT16_C( 24210), INT16_C(-18969), INT16_C( 24210), INT16_C( 11721), INT16_C( 13564), INT16_C( 24210), INT16_C( 24210), INT16_C(-21527), INT16_C( 24210), INT16_C( 23178), INT16_C( 24210), INT16_C( 24210), INT16_C( 28255), INT16_C( 8882), INT16_C( 14691), INT16_C(-27903), INT16_C( 28973), INT16_C( 619), INT16_C( 24210), INT16_C( 25572), INT16_C(-13439), INT16_C( -3930), INT16_C( 5659), INT16_C( -675), INT16_C(-18004), INT16_C(-26191), INT16_C( 24210), INT16_C( 24210), INT16_C( 24210)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_set1_epi16(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_set1_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask32 k; int16_t a; simde__m512i r; } test_vec[8] = { { UINT32_C( 693683203), INT16_C(-16188), simde_mm512_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C(-16188), INT16_C( 0), INT16_C(-16188), INT16_C( 0), INT16_C( 0), INT16_C(-16188), INT16_C( 0), INT16_C(-16188), INT16_C( 0), INT16_C(-16188), INT16_C(-16188), 
INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-16188), INT16_C(-16188), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-16188), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-16188), INT16_C(-16188)) }, { UINT32_C(2322862674), INT16_C(-31832), simde_mm512_set_epi16(INT16_C(-31832), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-31832), INT16_C( 0), INT16_C(-31832), INT16_C( 0), INT16_C( 0), INT16_C(-31832), INT16_C(-31832), INT16_C(-31832), INT16_C( 0), INT16_C(-31832), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-31832), INT16_C( 0), INT16_C( 0), INT16_C(-31832), INT16_C( 0), INT16_C( 0), INT16_C(-31832), INT16_C( 0), INT16_C(-31832), INT16_C( 0), INT16_C( 0), INT16_C(-31832), INT16_C( 0)) }, { UINT32_C(3196780114), INT16_C( 8083), simde_mm512_set_epi16(INT16_C( 8083), INT16_C( 0), INT16_C( 8083), INT16_C( 8083), INT16_C( 8083), INT16_C( 8083), INT16_C( 8083), INT16_C( 0), INT16_C( 8083), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 8083), INT16_C( 0), INT16_C( 8083), INT16_C( 0), INT16_C( 8083), INT16_C( 8083), INT16_C( 8083), INT16_C( 8083), INT16_C( 8083), INT16_C( 8083), INT16_C( 8083), INT16_C( 0), INT16_C( 0), INT16_C( 8083), INT16_C( 0), INT16_C( 8083), INT16_C( 0), INT16_C( 0), INT16_C( 8083), INT16_C( 0)) }, { UINT32_C( 962615778), INT16_C( 10134), simde_mm512_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 10134), INT16_C( 10134), INT16_C( 10134), INT16_C( 0), INT16_C( 0), INT16_C( 10134), INT16_C( 0), INT16_C( 10134), INT16_C( 10134), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 10134), INT16_C( 0), INT16_C( 10134), INT16_C( 10134), INT16_C( 0), INT16_C( 0), INT16_C( 10134), INT16_C( 10134), INT16_C( 10134), INT16_C( 10134), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 10134), INT16_C( 0)) }, { UINT32_C(3651012064), INT16_C(-28841), simde_mm512_set_epi16(INT16_C(-28841), INT16_C(-28841), INT16_C( 0), 
INT16_C(-28841), INT16_C(-28841), INT16_C( 0), INT16_C( 0), INT16_C(-28841), INT16_C(-28841), INT16_C( 0), INT16_C( 0), INT16_C(-28841), INT16_C(-28841), INT16_C(-28841), INT16_C(-28841), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-28841), INT16_C( 0), INT16_C(-28841), INT16_C(-28841), INT16_C(-28841), INT16_C(-28841), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { UINT32_C(1153035128), INT16_C( 9546), simde_mm512_set_epi16(INT16_C( 0), INT16_C( 9546), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 9546), INT16_C( 0), INT16_C( 0), INT16_C( 9546), INT16_C( 0), INT16_C( 9546), INT16_C( 9546), INT16_C( 9546), INT16_C( 0), INT16_C( 0), INT16_C( 9546), INT16_C( 9546), INT16_C( 9546), INT16_C( 9546), INT16_C( 0), INT16_C( 9546), INT16_C( 0), INT16_C( 9546), INT16_C( 9546), INT16_C( 0), INT16_C( 9546), INT16_C( 9546), INT16_C( 9546), INT16_C( 9546), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { UINT32_C(2648275992), INT16_C(-29002), simde_mm512_set_epi16(INT16_C(-29002), INT16_C( 0), INT16_C( 0), INT16_C(-29002), INT16_C(-29002), INT16_C(-29002), INT16_C( 0), INT16_C(-29002), INT16_C(-29002), INT16_C(-29002), INT16_C( 0), INT16_C(-29002), INT16_C(-29002), INT16_C( 0), INT16_C( 0), INT16_C(-29002), INT16_C( 0), INT16_C(-29002), INT16_C(-29002), INT16_C(-29002), INT16_C(-29002), INT16_C(-29002), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-29002), INT16_C(-29002), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { UINT32_C(1548742660), INT16_C( 11362), simde_mm512_set_epi16(INT16_C( 0), INT16_C( 11362), INT16_C( 0), INT16_C( 11362), INT16_C( 11362), INT16_C( 11362), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 11362), INT16_C( 0), INT16_C( 0), INT16_C( 11362), INT16_C( 11362), INT16_C( 11362), INT16_C( 11362), INT16_C( 11362), INT16_C( 11362), INT16_C( 11362), INT16_C( 11362), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), 
INT16_C( 0), INT16_C( 11362), INT16_C( 0), INT16_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_set1_epi16(test_vec[i].k, test_vec[i].a); simde_assert_m512i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_set1_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { int32_t a; simde__m512i r; } test_vec[8] = { { 1727286739, simde_mm512_set_epi32(INT32_C( 1727286739), INT32_C( 1727286739), INT32_C( 1727286739), INT32_C( 1727286739), INT32_C( 1727286739), INT32_C( 1727286739), INT32_C( 1727286739), INT32_C( 1727286739), INT32_C( 1727286739), INT32_C( 1727286739), INT32_C( 1727286739), INT32_C( 1727286739), INT32_C( 1727286739), INT32_C( 1727286739), INT32_C( 1727286739), INT32_C( 1727286739)) }, { 1944050466, simde_mm512_set_epi32(INT32_C( 1944050466), INT32_C( 1944050466), INT32_C( 1944050466), INT32_C( 1944050466), INT32_C( 1944050466), INT32_C( 1944050466), INT32_C( 1944050466), INT32_C( 1944050466), INT32_C( 1944050466), INT32_C( 1944050466), INT32_C( 1944050466), INT32_C( 1944050466), INT32_C( 1944050466), INT32_C( 1944050466), INT32_C( 1944050466), INT32_C( 1944050466)) }, { -1212539061, simde_mm512_set_epi32(INT32_C(-1212539061), INT32_C(-1212539061), INT32_C(-1212539061), INT32_C(-1212539061), INT32_C(-1212539061), INT32_C(-1212539061), INT32_C(-1212539061), INT32_C(-1212539061), INT32_C(-1212539061), INT32_C(-1212539061), INT32_C(-1212539061), INT32_C(-1212539061), INT32_C(-1212539061), INT32_C(-1212539061), INT32_C(-1212539061), INT32_C(-1212539061)) }, { -1654733061, simde_mm512_set_epi32(INT32_C(-1654733061), INT32_C(-1654733061), INT32_C(-1654733061), INT32_C(-1654733061), INT32_C(-1654733061), INT32_C(-1654733061), INT32_C(-1654733061), INT32_C(-1654733061), INT32_C(-1654733061), INT32_C(-1654733061), INT32_C(-1654733061), INT32_C(-1654733061), INT32_C(-1654733061), INT32_C(-1654733061), INT32_C(-1654733061), INT32_C(-1654733061)) }, { -1048158621, 
simde_mm512_set_epi32(INT32_C(-1048158621), INT32_C(-1048158621), INT32_C(-1048158621), INT32_C(-1048158621), INT32_C(-1048158621), INT32_C(-1048158621), INT32_C(-1048158621), INT32_C(-1048158621), INT32_C(-1048158621), INT32_C(-1048158621), INT32_C(-1048158621), INT32_C(-1048158621), INT32_C(-1048158621), INT32_C(-1048158621), INT32_C(-1048158621), INT32_C(-1048158621)) }, { -676031020, simde_mm512_set_epi32(INT32_C( -676031020), INT32_C( -676031020), INT32_C( -676031020), INT32_C( -676031020), INT32_C( -676031020), INT32_C( -676031020), INT32_C( -676031020), INT32_C( -676031020), INT32_C( -676031020), INT32_C( -676031020), INT32_C( -676031020), INT32_C( -676031020), INT32_C( -676031020), INT32_C( -676031020), INT32_C( -676031020), INT32_C( -676031020)) }, { 651688918, simde_mm512_set_epi32(INT32_C( 651688918), INT32_C( 651688918), INT32_C( 651688918), INT32_C( 651688918), INT32_C( 651688918), INT32_C( 651688918), INT32_C( 651688918), INT32_C( 651688918), INT32_C( 651688918), INT32_C( 651688918), INT32_C( 651688918), INT32_C( 651688918), INT32_C( 651688918), INT32_C( 651688918), INT32_C( 651688918), INT32_C( 651688918)) }, { -1051556258, simde_mm512_set_epi32(INT32_C(-1051556258), INT32_C(-1051556258), INT32_C(-1051556258), INT32_C(-1051556258), INT32_C(-1051556258), INT32_C(-1051556258), INT32_C(-1051556258), INT32_C(-1051556258), INT32_C(-1051556258), INT32_C(-1051556258), INT32_C(-1051556258), INT32_C(-1051556258), INT32_C(-1051556258), INT32_C(-1051556258), INT32_C(-1051556258), INT32_C(-1051556258)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_set1_epi32(test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_set1_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask16 k; int32_t a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C(-2133842294), INT32_C( 1453587049), INT32_C( 2146642803), 
INT32_C(-1231323727), INT32_C( 1853533908), INT32_C(-1907653908), INT32_C( 564694133), INT32_C(-1137944481), INT32_C( 355997036), INT32_C( 15257739), INT32_C( 1494729649), INT32_C( 1029796613), INT32_C( 2106354400), INT32_C( 1763331885), INT32_C( -506942576), INT32_C( -586993838)), UINT16_C( 2584), -447334412, simde_mm512_set_epi32(INT32_C(-2133842294), INT32_C( 1453587049), INT32_C( 2146642803), INT32_C(-1231323727), INT32_C( -447334412), INT32_C(-1907653908), INT32_C( -447334412), INT32_C(-1137944481), INT32_C( 355997036), INT32_C( 15257739), INT32_C( 1494729649), INT32_C( -447334412), INT32_C( -447334412), INT32_C( 1763331885), INT32_C( -506942576), INT32_C( -586993838)) }, { simde_mm512_set_epi32(INT32_C( -998613800), INT32_C( 131555600), INT32_C( -778207920), INT32_C(-1881674086), INT32_C( 920672518), INT32_C(-1753434445), INT32_C( 982638267), INT32_C( 1856541033), INT32_C( -869506663), INT32_C( -394635465), INT32_C(-1596048192), INT32_C( 274218308), INT32_C( 757893716), INT32_C(-2119306902), INT32_C( 364747827), INT32_C( -200526147)), UINT16_C(52133), 749362876, simde_mm512_set_epi32(INT32_C( 749362876), INT32_C( 749362876), INT32_C( -778207920), INT32_C(-1881674086), INT32_C( 749362876), INT32_C(-1753434445), INT32_C( 749362876), INT32_C( 749362876), INT32_C( 749362876), INT32_C( -394635465), INT32_C( 749362876), INT32_C( 274218308), INT32_C( 757893716), INT32_C( 749362876), INT32_C( 364747827), INT32_C( 749362876)) }, { simde_mm512_set_epi32(INT32_C(-2009617550), INT32_C( 1354406381), INT32_C( 2028903938), INT32_C(-1425115920), INT32_C(-1833209985), INT32_C( -485232115), INT32_C( -246273875), INT32_C(-1220668381), INT32_C( 1710154952), INT32_C(-1764069342), INT32_C( -426734827), INT32_C(-1603498425), INT32_C(-1463214772), INT32_C(-1312774926), INT32_C( 714085999), INT32_C( -352604741)), UINT16_C(50570), 722829713, simde_mm512_set_epi32(INT32_C( 722829713), INT32_C( 722829713), INT32_C( 2028903938), INT32_C(-1425115920), INT32_C(-1833209985), INT32_C( 
722829713), INT32_C( -246273875), INT32_C( 722829713), INT32_C( 722829713), INT32_C(-1764069342), INT32_C( -426734827), INT32_C(-1603498425), INT32_C( 722829713), INT32_C(-1312774926), INT32_C( 722829713), INT32_C( -352604741)) }, { simde_mm512_set_epi32(INT32_C(-1600817970), INT32_C( -289243644), INT32_C( 742005878), INT32_C( -612930926), INT32_C( 717430896), INT32_C( 1787140065), INT32_C(-1405808293), INT32_C( 816556317), INT32_C( 1747379900), INT32_C(-1006412100), INT32_C( 2116251350), INT32_C(-1238632202), INT32_C( 1684739890), INT32_C( 1414060999), INT32_C(-2081867445), INT32_C( 1952705540)), UINT16_C(15423), 1968604658, simde_mm512_set_epi32(INT32_C(-1600817970), INT32_C( -289243644), INT32_C( 1968604658), INT32_C( 1968604658), INT32_C( 1968604658), INT32_C( 1968604658), INT32_C(-1405808293), INT32_C( 816556317), INT32_C( 1747379900), INT32_C(-1006412100), INT32_C( 1968604658), INT32_C( 1968604658), INT32_C( 1968604658), INT32_C( 1968604658), INT32_C( 1968604658), INT32_C( 1968604658)) }, { simde_mm512_set_epi32(INT32_C( -666739030), INT32_C(-1370874438), INT32_C(-1476494318), INT32_C(-1101994537), INT32_C( 338919471), INT32_C( -523657701), INT32_C( 1918205933), INT32_C( -933363441), INT32_C( 191279486), INT32_C( -793805997), INT32_C(-1611569913), INT32_C(-1249963897), INT32_C(-1384621234), INT32_C( 1593832662), INT32_C( 656079206), INT32_C(-1000644982)), UINT16_C(34631), 997675190, simde_mm512_set_epi32(INT32_C( 997675190), INT32_C(-1370874438), INT32_C(-1476494318), INT32_C(-1101994537), INT32_C( 338919471), INT32_C( 997675190), INT32_C( 997675190), INT32_C( 997675190), INT32_C( 191279486), INT32_C( 997675190), INT32_C(-1611569913), INT32_C(-1249963897), INT32_C(-1384621234), INT32_C( 997675190), INT32_C( 997675190), INT32_C( 997675190)) }, { simde_mm512_set_epi32(INT32_C( 121649236), INT32_C( 1078857855), INT32_C( -789079366), INT32_C( 720922870), INT32_C( 2041256669), INT32_C( -203208947), INT32_C( 1607011101), INT32_C(-1156829654), INT32_C( 230848793), 
INT32_C( 1678224863), INT32_C( 2110278578), INT32_C(-1808926794), INT32_C( 1395318189), INT32_C( 331190146), INT32_C( 150534496), INT32_C( 511594435)), UINT16_C(61391), -1035845727, simde_mm512_set_epi32(INT32_C(-1035845727), INT32_C(-1035845727), INT32_C(-1035845727), INT32_C( 720922870), INT32_C(-1035845727), INT32_C(-1035845727), INT32_C(-1035845727), INT32_C(-1035845727), INT32_C(-1035845727), INT32_C(-1035845727), INT32_C( 2110278578), INT32_C(-1808926794), INT32_C(-1035845727), INT32_C(-1035845727), INT32_C(-1035845727), INT32_C(-1035845727)) }, { simde_mm512_set_epi32(INT32_C( -439673063), INT32_C( 281345174), INT32_C( 1703672409), INT32_C( 1433894072), INT32_C(-1374287391), INT32_C(-2054374124), INT32_C(-2087863688), INT32_C( 775409014), INT32_C( 684629778), INT32_C(-1498533524), INT32_C( -208955538), INT32_C( 1063127700), INT32_C( 429182470), INT32_C(-1892329828), INT32_C( 837229295), INT32_C( -115373033)), UINT16_C( 2879), -1796290912, simde_mm512_set_epi32(INT32_C( -439673063), INT32_C( 281345174), INT32_C( 1703672409), INT32_C( 1433894072), INT32_C(-1796290912), INT32_C(-2054374124), INT32_C(-1796290912), INT32_C(-1796290912), INT32_C( 684629778), INT32_C(-1498533524), INT32_C(-1796290912), INT32_C(-1796290912), INT32_C(-1796290912), INT32_C(-1796290912), INT32_C(-1796290912), INT32_C(-1796290912)) }, { simde_mm512_set_epi32(INT32_C( 211854878), INT32_C( 1120217162), INT32_C( 1399020352), INT32_C(-1730262794), INT32_C( -217750907), INT32_C(-1958971298), INT32_C( 1308051941), INT32_C( 659156948), INT32_C( -413755412), INT32_C(-1891691945), INT32_C(-1613764989), INT32_C( 1818229349), INT32_C( 1838020027), INT32_C( 1546326520), INT32_C( 1564338027), INT32_C( 1340948138)), UINT16_C(26109), 154532243, simde_mm512_set_epi32(INT32_C( 211854878), INT32_C( 154532243), INT32_C( 154532243), INT32_C(-1730262794), INT32_C( -217750907), INT32_C( 154532243), INT32_C( 1308051941), INT32_C( 154532243), INT32_C( 154532243), INT32_C( 154532243), INT32_C( 154532243), 
INT32_C( 154532243), INT32_C( 154532243), INT32_C( 154532243), INT32_C( 1564338027), INT32_C( 154532243)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_set1_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_set1_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; int32_t a; simde__m512i r; } test_vec[8] = { { UINT16_C(55449), 1161879327, simde_mm512_set_epi32(INT32_C( 1161879327), INT32_C( 1161879327), INT32_C( 0), INT32_C( 1161879327), INT32_C( 1161879327), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1161879327), INT32_C( 0), INT32_C( 0), INT32_C( 1161879327), INT32_C( 1161879327), INT32_C( 0), INT32_C( 0), INT32_C( 1161879327)) }, { UINT16_C(42205), 491258437, simde_mm512_set_epi32(INT32_C( 491258437), INT32_C( 0), INT32_C( 491258437), INT32_C( 0), INT32_C( 0), INT32_C( 491258437), INT32_C( 0), INT32_C( 0), INT32_C( 491258437), INT32_C( 491258437), INT32_C( 0), INT32_C( 491258437), INT32_C( 491258437), INT32_C( 491258437), INT32_C( 0), INT32_C( 491258437)) }, { UINT16_C(46294), 1464671644, simde_mm512_set_epi32(INT32_C( 1464671644), INT32_C( 0), INT32_C( 1464671644), INT32_C( 1464671644), INT32_C( 0), INT32_C( 1464671644), INT32_C( 0), INT32_C( 0), INT32_C( 1464671644), INT32_C( 1464671644), INT32_C( 0), INT32_C( 1464671644), INT32_C( 0), INT32_C( 1464671644), INT32_C( 1464671644), INT32_C( 0)) }, { UINT16_C(57846), 1382569562, simde_mm512_set_epi32(INT32_C( 1382569562), INT32_C( 1382569562), INT32_C( 1382569562), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1382569562), INT32_C( 1382569562), INT32_C( 1382569562), INT32_C( 1382569562), INT32_C( 1382569562), INT32_C( 0), INT32_C( 1382569562), INT32_C( 1382569562), INT32_C( 0)) }, { UINT16_C(64688), 417592133, simde_mm512_set_epi32(INT32_C( 417592133), INT32_C( 417592133), INT32_C( 417592133), INT32_C( 417592133), INT32_C( 
417592133), INT32_C( 417592133), INT32_C( 0), INT32_C( 0), INT32_C( 417592133), INT32_C( 0), INT32_C( 417592133), INT32_C( 417592133), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(40468), 103154350, simde_mm512_set_epi32(INT32_C( 103154350), INT32_C( 0), INT32_C( 0), INT32_C( 103154350), INT32_C( 103154350), INT32_C( 103154350), INT32_C( 103154350), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 103154350), INT32_C( 0), INT32_C( 103154350), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(20696), 487897671, simde_mm512_set_epi32(INT32_C( 0), INT32_C( 487897671), INT32_C( 0), INT32_C( 487897671), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 487897671), INT32_C( 487897671), INT32_C( 0), INT32_C( 487897671), INT32_C( 487897671), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(47493), 643357764, simde_mm512_set_epi32(INT32_C( 643357764), INT32_C( 0), INT32_C( 643357764), INT32_C( 643357764), INT32_C( 643357764), INT32_C( 0), INT32_C( 0), INT32_C( 643357764), INT32_C( 643357764), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 643357764), INT32_C( 0), INT32_C( 643357764)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_set1_epi32(test_vec[i].k, test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_set1_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { int64_t a; simde__m512i r; } test_vec[8] = { { -8789375007372599774, simde_mm512_set_epi64(INT64_C(-8789375007372599774), INT64_C(-8789375007372599774), INT64_C(-8789375007372599774), INT64_C(-8789375007372599774), INT64_C(-8789375007372599774), INT64_C(-8789375007372599774), INT64_C(-8789375007372599774), INT64_C(-8789375007372599774)) }, { -4285935604177939284, simde_mm512_set_epi64(INT64_C(-4285935604177939284), INT64_C(-4285935604177939284), INT64_C(-4285935604177939284), INT64_C(-4285935604177939284), INT64_C(-4285935604177939284), 
INT64_C(-4285935604177939284), INT64_C(-4285935604177939284), INT64_C(-4285935604177939284)) }, { -1541935515905504488, simde_mm512_set_epi64(INT64_C(-1541935515905504488), INT64_C(-1541935515905504488), INT64_C(-1541935515905504488), INT64_C(-1541935515905504488), INT64_C(-1541935515905504488), INT64_C(-1541935515905504488), INT64_C(-1541935515905504488), INT64_C(-1541935515905504488)) }, { 5952985382071947058, simde_mm512_set_epi64(INT64_C( 5952985382071947058), INT64_C( 5952985382071947058), INT64_C( 5952985382071947058), INT64_C( 5952985382071947058), INT64_C( 5952985382071947058), INT64_C( 5952985382071947058), INT64_C( 5952985382071947058), INT64_C( 5952985382071947058)) }, { -7162660555270519798, simde_mm512_set_epi64(INT64_C(-7162660555270519798), INT64_C(-7162660555270519798), INT64_C(-7162660555270519798), INT64_C(-7162660555270519798), INT64_C(-7162660555270519798), INT64_C(-7162660555270519798), INT64_C(-7162660555270519798), INT64_C(-7162660555270519798)) }, { 8404097979084250521, simde_mm512_set_epi64(INT64_C( 8404097979084250521), INT64_C( 8404097979084250521), INT64_C( 8404097979084250521), INT64_C( 8404097979084250521), INT64_C( 8404097979084250521), INT64_C( 8404097979084250521), INT64_C( 8404097979084250521), INT64_C( 8404097979084250521)) }, { 274863432779804064, simde_mm512_set_epi64(INT64_C( 274863432779804064), INT64_C( 274863432779804064), INT64_C( 274863432779804064), INT64_C( 274863432779804064), INT64_C( 274863432779804064), INT64_C( 274863432779804064), INT64_C( 274863432779804064), INT64_C( 274863432779804064)) }, { -6073562903357076278, simde_mm512_set_epi64(INT64_C(-6073562903357076278), INT64_C(-6073562903357076278), INT64_C(-6073562903357076278), INT64_C(-6073562903357076278), INT64_C(-6073562903357076278), INT64_C(-6073562903357076278), INT64_C(-6073562903357076278), INT64_C(-6073562903357076278)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_set1_epi64(test_vec[i].a); 
simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_set1_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask8 k; int64_t a; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( 1045216498523672669), INT64_C(-6036444540175881058), INT64_C(-5911148920502355606), INT64_C(-7577028982327639795), INT64_C(-2741592730704877834), INT64_C(-6453831303076951346), INT64_C(-6689489276565790355), INT64_C( 202845396246057443)), UINT8_C(230), 147395854529182590, simde_mm512_set_epi64(INT64_C( 147395854529182590), INT64_C( 147395854529182590), INT64_C( 147395854529182590), INT64_C(-7577028982327639795), INT64_C(-2741592730704877834), INT64_C( 147395854529182590), INT64_C( 147395854529182590), INT64_C( 202845396246057443)) }, { simde_mm512_set_epi64(INT64_C(-7718401035083209663), INT64_C(-3076780457048070953), INT64_C( 6475016072843370494), INT64_C(-8381463578250516776), INT64_C( 7440179812526306236), INT64_C(-1565233080792835049), INT64_C(-3520705077242655190), INT64_C( 711599945422741640)), UINT8_C(183), 7896918852801948623, simde_mm512_set_epi64(INT64_C( 7896918852801948623), INT64_C(-3076780457048070953), INT64_C( 7896918852801948623), INT64_C( 7896918852801948623), INT64_C( 7440179812526306236), INT64_C( 7896918852801948623), INT64_C( 7896918852801948623), INT64_C( 7896918852801948623)) }, { simde_mm512_set_epi64(INT64_C( 4486030894140599897), INT64_C( 6422628958957749227), INT64_C(-5036188723709908563), INT64_C( 7249692644755604208), INT64_C(-7968846935772652304), INT64_C(-1019958922473354647), INT64_C( 5481721181155050457), INT64_C( 3220728135426515219)), UINT8_C(250), 6737731418145878376, simde_mm512_set_epi64(INT64_C( 6737731418145878376), INT64_C( 6737731418145878376), INT64_C( 6737731418145878376), INT64_C( 6737731418145878376), INT64_C( 6737731418145878376), INT64_C(-1019958922473354647), INT64_C( 6737731418145878376), INT64_C( 3220728135426515219)) }, { 
simde_mm512_set_epi64(INT64_C(-6396453660831390526), INT64_C( 8933529613499491135), INT64_C( -583608444119273487), INT64_C( 2774349158822651995), INT64_C( 5342483589547515588), INT64_C( 169032945576329978), INT64_C(-6862029605560509115), INT64_C( 6948715933942990141)), UINT8_C(144), 5224961598009568585, simde_mm512_set_epi64(INT64_C( 5224961598009568585), INT64_C( 8933529613499491135), INT64_C( -583608444119273487), INT64_C( 5224961598009568585), INT64_C( 5342483589547515588), INT64_C( 169032945576329978), INT64_C(-6862029605560509115), INT64_C( 6948715933942990141)) }, { simde_mm512_set_epi64(INT64_C(-4346308446834850778), INT64_C( 2749670639259677889), INT64_C(-1682235429196139261), INT64_C(-8570560540139381802), INT64_C(-7853283901496397391), INT64_C( 153768084219711829), INT64_C(-3210037353748455743), INT64_C(-4029896259883002015)), UINT8_C(214), -5146489163462262224, simde_mm512_set_epi64(INT64_C(-5146489163462262224), INT64_C(-5146489163462262224), INT64_C(-1682235429196139261), INT64_C(-5146489163462262224), INT64_C(-7853283901496397391), INT64_C(-5146489163462262224), INT64_C(-5146489163462262224), INT64_C(-4029896259883002015)) }, { simde_mm512_set_epi64(INT64_C( 6394437943527522650), INT64_C(-6125470791748892618), INT64_C(-5975035781359101837), INT64_C( 4399409063692409934), INT64_C(-8019209045639092618), INT64_C(-3157603671849839607), INT64_C(-6814419689115640150), INT64_C( 5538401471960412489)), UINT8_C( 88), -748084489617986997, simde_mm512_set_epi64(INT64_C( 6394437943527522650), INT64_C( -748084489617986997), INT64_C(-5975035781359101837), INT64_C( -748084489617986997), INT64_C( -748084489617986997), INT64_C(-3157603671849839607), INT64_C(-6814419689115640150), INT64_C( 5538401471960412489)) }, { simde_mm512_set_epi64(INT64_C( 6475451416366061513), INT64_C( 3128457729014411682), INT64_C( 4167134861407868007), INT64_C( 2076318686723048286), INT64_C( 764926893292127387), INT64_C(-3471922167199587188), INT64_C(-1007473193319966067), 
INT64_C(-7587900950013848349)), UINT8_C( 14), -3095861881784422408, simde_mm512_set_epi64(INT64_C( 6475451416366061513), INT64_C( 3128457729014411682), INT64_C( 4167134861407868007), INT64_C( 2076318686723048286), INT64_C(-3095861881784422408), INT64_C(-3095861881784422408), INT64_C(-3095861881784422408), INT64_C(-7587900950013848349)) }, { simde_mm512_set_epi64(INT64_C(-8918688664014182717), INT64_C(-5923824341695687917), INT64_C( 597335319340416274), INT64_C(-6405873593024845306), INT64_C( 9156616106305782892), INT64_C(-3930771615997737816), INT64_C(-3489614562807589194), INT64_C(-6234599678791232286)), UINT8_C( 48), -2201472844397108415, simde_mm512_set_epi64(INT64_C(-8918688664014182717), INT64_C(-5923824341695687917), INT64_C(-2201472844397108415), INT64_C(-2201472844397108415), INT64_C( 9156616106305782892), INT64_C(-3930771615997737816), INT64_C(-3489614562807589194), INT64_C(-6234599678791232286)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_set1_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_set1_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; int64_t a; simde__m512i r; } test_vec[8] = { { UINT8_C(207), 9161374966470958313, simde_mm512_set_epi64(INT64_C( 9161374966470958313), INT64_C( 9161374966470958313), INT64_C( 0), INT64_C( 0), INT64_C( 9161374966470958313), INT64_C( 9161374966470958313), INT64_C( 9161374966470958313), INT64_C( 9161374966470958313)) }, { UINT8_C( 52), -5504071340329784539, simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C(-5504071340329784539), INT64_C(-5504071340329784539), INT64_C( 0), INT64_C(-5504071340329784539), INT64_C( 0), INT64_C( 0)) }, { UINT8_C( 37), -4694012945600318045, simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C(-4694012945600318045), INT64_C( 0), INT64_C( 0), INT64_C(-4694012945600318045), INT64_C( 0), 
INT64_C(-4694012945600318045)) }, { UINT8_C( 77), -4616382267006571958, simde_mm512_set_epi64(INT64_C( 0), INT64_C(-4616382267006571958), INT64_C( 0), INT64_C( 0), INT64_C(-4616382267006571958), INT64_C(-4616382267006571958), INT64_C( 0), INT64_C(-4616382267006571958)) }, { UINT8_C( 33), -7296455954195359480, simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C(-7296455954195359480), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C(-7296455954195359480)) }, { UINT8_C( 47), -8949112185126954032, simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C(-8949112185126954032), INT64_C( 0), INT64_C(-8949112185126954032), INT64_C(-8949112185126954032), INT64_C(-8949112185126954032), INT64_C(-8949112185126954032)) }, { UINT8_C( 80), 8577224771648710248, simde_mm512_set_epi64(INT64_C( 0), INT64_C( 8577224771648710248), INT64_C( 0), INT64_C( 8577224771648710248), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C( 2), -5341779416438471199, simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C(-5341779416438471199), INT64_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_set1_epi64(test_vec[i].k, test_vec[i].a); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_set1_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_set1_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_set1_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_set1_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_set1_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_set1_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_set1_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_set1_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_set1_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_set1_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_set1_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_set1_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_set1_ps) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm512_set1_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/set4.c000066400000000000000000000513461400333146700163100ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN set4 #include #include #include static int test_simde_mm512_set4_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { int32_t d; int32_t c; int32_t b; int32_t a; simde__m512i r; } test_vec[8] = { { INT32_C( 1704071444), INT32_C(-1428654423), INT32_C( 1780802031), INT32_C( 917850215), simde_mm512_set_epi32(INT32_C( 1704071444), INT32_C(-1428654423), INT32_C( 1780802031), INT32_C( 917850215), INT32_C( 1704071444), INT32_C(-1428654423), INT32_C( 1780802031), INT32_C( 917850215), INT32_C( 1704071444), INT32_C(-1428654423), INT32_C( 1780802031), INT32_C( 917850215), INT32_C( 1704071444), INT32_C(-1428654423), INT32_C( 1780802031), INT32_C( 917850215)) }, { INT32_C( -585790761), INT32_C( -876839881), INT32_C( 793753163), INT32_C(-1831291840), simde_mm512_set_epi32(INT32_C( -585790761), INT32_C( -876839881), INT32_C( 793753163), INT32_C(-1831291840), INT32_C( -585790761), INT32_C( -876839881), INT32_C( 793753163), INT32_C(-1831291840), INT32_C( -585790761), INT32_C( -876839881), INT32_C( 793753163), INT32_C(-1831291840), INT32_C( -585790761), INT32_C( -876839881), INT32_C( 793753163), INT32_C(-1831291840)) }, { INT32_C( 1386668759), INT32_C( -584767031), INT32_C( -774218527), INT32_C( -668438843), simde_mm512_set_epi32(INT32_C( 1386668759), INT32_C( -584767031), INT32_C( -774218527), INT32_C( -668438843), INT32_C( 1386668759), INT32_C( -584767031), INT32_C( -774218527), INT32_C( -668438843), INT32_C( 1386668759), INT32_C( -584767031), INT32_C( -774218527), INT32_C( -668438843), INT32_C( 1386668759), INT32_C( -584767031), INT32_C( -774218527), INT32_C( -668438843)) }, { INT32_C(-1303355692), INT32_C( -501990193), INT32_C( 1644239252), INT32_C( 1280971850), simde_mm512_set_epi32(INT32_C(-1303355692), INT32_C( -501990193), INT32_C( 1644239252), INT32_C( 1280971850), INT32_C(-1303355692), INT32_C( -501990193), INT32_C( 1644239252), INT32_C( 1280971850), INT32_C(-1303355692), INT32_C( 
-501990193), INT32_C( 1644239252), INT32_C( 1280971850), INT32_C(-1303355692), INT32_C( -501990193), INT32_C( 1644239252), INT32_C( 1280971850)) }, { INT32_C( -594731794), INT32_C( 1836344530), INT32_C(-1063862321), INT32_C( -5810296), simde_mm512_set_epi32(INT32_C( -594731794), INT32_C( 1836344530), INT32_C(-1063862321), INT32_C( -5810296), INT32_C( -594731794), INT32_C( 1836344530), INT32_C(-1063862321), INT32_C( -5810296), INT32_C( -594731794), INT32_C( 1836344530), INT32_C(-1063862321), INT32_C( -5810296), INT32_C( -594731794), INT32_C( 1836344530), INT32_C(-1063862321), INT32_C( -5810296)) }, { INT32_C(-1188761421), INT32_C( -479336401), INT32_C( 651981060), INT32_C( 1888988895), simde_mm512_set_epi32(INT32_C(-1188761421), INT32_C( -479336401), INT32_C( 651981060), INT32_C( 1888988895), INT32_C(-1188761421), INT32_C( -479336401), INT32_C( 651981060), INT32_C( 1888988895), INT32_C(-1188761421), INT32_C( -479336401), INT32_C( 651981060), INT32_C( 1888988895), INT32_C(-1188761421), INT32_C( -479336401), INT32_C( 651981060), INT32_C( 1888988895)) }, { INT32_C(-1384652575), INT32_C( -302711392), INT32_C( -7833127), INT32_C( 1710017817), simde_mm512_set_epi32(INT32_C(-1384652575), INT32_C( -302711392), INT32_C( -7833127), INT32_C( 1710017817), INT32_C(-1384652575), INT32_C( -302711392), INT32_C( -7833127), INT32_C( 1710017817), INT32_C(-1384652575), INT32_C( -302711392), INT32_C( -7833127), INT32_C( 1710017817), INT32_C(-1384652575), INT32_C( -302711392), INT32_C( -7833127), INT32_C( 1710017817)) }, { INT32_C(-1634061345), INT32_C( 1275812965), INT32_C( 644899010), INT32_C(-1359876073), simde_mm512_set_epi32(INT32_C(-1634061345), INT32_C( 1275812965), INT32_C( 644899010), INT32_C(-1359876073), INT32_C(-1634061345), INT32_C( 1275812965), INT32_C( 644899010), INT32_C(-1359876073), INT32_C(-1634061345), INT32_C( 1275812965), INT32_C( 644899010), INT32_C(-1359876073), INT32_C(-1634061345), INT32_C( 1275812965), INT32_C( 644899010), INT32_C(-1359876073)) } }; for (size_t 
i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_set4_epi32(test_vec[i].d, test_vec[i].c, test_vec[i].b, test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_set4_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { int64_t d; int64_t c; int64_t b; int64_t a; simde__m512i r; } test_vec[8] = { { INT64_C(-2593705665723537468), INT64_C( 7248735595083828941), INT64_C(-1480325022577808917), INT64_C(-3460515645430413676), simde_mm512_set_epi64(INT64_C(-2593705665723537468), INT64_C( 7248735595083828941), INT64_C(-1480325022577808917), INT64_C(-3460515645430413676), INT64_C(-2593705665723537468), INT64_C( 7248735595083828941), INT64_C(-1480325022577808917), INT64_C(-3460515645430413676)) }, { INT64_C( 9178362238632935456), INT64_C( 4357840778555019964), INT64_C(-4052759487924272616), INT64_C( 5679911602698625292), simde_mm512_set_epi64(INT64_C( 9178362238632935456), INT64_C( 4357840778555019964), INT64_C(-4052759487924272616), INT64_C( 5679911602698625292), INT64_C( 9178362238632935456), INT64_C( 4357840778555019964), INT64_C(-4052759487924272616), INT64_C( 5679911602698625292)) }, { INT64_C( 4752422498272537365), INT64_C(-8834973358315632886), INT64_C(-3972020332791269926), INT64_C( 1768010603398062576), simde_mm512_set_epi64(INT64_C( 4752422498272537365), INT64_C(-8834973358315632886), INT64_C(-3972020332791269926), INT64_C( 1768010603398062576), INT64_C( 4752422498272537365), INT64_C(-8834973358315632886), INT64_C(-3972020332791269926), INT64_C( 1768010603398062576)) }, { INT64_C( 1080024807241277031), INT64_C( 6916234010560042694), INT64_C(-5226061354301241362), INT64_C(-7556111188703729544), simde_mm512_set_epi64(INT64_C( 1080024807241277031), INT64_C( 6916234010560042694), INT64_C(-5226061354301241362), INT64_C(-7556111188703729544), INT64_C( 1080024807241277031), INT64_C( 6916234010560042694), INT64_C(-5226061354301241362), INT64_C(-7556111188703729544)) }, { 
INT64_C(-2308292611043682685), INT64_C(-4902179645913622872), INT64_C( 1328708783259675127), INT64_C(-8034995773554670267), simde_mm512_set_epi64(INT64_C(-2308292611043682685), INT64_C(-4902179645913622872), INT64_C( 1328708783259675127), INT64_C(-8034995773554670267), INT64_C(-2308292611043682685), INT64_C(-4902179645913622872), INT64_C( 1328708783259675127), INT64_C(-8034995773554670267)) }, { INT64_C( -429235303463415783), INT64_C( 3969176932066333632), INT64_C(-3604302014217727944), INT64_C( 3066662634116690752), simde_mm512_set_epi64(INT64_C( -429235303463415783), INT64_C( 3969176932066333632), INT64_C(-3604302014217727944), INT64_C( 3066662634116690752), INT64_C( -429235303463415783), INT64_C( 3969176932066333632), INT64_C(-3604302014217727944), INT64_C( 3066662634116690752)) }, { INT64_C( 4584355551724557735), INT64_C(-3275746751222287827), INT64_C(-3251679101250531673), INT64_C(-3482741022099573882), simde_mm512_set_epi64(INT64_C( 4584355551724557735), INT64_C(-3275746751222287827), INT64_C(-3251679101250531673), INT64_C(-3482741022099573882), INT64_C( 4584355551724557735), INT64_C(-3275746751222287827), INT64_C(-3251679101250531673), INT64_C(-3482741022099573882)) }, { INT64_C(-8676900622553771545), INT64_C(-1335594618905000828), INT64_C( -120183447945767440), INT64_C(-4223741771381142614), simde_mm512_set_epi64(INT64_C(-8676900622553771545), INT64_C(-1335594618905000828), INT64_C( -120183447945767440), INT64_C(-4223741771381142614), INT64_C(-8676900622553771545), INT64_C(-1335594618905000828), INT64_C( -120183447945767440), INT64_C(-4223741771381142614)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_set4_epi64(test_vec[i].d, test_vec[i].c, test_vec[i].b, test_vec[i].a); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_set4_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float32 d; simde_float32 c; simde_float32 b; simde_float32 a; simde__m512 r; } 
test_vec[8] = { { SIMDE_FLOAT32_C( 549.42), SIMDE_FLOAT32_C( 390.74), SIMDE_FLOAT32_C( -762.45), SIMDE_FLOAT32_C( 105.60), simde_mm512_set_ps(SIMDE_FLOAT32_C( 549.42), SIMDE_FLOAT32_C( 390.74), SIMDE_FLOAT32_C( -762.45), SIMDE_FLOAT32_C( 105.60), SIMDE_FLOAT32_C( 549.42), SIMDE_FLOAT32_C( 390.74), SIMDE_FLOAT32_C( -762.45), SIMDE_FLOAT32_C( 105.60), SIMDE_FLOAT32_C( 549.42), SIMDE_FLOAT32_C( 390.74), SIMDE_FLOAT32_C( -762.45), SIMDE_FLOAT32_C( 105.60), SIMDE_FLOAT32_C( 549.42), SIMDE_FLOAT32_C( 390.74), SIMDE_FLOAT32_C( -762.45), SIMDE_FLOAT32_C( 105.60)) }, { SIMDE_FLOAT32_C( -455.28), SIMDE_FLOAT32_C( 904.62), SIMDE_FLOAT32_C( 43.64), SIMDE_FLOAT32_C( -136.13), simde_mm512_set_ps(SIMDE_FLOAT32_C( -455.28), SIMDE_FLOAT32_C( 904.62), SIMDE_FLOAT32_C( 43.64), SIMDE_FLOAT32_C( -136.13), SIMDE_FLOAT32_C( -455.28), SIMDE_FLOAT32_C( 904.62), SIMDE_FLOAT32_C( 43.64), SIMDE_FLOAT32_C( -136.13), SIMDE_FLOAT32_C( -455.28), SIMDE_FLOAT32_C( 904.62), SIMDE_FLOAT32_C( 43.64), SIMDE_FLOAT32_C( -136.13), SIMDE_FLOAT32_C( -455.28), SIMDE_FLOAT32_C( 904.62), SIMDE_FLOAT32_C( 43.64), SIMDE_FLOAT32_C( -136.13)) }, { SIMDE_FLOAT32_C( 780.38), SIMDE_FLOAT32_C( -314.57), SIMDE_FLOAT32_C( -46.15), SIMDE_FLOAT32_C( -499.56), simde_mm512_set_ps(SIMDE_FLOAT32_C( 780.38), SIMDE_FLOAT32_C( -314.57), SIMDE_FLOAT32_C( -46.15), SIMDE_FLOAT32_C( -499.56), SIMDE_FLOAT32_C( 780.38), SIMDE_FLOAT32_C( -314.57), SIMDE_FLOAT32_C( -46.15), SIMDE_FLOAT32_C( -499.56), SIMDE_FLOAT32_C( 780.38), SIMDE_FLOAT32_C( -314.57), SIMDE_FLOAT32_C( -46.15), SIMDE_FLOAT32_C( -499.56), SIMDE_FLOAT32_C( 780.38), SIMDE_FLOAT32_C( -314.57), SIMDE_FLOAT32_C( -46.15), SIMDE_FLOAT32_C( -499.56)) }, { SIMDE_FLOAT32_C( -150.61), SIMDE_FLOAT32_C( -652.43), SIMDE_FLOAT32_C( 308.34), SIMDE_FLOAT32_C( 420.76), simde_mm512_set_ps(SIMDE_FLOAT32_C( -150.61), SIMDE_FLOAT32_C( -652.43), SIMDE_FLOAT32_C( 308.34), SIMDE_FLOAT32_C( 420.76), SIMDE_FLOAT32_C( -150.61), SIMDE_FLOAT32_C( -652.43), SIMDE_FLOAT32_C( 308.34), SIMDE_FLOAT32_C( 
420.76), SIMDE_FLOAT32_C( -150.61), SIMDE_FLOAT32_C( -652.43), SIMDE_FLOAT32_C( 308.34), SIMDE_FLOAT32_C( 420.76), SIMDE_FLOAT32_C( -150.61), SIMDE_FLOAT32_C( -652.43), SIMDE_FLOAT32_C( 308.34), SIMDE_FLOAT32_C( 420.76)) }, { SIMDE_FLOAT32_C( 591.99), SIMDE_FLOAT32_C( 726.98), SIMDE_FLOAT32_C( 873.87), SIMDE_FLOAT32_C( -584.98), simde_mm512_set_ps(SIMDE_FLOAT32_C( 591.99), SIMDE_FLOAT32_C( 726.98), SIMDE_FLOAT32_C( 873.87), SIMDE_FLOAT32_C( -584.98), SIMDE_FLOAT32_C( 591.99), SIMDE_FLOAT32_C( 726.98), SIMDE_FLOAT32_C( 873.87), SIMDE_FLOAT32_C( -584.98), SIMDE_FLOAT32_C( 591.99), SIMDE_FLOAT32_C( 726.98), SIMDE_FLOAT32_C( 873.87), SIMDE_FLOAT32_C( -584.98), SIMDE_FLOAT32_C( 591.99), SIMDE_FLOAT32_C( 726.98), SIMDE_FLOAT32_C( 873.87), SIMDE_FLOAT32_C( -584.98)) }, { SIMDE_FLOAT32_C( -867.47), SIMDE_FLOAT32_C( -316.79), SIMDE_FLOAT32_C( 168.26), SIMDE_FLOAT32_C( -818.10), simde_mm512_set_ps(SIMDE_FLOAT32_C( -867.47), SIMDE_FLOAT32_C( -316.79), SIMDE_FLOAT32_C( 168.26), SIMDE_FLOAT32_C( -818.10), SIMDE_FLOAT32_C( -867.47), SIMDE_FLOAT32_C( -316.79), SIMDE_FLOAT32_C( 168.26), SIMDE_FLOAT32_C( -818.10), SIMDE_FLOAT32_C( -867.47), SIMDE_FLOAT32_C( -316.79), SIMDE_FLOAT32_C( 168.26), SIMDE_FLOAT32_C( -818.10), SIMDE_FLOAT32_C( -867.47), SIMDE_FLOAT32_C( -316.79), SIMDE_FLOAT32_C( 168.26), SIMDE_FLOAT32_C( -818.10)) }, { SIMDE_FLOAT32_C( 307.87), SIMDE_FLOAT32_C( -536.04), SIMDE_FLOAT32_C( 660.51), SIMDE_FLOAT32_C( 761.16), simde_mm512_set_ps(SIMDE_FLOAT32_C( 307.87), SIMDE_FLOAT32_C( -536.04), SIMDE_FLOAT32_C( 660.51), SIMDE_FLOAT32_C( 761.16), SIMDE_FLOAT32_C( 307.87), SIMDE_FLOAT32_C( -536.04), SIMDE_FLOAT32_C( 660.51), SIMDE_FLOAT32_C( 761.16), SIMDE_FLOAT32_C( 307.87), SIMDE_FLOAT32_C( -536.04), SIMDE_FLOAT32_C( 660.51), SIMDE_FLOAT32_C( 761.16), SIMDE_FLOAT32_C( 307.87), SIMDE_FLOAT32_C( -536.04), SIMDE_FLOAT32_C( 660.51), SIMDE_FLOAT32_C( 761.16)) }, { SIMDE_FLOAT32_C( -886.20), SIMDE_FLOAT32_C( 955.45), SIMDE_FLOAT32_C( -821.28), SIMDE_FLOAT32_C( -285.01), 
simde_mm512_set_ps(SIMDE_FLOAT32_C( -886.20), SIMDE_FLOAT32_C( 955.45), SIMDE_FLOAT32_C( -821.28), SIMDE_FLOAT32_C( -285.01), SIMDE_FLOAT32_C( -886.20), SIMDE_FLOAT32_C( 955.45), SIMDE_FLOAT32_C( -821.28), SIMDE_FLOAT32_C( -285.01), SIMDE_FLOAT32_C( -886.20), SIMDE_FLOAT32_C( 955.45), SIMDE_FLOAT32_C( -821.28), SIMDE_FLOAT32_C( -285.01), SIMDE_FLOAT32_C( -886.20), SIMDE_FLOAT32_C( 955.45), SIMDE_FLOAT32_C( -821.28), SIMDE_FLOAT32_C( -285.01)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_set4_ps(test_vec[i].d, test_vec[i].c, test_vec[i].b, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_set4_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float64 d; simde_float64 c; simde_float64 b; simde_float64 a; simde__m512d r; } test_vec[8] = { { SIMDE_FLOAT64_C( -466.05), SIMDE_FLOAT64_C( -39.63), SIMDE_FLOAT64_C( 479.17), SIMDE_FLOAT64_C( 865.09), simde_mm512_set_pd(SIMDE_FLOAT64_C( -466.05), SIMDE_FLOAT64_C( -39.63), SIMDE_FLOAT64_C( 479.17), SIMDE_FLOAT64_C( 865.09), SIMDE_FLOAT64_C( -466.05), SIMDE_FLOAT64_C( -39.63), SIMDE_FLOAT64_C( 479.17), SIMDE_FLOAT64_C( 865.09)) }, { SIMDE_FLOAT64_C( -466.60), SIMDE_FLOAT64_C( 25.52), SIMDE_FLOAT64_C( 108.39), SIMDE_FLOAT64_C( -438.48), simde_mm512_set_pd(SIMDE_FLOAT64_C( -466.60), SIMDE_FLOAT64_C( 25.52), SIMDE_FLOAT64_C( 108.39), SIMDE_FLOAT64_C( -438.48), SIMDE_FLOAT64_C( -466.60), SIMDE_FLOAT64_C( 25.52), SIMDE_FLOAT64_C( 108.39), SIMDE_FLOAT64_C( -438.48)) }, { SIMDE_FLOAT64_C( 192.69), SIMDE_FLOAT64_C( -608.10), SIMDE_FLOAT64_C( -132.84), SIMDE_FLOAT64_C( 545.82), simde_mm512_set_pd(SIMDE_FLOAT64_C( 192.69), SIMDE_FLOAT64_C( -608.10), SIMDE_FLOAT64_C( -132.84), SIMDE_FLOAT64_C( 545.82), SIMDE_FLOAT64_C( 192.69), SIMDE_FLOAT64_C( -608.10), SIMDE_FLOAT64_C( -132.84), SIMDE_FLOAT64_C( 545.82)) }, { SIMDE_FLOAT64_C( -398.73), SIMDE_FLOAT64_C( 115.78), SIMDE_FLOAT64_C( 863.37), SIMDE_FLOAT64_C( -519.72), 
simde_mm512_set_pd(SIMDE_FLOAT64_C( -398.73), SIMDE_FLOAT64_C( 115.78), SIMDE_FLOAT64_C( 863.37), SIMDE_FLOAT64_C( -519.72), SIMDE_FLOAT64_C( -398.73), SIMDE_FLOAT64_C( 115.78), SIMDE_FLOAT64_C( 863.37), SIMDE_FLOAT64_C( -519.72)) }, { SIMDE_FLOAT64_C( -372.73), SIMDE_FLOAT64_C( 151.17), SIMDE_FLOAT64_C( 865.66), SIMDE_FLOAT64_C( 411.45), simde_mm512_set_pd(SIMDE_FLOAT64_C( -372.73), SIMDE_FLOAT64_C( 151.17), SIMDE_FLOAT64_C( 865.66), SIMDE_FLOAT64_C( 411.45), SIMDE_FLOAT64_C( -372.73), SIMDE_FLOAT64_C( 151.17), SIMDE_FLOAT64_C( 865.66), SIMDE_FLOAT64_C( 411.45)) }, { SIMDE_FLOAT64_C( 448.07), SIMDE_FLOAT64_C( 391.44), SIMDE_FLOAT64_C( 859.02), SIMDE_FLOAT64_C( 20.32), simde_mm512_set_pd(SIMDE_FLOAT64_C( 448.07), SIMDE_FLOAT64_C( 391.44), SIMDE_FLOAT64_C( 859.02), SIMDE_FLOAT64_C( 20.32), SIMDE_FLOAT64_C( 448.07), SIMDE_FLOAT64_C( 391.44), SIMDE_FLOAT64_C( 859.02), SIMDE_FLOAT64_C( 20.32)) }, { SIMDE_FLOAT64_C( -644.29), SIMDE_FLOAT64_C( 326.35), SIMDE_FLOAT64_C( 131.23), SIMDE_FLOAT64_C( -950.42), simde_mm512_set_pd(SIMDE_FLOAT64_C( -644.29), SIMDE_FLOAT64_C( 326.35), SIMDE_FLOAT64_C( 131.23), SIMDE_FLOAT64_C( -950.42), SIMDE_FLOAT64_C( -644.29), SIMDE_FLOAT64_C( 326.35), SIMDE_FLOAT64_C( 131.23), SIMDE_FLOAT64_C( -950.42)) }, { SIMDE_FLOAT64_C( 9.75), SIMDE_FLOAT64_C( 116.77), SIMDE_FLOAT64_C( 62.44), SIMDE_FLOAT64_C( 838.89), simde_mm512_set_pd(SIMDE_FLOAT64_C( 9.75), SIMDE_FLOAT64_C( 116.77), SIMDE_FLOAT64_C( 62.44), SIMDE_FLOAT64_C( 838.89), SIMDE_FLOAT64_C( 9.75), SIMDE_FLOAT64_C( 116.77), SIMDE_FLOAT64_C( 62.44), SIMDE_FLOAT64_C( 838.89)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_set4_pd(test_vec[i].d, test_vec[i].c, test_vec[i].b, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_set4_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_set4_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_set4_ps) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm512_set4_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/setone.c000066400000000000000000000032541400333146700167210ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #define SIMDE_TEST_X86_AVX512_INSN setone #include #include #include #include static int test_simde_x_mm512_setone_si512(SIMDE_MUNIT_TEST_ARGS) { simde_assert_m512i_i32(simde_x_mm512_setone_si512(), ==, simde_mm512_set1_epi32(~INT32_C(0))); return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(x_mm512_setone_si512) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/setr.c000066400000000000000000000736261400333146700164130ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #define SIMDE_TEST_X86_AVX512_INSN setr #include #include #include static int test_simde_mm512_setr_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { int32_t e15; int32_t e14; int32_t e13; int32_t e12; int32_t e11; int32_t e10; int32_t e9; int32_t e8; int32_t e7; int32_t e6; int32_t e5; int32_t e4; int32_t e3; int32_t e2; int32_t e1; int32_t e0; simde__m512i r; } test_vec[8] = { { INT32_C( -225639004), INT32_C( -150299519), INT32_C(-1534790107), INT32_C( 1014269333), INT32_C( 29394004), INT32_C( 599047736), INT32_C( 1587483242), INT32_C(-1377286712), INT32_C( 298403451), INT32_C( -826427815), INT32_C( 694491833), INT32_C( -705464027), INT32_C( -970263706), INT32_C( -734607127), INT32_C( 554049123), INT32_C( -70690832), simde_mm512_set_epi32(INT32_C( -70690832), INT32_C( 554049123), INT32_C( -734607127), INT32_C( -970263706), INT32_C( -705464027), INT32_C( 694491833), INT32_C( -826427815), INT32_C( 298403451), INT32_C(-1377286712), INT32_C( 1587483242), INT32_C( 599047736), INT32_C( 29394004), INT32_C( 1014269333), INT32_C(-1534790107), INT32_C( -150299519), INT32_C( -225639004)) }, { INT32_C( -94411288), INT32_C( -666296442), INT32_C( -469504489), INT32_C( 1755610261), INT32_C( -444648579), INT32_C( 539810698), INT32_C(-1045838623), INT32_C( 1780875011), INT32_C(-1180587774), INT32_C( 1720505332), INT32_C( -289874239), INT32_C(-1063749100), INT32_C(-1248309679), INT32_C(-1329997639), INT32_C( 575866898), INT32_C( -286711209), simde_mm512_set_epi32(INT32_C( -286711209), INT32_C( 575866898), INT32_C(-1329997639), INT32_C(-1248309679), INT32_C(-1063749100), INT32_C( -289874239), INT32_C( 1720505332), INT32_C(-1180587774), INT32_C( 1780875011), INT32_C(-1045838623), INT32_C( 539810698), INT32_C( -444648579), INT32_C( 1755610261), INT32_C( -469504489), INT32_C( -666296442), INT32_C( -94411288)) }, { INT32_C( -592672865), INT32_C(-1173697634), INT32_C(-2138963688), INT32_C( 1137021966), INT32_C( -277367531), 
INT32_C( 1535015576), INT32_C( -300840189), INT32_C( 970793246), INT32_C( 2002409399), INT32_C(-1909422707), INT32_C( 1060757649), INT32_C( 187969074), INT32_C(-1128231220), INT32_C(-1406585232), INT32_C( 325181376), INT32_C(-1609780510), simde_mm512_set_epi32(INT32_C(-1609780510), INT32_C( 325181376), INT32_C(-1406585232), INT32_C(-1128231220), INT32_C( 187969074), INT32_C( 1060757649), INT32_C(-1909422707), INT32_C( 2002409399), INT32_C( 970793246), INT32_C( -300840189), INT32_C( 1535015576), INT32_C( -277367531), INT32_C( 1137021966), INT32_C(-2138963688), INT32_C(-1173697634), INT32_C( -592672865)) }, { INT32_C( 1559684973), INT32_C( -969314800), INT32_C(-1209891452), INT32_C( 1346728079), INT32_C( 1672631336), INT32_C(-1432148577), INT32_C(-2062863806), INT32_C( 47399907), INT32_C( 1393917193), INT32_C( 2115773956), INT32_C( -279257842), INT32_C( 1954534792), INT32_C( 2098673509), INT32_C(-1488841900), INT32_C(-1097418668), INT32_C( 924984498), simde_mm512_set_epi32(INT32_C( 924984498), INT32_C(-1097418668), INT32_C(-1488841900), INT32_C( 2098673509), INT32_C( 1954534792), INT32_C( -279257842), INT32_C( 2115773956), INT32_C( 1393917193), INT32_C( 47399907), INT32_C(-2062863806), INT32_C(-1432148577), INT32_C( 1672631336), INT32_C( 1346728079), INT32_C(-1209891452), INT32_C( -969314800), INT32_C( 1559684973)) }, { INT32_C( 168008623), INT32_C( -453715155), INT32_C(-1391419453), INT32_C( -49732507), INT32_C(-1285867712), INT32_C(-1989456497), INT32_C( -747356949), INT32_C( 490906758), INT32_C( 1610110312), INT32_C( 1643820847), INT32_C( -516397675), INT32_C( 100321539), INT32_C( 1161753679), INT32_C( 1545726171), INT32_C( 793769069), INT32_C( 1644133252), simde_mm512_set_epi32(INT32_C( 1644133252), INT32_C( 793769069), INT32_C( 1545726171), INT32_C( 1161753679), INT32_C( 100321539), INT32_C( -516397675), INT32_C( 1643820847), INT32_C( 1610110312), INT32_C( 490906758), INT32_C( -747356949), INT32_C(-1989456497), INT32_C(-1285867712), INT32_C( -49732507), 
INT32_C(-1391419453), INT32_C( -453715155), INT32_C( 168008623)) }, { INT32_C(-1778931889), INT32_C( -605375026), INT32_C( 1673050617), INT32_C( -625199109), INT32_C( -96995330), INT32_C(-1228008621), INT32_C(-2102854258), INT32_C( 134984452), INT32_C( 310905618), INT32_C(-2014521010), INT32_C(-1089463947), INT32_C(-2013872078), INT32_C( 801481447), INT32_C( 737497431), INT32_C( 1618614949), INT32_C(-1017834831), simde_mm512_set_epi32(INT32_C(-1017834831), INT32_C( 1618614949), INT32_C( 737497431), INT32_C( 801481447), INT32_C(-2013872078), INT32_C(-1089463947), INT32_C(-2014521010), INT32_C( 310905618), INT32_C( 134984452), INT32_C(-2102854258), INT32_C(-1228008621), INT32_C( -96995330), INT32_C( -625199109), INT32_C( 1673050617), INT32_C( -605375026), INT32_C(-1778931889)) }, { INT32_C( -93411582), INT32_C(-2041831255), INT32_C( -37603949), INT32_C(-1753490957), INT32_C( -415214737), INT32_C( 910720124), INT32_C(-1202548944), INT32_C( -274420795), INT32_C( 1822689778), INT32_C( 894388184), INT32_C(-1563709444), INT32_C( 53989264), INT32_C( 680272844), INT32_C( -566635982), INT32_C( 1666979976), INT32_C(-1823765080), simde_mm512_set_epi32(INT32_C(-1823765080), INT32_C( 1666979976), INT32_C( -566635982), INT32_C( 680272844), INT32_C( 53989264), INT32_C(-1563709444), INT32_C( 894388184), INT32_C( 1822689778), INT32_C( -274420795), INT32_C(-1202548944), INT32_C( 910720124), INT32_C( -415214737), INT32_C(-1753490957), INT32_C( -37603949), INT32_C(-2041831255), INT32_C( -93411582)) }, { INT32_C(-1594363360), INT32_C( 1289095896), INT32_C( 1158627349), INT32_C( -895971493), INT32_C( -373133234), INT32_C(-1633749559), INT32_C( 1579262744), INT32_C( -827722334), INT32_C( -839338980), INT32_C( 1359699192), INT32_C( -211246622), INT32_C(-1308719740), INT32_C( -442822787), INT32_C( 253799860), INT32_C(-1608354611), INT32_C( -49457301), simde_mm512_set_epi32(INT32_C( -49457301), INT32_C(-1608354611), INT32_C( 253799860), INT32_C( -442822787), INT32_C(-1308719740), INT32_C( 
-211246622), INT32_C( 1359699192), INT32_C( -839338980), INT32_C( -827722334), INT32_C( 1579262744), INT32_C(-1633749559), INT32_C( -373133234), INT32_C( -895971493), INT32_C( 1158627349), INT32_C( 1289095896), INT32_C(-1594363360)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_setr_epi32( test_vec[i].e15, test_vec[i].e14, test_vec[i].e13, test_vec[i].e12, test_vec[i].e11, test_vec[i].e10, test_vec[i].e9, test_vec[i].e8, test_vec[i].e7, test_vec[i].e6, test_vec[i].e5, test_vec[i].e4, test_vec[i].e3, test_vec[i].e2, test_vec[i].e1, test_vec[i].e0); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_setr_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { int64_t e7; int64_t e6; int64_t e5; int64_t e4; int64_t e3; int64_t e2; int64_t e1; int64_t e0; simde__m512i r; } test_vec[8] = { { INT64_C( 1888907496602700549), INT64_C(-4376226703742367928), INT64_C( 1319808302306308723), INT64_C(-8416436825139627772), INT64_C(-7030268320569234373), INT64_C( 4053306656518847875), INT64_C(-7288565613940945788), INT64_C( 3412457605701455282), simde_mm512_set_epi64(INT64_C( 3412457605701455282), INT64_C(-7288565613940945788), INT64_C( 4053306656518847875), INT64_C(-7030268320569234373), INT64_C(-8416436825139627772), INT64_C( 1319808302306308723), INT64_C(-4376226703742367928), INT64_C( 1888907496602700549)) }, { INT64_C(-9149987363116605946), INT64_C(-8568066162437210648), INT64_C( 5724154895417122611), INT64_C( 3016754579022165386), INT64_C(-6067588729873055686), INT64_C( 2536453724300579147), INT64_C( 4833323501533385465), INT64_C(-7273821104851084568), simde_mm512_set_epi64(INT64_C(-7273821104851084568), INT64_C( 4833323501533385465), INT64_C( 2536453724300579147), INT64_C(-6067588729873055686), INT64_C( 3016754579022165386), INT64_C( 5724154895417122611), INT64_C(-8568066162437210648), INT64_C(-9149987363116605946)) }, { INT64_C( 8826953524808214711), INT64_C( 3028688385033096274), 
INT64_C( 7940008473030673245), INT64_C(-3386602331089063366), INT64_C(-5551104753805792180), INT64_C( 8877895432400251040), INT64_C( 3968860811702288437), INT64_C(-8457518168881001349), simde_mm512_set_epi64(INT64_C(-8457518168881001349), INT64_C( 3968860811702288437), INT64_C( 8877895432400251040), INT64_C(-5551104753805792180), INT64_C(-3386602331089063366), INT64_C( 7940008473030673245), INT64_C( 3028688385033096274), INT64_C( 8826953524808214711)) }, { INT64_C(-1749629734112662134), INT64_C(-4615379630913236957), INT64_C(-1845654885783324819), INT64_C(-4590913868344348334), INT64_C(-8403233988575943004), INT64_C(-2056952412471198236), INT64_C(-2511875144579280083), INT64_C(-8623043688818523441), simde_mm512_set_epi64(INT64_C(-8623043688818523441), INT64_C(-2511875144579280083), INT64_C(-2056952412471198236), INT64_C(-8403233988575943004), INT64_C(-4590913868344348334), INT64_C(-1845654885783324819), INT64_C(-4615379630913236957), INT64_C(-1749629734112662134)) }, { INT64_C( 1761036457964785466), INT64_C(-7054972876528755116), INT64_C(-7738430230159702471), INT64_C( 5826683286381985571), INT64_C(-4997893423121397361), INT64_C( 95646196860293964), INT64_C( -268412225668008681), INT64_C(-4388731883907908541), simde_mm512_set_epi64(INT64_C(-4388731883907908541), INT64_C( -268412225668008681), INT64_C( 95646196860293964), INT64_C(-4997893423121397361), INT64_C( 5826683286381985571), INT64_C(-7738430230159702471), INT64_C(-7054972876528755116), INT64_C( 1761036457964785466)) }, { INT64_C(-1634122079602729117), INT64_C( -197548662778062026), INT64_C( 8430179558397234507), INT64_C(-6847424589158690122), INT64_C( 108510191489750517), INT64_C(-5960504929867173559), INT64_C( 7692198613837645958), INT64_C(-5086195662698067529), simde_mm512_set_epi64(INT64_C(-5086195662698067529), INT64_C( 7692198613837645958), INT64_C(-5960504929867173559), INT64_C( 108510191489750517), INT64_C(-6847424589158690122), INT64_C( 8430179558397234507), INT64_C( -197548662778062026), 
INT64_C(-1634122079602729117)) }, { INT64_C(-2013722991297495958), INT64_C( -331332851212596030), INT64_C(-7644478161857365687), INT64_C(-5200258926547250644), INT64_C(-7000971193485373522), INT64_C( 3470026623990179641), INT64_C(-5506978056254809529), INT64_C( 5213166683745714657), simde_mm512_set_epi64(INT64_C( 5213166683745714657), INT64_C(-5506978056254809529), INT64_C( 3470026623990179641), INT64_C(-7000971193485373522), INT64_C(-5200258926547250644), INT64_C(-7644478161857365687), INT64_C( -331332851212596030), INT64_C(-2013722991297495958)) }, { INT64_C(-7035344274950827789), INT64_C( 6421674353147720323), INT64_C( 7861915403667476835), INT64_C(-7744076081560701528), INT64_C(-5000728962608243188), INT64_C(-2643855462940399334), INT64_C( -358888979465373416), INT64_C( 5439855456779249715), simde_mm512_set_epi64(INT64_C( 5439855456779249715), INT64_C( -358888979465373416), INT64_C(-2643855462940399334), INT64_C(-5000728962608243188), INT64_C(-7744076081560701528), INT64_C( 7861915403667476835), INT64_C( 6421674353147720323), INT64_C(-7035344274950827789)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_setr_epi64( test_vec[i].e7, test_vec[i].e6, test_vec[i].e5, test_vec[i].e4, test_vec[i].e3, test_vec[i].e2, test_vec[i].e1, test_vec[i].e0); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_setr_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float32 e15; simde_float32 e14; simde_float32 e13; simde_float32 e12; simde_float32 e11; simde_float32 e10; simde_float32 e9; simde_float32 e8; simde_float32 e7; simde_float32 e6; simde_float32 e5; simde_float32 e4; simde_float32 e3; simde_float32 e2; simde_float32 e1; simde_float32 e0; simde__m512 r; } test_vec[8] = { { SIMDE_FLOAT32_C( -693.34), SIMDE_FLOAT32_C( -391.55), SIMDE_FLOAT32_C( 340.77), SIMDE_FLOAT32_C( 396.59), SIMDE_FLOAT32_C( 776.91), SIMDE_FLOAT32_C( 317.46), SIMDE_FLOAT32_C( -394.47), SIMDE_FLOAT32_C( 
664.34), SIMDE_FLOAT32_C( 353.81), SIMDE_FLOAT32_C( -365.45), SIMDE_FLOAT32_C( 141.75), SIMDE_FLOAT32_C( 338.26), SIMDE_FLOAT32_C( -898.63), SIMDE_FLOAT32_C( -717.56), SIMDE_FLOAT32_C( -763.21), SIMDE_FLOAT32_C( -594.53), simde_mm512_set_ps(SIMDE_FLOAT32_C( -594.53), SIMDE_FLOAT32_C( -763.21), SIMDE_FLOAT32_C( -717.56), SIMDE_FLOAT32_C( -898.63), SIMDE_FLOAT32_C( 338.26), SIMDE_FLOAT32_C( 141.75), SIMDE_FLOAT32_C( -365.45), SIMDE_FLOAT32_C( 353.81), SIMDE_FLOAT32_C( 664.34), SIMDE_FLOAT32_C( -394.47), SIMDE_FLOAT32_C( 317.46), SIMDE_FLOAT32_C( 776.91), SIMDE_FLOAT32_C( 396.59), SIMDE_FLOAT32_C( 340.77), SIMDE_FLOAT32_C( -391.55), SIMDE_FLOAT32_C( -693.34)) }, { SIMDE_FLOAT32_C( -212.87), SIMDE_FLOAT32_C( -107.93), SIMDE_FLOAT32_C( 395.61), SIMDE_FLOAT32_C( 321.82), SIMDE_FLOAT32_C( 802.07), SIMDE_FLOAT32_C( -157.82), SIMDE_FLOAT32_C( 306.15), SIMDE_FLOAT32_C( 501.93), SIMDE_FLOAT32_C( -150.29), SIMDE_FLOAT32_C( -247.68), SIMDE_FLOAT32_C( 472.68), SIMDE_FLOAT32_C( 263.49), SIMDE_FLOAT32_C( 775.84), SIMDE_FLOAT32_C( 573.19), SIMDE_FLOAT32_C( -361.82), SIMDE_FLOAT32_C( 173.65), simde_mm512_set_ps(SIMDE_FLOAT32_C( 173.65), SIMDE_FLOAT32_C( -361.82), SIMDE_FLOAT32_C( 573.19), SIMDE_FLOAT32_C( 775.84), SIMDE_FLOAT32_C( 263.49), SIMDE_FLOAT32_C( 472.68), SIMDE_FLOAT32_C( -247.68), SIMDE_FLOAT32_C( -150.29), SIMDE_FLOAT32_C( 501.93), SIMDE_FLOAT32_C( 306.15), SIMDE_FLOAT32_C( -157.82), SIMDE_FLOAT32_C( 802.07), SIMDE_FLOAT32_C( 321.82), SIMDE_FLOAT32_C( 395.61), SIMDE_FLOAT32_C( -107.93), SIMDE_FLOAT32_C( -212.87)) }, { SIMDE_FLOAT32_C( 44.50), SIMDE_FLOAT32_C( -431.57), SIMDE_FLOAT32_C( 308.32), SIMDE_FLOAT32_C( -958.90), SIMDE_FLOAT32_C( -174.46), SIMDE_FLOAT32_C( -298.65), SIMDE_FLOAT32_C( 209.15), SIMDE_FLOAT32_C( 439.10), SIMDE_FLOAT32_C( -926.44), SIMDE_FLOAT32_C( -768.87), SIMDE_FLOAT32_C( 291.37), SIMDE_FLOAT32_C( 727.05), SIMDE_FLOAT32_C( -573.28), SIMDE_FLOAT32_C( 36.75), SIMDE_FLOAT32_C( -302.25), SIMDE_FLOAT32_C( 571.00), simde_mm512_set_ps(SIMDE_FLOAT32_C( 
571.00), SIMDE_FLOAT32_C( -302.25), SIMDE_FLOAT32_C( 36.75), SIMDE_FLOAT32_C( -573.28), SIMDE_FLOAT32_C( 727.05), SIMDE_FLOAT32_C( 291.37), SIMDE_FLOAT32_C( -768.87), SIMDE_FLOAT32_C( -926.44), SIMDE_FLOAT32_C( 439.10), SIMDE_FLOAT32_C( 209.15), SIMDE_FLOAT32_C( -298.65), SIMDE_FLOAT32_C( -174.46), SIMDE_FLOAT32_C( -958.90), SIMDE_FLOAT32_C( 308.32), SIMDE_FLOAT32_C( -431.57), SIMDE_FLOAT32_C( 44.50)) }, { SIMDE_FLOAT32_C( 7.49), SIMDE_FLOAT32_C( -164.63), SIMDE_FLOAT32_C( 120.97), SIMDE_FLOAT32_C( -584.72), SIMDE_FLOAT32_C( -840.08), SIMDE_FLOAT32_C( -642.99), SIMDE_FLOAT32_C( -632.99), SIMDE_FLOAT32_C( 911.46), SIMDE_FLOAT32_C( 708.81), SIMDE_FLOAT32_C( 815.55), SIMDE_FLOAT32_C( 163.40), SIMDE_FLOAT32_C( 42.16), SIMDE_FLOAT32_C( -604.25), SIMDE_FLOAT32_C( -667.70), SIMDE_FLOAT32_C( -123.35), SIMDE_FLOAT32_C( -145.23), simde_mm512_set_ps(SIMDE_FLOAT32_C( -145.23), SIMDE_FLOAT32_C( -123.35), SIMDE_FLOAT32_C( -667.70), SIMDE_FLOAT32_C( -604.25), SIMDE_FLOAT32_C( 42.16), SIMDE_FLOAT32_C( 163.40), SIMDE_FLOAT32_C( 815.55), SIMDE_FLOAT32_C( 708.81), SIMDE_FLOAT32_C( 911.46), SIMDE_FLOAT32_C( -632.99), SIMDE_FLOAT32_C( -642.99), SIMDE_FLOAT32_C( -840.08), SIMDE_FLOAT32_C( -584.72), SIMDE_FLOAT32_C( 120.97), SIMDE_FLOAT32_C( -164.63), SIMDE_FLOAT32_C( 7.49)) }, { SIMDE_FLOAT32_C( 363.26), SIMDE_FLOAT32_C( 849.07), SIMDE_FLOAT32_C( 296.23), SIMDE_FLOAT32_C( -961.65), SIMDE_FLOAT32_C( -607.94), SIMDE_FLOAT32_C( -947.07), SIMDE_FLOAT32_C( 8.40), SIMDE_FLOAT32_C( -646.90), SIMDE_FLOAT32_C( -23.65), SIMDE_FLOAT32_C( -317.45), SIMDE_FLOAT32_C( -389.74), SIMDE_FLOAT32_C( -24.49), SIMDE_FLOAT32_C( -609.72), SIMDE_FLOAT32_C( -45.92), SIMDE_FLOAT32_C( 85.01), SIMDE_FLOAT32_C( 775.08), simde_mm512_set_ps(SIMDE_FLOAT32_C( 775.08), SIMDE_FLOAT32_C( 85.01), SIMDE_FLOAT32_C( -45.92), SIMDE_FLOAT32_C( -609.72), SIMDE_FLOAT32_C( -24.49), SIMDE_FLOAT32_C( -389.74), SIMDE_FLOAT32_C( -317.45), SIMDE_FLOAT32_C( -23.65), SIMDE_FLOAT32_C( -646.90), SIMDE_FLOAT32_C( 8.40), SIMDE_FLOAT32_C( 
-947.07), SIMDE_FLOAT32_C( -607.94), SIMDE_FLOAT32_C( -961.65), SIMDE_FLOAT32_C( 296.23), SIMDE_FLOAT32_C( 849.07), SIMDE_FLOAT32_C( 363.26)) }, { SIMDE_FLOAT32_C( -743.67), SIMDE_FLOAT32_C( 584.63), SIMDE_FLOAT32_C( 124.10), SIMDE_FLOAT32_C( 44.15), SIMDE_FLOAT32_C( 97.80), SIMDE_FLOAT32_C( -183.84), SIMDE_FLOAT32_C( 775.52), SIMDE_FLOAT32_C( -276.30), SIMDE_FLOAT32_C( 521.23), SIMDE_FLOAT32_C( -285.93), SIMDE_FLOAT32_C( 127.02), SIMDE_FLOAT32_C( 104.60), SIMDE_FLOAT32_C( 881.59), SIMDE_FLOAT32_C( 934.75), SIMDE_FLOAT32_C( -126.94), SIMDE_FLOAT32_C( 948.78), simde_mm512_set_ps(SIMDE_FLOAT32_C( 948.78), SIMDE_FLOAT32_C( -126.94), SIMDE_FLOAT32_C( 934.75), SIMDE_FLOAT32_C( 881.59), SIMDE_FLOAT32_C( 104.60), SIMDE_FLOAT32_C( 127.02), SIMDE_FLOAT32_C( -285.93), SIMDE_FLOAT32_C( 521.23), SIMDE_FLOAT32_C( -276.30), SIMDE_FLOAT32_C( 775.52), SIMDE_FLOAT32_C( -183.84), SIMDE_FLOAT32_C( 97.80), SIMDE_FLOAT32_C( 44.15), SIMDE_FLOAT32_C( 124.10), SIMDE_FLOAT32_C( 584.63), SIMDE_FLOAT32_C( -743.67)) }, { SIMDE_FLOAT32_C( 385.42), SIMDE_FLOAT32_C( 69.88), SIMDE_FLOAT32_C( 900.97), SIMDE_FLOAT32_C( -107.25), SIMDE_FLOAT32_C( -574.36), SIMDE_FLOAT32_C( 748.65), SIMDE_FLOAT32_C( -126.70), SIMDE_FLOAT32_C( -459.43), SIMDE_FLOAT32_C( -231.22), SIMDE_FLOAT32_C( 815.77), SIMDE_FLOAT32_C( -476.98), SIMDE_FLOAT32_C( 236.36), SIMDE_FLOAT32_C( -834.38), SIMDE_FLOAT32_C( -511.69), SIMDE_FLOAT32_C( -443.09), SIMDE_FLOAT32_C( -508.39), simde_mm512_set_ps(SIMDE_FLOAT32_C( -508.39), SIMDE_FLOAT32_C( -443.09), SIMDE_FLOAT32_C( -511.69), SIMDE_FLOAT32_C( -834.38), SIMDE_FLOAT32_C( 236.36), SIMDE_FLOAT32_C( -476.98), SIMDE_FLOAT32_C( 815.77), SIMDE_FLOAT32_C( -231.22), SIMDE_FLOAT32_C( -459.43), SIMDE_FLOAT32_C( -126.70), SIMDE_FLOAT32_C( 748.65), SIMDE_FLOAT32_C( -574.36), SIMDE_FLOAT32_C( -107.25), SIMDE_FLOAT32_C( 900.97), SIMDE_FLOAT32_C( 69.88), SIMDE_FLOAT32_C( 385.42)) }, { SIMDE_FLOAT32_C( -878.65), SIMDE_FLOAT32_C( -787.20), SIMDE_FLOAT32_C( -378.35), SIMDE_FLOAT32_C( -406.07), 
SIMDE_FLOAT32_C( 822.02), SIMDE_FLOAT32_C( -574.07), SIMDE_FLOAT32_C( 229.39), SIMDE_FLOAT32_C( -3.08), SIMDE_FLOAT32_C( -878.41), SIMDE_FLOAT32_C( -80.74), SIMDE_FLOAT32_C( 518.86), SIMDE_FLOAT32_C( -831.41), SIMDE_FLOAT32_C( -639.09), SIMDE_FLOAT32_C( 136.23), SIMDE_FLOAT32_C( -279.71), SIMDE_FLOAT32_C( -447.29), simde_mm512_set_ps(SIMDE_FLOAT32_C( -447.29), SIMDE_FLOAT32_C( -279.71), SIMDE_FLOAT32_C( 136.23), SIMDE_FLOAT32_C( -639.09), SIMDE_FLOAT32_C( -831.41), SIMDE_FLOAT32_C( 518.86), SIMDE_FLOAT32_C( -80.74), SIMDE_FLOAT32_C( -878.41), SIMDE_FLOAT32_C( -3.08), SIMDE_FLOAT32_C( 229.39), SIMDE_FLOAT32_C( -574.07), SIMDE_FLOAT32_C( 822.02), SIMDE_FLOAT32_C( -406.07), SIMDE_FLOAT32_C( -378.35), SIMDE_FLOAT32_C( -787.20), SIMDE_FLOAT32_C( -878.65)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_setr_ps( test_vec[i].e15, test_vec[i].e14, test_vec[i].e13, test_vec[i].e12, test_vec[i].e11, test_vec[i].e10, test_vec[i].e9, test_vec[i].e8, test_vec[i].e7, test_vec[i].e6, test_vec[i].e5, test_vec[i].e4, test_vec[i].e3, test_vec[i].e2, test_vec[i].e1, test_vec[i].e0); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_setr_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float64 e7; simde_float64 e6; simde_float64 e5; simde_float64 e4; simde_float64 e3; simde_float64 e2; simde_float64 e1; simde_float64 e0; simde__m512d r; } test_vec[8] = { { SIMDE_FLOAT64_C( -434.21), SIMDE_FLOAT64_C( -283.66), SIMDE_FLOAT64_C( 252.63), SIMDE_FLOAT64_C( 58.11), SIMDE_FLOAT64_C( 292.72), SIMDE_FLOAT64_C( -266.76), SIMDE_FLOAT64_C( 689.73), SIMDE_FLOAT64_C( 940.04), simde_mm512_set_pd(SIMDE_FLOAT64_C( 940.04), SIMDE_FLOAT64_C( 689.73), SIMDE_FLOAT64_C( -266.76), SIMDE_FLOAT64_C( 292.72), SIMDE_FLOAT64_C( 58.11), SIMDE_FLOAT64_C( 252.63), SIMDE_FLOAT64_C( -283.66), SIMDE_FLOAT64_C( -434.21)) }, { SIMDE_FLOAT64_C( 736.05), SIMDE_FLOAT64_C( -71.17), SIMDE_FLOAT64_C( -331.02), SIMDE_FLOAT64_C( 
508.70), SIMDE_FLOAT64_C( 419.04), SIMDE_FLOAT64_C( 700.48), SIMDE_FLOAT64_C( 623.94), SIMDE_FLOAT64_C( 606.97), simde_mm512_set_pd(SIMDE_FLOAT64_C( 606.97), SIMDE_FLOAT64_C( 623.94), SIMDE_FLOAT64_C( 700.48), SIMDE_FLOAT64_C( 419.04), SIMDE_FLOAT64_C( 508.70), SIMDE_FLOAT64_C( -331.02), SIMDE_FLOAT64_C( -71.17), SIMDE_FLOAT64_C( 736.05)) }, { SIMDE_FLOAT64_C( -489.77), SIMDE_FLOAT64_C( 777.96), SIMDE_FLOAT64_C( -178.61), SIMDE_FLOAT64_C( -524.51), SIMDE_FLOAT64_C( -840.38), SIMDE_FLOAT64_C( -796.18), SIMDE_FLOAT64_C( -772.79), SIMDE_FLOAT64_C( 621.56), simde_mm512_set_pd(SIMDE_FLOAT64_C( 621.56), SIMDE_FLOAT64_C( -772.79), SIMDE_FLOAT64_C( -796.18), SIMDE_FLOAT64_C( -840.38), SIMDE_FLOAT64_C( -524.51), SIMDE_FLOAT64_C( -178.61), SIMDE_FLOAT64_C( 777.96), SIMDE_FLOAT64_C( -489.77)) }, { SIMDE_FLOAT64_C( -273.76), SIMDE_FLOAT64_C( -100.14), SIMDE_FLOAT64_C( -705.52), SIMDE_FLOAT64_C( -912.29), SIMDE_FLOAT64_C( 694.94), SIMDE_FLOAT64_C( 638.47), SIMDE_FLOAT64_C( -57.86), SIMDE_FLOAT64_C( 194.70), simde_mm512_set_pd(SIMDE_FLOAT64_C( 194.70), SIMDE_FLOAT64_C( -57.86), SIMDE_FLOAT64_C( 638.47), SIMDE_FLOAT64_C( 694.94), SIMDE_FLOAT64_C( -912.29), SIMDE_FLOAT64_C( -705.52), SIMDE_FLOAT64_C( -100.14), SIMDE_FLOAT64_C( -273.76)) }, { SIMDE_FLOAT64_C( 72.92), SIMDE_FLOAT64_C( -114.92), SIMDE_FLOAT64_C( -543.69), SIMDE_FLOAT64_C( -956.64), SIMDE_FLOAT64_C( -736.35), SIMDE_FLOAT64_C( -239.62), SIMDE_FLOAT64_C( 526.64), SIMDE_FLOAT64_C( -912.23), simde_mm512_set_pd(SIMDE_FLOAT64_C( -912.23), SIMDE_FLOAT64_C( 526.64), SIMDE_FLOAT64_C( -239.62), SIMDE_FLOAT64_C( -736.35), SIMDE_FLOAT64_C( -956.64), SIMDE_FLOAT64_C( -543.69), SIMDE_FLOAT64_C( -114.92), SIMDE_FLOAT64_C( 72.92)) }, { SIMDE_FLOAT64_C( 460.51), SIMDE_FLOAT64_C( 395.84), SIMDE_FLOAT64_C( 320.81), SIMDE_FLOAT64_C( -538.91), SIMDE_FLOAT64_C( -261.04), SIMDE_FLOAT64_C( -617.09), SIMDE_FLOAT64_C( 102.81), SIMDE_FLOAT64_C( -450.10), simde_mm512_set_pd(SIMDE_FLOAT64_C( -450.10), SIMDE_FLOAT64_C( 102.81), SIMDE_FLOAT64_C( 
-617.09), SIMDE_FLOAT64_C( -261.04), SIMDE_FLOAT64_C( -538.91), SIMDE_FLOAT64_C( 320.81), SIMDE_FLOAT64_C( 395.84), SIMDE_FLOAT64_C( 460.51)) }, { SIMDE_FLOAT64_C( -348.77), SIMDE_FLOAT64_C( -294.21), SIMDE_FLOAT64_C( -435.22), SIMDE_FLOAT64_C( 29.39), SIMDE_FLOAT64_C( 461.71), SIMDE_FLOAT64_C( -58.38), SIMDE_FLOAT64_C( -276.76), SIMDE_FLOAT64_C( 4.34), simde_mm512_set_pd(SIMDE_FLOAT64_C( 4.34), SIMDE_FLOAT64_C( -276.76), SIMDE_FLOAT64_C( -58.38), SIMDE_FLOAT64_C( 461.71), SIMDE_FLOAT64_C( 29.39), SIMDE_FLOAT64_C( -435.22), SIMDE_FLOAT64_C( -294.21), SIMDE_FLOAT64_C( -348.77)) }, { SIMDE_FLOAT64_C( -13.27), SIMDE_FLOAT64_C( 86.22), SIMDE_FLOAT64_C( -609.85), SIMDE_FLOAT64_C( -191.12), SIMDE_FLOAT64_C( -989.14), SIMDE_FLOAT64_C( -547.71), SIMDE_FLOAT64_C( -227.52), SIMDE_FLOAT64_C( -15.18), simde_mm512_set_pd(SIMDE_FLOAT64_C( -15.18), SIMDE_FLOAT64_C( -227.52), SIMDE_FLOAT64_C( -547.71), SIMDE_FLOAT64_C( -989.14), SIMDE_FLOAT64_C( -191.12), SIMDE_FLOAT64_C( -609.85), SIMDE_FLOAT64_C( 86.22), SIMDE_FLOAT64_C( -13.27)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_setr_pd(test_vec[i].e7, test_vec[i].e6, test_vec[i].e5, test_vec[i].e4, test_vec[i].e3, test_vec[i].e2, test_vec[i].e1, test_vec[i].e0); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_setr_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_setr_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_setr_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_setr_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/setr4.c000066400000000000000000000514071400333146700164700ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, 
publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #define SIMDE_TEST_X86_AVX512_INSN setr4 #include #include #include static int test_simde_mm512_setr4_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { int32_t d; int32_t c; int32_t b; int32_t a; simde__m512i r; } test_vec[8] = { { INT32_C( 440568275), INT32_C(-1307171366), INT32_C( -667071334), INT32_C(-1006059139), simde_mm512_set_epi32(INT32_C(-1006059139), INT32_C( -667071334), INT32_C(-1307171366), INT32_C( 440568275), INT32_C(-1006059139), INT32_C( -667071334), INT32_C(-1307171366), INT32_C( 440568275), INT32_C(-1006059139), INT32_C( -667071334), INT32_C(-1307171366), INT32_C( 440568275), INT32_C(-1006059139), INT32_C( -667071334), INT32_C(-1307171366), INT32_C( 440568275)) }, { INT32_C( 985235756), INT32_C(-2117652171), INT32_C( -492848785), INT32_C( 765678538), simde_mm512_set_epi32(INT32_C( 765678538), INT32_C( -492848785), INT32_C(-2117652171), INT32_C( 985235756), INT32_C( 765678538), INT32_C( -492848785), INT32_C(-2117652171), INT32_C( 985235756), INT32_C( 765678538), INT32_C( -492848785), INT32_C(-2117652171), INT32_C( 985235756), INT32_C( 765678538), INT32_C( -492848785), INT32_C(-2117652171), INT32_C( 985235756)) }, { 
INT32_C( 1812566322), INT32_C( -457041277), INT32_C(-1069434801), INT32_C( -605856203), simde_mm512_set_epi32(INT32_C( -605856203), INT32_C(-1069434801), INT32_C( -457041277), INT32_C( 1812566322), INT32_C( -605856203), INT32_C(-1069434801), INT32_C( -457041277), INT32_C( 1812566322), INT32_C( -605856203), INT32_C(-1069434801), INT32_C( -457041277), INT32_C( 1812566322), INT32_C( -605856203), INT32_C(-1069434801), INT32_C( -457041277), INT32_C( 1812566322)) }, { INT32_C( 1968671665), INT32_C( 838296696), INT32_C( -693015358), INT32_C(-1386069498), simde_mm512_set_epi32(INT32_C(-1386069498), INT32_C( -693015358), INT32_C( 838296696), INT32_C( 1968671665), INT32_C(-1386069498), INT32_C( -693015358), INT32_C( 838296696), INT32_C( 1968671665), INT32_C(-1386069498), INT32_C( -693015358), INT32_C( 838296696), INT32_C( 1968671665), INT32_C(-1386069498), INT32_C( -693015358), INT32_C( 838296696), INT32_C( 1968671665)) }, { INT32_C( 717585874), INT32_C( -870190090), INT32_C( 62628055), INT32_C(-1058408989), simde_mm512_set_epi32(INT32_C(-1058408989), INT32_C( 62628055), INT32_C( -870190090), INT32_C( 717585874), INT32_C(-1058408989), INT32_C( 62628055), INT32_C( -870190090), INT32_C( 717585874), INT32_C(-1058408989), INT32_C( 62628055), INT32_C( -870190090), INT32_C( 717585874), INT32_C(-1058408989), INT32_C( 62628055), INT32_C( -870190090), INT32_C( 717585874)) }, { INT32_C( -646678116), INT32_C( -636471021), INT32_C( 2050242002), INT32_C( 1467573389), simde_mm512_set_epi32(INT32_C( 1467573389), INT32_C( 2050242002), INT32_C( -636471021), INT32_C( -646678116), INT32_C( 1467573389), INT32_C( 2050242002), INT32_C( -636471021), INT32_C( -646678116), INT32_C( 1467573389), INT32_C( 2050242002), INT32_C( -636471021), INT32_C( -646678116), INT32_C( 1467573389), INT32_C( 2050242002), INT32_C( -636471021), INT32_C( -646678116)) }, { INT32_C( -468604998), INT32_C( 416458537), INT32_C(-1356493538), INT32_C( -338084785), simde_mm512_set_epi32(INT32_C( -338084785), 
INT32_C(-1356493538), INT32_C( 416458537), INT32_C( -468604998), INT32_C( -338084785), INT32_C(-1356493538), INT32_C( 416458537), INT32_C( -468604998), INT32_C( -338084785), INT32_C(-1356493538), INT32_C( 416458537), INT32_C( -468604998), INT32_C( -338084785), INT32_C(-1356493538), INT32_C( 416458537), INT32_C( -468604998)) }, { INT32_C( 1519812884), INT32_C( 743581731), INT32_C(-1035717687), INT32_C( -38963525), simde_mm512_set_epi32(INT32_C( -38963525), INT32_C(-1035717687), INT32_C( 743581731), INT32_C( 1519812884), INT32_C( -38963525), INT32_C(-1035717687), INT32_C( 743581731), INT32_C( 1519812884), INT32_C( -38963525), INT32_C(-1035717687), INT32_C( 743581731), INT32_C( 1519812884), INT32_C( -38963525), INT32_C(-1035717687), INT32_C( 743581731), INT32_C( 1519812884)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_setr4_epi32( test_vec[i].d, test_vec[i].c, test_vec[i].b, test_vec[i].a); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_setr4_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { int64_t d; int64_t c; int64_t b; int64_t a; simde__m512i r; } test_vec[8] = { { INT64_C( 6563849718269597141), INT64_C(-6183679436467555899), INT64_C( -626758305238464386), INT64_C( 8994159492887548356), simde_mm512_set_epi64(INT64_C( 8994159492887548356), INT64_C( -626758305238464386), INT64_C(-6183679436467555899), INT64_C( 6563849718269597141), INT64_C( 8994159492887548356), INT64_C( -626758305238464386), INT64_C(-6183679436467555899), INT64_C( 6563849718269597141)) }, { INT64_C( 6729148419260484779), INT64_C( -277511807975612216), INT64_C(-8071294684814160544), INT64_C( 9097495128638227239), simde_mm512_set_epi64(INT64_C( 9097495128638227239), INT64_C(-8071294684814160544), INT64_C( -277511807975612216), INT64_C( 6729148419260484779), INT64_C( 9097495128638227239), INT64_C(-8071294684814160544), INT64_C( -277511807975612216), INT64_C( 6729148419260484779)) }, { INT64_C( 
2451446111308764542), INT64_C( 7443262200234995807), INT64_C( 1452118817457897022), INT64_C( 8577124855339817739), simde_mm512_set_epi64(INT64_C( 8577124855339817739), INT64_C( 1452118817457897022), INT64_C( 7443262200234995807), INT64_C( 2451446111308764542), INT64_C( 8577124855339817739), INT64_C( 1452118817457897022), INT64_C( 7443262200234995807), INT64_C( 2451446111308764542)) }, { INT64_C( 5794476453905478874), INT64_C(-1405809235656433875), INT64_C(-9152840578969258696), INT64_C( 8562326329950659697), simde_mm512_set_epi64(INT64_C( 8562326329950659697), INT64_C(-9152840578969258696), INT64_C(-1405809235656433875), INT64_C( 5794476453905478874), INT64_C( 8562326329950659697), INT64_C(-9152840578969258696), INT64_C(-1405809235656433875), INT64_C( 5794476453905478874)) }, { INT64_C(-8764167661207563767), INT64_C( -157881503650322899), INT64_C(-4202918664443291804), INT64_C( 2806446076990238010), simde_mm512_set_epi64(INT64_C( 2806446076990238010), INT64_C(-4202918664443291804), INT64_C( -157881503650322899), INT64_C(-8764167661207563767), INT64_C( 2806446076990238010), INT64_C(-4202918664443291804), INT64_C( -157881503650322899), INT64_C(-8764167661207563767)) }, { INT64_C(-5837281652074857748), INT64_C(-7080037588592146058), INT64_C(-4482514275105483583), INT64_C( 7870122127635681284), simde_mm512_set_epi64(INT64_C( 7870122127635681284), INT64_C(-4482514275105483583), INT64_C(-7080037588592146058), INT64_C(-5837281652074857748), INT64_C( 7870122127635681284), INT64_C(-4482514275105483583), INT64_C(-7080037588592146058), INT64_C(-5837281652074857748)) }, { INT64_C(-2741649954653767454), INT64_C( 7022257894354348987), INT64_C(-7607333645615092101), INT64_C( 3821399499306603551), simde_mm512_set_epi64(INT64_C( 3821399499306603551), INT64_C(-7607333645615092101), INT64_C( 7022257894354348987), INT64_C(-2741649954653767454), INT64_C( 3821399499306603551), INT64_C(-7607333645615092101), INT64_C( 7022257894354348987), INT64_C(-2741649954653767454)) }, { INT64_C( 
6134432460743068033), INT64_C( 1716871541978724160), INT64_C(-7436535278984624040), INT64_C( 1873233539406121615), simde_mm512_set_epi64(INT64_C( 1873233539406121615), INT64_C(-7436535278984624040), INT64_C( 1716871541978724160), INT64_C( 6134432460743068033), INT64_C( 1873233539406121615), INT64_C(-7436535278984624040), INT64_C( 1716871541978724160), INT64_C( 6134432460743068033)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_setr4_epi64( test_vec[i].d, test_vec[i].c, test_vec[i].b, test_vec[i].a); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_setr4_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float32 d; simde_float32 c; simde_float32 b; simde_float32 a; simde__m512 r; } test_vec[8] = { { SIMDE_FLOAT32_C( -92.68), SIMDE_FLOAT32_C( 845.12), SIMDE_FLOAT32_C( -953.73), SIMDE_FLOAT32_C( 237.00), simde_mm512_set_ps(SIMDE_FLOAT32_C( 237.00), SIMDE_FLOAT32_C( -953.73), SIMDE_FLOAT32_C( 845.12), SIMDE_FLOAT32_C( -92.68), SIMDE_FLOAT32_C( 237.00), SIMDE_FLOAT32_C( -953.73), SIMDE_FLOAT32_C( 845.12), SIMDE_FLOAT32_C( -92.68), SIMDE_FLOAT32_C( 237.00), SIMDE_FLOAT32_C( -953.73), SIMDE_FLOAT32_C( 845.12), SIMDE_FLOAT32_C( -92.68), SIMDE_FLOAT32_C( 237.00), SIMDE_FLOAT32_C( -953.73), SIMDE_FLOAT32_C( 845.12), SIMDE_FLOAT32_C( -92.68)) }, { SIMDE_FLOAT32_C( -555.84), SIMDE_FLOAT32_C( -722.05), SIMDE_FLOAT32_C( -788.55), SIMDE_FLOAT32_C( 545.68), simde_mm512_set_ps(SIMDE_FLOAT32_C( 545.68), SIMDE_FLOAT32_C( -788.55), SIMDE_FLOAT32_C( -722.05), SIMDE_FLOAT32_C( -555.84), SIMDE_FLOAT32_C( 545.68), SIMDE_FLOAT32_C( -788.55), SIMDE_FLOAT32_C( -722.05), SIMDE_FLOAT32_C( -555.84), SIMDE_FLOAT32_C( 545.68), SIMDE_FLOAT32_C( -788.55), SIMDE_FLOAT32_C( -722.05), SIMDE_FLOAT32_C( -555.84), SIMDE_FLOAT32_C( 545.68), SIMDE_FLOAT32_C( -788.55), SIMDE_FLOAT32_C( -722.05), SIMDE_FLOAT32_C( -555.84)) }, { SIMDE_FLOAT32_C( 823.18), SIMDE_FLOAT32_C( -207.95), SIMDE_FLOAT32_C( -413.77), 
SIMDE_FLOAT32_C( 808.21), simde_mm512_set_ps(SIMDE_FLOAT32_C( 808.21), SIMDE_FLOAT32_C( -413.77), SIMDE_FLOAT32_C( -207.95), SIMDE_FLOAT32_C( 823.18), SIMDE_FLOAT32_C( 808.21), SIMDE_FLOAT32_C( -413.77), SIMDE_FLOAT32_C( -207.95), SIMDE_FLOAT32_C( 823.18), SIMDE_FLOAT32_C( 808.21), SIMDE_FLOAT32_C( -413.77), SIMDE_FLOAT32_C( -207.95), SIMDE_FLOAT32_C( 823.18), SIMDE_FLOAT32_C( 808.21), SIMDE_FLOAT32_C( -413.77), SIMDE_FLOAT32_C( -207.95), SIMDE_FLOAT32_C( 823.18)) }, { SIMDE_FLOAT32_C( -179.14), SIMDE_FLOAT32_C( 28.27), SIMDE_FLOAT32_C( -190.88), SIMDE_FLOAT32_C( -337.32), simde_mm512_set_ps(SIMDE_FLOAT32_C( -337.32), SIMDE_FLOAT32_C( -190.88), SIMDE_FLOAT32_C( 28.27), SIMDE_FLOAT32_C( -179.14), SIMDE_FLOAT32_C( -337.32), SIMDE_FLOAT32_C( -190.88), SIMDE_FLOAT32_C( 28.27), SIMDE_FLOAT32_C( -179.14), SIMDE_FLOAT32_C( -337.32), SIMDE_FLOAT32_C( -190.88), SIMDE_FLOAT32_C( 28.27), SIMDE_FLOAT32_C( -179.14), SIMDE_FLOAT32_C( -337.32), SIMDE_FLOAT32_C( -190.88), SIMDE_FLOAT32_C( 28.27), SIMDE_FLOAT32_C( -179.14)) }, { SIMDE_FLOAT32_C( -691.46), SIMDE_FLOAT32_C( -801.82), SIMDE_FLOAT32_C( -579.89), SIMDE_FLOAT32_C( -420.42), simde_mm512_set_ps(SIMDE_FLOAT32_C( -420.42), SIMDE_FLOAT32_C( -579.89), SIMDE_FLOAT32_C( -801.82), SIMDE_FLOAT32_C( -691.46), SIMDE_FLOAT32_C( -420.42), SIMDE_FLOAT32_C( -579.89), SIMDE_FLOAT32_C( -801.82), SIMDE_FLOAT32_C( -691.46), SIMDE_FLOAT32_C( -420.42), SIMDE_FLOAT32_C( -579.89), SIMDE_FLOAT32_C( -801.82), SIMDE_FLOAT32_C( -691.46), SIMDE_FLOAT32_C( -420.42), SIMDE_FLOAT32_C( -579.89), SIMDE_FLOAT32_C( -801.82), SIMDE_FLOAT32_C( -691.46)) }, { SIMDE_FLOAT32_C( 490.22), SIMDE_FLOAT32_C( 560.02), SIMDE_FLOAT32_C( -244.24), SIMDE_FLOAT32_C( 184.70), simde_mm512_set_ps(SIMDE_FLOAT32_C( 184.70), SIMDE_FLOAT32_C( -244.24), SIMDE_FLOAT32_C( 560.02), SIMDE_FLOAT32_C( 490.22), SIMDE_FLOAT32_C( 184.70), SIMDE_FLOAT32_C( -244.24), SIMDE_FLOAT32_C( 560.02), SIMDE_FLOAT32_C( 490.22), SIMDE_FLOAT32_C( 184.70), SIMDE_FLOAT32_C( -244.24), SIMDE_FLOAT32_C( 
560.02), SIMDE_FLOAT32_C( 490.22), SIMDE_FLOAT32_C( 184.70), SIMDE_FLOAT32_C( -244.24), SIMDE_FLOAT32_C( 560.02), SIMDE_FLOAT32_C( 490.22)) }, { SIMDE_FLOAT32_C( 353.38), SIMDE_FLOAT32_C( 199.20), SIMDE_FLOAT32_C( 132.74), SIMDE_FLOAT32_C( 599.57), simde_mm512_set_ps(SIMDE_FLOAT32_C( 599.57), SIMDE_FLOAT32_C( 132.74), SIMDE_FLOAT32_C( 199.20), SIMDE_FLOAT32_C( 353.38), SIMDE_FLOAT32_C( 599.57), SIMDE_FLOAT32_C( 132.74), SIMDE_FLOAT32_C( 199.20), SIMDE_FLOAT32_C( 353.38), SIMDE_FLOAT32_C( 599.57), SIMDE_FLOAT32_C( 132.74), SIMDE_FLOAT32_C( 199.20), SIMDE_FLOAT32_C( 353.38), SIMDE_FLOAT32_C( 599.57), SIMDE_FLOAT32_C( 132.74), SIMDE_FLOAT32_C( 199.20), SIMDE_FLOAT32_C( 353.38)) }, { SIMDE_FLOAT32_C( -109.85), SIMDE_FLOAT32_C( 62.56), SIMDE_FLOAT32_C( 250.77), SIMDE_FLOAT32_C( -873.95), simde_mm512_set_ps(SIMDE_FLOAT32_C( -873.95), SIMDE_FLOAT32_C( 250.77), SIMDE_FLOAT32_C( 62.56), SIMDE_FLOAT32_C( -109.85), SIMDE_FLOAT32_C( -873.95), SIMDE_FLOAT32_C( 250.77), SIMDE_FLOAT32_C( 62.56), SIMDE_FLOAT32_C( -109.85), SIMDE_FLOAT32_C( -873.95), SIMDE_FLOAT32_C( 250.77), SIMDE_FLOAT32_C( 62.56), SIMDE_FLOAT32_C( -109.85), SIMDE_FLOAT32_C( -873.95), SIMDE_FLOAT32_C( 250.77), SIMDE_FLOAT32_C( 62.56), SIMDE_FLOAT32_C( -109.85)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_setr4_ps( test_vec[i].d, test_vec[i].c, test_vec[i].b, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_setr4_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float64 d; simde_float64 c; simde_float64 b; simde_float64 a; simde__m512d r; } test_vec[8] = { { SIMDE_FLOAT64_C( -159.85), SIMDE_FLOAT64_C( 360.42), SIMDE_FLOAT64_C( -560.02), SIMDE_FLOAT64_C( -340.11), simde_mm512_set_pd(SIMDE_FLOAT64_C( -340.11), SIMDE_FLOAT64_C( -560.02), SIMDE_FLOAT64_C( 360.42), SIMDE_FLOAT64_C( -159.85), SIMDE_FLOAT64_C( -340.11), SIMDE_FLOAT64_C( -560.02), SIMDE_FLOAT64_C( 360.42), SIMDE_FLOAT64_C( -159.85)) }, { 
SIMDE_FLOAT64_C( 76.83), SIMDE_FLOAT64_C( -871.20), SIMDE_FLOAT64_C( 277.42), SIMDE_FLOAT64_C( 632.86), simde_mm512_set_pd(SIMDE_FLOAT64_C( 632.86), SIMDE_FLOAT64_C( 277.42), SIMDE_FLOAT64_C( -871.20), SIMDE_FLOAT64_C( 76.83), SIMDE_FLOAT64_C( 632.86), SIMDE_FLOAT64_C( 277.42), SIMDE_FLOAT64_C( -871.20), SIMDE_FLOAT64_C( 76.83)) }, { SIMDE_FLOAT64_C( 908.32), SIMDE_FLOAT64_C( -754.84), SIMDE_FLOAT64_C( -232.66), SIMDE_FLOAT64_C( 453.94), simde_mm512_set_pd(SIMDE_FLOAT64_C( 453.94), SIMDE_FLOAT64_C( -232.66), SIMDE_FLOAT64_C( -754.84), SIMDE_FLOAT64_C( 908.32), SIMDE_FLOAT64_C( 453.94), SIMDE_FLOAT64_C( -232.66), SIMDE_FLOAT64_C( -754.84), SIMDE_FLOAT64_C( 908.32)) }, { SIMDE_FLOAT64_C( 389.27), SIMDE_FLOAT64_C( 400.56), SIMDE_FLOAT64_C( 223.12), SIMDE_FLOAT64_C( -299.15), simde_mm512_set_pd(SIMDE_FLOAT64_C( -299.15), SIMDE_FLOAT64_C( 223.12), SIMDE_FLOAT64_C( 400.56), SIMDE_FLOAT64_C( 389.27), SIMDE_FLOAT64_C( -299.15), SIMDE_FLOAT64_C( 223.12), SIMDE_FLOAT64_C( 400.56), SIMDE_FLOAT64_C( 389.27)) }, { SIMDE_FLOAT64_C( 642.96), SIMDE_FLOAT64_C( 603.97), SIMDE_FLOAT64_C( -782.74), SIMDE_FLOAT64_C( 593.11), simde_mm512_set_pd(SIMDE_FLOAT64_C( 593.11), SIMDE_FLOAT64_C( -782.74), SIMDE_FLOAT64_C( 603.97), SIMDE_FLOAT64_C( 642.96), SIMDE_FLOAT64_C( 593.11), SIMDE_FLOAT64_C( -782.74), SIMDE_FLOAT64_C( 603.97), SIMDE_FLOAT64_C( 642.96)) }, { SIMDE_FLOAT64_C( 918.13), SIMDE_FLOAT64_C( 886.70), SIMDE_FLOAT64_C( 337.10), SIMDE_FLOAT64_C( -359.87), simde_mm512_set_pd(SIMDE_FLOAT64_C( -359.87), SIMDE_FLOAT64_C( 337.10), SIMDE_FLOAT64_C( 886.70), SIMDE_FLOAT64_C( 918.13), SIMDE_FLOAT64_C( -359.87), SIMDE_FLOAT64_C( 337.10), SIMDE_FLOAT64_C( 886.70), SIMDE_FLOAT64_C( 918.13)) }, { SIMDE_FLOAT64_C( 794.16), SIMDE_FLOAT64_C( -191.83), SIMDE_FLOAT64_C( -298.69), SIMDE_FLOAT64_C( 612.50), simde_mm512_set_pd(SIMDE_FLOAT64_C( 612.50), SIMDE_FLOAT64_C( -298.69), SIMDE_FLOAT64_C( -191.83), SIMDE_FLOAT64_C( 794.16), SIMDE_FLOAT64_C( 612.50), SIMDE_FLOAT64_C( -298.69), SIMDE_FLOAT64_C( 
-191.83), SIMDE_FLOAT64_C( 794.16)) }, { SIMDE_FLOAT64_C( 850.90), SIMDE_FLOAT64_C( -669.22), SIMDE_FLOAT64_C( -90.20), SIMDE_FLOAT64_C( 431.18), simde_mm512_set_pd(SIMDE_FLOAT64_C( 431.18), SIMDE_FLOAT64_C( -90.20), SIMDE_FLOAT64_C( -669.22), SIMDE_FLOAT64_C( 850.90), SIMDE_FLOAT64_C( 431.18), SIMDE_FLOAT64_C( -90.20), SIMDE_FLOAT64_C( -669.22), SIMDE_FLOAT64_C( 850.90)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_setr4_pd(test_vec[i].d, test_vec[i].c, test_vec[i].b, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_setr4_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_setr4_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_setr4_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_setr4_pd ) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/setzero.c000066400000000000000000000041571400333146700171220ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #define SIMDE_TEST_X86_AVX512_INSN setzero #include #include #include #include static int test_simde_mm512_setzero_si512(SIMDE_MUNIT_TEST_ARGS) { simde_assert_m512i_i32(simde_mm512_setzero_si512(), ==, simde_mm512_set1_epi32(INT32_C(0))); return 0; } static int test_simde_mm512_setzero_ps(SIMDE_MUNIT_TEST_ARGS) { simde_assert_m512_close(simde_mm512_setzero_ps(), simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), 1); return 0; } static int test_simde_mm512_setzero_pd(SIMDE_MUNIT_TEST_ARGS) { simde_assert_m512d_close(simde_mm512_setzero_pd(), simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), 1); return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_setzero_si512) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_setzero_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_setzero_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/shuffle.c000066400000000000000000011757171400333146700170770ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore */ #define SIMDE_TEST_X86_AVX512_INSN shuffle #include #include #include static int test_simde_mm512_shuffle_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( 56), INT8_C( -94), INT8_C( -41), INT8_C( -59), INT8_C( 40), INT8_C( 78), INT8_C( 93), INT8_C( 107), INT8_C( -47), INT8_C( 66), INT8_C( -8), INT8_C( -52), INT8_C( 50), INT8_C( 78), INT8_C( 13), INT8_C( 26), INT8_C( -33), INT8_C( 120), INT8_C( 67), INT8_C( -12), INT8_C( 80), INT8_C( 88), INT8_C( -65), INT8_C( 49), INT8_C( 28), INT8_C( 96), INT8_C( -99), INT8_C(-119), INT8_C( 126), INT8_C( 24), INT8_C( 35), INT8_C( 11), INT8_C( -43), INT8_C( -24), INT8_C( 1), INT8_C( -61), INT8_C( 101), INT8_C( 5), INT8_C( 67), INT8_C( -42), INT8_C( -46), INT8_C(-115), INT8_C(-105), INT8_C( -92), INT8_C( -3), INT8_C( -44), INT8_C( 13), INT8_C( -36), INT8_C( 110), INT8_C( -38), INT8_C( -48), INT8_C( 36), INT8_C( 117), INT8_C( -59), INT8_C( 109), INT8_C( -27), INT8_C( -62), INT8_C( 98), INT8_C(-121), INT8_C(-108), INT8_C( -7), INT8_C(-112), INT8_C( -52), INT8_C( -84)), simde_mm512_set_epi8(INT8_C( 34), INT8_C( 34), INT8_C( -71), INT8_C( -63), INT8_C( -34), INT8_C(-128), INT8_C( 113), INT8_C( -72), INT8_C( 86), INT8_C( -73), INT8_C( -43), INT8_C( 95), INT8_C( 3), INT8_C( 13), INT8_C( 19), INT8_C( -1), INT8_C( -54), INT8_C( 114), INT8_C( 27), INT8_C( -30), INT8_C( 52), INT8_C( -54), INT8_C( 78), INT8_C( 23), 
INT8_C( 71), INT8_C( 87), INT8_C( 107), INT8_C( -27), INT8_C( -35), INT8_C(-122), INT8_C( 40), INT8_C( 55), INT8_C( -2), INT8_C( 40), INT8_C( -63), INT8_C( 85), INT8_C( -25), INT8_C( -93), INT8_C( 25), INT8_C( 70), INT8_C( 117), INT8_C( 66), INT8_C( -79), INT8_C( 57), INT8_C(-115), INT8_C( -27), INT8_C( 59), INT8_C( 25), INT8_C( -6), INT8_C( 42), INT8_C( -27), INT8_C( -34), INT8_C( -16), INT8_C( 5), INT8_C( -55), INT8_C( 74), INT8_C( -29), INT8_C( 77), INT8_C( -17), INT8_C( 16), INT8_C( -98), INT8_C( -38), INT8_C( 116), INT8_C( -56)), simde_mm512_set_epi8(INT8_C( 78), INT8_C( 78), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 13), INT8_C( 0), INT8_C( 66), INT8_C( 0), INT8_C( 0), INT8_C( 56), INT8_C( 50), INT8_C( -41), INT8_C( 50), INT8_C( 0), INT8_C( 0), INT8_C( 24), INT8_C( 80), INT8_C( 0), INT8_C(-119), INT8_C( 0), INT8_C( 120), INT8_C( 28), INT8_C( 28), INT8_C( 28), INT8_C( 80), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 49), INT8_C( 28), INT8_C( 0), INT8_C( -42), INT8_C( 0), INT8_C(-105), INT8_C( 0), INT8_C( 0), INT8_C( 67), INT8_C(-115), INT8_C(-105), INT8_C( -44), INT8_C( 0), INT8_C( 67), INT8_C( 0), INT8_C( 0), INT8_C( 101), INT8_C( 67), INT8_C( 0), INT8_C( -59), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-121), INT8_C( 0), INT8_C( -59), INT8_C( 0), INT8_C( -48), INT8_C( 0), INT8_C( -84), INT8_C( 0), INT8_C( 0), INT8_C(-108), INT8_C( 0)) }, { simde_mm512_set_epi8(INT8_C( -75), INT8_C(-123), INT8_C( -44), INT8_C( -97), INT8_C( 61), INT8_C(-112), INT8_C( 109), INT8_C( 27), INT8_C( -9), INT8_C( -31), INT8_C(-111), INT8_C( -59), INT8_C( 94), INT8_C( 110), INT8_C( 31), INT8_C( 50), INT8_C( -86), INT8_C( 24), INT8_C( 91), INT8_C(-105), INT8_C( 39), INT8_C( 25), INT8_C( -65), INT8_C( 69), INT8_C( -59), INT8_C( -57), INT8_C( -98), INT8_C( 26), INT8_C( -35), INT8_C( -2), INT8_C( 114), INT8_C( -51), INT8_C( -16), INT8_C( 101), INT8_C( -92), INT8_C( 65), INT8_C( -88), INT8_C( 19), INT8_C(-102), INT8_C( -51), INT8_C( -94), INT8_C( -30), INT8_C( 60), INT8_C( 
-9), INT8_C( 4), INT8_C( -93), INT8_C( 110), INT8_C(-102), INT8_C( 109), INT8_C( 1), INT8_C( 113), INT8_C( 114), INT8_C( 100), INT8_C( 80), INT8_C( 18), INT8_C( -7), INT8_C( -87), INT8_C( 0), INT8_C( -55), INT8_C( 37), INT8_C( 13), INT8_C( 86), INT8_C(-114), INT8_C(-125)), simde_mm512_set_epi8(INT8_C( 18), INT8_C( 11), INT8_C( 73), INT8_C( 57), INT8_C( -54), INT8_C( 9), INT8_C( 69), INT8_C( -45), INT8_C( 71), INT8_C( -6), INT8_C( -68), INT8_C( -8), INT8_C( 14), INT8_C( -27), INT8_C( 84), INT8_C( -90), INT8_C( -10), INT8_C( -81), INT8_C( 77), INT8_C(-113), INT8_C( 32), INT8_C( 17), INT8_C( 62), INT8_C( -18), INT8_C( 124), INT8_C( -42), INT8_C( -52), INT8_C(-109), INT8_C( -19), INT8_C( 30), INT8_C( -51), INT8_C(-111), INT8_C( 100), INT8_C( 63), INT8_C( -80), INT8_C(-126), INT8_C( 3), INT8_C( 119), INT8_C( 38), INT8_C( -44), INT8_C( -48), INT8_C( -43), INT8_C( 6), INT8_C(-113), INT8_C( -5), INT8_C( -18), INT8_C( 58), INT8_C( 77), INT8_C( -49), INT8_C(-107), INT8_C( -77), INT8_C( 119), INT8_C( -6), INT8_C( 92), INT8_C(-122), INT8_C( 43), INT8_C( 85), INT8_C(-111), INT8_C( -15), INT8_C( 90), INT8_C(-104), INT8_C( 31), INT8_C( -4), INT8_C( 57)), simde_mm512_set_epi8(INT8_C( 110), INT8_C( 61), INT8_C( 109), INT8_C( 109), INT8_C( 0), INT8_C( 109), INT8_C(-111), INT8_C( 0), INT8_C( -9), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-123), INT8_C( 0), INT8_C( -59), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 91), INT8_C( 0), INT8_C( -51), INT8_C( 114), INT8_C( 24), INT8_C( 0), INT8_C(-105), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 24), INT8_C( 0), INT8_C( 0), INT8_C( -9), INT8_C( -16), INT8_C( 0), INT8_C( 0), INT8_C( 4), INT8_C( -94), INT8_C( -30), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -30), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 19), INT8_C( -92), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -87), INT8_C( 0), INT8_C( 114), INT8_C( 0), INT8_C( 100), INT8_C( -55), INT8_C( 0), INT8_C( 0), INT8_C( 80), INT8_C( 0), INT8_C( 109), INT8_C( 0), INT8_C( 18)) }, 
{ simde_mm512_set_epi8(INT8_C( -54), INT8_C( 53), INT8_C( 35), INT8_C( 51), INT8_C( 24), INT8_C( 100), INT8_C( 80), INT8_C( 7), INT8_C( 54), INT8_C( 114), INT8_C( -42), INT8_C( -2), INT8_C( 18), INT8_C( 100), INT8_C( 26), INT8_C( -68), INT8_C( 82), INT8_C( -30), INT8_C( -4), INT8_C( 110), INT8_C( 24), INT8_C( 88), INT8_C(-100), INT8_C(-120), INT8_C( -17), INT8_C(-109), INT8_C( 92), INT8_C(-105), INT8_C( -20), INT8_C( 34), INT8_C( 98), INT8_C( -17), INT8_C( 2), INT8_C( 6), INT8_C( 37), INT8_C( -85), INT8_C( 118), INT8_C( 28), INT8_C( 34), INT8_C( 117), INT8_C( 77), INT8_C( -32), INT8_C( 25), INT8_C( -74), INT8_C( 93), INT8_C(-108), INT8_C( 3), INT8_C( -90), INT8_C( 13), INT8_C( -29), INT8_C( -88), INT8_C( 116), INT8_C( 14), INT8_C( -29), INT8_C( 30), INT8_C( -26), INT8_C(-115), INT8_C( -36), INT8_C( 94), INT8_C(-125), INT8_C( 118), INT8_C( 125), INT8_C( -91), INT8_C( 31)), simde_mm512_set_epi8(INT8_C( -74), INT8_C( -13), INT8_C( -87), INT8_C( -10), INT8_C( 21), INT8_C( 49), INT8_C( -64), INT8_C( 52), INT8_C( -97), INT8_C(-103), INT8_C(-127), INT8_C( 53), INT8_C( 99), INT8_C( 80), INT8_C( 55), INT8_C( 111), INT8_C( 61), INT8_C( -93), INT8_C( -87), INT8_C( 81), INT8_C( 108), INT8_C( 89), INT8_C( 63), INT8_C(-109), INT8_C( -47), INT8_C( -32), INT8_C( 105), INT8_C( 91), INT8_C( 88), INT8_C( 85), INT8_C( -96), INT8_C( 88), INT8_C( -85), INT8_C( 3), INT8_C( 124), INT8_C( -33), INT8_C( -21), INT8_C( -7), INT8_C( -31), INT8_C(-126), INT8_C( 3), INT8_C( -17), INT8_C( 40), INT8_C( 5), INT8_C( 126), INT8_C( -60), INT8_C( -91), INT8_C(-112), INT8_C( 88), INT8_C( 11), INT8_C( 100), INT8_C( 114), INT8_C( 112), INT8_C( -53), INT8_C( 89), INT8_C( 78), INT8_C( 115), INT8_C( 117), INT8_C( -33), INT8_C( 14), INT8_C( -42), INT8_C( -15), INT8_C( 80), INT8_C( -46)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -42), INT8_C( 26), INT8_C( 0), INT8_C( -2), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -42), INT8_C( 18), INT8_C( -68), INT8_C( 54), INT8_C( -54), 
INT8_C( -4), INT8_C( 0), INT8_C( 0), INT8_C( 98), INT8_C( 110), INT8_C(-100), INT8_C( 82), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-100), INT8_C( 24), INT8_C(-120), INT8_C( 92), INT8_C( 0), INT8_C(-120), INT8_C( 0), INT8_C( 93), INT8_C( -85), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 93), INT8_C( 0), INT8_C( 117), INT8_C( 25), INT8_C( 6), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -26), INT8_C( 14), INT8_C(-125), INT8_C( 125), INT8_C( 31), INT8_C( 0), INT8_C( 30), INT8_C( -29), INT8_C( 118), INT8_C( 94), INT8_C( 0), INT8_C( -29), INT8_C( 0), INT8_C( 0), INT8_C( 31), INT8_C( 0)) }, { simde_mm512_set_epi8(INT8_C(-109), INT8_C( 14), INT8_C( -91), INT8_C( -44), INT8_C( 79), INT8_C( -4), INT8_C( 88), INT8_C( 49), INT8_C( -95), INT8_C( -92), INT8_C(-116), INT8_C( 8), INT8_C( 25), INT8_C( 30), INT8_C( -55), INT8_C(-120), INT8_C( -58), INT8_C( -84), INT8_C( 99), INT8_C( -92), INT8_C( -29), INT8_C( -47), INT8_C( 42), INT8_C( -56), INT8_C( 109), INT8_C( -71), INT8_C( -73), INT8_C( -30), INT8_C( -15), INT8_C(-106), INT8_C( -91), INT8_C( -57), INT8_C( 51), INT8_C( -8), INT8_C( 55), INT8_C( 36), INT8_C( -55), INT8_C(-103), INT8_C( 51), INT8_C( -28), INT8_C( 70), INT8_C( 114), INT8_C( -3), INT8_C( 49), INT8_C( 23), INT8_C( -28), INT8_C( -87), INT8_C( 57), INT8_C( 28), INT8_C( -12), INT8_C( -51), INT8_C( 37), INT8_C( 29), INT8_C( -58), INT8_C( -69), INT8_C( 30), INT8_C( 43), INT8_C( -65), INT8_C( -2), INT8_C( -83), INT8_C( -27), INT8_C(-122), INT8_C( 77), INT8_C( 47)), simde_mm512_set_epi8(INT8_C(-116), INT8_C(-127), INT8_C( -95), INT8_C( -97), INT8_C( -78), INT8_C( -53), INT8_C( -36), INT8_C( 62), INT8_C( 63), INT8_C( 28), INT8_C( -37), INT8_C( 40), INT8_C( -14), INT8_C( 2), INT8_C( -65), INT8_C( -67), INT8_C( 79), INT8_C( 51), INT8_C( -51), INT8_C( -64), INT8_C( -40), INT8_C(-102), INT8_C(-107), INT8_C( 82), INT8_C( 20), INT8_C( 54), INT8_C( 66), INT8_C( 7), INT8_C( 33), INT8_C( 108), INT8_C( -88), INT8_C( 69), INT8_C( 98), INT8_C( 43), INT8_C( 
25), INT8_C( 58), INT8_C( 117), INT8_C( -34), INT8_C( -70), INT8_C( 3), INT8_C( 1), INT8_C( 100), INT8_C( 3), INT8_C( 14), INT8_C( 117), INT8_C( -91), INT8_C( -89), INT8_C( 66), INT8_C( 32), INT8_C( -35), INT8_C( 104), INT8_C( 65), INT8_C(-100), INT8_C( 27), INT8_C( 42), INT8_C( 91), INT8_C( -47), INT8_C( 39), INT8_C( -65), INT8_C(-124), INT8_C( -84), INT8_C( -95), INT8_C( -46), INT8_C(-113)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 14), INT8_C(-109), INT8_C( -44), INT8_C( 0), INT8_C( 49), INT8_C( 0), INT8_C( 30), INT8_C( 0), INT8_C( 0), INT8_C( -58), INT8_C( -15), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-106), INT8_C( -30), INT8_C( -71), INT8_C(-106), INT8_C( 109), INT8_C( -91), INT8_C( -92), INT8_C( 0), INT8_C( -73), INT8_C( -28), INT8_C( -55), INT8_C( 51), INT8_C(-103), INT8_C( -3), INT8_C( 0), INT8_C( 0), INT8_C( 23), INT8_C( -87), INT8_C( 49), INT8_C( 23), INT8_C( -8), INT8_C( -3), INT8_C( 0), INT8_C( 0), INT8_C( -28), INT8_C( 47), INT8_C( 0), INT8_C( 30), INT8_C( 77), INT8_C( 0), INT8_C( 29), INT8_C( -58), INT8_C( 29), INT8_C( 0), INT8_C( 43), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm512_set_epi8(INT8_C(-119), INT8_C( 55), INT8_C( -13), INT8_C(-118), INT8_C( 5), INT8_C( 14), INT8_C(-103), INT8_C( 58), INT8_C( 109), INT8_C(-113), INT8_C( -44), INT8_C( 126), INT8_C( 70), INT8_C(-126), INT8_C( 24), INT8_C(-117), INT8_C( 80), INT8_C( 25), INT8_C( 64), INT8_C( -89), INT8_C( 60), INT8_C( 5), INT8_C(-119), INT8_C( -29), INT8_C( -95), INT8_C( -26), INT8_C( 25), INT8_C( -65), INT8_C( -81), INT8_C( -84), INT8_C( 46), INT8_C( 62), INT8_C( -25), INT8_C(-116), INT8_C(-126), INT8_C(-103), INT8_C( 126), INT8_C( -39), INT8_C( 111), INT8_C( -10), INT8_C( -48), INT8_C( 122), INT8_C( -86), INT8_C( -25), INT8_C( 35), INT8_C( -4), INT8_C( -7), INT8_C( 76), INT8_C(-116), INT8_C( 1), INT8_C( 126), INT8_C( -30), INT8_C( -11), INT8_C( 82), 
INT8_C( 81), INT8_C( 27), INT8_C( -15), INT8_C( -14), INT8_C( 56), INT8_C( 60), INT8_C( -29), INT8_C( -65), INT8_C( -35), INT8_C( 21)), simde_mm512_set_epi8(INT8_C( 125), INT8_C( -43), INT8_C( -42), INT8_C( -51), INT8_C( -36), INT8_C( 38), INT8_C( 42), INT8_C( 18), INT8_C( 24), INT8_C( -25), INT8_C( 19), INT8_C( 15), INT8_C(-119), INT8_C(-124), INT8_C( 104), INT8_C( -93), INT8_C( 22), INT8_C( -16), INT8_C( 31), INT8_C( 116), INT8_C( -99), INT8_C( 45), INT8_C( -54), INT8_C( 50), INT8_C( -72), INT8_C(-107), INT8_C( 48), INT8_C( 118), INT8_C( -68), INT8_C( -15), INT8_C(-108), INT8_C( -29), INT8_C( 118), INT8_C( 96), INT8_C( -51), INT8_C( 107), INT8_C( -23), INT8_C( -97), INT8_C( -6), INT8_C( 91), INT8_C( -57), INT8_C( -67), INT8_C(-105), INT8_C( 68), INT8_C( -50), INT8_C( 16), INT8_C( -86), INT8_C( 6), INT8_C( 55), INT8_C(-118), INT8_C( 112), INT8_C( 125), INT8_C( 92), INT8_C( 91), INT8_C( -27), INT8_C( 53), INT8_C( 28), INT8_C( -66), INT8_C( -69), INT8_C(-117), INT8_C( -30), INT8_C(-106), INT8_C( 1), INT8_C( 95)), simde_mm512_set_epi8(INT8_C( -13), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-113), INT8_C( 14), INT8_C(-126), INT8_C( 58), INT8_C( 0), INT8_C( 70), INT8_C(-119), INT8_C( 0), INT8_C( 0), INT8_C( 58), INT8_C( 0), INT8_C( -26), INT8_C( 0), INT8_C( 80), INT8_C( -65), INT8_C( 0), INT8_C( 64), INT8_C( 0), INT8_C( -84), INT8_C( 0), INT8_C( 0), INT8_C( 62), INT8_C( -26), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 122), INT8_C( 76), INT8_C( 0), INT8_C( 126), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 126), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -25), INT8_C( 0), INT8_C( 76), INT8_C( 0), INT8_C( 122), INT8_C( -15), INT8_C( 0), INT8_C( 21), INT8_C( 126), INT8_C( -30), INT8_C( -11), INT8_C( 0), INT8_C( 56), INT8_C( -30), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -35), INT8_C(-116)) }, { simde_mm512_set_epi8(INT8_C( 18), INT8_C(-124), INT8_C( -6), INT8_C( -41), INT8_C(-109), INT8_C( 19), INT8_C( 47), INT8_C( 
73), INT8_C( -58), INT8_C( -50), INT8_C( 73), INT8_C( 110), INT8_C( 56), INT8_C( -30), INT8_C( -79), INT8_C( 123), INT8_C( 77), INT8_C( 62), INT8_C( -28), INT8_C( 45), INT8_C( -95), INT8_C( 12), INT8_C( 53), INT8_C( 75), INT8_C( 66), INT8_C( -89), INT8_C( 46), INT8_C( -47), INT8_C( 6), INT8_C( 4), INT8_C( 112), INT8_C( -34), INT8_C( 121), INT8_C( -3), INT8_C(-102), INT8_C(-115), INT8_C( -12), INT8_C( 80), INT8_C( 3), INT8_C( 56), INT8_C( 85), INT8_C( -43), INT8_C( -93), INT8_C( 34), INT8_C( -58), INT8_C(-101), INT8_C( 51), INT8_C( 1), INT8_C( -18), INT8_C( -90), INT8_C( -71), INT8_C( 74), INT8_C( -5), INT8_C(-113), INT8_C( 71), INT8_C( 61), INT8_C( 92), INT8_C( 73), INT8_C( 116), INT8_C( 110), INT8_C(-117), INT8_C( 96), INT8_C( -77), INT8_C( 45)), simde_mm512_set_epi8(INT8_C( 52), INT8_C( -84), INT8_C( 103), INT8_C(-105), INT8_C( -57), INT8_C( 17), INT8_C(-110), INT8_C(-113), INT8_C( 56), INT8_C( 125), INT8_C( 56), INT8_C( -88), INT8_C( -31), INT8_C( 119), INT8_C( -86), INT8_C( 114), INT8_C( -85), INT8_C( 14), INT8_C( 60), INT8_C( 84), INT8_C( -61), INT8_C(-111), INT8_C( 74), INT8_C( 113), INT8_C( 13), INT8_C( -89), INT8_C(-114), INT8_C( -79), INT8_C( -34), INT8_C( -73), INT8_C( -9), INT8_C(-108), INT8_C( 93), INT8_C( -78), INT8_C( -1), INT8_C(-102), INT8_C( 12), INT8_C( 117), INT8_C( 80), INT8_C( 44), INT8_C( 29), INT8_C( 18), INT8_C( 91), INT8_C( -23), INT8_C( 110), INT8_C(-126), INT8_C( -71), INT8_C( 65), INT8_C( -76), INT8_C( 85), INT8_C( 93), INT8_C( -86), INT8_C(-123), INT8_C(-115), INT8_C(-105), INT8_C( 75), INT8_C( 45), INT8_C( 22), INT8_C( 73), INT8_C( 74), INT8_C( 107), INT8_C( -60), INT8_C( -28), INT8_C( -57)), simde_mm512_set_epi8(INT8_C( 110), INT8_C( 0), INT8_C( -58), INT8_C( 0), INT8_C( 0), INT8_C( -79), INT8_C( 0), INT8_C( 0), INT8_C( 73), INT8_C( -6), INT8_C( 73), INT8_C( 0), INT8_C( 0), INT8_C( -58), INT8_C( 0), INT8_C( -30), INT8_C( 0), INT8_C( 62), INT8_C( 45), INT8_C( -47), INT8_C( 0), INT8_C( 0), INT8_C( 12), INT8_C( 112), INT8_C( -28), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-102), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-115), INT8_C( -93), INT8_C( 1), INT8_C(-115), INT8_C(-102), INT8_C(-101), INT8_C( -12), INT8_C( 0), INT8_C( -3), INT8_C( 0), INT8_C( 0), INT8_C( 51), INT8_C( 0), INT8_C( 116), INT8_C( -71), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -5), INT8_C( -71), INT8_C( 73), INT8_C( 71), INT8_C(-113), INT8_C( -5), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm512_set_epi8(INT8_C( 4), INT8_C( -30), INT8_C( -68), INT8_C( 57), INT8_C( 73), INT8_C( 25), INT8_C( -45), INT8_C( 112), INT8_C( 41), INT8_C( 41), INT8_C( -56), INT8_C( -93), INT8_C( 29), INT8_C( 99), INT8_C( 15), INT8_C( 122), INT8_C( 74), INT8_C( 0), INT8_C( 89), INT8_C( 61), INT8_C( 124), INT8_C( 36), INT8_C( -87), INT8_C( -25), INT8_C( 42), INT8_C(-125), INT8_C( -87), INT8_C( 69), INT8_C( 81), INT8_C( 4), INT8_C( 55), INT8_C( 107), INT8_C( 0), INT8_C( -7), INT8_C( 37), INT8_C( -15), INT8_C( 117), INT8_C( 9), INT8_C( -27), INT8_C( 11), INT8_C( 16), INT8_C( 87), INT8_C( 119), INT8_C(-107), INT8_C( -22), INT8_C( 27), INT8_C( -86), INT8_C( -63), INT8_C( 90), INT8_C( -44), INT8_C(-113), INT8_C(-126), INT8_C( -75), INT8_C( 67), INT8_C( 88), INT8_C( 105), INT8_C( 80), INT8_C( 7), INT8_C( 66), INT8_C( 126), INT8_C( 17), INT8_C( -20), INT8_C( -91), INT8_C( -99)), simde_mm512_set_epi8(INT8_C( 123), INT8_C( 117), INT8_C( 18), INT8_C( 2), INT8_C(-105), INT8_C( -51), INT8_C( -9), INT8_C( 72), INT8_C( 69), INT8_C(-116), INT8_C( 95), INT8_C(-110), INT8_C( -32), INT8_C( 110), INT8_C( -8), INT8_C( 85), INT8_C( -41), INT8_C( 80), INT8_C( -44), INT8_C( 110), INT8_C( -28), INT8_C( 116), INT8_C( 41), INT8_C( -31), INT8_C( 12), INT8_C( 40), INT8_C( 5), INT8_C( 1), INT8_C( -14), INT8_C( 66), INT8_C( -95), INT8_C( 106), INT8_C( 66), INT8_C( -91), INT8_C( -20), INT8_C(-109), INT8_C( -46), INT8_C( 41), INT8_C( 82), INT8_C( -61), INT8_C( 2), INT8_C( 3), INT8_C( 62), INT8_C( -90), INT8_C( 
99), INT8_C( -54), INT8_C( -28), INT8_C( -21), INT8_C( 126), INT8_C( 115), INT8_C( -46), INT8_C( 76), INT8_C( -43), INT8_C( 46), INT8_C( 107), INT8_C( 75), INT8_C( -93), INT8_C( 67), INT8_C( -37), INT8_C( 4), INT8_C(-127), INT8_C( -50), INT8_C( -26), INT8_C( 99)), simde_mm512_set_epi8(INT8_C( 73), INT8_C( -56), INT8_C( 99), INT8_C( 99), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 112), INT8_C( -56), INT8_C( 0), INT8_C( 4), INT8_C( 0), INT8_C( 0), INT8_C( -30), INT8_C( 0), INT8_C( -56), INT8_C( 0), INT8_C( 107), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 69), INT8_C( -87), INT8_C( 0), INT8_C( 61), INT8_C( -25), INT8_C( -87), INT8_C( 55), INT8_C( 0), INT8_C( 4), INT8_C( 0), INT8_C( 36), INT8_C( 27), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -27), INT8_C( 27), INT8_C( 0), INT8_C( 27), INT8_C( -22), INT8_C( -7), INT8_C( 0), INT8_C( -22), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -44), INT8_C( 17), INT8_C( 0), INT8_C(-126), INT8_C( 0), INT8_C( -44), INT8_C( -75), INT8_C( -75), INT8_C( 0), INT8_C( 17), INT8_C( 0), INT8_C( 126), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 17)) }, { simde_mm512_set_epi8(INT8_C( 56), INT8_C( 24), INT8_C( -33), INT8_C( 82), INT8_C( 72), INT8_C( 63), INT8_C(-114), INT8_C( 14), INT8_C( 2), INT8_C( 71), INT8_C( -31), INT8_C( -46), INT8_C( 64), INT8_C( -11), INT8_C( 35), INT8_C( -7), INT8_C( -78), INT8_C( 125), INT8_C( -28), INT8_C( -59), INT8_C( 6), INT8_C(-127), INT8_C( 34), INT8_C( 64), INT8_C( 88), INT8_C( -88), INT8_C( 70), INT8_C( -90), INT8_C( 47), INT8_C( -21), INT8_C(-104), INT8_C( -27), INT8_C( -16), INT8_C( -29), INT8_C( 51), INT8_C(-116), INT8_C( -87), INT8_C( 91), INT8_C(-106), INT8_C( -3), INT8_C( -55), INT8_C( -15), INT8_C(-109), INT8_C(-108), INT8_C( -41), INT8_C( 79), INT8_C( 54), INT8_C( 68), INT8_C( -51), INT8_C(-108), INT8_C( -83), INT8_C( 104), INT8_C( 18), INT8_C(-128), INT8_C( -75), INT8_C( -12), INT8_C( 56), INT8_C(-104), INT8_C( 56), INT8_C( 47), INT8_C( -30), INT8_C( -15), INT8_C(-115), INT8_C( 88)), 
simde_mm512_set_epi8(INT8_C( 107), INT8_C( 125), INT8_C(-111), INT8_C( -89), INT8_C( -9), INT8_C( 122), INT8_C( -2), INT8_C( -26), INT8_C(-100), INT8_C( 80), INT8_C( 89), INT8_C( 38), INT8_C( 95), INT8_C( -82), INT8_C( -66), INT8_C( 108), INT8_C( 104), INT8_C( -20), INT8_C( 72), INT8_C( -82), INT8_C( 23), INT8_C( -45), INT8_C( -20), INT8_C( 8), INT8_C( -2), INT8_C( -77), INT8_C(-112), INT8_C( 102), INT8_C( 83), INT8_C( 80), INT8_C( 56), INT8_C( -53), INT8_C( -83), INT8_C( -86), INT8_C( 15), INT8_C( 30), INT8_C( -92), INT8_C( -2), INT8_C( 42), INT8_C( -79), INT8_C( 108), INT8_C( 44), INT8_C( 17), INT8_C( 83), INT8_C( -49), INT8_C( -44), INT8_C( -42), INT8_C( -54), INT8_C(-127), INT8_C( 12), INT8_C( 88), INT8_C(-118), INT8_C( -23), INT8_C( -69), INT8_C( -23), INT8_C( -36), INT8_C( -23), INT8_C( -80), INT8_C( 33), INT8_C( 82), INT8_C( -43), INT8_C( -91), INT8_C( -36), INT8_C( -56)), simde_mm512_set_epi8(INT8_C( 72), INT8_C( -33), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -7), INT8_C(-114), INT8_C( 71), INT8_C( 56), INT8_C( 0), INT8_C( 0), INT8_C( 82), INT8_C( 64), INT8_C( 0), INT8_C( 64), INT8_C( 0), INT8_C( 88), INT8_C( 0), INT8_C( 0), INT8_C( 64), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -88), INT8_C( 47), INT8_C( -27), INT8_C( 64), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -16), INT8_C( -29), INT8_C( 0), INT8_C( 0), INT8_C( 91), INT8_C( 0), INT8_C(-116), INT8_C(-116), INT8_C( 54), INT8_C( -41), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 104), INT8_C( -12), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-115), INT8_C( -15), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_shuffle_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int 
test_simde_mm512_mask_shuffle_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask64 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( 92), INT8_C( 116), INT8_C( -78), INT8_C( -19), INT8_C( -73), INT8_C( 22), INT8_C( -66), INT8_C( -29), INT8_C( 55), INT8_C( 78), INT8_C( -45), INT8_C(-119), INT8_C( 30), INT8_C( -29), INT8_C( -23), INT8_C( 89), INT8_C( 15), INT8_C( 119), INT8_C( -88), INT8_C( 109), INT8_C( -16), INT8_C( -10), INT8_C( 0), INT8_C( -18), INT8_C( 1), INT8_C( 116), INT8_C( 62), INT8_C( -73), INT8_C(-122), INT8_C( 0), INT8_C( 111), INT8_C( 95), INT8_C( -20), INT8_C( 3), INT8_C( 122), INT8_C(-121), INT8_C( 13), INT8_C( 25), INT8_C( -35), INT8_C(-127), INT8_C( 82), INT8_C( 90), INT8_C( 53), INT8_C( 123), INT8_C( 73), INT8_C( 108), INT8_C( -18), INT8_C( 15), INT8_C( 81), INT8_C( -27), INT8_C( 102), INT8_C( 118), INT8_C( 39), INT8_C( -77), INT8_C( 45), INT8_C( 81), INT8_C( -17), INT8_C( -28), INT8_C( 67), INT8_C(-118), INT8_C( 79), INT8_C(-113), INT8_C(-122), INT8_C( 124)), UINT64_C(10224647434006242820), simde_mm512_set_epi8(INT8_C( 68), INT8_C(-108), INT8_C(-121), INT8_C(-115), INT8_C( -59), INT8_C( -78), INT8_C(-111), INT8_C( 56), INT8_C( 9), INT8_C(-121), INT8_C( 120), INT8_C( -36), INT8_C( 36), INT8_C(-103), INT8_C(-104), INT8_C( 25), INT8_C(-111), INT8_C( -63), INT8_C( -35), INT8_C(-120), INT8_C( 28), INT8_C( -44), INT8_C( -26), INT8_C( -86), INT8_C( -13), INT8_C( 53), INT8_C( -88), INT8_C(-107), INT8_C( 68), INT8_C( 42), INT8_C(-118), INT8_C( 111), INT8_C( 54), INT8_C( -58), INT8_C( -13), INT8_C( 27), INT8_C( 23), INT8_C( 41), INT8_C(-119), INT8_C( 44), INT8_C( 7), INT8_C(-120), INT8_C( 32), INT8_C( -43), INT8_C( 114), INT8_C( -72), INT8_C( 73), INT8_C( -96), INT8_C( 96), INT8_C( 110), INT8_C( -81), INT8_C( -76), INT8_C( 103), INT8_C(-100), INT8_C( -22), INT8_C( 18), INT8_C( 115), INT8_C( 54), INT8_C( -40), INT8_C( 125), INT8_C( 110), INT8_C( 31), INT8_C( 51), INT8_C(-104)), 
simde_mm512_set_epi8(INT8_C( -14), INT8_C( 85), INT8_C( -80), INT8_C( 80), INT8_C( 48), INT8_C( 93), INT8_C( 79), INT8_C( 127), INT8_C( 16), INT8_C( 41), INT8_C( 54), INT8_C(-116), INT8_C( 17), INT8_C( 42), INT8_C( -86), INT8_C( 38), INT8_C(-118), INT8_C( -56), INT8_C( 60), INT8_C( 19), INT8_C(-100), INT8_C(-107), INT8_C( 105), INT8_C( -76), INT8_C( 10), INT8_C( -9), INT8_C( -12), INT8_C( -56), INT8_C( -71), INT8_C( 96), INT8_C( 6), INT8_C( 24), INT8_C( 69), INT8_C( 73), INT8_C( -27), INT8_C( 3), INT8_C( -95), INT8_C( 50), INT8_C( -67), INT8_C( -39), INT8_C( 97), INT8_C( 62), INT8_C( 101), INT8_C( 56), INT8_C( -79), INT8_C( 83), INT8_C( -99), INT8_C( -56), INT8_C( -17), INT8_C( -56), INT8_C( -8), INT8_C( -16), INT8_C( 84), INT8_C( -95), INT8_C( -73), INT8_C( 54), INT8_C( 125), INT8_C( 85), INT8_C( 78), INT8_C( -65), INT8_C( 4), INT8_C( 113), INT8_C( -16), INT8_C( -53)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 116), INT8_C( -78), INT8_C( -19), INT8_C( 25), INT8_C(-121), INT8_C( -66), INT8_C( 68), INT8_C( 25), INT8_C(-111), INT8_C(-121), INT8_C(-119), INT8_C( 30), INT8_C( -78), INT8_C( -23), INT8_C(-121), INT8_C( 15), INT8_C( 119), INT8_C(-120), INT8_C( 68), INT8_C( 0), INT8_C( 0), INT8_C( -26), INT8_C( -18), INT8_C( -44), INT8_C( 116), INT8_C( 0), INT8_C( -73), INT8_C( 0), INT8_C( 0), INT8_C( 53), INT8_C( -86), INT8_C( -20), INT8_C( 3), INT8_C( 0), INT8_C( 114), INT8_C( 13), INT8_C( 25), INT8_C( -35), INT8_C( 0), INT8_C( 73), INT8_C( 90), INT8_C( 53), INT8_C( 123), INT8_C( 0), INT8_C( 114), INT8_C( -18), INT8_C( 0), INT8_C( 81), INT8_C( -27), INT8_C( 0), INT8_C( 118), INT8_C( 39), INT8_C( 0), INT8_C( 0), INT8_C( 81), INT8_C( -17), INT8_C( -28), INT8_C( 67), INT8_C(-118), INT8_C( 79), INT8_C( 51), INT8_C(-122), INT8_C( 124)) }, { simde_mm512_set_epi8(INT8_C(-123), INT8_C(-100), INT8_C( -39), INT8_C( -44), INT8_C( 22), INT8_C( 112), INT8_C( 16), INT8_C( 15), INT8_C(-104), INT8_C(-102), INT8_C( 102), INT8_C( -95), INT8_C( 69), INT8_C( 13), INT8_C( -21), INT8_C( 45), 
INT8_C( 62), INT8_C(-120), INT8_C(-111), INT8_C( 32), INT8_C(-107), INT8_C( -30), INT8_C( 99), INT8_C( -64), INT8_C( 8), INT8_C( -42), INT8_C( 81), INT8_C( -34), INT8_C( -46), INT8_C( 26), INT8_C( 31), INT8_C( -2), INT8_C( 68), INT8_C( -7), INT8_C( -71), INT8_C( 46), INT8_C( -21), INT8_C( -73), INT8_C( 21), INT8_C( 83), INT8_C(-108), INT8_C( -97), INT8_C( -69), INT8_C( 73), INT8_C( 57), INT8_C( -37), INT8_C( 21), INT8_C( 82), INT8_C(-119), INT8_C(-126), INT8_C( 126), INT8_C( 91), INT8_C( 115), INT8_C( 31), INT8_C( -79), INT8_C( 28), INT8_C(-106), INT8_C( -18), INT8_C( 65), INT8_C(-104), INT8_C( 81), INT8_C( 38), INT8_C( -84), INT8_C( -2)), UINT64_C( 4597426592773770833), simde_mm512_set_epi8(INT8_C( 74), INT8_C( 72), INT8_C( -4), INT8_C( 28), INT8_C(-115), INT8_C( 93), INT8_C( 102), INT8_C( 44), INT8_C(-103), INT8_C( -29), INT8_C( -50), INT8_C( 48), INT8_C( -96), INT8_C( -50), INT8_C( 46), INT8_C( -65), INT8_C( 4), INT8_C( 43), INT8_C( -75), INT8_C( 97), INT8_C( -26), INT8_C(-103), INT8_C( 71), INT8_C(-107), INT8_C( 91), INT8_C( 45), INT8_C( -11), INT8_C( 47), INT8_C( 29), INT8_C( 25), INT8_C( 26), INT8_C( -9), INT8_C( 10), INT8_C( 36), INT8_C( -79), INT8_C( -53), INT8_C( 41), INT8_C( 1), INT8_C( -23), INT8_C( -63), INT8_C(-127), INT8_C( 68), INT8_C( 48), INT8_C( 36), INT8_C( 89), INT8_C(-112), INT8_C( -31), INT8_C( 120), INT8_C( 35), INT8_C( 62), INT8_C( -21), INT8_C(-114), INT8_C(-104), INT8_C( 57), INT8_C( 42), INT8_C(-111), INT8_C( 94), INT8_C( -63), INT8_C( -9), INT8_C( 64), INT8_C( -65), INT8_C( -2), INT8_C( 110), INT8_C( -8)), simde_mm512_set_epi8(INT8_C( 44), INT8_C( -92), INT8_C( -31), INT8_C( 26), INT8_C( -99), INT8_C( -53), INT8_C( 117), INT8_C( 18), INT8_C( -63), INT8_C( 45), INT8_C( 12), INT8_C( 24), INT8_C(-108), INT8_C( 18), INT8_C( -60), INT8_C( 28), INT8_C( 50), INT8_C( -11), INT8_C( -68), INT8_C( -31), INT8_C( 105), INT8_C(-106), INT8_C( 98), INT8_C( 51), INT8_C( 58), INT8_C( 103), INT8_C( 111), INT8_C(-127), INT8_C( 68), INT8_C( -56), INT8_C( 
124), INT8_C(-119), INT8_C( 74), INT8_C( -62), INT8_C(-116), INT8_C( 37), INT8_C( -12), INT8_C( 114), INT8_C( 0), INT8_C( 61), INT8_C( 103), INT8_C( -4), INT8_C(-105), INT8_C( -68), INT8_C( 39), INT8_C(-100), INT8_C( -93), INT8_C( 11), INT8_C( -80), INT8_C( -19), INT8_C( -22), INT8_C( -39), INT8_C( 127), INT8_C( -38), INT8_C(-125), INT8_C(-111), INT8_C( 84), INT8_C( -96), INT8_C( 87), INT8_C( -22), INT8_C( -5), INT8_C( -3), INT8_C(-127), INT8_C( 41)), simde_mm512_set_epi8(INT8_C(-123), INT8_C(-100), INT8_C( 0), INT8_C( 93), INT8_C( 0), INT8_C( 0), INT8_C( -50), INT8_C( -50), INT8_C( 0), INT8_C( -4), INT8_C( 102), INT8_C( -95), INT8_C( 0), INT8_C( -50), INT8_C( -21), INT8_C( 28), INT8_C( 62), INT8_C( 0), INT8_C(-111), INT8_C( 0), INT8_C(-107), INT8_C( 0), INT8_C( 25), INT8_C( 29), INT8_C( 8), INT8_C( -42), INT8_C( 4), INT8_C( -34), INT8_C( -46), INT8_C( 26), INT8_C( 31), INT8_C( -2), INT8_C( 1), INT8_C( -7), INT8_C( 0), INT8_C( 48), INT8_C( 0), INT8_C(-112), INT8_C( 120), INT8_C( -79), INT8_C(-108), INT8_C( -97), INT8_C( 0), INT8_C( 0), INT8_C( 57), INT8_C( 0), INT8_C( 0), INT8_C( 41), INT8_C(-119), INT8_C(-126), INT8_C( 126), INT8_C( 91), INT8_C( 35), INT8_C( 0), INT8_C( 0), INT8_C( 28), INT8_C(-106), INT8_C( 0), INT8_C( 65), INT8_C( 0), INT8_C( 81), INT8_C( 38), INT8_C( -84), INT8_C( 42)) }, { simde_mm512_set_epi8(INT8_C( -30), INT8_C( -37), INT8_C( 51), INT8_C( -36), INT8_C( 8), INT8_C( 52), INT8_C( 97), INT8_C( 123), INT8_C( -49), INT8_C(-124), INT8_C( 95), INT8_C( -83), INT8_C( 70), INT8_C( -50), INT8_C( -61), INT8_C( 25), INT8_C( -97), INT8_C( 28), INT8_C( -58), INT8_C( 11), INT8_C( -14), INT8_C( 126), INT8_C( 81), INT8_C( 45), INT8_C( -23), INT8_C( 120), INT8_C( -83), INT8_C( -16), INT8_C( 7), INT8_C( 51), INT8_C( -57), INT8_C( -50), INT8_C( -21), INT8_C( 98), INT8_C( 88), INT8_C( 0), INT8_C( -66), INT8_C( 3), INT8_C( 124), INT8_C(-113), INT8_C( 50), INT8_C( 88), INT8_C( -85), INT8_C( -93), INT8_C( -44), INT8_C( -13), INT8_C( -94), INT8_C( 17), INT8_C( -2), 
INT8_C( 79), INT8_C(-116), INT8_C( 43), INT8_C( -77), INT8_C(-125), INT8_C( -23), INT8_C(-120), INT8_C( 96), INT8_C( -64), INT8_C( -23), INT8_C( -46), INT8_C( -29), INT8_C( -71), INT8_C( 71), INT8_C( 90)), UINT64_C(12627002542648829104), simde_mm512_set_epi8(INT8_C( 56), INT8_C( -45), INT8_C(-108), INT8_C( -19), INT8_C(-124), INT8_C( -27), INT8_C( 22), INT8_C( 126), INT8_C(-106), INT8_C( -68), INT8_C( -60), INT8_C( 8), INT8_C( 60), INT8_C( 93), INT8_C( -33), INT8_C( -27), INT8_C( -7), INT8_C( 27), INT8_C(-122), INT8_C( -38), INT8_C( 23), INT8_C( 6), INT8_C( 45), INT8_C( -21), INT8_C( -23), INT8_C(-101), INT8_C( 116), INT8_C( 127), INT8_C( 96), INT8_C( 40), INT8_C( -97), INT8_C( 40), INT8_C( 86), INT8_C( -44), INT8_C( 70), INT8_C( -71), INT8_C( 62), INT8_C( -21), INT8_C( 66), INT8_C( 68), INT8_C( -87), INT8_C( -61), INT8_C( 48), INT8_C( -70), INT8_C( 18), INT8_C( -78), INT8_C( -98), INT8_C( 117), INT8_C( 74), INT8_C( 32), INT8_C( 93), INT8_C( 125), INT8_C( -47), INT8_C( -60), INT8_C( -86), INT8_C( 117), INT8_C( 122), INT8_C( -54), INT8_C( 50), INT8_C( 123), INT8_C( -31), INT8_C( -74), INT8_C( -64), INT8_C( 54)), simde_mm512_set_epi8(INT8_C( 115), INT8_C( 51), INT8_C( -91), INT8_C( 56), INT8_C( 64), INT8_C( -39), INT8_C(-119), INT8_C( -28), INT8_C( -54), INT8_C( 28), INT8_C( 54), INT8_C( -8), INT8_C( -54), INT8_C(-128), INT8_C( -28), INT8_C( -71), INT8_C( 107), INT8_C( -66), INT8_C(-114), INT8_C( -88), INT8_C( 34), INT8_C( -83), INT8_C( -21), INT8_C( -64), INT8_C( 121), INT8_C( -20), INT8_C( -89), INT8_C( -94), INT8_C( 112), INT8_C( -27), INT8_C( 81), INT8_C( -54), INT8_C( -64), INT8_C(-114), INT8_C( 48), INT8_C( -89), INT8_C( -61), INT8_C( 26), INT8_C( 43), INT8_C( 29), INT8_C( 0), INT8_C( 64), INT8_C( 123), INT8_C( -67), INT8_C( 15), INT8_C( 120), INT8_C( 36), INT8_C( 40), INT8_C( 106), INT8_C(-118), INT8_C(-108), INT8_C( -58), INT8_C( 26), INT8_C(-111), INT8_C( 63), INT8_C( -98), INT8_C( -13), INT8_C( -12), INT8_C(-124), INT8_C( 96), INT8_C( -13), INT8_C( -98), 
INT8_C( 99), INT8_C( -13)), simde_mm512_set_epi8(INT8_C( 60), INT8_C( -37), INT8_C( 0), INT8_C( -36), INT8_C( -27), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -49), INT8_C(-124), INT8_C( -68), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -61), INT8_C( 25), INT8_C( -97), INT8_C( 28), INT8_C( -58), INT8_C( 0), INT8_C( 40), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 45), INT8_C( 0), INT8_C( 0), INT8_C( -16), INT8_C( 40), INT8_C( 51), INT8_C( -57), INT8_C( 0), INT8_C( -21), INT8_C( 0), INT8_C( 117), INT8_C( 0), INT8_C( 0), INT8_C( -21), INT8_C( 124), INT8_C(-113), INT8_C( 117), INT8_C( 88), INT8_C( -85), INT8_C( -93), INT8_C( 86), INT8_C( -13), INT8_C( -94), INT8_C( 68), INT8_C( -60), INT8_C( 79), INT8_C( 0), INT8_C( 43), INT8_C( -77), INT8_C( 0), INT8_C( -23), INT8_C(-120), INT8_C( 0), INT8_C( -64), INT8_C( 0), INT8_C( 54), INT8_C( -29), INT8_C( -71), INT8_C( 71), INT8_C( 90)) }, { simde_mm512_set_epi8(INT8_C( 72), INT8_C( 45), INT8_C( 120), INT8_C( -5), INT8_C(-109), INT8_C( 62), INT8_C( 17), INT8_C( 31), INT8_C( -30), INT8_C( -58), INT8_C( 56), INT8_C( 21), INT8_C( 72), INT8_C( -74), INT8_C( -40), INT8_C( 120), INT8_C( 95), INT8_C( 108), INT8_C( 32), INT8_C( 64), INT8_C(-128), INT8_C( 102), INT8_C( -21), INT8_C( 28), INT8_C( 105), INT8_C( 52), INT8_C( 85), INT8_C(-104), INT8_C( 57), INT8_C( -31), INT8_C( -38), INT8_C(-124), INT8_C(-107), INT8_C( -2), INT8_C( 55), INT8_C( 46), INT8_C( -71), INT8_C( 77), INT8_C( 18), INT8_C( 70), INT8_C( 89), INT8_C( 125), INT8_C( -42), INT8_C(-125), INT8_C( 121), INT8_C( -11), INT8_C( -69), INT8_C( -59), INT8_C( -53), INT8_C( 34), INT8_C( 9), INT8_C( 64), INT8_C( -61), INT8_C( -25), INT8_C(-115), INT8_C( 100), INT8_C( 65), INT8_C( 8), INT8_C( 69), INT8_C( -8), INT8_C( -15), INT8_C( -51), INT8_C( 1), INT8_C( 90)), UINT64_C(14515151237088493607), simde_mm512_set_epi8(INT8_C( -26), INT8_C( -9), INT8_C( 66), INT8_C( 1), INT8_C( -13), INT8_C( 60), INT8_C(-119), INT8_C( -83), INT8_C(-122), INT8_C( -64), INT8_C( -83), INT8_C( -74), INT8_C( 
119), INT8_C( -8), INT8_C( 12), INT8_C( 113), INT8_C( -12), INT8_C( -84), INT8_C( 6), INT8_C( 69), INT8_C( 2), INT8_C( -75), INT8_C( -34), INT8_C(-126), INT8_C( 3), INT8_C(-128), INT8_C( -9), INT8_C( 24), INT8_C( 11), INT8_C( -94), INT8_C( -32), INT8_C( 110), INT8_C( 33), INT8_C( -24), INT8_C( 125), INT8_C( 35), INT8_C(-103), INT8_C( -48), INT8_C( -22), INT8_C( 38), INT8_C( -81), INT8_C( 9), INT8_C( -11), INT8_C(-124), INT8_C( 71), INT8_C( 31), INT8_C( -42), INT8_C( 93), INT8_C( 67), INT8_C( 45), INT8_C( 51), INT8_C( -92), INT8_C( 126), INT8_C( 108), INT8_C(-123), INT8_C( -71), INT8_C( 113), INT8_C( 32), INT8_C( 71), INT8_C( 55), INT8_C( -26), INT8_C( 82), INT8_C( -81), INT8_C( -20)), simde_mm512_set_epi8(INT8_C(-125), INT8_C( 121), INT8_C(-128), INT8_C( 103), INT8_C( 0), INT8_C( 101), INT8_C( -41), INT8_C( 89), INT8_C( -83), INT8_C( -65), INT8_C( 9), INT8_C( -7), INT8_C( -63), INT8_C( 13), INT8_C( 105), INT8_C( 92), INT8_C( -18), INT8_C( -21), INT8_C(-102), INT8_C(-114), INT8_C( 74), INT8_C( 121), INT8_C( -45), INT8_C( 52), INT8_C( -63), INT8_C( -93), INT8_C( 98), INT8_C( 106), INT8_C(-109), INT8_C( -47), INT8_C( 37), INT8_C( 70), INT8_C( 100), INT8_C( 121), INT8_C( 18), INT8_C( 28), INT8_C(-117), INT8_C( 107), INT8_C( 3), INT8_C( -62), INT8_C( 42), INT8_C( 72), INT8_C( 91), INT8_C( 86), INT8_C( -72), INT8_C( 9), INT8_C( -80), INT8_C( 118), INT8_C( 122), INT8_C(-108), INT8_C( -70), INT8_C( -63), INT8_C( 56), INT8_C( 71), INT8_C( -14), INT8_C( 49), INT8_C( -73), INT8_C( 53), INT8_C( -29), INT8_C( 3), INT8_C( -73), INT8_C( 43), INT8_C( -22), INT8_C( 85)), simde_mm512_set_epi8(INT8_C( 0), INT8_C(-119), INT8_C( 120), INT8_C( -5), INT8_C( 113), INT8_C( 62), INT8_C( 17), INT8_C(-119), INT8_C( -30), INT8_C( 0), INT8_C(-119), INT8_C( 0), INT8_C( 72), INT8_C( -74), INT8_C( -40), INT8_C( 120), INT8_C( 95), INT8_C( 108), INT8_C( 0), INT8_C( 64), INT8_C( -75), INT8_C( -34), INT8_C( -21), INT8_C( 24), INT8_C( 105), INT8_C( 52), INT8_C( -94), INT8_C(-104), INT8_C( 57), INT8_C( 
0), INT8_C( -38), INT8_C(-128), INT8_C(-107), INT8_C( -22), INT8_C( 55), INT8_C( 46), INT8_C( -71), INT8_C( 77), INT8_C( 71), INT8_C( 0), INT8_C( -48), INT8_C( 125), INT8_C(-103), INT8_C( 9), INT8_C( 0), INT8_C( -11), INT8_C( 0), INT8_C( 9), INT8_C( -53), INT8_C( 0), INT8_C( 9), INT8_C( 64), INT8_C( -61), INT8_C( -25), INT8_C(-115), INT8_C( 100), INT8_C( 65), INT8_C( 8), INT8_C( 0), INT8_C( -8), INT8_C( -15), INT8_C( 126), INT8_C( 0), INT8_C( 71)) }, { simde_mm512_set_epi8(INT8_C( -47), INT8_C( 84), INT8_C(-126), INT8_C( -64), INT8_C( 14), INT8_C( 11), INT8_C( 37), INT8_C( -23), INT8_C( 67), INT8_C( 124), INT8_C( 58), INT8_C( -94), INT8_C( 30), INT8_C( -33), INT8_C( 70), INT8_C( -24), INT8_C( 38), INT8_C( -97), INT8_C( -56), INT8_C( -60), INT8_C( -59), INT8_C( 65), INT8_C( -74), INT8_C( 45), INT8_C( -11), INT8_C( 55), INT8_C( -82), INT8_C( 12), INT8_C( 106), INT8_C( 22), INT8_C(-124), INT8_C( -4), INT8_C( 2), INT8_C( -81), INT8_C( 14), INT8_C( 90), INT8_C(-100), INT8_C(-122), INT8_C( -35), INT8_C( 81), INT8_C( -14), INT8_C( -42), INT8_C( 125), INT8_C(-125), INT8_C( -57), INT8_C( 90), INT8_C( -9), INT8_C( 63), INT8_C( 53), INT8_C( 77), INT8_C( 63), INT8_C( -84), INT8_C( 27), INT8_C( 22), INT8_C( 3), INT8_C( -37), INT8_C( 65), INT8_C( 118), INT8_C(-126), INT8_C( 97), INT8_C( 109), INT8_C( 7), INT8_C(-114), INT8_C( -75)), UINT64_C( 8707623543556880126), simde_mm512_set_epi8(INT8_C( 84), INT8_C( -71), INT8_C( 8), INT8_C( 12), INT8_C( -11), INT8_C( -76), INT8_C( 62), INT8_C( 93), INT8_C( -75), INT8_C( -77), INT8_C( -84), INT8_C(-108), INT8_C( -35), INT8_C( 14), INT8_C( -60), INT8_C( 18), INT8_C( 23), INT8_C( -60), INT8_C( -63), INT8_C(-114), INT8_C( -55), INT8_C( 75), INT8_C( -99), INT8_C( -55), INT8_C( 58), INT8_C( 76), INT8_C(-102), INT8_C(-118), INT8_C( -30), INT8_C( 39), INT8_C( 119), INT8_C( 85), INT8_C( -8), INT8_C( -72), INT8_C( -60), INT8_C( -94), INT8_C(-112), INT8_C( 119), INT8_C( 124), INT8_C( 76), INT8_C( -42), INT8_C(-124), INT8_C( 54), INT8_C( 74), INT8_C( 
-92), INT8_C( 99), INT8_C( 79), INT8_C( -3), INT8_C( 61), INT8_C( -89), INT8_C( 84), INT8_C( -94), INT8_C( 31), INT8_C(-116), INT8_C( -67), INT8_C(-102), INT8_C( -72), INT8_C( -91), INT8_C(-105), INT8_C(-108), INT8_C( -44), INT8_C( 74), INT8_C( -28), INT8_C( 124)), simde_mm512_set_epi8(INT8_C( 1), INT8_C( 75), INT8_C( 21), INT8_C( -36), INT8_C(-126), INT8_C( 122), INT8_C( 71), INT8_C( 76), INT8_C( 28), INT8_C( -56), INT8_C( 32), INT8_C( 101), INT8_C(-107), INT8_C(-111), INT8_C( -88), INT8_C( -19), INT8_C( -77), INT8_C( 19), INT8_C( -21), INT8_C(-111), INT8_C( -68), INT8_C( 82), INT8_C(-118), INT8_C( -76), INT8_C( 47), INT8_C( 127), INT8_C( 62), INT8_C( -16), INT8_C( 10), INT8_C( -14), INT8_C(-100), INT8_C( 86), INT8_C( 29), INT8_C( 107), INT8_C( 56), INT8_C( 21), INT8_C( 24), INT8_C( 68), INT8_C( -96), INT8_C( 64), INT8_C( 48), INT8_C( 13), INT8_C( -83), INT8_C( 4), INT8_C( -3), INT8_C( -64), INT8_C( 17), INT8_C(-115), INT8_C( 21), INT8_C( 108), INT8_C( 125), INT8_C( -60), INT8_C( -72), INT8_C( 74), INT8_C( -5), INT8_C( -58), INT8_C( -41), INT8_C( 22), INT8_C(-115), INT8_C( 102), INT8_C( 59), INT8_C( -80), INT8_C( -15), INT8_C( -63)), simde_mm512_set_epi8(INT8_C( -47), INT8_C( -11), INT8_C( -84), INT8_C( 0), INT8_C( 0), INT8_C( 11), INT8_C( 37), INT8_C( -23), INT8_C( 12), INT8_C( 0), INT8_C( 58), INT8_C( -84), INT8_C( 30), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -97), INT8_C( 0), INT8_C( 0), INT8_C( -59), INT8_C( 65), INT8_C( -74), INT8_C( 0), INT8_C( -11), INT8_C( 23), INT8_C( -60), INT8_C( 0), INT8_C( 75), INT8_C( 22), INT8_C( 0), INT8_C( -4), INT8_C( 2), INT8_C(-112), INT8_C( 14), INT8_C( 54), INT8_C(-100), INT8_C( 74), INT8_C( 0), INT8_C( -3), INT8_C( -3), INT8_C( -42), INT8_C( 125), INT8_C(-125), INT8_C( 0), INT8_C( 90), INT8_C( -9), INT8_C( 0), INT8_C( 53), INT8_C( 77), INT8_C( 84), INT8_C( 0), INT8_C( 27), INT8_C(-116), INT8_C( 0), INT8_C( -37), INT8_C( 0), INT8_C( -91), INT8_C( 0), INT8_C( -91), INT8_C( 31), INT8_C( 0), INT8_C( 0), INT8_C( 
-75)) }, { simde_mm512_set_epi8(INT8_C( -64), INT8_C( -53), INT8_C( -42), INT8_C( 126), INT8_C( 67), INT8_C( 50), INT8_C( -18), INT8_C( 76), INT8_C( -19), INT8_C( 123), INT8_C( -87), INT8_C( 106), INT8_C( -74), INT8_C( 44), INT8_C( 117), INT8_C( 103), INT8_C( 81), INT8_C( 122), INT8_C( 56), INT8_C( -10), INT8_C( 67), INT8_C( 79), INT8_C( 83), INT8_C( -38), INT8_C( -13), INT8_C( 43), INT8_C( 27), INT8_C( -97), INT8_C( 102), INT8_C( 126), INT8_C( 38), INT8_C( -62), INT8_C( -24), INT8_C( 117), INT8_C( -38), INT8_C( -93), INT8_C( -58), INT8_C(-124), INT8_C( -75), INT8_C( 10), INT8_C( 18), INT8_C( -74), INT8_C( 14), INT8_C( 36), INT8_C( -7), INT8_C( 113), INT8_C( 40), INT8_C( 48), INT8_C(-107), INT8_C( -34), INT8_C( -75), INT8_C( 85), INT8_C( -35), INT8_C(-116), INT8_C( 65), INT8_C( -21), INT8_C( 15), INT8_C( 3), INT8_C( 45), INT8_C( 21), INT8_C( 72), INT8_C( 93), INT8_C( 108), INT8_C( 125)), UINT64_C(12576710173448868104), simde_mm512_set_epi8(INT8_C( 90), INT8_C( -38), INT8_C( -98), INT8_C( -70), INT8_C(-108), INT8_C( 20), INT8_C( 43), INT8_C(-128), INT8_C( 77), INT8_C( 108), INT8_C( 53), INT8_C( 82), INT8_C( -50), INT8_C( 52), INT8_C( 56), INT8_C( 58), INT8_C(-120), INT8_C( -43), INT8_C( 114), INT8_C( 93), INT8_C( -44), INT8_C( -15), INT8_C( 38), INT8_C( -17), INT8_C(-110), INT8_C(-123), INT8_C( -39), INT8_C( 114), INT8_C( 51), INT8_C(-115), INT8_C( -74), INT8_C( 43), INT8_C( 41), INT8_C( -36), INT8_C( 19), INT8_C( 69), INT8_C( 60), INT8_C( -53), INT8_C( 112), INT8_C( 108), INT8_C( 8), INT8_C( 46), INT8_C( -35), INT8_C( 26), INT8_C( 11), INT8_C( 42), INT8_C( 47), INT8_C( 59), INT8_C( -57), INT8_C( 94), INT8_C(-125), INT8_C(-124), INT8_C( 36), INT8_C( 57), INT8_C( 68), INT8_C( -52), INT8_C( 39), INT8_C( 50), INT8_C( -48), INT8_C( 94), INT8_C( 53), INT8_C( 11), INT8_C( 29), INT8_C( 65)), simde_mm512_set_epi8(INT8_C(-107), INT8_C( 120), INT8_C( -58), INT8_C( 107), INT8_C( -32), INT8_C( -32), INT8_C( 88), INT8_C( -43), INT8_C( 31), INT8_C( -32), INT8_C( -64), INT8_C( 
27), INT8_C( 82), INT8_C( -90), INT8_C( -54), INT8_C( -84), INT8_C( -30), INT8_C( 63), INT8_C( 24), INT8_C( 81), INT8_C( -8), INT8_C( 9), INT8_C( -35), INT8_C(-101), INT8_C( 83), INT8_C( 107), INT8_C( -47), INT8_C( -56), INT8_C( 57), INT8_C( -88), INT8_C(-115), INT8_C( -20), INT8_C( 58), INT8_C( 75), INT8_C( 56), INT8_C( 93), INT8_C( 49), INT8_C( 43), INT8_C( 108), INT8_C( 118), INT8_C( -79), INT8_C( 112), INT8_C( 44), INT8_C(-112), INT8_C( -52), INT8_C( 10), INT8_C( 28), INT8_C( -86), INT8_C( 65), INT8_C( 62), INT8_C( 86), INT8_C(-107), INT8_C( 24), INT8_C( -55), INT8_C( 54), INT8_C(-110), INT8_C( -33), INT8_C( 110), INT8_C(-116), INT8_C( -39), INT8_C( 39), INT8_C(-112), INT8_C( 64), INT8_C( 43)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( -53), INT8_C( 0), INT8_C( 126), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 76), INT8_C( 90), INT8_C( 123), INT8_C( -87), INT8_C( 106), INT8_C( 52), INT8_C( 44), INT8_C( 117), INT8_C( 0), INT8_C( 81), INT8_C(-120), INT8_C( -17), INT8_C( -74), INT8_C( 67), INT8_C( 79), INT8_C( 0), INT8_C( 0), INT8_C( -13), INT8_C( -44), INT8_C( 27), INT8_C( -97), INT8_C( 102), INT8_C( 0), INT8_C( 38), INT8_C( -62), INT8_C( -53), INT8_C( 117), INT8_C( 108), INT8_C( 19), INT8_C( 47), INT8_C(-124), INT8_C( 69), INT8_C( 46), INT8_C( 18), INT8_C( 59), INT8_C( 69), INT8_C( 0), INT8_C( -7), INT8_C( -53), INT8_C( 69), INT8_C( 48), INT8_C(-107), INT8_C( -34), INT8_C( -75), INT8_C( 0), INT8_C( -35), INT8_C(-116), INT8_C( 65), INT8_C( 0), INT8_C( 15), INT8_C( 3), INT8_C( 45), INT8_C( 21), INT8_C( 39), INT8_C( 93), INT8_C( 108), INT8_C( 125)) }, { simde_mm512_set_epi8(INT8_C(-103), INT8_C(-124), INT8_C( 13), INT8_C( -80), INT8_C( -32), INT8_C( 123), INT8_C( 115), INT8_C( -49), INT8_C( 113), INT8_C( -51), INT8_C( 119), INT8_C( -67), INT8_C( -21), INT8_C( 19), INT8_C( -12), INT8_C( 29), INT8_C( 91), INT8_C( -64), INT8_C( 88), INT8_C(-105), INT8_C( 24), INT8_C( 31), INT8_C( 77), INT8_C( 92), INT8_C( -85), INT8_C( 48), INT8_C(-113), INT8_C(-114), INT8_C( 16), 
INT8_C( 18), INT8_C( -44), INT8_C( -66), INT8_C( 25), INT8_C(-105), INT8_C(-100), INT8_C( -91), INT8_C( 26), INT8_C(-123), INT8_C( -33), INT8_C( -37), INT8_C( 63), INT8_C( 13), INT8_C(-124), INT8_C( 41), INT8_C(-104), INT8_C( -59), INT8_C( -2), INT8_C( -54), INT8_C( 73), INT8_C( 3), INT8_C( -18), INT8_C( 10), INT8_C( 19), INT8_C( 58), INT8_C( -12), INT8_C( -75), INT8_C( -88), INT8_C( 59), INT8_C( 104), INT8_C( -40), INT8_C( -82), INT8_C( 42), INT8_C( -73), INT8_C( -94)), UINT64_C( 2633789449456316803), simde_mm512_set_epi8(INT8_C( 118), INT8_C( 44), INT8_C( -14), INT8_C( 84), INT8_C( -69), INT8_C( -48), INT8_C( 40), INT8_C( 86), INT8_C( -1), INT8_C( 121), INT8_C( -40), INT8_C( 44), INT8_C( -1), INT8_C( 38), INT8_C( -44), INT8_C( 38), INT8_C( 93), INT8_C(-107), INT8_C( -4), INT8_C( -61), INT8_C( -52), INT8_C( 22), INT8_C( -54), INT8_C(-125), INT8_C( -29), INT8_C( 25), INT8_C( -77), INT8_C( -68), INT8_C( 9), INT8_C( -35), INT8_C( -11), INT8_C( 53), INT8_C( 28), INT8_C( 61), INT8_C( -35), INT8_C(-106), INT8_C( -46), INT8_C( 121), INT8_C(-102), INT8_C( 121), INT8_C( -54), INT8_C( -60), INT8_C( 7), INT8_C( 2), INT8_C(-119), INT8_C( 111), INT8_C( -20), INT8_C( -58), INT8_C( -98), INT8_C( 83), INT8_C( 32), INT8_C( -49), INT8_C( -11), INT8_C( 48), INT8_C( 92), INT8_C( 47), INT8_C( -38), INT8_C( 11), INT8_C( 26), INT8_C( 90), INT8_C( -5), INT8_C( 73), INT8_C( 45), INT8_C( -2)), simde_mm512_set_epi8(INT8_C( -31), INT8_C( 126), INT8_C(-118), INT8_C( -57), INT8_C(-126), INT8_C( 29), INT8_C( 101), INT8_C( -15), INT8_C( -61), INT8_C( 118), INT8_C( 102), INT8_C( 12), INT8_C( -59), INT8_C( -41), INT8_C( -60), INT8_C( 46), INT8_C( 63), INT8_C( 78), INT8_C( 87), INT8_C( 18), INT8_C( 18), INT8_C( 32), INT8_C( -26), INT8_C( -6), INT8_C( -20), INT8_C(-123), INT8_C( 99), INT8_C( 65), INT8_C( 13), INT8_C( 25), INT8_C( 108), INT8_C(-121), INT8_C( 42), INT8_C( 0), INT8_C( 104), INT8_C( -17), INT8_C( 111), INT8_C(-110), INT8_C( -39), INT8_C( 125), INT8_C( -50), INT8_C( -47), INT8_C( 30), 
INT8_C( 92), INT8_C( 19), INT8_C( -8), INT8_C(-120), INT8_C( 127), INT8_C( -49), INT8_C( 23), INT8_C( 16), INT8_C( -64), INT8_C( -79), INT8_C( 116), INT8_C( -5), INT8_C( -50), INT8_C( 32), INT8_C( 22), INT8_C( -42), INT8_C( -3), INT8_C( 30), INT8_C( 64), INT8_C( 96), INT8_C( -66)), simde_mm512_set_epi8(INT8_C(-103), INT8_C(-124), INT8_C( 0), INT8_C( -80), INT8_C( -32), INT8_C( -14), INT8_C( 115), INT8_C( -49), INT8_C( 0), INT8_C( -51), INT8_C( 119), INT8_C( -67), INT8_C( 0), INT8_C( 0), INT8_C( -12), INT8_C( 44), INT8_C( 91), INT8_C( -64), INT8_C( 88), INT8_C( -35), INT8_C( -35), INT8_C( 31), INT8_C( 77), INT8_C( 0), INT8_C( 0), INT8_C( 48), INT8_C(-113), INT8_C(-114), INT8_C( -4), INT8_C( -54), INT8_C( -44), INT8_C( -66), INT8_C( 121), INT8_C( -58), INT8_C(-100), INT8_C( -91), INT8_C( 26), INT8_C( 0), INT8_C( -33), INT8_C( -37), INT8_C( 63), INT8_C( 13), INT8_C(-124), INT8_C( 41), INT8_C(-104), INT8_C( -59), INT8_C( 0), INT8_C( 28), INT8_C( 0), INT8_C( -38), INT8_C( -2), INT8_C( 0), INT8_C( 19), INT8_C( 58), INT8_C( -12), INT8_C( 0), INT8_C( -2), INT8_C( 59), INT8_C( 104), INT8_C( -40), INT8_C( -82), INT8_C( 42), INT8_C( -2), INT8_C( 0)) }, { simde_mm512_set_epi8(INT8_C( 117), INT8_C( 69), INT8_C( 121), INT8_C( -45), INT8_C( 30), INT8_C( -73), INT8_C( 2), INT8_C( -40), INT8_C( 95), INT8_C( -3), INT8_C( 16), INT8_C( -78), INT8_C(-128), INT8_C( -41), INT8_C( -66), INT8_C( 66), INT8_C( 93), INT8_C( -32), INT8_C( -50), INT8_C( -2), INT8_C( -61), INT8_C( 29), INT8_C( -88), INT8_C(-118), INT8_C( -27), INT8_C( 42), INT8_C( 78), INT8_C( -46), INT8_C( -79), INT8_C( 38), INT8_C( -75), INT8_C( 14), INT8_C(-118), INT8_C(-114), INT8_C(-120), INT8_C( 42), INT8_C( 4), INT8_C( 79), INT8_C( -84), INT8_C(-110), INT8_C( 0), INT8_C( 85), INT8_C( -20), INT8_C( 61), INT8_C( 40), INT8_C( -75), INT8_C( 69), INT8_C( -99), INT8_C( 73), INT8_C( -34), INT8_C( -14), INT8_C( 72), INT8_C( 101), INT8_C( 96), INT8_C(-114), INT8_C(-124), INT8_C( -87), INT8_C( 43), INT8_C(-118), INT8_C( 114), 
INT8_C( -88), INT8_C( 8), INT8_C( -52), INT8_C( 75)), UINT64_C(14890918166471265655), simde_mm512_set_epi8(INT8_C( -56), INT8_C( -70), INT8_C( 66), INT8_C(-111), INT8_C( -69), INT8_C( 15), INT8_C( -82), INT8_C( -12), INT8_C( -89), INT8_C( 37), INT8_C( 80), INT8_C( 120), INT8_C( -83), INT8_C(-120), INT8_C( 95), INT8_C( 21), INT8_C( 91), INT8_C( -97), INT8_C( -72), INT8_C( 42), INT8_C( 22), INT8_C( -70), INT8_C( 71), INT8_C( -78), INT8_C( -5), INT8_C( 52), INT8_C( -22), INT8_C( -34), INT8_C( 16), INT8_C( 92), INT8_C( 91), INT8_C( -72), INT8_C( 3), INT8_C( -31), INT8_C( -95), INT8_C( -56), INT8_C( -50), INT8_C( 68), INT8_C( -24), INT8_C( -50), INT8_C( 94), INT8_C( 67), INT8_C( 108), INT8_C(-118), INT8_C( -65), INT8_C( 31), INT8_C( 70), INT8_C( 108), INT8_C( -66), INT8_C( 107), INT8_C( -85), INT8_C( 38), INT8_C( 2), INT8_C( 32), INT8_C( 56), INT8_C( 66), INT8_C( -2), INT8_C( -74), INT8_C( 112), INT8_C( 10), INT8_C( 64), INT8_C( 100), INT8_C( -55), INT8_C( 83)), simde_mm512_set_epi8(INT8_C( -69), INT8_C( -45), INT8_C( 23), INT8_C( 70), INT8_C( 22), INT8_C( 113), INT8_C( 75), INT8_C(-117), INT8_C( 69), INT8_C( -74), INT8_C( 40), INT8_C( -39), INT8_C( -51), INT8_C( -91), INT8_C( 25), INT8_C( -4), INT8_C( -12), INT8_C( -13), INT8_C( -77), INT8_C( 75), INT8_C(-116), INT8_C( -36), INT8_C( 38), INT8_C( -52), INT8_C( 13), INT8_C( -52), INT8_C(-100), INT8_C( -46), INT8_C( 13), INT8_C( 83), INT8_C( -94), INT8_C( 102), INT8_C( 116), INT8_C( -60), INT8_C( 44), INT8_C( -7), INT8_C( 104), INT8_C( -50), INT8_C( -30), INT8_C( 17), INT8_C( 13), INT8_C(-107), INT8_C( 111), INT8_C( -13), INT8_C( 17), INT8_C( -45), INT8_C( -31), INT8_C( 76), INT8_C(-102), INT8_C( -71), INT8_C( 116), INT8_C( 72), INT8_C( -11), INT8_C( -97), INT8_C( 93), INT8_C( 48), INT8_C( -75), INT8_C( -90), INT8_C( -10), INT8_C( -78), INT8_C( -8), INT8_C( -10), INT8_C( 48), INT8_C( -11)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 121), INT8_C( -45), INT8_C( 37), INT8_C( 95), INT8_C( -69), INT8_C( -40), 
INT8_C( 80), INT8_C( -3), INT8_C( -12), INT8_C( -78), INT8_C(-128), INT8_C( 0), INT8_C( -82), INT8_C( 0), INT8_C( 93), INT8_C( -32), INT8_C( 0), INT8_C( -2), INT8_C( 0), INT8_C( 29), INT8_C( 52), INT8_C( 0), INT8_C( -27), INT8_C( 42), INT8_C( 0), INT8_C( -46), INT8_C( -72), INT8_C( 16), INT8_C( -75), INT8_C( 52), INT8_C(-118), INT8_C( 0), INT8_C(-120), INT8_C( 42), INT8_C( -50), INT8_C( 79), INT8_C( 0), INT8_C( 70), INT8_C( -95), INT8_C( 0), INT8_C( 3), INT8_C( 61), INT8_C( 70), INT8_C( 0), INT8_C( 0), INT8_C( -99), INT8_C( 73), INT8_C( 0), INT8_C( -14), INT8_C( 72), INT8_C( 0), INT8_C( 0), INT8_C(-114), INT8_C( 83), INT8_C( -87), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -88), INT8_C( 0), INT8_C( 83), INT8_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_shuffle_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } /* Table-driven test for simde_mm512_maskz_shuffle_epi8. Each of the 8 entries in test_vec holds a 64-bit mask `k`, two 512-bit inputs `a` and `b`, and the expected 512-bit result `r`. The loop that follows the table calls simde_mm512_maskz_shuffle_epi8(k, a, b) for every entry and compares the result with `r` through simde_assert_m512i_i8; the function returns 0 once the loop completes. NOTE(review): the vectors look machine-generated -- regenerate them rather than editing individual constants by hand. */ static int test_simde_mm512_maskz_shuffle_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask64 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT64_C(17286015531074160252), simde_mm512_set_epi8(INT8_C(-115), INT8_C( -27), INT8_C( 62), INT8_C( -85), INT8_C( 49), INT8_C(-115), INT8_C( 38), INT8_C( 4), INT8_C( 92), INT8_C( 116), INT8_C( -78), INT8_C( -19), INT8_C( -73), INT8_C( 22), INT8_C( -66), INT8_C( -29), INT8_C( 55), INT8_C( 78), INT8_C( -45), INT8_C(-119), INT8_C( 30), INT8_C( -29), INT8_C( -23), INT8_C( 89), INT8_C( 15), INT8_C( 119), INT8_C( -88), INT8_C( 109), INT8_C( -16), INT8_C( -10), INT8_C( 0), INT8_C( -18), INT8_C( 1), INT8_C( 116), INT8_C( 62), INT8_C( -73), INT8_C(-122), INT8_C( 0), INT8_C( 111), INT8_C( 95), INT8_C( -20), INT8_C( 3), INT8_C( 122), INT8_C(-121), INT8_C( 13), INT8_C( 25), INT8_C( -35), INT8_C(-127), INT8_C( 82), INT8_C( 90), INT8_C( 53), INT8_C( 123), INT8_C( 73), INT8_C( 108), INT8_C( -18), INT8_C( 15), INT8_C( 81), INT8_C( -27), INT8_C( 102), 
INT8_C( 118), INT8_C( 39), INT8_C( -77), INT8_C( 45), INT8_C( 81)), simde_mm512_set_epi8(INT8_C( 68), INT8_C(-108), INT8_C(-121), INT8_C(-115), INT8_C( -59), INT8_C( -78), INT8_C(-111), INT8_C( 56), INT8_C( 9), INT8_C(-121), INT8_C( 120), INT8_C( -36), INT8_C( 36), INT8_C(-103), INT8_C(-104), INT8_C( 25), INT8_C(-111), INT8_C( -63), INT8_C( -35), INT8_C(-120), INT8_C( 28), INT8_C( -44), INT8_C( -26), INT8_C( -86), INT8_C( -13), INT8_C( 53), INT8_C( -88), INT8_C(-107), INT8_C( 68), INT8_C( 42), INT8_C(-118), INT8_C( 111), INT8_C( 54), INT8_C( -58), INT8_C( -13), INT8_C( 27), INT8_C( 23), INT8_C( 41), INT8_C(-119), INT8_C( 44), INT8_C( 7), INT8_C(-120), INT8_C( 32), INT8_C( -43), INT8_C( 114), INT8_C( -72), INT8_C( 73), INT8_C( -96), INT8_C( 96), INT8_C( 110), INT8_C( -81), INT8_C( -76), INT8_C( 103), INT8_C(-100), INT8_C( -22), INT8_C( 18), INT8_C( 115), INT8_C( 54), INT8_C( -40), INT8_C( 125), INT8_C( 110), INT8_C( 31), INT8_C( 51), INT8_C(-104)), simde_mm512_set_epi8(INT8_C( -19), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 4), INT8_C( 38), INT8_C( 0), INT8_C( 4), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 109), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -20), INT8_C( 111), INT8_C( 0), INT8_C( -73), INT8_C( -20), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 25), INT8_C( 0), INT8_C( 111), INT8_C( 0), INT8_C( 81), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -27), INT8_C( 0), INT8_C( 53), INT8_C( 90), INT8_C( 82), INT8_C( 0), INT8_C( 0)) }, { UINT64_C( 9031211210081693899), simde_mm512_set_epi8(INT8_C(-106), INT8_C( -18), INT8_C( 65), INT8_C(-104), INT8_C( 81), INT8_C( 38), INT8_C( -84), INT8_C( -2), INT8_C( -14), INT8_C( 85), INT8_C( -80), INT8_C( 80), 
INT8_C( 48), INT8_C( 93), INT8_C( 79), INT8_C( 127), INT8_C( 16), INT8_C( 41), INT8_C( 54), INT8_C(-116), INT8_C( 17), INT8_C( 42), INT8_C( -86), INT8_C( 38), INT8_C(-118), INT8_C( -56), INT8_C( 60), INT8_C( 19), INT8_C(-100), INT8_C(-107), INT8_C( 105), INT8_C( -76), INT8_C( 10), INT8_C( -9), INT8_C( -12), INT8_C( -56), INT8_C( -71), INT8_C( 96), INT8_C( 6), INT8_C( 24), INT8_C( 69), INT8_C( 73), INT8_C( -27), INT8_C( 3), INT8_C( -95), INT8_C( 50), INT8_C( -67), INT8_C( -39), INT8_C( 97), INT8_C( 62), INT8_C( 101), INT8_C( 56), INT8_C( -79), INT8_C( 83), INT8_C( -99), INT8_C( -56), INT8_C( -17), INT8_C( -56), INT8_C( -8), INT8_C( -16), INT8_C( 84), INT8_C( -95), INT8_C( -73), INT8_C( 54)), simde_mm512_set_epi8(INT8_C( 63), INT8_C( -51), INT8_C( 87), INT8_C( 32), INT8_C( -65), INT8_C( 55), INT8_C( 14), INT8_C( 81), INT8_C(-123), INT8_C(-100), INT8_C( -39), INT8_C( -44), INT8_C( 22), INT8_C( 112), INT8_C( 16), INT8_C( 15), INT8_C(-104), INT8_C(-102), INT8_C( 102), INT8_C( -95), INT8_C( 69), INT8_C( 13), INT8_C( -21), INT8_C( 45), INT8_C( 62), INT8_C(-120), INT8_C(-111), INT8_C( 32), INT8_C(-107), INT8_C( -30), INT8_C( 99), INT8_C( -64), INT8_C( 8), INT8_C( -42), INT8_C( 81), INT8_C( -34), INT8_C( -46), INT8_C( 26), INT8_C( 31), INT8_C( -2), INT8_C( 68), INT8_C( -7), INT8_C( -71), INT8_C( 46), INT8_C( -21), INT8_C( -73), INT8_C( 21), INT8_C( 83), INT8_C(-108), INT8_C( -97), INT8_C( -69), INT8_C( 73), INT8_C( 57), INT8_C( -37), INT8_C( 21), INT8_C( 82), INT8_C(-119), INT8_C(-126), INT8_C( 126), INT8_C( 91), INT8_C( 115), INT8_C( 31), INT8_C( -79), INT8_C( 28)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( -14), INT8_C( 127), INT8_C( 0), INT8_C( -14), INT8_C( 0), INT8_C( 79), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C(-106), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 60), INT8_C( 54), INT8_C( 0), INT8_C( 0), INT8_C( 41), INT8_C( 0), INT8_C( 0), INT8_C( -76), INT8_C( 0), INT8_C( 0), INT8_C(-100), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 96), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -9), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -95), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -99), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 84), INT8_C( 0), INT8_C( 0), INT8_C( 56)) }, { UINT64_C( 6828010367541473016), simde_mm512_set_epi8(INT8_C( 84), INT8_C( -96), INT8_C( 87), INT8_C( -22), INT8_C( -5), INT8_C( -3), INT8_C(-127), INT8_C( 41), INT8_C( 74), INT8_C( 72), INT8_C( -4), INT8_C( 28), INT8_C(-115), INT8_C( 93), INT8_C( 102), INT8_C( 44), INT8_C(-103), INT8_C( -29), INT8_C( -50), INT8_C( 48), INT8_C( -96), INT8_C( -50), INT8_C( 46), INT8_C( -65), INT8_C( 4), INT8_C( 43), INT8_C( -75), INT8_C( 97), INT8_C( -26), INT8_C(-103), INT8_C( 71), INT8_C(-107), INT8_C( 91), INT8_C( 45), INT8_C( -11), INT8_C( 47), INT8_C( 29), INT8_C( 25), INT8_C( 26), INT8_C( -9), INT8_C( 10), INT8_C( 36), INT8_C( -79), INT8_C( -53), INT8_C( 41), INT8_C( 1), INT8_C( -23), INT8_C( -63), INT8_C(-127), INT8_C( 68), INT8_C( 48), INT8_C( 36), INT8_C( 89), INT8_C(-112), INT8_C( -31), INT8_C( 120), INT8_C( 35), INT8_C( 62), INT8_C( -21), INT8_C(-114), INT8_C(-104), INT8_C( 57), INT8_C( 42), INT8_C(-111)), simde_mm512_set_epi8(INT8_C( 96), INT8_C( -64), INT8_C( -23), INT8_C( -46), INT8_C( -29), INT8_C( -71), INT8_C( 71), INT8_C( 90), INT8_C( 44), INT8_C( -92), INT8_C( -31), INT8_C( 26), INT8_C( -99), INT8_C( -53), INT8_C( 117), INT8_C( 18), INT8_C( -63), INT8_C( 45), INT8_C( 12), INT8_C( 24), INT8_C(-108), INT8_C( 18), INT8_C( -60), INT8_C( 28), INT8_C( 50), INT8_C( -11), INT8_C( -68), INT8_C( -31), INT8_C( 105), INT8_C(-106), INT8_C( 98), INT8_C( 51), INT8_C( 58), INT8_C( 103), INT8_C( 111), INT8_C(-127), INT8_C( 68), INT8_C( -56), INT8_C( 124), INT8_C(-119), INT8_C( 74), INT8_C( -62), INT8_C(-116), INT8_C( 37), INT8_C( -12), INT8_C( 114), INT8_C( 0), INT8_C( 61), 
INT8_C( 103), INT8_C( -4), INT8_C(-105), INT8_C( -68), INT8_C( 39), INT8_C(-100), INT8_C( -93), INT8_C( 11), INT8_C( -80), INT8_C( -19), INT8_C( -22), INT8_C( -39), INT8_C( 127), INT8_C( -38), INT8_C(-125), INT8_C(-111)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 74), INT8_C( 0), INT8_C( -22), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 93), INT8_C( 0), INT8_C( -50), INT8_C( 48), INT8_C( -65), INT8_C( 0), INT8_C(-103), INT8_C( 0), INT8_C( 48), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 25), INT8_C( 0), INT8_C( 91), INT8_C( 0), INT8_C( -53), INT8_C( 0), INT8_C( 47), INT8_C( 0), INT8_C( 25), INT8_C( 0), INT8_C( 0), INT8_C( -79), INT8_C( 0), INT8_C( 1), INT8_C( -63), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 35), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-127), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { UINT64_C(18325019528117086600), simde_mm512_set_epi8(INT8_C( 122), INT8_C( -54), INT8_C( 50), INT8_C( 123), INT8_C( -31), INT8_C( -74), INT8_C( -64), INT8_C( 54), INT8_C( -81), INT8_C( 60), INT8_C( 31), INT8_C( -23), INT8_C( 108), INT8_C(-119), INT8_C( -92), INT8_C( -80), INT8_C( -30), INT8_C( -37), INT8_C( 51), INT8_C( -36), INT8_C( 8), INT8_C( 52), INT8_C( 97), INT8_C( 123), INT8_C( -49), INT8_C(-124), INT8_C( 95), INT8_C( -83), INT8_C( 70), INT8_C( -50), INT8_C( -61), INT8_C( 25), INT8_C( -97), INT8_C( 28), INT8_C( -58), INT8_C( 11), INT8_C( -14), INT8_C( 126), INT8_C( 81), INT8_C( 45), INT8_C( -23), INT8_C( 120), INT8_C( -83), INT8_C( -16), INT8_C( 7), INT8_C( 51), INT8_C( -57), INT8_C( -50), INT8_C( -21), INT8_C( 98), INT8_C( 88), INT8_C( 0), INT8_C( -66), INT8_C( 3), INT8_C( 124), INT8_C(-113), INT8_C( 50), INT8_C( 88), INT8_C( -85), INT8_C( -93), INT8_C( -44), INT8_C( -13), INT8_C( -94), INT8_C( 17)), simde_mm512_set_epi8(INT8_C( -13), 
INT8_C( -12), INT8_C(-124), INT8_C( 96), INT8_C( -13), INT8_C( -98), INT8_C( 99), INT8_C( -13), INT8_C( 56), INT8_C( -45), INT8_C(-108), INT8_C( -19), INT8_C(-124), INT8_C( -27), INT8_C( 22), INT8_C( 126), INT8_C(-106), INT8_C( -68), INT8_C( -60), INT8_C( 8), INT8_C( 60), INT8_C( 93), INT8_C( -33), INT8_C( -27), INT8_C( -7), INT8_C( 27), INT8_C(-122), INT8_C( -38), INT8_C( 23), INT8_C( 6), INT8_C( 45), INT8_C( -21), INT8_C( -23), INT8_C(-101), INT8_C( 116), INT8_C( 127), INT8_C( 96), INT8_C( 40), INT8_C( -97), INT8_C( 40), INT8_C( 86), INT8_C( -44), INT8_C( 70), INT8_C( -71), INT8_C( 62), INT8_C( -21), INT8_C( 66), INT8_C( 68), INT8_C( -87), INT8_C( -61), INT8_C( 48), INT8_C( -70), INT8_C( 18), INT8_C( -78), INT8_C( -98), INT8_C( 117), INT8_C( 74), INT8_C( 32), INT8_C( 93), INT8_C( 125), INT8_C( -47), INT8_C( -60), INT8_C( -86), INT8_C( 117)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -80), INT8_C( 0), INT8_C( 0), INT8_C( 108), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 60), INT8_C( -54), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -36), INT8_C( 51), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -49), INT8_C( 0), INT8_C( 51), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -16), INT8_C( -97), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 45), INT8_C( 120), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 51), INT8_C( -16), INT8_C( 0), INT8_C( 0), INT8_C( 17), INT8_C( 0), INT8_C( -13), INT8_C( 0), INT8_C( 0), INT8_C( -85), INT8_C( 3), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { UINT64_C( 7677112093376593822), simde_mm512_set_epi8(INT8_C( -53), INT8_C( 34), INT8_C( 9), INT8_C( 64), INT8_C( -61), INT8_C( -25), INT8_C(-115), INT8_C( 100), INT8_C( 65), INT8_C( 8), INT8_C( 69), INT8_C( -8), INT8_C( -15), INT8_C( -51), INT8_C( 1), INT8_C( 90), INT8_C( 115), INT8_C( 51), INT8_C( -91), INT8_C( 56), 
INT8_C( 64), INT8_C( -39), INT8_C(-119), INT8_C( -28), INT8_C( -54), INT8_C( 28), INT8_C( 54), INT8_C( -8), INT8_C( -54), INT8_C(-128), INT8_C( -28), INT8_C( -71), INT8_C( 107), INT8_C( -66), INT8_C(-114), INT8_C( -88), INT8_C( 34), INT8_C( -83), INT8_C( -21), INT8_C( -64), INT8_C( 121), INT8_C( -20), INT8_C( -89), INT8_C( -94), INT8_C( 112), INT8_C( -27), INT8_C( 81), INT8_C( -54), INT8_C( -64), INT8_C(-114), INT8_C( 48), INT8_C( -89), INT8_C( -61), INT8_C( 26), INT8_C( 43), INT8_C( 29), INT8_C( 0), INT8_C( 64), INT8_C( 123), INT8_C( -67), INT8_C( 15), INT8_C( 120), INT8_C( 36), INT8_C( 40)), simde_mm512_set_epi8(INT8_C( 113), INT8_C( 32), INT8_C( 71), INT8_C( 55), INT8_C( -26), INT8_C( 82), INT8_C( -81), INT8_C( -20), INT8_C( -55), INT8_C( 112), INT8_C( 45), INT8_C( 37), INT8_C( 67), INT8_C( -69), INT8_C( 64), INT8_C( 39), INT8_C( 72), INT8_C( 45), INT8_C( 120), INT8_C( -5), INT8_C(-109), INT8_C( 62), INT8_C( 17), INT8_C( 31), INT8_C( -30), INT8_C( -58), INT8_C( 56), INT8_C( 21), INT8_C( 72), INT8_C( -74), INT8_C( -40), INT8_C( 120), INT8_C( 95), INT8_C( 108), INT8_C( 32), INT8_C( 64), INT8_C(-128), INT8_C( 102), INT8_C( -21), INT8_C( 28), INT8_C( 105), INT8_C( 52), INT8_C( 85), INT8_C(-104), INT8_C( 57), INT8_C( -31), INT8_C( -38), INT8_C(-124), INT8_C(-107), INT8_C( -2), INT8_C( 55), INT8_C( 46), INT8_C( -71), INT8_C( 77), INT8_C( 18), INT8_C( 70), INT8_C( 89), INT8_C( 125), INT8_C( -42), INT8_C(-125), INT8_C( 121), INT8_C( -11), INT8_C( -69), INT8_C( -59)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 90), INT8_C( 65), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -15), INT8_C( 0), INT8_C( 90), INT8_C( 0), INT8_C( -28), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 51), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -54), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), 
INT8_C( -21), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-114), INT8_C( 0), INT8_C( 48), INT8_C( 120), INT8_C( 64), INT8_C( 43), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 43), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { UINT64_C( 4840581956081845689), simde_mm512_set_epi8(INT8_C( 122), INT8_C(-108), INT8_C( -70), INT8_C( -63), INT8_C( 56), INT8_C( 71), INT8_C( -14), INT8_C( 49), INT8_C( -73), INT8_C( 53), INT8_C( -29), INT8_C( 3), INT8_C( -73), INT8_C( 43), INT8_C( -22), INT8_C( 85), INT8_C( -26), INT8_C( -9), INT8_C( 66), INT8_C( 1), INT8_C( -13), INT8_C( 60), INT8_C(-119), INT8_C( -83), INT8_C(-122), INT8_C( -64), INT8_C( -83), INT8_C( -74), INT8_C( 119), INT8_C( -8), INT8_C( 12), INT8_C( 113), INT8_C( -12), INT8_C( -84), INT8_C( 6), INT8_C( 69), INT8_C( 2), INT8_C( -75), INT8_C( -34), INT8_C(-126), INT8_C( 3), INT8_C(-128), INT8_C( -9), INT8_C( 24), INT8_C( 11), INT8_C( -94), INT8_C( -32), INT8_C( 110), INT8_C( 33), INT8_C( -24), INT8_C( 125), INT8_C( 35), INT8_C(-103), INT8_C( -48), INT8_C( -22), INT8_C( 38), INT8_C( -81), INT8_C( 9), INT8_C( -11), INT8_C(-124), INT8_C( 71), INT8_C( 31), INT8_C( -42), INT8_C( 93)), simde_mm512_set_epi8(INT8_C( 53), INT8_C( 77), INT8_C( 63), INT8_C( -84), INT8_C( 27), INT8_C( 22), INT8_C( 3), INT8_C( -37), INT8_C( 65), INT8_C( 118), INT8_C(-126), INT8_C( 97), INT8_C( 109), INT8_C( 7), INT8_C(-114), INT8_C( -75), INT8_C(-125), INT8_C( 121), INT8_C(-128), INT8_C( 103), INT8_C( 0), INT8_C( 101), INT8_C( -41), INT8_C( 89), INT8_C( -83), INT8_C( -65), INT8_C( 9), INT8_C( -7), INT8_C( -63), INT8_C( 13), INT8_C( 105), INT8_C( 92), INT8_C( -18), INT8_C( -21), INT8_C(-102), INT8_C(-114), INT8_C( 74), INT8_C( 121), INT8_C( -45), INT8_C( 52), INT8_C( -63), INT8_C( -93), INT8_C( 98), INT8_C( 106), INT8_C(-109), INT8_C( -47), INT8_C( 37), INT8_C( 70), INT8_C( 100), INT8_C( 121), INT8_C( 18), INT8_C( 28), INT8_C(-117), INT8_C( 107), INT8_C( 3), INT8_C( -62), 
INT8_C( 42), INT8_C( 72), INT8_C( 91), INT8_C( 86), INT8_C( -72), INT8_C( 9), INT8_C( -80), INT8_C( 118)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( -70), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -73), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -70), INT8_C( -73), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-122), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-119), INT8_C( 0), INT8_C( 0), INT8_C(-119), INT8_C( 0), INT8_C( 0), INT8_C( 66), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -75), INT8_C( -34), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -94), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-124), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-103), INT8_C( 0), INT8_C( 0), INT8_C( -48), INT8_C( 0), INT8_C(-103), INT8_C( 9), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 9)) }, { UINT64_C(17498311407133456191), simde_mm512_set_epi8(INT8_C( 61), INT8_C( -89), INT8_C( 84), INT8_C( -94), INT8_C( 31), INT8_C(-116), INT8_C( -67), INT8_C(-102), INT8_C( -72), INT8_C( -91), INT8_C(-105), INT8_C(-108), INT8_C( -44), INT8_C( 74), INT8_C( -28), INT8_C( 124), INT8_C( 120), INT8_C( -41), INT8_C( -79), INT8_C( 122), INT8_C( 87), INT8_C(-119), INT8_C( 54), INT8_C( -2), INT8_C( -47), INT8_C( 84), INT8_C(-126), INT8_C( -64), INT8_C( 14), INT8_C( 11), INT8_C( 37), INT8_C( -23), INT8_C( 67), INT8_C( 124), INT8_C( 58), INT8_C( -94), INT8_C( 30), INT8_C( -33), INT8_C( 70), INT8_C( -24), INT8_C( 38), INT8_C( -97), INT8_C( -56), INT8_C( -60), INT8_C( -59), INT8_C( 65), INT8_C( -74), INT8_C( 45), INT8_C( -11), INT8_C( 55), INT8_C( -82), INT8_C( 12), INT8_C( 106), INT8_C( 22), INT8_C(-124), INT8_C( -4), INT8_C( 2), INT8_C( -81), INT8_C( 14), INT8_C( 90), INT8_C(-100), INT8_C(-122), INT8_C( -35), INT8_C( 81)), simde_mm512_set_epi8(INT8_C( 21), INT8_C( 108), INT8_C( 125), INT8_C( -60), INT8_C( -72), INT8_C( 74), INT8_C( -5), INT8_C( -58), INT8_C( -41), 
INT8_C( 22), INT8_C(-115), INT8_C( 102), INT8_C( 59), INT8_C( -80), INT8_C( -15), INT8_C( -63), INT8_C( 84), INT8_C( -71), INT8_C( 8), INT8_C( 12), INT8_C( -11), INT8_C( -76), INT8_C( 62), INT8_C( 93), INT8_C( -75), INT8_C( -77), INT8_C( -84), INT8_C(-108), INT8_C( -35), INT8_C( 14), INT8_C( -60), INT8_C( 18), INT8_C( 23), INT8_C( -60), INT8_C( -63), INT8_C(-114), INT8_C( -55), INT8_C( 75), INT8_C( -99), INT8_C( -55), INT8_C( 58), INT8_C( 76), INT8_C(-102), INT8_C(-118), INT8_C( -30), INT8_C( 39), INT8_C( 119), INT8_C( 85), INT8_C( -8), INT8_C( -72), INT8_C( -60), INT8_C( -94), INT8_C(-112), INT8_C( 119), INT8_C( 124), INT8_C( 76), INT8_C( -42), INT8_C(-124), INT8_C( 54), INT8_C( 74), INT8_C( -92), INT8_C( 99), INT8_C( 79), INT8_C( -3)), simde_mm512_set_epi8(INT8_C(-105), INT8_C( -94), INT8_C( 84), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -91), INT8_C( 0), INT8_C( -91), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -2), INT8_C( 122), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -79), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 11), INT8_C( 38), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 30), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -94), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 38), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 2), INT8_C( 12), INT8_C( 12), INT8_C( 0), INT8_C( 0), INT8_C( -81), INT8_C( 22), INT8_C( 0), INT8_C(-100), INT8_C( -11), INT8_C( 0)) }, { UINT64_C( 3462613925466476941), simde_mm512_set_epi8(INT8_C( 18), INT8_C( -74), INT8_C( 14), INT8_C( 36), INT8_C( -7), INT8_C( 113), INT8_C( 40), INT8_C( 48), INT8_C(-107), INT8_C( -34), INT8_C( -75), INT8_C( 85), INT8_C( -35), INT8_C(-116), INT8_C( 65), INT8_C( -21), INT8_C( 15), INT8_C( 3), INT8_C( 45), INT8_C( 21), INT8_C( 72), INT8_C( 93), INT8_C( 108), INT8_C( 125), INT8_C( 1), INT8_C( 75), INT8_C( 21), INT8_C( -36), 
INT8_C(-126), INT8_C( 122), INT8_C( 71), INT8_C( 76), INT8_C( 28), INT8_C( -56), INT8_C( 32), INT8_C( 101), INT8_C(-107), INT8_C(-111), INT8_C( -88), INT8_C( -19), INT8_C( -77), INT8_C( 19), INT8_C( -21), INT8_C(-111), INT8_C( -68), INT8_C( 82), INT8_C(-118), INT8_C( -76), INT8_C( 47), INT8_C( 127), INT8_C( 62), INT8_C( -16), INT8_C( 10), INT8_C( -14), INT8_C(-100), INT8_C( 86), INT8_C( 29), INT8_C( 107), INT8_C( 56), INT8_C( 21), INT8_C( 24), INT8_C( 68), INT8_C( -96), INT8_C( 64)), simde_mm512_set_epi8(INT8_C( -57), INT8_C( 94), INT8_C(-125), INT8_C(-124), INT8_C( 36), INT8_C( 57), INT8_C( 68), INT8_C( -52), INT8_C( 39), INT8_C( 50), INT8_C( -48), INT8_C( 94), INT8_C( 53), INT8_C( 11), INT8_C( 29), INT8_C( 65), INT8_C( -82), INT8_C(-119), INT8_C( 115), INT8_C( 68), INT8_C( -69), INT8_C( 118), INT8_C( 17), INT8_C( 8), INT8_C( -64), INT8_C( -53), INT8_C( -42), INT8_C( 126), INT8_C( 67), INT8_C( 50), INT8_C( -18), INT8_C( 76), INT8_C( -19), INT8_C( 123), INT8_C( -87), INT8_C( 106), INT8_C( -74), INT8_C( 44), INT8_C( 117), INT8_C( 103), INT8_C( 81), INT8_C( 122), INT8_C( 56), INT8_C( -10), INT8_C( 67), INT8_C( 79), INT8_C( 83), INT8_C( -38), INT8_C( -13), INT8_C( 43), INT8_C( 27), INT8_C( -97), INT8_C( 102), INT8_C( 126), INT8_C( 38), INT8_C( -62), INT8_C( -24), INT8_C( 117), INT8_C( -38), INT8_C( -93), INT8_C( -58), INT8_C(-124), INT8_C( -75), INT8_C( 10)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -75), INT8_C( -7), INT8_C( 0), INT8_C( 65), INT8_C( 0), INT8_C( 0), INT8_C(-126), INT8_C( 0), INT8_C( 0), INT8_C( 75), INT8_C( 0), INT8_C( 125), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 122), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-107), INT8_C( 0), INT8_C(-111), INT8_C( 0), INT8_C( 101), INT8_C( 0), INT8_C( -77), INT8_C(-118), INT8_C(-111), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -14)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_shuffle_epi8(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_shuffle_f32x4(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r0[16]; const simde_float32 r1[16]; const simde_float32 r2[16]; const simde_float32 r3[16]; } test_vec[4] = { { { SIMDE_FLOAT32_C( -180.16), SIMDE_FLOAT32_C( -789.86), SIMDE_FLOAT32_C( 107.29), SIMDE_FLOAT32_C( 489.11), SIMDE_FLOAT32_C( 914.56), SIMDE_FLOAT32_C( 797.57), SIMDE_FLOAT32_C( 982.30), SIMDE_FLOAT32_C( 320.59) }, { SIMDE_FLOAT32_C( -128.27), SIMDE_FLOAT32_C( -299.60), SIMDE_FLOAT32_C( 846.68), SIMDE_FLOAT32_C( 701.28), SIMDE_FLOAT32_C( -495.11), SIMDE_FLOAT32_C( -859.93), SIMDE_FLOAT32_C( 964.29), SIMDE_FLOAT32_C( 599.86) }, { SIMDE_FLOAT32_C( -180.16), SIMDE_FLOAT32_C( -789.86), SIMDE_FLOAT32_C( 107.29), SIMDE_FLOAT32_C( 489.11), SIMDE_FLOAT32_C( -128.27), SIMDE_FLOAT32_C( -299.60), SIMDE_FLOAT32_C( 846.68), SIMDE_FLOAT32_C( 701.28) }, { SIMDE_FLOAT32_C( 914.56), SIMDE_FLOAT32_C( 797.57), SIMDE_FLOAT32_C( 982.30), SIMDE_FLOAT32_C( 320.59), SIMDE_FLOAT32_C( -128.27), SIMDE_FLOAT32_C( -299.60), SIMDE_FLOAT32_C( 846.68), SIMDE_FLOAT32_C( 701.28) }, { SIMDE_FLOAT32_C( -180.16), SIMDE_FLOAT32_C( -789.86), SIMDE_FLOAT32_C( 107.29), SIMDE_FLOAT32_C( 489.11), SIMDE_FLOAT32_C( -495.11), SIMDE_FLOAT32_C( -859.93), SIMDE_FLOAT32_C( 964.29), SIMDE_FLOAT32_C( 599.86) }, { SIMDE_FLOAT32_C( 914.56), SIMDE_FLOAT32_C( 797.57), SIMDE_FLOAT32_C( 982.30), SIMDE_FLOAT32_C( 320.59), SIMDE_FLOAT32_C( -495.11), SIMDE_FLOAT32_C( -859.93), SIMDE_FLOAT32_C( 964.29), SIMDE_FLOAT32_C( 599.86) } }, { { 
SIMDE_FLOAT32_C( 619.10), SIMDE_FLOAT32_C( -510.63), SIMDE_FLOAT32_C( 541.95), SIMDE_FLOAT32_C( 148.21), SIMDE_FLOAT32_C( -639.14), SIMDE_FLOAT32_C( -554.80), SIMDE_FLOAT32_C( 761.16), SIMDE_FLOAT32_C( -78.91) }, { SIMDE_FLOAT32_C( 645.85), SIMDE_FLOAT32_C( 405.43), SIMDE_FLOAT32_C( 111.06), SIMDE_FLOAT32_C( -405.34), SIMDE_FLOAT32_C( -37.23), SIMDE_FLOAT32_C( -49.39), SIMDE_FLOAT32_C( 669.49), SIMDE_FLOAT32_C( 782.61) }, { SIMDE_FLOAT32_C( 619.10), SIMDE_FLOAT32_C( -510.63), SIMDE_FLOAT32_C( 541.95), SIMDE_FLOAT32_C( 148.21), SIMDE_FLOAT32_C( 645.85), SIMDE_FLOAT32_C( 405.43), SIMDE_FLOAT32_C( 111.06), SIMDE_FLOAT32_C( -405.34) }, { SIMDE_FLOAT32_C( -639.14), SIMDE_FLOAT32_C( -554.80), SIMDE_FLOAT32_C( 761.16), SIMDE_FLOAT32_C( -78.91), SIMDE_FLOAT32_C( 645.85), SIMDE_FLOAT32_C( 405.43), SIMDE_FLOAT32_C( 111.06), SIMDE_FLOAT32_C( -405.34) }, { SIMDE_FLOAT32_C( 619.10), SIMDE_FLOAT32_C( -510.63), SIMDE_FLOAT32_C( 541.95), SIMDE_FLOAT32_C( 148.21), SIMDE_FLOAT32_C( -37.23), SIMDE_FLOAT32_C( -49.39), SIMDE_FLOAT32_C( 669.49), SIMDE_FLOAT32_C( 782.61) }, { SIMDE_FLOAT32_C( -639.14), SIMDE_FLOAT32_C( -554.80), SIMDE_FLOAT32_C( 761.16), SIMDE_FLOAT32_C( -78.91), SIMDE_FLOAT32_C( -37.23), SIMDE_FLOAT32_C( -49.39), SIMDE_FLOAT32_C( 669.49), SIMDE_FLOAT32_C( 782.61) } }, { { SIMDE_FLOAT32_C( 160.75), SIMDE_FLOAT32_C( -223.22), SIMDE_FLOAT32_C( 271.72), SIMDE_FLOAT32_C( 75.31), SIMDE_FLOAT32_C( -425.65), SIMDE_FLOAT32_C( 254.02), SIMDE_FLOAT32_C( -604.10), SIMDE_FLOAT32_C( 446.08) }, { SIMDE_FLOAT32_C( 954.41), SIMDE_FLOAT32_C( -757.42), SIMDE_FLOAT32_C( 147.36), SIMDE_FLOAT32_C( -540.70), SIMDE_FLOAT32_C( -617.36), SIMDE_FLOAT32_C( 111.65), SIMDE_FLOAT32_C( -940.84), SIMDE_FLOAT32_C( -998.26) }, { SIMDE_FLOAT32_C( 160.75), SIMDE_FLOAT32_C( -223.22), SIMDE_FLOAT32_C( 271.72), SIMDE_FLOAT32_C( 75.31), SIMDE_FLOAT32_C( 954.41), SIMDE_FLOAT32_C( -757.42), SIMDE_FLOAT32_C( 147.36), SIMDE_FLOAT32_C( -540.70) }, { SIMDE_FLOAT32_C( -425.65), SIMDE_FLOAT32_C( 254.02), 
SIMDE_FLOAT32_C( -604.10), SIMDE_FLOAT32_C( 446.08), SIMDE_FLOAT32_C( 954.41), SIMDE_FLOAT32_C( -757.42), SIMDE_FLOAT32_C( 147.36), SIMDE_FLOAT32_C( -540.70) }, { SIMDE_FLOAT32_C( 160.75), SIMDE_FLOAT32_C( -223.22), SIMDE_FLOAT32_C( 271.72), SIMDE_FLOAT32_C( 75.31), SIMDE_FLOAT32_C( -617.36), SIMDE_FLOAT32_C( 111.65), SIMDE_FLOAT32_C( -940.84), SIMDE_FLOAT32_C( -998.26) }, { SIMDE_FLOAT32_C( -425.65), SIMDE_FLOAT32_C( 254.02), SIMDE_FLOAT32_C( -604.10), SIMDE_FLOAT32_C( 446.08), SIMDE_FLOAT32_C( -617.36), SIMDE_FLOAT32_C( 111.65), SIMDE_FLOAT32_C( -940.84), SIMDE_FLOAT32_C( -998.26) } }, { { SIMDE_FLOAT32_C( 601.02), SIMDE_FLOAT32_C( 601.12), SIMDE_FLOAT32_C( 149.95), SIMDE_FLOAT32_C( 961.88), SIMDE_FLOAT32_C( -953.68), SIMDE_FLOAT32_C( -88.89), SIMDE_FLOAT32_C( -117.03), SIMDE_FLOAT32_C( 692.17) }, { SIMDE_FLOAT32_C( -683.46), SIMDE_FLOAT32_C( 994.03), SIMDE_FLOAT32_C( -713.17), SIMDE_FLOAT32_C( 279.31), SIMDE_FLOAT32_C( -55.35), SIMDE_FLOAT32_C( 956.32), SIMDE_FLOAT32_C( 61.92), SIMDE_FLOAT32_C( -894.60) }, { SIMDE_FLOAT32_C( 601.02), SIMDE_FLOAT32_C( 601.12), SIMDE_FLOAT32_C( 149.95), SIMDE_FLOAT32_C( 961.88), SIMDE_FLOAT32_C( -683.46), SIMDE_FLOAT32_C( 994.03), SIMDE_FLOAT32_C( -713.17), SIMDE_FLOAT32_C( 279.31) }, { SIMDE_FLOAT32_C( -953.68), SIMDE_FLOAT32_C( -88.89), SIMDE_FLOAT32_C( -117.03), SIMDE_FLOAT32_C( 692.17), SIMDE_FLOAT32_C( -683.46), SIMDE_FLOAT32_C( 994.03), SIMDE_FLOAT32_C( -713.17), SIMDE_FLOAT32_C( 279.31) }, { SIMDE_FLOAT32_C( 601.02), SIMDE_FLOAT32_C( 601.12), SIMDE_FLOAT32_C( 149.95), SIMDE_FLOAT32_C( 961.88), SIMDE_FLOAT32_C( -55.35), SIMDE_FLOAT32_C( 956.32), SIMDE_FLOAT32_C( 61.92), SIMDE_FLOAT32_C( -894.60) }, { SIMDE_FLOAT32_C( -953.68), SIMDE_FLOAT32_C( -88.89), SIMDE_FLOAT32_C( -117.03), SIMDE_FLOAT32_C( 692.17), SIMDE_FLOAT32_C( -55.35), SIMDE_FLOAT32_C( 956.32), SIMDE_FLOAT32_C( 61.92), SIMDE_FLOAT32_C( -894.60) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 a = 
simde_mm256_loadu_ps(test_vec[i].a);
    simde__m256 b = simde_mm256_loadu_ps(test_vec[i].b);
    simde__m256 r;

    r = simde_mm256_shuffle_f32x4(a, b, 0);
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r0), 1);
    r = simde_mm256_shuffle_f32x4(a, b, 1);
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r1), 1);
    r = simde_mm256_shuffle_f32x4(a, b, 2);
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r2), 1);
    r = simde_mm256_shuffle_f32x4(a, b, 3);
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r3), 1);
  }

  return 0;
#else
  fputc('\n', stdout);
  for (int i = 0 ; i < 4 ; i++) {
    simde__m256 a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
    simde__m256 b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
    simde__m256 r;

    simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_shuffle_f32x4(a, b, 0);
    simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_shuffle_f32x4(a, b, 1);
    simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_shuffle_f32x4(a, b, 2);
    simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_shuffle_f32x4(a, b, 3);
    simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Table-driven test for simde_mm256_mask_shuffle_f32x4().
 *
 * Each of the four table entries holds a pass-through vector `src`, a
 * write-mask `k`, the two inputs `a` and `b`, and the expected results
 * r0..r3 for the immediates 0..3.  The function returns 0 after the loop
 * over the table completes.
 *
 * The expected values in the table are consistent with: bit 0 of the
 * immediate choosing which four-float half of `a` fills lanes 0-3, bit 1
 * choosing which half of `b` fills lanes 4-7, and lane j keeping src[j]
 * wherever bit j of `k` is clear.
 *
 * The #else branch is not compiled while the guard reads `#if 1`.  It draws
 * random inputs in [-1000, 1000], writes the inputs and the four results
 * through the simde_test_x86_write_* helpers, and returns 1.
 * NOTE(review): presumably this branch is how the table was generated --
 * confirm before relying on it. */
static int
test_simde_mm256_mask_shuffle_f32x4(SIMDE_MUNIT_TEST_ARGS) {
#if 1
  const struct {
    const simde_float32 src[8];
    const simde__mmask8 k;
    const simde_float32 a[8];
    const simde_float32 b[8];
    const simde_float32 r0[8];
    const simde_float32 r1[8];
    const simde_float32 r2[8];
    const simde_float32 r3[8];
  } test_vec[4] = {
    /* Entry layout: src, k, a, b, then r0, r1, r2, r3. */
    { { SIMDE_FLOAT32_C( -410.56), SIMDE_FLOAT32_C( 408.21), SIMDE_FLOAT32_C( -376.06), SIMDE_FLOAT32_C( -599.18), SIMDE_FLOAT32_C( 884.20), SIMDE_FLOAT32_C( -695.71), SIMDE_FLOAT32_C( 448.37), SIMDE_FLOAT32_C( 207.77) },
      UINT8_C(145),
      { SIMDE_FLOAT32_C( 700.72), SIMDE_FLOAT32_C( 963.37), SIMDE_FLOAT32_C( 188.56), SIMDE_FLOAT32_C( -599.52), SIMDE_FLOAT32_C( 860.41), SIMDE_FLOAT32_C( 527.11), SIMDE_FLOAT32_C( -761.05), SIMDE_FLOAT32_C( 372.33) },
      { SIMDE_FLOAT32_C( -711.48), SIMDE_FLOAT32_C( -848.21), SIMDE_FLOAT32_C( 434.42), SIMDE_FLOAT32_C( 109.85), SIMDE_FLOAT32_C( 382.55), SIMDE_FLOAT32_C( 606.63), SIMDE_FLOAT32_C( -450.37), SIMDE_FLOAT32_C( -569.99) },
      { SIMDE_FLOAT32_C( 700.72), SIMDE_FLOAT32_C( 408.21), SIMDE_FLOAT32_C( -376.06), SIMDE_FLOAT32_C( -599.18), SIMDE_FLOAT32_C( -711.48), SIMDE_FLOAT32_C( -695.71), SIMDE_FLOAT32_C( 448.37), SIMDE_FLOAT32_C( 109.85) },
      { SIMDE_FLOAT32_C( 860.41), SIMDE_FLOAT32_C( 408.21), SIMDE_FLOAT32_C( -376.06), SIMDE_FLOAT32_C( -599.18), SIMDE_FLOAT32_C( -711.48), SIMDE_FLOAT32_C( -695.71), SIMDE_FLOAT32_C( 448.37), SIMDE_FLOAT32_C( 109.85) },
      { SIMDE_FLOAT32_C( 700.72), SIMDE_FLOAT32_C( 408.21), SIMDE_FLOAT32_C( -376.06), SIMDE_FLOAT32_C( -599.18), SIMDE_FLOAT32_C( 382.55), SIMDE_FLOAT32_C( -695.71), SIMDE_FLOAT32_C( 448.37), SIMDE_FLOAT32_C( -569.99) },
      { SIMDE_FLOAT32_C( 860.41), SIMDE_FLOAT32_C( 408.21), SIMDE_FLOAT32_C( -376.06), SIMDE_FLOAT32_C( -599.18), SIMDE_FLOAT32_C( 382.55), SIMDE_FLOAT32_C( -695.71), SIMDE_FLOAT32_C( 448.37), SIMDE_FLOAT32_C( -569.99) } },
    { { SIMDE_FLOAT32_C( 673.95), SIMDE_FLOAT32_C( 135.70), SIMDE_FLOAT32_C( -124.67), SIMDE_FLOAT32_C( -944.44), SIMDE_FLOAT32_C( 326.62), SIMDE_FLOAT32_C( -646.51), SIMDE_FLOAT32_C( -355.00), SIMDE_FLOAT32_C( -265.17) },
      UINT8_C( 56),
      { SIMDE_FLOAT32_C( 45.82), SIMDE_FLOAT32_C( -380.97), SIMDE_FLOAT32_C( 281.73), SIMDE_FLOAT32_C( -505.81), SIMDE_FLOAT32_C( 826.80), SIMDE_FLOAT32_C( 650.60), SIMDE_FLOAT32_C( -805.09), SIMDE_FLOAT32_C( 790.17) },
      { SIMDE_FLOAT32_C( -160.84), SIMDE_FLOAT32_C( -404.61), SIMDE_FLOAT32_C( 650.58), SIMDE_FLOAT32_C( -633.73), SIMDE_FLOAT32_C( -165.66), SIMDE_FLOAT32_C( 22.91), SIMDE_FLOAT32_C( -345.22), SIMDE_FLOAT32_C( -13.87) },
      { SIMDE_FLOAT32_C( 673.95), SIMDE_FLOAT32_C( 135.70), SIMDE_FLOAT32_C( -124.67), SIMDE_FLOAT32_C( -505.81), SIMDE_FLOAT32_C( -160.84), SIMDE_FLOAT32_C( -404.61), SIMDE_FLOAT32_C( -355.00), SIMDE_FLOAT32_C( -265.17) },
      { SIMDE_FLOAT32_C( 673.95), SIMDE_FLOAT32_C( 135.70), SIMDE_FLOAT32_C( -124.67), SIMDE_FLOAT32_C( 790.17), SIMDE_FLOAT32_C( -160.84), SIMDE_FLOAT32_C( -404.61), SIMDE_FLOAT32_C( -355.00), SIMDE_FLOAT32_C( -265.17) },
      { SIMDE_FLOAT32_C( 673.95), SIMDE_FLOAT32_C( 135.70), SIMDE_FLOAT32_C( -124.67), SIMDE_FLOAT32_C( -505.81), SIMDE_FLOAT32_C( -165.66), SIMDE_FLOAT32_C( 22.91), SIMDE_FLOAT32_C( -355.00), SIMDE_FLOAT32_C( -265.17) },
      { SIMDE_FLOAT32_C( 673.95), SIMDE_FLOAT32_C( 135.70), SIMDE_FLOAT32_C( -124.67), SIMDE_FLOAT32_C( 790.17), SIMDE_FLOAT32_C( -165.66), SIMDE_FLOAT32_C( 22.91), SIMDE_FLOAT32_C( -355.00), SIMDE_FLOAT32_C( -265.17) } },
    { { SIMDE_FLOAT32_C( -542.67), SIMDE_FLOAT32_C( 764.64), SIMDE_FLOAT32_C( -631.31), SIMDE_FLOAT32_C( -936.05), SIMDE_FLOAT32_C( -685.73), SIMDE_FLOAT32_C( -201.30), SIMDE_FLOAT32_C( 737.90), SIMDE_FLOAT32_C( 449.97) },
      UINT8_C(251),
      { SIMDE_FLOAT32_C( 793.46), SIMDE_FLOAT32_C( -223.41), SIMDE_FLOAT32_C( -972.48), SIMDE_FLOAT32_C( -561.54), SIMDE_FLOAT32_C( 511.42), SIMDE_FLOAT32_C( 4.96), SIMDE_FLOAT32_C( 484.27), SIMDE_FLOAT32_C( -869.55) },
      { SIMDE_FLOAT32_C( -713.31), SIMDE_FLOAT32_C( 978.46), SIMDE_FLOAT32_C( 957.25), SIMDE_FLOAT32_C( 937.29), SIMDE_FLOAT32_C( -826.63), SIMDE_FLOAT32_C( 747.42), SIMDE_FLOAT32_C( -223.55), SIMDE_FLOAT32_C( -231.24) },
      { SIMDE_FLOAT32_C( 793.46), SIMDE_FLOAT32_C( -223.41), SIMDE_FLOAT32_C( -631.31), SIMDE_FLOAT32_C( -561.54), SIMDE_FLOAT32_C( -713.31), SIMDE_FLOAT32_C( 978.46), SIMDE_FLOAT32_C( 957.25), SIMDE_FLOAT32_C( 937.29) },
      { SIMDE_FLOAT32_C( 511.42), SIMDE_FLOAT32_C( 4.96), SIMDE_FLOAT32_C( -631.31), SIMDE_FLOAT32_C( -869.55), SIMDE_FLOAT32_C( -713.31), SIMDE_FLOAT32_C( 978.46), SIMDE_FLOAT32_C( 957.25), SIMDE_FLOAT32_C( 937.29) },
      { SIMDE_FLOAT32_C( 793.46), SIMDE_FLOAT32_C( -223.41), SIMDE_FLOAT32_C( -631.31), SIMDE_FLOAT32_C( -561.54), SIMDE_FLOAT32_C( -826.63), SIMDE_FLOAT32_C( 747.42), SIMDE_FLOAT32_C( -223.55), SIMDE_FLOAT32_C( -231.24) },
      { SIMDE_FLOAT32_C( 511.42), SIMDE_FLOAT32_C( 4.96), SIMDE_FLOAT32_C( -631.31), SIMDE_FLOAT32_C( -869.55), SIMDE_FLOAT32_C( -826.63), SIMDE_FLOAT32_C( 747.42), SIMDE_FLOAT32_C( -223.55), SIMDE_FLOAT32_C( -231.24) } },
    { { SIMDE_FLOAT32_C( 398.00), SIMDE_FLOAT32_C( 142.72), SIMDE_FLOAT32_C( 603.11), SIMDE_FLOAT32_C( -579.09), SIMDE_FLOAT32_C( 797.50), SIMDE_FLOAT32_C( -410.76), SIMDE_FLOAT32_C( -121.76), SIMDE_FLOAT32_C( 562.14) },
      UINT8_C( 18),
      { SIMDE_FLOAT32_C( -57.81), SIMDE_FLOAT32_C( 876.41), SIMDE_FLOAT32_C( 756.63), SIMDE_FLOAT32_C( -319.91), SIMDE_FLOAT32_C( 326.38), SIMDE_FLOAT32_C( 430.66), SIMDE_FLOAT32_C( -526.45), SIMDE_FLOAT32_C( -897.02) },
      { SIMDE_FLOAT32_C( 458.18), SIMDE_FLOAT32_C( -88.00), SIMDE_FLOAT32_C( 614.40), SIMDE_FLOAT32_C( -536.86), SIMDE_FLOAT32_C( -603.72), SIMDE_FLOAT32_C( 744.85), SIMDE_FLOAT32_C( -250.16), SIMDE_FLOAT32_C( -625.26) },
      { SIMDE_FLOAT32_C( 398.00), SIMDE_FLOAT32_C( 876.41), SIMDE_FLOAT32_C( 603.11), SIMDE_FLOAT32_C( -579.09), SIMDE_FLOAT32_C( 458.18), SIMDE_FLOAT32_C( -410.76), SIMDE_FLOAT32_C( -121.76), SIMDE_FLOAT32_C( 562.14) },
      { SIMDE_FLOAT32_C( 398.00), SIMDE_FLOAT32_C( 430.66), SIMDE_FLOAT32_C( 603.11), SIMDE_FLOAT32_C( -579.09), SIMDE_FLOAT32_C( 458.18), SIMDE_FLOAT32_C( -410.76), SIMDE_FLOAT32_C( -121.76), SIMDE_FLOAT32_C( 562.14) },
      { SIMDE_FLOAT32_C( 398.00), SIMDE_FLOAT32_C( 876.41), SIMDE_FLOAT32_C( 603.11), SIMDE_FLOAT32_C( -579.09), SIMDE_FLOAT32_C( -603.72), SIMDE_FLOAT32_C( -410.76), SIMDE_FLOAT32_C( -121.76), SIMDE_FLOAT32_C( 562.14) },
      { SIMDE_FLOAT32_C( 398.00), SIMDE_FLOAT32_C( 430.66), SIMDE_FLOAT32_C( 603.11), SIMDE_FLOAT32_C( -579.09), SIMDE_FLOAT32_C( -603.72), SIMDE_FLOAT32_C( -410.76), SIMDE_FLOAT32_C( -121.76), SIMDE_FLOAT32_C( 562.14) } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256 src = simde_mm256_loadu_ps(test_vec[i].src);
    simde__mmask8 k = test_vec[i].k;
    simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a);
    simde__m256 b = simde_mm256_loadu_ps(test_vec[i].b);
    simde__m256 r;

    /* One call per immediate, each with a literal last argument.
     * NOTE(review): presumably the immediate has to be a compile-time
     * constant, which would rule out looping over it -- confirm.
     * NOTE(review): the trailing `1` handed to the assert helper looks like
     * a precision/tolerance setting -- confirm against its definition. */
    r = simde_mm256_mask_shuffle_f32x4(src, k, a, b, 0);
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r0), 1);
    r = simde_mm256_mask_shuffle_f32x4(src, k, a, b, 1);
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r1), 1);
    r = simde_mm256_mask_shuffle_f32x4(src, k, a, b, 2);
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r2), 1);
    r = simde_mm256_mask_shuffle_f32x4(src, k, a, b, 3);
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r3), 1);
  }

  return 0;
#else
  /* Emits one block per iteration in the same field order as the table
   * above: src, k, a, b, then the results for immediates 0..3. */
  fputc('\n', stdout);
  for (int i = 0 ; i < 4 ; i++) {
    simde__m256 src = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
    simde__mmask8 k = simde_test_x86_random_mmask8();
    simde__m256 a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
    simde__m256 b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
    simde__m256 r;

    simde_test_x86_write_f32x8(2, src, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_mmask8(2, k,SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_mask_shuffle_f32x4(src, k, a, b, 0);
    simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_mask_shuffle_f32x4(src, k, a, b, 1);
    simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_mask_shuffle_f32x4(src, k, a, b, 2);
    simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_mask_shuffle_f32x4(src, k, a, b, 3);
    simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  /* NOTE(review): non-zero return -- presumably so a build left in
   * generator mode is not reported as a pass; confirm. */
  return 1;
#endif
}

/* Table-driven test for simde_mm256_maskz_shuffle_f32x4().
 *
 * Same shape as the mask variant but without a `src` vector: each entry is
 * k, a, b, then the expected r0..r3 for immediates 0..3.  In the expected
 * rows every lane whose bit in `k` is clear holds 0.00. */
static int
test_simde_mm256_maskz_shuffle_f32x4(SIMDE_MUNIT_TEST_ARGS) {
#if 1
  const struct {
    const simde__mmask8 k;
    const
simde_float32 a[8]; const simde_float32 b[8]; const simde_float32 r0[8]; const simde_float32 r1[8]; const simde_float32 r2[8]; const simde_float32 r3[8]; } test_vec[4] = { { UINT8_C( 37), { SIMDE_FLOAT32_C( -503.55), SIMDE_FLOAT32_C( -649.65), SIMDE_FLOAT32_C( 910.61), SIMDE_FLOAT32_C( 771.78), SIMDE_FLOAT32_C( -605.93), SIMDE_FLOAT32_C( -622.75), SIMDE_FLOAT32_C( 832.26), SIMDE_FLOAT32_C( 766.22) }, { SIMDE_FLOAT32_C( -841.56), SIMDE_FLOAT32_C( 991.01), SIMDE_FLOAT32_C( -280.43), SIMDE_FLOAT32_C( -392.32), SIMDE_FLOAT32_C( -589.75), SIMDE_FLOAT32_C( -912.19), SIMDE_FLOAT32_C( -591.95), SIMDE_FLOAT32_C( -83.34) }, { SIMDE_FLOAT32_C( -503.55), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 910.61), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 991.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) }, { SIMDE_FLOAT32_C( -605.93), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 832.26), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 991.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) }, { SIMDE_FLOAT32_C( -503.55), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 910.61), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -912.19), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) }, { SIMDE_FLOAT32_C( -605.93), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 832.26), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -912.19), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT8_C(137), { SIMDE_FLOAT32_C( 551.56), SIMDE_FLOAT32_C( -923.11), SIMDE_FLOAT32_C( -542.77), SIMDE_FLOAT32_C( -505.26), SIMDE_FLOAT32_C( 797.95), SIMDE_FLOAT32_C( -348.51), SIMDE_FLOAT32_C( 505.07), SIMDE_FLOAT32_C( 188.52) }, { SIMDE_FLOAT32_C( -7.34), SIMDE_FLOAT32_C( -957.28), SIMDE_FLOAT32_C( 510.02), SIMDE_FLOAT32_C( 615.87), SIMDE_FLOAT32_C( 148.64), SIMDE_FLOAT32_C( -767.81), SIMDE_FLOAT32_C( -887.67), SIMDE_FLOAT32_C( 499.00) }, { SIMDE_FLOAT32_C( 551.56), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -505.26), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 615.87) }, { SIMDE_FLOAT32_C( 797.95), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 188.52), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 615.87) }, { SIMDE_FLOAT32_C( 551.56), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -505.26), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 499.00) }, { SIMDE_FLOAT32_C( 797.95), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 188.52), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 499.00) } }, { UINT8_C(123), { SIMDE_FLOAT32_C( 884.11), SIMDE_FLOAT32_C( 893.06), SIMDE_FLOAT32_C( -479.94), SIMDE_FLOAT32_C( 716.37), SIMDE_FLOAT32_C( 659.28), SIMDE_FLOAT32_C( -321.51), SIMDE_FLOAT32_C( 707.38), SIMDE_FLOAT32_C( -621.15) }, { SIMDE_FLOAT32_C( 286.17), SIMDE_FLOAT32_C( -882.37), SIMDE_FLOAT32_C( -533.34), SIMDE_FLOAT32_C( 694.23), SIMDE_FLOAT32_C( 34.29), SIMDE_FLOAT32_C( -894.33), SIMDE_FLOAT32_C( 245.79), SIMDE_FLOAT32_C( 111.18) }, { SIMDE_FLOAT32_C( 884.11), SIMDE_FLOAT32_C( 893.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 716.37), SIMDE_FLOAT32_C( 286.17), SIMDE_FLOAT32_C( -882.37), SIMDE_FLOAT32_C( -533.34), SIMDE_FLOAT32_C( 0.00) }, { SIMDE_FLOAT32_C( 659.28), SIMDE_FLOAT32_C( -321.51), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -621.15), SIMDE_FLOAT32_C( 286.17), SIMDE_FLOAT32_C( -882.37), SIMDE_FLOAT32_C( -533.34), SIMDE_FLOAT32_C( 0.00) }, { SIMDE_FLOAT32_C( 884.11), SIMDE_FLOAT32_C( 893.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 716.37), SIMDE_FLOAT32_C( 34.29), SIMDE_FLOAT32_C( -894.33), SIMDE_FLOAT32_C( 245.79), SIMDE_FLOAT32_C( 0.00) }, { SIMDE_FLOAT32_C( 659.28), SIMDE_FLOAT32_C( -321.51), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -621.15), SIMDE_FLOAT32_C( 34.29), SIMDE_FLOAT32_C( -894.33), SIMDE_FLOAT32_C( 245.79), SIMDE_FLOAT32_C( 0.00) } }, { 
UINT8_C(192), { SIMDE_FLOAT32_C( 740.53), SIMDE_FLOAT32_C( -90.87), SIMDE_FLOAT32_C( 214.39), SIMDE_FLOAT32_C( 245.59), SIMDE_FLOAT32_C( -902.35), SIMDE_FLOAT32_C( -792.95), SIMDE_FLOAT32_C( 288.32), SIMDE_FLOAT32_C( 607.67) }, { SIMDE_FLOAT32_C( 822.92), SIMDE_FLOAT32_C( -563.04), SIMDE_FLOAT32_C( 839.85), SIMDE_FLOAT32_C( 935.25), SIMDE_FLOAT32_C( 935.95), SIMDE_FLOAT32_C( 982.66), SIMDE_FLOAT32_C( 819.36), SIMDE_FLOAT32_C( 829.02) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 839.85), SIMDE_FLOAT32_C( 935.25) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 839.85), SIMDE_FLOAT32_C( 935.25) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 819.36), SIMDE_FLOAT32_C( 829.02) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 819.36), SIMDE_FLOAT32_C( 829.02) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask8 k = test_vec[i].k; simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 b = simde_mm256_loadu_ps(test_vec[i].b); simde__m256 r; r = simde_mm256_maskz_shuffle_f32x4(k, a, b, 0); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r0), 1); r = simde_mm256_maskz_shuffle_f32x4(k, a, b, 1); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r1), 1); r = simde_mm256_maskz_shuffle_f32x4(k, a, b, 2); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r2), 1); r = simde_mm256_maskz_shuffle_f32x4(k, a, b, 3); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r3), 1); } return 0; 
#else
  /* Generator branch of test_simde_mm256_maskz_shuffle_f32x4(); not compiled
   * while that function's guard reads `#if 1`.  Writes k, a, b and the
   * results for immediates 0..3 from random inputs in [-1000, 1000]. */
  fputc('\n', stdout);
  for (int i = 0 ; i < 4 ; i++) {
    simde__mmask8 k = simde_test_x86_random_mmask8();
    simde__m256 a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
    simde__m256 b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0));
    simde__m256 r;

    simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_maskz_shuffle_f32x4(k, a, b, 0);
    simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_maskz_shuffle_f32x4(k, a, b, 1);
    simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_maskz_shuffle_f32x4(k, a, b, 2);
    simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_maskz_shuffle_f32x4(k, a, b, 3);
    simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Table-driven test for simde_mm256_shuffle_f64x2().
 *
 * Each of the four table entries holds the inputs `a` and `b` (four doubles
 * each) and the expected results r0..r3 for the immediates 0..3.  The
 * function returns 0 after the loop over the table completes.
 *
 * The expected values in the table are consistent with: bit 0 of the
 * immediate choosing which two-double half of `a` fills lanes 0-1, and bit 1
 * choosing which half of `b` fills lanes 2-3.
 *
 * The #else branch is not compiled while the guard reads `#if 1`.  It draws
 * random inputs in [-1000, 1000], writes the inputs and the four results
 * through simde_test_x86_write_f64x4(), and returns 1.
 * NOTE(review): presumably this branch is how the table was generated --
 * confirm before relying on it. */
static int
test_simde_mm256_shuffle_f64x2(SIMDE_MUNIT_TEST_ARGS) {
#if 1
  const struct {
    const simde_float64 a[4];
    const simde_float64 b[4];
    const simde_float64 r0[4];
    const simde_float64 r1[4];
    const simde_float64 r2[4];
    const simde_float64 r3[4];
  } test_vec[4] = {
    /* Entry layout: a, b, then r0, r1, r2, r3. */
    { { SIMDE_FLOAT64_C( 830.90), SIMDE_FLOAT64_C( 926.38), SIMDE_FLOAT64_C( 812.31), SIMDE_FLOAT64_C( -579.52) },
      { SIMDE_FLOAT64_C( 629.36), SIMDE_FLOAT64_C( -269.59), SIMDE_FLOAT64_C( 113.88), SIMDE_FLOAT64_C( 874.17) },
      { SIMDE_FLOAT64_C( 830.90), SIMDE_FLOAT64_C( 926.38), SIMDE_FLOAT64_C( 629.36), SIMDE_FLOAT64_C( -269.59) },
      { SIMDE_FLOAT64_C( 812.31), SIMDE_FLOAT64_C( -579.52), SIMDE_FLOAT64_C( 629.36), SIMDE_FLOAT64_C( -269.59) },
      { SIMDE_FLOAT64_C( 830.90), SIMDE_FLOAT64_C( 926.38), SIMDE_FLOAT64_C( 113.88), SIMDE_FLOAT64_C( 874.17) },
      { SIMDE_FLOAT64_C( 812.31), SIMDE_FLOAT64_C( -579.52), SIMDE_FLOAT64_C( 113.88), SIMDE_FLOAT64_C( 874.17) } },
    { { SIMDE_FLOAT64_C( 865.84), SIMDE_FLOAT64_C( -710.16), SIMDE_FLOAT64_C( 619.64), SIMDE_FLOAT64_C( -47.96) },
      { SIMDE_FLOAT64_C( 272.44), SIMDE_FLOAT64_C( 425.20), SIMDE_FLOAT64_C( -475.06), SIMDE_FLOAT64_C( 355.52) },
      { SIMDE_FLOAT64_C( 865.84), SIMDE_FLOAT64_C( -710.16), SIMDE_FLOAT64_C( 272.44), SIMDE_FLOAT64_C( 425.20) },
      { SIMDE_FLOAT64_C( 619.64), SIMDE_FLOAT64_C( -47.96), SIMDE_FLOAT64_C( 272.44), SIMDE_FLOAT64_C( 425.20) },
      { SIMDE_FLOAT64_C( 865.84), SIMDE_FLOAT64_C( -710.16), SIMDE_FLOAT64_C( -475.06), SIMDE_FLOAT64_C( 355.52) },
      { SIMDE_FLOAT64_C( 619.64), SIMDE_FLOAT64_C( -47.96), SIMDE_FLOAT64_C( -475.06), SIMDE_FLOAT64_C( 355.52) } },
    { { SIMDE_FLOAT64_C( 626.39), SIMDE_FLOAT64_C( 445.81), SIMDE_FLOAT64_C( -682.23), SIMDE_FLOAT64_C( 942.94) },
      { SIMDE_FLOAT64_C( -162.65), SIMDE_FLOAT64_C( -82.05), SIMDE_FLOAT64_C( 14.57), SIMDE_FLOAT64_C( 671.89) },
      { SIMDE_FLOAT64_C( 626.39), SIMDE_FLOAT64_C( 445.81), SIMDE_FLOAT64_C( -162.65), SIMDE_FLOAT64_C( -82.05) },
      { SIMDE_FLOAT64_C( -682.23), SIMDE_FLOAT64_C( 942.94), SIMDE_FLOAT64_C( -162.65), SIMDE_FLOAT64_C( -82.05) },
      { SIMDE_FLOAT64_C( 626.39), SIMDE_FLOAT64_C( 445.81), SIMDE_FLOAT64_C( 14.57), SIMDE_FLOAT64_C( 671.89) },
      { SIMDE_FLOAT64_C( -682.23), SIMDE_FLOAT64_C( 942.94), SIMDE_FLOAT64_C( 14.57), SIMDE_FLOAT64_C( 671.89) } },
    { { SIMDE_FLOAT64_C( 458.27), SIMDE_FLOAT64_C( 217.29), SIMDE_FLOAT64_C( -704.67), SIMDE_FLOAT64_C( -316.48) },
      { SIMDE_FLOAT64_C( 549.40), SIMDE_FLOAT64_C( 639.72), SIMDE_FLOAT64_C( 943.11), SIMDE_FLOAT64_C( 380.30) },
      { SIMDE_FLOAT64_C( 458.27), SIMDE_FLOAT64_C( 217.29), SIMDE_FLOAT64_C( 549.40), SIMDE_FLOAT64_C( 639.72) },
      { SIMDE_FLOAT64_C( -704.67), SIMDE_FLOAT64_C( -316.48), SIMDE_FLOAT64_C( 549.40), SIMDE_FLOAT64_C( 639.72) },
      { SIMDE_FLOAT64_C( 458.27), SIMDE_FLOAT64_C( 217.29), SIMDE_FLOAT64_C( 943.11), SIMDE_FLOAT64_C( 380.30) },
      { SIMDE_FLOAT64_C( -704.67), SIMDE_FLOAT64_C( -316.48), SIMDE_FLOAT64_C( 943.11), SIMDE_FLOAT64_C( 380.30) } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a);
    simde__m256d b = simde_mm256_loadu_pd(test_vec[i].b);
    simde__m256d r;

    /* One call per immediate, each with a literal last argument.
     * NOTE(review): presumably the immediate has to be a compile-time
     * constant, which would rule out looping over it -- confirm.
     * NOTE(review): the trailing `1` handed to the assert helper looks like
     * a precision/tolerance setting -- confirm against its definition. */
    r = simde_mm256_shuffle_f64x2(a, b, 0);
    simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r0), 1);
    r = simde_mm256_shuffle_f64x2(a, b, 1);
    simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r1), 1);
    r = simde_mm256_shuffle_f64x2(a, b, 2);
    simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r2), 1);
    r = simde_mm256_shuffle_f64x2(a, b, 3);
    simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r3), 1);
  }

  return 0;
#else
  /* Emits one block per iteration in the same field order as the table
   * above: a, b, then the results for immediates 0..3. */
  fputc('\n', stdout);
  for (int i = 0 ; i < 4 ; i++) {
    simde__m256d a = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
    simde__m256d b = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
    simde__m256d r;

    simde_test_x86_write_f64x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_f64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_shuffle_f64x2(a, b, 0);
    simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_shuffle_f64x2(a, b, 1);
    simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_shuffle_f64x2(a, b, 2);
    simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_shuffle_f64x2(a, b, 3);
    simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  /* NOTE(review): non-zero return -- presumably so a build left in
   * generator mode is not reported as a pass; confirm. */
  return 1;
#endif
}

/* Table-driven test for simde_mm256_mask_shuffle_f64x2().
 *
 * Each entry is src, k, a, b, then the expected r0..r3 for immediates 0..3.
 * The vectors have four lanes; the expected rows are consistent with lane j
 * keeping src[j] wherever bit j of `k` is clear, with bits 4-7 of `k` having
 * no visible effect. */
static int
test_simde_mm256_mask_shuffle_f64x2(SIMDE_MUNIT_TEST_ARGS) {
#if 1
  const struct {
    const simde_float64 src[4];
    const simde__mmask8 k;
    const simde_float64 a[4];
    const simde_float64 b[4];
    const simde_float64 r0[4];
    const simde_float64 r1[4];
    const simde_float64 r2[4];
    const simde_float64 r3[4];
  } test_vec[4] = {
    { { SIMDE_FLOAT64_C( 211.42), SIMDE_FLOAT64_C( -547.45), SIMDE_FLOAT64_C( -286.14), SIMDE_FLOAT64_C( -940.55) },
      UINT8_C( 76),
      { SIMDE_FLOAT64_C( -611.26), SIMDE_FLOAT64_C( -905.87), SIMDE_FLOAT64_C( 815.09), SIMDE_FLOAT64_C( -241.57) },
      { SIMDE_FLOAT64_C( -880.14), SIMDE_FLOAT64_C( -483.26),
SIMDE_FLOAT64_C( -269.54), SIMDE_FLOAT64_C( 979.22) }, { SIMDE_FLOAT64_C( 211.42), SIMDE_FLOAT64_C( -547.45), SIMDE_FLOAT64_C( -880.14), SIMDE_FLOAT64_C( -483.26) }, { SIMDE_FLOAT64_C( 211.42), SIMDE_FLOAT64_C( -547.45), SIMDE_FLOAT64_C( -880.14), SIMDE_FLOAT64_C( -483.26) }, { SIMDE_FLOAT64_C( 211.42), SIMDE_FLOAT64_C( -547.45), SIMDE_FLOAT64_C( -269.54), SIMDE_FLOAT64_C( 979.22) }, { SIMDE_FLOAT64_C( 211.42), SIMDE_FLOAT64_C( -547.45), SIMDE_FLOAT64_C( -269.54), SIMDE_FLOAT64_C( 979.22) } }, { { SIMDE_FLOAT64_C( -556.92), SIMDE_FLOAT64_C( -759.71), SIMDE_FLOAT64_C( -65.30), SIMDE_FLOAT64_C( 731.13) }, UINT8_C( 50), { SIMDE_FLOAT64_C( -613.42), SIMDE_FLOAT64_C( 156.49), SIMDE_FLOAT64_C( -801.68), SIMDE_FLOAT64_C( 886.07) }, { SIMDE_FLOAT64_C( 749.49), SIMDE_FLOAT64_C( -810.15), SIMDE_FLOAT64_C( -492.14), SIMDE_FLOAT64_C( 99.86) }, { SIMDE_FLOAT64_C( -556.92), SIMDE_FLOAT64_C( 156.49), SIMDE_FLOAT64_C( -65.30), SIMDE_FLOAT64_C( 731.13) }, { SIMDE_FLOAT64_C( -556.92), SIMDE_FLOAT64_C( 886.07), SIMDE_FLOAT64_C( -65.30), SIMDE_FLOAT64_C( 731.13) }, { SIMDE_FLOAT64_C( -556.92), SIMDE_FLOAT64_C( 156.49), SIMDE_FLOAT64_C( -65.30), SIMDE_FLOAT64_C( 731.13) }, { SIMDE_FLOAT64_C( -556.92), SIMDE_FLOAT64_C( 886.07), SIMDE_FLOAT64_C( -65.30), SIMDE_FLOAT64_C( 731.13) } }, { { SIMDE_FLOAT64_C( 33.49), SIMDE_FLOAT64_C( 874.95), SIMDE_FLOAT64_C( -437.24), SIMDE_FLOAT64_C( -510.52) }, UINT8_C( 10), { SIMDE_FLOAT64_C( 774.18), SIMDE_FLOAT64_C( -57.97), SIMDE_FLOAT64_C( 947.46), SIMDE_FLOAT64_C( 833.63) }, { SIMDE_FLOAT64_C( -822.49), SIMDE_FLOAT64_C( -663.80), SIMDE_FLOAT64_C( 927.75), SIMDE_FLOAT64_C( 992.60) }, { SIMDE_FLOAT64_C( 33.49), SIMDE_FLOAT64_C( -57.97), SIMDE_FLOAT64_C( -437.24), SIMDE_FLOAT64_C( -663.80) }, { SIMDE_FLOAT64_C( 33.49), SIMDE_FLOAT64_C( 833.63), SIMDE_FLOAT64_C( -437.24), SIMDE_FLOAT64_C( -663.80) }, { SIMDE_FLOAT64_C( 33.49), SIMDE_FLOAT64_C( -57.97), SIMDE_FLOAT64_C( -437.24), SIMDE_FLOAT64_C( 992.60) }, { SIMDE_FLOAT64_C( 33.49), SIMDE_FLOAT64_C( 
833.63), SIMDE_FLOAT64_C( -437.24), SIMDE_FLOAT64_C( 992.60) } }, { { SIMDE_FLOAT64_C( 94.63), SIMDE_FLOAT64_C( -952.39), SIMDE_FLOAT64_C( -490.66), SIMDE_FLOAT64_C( 825.09) }, UINT8_C(233), { SIMDE_FLOAT64_C( -47.58), SIMDE_FLOAT64_C( -934.62), SIMDE_FLOAT64_C( -38.48), SIMDE_FLOAT64_C( -316.46) }, { SIMDE_FLOAT64_C( -835.78), SIMDE_FLOAT64_C( 348.10), SIMDE_FLOAT64_C( 840.03), SIMDE_FLOAT64_C( -637.46) }, { SIMDE_FLOAT64_C( -47.58), SIMDE_FLOAT64_C( -952.39), SIMDE_FLOAT64_C( -490.66), SIMDE_FLOAT64_C( 348.10) }, { SIMDE_FLOAT64_C( -38.48), SIMDE_FLOAT64_C( -952.39), SIMDE_FLOAT64_C( -490.66), SIMDE_FLOAT64_C( 348.10) }, { SIMDE_FLOAT64_C( -47.58), SIMDE_FLOAT64_C( -952.39), SIMDE_FLOAT64_C( -490.66), SIMDE_FLOAT64_C( -637.46) }, { SIMDE_FLOAT64_C( -38.48), SIMDE_FLOAT64_C( -952.39), SIMDE_FLOAT64_C( -490.66), SIMDE_FLOAT64_C( -637.46) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d src = simde_mm256_loadu_pd(test_vec[i].src); simde__mmask8 k = test_vec[i].k; simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d b = simde_mm256_loadu_pd(test_vec[i].b); simde__m256d r; r = simde_mm256_mask_shuffle_f64x2(src, k, a, b, 0); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r0), 1); r = simde_mm256_mask_shuffle_f64x2(src, k, a, b, 1); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r1), 1); r = simde_mm256_mask_shuffle_f64x2(src, k, a, b, 2); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r2), 1); r = simde_mm256_mask_shuffle_f64x2(src, k, a, b, 3); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r3), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { simde__m256d src = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256d a = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); 
simde__m256d b = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
    simde__m256d r;

    /* Generator output order for the mask variant: src, k, a, b, then the
     * results for immediates 0..3. */
    simde_test_x86_write_f64x4(2, src, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_mmask8(2, k,SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_f64x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_f64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_mask_shuffle_f64x2(src, k, a, b, 0);
    simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_mask_shuffle_f64x2(src, k, a, b, 1);
    simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_mask_shuffle_f64x2(src, k, a, b, 2);
    simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_mask_shuffle_f64x2(src, k, a, b, 3);
    simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Table-driven test for simde_mm256_maskz_shuffle_f64x2().
 *
 * Each of the four table entries holds a write-mask `k`, the inputs `a` and
 * `b` (four doubles each) and the expected results r0..r3 for the
 * immediates 0..3.  The function returns 0 after the loop over the table
 * completes.
 *
 * The expected values in the table are consistent with: bit 0 of the
 * immediate choosing which two-double half of `a` fills lanes 0-1, bit 1
 * choosing which half of `b` fills lanes 2-3, and lane j being 0.00
 * wherever bit j of `k` is clear (bits 4-7 of `k` have no visible effect).
 *
 * The #else branch is not compiled while the guard reads `#if 1`.  It draws
 * a random mask and random inputs in [-1000, 1000], writes them and the four
 * results through the simde_test_x86_write_* helpers, and returns 1.
 * NOTE(review): presumably this branch is how the table was generated --
 * confirm before relying on it. */
static int
test_simde_mm256_maskz_shuffle_f64x2(SIMDE_MUNIT_TEST_ARGS) {
#if 1
  const struct {
    const simde__mmask8 k;
    const simde_float64 a[4];
    const simde_float64 b[4];
    const simde_float64 r0[4];
    const simde_float64 r1[4];
    const simde_float64 r2[4];
    const simde_float64 r3[4];
  } test_vec[4] = {
    /* Entry layout: k, a, b, then r0, r1, r2, r3. */
    { UINT8_C(146),
      { SIMDE_FLOAT64_C( 519.29), SIMDE_FLOAT64_C( 30.37), SIMDE_FLOAT64_C( 576.58), SIMDE_FLOAT64_C( 498.93) },
      { SIMDE_FLOAT64_C( -198.77), SIMDE_FLOAT64_C( 114.03), SIMDE_FLOAT64_C( -687.70), SIMDE_FLOAT64_C( 903.89) },
      { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 30.37), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) },
      { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 498.93), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) },
      { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 30.37), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) },
      { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 498.93), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } },
    { UINT8_C( 37),
      { SIMDE_FLOAT64_C( 68.15), SIMDE_FLOAT64_C( -745.26), SIMDE_FLOAT64_C( -93.84), SIMDE_FLOAT64_C( -979.52) },
      { SIMDE_FLOAT64_C( -682.93), SIMDE_FLOAT64_C( 146.97), SIMDE_FLOAT64_C( -788.41), SIMDE_FLOAT64_C( 393.72) },
      { SIMDE_FLOAT64_C( 68.15), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -682.93), SIMDE_FLOAT64_C( 0.00) },
      { SIMDE_FLOAT64_C( -93.84), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -682.93), SIMDE_FLOAT64_C( 0.00) },
      { SIMDE_FLOAT64_C( 68.15), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -788.41), SIMDE_FLOAT64_C( 0.00) },
      { SIMDE_FLOAT64_C( -93.84), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -788.41), SIMDE_FLOAT64_C( 0.00) } },
    { UINT8_C(187),
      { SIMDE_FLOAT64_C( -30.90), SIMDE_FLOAT64_C( -576.88), SIMDE_FLOAT64_C( 637.60), SIMDE_FLOAT64_C( -671.29) },
      { SIMDE_FLOAT64_C( 124.20), SIMDE_FLOAT64_C( 203.69), SIMDE_FLOAT64_C( -87.92), SIMDE_FLOAT64_C( 509.77) },
      { SIMDE_FLOAT64_C( -30.90), SIMDE_FLOAT64_C( -576.88), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 203.69) },
      { SIMDE_FLOAT64_C( 637.60), SIMDE_FLOAT64_C( -671.29), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 203.69) },
      { SIMDE_FLOAT64_C( -30.90), SIMDE_FLOAT64_C( -576.88), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 509.77) },
      { SIMDE_FLOAT64_C( 637.60), SIMDE_FLOAT64_C( -671.29), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 509.77) } },
    { UINT8_C(177),
      { SIMDE_FLOAT64_C( -933.79), SIMDE_FLOAT64_C( -943.60), SIMDE_FLOAT64_C( 943.63), SIMDE_FLOAT64_C( 102.23) },
      { SIMDE_FLOAT64_C( 575.68), SIMDE_FLOAT64_C( -26.00), SIMDE_FLOAT64_C( -321.19), SIMDE_FLOAT64_C( 74.62) },
      { SIMDE_FLOAT64_C( -933.79), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) },
      { SIMDE_FLOAT64_C( 943.63), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) },
      { SIMDE_FLOAT64_C( -933.79), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) },
      { SIMDE_FLOAT64_C( 943.63), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } },
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__mmask8 k = test_vec[i].k;
    simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a);
    simde__m256d b = simde_mm256_loadu_pd(test_vec[i].b);
    simde__m256d r;

    /* One call per immediate, each with a literal last argument.
     * NOTE(review): presumably the immediate has to be a compile-time
     * constant, which would rule out looping over it -- confirm.
     * NOTE(review): the trailing `1` handed to the assert helper looks like
     * a precision/tolerance setting -- confirm against its definition. */
    r = simde_mm256_maskz_shuffle_f64x2(k, a, b, 0);
    simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r0), 1);
    r = simde_mm256_maskz_shuffle_f64x2(k, a, b, 1);
    simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r1), 1);
    r = simde_mm256_maskz_shuffle_f64x2(k, a, b, 2);
    simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r2), 1);
    r = simde_mm256_maskz_shuffle_f64x2(k, a, b, 3);
    simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r3), 1);
  }

  return 0;
#else
  /* Emits one block per iteration in the same field order as the table
   * above: k, a, b, then the results for immediates 0..3. */
  fputc('\n', stdout);
  for (int i = 0 ; i < 4 ; i++) {
    simde__mmask8 k = simde_test_x86_random_mmask8();
    simde__m256d a = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
    simde__m256d b = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0));
    simde__m256d r;

    simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_f64x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_f64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_maskz_shuffle_f64x2(k, a, b, 0);
    simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_maskz_shuffle_f64x2(k, a, b, 1);
    simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_maskz_shuffle_f64x2(k, a, b, 2);
    simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE);
    r = simde_mm256_maskz_shuffle_f64x2(k, a, b, 3);
    simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  /* NOTE(review): non-zero return -- presumably so a build left in
   * generator mode is not reported as a pass; confirm. */
  return 1;
#endif
}

/* Table-driven test for simde_mm256_shuffle_i32x4().
 *
 * Each entry is a, b (eight int32_t each), then the expected r0..r3 for
 * immediates 0..3.  Inputs are loaded with simde_mm256_loadu_epi32() and the
 * results are checked with simde_test_x86_assert_equal_i32x8(), which here
 * takes no third argument, unlike the floating-point assert helpers. */
static int
test_simde_mm256_shuffle_i32x4(SIMDE_MUNIT_TEST_ARGS) {
#if 1
  const struct {
    const int32_t a[8];
    const int32_t b[8];
    const int32_t r0[8];
    const int32_t r1[8];
    const int32_t r2[8];
    const int32_t r3[8];
  } test_vec[4] = {
    { { INT32_C( 472292888), -INT32_C( 1050141756), INT32_C( 896688052), -INT32_C( 1361646918), -INT32_C( 1915024474), -INT32_C( 10389770), INT32_C( 590002938), INT32_C( 1234553392) },
      { INT32_C( 744864872), -INT32_C( 1947349545), -INT32_C( 423600340), -INT32_C( 275474871), -INT32_C(
1736675422), -INT32_C( 526918170), -INT32_C( 939277672), -INT32_C( 183396212) }, { INT32_C( 472292888), -INT32_C( 1050141756), INT32_C( 896688052), -INT32_C( 1361646918), INT32_C( 744864872), -INT32_C( 1947349545), -INT32_C( 423600340), -INT32_C( 275474871) }, { -INT32_C( 1915024474), -INT32_C( 10389770), INT32_C( 590002938), INT32_C( 1234553392), INT32_C( 744864872), -INT32_C( 1947349545), -INT32_C( 423600340), -INT32_C( 275474871) }, { INT32_C( 472292888), -INT32_C( 1050141756), INT32_C( 896688052), -INT32_C( 1361646918), -INT32_C( 1736675422), -INT32_C( 526918170), -INT32_C( 939277672), -INT32_C( 183396212) }, { -INT32_C( 1915024474), -INT32_C( 10389770), INT32_C( 590002938), INT32_C( 1234553392), -INT32_C( 1736675422), -INT32_C( 526918170), -INT32_C( 939277672), -INT32_C( 183396212) } }, { { INT32_C( 740390484), INT32_C( 1891045187), -INT32_C( 1202292626), -INT32_C( 1331172594), INT32_C( 1078469722), -INT32_C( 1725898751), INT32_C( 778183586), INT32_C( 270758844) }, { INT32_C( 758924778), -INT32_C( 1029835692), INT32_C( 2054878316), INT32_C( 959128287), INT32_C( 1199207238), -INT32_C( 169764269), INT32_C( 2049196990), -INT32_C( 1601550410) }, { INT32_C( 740390484), INT32_C( 1891045187), -INT32_C( 1202292626), -INT32_C( 1331172594), INT32_C( 758924778), -INT32_C( 1029835692), INT32_C( 2054878316), INT32_C( 959128287) }, { INT32_C( 1078469722), -INT32_C( 1725898751), INT32_C( 778183586), INT32_C( 270758844), INT32_C( 758924778), -INT32_C( 1029835692), INT32_C( 2054878316), INT32_C( 959128287) }, { INT32_C( 740390484), INT32_C( 1891045187), -INT32_C( 1202292626), -INT32_C( 1331172594), INT32_C( 1199207238), -INT32_C( 169764269), INT32_C( 2049196990), -INT32_C( 1601550410) }, { INT32_C( 1078469722), -INT32_C( 1725898751), INT32_C( 778183586), INT32_C( 270758844), INT32_C( 1199207238), -INT32_C( 169764269), INT32_C( 2049196990), -INT32_C( 1601550410) } }, { { -INT32_C( 523319412), INT32_C( 665021371), INT32_C( 1050746207), -INT32_C( 2055680961), -INT32_C( 
1815219648), INT32_C( 1250537100), -INT32_C( 1480282639), -INT32_C( 2125967372) }, { -INT32_C( 782166506), -INT32_C( 503839615), INT32_C( 1629460770), -INT32_C( 1494771866), INT32_C( 372880521), INT32_C( 1398850146), INT32_C( 1694115183), -INT32_C( 1964686732) }, { -INT32_C( 523319412), INT32_C( 665021371), INT32_C( 1050746207), -INT32_C( 2055680961), -INT32_C( 782166506), -INT32_C( 503839615), INT32_C( 1629460770), -INT32_C( 1494771866) }, { -INT32_C( 1815219648), INT32_C( 1250537100), -INT32_C( 1480282639), -INT32_C( 2125967372), -INT32_C( 782166506), -INT32_C( 503839615), INT32_C( 1629460770), -INT32_C( 1494771866) }, { -INT32_C( 523319412), INT32_C( 665021371), INT32_C( 1050746207), -INT32_C( 2055680961), INT32_C( 372880521), INT32_C( 1398850146), INT32_C( 1694115183), -INT32_C( 1964686732) }, { -INT32_C( 1815219648), INT32_C( 1250537100), -INT32_C( 1480282639), -INT32_C( 2125967372), INT32_C( 372880521), INT32_C( 1398850146), INT32_C( 1694115183), -INT32_C( 1964686732) } }, { { -INT32_C( 631552424), INT32_C( 1840993099), INT32_C( 1389288172), -INT32_C( 67586702), -INT32_C( 888065431), INT32_C( 1679717108), INT32_C( 197663127), -INT32_C( 1265259173) }, { INT32_C( 1049555187), INT32_C( 799754563), -INT32_C( 1786611165), -INT32_C( 1718519249), -INT32_C( 1587240276), -INT32_C( 1425702124), -INT32_C( 139014756), INT32_C( 1839942522) }, { -INT32_C( 631552424), INT32_C( 1840993099), INT32_C( 1389288172), -INT32_C( 67586702), INT32_C( 1049555187), INT32_C( 799754563), -INT32_C( 1786611165), -INT32_C( 1718519249) }, { -INT32_C( 888065431), INT32_C( 1679717108), INT32_C( 197663127), -INT32_C( 1265259173), INT32_C( 1049555187), INT32_C( 799754563), -INT32_C( 1786611165), -INT32_C( 1718519249) }, { -INT32_C( 631552424), INT32_C( 1840993099), INT32_C( 1389288172), -INT32_C( 67586702), -INT32_C( 1587240276), -INT32_C( 1425702124), -INT32_C( 139014756), INT32_C( 1839942522) }, { -INT32_C( 888065431), INT32_C( 1679717108), INT32_C( 197663127), -INT32_C( 1265259173), -INT32_C( 
1587240276), -INT32_C( 1425702124), -INT32_C( 139014756), INT32_C( 1839942522) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i a = simde_mm256_loadu_epi32(test_vec[i].a); simde__m256i b = simde_mm256_loadu_epi32(test_vec[i].b); simde__m256i r; r = simde_mm256_shuffle_i32x4(a, b, 0); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r0)); r = simde_mm256_shuffle_i32x4(a, b, 1); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r1)); r = simde_mm256_shuffle_i32x4(a, b, 2); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r2)); r = simde_mm256_shuffle_i32x4(a, b, 3); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r3)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { simde__m256i a = simde_test_x86_random_i32x8(); simde__m256i b = simde_test_x86_random_i32x8(); simde__m256i r; simde_test_x86_write_i32x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_shuffle_i32x4(a, b, 0); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_shuffle_i32x4(a, b, 1); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_shuffle_i32x4(a, b, 2); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_shuffle_i32x4(a, b, 3); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask_shuffle_i32x4(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const int32_t src[8]; const simde__mmask8 k; const int32_t a[8]; const int32_t b[8]; const int32_t r0[8]; const int32_t r1[8]; const int32_t r2[8]; const int32_t r3[8]; } test_vec[4] = { { { -INT32_C( 426491337), -INT32_C( 1735303558), INT32_C( 1998478902), -INT32_C( 236589506), -INT32_C( 1344094934), -INT32_C( 1393533496), -INT32_C( 1532062390), -INT32_C( 952087920) }, UINT8_C(140), { INT32_C( 839298516), 
-INT32_C( 1654088129), -INT32_C( 1411587908), -INT32_C( 2083140155), INT32_C( 223052976), INT32_C( 173602677), -INT32_C( 275055451), INT32_C( 276587068) }, { INT32_C( 1313047055), -INT32_C( 571692255), INT32_C( 1367918731), INT32_C( 1171545749), INT32_C( 1465065442), -INT32_C( 1151161578), -INT32_C( 475267929), INT32_C( 1861429086) }, { -INT32_C( 426491337), -INT32_C( 1735303558), -INT32_C( 1411587908), -INT32_C( 2083140155), -INT32_C( 1344094934), -INT32_C( 1393533496), -INT32_C( 1532062390), INT32_C( 1171545749) }, { -INT32_C( 426491337), -INT32_C( 1735303558), -INT32_C( 275055451), INT32_C( 276587068), -INT32_C( 1344094934), -INT32_C( 1393533496), -INT32_C( 1532062390), INT32_C( 1171545749) }, { -INT32_C( 426491337), -INT32_C( 1735303558), -INT32_C( 1411587908), -INT32_C( 2083140155), -INT32_C( 1344094934), -INT32_C( 1393533496), -INT32_C( 1532062390), INT32_C( 1861429086) }, { -INT32_C( 426491337), -INT32_C( 1735303558), -INT32_C( 275055451), INT32_C( 276587068), -INT32_C( 1344094934), -INT32_C( 1393533496), -INT32_C( 1532062390), INT32_C( 1861429086) } }, { { -INT32_C( 893634903), INT32_C( 1839769826), INT32_C( 113127536), INT32_C( 1900778126), -INT32_C( 943153487), -INT32_C( 259839415), -INT32_C( 2049757657), -INT32_C( 17578155) }, UINT8_C(253), { INT32_C( 1491061168), -INT32_C( 1580642959), -INT32_C( 1640968437), -INT32_C( 1185963750), -INT32_C( 1811802263), -INT32_C( 927206502), -INT32_C( 1927462714), -INT32_C( 460645580) }, { INT32_C( 1430022884), -INT32_C( 1007221321), -INT32_C( 278845484), INT32_C( 833138888), INT32_C( 1640344263), INT32_C( 1680441501), -INT32_C( 168737087), INT32_C( 1188658274) }, { INT32_C( 1491061168), INT32_C( 1839769826), -INT32_C( 1640968437), -INT32_C( 1185963750), INT32_C( 1430022884), -INT32_C( 1007221321), -INT32_C( 278845484), INT32_C( 833138888) }, { -INT32_C( 1811802263), INT32_C( 1839769826), -INT32_C( 1927462714), -INT32_C( 460645580), INT32_C( 1430022884), -INT32_C( 1007221321), -INT32_C( 278845484), INT32_C( 833138888) 
}, { INT32_C( 1491061168), INT32_C( 1839769826), -INT32_C( 1640968437), -INT32_C( 1185963750), INT32_C( 1640344263), INT32_C( 1680441501), -INT32_C( 168737087), INT32_C( 1188658274) }, { -INT32_C( 1811802263), INT32_C( 1839769826), -INT32_C( 1927462714), -INT32_C( 460645580), INT32_C( 1640344263), INT32_C( 1680441501), -INT32_C( 168737087), INT32_C( 1188658274) } }, { { -INT32_C( 1633937945), -INT32_C( 278818021), -INT32_C( 2099330374), INT32_C( 984843891), -INT32_C( 828606159), -INT32_C( 1171077639), INT32_C( 1856971788), -INT32_C( 2018211680) }, UINT8_C(158), { -INT32_C( 474405296), INT32_C( 1251846279), INT32_C( 230498183), INT32_C( 1279195091), INT32_C( 1497697683), INT32_C( 1667563583), INT32_C( 939840431), -INT32_C( 657028217) }, { INT32_C( 968593329), -INT32_C( 1098688201), INT32_C( 1271677048), -INT32_C( 879293897), INT32_C( 1462033687), -INT32_C( 1933932067), -INT32_C( 473645476), -INT32_C( 71591350) }, { -INT32_C( 1633937945), INT32_C( 1251846279), INT32_C( 230498183), INT32_C( 1279195091), INT32_C( 968593329), -INT32_C( 1171077639), INT32_C( 1856971788), -INT32_C( 879293897) }, { -INT32_C( 1633937945), INT32_C( 1667563583), INT32_C( 939840431), -INT32_C( 657028217), INT32_C( 968593329), -INT32_C( 1171077639), INT32_C( 1856971788), -INT32_C( 879293897) }, { -INT32_C( 1633937945), INT32_C( 1251846279), INT32_C( 230498183), INT32_C( 1279195091), INT32_C( 1462033687), -INT32_C( 1171077639), INT32_C( 1856971788), -INT32_C( 71591350) }, { -INT32_C( 1633937945), INT32_C( 1667563583), INT32_C( 939840431), -INT32_C( 657028217), INT32_C( 1462033687), -INT32_C( 1171077639), INT32_C( 1856971788), -INT32_C( 71591350) } }, { { INT32_C( 1630828329), INT32_C( 1210038224), INT32_C( 798288887), INT32_C( 234499062), -INT32_C( 446423544), INT32_C( 57810855), INT32_C( 669398749), -INT32_C( 98327855) }, UINT8_C( 25), { INT32_C( 266951511), INT32_C( 1711681915), -INT32_C( 245615163), INT32_C( 1308191279), -INT32_C( 302719026), -INT32_C( 2016676015), -INT32_C( 2141654306), 
INT32_C( 1838764821) }, { INT32_C( 696025774), INT32_C( 2039513779), -INT32_C( 412422984), INT32_C( 624255830), -INT32_C( 1827526334), -INT32_C( 14951135), -INT32_C( 444632112), INT32_C( 1968314566) }, { INT32_C( 266951511), INT32_C( 1210038224), INT32_C( 798288887), INT32_C( 1308191279), INT32_C( 696025774), INT32_C( 57810855), INT32_C( 669398749), -INT32_C( 98327855) }, { -INT32_C( 302719026), INT32_C( 1210038224), INT32_C( 798288887), INT32_C( 1838764821), INT32_C( 696025774), INT32_C( 57810855), INT32_C( 669398749), -INT32_C( 98327855) }, { INT32_C( 266951511), INT32_C( 1210038224), INT32_C( 798288887), INT32_C( 1308191279), -INT32_C( 1827526334), INT32_C( 57810855), INT32_C( 669398749), -INT32_C( 98327855) }, { -INT32_C( 302719026), INT32_C( 1210038224), INT32_C( 798288887), INT32_C( 1838764821), -INT32_C( 1827526334), INT32_C( 57810855), INT32_C( 669398749), -INT32_C( 98327855) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i src = simde_mm256_loadu_epi32(test_vec[i].src); simde__mmask8 k = test_vec[i].k; simde__m256i a = simde_mm256_loadu_epi32(test_vec[i].a); simde__m256i b = simde_mm256_loadu_epi32(test_vec[i].b); simde__m256i r; r = simde_mm256_mask_shuffle_i32x4(src, k, a, b, 0); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r0)); r = simde_mm256_mask_shuffle_i32x4(src, k, a, b, 1); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r1)); r = simde_mm256_mask_shuffle_i32x4(src, k, a, b, 2); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r2)); r = simde_mm256_mask_shuffle_i32x4(src, k, a, b, 3); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r3)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { simde__m256i src = simde_test_x86_random_i32x8(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i a = simde_test_x86_random_i32x8(); simde__m256i b = simde_test_x86_random_i32x8(); 
simde__m256i r; simde_test_x86_write_i32x8(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k,SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_mask_shuffle_i32x4(src, k, a, b, 0); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_mask_shuffle_i32x4(src, k, a, b, 1); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_mask_shuffle_i32x4(src, k, a, b, 2); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_mask_shuffle_i32x4(src, k, a, b, 3); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_maskz_shuffle_i32x4(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde__mmask8 k; const int32_t a[8]; const int32_t b[8]; const int32_t r0[8]; const int32_t r1[8]; const int32_t r2[8]; const int32_t r3[8]; } test_vec[4] = { { UINT8_C(133), { -INT32_C( 509342325), -INT32_C( 1485532748), INT32_C( 1451290319), INT32_C( 264777338), -INT32_C( 1029505922), -INT32_C( 893145572), -INT32_C( 2043526745), INT32_C( 2013988077) }, { -INT32_C( 983912431), INT32_C( 292343361), INT32_C( 912780476), -INT32_C( 1723453669), INT32_C( 1213982763), INT32_C( 1024597654), INT32_C( 1287865695), INT32_C( 1522913097) }, { -INT32_C( 509342325), INT32_C( 0), INT32_C( 1451290319), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1723453669) }, { -INT32_C( 1029505922), INT32_C( 0), -INT32_C( 2043526745), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1723453669) }, { -INT32_C( 509342325), INT32_C( 0), INT32_C( 1451290319), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1522913097) }, { -INT32_C( 1029505922), INT32_C( 0), -INT32_C( 2043526745), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1522913097) } }, { UINT8_C(127), { -INT32_C( 306176225), INT32_C( 2007617931), INT32_C( 
1754455864), INT32_C( 227748645), -INT32_C( 1515988090), INT32_C( 855957998), INT32_C( 1937527204), INT32_C( 905106966) }, { -INT32_C( 2145209355), -INT32_C( 1107833980), -INT32_C( 802846038), INT32_C( 1021163701), -INT32_C( 2099150444), INT32_C( 112584034), INT32_C( 1283076406), -INT32_C( 58627065) }, { -INT32_C( 306176225), INT32_C( 2007617931), INT32_C( 1754455864), INT32_C( 227748645), -INT32_C( 2145209355), -INT32_C( 1107833980), -INT32_C( 802846038), INT32_C( 0) }, { -INT32_C( 1515988090), INT32_C( 855957998), INT32_C( 1937527204), INT32_C( 905106966), -INT32_C( 2145209355), -INT32_C( 1107833980), -INT32_C( 802846038), INT32_C( 0) }, { -INT32_C( 306176225), INT32_C( 2007617931), INT32_C( 1754455864), INT32_C( 227748645), -INT32_C( 2099150444), INT32_C( 112584034), INT32_C( 1283076406), INT32_C( 0) }, { -INT32_C( 1515988090), INT32_C( 855957998), INT32_C( 1937527204), INT32_C( 905106966), -INT32_C( 2099150444), INT32_C( 112584034), INT32_C( 1283076406), INT32_C( 0) } }, { UINT8_C( 31), { INT32_C( 1856273571), -INT32_C( 48668301), INT32_C( 1051978118), INT32_C( 1205006278), -INT32_C( 1247193904), INT32_C( 988590089), -INT32_C( 1774110678), INT32_C( 1572224442) }, { INT32_C( 768367289), INT32_C( 1093330363), -INT32_C( 1803559474), -INT32_C( 1663282484), -INT32_C( 1336769114), INT32_C( 1609186869), INT32_C( 821439606), INT32_C( 596552809) }, { INT32_C( 1856273571), -INT32_C( 48668301), INT32_C( 1051978118), INT32_C( 1205006278), INT32_C( 768367289), INT32_C( 0), INT32_C( 0), INT32_C( 0) }, { -INT32_C( 1247193904), INT32_C( 988590089), -INT32_C( 1774110678), INT32_C( 1572224442), INT32_C( 768367289), INT32_C( 0), INT32_C( 0), INT32_C( 0) }, { INT32_C( 1856273571), -INT32_C( 48668301), INT32_C( 1051978118), INT32_C( 1205006278), -INT32_C( 1336769114), INT32_C( 0), INT32_C( 0), INT32_C( 0) }, { -INT32_C( 1247193904), INT32_C( 988590089), -INT32_C( 1774110678), INT32_C( 1572224442), -INT32_C( 1336769114), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { UINT8_C( 6), { 
INT32_C( 1069633626), INT32_C( 1477247610), -INT32_C( 752574079), INT32_C( 58376573), INT32_C( 1362635283), INT32_C( 1086822420), INT32_C( 984283278), -INT32_C( 532623994) }, { -INT32_C( 1759575779), -INT32_C( 2064700413), INT32_C( 1264063693), -INT32_C( 397487403), INT32_C( 272205564), -INT32_C( 1403977698), INT32_C( 2129067000), -INT32_C( 446814520) }, { INT32_C( 0), INT32_C( 1477247610), -INT32_C( 752574079), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) }, { INT32_C( 0), INT32_C( 1086822420), INT32_C( 984283278), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) }, { INT32_C( 0), INT32_C( 1477247610), -INT32_C( 752574079), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) }, { INT32_C( 0), INT32_C( 1086822420), INT32_C( 984283278), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask8 k = test_vec[i].k; simde__m256i a = simde_mm256_loadu_epi32(test_vec[i].a); simde__m256i b = simde_mm256_loadu_epi32(test_vec[i].b); simde__m256i r; r = simde_mm256_maskz_shuffle_i32x4(k, a, b, 0); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r0)); r = simde_mm256_maskz_shuffle_i32x4(k, a, b, 1); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r1)); r = simde_mm256_maskz_shuffle_i32x4(k, a, b, 2); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r2)); r = simde_mm256_maskz_shuffle_i32x4(k, a, b, 3); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r3)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i a = simde_test_x86_random_i32x8(); simde__m256i b = simde_test_x86_random_i32x8(); simde__m256i r; simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, b, 
SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_maskz_shuffle_i32x4(k, a, b, 0); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_maskz_shuffle_i32x4(k, a, b, 1); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_maskz_shuffle_i32x4(k, a, b, 2); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_maskz_shuffle_i32x4(k, a, b, 3); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_shuffle_i64x2(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const int64_t a[4]; const int64_t b[4]; const int64_t r0[4]; const int64_t r1[4]; const int64_t r2[4]; const int64_t r3[4]; } test_vec[4] = { { { INT64_C( 3551315409324880873), INT64_C( 8008329497760772479), INT64_C( 2445691485855378668), INT64_C( 3145002663167902100) }, { INT64_C( 2394514919912297342), -INT64_C( 4258764945951323337), -INT64_C( 1453948719952461761), INT64_C( 7113696077798676811) }, { INT64_C( 3551315409324880873), INT64_C( 8008329497760772479), INT64_C( 2394514919912297342), -INT64_C( 4258764945951323337) }, { INT64_C( 2445691485855378668), INT64_C( 3145002663167902100), INT64_C( 2394514919912297342), -INT64_C( 4258764945951323337) }, { INT64_C( 3551315409324880873), INT64_C( 8008329497760772479), -INT64_C( 1453948719952461761), INT64_C( 7113696077798676811) }, { INT64_C( 2445691485855378668), INT64_C( 3145002663167902100), -INT64_C( 1453948719952461761), INT64_C( 7113696077798676811) } }, { { -INT64_C( 5511377366319073601), INT64_C( 4958197129583079986), INT64_C( 4454562452028680334), -INT64_C( 4275743416332445598) }, { -INT64_C( 1213253092931987975), -INT64_C( 1214342002771340581), INT64_C( 4999461655375321906), -INT64_C( 6374786247804834881) }, { -INT64_C( 5511377366319073601), INT64_C( 4958197129583079986), -INT64_C( 1213253092931987975), -INT64_C( 1214342002771340581) }, { INT64_C( 4454562452028680334), -INT64_C( 4275743416332445598), -INT64_C( 1213253092931987975), -INT64_C( 
1214342002771340581) }, { -INT64_C( 5511377366319073601), INT64_C( 4958197129583079986), INT64_C( 4999461655375321906), -INT64_C( 6374786247804834881) }, { INT64_C( 4454562452028680334), -INT64_C( 4275743416332445598), INT64_C( 4999461655375321906), -INT64_C( 6374786247804834881) } }, { { INT64_C( 3600422662894824778), -INT64_C( 1231674317120343656), INT64_C( 2628541310555045457), -INT64_C( 8722223178297375073) }, { -INT64_C( 1250447814667988706), INT64_C( 4476526271792658038), INT64_C( 4978710313897767566), -INT64_C( 8163188358641417996) }, { INT64_C( 3600422662894824778), -INT64_C( 1231674317120343656), -INT64_C( 1250447814667988706), INT64_C( 4476526271792658038) }, { INT64_C( 2628541310555045457), -INT64_C( 8722223178297375073), -INT64_C( 1250447814667988706), INT64_C( 4476526271792658038) }, { INT64_C( 3600422662894824778), -INT64_C( 1231674317120343656), INT64_C( 4978710313897767566), -INT64_C( 8163188358641417996) }, { INT64_C( 2628541310555045457), -INT64_C( 8722223178297375073), INT64_C( 4978710313897767566), -INT64_C( 8163188358641417996) } }, { { INT64_C( 4529961281649787800), INT64_C( 2560735734515352926), -INT64_C( 2329436418187264575), INT64_C( 6858073793719385901) }, { INT64_C( 3751266315769097993), -INT64_C( 3555300477136568591), INT64_C( 1932293747428933382), -INT64_C( 5888205111561756639) }, { INT64_C( 4529961281649787800), INT64_C( 2560735734515352926), INT64_C( 3751266315769097993), -INT64_C( 3555300477136568591) }, { -INT64_C( 2329436418187264575), INT64_C( 6858073793719385901), INT64_C( 3751266315769097993), -INT64_C( 3555300477136568591) }, { INT64_C( 4529961281649787800), INT64_C( 2560735734515352926), INT64_C( 1932293747428933382), -INT64_C( 5888205111561756639) }, { -INT64_C( 2329436418187264575), INT64_C( 6858073793719385901), INT64_C( 1932293747428933382), -INT64_C( 5888205111561756639) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i a = simde_mm256_loadu_epi64(test_vec[i].a); simde__m256i b 
= simde_mm256_loadu_epi64(test_vec[i].b); simde__m256i r; r = simde_mm256_shuffle_i64x2(a, b, 0); simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r0)); r = simde_mm256_shuffle_i64x2(a, b, 1); simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r1)); r = simde_mm256_shuffle_i64x2(a, b, 2); simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r2)); r = simde_mm256_shuffle_i64x2(a, b, 3); simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r3)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { simde__m256i a = simde_test_x86_random_i64x4(); simde__m256i b = simde_test_x86_random_i64x4(); simde__m256i r; simde_test_x86_write_i64x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_shuffle_i64x2(a, b, 0); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_shuffle_i64x2(a, b, 1); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_shuffle_i64x2(a, b, 2); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_shuffle_i64x2(a, b, 3); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_mask_shuffle_i64x2(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const int64_t src[4]; const simde__mmask8 k; const int64_t a[4]; const int64_t b[4]; const int64_t r0[4]; const int64_t r1[4]; const int64_t r2[4]; const int64_t r3[4]; } test_vec[4] = { { { -INT64_C( 4750049666754593255), INT64_C( 7348475432003887810), INT64_C( 835478380301128017), INT64_C( 1476489067024726412) }, UINT8_C(200), { -INT64_C( 4733137129855072481), INT64_C( 1307340219673347531), -INT64_C( 9170376919911550983), INT64_C( 3123966575694810632) }, { INT64_C( 8138014032272040023), -INT64_C( 5034911404046277106), -INT64_C( 6134281326629856362), INT64_C( 4642251244478159157) }, { -INT64_C( 4750049666754593255), INT64_C( 
7348475432003887810), INT64_C( 835478380301128017), -INT64_C( 5034911404046277106) }, { -INT64_C( 4750049666754593255), INT64_C( 7348475432003887810), INT64_C( 835478380301128017), -INT64_C( 5034911404046277106) }, { -INT64_C( 4750049666754593255), INT64_C( 7348475432003887810), INT64_C( 835478380301128017), INT64_C( 4642251244478159157) }, { -INT64_C( 4750049666754593255), INT64_C( 7348475432003887810), INT64_C( 835478380301128017), INT64_C( 4642251244478159157) } }, { { INT64_C( 4308646514968896950), INT64_C( 722100362974727817), -INT64_C( 3741850724285747002), -INT64_C( 7293444474572747105) }, UINT8_C(159), { -INT64_C( 3519834956080744901), INT64_C( 7530949282372022866), INT64_C( 5448453940943252406), INT64_C( 2950924202014614745) }, { INT64_C( 1968334486882402495), -INT64_C( 7403816753243319200), -INT64_C( 1977588166704087226), INT64_C( 5074197265111366998) }, { -INT64_C( 3519834956080744901), INT64_C( 7530949282372022866), INT64_C( 1968334486882402495), -INT64_C( 7403816753243319200) }, { INT64_C( 5448453940943252406), INT64_C( 2950924202014614745), INT64_C( 1968334486882402495), -INT64_C( 7403816753243319200) }, { -INT64_C( 3519834956080744901), INT64_C( 7530949282372022866), -INT64_C( 1977588166704087226), INT64_C( 5074197265111366998) }, { INT64_C( 5448453940943252406), INT64_C( 2950924202014614745), -INT64_C( 1977588166704087226), INT64_C( 5074197265111366998) } }, { { INT64_C( 9209887762445447660), INT64_C( 5416026072730282412), INT64_C( 5171291196819947478), INT64_C( 1488710330141637850) }, UINT8_C( 40), { INT64_C( 4770656267994122607), -INT64_C( 6085890401103264995), INT64_C( 3105482834620213792), -INT64_C( 7958591573362247313) }, { -INT64_C( 5256742437348912080), INT64_C( 479903728446165776), -INT64_C( 7513592798553881504), -INT64_C( 6447032102297989419) }, { INT64_C( 9209887762445447660), INT64_C( 5416026072730282412), INT64_C( 5171291196819947478), INT64_C( 479903728446165776) }, { INT64_C( 9209887762445447660), INT64_C( 5416026072730282412), 
INT64_C( 5171291196819947478), INT64_C( 479903728446165776) }, { INT64_C( 9209887762445447660), INT64_C( 5416026072730282412), INT64_C( 5171291196819947478), -INT64_C( 6447032102297989419) }, { INT64_C( 9209887762445447660), INT64_C( 5416026072730282412), INT64_C( 5171291196819947478), -INT64_C( 6447032102297989419) } }, { { -INT64_C( 238271948034891681), INT64_C( 8267378642703162576), INT64_C( 3433356286396334053), -INT64_C( 8294032728888270135) }, UINT8_C( 77), { INT64_C( 6026157385464841557), INT64_C( 1276444020211799855), INT64_C( 891039179036476632), -INT64_C( 2973284494263870271) }, { INT64_C( 6689051806349653578), -INT64_C( 1673963500626691680), -INT64_C( 8278503648347344729), INT64_C( 7335158844038600052) }, { INT64_C( 6026157385464841557), INT64_C( 8267378642703162576), INT64_C( 6689051806349653578), -INT64_C( 1673963500626691680) }, { INT64_C( 891039179036476632), INT64_C( 8267378642703162576), INT64_C( 6689051806349653578), -INT64_C( 1673963500626691680) }, { INT64_C( 6026157385464841557), INT64_C( 8267378642703162576), -INT64_C( 8278503648347344729), INT64_C( 7335158844038600052) }, { INT64_C( 891039179036476632), INT64_C( 8267378642703162576), -INT64_C( 8278503648347344729), INT64_C( 7335158844038600052) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i src = simde_mm256_loadu_epi64(test_vec[i].src); simde__mmask8 k = test_vec[i].k; simde__m256i a = simde_mm256_loadu_epi64(test_vec[i].a); simde__m256i b = simde_mm256_loadu_epi64(test_vec[i].b); simde__m256i r; r = simde_mm256_mask_shuffle_i64x2(src, k, a, b, 0); simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r0)); r = simde_mm256_mask_shuffle_i64x2(src, k, a, b, 1); simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r1)); r = simde_mm256_mask_shuffle_i64x2(src, k, a, b, 2); simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r2)); r = simde_mm256_mask_shuffle_i64x2(src, k, a, b, 3); 
simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r3)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { simde__m256i src = simde_test_x86_random_i64x4(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i a = simde_test_x86_random_i64x4(); simde__m256i b = simde_test_x86_random_i64x4(); simde__m256i r; simde_test_x86_write_i64x4(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k,SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_mask_shuffle_i64x2(src, k, a, b, 0); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_mask_shuffle_i64x2(src, k, a, b, 1); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_mask_shuffle_i64x2(src, k, a, b, 2); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_mask_shuffle_i64x2(src, k, a, b, 3); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_maskz_shuffle_i64x2(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde__mmask8 k; const int64_t a[4]; const int64_t b[4]; const int64_t r0[4]; const int64_t r1[4]; const int64_t r2[4]; const int64_t r3[4]; } test_vec[4] = { { UINT8_C(130), { INT64_C( 5897795453911463084), INT64_C( 1234160324116783402), -INT64_C( 3441831997656375695), INT64_C( 364741372620777884) }, { -INT64_C( 5453987194644374202), INT64_C( 4216626858216074028), INT64_C( 7398880696424309461), -INT64_C( 4998711411531865789) }, { INT64_C( 0), INT64_C( 1234160324116783402), INT64_C( 0), INT64_C( 0) }, { INT64_C( 0), INT64_C( 364741372620777884), INT64_C( 0), INT64_C( 0) }, { INT64_C( 0), INT64_C( 1234160324116783402), INT64_C( 0), INT64_C( 0) }, { INT64_C( 0), INT64_C( 364741372620777884), INT64_C( 0), INT64_C( 0) } }, { UINT8_C( 95), { INT64_C( 3605868208288544850), -INT64_C( 6755059690134340363), 
INT64_C( 7958093419730771730), -INT64_C( 60343750250853177) }, { -INT64_C( 6517016866100276651), -INT64_C( 3456983586772514461), INT64_C( 1057621640285614962), -INT64_C( 9068842080715435911) }, { INT64_C( 3605868208288544850), -INT64_C( 6755059690134340363), -INT64_C( 6517016866100276651), -INT64_C( 3456983586772514461) }, { INT64_C( 7958093419730771730), -INT64_C( 60343750250853177), -INT64_C( 6517016866100276651), -INT64_C( 3456983586772514461) }, { INT64_C( 3605868208288544850), -INT64_C( 6755059690134340363), INT64_C( 1057621640285614962), -INT64_C( 9068842080715435911) }, { INT64_C( 7958093419730771730), -INT64_C( 60343750250853177), INT64_C( 1057621640285614962), -INT64_C( 9068842080715435911) } }, { UINT8_C( 19), { -INT64_C( 770844044711174654), INT64_C( 3116354292617032058), -INT64_C( 4871065016503992704), -INT64_C( 5446140910634431401) }, { INT64_C( 1345146475543146057), INT64_C( 7025087517084835228), -INT64_C( 3997589598203088739), INT64_C( 2497399761580092993) }, { -INT64_C( 770844044711174654), INT64_C( 3116354292617032058), INT64_C( 0), INT64_C( 0) }, { -INT64_C( 4871065016503992704), -INT64_C( 5446140910634431401), INT64_C( 0), INT64_C( 0) }, { -INT64_C( 770844044711174654), INT64_C( 3116354292617032058), INT64_C( 0), INT64_C( 0) }, { -INT64_C( 4871065016503992704), -INT64_C( 5446140910634431401), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(185), { -INT64_C( 2392146420534487226), -INT64_C( 8070696138312835006), -INT64_C( 4871267184011065234), INT64_C( 5056618793708968168) }, { INT64_C( 2589645514894441872), INT64_C( 4437034983047343973), INT64_C( 4852283745272697676), INT64_C( 3847731754999147550) }, { -INT64_C( 2392146420534487226), INT64_C( 0), INT64_C( 0), INT64_C( 4437034983047343973) }, { -INT64_C( 4871267184011065234), INT64_C( 0), INT64_C( 0), INT64_C( 4437034983047343973) }, { -INT64_C( 2392146420534487226), INT64_C( 0), INT64_C( 0), INT64_C( 3847731754999147550) }, { -INT64_C( 4871267184011065234), INT64_C( 0), INT64_C( 0), INT64_C( 
3847731754999147550) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask8 k = test_vec[i].k; simde__m256i a = simde_mm256_loadu_epi64(test_vec[i].a); simde__m256i b = simde_mm256_loadu_epi64(test_vec[i].b); simde__m256i r; r = simde_mm256_maskz_shuffle_i64x2(k, a, b, 0); simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r0)); r = simde_mm256_maskz_shuffle_i64x2(k, a, b, 1); simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r1)); r = simde_mm256_maskz_shuffle_i64x2(k, a, b, 2); simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r2)); r = simde_mm256_maskz_shuffle_i64x2(k, a, b, 3); simde_test_x86_assert_equal_i64x4(r, simde_mm256_loadu_epi64(test_vec[i].r3)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m256i a = simde_test_x86_random_i64x4(); simde__m256i b = simde_test_x86_random_i64x4(); simde__m256i r; simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_maskz_shuffle_i64x2(k, a, b, 0); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_maskz_shuffle_i64x2(k, a, b, 1); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_maskz_shuffle_i64x2(k, a, b, 2); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm256_maskz_shuffle_i64x2(k, a, b, 3); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_shuffle_f32x4(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r1[16]; const simde_float32 r2[16]; const simde_float32 r3[16]; const simde_float32 r4[16]; } test_vec[4] = { { { SIMDE_FLOAT32_C( 612.00), SIMDE_FLOAT32_C( 41.95), SIMDE_FLOAT32_C( 
976.38), SIMDE_FLOAT32_C( 600.17), SIMDE_FLOAT32_C( 703.58), SIMDE_FLOAT32_C( 547.81), SIMDE_FLOAT32_C( -87.34), SIMDE_FLOAT32_C( -795.34), SIMDE_FLOAT32_C( -60.06), SIMDE_FLOAT32_C( -487.81), SIMDE_FLOAT32_C( 639.28), SIMDE_FLOAT32_C( -834.87), SIMDE_FLOAT32_C( 869.20), SIMDE_FLOAT32_C( -130.23), SIMDE_FLOAT32_C( -140.41), SIMDE_FLOAT32_C( -683.29) }, { SIMDE_FLOAT32_C( -862.91), SIMDE_FLOAT32_C( -708.52), SIMDE_FLOAT32_C( -653.90), SIMDE_FLOAT32_C( -928.55), SIMDE_FLOAT32_C( 531.84), SIMDE_FLOAT32_C( 569.89), SIMDE_FLOAT32_C( -311.59), SIMDE_FLOAT32_C( -160.89), SIMDE_FLOAT32_C( 412.77), SIMDE_FLOAT32_C( 762.26), SIMDE_FLOAT32_C( -945.34), SIMDE_FLOAT32_C( 174.68), SIMDE_FLOAT32_C( 709.50), SIMDE_FLOAT32_C( -817.46), SIMDE_FLOAT32_C( 656.33), SIMDE_FLOAT32_C( 321.50) }, { SIMDE_FLOAT32_C( -60.06), SIMDE_FLOAT32_C( -487.81), SIMDE_FLOAT32_C( 639.28), SIMDE_FLOAT32_C( -834.87), SIMDE_FLOAT32_C( 869.20), SIMDE_FLOAT32_C( -130.23), SIMDE_FLOAT32_C( -140.41), SIMDE_FLOAT32_C( -683.29), SIMDE_FLOAT32_C( -862.91), SIMDE_FLOAT32_C( -708.52), SIMDE_FLOAT32_C( -653.90), SIMDE_FLOAT32_C( -928.55), SIMDE_FLOAT32_C( 531.84), SIMDE_FLOAT32_C( 569.89), SIMDE_FLOAT32_C( -311.59), SIMDE_FLOAT32_C( -160.89) }, { SIMDE_FLOAT32_C( 703.58), SIMDE_FLOAT32_C( 547.81), SIMDE_FLOAT32_C( -87.34), SIMDE_FLOAT32_C( -795.34), SIMDE_FLOAT32_C( -60.06), SIMDE_FLOAT32_C( -487.81), SIMDE_FLOAT32_C( 639.28), SIMDE_FLOAT32_C( -834.87), SIMDE_FLOAT32_C( 709.50), SIMDE_FLOAT32_C( -817.46), SIMDE_FLOAT32_C( 656.33), SIMDE_FLOAT32_C( 321.50), SIMDE_FLOAT32_C( -862.91), SIMDE_FLOAT32_C( -708.52), SIMDE_FLOAT32_C( -653.90), SIMDE_FLOAT32_C( -928.55) }, { SIMDE_FLOAT32_C( -60.06), SIMDE_FLOAT32_C( -487.81), SIMDE_FLOAT32_C( 639.28), SIMDE_FLOAT32_C( -834.87), SIMDE_FLOAT32_C( -60.06), SIMDE_FLOAT32_C( -487.81), SIMDE_FLOAT32_C( 639.28), SIMDE_FLOAT32_C( -834.87), SIMDE_FLOAT32_C( 531.84), SIMDE_FLOAT32_C( 569.89), SIMDE_FLOAT32_C( -311.59), SIMDE_FLOAT32_C( -160.89), SIMDE_FLOAT32_C( 709.50), 
SIMDE_FLOAT32_C( -817.46), SIMDE_FLOAT32_C( 656.33), SIMDE_FLOAT32_C( 321.50) }, { SIMDE_FLOAT32_C( 869.20), SIMDE_FLOAT32_C( -130.23), SIMDE_FLOAT32_C( -140.41), SIMDE_FLOAT32_C( -683.29), SIMDE_FLOAT32_C( 703.58), SIMDE_FLOAT32_C( 547.81), SIMDE_FLOAT32_C( -87.34), SIMDE_FLOAT32_C( -795.34), SIMDE_FLOAT32_C( 412.77), SIMDE_FLOAT32_C( 762.26), SIMDE_FLOAT32_C( -945.34), SIMDE_FLOAT32_C( 174.68), SIMDE_FLOAT32_C( 412.77), SIMDE_FLOAT32_C( 762.26), SIMDE_FLOAT32_C( -945.34), SIMDE_FLOAT32_C( 174.68) } }, { { SIMDE_FLOAT32_C( 224.49), SIMDE_FLOAT32_C( 632.71), SIMDE_FLOAT32_C( -78.33), SIMDE_FLOAT32_C( -71.93), SIMDE_FLOAT32_C( 180.52), SIMDE_FLOAT32_C( 834.33), SIMDE_FLOAT32_C( 132.73), SIMDE_FLOAT32_C( -879.54), SIMDE_FLOAT32_C( -653.48), SIMDE_FLOAT32_C( -227.99), SIMDE_FLOAT32_C( -714.41), SIMDE_FLOAT32_C( -784.28), SIMDE_FLOAT32_C( 641.77), SIMDE_FLOAT32_C( 145.18), SIMDE_FLOAT32_C( -467.57), SIMDE_FLOAT32_C( 778.87) }, { SIMDE_FLOAT32_C( 436.67), SIMDE_FLOAT32_C( -121.47), SIMDE_FLOAT32_C( 850.32), SIMDE_FLOAT32_C( -31.50), SIMDE_FLOAT32_C( -551.58), SIMDE_FLOAT32_C( -461.27), SIMDE_FLOAT32_C( 807.61), SIMDE_FLOAT32_C( 861.18), SIMDE_FLOAT32_C( -699.01), SIMDE_FLOAT32_C( 862.27), SIMDE_FLOAT32_C( 35.86), SIMDE_FLOAT32_C( -989.51), SIMDE_FLOAT32_C( -955.19), SIMDE_FLOAT32_C( -307.81), SIMDE_FLOAT32_C( 331.99), SIMDE_FLOAT32_C( 269.30) }, { SIMDE_FLOAT32_C( -653.48), SIMDE_FLOAT32_C( -227.99), SIMDE_FLOAT32_C( -714.41), SIMDE_FLOAT32_C( -784.28), SIMDE_FLOAT32_C( 641.77), SIMDE_FLOAT32_C( 145.18), SIMDE_FLOAT32_C( -467.57), SIMDE_FLOAT32_C( 778.87), SIMDE_FLOAT32_C( 436.67), SIMDE_FLOAT32_C( -121.47), SIMDE_FLOAT32_C( 850.32), SIMDE_FLOAT32_C( -31.50), SIMDE_FLOAT32_C( -551.58), SIMDE_FLOAT32_C( -461.27), SIMDE_FLOAT32_C( 807.61), SIMDE_FLOAT32_C( 861.18) }, { SIMDE_FLOAT32_C( 180.52), SIMDE_FLOAT32_C( 834.33), SIMDE_FLOAT32_C( 132.73), SIMDE_FLOAT32_C( -879.54), SIMDE_FLOAT32_C( -653.48), SIMDE_FLOAT32_C( -227.99), SIMDE_FLOAT32_C( -714.41), SIMDE_FLOAT32_C( 
-784.28), SIMDE_FLOAT32_C( -955.19), SIMDE_FLOAT32_C( -307.81), SIMDE_FLOAT32_C( 331.99), SIMDE_FLOAT32_C( 269.30), SIMDE_FLOAT32_C( 436.67), SIMDE_FLOAT32_C( -121.47), SIMDE_FLOAT32_C( 850.32), SIMDE_FLOAT32_C( -31.50) }, { SIMDE_FLOAT32_C( -653.48), SIMDE_FLOAT32_C( -227.99), SIMDE_FLOAT32_C( -714.41), SIMDE_FLOAT32_C( -784.28), SIMDE_FLOAT32_C( -653.48), SIMDE_FLOAT32_C( -227.99), SIMDE_FLOAT32_C( -714.41), SIMDE_FLOAT32_C( -784.28), SIMDE_FLOAT32_C( -551.58), SIMDE_FLOAT32_C( -461.27), SIMDE_FLOAT32_C( 807.61), SIMDE_FLOAT32_C( 861.18), SIMDE_FLOAT32_C( -955.19), SIMDE_FLOAT32_C( -307.81), SIMDE_FLOAT32_C( 331.99), SIMDE_FLOAT32_C( 269.30) }, { SIMDE_FLOAT32_C( 641.77), SIMDE_FLOAT32_C( 145.18), SIMDE_FLOAT32_C( -467.57), SIMDE_FLOAT32_C( 778.87), SIMDE_FLOAT32_C( 180.52), SIMDE_FLOAT32_C( 834.33), SIMDE_FLOAT32_C( 132.73), SIMDE_FLOAT32_C( -879.54), SIMDE_FLOAT32_C( -699.01), SIMDE_FLOAT32_C( 862.27), SIMDE_FLOAT32_C( 35.86), SIMDE_FLOAT32_C( -989.51), SIMDE_FLOAT32_C( -699.01), SIMDE_FLOAT32_C( 862.27), SIMDE_FLOAT32_C( 35.86), SIMDE_FLOAT32_C( -989.51) } }, { { SIMDE_FLOAT32_C( -675.10), SIMDE_FLOAT32_C( -746.34), SIMDE_FLOAT32_C( -802.63), SIMDE_FLOAT32_C( 505.42), SIMDE_FLOAT32_C( -912.01), SIMDE_FLOAT32_C( 330.10), SIMDE_FLOAT32_C( 625.89), SIMDE_FLOAT32_C( -565.49), SIMDE_FLOAT32_C( -897.90), SIMDE_FLOAT32_C( 911.48), SIMDE_FLOAT32_C( -349.78), SIMDE_FLOAT32_C( 743.88), SIMDE_FLOAT32_C( 56.66), SIMDE_FLOAT32_C( 182.65), SIMDE_FLOAT32_C( 522.74), SIMDE_FLOAT32_C( -506.67) }, { SIMDE_FLOAT32_C( -938.82), SIMDE_FLOAT32_C( 373.07), SIMDE_FLOAT32_C( 461.83), SIMDE_FLOAT32_C( -490.40), SIMDE_FLOAT32_C( 911.80), SIMDE_FLOAT32_C( 269.44), SIMDE_FLOAT32_C( -629.22), SIMDE_FLOAT32_C( -787.21), SIMDE_FLOAT32_C( 131.71), SIMDE_FLOAT32_C( 406.64), SIMDE_FLOAT32_C( -776.72), SIMDE_FLOAT32_C( 176.53), SIMDE_FLOAT32_C( -901.17), SIMDE_FLOAT32_C( 555.27), SIMDE_FLOAT32_C( -554.17), SIMDE_FLOAT32_C( -576.26) }, { SIMDE_FLOAT32_C( -897.90), SIMDE_FLOAT32_C( 911.48), 
SIMDE_FLOAT32_C( -349.78), SIMDE_FLOAT32_C( 743.88), SIMDE_FLOAT32_C( 56.66), SIMDE_FLOAT32_C( 182.65), SIMDE_FLOAT32_C( 522.74), SIMDE_FLOAT32_C( -506.67), SIMDE_FLOAT32_C( -938.82), SIMDE_FLOAT32_C( 373.07), SIMDE_FLOAT32_C( 461.83), SIMDE_FLOAT32_C( -490.40), SIMDE_FLOAT32_C( 911.80), SIMDE_FLOAT32_C( 269.44), SIMDE_FLOAT32_C( -629.22), SIMDE_FLOAT32_C( -787.21) }, { SIMDE_FLOAT32_C( -912.01), SIMDE_FLOAT32_C( 330.10), SIMDE_FLOAT32_C( 625.89), SIMDE_FLOAT32_C( -565.49), SIMDE_FLOAT32_C( -897.90), SIMDE_FLOAT32_C( 911.48), SIMDE_FLOAT32_C( -349.78), SIMDE_FLOAT32_C( 743.88), SIMDE_FLOAT32_C( -901.17), SIMDE_FLOAT32_C( 555.27), SIMDE_FLOAT32_C( -554.17), SIMDE_FLOAT32_C( -576.26), SIMDE_FLOAT32_C( -938.82), SIMDE_FLOAT32_C( 373.07), SIMDE_FLOAT32_C( 461.83), SIMDE_FLOAT32_C( -490.40) }, { SIMDE_FLOAT32_C( -897.90), SIMDE_FLOAT32_C( 911.48), SIMDE_FLOAT32_C( -349.78), SIMDE_FLOAT32_C( 743.88), SIMDE_FLOAT32_C( -897.90), SIMDE_FLOAT32_C( 911.48), SIMDE_FLOAT32_C( -349.78), SIMDE_FLOAT32_C( 743.88), SIMDE_FLOAT32_C( 911.80), SIMDE_FLOAT32_C( 269.44), SIMDE_FLOAT32_C( -629.22), SIMDE_FLOAT32_C( -787.21), SIMDE_FLOAT32_C( -901.17), SIMDE_FLOAT32_C( 555.27), SIMDE_FLOAT32_C( -554.17), SIMDE_FLOAT32_C( -576.26) }, { SIMDE_FLOAT32_C( 56.66), SIMDE_FLOAT32_C( 182.65), SIMDE_FLOAT32_C( 522.74), SIMDE_FLOAT32_C( -506.67), SIMDE_FLOAT32_C( -912.01), SIMDE_FLOAT32_C( 330.10), SIMDE_FLOAT32_C( 625.89), SIMDE_FLOAT32_C( -565.49), SIMDE_FLOAT32_C( 131.71), SIMDE_FLOAT32_C( 406.64), SIMDE_FLOAT32_C( -776.72), SIMDE_FLOAT32_C( 176.53), SIMDE_FLOAT32_C( 131.71), SIMDE_FLOAT32_C( 406.64), SIMDE_FLOAT32_C( -776.72), SIMDE_FLOAT32_C( 176.53) } }, { { SIMDE_FLOAT32_C( 808.93), SIMDE_FLOAT32_C( -356.80), SIMDE_FLOAT32_C( 929.16), SIMDE_FLOAT32_C( 896.92), SIMDE_FLOAT32_C( 973.30), SIMDE_FLOAT32_C( 555.05), SIMDE_FLOAT32_C( -668.58), SIMDE_FLOAT32_C( -924.60), SIMDE_FLOAT32_C( 466.53), SIMDE_FLOAT32_C( -18.35), SIMDE_FLOAT32_C( 819.28), SIMDE_FLOAT32_C( -476.81), SIMDE_FLOAT32_C( 
-835.70), SIMDE_FLOAT32_C( 342.03), SIMDE_FLOAT32_C( 16.52), SIMDE_FLOAT32_C( -774.52) }, { SIMDE_FLOAT32_C( -284.91), SIMDE_FLOAT32_C( -521.66), SIMDE_FLOAT32_C( -264.92), SIMDE_FLOAT32_C( -373.11), SIMDE_FLOAT32_C( 747.78), SIMDE_FLOAT32_C( 105.87), SIMDE_FLOAT32_C( -160.32), SIMDE_FLOAT32_C( -120.51), SIMDE_FLOAT32_C( -487.49), SIMDE_FLOAT32_C( 62.96), SIMDE_FLOAT32_C( -943.98), SIMDE_FLOAT32_C( -388.66), SIMDE_FLOAT32_C( -381.77), SIMDE_FLOAT32_C( -498.15), SIMDE_FLOAT32_C( 35.08), SIMDE_FLOAT32_C( -572.84) }, { SIMDE_FLOAT32_C( 466.53), SIMDE_FLOAT32_C( -18.35), SIMDE_FLOAT32_C( 819.28), SIMDE_FLOAT32_C( -476.81), SIMDE_FLOAT32_C( -835.70), SIMDE_FLOAT32_C( 342.03), SIMDE_FLOAT32_C( 16.52), SIMDE_FLOAT32_C( -774.52), SIMDE_FLOAT32_C( -284.91), SIMDE_FLOAT32_C( -521.66), SIMDE_FLOAT32_C( -264.92), SIMDE_FLOAT32_C( -373.11), SIMDE_FLOAT32_C( 747.78), SIMDE_FLOAT32_C( 105.87), SIMDE_FLOAT32_C( -160.32), SIMDE_FLOAT32_C( -120.51) }, { SIMDE_FLOAT32_C( 973.30), SIMDE_FLOAT32_C( 555.05), SIMDE_FLOAT32_C( -668.58), SIMDE_FLOAT32_C( -924.60), SIMDE_FLOAT32_C( 466.53), SIMDE_FLOAT32_C( -18.35), SIMDE_FLOAT32_C( 819.28), SIMDE_FLOAT32_C( -476.81), SIMDE_FLOAT32_C( -381.77), SIMDE_FLOAT32_C( -498.15), SIMDE_FLOAT32_C( 35.08), SIMDE_FLOAT32_C( -572.84), SIMDE_FLOAT32_C( -284.91), SIMDE_FLOAT32_C( -521.66), SIMDE_FLOAT32_C( -264.92), SIMDE_FLOAT32_C( -373.11) }, { SIMDE_FLOAT32_C( 466.53), SIMDE_FLOAT32_C( -18.35), SIMDE_FLOAT32_C( 819.28), SIMDE_FLOAT32_C( -476.81), SIMDE_FLOAT32_C( 466.53), SIMDE_FLOAT32_C( -18.35), SIMDE_FLOAT32_C( 819.28), SIMDE_FLOAT32_C( -476.81), SIMDE_FLOAT32_C( 747.78), SIMDE_FLOAT32_C( 105.87), SIMDE_FLOAT32_C( -160.32), SIMDE_FLOAT32_C( -120.51), SIMDE_FLOAT32_C( -381.77), SIMDE_FLOAT32_C( -498.15), SIMDE_FLOAT32_C( 35.08), SIMDE_FLOAT32_C( -572.84) }, { SIMDE_FLOAT32_C( -835.70), SIMDE_FLOAT32_C( 342.03), SIMDE_FLOAT32_C( 16.52), SIMDE_FLOAT32_C( -774.52), SIMDE_FLOAT32_C( 973.30), SIMDE_FLOAT32_C( 555.05), SIMDE_FLOAT32_C( -668.58), 
SIMDE_FLOAT32_C( -924.60), SIMDE_FLOAT32_C( -487.49), SIMDE_FLOAT32_C( 62.96), SIMDE_FLOAT32_C( -943.98), SIMDE_FLOAT32_C( -388.66), SIMDE_FLOAT32_C( -487.49), SIMDE_FLOAT32_C( 62.96), SIMDE_FLOAT32_C( -943.98), SIMDE_FLOAT32_C( -388.66) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r; r = simde_mm512_shuffle_f32x4(a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r1), 1); r = simde_mm512_shuffle_f32x4(a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r2), 1); r = simde_mm512_shuffle_f32x4(a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r3), 1); r = simde_mm512_shuffle_f32x4(a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r4), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { simde__m512 a = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 b = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 r; simde_test_x86_write_f32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_shuffle_f32x4(a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_shuffle_f32x4(a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_shuffle_f32x4(a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_shuffle_f32x4(a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } 
return 1; #endif } static int test_simde_mm512_mask_shuffle_f32x4(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde_float32 src[16]; const simde__mmask16 k; const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r1[16]; const simde_float32 r2[16]; const simde_float32 r3[16]; const simde_float32 r4[16]; } test_vec[4] = { { { SIMDE_FLOAT32_C( -725.57), SIMDE_FLOAT32_C( -383.90), SIMDE_FLOAT32_C( 720.60), SIMDE_FLOAT32_C( 554.17), SIMDE_FLOAT32_C( 448.60), SIMDE_FLOAT32_C( 822.68), SIMDE_FLOAT32_C( -709.42), SIMDE_FLOAT32_C( -354.77), SIMDE_FLOAT32_C( -252.87), SIMDE_FLOAT32_C( 698.39), SIMDE_FLOAT32_C( 892.46), SIMDE_FLOAT32_C( 204.87), SIMDE_FLOAT32_C( 942.56), SIMDE_FLOAT32_C( -22.09), SIMDE_FLOAT32_C( -20.72), SIMDE_FLOAT32_C( -714.33) }, UINT16_C(57460), { SIMDE_FLOAT32_C( 182.18), SIMDE_FLOAT32_C( 667.71), SIMDE_FLOAT32_C( 770.89), SIMDE_FLOAT32_C( -144.25), SIMDE_FLOAT32_C( -458.14), SIMDE_FLOAT32_C( -547.82), SIMDE_FLOAT32_C( 115.83), SIMDE_FLOAT32_C( -131.04), SIMDE_FLOAT32_C( 173.86), SIMDE_FLOAT32_C( 778.80), SIMDE_FLOAT32_C( -446.61), SIMDE_FLOAT32_C( -472.37), SIMDE_FLOAT32_C( 42.51), SIMDE_FLOAT32_C( -172.17), SIMDE_FLOAT32_C( 143.73), SIMDE_FLOAT32_C( -236.89) }, { SIMDE_FLOAT32_C( -618.00), SIMDE_FLOAT32_C( -407.67), SIMDE_FLOAT32_C( -414.21), SIMDE_FLOAT32_C( -327.43), SIMDE_FLOAT32_C( 237.55), SIMDE_FLOAT32_C( 332.92), SIMDE_FLOAT32_C( -629.03), SIMDE_FLOAT32_C( 130.01), SIMDE_FLOAT32_C( -462.21), SIMDE_FLOAT32_C( -686.47), SIMDE_FLOAT32_C( -892.08), SIMDE_FLOAT32_C( 517.07), SIMDE_FLOAT32_C( -400.81), SIMDE_FLOAT32_C( -907.38), SIMDE_FLOAT32_C( 859.44), SIMDE_FLOAT32_C( 781.38) }, { SIMDE_FLOAT32_C( -725.57), SIMDE_FLOAT32_C( -383.90), SIMDE_FLOAT32_C( -446.61), SIMDE_FLOAT32_C( 554.17), SIMDE_FLOAT32_C( 42.51), SIMDE_FLOAT32_C( -172.17), SIMDE_FLOAT32_C( 143.73), SIMDE_FLOAT32_C( -354.77), SIMDE_FLOAT32_C( -252.87), SIMDE_FLOAT32_C( 698.39), SIMDE_FLOAT32_C( 892.46), SIMDE_FLOAT32_C( 204.87), SIMDE_FLOAT32_C( 942.56), 
SIMDE_FLOAT32_C( 332.92), SIMDE_FLOAT32_C( -629.03), SIMDE_FLOAT32_C( 130.01) }, { SIMDE_FLOAT32_C( -725.57), SIMDE_FLOAT32_C( -383.90), SIMDE_FLOAT32_C( 115.83), SIMDE_FLOAT32_C( 554.17), SIMDE_FLOAT32_C( 173.86), SIMDE_FLOAT32_C( 778.80), SIMDE_FLOAT32_C( -446.61), SIMDE_FLOAT32_C( -354.77), SIMDE_FLOAT32_C( -252.87), SIMDE_FLOAT32_C( 698.39), SIMDE_FLOAT32_C( 892.46), SIMDE_FLOAT32_C( 204.87), SIMDE_FLOAT32_C( 942.56), SIMDE_FLOAT32_C( -407.67), SIMDE_FLOAT32_C( -414.21), SIMDE_FLOAT32_C( -327.43) }, { SIMDE_FLOAT32_C( -725.57), SIMDE_FLOAT32_C( -383.90), SIMDE_FLOAT32_C( -446.61), SIMDE_FLOAT32_C( 554.17), SIMDE_FLOAT32_C( 173.86), SIMDE_FLOAT32_C( 778.80), SIMDE_FLOAT32_C( -446.61), SIMDE_FLOAT32_C( -354.77), SIMDE_FLOAT32_C( -252.87), SIMDE_FLOAT32_C( 698.39), SIMDE_FLOAT32_C( 892.46), SIMDE_FLOAT32_C( 204.87), SIMDE_FLOAT32_C( 942.56), SIMDE_FLOAT32_C( -907.38), SIMDE_FLOAT32_C( 859.44), SIMDE_FLOAT32_C( 781.38) }, { SIMDE_FLOAT32_C( -725.57), SIMDE_FLOAT32_C( -383.90), SIMDE_FLOAT32_C( 143.73), SIMDE_FLOAT32_C( 554.17), SIMDE_FLOAT32_C( -458.14), SIMDE_FLOAT32_C( -547.82), SIMDE_FLOAT32_C( 115.83), SIMDE_FLOAT32_C( -354.77), SIMDE_FLOAT32_C( -252.87), SIMDE_FLOAT32_C( 698.39), SIMDE_FLOAT32_C( 892.46), SIMDE_FLOAT32_C( 204.87), SIMDE_FLOAT32_C( 942.56), SIMDE_FLOAT32_C( -686.47), SIMDE_FLOAT32_C( -892.08), SIMDE_FLOAT32_C( 517.07) } }, { { SIMDE_FLOAT32_C( 760.33), SIMDE_FLOAT32_C( 630.33), SIMDE_FLOAT32_C( -362.87), SIMDE_FLOAT32_C( -697.81), SIMDE_FLOAT32_C( -917.49), SIMDE_FLOAT32_C( 752.96), SIMDE_FLOAT32_C( 171.15), SIMDE_FLOAT32_C( 256.37), SIMDE_FLOAT32_C( 531.76), SIMDE_FLOAT32_C( 724.54), SIMDE_FLOAT32_C( 784.00), SIMDE_FLOAT32_C( -425.73), SIMDE_FLOAT32_C( -447.63), SIMDE_FLOAT32_C( -72.27), SIMDE_FLOAT32_C( 337.38), SIMDE_FLOAT32_C( -65.64) }, UINT16_C(36452), { SIMDE_FLOAT32_C( 606.94), SIMDE_FLOAT32_C( -242.39), SIMDE_FLOAT32_C( 256.10), SIMDE_FLOAT32_C( 977.90), SIMDE_FLOAT32_C( 887.62), SIMDE_FLOAT32_C( 793.89), SIMDE_FLOAT32_C( -708.57), 
SIMDE_FLOAT32_C( 995.54), SIMDE_FLOAT32_C( 310.96), SIMDE_FLOAT32_C( -109.37), SIMDE_FLOAT32_C( -911.84), SIMDE_FLOAT32_C( 170.40), SIMDE_FLOAT32_C( -328.00), SIMDE_FLOAT32_C( 848.49), SIMDE_FLOAT32_C( -199.28), SIMDE_FLOAT32_C( 309.13) }, { SIMDE_FLOAT32_C( -849.32), SIMDE_FLOAT32_C( -116.77), SIMDE_FLOAT32_C( 62.09), SIMDE_FLOAT32_C( 321.83), SIMDE_FLOAT32_C( -860.40), SIMDE_FLOAT32_C( -406.15), SIMDE_FLOAT32_C( 46.37), SIMDE_FLOAT32_C( 923.60), SIMDE_FLOAT32_C( 168.12), SIMDE_FLOAT32_C( 598.74), SIMDE_FLOAT32_C( -148.68), SIMDE_FLOAT32_C( -494.50), SIMDE_FLOAT32_C( -466.90), SIMDE_FLOAT32_C( -628.62), SIMDE_FLOAT32_C( -571.32), SIMDE_FLOAT32_C( -859.96) }, { SIMDE_FLOAT32_C( 760.33), SIMDE_FLOAT32_C( 630.33), SIMDE_FLOAT32_C( -911.84), SIMDE_FLOAT32_C( -697.81), SIMDE_FLOAT32_C( -917.49), SIMDE_FLOAT32_C( 848.49), SIMDE_FLOAT32_C( -199.28), SIMDE_FLOAT32_C( 256.37), SIMDE_FLOAT32_C( 531.76), SIMDE_FLOAT32_C( -116.77), SIMDE_FLOAT32_C( 62.09), SIMDE_FLOAT32_C( 321.83), SIMDE_FLOAT32_C( -447.63), SIMDE_FLOAT32_C( -72.27), SIMDE_FLOAT32_C( 337.38), SIMDE_FLOAT32_C( 923.60) }, { SIMDE_FLOAT32_C( 760.33), SIMDE_FLOAT32_C( 630.33), SIMDE_FLOAT32_C( -708.57), SIMDE_FLOAT32_C( -697.81), SIMDE_FLOAT32_C( -917.49), SIMDE_FLOAT32_C( -109.37), SIMDE_FLOAT32_C( -911.84), SIMDE_FLOAT32_C( 256.37), SIMDE_FLOAT32_C( 531.76), SIMDE_FLOAT32_C( -628.62), SIMDE_FLOAT32_C( -571.32), SIMDE_FLOAT32_C( -859.96), SIMDE_FLOAT32_C( -447.63), SIMDE_FLOAT32_C( -72.27), SIMDE_FLOAT32_C( 337.38), SIMDE_FLOAT32_C( 321.83) }, { SIMDE_FLOAT32_C( 760.33), SIMDE_FLOAT32_C( 630.33), SIMDE_FLOAT32_C( -911.84), SIMDE_FLOAT32_C( -697.81), SIMDE_FLOAT32_C( -917.49), SIMDE_FLOAT32_C( -109.37), SIMDE_FLOAT32_C( -911.84), SIMDE_FLOAT32_C( 256.37), SIMDE_FLOAT32_C( 531.76), SIMDE_FLOAT32_C( -406.15), SIMDE_FLOAT32_C( 46.37), SIMDE_FLOAT32_C( 923.60), SIMDE_FLOAT32_C( -447.63), SIMDE_FLOAT32_C( -72.27), SIMDE_FLOAT32_C( 337.38), SIMDE_FLOAT32_C( -859.96) }, { SIMDE_FLOAT32_C( 760.33), SIMDE_FLOAT32_C( 
630.33), SIMDE_FLOAT32_C( -199.28), SIMDE_FLOAT32_C( -697.81), SIMDE_FLOAT32_C( -917.49), SIMDE_FLOAT32_C( 793.89), SIMDE_FLOAT32_C( -708.57), SIMDE_FLOAT32_C( 256.37), SIMDE_FLOAT32_C( 531.76), SIMDE_FLOAT32_C( 598.74), SIMDE_FLOAT32_C( -148.68), SIMDE_FLOAT32_C( -494.50), SIMDE_FLOAT32_C( -447.63), SIMDE_FLOAT32_C( -72.27), SIMDE_FLOAT32_C( 337.38), SIMDE_FLOAT32_C( -494.50) } }, { { SIMDE_FLOAT32_C( 128.98), SIMDE_FLOAT32_C( 684.78), SIMDE_FLOAT32_C( -882.06), SIMDE_FLOAT32_C( 16.60), SIMDE_FLOAT32_C( 478.66), SIMDE_FLOAT32_C( -590.62), SIMDE_FLOAT32_C( 12.14), SIMDE_FLOAT32_C( -210.38), SIMDE_FLOAT32_C( 300.00), SIMDE_FLOAT32_C( 100.31), SIMDE_FLOAT32_C( 960.02), SIMDE_FLOAT32_C( 972.00), SIMDE_FLOAT32_C( -51.20), SIMDE_FLOAT32_C( -239.26), SIMDE_FLOAT32_C( 281.13), SIMDE_FLOAT32_C( 99.48) }, UINT16_C(60205), { SIMDE_FLOAT32_C( -578.69), SIMDE_FLOAT32_C( 783.57), SIMDE_FLOAT32_C( -62.93), SIMDE_FLOAT32_C( 467.68), SIMDE_FLOAT32_C( 707.16), SIMDE_FLOAT32_C( -894.81), SIMDE_FLOAT32_C( 66.42), SIMDE_FLOAT32_C( -441.51), SIMDE_FLOAT32_C( -389.31), SIMDE_FLOAT32_C( 599.52), SIMDE_FLOAT32_C( -70.14), SIMDE_FLOAT32_C( 39.37), SIMDE_FLOAT32_C( 739.56), SIMDE_FLOAT32_C( -941.15), SIMDE_FLOAT32_C( -275.85), SIMDE_FLOAT32_C( 857.50) }, { SIMDE_FLOAT32_C( 75.45), SIMDE_FLOAT32_C( -797.19), SIMDE_FLOAT32_C( -733.12), SIMDE_FLOAT32_C( -912.41), SIMDE_FLOAT32_C( -7.57), SIMDE_FLOAT32_C( 566.88), SIMDE_FLOAT32_C( 187.90), SIMDE_FLOAT32_C( -47.55), SIMDE_FLOAT32_C( 538.88), SIMDE_FLOAT32_C( -863.30), SIMDE_FLOAT32_C( 713.19), SIMDE_FLOAT32_C( -179.98), SIMDE_FLOAT32_C( 236.18), SIMDE_FLOAT32_C( 357.17), SIMDE_FLOAT32_C( 163.24), SIMDE_FLOAT32_C( 657.49) }, { SIMDE_FLOAT32_C( -389.31), SIMDE_FLOAT32_C( 684.78), SIMDE_FLOAT32_C( -70.14), SIMDE_FLOAT32_C( 39.37), SIMDE_FLOAT32_C( 478.66), SIMDE_FLOAT32_C( -941.15), SIMDE_FLOAT32_C( 12.14), SIMDE_FLOAT32_C( -210.38), SIMDE_FLOAT32_C( 75.45), SIMDE_FLOAT32_C( -797.19), SIMDE_FLOAT32_C( 960.02), SIMDE_FLOAT32_C( -912.41), 
SIMDE_FLOAT32_C( -51.20), SIMDE_FLOAT32_C( 566.88), SIMDE_FLOAT32_C( 187.90), SIMDE_FLOAT32_C( -47.55) }, { SIMDE_FLOAT32_C( 707.16), SIMDE_FLOAT32_C( 684.78), SIMDE_FLOAT32_C( 66.42), SIMDE_FLOAT32_C( -441.51), SIMDE_FLOAT32_C( 478.66), SIMDE_FLOAT32_C( 599.52), SIMDE_FLOAT32_C( 12.14), SIMDE_FLOAT32_C( -210.38), SIMDE_FLOAT32_C( 236.18), SIMDE_FLOAT32_C( 357.17), SIMDE_FLOAT32_C( 960.02), SIMDE_FLOAT32_C( 657.49), SIMDE_FLOAT32_C( -51.20), SIMDE_FLOAT32_C( -797.19), SIMDE_FLOAT32_C( -733.12), SIMDE_FLOAT32_C( -912.41) }, { SIMDE_FLOAT32_C( -389.31), SIMDE_FLOAT32_C( 684.78), SIMDE_FLOAT32_C( -70.14), SIMDE_FLOAT32_C( 39.37), SIMDE_FLOAT32_C( 478.66), SIMDE_FLOAT32_C( 599.52), SIMDE_FLOAT32_C( 12.14), SIMDE_FLOAT32_C( -210.38), SIMDE_FLOAT32_C( -7.57), SIMDE_FLOAT32_C( 566.88), SIMDE_FLOAT32_C( 960.02), SIMDE_FLOAT32_C( -47.55), SIMDE_FLOAT32_C( -51.20), SIMDE_FLOAT32_C( 357.17), SIMDE_FLOAT32_C( 163.24), SIMDE_FLOAT32_C( 657.49) }, { SIMDE_FLOAT32_C( 739.56), SIMDE_FLOAT32_C( 684.78), SIMDE_FLOAT32_C( -275.85), SIMDE_FLOAT32_C( 857.50), SIMDE_FLOAT32_C( 478.66), SIMDE_FLOAT32_C( -894.81), SIMDE_FLOAT32_C( 12.14), SIMDE_FLOAT32_C( -210.38), SIMDE_FLOAT32_C( 538.88), SIMDE_FLOAT32_C( -863.30), SIMDE_FLOAT32_C( 960.02), SIMDE_FLOAT32_C( -179.98), SIMDE_FLOAT32_C( -51.20), SIMDE_FLOAT32_C( -863.30), SIMDE_FLOAT32_C( 713.19), SIMDE_FLOAT32_C( -179.98) } }, { { SIMDE_FLOAT32_C( 140.73), SIMDE_FLOAT32_C( -899.69), SIMDE_FLOAT32_C( 125.17), SIMDE_FLOAT32_C( -152.10), SIMDE_FLOAT32_C( -794.50), SIMDE_FLOAT32_C( -808.41), SIMDE_FLOAT32_C( 406.38), SIMDE_FLOAT32_C( -183.81), SIMDE_FLOAT32_C( 791.11), SIMDE_FLOAT32_C( -663.76), SIMDE_FLOAT32_C( 855.56), SIMDE_FLOAT32_C( 530.67), SIMDE_FLOAT32_C( -604.91), SIMDE_FLOAT32_C( -420.29), SIMDE_FLOAT32_C( 388.17), SIMDE_FLOAT32_C( 470.54) }, UINT16_C( 6216), { SIMDE_FLOAT32_C( 558.14), SIMDE_FLOAT32_C( 774.95), SIMDE_FLOAT32_C( 221.93), SIMDE_FLOAT32_C( -253.96), SIMDE_FLOAT32_C( -272.60), SIMDE_FLOAT32_C( -239.19), 
SIMDE_FLOAT32_C( -117.26), SIMDE_FLOAT32_C( -559.40), SIMDE_FLOAT32_C( 580.83), SIMDE_FLOAT32_C( -881.08), SIMDE_FLOAT32_C( 797.76), SIMDE_FLOAT32_C( -255.93), SIMDE_FLOAT32_C( 776.41), SIMDE_FLOAT32_C( -61.50), SIMDE_FLOAT32_C( -155.62), SIMDE_FLOAT32_C( -98.42) }, { SIMDE_FLOAT32_C( 786.39), SIMDE_FLOAT32_C( 49.88), SIMDE_FLOAT32_C( 93.16), SIMDE_FLOAT32_C( 192.77), SIMDE_FLOAT32_C( 866.08), SIMDE_FLOAT32_C( -115.73), SIMDE_FLOAT32_C( 529.02), SIMDE_FLOAT32_C( 721.64), SIMDE_FLOAT32_C( -585.06), SIMDE_FLOAT32_C( 924.11), SIMDE_FLOAT32_C( -698.65), SIMDE_FLOAT32_C( 803.11), SIMDE_FLOAT32_C( 394.65), SIMDE_FLOAT32_C( 83.87), SIMDE_FLOAT32_C( 458.16), SIMDE_FLOAT32_C( -47.21) }, { SIMDE_FLOAT32_C( 140.73), SIMDE_FLOAT32_C( -899.69), SIMDE_FLOAT32_C( 125.17), SIMDE_FLOAT32_C( -255.93), SIMDE_FLOAT32_C( -794.50), SIMDE_FLOAT32_C( -808.41), SIMDE_FLOAT32_C( -155.62), SIMDE_FLOAT32_C( -183.81), SIMDE_FLOAT32_C( 791.11), SIMDE_FLOAT32_C( -663.76), SIMDE_FLOAT32_C( 855.56), SIMDE_FLOAT32_C( 192.77), SIMDE_FLOAT32_C( 866.08), SIMDE_FLOAT32_C( -420.29), SIMDE_FLOAT32_C( 388.17), SIMDE_FLOAT32_C( 470.54) }, { SIMDE_FLOAT32_C( 140.73), SIMDE_FLOAT32_C( -899.69), SIMDE_FLOAT32_C( 125.17), SIMDE_FLOAT32_C( -559.40), SIMDE_FLOAT32_C( -794.50), SIMDE_FLOAT32_C( -808.41), SIMDE_FLOAT32_C( 797.76), SIMDE_FLOAT32_C( -183.81), SIMDE_FLOAT32_C( 791.11), SIMDE_FLOAT32_C( -663.76), SIMDE_FLOAT32_C( 855.56), SIMDE_FLOAT32_C( -47.21), SIMDE_FLOAT32_C( 786.39), SIMDE_FLOAT32_C( -420.29), SIMDE_FLOAT32_C( 388.17), SIMDE_FLOAT32_C( 470.54) }, { SIMDE_FLOAT32_C( 140.73), SIMDE_FLOAT32_C( -899.69), SIMDE_FLOAT32_C( 125.17), SIMDE_FLOAT32_C( -255.93), SIMDE_FLOAT32_C( -794.50), SIMDE_FLOAT32_C( -808.41), SIMDE_FLOAT32_C( 797.76), SIMDE_FLOAT32_C( -183.81), SIMDE_FLOAT32_C( 791.11), SIMDE_FLOAT32_C( -663.76), SIMDE_FLOAT32_C( 855.56), SIMDE_FLOAT32_C( 721.64), SIMDE_FLOAT32_C( 394.65), SIMDE_FLOAT32_C( -420.29), SIMDE_FLOAT32_C( 388.17), SIMDE_FLOAT32_C( 470.54) }, { SIMDE_FLOAT32_C( 140.73), 
SIMDE_FLOAT32_C( -899.69), SIMDE_FLOAT32_C( 125.17), SIMDE_FLOAT32_C( -98.42), SIMDE_FLOAT32_C( -794.50), SIMDE_FLOAT32_C( -808.41), SIMDE_FLOAT32_C( -117.26), SIMDE_FLOAT32_C( -183.81), SIMDE_FLOAT32_C( 791.11), SIMDE_FLOAT32_C( -663.76), SIMDE_FLOAT32_C( 855.56), SIMDE_FLOAT32_C( 803.11), SIMDE_FLOAT32_C( -585.06), SIMDE_FLOAT32_C( -420.29), SIMDE_FLOAT32_C( 388.17), SIMDE_FLOAT32_C( 470.54) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__mmask16 k = test_vec[i].k; simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r; r = simde_mm512_mask_shuffle_f32x4(src, k, a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r1), 1); r = simde_mm512_mask_shuffle_f32x4(src, k, a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r2), 1); r = simde_mm512_mask_shuffle_f32x4(src, k, a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r3), 1); r = simde_mm512_mask_shuffle_f32x4(src, k, a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r4), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { simde__m512 src = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m512 a = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 b = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 r; simde_test_x86_write_f32x16(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask16(2, k,SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_x86_write_f32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_mask_shuffle_f32x4(src, k, a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_mask_shuffle_f32x4(src, k, a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_mask_shuffle_f32x4(src, k, a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_mask_shuffle_f32x4(src, k, a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_maskz_shuffle_f32x4(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde__mmask16 k; const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r1[16]; const simde_float32 r2[16]; const simde_float32 r3[16]; const simde_float32 r4[16]; } test_vec[4] = { { UINT16_C(43927), { SIMDE_FLOAT32_C( 369.14), SIMDE_FLOAT32_C( 354.32), SIMDE_FLOAT32_C( 623.24), SIMDE_FLOAT32_C( 353.44), SIMDE_FLOAT32_C( 642.86), SIMDE_FLOAT32_C( 91.50), SIMDE_FLOAT32_C( 851.20), SIMDE_FLOAT32_C( -709.38), SIMDE_FLOAT32_C( 791.58), SIMDE_FLOAT32_C( 170.73), SIMDE_FLOAT32_C( 771.96), SIMDE_FLOAT32_C( 365.77), SIMDE_FLOAT32_C( -663.84), SIMDE_FLOAT32_C( -903.26), SIMDE_FLOAT32_C( -343.54), SIMDE_FLOAT32_C( 983.63) }, { SIMDE_FLOAT32_C( 796.16), SIMDE_FLOAT32_C( -952.47), SIMDE_FLOAT32_C( 568.60), SIMDE_FLOAT32_C( 931.49), SIMDE_FLOAT32_C( -388.48), SIMDE_FLOAT32_C( 788.36), SIMDE_FLOAT32_C( 380.68), SIMDE_FLOAT32_C( 675.57), SIMDE_FLOAT32_C( 26.10), SIMDE_FLOAT32_C( 385.39), SIMDE_FLOAT32_C( 261.01), SIMDE_FLOAT32_C( 184.97), SIMDE_FLOAT32_C( 9.80), SIMDE_FLOAT32_C( -711.89), SIMDE_FLOAT32_C( -425.15), SIMDE_FLOAT32_C( -621.06) }, { SIMDE_FLOAT32_C( 791.58), SIMDE_FLOAT32_C( 170.73), SIMDE_FLOAT32_C( 771.96), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -663.84), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 983.63), SIMDE_FLOAT32_C( 796.16), SIMDE_FLOAT32_C( -952.47), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 931.49), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 788.36), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 675.57) }, { SIMDE_FLOAT32_C( 642.86), SIMDE_FLOAT32_C( 91.50), SIMDE_FLOAT32_C( 851.20), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 791.58), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 365.77), SIMDE_FLOAT32_C( 9.80), SIMDE_FLOAT32_C( -711.89), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -621.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -952.47), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 931.49) }, { SIMDE_FLOAT32_C( 791.58), SIMDE_FLOAT32_C( 170.73), SIMDE_FLOAT32_C( 771.96), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 791.58), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 365.77), SIMDE_FLOAT32_C( -388.48), SIMDE_FLOAT32_C( 788.36), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 675.57), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -711.89), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -621.06) }, { SIMDE_FLOAT32_C( -663.84), SIMDE_FLOAT32_C( -903.26), SIMDE_FLOAT32_C( -343.54), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 642.86), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -709.38), SIMDE_FLOAT32_C( 26.10), SIMDE_FLOAT32_C( 385.39), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 184.97), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 385.39), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 184.97) } }, { UINT16_C(50370), { SIMDE_FLOAT32_C( 732.38), SIMDE_FLOAT32_C( 285.29), SIMDE_FLOAT32_C( 289.59), SIMDE_FLOAT32_C( 583.58), SIMDE_FLOAT32_C( 575.91), SIMDE_FLOAT32_C( 81.17), SIMDE_FLOAT32_C( -245.69), SIMDE_FLOAT32_C( 347.87), SIMDE_FLOAT32_C( -553.05), SIMDE_FLOAT32_C( 90.47), SIMDE_FLOAT32_C( 444.61), SIMDE_FLOAT32_C( 103.40), SIMDE_FLOAT32_C( 74.10), SIMDE_FLOAT32_C( 240.78), SIMDE_FLOAT32_C( 150.93), SIMDE_FLOAT32_C( -357.31) }, { SIMDE_FLOAT32_C( 172.27), SIMDE_FLOAT32_C( 762.45), SIMDE_FLOAT32_C( 
-568.95), SIMDE_FLOAT32_C( -447.05), SIMDE_FLOAT32_C( 438.01), SIMDE_FLOAT32_C( 457.15), SIMDE_FLOAT32_C( 938.34), SIMDE_FLOAT32_C( -300.97), SIMDE_FLOAT32_C( -357.88), SIMDE_FLOAT32_C( -51.86), SIMDE_FLOAT32_C( -12.86), SIMDE_FLOAT32_C( 216.97), SIMDE_FLOAT32_C( 327.09), SIMDE_FLOAT32_C( -370.42), SIMDE_FLOAT32_C( 415.06), SIMDE_FLOAT32_C( 59.47) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 90.47), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 150.93), SIMDE_FLOAT32_C( -357.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -568.95), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 938.34), SIMDE_FLOAT32_C( -300.97) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 81.17), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 444.61), SIMDE_FLOAT32_C( 103.40), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 415.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -568.95), SIMDE_FLOAT32_C( -447.05) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 90.47), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 444.61), SIMDE_FLOAT32_C( 103.40), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 938.34), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 415.06), SIMDE_FLOAT32_C( 59.47) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 240.78), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -245.69), SIMDE_FLOAT32_C( 347.87), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -12.86), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -12.86), SIMDE_FLOAT32_C( 216.97) } }, { UINT16_C(27593), { SIMDE_FLOAT32_C( 
-356.95), SIMDE_FLOAT32_C( 490.78), SIMDE_FLOAT32_C( 785.82), SIMDE_FLOAT32_C( 397.35), SIMDE_FLOAT32_C( -161.34), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( -512.18), SIMDE_FLOAT32_C( -716.73), SIMDE_FLOAT32_C( 336.17), SIMDE_FLOAT32_C( 561.92), SIMDE_FLOAT32_C( 524.05), SIMDE_FLOAT32_C( -512.90), SIMDE_FLOAT32_C( -795.39), SIMDE_FLOAT32_C( -303.68), SIMDE_FLOAT32_C( -750.45), SIMDE_FLOAT32_C( -364.34) }, { SIMDE_FLOAT32_C( 249.27), SIMDE_FLOAT32_C( 687.56), SIMDE_FLOAT32_C( -907.19), SIMDE_FLOAT32_C( 187.61), SIMDE_FLOAT32_C( -613.41), SIMDE_FLOAT32_C( -265.06), SIMDE_FLOAT32_C( -864.25), SIMDE_FLOAT32_C( 373.73), SIMDE_FLOAT32_C( 951.91), SIMDE_FLOAT32_C( 462.84), SIMDE_FLOAT32_C( -996.69), SIMDE_FLOAT32_C( 366.97), SIMDE_FLOAT32_C( -477.69), SIMDE_FLOAT32_C( 918.18), SIMDE_FLOAT32_C( -928.38), SIMDE_FLOAT32_C( 165.36) }, { SIMDE_FLOAT32_C( 336.17), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -512.90), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -750.45), SIMDE_FLOAT32_C( -364.34), SIMDE_FLOAT32_C( 249.27), SIMDE_FLOAT32_C( 687.56), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 187.61), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -265.06), SIMDE_FLOAT32_C( -864.25), SIMDE_FLOAT32_C( 0.00) }, { SIMDE_FLOAT32_C( -161.34), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -716.73), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 524.05), SIMDE_FLOAT32_C( -512.90), SIMDE_FLOAT32_C( -477.69), SIMDE_FLOAT32_C( 918.18), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 165.36), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 687.56), SIMDE_FLOAT32_C( -907.19), SIMDE_FLOAT32_C( 0.00) }, { SIMDE_FLOAT32_C( 336.17), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -512.90), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 524.05), SIMDE_FLOAT32_C( -512.90), SIMDE_FLOAT32_C( -613.41), SIMDE_FLOAT32_C( -265.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 373.73), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( 918.18), SIMDE_FLOAT32_C( -928.38), SIMDE_FLOAT32_C( 0.00) }, { SIMDE_FLOAT32_C( -795.39), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -364.34), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -512.18), SIMDE_FLOAT32_C( -716.73), SIMDE_FLOAT32_C( 951.91), SIMDE_FLOAT32_C( 462.84), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 366.97), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 462.84), SIMDE_FLOAT32_C( -996.69), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(32572), { SIMDE_FLOAT32_C( -437.29), SIMDE_FLOAT32_C( -752.38), SIMDE_FLOAT32_C( -909.78), SIMDE_FLOAT32_C( 50.53), SIMDE_FLOAT32_C( -469.11), SIMDE_FLOAT32_C( 426.39), SIMDE_FLOAT32_C( -387.55), SIMDE_FLOAT32_C( -945.06), SIMDE_FLOAT32_C( 913.49), SIMDE_FLOAT32_C( -182.94), SIMDE_FLOAT32_C( -248.74), SIMDE_FLOAT32_C( -836.96), SIMDE_FLOAT32_C( 452.73), SIMDE_FLOAT32_C( -999.47), SIMDE_FLOAT32_C( 850.60), SIMDE_FLOAT32_C( 545.54) }, { SIMDE_FLOAT32_C( 188.14), SIMDE_FLOAT32_C( -762.80), SIMDE_FLOAT32_C( -719.53), SIMDE_FLOAT32_C( 323.90), SIMDE_FLOAT32_C( 610.93), SIMDE_FLOAT32_C( -767.62), SIMDE_FLOAT32_C( -213.26), SIMDE_FLOAT32_C( 614.24), SIMDE_FLOAT32_C( 599.35), SIMDE_FLOAT32_C( 309.05), SIMDE_FLOAT32_C( 532.42), SIMDE_FLOAT32_C( 670.97), SIMDE_FLOAT32_C( -525.60), SIMDE_FLOAT32_C( -58.61), SIMDE_FLOAT32_C( 528.42), SIMDE_FLOAT32_C( 37.11) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -248.74), SIMDE_FLOAT32_C( -836.96), SIMDE_FLOAT32_C( 452.73), SIMDE_FLOAT32_C( -999.47), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 188.14), SIMDE_FLOAT32_C( -762.80), SIMDE_FLOAT32_C( -719.53), SIMDE_FLOAT32_C( 323.90), SIMDE_FLOAT32_C( 610.93), SIMDE_FLOAT32_C( -767.62), SIMDE_FLOAT32_C( -213.26), SIMDE_FLOAT32_C( 0.00) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -387.55), SIMDE_FLOAT32_C( -945.06), SIMDE_FLOAT32_C( 913.49), SIMDE_FLOAT32_C( -182.94), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( -525.60), SIMDE_FLOAT32_C( -58.61), SIMDE_FLOAT32_C( 528.42), SIMDE_FLOAT32_C( 37.11), SIMDE_FLOAT32_C( 188.14), SIMDE_FLOAT32_C( -762.80), SIMDE_FLOAT32_C( -719.53), SIMDE_FLOAT32_C( 0.00) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -248.74), SIMDE_FLOAT32_C( -836.96), SIMDE_FLOAT32_C( 913.49), SIMDE_FLOAT32_C( -182.94), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 610.93), SIMDE_FLOAT32_C( -767.62), SIMDE_FLOAT32_C( -213.26), SIMDE_FLOAT32_C( 614.24), SIMDE_FLOAT32_C( -525.60), SIMDE_FLOAT32_C( -58.61), SIMDE_FLOAT32_C( 528.42), SIMDE_FLOAT32_C( 0.00) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 850.60), SIMDE_FLOAT32_C( 545.54), SIMDE_FLOAT32_C( -469.11), SIMDE_FLOAT32_C( 426.39), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 599.35), SIMDE_FLOAT32_C( 309.05), SIMDE_FLOAT32_C( 532.42), SIMDE_FLOAT32_C( 670.97), SIMDE_FLOAT32_C( 599.35), SIMDE_FLOAT32_C( 309.05), SIMDE_FLOAT32_C( 532.42), SIMDE_FLOAT32_C( 0.00) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask16 k = test_vec[i].k; simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r; r = simde_mm512_maskz_shuffle_f32x4(k, a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r1), 1); r = simde_mm512_maskz_shuffle_f32x4(k, a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r2), 1); r = simde_mm512_maskz_shuffle_f32x4(k, a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r3), 1); r = simde_mm512_maskz_shuffle_f32x4(k, a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r4), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { 
simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m512 a = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 b = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 r; simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_maskz_shuffle_f32x4(k, a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_maskz_shuffle_f32x4(k, a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_maskz_shuffle_f32x4(k, a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_maskz_shuffle_f32x4(k, a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_shuffle_f64x2(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r1[8]; const simde_float64 r2[8]; const simde_float64 r3[8]; const simde_float64 r4[8]; } test_vec[4] = { { { SIMDE_FLOAT64_C( -520.93), SIMDE_FLOAT64_C( 576.45), SIMDE_FLOAT64_C( 563.43), SIMDE_FLOAT64_C( 352.92), SIMDE_FLOAT64_C( -311.69), SIMDE_FLOAT64_C( -719.51), SIMDE_FLOAT64_C( -514.13), SIMDE_FLOAT64_C( 476.28) }, { SIMDE_FLOAT64_C( -203.97), SIMDE_FLOAT64_C( -513.95), SIMDE_FLOAT64_C( -501.55), SIMDE_FLOAT64_C( 134.12), SIMDE_FLOAT64_C( -664.49), SIMDE_FLOAT64_C( 81.08), SIMDE_FLOAT64_C( 457.05), SIMDE_FLOAT64_C( -756.64) }, { SIMDE_FLOAT64_C( -311.69), SIMDE_FLOAT64_C( -719.51), SIMDE_FLOAT64_C( -514.13), SIMDE_FLOAT64_C( 476.28), SIMDE_FLOAT64_C( -203.97), SIMDE_FLOAT64_C( -513.95), SIMDE_FLOAT64_C( -501.55), SIMDE_FLOAT64_C( 134.12) }, { SIMDE_FLOAT64_C( 563.43), 
SIMDE_FLOAT64_C( 352.92), SIMDE_FLOAT64_C( -311.69), SIMDE_FLOAT64_C( -719.51), SIMDE_FLOAT64_C( 457.05), SIMDE_FLOAT64_C( -756.64), SIMDE_FLOAT64_C( -203.97), SIMDE_FLOAT64_C( -513.95) }, { SIMDE_FLOAT64_C( -311.69), SIMDE_FLOAT64_C( -719.51), SIMDE_FLOAT64_C( -311.69), SIMDE_FLOAT64_C( -719.51), SIMDE_FLOAT64_C( -501.55), SIMDE_FLOAT64_C( 134.12), SIMDE_FLOAT64_C( 457.05), SIMDE_FLOAT64_C( -756.64) }, { SIMDE_FLOAT64_C( -514.13), SIMDE_FLOAT64_C( 476.28), SIMDE_FLOAT64_C( 563.43), SIMDE_FLOAT64_C( 352.92), SIMDE_FLOAT64_C( -664.49), SIMDE_FLOAT64_C( 81.08), SIMDE_FLOAT64_C( -664.49), SIMDE_FLOAT64_C( 81.08) } }, { { SIMDE_FLOAT64_C( -556.38), SIMDE_FLOAT64_C( -801.12), SIMDE_FLOAT64_C( -37.21), SIMDE_FLOAT64_C( 708.98), SIMDE_FLOAT64_C( 510.14), SIMDE_FLOAT64_C( -557.21), SIMDE_FLOAT64_C( 880.68), SIMDE_FLOAT64_C( -422.13) }, { SIMDE_FLOAT64_C( -257.69), SIMDE_FLOAT64_C( 779.79), SIMDE_FLOAT64_C( -543.55), SIMDE_FLOAT64_C( -721.39), SIMDE_FLOAT64_C( -242.21), SIMDE_FLOAT64_C( -59.62), SIMDE_FLOAT64_C( -781.42), SIMDE_FLOAT64_C( 236.86) }, { SIMDE_FLOAT64_C( 510.14), SIMDE_FLOAT64_C( -557.21), SIMDE_FLOAT64_C( 880.68), SIMDE_FLOAT64_C( -422.13), SIMDE_FLOAT64_C( -257.69), SIMDE_FLOAT64_C( 779.79), SIMDE_FLOAT64_C( -543.55), SIMDE_FLOAT64_C( -721.39) }, { SIMDE_FLOAT64_C( -37.21), SIMDE_FLOAT64_C( 708.98), SIMDE_FLOAT64_C( 510.14), SIMDE_FLOAT64_C( -557.21), SIMDE_FLOAT64_C( -781.42), SIMDE_FLOAT64_C( 236.86), SIMDE_FLOAT64_C( -257.69), SIMDE_FLOAT64_C( 779.79) }, { SIMDE_FLOAT64_C( 510.14), SIMDE_FLOAT64_C( -557.21), SIMDE_FLOAT64_C( 510.14), SIMDE_FLOAT64_C( -557.21), SIMDE_FLOAT64_C( -543.55), SIMDE_FLOAT64_C( -721.39), SIMDE_FLOAT64_C( -781.42), SIMDE_FLOAT64_C( 236.86) }, { SIMDE_FLOAT64_C( 880.68), SIMDE_FLOAT64_C( -422.13), SIMDE_FLOAT64_C( -37.21), SIMDE_FLOAT64_C( 708.98), SIMDE_FLOAT64_C( -242.21), SIMDE_FLOAT64_C( -59.62), SIMDE_FLOAT64_C( -242.21), SIMDE_FLOAT64_C( -59.62) } }, { { SIMDE_FLOAT64_C( -483.17), SIMDE_FLOAT64_C( 782.01), SIMDE_FLOAT64_C( 
-410.22), SIMDE_FLOAT64_C( 205.14), SIMDE_FLOAT64_C( -937.50), SIMDE_FLOAT64_C( 75.64), SIMDE_FLOAT64_C( -318.58), SIMDE_FLOAT64_C( -141.47) }, { SIMDE_FLOAT64_C( 561.70), SIMDE_FLOAT64_C( 179.87), SIMDE_FLOAT64_C( 992.65), SIMDE_FLOAT64_C( 897.21), SIMDE_FLOAT64_C( -739.05), SIMDE_FLOAT64_C( 449.70), SIMDE_FLOAT64_C( -859.43), SIMDE_FLOAT64_C( -295.43) }, { SIMDE_FLOAT64_C( -937.50), SIMDE_FLOAT64_C( 75.64), SIMDE_FLOAT64_C( -318.58), SIMDE_FLOAT64_C( -141.47), SIMDE_FLOAT64_C( 561.70), SIMDE_FLOAT64_C( 179.87), SIMDE_FLOAT64_C( 992.65), SIMDE_FLOAT64_C( 897.21) }, { SIMDE_FLOAT64_C( -410.22), SIMDE_FLOAT64_C( 205.14), SIMDE_FLOAT64_C( -937.50), SIMDE_FLOAT64_C( 75.64), SIMDE_FLOAT64_C( -859.43), SIMDE_FLOAT64_C( -295.43), SIMDE_FLOAT64_C( 561.70), SIMDE_FLOAT64_C( 179.87) }, { SIMDE_FLOAT64_C( -937.50), SIMDE_FLOAT64_C( 75.64), SIMDE_FLOAT64_C( -937.50), SIMDE_FLOAT64_C( 75.64), SIMDE_FLOAT64_C( 992.65), SIMDE_FLOAT64_C( 897.21), SIMDE_FLOAT64_C( -859.43), SIMDE_FLOAT64_C( -295.43) }, { SIMDE_FLOAT64_C( -318.58), SIMDE_FLOAT64_C( -141.47), SIMDE_FLOAT64_C( -410.22), SIMDE_FLOAT64_C( 205.14), SIMDE_FLOAT64_C( -739.05), SIMDE_FLOAT64_C( 449.70), SIMDE_FLOAT64_C( -739.05), SIMDE_FLOAT64_C( 449.70) } }, { { SIMDE_FLOAT64_C( 648.58), SIMDE_FLOAT64_C( 103.36), SIMDE_FLOAT64_C( -586.45), SIMDE_FLOAT64_C( 158.71), SIMDE_FLOAT64_C( 546.15), SIMDE_FLOAT64_C( -705.77), SIMDE_FLOAT64_C( 736.58), SIMDE_FLOAT64_C( -711.55) }, { SIMDE_FLOAT64_C( -925.98), SIMDE_FLOAT64_C( -806.97), SIMDE_FLOAT64_C( -432.94), SIMDE_FLOAT64_C( -168.19), SIMDE_FLOAT64_C( 133.41), SIMDE_FLOAT64_C( -214.36), SIMDE_FLOAT64_C( -931.33), SIMDE_FLOAT64_C( 650.24) }, { SIMDE_FLOAT64_C( 546.15), SIMDE_FLOAT64_C( -705.77), SIMDE_FLOAT64_C( 736.58), SIMDE_FLOAT64_C( -711.55), SIMDE_FLOAT64_C( -925.98), SIMDE_FLOAT64_C( -806.97), SIMDE_FLOAT64_C( -432.94), SIMDE_FLOAT64_C( -168.19) }, { SIMDE_FLOAT64_C( -586.45), SIMDE_FLOAT64_C( 158.71), SIMDE_FLOAT64_C( 546.15), SIMDE_FLOAT64_C( -705.77), SIMDE_FLOAT64_C( 
-931.33), SIMDE_FLOAT64_C( 650.24), SIMDE_FLOAT64_C( -925.98), SIMDE_FLOAT64_C( -806.97) }, { SIMDE_FLOAT64_C( 546.15), SIMDE_FLOAT64_C( -705.77), SIMDE_FLOAT64_C( 546.15), SIMDE_FLOAT64_C( -705.77), SIMDE_FLOAT64_C( -432.94), SIMDE_FLOAT64_C( -168.19), SIMDE_FLOAT64_C( -931.33), SIMDE_FLOAT64_C( 650.24) }, { SIMDE_FLOAT64_C( 736.58), SIMDE_FLOAT64_C( -711.55), SIMDE_FLOAT64_C( -586.45), SIMDE_FLOAT64_C( 158.71), SIMDE_FLOAT64_C( 133.41), SIMDE_FLOAT64_C( -214.36), SIMDE_FLOAT64_C( 133.41), SIMDE_FLOAT64_C( -214.36) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r; r = simde_mm512_shuffle_f64x2(a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r1), 1); r = simde_mm512_shuffle_f64x2(a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r2), 1); r = simde_mm512_shuffle_f64x2(a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r3), 1); r = simde_mm512_shuffle_f64x2(a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r4), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { simde__m512d a = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m512d b = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m512d r; simde_test_x86_write_f64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_shuffle_f64x2(a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_write_f64x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_shuffle_f64x2(a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_write_f64x8(2, r, 
SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_shuffle_f64x2(a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_write_f64x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_shuffle_f64x2(a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_write_f64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_shuffle_f64x2(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r1[8]; const simde_float64 r2[8]; const simde_float64 r3[8]; const simde_float64 r4[8]; } test_vec[4] = { { { SIMDE_FLOAT64_C( -452.32), SIMDE_FLOAT64_C( -238.42), SIMDE_FLOAT64_C( 874.88), SIMDE_FLOAT64_C( 875.56), SIMDE_FLOAT64_C( 19.67), SIMDE_FLOAT64_C( -26.49), SIMDE_FLOAT64_C( -41.91), SIMDE_FLOAT64_C( 532.26) }, UINT8_C(187), { SIMDE_FLOAT64_C( -670.56), SIMDE_FLOAT64_C( 78.82), SIMDE_FLOAT64_C( -438.00), SIMDE_FLOAT64_C( -112.77), SIMDE_FLOAT64_C( -95.70), SIMDE_FLOAT64_C( 217.05), SIMDE_FLOAT64_C( -809.66), SIMDE_FLOAT64_C( -527.36) }, { SIMDE_FLOAT64_C( -21.99), SIMDE_FLOAT64_C( -162.55), SIMDE_FLOAT64_C( 960.43), SIMDE_FLOAT64_C( 988.41), SIMDE_FLOAT64_C( 662.95), SIMDE_FLOAT64_C( 852.97), SIMDE_FLOAT64_C( -758.86), SIMDE_FLOAT64_C( 340.18) }, { SIMDE_FLOAT64_C( -95.70), SIMDE_FLOAT64_C( 217.05), SIMDE_FLOAT64_C( 874.88), SIMDE_FLOAT64_C( -527.36), SIMDE_FLOAT64_C( -21.99), SIMDE_FLOAT64_C( -162.55), SIMDE_FLOAT64_C( -41.91), SIMDE_FLOAT64_C( 988.41) }, { SIMDE_FLOAT64_C( -438.00), SIMDE_FLOAT64_C( -112.77), SIMDE_FLOAT64_C( 874.88), SIMDE_FLOAT64_C( 217.05), SIMDE_FLOAT64_C( -758.86), SIMDE_FLOAT64_C( 340.18), SIMDE_FLOAT64_C( -41.91), SIMDE_FLOAT64_C( -162.55) }, { SIMDE_FLOAT64_C( -95.70), SIMDE_FLOAT64_C( 217.05), SIMDE_FLOAT64_C( 874.88), SIMDE_FLOAT64_C( 217.05), SIMDE_FLOAT64_C( 960.43), SIMDE_FLOAT64_C( 988.41), SIMDE_FLOAT64_C( -41.91), SIMDE_FLOAT64_C( 340.18) }, { SIMDE_FLOAT64_C( -809.66), SIMDE_FLOAT64_C( -527.36), 
SIMDE_FLOAT64_C( 874.88), SIMDE_FLOAT64_C( -112.77), SIMDE_FLOAT64_C( 662.95), SIMDE_FLOAT64_C( 852.97), SIMDE_FLOAT64_C( -41.91), SIMDE_FLOAT64_C( 852.97) } }, { { SIMDE_FLOAT64_C( 401.34), SIMDE_FLOAT64_C( -871.43), SIMDE_FLOAT64_C( 12.13), SIMDE_FLOAT64_C( 459.55), SIMDE_FLOAT64_C( -800.76), SIMDE_FLOAT64_C( 805.68), SIMDE_FLOAT64_C( -992.78), SIMDE_FLOAT64_C( -39.18) }, UINT8_C(162), { SIMDE_FLOAT64_C( 882.78), SIMDE_FLOAT64_C( 980.49), SIMDE_FLOAT64_C( -345.93), SIMDE_FLOAT64_C( -159.12), SIMDE_FLOAT64_C( 512.75), SIMDE_FLOAT64_C( -484.64), SIMDE_FLOAT64_C( 170.32), SIMDE_FLOAT64_C( -408.43) }, { SIMDE_FLOAT64_C( 77.36), SIMDE_FLOAT64_C( -942.46), SIMDE_FLOAT64_C( 495.87), SIMDE_FLOAT64_C( -705.58), SIMDE_FLOAT64_C( -752.11), SIMDE_FLOAT64_C( 968.51), SIMDE_FLOAT64_C( 272.43), SIMDE_FLOAT64_C( 85.33) }, { SIMDE_FLOAT64_C( 401.34), SIMDE_FLOAT64_C( -484.64), SIMDE_FLOAT64_C( 12.13), SIMDE_FLOAT64_C( 459.55), SIMDE_FLOAT64_C( -800.76), SIMDE_FLOAT64_C( -942.46), SIMDE_FLOAT64_C( -992.78), SIMDE_FLOAT64_C( -705.58) }, { SIMDE_FLOAT64_C( 401.34), SIMDE_FLOAT64_C( -159.12), SIMDE_FLOAT64_C( 12.13), SIMDE_FLOAT64_C( 459.55), SIMDE_FLOAT64_C( -800.76), SIMDE_FLOAT64_C( 85.33), SIMDE_FLOAT64_C( -992.78), SIMDE_FLOAT64_C( -942.46) }, { SIMDE_FLOAT64_C( 401.34), SIMDE_FLOAT64_C( -484.64), SIMDE_FLOAT64_C( 12.13), SIMDE_FLOAT64_C( 459.55), SIMDE_FLOAT64_C( -800.76), SIMDE_FLOAT64_C( -705.58), SIMDE_FLOAT64_C( -992.78), SIMDE_FLOAT64_C( 85.33) }, { SIMDE_FLOAT64_C( 401.34), SIMDE_FLOAT64_C( -408.43), SIMDE_FLOAT64_C( 12.13), SIMDE_FLOAT64_C( 459.55), SIMDE_FLOAT64_C( -800.76), SIMDE_FLOAT64_C( 968.51), SIMDE_FLOAT64_C( -992.78), SIMDE_FLOAT64_C( 968.51) } }, { { SIMDE_FLOAT64_C( 928.94), SIMDE_FLOAT64_C( 260.83), SIMDE_FLOAT64_C( -251.71), SIMDE_FLOAT64_C( 781.91), SIMDE_FLOAT64_C( 501.97), SIMDE_FLOAT64_C( -911.54), SIMDE_FLOAT64_C( 183.25), SIMDE_FLOAT64_C( 630.54) }, UINT8_C( 36), { SIMDE_FLOAT64_C( -357.21), SIMDE_FLOAT64_C( 829.78), SIMDE_FLOAT64_C( -93.73), 
SIMDE_FLOAT64_C( -349.98), SIMDE_FLOAT64_C( -209.40), SIMDE_FLOAT64_C( -413.17), SIMDE_FLOAT64_C( -467.20), SIMDE_FLOAT64_C( -228.91) }, { SIMDE_FLOAT64_C( 240.90), SIMDE_FLOAT64_C( 373.68), SIMDE_FLOAT64_C( -716.16), SIMDE_FLOAT64_C( 756.26), SIMDE_FLOAT64_C( -456.01), SIMDE_FLOAT64_C( -124.59), SIMDE_FLOAT64_C( -166.37), SIMDE_FLOAT64_C( -398.47) }, { SIMDE_FLOAT64_C( 928.94), SIMDE_FLOAT64_C( 260.83), SIMDE_FLOAT64_C( -467.20), SIMDE_FLOAT64_C( 781.91), SIMDE_FLOAT64_C( 501.97), SIMDE_FLOAT64_C( 373.68), SIMDE_FLOAT64_C( 183.25), SIMDE_FLOAT64_C( 630.54) }, { SIMDE_FLOAT64_C( 928.94), SIMDE_FLOAT64_C( 260.83), SIMDE_FLOAT64_C( -209.40), SIMDE_FLOAT64_C( 781.91), SIMDE_FLOAT64_C( 501.97), SIMDE_FLOAT64_C( -398.47), SIMDE_FLOAT64_C( 183.25), SIMDE_FLOAT64_C( 630.54) }, { SIMDE_FLOAT64_C( 928.94), SIMDE_FLOAT64_C( 260.83), SIMDE_FLOAT64_C( -209.40), SIMDE_FLOAT64_C( 781.91), SIMDE_FLOAT64_C( 501.97), SIMDE_FLOAT64_C( 756.26), SIMDE_FLOAT64_C( 183.25), SIMDE_FLOAT64_C( 630.54) }, { SIMDE_FLOAT64_C( 928.94), SIMDE_FLOAT64_C( 260.83), SIMDE_FLOAT64_C( -93.73), SIMDE_FLOAT64_C( 781.91), SIMDE_FLOAT64_C( 501.97), SIMDE_FLOAT64_C( -124.59), SIMDE_FLOAT64_C( 183.25), SIMDE_FLOAT64_C( 630.54) } }, { { SIMDE_FLOAT64_C( -628.72), SIMDE_FLOAT64_C( 128.04), SIMDE_FLOAT64_C( -150.58), SIMDE_FLOAT64_C( -660.21), SIMDE_FLOAT64_C( -599.53), SIMDE_FLOAT64_C( 934.76), SIMDE_FLOAT64_C( -731.27), SIMDE_FLOAT64_C( 661.30) }, UINT8_C(192), { SIMDE_FLOAT64_C( -949.36), SIMDE_FLOAT64_C( 163.28), SIMDE_FLOAT64_C( -228.49), SIMDE_FLOAT64_C( 233.89), SIMDE_FLOAT64_C( -206.18), SIMDE_FLOAT64_C( 872.11), SIMDE_FLOAT64_C( 876.68), SIMDE_FLOAT64_C( -376.40) }, { SIMDE_FLOAT64_C( -221.62), SIMDE_FLOAT64_C( -473.31), SIMDE_FLOAT64_C( 414.20), SIMDE_FLOAT64_C( 365.21), SIMDE_FLOAT64_C( 59.49), SIMDE_FLOAT64_C( -814.71), SIMDE_FLOAT64_C( -393.89), SIMDE_FLOAT64_C( -566.83) }, { SIMDE_FLOAT64_C( -628.72), SIMDE_FLOAT64_C( 128.04), SIMDE_FLOAT64_C( -150.58), SIMDE_FLOAT64_C( -660.21), SIMDE_FLOAT64_C( 
-599.53), SIMDE_FLOAT64_C( 934.76), SIMDE_FLOAT64_C( 414.20), SIMDE_FLOAT64_C( 365.21) }, { SIMDE_FLOAT64_C( -628.72), SIMDE_FLOAT64_C( 128.04), SIMDE_FLOAT64_C( -150.58), SIMDE_FLOAT64_C( -660.21), SIMDE_FLOAT64_C( -599.53), SIMDE_FLOAT64_C( 934.76), SIMDE_FLOAT64_C( -221.62), SIMDE_FLOAT64_C( -473.31) }, { SIMDE_FLOAT64_C( -628.72), SIMDE_FLOAT64_C( 128.04), SIMDE_FLOAT64_C( -150.58), SIMDE_FLOAT64_C( -660.21), SIMDE_FLOAT64_C( -599.53), SIMDE_FLOAT64_C( 934.76), SIMDE_FLOAT64_C( -393.89), SIMDE_FLOAT64_C( -566.83) }, { SIMDE_FLOAT64_C( -628.72), SIMDE_FLOAT64_C( 128.04), SIMDE_FLOAT64_C( -150.58), SIMDE_FLOAT64_C( -660.21), SIMDE_FLOAT64_C( -599.53), SIMDE_FLOAT64_C( 934.76), SIMDE_FLOAT64_C( 59.49), SIMDE_FLOAT64_C( -814.71) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__mmask8 k = test_vec[i].k; simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r; r = simde_mm512_mask_shuffle_f64x2(src, k, a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r1), 1); r = simde_mm512_mask_shuffle_f64x2(src, k, a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r2), 1); r = simde_mm512_mask_shuffle_f64x2(src, k, a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r3), 1); r = simde_mm512_mask_shuffle_f64x2(src, k, a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r4), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { simde__m512d src = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m512d a = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), 
SIMDE_FLOAT64_C(1000.0)); simde__m512d b = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m512d r; simde_test_x86_write_f64x8(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k,SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_mask_shuffle_f64x2(src, k, a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_write_f64x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_mask_shuffle_f64x2(src, k, a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_write_f64x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_mask_shuffle_f64x2(src, k, a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_write_f64x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_mask_shuffle_f64x2(src, k, a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_write_f64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_maskz_shuffle_f64x2(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r1[8]; const simde_float64 r2[8]; const simde_float64 r3[8]; const simde_float64 r4[8]; } test_vec[4] = { { UINT8_C( 76), { SIMDE_FLOAT64_C( -379.23), SIMDE_FLOAT64_C( 115.72), SIMDE_FLOAT64_C( 73.21), SIMDE_FLOAT64_C( 368.89), SIMDE_FLOAT64_C( -957.25), SIMDE_FLOAT64_C( 678.78), SIMDE_FLOAT64_C( -404.68), SIMDE_FLOAT64_C( -949.07) }, { SIMDE_FLOAT64_C( 93.24), SIMDE_FLOAT64_C( 662.55), SIMDE_FLOAT64_C( 397.06), SIMDE_FLOAT64_C( -397.64), SIMDE_FLOAT64_C( 60.46), SIMDE_FLOAT64_C( 418.60), SIMDE_FLOAT64_C( -942.59), SIMDE_FLOAT64_C( -459.80) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -404.68), SIMDE_FLOAT64_C( -949.07), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 397.06), SIMDE_FLOAT64_C( 0.00) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), 
SIMDE_FLOAT64_C( -957.25), SIMDE_FLOAT64_C( 678.78), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 93.24), SIMDE_FLOAT64_C( 0.00) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -957.25), SIMDE_FLOAT64_C( 678.78), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -942.59), SIMDE_FLOAT64_C( 0.00) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 73.21), SIMDE_FLOAT64_C( 368.89), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 60.46), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(214), { SIMDE_FLOAT64_C( -399.74), SIMDE_FLOAT64_C( 463.35), SIMDE_FLOAT64_C( -417.58), SIMDE_FLOAT64_C( 305.42), SIMDE_FLOAT64_C( 556.46), SIMDE_FLOAT64_C( 407.97), SIMDE_FLOAT64_C( 544.91), SIMDE_FLOAT64_C( 643.69) }, { SIMDE_FLOAT64_C( 775.59), SIMDE_FLOAT64_C( -852.73), SIMDE_FLOAT64_C( 451.69), SIMDE_FLOAT64_C( -86.20), SIMDE_FLOAT64_C( 719.16), SIMDE_FLOAT64_C( -340.00), SIMDE_FLOAT64_C( 534.57), SIMDE_FLOAT64_C( -165.13) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 407.97), SIMDE_FLOAT64_C( 544.91), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 775.59), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 451.69), SIMDE_FLOAT64_C( -86.20) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 305.42), SIMDE_FLOAT64_C( 556.46), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 534.57), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 775.59), SIMDE_FLOAT64_C( -852.73) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 407.97), SIMDE_FLOAT64_C( 556.46), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 451.69), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 534.57), SIMDE_FLOAT64_C( -165.13) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 643.69), SIMDE_FLOAT64_C( -417.58), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 719.16), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 719.16), SIMDE_FLOAT64_C( -340.00) } }, { UINT8_C(161), { SIMDE_FLOAT64_C( -96.54), SIMDE_FLOAT64_C( -122.37), SIMDE_FLOAT64_C( 411.99), SIMDE_FLOAT64_C( 498.78), SIMDE_FLOAT64_C( -71.44), SIMDE_FLOAT64_C( 
-494.78), SIMDE_FLOAT64_C( 161.33), SIMDE_FLOAT64_C( -674.38) }, { SIMDE_FLOAT64_C( 107.58), SIMDE_FLOAT64_C( -778.21), SIMDE_FLOAT64_C( 744.22), SIMDE_FLOAT64_C( 164.99), SIMDE_FLOAT64_C( -238.01), SIMDE_FLOAT64_C( -673.11), SIMDE_FLOAT64_C( 765.25), SIMDE_FLOAT64_C( -774.66) }, { SIMDE_FLOAT64_C( -71.44), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -778.21), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 164.99) }, { SIMDE_FLOAT64_C( 411.99), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -774.66), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -778.21) }, { SIMDE_FLOAT64_C( -71.44), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 164.99), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -774.66) }, { SIMDE_FLOAT64_C( 161.33), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -673.11), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -673.11) } }, { UINT8_C( 92), { SIMDE_FLOAT64_C( 70.66), SIMDE_FLOAT64_C( 781.80), SIMDE_FLOAT64_C( -682.72), SIMDE_FLOAT64_C( -384.43), SIMDE_FLOAT64_C( 425.49), SIMDE_FLOAT64_C( -907.13), SIMDE_FLOAT64_C( -237.16), SIMDE_FLOAT64_C( -122.82) }, { SIMDE_FLOAT64_C( 6.67), SIMDE_FLOAT64_C( -518.01), SIMDE_FLOAT64_C( 537.18), SIMDE_FLOAT64_C( -458.76), SIMDE_FLOAT64_C( 316.87), SIMDE_FLOAT64_C( 270.38), SIMDE_FLOAT64_C( 444.70), SIMDE_FLOAT64_C( -805.51) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -237.16), SIMDE_FLOAT64_C( -122.82), SIMDE_FLOAT64_C( 6.67), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 537.18), SIMDE_FLOAT64_C( 0.00) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 425.49), SIMDE_FLOAT64_C( -907.13), SIMDE_FLOAT64_C( 444.70), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 6.67), SIMDE_FLOAT64_C( 0.00) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), 
SIMDE_FLOAT64_C( 425.49), SIMDE_FLOAT64_C( -907.13), SIMDE_FLOAT64_C( 537.18), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 444.70), SIMDE_FLOAT64_C( 0.00) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -682.72), SIMDE_FLOAT64_C( -384.43), SIMDE_FLOAT64_C( 316.87), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 316.87), SIMDE_FLOAT64_C( 0.00) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask8 k = test_vec[i].k; simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r; r = simde_mm512_maskz_shuffle_f64x2(k, a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r1), 1); r = simde_mm512_maskz_shuffle_f64x2(k, a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r2), 1); r = simde_mm512_maskz_shuffle_f64x2(k, a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r3), 1); r = simde_mm512_maskz_shuffle_f64x2(k, a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r4), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m512d a = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m512d b = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m512d r; simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_maskz_shuffle_f64x2(k, a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_write_f64x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_maskz_shuffle_f64x2(k, a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); 
simde_test_x86_write_f64x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_maskz_shuffle_f64x2(k, a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_write_f64x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_maskz_shuffle_f64x2(k, a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_write_f64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_shuffle_i32x4(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const int32_t a[16]; const int32_t b[16]; const int32_t r1[16]; const int32_t r2[16]; const int32_t r3[16]; const int32_t r4[16]; } test_vec[4] = { { { INT32_C( 1991146578), INT32_C( 1899045466), -INT32_C( 1068581367), -INT32_C( 826279770), INT32_C( 117359005), INT32_C( 1832948451), INT32_C( 516563938), -INT32_C( 2119307474), -INT32_C( 973644949), -INT32_C( 2026428290), -INT32_C( 1924626969), INT32_C( 458950525), -INT32_C( 1407100472), -INT32_C( 853974293), -INT32_C( 1259543675), INT32_C( 1043700178) }, { INT32_C( 1929587957), INT32_C( 1023031893), INT32_C( 1036600000), INT32_C( 307766346), INT32_C( 1757313661), INT32_C( 1630984156), -INT32_C( 1927994694), -INT32_C( 1328854341), -INT32_C( 870068617), -INT32_C( 922214903), -INT32_C( 1442393760), INT32_C( 1925013493), -INT32_C( 1243972647), INT32_C( 219549779), -INT32_C( 291886030), -INT32_C( 308386442) }, { -INT32_C( 973644949), -INT32_C( 2026428290), -INT32_C( 1924626969), INT32_C( 458950525), -INT32_C( 1407100472), -INT32_C( 853974293), -INT32_C( 1259543675), INT32_C( 1043700178), INT32_C( 1929587957), INT32_C( 1023031893), INT32_C( 1036600000), INT32_C( 307766346), INT32_C( 1757313661), INT32_C( 1630984156), -INT32_C( 1927994694), -INT32_C( 1328854341) }, { INT32_C( 117359005), INT32_C( 1832948451), INT32_C( 516563938), -INT32_C( 2119307474), -INT32_C( 973644949), -INT32_C( 2026428290), -INT32_C( 1924626969), INT32_C( 458950525), -INT32_C( 1243972647), INT32_C( 219549779), -INT32_C( 291886030), -INT32_C( 308386442), INT32_C( 1929587957), INT32_C( 1023031893), INT32_C( 
1036600000), INT32_C( 307766346) }, { -INT32_C( 973644949), -INT32_C( 2026428290), -INT32_C( 1924626969), INT32_C( 458950525), -INT32_C( 973644949), -INT32_C( 2026428290), -INT32_C( 1924626969), INT32_C( 458950525), INT32_C( 1757313661), INT32_C( 1630984156), -INT32_C( 1927994694), -INT32_C( 1328854341), -INT32_C( 1243972647), INT32_C( 219549779), -INT32_C( 291886030), -INT32_C( 308386442) }, { -INT32_C( 1407100472), -INT32_C( 853974293), -INT32_C( 1259543675), INT32_C( 1043700178), INT32_C( 117359005), INT32_C( 1832948451), INT32_C( 516563938), -INT32_C( 2119307474), -INT32_C( 870068617), -INT32_C( 922214903), -INT32_C( 1442393760), INT32_C( 1925013493), -INT32_C( 870068617), -INT32_C( 922214903), -INT32_C( 1442393760), INT32_C( 1925013493) } }, { { INT32_C( 1035584052), INT32_C( 1074184672), -INT32_C( 2014638958), INT32_C( 1157212267), INT32_C( 1996084259), INT32_C( 394531044), -INT32_C( 1308287428), -INT32_C( 1197431932), INT32_C( 1173707109), -INT32_C( 1383662821), INT32_C( 1932882183), INT32_C( 1018637849), -INT32_C( 407654142), -INT32_C( 33671231), -INT32_C( 642841771), INT32_C( 210849702) }, { -INT32_C( 1001290071), -INT32_C( 1989027967), INT32_C( 1643947592), -INT32_C( 677465131), INT32_C( 633229668), -INT32_C( 584926072), INT32_C( 1723323071), -INT32_C( 898480095), INT32_C( 1351533519), -INT32_C( 472317797), INT32_C( 2068174246), -INT32_C( 313269367), -INT32_C( 1139601100), -INT32_C( 1919339058), INT32_C( 703811591), INT32_C( 1760781721) }, { INT32_C( 1173707109), -INT32_C( 1383662821), INT32_C( 1932882183), INT32_C( 1018637849), -INT32_C( 407654142), -INT32_C( 33671231), -INT32_C( 642841771), INT32_C( 210849702), -INT32_C( 1001290071), -INT32_C( 1989027967), INT32_C( 1643947592), -INT32_C( 677465131), INT32_C( 633229668), -INT32_C( 584926072), INT32_C( 1723323071), -INT32_C( 898480095) }, { INT32_C( 1996084259), INT32_C( 394531044), -INT32_C( 1308287428), -INT32_C( 1197431932), INT32_C( 1173707109), -INT32_C( 1383662821), INT32_C( 1932882183), INT32_C( 
1018637849), -INT32_C( 1139601100), -INT32_C( 1919339058), INT32_C( 703811591), INT32_C( 1760781721), -INT32_C( 1001290071), -INT32_C( 1989027967), INT32_C( 1643947592), -INT32_C( 677465131) }, { INT32_C( 1173707109), -INT32_C( 1383662821), INT32_C( 1932882183), INT32_C( 1018637849), INT32_C( 1173707109), -INT32_C( 1383662821), INT32_C( 1932882183), INT32_C( 1018637849), INT32_C( 633229668), -INT32_C( 584926072), INT32_C( 1723323071), -INT32_C( 898480095), -INT32_C( 1139601100), -INT32_C( 1919339058), INT32_C( 703811591), INT32_C( 1760781721) }, { -INT32_C( 407654142), -INT32_C( 33671231), -INT32_C( 642841771), INT32_C( 210849702), INT32_C( 1996084259), INT32_C( 394531044), -INT32_C( 1308287428), -INT32_C( 1197431932), INT32_C( 1351533519), -INT32_C( 472317797), INT32_C( 2068174246), -INT32_C( 313269367), INT32_C( 1351533519), -INT32_C( 472317797), INT32_C( 2068174246), -INT32_C( 313269367) } }, { { -INT32_C( 994541015), INT32_C( 682070658), -INT32_C( 257627033), INT32_C( 64944079), -INT32_C( 692063992), INT32_C( 778328358), INT32_C( 1113020329), -INT32_C( 425047363), INT32_C( 1319789516), INT32_C( 1551323637), INT32_C( 223157054), INT32_C( 437332754), INT32_C( 1123143708), -INT32_C( 747612887), INT32_C( 1763035052), -INT32_C( 565198830) }, { INT32_C( 405666083), -INT32_C( 1988844469), -INT32_C( 778649153), INT32_C( 149661676), -INT32_C( 1588863880), -INT32_C( 562775247), -INT32_C( 1790473597), INT32_C( 1836291914), -INT32_C( 611999600), INT32_C( 56949060), -INT32_C( 1479214149), INT32_C( 447791266), -INT32_C( 826475620), INT32_C( 967585974), INT32_C( 80671674), INT32_C( 460407434) }, { INT32_C( 1319789516), INT32_C( 1551323637), INT32_C( 223157054), INT32_C( 437332754), INT32_C( 1123143708), -INT32_C( 747612887), INT32_C( 1763035052), -INT32_C( 565198830), INT32_C( 405666083), -INT32_C( 1988844469), -INT32_C( 778649153), INT32_C( 149661676), -INT32_C( 1588863880), -INT32_C( 562775247), -INT32_C( 1790473597), INT32_C( 1836291914) }, { -INT32_C( 692063992), INT32_C( 
778328358), INT32_C( 1113020329), -INT32_C( 425047363), INT32_C( 1319789516), INT32_C( 1551323637), INT32_C( 223157054), INT32_C( 437332754), -INT32_C( 826475620), INT32_C( 967585974), INT32_C( 80671674), INT32_C( 460407434), INT32_C( 405666083), -INT32_C( 1988844469), -INT32_C( 778649153), INT32_C( 149661676) }, { INT32_C( 1319789516), INT32_C( 1551323637), INT32_C( 223157054), INT32_C( 437332754), INT32_C( 1319789516), INT32_C( 1551323637), INT32_C( 223157054), INT32_C( 437332754), -INT32_C( 1588863880), -INT32_C( 562775247), -INT32_C( 1790473597), INT32_C( 1836291914), -INT32_C( 826475620), INT32_C( 967585974), INT32_C( 80671674), INT32_C( 460407434) }, { INT32_C( 1123143708), -INT32_C( 747612887), INT32_C( 1763035052), -INT32_C( 565198830), -INT32_C( 692063992), INT32_C( 778328358), INT32_C( 1113020329), -INT32_C( 425047363), -INT32_C( 611999600), INT32_C( 56949060), -INT32_C( 1479214149), INT32_C( 447791266), -INT32_C( 611999600), INT32_C( 56949060), -INT32_C( 1479214149), INT32_C( 447791266) } }, { { INT32_C( 670496482), -INT32_C( 1423287312), -INT32_C( 128778410), INT32_C( 1527972543), -INT32_C( 1272328195), -INT32_C( 1158818305), INT32_C( 1405009097), -INT32_C( 529649410), INT32_C( 369583398), INT32_C( 381760192), -INT32_C( 267512783), INT32_C( 340467990), -INT32_C( 255298064), INT32_C( 329954634), INT32_C( 1869047921), -INT32_C( 1085221480) }, { -INT32_C( 86681798), -INT32_C( 1173317751), -INT32_C( 1045815381), INT32_C( 836105792), -INT32_C( 1256088213), -INT32_C( 993408173), -INT32_C( 868995021), INT32_C( 1082885125), INT32_C( 1698324699), -INT32_C( 1575007241), -INT32_C( 1436235158), INT32_C( 735787456), INT32_C( 719387863), -INT32_C( 68245049), -INT32_C( 540597543), -INT32_C( 2111876442) }, { INT32_C( 369583398), INT32_C( 381760192), -INT32_C( 267512783), INT32_C( 340467990), -INT32_C( 255298064), INT32_C( 329954634), INT32_C( 1869047921), -INT32_C( 1085221480), -INT32_C( 86681798), -INT32_C( 1173317751), -INT32_C( 1045815381), INT32_C( 836105792), 
-INT32_C( 1256088213), -INT32_C( 993408173), -INT32_C( 868995021), INT32_C( 1082885125) }, { -INT32_C( 1272328195), -INT32_C( 1158818305), INT32_C( 1405009097), -INT32_C( 529649410), INT32_C( 369583398), INT32_C( 381760192), -INT32_C( 267512783), INT32_C( 340467990), INT32_C( 719387863), -INT32_C( 68245049), -INT32_C( 540597543), -INT32_C( 2111876442), -INT32_C( 86681798), -INT32_C( 1173317751), -INT32_C( 1045815381), INT32_C( 836105792) }, { INT32_C( 369583398), INT32_C( 381760192), -INT32_C( 267512783), INT32_C( 340467990), INT32_C( 369583398), INT32_C( 381760192), -INT32_C( 267512783), INT32_C( 340467990), -INT32_C( 1256088213), -INT32_C( 993408173), -INT32_C( 868995021), INT32_C( 1082885125), INT32_C( 719387863), -INT32_C( 68245049), -INT32_C( 540597543), -INT32_C( 2111876442) }, { -INT32_C( 255298064), INT32_C( 329954634), INT32_C( 1869047921), -INT32_C( 1085221480), -INT32_C( 1272328195), -INT32_C( 1158818305), INT32_C( 1405009097), -INT32_C( 529649410), INT32_C( 1698324699), -INT32_C( 1575007241), -INT32_C( 1436235158), INT32_C( 735787456), INT32_C( 1698324699), -INT32_C( 1575007241), -INT32_C( 1436235158), INT32_C( 735787456) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r; r = simde_mm512_shuffle_i32x4(a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r1)); r = simde_mm512_shuffle_i32x4(a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r2)); r = simde_mm512_shuffle_i32x4(a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r3)); r = simde_mm512_shuffle_i32x4(a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r4)); } return 0; #else 
fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { simde__m512i a = simde_test_x86_random_i32x16(); simde__m512i b = simde_test_x86_random_i32x16(); simde__m512i r; simde_test_x86_write_i32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_shuffle_i32x4(a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_shuffle_i32x4(a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_shuffle_i32x4(a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_shuffle_i32x4(a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_shuffle_i32x4(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const int32_t src[16]; const simde__mmask16 k; const int32_t a[16]; const int32_t b[16]; const int32_t r1[16]; const int32_t r2[16]; const int32_t r3[16]; const int32_t r4[16]; } test_vec[4] = { { { -INT32_C( 1024729526), -INT32_C( 210388682), -INT32_C( 73862869), -INT32_C( 260015396), -INT32_C( 1919425577), -INT32_C( 420651029), -INT32_C( 1844649463), -INT32_C( 1214752147), INT32_C( 1853457208), INT32_C( 1751314236), -INT32_C( 1117521184), INT32_C( 1286464373), -INT32_C( 1311160890), -INT32_C( 1382561884), INT32_C( 373269673), INT32_C( 919459838) }, UINT16_C(18266), { INT32_C( 104241060), INT32_C( 1644238591), -INT32_C( 2126154029), -INT32_C( 1681519422), INT32_C( 1432513213), INT32_C( 1459161879), -INT32_C( 282134751), -INT32_C( 784955347), INT32_C( 517499935), INT32_C( 1451284866), INT32_C( 299353679), -INT32_C( 1884512814), INT32_C( 518262535), INT32_C( 1014291738), INT32_C( 19636948), INT32_C( 1255366955) }, { INT32_C( 1332325325), -INT32_C( 744101500), -INT32_C( 2115732305), INT32_C( 538021913), -INT32_C( 1170279009), -INT32_C( 
1477004333), -INT32_C( 2136399531), INT32_C( 1338735746), -INT32_C( 1415695321), -INT32_C( 847363043), -INT32_C( 632397120), -INT32_C( 1829085198), INT32_C( 676084053), INT32_C( 1104102124), -INT32_C( 440240029), INT32_C( 456429044) }, { -INT32_C( 1024729526), INT32_C( 1451284866), -INT32_C( 73862869), -INT32_C( 1884512814), INT32_C( 518262535), -INT32_C( 420651029), INT32_C( 19636948), -INT32_C( 1214752147), INT32_C( 1332325325), -INT32_C( 744101500), -INT32_C( 2115732305), INT32_C( 1286464373), -INT32_C( 1311160890), -INT32_C( 1382561884), -INT32_C( 2136399531), INT32_C( 919459838) }, { -INT32_C( 1024729526), INT32_C( 1459161879), -INT32_C( 73862869), -INT32_C( 784955347), INT32_C( 517499935), -INT32_C( 420651029), INT32_C( 299353679), -INT32_C( 1214752147), INT32_C( 676084053), INT32_C( 1104102124), -INT32_C( 440240029), INT32_C( 1286464373), -INT32_C( 1311160890), -INT32_C( 1382561884), -INT32_C( 2115732305), INT32_C( 919459838) }, { -INT32_C( 1024729526), INT32_C( 1451284866), -INT32_C( 73862869), -INT32_C( 1884512814), INT32_C( 517499935), -INT32_C( 420651029), INT32_C( 299353679), -INT32_C( 1214752147), -INT32_C( 1170279009), -INT32_C( 1477004333), -INT32_C( 2136399531), INT32_C( 1286464373), -INT32_C( 1311160890), -INT32_C( 1382561884), -INT32_C( 440240029), INT32_C( 919459838) }, { -INT32_C( 1024729526), INT32_C( 1014291738), -INT32_C( 73862869), INT32_C( 1255366955), INT32_C( 1432513213), -INT32_C( 420651029), -INT32_C( 282134751), -INT32_C( 1214752147), -INT32_C( 1415695321), -INT32_C( 847363043), -INT32_C( 632397120), INT32_C( 1286464373), -INT32_C( 1311160890), -INT32_C( 1382561884), -INT32_C( 632397120), INT32_C( 919459838) } }, { { -INT32_C( 557395263), -INT32_C( 676641770), -INT32_C( 1716389210), -INT32_C( 1372869799), -INT32_C( 791250972), INT32_C( 470984377), INT32_C( 318886942), INT32_C( 573453665), INT32_C( 503379207), -INT32_C( 520770503), -INT32_C( 8804698), INT32_C( 917414993), -INT32_C( 737770469), INT32_C( 1240471594), INT32_C( 1297936876), 
INT32_C( 762284582) }, UINT16_C(28799), { INT32_C( 1075624267), INT32_C( 317112985), INT32_C( 1874213057), INT32_C( 1962201454), -INT32_C( 1785913691), -INT32_C( 1014597273), INT32_C( 911125703), INT32_C( 631688665), INT32_C( 526762630), INT32_C( 1177635972), -INT32_C( 222959740), INT32_C( 1567009208), INT32_C( 804451272), INT32_C( 888371309), -INT32_C( 26590940), -INT32_C( 1792863986) }, { INT32_C( 1488226515), INT32_C( 1486808788), -INT32_C( 2041949235), -INT32_C( 974933507), INT32_C( 301258404), INT32_C( 1933961038), INT32_C( 930197544), -INT32_C( 1798531903), -INT32_C( 236158948), INT32_C( 877234790), -INT32_C( 608529186), -INT32_C( 375349947), -INT32_C( 1023699853), -INT32_C( 1540013956), -INT32_C( 1311004944), INT32_C( 1447470906) }, { INT32_C( 526762630), INT32_C( 1177635972), -INT32_C( 222959740), INT32_C( 1567009208), INT32_C( 804451272), INT32_C( 888371309), -INT32_C( 26590940), INT32_C( 573453665), INT32_C( 503379207), -INT32_C( 520770503), -INT32_C( 8804698), INT32_C( 917414993), INT32_C( 301258404), INT32_C( 1933961038), INT32_C( 930197544), INT32_C( 762284582) }, { -INT32_C( 1785913691), -INT32_C( 1014597273), INT32_C( 911125703), INT32_C( 631688665), INT32_C( 526762630), INT32_C( 1177635972), -INT32_C( 222959740), INT32_C( 573453665), INT32_C( 503379207), -INT32_C( 520770503), -INT32_C( 8804698), INT32_C( 917414993), INT32_C( 1488226515), INT32_C( 1486808788), -INT32_C( 2041949235), INT32_C( 762284582) }, { INT32_C( 526762630), INT32_C( 1177635972), -INT32_C( 222959740), INT32_C( 1567009208), INT32_C( 526762630), INT32_C( 1177635972), -INT32_C( 222959740), INT32_C( 573453665), INT32_C( 503379207), -INT32_C( 520770503), -INT32_C( 8804698), INT32_C( 917414993), -INT32_C( 1023699853), -INT32_C( 1540013956), -INT32_C( 1311004944), INT32_C( 762284582) }, { INT32_C( 804451272), INT32_C( 888371309), -INT32_C( 26590940), -INT32_C( 1792863986), -INT32_C( 1785913691), -INT32_C( 1014597273), INT32_C( 911125703), INT32_C( 573453665), INT32_C( 503379207), 
-INT32_C( 520770503), -INT32_C( 8804698), INT32_C( 917414993), -INT32_C( 236158948), INT32_C( 877234790), -INT32_C( 608529186), INT32_C( 762284582) } }, { { -INT32_C( 1907936728), -INT32_C( 1681747523), INT32_C( 1786149925), -INT32_C( 1923934694), INT32_C( 642731690), INT32_C( 2144044175), INT32_C( 1680975402), INT32_C( 1991997262), INT32_C( 1711538857), -INT32_C( 1207842925), INT32_C( 1562539843), INT32_C( 954955405), INT32_C( 1398684356), -INT32_C( 372037185), INT32_C( 508429520), INT32_C( 613681531) }, UINT16_C(38923), { -INT32_C( 1939890293), INT32_C( 2047124311), -INT32_C( 336555776), INT32_C( 673625289), -INT32_C( 615389944), INT32_C( 484385230), -INT32_C( 752526785), INT32_C( 158085502), INT32_C( 664194000), INT32_C( 1872861806), -INT32_C( 178613973), INT32_C( 1310556230), INT32_C( 875130469), -INT32_C( 816838512), -INT32_C( 526158238), INT32_C( 2011828135) }, { INT32_C( 1235124442), INT32_C( 1169702938), INT32_C( 389681873), -INT32_C( 127576173), INT32_C( 1445760710), -INT32_C( 131760746), -INT32_C( 1697003277), -INT32_C( 1307458601), INT32_C( 1576775491), -INT32_C( 1046301713), INT32_C( 1490607557), -INT32_C( 95338956), INT32_C( 1666219468), -INT32_C( 312772870), INT32_C( 377959486), INT32_C( 986224887) }, { INT32_C( 664194000), INT32_C( 1872861806), INT32_C( 1786149925), INT32_C( 1310556230), INT32_C( 642731690), INT32_C( 2144044175), INT32_C( 1680975402), INT32_C( 1991997262), INT32_C( 1711538857), -INT32_C( 1207842925), INT32_C( 1562539843), -INT32_C( 127576173), INT32_C( 1445760710), -INT32_C( 372037185), INT32_C( 508429520), -INT32_C( 1307458601) }, { -INT32_C( 615389944), INT32_C( 484385230), INT32_C( 1786149925), INT32_C( 158085502), INT32_C( 642731690), INT32_C( 2144044175), INT32_C( 1680975402), INT32_C( 1991997262), INT32_C( 1711538857), -INT32_C( 1207842925), INT32_C( 1562539843), INT32_C( 986224887), INT32_C( 1235124442), -INT32_C( 372037185), INT32_C( 508429520), -INT32_C( 127576173) }, { INT32_C( 664194000), INT32_C( 1872861806), INT32_C( 
1786149925), INT32_C( 1310556230), INT32_C( 642731690), INT32_C( 2144044175), INT32_C( 1680975402), INT32_C( 1991997262), INT32_C( 1711538857), -INT32_C( 1207842925), INT32_C( 1562539843), -INT32_C( 1307458601), INT32_C( 1666219468), -INT32_C( 372037185), INT32_C( 508429520), INT32_C( 986224887) }, { INT32_C( 875130469), -INT32_C( 816838512), INT32_C( 1786149925), INT32_C( 2011828135), INT32_C( 642731690), INT32_C( 2144044175), INT32_C( 1680975402), INT32_C( 1991997262), INT32_C( 1711538857), -INT32_C( 1207842925), INT32_C( 1562539843), -INT32_C( 95338956), INT32_C( 1576775491), -INT32_C( 372037185), INT32_C( 508429520), -INT32_C( 95338956) } }, { { INT32_C( 932692808), INT32_C( 1006123638), INT32_C( 1268044055), -INT32_C( 616110833), INT32_C( 1564382818), INT32_C( 1263180300), -INT32_C( 966667570), -INT32_C( 1308612246), INT32_C( 1659541740), -INT32_C( 375528750), -INT32_C( 1036766797), INT32_C( 2040429078), INT32_C( 500620305), INT32_C( 1164451958), INT32_C( 1561053682), -INT32_C( 569439246) }, UINT16_C(63907), { -INT32_C( 555977408), -INT32_C( 1827696802), -INT32_C( 267573678), INT32_C( 1976311455), -INT32_C( 1533721796), -INT32_C( 1821472632), -INT32_C( 190947099), INT32_C( 2146320959), INT32_C( 375245495), -INT32_C( 1414959783), INT32_C( 849065619), INT32_C( 296184021), INT32_C( 867515563), -INT32_C( 1446632508), -INT32_C( 1013029756), INT32_C( 1581419686) }, { -INT32_C( 1351311274), -INT32_C( 1604641523), -INT32_C( 1462569261), INT32_C( 163150174), INT32_C( 2050781109), INT32_C( 371393170), INT32_C( 232374631), -INT32_C( 1553261491), -INT32_C( 917250116), -INT32_C( 798380292), INT32_C( 58211236), INT32_C( 1762407092), INT32_C( 870533537), -INT32_C( 1303771317), INT32_C( 381690824), -INT32_C( 71750849) }, { INT32_C( 375245495), -INT32_C( 1414959783), INT32_C( 1268044055), -INT32_C( 616110833), INT32_C( 1564382818), -INT32_C( 1446632508), -INT32_C( 966667570), INT32_C( 1581419686), -INT32_C( 1351311274), -INT32_C( 375528750), -INT32_C( 1036766797), INT32_C( 
163150174), INT32_C( 2050781109), INT32_C( 371393170), INT32_C( 232374631), -INT32_C( 1553261491) }, { -INT32_C( 1533721796), -INT32_C( 1821472632), INT32_C( 1268044055), -INT32_C( 616110833), INT32_C( 1564382818), -INT32_C( 1414959783), -INT32_C( 966667570), INT32_C( 296184021), INT32_C( 870533537), -INT32_C( 375528750), -INT32_C( 1036766797), -INT32_C( 71750849), -INT32_C( 1351311274), -INT32_C( 1604641523), -INT32_C( 1462569261), INT32_C( 163150174) }, { INT32_C( 375245495), -INT32_C( 1414959783), INT32_C( 1268044055), -INT32_C( 616110833), INT32_C( 1564382818), -INT32_C( 1414959783), -INT32_C( 966667570), INT32_C( 296184021), INT32_C( 2050781109), -INT32_C( 375528750), -INT32_C( 1036766797), -INT32_C( 1553261491), INT32_C( 870533537), -INT32_C( 1303771317), INT32_C( 381690824), -INT32_C( 71750849) }, { INT32_C( 867515563), -INT32_C( 1446632508), INT32_C( 1268044055), -INT32_C( 616110833), INT32_C( 1564382818), -INT32_C( 1821472632), -INT32_C( 966667570), INT32_C( 2146320959), -INT32_C( 917250116), -INT32_C( 375528750), -INT32_C( 1036766797), INT32_C( 1762407092), -INT32_C( 917250116), -INT32_C( 798380292), INT32_C( 58211236), INT32_C( 1762407092) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i src = simde_mm512_loadu_epi32(test_vec[i].src); simde__mmask16 k = test_vec[i].k; simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r; r = simde_mm512_mask_shuffle_i32x4(src, k, a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r1)); r = simde_mm512_mask_shuffle_i32x4(src, k, a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r2)); r = simde_mm512_mask_shuffle_i32x4(src, k, a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r3)); r = 
simde_mm512_mask_shuffle_i32x4(src, k, a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r4)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { simde__m512i src = simde_test_x86_random_i32x16(); simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m512i a = simde_test_x86_random_i32x16(); simde__m512i b = simde_test_x86_random_i32x16(); simde__m512i r; simde_test_x86_write_i32x16(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask16(2, k,SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_mask_shuffle_i32x4(src, k, a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_mask_shuffle_i32x4(src, k, a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_mask_shuffle_i32x4(src, k, a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_mask_shuffle_i32x4(src, k, a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_maskz_shuffle_i32x4(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde__mmask16 k; const int32_t a[16]; const int32_t b[16]; const int32_t r1[16]; const int32_t r2[16]; const int32_t r3[16]; const int32_t r4[16]; } test_vec[4] = { { UINT16_C(59334), { -INT32_C( 1109512267), -INT32_C( 80650447), INT32_C( 1002028998), -INT32_C( 1570143339), INT32_C( 1819608205), -INT32_C( 1683399021), -INT32_C( 525025), INT32_C( 2011610562), INT32_C( 506840556), -INT32_C( 367434204), -INT32_C( 1171926491), -INT32_C( 480473514), INT32_C( 642765458), INT32_C( 1338112048), -INT32_C( 1202800394), INT32_C( 1630549365) }, { INT32_C( 511665658), -INT32_C( 251094581), 
-INT32_C( 1045746069), INT32_C( 1336150204), INT32_C( 175502298), -INT32_C( 497404180), INT32_C( 1671080174), -INT32_C( 674968867), -INT32_C( 67812305), INT32_C( 1223491293), -INT32_C( 401959124), INT32_C( 2033692319), -INT32_C( 1903907678), -INT32_C( 797909278), INT32_C( 1681066630), INT32_C( 71039189) }, { INT32_C( 0), -INT32_C( 367434204), -INT32_C( 1171926491), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1202800394), INT32_C( 1630549365), INT32_C( 511665658), -INT32_C( 251094581), -INT32_C( 1045746069), INT32_C( 0), INT32_C( 0), -INT32_C( 497404180), INT32_C( 1671080174), -INT32_C( 674968867) }, { INT32_C( 0), -INT32_C( 1683399021), -INT32_C( 525025), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1171926491), -INT32_C( 480473514), -INT32_C( 1903907678), -INT32_C( 797909278), INT32_C( 1681066630), INT32_C( 0), INT32_C( 0), -INT32_C( 251094581), -INT32_C( 1045746069), INT32_C( 1336150204) }, { INT32_C( 0), -INT32_C( 367434204), -INT32_C( 1171926491), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1171926491), -INT32_C( 480473514), INT32_C( 175502298), -INT32_C( 497404180), INT32_C( 1671080174), INT32_C( 0), INT32_C( 0), -INT32_C( 797909278), INT32_C( 1681066630), INT32_C( 71039189) }, { INT32_C( 0), INT32_C( 1338112048), -INT32_C( 1202800394), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 525025), INT32_C( 2011610562), -INT32_C( 67812305), INT32_C( 1223491293), -INT32_C( 401959124), INT32_C( 0), INT32_C( 0), INT32_C( 1223491293), -INT32_C( 401959124), INT32_C( 2033692319) } }, { UINT16_C(12604), { -INT32_C( 349234689), INT32_C( 1803770721), INT32_C( 2065310275), INT32_C( 539475100), -INT32_C( 1157756342), INT32_C( 247760090), INT32_C( 604412392), -INT32_C( 1655356770), -INT32_C( 1131838373), INT32_C( 573050079), -INT32_C( 895663570), INT32_C( 1223345662), -INT32_C( 1459427121), INT32_C( 1438107245), -INT32_C( 25576096), INT32_C( 1520160511) }, { INT32_C( 823600210), INT32_C( 1599356720), INT32_C( 2133455233), -INT32_C( 2050550602), INT32_C( 
1764739580), -INT32_C( 272636273), -INT32_C( 1544734556), INT32_C( 1509853447), -INT32_C( 561310291), -INT32_C( 717365420), -INT32_C( 2024511792), INT32_C( 1997282170), INT32_C( 1944075236), -INT32_C( 966549726), -INT32_C( 546746152), -INT32_C( 2026346535) }, { INT32_C( 0), INT32_C( 0), -INT32_C( 895663570), INT32_C( 1223345662), -INT32_C( 1459427121), INT32_C( 1438107245), INT32_C( 0), INT32_C( 0), INT32_C( 823600210), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1764739580), -INT32_C( 272636273), INT32_C( 0), INT32_C( 0) }, { INT32_C( 0), INT32_C( 0), INT32_C( 604412392), -INT32_C( 1655356770), -INT32_C( 1131838373), INT32_C( 573050079), INT32_C( 0), INT32_C( 0), INT32_C( 1944075236), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 823600210), INT32_C( 1599356720), INT32_C( 0), INT32_C( 0) }, { INT32_C( 0), INT32_C( 0), -INT32_C( 895663570), INT32_C( 1223345662), -INT32_C( 1131838373), INT32_C( 573050079), INT32_C( 0), INT32_C( 0), INT32_C( 1764739580), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1944075236), -INT32_C( 966549726), INT32_C( 0), INT32_C( 0) }, { INT32_C( 0), INT32_C( 0), -INT32_C( 25576096), INT32_C( 1520160511), -INT32_C( 1157756342), INT32_C( 247760090), INT32_C( 0), INT32_C( 0), -INT32_C( 561310291), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 561310291), -INT32_C( 717365420), INT32_C( 0), INT32_C( 0) } }, { UINT16_C(50044), { -INT32_C( 1566388123), -INT32_C( 116886619), INT32_C( 102007802), -INT32_C( 633145094), -INT32_C( 814062484), -INT32_C( 1826663894), INT32_C( 1778121009), -INT32_C( 450005120), -INT32_C( 309866680), INT32_C( 1021808450), INT32_C( 205781778), INT32_C( 1625785844), INT32_C( 321872361), -INT32_C( 458862925), -INT32_C( 951213753), INT32_C( 1638693401) }, { -INT32_C( 1941032118), -INT32_C( 708233789), INT32_C( 635571249), INT32_C( 2055588241), -INT32_C( 577915862), INT32_C( 1254175490), -INT32_C( 300871979), -INT32_C( 749748599), -INT32_C( 1268802063), INT32_C( 76163283), -INT32_C( 987141068), INT32_C( 1598008885), 
INT32_C( 1681706082), -INT32_C( 709952256), -INT32_C( 1799110645), INT32_C( 1869025918) }, { INT32_C( 0), INT32_C( 0), INT32_C( 205781778), INT32_C( 1625785844), INT32_C( 321872361), -INT32_C( 458862925), -INT32_C( 951213753), INT32_C( 0), -INT32_C( 1941032118), -INT32_C( 708233789), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 300871979), -INT32_C( 749748599) }, { INT32_C( 0), INT32_C( 0), INT32_C( 1778121009), -INT32_C( 450005120), -INT32_C( 309866680), INT32_C( 1021808450), INT32_C( 205781778), INT32_C( 0), INT32_C( 1681706082), -INT32_C( 709952256), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 635571249), INT32_C( 2055588241) }, { INT32_C( 0), INT32_C( 0), INT32_C( 205781778), INT32_C( 1625785844), -INT32_C( 309866680), INT32_C( 1021808450), INT32_C( 205781778), INT32_C( 0), -INT32_C( 577915862), INT32_C( 1254175490), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1799110645), INT32_C( 1869025918) }, { INT32_C( 0), INT32_C( 0), -INT32_C( 951213753), INT32_C( 1638693401), -INT32_C( 814062484), -INT32_C( 1826663894), INT32_C( 1778121009), INT32_C( 0), -INT32_C( 1268802063), INT32_C( 76163283), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 987141068), INT32_C( 1598008885) } }, { UINT16_C(51119), { -INT32_C( 1360035292), -INT32_C( 1357241210), INT32_C( 693981161), -INT32_C( 352993362), -INT32_C( 756550364), -INT32_C( 1919749430), -INT32_C( 291565433), -INT32_C( 1531621504), INT32_C( 1481811409), -INT32_C( 1308136247), INT32_C( 1775986107), INT32_C( 1213452580), -INT32_C( 1860486458), -INT32_C( 1289835220), INT32_C( 1050853054), -INT32_C( 555591923) }, { -INT32_C( 986303236), INT32_C( 1534607008), -INT32_C( 943434845), -INT32_C( 351267036), INT32_C( 2105289553), -INT32_C( 1775133992), INT32_C( 1708446552), INT32_C( 658814506), -INT32_C( 1964213526), INT32_C( 1541760184), -INT32_C( 601642568), INT32_C( 298267584), INT32_C( 915358558), INT32_C( 919453918), -INT32_C( 1113808493), INT32_C( 1122295896) }, { 
INT32_C( 1481811409), -INT32_C( 1308136247), INT32_C( 1775986107), INT32_C( 1213452580), INT32_C( 0), -INT32_C( 1289835220), INT32_C( 0), -INT32_C( 555591923), -INT32_C( 986303236), INT32_C( 1534607008), -INT32_C( 943434845), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1708446552), INT32_C( 658814506) }, { -INT32_C( 756550364), -INT32_C( 1919749430), -INT32_C( 291565433), -INT32_C( 1531621504), INT32_C( 0), -INT32_C( 1308136247), INT32_C( 0), INT32_C( 1213452580), INT32_C( 915358558), INT32_C( 919453918), -INT32_C( 1113808493), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 943434845), -INT32_C( 351267036) }, { INT32_C( 1481811409), -INT32_C( 1308136247), INT32_C( 1775986107), INT32_C( 1213452580), INT32_C( 0), -INT32_C( 1308136247), INT32_C( 0), INT32_C( 1213452580), INT32_C( 2105289553), -INT32_C( 1775133992), INT32_C( 1708446552), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1113808493), INT32_C( 1122295896) }, { -INT32_C( 1860486458), -INT32_C( 1289835220), INT32_C( 1050853054), -INT32_C( 555591923), INT32_C( 0), -INT32_C( 1919749430), INT32_C( 0), -INT32_C( 1531621504), -INT32_C( 1964213526), INT32_C( 1541760184), -INT32_C( 601642568), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 601642568), INT32_C( 298267584) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask16 k = test_vec[i].k; simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r; r = simde_mm512_maskz_shuffle_i32x4(k, a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r1)); r = simde_mm512_maskz_shuffle_i32x4(k, a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r2)); r = simde_mm512_maskz_shuffle_i32x4(k, a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r3)); r = 
simde_mm512_maskz_shuffle_i32x4(k, a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r4)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m512i a = simde_test_x86_random_i32x16(); simde__m512i b = simde_test_x86_random_i32x16(); simde__m512i r; simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_maskz_shuffle_i32x4(k, a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_maskz_shuffle_i32x4(k, a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_maskz_shuffle_i32x4(k, a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_maskz_shuffle_i32x4(k, a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_shuffle_i64x2(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const int64_t a[8]; const int64_t b[8]; const int64_t r1[8]; const int64_t r2[8]; const int64_t r3[8]; const int64_t r4[8]; } test_vec[4] = { { { INT64_C( 9064358367993939014), INT64_C( 1655253076713723789), -INT64_C( 5202867091852910807), -INT64_C( 6235410276320205197), INT64_C( 3749974182315333027), -INT64_C( 4869958472303186068), INT64_C( 1925200501811770125), -INT64_C( 2890610351864884010) }, { -INT64_C( 5769834314734457581), -INT64_C( 2329367258326736035), INT64_C( 1594719845308338272), -INT64_C( 5155441086053800087), -INT64_C( 6570647101520095010), -INT64_C( 6957146357537385790), INT64_C( 5775523367379947984), -INT64_C( 8841584596049103377) }, { INT64_C( 3749974182315333027), -INT64_C( 4869958472303186068), INT64_C( 
1925200501811770125), -INT64_C( 2890610351864884010), -INT64_C( 5769834314734457581), -INT64_C( 2329367258326736035), INT64_C( 1594719845308338272), -INT64_C( 5155441086053800087) }, { -INT64_C( 5202867091852910807), -INT64_C( 6235410276320205197), INT64_C( 3749974182315333027), -INT64_C( 4869958472303186068), INT64_C( 5775523367379947984), -INT64_C( 8841584596049103377), -INT64_C( 5769834314734457581), -INT64_C( 2329367258326736035) }, { INT64_C( 3749974182315333027), -INT64_C( 4869958472303186068), INT64_C( 3749974182315333027), -INT64_C( 4869958472303186068), INT64_C( 1594719845308338272), -INT64_C( 5155441086053800087), INT64_C( 5775523367379947984), -INT64_C( 8841584596049103377) }, { INT64_C( 1925200501811770125), -INT64_C( 2890610351864884010), -INT64_C( 5202867091852910807), -INT64_C( 6235410276320205197), -INT64_C( 6570647101520095010), -INT64_C( 6957146357537385790), -INT64_C( 6570647101520095010), -INT64_C( 6957146357537385790) } }, { { -INT64_C( 2367637639300465351), -INT64_C( 4023993786916685751), -INT64_C( 8975674520688276169), -INT64_C( 603091206924706699), INT64_C( 512455698990712865), -INT64_C( 5974950190718677165), INT64_C( 7033280028771872131), INT64_C( 3984593951100089239) }, { -INT64_C( 5535608529132075929), -INT64_C( 701696829275849695), -INT64_C( 2283982201537294337), INT64_C( 5884941281556548445), -INT64_C( 3289699544413626298), INT64_C( 7622813414819714912), -INT64_C( 2313264154265921349), -INT64_C( 2042103578795828371) }, { INT64_C( 512455698990712865), -INT64_C( 5974950190718677165), INT64_C( 7033280028771872131), INT64_C( 3984593951100089239), -INT64_C( 5535608529132075929), -INT64_C( 701696829275849695), -INT64_C( 2283982201537294337), INT64_C( 5884941281556548445) }, { -INT64_C( 8975674520688276169), -INT64_C( 603091206924706699), INT64_C( 512455698990712865), -INT64_C( 5974950190718677165), -INT64_C( 2313264154265921349), -INT64_C( 2042103578795828371), -INT64_C( 5535608529132075929), -INT64_C( 701696829275849695) }, { INT64_C( 
512455698990712865), -INT64_C( 5974950190718677165), INT64_C( 512455698990712865), -INT64_C( 5974950190718677165), -INT64_C( 2283982201537294337), INT64_C( 5884941281556548445), -INT64_C( 2313264154265921349), -INT64_C( 2042103578795828371) }, { INT64_C( 7033280028771872131), INT64_C( 3984593951100089239), -INT64_C( 8975674520688276169), -INT64_C( 603091206924706699), -INT64_C( 3289699544413626298), INT64_C( 7622813414819714912), -INT64_C( 3289699544413626298), INT64_C( 7622813414819714912) } }, { { INT64_C( 2495523895361675037), INT64_C( 8217591265508002360), INT64_C( 8068300996032814999), INT64_C( 7347745147448674936), -INT64_C( 7389326850122284914), -INT64_C( 4680852822059947552), INT64_C( 6203375191466738085), -INT64_C( 2609726800046334694) }, { -INT64_C( 5485488996149822236), INT64_C( 355877138387835401), -INT64_C( 5617182149092587306), -INT64_C( 4699262278827673953), -INT64_C( 7106712558798375719), -INT64_C( 3143302540216587780), INT64_C( 7913268533751450758), -INT64_C( 7160122232820537846) }, { -INT64_C( 7389326850122284914), -INT64_C( 4680852822059947552), INT64_C( 6203375191466738085), -INT64_C( 2609726800046334694), -INT64_C( 5485488996149822236), INT64_C( 355877138387835401), -INT64_C( 5617182149092587306), -INT64_C( 4699262278827673953) }, { INT64_C( 8068300996032814999), INT64_C( 7347745147448674936), -INT64_C( 7389326850122284914), -INT64_C( 4680852822059947552), INT64_C( 7913268533751450758), -INT64_C( 7160122232820537846), -INT64_C( 5485488996149822236), INT64_C( 355877138387835401) }, { -INT64_C( 7389326850122284914), -INT64_C( 4680852822059947552), -INT64_C( 7389326850122284914), -INT64_C( 4680852822059947552), -INT64_C( 5617182149092587306), -INT64_C( 4699262278827673953), INT64_C( 7913268533751450758), -INT64_C( 7160122232820537846) }, { INT64_C( 6203375191466738085), -INT64_C( 2609726800046334694), INT64_C( 8068300996032814999), INT64_C( 7347745147448674936), -INT64_C( 7106712558798375719), -INT64_C( 3143302540216587780), -INT64_C( 
7106712558798375719), -INT64_C( 3143302540216587780) } }, { { -INT64_C( 8914691256948841194), INT64_C( 6639222660098168657), INT64_C( 5298567696017435469), INT64_C( 1986596530830565937), -INT64_C( 3278323102294550156), -INT64_C( 1200415308882288347), INT64_C( 5654149610722742767), -INT64_C( 1687208892942801803) }, { -INT64_C( 8142396623853167699), INT64_C( 6150194591829729735), INT64_C( 7790941442859557810), -INT64_C( 2090942134086114913), INT64_C( 922693962766082006), -INT64_C( 1374215564511848397), INT64_C( 834530889533452594), -INT64_C( 3795942993997956550) }, { -INT64_C( 3278323102294550156), -INT64_C( 1200415308882288347), INT64_C( 5654149610722742767), -INT64_C( 1687208892942801803), -INT64_C( 8142396623853167699), INT64_C( 6150194591829729735), INT64_C( 7790941442859557810), -INT64_C( 2090942134086114913) }, { INT64_C( 5298567696017435469), INT64_C( 1986596530830565937), -INT64_C( 3278323102294550156), -INT64_C( 1200415308882288347), INT64_C( 834530889533452594), -INT64_C( 3795942993997956550), -INT64_C( 8142396623853167699), INT64_C( 6150194591829729735) }, { -INT64_C( 3278323102294550156), -INT64_C( 1200415308882288347), -INT64_C( 3278323102294550156), -INT64_C( 1200415308882288347), INT64_C( 7790941442859557810), -INT64_C( 2090942134086114913), INT64_C( 834530889533452594), -INT64_C( 3795942993997956550) }, { INT64_C( 5654149610722742767), -INT64_C( 1687208892942801803), INT64_C( 5298567696017435469), INT64_C( 1986596530830565937), INT64_C( 922693962766082006), -INT64_C( 1374215564511848397), INT64_C( 922693962766082006), -INT64_C( 1374215564511848397) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r; r = simde_mm512_shuffle_i64x2(a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r1)); r = simde_mm512_shuffle_i64x2(a, b, (0 << 6) + 
(3 << 4) + (2 << 2) + 1); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r2)); r = simde_mm512_shuffle_i64x2(a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r3)); r = simde_mm512_shuffle_i64x2(a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r4)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { simde__m512i a = simde_test_x86_random_i64x8(); simde__m512i b = simde_test_x86_random_i64x8(); simde__m512i r; simde_test_x86_write_i64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_shuffle_i64x2(a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_shuffle_i64x2(a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_shuffle_i64x2(a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_shuffle_i64x2(a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_shuffle_i64x2(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const int64_t src[8]; const simde__mmask8 k; const int64_t a[8]; const int64_t b[8]; const int64_t r1[8]; const int64_t r2[8]; const int64_t r3[8]; const int64_t r4[8]; } test_vec[4] = { { { -INT64_C( 5450500260720638046), -INT64_C( 303069419900149776), -INT64_C( 8586981197250616758), INT64_C( 6464661867379747878), INT64_C( 2825660459400055198), INT64_C( 6419273303731259426), INT64_C( 1268602132586889465), INT64_C( 9148604011860445414) }, UINT8_C(168), { -INT64_C( 52909362248744610), INT64_C( 8350632976837467411), INT64_C( 6372153185034761614), INT64_C( 6997949517676416022), INT64_C( 1652475833497091255), -INT64_C( 
861906724410295004), -INT64_C( 1314511409601812519), INT64_C( 7447405327590554102) }, { -INT64_C( 4065267619625247621), INT64_C( 6024804178023257666), -INT64_C( 463170361341765426), -INT64_C( 1257264663508093834), INT64_C( 7607146190033773352), INT64_C( 2695032016521523353), INT64_C( 1711751179010712517), INT64_C( 2860571913174758224) }, { -INT64_C( 5450500260720638046), -INT64_C( 303069419900149776), -INT64_C( 8586981197250616758), INT64_C( 7447405327590554102), INT64_C( 2825660459400055198), INT64_C( 6024804178023257666), INT64_C( 1268602132586889465), -INT64_C( 1257264663508093834) }, { -INT64_C( 5450500260720638046), -INT64_C( 303069419900149776), -INT64_C( 8586981197250616758), -INT64_C( 861906724410295004), INT64_C( 2825660459400055198), INT64_C( 2860571913174758224), INT64_C( 1268602132586889465), INT64_C( 6024804178023257666) }, { -INT64_C( 5450500260720638046), -INT64_C( 303069419900149776), -INT64_C( 8586981197250616758), -INT64_C( 861906724410295004), INT64_C( 2825660459400055198), -INT64_C( 1257264663508093834), INT64_C( 1268602132586889465), INT64_C( 2860571913174758224) }, { -INT64_C( 5450500260720638046), -INT64_C( 303069419900149776), -INT64_C( 8586981197250616758), INT64_C( 6997949517676416022), INT64_C( 2825660459400055198), INT64_C( 2695032016521523353), INT64_C( 1268602132586889465), INT64_C( 2695032016521523353) } }, { { -INT64_C( 4954376155412881491), -INT64_C( 4399394521808482443), INT64_C( 7570352073549200472), -INT64_C( 4384853372345430786), INT64_C( 1431254751958800638), INT64_C( 4099913796378667813), -INT64_C( 5111514402357481586), INT64_C( 1974507502212431244) }, UINT8_C(244), { -INT64_C( 4324964065583481602), INT64_C( 8206643744777378948), -INT64_C( 2821252711386936820), -INT64_C( 2275425146850935908), INT64_C( 2749640684518899276), -INT64_C( 5530878118052705333), -INT64_C( 1414737971373970710), -INT64_C( 5902581516866399661) }, { INT64_C( 5014823577851251407), INT64_C( 6728466301046625286), -INT64_C( 8433764122048176198), -INT64_C( 
386231857752089966), -INT64_C( 550279245437618529), INT64_C( 8784278492937547543), -INT64_C( 7122873769374004379), INT64_C( 6482710365369198034) }, { -INT64_C( 4954376155412881491), -INT64_C( 4399394521808482443), -INT64_C( 1414737971373970710), -INT64_C( 4384853372345430786), INT64_C( 5014823577851251407), INT64_C( 6728466301046625286), -INT64_C( 8433764122048176198), -INT64_C( 386231857752089966) }, { -INT64_C( 4954376155412881491), -INT64_C( 4399394521808482443), INT64_C( 2749640684518899276), -INT64_C( 4384853372345430786), -INT64_C( 7122873769374004379), INT64_C( 6482710365369198034), INT64_C( 5014823577851251407), INT64_C( 6728466301046625286) }, { -INT64_C( 4954376155412881491), -INT64_C( 4399394521808482443), INT64_C( 2749640684518899276), -INT64_C( 4384853372345430786), -INT64_C( 8433764122048176198), -INT64_C( 386231857752089966), -INT64_C( 7122873769374004379), INT64_C( 6482710365369198034) }, { -INT64_C( 4954376155412881491), -INT64_C( 4399394521808482443), -INT64_C( 2821252711386936820), -INT64_C( 4384853372345430786), -INT64_C( 550279245437618529), INT64_C( 8784278492937547543), -INT64_C( 550279245437618529), INT64_C( 8784278492937547543) } }, { { INT64_C( 9139720534520587244), -INT64_C( 1767678788104520898), INT64_C( 5000590375510080188), INT64_C( 1919160357390382991), INT64_C( 2414719824660272285), INT64_C( 4157551968255970121), INT64_C( 8019334095005824401), INT64_C( 7324079806595239261) }, UINT8_C(111), { INT64_C( 8553589047221709212), INT64_C( 7402557902687469459), -INT64_C( 7498857272533663700), -INT64_C( 5028335926252766867), -INT64_C( 8030523979005130452), -INT64_C( 2886959456527417257), INT64_C( 6329342989913634101), INT64_C( 2713515502149318096) }, { -INT64_C( 257045314966598431), -INT64_C( 3184761139703240630), INT64_C( 6839535778382624425), INT64_C( 4844734526285154846), -INT64_C( 8330938845354690368), INT64_C( 299294054410217399), INT64_C( 4715410797492648580), INT64_C( 8084405194229005452) }, { -INT64_C( 8030523979005130452), -INT64_C( 
2886959456527417257), INT64_C( 6329342989913634101), INT64_C( 2713515502149318096), INT64_C( 2414719824660272285), -INT64_C( 3184761139703240630), INT64_C( 6839535778382624425), INT64_C( 7324079806595239261) }, { -INT64_C( 7498857272533663700), -INT64_C( 5028335926252766867), -INT64_C( 8030523979005130452), -INT64_C( 2886959456527417257), INT64_C( 2414719824660272285), INT64_C( 8084405194229005452), -INT64_C( 257045314966598431), INT64_C( 7324079806595239261) }, { -INT64_C( 8030523979005130452), -INT64_C( 2886959456527417257), -INT64_C( 8030523979005130452), -INT64_C( 2886959456527417257), INT64_C( 2414719824660272285), INT64_C( 4844734526285154846), INT64_C( 4715410797492648580), INT64_C( 7324079806595239261) }, { INT64_C( 6329342989913634101), INT64_C( 2713515502149318096), -INT64_C( 7498857272533663700), -INT64_C( 5028335926252766867), INT64_C( 2414719824660272285), INT64_C( 299294054410217399), -INT64_C( 8330938845354690368), INT64_C( 7324079806595239261) } }, { { -INT64_C( 7432847169673604597), INT64_C( 3278080296916638239), INT64_C( 2500253803823961422), -INT64_C( 6373647233911355029), -INT64_C( 5112485413701524845), INT64_C( 6628930374438484264), INT64_C( 6449012749347077830), INT64_C( 7262899722324840327) }, UINT8_C( 98), { -INT64_C( 2398811228970624576), -INT64_C( 4368695573899147544), -INT64_C( 3884344527171830766), -INT64_C( 4043950958213120898), INT64_C( 8907287267678477785), -INT64_C( 2172576912451991407), INT64_C( 1240331113154644272), -INT64_C( 6154889269332684346) }, { -INT64_C( 8837180719984154034), INT64_C( 1603128525128954254), INT64_C( 117859106248897022), -INT64_C( 1151404580747202167), INT64_C( 272236279839682125), -INT64_C( 3054672174236277857), INT64_C( 6884887643602427216), INT64_C( 922322850172001082) }, { -INT64_C( 7432847169673604597), -INT64_C( 2172576912451991407), INT64_C( 2500253803823961422), -INT64_C( 6373647233911355029), -INT64_C( 5112485413701524845), INT64_C( 1603128525128954254), INT64_C( 117859106248897022), INT64_C( 
7262899722324840327) }, { -INT64_C( 7432847169673604597), -INT64_C( 4043950958213120898), INT64_C( 2500253803823961422), -INT64_C( 6373647233911355029), -INT64_C( 5112485413701524845), INT64_C( 922322850172001082), -INT64_C( 8837180719984154034), INT64_C( 7262899722324840327) }, { -INT64_C( 7432847169673604597), -INT64_C( 2172576912451991407), INT64_C( 2500253803823961422), -INT64_C( 6373647233911355029), -INT64_C( 5112485413701524845), -INT64_C( 1151404580747202167), INT64_C( 6884887643602427216), INT64_C( 7262899722324840327) }, { -INT64_C( 7432847169673604597), -INT64_C( 6154889269332684346), INT64_C( 2500253803823961422), -INT64_C( 6373647233911355029), -INT64_C( 5112485413701524845), -INT64_C( 3054672174236277857), INT64_C( 272236279839682125), INT64_C( 7262899722324840327) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i src = simde_mm512_loadu_epi64(test_vec[i].src); simde__mmask8 k = test_vec[i].k; simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r; r = simde_mm512_mask_shuffle_i64x2(src, k, a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r1)); r = simde_mm512_mask_shuffle_i64x2(src, k, a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r2)); r = simde_mm512_mask_shuffle_i64x2(src, k, a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r3)); r = simde_mm512_mask_shuffle_i64x2(src, k, a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r4)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { simde__m512i src = simde_test_x86_random_i64x8(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m512i a = simde_test_x86_random_i64x8(); simde__m512i b = 
simde_test_x86_random_i64x8(); simde__m512i r; simde_test_x86_write_i64x8(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k,SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_mask_shuffle_i64x2(src, k, a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_mask_shuffle_i64x2(src, k, a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_mask_shuffle_i64x2(src, k, a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_mask_shuffle_i64x2(src, k, a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_maskz_shuffle_i64x2(SIMDE_MUNIT_TEST_ARGS) { #if 1 const struct { const simde__mmask8 k; const int64_t a[8]; const int64_t b[8]; const int64_t r1[8]; const int64_t r2[8]; const int64_t r3[8]; const int64_t r4[8]; } test_vec[4] = { { UINT8_C( 21), { INT64_C( 5490098567529945882), INT64_C( 5911748109366692064), -INT64_C( 8897163291908773346), INT64_C( 3301347518828238380), -INT64_C( 1119829433731952487), -INT64_C( 5331585107673360358), INT64_C( 4464554940584633169), INT64_C( 3197776265679000607) }, { INT64_C( 7789479484726821144), INT64_C( 3381955780673304226), INT64_C( 1985688297139057217), INT64_C( 7432819889251714664), -INT64_C( 1788306984465892497), -INT64_C( 6257452164178687458), -INT64_C( 4261943065901872957), INT64_C( 5027677267904374799) }, { -INT64_C( 1119829433731952487), INT64_C( 0), INT64_C( 4464554940584633169), INT64_C( 0), INT64_C( 7789479484726821144), INT64_C( 0), INT64_C( 0), INT64_C( 0) }, { -INT64_C( 8897163291908773346), INT64_C( 0), -INT64_C( 1119829433731952487), INT64_C( 0), -INT64_C( 4261943065901872957), INT64_C( 0), INT64_C( 0), 
INT64_C( 0) }, { -INT64_C( 1119829433731952487), INT64_C( 0), -INT64_C( 1119829433731952487), INT64_C( 0), INT64_C( 1985688297139057217), INT64_C( 0), INT64_C( 0), INT64_C( 0) }, { INT64_C( 4464554940584633169), INT64_C( 0), -INT64_C( 8897163291908773346), INT64_C( 0), -INT64_C( 1788306984465892497), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C( 97), { INT64_C( 9159070629727962964), INT64_C( 685268701085636527), INT64_C( 4909814318836826506), -INT64_C( 7336539410741362199), INT64_C( 1376891355797116519), INT64_C( 2095112337392534218), -INT64_C( 8289064626617812586), -INT64_C( 2687089379983617324) }, { -INT64_C( 3902488924882842528), -INT64_C( 7690767184866006636), -INT64_C( 7165439453353773984), -INT64_C( 3126223317569639856), INT64_C( 4215773483752756152), -INT64_C( 7085106431956789992), -INT64_C( 9043738006621392103), INT64_C( 6440959600597742874) }, { INT64_C( 1376891355797116519), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), -INT64_C( 7690767184866006636), -INT64_C( 7165439453353773984), INT64_C( 0) }, { INT64_C( 4909814318836826506), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 6440959600597742874), -INT64_C( 3902488924882842528), INT64_C( 0) }, { INT64_C( 1376891355797116519), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), -INT64_C( 3126223317569639856), -INT64_C( 9043738006621392103), INT64_C( 0) }, { -INT64_C( 8289064626617812586), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), -INT64_C( 7085106431956789992), INT64_C( 4215773483752756152), INT64_C( 0) } }, { UINT8_C(246), { -INT64_C( 1692555680829206022), INT64_C( 4104401340214909238), -INT64_C( 8525334622065994140), -INT64_C( 7346632544040108750), -INT64_C( 5615559392548837739), INT64_C( 6745411449051297311), INT64_C( 2753847769439536985), INT64_C( 8870198896857146078) }, { -INT64_C( 4459771292523684371), -INT64_C( 8772235626085438997), -INT64_C( 5412082286445972279), INT64_C( 5994092533518573517), INT64_C( 8344562663899845480), INT64_C( 9165840875523732353), 
-INT64_C( 6892440561819752497), INT64_C( 4609860655822179136) }, { INT64_C( 0), INT64_C( 6745411449051297311), INT64_C( 2753847769439536985), INT64_C( 0), -INT64_C( 4459771292523684371), -INT64_C( 8772235626085438997), -INT64_C( 5412082286445972279), INT64_C( 5994092533518573517) }, { INT64_C( 0), -INT64_C( 7346632544040108750), -INT64_C( 5615559392548837739), INT64_C( 0), -INT64_C( 6892440561819752497), INT64_C( 4609860655822179136), -INT64_C( 4459771292523684371), -INT64_C( 8772235626085438997) }, { INT64_C( 0), INT64_C( 6745411449051297311), -INT64_C( 5615559392548837739), INT64_C( 0), -INT64_C( 5412082286445972279), INT64_C( 5994092533518573517), -INT64_C( 6892440561819752497), INT64_C( 4609860655822179136) }, { INT64_C( 0), INT64_C( 8870198896857146078), -INT64_C( 8525334622065994140), INT64_C( 0), INT64_C( 8344562663899845480), INT64_C( 9165840875523732353), INT64_C( 8344562663899845480), INT64_C( 9165840875523732353) } }, { UINT8_C( 86), { INT64_C( 868720944030305205), INT64_C( 6202655082719394730), -INT64_C( 6998371639605216898), INT64_C( 3888775267815360030), -INT64_C( 3448402552616200705), -INT64_C( 6426308844223863156), -INT64_C( 2047970398376166429), -INT64_C( 4800419675374351956) }, { -INT64_C( 9203112485147302832), INT64_C( 7686744639541042399), -INT64_C( 4363719656928644408), -INT64_C( 4650158549917240067), INT64_C( 2660075167475387509), INT64_C( 8847840104046912985), INT64_C( 1919790311379773897), INT64_C( 6528333684952896619) }, { INT64_C( 0), -INT64_C( 6426308844223863156), -INT64_C( 2047970398376166429), INT64_C( 0), -INT64_C( 9203112485147302832), INT64_C( 0), -INT64_C( 4363719656928644408), INT64_C( 0) }, { INT64_C( 0), INT64_C( 3888775267815360030), -INT64_C( 3448402552616200705), INT64_C( 0), INT64_C( 1919790311379773897), INT64_C( 0), -INT64_C( 9203112485147302832), INT64_C( 0) }, { INT64_C( 0), -INT64_C( 6426308844223863156), -INT64_C( 3448402552616200705), INT64_C( 0), -INT64_C( 4363719656928644408), INT64_C( 0), INT64_C( 
1919790311379773897), INT64_C( 0) }, { INT64_C( 0), -INT64_C( 4800419675374351956), -INT64_C( 6998371639605216898), INT64_C( 0), INT64_C( 2660075167475387509), INT64_C( 0), INT64_C( 2660075167475387509), INT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask8 k = test_vec[i].k; simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r; r = simde_mm512_maskz_shuffle_i64x2(k, a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r1)); r = simde_mm512_maskz_shuffle_i64x2(k, a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r2)); r = simde_mm512_maskz_shuffle_i64x2(k, a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r3)); r = simde_mm512_maskz_shuffle_i64x2(k, a, b, (2 << 6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r4)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 4 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m512i a = simde_test_x86_random_i64x8(); simde__m512i b = simde_test_x86_random_i64x8(); simde__m512i r; simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_maskz_shuffle_i64x2(k, a, b, (1 << 6) + (0 << 4) + (3 << 2) + 2); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_maskz_shuffle_i64x2(k, a, b, (0 << 6) + (3 << 4) + (2 << 2) + 1); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_maskz_shuffle_i64x2(k, a, b, (3 << 6) + (1 << 4) + (2 << 2) + 2); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_MIDDLE); r = simde_mm512_maskz_shuffle_i64x2(k, a, b, (2 << 
6) + (2 << 4) + (1 << 2) + 3); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_shuffle_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_shuffle_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_shuffle_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_shuffle_f32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_shuffle_f32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_shuffle_f32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_shuffle_f64x2) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_shuffle_f64x2) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_shuffle_f64x2) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_shuffle_i32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_shuffle_i32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_shuffle_i32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_shuffle_i64x2) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_shuffle_i64x2) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_shuffle_i64x2) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_shuffle_f32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_shuffle_f32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_shuffle_f32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_shuffle_f64x2) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_shuffle_f64x2) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_shuffle_f64x2) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_shuffle_i32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_shuffle_i32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_shuffle_i32x4) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_shuffle_i64x2) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_shuffle_i64x2) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_shuffle_i64x2) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/skel.c000066400000000000000000000025711400333146700163630ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, 
distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #define SIMDE_TEST_X86_AVX512_INSN xxx #include #include SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/sll.c000066400000000000000000002476661400333146700162370ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN sll #include #include static int test_simde_mm512_sll_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[32]; const int64_t b[2]; const int16_t r[32]; } test_vec[] = { { { -INT16_C( 11176), -INT16_C( 31146), INT16_C( 31553), INT16_C( 68), INT16_C( 109), -INT16_C( 6231), -INT16_C( 28515), INT16_C( 4842), -INT16_C( 8039), -INT16_C( 26148), -INT16_C( 31237), INT16_C( 19279), -INT16_C( 2919), -INT16_C( 17167), INT16_C( 24677), -INT16_C( 16888), INT16_C( 24116), INT16_C( 30020), -INT16_C( 30246), INT16_C( 18294), INT16_C( 8073), INT16_C( 10031), INT16_C( 6575), INT16_C( 18489), INT16_C( 5625), -INT16_C( 2847), INT16_C( 12442), INT16_C( 13375), INT16_C( 12324), -INT16_C( 29968), -INT16_C( 1903), -INT16_C( 15032) }, { INT64_C( 19), INT64_C( 23) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 22508), -INT16_C( 20817), -INT16_C( 4391), -INT16_C( 542), -INT16_C( 11489), -INT16_C( 20345), -INT16_C( 12341), INT16_C( 8821), -INT16_C( 20388), INT16_C( 29011), -INT16_C( 13215), INT16_C( 12560), -INT16_C( 10637), INT16_C( 13488), INT16_C( 30677), -INT16_C( 5649), -INT16_C( 25056), -INT16_C( 1640), INT16_C( 31372), -INT16_C( 21514), INT16_C( 32333), INT16_C( 6491), -INT16_C( 11955), -INT16_C( 22213), -INT16_C( 28799), 
-INT16_C( 7397), INT16_C( 11099), -INT16_C( 12780), -INT16_C( 15102), -INT16_C( 10493), -INT16_C( 3524), INT16_C( 23745) }, { INT64_C( 4), INT64_C( 14) }, { -INT16_C( 32448), -INT16_C( 5392), -INT16_C( 4720), -INT16_C( 8672), INT16_C( 12784), INT16_C( 2160), -INT16_C( 848), INT16_C( 10064), INT16_C( 1472), INT16_C( 5424), -INT16_C( 14832), INT16_C( 4352), INT16_C( 26416), INT16_C( 19200), INT16_C( 32080), -INT16_C( 24848), -INT16_C( 7680), -INT16_C( 26240), -INT16_C( 22336), -INT16_C( 16544), -INT16_C( 6960), -INT16_C( 27216), INT16_C( 5328), -INT16_C( 27728), -INT16_C( 2032), INT16_C( 12720), -INT16_C( 19024), -INT16_C( 7872), INT16_C( 20512), INT16_C( 28720), INT16_C( 9152), -INT16_C( 13296) } }, { { -INT16_C( 9212), INT16_C( 24409), INT16_C( 27911), INT16_C( 2350), INT16_C( 12594), INT16_C( 28641), -INT16_C( 24029), -INT16_C( 19509), INT16_C( 8699), -INT16_C( 12593), -INT16_C( 26771), INT16_C( 14319), INT16_C( 10683), -INT16_C( 20658), INT16_C( 3999), -INT16_C( 23771), INT16_C( 32491), -INT16_C( 3325), INT16_C( 12780), INT16_C( 7932), -INT16_C( 8862), -INT16_C( 31347), INT16_C( 22911), INT16_C( 31288), INT16_C( 1914), -INT16_C( 6327), INT16_C( 14495), INT16_C( 23070), INT16_C( 27746), INT16_C( 265), INT16_C( 12156), INT16_C( 26532) }, { INT64_C( 7), INT64_C( 3) }, { INT16_C( 512), -INT16_C( 21376), -INT16_C( 31872), -INT16_C( 26880), -INT16_C( 26368), -INT16_C( 3968), INT16_C( 4480), -INT16_C( 6784), -INT16_C( 640), INT16_C( 26496), -INT16_C( 18816), -INT16_C( 2176), -INT16_C( 8832), -INT16_C( 22784), -INT16_C( 12416), -INT16_C( 28032), INT16_C( 30080), -INT16_C( 32384), -INT16_C( 2560), INT16_C( 32256), -INT16_C( 20224), -INT16_C( 14720), -INT16_C( 16512), INT16_C( 7168), -INT16_C( 17152), -INT16_C( 23424), INT16_C( 20352), INT16_C( 3840), INT16_C( 12544), -INT16_C( 31616), -INT16_C( 16896), -INT16_C( 11776) } }, { { INT16_C( 30719), -INT16_C( 24833), INT16_C( 7600), INT16_C( 4856), INT16_C( 394), INT16_C( 1555), -INT16_C( 18640), -INT16_C( 8595), -INT16_C( 
14241), INT16_C( 14199), INT16_C( 12063), INT16_C( 21362), INT16_C( 12661), INT16_C( 4871), INT16_C( 13865), INT16_C( 10284), INT16_C( 11181), INT16_C( 24006), -INT16_C( 16823), -INT16_C( 11409), -INT16_C( 32065), -INT16_C( 3879), INT16_C( 17978), -INT16_C( 26162), INT16_C( 17678), INT16_C( 11728), INT16_C( 17013), -INT16_C( 5503), -INT16_C( 30604), -INT16_C( 25091), INT16_C( 10686), INT16_C( 27845) }, { INT64_C( 1), INT64_C( 19) }, { -INT16_C( 4098), INT16_C( 15870), INT16_C( 15200), INT16_C( 9712), INT16_C( 788), INT16_C( 3110), INT16_C( 28256), -INT16_C( 17190), -INT16_C( 28482), INT16_C( 28398), INT16_C( 24126), -INT16_C( 22812), INT16_C( 25322), INT16_C( 9742), INT16_C( 27730), INT16_C( 20568), INT16_C( 22362), -INT16_C( 17524), INT16_C( 31890), -INT16_C( 22818), INT16_C( 1406), -INT16_C( 7758), -INT16_C( 29580), INT16_C( 13212), -INT16_C( 30180), INT16_C( 23456), -INT16_C( 31510), -INT16_C( 11006), INT16_C( 4328), INT16_C( 15354), INT16_C( 21372), -INT16_C( 9846) } }, { { INT16_C( 24332), -INT16_C( 32308), INT16_C( 19873), INT16_C( 5483), INT16_C( 26838), -INT16_C( 27470), INT16_C( 30610), -INT16_C( 6400), -INT16_C( 13822), INT16_C( 19333), -INT16_C( 2557), -INT16_C( 16812), INT16_C( 19520), -INT16_C( 12108), INT16_C( 16915), INT16_C( 8047), INT16_C( 15521), INT16_C( 17312), INT16_C( 2953), INT16_C( 24408), INT16_C( 2931), INT16_C( 1524), -INT16_C( 2942), -INT16_C( 31252), INT16_C( 29118), -INT16_C( 15920), INT16_C( 9319), -INT16_C( 22656), INT16_C( 13425), -INT16_C( 31624), -INT16_C( 6282), INT16_C( 6307) }, { INT64_C( 24), INT64_C( 22) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { 
-INT16_C( 27099), -INT16_C( 29579), -INT16_C( 2629), INT16_C( 11316), -INT16_C( 21463), -INT16_C( 24656), INT16_C( 21395), -INT16_C( 18505), INT16_C( 4758), -INT16_C( 7068), INT16_C( 28870), -INT16_C( 31579), INT16_C( 27761), INT16_C( 26309), -INT16_C( 29920), INT16_C( 17689), -INT16_C( 29150), -INT16_C( 8751), INT16_C( 1411), -INT16_C( 21495), -INT16_C( 17999), INT16_C( 17740), INT16_C( 780), -INT16_C( 23812), INT16_C( 24598), -INT16_C( 9082), INT16_C( 11216), INT16_C( 16736), INT16_C( 9880), -INT16_C( 18265), -INT16_C( 15951), -INT16_C( 11267) }, { INT64_C( 16), INT64_C( 28) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 23002), -INT16_C( 2111), INT16_C( 8658), INT16_C( 27192), -INT16_C( 8121), -INT16_C( 1758), INT16_C( 8097), -INT16_C( 3892), INT16_C( 32237), -INT16_C( 15933), INT16_C( 17206), -INT16_C( 22201), INT16_C( 4366), -INT16_C( 8921), INT16_C( 18648), -INT16_C( 318), -INT16_C( 31762), -INT16_C( 16139), INT16_C( 11941), -INT16_C( 5078), INT16_C( 19470), -INT16_C( 20507), -INT16_C( 19861), INT16_C( 22943), INT16_C( 25391), INT16_C( 25882), INT16_C( 24998), -INT16_C( 19442), INT16_C( 13939), INT16_C( 19346), INT16_C( 21630), INT16_C( 27721) }, { INT64_C( 15), INT64_C( 6) }, { INT16_C( 0), INT16_MIN, INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_C( 0), INT16_MIN, INT16_C( 0), INT16_MIN, INT16_MIN, INT16_C( 0), INT16_MIN, INT16_C( 0), INT16_MIN, INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_MIN, INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_C( 0), INT16_C( 0), INT16_MIN } }, { { -INT16_C( 
31956), -INT16_C( 11627), -INT16_C( 23324), INT16_C( 22407), INT16_C( 6618), INT16_C( 22690), -INT16_C( 5011), INT16_C( 17860), -INT16_C( 3797), -INT16_C( 26430), INT16_C( 11337), -INT16_C( 4845), INT16_C( 15739), INT16_C( 31996), INT16_C( 25862), INT16_C( 13228), INT16_C( 16872), -INT16_C( 13307), -INT16_C( 29467), -INT16_C( 16604), -INT16_C( 14683), INT16_C( 4887), -INT16_C( 9038), -INT16_C( 8872), INT16_C( 7117), INT16_C( 5749), -INT16_C( 30649), -INT16_C( 15869), INT16_C( 197), -INT16_C( 13250), -INT16_C( 5531), INT16_C( 19967) }, { INT64_C( 6), INT64_C( 18) }, { -INT16_C( 13568), -INT16_C( 23232), INT16_C( 14592), -INT16_C( 7744), INT16_C( 30336), INT16_C( 10368), INT16_C( 6976), INT16_C( 28928), INT16_C( 19136), INT16_C( 12416), INT16_C( 4672), INT16_C( 17600), INT16_C( 24256), INT16_C( 16128), INT16_C( 16768), -INT16_C( 5376), INT16_C( 31232), INT16_C( 320), INT16_C( 14656), -INT16_C( 14080), -INT16_C( 22208), -INT16_C( 14912), INT16_C( 11392), INT16_C( 22016), -INT16_C( 3264), -INT16_C( 25280), INT16_C( 4544), -INT16_C( 32576), INT16_C( 12608), INT16_C( 3968), -INT16_C( 26304), INT16_C( 32704) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_sll_epi16(a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_sll_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t src[32]; const simde__mmask32 k; const int16_t a[32]; const int64_t count[2]; const int16_t r[32]; } test_vec[] = { { { -INT16_C( 21525), -INT16_C( 32533), INT16_C( 21413), INT16_C( 7535), -INT16_C( 3695), INT16_C( 13787), INT16_C( 26396), -INT16_C( 19626), -INT16_C( 10603), -INT16_C( 32249), -INT16_C( 25187), -INT16_C( 28361), INT16_C( 8365), INT16_C( 30498), -INT16_C( 15983), INT16_C( 32126), INT16_C( 26988), INT16_C( 4605), INT16_C( 
28093), INT16_C( 20015), INT16_C( 2654), INT16_C( 31364), -INT16_C( 9615), INT16_C( 1837), INT16_C( 13488), INT16_C( 19849), -INT16_C( 16175), INT16_C( 32479), INT16_C( 481), INT16_C( 29430), INT16_C( 29890), INT16_C( 12015) }, UINT32_C(2604723678), { INT16_C( 28506), -INT16_C( 18199), INT16_C( 28025), -INT16_C( 5326), INT16_C( 24391), -INT16_C( 2062), INT16_C( 31636), INT16_C( 25925), INT16_C( 9275), INT16_C( 7396), -INT16_C( 9691), -INT16_C( 6257), INT16_C( 32334), INT16_C( 11285), INT16_C( 21867), -INT16_C( 14905), -INT16_C( 20028), INT16_C( 15997), -INT16_C( 20706), INT16_C( 26153), INT16_C( 6927), -INT16_C( 23715), -INT16_C( 23914), -INT16_C( 12024), -INT16_C( 4922), -INT16_C( 5138), INT16_C( 32198), INT16_C( 5586), -INT16_C( 5893), INT16_C( 26433), INT16_C( 2365), INT16_C( 556) }, { INT64_C( 7511557063101688506), INT64_C( 426456846674140292) }, { -INT16_C( 21525), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 13787), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 32249), INT16_C( 0), INT16_C( 0), INT16_C( 8365), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 26988), INT16_C( 4605), INT16_C( 28093), INT16_C( 20015), INT16_C( 2654), INT16_C( 31364), INT16_C( 0), INT16_C( 1837), INT16_C( 0), INT16_C( 0), -INT16_C( 16175), INT16_C( 0), INT16_C( 0), INT16_C( 29430), INT16_C( 29890), INT16_C( 0) } }, { { -INT16_C( 9984), -INT16_C( 14352), -INT16_C( 15530), INT16_C( 21212), INT16_C( 7595), -INT16_C( 5959), -INT16_C( 6874), -INT16_C( 7958), INT16_C( 10895), -INT16_C( 5703), -INT16_C( 2157), INT16_C( 5969), INT16_C( 23955), -INT16_C( 11727), INT16_C( 7537), INT16_C( 29143), -INT16_C( 14346), INT16_C( 19768), INT16_C( 5258), INT16_C( 13727), INT16_C( 22578), INT16_C( 22558), INT16_C( 2109), -INT16_C( 12999), -INT16_C( 3533), -INT16_C( 14666), INT16_C( 2025), INT16_C( 32222), INT16_C( 3940), -INT16_C( 10929), INT16_C( 9772), INT16_C( 9031) }, UINT32_C(2020638701), { INT16_C( 3988), -INT16_C( 14675), -INT16_C( 13465), -INT16_C( 23522), INT16_C( 22484), 
INT16_C( 1905), INT16_C( 10057), INT16_C( 13261), -INT16_C( 21713), -INT16_C( 27728), -INT16_C( 69), -INT16_C( 6295), -INT16_C( 20443), INT16_C( 4618), INT16_C( 31279), -INT16_C( 15478), INT16_C( 14473), -INT16_C( 3959), -INT16_C( 22525), -INT16_C( 10347), INT16_C( 1791), INT16_C( 18910), -INT16_C( 21458), INT16_C( 23932), INT16_C( 11351), INT16_C( 4848), INT16_C( 22827), INT16_C( 20730), INT16_C( 1033), INT16_C( 14690), -INT16_C( 4737), INT16_C( 2300) }, { INT64_C( 3242748060613838373), -INT64_C( 2152735419077828972) }, { INT16_C( 0), -INT16_C( 14352), INT16_C( 0), INT16_C( 0), INT16_C( 7595), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 29143), -INT16_C( 14346), INT16_C( 19768), INT16_C( 5258), INT16_C( 13727), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 12999), -INT16_C( 3533), -INT16_C( 14666), INT16_C( 2025), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 9031) } }, { { INT16_C( 4126), INT16_C( 18932), -INT16_C( 4503), INT16_C( 29593), -INT16_C( 781), INT16_C( 29356), -INT16_C( 22295), INT16_C( 3706), INT16_C( 29486), INT16_C( 23606), INT16_C( 13825), -INT16_C( 26998), INT16_C( 21), -INT16_C( 24744), INT16_C( 30963), INT16_C( 4481), INT16_C( 30344), -INT16_C( 3749), -INT16_C( 2972), INT16_C( 22372), INT16_C( 4336), -INT16_C( 9783), INT16_C( 17593), -INT16_C( 6169), INT16_C( 7863), -INT16_C( 18108), -INT16_C( 12716), INT16_C( 26959), -INT16_C( 22578), -INT16_C( 16119), -INT16_C( 30177), -INT16_C( 22573) }, UINT32_C(1704537600), { -INT16_C( 734), INT16_C( 5052), -INT16_C( 31218), -INT16_C( 14356), -INT16_C( 11062), -INT16_C( 32338), -INT16_C( 3342), INT16_C( 17978), -INT16_C( 30272), -INT16_C( 28752), -INT16_C( 18127), INT16_C( 20560), INT16_C( 9027), INT16_C( 17656), -INT16_C( 28335), INT16_C( 29865), INT16_C( 25998), -INT16_C( 25465), INT16_C( 29675), -INT16_C( 19101), INT16_C( 4679), INT16_C( 14647), INT16_C( 28932), -INT16_C( 14976), INT16_C( 
12539), INT16_C( 11348), -INT16_C( 23319), INT16_C( 11388), INT16_C( 29896), INT16_C( 6512), INT16_C( 6405), -INT16_C( 27507) }, { -INT64_C( 3521651594985728897), -INT64_C( 4364118018959517786) }, { INT16_C( 4126), INT16_C( 18932), -INT16_C( 4503), INT16_C( 29593), -INT16_C( 781), INT16_C( 29356), -INT16_C( 22295), INT16_C( 3706), INT16_C( 29486), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 21), INT16_C( 0), INT16_C( 30963), INT16_C( 4481), INT16_C( 0), -INT16_C( 3749), -INT16_C( 2972), INT16_C( 0), INT16_C( 0), -INT16_C( 9783), INT16_C( 17593), INT16_C( 0), INT16_C( 0), -INT16_C( 18108), INT16_C( 0), INT16_C( 26959), -INT16_C( 22578), INT16_C( 0), INT16_C( 0), -INT16_C( 22573) } }, { { -INT16_C( 15431), -INT16_C( 23825), INT16_C( 27752), INT16_C( 12494), INT16_C( 16352), -INT16_C( 6583), -INT16_C( 10408), -INT16_C( 10374), -INT16_C( 21781), INT16_C( 29506), INT16_C( 25150), -INT16_C( 7101), INT16_C( 19641), -INT16_C( 32369), -INT16_C( 299), -INT16_C( 29115), INT16_C( 13506), INT16_C( 10800), -INT16_C( 352), -INT16_C( 32422), -INT16_C( 23747), -INT16_C( 27033), -INT16_C( 7814), INT16_C( 26221), -INT16_C( 20597), -INT16_C( 13607), INT16_C( 7185), -INT16_C( 13650), INT16_C( 15720), INT16_C( 15692), -INT16_C( 28356), -INT16_C( 309) }, UINT32_C(1713961925), { -INT16_C( 32006), INT16_C( 14311), INT16_C( 20005), -INT16_C( 24371), INT16_C( 15151), -INT16_C( 17914), -INT16_C( 8214), -INT16_C( 892), INT16_C( 13308), INT16_C( 25798), INT16_C( 4720), -INT16_C( 21342), INT16_C( 28067), INT16_C( 27050), -INT16_C( 11671), INT16_C( 25551), -INT16_C( 18860), INT16_C( 31386), INT16_C( 26628), INT16_C( 13082), INT16_C( 8355), -INT16_C( 29203), INT16_C( 29439), -INT16_C( 1143), INT16_C( 20645), INT16_C( 5472), INT16_C( 610), INT16_C( 1730), INT16_C( 27759), -INT16_C( 10129), INT16_C( 15935), -INT16_C( 27845) }, { -INT64_C( 2221638839774095628), INT64_C( 3405557541033089095) }, { INT16_C( 0), -INT16_C( 23825), INT16_C( 0), INT16_C( 12494), INT16_C( 16352), -INT16_C( 6583), 
INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 25150), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 13506), INT16_C( 10800), -INT16_C( 352), INT16_C( 0), -INT16_C( 23747), INT16_C( 0), -INT16_C( 7814), INT16_C( 26221), -INT16_C( 20597), INT16_C( 0), INT16_C( 0), -INT16_C( 13650), INT16_C( 15720), INT16_C( 0), INT16_C( 0), -INT16_C( 309) } }, { { -INT16_C( 23992), -INT16_C( 21947), INT16_C( 1956), INT16_C( 5296), INT16_C( 8051), -INT16_C( 19732), INT16_C( 10333), INT16_C( 20806), INT16_C( 21502), INT16_C( 15433), INT16_C( 29819), -INT16_C( 15843), -INT16_C( 29811), INT16_C( 5897), INT16_C( 19587), -INT16_C( 13497), -INT16_C( 29458), -INT16_C( 27786), INT16_C( 9875), INT16_C( 1703), -INT16_C( 27834), -INT16_C( 23623), -INT16_C( 69), -INT16_C( 17931), INT16_C( 15954), -INT16_C( 12811), INT16_C( 4787), INT16_C( 16528), -INT16_C( 26210), INT16_C( 8535), -INT16_C( 24859), -INT16_C( 11027) }, UINT32_C(3177669418), { INT16_C( 3721), -INT16_C( 12348), INT16_C( 32161), INT16_C( 23923), INT16_C( 26748), -INT16_C( 12778), INT16_C( 3238), INT16_C( 22940), INT16_C( 11294), -INT16_C( 17255), -INT16_C( 3643), -INT16_C( 21538), -INT16_C( 13425), -INT16_C( 17793), -INT16_C( 6610), -INT16_C( 18569), INT16_C( 15348), -INT16_C( 27257), -INT16_C( 1352), INT16_C( 13554), INT16_C( 2402), INT16_C( 2051), -INT16_C( 24811), INT16_C( 13154), -INT16_C( 1077), -INT16_C( 28432), -INT16_C( 12564), INT16_C( 31803), -INT16_C( 17767), -INT16_C( 14538), -INT16_C( 21088), -INT16_C( 27522) }, { INT64_C( 7049854150941214185), INT64_C( 4859045864727566629) }, { -INT16_C( 23992), INT16_C( 0), INT16_C( 1956), INT16_C( 0), INT16_C( 8051), INT16_C( 0), INT16_C( 10333), INT16_C( 20806), INT16_C( 0), INT16_C( 0), INT16_C( 29819), -INT16_C( 15843), -INT16_C( 29811), INT16_C( 0), INT16_C( 0), -INT16_C( 13497), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1703), -INT16_C( 27834), INT16_C( 0), INT16_C( 0), -INT16_C( 17931), INT16_C( 0), -INT16_C( 12811), INT16_C( 0), 
INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 24859), INT16_C( 0) } }, { { INT16_C( 24263), -INT16_C( 19245), INT16_C( 3884), -INT16_C( 15056), INT16_C( 26313), INT16_C( 27276), INT16_C( 2579), -INT16_C( 770), INT16_C( 10256), INT16_C( 3998), INT16_C( 29765), INT16_C( 27249), -INT16_C( 9395), -INT16_C( 14939), INT16_C( 5031), INT16_C( 28168), -INT16_C( 9359), -INT16_C( 25310), INT16_C( 21226), -INT16_C( 19358), -INT16_C( 4424), -INT16_C( 13282), INT16_C( 7416), INT16_C( 2248), INT16_C( 26181), -INT16_C( 30184), -INT16_C( 30246), INT16_C( 10228), -INT16_C( 26268), INT16_C( 3052), -INT16_C( 2900), INT16_C( 7545) }, UINT32_C(3132792016), { INT16_C( 7406), -INT16_C( 22674), -INT16_C( 29686), INT16_C( 883), INT16_C( 15273), -INT16_C( 4597), INT16_C( 9122), INT16_C( 31864), INT16_C( 27820), INT16_C( 4260), -INT16_C( 28666), -INT16_C( 19941), -INT16_C( 27259), INT16_C( 21968), -INT16_C( 30159), INT16_C( 7951), INT16_C( 32423), -INT16_C( 20026), INT16_C( 14602), -INT16_C( 19532), -INT16_C( 16267), INT16_C( 6049), INT16_C( 6627), -INT16_C( 28525), INT16_C( 14214), -INT16_C( 29536), -INT16_C( 17208), INT16_C( 19774), INT16_C( 3665), -INT16_C( 32094), -INT16_C( 20071), INT16_C( 16545) }, { INT64_C( 1652159849723684911), INT64_C( 3375942324217286502) }, { INT16_C( 24263), -INT16_C( 19245), INT16_C( 3884), -INT16_C( 15056), INT16_C( 0), INT16_C( 27276), INT16_C( 0), INT16_C( 0), INT16_C( 10256), INT16_C( 3998), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 14939), INT16_C( 5031), INT16_C( 0), -INT16_C( 9359), INT16_C( 0), INT16_C( 21226), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 7416), INT16_C( 0), INT16_C( 26181), INT16_C( 0), -INT16_C( 30246), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 2900), INT16_C( 0) } }, { { INT16_C( 31480), -INT16_C( 16198), -INT16_C( 1738), -INT16_C( 30963), -INT16_C( 20729), -INT16_C( 24567), -INT16_C( 21919), -INT16_C( 28448), -INT16_C( 11758), -INT16_C( 19254), -INT16_C( 18312), -INT16_C( 8502), -INT16_C( 1977), -INT16_C( 4313), 
INT16_C( 441), -INT16_C( 20194), -INT16_C( 10117), -INT16_C( 20110), INT16_C( 32721), -INT16_C( 9928), INT16_C( 16687), -INT16_C( 28551), INT16_C( 23275), -INT16_C( 480), -INT16_C( 5332), -INT16_C( 23374), INT16_C( 31907), -INT16_C( 5502), -INT16_C( 22156), INT16_C( 11737), -INT16_C( 2134), INT16_C( 9695) }, UINT32_C(2715177424), { INT16_C( 3792), -INT16_C( 134), -INT16_C( 2993), INT16_C( 15247), -INT16_C( 20402), INT16_C( 31289), -INT16_C( 5221), INT16_C( 15902), -INT16_C( 24473), -INT16_C( 9176), INT16_C( 329), -INT16_C( 3063), -INT16_C( 5895), -INT16_C( 14055), -INT16_C( 4039), INT16_C( 2666), -INT16_C( 6658), INT16_C( 19977), -INT16_C( 26151), INT16_C( 10121), -INT16_C( 15799), -INT16_C( 7007), -INT16_C( 16467), INT16_C( 5154), INT16_C( 19039), -INT16_C( 22288), -INT16_C( 1461), INT16_C( 17564), -INT16_C( 18718), INT16_C( 7181), INT16_C( 30886), -INT16_C( 23514) }, { INT64_C( 1251292371324383069), INT64_C( 2089414856581447229) }, { INT16_C( 31480), -INT16_C( 16198), -INT16_C( 1738), -INT16_C( 30963), INT16_C( 0), -INT16_C( 24567), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 19254), -INT16_C( 18312), -INT16_C( 8502), INT16_C( 0), -INT16_C( 4313), INT16_C( 0), -INT16_C( 20194), -INT16_C( 10117), INT16_C( 0), INT16_C( 0), -INT16_C( 9928), INT16_C( 0), -INT16_C( 28551), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 23374), INT16_C( 31907), -INT16_C( 5502), -INT16_C( 22156), INT16_C( 0), -INT16_C( 2134), INT16_C( 0) } }, { { -INT16_C( 4255), -INT16_C( 21052), INT16_C( 25065), -INT16_C( 13071), -INT16_C( 233), -INT16_C( 16920), INT16_C( 3703), -INT16_C( 11167), INT16_C( 21565), INT16_C( 1546), INT16_C( 26575), INT16_C( 3351), INT16_C( 3429), INT16_C( 8951), -INT16_C( 2524), -INT16_C( 31170), INT16_C( 742), -INT16_C( 12493), INT16_C( 9315), INT16_C( 31387), -INT16_C( 31965), -INT16_C( 26057), -INT16_C( 26223), -INT16_C( 12434), INT16_C( 30957), -INT16_C( 17195), -INT16_C( 4897), INT16_C( 17609), -INT16_C( 15879), INT16_C( 7782), -INT16_C( 23369), -INT16_C( 
25180) }, UINT32_C( 174970791), { INT16_C( 2299), INT16_C( 8069), -INT16_C( 17268), INT16_C( 7609), INT16_C( 10325), INT16_C( 17132), -INT16_C( 15968), -INT16_C( 32513), -INT16_C( 14162), -INT16_C( 22588), INT16_C( 11145), INT16_C( 16837), INT16_C( 27087), INT16_C( 30430), INT16_C( 19264), INT16_C( 15489), INT16_C( 1620), -INT16_C( 8101), INT16_C( 5314), INT16_C( 6397), -INT16_C( 5572), -INT16_C( 8870), INT16_C( 22955), INT16_C( 22877), INT16_C( 8482), -INT16_C( 21759), -INT16_C( 14772), INT16_C( 7404), -INT16_C( 13520), INT16_C( 28818), INT16_C( 4886), INT16_C( 27308) }, { INT64_C( 6409827458447181593), -INT64_C( 3875467700365603278) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 13071), -INT16_C( 233), INT16_C( 0), INT16_C( 3703), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 3351), INT16_C( 0), INT16_C( 8951), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 12493), INT16_C( 0), INT16_C( 0), -INT16_C( 31965), INT16_C( 0), INT16_C( 0), -INT16_C( 12434), INT16_C( 30957), INT16_C( 0), -INT16_C( 4897), INT16_C( 0), -INT16_C( 15879), INT16_C( 7782), -INT16_C( 23369), -INT16_C( 25180) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi16(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m128i count = simde_mm_loadu_epi64(test_vec[i].count); simde__m512i r = simde_mm512_mask_sll_epi16(src, test_vec[i].k, a, count); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_sll_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask32 k; const int16_t a[32]; const int64_t count[2]; const int16_t r[32]; } test_vec[] = { { UINT32_C(3130761277), { INT16_C( 26780), INT16_C( 3232), INT16_C( 17838), -INT16_C( 22728), INT16_C( 24616), -INT16_C( 22729), INT16_C( 26223), -INT16_C( 9801), INT16_C( 7959), -INT16_C( 2390), INT16_C( 26413), -INT16_C( 19078), INT16_C( 2538), 
INT16_C( 10062), -INT16_C( 5719), INT16_C( 17889), -INT16_C( 32175), -INT16_C( 175), -INT16_C( 30265), -INT16_C( 4185), -INT16_C( 8471), INT16_C( 22934), INT16_C( 19781), INT16_C( 23602), -INT16_C( 9108), -INT16_C( 26285), -INT16_C( 12989), INT16_C( 11599), -INT16_C( 25130), INT16_C( 32596), INT16_C( 13958), -INT16_C( 10044) }, { -INT64_C( 8615809817898445384), -INT64_C( 4684285321072802723) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT32_C( 861360624), { -INT16_C( 23009), -INT16_C( 2719), -INT16_C( 19133), -INT16_C( 13707), INT16_C( 14827), -INT16_C( 23646), INT16_C( 31055), -INT16_C( 4830), -INT16_C( 28424), INT16_C( 21877), INT16_C( 22164), -INT16_C( 6409), -INT16_C( 2710), INT16_C( 23204), -INT16_C( 1209), INT16_C( 26253), -INT16_C( 4446), -INT16_C( 6821), -INT16_C( 12124), -INT16_C( 28753), INT16_C( 20746), INT16_C( 22835), INT16_C( 21963), -INT16_C( 15546), -INT16_C( 17178), INT16_C( 31256), INT16_C( 3858), INT16_C( 31840), INT16_C( 1028), INT16_C( 19414), INT16_C( 25600), -INT16_C( 23887) }, { -INT64_C( 1763660777605624494), -INT64_C( 858350183371458168) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT32_C(1433349699), { -INT16_C( 12483), INT16_C( 17106), -INT16_C( 22316), -INT16_C( 11123), INT16_C( 
16140), INT16_C( 24438), -INT16_C( 692), INT16_C( 10581), -INT16_C( 9420), -INT16_C( 17135), INT16_C( 20884), -INT16_C( 23792), INT16_C( 10200), INT16_C( 7063), INT16_C( 1621), -INT16_C( 27791), INT16_C( 17366), -INT16_C( 21803), INT16_C( 25323), -INT16_C( 1922), -INT16_C( 2911), -INT16_C( 4777), -INT16_C( 21263), INT16_C( 9751), INT16_C( 10376), INT16_C( 7395), -INT16_C( 3207), INT16_C( 21183), INT16_C( 22298), INT16_C( 28781), -INT16_C( 8611), INT16_C( 13059) }, { -INT64_C( 2592565705582979039), -INT64_C( 8041212283578655665) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT32_C(2192394760), { INT16_C( 27709), INT16_C( 22740), INT16_C( 16835), INT16_C( 8648), -INT16_C( 13536), INT16_C( 16724), INT16_C( 12963), -INT16_C( 8882), INT16_C( 21389), -INT16_C( 8775), -INT16_C( 31825), -INT16_C( 18402), -INT16_C( 31389), INT16_C( 27720), -INT16_C( 2609), INT16_C( 3310), -INT16_C( 15774), INT16_C( 9572), INT16_C( 11267), INT16_C( 9030), -INT16_C( 25609), -INT16_C( 26011), -INT16_C( 19507), INT16_C( 23160), INT16_C( 12551), -INT16_C( 18889), INT16_C( 21940), INT16_C( 6254), -INT16_C( 18470), -INT16_C( 22140), INT16_C( 29356), INT16_C( 3766) }, { INT64_C( 4493319499519629876), INT64_C( 8808286004375568405) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), 
INT16_C( 0), INT16_C( 0) } }, { UINT32_C( 925987970), { -INT16_C( 24630), -INT16_C( 23473), -INT16_C( 11434), INT16_C( 846), INT16_C( 1093), INT16_C( 30993), INT16_C( 17694), INT16_C( 26032), INT16_C( 3008), -INT16_C( 10844), INT16_C( 32203), INT16_C( 16312), -INT16_C( 2610), INT16_C( 20665), -INT16_C( 5527), INT16_C( 13191), -INT16_C( 10614), -INT16_C( 7976), INT16_C( 9897), -INT16_C( 4381), -INT16_C( 2774), INT16_C( 18535), INT16_C( 6202), -INT16_C( 1362), INT16_C( 21027), -INT16_C( 4144), -INT16_C( 30513), -INT16_C( 25298), -INT16_C( 6275), -INT16_C( 6419), INT16_C( 30162), INT16_C( 23578) }, { INT64_C( 4819731317782278731), -INT64_C( 8770135325163238635) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT32_C(1500846730), { -INT16_C( 23646), INT16_C( 8182), -INT16_C( 7029), INT16_C( 23813), INT16_C( 8025), -INT16_C( 23367), -INT16_C( 2799), INT16_C( 10649), INT16_C( 32021), INT16_C( 10859), -INT16_C( 2360), INT16_C( 11130), -INT16_C( 15314), -INT16_C( 17999), INT16_C( 10206), -INT16_C( 32750), INT16_C( 2506), INT16_C( 21919), -INT16_C( 23315), INT16_C( 18098), INT16_C( 27588), -INT16_C( 10774), -INT16_C( 31647), INT16_C( 30463), INT16_C( 27137), -INT16_C( 13919), INT16_C( 7008), -INT16_C( 28684), -INT16_C( 23073), -INT16_C( 17080), INT16_C( 23244), -INT16_C( 26819) }, { INT64_C( 4942313014548028515), INT64_C( 495986420026605834) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), 
INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT32_C(3838804867), { -INT16_C( 15458), INT16_C( 32115), -INT16_C( 17560), INT16_C( 13626), INT16_C( 30485), INT16_C( 31180), -INT16_C( 18349), -INT16_C( 11319), INT16_C( 24663), INT16_C( 25112), INT16_C( 13025), -INT16_C( 6451), -INT16_C( 20661), -INT16_C( 12564), -INT16_C( 17614), -INT16_C( 12110), INT16_C( 9598), -INT16_C( 6579), -INT16_C( 30752), -INT16_C( 2533), -INT16_C( 6146), INT16_C( 20847), INT16_C( 14496), -INT16_C( 2267), INT16_C( 15768), INT16_C( 31065), INT16_C( 10095), -INT16_C( 17825), INT16_C( 19414), INT16_C( 2440), INT16_C( 15110), -INT16_C( 31527) }, { -INT64_C( 5965151098448959648), INT64_C( 8576300502038259310) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT32_C(3488636768), { INT16_C( 20614), INT16_C( 23689), INT16_C( 4763), -INT16_C( 23963), INT16_C( 16205), -INT16_C( 21210), -INT16_C( 28314), INT16_C( 5358), INT16_C( 9496), -INT16_C( 31039), -INT16_C( 16181), -INT16_C( 21868), -INT16_C( 26141), INT16_C( 17441), INT16_C( 4600), INT16_C( 32275), -INT16_C( 25247), -INT16_C( 549), INT16_C( 16559), -INT16_C( 865), -INT16_C( 14977), -INT16_C( 6743), -INT16_C( 26537), INT16_C( 28666), -INT16_C( 17475), -INT16_C( 30219), -INT16_C( 30341), INT16_C( 24371), INT16_C( 21538), INT16_C( 7075), -INT16_C( 18843), -INT16_C( 14439) }, { INT64_C( 3818599163143418963), -INT64_C( 149159470413731800) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), 
INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m128i count = simde_mm_loadu_epi64(test_vec[i].count); simde__m512i r = simde_mm512_maskz_sll_epi16(test_vec[i].k, a, count); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_sll_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[16]; const int64_t b[2]; const int32_t r[16]; } test_vec[] = { { { INT32_C( 288158111), -INT32_C( 1833315413), INT32_C( 1495932463), -INT32_C( 583370780), -INT32_C( 1745975803), -INT32_C( 1803847798), INT32_C( 311362271), -INT32_C( 1679574532), INT32_C( 1437339818), INT32_C( 317220323), INT32_C( 1751847555), -INT32_C( 1891260790), -INT32_C( 1239010516), -INT32_C( 1991597398), -INT32_C( 1600398940), INT32_C( 1027309458) }, { INT64_C( 25), INT64_C( 24) }, { INT32_C( 1040187392), INT32_C( 1442840576), INT32_C( 1577058304), -INT32_C( 939524096), INT32_C( 167772160), INT32_C( 335544320), -INT32_C( 1107296256), -INT32_C( 134217728), INT32_C( 1409286144), -INT32_C( 973078528), INT32_C( 100663296), INT32_C( 335544320), INT32_C( 1476395008), INT32_C( 1409286144), INT32_C( 1207959552), INT32_C( 603979776) } }, { { INT32_C( 1517800879), INT32_C( 333693295), INT32_C( 766738330), -INT32_C( 1922371842), INT32_C( 570424533), INT32_C( 82871159), INT32_C( 135997554), INT32_C( 1422508452), -INT32_C( 38911602), -INT32_C( 1659858686), INT32_C( 248169232), -INT32_C( 2036648783), -INT32_C( 1482188240), -INT32_C( 1867802595), INT32_C( 1687733952), INT32_C( 381182344) }, { INT64_C( 1), INT64_C( 14) }, { -INT32_C( 1259365538), INT32_C( 
667386590), INT32_C( 1533476660), INT32_C( 450223612), INT32_C( 1140849066), INT32_C( 165742318), INT32_C( 271995108), -INT32_C( 1449950392), -INT32_C( 77823204), INT32_C( 975249924), INT32_C( 496338464), INT32_C( 221669730), INT32_C( 1330590816), INT32_C( 559362106), -INT32_C( 919499392), INT32_C( 762364688) } }, { { INT32_C( 1780008781), INT32_C( 536527711), INT32_C( 310678154), -INT32_C( 1775747852), -INT32_C( 1690616669), INT32_C( 1235451233), -INT32_C( 907887991), -INT32_C( 1170603411), INT32_C( 690246346), -INT32_C( 1622597867), -INT32_C( 1515074127), -INT32_C( 1405298167), INT32_C( 2001172246), INT32_C( 2126572533), INT32_C( 306750373), INT32_C( 1875673765) }, { INT64_C( 24), INT64_C( 5) }, { INT32_C( 1291845632), INT32_C( 1593835520), -INT32_C( 1979711488), -INT32_C( 201326592), -INT32_C( 1560281088), INT32_C( 1627389952), -INT32_C( 1996488704), INT32_C( 1828716544), -INT32_C( 905969664), INT32_C( 352321536), -INT32_C( 1325400064), INT32_C( 150994944), INT32_C( 369098752), -INT32_C( 184549376), -INT32_C( 1526726656), -INT32_C( 1526726656) } }, { { INT32_C( 262319130), INT32_C( 1032741783), -INT32_C( 1420831226), INT32_C( 739974232), INT32_C( 487961613), INT32_C( 1172217494), -INT32_C( 302168615), INT32_C( 51929832), -INT32_C( 1508721905), INT32_C( 618897438), -INT32_C( 825281674), INT32_C( 1559947855), INT32_C( 880349342), -INT32_C( 2022090834), -INT32_C( 2055899235), -INT32_C( 393703975) }, { INT64_C( 22), INT64_C( 28) }, { INT32_C( 109051904), -INT32_C( 440401920), -INT32_C( 2122317824), INT32_C( 369098752), INT32_C( 54525952), -INT32_C( 1518338048), INT32_C( 1983905792), -INT32_C( 1174405120), -INT32_C( 1010827264), INT32_C( 125829120), -INT32_C( 578813952), -INT32_C( 1816133632), INT32_C( 662700032), -INT32_C( 343932928), -INT32_C( 415236096), INT32_C( 1983905792) } }, { { INT32_C( 957860235), -INT32_C( 1094610655), -INT32_C( 515688952), INT32_C( 214617283), -INT32_C( 1569564313), INT32_C( 1901395403), -INT32_C( 1687825065), -INT32_C( 612452784), 
-INT32_C( 1609263489), INT32_C( 794744103), -INT32_C( 820993525), -INT32_C( 707011986), -INT32_C( 8958669), -INT32_C( 1586443190), -INT32_C( 247605855), INT32_C( 197966731) }, { INT64_C( 24), INT64_C( 24) }, { -INT32_C( 1962934272), INT32_C( 553648128), INT32_C( 134217728), -INT32_C( 1023410176), INT32_C( 1728053248), -INT32_C( 889192448), INT32_C( 1459617792), INT32_C( 1342177280), INT32_C( 2130706432), INT32_C( 654311424), INT32_C( 184549376), INT32_C( 1845493760), INT32_C( 855638016), INT32_C( 1241513984), -INT32_C( 1593835520), -INT32_C( 1962934272) } }, { { INT32_C( 63268537), -INT32_C( 744147662), -INT32_C( 1765481974), -INT32_C( 274624355), INT32_C( 661081201), INT32_C( 48762710), INT32_C( 1495038407), -INT32_C( 1658909724), -INT32_C( 1532894094), INT32_C( 611862041), -INT32_C( 977650648), INT32_C( 1052007373), -INT32_C( 26927961), -INT32_C( 234861269), -INT32_C( 1421140538), -INT32_C( 1706530008) }, { INT64_C( 10), INT64_C( 9) }, { INT32_C( 362472448), -INT32_C( 1797994496), INT32_C( 327690240), -INT32_C( 2042465280), -INT32_C( 1657682944), -INT32_C( 1606592512), INT32_C( 1910971392), INT32_C( 2083491840), -INT32_C( 2020489216), -INT32_C( 518495232), -INT32_C( 386883584), -INT32_C( 781241344), -INT32_C( 1804428288), INT32_C( 20229120), INT32_C( 746002432), INT32_C( 564961280) } }, { { INT32_C( 407331821), INT32_C( 1997162673), -INT32_C( 1927129499), INT32_C( 271084481), -INT32_C( 2072418476), INT32_C( 299566622), -INT32_C( 271386547), -INT32_C( 1220947766), -INT32_C( 439320524), INT32_C( 761060040), -INT32_C( 239370448), INT32_C( 1040376810), INT32_C( 1757576010), -INT32_C( 814113150), INT32_C( 1623149462), INT32_C( 1343813660) }, { INT64_C( 30), INT64_C( 10) }, { INT32_C( 1073741824), INT32_C( 1073741824), INT32_C( 1073741824), INT32_C( 1073741824), INT32_C( 0), INT32_MIN, INT32_C( 1073741824), INT32_MIN, INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN, INT32_C( 0) } }, { { -INT32_C( 457245854), INT32_C( 817051801), 
-INT32_C( 1617923453), -INT32_C( 470833046), INT32_C( 1394877584), -INT32_C( 968453450), -INT32_C( 926864708), INT32_C( 1931475473), -INT32_C( 1890066955), -INT32_C( 1715533291), -INT32_C( 415740035), -INT32_C( 2000017160), INT32_C( 47967820), INT32_C( 1036591489), INT32_C( 1560644172), INT32_C( 2043683972) }, { INT64_C( 21), INT64_C( 11) }, { INT32_C( 1816133632), INT32_C( 320864256), INT32_C( 1348468736), INT32_C( 222298112), -INT32_C( 1845493760), INT32_C( 1455423488), INT32_C( 1468006400), INT32_C( 1109393408), -INT32_C( 1096810496), INT32_C( 1117782016), -INT32_C( 274726912), INT32_C( 520093696), -INT32_C( 914358272), INT32_C( 807403520), INT32_C( 1233125376), -INT32_C( 1870659584) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_sll_epi32(a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_sll_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t src[16]; const simde__mmask16 k; const int32_t a[16]; const int64_t b[2]; const int32_t r[16]; } test_vec[] = { { { -INT32_C( 66481915), INT32_C( 651953117), INT32_C( 585778516), INT32_C( 874194668), -INT32_C( 357929301), INT32_C( 677909855), -INT32_C( 1595426413), INT32_C( 575516444), INT32_C( 488527424), -INT32_C( 1388053927), INT32_C( 735063359), -INT32_C( 10491053), -INT32_C( 1192687271), -INT32_C( 1293921762), INT32_C( 659736331), -INT32_C( 1219911561) }, UINT16_C(26614), { INT32_C( 409030869), -INT32_C( 851009283), INT32_C( 716741067), -INT32_C( 2110582376), -INT32_C( 1445702967), INT32_C( 1450303236), INT32_C( 1358358534), INT32_C( 1958210975), INT32_C( 982325565), -INT32_C( 2063019079), INT32_C( 95404140), -INT32_C( 1685593134), INT32_C( 960846645), INT32_C( 1083160122), INT32_C( 1032881822), -INT32_C( 1330493325) }, { INT64_C( 15), INT64_C( 12) 
}, { -INT32_C( 66481915), INT32_C( 1350467584), INT32_C( 1290108928), INT32_C( 874194668), INT32_C( 694452224), -INT32_C( 276692992), INT32_C( 1946353664), -INT32_C( 154173440), -INT32_C( 1935769600), INT32_C( 1776058368), -INT32_C( 533331968), -INT32_C( 10491053), -INT32_C( 1192687271), -INT32_C( 618856448), INT32_C( 1129250816), -INT32_C( 1219911561) } }, { { -INT32_C( 1616878235), -INT32_C( 1176490213), INT32_C( 687304885), INT32_C( 433629624), -INT32_C( 97205272), INT32_C( 1752748981), INT32_C( 1424882722), INT32_C( 1824198150), -INT32_C( 1895081101), INT32_C( 1078520715), INT32_C( 342441820), -INT32_C( 802274840), -INT32_C( 1161141756), INT32_C( 1478640437), INT32_C( 1168904255), -INT32_C( 1850644450) }, UINT16_C(48579), { INT32_C( 1772637728), -INT32_C( 123206513), INT32_C( 1194955033), INT32_C( 749289057), INT32_C( 443539192), -INT32_C( 483742154), -INT32_C( 1521792781), -INT32_C( 94236966), -INT32_C( 329053603), INT32_C( 686033935), -INT32_C( 26272355), INT32_C( 1395267675), INT32_C( 762223094), INT32_C( 990943303), -INT32_C( 1142924319), -INT32_C( 961199511) }, { INT64_C( 24), INT64_C( 0) }, { INT32_C( 536870912), -INT32_C( 1895825408), INT32_C( 687304885), INT32_C( 433629624), -INT32_C( 97205272), INT32_C( 1752748981), -INT32_C( 218103808), -INT32_C( 637534208), INT32_C( 1560281088), INT32_C( 1078520715), -INT32_C( 1660944384), INT32_C( 1526726656), -INT32_C( 167772160), INT32_C( 1191182336), INT32_C( 1168904255), INT32_C( 1761607680) } }, { { -INT32_C( 869346940), INT32_C( 1241988713), INT32_C( 33941401), INT32_C( 1976154921), -INT32_C( 103711788), -INT32_C( 960801774), INT32_C( 1372945223), -INT32_C( 346933146), INT32_C( 1941405705), INT32_C( 1472052926), -INT32_C( 832912475), INT32_C( 1380131710), -INT32_C( 1337256802), -INT32_C( 1334442391), -INT32_C( 402568063), -INT32_C( 1898752892) }, UINT16_C(35540), { -INT32_C( 1102540031), INT32_C( 1149365738), -INT32_C( 26738757), -INT32_C( 1676474799), INT32_C( 765623478), INT32_C( 762913836), -INT32_C( 
545129204), -INT32_C( 1939253621), -INT32_C( 750013975), INT32_C( 1494797470), INT32_C( 492273612), INT32_C( 1018849925), INT32_C( 308894950), -INT32_C( 1941904768), INT32_C( 1802224095), INT32_C( 16241687) }, { INT64_C( 27), INT64_C( 4) }, { -INT32_C( 869346940), INT32_C( 1241988713), -INT32_C( 671088640), INT32_C( 1976154921), -INT32_C( 1342177280), -INT32_C( 960801774), INT32_C( 1610612736), INT32_C( 1476395008), INT32_C( 1941405705), -INT32_C( 268435456), -INT32_C( 832912475), INT32_C( 671088640), -INT32_C( 1337256802), -INT32_C( 1334442391), -INT32_C( 402568063), -INT32_C( 1207959552) } }, { { -INT32_C( 1892182513), INT32_C( 1461483384), INT32_C( 1354925881), -INT32_C( 514737572), INT32_C( 184886780), INT32_C( 2095481105), -INT32_C( 1804738731), -INT32_C( 1598449007), -INT32_C( 1473187792), -INT32_C( 1593815960), INT32_C( 804373203), INT32_C( 2031174268), INT32_C( 2021922407), -INT32_C( 302683241), -INT32_C( 612277686), INT32_C( 763116285) }, UINT16_C(44074), { -INT32_C( 688352554), INT32_C( 630770483), INT32_C( 208082427), INT32_C( 304271246), INT32_C( 1014872391), INT32_C( 664782758), INT32_C( 526490787), -INT32_C( 1614050103), INT32_C( 1383449374), -INT32_C( 1988686194), -INT32_C( 1315578333), -INT32_C( 171722835), INT32_C( 389103985), -INT32_C( 1421881336), INT32_C( 919249004), INT32_C( 1272288556) }, { INT64_C( 15), INT64_C( 5) }, { -INT32_C( 1892182513), INT32_C( 1704558592), INT32_C( 1354925881), INT32_C( 1741094912), INT32_C( 184886780), -INT32_C( 472711168), -INT32_C( 1804738731), -INT32_C( 1598449007), -INT32_C( 1473187792), -INT32_C( 1593815960), -INT32_C( 284065792), -INT32_C( 606699520), INT32_C( 2021922407), -INT32_C( 402391040), -INT32_C( 612277686), -INT32_C( 896139264) } }, { { INT32_C( 977716785), INT32_C( 65373591), INT32_C( 1379512357), -INT32_C( 1633874107), -INT32_C( 1283114406), INT32_C( 1076884814), -INT32_C( 1176478469), INT32_C( 2129098060), -INT32_C( 742904516), -INT32_C( 657023566), -INT32_C( 1825959859), INT32_C( 2033305375), 
INT32_C( 1328330241), -INT32_C( 1483777109), INT32_C( 56651959), INT32_C( 562120677) }, UINT16_C(14709), { -INT32_C( 875091980), INT32_C( 702227711), -INT32_C( 386860361), -INT32_C( 1616973453), -INT32_C( 788903360), INT32_C( 1363194353), INT32_C( 915940788), INT32_C( 997133639), INT32_C( 872826421), INT32_C( 576643435), INT32_C( 1309363931), -INT32_C( 2131908288), -INT32_C( 464459789), INT32_C( 1295356056), -INT32_C( 41693514), INT32_C( 272167643) }, { INT64_C( 31), INT64_C( 13) }, { INT32_C( 0), INT32_C( 65373591), INT32_MIN, -INT32_C( 1633874107), INT32_C( 0), INT32_MIN, INT32_C( 0), INT32_C( 2129098060), INT32_MIN, -INT32_C( 657023566), -INT32_C( 1825959859), INT32_C( 0), INT32_MIN, INT32_C( 0), INT32_C( 56651959), INT32_C( 562120677) } }, { { -INT32_C( 1202519521), INT32_C( 1225099411), INT32_C( 843483222), -INT32_C( 1287487878), -INT32_C( 564688963), INT32_C( 484056618), INT32_C( 1783440623), INT32_C( 2094661468), -INT32_C( 1791742974), INT32_C( 199113140), INT32_C( 993862849), INT32_C( 1626308514), INT32_C( 826164743), INT32_C( 1414338660), -INT32_C( 1715561668), -INT32_C( 1676306534) }, UINT16_C(18883), { INT32_C( 260208689), -INT32_C( 1070382205), INT32_C( 1832900222), INT32_C( 1957971510), -INT32_C( 980674440), INT32_C( 727763052), INT32_C( 1992489825), -INT32_C( 910195049), -INT32_C( 2116533762), INT32_C( 54594692), INT32_C( 410026210), INT32_C( 1066149063), -INT32_C( 1459349443), INT32_C( 1121215968), -INT32_C( 138897568), INT32_C( 465598493) }, { INT64_C( 16), INT64_C( 2) }, { INT32_C( 2016477184), INT32_C( 1132658688), INT32_C( 843483222), -INT32_C( 1287487878), -INT32_C( 564688963), INT32_C( 484056618), -INT32_C( 77529088), -INT32_C( 2036924416), INT32_C( 1107165184), INT32_C( 199113140), INT32_C( 993862849), INT32_C( 617021440), INT32_C( 826164743), INT32_C( 1414338660), -INT32_C( 1755316224), -INT32_C( 1676306534) } }, { { INT32_C( 599550019), INT32_C( 761631181), INT32_C( 1159994920), INT32_C( 1331750294), INT32_C( 596507774), INT32_C( 917163737), 
INT32_C( 1448823168), -INT32_C( 1217806732), -INT32_C( 203807450), -INT32_C( 568311626), -INT32_C( 199015074), -INT32_C( 1471970518), INT32_C( 1489752447), INT32_C( 529495455), INT32_C( 846588606), -INT32_C( 806756696) }, UINT16_C(49924), { -INT32_C( 486294846), -INT32_C( 1138204263), -INT32_C( 1723837867), -INT32_C( 982859782), -INT32_C( 1489368808), -INT32_C( 1634600919), INT32_C( 326973738), -INT32_C( 875134712), -INT32_C( 726738373), -INT32_C( 1852713413), INT32_C( 573231400), -INT32_C( 1461218160), -INT32_C( 967892579), INT32_C( 1130749977), INT32_C( 576119322), -INT32_C( 1628623773) }, { INT64_C( 19), INT64_C( 5) }, { INT32_C( 599550019), INT32_C( 761631181), -INT32_C( 1834483712), INT32_C( 1331750294), INT32_C( 596507774), INT32_C( 917163737), INT32_C( 1448823168), -INT32_C( 1217806732), -INT32_C( 774373376), -INT32_C( 1311244288), -INT32_C( 199015074), -INT32_C( 1471970518), INT32_C( 1489752447), INT32_C( 529495455), INT32_C( 282066944), INT32_C( 1662517248) } }, { { -INT32_C( 587899453), INT32_C( 1478449726), -INT32_C( 1619364548), -INT32_C( 1472370526), INT32_C( 1978314755), -INT32_C( 1995522636), INT32_C( 1274006202), INT32_C( 813366636), -INT32_C( 1744015526), INT32_C( 99626185), INT32_C( 1134848929), -INT32_C( 689118765), INT32_C( 1179375250), INT32_C( 1322277524), -INT32_C( 1868906716), INT32_C( 381686972) }, UINT16_C(52611), { -INT32_C( 1611051857), -INT32_C( 183854511), -INT32_C( 891888162), -INT32_C( 6264652), INT32_C( 2136552623), INT32_C( 490634627), -INT32_C( 835585522), -INT32_C( 1080314864), INT32_C( 1365218304), INT32_C( 222718255), -INT32_C( 69788601), INT32_C( 888829829), INT32_C( 800281772), -INT32_C( 548605487), -INT32_C( 72450581), INT32_C( 834357553) }, { INT64_C( 25), INT64_C( 2) }, { INT32_C( 1577058304), -INT32_C( 1577058304), -INT32_C( 1619364548), -INT32_C( 1472370526), INT32_C( 1978314755), -INT32_C( 1995522636), INT32_C( 1274006202), INT32_C( 536870912), INT32_C( 0), INT32_C( 99626185), -INT32_C( 1912602624), INT32_C( 167772160), 
INT32_C( 1179375250), INT32_C( 1322277524), -INT32_C( 704643072), INT32_C( 1644167168) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi32(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_mask_sll_epi32(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_sll_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const int32_t a[16]; const int64_t b[2]; const int32_t r[16]; } test_vec[] = { { UINT16_C(19274), { INT32_C( 1987879395), -INT32_C( 743556432), INT32_C( 119987798), INT32_C( 1067671101), INT32_C( 1066290845), INT32_C( 1149348051), -INT32_C( 1149162198), -INT32_C( 1845048402), INT32_C( 487097197), INT32_C( 334542781), -INT32_C( 753265003), INT32_C( 303218293), -INT32_C( 548298484), INT32_C( 1948504905), -INT32_C( 1372609536), -INT32_C( 599771537) }, { INT64_C( 18), INT64_C( 13) }, { INT32_C( 0), -INT32_C( 356515840), INT32_C( 0), INT32_C( 2029256704), INT32_C( 0), INT32_C( 0), -INT32_C( 1264058368), INT32_C( 0), INT32_C( 229900288), -INT32_C( 554434560), INT32_C( 0), -INT32_C( 103546880), INT32_C( 0), INT32_C( 0), -INT32_C( 1879048192), INT32_C( 0) } }, { UINT16_C(55836), { INT32_C( 1873634636), INT32_C( 152284633), -INT32_C( 1673559205), INT32_C( 1508243551), -INT32_C( 146479762), -INT32_C( 476428934), INT32_C( 1130298555), -INT32_C( 1239582103), -INT32_C( 1188705569), -INT32_C( 742246025), INT32_C( 460259772), INT32_C( 1735742713), -INT32_C( 1285637831), -INT32_C( 1181288194), INT32_C( 1593636084), INT32_C( 1309940334) }, { INT64_C( 5), INT64_C( 5) }, { INT32_C( 0), INT32_C( 0), -INT32_C( 2014287008), INT32_C( 1019153376), -INT32_C( 392385088), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 2017930976), INT32_C( 0), -INT32_C( 290808032), 
INT32_C( 1809262368), INT32_C( 0), -INT32_C( 543252864), -INT32_C( 1031582272) } }, { UINT16_C(34936), { INT32_C( 1971353310), INT32_C( 728331567), -INT32_C( 414852909), INT32_C( 757082662), -INT32_C( 1242131578), INT32_C( 190038209), -INT32_C( 1379252861), INT32_C( 1312113264), -INT32_C( 406604360), -INT32_C( 49074902), INT32_C( 736385029), INT32_C( 139986306), INT32_C( 1505578648), -INT32_C( 1855647730), -INT32_C( 985780395), INT32_C( 705983346) }, { INT64_C( 1), INT64_C( 26) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1514165324), INT32_C( 1810704140), INT32_C( 380076418), INT32_C( 1536461574), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 279972612), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1411966692) } }, { UINT16_C(50906), { -INT32_C( 1864964053), -INT32_C( 1212142471), INT32_C( 86651633), INT32_C( 1859998556), -INT32_C( 158080602), INT32_C( 522916331), INT32_C( 430728465), INT32_C( 1675593271), -INT32_C( 386681233), -INT32_C( 744442910), INT32_C( 1121569509), -INT32_C( 1011829219), -INT32_C( 2101721961), INT32_C( 1721951573), -INT32_C( 2105586101), INT32_C( 1139105748) }, { INT64_C( 14), INT64_C( 30) }, { INT32_C( 0), INT32_C( 186531840), INT32_C( 0), INT32_C( 1423376384), -INT32_C( 127303680), INT32_C( 0), INT32_C( 423903232), -INT32_C( 510803968), INT32_C( 0), INT32_C( 754483200), INT32_C( 1924743168), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 745357312), INT32_C( 1475674112) } }, { UINT16_C(12712), { INT32_C( 353893747), -INT32_C( 480026013), -INT32_C( 901629724), -INT32_C( 1482467461), INT32_C( 410201934), INT32_C( 1438386849), INT32_C( 901387710), INT32_C( 2053595654), -INT32_C( 1014006176), -INT32_C( 995691552), -INT32_C( 1500583893), -INT32_C( 1924255425), INT32_C( 44482913), -INT32_C( 1990696245), INT32_C( 2126451319), -INT32_C( 721934732) }, { INT64_C( 20), INT64_C( 13) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 2008023040), INT32_C( 0), -INT32_C( 1441792000), INT32_C( 0), INT32_C( 543162368), 
-INT32_C( 436207616), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 370147328), INT32_C( 749731840), INT32_C( 0), INT32_C( 0) } }, { UINT16_C(39515), { -INT32_C( 990107796), INT32_C( 1876325296), -INT32_C( 376091919), -INT32_C( 1217316577), INT32_C( 116779965), -INT32_C( 572711791), INT32_C( 2004346243), -INT32_C( 1156459953), -INT32_C( 1434513927), INT32_C( 1914262912), -INT32_C( 1101287521), -INT32_C( 1502229272), INT32_C( 1236036536), INT32_C( 170297735), -INT32_C( 1115579026), INT32_C( 1769509487) }, { INT64_C( 29), INT64_C( 14) }, { INT32_MIN, INT32_C( 0), INT32_C( 0), -INT32_C( 536870912), -INT32_C( 1610612736), INT32_C( 0), INT32_C( 1610612736), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 536870912) } }, { UINT16_C( 5771), { -INT32_C( 509668677), -INT32_C( 1635906275), INT32_C( 1127345611), INT32_C( 1765527638), -INT32_C( 2104064016), INT32_C( 510685555), -INT32_C( 1623315915), INT32_C( 1471531420), -INT32_C( 130525989), INT32_C( 764917346), -INT32_C( 93271901), -INT32_C( 1989956712), -INT32_C( 1509164749), -INT32_C( 1597736085), INT32_C( 1094714021), -INT32_C( 1483147829) }, { INT64_C( 5), INT64_C( 5) }, { INT32_C( 870471520), -INT32_C( 809393248), INT32_C( 0), INT32_C( 662309568), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 155634816), INT32_C( 0), -INT32_C( 1292448704), INT32_C( 1310266464), INT32_C( 0), -INT32_C( 1048631712), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { UINT16_C(11137), { -INT32_C( 1817711410), -INT32_C( 862630772), INT32_C( 650273166), -INT32_C( 1476981752), -INT32_C( 1847689800), -INT32_C( 874849113), -INT32_C( 1823223949), -INT32_C( 960506633), INT32_C( 1280927424), INT32_C( 1075441330), INT32_C( 1466424143), -INT32_C( 1610653977), -INT32_C( 2093949477), INT32_C( 760089273), -INT32_C( 775904806), INT32_C( 899121013) }, { INT64_C( 5), INT64_C( 15) }, { INT32_C( 1962777024), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 
671441184), -INT32_C( 1959995392), INT32_C( 54384192), INT32_C( 0), -INT32_C( 1319712), INT32_C( 0), -INT32_C( 1446947040), INT32_C( 0), INT32_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_maskz_sll_epi32(test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_sll_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[8]; const int64_t b[2]; const int64_t r[8]; } test_vec[] = { { { -INT64_C( 3043210905362980970), -INT64_C( 8566001345715781385), INT64_C( 8038474297071378046), -INT64_C( 4267062589809021897), INT64_C( 6240767933180872696), -INT64_C( 6549473941646625943), -INT64_C( 4256242992551417930), INT64_C( 5973814186616553973) }, { INT64_C( 55), INT64_C( 48) }, { -INT64_C( 3819052484010180608), INT64_C( 8899112863684100096), INT64_C( 4539628424389459968), INT64_C( 1981583836043018240), -INT64_C( 288230376151711744), -INT64_C( 5440348349863559168), -INT64_C( 2666130979403333632), -INT64_C( 396316767208603648) } }, { { INT64_C( 3099936928095694261), INT64_C( 422834507516640561), INT64_C( 1393766964600239874), INT64_C( 931634168761272604), -INT64_C( 7952420843205873855), INT64_C( 2996295799414846160), INT64_C( 265987151192211442), INT64_C( 7432375506683384258) }, { INT64_C( 37), INT64_C( 17) }, { INT64_C( 2904178133034860544), INT64_C( 7624495350530703360), INT64_C( 1548921887344558080), INT64_C( 3763077996307546112), INT64_C( 695216941635207168), -INT64_C( 4341934034692079616), -INT64_C( 5465120071959445504), INT64_C( 2714043572973207552) } }, { { INT64_C( 2514015061285169144), INT64_C( 8318536819918246116), INT64_C( 5129979112103694548), INT64_C( 524854139031869104), -INT64_C( 6732095577990419953), INT64_C( 321757986159382234), -INT64_C( 7167813143326976915), INT64_C( 
2395043167232551205) }, { INT64_C( 49), INT64_C( 41) }, { -INT64_C( 3463268113447911424), INT64_C( 2434195598593753088), -INT64_C( 6798183637515763712), -INT64_C( 5953758707383795712), INT64_C( 6061282148487266304), -INT64_C( 7083036313946947584), INT64_C( 2943665306440040448), -INT64_C( 3293820177468096512) } }, { { -INT64_C( 5161004695847189545), -INT64_C( 501543431971209257), -INT64_C( 5764824409340077237), -INT64_C( 2001300220254565801), INT64_C( 7278388255503360183), INT64_C( 7126761795142741511), INT64_C( 4711498488697565172), -INT64_C( 2425737990017699227) }, { INT64_C( 3), INT64_C( 46) }, { -INT64_C( 4394549419358413128), -INT64_C( 4012347455769674056), INT64_C( 9221636946408036952), INT64_C( 2436342311673025208), INT64_C( 2886873822898226616), INT64_C( 1673862140013277240), INT64_C( 798499762161418144), -INT64_C( 959159846432042200) } }, { { INT64_C( 3223398413973138832), -INT64_C( 4105606542351495679), INT64_C( 8071473455876419058), INT64_C( 7967204048965205264), -INT64_C( 3579618652289696162), -INT64_C( 2912707569573760719), -INT64_C( 3895383087310301655), INT64_C( 2313497541479534798) }, { INT64_C( 54), INT64_C( 57) }, { INT64_C( 7205759403792793600), -INT64_C( 9205357638345293824), INT64_C( 8971170457722028032), INT64_C( 4899916394579099648), -INT64_C( 7530018576963469312), INT64_C( 5494391545392005120), -INT64_C( 8484781697966014464), INT64_C( 3710966092953288704) } }, { { -INT64_C( 5345957587203975858), -INT64_C( 3329993415690457113), INT64_C( 3602768269637717888), INT64_C( 3026672782606902364), -INT64_C( 6442679917796628485), -INT64_C( 729274773593393041), -INT64_C( 6413095861259292633), -INT64_C( 9023593494483984193) }, { INT64_C( 25), INT64_C( 7) }, { -INT64_C( 3743390507850530816), INT64_C( 4851154015121047552), INT64_C( 5642900515076440064), -INT64_C( 5173156816384688128), INT64_C( 9206589984553828352), -INT64_C( 1576084012714164224), -INT64_C( 7665626429949739008), INT64_C( 952374163522191360) } }, { { INT64_C( 6123186393984104711), -INT64_C( 
6587159339739334003), -INT64_C( 1727381194576954965), -INT64_C( 2590221981096837639), INT64_C( 8363509859251845671), INT64_C( 7061958359767319009), -INT64_C( 6777061297406898921), INT64_C( 1817377213416293511) }, { INT64_C( 27), INT64_C( 44) }, { -INT64_C( 6154331858101338112), -INT64_C( 2837402092895731712), -INT64_C( 6553749226088562688), -INT64_C( 6378640856341020672), INT64_C( 4458976846544896000), -INT64_C( 5556245606965444608), INT64_C( 4636487052467109888), -INT64_C( 7862007161319063552) } }, { { -INT64_C( 7592030972611328935), INT64_C( 7710209647364420852), -INT64_C( 6517867833321531760), INT64_C( 8838952131357791582), INT64_C( 931152308542859080), -INT64_C( 7453191266487837102), -INT64_C( 4960081269175780152), -INT64_C( 418449168689500522) }, { INT64_C( 7), INT64_C( 37) }, { INT64_C( 5897471412356131968), -INT64_C( 9217345117669918208), -INT64_C( 4183599348226242560), INT64_C( 6134484317514673920), INT64_C( 8507031051228652544), INT64_C( 5222209722453534976), -INT64_C( 7701103948375104512), INT64_C( 1778738628872588032) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_sll_epi64(a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_sll_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t src[8]; const simde__mmask8 k; const int64_t a[8]; const int64_t b[2]; const int64_t r[8]; } test_vec[] = { { { INT64_C( 8844407328334556588), -INT64_C( 6468868398122554644), INT64_C( 7919920011796737235), -INT64_C( 110988753495623940), INT64_C( 4968237128581166045), -INT64_C( 2053179562796574108), INT64_C( 6433626666797115536), -INT64_C( 6081296775066477237) }, UINT8_C(151), { INT64_C( 3437429681747403670), INT64_C( 4095217988934799264), INT64_C( 1526702650967896079), -INT64_C( 226712261368755362), -INT64_C( 
6992400277197501775), INT64_C( 7482933471671956879), -INT64_C( 4866567801566171544), -INT64_C( 7446462394794026682) }, { INT64_C( 33), INT64_C( 16) }, { -INT64_C( 6782342784515833856), -INT64_C( 8611129602770731008), -INT64_C( 3950395616168771584), -INT64_C( 110988753495623940), -INT64_C( 8315450988823576576), -INT64_C( 2053179562796574108), INT64_C( 6433626666797115536), INT64_C( 6484816827825258496) } }, { { INT64_C( 6234698294050919788), INT64_C( 140080385622181234), INT64_C( 3322217971163811999), -INT64_C( 6330834859143905530), INT64_C( 6005261897915615216), -INT64_C( 6710752655294244738), INT64_C( 7692519130933985774), INT64_C( 2194782635921494672) }, UINT8_C( 55), { -INT64_C( 3648079019115012376), -INT64_C( 7159189469750346728), -INT64_C( 3707469667518826326), -INT64_C( 5233999913943410249), -INT64_C( 470072137464908490), INT64_C( 7488174172224375092), INT64_C( 1105052447113735310), -INT64_C( 7075274418578698423) }, { INT64_C( 36), INT64_C( 30) }, { INT64_C( 1261551604163674112), INT64_C( 9008256435171098624), -INT64_C( 4606185574070484992), -INT64_C( 6330834859143905530), -INT64_C( 5077998383054979072), -INT64_C( 359000167195607040), INT64_C( 7692519130933985774), INT64_C( 2194782635921494672) } }, { { -INT64_C( 3882008865968975175), -INT64_C( 1033099025019939164), -INT64_C( 4681000655360626152), INT64_C( 8193093049506065233), -INT64_C( 5938942746147179704), INT64_C( 4743524235269994489), -INT64_C( 4699575012095905964), INT64_C( 5680917119143333804) }, UINT8_C( 96), { INT64_C( 2591283567823231065), INT64_C( 6594833000054970575), -INT64_C( 6578765831402386107), INT64_C( 5083992152416524160), -INT64_C( 2531241738697968113), -INT64_C( 208973486024217839), -INT64_C( 1650086124891872736), -INT64_C( 3890367105679834162) }, { INT64_C( 36), INT64_C( 16) }, { -INT64_C( 3882008865968975175), -INT64_C( 1033099025019939164), -INT64_C( 4681000655360626152), INT64_C( 8193093049506065233), -INT64_C( 5938942746147179704), -INT64_C( 2246538581826863104), INT64_C( 
5786599954613010432), INT64_C( 5680917119143333804) } }, { { -INT64_C( 3414623508423488695), -INT64_C( 1217784623362428267), -INT64_C( 7574184836662452268), -INT64_C( 2158307683753578073), INT64_C( 3189727863122478449), INT64_C( 3758418125259526371), INT64_C( 8993524444907945524), INT64_C( 5122091226845589403) }, UINT8_C( 23), { -INT64_C( 3477382549134722390), -INT64_C( 2513182177820215722), -INT64_C( 2887318460848514583), -INT64_C( 2170747974958898571), -INT64_C( 4720764818892747346), -INT64_C( 689956682324137360), -INT64_C( 4458220251510071775), INT64_C( 8174994033010724442) }, { INT64_C( 6), INT64_C( 43) }, { -INT64_C( 1191554260107613568), INT64_C( 5177037282892158336), -INT64_C( 320940757209417152), -INT64_C( 2158307683753578073), -INT64_C( 6981043229783004288), INT64_C( 3758418125259526371), INT64_C( 8993524444907945524), INT64_C( 5122091226845589403) } }, { { INT64_C( 2684738884427420210), INT64_C( 3178743322012798185), INT64_C( 1809505683625360218), INT64_C( 768624430915765356), -INT64_C( 6326205360360479931), INT64_C( 8141225094183971737), -INT64_C( 7378427618179971668), -INT64_C( 201186747133786797) }, UINT8_C(225), { INT64_C( 3253366127696219584), INT64_C( 8554785335769560475), -INT64_C( 2807928582899091162), INT64_C( 2949171008047821775), INT64_C( 8073789563819363740), INT64_C( 2826067047849152209), -INT64_C( 1937422955413972370), -INT64_C( 2690655322121327081) }, { INT64_C( 2), INT64_C( 52) }, { -INT64_C( 5433279562924673280), INT64_C( 3178743322012798185), INT64_C( 1809505683625360218), INT64_C( 768624430915765356), -INT64_C( 6326205360360479931), -INT64_C( 7142475882312942780), -INT64_C( 7749691821655889480), INT64_C( 7684122785224243292) } }, { { INT64_C( 1214335800134800863), INT64_C( 1726065765214309597), -INT64_C( 9162282690612319083), -INT64_C( 8401522641993554379), -INT64_C( 8725342017996948768), INT64_C( 5037722244871190823), INT64_C( 9021533900040847426), INT64_C( 8127445436518941844) }, UINT8_C(246), { -INT64_C( 5995093525679551925), 
-INT64_C( 3990292038261301305), INT64_C( 5132008467138171062), -INT64_C( 8280633990523160669), -INT64_C( 6669548624957688431), INT64_C( 2877693995247521439), INT64_C( 3654956487228981488), INT64_C( 7586989603334230139) }, { INT64_C( 7), INT64_C( 10) }, { INT64_C( 1214335800134800863), INT64_C( 5751453166420878208), -INT64_C( 7185702859857962240), -INT64_C( 8401522641993554379), -INT64_C( 5151996603944744832), -INT64_C( 590050082508288128), INT64_C( 6665828522570840064), -INT64_C( 6542766679824777856) } }, { { INT64_C( 5905441591289369139), -INT64_C( 4095644891431942680), INT64_C( 7380937961405110485), INT64_C( 1611811354841625079), INT64_C( 4271325957586484255), -INT64_C( 9114118322039073775), -INT64_C( 7576389794988342147), -INT64_C( 8148095368328371726) }, UINT8_C( 34), { INT64_C( 2207803725923648453), INT64_C( 2904624830350057645), -INT64_C( 4515881587224099808), -INT64_C( 2634679353523361242), INT64_C( 4839417511209198913), INT64_C( 7024881225514218608), -INT64_C( 3996638305905916888), -INT64_C( 5245986071642851264) }, { INT64_C( 7), INT64_C( 57) }, { INT64_C( 5905441591289369139), INT64_C( 2857096810616346240), INT64_C( 7380937961405110485), INT64_C( 1611811354841625079), INT64_C( 4271325957586484255), -INT64_C( 4705662745948047360), -INT64_C( 7576389794988342147), -INT64_C( 8148095368328371726) } }, { { -INT64_C( 7233002708445346763), -INT64_C( 6909888283422837522), -INT64_C( 6227385604361953852), INT64_C( 3376376977820354361), -INT64_C( 6014656537924630384), INT64_C( 7147261668170496345), -INT64_C( 1550246859726401782), INT64_C( 2492948631282271858) }, UINT8_C( 67), { INT64_C( 1052908368995356318), INT64_C( 3239513502534937791), -INT64_C( 8447688280260097817), INT64_C( 7252835846219116200), INT64_C( 1652895107988826445), -INT64_C( 7711860561054425744), INT64_C( 5156245123832928191), INT64_C( 3963822889188961239) }, { INT64_C( 23), INT64_C( 8) }, { INT64_C( 5377720748691423232), INT64_C( 3383846636374458368), -INT64_C( 6227385604361953852), INT64_C( 
3376376977820354361), -INT64_C( 6014656537924630384), INT64_C( 7147261668170496345), -INT64_C( 3494103453737680896), INT64_C( 2492948631282271858) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi64(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_mask_sll_epi64(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_sll_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int64_t a[8]; const int64_t b[2]; const int64_t r[8]; } test_vec[] = { { UINT8_C(249), { -INT64_C( 2427428339764267774), -INT64_C( 4325021648514947492), -INT64_C( 6293168853304433046), INT64_C( 7598455919355618041), INT64_C( 8881084306305521048), INT64_C( 9139553048861713498), INT64_C( 4092764080299905758), INT64_C( 3322853429276209997) }, { INT64_C( 3), INT64_C( 9) }, { -INT64_C( 972682644404590576), INT64_C( 0), INT64_C( 0), INT64_C( 5447415133716289480), -INT64_C( 2738301844394038080), -INT64_C( 670551903944498480), -INT64_C( 4151375505019857168), INT64_C( 8136083360500128360) } }, { UINT8_C( 10), { -INT64_C( 8466906837822125114), INT64_C( 1194104057701151539), -INT64_C( 3376906149356639265), INT64_C( 8427152646742977010), -INT64_C( 1139650684260041587), INT64_C( 9203346217789575256), INT64_C( 4491865967930801093), INT64_C( 4691936849519211002) }, { INT64_C( 16), INT64_C( 59) }, { INT64_C( 0), INT64_C( 5715164826749304832), INT64_C( 0), INT64_C( 4805034157475495936), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C( 27), { INT64_C( 4322513128922837937), -INT64_C( 8916509603300137778), -INT64_C( 6011834511385667817), -INT64_C( 3868377566883244652), INT64_C( 3259700536392071223), INT64_C( 8409163185970949229), INT64_C( 6969938125797728176), -INT64_C( 8526750800371413050) }, 
{ INT64_C( 43), INT64_C( 18) }, { INT64_C( 4872199905466122240), INT64_C( 9018018449158307840), INT64_C( 0), -INT64_C( 6004248281331269632), INT64_C( 7332343978475388928), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(173), { -INT64_C( 6891676135214983324), INT64_C( 6624932376259738478), INT64_C( 6106958149877165062), INT64_C( 4060898676588679201), INT64_C( 2034828436589892748), -INT64_C( 5574224145695100828), -INT64_C( 6434165551018262708), INT64_C( 7160354895656493393) }, { INT64_C( 46), INT64_C( 27) }, { INT64_C( 6185975563170086912), INT64_C( 0), -INT64_C( 4971551776151961600), INT64_C( 290552544709574656), INT64_C( 0), INT64_C( 871728002872901632), INT64_C( 0), INT64_C( 7517704046732378112) } }, { UINT8_C(229), { INT64_C( 2656580082611178829), -INT64_C( 8242890917586205594), -INT64_C( 2518468635301589409), INT64_C( 4694838733885482908), INT64_C( 639139111563596087), -INT64_C( 7520806632001906255), -INT64_C( 5078862884151917943), INT64_C( 2566245761238663784) }, { INT64_C( 50), INT64_C( 48) }, { INT64_C( 3833689182799134720), INT64_C( 0), INT64_C( 4142185757274013696), INT64_C( 0), INT64_C( 0), INT64_C( 7405043687303938048), INT64_C( 730709039540862976), INT64_C( 693554342615056384) } }, { UINT8_C( 0), { INT64_C( 7945149005378185368), -INT64_C( 6294204628957749551), -INT64_C( 2834018317142721445), INT64_C( 8279254228066662248), -INT64_C( 6227195325739957703), -INT64_C( 4030647529310982356), -INT64_C( 4290932790102339265), -INT64_C( 8192171598209711358) }, { INT64_C( 21), INT64_C( 12) }, { INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(160), { -INT64_C( 4261881613090173619), -INT64_C( 6733542215129647253), -INT64_C( 4837672991304386724), INT64_C( 4346223159485104864), INT64_C( 8841954815871569328), -INT64_C( 3364186217201152602), INT64_C( 2216144017487699193), INT64_C( 216487602223476949) }, { INT64_C( 42), INT64_C( 25) }, { INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 
0), -INT64_C( 2938994581045248000), INT64_C( 0), INT64_C( 4162262839597203456) } }, { UINT8_C(196), { -INT64_C( 7324786852454307535), INT64_C( 3781035903345827567), INT64_C( 5548967890448413156), -INT64_C( 2762645388975870916), -INT64_C( 4879166627240891259), INT64_C( 803964173351991253), -INT64_C( 9050659354103553742), -INT64_C( 4754375993875554939) }, { INT64_C( 59), INT64_C( 32) }, { INT64_C( 0), INT64_C( 0), INT64_C( 2305843009213693952), INT64_C( 0), INT64_C( 0), INT64_C( 0), -INT64_C( 8070450532247928832), INT64_C( 2882303761517117440) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_maskz_sll_epi64(test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sll_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_sll_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_sll_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sll_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_sll_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_sll_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sll_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_sll_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_sll_epi64) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/slli.c000066400000000000000000001323071400333146700163710ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above 
copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan * 2020 Christopher Moore */ #define SIMDE_TEST_X86_AVX512_INSN slli #include #include #include static int test_simde_mm512_slli_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[32]; const int16_t r0[32]; const int16_t r3[32]; const int16_t r7[32]; const int16_t r13[32]; const int16_t r24[32]; } test_vec[] = { { { -INT16_C( 4513), -INT16_C( 32064), -INT16_C( 20539), INT16_C( 16953), -INT16_C( 19443), INT16_C( 8904), INT16_C( 17111), -INT16_C( 18058), INT16_C( 9034), -INT16_C( 18739), -INT16_C( 25271), INT16_C( 4847), -INT16_C( 27918), -INT16_C( 9400), -INT16_C( 4204), -INT16_C( 3107), -INT16_C( 24867), -INT16_C( 23691), -INT16_C( 20915), INT16_C( 23269), -INT16_C( 21149), INT16_C( 14973), -INT16_C( 3088), INT16_C( 15091), -INT16_C( 16362), INT16_C( 24561), -INT16_C( 8099), INT16_C( 20594), -INT16_C( 17806), INT16_C( 1579), INT16_C( 2218), -INT16_C( 30727) }, { -INT16_C( 4513), -INT16_C( 32064), -INT16_C( 20539), INT16_C( 16953), -INT16_C( 19443), INT16_C( 8904), INT16_C( 17111), -INT16_C( 18058), INT16_C( 9034), -INT16_C( 18739), -INT16_C( 25271), INT16_C( 4847), -INT16_C( 27918), -INT16_C( 9400), -INT16_C( 4204), -INT16_C( 3107), -INT16_C( 24867), -INT16_C( 23691), -INT16_C( 20915), INT16_C( 23269), -INT16_C( 21149), INT16_C( 14973), -INT16_C( 3088), INT16_C( 15091), -INT16_C( 16362), INT16_C( 24561), 
-INT16_C( 8099), INT16_C( 20594), -INT16_C( 17806), INT16_C( 1579), INT16_C( 2218), -INT16_C( 30727) }, { INT16_C( 29432), INT16_C( 5632), INT16_C( 32296), INT16_C( 4552), -INT16_C( 24472), INT16_C( 5696), INT16_C( 5816), -INT16_C( 13392), INT16_C( 6736), -INT16_C( 18840), -INT16_C( 5560), -INT16_C( 26760), -INT16_C( 26736), -INT16_C( 9664), INT16_C( 31904), -INT16_C( 24856), -INT16_C( 2328), INT16_C( 7080), INT16_C( 29288), -INT16_C( 10456), INT16_C( 27416), -INT16_C( 11288), -INT16_C( 24704), -INT16_C( 10344), INT16_C( 176), -INT16_C( 120), INT16_C( 744), -INT16_C( 31856), -INT16_C( 11376), INT16_C( 12632), INT16_C( 17744), INT16_C( 16328) }, { INT16_C( 12160), INT16_C( 24576), -INT16_C( 7552), INT16_C( 7296), INT16_C( 1664), INT16_C( 25600), INT16_C( 27520), -INT16_C( 17664), -INT16_C( 23296), INT16_C( 26240), -INT16_C( 23424), INT16_C( 30592), INT16_C( 30976), -INT16_C( 23552), -INT16_C( 13824), -INT16_C( 4480), INT16_C( 28288), -INT16_C( 17792), INT16_C( 9856), INT16_C( 29312), -INT16_C( 20096), INT16_C( 16000), -INT16_C( 2048), INT16_C( 31104), INT16_C( 2816), -INT16_C( 1920), INT16_C( 11904), INT16_C( 14592), INT16_C( 14592), INT16_C( 5504), INT16_C( 21760), -INT16_C( 896) }, { -INT16_C( 8192), INT16_C( 0), -INT16_C( 24576), INT16_C( 8192), -INT16_C( 24576), INT16_C( 0), -INT16_C( 8192), -INT16_C( 16384), INT16_C( 16384), -INT16_C( 24576), INT16_C( 8192), -INT16_C( 8192), INT16_C( 16384), INT16_C( 0), INT16_MIN, -INT16_C( 24576), -INT16_C( 24576), -INT16_C( 24576), -INT16_C( 24576), -INT16_C( 24576), INT16_C( 24576), -INT16_C( 24576), INT16_C( 0), INT16_C( 24576), -INT16_C( 16384), INT16_C( 8192), -INT16_C( 24576), INT16_C( 16384), INT16_C( 16384), INT16_C( 24576), INT16_C( 16384), INT16_C( 8192) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), 
INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 28582), -INT16_C( 3030), INT16_C( 3869), -INT16_C( 32690), -INT16_C( 13379), -INT16_C( 21062), -INT16_C( 20801), -INT16_C( 10777), -INT16_C( 10130), -INT16_C( 13259), -INT16_C( 22600), INT16_C( 11036), INT16_C( 18273), INT16_C( 2865), INT16_C( 11087), -INT16_C( 2413), -INT16_C( 16998), -INT16_C( 18454), INT16_C( 14541), -INT16_C( 30152), -INT16_C( 3580), -INT16_C( 15561), INT16_C( 7840), INT16_C( 3992), -INT16_C( 12809), -INT16_C( 20517), -INT16_C( 2188), -INT16_C( 10534), INT16_C( 3134), -INT16_C( 29215), INT16_C( 29751), -INT16_C( 11901) }, { INT16_C( 28582), -INT16_C( 3030), INT16_C( 3869), -INT16_C( 32690), -INT16_C( 13379), -INT16_C( 21062), -INT16_C( 20801), -INT16_C( 10777), -INT16_C( 10130), -INT16_C( 13259), -INT16_C( 22600), INT16_C( 11036), INT16_C( 18273), INT16_C( 2865), INT16_C( 11087), -INT16_C( 2413), -INT16_C( 16998), -INT16_C( 18454), INT16_C( 14541), -INT16_C( 30152), -INT16_C( 3580), -INT16_C( 15561), INT16_C( 7840), INT16_C( 3992), -INT16_C( 12809), -INT16_C( 20517), -INT16_C( 2188), -INT16_C( 10534), INT16_C( 3134), -INT16_C( 29215), INT16_C( 29751), -INT16_C( 11901) }, { INT16_C( 32048), -INT16_C( 24240), INT16_C( 30952), INT16_C( 624), INT16_C( 24040), INT16_C( 28112), INT16_C( 30200), -INT16_C( 20680), -INT16_C( 15504), INT16_C( 25000), INT16_C( 15808), INT16_C( 22752), INT16_C( 15112), INT16_C( 22920), INT16_C( 23160), -INT16_C( 19304), -INT16_C( 4912), -INT16_C( 16560), -INT16_C( 14744), INT16_C( 20928), -INT16_C( 28640), INT16_C( 6584), -INT16_C( 2816), INT16_C( 31936), INT16_C( 28600), INT16_C( 32472), -INT16_C( 17504), -INT16_C( 18736), INT16_C( 25072), INT16_C( 28424), -INT16_C( 24136), -INT16_C( 29672) }, { -INT16_C( 11520), INT16_C( 5376), -INT16_C( 29056), INT16_C( 9984), -INT16_C( 8576), -INT16_C( 8960), INT16_C( 24448), -INT16_C( 3200), INT16_C( 14080), 
INT16_C( 6784), -INT16_C( 9216), -INT16_C( 29184), -INT16_C( 20352), -INT16_C( 26496), -INT16_C( 22656), INT16_C( 18816), -INT16_C( 13056), -INT16_C( 2816), INT16_C( 26240), INT16_C( 7168), INT16_C( 512), -INT16_C( 25728), INT16_C( 20480), -INT16_C( 13312), -INT16_C( 1152), -INT16_C( 4736), -INT16_C( 17920), INT16_C( 27904), INT16_C( 7936), -INT16_C( 3968), INT16_C( 7040), -INT16_C( 16000) }, { -INT16_C( 16384), INT16_C( 16384), -INT16_C( 24576), -INT16_C( 16384), -INT16_C( 24576), INT16_C( 16384), -INT16_C( 8192), -INT16_C( 8192), -INT16_C( 16384), -INT16_C( 24576), INT16_C( 0), INT16_MIN, INT16_C( 8192), INT16_C( 8192), -INT16_C( 8192), INT16_C( 24576), INT16_C( 16384), INT16_C( 16384), -INT16_C( 24576), INT16_C( 0), INT16_MIN, -INT16_C( 8192), INT16_C( 0), INT16_C( 0), -INT16_C( 8192), INT16_C( 24576), INT16_MIN, INT16_C( 16384), -INT16_C( 16384), INT16_C( 8192), -INT16_C( 8192), INT16_C( 24576) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 27954), -INT16_C( 120), -INT16_C( 16218), -INT16_C( 21879), -INT16_C( 16205), INT16_C( 21357), INT16_C( 1502), -INT16_C( 10910), INT16_C( 15827), INT16_C( 18309), INT16_C( 24372), INT16_C( 29213), -INT16_C( 149), -INT16_C( 24064), -INT16_C( 31885), -INT16_C( 23181), -INT16_C( 783), -INT16_C( 26716), INT16_C( 11708), INT16_C( 28481), -INT16_C( 20755), -INT16_C( 13117), INT16_C( 9651), -INT16_C( 31071), INT16_C( 9827), -INT16_C( 26674), -INT16_C( 5242), -INT16_C( 3830), INT16_C( 2794), INT16_C( 24212), INT16_C( 1933), INT16_C( 32259) }, { INT16_C( 27954), -INT16_C( 120), -INT16_C( 16218), -INT16_C( 21879), -INT16_C( 16205), INT16_C( 21357), 
INT16_C( 1502), -INT16_C( 10910), INT16_C( 15827), INT16_C( 18309), INT16_C( 24372), INT16_C( 29213), -INT16_C( 149), -INT16_C( 24064), -INT16_C( 31885), -INT16_C( 23181), -INT16_C( 783), -INT16_C( 26716), INT16_C( 11708), INT16_C( 28481), -INT16_C( 20755), -INT16_C( 13117), INT16_C( 9651), -INT16_C( 31071), INT16_C( 9827), -INT16_C( 26674), -INT16_C( 5242), -INT16_C( 3830), INT16_C( 2794), INT16_C( 24212), INT16_C( 1933), INT16_C( 32259) }, { INT16_C( 27024), -INT16_C( 960), INT16_C( 1328), INT16_C( 21576), INT16_C( 1432), -INT16_C( 25752), INT16_C( 12016), -INT16_C( 21744), -INT16_C( 4456), INT16_C( 15400), -INT16_C( 1632), -INT16_C( 28440), -INT16_C( 1192), INT16_C( 4096), INT16_C( 7064), INT16_C( 11160), -INT16_C( 6264), -INT16_C( 17120), INT16_C( 28128), INT16_C( 31240), INT16_C( 30568), INT16_C( 26136), INT16_C( 11672), INT16_C( 13576), INT16_C( 13080), -INT16_C( 16784), INT16_C( 23600), -INT16_C( 30640), INT16_C( 22352), -INT16_C( 2912), INT16_C( 15464), -INT16_C( 4072) }, { -INT16_C( 26368), -INT16_C( 15360), INT16_C( 21248), INT16_C( 17536), INT16_C( 22912), -INT16_C( 18816), -INT16_C( 4352), -INT16_C( 20224), -INT16_C( 5760), -INT16_C( 15744), -INT16_C( 26112), INT16_C( 3712), -INT16_C( 19072), INT16_C( 0), -INT16_C( 18048), -INT16_C( 18048), INT16_C( 30848), -INT16_C( 11776), -INT16_C( 8704), -INT16_C( 24448), INT16_C( 30336), INT16_C( 24960), -INT16_C( 9856), INT16_C( 20608), INT16_C( 12672), -INT16_C( 6400), -INT16_C( 15616), -INT16_C( 31488), INT16_C( 29952), INT16_C( 18944), -INT16_C( 14720), INT16_C( 384) }, { INT16_C( 16384), INT16_C( 0), -INT16_C( 16384), INT16_C( 8192), INT16_C( 24576), -INT16_C( 24576), -INT16_C( 16384), INT16_C( 16384), INT16_C( 24576), -INT16_C( 24576), INT16_MIN, -INT16_C( 24576), INT16_C( 24576), INT16_C( 0), INT16_C( 24576), INT16_C( 24576), INT16_C( 8192), INT16_MIN, INT16_MIN, INT16_C( 8192), -INT16_C( 24576), INT16_C( 24576), INT16_C( 24576), INT16_C( 8192), INT16_C( 24576), -INT16_C( 16384), -INT16_C( 16384), INT16_C( 
16384), INT16_C( 16384), INT16_MIN, -INT16_C( 24576), INT16_C( 24576) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 22525), -INT16_C( 16363), INT16_C( 22229), -INT16_C( 15569), -INT16_C( 3580), -INT16_C( 18289), INT16_C( 12312), INT16_C( 31550), INT16_C( 3159), -INT16_C( 8942), INT16_C( 7416), -INT16_C( 7474), INT16_C( 25126), -INT16_C( 19392), INT16_C( 17514), INT16_C( 27954), INT16_C( 18668), -INT16_C( 16083), INT16_C( 23966), -INT16_C( 23676), INT16_C( 4943), INT16_C( 26459), -INT16_C( 26300), -INT16_C( 25630), -INT16_C( 2650), -INT16_C( 24968), INT16_C( 17937), INT16_C( 14464), -INT16_C( 15959), INT16_C( 5100), INT16_C( 7685), -INT16_C( 3712) }, { -INT16_C( 22525), -INT16_C( 16363), INT16_C( 22229), -INT16_C( 15569), -INT16_C( 3580), -INT16_C( 18289), INT16_C( 12312), INT16_C( 31550), INT16_C( 3159), -INT16_C( 8942), INT16_C( 7416), -INT16_C( 7474), INT16_C( 25126), -INT16_C( 19392), INT16_C( 17514), INT16_C( 27954), INT16_C( 18668), -INT16_C( 16083), INT16_C( 23966), -INT16_C( 23676), INT16_C( 4943), INT16_C( 26459), -INT16_C( 26300), -INT16_C( 25630), -INT16_C( 2650), -INT16_C( 24968), INT16_C( 17937), INT16_C( 14464), -INT16_C( 15959), INT16_C( 5100), INT16_C( 7685), -INT16_C( 3712) }, { INT16_C( 16408), INT16_C( 168), -INT16_C( 18776), INT16_C( 6520), -INT16_C( 28640), -INT16_C( 15240), -INT16_C( 32576), -INT16_C( 9744), INT16_C( 25272), -INT16_C( 6000), -INT16_C( 6208), INT16_C( 5744), INT16_C( 4400), -INT16_C( 24064), INT16_C( 9040), INT16_C( 27024), INT16_C( 18272), INT16_C( 2408), -INT16_C( 4880), INT16_C( 7200), -INT16_C( 25992), INT16_C( 15064), -INT16_C( 13792), 
-INT16_C( 8432), -INT16_C( 21200), -INT16_C( 3136), INT16_C( 12424), -INT16_C( 15360), INT16_C( 3400), -INT16_C( 24736), -INT16_C( 4056), -INT16_C( 29696) }, { INT16_C( 384), INT16_C( 2688), INT16_C( 27264), -INT16_C( 26752), INT16_C( 512), INT16_C( 18304), INT16_C( 3072), -INT16_C( 24832), INT16_C( 11136), -INT16_C( 30464), INT16_C( 31744), INT16_C( 26368), INT16_C( 4864), INT16_C( 8192), INT16_C( 13568), -INT16_C( 26368), INT16_C( 30208), -INT16_C( 27008), -INT16_C( 12544), -INT16_C( 15872), -INT16_C( 22656), -INT16_C( 21120), -INT16_C( 24064), -INT16_C( 3840), -INT16_C( 11520), INT16_C( 15360), INT16_C( 2176), INT16_C( 16384), -INT16_C( 11136), -INT16_C( 2560), INT16_C( 640), -INT16_C( 16384) }, { INT16_C( 24576), -INT16_C( 24576), -INT16_C( 24576), -INT16_C( 8192), INT16_MIN, -INT16_C( 8192), INT16_C( 0), -INT16_C( 16384), -INT16_C( 8192), INT16_C( 16384), INT16_C( 0), -INT16_C( 16384), -INT16_C( 16384), INT16_C( 0), INT16_C( 16384), INT16_C( 16384), INT16_MIN, -INT16_C( 24576), -INT16_C( 16384), INT16_MIN, -INT16_C( 8192), INT16_C( 24576), INT16_MIN, INT16_C( 16384), -INT16_C( 16384), INT16_C( 0), INT16_C( 8192), INT16_C( 0), INT16_C( 8192), INT16_MIN, -INT16_C( 24576), INT16_C( 0) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 20890), INT16_C( 1458), INT16_C( 14091), INT16_C( 23208), INT16_C( 842), -INT16_C( 28990), -INT16_C( 23396), INT16_C( 16937), -INT16_C( 24167), -INT16_C( 21536), INT16_C( 25064), -INT16_C( 28189), -INT16_C( 12510), INT16_C( 10148), INT16_C( 9453), INT16_C( 21528), -INT16_C( 13614), -INT16_C( 8871), INT16_C( 257), INT16_C( 19512), -INT16_C( 1532), 
-INT16_C( 24358), INT16_C( 1182), INT16_C( 14563), -INT16_C( 15451), -INT16_C( 29213), -INT16_C( 14812), INT16_C( 17950), -INT16_C( 15723), -INT16_C( 32147), -INT16_C( 31257), -INT16_C( 17962) }, { -INT16_C( 20890), INT16_C( 1458), INT16_C( 14091), INT16_C( 23208), INT16_C( 842), -INT16_C( 28990), -INT16_C( 23396), INT16_C( 16937), -INT16_C( 24167), -INT16_C( 21536), INT16_C( 25064), -INT16_C( 28189), -INT16_C( 12510), INT16_C( 10148), INT16_C( 9453), INT16_C( 21528), -INT16_C( 13614), -INT16_C( 8871), INT16_C( 257), INT16_C( 19512), -INT16_C( 1532), -INT16_C( 24358), INT16_C( 1182), INT16_C( 14563), -INT16_C( 15451), -INT16_C( 29213), -INT16_C( 14812), INT16_C( 17950), -INT16_C( 15723), -INT16_C( 32147), -INT16_C( 31257), -INT16_C( 17962) }, { INT16_C( 29488), INT16_C( 11664), -INT16_C( 18344), -INT16_C( 10944), INT16_C( 6736), INT16_C( 30224), INT16_C( 9440), INT16_C( 4424), INT16_C( 3272), INT16_C( 24320), INT16_C( 3904), -INT16_C( 28904), INT16_C( 30992), INT16_C( 15648), INT16_C( 10088), -INT16_C( 24384), INT16_C( 22160), -INT16_C( 5432), INT16_C( 2056), INT16_C( 25024), -INT16_C( 12256), INT16_C( 1744), INT16_C( 9456), -INT16_C( 14568), INT16_C( 7464), INT16_C( 28440), INT16_C( 12576), INT16_C( 12528), INT16_C( 5288), INT16_C( 4968), INT16_C( 12088), -INT16_C( 12624) }, { INT16_C( 13056), -INT16_C( 9984), -INT16_C( 31360), INT16_C( 21504), -INT16_C( 23296), INT16_C( 24832), INT16_C( 19968), INT16_C( 5248), -INT16_C( 13184), -INT16_C( 4096), -INT16_C( 3072), -INT16_C( 3712), -INT16_C( 28416), -INT16_C( 11776), INT16_C( 30336), INT16_C( 3072), INT16_C( 26880), -INT16_C( 21376), -INT16_C( 32640), INT16_C( 7168), INT16_C( 512), INT16_C( 27904), INT16_C( 20224), INT16_C( 29056), -INT16_C( 11648), -INT16_C( 3712), INT16_C( 4608), INT16_C( 3840), INT16_C( 19072), INT16_C( 13952), -INT16_C( 3200), -INT16_C( 5376) }, { -INT16_C( 16384), INT16_C( 16384), INT16_C( 24576), INT16_C( 0), INT16_C( 16384), INT16_C( 16384), INT16_MIN, INT16_C( 8192), INT16_C( 8192), INT16_C( 
0), INT16_C( 0), INT16_C( 24576), INT16_C( 16384), INT16_MIN, -INT16_C( 24576), INT16_C( 0), INT16_C( 16384), INT16_C( 8192), INT16_C( 8192), INT16_C( 0), INT16_MIN, INT16_C( 16384), -INT16_C( 16384), INT16_C( 24576), -INT16_C( 24576), INT16_C( 24576), INT16_MIN, -INT16_C( 16384), -INT16_C( 24576), -INT16_C( 24576), -INT16_C( 8192), -INT16_C( 16384) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 12112), INT16_C( 20887), -INT16_C( 12496), INT16_C( 13469), INT16_C( 30921), INT16_C( 26581), -INT16_C( 18308), INT16_C( 8607), -INT16_C( 32133), -INT16_C( 24401), -INT16_C( 12984), -INT16_C( 8730), INT16_C( 21648), INT16_C( 30560), INT16_C( 14041), INT16_C( 10544), -INT16_C( 14490), -INT16_C( 27013), INT16_C( 6294), INT16_C( 24523), -INT16_C( 24432), INT16_C( 3271), INT16_C( 26200), -INT16_C( 11474), -INT16_C( 8727), INT16_C( 12659), INT16_C( 23210), INT16_C( 14863), INT16_C( 28590), -INT16_C( 30799), -INT16_C( 7515), INT16_C( 2993) }, { INT16_C( 12112), INT16_C( 20887), -INT16_C( 12496), INT16_C( 13469), INT16_C( 30921), INT16_C( 26581), -INT16_C( 18308), INT16_C( 8607), -INT16_C( 32133), -INT16_C( 24401), -INT16_C( 12984), -INT16_C( 8730), INT16_C( 21648), INT16_C( 30560), INT16_C( 14041), INT16_C( 10544), -INT16_C( 14490), -INT16_C( 27013), INT16_C( 6294), INT16_C( 24523), -INT16_C( 24432), INT16_C( 3271), INT16_C( 26200), -INT16_C( 11474), -INT16_C( 8727), INT16_C( 12659), INT16_C( 23210), INT16_C( 14863), INT16_C( 28590), -INT16_C( 30799), -INT16_C( 7515), INT16_C( 2993) }, { INT16_C( 31360), -INT16_C( 29512), INT16_C( 31104), -INT16_C( 23320), -INT16_C( 14776), INT16_C( 16040), 
-INT16_C( 15392), INT16_C( 3320), INT16_C( 5080), INT16_C( 1400), INT16_C( 27200), -INT16_C( 4304), -INT16_C( 23424), -INT16_C( 17664), -INT16_C( 18744), INT16_C( 18816), INT16_C( 15152), -INT16_C( 19496), -INT16_C( 15184), -INT16_C( 424), INT16_C( 1152), INT16_C( 26168), INT16_C( 12992), -INT16_C( 26256), -INT16_C( 4280), -INT16_C( 29800), -INT16_C( 10928), -INT16_C( 12168), INT16_C( 32112), INT16_C( 15752), INT16_C( 5416), INT16_C( 23944) }, { -INT16_C( 22528), -INT16_C( 13440), -INT16_C( 26624), INT16_C( 20096), INT16_C( 25728), -INT16_C( 5504), INT16_C( 15872), -INT16_C( 12416), INT16_C( 15744), INT16_C( 22400), -INT16_C( 23552), -INT16_C( 3328), INT16_C( 18432), -INT16_C( 20480), INT16_C( 27776), -INT16_C( 26624), -INT16_C( 19712), INT16_C( 15744), INT16_C( 19200), -INT16_C( 6784), INT16_C( 18432), INT16_C( 25472), INT16_C( 11264), -INT16_C( 26880), -INT16_C( 2944), -INT16_C( 18048), INT16_C( 21760), INT16_C( 1920), -INT16_C( 10496), -INT16_C( 10112), INT16_C( 21120), -INT16_C( 10112) }, { INT16_C( 0), -INT16_C( 8192), INT16_C( 0), -INT16_C( 24576), INT16_C( 8192), -INT16_C( 24576), INT16_MIN, -INT16_C( 8192), INT16_C( 24576), -INT16_C( 8192), INT16_C( 0), -INT16_C( 16384), INT16_C( 0), INT16_C( 0), INT16_C( 8192), INT16_C( 0), -INT16_C( 16384), INT16_C( 24576), -INT16_C( 16384), INT16_C( 24576), INT16_C( 0), -INT16_C( 8192), INT16_C( 0), -INT16_C( 16384), INT16_C( 8192), INT16_C( 24576), INT16_C( 16384), -INT16_C( 8192), -INT16_C( 16384), INT16_C( 8192), -INT16_C( 24576), INT16_C( 8192) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 11433), INT16_C( 16546), INT16_C( 27972), 
-INT16_C( 10849), INT16_C( 26125), INT16_C( 26081), INT16_C( 4045), -INT16_C( 18888), -INT16_C( 21268), -INT16_C( 26649), -INT16_C( 2554), -INT16_C( 19247), -INT16_C( 31899), INT16_C( 2875), -INT16_C( 5019), INT16_C( 3606), -INT16_C( 18408), INT16_C( 23886), -INT16_C( 4571), INT16_C( 12850), INT16_C( 4948), INT16_C( 8599), -INT16_C( 12253), INT16_C( 4055), -INT16_C( 16516), -INT16_C( 32090), INT16_C( 30901), INT16_C( 6966), INT16_C( 29179), INT16_C( 24614), INT16_C( 15454), INT16_C( 30318) }, { INT16_C( 11433), INT16_C( 16546), INT16_C( 27972), -INT16_C( 10849), INT16_C( 26125), INT16_C( 26081), INT16_C( 4045), -INT16_C( 18888), -INT16_C( 21268), -INT16_C( 26649), -INT16_C( 2554), -INT16_C( 19247), -INT16_C( 31899), INT16_C( 2875), -INT16_C( 5019), INT16_C( 3606), -INT16_C( 18408), INT16_C( 23886), -INT16_C( 4571), INT16_C( 12850), INT16_C( 4948), INT16_C( 8599), -INT16_C( 12253), INT16_C( 4055), -INT16_C( 16516), -INT16_C( 32090), INT16_C( 30901), INT16_C( 6966), INT16_C( 29179), INT16_C( 24614), INT16_C( 15454), INT16_C( 30318) }, { INT16_C( 25928), INT16_C( 1296), INT16_C( 27168), -INT16_C( 21256), INT16_C( 12392), INT16_C( 12040), INT16_C( 32360), -INT16_C( 20032), INT16_C( 26464), -INT16_C( 16584), -INT16_C( 20432), -INT16_C( 22904), INT16_C( 6952), INT16_C( 23000), INT16_C( 25384), INT16_C( 28848), -INT16_C( 16192), -INT16_C( 5520), INT16_C( 28968), -INT16_C( 28272), -INT16_C( 25952), INT16_C( 3256), -INT16_C( 32488), INT16_C( 32440), -INT16_C( 1056), INT16_C( 5424), -INT16_C( 14936), -INT16_C( 9808), -INT16_C( 28712), INT16_C( 304), -INT16_C( 7440), -INT16_C( 19600) }, { INT16_C( 21632), INT16_C( 20736), -INT16_C( 24064), -INT16_C( 12416), INT16_C( 1664), -INT16_C( 3968), -INT16_C( 6528), INT16_C( 7168), INT16_C( 30208), -INT16_C( 3200), INT16_C( 768), INT16_C( 26752), -INT16_C( 19840), -INT16_C( 25216), INT16_C( 12928), INT16_C( 2816), INT16_C( 3072), -INT16_C( 22784), INT16_C( 4736), INT16_C( 6400), -INT16_C( 22016), -INT16_C( 13440), INT16_C( 4480), 
-INT16_C( 5248), -INT16_C( 16896), INT16_C( 21248), INT16_C( 23168), -INT16_C( 25856), -INT16_C( 640), INT16_C( 4864), INT16_C( 12032), INT16_C( 14080) }, { INT16_C( 8192), INT16_C( 16384), INT16_MIN, -INT16_C( 8192), -INT16_C( 24576), INT16_C( 8192), -INT16_C( 24576), INT16_C( 0), INT16_MIN, -INT16_C( 8192), -INT16_C( 16384), INT16_C( 8192), -INT16_C( 24576), INT16_C( 24576), -INT16_C( 24576), -INT16_C( 16384), INT16_C( 0), -INT16_C( 16384), -INT16_C( 24576), INT16_C( 16384), INT16_MIN, -INT16_C( 8192), INT16_C( 24576), -INT16_C( 8192), INT16_MIN, -INT16_C( 16384), -INT16_C( 24576), -INT16_C( 16384), INT16_C( 24576), -INT16_C( 16384), -INT16_C( 16384), -INT16_C( 16384) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 16907), INT16_C( 6867), INT16_C( 1451), -INT16_C( 179), -INT16_C( 7143), INT16_C( 15393), -INT16_C( 1868), INT16_C( 12363), -INT16_C( 3401), INT16_C( 28082), -INT16_C( 6038), INT16_C( 25992), -INT16_C( 20902), -INT16_C( 18235), INT16_C( 13290), -INT16_C( 8402), INT16_C( 752), -INT16_C( 25606), INT16_C( 18183), INT16_C( 8347), -INT16_C( 17365), -INT16_C( 8100), -INT16_C( 22348), INT16_C( 27664), -INT16_C( 15462), INT16_C( 1241), INT16_C( 25003), INT16_C( 1385), INT16_C( 11791), -INT16_C( 1603), -INT16_C( 5023), INT16_C( 21209) }, { -INT16_C( 16907), INT16_C( 6867), INT16_C( 1451), -INT16_C( 179), -INT16_C( 7143), INT16_C( 15393), -INT16_C( 1868), INT16_C( 12363), -INT16_C( 3401), INT16_C( 28082), -INT16_C( 6038), INT16_C( 25992), -INT16_C( 20902), -INT16_C( 18235), INT16_C( 13290), -INT16_C( 8402), INT16_C( 752), -INT16_C( 25606), INT16_C( 18183), INT16_C( 8347), 
-INT16_C( 17365), -INT16_C( 8100), -INT16_C( 22348), INT16_C( 27664), -INT16_C( 15462), INT16_C( 1241), INT16_C( 25003), INT16_C( 1385), INT16_C( 11791), -INT16_C( 1603), -INT16_C( 5023), INT16_C( 21209) }, { -INT16_C( 4184), -INT16_C( 10600), INT16_C( 11608), -INT16_C( 1432), INT16_C( 8392), -INT16_C( 7928), -INT16_C( 14944), -INT16_C( 32168), -INT16_C( 27208), INT16_C( 28048), INT16_C( 17232), INT16_C( 11328), INT16_C( 29392), -INT16_C( 14808), -INT16_C( 24752), -INT16_C( 1680), INT16_C( 6016), -INT16_C( 8240), INT16_C( 14392), INT16_C( 1240), -INT16_C( 7848), INT16_C( 736), INT16_C( 17824), INT16_C( 24704), INT16_C( 7376), INT16_C( 9928), INT16_C( 3416), INT16_C( 11080), INT16_C( 28792), -INT16_C( 12824), INT16_C( 25352), -INT16_C( 26936) }, { -INT16_C( 1408), INT16_C( 27008), -INT16_C( 10880), -INT16_C( 22912), INT16_C( 3200), INT16_C( 4224), INT16_C( 23040), INT16_C( 9600), INT16_C( 23424), -INT16_C( 9984), INT16_C( 13568), -INT16_C( 15360), INT16_C( 11520), INT16_C( 25216), -INT16_C( 2816), -INT16_C( 26880), INT16_C( 30720), -INT16_C( 768), -INT16_C( 31872), INT16_C( 19840), INT16_C( 5504), INT16_C( 11776), INT16_C( 23040), INT16_C( 2048), -INT16_C( 13056), INT16_C( 27776), -INT16_C( 10880), -INT16_C( 19328), INT16_C( 1920), -INT16_C( 8576), INT16_C( 12416), INT16_C( 27776) }, { -INT16_C( 24576), INT16_C( 24576), INT16_C( 24576), -INT16_C( 24576), INT16_C( 8192), INT16_C( 8192), INT16_MIN, INT16_C( 24576), -INT16_C( 8192), INT16_C( 16384), INT16_C( 16384), INT16_C( 0), INT16_C( 16384), -INT16_C( 24576), INT16_C( 16384), -INT16_C( 16384), INT16_C( 0), INT16_C( 16384), -INT16_C( 8192), INT16_C( 24576), INT16_C( 24576), INT16_MIN, INT16_MIN, INT16_C( 0), INT16_C( 16384), INT16_C( 8192), INT16_C( 24576), INT16_C( 8192), -INT16_C( 8192), -INT16_C( 24576), INT16_C( 8192), INT16_C( 8192) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), 
INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i r0 = simde_mm512_slli_epi16(a, 0); simde_test_x86_assert_equal_i16x32(r0, simde_mm512_loadu_epi16(test_vec[i].r0)); simde__m512i r3 = simde_mm512_slli_epi16(a, 3); simde_test_x86_assert_equal_i16x32(r3, simde_mm512_loadu_epi16(test_vec[i].r3)); simde__m512i r7 = simde_mm512_slli_epi16(a, 7); simde_test_x86_assert_equal_i16x32(r7, simde_mm512_loadu_epi16(test_vec[i].r7)); simde__m512i r13 = simde_mm512_slli_epi16(a, 13); simde_test_x86_assert_equal_i16x32(r13, simde_mm512_loadu_epi16(test_vec[i].r13)); simde__m512i r24 = simde_mm512_slli_epi16(a, 24); simde_test_x86_assert_equal_i16x32(r24, simde_mm512_loadu_epi16(test_vec[i].r24)); } return 0; } static int test_simde_mm512_slli_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; unsigned int b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( -687706949), INT32_C( 1593775683), INT32_C( 332932989), INT32_C( 583872054), INT32_C( 1838832857), INT32_C( 847835558), INT32_C(-1396128258), INT32_C( -183977070), INT32_C( -902383138), INT32_C( -512492201), INT32_C(-1812249336), INT32_C( -562835271), INT32_C(-1029714159), INT32_C( 1476158556), INT32_C( 877549641), INT32_C( 1218378177)), 0xac, simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm512_set_epi32(INT32_C( 241549121), INT32_C( 1732816264), INT32_C( 875489890), INT32_C( 72071518), INT32_C(-1641761300), INT32_C( 313288882), INT32_C(-1735158939), INT32_C( 
1219761116), INT32_C( 877921588), INT32_C( 2045964482), INT32_C( -360092415), INT32_C(-1302958505), INT32_C(-1122092800), INT32_C( -177019481), INT32_C( 875636041), INT32_C( -150268654)), 0x8017, simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm512_set_epi32(INT32_C( -52534216), INT32_C( -794188551), INT32_C( -186449823), INT32_C( 1580979103), INT32_C( -972993456), INT32_C( -666426563), INT32_C( -645023430), INT32_C(-1043227266), INT32_C( 1237525980), INT32_C( 349749966), INT32_C( -978999744), INT32_C( -487689408), INT32_C( 898649460), INT32_C(-1217796896), INT32_C( 1277301360), INT32_C( 1454357892)), 0x10, simde_mm512_set_epi32(INT32_C( 1681391616), INT32_C(-1527185408), INT32_C( 6356992), INT32_C( -744554496), INT32_C( 1280311296), INT32_C( 591200256), INT32_C(-1187381248), INT32_C(-1652686848), INT32_C( 635174912), INT32_C(-1026686976), INT32_C(-1505755136), INT32_C( 1933574144), INT32_C( 1299447808), INT32_C( -455081984), INT32_C( 309329920), INT32_C(-1115422720)) }, { simde_mm512_set_epi32(INT32_C(-1898779532), INT32_C( 1141724513), INT32_C( -782352739), INT32_C( 973072470), INT32_C( 1112939167), INT32_C( 368903984), INT32_C( 1631675339), INT32_C( -86505288), INT32_C( 2011287771), INT32_C( 1938765310), INT32_C( 2078191935), INT32_C( -616151900), INT32_C( -454977425), INT32_C( 544105809), INT32_C(-1307935124), INT32_C(-1400984309)), 0xa, simde_mm512_set_epi32(INT32_C( 1269944320), INT32_C( 894796800), INT32_C( 2029679616), INT32_C( -6203392), INT32_C( 1483373568), INT32_C( -199442432), INT32_C( 93268992), INT32_C( 1612898304), INT32_C(-2025624576), INT32_C( 1020786688), INT32_C( 2059729920), INT32_C( 420646912), INT32_C(-2040415232), INT32_C(-1181400064), INT32_C( 704229376), INT32_C( -88855552)) }, { simde_mm512_set_epi32(INT32_C( -955538666), INT32_C( 1399393330), 
INT32_C( 1832782688), INT32_C(-1931362608), INT32_C(-1247233529), INT32_C( -537843102), INT32_C( -120831887), INT32_C( 1329473476), INT32_C( 1569899726), INT32_C( 920247722), INT32_C( 275348332), INT32_C( 1640312018), INT32_C( -873496512), INT32_C( 957396290), INT32_C( 390504842), INT32_C( 1781792417)), 0xf, simde_mm512_set_epi32(INT32_C( -779419648), INT32_C(-2045181952), INT32_C( 95420416), INT32_C( -546832384), INT32_C( 1560510464), INT32_C(-1791950848), INT32_C( 540573696), INT32_C( 333578240), INT32_C( 1650917376), INT32_C( -288030720), INT32_C(-1112145920), INT32_C(-1771503616), INT32_C(-1071644672), INT32_C( 1520500736), INT32_C( 1355087872), INT32_C( -11501568)) }, { simde_mm512_set_epi32(INT32_C( 188085108), INT32_C( 489074602), INT32_C( 1720231560), INT32_C( 106164094), INT32_C( 1250223633), INT32_C( -962071158), INT32_C( 38255424), INT32_C( 801121683), INT32_C(-1580720854), INT32_C( 609844423), INT32_C( 44983522), INT32_C( 481953328), INT32_C( -181212371), INT32_C( 912186226), INT32_C( -42587351), INT32_C( 680089879)), 0x18, simde_mm512_set_epi32(INT32_C( 1946157056), INT32_C(-1442840576), INT32_C(-2013265920), INT32_C( 2113929216), INT32_C( 285212672), INT32_C(-1979711488), INT32_C( 1073741824), INT32_C(-1828716544), INT32_C( 704643072), INT32_C( -956301312), INT32_C( -503316480), INT32_C( 805306368), INT32_C( 754974720), INT32_C( 1912602624), INT32_C( 687865856), INT32_C( 385875968)) }, { simde_mm512_set_epi32(INT32_C(-1878529143), INT32_C( 968369206), INT32_C(-2025408372), INT32_C( -521427427), INT32_C( 750337953), INT32_C( 1599422728), INT32_C( 1832999614), INT32_C( -922516627), INT32_C( 1054703043), INT32_C( -229764941), INT32_C(-1888970968), INT32_C( -770679003), INT32_C( 957667650), INT32_C(-1367078699), INT32_C( 400185050), INT32_C( 619858989)), 0, simde_mm512_set_epi32(INT32_C(-1878529143), INT32_C( 968369206), INT32_C(-2025408372), INT32_C( -521427427), INT32_C( 750337953), INT32_C( 1599422728), INT32_C( 1832999614), INT32_C( -922516627), 
INT32_C( 1054703043), INT32_C( -229764941), INT32_C(-1888970968), INT32_C( -770679003), INT32_C( 957667650), INT32_C(-1367078699), INT32_C( 400185050), INT32_C( 619858989)) }, { simde_mm512_set_epi32(INT32_C( -939632719), INT32_C( 1727963384), INT32_C( 1880331239), INT32_C( 699090974), INT32_C( 1068401563), INT32_C(-1558361689), INT32_C(-1814494206), INT32_C( 1865180366), INT32_C(-1767733366), INT32_C(-1147256695), INT32_C(-1631901793), INT32_C( -198157319), INT32_C( 285018015), INT32_C( 583696937), INT32_C( 1785762602), INT32_C(-1724046997)), 0x16, simde_mm512_set_epi32(INT32_C( -331350016), INT32_C( 1040187392), INT32_C( -104857600), INT32_C( 125829120), INT32_C( -423624704), INT32_C( 1774190592), INT32_C( 8388608), INT32_C( 864026624), INT32_C( -494927872), INT32_C( 574619648), INT32_C( -406847488), INT32_C( -29360128), INT32_C( -406847488), INT32_C(-1975517184), INT32_C( -897581056), INT32_C( 1522532352)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_slli_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_slli_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; unsigned int b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C(-2953678853593164221), INT64_C( 1429936300098399798), INT64_C( 7897726984473080230), INT64_C(-5996325205020460142), INT64_C(-3875706062389379753), INT64_C(-7783551626585583431), INT64_C(-4422588635656985508), INT64_C( 3769047009929918913)), 0xac, simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { simde_mm512_set_epi64(INT64_C( 1037445576805363080), INT64_C( 3760200445600708958), INT64_C(-7051311091025155918), INT64_C(-7452450895147297828), INT64_C( 3770644510958350530), INT64_C(-1546585142970651049), INT64_C(-4819351874959120985), INT64_C( 3760828163438613778)), 0x8017, simde_mm512_set_epi64(INT64_C( 
0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { simde_mm512_set_epi64(INT64_C( -225632736140221191), INT64_C( -800795890549009505), INT64_C(-4178975069113474243), INT64_C(-2770354533752005250), INT64_C( 5315133612400100046), INT64_C(-4204771879465094336), INT64_C( 3859670044345230560), INT64_C( 5485967569790680452)), 0x10, simde_mm512_set_epi64(INT64_C( 7221751429524422656), INT64_C( 27406683787427840), INT64_C( 5499132945064853504), INT64_C(-5099550522009059328), INT64_C( 2728078395493449728), INT64_C(-6466919551140364288), INT64_C( 5581287503658221568), INT64_C( 1328657202873106432)) }, { simde_mm512_set_epi64(INT64_C( 854012069371251830), INT64_C(-7338075353641633319), INT64_C(-3664756911608965568), INT64_C( 6190577389993756354), INT64_C(-5695521678932466387), INT64_C( 8986269833406294113), INT64_C(-6594347992267195055), INT64_C( 897886006004895547)), 0x2a, simde_mm512_set_epi64(INT64_C( 1455181649128980480), INT64_C(-8005319861464989696), INT64_C(-8117456853358608384), INT64_C(-7918726723707863040), INT64_C(-4137484245553643520), INT64_C( -675113333593997312), INT64_C( 7635083510067232768), INT64_C(-2504867807980683264)) }, { simde_mm512_set_epi64(INT64_C( 6876450537877586373), INT64_C(-2498836913726354503), INT64_C(-5925650014767999746), INT64_C(-5091981247482556140), INT64_C( 2276397305581596841), INT64_C( 4333846664358463853), INT64_C( -724672155607878887), INT64_C( 3417746373838389455)), 0x2a, simde_mm512_set_epi64(INT64_C( -934756407423533056), INT64_C(-3157335600089006080), INT64_C(-2570438283414732800), INT64_C( 7513218039291052032), INT64_C( 7188488275143688192), INT64_C( 7216371890024087552), INT64_C(-9165841190443024384), INT64_C( 1225889494272573440)) }, { simde_mm512_set_epi64(INT64_C(-8036497785869311574), INT64_C( 3581702479948115598), INT64_C( 748249211564829520), INT64_C( -816680525172154454), INT64_C(-4839891842343135042), INT64_C(-6001583230129728210), INT64_C(-4279294013059977744), 
INT64_C(-1555144075545091790)), 0x26, simde_mm512_set_epi64(INT64_C(-4397647938138931200), INT64_C( 2043406626093793280), INT64_C(-5590422890961960960), INT64_C(-8604150727591329792), INT64_C( 8423472379845410816), INT64_C(-8411233715916636160), INT64_C(-8742757912167841792), INT64_C(-6541394346116120576)) }, { simde_mm512_set_epi64(INT64_C(-6276545081940248579), INT64_C(-9016855820360504888), INT64_C( 2589347389053699338), INT64_C(-6212989007002338187), INT64_C( 5925964847698460032), INT64_C( 8758478916256841908), INT64_C( 5134329058456078862), INT64_C(-4414137185393506410)), 0x18, simde_mm512_set_epi64(INT64_C(-9176583453456465920), INT64_C( 7465982649455083520), INT64_C(-4954907897243893760), INT64_C( 7673069422566703104), INT64_C(-6790719338690117632), INT64_C( 3134572139001151488), INT64_C( 1398716822424911872), INT64_C(-7065366029995606016)) }, { simde_mm512_set_epi64(INT64_C(-4035691796628594440), INT64_C( 8075961177851250718), INT64_C( 4588749774816889255), INT64_C(-7793193271686306610), INT64_C(-7592356991870287735), INT64_C(-7008964827121951751), INT64_C( 1224143053779534377), INT64_C( 7669791976580784491)), 0x16, simde_mm512_set_epi64(INT64_C(-1415889878515712000), INT64_C( -447427762668437504), INT64_C(-1807976093613817856), INT64_C( 43851930488799232), INT64_C(-2112496568954257408), INT64_C(-1730213388945981440), INT64_C(-1744948453022105600), INT64_C(-3844298059735367680)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_slli_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_slli_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_slli_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_slli_epi64) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/sllv.c000066400000000000000000000404531400333146700164060ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, 
free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN sllv #include #include static int test_simde_mm512_sllv_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { INT16_C( 22565), INT16_C( 7765), -INT16_C( 5967), INT16_C( 8467), INT16_C( 20064), INT16_C( 22451), INT16_C( 4892), -INT16_C( 11833), -INT16_C( 29997), INT16_C( 21077), -INT16_C( 20080), INT16_C( 23507), INT16_C( 27770), -INT16_C( 29233), -INT16_C( 7519), -INT16_C( 14708), -INT16_C( 7621), -INT16_C( 4892), -INT16_C( 2102), INT16_C( 10765), -INT16_C( 16059), INT16_C( 24961), INT16_C( 18644), -INT16_C( 22733), -INT16_C( 30510), INT16_C( 25594), -INT16_C( 12999), -INT16_C( 19522), -INT16_C( 29383), -INT16_C( 9663), -INT16_C( 12944), -INT16_C( 21600) }, { INT16_C( 16), INT16_C( 15), INT16_C( 20), INT16_C( 24), INT16_C( 4), INT16_C( 7), INT16_C( 10), INT16_C( 8), INT16_C( 27), INT16_C( 2), INT16_C( 12), INT16_C( 28), INT16_C( 1), INT16_C( 11), INT16_C( 11), INT16_C( 16), INT16_C( 20), INT16_C( 11), INT16_C( 20), INT16_C( 21), INT16_C( 7), INT16_C( 7), INT16_C( 24), INT16_C( 13), INT16_C( 3), INT16_C( 9), INT16_C( 9), INT16_C( 14), INT16_C( 28), INT16_C( 6), INT16_C( 27), INT16_C( 3) }, { INT16_C( 0), INT16_MIN, INT16_C( 0), INT16_C( 0), -INT16_C( 6656), -INT16_C( 9856), INT16_C( 28672), -INT16_C( 14592), INT16_C( 0), INT16_C( 18772), INT16_C( 0), INT16_C( 0), -INT16_C( 9996), INT16_C( 30720), INT16_C( 2048), INT16_C( 0), INT16_C( 0), INT16_C( 8192), INT16_C( 0), INT16_C( 0), -INT16_C( 23936), -INT16_C( 16256), INT16_C( 0), INT16_C( 24576), INT16_C( 18064), -INT16_C( 3072), INT16_C( 29184), INT16_MIN, INT16_C( 0), -INT16_C( 28608), INT16_C( 0), INT16_C( 23808) } }, { { -INT16_C( 18052), -INT16_C( 15751), -INT16_C( 27809), INT16_C( 11118), INT16_C( 21456), INT16_C( 24934), -INT16_C( 7911), -INT16_C( 17712), INT16_C( 21505), -INT16_C( 32253), INT16_C( 11682), -INT16_C( 1037), -INT16_C( 
15342), INT16_C( 21035), -INT16_C( 7265), INT16_C( 7025), -INT16_C( 5475), -INT16_C( 802), INT16_C( 19581), INT16_C( 19752), -INT16_C( 29025), -INT16_C( 18257), INT16_C( 32623), INT16_C( 28786), INT16_C( 30163), INT16_C( 30451), -INT16_C( 6494), -INT16_C( 19343), -INT16_C( 25430), INT16_C( 18694), INT16_C( 30592), INT16_C( 7525) }, { INT16_C( 8), INT16_C( 27), INT16_C( 8), INT16_C( 5), INT16_C( 27), INT16_C( 7), INT16_C( 11), INT16_C( 5), INT16_C( 20), INT16_C( 13), INT16_C( 2), INT16_C( 6), INT16_C( 5), INT16_C( 6), INT16_C( 28), INT16_C( 0), INT16_C( 12), INT16_C( 22), INT16_C( 1), INT16_C( 15), INT16_C( 25), INT16_C( 8), INT16_C( 12), INT16_C( 29), INT16_C( 2), INT16_C( 18), INT16_C( 15), INT16_C( 26), INT16_C( 8), INT16_C( 9), INT16_C( 10), INT16_C( 9) }, { INT16_C( 31744), INT16_C( 0), INT16_C( 24320), INT16_C( 28096), INT16_C( 0), -INT16_C( 19712), -INT16_C( 14336), INT16_C( 23040), INT16_C( 0), INT16_C( 24576), -INT16_C( 18808), -INT16_C( 832), -INT16_C( 32192), -INT16_C( 30016), INT16_C( 0), INT16_C( 7025), -INT16_C( 12288), INT16_C( 0), -INT16_C( 26374), INT16_C( 0), INT16_C( 0), -INT16_C( 20736), -INT16_C( 4096), INT16_C( 0), -INT16_C( 10420), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 22016), INT16_C( 3072), INT16_C( 0), -INT16_C( 13824) } }, { { INT16_C( 10429), INT16_C( 26366), -INT16_C( 8140), INT16_C( 6879), -INT16_C( 26968), -INT16_C( 14245), -INT16_C( 13827), INT16_C( 1973), INT16_C( 4570), INT16_C( 153), INT16_C( 24464), INT16_C( 14551), -INT16_C( 8286), -INT16_C( 14464), -INT16_C( 14027), -INT16_C( 3311), INT16_C( 4081), INT16_C( 9561), INT16_C( 14575), -INT16_C( 26816), -INT16_C( 25649), -INT16_C( 13217), INT16_C( 5220), INT16_C( 16083), INT16_C( 27941), -INT16_C( 19137), INT16_C( 5836), INT16_C( 28653), INT16_C( 28150), INT16_C( 11062), INT16_C( 18486), INT16_C( 10270) }, { INT16_C( 15), INT16_C( 8), INT16_C( 17), INT16_C( 15), INT16_C( 7), INT16_C( 17), INT16_C( 3), INT16_C( 15), INT16_C( 1), INT16_C( 11), INT16_C( 3), INT16_C( 2), 
INT16_C( 31), INT16_C( 23), INT16_C( 12), INT16_C( 19), INT16_C( 6), INT16_C( 17), INT16_C( 24), INT16_C( 29), INT16_C( 10), INT16_C( 10), INT16_C( 8), INT16_C( 0), INT16_C( 31), INT16_C( 13), INT16_C( 4), INT16_C( 19), INT16_C( 24), INT16_C( 12), INT16_C( 8), INT16_C( 31) }, { INT16_MIN, -INT16_C( 512), INT16_C( 0), INT16_MIN, INT16_C( 21504), INT16_C( 0), INT16_C( 20456), INT16_MIN, INT16_C( 9140), -INT16_C( 14336), -INT16_C( 896), -INT16_C( 7332), INT16_C( 0), INT16_C( 0), INT16_C( 20480), INT16_C( 0), -INT16_C( 960), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 15360), INT16_C( 31744), INT16_C( 25600), INT16_C( 16083), INT16_C( 0), -INT16_C( 8192), INT16_C( 27840), INT16_C( 0), INT16_C( 0), INT16_C( 24576), INT16_C( 13824), INT16_C( 0) } }, { { -INT16_C( 5769), INT16_C( 14727), -INT16_C( 28755), -INT16_C( 20956), -INT16_C( 25628), INT16_C( 22530), -INT16_C( 12579), INT16_C( 10584), -INT16_C( 19768), -INT16_C( 9067), INT16_C( 6355), -INT16_C( 3207), -INT16_C( 10787), INT16_C( 858), INT16_C( 24088), -INT16_C( 28671), -INT16_C( 30648), -INT16_C( 2615), -INT16_C( 4841), -INT16_C( 1117), -INT16_C( 22904), INT16_C( 25939), -INT16_C( 21388), INT16_C( 15502), INT16_C( 9310), INT16_C( 12569), -INT16_C( 28100), INT16_C( 6693), INT16_C( 32616), -INT16_C( 32739), INT16_C( 7901), INT16_C( 9488) }, { INT16_C( 27), INT16_C( 23), INT16_C( 23), INT16_C( 10), INT16_C( 1), INT16_C( 27), INT16_C( 8), INT16_C( 2), INT16_C( 5), INT16_C( 20), INT16_C( 13), INT16_C( 5), INT16_C( 27), INT16_C( 25), INT16_C( 23), INT16_C( 19), INT16_C( 1), INT16_C( 11), INT16_C( 2), INT16_C( 5), INT16_C( 12), INT16_C( 27), INT16_C( 3), INT16_C( 1), INT16_C( 7), INT16_C( 0), INT16_C( 13), INT16_C( 18), INT16_C( 27), INT16_C( 8), INT16_C( 9), INT16_C( 4) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 28672), INT16_C( 14280), INT16_C( 0), -INT16_C( 8960), -INT16_C( 23200), INT16_C( 22784), INT16_C( 0), INT16_C( 24576), INT16_C( 28448), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 
4240), INT16_C( 18432), -INT16_C( 19364), INT16_C( 29792), INT16_MIN, INT16_C( 0), INT16_C( 25504), INT16_C( 31004), INT16_C( 12032), INT16_C( 12569), INT16_MIN, INT16_C( 0), INT16_C( 0), INT16_C( 7424), -INT16_C( 17920), INT16_C( 20736) } }, { { INT16_C( 16724), INT16_C( 7297), INT16_C( 11351), INT16_C( 31047), INT16_C( 19340), INT16_C( 12628), INT16_C( 18020), -INT16_C( 21699), -INT16_C( 4481), INT16_C( 9907), -INT16_C( 7330), -INT16_C( 22088), INT16_C( 5308), INT16_C( 20205), -INT16_C( 11937), -INT16_C( 19340), -INT16_C( 2798), INT16_C( 27088), INT16_C( 5921), -INT16_C( 20765), INT16_C( 14179), -INT16_C( 14369), INT16_C( 7293), -INT16_C( 653), INT16_C( 9739), INT16_C( 26915), -INT16_C( 9463), -INT16_C( 15086), -INT16_C( 16), INT16_C( 20243), -INT16_C( 30511), -INT16_C( 7421) }, { INT16_C( 26), INT16_C( 19), INT16_C( 6), INT16_C( 9), INT16_C( 5), INT16_C( 28), INT16_C( 17), INT16_C( 10), INT16_C( 0), INT16_C( 22), INT16_C( 25), INT16_C( 26), INT16_C( 18), INT16_C( 19), INT16_C( 4), INT16_C( 18), INT16_C( 25), INT16_C( 28), INT16_C( 16), INT16_C( 12), INT16_C( 8), INT16_C( 30), INT16_C( 5), INT16_C( 15), INT16_C( 0), INT16_C( 2), INT16_C( 22), INT16_C( 20), INT16_C( 0), INT16_C( 11), INT16_C( 24), INT16_C( 3) }, { INT16_C( 0), INT16_C( 0), INT16_C( 5568), -INT16_C( 29184), INT16_C( 29056), INT16_C( 0), INT16_C( 0), -INT16_C( 3072), -INT16_C( 4481), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 5616), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 12288), INT16_C( 25344), INT16_C( 0), -INT16_C( 28768), INT16_MIN, INT16_C( 9739), -INT16_C( 23412), INT16_C( 0), INT16_C( 0), -INT16_C( 16), -INT16_C( 26624), INT16_C( 0), INT16_C( 6168) } }, { { INT16_C( 8851), -INT16_C( 27904), INT16_C( 12452), INT16_C( 20986), INT16_C( 18038), INT16_C( 17479), -INT16_C( 28300), -INT16_C( 22592), -INT16_C( 2921), INT16_C( 27578), -INT16_C( 24923), -INT16_C( 6898), INT16_C( 20386), -INT16_C( 13762), INT16_C( 11283), -INT16_C( 22552), -INT16_C( 
5810), -INT16_C( 3526), INT16_C( 13337), -INT16_C( 28860), -INT16_C( 29830), -INT16_C( 4397), -INT16_C( 27620), -INT16_C( 19563), INT16_C( 20360), INT16_C( 11551), INT16_C( 11758), -INT16_C( 28653), INT16_C( 20860), -INT16_C( 28582), INT16_C( 17277), -INT16_C( 13513) }, { INT16_C( 14), INT16_C( 8), INT16_C( 0), INT16_C( 3), INT16_C( 21), INT16_C( 21), INT16_C( 20), INT16_C( 24), INT16_C( 15), INT16_C( 27), INT16_C( 0), INT16_C( 4), INT16_C( 25), INT16_C( 26), INT16_C( 29), INT16_C( 7), INT16_C( 11), INT16_C( 0), INT16_C( 10), INT16_C( 29), INT16_C( 5), INT16_C( 7), INT16_C( 30), INT16_C( 24), INT16_C( 30), INT16_C( 3), INT16_C( 2), INT16_C( 9), INT16_C( 30), INT16_C( 29), INT16_C( 23), INT16_C( 8) }, { -INT16_C( 16384), INT16_C( 0), INT16_C( 12452), -INT16_C( 28720), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_MIN, INT16_C( 0), -INT16_C( 24923), INT16_C( 20704), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 3072), INT16_C( 28672), -INT16_C( 3526), INT16_C( 25600), INT16_C( 0), INT16_C( 28480), INT16_C( 27008), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 26872), -INT16_C( 18504), INT16_C( 9728), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 14080) } }, { { -INT16_C( 24036), INT16_C( 31556), INT16_C( 26358), -INT16_C( 3225), -INT16_C( 363), INT16_C( 26156), INT16_C( 10994), INT16_C( 25128), -INT16_C( 13798), INT16_C( 4220), -INT16_C( 17188), -INT16_C( 18083), INT16_C( 32178), -INT16_C( 27229), -INT16_C( 14788), INT16_C( 22742), INT16_C( 6761), INT16_C( 24531), INT16_C( 14976), INT16_C( 5458), INT16_C( 32312), INT16_C( 10875), -INT16_C( 23384), -INT16_C( 15475), INT16_C( 2414), INT16_C( 19411), INT16_C( 12486), INT16_C( 30724), -INT16_C( 22611), -INT16_C( 5874), -INT16_C( 7059), -INT16_C( 10687) }, { INT16_C( 2), INT16_C( 15), INT16_C( 17), INT16_C( 16), INT16_C( 2), INT16_C( 21), INT16_C( 7), INT16_C( 4), INT16_C( 8), INT16_C( 1), INT16_C( 14), INT16_C( 4), INT16_C( 18), INT16_C( 16), INT16_C( 9), INT16_C( 15), INT16_C( 18), INT16_C( 22), 
INT16_C( 17), INT16_C( 4), INT16_C( 29), INT16_C( 10), INT16_C( 8), INT16_C( 14), INT16_C( 28), INT16_C( 0), INT16_C( 1), INT16_C( 13), INT16_C( 6), INT16_C( 2), INT16_C( 10), INT16_C( 28) }, { -INT16_C( 30608), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1452), INT16_C( 0), INT16_C( 30976), INT16_C( 8832), INT16_C( 6656), INT16_C( 8440), INT16_C( 0), -INT16_C( 27184), INT16_C( 0), INT16_C( 0), INT16_C( 30720), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 21792), INT16_C( 0), -INT16_C( 5120), -INT16_C( 22528), INT16_C( 16384), INT16_C( 0), INT16_C( 19411), INT16_C( 24972), INT16_MIN, -INT16_C( 5312), -INT16_C( 23496), -INT16_C( 19456), INT16_C( 0) } }, { { -INT16_C( 30492), INT16_C( 151), -INT16_C( 12011), -INT16_C( 19934), -INT16_C( 2883), -INT16_C( 6140), INT16_C( 31033), -INT16_C( 15524), -INT16_C( 8612), -INT16_C( 20028), -INT16_C( 5657), -INT16_C( 31457), INT16_C( 5148), -INT16_C( 25188), INT16_C( 11365), INT16_C( 19074), INT16_C( 6836), -INT16_C( 14006), INT16_C( 27883), -INT16_C( 22148), -INT16_C( 32671), -INT16_C( 25967), -INT16_C( 4614), INT16_C( 22109), INT16_C( 8907), -INT16_C( 19960), INT16_C( 9995), INT16_C( 10039), -INT16_C( 11205), -INT16_C( 24123), INT16_C( 18176), -INT16_C( 18965) }, { INT16_C( 6), INT16_C( 9), INT16_C( 31), INT16_C( 0), INT16_C( 16), INT16_C( 14), INT16_C( 31), INT16_C( 8), INT16_C( 26), INT16_C( 4), INT16_C( 5), INT16_C( 6), INT16_C( 2), INT16_C( 31), INT16_C( 24), INT16_C( 23), INT16_C( 6), INT16_C( 19), INT16_C( 0), INT16_C( 21), INT16_C( 7), INT16_C( 31), INT16_C( 29), INT16_C( 9), INT16_C( 6), INT16_C( 22), INT16_C( 24), INT16_C( 11), INT16_C( 24), INT16_C( 6), INT16_C( 1), INT16_C( 15) }, { INT16_C( 14592), INT16_C( 11776), INT16_C( 0), -INT16_C( 19934), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 23552), INT16_C( 0), INT16_C( 7232), INT16_C( 15584), INT16_C( 18368), INT16_C( 20592), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 21248), INT16_C( 0), INT16_C( 27883), INT16_C( 0), INT16_C( 12416), 
INT16_C( 0), INT16_C( 0), -INT16_C( 17920), -INT16_C( 19776), INT16_C( 0), INT16_C( 0), -INT16_C( 18432), INT16_C( 0), INT16_C( 28992), -INT16_C( 29184), INT16_MIN } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_sllv_epi16(a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sllv_epi16) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/sqrt.c000066400000000000000000001034361400333146700164200ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur */ #define SIMDE_TEST_X86_AVX512_INSN sqrt #include #include #include static int test_simde_mm512_sqrt_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 217.83), SIMDE_FLOAT32_C( 108.94), SIMDE_FLOAT32_C( 38.33), SIMDE_FLOAT32_C( 277.15), SIMDE_FLOAT32_C( 59.82), SIMDE_FLOAT32_C( 344.99), SIMDE_FLOAT32_C( 240.56), SIMDE_FLOAT32_C( 162.50), SIMDE_FLOAT32_C( 91.12), SIMDE_FLOAT32_C( 517.25), SIMDE_FLOAT32_C( 419.27), SIMDE_FLOAT32_C( 630.58), SIMDE_FLOAT32_C( 139.48), SIMDE_FLOAT32_C( 227.23), SIMDE_FLOAT32_C( 130.66), SIMDE_FLOAT32_C( 968.78) }, { SIMDE_FLOAT32_C( 14.76), SIMDE_FLOAT32_C( 10.44), SIMDE_FLOAT32_C( 6.19), SIMDE_FLOAT32_C( 16.65), SIMDE_FLOAT32_C( 7.73), SIMDE_FLOAT32_C( 18.57), SIMDE_FLOAT32_C( 15.51), SIMDE_FLOAT32_C( 12.75), SIMDE_FLOAT32_C( 9.55), SIMDE_FLOAT32_C( 22.74), SIMDE_FLOAT32_C( 20.48), SIMDE_FLOAT32_C( 25.11), SIMDE_FLOAT32_C( 11.81), SIMDE_FLOAT32_C( 15.07), SIMDE_FLOAT32_C( 11.43), SIMDE_FLOAT32_C( 31.13) } }, { { SIMDE_FLOAT32_C( 223.24), SIMDE_FLOAT32_C( 61.22), SIMDE_FLOAT32_C( 5.71), SIMDE_FLOAT32_C( 939.37), SIMDE_FLOAT32_C( 950.58), SIMDE_FLOAT32_C( 463.21), SIMDE_FLOAT32_C( 93.23), SIMDE_FLOAT32_C( 926.17), SIMDE_FLOAT32_C( 149.54), SIMDE_FLOAT32_C( 345.84), SIMDE_FLOAT32_C( 517.84), SIMDE_FLOAT32_C( 367.13), SIMDE_FLOAT32_C( 366.95), SIMDE_FLOAT32_C( 396.36), SIMDE_FLOAT32_C( 650.42), SIMDE_FLOAT32_C( 583.12) }, { SIMDE_FLOAT32_C( 14.94), SIMDE_FLOAT32_C( 7.82), SIMDE_FLOAT32_C( 2.39), SIMDE_FLOAT32_C( 30.65), SIMDE_FLOAT32_C( 30.83), SIMDE_FLOAT32_C( 21.52), SIMDE_FLOAT32_C( 9.66), SIMDE_FLOAT32_C( 30.43), SIMDE_FLOAT32_C( 12.23), SIMDE_FLOAT32_C( 18.60), SIMDE_FLOAT32_C( 22.76), SIMDE_FLOAT32_C( 19.16), SIMDE_FLOAT32_C( 19.16), SIMDE_FLOAT32_C( 19.91), SIMDE_FLOAT32_C( 25.50), SIMDE_FLOAT32_C( 24.15) } }, { { SIMDE_FLOAT32_C( 710.05), SIMDE_FLOAT32_C( 748.28), SIMDE_FLOAT32_C( 
893.06), SIMDE_FLOAT32_C( -62.84), SIMDE_FLOAT32_C( 792.96), SIMDE_FLOAT32_C( 635.10), SIMDE_FLOAT32_C( 563.04), SIMDE_FLOAT32_C( 594.48), SIMDE_FLOAT32_C( 976.52), SIMDE_FLOAT32_C( 154.93), SIMDE_FLOAT32_C( 90.22), SIMDE_FLOAT32_C( 370.25), SIMDE_FLOAT32_C( 935.93), SIMDE_FLOAT32_C( -51.25), SIMDE_FLOAT32_C( 771.97), SIMDE_FLOAT32_C( 851.63) }, { SIMDE_FLOAT32_C( 26.65), SIMDE_FLOAT32_C( 27.35), SIMDE_FLOAT32_C( 29.88), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 28.16), SIMDE_FLOAT32_C( 25.20), SIMDE_FLOAT32_C( 23.73), SIMDE_FLOAT32_C( 24.38), SIMDE_FLOAT32_C( 31.25), SIMDE_FLOAT32_C( 12.45), SIMDE_FLOAT32_C( 9.50), SIMDE_FLOAT32_C( 19.24), SIMDE_FLOAT32_C( 30.59), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 27.78), SIMDE_FLOAT32_C( 29.18) } }, { { SIMDE_FLOAT32_C( -30.75), SIMDE_FLOAT32_C( -68.78), SIMDE_FLOAT32_C( -79.81), SIMDE_FLOAT32_C( 475.72), SIMDE_FLOAT32_C( 407.95), SIMDE_FLOAT32_C( 958.53), SIMDE_FLOAT32_C( 380.76), SIMDE_FLOAT32_C( 553.07), SIMDE_FLOAT32_C( 201.21), SIMDE_FLOAT32_C( 214.86), SIMDE_FLOAT32_C( 771.54), SIMDE_FLOAT32_C( 348.19), SIMDE_FLOAT32_C( 997.59), SIMDE_FLOAT32_C( 154.92), SIMDE_FLOAT32_C( 997.20), SIMDE_FLOAT32_C( 140.62) }, { SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 21.81), SIMDE_FLOAT32_C( 20.20), SIMDE_FLOAT32_C( 30.96), SIMDE_FLOAT32_C( 19.51), SIMDE_FLOAT32_C( 23.52), SIMDE_FLOAT32_C( 14.19), SIMDE_FLOAT32_C( 14.66), SIMDE_FLOAT32_C( 27.78), SIMDE_FLOAT32_C( 18.66), SIMDE_FLOAT32_C( 31.58), SIMDE_FLOAT32_C( 12.45), SIMDE_FLOAT32_C( 31.58), SIMDE_FLOAT32_C( 11.86) } }, { { SIMDE_FLOAT32_C( 466.31), SIMDE_FLOAT32_C( 614.68), SIMDE_FLOAT32_C( 580.31), SIMDE_FLOAT32_C( 539.32), SIMDE_FLOAT32_C( 203.17), SIMDE_FLOAT32_C( 122.82), SIMDE_FLOAT32_C( 465.01), SIMDE_FLOAT32_C( 751.36), SIMDE_FLOAT32_C( 957.86), SIMDE_FLOAT32_C( 40.61), SIMDE_FLOAT32_C( 299.33), SIMDE_FLOAT32_C( 397.65), SIMDE_FLOAT32_C( 571.56), SIMDE_FLOAT32_C( 866.02), SIMDE_FLOAT32_C( 947.17), SIMDE_FLOAT32_C( 787.06) }, { SIMDE_FLOAT32_C( 21.59), 
SIMDE_FLOAT32_C( 24.79), SIMDE_FLOAT32_C( 24.09), SIMDE_FLOAT32_C( 23.22), SIMDE_FLOAT32_C( 14.25), SIMDE_FLOAT32_C( 11.08), SIMDE_FLOAT32_C( 21.56), SIMDE_FLOAT32_C( 27.41), SIMDE_FLOAT32_C( 30.95), SIMDE_FLOAT32_C( 6.37), SIMDE_FLOAT32_C( 17.30), SIMDE_FLOAT32_C( 19.94), SIMDE_FLOAT32_C( 23.91), SIMDE_FLOAT32_C( 29.43), SIMDE_FLOAT32_C( 30.78), SIMDE_FLOAT32_C( 28.05) } }, { { SIMDE_FLOAT32_C( 379.06), SIMDE_FLOAT32_C( 518.14), SIMDE_FLOAT32_C( 498.86), SIMDE_FLOAT32_C( -3.46), SIMDE_FLOAT32_C( -23.53), SIMDE_FLOAT32_C( 266.36), SIMDE_FLOAT32_C( 681.68), SIMDE_FLOAT32_C( 242.19), SIMDE_FLOAT32_C( 263.88), SIMDE_FLOAT32_C( 654.06), SIMDE_FLOAT32_C( 331.27), SIMDE_FLOAT32_C( 317.61), SIMDE_FLOAT32_C( 624.18), SIMDE_FLOAT32_C( 874.14), SIMDE_FLOAT32_C( 894.91), SIMDE_FLOAT32_C( 175.60) }, { SIMDE_FLOAT32_C( 19.47), SIMDE_FLOAT32_C( 22.76), SIMDE_FLOAT32_C( 22.34), SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 16.32), SIMDE_FLOAT32_C( 26.11), SIMDE_FLOAT32_C( 15.56), SIMDE_FLOAT32_C( 16.24), SIMDE_FLOAT32_C( 25.57), SIMDE_FLOAT32_C( 18.20), SIMDE_FLOAT32_C( 17.82), SIMDE_FLOAT32_C( 24.98), SIMDE_FLOAT32_C( 29.57), SIMDE_FLOAT32_C( 29.91), SIMDE_FLOAT32_C( 13.25) } }, { { SIMDE_FLOAT32_C( 910.44), SIMDE_FLOAT32_C( 492.48), SIMDE_FLOAT32_C( 518.91), SIMDE_FLOAT32_C( 259.60), SIMDE_FLOAT32_C( 324.91), SIMDE_FLOAT32_C( 233.97), SIMDE_FLOAT32_C( 654.12), SIMDE_FLOAT32_C( 260.58), SIMDE_FLOAT32_C( 230.74), SIMDE_FLOAT32_C( 276.07), SIMDE_FLOAT32_C( -86.08), SIMDE_FLOAT32_C( 582.99), SIMDE_FLOAT32_C( 393.66), SIMDE_FLOAT32_C( 633.68), SIMDE_FLOAT32_C( 958.09), SIMDE_FLOAT32_C( 559.47) }, { SIMDE_FLOAT32_C( 30.17), SIMDE_FLOAT32_C( 22.19), SIMDE_FLOAT32_C( 22.78), SIMDE_FLOAT32_C( 16.11), SIMDE_FLOAT32_C( 18.03), SIMDE_FLOAT32_C( 15.30), SIMDE_FLOAT32_C( 25.58), SIMDE_FLOAT32_C( 16.14), SIMDE_FLOAT32_C( 15.19), SIMDE_FLOAT32_C( 16.62), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 24.15), SIMDE_FLOAT32_C( 19.84), SIMDE_FLOAT32_C( 25.17), SIMDE_FLOAT32_C( 30.95), SIMDE_FLOAT32_C( 
23.65) } }, { { SIMDE_FLOAT32_C( 421.74), SIMDE_FLOAT32_C( 223.58), SIMDE_FLOAT32_C( 636.89), SIMDE_FLOAT32_C( 288.28), SIMDE_FLOAT32_C( 38.61), SIMDE_FLOAT32_C( 936.22), SIMDE_FLOAT32_C( 31.25), SIMDE_FLOAT32_C( 215.71), SIMDE_FLOAT32_C( 498.70), SIMDE_FLOAT32_C( 630.00), SIMDE_FLOAT32_C( 370.58), SIMDE_FLOAT32_C( 365.46), SIMDE_FLOAT32_C( 300.68), SIMDE_FLOAT32_C( 498.16), SIMDE_FLOAT32_C( 559.20), SIMDE_FLOAT32_C( 547.97) }, { SIMDE_FLOAT32_C( 20.54), SIMDE_FLOAT32_C( 14.95), SIMDE_FLOAT32_C( 25.24), SIMDE_FLOAT32_C( 16.98), SIMDE_FLOAT32_C( 6.21), SIMDE_FLOAT32_C( 30.60), SIMDE_FLOAT32_C( 5.59), SIMDE_FLOAT32_C( 14.69), SIMDE_FLOAT32_C( 22.33), SIMDE_FLOAT32_C( 25.10), SIMDE_FLOAT32_C( 19.25), SIMDE_FLOAT32_C( 19.12), SIMDE_FLOAT32_C( 17.34), SIMDE_FLOAT32_C( 22.32), SIMDE_FLOAT32_C( 23.65), SIMDE_FLOAT32_C( 23.41) } }, { { SIMDE_FLOAT32_C( 482.41), SIMDE_FLOAT32_C( 904.16), SIMDE_FLOAT32_C( 301.69), SIMDE_FLOAT32_C( 497.46), SIMDE_FLOAT32_C( 869.63), SIMDE_FLOAT32_C( 866.07), SIMDE_FLOAT32_C( 86.91), SIMDE_FLOAT32_C( 705.04), SIMDE_FLOAT32_C( 534.39), SIMDE_FLOAT32_C( 480.29), SIMDE_FLOAT32_C( 152.20), SIMDE_FLOAT32_C( 7.09), SIMDE_FLOAT32_C( 89.72), SIMDE_FLOAT32_C( 938.68), SIMDE_FLOAT32_C( 472.63), SIMDE_FLOAT32_C( 431.56) }, { SIMDE_FLOAT32_C( 21.96), SIMDE_FLOAT32_C( 30.07), SIMDE_FLOAT32_C( 17.37), SIMDE_FLOAT32_C( 22.30), SIMDE_FLOAT32_C( 29.49), SIMDE_FLOAT32_C( 29.43), SIMDE_FLOAT32_C( 9.32), SIMDE_FLOAT32_C( 26.55), SIMDE_FLOAT32_C( 23.12), SIMDE_FLOAT32_C( 21.92), SIMDE_FLOAT32_C( 12.34), SIMDE_FLOAT32_C( 2.66), SIMDE_FLOAT32_C( 9.47), SIMDE_FLOAT32_C( 30.64), SIMDE_FLOAT32_C( 21.74), SIMDE_FLOAT32_C( 20.77) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_sqrt_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_sqrt_ps (SIMDE_MUNIT_TEST_ARGS) { static const 
struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 437.33), SIMDE_FLOAT32_C( 830.55), SIMDE_FLOAT32_C( 885.40), SIMDE_FLOAT32_C( 946.45), SIMDE_FLOAT32_C( 740.66), SIMDE_FLOAT32_C( 515.39), SIMDE_FLOAT32_C( 501.14), SIMDE_FLOAT32_C( 807.71), SIMDE_FLOAT32_C( 988.01), SIMDE_FLOAT32_C( 854.01), SIMDE_FLOAT32_C( 302.94), SIMDE_FLOAT32_C( 510.25), SIMDE_FLOAT32_C( -3.35), SIMDE_FLOAT32_C( 705.28), SIMDE_FLOAT32_C( 895.93), SIMDE_FLOAT32_C( 944.13) }, UINT8_C( 44), { SIMDE_FLOAT32_C( -17.46), SIMDE_FLOAT32_C( 104.01), SIMDE_FLOAT32_C( -12.13), SIMDE_FLOAT32_C( 572.59), SIMDE_FLOAT32_C( 553.23), SIMDE_FLOAT32_C( 667.21), SIMDE_FLOAT32_C( 175.86), SIMDE_FLOAT32_C( 857.51), SIMDE_FLOAT32_C( 875.76), SIMDE_FLOAT32_C( 661.26), SIMDE_FLOAT32_C( 359.55), SIMDE_FLOAT32_C( 492.88), SIMDE_FLOAT32_C( 614.94), SIMDE_FLOAT32_C( 592.23), SIMDE_FLOAT32_C( 639.48), SIMDE_FLOAT32_C( 586.75) }, { SIMDE_FLOAT32_C( 437.33), SIMDE_FLOAT32_C( 830.55), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 23.93), SIMDE_FLOAT32_C( 740.66), SIMDE_FLOAT32_C( 25.83), SIMDE_FLOAT32_C( 501.14), SIMDE_FLOAT32_C( 807.71), SIMDE_FLOAT32_C( 988.01), SIMDE_FLOAT32_C( 854.01), SIMDE_FLOAT32_C( 302.94), SIMDE_FLOAT32_C( 510.25), SIMDE_FLOAT32_C( -3.35), SIMDE_FLOAT32_C( 705.28), SIMDE_FLOAT32_C( 895.93), SIMDE_FLOAT32_C( 944.13) } }, { { SIMDE_FLOAT32_C( 830.85), SIMDE_FLOAT32_C( 416.09), SIMDE_FLOAT32_C( 252.98), SIMDE_FLOAT32_C( 170.02), SIMDE_FLOAT32_C( 649.47), SIMDE_FLOAT32_C( 61.92), SIMDE_FLOAT32_C( -30.00), SIMDE_FLOAT32_C( 565.15), SIMDE_FLOAT32_C( 804.54), SIMDE_FLOAT32_C( 537.62), SIMDE_FLOAT32_C( 139.69), SIMDE_FLOAT32_C( 223.23), SIMDE_FLOAT32_C( 700.71), SIMDE_FLOAT32_C( 84.06), SIMDE_FLOAT32_C( 154.25), SIMDE_FLOAT32_C( 749.16) }, UINT8_C(186), { SIMDE_FLOAT32_C( 637.38), SIMDE_FLOAT32_C( 483.43), SIMDE_FLOAT32_C( 245.18), SIMDE_FLOAT32_C( 987.92), SIMDE_FLOAT32_C( 407.77), SIMDE_FLOAT32_C( 184.67), 
SIMDE_FLOAT32_C( 504.64), SIMDE_FLOAT32_C( 244.98), SIMDE_FLOAT32_C( -92.45), SIMDE_FLOAT32_C( 233.10), SIMDE_FLOAT32_C( 347.51), SIMDE_FLOAT32_C( 453.74), SIMDE_FLOAT32_C( 654.02), SIMDE_FLOAT32_C( 778.35), SIMDE_FLOAT32_C( 364.48), SIMDE_FLOAT32_C( 774.62) }, { SIMDE_FLOAT32_C( 830.85), SIMDE_FLOAT32_C( 21.99), SIMDE_FLOAT32_C( 252.98), SIMDE_FLOAT32_C( 31.43), SIMDE_FLOAT32_C( 20.19), SIMDE_FLOAT32_C( 13.59), SIMDE_FLOAT32_C( -30.00), SIMDE_FLOAT32_C( 15.65), SIMDE_FLOAT32_C( 804.54), SIMDE_FLOAT32_C( 537.62), SIMDE_FLOAT32_C( 139.69), SIMDE_FLOAT32_C( 223.23), SIMDE_FLOAT32_C( 700.71), SIMDE_FLOAT32_C( 84.06), SIMDE_FLOAT32_C( 154.25), SIMDE_FLOAT32_C( 749.16) } }, { { SIMDE_FLOAT32_C( 341.01), SIMDE_FLOAT32_C( 234.85), SIMDE_FLOAT32_C( 83.58), SIMDE_FLOAT32_C( -91.38), SIMDE_FLOAT32_C( 735.59), SIMDE_FLOAT32_C( -51.68), SIMDE_FLOAT32_C( 211.29), SIMDE_FLOAT32_C( 125.75), SIMDE_FLOAT32_C( 171.18), SIMDE_FLOAT32_C( 387.03), SIMDE_FLOAT32_C( 278.80), SIMDE_FLOAT32_C( 688.49), SIMDE_FLOAT32_C( 284.47), SIMDE_FLOAT32_C( 309.43), SIMDE_FLOAT32_C( 761.03), SIMDE_FLOAT32_C( 804.65) }, UINT8_C( 32), { SIMDE_FLOAT32_C( 348.92), SIMDE_FLOAT32_C( -22.74), SIMDE_FLOAT32_C( 451.50), SIMDE_FLOAT32_C( 370.23), SIMDE_FLOAT32_C( 582.81), SIMDE_FLOAT32_C( 734.74), SIMDE_FLOAT32_C( 42.12), SIMDE_FLOAT32_C( 353.92), SIMDE_FLOAT32_C( 504.09), SIMDE_FLOAT32_C( 977.38), SIMDE_FLOAT32_C( 328.27), SIMDE_FLOAT32_C( 482.22), SIMDE_FLOAT32_C( 737.20), SIMDE_FLOAT32_C( 630.17), SIMDE_FLOAT32_C( 265.58), SIMDE_FLOAT32_C( 661.60) }, { SIMDE_FLOAT32_C( 341.01), SIMDE_FLOAT32_C( 234.85), SIMDE_FLOAT32_C( 83.58), SIMDE_FLOAT32_C( -91.38), SIMDE_FLOAT32_C( 735.59), SIMDE_FLOAT32_C( 27.11), SIMDE_FLOAT32_C( 211.29), SIMDE_FLOAT32_C( 125.75), SIMDE_FLOAT32_C( 171.18), SIMDE_FLOAT32_C( 387.03), SIMDE_FLOAT32_C( 278.80), SIMDE_FLOAT32_C( 688.49), SIMDE_FLOAT32_C( 284.47), SIMDE_FLOAT32_C( 309.43), SIMDE_FLOAT32_C( 761.03), SIMDE_FLOAT32_C( 804.65) } }, { { SIMDE_FLOAT32_C( 525.05), SIMDE_FLOAT32_C( 
166.18), SIMDE_FLOAT32_C( 952.07), SIMDE_FLOAT32_C( 664.08), SIMDE_FLOAT32_C( 409.88), SIMDE_FLOAT32_C( 422.77), SIMDE_FLOAT32_C( 381.48), SIMDE_FLOAT32_C( 505.76), SIMDE_FLOAT32_C( 441.87), SIMDE_FLOAT32_C( 222.70), SIMDE_FLOAT32_C( 519.86), SIMDE_FLOAT32_C( 854.25), SIMDE_FLOAT32_C( -46.91), SIMDE_FLOAT32_C( 81.38), SIMDE_FLOAT32_C( 328.69), SIMDE_FLOAT32_C( 977.87) }, UINT8_C(124), { SIMDE_FLOAT32_C( 332.53), SIMDE_FLOAT32_C( 706.88), SIMDE_FLOAT32_C( 312.95), SIMDE_FLOAT32_C( 533.68), SIMDE_FLOAT32_C( -71.13), SIMDE_FLOAT32_C( -10.56), SIMDE_FLOAT32_C( 585.48), SIMDE_FLOAT32_C( 449.30), SIMDE_FLOAT32_C( 860.34), SIMDE_FLOAT32_C( 80.38), SIMDE_FLOAT32_C( 990.66), SIMDE_FLOAT32_C( 203.10), SIMDE_FLOAT32_C( -25.23), SIMDE_FLOAT32_C( 283.85), SIMDE_FLOAT32_C( 906.28), SIMDE_FLOAT32_C( 992.76) }, { SIMDE_FLOAT32_C( 525.05), SIMDE_FLOAT32_C( 166.18), SIMDE_FLOAT32_C( 17.69), SIMDE_FLOAT32_C( 23.10), SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 24.20), SIMDE_FLOAT32_C( 505.76), SIMDE_FLOAT32_C( 441.87), SIMDE_FLOAT32_C( 222.70), SIMDE_FLOAT32_C( 519.86), SIMDE_FLOAT32_C( 854.25), SIMDE_FLOAT32_C( -46.91), SIMDE_FLOAT32_C( 81.38), SIMDE_FLOAT32_C( 328.69), SIMDE_FLOAT32_C( 977.87) } }, { { SIMDE_FLOAT32_C( 261.52), SIMDE_FLOAT32_C( 593.91), SIMDE_FLOAT32_C( 282.09), SIMDE_FLOAT32_C( 905.01), SIMDE_FLOAT32_C( 558.85), SIMDE_FLOAT32_C( 546.97), SIMDE_FLOAT32_C( 39.41), SIMDE_FLOAT32_C( 37.09), SIMDE_FLOAT32_C( 653.22), SIMDE_FLOAT32_C( 550.08), SIMDE_FLOAT32_C( 671.18), SIMDE_FLOAT32_C( 893.07), SIMDE_FLOAT32_C( 49.27), SIMDE_FLOAT32_C( 666.55), SIMDE_FLOAT32_C( 76.85), SIMDE_FLOAT32_C( 59.26) }, UINT8_C( 52), { SIMDE_FLOAT32_C( 191.70), SIMDE_FLOAT32_C( 831.43), SIMDE_FLOAT32_C( 284.20), SIMDE_FLOAT32_C( 147.82), SIMDE_FLOAT32_C( 463.91), SIMDE_FLOAT32_C( -90.80), SIMDE_FLOAT32_C( 595.96), SIMDE_FLOAT32_C( 665.44), SIMDE_FLOAT32_C( 187.07), SIMDE_FLOAT32_C( 126.37), SIMDE_FLOAT32_C( 751.70), SIMDE_FLOAT32_C( 153.73), SIMDE_FLOAT32_C( 678.31), SIMDE_FLOAT32_C( 
781.00), SIMDE_FLOAT32_C( 842.34), SIMDE_FLOAT32_C( 5.66) }, { SIMDE_FLOAT32_C( 261.52), SIMDE_FLOAT32_C( 593.91), SIMDE_FLOAT32_C( 16.86), SIMDE_FLOAT32_C( 905.01), SIMDE_FLOAT32_C( 21.54), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 39.41), SIMDE_FLOAT32_C( 37.09), SIMDE_FLOAT32_C( 653.22), SIMDE_FLOAT32_C( 550.08), SIMDE_FLOAT32_C( 671.18), SIMDE_FLOAT32_C( 893.07), SIMDE_FLOAT32_C( 49.27), SIMDE_FLOAT32_C( 666.55), SIMDE_FLOAT32_C( 76.85), SIMDE_FLOAT32_C( 59.26) } }, { { SIMDE_FLOAT32_C( 370.90), SIMDE_FLOAT32_C( 934.09), SIMDE_FLOAT32_C( 929.98), SIMDE_FLOAT32_C( 111.97), SIMDE_FLOAT32_C( 630.79), SIMDE_FLOAT32_C( 778.41), SIMDE_FLOAT32_C( 263.20), SIMDE_FLOAT32_C( 298.61), SIMDE_FLOAT32_C( 360.62), SIMDE_FLOAT32_C( 832.32), SIMDE_FLOAT32_C( 957.47), SIMDE_FLOAT32_C( 168.49), SIMDE_FLOAT32_C( 294.36), SIMDE_FLOAT32_C( 406.95), SIMDE_FLOAT32_C( 757.71), SIMDE_FLOAT32_C( 992.73) }, UINT8_C( 43), { SIMDE_FLOAT32_C( 358.33), SIMDE_FLOAT32_C( 783.52), SIMDE_FLOAT32_C( 332.05), SIMDE_FLOAT32_C( 318.37), SIMDE_FLOAT32_C( 298.14), SIMDE_FLOAT32_C( 66.82), SIMDE_FLOAT32_C( 869.43), SIMDE_FLOAT32_C( 946.18), SIMDE_FLOAT32_C( 680.16), SIMDE_FLOAT32_C( 120.71), SIMDE_FLOAT32_C( 248.65), SIMDE_FLOAT32_C( -79.28), SIMDE_FLOAT32_C( 590.86), SIMDE_FLOAT32_C( 707.03), SIMDE_FLOAT32_C( 570.73), SIMDE_FLOAT32_C( 84.44) }, { SIMDE_FLOAT32_C( 18.93), SIMDE_FLOAT32_C( 27.99), SIMDE_FLOAT32_C( 929.98), SIMDE_FLOAT32_C( 17.84), SIMDE_FLOAT32_C( 630.79), SIMDE_FLOAT32_C( 8.17), SIMDE_FLOAT32_C( 263.20), SIMDE_FLOAT32_C( 298.61), SIMDE_FLOAT32_C( 360.62), SIMDE_FLOAT32_C( 832.32), SIMDE_FLOAT32_C( 957.47), SIMDE_FLOAT32_C( 168.49), SIMDE_FLOAT32_C( 294.36), SIMDE_FLOAT32_C( 406.95), SIMDE_FLOAT32_C( 757.71), SIMDE_FLOAT32_C( 992.73) } }, { { SIMDE_FLOAT32_C( -62.52), SIMDE_FLOAT32_C( 613.71), SIMDE_FLOAT32_C( 789.90), SIMDE_FLOAT32_C( 932.36), SIMDE_FLOAT32_C( 552.83), SIMDE_FLOAT32_C( 530.45), SIMDE_FLOAT32_C( 607.43), SIMDE_FLOAT32_C( 797.28), SIMDE_FLOAT32_C( 661.83), SIMDE_FLOAT32_C( 
-89.44), SIMDE_FLOAT32_C( 318.54), SIMDE_FLOAT32_C( 945.75), SIMDE_FLOAT32_C( -58.88), SIMDE_FLOAT32_C( 130.26), SIMDE_FLOAT32_C( 25.25), SIMDE_FLOAT32_C( 627.24) }, UINT8_C( 40), { SIMDE_FLOAT32_C( 242.87), SIMDE_FLOAT32_C( 758.11), SIMDE_FLOAT32_C( 97.46), SIMDE_FLOAT32_C( -58.70), SIMDE_FLOAT32_C( 972.97), SIMDE_FLOAT32_C( -55.48), SIMDE_FLOAT32_C( 199.83), SIMDE_FLOAT32_C( 10.44), SIMDE_FLOAT32_C( 304.24), SIMDE_FLOAT32_C( 341.28), SIMDE_FLOAT32_C( 281.04), SIMDE_FLOAT32_C( 900.87), SIMDE_FLOAT32_C( 363.46), SIMDE_FLOAT32_C( 218.41), SIMDE_FLOAT32_C( 940.41), SIMDE_FLOAT32_C( 457.09) }, { SIMDE_FLOAT32_C( -62.52), SIMDE_FLOAT32_C( 613.71), SIMDE_FLOAT32_C( 789.90), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 552.83), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 607.43), SIMDE_FLOAT32_C( 797.28), SIMDE_FLOAT32_C( 661.83), SIMDE_FLOAT32_C( -89.44), SIMDE_FLOAT32_C( 318.54), SIMDE_FLOAT32_C( 945.75), SIMDE_FLOAT32_C( -58.88), SIMDE_FLOAT32_C( 130.26), SIMDE_FLOAT32_C( 25.25), SIMDE_FLOAT32_C( 627.24) } }, { { SIMDE_FLOAT32_C( 750.99), SIMDE_FLOAT32_C( 296.52), SIMDE_FLOAT32_C( 456.51), SIMDE_FLOAT32_C( 964.82), SIMDE_FLOAT32_C( 376.02), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -19.77), SIMDE_FLOAT32_C( 808.40), SIMDE_FLOAT32_C( 462.68), SIMDE_FLOAT32_C( 106.03), SIMDE_FLOAT32_C( 864.38), SIMDE_FLOAT32_C( 846.10), SIMDE_FLOAT32_C( 539.67), SIMDE_FLOAT32_C( 599.36), SIMDE_FLOAT32_C( 551.35), SIMDE_FLOAT32_C( -77.63) }, UINT8_C( 43), { SIMDE_FLOAT32_C( 762.24), SIMDE_FLOAT32_C( 130.02), SIMDE_FLOAT32_C( 518.26), SIMDE_FLOAT32_C( 332.17), SIMDE_FLOAT32_C( 129.59), SIMDE_FLOAT32_C( 952.63), SIMDE_FLOAT32_C( 71.40), SIMDE_FLOAT32_C( 788.60), SIMDE_FLOAT32_C( 964.30), SIMDE_FLOAT32_C( 468.08), SIMDE_FLOAT32_C( 636.78), SIMDE_FLOAT32_C( 267.82), SIMDE_FLOAT32_C( 875.24), SIMDE_FLOAT32_C( 684.32), SIMDE_FLOAT32_C( 694.20), SIMDE_FLOAT32_C( 586.14) }, { SIMDE_FLOAT32_C( 27.61), SIMDE_FLOAT32_C( 11.40), SIMDE_FLOAT32_C( 456.51), SIMDE_FLOAT32_C( 18.23), SIMDE_FLOAT32_C( 376.02), 
SIMDE_FLOAT32_C( 30.86), SIMDE_FLOAT32_C( -19.77), SIMDE_FLOAT32_C( 808.40), SIMDE_FLOAT32_C( 462.68), SIMDE_FLOAT32_C( 106.03), SIMDE_FLOAT32_C( 864.38), SIMDE_FLOAT32_C( 846.10), SIMDE_FLOAT32_C( 539.67), SIMDE_FLOAT32_C( 599.36), SIMDE_FLOAT32_C( 551.35), SIMDE_FLOAT32_C( -77.63) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_sqrt_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_sqrt_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 475.48), SIMDE_FLOAT64_C( 699.80), SIMDE_FLOAT64_C( 552.88), SIMDE_FLOAT64_C( 673.91), SIMDE_FLOAT64_C( 591.26), SIMDE_FLOAT64_C( 249.70), SIMDE_FLOAT64_C( 639.44), SIMDE_FLOAT64_C( 997.04) }, { SIMDE_FLOAT64_C( 21.81), SIMDE_FLOAT64_C( 26.45), SIMDE_FLOAT64_C( 23.51), SIMDE_FLOAT64_C( 25.96), SIMDE_FLOAT64_C( 24.32), SIMDE_FLOAT64_C( 15.80), SIMDE_FLOAT64_C( 25.29), SIMDE_FLOAT64_C( 31.58) } }, { { SIMDE_FLOAT64_C( 727.89), SIMDE_FLOAT64_C( 978.25), SIMDE_FLOAT64_C( 4.89), SIMDE_FLOAT64_C( 693.62), SIMDE_FLOAT64_C( 611.57), SIMDE_FLOAT64_C( 256.31), SIMDE_FLOAT64_C( 600.18), SIMDE_FLOAT64_C( 836.50) }, { SIMDE_FLOAT64_C( 26.98), SIMDE_FLOAT64_C( 31.28), SIMDE_FLOAT64_C( 2.21), SIMDE_FLOAT64_C( 26.34), SIMDE_FLOAT64_C( 24.73), SIMDE_FLOAT64_C( 16.01), SIMDE_FLOAT64_C( 24.50), SIMDE_FLOAT64_C( 28.92) } }, { { SIMDE_FLOAT64_C( 214.90), SIMDE_FLOAT64_C( 393.95), SIMDE_FLOAT64_C( 919.26), SIMDE_FLOAT64_C( 432.55), SIMDE_FLOAT64_C( 371.71), SIMDE_FLOAT64_C( 880.26), SIMDE_FLOAT64_C( 482.63), SIMDE_FLOAT64_C( 601.65) }, { SIMDE_FLOAT64_C( 14.66), SIMDE_FLOAT64_C( 19.85), SIMDE_FLOAT64_C( 30.32), SIMDE_FLOAT64_C( 20.80), SIMDE_FLOAT64_C( 19.28), SIMDE_FLOAT64_C( 29.67), 
SIMDE_FLOAT64_C( 21.97), SIMDE_FLOAT64_C( 24.53) } }, { { SIMDE_FLOAT64_C( 234.49), SIMDE_FLOAT64_C( 534.45), SIMDE_FLOAT64_C( -6.91), SIMDE_FLOAT64_C( 455.17), SIMDE_FLOAT64_C( 785.50), SIMDE_FLOAT64_C( 558.55), SIMDE_FLOAT64_C( 29.83), SIMDE_FLOAT64_C( 758.42) }, { SIMDE_FLOAT64_C( 15.31), SIMDE_FLOAT64_C( 23.12), SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 21.33), SIMDE_FLOAT64_C( 28.03), SIMDE_FLOAT64_C( 23.63), SIMDE_FLOAT64_C( 5.46), SIMDE_FLOAT64_C( 27.54) } }, { { SIMDE_FLOAT64_C( 958.27), SIMDE_FLOAT64_C( 519.34), SIMDE_FLOAT64_C( 591.49), SIMDE_FLOAT64_C( 62.08), SIMDE_FLOAT64_C( 499.87), SIMDE_FLOAT64_C( 535.48), SIMDE_FLOAT64_C( 73.76), SIMDE_FLOAT64_C( 819.20) }, { SIMDE_FLOAT64_C( 30.96), SIMDE_FLOAT64_C( 22.79), SIMDE_FLOAT64_C( 24.32), SIMDE_FLOAT64_C( 7.88), SIMDE_FLOAT64_C( 22.36), SIMDE_FLOAT64_C( 23.14), SIMDE_FLOAT64_C( 8.59), SIMDE_FLOAT64_C( 28.62) } }, { { SIMDE_FLOAT64_C( 299.98), SIMDE_FLOAT64_C( 211.87), SIMDE_FLOAT64_C( 78.11), SIMDE_FLOAT64_C( 113.36), SIMDE_FLOAT64_C( 727.13), SIMDE_FLOAT64_C( 252.70), SIMDE_FLOAT64_C( 421.79), SIMDE_FLOAT64_C( 169.91) }, { SIMDE_FLOAT64_C( 17.32), SIMDE_FLOAT64_C( 14.56), SIMDE_FLOAT64_C( 8.84), SIMDE_FLOAT64_C( 10.65), SIMDE_FLOAT64_C( 26.97), SIMDE_FLOAT64_C( 15.90), SIMDE_FLOAT64_C( 20.54), SIMDE_FLOAT64_C( 13.04) } }, { { SIMDE_FLOAT64_C( 878.93), SIMDE_FLOAT64_C( 333.65), SIMDE_FLOAT64_C( 469.80), SIMDE_FLOAT64_C( 224.14), SIMDE_FLOAT64_C( 245.21), SIMDE_FLOAT64_C( 905.97), SIMDE_FLOAT64_C( 267.17), SIMDE_FLOAT64_C( 243.63) }, { SIMDE_FLOAT64_C( 29.65), SIMDE_FLOAT64_C( 18.27), SIMDE_FLOAT64_C( 21.67), SIMDE_FLOAT64_C( 14.97), SIMDE_FLOAT64_C( 15.66), SIMDE_FLOAT64_C( 30.10), SIMDE_FLOAT64_C( 16.35), SIMDE_FLOAT64_C( 15.61) } }, { { SIMDE_FLOAT64_C( 486.76), SIMDE_FLOAT64_C( 343.81), SIMDE_FLOAT64_C( 521.65), SIMDE_FLOAT64_C( 919.38), SIMDE_FLOAT64_C( 462.37), SIMDE_FLOAT64_C( 489.02), SIMDE_FLOAT64_C( 941.81), SIMDE_FLOAT64_C( 719.89) }, { SIMDE_FLOAT64_C( 22.06), SIMDE_FLOAT64_C( 18.54), SIMDE_FLOAT64_C( 
22.84), SIMDE_FLOAT64_C( 30.32), SIMDE_FLOAT64_C( 21.50), SIMDE_FLOAT64_C( 22.11), SIMDE_FLOAT64_C( 30.69), SIMDE_FLOAT64_C( 26.83) } }, { { SIMDE_FLOAT64_C( 260.10), SIMDE_FLOAT64_C( 2158.90), SIMDE_FLOAT64_C( 9449.89), SIMDE_FLOAT64_C( 5861.96), SIMDE_FLOAT64_C( 394.19), SIMDE_FLOAT64_C( 3597.89), SIMDE_FLOAT64_C( 5033.33), SIMDE_FLOAT64_C( 1539.23) }, { SIMDE_FLOAT64_C( 16.13), SIMDE_FLOAT64_C( 46.46), SIMDE_FLOAT64_C( 97.21), SIMDE_FLOAT64_C( 76.56), SIMDE_FLOAT64_C( 19.85), SIMDE_FLOAT64_C( 59.98), SIMDE_FLOAT64_C( 70.95), SIMDE_FLOAT64_C( 39.23) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_sqrt_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_sqrt_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 436.97), SIMDE_FLOAT64_C( 398.19), SIMDE_FLOAT64_C( 907.60), SIMDE_FLOAT64_C( 61.33), SIMDE_FLOAT64_C( 912.86), SIMDE_FLOAT64_C( 540.33), SIMDE_FLOAT64_C( 579.42), SIMDE_FLOAT64_C( 990.91) }, UINT8_C( 17), { SIMDE_FLOAT64_C( 499.36), SIMDE_FLOAT64_C( 607.00), SIMDE_FLOAT64_C( 184.57), SIMDE_FLOAT64_C( -79.89), SIMDE_FLOAT64_C( 246.08), SIMDE_FLOAT64_C( 684.81), SIMDE_FLOAT64_C( 154.65), SIMDE_FLOAT64_C( 825.63) }, { SIMDE_FLOAT64_C( 22.35), SIMDE_FLOAT64_C( 398.19), SIMDE_FLOAT64_C( 907.60), SIMDE_FLOAT64_C( 61.33), SIMDE_FLOAT64_C( 15.69), SIMDE_FLOAT64_C( 540.33), SIMDE_FLOAT64_C( 579.42), SIMDE_FLOAT64_C( 990.91) } }, { { SIMDE_FLOAT64_C( 735.36), SIMDE_FLOAT64_C( 411.96), SIMDE_FLOAT64_C( 273.29), SIMDE_FLOAT64_C( 443.97), SIMDE_FLOAT64_C( 379.78), SIMDE_FLOAT64_C( 504.36), SIMDE_FLOAT64_C( 13.17), SIMDE_FLOAT64_C( 95.38) }, UINT8_C(184), { SIMDE_FLOAT64_C( 913.65), SIMDE_FLOAT64_C( 567.81), SIMDE_FLOAT64_C( 431.31), 
SIMDE_FLOAT64_C( 891.24), SIMDE_FLOAT64_C( 236.76), SIMDE_FLOAT64_C( 364.35), SIMDE_FLOAT64_C( 850.12), SIMDE_FLOAT64_C( 890.20) }, { SIMDE_FLOAT64_C( 735.36), SIMDE_FLOAT64_C( 411.96), SIMDE_FLOAT64_C( 273.29), SIMDE_FLOAT64_C( 29.85), SIMDE_FLOAT64_C( 15.39), SIMDE_FLOAT64_C( 19.09), SIMDE_FLOAT64_C( 13.17), SIMDE_FLOAT64_C( 29.84) } }, { { SIMDE_FLOAT64_C( 218.79), SIMDE_FLOAT64_C( 849.62), SIMDE_FLOAT64_C( 238.02), SIMDE_FLOAT64_C( 635.35), SIMDE_FLOAT64_C( 466.14), SIMDE_FLOAT64_C( -6.77), SIMDE_FLOAT64_C( 423.69), SIMDE_FLOAT64_C( 491.52) }, UINT8_C( 45), { SIMDE_FLOAT64_C( 263.35), SIMDE_FLOAT64_C( 539.75), SIMDE_FLOAT64_C( 722.58), SIMDE_FLOAT64_C( 197.33), SIMDE_FLOAT64_C( 953.96), SIMDE_FLOAT64_C( 549.94), SIMDE_FLOAT64_C( 504.50), SIMDE_FLOAT64_C( 168.47) }, { SIMDE_FLOAT64_C( 16.23), SIMDE_FLOAT64_C( 849.62), SIMDE_FLOAT64_C( 26.88), SIMDE_FLOAT64_C( 14.05), SIMDE_FLOAT64_C( 466.14), SIMDE_FLOAT64_C( 23.45), SIMDE_FLOAT64_C( 423.69), SIMDE_FLOAT64_C( 491.52) } }, { { SIMDE_FLOAT64_C( 937.73), SIMDE_FLOAT64_C( 521.55), SIMDE_FLOAT64_C( 689.62), SIMDE_FLOAT64_C( 704.92), SIMDE_FLOAT64_C( 375.20), SIMDE_FLOAT64_C( 562.70), SIMDE_FLOAT64_C( 460.68), SIMDE_FLOAT64_C( 702.26) }, UINT8_C( 99), { SIMDE_FLOAT64_C( 247.65), SIMDE_FLOAT64_C( 799.76), SIMDE_FLOAT64_C( 469.26), SIMDE_FLOAT64_C( 689.97), SIMDE_FLOAT64_C( 966.92), SIMDE_FLOAT64_C( 496.27), SIMDE_FLOAT64_C( 437.90), SIMDE_FLOAT64_C( 542.79) }, { SIMDE_FLOAT64_C( 15.74), SIMDE_FLOAT64_C( 28.28), SIMDE_FLOAT64_C( 689.62), SIMDE_FLOAT64_C( 704.92), SIMDE_FLOAT64_C( 375.20), SIMDE_FLOAT64_C( 22.28), SIMDE_FLOAT64_C( 20.93), SIMDE_FLOAT64_C( 702.26) } }, { { SIMDE_FLOAT64_C( 239.42), SIMDE_FLOAT64_C( 982.54), SIMDE_FLOAT64_C( 153.70), SIMDE_FLOAT64_C( 223.51), SIMDE_FLOAT64_C( 914.90), SIMDE_FLOAT64_C( 712.58), SIMDE_FLOAT64_C( 479.58), SIMDE_FLOAT64_C( 340.52) }, UINT8_C(110), { SIMDE_FLOAT64_C( 629.70), SIMDE_FLOAT64_C( 40.18), SIMDE_FLOAT64_C( 773.21), SIMDE_FLOAT64_C( 826.47), SIMDE_FLOAT64_C( 650.68), 
SIMDE_FLOAT64_C( 597.70), SIMDE_FLOAT64_C( 99.58), SIMDE_FLOAT64_C( 419.32) }, { SIMDE_FLOAT64_C( 239.42), SIMDE_FLOAT64_C( 6.34), SIMDE_FLOAT64_C( 27.81), SIMDE_FLOAT64_C( 28.75), SIMDE_FLOAT64_C( 914.90), SIMDE_FLOAT64_C( 24.45), SIMDE_FLOAT64_C( 9.98), SIMDE_FLOAT64_C( 340.52) } }, { { SIMDE_FLOAT64_C( 659.09), SIMDE_FLOAT64_C( 166.50), SIMDE_FLOAT64_C( 866.73), SIMDE_FLOAT64_C( 759.19), SIMDE_FLOAT64_C( 667.51), SIMDE_FLOAT64_C( 836.84), SIMDE_FLOAT64_C( 988.98), SIMDE_FLOAT64_C( 546.59) }, UINT8_C(105), { SIMDE_FLOAT64_C( 223.00), SIMDE_FLOAT64_C( -16.57), SIMDE_FLOAT64_C( 745.33), SIMDE_FLOAT64_C( 813.45), SIMDE_FLOAT64_C( 615.39), SIMDE_FLOAT64_C( 490.76), SIMDE_FLOAT64_C( 133.77), SIMDE_FLOAT64_C( 749.00) }, { SIMDE_FLOAT64_C( 14.93), SIMDE_FLOAT64_C( 166.50), SIMDE_FLOAT64_C( 866.73), SIMDE_FLOAT64_C( 28.52), SIMDE_FLOAT64_C( 667.51), SIMDE_FLOAT64_C( 22.15), SIMDE_FLOAT64_C( 11.57), SIMDE_FLOAT64_C( 546.59) } }, { { SIMDE_FLOAT64_C( 910.80), SIMDE_FLOAT64_C( 67.30), SIMDE_FLOAT64_C( 90.00), SIMDE_FLOAT64_C( 999.85), SIMDE_FLOAT64_C( 617.26), SIMDE_FLOAT64_C( 51.15), SIMDE_FLOAT64_C( 319.01), SIMDE_FLOAT64_C( 38.40) }, UINT8_C(223), { SIMDE_FLOAT64_C( 305.09), SIMDE_FLOAT64_C( 369.65), SIMDE_FLOAT64_C( 856.43), SIMDE_FLOAT64_C( 297.17), SIMDE_FLOAT64_C( 331.69), SIMDE_FLOAT64_C( 827.02), SIMDE_FLOAT64_C( -88.21), SIMDE_FLOAT64_C( -20.46) }, { SIMDE_FLOAT64_C( 17.47), SIMDE_FLOAT64_C( 19.23), SIMDE_FLOAT64_C( 29.26), SIMDE_FLOAT64_C( 17.24), SIMDE_FLOAT64_C( 18.21), SIMDE_FLOAT64_C( 51.15), SIMDE_MATH_NAN, SIMDE_MATH_NAN } }, { { SIMDE_FLOAT64_C( 151.41), SIMDE_FLOAT64_C( 225.20), SIMDE_FLOAT64_C( 805.52), SIMDE_FLOAT64_C( 450.20), SIMDE_FLOAT64_C( 464.68), SIMDE_FLOAT64_C( 748.22), SIMDE_FLOAT64_C( -32.49), SIMDE_FLOAT64_C( 491.49) }, UINT8_C( 9), { SIMDE_FLOAT64_C( 616.36), SIMDE_FLOAT64_C( 262.82), SIMDE_FLOAT64_C( 503.00), SIMDE_FLOAT64_C( 184.91), SIMDE_FLOAT64_C( 81.40), SIMDE_FLOAT64_C( 665.20), SIMDE_FLOAT64_C( 481.84), SIMDE_FLOAT64_C( -81.41) }, { 
SIMDE_FLOAT64_C( 24.83), SIMDE_FLOAT64_C( 225.20), SIMDE_FLOAT64_C( 805.52), SIMDE_FLOAT64_C( 13.60), SIMDE_FLOAT64_C( 464.68), SIMDE_FLOAT64_C( 748.22), SIMDE_FLOAT64_C( -32.49), SIMDE_FLOAT64_C( 491.49) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_sqrt_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sqrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_sqrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sqrt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_sqrt_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/sra.c000066400000000000000000000301571400333146700162130ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN sra #include #include static int test_simde_mm512_sra_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[32]; const int64_t b[2]; const int16_t r[32]; } test_vec[] = { { { INT16_C( 11061), INT16_C( 2909), INT16_C( 7771), -INT16_C( 15451), INT16_C( 10536), INT16_C( 16880), INT16_C( 9564), INT16_C( 7363), INT16_C( 22530), -INT16_C( 8028), INT16_C( 1724), -INT16_C( 32593), -INT16_C( 10410), INT16_C( 1671), INT16_C( 28692), INT16_C( 18993), -INT16_C( 29029), -INT16_C( 2475), -INT16_C( 1107), -INT16_C( 10822), -INT16_C( 21980), -INT16_C( 32490), -INT16_C( 9777), -INT16_C( 11619), INT16_C( 16689), -INT16_C( 4686), INT16_C( 25159), -INT16_C( 25235), -INT16_C( 2759), INT16_C( 19876), -INT16_C( 10907), INT16_C( 407) }, { INT64_C( 0), INT64_C( 0) }, { INT16_C( 11061), INT16_C( 2909), INT16_C( 7771), -INT16_C( 15451), INT16_C( 10536), INT16_C( 16880), INT16_C( 9564), INT16_C( 7363), INT16_C( 22530), -INT16_C( 8028), INT16_C( 1724), -INT16_C( 32593), -INT16_C( 10410), INT16_C( 1671), INT16_C( 28692), INT16_C( 18993), -INT16_C( 29029), -INT16_C( 2475), -INT16_C( 1107), -INT16_C( 10822), -INT16_C( 21980), -INT16_C( 32490), -INT16_C( 9777), -INT16_C( 11619), INT16_C( 16689), -INT16_C( 4686), INT16_C( 25159), -INT16_C( 25235), -INT16_C( 2759), INT16_C( 19876), -INT16_C( 10907), INT16_C( 407) } }, { { -INT16_C( 20629), -INT16_C( 19467), INT16_C( 25361), INT16_C( 19024), -INT16_C( 2984), -INT16_C( 17000), INT16_C( 12234), INT16_C( 11966), -INT16_C( 18916), INT16_C( 1087), INT16_C( 9575), -INT16_C( 15599), -INT16_C( 25054), -INT16_C( 1554), -INT16_C( 5175), INT16_C( 13313), -INT16_C( 2406), -INT16_C( 21273), 
INT16_C( 14425), -INT16_C( 19978), -INT16_C( 29140), -INT16_C( 2449), INT16_C( 11710), -INT16_C( 9692), INT16_C( 25571), INT16_C( 19423), -INT16_C( 3959), -INT16_C( 21746), -INT16_C( 882), INT16_C( 22436), -INT16_C( 23065), -INT16_C( 32372) }, { INT64_C( 13), INT64_C( 2) }, { -INT16_C( 3), -INT16_C( 3), INT16_C( 3), INT16_C( 2), -INT16_C( 1), -INT16_C( 3), INT16_C( 1), INT16_C( 1), -INT16_C( 3), INT16_C( 0), INT16_C( 1), -INT16_C( 2), -INT16_C( 4), -INT16_C( 1), -INT16_C( 1), INT16_C( 1), -INT16_C( 1), -INT16_C( 3), INT16_C( 1), -INT16_C( 3), -INT16_C( 4), -INT16_C( 1), INT16_C( 1), -INT16_C( 2), INT16_C( 3), INT16_C( 2), -INT16_C( 1), -INT16_C( 3), -INT16_C( 1), INT16_C( 2), -INT16_C( 3), -INT16_C( 4) } }, { { INT16_C( 10838), -INT16_C( 8334), -INT16_C( 32742), -INT16_C( 22389), INT16_C( 12156), INT16_C( 25344), -INT16_C( 29483), INT16_C( 29156), INT16_C( 4863), -INT16_C( 21658), INT16_C( 3382), -INT16_C( 6013), INT16_C( 20771), INT16_C( 26457), -INT16_C( 23484), -INT16_C( 25714), INT16_C( 206), -INT16_C( 6022), INT16_C( 1408), -INT16_C( 880), -INT16_C( 28619), INT16_C( 2655), INT16_C( 17180), INT16_C( 7291), -INT16_C( 7851), -INT16_C( 29753), INT16_C( 19183), INT16_C( 4724), -INT16_C( 12901), -INT16_C( 8071), INT16_C( 1905), INT16_C( 16251) }, { INT64_C( 3), INT64_C( 7) }, { INT16_C( 1354), -INT16_C( 1042), -INT16_C( 4093), -INT16_C( 2799), INT16_C( 1519), INT16_C( 3168), -INT16_C( 3686), INT16_C( 3644), INT16_C( 607), -INT16_C( 2708), INT16_C( 422), -INT16_C( 752), INT16_C( 2596), INT16_C( 3307), -INT16_C( 2936), -INT16_C( 3215), INT16_C( 25), -INT16_C( 753), INT16_C( 176), -INT16_C( 110), -INT16_C( 3578), INT16_C( 331), INT16_C( 2147), INT16_C( 911), -INT16_C( 982), -INT16_C( 3720), INT16_C( 2397), INT16_C( 590), -INT16_C( 1613), -INT16_C( 1009), INT16_C( 238), INT16_C( 2031) } }, { { INT16_C( 18326), -INT16_C( 31481), INT16_C( 31633), INT16_C( 11672), INT16_C( 4424), -INT16_C( 18163), -INT16_C( 30695), INT16_C( 8440), INT16_C( 8061), INT16_C( 30888), INT16_C( 
11222), INT16_C( 7848), -INT16_C( 7666), INT16_C( 13443), INT16_C( 919), INT16_C( 11951), -INT16_C( 18869), -INT16_C( 9037), INT16_C( 19249), INT16_C( 30985), INT16_C( 5725), INT16_C( 30258), INT16_C( 10910), INT16_C( 7318), INT16_C( 15945), INT16_C( 8340), INT16_C( 15722), INT16_C( 30782), -INT16_C( 16097), -INT16_C( 18516), INT16_C( 23493), INT16_C( 4325) }, { INT64_C( 4), INT64_C( 6) }, { INT16_C( 1145), -INT16_C( 1968), INT16_C( 1977), INT16_C( 729), INT16_C( 276), -INT16_C( 1136), -INT16_C( 1919), INT16_C( 527), INT16_C( 503), INT16_C( 1930), INT16_C( 701), INT16_C( 490), -INT16_C( 480), INT16_C( 840), INT16_C( 57), INT16_C( 746), -INT16_C( 1180), -INT16_C( 565), INT16_C( 1203), INT16_C( 1936), INT16_C( 357), INT16_C( 1891), INT16_C( 681), INT16_C( 457), INT16_C( 996), INT16_C( 521), INT16_C( 982), INT16_C( 1923), -INT16_C( 1007), -INT16_C( 1158), INT16_C( 1468), INT16_C( 270) } }, { { INT16_C( 23436), -INT16_C( 2429), -INT16_C( 15720), -INT16_C( 18322), INT16_C( 6787), INT16_C( 18543), INT16_C( 21621), -INT16_C( 30888), INT16_C( 17900), -INT16_C( 12085), -INT16_C( 30661), INT16_C( 18193), -INT16_C( 14217), -INT16_C( 28174), -INT16_C( 18154), -INT16_C( 23819), INT16_C( 30741), -INT16_C( 21096), INT16_C( 1594), -INT16_C( 16795), -INT16_C( 11232), -INT16_C( 27386), INT16_C( 24360), INT16_C( 5405), -INT16_C( 5980), -INT16_C( 8219), -INT16_C( 2192), -INT16_C( 6362), INT16_C( 6591), -INT16_C( 10887), INT16_C( 28370), -INT16_C( 6281) }, { INT64_C( 3), INT64_C( 14) }, { INT16_C( 2929), -INT16_C( 304), -INT16_C( 1965), -INT16_C( 2291), INT16_C( 848), INT16_C( 2317), INT16_C( 2702), -INT16_C( 3861), INT16_C( 2237), -INT16_C( 1511), -INT16_C( 3833), INT16_C( 2274), -INT16_C( 1778), -INT16_C( 3522), -INT16_C( 2270), -INT16_C( 2978), INT16_C( 3842), -INT16_C( 2637), INT16_C( 199), -INT16_C( 2100), -INT16_C( 1404), -INT16_C( 3424), INT16_C( 3045), INT16_C( 675), -INT16_C( 748), -INT16_C( 1028), -INT16_C( 274), -INT16_C( 796), INT16_C( 823), -INT16_C( 1361), INT16_C( 3546), 
-INT16_C( 786) } }, { { -INT16_C( 3376), INT16_C( 16583), -INT16_C( 4375), -INT16_C( 22489), -INT16_C( 24569), -INT16_C( 9858), -INT16_C( 2802), -INT16_C( 2623), INT16_C( 22021), INT16_C( 6678), -INT16_C( 2736), INT16_C( 8016), INT16_C( 7130), INT16_C( 7959), INT16_C( 8963), -INT16_C( 11513), -INT16_C( 12523), -INT16_C( 493), INT16_C( 15037), -INT16_C( 15193), INT16_C( 9691), -INT16_C( 5731), INT16_C( 24090), INT16_C( 8158), -INT16_C( 2892), INT16_C( 1338), -INT16_C( 29975), -INT16_C( 15324), INT16_C( 15269), -INT16_C( 22301), -INT16_C( 5537), INT16_C( 29819) }, { INT64_C( 10), INT64_C( 14) }, { -INT16_C( 4), INT16_C( 16), -INT16_C( 5), -INT16_C( 22), -INT16_C( 24), -INT16_C( 10), -INT16_C( 3), -INT16_C( 3), INT16_C( 21), INT16_C( 6), -INT16_C( 3), INT16_C( 7), INT16_C( 6), INT16_C( 7), INT16_C( 8), -INT16_C( 12), -INT16_C( 13), -INT16_C( 1), INT16_C( 14), -INT16_C( 15), INT16_C( 9), -INT16_C( 6), INT16_C( 23), INT16_C( 7), -INT16_C( 3), INT16_C( 1), -INT16_C( 30), -INT16_C( 15), INT16_C( 14), -INT16_C( 22), -INT16_C( 6), INT16_C( 29) } }, { { -INT16_C( 19616), INT16_C( 18928), INT16_C( 5181), -INT16_C( 7667), -INT16_C( 4016), -INT16_C( 20598), INT16_C( 1499), -INT16_C( 27613), -INT16_C( 26989), INT16_C( 23307), INT16_C( 17840), -INT16_C( 4097), -INT16_C( 29667), INT16_C( 21577), -INT16_C( 15625), INT16_C( 22335), INT16_C( 12149), -INT16_C( 19807), -INT16_C( 20925), -INT16_C( 27756), INT16_C( 7839), INT16_C( 31298), INT16_C( 26147), -INT16_C( 18930), INT16_C( 6652), -INT16_C( 21231), INT16_C( 4191), INT16_C( 31900), -INT16_C( 6756), -INT16_C( 27440), INT16_C( 4007), INT16_C( 7403) }, { INT64_C( 13), INT64_C( 11) }, { -INT16_C( 3), INT16_C( 2), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 3), INT16_C( 0), -INT16_C( 4), -INT16_C( 4), INT16_C( 2), INT16_C( 2), -INT16_C( 1), -INT16_C( 4), INT16_C( 2), -INT16_C( 2), INT16_C( 2), INT16_C( 1), -INT16_C( 3), -INT16_C( 3), -INT16_C( 4), INT16_C( 0), INT16_C( 3), INT16_C( 3), -INT16_C( 3), INT16_C( 0), -INT16_C( 3), 
INT16_C( 0), INT16_C( 3), -INT16_C( 1), -INT16_C( 4), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 27516), -INT16_C( 9369), INT16_C( 1147), INT16_C( 6231), INT16_C( 10473), -INT16_C( 28244), -INT16_C( 26825), INT16_C( 30381), INT16_C( 31780), INT16_C( 24568), INT16_C( 3550), INT16_C( 24377), -INT16_C( 29339), INT16_C( 8962), INT16_C( 23791), INT16_C( 27614), INT16_C( 17863), INT16_C( 16966), -INT16_C( 25015), INT16_C( 13146), INT16_C( 1734), -INT16_C( 572), INT16_C( 29086), -INT16_C( 15757), INT16_C( 27629), -INT16_C( 13279), INT16_C( 23161), -INT16_C( 8661), INT16_C( 11751), -INT16_C( 10750), -INT16_C( 8055), INT16_C( 20546) }, { INT64_C( 14), INT64_C( 12) }, { INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 2), -INT16_C( 2), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 1), -INT16_C( 2), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 1), -INT16_C( 2), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 1), -INT16_C( 1), INT16_C( 1), -INT16_C( 1), INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 1) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_sra_epi16(a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sra_epi16) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/srai.c000066400000000000000000000762521400333146700163720ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell 
copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN srai #include #include static int test_simde_mm512_srai_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[32]; const int16_t r0[32]; const int16_t r3[32]; const int16_t r7[32]; const int16_t r13[32]; const int16_t r24[32]; } test_vec[] = { { { -INT16_C( 2725), INT16_C( 6711), INT16_C( 7327), INT16_C( 11963), INT16_C( 28148), INT16_C( 5058), INT16_C( 21695), -INT16_C( 19668), -INT16_C( 11147), INT16_C( 27930), -INT16_C( 5129), -INT16_C( 26938), -INT16_C( 23608), INT16_C( 22277), INT16_C( 10373), -INT16_C( 8091), -INT16_C( 25571), -INT16_C( 17158), -INT16_C( 19015), -INT16_C( 21013), -INT16_C( 21214), -INT16_C( 7488), -INT16_C( 5119), INT16_C( 30357), -INT16_C( 20543), -INT16_C( 18205), -INT16_C( 21861), INT16_C( 25422), INT16_C( 21325), -INT16_C( 11590), INT16_C( 8315), -INT16_C( 26446) }, { -INT16_C( 2725), INT16_C( 6711), INT16_C( 7327), INT16_C( 11963), INT16_C( 28148), INT16_C( 5058), INT16_C( 21695), -INT16_C( 19668), -INT16_C( 11147), INT16_C( 27930), -INT16_C( 5129), -INT16_C( 26938), -INT16_C( 23608), INT16_C( 22277), INT16_C( 10373), -INT16_C( 8091), -INT16_C( 25571), -INT16_C( 17158), -INT16_C( 19015), -INT16_C( 
21013), -INT16_C( 21214), -INT16_C( 7488), -INT16_C( 5119), INT16_C( 30357), -INT16_C( 20543), -INT16_C( 18205), -INT16_C( 21861), INT16_C( 25422), INT16_C( 21325), -INT16_C( 11590), INT16_C( 8315), -INT16_C( 26446) }, { -INT16_C( 341), INT16_C( 838), INT16_C( 915), INT16_C( 1495), INT16_C( 3518), INT16_C( 632), INT16_C( 2711), -INT16_C( 2459), -INT16_C( 1394), INT16_C( 3491), -INT16_C( 642), -INT16_C( 3368), -INT16_C( 2951), INT16_C( 2784), INT16_C( 1296), -INT16_C( 1012), -INT16_C( 3197), -INT16_C( 2145), -INT16_C( 2377), -INT16_C( 2627), -INT16_C( 2652), -INT16_C( 936), -INT16_C( 640), INT16_C( 3794), -INT16_C( 2568), -INT16_C( 2276), -INT16_C( 2733), INT16_C( 3177), INT16_C( 2665), -INT16_C( 1449), INT16_C( 1039), -INT16_C( 3306) }, { -INT16_C( 22), INT16_C( 52), INT16_C( 57), INT16_C( 93), INT16_C( 219), INT16_C( 39), INT16_C( 169), -INT16_C( 154), -INT16_C( 88), INT16_C( 218), -INT16_C( 41), -INT16_C( 211), -INT16_C( 185), INT16_C( 174), INT16_C( 81), -INT16_C( 64), -INT16_C( 200), -INT16_C( 135), -INT16_C( 149), -INT16_C( 165), -INT16_C( 166), -INT16_C( 59), -INT16_C( 40), INT16_C( 237), -INT16_C( 161), -INT16_C( 143), -INT16_C( 171), INT16_C( 198), INT16_C( 166), -INT16_C( 91), INT16_C( 64), -INT16_C( 207) }, { -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 3), INT16_C( 0), INT16_C( 2), -INT16_C( 3), -INT16_C( 2), INT16_C( 3), -INT16_C( 1), -INT16_C( 4), -INT16_C( 3), INT16_C( 2), INT16_C( 1), -INT16_C( 1), -INT16_C( 4), -INT16_C( 3), -INT16_C( 3), -INT16_C( 3), -INT16_C( 3), -INT16_C( 1), -INT16_C( 1), INT16_C( 3), -INT16_C( 3), -INT16_C( 3), -INT16_C( 3), INT16_C( 3), INT16_C( 2), -INT16_C( 2), INT16_C( 1), -INT16_C( 4) }, { -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 
0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1) } }, { { -INT16_C( 21316), INT16_C( 30036), INT16_C( 16225), -INT16_C( 31710), -INT16_C( 7444), -INT16_C( 4762), -INT16_C( 1073), -INT16_C( 28572), INT16_C( 18347), INT16_C( 17992), -INT16_C( 26895), INT16_C( 16041), INT16_C( 25833), INT16_C( 25616), -INT16_C( 15740), INT16_C( 16636), INT16_C( 20590), -INT16_C( 12106), -INT16_C( 10096), INT16_C( 31828), -INT16_C( 17733), -INT16_C( 30102), -INT16_C( 12619), INT16_C( 24602), INT16_C( 25109), INT16_C( 1958), INT16_C( 20728), -INT16_C( 7867), INT16_C( 22196), INT16_C( 14405), INT16_C( 16664), -INT16_C( 30856) }, { -INT16_C( 21316), INT16_C( 30036), INT16_C( 16225), -INT16_C( 31710), -INT16_C( 7444), -INT16_C( 4762), -INT16_C( 1073), -INT16_C( 28572), INT16_C( 18347), INT16_C( 17992), -INT16_C( 26895), INT16_C( 16041), INT16_C( 25833), INT16_C( 25616), -INT16_C( 15740), INT16_C( 16636), INT16_C( 20590), -INT16_C( 12106), -INT16_C( 10096), INT16_C( 31828), -INT16_C( 17733), -INT16_C( 30102), -INT16_C( 12619), INT16_C( 24602), INT16_C( 25109), INT16_C( 1958), INT16_C( 20728), -INT16_C( 7867), INT16_C( 22196), INT16_C( 14405), INT16_C( 16664), -INT16_C( 30856) }, { -INT16_C( 2665), INT16_C( 3754), INT16_C( 2028), -INT16_C( 3964), -INT16_C( 931), -INT16_C( 596), -INT16_C( 135), -INT16_C( 3572), INT16_C( 2293), INT16_C( 2249), -INT16_C( 3362), INT16_C( 2005), INT16_C( 3229), INT16_C( 3202), -INT16_C( 1968), INT16_C( 2079), INT16_C( 2573), -INT16_C( 1514), -INT16_C( 1262), INT16_C( 3978), -INT16_C( 2217), -INT16_C( 3763), -INT16_C( 1578), INT16_C( 3075), INT16_C( 3138), INT16_C( 244), INT16_C( 2591), -INT16_C( 984), INT16_C( 2774), INT16_C( 1800), INT16_C( 2083), -INT16_C( 3857) }, { -INT16_C( 167), INT16_C( 234), INT16_C( 126), -INT16_C( 248), -INT16_C( 59), -INT16_C( 38), -INT16_C( 9), -INT16_C( 224), INT16_C( 143), INT16_C( 140), -INT16_C( 211), INT16_C( 125), INT16_C( 201), INT16_C( 200), -INT16_C( 123), 
INT16_C( 129), INT16_C( 160), -INT16_C( 95), -INT16_C( 79), INT16_C( 248), -INT16_C( 139), -INT16_C( 236), -INT16_C( 99), INT16_C( 192), INT16_C( 196), INT16_C( 15), INT16_C( 161), -INT16_C( 62), INT16_C( 173), INT16_C( 112), INT16_C( 130), -INT16_C( 242) }, { -INT16_C( 3), INT16_C( 3), INT16_C( 1), -INT16_C( 4), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 4), INT16_C( 2), INT16_C( 2), -INT16_C( 4), INT16_C( 1), INT16_C( 3), INT16_C( 3), -INT16_C( 2), INT16_C( 2), INT16_C( 2), -INT16_C( 2), -INT16_C( 2), INT16_C( 3), -INT16_C( 3), -INT16_C( 4), -INT16_C( 2), INT16_C( 3), INT16_C( 3), INT16_C( 0), INT16_C( 2), -INT16_C( 1), INT16_C( 2), INT16_C( 1), INT16_C( 2), -INT16_C( 4) }, { -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1) } }, { { INT16_C( 11921), INT16_C( 8535), -INT16_C( 21753), -INT16_C( 15714), INT16_C( 2149), INT16_C( 6732), INT16_C( 26326), -INT16_C( 5253), INT16_C( 8648), -INT16_C( 16142), INT16_C( 14449), INT16_C( 9633), -INT16_C( 6514), -INT16_C( 22947), -INT16_C( 10713), -INT16_C( 18387), -INT16_C( 31740), INT16_C( 3034), INT16_C( 30767), -INT16_C( 27443), INT16_C( 6528), INT16_C( 22191), INT16_C( 10879), INT16_C( 18241), INT16_C( 13387), -INT16_C( 17145), -INT16_C( 22420), -INT16_C( 1310), INT16_C( 16526), -INT16_C( 19040), -INT16_C( 12778), INT16_C( 6766) }, { INT16_C( 11921), INT16_C( 8535), -INT16_C( 21753), -INT16_C( 15714), INT16_C( 2149), INT16_C( 6732), INT16_C( 26326), -INT16_C( 5253), INT16_C( 8648), -INT16_C( 16142), INT16_C( 14449), INT16_C( 9633), -INT16_C( 6514), -INT16_C( 22947), -INT16_C( 10713), -INT16_C( 18387), -INT16_C( 31740), INT16_C( 3034), 
INT16_C( 30767), -INT16_C( 27443), INT16_C( 6528), INT16_C( 22191), INT16_C( 10879), INT16_C( 18241), INT16_C( 13387), -INT16_C( 17145), -INT16_C( 22420), -INT16_C( 1310), INT16_C( 16526), -INT16_C( 19040), -INT16_C( 12778), INT16_C( 6766) }, { INT16_C( 1490), INT16_C( 1066), -INT16_C( 2720), -INT16_C( 1965), INT16_C( 268), INT16_C( 841), INT16_C( 3290), -INT16_C( 657), INT16_C( 1081), -INT16_C( 2018), INT16_C( 1806), INT16_C( 1204), -INT16_C( 815), -INT16_C( 2869), -INT16_C( 1340), -INT16_C( 2299), -INT16_C( 3968), INT16_C( 379), INT16_C( 3845), -INT16_C( 3431), INT16_C( 816), INT16_C( 2773), INT16_C( 1359), INT16_C( 2280), INT16_C( 1673), -INT16_C( 2144), -INT16_C( 2803), -INT16_C( 164), INT16_C( 2065), -INT16_C( 2380), -INT16_C( 1598), INT16_C( 845) }, { INT16_C( 93), INT16_C( 66), -INT16_C( 170), -INT16_C( 123), INT16_C( 16), INT16_C( 52), INT16_C( 205), -INT16_C( 42), INT16_C( 67), -INT16_C( 127), INT16_C( 112), INT16_C( 75), -INT16_C( 51), -INT16_C( 180), -INT16_C( 84), -INT16_C( 144), -INT16_C( 248), INT16_C( 23), INT16_C( 240), -INT16_C( 215), INT16_C( 51), INT16_C( 173), INT16_C( 84), INT16_C( 142), INT16_C( 104), -INT16_C( 134), -INT16_C( 176), -INT16_C( 11), INT16_C( 129), -INT16_C( 149), -INT16_C( 100), INT16_C( 52) }, { INT16_C( 1), INT16_C( 1), -INT16_C( 3), -INT16_C( 2), INT16_C( 0), INT16_C( 0), INT16_C( 3), -INT16_C( 1), INT16_C( 1), -INT16_C( 2), INT16_C( 1), INT16_C( 1), -INT16_C( 1), -INT16_C( 3), -INT16_C( 2), -INT16_C( 3), -INT16_C( 4), INT16_C( 0), INT16_C( 3), -INT16_C( 4), INT16_C( 0), INT16_C( 2), INT16_C( 1), INT16_C( 2), INT16_C( 1), -INT16_C( 3), -INT16_C( 3), -INT16_C( 1), INT16_C( 2), -INT16_C( 3), -INT16_C( 2), INT16_C( 0) }, { INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), 
INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0) } }, { { INT16_C( 18514), -INT16_C( 32218), -INT16_C( 3136), INT16_C( 16406), -INT16_C( 15091), -INT16_C( 29546), -INT16_C( 10257), INT16_C( 15316), -INT16_C( 9461), INT16_C( 30712), -INT16_C( 9596), INT16_C( 4721), INT16_C( 4634), INT16_C( 12488), INT16_C( 14048), INT16_C( 12875), INT16_C( 29054), INT16_C( 16052), -INT16_C( 13468), INT16_C( 29054), INT16_C( 5264), -INT16_C( 32514), -INT16_C( 11541), -INT16_C( 2117), -INT16_C( 19539), INT16_C( 12654), -INT16_C( 8051), -INT16_C( 22460), INT16_C( 3314), -INT16_C( 11560), INT16_C( 9026), -INT16_C( 16380) }, { INT16_C( 18514), -INT16_C( 32218), -INT16_C( 3136), INT16_C( 16406), -INT16_C( 15091), -INT16_C( 29546), -INT16_C( 10257), INT16_C( 15316), -INT16_C( 9461), INT16_C( 30712), -INT16_C( 9596), INT16_C( 4721), INT16_C( 4634), INT16_C( 12488), INT16_C( 14048), INT16_C( 12875), INT16_C( 29054), INT16_C( 16052), -INT16_C( 13468), INT16_C( 29054), INT16_C( 5264), -INT16_C( 32514), -INT16_C( 11541), -INT16_C( 2117), -INT16_C( 19539), INT16_C( 12654), -INT16_C( 8051), -INT16_C( 22460), INT16_C( 3314), -INT16_C( 11560), INT16_C( 9026), -INT16_C( 16380) }, { INT16_C( 2314), -INT16_C( 4028), -INT16_C( 392), INT16_C( 2050), -INT16_C( 1887), -INT16_C( 3694), -INT16_C( 1283), INT16_C( 1914), -INT16_C( 1183), INT16_C( 3839), -INT16_C( 1200), INT16_C( 590), INT16_C( 579), INT16_C( 1561), INT16_C( 1756), INT16_C( 1609), INT16_C( 3631), INT16_C( 2006), -INT16_C( 1684), INT16_C( 3631), INT16_C( 658), -INT16_C( 4065), -INT16_C( 1443), -INT16_C( 265), -INT16_C( 2443), INT16_C( 1581), -INT16_C( 1007), -INT16_C( 2808), INT16_C( 414), -INT16_C( 1445), INT16_C( 1128), -INT16_C( 2048) }, { INT16_C( 144), -INT16_C( 252), -INT16_C( 25), INT16_C( 128), -INT16_C( 118), -INT16_C( 231), -INT16_C( 81), INT16_C( 119), -INT16_C( 74), INT16_C( 239), -INT16_C( 75), INT16_C( 36), INT16_C( 36), INT16_C( 97), INT16_C( 109), INT16_C( 
100), INT16_C( 226), INT16_C( 125), -INT16_C( 106), INT16_C( 226), INT16_C( 41), -INT16_C( 255), -INT16_C( 91), -INT16_C( 17), -INT16_C( 153), INT16_C( 98), -INT16_C( 63), -INT16_C( 176), INT16_C( 25), -INT16_C( 91), INT16_C( 70), -INT16_C( 128) }, { INT16_C( 2), -INT16_C( 4), -INT16_C( 1), INT16_C( 2), -INT16_C( 2), -INT16_C( 4), -INT16_C( 2), INT16_C( 1), -INT16_C( 2), INT16_C( 3), -INT16_C( 2), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 3), INT16_C( 1), -INT16_C( 2), INT16_C( 3), INT16_C( 0), -INT16_C( 4), -INT16_C( 2), -INT16_C( 1), -INT16_C( 3), INT16_C( 1), -INT16_C( 1), -INT16_C( 3), INT16_C( 0), -INT16_C( 2), INT16_C( 1), -INT16_C( 2) }, { INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1) } }, { { -INT16_C( 18028), -INT16_C( 1538), INT16_C( 31876), INT16_C( 5226), INT16_C( 26768), INT16_C( 31636), INT16_C( 20282), -INT16_C( 6030), -INT16_C( 7934), -INT16_C( 28647), INT16_C( 24001), -INT16_C( 19656), INT16_C( 4201), -INT16_C( 21627), -INT16_C( 30412), -INT16_C( 14229), INT16_C( 26946), -INT16_C( 14655), INT16_C( 11493), INT16_C( 30171), INT16_C( 28564), -INT16_C( 12303), INT16_C( 25535), -INT16_C( 15945), -INT16_C( 12220), INT16_C( 1361), -INT16_C( 30418), -INT16_C( 26696), INT16_C( 15770), -INT16_C( 12733), -INT16_C( 20793), INT16_C( 2454) }, { -INT16_C( 18028), -INT16_C( 1538), INT16_C( 31876), INT16_C( 5226), INT16_C( 26768), INT16_C( 31636), INT16_C( 20282), -INT16_C( 6030), -INT16_C( 7934), -INT16_C( 28647), INT16_C( 24001), -INT16_C( 19656), INT16_C( 4201), -INT16_C( 21627), -INT16_C( 30412), -INT16_C( 14229), INT16_C( 26946), -INT16_C( 14655), 
INT16_C( 11493), INT16_C( 30171), INT16_C( 28564), -INT16_C( 12303), INT16_C( 25535), -INT16_C( 15945), -INT16_C( 12220), INT16_C( 1361), -INT16_C( 30418), -INT16_C( 26696), INT16_C( 15770), -INT16_C( 12733), -INT16_C( 20793), INT16_C( 2454) }, { -INT16_C( 2254), -INT16_C( 193), INT16_C( 3984), INT16_C( 653), INT16_C( 3346), INT16_C( 3954), INT16_C( 2535), -INT16_C( 754), -INT16_C( 992), -INT16_C( 3581), INT16_C( 3000), -INT16_C( 2457), INT16_C( 525), -INT16_C( 2704), -INT16_C( 3802), -INT16_C( 1779), INT16_C( 3368), -INT16_C( 1832), INT16_C( 1436), INT16_C( 3771), INT16_C( 3570), -INT16_C( 1538), INT16_C( 3191), -INT16_C( 1994), -INT16_C( 1528), INT16_C( 170), -INT16_C( 3803), -INT16_C( 3337), INT16_C( 1971), -INT16_C( 1592), -INT16_C( 2600), INT16_C( 306) }, { -INT16_C( 141), -INT16_C( 13), INT16_C( 249), INT16_C( 40), INT16_C( 209), INT16_C( 247), INT16_C( 158), -INT16_C( 48), -INT16_C( 62), -INT16_C( 224), INT16_C( 187), -INT16_C( 154), INT16_C( 32), -INT16_C( 169), -INT16_C( 238), -INT16_C( 112), INT16_C( 210), -INT16_C( 115), INT16_C( 89), INT16_C( 235), INT16_C( 223), -INT16_C( 97), INT16_C( 199), -INT16_C( 125), -INT16_C( 96), INT16_C( 10), -INT16_C( 238), -INT16_C( 209), INT16_C( 123), -INT16_C( 100), -INT16_C( 163), INT16_C( 19) }, { -INT16_C( 3), -INT16_C( 1), INT16_C( 3), INT16_C( 0), INT16_C( 3), INT16_C( 3), INT16_C( 2), -INT16_C( 1), -INT16_C( 1), -INT16_C( 4), INT16_C( 2), -INT16_C( 3), INT16_C( 0), -INT16_C( 3), -INT16_C( 4), -INT16_C( 2), INT16_C( 3), -INT16_C( 2), INT16_C( 1), INT16_C( 3), INT16_C( 3), -INT16_C( 2), INT16_C( 3), -INT16_C( 2), -INT16_C( 2), INT16_C( 0), -INT16_C( 4), -INT16_C( 4), INT16_C( 1), -INT16_C( 2), -INT16_C( 3), INT16_C( 0) }, { -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), 
INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0) } }, { { INT16_C( 22552), -INT16_C( 560), -INT16_C( 21628), INT16_C( 6259), INT16_C( 25626), -INT16_C( 9753), -INT16_C( 24889), INT16_C( 3227), -INT16_C( 5009), -INT16_C( 25327), -INT16_C( 13706), INT16_C( 4148), INT16_C( 30471), -INT16_C( 12578), INT16_C( 29734), INT16_C( 16088), -INT16_C( 22324), INT16_C( 20539), -INT16_C( 20909), INT16_C( 28009), INT16_C( 20498), -INT16_C( 9657), -INT16_C( 7441), INT16_C( 24294), -INT16_C( 2098), INT16_C( 17659), INT16_C( 12225), -INT16_C( 13996), INT16_C( 12967), -INT16_C( 12905), INT16_C( 28583), INT16_C( 29451) }, { INT16_C( 22552), -INT16_C( 560), -INT16_C( 21628), INT16_C( 6259), INT16_C( 25626), -INT16_C( 9753), -INT16_C( 24889), INT16_C( 3227), -INT16_C( 5009), -INT16_C( 25327), -INT16_C( 13706), INT16_C( 4148), INT16_C( 30471), -INT16_C( 12578), INT16_C( 29734), INT16_C( 16088), -INT16_C( 22324), INT16_C( 20539), -INT16_C( 20909), INT16_C( 28009), INT16_C( 20498), -INT16_C( 9657), -INT16_C( 7441), INT16_C( 24294), -INT16_C( 2098), INT16_C( 17659), INT16_C( 12225), -INT16_C( 13996), INT16_C( 12967), -INT16_C( 12905), INT16_C( 28583), INT16_C( 29451) }, { INT16_C( 2819), -INT16_C( 70), -INT16_C( 2704), INT16_C( 782), INT16_C( 3203), -INT16_C( 1220), -INT16_C( 3112), INT16_C( 403), -INT16_C( 627), -INT16_C( 3166), -INT16_C( 1714), INT16_C( 518), INT16_C( 3808), -INT16_C( 1573), INT16_C( 3716), INT16_C( 2011), -INT16_C( 2791), INT16_C( 2567), -INT16_C( 2614), INT16_C( 3501), INT16_C( 2562), -INT16_C( 1208), -INT16_C( 931), INT16_C( 3036), -INT16_C( 263), INT16_C( 2207), INT16_C( 1528), -INT16_C( 1750), INT16_C( 1620), -INT16_C( 1614), INT16_C( 3572), INT16_C( 3681) }, { INT16_C( 176), -INT16_C( 5), -INT16_C( 169), INT16_C( 48), INT16_C( 200), -INT16_C( 77), -INT16_C( 195), INT16_C( 25), -INT16_C( 40), -INT16_C( 198), -INT16_C( 108), INT16_C( 32), INT16_C( 238), -INT16_C( 99), INT16_C( 232), 
INT16_C( 125), -INT16_C( 175), INT16_C( 160), -INT16_C( 164), INT16_C( 218), INT16_C( 160), -INT16_C( 76), -INT16_C( 59), INT16_C( 189), -INT16_C( 17), INT16_C( 137), INT16_C( 95), -INT16_C( 110), INT16_C( 101), -INT16_C( 101), INT16_C( 223), INT16_C( 230) }, { INT16_C( 2), -INT16_C( 1), -INT16_C( 3), INT16_C( 0), INT16_C( 3), -INT16_C( 2), -INT16_C( 4), INT16_C( 0), -INT16_C( 1), -INT16_C( 4), -INT16_C( 2), INT16_C( 0), INT16_C( 3), -INT16_C( 2), INT16_C( 3), INT16_C( 1), -INT16_C( 3), INT16_C( 2), -INT16_C( 3), INT16_C( 3), INT16_C( 2), -INT16_C( 2), -INT16_C( 1), INT16_C( 2), -INT16_C( 1), INT16_C( 2), INT16_C( 1), -INT16_C( 2), INT16_C( 1), -INT16_C( 2), INT16_C( 3), INT16_C( 3) }, { INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 17943), INT16_C( 27332), INT16_C( 11765), INT16_C( 2008), INT16_C( 8061), INT16_C( 27873), -INT16_C( 14591), -INT16_C( 12342), -INT16_C( 14913), -INT16_C( 32748), INT16_C( 26869), -INT16_C( 25527), -INT16_C( 7781), INT16_C( 17001), INT16_C( 29776), INT16_C( 26805), INT16_C( 31162), -INT16_C( 20526), -INT16_C( 21850), INT16_C( 9399), -INT16_C( 26423), -INT16_C( 13680), INT16_C( 23392), INT16_C( 8090), -INT16_C( 20960), INT16_C( 5535), -INT16_C( 5866), -INT16_C( 20047), INT16_C( 6858), INT16_C( 6899), -INT16_C( 22130), INT16_C( 18818) }, { INT16_C( 17943), INT16_C( 27332), INT16_C( 11765), INT16_C( 2008), INT16_C( 8061), INT16_C( 27873), -INT16_C( 14591), -INT16_C( 12342), -INT16_C( 14913), -INT16_C( 32748), INT16_C( 26869), -INT16_C( 25527), -INT16_C( 7781), INT16_C( 17001), INT16_C( 29776), INT16_C( 26805), INT16_C( 31162), -INT16_C( 20526), 
-INT16_C( 21850), INT16_C( 9399), -INT16_C( 26423), -INT16_C( 13680), INT16_C( 23392), INT16_C( 8090), -INT16_C( 20960), INT16_C( 5535), -INT16_C( 5866), -INT16_C( 20047), INT16_C( 6858), INT16_C( 6899), -INT16_C( 22130), INT16_C( 18818) }, { INT16_C( 2242), INT16_C( 3416), INT16_C( 1470), INT16_C( 251), INT16_C( 1007), INT16_C( 3484), -INT16_C( 1824), -INT16_C( 1543), -INT16_C( 1865), -INT16_C( 4094), INT16_C( 3358), -INT16_C( 3191), -INT16_C( 973), INT16_C( 2125), INT16_C( 3722), INT16_C( 3350), INT16_C( 3895), -INT16_C( 2566), -INT16_C( 2732), INT16_C( 1174), -INT16_C( 3303), -INT16_C( 1710), INT16_C( 2924), INT16_C( 1011), -INT16_C( 2620), INT16_C( 691), -INT16_C( 734), -INT16_C( 2506), INT16_C( 857), INT16_C( 862), -INT16_C( 2767), INT16_C( 2352) }, { INT16_C( 140), INT16_C( 213), INT16_C( 91), INT16_C( 15), INT16_C( 62), INT16_C( 217), -INT16_C( 114), -INT16_C( 97), -INT16_C( 117), -INT16_C( 256), INT16_C( 209), -INT16_C( 200), -INT16_C( 61), INT16_C( 132), INT16_C( 232), INT16_C( 209), INT16_C( 243), -INT16_C( 161), -INT16_C( 171), INT16_C( 73), -INT16_C( 207), -INT16_C( 107), INT16_C( 182), INT16_C( 63), -INT16_C( 164), INT16_C( 43), -INT16_C( 46), -INT16_C( 157), INT16_C( 53), INT16_C( 53), -INT16_C( 173), INT16_C( 147) }, { INT16_C( 2), INT16_C( 3), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 3), -INT16_C( 2), -INT16_C( 2), -INT16_C( 2), -INT16_C( 4), INT16_C( 3), -INT16_C( 4), -INT16_C( 1), INT16_C( 2), INT16_C( 3), INT16_C( 3), INT16_C( 3), -INT16_C( 3), -INT16_C( 3), INT16_C( 1), -INT16_C( 4), -INT16_C( 2), INT16_C( 2), INT16_C( 0), -INT16_C( 3), INT16_C( 0), -INT16_C( 1), -INT16_C( 3), INT16_C( 0), INT16_C( 0), -INT16_C( 3), INT16_C( 2) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), 
INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0) } }, { { INT16_C( 21794), -INT16_C( 13832), -INT16_C( 20481), -INT16_C( 13843), INT16_C( 32072), -INT16_C( 22381), INT16_C( 11736), -INT16_C( 1593), INT16_C( 26331), -INT16_C( 3570), -INT16_C( 16305), INT16_C( 6563), -INT16_C( 26662), INT16_C( 26932), -INT16_C( 18880), INT16_C( 25266), -INT16_C( 22005), INT16_C( 2859), INT16_C( 6234), -INT16_C( 23852), INT16_C( 26518), INT16_C( 28234), INT16_C( 4501), INT16_C( 28775), INT16_C( 30327), -INT16_C( 14494), INT16_C( 1590), INT16_C( 4320), INT16_C( 5277), -INT16_C( 8839), INT16_C( 11211), -INT16_C( 10689) }, { INT16_C( 21794), -INT16_C( 13832), -INT16_C( 20481), -INT16_C( 13843), INT16_C( 32072), -INT16_C( 22381), INT16_C( 11736), -INT16_C( 1593), INT16_C( 26331), -INT16_C( 3570), -INT16_C( 16305), INT16_C( 6563), -INT16_C( 26662), INT16_C( 26932), -INT16_C( 18880), INT16_C( 25266), -INT16_C( 22005), INT16_C( 2859), INT16_C( 6234), -INT16_C( 23852), INT16_C( 26518), INT16_C( 28234), INT16_C( 4501), INT16_C( 28775), INT16_C( 30327), -INT16_C( 14494), INT16_C( 1590), INT16_C( 4320), INT16_C( 5277), -INT16_C( 8839), INT16_C( 11211), -INT16_C( 10689) }, { INT16_C( 2724), -INT16_C( 1729), -INT16_C( 2561), -INT16_C( 1731), INT16_C( 4009), -INT16_C( 2798), INT16_C( 1467), -INT16_C( 200), INT16_C( 3291), -INT16_C( 447), -INT16_C( 2039), INT16_C( 820), -INT16_C( 3333), INT16_C( 3366), -INT16_C( 2360), INT16_C( 3158), -INT16_C( 2751), INT16_C( 357), INT16_C( 779), -INT16_C( 2982), INT16_C( 3314), INT16_C( 3529), INT16_C( 562), INT16_C( 3596), INT16_C( 3790), -INT16_C( 1812), INT16_C( 198), INT16_C( 540), INT16_C( 659), -INT16_C( 1105), INT16_C( 1401), -INT16_C( 1337) }, { INT16_C( 170), -INT16_C( 109), -INT16_C( 161), -INT16_C( 109), INT16_C( 250), -INT16_C( 175), INT16_C( 91), -INT16_C( 13), INT16_C( 205), -INT16_C( 28), -INT16_C( 128), INT16_C( 51), -INT16_C( 209), INT16_C( 210), -INT16_C( 148), INT16_C( 197), 
-INT16_C( 172), INT16_C( 22), INT16_C( 48), -INT16_C( 187), INT16_C( 207), INT16_C( 220), INT16_C( 35), INT16_C( 224), INT16_C( 236), -INT16_C( 114), INT16_C( 12), INT16_C( 33), INT16_C( 41), -INT16_C( 70), INT16_C( 87), -INT16_C( 84) }, { INT16_C( 2), -INT16_C( 2), -INT16_C( 3), -INT16_C( 2), INT16_C( 3), -INT16_C( 3), INT16_C( 1), -INT16_C( 1), INT16_C( 3), -INT16_C( 1), -INT16_C( 2), INT16_C( 0), -INT16_C( 4), INT16_C( 3), -INT16_C( 3), INT16_C( 3), -INT16_C( 3), INT16_C( 0), INT16_C( 0), -INT16_C( 3), INT16_C( 3), INT16_C( 3), INT16_C( 0), INT16_C( 3), INT16_C( 3), -INT16_C( 2), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 2), INT16_C( 1), -INT16_C( 2) }, { INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i r0 = simde_mm512_srai_epi16(a, 0); simde__m512i r3 = simde_mm512_srai_epi16(a, 3); simde__m512i r7 = simde_mm512_srai_epi16(a, 7); simde__m512i r13 = simde_mm512_srai_epi16(a, 13); simde__m512i r24 = simde_mm512_srai_epi16(a, 24); simde_test_x86_assert_equal_i16x32(r0, simde_mm512_loadu_epi16(test_vec[i].r0)); simde_test_x86_assert_equal_i16x32(r3, simde_mm512_loadu_epi16(test_vec[i].r3)); simde_test_x86_assert_equal_i16x32(r7, simde_mm512_loadu_epi16(test_vec[i].r7)); simde_test_x86_assert_equal_i16x32(r13, simde_mm512_loadu_epi16(test_vec[i].r13)); simde_test_x86_assert_equal_i16x32(r24, simde_mm512_loadu_epi16(test_vec[i].r24)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_srai_epi16) 
SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/srav.c000066400000000000000000000404661400333146700164050ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN srav #include #include static int test_simde_mm512_srav_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { -INT16_C( 26126), INT16_C( 9315), INT16_C( 20615), -INT16_C( 25676), INT16_C( 25129), -INT16_C( 13781), -INT16_C( 22935), -INT16_C( 22311), -INT16_C( 968), INT16_C( 28073), -INT16_C( 1215), INT16_C( 1561), -INT16_C( 5797), INT16_C( 15623), -INT16_C( 24120), -INT16_C( 17815), -INT16_C( 13254), -INT16_C( 15649), -INT16_C( 27875), INT16_C( 18013), -INT16_C( 30475), INT16_C( 24080), -INT16_C( 5841), INT16_C( 26374), -INT16_C( 20250), INT16_C( 10196), -INT16_C( 4693), INT16_C( 1581), INT16_C( 13782), -INT16_C( 24765), -INT16_C( 21290), INT16_C( 4185) }, { INT16_C( 3), INT16_C( 9), INT16_C( 2), INT16_C( 12), INT16_C( 14), INT16_C( 14), INT16_C( 2), INT16_C( 11), INT16_C( 2), INT16_C( 8), INT16_C( 0), INT16_C( 14), INT16_C( 12), INT16_C( 1), INT16_C( 13), INT16_C( 14), INT16_C( 15), INT16_C( 14), INT16_C( 5), INT16_C( 14), INT16_C( 12), INT16_C( 2), INT16_C( 1), INT16_C( 12), INT16_C( 11), INT16_C( 4), INT16_C( 12), INT16_C( 1), INT16_C( 11), INT16_C( 0), INT16_C( 5), INT16_C( 11) }, { -INT16_C( 3266), INT16_C( 18), INT16_C( 5153), -INT16_C( 7), INT16_C( 1), -INT16_C( 1), -INT16_C( 5734), -INT16_C( 11), -INT16_C( 242), INT16_C( 109), -INT16_C( 1215), INT16_C( 0), -INT16_C( 2), INT16_C( 7811), -INT16_C( 3), -INT16_C( 2), -INT16_C( 1), -INT16_C( 1), -INT16_C( 872), INT16_C( 1), -INT16_C( 8), INT16_C( 6020), -INT16_C( 2921), INT16_C( 6), -INT16_C( 10), INT16_C( 637), -INT16_C( 2), INT16_C( 790), INT16_C( 6), -INT16_C( 24765), -INT16_C( 666), INT16_C( 2) } }, { { INT16_C( 31316), -INT16_C( 32362), INT16_C( 15066), INT16_C( 9574), INT16_C( 13054), -INT16_C( 6075), INT16_C( 8268), -INT16_C( 30040), -INT16_C( 5667), -INT16_C( 22056), INT16_C( 3505), INT16_C( 17338), -INT16_C( 6456), INT16_C( 
25163), INT16_C( 16956), -INT16_C( 28651), -INT16_C( 21572), -INT16_C( 27119), INT16_C( 30693), -INT16_C( 6981), INT16_C( 169), -INT16_C( 2356), INT16_C( 29984), -INT16_C( 640), INT16_C( 22622), INT16_C( 4263), INT16_C( 24933), INT16_C( 11603), -INT16_C( 24761), -INT16_C( 31601), -INT16_C( 23327), -INT16_C( 25324) }, { INT16_C( 2), INT16_C( 3), INT16_C( 14), INT16_C( 4), INT16_C( 14), INT16_C( 1), INT16_C( 11), INT16_C( 11), INT16_C( 11), INT16_C( 7), INT16_C( 1), INT16_C( 5), INT16_C( 3), INT16_C( 9), INT16_C( 15), INT16_C( 2), INT16_C( 6), INT16_C( 11), INT16_C( 7), INT16_C( 4), INT16_C( 3), INT16_C( 11), INT16_C( 6), INT16_C( 1), INT16_C( 3), INT16_C( 3), INT16_C( 3), INT16_C( 1), INT16_C( 7), INT16_C( 4), INT16_C( 14), INT16_C( 8) }, { INT16_C( 7829), -INT16_C( 4046), INT16_C( 0), INT16_C( 598), INT16_C( 0), -INT16_C( 3038), INT16_C( 4), -INT16_C( 15), -INT16_C( 3), -INT16_C( 173), INT16_C( 1752), INT16_C( 541), -INT16_C( 807), INT16_C( 49), INT16_C( 0), -INT16_C( 7163), -INT16_C( 338), -INT16_C( 14), INT16_C( 239), -INT16_C( 437), INT16_C( 21), -INT16_C( 2), INT16_C( 468), -INT16_C( 320), INT16_C( 2827), INT16_C( 532), INT16_C( 3116), INT16_C( 5801), -INT16_C( 194), -INT16_C( 1976), -INT16_C( 2), -INT16_C( 99) } }, { { -INT16_C( 13225), -INT16_C( 19388), INT16_C( 17733), -INT16_C( 23806), INT16_C( 24707), INT16_C( 32347), -INT16_C( 12852), -INT16_C( 4722), INT16_C( 6151), INT16_C( 24100), -INT16_C( 17847), -INT16_C( 20111), INT16_C( 8241), -INT16_C( 24589), INT16_C( 24585), INT16_C( 24618), INT16_C( 28205), INT16_C( 29204), INT16_C( 5812), INT16_C( 14101), INT16_C( 28790), INT16_C( 17078), INT16_C( 17469), INT16_C( 17711), INT16_C( 21597), -INT16_C( 22877), INT16_C( 5134), INT16_C( 16215), INT16_C( 19252), INT16_C( 15839), INT16_C( 2475), -INT16_C( 10083) }, { INT16_C( 11), INT16_C( 2), INT16_C( 5), INT16_C( 3), INT16_C( 1), INT16_C( 0), INT16_C( 11), INT16_C( 11), INT16_C( 15), INT16_C( 1), INT16_C( 11), INT16_C( 3), INT16_C( 3), INT16_C( 10), INT16_C( 1), 
INT16_C( 11), INT16_C( 13), INT16_C( 9), INT16_C( 4), INT16_C( 0), INT16_C( 5), INT16_C( 11), INT16_C( 6), INT16_C( 0), INT16_C( 13), INT16_C( 6), INT16_C( 7), INT16_C( 8), INT16_C( 1), INT16_C( 13), INT16_C( 12), INT16_C( 0) }, { -INT16_C( 7), -INT16_C( 4847), INT16_C( 554), -INT16_C( 2976), INT16_C( 12353), INT16_C( 32347), -INT16_C( 7), -INT16_C( 3), INT16_C( 0), INT16_C( 12050), -INT16_C( 9), -INT16_C( 2514), INT16_C( 1030), -INT16_C( 25), INT16_C( 12292), INT16_C( 12), INT16_C( 3), INT16_C( 57), INT16_C( 363), INT16_C( 14101), INT16_C( 899), INT16_C( 8), INT16_C( 272), INT16_C( 17711), INT16_C( 2), -INT16_C( 358), INT16_C( 40), INT16_C( 63), INT16_C( 9626), INT16_C( 1), INT16_C( 0), -INT16_C( 10083) } }, { { INT16_C( 27799), -INT16_C( 14184), INT16_C( 27564), INT16_C( 1738), -INT16_C( 9792), -INT16_C( 14659), INT16_C( 11834), -INT16_C( 27951), -INT16_C( 4351), -INT16_C( 29452), -INT16_C( 27296), INT16_C( 538), INT16_C( 22706), -INT16_C( 5410), INT16_C( 27933), -INT16_C( 19219), -INT16_C( 31271), -INT16_C( 31364), INT16_C( 18161), -INT16_C( 20085), INT16_C( 18463), INT16_C( 23160), INT16_C( 18807), INT16_C( 30956), -INT16_C( 8135), -INT16_C( 26364), INT16_C( 7797), INT16_C( 10139), INT16_C( 31094), -INT16_C( 27887), -INT16_C( 26), -INT16_C( 16569) }, { INT16_C( 12), INT16_C( 7), INT16_C( 13), INT16_C( 2), INT16_C( 9), INT16_C( 9), INT16_C( 7), INT16_C( 2), INT16_C( 0), INT16_C( 12), INT16_C( 5), INT16_C( 10), INT16_C( 15), INT16_C( 11), INT16_C( 7), INT16_C( 8), INT16_C( 11), INT16_C( 4), INT16_C( 1), INT16_C( 10), INT16_C( 15), INT16_C( 10), INT16_C( 3), INT16_C( 11), INT16_C( 8), INT16_C( 7), INT16_C( 6), INT16_C( 10), INT16_C( 4), INT16_C( 6), INT16_C( 13), INT16_C( 0) }, { INT16_C( 6), -INT16_C( 111), INT16_C( 3), INT16_C( 434), -INT16_C( 20), -INT16_C( 29), INT16_C( 92), -INT16_C( 6988), -INT16_C( 4351), -INT16_C( 8), -INT16_C( 853), INT16_C( 0), INT16_C( 0), -INT16_C( 3), INT16_C( 218), -INT16_C( 76), -INT16_C( 16), -INT16_C( 1961), INT16_C( 9080), 
-INT16_C( 20), INT16_C( 0), INT16_C( 22), INT16_C( 2350), INT16_C( 15), -INT16_C( 32), -INT16_C( 206), INT16_C( 121), INT16_C( 9), INT16_C( 1943), -INT16_C( 436), -INT16_C( 1), -INT16_C( 16569) } }, { { -INT16_C( 8822), INT16_C( 5454), -INT16_C( 15621), -INT16_C( 18248), -INT16_C( 4933), INT16_C( 9054), INT16_C( 9511), INT16_C( 28636), INT16_C( 22950), INT16_C( 32225), -INT16_C( 2877), INT16_C( 11043), INT16_C( 32571), -INT16_C( 112), INT16_C( 30543), -INT16_C( 9726), INT16_C( 20564), INT16_C( 20719), -INT16_C( 22765), -INT16_C( 12792), INT16_C( 26259), -INT16_C( 17423), -INT16_C( 12917), INT16_C( 12842), INT16_C( 2855), -INT16_C( 5457), -INT16_C( 11265), INT16_C( 14870), -INT16_C( 22958), -INT16_C( 24263), INT16_C( 15389), INT16_C( 29307) }, { INT16_C( 6), INT16_C( 9), INT16_C( 12), INT16_C( 10), INT16_C( 5), INT16_C( 11), INT16_C( 8), INT16_C( 5), INT16_C( 9), INT16_C( 9), INT16_C( 5), INT16_C( 12), INT16_C( 0), INT16_C( 1), INT16_C( 14), INT16_C( 13), INT16_C( 4), INT16_C( 5), INT16_C( 13), INT16_C( 4), INT16_C( 6), INT16_C( 6), INT16_C( 15), INT16_C( 8), INT16_C( 15), INT16_C( 0), INT16_C( 14), INT16_C( 4), INT16_C( 2), INT16_C( 3), INT16_C( 14), INT16_C( 5) }, { -INT16_C( 138), INT16_C( 10), -INT16_C( 4), -INT16_C( 18), -INT16_C( 155), INT16_C( 4), INT16_C( 37), INT16_C( 894), INT16_C( 44), INT16_C( 62), -INT16_C( 90), INT16_C( 2), INT16_C( 32571), -INT16_C( 56), INT16_C( 1), -INT16_C( 2), INT16_C( 1285), INT16_C( 647), -INT16_C( 3), -INT16_C( 800), INT16_C( 410), -INT16_C( 273), -INT16_C( 1), INT16_C( 50), INT16_C( 0), -INT16_C( 5457), -INT16_C( 1), INT16_C( 929), -INT16_C( 5740), -INT16_C( 3033), INT16_C( 0), INT16_C( 915) } }, { { INT16_C( 29751), INT16_C( 20144), -INT16_C( 19886), -INT16_C( 28779), -INT16_C( 26348), INT16_C( 505), -INT16_C( 18804), INT16_C( 7300), -INT16_C( 25679), INT16_C( 30), -INT16_C( 7551), INT16_C( 28489), -INT16_C( 21749), INT16_C( 5282), -INT16_C( 22890), -INT16_C( 12696), INT16_C( 6171), INT16_C( 27932), -INT16_C( 20022), 
-INT16_C( 8451), -INT16_C( 2485), -INT16_C( 10272), INT16_C( 25772), INT16_C( 24051), INT16_C( 4607), -INT16_C( 32675), -INT16_C( 22796), -INT16_C( 17), -INT16_C( 28079), -INT16_C( 6124), INT16_C( 31800), INT16_C( 21430) }, { INT16_C( 13), INT16_C( 5), INT16_C( 11), INT16_C( 12), INT16_C( 1), INT16_C( 6), INT16_C( 9), INT16_C( 7), INT16_C( 1), INT16_C( 9), INT16_C( 14), INT16_C( 1), INT16_C( 11), INT16_C( 11), INT16_C( 11), INT16_C( 12), INT16_C( 12), INT16_C( 0), INT16_C( 5), INT16_C( 4), INT16_C( 7), INT16_C( 15), INT16_C( 5), INT16_C( 11), INT16_C( 7), INT16_C( 3), INT16_C( 15), INT16_C( 14), INT16_C( 4), INT16_C( 13), INT16_C( 10), INT16_C( 7) }, { INT16_C( 3), INT16_C( 629), -INT16_C( 10), -INT16_C( 8), -INT16_C( 13174), INT16_C( 7), -INT16_C( 37), INT16_C( 57), -INT16_C( 12840), INT16_C( 0), -INT16_C( 1), INT16_C( 14244), -INT16_C( 11), INT16_C( 2), -INT16_C( 12), -INT16_C( 4), INT16_C( 1), INT16_C( 27932), -INT16_C( 626), -INT16_C( 529), -INT16_C( 20), -INT16_C( 1), INT16_C( 805), INT16_C( 11), INT16_C( 35), -INT16_C( 4085), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1755), -INT16_C( 1), INT16_C( 31), INT16_C( 167) } }, { { -INT16_C( 17539), INT16_C( 2427), INT16_C( 20248), -INT16_C( 28343), -INT16_C( 5688), -INT16_C( 9334), INT16_C( 838), -INT16_C( 17000), -INT16_C( 3204), -INT16_C( 7180), INT16_C( 16109), -INT16_C( 26420), INT16_C( 28289), INT16_C( 30066), INT16_C( 3357), -INT16_C( 25878), INT16_C( 26057), -INT16_C( 7773), -INT16_C( 4940), INT16_C( 32114), -INT16_C( 811), INT16_C( 7000), -INT16_C( 4096), INT16_C( 31960), -INT16_C( 13085), -INT16_C( 12193), INT16_C( 11018), -INT16_C( 29591), -INT16_C( 9319), -INT16_C( 18943), -INT16_C( 5144), -INT16_C( 20144) }, { INT16_C( 15), INT16_C( 0), INT16_C( 0), INT16_C( 11), INT16_C( 13), INT16_C( 0), INT16_C( 10), INT16_C( 10), INT16_C( 13), INT16_C( 7), INT16_C( 14), INT16_C( 10), INT16_C( 0), INT16_C( 10), INT16_C( 10), INT16_C( 4), INT16_C( 14), INT16_C( 7), INT16_C( 13), INT16_C( 15), INT16_C( 15), INT16_C( 7), 
INT16_C( 7), INT16_C( 1), INT16_C( 10), INT16_C( 5), INT16_C( 10), INT16_C( 4), INT16_C( 5), INT16_C( 10), INT16_C( 5), INT16_C( 9) }, { -INT16_C( 1), INT16_C( 2427), INT16_C( 20248), -INT16_C( 14), -INT16_C( 1), -INT16_C( 9334), INT16_C( 0), -INT16_C( 17), -INT16_C( 1), -INT16_C( 57), INT16_C( 0), -INT16_C( 26), INT16_C( 28289), INT16_C( 29), INT16_C( 3), -INT16_C( 1618), INT16_C( 1), -INT16_C( 61), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 54), -INT16_C( 32), INT16_C( 15980), -INT16_C( 13), -INT16_C( 382), INT16_C( 10), -INT16_C( 1850), -INT16_C( 292), -INT16_C( 19), -INT16_C( 161), -INT16_C( 40) } }, { { INT16_C( 15426), INT16_C( 13584), INT16_C( 16396), -INT16_C( 18902), INT16_C( 8000), -INT16_C( 6357), INT16_C( 20114), -INT16_C( 7934), -INT16_C( 25616), INT16_C( 31032), INT16_C( 12351), -INT16_C( 4156), -INT16_C( 17535), INT16_C( 31381), -INT16_C( 31730), INT16_C( 20495), INT16_C( 8128), -INT16_C( 13179), -INT16_C( 20640), -INT16_C( 24446), -INT16_C( 20785), INT16_C( 24967), -INT16_C( 30212), -INT16_C( 5054), INT16_C( 31268), INT16_C( 25701), INT16_C( 10922), INT16_C( 11091), -INT16_C( 5915), -INT16_C( 3163), -INT16_C( 19348), INT16_C( 11331) }, { INT16_C( 12), INT16_C( 3), INT16_C( 7), INT16_C( 4), INT16_C( 5), INT16_C( 2), INT16_C( 14), INT16_C( 0), INT16_C( 7), INT16_C( 0), INT16_C( 11), INT16_C( 8), INT16_C( 13), INT16_C( 1), INT16_C( 11), INT16_C( 6), INT16_C( 3), INT16_C( 15), INT16_C( 7), INT16_C( 13), INT16_C( 14), INT16_C( 10), INT16_C( 1), INT16_C( 3), INT16_C( 2), INT16_C( 2), INT16_C( 8), INT16_C( 8), INT16_C( 2), INT16_C( 15), INT16_C( 14), INT16_C( 6) }, { INT16_C( 3), INT16_C( 1698), INT16_C( 128), -INT16_C( 1182), INT16_C( 250), -INT16_C( 1590), INT16_C( 1), -INT16_C( 7934), -INT16_C( 201), INT16_C( 31032), INT16_C( 6), -INT16_C( 17), -INT16_C( 3), INT16_C( 15690), -INT16_C( 16), INT16_C( 320), INT16_C( 1016), -INT16_C( 1), -INT16_C( 162), -INT16_C( 3), -INT16_C( 2), INT16_C( 24), -INT16_C( 15106), -INT16_C( 632), INT16_C( 7817), 
INT16_C( 6425), INT16_C( 42), INT16_C( 43), -INT16_C( 1479), -INT16_C( 1), -INT16_C( 2), INT16_C( 177) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_srav_epi16(a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_srav_epi16) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/srl.c000066400000000000000000001622001400333146700162210ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN srl #include #include static int test_simde_mm512_srl_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[32]; const int64_t b[2]; const int16_t r[32]; } test_vec[] = { { { -INT16_C( 21752), -INT16_C( 22194), -INT16_C( 18737), INT16_C( 8433), INT16_C( 23052), INT16_C( 22961), -INT16_C( 7722), -INT16_C( 29326), INT16_C( 6474), INT16_C( 15054), INT16_C( 9100), -INT16_C( 16434), -INT16_C( 23329), INT16_C( 14938), -INT16_C( 23326), -INT16_C( 5412), INT16_C( 10831), INT16_C( 8083), -INT16_C( 31264), -INT16_C( 4801), -INT16_C( 3873), -INT16_C( 18874), -INT16_C( 18223), INT16_C( 7235), INT16_C( 4562), INT16_C( 24150), INT16_C( 9268), INT16_C( 4893), INT16_C( 30920), -INT16_C( 21939), INT16_C( 10524), INT16_C( 27541) }, { INT64_C( 17), INT64_C( 30) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 616), -INT16_C( 13232), INT16_C( 28193), -INT16_C( 5664), INT16_C( 11750), INT16_C( 660), INT16_C( 10583), -INT16_C( 21907), -INT16_C( 1967), -INT16_C( 34), -INT16_C( 63), INT16_C( 31628), -INT16_C( 12441), -INT16_C( 30970), -INT16_C( 21163), -INT16_C( 4743), -INT16_C( 13910), -INT16_C( 13382), -INT16_C( 26057), INT16_C( 7604), INT16_C( 18631), INT16_C( 7711), -INT16_C( 29327), -INT16_C( 15415), -INT16_C( 22651), INT16_C( 18114), INT16_C( 20135), INT16_C( 3777), -INT16_C( 14563), INT16_C( 29333), INT16_C( 3700), INT16_C( 7776) }, { INT64_C( 15), INT64_C( 11) }, { INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 
1), INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 8950), -INT16_C( 20060), INT16_C( 26155), INT16_C( 18623), INT16_C( 21549), -INT16_C( 23878), INT16_C( 6754), INT16_C( 15040), -INT16_C( 21964), -INT16_C( 6071), INT16_C( 30024), INT16_C( 11876), -INT16_C( 319), -INT16_C( 25978), -INT16_C( 24223), INT16_C( 27640), -INT16_C( 25218), -INT16_C( 22243), -INT16_C( 9213), INT16_C( 12529), -INT16_C( 21455), -INT16_C( 27694), -INT16_C( 27706), -INT16_C( 1075), INT16_C( 5693), -INT16_C( 31261), INT16_C( 18316), INT16_C( 19891), INT16_C( 14917), -INT16_C( 22808), -INT16_C( 7973), INT16_C( 23058) }, { INT64_C( 7), INT64_C( 10) }, { INT16_C( 442), INT16_C( 355), INT16_C( 204), INT16_C( 145), INT16_C( 168), INT16_C( 325), INT16_C( 52), INT16_C( 117), INT16_C( 340), INT16_C( 464), INT16_C( 234), INT16_C( 92), INT16_C( 509), INT16_C( 309), INT16_C( 322), INT16_C( 215), INT16_C( 314), INT16_C( 338), INT16_C( 440), INT16_C( 97), INT16_C( 344), INT16_C( 295), INT16_C( 295), INT16_C( 503), INT16_C( 44), INT16_C( 267), INT16_C( 143), INT16_C( 155), INT16_C( 116), INT16_C( 333), INT16_C( 449), INT16_C( 180) } }, { { INT16_C( 18100), INT16_C( 16600), -INT16_C( 29555), -INT16_C( 11379), INT16_C( 30150), -INT16_C( 24199), -INT16_C( 29866), -INT16_C( 11269), -INT16_C( 70), -INT16_C( 14764), INT16_C( 1524), -INT16_C( 27390), -INT16_C( 11640), -INT16_C( 24580), INT16_C( 24432), INT16_C( 9458), -INT16_C( 13403), INT16_C( 12900), -INT16_C( 3753), INT16_C( 7429), INT16_C( 32615), -INT16_C( 16962), -INT16_C( 17910), -INT16_C( 14960), -INT16_C( 6983), -INT16_C( 21109), -INT16_C( 29207), INT16_C( 29250), INT16_C( 15968), -INT16_C( 12271), INT16_C( 925), INT16_C( 17140) }, { INT64_C( 22), INT64_C( 14) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), 
INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 21730), -INT16_C( 13279), INT16_C( 25569), INT16_C( 16702), INT16_C( 20385), INT16_C( 16145), INT16_C( 1362), INT16_C( 8577), -INT16_C( 2467), -INT16_C( 22714), -INT16_C( 30351), INT16_C( 27223), -INT16_C( 14966), INT16_C( 17774), INT16_C( 14275), -INT16_C( 23111), -INT16_C( 9589), INT16_C( 28017), -INT16_C( 20675), -INT16_C( 8530), -INT16_C( 16130), INT16_C( 20765), -INT16_C( 24635), INT16_C( 9074), -INT16_C( 18283), INT16_C( 1738), INT16_C( 8513), -INT16_C( 13455), -INT16_C( 8218), -INT16_C( 22256), -INT16_C( 14057), -INT16_C( 23985) }, { INT64_C( 13), INT64_C( 2) }, { INT16_C( 2), INT16_C( 6), INT16_C( 3), INT16_C( 2), INT16_C( 2), INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 7), INT16_C( 5), INT16_C( 4), INT16_C( 3), INT16_C( 6), INT16_C( 2), INT16_C( 1), INT16_C( 5), INT16_C( 6), INT16_C( 3), INT16_C( 5), INT16_C( 6), INT16_C( 6), INT16_C( 2), INT16_C( 4), INT16_C( 1), INT16_C( 5), INT16_C( 0), INT16_C( 1), INT16_C( 6), INT16_C( 6), INT16_C( 5), INT16_C( 6), INT16_C( 5) } }, { { INT16_C( 12522), INT16_C( 11031), -INT16_C( 30638), INT16_C( 14583), INT16_C( 1896), INT16_C( 32738), INT16_C( 12753), INT16_C( 29729), INT16_C( 12785), INT16_C( 24917), INT16_C( 5359), INT16_C( 28112), -INT16_C( 28688), INT16_C( 27824), INT16_C( 6081), -INT16_C( 21635), -INT16_C( 27577), -INT16_C( 26154), -INT16_C( 13027), -INT16_C( 31278), -INT16_C( 19243), -INT16_C( 23036), INT16_C( 9701), -INT16_C( 10726), INT16_C( 28502), INT16_C( 17720), INT16_C( 2179), INT16_C( 29874), INT16_C( 25495), INT16_C( 22752), INT16_C( 23930), -INT16_C( 16125) }, { INT64_C( 15), INT64_C( 2) }, { INT16_C( 0), 
INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1) } }, { { -INT16_C( 11092), INT16_C( 12377), INT16_C( 3036), INT16_C( 29860), -INT16_C( 31634), -INT16_C( 5940), -INT16_C( 12063), -INT16_C( 11606), INT16_C( 1450), INT16_C( 20961), INT16_C( 29746), INT16_C( 5070), -INT16_C( 4084), -INT16_C( 13863), INT16_C( 29997), -INT16_C( 9508), INT16_C( 13642), INT16_C( 9738), -INT16_C( 20927), -INT16_C( 20582), INT16_C( 26418), INT16_C( 5016), INT16_C( 16951), -INT16_C( 7707), -INT16_C( 14777), INT16_C( 31026), INT16_C( 59), INT16_C( 18316), INT16_C( 26097), INT16_C( 7696), -INT16_C( 4902), INT16_C( 9464) }, { INT64_C( 28), INT64_C( 6) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 26719), -INT16_C( 9044), INT16_C( 14487), -INT16_C( 30685), INT16_C( 13213), INT16_C( 30887), -INT16_C( 24800), INT16_C( 17052), -INT16_C( 6238), INT16_C( 21157), -INT16_C( 18483), INT16_C( 6453), INT16_C( 10850), INT16_C( 20125), INT16_C( 261), -INT16_C( 22654), INT16_C( 11928), INT16_C( 12419), -INT16_C( 22681), INT16_C( 1208), INT16_C( 24538), -INT16_C( 1412), INT16_C( 6655), -INT16_C( 24260), -INT16_C( 7936), -INT16_C( 12813), INT16_C( 10393), -INT16_C( 1049), -INT16_C( 31661), INT16_C( 22601), -INT16_C( 13435), INT16_C( 7935) }, { INT64_C( 0), INT64_C( 31) }, { 
-INT16_C( 26719), -INT16_C( 9044), INT16_C( 14487), -INT16_C( 30685), INT16_C( 13213), INT16_C( 30887), -INT16_C( 24800), INT16_C( 17052), -INT16_C( 6238), INT16_C( 21157), -INT16_C( 18483), INT16_C( 6453), INT16_C( 10850), INT16_C( 20125), INT16_C( 261), -INT16_C( 22654), INT16_C( 11928), INT16_C( 12419), -INT16_C( 22681), INT16_C( 1208), INT16_C( 24538), -INT16_C( 1412), INT16_C( 6655), -INT16_C( 24260), -INT16_C( 7936), -INT16_C( 12813), INT16_C( 10393), -INT16_C( 1049), -INT16_C( 31661), INT16_C( 22601), -INT16_C( 13435), INT16_C( 7935) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_srl_epi16(a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_srl_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[16]; const int64_t b[2]; const int32_t r[16]; } test_vec[] = { { { -INT32_C( 1475230993), INT32_C( 227741951), -INT32_C( 1586039920), -INT32_C( 909665363), INT32_C( 1410969055), INT32_C( 106491510), INT32_C( 829046348), INT32_C( 1384343139), INT32_C( 670733352), INT32_C( 892636837), INT32_C( 785820289), INT32_C( 620272965), -INT32_C( 1049030325), INT32_C( 1288163584), INT32_C( 1920807183), -INT32_C( 1060831080) }, { INT64_C( 25), INT64_C( 5) }, { INT32_C( 84), INT32_C( 6), INT32_C( 80), INT32_C( 100), INT32_C( 42), INT32_C( 3), INT32_C( 24), INT32_C( 41), INT32_C( 19), INT32_C( 26), INT32_C( 23), INT32_C( 18), INT32_C( 96), INT32_C( 38), INT32_C( 57), INT32_C( 96) } }, { { INT32_C( 116369926), -INT32_C( 1974290821), -INT32_C( 2130849816), INT32_C( 1681965775), -INT32_C( 811784063), INT32_C( 211618885), INT32_C( 857250385), -INT32_C( 1772009073), INT32_C( 1855738099), -INT32_C( 285610234), -INT32_C( 1905330498), INT32_C( 972206008), INT32_C( 470323159), -INT32_C( 332880229), -INT32_C( 803192512), 
INT32_C( 2120647050) }, { INT64_C( 22), INT64_C( 23) }, { INT32_C( 27), INT32_C( 553), INT32_C( 515), INT32_C( 401), INT32_C( 830), INT32_C( 50), INT32_C( 204), INT32_C( 601), INT32_C( 442), INT32_C( 955), INT32_C( 569), INT32_C( 231), INT32_C( 112), INT32_C( 944), INT32_C( 832), INT32_C( 505) } }, { { INT32_C( 1540871615), -INT32_C( 1136197509), -INT32_C( 879990975), -INT32_C( 1186336024), -INT32_C( 426690828), -INT32_C( 141142245), INT32_C( 1871369355), -INT32_C( 1020635388), -INT32_C( 1474428627), INT32_C( 1113875969), -INT32_C( 1240534835), -INT32_C( 680568862), -INT32_C( 1447231347), -INT32_C( 476032169), INT32_C( 726805031), -INT32_C( 1343325054) }, { INT64_C( 8), INT64_C( 22) }, { INT32_C( 6019029), INT32_C( 12338944), INT32_C( 13339751), INT32_C( 12143090), INT32_C( 15110454), INT32_C( 16225879), INT32_C( 7310036), INT32_C( 12790359), INT32_C( 11017729), INT32_C( 4351078), INT32_C( 11931376), INT32_C( 14118743), INT32_C( 11123968), INT32_C( 14917715), INT32_C( 2839082), INT32_C( 11529852) } }, { { -INT32_C( 1101192346), -INT32_C( 1650328202), -INT32_C( 1429671128), -INT32_C( 329664913), INT32_C( 913027267), INT32_C( 427240812), INT32_C( 598240764), INT32_C( 987238099), -INT32_C( 1476905679), INT32_C( 1531222323), -INT32_C( 83555188), -INT32_C( 2031657278), INT32_C( 2075939598), INT32_C( 2090087296), INT32_C( 1956658337), INT32_C( 2075031626) }, { INT64_C( 25), INT64_C( 21) }, { INT32_C( 95), INT32_C( 78), INT32_C( 85), INT32_C( 118), INT32_C( 27), INT32_C( 12), INT32_C( 17), INT32_C( 29), INT32_C( 83), INT32_C( 45), INT32_C( 125), INT32_C( 67), INT32_C( 61), INT32_C( 62), INT32_C( 58), INT32_C( 61) } }, { { -INT32_C( 2128054527), INT32_C( 1274920106), INT32_C( 1119919608), -INT32_C( 1010995691), INT32_C( 1420025621), -INT32_C( 1222582459), -INT32_C( 1091836385), -INT32_C( 1788107116), -INT32_C( 954821859), INT32_C( 1192432719), -INT32_C( 947268687), INT32_C( 1451902529), INT32_C( 1789603109), INT32_C( 740412172), INT32_C( 1189743793), -INT32_C( 807709262) 
}, { INT64_C( 22), INT64_C( 14) }, { INT32_C( 516), INT32_C( 303), INT32_C( 267), INT32_C( 782), INT32_C( 338), INT32_C( 732), INT32_C( 763), INT32_C( 597), INT32_C( 796), INT32_C( 284), INT32_C( 798), INT32_C( 346), INT32_C( 426), INT32_C( 176), INT32_C( 283), INT32_C( 831) } }, { { INT32_C( 1121763382), INT32_C( 1013906827), -INT32_C( 1132308471), -INT32_C( 1786028371), INT32_C( 1456218704), INT32_C( 1225607884), -INT32_C( 1643606959), -INT32_C( 904913516), INT32_C( 1745743069), -INT32_C( 207324183), -INT32_C( 2119227436), -INT32_C( 719897979), INT32_C( 690742109), INT32_C( 2138257454), INT32_C( 1495169988), INT32_C( 1965239960) }, { INT64_C( 16), INT64_C( 7) }, { INT32_C( 17116), INT32_C( 15470), INT32_C( 48258), INT32_C( 38283), INT32_C( 22220), INT32_C( 18701), INT32_C( 40456), INT32_C( 51728), INT32_C( 26637), INT32_C( 62372), INT32_C( 33199), INT32_C( 54551), INT32_C( 10539), INT32_C( 32627), INT32_C( 22814), INT32_C( 29987) } }, { { INT32_C( 711405052), INT32_C( 715774566), -INT32_C( 310130859), INT32_C( 291678198), -INT32_C( 2095759401), INT32_C( 1761807809), -INT32_C( 1802041933), INT32_C( 433232157), INT32_C( 759380679), INT32_C( 1784147220), -INT32_C( 1437082700), INT32_C( 1505475202), -INT32_C( 1159867911), -INT32_C( 1859854114), INT32_C( 52870117), INT32_C( 454883412) }, { INT64_C( 0), INT64_C( 23) }, { INT32_C( 711405052), INT32_C( 715774566), -INT32_C( 310130859), INT32_C( 291678198), -INT32_C( 2095759401), INT32_C( 1761807809), -INT32_C( 1802041933), INT32_C( 433232157), INT32_C( 759380679), INT32_C( 1784147220), -INT32_C( 1437082700), INT32_C( 1505475202), -INT32_C( 1159867911), -INT32_C( 1859854114), INT32_C( 52870117), INT32_C( 454883412) } }, { { INT32_C( 376845112), -INT32_C( 106391020), -INT32_C( 1426272683), -INT32_C( 104523322), -INT32_C( 968880519), INT32_C( 700969390), -INT32_C( 1138330631), INT32_C( 326663387), -INT32_C( 1003819344), -INT32_C( 557985143), INT32_C( 1720236704), INT32_C( 1281314515), INT32_C( 168992604), -INT32_C( 
1976313456), INT32_C( 675699021), -INT32_C( 2059682091) }, { INT64_C( 26), INT64_C( 8) }, { INT32_C( 5), INT32_C( 62), INT32_C( 42), INT32_C( 62), INT32_C( 49), INT32_C( 10), INT32_C( 47), INT32_C( 4), INT32_C( 49), INT32_C( 55), INT32_C( 25), INT32_C( 19), INT32_C( 2), INT32_C( 34), INT32_C( 10), INT32_C( 33) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_srl_epi32(a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_srl_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t src[16]; const simde__mmask16 k; const int32_t a[16]; const int64_t b[2]; const int32_t r[16]; } test_vec[] = { { { -INT32_C( 1391069190), INT32_C( 76515171), -INT32_C( 1669093777), INT32_C( 2052374992), -INT32_C( 17678478), -INT32_C( 1259542507), INT32_C( 626340292), INT32_C( 1967227002), -INT32_C( 618506633), INT32_C( 1306505694), INT32_C( 669606487), -INT32_C( 1784595165), -INT32_C( 1835822212), INT32_C( 1078361979), INT32_C( 728079281), -INT32_C( 1818188005) }, UINT16_C(49917), { INT32_C( 1299438446), INT32_C( 313510440), -INT32_C( 1840262415), -INT32_C( 98186137), -INT32_C( 1535467170), INT32_C( 1178545121), -INT32_C( 135439530), INT32_C( 1555688430), -INT32_C( 274125370), -INT32_C( 385787657), -INT32_C( 1854189526), INT32_C( 2055971100), INT32_C( 589169986), -INT32_C( 2023137744), -INT32_C( 1484892231), INT32_C( 134428482) }, { INT64_C( 5), INT64_C( 20) }, { INT32_C( 40607451), INT32_C( 76515171), INT32_C( 76709527), INT32_C( 131149411), INT32_C( 86234378), INT32_C( 36829535), INT32_C( 129985242), INT32_C( 48615263), -INT32_C( 618506633), INT32_C( 122161863), INT32_C( 669606487), -INT32_C( 1784595165), -INT32_C( 1835822212), INT32_C( 1078361979), INT32_C( 87814845), INT32_C( 4200890) } }, { { -INT32_C( 2134442416), INT32_C( 
352792668), -INT32_C( 910391929), INT32_C( 567394237), INT32_C( 1870514539), INT32_C( 178111169), -INT32_C( 462398333), -INT32_C( 142061401), -INT32_C( 1418244530), INT32_C( 96501630), -INT32_C( 1043432188), -INT32_C( 1461477316), INT32_C( 706175081), -INT32_C( 1506429661), -INT32_C( 1198873327), INT32_C( 1135547125) }, UINT16_C(10081), { -INT32_C( 1348018194), -INT32_C( 1288918299), INT32_C( 1330866284), INT32_C( 682605584), INT32_C( 484299495), INT32_C( 63041144), INT32_C( 1444263591), -INT32_C( 394430727), INT32_C( 999760726), INT32_C( 988726222), INT32_C( 1015628331), -INT32_C( 446416642), -INT32_C( 2097069813), -INT32_C( 695811537), INT32_C( 1915591800), INT32_C( 1750772242) }, { INT64_C( 27), INT64_C( 20) }, { INT32_C( 21), INT32_C( 352792668), -INT32_C( 910391929), INT32_C( 567394237), INT32_C( 1870514539), INT32_C( 0), INT32_C( 10), -INT32_C( 142061401), INT32_C( 7), INT32_C( 7), INT32_C( 7), -INT32_C( 1461477316), INT32_C( 706175081), INT32_C( 26), -INT32_C( 1198873327), INT32_C( 1135547125) } }, { { -INT32_C( 265897536), -INT32_C( 188306308), INT32_C( 1533473608), INT32_C( 1824768158), INT32_C( 1695049649), -INT32_C( 851058951), -INT32_C( 727752643), -INT32_C( 1602791456), -INT32_C( 1919902447), -INT32_C( 1837017271), -INT32_C( 387061686), INT32_C( 1515499688), INT32_C( 280976407), INT32_C( 2010973242), INT32_C( 1162640741), -INT32_C( 437926956) }, UINT16_C(30304), { -INT32_C( 187848334), INT32_C( 685512507), INT32_C( 1423541248), -INT32_C( 1632505634), -INT32_C( 559748351), -INT32_C( 1352988829), INT32_C( 846344268), -INT32_C( 2002202091), -INT32_C( 1216580229), -INT32_C( 1931519860), -INT32_C( 1142834980), -INT32_C( 1436970327), -INT32_C( 527893635), INT32_C( 1334830083), INT32_C( 696320276), INT32_C( 1337010643) }, { INT64_C( 12), INT64_C( 21) }, { -INT32_C( 265897536), -INT32_C( 188306308), INT32_C( 1533473608), INT32_C( 1824768158), INT32_C( 1695049649), INT32_C( 718256), INT32_C( 206627), -INT32_C( 1602791456), -INT32_C( 1919902447), INT32_C( 
577013), INT32_C( 769563), INT32_C( 1515499688), INT32_C( 919695), INT32_C( 325886), INT32_C( 170000), -INT32_C( 437926956) } }, { { INT32_C( 1955101041), INT32_C( 1908676701), -INT32_C( 308591335), INT32_C( 222055535), -INT32_C( 13090182), -INT32_C( 983437273), -INT32_C( 1291026808), INT32_C( 1717304820), -INT32_C( 723852425), INT32_C( 356949755), INT32_C( 1392697828), -INT32_C( 1486864851), -INT32_C( 1482188416), INT32_C( 292358281), INT32_C( 2076473735), -INT32_C( 287236233) }, UINT16_C(48131), { INT32_C( 140181186), INT32_C( 367607315), -INT32_C( 229435503), INT32_C( 1703662526), -INT32_C( 395438981), -INT32_C( 362679003), INT32_C( 1376376944), -INT32_C( 1995567930), INT32_C( 512845835), INT32_C( 942963623), INT32_C( 1344964498), -INT32_C( 692669093), -INT32_C( 272751415), -INT32_C( 1982259431), -INT32_C( 1092885768), -INT32_C( 95950353) }, { INT64_C( 28), INT64_C( 27) }, { INT32_C( 0), INT32_C( 1), -INT32_C( 308591335), INT32_C( 222055535), -INT32_C( 13090182), -INT32_C( 983437273), -INT32_C( 1291026808), INT32_C( 1717304820), -INT32_C( 723852425), INT32_C( 356949755), INT32_C( 5), INT32_C( 13), INT32_C( 14), INT32_C( 8), INT32_C( 2076473735), INT32_C( 15) } }, { { INT32_C( 701154064), -INT32_C( 625761310), INT32_C( 1956220549), -INT32_C( 898703240), INT32_C( 230918073), -INT32_C( 1477184301), INT32_C( 1658202704), INT32_C( 658365206), -INT32_C( 2125461602), INT32_C( 794493866), INT32_C( 178582674), -INT32_C( 1898704171), INT32_C( 1838978969), -INT32_C( 602632309), -INT32_C( 801182791), INT32_C( 1710717894) }, UINT16_C(18306), { INT32_C( 1095445734), -INT32_C( 13181605), -INT32_C( 1156445209), INT32_C( 894610329), INT32_C( 767941912), -INT32_C( 149389639), INT32_C( 1165155918), INT32_C( 697168963), INT32_C( 2087442464), -INT32_C( 1669619275), INT32_C( 1163365804), INT32_C( 1367058745), INT32_C( 1082015878), INT32_C( 221746878), -INT32_C( 1202541963), -INT32_C( 1075650658) }, { INT64_C( 19), INT64_C( 17) }, { INT32_C( 701154064), INT32_C( 8166), INT32_C( 
1956220549), -INT32_C( 898703240), INT32_C( 230918073), -INT32_C( 1477184301), INT32_C( 1658202704), INT32_C( 1329), INT32_C( 3981), INT32_C( 5007), INT32_C( 2218), -INT32_C( 1898704171), INT32_C( 1838978969), -INT32_C( 602632309), INT32_C( 5898), INT32_C( 1710717894) } }, { { INT32_C( 1506758042), INT32_C( 1483081443), INT32_C( 1326561456), INT32_C( 1326379928), INT32_C( 784091456), INT32_C( 1137231103), INT32_C( 750823204), -INT32_C( 1665429758), -INT32_C( 1074427172), INT32_C( 1092115345), -INT32_C( 1399838444), INT32_C( 1559993884), -INT32_C( 410339353), -INT32_C( 1607839108), -INT32_C( 70456327), INT32_C( 1452836986) }, UINT16_C(36114), { INT32_C( 770286357), INT32_C( 1968635365), -INT32_C( 1542163799), INT32_C( 1482488782), INT32_C( 229300450), INT32_C( 1157145720), INT32_C( 936145567), -INT32_C( 574234680), INT32_C( 1728818818), INT32_C( 1423794603), -INT32_C( 1560743468), -INT32_C( 805612308), -INT32_C( 1529043668), -INT32_C( 370551735), INT32_C( 237024582), INT32_C( 401335700) }, { INT64_C( 5), INT64_C( 1) }, { INT32_C( 1506758042), INT32_C( 61519855), INT32_C( 1326561456), INT32_C( 1326379928), INT32_C( 7165639), INT32_C( 1137231103), INT32_C( 750823204), -INT32_C( 1665429758), INT32_C( 54025588), INT32_C( 1092115345), INT32_C( 85444494), INT32_C( 109042343), -INT32_C( 410339353), -INT32_C( 1607839108), -INT32_C( 70456327), INT32_C( 12541740) } }, { { -INT32_C( 1196366737), -INT32_C( 22963784), -INT32_C( 485703089), INT32_C( 1006303143), INT32_C( 1182366190), INT32_C( 561122516), INT32_C( 1985626263), INT32_C( 2038587914), -INT32_C( 30330042), INT32_C( 469554124), INT32_C( 1023346837), -INT32_C( 310904321), INT32_C( 1194586482), -INT32_C( 1805081091), -INT32_C( 267730202), INT32_C( 1785302308) }, UINT16_C(39616), { INT32_C( 1684900968), -INT32_C( 1486093656), -INT32_C( 1231000769), -INT32_C( 1935207591), -INT32_C( 1036868518), INT32_C( 1132730424), INT32_C( 1909499912), INT32_C( 2047578130), -INT32_C( 992052964), -INT32_C( 1167373701), INT32_C( 
242289845), INT32_C( 2040207391), -INT32_C( 180630083), -INT32_C( 315047963), INT32_C( 2036205671), -INT32_C( 1242338920) }, { INT64_C( 26), INT64_C( 11) }, { -INT32_C( 1196366737), -INT32_C( 22963784), -INT32_C( 485703089), INT32_C( 1006303143), INT32_C( 1182366190), INT32_C( 561122516), INT32_C( 28), INT32_C( 30), -INT32_C( 30330042), INT32_C( 46), INT32_C( 1023346837), INT32_C( 30), INT32_C( 61), -INT32_C( 1805081091), -INT32_C( 267730202), INT32_C( 45) } }, { { INT32_C( 743752775), -INT32_C( 367424125), INT32_C( 778270613), -INT32_C( 1008511264), INT32_C( 1109482535), INT32_C( 840055105), INT32_C( 1698886083), -INT32_C( 1295725717), INT32_C( 316545167), INT32_C( 1006434213), INT32_C( 1332305774), -INT32_C( 602780491), -INT32_C( 367119448), INT32_C( 790376812), -INT32_C( 1902878942), -INT32_C( 1170188246) }, UINT16_C( 8049), { -INT32_C( 921299252), -INT32_C( 1171749551), -INT32_C( 435757356), -INT32_C( 652628038), -INT32_C( 1257601639), INT32_C( 1226058933), -INT32_C( 73252934), -INT32_C( 988146695), INT32_C( 2072916009), -INT32_C( 1992968267), INT32_C( 1332690069), -INT32_C( 2077718293), -INT32_C( 1019661810), INT32_C( 420236895), -INT32_C( 2045464947), -INT32_C( 347394367) }, { INT64_C( 4), INT64_C( 20) }, { INT32_C( 210854252), -INT32_C( 367424125), INT32_C( 778270613), -INT32_C( 1008511264), INT32_C( 189835353), INT32_C( 76628683), INT32_C( 263857147), -INT32_C( 1295725717), INT32_C( 129557250), INT32_C( 143874939), INT32_C( 83293129), INT32_C( 138578062), INT32_C( 204706592), INT32_C( 790376812), -INT32_C( 1902878942), -INT32_C( 1170188246) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi32(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_mask_srl_epi32(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } 
static int test_simde_mm512_maskz_srl_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const int32_t a[16]; const int64_t b[2]; const int32_t r[16]; } test_vec[] = { { UINT16_C(10508), { -INT32_C( 652285907), INT32_C( 1973831532), INT32_C( 1688648047), -INT32_C( 1906907068), INT32_C( 2083047916), -INT32_C( 1402768041), -INT32_C( 2074714565), -INT32_C( 1683135890), -INT32_C( 1217017014), -INT32_C( 2111038702), INT32_C( 434557909), -INT32_C( 1482146373), INT32_C( 1562693638), -INT32_C( 2130081978), INT32_C( 1392861157), INT32_C( 217035713) }, { INT64_C( 10), INT64_C( 7) }, { INT32_C( 0), INT32_C( 0), INT32_C( 1649070), INT32_C( 2332090), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 3005810), INT32_C( 0), INT32_C( 0), INT32_C( 2746895), INT32_C( 0), INT32_C( 2114145), INT32_C( 0), INT32_C( 0) } }, { UINT16_C(18918), { -INT32_C( 1563415399), -INT32_C( 1291733586), -INT32_C( 144260344), -INT32_C( 1839471153), -INT32_C( 1987978378), -INT32_C( 1652407249), -INT32_C( 424429119), -INT32_C( 1892705547), INT32_C( 1949433798), -INT32_C( 1121438796), -INT32_C( 978022666), -INT32_C( 380170125), INT32_C( 443734251), -INT32_C( 558303972), -INT32_C( 1547408466), -INT32_C( 869075963) }, { INT64_C( 17), INT64_C( 2) }, { INT32_C( 0), INT32_C( 22912), INT32_C( 31667), INT32_C( 0), INT32_C( 0), INT32_C( 20161), INT32_C( 29529), INT32_C( 18327), INT32_C( 14872), INT32_C( 0), INT32_C( 0), INT32_C( 29867), INT32_C( 0), INT32_C( 0), INT32_C( 20962), INT32_C( 0) } }, { UINT16_C(50817), { -INT32_C( 407134673), INT32_C( 1079142780), INT32_C( 1060395021), INT32_C( 1688414244), INT32_C( 902642384), INT32_C( 424592583), -INT32_C( 2101184466), -INT32_C( 1152826228), INT32_C( 1587676386), INT32_C( 2074015086), INT32_C( 1908069197), -INT32_C( 875208965), INT32_C( 1610654360), -INT32_C( 1787212186), INT32_C( 1394031814), INT32_C( 1645109376) }, { INT64_C( 30), INT64_C( 31) }, { INT32_C( 3), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), 
INT32_C( 0), INT32_C( 2), INT32_C( 0), INT32_C( 1), INT32_C( 1), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1), INT32_C( 1) } }, { UINT16_C(63452), { -INT32_C( 666484129), -INT32_C( 283898665), INT32_C( 1850709087), -INT32_C( 1239436042), -INT32_C( 719993465), INT32_C( 484329144), INT32_C( 1783972979), -INT32_C( 1570623165), INT32_C( 1266395252), INT32_C( 339382196), INT32_C( 444762660), -INT32_C( 959340226), INT32_C( 513533542), -INT32_C( 1623557844), INT32_C( 17469374), INT32_C( 899968193) }, { INT64_C( 26), INT64_C( 12) }, { INT32_C( 0), INT32_C( 0), INT32_C( 27), INT32_C( 45), INT32_C( 53), INT32_C( 0), INT32_C( 26), INT32_C( 40), INT32_C( 18), INT32_C( 5), INT32_C( 6), INT32_C( 0), INT32_C( 7), INT32_C( 39), INT32_C( 0), INT32_C( 13) } }, { UINT16_C(58788), { -INT32_C( 1117859709), INT32_C( 2051873904), -INT32_C( 1025110498), -INT32_C( 1008600509), -INT32_C( 1619095614), INT32_C( 1342030690), -INT32_C( 1878131385), -INT32_C( 428494494), -INT32_C( 240921471), INT32_C( 241955056), INT32_C( 1104171518), INT32_C( 235254091), -INT32_C( 1548909759), -INT32_C( 1896699321), INT32_C( 169803687), INT32_C( 837850288) }, { INT64_C( 16), INT64_C( 5) }, { INT32_C( 0), INT32_C( 0), INT32_C( 49894), INT32_C( 0), INT32_C( 0), INT32_C( 20477), INT32_C( 0), INT32_C( 58997), INT32_C( 61859), INT32_C( 0), INT32_C( 16848), INT32_C( 0), INT32_C( 0), INT32_C( 36594), INT32_C( 2590), INT32_C( 12784) } }, { UINT16_C(58956), { -INT32_C( 1080978483), INT32_C( 1086208033), INT32_C( 852782658), -INT32_C( 1027195745), -INT32_C( 78558572), -INT32_C( 1875693108), -INT32_C( 1772544932), INT32_C( 326936134), INT32_C( 1540492601), -INT32_C( 2003070906), -INT32_C( 1648660482), INT32_C( 1063289259), -INT32_C( 1757695541), INT32_C( 1042837218), -INT32_C( 791379574), INT32_C( 1642287399) }, { INT64_C( 8), INT64_C( 6) }, { INT32_C( 0), INT32_C( 0), INT32_C( 3331182), INT32_C( 12764732), INT32_C( 0), INT32_C( 0), INT32_C( 9853212), INT32_C( 0), INT32_C( 0), INT32_C( 8952720), INT32_C( 10337135), 
INT32_C( 0), INT32_C( 0), INT32_C( 4073582), INT32_C( 13685889), INT32_C( 6415185) } }, { UINT16_C(60401), { -INT32_C( 212216885), -INT32_C( 428481774), -INT32_C( 1774740301), -INT32_C( 1186228483), INT32_C( 1729138746), -INT32_C( 1269836077), -INT32_C( 118179769), -INT32_C( 1193023764), -INT32_C( 877969991), -INT32_C( 726523872), -INT32_C( 1167398467), -INT32_C( 1166756225), INT32_C( 438404166), -INT32_C( 1563528869), INT32_C( 379241001), INT32_C( 1657700008) }, { INT64_C( 10), INT64_C( 2) }, { INT32_C( 3987060), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1688612), INT32_C( 2954229), INT32_C( 4078894), INT32_C( 3029241), INT32_C( 3336911), INT32_C( 3484808), INT32_C( 0), INT32_C( 3054893), INT32_C( 0), INT32_C( 2667420), INT32_C( 370352), INT32_C( 1618847) } }, { UINT16_C( 9226), { -INT32_C( 23763664), -INT32_C( 1564361209), -INT32_C( 1574934060), -INT32_C( 115549237), INT32_C( 1725478582), INT32_C( 511746317), -INT32_C( 324775702), -INT32_C( 1760514458), INT32_C( 446014739), INT32_C( 951866980), -INT32_C( 1948525376), INT32_C( 1854207927), -INT32_C( 1160487507), -INT32_C( 388475650), INT32_C( 181763236), INT32_C( 1034020138) }, { INT64_C( 9), INT64_C( 20) }, { INT32_C( 0), INT32_C( 5333215), INT32_C( 0), INT32_C( 8162925), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 4582894), INT32_C( 0), INT32_C( 0), INT32_C( 7629866), INT32_C( 0), INT32_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_maskz_srl_epi32(test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_srl_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[8]; const int64_t b[2]; const int64_t r[8]; } test_vec[] = { { { INT64_C( 4503272731429987359), -INT64_C( 
2015384591523464281), INT64_C( 8370329603485121851), INT64_C( 8741546637051844249), INT64_C( 7244483912508171930), INT64_C( 2420618720360332110), INT64_C( 4422947859294142848), INT64_C( 3359849781269978573) }, { INT64_C( 37), INT64_C( 38) }, { INT64_C( 32765621), INT64_C( 119553875), INT64_C( 60902163), INT64_C( 63603122), INT64_C( 52710557), INT64_C( 17612319), INT64_C( 32181181), INT64_C( 24446124) } }, { { -INT64_C( 7637003490261708254), INT64_C( 6415480880923232326), INT64_C( 9218637000122400785), INT64_C( 4196059733781788805), -INT64_C( 930270758595148872), INT64_C( 4864966429638513740), -INT64_C( 4010865298925415083), -INT64_C( 2947988449335396157) }, { INT64_C( 25), INT64_C( 63) }, { INT64_C( 322155373795), INT64_C( 191196229485), INT64_C( 274736791852), INT64_C( 125052324944), INT64_C( 522031584832), INT64_C( 144987297941), INT64_C( 430222713195), INT64_C( 461898911725) } }, { { -INT64_C( 6686951679321549528), -INT64_C( 6351318751585002523), -INT64_C( 1572949627593083733), -INT64_C( 631318895338144874), INT64_C( 2734693502203073391), -INT64_C( 5541956386463059407), INT64_C( 3082894904229679673), -INT64_C( 5609612383155050401) }, { INT64_C( 19), INT64_C( 33) }, { INT64_C( 22430023945594), INT64_C( 23070192951439), INT64_C( 32184208767159), INT64_C( 33980226856940), INT64_C( 5216013912588), INT64_C( 24613929152005), INT64_C( 5880155380687), INT64_C( 24484885579213) } }, { { -INT64_C( 7032322167997686871), INT64_C( 3695752672797643348), -INT64_C( 6275681140897592453), INT64_C( 2911922955017037974), INT64_C( 5599047430447746517), -INT64_C( 1275313147080310623), INT64_C( 6872898518047554424), -INT64_C( 8520621702864799464) }, { INT64_C( 37), INT64_C( 18) }, { INT64_C( 83050849), INT64_C( 26890139), INT64_C( 88556138), INT64_C( 21187027), INT64_C( 40738431), INT64_C( 124938603), INT64_C( 50006918), INT64_C( 72222045) } }, { { INT64_C( 4873285309233805945), INT64_C( 439834950256813021), -INT64_C( 2716455584873858879), INT64_C( 8498945592412536537), INT64_C( 
3853490666578965515), -INT64_C( 5633192859071523523), INT64_C( 6376453306728335769), -INT64_C( 565776880762709068) }, { INT64_C( 29), INT64_C( 45) }, { INT64_C( 9077201242), INT64_C( 819256436), INT64_C( 29299945549), INT64_C( 15830519781), INT64_C( 7177685697), INT64_C( 23867099014), INT64_C( 11877069821), INT64_C( 33305896805) } }, { { INT64_C( 8157098361486837595), INT64_C( 1321040308684797296), INT64_C( 3012847868082094884), INT64_C( 8923801435785072389), INT64_C( 8497317249283403709), INT64_C( 8789380872681950910), INT64_C( 2017793055357488554), -INT64_C( 8480534500030408781) }, { INT64_C( 59), INT64_C( 24) }, { INT64_C( 14), INT64_C( 2), INT64_C( 5), INT64_C( 15), INT64_C( 14), INT64_C( 15), INT64_C( 3), INT64_C( 17) } }, { { -INT64_C( 337988464786431205), -INT64_C( 1610283556321391280), -INT64_C( 6926550933078561340), -INT64_C( 7089296378098830461), INT64_C( 6893073757373453926), INT64_C( 6645101486772353452), -INT64_C( 2301568806200316975), INT64_C( 3098962276046799842) }, { INT64_C( 38), INT64_C( 28) }, { INT64_C( 65879269), INT64_C( 61250686), INT64_C( 41910218), INT64_C( 41318154), INT64_C( 25076856), INT64_C( 24174738), INT64_C( 58735805), INT64_C( 11273959) } }, { { INT64_C( 5894349276315664526), INT64_C( 3522860139222001906), INT64_C( 4256409894813416583), -INT64_C( 5955799268209984695), INT64_C( 1855594266527086592), -INT64_C( 5539512011075848627), INT64_C( 426499171783667312), INT64_C( 8791384339317111350) }, { INT64_C( 19), INT64_C( 44) }, { INT64_C( 11242579033500), INT64_C( 6719322470134), INT64_C( 8118457593561), INT64_C( 23824586497305), INT64_C( 3539265187315), INT64_C( 24618591428058), INT64_C( 813482612197), INT64_C( 16768234900125) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_srl_epi64(a, b); simde_test_x86_assert_equal_i64x8(r, 
simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_srl_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t src[8]; const simde__mmask8 k; const int64_t a[8]; const int64_t b[2]; const int64_t r[8]; } test_vec[] = { { { INT64_C( 5018051879103075663), -INT64_C( 660831149718276767), -INT64_C( 4444598874757254777), -INT64_C( 3636321904948482748), INT64_C( 694495148200162758), -INT64_C( 912842675246716212), INT64_C( 3729394938595987999), INT64_C( 5382103391663641445) }, UINT8_C( 7), { INT64_C( 8469222834209532005), -INT64_C( 8151281972419631149), -INT64_C( 875474453646156037), -INT64_C( 3146959030850175687), -INT64_C( 8202998051811360151), -INT64_C( 2609501873265243137), INT64_C( 3431787955991037454), INT64_C( 4695510415541147622) }, { INT64_C( 52), INT64_C( 41) }, { INT64_C( 1880), INT64_C( 2286), INT64_C( 3901), -INT64_C( 3636321904948482748), INT64_C( 694495148200162758), -INT64_C( 912842675246716212), INT64_C( 3729394938595987999), INT64_C( 5382103391663641445) } }, { { INT64_C( 4774085380789252891), INT64_C( 6351976018652674572), INT64_C( 7285695253303204985), INT64_C( 1704423325787105127), -INT64_C( 205924299156423802), -INT64_C( 863837376267240669), INT64_C( 5459588585176950308), INT64_C( 907179034420130690) }, UINT8_C(237), { -INT64_C( 3509752084262096962), -INT64_C( 2384022715044792899), INT64_C( 4709090717786589566), INT64_C( 3258586805351995736), -INT64_C( 5862104578432690494), -INT64_C( 6022899097165107309), -INT64_C( 7856847056654750553), INT64_C( 6004729387056741816) }, { INT64_C( 54), INT64_C( 16) }, { INT64_C( 829), INT64_C( 6351976018652674572), INT64_C( 261), INT64_C( 180), -INT64_C( 205924299156423802), INT64_C( 689), INT64_C( 587), INT64_C( 333) } }, { { -INT64_C( 5347199935727591939), -INT64_C( 1761696095209150550), INT64_C( 4827803292971284187), INT64_C( 3812233551889196554), INT64_C( 8473019998232436452), -INT64_C( 9115217864340507683), INT64_C( 6537717375098455028), -INT64_C( 
7821090437979903991) }, UINT8_C(198), { INT64_C( 9147944483022796001), -INT64_C( 8004967957813555785), INT64_C( 357537112035189065), -INT64_C( 8293301012967246278), INT64_C( 3981911599859595706), INT64_C( 6035127009660397092), INT64_C( 1627267161321939317), INT64_C( 5234344461575145580) }, { INT64_C( 1), INT64_C( 2) }, { -INT64_C( 5347199935727591939), INT64_C( 5220888057947997915), INT64_C( 178768556017594532), INT64_C( 3812233551889196554), INT64_C( 8473019998232436452), -INT64_C( 9115217864340507683), INT64_C( 813633580660969658), INT64_C( 2617172230787572790) } }, { { -INT64_C( 753457344752823400), INT64_C( 9100845639399228278), INT64_C( 6691762627973503226), -INT64_C( 6772517690425994392), INT64_C( 4611865007080578178), -INT64_C( 4987858906752217499), -INT64_C( 5481882081349639960), -INT64_C( 5102383525635684220) }, UINT8_C( 77), { -INT64_C( 3220258701015792569), -INT64_C( 4606252918426593125), -INT64_C( 7433827182372543520), -INT64_C( 2636424837617130645), INT64_C( 8279327878949470286), -INT64_C( 6427567103991252564), INT64_C( 35410411469199188), INT64_C( 2639257638627200305) }, { INT64_C( 9), INT64_C( 60) }, { INT64_C( 29739229243542498), INT64_C( 9100845639399228278), INT64_C( 21509603303392593), INT64_C( 30879529757993009), INT64_C( 4611865007080578178), -INT64_C( 4987858906752217499), INT64_C( 69160959900779), -INT64_C( 5102383525635684220) } }, { { -INT64_C( 474472491459740714), INT64_C( 1413390369529169826), -INT64_C( 8618690178674731836), INT64_C( 7765024437675857456), -INT64_C( 7951878168928412957), -INT64_C( 8653482228769463353), INT64_C( 5473607594897955195), -INT64_C( 6430896436868883521) }, UINT8_C(119), { INT64_C( 6529930248793313686), INT64_C( 3757589078185112322), INT64_C( 7156672293724263016), -INT64_C( 213133291419162954), INT64_C( 7813308203475955998), -INT64_C( 7533049731877532699), INT64_C( 1637473394195219917), -INT64_C( 3854038203012358113) }, { INT64_C( 43), INT64_C( 6) }, { INT64_C( 742367), INT64_C( 427188), INT64_C( 813619), INT64_C( 
7765024437675857456), INT64_C( 888270), INT64_C( 1240743), INT64_C( 186159), -INT64_C( 6430896436868883521) } }, { { INT64_C( 6890553886565778985), INT64_C( 2558441506522614282), -INT64_C( 8775070714056974145), -INT64_C( 5393012673236621375), -INT64_C( 3633876295361783311), -INT64_C( 7040528235959716310), INT64_C( 7969224737570684516), INT64_C( 4740220371150316935) }, UINT8_C( 62), { INT64_C( 8796319542675600136), -INT64_C( 8882514063471855023), -INT64_C( 8572988487162016737), -INT64_C( 1585517887377303268), INT64_C( 2309471185300160654), INT64_C( 9018229961860212253), INT64_C( 1579359360142563581), INT64_C( 7919253252302357531) }, { INT64_C( 46), INT64_C( 26) }, { INT64_C( 6890553886565778985), INT64_C( 135915), INT64_C( 140314), INT64_C( 239612), INT64_C( 32819), INT64_C( 128156), INT64_C( 7969224737570684516), INT64_C( 4740220371150316935) } }, { { INT64_C( 7559772791110257371), INT64_C( 7838035306306977657), INT64_C( 4924922392629855747), INT64_C( 1361601353282270225), -INT64_C( 2494766160983707733), INT64_C( 1481962523173766543), -INT64_C( 8585818606364548669), -INT64_C( 429836139557950017) }, UINT8_C(153), { INT64_C( 1679238285005550319), -INT64_C( 6570405501343403503), INT64_C( 3222506128052807406), -INT64_C( 6472670232636117335), INT64_C( 1499612929985882470), INT64_C( 2435025945260914468), INT64_C( 3730338983580460633), INT64_C( 1936251741381476225) }, { INT64_C( 6), INT64_C( 40) }, { INT64_C( 26238098203211723), INT64_C( 7838035306306977657), INT64_C( 4924922392629855747), INT64_C( 187094903766772410), INT64_C( 23431452031029413), INT64_C( 1481962523173766543), -INT64_C( 8585818606364548669), INT64_C( 30253933459085566) } }, { { INT64_C( 6606217542241028080), -INT64_C( 9219868554947169474), INT64_C( 1856557821955333813), -INT64_C( 210876376874155225), -INT64_C( 8426757936638374440), -INT64_C( 3856056457707543913), INT64_C( 1369719826837412456), -INT64_C( 7978806546093840581) }, UINT8_C(227), { -INT64_C( 903047239936490564), INT64_C( 660572222557549369), 
-INT64_C( 236148617042914490), INT64_C( 145317255498170763), INT64_C( 2940386493423826057), -INT64_C( 1241562307819219252), INT64_C( 5636579028621965864), -INT64_C( 4867983057477439960) }, { INT64_C( 30), INT64_C( 58) }, { INT64_C( 16338840903), INT64_C( 615205823), INT64_C( 1856557821955333813), -INT64_C( 210876376874155225), -INT64_C( 8426757936638374440), INT64_C( 16023574178), INT64_C( 5249473292), INT64_C( 12646206669) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi64(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_mask_srl_epi64(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_srl_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int64_t a[8]; const int64_t b[2]; const int64_t r[8]; } test_vec[] = { { UINT8_C( 2), { INT64_C( 1273292168187332866), INT64_C( 7422134831920816881), INT64_C( 8778785881423789008), -INT64_C( 6870649568514933397), INT64_C( 5972440929581448533), INT64_C( 2069270216126665473), INT64_C( 2304214308246073665), INT64_C( 6084761119011074867) }, { INT64_C( 33), INT64_C( 10) }, { INT64_C( 0), INT64_C( 864050215), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(190), { INT64_C( 1715469743755078245), INT64_C( 3883908348811583318), -INT64_C( 6110753254294327408), -INT64_C( 9052310990052255468), -INT64_C( 4962707970185112987), INT64_C( 3589194774746904202), -INT64_C( 2682461174757816112), INT64_C( 8697536180863588116) }, { INT64_C( 17), INT64_C( 30) }, { INT64_C( 0), INT64_C( 29631869116299), INT64_C( 94116140895196), INT64_C( 71673836392649), INT64_C( 102875031307406), INT64_C( 27383382986045), INT64_C( 0), INT64_C( 66356934973629) } }, { UINT8_C( 54), { -INT64_C( 5799555120303860877), 
INT64_C( 4096933604670747731), INT64_C( 231998857741135549), -INT64_C( 3315171190985936359), INT64_C( 2720388382403328952), INT64_C( 8797552889633669751), INT64_C( 6835358425641782045), INT64_C( 7418312242013505733) }, { INT64_C( 27), INT64_C( 45) }, { INT64_C( 0), INT64_C( 30524534021), INT64_C( 1728526187), INT64_C( 0), INT64_C( 20268472898), INT64_C( 65546876859), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(110), { -INT64_C( 890849150588711221), -INT64_C( 1407084915905673929), -INT64_C( 1553937031705302402), -INT64_C( 6671174823202003303), INT64_C( 7432386549469224648), INT64_C( 5516411252743718633), INT64_C( 6273357867429032100), -INT64_C( 7723181002292161705) }, { INT64_C( 24), INT64_C( 5) }, { INT64_C( 0), INT64_C( 1015642831194), INT64_C( 1006889762997), INT64_C( 701878622204), INT64_C( 0), INT64_C( 328803733154), INT64_C( 373921267237), INT64_C( 0) } }, { UINT8_C( 1), { -INT64_C( 2551546160007398705), -INT64_C( 1583229024549091734), -INT64_C( 4637370017592110076), -INT64_C( 4443569649745975869), -INT64_C( 7663831157342858114), -INT64_C( 8078959051578389152), -INT64_C( 4241788306351701286), INT64_C( 1512768116375507273) }, { INT64_C( 62), INT64_C( 40) }, { INT64_C( 3), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C( 21), { INT64_C( 8563556369835754345), INT64_C( 1362375245873026039), -INT64_C( 2932503238363277827), -INT64_C( 467743725081221479), INT64_C( 7988363705945922112), INT64_C( 546549678847879282), -INT64_C( 9007248830868667823), INT64_C( 1945960595755801977) }, { INT64_C( 22), INT64_C( 52) }, { INT64_C( 2041710941752), INT64_C( 0), INT64_C( 3698883255802), INT64_C( 0), INT64_C( 1904574324118), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(180), { -INT64_C( 7627373761685227045), -INT64_C( 5077655539464261875), -INT64_C( 4634773583306946036), INT64_C( 3241203397608668373), -INT64_C( 4208050115763857397), -INT64_C( 6766153118426920401), -INT64_C( 8678233403792115242), -INT64_C( 
1354797465776172876) }, { INT64_C( 15), INT64_C( 49) }, { INT64_C( 0), INT64_C( 0), INT64_C( 421507888501056), INT64_C( 0), INT64_C( 434530455259573), INT64_C( 356463347024006), INT64_C( 0), INT64_C( 521604815915935) } }, { UINT8_C(110), { -INT64_C( 701251862189271592), -INT64_C( 8672589849232389770), -INT64_C( 4551220995287630154), -INT64_C( 6372748713757113748), -INT64_C( 3664950061693423865), INT64_C( 8559287630595816389), -INT64_C( 7435569239400424336), -INT64_C( 646749299245744753) }, { INT64_C( 31), INT64_C( 18) }, { INT64_C( 0), INT64_C( 4551445238), INT64_C( 6470607164), INT64_C( 5622392222), INT64_C( 0), INT64_C( 3985728896), INT64_C( 5127477848), INT64_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_maskz_srl_epi64(test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_srl_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_srl_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_srl_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_srl_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_srl_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_srl_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_srl_epi64) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/srli.c000066400000000000000000000627071400333146700164050ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the 
following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN srli #include #include #include static int test_simde_mm512_srli_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[32]; const int16_t r[32]; } test_vec[] = { { { INT16_C( 5064), -INT16_C( 13372), -INT16_C( 27857), -INT16_C( 22226), INT16_C( 15192), INT16_C( 15375), -INT16_C( 9626), -INT16_C( 29567), -INT16_C( 11953), -INT16_C( 3869), -INT16_C( 31356), INT16_C( 27097), -INT16_C( 12959), INT16_C( 26598), -INT16_C( 26711), INT16_C( 28952), -INT16_C( 8790), -INT16_C( 9924), INT16_C( 27504), -INT16_C( 14205), -INT16_C( 27994), INT16_C( 3076), -INT16_C( 31124), -INT16_C( 17511), INT16_C( 31831), -INT16_C( 9045), -INT16_C( 31743), INT16_C( 25157), INT16_C( 11345), -INT16_C( 1335), -INT16_C( 7485), INT16_C( 28011) }, { INT16_C( 39), INT16_C( 407), INT16_C( 294), INT16_C( 338), INT16_C( 118), INT16_C( 120), INT16_C( 436), INT16_C( 281), INT16_C( 418), INT16_C( 481), INT16_C( 267), INT16_C( 211), INT16_C( 410), INT16_C( 207), INT16_C( 303), INT16_C( 226), INT16_C( 443), INT16_C( 434), INT16_C( 214), INT16_C( 401), INT16_C( 293), INT16_C( 24), INT16_C( 268), INT16_C( 375), INT16_C( 248), INT16_C( 441), INT16_C( 264), INT16_C( 196), INT16_C( 88), INT16_C( 501), INT16_C( 453), INT16_C( 218) } }, { { -INT16_C( 22593), INT16_C( 12102), -INT16_C( 
14062), -INT16_C( 17928), -INT16_C( 933), -INT16_C( 14395), INT16_C( 24194), -INT16_C( 9598), INT16_C( 11738), -INT16_C( 9290), -INT16_C( 1103), INT16_C( 574), INT16_C( 1831), -INT16_C( 5380), INT16_C( 26601), -INT16_C( 22441), -INT16_C( 25073), INT16_C( 8664), -INT16_C( 12185), -INT16_C( 15398), -INT16_C( 24372), INT16_C( 20362), INT16_C( 3582), -INT16_C( 9943), -INT16_C( 8390), -INT16_C( 4940), -INT16_C( 3366), INT16_C( 750), -INT16_C( 5126), -INT16_C( 7188), INT16_C( 17490), INT16_C( 24972) }, { INT16_C( 335), INT16_C( 94), INT16_C( 402), INT16_C( 371), INT16_C( 504), INT16_C( 399), INT16_C( 189), INT16_C( 437), INT16_C( 91), INT16_C( 439), INT16_C( 503), INT16_C( 4), INT16_C( 14), INT16_C( 469), INT16_C( 207), INT16_C( 336), INT16_C( 316), INT16_C( 67), INT16_C( 416), INT16_C( 391), INT16_C( 321), INT16_C( 159), INT16_C( 27), INT16_C( 434), INT16_C( 446), INT16_C( 473), INT16_C( 485), INT16_C( 5), INT16_C( 471), INT16_C( 455), INT16_C( 136), INT16_C( 195) } }, { { INT16_C( 25826), INT16_C( 18819), INT16_C( 23860), INT16_C( 12), -INT16_C( 26627), -INT16_C( 945), INT16_C( 30884), -INT16_C( 8491), -INT16_C( 30377), INT16_C( 13002), -INT16_C( 18052), INT16_C( 30260), INT16_C( 8356), -INT16_C( 2471), -INT16_C( 6812), INT16_C( 18008), -INT16_C( 9399), INT16_C( 32144), -INT16_C( 25544), INT16_C( 13950), -INT16_C( 13005), -INT16_C( 10446), INT16_C( 1862), -INT16_C( 25162), -INT16_C( 32624), INT16_C( 3279), INT16_C( 825), -INT16_C( 8830), -INT16_C( 9180), -INT16_C( 30508), INT16_C( 11457), INT16_C( 3023) }, { INT16_C( 201), INT16_C( 147), INT16_C( 186), INT16_C( 0), INT16_C( 303), INT16_C( 504), INT16_C( 241), INT16_C( 445), INT16_C( 274), INT16_C( 101), INT16_C( 370), INT16_C( 236), INT16_C( 65), INT16_C( 492), INT16_C( 458), INT16_C( 140), INT16_C( 438), INT16_C( 251), INT16_C( 312), INT16_C( 108), INT16_C( 410), INT16_C( 430), INT16_C( 14), INT16_C( 315), INT16_C( 257), INT16_C( 25), INT16_C( 6), INT16_C( 443), INT16_C( 440), INT16_C( 273), INT16_C( 89), INT16_C( 23) 
} }, { { INT16_C( 24327), INT16_C( 16264), INT16_C( 1787), INT16_C( 12149), -INT16_C( 22572), INT16_C( 6662), -INT16_C( 17234), INT16_C( 16311), -INT16_C( 30915), INT16_C( 30283), -INT16_C( 12662), -INT16_C( 20908), INT16_C( 10410), INT16_C( 27447), INT16_C( 1620), INT16_C( 23414), -INT16_C( 155), INT16_C( 24730), INT16_C( 4101), -INT16_C( 9841), -INT16_C( 26953), INT16_C( 26355), -INT16_C( 21678), -INT16_C( 28763), -INT16_C( 4046), -INT16_C( 17402), INT16_C( 23230), INT16_C( 26731), -INT16_C( 23934), -INT16_C( 10540), INT16_C( 19112), INT16_C( 3377) }, { INT16_C( 190), INT16_C( 127), INT16_C( 13), INT16_C( 94), INT16_C( 335), INT16_C( 52), INT16_C( 377), INT16_C( 127), INT16_C( 270), INT16_C( 236), INT16_C( 413), INT16_C( 348), INT16_C( 81), INT16_C( 214), INT16_C( 12), INT16_C( 182), INT16_C( 510), INT16_C( 193), INT16_C( 32), INT16_C( 435), INT16_C( 301), INT16_C( 205), INT16_C( 342), INT16_C( 287), INT16_C( 480), INT16_C( 376), INT16_C( 181), INT16_C( 208), INT16_C( 325), INT16_C( 429), INT16_C( 149), INT16_C( 26) } }, { { -INT16_C( 13495), INT16_C( 20333), -INT16_C( 549), -INT16_C( 27864), INT16_C( 7315), -INT16_C( 6663), -INT16_C( 24889), -INT16_C( 1675), INT16_C( 31630), INT16_C( 19893), INT16_C( 8405), INT16_C( 22453), -INT16_C( 30270), INT16_C( 27181), INT16_C( 24276), INT16_C( 7543), -INT16_C( 6871), INT16_C( 1388), -INT16_C( 27166), INT16_C( 30104), -INT16_C( 28239), INT16_C( 30810), -INT16_C( 12497), -INT16_C( 17039), INT16_C( 9802), INT16_C( 7946), -INT16_C( 16313), INT16_C( 2422), -INT16_C( 23735), INT16_C( 7540), -INT16_C( 5375), INT16_C( 11067) }, { INT16_C( 406), INT16_C( 158), INT16_C( 507), INT16_C( 294), INT16_C( 57), INT16_C( 459), INT16_C( 317), INT16_C( 498), INT16_C( 247), INT16_C( 155), INT16_C( 65), INT16_C( 175), INT16_C( 275), INT16_C( 212), INT16_C( 189), INT16_C( 58), INT16_C( 458), INT16_C( 10), INT16_C( 299), INT16_C( 235), INT16_C( 291), INT16_C( 240), INT16_C( 414), INT16_C( 378), INT16_C( 76), INT16_C( 62), INT16_C( 384), INT16_C( 
18), INT16_C( 326), INT16_C( 58), INT16_C( 470), INT16_C( 86) } }, { { -INT16_C( 22576), -INT16_C( 19920), -INT16_C( 14276), -INT16_C( 4825), -INT16_C( 32167), -INT16_C( 30619), -INT16_C( 10671), -INT16_C( 25531), INT16_C( 20733), INT16_C( 17595), INT16_C( 12816), INT16_C( 22861), -INT16_C( 15915), -INT16_C( 10377), -INT16_C( 19795), INT16_C( 32002), INT16_C( 12889), -INT16_C( 27088), INT16_C( 22522), INT16_C( 21379), -INT16_C( 5671), INT16_C( 11227), INT16_C( 8383), -INT16_C( 17209), -INT16_C( 32144), INT16_MIN, INT16_C( 20148), -INT16_C( 29990), INT16_C( 20751), -INT16_C( 17311), INT16_C( 25347), INT16_C( 23610) }, { INT16_C( 335), INT16_C( 356), INT16_C( 400), INT16_C( 474), INT16_C( 260), INT16_C( 272), INT16_C( 428), INT16_C( 312), INT16_C( 161), INT16_C( 137), INT16_C( 100), INT16_C( 178), INT16_C( 387), INT16_C( 430), INT16_C( 357), INT16_C( 250), INT16_C( 100), INT16_C( 300), INT16_C( 175), INT16_C( 167), INT16_C( 467), INT16_C( 87), INT16_C( 65), INT16_C( 377), INT16_C( 260), INT16_C( 256), INT16_C( 157), INT16_C( 277), INT16_C( 162), INT16_C( 376), INT16_C( 198), INT16_C( 184) } }, { { INT16_C( 27285), -INT16_C( 28686), INT16_C( 30401), -INT16_C( 25630), -INT16_C( 17057), INT16_C( 7878), -INT16_C( 29219), INT16_C( 20187), -INT16_C( 9457), -INT16_C( 15154), -INT16_C( 22487), INT16_C( 14670), -INT16_C( 20487), -INT16_C( 779), INT16_C( 12050), -INT16_C( 22695), INT16_C( 19353), INT16_C( 23350), INT16_C( 6337), INT16_C( 8438), -INT16_C( 17195), -INT16_C( 19905), INT16_C( 6729), INT16_C( 22528), -INT16_C( 12299), INT16_C( 7964), INT16_C( 27255), INT16_C( 29016), INT16_C( 19737), INT16_C( 11117), -INT16_C( 14723), INT16_C( 5842) }, { INT16_C( 213), INT16_C( 287), INT16_C( 237), INT16_C( 311), INT16_C( 378), INT16_C( 61), INT16_C( 283), INT16_C( 157), INT16_C( 438), INT16_C( 393), INT16_C( 336), INT16_C( 114), INT16_C( 351), INT16_C( 505), INT16_C( 94), INT16_C( 334), INT16_C( 151), INT16_C( 182), INT16_C( 49), INT16_C( 65), INT16_C( 377), INT16_C( 356), 
INT16_C( 52), INT16_C( 176), INT16_C( 415), INT16_C( 62), INT16_C( 212), INT16_C( 226), INT16_C( 154), INT16_C( 86), INT16_C( 396), INT16_C( 45) } }, { { INT16_C( 2066), -INT16_C( 11407), INT16_C( 26400), -INT16_C( 2572), INT16_C( 13091), INT16_C( 27816), -INT16_C( 22451), INT16_C( 17093), -INT16_C( 7817), -INT16_C( 4255), -INT16_C( 18100), INT16_C( 25952), -INT16_C( 13049), -INT16_C( 31599), INT16_C( 25492), -INT16_C( 22886), INT16_C( 3180), -INT16_C( 29575), INT16_C( 28019), -INT16_C( 26750), INT16_C( 10912), -INT16_C( 4861), -INT16_C( 14126), INT16_C( 18992), -INT16_C( 28246), -INT16_C( 2503), -INT16_C( 26293), INT16_C( 21083), -INT16_C( 5018), -INT16_C( 1322), INT16_C( 28752), -INT16_C( 17248) }, { INT16_C( 16), INT16_C( 422), INT16_C( 206), INT16_C( 491), INT16_C( 102), INT16_C( 217), INT16_C( 336), INT16_C( 133), INT16_C( 450), INT16_C( 478), INT16_C( 370), INT16_C( 202), INT16_C( 410), INT16_C( 265), INT16_C( 199), INT16_C( 333), INT16_C( 24), INT16_C( 280), INT16_C( 218), INT16_C( 303), INT16_C( 85), INT16_C( 474), INT16_C( 401), INT16_C( 148), INT16_C( 291), INT16_C( 492), INT16_C( 306), INT16_C( 164), INT16_C( 472), INT16_C( 501), INT16_C( 224), INT16_C( 377) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i r = simde_mm512_srli_epi16(a, 7); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_srli_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; unsigned int imm8; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C(-2020822652), INT32_C( -257395769), INT32_C( 499270536), INT32_C( 1400500940), INT32_C( 1373098033), INT32_C( 1102869287), INT32_C( 1033807112), INT32_C(-1561080563), INT32_C( 1506432231), INT32_C(-1063413574), INT32_C( 341686905), INT32_C( -287206476), INT32_C( 265122437), INT32_C( 1398620072), INT32_C( -665611582), INT32_C(-1504345300)), 22, 
simde_mm512_set_epi32(INT32_C( 542), INT32_C( 962), INT32_C( 119), INT32_C( 333), INT32_C( 327), INT32_C( 262), INT32_C( 246), INT32_C( 651), INT32_C( 359), INT32_C( 770), INT32_C( 81), INT32_C( 955), INT32_C( 63), INT32_C( 333), INT32_C( 865), INT32_C( 665)) }, { simde_mm512_set_epi32(INT32_C( -819412546), INT32_C(-1796000475), INT32_C(-1622143581), INT32_C(-1041781306), INT32_C( -155789533), INT32_C( -151174821), INT32_C( 1958936143), INT32_C( -107637458), INT32_C(-1381646204), INT32_C( 1022043250), INT32_C( -104481290), INT32_C(-1025833596), INT32_C( 1618482767), INT32_C( 1888220027), INT32_C(-1169248526), INT32_C( -703447035)), 11, simde_mm512_set_epi32(INT32_C( 1697048), INT32_C( 1220198), INT32_C( 1305089), INT32_C( 1588469), INT32_C( 2021082), INT32_C( 2023336), INT32_C( 956511), INT32_C( 2044594), INT32_C( 1422520), INT32_C( 499044), INT32_C( 2046135), INT32_C( 1596256), INT32_C( 790274), INT32_C( 921982), INT32_C( 1526229), INT32_C( 1753672)) }, { simde_mm512_set_epi32(INT32_C(-1594292345), INT32_C( -684588879), INT32_C( 1676697175), INT32_C( -659819552), INT32_C( 1894934939), INT32_C( 577900071), INT32_C( 818876053), INT32_C( 557599341), INT32_C( -791753790), INT32_C( 286117889), INT32_C( 1667395914), INT32_C( -574374162), INT32_C( 516383634), INT32_C( 1867216785), INT32_C( 1360165420), INT32_C(-1026060155)), 3, simde_mm512_set_epi32(INT32_C( 337584368), INT32_C( 451297302), INT32_C( 209587146), INT32_C( 454393468), INT32_C( 236866867), INT32_C( 72237508), INT32_C( 102359506), INT32_C( 69699917), INT32_C( 437901688), INT32_C( 35764736), INT32_C( 208424489), INT32_C( 465074141), INT32_C( 64547954), INT32_C( 233402098), INT32_C( 170020677), INT32_C( 408613392)) }, { simde_mm512_set_epi32(INT32_C( 563818649), INT32_C( 1327166173), INT32_C( 1236848070), INT32_C( -448866475), INT32_C( -173418493), INT32_C(-1571972356), INT32_C( 1881284471), INT32_C( 439987043), INT32_C( 508631938), INT32_C( 763400402), INT32_C( 2004762594), INT32_C(-1789579909), INT32_C( 
-823229171), INT32_C(-1537029967), INT32_C(-2094893814), INT32_C( 1910734558)), 0, simde_mm512_set_epi32(INT32_C( 563818649), INT32_C( 1327166173), INT32_C( 1236848070), INT32_C( -448866475), INT32_C( -173418493), INT32_C(-1571972356), INT32_C( 1881284471), INT32_C( 439987043), INT32_C( 508631938), INT32_C( 763400402), INT32_C( 2004762594), INT32_C(-1789579909), INT32_C( -823229171), INT32_C(-1537029967), INT32_C(-2094893814), INT32_C( 1910734558)) }, { simde_mm512_set_epi32(INT32_C( 1331571680), INT32_C(-1968130549), INT32_C(-1401578233), INT32_C(-1310278942), INT32_C( -553135974), INT32_C( 390049321), INT32_C( -502176380), INT32_C( -721913400), INT32_C( 297997941), INT32_C( 812527594), INT32_C(-1593317379), INT32_C( -643296593), INT32_C(-1978632480), INT32_C(-2010319907), INT32_C(-1081044111), INT32_C( 223565748)), 26, simde_mm512_set_epi32(INT32_C( 19), INT32_C( 34), INT32_C( 43), INT32_C( 44), INT32_C( 55), INT32_C( 5), INT32_C( 56), INT32_C( 53), INT32_C( 4), INT32_C( 12), INT32_C( 40), INT32_C( 54), INT32_C( 34), INT32_C( 34), INT32_C( 47), INT32_C( 3)) }, { simde_mm512_set_epi32(INT32_C( 69766264), INT32_C( 1121309360), INT32_C( -164257344), INT32_C( 1544624998), INT32_C(-1638151086), INT32_C( 617641637), INT32_C(-2109782153), INT32_C( -381251627), INT32_C( 648330089), INT32_C( -370018417), INT32_C(-1896387892), INT32_C(-1167774485), INT32_C( -297453838), INT32_C( -617551956), INT32_C( 863958459), INT32_C( 1052098740)), 1, simde_mm512_set_epi32(INT32_C( 34883132), INT32_C( 560654680), INT32_C( 2065354976), INT32_C( 772312499), INT32_C( 1328408105), INT32_C( 308820818), INT32_C( 1092592571), INT32_C( 1956857834), INT32_C( 324165044), INT32_C( 1962474439), INT32_C( 1199289702), INT32_C( 1563596405), INT32_C( 1998756729), INT32_C( 1838707670), INT32_C( 431979229), INT32_C( 526049370)) }, { simde_mm512_set_epi32(INT32_C( -185630809), INT32_C( -795283306), INT32_C( 1353888329), INT32_C( 1750377549), INT32_C( -609950002), INT32_C(-2070799804), INT32_C( 
-717783400), INT32_C( -489437394), INT32_C( 782151967), INT32_C( -135381456), INT32_C(-1044185983), INT32_C(-1168288861), INT32_C( 1570077349), INT32_C(-1514349775), INT32_C(-1300428717), INT32_C(-1070450073)), 14, simde_mm512_set_epi32(INT32_C( 250813), INT32_C( 213603), INT32_C( 82634), INT32_C( 106834), INT32_C( 224915), INT32_C( 135752), INT32_C( 218333), INT32_C( 232271), INT32_C( 47738), INT32_C( 253880), INT32_C( 198411), INT32_C( 190837), INT32_C( 95829), INT32_C( 169715), INT32_C( 182772), INT32_C( 196808)) }, { simde_mm512_set_epi32(INT32_C( 858780966), INT32_C( 471539970), INT32_C( 308326365), INT32_C( 897623009), INT32_C( 274412137), INT32_C(-1363032868), INT32_C( 2080428503), INT32_C( 1048755350), INT32_C( -342337536), INT32_C( 1475004820), INT32_C( 1074270282), INT32_C( -894671787), INT32_C(-2107817427), INT32_C( -444084191), INT32_C( 851286899), INT32_C( 1423269304)), 1, simde_mm512_set_epi32(INT32_C( 429390483), INT32_C( 235769985), INT32_C( 154163182), INT32_C( 448811504), INT32_C( 137206068), INT32_C( 1465967214), INT32_C( 1040214251), INT32_C( 524377675), INT32_C( 1976314880), INT32_C( 737502410), INT32_C( 537135141), INT32_C( 1700147754), INT32_C( 1093574934), INT32_C( 1925441552), INT32_C( 425643449), INT32_C( 711634652)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_srli_epi32(test_vec[i].a,test_vec[i].imm8); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_srli_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; unsigned int b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( 7973262903512536694), INT64_C( -756652926976123625), INT64_C(-7907329678808178856), INT64_C(-4613066309848201378), INT64_C( 911796452309072772), INT64_C(-7947449538018331043), INT64_C(-4094891379879736374), INT64_C( 2567785713935265105)), 0xab, simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), 
INT64_C( 0), INT64_C( 0)) }, { simde_mm512_set_epi64(INT64_C(-8733599303468285770), INT64_C(-1221042997940104437), INT64_C( 1700326984023276146), INT64_C( 299160601816116482), INT64_C(-8645581509002533463), INT64_C(-8083364442012234823), INT64_C(-5545717914343726512), INT64_C( 419833451025710133)), 0x8029, simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { simde_mm512_set_epi64(INT64_C(-4893117706119522679), INT64_C( 5420919605114671392), INT64_C( 5087336166907414861), INT64_C( -562883644934737039), INT64_C(-3838729031805448294), INT64_C(-7418143174233432135), INT64_C( 7017318210470297490), INT64_C( 3464955922400203693)), 0xe, simde_mm512_set_epi64(INT64_C( 827247703099977), INT64_C( 330866675116862), INT64_C( 310506357843470), INT64_C( 1091544215623462), INT64_C( 891602480584967), INT64_C( 673132379118415), INT64_C( 428303113432024), INT64_C( 211484126123059)) }, { simde_mm512_set_epi64(INT64_C( 2541614580543521019), INT64_C( 499823435321299561), INT64_C(-5904924501366764508), INT64_C( 7023944739814045444), INT64_C( 6015406288340926104), INT64_C( 7321833489159498588), INT64_C(-2737849912327243109), INT64_C(-3578554550642761007)), 0x32, simde_mm512_set_epi64(INT64_C( 2257), INT64_C( 443), INT64_C( 11139), INT64_C( 6238), INT64_C( 5342), INT64_C( 6503), INT64_C( 13952), INT64_C( 13205)) }, { simde_mm512_set_epi64(INT64_C(-5028928596309812666), INT64_C(-4599097054342878650), INT64_C( 1737746464556527965), INT64_C( 7519897503489365685), INT64_C( 2668093889339798821), INT64_C(-3758388356888738937), INT64_C(-2613982157457207556), INT64_C(-2225410235035714021)), 0x30, simde_mm512_set_epi64(INT64_C( 47669), INT64_C( 49196), INT64_C( 6173), INT64_C( 26716), INT64_C( 9478), INT64_C( 52183), INT64_C( 56249), INT64_C( 57629)) }, { simde_mm512_set_epi64(INT64_C(-5661929570079819163), INT64_C( 606174630548676143), INT64_C( 4062026724724267051), INT64_C(-7721509817758052189), 
INT64_C(-4899766988012067491), INT64_C( 849655025943263586), INT64_C(-7243604229092766255), INT64_C(-3011226666080476035)), 0x29, simde_mm512_set_epi64(INT64_C( 5813860), INT64_C( 275656), INT64_C( 1847195), INT64_C( 4877271), INT64_C( 6160451), INT64_C( 386378), INT64_C( 5094598), INT64_C( 7019260)) }, { simde_mm512_set_epi64(INT64_C( 3357536311959110775), INT64_C( 4508830932063799722), INT64_C(-5800425134717732029), INT64_C( 1782066721260114087), INT64_C( -181633913032181218), INT64_C(-5152953019677919849), INT64_C( 3009514543526146963), INT64_C( -248934049093542484)), 0x31, simde_mm512_set_epi64(INT64_C( 5964), INT64_C( 8009), INT64_C( 22464), INT64_C( 3165), INT64_C( 32445), INT64_C( 23614), INT64_C( 5345), INT64_C( 32325)) }, { simde_mm512_set_epi64(INT64_C( 7443398932235525007), INT64_C(-1954475805396281420), INT64_C( 2896517201997827064), INT64_C(-7120983626837339415), INT64_C( -201538146421797804), INT64_C( 96284688433294814), INT64_C( 317424323145668713), INT64_C(-2012972091494378925)), 0x24, simde_mm512_set_epi64(INT64_C( 108315710), INT64_C( 239994089), INT64_C( 42149872), INT64_C( 164811505), INT64_C( 265502689), INT64_C( 1401126), INT64_C( 4619131), INT64_C( 239142856)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_srli_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_srli_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_srli_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_srli_epi64) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/srlv.c000066400000000000000000002313021400333146700164070ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to 
use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN srlv #include #include #include #include #include #include static int test_simde_mm_srlv_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { { INT16_C( 20854), INT16_C( 32520), INT16_C( 459), INT16_C( 13016), -INT16_C( 10658), -INT16_C( 10061), -INT16_C( 28349), INT16_C( 13711) }, { INT16_C( 0), INT16_C( 31), -INT16_C( 1), INT16_C( 32), INT16_C( 21), INT16_C( 6), INT16_C( 14), INT16_C( 24) }, { INT16_C( 20854), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 866), INT16_C( 2), INT16_C( 0) } }, { { INT16_C( 24789), -INT16_C( 24476), INT16_C( 15458), -INT16_C( 16173), -INT16_C( 31214), INT16_C( 21912), INT16_C( 10008), -INT16_C( 30837) }, { INT16_C( 24), INT16_C( 20), INT16_C( 17), INT16_C( 0), INT16_C( 18), INT16_C( 24), INT16_C( 11), INT16_C( 5) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 16173), INT16_C( 0), INT16_C( 0), INT16_C( 4), INT16_C( 1084) } }, { { -INT16_C( 5808), -INT16_C( 19807), INT16_C( 29734), INT16_C( 14450), INT16_C( 3066), INT16_C( 4750), 
INT16_C( 6450), -INT16_C( 30054) }, { INT16_C( 27), INT16_C( 12), INT16_C( 1), INT16_C( 19), INT16_C( 18), INT16_C( 19), INT16_C( 27), INT16_C( 30) }, { INT16_C( 0), INT16_C( 11), INT16_C( 14867), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 16322), -INT16_C( 5986), INT16_C( 4275), -INT16_C( 21215), -INT16_C( 20709), INT16_C( 20160), INT16_C( 23240), INT16_C( 9176) }, { INT16_C( 8), INT16_C( 15), INT16_C( 16), INT16_C( 28), INT16_C( 13), INT16_C( 0), INT16_C( 8), INT16_C( 20) }, { INT16_C( 63), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 5), INT16_C( 20160), INT16_C( 90), INT16_C( 0) } }, { { INT16_C( 21245), -INT16_C( 20397), INT16_C( 29794), INT16_C( 32350), INT16_C( 7715), -INT16_C( 5172), -INT16_C( 23432), -INT16_C( 16370) }, { INT16_C( 9), INT16_C( 9), INT16_C( 0), INT16_C( 28), INT16_C( 20), INT16_C( 22), INT16_C( 27), INT16_C( 8) }, { INT16_C( 41), INT16_C( 88), INT16_C( 29794), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 192) } }, { { INT16_C( 15356), INT16_C( 24329), INT16_C( 26544), -INT16_C( 11299), -INT16_C( 22139), -INT16_C( 577), -INT16_C( 12979), INT16_C( 14013) }, { INT16_C( 11), INT16_C( 16), INT16_C( 11), INT16_C( 26), INT16_C( 9), INT16_C( 29), INT16_C( 27), INT16_C( 30) }, { INT16_C( 7), INT16_C( 0), INT16_C( 12), INT16_C( 0), INT16_C( 84), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 1984), INT16_C( 28790), INT16_C( 21359), -INT16_C( 3004), INT16_C( 1020), INT16_C( 19186), -INT16_C( 20528), INT16_C( 7296) }, { INT16_C( 22), INT16_C( 8), INT16_C( 29), INT16_C( 1), INT16_C( 18), INT16_C( 11), INT16_C( 3), INT16_C( 5) }, { INT16_C( 0), INT16_C( 112), INT16_C( 0), INT16_C( 31266), INT16_C( 0), INT16_C( 9), INT16_C( 5626), INT16_C( 228) } }, { { -INT16_C( 1199), -INT16_C( 16204), -INT16_C( 1969), INT16_C( 19380), -INT16_C( 22789), -INT16_C( 13163), INT16_C( 5718), INT16_C( 19688) }, { INT16_C( 6), INT16_C( 13), INT16_C( 18), INT16_C( 10), INT16_C( 13), INT16_C( 18), INT16_C( 31), 
INT16_C( 21) }, { INT16_C( 1005), INT16_C( 6), INT16_C( 0), INT16_C( 18), INT16_C( 5), INT16_C( 0), INT16_C( 0), INT16_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_srlv_epi16(a, b); simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_srlv_epi16(a, b); b = simde_mm_and_si128(b, simde_mm_set1_epi16(31)); if (i == 0 ) b = simde_mm_mask_blend_epi16(0xf, b, simde_mm_set_epi16(0, 0, 0, 0, 32, -1, 31, 0)); r = simde_mm_srlv_epi16(a, b); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_mask_srlv_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t src[8]; const simde__mmask8 k; const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { { INT16_C( 23213), -INT16_C( 23484), -INT16_C( 3615), INT16_C( 20407), INT16_C( 12157), -INT16_C( 32012), -INT16_C( 26926), -INT16_C( 1675) }, UINT8_C(222), { -INT16_C( 29011), INT16_C( 7859), INT16_C( 600), INT16_C( 16506), INT16_C( 6847), -INT16_C( 26796), INT16_C( 1432), -INT16_C( 3516) }, { INT16_C( 0), INT16_C( 31), -INT16_C( 1), INT16_C( 32), INT16_C( 22), INT16_C( 1), INT16_C( 15), INT16_C( 10) }, { INT16_C( 23213), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 32012), INT16_C( 0), INT16_C( 60) } }, { { INT16_C( 15657), -INT16_C( 32389), -INT16_C( 2752), -INT16_C( 63), INT16_C( 5391), -INT16_C( 22634), -INT16_C( 9702), INT16_C( 25498) }, UINT8_C(195), { -INT16_C( 24979), -INT16_C( 28573), INT16_C( 12886), -INT16_C( 
28506), INT16_C( 21460), INT16_C( 28223), -INT16_C( 25379), INT16_C( 7063) }, { INT16_C( 24), INT16_C( 27), INT16_C( 26), INT16_C( 29), INT16_C( 16), INT16_C( 10), INT16_C( 30), INT16_C( 14) }, { INT16_C( 0), INT16_C( 0), -INT16_C( 2752), -INT16_C( 63), INT16_C( 5391), -INT16_C( 22634), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 3828), INT16_C( 25180), INT16_C( 548), -INT16_C( 1806), INT16_C( 12629), INT16_C( 13158), -INT16_C( 306), -INT16_C( 6578) }, UINT8_C( 23), { -INT16_C( 3159), INT16_C( 1009), -INT16_C( 7920), -INT16_C( 10765), -INT16_C( 16916), INT16_C( 23091), -INT16_C( 181), INT16_C( 15718) }, { INT16_C( 27), INT16_C( 1), INT16_C( 26), INT16_C( 19), INT16_C( 31), INT16_C( 25), INT16_C( 20), INT16_C( 20) }, { INT16_C( 0), INT16_C( 504), INT16_C( 0), -INT16_C( 1806), INT16_C( 0), INT16_C( 13158), -INT16_C( 306), -INT16_C( 6578) } }, { { -INT16_C( 14701), -INT16_C( 23584), -INT16_C( 11353), -INT16_C( 27784), -INT16_C( 21359), -INT16_C( 8979), INT16_C( 21419), INT16_C( 1817) }, UINT8_C( 27), { INT16_C( 25978), -INT16_C( 11307), -INT16_C( 16104), -INT16_C( 109), INT16_C( 20602), INT16_C( 6707), INT16_C( 4389), -INT16_C( 5203) }, { INT16_C( 17), INT16_C( 18), INT16_C( 9), INT16_C( 22), INT16_C( 19), INT16_C( 0), INT16_C( 12), INT16_C( 2) }, { INT16_C( 0), INT16_C( 0), -INT16_C( 11353), INT16_C( 0), INT16_C( 0), -INT16_C( 8979), INT16_C( 21419), INT16_C( 1817) } }, { { INT16_C( 22668), -INT16_C( 23142), INT16_C( 11545), -INT16_C( 27740), -INT16_C( 10371), -INT16_C( 23891), INT16_C( 23272), -INT16_C( 9587) }, UINT8_C(171), { -INT16_C( 24800), INT16_C( 18036), -INT16_C( 5643), INT16_C( 10073), -INT16_C( 16375), INT16_C( 12659), INT16_C( 14915), -INT16_C( 25667) }, { INT16_C( 20), INT16_C( 20), INT16_C( 6), INT16_C( 30), INT16_C( 21), INT16_C( 6), INT16_C( 14), INT16_C( 26) }, { INT16_C( 0), INT16_C( 0), INT16_C( 11545), INT16_C( 0), -INT16_C( 10371), INT16_C( 197), INT16_C( 23272), INT16_C( 0) } }, { { INT16_C( 28223), INT16_C( 13332), INT16_C( 28247), INT16_C( 
24924), -INT16_C( 12498), INT16_C( 29074), INT16_C( 20233), -INT16_C( 8948) }, UINT8_C(178), { -INT16_C( 8512), INT16_C( 2232), -INT16_C( 27043), INT16_C( 32509), INT16_C( 19549), -INT16_C( 724), -INT16_C( 1209), -INT16_C( 19139) }, { INT16_C( 15), INT16_C( 13), INT16_C( 13), INT16_C( 12), INT16_C( 0), INT16_C( 6), INT16_C( 10), INT16_C( 1) }, { INT16_C( 28223), INT16_C( 0), INT16_C( 28247), INT16_C( 24924), INT16_C( 19549), INT16_C( 1012), INT16_C( 20233), INT16_C( 23198) } }, { { -INT16_C( 17822), -INT16_C( 16398), -INT16_C( 4272), -INT16_C( 21187), INT16_C( 27196), -INT16_C( 31829), -INT16_C( 6043), INT16_C( 29752) }, UINT8_C( 89), { -INT16_C( 3515), -INT16_C( 19673), -INT16_C( 15202), -INT16_C( 17485), INT16_C( 874), -INT16_C( 4379), -INT16_C( 12284), -INT16_C( 16815) }, { INT16_C( 2), INT16_C( 15), INT16_C( 14), INT16_C( 14), INT16_C( 7), INT16_C( 29), INT16_C( 9), INT16_C( 9) }, { INT16_C( 15505), -INT16_C( 16398), -INT16_C( 4272), INT16_C( 2), INT16_C( 6), -INT16_C( 31829), INT16_C( 104), INT16_C( 29752) } }, { { -INT16_C( 12157), INT16_C( 8610), INT16_C( 22164), -INT16_C( 291), -INT16_C( 15783), INT16_C( 24045), INT16_C( 16018), INT16_C( 21788) }, UINT8_C( 78), { INT16_C( 1835), -INT16_C( 6244), INT16_C( 21749), INT16_C( 26191), -INT16_C( 24975), INT16_C( 783), -INT16_C( 441), INT16_C( 6022) }, { INT16_C( 1), INT16_C( 11), INT16_C( 5), INT16_C( 16), INT16_C( 23), INT16_C( 26), INT16_C( 9), INT16_C( 3) }, { -INT16_C( 12157), INT16_C( 28), INT16_C( 679), INT16_C( 0), -INT16_C( 15783), INT16_C( 24045), INT16_C( 127), INT16_C( 21788) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi16(test_vec[i].src); simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_mask_srlv_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', 
stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i src = simde_test_x86_random_i16x8(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = simde_test_x86_random_i16x8(); simde__m128i r; b = simde_mm_and_si128(b, simde_mm_set1_epi16(31)); if (i == 0 ) b = simde_mm_mask_blend_epi16(0xf, b, simde_mm_set_epi16(0, 0, 0, 0, 32, -1, 31, 0)); r = simde_mm_mask_srlv_epi16(src, k, a, b); simde_test_x86_write_i16x8(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_maskz_srlv_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask8 k; const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { UINT8_C( 78), { -INT16_C( 1423), -INT16_C( 21384), -INT16_C( 6824), INT16_C( 11575), INT16_C( 13486), INT16_C( 24722), -INT16_C( 7561), -INT16_C( 13693) }, { INT16_C( 0), INT16_C( 31), -INT16_C( 1), INT16_C( 32), INT16_C( 31), INT16_C( 19), INT16_C( 9), INT16_C( 25) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 113), INT16_C( 0) } }, { UINT8_C(113), { -INT16_C( 6542), INT16_C( 22473), -INT16_C( 2531), INT16_C( 20997), INT16_C( 25992), INT16_C( 27337), -INT16_C( 27672), INT16_C( 26631) }, { INT16_C( 29), INT16_C( 14), INT16_C( 10), INT16_C( 13), INT16_C( 29), INT16_C( 12), INT16_C( 18), INT16_C( 5) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 6), INT16_C( 0), INT16_C( 0) } }, { UINT8_C( 13), { INT16_C( 15311), -INT16_C( 15062), INT16_C( 31808), -INT16_C( 22962), -INT16_C( 18363), -INT16_C( 10098), -INT16_C( 2368), INT16_C( 2006) }, { INT16_C( 4), INT16_C( 17), INT16_C( 18), INT16_C( 11), INT16_C( 1), INT16_C( 29), INT16_C( 5), 
INT16_C( 21) }, { INT16_C( 956), INT16_C( 0), INT16_C( 0), INT16_C( 20), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT8_C(125), { -INT16_C( 18145), -INT16_C( 25410), INT16_C( 25607), -INT16_C( 16159), -INT16_C( 17678), -INT16_C( 5760), -INT16_C( 30832), INT16_C( 13421) }, { INT16_C( 24), INT16_C( 6), INT16_C( 7), INT16_C( 15), INT16_C( 15), INT16_C( 12), INT16_C( 12), INT16_C( 14) }, { INT16_C( 0), INT16_C( 0), INT16_C( 200), INT16_C( 1), INT16_C( 1), INT16_C( 14), INT16_C( 8), INT16_C( 0) } }, { UINT8_C( 89), { INT16_C( 26476), -INT16_C( 12191), INT16_C( 8521), INT16_C( 963), -INT16_C( 21343), INT16_C( 10387), -INT16_C( 14567), -INT16_C( 2720) }, { INT16_C( 14), INT16_C( 29), INT16_C( 29), INT16_C( 14), INT16_C( 16), INT16_C( 0), INT16_C( 14), INT16_C( 9) }, { INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 3), INT16_C( 0) } }, { UINT8_C(211), { INT16_C( 3050), INT16_C( 2844), INT16_C( 8142), INT16_C( 31404), -INT16_C( 11086), INT16_C( 31123), -INT16_C( 30411), -INT16_C( 23577) }, { INT16_C( 6), INT16_C( 0), INT16_C( 21), INT16_C( 26), INT16_C( 20), INT16_C( 3), INT16_C( 19), INT16_C( 18) }, { INT16_C( 47), INT16_C( 2844), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT8_C( 8), { INT16_C( 2510), -INT16_C( 4650), INT16_C( 20662), -INT16_C( 30049), INT16_C( 6628), INT16_C( 28095), INT16_C( 25088), -INT16_C( 6317) }, { INT16_C( 2), INT16_C( 29), INT16_C( 23), INT16_C( 10), INT16_C( 21), INT16_C( 20), INT16_C( 28), INT16_C( 27) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 34), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT8_C( 28), { -INT16_C( 10159), -INT16_C( 23854), INT16_C( 23672), -INT16_C( 28282), -INT16_C( 3300), INT16_C( 32401), INT16_C( 31046), INT16_C( 25537) }, { INT16_C( 22), INT16_C( 26), INT16_C( 2), INT16_C( 29), INT16_C( 15), INT16_C( 8), INT16_C( 10), INT16_C( 30) }, { INT16_C( 0), INT16_C( 0), INT16_C( 5918), INT16_C( 0), INT16_C( 
1), INT16_C( 0), INT16_C( 0), INT16_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_maskz_srlv_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = simde_test_x86_random_i16x8(); simde__m128i r; b = simde_mm_and_si128(b, simde_mm_set1_epi16(31)); if (i == 0 ) b = simde_mm_mask_blend_epi16(0xf, b, simde_mm_set_epi16(0, 0, 0, 0, 32, -1, 31, 0)); r = simde_mm_maskz_srlv_epi16(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_mask_srlv_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t src[4]; const simde__mmask8 k; const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { { INT32_C( 1045051322), INT32_C( 813297984), INT32_C( 277504789), -INT32_C( 698453306) }, UINT8_C(195), { -INT32_C( 987978706), INT32_C( 942231910), -INT32_C( 1996244392), -INT32_C( 1572658069) }, { INT32_C( 0), INT32_C( 31), -INT32_C( 1), INT32_C( 32) }, { -INT32_C( 987978706), INT32_C( 0), INT32_C( 277504789), -INT32_C( 698453306) } }, { { INT32_C( 98037663), -INT32_C( 1724055488), INT32_C( 623002042), -INT32_C( 725064347) }, UINT8_C(231), { -INT32_C( 1092406357), INT32_C( 777798056), INT32_C( 1611754854), INT32_C( 822027072) }, { INT32_C( 26), INT32_C( 2), INT32_C( 11), INT32_C( 2) }, { INT32_C( 47), INT32_C( 194449514), INT32_C( 786989), -INT32_C( 725064347) } }, { { -INT32_C( 1744020496), INT32_C( 
63334557), INT32_C( 375642325), -INT32_C( 163159045) }, UINT8_C(103), { -INT32_C( 1062604618), -INT32_C( 2132071259), -INT32_C( 786269163), INT32_C( 1354854645) }, { INT32_C( 28), INT32_C( 0), INT32_C( 20), INT32_C( 22) }, { INT32_C( 12), -INT32_C( 2132071259), INT32_C( 3346), -INT32_C( 163159045) } }, { { -INT32_C( 2138257190), INT32_C( 151025908), -INT32_C( 371514636), INT32_C( 238722194) }, UINT8_C(246), { INT32_C( 404222759), INT32_C( 2104233901), -INT32_C( 309124349), INT32_C( 164126529) }, { INT32_C( 12), INT32_C( 8), INT32_C( 2), INT32_C( 28) }, { -INT32_C( 2138257190), INT32_C( 8219663), INT32_C( 996460736), INT32_C( 238722194) } }, { { -INT32_C( 1413772034), -INT32_C( 1624758372), -INT32_C( 1936868534), INT32_C( 127227163) }, UINT8_C(157), { -INT32_C( 1679463533), INT32_C( 92098499), INT32_C( 1484913740), -INT32_C( 27908750) }, { INT32_C( 0), INT32_C( 10), INT32_C( 7), INT32_C( 20) }, { -INT32_C( 1679463533), -INT32_C( 1624758372), INT32_C( 11600888), INT32_C( 4069) } }, { { INT32_C( 916627059), INT32_C( 977019117), -INT32_C( 1902985956), -INT32_C( 1014109982) }, UINT8_C(234), { INT32_C( 1628818215), -INT32_C( 64423139), INT32_C( 74466556), -INT32_C( 1686669315) }, { INT32_C( 26), INT32_C( 9), INT32_C( 21), INT32_C( 18) }, { INT32_C( 916627059), INT32_C( 8262781), -INT32_C( 1902985956), INT32_C( 9949) } }, { { INT32_C( 905657623), INT32_C( 875635512), INT32_C( 1698210152), INT32_C( 838971160) }, UINT8_C( 92), { INT32_C( 1296460938), -INT32_C( 1465717309), -INT32_C( 1021649034), INT32_C( 1155183367) }, { INT32_C( 14), INT32_C( 0), INT32_C( 8), INT32_C( 0) }, { INT32_C( 905657623), INT32_C( 875635512), INT32_C( 12786399), INT32_C( 1155183367) } }, { { INT32_C( 1551317657), -INT32_C( 1660675801), INT32_C( 6299385), -INT32_C( 2142946351) }, UINT8_C( 74), { INT32_C( 1921733313), INT32_C( 727346316), -INT32_C( 1815285146), INT32_C( 539817702) }, { INT32_C( 14), INT32_C( 12), INT32_C( 5), INT32_C( 6) }, { INT32_C( 1551317657), INT32_C( 177574), INT32_C( 6299385), 
INT32_C( 8434651) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi32(test_vec[i].src); simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_mask_srlv_epi32(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i src = simde_test_x86_random_i32x4(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = simde_test_x86_random_i32x4(); simde__m128i r; b = simde_mm_and_si128(b, simde_mm_set1_epi32(31)); if (i == 0 ) b = simde_mm_mask_blend_epi32(0xf, b, simde_mm_set_epi32(32, -1, 31, 0)); r = simde_mm_mask_srlv_epi32(src, k, a, b); simde_test_x86_write_i32x4(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_maskz_srlv_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask8 k; const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { UINT8_C( 77), { -INT32_C( 1025902251), INT32_C( 1177027924), -INT32_C( 953369647), INT32_C( 1865764071) }, { INT32_C( 0), INT32_C( 31), -INT32_C( 1), INT32_C( 32) }, { -INT32_C( 1025902251), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { UINT8_C( 5), { INT32_C( 1213800003), INT32_C( 35233607), INT32_C( 401269195), -INT32_C( 743810794) }, { INT32_C( 21), INT32_C( 30), INT32_C( 20), INT32_C( 2) }, { INT32_C( 578), INT32_C( 0), INT32_C( 382), INT32_C( 0) } }, { UINT8_C(157), { INT32_C( 1592012479), INT32_C( 673900103), INT32_C( 708788688), INT32_C( 243208940) }, { INT32_C( 3), INT32_C( 21), 
INT32_C( 9), INT32_C( 12) }, { INT32_C( 199001559), INT32_C( 0), INT32_C( 1384352), INT32_C( 59377) } }, { UINT8_C(130), { INT32_C( 264907305), -INT32_C( 1780420012), INT32_C( 1149307441), INT32_C( 40341385) }, { INT32_C( 28), INT32_C( 1), INT32_C( 19), INT32_C( 26) }, { INT32_C( 0), INT32_C( 1257273642), INT32_C( 0), INT32_C( 0) } }, { UINT8_C( 74), { INT32_C( 1738461812), -INT32_C( 1130875726), INT32_C( 1162206389), -INT32_C( 1631500220) }, { INT32_C( 31), INT32_C( 17), INT32_C( 29), INT32_C( 10) }, { INT32_C( 0), INT32_C( 24140), INT32_C( 0), INT32_C( 2601042) } }, { UINT8_C(118), { INT32_C( 455656935), INT32_C( 986768734), INT32_C( 1920865578), INT32_C( 458366422) }, { INT32_C( 29), INT32_C( 13), INT32_C( 19), INT32_C( 9) }, { INT32_C( 0), INT32_C( 120455), INT32_C( 3663), INT32_C( 0) } }, { UINT8_C(251), { INT32_C( 1515785077), -INT32_C( 242904100), INT32_C( 801699602), -INT32_C( 544414903) }, { INT32_C( 15), INT32_C( 20), INT32_C( 17), INT32_C( 26) }, { INT32_C( 46258), INT32_C( 3864), INT32_C( 0), INT32_C( 55) } }, { UINT8_C(251), { INT32_C( 1859611354), -INT32_C( 427767569), INT32_C( 1932570512), -INT32_C( 1402794181) }, { INT32_C( 28), INT32_C( 27), INT32_C( 21), INT32_C( 15) }, { INT32_C( 6), INT32_C( 28), INT32_C( 0), INT32_C( 88262) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_maskz_srlv_epi32(test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = simde_test_x86_random_i32x4(); simde__m128i r; b = simde_mm_and_si128(b, simde_mm_set1_epi32(31)); if (i == 0 ) b = simde_mm_mask_blend_epi32(0xf, b, simde_mm_set_epi32(32, -1, 31, 0)); r = 
simde_mm_maskz_srlv_epi32(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_mask_srlv_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t src[2]; const simde__mmask8 k; const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { { INT64_C( 4759340010941027350), -INT64_C( 1967368948826934569) }, UINT8_C(135), { -INT64_C( 172620600388341517), INT64_C( 4905822627423390417) }, { INT64_C( 0), INT64_C( 63) }, { -INT64_C( 172620600388341517), INT64_C( 0) } }, { { -INT64_C( 7378324697411089118), -INT64_C( 6275014858101014282) }, UINT8_C(221), { -INT64_C( 6208830575577618764), -INT64_C( 7642509024037409829) }, { -INT64_C( 1), INT64_C( 64) }, { INT64_C( 0), -INT64_C( 6275014858101014282) } }, { { -INT64_C( 2731880677689380111), -INT64_C( 6561577564133417261) }, UINT8_C( 36), { -INT64_C( 13313113293308851), INT64_C( 5878305957286381910) }, { INT64_C( 3), INT64_C( 17) }, { -INT64_C( 2731880677689380111), -INT64_C( 6561577564133417261) } }, { { -INT64_C( 2064500699318003313), -INT64_C( 1449006342675794060) }, UINT8_C( 24), { -INT64_C( 5967298555844504012), -INT64_C( 39392036025624429) }, { INT64_C( 40), INT64_C( 53) }, { -INT64_C( 2064500699318003313), -INT64_C( 1449006342675794060) } }, { { -INT64_C( 3672193763761957432), INT64_C( 8465966638073823320) }, UINT8_C(245), { INT64_C( 2985812952140843018), -INT64_C( 684730067239600956) }, { INT64_C( 41), INT64_C( 48) }, { INT64_C( 1357790), INT64_C( 8465966638073823320) } }, { { INT64_C( 7758276810793609841), INT64_C( 1442174611476369771) }, UINT8_C(126), { INT64_C( 4458855818550558004), INT64_C( 8058418049905768572) }, { INT64_C( 1), INT64_C( 36) }, { INT64_C( 7758276810793609841), INT64_C( 117265416) } }, { { -INT64_C( 5995853301519603762), 
INT64_C( 441605950480122924) }, UINT8_C( 60), { -INT64_C( 334657449128839626), -INT64_C( 8220608201982697206) }, { INT64_C( 26), INT64_C( 59) }, { -INT64_C( 5995853301519603762), INT64_C( 441605950480122924) } }, { { -INT64_C( 930603654640510393), -INT64_C( 2276296078673429131) }, UINT8_C(111), { -INT64_C( 7851191246785515995), -INT64_C( 1992034650794595198) }, { INT64_C( 24), INT64_C( 14) }, { INT64_C( 631544162447), INT64_C( 1004315760675961) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi64(test_vec[i].src); simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_mask_srlv_epi64(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i src = simde_test_x86_random_i64x2(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i b = simde_test_x86_random_i64x2(); simde__m128i r; b = simde_mm_and_si128(b, simde_mm_set1_epi64x(63)); switch (i) { case 0: b = simde_mm_set_epi64x(63, 0); break; case 1: b = simde_mm_set_epi64x(64, -1); break; } r = simde_mm_mask_srlv_epi64(src, k, a, b); simde_test_x86_write_i64x2(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_maskz_srlv_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask8 k; const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { UINT8_C(197), { INT64_C( 320422378828511374), INT64_C( 4719663389677011455) }, { INT64_C( 0), INT64_C( 63) }, { INT64_C( 
320422378828511374), INT64_C( 0) } }, { UINT8_C(119), { -INT64_C( 936202600172470364), INT64_C( 6540085766919084813) }, { -INT64_C( 1), INT64_C( 64) }, { INT64_C( 0), INT64_C( 0) } }, { UINT8_C( 32), { INT64_C( 1229418059044893969), -INT64_C( 447609553217144687) }, { INT64_C( 34), INT64_C( 30) }, { INT64_C( 0), INT64_C( 0) } }, { UINT8_C(182), { -INT64_C( 5041500496302640207), -INT64_C( 4624863187752802711) }, { INT64_C( 31), INT64_C( 59) }, { INT64_C( 0), INT64_C( 23) } }, { UINT8_C(123), { INT64_C( 4253148009518173342), -INT64_C( 649527669600668307) }, { INT64_C( 22), INT64_C( 63) }, { INT64_C( 1014029505137), INT64_C( 1) } }, { UINT8_C( 57), { -INT64_C( 6745255973997353025), -INT64_C( 2092873116858973773) }, { INT64_C( 16), INT64_C( 23) }, { INT64_C( 178550538630862), INT64_C( 0) } }, { UINT8_C( 76), { -INT64_C( 7221421285141039604), -INT64_C( 3372215517009287676) }, { INT64_C( 10), INT64_C( 16) }, { INT64_C( 0), INT64_C( 0) } }, { UINT8_C( 26), { INT64_C( 3790711809845331259), -INT64_C( 2562583989125759511) }, { INT64_C( 37), INT64_C( 7) }, { INT64_C( 0), INT64_C( 124095000660810875) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_maskz_srlv_epi64(test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i b = simde_test_x86_random_i64x2(); simde__m128i r; b = simde_mm_and_si128(b, simde_mm_set1_epi64x(63)); switch (i) { case 0: b = simde_mm_set_epi64x(63, 0); break; case 1: b = simde_mm_set_epi64x(64, -1); break; } r = simde_mm_maskz_srlv_epi64(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_srlv_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { { { -INT16_C( 30765), -INT16_C( 4649), INT16_C( 8331), -INT16_C( 2502), INT16_C( 27582), INT16_C( 3401), -INT16_C( 22990), INT16_C( 13442), -INT16_C( 1803), INT16_C( 4907), -INT16_C( 2305), INT16_C( 1438), -INT16_C( 6736), -INT16_C( 12403), -INT16_C( 20039), -INT16_C( 29497) }, { INT16_C( 0), INT16_C( 31), -INT16_C( 1), INT16_C( 32), INT16_C( 30), INT16_C( 11), INT16_C( 9), INT16_C( 4), INT16_C( 5), INT16_C( 17), INT16_C( 6), INT16_C( 10), INT16_C( 21), INT16_C( 6), INT16_C( 8), INT16_C( 26) }, { -INT16_C( 30765), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 83), INT16_C( 840), INT16_C( 1991), INT16_C( 0), INT16_C( 987), INT16_C( 1), INT16_C( 0), INT16_C( 830), INT16_C( 177), INT16_C( 0) } }, { { -INT16_C( 3188), INT16_C( 19269), -INT16_C( 90), -INT16_C( 14904), INT16_C( 21250), -INT16_C( 21739), -INT16_C( 26015), INT16_C( 26185), -INT16_C( 1462), -INT16_C( 3989), INT16_C( 30025), INT16_C( 32326), INT16_C( 27660), INT16_C( 21868), -INT16_C( 6566), -INT16_C( 6442) }, { INT16_C( 25), INT16_C( 17), INT16_C( 26), INT16_C( 5), INT16_C( 13), INT16_C( 7), INT16_C( 20), INT16_C( 20), INT16_C( 10), INT16_C( 14), INT16_C( 21), INT16_C( 18), INT16_C( 1), INT16_C( 23), INT16_C( 5), INT16_C( 1) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1582), INT16_C( 2), INT16_C( 342), INT16_C( 0), INT16_C( 0), INT16_C( 62), INT16_C( 3), INT16_C( 0), INT16_C( 0), INT16_C( 13830), INT16_C( 0), INT16_C( 1842), INT16_C( 29547) } }, { { INT16_C( 21064), INT16_C( 25214), -INT16_C( 15540), -INT16_C( 26242), INT16_C( 17694), INT16_C( 4679), INT16_C( 23381), INT16_C( 24401), INT16_C( 32731), -INT16_C( 11854), -INT16_C( 31500), -INT16_C( 10541), INT16_C( 
10947), -INT16_C( 6127), INT16_C( 13143), -INT16_C( 24602) }, { INT16_C( 5), INT16_C( 1), INT16_C( 8), INT16_C( 10), INT16_C( 4), INT16_C( 25), INT16_C( 13), INT16_C( 24), INT16_C( 9), INT16_C( 25), INT16_C( 15), INT16_C( 20), INT16_C( 22), INT16_C( 26), INT16_C( 24), INT16_C( 12) }, { INT16_C( 658), INT16_C( 12607), INT16_C( 195), INT16_C( 38), INT16_C( 1105), INT16_C( 0), INT16_C( 2), INT16_C( 0), INT16_C( 63), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 9) } }, { { -INT16_C( 21083), -INT16_C( 12657), -INT16_C( 1492), -INT16_C( 4076), INT16_C( 28075), -INT16_C( 18423), -INT16_C( 32489), INT16_C( 16801), INT16_C( 23212), INT16_C( 23391), INT16_C( 21479), -INT16_C( 25139), INT16_C( 10072), -INT16_C( 28245), INT16_C( 22375), INT16_C( 3407) }, { INT16_C( 5), INT16_C( 27), INT16_C( 24), INT16_C( 2), INT16_C( 29), INT16_C( 28), INT16_C( 13), INT16_C( 21), INT16_C( 24), INT16_C( 20), INT16_C( 7), INT16_C( 28), INT16_C( 8), INT16_C( 17), INT16_C( 31), INT16_C( 28) }, { INT16_C( 1389), INT16_C( 0), INT16_C( 0), INT16_C( 15365), INT16_C( 0), INT16_C( 0), INT16_C( 4), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 167), INT16_C( 0), INT16_C( 39), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 2178), INT16_C( 22517), INT16_C( 6119), INT16_C( 17627), INT16_C( 5955), -INT16_C( 3912), INT16_C( 28405), INT16_C( 11593), -INT16_C( 638), -INT16_C( 5556), INT16_C( 2174), INT16_C( 9898), -INT16_C( 1168), INT16_C( 12085), INT16_C( 21147), INT16_C( 6643) }, { INT16_C( 9), INT16_C( 16), INT16_C( 0), INT16_C( 20), INT16_C( 3), INT16_C( 19), INT16_C( 27), INT16_C( 5), INT16_C( 25), INT16_C( 7), INT16_C( 25), INT16_C( 29), INT16_C( 12), INT16_C( 24), INT16_C( 4), INT16_C( 1) }, { INT16_C( 123), INT16_C( 0), INT16_C( 6119), INT16_C( 0), INT16_C( 744), INT16_C( 0), INT16_C( 0), INT16_C( 362), INT16_C( 0), INT16_C( 468), INT16_C( 0), INT16_C( 0), INT16_C( 15), INT16_C( 0), INT16_C( 1321), INT16_C( 3321) } }, { { -INT16_C( 11948), INT16_C( 
21534), -INT16_C( 27875), -INT16_C( 32617), -INT16_C( 13632), INT16_C( 23512), INT16_C( 23878), -INT16_C( 16520), -INT16_C( 32722), INT16_C( 1974), -INT16_C( 11471), -INT16_C( 8623), -INT16_C( 14043), -INT16_C( 13787), -INT16_C( 31179), -INT16_C( 30280) }, { INT16_C( 24), INT16_C( 29), INT16_C( 9), INT16_C( 21), INT16_C( 30), INT16_C( 4), INT16_C( 10), INT16_C( 3), INT16_C( 29), INT16_C( 31), INT16_C( 12), INT16_C( 12), INT16_C( 26), INT16_C( 27), INT16_C( 24), INT16_C( 24) }, { INT16_C( 0), INT16_C( 0), INT16_C( 73), INT16_C( 0), INT16_C( 0), INT16_C( 1469), INT16_C( 23), INT16_C( 6127), INT16_C( 0), INT16_C( 0), INT16_C( 13), INT16_C( 13), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 5450), -INT16_C( 19707), -INT16_C( 1399), -INT16_C( 14371), INT16_C( 25031), -INT16_C( 3765), -INT16_C( 29090), -INT16_C( 9399), -INT16_C( 22137), INT16_C( 21386), INT16_C( 5721), -INT16_C( 11452), INT16_C( 200), INT16_C( 386), -INT16_C( 17805), -INT16_C( 17007) }, { INT16_C( 15), INT16_C( 17), INT16_C( 17), INT16_C( 31), INT16_C( 15), INT16_C( 10), INT16_C( 24), INT16_C( 9), INT16_C( 29), INT16_C( 18), INT16_C( 10), INT16_C( 10), INT16_C( 23), INT16_C( 19), INT16_C( 7), INT16_C( 8) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 60), INT16_C( 0), INT16_C( 109), INT16_C( 0), INT16_C( 0), INT16_C( 5), INT16_C( 52), INT16_C( 0), INT16_C( 0), INT16_C( 372), INT16_C( 189) } }, { { -INT16_C( 18052), INT16_C( 3535), -INT16_C( 4601), -INT16_C( 18842), -INT16_C( 20135), INT16_C( 20932), -INT16_C( 20923), -INT16_C( 32047), -INT16_C( 23775), -INT16_C( 21736), -INT16_C( 32070), -INT16_C( 11778), INT16_C( 20847), INT16_C( 5724), -INT16_C( 23498), -INT16_C( 19828) }, { INT16_C( 29), INT16_C( 0), INT16_C( 10), INT16_C( 26), INT16_C( 23), INT16_C( 20), INT16_C( 13), INT16_C( 30), INT16_C( 9), INT16_C( 26), INT16_C( 25), INT16_C( 21), INT16_C( 9), INT16_C( 30), INT16_C( 21), INT16_C( 18) }, { INT16_C( 0), INT16_C( 3535), INT16_C( 59), INT16_C( 
0), INT16_C( 0), INT16_C( 0), INT16_C( 5), INT16_C( 0), INT16_C( 81), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 40), INT16_C( 0), INT16_C( 0), INT16_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_srlv_epi16(a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i r, a = simde_test_x86_random_i16x16(), b = simde_test_x86_random_i16x16(); b = simde_mm256_and_si256(b, simde_mm256_set1_epi16(31)); if (i == 0 ) b = simde_mm256_mask_blend_epi16(0xf, b, simde_mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, -1, 31, 0)); r = simde_mm256_srlv_epi16(a, b); simde_test_x86_write_i16x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_srlv_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { -INT16_C( 10059), -INT16_C( 29715), INT16_C( 14501), -INT16_C( 6071), -INT16_C( 7543), -INT16_C( 19762), -INT16_C( 5974), INT16_C( 26858), INT16_C( 17414), -INT16_C( 28813), INT16_C( 23223), -INT16_C( 29510), -INT16_C( 13013), -INT16_C( 17439), -INT16_C( 13407), INT16_C( 22246), -INT16_C( 11356), INT16_C( 18913), INT16_C( 10763), -INT16_C( 27599), -INT16_C( 243), -INT16_C( 18618), INT16_C( 12520), -INT16_C( 4576), -INT16_C( 27788), INT16_C( 11134), INT16_C( 14573), INT16_C( 6584), -INT16_C( 26363), -INT16_C( 22828), -INT16_C( 17820), INT16_C( 2300) }, { INT16_C( 0), INT16_C( 31), -INT16_C( 1), INT16_C( 32), INT16_C( 3), INT16_C( 12), INT16_C( 4), INT16_C( 25), INT16_C( 0), INT16_C( 4), INT16_C( 15), INT16_C( 6), INT16_C( 21), INT16_C( 26), 
INT16_C( 20), INT16_C( 2), INT16_C( 20), INT16_C( 27), INT16_C( 24), INT16_C( 17), INT16_C( 28), INT16_C( 6), INT16_C( 10), INT16_C( 26), INT16_C( 23), INT16_C( 23), INT16_C( 27), INT16_C( 27), INT16_C( 24), INT16_C( 10), INT16_C( 12), INT16_C( 15) }, { -INT16_C( 10059), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 7249), INT16_C( 11), INT16_C( 3722), INT16_C( 0), INT16_C( 17414), INT16_C( 2295), INT16_C( 0), INT16_C( 562), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 5561), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 733), INT16_C( 12), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 41), INT16_C( 11), INT16_C( 0) } }, { { INT16_C( 10945), -INT16_C( 26180), INT16_C( 28050), INT16_C( 28660), -INT16_C( 17686), INT16_C( 21744), -INT16_C( 29990), -INT16_C( 11970), -INT16_C( 27032), INT16_C( 17367), -INT16_C( 3468), -INT16_C( 21324), INT16_C( 7880), INT16_C( 21753), -INT16_C( 6005), INT16_C( 19828), INT16_C( 12306), -INT16_C( 23066), -INT16_C( 9315), -INT16_C( 30956), INT16_C( 1173), INT16_C( 28636), INT16_C( 6798), -INT16_C( 2496), INT16_C( 6320), INT16_C( 9274), -INT16_C( 4598), -INT16_C( 11567), -INT16_C( 13812), -INT16_C( 26586), -INT16_C( 25933), -INT16_C( 14875) }, { INT16_C( 10), INT16_C( 10), INT16_C( 6), INT16_C( 15), INT16_C( 2), INT16_C( 11), INT16_C( 5), INT16_C( 7), INT16_C( 4), INT16_C( 26), INT16_C( 15), INT16_C( 1), INT16_C( 22), INT16_C( 19), INT16_C( 2), INT16_C( 14), INT16_C( 4), INT16_C( 20), INT16_C( 23), INT16_C( 6), INT16_C( 14), INT16_C( 10), INT16_C( 30), INT16_C( 9), INT16_C( 18), INT16_C( 16), INT16_C( 15), INT16_C( 29), INT16_C( 25), INT16_C( 14), INT16_C( 9), INT16_C( 7) }, { INT16_C( 10), INT16_C( 38), INT16_C( 438), INT16_C( 0), INT16_C( 11962), INT16_C( 10), INT16_C( 1110), INT16_C( 418), INT16_C( 2406), INT16_C( 0), INT16_C( 1), INT16_C( 22106), INT16_C( 0), INT16_C( 0), INT16_C( 14882), INT16_C( 1), INT16_C( 769), INT16_C( 0), INT16_C( 0), INT16_C( 540), 
INT16_C( 0), INT16_C( 27), INT16_C( 0), INT16_C( 123), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 2), INT16_C( 77), INT16_C( 395) } }, { { -INT16_C( 9290), INT16_C( 27959), -INT16_C( 24930), INT16_C( 11431), -INT16_C( 3408), -INT16_C( 20832), -INT16_C( 22205), -INT16_C( 10576), -INT16_C( 16275), -INT16_C( 17001), -INT16_C( 27470), -INT16_C( 21662), INT16_C( 4453), -INT16_C( 4538), INT16_C( 19854), INT16_C( 17659), INT16_C( 12841), -INT16_C( 14415), INT16_C( 22992), -INT16_C( 32524), -INT16_C( 27573), -INT16_C( 29138), -INT16_C( 8643), -INT16_C( 21660), -INT16_C( 865), INT16_C( 20840), -INT16_C( 13680), -INT16_C( 2564), INT16_C( 17115), INT16_C( 27107), -INT16_C( 8561), -INT16_C( 18259) }, { INT16_C( 17), INT16_C( 0), INT16_C( 24), INT16_C( 2), INT16_C( 8), INT16_C( 17), INT16_C( 15), INT16_C( 16), INT16_C( 18), INT16_C( 31), INT16_C( 3), INT16_C( 24), INT16_C( 29), INT16_C( 8), INT16_C( 26), INT16_C( 5), INT16_C( 20), INT16_C( 12), INT16_C( 25), INT16_C( 15), INT16_C( 31), INT16_C( 6), INT16_C( 23), INT16_C( 28), INT16_C( 15), INT16_C( 11), INT16_C( 22), INT16_C( 17), INT16_C( 31), INT16_C( 31), INT16_C( 14), INT16_C( 4) }, { INT16_C( 0), INT16_C( 27959), INT16_C( 0), INT16_C( 2857), INT16_C( 242), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 4758), INT16_C( 0), INT16_C( 0), INT16_C( 238), INT16_C( 0), INT16_C( 551), INT16_C( 0), INT16_C( 12), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 568), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 10), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 3), INT16_C( 2954) } }, { { -INT16_C( 20375), INT16_C( 17071), -INT16_C( 8385), INT16_C( 24099), INT16_C( 19104), INT16_C( 22508), -INT16_C( 30623), -INT16_C( 12288), INT16_C( 11139), -INT16_C( 9885), -INT16_C( 2865), -INT16_C( 12596), -INT16_C( 5139), -INT16_C( 1273), -INT16_C( 29712), INT16_C( 22910), INT16_C( 11835), INT16_C( 31388), -INT16_C( 16627), -INT16_C( 21032), -INT16_C( 15351), INT16_C( 
27140), INT16_C( 1100), -INT16_C( 12485), -INT16_C( 25041), -INT16_C( 344), INT16_C( 29842), INT16_C( 32716), -INT16_C( 11424), INT16_C( 20602), -INT16_C( 1698), -INT16_C( 25943) }, { INT16_C( 7), INT16_C( 20), INT16_C( 5), INT16_C( 1), INT16_C( 17), INT16_C( 25), INT16_C( 9), INT16_C( 13), INT16_C( 18), INT16_C( 23), INT16_C( 10), INT16_C( 3), INT16_C( 23), INT16_C( 26), INT16_C( 22), INT16_C( 15), INT16_C( 9), INT16_C( 17), INT16_C( 17), INT16_C( 29), INT16_C( 23), INT16_C( 0), INT16_C( 10), INT16_C( 25), INT16_C( 4), INT16_C( 0), INT16_C( 19), INT16_C( 25), INT16_C( 0), INT16_C( 0), INT16_C( 23), INT16_C( 20) }, { INT16_C( 352), INT16_C( 0), INT16_C( 1785), INT16_C( 12049), INT16_C( 0), INT16_C( 0), INT16_C( 68), INT16_C( 6), INT16_C( 0), INT16_C( 0), INT16_C( 61), INT16_C( 6617), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 23), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 27140), INT16_C( 1), INT16_C( 0), INT16_C( 2530), -INT16_C( 344), INT16_C( 0), INT16_C( 0), -INT16_C( 11424), INT16_C( 20602), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 17971), -INT16_C( 6961), INT16_C( 27736), INT16_C( 20551), -INT16_C( 22654), INT16_C( 19504), INT16_C( 10709), INT16_C( 31080), INT16_C( 26681), INT16_C( 11528), -INT16_C( 7733), INT16_C( 3287), -INT16_C( 26796), INT16_C( 3107), INT16_C( 14087), INT16_C( 14925), INT16_C( 7293), -INT16_C( 10721), INT16_C( 26249), INT16_C( 2854), INT16_C( 22029), -INT16_C( 7336), -INT16_C( 16256), -INT16_C( 18084), INT16_C( 25641), -INT16_C( 2842), -INT16_C( 16827), -INT16_C( 26112), INT16_C( 9045), INT16_C( 23718), -INT16_C( 3237), -INT16_C( 10089) }, { INT16_C( 15), INT16_C( 14), INT16_C( 28), INT16_C( 4), INT16_C( 11), INT16_C( 12), INT16_C( 28), INT16_C( 4), INT16_C( 13), INT16_C( 26), INT16_C( 9), INT16_C( 13), INT16_C( 30), INT16_C( 27), INT16_C( 6), INT16_C( 17), INT16_C( 8), INT16_C( 14), INT16_C( 20), INT16_C( 13), INT16_C( 14), INT16_C( 10), INT16_C( 3), INT16_C( 16), INT16_C( 26), INT16_C( 3), INT16_C( 
4), INT16_C( 1), INT16_C( 3), INT16_C( 27), INT16_C( 14), INT16_C( 31) }, { INT16_C( 0), INT16_C( 3), INT16_C( 0), INT16_C( 1284), INT16_C( 20), INT16_C( 4), INT16_C( 0), INT16_C( 1942), INT16_C( 3), INT16_C( 0), INT16_C( 112), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 220), INT16_C( 0), INT16_C( 28), INT16_C( 3), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 56), INT16_C( 6160), INT16_C( 0), INT16_C( 0), INT16_C( 7836), INT16_C( 3044), INT16_C( 19712), INT16_C( 1130), INT16_C( 0), INT16_C( 3), INT16_C( 0) } }, { { INT16_C( 11597), INT16_C( 474), INT16_C( 10431), INT16_C( 19937), INT16_C( 27522), INT16_C( 17815), -INT16_C( 14502), -INT16_C( 27435), INT16_C( 31185), -INT16_C( 18729), INT16_C( 31177), INT16_C( 28056), -INT16_C( 11051), -INT16_C( 31658), -INT16_C( 27327), -INT16_C( 29126), INT16_C( 5570), -INT16_C( 32369), INT16_C( 28733), -INT16_C( 16434), INT16_C( 26332), INT16_C( 13828), -INT16_C( 9939), -INT16_C( 53), -INT16_C( 23982), INT16_C( 7349), INT16_C( 19739), -INT16_C( 3703), -INT16_C( 8415), INT16_C( 25205), -INT16_C( 20619), INT16_C( 14320) }, { INT16_C( 4), INT16_C( 25), INT16_C( 16), INT16_C( 0), INT16_C( 13), INT16_C( 3), INT16_C( 30), INT16_C( 26), INT16_C( 16), INT16_C( 12), INT16_C( 28), INT16_C( 29), INT16_C( 21), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 17), INT16_C( 7), INT16_C( 2), INT16_C( 14), INT16_C( 12), INT16_C( 10), INT16_C( 31), INT16_C( 27), INT16_C( 19), INT16_C( 27), INT16_C( 29), INT16_C( 14), INT16_C( 10), INT16_C( 28), INT16_C( 31), INT16_C( 18) }, { INT16_C( 724), INT16_C( 0), INT16_C( 0), INT16_C( 19937), INT16_C( 3), INT16_C( 2226), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 11), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 31658), INT16_C( 19104), INT16_C( 18205), INT16_C( 0), INT16_C( 259), INT16_C( 7183), INT16_C( 2), INT16_C( 6), INT16_C( 13), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 3), INT16_C( 55), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 31096), 
-INT16_C( 9486), -INT16_C( 16223), -INT16_C( 29143), -INT16_C( 27503), INT16_C( 12312), -INT16_C( 27880), INT16_C( 27711), -INT16_C( 9445), INT16_C( 14556), -INT16_C( 29965), -INT16_C( 310), INT16_C( 18136), -INT16_C( 18262), -INT16_C( 17181), INT16_C( 23464), -INT16_C( 26059), -INT16_C( 10443), INT16_C( 24410), -INT16_C( 5275), INT16_C( 32243), INT16_C( 2843), INT16_C( 23313), INT16_C( 11383), INT16_C( 21302), INT16_C( 10596), INT16_C( 11997), -INT16_C( 18905), -INT16_C( 11916), INT16_C( 22382), INT16_C( 5773), -INT16_C( 15437) }, { INT16_C( 17), INT16_C( 26), INT16_C( 7), INT16_C( 23), INT16_C( 28), INT16_C( 6), INT16_C( 13), INT16_C( 25), INT16_C( 17), INT16_C( 13), INT16_C( 11), INT16_C( 4), INT16_C( 6), INT16_C( 23), INT16_C( 9), INT16_C( 22), INT16_C( 18), INT16_C( 5), INT16_C( 15), INT16_C( 20), INT16_C( 15), INT16_C( 25), INT16_C( 24), INT16_C( 0), INT16_C( 16), INT16_C( 23), INT16_C( 1), INT16_C( 27), INT16_C( 14), INT16_C( 27), INT16_C( 28), INT16_C( 17) }, { INT16_C( 0), INT16_C( 0), INT16_C( 385), INT16_C( 0), INT16_C( 0), INT16_C( 192), INT16_C( 4), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 17), INT16_C( 4076), INT16_C( 283), INT16_C( 0), INT16_C( 94), INT16_C( 0), INT16_C( 0), INT16_C( 1721), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 11383), INT16_C( 0), INT16_C( 0), INT16_C( 5998), INT16_C( 0), INT16_C( 3), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 10334), INT16_C( 20777), INT16_C( 23987), -INT16_C( 23939), INT16_C( 14296), INT16_C( 4351), -INT16_C( 86), INT16_C( 14937), -INT16_C( 28468), -INT16_C( 29418), -INT16_C( 19860), INT16_C( 23317), -INT16_C( 3996), -INT16_C( 7822), INT16_C( 25825), -INT16_C( 31984), INT16_C( 14651), -INT16_C( 4395), INT16_C( 21142), INT16_C( 28305), -INT16_C( 28535), INT16_C( 13182), -INT16_C( 10353), INT16_C( 23406), -INT16_C( 31640), -INT16_C( 11032), -INT16_C( 714), -INT16_C( 25809), -INT16_C( 23827), -INT16_C( 12420), -INT16_C( 29690), INT16_C( 16722) }, { INT16_C( 5), 
INT16_C( 15), INT16_C( 26), INT16_C( 10), INT16_C( 16), INT16_C( 23), INT16_C( 0), INT16_C( 26), INT16_C( 9), INT16_C( 28), INT16_C( 0), INT16_C( 27), INT16_C( 14), INT16_C( 29), INT16_C( 3), INT16_C( 21), INT16_C( 23), INT16_C( 3), INT16_C( 5), INT16_C( 20), INT16_C( 22), INT16_C( 21), INT16_C( 16), INT16_C( 30), INT16_C( 18), INT16_C( 26), INT16_C( 6), INT16_C( 1), INT16_C( 12), INT16_C( 8), INT16_C( 13), INT16_C( 23) }, { INT16_C( 1725), INT16_C( 0), INT16_C( 0), INT16_C( 40), INT16_C( 0), INT16_C( 0), -INT16_C( 86), INT16_C( 0), INT16_C( 72), INT16_C( 0), -INT16_C( 19860), INT16_C( 0), INT16_C( 3), INT16_C( 0), INT16_C( 3228), INT16_C( 0), INT16_C( 0), INT16_C( 7642), INT16_C( 660), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1012), INT16_C( 19863), INT16_C( 10), INT16_C( 207), INT16_C( 4), INT16_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_srlv_epi16(a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i r, a = simde_test_x86_random_i16x32(), b = simde_test_x86_random_i16x32(); b = simde_mm512_and_si512(b, simde_mm512_set1_epi16(31)); if (i == 0 ) b = simde_mm512_mask_blend_epi16(0xf, b, simde_mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, -1, 31, 0)); r = simde_mm512_srlv_epi16(a, b); simde_test_x86_write_i16x32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_srlv_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { 
{ { -INT32_C( 2112154129), -INT32_C( 934216458), INT32_C( 84870349), -INT32_C( 1586062879), -INT32_C( 576350279), -INT32_C( 828904262), -INT32_C( 1116817424), INT32_C( 2111336333), INT32_C( 2063594628), -INT32_C( 1119727376), INT32_C( 918704724), -INT32_C( 1529398805), -INT32_C( 1971225392), INT32_C( 1498945641), INT32_C( 1544996559), -INT32_C( 1160122826) }, { INT32_C( 0), INT32_C( 31), -INT32_C( 1), INT32_C( 32), INT32_C( 7), INT32_C( 17), INT32_C( 6), INT32_C( 14), INT32_C( 15), INT32_C( 3), INT32_C( 7), INT32_C( 20), INT32_C( 19), INT32_C( 22), INT32_C( 21), INT32_C( 24) }, { -INT32_C( 2112154129), INT32_C( 1), INT32_C( 0), INT32_C( 0), INT32_C( 29051695), INT32_C( 26443), INT32_C( 49658591), INT32_C( 128865), INT32_C( 62975), INT32_C( 396904990), INT32_C( 7177380), INT32_C( 2637), INT32_C( 4432), INT32_C( 357), INT32_C( 736), INT32_C( 186) } }, { { INT32_C( 2111887322), INT32_C( 1391023851), INT32_C( 876939904), -INT32_C( 672098940), INT32_C( 1092296139), -INT32_C( 390258669), INT32_C( 1535931714), -INT32_C( 29106140), -INT32_C( 1552211017), -INT32_C( 84581254), -INT32_C( 215073937), -INT32_C( 1681252400), INT32_C( 1339942204), INT32_C( 1211603462), INT32_C( 883147536), INT32_C( 1530062499) }, { INT32_C( 10), INT32_C( 17), INT32_C( 14), INT32_C( 11), INT32_C( 3), INT32_C( 17), INT32_C( 17), INT32_C( 27), INT32_C( 20), INT32_C( 1), INT32_C( 20), INT32_C( 23), INT32_C( 31), INT32_C( 11), INT32_C( 10), INT32_C( 15) }, { INT32_C( 2062389), INT32_C( 10612), INT32_C( 53524), INT32_C( 1768978), INT32_C( 136537017), INT32_C( 29790), INT32_C( 11718), INT32_C( 31), INT32_C( 2615), INT32_C( 2105193021), INT32_C( 3890), INT32_C( 311), INT32_C( 0), INT32_C( 591603), INT32_C( 862448), INT32_C( 46693) } }, { { -INT32_C( 1861561264), -INT32_C( 1878929188), -INT32_C( 2112917494), -INT32_C( 132344200), -INT32_C( 544612236), INT32_C( 1604074261), -INT32_C( 744206781), -INT32_C( 1600655536), -INT32_C( 1607294524), INT32_C( 2083533681), INT32_C( 486424483), INT32_C( 1242831574), 
INT32_C( 254385658), INT32_C( 477021912), INT32_C( 1710166804), -INT32_C( 972716542) }, { INT32_C( 7), INT32_C( 11), INT32_C( 22), INT32_C( 13), INT32_C( 27), INT32_C( 5), INT32_C( 24), INT32_C( 5), INT32_C( 28), INT32_C( 30), INT32_C( 11), INT32_C( 30), INT32_C( 22), INT32_C( 28), INT32_C( 29), INT32_C( 25) }, { INT32_C( 19010984), INT32_C( 1179706), INT32_C( 520), INT32_C( 508132), INT32_C( 27), INT32_C( 50127320), INT32_C( 211), INT32_C( 84197242), INT32_C( 10), INT32_C( 1), INT32_C( 237511), INT32_C( 1), INT32_C( 60), INT32_C( 1), INT32_C( 3), INT32_C( 99) } }, { { -INT32_C( 1201909191), -INT32_C( 1760792213), INT32_C( 820972977), INT32_C( 428502787), -INT32_C( 2095736794), INT32_C( 328731254), INT32_C( 418736223), -INT32_C( 210943047), INT32_C( 28035477), -INT32_C( 325535942), -INT32_C( 937654332), INT32_C( 501327607), -INT32_C( 1264519618), INT32_C( 1623668737), INT32_C( 1517862048), -INT32_C( 1806834177) }, { INT32_C( 14), INT32_C( 15), INT32_C( 21), INT32_C( 22), INT32_C( 20), INT32_C( 2), INT32_C( 11), INT32_C( 18), INT32_C( 2), INT32_C( 2), INT32_C( 13), INT32_C( 31), INT32_C( 22), INT32_C( 16), INT32_C( 10), INT32_C( 3) }, { INT32_C( 188785), INT32_C( 77336), INT32_C( 391), INT32_C( 102), INT32_C( 2097), INT32_C( 82182813), INT32_C( 204461), INT32_C( 15579), INT32_C( 7008869), INT32_C( 992357838), INT32_C( 409828), INT32_C( 0), INT32_C( 722), INT32_C( 24775), INT32_C( 1482287), INT32_C( 311016639) } }, { { -INT32_C( 1126783910), INT32_C( 265542881), INT32_C( 2094241309), INT32_C( 435120930), -INT32_C( 38635636), INT32_C( 1585024628), INT32_C( 1780154119), INT32_C( 2142197285), -INT32_C( 600078853), INT32_C( 2146111073), -INT32_C( 956580188), -INT32_C( 1226839254), -INT32_C( 676097694), INT32_C( 641018911), INT32_C( 1368412204), -INT32_C( 1580187738) }, { INT32_C( 5), INT32_C( 25), INT32_C( 8), INT32_C( 12), INT32_C( 20), INT32_C( 8), INT32_C( 11), INT32_C( 10), INT32_C( 0), INT32_C( 26), INT32_C( 28), INT32_C( 29), INT32_C( 25), INT32_C( 23), INT32_C( 
11), INT32_C( 17) }, { INT32_C( 99005730), INT32_C( 7), INT32_C( 8180630), INT32_C( 106230), INT32_C( 4059), INT32_C( 6191502), INT32_C( 869215), INT32_C( 2091989), -INT32_C( 600078853), INT32_C( 31), INT32_C( 12), INT32_C( 5), INT32_C( 107), INT32_C( 76), INT32_C( 668170), INT32_C( 20712) } }, { { -INT32_C( 756294824), -INT32_C( 1284177546), -INT32_C( 1736742501), INT32_C( 529150566), INT32_C( 650219439), -INT32_C( 622233649), -INT32_C( 470770286), INT32_C( 1014355428), INT32_C( 638476464), -INT32_C( 36076703), INT32_C( 1402295789), INT32_C( 913514375), INT32_C( 2120037551), INT32_C( 1029260971), -INT32_C( 954119709), -INT32_C( 838625762) }, { INT32_C( 22), INT32_C( 21), INT32_C( 3), INT32_C( 9), INT32_C( 28), INT32_C( 4), INT32_C( 25), INT32_C( 28), INT32_C( 2), INT32_C( 7), INT32_C( 8), INT32_C( 23), INT32_C( 20), INT32_C( 17), INT32_C( 6), INT32_C( 30) }, { INT32_C( 843), INT32_C( 1435), INT32_C( 319778099), INT32_C( 1033497), INT32_C( 2), INT32_C( 229545852), INT32_C( 113), INT32_C( 3), INT32_C( 159619116), INT32_C( 33272582), INT32_C( 5477717), INT32_C( 108), INT32_C( 2021), INT32_C( 7852), INT32_C( 52200743), INT32_C( 3) } }, { { INT32_C( 131636863), INT32_C( 78721372), -INT32_C( 659224992), -INT32_C( 1328799940), -INT32_C( 240588864), INT32_C( 1861945192), INT32_C( 1385299092), INT32_C( 1112786370), -INT32_C( 1085723805), -INT32_C( 1111229860), INT32_C( 1033206017), INT32_C( 1508794776), -INT32_C( 1337289144), INT32_C( 840910494), INT32_C( 92647490), -INT32_C( 649602955) }, { INT32_C( 3), INT32_C( 10), INT32_C( 21), INT32_C( 18), INT32_C( 13), INT32_C( 23), INT32_C( 26), INT32_C( 27), INT32_C( 21), INT32_C( 29), INT32_C( 11), INT32_C( 11), INT32_C( 23), INT32_C( 15), INT32_C( 5), INT32_C( 12) }, { INT32_C( 16454607), INT32_C( 76876), INT32_C( 1733), INT32_C( 11315), INT32_C( 494919), INT32_C( 221), INT32_C( 20), INT32_C( 8), INT32_C( 1530), INT32_C( 5), INT32_C( 504495), INT32_C( 736716), INT32_C( 352), INT32_C( 25662), INT32_C( 2895234), INT32_C( 889981) } 
}, { { -INT32_C( 2128487324), -INT32_C( 2123059754), INT32_C( 222286849), INT32_C( 845568250), -INT32_C( 859788675), INT32_C( 318105613), INT32_C( 1536389039), INT32_C( 1971097873), INT32_C( 1140301421), INT32_C( 1288006475), INT32_C( 962135103), -INT32_C( 630472867), INT32_C( 2007379050), -INT32_C( 1014392044), INT32_C( 471735306), INT32_C( 462527406) }, { INT32_C( 25), INT32_C( 19), INT32_C( 8), INT32_C( 9), INT32_C( 3), INT32_C( 1), INT32_C( 16), INT32_C( 21), INT32_C( 1), INT32_C( 29), INT32_C( 12), INT32_C( 11), INT32_C( 16), INT32_C( 8), INT32_C( 28), INT32_C( 29) }, { INT32_C( 64), INT32_C( 4142), INT32_C( 868308), INT32_C( 1651500), INT32_C( 429397327), INT32_C( 159052806), INT32_C( 23443), INT32_C( 939), INT32_C( 570150710), INT32_C( 2), INT32_C( 234896), INT32_C( 1789303), INT32_C( 30630), INT32_C( 12814747), INT32_C( 1), INT32_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_srlv_epi32(a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i r, a = simde_test_x86_random_i32x16(), b = simde_test_x86_random_i32x16(); b = simde_mm512_and_si512(b, simde_mm512_set1_epi32(31)); if (i == 0 ) b = simde_mm512_mask_blend_epi32(0xf, b, simde_mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, -1, 31, 0)); r = simde_mm512_srlv_epi32(a, b); simde_test_x86_write_i32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_srlv_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { { INT64_C( 6966218786012918026), -INT64_C( 5618205476508002450), 
-INT64_C( 1426445156367503757), -INT64_C( 1837648021358286045), -INT64_C( 2539650388535184819), -INT64_C( 1583686562416863375), -INT64_C( 8383106113674598388), INT64_C( 1181749340145411748) }, { INT64_C( 0), INT64_C( 63), -INT64_C( 1), INT64_C( 64), INT64_C( 8), INT64_C( 48), INT64_C( 12), INT64_C( 42) }, { INT64_C( 6966218786012918026), INT64_C( 1), INT64_C( 0), INT64_C( 0), INT64_C( 62137084707712370), INT64_C( 59909), INT64_C( 2456942861336658), INT64_C( 268698) } }, { { -INT64_C( 4046179603180281090), INT64_C( 3795197655086337113), INT64_C( 6970565303225370834), -INT64_C( 4283339106891513924), -INT64_C( 106883913000185689), INT64_C( 7543103411622860113), INT64_C( 4637673542611507141), -INT64_C( 5107767993907966574) }, { INT64_C( 32), INT64_C( 16), INT64_C( 29), INT64_C( 60), INT64_C( 8), INT64_C( 51), INT64_C( 35), INT64_C( 20) }, { INT64_C( 3352892694), INT64_C( 57910120469457), INT64_C( 12983689649), INT64_C( 12), INT64_C( 71640078752770960), INT64_C( 3349), INT64_C( 134974064), INT64_C( 12721038894464) } }, { { -INT64_C( 8993731813987568914), INT64_C( 4658525066509246244), INT64_C( 8976307826881951255), -INT64_C( 2729782890559505209), INT64_C( 4549462570720903694), INT64_C( 3056988749443738447), INT64_C( 5761990167718086418), INT64_C( 2555505904528996527) }, { INT64_C( 61), INT64_C( 15), INT64_C( 24), INT64_C( 34), INT64_C( 25), INT64_C( 62), INT64_C( 39), INT64_C( 31) }, { INT64_C( 4), INT64_C( 142166902664466), INT64_C( 535029639415), INT64_C( 914847547), INT64_C( 135584550223), INT64_C( 0), INT64_C( 10480999), INT64_C( 1190000169) } }, { { INT64_C( 3915167907981510217), -INT64_C( 6518895596913605547), INT64_C( 6982693785598471437), -INT64_C( 979970894321009915), INT64_C( 8243081679749162423), -INT64_C( 4019676266592108397), -INT64_C( 6104004055400373270), INT64_C( 4111781521073115507) }, { INT64_C( 32), INT64_C( 13), INT64_C( 23), INT64_C( 18), INT64_C( 3), INT64_C( 45), INT64_C( 10), INT64_C( 53) }, { INT64_C( 911571064), INT64_C( 1456036191015130), 
INT64_C( 832401965331), INT64_C( 66630451886705), INT64_C( 1030385209968645302), INT64_C( 410041), INT64_C( 12053457049130056), INT64_C( 456) } }, { { INT64_C( 6057686915148406591), -INT64_C( 7078250056236356866), INT64_C( 4598512266726445908), -INT64_C( 7639947378485785139), INT64_C( 2652382851266767955), -INT64_C( 3926188394952363054), INT64_C( 341291946128573683), INT64_C( 3601860306028357904) }, { INT64_C( 12), INT64_C( 46), INT64_C( 59), INT64_C( 39), INT64_C( 41), INT64_C( 6), INT64_C( 53), INT64_C( 33) }, { INT64_C( 1478927469518653), INT64_C( 161556), INT64_C( 7), INT64_C( 19657448), INT64_C( 1206164), INT64_C( 226883682480581071), INT64_C( 37), INT64_C( 419311726) } }, { { INT64_C( 55303212152202527), -INT64_C( 190921087765614432), INT64_C( 5384226706700903894), INT64_C( 2258363894737715711), -INT64_C( 5300351835966890634), -INT64_C( 7632388881543525681), -INT64_C( 4150345481680443020), -INT64_C( 7502623004535435325) }, { INT64_C( 49), INT64_C( 2), INT64_C( 61), INT64_C( 9), INT64_C( 8), INT64_C( 6), INT64_C( 3), INT64_C( 50) }, { INT64_C( 98), INT64_C( 4563955746485984296), INT64_C( 2), INT64_C( 4410866981909600), INT64_C( 51353094678682269), INT64_C( 168974299877594155), INT64_C( 1787049824003638574), INT64_C( 9720) } }, { { INT64_C( 5155634216359390984), -INT64_C( 5330768873949015095), INT64_C( 4899478041251538590), -INT64_C( 8932982782673421075), INT64_C( 4711637638825507354), -INT64_C( 2371656200242924692), INT64_C( 1489501970028344598), -INT64_C( 9024348993671742269) }, { INT64_C( 12), INT64_C( 52), INT64_C( 1), INT64_C( 4), INT64_C( 56), INT64_C( 32), INT64_C( 39), INT64_C( 34) }, { INT64_C( 1258699759853366), INT64_C( 2912), INT64_C( 2449739020625769295), INT64_C( 594610080689758158), INT64_C( 65), INT64_C( 3742773056), INT64_C( 2709388), INT64_C( 548455577) } }, { { INT64_C( 1861832143430731975), INT64_C( 48924248233315771), INT64_C( 5557999578332679780), INT64_C( 8107508098736352848), INT64_C( 7206212032826514031), INT64_C( 1045194286659328553), 
INT64_C( 1661107840780061844), -INT64_C( 1442935853270313505) }, { INT64_C( 3), INT64_C( 47), INT64_C( 17), INT64_C( 37), INT64_C( 6), INT64_C( 41), INT64_C( 28), INT64_C( 27) }, { INT64_C( 232729017928841496), INT64_C( 347), INT64_C( 42404171587621), INT64_C( 58989885), INT64_C( 112597063012914281), INT64_C( 475299), INT64_C( 6188108923), INT64_C( 126688243601) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_srlv_epi64(a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i r, a = simde_test_x86_random_i64x8(), b = simde_test_x86_random_i64x8(); b = simde_mm512_and_si512(b, simde_mm512_set1_epi64(63)); if (i == 0 ) b = simde_mm512_mask_blend_epi64(0xf, b, simde_mm512_set_epi64(0, 0, 0, 0, 64, -1, 63, 0)); r = simde_mm512_srlv_epi64(a, b); simde_test_x86_write_i64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_srlv_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_srlv_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_srlv_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_srlv_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_srlv_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_srlv_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_srlv_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_srlv_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_srlv_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_srlv_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_srlv_epi64) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/store.c000066400000000000000000000236561400333146700165700ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of 
charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #define SIMDE_TEST_X86_AVX512_INSN store #include #include static int test_simde_mm512_store_ps (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 397.85), SIMDE_FLOAT32_C( 280.50), SIMDE_FLOAT32_C( -482.10), SIMDE_FLOAT32_C( -764.38), SIMDE_FLOAT32_C( 375.26), SIMDE_FLOAT32_C( -613.57), SIMDE_FLOAT32_C( 56.03), SIMDE_FLOAT32_C( 417.16), SIMDE_FLOAT32_C( -424.36), SIMDE_FLOAT32_C( 64.48), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 101.24), SIMDE_FLOAT32_C( -965.83), SIMDE_FLOAT32_C( 916.49), SIMDE_FLOAT32_C( 799.09), SIMDE_FLOAT32_C( 628.08) }, { SIMDE_FLOAT32_C( 397.85), SIMDE_FLOAT32_C( 280.50), SIMDE_FLOAT32_C( -482.10), SIMDE_FLOAT32_C( -764.38), SIMDE_FLOAT32_C( 375.26), SIMDE_FLOAT32_C( -613.57), SIMDE_FLOAT32_C( 56.03), SIMDE_FLOAT32_C( 417.16), SIMDE_FLOAT32_C( -424.36), SIMDE_FLOAT32_C( 64.48), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 101.24), SIMDE_FLOAT32_C( -965.83), SIMDE_FLOAT32_C( 916.49), SIMDE_FLOAT32_C( 799.09), SIMDE_FLOAT32_C( 628.08) } }, { { SIMDE_FLOAT32_C( -588.70), SIMDE_FLOAT32_C( 688.61), SIMDE_FLOAT32_C( 202.01), SIMDE_FLOAT32_C( -610.64), SIMDE_FLOAT32_C( 838.07), SIMDE_FLOAT32_C( -733.40), SIMDE_FLOAT32_C( -127.00), SIMDE_FLOAT32_C( 993.35), SIMDE_FLOAT32_C( -249.66), SIMDE_FLOAT32_C( -45.23), SIMDE_FLOAT32_C( 849.71), SIMDE_FLOAT32_C( -85.52), SIMDE_FLOAT32_C( 193.59), SIMDE_FLOAT32_C( -257.46), SIMDE_FLOAT32_C( 827.23), SIMDE_FLOAT32_C( -408.56) }, { SIMDE_FLOAT32_C( -588.70), SIMDE_FLOAT32_C( 688.61), SIMDE_FLOAT32_C( 202.01), SIMDE_FLOAT32_C( -610.64), SIMDE_FLOAT32_C( 838.07), SIMDE_FLOAT32_C( -733.40), SIMDE_FLOAT32_C( -127.00), SIMDE_FLOAT32_C( 993.35), SIMDE_FLOAT32_C( -249.66), SIMDE_FLOAT32_C( -45.23), SIMDE_FLOAT32_C( 849.71), SIMDE_FLOAT32_C( -85.52), SIMDE_FLOAT32_C( 193.59), SIMDE_FLOAT32_C( -257.46), SIMDE_FLOAT32_C( 827.23), SIMDE_FLOAT32_C( -408.56) } }, { { SIMDE_FLOAT32_C( -976.96), 
SIMDE_FLOAT32_C( -654.87), SIMDE_FLOAT32_C( -172.94), SIMDE_FLOAT32_C( 398.29), SIMDE_FLOAT32_C( -268.45), SIMDE_FLOAT32_C( 883.09), SIMDE_FLOAT32_C( -184.55), SIMDE_FLOAT32_C( 307.20), SIMDE_FLOAT32_C( -52.43), SIMDE_FLOAT32_C( 816.29), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -18.26), SIMDE_FLOAT32_C( 732.78), SIMDE_FLOAT32_C( -792.48), SIMDE_FLOAT32_C( -390.18), SIMDE_FLOAT32_C( -855.92) }, { SIMDE_FLOAT32_C( -976.96), SIMDE_FLOAT32_C( -654.87), SIMDE_FLOAT32_C( -172.94), SIMDE_FLOAT32_C( 398.29), SIMDE_FLOAT32_C( -268.45), SIMDE_FLOAT32_C( 883.09), SIMDE_FLOAT32_C( -184.55), SIMDE_FLOAT32_C( 307.20), SIMDE_FLOAT32_C( -52.43), SIMDE_FLOAT32_C( 816.29), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -18.26), SIMDE_FLOAT32_C( 732.78), SIMDE_FLOAT32_C( -792.48), SIMDE_FLOAT32_C( -390.18), SIMDE_FLOAT32_C( -855.92) } }, { { SIMDE_FLOAT32_C( 896.13), SIMDE_FLOAT32_C( 811.83), SIMDE_FLOAT32_C( -466.56), SIMDE_FLOAT32_C( 734.20), SIMDE_FLOAT32_C( -921.57), SIMDE_FLOAT32_C( 406.44), SIMDE_FLOAT32_C( 727.55), SIMDE_FLOAT32_C( -171.23), SIMDE_FLOAT32_C( -638.79), SIMDE_FLOAT32_C( 577.26), SIMDE_FLOAT32_C( 743.25), SIMDE_FLOAT32_C( 554.80), SIMDE_FLOAT32_C( -680.21), SIMDE_FLOAT32_C( 570.48), SIMDE_FLOAT32_C( -853.75), SIMDE_FLOAT32_C( -657.17) }, { SIMDE_FLOAT32_C( 896.13), SIMDE_FLOAT32_C( 811.83), SIMDE_FLOAT32_C( -466.56), SIMDE_FLOAT32_C( 734.20), SIMDE_FLOAT32_C( -921.57), SIMDE_FLOAT32_C( 406.44), SIMDE_FLOAT32_C( 727.55), SIMDE_FLOAT32_C( -171.23), SIMDE_FLOAT32_C( -638.79), SIMDE_FLOAT32_C( 577.26), SIMDE_FLOAT32_C( 743.25), SIMDE_FLOAT32_C( 554.80), SIMDE_FLOAT32_C( -680.21), SIMDE_FLOAT32_C( 570.48), SIMDE_FLOAT32_C( -853.75), SIMDE_FLOAT32_C( -657.17) } }, { { SIMDE_FLOAT32_C( 915.61), SIMDE_FLOAT32_C( -26.70), SIMDE_FLOAT32_C( 741.12), SIMDE_FLOAT32_C( -352.84), SIMDE_FLOAT32_C( -143.61), SIMDE_FLOAT32_C( -443.43), SIMDE_FLOAT32_C( 954.36), SIMDE_FLOAT32_C( 803.96), SIMDE_FLOAT32_C( -627.14), SIMDE_FLOAT32_C( -637.21), SIMDE_FLOAT32_C( -214.30), 
SIMDE_FLOAT32_C( -894.36), SIMDE_FLOAT32_C( -429.68), SIMDE_FLOAT32_C( 395.52), SIMDE_FLOAT32_C( -750.28), SIMDE_FLOAT32_C( -533.55) }, { SIMDE_FLOAT32_C( 915.61), SIMDE_FLOAT32_C( -26.70), SIMDE_FLOAT32_C( 741.12), SIMDE_FLOAT32_C( -352.84), SIMDE_FLOAT32_C( -143.61), SIMDE_FLOAT32_C( -443.43), SIMDE_FLOAT32_C( 954.36), SIMDE_FLOAT32_C( 803.96), SIMDE_FLOAT32_C( -627.14), SIMDE_FLOAT32_C( -637.21), SIMDE_FLOAT32_C( -214.30), SIMDE_FLOAT32_C( -894.36), SIMDE_FLOAT32_C( -429.68), SIMDE_FLOAT32_C( 395.52), SIMDE_FLOAT32_C( -750.28), SIMDE_FLOAT32_C( -533.55) } }, { { SIMDE_FLOAT32_C( 207.35), SIMDE_FLOAT32_C( -216.84), SIMDE_FLOAT32_C( -799.36), SIMDE_FLOAT32_C( 285.78), SIMDE_FLOAT32_C( -810.40), SIMDE_FLOAT32_C( 928.19), SIMDE_FLOAT32_C( -885.45), SIMDE_FLOAT32_C( -449.19), SIMDE_FLOAT32_C( 505.45), SIMDE_FLOAT32_C( 857.81), SIMDE_FLOAT32_C( -894.39), SIMDE_FLOAT32_C( 825.24), SIMDE_FLOAT32_C( 428.29), SIMDE_FLOAT32_C( -748.14), SIMDE_FLOAT32_C( -831.93), SIMDE_FLOAT32_C( 343.89) }, { SIMDE_FLOAT32_C( 207.35), SIMDE_FLOAT32_C( -216.84), SIMDE_FLOAT32_C( -799.36), SIMDE_FLOAT32_C( 285.78), SIMDE_FLOAT32_C( -810.40), SIMDE_FLOAT32_C( 928.19), SIMDE_FLOAT32_C( -885.45), SIMDE_FLOAT32_C( -449.19), SIMDE_FLOAT32_C( 505.45), SIMDE_FLOAT32_C( 857.81), SIMDE_FLOAT32_C( -894.39), SIMDE_FLOAT32_C( 825.24), SIMDE_FLOAT32_C( 428.29), SIMDE_FLOAT32_C( -748.14), SIMDE_FLOAT32_C( -831.93), SIMDE_FLOAT32_C( 343.89) } }, { { SIMDE_FLOAT32_C( 225.16), SIMDE_FLOAT32_C( 909.19), SIMDE_FLOAT32_C( 991.05), SIMDE_FLOAT32_C( -918.45), SIMDE_FLOAT32_C( -534.23), SIMDE_FLOAT32_C( 945.41), SIMDE_FLOAT32_C( 885.51), SIMDE_FLOAT32_C( -161.37), SIMDE_FLOAT32_C( -691.80), SIMDE_FLOAT32_C( -328.80), SIMDE_FLOAT32_C( -55.73), SIMDE_FLOAT32_C( -121.48), SIMDE_FLOAT32_C( -933.28), SIMDE_FLOAT32_C( 193.99), SIMDE_FLOAT32_C( 344.96), SIMDE_FLOAT32_C( 274.08) }, { SIMDE_FLOAT32_C( 225.16), SIMDE_FLOAT32_C( 909.19), SIMDE_FLOAT32_C( 991.05), SIMDE_FLOAT32_C( -918.45), SIMDE_FLOAT32_C( -534.23), 
SIMDE_FLOAT32_C( 945.41), SIMDE_FLOAT32_C( 885.51), SIMDE_FLOAT32_C( -161.37), SIMDE_FLOAT32_C( -691.80), SIMDE_FLOAT32_C( -328.80), SIMDE_FLOAT32_C( -55.73), SIMDE_FLOAT32_C( -121.48), SIMDE_FLOAT32_C( -933.28), SIMDE_FLOAT32_C( 193.99), SIMDE_FLOAT32_C( 344.96), SIMDE_FLOAT32_C( 274.08) } }, { { SIMDE_FLOAT32_C( 977.14), SIMDE_FLOAT32_C( 545.61), SIMDE_FLOAT32_C( -440.14), SIMDE_FLOAT32_C( -833.26), SIMDE_FLOAT32_C( 473.80), SIMDE_FLOAT32_C( -325.59), SIMDE_FLOAT32_C( -282.45), SIMDE_FLOAT32_C( -20.75), SIMDE_FLOAT32_C( -467.78), SIMDE_FLOAT32_C( -176.84), SIMDE_FLOAT32_C( -195.51), SIMDE_FLOAT32_C( 960.51), SIMDE_FLOAT32_C( 75.02), SIMDE_FLOAT32_C( -27.44), SIMDE_FLOAT32_C( 304.40), SIMDE_FLOAT32_C( -699.82) }, { SIMDE_FLOAT32_C( 977.14), SIMDE_FLOAT32_C( 545.61), SIMDE_FLOAT32_C( -440.14), SIMDE_FLOAT32_C( -833.26), SIMDE_FLOAT32_C( 473.80), SIMDE_FLOAT32_C( -325.59), SIMDE_FLOAT32_C( -282.45), SIMDE_FLOAT32_C( -20.75), SIMDE_FLOAT32_C( -467.78), SIMDE_FLOAT32_C( -176.84), SIMDE_FLOAT32_C( -195.51), SIMDE_FLOAT32_C( 960.51), SIMDE_FLOAT32_C( 75.02), SIMDE_FLOAT32_C( -27.44), SIMDE_FLOAT32_C( 304.40), SIMDE_FLOAT32_C( -699.82) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); SIMDE_ALIGN_LIKE_64(simde__m512) simde_float32 r[sizeof(simde__m512) / sizeof(simde_float32)]; simde_mm512_store_ps(r, a); simde_assert_equal_vf32(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512 a = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde_float32 r[sizeof(simde__m512) / sizeof(simde_float32)]; simde_mm512_store_ps(r, a); simde_test_x86_write_f32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vf32(2, sizeof(r) / sizeof(r[0]), r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_store_ps) SIMDE_TEST_FUNC_LIST_END 
#include simde-0.7.2/test/x86/avx512/storeu.c000066400000000000000000000236231400333146700167470ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #define SIMDE_TEST_X86_AVX512_INSN storeu #include #include static int test_simde_mm512_storeu_ps (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 397.85), SIMDE_FLOAT32_C( 280.50), SIMDE_FLOAT32_C( -482.10), SIMDE_FLOAT32_C( -764.38), SIMDE_FLOAT32_C( 375.26), SIMDE_FLOAT32_C( -613.57), SIMDE_FLOAT32_C( 56.03), SIMDE_FLOAT32_C( 417.16), SIMDE_FLOAT32_C( -424.36), SIMDE_FLOAT32_C( 64.48), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 101.24), SIMDE_FLOAT32_C( -965.83), SIMDE_FLOAT32_C( 916.49), SIMDE_FLOAT32_C( 799.09), SIMDE_FLOAT32_C( 628.08) }, { SIMDE_FLOAT32_C( 397.85), SIMDE_FLOAT32_C( 280.50), SIMDE_FLOAT32_C( -482.10), SIMDE_FLOAT32_C( -764.38), SIMDE_FLOAT32_C( 375.26), SIMDE_FLOAT32_C( -613.57), SIMDE_FLOAT32_C( 56.03), SIMDE_FLOAT32_C( 417.16), SIMDE_FLOAT32_C( -424.36), SIMDE_FLOAT32_C( 64.48), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 101.24), SIMDE_FLOAT32_C( -965.83), SIMDE_FLOAT32_C( 916.49), SIMDE_FLOAT32_C( 799.09), SIMDE_FLOAT32_C( 628.08) } }, { { SIMDE_FLOAT32_C( -588.70), SIMDE_FLOAT32_C( 688.61), SIMDE_FLOAT32_C( 202.01), SIMDE_FLOAT32_C( -610.64), SIMDE_FLOAT32_C( 838.07), SIMDE_FLOAT32_C( -733.40), SIMDE_FLOAT32_C( -127.00), SIMDE_FLOAT32_C( 993.35), SIMDE_FLOAT32_C( -249.66), SIMDE_FLOAT32_C( -45.23), SIMDE_FLOAT32_C( 849.71), SIMDE_FLOAT32_C( -85.52), SIMDE_FLOAT32_C( 193.59), SIMDE_FLOAT32_C( -257.46), SIMDE_FLOAT32_C( 827.23), SIMDE_FLOAT32_C( -408.56) }, { SIMDE_FLOAT32_C( -588.70), SIMDE_FLOAT32_C( 688.61), SIMDE_FLOAT32_C( 202.01), SIMDE_FLOAT32_C( -610.64), SIMDE_FLOAT32_C( 838.07), SIMDE_FLOAT32_C( -733.40), SIMDE_FLOAT32_C( -127.00), SIMDE_FLOAT32_C( 993.35), SIMDE_FLOAT32_C( -249.66), SIMDE_FLOAT32_C( -45.23), SIMDE_FLOAT32_C( 849.71), SIMDE_FLOAT32_C( -85.52), SIMDE_FLOAT32_C( 193.59), SIMDE_FLOAT32_C( -257.46), SIMDE_FLOAT32_C( 827.23), SIMDE_FLOAT32_C( -408.56) } }, { { SIMDE_FLOAT32_C( -976.96), 
SIMDE_FLOAT32_C( -654.87), SIMDE_FLOAT32_C( -172.94), SIMDE_FLOAT32_C( 398.29), SIMDE_FLOAT32_C( -268.45), SIMDE_FLOAT32_C( 883.09), SIMDE_FLOAT32_C( -184.55), SIMDE_FLOAT32_C( 307.20), SIMDE_FLOAT32_C( -52.43), SIMDE_FLOAT32_C( 816.29), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -18.26), SIMDE_FLOAT32_C( 732.78), SIMDE_FLOAT32_C( -792.48), SIMDE_FLOAT32_C( -390.18), SIMDE_FLOAT32_C( -855.92) }, { SIMDE_FLOAT32_C( -976.96), SIMDE_FLOAT32_C( -654.87), SIMDE_FLOAT32_C( -172.94), SIMDE_FLOAT32_C( 398.29), SIMDE_FLOAT32_C( -268.45), SIMDE_FLOAT32_C( 883.09), SIMDE_FLOAT32_C( -184.55), SIMDE_FLOAT32_C( 307.20), SIMDE_FLOAT32_C( -52.43), SIMDE_FLOAT32_C( 816.29), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -18.26), SIMDE_FLOAT32_C( 732.78), SIMDE_FLOAT32_C( -792.48), SIMDE_FLOAT32_C( -390.18), SIMDE_FLOAT32_C( -855.92) } }, { { SIMDE_FLOAT32_C( 896.13), SIMDE_FLOAT32_C( 811.83), SIMDE_FLOAT32_C( -466.56), SIMDE_FLOAT32_C( 734.20), SIMDE_FLOAT32_C( -921.57), SIMDE_FLOAT32_C( 406.44), SIMDE_FLOAT32_C( 727.55), SIMDE_FLOAT32_C( -171.23), SIMDE_FLOAT32_C( -638.79), SIMDE_FLOAT32_C( 577.26), SIMDE_FLOAT32_C( 743.25), SIMDE_FLOAT32_C( 554.80), SIMDE_FLOAT32_C( -680.21), SIMDE_FLOAT32_C( 570.48), SIMDE_FLOAT32_C( -853.75), SIMDE_FLOAT32_C( -657.17) }, { SIMDE_FLOAT32_C( 896.13), SIMDE_FLOAT32_C( 811.83), SIMDE_FLOAT32_C( -466.56), SIMDE_FLOAT32_C( 734.20), SIMDE_FLOAT32_C( -921.57), SIMDE_FLOAT32_C( 406.44), SIMDE_FLOAT32_C( 727.55), SIMDE_FLOAT32_C( -171.23), SIMDE_FLOAT32_C( -638.79), SIMDE_FLOAT32_C( 577.26), SIMDE_FLOAT32_C( 743.25), SIMDE_FLOAT32_C( 554.80), SIMDE_FLOAT32_C( -680.21), SIMDE_FLOAT32_C( 570.48), SIMDE_FLOAT32_C( -853.75), SIMDE_FLOAT32_C( -657.17) } }, { { SIMDE_FLOAT32_C( 915.61), SIMDE_FLOAT32_C( -26.70), SIMDE_FLOAT32_C( 741.12), SIMDE_FLOAT32_C( -352.84), SIMDE_FLOAT32_C( -143.61), SIMDE_FLOAT32_C( -443.43), SIMDE_FLOAT32_C( 954.36), SIMDE_FLOAT32_C( 803.96), SIMDE_FLOAT32_C( -627.14), SIMDE_FLOAT32_C( -637.21), SIMDE_FLOAT32_C( -214.30), 
SIMDE_FLOAT32_C( -894.36), SIMDE_FLOAT32_C( -429.68), SIMDE_FLOAT32_C( 395.52), SIMDE_FLOAT32_C( -750.28), SIMDE_FLOAT32_C( -533.55) }, { SIMDE_FLOAT32_C( 915.61), SIMDE_FLOAT32_C( -26.70), SIMDE_FLOAT32_C( 741.12), SIMDE_FLOAT32_C( -352.84), SIMDE_FLOAT32_C( -143.61), SIMDE_FLOAT32_C( -443.43), SIMDE_FLOAT32_C( 954.36), SIMDE_FLOAT32_C( 803.96), SIMDE_FLOAT32_C( -627.14), SIMDE_FLOAT32_C( -637.21), SIMDE_FLOAT32_C( -214.30), SIMDE_FLOAT32_C( -894.36), SIMDE_FLOAT32_C( -429.68), SIMDE_FLOAT32_C( 395.52), SIMDE_FLOAT32_C( -750.28), SIMDE_FLOAT32_C( -533.55) } }, { { SIMDE_FLOAT32_C( 207.35), SIMDE_FLOAT32_C( -216.84), SIMDE_FLOAT32_C( -799.36), SIMDE_FLOAT32_C( 285.78), SIMDE_FLOAT32_C( -810.40), SIMDE_FLOAT32_C( 928.19), SIMDE_FLOAT32_C( -885.45), SIMDE_FLOAT32_C( -449.19), SIMDE_FLOAT32_C( 505.45), SIMDE_FLOAT32_C( 857.81), SIMDE_FLOAT32_C( -894.39), SIMDE_FLOAT32_C( 825.24), SIMDE_FLOAT32_C( 428.29), SIMDE_FLOAT32_C( -748.14), SIMDE_FLOAT32_C( -831.93), SIMDE_FLOAT32_C( 343.89) }, { SIMDE_FLOAT32_C( 207.35), SIMDE_FLOAT32_C( -216.84), SIMDE_FLOAT32_C( -799.36), SIMDE_FLOAT32_C( 285.78), SIMDE_FLOAT32_C( -810.40), SIMDE_FLOAT32_C( 928.19), SIMDE_FLOAT32_C( -885.45), SIMDE_FLOAT32_C( -449.19), SIMDE_FLOAT32_C( 505.45), SIMDE_FLOAT32_C( 857.81), SIMDE_FLOAT32_C( -894.39), SIMDE_FLOAT32_C( 825.24), SIMDE_FLOAT32_C( 428.29), SIMDE_FLOAT32_C( -748.14), SIMDE_FLOAT32_C( -831.93), SIMDE_FLOAT32_C( 343.89) } }, { { SIMDE_FLOAT32_C( 225.16), SIMDE_FLOAT32_C( 909.19), SIMDE_FLOAT32_C( 991.05), SIMDE_FLOAT32_C( -918.45), SIMDE_FLOAT32_C( -534.23), SIMDE_FLOAT32_C( 945.41), SIMDE_FLOAT32_C( 885.51), SIMDE_FLOAT32_C( -161.37), SIMDE_FLOAT32_C( -691.80), SIMDE_FLOAT32_C( -328.80), SIMDE_FLOAT32_C( -55.73), SIMDE_FLOAT32_C( -121.48), SIMDE_FLOAT32_C( -933.28), SIMDE_FLOAT32_C( 193.99), SIMDE_FLOAT32_C( 344.96), SIMDE_FLOAT32_C( 274.08) }, { SIMDE_FLOAT32_C( 225.16), SIMDE_FLOAT32_C( 909.19), SIMDE_FLOAT32_C( 991.05), SIMDE_FLOAT32_C( -918.45), SIMDE_FLOAT32_C( -534.23), 
SIMDE_FLOAT32_C( 945.41), SIMDE_FLOAT32_C( 885.51), SIMDE_FLOAT32_C( -161.37), SIMDE_FLOAT32_C( -691.80), SIMDE_FLOAT32_C( -328.80), SIMDE_FLOAT32_C( -55.73), SIMDE_FLOAT32_C( -121.48), SIMDE_FLOAT32_C( -933.28), SIMDE_FLOAT32_C( 193.99), SIMDE_FLOAT32_C( 344.96), SIMDE_FLOAT32_C( 274.08) } }, { { SIMDE_FLOAT32_C( 977.14), SIMDE_FLOAT32_C( 545.61), SIMDE_FLOAT32_C( -440.14), SIMDE_FLOAT32_C( -833.26), SIMDE_FLOAT32_C( 473.80), SIMDE_FLOAT32_C( -325.59), SIMDE_FLOAT32_C( -282.45), SIMDE_FLOAT32_C( -20.75), SIMDE_FLOAT32_C( -467.78), SIMDE_FLOAT32_C( -176.84), SIMDE_FLOAT32_C( -195.51), SIMDE_FLOAT32_C( 960.51), SIMDE_FLOAT32_C( 75.02), SIMDE_FLOAT32_C( -27.44), SIMDE_FLOAT32_C( 304.40), SIMDE_FLOAT32_C( -699.82) }, { SIMDE_FLOAT32_C( 977.14), SIMDE_FLOAT32_C( 545.61), SIMDE_FLOAT32_C( -440.14), SIMDE_FLOAT32_C( -833.26), SIMDE_FLOAT32_C( 473.80), SIMDE_FLOAT32_C( -325.59), SIMDE_FLOAT32_C( -282.45), SIMDE_FLOAT32_C( -20.75), SIMDE_FLOAT32_C( -467.78), SIMDE_FLOAT32_C( -176.84), SIMDE_FLOAT32_C( -195.51), SIMDE_FLOAT32_C( 960.51), SIMDE_FLOAT32_C( 75.02), SIMDE_FLOAT32_C( -27.44), SIMDE_FLOAT32_C( 304.40), SIMDE_FLOAT32_C( -699.82) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde_float32 r[sizeof(simde__m512) / sizeof(simde_float32)]; simde_mm512_storeu_ps(r, a); simde_assert_equal_vf32(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512 a = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde_float32 r[sizeof(simde__m512) / sizeof(simde_float32)]; simde_mm512_storeu_ps(r, a); simde_test_x86_write_f32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_vf32(2, sizeof(r) / sizeof(r[0]), r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_storeu_ps) SIMDE_TEST_FUNC_LIST_END #include 
simde-0.7.2/test/x86/avx512/sub.c000066400000000000000000010221351400333146700162150ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN sub #include #include #include static int test_simde_mm512_sub_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( 82), INT8_C( 83), INT8_C( 117), INT8_C( 65), INT8_C( -47), INT8_C(-122), INT8_C( 116), INT8_C( 14), INT8_C( 76), INT8_C( 1), INT8_C( -50), INT8_C( 4), INT8_C( 83), INT8_C( -77), INT8_C( 112), INT8_C(-102), INT8_C( 99), INT8_C(-118), INT8_C( -47), INT8_C( -67), INT8_C( 60), INT8_C( -34), INT8_C( 78), INT8_C(-110), INT8_C( -58), INT8_C( 87), INT8_C( -61), INT8_C( 26), INT8_C( -17), INT8_C( -46), INT8_C( 116), INT8_C( -20), INT8_C(-120), INT8_C( 48), INT8_C( 24), INT8_C( 46), INT8_C( 103), INT8_C( -34), INT8_C( 42), INT8_C( 51), INT8_C( -65), INT8_C( 86), INT8_C( 61), INT8_C( -56), INT8_C( 58), INT8_C( 119), INT8_C( 93), INT8_C( -1), INT8_C( -58), INT8_C(-121), INT8_C( -32), INT8_C( 98), INT8_C( -66), INT8_C( 79), INT8_C( 99), INT8_C( -93), INT8_C( 77), INT8_C( -56), INT8_C( -78), INT8_C( 39), INT8_C( -50), INT8_C( -60), INT8_C( -68), INT8_C( -4)), simde_mm512_set_epi8(INT8_C( 106), INT8_C( 39), INT8_C(-106), INT8_C( 14), INT8_C( -28), INT8_C( -2), INT8_C( 39), INT8_C( -28), INT8_C( 54), INT8_C( 70), INT8_C( -19), INT8_C( -12), INT8_C( -42), INT8_C( 28), INT8_C( -13), INT8_C( -6), INT8_C( 116), INT8_C( 2), INT8_C( 23), INT8_C( 121), INT8_C( 112), INT8_C( -35), INT8_C(-124), INT8_C( 10), INT8_C( -16), INT8_C(-117), INT8_C( 26), INT8_C(-125), INT8_C( 36), INT8_C( 109), INT8_C( 29), INT8_C( -35), INT8_C( -9), INT8_C( -85), INT8_C( -38), INT8_C( 95), INT8_C( -88), INT8_C( 3), INT8_C( 4), INT8_C( 100), INT8_C( 85), INT8_C( 21), INT8_C( 66), INT8_C( -33), INT8_C( -77), INT8_C( -5), INT8_C(-126), INT8_C( 122), INT8_C( -30), INT8_C( -83), INT8_C( 74), INT8_C(-108), INT8_C( 83), INT8_C( -96), INT8_C( -57), INT8_C(-108), INT8_C( 89), INT8_C( 59), INT8_C( 111), 
INT8_C( -47), INT8_C( -10), INT8_C( -18), INT8_C( 20), INT8_C(-125)), simde_mm512_set_epi8(INT8_C( -24), INT8_C( 44), INT8_C( -33), INT8_C( 51), INT8_C( -19), INT8_C(-120), INT8_C( 77), INT8_C( 42), INT8_C( 22), INT8_C( -69), INT8_C( -31), INT8_C( 16), INT8_C( 125), INT8_C(-105), INT8_C( 125), INT8_C( -96), INT8_C( -17), INT8_C(-120), INT8_C( -70), INT8_C( 68), INT8_C( -52), INT8_C( 1), INT8_C( -54), INT8_C(-120), INT8_C( -42), INT8_C( -52), INT8_C( -87), INT8_C(-105), INT8_C( -53), INT8_C( 101), INT8_C( 87), INT8_C( 15), INT8_C(-111), INT8_C(-123), INT8_C( 62), INT8_C( -49), INT8_C( -65), INT8_C( -37), INT8_C( 38), INT8_C( -49), INT8_C( 106), INT8_C( 65), INT8_C( -5), INT8_C( -23), INT8_C(-121), INT8_C( 124), INT8_C( -37), INT8_C(-123), INT8_C( -28), INT8_C( -38), INT8_C(-106), INT8_C( -50), INT8_C( 107), INT8_C( -81), INT8_C(-100), INT8_C( 15), INT8_C( -12), INT8_C(-115), INT8_C( 67), INT8_C( 86), INT8_C( -40), INT8_C( -42), INT8_C( -88), INT8_C( 121)) }, { simde_mm512_set_epi8(INT8_C(-108), INT8_C(-116), INT8_C( 21), INT8_C(-123), INT8_C( -53), INT8_C( 42), INT8_C( 66), INT8_C( 13), INT8_C( 9), INT8_C( 115), INT8_C( 86), INT8_C( 126), INT8_C( -24), INT8_C( 35), INT8_C( -5), INT8_C( 103), INT8_C( 38), INT8_C( 111), INT8_C( 24), INT8_C( -71), INT8_C( -1), INT8_C( 17), INT8_C( -63), INT8_C( -13), INT8_C( 14), INT8_C( 82), INT8_C( 78), INT8_C(-102), INT8_C( -7), INT8_C( 93), INT8_C( 25), INT8_C( 103), INT8_C( 113), INT8_C( -15), INT8_C( -19), INT8_C( -73), INT8_C( -11), INT8_C( 103), INT8_C( -97), INT8_C( 123), INT8_C( 28), INT8_C( 53), INT8_C( -15), INT8_C( 122), INT8_C( 3), INT8_C( -54), INT8_C( -61), INT8_C( 58), INT8_C( -44), INT8_C( -3), INT8_C( -43), INT8_C( -35), INT8_C(-118), INT8_C( -18), INT8_C( 15), INT8_C( 54), INT8_C(-102), INT8_C( -58), INT8_C( -74), INT8_C( -70), INT8_C( 46), INT8_C( 48), INT8_C( -35), INT8_C( 92)), simde_mm512_set_epi8(INT8_C( 6), INT8_C( 68), INT8_C( 77), INT8_C( -94), INT8_C( -48), INT8_C(-101), INT8_C( -8), INT8_C( 82), INT8_C( 
50), INT8_C( -15), INT8_C( 6), INT8_C( 30), INT8_C( -47), INT8_C( -15), INT8_C( -14), INT8_C( -97), INT8_C( 28), INT8_C( -47), INT8_C( -92), INT8_C( -84), INT8_C( -37), INT8_C( -33), INT8_C(-123), INT8_C( -19), INT8_C( 58), INT8_C( 29), INT8_C( 93), INT8_C( -55), INT8_C(-127), INT8_C( -60), INT8_C( 32), INT8_C( 116), INT8_C( -46), INT8_C( 51), INT8_C( -40), INT8_C( 10), INT8_C( 4), INT8_C( 50), INT8_C( 48), INT8_C( 53), INT8_C( 78), INT8_C( 21), INT8_C( 64), INT8_C( 107), INT8_C( 16), INT8_C( 48), INT8_C( -46), INT8_C( 62), INT8_C( 75), INT8_C( 85), INT8_C(-115), INT8_C( -14), INT8_C( -99), INT8_C( 86), INT8_C(-116), INT8_C( -74), INT8_C( 38), INT8_C( 27), INT8_C(-115), INT8_C( 55), INT8_C( -91), INT8_C( -71), INT8_C( -14), INT8_C( -84)), simde_mm512_set_epi8(INT8_C(-114), INT8_C( 72), INT8_C( -56), INT8_C( -29), INT8_C( -5), INT8_C(-113), INT8_C( 74), INT8_C( -69), INT8_C( -41), INT8_C(-126), INT8_C( 80), INT8_C( 96), INT8_C( 23), INT8_C( 50), INT8_C( 9), INT8_C( -56), INT8_C( 10), INT8_C( -98), INT8_C( 116), INT8_C( 13), INT8_C( 36), INT8_C( 50), INT8_C( 60), INT8_C( 6), INT8_C( -44), INT8_C( 53), INT8_C( -15), INT8_C( -47), INT8_C( 120), INT8_C(-103), INT8_C( -7), INT8_C( -13), INT8_C( -97), INT8_C( -66), INT8_C( 21), INT8_C( -83), INT8_C( -15), INT8_C( 53), INT8_C( 111), INT8_C( 70), INT8_C( -50), INT8_C( 32), INT8_C( -79), INT8_C( 15), INT8_C( -13), INT8_C(-102), INT8_C( -15), INT8_C( -4), INT8_C(-119), INT8_C( -88), INT8_C( 72), INT8_C( -21), INT8_C( -19), INT8_C(-104), INT8_C(-125), INT8_C(-128), INT8_C( 116), INT8_C( -85), INT8_C( 41), INT8_C(-125), INT8_C(-119), INT8_C( 119), INT8_C( -21), INT8_C( -80)) }, { simde_mm512_set_epi8(INT8_C( 2), INT8_C( -77), INT8_C( -19), INT8_C( 41), INT8_C( -13), INT8_C( 75), INT8_C(-123), INT8_C( 96), INT8_C( -86), INT8_C( -24), INT8_C( -27), INT8_C( -84), INT8_C( 35), INT8_C( -86), INT8_C( -72), INT8_C( -97), INT8_C( 44), INT8_C( 11), INT8_C(-106), INT8_C( 44), INT8_C( 0), INT8_C( 90), INT8_C( -79), INT8_C( 91), INT8_C( 
119), INT8_C( 59), INT8_C( 105), INT8_C(-128), INT8_C( 110), INT8_C( -29), INT8_C( 67), INT8_C( 114), INT8_C( -39), INT8_C( -49), INT8_C( 105), INT8_C( -40), INT8_C( -33), INT8_C( 120), INT8_C( -27), INT8_C( 100), INT8_C( -90), INT8_C( 86), INT8_C( -18), INT8_C( -57), INT8_C( 84), INT8_C( -26), INT8_C( -77), INT8_C( 17), INT8_C( -47), INT8_C( 51), INT8_C( -83), INT8_C( 53), INT8_C( 71), INT8_C( 96), INT8_C( 110), INT8_C( -89), INT8_C( 27), INT8_C( -45), INT8_C(-126), INT8_C( 40), INT8_C( 95), INT8_C( -87), INT8_C( -62), INT8_C( -52)), simde_mm512_set_epi8(INT8_C( -84), INT8_C( 127), INT8_C( 61), INT8_C( -16), INT8_C( 30), INT8_C( 6), INT8_C(-112), INT8_C( 104), INT8_C( -60), INT8_C( -88), INT8_C( -39), INT8_C( -19), INT8_C( 44), INT8_C( 36), INT8_C( 105), INT8_C( 120), INT8_C( -26), INT8_C( 21), INT8_C( 14), INT8_C( 42), INT8_C( 49), INT8_C( -84), INT8_C(-120), INT8_C(-107), INT8_C( 123), INT8_C( -47), INT8_C( 21), INT8_C( -10), INT8_C( 95), INT8_C( 124), INT8_C( -33), INT8_C( -34), INT8_C( -33), INT8_C( -71), INT8_C( 11), INT8_C( 74), INT8_C( 104), INT8_C( 108), INT8_C( -35), INT8_C( -59), INT8_C( -55), INT8_C(-126), INT8_C( 107), INT8_C( 23), INT8_C( 29), INT8_C( -27), INT8_C( 123), INT8_C( 23), INT8_C( -83), INT8_C( -90), INT8_C( 9), INT8_C( 94), INT8_C( 91), INT8_C( 69), INT8_C( -51), INT8_C(-103), INT8_C( -72), INT8_C( -45), INT8_C( 16), INT8_C( 108), INT8_C( -80), INT8_C( 27), INT8_C( 58), INT8_C( -83)), simde_mm512_set_epi8(INT8_C( 86), INT8_C( 52), INT8_C( -80), INT8_C( 57), INT8_C( -43), INT8_C( 69), INT8_C( -11), INT8_C( -8), INT8_C( -26), INT8_C( 64), INT8_C( 12), INT8_C( -65), INT8_C( -9), INT8_C(-122), INT8_C( 79), INT8_C( 39), INT8_C( 70), INT8_C( -10), INT8_C(-120), INT8_C( 2), INT8_C( -49), INT8_C( -82), INT8_C( 41), INT8_C( -58), INT8_C( -4), INT8_C( 106), INT8_C( 84), INT8_C(-118), INT8_C( 15), INT8_C( 103), INT8_C( 100), INT8_C(-108), INT8_C( -6), INT8_C( 22), INT8_C( 94), INT8_C(-114), INT8_C( 119), INT8_C( 12), INT8_C( 8), INT8_C( -97), INT8_C( 
-35), INT8_C( -44), INT8_C(-125), INT8_C( -80), INT8_C( 55), INT8_C( 1), INT8_C( 56), INT8_C( -6), INT8_C( 36), INT8_C(-115), INT8_C( -92), INT8_C( -41), INT8_C( -20), INT8_C( 27), INT8_C( -95), INT8_C( 14), INT8_C( 99), INT8_C( 0), INT8_C( 114), INT8_C( -68), INT8_C( -81), INT8_C(-114), INT8_C(-120), INT8_C( 31)) }, { simde_mm512_set_epi8(INT8_C( 17), INT8_C( 99), INT8_C( -13), INT8_C( -49), INT8_C( 45), INT8_C(-128), INT8_C( 55), INT8_C( 105), INT8_C( -34), INT8_C( -51), INT8_C( -97), INT8_C(-103), INT8_C(-124), INT8_C( 111), INT8_C( 74), INT8_C( 75), INT8_C( 102), INT8_C( 98), INT8_C(-117), INT8_C( 9), INT8_C( -74), INT8_C( 61), INT8_C( 99), INT8_C( 124), INT8_C( 79), INT8_C(-114), INT8_C( 19), INT8_C( 97), INT8_C(-100), INT8_C(-124), INT8_C( -17), INT8_C( -62), INT8_C( 25), INT8_C( -3), INT8_C( -7), INT8_C( 72), INT8_C(-117), INT8_C( -27), INT8_C( -56), INT8_C( 92), INT8_C( -20), INT8_C( -53), INT8_C( 2), INT8_C( -38), INT8_C( -81), INT8_C( 59), INT8_C( 66), INT8_C( 90), INT8_C( 36), INT8_C( 100), INT8_C( 112), INT8_C( 123), INT8_C( -72), INT8_C( -97), INT8_C(-115), INT8_C( 17), INT8_C( -93), INT8_C(-122), INT8_C( 31), INT8_C( 27), INT8_C( 109), INT8_C( 115), INT8_C( 53), INT8_C( -96)), simde_mm512_set_epi8(INT8_C( -43), INT8_C( -18), INT8_C( 114), INT8_C( -29), INT8_C( 118), INT8_C( -1), INT8_C( -20), INT8_C( -38), INT8_C( -80), INT8_C( 88), INT8_C(-111), INT8_C( -91), INT8_C( 44), INT8_C( -72), INT8_C( 106), INT8_C( 19), INT8_C( -46), INT8_C( 107), INT8_C( 46), INT8_C( 44), INT8_C( -65), INT8_C(-128), INT8_C( 41), INT8_C( 44), INT8_C( 68), INT8_C( 69), INT8_C( -78), INT8_C( -47), INT8_C( 109), INT8_C( 120), INT8_C( -57), INT8_C( -95), INT8_C( 95), INT8_C( 80), INT8_C( -30), INT8_C( 97), INT8_C( -48), INT8_C( -97), INT8_C( 111), INT8_C( -80), INT8_C(-122), INT8_C( -81), INT8_C( -71), INT8_C( 85), INT8_C( 77), INT8_C( -42), INT8_C(-115), INT8_C( -77), INT8_C( 29), INT8_C( 77), INT8_C( 64), INT8_C( -20), INT8_C( 27), INT8_C( 41), INT8_C( 13), INT8_C( 109), 
INT8_C( 22), INT8_C( -98), INT8_C( 20), INT8_C( -28), INT8_C( 66), INT8_C( -7), INT8_C(-113), INT8_C(-119)), simde_mm512_set_epi8(INT8_C( 60), INT8_C( 117), INT8_C(-127), INT8_C( -20), INT8_C( -73), INT8_C(-127), INT8_C( 75), INT8_C(-113), INT8_C( 46), INT8_C( 117), INT8_C( 14), INT8_C( -12), INT8_C( 88), INT8_C( -73), INT8_C( -32), INT8_C( 56), INT8_C(-108), INT8_C( -9), INT8_C( 93), INT8_C( -35), INT8_C( -9), INT8_C( -67), INT8_C( 58), INT8_C( 80), INT8_C( 11), INT8_C( 73), INT8_C( 97), INT8_C(-112), INT8_C( 47), INT8_C( 12), INT8_C( 40), INT8_C( 33), INT8_C( -70), INT8_C( -83), INT8_C( 23), INT8_C( -25), INT8_C( -69), INT8_C( 70), INT8_C( 89), INT8_C( -84), INT8_C( 102), INT8_C( 28), INT8_C( 73), INT8_C(-123), INT8_C( 98), INT8_C( 101), INT8_C( -75), INT8_C( -89), INT8_C( 7), INT8_C( 23), INT8_C( 48), INT8_C(-113), INT8_C( -99), INT8_C( 118), INT8_C(-128), INT8_C( -92), INT8_C(-115), INT8_C( -24), INT8_C( 11), INT8_C( 55), INT8_C( 43), INT8_C( 122), INT8_C( -90), INT8_C( 23)) }, { simde_mm512_set_epi8(INT8_C(-124), INT8_C( -73), INT8_C( 74), INT8_C( 5), INT8_C( -9), INT8_C( 17), INT8_C( -81), INT8_C( -54), INT8_C( -5), INT8_C( -33), INT8_C( -12), INT8_C( 26), INT8_C( 86), INT8_C( 122), INT8_C( -44), INT8_C( -23), INT8_C( 0), INT8_C( 43), INT8_C( -25), INT8_C(-122), INT8_C( -79), INT8_C(-122), INT8_C( -88), INT8_C(-121), INT8_C(-102), INT8_C( 66), INT8_C( -93), INT8_C( 105), INT8_C( 109), INT8_C( -68), INT8_C( 24), INT8_C( -54), INT8_C( 40), INT8_C( 68), INT8_C( 2), INT8_C( 60), INT8_C( 0), INT8_C( 5), INT8_C( 59), INT8_C( -54), INT8_C( -76), INT8_C( 27), INT8_C( -23), INT8_C( 77), INT8_C(-108), INT8_C( -28), INT8_C(-114), INT8_C( 56), INT8_C( -54), INT8_C(-108), INT8_C( -15), INT8_C( -89), INT8_C(-103), INT8_C( -45), INT8_C( 74), INT8_C( -3), INT8_C(-108), INT8_C( 55), INT8_C( -79), INT8_C( -62), INT8_C( 14), INT8_C( 106), INT8_C( -16), INT8_C( -10)), simde_mm512_set_epi8(INT8_C( -47), INT8_C( 124), INT8_C( 57), INT8_C( -74), INT8_C( 20), INT8_C( 124), INT8_C( 
70), INT8_C( -69), INT8_C( -65), INT8_C( -12), INT8_C( 124), INT8_C( -90), INT8_C(-113), INT8_C( 63), INT8_C( -79), INT8_C( -70), INT8_C( -76), INT8_C( -34), INT8_C( -60), INT8_C( -4), INT8_C( -41), INT8_C( 60), INT8_C( 77), INT8_C( -57), INT8_C( 13), INT8_C( 2), INT8_C( 111), INT8_C( -39), INT8_C( 41), INT8_C( 54), INT8_C( -37), INT8_C( 114), INT8_C( 92), INT8_C(-111), INT8_C( 77), INT8_C( 14), INT8_C(-104), INT8_C( -39), INT8_C( -74), INT8_C( 66), INT8_C( 16), INT8_C( -26), INT8_C( -89), INT8_C(-114), INT8_C( -68), INT8_C( 6), INT8_C( 62), INT8_C( -93), INT8_C( 55), INT8_C(-113), INT8_C( -60), INT8_C( -56), INT8_C( -37), INT8_C( 2), INT8_C( -15), INT8_C( 88), INT8_C( 26), INT8_C( 54), INT8_C( 82), INT8_C( 124), INT8_C( -38), INT8_C(-107), INT8_C( 40), INT8_C( 13)), simde_mm512_set_epi8(INT8_C( -77), INT8_C( 59), INT8_C( 17), INT8_C( 79), INT8_C( -29), INT8_C(-107), INT8_C( 105), INT8_C( 15), INT8_C( 60), INT8_C( -21), INT8_C( 120), INT8_C( 116), INT8_C( -57), INT8_C( 59), INT8_C( 35), INT8_C( 47), INT8_C( 76), INT8_C( 77), INT8_C( 35), INT8_C(-118), INT8_C( -38), INT8_C( 74), INT8_C( 91), INT8_C( -64), INT8_C(-115), INT8_C( 64), INT8_C( 52), INT8_C(-112), INT8_C( 68), INT8_C(-122), INT8_C( 61), INT8_C( 88), INT8_C( -52), INT8_C( -77), INT8_C( -75), INT8_C( 46), INT8_C( 104), INT8_C( 44), INT8_C(-123), INT8_C(-120), INT8_C( -92), INT8_C( 53), INT8_C( 66), INT8_C( -65), INT8_C( -40), INT8_C( -34), INT8_C( 80), INT8_C(-107), INT8_C(-109), INT8_C( 5), INT8_C( 45), INT8_C( -33), INT8_C( -66), INT8_C( -47), INT8_C( 89), INT8_C( -91), INT8_C( 122), INT8_C( 1), INT8_C( 95), INT8_C( 70), INT8_C( 52), INT8_C( -43), INT8_C( -56), INT8_C( -23)) }, { simde_mm512_set_epi8(INT8_C( 5), INT8_C( -68), INT8_C( -18), INT8_C( -37), INT8_C( 5), INT8_C( 16), INT8_C(-109), INT8_C( -67), INT8_C( -62), INT8_C( -4), INT8_C( 14), INT8_C(-109), INT8_C( -29), INT8_C(-121), INT8_C(-109), INT8_C( -55), INT8_C( 1), INT8_C( -38), INT8_C( 107), INT8_C( 55), INT8_C( -36), INT8_C( -76), INT8_C( 35), 
INT8_C( -40), INT8_C( 10), INT8_C( -90), INT8_C( -48), INT8_C(-112), INT8_C( -9), INT8_C( -53), INT8_C( 105), INT8_C( 27), INT8_C( -97), INT8_C(-124), INT8_C( 4), INT8_C( -36), INT8_C( -16), INT8_C( -87), INT8_C( -89), INT8_C(-104), INT8_C( -30), INT8_C(-101), INT8_C( 69), INT8_C( 79), INT8_C( 59), INT8_C( -97), INT8_C( -15), INT8_C( 17), INT8_C( 106), INT8_C( -85), INT8_C( 126), INT8_C(-121), INT8_C( -91), INT8_C( 26), INT8_C(-115), INT8_C(-117), INT8_C( 91), INT8_C( 73), INT8_C( -60), INT8_C( 69), INT8_C( -23), INT8_C( 48), INT8_C( 70), INT8_C( -8)), simde_mm512_set_epi8(INT8_C( 91), INT8_C(-103), INT8_C( 69), INT8_C( 61), INT8_C( -82), INT8_C( 73), INT8_C( 122), INT8_C( -22), INT8_C( 122), INT8_C( 76), INT8_C( -9), INT8_C( 121), INT8_C(-123), INT8_C(-119), INT8_C(-127), INT8_C( 126), INT8_C( 105), INT8_C( 10), INT8_C(-120), INT8_C(-127), INT8_C( -50), INT8_C( 15), INT8_C( -93), INT8_C( -86), INT8_C(-125), INT8_C( 45), INT8_C( -39), INT8_C(-119), INT8_C( 74), INT8_C( -92), INT8_C( -78), INT8_C( 53), INT8_C( 17), INT8_C( -21), INT8_C( 105), INT8_C(-102), INT8_C( -1), INT8_C( -19), INT8_C( 110), INT8_C( -84), INT8_C( -93), INT8_C( 19), INT8_C( -98), INT8_C(-128), INT8_C( -23), INT8_C( 49), INT8_C( 100), INT8_C( 122), INT8_C( -96), INT8_C(-103), INT8_C( 60), INT8_C( -24), INT8_C( 23), INT8_C( -52), INT8_C( -37), INT8_C( -56), INT8_C( -50), INT8_C( 4), INT8_C( -69), INT8_C( 1), INT8_C( -25), INT8_C( -10), INT8_C( 93), INT8_C( 51)), simde_mm512_set_epi8(INT8_C( -86), INT8_C( 35), INT8_C( -87), INT8_C( -98), INT8_C( 87), INT8_C( -57), INT8_C( 25), INT8_C( -45), INT8_C( 72), INT8_C( -80), INT8_C( 23), INT8_C( 26), INT8_C( 94), INT8_C( -2), INT8_C( 18), INT8_C( 75), INT8_C(-104), INT8_C( -48), INT8_C( -29), INT8_C( -74), INT8_C( 14), INT8_C( -91), INT8_C(-128), INT8_C( 46), INT8_C(-121), INT8_C( 121), INT8_C( -9), INT8_C( 7), INT8_C( -83), INT8_C( 39), INT8_C( -73), INT8_C( -26), INT8_C(-114), INT8_C(-103), INT8_C(-101), INT8_C( 66), INT8_C( -15), INT8_C( -68), INT8_C( 
57), INT8_C( -20), INT8_C( 63), INT8_C(-120), INT8_C( -89), INT8_C( -49), INT8_C( 82), INT8_C( 110), INT8_C(-115), INT8_C(-105), INT8_C( -54), INT8_C( 18), INT8_C( 66), INT8_C( -97), INT8_C(-114), INT8_C( 78), INT8_C( -78), INT8_C( -61), INT8_C(-115), INT8_C( 69), INT8_C( 9), INT8_C( 68), INT8_C( 2), INT8_C( 58), INT8_C( -23), INT8_C( -59)) }, { simde_mm512_set_epi8(INT8_C( -89), INT8_C( 43), INT8_C( 52), INT8_C( 82), INT8_C( -37), INT8_C( 55), INT8_C( 112), INT8_C( -22), INT8_C( -75), INT8_C( -36), INT8_C( -34), INT8_C( -15), INT8_C( 35), INT8_C( -42), INT8_C(-101), INT8_C( -5), INT8_C( 2), INT8_C( 35), INT8_C( 14), INT8_C( -73), INT8_C( -50), INT8_C( -33), INT8_C( -65), INT8_C( 94), INT8_C( -6), INT8_C( -21), INT8_C( -28), INT8_C( 21), INT8_C( 102), INT8_C( -87), INT8_C( 114), INT8_C( 125), INT8_C( 113), INT8_C( 124), INT8_C(-121), INT8_C(-122), INT8_C( 23), INT8_C( 107), INT8_C( 24), INT8_C( 126), INT8_C( 80), INT8_C( 59), INT8_C( 39), INT8_C( -61), INT8_C(-105), INT8_C( 32), INT8_C( 55), INT8_C( -9), INT8_C( 60), INT8_C(-125), INT8_C( 72), INT8_C( -36), INT8_C( 77), INT8_C( -65), INT8_C( 117), INT8_C( -85), INT8_C( 98), INT8_C( -83), INT8_C( -69), INT8_C( -52), INT8_C( 41), INT8_C( -10), INT8_C( -18), INT8_C( 56)), simde_mm512_set_epi8(INT8_C( 22), INT8_C( 122), INT8_C( -90), INT8_C( 2), INT8_C( -65), INT8_C( 51), INT8_C( -94), INT8_C( -50), INT8_C( -15), INT8_C( 19), INT8_C( -19), INT8_C( 66), INT8_C( 119), INT8_C(-118), INT8_C(-112), INT8_C(-116), INT8_C( 44), INT8_C( -12), INT8_C( 31), INT8_C( 43), INT8_C( -16), INT8_C( -37), INT8_C( -24), INT8_C( -32), INT8_C( -95), INT8_C( -86), INT8_C( -96), INT8_C( 80), INT8_C( 68), INT8_C( 13), INT8_C( -8), INT8_C( 67), INT8_C( 107), INT8_C(-125), INT8_C( 104), INT8_C( -80), INT8_C( 97), INT8_C( -78), INT8_C( 106), INT8_C( -53), INT8_C( -36), INT8_C( -90), INT8_C( 74), INT8_C( -72), INT8_C( 59), INT8_C( -81), INT8_C( -8), INT8_C( -25), INT8_C( -55), INT8_C( -99), INT8_C( 20), INT8_C( 9), INT8_C( -89), INT8_C( -90), 
INT8_C( 108), INT8_C( 56), INT8_C( -19), INT8_C( 81), INT8_C( 122), INT8_C( 6), INT8_C(-119), INT8_C( 122), INT8_C( -35), INT8_C( 106)), simde_mm512_set_epi8(INT8_C(-111), INT8_C( -79), INT8_C(-114), INT8_C( 80), INT8_C( 28), INT8_C( 4), INT8_C( -50), INT8_C( 28), INT8_C( -60), INT8_C( -55), INT8_C( -15), INT8_C( -81), INT8_C( -84), INT8_C( 76), INT8_C( 11), INT8_C( 111), INT8_C( -42), INT8_C( 47), INT8_C( -17), INT8_C(-116), INT8_C( -34), INT8_C( 4), INT8_C( -41), INT8_C( 126), INT8_C( 89), INT8_C( 65), INT8_C( 68), INT8_C( -59), INT8_C( 34), INT8_C(-100), INT8_C( 122), INT8_C( 58), INT8_C( 6), INT8_C( -7), INT8_C( 31), INT8_C( -42), INT8_C( -74), INT8_C( -71), INT8_C( -82), INT8_C( -77), INT8_C( 116), INT8_C(-107), INT8_C( -35), INT8_C( 11), INT8_C( 92), INT8_C( 113), INT8_C( 63), INT8_C( 16), INT8_C( 115), INT8_C( -26), INT8_C( 52), INT8_C( -45), INT8_C( -90), INT8_C( 25), INT8_C( 9), INT8_C( 115), INT8_C( 117), INT8_C( 92), INT8_C( 65), INT8_C( -58), INT8_C( -96), INT8_C( 124), INT8_C( 17), INT8_C( -50)) }, { simde_mm512_set_epi8(INT8_C( 105), INT8_C(-115), INT8_C( 121), INT8_C(-101), INT8_C( 0), INT8_C( 63), INT8_C( -42), INT8_C( -34), INT8_C( -5), INT8_C( -47), INT8_C(-123), INT8_C( -52), INT8_C( -86), INT8_C( -28), INT8_C( -63), INT8_C( 20), INT8_C( -60), INT8_C( -63), INT8_C( 99), INT8_C( 78), INT8_C( 56), INT8_C( -72), INT8_C( -55), INT8_C( -72), INT8_C( 79), INT8_C( -81), INT8_C( 124), INT8_C( -85), INT8_C( -65), INT8_C( 122), INT8_C( -25), INT8_C( -58), INT8_C( -64), INT8_C( 52), INT8_C( -12), INT8_C( 1), INT8_C( -62), INT8_C( -28), INT8_C( -28), INT8_C(-104), INT8_C( 54), INT8_C(-103), INT8_C( -55), INT8_C( -22), INT8_C( -91), INT8_C( 6), INT8_C( -9), INT8_C( -31), INT8_C( 18), INT8_C(-111), INT8_C( 58), INT8_C( 71), INT8_C( -73), INT8_C( -96), INT8_C( 28), INT8_C( -4), INT8_C( 47), INT8_C( 66), INT8_C( 121), INT8_C( 38), INT8_C( 69), INT8_C(-107), INT8_C( -57), INT8_C(-120)), simde_mm512_set_epi8(INT8_C( -49), INT8_C( 15), INT8_C( -15), INT8_C( -59), 
INT8_C(-113), INT8_C( 102), INT8_C( -48), INT8_C( -78), INT8_C( 31), INT8_C( 94), INT8_C( 79), INT8_C( 92), INT8_C( 106), INT8_C( -68), INT8_C( 96), INT8_C( -97), INT8_C( -27), INT8_C(-118), INT8_C( -11), INT8_C( 112), INT8_C(-125), INT8_C( 70), INT8_C( 26), INT8_C( -38), INT8_C( -16), INT8_C(-112), INT8_C( 10), INT8_C( 98), INT8_C( -4), INT8_C( 120), INT8_C( -33), INT8_C(-127), INT8_C( -65), INT8_C( -40), INT8_C( 88), INT8_C( -6), INT8_C( 74), INT8_C( 41), INT8_C( 39), INT8_C( 79), INT8_C(-125), INT8_C( -7), INT8_C( 62), INT8_C(-112), INT8_C(-119), INT8_C( -9), INT8_C( 71), INT8_C( -68), INT8_C( -79), INT8_C( 48), INT8_C( -20), INT8_C( -97), INT8_C(-116), INT8_C( 120), INT8_C( -65), INT8_C( 6), INT8_C( -32), INT8_C( -75), INT8_C(-106), INT8_C( 26), INT8_C( -96), INT8_C( 50), INT8_C( -45), INT8_C( 16)), simde_mm512_set_epi8(INT8_C(-102), INT8_C( 126), INT8_C(-120), INT8_C( -42), INT8_C( 113), INT8_C( -39), INT8_C( 6), INT8_C( 44), INT8_C( -36), INT8_C( 115), INT8_C( 54), INT8_C( 112), INT8_C( 64), INT8_C( 40), INT8_C( 97), INT8_C( 117), INT8_C( -33), INT8_C( 55), INT8_C( 110), INT8_C( -34), INT8_C( -75), INT8_C( 114), INT8_C( -81), INT8_C( -34), INT8_C( 95), INT8_C( 31), INT8_C( 114), INT8_C( 73), INT8_C( -61), INT8_C( 2), INT8_C( 8), INT8_C( 69), INT8_C( 1), INT8_C( 92), INT8_C(-100), INT8_C( 7), INT8_C( 120), INT8_C( -69), INT8_C( -67), INT8_C( 73), INT8_C( -77), INT8_C( -96), INT8_C(-117), INT8_C( 90), INT8_C( 28), INT8_C( 15), INT8_C( -80), INT8_C( 37), INT8_C( 97), INT8_C( 97), INT8_C( 78), INT8_C( -88), INT8_C( 43), INT8_C( 40), INT8_C( 93), INT8_C( -10), INT8_C( 79), INT8_C(-115), INT8_C( -29), INT8_C( 12), INT8_C( -91), INT8_C( 99), INT8_C( -12), INT8_C( 120)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_sub_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_sub_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i 
src; simde__mmask64 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( 121), INT8_C( -8), INT8_C(-121), INT8_C( -19), INT8_C( 19), INT8_C( -3), INT8_C( 10), INT8_C( -37), INT8_C( 96), INT8_C( 15), INT8_C( -45), INT8_C( -44), INT8_C( -83), INT8_C( -37), INT8_C( 8), INT8_C( 111), INT8_C( -30), INT8_C( -99), INT8_C( 116), INT8_C( 112), INT8_C( 67), INT8_C(-123), INT8_C( 59), INT8_C( -62), INT8_C( 33), INT8_C( 51), INT8_C( -16), INT8_C( -35), INT8_C( -53), INT8_C( 108), INT8_C( -37), INT8_C( -15), INT8_C( 26), INT8_C( 83), INT8_C( -47), INT8_C( -23), INT8_C( 6), INT8_C( 52), INT8_C( -19), INT8_C( 108), INT8_C( -33), INT8_C( 120), INT8_C( 55), INT8_C(-128), INT8_C( -46), INT8_C( 117), INT8_C( 41), INT8_C( -54), INT8_C( -99), INT8_C( -39), INT8_C( 117), INT8_C( 57), INT8_C( 78), INT8_C(-110), INT8_C( -8), INT8_C(-114), INT8_C( -54), INT8_C( 20), INT8_C( 112), INT8_C( 39), INT8_C( -60), INT8_C( 36), INT8_C( -53), INT8_C( 53)), UINT64_C( 567574400), simde_mm512_set_epi8(INT8_C( 115), INT8_C( 65), INT8_C(-123), INT8_C( 116), INT8_C( -46), INT8_C( -68), INT8_C( -8), INT8_C( 96), INT8_C( -90), INT8_C( -14), INT8_C( 27), INT8_C( 33), INT8_C( 21), INT8_C( 58), INT8_C( -12), INT8_C( -76), INT8_C( 70), INT8_C( -70), INT8_C( -4), INT8_C( 64), INT8_C( 35), INT8_C( -32), INT8_C(-115), INT8_C( 109), INT8_C( -69), INT8_C( -61), INT8_C(-126), INT8_C( 45), INT8_C( 93), INT8_C( 100), INT8_C( -53), INT8_C( 105), INT8_C( -51), INT8_C( 43), INT8_C( -96), INT8_C( -95), INT8_C( 86), INT8_C( -66), INT8_C( -47), INT8_C( 123), INT8_C( 63), INT8_C( 16), INT8_C( -66), INT8_C( 12), INT8_C( 115), INT8_C( -39), INT8_C( 10), INT8_C( 108), INT8_C(-120), INT8_C( 110), INT8_C( -25), INT8_C( 7), INT8_C( 105), INT8_C( -93), INT8_C( 68), INT8_C( 3), INT8_C( 113), INT8_C( -50), INT8_C( -34), INT8_C( 22), INT8_C( -61), INT8_C( 75), INT8_C( 28), INT8_C( 116)), simde_mm512_set_epi8(INT8_C( 26), INT8_C( -6), INT8_C( -92), INT8_C( 7), INT8_C(-105), INT8_C( -92), 
INT8_C( 38), INT8_C( -63), INT8_C( 77), INT8_C( 86), INT8_C( 113), INT8_C( -48), INT8_C( 108), INT8_C( -92), INT8_C( 69), INT8_C( 74), INT8_C( 67), INT8_C( 96), INT8_C( -34), INT8_C( 78), INT8_C( 124), INT8_C( 9), INT8_C( -1), INT8_C( -86), INT8_C( -35), INT8_C( 26), INT8_C( 67), INT8_C( 46), INT8_C( 75), INT8_C(-119), INT8_C( 68), INT8_C( 31), INT8_C( -52), INT8_C(-102), INT8_C( -4), INT8_C( 118), INT8_C( 0), INT8_C( -44), INT8_C( 123), INT8_C( -73), INT8_C( 84), INT8_C( 30), INT8_C( -8), INT8_C( 64), INT8_C( -20), INT8_C( -79), INT8_C( -85), INT8_C( -23), INT8_C( -34), INT8_C( -4), INT8_C( -85), INT8_C( 107), INT8_C( -6), INT8_C( 16), INT8_C( -66), INT8_C(-113), INT8_C( 60), INT8_C( 127), INT8_C( -54), INT8_C( -36), INT8_C( 73), INT8_C( -97), INT8_C( -65), INT8_C( 63)), simde_mm512_set_epi8(INT8_C( 121), INT8_C( -8), INT8_C(-121), INT8_C( -19), INT8_C( 19), INT8_C( -3), INT8_C( 10), INT8_C( -37), INT8_C( 96), INT8_C( 15), INT8_C( -45), INT8_C( -44), INT8_C( -83), INT8_C( -37), INT8_C( 8), INT8_C( 111), INT8_C( -30), INT8_C( -99), INT8_C( 116), INT8_C( 112), INT8_C( 67), INT8_C(-123), INT8_C( 59), INT8_C( -62), INT8_C( 33), INT8_C( 51), INT8_C( -16), INT8_C( -35), INT8_C( -53), INT8_C( 108), INT8_C( -37), INT8_C( -15), INT8_C( 26), INT8_C( 83), INT8_C( -92), INT8_C( -23), INT8_C( 6), INT8_C( 52), INT8_C( -19), INT8_C( -60), INT8_C( -21), INT8_C( -14), INT8_C( 55), INT8_C( -52), INT8_C( -46), INT8_C( 40), INT8_C( 41), INT8_C( -54), INT8_C( -99), INT8_C( 114), INT8_C( 60), INT8_C(-100), INT8_C( 111), INT8_C(-109), INT8_C(-122), INT8_C( 116), INT8_C( 53), INT8_C( 20), INT8_C( 112), INT8_C( 39), INT8_C( -60), INT8_C( 36), INT8_C( -53), INT8_C( 53)) }, { simde_mm512_set_epi8(INT8_C( 44), INT8_C( 68), INT8_C( 96), INT8_C( 88), INT8_C( 1), INT8_C( 68), INT8_C( 46), INT8_C( 19), INT8_C( 31), INT8_C( 85), INT8_C( 35), INT8_C( 68), INT8_C( -79), INT8_C( 41), INT8_C( 28), INT8_C( 92), INT8_C( -26), INT8_C( 20), INT8_C( -16), INT8_C( -7), INT8_C( 41), INT8_C( 71), INT8_C( 
88), INT8_C( 39), INT8_C( -42), INT8_C( 76), INT8_C( 40), INT8_C( 108), INT8_C( -87), INT8_C(-126), INT8_C( 42), INT8_C( 58), INT8_C( 2), INT8_C( -23), INT8_C( 5), INT8_C(-116), INT8_C( 34), INT8_C( 66), INT8_C( 28), INT8_C( 86), INT8_C( 50), INT8_C( -67), INT8_C( 20), INT8_C( 73), INT8_C( 27), INT8_C( -29), INT8_C( 84), INT8_C( 112), INT8_C( 104), INT8_C( 53), INT8_C( -89), INT8_C(-113), INT8_C( -4), INT8_C( 94), INT8_C( 75), INT8_C( 21), INT8_C(-120), INT8_C( -25), INT8_C( 111), INT8_C(-128), INT8_C( -4), INT8_C( 58), INT8_C(-115), INT8_C( 4)), UINT64_C( 2779079274), simde_mm512_set_epi8(INT8_C( -70), INT8_C( -58), INT8_C( 69), INT8_C( -7), INT8_C( 115), INT8_C( -10), INT8_C( -39), INT8_C( 78), INT8_C( 56), INT8_C( 116), INT8_C( 104), INT8_C( -92), INT8_C( 5), INT8_C( -47), INT8_C( 27), INT8_C( 94), INT8_C(-126), INT8_C( 88), INT8_C( 80), INT8_C( 112), INT8_C( 21), INT8_C( 62), INT8_C( 86), INT8_C(-103), INT8_C( 66), INT8_C( -9), INT8_C( -26), INT8_C( 47), INT8_C( -50), INT8_C(-118), INT8_C( 115), INT8_C( 49), INT8_C( 42), INT8_C( 6), INT8_C( 92), INT8_C( 2), INT8_C( 63), INT8_C( -6), INT8_C( -32), INT8_C( 15), INT8_C( 66), INT8_C( 82), INT8_C( -9), INT8_C( -79), INT8_C(-123), INT8_C( 52), INT8_C( -90), INT8_C(-111), INT8_C( 62), INT8_C( -43), INT8_C( -50), INT8_C( 62), INT8_C( 4), INT8_C( -92), INT8_C( 86), INT8_C( -32), INT8_C( 69), INT8_C( -15), INT8_C( 55), INT8_C(-127), INT8_C( -36), INT8_C( 56), INT8_C( 46), INT8_C(-119)), simde_mm512_set_epi8(INT8_C( 18), INT8_C( 68), INT8_C( -9), INT8_C( 64), INT8_C(-111), INT8_C( 37), INT8_C( -82), INT8_C( -95), INT8_C( 54), INT8_C( 75), INT8_C( -77), INT8_C( -34), INT8_C( 52), INT8_C( -80), INT8_C( -94), INT8_C( 90), INT8_C( 0), INT8_C( -8), INT8_C( 123), INT8_C(-111), INT8_C( 16), INT8_C( 125), INT8_C( -51), INT8_C( 99), INT8_C( -22), INT8_C( 121), INT8_C( 63), INT8_C( -55), INT8_C( 117), INT8_C( 109), INT8_C(-126), INT8_C(-111), INT8_C( 47), INT8_C(-127), INT8_C( 109), INT8_C( -9), INT8_C( -42), INT8_C( 36), INT8_C( 
-32), INT8_C( 115), INT8_C( -89), INT8_C( 7), INT8_C( 90), INT8_C( 46), INT8_C( -83), INT8_C( -35), INT8_C( 30), INT8_C( -19), INT8_C( -99), INT8_C( -56), INT8_C( -70), INT8_C( 73), INT8_C( -61), INT8_C( 27), INT8_C( 117), INT8_C( 47), INT8_C( 0), INT8_C( -45), INT8_C( 59), INT8_C( 51), INT8_C( -35), INT8_C( 70), INT8_C( 73), INT8_C( 33)), simde_mm512_set_epi8(INT8_C( 44), INT8_C( 68), INT8_C( 96), INT8_C( 88), INT8_C( 1), INT8_C( 68), INT8_C( 46), INT8_C( 19), INT8_C( 31), INT8_C( 85), INT8_C( 35), INT8_C( 68), INT8_C( -79), INT8_C( 41), INT8_C( 28), INT8_C( 92), INT8_C( -26), INT8_C( 20), INT8_C( -16), INT8_C( -7), INT8_C( 41), INT8_C( 71), INT8_C( 88), INT8_C( 39), INT8_C( -42), INT8_C( 76), INT8_C( 40), INT8_C( 108), INT8_C( -87), INT8_C(-126), INT8_C( 42), INT8_C( 58), INT8_C( -5), INT8_C( -23), INT8_C( -17), INT8_C(-116), INT8_C( 34), INT8_C( -42), INT8_C( 28), INT8_C(-100), INT8_C(-101), INT8_C( -67), INT8_C( -99), INT8_C( 73), INT8_C( 27), INT8_C( 87), INT8_C( 84), INT8_C( -92), INT8_C( 104), INT8_C( 13), INT8_C( 20), INT8_C(-113), INT8_C( -4), INT8_C( 94), INT8_C( -31), INT8_C( 21), INT8_C(-120), INT8_C( 30), INT8_C( -4), INT8_C(-128), INT8_C( -1), INT8_C( 58), INT8_C( -27), INT8_C( 4)) }, { simde_mm512_set_epi8(INT8_C( -35), INT8_C( 32), INT8_C( -43), INT8_C( 108), INT8_C( 83), INT8_C( -59), INT8_C( -4), INT8_C( 125), INT8_C( -31), INT8_C( 118), INT8_C( -25), INT8_C( -91), INT8_C( 50), INT8_C( -74), INT8_C( 78), INT8_C( 95), INT8_C( -84), INT8_C( -63), INT8_C( 87), INT8_C(-108), INT8_C( 28), INT8_C( -70), INT8_C( 77), INT8_C(-113), INT8_C( -20), INT8_C( 50), INT8_C( 95), INT8_C(-108), INT8_C( 105), INT8_C( 114), INT8_C(-109), INT8_C( 19), INT8_C( -79), INT8_C( 106), INT8_C( 61), INT8_C( -12), INT8_C( 126), INT8_C(-117), INT8_C( 126), INT8_C(-125), INT8_C( -93), INT8_C( 69), INT8_C( 104), INT8_C( 119), INT8_C( 63), INT8_C( 95), INT8_C(-106), INT8_C( -66), INT8_C( -47), INT8_C( -45), INT8_C( -60), INT8_C( -54), INT8_C(-109), INT8_C( -45), INT8_C( -86), 
INT8_C( 121), INT8_C( 23), INT8_C( -12), INT8_C( 67), INT8_C( -6), INT8_C( -37), INT8_C( 92), INT8_C( -35), INT8_C( 99)), UINT64_C( 1100920337), simde_mm512_set_epi8(INT8_C( 13), INT8_C( 104), INT8_C( 50), INT8_C( 43), INT8_C( 82), INT8_C( -5), INT8_C( -23), INT8_C( -47), INT8_C( 99), INT8_C(-116), INT8_C( 118), INT8_C( 73), INT8_C( -10), INT8_C( -88), INT8_C( -42), INT8_C( -58), INT8_C( -49), INT8_C( 65), INT8_C( -18), INT8_C( 54), INT8_C( -68), INT8_C( 1), INT8_C( -7), INT8_C( -96), INT8_C( 4), INT8_C( 115), INT8_C( 42), INT8_C(-106), INT8_C( 31), INT8_C( 94), INT8_C( -71), INT8_C( -41), INT8_C( 33), INT8_C(-106), INT8_C( -65), INT8_C(-107), INT8_C( 71), INT8_C( -10), INT8_C( -21), INT8_C(-128), INT8_C( -23), INT8_C( 20), INT8_C( 2), INT8_C( 96), INT8_C(-128), INT8_C( -51), INT8_C( -38), INT8_C( 47), INT8_C( -56), INT8_C( 123), INT8_C( -20), INT8_C( -50), INT8_C( -2), INT8_C( 40), INT8_C( 24), INT8_C( -98), INT8_C( 4), INT8_C( -62), INT8_C( -44), INT8_C( 49), INT8_C( 83), INT8_C( 115), INT8_C( 5), INT8_C( 57)), simde_mm512_set_epi8(INT8_C( 76), INT8_C( -52), INT8_C( -96), INT8_C( -6), INT8_C(-119), INT8_C( -87), INT8_C( 102), INT8_C( 5), INT8_C( 24), INT8_C( -44), INT8_C( 110), INT8_C(-113), INT8_C(-116), INT8_C( -3), INT8_C( 62), INT8_C( -87), INT8_C( 7), INT8_C( -54), INT8_C( -57), INT8_C( -66), INT8_C( 42), INT8_C( -82), INT8_C( 46), INT8_C( -16), INT8_C( 91), INT8_C( -73), INT8_C( -20), INT8_C( -77), INT8_C( -11), INT8_C( 25), INT8_C( 12), INT8_C( 76), INT8_C( -58), INT8_C( 3), INT8_C(-125), INT8_C( -36), INT8_C( 18), INT8_C( -40), INT8_C( 111), INT8_C( 107), INT8_C( 88), INT8_C( 48), INT8_C( 113), INT8_C( -90), INT8_C(-117), INT8_C( 116), INT8_C( 46), INT8_C( -70), INT8_C( 51), INT8_C( -55), INT8_C( 127), INT8_C( 82), INT8_C( -88), INT8_C( 60), INT8_C( -59), INT8_C( 80), INT8_C( -51), INT8_C( 11), INT8_C( -44), INT8_C( 33), INT8_C( 29), INT8_C( 8), INT8_C( 5), INT8_C( 70)), simde_mm512_set_epi8(INT8_C( -35), INT8_C( 32), INT8_C( -43), INT8_C( 108), INT8_C( 
83), INT8_C( -59), INT8_C( -4), INT8_C( 125), INT8_C( -31), INT8_C( 118), INT8_C( -25), INT8_C( -91), INT8_C( 50), INT8_C( -74), INT8_C( 78), INT8_C( 95), INT8_C( -84), INT8_C( -63), INT8_C( 87), INT8_C(-108), INT8_C( 28), INT8_C( -70), INT8_C( 77), INT8_C(-113), INT8_C( -20), INT8_C( 50), INT8_C( 95), INT8_C(-108), INT8_C( 105), INT8_C( 114), INT8_C(-109), INT8_C( 19), INT8_C( -79), INT8_C(-109), INT8_C( 61), INT8_C( -12), INT8_C( 126), INT8_C(-117), INT8_C( 126), INT8_C( 21), INT8_C(-111), INT8_C( 69), INT8_C( 104), INT8_C( -70), INT8_C( -11), INT8_C( 89), INT8_C( -84), INT8_C( -66), INT8_C(-107), INT8_C( -45), INT8_C( 109), INT8_C( 124), INT8_C(-109), INT8_C( -20), INT8_C( 83), INT8_C( 121), INT8_C( 23), INT8_C( -12), INT8_C( 67), INT8_C( 16), INT8_C( -37), INT8_C( 92), INT8_C( -35), INT8_C( -13)) }, { simde_mm512_set_epi8(INT8_C( 27), INT8_C( 45), INT8_C( 71), INT8_C( -63), INT8_C( 96), INT8_C(-106), INT8_C( -43), INT8_C( 10), INT8_C( 104), INT8_C( -19), INT8_C(-110), INT8_C( 126), INT8_C( -52), INT8_C( -56), INT8_C( -96), INT8_C( -27), INT8_C(-125), INT8_C(-116), INT8_C( 25), INT8_C( 78), INT8_C( -76), INT8_C( -85), INT8_C( -23), INT8_C( -19), INT8_C(-106), INT8_C( 126), INT8_C( 19), INT8_C( -41), INT8_C( 40), INT8_C( 78), INT8_C( -69), INT8_C( 57), INT8_C( 73), INT8_C( -58), INT8_C( 3), INT8_C( 65), INT8_C( -87), INT8_C( -37), INT8_C( 5), INT8_C(-126), INT8_C( 14), INT8_C( -36), INT8_C( -37), INT8_C( 11), INT8_C( 94), INT8_C( 24), INT8_C( 8), INT8_C( -31), INT8_C( -38), INT8_C( -1), INT8_C( 48), INT8_C( 32), INT8_C( 88), INT8_C( -18), INT8_C( 123), INT8_C( 27), INT8_C( 111), INT8_C( 27), INT8_C( -3), INT8_C( 52), INT8_C( -31), INT8_C( 2), INT8_C( -47), INT8_C( 64)), UINT64_C( 361367503), simde_mm512_set_epi8(INT8_C( -20), INT8_C(-104), INT8_C( -27), INT8_C( 38), INT8_C( 31), INT8_C( -21), INT8_C( 79), INT8_C( -62), INT8_C( 36), INT8_C( 95), INT8_C( 42), INT8_C(-102), INT8_C( -80), INT8_C( -69), INT8_C( 107), INT8_C(-114), INT8_C( 76), INT8_C( 123), 
INT8_C(-126), INT8_C( 108), INT8_C( -55), INT8_C( 89), INT8_C( -46), INT8_C( 18), INT8_C( 117), INT8_C( 25), INT8_C(-120), INT8_C( 27), INT8_C( 34), INT8_C( 64), INT8_C( 71), INT8_C( 64), INT8_C( -13), INT8_C( -73), INT8_C( 112), INT8_C( 25), INT8_C( -18), INT8_C( -63), INT8_C( 109), INT8_C( 9), INT8_C( 14), INT8_C(-125), INT8_C( -89), INT8_C( 70), INT8_C( 10), INT8_C( 15), INT8_C( 120), INT8_C( -59), INT8_C( 55), INT8_C( 108), INT8_C( 41), INT8_C( -5), INT8_C( -91), INT8_C(-120), INT8_C( -46), INT8_C( 122), INT8_C( 116), INT8_C(-120), INT8_C( -67), INT8_C( -86), INT8_C( 48), INT8_C( 2), INT8_C( 37), INT8_C( -26)), simde_mm512_set_epi8(INT8_C( -70), INT8_C( 3), INT8_C( 118), INT8_C( 37), INT8_C( 104), INT8_C( 111), INT8_C( -17), INT8_C( 110), INT8_C( -58), INT8_C( 58), INT8_C( 102), INT8_C( 64), INT8_C( -67), INT8_C( -76), INT8_C( -30), INT8_C( 108), INT8_C( 79), INT8_C( 46), INT8_C( -40), INT8_C( 101), INT8_C( -13), INT8_C( -25), INT8_C( 60), INT8_C( 25), INT8_C( 32), INT8_C( -21), INT8_C( 114), INT8_C( -21), INT8_C( 71), INT8_C( -85), INT8_C( 34), INT8_C( 82), INT8_C(-114), INT8_C( -30), INT8_C( -58), INT8_C( 116), INT8_C( 58), INT8_C(-105), INT8_C( 117), INT8_C( 11), INT8_C( -91), INT8_C( 118), INT8_C( -50), INT8_C( -8), INT8_C( -22), INT8_C( 59), INT8_C( -29), INT8_C( -88), INT8_C( -82), INT8_C( -24), INT8_C( 18), INT8_C( 115), INT8_C( -15), INT8_C( 55), INT8_C( 78), INT8_C( 60), INT8_C( -8), INT8_C( -91), INT8_C( 126), INT8_C( 15), INT8_C( 23), INT8_C( 6), INT8_C( -21), INT8_C( 120)), simde_mm512_set_epi8(INT8_C( 27), INT8_C( 45), INT8_C( 71), INT8_C( -63), INT8_C( 96), INT8_C(-106), INT8_C( -43), INT8_C( 10), INT8_C( 104), INT8_C( -19), INT8_C(-110), INT8_C( 126), INT8_C( -52), INT8_C( -56), INT8_C( -96), INT8_C( -27), INT8_C(-125), INT8_C(-116), INT8_C( 25), INT8_C( 78), INT8_C( -76), INT8_C( -85), INT8_C( -23), INT8_C( -19), INT8_C(-106), INT8_C( 126), INT8_C( 19), INT8_C( -41), INT8_C( 40), INT8_C( 78), INT8_C( -69), INT8_C( 57), INT8_C( 73), INT8_C( -58), 
INT8_C( 3), INT8_C( -91), INT8_C( -87), INT8_C( 42), INT8_C( 5), INT8_C( -2), INT8_C( 105), INT8_C( -36), INT8_C( -37), INT8_C( 11), INT8_C( 32), INT8_C( 24), INT8_C(-107), INT8_C( -31), INT8_C( -38), INT8_C( -1), INT8_C( 48), INT8_C( 32), INT8_C( 88), INT8_C( 81), INT8_C(-124), INT8_C( 62), INT8_C( 124), INT8_C( -29), INT8_C( -3), INT8_C( 52), INT8_C( 25), INT8_C( -4), INT8_C( 58), INT8_C( 110)) }, { simde_mm512_set_epi8(INT8_C( 4), INT8_C( 97), INT8_C( 53), INT8_C( -46), INT8_C( 92), INT8_C(-100), INT8_C( 47), INT8_C( 107), INT8_C( -52), INT8_C( 68), INT8_C( 11), INT8_C( -16), INT8_C( -66), INT8_C( -79), INT8_C( -14), INT8_C( 27), INT8_C( 14), INT8_C( 125), INT8_C( 22), INT8_C( -82), INT8_C( 44), INT8_C( -12), INT8_C( 94), INT8_C( -30), INT8_C( 98), INT8_C( 125), INT8_C(-107), INT8_C( 37), INT8_C( -66), INT8_C( 90), INT8_C( 68), INT8_C( 10), INT8_C( -72), INT8_C( -10), INT8_C(-119), INT8_C( -9), INT8_C( 49), INT8_C(-107), INT8_C( 10), INT8_C( 47), INT8_C( 58), INT8_C(-125), INT8_C( 4), INT8_C( 68), INT8_C( -24), INT8_C( -12), INT8_C( 44), INT8_C(-128), INT8_C( -52), INT8_C( -61), INT8_C( -14), INT8_C( -38), INT8_C( -93), INT8_C( -34), INT8_C( 64), INT8_C( -67), INT8_C(-123), INT8_C( 123), INT8_C( -93), INT8_C( 41), INT8_C( 97), INT8_C( -8), INT8_C( -86), INT8_C( -16)), UINT64_C( 944667126), simde_mm512_set_epi8(INT8_C( -24), INT8_C( -47), INT8_C(-119), INT8_C( 5), INT8_C( 95), INT8_C( 82), INT8_C( -3), INT8_C( -62), INT8_C(-116), INT8_C( -98), INT8_C( -29), INT8_C( 77), INT8_C( -38), INT8_C(-118), INT8_C( -85), INT8_C( 121), INT8_C( -72), INT8_C(-111), INT8_C( 28), INT8_C( -18), INT8_C( 64), INT8_C(-126), INT8_C( 122), INT8_C( -54), INT8_C( 87), INT8_C( -22), INT8_C( 17), INT8_C( 50), INT8_C( -83), INT8_C( -39), INT8_C( 77), INT8_C( -13), INT8_C( 17), INT8_C( -66), INT8_C(-128), INT8_C( 77), INT8_C( 107), INT8_C( 47), INT8_C( -68), INT8_C( -44), INT8_C( -30), INT8_C( -22), INT8_C( 14), INT8_C( 26), INT8_C( 59), INT8_C( 103), INT8_C( -54), INT8_C( -39), INT8_C( 
16), INT8_C( 5), INT8_C( 18), INT8_C(-104), INT8_C(-119), INT8_C( -46), INT8_C( -92), INT8_C( 37), INT8_C( -84), INT8_C( 2), INT8_C( -49), INT8_C( 99), INT8_C( -79), INT8_C( 48), INT8_C(-103), INT8_C( 3)), simde_mm512_set_epi8(INT8_C( -56), INT8_C( -56), INT8_C( 57), INT8_C( -25), INT8_C( -3), INT8_C( 99), INT8_C( -6), INT8_C( 31), INT8_C( -96), INT8_C( 49), INT8_C( 110), INT8_C( -10), INT8_C( -82), INT8_C( 32), INT8_C( -27), INT8_C( 112), INT8_C( 84), INT8_C( 37), INT8_C( -62), INT8_C( 38), INT8_C( -53), INT8_C( -97), INT8_C( 76), INT8_C( 13), INT8_C(-124), INT8_C(-120), INT8_C( -86), INT8_C( 98), INT8_C( 96), INT8_C( 4), INT8_C( 4), INT8_C( 94), INT8_C( -41), INT8_C( -81), INT8_C( -40), INT8_C( -28), INT8_C( -23), INT8_C( -59), INT8_C( -15), INT8_C( -40), INT8_C( 113), INT8_C( 116), INT8_C( 41), INT8_C( -96), INT8_C( -83), INT8_C( 4), INT8_C( 93), INT8_C( 28), INT8_C( 114), INT8_C( 29), INT8_C( -56), INT8_C( -61), INT8_C(-124), INT8_C(-107), INT8_C( -23), INT8_C( -89), INT8_C( 38), INT8_C( -97), INT8_C( 109), INT8_C( 53), INT8_C(-117), INT8_C( 76), INT8_C( -82), INT8_C( -65)), simde_mm512_set_epi8(INT8_C( 4), INT8_C( 97), INT8_C( 53), INT8_C( -46), INT8_C( 92), INT8_C(-100), INT8_C( 47), INT8_C( 107), INT8_C( -52), INT8_C( 68), INT8_C( 11), INT8_C( -16), INT8_C( -66), INT8_C( -79), INT8_C( -14), INT8_C( 27), INT8_C( 14), INT8_C( 125), INT8_C( 22), INT8_C( -82), INT8_C( 44), INT8_C( -12), INT8_C( 94), INT8_C( -30), INT8_C( 98), INT8_C( 125), INT8_C(-107), INT8_C( 37), INT8_C( -66), INT8_C( 90), INT8_C( 68), INT8_C( 10), INT8_C( -72), INT8_C( -10), INT8_C( -88), INT8_C( 105), INT8_C(-126), INT8_C(-107), INT8_C( 10), INT8_C( 47), INT8_C( 58), INT8_C( 118), INT8_C( 4), INT8_C( 68), INT8_C(-114), INT8_C( 99), INT8_C( 109), INT8_C(-128), INT8_C( -52), INT8_C( -24), INT8_C( 74), INT8_C( -43), INT8_C( 5), INT8_C( -34), INT8_C( 64), INT8_C( 126), INT8_C(-122), INT8_C( 99), INT8_C( 98), INT8_C( 46), INT8_C( 97), INT8_C( -28), INT8_C( -21), INT8_C( -16)) }, { 
simde_mm512_set_epi8(INT8_C( -50), INT8_C( 0), INT8_C( 80), INT8_C(-123), INT8_C( 19), INT8_C( 112), INT8_C( 30), INT8_C( 95), INT8_C( 58), INT8_C( 21), INT8_C( 13), INT8_C( 32), INT8_C( 113), INT8_C( 126), INT8_C( 27), INT8_C( 113), INT8_C( 121), INT8_C( 97), INT8_C( 51), INT8_C( -16), INT8_C( -77), INT8_C( 84), INT8_C( 16), INT8_C(-112), INT8_C( -40), INT8_C( -69), INT8_C(-116), INT8_C( -97), INT8_C(-120), INT8_C( 102), INT8_C( -82), INT8_C( -42), INT8_C( 43), INT8_C( -70), INT8_C( 46), INT8_C( 17), INT8_C( 108), INT8_C( -47), INT8_C( 53), INT8_C( -84), INT8_C( 19), INT8_C( -37), INT8_C( -32), INT8_C( -59), INT8_C( 33), INT8_C( 110), INT8_C( 17), INT8_C( 67), INT8_C( 51), INT8_C( -19), INT8_C( 91), INT8_C( 26), INT8_C( 33), INT8_C( -43), INT8_C( -14), INT8_C( -56), INT8_C( 112), INT8_C( -72), INT8_C( 96), INT8_C( -62), INT8_C( -21), INT8_C( 96), INT8_C( -25), INT8_C( 104)), UINT64_C( 1662672283), simde_mm512_set_epi8(INT8_C( 55), INT8_C( 43), INT8_C(-128), INT8_C( 23), INT8_C( -59), INT8_C( -21), INT8_C( -11), INT8_C( -65), INT8_C(-101), INT8_C( -89), INT8_C( -88), INT8_C( -71), INT8_C( -70), INT8_C( 37), INT8_C( 122), INT8_C( 74), INT8_C( 109), INT8_C( -13), INT8_C( -13), INT8_C( 72), INT8_C( -1), INT8_C( -35), INT8_C( 80), INT8_C( -20), INT8_C( 14), INT8_C(-104), INT8_C( -76), INT8_C(-122), INT8_C( -35), INT8_C( -33), INT8_C( 63), INT8_C( 74), INT8_C( 98), INT8_C( 54), INT8_C( -12), INT8_C( -1), INT8_C( -30), INT8_C( 96), INT8_C( 95), INT8_C( 58), INT8_C( -63), INT8_C( -6), INT8_C(-113), INT8_C( 55), INT8_C(-128), INT8_C( -43), INT8_C( -90), INT8_C( -63), INT8_C( 3), INT8_C( -6), INT8_C( -45), INT8_C( -75), INT8_C( -83), INT8_C(-118), INT8_C( 74), INT8_C( 35), INT8_C( 38), INT8_C( 4), INT8_C( 35), INT8_C( 15), INT8_C( -42), INT8_C( 71), INT8_C( -1), INT8_C( 27)), simde_mm512_set_epi8(INT8_C( 32), INT8_C( 122), INT8_C( 89), INT8_C( 21), INT8_C( -83), INT8_C( -46), INT8_C( -78), INT8_C( 71), INT8_C( -35), INT8_C( 54), INT8_C( -65), INT8_C(-111), INT8_C( 45), 
INT8_C( -5), INT8_C( 102), INT8_C( 32), INT8_C(-110), INT8_C( 116), INT8_C( -61), INT8_C( 36), INT8_C( -25), INT8_C( 106), INT8_C( -63), INT8_C( 23), INT8_C( -59), INT8_C( 25), INT8_C(-108), INT8_C( -84), INT8_C( -23), INT8_C( 118), INT8_C( -35), INT8_C( 92), INT8_C( -29), INT8_C(-121), INT8_C( -87), INT8_C( 93), INT8_C( 6), INT8_C( -12), INT8_C(-123), INT8_C( 42), INT8_C( 121), INT8_C( 3), INT8_C( 69), INT8_C( 75), INT8_C( 68), INT8_C( -1), INT8_C( -25), INT8_C( 83), INT8_C( -4), INT8_C( -73), INT8_C( -63), INT8_C( 12), INT8_C( -93), INT8_C( -22), INT8_C( 40), INT8_C( -24), INT8_C( -60), INT8_C( 99), INT8_C( 122), INT8_C( 49), INT8_C( -46), INT8_C( 127), INT8_C( 18), INT8_C( 124)), simde_mm512_set_epi8(INT8_C( -50), INT8_C( 0), INT8_C( 80), INT8_C(-123), INT8_C( 19), INT8_C( 112), INT8_C( 30), INT8_C( 95), INT8_C( 58), INT8_C( 21), INT8_C( 13), INT8_C( 32), INT8_C( 113), INT8_C( 126), INT8_C( 27), INT8_C( 113), INT8_C( 121), INT8_C( 97), INT8_C( 51), INT8_C( -16), INT8_C( -77), INT8_C( 84), INT8_C( 16), INT8_C(-112), INT8_C( -40), INT8_C( -69), INT8_C(-116), INT8_C( -97), INT8_C(-120), INT8_C( 102), INT8_C( -82), INT8_C( -42), INT8_C( 43), INT8_C( -81), INT8_C( 75), INT8_C( 17), INT8_C( 108), INT8_C( -47), INT8_C( -38), INT8_C( 16), INT8_C( 19), INT8_C( -37), INT8_C( -32), INT8_C( -20), INT8_C( 60), INT8_C( 110), INT8_C( -65), INT8_C( 67), INT8_C( 51), INT8_C( 67), INT8_C( 91), INT8_C( -87), INT8_C( 10), INT8_C( -96), INT8_C( -14), INT8_C( 59), INT8_C( 98), INT8_C( -72), INT8_C( 96), INT8_C( -34), INT8_C( 4), INT8_C( 96), INT8_C( -19), INT8_C( -97)) }, { simde_mm512_set_epi8(INT8_C( -82), INT8_C( 17), INT8_C( 105), INT8_C( 8), INT8_C( -41), INT8_C( 122), INT8_C( -11), INT8_C( -52), INT8_C( -81), INT8_C( -30), INT8_C( 109), INT8_C( 119), INT8_C( -78), INT8_C(-123), INT8_C( 5), INT8_C( -23), INT8_C( 44), INT8_C( -23), INT8_C(-122), INT8_C(-101), INT8_C( -30), INT8_C( 103), INT8_C( 30), INT8_C( -6), INT8_C( 113), INT8_C( -64), INT8_C( -3), INT8_C(-100), INT8_C( 72), 
INT8_C( -30), INT8_C( 59), INT8_C( -7), INT8_C(-101), INT8_C( 48), INT8_C( -62), INT8_C( 5), INT8_C( -52), INT8_C( 72), INT8_C( 56), INT8_C( 6), INT8_C( 86), INT8_C( -78), INT8_C( -43), INT8_C( 91), INT8_C( -63), INT8_C( -91), INT8_C(-105), INT8_C( -98), INT8_C( 39), INT8_C( 5), INT8_C( 77), INT8_C( 91), INT8_C( -82), INT8_C( 20), INT8_C( 41), INT8_C( 62), INT8_C( 27), INT8_C( 82), INT8_C( -39), INT8_C( 57), INT8_C(-116), INT8_C( -85), INT8_C(-107), INT8_C( 31)), UINT64_C( 782232724), simde_mm512_set_epi8(INT8_C( 11), INT8_C( 49), INT8_C( -30), INT8_C(-117), INT8_C( 85), INT8_C( 19), INT8_C( 44), INT8_C(-110), INT8_C( 61), INT8_C( -27), INT8_C( 26), INT8_C( -12), INT8_C( 110), INT8_C( 11), INT8_C( 45), INT8_C( -32), INT8_C( -1), INT8_C( 86), INT8_C( 125), INT8_C( 95), INT8_C( -41), INT8_C( -73), INT8_C( -6), INT8_C( 122), INT8_C( 65), INT8_C( -38), INT8_C(-116), INT8_C( 84), INT8_C(-121), INT8_C( -15), INT8_C( 41), INT8_C(-102), INT8_C( -31), INT8_C( -83), INT8_C( -68), INT8_C( 89), INT8_C( 27), INT8_C(-107), INT8_C( -85), INT8_C( 74), INT8_C( 95), INT8_C( -86), INT8_C( 94), INT8_C( -13), INT8_C( -84), INT8_C( 38), INT8_C( 116), INT8_C(-101), INT8_C( 72), INT8_C( 32), INT8_C( -98), INT8_C( 48), INT8_C( -94), INT8_C( -55), INT8_C( -17), INT8_C( 28), INT8_C( 42), INT8_C( 70), INT8_C( 89), INT8_C(-115), INT8_C( -86), INT8_C( 126), INT8_C( -92), INT8_C( 91)), simde_mm512_set_epi8(INT8_C( -46), INT8_C( -24), INT8_C( -24), INT8_C( -26), INT8_C( 89), INT8_C( 108), INT8_C( 49), INT8_C( 123), INT8_C( -86), INT8_C( -61), INT8_C( -22), INT8_C( -47), INT8_C( 21), INT8_C( 76), INT8_C( 6), INT8_C( -21), INT8_C( -19), INT8_C( 38), INT8_C(-116), INT8_C( -22), INT8_C( -75), INT8_C( 54), INT8_C( -81), INT8_C( 9), INT8_C( 94), INT8_C( -15), INT8_C( 26), INT8_C(-110), INT8_C( 18), INT8_C( -49), INT8_C( -21), INT8_C( 70), INT8_C( 50), INT8_C( 20), INT8_C( -59), INT8_C( 63), INT8_C( -20), INT8_C( -92), INT8_C( -44), INT8_C( 37), INT8_C(-125), INT8_C( 4), INT8_C( 53), INT8_C( -49), 
INT8_C( -10), INT8_C( 11), INT8_C( 91), INT8_C( -86), INT8_C( -34), INT8_C(-108), INT8_C( -80), INT8_C( 122), INT8_C( 31), INT8_C( 31), INT8_C( -29), INT8_C( 70), INT8_C( 28), INT8_C( 33), INT8_C( 109), INT8_C( 55), INT8_C( -79), INT8_C( 95), INT8_C( 100), INT8_C( -33)), simde_mm512_set_epi8(INT8_C( -82), INT8_C( 17), INT8_C( 105), INT8_C( 8), INT8_C( -41), INT8_C( 122), INT8_C( -11), INT8_C( -52), INT8_C( -81), INT8_C( -30), INT8_C( 109), INT8_C( 119), INT8_C( -78), INT8_C(-123), INT8_C( 5), INT8_C( -23), INT8_C( 44), INT8_C( -23), INT8_C(-122), INT8_C(-101), INT8_C( -30), INT8_C( 103), INT8_C( 30), INT8_C( -6), INT8_C( 113), INT8_C( -64), INT8_C( -3), INT8_C(-100), INT8_C( 72), INT8_C( -30), INT8_C( 59), INT8_C( -7), INT8_C(-101), INT8_C( 48), INT8_C( -9), INT8_C( 5), INT8_C( 47), INT8_C( -15), INT8_C( -41), INT8_C( 6), INT8_C( -36), INT8_C( -78), INT8_C( -43), INT8_C( 36), INT8_C( -74), INT8_C( 27), INT8_C( 25), INT8_C( -15), INT8_C( 106), INT8_C(-116), INT8_C( -18), INT8_C( 91), INT8_C(-125), INT8_C( -86), INT8_C( 41), INT8_C( 62), INT8_C( 14), INT8_C( 82), INT8_C( -39), INT8_C( 86), INT8_C(-116), INT8_C( 31), INT8_C(-107), INT8_C( 31)) }, { simde_mm512_set_epi8(INT8_C( 82), INT8_C( -55), INT8_C( 13), INT8_C(-104), INT8_C( 62), INT8_C( 20), INT8_C( -36), INT8_C( 92), INT8_C( -73), INT8_C( -79), INT8_C( -7), INT8_C( -22), INT8_C( -50), INT8_C(-119), INT8_C( -83), INT8_C( -71), INT8_C( 125), INT8_C( 29), INT8_C( -61), INT8_C(-111), INT8_C( -9), INT8_C( 67), INT8_C( -39), INT8_C( -17), INT8_C( 23), INT8_C( -11), INT8_C(-122), INT8_C( -24), INT8_C( 37), INT8_C(-122), INT8_C( -16), INT8_C( -40), INT8_C( -34), INT8_C( -17), INT8_C( 100), INT8_C( 120), INT8_C( -51), INT8_C( 8), INT8_C( 82), INT8_C( 19), INT8_C( -50), INT8_C( -24), INT8_C( -20), INT8_C( -32), INT8_C( 74), INT8_C( -84), INT8_C( 9), INT8_C( 14), INT8_C(-102), INT8_C( -2), INT8_C( 106), INT8_C( 41), INT8_C( 98), INT8_C( -87), INT8_C(-124), INT8_C( -3), INT8_C( 80), INT8_C( 110), INT8_C( -32), INT8_C( 20), 
INT8_C( -15), INT8_C( 65), INT8_C( -54), INT8_C( -49)), UINT64_C( 1883474426), simde_mm512_set_epi8(INT8_C( -36), INT8_C(-125), INT8_C( -71), INT8_C(-101), INT8_C( -95), INT8_C( -1), INT8_C( 65), INT8_C( 67), INT8_C( 4), INT8_C( 126), INT8_C( -9), INT8_C( 50), INT8_C( 46), INT8_C( 17), INT8_C( 12), INT8_C( 7), INT8_C( 31), INT8_C( -83), INT8_C( 63), INT8_C( 21), INT8_C(-105), INT8_C( 56), INT8_C( 6), INT8_C( 88), INT8_C( -4), INT8_C( -51), INT8_C( -16), INT8_C( -27), INT8_C( -26), INT8_C( 50), INT8_C( 3), INT8_C( -65), INT8_C( -3), INT8_C( -86), INT8_C( 39), INT8_C( 48), INT8_C( 65), INT8_C( 36), INT8_C( -65), INT8_C( -82), INT8_C(-107), INT8_C( 14), INT8_C( 110), INT8_C( 56), INT8_C( 111), INT8_C( -32), INT8_C( 109), INT8_C( -95), INT8_C( 69), INT8_C(-111), INT8_C( -7), INT8_C( 9), INT8_C( 116), INT8_C( 77), INT8_C( 122), INT8_C( 26), INT8_C( 9), INT8_C( -79), INT8_C(-127), INT8_C( -5), INT8_C( 117), INT8_C( 5), INT8_C( 80), INT8_C( -85)), simde_mm512_set_epi8(INT8_C( 82), INT8_C(-125), INT8_C( -33), INT8_C( 83), INT8_C( -98), INT8_C( 50), INT8_C( -27), INT8_C( -16), INT8_C( -63), INT8_C(-111), INT8_C( -65), INT8_C( 3), INT8_C( -15), INT8_C( 37), INT8_C( 46), INT8_C( 58), INT8_C( -41), INT8_C( 72), INT8_C( 108), INT8_C(-124), INT8_C( 9), INT8_C( 40), INT8_C( 115), INT8_C( 12), INT8_C( 1), INT8_C( 41), INT8_C( -71), INT8_C( 87), INT8_C( -55), INT8_C( 52), INT8_C( -97), INT8_C( 49), INT8_C( 32), INT8_C(-115), INT8_C( 71), INT8_C( 64), INT8_C( -61), INT8_C( 43), INT8_C( -42), INT8_C( 57), INT8_C( -56), INT8_C( 113), INT8_C( 60), INT8_C( 75), INT8_C( 7), INT8_C( -47), INT8_C( 4), INT8_C( 115), INT8_C( 67), INT8_C( 44), INT8_C( -1), INT8_C( -85), INT8_C( -95), INT8_C( 108), INT8_C( 37), INT8_C( -99), INT8_C( -88), INT8_C( -11), INT8_C( 47), INT8_C( -69), INT8_C(-123), INT8_C( 17), INT8_C( -30), INT8_C( 36)), simde_mm512_set_epi8(INT8_C( 82), INT8_C( -55), INT8_C( 13), INT8_C(-104), INT8_C( 62), INT8_C( 20), INT8_C( -36), INT8_C( 92), INT8_C( -73), INT8_C( -79), INT8_C( 
-7), INT8_C( -22), INT8_C( -50), INT8_C(-119), INT8_C( -83), INT8_C( -71), INT8_C( 125), INT8_C( 29), INT8_C( -61), INT8_C(-111), INT8_C( -9), INT8_C( 67), INT8_C( -39), INT8_C( -17), INT8_C( 23), INT8_C( -11), INT8_C(-122), INT8_C( -24), INT8_C( 37), INT8_C(-122), INT8_C( -16), INT8_C( -40), INT8_C( -34), INT8_C( 29), INT8_C( -32), INT8_C( -16), INT8_C( -51), INT8_C( 8), INT8_C( 82), INT8_C( 19), INT8_C( -50), INT8_C( -99), INT8_C( -20), INT8_C( -32), INT8_C( 74), INT8_C( -84), INT8_C( 105), INT8_C( 46), INT8_C( 2), INT8_C( -2), INT8_C( 106), INT8_C( 41), INT8_C( -45), INT8_C( -87), INT8_C(-124), INT8_C( 125), INT8_C( 97), INT8_C( -68), INT8_C( 82), INT8_C( 64), INT8_C( -16), INT8_C( 65), INT8_C( 110), INT8_C( -49)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_sub_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_sub_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask64 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT64_C( 3290745653), simde_mm512_set_epi8(INT8_C( 68), INT8_C( -18), INT8_C(-120), INT8_C( -91), INT8_C( 33), INT8_C( -44), INT8_C( 127), INT8_C(-128), INT8_C( 121), INT8_C( -8), INT8_C(-121), INT8_C( -19), INT8_C( 19), INT8_C( -3), INT8_C( 10), INT8_C( -37), INT8_C( 96), INT8_C( 15), INT8_C( -45), INT8_C( -44), INT8_C( -83), INT8_C( -37), INT8_C( 8), INT8_C( 111), INT8_C( -30), INT8_C( -99), INT8_C( 116), INT8_C( 112), INT8_C( 67), INT8_C(-123), INT8_C( 59), INT8_C( -62), INT8_C( 33), INT8_C( 51), INT8_C( -16), INT8_C( -35), INT8_C( -53), INT8_C( 108), INT8_C( -37), INT8_C( -15), INT8_C( 26), INT8_C( 83), INT8_C( -47), INT8_C( -23), INT8_C( 6), INT8_C( 52), INT8_C( -19), INT8_C( 108), INT8_C( -33), INT8_C( 120), INT8_C( 55), INT8_C(-128), INT8_C( -46), INT8_C( 117), INT8_C( 41), INT8_C( -54), INT8_C( -99), INT8_C( -39), INT8_C( 117), INT8_C( 
57), INT8_C( 78), INT8_C(-110), INT8_C( -8), INT8_C(-114)), simde_mm512_set_epi8(INT8_C( 115), INT8_C( 65), INT8_C(-123), INT8_C( 116), INT8_C( -46), INT8_C( -68), INT8_C( -8), INT8_C( 96), INT8_C( -90), INT8_C( -14), INT8_C( 27), INT8_C( 33), INT8_C( 21), INT8_C( 58), INT8_C( -12), INT8_C( -76), INT8_C( 70), INT8_C( -70), INT8_C( -4), INT8_C( 64), INT8_C( 35), INT8_C( -32), INT8_C(-115), INT8_C( 109), INT8_C( -69), INT8_C( -61), INT8_C(-126), INT8_C( 45), INT8_C( 93), INT8_C( 100), INT8_C( -53), INT8_C( 105), INT8_C( -51), INT8_C( 43), INT8_C( -96), INT8_C( -95), INT8_C( 86), INT8_C( -66), INT8_C( -47), INT8_C( 123), INT8_C( 63), INT8_C( 16), INT8_C( -66), INT8_C( 12), INT8_C( 115), INT8_C( -39), INT8_C( 10), INT8_C( 108), INT8_C(-120), INT8_C( 110), INT8_C( -25), INT8_C( 7), INT8_C( 105), INT8_C( -93), INT8_C( 68), INT8_C( 3), INT8_C( 113), INT8_C( -50), INT8_C( -34), INT8_C( 22), INT8_C( -61), INT8_C( 75), INT8_C( 28), INT8_C( 116)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 84), INT8_C( 8), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -82), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 19), INT8_C( 0), INT8_C( 0), INT8_C( 91), INT8_C( 0), INT8_C( 0), INT8_C( 87), INT8_C( 10), INT8_C( 0), INT8_C( 0), INT8_C( 105), INT8_C( 0), INT8_C( -27), INT8_C( -57), INT8_C( 0), INT8_C( 0), INT8_C(-105), INT8_C( 35), INT8_C( 0), INT8_C( 71), INT8_C( 0), INT8_C( 26)) }, { UINT64_C( 1235205951), simde_mm512_set_epi8(INT8_C(-120), INT8_C( -25), INT8_C( 111), INT8_C(-128), INT8_C( -4), INT8_C( 58), INT8_C(-115), INT8_C( 4), INT8_C( 26), INT8_C( -6), INT8_C( -92), INT8_C( 7), INT8_C(-105), INT8_C( -92), 
INT8_C( 38), INT8_C( -63), INT8_C( 77), INT8_C( 86), INT8_C( 113), INT8_C( -48), INT8_C( 108), INT8_C( -92), INT8_C( 69), INT8_C( 74), INT8_C( 67), INT8_C( 96), INT8_C( -34), INT8_C( 78), INT8_C( 124), INT8_C( 9), INT8_C( -1), INT8_C( -86), INT8_C( -35), INT8_C( 26), INT8_C( 67), INT8_C( 46), INT8_C( 75), INT8_C(-119), INT8_C( 68), INT8_C( 31), INT8_C( -52), INT8_C(-102), INT8_C( -4), INT8_C( 118), INT8_C( 0), INT8_C( -44), INT8_C( 123), INT8_C( -73), INT8_C( 84), INT8_C( 30), INT8_C( -8), INT8_C( 64), INT8_C( -20), INT8_C( -79), INT8_C( -85), INT8_C( -23), INT8_C( -34), INT8_C( -4), INT8_C( -85), INT8_C( 107), INT8_C( -6), INT8_C( 16), INT8_C( -66), INT8_C(-113)), simde_mm512_set_epi8(INT8_C( -19), INT8_C( 117), INT8_C( 121), INT8_C( 67), INT8_C( -91), INT8_C( -91), INT8_C( 98), INT8_C( 106), INT8_C( 44), INT8_C( 68), INT8_C( 96), INT8_C( 88), INT8_C( 1), INT8_C( 68), INT8_C( 46), INT8_C( 19), INT8_C( 31), INT8_C( 85), INT8_C( 35), INT8_C( 68), INT8_C( -79), INT8_C( 41), INT8_C( 28), INT8_C( 92), INT8_C( -26), INT8_C( 20), INT8_C( -16), INT8_C( -7), INT8_C( 41), INT8_C( 71), INT8_C( 88), INT8_C( 39), INT8_C( -42), INT8_C( 76), INT8_C( 40), INT8_C( 108), INT8_C( -87), INT8_C(-126), INT8_C( 42), INT8_C( 58), INT8_C( 2), INT8_C( -23), INT8_C( 5), INT8_C(-116), INT8_C( 34), INT8_C( 66), INT8_C( 28), INT8_C( 86), INT8_C( 50), INT8_C( -67), INT8_C( 20), INT8_C( 73), INT8_C( 27), INT8_C( -29), INT8_C( 84), INT8_C( 112), INT8_C( 104), INT8_C( 53), INT8_C( -89), INT8_C(-113), INT8_C( -4), INT8_C( 94), INT8_C( 75), INT8_C( 21)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -50), INT8_C( 0), INT8_C( 
0), INT8_C( -94), INT8_C( 0), INT8_C( 0), INT8_C( -27), INT8_C( -54), INT8_C( 0), INT8_C( 0), INT8_C( -22), INT8_C( -34), INT8_C(-110), INT8_C( 95), INT8_C( 97), INT8_C( 34), INT8_C( 0), INT8_C( -28), INT8_C( -9), INT8_C( -47), INT8_C( -50), INT8_C( 87), INT8_C( 121), INT8_C( 0), INT8_C( 0), INT8_C( 4), INT8_C( -36), INT8_C( -2), INT8_C( -78), INT8_C( 115), INT8_C( 122)) }, { UINT64_C( 3694669449), simde_mm512_set_epi8(INT8_C( 0), INT8_C( -45), INT8_C( 59), INT8_C( 51), INT8_C( -35), INT8_C( 70), INT8_C( 73), INT8_C( 33), INT8_C( -70), INT8_C( -58), INT8_C( 69), INT8_C( -7), INT8_C( 115), INT8_C( -10), INT8_C( -39), INT8_C( 78), INT8_C( 56), INT8_C( 116), INT8_C( 104), INT8_C( -92), INT8_C( 5), INT8_C( -47), INT8_C( 27), INT8_C( 94), INT8_C(-126), INT8_C( 88), INT8_C( 80), INT8_C( 112), INT8_C( 21), INT8_C( 62), INT8_C( 86), INT8_C(-103), INT8_C( 66), INT8_C( -9), INT8_C( -26), INT8_C( 47), INT8_C( -50), INT8_C(-118), INT8_C( 115), INT8_C( 49), INT8_C( 42), INT8_C( 6), INT8_C( 92), INT8_C( 2), INT8_C( 63), INT8_C( -6), INT8_C( -32), INT8_C( 15), INT8_C( 66), INT8_C( 82), INT8_C( -9), INT8_C( -79), INT8_C(-123), INT8_C( 52), INT8_C( -90), INT8_C(-111), INT8_C( 62), INT8_C( -43), INT8_C( -50), INT8_C( 62), INT8_C( 4), INT8_C( -92), INT8_C( 86), INT8_C( -32)), simde_mm512_set_epi8(INT8_C( 23), INT8_C( -12), INT8_C( 67), INT8_C( -6), INT8_C( -37), INT8_C( 92), INT8_C( -35), INT8_C( 99), INT8_C( 18), INT8_C( 68), INT8_C( -9), INT8_C( 64), INT8_C(-111), INT8_C( 37), INT8_C( -82), INT8_C( -95), INT8_C( 54), INT8_C( 75), INT8_C( -77), INT8_C( -34), INT8_C( 52), INT8_C( -80), INT8_C( -94), INT8_C( 90), INT8_C( 0), INT8_C( -8), INT8_C( 123), INT8_C(-111), INT8_C( 16), INT8_C( 125), INT8_C( -51), INT8_C( 99), INT8_C( -22), INT8_C( 121), INT8_C( 63), INT8_C( -55), INT8_C( 117), INT8_C( 109), INT8_C(-126), INT8_C(-111), INT8_C( 47), INT8_C(-127), INT8_C( 109), INT8_C( -9), INT8_C( -42), INT8_C( 36), INT8_C( -32), INT8_C( 115), INT8_C( -89), INT8_C( 7), INT8_C( 90), INT8_C( 46), 
INT8_C( -83), INT8_C( -35), INT8_C( 30), INT8_C( -19), INT8_C( -99), INT8_C( -56), INT8_C( -70), INT8_C( 73), INT8_C( -61), INT8_C( 27), INT8_C( 117), INT8_C( 47)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 88), INT8_C( 126), INT8_C( 0), INT8_C( 102), INT8_C( 89), INT8_C( 29), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -17), INT8_C( 11), INT8_C( 105), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -99), INT8_C( 0), INT8_C( -40), INT8_C( 87), INT8_C(-120), INT8_C( 0), INT8_C( -95), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 65), INT8_C( 0), INT8_C( 0), INT8_C( -79)) }, { UINT64_C( 2480122489), simde_mm512_set_epi8(INT8_C( 4), INT8_C( -62), INT8_C( -44), INT8_C( 49), INT8_C( 83), INT8_C( 115), INT8_C( 5), INT8_C( 57), INT8_C( -76), INT8_C( -80), INT8_C( 40), INT8_C( 60), INT8_C( 65), INT8_C( -98), INT8_C( -74), INT8_C( 17), INT8_C( -35), INT8_C( 32), INT8_C( -43), INT8_C( 108), INT8_C( 83), INT8_C( -59), INT8_C( -4), INT8_C( 125), INT8_C( -31), INT8_C( 118), INT8_C( -25), INT8_C( -91), INT8_C( 50), INT8_C( -74), INT8_C( 78), INT8_C( 95), INT8_C( -84), INT8_C( -63), INT8_C( 87), INT8_C(-108), INT8_C( 28), INT8_C( -70), INT8_C( 77), INT8_C(-113), INT8_C( -20), INT8_C( 50), INT8_C( 95), INT8_C(-108), INT8_C( 105), INT8_C( 114), INT8_C(-109), INT8_C( 19), INT8_C( -79), INT8_C( 106), INT8_C( 61), INT8_C( -12), INT8_C( 126), INT8_C(-117), INT8_C( 126), INT8_C(-125), INT8_C( -93), INT8_C( 69), INT8_C( 104), INT8_C( 119), INT8_C( 63), INT8_C( 95), INT8_C(-106), INT8_C( -66)), simde_mm512_set_epi8(INT8_C( -51), INT8_C( 11), INT8_C( -44), INT8_C( 33), INT8_C( 29), INT8_C( 8), 
INT8_C( 5), INT8_C( 70), INT8_C( 13), INT8_C( 104), INT8_C( 50), INT8_C( 43), INT8_C( 82), INT8_C( -5), INT8_C( -23), INT8_C( -47), INT8_C( 99), INT8_C(-116), INT8_C( 118), INT8_C( 73), INT8_C( -10), INT8_C( -88), INT8_C( -42), INT8_C( -58), INT8_C( -49), INT8_C( 65), INT8_C( -18), INT8_C( 54), INT8_C( -68), INT8_C( 1), INT8_C( -7), INT8_C( -96), INT8_C( 4), INT8_C( 115), INT8_C( 42), INT8_C(-106), INT8_C( 31), INT8_C( 94), INT8_C( -71), INT8_C( -41), INT8_C( 33), INT8_C(-106), INT8_C( -65), INT8_C(-107), INT8_C( 71), INT8_C( -10), INT8_C( -21), INT8_C(-128), INT8_C( -23), INT8_C( 20), INT8_C( 2), INT8_C( 96), INT8_C(-128), INT8_C( -51), INT8_C( -38), INT8_C( 47), INT8_C( -56), INT8_C( 123), INT8_C( -20), INT8_C( -50), INT8_C( -2), INT8_C( 40), INT8_C( 24), INT8_C( -98)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -88), INT8_C( 0), INT8_C( 0), INT8_C( -2), INT8_C( 0), INT8_C( 0), INT8_C(-108), INT8_C( -72), INT8_C( -53), INT8_C(-100), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -88), INT8_C(-109), INT8_C( -56), INT8_C( 0), INT8_C( 59), INT8_C( 0), INT8_C( -2), INT8_C( 0), INT8_C( -92), INT8_C( 0), INT8_C( 0), INT8_C( -54), INT8_C( 124), INT8_C( -87), INT8_C( 65), INT8_C( 0), INT8_C( 0), INT8_C( 32)) }, { UINT64_C( 2822554960), simde_mm512_set_epi8(INT8_C( -38), INT8_C( -1), INT8_C( 48), INT8_C( 32), INT8_C( 88), INT8_C( -18), INT8_C( 123), INT8_C( 27), INT8_C( 111), INT8_C( 27), INT8_C( -3), INT8_C( 52), INT8_C( -31), INT8_C( 2), INT8_C( -47), INT8_C( 64), INT8_C( 76), INT8_C( -52), INT8_C( -96), INT8_C( -6), INT8_C(-119), INT8_C( -87), INT8_C( 102), INT8_C( 5), INT8_C( 24), INT8_C( -44), 
INT8_C( 110), INT8_C(-113), INT8_C(-116), INT8_C( -3), INT8_C( 62), INT8_C( -87), INT8_C( 7), INT8_C( -54), INT8_C( -57), INT8_C( -66), INT8_C( 42), INT8_C( -82), INT8_C( 46), INT8_C( -16), INT8_C( 91), INT8_C( -73), INT8_C( -20), INT8_C( -77), INT8_C( -11), INT8_C( 25), INT8_C( 12), INT8_C( 76), INT8_C( -58), INT8_C( 3), INT8_C(-125), INT8_C( -36), INT8_C( 18), INT8_C( -40), INT8_C( 111), INT8_C( 107), INT8_C( 88), INT8_C( 48), INT8_C( 113), INT8_C( -90), INT8_C(-117), INT8_C( 116), INT8_C( 46), INT8_C( -70)), simde_mm512_set_epi8(INT8_C( 116), INT8_C(-120), INT8_C( -67), INT8_C( -86), INT8_C( 48), INT8_C( 2), INT8_C( 37), INT8_C( -26), INT8_C( -55), INT8_C( 66), INT8_C( 80), INT8_C( -7), INT8_C( 21), INT8_C(-118), INT8_C( 7), INT8_C( -49), INT8_C( 27), INT8_C( 45), INT8_C( 71), INT8_C( -63), INT8_C( 96), INT8_C(-106), INT8_C( -43), INT8_C( 10), INT8_C( 104), INT8_C( -19), INT8_C(-110), INT8_C( 126), INT8_C( -52), INT8_C( -56), INT8_C( -96), INT8_C( -27), INT8_C(-125), INT8_C(-116), INT8_C( 25), INT8_C( 78), INT8_C( -76), INT8_C( -85), INT8_C( -23), INT8_C( -19), INT8_C(-106), INT8_C( 126), INT8_C( 19), INT8_C( -41), INT8_C( 40), INT8_C( 78), INT8_C( -69), INT8_C( 57), INT8_C( 73), INT8_C( -58), INT8_C( 3), INT8_C( 65), INT8_C( -87), INT8_C( -37), INT8_C( 5), INT8_C(-126), INT8_C( 14), INT8_C( -36), INT8_C( -37), INT8_C( 11), INT8_C( 94), INT8_C( 24), INT8_C( 8), INT8_C( -31)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-124), INT8_C( 0), INT8_C( -82), INT8_C( 0), INT8_C( 118), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -39), INT8_C( -36), INT8_C( -51), INT8_C( -53), INT8_C( 
0), INT8_C( 0), INT8_C( 125), INT8_C( 61), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -3), INT8_C( 0), INT8_C( -23), INT8_C( 0), INT8_C( 84), INT8_C( 0), INT8_C(-101), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { UINT64_C( 2777207418), simde_mm512_set_epi8(INT8_C( -82), INT8_C( -24), INT8_C( 18), INT8_C( 115), INT8_C( -15), INT8_C( 55), INT8_C( 78), INT8_C( 60), INT8_C( -8), INT8_C( -91), INT8_C( 126), INT8_C( 15), INT8_C( 23), INT8_C( 6), INT8_C( -21), INT8_C( 120), INT8_C( -20), INT8_C(-104), INT8_C( -27), INT8_C( 38), INT8_C( 31), INT8_C( -21), INT8_C( 79), INT8_C( -62), INT8_C( 36), INT8_C( 95), INT8_C( 42), INT8_C(-102), INT8_C( -80), INT8_C( -69), INT8_C( 107), INT8_C(-114), INT8_C( 76), INT8_C( 123), INT8_C(-126), INT8_C( 108), INT8_C( -55), INT8_C( 89), INT8_C( -46), INT8_C( 18), INT8_C( 117), INT8_C( 25), INT8_C(-120), INT8_C( 27), INT8_C( 34), INT8_C( 64), INT8_C( 71), INT8_C( 64), INT8_C( -13), INT8_C( -73), INT8_C( 112), INT8_C( 25), INT8_C( -18), INT8_C( -63), INT8_C( 109), INT8_C( 9), INT8_C( 14), INT8_C(-125), INT8_C( -89), INT8_C( 70), INT8_C( 10), INT8_C( 15), INT8_C( 120), INT8_C( -59)), simde_mm512_set_epi8(INT8_C( -52), INT8_C( -61), INT8_C( -14), INT8_C( -38), INT8_C( -93), INT8_C( -34), INT8_C( 64), INT8_C( -67), INT8_C(-123), INT8_C( 123), INT8_C( -93), INT8_C( 41), INT8_C( 97), INT8_C( -8), INT8_C( -86), INT8_C( -16), INT8_C( -70), INT8_C( 3), INT8_C( 118), INT8_C( 37), INT8_C( 104), INT8_C( 111), INT8_C( -17), INT8_C( 110), INT8_C( -58), INT8_C( 58), INT8_C( 102), INT8_C( 64), INT8_C( -67), INT8_C( -76), INT8_C( -30), INT8_C( 108), INT8_C( 79), INT8_C( 46), INT8_C( -40), INT8_C( 101), INT8_C( -13), INT8_C( -25), INT8_C( 60), INT8_C( 25), INT8_C( 32), INT8_C( -21), INT8_C( 114), INT8_C( -21), INT8_C( 71), INT8_C( -85), INT8_C( 34), INT8_C( 82), INT8_C(-114), INT8_C( -30), INT8_C( -58), INT8_C( 116), INT8_C( 58), INT8_C(-105), INT8_C( 117), INT8_C( 11), INT8_C( -91), INT8_C( 118), INT8_C( -50), INT8_C( -8), INT8_C( -22), INT8_C( 59), 
INT8_C( -29), INT8_C( -88)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -3), INT8_C( 0), INT8_C( -86), INT8_C( 0), INT8_C( 0), INT8_C( 114), INT8_C( 0), INT8_C( -7), INT8_C( 85), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -37), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 101), INT8_C( -43), INT8_C( 0), INT8_C( -91), INT8_C( 0), INT8_C( 0), INT8_C( -8), INT8_C( 0), INT8_C( 0), INT8_C( 13), INT8_C( -39), INT8_C( 78), INT8_C( 32), INT8_C( 0), INT8_C(-107), INT8_C( 0)) }, { UINT64_C( 3908316288), simde_mm512_set_epi8(INT8_C( 16), INT8_C( 5), INT8_C( 18), INT8_C(-104), INT8_C(-119), INT8_C( -46), INT8_C( -92), INT8_C( 37), INT8_C( -84), INT8_C( 2), INT8_C( -49), INT8_C( 99), INT8_C( -79), INT8_C( 48), INT8_C(-103), INT8_C( 3), INT8_C( 54), INT8_C( 118), INT8_C( -53), INT8_C( 24), INT8_C( 56), INT8_C( 78), INT8_C( 121), INT8_C( -10), INT8_C( 4), INT8_C( 97), INT8_C( 53), INT8_C( -46), INT8_C( 92), INT8_C(-100), INT8_C( 47), INT8_C( 107), INT8_C( -52), INT8_C( 68), INT8_C( 11), INT8_C( -16), INT8_C( -66), INT8_C( -79), INT8_C( -14), INT8_C( 27), INT8_C( 14), INT8_C( 125), INT8_C( 22), INT8_C( -82), INT8_C( 44), INT8_C( -12), INT8_C( 94), INT8_C( -30), INT8_C( 98), INT8_C( 125), INT8_C(-107), INT8_C( 37), INT8_C( -66), INT8_C( 90), INT8_C( 68), INT8_C( 10), INT8_C( -72), INT8_C( -10), INT8_C(-119), INT8_C( -9), INT8_C( 49), INT8_C(-107), INT8_C( 10), INT8_C( 47)), simde_mm512_set_epi8(INT8_C( 114), INT8_C( 29), INT8_C( -56), INT8_C( -61), INT8_C(-124), INT8_C(-107), INT8_C( -23), INT8_C( -89), INT8_C( 38), INT8_C( -97), INT8_C( 109), INT8_C( 53), INT8_C(-117), INT8_C( 76), INT8_C( -82), INT8_C( -65), INT8_C( 
-24), INT8_C( -47), INT8_C(-119), INT8_C( 5), INT8_C( 95), INT8_C( 82), INT8_C( -3), INT8_C( -62), INT8_C(-116), INT8_C( -98), INT8_C( -29), INT8_C( 77), INT8_C( -38), INT8_C(-118), INT8_C( -85), INT8_C( 121), INT8_C( -72), INT8_C(-111), INT8_C( 28), INT8_C( -18), INT8_C( 64), INT8_C(-126), INT8_C( 122), INT8_C( -54), INT8_C( 87), INT8_C( -22), INT8_C( 17), INT8_C( 50), INT8_C( -83), INT8_C( -39), INT8_C( 77), INT8_C( -13), INT8_C( 17), INT8_C( -66), INT8_C(-128), INT8_C( 77), INT8_C( 107), INT8_C( 47), INT8_C( -68), INT8_C( -44), INT8_C( -30), INT8_C( -22), INT8_C( 14), INT8_C( 26), INT8_C( 59), INT8_C( 103), INT8_C( -54), INT8_C( -39)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 20), INT8_C( -77), INT8_C( -17), INT8_C( 0), INT8_C( 126), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -73), INT8_C(-109), INT8_C( 5), INT8_C( 124), INT8_C( 0), INT8_C( 27), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 21), INT8_C( 0), INT8_C( 83), INT8_C( 43), INT8_C( 0), INT8_C( 0), INT8_C( -42), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { UINT64_C( 2902744348), simde_mm512_set_epi8(INT8_C( 19), INT8_C( -37), INT8_C( -32), INT8_C( -59), INT8_C( 33), INT8_C( 110), INT8_C( 17), INT8_C( 67), INT8_C( 51), INT8_C( -19), INT8_C( 91), INT8_C( 26), INT8_C( 33), INT8_C( -43), INT8_C( -14), INT8_C( -56), INT8_C( 112), INT8_C( -72), INT8_C( 96), INT8_C( -62), INT8_C( -21), INT8_C( 96), INT8_C( -25), INT8_C( 104), INT8_C( -56), INT8_C( -56), INT8_C( 57), INT8_C( -25), INT8_C( -3), INT8_C( 99), INT8_C( -6), INT8_C( 31), INT8_C( -96), INT8_C( 49), INT8_C( 110), INT8_C( -10), INT8_C( 
-82), INT8_C( 32), INT8_C( -27), INT8_C( 112), INT8_C( 84), INT8_C( 37), INT8_C( -62), INT8_C( 38), INT8_C( -53), INT8_C( -97), INT8_C( 76), INT8_C( 13), INT8_C(-124), INT8_C(-120), INT8_C( -86), INT8_C( 98), INT8_C( 96), INT8_C( 4), INT8_C( 4), INT8_C( 94), INT8_C( -41), INT8_C( -81), INT8_C( -40), INT8_C( -28), INT8_C( -23), INT8_C( -59), INT8_C( -15), INT8_C( -40)), simde_mm512_set_epi8(INT8_C( 3), INT8_C( -6), INT8_C( -45), INT8_C( -75), INT8_C( -83), INT8_C(-118), INT8_C( 74), INT8_C( 35), INT8_C( 38), INT8_C( 4), INT8_C( 35), INT8_C( 15), INT8_C( -42), INT8_C( 71), INT8_C( -1), INT8_C( 27), INT8_C( -48), INT8_C( -77), INT8_C( 116), INT8_C( 52), INT8_C( 99), INT8_C( 26), INT8_C( 93), INT8_C(-101), INT8_C( -50), INT8_C( 0), INT8_C( 80), INT8_C(-123), INT8_C( 19), INT8_C( 112), INT8_C( 30), INT8_C( 95), INT8_C( 58), INT8_C( 21), INT8_C( 13), INT8_C( 32), INT8_C( 113), INT8_C( 126), INT8_C( 27), INT8_C( 113), INT8_C( 121), INT8_C( 97), INT8_C( 51), INT8_C( -16), INT8_C( -77), INT8_C( 84), INT8_C( 16), INT8_C(-112), INT8_C( -40), INT8_C( -69), INT8_C(-116), INT8_C( -97), INT8_C(-120), INT8_C( 102), INT8_C( -82), INT8_C( -42), INT8_C( 43), INT8_C( -70), INT8_C( 46), INT8_C( 17), INT8_C( 108), INT8_C( -47), INT8_C( 53), INT8_C( -84)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 102), INT8_C( 0), INT8_C( 97), INT8_C( 0), INT8_C( 61), INT8_C( -94), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 75), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -51), INT8_C( 0), INT8_C( -61), INT8_C( -40), INT8_C( -98), INT8_C( 0), INT8_C(-120), INT8_C( 0), INT8_C( 0), INT8_C( 
0), INT8_C( -45), INT8_C( 125), INT8_C( -12), INT8_C( 0), INT8_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_sub_epi8(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_sub_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi16(INT16_C( 21075), INT16_C( 30017), INT16_C(-11898), INT16_C( 29710), INT16_C( 19457), INT16_C(-12796), INT16_C( 21427), INT16_C( 28826), INT16_C( 25482), INT16_C(-11843), INT16_C( 15582), INT16_C( 20114), INT16_C(-14761), INT16_C(-15590), INT16_C( -4142), INT16_C( 29932), INT16_C(-30672), INT16_C( 6190), INT16_C( 26590), INT16_C( 10803), INT16_C(-16554), INT16_C( 15816), INT16_C( 14967), INT16_C( 24063), INT16_C(-14713), INT16_C( -8094), INT16_C(-16817), INT16_C( 25507), INT16_C( 19912), INT16_C(-19929), INT16_C(-12604), INT16_C(-17156)), simde_mm512_set_epi16(INT16_C( 27175), INT16_C(-27122), INT16_C( -6914), INT16_C( 10212), INT16_C( 13894), INT16_C( -4620), INT16_C(-10724), INT16_C( -3078), INT16_C( 29698), INT16_C( 6009), INT16_C( 28893), INT16_C(-31734), INT16_C( -3957), INT16_C( 6787), INT16_C( 9325), INT16_C( 7645), INT16_C( -2133), INT16_C( -9633), INT16_C(-22525), INT16_C( 1124), INT16_C( 21781), INT16_C( 17119), INT16_C(-19461), INT16_C(-32134), INT16_C( -7507), INT16_C( 19092), INT16_C( 21408), INT16_C(-14444), INT16_C( 22843), INT16_C( 28625), INT16_C( -2322), INT16_C( 5251)), simde_mm512_set_epi16(INT16_C( -6100), INT16_C( -8397), INT16_C( -4984), INT16_C( 19498), INT16_C( 5563), INT16_C( -8176), INT16_C( 32151), INT16_C( 31904), INT16_C( -4216), INT16_C(-17852), INT16_C(-13311), INT16_C(-13688), INT16_C(-10804), INT16_C(-22377), INT16_C(-13467), INT16_C( 22287), INT16_C(-28539), INT16_C( 15823), INT16_C(-16421), INT16_C( 9679), INT16_C( 27201), INT16_C( -1303), INT16_C(-31108), INT16_C( -9339), 
INT16_C( -7206), INT16_C(-27186), INT16_C( 27311), INT16_C(-25585), INT16_C( -2931), INT16_C( 16982), INT16_C(-10282), INT16_C(-22407)) }, { simde_mm512_set_epi16(INT16_C(-27508), INT16_C( 5509), INT16_C(-13526), INT16_C( 16909), INT16_C( 2419), INT16_C( 22142), INT16_C( -6109), INT16_C( -1177), INT16_C( 9839), INT16_C( 6329), INT16_C( -239), INT16_C(-15885), INT16_C( 3666), INT16_C( 20122), INT16_C( -1699), INT16_C( 6503), INT16_C( 29169), INT16_C( -4681), INT16_C( -2713), INT16_C(-24709), INT16_C( 7221), INT16_C( -3718), INT16_C( 970), INT16_C(-15558), INT16_C(-11011), INT16_C(-10787), INT16_C(-29970), INT16_C( 3894), INT16_C(-25914), INT16_C(-18758), INT16_C( 11824), INT16_C( -8868)), simde_mm512_set_epi16(INT16_C( 1604), INT16_C( 19874), INT16_C(-12133), INT16_C( -1966), INT16_C( 13041), INT16_C( 1566), INT16_C(-11791), INT16_C( -3425), INT16_C( 7377), INT16_C(-23380), INT16_C( -9249), INT16_C(-31251), INT16_C( 14877), INT16_C( 24009), INT16_C(-32316), INT16_C( 8308), INT16_C(-11725), INT16_C(-10230), INT16_C( 1074), INT16_C( 12341), INT16_C( 19989), INT16_C( 16491), INT16_C( 4144), INT16_C(-11714), INT16_C( 19285), INT16_C(-29198), INT16_C(-25258), INT16_C(-29514), INT16_C( 9755), INT16_C(-29385), INT16_C(-23111), INT16_C( -3412)), simde_mm512_set_epi16(INT16_C(-29112), INT16_C(-14365), INT16_C( -1393), INT16_C( 18875), INT16_C(-10622), INT16_C( 20576), INT16_C( 5682), INT16_C( 2248), INT16_C( 2462), INT16_C( 29709), INT16_C( 9010), INT16_C( 15366), INT16_C(-11211), INT16_C( -3887), INT16_C( 30617), INT16_C( -1805), INT16_C(-24642), INT16_C( 5549), INT16_C( -3787), INT16_C( 28486), INT16_C(-12768), INT16_C(-20209), INT16_C( -3174), INT16_C( -3844), INT16_C(-30296), INT16_C( 18411), INT16_C( -4712), INT16_C(-32128), INT16_C( 29867), INT16_C( 10627), INT16_C(-30601), INT16_C( -5456)) }, { simde_mm512_set_epi16(INT16_C( 691), INT16_C( -4823), INT16_C( -3253), INT16_C(-31392), INT16_C(-21784), INT16_C( -6740), INT16_C( 9130), INT16_C(-18273), INT16_C( 11275), 
INT16_C(-27092), INT16_C( 90), INT16_C(-20133), INT16_C( 30523), INT16_C( 27008), INT16_C( 28387), INT16_C( 17266), INT16_C( -9777), INT16_C( 27096), INT16_C( -8328), INT16_C( -6812), INT16_C(-22954), INT16_C( -4409), INT16_C( 21734), INT16_C(-19695), INT16_C(-11981), INT16_C(-21195), INT16_C( 18272), INT16_C( 28327), INT16_C( 7123), INT16_C(-32216), INT16_C( 24489), INT16_C(-15668)), simde_mm512_set_epi16(INT16_C(-21377), INT16_C( 15856), INT16_C( 7686), INT16_C(-28568), INT16_C(-15192), INT16_C( -9747), INT16_C( 11300), INT16_C( 27000), INT16_C( -6635), INT16_C( 3626), INT16_C( 12716), INT16_C(-30571), INT16_C( 31697), INT16_C( 5622), INT16_C( 24444), INT16_C( -8226), INT16_C( -8263), INT16_C( 2890), INT16_C( 26732), INT16_C( -8763), INT16_C(-13950), INT16_C( 27415), INT16_C( 7653), INT16_C( 31511), INT16_C(-21082), INT16_C( 2398), INT16_C( 23365), INT16_C(-12903), INT16_C(-18221), INT16_C( 4204), INT16_C(-20453), INT16_C( 15021)), simde_mm512_set_epi16(INT16_C( 22068), INT16_C(-20679), INT16_C(-10939), INT16_C( -2824), INT16_C( -6592), INT16_C( 3007), INT16_C( -2170), INT16_C( 20263), INT16_C( 17910), INT16_C(-30718), INT16_C(-12626), INT16_C( 10438), INT16_C( -1174), INT16_C( 21386), INT16_C( 3943), INT16_C( 25492), INT16_C( -1514), INT16_C( 24206), INT16_C( 30476), INT16_C( 1951), INT16_C( -9004), INT16_C(-31824), INT16_C( 14081), INT16_C( 14330), INT16_C( 9101), INT16_C(-23593), INT16_C( -5093), INT16_C(-24306), INT16_C( 25344), INT16_C( 29116), INT16_C(-20594), INT16_C(-30689)) }, { simde_mm512_set_epi16(INT16_C( 4451), INT16_C( -3121), INT16_C( 11648), INT16_C( 14185), INT16_C( -8499), INT16_C(-24679), INT16_C(-31633), INT16_C( 19019), INT16_C( 26210), INT16_C(-29943), INT16_C(-18883), INT16_C( 25468), INT16_C( 20366), INT16_C( 4961), INT16_C(-25468), INT16_C( -4158), INT16_C( 6653), INT16_C( -1720), INT16_C(-29723), INT16_C(-14244), INT16_C( -4917), INT16_C( 730), INT16_C(-20677), INT16_C( 16986), INT16_C( 9316), INT16_C( 28795), INT16_C(-18273), 
INT16_C(-29423), INT16_C(-23674), INT16_C( 7963), INT16_C( 28019), INT16_C( 13728)), simde_mm512_set_epi16(INT16_C(-10770), INT16_C( 29411), INT16_C( 30463), INT16_C( -4902), INT16_C(-20392), INT16_C(-28251), INT16_C( 11448), INT16_C( 27155), INT16_C(-11669), INT16_C( 11820), INT16_C(-16512), INT16_C( 10540), INT16_C( 17477), INT16_C(-19759), INT16_C( 28024), INT16_C(-14431), INT16_C( 24400), INT16_C( -7583), INT16_C(-12129), INT16_C( 28592), INT16_C(-31057), INT16_C(-18091), INT16_C( 19926), INT16_C(-29261), INT16_C( 7501), INT16_C( 16620), INT16_C( 6953), INT16_C( 3437), INT16_C( 5790), INT16_C( 5348), INT16_C( 17145), INT16_C(-28791)), simde_mm512_set_epi16(INT16_C( 15221), INT16_C(-32532), INT16_C(-18815), INT16_C( 19087), INT16_C( 11893), INT16_C( 3572), INT16_C( 22455), INT16_C( -8136), INT16_C(-27657), INT16_C( 23773), INT16_C( -2371), INT16_C( 14928), INT16_C( 2889), INT16_C( 24720), INT16_C( 12044), INT16_C( 10273), INT16_C(-17747), INT16_C( 5863), INT16_C(-17594), INT16_C( 22700), INT16_C( 26140), INT16_C( 18821), INT16_C( 24933), INT16_C(-19289), INT16_C( 1815), INT16_C( 12175), INT16_C(-25226), INT16_C( 32676), INT16_C(-29464), INT16_C( 2615), INT16_C( 10874), INT16_C(-23017)) }, { simde_mm512_set_epi16(INT16_C(-31561), INT16_C( 18949), INT16_C( -2287), INT16_C(-20534), INT16_C( -1057), INT16_C( -3046), INT16_C( 22138), INT16_C(-11031), INT16_C( 43), INT16_C( -6266), INT16_C(-20090), INT16_C(-22393), INT16_C(-26046), INT16_C(-23703), INT16_C( 28092), INT16_C( 6346), INT16_C( 10308), INT16_C( 572), INT16_C( 5), INT16_C( 15306), INT16_C(-19429), INT16_C( -5811), INT16_C(-27420), INT16_C(-29128), INT16_C(-13676), INT16_C( -3673), INT16_C(-26157), INT16_C( 19197), INT16_C(-27593), INT16_C(-20030), INT16_C( 3690), INT16_C( -3850)), simde_mm512_set_epi16(INT16_C(-11908), INT16_C( 14774), INT16_C( 5244), INT16_C( 18107), INT16_C(-16396), INT16_C( 31910), INT16_C(-28865), INT16_C(-20038), INT16_C(-19234), INT16_C(-15108), INT16_C(-10436), INT16_C( 19911), 
INT16_C( 3330), INT16_C( 28633), INT16_C( 10550), INT16_C( -9358), INT16_C( 23697), INT16_C( 19726), INT16_C(-26407), INT16_C(-18878), INT16_C( 4326), INT16_C(-22642), INT16_C(-17402), INT16_C( 16035), INT16_C( 14223), INT16_C(-15160), INT16_C( -9470), INT16_C( -3752), INT16_C( 6710), INT16_C( 21116), INT16_C( -9579), INT16_C( 10253)), simde_mm512_set_epi16(INT16_C(-19653), INT16_C( 4175), INT16_C( -7531), INT16_C( 26895), INT16_C( 15339), INT16_C( 30580), INT16_C(-14533), INT16_C( 9007), INT16_C( 19277), INT16_C( 8842), INT16_C( -9654), INT16_C( 23232), INT16_C(-29376), INT16_C( 13200), INT16_C( 17542), INT16_C( 15704), INT16_C(-13389), INT16_C(-19154), INT16_C( 26412), INT16_C(-31352), INT16_C(-23755), INT16_C( 16831), INT16_C(-10018), INT16_C( 20373), INT16_C(-27899), INT16_C( 11487), INT16_C(-16687), INT16_C( 22949), INT16_C( 31233), INT16_C( 24390), INT16_C( 13269), INT16_C(-14103)) }, { simde_mm512_set_epi16(INT16_C( 1468), INT16_C( -4389), INT16_C( 1296), INT16_C(-27715), INT16_C(-15620), INT16_C( 3731), INT16_C( -7289), INT16_C(-27703), INT16_C( 474), INT16_C( 27447), INT16_C( -9036), INT16_C( 9176), INT16_C( 2726), INT16_C(-12144), INT16_C( -2101), INT16_C( 26907), INT16_C(-24700), INT16_C( 1244), INT16_C( -3927), INT16_C(-22632), INT16_C( -7525), INT16_C( 17743), INT16_C( 15263), INT16_C( -3823), INT16_C( 27307), INT16_C( 32391), INT16_C(-23270), INT16_C(-29301), INT16_C( 23369), INT16_C(-15291), INT16_C( -5840), INT16_C( 18168)), simde_mm512_set_epi16(INT16_C( 23449), INT16_C( 17725), INT16_C(-20919), INT16_C( 31466), INT16_C( 31308), INT16_C( -2183), INT16_C(-31351), INT16_C(-32386), INT16_C( 26890), INT16_C(-30591), INT16_C(-12785), INT16_C(-23638), INT16_C(-31955), INT16_C( -9847), INT16_C( 19108), INT16_C(-19915), INT16_C( 4587), INT16_C( 27034), INT16_C( -19), INT16_C( 28332), INT16_C(-23789), INT16_C(-24960), INT16_C( -5839), INT16_C( 25722), INT16_C(-24423), INT16_C( 15592), INT16_C( 6092), INT16_C( -9272), INT16_C(-12796), INT16_C(-17663), 
INT16_C( -6154), INT16_C( 23859)), simde_mm512_set_epi16(INT16_C(-21981), INT16_C(-22114), INT16_C( 22215), INT16_C( 6355), INT16_C( 18608), INT16_C( 5914), INT16_C( 24062), INT16_C( 4683), INT16_C(-26416), INT16_C( -7498), INT16_C( 3749), INT16_C(-32722), INT16_C(-30855), INT16_C( -2297), INT16_C(-21209), INT16_C(-18714), INT16_C(-29287), INT16_C(-25790), INT16_C( -3908), INT16_C( 14572), INT16_C( 16264), INT16_C(-22833), INT16_C( 21102), INT16_C(-29545), INT16_C(-13806), INT16_C( 16799), INT16_C(-29362), INT16_C(-20029), INT16_C(-29371), INT16_C( 2372), INT16_C( 314), INT16_C( -5691)) }, { simde_mm512_set_epi16(INT16_C(-22741), INT16_C( 13394), INT16_C( -9417), INT16_C( 28906), INT16_C(-18980), INT16_C( -8463), INT16_C( 9174), INT16_C(-25605), INT16_C( 547), INT16_C( 3767), INT16_C(-12577), INT16_C(-16546), INT16_C( -1301), INT16_C( -7147), INT16_C( 26281), INT16_C( 29309), INT16_C( 29052), INT16_C(-30842), INT16_C( 5995), INT16_C( 6270), INT16_C( 20539), INT16_C( 10179), INT16_C(-26848), INT16_C( 14327), INT16_C( 15491), INT16_C( 18652), INT16_C( 19903), INT16_C( 30123), INT16_C( 25261), INT16_C(-17460), INT16_C( 10742), INT16_C( -4552)), simde_mm512_set_epi16(INT16_C( 5754), INT16_C(-23038), INT16_C(-16589), INT16_C(-23858), INT16_C( -3821), INT16_C( -4798), INT16_C( 30602), INT16_C(-28532), INT16_C( 11508), INT16_C( 7979), INT16_C( -3877), INT16_C( -5920), INT16_C(-24150), INT16_C(-24496), INT16_C( 17421), INT16_C( -1981), INT16_C( 27523), INT16_C( 26800), INT16_C( 25010), INT16_C( 27339), INT16_C( -9050), INT16_C( 19128), INT16_C( 15279), INT16_C( -1817), INT16_C(-13923), INT16_C( 5129), INT16_C(-22618), INT16_C( 27704), INT16_C( -4783), INT16_C( 31238), INT16_C(-30342), INT16_C( -8854)), simde_mm512_set_epi16(INT16_C(-28495), INT16_C(-29104), INT16_C( 7172), INT16_C(-12772), INT16_C(-15159), INT16_C( -3665), INT16_C(-21428), INT16_C( 2927), INT16_C(-10961), INT16_C( -4212), INT16_C( -8700), INT16_C(-10626), INT16_C( 22849), INT16_C( 17349), INT16_C( 8860), 
INT16_C( 31290), INT16_C( 1529), INT16_C( 7894), INT16_C(-19015), INT16_C(-21069), INT16_C( 29589), INT16_C( -8949), INT16_C( 23409), INT16_C( 16144), INT16_C( 29414), INT16_C( 13523), INT16_C(-23015), INT16_C( 2419), INT16_C( 30044), INT16_C( 16838), INT16_C(-24452), INT16_C( 4302)) }, { simde_mm512_set_epi16(INT16_C( 27021), INT16_C( 31131), INT16_C( 63), INT16_C(-10530), INT16_C( -1071), INT16_C(-31284), INT16_C(-21788), INT16_C(-16108), INT16_C(-15167), INT16_C( 25422), INT16_C( 14520), INT16_C(-13896), INT16_C( 20399), INT16_C( 31915), INT16_C(-16518), INT16_C( -6202), INT16_C(-16332), INT16_C( -3071), INT16_C(-15644), INT16_C( -7016), INT16_C( 13977), INT16_C(-13846), INT16_C(-23290), INT16_C( -2079), INT16_C( 4753), INT16_C( 14919), INT16_C(-18528), INT16_C( 7420), INT16_C( 12098), INT16_C( 31014), INT16_C( 17813), INT16_C(-14456)), simde_mm512_set_epi16(INT16_C(-12529), INT16_C( -3643), INT16_C(-28826), INT16_C(-12110), INT16_C( 8030), INT16_C( 20316), INT16_C( 27324), INT16_C( 24735), INT16_C( -6774), INT16_C( -2704), INT16_C(-31930), INT16_C( 6874), INT16_C( -3952), INT16_C( 2658), INT16_C( -904), INT16_C( -8319), INT16_C(-16424), INT16_C( 22778), INT16_C( 18985), INT16_C( 10063), INT16_C(-31751), INT16_C( 16016), INT16_C(-30217), INT16_C( 18364), INT16_C(-20176), INT16_C( -4961), INT16_C(-29576), INT16_C(-16634), INT16_C( -8011), INT16_C(-27110), INT16_C(-24526), INT16_C(-11504)), simde_mm512_set_epi16(INT16_C(-25986), INT16_C(-30762), INT16_C( 28889), INT16_C( 1580), INT16_C( -9101), INT16_C( 13936), INT16_C( 16424), INT16_C( 24693), INT16_C( -8393), INT16_C( 28126), INT16_C(-19086), INT16_C(-20770), INT16_C( 24351), INT16_C( 29257), INT16_C(-15614), INT16_C( 2117), INT16_C( 92), INT16_C(-25849), INT16_C( 30907), INT16_C(-17079), INT16_C(-19808), INT16_C(-29862), INT16_C( 6927), INT16_C(-20443), INT16_C( 24929), INT16_C( 19880), INT16_C( 11048), INT16_C( 24054), INT16_C( 20109), INT16_C( -7412), INT16_C(-23197), INT16_C( -2952)) } }; for (size_t i = 0 ; 
i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_sub_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_sub_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C(-2076524081), INT32_C( 1825078206), INT32_C(-1787857556), INT32_C(-1179707533), INT32_C( 233802890), INT32_C( 1015107327), INT32_C(-1130135421), INT32_C( 769270921), INT32_C( 970769619), INT32_C( -152032958), INT32_C(-1037455861), INT32_C( 1543352525), INT32_C( 1997985923), INT32_C( 1878044503), INT32_C( 49641854), INT32_C( 78691943)), simde_mm512_set_epi32(INT32_C( 1273959589), INT32_C( 730948807), INT32_C( 152082522), INT32_C( 516109144), INT32_C( -608654122), INT32_C( 326583665), INT32_C(-2143544685), INT32_C( 2015525957), INT32_C(-1762782050), INT32_C( 1423018518), INT32_C(-1835751490), INT32_C(-1847524510), INT32_C( 1152317453), INT32_C( -732966175), INT32_C( 916913335), INT32_C(-1961618071)), simde_mm512_set_epi32(INT32_C( 944483626), INT32_C( 1094129399), INT32_C(-1939940078), INT32_C(-1695816677), INT32_C( 842457012), INT32_C( 688523662), INT32_C( 1013409264), INT32_C(-1246255036), INT32_C(-1561415627), INT32_C(-1575051476), INT32_C( 798295629), INT32_C( -904090261), INT32_C( 845668470), INT32_C(-1683956618), INT32_C( -867271481), INT32_C( 2040310014)) }, { simde_mm512_set_epi32(INT32_C( 1516029066), INT32_C( 1696213023), INT32_C( 690963136), INT32_C( -395017807), INT32_C(-1227102652), INT32_C( 1731549524), INT32_C( 1416885076), INT32_C( -891143280), INT32_C(-1187279454), INT32_C( 699906112), INT32_C( 947982370), INT32_C(-1809113234), INT32_C( 892884346), INT32_C( 173663466), INT32_C( -426903082), INT32_C(-1178201759)), simde_mm512_set_epi32(INT32_C( 565568434), INT32_C( 1477571639), INT32_C( -962268135), INT32_C( 93801511), INT32_C( 1166085377), INT32_C(-1613873583), INT32_C( 1514751666), INT32_C( 
-9777248), INT32_C( 880861168), INT32_C(-1847118927), INT32_C( -454330268), INT32_C( -465889797), INT32_C( 607148382), INT32_C( -892911578), INT32_C(-1830027716), INT32_C( 171620514)), simde_mm512_set_epi32(INT32_C( 950460632), INT32_C( 218641384), INT32_C( 1653231271), INT32_C( -488819318), INT32_C( 1901779267), INT32_C( -949544189), INT32_C( -97866590), INT32_C( -881366032), INT32_C(-2068140622), INT32_C(-1747942257), INT32_C( 1402312638), INT32_C(-1343223437), INT32_C( 285735964), INT32_C( 1066575044), INT32_C( 1403124634), INT32_C(-1349822273)) }, { simde_mm512_set_epi32(INT32_C( -894737208), INT32_C( -894707310), INT32_C(-1734937643), INT32_C(-1821919338), INT32_C(-1629473200), INT32_C( 1017176222), INT32_C( 555630880), INT32_C( 1893052174), INT32_C( -395602197), INT32_C( 851153269), INT32_C( 1448617638), INT32_C( 1939202047), INT32_C(-1165352739), INT32_C( 784136789), INT32_C(-1222569677), INT32_C(-1663359991)), simde_mm512_set_epi32(INT32_C(-2134962383), INT32_C( 711344265), INT32_C( -499544380), INT32_C( 658556967), INT32_C(-1607446648), INT32_C(-2074003952), INT32_C( 449264495), INT32_C( -469125832), INT32_C(-1465796532), INT32_C( -575249454), INT32_C( -236269065), INT32_C( 567769266), INT32_C( -145854210), INT32_C( 502784491), INT32_C( -258238741), INT32_C( 1554234017)), simde_mm512_set_epi32(INT32_C( 1240225175), INT32_C(-1606051575), INT32_C(-1235393263), INT32_C( 1814490991), INT32_C( -22026552), INT32_C(-1203787122), INT32_C( 106366385), INT32_C(-1932789290), INT32_C( 1070194335), INT32_C( 1426402723), INT32_C( 1684886703), INT32_C( 1371432781), INT32_C(-1019498529), INT32_C( 281352298), INT32_C( -964330936), INT32_C( 1077373288)) }, { simde_mm512_set_epi32(INT32_C( -658606825), INT32_C(-1465142546), INT32_C(-1613315081), INT32_C( 1981327993), INT32_C( -540883338), INT32_C( -52568431), INT32_C( 513288938), INT32_C(-1741957410), INT32_C( -457290370), INT32_C( 949496535), INT32_C( -574503672), INT32_C( -516003313), INT32_C( 1705152287), INT32_C( 
268459282), INT32_C( -796672854), INT32_C(-2124069536)), simde_mm512_set_epi32(INT32_C(-1627464574), INT32_C( 688417349), INT32_C(-1204757032), INT32_C(-1541532775), INT32_C( -489028243), INT32_C( -14341503), INT32_C( 1546753292), INT32_C( -383774267), INT32_C( 1479759913), INT32_C(-1792003336), INT32_C( 324281321), INT32_C(-1031805126), INT32_C(-1668912025), INT32_C( -271675366), INT32_C(-1502890080), INT32_C( -582208760)), simde_mm512_set_epi32(INT32_C( 968857749), INT32_C( 2141407401), INT32_C( -408558049), INT32_C( -772106528), INT32_C( -51855095), INT32_C( -38226928), INT32_C(-1033464354), INT32_C(-1358183143), INT32_C(-1937050283), INT32_C(-1553467425), INT32_C( -898784993), INT32_C( 515801813), INT32_C( -920902984), INT32_C( 540134648), INT32_C( 706217226), INT32_C(-1541860776)) }, { simde_mm512_set_epi32(INT32_C( 1656401797), INT32_C( 50049750), INT32_C( -488722048), INT32_C( 1532620410), INT32_C( 761833085), INT32_C( -28253750), INT32_C( 1071891913), INT32_C( -578065038), INT32_C( 2114869114), INT32_C( 1114386003), INT32_C( -192755303), INT32_C( -163390023), INT32_C(-1012186074), INT32_C( -258665152), INT32_C( 548389384), INT32_C( -601025611)), simde_mm512_set_epi32(INT32_C( -962813354), INT32_C(-1563683363), INT32_C( 1476422960), INT32_C(-1996230234), INT32_C( 594356694), INT32_C( -37573818), INT32_C( 2109710080), INT32_C(-2049942476), INT32_C(-1449482441), INT32_C(-1892730921), INT32_C( 1298337068), INT32_C( 30251788), INT32_C( -250852108), INT32_C(-2130168940), INT32_C( 414197854), INT32_C( -971416192)), simde_mm512_set_epi32(INT32_C(-1675752145), INT32_C( 1613733113), INT32_C(-1965145008), INT32_C( -766116652), INT32_C( 167476391), INT32_C( 9320068), INT32_C(-1037818167), INT32_C( 1471877438), INT32_C( -730615741), INT32_C(-1287850372), INT32_C(-1491092371), INT32_C( -193641811), INT32_C( -761333966), INT32_C( 1871503788), INT32_C( 134191530), INT32_C( 370390581)) }, { simde_mm512_set_epi32(INT32_C( 841332080), INT32_C( 332746710), INT32_C( 
1180202036), INT32_C(-1365461084), INT32_C( -972107726), INT32_C( -919074620), INT32_C( 336794208), INT32_C(-2145769013), INT32_C(-1090767268), INT32_C( 1447456701), INT32_C(-1878509449), INT32_C( 1479468832), INT32_C(-2038652659), INT32_C( -428110707), INT32_C( -605535334), INT32_C( 1876977582)), simde_mm512_set_epi32(INT32_C(-1104919125), INT32_C(-1965384352), INT32_C( 1846340148), INT32_C( 1439724559), INT32_C( 1174009148), INT32_C( -500908704), INT32_C( 2074430235), INT32_C( 746110301), INT32_C( -229497465), INT32_C( 567264435), INT32_C(-1820479715), INT32_C( -409682629), INT32_C(-1976550605), INT32_C(-1717329929), INT32_C( 392593328), INT32_C( 809330056)), simde_mm512_set_epi32(INT32_C( 1946251205), INT32_C(-1996836234), INT32_C( -666138112), INT32_C( 1489781653), INT32_C(-2146116874), INT32_C( -418165916), INT32_C(-1737636027), INT32_C( 1403087982), INT32_C( -861269803), INT32_C( 880192266), INT32_C( -58029734), INT32_C( 1889151461), INT32_C( -62102054), INT32_C( 1289219222), INT32_C( -998128662), INT32_C( 1067647526)) }, { simde_mm512_set_epi32(INT32_C(-1188475624), INT32_C(-1471681451), INT32_C( -219755555), INT32_C(-1657771963), INT32_C( -257604504), INT32_C( 874981434), INT32_C(-1610485047), INT32_C(-1272947332), INT32_C( 1561476022), INT32_C( 375243187), INT32_C( 1479356717), INT32_C( 1523794483), INT32_C(-1698967593), INT32_C( -80864233), INT32_C( 1644091986), INT32_C( -229623607)), simde_mm512_set_epi32(INT32_C( 9741774), INT32_C( 693305140), INT32_C(-1221395242), INT32_C(-1923328842), INT32_C( 85084148), INT32_C( 1125599333), INT32_C( 2042080920), INT32_C( -456911551), INT32_C( -399701639), INT32_C(-1860388051), INT32_C( -699039468), INT32_C( 84523143), INT32_C(-1293034841), INT32_C(-1626054083), INT32_C( 96950550), INT32_C( 1663457642)), simde_mm512_set_epi32(INT32_C(-1198217398), INT32_C( 2129980705), INT32_C( 1001639687), INT32_C( 265556879), INT32_C( -342688652), INT32_C( -250617899), INT32_C( 642401329), INT32_C( -816035781), INT32_C( 
1961177661), INT32_C(-2059336058), INT32_C(-2116571111), INT32_C( 1439271340), INT32_C( -405932752), INT32_C( 1545189850), INT32_C( 1547141436), INT32_C(-1893081249)) }, { simde_mm512_set_epi32(INT32_C(-1473946007), INT32_C( 121708864), INT32_C( 1020809582), INT32_C( 1669312470), INT32_C( -682688365), INT32_C( 500732292), INT32_C( 1673154382), INT32_C(-1552445241), INT32_C( 2068495467), INT32_C(-2039438173), INT32_C( 869593130), INT32_C( -471794528), INT32_C(-1539319849), INT32_C( 1041904784), INT32_C( -120989465), INT32_C(-1180697219)), simde_mm512_set_epi32(INT32_C( 1388066655), INT32_C( 1341381019), INT32_C(-1738591736), INT32_C( -783428109), INT32_C(-1884288937), INT32_C( 1340467391), INT32_C(-1349575878), INT32_C(-1411283384), INT32_C( 1173507492), INT32_C( 1805408001), INT32_C( 1184512890), INT32_C(-1180223583), INT32_C( -121255394), INT32_C(-2007254522), INT32_C( 970045213), INT32_C(-2132245994)), simde_mm512_set_epi32(INT32_C( 1432954634), INT32_C(-1219672155), INT32_C(-1535565978), INT32_C(-1842226717), INT32_C( 1201600572), INT32_C( -839735099), INT32_C(-1272237036), INT32_C( -141161857), INT32_C( 894987975), INT32_C( 450121122), INT32_C( -314919760), INT32_C( 708429055), INT32_C(-1418064455), INT32_C(-1245807990), INT32_C(-1091034678), INT32_C( 951548775)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_sub_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_sub_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C(-8918603015426376770), INT64_C(-7678789729811228813), INT64_C( 1004175767275392767), INT64_C(-4853894672476920695), INT64_C( 4169423769698314562), INT64_C(-4455838992495169331), INT64_C( 8581284199031418711), INT64_C( 213210139521498727)), simde_mm512_set_epi64(INT64_C( 5471614771911550151), INT64_C( 653189458799309656), 
INT64_C(-2614149548239010447), INT64_C(-9206454317574095803), INT64_C(-7571091253302818282), INT64_C(-7884492610685828254), INT64_C( 4949165778807018209), INT64_C( 3938112789424641385)), simde_mm512_set_epi64(INT64_C( 4056526286371624695), INT64_C(-8331979188610538469), INT64_C( 3618325315514403214), INT64_C( 4352559645097175108), INT64_C(-6706229050708418772), INT64_C( 3428653618190658923), INT64_C( 3632118420224400502), INT64_C(-3724902649903142658)) }, { simde_mm512_set_epi64(INT64_C( 6511295259951638559), INT64_C( 2967664075761549745), INT64_C(-5270365757443319468), INT64_C( 6085475067014298512), INT64_C(-5099326425442830272), INT64_C( 4071553278820425582), INT64_C( 3834909065354011882), INT64_C(-1833534772634840735)), simde_mm512_set_epi64(INT64_C( 2429097929157506103), INT64_C(-4132910169714111449), INT64_C( 5008298561239924305), INT64_C( 6505808871316705184), INT64_C( 3783269911324210097), INT64_C(-1951333638813837829), INT64_C( 2607682447911370790), INT64_C(-7859909190821955422)), simde_mm512_set_epi64(INT64_C( 4082197330794132456), INT64_C( 7100574245475661194), INT64_C( 8168079755026307843), INT64_C( -420333804302406672), INT64_C(-8882596336767040369), INT64_C( 6022886917634263411), INT64_C( 1227226617442641092), INT64_C( 6026374418187114687)) }, { simde_mm512_set_epi64(INT64_C(-3842867043474089582), INT64_C(-7451500434811275370), INT64_C(-6998534102691290978), INT64_C( 2386416460140752654), INT64_C(-1699098497489596043), INT64_C( 6221765381557968895), INT64_C(-5005151901524886955), INT64_C(-5250896777164676087)), simde_mm512_set_epi64(INT64_C(-9169593612463882103), INT64_C(-2145526774342039513), INT64_C(-6903930781003860464), INT64_C( 1929576317104796984), INT64_C(-6295548163810499630), INT64_C(-1014767906663728974), INT64_C( -626439061431131669), INT64_C(-1109126945600980319)), simde_mm512_set_epi64(INT64_C( 5326726568989792521), INT64_C(-5305973660469235857), INT64_C( -94603321687430514), INT64_C( 456840143035955670), INT64_C( 4596449666320903587), 
INT64_C( 7236533288221697869), INT64_C(-4378712840093755286), INT64_C(-4141769831563695768)) }, { simde_mm512_set_epi64(INT64_C(-2828694771467570450), INT64_C(-6929135509057262983), INT64_C(-2323076243418915183), INT64_C( 2204559204661581534), INT64_C(-1964047182976242985), INT64_C(-2467474478892946929), INT64_C( 7323573307633065234), INT64_C(-3421683851370085024)), simde_mm512_set_epi64(INT64_C(-6989907120040154555), INT64_C(-5174392049312590951), INT64_C(-2100360306224715135), INT64_C( 6643254808031531461), INT64_C( 6355520434769769208), INT64_C( 1392777671661840186), INT64_C(-7167922563252842470), INT64_C(-6454863739370065144)), simde_mm512_set_epi64(INT64_C( 4161212348572584105), INT64_C(-1754743459744672032), INT64_C( -222715937194200048), INT64_C(-4438695603369949927), INT64_C(-8319567617746012193), INT64_C(-3860252150554787115), INT64_C(-3955248202823643912), INT64_C( 3033179887999980120)) }, { simde_mm512_set_epi64(INT64_C( 7114191547200680662), INT64_C(-2099045211461521798), INT64_C( 3272048189352501706), INT64_C( 4603740714898779506), INT64_C( 9083293681064881747), INT64_C( -827877718383993415), INT64_C(-4347306081260333760), INT64_C( 2355314473447527349)), simde_mm512_set_epi64(INT64_C(-4135251864850786851), INT64_C( 6341188330562253222), INT64_C( 2552742567146072902), INT64_C( 9061135799886568500), INT64_C(-6225479677819013161), INT64_C( 5576315246274779916), INT64_C(-1077401597827861612), INT64_C( 1778966240326933888)), simde_mm512_set_epi64(INT64_C(-7197300661658084103), INT64_C(-8440233542023775020), INT64_C( 719305622206428804), INT64_C(-4457395084987788994), INT64_C(-3137970714825656708), INT64_C(-6404192964658773331), INT64_C(-3269904483432472148), INT64_C( 576348233120593461)) }, { simde_mm512_set_epi64(INT64_C( 3613493769008402390), INT64_C( 5068929150222120868), INT64_C(-4175170887983036220), INT64_C( 1446520110991419851), INT64_C(-4684809742159810627), INT64_C(-8068136647202511072), INT64_C(-8755946494441583475), 
INT64_C(-2600754454225459282)), simde_mm512_set_epi64(INT64_C(-4745591504270353056), INT64_C( 7929970554391524367), INT64_C( 5042330899658882400), INT64_C( 8909610017904704861), INT64_C( -985684106122640205), INT64_C(-7818900835071115973), INT64_C(-8489220204786376713), INT64_C( 1686175505197131144)), simde_mm512_set_epi64(INT64_C( 8359085273278755446), INT64_C(-2861041404169403499), INT64_C(-9217501787641918620), INT64_C(-7463089906913285010), INT64_C(-3699125636037170422), INT64_C( -249235812131395099), INT64_C( -266726289655206762), INT64_C(-4286929959422590426)) }, { simde_mm512_set_epi64(INT64_C(-5104463934349906859), INT64_C( -943842919202133947), INT64_C(-1106402919107319750), INT64_C(-6916980604540002948), INT64_C( 6706488448353419699), INT64_C( 6353788720156721715), INT64_C(-7297010244684735465), INT64_C( 7061321315551033545)), simde_mm512_set_epi64(INT64_C( 41840601428328244), INT64_C(-5245852617508367178), INT64_C( 365433634193623141), INT64_C( 8770670771023648065), INT64_C(-1716705465228018899), INT64_C(-3002351653588715385), INT64_C(-5553542352014646723), INT64_C( 416399443242670442)), simde_mm512_set_epi64(INT64_C(-5146304535778235103), INT64_C( 4302009698306233231), INT64_C(-1471836553300942891), INT64_C( 2759092698145900603), INT64_C( 8423193913581438598), INT64_C(-9090603699964114516), INT64_C(-1743467892670088742), INT64_C( 6644921872308363103)) }, { simde_mm512_set_epi64(INT64_C(-6330549896013078208), INT64_C( 4384343771802742742), INT64_C(-2932124200533978748), INT64_C( 7186143354591613127), INT64_C( 8884120384944776355), INT64_C( 3734874057999449248), INT64_C(-6611328408496753520), INT64_C( -519645792221266563)), simde_mm512_set_epi64(INT64_C( 5961700889234495899), INT64_C(-7467194643704326669), INT64_C(-8092959359289136961), INT64_C(-5796384256596801976), INT64_C( 5040176301556389633), INT64_C( 5087444127355189153), INT64_C( -520787949405881850), INT64_C( 4166312467639075350)), simde_mm512_set_epi64(INT64_C( 6154493288461977509), 
INT64_C(-6595205658202482205), INT64_C( 5160835158755158213), INT64_C(-5464216462521136513), INT64_C( 3843944083388386722), INT64_C(-1352570069355739905), INT64_C(-6090540459090871670), INT64_C(-4685958259860341913)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_sub_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } /* Table-driven test for simde_mm512_sub_ps. Each of the 8 entries in test_vec holds two simde__m512 operands (a, b) and a stored vector r; the loop that closes the function calls simde_mm512_sub_ps(a, b) for every entry and compares the outcome with r through simde_assert_m512_close(..., 1), after which the function returns 0. NOTE(review): the trailing 1 is presumably a precision argument - confirm against the macro definition. */ static int test_simde_mm512_sub_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 b; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -659.63), SIMDE_FLOAT32_C( -759.67), SIMDE_FLOAT32_C( -847.92), SIMDE_FLOAT32_C( -61.45), SIMDE_FLOAT32_C( -337.36), SIMDE_FLOAT32_C( 139.68), SIMDE_FLOAT32_C( 658.69), SIMDE_FLOAT32_C( 86.55), SIMDE_FLOAT32_C( -150.13), SIMDE_FLOAT32_C( 450.66), SIMDE_FLOAT32_C( -527.30), SIMDE_FLOAT32_C( -641.78), SIMDE_FLOAT32_C( 929.20), SIMDE_FLOAT32_C( -281.32), SIMDE_FLOAT32_C( -125.47), SIMDE_FLOAT32_C( -963.36)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -406.77), SIMDE_FLOAT32_C( -929.18), SIMDE_FLOAT32_C( 716.57), SIMDE_FLOAT32_C( 1.83), SIMDE_FLOAT32_C( 179.14), SIMDE_FLOAT32_C( 145.16), SIMDE_FLOAT32_C( -463.41), SIMDE_FLOAT32_C( -573.03), SIMDE_FLOAT32_C( 33.04), SIMDE_FLOAT32_C( 167.46), SIMDE_FLOAT32_C( -891.13), SIMDE_FLOAT32_C( 473.74), SIMDE_FLOAT32_C( -547.95), SIMDE_FLOAT32_C( 516.90), SIMDE_FLOAT32_C( -69.62), SIMDE_FLOAT32_C( -976.88)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -252.86), SIMDE_FLOAT32_C( 169.51), SIMDE_FLOAT32_C( -1564.49), SIMDE_FLOAT32_C( -63.28), SIMDE_FLOAT32_C( -516.50), SIMDE_FLOAT32_C( -5.48), SIMDE_FLOAT32_C( 1122.10), SIMDE_FLOAT32_C( 659.58), SIMDE_FLOAT32_C( -183.17), SIMDE_FLOAT32_C( 283.20), SIMDE_FLOAT32_C( 363.83), SIMDE_FLOAT32_C( -1115.52), SIMDE_FLOAT32_C( 1477.15), SIMDE_FLOAT32_C( -798.22), SIMDE_FLOAT32_C( -55.85), SIMDE_FLOAT32_C( 13.52)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -311.95), SIMDE_FLOAT32_C( -956.32), SIMDE_FLOAT32_C( 248.48), 
SIMDE_FLOAT32_C( 995.45), SIMDE_FLOAT32_C( 139.87), SIMDE_FLOAT32_C( 783.05), SIMDE_FLOAT32_C( 584.21), SIMDE_FLOAT32_C( -920.08), SIMDE_FLOAT32_C( -210.14), SIMDE_FLOAT32_C( 816.06), SIMDE_FLOAT32_C( -193.68), SIMDE_FLOAT32_C( 585.03), SIMDE_FLOAT32_C( -674.08), SIMDE_FLOAT32_C( 157.57), SIMDE_FLOAT32_C( -919.13), SIMDE_FLOAT32_C( 451.36)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -736.64), SIMDE_FLOAT32_C( 551.91), SIMDE_FLOAT32_C( -457.00), SIMDE_FLOAT32_C( -294.64), SIMDE_FLOAT32_C( -589.82), SIMDE_FLOAT32_C( 788.44), SIMDE_FLOAT32_C( -717.27), SIMDE_FLOAT32_C( 147.83), SIMDE_FLOAT32_C( -294.04), SIMDE_FLOAT32_C( -678.25), SIMDE_FLOAT32_C( 428.59), SIMDE_FLOAT32_C( -340.21), SIMDE_FLOAT32_C( 447.13), SIMDE_FLOAT32_C( -558.56), SIMDE_FLOAT32_C( -584.22), SIMDE_FLOAT32_C( 801.21)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 424.69), SIMDE_FLOAT32_C( -1508.23), SIMDE_FLOAT32_C( 705.48), SIMDE_FLOAT32_C( 1290.09), SIMDE_FLOAT32_C( 729.69), SIMDE_FLOAT32_C( -5.39), SIMDE_FLOAT32_C( 1301.48), SIMDE_FLOAT32_C( -1067.91), SIMDE_FLOAT32_C( 83.90), SIMDE_FLOAT32_C( 1494.31), SIMDE_FLOAT32_C( -622.27), SIMDE_FLOAT32_C( 925.24), SIMDE_FLOAT32_C( -1121.21), SIMDE_FLOAT32_C( 716.13), SIMDE_FLOAT32_C( -334.91), SIMDE_FLOAT32_C( -349.85)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -668.75), SIMDE_FLOAT32_C( -693.34), SIMDE_FLOAT32_C( 34.22), SIMDE_FLOAT32_C( 781.55), SIMDE_FLOAT32_C( 732.13), SIMDE_FLOAT32_C( -735.61), SIMDE_FLOAT32_C( -765.87), SIMDE_FLOAT32_C( -276.25), SIMDE_FLOAT32_C( 583.37), SIMDE_FLOAT32_C( 151.60), SIMDE_FLOAT32_C( -526.34), SIMDE_FLOAT32_C( -118.48), SIMDE_FLOAT32_C( -603.65), SIMDE_FLOAT32_C( -96.99), SIMDE_FLOAT32_C( -634.86), SIMDE_FLOAT32_C( 225.44)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 5.83), SIMDE_FLOAT32_C( 767.38), SIMDE_FLOAT32_C( 251.47), SIMDE_FLOAT32_C( -790.79), SIMDE_FLOAT32_C( 317.44), SIMDE_FLOAT32_C( 889.98), SIMDE_FLOAT32_C( 932.08), SIMDE_FLOAT32_C( 879.75), SIMDE_FLOAT32_C( 583.36), SIMDE_FLOAT32_C( 192.11), SIMDE_FLOAT32_C( 241.22), 
SIMDE_FLOAT32_C( -741.26), SIMDE_FLOAT32_C( 815.78), SIMDE_FLOAT32_C( -325.43), SIMDE_FLOAT32_C( 457.34), SIMDE_FLOAT32_C( 430.70)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -674.58), SIMDE_FLOAT32_C( -1460.72), SIMDE_FLOAT32_C( -217.25), SIMDE_FLOAT32_C( 1572.34), SIMDE_FLOAT32_C( 414.69), SIMDE_FLOAT32_C( -1625.59), SIMDE_FLOAT32_C( -1697.95), SIMDE_FLOAT32_C( -1156.00), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -40.51), SIMDE_FLOAT32_C( -767.56), SIMDE_FLOAT32_C( 622.78), SIMDE_FLOAT32_C( -1419.43), SIMDE_FLOAT32_C( 228.44), SIMDE_FLOAT32_C( -1092.20), SIMDE_FLOAT32_C( -205.26)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -679.43), SIMDE_FLOAT32_C( 282.17), SIMDE_FLOAT32_C( 993.32), SIMDE_FLOAT32_C( 821.29), SIMDE_FLOAT32_C( 165.53), SIMDE_FLOAT32_C( 519.53), SIMDE_FLOAT32_C( 873.49), SIMDE_FLOAT32_C( 728.89), SIMDE_FLOAT32_C( 317.74), SIMDE_FLOAT32_C( -77.37), SIMDE_FLOAT32_C( 975.52), SIMDE_FLOAT32_C( 188.84), SIMDE_FLOAT32_C( -557.86), SIMDE_FLOAT32_C( 759.72), SIMDE_FLOAT32_C( -874.99), SIMDE_FLOAT32_C( 10.90)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 242.15), SIMDE_FLOAT32_C( 438.99), SIMDE_FLOAT32_C( 772.28), SIMDE_FLOAT32_C( -279.74), SIMDE_FLOAT32_C( -310.93), SIMDE_FLOAT32_C( -848.99), SIMDE_FLOAT32_C( 222.85), SIMDE_FLOAT32_C( 300.16), SIMDE_FLOAT32_C( 693.31), SIMDE_FLOAT32_C( 248.74), SIMDE_FLOAT32_C( 748.13), SIMDE_FLOAT32_C( -760.98), SIMDE_FLOAT32_C( 787.06), SIMDE_FLOAT32_C( 732.48), SIMDE_FLOAT32_C( -205.98), SIMDE_FLOAT32_C( 629.02)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -921.58), SIMDE_FLOAT32_C( -156.82), SIMDE_FLOAT32_C( 221.04), SIMDE_FLOAT32_C( 1101.03), SIMDE_FLOAT32_C( 476.46), SIMDE_FLOAT32_C( 1368.52), SIMDE_FLOAT32_C( 650.64), SIMDE_FLOAT32_C( 428.73), SIMDE_FLOAT32_C( -375.57), SIMDE_FLOAT32_C( -326.11), SIMDE_FLOAT32_C( 227.39), SIMDE_FLOAT32_C( 949.82), SIMDE_FLOAT32_C( -1344.92), SIMDE_FLOAT32_C( 27.24), SIMDE_FLOAT32_C( -669.01), SIMDE_FLOAT32_C( -618.12)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 271.85), SIMDE_FLOAT32_C( 70.43), 
SIMDE_FLOAT32_C( 982.50), SIMDE_FLOAT32_C( 45.42), SIMDE_FLOAT32_C( 118.63), SIMDE_FLOAT32_C( -985.91), SIMDE_FLOAT32_C( 8.06), SIMDE_FLOAT32_C( 547.65), SIMDE_FLOAT32_C( -976.69), SIMDE_FLOAT32_C( -286.32), SIMDE_FLOAT32_C( 986.84), SIMDE_FLOAT32_C( 730.82), SIMDE_FLOAT32_C( -481.07), SIMDE_FLOAT32_C( 923.92), SIMDE_FLOAT32_C( 879.55), SIMDE_FLOAT32_C( 720.13)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 551.66), SIMDE_FLOAT32_C( -312.49), SIMDE_FLOAT32_C( -723.23), SIMDE_FLOAT32_C( -17.59), SIMDE_FLOAT32_C( 325.03), SIMDE_FLOAT32_C( -395.41), SIMDE_FLOAT32_C( 883.19), SIMDE_FLOAT32_C( -807.12), SIMDE_FLOAT32_C( -228.68), SIMDE_FLOAT32_C( 772.42), SIMDE_FLOAT32_C( -645.24), SIMDE_FLOAT32_C( -500.86), SIMDE_FLOAT32_C( -15.19), SIMDE_FLOAT32_C( 910.24), SIMDE_FLOAT32_C( 528.66), SIMDE_FLOAT32_C( -744.64)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -279.81), SIMDE_FLOAT32_C( 382.92), SIMDE_FLOAT32_C( 1705.73), SIMDE_FLOAT32_C( 63.01), SIMDE_FLOAT32_C( -206.40), SIMDE_FLOAT32_C( -590.50), SIMDE_FLOAT32_C( -875.13), SIMDE_FLOAT32_C( 1354.77), SIMDE_FLOAT32_C( -748.01), SIMDE_FLOAT32_C( -1058.74), SIMDE_FLOAT32_C( 1632.08), SIMDE_FLOAT32_C( 1231.68), SIMDE_FLOAT32_C( -465.88), SIMDE_FLOAT32_C( 13.68), SIMDE_FLOAT32_C( 350.89), SIMDE_FLOAT32_C( 1464.77)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 84.80), SIMDE_FLOAT32_C( -329.58), SIMDE_FLOAT32_C( 766.75), SIMDE_FLOAT32_C( -652.57), SIMDE_FLOAT32_C( -735.85), SIMDE_FLOAT32_C( 809.23), SIMDE_FLOAT32_C( 200.31), SIMDE_FLOAT32_C( -623.13), SIMDE_FLOAT32_C( -845.05), SIMDE_FLOAT32_C( 364.16), SIMDE_FLOAT32_C( 572.02), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( -325.98), SIMDE_FLOAT32_C( -311.07), SIMDE_FLOAT32_C( 800.65), SIMDE_FLOAT32_C( -125.96)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 485.48), SIMDE_FLOAT32_C( -140.23), SIMDE_FLOAT32_C( -453.31), SIMDE_FLOAT32_C( -34.02), SIMDE_FLOAT32_C( 893.13), SIMDE_FLOAT32_C( 152.27), SIMDE_FLOAT32_C( 79.60), SIMDE_FLOAT32_C( -817.18), SIMDE_FLOAT32_C( -608.22), SIMDE_FLOAT32_C( -450.43), 
SIMDE_FLOAT32_C( 547.33), SIMDE_FLOAT32_C( -843.17), SIMDE_FLOAT32_C( 492.07), SIMDE_FLOAT32_C( 125.25), SIMDE_FLOAT32_C( 50.68), SIMDE_FLOAT32_C( 718.03)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -400.68), SIMDE_FLOAT32_C( -189.35), SIMDE_FLOAT32_C( 1220.06), SIMDE_FLOAT32_C( -618.55), SIMDE_FLOAT32_C( -1628.98), SIMDE_FLOAT32_C( 656.96), SIMDE_FLOAT32_C( 120.71), SIMDE_FLOAT32_C( 194.05), SIMDE_FLOAT32_C( -236.83), SIMDE_FLOAT32_C( 814.59), SIMDE_FLOAT32_C( 24.69), SIMDE_FLOAT32_C( 843.97), SIMDE_FLOAT32_C( -818.05), SIMDE_FLOAT32_C( -436.32), SIMDE_FLOAT32_C( 749.97), SIMDE_FLOAT32_C( -843.99)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -677.15), SIMDE_FLOAT32_C( 104.38), SIMDE_FLOAT32_C( -475.85), SIMDE_FLOAT32_C( 787.23), SIMDE_FLOAT32_C( 133.69), SIMDE_FLOAT32_C( -960.64), SIMDE_FLOAT32_C( 242.81), SIMDE_FLOAT32_C( -225.39), SIMDE_FLOAT32_C( 314.69), SIMDE_FLOAT32_C( 228.04), SIMDE_FLOAT32_C( -592.56), SIMDE_FLOAT32_C( 407.24), SIMDE_FLOAT32_C( -825.26), SIMDE_FLOAT32_C( -290.43), SIMDE_FLOAT32_C( 962.34), SIMDE_FLOAT32_C( 893.07)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -995.46), SIMDE_FLOAT32_C( 431.24), SIMDE_FLOAT32_C( -960.38), SIMDE_FLOAT32_C( -49.08), SIMDE_FLOAT32_C( 813.87), SIMDE_FLOAT32_C( 674.48), SIMDE_FLOAT32_C( 397.88), SIMDE_FLOAT32_C( -954.85), SIMDE_FLOAT32_C( 446.57), SIMDE_FLOAT32_C( 897.67), SIMDE_FLOAT32_C( 880.04), SIMDE_FLOAT32_C( 250.06), SIMDE_FLOAT32_C( -272.88), SIMDE_FLOAT32_C( -311.12), SIMDE_FLOAT32_C( 208.86), SIMDE_FLOAT32_C( -234.41)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 318.31), SIMDE_FLOAT32_C( -326.86), SIMDE_FLOAT32_C( 484.53), SIMDE_FLOAT32_C( 836.31), SIMDE_FLOAT32_C( -680.18), SIMDE_FLOAT32_C( -1635.12), SIMDE_FLOAT32_C( -155.07), SIMDE_FLOAT32_C( 729.46), SIMDE_FLOAT32_C( -131.88), SIMDE_FLOAT32_C( -669.63), SIMDE_FLOAT32_C( -1472.60), SIMDE_FLOAT32_C( 157.18), SIMDE_FLOAT32_C( -552.38), SIMDE_FLOAT32_C( 20.69), SIMDE_FLOAT32_C( 753.48), SIMDE_FLOAT32_C( 1127.48)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -375.37), 
SIMDE_FLOAT32_C( 635.19), SIMDE_FLOAT32_C( -375.80), SIMDE_FLOAT32_C( 342.82), SIMDE_FLOAT32_C( -159.29), SIMDE_FLOAT32_C( 450.42), SIMDE_FLOAT32_C( 65.30), SIMDE_FLOAT32_C( 7.10), SIMDE_FLOAT32_C( -943.32), SIMDE_FLOAT32_C( -222.67), SIMDE_FLOAT32_C( -766.83), SIMDE_FLOAT32_C( 277.09), SIMDE_FLOAT32_C( 50.31), SIMDE_FLOAT32_C( 780.30), SIMDE_FLOAT32_C( -514.83), SIMDE_FLOAT32_C( 450.20)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -353.63), SIMDE_FLOAT32_C( 190.41), SIMDE_FLOAT32_C( 122.56), SIMDE_FLOAT32_C( 371.55), SIMDE_FLOAT32_C( -453.54), SIMDE_FLOAT32_C( -448.42), SIMDE_FLOAT32_C( 943.54), SIMDE_FLOAT32_C( -548.29), SIMDE_FLOAT32_C( 313.64), SIMDE_FLOAT32_C( -524.65), SIMDE_FLOAT32_C( 682.10), SIMDE_FLOAT32_C( -220.88), SIMDE_FLOAT32_C( -36.78), SIMDE_FLOAT32_C( -595.06), SIMDE_FLOAT32_C( 283.20), SIMDE_FLOAT32_C( 943.66)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -21.74), SIMDE_FLOAT32_C( 444.78), SIMDE_FLOAT32_C( -498.36), SIMDE_FLOAT32_C( -28.73), SIMDE_FLOAT32_C( 294.25), SIMDE_FLOAT32_C( 898.84), SIMDE_FLOAT32_C( -878.24), SIMDE_FLOAT32_C( 555.39), SIMDE_FLOAT32_C( -1256.96), SIMDE_FLOAT32_C( 301.98), SIMDE_FLOAT32_C( -1448.93), SIMDE_FLOAT32_C( 497.97), SIMDE_FLOAT32_C( 87.09), SIMDE_FLOAT32_C( 1375.36), SIMDE_FLOAT32_C( -798.03), SIMDE_FLOAT32_C( -493.46)) } }; /* Run simde_mm512_sub_ps on every test_vec entry and compare the outcome with the stored r. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_sub_ps(test_vec[i].a, test_vec[i].b); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven test for simde_mm512_sub_pd. Each of the 8 entries in test_vec holds two simde__m512d operands (a, b) and a stored vector r; the loop that closes the function calls simde_mm512_sub_pd(a, b) for every entry and compares the outcome with r through simde_assert_m512d_close(..., 1), after which the function returns 0. NOTE(review): the trailing 1 is presumably a precision argument - confirm against the macro definition. */ static int test_simde_mm512_sub_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d b; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -150.13), SIMDE_FLOAT64_C( 450.66), SIMDE_FLOAT64_C( -527.30), SIMDE_FLOAT64_C( -641.78), SIMDE_FLOAT64_C( 929.20), SIMDE_FLOAT64_C( -281.32), SIMDE_FLOAT64_C( -125.47), SIMDE_FLOAT64_C( -963.36)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 33.04), SIMDE_FLOAT64_C( 167.46), SIMDE_FLOAT64_C( -891.13), SIMDE_FLOAT64_C( 473.74), SIMDE_FLOAT64_C( 
-547.95), SIMDE_FLOAT64_C( 516.90), SIMDE_FLOAT64_C( -69.62), SIMDE_FLOAT64_C( -976.88)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -183.17), SIMDE_FLOAT64_C( 283.20), SIMDE_FLOAT64_C( 363.83), SIMDE_FLOAT64_C(-1115.52), SIMDE_FLOAT64_C( 1477.15), SIMDE_FLOAT64_C( -798.22), SIMDE_FLOAT64_C( -55.85), SIMDE_FLOAT64_C( 13.52)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -659.63), SIMDE_FLOAT64_C( -759.67), SIMDE_FLOAT64_C( -847.92), SIMDE_FLOAT64_C( -61.45), SIMDE_FLOAT64_C( -337.36), SIMDE_FLOAT64_C( 139.68), SIMDE_FLOAT64_C( 658.69), SIMDE_FLOAT64_C( 86.55)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -406.77), SIMDE_FLOAT64_C( -929.18), SIMDE_FLOAT64_C( 716.57), SIMDE_FLOAT64_C( 1.83), SIMDE_FLOAT64_C( 179.14), SIMDE_FLOAT64_C( 145.16), SIMDE_FLOAT64_C( -463.41), SIMDE_FLOAT64_C( -573.03)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -252.86), SIMDE_FLOAT64_C( 169.51), SIMDE_FLOAT64_C(-1564.49), SIMDE_FLOAT64_C( -63.28), SIMDE_FLOAT64_C( -516.50), SIMDE_FLOAT64_C( -5.48), SIMDE_FLOAT64_C( 1122.10), SIMDE_FLOAT64_C( 659.58)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -210.14), SIMDE_FLOAT64_C( 816.06), SIMDE_FLOAT64_C( -193.68), SIMDE_FLOAT64_C( 585.03), SIMDE_FLOAT64_C( -674.08), SIMDE_FLOAT64_C( 157.57), SIMDE_FLOAT64_C( -919.13), SIMDE_FLOAT64_C( 451.36)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -294.04), SIMDE_FLOAT64_C( -678.25), SIMDE_FLOAT64_C( 428.59), SIMDE_FLOAT64_C( -340.21), SIMDE_FLOAT64_C( 447.13), SIMDE_FLOAT64_C( -558.56), SIMDE_FLOAT64_C( -584.22), SIMDE_FLOAT64_C( 801.21)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 83.90), SIMDE_FLOAT64_C( 1494.31), SIMDE_FLOAT64_C( -622.27), SIMDE_FLOAT64_C( 925.24), SIMDE_FLOAT64_C(-1121.21), SIMDE_FLOAT64_C( 716.13), SIMDE_FLOAT64_C( -334.91), SIMDE_FLOAT64_C( -349.85)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -311.95), SIMDE_FLOAT64_C( -956.32), SIMDE_FLOAT64_C( 248.48), SIMDE_FLOAT64_C( 995.45), SIMDE_FLOAT64_C( 139.87), SIMDE_FLOAT64_C( 783.05), SIMDE_FLOAT64_C( 584.21), SIMDE_FLOAT64_C( -920.08)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -736.64), 
SIMDE_FLOAT64_C( 551.91), SIMDE_FLOAT64_C( -457.00), SIMDE_FLOAT64_C( -294.64), SIMDE_FLOAT64_C( -589.82), SIMDE_FLOAT64_C( 788.44), SIMDE_FLOAT64_C( -717.27), SIMDE_FLOAT64_C( 147.83)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 424.69), SIMDE_FLOAT64_C(-1508.23), SIMDE_FLOAT64_C( 705.48), SIMDE_FLOAT64_C( 1290.09), SIMDE_FLOAT64_C( 729.69), SIMDE_FLOAT64_C( -5.39), SIMDE_FLOAT64_C( 1301.48), SIMDE_FLOAT64_C(-1067.91)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 583.37), SIMDE_FLOAT64_C( 151.60), SIMDE_FLOAT64_C( -526.34), SIMDE_FLOAT64_C( -118.48), SIMDE_FLOAT64_C( -603.65), SIMDE_FLOAT64_C( -96.99), SIMDE_FLOAT64_C( -634.86), SIMDE_FLOAT64_C( 225.44)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 583.36), SIMDE_FLOAT64_C( 192.11), SIMDE_FLOAT64_C( 241.22), SIMDE_FLOAT64_C( -741.26), SIMDE_FLOAT64_C( 815.78), SIMDE_FLOAT64_C( -325.43), SIMDE_FLOAT64_C( 457.34), SIMDE_FLOAT64_C( 430.70)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( -40.51), SIMDE_FLOAT64_C( -767.56), SIMDE_FLOAT64_C( 622.78), SIMDE_FLOAT64_C(-1419.43), SIMDE_FLOAT64_C( 228.44), SIMDE_FLOAT64_C(-1092.20), SIMDE_FLOAT64_C( -205.26)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -668.75), SIMDE_FLOAT64_C( -693.34), SIMDE_FLOAT64_C( 34.22), SIMDE_FLOAT64_C( 781.55), SIMDE_FLOAT64_C( 732.13), SIMDE_FLOAT64_C( -735.61), SIMDE_FLOAT64_C( -765.87), SIMDE_FLOAT64_C( -276.25)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.83), SIMDE_FLOAT64_C( 767.38), SIMDE_FLOAT64_C( 251.47), SIMDE_FLOAT64_C( -790.79), SIMDE_FLOAT64_C( 317.44), SIMDE_FLOAT64_C( 889.98), SIMDE_FLOAT64_C( 932.08), SIMDE_FLOAT64_C( 879.75)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -674.58), SIMDE_FLOAT64_C(-1460.72), SIMDE_FLOAT64_C( -217.25), SIMDE_FLOAT64_C( 1572.34), SIMDE_FLOAT64_C( 414.69), SIMDE_FLOAT64_C(-1625.59), SIMDE_FLOAT64_C(-1697.95), SIMDE_FLOAT64_C(-1156.00)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 317.74), SIMDE_FLOAT64_C( -77.37), SIMDE_FLOAT64_C( 975.52), SIMDE_FLOAT64_C( 188.84), SIMDE_FLOAT64_C( -557.86), SIMDE_FLOAT64_C( 759.72), 
SIMDE_FLOAT64_C( -874.99), SIMDE_FLOAT64_C( 10.90)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 693.31), SIMDE_FLOAT64_C( 248.74), SIMDE_FLOAT64_C( 748.13), SIMDE_FLOAT64_C( -760.98), SIMDE_FLOAT64_C( 787.06), SIMDE_FLOAT64_C( 732.48), SIMDE_FLOAT64_C( -205.98), SIMDE_FLOAT64_C( 629.02)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -375.57), SIMDE_FLOAT64_C( -326.11), SIMDE_FLOAT64_C( 227.39), SIMDE_FLOAT64_C( 949.82), SIMDE_FLOAT64_C(-1344.92), SIMDE_FLOAT64_C( 27.24), SIMDE_FLOAT64_C( -669.01), SIMDE_FLOAT64_C( -618.12)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -679.43), SIMDE_FLOAT64_C( 282.17), SIMDE_FLOAT64_C( 993.32), SIMDE_FLOAT64_C( 821.29), SIMDE_FLOAT64_C( 165.53), SIMDE_FLOAT64_C( 519.53), SIMDE_FLOAT64_C( 873.49), SIMDE_FLOAT64_C( 728.89)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 242.15), SIMDE_FLOAT64_C( 438.99), SIMDE_FLOAT64_C( 772.28), SIMDE_FLOAT64_C( -279.74), SIMDE_FLOAT64_C( -310.93), SIMDE_FLOAT64_C( -848.99), SIMDE_FLOAT64_C( 222.85), SIMDE_FLOAT64_C( 300.16)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -921.58), SIMDE_FLOAT64_C( -156.82), SIMDE_FLOAT64_C( 221.04), SIMDE_FLOAT64_C( 1101.03), SIMDE_FLOAT64_C( 476.46), SIMDE_FLOAT64_C( 1368.52), SIMDE_FLOAT64_C( 650.64), SIMDE_FLOAT64_C( 428.73)) } }; /* Run simde_mm512_sub_pd on every test_vec entry and compare the outcome with the stored r. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_sub_pd(test_vec[i].a, test_vec[i].b); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven test for simde_mm512_mask_sub_epi32. Each of the 8 entries in test_vec holds a simde__m512i src, a simde__mmask16 k, two simde__m512i operands (a, b) and a stored vector r; the loop that closes the function calls simde_mm512_mask_sub_epi32(src, k, a, b) for every entry and requires equality with r through simde_assert_m512i_i32(r, ==, ...), after which the function returns 0. NOTE(review): the stored r values look like a - b in the lanes selected by k and src in the remaining lanes - confirm against the intrinsic's definition. */ static int test_simde_mm512_mask_sub_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask16 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( -957186609), INT32_C(-1524765283), INT32_C( 1290068568), INT32_C( 1887468775), INT32_C( -904096999), INT32_C(-1189693212), INT32_C( 221355870), INT32_C(-1952779315), INT32_C( 1347985035), INT32_C(-2063939133), INT32_C(-1602582649), INT32_C(-2096850611), INT32_C(-2084994527), INT32_C( -75386963), INT32_C( 1835417512), INT32_C(-2072964471)), UINT16_C(35396), 
simde_mm512_set_epi32(INT32_C( 136551409), INT32_C( 1192962314), INT32_C( 2058621765), INT32_C(-2039270859), INT32_C( -26254502), INT32_C( 733381108), INT32_C( -187934344), INT32_C( 989979336), INT32_C(-1964919382), INT32_C( 126554293), INT32_C( 254011928), INT32_C( 1490517506), INT32_C(-1065486850), INT32_C( 45941921), INT32_C(-1082899768), INT32_C( -219628031)), simde_mm512_set_epi32(INT32_C( -680185335), INT32_C( 111102276), INT32_C( 1222454066), INT32_C( -422241261), INT32_C( -78061198), INT32_C(-2084414007), INT32_C( 1367041146), INT32_C(-1471398421), INT32_C( -348147705), INT32_C( 673564238), INT32_C(-1457376577), INT32_C( 613875036), INT32_C( -859069431), INT32_C( -733638834), INT32_C(-1673403701), INT32_C( 842474288)), simde_mm512_set_epi32(INT32_C( 816736744), INT32_C(-1524765283), INT32_C( 1290068568), INT32_C( 1887468775), INT32_C( 51806696), INT32_C(-1189693212), INT32_C(-1554975490), INT32_C(-1952779315), INT32_C( 1347985035), INT32_C( -547009945), INT32_C(-1602582649), INT32_C(-2096850611), INT32_C(-2084994527), INT32_C( 779580755), INT32_C( 1835417512), INT32_C(-2072964471)) }, { simde_mm512_set_epi32(INT32_C( 2077489237), INT32_C(-2066152618), INT32_C( -825076901), INT32_C(-1372574642), INT32_C( -889460158), INT32_C( 498921453), INT32_C( 943332338), INT32_C(-1383811831), INT32_C( 1827152592), INT32_C( 1728034912), INT32_C( 1350913629), INT32_C( 868578809), INT32_C( 1368636899), INT32_C( -389235219), INT32_C( 602990700), INT32_C( -400551366)), UINT16_C(47779), simde_mm512_set_epi32(INT32_C( 1704357216), INT32_C( -538157327), INT32_C( 1370875608), INT32_C( 1508504457), INT32_C( -68294915), INT32_C(-1479685367), INT32_C( -615597542), INT32_C(-1638887359), INT32_C(-1417912572), INT32_C( 1479002949), INT32_C( -647118153), INT32_C( 1670566025), INT32_C(-1880268561), INT32_C(-1083232065), INT32_C( 2092339698), INT32_C(-1021873283)), simde_mm512_set_epi32(INT32_C( -839277498), INT32_C( 551588590), INT32_C( 1834572496), INT32_C( 1613035598), 
INT32_C(-1678404828), INT32_C(-1769391216), INT32_C(-1638931514), INT32_C( 156804649), INT32_C( 1764158657), INT32_C( -132604621), INT32_C( 446542816), INT32_C( 2037189710), INT32_C( 109296986), INT32_C( 257019297), INT32_C( 473079611), INT32_C( 1127076998)), simde_mm512_set_epi32(INT32_C(-1751332582), INT32_C(-2066152618), INT32_C( -463696888), INT32_C( -104531141), INT32_C( 1610109913), INT32_C( 498921453), INT32_C( 1023333972), INT32_C(-1383811831), INT32_C( 1112896067), INT32_C( 1728034912), INT32_C(-1093660969), INT32_C( 868578809), INT32_C( 1368636899), INT32_C( -389235219), INT32_C( 1619260087), INT32_C( 2146017015)) }, { simde_mm512_set_epi32(INT32_C( 307630641), INT32_C(-1560148595), INT32_C( 376284729), INT32_C( 278591183), INT32_C( -277186219), INT32_C( 1940926671), INT32_C( 662058232), INT32_C( 1091202812), INT32_C( -701136301), INT32_C( -504607320), INT32_C( -251380880), INT32_C( 1860616049), INT32_C(-1752161866), INT32_C(-1199997313), INT32_C(-1668691262), INT32_C( 1717921298)), UINT16_C( 2459), simde_mm512_set_epi32(INT32_C( 2079917891), INT32_C(-1199015072), INT32_C( -98602729), INT32_C( -930567988), INT32_C(-1256209763), INT32_C( 1068967165), INT32_C( 1289079409), INT32_C( 1251085533), INT32_C( -727360546), INT32_C(-1724797341), INT32_C( 2093813635), INT32_C( 1051617285), INT32_C( 1264716001), INT32_C( 940727836), INT32_C( 1722577424), INT32_C(-1275657732)), simde_mm512_set_epi32(INT32_C( 671797033), INT32_C(-1012795446), INT32_C( 2106088193), INT32_C( -458612579), INT32_C( -261772865), INT32_C( -550994046), INT32_C( 2105186719), INT32_C( 1074097751), INT32_C(-1251411324), INT32_C( 65867416), INT32_C(-1495248139), INT32_C( 315553116), INT32_C(-1869712369), INT32_C(-1246794510), INT32_C( 1218370652), INT32_C( -240388126)), simde_mm512_set_epi32(INT32_C( 307630641), INT32_C(-1560148595), INT32_C( 376284729), INT32_C( 278591183), INT32_C( -994436898), INT32_C( 1940926671), INT32_C( 662058232), INT32_C( 176987782), INT32_C( 524050778), INT32_C( 
-504607320), INT32_C( -251380880), INT32_C( 736064169), INT32_C(-1160538926), INT32_C(-1199997313), INT32_C( 504206772), INT32_C(-1035269606)) }, { simde_mm512_set_epi32(INT32_C( -789716549), INT32_C(-1932674309), INT32_C( 548470804), INT32_C( -318652401), INT32_C(-2041118423), INT32_C(-2107945718), INT32_C( -715661009), INT32_C( 1609073505), INT32_C( 1214609500), INT32_C( 283085327), INT32_C(-1633515677), INT32_C( 1697029857), INT32_C( 1976447422), INT32_C( 904412076), INT32_C( 1198927422), INT32_C(-1498026761)), UINT16_C(54315), simde_mm512_set_epi32(INT32_C( 1385182319), INT32_C( 795273310), INT32_C( 1955628796), INT32_C( -526907127), INT32_C(-2141025282), INT32_C( -931446405), INT32_C(-1422139726), INT32_C(-1101084337), INT32_C( -254080461), INT32_C( -595291883), INT32_C( 1292692652), INT32_C(-1849951866), INT32_C( -815091127), INT32_C( 370112774), INT32_C( -520479179), INT32_C( 1681391452)), simde_mm512_set_epi32(INT32_C(-1825216267), INT32_C( 1555513845), INT32_C(-2081576252), INT32_C(-1972081268), INT32_C( -563427058), INT32_C( 1922040193), INT32_C(-2102270715), INT32_C(-1257264155), INT32_C( -894851768), INT32_C( 1793334666), INT32_C( 1049305530), INT32_C(-1935379009), INT32_C( -8279361), INT32_C(-1567490719), INT32_C(-2014130513), INT32_C(-1826154506)), simde_mm512_set_epi32(INT32_C(-1084568710), INT32_C( -760240535), INT32_C( 548470804), INT32_C( 1445174141), INT32_C(-2041118423), INT32_C( 1441480698), INT32_C( -715661009), INT32_C( 1609073505), INT32_C( 1214609500), INT32_C( 283085327), INT32_C( 243387122), INT32_C( 1697029857), INT32_C( -806811766), INT32_C( 904412076), INT32_C( 1493651334), INT32_C( -787421338)) }, { simde_mm512_set_epi32(INT32_C( 997407681), INT32_C( -83308341), INT32_C( 1430458288), INT32_C( -655910274), INT32_C( 17159218), INT32_C( 197891822), INT32_C( -82165524), INT32_C( 98130061), INT32_C( -696255503), INT32_C( 616388941), INT32_C( 1383637516), INT32_C( 255219509), INT32_C(-1280964183), INT32_C(-1753221031), INT32_C( 480974923), 
INT32_C(-1444611560)), UINT16_C(47568), simde_mm512_set_epi32(INT32_C(-1796791424), INT32_C( 919413682), INT32_C( 907613991), INT32_C(-1471064632), INT32_C(-2017464794), INT32_C( -67778959), INT32_C(-1033884668), INT32_C( -839095279), INT32_C( -881742684), INT32_C( 1193890045), INT32_C( -817450648), INT32_C( -450889209), INT32_C(-1829442769), INT32_C( -254239276), INT32_C( 1531184539), INT32_C( 204100550)), simde_mm512_set_epi32(INT32_C(-1574624316), INT32_C( 1965632168), INT32_C( -507137262), INT32_C( 868285762), INT32_C( -287712967), INT32_C(-1275855491), INT32_C(-1948986373), INT32_C( 378189270), INT32_C( 2028975029), INT32_C( -983819985), INT32_C(-1530834794), INT32_C( -267906659), INT32_C( 2013371063), INT32_C( -972550977), INT32_C(-1345658151), INT32_C(-2001069348)), simde_mm512_set_epi32(INT32_C( -222167108), INT32_C( -83308341), INT32_C( 1414751253), INT32_C( 1955616902), INT32_C(-1729751827), INT32_C( 197891822), INT32_C( -82165524), INT32_C(-1217284549), INT32_C( 1384249583), INT32_C(-2117257266), INT32_C( 1383637516), INT32_C( -182982550), INT32_C(-1280964183), INT32_C(-1753221031), INT32_C( 480974923), INT32_C(-1444611560)) }, { simde_mm512_set_epi32(INT32_C( 1875288432), INT32_C( 1158027251), INT32_C( -303056299), INT32_C( -939396673), INT32_C( 1585003262), INT32_C( 1365783459), INT32_C( 111845672), INT32_C(-1286713478), INT32_C( 674624782), INT32_C( 2020528740), INT32_C( 497192398), INT32_C( 1112540789), INT32_C(-1764167278), INT32_C(-1540772359), INT32_C( 395629026), INT32_C( 984304916)), UINT16_C(16877), simde_mm512_set_epi32(INT32_C( -344292944), INT32_C( 1968428151), INT32_C( 2086978939), INT32_C( 1501910543), INT32_C(-1262393002), INT32_C( 2081469023), INT32_C( 2016768793), INT32_C( 1922434397), INT32_C( -253304624), INT32_C( 515280842), INT32_C(-1708348294), INT32_C( 2107558843), INT32_C( 1919035054), INT32_C( 1742835915), INT32_C( 989439209), INT32_C( 2080310116)), simde_mm512_set_epi32(INT32_C( 1560352883), INT32_C( -937050525), INT32_C( 
15000953), INT32_C( 298895006), INT32_C( -255287325), INT32_C( -851082971), INT32_C( -981170631), INT32_C( 30364523), INT32_C( -626854551), INT32_C( 1776719697), INT32_C(-1286673883), INT32_C( 2134458392), INT32_C(-1884377437), INT32_C(-2042525337), INT32_C( 2143156805), INT32_C(-1045267304)), simde_mm512_set_epi32(INT32_C( 1875288432), INT32_C(-1389488620), INT32_C( -303056299), INT32_C( -939396673), INT32_C( 1585003262), INT32_C( 1365783459), INT32_C( 111845672), INT32_C( 1892069874), INT32_C( 373549927), INT32_C(-1261438855), INT32_C( -421674411), INT32_C( 1112540789), INT32_C( -491554805), INT32_C( -509606044), INT32_C( 395629026), INT32_C(-1169389876)) }, { simde_mm512_set_epi32(INT32_C( 726531409), INT32_C( -606374582), INT32_C(-1057918709), INT32_C( -811736744), INT32_C(-1460245574), INT32_C( -627872087), INT32_C( 1799586442), INT32_C(-1105519928), INT32_C(-1288829692), INT32_C(-2144392739), INT32_C( 1110910857), INT32_C( -282270116), INT32_C(-1420141426), INT32_C( 1682561587), INT32_C( 1308021682), INT32_C( 712875579)), UINT16_C(17567), simde_mm512_set_epi32(INT32_C(-1065890522), INT32_C( 1362887862), INT32_C(-1905482051), INT32_C( 174767211), INT32_C( 1968089357), INT32_C(-1207243832), INT32_C( -701927204), INT32_C(-1701909648), INT32_C(-1822821880), INT32_C(-1418686446), INT32_C( 2002979046), INT32_C( -531029674), INT32_C( -233545704), INT32_C( 1270923539), INT32_C( -515398077), INT32_C( 870828526)), simde_mm512_set_epi32(INT32_C(-1161246521), INT32_C(-1263382687), INT32_C( -761171059), INT32_C( 1052537110), INT32_C(-1225204820), INT32_C( 1299827393), INT32_C( 477328169), INT32_C( 2043159101), INT32_C( 984199920), INT32_C( 1963689737), INT32_C(-1149812166), INT32_C( -500241318), INT32_C( -953270640), INT32_C( 1180984926), INT32_C( -645305643), INT32_C( 1026486800)), simde_mm512_set_epi32(INT32_C( 726531409), INT32_C(-1668696747), INT32_C(-1057918709), INT32_C( -811736744), INT32_C(-1460245574), INT32_C( 1787896071), INT32_C( 1799586442), 
INT32_C(-1105519928), INT32_C( 1487945496), INT32_C(-2144392739), INT32_C( 1110910857), INT32_C( -30788356), INT32_C( 719724936), INT32_C( 89938613), INT32_C( 129907566), INT32_C( -155658274)) }, { simde_mm512_set_epi32(INT32_C( 1723004290), INT32_C( 721161302), INT32_C( 1077400739), INT32_C( 861837752), INT32_C(-1943224858), INT32_C( 2112602876), INT32_C(-1445821889), INT32_C(-2100432693), INT32_C(-1175934343), INT32_C( 805502143), INT32_C( 1163969458), INT32_C( 873642413), INT32_C( 2052720739), INT32_C(-1010971457), INT32_C( 199344228), INT32_C( 251460647)), UINT16_C(59134), simde_mm512_set_epi32(INT32_C(-1391704351), INT32_C( -847303025), INT32_C(-1711491580), INT32_C( -147993971), INT32_C(-1140349230), INT32_C( 172650828), INT32_C(-2090294261), INT32_C( -216506888), INT32_C(-1813744120), INT32_C( 1589656338), INT32_C( 1010967585), INT32_C(-2076714127), INT32_C( 1156626662), INT32_C( -264321123), INT32_C(-1099385436), INT32_C( -148901794)), simde_mm512_set_epi32(INT32_C( 1003282629), INT32_C( 1250297288), INT32_C( 26548422), INT32_C(-1100962758), INT32_C( 1934048830), INT32_C( -886200980), INT32_C( -228926178), INT32_C( 21722717), INT32_C(-1321187708), INT32_C( 904822803), INT32_C( -875700432), INT32_C(-1302414558), INT32_C( 962131440), INT32_C( -729214075), INT32_C(-1094266114), INT32_C( 1122895720)), simde_mm512_set_epi32(INT32_C( 1899980316), INT32_C(-2097600313), INT32_C(-1738040002), INT32_C( 861837752), INT32_C(-1943224858), INT32_C( 1058851808), INT32_C(-1861368083), INT32_C(-2100432693), INT32_C( -492556412), INT32_C( 684833535), INT32_C( 1886668017), INT32_C( -774299569), INT32_C( 194495222), INT32_C( 464892952), INT32_C( -5119322), INT32_C( 251460647)) } }; /* Run simde_mm512_mask_sub_epi32 on every test_vec entry and require exact equality with the stored r. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_sub_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int 
test_simde_mm512_mask_sub_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask8 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( 8894478799917719473), INT64_C(-7614529333518044459), INT64_C( 8458392650500739529), INT64_C( 7085639313865748967), INT64_C(-7547504459018552290), INT64_C(-8310189466716392279), INT64_C(-1750715323825344235), INT64_C(-2532781790488219528)), UINT8_C(106), simde_mm512_set_epi64(INT64_C(-7192427816606966254), INT64_C(-1619523557840103557), INT64_C( 7616061596213068646), INT64_C( -560841280842371832), INT64_C( -806373115982863580), INT64_C( -816793021936842074), INT64_C( -317565234288882547), INT64_C(-7290553309909260368)), simde_mm512_set_epi64(INT64_C(-9084839040863053259), INT64_C( 332697972184433101), INT64_C(-8959492887484217950), INT64_C( 7617292932467329680), INT64_C(-2740045277871922718), INT64_C(-3634413508032825567), INT64_C( -448440935066054877), INT64_C(-6805574594168851327)), simde_mm512_set_epi64(INT64_C( 8894478799917719473), INT64_C(-1952221530024536658), INT64_C(-1871189590012265020), INT64_C( 7085639313865748967), INT64_C( 1933672161889059138), INT64_C(-8310189466716392279), INT64_C( 130875700777172330), INT64_C(-2532781790488219528)) }, { simde_mm512_set_epi64(INT64_C(-3459089877760882917), INT64_C( 1753327656617706405), INT64_C( 3932187030396497555), INT64_C(-4341921971190139713), INT64_C(-7354864635860030437), INT64_C(-7512931671900842140), INT64_C( 7677521206664265888), INT64_C(-8008068901606036732)), UINT8_C( 1), simde_mm512_set_epi64(INT64_C(-9084086707853197365), INT64_C( 5962789269656503800), INT64_C( 6806616562165680967), INT64_C( 8724516399523474076), INT64_C( -924171789017863248), INT64_C(-2255835938032964673), INT64_C(-4560088794132063361), INT64_C(-5517329800302195238)), simde_mm512_set_epi64(INT64_C(-2849655299932577704), INT64_C( 2712991932590941674), INT64_C( 2564329750539599066), INT64_C(-4536455326234991583), 
INT64_C(-6477728239233614839), INT64_C(-5729565646249538826), INT64_C( 3092410715614407585), INT64_C( 7984397770129184299)), simde_mm512_set_epi64(INT64_C(-3459089877760882917), INT64_C( 1753327656617706405), INT64_C( 3932187030396497555), INT64_C(-4341921971190139713), INT64_C(-7354864635860030437), INT64_C(-7512931671900842140), INT64_C( 7677521206664265888), INT64_C( 4945016503278172079)) }, { simde_mm512_set_epi64(INT64_C( -240340334077349403), INT64_C( 5647038489743797240), INT64_C( 5171415873092064400), INT64_C(-1851380595205120917), INT64_C( -836370148956202078), INT64_C( 8425549504970400810), INT64_C( 2808549870315159479), INT64_C( 3545474415643732634)), UINT8_C(194), simde_mm512_set_epi64(INT64_C(-5877702108931305293), INT64_C(-5372639016544358566), INT64_C(-4535660820549680684), INT64_C(-6747544612783901147), INT64_C( 6705850594648382655), INT64_C(-1906321743942105225), INT64_C( -281981608123407868), INT64_C(-5990711758326206044)), simde_mm512_set_epi64(INT64_C( 8110080903340414341), INT64_C(-3598578875674169061), INT64_C( 4977285870543484474), INT64_C( 6776152673642620958), INT64_C( 4245929756722282054), INT64_C( 3649495924615361625), INT64_C( -638056186877872345), INT64_C(-8828385988165140326)), simde_mm512_set_epi64(INT64_C( 4458961061437831982), INT64_C(-1774060140870189505), INT64_C( 5171415873092064400), INT64_C(-1851380595205120917), INT64_C( -836370148956202078), INT64_C( 8425549504970400810), INT64_C( 356074578754464477), INT64_C( 3545474415643732634)) }, { simde_mm512_set_epi64(INT64_C(-6385979888474332285), INT64_C( 3716758445629922885), INT64_C( 7861010731589253148), INT64_C(-6334773111204875550), INT64_C(-5054960975820633825), INT64_C( 8639514840721539279), INT64_C(-1027366943904624518), INT64_C(-4721195859159142702)), UINT8_C(222), simde_mm512_set_epi64(INT64_C(-7001132877809342173), INT64_C( 6512733899690414848), INT64_C( 988878120815000883), INT64_C(-5994563704199492012), INT64_C( 1587634372980811194), INT64_C( -914749563856678715), 
INT64_C( 7495962388934953888), INT64_C(-7831181051188885332)), simde_mm512_set_epi64(INT64_C( 4229507402435677476), INT64_C( 2501842736425447642), INT64_C( 8009397189160901283), INT64_C( 3833558633773719409), INT64_C( 2852442819818074174), INT64_C(-8638015813272823849), INT64_C( 8579593880416924807), INT64_C( 2713766728753976690)), simde_mm512_set_epi64(INT64_C( 7216103793464531967), INT64_C( 4010891163264967206), INT64_C( 7861010731589253148), INT64_C( 8618621735736340195), INT64_C(-1264808446837262980), INT64_C( 7723266249416145134), INT64_C(-1083631491481970919), INT64_C(-4721195859159142702)) }, { simde_mm512_set_epi64(INT64_C( 4051614369896270101), INT64_C( 6703896128856670897), INT64_C(-5750389130785475983), INT64_C(-7878547924784098469), INT64_C( 5491867996743881624), INT64_C(-2189602113514909499), INT64_C( -887220462507309287), INT64_C(-5733898489940979010)), UINT8_C( 26), simde_mm512_set_epi64(INT64_C( -99656633840764240), INT64_C(-3479731851565468885), INT64_C(-7074577238264434881), INT64_C(-3836339826871533273), INT64_C( 4198283975631841849), INT64_C(-3829622956767240841), INT64_C( 5960966148924368684), INT64_C( -504125670847055963)), simde_mm512_set_epi64(INT64_C(-8344319212574510912), INT64_C(-3371415321000668561), INT64_C(-8338525176508042897), INT64_C( 5173420397567361383), INT64_C(-6751809518396836721), INT64_C(-8388491552134432960), INT64_C(-9161028627110906680), INT64_C( 7472048750700349549)), simde_mm512_set_epi64(INT64_C( 4051614369896270101), INT64_C( 6703896128856670897), INT64_C(-5750389130785475983), INT64_C(-9009760224438894656), INT64_C(-7496650579680873046), INT64_C(-2189602113514909499), INT64_C(-3324749297674276252), INT64_C(-5733898489940979010)) }, { simde_mm512_set_epi64(INT64_C(-6378393891104748170), INT64_C(-8478287659785501826), INT64_C(-2127236125072242134), INT64_C( 8702738982982040445), INT64_C( 645844328650761785), INT64_C(-4561773442934600720), INT64_C(-5793568656482259588), INT64_C( -379681413311801170)), UINT8_C(230), 
simde_mm512_set_epi64(INT64_C( -848706848545220792), INT64_C(-1124075123789220737), INT64_C(-2005439629632543252), INT64_C( 8274388146286059619), INT64_C( -261550962782015927), INT64_C(-8761037216848109215), INT64_C(-3016365966836321630), INT64_C( 2543055264688040393)), simde_mm512_set_epi64(INT64_C( 1583638370136684317), INT64_C(-1184919915070849427), INT64_C( 6948286910398693964), INT64_C( 2437457976149582578), INT64_C( 3426542754873284897), INT64_C(-7983270512780038531), INT64_C( 1779296328975282374), INT64_C(-5362999871220584978)), simde_mm512_set_epi64(INT64_C(-2432345218681905109), INT64_C( 60844791281628690), INT64_C(-8953726540031237216), INT64_C( 8702738982982040445), INT64_C( 645844328650761785), INT64_C( -777766704068070684), INT64_C(-4795662295811604004), INT64_C( -379681413311801170)) }, { simde_mm512_set_epi64(INT64_C(-2563692560784467599), INT64_C(-2764729313181954331), INT64_C( 7449793955604076666), INT64_C(-6302011830015535814), INT64_C(-5919077484698028869), INT64_C(-6127059769393124093), INT64_C( 2958642729945465911), INT64_C( 2772140786646472311)), UINT8_C(198), simde_mm512_set_epi64(INT64_C(-3934991658845807023), INT64_C( 7561755153516237296), INT64_C(-1521478373140770922), INT64_C( 6956443634033398294), INT64_C(-5307063963483146371), INT64_C( 6556039892370535969), INT64_C(-6645788521893978945), INT64_C(-6307512051127595595)), simde_mm512_set_epi64(INT64_C(-7270561721689602230), INT64_C( 8935792808270452615), INT64_C( 1984489943341614372), INT64_C( 6860868624136070257), INT64_C(-2243581398369652256), INT64_C(-6592818671779181804), INT64_C( -308663241436655846), INT64_C(-8935526257161672911)), simde_mm512_set_epi64(INT64_C( 3335570062843795207), INT64_C(-1374037654754215319), INT64_C( 7449793955604076666), INT64_C(-6302011830015535814), INT64_C(-5919077484698028869), INT64_C(-5297885509559833843), INT64_C(-6337125280457323099), INT64_C( 2772140786646472311)) }, { simde_mm512_set_epi64(INT64_C(-7511866029206584895), INT64_C( 6685003933657692663), 
INT64_C( 112057327023275278), INT64_C( 2785131907782223781), INT64_C( -403719025987547254), INT64_C(-5974279397850363938), INT64_C(-6601571580489345254), INT64_C( 1896379997419403836)), UINT8_C( 70), simde_mm512_set_epi64(INT64_C(-6334367433946281110), INT64_C(-5840485098030444461), INT64_C(-6383956557021185117), INT64_C(-7600398675722821668), INT64_C(-2279362749413199885), INT64_C(-8009539466982888201), INT64_C( 340327559398526723), INT64_C(-2438629088141247826)), simde_mm512_set_epi64(INT64_C( 3758222621544461478), INT64_C( 8264387002851618510), INT64_C( 5256515298231032169), INT64_C( 4555501816451377355), INT64_C(-9184304616258229288), INT64_C( 5115688705834988612), INT64_C(-3795492187184599084), INT64_C(-3221204559120447653)), simde_mm512_set_epi64(INT64_C(-7511866029206584895), INT64_C( 4341871972827488645), INT64_C( 112057327023275278), INT64_C( 2785131907782223781), INT64_C( -403719025987547254), INT64_C( 5321515900891674803), INT64_C( 4135819746583125807), INT64_C( 1896379997419403836)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_sub_epi64(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } /* Table-driven test for simde_mm512_mask_sub_ps. Each of the 8 entries in test_vec holds a src vector, a 16-bit mask k, operands a and b, and the expected result r. The loop after the table calls simde_mm512_mask_sub_ps(src, k, a, b) for every entry and compares the result with the entry's r via simde_assert_m512_close(r, expected, 1); the function then returns 0. The expected vectors appear to hold a - b in some elements and the unchanged src element in the others (presumably selected by the bits of k - confirm against the implementation). NOTE(review): the last assert argument looks like a decimal-place precision - confirm against the macro definition. */ static int test_simde_mm512_mask_sub_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 b; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -417.79), SIMDE_FLOAT32_C( -912.83), SIMDE_FLOAT32_C( 111.29), SIMDE_FLOAT32_C( -470.87), SIMDE_FLOAT32_C( 685.45), SIMDE_FLOAT32_C( -92.85), SIMDE_FLOAT32_C( 704.55), SIMDE_FLOAT32_C( 450.79), SIMDE_FLOAT32_C( -761.01), SIMDE_FLOAT32_C( -759.35), SIMDE_FLOAT32_C( 646.77), SIMDE_FLOAT32_C( 616.33), SIMDE_FLOAT32_C( 922.76), SIMDE_FLOAT32_C( 721.94), SIMDE_FLOAT32_C( 721.78), SIMDE_FLOAT32_C( 651.66)), UINT16_C(55049), simde_mm512_set_ps(SIMDE_FLOAT32_C( 492.15), SIMDE_FLOAT32_C( 363.86), SIMDE_FLOAT32_C( -906.93), 
SIMDE_FLOAT32_C( -51.88), SIMDE_FLOAT32_C( 976.36), SIMDE_FLOAT32_C( 844.84), SIMDE_FLOAT32_C( 525.57), SIMDE_FLOAT32_C( 575.43), SIMDE_FLOAT32_C( -719.61), SIMDE_FLOAT32_C( 570.91), SIMDE_FLOAT32_C( -748.06), SIMDE_FLOAT32_C( 823.89), SIMDE_FLOAT32_C( -708.11), SIMDE_FLOAT32_C( -805.87), SIMDE_FLOAT32_C( 626.28), SIMDE_FLOAT32_C( 344.43)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -814.48), SIMDE_FLOAT32_C( 843.19), SIMDE_FLOAT32_C( -866.28), SIMDE_FLOAT32_C( -230.51), SIMDE_FLOAT32_C( -264.51), SIMDE_FLOAT32_C( 935.39), SIMDE_FLOAT32_C( 479.68), SIMDE_FLOAT32_C( -375.52), SIMDE_FLOAT32_C( -928.92), SIMDE_FLOAT32_C( -243.75), SIMDE_FLOAT32_C( 771.60), SIMDE_FLOAT32_C( 150.31), SIMDE_FLOAT32_C( -627.83), SIMDE_FLOAT32_C( -720.61), SIMDE_FLOAT32_C( 345.13), SIMDE_FLOAT32_C( 203.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1306.63), SIMDE_FLOAT32_C( -479.33), SIMDE_FLOAT32_C( 111.29), SIMDE_FLOAT32_C( 178.63), SIMDE_FLOAT32_C( 685.45), SIMDE_FLOAT32_C( -90.55), SIMDE_FLOAT32_C( 45.89), SIMDE_FLOAT32_C( 950.95), SIMDE_FLOAT32_C( -761.01), SIMDE_FLOAT32_C( -759.35), SIMDE_FLOAT32_C( 646.77), SIMDE_FLOAT32_C( 616.33), SIMDE_FLOAT32_C( -80.28), SIMDE_FLOAT32_C( 721.94), SIMDE_FLOAT32_C( 721.78), SIMDE_FLOAT32_C( 141.43)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -594.79), SIMDE_FLOAT32_C( -68.26), SIMDE_FLOAT32_C( 772.68), SIMDE_FLOAT32_C( -615.12), SIMDE_FLOAT32_C( 489.20), SIMDE_FLOAT32_C( -609.74), SIMDE_FLOAT32_C( -297.42), SIMDE_FLOAT32_C( -701.58), SIMDE_FLOAT32_C( 71.34), SIMDE_FLOAT32_C( -811.20), SIMDE_FLOAT32_C( -44.61), SIMDE_FLOAT32_C( 172.32), SIMDE_FLOAT32_C( -336.24), SIMDE_FLOAT32_C( -959.77), SIMDE_FLOAT32_C( 896.40), SIMDE_FLOAT32_C( 321.28)), UINT16_C( 2266), simde_mm512_set_ps(SIMDE_FLOAT32_C( 136.73), SIMDE_FLOAT32_C( 408.70), SIMDE_FLOAT32_C( 907.04), SIMDE_FLOAT32_C( 175.32), SIMDE_FLOAT32_C( 125.78), SIMDE_FLOAT32_C( -176.42), SIMDE_FLOAT32_C( -192.20), SIMDE_FLOAT32_C( 636.29), SIMDE_FLOAT32_C( -812.72), SIMDE_FLOAT32_C( -295.02), SIMDE_FLOAT32_C( 426.00), 
SIMDE_FLOAT32_C( 348.29), SIMDE_FLOAT32_C( 859.20), SIMDE_FLOAT32_C( -28.95), SIMDE_FLOAT32_C( -637.06), SIMDE_FLOAT32_C( -450.15)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -929.87), SIMDE_FLOAT32_C( -208.53), SIMDE_FLOAT32_C( 561.71), SIMDE_FLOAT32_C( -74.05), SIMDE_FLOAT32_C( 477.79), SIMDE_FLOAT32_C( 772.49), SIMDE_FLOAT32_C( 648.48), SIMDE_FLOAT32_C( -58.61), SIMDE_FLOAT32_C( 835.38), SIMDE_FLOAT32_C( -689.00), SIMDE_FLOAT32_C( 607.03), SIMDE_FLOAT32_C( 421.78), SIMDE_FLOAT32_C( -574.15), SIMDE_FLOAT32_C( 302.76), SIMDE_FLOAT32_C( 178.11), SIMDE_FLOAT32_C( -298.57)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -594.79), SIMDE_FLOAT32_C( -68.26), SIMDE_FLOAT32_C( 772.68), SIMDE_FLOAT32_C( -615.12), SIMDE_FLOAT32_C( -352.01), SIMDE_FLOAT32_C( -609.74), SIMDE_FLOAT32_C( -297.42), SIMDE_FLOAT32_C( -701.58), SIMDE_FLOAT32_C( -1648.10), SIMDE_FLOAT32_C( 393.98), SIMDE_FLOAT32_C( -44.61), SIMDE_FLOAT32_C( -73.49), SIMDE_FLOAT32_C( 1433.35), SIMDE_FLOAT32_C( -959.77), SIMDE_FLOAT32_C( -815.17), SIMDE_FLOAT32_C( 321.28)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -914.76), SIMDE_FLOAT32_C( 285.68), SIMDE_FLOAT32_C( 695.03), SIMDE_FLOAT32_C( -235.78), SIMDE_FLOAT32_C( 90.17), SIMDE_FLOAT32_C( 891.02), SIMDE_FLOAT32_C( -456.46), SIMDE_FLOAT32_C( 952.55), SIMDE_FLOAT32_C( -153.33), SIMDE_FLOAT32_C( -533.35), SIMDE_FLOAT32_C( -130.02), SIMDE_FLOAT32_C( -580.21), SIMDE_FLOAT32_C( -857.73), SIMDE_FLOAT32_C( -362.64), SIMDE_FLOAT32_C( 808.25), SIMDE_FLOAT32_C( 908.95)), UINT16_C(53407), simde_mm512_set_ps(SIMDE_FLOAT32_C( 415.38), SIMDE_FLOAT32_C( 622.33), SIMDE_FLOAT32_C( 849.49), SIMDE_FLOAT32_C( -552.97), SIMDE_FLOAT32_C( 837.01), SIMDE_FLOAT32_C( -753.98), SIMDE_FLOAT32_C( 167.51), SIMDE_FLOAT32_C( 898.60), SIMDE_FLOAT32_C( -36.68), SIMDE_FLOAT32_C( -931.19), SIMDE_FLOAT32_C( 230.22), SIMDE_FLOAT32_C( -885.80), SIMDE_FLOAT32_C( -894.49), SIMDE_FLOAT32_C( -402.23), SIMDE_FLOAT32_C( -68.60), SIMDE_FLOAT32_C( -153.88)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 247.18), SIMDE_FLOAT32_C( 507.40), 
SIMDE_FLOAT32_C( -715.17), SIMDE_FLOAT32_C( 785.48), SIMDE_FLOAT32_C( -543.41), SIMDE_FLOAT32_C( 761.08), SIMDE_FLOAT32_C( 479.07), SIMDE_FLOAT32_C( -938.93), SIMDE_FLOAT32_C( -655.56), SIMDE_FLOAT32_C( 618.55), SIMDE_FLOAT32_C( 224.83), SIMDE_FLOAT32_C( -983.99), SIMDE_FLOAT32_C( -18.22), SIMDE_FLOAT32_C( -142.62), SIMDE_FLOAT32_C( 120.01), SIMDE_FLOAT32_C( 186.92)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 168.20), SIMDE_FLOAT32_C( 114.93), SIMDE_FLOAT32_C( 695.03), SIMDE_FLOAT32_C( -1338.45), SIMDE_FLOAT32_C( 90.17), SIMDE_FLOAT32_C( 891.02), SIMDE_FLOAT32_C( -456.46), SIMDE_FLOAT32_C( 952.55), SIMDE_FLOAT32_C( 618.88), SIMDE_FLOAT32_C( -533.35), SIMDE_FLOAT32_C( -130.02), SIMDE_FLOAT32_C( 98.19), SIMDE_FLOAT32_C( -876.27), SIMDE_FLOAT32_C( -259.61), SIMDE_FLOAT32_C( -188.61), SIMDE_FLOAT32_C( -340.80)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -586.97), SIMDE_FLOAT32_C( -706.71), SIMDE_FLOAT32_C( 862.31), SIMDE_FLOAT32_C( 901.76), SIMDE_FLOAT32_C( -777.23), SIMDE_FLOAT32_C( -615.23), SIMDE_FLOAT32_C( 540.06), SIMDE_FLOAT32_C( -837.05), SIMDE_FLOAT32_C( 896.68), SIMDE_FLOAT32_C( -818.79), SIMDE_FLOAT32_C( -146.21), SIMDE_FLOAT32_C( -751.20), SIMDE_FLOAT32_C( -724.86), SIMDE_FLOAT32_C( -446.10), SIMDE_FLOAT32_C( 747.21), SIMDE_FLOAT32_C( -830.22)), UINT16_C(24145), simde_mm512_set_ps(SIMDE_FLOAT32_C( 809.72), SIMDE_FLOAT32_C( -191.45), SIMDE_FLOAT32_C( -687.88), SIMDE_FLOAT32_C( -561.69), SIMDE_FLOAT32_C( 623.06), SIMDE_FLOAT32_C( -685.16), SIMDE_FLOAT32_C( 155.59), SIMDE_FLOAT32_C( -91.67), SIMDE_FLOAT32_C( -292.32), SIMDE_FLOAT32_C( 436.29), SIMDE_FLOAT32_C( 682.53), SIMDE_FLOAT32_C( -427.71), SIMDE_FLOAT32_C( -252.26), SIMDE_FLOAT32_C( -814.33), SIMDE_FLOAT32_C( -116.78), SIMDE_FLOAT32_C( -176.18)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -476.63), SIMDE_FLOAT32_C( -403.49), SIMDE_FLOAT32_C( -129.06), SIMDE_FLOAT32_C( -540.32), SIMDE_FLOAT32_C( -296.84), SIMDE_FLOAT32_C( 354.93), SIMDE_FLOAT32_C( 301.70), SIMDE_FLOAT32_C( 818.26), SIMDE_FLOAT32_C( 152.41), 
SIMDE_FLOAT32_C( -7.33), SIMDE_FLOAT32_C( 901.12), SIMDE_FLOAT32_C( 276.49), SIMDE_FLOAT32_C( -421.45), SIMDE_FLOAT32_C( -19.17), SIMDE_FLOAT32_C( 559.47), SIMDE_FLOAT32_C( -62.60)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -586.97), SIMDE_FLOAT32_C( 212.04), SIMDE_FLOAT32_C( 862.31), SIMDE_FLOAT32_C( -21.37), SIMDE_FLOAT32_C( 919.90), SIMDE_FLOAT32_C( -1040.09), SIMDE_FLOAT32_C( -146.11), SIMDE_FLOAT32_C( -837.05), SIMDE_FLOAT32_C( 896.68), SIMDE_FLOAT32_C( 443.62), SIMDE_FLOAT32_C( -146.21), SIMDE_FLOAT32_C( -704.20), SIMDE_FLOAT32_C( -724.86), SIMDE_FLOAT32_C( -446.10), SIMDE_FLOAT32_C( 747.21), SIMDE_FLOAT32_C( -113.58)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 853.44), SIMDE_FLOAT32_C( 804.93), SIMDE_FLOAT32_C( 753.54), SIMDE_FLOAT32_C( 129.42), SIMDE_FLOAT32_C( -911.24), SIMDE_FLOAT32_C( -795.01), SIMDE_FLOAT32_C( -264.21), SIMDE_FLOAT32_C( 110.23), SIMDE_FLOAT32_C( 779.42), SIMDE_FLOAT32_C( 756.19), SIMDE_FLOAT32_C( -61.94), SIMDE_FLOAT32_C( -845.71), SIMDE_FLOAT32_C( 522.75), SIMDE_FLOAT32_C( 703.06), SIMDE_FLOAT32_C( 989.80), SIMDE_FLOAT32_C( 594.14)), UINT16_C(58122), simde_mm512_set_ps(SIMDE_FLOAT32_C( 774.43), SIMDE_FLOAT32_C( 251.56), SIMDE_FLOAT32_C( -915.66), SIMDE_FLOAT32_C( -492.31), SIMDE_FLOAT32_C( 722.32), SIMDE_FLOAT32_C( 853.19), SIMDE_FLOAT32_C( 466.28), SIMDE_FLOAT32_C( 573.97), SIMDE_FLOAT32_C( -516.73), SIMDE_FLOAT32_C( -267.27), SIMDE_FLOAT32_C( 110.95), SIMDE_FLOAT32_C( -68.16), SIMDE_FLOAT32_C( -400.30), SIMDE_FLOAT32_C( 327.53), SIMDE_FLOAT32_C( -638.51), SIMDE_FLOAT32_C( -96.92)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 101.96), SIMDE_FLOAT32_C( -734.61), SIMDE_FLOAT32_C( 219.43), SIMDE_FLOAT32_C( -507.66), SIMDE_FLOAT32_C( -747.54), SIMDE_FLOAT32_C( 794.68), SIMDE_FLOAT32_C( -663.99), SIMDE_FLOAT32_C( -123.94), SIMDE_FLOAT32_C( -793.12), SIMDE_FLOAT32_C( 673.57), SIMDE_FLOAT32_C( -777.14), SIMDE_FLOAT32_C( 175.88), SIMDE_FLOAT32_C( -792.24), SIMDE_FLOAT32_C( -246.51), SIMDE_FLOAT32_C( 848.21), SIMDE_FLOAT32_C( -124.15)), 
simde_mm512_set_ps(SIMDE_FLOAT32_C( 672.47), SIMDE_FLOAT32_C( 986.17), SIMDE_FLOAT32_C( -1135.09), SIMDE_FLOAT32_C( 129.42), SIMDE_FLOAT32_C( -911.24), SIMDE_FLOAT32_C( -795.01), SIMDE_FLOAT32_C( 1130.27), SIMDE_FLOAT32_C( 697.91), SIMDE_FLOAT32_C( 779.42), SIMDE_FLOAT32_C( 756.19), SIMDE_FLOAT32_C( -61.94), SIMDE_FLOAT32_C( -845.71), SIMDE_FLOAT32_C( 391.94), SIMDE_FLOAT32_C( 703.06), SIMDE_FLOAT32_C( -1486.72), SIMDE_FLOAT32_C( 594.14)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -670.29), SIMDE_FLOAT32_C( 821.01), SIMDE_FLOAT32_C( -293.06), SIMDE_FLOAT32_C( -56.42), SIMDE_FLOAT32_C( -163.64), SIMDE_FLOAT32_C( -919.47), SIMDE_FLOAT32_C( 636.75), SIMDE_FLOAT32_C( 555.64), SIMDE_FLOAT32_C( 630.28), SIMDE_FLOAT32_C( 798.33), SIMDE_FLOAT32_C( -536.88), SIMDE_FLOAT32_C( 256.29), SIMDE_FLOAT32_C( 834.99), SIMDE_FLOAT32_C( -678.50), SIMDE_FLOAT32_C( -716.28), SIMDE_FLOAT32_C( -235.17)), UINT16_C( 7968), simde_mm512_set_ps(SIMDE_FLOAT32_C( 575.18), SIMDE_FLOAT32_C( -655.63), SIMDE_FLOAT32_C( 986.91), SIMDE_FLOAT32_C( 710.96), SIMDE_FLOAT32_C( 921.30), SIMDE_FLOAT32_C( -96.00), SIMDE_FLOAT32_C( -68.75), SIMDE_FLOAT32_C( -119.17), SIMDE_FLOAT32_C( -795.52), SIMDE_FLOAT32_C( -851.06), SIMDE_FLOAT32_C( 982.58), SIMDE_FLOAT32_C( 432.45), SIMDE_FLOAT32_C( 834.71), SIMDE_FLOAT32_C( -931.48), SIMDE_FLOAT32_C( 421.86), SIMDE_FLOAT32_C( 549.54)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 699.42), SIMDE_FLOAT32_C( -430.21), SIMDE_FLOAT32_C( -842.83), SIMDE_FLOAT32_C( -375.32), SIMDE_FLOAT32_C( -889.13), SIMDE_FLOAT32_C( 77.46), SIMDE_FLOAT32_C( -426.32), SIMDE_FLOAT32_C( -319.52), SIMDE_FLOAT32_C( 633.46), SIMDE_FLOAT32_C( -484.05), SIMDE_FLOAT32_C( 991.09), SIMDE_FLOAT32_C( 894.84), SIMDE_FLOAT32_C( 148.17), SIMDE_FLOAT32_C( -167.11), SIMDE_FLOAT32_C( -811.87), SIMDE_FLOAT32_C( -574.29)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -670.29), SIMDE_FLOAT32_C( 821.01), SIMDE_FLOAT32_C( -293.06), SIMDE_FLOAT32_C( 1086.28), SIMDE_FLOAT32_C( 1810.43), SIMDE_FLOAT32_C( -173.46), SIMDE_FLOAT32_C( 
357.57), SIMDE_FLOAT32_C( 200.35), SIMDE_FLOAT32_C( 630.28), SIMDE_FLOAT32_C( 798.33), SIMDE_FLOAT32_C( -8.51), SIMDE_FLOAT32_C( 256.29), SIMDE_FLOAT32_C( 834.99), SIMDE_FLOAT32_C( -678.50), SIMDE_FLOAT32_C( -716.28), SIMDE_FLOAT32_C( -235.17)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 640.00), SIMDE_FLOAT32_C( 440.55), SIMDE_FLOAT32_C( 793.44), SIMDE_FLOAT32_C( 554.05), SIMDE_FLOAT32_C( 245.74), SIMDE_FLOAT32_C( -388.16), SIMDE_FLOAT32_C( -27.32), SIMDE_FLOAT32_C( -923.44), SIMDE_FLOAT32_C( 109.81), SIMDE_FLOAT32_C( 855.67), SIMDE_FLOAT32_C( -513.53), SIMDE_FLOAT32_C( -921.47), SIMDE_FLOAT32_C( -410.90), SIMDE_FLOAT32_C( -404.15), SIMDE_FLOAT32_C( -502.43), SIMDE_FLOAT32_C( -674.13)), UINT16_C(34235), simde_mm512_set_ps(SIMDE_FLOAT32_C( 455.94), SIMDE_FLOAT32_C( 822.75), SIMDE_FLOAT32_C( 672.52), SIMDE_FLOAT32_C( 418.16), SIMDE_FLOAT32_C( 993.17), SIMDE_FLOAT32_C( -581.12), SIMDE_FLOAT32_C( 737.02), SIMDE_FLOAT32_C( -48.12), SIMDE_FLOAT32_C( 169.53), SIMDE_FLOAT32_C( 875.02), SIMDE_FLOAT32_C( 325.94), SIMDE_FLOAT32_C( -197.05), SIMDE_FLOAT32_C( 209.80), SIMDE_FLOAT32_C( 679.16), SIMDE_FLOAT32_C( -743.34), SIMDE_FLOAT32_C( 192.93)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -400.32), SIMDE_FLOAT32_C( 747.89), SIMDE_FLOAT32_C( -417.14), SIMDE_FLOAT32_C( -149.76), SIMDE_FLOAT32_C( -769.13), SIMDE_FLOAT32_C( 952.70), SIMDE_FLOAT32_C( 55.59), SIMDE_FLOAT32_C( -118.59), SIMDE_FLOAT32_C( -651.36), SIMDE_FLOAT32_C( 213.50), SIMDE_FLOAT32_C( 998.39), SIMDE_FLOAT32_C( 155.85), SIMDE_FLOAT32_C( 985.22), SIMDE_FLOAT32_C( -399.37), SIMDE_FLOAT32_C( -660.54), SIMDE_FLOAT32_C( -918.87)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 856.26), SIMDE_FLOAT32_C( 440.55), SIMDE_FLOAT32_C( 793.44), SIMDE_FLOAT32_C( 554.05), SIMDE_FLOAT32_C( 245.74), SIMDE_FLOAT32_C( -1533.82), SIMDE_FLOAT32_C( -27.32), SIMDE_FLOAT32_C( 70.47), SIMDE_FLOAT32_C( 820.89), SIMDE_FLOAT32_C( 855.67), SIMDE_FLOAT32_C( -672.45), SIMDE_FLOAT32_C( -352.90), SIMDE_FLOAT32_C( -775.42), SIMDE_FLOAT32_C( -404.15), SIMDE_FLOAT32_C( 
-82.80), SIMDE_FLOAT32_C( 1111.80)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -717.43), SIMDE_FLOAT32_C( 307.65), SIMDE_FLOAT32_C( -776.64), SIMDE_FLOAT32_C( 883.24), SIMDE_FLOAT32_C( 462.38), SIMDE_FLOAT32_C( 941.52), SIMDE_FLOAT32_C( 465.21), SIMDE_FLOAT32_C( 772.92), SIMDE_FLOAT32_C( -448.96), SIMDE_FLOAT32_C( 167.95), SIMDE_FLOAT32_C( -770.79), SIMDE_FLOAT32_C( 607.02), SIMDE_FLOAT32_C( 588.25), SIMDE_FLOAT32_C( -430.65), SIMDE_FLOAT32_C( -379.22), SIMDE_FLOAT32_C( 62.66)), UINT16_C(21184), simde_mm512_set_ps(SIMDE_FLOAT32_C( 745.31), SIMDE_FLOAT32_C( 528.47), SIMDE_FLOAT32_C( 260.56), SIMDE_FLOAT32_C( 756.92), SIMDE_FLOAT32_C( -237.78), SIMDE_FLOAT32_C( 890.33), SIMDE_FLOAT32_C( -276.66), SIMDE_FLOAT32_C( -845.25), SIMDE_FLOAT32_C( 73.01), SIMDE_FLOAT32_C( -169.10), SIMDE_FLOAT32_C( -390.26), SIMDE_FLOAT32_C( 55.87), SIMDE_FLOAT32_C( 461.32), SIMDE_FLOAT32_C( -911.03), SIMDE_FLOAT32_C( 362.01), SIMDE_FLOAT32_C( 998.06)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 177.96), SIMDE_FLOAT32_C( -105.40), SIMDE_FLOAT32_C( -516.55), SIMDE_FLOAT32_C( -62.31), SIMDE_FLOAT32_C( -757.68), SIMDE_FLOAT32_C( 665.34), SIMDE_FLOAT32_C( 689.63), SIMDE_FLOAT32_C( 938.32), SIMDE_FLOAT32_C( -408.00), SIMDE_FLOAT32_C( 998.26), SIMDE_FLOAT32_C( -263.70), SIMDE_FLOAT32_C( 807.54), SIMDE_FLOAT32_C( 485.72), SIMDE_FLOAT32_C( -74.68), SIMDE_FLOAT32_C( 725.36), SIMDE_FLOAT32_C( 301.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -717.43), SIMDE_FLOAT32_C( 633.87), SIMDE_FLOAT32_C( -776.64), SIMDE_FLOAT32_C( 819.23), SIMDE_FLOAT32_C( 462.38), SIMDE_FLOAT32_C( 941.52), SIMDE_FLOAT32_C( -966.29), SIMDE_FLOAT32_C( 772.92), SIMDE_FLOAT32_C( 481.01), SIMDE_FLOAT32_C( -1167.36), SIMDE_FLOAT32_C( -770.79), SIMDE_FLOAT32_C( 607.02), SIMDE_FLOAT32_C( 588.25), SIMDE_FLOAT32_C( -430.65), SIMDE_FLOAT32_C( -379.22), SIMDE_FLOAT32_C( 62.66)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_sub_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a, 
test_vec[i].b); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven test for simde_mm512_mask_sub_pd. Each of the 8 entries in test_vec holds a src vector, an 8-bit mask k, operands a and b, and the expected result r. The loop after the table calls simde_mm512_mask_sub_pd(src, k, a, b) for every entry and compares the result with the entry's r via simde_assert_m512d_close(r, expected, 1); the function then returns 0. The expected vectors appear to hold a - b in some elements and the unchanged src element in the others (presumably selected by the bits of k - confirm against the implementation). NOTE(review): the last assert argument looks like a decimal-place precision - confirm against the macro definition. */ static int test_simde_mm512_mask_sub_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d b; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -621.09), SIMDE_FLOAT64_C( 350.18), SIMDE_FLOAT64_C( 873.40), SIMDE_FLOAT64_C( -136.67), SIMDE_FLOAT64_C( -484.90), SIMDE_FLOAT64_C( 672.37), SIMDE_FLOAT64_C( -983.97), SIMDE_FLOAT64_C( -747.18)), UINT8_C(213), simde_mm512_set_pd(SIMDE_FLOAT64_C( -615.22), SIMDE_FLOAT64_C( 861.93), SIMDE_FLOAT64_C( -99.63), SIMDE_FLOAT64_C( -760.72), SIMDE_FLOAT64_C( 803.54), SIMDE_FLOAT64_C( -811.65), SIMDE_FLOAT64_C( -888.48), SIMDE_FLOAT64_C( 353.19)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 451.54), SIMDE_FLOAT64_C( 490.96), SIMDE_FLOAT64_C( -563.07), SIMDE_FLOAT64_C( -968.95), SIMDE_FLOAT64_C( -964.80), SIMDE_FLOAT64_C( -259.48), SIMDE_FLOAT64_C( -97.31), SIMDE_FLOAT64_C( 696.26)), simde_mm512_set_pd(SIMDE_FLOAT64_C(-1066.76), SIMDE_FLOAT64_C( 370.97), SIMDE_FLOAT64_C( 873.40), SIMDE_FLOAT64_C( 208.23), SIMDE_FLOAT64_C( -484.90), SIMDE_FLOAT64_C( -552.17), SIMDE_FLOAT64_C( -983.97), SIMDE_FLOAT64_C( -343.07)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 956.74), SIMDE_FLOAT64_C( 507.70), SIMDE_FLOAT64_C( 525.25), SIMDE_FLOAT64_C( -653.24), SIMDE_FLOAT64_C( -748.66), SIMDE_FLOAT64_C( 738.72), SIMDE_FLOAT64_C( 584.29), SIMDE_FLOAT64_C( -344.89)), UINT8_C(200), simde_mm512_set_pd(SIMDE_FLOAT64_C( -70.99), SIMDE_FLOAT64_C( -712.48), SIMDE_FLOAT64_C( 721.37), SIMDE_FLOAT64_C( 290.11), SIMDE_FLOAT64_C( 739.65), SIMDE_FLOAT64_C( 378.13), SIMDE_FLOAT64_C( 523.23), SIMDE_FLOAT64_C( 338.41)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -243.21), SIMDE_FLOAT64_C( 71.87), SIMDE_FLOAT64_C( 81.06), SIMDE_FLOAT64_C( 409.05), SIMDE_FLOAT64_C( -595.58), SIMDE_FLOAT64_C( 278.33), SIMDE_FLOAT64_C( -484.02), SIMDE_FLOAT64_C( -861.59)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 172.22), SIMDE_FLOAT64_C( -784.35), SIMDE_FLOAT64_C( 
525.25), SIMDE_FLOAT64_C( -653.24), SIMDE_FLOAT64_C( 1335.23), SIMDE_FLOAT64_C( 738.72), SIMDE_FLOAT64_C( 584.29), SIMDE_FLOAT64_C( -344.89)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 475.39), SIMDE_FLOAT64_C( 345.93), SIMDE_FLOAT64_C( 233.76), SIMDE_FLOAT64_C( -401.11), SIMDE_FLOAT64_C( -964.57), SIMDE_FLOAT64_C( 939.13), SIMDE_FLOAT64_C( -392.63), SIMDE_FLOAT64_C( -585.02)), UINT8_C( 75), simde_mm512_set_pd(SIMDE_FLOAT64_C( 496.11), SIMDE_FLOAT64_C( -235.94), SIMDE_FLOAT64_C( -715.35), SIMDE_FLOAT64_C( 338.71), SIMDE_FLOAT64_C( -776.11), SIMDE_FLOAT64_C( 941.96), SIMDE_FLOAT64_C( 76.10), SIMDE_FLOAT64_C( -188.31)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 824.70), SIMDE_FLOAT64_C( -886.45), SIMDE_FLOAT64_C( 497.17), SIMDE_FLOAT64_C( -965.13), SIMDE_FLOAT64_C( -601.99), SIMDE_FLOAT64_C( -657.07), SIMDE_FLOAT64_C( 201.36), SIMDE_FLOAT64_C( -807.98)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 475.39), SIMDE_FLOAT64_C( 650.51), SIMDE_FLOAT64_C( 233.76), SIMDE_FLOAT64_C( -401.11), SIMDE_FLOAT64_C( -174.12), SIMDE_FLOAT64_C( 939.13), SIMDE_FLOAT64_C( -125.26), SIMDE_FLOAT64_C( 619.67)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -246.72), SIMDE_FLOAT64_C( -493.17), SIMDE_FLOAT64_C( -501.93), SIMDE_FLOAT64_C( -95.50), SIMDE_FLOAT64_C( 754.55), SIMDE_FLOAT64_C( -990.48), SIMDE_FLOAT64_C( -396.36), SIMDE_FLOAT64_C( -466.97)), UINT8_C( 69), simde_mm512_set_pd(SIMDE_FLOAT64_C( 601.28), SIMDE_FLOAT64_C( -873.85), SIMDE_FLOAT64_C( -689.96), SIMDE_FLOAT64_C( 31.77), SIMDE_FLOAT64_C( -97.11), SIMDE_FLOAT64_C( 971.94), SIMDE_FLOAT64_C( 389.02), SIMDE_FLOAT64_C( -650.79)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 136.61), SIMDE_FLOAT64_C( 436.94), SIMDE_FLOAT64_C( -777.02), SIMDE_FLOAT64_C( 166.29), SIMDE_FLOAT64_C( -377.75), SIMDE_FLOAT64_C( 71.16), SIMDE_FLOAT64_C( 481.01), SIMDE_FLOAT64_C( -926.81)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -246.72), SIMDE_FLOAT64_C(-1310.79), SIMDE_FLOAT64_C( -501.93), SIMDE_FLOAT64_C( -95.50), SIMDE_FLOAT64_C( 754.55), SIMDE_FLOAT64_C( 900.78), SIMDE_FLOAT64_C( 
-396.36), SIMDE_FLOAT64_C( 276.02)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -389.46), SIMDE_FLOAT64_C( -8.03), SIMDE_FLOAT64_C( -523.51), SIMDE_FLOAT64_C( 466.89), SIMDE_FLOAT64_C( 698.90), SIMDE_FLOAT64_C( -346.04), SIMDE_FLOAT64_C( -734.67), SIMDE_FLOAT64_C( 404.34)), UINT8_C(100), simde_mm512_set_pd(SIMDE_FLOAT64_C( 989.13), SIMDE_FLOAT64_C( 228.14), SIMDE_FLOAT64_C( 840.94), SIMDE_FLOAT64_C( -718.83), SIMDE_FLOAT64_C( 274.95), SIMDE_FLOAT64_C( -99.21), SIMDE_FLOAT64_C( 84.76), SIMDE_FLOAT64_C( -295.84)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -956.22), SIMDE_FLOAT64_C( 564.94), SIMDE_FLOAT64_C( -97.16), SIMDE_FLOAT64_C( -407.99), SIMDE_FLOAT64_C( 352.62), SIMDE_FLOAT64_C( 244.25), SIMDE_FLOAT64_C( 43.92), SIMDE_FLOAT64_C( 624.69)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -389.46), SIMDE_FLOAT64_C( -336.80), SIMDE_FLOAT64_C( 938.10), SIMDE_FLOAT64_C( 466.89), SIMDE_FLOAT64_C( 698.90), SIMDE_FLOAT64_C( -343.46), SIMDE_FLOAT64_C( -734.67), SIMDE_FLOAT64_C( 404.34)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -571.96), SIMDE_FLOAT64_C( 40.27), SIMDE_FLOAT64_C( 676.69), SIMDE_FLOAT64_C( -150.37), SIMDE_FLOAT64_C( 945.34), SIMDE_FLOAT64_C( 75.83), SIMDE_FLOAT64_C( 64.75), SIMDE_FLOAT64_C( 239.06)), UINT8_C(209), simde_mm512_set_pd(SIMDE_FLOAT64_C( 792.47), SIMDE_FLOAT64_C( -265.19), SIMDE_FLOAT64_C( -768.95), SIMDE_FLOAT64_C( 515.15), SIMDE_FLOAT64_C( 350.59), SIMDE_FLOAT64_C( 422.68), SIMDE_FLOAT64_C( 582.99), SIMDE_FLOAT64_C( -985.50)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 12.38), SIMDE_FLOAT64_C( -71.80), SIMDE_FLOAT64_C( 363.01), SIMDE_FLOAT64_C( -195.65), SIMDE_FLOAT64_C( 967.47), SIMDE_FLOAT64_C( -4.13), SIMDE_FLOAT64_C( -478.81), SIMDE_FLOAT64_C( 909.10)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 780.09), SIMDE_FLOAT64_C( -193.39), SIMDE_FLOAT64_C( 676.69), SIMDE_FLOAT64_C( 710.80), SIMDE_FLOAT64_C( 945.34), SIMDE_FLOAT64_C( 75.83), SIMDE_FLOAT64_C( 64.75), SIMDE_FLOAT64_C(-1894.60)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -879.88), SIMDE_FLOAT64_C( -687.95), 
SIMDE_FLOAT64_C( -892.89), SIMDE_FLOAT64_C( -642.85), SIMDE_FLOAT64_C( 533.08), SIMDE_FLOAT64_C( 898.29), SIMDE_FLOAT64_C( -29.99), SIMDE_FLOAT64_C( 5.58)), UINT8_C(186), simde_mm512_set_pd(SIMDE_FLOAT64_C( 649.80), SIMDE_FLOAT64_C( -257.91), SIMDE_FLOAT64_C( 356.56), SIMDE_FLOAT64_C( 567.70), SIMDE_FLOAT64_C( -80.43), SIMDE_FLOAT64_C( -499.15), SIMDE_FLOAT64_C( -866.12), SIMDE_FLOAT64_C( 639.40)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 702.45), SIMDE_FLOAT64_C( 464.79), SIMDE_FLOAT64_C( 387.80), SIMDE_FLOAT64_C( -528.10), SIMDE_FLOAT64_C( -409.82), SIMDE_FLOAT64_C( -696.40), SIMDE_FLOAT64_C( 455.43), SIMDE_FLOAT64_C( 856.81)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -52.65), SIMDE_FLOAT64_C( -687.95), SIMDE_FLOAT64_C( -31.24), SIMDE_FLOAT64_C( 1095.80), SIMDE_FLOAT64_C( 329.39), SIMDE_FLOAT64_C( 898.29), SIMDE_FLOAT64_C(-1321.55), SIMDE_FLOAT64_C( 5.58)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -750.95), SIMDE_FLOAT64_C( 203.46), SIMDE_FLOAT64_C( 194.87), SIMDE_FLOAT64_C( 667.81), SIMDE_FLOAT64_C( -258.76), SIMDE_FLOAT64_C( 897.89), SIMDE_FLOAT64_C( 571.10), SIMDE_FLOAT64_C( -320.96)), UINT8_C( 56), simde_mm512_set_pd(SIMDE_FLOAT64_C( -938.69), SIMDE_FLOAT64_C( 74.05), SIMDE_FLOAT64_C( -981.48), SIMDE_FLOAT64_C( -656.78), SIMDE_FLOAT64_C( -794.37), SIMDE_FLOAT64_C( 177.36), SIMDE_FLOAT64_C( 380.50), SIMDE_FLOAT64_C( 812.91)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -10.37), SIMDE_FLOAT64_C( -894.99), SIMDE_FLOAT64_C( -148.09), SIMDE_FLOAT64_C( 314.75), SIMDE_FLOAT64_C( -740.28), SIMDE_FLOAT64_C( -372.00), SIMDE_FLOAT64_C( -357.36), SIMDE_FLOAT64_C( -791.79)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -750.95), SIMDE_FLOAT64_C( 203.46), SIMDE_FLOAT64_C( -833.39), SIMDE_FLOAT64_C( -971.53), SIMDE_FLOAT64_C( -54.09), SIMDE_FLOAT64_C( 897.89), SIMDE_FLOAT64_C( 571.10), SIMDE_FLOAT64_C( -320.96)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_sub_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); 
simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven test for simde_mm512_maskz_sub_epi32. Each of the 8 entries in test_vec holds a 16-bit mask k, operands a and b, and the expected result r (there is no src operand in this variant). The loop after the table calls simde_mm512_maskz_sub_epi32(k, a, b) for every entry and requires the result to match the entry's r via simde_assert_m512i_i32(r, ==, expected); the function then returns 0. The expected vectors appear to hold a - b in some elements and 0 in the others (presumably selected by the bits of k - confirm against the implementation). */ static int test_simde_mm512_maskz_sub_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT16_C(42308), simde_mm512_set_epi32(INT32_C( 1724059665), INT32_C(-1181331137), INT32_C( -956878955), INT32_C( 1254662027), INT32_C( -334196329), INT32_C( -462422656), INT32_C( 391895544), INT32_C( 1081692585), INT32_C(-1420053828), INT32_C(-1016697350), INT32_C( 1995028549), INT32_C(-2003231670), INT32_C( 1672190791), INT32_C( 255109958), INT32_C(-2019884289), INT32_C(-1398510440)), simde_mm512_set_epi32(INT32_C( 1758500044), INT32_C( 727344602), INT32_C(-1303831643), INT32_C( 1021495274), INT32_C(-2113209677), INT32_C( 1628670789), INT32_C( 684532718), INT32_C( 1920084108), INT32_C( -516238646), INT32_C( 1525557846), INT32_C( 1058541430), INT32_C( 232836803), INT32_C( 1824295576), INT32_C(-1334166784), INT32_C(-1267999587), INT32_C( 1992895333)), simde_mm512_set_epi32(INT32_C( -34440379), INT32_C( 0), INT32_C( 346952688), INT32_C( 0), INT32_C( 0), INT32_C(-2091093445), INT32_C( 0), INT32_C( -838391523), INT32_C( 0), INT32_C( 1752712100), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1589276742), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(57112), simde_mm512_set_epi32(INT32_C( 1427084198), INT32_C( 800677318), INT32_C(-1624555826), INT32_C( -189169736), INT32_C( -500462405), INT32_C( 393027187), INT32_C( -215642095), INT32_C( 1795082661), INT32_C(-1120274966), INT32_C( 1416315501), INT32_C( 2071781830), INT32_C( 1981287236), INT32_C( 1895228887), INT32_C( -102536112), INT32_C(-1592734830), INT32_C(-1858725491)), simde_mm512_set_epi32(INT32_C( -450919787), INT32_C( 1299130560), INT32_C( 1762509692), INT32_C( 310818231), INT32_C( -225659966), INT32_C(-1193662266), INT32_C( 959080993), INT32_C( -80526553), INT32_C( -695376176), INT32_C( -26080833), INT32_C( 542712435), INT32_C( 1266358760), INT32_C( 181254235), INT32_C(-2068678559), 
INT32_C( 1863289430), INT32_C( -269529302)), simde_mm512_set_epi32(INT32_C( 1878003985), INT32_C( -498453242), INT32_C( 0), INT32_C( -499987967), INT32_C( -274802439), INT32_C( 1586689453), INT32_C(-1174723088), INT32_C( 1875609214), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 714928476), INT32_C( 1713974652), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { UINT16_C(45985), simde_mm512_set_epi32(INT32_C(-1997599226), INT32_C( 1542236612), INT32_C( 969579913), INT32_C(-1642088433), INT32_C( 579114801), INT32_C(-1194258935), INT32_C(-1422143462), INT32_C( 1748279001), INT32_C(-1953627340), INT32_C( 1674288033), INT32_C( 717963559), INT32_C( 34905906), INT32_C( -149768860), INT32_C( 1400155142), INT32_C( 1757125654), INT32_C(-1787496119)), simde_mm512_set_epi32(INT32_C( 11674598), INT32_C( 1849959427), INT32_C(-1203439394), INT32_C( -261642074), INT32_C(-2062167113), INT32_C( 1504166558), INT32_C( -111161554), INT32_C( -367200138), INT32_C( 1040642836), INT32_C( 378025736), INT32_C( 1031970925), INT32_C(-1474878922), INT32_C(-1560910320), INT32_C( 1296215099), INT32_C(-1595601438), INT32_C( -126839035)), simde_mm512_set_epi32(INT32_C(-2009273824), INT32_C( 0), INT32_C(-2121947989), INT32_C(-1380446359), INT32_C( 0), INT32_C( 0), INT32_C(-1310981908), INT32_C( 2115479139), INT32_C( 1300697120), INT32_C( 0), INT32_C( -314007366), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1660657084)) }, { UINT16_C(21153), simde_mm512_set_epi32(INT32_C( -788633826), INT32_C( 1642420282), INT32_C( 723895008), INT32_C( 207632598), INT32_C(-2079938207), INT32_C( 1754477079), INT32_C( 1798135551), INT32_C( 23449555), INT32_C( -151172429), INT32_C( 677778908), INT32_C( 90905464), INT32_C( 1354586615), INT32_C(-1670436324), INT32_C( -505523122), INT32_C(-1519449460), INT32_C(-1685310582)), simde_mm512_set_epi32(INT32_C( 799456687), INT32_C(-1358763208), INT32_C( 737687311), INT32_C( 1515407453), INT32_C( 439395016), INT32_C( -78627541), INT32_C(-1674155016), INT32_C( 
1063201251), INT32_C( -686363587), INT32_C( 742525264), INT32_C( 701319512), INT32_C( 24989685), INT32_C( -301118736), INT32_C( -785334161), INT32_C(-1489992316), INT32_C( 306022421)), simde_mm512_set_epi32(INT32_C( 0), INT32_C(-1293783806), INT32_C( 0), INT32_C(-1307774855), INT32_C( 0), INT32_C( 0), INT32_C( -822676729), INT32_C( 0), INT32_C( 535191158), INT32_C( 0), INT32_C( -610414048), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1991333003)) }, { UINT16_C(12143), simde_mm512_set_epi32(INT32_C( -246629264), INT32_C( 633039851), INT32_C( 1692158737), INT32_C( 1115946871), INT32_C( 309808098), INT32_C( 1170830326), INT32_C( 1350105561), INT32_C(-1022199838), INT32_C( 654046756), INT32_C( 1807741640), INT32_C( 224020334), INT32_C( 1191767429), INT32_C( -990326759), INT32_C( 85294451), INT32_C( -252749112), INT32_C(-1788577569)), simde_mm512_set_epi32(INT32_C( 1174570840), INT32_C( 974062633), INT32_C( 983904988), INT32_C( 1803536893), INT32_C( 1164598462), INT32_C( 1777437641), INT32_C(-1475760323), INT32_C( 1833217111), INT32_C( 2013842885), INT32_C( 720911006), INT32_C(-1253744600), INT32_C( 1820529236), INT32_C( -314819268), INT32_C(-1926268921), INT32_C( 2108913431), INT32_C( 1190393502)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 708253749), INT32_C( 0), INT32_C( -854790364), INT32_C( -606607315), INT32_C(-1469101412), INT32_C( 1439550347), INT32_C( 0), INT32_C( 1086830634), INT32_C( 1477764934), INT32_C( 0), INT32_C( -675507491), INT32_C( 2011563372), INT32_C( 1933304753), INT32_C( 1315996225)) }, { UINT16_C(26005), simde_mm512_set_epi32(INT32_C( 1813548464), INT32_C( -757290941), INT32_C( 1295512986), INT32_C( 1291803276), INT32_C( 2032260868), INT32_C( 316165049), INT32_C( 1037644878), INT32_C(-1728213057), INT32_C( 231750243), INT32_C( 1220512969), INT32_C(-1711918828), INT32_C( 1618345779), INT32_C( 1444876028), INT32_C( 1881924556), INT32_C(-1672732354), INT32_C(-1497726182)), simde_mm512_set_epi32(INT32_C(-2042300804), 
INT32_C( -199486597), INT32_C( -290224964), INT32_C( -95049939), INT32_C( 242789967), INT32_C(-2042388049), INT32_C(-1526333573), INT32_C( -943172088), INT32_C(-1987449183), INT32_C( -802616226), INT32_C( 743071941), INT32_C( -28537087), INT32_C(-2054489846), INT32_C( 2118922267), INT32_C( 1876700525), INT32_C( 356823736)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( -557804344), INT32_C( 1585737950), INT32_C( 0), INT32_C( 0), INT32_C(-1936414198), INT32_C( 0), INT32_C( -785040969), INT32_C(-2075767870), INT32_C( 0), INT32_C( 0), INT32_C( 1646882866), INT32_C( 0), INT32_C( -236997711), INT32_C( 0), INT32_C(-1854549918)) }, { UINT16_C(22214), simde_mm512_set_epi32(INT32_C( 1255503250), INT32_C( 603134448), INT32_C( 1664652192), INT32_C( -343768171), INT32_C(-1798248429), INT32_C(-1446513257), INT32_C( 127732840), INT32_C(-1651163018), INT32_C( 741467989), INT32_C( 859412594), INT32_C( 472043835), INT32_C( 1771260096), INT32_C(-1144930983), INT32_C( 236371534), INT32_C( 1323254991), INT32_C( 1564105257)), simde_mm512_set_epi32(INT32_C( 438781482), INT32_C( 1278794690), INT32_C(-1026818029), INT32_C( 2082034838), INT32_C( -20030271), INT32_C( -682181759), INT32_C( 1547951192), INT32_C( 690567023), INT32_C( -270117367), INT32_C( -771535010), INT32_C( 916148853), INT32_C( 1687091511), INT32_C( -535908173), INT32_C( -185822843), INT32_C( -711684672), INT32_C( -424619293)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( -675660242), INT32_C( 0), INT32_C( 1869164287), INT32_C( 0), INT32_C( -764331498), INT32_C(-1420218352), INT32_C( 0), INT32_C( 1011585356), INT32_C( 1630947604), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 422194377), INT32_C( 2034939663), INT32_C( 0)) }, { UINT16_C(35591), simde_mm512_set_epi32(INT32_C( 1513047065), INT32_C( -104652818), INT32_C( 1564491564), INT32_C( -98950215), INT32_C( 631827200), INT32_C( 1322294700), INT32_C( 436005702), INT32_C( 1825722103), INT32_C( 2013933934), INT32_C( -532774987), INT32_C( 1616518393), INT32_C( 803856137), 
INT32_C(-1663534883), INT32_C(-2021437227), INT32_C(-1476004613), INT32_C( -899510926)), simde_mm512_set_epi32(INT32_C( -910624932), INT32_C( 209536966), INT32_C(-1923748050), INT32_C(-1520303619), INT32_C( -387141989), INT32_C( 959069600), INT32_C( 1208361371), INT32_C(-1838273096), INT32_C(-1330134815), INT32_C( 126713528), INT32_C( -150313435), INT32_C(-1972942202), INT32_C( 1666269875), INT32_C(-1750237431), INT32_C( 950405946), INT32_C( -725753907)), simde_mm512_set_epi32(INT32_C(-1871295299), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1018969189), INT32_C( 0), INT32_C( -772355669), INT32_C( -630972097), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -271199796), INT32_C( 1868556737), INT32_C( -173757019)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_sub_epi32(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_sub_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT8_C( 68), simde_mm512_set_epi64(INT64_C(-5073778595823407211), INT64_C( 5388732377458839959), INT64_C(-1986090184057562632), INT64_C( 4645834279775613628), INT64_C(-4366681866184837051), INT64_C(-8603814507289273529), INT64_C( 1095688928769016575), INT64_C(-6006556600469720682)), simde_mm512_set_epi64(INT64_C( 3123921281503271845), INT64_C( 4387288797030316723), INT64_C( 6995087775390049262), INT64_C( 8246698453208060618), INT64_C( 6552221057784745846), INT64_C( 1000026456014490264), INT64_C(-5730202701662528355), INT64_C( 8559420281310089233)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 1001443580428523236), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 8842903110405787823), INT64_C( 0), INT64_C( 0)) }, { UINT8_C(204), simde_mm512_set_epi64(INT64_C(-6977414139090468936), INT64_C(-2149469661959479693), INT64_C( 
-926175743870842459), INT64_C(-4811544340081196435), INT64_C( 8898235206278318916), INT64_C( 8139946092291910736), INT64_C(-6840744003613877875), INT64_C( 6930156028979502872)), simde_mm512_set_epi64(INT64_C( 7569921486333851063), INT64_C( -969202170885166906), INT64_C( 4119221503364645671), INT64_C(-2986617930068653633), INT64_C( 2330932160723884520), INT64_C( 778481013812787297), INT64_C( 8002767168857919274), INT64_C( 6129279959849065926)), simde_mm512_set_epi64(INT64_C( 3899408448285231617), INT64_C(-1180267491074312787), INT64_C( 0), INT64_C( 0), INT64_C( 6567303045554434396), INT64_C( 7361465078479123439), INT64_C( 0), INT64_C( 0)) }, { UINT8_C(192), simde_mm512_set_epi64(INT64_C(-7052716116295772367), INT64_C(-5129303065907965926), INT64_C( 7508801135919891252), INT64_C( 7191012346537132327), INT64_C( 149919728852448612), INT64_C( 6013620545973361686), INT64_C(-7677237369544501225), INT64_C(-4708878852454120811)), simde_mm512_set_epi64(INT64_C(-1123744148854811721), INT64_C( 6460346178530692910), INT64_C(-1577112582756044012), INT64_C( 1623608174198300781), INT64_C(-6334556732815677936), INT64_C( 5567201461485768162), INT64_C( -544769504883831290), INT64_C( 6623855812203421065)), simde_mm512_set_epi64(INT64_C(-5928971967440960646), INT64_C( 6857094829270892780), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { UINT8_C(222), simde_mm512_set_epi64(INT64_C(-8933266575011401193), INT64_C( 7722933385343389651), INT64_C( -649280637934103076), INT64_C( 390435996262291959), INT64_C(-7174469377841015730), INT64_C(-6525985736015203446), INT64_C( 8306044009918255777), INT64_C( 50142018453906435)), simde_mm512_set_epi64(INT64_C( 1887187227961736491), INT64_C(-7190441041091155485), INT64_C(-2947909158587725488), INT64_C( 3012144368111669237), INT64_C(-1293295119823224721), INT64_C(-6399468268205275115), INT64_C(-3387156489546934214), INT64_C( 3109105385305290966)), simde_mm512_set_epi64(INT64_C( 7626290270736413932), 
INT64_C(-3533369647275006480), INT64_C( 0), INT64_C(-2621708371849377278), INT64_C(-5881174258017791009), INT64_C( -126517467809928331), INT64_C(-6753543574244361625), INT64_C( 0)) }, { UINT8_C( 93), simde_mm512_set_epi64(INT64_C( 5028677960685124057), INT64_C(-4390314873532451292), INT64_C( 7764191223641425774), INT64_C( 5118602135297642521), INT64_C( 366336881617492680), INT64_C(-7681882161808553379), INT64_C( 1609244596442152367), INT64_C(-5835843540630358257)), simde_mm512_set_epi64(INT64_C( 7634036541593595709), INT64_C( 7873607540226444741), INT64_C( 3096289197137682472), INT64_C( 7819113534012013884), INT64_C(-8273262016887294185), INT64_C( 5112701164509248624), INT64_C( 2718885458801871633), INT64_C( 4792955315328338914)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 6182821659950655583), INT64_C( 0), INT64_C(-2700511398714371363), INT64_C( 8639598898504786865), INT64_C( 5652160747391749613), INT64_C( 0), INT64_C( 7817945217750854445)) }, { UINT8_C(190), simde_mm512_set_epi64(INT64_C( 4456650818438664127), INT64_C( 995359715745565897), INT64_C(-7352635378048303309), INT64_C( 6205695288916304844), INT64_C(-7184330752593853670), INT64_C( 997194843856987541), INT64_C( 5044743345609311273), INT64_C( 4225839747634809341)), simde_mm512_set_epi64(INT64_C(-6555552775470033400), INT64_C(-8536029239954568098), INT64_C( 3191469689436671745), INT64_C(-8823966696415154149), INT64_C( 8060367379617854136), INT64_C( 7789131346128709699), INT64_C( 5564185907705109132), INT64_C( 8728493965316737977)), simde_mm512_set_epi64(INT64_C(-7434540479800854089), INT64_C( 0), INT64_C( 7902639006224576562), INT64_C(-3417082088378092623), INT64_C( 3202045941497843810), INT64_C(-6791936502271722158), INT64_C( -519442562095797859), INT64_C( 0)) }, { UINT8_C(175), simde_mm512_set_epi64(INT64_C(-7091691161933191339), INT64_C( 3691148985472569659), INT64_C( 7607504188179856729), INT64_C( 1015208009558607055), INT64_C( 6717780929629073882), INT64_C( 2140431133564008060), INT64_C( 
-856788406100589380), INT64_C( -408236379249004977)), simde_mm512_set_epi64(INT64_C( 2965962783505929737), INT64_C(-3313717634752884107), INT64_C( 7246002868863283379), INT64_C( -798103029951459904), INT64_C(-1823725975430138478), INT64_C( 2590442730915664800), INT64_C(-1476473049354016749), INT64_C(-6212727131917710232)), simde_mm512_set_epi64(INT64_C( 8389090128270430540), INT64_C( 0), INT64_C( 361501319316573350), INT64_C( 0), INT64_C( 8541506905059212360), INT64_C( -450011597351656740), INT64_C( 619684643253427369), INT64_C( 5804490752668705255)) }, { UINT8_C( 88), simde_mm512_set_epi64(INT64_C( 8649780386596814773), INT64_C( 6942893632121331465), INT64_C(-7144827915966656299), INT64_C(-6339391538184680078), INT64_C( 7515152281876400903), INT64_C( 1884552116559207362), INT64_C(-4410149851416144746), INT64_C( -86029355262231679)), simde_mm512_set_epi64(INT64_C(-5712885529569296712), INT64_C( -645591285152396666), INT64_C( 7156574621979737865), INT64_C( 4081962459563155405), INT64_C( 6498487665674100718), INT64_C( 6719440106443908025), INT64_C( 2713677162045545900), INT64_C( 1872630232785243895)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 7588484917273728131), INT64_C( 0), INT64_C( 8025390075961716133), INT64_C( 1016664616202300185), INT64_C( 0), INT64_C( 0), INT64_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_sub_epi64(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_sub_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512 a; simde__m512 b; simde__m512 r; } test_vec[8] = { { UINT16_C(26074), simde_mm512_set_ps(SIMDE_FLOAT32_C( -524.33), SIMDE_FLOAT32_C( -241.59), SIMDE_FLOAT32_C( -105.89), SIMDE_FLOAT32_C( -289.61), SIMDE_FLOAT32_C( -891.58), SIMDE_FLOAT32_C( 378.73), SIMDE_FLOAT32_C( -71.99), SIMDE_FLOAT32_C( 449.90), SIMDE_FLOAT32_C( -415.75), SIMDE_FLOAT32_C( 784.67), 
SIMDE_FLOAT32_C( -496.30), SIMDE_FLOAT32_C( 526.56), SIMDE_FLOAT32_C( 67.17), SIMDE_FLOAT32_C( -881.21), SIMDE_FLOAT32_C( 348.77), SIMDE_FLOAT32_C( 537.04)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 392.86), SIMDE_FLOAT32_C( 15.96), SIMDE_FLOAT32_C( -681.24), SIMDE_FLOAT32_C( 759.61), SIMDE_FLOAT32_C( -507.08), SIMDE_FLOAT32_C( -150.50), SIMDE_FLOAT32_C( 409.54), SIMDE_FLOAT32_C( -197.17), SIMDE_FLOAT32_C( 554.42), SIMDE_FLOAT32_C( 844.38), SIMDE_FLOAT32_C( -817.51), SIMDE_FLOAT32_C( 338.74), SIMDE_FLOAT32_C( -70.99), SIMDE_FLOAT32_C( -221.33), SIMDE_FLOAT32_C( 59.42), SIMDE_FLOAT32_C( 138.47)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -257.55), SIMDE_FLOAT32_C( 575.35), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 529.23), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 647.07), SIMDE_FLOAT32_C( -970.17), SIMDE_FLOAT32_C( -59.71), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 187.82), SIMDE_FLOAT32_C( 138.16), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 289.35), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(10432), simde_mm512_set_ps(SIMDE_FLOAT32_C( -855.26), SIMDE_FLOAT32_C( 444.16), SIMDE_FLOAT32_C( 962.50), SIMDE_FLOAT32_C( 987.86), SIMDE_FLOAT32_C( -410.31), SIMDE_FLOAT32_C( 36.70), SIMDE_FLOAT32_C( 874.49), SIMDE_FLOAT32_C( -627.16), SIMDE_FLOAT32_C( 911.91), SIMDE_FLOAT32_C( -816.98), SIMDE_FLOAT32_C( -164.10), SIMDE_FLOAT32_C( -340.48), SIMDE_FLOAT32_C( -77.39), SIMDE_FLOAT32_C( 952.25), SIMDE_FLOAT32_C( 134.46), SIMDE_FLOAT32_C( 698.09)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -179.27), SIMDE_FLOAT32_C( 894.92), SIMDE_FLOAT32_C( -553.39), SIMDE_FLOAT32_C( 676.19), SIMDE_FLOAT32_C( -747.28), SIMDE_FLOAT32_C( -915.60), SIMDE_FLOAT32_C( -132.34), SIMDE_FLOAT32_C( -335.46), SIMDE_FLOAT32_C( 243.51), SIMDE_FLOAT32_C( 766.95), SIMDE_FLOAT32_C( 899.58), SIMDE_FLOAT32_C( 478.33), SIMDE_FLOAT32_C( -35.25), SIMDE_FLOAT32_C( -117.47), SIMDE_FLOAT32_C( 258.33), SIMDE_FLOAT32_C( -248.63)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( 1515.89), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 336.97), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 668.40), SIMDE_FLOAT32_C( -1583.93), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C( 9219), simde_mm512_set_ps(SIMDE_FLOAT32_C( 878.16), SIMDE_FLOAT32_C( -299.57), SIMDE_FLOAT32_C( 829.01), SIMDE_FLOAT32_C( -823.97), SIMDE_FLOAT32_C( 313.21), SIMDE_FLOAT32_C( -396.40), SIMDE_FLOAT32_C( 940.94), SIMDE_FLOAT32_C( -281.84), SIMDE_FLOAT32_C( 235.34), SIMDE_FLOAT32_C( 443.88), SIMDE_FLOAT32_C( -185.89), SIMDE_FLOAT32_C( -220.35), SIMDE_FLOAT32_C( -983.75), SIMDE_FLOAT32_C( -348.00), SIMDE_FLOAT32_C( 167.63), SIMDE_FLOAT32_C( 489.46)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 439.60), SIMDE_FLOAT32_C( 39.73), SIMDE_FLOAT32_C( 948.24), SIMDE_FLOAT32_C( -515.41), SIMDE_FLOAT32_C( -519.45), SIMDE_FLOAT32_C( 273.14), SIMDE_FLOAT32_C( 256.99), SIMDE_FLOAT32_C( 69.80), SIMDE_FLOAT32_C( -548.50), SIMDE_FLOAT32_C( -730.33), SIMDE_FLOAT32_C( 337.76), SIMDE_FLOAT32_C( 90.27), SIMDE_FLOAT32_C( -665.67), SIMDE_FLOAT32_C( 930.26), SIMDE_FLOAT32_C( -181.77), SIMDE_FLOAT32_C( 530.83)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -119.23), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -669.54), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 349.40), SIMDE_FLOAT32_C( -41.37)) }, { UINT16_C(60216), simde_mm512_set_ps(SIMDE_FLOAT32_C( -294.33), SIMDE_FLOAT32_C( 963.39), SIMDE_FLOAT32_C( -504.91), SIMDE_FLOAT32_C( -654.23), SIMDE_FLOAT32_C( -988.36), SIMDE_FLOAT32_C( 634.30), SIMDE_FLOAT32_C( -857.50), SIMDE_FLOAT32_C( -235.19), SIMDE_FLOAT32_C( -903.31), SIMDE_FLOAT32_C( -183.01), SIMDE_FLOAT32_C( 
-989.08), SIMDE_FLOAT32_C( -684.38), SIMDE_FLOAT32_C( -369.22), SIMDE_FLOAT32_C( 764.60), SIMDE_FLOAT32_C( 215.22), SIMDE_FLOAT32_C( -906.73)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -656.49), SIMDE_FLOAT32_C( -795.39), SIMDE_FLOAT32_C( 220.41), SIMDE_FLOAT32_C( 680.39), SIMDE_FLOAT32_C( -673.42), SIMDE_FLOAT32_C( 859.78), SIMDE_FLOAT32_C( 306.17), SIMDE_FLOAT32_C( 632.76), SIMDE_FLOAT32_C( -662.91), SIMDE_FLOAT32_C( 31.45), SIMDE_FLOAT32_C( -162.68), SIMDE_FLOAT32_C( 929.60), SIMDE_FLOAT32_C( -957.67), SIMDE_FLOAT32_C( 222.14), SIMDE_FLOAT32_C( 292.45), SIMDE_FLOAT32_C( -99.46)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 362.16), SIMDE_FLOAT32_C( 1758.78), SIMDE_FLOAT32_C( -725.32), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -314.94), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -1163.67), SIMDE_FLOAT32_C( -867.95), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -826.40), SIMDE_FLOAT32_C( -1613.98), SIMDE_FLOAT32_C( 588.45), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C( 1065), simde_mm512_set_ps(SIMDE_FLOAT32_C( -160.16), SIMDE_FLOAT32_C( -172.32), SIMDE_FLOAT32_C( -146.34), SIMDE_FLOAT32_C( -664.30), SIMDE_FLOAT32_C( -152.25), SIMDE_FLOAT32_C( 103.01), SIMDE_FLOAT32_C( -445.68), SIMDE_FLOAT32_C( -705.22), SIMDE_FLOAT32_C( -480.35), SIMDE_FLOAT32_C( -454.79), SIMDE_FLOAT32_C( 524.00), SIMDE_FLOAT32_C( -158.20), SIMDE_FLOAT32_C( -445.04), SIMDE_FLOAT32_C( -960.28), SIMDE_FLOAT32_C( 167.13), SIMDE_FLOAT32_C( -825.53)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -541.83), SIMDE_FLOAT32_C( -457.69), SIMDE_FLOAT32_C( 312.80), SIMDE_FLOAT32_C( -62.23), SIMDE_FLOAT32_C( 416.18), SIMDE_FLOAT32_C( 853.40), SIMDE_FLOAT32_C( -17.96), SIMDE_FLOAT32_C( 885.15), SIMDE_FLOAT32_C( -212.03), SIMDE_FLOAT32_C( -855.73), SIMDE_FLOAT32_C( -371.31), SIMDE_FLOAT32_C( -695.44), SIMDE_FLOAT32_C( -895.68), SIMDE_FLOAT32_C( 538.84), SIMDE_FLOAT32_C( 882.30), SIMDE_FLOAT32_C( 585.87)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -750.39), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 895.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 450.64), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -1411.40)) }, { UINT16_C( 4987), simde_mm512_set_ps(SIMDE_FLOAT32_C( 955.74), SIMDE_FLOAT32_C( 48.94), SIMDE_FLOAT32_C( 560.80), SIMDE_FLOAT32_C( 626.25), SIMDE_FLOAT32_C( 986.71), SIMDE_FLOAT32_C( -13.30), SIMDE_FLOAT32_C( -833.84), SIMDE_FLOAT32_C( 647.36), SIMDE_FLOAT32_C( -398.46), SIMDE_FLOAT32_C( -852.77), SIMDE_FLOAT32_C( 195.24), SIMDE_FLOAT32_C( -431.65), SIMDE_FLOAT32_C( -246.40), SIMDE_FLOAT32_C( -123.66), SIMDE_FLOAT32_C( 302.57), SIMDE_FLOAT32_C( -312.92)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 864.85), SIMDE_FLOAT32_C( -886.94), SIMDE_FLOAT32_C( 289.25), SIMDE_FLOAT32_C( 74.52), SIMDE_FLOAT32_C( -653.98), SIMDE_FLOAT32_C( 43.30), SIMDE_FLOAT32_C( -126.09), SIMDE_FLOAT32_C( -155.50), SIMDE_FLOAT32_C( -396.73), SIMDE_FLOAT32_C( -53.65), SIMDE_FLOAT32_C( -516.81), SIMDE_FLOAT32_C( -892.08), SIMDE_FLOAT32_C( 202.83), SIMDE_FLOAT32_C( -327.18), SIMDE_FLOAT32_C( 221.07), SIMDE_FLOAT32_C( -891.88)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 551.73), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -707.75), SIMDE_FLOAT32_C( 802.86), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -799.12), SIMDE_FLOAT32_C( 712.05), SIMDE_FLOAT32_C( 460.43), SIMDE_FLOAT32_C( -449.23), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 81.50), SIMDE_FLOAT32_C( 578.96)) }, { UINT16_C(56258), simde_mm512_set_ps(SIMDE_FLOAT32_C( -30.48), SIMDE_FLOAT32_C( 682.33), SIMDE_FLOAT32_C( -678.43), SIMDE_FLOAT32_C( 640.73), SIMDE_FLOAT32_C( -214.39), SIMDE_FLOAT32_C( 913.47), SIMDE_FLOAT32_C( 802.27), SIMDE_FLOAT32_C( -719.14), SIMDE_FLOAT32_C( 839.92), SIMDE_FLOAT32_C( 326.41), SIMDE_FLOAT32_C( 
231.12), SIMDE_FLOAT32_C( -599.80), SIMDE_FLOAT32_C( -175.19), SIMDE_FLOAT32_C( -889.93), SIMDE_FLOAT32_C( -271.66), SIMDE_FLOAT32_C( -767.93)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 521.85), SIMDE_FLOAT32_C( 990.67), SIMDE_FLOAT32_C( -279.18), SIMDE_FLOAT32_C( 874.22), SIMDE_FLOAT32_C( -573.38), SIMDE_FLOAT32_C( 750.45), SIMDE_FLOAT32_C( 668.60), SIMDE_FLOAT32_C( -415.36), SIMDE_FLOAT32_C( -224.84), SIMDE_FLOAT32_C( 162.63), SIMDE_FLOAT32_C( -940.52), SIMDE_FLOAT32_C( -654.73), SIMDE_FLOAT32_C( -780.19), SIMDE_FLOAT32_C( 466.85), SIMDE_FLOAT32_C( -383.81), SIMDE_FLOAT32_C( 542.46)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -552.33), SIMDE_FLOAT32_C( -308.34), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -233.49), SIMDE_FLOAT32_C( 358.99), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 133.67), SIMDE_FLOAT32_C( -303.78), SIMDE_FLOAT32_C( 1064.76), SIMDE_FLOAT32_C( 163.78), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 112.15), SIMDE_FLOAT32_C( 0.00)) }, { UINT16_C(18374), simde_mm512_set_ps(SIMDE_FLOAT32_C( 292.05), SIMDE_FLOAT32_C( -553.40), SIMDE_FLOAT32_C( 143.99), SIMDE_FLOAT32_C( -940.99), SIMDE_FLOAT32_C( 81.28), SIMDE_FLOAT32_C( 184.98), SIMDE_FLOAT32_C( 662.04), SIMDE_FLOAT32_C( 951.27), SIMDE_FLOAT32_C( 953.92), SIMDE_FLOAT32_C( -384.26), SIMDE_FLOAT32_C( -149.83), SIMDE_FLOAT32_C( 751.91), SIMDE_FLOAT32_C( -625.68), SIMDE_FLOAT32_C( 58.69), SIMDE_FLOAT32_C( 581.13), SIMDE_FLOAT32_C( 892.26)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 104.19), SIMDE_FLOAT32_C( 819.72), SIMDE_FLOAT32_C( -437.31), SIMDE_FLOAT32_C( 380.61), SIMDE_FLOAT32_C( 930.00), SIMDE_FLOAT32_C( -224.08), SIMDE_FLOAT32_C( -557.43), SIMDE_FLOAT32_C( -295.43), SIMDE_FLOAT32_C( -271.48), SIMDE_FLOAT32_C( -705.78), SIMDE_FLOAT32_C( -796.97), SIMDE_FLOAT32_C( -62.19), SIMDE_FLOAT32_C( -247.25), SIMDE_FLOAT32_C( 225.36), SIMDE_FLOAT32_C( 312.68), SIMDE_FLOAT32_C( -185.21)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -1373.12), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 409.06), SIMDE_FLOAT32_C( 1219.47), SIMDE_FLOAT32_C( 1246.70), SIMDE_FLOAT32_C( 1225.40), SIMDE_FLOAT32_C( 321.52), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -166.67), SIMDE_FLOAT32_C( 268.45), SIMDE_FLOAT32_C( 0.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_maskz_sub_ps(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_maskz_sub_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512d a; simde__m512d b; simde__m512d r; } test_vec[8] = { { UINT8_C( 63), simde_mm512_set_pd(SIMDE_FLOAT64_C( -415.75), SIMDE_FLOAT64_C( 784.67), SIMDE_FLOAT64_C( -496.30), SIMDE_FLOAT64_C( 526.56), SIMDE_FLOAT64_C( 67.17), SIMDE_FLOAT64_C( -881.21), SIMDE_FLOAT64_C( 348.77), SIMDE_FLOAT64_C( 537.04)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 554.42), SIMDE_FLOAT64_C( 844.38), SIMDE_FLOAT64_C( -817.51), SIMDE_FLOAT64_C( 338.74), SIMDE_FLOAT64_C( -70.99), SIMDE_FLOAT64_C( -221.33), SIMDE_FLOAT64_C( 59.42), SIMDE_FLOAT64_C( 138.47)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 321.21), SIMDE_FLOAT64_C( 187.82), SIMDE_FLOAT64_C( 138.16), SIMDE_FLOAT64_C( -659.88), SIMDE_FLOAT64_C( 289.35), SIMDE_FLOAT64_C( 398.57)) }, { UINT8_C(204), simde_mm512_set_pd(SIMDE_FLOAT64_C( 392.86), SIMDE_FLOAT64_C( 15.96), SIMDE_FLOAT64_C( -681.24), SIMDE_FLOAT64_C( 759.61), SIMDE_FLOAT64_C( -507.08), SIMDE_FLOAT64_C( -150.50), SIMDE_FLOAT64_C( 409.54), SIMDE_FLOAT64_C( -197.17)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -661.30), SIMDE_FLOAT64_C( -524.33), SIMDE_FLOAT64_C( -241.59), SIMDE_FLOAT64_C( -105.89), SIMDE_FLOAT64_C( -289.61), SIMDE_FLOAT64_C( -891.58), SIMDE_FLOAT64_C( 378.73), SIMDE_FLOAT64_C( -71.99)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1054.16), SIMDE_FLOAT64_C( 540.29), 
SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -217.47), SIMDE_FLOAT64_C( 741.08), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(198), simde_mm512_set_pd(SIMDE_FLOAT64_C( 911.91), SIMDE_FLOAT64_C( -816.98), SIMDE_FLOAT64_C( -164.10), SIMDE_FLOAT64_C( -340.48), SIMDE_FLOAT64_C( -77.39), SIMDE_FLOAT64_C( 952.25), SIMDE_FLOAT64_C( 134.46), SIMDE_FLOAT64_C( 698.09)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 243.51), SIMDE_FLOAT64_C( 766.95), SIMDE_FLOAT64_C( 899.58), SIMDE_FLOAT64_C( 478.33), SIMDE_FLOAT64_C( -35.25), SIMDE_FLOAT64_C( -117.47), SIMDE_FLOAT64_C( 258.33), SIMDE_FLOAT64_C( -248.63)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 668.40), SIMDE_FLOAT64_C(-1583.93), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1069.72), SIMDE_FLOAT64_C( -123.87), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(149), simde_mm512_set_pd(SIMDE_FLOAT64_C( -179.27), SIMDE_FLOAT64_C( 894.92), SIMDE_FLOAT64_C( -553.39), SIMDE_FLOAT64_C( 676.19), SIMDE_FLOAT64_C( -747.28), SIMDE_FLOAT64_C( -915.60), SIMDE_FLOAT64_C( -132.34), SIMDE_FLOAT64_C( -335.46)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -395.05), SIMDE_FLOAT64_C( -855.26), SIMDE_FLOAT64_C( 444.16), SIMDE_FLOAT64_C( 962.50), SIMDE_FLOAT64_C( 987.86), SIMDE_FLOAT64_C( -410.31), SIMDE_FLOAT64_C( 36.70), SIMDE_FLOAT64_C( 874.49)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 215.78), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -286.31), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -505.29), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-1209.95)) }, { UINT8_C(196), simde_mm512_set_pd(SIMDE_FLOAT64_C( 235.34), SIMDE_FLOAT64_C( 443.88), SIMDE_FLOAT64_C( -185.89), SIMDE_FLOAT64_C( -220.35), SIMDE_FLOAT64_C( -983.75), SIMDE_FLOAT64_C( -348.00), SIMDE_FLOAT64_C( 167.63), SIMDE_FLOAT64_C( 489.46)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -548.50), SIMDE_FLOAT64_C( -730.33), SIMDE_FLOAT64_C( 337.76), SIMDE_FLOAT64_C( 90.27), SIMDE_FLOAT64_C( -665.67), SIMDE_FLOAT64_C( 930.26), 
SIMDE_FLOAT64_C( -181.77), SIMDE_FLOAT64_C( 530.83)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 783.84), SIMDE_FLOAT64_C( 1174.21), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-1278.26), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(230), simde_mm512_set_pd(SIMDE_FLOAT64_C( 439.60), SIMDE_FLOAT64_C( 39.73), SIMDE_FLOAT64_C( 948.24), SIMDE_FLOAT64_C( -515.41), SIMDE_FLOAT64_C( -519.45), SIMDE_FLOAT64_C( 273.14), SIMDE_FLOAT64_C( 256.99), SIMDE_FLOAT64_C( 69.80)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -138.55), SIMDE_FLOAT64_C( 878.16), SIMDE_FLOAT64_C( -299.57), SIMDE_FLOAT64_C( 829.01), SIMDE_FLOAT64_C( -823.97), SIMDE_FLOAT64_C( 313.21), SIMDE_FLOAT64_C( -396.40), SIMDE_FLOAT64_C( 940.94)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 578.15), SIMDE_FLOAT64_C( -838.43), SIMDE_FLOAT64_C( 1247.81), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -40.07), SIMDE_FLOAT64_C( 653.39), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C( 58), simde_mm512_set_pd(SIMDE_FLOAT64_C( -903.31), SIMDE_FLOAT64_C( -183.01), SIMDE_FLOAT64_C( -989.08), SIMDE_FLOAT64_C( -684.38), SIMDE_FLOAT64_C( -369.22), SIMDE_FLOAT64_C( 764.60), SIMDE_FLOAT64_C( 215.22), SIMDE_FLOAT64_C( -906.73)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -662.91), SIMDE_FLOAT64_C( 31.45), SIMDE_FLOAT64_C( -162.68), SIMDE_FLOAT64_C( 929.60), SIMDE_FLOAT64_C( -957.67), SIMDE_FLOAT64_C( 222.14), SIMDE_FLOAT64_C( 292.45), SIMDE_FLOAT64_C( -99.46)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -826.40), SIMDE_FLOAT64_C(-1613.98), SIMDE_FLOAT64_C( 588.45), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -77.23), SIMDE_FLOAT64_C( 0.00)) }, { UINT8_C(175), simde_mm512_set_pd(SIMDE_FLOAT64_C( -656.49), SIMDE_FLOAT64_C( -795.39), SIMDE_FLOAT64_C( 220.41), SIMDE_FLOAT64_C( 680.39), SIMDE_FLOAT64_C( -673.42), SIMDE_FLOAT64_C( 859.78), SIMDE_FLOAT64_C( 306.17), SIMDE_FLOAT64_C( 632.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 367.28), SIMDE_FLOAT64_C( 
-294.33), SIMDE_FLOAT64_C( 963.39), SIMDE_FLOAT64_C( -504.91), SIMDE_FLOAT64_C( -654.23), SIMDE_FLOAT64_C( -988.36), SIMDE_FLOAT64_C( 634.30), SIMDE_FLOAT64_C( -857.50)), simde_mm512_set_pd(SIMDE_FLOAT64_C(-1023.77), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -742.98), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -19.19), SIMDE_FLOAT64_C( 1848.14), SIMDE_FLOAT64_C( -328.13), SIMDE_FLOAT64_C( 1490.26)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_maskz_sub_pd(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sub_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_sub_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_sub_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sub_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sub_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_sub_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_sub_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sub_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_sub_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_sub_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sub_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_sub_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_sub_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sub_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_sub_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_sub_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/subs.c000066400000000000000000007733641400333146700164200ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the 
following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN subs #include #include #include static int test_simde_mm512_subs_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( 82), INT8_C( 83), INT8_C( 117), INT8_C( 65), INT8_C( -47), INT8_C(-122), INT8_C( 116), INT8_C( 14), INT8_C( 76), INT8_C( 1), INT8_C( -50), INT8_C( 4), INT8_C( 83), INT8_C( -77), INT8_C( 112), INT8_C(-102), INT8_C( 99), INT8_C(-118), INT8_C( -47), INT8_C( -67), INT8_C( 60), INT8_C( -34), INT8_C( 78), INT8_C(-110), INT8_C( -58), INT8_C( 87), INT8_C( -61), INT8_C( 26), INT8_C( -17), INT8_C( -46), INT8_C( 116), INT8_C( -20), INT8_C(-120), INT8_C( 48), INT8_C( 24), INT8_C( 46), INT8_C( 103), INT8_C( -34), INT8_C( 42), INT8_C( 51), INT8_C( -65), INT8_C( 86), INT8_C( 61), INT8_C( -56), INT8_C( 58), INT8_C( 119), INT8_C( 93), INT8_C( -1), INT8_C( -58), INT8_C(-121), INT8_C( -32), INT8_C( 98), INT8_C( -66), INT8_C( 79), INT8_C( 99), INT8_C( -93), INT8_C( 77), INT8_C( -56), INT8_C( -78), INT8_C( 39), INT8_C( -50), INT8_C( -60), INT8_C( -68), INT8_C( -4)), simde_mm512_set_epi8(INT8_C( 106), INT8_C( 39), INT8_C(-106), INT8_C( 14), INT8_C( -28), INT8_C( -2), INT8_C( 39), INT8_C( -28), INT8_C( 54), INT8_C( 70), INT8_C( -19), INT8_C( -12), INT8_C( -42), INT8_C( 
28), INT8_C( -13), INT8_C( -6), INT8_C( 116), INT8_C( 2), INT8_C( 23), INT8_C( 121), INT8_C( 112), INT8_C( -35), INT8_C(-124), INT8_C( 10), INT8_C( -16), INT8_C(-117), INT8_C( 26), INT8_C(-125), INT8_C( 36), INT8_C( 109), INT8_C( 29), INT8_C( -35), INT8_C( -9), INT8_C( -85), INT8_C( -38), INT8_C( 95), INT8_C( -88), INT8_C( 3), INT8_C( 4), INT8_C( 100), INT8_C( 85), INT8_C( 21), INT8_C( 66), INT8_C( -33), INT8_C( -77), INT8_C( -5), INT8_C(-126), INT8_C( 122), INT8_C( -30), INT8_C( -83), INT8_C( 74), INT8_C(-108), INT8_C( 83), INT8_C( -96), INT8_C( -57), INT8_C(-108), INT8_C( 89), INT8_C( 59), INT8_C( 111), INT8_C( -47), INT8_C( -10), INT8_C( -18), INT8_C( 20), INT8_C(-125)), simde_mm512_set_epi8(INT8_C( -24), INT8_C( 44), INT8_C( 127), INT8_C( 51), INT8_C( -19), INT8_C(-120), INT8_C( 77), INT8_C( 42), INT8_C( 22), INT8_C( -69), INT8_C( -31), INT8_C( 16), INT8_C( 125), INT8_C(-105), INT8_C( 125), INT8_C( -96), INT8_C( -17), INT8_C(-120), INT8_C( -70), INT8_C(-128), INT8_C( -52), INT8_C( 1), INT8_C( 127), INT8_C(-120), INT8_C( -42), INT8_C( 127), INT8_C( -87), INT8_C( 127), INT8_C( -53), INT8_C(-128), INT8_C( 87), INT8_C( 15), INT8_C(-111), INT8_C( 127), INT8_C( 62), INT8_C( -49), INT8_C( 127), INT8_C( -37), INT8_C( 38), INT8_C( -49), INT8_C(-128), INT8_C( 65), INT8_C( -5), INT8_C( -23), INT8_C( 127), INT8_C( 124), INT8_C( 127), INT8_C(-123), INT8_C( -28), INT8_C( -38), INT8_C(-106), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 15), INT8_C( -12), INT8_C(-115), INT8_C(-128), INT8_C( 86), INT8_C( -40), INT8_C( -42), INT8_C( -88), INT8_C( 121)) }, { simde_mm512_set_epi8(INT8_C(-108), INT8_C(-116), INT8_C( 21), INT8_C(-123), INT8_C( -53), INT8_C( 42), INT8_C( 66), INT8_C( 13), INT8_C( 9), INT8_C( 115), INT8_C( 86), INT8_C( 126), INT8_C( -24), INT8_C( 35), INT8_C( -5), INT8_C( 103), INT8_C( 38), INT8_C( 111), INT8_C( 24), INT8_C( -71), INT8_C( -1), INT8_C( 17), INT8_C( -63), INT8_C( -13), INT8_C( 14), INT8_C( 82), INT8_C( 78), INT8_C(-102), INT8_C( -7), 
INT8_C( 93), INT8_C( 25), INT8_C( 103), INT8_C( 113), INT8_C( -15), INT8_C( -19), INT8_C( -73), INT8_C( -11), INT8_C( 103), INT8_C( -97), INT8_C( 123), INT8_C( 28), INT8_C( 53), INT8_C( -15), INT8_C( 122), INT8_C( 3), INT8_C( -54), INT8_C( -61), INT8_C( 58), INT8_C( -44), INT8_C( -3), INT8_C( -43), INT8_C( -35), INT8_C(-118), INT8_C( -18), INT8_C( 15), INT8_C( 54), INT8_C(-102), INT8_C( -58), INT8_C( -74), INT8_C( -70), INT8_C( 46), INT8_C( 48), INT8_C( -35), INT8_C( 92)), simde_mm512_set_epi8(INT8_C( 6), INT8_C( 68), INT8_C( 77), INT8_C( -94), INT8_C( -48), INT8_C(-101), INT8_C( -8), INT8_C( 82), INT8_C( 50), INT8_C( -15), INT8_C( 6), INT8_C( 30), INT8_C( -47), INT8_C( -15), INT8_C( -14), INT8_C( -97), INT8_C( 28), INT8_C( -47), INT8_C( -92), INT8_C( -84), INT8_C( -37), INT8_C( -33), INT8_C(-123), INT8_C( -19), INT8_C( 58), INT8_C( 29), INT8_C( 93), INT8_C( -55), INT8_C(-127), INT8_C( -60), INT8_C( 32), INT8_C( 116), INT8_C( -46), INT8_C( 51), INT8_C( -40), INT8_C( 10), INT8_C( 4), INT8_C( 50), INT8_C( 48), INT8_C( 53), INT8_C( 78), INT8_C( 21), INT8_C( 64), INT8_C( 107), INT8_C( 16), INT8_C( 48), INT8_C( -46), INT8_C( 62), INT8_C( 75), INT8_C( 85), INT8_C(-115), INT8_C( -14), INT8_C( -99), INT8_C( 86), INT8_C(-116), INT8_C( -74), INT8_C( 38), INT8_C( 27), INT8_C(-115), INT8_C( 55), INT8_C( -91), INT8_C( -71), INT8_C( -14), INT8_C( -84)), simde_mm512_set_epi8(INT8_C(-114), INT8_C(-128), INT8_C( -56), INT8_C( -29), INT8_C( -5), INT8_C( 127), INT8_C( 74), INT8_C( -69), INT8_C( -41), INT8_C( 127), INT8_C( 80), INT8_C( 96), INT8_C( 23), INT8_C( 50), INT8_C( 9), INT8_C( 127), INT8_C( 10), INT8_C( 127), INT8_C( 116), INT8_C( 13), INT8_C( 36), INT8_C( 50), INT8_C( 60), INT8_C( 6), INT8_C( -44), INT8_C( 53), INT8_C( -15), INT8_C( -47), INT8_C( 120), INT8_C( 127), INT8_C( -7), INT8_C( -13), INT8_C( 127), INT8_C( -66), INT8_C( 21), INT8_C( -83), INT8_C( -15), INT8_C( 53), INT8_C(-128), INT8_C( 70), INT8_C( -50), INT8_C( 32), INT8_C( -79), INT8_C( 15), INT8_C( -13), 
INT8_C(-102), INT8_C( -15), INT8_C( -4), INT8_C(-119), INT8_C( -88), INT8_C( 72), INT8_C( -21), INT8_C( -19), INT8_C(-104), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( -85), INT8_C( 41), INT8_C(-125), INT8_C( 127), INT8_C( 119), INT8_C( -21), INT8_C( 127)) }, { simde_mm512_set_epi8(INT8_C( 2), INT8_C( -77), INT8_C( -19), INT8_C( 41), INT8_C( -13), INT8_C( 75), INT8_C(-123), INT8_C( 96), INT8_C( -86), INT8_C( -24), INT8_C( -27), INT8_C( -84), INT8_C( 35), INT8_C( -86), INT8_C( -72), INT8_C( -97), INT8_C( 44), INT8_C( 11), INT8_C(-106), INT8_C( 44), INT8_C( 0), INT8_C( 90), INT8_C( -79), INT8_C( 91), INT8_C( 119), INT8_C( 59), INT8_C( 105), INT8_C(-128), INT8_C( 110), INT8_C( -29), INT8_C( 67), INT8_C( 114), INT8_C( -39), INT8_C( -49), INT8_C( 105), INT8_C( -40), INT8_C( -33), INT8_C( 120), INT8_C( -27), INT8_C( 100), INT8_C( -90), INT8_C( 86), INT8_C( -18), INT8_C( -57), INT8_C( 84), INT8_C( -26), INT8_C( -77), INT8_C( 17), INT8_C( -47), INT8_C( 51), INT8_C( -83), INT8_C( 53), INT8_C( 71), INT8_C( 96), INT8_C( 110), INT8_C( -89), INT8_C( 27), INT8_C( -45), INT8_C(-126), INT8_C( 40), INT8_C( 95), INT8_C( -87), INT8_C( -62), INT8_C( -52)), simde_mm512_set_epi8(INT8_C( -84), INT8_C( 127), INT8_C( 61), INT8_C( -16), INT8_C( 30), INT8_C( 6), INT8_C(-112), INT8_C( 104), INT8_C( -60), INT8_C( -88), INT8_C( -39), INT8_C( -19), INT8_C( 44), INT8_C( 36), INT8_C( 105), INT8_C( 120), INT8_C( -26), INT8_C( 21), INT8_C( 14), INT8_C( 42), INT8_C( 49), INT8_C( -84), INT8_C(-120), INT8_C(-107), INT8_C( 123), INT8_C( -47), INT8_C( 21), INT8_C( -10), INT8_C( 95), INT8_C( 124), INT8_C( -33), INT8_C( -34), INT8_C( -33), INT8_C( -71), INT8_C( 11), INT8_C( 74), INT8_C( 104), INT8_C( 108), INT8_C( -35), INT8_C( -59), INT8_C( -55), INT8_C(-126), INT8_C( 107), INT8_C( 23), INT8_C( 29), INT8_C( -27), INT8_C( 123), INT8_C( 23), INT8_C( -83), INT8_C( -90), INT8_C( 9), INT8_C( 94), INT8_C( 91), INT8_C( 69), INT8_C( -51), INT8_C(-103), INT8_C( -72), INT8_C( -45), INT8_C( 16), INT8_C( 108), 
INT8_C( -80), INT8_C( 27), INT8_C( 58), INT8_C( -83)), simde_mm512_set_epi8(INT8_C( 86), INT8_C(-128), INT8_C( -80), INT8_C( 57), INT8_C( -43), INT8_C( 69), INT8_C( -11), INT8_C( -8), INT8_C( -26), INT8_C( 64), INT8_C( 12), INT8_C( -65), INT8_C( -9), INT8_C(-122), INT8_C(-128), INT8_C(-128), INT8_C( 70), INT8_C( -10), INT8_C(-120), INT8_C( 2), INT8_C( -49), INT8_C( 127), INT8_C( 41), INT8_C( 127), INT8_C( -4), INT8_C( 106), INT8_C( 84), INT8_C(-118), INT8_C( 15), INT8_C(-128), INT8_C( 100), INT8_C( 127), INT8_C( -6), INT8_C( 22), INT8_C( 94), INT8_C(-114), INT8_C(-128), INT8_C( 12), INT8_C( 8), INT8_C( 127), INT8_C( -35), INT8_C( 127), INT8_C(-125), INT8_C( -80), INT8_C( 55), INT8_C( 1), INT8_C(-128), INT8_C( -6), INT8_C( 36), INT8_C( 127), INT8_C( -92), INT8_C( -41), INT8_C( -20), INT8_C( 27), INT8_C( 127), INT8_C( 14), INT8_C( 99), INT8_C( 0), INT8_C(-128), INT8_C( -68), INT8_C( 127), INT8_C(-114), INT8_C(-120), INT8_C( 31)) }, { simde_mm512_set_epi8(INT8_C( 17), INT8_C( 99), INT8_C( -13), INT8_C( -49), INT8_C( 45), INT8_C(-128), INT8_C( 55), INT8_C( 105), INT8_C( -34), INT8_C( -51), INT8_C( -97), INT8_C(-103), INT8_C(-124), INT8_C( 111), INT8_C( 74), INT8_C( 75), INT8_C( 102), INT8_C( 98), INT8_C(-117), INT8_C( 9), INT8_C( -74), INT8_C( 61), INT8_C( 99), INT8_C( 124), INT8_C( 79), INT8_C(-114), INT8_C( 19), INT8_C( 97), INT8_C(-100), INT8_C(-124), INT8_C( -17), INT8_C( -62), INT8_C( 25), INT8_C( -3), INT8_C( -7), INT8_C( 72), INT8_C(-117), INT8_C( -27), INT8_C( -56), INT8_C( 92), INT8_C( -20), INT8_C( -53), INT8_C( 2), INT8_C( -38), INT8_C( -81), INT8_C( 59), INT8_C( 66), INT8_C( 90), INT8_C( 36), INT8_C( 100), INT8_C( 112), INT8_C( 123), INT8_C( -72), INT8_C( -97), INT8_C(-115), INT8_C( 17), INT8_C( -93), INT8_C(-122), INT8_C( 31), INT8_C( 27), INT8_C( 109), INT8_C( 115), INT8_C( 53), INT8_C( -96)), simde_mm512_set_epi8(INT8_C( -43), INT8_C( -18), INT8_C( 114), INT8_C( -29), INT8_C( 118), INT8_C( -1), INT8_C( -20), INT8_C( -38), INT8_C( -80), INT8_C( 88), 
INT8_C(-111), INT8_C( -91), INT8_C( 44), INT8_C( -72), INT8_C( 106), INT8_C( 19), INT8_C( -46), INT8_C( 107), INT8_C( 46), INT8_C( 44), INT8_C( -65), INT8_C(-128), INT8_C( 41), INT8_C( 44), INT8_C( 68), INT8_C( 69), INT8_C( -78), INT8_C( -47), INT8_C( 109), INT8_C( 120), INT8_C( -57), INT8_C( -95), INT8_C( 95), INT8_C( 80), INT8_C( -30), INT8_C( 97), INT8_C( -48), INT8_C( -97), INT8_C( 111), INT8_C( -80), INT8_C(-122), INT8_C( -81), INT8_C( -71), INT8_C( 85), INT8_C( 77), INT8_C( -42), INT8_C(-115), INT8_C( -77), INT8_C( 29), INT8_C( 77), INT8_C( 64), INT8_C( -20), INT8_C( 27), INT8_C( 41), INT8_C( 13), INT8_C( 109), INT8_C( 22), INT8_C( -98), INT8_C( 20), INT8_C( -28), INT8_C( 66), INT8_C( -7), INT8_C(-113), INT8_C(-119)), simde_mm512_set_epi8(INT8_C( 60), INT8_C( 117), INT8_C(-127), INT8_C( -20), INT8_C( -73), INT8_C(-127), INT8_C( 75), INT8_C( 127), INT8_C( 46), INT8_C(-128), INT8_C( 14), INT8_C( -12), INT8_C(-128), INT8_C( 127), INT8_C( -32), INT8_C( 56), INT8_C( 127), INT8_C( -9), INT8_C(-128), INT8_C( -35), INT8_C( -9), INT8_C( 127), INT8_C( 58), INT8_C( 80), INT8_C( 11), INT8_C(-128), INT8_C( 97), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( 40), INT8_C( 33), INT8_C( -70), INT8_C( -83), INT8_C( 23), INT8_C( -25), INT8_C( -69), INT8_C( 70), INT8_C(-128), INT8_C( 127), INT8_C( 102), INT8_C( 28), INT8_C( 73), INT8_C(-123), INT8_C(-128), INT8_C( 101), INT8_C( 127), INT8_C( 127), INT8_C( 7), INT8_C( 23), INT8_C( 48), INT8_C( 127), INT8_C( -99), INT8_C(-128), INT8_C(-128), INT8_C( -92), INT8_C(-115), INT8_C( -24), INT8_C( 11), INT8_C( 55), INT8_C( 43), INT8_C( 122), INT8_C( 127), INT8_C( 23)) }, { simde_mm512_set_epi8(INT8_C(-124), INT8_C( -73), INT8_C( 74), INT8_C( 5), INT8_C( -9), INT8_C( 17), INT8_C( -81), INT8_C( -54), INT8_C( -5), INT8_C( -33), INT8_C( -12), INT8_C( 26), INT8_C( 86), INT8_C( 122), INT8_C( -44), INT8_C( -23), INT8_C( 0), INT8_C( 43), INT8_C( -25), INT8_C(-122), INT8_C( -79), INT8_C(-122), INT8_C( -88), INT8_C(-121), INT8_C(-102), INT8_C( 
66), INT8_C( -93), INT8_C( 105), INT8_C( 109), INT8_C( -68), INT8_C( 24), INT8_C( -54), INT8_C( 40), INT8_C( 68), INT8_C( 2), INT8_C( 60), INT8_C( 0), INT8_C( 5), INT8_C( 59), INT8_C( -54), INT8_C( -76), INT8_C( 27), INT8_C( -23), INT8_C( 77), INT8_C(-108), INT8_C( -28), INT8_C(-114), INT8_C( 56), INT8_C( -54), INT8_C(-108), INT8_C( -15), INT8_C( -89), INT8_C(-103), INT8_C( -45), INT8_C( 74), INT8_C( -3), INT8_C(-108), INT8_C( 55), INT8_C( -79), INT8_C( -62), INT8_C( 14), INT8_C( 106), INT8_C( -16), INT8_C( -10)), simde_mm512_set_epi8(INT8_C( -47), INT8_C( 124), INT8_C( 57), INT8_C( -74), INT8_C( 20), INT8_C( 124), INT8_C( 70), INT8_C( -69), INT8_C( -65), INT8_C( -12), INT8_C( 124), INT8_C( -90), INT8_C(-113), INT8_C( 63), INT8_C( -79), INT8_C( -70), INT8_C( -76), INT8_C( -34), INT8_C( -60), INT8_C( -4), INT8_C( -41), INT8_C( 60), INT8_C( 77), INT8_C( -57), INT8_C( 13), INT8_C( 2), INT8_C( 111), INT8_C( -39), INT8_C( 41), INT8_C( 54), INT8_C( -37), INT8_C( 114), INT8_C( 92), INT8_C(-111), INT8_C( 77), INT8_C( 14), INT8_C(-104), INT8_C( -39), INT8_C( -74), INT8_C( 66), INT8_C( 16), INT8_C( -26), INT8_C( -89), INT8_C(-114), INT8_C( -68), INT8_C( 6), INT8_C( 62), INT8_C( -93), INT8_C( 55), INT8_C(-113), INT8_C( -60), INT8_C( -56), INT8_C( -37), INT8_C( 2), INT8_C( -15), INT8_C( 88), INT8_C( 26), INT8_C( 54), INT8_C( 82), INT8_C( 124), INT8_C( -38), INT8_C(-107), INT8_C( 40), INT8_C( 13)), simde_mm512_set_epi8(INT8_C( -77), INT8_C(-128), INT8_C( 17), INT8_C( 79), INT8_C( -29), INT8_C(-107), INT8_C(-128), INT8_C( 15), INT8_C( 60), INT8_C( -21), INT8_C(-128), INT8_C( 116), INT8_C( 127), INT8_C( 59), INT8_C( 35), INT8_C( 47), INT8_C( 76), INT8_C( 77), INT8_C( 35), INT8_C(-118), INT8_C( -38), INT8_C(-128), INT8_C(-128), INT8_C( -64), INT8_C(-115), INT8_C( 64), INT8_C(-128), INT8_C( 127), INT8_C( 68), INT8_C(-122), INT8_C( 61), INT8_C(-128), INT8_C( -52), INT8_C( 127), INT8_C( -75), INT8_C( 46), INT8_C( 104), INT8_C( 44), INT8_C( 127), INT8_C(-120), INT8_C( -92), INT8_C( 
53), INT8_C( 66), INT8_C( 127), INT8_C( -40), INT8_C( -34), INT8_C(-128), INT8_C( 127), INT8_C(-109), INT8_C( 5), INT8_C( 45), INT8_C( -33), INT8_C( -66), INT8_C( -47), INT8_C( 89), INT8_C( -91), INT8_C(-128), INT8_C( 1), INT8_C(-128), INT8_C(-128), INT8_C( 52), INT8_C( 127), INT8_C( -56), INT8_C( -23)) }, { simde_mm512_set_epi8(INT8_C( 5), INT8_C( -68), INT8_C( -18), INT8_C( -37), INT8_C( 5), INT8_C( 16), INT8_C(-109), INT8_C( -67), INT8_C( -62), INT8_C( -4), INT8_C( 14), INT8_C(-109), INT8_C( -29), INT8_C(-121), INT8_C(-109), INT8_C( -55), INT8_C( 1), INT8_C( -38), INT8_C( 107), INT8_C( 55), INT8_C( -36), INT8_C( -76), INT8_C( 35), INT8_C( -40), INT8_C( 10), INT8_C( -90), INT8_C( -48), INT8_C(-112), INT8_C( -9), INT8_C( -53), INT8_C( 105), INT8_C( 27), INT8_C( -97), INT8_C(-124), INT8_C( 4), INT8_C( -36), INT8_C( -16), INT8_C( -87), INT8_C( -89), INT8_C(-104), INT8_C( -30), INT8_C(-101), INT8_C( 69), INT8_C( 79), INT8_C( 59), INT8_C( -97), INT8_C( -15), INT8_C( 17), INT8_C( 106), INT8_C( -85), INT8_C( 126), INT8_C(-121), INT8_C( -91), INT8_C( 26), INT8_C(-115), INT8_C(-117), INT8_C( 91), INT8_C( 73), INT8_C( -60), INT8_C( 69), INT8_C( -23), INT8_C( 48), INT8_C( 70), INT8_C( -8)), simde_mm512_set_epi8(INT8_C( 91), INT8_C(-103), INT8_C( 69), INT8_C( 61), INT8_C( -82), INT8_C( 73), INT8_C( 122), INT8_C( -22), INT8_C( 122), INT8_C( 76), INT8_C( -9), INT8_C( 121), INT8_C(-123), INT8_C(-119), INT8_C(-127), INT8_C( 126), INT8_C( 105), INT8_C( 10), INT8_C(-120), INT8_C(-127), INT8_C( -50), INT8_C( 15), INT8_C( -93), INT8_C( -86), INT8_C(-125), INT8_C( 45), INT8_C( -39), INT8_C(-119), INT8_C( 74), INT8_C( -92), INT8_C( -78), INT8_C( 53), INT8_C( 17), INT8_C( -21), INT8_C( 105), INT8_C(-102), INT8_C( -1), INT8_C( -19), INT8_C( 110), INT8_C( -84), INT8_C( -93), INT8_C( 19), INT8_C( -98), INT8_C(-128), INT8_C( -23), INT8_C( 49), INT8_C( 100), INT8_C( 122), INT8_C( -96), INT8_C(-103), INT8_C( 60), INT8_C( -24), INT8_C( 23), INT8_C( -52), INT8_C( -37), INT8_C( -56), INT8_C( 
-50), INT8_C( 4), INT8_C( -69), INT8_C( 1), INT8_C( -25), INT8_C( -10), INT8_C( 93), INT8_C( 51)), simde_mm512_set_epi8(INT8_C( -86), INT8_C( 35), INT8_C( -87), INT8_C( -98), INT8_C( 87), INT8_C( -57), INT8_C(-128), INT8_C( -45), INT8_C(-128), INT8_C( -80), INT8_C( 23), INT8_C(-128), INT8_C( 94), INT8_C( -2), INT8_C( 18), INT8_C(-128), INT8_C(-104), INT8_C( -48), INT8_C( 127), INT8_C( 127), INT8_C( 14), INT8_C( -91), INT8_C( 127), INT8_C( 46), INT8_C( 127), INT8_C(-128), INT8_C( -9), INT8_C( 7), INT8_C( -83), INT8_C( 39), INT8_C( 127), INT8_C( -26), INT8_C(-114), INT8_C(-103), INT8_C(-101), INT8_C( 66), INT8_C( -15), INT8_C( -68), INT8_C(-128), INT8_C( -20), INT8_C( 63), INT8_C(-120), INT8_C( 127), INT8_C( 127), INT8_C( 82), INT8_C(-128), INT8_C(-115), INT8_C(-105), INT8_C( 127), INT8_C( 18), INT8_C( 66), INT8_C( -97), INT8_C(-114), INT8_C( 78), INT8_C( -78), INT8_C( -61), INT8_C( 127), INT8_C( 69), INT8_C( 9), INT8_C( 68), INT8_C( 2), INT8_C( 58), INT8_C( -23), INT8_C( -59)) }, { simde_mm512_set_epi8(INT8_C( -89), INT8_C( 43), INT8_C( 52), INT8_C( 82), INT8_C( -37), INT8_C( 55), INT8_C( 112), INT8_C( -22), INT8_C( -75), INT8_C( -36), INT8_C( -34), INT8_C( -15), INT8_C( 35), INT8_C( -42), INT8_C(-101), INT8_C( -5), INT8_C( 2), INT8_C( 35), INT8_C( 14), INT8_C( -73), INT8_C( -50), INT8_C( -33), INT8_C( -65), INT8_C( 94), INT8_C( -6), INT8_C( -21), INT8_C( -28), INT8_C( 21), INT8_C( 102), INT8_C( -87), INT8_C( 114), INT8_C( 125), INT8_C( 113), INT8_C( 124), INT8_C(-121), INT8_C(-122), INT8_C( 23), INT8_C( 107), INT8_C( 24), INT8_C( 126), INT8_C( 80), INT8_C( 59), INT8_C( 39), INT8_C( -61), INT8_C(-105), INT8_C( 32), INT8_C( 55), INT8_C( -9), INT8_C( 60), INT8_C(-125), INT8_C( 72), INT8_C( -36), INT8_C( 77), INT8_C( -65), INT8_C( 117), INT8_C( -85), INT8_C( 98), INT8_C( -83), INT8_C( -69), INT8_C( -52), INT8_C( 41), INT8_C( -10), INT8_C( -18), INT8_C( 56)), simde_mm512_set_epi8(INT8_C( 22), INT8_C( 122), INT8_C( -90), INT8_C( 2), INT8_C( -65), INT8_C( 51), INT8_C( 
-94), INT8_C( -50), INT8_C( -15), INT8_C( 19), INT8_C( -19), INT8_C( 66), INT8_C( 119), INT8_C(-118), INT8_C(-112), INT8_C(-116), INT8_C( 44), INT8_C( -12), INT8_C( 31), INT8_C( 43), INT8_C( -16), INT8_C( -37), INT8_C( -24), INT8_C( -32), INT8_C( -95), INT8_C( -86), INT8_C( -96), INT8_C( 80), INT8_C( 68), INT8_C( 13), INT8_C( -8), INT8_C( 67), INT8_C( 107), INT8_C(-125), INT8_C( 104), INT8_C( -80), INT8_C( 97), INT8_C( -78), INT8_C( 106), INT8_C( -53), INT8_C( -36), INT8_C( -90), INT8_C( 74), INT8_C( -72), INT8_C( 59), INT8_C( -81), INT8_C( -8), INT8_C( -25), INT8_C( -55), INT8_C( -99), INT8_C( 20), INT8_C( 9), INT8_C( -89), INT8_C( -90), INT8_C( 108), INT8_C( 56), INT8_C( -19), INT8_C( 81), INT8_C( 122), INT8_C( 6), INT8_C(-119), INT8_C( 122), INT8_C( -35), INT8_C( 106)), simde_mm512_set_epi8(INT8_C(-111), INT8_C( -79), INT8_C( 127), INT8_C( 80), INT8_C( 28), INT8_C( 4), INT8_C( 127), INT8_C( 28), INT8_C( -60), INT8_C( -55), INT8_C( -15), INT8_C( -81), INT8_C( -84), INT8_C( 76), INT8_C( 11), INT8_C( 111), INT8_C( -42), INT8_C( 47), INT8_C( -17), INT8_C(-116), INT8_C( -34), INT8_C( 4), INT8_C( -41), INT8_C( 126), INT8_C( 89), INT8_C( 65), INT8_C( 68), INT8_C( -59), INT8_C( 34), INT8_C(-100), INT8_C( 122), INT8_C( 58), INT8_C( 6), INT8_C( 127), INT8_C(-128), INT8_C( -42), INT8_C( -74), INT8_C( 127), INT8_C( -82), INT8_C( 127), INT8_C( 116), INT8_C( 127), INT8_C( -35), INT8_C( 11), INT8_C(-128), INT8_C( 113), INT8_C( 63), INT8_C( 16), INT8_C( 115), INT8_C( -26), INT8_C( 52), INT8_C( -45), INT8_C( 127), INT8_C( 25), INT8_C( 9), INT8_C(-128), INT8_C( 117), INT8_C(-128), INT8_C(-128), INT8_C( -58), INT8_C( 127), INT8_C(-128), INT8_C( 17), INT8_C( -50)) }, { simde_mm512_set_epi8(INT8_C( 105), INT8_C(-115), INT8_C( 121), INT8_C(-101), INT8_C( 0), INT8_C( 63), INT8_C( -42), INT8_C( -34), INT8_C( -5), INT8_C( -47), INT8_C(-123), INT8_C( -52), INT8_C( -86), INT8_C( -28), INT8_C( -63), INT8_C( 20), INT8_C( -60), INT8_C( -63), INT8_C( 99), INT8_C( 78), INT8_C( 56), INT8_C( 
-72), INT8_C( -55), INT8_C( -72), INT8_C( 79), INT8_C( -81), INT8_C( 124), INT8_C( -85), INT8_C( -65), INT8_C( 122), INT8_C( -25), INT8_C( -58), INT8_C( -64), INT8_C( 52), INT8_C( -12), INT8_C( 1), INT8_C( -62), INT8_C( -28), INT8_C( -28), INT8_C(-104), INT8_C( 54), INT8_C(-103), INT8_C( -55), INT8_C( -22), INT8_C( -91), INT8_C( 6), INT8_C( -9), INT8_C( -31), INT8_C( 18), INT8_C(-111), INT8_C( 58), INT8_C( 71), INT8_C( -73), INT8_C( -96), INT8_C( 28), INT8_C( -4), INT8_C( 47), INT8_C( 66), INT8_C( 121), INT8_C( 38), INT8_C( 69), INT8_C(-107), INT8_C( -57), INT8_C(-120)), simde_mm512_set_epi8(INT8_C( -49), INT8_C( 15), INT8_C( -15), INT8_C( -59), INT8_C(-113), INT8_C( 102), INT8_C( -48), INT8_C( -78), INT8_C( 31), INT8_C( 94), INT8_C( 79), INT8_C( 92), INT8_C( 106), INT8_C( -68), INT8_C( 96), INT8_C( -97), INT8_C( -27), INT8_C(-118), INT8_C( -11), INT8_C( 112), INT8_C(-125), INT8_C( 70), INT8_C( 26), INT8_C( -38), INT8_C( -16), INT8_C(-112), INT8_C( 10), INT8_C( 98), INT8_C( -4), INT8_C( 120), INT8_C( -33), INT8_C(-127), INT8_C( -65), INT8_C( -40), INT8_C( 88), INT8_C( -6), INT8_C( 74), INT8_C( 41), INT8_C( 39), INT8_C( 79), INT8_C(-125), INT8_C( -7), INT8_C( 62), INT8_C(-112), INT8_C(-119), INT8_C( -9), INT8_C( 71), INT8_C( -68), INT8_C( -79), INT8_C( 48), INT8_C( -20), INT8_C( -97), INT8_C(-116), INT8_C( 120), INT8_C( -65), INT8_C( 6), INT8_C( -32), INT8_C( -75), INT8_C(-106), INT8_C( 26), INT8_C( -96), INT8_C( 50), INT8_C( -45), INT8_C( 16)), simde_mm512_set_epi8(INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( -42), INT8_C( 113), INT8_C( -39), INT8_C( 6), INT8_C( 44), INT8_C( -36), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 40), INT8_C(-128), INT8_C( 117), INT8_C( -33), INT8_C( 55), INT8_C( 110), INT8_C( -34), INT8_C( 127), INT8_C(-128), INT8_C( -81), INT8_C( -34), INT8_C( 95), INT8_C( 31), INT8_C( 114), INT8_C(-128), INT8_C( -61), INT8_C( 2), INT8_C( 8), INT8_C( 69), INT8_C( 1), INT8_C( 92), INT8_C(-100), INT8_C( 7), INT8_C(-128), INT8_C( 
-69), INT8_C( -67), INT8_C(-128), INT8_C( 127), INT8_C( -96), INT8_C(-117), INT8_C( 90), INT8_C( 28), INT8_C( 15), INT8_C( -80), INT8_C( 37), INT8_C( 97), INT8_C(-128), INT8_C( 78), INT8_C( 127), INT8_C( 43), INT8_C(-128), INT8_C( 93), INT8_C( -10), INT8_C( 79), INT8_C( 127), INT8_C( 127), INT8_C( 12), INT8_C( 127), INT8_C(-128), INT8_C( -12), INT8_C(-128)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_subs_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_subs_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask64 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( 52), INT8_C(-124), INT8_C( -17), INT8_C( -9), INT8_C( 31), INT8_C( 67), INT8_C( -76), INT8_C( -4), INT8_C( -52), INT8_C( 99), INT8_C( 106), INT8_C( -35), INT8_C(-115), INT8_C(-127), INT8_C( 121), INT8_C( -60), INT8_C( -27), INT8_C( 73), INT8_C( -21), INT8_C( -51), INT8_C(-127), INT8_C( 68), INT8_C( 109), INT8_C( 35), INT8_C( 96), INT8_C( 3), INT8_C( -62), INT8_C( -43), INT8_C( -75), INT8_C( 102), INT8_C(-105), INT8_C( 63), INT8_C( 64), INT8_C( 114), INT8_C( 112), INT8_C( 10), INT8_C( 108), INT8_C( 71), INT8_C( -40), INT8_C( 20), INT8_C( -40), INT8_C( 26), INT8_C( -94), INT8_C( 107), INT8_C( -89), INT8_C( 22), INT8_C( 55), INT8_C( 105), INT8_C( 23), INT8_C( 52), INT8_C( 82), INT8_C( 50), INT8_C( -48), INT8_C( 77), INT8_C( 73), INT8_C( 98), INT8_C( -85), INT8_C( 123), INT8_C( 15), INT8_C( 36), INT8_C(-103), INT8_C( 18), INT8_C( -81), INT8_C( 9)), UINT64_C( 2070647823), simde_mm512_set_epi8(INT8_C( 87), INT8_C( 118), INT8_C( 10), INT8_C( -53), INT8_C( -21), INT8_C( -77), INT8_C( 81), INT8_C( 16), INT8_C( 125), INT8_C( 53), INT8_C( -93), INT8_C( -44), INT8_C( -14), INT8_C( -81), INT8_C( 93), INT8_C( -81), INT8_C( 69), INT8_C( 88), INT8_C( 17), INT8_C(-127), INT8_C( 49), INT8_C( 91), INT8_C( 55), 
INT8_C( -23), INT8_C( -84), INT8_C( 12), INT8_C( -97), INT8_C( -75), INT8_C( 99), INT8_C( 54), INT8_C( -92), INT8_C( -28), INT8_C( -2), INT8_C( 69), INT8_C(-102), INT8_C( 42), INT8_C( 41), INT8_C(-101), INT8_C( -59), INT8_C( -72), INT8_C( -53), INT8_C( -47), INT8_C( -78), INT8_C( 67), INT8_C( 54), INT8_C( 12), INT8_C( -65), INT8_C( 101), INT8_C( 77), INT8_C( -96), INT8_C( -15), INT8_C( 66), INT8_C( 50), INT8_C( 9), INT8_C( -75), INT8_C( 102), INT8_C( 2), INT8_C( -31), INT8_C( 83), INT8_C( -71), INT8_C( -84), INT8_C( -66), INT8_C(-111), INT8_C( -2)), simde_mm512_set_epi8(INT8_C( 87), INT8_C(-105), INT8_C( 96), INT8_C( -79), INT8_C( 99), INT8_C( 74), INT8_C( 20), INT8_C( 120), INT8_C( 60), INT8_C( 49), INT8_C( 101), INT8_C( -94), INT8_C( -5), INT8_C( 114), INT8_C( 17), INT8_C( 1), INT8_C( 36), INT8_C( -92), INT8_C( 39), INT8_C(-108), INT8_C( -15), INT8_C( 14), INT8_C( -15), INT8_C( 79), INT8_C( 112), INT8_C( 108), INT8_C( -44), INT8_C( -18), INT8_C( 125), INT8_C( 93), INT8_C( -92), INT8_C( -51), INT8_C( 21), INT8_C( -19), INT8_C( -56), INT8_C( 114), INT8_C( -95), INT8_C( 40), INT8_C( -27), INT8_C( -85), INT8_C( 26), INT8_C( 29), INT8_C( -30), INT8_C( 53), INT8_C( -69), INT8_C( -26), INT8_C( 79), INT8_C(-124), INT8_C( -35), INT8_C( -29), INT8_C( -59), INT8_C( -99), INT8_C(-118), INT8_C( 30), INT8_C( 105), INT8_C( 3), INT8_C( -2), INT8_C( 26), INT8_C(-121), INT8_C( 115), INT8_C( -78), INT8_C( 10), INT8_C( 6), INT8_C( -16)), simde_mm512_set_epi8(INT8_C( 52), INT8_C(-124), INT8_C( -17), INT8_C( -9), INT8_C( 31), INT8_C( 67), INT8_C( -76), INT8_C( -4), INT8_C( -52), INT8_C( 99), INT8_C( 106), INT8_C( -35), INT8_C(-115), INT8_C(-127), INT8_C( 121), INT8_C( -60), INT8_C( -27), INT8_C( 73), INT8_C( -21), INT8_C( -51), INT8_C(-127), INT8_C( 68), INT8_C( 109), INT8_C( 35), INT8_C( 96), INT8_C( 3), INT8_C( -62), INT8_C( -43), INT8_C( -75), INT8_C( 102), INT8_C(-105), INT8_C( 63), INT8_C( 64), INT8_C( 88), INT8_C( -46), INT8_C( -72), INT8_C( 127), INT8_C( 71), INT8_C( -32), 
INT8_C( 13), INT8_C( -40), INT8_C( -76), INT8_C( -48), INT8_C( 107), INT8_C( 123), INT8_C( 22), INT8_C(-128), INT8_C( 127), INT8_C( 112), INT8_C( 52), INT8_C( 82), INT8_C( 127), INT8_C( -48), INT8_C( -21), INT8_C( 73), INT8_C( 98), INT8_C( -85), INT8_C( 123), INT8_C( 15), INT8_C( 36), INT8_C( -6), INT8_C( -76), INT8_C(-117), INT8_C( 14)) }, { simde_mm512_set_epi8(INT8_C( -60), INT8_C( -92), INT8_C( -73), INT8_C( -20), INT8_C( -84), INT8_C( -11), INT8_C( -28), INT8_C(-123), INT8_C( -48), INT8_C(-118), INT8_C(-107), INT8_C( 73), INT8_C( -80), INT8_C( 46), INT8_C(-117), INT8_C( -85), INT8_C( -21), INT8_C( -54), INT8_C( -49), INT8_C( -8), INT8_C( 51), INT8_C( 126), INT8_C( 22), INT8_C( 70), INT8_C( -86), INT8_C( -98), INT8_C( -17), INT8_C( -29), INT8_C( 75), INT8_C( 115), INT8_C( 61), INT8_C( 76), INT8_C( -65), INT8_C( 43), INT8_C( -13), INT8_C( -2), INT8_C( 81), INT8_C( -26), INT8_C( -6), INT8_C( -15), INT8_C( -40), INT8_C( 72), INT8_C( 66), INT8_C( 32), INT8_C( 22), INT8_C( -74), INT8_C( -73), INT8_C( 67), INT8_C( 75), INT8_C( -18), INT8_C( -97), INT8_C( 75), INT8_C( -47), INT8_C( 32), INT8_C( 26), INT8_C(-108), INT8_C( 31), INT8_C( 110), INT8_C(-101), INT8_C( -57), INT8_C( 28), INT8_C( -50), INT8_C( -16), INT8_C( 37)), UINT64_C( 3026541283), simde_mm512_set_epi8(INT8_C( -66), INT8_C( -12), INT8_C( -24), INT8_C(-107), INT8_C(-117), INT8_C( 63), INT8_C( 42), INT8_C( 79), INT8_C( -78), INT8_C( -79), INT8_C( -22), INT8_C( -52), INT8_C( -78), INT8_C( -8), INT8_C( -61), INT8_C( 101), INT8_C(-111), INT8_C( 35), INT8_C( -89), INT8_C( 127), INT8_C( 50), INT8_C(-115), INT8_C( 91), INT8_C( -72), INT8_C( -97), INT8_C( -34), INT8_C( -80), INT8_C( -31), INT8_C( -60), INT8_C( 119), INT8_C(-106), INT8_C( 66), INT8_C( 75), INT8_C(-108), INT8_C(-113), INT8_C( 120), INT8_C( 36), INT8_C( 112), INT8_C( -45), INT8_C( 41), INT8_C( 90), INT8_C( -32), INT8_C( 22), INT8_C( 86), INT8_C( 72), INT8_C( -20), INT8_C( -4), INT8_C(-125), INT8_C( -37), INT8_C( 84), INT8_C( -95), INT8_C( -66), 
INT8_C( 109), INT8_C( 18), INT8_C( 120), INT8_C(-115), INT8_C( 83), INT8_C( 26), INT8_C(-108), INT8_C(-128), INT8_C( 113), INT8_C( -65), INT8_C( 84), INT8_C( -78)), simde_mm512_set_epi8(INT8_C(-100), INT8_C(-107), INT8_C( 106), INT8_C( -87), INT8_C( 61), INT8_C( -94), INT8_C( 7), INT8_C( 84), INT8_C( 25), INT8_C( 77), INT8_C( 24), INT8_C( 35), INT8_C( 56), INT8_C( -72), INT8_C( 65), INT8_C( -77), INT8_C( 81), INT8_C( 110), INT8_C( 109), INT8_C( 26), INT8_C( -46), INT8_C( 6), INT8_C( -4), INT8_C( -76), INT8_C( 49), INT8_C( 67), INT8_C( -22), INT8_C( 12), INT8_C( 66), INT8_C( -76), INT8_C( -71), INT8_C( 20), INT8_C( -11), INT8_C( 97), INT8_C( 20), INT8_C( -1), INT8_C( 69), INT8_C( 54), INT8_C( -68), INT8_C( 104), INT8_C( 61), INT8_C( -16), INT8_C( -52), INT8_C( -98), INT8_C(-126), INT8_C(-109), INT8_C( 86), INT8_C( 51), INT8_C( 95), INT8_C( -88), INT8_C( 6), INT8_C(-121), INT8_C( 49), INT8_C( 97), INT8_C( -69), INT8_C( 125), INT8_C( 57), INT8_C( 79), INT8_C( 6), INT8_C( 7), INT8_C( 37), INT8_C(-119), INT8_C( -51), INT8_C( 99)), simde_mm512_set_epi8(INT8_C( -60), INT8_C( -92), INT8_C( -73), INT8_C( -20), INT8_C( -84), INT8_C( -11), INT8_C( -28), INT8_C(-123), INT8_C( -48), INT8_C(-118), INT8_C(-107), INT8_C( 73), INT8_C( -80), INT8_C( 46), INT8_C(-117), INT8_C( -85), INT8_C( -21), INT8_C( -54), INT8_C( -49), INT8_C( -8), INT8_C( 51), INT8_C( 126), INT8_C( 22), INT8_C( 70), INT8_C( -86), INT8_C( -98), INT8_C( -17), INT8_C( -29), INT8_C( 75), INT8_C( 115), INT8_C( 61), INT8_C( 76), INT8_C( 86), INT8_C( 43), INT8_C(-128), INT8_C( 121), INT8_C( 81), INT8_C( 58), INT8_C( -6), INT8_C( -15), INT8_C( -40), INT8_C( -16), INT8_C( 74), INT8_C( 32), INT8_C( 22), INT8_C( 89), INT8_C( -73), INT8_C(-128), INT8_C( 75), INT8_C( 127), INT8_C( -97), INT8_C( 55), INT8_C( 60), INT8_C( 32), INT8_C( 127), INT8_C(-108), INT8_C( 26), INT8_C( -53), INT8_C(-114), INT8_C( -57), INT8_C( 28), INT8_C( -50), INT8_C( 127), INT8_C(-128)) }, { simde_mm512_set_epi8(INT8_C( 66), INT8_C( 28), INT8_C( 
-53), INT8_C( 48), INT8_C( -75), INT8_C( -97), INT8_C( -42), INT8_C( 47), INT8_C( -33), INT8_C( -17), INT8_C( 113), INT8_C( 120), INT8_C(-111), INT8_C( 14), INT8_C( 77), INT8_C(-108), INT8_C( 25), INT8_C( -42), INT8_C( 109), INT8_C( -97), INT8_C( -19), INT8_C( -18), INT8_C( -1), INT8_C( -50), INT8_C( -87), INT8_C( -70), INT8_C( 94), INT8_C( -91), INT8_C( 23), INT8_C( 33), INT8_C( -10), INT8_C( 81), INT8_C( 81), INT8_C( -63), INT8_C( 35), INT8_C( 44), INT8_C( 11), INT8_C(-124), INT8_C( -20), INT8_C( 71), INT8_C( -25), INT8_C(-112), INT8_C( 2), INT8_C( 59), INT8_C( 21), INT8_C(-113), INT8_C( 35), INT8_C( -40), INT8_C( -89), INT8_C( -74), INT8_C( -34), INT8_C( 115), INT8_C( -51), INT8_C( 30), INT8_C(-104), INT8_C( 117), INT8_C( 127), INT8_C( -8), INT8_C( 57), INT8_C( -73), INT8_C( 40), INT8_C( -7), INT8_C( 90), INT8_C(-110)), UINT64_C( 3345097188), simde_mm512_set_epi8(INT8_C( 58), INT8_C(-128), INT8_C(-123), INT8_C( -3), INT8_C( 74), INT8_C( -78), INT8_C(-124), INT8_C( -30), INT8_C( 21), INT8_C( 86), INT8_C( -53), INT8_C(-116), INT8_C(-126), INT8_C( 10), INT8_C(-123), INT8_C( -78), INT8_C( 2), INT8_C( 100), INT8_C( -90), INT8_C( -79), INT8_C( -46), INT8_C( -83), INT8_C( 11), INT8_C(-120), INT8_C( -56), INT8_C( -4), INT8_C( -39), INT8_C(-114), INT8_C( -98), INT8_C(-108), INT8_C( -22), INT8_C( 24), INT8_C( 127), INT8_C( -20), INT8_C( 7), INT8_C( -16), INT8_C( -8), INT8_C( 19), INT8_C( 119), INT8_C( -44), INT8_C( 7), INT8_C( -47), INT8_C(-126), INT8_C(-119), INT8_C( 1), INT8_C( 111), INT8_C( -95), INT8_C( -67), INT8_C( 38), INT8_C( -19), INT8_C( 96), INT8_C( -21), INT8_C( -76), INT8_C(-123), INT8_C(-115), INT8_C( -61), INT8_C( 96), INT8_C( 30), INT8_C( 105), INT8_C( -4), INT8_C( 44), INT8_C( 54), INT8_C( 89), INT8_C( 40)), simde_mm512_set_epi8(INT8_C( -42), INT8_C( 43), INT8_C( 6), INT8_C( -97), INT8_C( -90), INT8_C( -4), INT8_C( -71), INT8_C( 33), INT8_C( 1), INT8_C( 3), INT8_C( -91), INT8_C( -62), INT8_C(-106), INT8_C( -6), INT8_C( 35), INT8_C( 67), INT8_C( 97), 
INT8_C( 46), INT8_C( 89), INT8_C( 56), INT8_C( -84), INT8_C( -39), INT8_C( -70), INT8_C( 78), INT8_C( -49), INT8_C( 94), INT8_C( -1), INT8_C( -23), INT8_C( -88), INT8_C( -11), INT8_C(-128), INT8_C( -62), INT8_C( 106), INT8_C( 125), INT8_C( -71), INT8_C( -96), INT8_C( -28), INT8_C( -58), INT8_C( -40), INT8_C( -32), INT8_C( -13), INT8_C( -84), INT8_C( -69), INT8_C( -40), INT8_C(-122), INT8_C( 117), INT8_C( 115), INT8_C( -87), INT8_C( 101), INT8_C( -82), INT8_C( -68), INT8_C( -48), INT8_C( 42), INT8_C( 29), INT8_C( 67), INT8_C( 9), INT8_C( -44), INT8_C( 54), INT8_C(-101), INT8_C( -13), INT8_C( 111), INT8_C( 110), INT8_C( 114), INT8_C( -57)), simde_mm512_set_epi8(INT8_C( 66), INT8_C( 28), INT8_C( -53), INT8_C( 48), INT8_C( -75), INT8_C( -97), INT8_C( -42), INT8_C( 47), INT8_C( -33), INT8_C( -17), INT8_C( 113), INT8_C( 120), INT8_C(-111), INT8_C( 14), INT8_C( 77), INT8_C(-108), INT8_C( 25), INT8_C( -42), INT8_C( 109), INT8_C( -97), INT8_C( -19), INT8_C( -18), INT8_C( -1), INT8_C( -50), INT8_C( -87), INT8_C( -70), INT8_C( 94), INT8_C( -91), INT8_C( 23), INT8_C( 33), INT8_C( -10), INT8_C( 81), INT8_C( 21), INT8_C(-128), INT8_C( 35), INT8_C( 44), INT8_C( 11), INT8_C( 77), INT8_C( 127), INT8_C( -12), INT8_C( -25), INT8_C( 37), INT8_C( -57), INT8_C( 59), INT8_C( 21), INT8_C(-113), INT8_C(-128), INT8_C( -40), INT8_C( -89), INT8_C( -74), INT8_C( 127), INT8_C( 115), INT8_C( -51), INT8_C( 30), INT8_C(-104), INT8_C( -70), INT8_C( 127), INT8_C( -24), INT8_C( 127), INT8_C( -73), INT8_C( 40), INT8_C( -56), INT8_C( 90), INT8_C(-110)) }, { simde_mm512_set_epi8(INT8_C( 127), INT8_C( 66), INT8_C( 63), INT8_C( -46), INT8_C( 90), INT8_C( 39), INT8_C( -89), INT8_C(-105), INT8_C( -51), INT8_C( -46), INT8_C( 111), INT8_C( -45), INT8_C( -76), INT8_C(-112), INT8_C( -37), INT8_C(-120), INT8_C( 50), INT8_C( -97), INT8_C( -2), INT8_C( 121), INT8_C( 68), INT8_C(-106), INT8_C( 101), INT8_C( 115), INT8_C( -29), INT8_C( 59), INT8_C( 77), INT8_C( -36), INT8_C( 111), INT8_C( 95), INT8_C( 48), INT8_C( 
113), INT8_C( -97), INT8_C( 105), INT8_C( -37), INT8_C( -42), INT8_C( -7), INT8_C( 31), INT8_C( 45), INT8_C( -5), INT8_C( -29), INT8_C( 44), INT8_C( 78), INT8_C( -25), INT8_C( -47), INT8_C(-119), INT8_C( -90), INT8_C( 115), INT8_C( 44), INT8_C( 94), INT8_C( 127), INT8_C( -12), INT8_C( 90), INT8_C( 111), INT8_C( -20), INT8_C(-110), INT8_C( -82), INT8_C( -19), INT8_C( -79), INT8_C(-128), INT8_C( -6), INT8_C(-119), INT8_C( 100), INT8_C( 125)), UINT64_C( 1390729414), simde_mm512_set_epi8(INT8_C( 89), INT8_C( 39), INT8_C( 109), INT8_C( 109), INT8_C( -71), INT8_C( -81), INT8_C( 67), INT8_C( 48), INT8_C( -43), INT8_C( 31), INT8_C( -36), INT8_C( -53), INT8_C( -7), INT8_C( 84), INT8_C( -92), INT8_C( 124), INT8_C( -22), INT8_C( 31), INT8_C( 42), INT8_C(-103), INT8_C( 29), INT8_C( 75), INT8_C( -91), INT8_C( 75), INT8_C( 107), INT8_C( 62), INT8_C( 126), INT8_C( 7), INT8_C( -89), INT8_C( 119), INT8_C( 79), INT8_C( -38), INT8_C( 18), INT8_C( 57), INT8_C(-102), INT8_C( 66), INT8_C( -7), INT8_C( 89), INT8_C( 57), INT8_C( -55), INT8_C( -68), INT8_C( 21), INT8_C( -21), INT8_C( -6), INT8_C( -26), INT8_C( 122), INT8_C( 114), INT8_C( 83), INT8_C( 80), INT8_C( 103), INT8_C( 109), INT8_C( -19), INT8_C( -24), INT8_C( -48), INT8_C( 79), INT8_C( 5), INT8_C( 66), INT8_C( -40), INT8_C( -14), INT8_C( -56), INT8_C( -68), INT8_C( -68), INT8_C(-124), INT8_C( -65)), simde_mm512_set_epi8(INT8_C(-102), INT8_C(-123), INT8_C( 20), INT8_C( 36), INT8_C( 70), INT8_C( -95), INT8_C( 63), INT8_C( 110), INT8_C( 98), INT8_C(-128), INT8_C( -74), INT8_C( -50), INT8_C( 17), INT8_C( -37), INT8_C(-104), INT8_C( -74), INT8_C( -4), INT8_C( 15), INT8_C( -59), INT8_C( 62), INT8_C( -89), INT8_C( 103), INT8_C( -5), INT8_C( 71), INT8_C( 103), INT8_C(-119), INT8_C( -62), INT8_C( -47), INT8_C( -78), INT8_C( 32), INT8_C( 104), INT8_C( 33), INT8_C( -45), INT8_C( -54), INT8_C( 17), INT8_C(-117), INT8_C( 96), INT8_C( 66), INT8_C( -18), INT8_C( -58), INT8_C(-111), INT8_C(-110), INT8_C( -14), INT8_C( 7), INT8_C( 85), 
INT8_C(-113), INT8_C( 19), INT8_C( -72), INT8_C( 51), INT8_C( -49), INT8_C( 69), INT8_C( -80), INT8_C( 34), INT8_C( 87), INT8_C( 88), INT8_C( -38), INT8_C( -14), INT8_C( -30), INT8_C(-111), INT8_C( -87), INT8_C( 37), INT8_C( 40), INT8_C( -30), INT8_C( -40)), simde_mm512_set_epi8(INT8_C( 127), INT8_C( 66), INT8_C( 63), INT8_C( -46), INT8_C( 90), INT8_C( 39), INT8_C( -89), INT8_C(-105), INT8_C( -51), INT8_C( -46), INT8_C( 111), INT8_C( -45), INT8_C( -76), INT8_C(-112), INT8_C( -37), INT8_C(-120), INT8_C( 50), INT8_C( -97), INT8_C( -2), INT8_C( 121), INT8_C( 68), INT8_C(-106), INT8_C( 101), INT8_C( 115), INT8_C( -29), INT8_C( 59), INT8_C( 77), INT8_C( -36), INT8_C( 111), INT8_C( 95), INT8_C( 48), INT8_C( 113), INT8_C( -97), INT8_C( 111), INT8_C( -37), INT8_C( 127), INT8_C( -7), INT8_C( 31), INT8_C( 75), INT8_C( -5), INT8_C( 43), INT8_C( 127), INT8_C( -7), INT8_C( -25), INT8_C( -47), INT8_C( 127), INT8_C( -90), INT8_C( 115), INT8_C( 29), INT8_C( 127), INT8_C( 127), INT8_C( 61), INT8_C( -58), INT8_C( 111), INT8_C( -20), INT8_C(-110), INT8_C( 80), INT8_C( -10), INT8_C( -79), INT8_C(-128), INT8_C( -6), INT8_C(-108), INT8_C( -94), INT8_C( 125)) }, { simde_mm512_set_epi8(INT8_C( 91), INT8_C( 46), INT8_C( -16), INT8_C( 27), INT8_C( -66), INT8_C( -93), INT8_C( -43), INT8_C( 14), INT8_C( 100), INT8_C(-113), INT8_C( 32), INT8_C( 15), INT8_C( -79), INT8_C( 63), INT8_C( -18), INT8_C( -96), INT8_C(-119), INT8_C( 114), INT8_C( -87), INT8_C( -82), INT8_C( 76), INT8_C( 117), INT8_C( 14), INT8_C( -65), INT8_C( -37), INT8_C( -15), INT8_C( 43), INT8_C( 5), INT8_C(-108), INT8_C( -5), INT8_C( -84), INT8_C( 13), INT8_C( -66), INT8_C( 80), INT8_C( -20), INT8_C( 34), INT8_C(-122), INT8_C(-101), INT8_C( -91), INT8_C( -35), INT8_C( 57), INT8_C( 117), INT8_C( -26), INT8_C( 112), INT8_C( 59), INT8_C( 54), INT8_C( 91), INT8_C(-111), INT8_C( 111), INT8_C( 34), INT8_C( -76), INT8_C( 65), INT8_C( -31), INT8_C( 25), INT8_C( -63), INT8_C(-128), INT8_C( -28), INT8_C( 80), INT8_C( -75), INT8_C( 83), 
INT8_C( -23), INT8_C( -65), INT8_C( -75), INT8_C( -10)), UINT64_C( 2530018527), simde_mm512_set_epi8(INT8_C( 54), INT8_C( 110), INT8_C( -53), INT8_C( 100), INT8_C( 94), INT8_C( -2), INT8_C( 72), INT8_C( 95), INT8_C( -61), INT8_C( -31), INT8_C( 2), INT8_C( 88), INT8_C( 69), INT8_C(-117), INT8_C( 33), INT8_C( -57), INT8_C( -67), INT8_C( 92), INT8_C( -31), INT8_C( -16), INT8_C(-115), INT8_C( -91), INT8_C( 0), INT8_C( -73), INT8_C( -3), INT8_C( -71), INT8_C( 55), INT8_C( -82), INT8_C( -28), INT8_C( -34), INT8_C( -40), INT8_C( 60), INT8_C( 31), INT8_C(-113), INT8_C( 90), INT8_C( 114), INT8_C( 73), INT8_C( 77), INT8_C( -29), INT8_C( 80), INT8_C(-106), INT8_C( 121), INT8_C(-122), INT8_C( -4), INT8_C( 104), INT8_C( -76), INT8_C( 85), INT8_C( -33), INT8_C( -62), INT8_C( 96), INT8_C( -27), INT8_C( -88), INT8_C( -35), INT8_C( -58), INT8_C( -83), INT8_C( 36), INT8_C( -16), INT8_C( 50), INT8_C(-127), INT8_C( -82), INT8_C( 106), INT8_C( 56), INT8_C( 74), INT8_C( -59)), simde_mm512_set_epi8(INT8_C( -24), INT8_C( -30), INT8_C( 27), INT8_C( -44), INT8_C( -74), INT8_C( -85), INT8_C( -66), INT8_C( 26), INT8_C( -69), INT8_C( 112), INT8_C( -22), INT8_C( 62), INT8_C( 24), INT8_C(-107), INT8_C( -41), INT8_C( -21), INT8_C( 68), INT8_C( -34), INT8_C( 109), INT8_C( 58), INT8_C( 16), INT8_C( -72), INT8_C( -84), INT8_C( 18), INT8_C( -73), INT8_C( -54), INT8_C( 16), INT8_C( -93), INT8_C( -51), INT8_C( -50), INT8_C( 54), INT8_C( -69), INT8_C( -98), INT8_C(-113), INT8_C( -73), INT8_C(-124), INT8_C( -65), INT8_C( -51), INT8_C( 43), INT8_C( 10), INT8_C( -79), INT8_C( 35), INT8_C( -67), INT8_C( 69), INT8_C( 84), INT8_C( -44), INT8_C( -48), INT8_C( 89), INT8_C( 96), INT8_C(-100), INT8_C( -47), INT8_C( 114), INT8_C(-109), INT8_C( 8), INT8_C( 51), INT8_C( 120), INT8_C( -12), INT8_C(-128), INT8_C( -4), INT8_C(-105), INT8_C( 83), INT8_C(-123), INT8_C( -98), INT8_C( -2)), simde_mm512_set_epi8(INT8_C( 91), INT8_C( 46), INT8_C( -16), INT8_C( 27), INT8_C( -66), INT8_C( -93), INT8_C( -43), INT8_C( 14), 
INT8_C( 100), INT8_C(-113), INT8_C( 32), INT8_C( 15), INT8_C( -79), INT8_C( 63), INT8_C( -18), INT8_C( -96), INT8_C(-119), INT8_C( 114), INT8_C( -87), INT8_C( -82), INT8_C( 76), INT8_C( 117), INT8_C( 14), INT8_C( -65), INT8_C( -37), INT8_C( -15), INT8_C( 43), INT8_C( 5), INT8_C(-108), INT8_C( -5), INT8_C( -84), INT8_C( 13), INT8_C( 127), INT8_C( 80), INT8_C( -20), INT8_C( 127), INT8_C(-122), INT8_C( 127), INT8_C( -72), INT8_C( -35), INT8_C( -27), INT8_C( 86), INT8_C( -26), INT8_C( 112), INT8_C( 20), INT8_C( -32), INT8_C( 91), INT8_C(-122), INT8_C( 111), INT8_C( 34), INT8_C( -76), INT8_C( 65), INT8_C( -31), INT8_C( -66), INT8_C( -63), INT8_C(-128), INT8_C( -4), INT8_C( 127), INT8_C( -75), INT8_C( 23), INT8_C( 23), INT8_C( 127), INT8_C( 127), INT8_C( -57)) }, { simde_mm512_set_epi8(INT8_C( 125), INT8_C( -29), INT8_C( 116), INT8_C( -26), INT8_C( 78), INT8_C( 37), INT8_C( -43), INT8_C( 45), INT8_C( -81), INT8_C(-109), INT8_C( 26), INT8_C( -51), INT8_C( -54), INT8_C( 99), INT8_C( 124), INT8_C( 2), INT8_C( 10), INT8_C( 6), INT8_C( -77), INT8_C( -61), INT8_C( 59), INT8_C( 60), INT8_C( 42), INT8_C( 33), INT8_C( 126), INT8_C( 88), INT8_C( 41), INT8_C( 66), INT8_C( 98), INT8_C( 111), INT8_C( 94), INT8_C( -70), INT8_C( 116), INT8_C( -40), INT8_C( -88), INT8_C( 13), INT8_C( 50), INT8_C( 54), INT8_C( 32), INT8_C( -48), INT8_C( 85), INT8_C( 12), INT8_C( -17), INT8_C( 78), INT8_C( 2), INT8_C( -46), INT8_C( 21), INT8_C( 89), INT8_C( -58), INT8_C( -19), INT8_C( -7), INT8_C( -46), INT8_C( 58), INT8_C( 120), INT8_C( -85), INT8_C( 46), INT8_C( 80), INT8_C( -20), INT8_C( 94), INT8_C( 32), INT8_C( -61), INT8_C( 80), INT8_C( -39), INT8_C( 109)), UINT64_C( 1874542844), simde_mm512_set_epi8(INT8_C(-100), INT8_C( 8), INT8_C( -30), INT8_C( -66), INT8_C( -36), INT8_C( -55), INT8_C(-115), INT8_C( 64), INT8_C( 70), INT8_C( -8), INT8_C(-128), INT8_C( 84), INT8_C( 103), INT8_C( -54), INT8_C( 15), INT8_C( -45), INT8_C(-118), INT8_C( -27), INT8_C( 32), INT8_C( -75), INT8_C( 62), INT8_C( 67), 
INT8_C( -42), INT8_C( 4), INT8_C( 16), INT8_C( 45), INT8_C( 43), INT8_C( 21), INT8_C( 14), INT8_C( -97), INT8_C( -37), INT8_C( -6), INT8_C(-100), INT8_C( -82), INT8_C( 58), INT8_C( 88), INT8_C( 61), INT8_C( 74), INT8_C( 5), INT8_C( 44), INT8_C( 61), INT8_C( -83), INT8_C( 68), INT8_C( -3), INT8_C( 41), INT8_C( 117), INT8_C( 116), INT8_C( -16), INT8_C( 48), INT8_C( -35), INT8_C( -2), INT8_C( -9), INT8_C( -72), INT8_C( 97), INT8_C(-100), INT8_C( 120), INT8_C( -96), INT8_C( 21), INT8_C( 35), INT8_C( 38), INT8_C( -30), INT8_C( 81), INT8_C(-111), INT8_C( 42)), simde_mm512_set_epi8(INT8_C( -63), INT8_C( 101), INT8_C( -21), INT8_C( -29), INT8_C( -58), INT8_C( 75), INT8_C( -46), INT8_C( -8), INT8_C( -92), INT8_C( 68), INT8_C( 64), INT8_C( 71), INT8_C( 50), INT8_C( -60), INT8_C( 45), INT8_C( 13), INT8_C( 106), INT8_C( 39), INT8_C( 64), INT8_C( 94), INT8_C( -73), INT8_C( 90), INT8_C( -93), INT8_C(-109), INT8_C(-100), INT8_C( 76), INT8_C( 29), INT8_C( -99), INT8_C( 84), INT8_C( 37), INT8_C( 69), INT8_C( 125), INT8_C( -66), INT8_C( 106), INT8_C( 75), INT8_C(-109), INT8_C( 19), INT8_C( 119), INT8_C( 108), INT8_C( 52), INT8_C( -69), INT8_C( 84), INT8_C( 6), INT8_C( 75), INT8_C( 9), INT8_C(-103), INT8_C( -24), INT8_C( 9), INT8_C( -42), INT8_C( 127), INT8_C( 47), INT8_C( -53), INT8_C( 93), INT8_C( 18), INT8_C( -4), INT8_C( -42), INT8_C( -20), INT8_C( 12), INT8_C( 100), INT8_C( -66), INT8_C( -56), INT8_C( -47), INT8_C( -16), INT8_C( 104)), simde_mm512_set_epi8(INT8_C( 125), INT8_C( -29), INT8_C( 116), INT8_C( -26), INT8_C( 78), INT8_C( 37), INT8_C( -43), INT8_C( 45), INT8_C( -81), INT8_C(-109), INT8_C( 26), INT8_C( -51), INT8_C( -54), INT8_C( 99), INT8_C( 124), INT8_C( 2), INT8_C( 10), INT8_C( 6), INT8_C( -77), INT8_C( -61), INT8_C( 59), INT8_C( 60), INT8_C( 42), INT8_C( 33), INT8_C( 126), INT8_C( 88), INT8_C( 41), INT8_C( 66), INT8_C( 98), INT8_C( 111), INT8_C( 94), INT8_C( -70), INT8_C( 116), INT8_C(-128), INT8_C( -17), INT8_C( 13), INT8_C( 42), INT8_C( -45), INT8_C(-103), INT8_C( 
-8), INT8_C( 127), INT8_C( 12), INT8_C( 62), INT8_C( -78), INT8_C( 32), INT8_C( -46), INT8_C( 127), INT8_C( -25), INT8_C( -58), INT8_C(-128), INT8_C( -7), INT8_C( -46), INT8_C( 58), INT8_C( 120), INT8_C( -85), INT8_C( 46), INT8_C( -76), INT8_C( 9), INT8_C( -65), INT8_C( 104), INT8_C( 26), INT8_C( 127), INT8_C( -39), INT8_C( 109)) }, { simde_mm512_set_epi8(INT8_C(-105), INT8_C( -37), INT8_C( 57), INT8_C( -65), INT8_C( -18), INT8_C( -12), INT8_C( -19), INT8_C( 108), INT8_C(-104), INT8_C( -75), INT8_C( 90), INT8_C( 70), INT8_C( -27), INT8_C( 72), INT8_C( 24), INT8_C( -76), INT8_C( -96), INT8_C(-123), INT8_C( 29), INT8_C(-106), INT8_C( 55), INT8_C( 118), INT8_C( 81), INT8_C( 39), INT8_C( -71), INT8_C( -44), INT8_C( 102), INT8_C( -78), INT8_C( -31), INT8_C( -59), INT8_C( 63), INT8_C( 2), INT8_C(-102), INT8_C( 50), INT8_C( 51), INT8_C(-105), INT8_C( -79), INT8_C( 83), INT8_C( 28), INT8_C( 101), INT8_C( 60), INT8_C( -89), INT8_C(-127), INT8_C( 40), INT8_C( 37), INT8_C( -30), INT8_C( -95), INT8_C( -95), INT8_C( -12), INT8_C( -31), INT8_C( -9), INT8_C( 29), INT8_C( 10), INT8_C( 43), INT8_C( -6), INT8_C( -78), INT8_C( -94), INT8_C( 107), INT8_C( 111), INT8_C( 4), INT8_C( -9), INT8_C( 42), INT8_C( -92), INT8_C( 29)), UINT64_C( 3975530942), simde_mm512_set_epi8(INT8_C( -43), INT8_C( 17), INT8_C(-121), INT8_C( 17), INT8_C( -38), INT8_C( 95), INT8_C( 94), INT8_C( -34), INT8_C( 26), INT8_C(-107), INT8_C( 37), INT8_C(-105), INT8_C( 52), INT8_C( 37), INT8_C( 92), INT8_C( -72), INT8_C( -33), INT8_C( -75), INT8_C( -94), INT8_C( 1), INT8_C( -71), INT8_C( 17), INT8_C(-112), INT8_C( -35), INT8_C( 16), INT8_C( 122), INT8_C( -73), INT8_C( 18), INT8_C( 88), INT8_C( 56), INT8_C( -10), INT8_C( -55), INT8_C( 123), INT8_C(-125), INT8_C( -53), INT8_C( -1), INT8_C( 2), INT8_C( 36), INT8_C( 121), INT8_C( -35), INT8_C( 48), INT8_C( 77), INT8_C( 102), INT8_C( -56), INT8_C( 57), INT8_C( -53), INT8_C( -69), INT8_C( -26), INT8_C( -26), INT8_C( 90), INT8_C( 94), INT8_C( 84), INT8_C( -13), INT8_C( 64), 
INT8_C( 115), INT8_C(-119), INT8_C( 126), INT8_C( 83), INT8_C( 55), INT8_C( -37), INT8_C( -59), INT8_C( 31), INT8_C( 24), INT8_C( 17)), simde_mm512_set_epi8(INT8_C( 74), INT8_C( 90), INT8_C( 31), INT8_C( 63), INT8_C( -50), INT8_C( 3), INT8_C( 34), INT8_C( -98), INT8_C( -18), INT8_C(-126), INT8_C( 83), INT8_C( 80), INT8_C( 21), INT8_C( 114), INT8_C( -45), INT8_C( -76), INT8_C( -10), INT8_C( -63), INT8_C( -36), INT8_C(-104), INT8_C( -48), INT8_C( -29), INT8_C( -85), INT8_C( 3), INT8_C(-112), INT8_C( -72), INT8_C( 68), INT8_C( 5), INT8_C( -84), INT8_C( -48), INT8_C(-110), INT8_C(-128), INT8_C( 89), INT8_C( -17), INT8_C( -2), INT8_C( -14), INT8_C( 61), INT8_C( 40), INT8_C( -41), INT8_C( 78), INT8_C( -65), INT8_C( 82), INT8_C( -97), INT8_C(-107), INT8_C( 36), INT8_C( 59), INT8_C( -78), INT8_C( 61), INT8_C( 9), INT8_C( 86), INT8_C( 122), INT8_C( 2), INT8_C( 27), INT8_C( 124), INT8_C( -38), INT8_C( 23), INT8_C( -78), INT8_C( 48), INT8_C( 17), INT8_C( 54), INT8_C( 116), INT8_C(-115), INT8_C( 105), INT8_C( -48)), simde_mm512_set_epi8(INT8_C(-105), INT8_C( -37), INT8_C( 57), INT8_C( -65), INT8_C( -18), INT8_C( -12), INT8_C( -19), INT8_C( 108), INT8_C(-104), INT8_C( -75), INT8_C( 90), INT8_C( 70), INT8_C( -27), INT8_C( 72), INT8_C( 24), INT8_C( -76), INT8_C( -96), INT8_C(-123), INT8_C( 29), INT8_C(-106), INT8_C( 55), INT8_C( 118), INT8_C( 81), INT8_C( 39), INT8_C( -71), INT8_C( -44), INT8_C( 102), INT8_C( -78), INT8_C( -31), INT8_C( -59), INT8_C( 63), INT8_C( 2), INT8_C( 34), INT8_C(-108), INT8_C( -51), INT8_C(-105), INT8_C( -59), INT8_C( -4), INT8_C( 28), INT8_C( 101), INT8_C( 113), INT8_C( -5), INT8_C( 127), INT8_C( 51), INT8_C( 37), INT8_C(-112), INT8_C( -95), INT8_C( -87), INT8_C( -35), INT8_C( 4), INT8_C( -9), INT8_C( 29), INT8_C( -40), INT8_C( 43), INT8_C( -6), INT8_C(-128), INT8_C( 127), INT8_C( 107), INT8_C( 38), INT8_C( -91), INT8_C(-128), INT8_C( 127), INT8_C( -81), INT8_C( 29)) }, { simde_mm512_set_epi8(INT8_C( -61), INT8_C( -50), INT8_C( -30), INT8_C( 100), 
INT8_C( -32), INT8_C(-100), INT8_C( 0), INT8_C( -53), INT8_C( -74), INT8_C( 41), INT8_C( 37), INT8_C( 44), INT8_C( 4), INT8_C( -98), INT8_C( 73), INT8_C( -60), INT8_C( 85), INT8_C( 11), INT8_C( 90), INT8_C( 41), INT8_C( 94), INT8_C( 105), INT8_C( -3), INT8_C( -61), INT8_C( 118), INT8_C( -95), INT8_C(-121), INT8_C( -38), INT8_C(-104), INT8_C( 36), INT8_C( 86), INT8_C( 49), INT8_C( 118), INT8_C( -55), INT8_C( -12), INT8_C( -47), INT8_C( 98), INT8_C( 69), INT8_C(-107), INT8_C( 120), INT8_C( 49), INT8_C( 116), INT8_C( -55), INT8_C( -29), INT8_C( 93), INT8_C(-120), INT8_C( 116), INT8_C( -2), INT8_C(-100), INT8_C(-116), INT8_C( -60), INT8_C( -65), INT8_C( 55), INT8_C( 64), INT8_C( -42), INT8_C( -46), INT8_C(-101), INT8_C( -6), INT8_C( -22), INT8_C( 36), INT8_C( 111), INT8_C( -33), INT8_C( 84), INT8_C( 25)), UINT64_C( 2085281943), simde_mm512_set_epi8(INT8_C( 125), INT8_C( 0), INT8_C( 14), INT8_C( 82), INT8_C( 68), INT8_C( -35), INT8_C( -43), INT8_C( -7), INT8_C( 68), INT8_C( 127), INT8_C( -94), INT8_C( -60), INT8_C( -16), INT8_C( -81), INT8_C(-125), INT8_C( 75), INT8_C( -5), INT8_C( 103), INT8_C( 97), INT8_C( -56), INT8_C( 5), INT8_C( -6), INT8_C( 40), INT8_C( 58), INT8_C( 106), INT8_C(-112), INT8_C( -85), INT8_C( 2), INT8_C( 42), INT8_C( -58), INT8_C( -70), INT8_C( -97), INT8_C( 95), INT8_C( 14), INT8_C( 116), INT8_C( -67), INT8_C( 1), INT8_C( 68), INT8_C( -54), INT8_C( -13), INT8_C( -11), INT8_C( -60), INT8_C( -44), INT8_C( 102), INT8_C( -9), INT8_C( -71), INT8_C( -51), INT8_C( 40), INT8_C( 0), INT8_C( -81), INT8_C( 85), INT8_C(-103), INT8_C( 21), INT8_C( 112), INT8_C( 59), INT8_C( -49), INT8_C( -57), INT8_C( 28), INT8_C( -72), INT8_C( -76), INT8_C( -32), INT8_C( -46), INT8_C( -10), INT8_C( 43)), simde_mm512_set_epi8(INT8_C( 65), INT8_C( 28), INT8_C( 112), INT8_C( 84), INT8_C( -84), INT8_C( 90), INT8_C( 43), INT8_C( -27), INT8_C( 82), INT8_C( -68), INT8_C( -29), INT8_C(-114), INT8_C( 96), INT8_C( 68), INT8_C( 41), INT8_C( -5), INT8_C( 60), INT8_C( 55), INT8_C( 65), 
INT8_C( 51), INT8_C(-116), INT8_C( 90), INT8_C( 113), INT8_C( 72), INT8_C( 119), INT8_C( -14), INT8_C( 52), INT8_C( 73), INT8_C( -28), INT8_C( -61), INT8_C( 83), INT8_C( 94), INT8_C( 116), INT8_C( -75), INT8_C( 6), INT8_C( -97), INT8_C( 114), INT8_C( 125), INT8_C( -13), INT8_C( -39), INT8_C( -76), INT8_C( -78), INT8_C( 126), INT8_C( 42), INT8_C( -31), INT8_C(-119), INT8_C( 44), INT8_C( -13), INT8_C( 39), INT8_C( 26), INT8_C( 103), INT8_C( 126), INT8_C( -63), INT8_C( -48), INT8_C( -12), INT8_C( 54), INT8_C( -42), INT8_C( 31), INT8_C( 1), INT8_C( 119), INT8_C( -34), INT8_C( -59), INT8_C(-107), INT8_C( 102)), simde_mm512_set_epi8(INT8_C( -61), INT8_C( -50), INT8_C( -30), INT8_C( 100), INT8_C( -32), INT8_C(-100), INT8_C( 0), INT8_C( -53), INT8_C( -74), INT8_C( 41), INT8_C( 37), INT8_C( 44), INT8_C( 4), INT8_C( -98), INT8_C( 73), INT8_C( -60), INT8_C( 85), INT8_C( 11), INT8_C( 90), INT8_C( 41), INT8_C( 94), INT8_C( 105), INT8_C( -3), INT8_C( -61), INT8_C( 118), INT8_C( -95), INT8_C(-121), INT8_C( -38), INT8_C(-104), INT8_C( 36), INT8_C( 86), INT8_C( 49), INT8_C( 118), INT8_C( 89), INT8_C( 110), INT8_C( 30), INT8_C(-113), INT8_C( -57), INT8_C(-107), INT8_C( 120), INT8_C( 49), INT8_C( 18), INT8_C( -55), INT8_C( -29), INT8_C( 22), INT8_C(-120), INT8_C( -95), INT8_C( -2), INT8_C( -39), INT8_C(-107), INT8_C( -18), INT8_C( -65), INT8_C( 55), INT8_C( 64), INT8_C( -42), INT8_C( -46), INT8_C( -15), INT8_C( -6), INT8_C( -22), INT8_C(-128), INT8_C( 111), INT8_C( 13), INT8_C( 97), INT8_C( -59)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_subs_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_subs_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask64 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT64_C( 2568138505), simde_mm512_set_epi8(INT8_C( 65), INT8_C( -91), INT8_C( 
-13), INT8_C( 114), INT8_C( 123), INT8_C( 107), INT8_C(-108), INT8_C( 15), INT8_C( 52), INT8_C(-124), INT8_C( -17), INT8_C( -9), INT8_C( 31), INT8_C( 67), INT8_C( -76), INT8_C( -4), INT8_C( -52), INT8_C( 99), INT8_C( 106), INT8_C( -35), INT8_C(-115), INT8_C(-127), INT8_C( 121), INT8_C( -60), INT8_C( -27), INT8_C( 73), INT8_C( -21), INT8_C( -51), INT8_C(-127), INT8_C( 68), INT8_C( 109), INT8_C( 35), INT8_C( 96), INT8_C( 3), INT8_C( -62), INT8_C( -43), INT8_C( -75), INT8_C( 102), INT8_C(-105), INT8_C( 63), INT8_C( 64), INT8_C( 114), INT8_C( 112), INT8_C( 10), INT8_C( 108), INT8_C( 71), INT8_C( -40), INT8_C( 20), INT8_C( -40), INT8_C( 26), INT8_C( -94), INT8_C( 107), INT8_C( -89), INT8_C( 22), INT8_C( 55), INT8_C( 105), INT8_C( 23), INT8_C( 52), INT8_C( 82), INT8_C( 50), INT8_C( -48), INT8_C( 77), INT8_C( 73), INT8_C( 98)), simde_mm512_set_epi8(INT8_C( 87), INT8_C( 118), INT8_C( 10), INT8_C( -53), INT8_C( -21), INT8_C( -77), INT8_C( 81), INT8_C( 16), INT8_C( 125), INT8_C( 53), INT8_C( -93), INT8_C( -44), INT8_C( -14), INT8_C( -81), INT8_C( 93), INT8_C( -81), INT8_C( 69), INT8_C( 88), INT8_C( 17), INT8_C(-127), INT8_C( 49), INT8_C( 91), INT8_C( 55), INT8_C( -23), INT8_C( -84), INT8_C( 12), INT8_C( -97), INT8_C( -75), INT8_C( 99), INT8_C( 54), INT8_C( -92), INT8_C( -28), INT8_C( -2), INT8_C( 69), INT8_C(-102), INT8_C( 42), INT8_C( 41), INT8_C(-101), INT8_C( -59), INT8_C( -72), INT8_C( -53), INT8_C( -47), INT8_C( -78), INT8_C( 67), INT8_C( 54), INT8_C( 12), INT8_C( -65), INT8_C( 101), INT8_C( 77), INT8_C( -96), INT8_C( -15), INT8_C( 66), INT8_C( 50), INT8_C( 9), INT8_C( -75), INT8_C( 102), INT8_C( 2), INT8_C( -31), INT8_C( 83), INT8_C( -71), INT8_C( -84), INT8_C( -66), INT8_C(-111), INT8_C( -2)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 98), INT8_C( 0), INT8_C( 0), INT8_C( -85), INT8_C(-116), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -57), INT8_C( 0), INT8_C( 0), INT8_C( 25), INT8_C( 0), INT8_C(-117), INT8_C( 0), INT8_C( -79), INT8_C( 0), INT8_C(-128), INT8_C( 13), INT8_C( 127), INT8_C( 3), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 36), INT8_C( 0), INT8_C( 0), INT8_C( 100)) }, { UINT64_C( 2987001584), simde_mm512_set_epi8(INT8_C( 31), INT8_C( 110), INT8_C(-101), INT8_C( -57), INT8_C( 28), INT8_C( -50), INT8_C( -16), INT8_C( 37), INT8_C( 87), INT8_C(-105), INT8_C( 96), INT8_C( -79), INT8_C( 99), INT8_C( 74), INT8_C( 20), INT8_C( 120), INT8_C( 60), INT8_C( 49), INT8_C( 101), INT8_C( -94), INT8_C( -5), INT8_C( 114), INT8_C( 17), INT8_C( 1), INT8_C( 36), INT8_C( -92), INT8_C( 39), INT8_C(-108), INT8_C( -15), INT8_C( 14), INT8_C( -15), INT8_C( 79), INT8_C( 112), INT8_C( 108), INT8_C( -44), INT8_C( -18), INT8_C( 125), INT8_C( 93), INT8_C( -92), INT8_C( -51), INT8_C( 21), INT8_C( -19), INT8_C( -56), INT8_C( 114), INT8_C( -95), INT8_C( 40), INT8_C( -27), INT8_C( -85), INT8_C( 26), INT8_C( 29), INT8_C( -30), INT8_C( 53), INT8_C( -69), INT8_C( -26), INT8_C( 79), INT8_C(-124), INT8_C( -35), INT8_C( -29), INT8_C( -59), INT8_C( -99), INT8_C(-118), INT8_C( 30), INT8_C( 105), INT8_C( 3)), simde_mm512_set_epi8(INT8_C( 100), INT8_C(-112), INT8_C( 127), INT8_C( -1), INT8_C( -76), INT8_C( 101), INT8_C( 90), INT8_C( -29), INT8_C( -60), INT8_C( -92), INT8_C( -73), INT8_C( -20), INT8_C( -84), INT8_C( -11), INT8_C( -28), INT8_C(-123), INT8_C( -48), INT8_C(-118), INT8_C(-107), INT8_C( 73), INT8_C( -80), INT8_C( 46), INT8_C(-117), INT8_C( -85), INT8_C( -21), INT8_C( -54), INT8_C( -49), INT8_C( -8), INT8_C( 51), INT8_C( 126), INT8_C( 22), INT8_C( 70), INT8_C( -86), INT8_C( -98), INT8_C( -17), INT8_C( -29), INT8_C( 75), INT8_C( 115), 
INT8_C( 61), INT8_C( 76), INT8_C( -65), INT8_C( 43), INT8_C( -13), INT8_C( -2), INT8_C( 81), INT8_C( -26), INT8_C( -6), INT8_C( -15), INT8_C( -40), INT8_C( 72), INT8_C( 66), INT8_C( 32), INT8_C( 22), INT8_C( -74), INT8_C( -73), INT8_C( 67), INT8_C( 75), INT8_C( -18), INT8_C( -97), INT8_C( 75), INT8_C( -47), INT8_C( 32), INT8_C( 26), INT8_C(-108)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( -27), INT8_C( 11), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( -21), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 48), INT8_C( 127), INT8_C( 0), INT8_C(-110), INT8_C( -11), INT8_C( 38), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { UINT64_C( 1908364466), simde_mm512_set_epi8(INT8_C( 57), INT8_C( 79), INT8_C( 6), INT8_C( 7), INT8_C( 37), INT8_C(-119), INT8_C( -51), INT8_C( 99), INT8_C( -66), INT8_C( -12), INT8_C( -24), INT8_C(-107), INT8_C(-117), INT8_C( 63), INT8_C( 42), INT8_C( 79), INT8_C( -78), INT8_C( -79), INT8_C( -22), INT8_C( -52), INT8_C( -78), INT8_C( -8), INT8_C( -61), INT8_C( 101), INT8_C(-111), INT8_C( 35), INT8_C( -89), INT8_C( 127), INT8_C( 50), INT8_C(-115), INT8_C( 91), INT8_C( -72), INT8_C( -97), INT8_C( -34), INT8_C( -80), INT8_C( -31), INT8_C( -60), INT8_C( 119), INT8_C(-106), INT8_C( 66), INT8_C( 75), INT8_C(-108), INT8_C(-113), INT8_C( 120), INT8_C( 36), INT8_C( 112), INT8_C( -45), INT8_C( 41), INT8_C( 90), INT8_C( -32), INT8_C( 22), INT8_C( 86), INT8_C( 72), INT8_C( -20), INT8_C( -4), INT8_C(-125), INT8_C( -37), INT8_C( 84), 
INT8_C( -95), INT8_C( -66), INT8_C( 109), INT8_C( 18), INT8_C( 120), INT8_C(-115)), simde_mm512_set_epi8(INT8_C( 127), INT8_C( -8), INT8_C( 57), INT8_C( -73), INT8_C( 40), INT8_C( -7), INT8_C( 90), INT8_C(-110), INT8_C(-100), INT8_C(-107), INT8_C( 106), INT8_C( -87), INT8_C( 61), INT8_C( -94), INT8_C( 7), INT8_C( 84), INT8_C( 25), INT8_C( 77), INT8_C( 24), INT8_C( 35), INT8_C( 56), INT8_C( -72), INT8_C( 65), INT8_C( -77), INT8_C( 81), INT8_C( 110), INT8_C( 109), INT8_C( 26), INT8_C( -46), INT8_C( 6), INT8_C( -4), INT8_C( -76), INT8_C( 49), INT8_C( 67), INT8_C( -22), INT8_C( 12), INT8_C( 66), INT8_C( -76), INT8_C( -71), INT8_C( 20), INT8_C( -11), INT8_C( 97), INT8_C( 20), INT8_C( -1), INT8_C( 69), INT8_C( 54), INT8_C( -68), INT8_C( 104), INT8_C( 61), INT8_C( -16), INT8_C( -52), INT8_C( -98), INT8_C(-126), INT8_C(-109), INT8_C( 86), INT8_C( 51), INT8_C( 95), INT8_C( -88), INT8_C( 6), INT8_C(-121), INT8_C( 49), INT8_C( 97), INT8_C( -69), INT8_C( 125)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-101), INT8_C( -58), INT8_C( -43), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 46), INT8_C( 86), INT8_C( 0), INT8_C(-128), INT8_C( 121), INT8_C( -33), INT8_C( 58), INT8_C( 23), INT8_C( -63), INT8_C( 0), INT8_C( -16), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 89), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C(-101), INT8_C( 55), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 0)) }, { UINT64_C( 3441334389), simde_mm512_set_epi8(INT8_C( 96), INT8_C( 30), INT8_C( 105), INT8_C( -4), INT8_C( 44), INT8_C( 54), INT8_C( 89), INT8_C( 40), INT8_C( 99), INT8_C( 41), INT8_C( 5), INT8_C( -2), INT8_C( 
-57), INT8_C( 98), INT8_C( 33), INT8_C( -28), INT8_C( 66), INT8_C( 28), INT8_C( -53), INT8_C( 48), INT8_C( -75), INT8_C( -97), INT8_C( -42), INT8_C( 47), INT8_C( -33), INT8_C( -17), INT8_C( 113), INT8_C( 120), INT8_C(-111), INT8_C( 14), INT8_C( 77), INT8_C(-108), INT8_C( 25), INT8_C( -42), INT8_C( 109), INT8_C( -97), INT8_C( -19), INT8_C( -18), INT8_C( -1), INT8_C( -50), INT8_C( -87), INT8_C( -70), INT8_C( 94), INT8_C( -91), INT8_C( 23), INT8_C( 33), INT8_C( -10), INT8_C( 81), INT8_C( 81), INT8_C( -63), INT8_C( 35), INT8_C( 44), INT8_C( 11), INT8_C(-124), INT8_C( -20), INT8_C( 71), INT8_C( -25), INT8_C(-112), INT8_C( 2), INT8_C( 59), INT8_C( 21), INT8_C(-113), INT8_C( 35), INT8_C( -40)), simde_mm512_set_epi8(INT8_C( -44), INT8_C( 54), INT8_C(-101), INT8_C( -13), INT8_C( 111), INT8_C( 110), INT8_C( 114), INT8_C( -57), INT8_C( 58), INT8_C(-128), INT8_C(-123), INT8_C( -3), INT8_C( 74), INT8_C( -78), INT8_C(-124), INT8_C( -30), INT8_C( 21), INT8_C( 86), INT8_C( -53), INT8_C(-116), INT8_C(-126), INT8_C( 10), INT8_C(-123), INT8_C( -78), INT8_C( 2), INT8_C( 100), INT8_C( -90), INT8_C( -79), INT8_C( -46), INT8_C( -83), INT8_C( 11), INT8_C(-120), INT8_C( -56), INT8_C( -4), INT8_C( -39), INT8_C(-114), INT8_C( -98), INT8_C(-108), INT8_C( -22), INT8_C( 24), INT8_C( 127), INT8_C( -20), INT8_C( 7), INT8_C( -16), INT8_C( -8), INT8_C( 19), INT8_C( 119), INT8_C( -44), INT8_C( 7), INT8_C( -47), INT8_C(-126), INT8_C(-119), INT8_C( 1), INT8_C( 111), INT8_C( -95), INT8_C( -67), INT8_C( 38), INT8_C( -19), INT8_C( 96), INT8_C( -21), INT8_C( -76), INT8_C(-123), INT8_C(-115), INT8_C( -61)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), 
INT8_C( 81), INT8_C( -38), INT8_C( 0), INT8_C( 0), INT8_C( 79), INT8_C( 90), INT8_C( 0), INT8_C( -74), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -75), INT8_C( 31), INT8_C( 14), INT8_C(-128), INT8_C( 0), INT8_C( 74), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 10), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -93), INT8_C( -94), INT8_C( 80), INT8_C( 0), INT8_C( 10), INT8_C( 0), INT8_C( 21)) }, { UINT64_C( 706560777), simde_mm512_set_epi8(INT8_C( 44), INT8_C( 94), INT8_C( 127), INT8_C( -12), INT8_C( 90), INT8_C( 111), INT8_C( -20), INT8_C(-110), INT8_C( -82), INT8_C( -19), INT8_C( -79), INT8_C(-128), INT8_C( -6), INT8_C(-119), INT8_C( 100), INT8_C( 125), INT8_C( -42), INT8_C( 43), INT8_C( 6), INT8_C( -97), INT8_C( -90), INT8_C( -4), INT8_C( -71), INT8_C( 33), INT8_C( 1), INT8_C( 3), INT8_C( -91), INT8_C( -62), INT8_C(-106), INT8_C( -6), INT8_C( 35), INT8_C( 67), INT8_C( 97), INT8_C( 46), INT8_C( 89), INT8_C( 56), INT8_C( -84), INT8_C( -39), INT8_C( -70), INT8_C( 78), INT8_C( -49), INT8_C( 94), INT8_C( -1), INT8_C( -23), INT8_C( -88), INT8_C( -11), INT8_C(-128), INT8_C( -62), INT8_C( 106), INT8_C( 125), INT8_C( -71), INT8_C( -96), INT8_C( -28), INT8_C( -58), INT8_C( -40), INT8_C( -32), INT8_C( -13), INT8_C( -84), INT8_C( -69), INT8_C( -40), INT8_C(-122), INT8_C( 117), INT8_C( 115), INT8_C( -87)), simde_mm512_set_epi8(INT8_C( 66), INT8_C( -40), INT8_C( -14), INT8_C( -56), INT8_C( -68), INT8_C( -68), INT8_C(-124), INT8_C( -65), INT8_C( 41), INT8_C( -59), INT8_C(-122), INT8_C( 101), INT8_C( 82), INT8_C( -28), INT8_C( -40), INT8_C( -58), INT8_C( 127), INT8_C( 66), INT8_C( 63), INT8_C( -46), INT8_C( 90), INT8_C( 39), INT8_C( -89), INT8_C(-105), INT8_C( -51), INT8_C( -46), INT8_C( 111), INT8_C( -45), INT8_C( -76), INT8_C(-112), INT8_C( -37), INT8_C(-120), INT8_C( 50), INT8_C( -97), INT8_C( -2), INT8_C( 121), INT8_C( 68), INT8_C(-106), INT8_C( 101), INT8_C( 115), INT8_C( -29), INT8_C( 59), INT8_C( 77), INT8_C( -36), INT8_C( 111), INT8_C( 95), INT8_C( 48), 
INT8_C( 113), INT8_C( -97), INT8_C( 105), INT8_C( -37), INT8_C( -42), INT8_C( -7), INT8_C( 31), INT8_C( 45), INT8_C( -5), INT8_C( -29), INT8_C( 44), INT8_C( 78), INT8_C( -25), INT8_C( -47), INT8_C(-119), INT8_C( -90), INT8_C( 115)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 91), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 13), INT8_C(-128), INT8_C(-106), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 20), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -85), INT8_C( -27), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -75), INT8_C( 0), INT8_C( 0), INT8_C(-128)) }, { UINT64_C( 3905965829), simde_mm512_set_epi8(INT8_C( 51), INT8_C( -49), INT8_C( 69), INT8_C( -80), INT8_C( 34), INT8_C( 87), INT8_C( 88), INT8_C( -38), INT8_C( -14), INT8_C( -30), INT8_C(-111), INT8_C( -87), INT8_C( 37), INT8_C( 40), INT8_C( -30), INT8_C( -40), INT8_C( 89), INT8_C( 39), INT8_C( 109), INT8_C( 109), INT8_C( -71), INT8_C( -81), INT8_C( 67), INT8_C( 48), INT8_C( -43), INT8_C( 31), INT8_C( -36), INT8_C( -53), INT8_C( -7), INT8_C( 84), INT8_C( -92), INT8_C( 124), INT8_C( -22), INT8_C( 31), INT8_C( 42), INT8_C(-103), INT8_C( 29), INT8_C( 75), INT8_C( -91), INT8_C( 75), INT8_C( 107), INT8_C( 62), INT8_C( 126), INT8_C( 7), INT8_C( -89), INT8_C( 119), INT8_C( 79), INT8_C( -38), INT8_C( 18), INT8_C( 57), INT8_C(-102), INT8_C( 66), INT8_C( -7), INT8_C( 89), INT8_C( 57), INT8_C( -55), INT8_C( -68), INT8_C( 21), INT8_C( -21), INT8_C( -6), INT8_C( -26), INT8_C( 122), INT8_C( 114), INT8_C( 83)), simde_mm512_set_epi8(INT8_C( 111), INT8_C( 
34), INT8_C( -76), INT8_C( 65), INT8_C( -31), INT8_C( 25), INT8_C( -63), INT8_C(-128), INT8_C( -28), INT8_C( 80), INT8_C( -75), INT8_C( 83), INT8_C( -23), INT8_C( -65), INT8_C( -75), INT8_C( -10), INT8_C(-102), INT8_C(-123), INT8_C( 20), INT8_C( 36), INT8_C( 70), INT8_C( -95), INT8_C( 63), INT8_C( 110), INT8_C( 98), INT8_C(-128), INT8_C( -74), INT8_C( -50), INT8_C( 17), INT8_C( -37), INT8_C(-104), INT8_C( -74), INT8_C( -4), INT8_C( 15), INT8_C( -59), INT8_C( 62), INT8_C( -89), INT8_C( 103), INT8_C( -5), INT8_C( 71), INT8_C( 103), INT8_C(-119), INT8_C( -62), INT8_C( -47), INT8_C( -78), INT8_C( 32), INT8_C( 104), INT8_C( 33), INT8_C( -45), INT8_C( -54), INT8_C( 17), INT8_C(-117), INT8_C( 96), INT8_C( 66), INT8_C( -18), INT8_C( -58), INT8_C(-111), INT8_C(-110), INT8_C( -14), INT8_C( 7), INT8_C( 85), INT8_C(-113), INT8_C( 19), INT8_C( -72)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -18), INT8_C( 16), INT8_C( 101), INT8_C( 0), INT8_C( 118), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 4), INT8_C( 127), INT8_C( 0), INT8_C( 54), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 111), INT8_C( 0), INT8_C( 0), INT8_C(-103), INT8_C( 23), INT8_C( 75), INT8_C( 3), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 127)) }, { UINT64_C( 993418129), simde_mm512_set_epi8(INT8_C( -62), INT8_C( 96), INT8_C( -27), INT8_C( -88), INT8_C( -35), INT8_C( -58), INT8_C( -83), INT8_C( 36), INT8_C( -16), INT8_C( 50), INT8_C(-127), INT8_C( -82), INT8_C( 106), INT8_C( 56), INT8_C( 74), INT8_C( -59), INT8_C( -80), INT8_C( -22), INT8_C( 6), INT8_C( -65), INT8_C(-106), 
INT8_C( -51), INT8_C( 4), INT8_C( -33), INT8_C( 91), INT8_C( 46), INT8_C( -16), INT8_C( 27), INT8_C( -66), INT8_C( -93), INT8_C( -43), INT8_C( 14), INT8_C( 100), INT8_C(-113), INT8_C( 32), INT8_C( 15), INT8_C( -79), INT8_C( 63), INT8_C( -18), INT8_C( -96), INT8_C(-119), INT8_C( 114), INT8_C( -87), INT8_C( -82), INT8_C( 76), INT8_C( 117), INT8_C( 14), INT8_C( -65), INT8_C( -37), INT8_C( -15), INT8_C( 43), INT8_C( 5), INT8_C(-108), INT8_C( -5), INT8_C( -84), INT8_C( 13), INT8_C( -66), INT8_C( 80), INT8_C( -20), INT8_C( 34), INT8_C(-122), INT8_C(-101), INT8_C( -91), INT8_C( -35)), simde_mm512_set_epi8(INT8_C( 96), INT8_C(-100), INT8_C( -47), INT8_C( 114), INT8_C(-109), INT8_C( 8), INT8_C( 51), INT8_C( 120), INT8_C( -12), INT8_C(-128), INT8_C( -4), INT8_C(-105), INT8_C( 83), INT8_C(-123), INT8_C( -98), INT8_C( -2), INT8_C( 54), INT8_C( 110), INT8_C( -53), INT8_C( 100), INT8_C( 94), INT8_C( -2), INT8_C( 72), INT8_C( 95), INT8_C( -61), INT8_C( -31), INT8_C( 2), INT8_C( 88), INT8_C( 69), INT8_C(-117), INT8_C( 33), INT8_C( -57), INT8_C( -67), INT8_C( 92), INT8_C( -31), INT8_C( -16), INT8_C(-115), INT8_C( -91), INT8_C( 0), INT8_C( -73), INT8_C( -3), INT8_C( -71), INT8_C( 55), INT8_C( -82), INT8_C( -28), INT8_C( -34), INT8_C( -40), INT8_C( 60), INT8_C( 31), INT8_C(-113), INT8_C( 90), INT8_C( 114), INT8_C( 73), INT8_C( 77), INT8_C( -29), INT8_C( 80), INT8_C(-106), INT8_C( 121), INT8_C(-122), INT8_C( -4), INT8_C( 104), INT8_C( -76), INT8_C( 85), INT8_C( -33)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 63), INT8_C( 31), INT8_C( 36), INT8_C( 0), INT8_C( -18), INT8_C( -23), INT8_C( 0), 
INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 54), INT8_C( 0), INT8_C( 0), INT8_C( 98), INT8_C( 0), INT8_C(-109), INT8_C(-128), INT8_C( 0), INT8_C( -55), INT8_C( -67), INT8_C( 40), INT8_C( 0), INT8_C( 0), INT8_C( 38), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -2)) }, { UINT64_C( 1423233113), simde_mm512_set_epi8(INT8_C( 85), INT8_C( 12), INT8_C( -17), INT8_C( 78), INT8_C( 2), INT8_C( -46), INT8_C( 21), INT8_C( 89), INT8_C( -58), INT8_C( -19), INT8_C( -7), INT8_C( -46), INT8_C( 58), INT8_C( 120), INT8_C( -85), INT8_C( 46), INT8_C( 80), INT8_C( -20), INT8_C( 94), INT8_C( 32), INT8_C( -61), INT8_C( 80), INT8_C( -39), INT8_C( 109), INT8_C( -24), INT8_C( -30), INT8_C( 27), INT8_C( -44), INT8_C( -74), INT8_C( -85), INT8_C( -66), INT8_C( 26), INT8_C( -69), INT8_C( 112), INT8_C( -22), INT8_C( 62), INT8_C( 24), INT8_C(-107), INT8_C( -41), INT8_C( -21), INT8_C( 68), INT8_C( -34), INT8_C( 109), INT8_C( 58), INT8_C( 16), INT8_C( -72), INT8_C( -84), INT8_C( 18), INT8_C( -73), INT8_C( -54), INT8_C( 16), INT8_C( -93), INT8_C( -51), INT8_C( -50), INT8_C( 54), INT8_C( -69), INT8_C( -98), INT8_C(-113), INT8_C( -73), INT8_C(-124), INT8_C( -65), INT8_C( -51), INT8_C( 43), INT8_C( 10)), simde_mm512_set_epi8(INT8_C( 48), INT8_C( -35), INT8_C( -2), INT8_C( -9), INT8_C( -72), INT8_C( 97), INT8_C(-100), INT8_C( 120), INT8_C( -96), INT8_C( 21), INT8_C( 35), INT8_C( 38), INT8_C( -30), INT8_C( 81), INT8_C(-111), INT8_C( 42), INT8_C( 39), INT8_C(-112), INT8_C( 20), INT8_C(-116), INT8_C( 111), INT8_C( -69), INT8_C( 64), INT8_C( -4), INT8_C( 125), INT8_C( -29), INT8_C( 116), INT8_C( -26), INT8_C( 78), INT8_C( 37), INT8_C( -43), INT8_C( 45), INT8_C( -81), INT8_C(-109), INT8_C( 26), INT8_C( -51), INT8_C( -54), INT8_C( 99), INT8_C( 124), INT8_C( 2), INT8_C( 10), INT8_C( 6), INT8_C( -77), INT8_C( -61), INT8_C( 59), INT8_C( 60), INT8_C( 42), INT8_C( 33), INT8_C( 126), INT8_C( 88), INT8_C( 41), INT8_C( 66), INT8_C( 98), INT8_C( 111), INT8_C( 94), INT8_C( -70), INT8_C( 116), 
INT8_C( -40), INT8_C( -88), INT8_C( 13), INT8_C( 50), INT8_C( 54), INT8_C( 32), INT8_C( -48)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( 113), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 58), INT8_C( -40), INT8_C( 0), INT8_C( 119), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C(-128), INT8_C( 0), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -73), INT8_C( 0), INT8_C(-128), INT8_C(-115), INT8_C( 0), INT8_C( 0), INT8_C( 58)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_subs_epi8(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_subs_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi16(INT16_C( 21075), INT16_C( 30017), INT16_C(-11898), INT16_C( 29710), INT16_C( 19457), INT16_C(-12796), INT16_C( 21427), INT16_C( 28826), INT16_C( 25482), INT16_C(-11843), INT16_C( 15582), INT16_C( 20114), INT16_C(-14761), INT16_C(-15590), INT16_C( -4142), INT16_C( 29932), INT16_C(-30672), INT16_C( 6190), INT16_C( 26590), INT16_C( 10803), INT16_C(-16554), INT16_C( 15816), INT16_C( 14967), INT16_C( 24063), INT16_C(-14713), INT16_C( -8094), INT16_C(-16817), INT16_C( 25507), INT16_C( 19912), INT16_C(-19929), INT16_C(-12604), INT16_C(-17156)), simde_mm512_set_epi16(INT16_C( 27175), INT16_C(-27122), INT16_C( -6914), INT16_C( 10212), INT16_C( 13894), INT16_C( -4620), INT16_C(-10724), 
INT16_C( -3078), INT16_C( 29698), INT16_C( 6009), INT16_C( 28893), INT16_C(-31734), INT16_C( -3957), INT16_C( 6787), INT16_C( 9325), INT16_C( 7645), INT16_C( -2133), INT16_C( -9633), INT16_C(-22525), INT16_C( 1124), INT16_C( 21781), INT16_C( 17119), INT16_C(-19461), INT16_C(-32134), INT16_C( -7507), INT16_C( 19092), INT16_C( 21408), INT16_C(-14444), INT16_C( 22843), INT16_C( 28625), INT16_C( -2322), INT16_C( 5251)), simde_mm512_set_epi16(INT16_C( -6100), INT16_C( 32767), INT16_C( -4984), INT16_C( 19498), INT16_C( 5563), INT16_C( -8176), INT16_C( 32151), INT16_C( 31904), INT16_C( -4216), INT16_C(-17852), INT16_C(-13311), INT16_C( 32767), INT16_C(-10804), INT16_C(-22377), INT16_C(-13467), INT16_C( 22287), INT16_C(-28539), INT16_C( 15823), INT16_C( 32767), INT16_C( 9679), INT16_C(-32768), INT16_C( -1303), INT16_C( 32767), INT16_C( 32767), INT16_C( -7206), INT16_C(-27186), INT16_C(-32768), INT16_C( 32767), INT16_C( -2931), INT16_C(-32768), INT16_C(-10282), INT16_C(-22407)) }, { simde_mm512_set_epi16(INT16_C(-27508), INT16_C( 5509), INT16_C(-13526), INT16_C( 16909), INT16_C( 2419), INT16_C( 22142), INT16_C( -6109), INT16_C( -1177), INT16_C( 9839), INT16_C( 6329), INT16_C( -239), INT16_C(-15885), INT16_C( 3666), INT16_C( 20122), INT16_C( -1699), INT16_C( 6503), INT16_C( 29169), INT16_C( -4681), INT16_C( -2713), INT16_C(-24709), INT16_C( 7221), INT16_C( -3718), INT16_C( 970), INT16_C(-15558), INT16_C(-11011), INT16_C(-10787), INT16_C(-29970), INT16_C( 3894), INT16_C(-25914), INT16_C(-18758), INT16_C( 11824), INT16_C( -8868)), simde_mm512_set_epi16(INT16_C( 1604), INT16_C( 19874), INT16_C(-12133), INT16_C( -1966), INT16_C( 13041), INT16_C( 1566), INT16_C(-11791), INT16_C( -3425), INT16_C( 7377), INT16_C(-23380), INT16_C( -9249), INT16_C(-31251), INT16_C( 14877), INT16_C( 24009), INT16_C(-32316), INT16_C( 8308), INT16_C(-11725), INT16_C(-10230), INT16_C( 1074), INT16_C( 12341), INT16_C( 19989), INT16_C( 16491), INT16_C( 4144), INT16_C(-11714), INT16_C( 19285), 
INT16_C(-29198), INT16_C(-25258), INT16_C(-29514), INT16_C( 9755), INT16_C(-29385), INT16_C(-23111), INT16_C( -3412)), simde_mm512_set_epi16(INT16_C(-29112), INT16_C(-14365), INT16_C( -1393), INT16_C( 18875), INT16_C(-10622), INT16_C( 20576), INT16_C( 5682), INT16_C( 2248), INT16_C( 2462), INT16_C( 29709), INT16_C( 9010), INT16_C( 15366), INT16_C(-11211), INT16_C( -3887), INT16_C( 30617), INT16_C( -1805), INT16_C( 32767), INT16_C( 5549), INT16_C( -3787), INT16_C(-32768), INT16_C(-12768), INT16_C(-20209), INT16_C( -3174), INT16_C( -3844), INT16_C(-30296), INT16_C( 18411), INT16_C( -4712), INT16_C( 32767), INT16_C(-32768), INT16_C( 10627), INT16_C( 32767), INT16_C( -5456)) }, { simde_mm512_set_epi16(INT16_C( 691), INT16_C( -4823), INT16_C( -3253), INT16_C(-31392), INT16_C(-21784), INT16_C( -6740), INT16_C( 9130), INT16_C(-18273), INT16_C( 11275), INT16_C(-27092), INT16_C( 90), INT16_C(-20133), INT16_C( 30523), INT16_C( 27008), INT16_C( 28387), INT16_C( 17266), INT16_C( -9777), INT16_C( 27096), INT16_C( -8328), INT16_C( -6812), INT16_C(-22954), INT16_C( -4409), INT16_C( 21734), INT16_C(-19695), INT16_C(-11981), INT16_C(-21195), INT16_C( 18272), INT16_C( 28327), INT16_C( 7123), INT16_C(-32216), INT16_C( 24489), INT16_C(-15668)), simde_mm512_set_epi16(INT16_C(-21377), INT16_C( 15856), INT16_C( 7686), INT16_C(-28568), INT16_C(-15192), INT16_C( -9747), INT16_C( 11300), INT16_C( 27000), INT16_C( -6635), INT16_C( 3626), INT16_C( 12716), INT16_C(-30571), INT16_C( 31697), INT16_C( 5622), INT16_C( 24444), INT16_C( -8226), INT16_C( -8263), INT16_C( 2890), INT16_C( 26732), INT16_C( -8763), INT16_C(-13950), INT16_C( 27415), INT16_C( 7653), INT16_C( 31511), INT16_C(-21082), INT16_C( 2398), INT16_C( 23365), INT16_C(-12903), INT16_C(-18221), INT16_C( 4204), INT16_C(-20453), INT16_C( 15021)), simde_mm512_set_epi16(INT16_C( 22068), INT16_C(-20679), INT16_C(-10939), INT16_C( -2824), INT16_C( -6592), INT16_C( 3007), INT16_C( -2170), INT16_C(-32768), INT16_C( 17910), INT16_C(-30718), 
INT16_C(-12626), INT16_C( 10438), INT16_C( -1174), INT16_C( 21386), INT16_C( 3943), INT16_C( 25492), INT16_C( -1514), INT16_C( 24206), INT16_C(-32768), INT16_C( 1951), INT16_C( -9004), INT16_C(-31824), INT16_C( 14081), INT16_C(-32768), INT16_C( 9101), INT16_C(-23593), INT16_C( -5093), INT16_C( 32767), INT16_C( 25344), INT16_C(-32768), INT16_C( 32767), INT16_C(-30689)) }, { simde_mm512_set_epi16(INT16_C( 4451), INT16_C( -3121), INT16_C( 11648), INT16_C( 14185), INT16_C( -8499), INT16_C(-24679), INT16_C(-31633), INT16_C( 19019), INT16_C( 26210), INT16_C(-29943), INT16_C(-18883), INT16_C( 25468), INT16_C( 20366), INT16_C( 4961), INT16_C(-25468), INT16_C( -4158), INT16_C( 6653), INT16_C( -1720), INT16_C(-29723), INT16_C(-14244), INT16_C( -4917), INT16_C( 730), INT16_C(-20677), INT16_C( 16986), INT16_C( 9316), INT16_C( 28795), INT16_C(-18273), INT16_C(-29423), INT16_C(-23674), INT16_C( 7963), INT16_C( 28019), INT16_C( 13728)), simde_mm512_set_epi16(INT16_C(-10770), INT16_C( 29411), INT16_C( 30463), INT16_C( -4902), INT16_C(-20392), INT16_C(-28251), INT16_C( 11448), INT16_C( 27155), INT16_C(-11669), INT16_C( 11820), INT16_C(-16512), INT16_C( 10540), INT16_C( 17477), INT16_C(-19759), INT16_C( 28024), INT16_C(-14431), INT16_C( 24400), INT16_C( -7583), INT16_C(-12129), INT16_C( 28592), INT16_C(-31057), INT16_C(-18091), INT16_C( 19926), INT16_C(-29261), INT16_C( 7501), INT16_C( 16620), INT16_C( 6953), INT16_C( 3437), INT16_C( 5790), INT16_C( 5348), INT16_C( 17145), INT16_C(-28791)), simde_mm512_set_epi16(INT16_C( 15221), INT16_C(-32532), INT16_C(-18815), INT16_C( 19087), INT16_C( 11893), INT16_C( 3572), INT16_C(-32768), INT16_C( -8136), INT16_C( 32767), INT16_C(-32768), INT16_C( -2371), INT16_C( 14928), INT16_C( 2889), INT16_C( 24720), INT16_C(-32768), INT16_C( 10273), INT16_C(-17747), INT16_C( 5863), INT16_C(-17594), INT16_C(-32768), INT16_C( 26140), INT16_C( 18821), INT16_C(-32768), INT16_C( 32767), INT16_C( 1815), INT16_C( 12175), INT16_C(-25226), INT16_C(-32768), 
INT16_C(-29464), INT16_C( 2615), INT16_C( 10874), INT16_C( 32767)) }, { simde_mm512_set_epi16(INT16_C(-31561), INT16_C( 18949), INT16_C( -2287), INT16_C(-20534), INT16_C( -1057), INT16_C( -3046), INT16_C( 22138), INT16_C(-11031), INT16_C( 43), INT16_C( -6266), INT16_C(-20090), INT16_C(-22393), INT16_C(-26046), INT16_C(-23703), INT16_C( 28092), INT16_C( 6346), INT16_C( 10308), INT16_C( 572), INT16_C( 5), INT16_C( 15306), INT16_C(-19429), INT16_C( -5811), INT16_C(-27420), INT16_C(-29128), INT16_C(-13676), INT16_C( -3673), INT16_C(-26157), INT16_C( 19197), INT16_C(-27593), INT16_C(-20030), INT16_C( 3690), INT16_C( -3850)), simde_mm512_set_epi16(INT16_C(-11908), INT16_C( 14774), INT16_C( 5244), INT16_C( 18107), INT16_C(-16396), INT16_C( 31910), INT16_C(-28865), INT16_C(-20038), INT16_C(-19234), INT16_C(-15108), INT16_C(-10436), INT16_C( 19911), INT16_C( 3330), INT16_C( 28633), INT16_C( 10550), INT16_C( -9358), INT16_C( 23697), INT16_C( 19726), INT16_C(-26407), INT16_C(-18878), INT16_C( 4326), INT16_C(-22642), INT16_C(-17402), INT16_C( 16035), INT16_C( 14223), INT16_C(-15160), INT16_C( -9470), INT16_C( -3752), INT16_C( 6710), INT16_C( 21116), INT16_C( -9579), INT16_C( 10253)), simde_mm512_set_epi16(INT16_C(-19653), INT16_C( 4175), INT16_C( -7531), INT16_C(-32768), INT16_C( 15339), INT16_C(-32768), INT16_C( 32767), INT16_C( 9007), INT16_C( 19277), INT16_C( 8842), INT16_C( -9654), INT16_C(-32768), INT16_C(-29376), INT16_C(-32768), INT16_C( 17542), INT16_C( 15704), INT16_C(-13389), INT16_C(-19154), INT16_C( 26412), INT16_C( 32767), INT16_C(-23755), INT16_C( 16831), INT16_C(-10018), INT16_C(-32768), INT16_C(-27899), INT16_C( 11487), INT16_C(-16687), INT16_C( 22949), INT16_C(-32768), INT16_C(-32768), INT16_C( 13269), INT16_C(-14103)) }, { simde_mm512_set_epi16(INT16_C( 1468), INT16_C( -4389), INT16_C( 1296), INT16_C(-27715), INT16_C(-15620), INT16_C( 3731), INT16_C( -7289), INT16_C(-27703), INT16_C( 474), INT16_C( 27447), INT16_C( -9036), INT16_C( 9176), INT16_C( 2726), 
INT16_C(-12144), INT16_C( -2101), INT16_C( 26907), INT16_C(-24700), INT16_C( 1244), INT16_C( -3927), INT16_C(-22632), INT16_C( -7525), INT16_C( 17743), INT16_C( 15263), INT16_C( -3823), INT16_C( 27307), INT16_C( 32391), INT16_C(-23270), INT16_C(-29301), INT16_C( 23369), INT16_C(-15291), INT16_C( -5840), INT16_C( 18168)), simde_mm512_set_epi16(INT16_C( 23449), INT16_C( 17725), INT16_C(-20919), INT16_C( 31466), INT16_C( 31308), INT16_C( -2183), INT16_C(-31351), INT16_C(-32386), INT16_C( 26890), INT16_C(-30591), INT16_C(-12785), INT16_C(-23638), INT16_C(-31955), INT16_C( -9847), INT16_C( 19108), INT16_C(-19915), INT16_C( 4587), INT16_C( 27034), INT16_C( -19), INT16_C( 28332), INT16_C(-23789), INT16_C(-24960), INT16_C( -5839), INT16_C( 25722), INT16_C(-24423), INT16_C( 15592), INT16_C( 6092), INT16_C( -9272), INT16_C(-12796), INT16_C(-17663), INT16_C( -6154), INT16_C( 23859)), simde_mm512_set_epi16(INT16_C(-21981), INT16_C(-22114), INT16_C( 22215), INT16_C(-32768), INT16_C(-32768), INT16_C( 5914), INT16_C( 24062), INT16_C( 4683), INT16_C(-26416), INT16_C( 32767), INT16_C( 3749), INT16_C( 32767), INT16_C( 32767), INT16_C( -2297), INT16_C(-21209), INT16_C( 32767), INT16_C(-29287), INT16_C(-25790), INT16_C( -3908), INT16_C(-32768), INT16_C( 16264), INT16_C( 32767), INT16_C( 21102), INT16_C(-29545), INT16_C( 32767), INT16_C( 16799), INT16_C(-29362), INT16_C(-20029), INT16_C( 32767), INT16_C( 2372), INT16_C( 314), INT16_C( -5691)) }, { simde_mm512_set_epi16(INT16_C(-22741), INT16_C( 13394), INT16_C( -9417), INT16_C( 28906), INT16_C(-18980), INT16_C( -8463), INT16_C( 9174), INT16_C(-25605), INT16_C( 547), INT16_C( 3767), INT16_C(-12577), INT16_C(-16546), INT16_C( -1301), INT16_C( -7147), INT16_C( 26281), INT16_C( 29309), INT16_C( 29052), INT16_C(-30842), INT16_C( 5995), INT16_C( 6270), INT16_C( 20539), INT16_C( 10179), INT16_C(-26848), INT16_C( 14327), INT16_C( 15491), INT16_C( 18652), INT16_C( 19903), INT16_C( 30123), INT16_C( 25261), INT16_C(-17460), INT16_C( 10742), 
INT16_C( -4552)), simde_mm512_set_epi16(INT16_C( 5754), INT16_C(-23038), INT16_C(-16589), INT16_C(-23858), INT16_C( -3821), INT16_C( -4798), INT16_C( 30602), INT16_C(-28532), INT16_C( 11508), INT16_C( 7979), INT16_C( -3877), INT16_C( -5920), INT16_C(-24150), INT16_C(-24496), INT16_C( 17421), INT16_C( -1981), INT16_C( 27523), INT16_C( 26800), INT16_C( 25010), INT16_C( 27339), INT16_C( -9050), INT16_C( 19128), INT16_C( 15279), INT16_C( -1817), INT16_C(-13923), INT16_C( 5129), INT16_C(-22618), INT16_C( 27704), INT16_C( -4783), INT16_C( 31238), INT16_C(-30342), INT16_C( -8854)), simde_mm512_set_epi16(INT16_C(-28495), INT16_C( 32767), INT16_C( 7172), INT16_C( 32767), INT16_C(-15159), INT16_C( -3665), INT16_C(-21428), INT16_C( 2927), INT16_C(-10961), INT16_C( -4212), INT16_C( -8700), INT16_C(-10626), INT16_C( 22849), INT16_C( 17349), INT16_C( 8860), INT16_C( 31290), INT16_C( 1529), INT16_C(-32768), INT16_C(-19015), INT16_C(-21069), INT16_C( 29589), INT16_C( -8949), INT16_C(-32768), INT16_C( 16144), INT16_C( 29414), INT16_C( 13523), INT16_C( 32767), INT16_C( 2419), INT16_C( 30044), INT16_C(-32768), INT16_C( 32767), INT16_C( 4302)) }, { simde_mm512_set_epi16(INT16_C( 27021), INT16_C( 31131), INT16_C( 63), INT16_C(-10530), INT16_C( -1071), INT16_C(-31284), INT16_C(-21788), INT16_C(-16108), INT16_C(-15167), INT16_C( 25422), INT16_C( 14520), INT16_C(-13896), INT16_C( 20399), INT16_C( 31915), INT16_C(-16518), INT16_C( -6202), INT16_C(-16332), INT16_C( -3071), INT16_C(-15644), INT16_C( -7016), INT16_C( 13977), INT16_C(-13846), INT16_C(-23290), INT16_C( -2079), INT16_C( 4753), INT16_C( 14919), INT16_C(-18528), INT16_C( 7420), INT16_C( 12098), INT16_C( 31014), INT16_C( 17813), INT16_C(-14456)), simde_mm512_set_epi16(INT16_C(-12529), INT16_C( -3643), INT16_C(-28826), INT16_C(-12110), INT16_C( 8030), INT16_C( 20316), INT16_C( 27324), INT16_C( 24735), INT16_C( -6774), INT16_C( -2704), INT16_C(-31930), INT16_C( 6874), INT16_C( -3952), INT16_C( 2658), INT16_C( -904), INT16_C( -8319), 
INT16_C(-16424), INT16_C( 22778), INT16_C( 18985), INT16_C( 10063), INT16_C(-31751), INT16_C( 16016), INT16_C(-30217), INT16_C( 18364), INT16_C(-20176), INT16_C( -4961), INT16_C(-29576), INT16_C(-16634), INT16_C( -8011), INT16_C(-27110), INT16_C(-24526), INT16_C(-11504)), simde_mm512_set_epi16(INT16_C( 32767), INT16_C( 32767), INT16_C( 28889), INT16_C( 1580), INT16_C( -9101), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768), INT16_C( -8393), INT16_C( 28126), INT16_C( 32767), INT16_C(-20770), INT16_C( 24351), INT16_C( 29257), INT16_C(-15614), INT16_C( 2117), INT16_C( 92), INT16_C(-25849), INT16_C(-32768), INT16_C(-17079), INT16_C( 32767), INT16_C(-29862), INT16_C( 6927), INT16_C(-20443), INT16_C( 24929), INT16_C( 19880), INT16_C( 11048), INT16_C( 24054), INT16_C( 20109), INT16_C( 32767), INT16_C( 32767), INT16_C( -2952)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_subs_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_subs_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu8(UINT8_C( 82), UINT8_C( 83), UINT8_C(117), UINT8_C( 65), UINT8_C(209), UINT8_C(134), UINT8_C(116), UINT8_C( 14), UINT8_C( 76), UINT8_C( 1), UINT8_C(206), UINT8_C( 4), UINT8_C( 83), UINT8_C(179), UINT8_C(112), UINT8_C(154), UINT8_C( 99), UINT8_C(138), UINT8_C(209), UINT8_C(189), UINT8_C( 60), UINT8_C(222), UINT8_C( 78), UINT8_C(146), UINT8_C(198), UINT8_C( 87), UINT8_C(195), UINT8_C( 26), UINT8_C(239), UINT8_C(210), UINT8_C(116), UINT8_C(236), UINT8_C(136), UINT8_C( 48), UINT8_C( 24), UINT8_C( 46), UINT8_C(103), UINT8_C(222), UINT8_C( 42), UINT8_C( 51), UINT8_C(191), UINT8_C( 86), UINT8_C( 61), UINT8_C(200), UINT8_C( 58), UINT8_C(119), UINT8_C( 93), UINT8_C(255), UINT8_C(198), UINT8_C(135), UINT8_C(224), UINT8_C( 98), UINT8_C(190), UINT8_C( 79), UINT8_C( 99), UINT8_C(163), 
UINT8_C( 77), UINT8_C(200), UINT8_C(178), UINT8_C( 39), UINT8_C(206), UINT8_C(196), UINT8_C(188), UINT8_C(252)), simde_x_mm512_set_epu8(UINT8_C(106), UINT8_C( 39), UINT8_C(150), UINT8_C( 14), UINT8_C(228), UINT8_C(254), UINT8_C( 39), UINT8_C(228), UINT8_C( 54), UINT8_C( 70), UINT8_C(237), UINT8_C(244), UINT8_C(214), UINT8_C( 28), UINT8_C(243), UINT8_C(250), UINT8_C(116), UINT8_C( 2), UINT8_C( 23), UINT8_C(121), UINT8_C(112), UINT8_C(221), UINT8_C(132), UINT8_C( 10), UINT8_C(240), UINT8_C(139), UINT8_C( 26), UINT8_C(131), UINT8_C( 36), UINT8_C(109), UINT8_C( 29), UINT8_C(221), UINT8_C(247), UINT8_C(171), UINT8_C(218), UINT8_C( 95), UINT8_C(168), UINT8_C( 3), UINT8_C( 4), UINT8_C(100), UINT8_C( 85), UINT8_C( 21), UINT8_C( 66), UINT8_C(223), UINT8_C(179), UINT8_C(251), UINT8_C(130), UINT8_C(122), UINT8_C(226), UINT8_C(173), UINT8_C( 74), UINT8_C(148), UINT8_C( 83), UINT8_C(160), UINT8_C(199), UINT8_C(148), UINT8_C( 89), UINT8_C( 59), UINT8_C(111), UINT8_C(209), UINT8_C(246), UINT8_C(238), UINT8_C( 20), UINT8_C(131)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 44), UINT8_C( 0), UINT8_C( 51), UINT8_C( 0), UINT8_C( 0), UINT8_C( 77), UINT8_C( 0), UINT8_C( 22), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(151), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(136), UINT8_C(186), UINT8_C( 68), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C(136), UINT8_C( 0), UINT8_C( 0), UINT8_C(169), UINT8_C( 0), UINT8_C(203), UINT8_C(101), UINT8_C( 87), UINT8_C( 15), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(219), UINT8_C( 38), UINT8_C( 0), UINT8_C(106), UINT8_C( 65), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(133), UINT8_C( 0), UINT8_C( 0), UINT8_C(150), UINT8_C( 0), UINT8_C(107), UINT8_C( 0), UINT8_C( 0), UINT8_C( 15), UINT8_C( 0), UINT8_C(141), UINT8_C( 67), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(168), UINT8_C(121)) }, { simde_x_mm512_set_epu8(UINT8_C(148), UINT8_C(140), UINT8_C( 21), UINT8_C(133), 
UINT8_C(203), UINT8_C( 42), UINT8_C( 66), UINT8_C( 13), UINT8_C( 9), UINT8_C(115), UINT8_C( 86), UINT8_C(126), UINT8_C(232), UINT8_C( 35), UINT8_C(251), UINT8_C(103), UINT8_C( 38), UINT8_C(111), UINT8_C( 24), UINT8_C(185), UINT8_C(255), UINT8_C( 17), UINT8_C(193), UINT8_C(243), UINT8_C( 14), UINT8_C( 82), UINT8_C( 78), UINT8_C(154), UINT8_C(249), UINT8_C( 93), UINT8_C( 25), UINT8_C(103), UINT8_C(113), UINT8_C(241), UINT8_C(237), UINT8_C(183), UINT8_C(245), UINT8_C(103), UINT8_C(159), UINT8_C(123), UINT8_C( 28), UINT8_C( 53), UINT8_C(241), UINT8_C(122), UINT8_C( 3), UINT8_C(202), UINT8_C(195), UINT8_C( 58), UINT8_C(212), UINT8_C(253), UINT8_C(213), UINT8_C(221), UINT8_C(138), UINT8_C(238), UINT8_C( 15), UINT8_C( 54), UINT8_C(154), UINT8_C(198), UINT8_C(182), UINT8_C(186), UINT8_C( 46), UINT8_C( 48), UINT8_C(221), UINT8_C( 92)), simde_x_mm512_set_epu8(UINT8_C( 6), UINT8_C( 68), UINT8_C( 77), UINT8_C(162), UINT8_C(208), UINT8_C(155), UINT8_C(248), UINT8_C( 82), UINT8_C( 50), UINT8_C(241), UINT8_C( 6), UINT8_C( 30), UINT8_C(209), UINT8_C(241), UINT8_C(242), UINT8_C(159), UINT8_C( 28), UINT8_C(209), UINT8_C(164), UINT8_C(172), UINT8_C(219), UINT8_C(223), UINT8_C(133), UINT8_C(237), UINT8_C( 58), UINT8_C( 29), UINT8_C( 93), UINT8_C(201), UINT8_C(129), UINT8_C(196), UINT8_C( 32), UINT8_C(116), UINT8_C(210), UINT8_C( 51), UINT8_C(216), UINT8_C( 10), UINT8_C( 4), UINT8_C( 50), UINT8_C( 48), UINT8_C( 53), UINT8_C( 78), UINT8_C( 21), UINT8_C( 64), UINT8_C(107), UINT8_C( 16), UINT8_C( 48), UINT8_C(210), UINT8_C( 62), UINT8_C( 75), UINT8_C( 85), UINT8_C(141), UINT8_C(242), UINT8_C(157), UINT8_C( 86), UINT8_C(140), UINT8_C(182), UINT8_C( 38), UINT8_C( 27), UINT8_C(141), UINT8_C( 55), UINT8_C(165), UINT8_C(185), UINT8_C(242), UINT8_C(172)), simde_x_mm512_set_epu8(UINT8_C(142), UINT8_C( 72), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 80), UINT8_C( 96), UINT8_C( 23), UINT8_C( 0), UINT8_C( 9), UINT8_C( 0), 
UINT8_C( 10), UINT8_C( 0), UINT8_C( 0), UINT8_C( 13), UINT8_C( 36), UINT8_C( 0), UINT8_C( 60), UINT8_C( 6), UINT8_C( 0), UINT8_C( 53), UINT8_C( 0), UINT8_C( 0), UINT8_C(120), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(190), UINT8_C( 21), UINT8_C(173), UINT8_C(241), UINT8_C( 53), UINT8_C(111), UINT8_C( 70), UINT8_C( 0), UINT8_C( 32), UINT8_C(177), UINT8_C( 15), UINT8_C( 0), UINT8_C(154), UINT8_C( 0), UINT8_C( 0), UINT8_C(137), UINT8_C(168), UINT8_C( 72), UINT8_C( 0), UINT8_C( 0), UINT8_C(152), UINT8_C( 0), UINT8_C( 0), UINT8_C(116), UINT8_C(171), UINT8_C( 41), UINT8_C(131), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0)) }, { simde_x_mm512_set_epu8(UINT8_C( 2), UINT8_C(179), UINT8_C(237), UINT8_C( 41), UINT8_C(243), UINT8_C( 75), UINT8_C(133), UINT8_C( 96), UINT8_C(170), UINT8_C(232), UINT8_C(229), UINT8_C(172), UINT8_C( 35), UINT8_C(170), UINT8_C(184), UINT8_C(159), UINT8_C( 44), UINT8_C( 11), UINT8_C(150), UINT8_C( 44), UINT8_C( 0), UINT8_C( 90), UINT8_C(177), UINT8_C( 91), UINT8_C(119), UINT8_C( 59), UINT8_C(105), UINT8_C(128), UINT8_C(110), UINT8_C(227), UINT8_C( 67), UINT8_C(114), UINT8_C(217), UINT8_C(207), UINT8_C(105), UINT8_C(216), UINT8_C(223), UINT8_C(120), UINT8_C(229), UINT8_C(100), UINT8_C(166), UINT8_C( 86), UINT8_C(238), UINT8_C(199), UINT8_C( 84), UINT8_C(230), UINT8_C(179), UINT8_C( 17), UINT8_C(209), UINT8_C( 51), UINT8_C(173), UINT8_C( 53), UINT8_C( 71), UINT8_C( 96), UINT8_C(110), UINT8_C(167), UINT8_C( 27), UINT8_C(211), UINT8_C(130), UINT8_C( 40), UINT8_C( 95), UINT8_C(169), UINT8_C(194), UINT8_C(204)), simde_x_mm512_set_epu8(UINT8_C(172), UINT8_C(127), UINT8_C( 61), UINT8_C(240), UINT8_C( 30), UINT8_C( 6), UINT8_C(144), UINT8_C(104), UINT8_C(196), UINT8_C(168), UINT8_C(217), UINT8_C(237), UINT8_C( 44), UINT8_C( 36), UINT8_C(105), UINT8_C(120), UINT8_C(230), UINT8_C( 21), UINT8_C( 14), UINT8_C( 42), UINT8_C( 49), UINT8_C(172), UINT8_C(136), UINT8_C(149), UINT8_C(123), UINT8_C(209), UINT8_C( 21), UINT8_C(246), UINT8_C( 95), 
UINT8_C(124), UINT8_C(223), UINT8_C(222), UINT8_C(223), UINT8_C(185), UINT8_C( 11), UINT8_C( 74), UINT8_C(104), UINT8_C(108), UINT8_C(221), UINT8_C(197), UINT8_C(201), UINT8_C(130), UINT8_C(107), UINT8_C( 23), UINT8_C( 29), UINT8_C(229), UINT8_C(123), UINT8_C( 23), UINT8_C(173), UINT8_C(166), UINT8_C( 9), UINT8_C( 94), UINT8_C( 91), UINT8_C( 69), UINT8_C(205), UINT8_C(153), UINT8_C(184), UINT8_C(211), UINT8_C( 16), UINT8_C(108), UINT8_C(176), UINT8_C( 27), UINT8_C( 58), UINT8_C(173)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 52), UINT8_C(176), UINT8_C( 0), UINT8_C(213), UINT8_C( 69), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 64), UINT8_C( 12), UINT8_C( 0), UINT8_C( 0), UINT8_C(134), UINT8_C( 79), UINT8_C( 39), UINT8_C( 0), UINT8_C( 0), UINT8_C(136), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 41), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 84), UINT8_C( 0), UINT8_C( 15), UINT8_C(103), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 22), UINT8_C( 94), UINT8_C(142), UINT8_C(119), UINT8_C( 12), UINT8_C( 8), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(131), UINT8_C(176), UINT8_C( 55), UINT8_C( 1), UINT8_C( 56), UINT8_C( 0), UINT8_C( 36), UINT8_C( 0), UINT8_C(164), UINT8_C( 0), UINT8_C( 0), UINT8_C( 27), UINT8_C( 0), UINT8_C( 14), UINT8_C( 0), UINT8_C( 0), UINT8_C(114), UINT8_C( 0), UINT8_C( 0), UINT8_C(142), UINT8_C(136), UINT8_C( 31)) }, { simde_x_mm512_set_epu8(UINT8_C( 17), UINT8_C( 99), UINT8_C(243), UINT8_C(207), UINT8_C( 45), UINT8_C(128), UINT8_C( 55), UINT8_C(105), UINT8_C(222), UINT8_C(205), UINT8_C(159), UINT8_C(153), UINT8_C(132), UINT8_C(111), UINT8_C( 74), UINT8_C( 75), UINT8_C(102), UINT8_C( 98), UINT8_C(139), UINT8_C( 9), UINT8_C(182), UINT8_C( 61), UINT8_C( 99), UINT8_C(124), UINT8_C( 79), UINT8_C(142), UINT8_C( 19), UINT8_C( 97), UINT8_C(156), UINT8_C(132), UINT8_C(239), UINT8_C(194), UINT8_C( 25), UINT8_C(253), UINT8_C(249), UINT8_C( 72), UINT8_C(139), UINT8_C(229), UINT8_C(200), UINT8_C( 92), UINT8_C(236), UINT8_C(203), UINT8_C( 
2), UINT8_C(218), UINT8_C(175), UINT8_C( 59), UINT8_C( 66), UINT8_C( 90), UINT8_C( 36), UINT8_C(100), UINT8_C(112), UINT8_C(123), UINT8_C(184), UINT8_C(159), UINT8_C(141), UINT8_C( 17), UINT8_C(163), UINT8_C(134), UINT8_C( 31), UINT8_C( 27), UINT8_C(109), UINT8_C(115), UINT8_C( 53), UINT8_C(160)), simde_x_mm512_set_epu8(UINT8_C(213), UINT8_C(238), UINT8_C(114), UINT8_C(227), UINT8_C(118), UINT8_C(255), UINT8_C(236), UINT8_C(218), UINT8_C(176), UINT8_C( 88), UINT8_C(145), UINT8_C(165), UINT8_C( 44), UINT8_C(184), UINT8_C(106), UINT8_C( 19), UINT8_C(210), UINT8_C(107), UINT8_C( 46), UINT8_C( 44), UINT8_C(191), UINT8_C(128), UINT8_C( 41), UINT8_C( 44), UINT8_C( 68), UINT8_C( 69), UINT8_C(178), UINT8_C(209), UINT8_C(109), UINT8_C(120), UINT8_C(199), UINT8_C(161), UINT8_C( 95), UINT8_C( 80), UINT8_C(226), UINT8_C( 97), UINT8_C(208), UINT8_C(159), UINT8_C(111), UINT8_C(176), UINT8_C(134), UINT8_C(175), UINT8_C(185), UINT8_C( 85), UINT8_C( 77), UINT8_C(214), UINT8_C(141), UINT8_C(179), UINT8_C( 29), UINT8_C( 77), UINT8_C( 64), UINT8_C(236), UINT8_C( 27), UINT8_C( 41), UINT8_C( 13), UINT8_C(109), UINT8_C( 22), UINT8_C(158), UINT8_C( 20), UINT8_C(228), UINT8_C( 66), UINT8_C(249), UINT8_C(143), UINT8_C(137)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C(129), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 46), UINT8_C(117), UINT8_C( 14), UINT8_C( 0), UINT8_C( 88), UINT8_C( 0), UINT8_C( 0), UINT8_C( 56), UINT8_C( 0), UINT8_C( 0), UINT8_C( 93), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 58), UINT8_C( 80), UINT8_C( 11), UINT8_C( 73), UINT8_C( 0), UINT8_C( 0), UINT8_C( 47), UINT8_C( 12), UINT8_C( 40), UINT8_C( 33), UINT8_C( 0), UINT8_C(173), UINT8_C( 23), UINT8_C( 0), UINT8_C( 0), UINT8_C( 70), UINT8_C( 89), UINT8_C( 0), UINT8_C(102), UINT8_C( 28), UINT8_C( 0), UINT8_C(133), UINT8_C( 98), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 7), UINT8_C( 23), UINT8_C( 48), UINT8_C( 0), UINT8_C(157), UINT8_C(118), UINT8_C(128), UINT8_C( 0), 
UINT8_C(141), UINT8_C( 0), UINT8_C( 11), UINT8_C( 0), UINT8_C( 43), UINT8_C( 0), UINT8_C( 0), UINT8_C( 23)) }, { simde_x_mm512_set_epu8(UINT8_C(132), UINT8_C(183), UINT8_C( 74), UINT8_C( 5), UINT8_C(247), UINT8_C( 17), UINT8_C(175), UINT8_C(202), UINT8_C(251), UINT8_C(223), UINT8_C(244), UINT8_C( 26), UINT8_C( 86), UINT8_C(122), UINT8_C(212), UINT8_C(233), UINT8_C( 0), UINT8_C( 43), UINT8_C(231), UINT8_C(134), UINT8_C(177), UINT8_C(134), UINT8_C(168), UINT8_C(135), UINT8_C(154), UINT8_C( 66), UINT8_C(163), UINT8_C(105), UINT8_C(109), UINT8_C(188), UINT8_C( 24), UINT8_C(202), UINT8_C( 40), UINT8_C( 68), UINT8_C( 2), UINT8_C( 60), UINT8_C( 0), UINT8_C( 5), UINT8_C( 59), UINT8_C(202), UINT8_C(180), UINT8_C( 27), UINT8_C(233), UINT8_C( 77), UINT8_C(148), UINT8_C(228), UINT8_C(142), UINT8_C( 56), UINT8_C(202), UINT8_C(148), UINT8_C(241), UINT8_C(167), UINT8_C(153), UINT8_C(211), UINT8_C( 74), UINT8_C(253), UINT8_C(148), UINT8_C( 55), UINT8_C(177), UINT8_C(194), UINT8_C( 14), UINT8_C(106), UINT8_C(240), UINT8_C(246)), simde_x_mm512_set_epu8(UINT8_C(209), UINT8_C(124), UINT8_C( 57), UINT8_C(182), UINT8_C( 20), UINT8_C(124), UINT8_C( 70), UINT8_C(187), UINT8_C(191), UINT8_C(244), UINT8_C(124), UINT8_C(166), UINT8_C(143), UINT8_C( 63), UINT8_C(177), UINT8_C(186), UINT8_C(180), UINT8_C(222), UINT8_C(196), UINT8_C(252), UINT8_C(215), UINT8_C( 60), UINT8_C( 77), UINT8_C(199), UINT8_C( 13), UINT8_C( 2), UINT8_C(111), UINT8_C(217), UINT8_C( 41), UINT8_C( 54), UINT8_C(219), UINT8_C(114), UINT8_C( 92), UINT8_C(145), UINT8_C( 77), UINT8_C( 14), UINT8_C(152), UINT8_C(217), UINT8_C(182), UINT8_C( 66), UINT8_C( 16), UINT8_C(230), UINT8_C(167), UINT8_C(142), UINT8_C(188), UINT8_C( 6), UINT8_C( 62), UINT8_C(163), UINT8_C( 55), UINT8_C(143), UINT8_C(196), UINT8_C(200), UINT8_C(219), UINT8_C( 2), UINT8_C(241), UINT8_C( 88), UINT8_C( 26), UINT8_C( 54), UINT8_C( 82), UINT8_C(124), UINT8_C(218), UINT8_C(149), UINT8_C( 40), UINT8_C( 13)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 59), 
UINT8_C( 17), UINT8_C( 0), UINT8_C(227), UINT8_C( 0), UINT8_C(105), UINT8_C( 15), UINT8_C( 60), UINT8_C( 0), UINT8_C(120), UINT8_C( 0), UINT8_C( 0), UINT8_C( 59), UINT8_C( 35), UINT8_C( 47), UINT8_C( 0), UINT8_C( 0), UINT8_C( 35), UINT8_C( 0), UINT8_C( 0), UINT8_C( 74), UINT8_C( 91), UINT8_C( 0), UINT8_C(141), UINT8_C( 64), UINT8_C( 52), UINT8_C( 0), UINT8_C( 68), UINT8_C(134), UINT8_C( 0), UINT8_C( 88), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 46), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(136), UINT8_C(164), UINT8_C( 0), UINT8_C( 66), UINT8_C( 0), UINT8_C( 0), UINT8_C(222), UINT8_C( 80), UINT8_C( 0), UINT8_C(147), UINT8_C( 5), UINT8_C( 45), UINT8_C( 0), UINT8_C( 0), UINT8_C(209), UINT8_C( 0), UINT8_C(165), UINT8_C(122), UINT8_C( 1), UINT8_C( 95), UINT8_C( 70), UINT8_C( 0), UINT8_C( 0), UINT8_C(200), UINT8_C(233)) }, { simde_x_mm512_set_epu8(UINT8_C( 5), UINT8_C(188), UINT8_C(238), UINT8_C(219), UINT8_C( 5), UINT8_C( 16), UINT8_C(147), UINT8_C(189), UINT8_C(194), UINT8_C(252), UINT8_C( 14), UINT8_C(147), UINT8_C(227), UINT8_C(135), UINT8_C(147), UINT8_C(201), UINT8_C( 1), UINT8_C(218), UINT8_C(107), UINT8_C( 55), UINT8_C(220), UINT8_C(180), UINT8_C( 35), UINT8_C(216), UINT8_C( 10), UINT8_C(166), UINT8_C(208), UINT8_C(144), UINT8_C(247), UINT8_C(203), UINT8_C(105), UINT8_C( 27), UINT8_C(159), UINT8_C(132), UINT8_C( 4), UINT8_C(220), UINT8_C(240), UINT8_C(169), UINT8_C(167), UINT8_C(152), UINT8_C(226), UINT8_C(155), UINT8_C( 69), UINT8_C( 79), UINT8_C( 59), UINT8_C(159), UINT8_C(241), UINT8_C( 17), UINT8_C(106), UINT8_C(171), UINT8_C(126), UINT8_C(135), UINT8_C(165), UINT8_C( 26), UINT8_C(141), UINT8_C(139), UINT8_C( 91), UINT8_C( 73), UINT8_C(196), UINT8_C( 69), UINT8_C(233), UINT8_C( 48), UINT8_C( 70), UINT8_C(248)), simde_x_mm512_set_epu8(UINT8_C( 91), UINT8_C(153), UINT8_C( 69), UINT8_C( 61), UINT8_C(174), UINT8_C( 73), UINT8_C(122), UINT8_C(234), UINT8_C(122), UINT8_C( 76), UINT8_C(247), UINT8_C(121), UINT8_C(133), UINT8_C(137), UINT8_C(129), 
UINT8_C(126), UINT8_C(105), UINT8_C( 10), UINT8_C(136), UINT8_C(129), UINT8_C(206), UINT8_C( 15), UINT8_C(163), UINT8_C(170), UINT8_C(131), UINT8_C( 45), UINT8_C(217), UINT8_C(137), UINT8_C( 74), UINT8_C(164), UINT8_C(178), UINT8_C( 53), UINT8_C( 17), UINT8_C(235), UINT8_C(105), UINT8_C(154), UINT8_C(255), UINT8_C(237), UINT8_C(110), UINT8_C(172), UINT8_C(163), UINT8_C( 19), UINT8_C(158), UINT8_C(128), UINT8_C(233), UINT8_C( 49), UINT8_C(100), UINT8_C(122), UINT8_C(160), UINT8_C(153), UINT8_C( 60), UINT8_C(232), UINT8_C( 23), UINT8_C(204), UINT8_C(219), UINT8_C(200), UINT8_C(206), UINT8_C( 4), UINT8_C(187), UINT8_C( 1), UINT8_C(231), UINT8_C(246), UINT8_C( 93), UINT8_C( 51)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 35), UINT8_C(169), UINT8_C(158), UINT8_C( 0), UINT8_C( 0), UINT8_C( 25), UINT8_C( 0), UINT8_C( 72), UINT8_C(176), UINT8_C( 0), UINT8_C( 26), UINT8_C( 94), UINT8_C( 0), UINT8_C( 18), UINT8_C( 75), UINT8_C( 0), UINT8_C(208), UINT8_C( 0), UINT8_C( 0), UINT8_C( 14), UINT8_C(165), UINT8_C( 0), UINT8_C( 46), UINT8_C( 0), UINT8_C(121), UINT8_C( 0), UINT8_C( 7), UINT8_C(173), UINT8_C( 39), UINT8_C( 0), UINT8_C( 0), UINT8_C(142), UINT8_C( 0), UINT8_C( 0), UINT8_C( 66), UINT8_C( 0), UINT8_C( 0), UINT8_C( 57), UINT8_C( 0), UINT8_C( 63), UINT8_C(136), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(110), UINT8_C(141), UINT8_C( 0), UINT8_C( 0), UINT8_C( 18), UINT8_C( 66), UINT8_C( 0), UINT8_C(142), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 69), UINT8_C( 9), UINT8_C( 68), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C(197)) }, { simde_x_mm512_set_epu8(UINT8_C(167), UINT8_C( 43), UINT8_C( 52), UINT8_C( 82), UINT8_C(219), UINT8_C( 55), UINT8_C(112), UINT8_C(234), UINT8_C(181), UINT8_C(220), UINT8_C(222), UINT8_C(241), UINT8_C( 35), UINT8_C(214), UINT8_C(155), UINT8_C(251), UINT8_C( 2), UINT8_C( 35), UINT8_C( 14), UINT8_C(183), UINT8_C(206), UINT8_C(223), UINT8_C(191), UINT8_C( 94), UINT8_C(250), UINT8_C(235), UINT8_C(228), UINT8_C( 21), 
UINT8_C(102), UINT8_C(169), UINT8_C(114), UINT8_C(125), UINT8_C(113), UINT8_C(124), UINT8_C(135), UINT8_C(134), UINT8_C( 23), UINT8_C(107), UINT8_C( 24), UINT8_C(126), UINT8_C( 80), UINT8_C( 59), UINT8_C( 39), UINT8_C(195), UINT8_C(151), UINT8_C( 32), UINT8_C( 55), UINT8_C(247), UINT8_C( 60), UINT8_C(131), UINT8_C( 72), UINT8_C(220), UINT8_C( 77), UINT8_C(191), UINT8_C(117), UINT8_C(171), UINT8_C( 98), UINT8_C(173), UINT8_C(187), UINT8_C(204), UINT8_C( 41), UINT8_C(246), UINT8_C(238), UINT8_C( 56)), simde_x_mm512_set_epu8(UINT8_C( 22), UINT8_C(122), UINT8_C(166), UINT8_C( 2), UINT8_C(191), UINT8_C( 51), UINT8_C(162), UINT8_C(206), UINT8_C(241), UINT8_C( 19), UINT8_C(237), UINT8_C( 66), UINT8_C(119), UINT8_C(138), UINT8_C(144), UINT8_C(140), UINT8_C( 44), UINT8_C(244), UINT8_C( 31), UINT8_C( 43), UINT8_C(240), UINT8_C(219), UINT8_C(232), UINT8_C(224), UINT8_C(161), UINT8_C(170), UINT8_C(160), UINT8_C( 80), UINT8_C( 68), UINT8_C( 13), UINT8_C(248), UINT8_C( 67), UINT8_C(107), UINT8_C(131), UINT8_C(104), UINT8_C(176), UINT8_C( 97), UINT8_C(178), UINT8_C(106), UINT8_C(203), UINT8_C(220), UINT8_C(166), UINT8_C( 74), UINT8_C(184), UINT8_C( 59), UINT8_C(175), UINT8_C(248), UINT8_C(231), UINT8_C(201), UINT8_C(157), UINT8_C( 20), UINT8_C( 9), UINT8_C(167), UINT8_C(166), UINT8_C(108), UINT8_C( 56), UINT8_C(237), UINT8_C( 81), UINT8_C(122), UINT8_C( 6), UINT8_C(137), UINT8_C(122), UINT8_C(221), UINT8_C(106)), simde_x_mm512_set_epu8(UINT8_C(145), UINT8_C( 0), UINT8_C( 0), UINT8_C( 80), UINT8_C( 28), UINT8_C( 4), UINT8_C( 0), UINT8_C( 28), UINT8_C( 0), UINT8_C(201), UINT8_C( 0), UINT8_C(175), UINT8_C( 0), UINT8_C( 76), UINT8_C( 11), UINT8_C(111), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(140), UINT8_C( 0), UINT8_C( 4), UINT8_C( 0), UINT8_C( 0), UINT8_C( 89), UINT8_C( 65), UINT8_C( 68), UINT8_C( 0), UINT8_C( 34), UINT8_C(156), UINT8_C( 0), UINT8_C( 58), UINT8_C( 6), UINT8_C( 0), UINT8_C( 31), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), 
UINT8_C( 0), UINT8_C( 0), UINT8_C( 11), UINT8_C( 92), UINT8_C( 0), UINT8_C( 0), UINT8_C( 16), UINT8_C( 0), UINT8_C( 0), UINT8_C( 52), UINT8_C(211), UINT8_C( 0), UINT8_C( 25), UINT8_C( 9), UINT8_C(115), UINT8_C( 0), UINT8_C( 92), UINT8_C( 65), UINT8_C(198), UINT8_C( 0), UINT8_C(124), UINT8_C( 17), UINT8_C( 0)) }, { simde_x_mm512_set_epu8(UINT8_C(105), UINT8_C(141), UINT8_C(121), UINT8_C(155), UINT8_C( 0), UINT8_C( 63), UINT8_C(214), UINT8_C(222), UINT8_C(251), UINT8_C(209), UINT8_C(133), UINT8_C(204), UINT8_C(170), UINT8_C(228), UINT8_C(193), UINT8_C( 20), UINT8_C(196), UINT8_C(193), UINT8_C( 99), UINT8_C( 78), UINT8_C( 56), UINT8_C(184), UINT8_C(201), UINT8_C(184), UINT8_C( 79), UINT8_C(175), UINT8_C(124), UINT8_C(171), UINT8_C(191), UINT8_C(122), UINT8_C(231), UINT8_C(198), UINT8_C(192), UINT8_C( 52), UINT8_C(244), UINT8_C( 1), UINT8_C(194), UINT8_C(228), UINT8_C(228), UINT8_C(152), UINT8_C( 54), UINT8_C(153), UINT8_C(201), UINT8_C(234), UINT8_C(165), UINT8_C( 6), UINT8_C(247), UINT8_C(225), UINT8_C( 18), UINT8_C(145), UINT8_C( 58), UINT8_C( 71), UINT8_C(183), UINT8_C(160), UINT8_C( 28), UINT8_C(252), UINT8_C( 47), UINT8_C( 66), UINT8_C(121), UINT8_C( 38), UINT8_C( 69), UINT8_C(149), UINT8_C(199), UINT8_C(136)), simde_x_mm512_set_epu8(UINT8_C(207), UINT8_C( 15), UINT8_C(241), UINT8_C(197), UINT8_C(143), UINT8_C(102), UINT8_C(208), UINT8_C(178), UINT8_C( 31), UINT8_C( 94), UINT8_C( 79), UINT8_C( 92), UINT8_C(106), UINT8_C(188), UINT8_C( 96), UINT8_C(159), UINT8_C(229), UINT8_C(138), UINT8_C(245), UINT8_C(112), UINT8_C(131), UINT8_C( 70), UINT8_C( 26), UINT8_C(218), UINT8_C(240), UINT8_C(144), UINT8_C( 10), UINT8_C( 98), UINT8_C(252), UINT8_C(120), UINT8_C(223), UINT8_C(129), UINT8_C(191), UINT8_C(216), UINT8_C( 88), UINT8_C(250), UINT8_C( 74), UINT8_C( 41), UINT8_C( 39), UINT8_C( 79), UINT8_C(131), UINT8_C(249), UINT8_C( 62), UINT8_C(144), UINT8_C(137), UINT8_C(247), UINT8_C( 71), UINT8_C(188), UINT8_C(177), UINT8_C( 48), UINT8_C(236), UINT8_C(159), UINT8_C(140), 
UINT8_C(120), UINT8_C(191), UINT8_C( 6), UINT8_C(224), UINT8_C(181), UINT8_C(150), UINT8_C( 26), UINT8_C(160), UINT8_C( 50), UINT8_C(211), UINT8_C( 16)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C(126), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 6), UINT8_C( 44), UINT8_C(220), UINT8_C(115), UINT8_C( 54), UINT8_C(112), UINT8_C( 64), UINT8_C( 40), UINT8_C( 97), UINT8_C( 0), UINT8_C( 0), UINT8_C( 55), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(114), UINT8_C(175), UINT8_C( 0), UINT8_C( 0), UINT8_C( 31), UINT8_C(114), UINT8_C( 73), UINT8_C( 0), UINT8_C( 2), UINT8_C( 8), UINT8_C( 69), UINT8_C( 1), UINT8_C( 0), UINT8_C(156), UINT8_C( 0), UINT8_C(120), UINT8_C(187), UINT8_C(189), UINT8_C( 73), UINT8_C( 0), UINT8_C( 0), UINT8_C(139), UINT8_C( 90), UINT8_C( 28), UINT8_C( 0), UINT8_C(176), UINT8_C( 37), UINT8_C( 0), UINT8_C( 97), UINT8_C( 0), UINT8_C( 0), UINT8_C( 43), UINT8_C( 40), UINT8_C( 0), UINT8_C(246), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 12), UINT8_C( 0), UINT8_C( 99), UINT8_C( 0), UINT8_C(120)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_subs_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m512i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_subs_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask64 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu8(UINT8_C(160), UINT8_C(209), UINT8_C( 53), UINT8_C(241), UINT8_C(169), UINT8_C( 74), UINT8_C(195), UINT8_C(156), UINT8_C(226), UINT8_C(114), UINT8_C(119), UINT8_C(203), UINT8_C( 15), UINT8_C( 80), UINT8_C(207), UINT8_C(120), UINT8_C(164), UINT8_C(171), UINT8_C( 95), UINT8_C(130), UINT8_C(161), UINT8_C( 3), UINT8_C( 82), UINT8_C(197), UINT8_C( 78), UINT8_C(216), UINT8_C(176), UINT8_C(131), UINT8_C(213), UINT8_C( 21), UINT8_C( 48), UINT8_C(162), UINT8_C(135), UINT8_C( 81), UINT8_C( 67), UINT8_C(242), UINT8_C( 97), UINT8_C( 89), 
UINT8_C(174), UINT8_C( 64), UINT8_C(170), UINT8_C(194), UINT8_C(235), UINT8_C(133), UINT8_C( 33), UINT8_C(160), UINT8_C(236), UINT8_C( 7), UINT8_C(224), UINT8_C( 63), UINT8_C(130), UINT8_C( 28), UINT8_C( 83), UINT8_C(152), UINT8_C(149), UINT8_C(111), UINT8_C(188), UINT8_C( 24), UINT8_C( 57), UINT8_C( 80), UINT8_C(100), UINT8_C( 11), UINT8_C(241), UINT8_C(117)), UINT64_C( 2661966794), simde_x_mm512_set_epu8(UINT8_C(164), UINT8_C( 45), UINT8_C(193), UINT8_C(111), UINT8_C( 96), UINT8_C(220), UINT8_C( 54), UINT8_C(232), UINT8_C(214), UINT8_C(244), UINT8_C(236), UINT8_C( 74), UINT8_C( 47), UINT8_C(180), UINT8_C(143), UINT8_C(214), UINT8_C( 6), UINT8_C(178), UINT8_C( 35), UINT8_C(129), UINT8_C( 21), UINT8_C(157), UINT8_C( 30), UINT8_C( 29), UINT8_C(226), UINT8_C(222), UINT8_C( 14), UINT8_C(170), UINT8_C(223), UINT8_C(200), UINT8_C( 41), UINT8_C(245), UINT8_C( 7), UINT8_C(251), UINT8_C( 30), UINT8_C(217), UINT8_C( 93), UINT8_C(228), UINT8_C(143), UINT8_C( 9), UINT8_C(221), UINT8_C( 49), UINT8_C(207), UINT8_C( 44), UINT8_C( 52), UINT8_C(108), UINT8_C(124), UINT8_C( 14), UINT8_C(144), UINT8_C(229), UINT8_C(102), UINT8_C( 16), UINT8_C( 61), UINT8_C( 2), UINT8_C(169), UINT8_C( 72), UINT8_C(194), UINT8_C(122), UINT8_C(216), UINT8_C(151), UINT8_C(244), UINT8_C(227), UINT8_C( 5), UINT8_C( 98)), simde_x_mm512_set_epu8(UINT8_C( 53), UINT8_C( 65), UINT8_C(131), UINT8_C(189), UINT8_C(196), UINT8_C(200), UINT8_C(234), UINT8_C( 92), UINT8_C( 99), UINT8_C(187), UINT8_C(123), UINT8_C(180), UINT8_C(209), UINT8_C(163), UINT8_C( 86), UINT8_C( 26), UINT8_C(192), UINT8_C( 36), UINT8_C(205), UINT8_C(108), UINT8_C(173), UINT8_C( 71), UINT8_C(190), UINT8_C( 46), UINT8_C(173), UINT8_C(156), UINT8_C( 59), UINT8_C(250), UINT8_C( 13), UINT8_C(171), UINT8_C( 1), UINT8_C( 47), UINT8_C(227), UINT8_C( 45), UINT8_C(181), UINT8_C( 45), UINT8_C(116), UINT8_C( 89), UINT8_C(112), UINT8_C(127), UINT8_C( 42), UINT8_C(134), UINT8_C(128), UINT8_C(206), UINT8_C( 87), UINT8_C(206), UINT8_C( 39), UINT8_C( 7), 
UINT8_C( 48), UINT8_C( 47), UINT8_C( 79), UINT8_C( 32), UINT8_C( 39), UINT8_C(205), UINT8_C(174), UINT8_C(244), UINT8_C(251), UINT8_C( 10), UINT8_C( 13), UINT8_C( 53), UINT8_C(255), UINT8_C(222), UINT8_C( 66), UINT8_C( 52)), simde_x_mm512_set_epu8(UINT8_C(160), UINT8_C(209), UINT8_C( 53), UINT8_C(241), UINT8_C(169), UINT8_C( 74), UINT8_C(195), UINT8_C(156), UINT8_C(226), UINT8_C(114), UINT8_C(119), UINT8_C(203), UINT8_C( 15), UINT8_C( 80), UINT8_C(207), UINT8_C(120), UINT8_C(164), UINT8_C(171), UINT8_C( 95), UINT8_C(130), UINT8_C(161), UINT8_C( 3), UINT8_C( 82), UINT8_C(197), UINT8_C( 78), UINT8_C(216), UINT8_C(176), UINT8_C(131), UINT8_C(213), UINT8_C( 21), UINT8_C( 48), UINT8_C(162), UINT8_C( 0), UINT8_C( 81), UINT8_C( 67), UINT8_C(172), UINT8_C( 0), UINT8_C(139), UINT8_C( 31), UINT8_C( 64), UINT8_C(179), UINT8_C(194), UINT8_C( 79), UINT8_C(133), UINT8_C( 0), UINT8_C(160), UINT8_C( 85), UINT8_C( 7), UINT8_C(224), UINT8_C(182), UINT8_C( 23), UINT8_C( 28), UINT8_C( 83), UINT8_C(152), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(112), UINT8_C( 57), UINT8_C( 80), UINT8_C( 0), UINT8_C( 11), UINT8_C( 0), UINT8_C(117)) }, { simde_x_mm512_set_epu8(UINT8_C(108), UINT8_C( 96), UINT8_C( 9), UINT8_C(145), UINT8_C(131), UINT8_C(204), UINT8_C(151), UINT8_C(200), UINT8_C(172), UINT8_C(193), UINT8_C(248), UINT8_C(227), UINT8_C(159), UINT8_C( 34), UINT8_C( 70), UINT8_C( 14), UINT8_C(229), UINT8_C( 98), UINT8_C( 73), UINT8_C(209), UINT8_C( 59), UINT8_C(209), UINT8_C( 82), UINT8_C( 90), UINT8_C(172), UINT8_C( 10), UINT8_C(185), UINT8_C(194), UINT8_C(184), UINT8_C( 76), UINT8_C(173), UINT8_C( 75), UINT8_C(136), UINT8_C(130), UINT8_C(227), UINT8_C(162), UINT8_C(154), UINT8_C(158), UINT8_C(121), UINT8_C( 19), UINT8_C(211), UINT8_C(197), UINT8_C( 15), UINT8_C( 80), UINT8_C( 75), UINT8_C( 26), UINT8_C( 77), UINT8_C(210), UINT8_C(117), UINT8_C( 85), UINT8_C(204), UINT8_C( 0), UINT8_C(244), UINT8_C( 11), UINT8_C(128), UINT8_C(245), UINT8_C(177), UINT8_C(132), UINT8_C(179), UINT8_C(117), 
UINT8_C(215), UINT8_C( 43), UINT8_C(143), UINT8_C( 75)), UINT64_C( 1314676378), simde_x_mm512_set_epu8(UINT8_C(122), UINT8_C(121), UINT8_C( 15), UINT8_C(167), UINT8_C(146), UINT8_C( 1), UINT8_C( 27), UINT8_C( 93), UINT8_C(177), UINT8_C(197), UINT8_C(189), UINT8_C(233), UINT8_C(110), UINT8_C( 47), UINT8_C( 66), UINT8_C( 91), UINT8_C( 52), UINT8_C( 7), UINT8_C(197), UINT8_C( 20), UINT8_C( 34), UINT8_C(208), UINT8_C(127), UINT8_C( 73), UINT8_C(145), UINT8_C( 2), UINT8_C( 27), UINT8_C(235), UINT8_C(109), UINT8_C(152), UINT8_C(220), UINT8_C(143), UINT8_C(189), UINT8_C(222), UINT8_C(102), UINT8_C(126), UINT8_C( 80), UINT8_C( 51), UINT8_C(253), UINT8_C(186), UINT8_C( 51), UINT8_C(206), UINT8_C(118), UINT8_C( 48), UINT8_C( 59), UINT8_C(184), UINT8_C(197), UINT8_C(107), UINT8_C( 52), UINT8_C( 5), UINT8_C(208), UINT8_C(231), UINT8_C(151), UINT8_C(166), UINT8_C(151), UINT8_C( 49), UINT8_C( 74), UINT8_C(239), UINT8_C(157), UINT8_C(112), UINT8_C(217), UINT8_C(111), UINT8_C( 21), UINT8_C(221)), simde_x_mm512_set_epu8(UINT8_C( 54), UINT8_C( 10), UINT8_C( 52), UINT8_C(157), UINT8_C( 51), UINT8_C( 63), UINT8_C(190), UINT8_C(105), UINT8_C(159), UINT8_C( 57), UINT8_C(152), UINT8_C( 2), UINT8_C(134), UINT8_C(112), UINT8_C( 29), UINT8_C( 8), UINT8_C(240), UINT8_C( 55), UINT8_C(110), UINT8_C(144), UINT8_C(247), UINT8_C( 78), UINT8_C(254), UINT8_C( 89), UINT8_C(189), UINT8_C(169), UINT8_C(164), UINT8_C(239), UINT8_C(227), UINT8_C(187), UINT8_C( 11), UINT8_C(211), UINT8_C(183), UINT8_C( 9), UINT8_C(204), UINT8_C(138), UINT8_C(223), UINT8_C( 91), UINT8_C(198), UINT8_C( 18), UINT8_C(178), UINT8_C(209), UINT8_C(152), UINT8_C(100), UINT8_C(247), UINT8_C(142), UINT8_C( 53), UINT8_C( 94), UINT8_C(238), UINT8_C(239), UINT8_C(151), UINT8_C(119), UINT8_C(229), UINT8_C( 96), UINT8_C(245), UINT8_C( 0), UINT8_C(141), UINT8_C( 95), UINT8_C(101), UINT8_C(146), UINT8_C(141), UINT8_C( 37), UINT8_C( 41), UINT8_C(148)), simde_x_mm512_set_epu8(UINT8_C(108), UINT8_C( 96), UINT8_C( 9), UINT8_C(145), 
UINT8_C(131), UINT8_C(204), UINT8_C(151), UINT8_C(200), UINT8_C(172), UINT8_C(193), UINT8_C(248), UINT8_C(227), UINT8_C(159), UINT8_C( 34), UINT8_C( 70), UINT8_C( 14), UINT8_C(229), UINT8_C( 98), UINT8_C( 73), UINT8_C(209), UINT8_C( 59), UINT8_C(209), UINT8_C( 82), UINT8_C( 90), UINT8_C(172), UINT8_C( 10), UINT8_C(185), UINT8_C(194), UINT8_C(184), UINT8_C( 76), UINT8_C(173), UINT8_C( 75), UINT8_C(136), UINT8_C(213), UINT8_C(227), UINT8_C(162), UINT8_C( 0), UINT8_C( 0), UINT8_C( 55), UINT8_C( 19), UINT8_C(211), UINT8_C( 0), UINT8_C( 15), UINT8_C( 0), UINT8_C( 0), UINT8_C( 42), UINT8_C( 77), UINT8_C(210), UINT8_C(117), UINT8_C( 0), UINT8_C(204), UINT8_C(112), UINT8_C( 0), UINT8_C( 70), UINT8_C( 0), UINT8_C(245), UINT8_C( 0), UINT8_C(132), UINT8_C(179), UINT8_C( 0), UINT8_C( 76), UINT8_C( 43), UINT8_C( 0), UINT8_C( 75)) }, { simde_x_mm512_set_epu8(UINT8_C(206), UINT8_C(206), UINT8_C( 30), UINT8_C( 15), UINT8_C(114), UINT8_C(103), UINT8_C( 13), UINT8_C(110), UINT8_C(166), UINT8_C( 46), UINT8_C(130), UINT8_C(151), UINT8_C( 16), UINT8_C( 40), UINT8_C(154), UINT8_C( 67), UINT8_C(184), UINT8_C( 37), UINT8_C(207), UINT8_C( 94), UINT8_C( 79), UINT8_C(202), UINT8_C(122), UINT8_C(203), UINT8_C(179), UINT8_C(164), UINT8_C( 37), UINT8_C(173), UINT8_C( 95), UINT8_C(170), UINT8_C( 47), UINT8_C(161), UINT8_C(255), UINT8_C(185), UINT8_C(163), UINT8_C(181), UINT8_C( 29), UINT8_C(148), UINT8_C(134), UINT8_C( 11), UINT8_C(137), UINT8_C(213), UINT8_C(115), UINT8_C(139), UINT8_C( 77), UINT8_C(129), UINT8_C( 55), UINT8_C( 6), UINT8_C(222), UINT8_C(184), UINT8_C(104), UINT8_C( 68), UINT8_C(167), UINT8_C( 53), UINT8_C( 35), UINT8_C( 81), UINT8_C(255), UINT8_C(129), UINT8_C(219), UINT8_C(221), UINT8_C(186), UINT8_C( 75), UINT8_C(231), UINT8_C(243)), UINT64_C( 1370699036), simde_x_mm512_set_epu8(UINT8_C(228), UINT8_C( 29), UINT8_C(156), UINT8_C(201), UINT8_C(217), UINT8_C( 60), UINT8_C( 61), UINT8_C( 86), UINT8_C(205), UINT8_C(204), UINT8_C( 26), UINT8_C( 35), UINT8_C(113), UINT8_C( 54), 
UINT8_C( 44), UINT8_C(150), UINT8_C( 5), UINT8_C(127), UINT8_C(118), UINT8_C(122), UINT8_C( 8), UINT8_C( 76), UINT8_C( 62), UINT8_C( 54), UINT8_C(171), UINT8_C(236), UINT8_C(228), UINT8_C( 23), UINT8_C(167), UINT8_C( 85), UINT8_C(106), UINT8_C(203), UINT8_C( 75), UINT8_C(211), UINT8_C(183), UINT8_C(233), UINT8_C(230), UINT8_C(155), UINT8_C(159), UINT8_C(231), UINT8_C( 53), UINT8_C( 16), UINT8_C(224), UINT8_C(218), UINT8_C(254), UINT8_C(170), UINT8_C(229), UINT8_C(158), UINT8_C(154), UINT8_C(108), UINT8_C(114), UINT8_C(193), UINT8_C(254), UINT8_C( 39), UINT8_C(143), UINT8_C(155), UINT8_C(241), UINT8_C(232), UINT8_C(128), UINT8_C(233), UINT8_C(178), UINT8_C( 6), UINT8_C(154), UINT8_C(184)), simde_x_mm512_set_epu8(UINT8_C(165), UINT8_C(173), UINT8_C(234), UINT8_C(165), UINT8_C(146), UINT8_C(243), UINT8_C( 52), UINT8_C( 89), UINT8_C(227), UINT8_C( 93), UINT8_C(108), UINT8_C( 47), UINT8_C(207), UINT8_C(115), UINT8_C(178), UINT8_C(155), UINT8_C(250), UINT8_C(115), UINT8_C( 2), UINT8_C( 73), UINT8_C(254), UINT8_C(119), UINT8_C( 72), UINT8_C(108), UINT8_C( 90), UINT8_C( 96), UINT8_C( 56), UINT8_C(182), UINT8_C( 64), UINT8_C( 8), UINT8_C(172), UINT8_C(169), UINT8_C( 32), UINT8_C( 75), UINT8_C( 92), UINT8_C(114), UINT8_C( 16), UINT8_C( 36), UINT8_C(161), UINT8_C( 46), UINT8_C(214), UINT8_C(247), UINT8_C(176), UINT8_C( 43), UINT8_C(168), UINT8_C(161), UINT8_C(148), UINT8_C( 77), UINT8_C(231), UINT8_C(174), UINT8_C( 75), UINT8_C( 48), UINT8_C(123), UINT8_C(251), UINT8_C(221), UINT8_C(100), UINT8_C( 39), UINT8_C(159), UINT8_C( 61), UINT8_C( 37), UINT8_C(160), UINT8_C(166), UINT8_C( 87), UINT8_C(103)), simde_x_mm512_set_epu8(UINT8_C(206), UINT8_C(206), UINT8_C( 30), UINT8_C( 15), UINT8_C(114), UINT8_C(103), UINT8_C( 13), UINT8_C(110), UINT8_C(166), UINT8_C( 46), UINT8_C(130), UINT8_C(151), UINT8_C( 16), UINT8_C( 40), UINT8_C(154), UINT8_C( 67), UINT8_C(184), UINT8_C( 37), UINT8_C(207), UINT8_C( 94), UINT8_C( 79), UINT8_C(202), UINT8_C(122), UINT8_C(203), UINT8_C(179), 
UINT8_C(164), UINT8_C( 37), UINT8_C(173), UINT8_C( 95), UINT8_C(170), UINT8_C( 47), UINT8_C(161), UINT8_C(255), UINT8_C(136), UINT8_C(163), UINT8_C(119), UINT8_C( 29), UINT8_C(148), UINT8_C(134), UINT8_C(185), UINT8_C( 0), UINT8_C(213), UINT8_C( 48), UINT8_C(175), UINT8_C( 77), UINT8_C(129), UINT8_C( 81), UINT8_C( 81), UINT8_C(222), UINT8_C(184), UINT8_C( 39), UINT8_C(145), UINT8_C(167), UINT8_C( 0), UINT8_C( 35), UINT8_C( 55), UINT8_C(255), UINT8_C(129), UINT8_C(219), UINT8_C(196), UINT8_C( 18), UINT8_C( 0), UINT8_C(231), UINT8_C(243)) }, { simde_x_mm512_set_epu8(UINT8_C(215), UINT8_C( 43), UINT8_C(249), UINT8_C(194), UINT8_C(157), UINT8_C(199), UINT8_C( 98), UINT8_C(233), UINT8_C( 27), UINT8_C(181), UINT8_C(227), UINT8_C( 75), UINT8_C( 73), UINT8_C( 96), UINT8_C(149), UINT8_C(223), UINT8_C(104), UINT8_C( 32), UINT8_C( 22), UINT8_C(168), UINT8_C( 89), UINT8_C( 99), UINT8_C(164), UINT8_C(153), UINT8_C( 65), UINT8_C(170), UINT8_C( 53), UINT8_C( 47), UINT8_C(191), UINT8_C(169), UINT8_C( 40), UINT8_C( 76), UINT8_C(114), UINT8_C(220), UINT8_C( 85), UINT8_C(210), UINT8_C(141), UINT8_C( 16), UINT8_C( 66), UINT8_C(216), UINT8_C(211), UINT8_C(107), UINT8_C(217), UINT8_C(119), UINT8_C(250), UINT8_C( 64), UINT8_C(254), UINT8_C( 91), UINT8_C( 35), UINT8_C(245), UINT8_C(193), UINT8_C( 56), UINT8_C(248), UINT8_C(186), UINT8_C( 68), UINT8_C(150), UINT8_C(220), UINT8_C(248), UINT8_C(101), UINT8_C( 0), UINT8_C(116), UINT8_C(116), UINT8_C( 64), UINT8_C(175)), UINT64_C( 4015290579), simde_x_mm512_set_epu8(UINT8_C( 21), UINT8_C(153), UINT8_C(252), UINT8_C(180), UINT8_C(184), UINT8_C( 70), UINT8_C( 16), UINT8_C(164), UINT8_C( 32), UINT8_C(157), UINT8_C(177), UINT8_C(185), UINT8_C( 75), UINT8_C(213), UINT8_C(118), UINT8_C( 48), UINT8_C(250), UINT8_C(247), UINT8_C(144), UINT8_C(182), UINT8_C( 89), UINT8_C(193), UINT8_C(215), UINT8_C(109), UINT8_C( 32), UINT8_C( 44), UINT8_C( 30), UINT8_C(115), UINT8_C(122), UINT8_C(173), UINT8_C( 22), UINT8_C(177), UINT8_C(163), UINT8_C(190), 
UINT8_C(161), UINT8_C(157), UINT8_C(186), UINT8_C(207), UINT8_C(230), UINT8_C( 94), UINT8_C(188), UINT8_C( 54), UINT8_C(164), UINT8_C(150), UINT8_C( 11), UINT8_C( 80), UINT8_C(167), UINT8_C(171), UINT8_C( 7), UINT8_C(177), UINT8_C(223), UINT8_C(132), UINT8_C(186), UINT8_C( 43), UINT8_C( 15), UINT8_C(205), UINT8_C( 56), UINT8_C(209), UINT8_C( 90), UINT8_C(226), UINT8_C(128), UINT8_C( 65), UINT8_C( 94), UINT8_C(244)), simde_x_mm512_set_epu8(UINT8_C( 31), UINT8_C( 58), UINT8_C(132), UINT8_C( 37), UINT8_C(154), UINT8_C( 6), UINT8_C( 75), UINT8_C(156), UINT8_C(149), UINT8_C(144), UINT8_C(181), UINT8_C(171), UINT8_C(160), UINT8_C( 77), UINT8_C(155), UINT8_C(252), UINT8_C(181), UINT8_C(149), UINT8_C( 69), UINT8_C( 75), UINT8_C(202), UINT8_C( 32), UINT8_C( 21), UINT8_C( 82), UINT8_C(184), UINT8_C(152), UINT8_C(217), UINT8_C( 20), UINT8_C( 41), UINT8_C( 41), UINT8_C( 1), UINT8_C( 97), UINT8_C(165), UINT8_C(111), UINT8_C(131), UINT8_C( 21), UINT8_C( 63), UINT8_C( 20), UINT8_C(145), UINT8_C(147), UINT8_C( 66), UINT8_C( 29), UINT8_C(208), UINT8_C( 15), UINT8_C(209), UINT8_C(114), UINT8_C(194), UINT8_C( 3), UINT8_C( 96), UINT8_C(111), UINT8_C( 93), UINT8_C(100), UINT8_C(124), UINT8_C( 70), UINT8_C(116), UINT8_C(210), UINT8_C( 69), UINT8_C(255), UINT8_C(157), UINT8_C(160), UINT8_C( 53), UINT8_C( 12), UINT8_C(237), UINT8_C(134)), simde_x_mm512_set_epu8(UINT8_C(215), UINT8_C( 43), UINT8_C(249), UINT8_C(194), UINT8_C(157), UINT8_C(199), UINT8_C( 98), UINT8_C(233), UINT8_C( 27), UINT8_C(181), UINT8_C(227), UINT8_C( 75), UINT8_C( 73), UINT8_C( 96), UINT8_C(149), UINT8_C(223), UINT8_C(104), UINT8_C( 32), UINT8_C( 22), UINT8_C(168), UINT8_C( 89), UINT8_C( 99), UINT8_C(164), UINT8_C(153), UINT8_C( 65), UINT8_C(170), UINT8_C( 53), UINT8_C( 47), UINT8_C(191), UINT8_C(169), UINT8_C( 40), UINT8_C( 76), UINT8_C( 0), UINT8_C( 79), UINT8_C( 30), UINT8_C(210), UINT8_C(123), UINT8_C(187), UINT8_C( 85), UINT8_C( 0), UINT8_C(211), UINT8_C( 25), UINT8_C(217), UINT8_C(135), UINT8_C(250), UINT8_C( 
0), UINT8_C(254), UINT8_C( 91), UINT8_C( 35), UINT8_C( 66), UINT8_C(130), UINT8_C( 32), UINT8_C( 62), UINT8_C(186), UINT8_C( 68), UINT8_C(150), UINT8_C( 0), UINT8_C( 0), UINT8_C(101), UINT8_C( 66), UINT8_C(116), UINT8_C(116), UINT8_C( 0), UINT8_C(110)) }, { simde_x_mm512_set_epu8(UINT8_C(181), UINT8_C( 56), UINT8_C(192), UINT8_C(150), UINT8_C( 44), UINT8_C(209), UINT8_C( 13), UINT8_C(238), UINT8_C(199), UINT8_C(135), UINT8_C(144), UINT8_C( 8), UINT8_C( 13), UINT8_C(143), UINT8_C(242), UINT8_C(219), UINT8_C(103), UINT8_C(252), UINT8_C(214), UINT8_C(167), UINT8_C( 3), UINT8_C(136), UINT8_C(222), UINT8_C(241), UINT8_C( 7), UINT8_C( 92), UINT8_C(141), UINT8_C(140), UINT8_C(104), UINT8_C(125), UINT8_C(212), UINT8_C(114), UINT8_C(114), UINT8_C(229), UINT8_C(235), UINT8_C(144), UINT8_C(233), UINT8_C(188), UINT8_C( 95), UINT8_C(193), UINT8_C( 27), UINT8_C(148), UINT8_C( 89), UINT8_C(138), UINT8_C(232), UINT8_C(105), UINT8_C(208), UINT8_C(244), UINT8_C( 62), UINT8_C(166), UINT8_C(227), UINT8_C( 87), UINT8_C(254), UINT8_C( 17), UINT8_C(235), UINT8_C(147), UINT8_C(215), UINT8_C( 8), UINT8_C(107), UINT8_C( 75), UINT8_C(237), UINT8_C(121), UINT8_C( 75), UINT8_C( 80)), UINT64_C( 3983161372), simde_x_mm512_set_epu8(UINT8_C(105), UINT8_C(165), UINT8_C(196), UINT8_C(175), UINT8_C( 21), UINT8_C( 33), UINT8_C( 59), UINT8_C( 91), UINT8_C(173), UINT8_C( 84), UINT8_C(102), UINT8_C(234), UINT8_C(240), UINT8_C( 97), UINT8_C( 44), UINT8_C(117), UINT8_C(253), UINT8_C( 63), UINT8_C(177), UINT8_C( 7), UINT8_C( 84), UINT8_C(101), UINT8_C( 54), UINT8_C(237), UINT8_C(110), UINT8_C( 66), UINT8_C( 52), UINT8_C(158), UINT8_C(218), UINT8_C(146), UINT8_C(129), UINT8_C( 7), UINT8_C(170), UINT8_C( 34), UINT8_C( 11), UINT8_C( 33), UINT8_C(109), UINT8_C( 75), UINT8_C(249), UINT8_C(174), UINT8_C( 89), UINT8_C(221), UINT8_C(115), UINT8_C(113), UINT8_C(179), UINT8_C( 55), UINT8_C(200), UINT8_C( 84), UINT8_C(219), UINT8_C(209), UINT8_C(106), UINT8_C(223), UINT8_C(220), UINT8_C(150), UINT8_C(243), UINT8_C( 
88), UINT8_C( 23), UINT8_C( 59), UINT8_C(210), UINT8_C(197), UINT8_C( 39), UINT8_C(187), UINT8_C( 89), UINT8_C( 99)), simde_x_mm512_set_epu8(UINT8_C(202), UINT8_C( 54), UINT8_C( 42), UINT8_C(168), UINT8_C( 22), UINT8_C(221), UINT8_C( 17), UINT8_C(189), UINT8_C( 39), UINT8_C(155), UINT8_C(135), UINT8_C(162), UINT8_C(148), UINT8_C(181), UINT8_C(120), UINT8_C( 52), UINT8_C(206), UINT8_C( 24), UINT8_C(206), UINT8_C(117), UINT8_C(240), UINT8_C( 17), UINT8_C(210), UINT8_C(188), UINT8_C(193), UINT8_C(112), UINT8_C( 80), UINT8_C(199), UINT8_C(136), UINT8_C( 33), UINT8_C( 97), UINT8_C(247), UINT8_C(161), UINT8_C( 97), UINT8_C( 42), UINT8_C(181), UINT8_C( 25), UINT8_C( 62), UINT8_C( 47), UINT8_C( 32), UINT8_C( 10), UINT8_C(145), UINT8_C( 81), UINT8_C( 51), UINT8_C(106), UINT8_C( 74), UINT8_C(253), UINT8_C( 27), UINT8_C(198), UINT8_C( 49), UINT8_C(205), UINT8_C(189), UINT8_C( 32), UINT8_C(168), UINT8_C(102), UINT8_C( 88), UINT8_C( 86), UINT8_C(170), UINT8_C(185), UINT8_C( 98), UINT8_C(232), UINT8_C( 43), UINT8_C( 26), UINT8_C(165)), simde_x_mm512_set_epu8(UINT8_C(181), UINT8_C( 56), UINT8_C(192), UINT8_C(150), UINT8_C( 44), UINT8_C(209), UINT8_C( 13), UINT8_C(238), UINT8_C(199), UINT8_C(135), UINT8_C(144), UINT8_C( 8), UINT8_C( 13), UINT8_C(143), UINT8_C(242), UINT8_C(219), UINT8_C(103), UINT8_C(252), UINT8_C(214), UINT8_C(167), UINT8_C( 3), UINT8_C(136), UINT8_C(222), UINT8_C(241), UINT8_C( 7), UINT8_C( 92), UINT8_C(141), UINT8_C(140), UINT8_C(104), UINT8_C(125), UINT8_C(212), UINT8_C(114), UINT8_C( 9), UINT8_C( 0), UINT8_C( 0), UINT8_C(144), UINT8_C( 84), UINT8_C( 13), UINT8_C( 95), UINT8_C(142), UINT8_C( 27), UINT8_C( 76), UINT8_C( 34), UINT8_C(138), UINT8_C( 73), UINT8_C(105), UINT8_C( 0), UINT8_C(244), UINT8_C( 62), UINT8_C(166), UINT8_C( 0), UINT8_C( 34), UINT8_C(188), UINT8_C( 17), UINT8_C(235), UINT8_C(147), UINT8_C(215), UINT8_C( 8), UINT8_C(107), UINT8_C( 99), UINT8_C( 0), UINT8_C(144), UINT8_C( 75), UINT8_C( 80)) }, { simde_x_mm512_set_epu8(UINT8_C( 98), UINT8_C( 
81), UINT8_C(150), UINT8_C(221), UINT8_C( 96), UINT8_C( 40), UINT8_C(142), UINT8_C( 23), UINT8_C( 87), UINT8_C(207), UINT8_C(191), UINT8_C( 92), UINT8_C( 77), UINT8_C(157), UINT8_C(184), UINT8_C(138), UINT8_C(172), UINT8_C( 66), UINT8_C( 9), UINT8_C(115), UINT8_C(240), UINT8_C(175), UINT8_C(107), UINT8_C( 49), UINT8_C( 50), UINT8_C(243), UINT8_C( 59), UINT8_C( 91), UINT8_C( 0), UINT8_C( 76), UINT8_C( 9), UINT8_C( 2), UINT8_C( 15), UINT8_C( 39), UINT8_C(150), UINT8_C( 31), UINT8_C( 12), UINT8_C(247), UINT8_C(212), UINT8_C( 40), UINT8_C(158), UINT8_C( 81), UINT8_C(244), UINT8_C( 20), UINT8_C(204), UINT8_C( 92), UINT8_C(198), UINT8_C(205), UINT8_C( 11), UINT8_C( 60), UINT8_C(191), UINT8_C( 47), UINT8_C( 22), UINT8_C(151), UINT8_C(149), UINT8_C( 16), UINT8_C(247), UINT8_C(164), UINT8_C( 21), UINT8_C( 75), UINT8_C(153), UINT8_C(240), UINT8_C(253), UINT8_C(242)), UINT64_C( 56397128), simde_x_mm512_set_epu8(UINT8_C(183), UINT8_C(245), UINT8_C(114), UINT8_C(158), UINT8_C( 37), UINT8_C( 8), UINT8_C(173), UINT8_C( 26), UINT8_C(128), UINT8_C( 46), UINT8_C(239), UINT8_C(202), UINT8_C( 36), UINT8_C(170), UINT8_C( 57), UINT8_C( 95), UINT8_C(183), UINT8_C(182), UINT8_C( 41), UINT8_C(122), UINT8_C(112), UINT8_C(137), UINT8_C(112), UINT8_C( 40), UINT8_C(142), UINT8_C( 91), UINT8_C( 37), UINT8_C( 25), UINT8_C(202), UINT8_C(195), UINT8_C(221), UINT8_C(250), UINT8_C(146), UINT8_C(239), UINT8_C( 69), UINT8_C(186), UINT8_C(170), UINT8_C(230), UINT8_C(161), UINT8_C(178), UINT8_C( 40), UINT8_C(125), UINT8_C(237), UINT8_C(124), UINT8_C(248), UINT8_C( 56), UINT8_C( 8), UINT8_C( 55), UINT8_C(249), UINT8_C(227), UINT8_C(149), UINT8_C(232), UINT8_C( 71), UINT8_C(244), UINT8_C(128), UINT8_C( 10), UINT8_C(194), UINT8_C(169), UINT8_C(169), UINT8_C( 87), UINT8_C(140), UINT8_C(131), UINT8_C( 55), UINT8_C(150)), simde_x_mm512_set_epu8(UINT8_C(110), UINT8_C( 33), UINT8_C(123), UINT8_C(125), UINT8_C( 28), UINT8_C(250), UINT8_C(180), UINT8_C(218), UINT8_C(138), UINT8_C(236), UINT8_C( 38), UINT8_C(136), 
UINT8_C(156), UINT8_C(154), UINT8_C(163), UINT8_C(128), UINT8_C( 3), UINT8_C( 20), UINT8_C(100), UINT8_C(126), UINT8_C(164), UINT8_C( 42), UINT8_C(151), UINT8_C(206), UINT8_C( 0), UINT8_C( 4), UINT8_C(235), UINT8_C(224), UINT8_C( 89), UINT8_C(213), UINT8_C( 61), UINT8_C(138), UINT8_C( 77), UINT8_C( 74), UINT8_C( 61), UINT8_C(173), UINT8_C( 87), UINT8_C(142), UINT8_C( 26), UINT8_C(213), UINT8_C( 90), UINT8_C(221), UINT8_C( 14), UINT8_C( 55), UINT8_C(246), UINT8_C(136), UINT8_C(230), UINT8_C(198), UINT8_C(193), UINT8_C(138), UINT8_C(163), UINT8_C( 46), UINT8_C( 13), UINT8_C(134), UINT8_C(202), UINT8_C( 58), UINT8_C( 24), UINT8_C(220), UINT8_C( 93), UINT8_C(159), UINT8_C(158), UINT8_C(228), UINT8_C( 77), UINT8_C( 48)), simde_x_mm512_set_epu8(UINT8_C( 98), UINT8_C( 81), UINT8_C(150), UINT8_C(221), UINT8_C( 96), UINT8_C( 40), UINT8_C(142), UINT8_C( 23), UINT8_C( 87), UINT8_C(207), UINT8_C(191), UINT8_C( 92), UINT8_C( 77), UINT8_C(157), UINT8_C(184), UINT8_C(138), UINT8_C(172), UINT8_C( 66), UINT8_C( 9), UINT8_C(115), UINT8_C(240), UINT8_C(175), UINT8_C(107), UINT8_C( 49), UINT8_C( 50), UINT8_C(243), UINT8_C( 59), UINT8_C( 91), UINT8_C( 0), UINT8_C( 76), UINT8_C( 9), UINT8_C( 2), UINT8_C( 15), UINT8_C( 39), UINT8_C(150), UINT8_C( 31), UINT8_C( 12), UINT8_C(247), UINT8_C(135), UINT8_C( 0), UINT8_C(158), UINT8_C( 0), UINT8_C(244), UINT8_C( 69), UINT8_C( 2), UINT8_C( 0), UINT8_C(198), UINT8_C(205), UINT8_C( 56), UINT8_C( 60), UINT8_C(191), UINT8_C( 47), UINT8_C( 58), UINT8_C(110), UINT8_C(149), UINT8_C( 0), UINT8_C(247), UINT8_C( 0), UINT8_C( 21), UINT8_C( 75), UINT8_C( 0), UINT8_C(240), UINT8_C(253), UINT8_C(242)) }, { simde_x_mm512_set_epu8(UINT8_C( 53), UINT8_C(227), UINT8_C(166), UINT8_C(167), UINT8_C(220), UINT8_C( 62), UINT8_C(127), UINT8_C(188), UINT8_C(162), UINT8_C( 93), UINT8_C( 87), UINT8_C( 20), UINT8_C(253), UINT8_C(151), UINT8_C(167), UINT8_C( 97), UINT8_C(141), UINT8_C( 56), UINT8_C( 46), UINT8_C(112), UINT8_C(183), UINT8_C(242), UINT8_C(234), UINT8_C(225), 
UINT8_C( 17), UINT8_C(244), UINT8_C(208), UINT8_C(134), UINT8_C( 62), UINT8_C(227), UINT8_C(122), UINT8_C(145), UINT8_C( 35), UINT8_C(116), UINT8_C( 86), UINT8_C( 57), UINT8_C( 43), UINT8_C( 6), UINT8_C( 33), UINT8_C( 4), UINT8_C(148), UINT8_C( 62), UINT8_C( 78), UINT8_C(134), UINT8_C( 59), UINT8_C(194), UINT8_C(251), UINT8_C(176), UINT8_C(131), UINT8_C( 3), UINT8_C(158), UINT8_C( 64), UINT8_C( 7), UINT8_C( 23), UINT8_C(141), UINT8_C( 30), UINT8_C( 80), UINT8_C(118), UINT8_C(234), UINT8_C(166), UINT8_C(190), UINT8_C(141), UINT8_C( 8), UINT8_C(147)), UINT64_C( 22184990), simde_x_mm512_set_epu8(UINT8_C( 84), UINT8_C( 30), UINT8_C(240), UINT8_C( 3), UINT8_C( 77), UINT8_C(250), UINT8_C(105), UINT8_C( 84), UINT8_C( 36), UINT8_C(146), UINT8_C( 3), UINT8_C( 91), UINT8_C( 21), UINT8_C( 50), UINT8_C( 15), UINT8_C(197), UINT8_C(184), UINT8_C(164), UINT8_C(243), UINT8_C( 49), UINT8_C(139), UINT8_C(155), UINT8_C( 62), UINT8_C(216), UINT8_C(245), UINT8_C( 40), UINT8_C( 99), UINT8_C(208), UINT8_C(124), UINT8_C(170), UINT8_C(118), UINT8_C(243), UINT8_C(134), UINT8_C( 41), UINT8_C(113), UINT8_C( 32), UINT8_C(248), UINT8_C( 57), UINT8_C( 20), UINT8_C(118), UINT8_C(143), UINT8_C(142), UINT8_C(167), UINT8_C(111), UINT8_C( 61), UINT8_C(150), UINT8_C( 44), UINT8_C(181), UINT8_C(206), UINT8_C(236), UINT8_C( 29), UINT8_C(231), UINT8_C(142), UINT8_C( 18), UINT8_C( 45), UINT8_C(166), UINT8_C( 68), UINT8_C(228), UINT8_C( 11), UINT8_C( 98), UINT8_C(196), UINT8_C(139), UINT8_C( 34), UINT8_C( 25)), simde_x_mm512_set_epu8(UINT8_C( 5), UINT8_C( 56), UINT8_C(136), UINT8_C(206), UINT8_C(185), UINT8_C( 86), UINT8_C(170), UINT8_C(156), UINT8_C( 56), UINT8_C( 24), UINT8_C(243), UINT8_C( 74), UINT8_C( 28), UINT8_C( 35), UINT8_C(114), UINT8_C( 4), UINT8_C( 15), UINT8_C(140), UINT8_C(233), UINT8_C(248), UINT8_C(211), UINT8_C(113), UINT8_C(132), UINT8_C( 79), UINT8_C( 67), UINT8_C( 31), UINT8_C(151), UINT8_C(212), UINT8_C( 77), UINT8_C(114), UINT8_C(132), UINT8_C(196), UINT8_C(210), UINT8_C(113), 
UINT8_C(221), UINT8_C(195), UINT8_C( 83), UINT8_C(114), UINT8_C(111), UINT8_C(219), UINT8_C(131), UINT8_C(244), UINT8_C(252), UINT8_C(192), UINT8_C(162), UINT8_C(101), UINT8_C(212), UINT8_C( 52), UINT8_C( 99), UINT8_C(238), UINT8_C( 29), UINT8_C(150), UINT8_C(119), UINT8_C( 43), UINT8_C( 64), UINT8_C(199), UINT8_C(174), UINT8_C( 41), UINT8_C( 78), UINT8_C( 66), UINT8_C(152), UINT8_C(112), UINT8_C( 89), UINT8_C(237)), simde_x_mm512_set_epu8(UINT8_C( 53), UINT8_C(227), UINT8_C(166), UINT8_C(167), UINT8_C(220), UINT8_C( 62), UINT8_C(127), UINT8_C(188), UINT8_C(162), UINT8_C( 93), UINT8_C( 87), UINT8_C( 20), UINT8_C(253), UINT8_C(151), UINT8_C(167), UINT8_C( 97), UINT8_C(141), UINT8_C( 56), UINT8_C( 46), UINT8_C(112), UINT8_C(183), UINT8_C(242), UINT8_C(234), UINT8_C(225), UINT8_C( 17), UINT8_C(244), UINT8_C(208), UINT8_C(134), UINT8_C( 62), UINT8_C(227), UINT8_C(122), UINT8_C(145), UINT8_C( 35), UINT8_C(116), UINT8_C( 86), UINT8_C( 57), UINT8_C( 43), UINT8_C( 6), UINT8_C( 33), UINT8_C( 0), UINT8_C(148), UINT8_C( 0), UINT8_C( 78), UINT8_C( 0), UINT8_C( 59), UINT8_C(194), UINT8_C( 0), UINT8_C(176), UINT8_C(107), UINT8_C( 3), UINT8_C(158), UINT8_C( 64), UINT8_C( 7), UINT8_C( 0), UINT8_C(141), UINT8_C( 30), UINT8_C( 80), UINT8_C(118), UINT8_C(234), UINT8_C( 32), UINT8_C( 44), UINT8_C( 27), UINT8_C( 0), UINT8_C(147)) }, { simde_x_mm512_set_epu8(UINT8_C(235), UINT8_C(117), UINT8_C(121), UINT8_C(105), UINT8_C( 64), UINT8_C( 2), UINT8_C( 13), UINT8_C(246), UINT8_C(161), UINT8_C(246), UINT8_C(247), UINT8_C( 22), UINT8_C(159), UINT8_C( 42), UINT8_C(201), UINT8_C( 79), UINT8_C( 21), UINT8_C(172), UINT8_C(230), UINT8_C(126), UINT8_C(213), UINT8_C(157), UINT8_C(132), UINT8_C(237), UINT8_C(124), UINT8_C(235), UINT8_C(111), UINT8_C( 29), UINT8_C( 89), UINT8_C( 58), UINT8_C(255), UINT8_C(134), UINT8_C(101), UINT8_C( 62), UINT8_C(180), UINT8_C(233), UINT8_C(164), UINT8_C(115), UINT8_C(231), UINT8_C( 98), UINT8_C(155), UINT8_C(153), UINT8_C(181), UINT8_C(181), UINT8_C( 69), 
UINT8_C(210), UINT8_C(151), UINT8_C( 85), UINT8_C( 48), UINT8_C( 82), UINT8_C( 52), UINT8_C( 54), UINT8_C( 55), UINT8_C(158), UINT8_C(113), UINT8_C(247), UINT8_C(228), UINT8_C(233), UINT8_C( 78), UINT8_C( 66), UINT8_C( 93), UINT8_C(202), UINT8_C(157), UINT8_C(212)), UINT64_C( 2103571959), simde_x_mm512_set_epu8(UINT8_C(144), UINT8_C( 21), UINT8_C(157), UINT8_C( 81), UINT8_C(156), UINT8_C(133), UINT8_C(103), UINT8_C(155), UINT8_C(182), UINT8_C(168), UINT8_C(151), UINT8_C(201), UINT8_C( 69), UINT8_C(193), UINT8_C(130), UINT8_C( 74), UINT8_C(226), UINT8_C(223), UINT8_C(134), UINT8_C( 37), UINT8_C( 29), UINT8_C( 47), UINT8_C( 30), UINT8_C(250), UINT8_C(230), UINT8_C( 55), UINT8_C( 98), UINT8_C( 11), UINT8_C(116), UINT8_C( 66), UINT8_C(237), UINT8_C(241), UINT8_C(184), UINT8_C(235), UINT8_C(101), UINT8_C( 24), UINT8_C(135), UINT8_C( 72), UINT8_C(219), UINT8_C(214), UINT8_C( 61), UINT8_C(252), UINT8_C(209), UINT8_C(153), UINT8_C( 26), UINT8_C( 69), UINT8_C(254), UINT8_C( 81), UINT8_C(112), UINT8_C( 1), UINT8_C( 13), UINT8_C(163), UINT8_C( 13), UINT8_C( 42), UINT8_C(243), UINT8_C(185), UINT8_C( 74), UINT8_C( 36), UINT8_C(112), UINT8_C(104), UINT8_C( 13), UINT8_C(101), UINT8_C( 7), UINT8_C( 51)), simde_x_mm512_set_epu8(UINT8_C(133), UINT8_C(195), UINT8_C( 74), UINT8_C( 77), UINT8_C(127), UINT8_C(119), UINT8_C(202), UINT8_C( 85), UINT8_C(236), UINT8_C( 29), UINT8_C(224), UINT8_C( 26), UINT8_C(102), UINT8_C(226), UINT8_C(128), UINT8_C(173), UINT8_C(247), UINT8_C(167), UINT8_C(253), UINT8_C(107), UINT8_C(195), UINT8_C( 44), UINT8_C(250), UINT8_C( 78), UINT8_C( 22), UINT8_C(248), UINT8_C( 49), UINT8_C(127), UINT8_C(226), UINT8_C(177), UINT8_C(135), UINT8_C( 20), UINT8_C(199), UINT8_C(239), UINT8_C(100), UINT8_C(134), UINT8_C(164), UINT8_C(130), UINT8_C(104), UINT8_C(154), UINT8_C( 70), UINT8_C(141), UINT8_C(181), UINT8_C(104), UINT8_C(215), UINT8_C( 61), UINT8_C(212), UINT8_C( 26), UINT8_C(110), UINT8_C(215), UINT8_C(220), UINT8_C(161), UINT8_C( 37), UINT8_C( 95), 
UINT8_C(129), UINT8_C(178), UINT8_C(134), UINT8_C( 99), UINT8_C(129), UINT8_C( 29), UINT8_C(214), UINT8_C(225), UINT8_C(121), UINT8_C(165)), simde_x_mm512_set_epu8(UINT8_C(235), UINT8_C(117), UINT8_C(121), UINT8_C(105), UINT8_C( 64), UINT8_C( 2), UINT8_C( 13), UINT8_C(246), UINT8_C(161), UINT8_C(246), UINT8_C(247), UINT8_C( 22), UINT8_C(159), UINT8_C( 42), UINT8_C(201), UINT8_C( 79), UINT8_C( 21), UINT8_C(172), UINT8_C(230), UINT8_C(126), UINT8_C(213), UINT8_C(157), UINT8_C(132), UINT8_C(237), UINT8_C(124), UINT8_C(235), UINT8_C(111), UINT8_C( 29), UINT8_C( 89), UINT8_C( 58), UINT8_C(255), UINT8_C(134), UINT8_C(101), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(231), UINT8_C( 60), UINT8_C(155), UINT8_C(111), UINT8_C( 28), UINT8_C(181), UINT8_C( 69), UINT8_C(210), UINT8_C(151), UINT8_C( 55), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 2), UINT8_C( 55), UINT8_C( 0), UINT8_C(113), UINT8_C( 7), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 75), UINT8_C( 93), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_subs_epu8(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_subs_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask64 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { UINT64_C( 1678504309), simde_x_mm512_set_epu8(UINT8_C(188), UINT8_C(227), UINT8_C(248), UINT8_C( 6), UINT8_C(158), UINT8_C(170), UINT8_C( 99), UINT8_C(202), UINT8_C(160), UINT8_C(209), UINT8_C( 53), UINT8_C(241), UINT8_C(169), UINT8_C( 74), UINT8_C(195), UINT8_C(156), UINT8_C(226), UINT8_C(114), UINT8_C(119), UINT8_C(203), UINT8_C( 15), UINT8_C( 80), UINT8_C(207), UINT8_C(120), UINT8_C(164), UINT8_C(171), UINT8_C( 95), UINT8_C(130), UINT8_C(161), UINT8_C( 3), UINT8_C( 82), UINT8_C(197), UINT8_C( 78), UINT8_C(216), UINT8_C(176), 
UINT8_C(131), UINT8_C(213), UINT8_C( 21), UINT8_C( 48), UINT8_C(162), UINT8_C(135), UINT8_C( 81), UINT8_C( 67), UINT8_C(242), UINT8_C( 97), UINT8_C( 89), UINT8_C(174), UINT8_C( 64), UINT8_C(170), UINT8_C(194), UINT8_C(235), UINT8_C(133), UINT8_C( 33), UINT8_C(160), UINT8_C(236), UINT8_C( 7), UINT8_C(224), UINT8_C( 63), UINT8_C(130), UINT8_C( 28), UINT8_C( 83), UINT8_C(152), UINT8_C(149), UINT8_C(111)), simde_x_mm512_set_epu8(UINT8_C(164), UINT8_C( 45), UINT8_C(193), UINT8_C(111), UINT8_C( 96), UINT8_C(220), UINT8_C( 54), UINT8_C(232), UINT8_C(214), UINT8_C(244), UINT8_C(236), UINT8_C( 74), UINT8_C( 47), UINT8_C(180), UINT8_C(143), UINT8_C(214), UINT8_C( 6), UINT8_C(178), UINT8_C( 35), UINT8_C(129), UINT8_C( 21), UINT8_C(157), UINT8_C( 30), UINT8_C( 29), UINT8_C(226), UINT8_C(222), UINT8_C( 14), UINT8_C(170), UINT8_C(223), UINT8_C(200), UINT8_C( 41), UINT8_C(245), UINT8_C( 7), UINT8_C(251), UINT8_C( 30), UINT8_C(217), UINT8_C( 93), UINT8_C(228), UINT8_C(143), UINT8_C( 9), UINT8_C(221), UINT8_C( 49), UINT8_C(207), UINT8_C( 44), UINT8_C( 52), UINT8_C(108), UINT8_C(124), UINT8_C( 14), UINT8_C(144), UINT8_C(229), UINT8_C(102), UINT8_C( 16), UINT8_C( 61), UINT8_C( 2), UINT8_C(169), UINT8_C( 72), UINT8_C(194), UINT8_C(122), UINT8_C(216), UINT8_C(151), UINT8_C(244), UINT8_C(227), UINT8_C( 5), UINT8_C( 98)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(146), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 45), UINT8_C( 0), UINT8_C( 50), UINT8_C( 50), UINT8_C( 26), UINT8_C( 0), 
UINT8_C(133), UINT8_C(117), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 13)) }, { UINT64_C( 4292756020), simde_x_mm512_set_epu8(UINT8_C(177), UINT8_C(132), UINT8_C(179), UINT8_C(117), UINT8_C(215), UINT8_C( 43), UINT8_C(143), UINT8_C( 75), UINT8_C( 53), UINT8_C( 65), UINT8_C(131), UINT8_C(189), UINT8_C(196), UINT8_C(200), UINT8_C(234), UINT8_C( 92), UINT8_C( 99), UINT8_C(187), UINT8_C(123), UINT8_C(180), UINT8_C(209), UINT8_C(163), UINT8_C( 86), UINT8_C( 26), UINT8_C(192), UINT8_C( 36), UINT8_C(205), UINT8_C(108), UINT8_C(173), UINT8_C( 71), UINT8_C(190), UINT8_C( 46), UINT8_C(173), UINT8_C(156), UINT8_C( 59), UINT8_C(250), UINT8_C( 13), UINT8_C(171), UINT8_C( 1), UINT8_C( 47), UINT8_C(227), UINT8_C( 45), UINT8_C(181), UINT8_C( 45), UINT8_C(116), UINT8_C( 89), UINT8_C(112), UINT8_C(127), UINT8_C( 42), UINT8_C(134), UINT8_C(128), UINT8_C(206), UINT8_C( 87), UINT8_C(206), UINT8_C( 39), UINT8_C( 7), UINT8_C( 48), UINT8_C( 47), UINT8_C( 79), UINT8_C( 32), UINT8_C( 39), UINT8_C(205), UINT8_C(174), UINT8_C(244)), simde_x_mm512_set_epu8(UINT8_C(103), UINT8_C( 74), UINT8_C( 78), UINT8_C(251), UINT8_C( 78), UINT8_C( 92), UINT8_C( 94), UINT8_C(154), UINT8_C(108), UINT8_C( 96), UINT8_C( 9), UINT8_C(145), UINT8_C(131), UINT8_C(204), UINT8_C(151), UINT8_C(200), UINT8_C(172), UINT8_C(193), UINT8_C(248), UINT8_C(227), UINT8_C(159), UINT8_C( 34), UINT8_C( 70), UINT8_C( 14), UINT8_C(229), UINT8_C( 98), UINT8_C( 73), UINT8_C(209), UINT8_C( 59), UINT8_C(209), UINT8_C( 82), UINT8_C( 90), UINT8_C(172), UINT8_C( 10), UINT8_C(185), UINT8_C(194), UINT8_C(184), UINT8_C( 76), UINT8_C(173), UINT8_C( 75), UINT8_C(136), UINT8_C(130), UINT8_C(227), UINT8_C(162), UINT8_C(154), UINT8_C(158), UINT8_C(121), UINT8_C( 19), UINT8_C(211), UINT8_C(197), UINT8_C( 15), UINT8_C( 80), UINT8_C( 75), UINT8_C( 26), UINT8_C( 77), UINT8_C(210), UINT8_C(117), UINT8_C( 85), UINT8_C(204), UINT8_C( 0), 
UINT8_C(244), UINT8_C( 11), UINT8_C(128), UINT8_C(245)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C(146), UINT8_C( 0), UINT8_C( 56), UINT8_C( 0), UINT8_C( 95), UINT8_C( 0), UINT8_C( 0), UINT8_C( 91), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 32), UINT8_C( 0), UINT8_C(194), UINT8_C( 0), UINT8_C( 0)) }, { UINT64_C( 3647935965), simde_x_mm512_set_epu8(UINT8_C(141), UINT8_C( 95), UINT8_C(101), UINT8_C(146), UINT8_C(141), UINT8_C( 37), UINT8_C( 41), UINT8_C(148), UINT8_C(122), UINT8_C(121), UINT8_C( 15), UINT8_C(167), UINT8_C(146), UINT8_C( 1), UINT8_C( 27), UINT8_C( 93), UINT8_C(177), UINT8_C(197), UINT8_C(189), UINT8_C(233), UINT8_C(110), UINT8_C( 47), UINT8_C( 66), UINT8_C( 91), UINT8_C( 52), UINT8_C( 7), UINT8_C(197), UINT8_C( 20), UINT8_C( 34), UINT8_C(208), UINT8_C(127), UINT8_C( 73), UINT8_C(145), UINT8_C( 2), UINT8_C( 27), UINT8_C(235), UINT8_C(109), UINT8_C(152), UINT8_C(220), UINT8_C(143), UINT8_C(189), UINT8_C(222), UINT8_C(102), UINT8_C(126), UINT8_C( 80), UINT8_C( 51), UINT8_C(253), UINT8_C(186), UINT8_C( 51), UINT8_C(206), UINT8_C(118), UINT8_C( 48), UINT8_C( 59), UINT8_C(184), UINT8_C(197), UINT8_C(107), UINT8_C( 52), UINT8_C( 5), UINT8_C(208), UINT8_C(231), UINT8_C(151), UINT8_C(166), UINT8_C(151), UINT8_C( 49)), simde_x_mm512_set_epu8(UINT8_C(255), UINT8_C(129), UINT8_C(219), UINT8_C(221), UINT8_C(186), UINT8_C( 75), UINT8_C(231), UINT8_C(243), 
UINT8_C( 54), UINT8_C( 10), UINT8_C( 52), UINT8_C(157), UINT8_C( 51), UINT8_C( 63), UINT8_C(190), UINT8_C(105), UINT8_C(159), UINT8_C( 57), UINT8_C(152), UINT8_C( 2), UINT8_C(134), UINT8_C(112), UINT8_C( 29), UINT8_C( 8), UINT8_C(240), UINT8_C( 55), UINT8_C(110), UINT8_C(144), UINT8_C(247), UINT8_C( 78), UINT8_C(254), UINT8_C( 89), UINT8_C(189), UINT8_C(169), UINT8_C(164), UINT8_C(239), UINT8_C(227), UINT8_C(187), UINT8_C( 11), UINT8_C(211), UINT8_C(183), UINT8_C( 9), UINT8_C(204), UINT8_C(138), UINT8_C(223), UINT8_C( 91), UINT8_C(198), UINT8_C( 18), UINT8_C(178), UINT8_C(209), UINT8_C(152), UINT8_C(100), UINT8_C(247), UINT8_C(142), UINT8_C( 53), UINT8_C( 94), UINT8_C(238), UINT8_C(239), UINT8_C(151), UINT8_C(119), UINT8_C(229), UINT8_C( 96), UINT8_C(245), UINT8_C( 0)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(213), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 55), UINT8_C(168), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 42), UINT8_C( 0), UINT8_C( 13), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(112), UINT8_C( 0), UINT8_C( 70), UINT8_C( 0), UINT8_C( 49)) }, { UINT64_C( 2805277521), simde_x_mm512_set_epu8(UINT8_C(241), UINT8_C(232), UINT8_C(128), UINT8_C(233), UINT8_C(178), UINT8_C( 6), UINT8_C(154), UINT8_C(184), UINT8_C(219), UINT8_C(217), UINT8_C( 22), UINT8_C( 47), UINT8_C( 81), UINT8_C(179), UINT8_C( 53), UINT8_C( 28), UINT8_C(206), UINT8_C(206), UINT8_C( 30), UINT8_C( 15), UINT8_C(114), 
UINT8_C(103), UINT8_C( 13), UINT8_C(110), UINT8_C(166), UINT8_C( 46), UINT8_C(130), UINT8_C(151), UINT8_C( 16), UINT8_C( 40), UINT8_C(154), UINT8_C( 67), UINT8_C(184), UINT8_C( 37), UINT8_C(207), UINT8_C( 94), UINT8_C( 79), UINT8_C(202), UINT8_C(122), UINT8_C(203), UINT8_C(179), UINT8_C(164), UINT8_C( 37), UINT8_C(173), UINT8_C( 95), UINT8_C(170), UINT8_C( 47), UINT8_C(161), UINT8_C(255), UINT8_C(185), UINT8_C(163), UINT8_C(181), UINT8_C( 29), UINT8_C(148), UINT8_C(134), UINT8_C( 11), UINT8_C(137), UINT8_C(213), UINT8_C(115), UINT8_C(139), UINT8_C( 77), UINT8_C(129), UINT8_C( 55), UINT8_C( 6)), simde_x_mm512_set_epu8(UINT8_C( 39), UINT8_C(159), UINT8_C( 61), UINT8_C( 37), UINT8_C(160), UINT8_C(166), UINT8_C( 87), UINT8_C(103), UINT8_C(228), UINT8_C( 29), UINT8_C(156), UINT8_C(201), UINT8_C(217), UINT8_C( 60), UINT8_C( 61), UINT8_C( 86), UINT8_C(205), UINT8_C(204), UINT8_C( 26), UINT8_C( 35), UINT8_C(113), UINT8_C( 54), UINT8_C( 44), UINT8_C(150), UINT8_C( 5), UINT8_C(127), UINT8_C(118), UINT8_C(122), UINT8_C( 8), UINT8_C( 76), UINT8_C( 62), UINT8_C( 54), UINT8_C(171), UINT8_C(236), UINT8_C(228), UINT8_C( 23), UINT8_C(167), UINT8_C( 85), UINT8_C(106), UINT8_C(203), UINT8_C( 75), UINT8_C(211), UINT8_C(183), UINT8_C(233), UINT8_C(230), UINT8_C(155), UINT8_C(159), UINT8_C(231), UINT8_C( 53), UINT8_C( 16), UINT8_C(224), UINT8_C(218), UINT8_C(254), UINT8_C(170), UINT8_C(229), UINT8_C(158), UINT8_C(154), UINT8_C(108), UINT8_C(114), UINT8_C(193), UINT8_C(254), UINT8_C( 39), UINT8_C(143), UINT8_C(155)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 13), UINT8_C( 0), UINT8_C( 0), 
UINT8_C( 0), UINT8_C( 0), UINT8_C(117), UINT8_C( 16), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 15), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(105), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0)) }, { UINT64_C( 2080103780), simde_x_mm512_set_epu8(UINT8_C( 35), UINT8_C(245), UINT8_C(193), UINT8_C( 56), UINT8_C(248), UINT8_C(186), UINT8_C( 68), UINT8_C(150), UINT8_C(220), UINT8_C(248), UINT8_C(101), UINT8_C( 0), UINT8_C(116), UINT8_C(116), UINT8_C( 64), UINT8_C(175), UINT8_C(165), UINT8_C(173), UINT8_C(234), UINT8_C(165), UINT8_C(146), UINT8_C(243), UINT8_C( 52), UINT8_C( 89), UINT8_C(227), UINT8_C( 93), UINT8_C(108), UINT8_C( 47), UINT8_C(207), UINT8_C(115), UINT8_C(178), UINT8_C(155), UINT8_C(250), UINT8_C(115), UINT8_C( 2), UINT8_C( 73), UINT8_C(254), UINT8_C(119), UINT8_C( 72), UINT8_C(108), UINT8_C( 90), UINT8_C( 96), UINT8_C( 56), UINT8_C(182), UINT8_C( 64), UINT8_C( 8), UINT8_C(172), UINT8_C(169), UINT8_C( 32), UINT8_C( 75), UINT8_C( 92), UINT8_C(114), UINT8_C( 16), UINT8_C( 36), UINT8_C(161), UINT8_C( 46), UINT8_C(214), UINT8_C(247), UINT8_C(176), UINT8_C( 43), UINT8_C(168), UINT8_C(161), UINT8_C(148), UINT8_C( 77)), simde_x_mm512_set_epu8(UINT8_C( 56), UINT8_C(209), UINT8_C( 90), UINT8_C(226), UINT8_C(128), UINT8_C( 65), UINT8_C( 94), UINT8_C(244), UINT8_C(183), UINT8_C( 61), UINT8_C(186), UINT8_C(254), UINT8_C(239), UINT8_C( 84), UINT8_C(120), UINT8_C(211), UINT8_C(215), UINT8_C( 43), UINT8_C(249), UINT8_C(194), UINT8_C(157), UINT8_C(199), UINT8_C( 98), UINT8_C(233), UINT8_C( 27), UINT8_C(181), UINT8_C(227), UINT8_C( 75), UINT8_C( 73), UINT8_C( 96), UINT8_C(149), UINT8_C(223), UINT8_C(104), UINT8_C( 32), UINT8_C( 22), UINT8_C(168), UINT8_C( 89), UINT8_C( 99), UINT8_C(164), UINT8_C(153), UINT8_C( 65), UINT8_C(170), UINT8_C( 53), UINT8_C( 47), UINT8_C(191), UINT8_C(169), 
UINT8_C( 40), UINT8_C( 76), UINT8_C(114), UINT8_C(220), UINT8_C( 85), UINT8_C(210), UINT8_C(141), UINT8_C( 16), UINT8_C( 66), UINT8_C(216), UINT8_C(211), UINT8_C(107), UINT8_C(217), UINT8_C(119), UINT8_C(250), UINT8_C( 64), UINT8_C(254), UINT8_C( 91)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 83), UINT8_C( 0), UINT8_C( 0), UINT8_C(165), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 25), UINT8_C( 0), UINT8_C( 3), UINT8_C(135), UINT8_C( 0), UINT8_C( 0), UINT8_C(132), UINT8_C( 93), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 20), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(140), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 97), UINT8_C( 0), UINT8_C( 0)) }, { UINT64_C( 3123384269), simde_x_mm512_set_epu8(UINT8_C( 96), UINT8_C(111), UINT8_C( 93), UINT8_C(100), UINT8_C(124), UINT8_C( 70), UINT8_C(116), UINT8_C(210), UINT8_C( 69), UINT8_C(255), UINT8_C(157), UINT8_C(160), UINT8_C( 53), UINT8_C( 12), UINT8_C(237), UINT8_C(134), UINT8_C( 21), UINT8_C(153), UINT8_C(252), UINT8_C(180), UINT8_C(184), UINT8_C( 70), UINT8_C( 16), UINT8_C(164), UINT8_C( 32), UINT8_C(157), UINT8_C(177), UINT8_C(185), UINT8_C( 75), UINT8_C(213), UINT8_C(118), UINT8_C( 48), UINT8_C(250), UINT8_C(247), UINT8_C(144), UINT8_C(182), UINT8_C( 89), UINT8_C(193), UINT8_C(215), UINT8_C(109), UINT8_C( 32), UINT8_C( 44), UINT8_C( 30), UINT8_C(115), UINT8_C(122), UINT8_C(173), UINT8_C( 22), UINT8_C(177), UINT8_C(163), UINT8_C(190), UINT8_C(161), UINT8_C(157), UINT8_C(186), UINT8_C(207), UINT8_C(230), UINT8_C( 94), UINT8_C(188), UINT8_C( 54), UINT8_C(164), 
UINT8_C(150), UINT8_C( 11), UINT8_C( 80), UINT8_C(167), UINT8_C(171)), simde_x_mm512_set_epu8(UINT8_C( 62), UINT8_C(166), UINT8_C(227), UINT8_C( 87), UINT8_C(254), UINT8_C( 17), UINT8_C(235), UINT8_C(147), UINT8_C(215), UINT8_C( 8), UINT8_C(107), UINT8_C( 75), UINT8_C(237), UINT8_C(121), UINT8_C( 75), UINT8_C( 80), UINT8_C( 31), UINT8_C( 58), UINT8_C(132), UINT8_C( 37), UINT8_C(154), UINT8_C( 6), UINT8_C( 75), UINT8_C(156), UINT8_C(149), UINT8_C(144), UINT8_C(181), UINT8_C(171), UINT8_C(160), UINT8_C( 77), UINT8_C(155), UINT8_C(252), UINT8_C(181), UINT8_C(149), UINT8_C( 69), UINT8_C( 75), UINT8_C(202), UINT8_C( 32), UINT8_C( 21), UINT8_C( 82), UINT8_C(184), UINT8_C(152), UINT8_C(217), UINT8_C( 20), UINT8_C( 41), UINT8_C( 41), UINT8_C( 1), UINT8_C( 97), UINT8_C(165), UINT8_C(111), UINT8_C(131), UINT8_C( 21), UINT8_C( 63), UINT8_C( 20), UINT8_C(145), UINT8_C(147), UINT8_C( 66), UINT8_C( 29), UINT8_C(208), UINT8_C( 15), UINT8_C(209), UINT8_C(114), UINT8_C(194), UINT8_C( 3)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 69), UINT8_C( 0), UINT8_C( 75), UINT8_C(107), UINT8_C( 0), UINT8_C( 0), UINT8_C(194), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 81), UINT8_C( 0), UINT8_C( 21), UINT8_C( 80), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(123), UINT8_C(187), UINT8_C( 85), UINT8_C( 0), UINT8_C(122), UINT8_C( 25), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(168)) }, { UINT64_C( 3899248884), simde_x_mm512_set_epu8(UINT8_C(219), UINT8_C(209), UINT8_C(106), UINT8_C(223), UINT8_C(220), UINT8_C(150), 
UINT8_C(243), UINT8_C( 88), UINT8_C( 23), UINT8_C( 59), UINT8_C(210), UINT8_C(197), UINT8_C( 39), UINT8_C(187), UINT8_C( 89), UINT8_C( 99), UINT8_C( 19), UINT8_C(249), UINT8_C( 10), UINT8_C( 39), UINT8_C(237), UINT8_C(106), UINT8_C( 56), UINT8_C( 28), UINT8_C(181), UINT8_C( 56), UINT8_C(192), UINT8_C(150), UINT8_C( 44), UINT8_C(209), UINT8_C( 13), UINT8_C(238), UINT8_C(199), UINT8_C(135), UINT8_C(144), UINT8_C( 8), UINT8_C( 13), UINT8_C(143), UINT8_C(242), UINT8_C(219), UINT8_C(103), UINT8_C(252), UINT8_C(214), UINT8_C(167), UINT8_C( 3), UINT8_C(136), UINT8_C(222), UINT8_C(241), UINT8_C( 7), UINT8_C( 92), UINT8_C(141), UINT8_C(140), UINT8_C(104), UINT8_C(125), UINT8_C(212), UINT8_C(114), UINT8_C(114), UINT8_C(229), UINT8_C(235), UINT8_C(144), UINT8_C(233), UINT8_C(188), UINT8_C( 95), UINT8_C(193)), simde_x_mm512_set_epu8(UINT8_C(198), UINT8_C( 49), UINT8_C(205), UINT8_C(189), UINT8_C( 32), UINT8_C(168), UINT8_C(102), UINT8_C( 88), UINT8_C( 86), UINT8_C(170), UINT8_C(185), UINT8_C( 98), UINT8_C(232), UINT8_C( 43), UINT8_C( 26), UINT8_C(165), UINT8_C(105), UINT8_C(165), UINT8_C(196), UINT8_C(175), UINT8_C( 21), UINT8_C( 33), UINT8_C( 59), UINT8_C( 91), UINT8_C(173), UINT8_C( 84), UINT8_C(102), UINT8_C(234), UINT8_C(240), UINT8_C( 97), UINT8_C( 44), UINT8_C(117), UINT8_C(253), UINT8_C( 63), UINT8_C(177), UINT8_C( 7), UINT8_C( 84), UINT8_C(101), UINT8_C( 54), UINT8_C(237), UINT8_C(110), UINT8_C( 66), UINT8_C( 52), UINT8_C(158), UINT8_C(218), UINT8_C(146), UINT8_C(129), UINT8_C( 7), UINT8_C(170), UINT8_C( 34), UINT8_C( 11), UINT8_C( 33), UINT8_C(109), UINT8_C( 75), UINT8_C(249), UINT8_C(174), UINT8_C( 89), UINT8_C(221), UINT8_C(115), UINT8_C(113), UINT8_C(179), UINT8_C( 55), UINT8_C(200), UINT8_C( 84)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), 
UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 72), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(186), UINT8_C(162), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(234), UINT8_C( 0), UINT8_C( 58), UINT8_C( 0), UINT8_C(107), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 25), UINT8_C( 8), UINT8_C(120), UINT8_C( 31), UINT8_C( 0), UINT8_C(133), UINT8_C( 0), UINT8_C( 0)) }, { UINT64_C( 1783299355), simde_x_mm512_set_epu8(UINT8_C(158), UINT8_C( 81), UINT8_C(244), UINT8_C( 20), UINT8_C(204), UINT8_C( 92), UINT8_C(198), UINT8_C(205), UINT8_C( 11), UINT8_C( 60), UINT8_C(191), UINT8_C( 47), UINT8_C( 22), UINT8_C(151), UINT8_C(149), UINT8_C( 16), UINT8_C(247), UINT8_C(164), UINT8_C( 21), UINT8_C( 75), UINT8_C(153), UINT8_C(240), UINT8_C(253), UINT8_C(242), UINT8_C(202), UINT8_C( 54), UINT8_C( 42), UINT8_C(168), UINT8_C( 22), UINT8_C(221), UINT8_C( 17), UINT8_C(189), UINT8_C( 39), UINT8_C(155), UINT8_C(135), UINT8_C(162), UINT8_C(148), UINT8_C(181), UINT8_C(120), UINT8_C( 52), UINT8_C(206), UINT8_C( 24), UINT8_C(206), UINT8_C(117), UINT8_C(240), UINT8_C( 17), UINT8_C(210), UINT8_C(188), UINT8_C(193), UINT8_C(112), UINT8_C( 80), UINT8_C(199), UINT8_C(136), UINT8_C( 33), UINT8_C( 97), UINT8_C(247), UINT8_C(161), UINT8_C( 97), UINT8_C( 42), UINT8_C(181), UINT8_C( 25), UINT8_C( 62), UINT8_C( 47), UINT8_C( 32)), simde_x_mm512_set_epu8(UINT8_C(249), UINT8_C(227), UINT8_C(149), UINT8_C(232), UINT8_C( 71), UINT8_C(244), UINT8_C(128), UINT8_C( 10), UINT8_C(194), UINT8_C(169), UINT8_C(169), UINT8_C( 87), UINT8_C(140), UINT8_C(131), UINT8_C( 55), UINT8_C(150), UINT8_C(104), UINT8_C(180), UINT8_C(190), UINT8_C(178), UINT8_C( 3), UINT8_C( 92), UINT8_C(141), UINT8_C( 72), UINT8_C( 98), UINT8_C( 81), UINT8_C(150), UINT8_C(221), UINT8_C( 96), UINT8_C( 40), UINT8_C(142), 
UINT8_C( 23), UINT8_C( 87), UINT8_C(207), UINT8_C(191), UINT8_C( 92), UINT8_C( 77), UINT8_C(157), UINT8_C(184), UINT8_C(138), UINT8_C(172), UINT8_C( 66), UINT8_C( 9), UINT8_C(115), UINT8_C(240), UINT8_C(175), UINT8_C(107), UINT8_C( 49), UINT8_C( 50), UINT8_C(243), UINT8_C( 59), UINT8_C( 91), UINT8_C( 0), UINT8_C( 76), UINT8_C( 9), UINT8_C( 2), UINT8_C( 15), UINT8_C( 39), UINT8_C(150), UINT8_C( 31), UINT8_C( 12), UINT8_C(247), UINT8_C(212), UINT8_C( 40)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 71), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(103), UINT8_C( 0), UINT8_C(143), UINT8_C( 0), UINT8_C( 21), UINT8_C(108), UINT8_C(136), UINT8_C( 0), UINT8_C( 0), UINT8_C(245), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C(150), UINT8_C( 13), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_subs_epu8(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_subs_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu16(UINT16_C( 21075), UINT16_C( 30017), UINT16_C( 53638), UINT16_C( 29710), UINT16_C( 19457), UINT16_C( 52740), UINT16_C( 21427), UINT16_C( 28826), UINT16_C( 25482), UINT16_C( 53693), UINT16_C( 15582), UINT16_C( 20114), UINT16_C( 50775), UINT16_C( 49946), UINT16_C( 61394), 
UINT16_C( 29932), UINT16_C( 34864), UINT16_C( 6190), UINT16_C( 26590), UINT16_C( 10803), UINT16_C( 48982), UINT16_C( 15816), UINT16_C( 14967), UINT16_C( 24063), UINT16_C( 50823), UINT16_C( 57442), UINT16_C( 48719), UINT16_C( 25507), UINT16_C( 19912), UINT16_C( 45607), UINT16_C( 52932), UINT16_C( 48380)), simde_x_mm512_set_epu16(UINT16_C( 27175), UINT16_C( 38414), UINT16_C( 58622), UINT16_C( 10212), UINT16_C( 13894), UINT16_C( 60916), UINT16_C( 54812), UINT16_C( 62458), UINT16_C( 29698), UINT16_C( 6009), UINT16_C( 28893), UINT16_C( 33802), UINT16_C( 61579), UINT16_C( 6787), UINT16_C( 9325), UINT16_C( 7645), UINT16_C( 63403), UINT16_C( 55903), UINT16_C( 43011), UINT16_C( 1124), UINT16_C( 21781), UINT16_C( 17119), UINT16_C( 46075), UINT16_C( 33402), UINT16_C( 58029), UINT16_C( 19092), UINT16_C( 21408), UINT16_C( 51092), UINT16_C( 22843), UINT16_C( 28625), UINT16_C( 63214), UINT16_C( 5251)), simde_x_mm512_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 19498), UINT16_C( 5563), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 47684), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 43159), UINT16_C( 52069), UINT16_C( 22287), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 9679), UINT16_C( 27201), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 38350), UINT16_C( 27311), UINT16_C( 0), UINT16_C( 0), UINT16_C( 16982), UINT16_C( 0), UINT16_C( 43129)) }, { simde_x_mm512_set_epu16(UINT16_C( 38028), UINT16_C( 5509), UINT16_C( 52010), UINT16_C( 16909), UINT16_C( 2419), UINT16_C( 22142), UINT16_C( 59427), UINT16_C( 64359), UINT16_C( 9839), UINT16_C( 6329), UINT16_C( 65297), UINT16_C( 49651), UINT16_C( 3666), UINT16_C( 20122), UINT16_C( 63837), UINT16_C( 6503), UINT16_C( 29169), UINT16_C( 60855), UINT16_C( 62823), UINT16_C( 40827), UINT16_C( 7221), UINT16_C( 61818), UINT16_C( 970), UINT16_C( 49978), UINT16_C( 54525), UINT16_C( 54749), UINT16_C( 35566), UINT16_C( 3894), UINT16_C( 39622), UINT16_C( 46778), UINT16_C( 11824), 
UINT16_C( 56668)), simde_x_mm512_set_epu16(UINT16_C( 1604), UINT16_C( 19874), UINT16_C( 53403), UINT16_C( 63570), UINT16_C( 13041), UINT16_C( 1566), UINT16_C( 53745), UINT16_C( 62111), UINT16_C( 7377), UINT16_C( 42156), UINT16_C( 56287), UINT16_C( 34285), UINT16_C( 14877), UINT16_C( 24009), UINT16_C( 33220), UINT16_C( 8308), UINT16_C( 53811), UINT16_C( 55306), UINT16_C( 1074), UINT16_C( 12341), UINT16_C( 19989), UINT16_C( 16491), UINT16_C( 4144), UINT16_C( 53822), UINT16_C( 19285), UINT16_C( 36338), UINT16_C( 40278), UINT16_C( 36022), UINT16_C( 9755), UINT16_C( 36151), UINT16_C( 42425), UINT16_C( 62124)), simde_x_mm512_set_epu16(UINT16_C( 36424), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 20576), UINT16_C( 5682), UINT16_C( 2248), UINT16_C( 2462), UINT16_C( 0), UINT16_C( 9010), UINT16_C( 15366), UINT16_C( 0), UINT16_C( 0), UINT16_C( 30617), UINT16_C( 0), UINT16_C( 0), UINT16_C( 5549), UINT16_C( 61749), UINT16_C( 28486), UINT16_C( 0), UINT16_C( 45327), UINT16_C( 0), UINT16_C( 0), UINT16_C( 35240), UINT16_C( 18411), UINT16_C( 0), UINT16_C( 0), UINT16_C( 29867), UINT16_C( 10627), UINT16_C( 0), UINT16_C( 0)) }, { simde_x_mm512_set_epu16(UINT16_C( 691), UINT16_C( 60713), UINT16_C( 62283), UINT16_C( 34144), UINT16_C( 43752), UINT16_C( 58796), UINT16_C( 9130), UINT16_C( 47263), UINT16_C( 11275), UINT16_C( 38444), UINT16_C( 90), UINT16_C( 45403), UINT16_C( 30523), UINT16_C( 27008), UINT16_C( 28387), UINT16_C( 17266), UINT16_C( 55759), UINT16_C( 27096), UINT16_C( 57208), UINT16_C( 58724), UINT16_C( 42582), UINT16_C( 61127), UINT16_C( 21734), UINT16_C( 45841), UINT16_C( 53555), UINT16_C( 44341), UINT16_C( 18272), UINT16_C( 28327), UINT16_C( 7123), UINT16_C( 33320), UINT16_C( 24489), UINT16_C( 49868)), simde_x_mm512_set_epu16(UINT16_C( 44159), UINT16_C( 15856), UINT16_C( 7686), UINT16_C( 36968), UINT16_C( 50344), UINT16_C( 55789), UINT16_C( 11300), UINT16_C( 27000), UINT16_C( 58901), UINT16_C( 3626), UINT16_C( 12716), UINT16_C( 34965), UINT16_C( 31697), 
UINT16_C( 5622), UINT16_C( 24444), UINT16_C( 57310), UINT16_C( 57273), UINT16_C( 2890), UINT16_C( 26732), UINT16_C( 56773), UINT16_C( 51586), UINT16_C( 27415), UINT16_C( 7653), UINT16_C( 31511), UINT16_C( 44454), UINT16_C( 2398), UINT16_C( 23365), UINT16_C( 52633), UINT16_C( 47315), UINT16_C( 4204), UINT16_C( 45083), UINT16_C( 15021)), simde_x_mm512_set_epu16(UINT16_C( 0), UINT16_C( 44857), UINT16_C( 54597), UINT16_C( 0), UINT16_C( 0), UINT16_C( 3007), UINT16_C( 0), UINT16_C( 20263), UINT16_C( 0), UINT16_C( 34818), UINT16_C( 0), UINT16_C( 10438), UINT16_C( 0), UINT16_C( 21386), UINT16_C( 3943), UINT16_C( 0), UINT16_C( 0), UINT16_C( 24206), UINT16_C( 30476), UINT16_C( 1951), UINT16_C( 0), UINT16_C( 33712), UINT16_C( 14081), UINT16_C( 14330), UINT16_C( 9101), UINT16_C( 41943), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 29116), UINT16_C( 0), UINT16_C( 34847)) }, { simde_x_mm512_set_epu16(UINT16_C( 4451), UINT16_C( 62415), UINT16_C( 11648), UINT16_C( 14185), UINT16_C( 57037), UINT16_C( 40857), UINT16_C( 33903), UINT16_C( 19019), UINT16_C( 26210), UINT16_C( 35593), UINT16_C( 46653), UINT16_C( 25468), UINT16_C( 20366), UINT16_C( 4961), UINT16_C( 40068), UINT16_C( 61378), UINT16_C( 6653), UINT16_C( 63816), UINT16_C( 35813), UINT16_C( 51292), UINT16_C( 60619), UINT16_C( 730), UINT16_C( 44859), UINT16_C( 16986), UINT16_C( 9316), UINT16_C( 28795), UINT16_C( 47263), UINT16_C( 36113), UINT16_C( 41862), UINT16_C( 7963), UINT16_C( 28019), UINT16_C( 13728)), simde_x_mm512_set_epu16(UINT16_C( 54766), UINT16_C( 29411), UINT16_C( 30463), UINT16_C( 60634), UINT16_C( 45144), UINT16_C( 37285), UINT16_C( 11448), UINT16_C( 27155), UINT16_C( 53867), UINT16_C( 11820), UINT16_C( 49024), UINT16_C( 10540), UINT16_C( 17477), UINT16_C( 45777), UINT16_C( 28024), UINT16_C( 51105), UINT16_C( 24400), UINT16_C( 57953), UINT16_C( 53407), UINT16_C( 28592), UINT16_C( 34479), UINT16_C( 47445), UINT16_C( 19926), UINT16_C( 36275), UINT16_C( 7501), UINT16_C( 16620), UINT16_C( 6953), UINT16_C( 
3437), UINT16_C( 5790), UINT16_C( 5348), UINT16_C( 17145), UINT16_C( 36745)), simde_x_mm512_set_epu16(UINT16_C( 0), UINT16_C( 33004), UINT16_C( 0), UINT16_C( 0), UINT16_C( 11893), UINT16_C( 3572), UINT16_C( 22455), UINT16_C( 0), UINT16_C( 0), UINT16_C( 23773), UINT16_C( 0), UINT16_C( 14928), UINT16_C( 2889), UINT16_C( 0), UINT16_C( 12044), UINT16_C( 10273), UINT16_C( 0), UINT16_C( 5863), UINT16_C( 0), UINT16_C( 22700), UINT16_C( 26140), UINT16_C( 0), UINT16_C( 24933), UINT16_C( 0), UINT16_C( 1815), UINT16_C( 12175), UINT16_C( 40310), UINT16_C( 32676), UINT16_C( 36072), UINT16_C( 2615), UINT16_C( 10874), UINT16_C( 0)) }, { simde_x_mm512_set_epu16(UINT16_C( 33975), UINT16_C( 18949), UINT16_C( 63249), UINT16_C( 45002), UINT16_C( 64479), UINT16_C( 62490), UINT16_C( 22138), UINT16_C( 54505), UINT16_C( 43), UINT16_C( 59270), UINT16_C( 45446), UINT16_C( 43143), UINT16_C( 39490), UINT16_C( 41833), UINT16_C( 28092), UINT16_C( 6346), UINT16_C( 10308), UINT16_C( 572), UINT16_C( 5), UINT16_C( 15306), UINT16_C( 46107), UINT16_C( 59725), UINT16_C( 38116), UINT16_C( 36408), UINT16_C( 51860), UINT16_C( 61863), UINT16_C( 39379), UINT16_C( 19197), UINT16_C( 37943), UINT16_C( 45506), UINT16_C( 3690), UINT16_C( 61686)), simde_x_mm512_set_epu16(UINT16_C( 53628), UINT16_C( 14774), UINT16_C( 5244), UINT16_C( 18107), UINT16_C( 49140), UINT16_C( 31910), UINT16_C( 36671), UINT16_C( 45498), UINT16_C( 46302), UINT16_C( 50428), UINT16_C( 55100), UINT16_C( 19911), UINT16_C( 3330), UINT16_C( 28633), UINT16_C( 10550), UINT16_C( 56178), UINT16_C( 23697), UINT16_C( 19726), UINT16_C( 39129), UINT16_C( 46658), UINT16_C( 4326), UINT16_C( 42894), UINT16_C( 48134), UINT16_C( 16035), UINT16_C( 14223), UINT16_C( 50376), UINT16_C( 56066), UINT16_C( 61784), UINT16_C( 6710), UINT16_C( 21116), UINT16_C( 55957), UINT16_C( 10253)), simde_x_mm512_set_epu16(UINT16_C( 0), UINT16_C( 4175), UINT16_C( 58005), UINT16_C( 26895), UINT16_C( 15339), UINT16_C( 30580), UINT16_C( 0), UINT16_C( 9007), UINT16_C( 0), UINT16_C( 
8842), UINT16_C( 0), UINT16_C( 23232), UINT16_C( 36160), UINT16_C( 13200), UINT16_C( 17542), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 41781), UINT16_C( 16831), UINT16_C( 0), UINT16_C( 20373), UINT16_C( 37637), UINT16_C( 11487), UINT16_C( 0), UINT16_C( 0), UINT16_C( 31233), UINT16_C( 24390), UINT16_C( 0), UINT16_C( 51433)) }, { simde_x_mm512_set_epu16(UINT16_C( 1468), UINT16_C( 61147), UINT16_C( 1296), UINT16_C( 37821), UINT16_C( 49916), UINT16_C( 3731), UINT16_C( 58247), UINT16_C( 37833), UINT16_C( 474), UINT16_C( 27447), UINT16_C( 56500), UINT16_C( 9176), UINT16_C( 2726), UINT16_C( 53392), UINT16_C( 63435), UINT16_C( 26907), UINT16_C( 40836), UINT16_C( 1244), UINT16_C( 61609), UINT16_C( 42904), UINT16_C( 58011), UINT16_C( 17743), UINT16_C( 15263), UINT16_C( 61713), UINT16_C( 27307), UINT16_C( 32391), UINT16_C( 42266), UINT16_C( 36235), UINT16_C( 23369), UINT16_C( 50245), UINT16_C( 59696), UINT16_C( 18168)), simde_x_mm512_set_epu16(UINT16_C( 23449), UINT16_C( 17725), UINT16_C( 44617), UINT16_C( 31466), UINT16_C( 31308), UINT16_C( 63353), UINT16_C( 34185), UINT16_C( 33150), UINT16_C( 26890), UINT16_C( 34945), UINT16_C( 52751), UINT16_C( 41898), UINT16_C( 33581), UINT16_C( 55689), UINT16_C( 19108), UINT16_C( 45621), UINT16_C( 4587), UINT16_C( 27034), UINT16_C( 65517), UINT16_C( 28332), UINT16_C( 41747), UINT16_C( 40576), UINT16_C( 59697), UINT16_C( 25722), UINT16_C( 41113), UINT16_C( 15592), UINT16_C( 6092), UINT16_C( 56264), UINT16_C( 52740), UINT16_C( 47873), UINT16_C( 59382), UINT16_C( 23859)), simde_x_mm512_set_epu16(UINT16_C( 0), UINT16_C( 43422), UINT16_C( 0), UINT16_C( 6355), UINT16_C( 18608), UINT16_C( 0), UINT16_C( 24062), UINT16_C( 4683), UINT16_C( 0), UINT16_C( 0), UINT16_C( 3749), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 44327), UINT16_C( 0), UINT16_C( 36249), UINT16_C( 0), UINT16_C( 0), UINT16_C( 14572), UINT16_C( 16264), UINT16_C( 0), UINT16_C( 0), UINT16_C( 35991), UINT16_C( 0), UINT16_C( 16799), 
UINT16_C( 36174), UINT16_C( 0), UINT16_C( 0), UINT16_C( 2372), UINT16_C( 314), UINT16_C( 0)) }, { simde_x_mm512_set_epu16(UINT16_C( 42795), UINT16_C( 13394), UINT16_C( 56119), UINT16_C( 28906), UINT16_C( 46556), UINT16_C( 57073), UINT16_C( 9174), UINT16_C( 39931), UINT16_C( 547), UINT16_C( 3767), UINT16_C( 52959), UINT16_C( 48990), UINT16_C( 64235), UINT16_C( 58389), UINT16_C( 26281), UINT16_C( 29309), UINT16_C( 29052), UINT16_C( 34694), UINT16_C( 5995), UINT16_C( 6270), UINT16_C( 20539), UINT16_C( 10179), UINT16_C( 38688), UINT16_C( 14327), UINT16_C( 15491), UINT16_C( 18652), UINT16_C( 19903), UINT16_C( 30123), UINT16_C( 25261), UINT16_C( 48076), UINT16_C( 10742), UINT16_C( 60984)), simde_x_mm512_set_epu16(UINT16_C( 5754), UINT16_C( 42498), UINT16_C( 48947), UINT16_C( 41678), UINT16_C( 61715), UINT16_C( 60738), UINT16_C( 30602), UINT16_C( 37004), UINT16_C( 11508), UINT16_C( 7979), UINT16_C( 61659), UINT16_C( 59616), UINT16_C( 41386), UINT16_C( 41040), UINT16_C( 17421), UINT16_C( 63555), UINT16_C( 27523), UINT16_C( 26800), UINT16_C( 25010), UINT16_C( 27339), UINT16_C( 56486), UINT16_C( 19128), UINT16_C( 15279), UINT16_C( 63719), UINT16_C( 51613), UINT16_C( 5129), UINT16_C( 42918), UINT16_C( 27704), UINT16_C( 60753), UINT16_C( 31238), UINT16_C( 35194), UINT16_C( 56682)), simde_x_mm512_set_epu16(UINT16_C( 37041), UINT16_C( 0), UINT16_C( 7172), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 2927), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 22849), UINT16_C( 17349), UINT16_C( 8860), UINT16_C( 0), UINT16_C( 1529), UINT16_C( 7894), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 23409), UINT16_C( 0), UINT16_C( 0), UINT16_C( 13523), UINT16_C( 0), UINT16_C( 2419), UINT16_C( 0), UINT16_C( 16838), UINT16_C( 0), UINT16_C( 4302)) }, { simde_x_mm512_set_epu16(UINT16_C( 27021), UINT16_C( 31131), UINT16_C( 63), UINT16_C( 55006), UINT16_C( 64465), UINT16_C( 34252), UINT16_C( 43748), UINT16_C( 49428), UINT16_C( 50369), 
UINT16_C( 25422), UINT16_C( 14520), UINT16_C( 51640), UINT16_C( 20399), UINT16_C( 31915), UINT16_C( 49018), UINT16_C( 59334), UINT16_C( 49204), UINT16_C( 62465), UINT16_C( 49892), UINT16_C( 58520), UINT16_C( 13977), UINT16_C( 51690), UINT16_C( 42246), UINT16_C( 63457), UINT16_C( 4753), UINT16_C( 14919), UINT16_C( 47008), UINT16_C( 7420), UINT16_C( 12098), UINT16_C( 31014), UINT16_C( 17813), UINT16_C( 51080)), simde_x_mm512_set_epu16(UINT16_C( 53007), UINT16_C( 61893), UINT16_C( 36710), UINT16_C( 53426), UINT16_C( 8030), UINT16_C( 20316), UINT16_C( 27324), UINT16_C( 24735), UINT16_C( 58762), UINT16_C( 62832), UINT16_C( 33606), UINT16_C( 6874), UINT16_C( 61584), UINT16_C( 2658), UINT16_C( 64632), UINT16_C( 57217), UINT16_C( 49112), UINT16_C( 22778), UINT16_C( 18985), UINT16_C( 10063), UINT16_C( 33785), UINT16_C( 16016), UINT16_C( 35319), UINT16_C( 18364), UINT16_C( 45360), UINT16_C( 60575), UINT16_C( 35960), UINT16_C( 48902), UINT16_C( 57525), UINT16_C( 38426), UINT16_C( 41010), UINT16_C( 54032)), simde_x_mm512_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1580), UINT16_C( 56435), UINT16_C( 13936), UINT16_C( 16424), UINT16_C( 24693), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 44766), UINT16_C( 0), UINT16_C( 29257), UINT16_C( 0), UINT16_C( 2117), UINT16_C( 92), UINT16_C( 39687), UINT16_C( 30907), UINT16_C( 48457), UINT16_C( 0), UINT16_C( 35674), UINT16_C( 6927), UINT16_C( 45093), UINT16_C( 0), UINT16_C( 0), UINT16_C( 11048), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_subs_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m512i_u16(r, ==, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_subs_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_subs_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_subs_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_subs_epi16) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm512_subs_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_subs_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_subs_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_subs_epu16) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/test-avx512-footer.h000066400000000000000000000022611400333146700207250ustar00rootroot00000000000000#if defined(SIMDE_TEST_BARE) int main(void) { int retval = EXIT_SUCCESS; fprintf(stdout, "1..%zu\n", (sizeof(test_suite_tests) / sizeof(test_suite_tests[0]))); for (size_t i = 0 ; i < (sizeof(test_suite_tests) / sizeof(test_suite_tests[0])) ; i++) { int res = test_suite_tests[i].func(); if (res != 0) { retval = EXIT_FAILURE; fprintf(stdout, "not ok %zu " HEDLEY_STRINGIFY(SIMDE_TEST_X86_AVX512_INSN) "/%s\n", i + 1, test_suite_tests[i].name); } else { fprintf(stdout, "ok %zu " HEDLEY_STRINGIFY(SIMDE_TEST_X86_AVX512_INSN) "/%s\n", i + 1, test_suite_tests[i].name); } } return retval; } #else #if defined(__cplusplus) static MunitSuite suite = { const_cast("/" HEDLEY_STRINGIFY(SIMDE_TEST_X86_AVX512_INSN)), test_suite_tests, NULL, 1, MUNIT_SUITE_OPTION_NONE }; #else static MunitSuite suite = { (char*) "/" HEDLEY_STRINGIFY(SIMDE_TEST_X86_AVX512_INSN), test_suite_tests, NULL, 1, MUNIT_SUITE_OPTION_NONE }; #endif HEDLEY_C_DECL MunitSuite* SIMDE_TEST_GENERATE_VARIANT_SYMBOL_CURRENT(HEDLEY_CONCAT(simde_test_x86_avx512_get_suite_,SIMDE_TEST_X86_AVX512_INSN)) (void) { return &suite; } #endif simde-0.7.2/test/x86/avx512/test-avx512.h000066400000000000000000000066721400333146700174430ustar00rootroot00000000000000#if !defined(SIMDE_TEST_X86_TEST_AVX512_H) #define SIMDE_TEST_X86_TEST_AVX512_H #include "../test-x86.h" #include "../test-avx.h" #include "../../../simde/x86/avx512/storeu.h" #include "../../../simde/x86/avx512/loadu.h" SIMDE_TEST_X86_GENERATE_FLOAT_TYPE_FUNCS_(__m512, 32, 16, simde_mm512_storeu_ps) SIMDE_TEST_X86_GENERATE_FLOAT_TYPE_FUNCS_(__m512d, 64, 8, simde_mm512_storeu_pd) SIMDE_TEST_X86_GENERATE_INT_TYPE_FUNCS_(__m512i, 
8, 64, simde_mm512_storeu_si512) SIMDE_TEST_X86_GENERATE_INT_TYPE_FUNCS_(__m512i, 16, 32, simde_mm512_storeu_si512) SIMDE_TEST_X86_GENERATE_INT_TYPE_FUNCS_(__m512i, 32, 16, simde_mm512_storeu_si512) SIMDE_TEST_X86_GENERATE_INT_TYPE_FUNCS_(__m512i, 64, 8, simde_mm512_storeu_si512) SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m512i, 8, 64, simde_mm512_storeu_si512) SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m512i, 16, 32, simde_mm512_storeu_si512) SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m512i, 32, 16, simde_mm512_storeu_si512) SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m512i, 64, 8, simde_mm512_storeu_si512) #define SIMDE_TEST_X86_GENERATE_MASK_FUNCS_(EL) \ static simde__mmask##EL \ simde_test_x86_random_mmask##EL(void) { \ return HEDLEY_STATIC_CAST(simde__mmask##EL, simde_test_codegen_random_u##EL()); \ } \ \ static void \ simde_test_x86_write_mmask##EL(int indent, simde__mmask##EL value, SimdeTestVecPos pos) { \ simde_test_codegen_write_u##EL(indent, HEDLEY_STATIC_CAST(uint##EL##_t, value), pos); \ } SIMDE_TEST_X86_GENERATE_MASK_FUNCS_(8) SIMDE_TEST_X86_GENERATE_MASK_FUNCS_(16) SIMDE_TEST_X86_GENERATE_MASK_FUNCS_(32) SIMDE_TEST_X86_GENERATE_MASK_FUNCS_(64) #define simde_test_x86_assert_equal_f32x16(a, b, precision) do { if (simde_test_x86_assert_equal_f32x16_(a, b, 1e-##precision##f, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_f64x8(a, b, precision) do { if (simde_test_x86_assert_equal_f64x8_(a, b, 1e-##precision, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_i8x64(a, b) do { if (simde_test_x86_assert_equal_i8x64_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_i16x32(a, b) do { if (simde_test_x86_assert_equal_i16x32_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_i32x16(a, b) do { if (simde_test_x86_assert_equal_i32x16_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while 
(0) #define simde_test_x86_assert_equal_i64x8(a, b) do { if (simde_test_x86_assert_equal_i64x8_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_u8x64(a, b) do { if (simde_test_x86_assert_equal_u8x64_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_u16x32(a, b) do { if (simde_test_x86_assert_equal_u16x32_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_u32x16(a, b) do { if (simde_test_x86_assert_equal_u32x16_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_u64x8(a, b) do { if (simde_test_x86_assert_equal_u64x8_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #if !defined(SIMDE_TEST_BARE) #define SIMDE_TEST_DECLARE_SUITE(name) SIMDE_TEST_SUITE_DECLARE_GETTERS(HEDLEY_CONCAT(simde_test_x86_avx512_get_suite_,name)) #include "declare-suites.h" #undef SIMDE_TEST_DECLARE_SUITE #endif #endif /* !defined(SIMDE_TEST_X86_TEST_AVX512_H) */ simde-0.7.2/test/x86/avx512/test.c000066400000000000000000002574771400333146700164250ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan * 2020 Christopher Moore */ #define SIMDE_TEST_X86_AVX512_INSN test #include #include #include static int test_simde_mm512_mask_test_epi16_mask (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask32 k1; const int16_t a[32]; const int16_t b[32]; const simde__mmask32 r; } test_vec[] = { { UINT32_C( 729589500), { -INT16_C( 21039), INT16_C( 7594), -INT16_C( 26143), -INT16_C( 3584), INT16_C( 10710), INT16_C( 22134), INT16_C( 11147), -INT16_C( 19843), INT16_C( 21651), INT16_C( 20557), INT16_C( 25768), INT16_C( 18307), -INT16_C( 21839), -INT16_C( 13908), INT16_C( 9876), -INT16_C( 8232), INT16_C( 16084), INT16_C( 8140), -INT16_C( 13132), INT16_C( 2833), -INT16_C( 29249), -INT16_C( 5655), -INT16_C( 24960), -INT16_C( 10165), INT16_C( 11736), -INT16_C( 31959), -INT16_C( 21330), INT16_C( 21460), -INT16_C( 23722), -INT16_C( 23028), -INT16_C( 20057), INT16_C( 31534) }, { INT16_C( 21038), -INT16_C( 7595), INT16_C( 26142), -INT16_C( 24339), -INT16_C( 10711), -INT16_C( 22135), -INT16_C( 11148), INT16_C( 19842), -INT16_C( 21652), INT16_C( 6864), -INT16_C( 25769), -INT16_C( 21127), -INT16_C( 31170), INT16_C( 13907), -INT16_C( 9877), -INT16_C( 26260), -INT16_C( 16085), INT16_C( 18812), INT16_C( 26919), INT16_C( 20969), INT16_C( 29248), -INT16_C( 19206), INT16_C( 31814), -INT16_C( 19711), -INT16_C( 11737), INT16_C( 32461), INT16_C( 18285), -INT16_C( 21461), INT16_C( 32717), INT16_C( 14562), INT16_C( 20056), -INT16_C( 31535) }, UINT32_C( 577536520) }, { UINT32_C( 302971227), { -INT16_C( 13115), -INT16_C( 19071), INT16_C( 12077), INT16_C( 8771), INT16_C( 27746), INT16_C( 13498), -INT16_C( 8099), -INT16_C( 27443), -INT16_C( 2264), -INT16_C( 16926), 
-INT16_C( 10784), -INT16_C( 22105), -INT16_C( 11549), INT16_C( 9016), -INT16_C( 28981), -INT16_C( 14112), INT16_C( 23332), INT16_C( 15189), -INT16_C( 15727), INT16_C( 13431), -INT16_C( 29429), INT16_C( 26707), INT16_C( 8302), INT16_C( 6606), -INT16_C( 27258), INT16_C( 26398), -INT16_C( 14997), INT16_C( 19984), INT16_C( 32041), INT16_C( 11498), -INT16_C( 13609), INT16_C( 17374) }, { INT16_C( 13114), INT16_C( 19070), -INT16_C( 12078), -INT16_C( 8772), INT16_C( 3933), -INT16_C( 13499), INT16_C( 16687), -INT16_C( 18710), INT16_C( 2263), INT16_C( 16925), INT16_C( 11725), INT16_C( 25744), INT16_C( 7869), -INT16_C( 9017), -INT16_C( 22787), INT16_C( 14111), -INT16_C( 24871), -INT16_C( 21631), INT16_C( 15726), -INT16_C( 13432), -INT16_C( 12980), INT16_C( 31639), -INT16_C( 32497), -INT16_C( 6607), INT16_C( 20105), INT16_C( 22056), -INT16_C( 18309), INT16_C( 14778), -INT16_C( 32042), -INT16_C( 11499), INT16_C( 13608), INT16_C( 266) }, UINT32_C( 33708112) }, { UINT32_C(3777173016), { -INT16_C( 627), INT16_C( 2271), -INT16_C( 31277), -INT16_C( 28867), INT16_C( 6684), INT16_C( 22437), -INT16_C( 31723), INT16_C( 20962), -INT16_C( 22682), -INT16_C( 23040), INT16_C( 32539), -INT16_C( 28658), -INT16_C( 1712), INT16_C( 10354), -INT16_C( 11131), INT16_C( 4868), -INT16_C( 60), INT16_C( 17436), -INT16_C( 30076), -INT16_C( 13425), INT16_C( 1908), INT16_C( 23586), -INT16_C( 28532), INT16_C( 17326), -INT16_C( 20820), -INT16_C( 16377), INT16_C( 27569), INT16_C( 620), INT16_C( 12144), -INT16_C( 5489), -INT16_C( 27410), -INT16_C( 16130) }, { INT16_C( 626), -INT16_C( 2272), -INT16_C( 20595), INT16_C( 28866), -INT16_C( 6685), -INT16_C( 16435), INT16_C( 31722), -INT16_C( 27134), -INT16_C( 5079), INT16_C( 23491), -INT16_C( 32540), INT16_C( 21660), INT16_C( 1711), INT16_C( 29522), INT16_C( 11130), -INT16_C( 4869), INT16_C( 6958), -INT16_C( 17437), -INT16_C( 22838), -INT16_C( 21205), -INT16_C( 1909), INT16_C( 30061), INT16_C( 28531), -INT16_C( 25333), -INT16_C( 12709), INT16_C( 16376), -INT16_C( 
27570), -INT16_C( 621), -INT16_C( 6501), INT16_C( 5488), INT16_C( 27409), INT16_C( 16129) }, UINT32_C( 18940416) }, { UINT32_C(3714267986), { INT16_C( 25185), INT16_C( 12792), INT16_C( 10477), -INT16_C( 22254), -INT16_C( 21385), INT16_C( 30944), INT16_C( 22376), -INT16_C( 32250), -INT16_C( 9887), -INT16_C( 2295), INT16_C( 3391), -INT16_C( 24650), -INT16_C( 27822), -INT16_C( 23404), -INT16_C( 2097), INT16_C( 12417), INT16_C( 29785), INT16_C( 10160), -INT16_C( 15630), INT16_C( 1744), -INT16_C( 20221), INT16_C( 27518), -INT16_C( 22520), INT16_C( 27023), INT16_C( 10865), INT16_C( 31162), INT16_C( 7223), INT16_C( 16892), -INT16_C( 28512), INT16_C( 28645), INT16_C( 26503), -INT16_C( 8033) }, { INT16_C( 20699), -INT16_C( 12793), -INT16_C( 10478), INT16_C( 5844), INT16_C( 21384), -INT16_C( 28543), INT16_C( 4347), INT16_C( 32249), -INT16_C( 19687), INT16_C( 2294), -INT16_C( 3392), INT16_C( 24649), INT16_C( 11906), INT16_C( 2511), INT16_C( 28309), INT16_C( 29161), -INT16_C( 3906), -INT16_C( 11969), INT16_C( 5064), INT16_C( 20711), INT16_C( 26726), INT16_C( 25313), -INT16_C( 9607), -INT16_C( 27937), -INT16_C( 10866), INT16_C( 20122), -INT16_C( 7224), INT16_C( 19118), INT16_C( 32018), -INT16_C( 22700), INT16_C( 15851), -INT16_C( 21992) }, UINT32_C(3630379328) }, { UINT32_C(3903854973), { -INT16_C( 2437), -INT16_C( 21731), -INT16_C( 4834), INT16_C( 8329), INT16_C( 11606), INT16_C( 3105), -INT16_C( 19486), INT16_C( 1449), INT16_C( 8891), INT16_C( 9098), -INT16_C( 26655), INT16_C( 27927), INT16_C( 22472), -INT16_C( 24819), -INT16_C( 10289), INT16_C( 8872), -INT16_C( 8755), -INT16_C( 5231), INT16_C( 6859), INT16_C( 12017), INT16_C( 5085), INT16_C( 19123), -INT16_C( 6969), INT16_C( 25454), -INT16_C( 1827), -INT16_C( 29521), -INT16_C( 28888), INT16_C( 19755), -INT16_C( 13338), -INT16_C( 28015), -INT16_C( 21719), INT16_C( 2485) }, { -INT16_C( 119), INT16_C( 21730), -INT16_C( 4582), -INT16_C( 27617), -INT16_C( 11607), -INT16_C( 3106), INT16_C( 19485), -INT16_C( 1450), -INT16_C( 8892), 
-INT16_C( 11335), -INT16_C( 7045), -INT16_C( 12512), -INT16_C( 22473), INT16_C( 24818), INT16_C( 24916), -INT16_C( 8873), INT16_C( 14944), INT16_C( 31281), INT16_C( 20520), -INT16_C( 12018), -INT16_C( 5086), INT16_C( 16324), INT16_C( 6968), INT16_C( 31801), -INT16_C( 3336), INT16_C( 29520), INT16_C( 28887), INT16_C( 3650), INT16_C( 13337), INT16_C( 28014), -INT16_C( 14699), -INT16_C( 2486) }, UINT32_C(1218447373) }, { UINT32_C(4154045996), { -INT16_C( 17182), -INT16_C( 24092), INT16_C( 4177), INT16_C( 10821), -INT16_C( 30163), -INT16_C( 31643), -INT16_C( 29846), -INT16_C( 18436), INT16_C( 9984), INT16_C( 24019), INT16_C( 16234), -INT16_C( 26639), INT16_C( 5577), INT16_C( 9464), INT16_C( 25036), INT16_C( 492), -INT16_C( 12044), INT16_C( 17826), -INT16_C( 6176), INT16_C( 21871), -INT16_C( 10143), -INT16_C( 13115), -INT16_C( 9373), INT16_C( 25822), -INT16_C( 19965), INT16_C( 28097), -INT16_C( 19471), -INT16_C( 11259), -INT16_C( 701), -INT16_C( 30727), INT16_C( 19902), -INT16_C( 19808) }, { INT16_C( 17181), -INT16_C( 520), INT16_C( 26410), INT16_C( 30546), INT16_C( 30162), INT16_C( 31642), INT16_C( 29845), -INT16_C( 4374), -INT16_C( 9985), -INT16_C( 24020), -INT16_C( 16235), INT16_C( 26638), INT16_C( 28445), -INT16_C( 9465), -INT16_C( 22340), -INT16_C( 9843), -INT16_C( 31253), INT16_C( 5590), INT16_C( 10733), -INT16_C( 16499), INT16_C( 10142), INT16_C( 13114), INT16_C( 9372), -INT16_C( 25823), INT16_C( 19964), -INT16_C( 28098), INT16_C( 19470), INT16_C( 11258), INT16_C( 700), INT16_C( 30726), -INT16_C( 27734), -INT16_C( 27310) }, UINT32_C(3221852168) }, { UINT32_C( 562338000), { INT16_C( 18896), INT16_C( 2168), -INT16_C( 14015), INT16_C( 27690), INT16_C( 30605), INT16_C( 4579), INT16_C( 13180), -INT16_C( 18771), -INT16_C( 22799), -INT16_C( 25709), -INT16_C( 6070), -INT16_C( 20492), -INT16_C( 4029), -INT16_C( 1922), -INT16_C( 8877), INT16_C( 19329), -INT16_C( 27654), INT16_C( 1068), -INT16_C( 23352), INT16_C( 17071), INT16_C( 15460), -INT16_C( 1714), -INT16_C( 1157), 
-INT16_C( 5392), INT16_C( 15077), -INT16_C( 27727), -INT16_C( 17384), INT16_C( 23399), -INT16_C( 6483), INT16_C( 340), INT16_C( 1988), -INT16_C( 30939) }, { INT16_C( 6703), -INT16_C( 2169), INT16_C( 14014), INT16_C( 9017), -INT16_C( 30606), -INT16_C( 4580), -INT16_C( 13181), -INT16_C( 2218), INT16_C( 22798), INT16_C( 14773), INT16_C( 8785), INT16_C( 11508), INT16_C( 4028), INT16_C( 1921), INT16_C( 8876), -INT16_C( 9396), -INT16_C( 11204), -INT16_C( 1069), INT16_C( 3082), INT16_C( 32030), INT16_C( 14996), INT16_C( 6251), -INT16_C( 16122), INT16_C( 5391), -INT16_C( 15078), INT16_C( 27726), INT16_C( 17383), -INT16_C( 23400), INT16_C( 6482), -INT16_C( 341), -INT16_C( 1989), INT16_C( 30938) }, UINT32_C( 297088) }, { UINT32_C( 168615504), { INT16_C( 24431), INT16_C( 14936), INT16_C( 3341), -INT16_C( 25332), -INT16_C( 3277), INT16_C( 28215), INT16_C( 15657), -INT16_C( 24176), -INT16_C( 2560), INT16_C( 7101), -INT16_C( 3073), INT16_C( 12678), INT16_C( 29635), INT16_C( 10854), INT16_C( 29391), -INT16_C( 21077), -INT16_C( 29486), INT16_C( 3002), -INT16_C( 31414), -INT16_C( 25440), -INT16_C( 27411), -INT16_C( 24085), INT16_C( 31490), INT16_C( 17773), -INT16_C( 20527), -INT16_C( 26255), -INT16_C( 17121), -INT16_C( 7591), -INT16_C( 17871), -INT16_C( 18455), INT16_C( 7587), INT16_C( 30197) }, { INT16_C( 28073), -INT16_C( 3227), -INT16_C( 3342), INT16_C( 27504), INT16_C( 23735), -INT16_C( 17908), INT16_C( 20439), -INT16_C( 22120), INT16_C( 2559), -INT16_C( 7102), INT16_C( 3072), -INT16_C( 5981), -INT16_C( 29636), -INT16_C( 8289), -INT16_C( 27479), INT16_C( 21076), -INT16_C( 18175), -INT16_C( 3003), -INT16_C( 18772), INT16_C( 25439), INT16_C( 27410), -INT16_C( 5859), -INT16_C( 19013), -INT16_C( 17774), -INT16_C( 11074), -INT16_C( 16738), INT16_C( 17120), INT16_C( 7590), INT16_C( 17870), INT16_C( 30972), INT16_C( 20953), -INT16_C( 9270) }, UINT32_C( 33835088) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = 
simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__mmask32 r = simde_mm512_mask_test_epi16_mask(test_vec[i].k1, a, b); simde_assert_equal_mmask32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int16_t a_[32]; int16_t b_[32]; simde__mmask32 k1 = simde_test_x86_random_mmask32(); simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < 32 ; j++) if (rand() & 1) a_[j] = ~b_[j]; simde__m512i a = simde_mm512_loadu_epi16(a_); simde__m512i b = simde_mm512_loadu_epi16(b_); simde__mmask32 r = simde_mm512_mask_test_epi16_mask(k1, a, b); simde_test_x86_write_mmask32(2, k1, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_test_epi32_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m512i a; simde__m512i b; simde__mmask16 r; } test_vec[8] = { { UINT16_C(13733), simde_mm512_set_epi32(INT32_C(-1058044212), INT32_C( 1745554146), INT32_C( -938028173), INT32_C( 1123843978), INT32_C( 0), INT32_C( 369104615), INT32_C( 288860030), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1441724088), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)), simde_mm512_set_epi32(INT32_C( -173456664), INT32_C( 0), INT32_C( 1847983972), INT32_C( 0), INT32_C( 1618889484), INT32_C(-1607295283), INT32_C( 0), INT32_C( 0), INT32_C( 1566474746), INT32_C( 0), INT32_C( 0), INT32_C( -427236235), INT32_C( 1934991438), INT32_C( -587656653), INT32_C( -72145550), INT32_C( 0)), UINT16_C( 9216) }, { UINT16_C(63633), simde_mm512_set_epi32(INT32_C( -839841286), INT32_C( 0), INT32_C(-1528318158), INT32_C(-1752243244), INT32_C( 0), 
INT32_C(-1210241348), INT32_C(-1908519219), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 521483609), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 543914332), INT32_C( 2140721152), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)), UINT16_C( 4096) }, { UINT16_C(46032), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 329000619), INT32_C(-1407469850), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -102278232), INT32_C( 43450551), INT32_C( 0), INT32_C( 1603148048), INT32_C( 0), INT32_C( 306941628), INT32_C( 0), INT32_C( 1716086600)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( -262488052), INT32_C( 0), INT32_C( -979944078), INT32_C( 0), INT32_C( 0), INT32_C( -671140903), INT32_C( 2141932562), INT32_C( -937827627), INT32_C( -402281975), INT32_C( 1596901579), INT32_C( 735909604), INT32_C( 0), INT32_C(-1202496908), INT32_C( 0), INT32_C( 1187626051)), UINT16_C( 4304) }, { UINT16_C(22689), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 1163951893), INT32_C( 0), INT32_C( 0), INT32_C( -972105386), INT32_C( 0), INT32_C( -189029459), INT32_C( -225058862), INT32_C(-1537122616), INT32_C( 0), INT32_C(-1756788873), INT32_C( 0), INT32_C( 0), INT32_C( -184793757), INT32_C( 1418114595)), simde_mm512_set_epi32(INT32_C(-1354385088), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1841164366), INT32_C( -94325362), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 594617140), INT32_C( 0), INT32_C( 239227634), INT32_C( 0), INT32_C( 895132574), INT32_C( -123965580)), UINT16_C( 1) }, { UINT16_C(25521), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 1916556758), INT32_C( 729046451), INT32_C( 516587379), INT32_C( 1306886393), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 2010256146), INT32_C( -351991039), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 
0), INT32_C( 0)), simde_mm512_set_epi32(INT32_C(-1549799175), INT32_C( -926637576), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1633420708), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1070649171), INT32_C( -629074644), INT32_C( -658686596), INT32_C( 1058031607), INT32_C( 0), INT32_C( 590507721)), UINT16_C(16416) }, { UINT16_C(62461), simde_mm512_set_epi32(INT32_C(-1777930200), INT32_C(-2074304971), INT32_C(-1238248254), INT32_C( 0), INT32_C( 0), INT32_C(-2126101806), INT32_C( 1488735838), INT32_C(-1941730876), INT32_C( 0), INT32_C( 0), INT32_C(-1287988933), INT32_C( -234166092), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1899039383)), simde_mm512_set_epi32(INT32_C(-2019508611), INT32_C( 0), INT32_C( 630346390), INT32_C( 1865079685), INT32_C( -135665525), INT32_C( 0), INT32_C( 751170486), INT32_C( 580518147), INT32_C( 875750845), INT32_C( 263799439), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -889468036), INT32_C( 606319308), INT32_C( 0)), UINT16_C(41728) }, { UINT16_C(64894), simde_mm512_set_epi32(INT32_C( 1983606396), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1668321234), INT32_C( 1230207891), INT32_C(-1334904081), INT32_C(-1369460101), INT32_C( 0), INT32_C( 0), INT32_C( 556969985), INT32_C( 844640930), INT32_C(-1447169687), INT32_C( -621740127), INT32_C( 1847803343), INT32_C( 0)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -578157153), INT32_C( 0), INT32_C(-1740019927), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 136767631), INT32_C( 0), INT32_C(-1994887012), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1203448248)), UINT16_C( 1040) }, { UINT16_C( 72), simde_mm512_set_epi32(INT32_C( 0), INT32_C( -302271050), INT32_C( 0), INT32_C( 0), INT32_C( 1073370549), INT32_C( 911478852), INT32_C( 2032788905), INT32_C( 0), INT32_C( 398076492), INT32_C( 901163928), INT32_C( 0), INT32_C(-1022133203), INT32_C( -345543230), INT32_C( 0), INT32_C( 2110415254), INT32_C( 0)), simde_mm512_set_epi32(INT32_C( 0), 
INT32_C( 1062395075), INT32_C( -125220770), INT32_C( 0), INT32_C( 0), INT32_C(-1603705335), INT32_C(-1563808089), INT32_C( 0), INT32_C( 0), INT32_C( 1591467282), INT32_C( -57010818), INT32_C( 0), INT32_C( 0), INT32_C( 1904594022), INT32_C( 0), INT32_C( -170170916)), UINT16_C( 64) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask16 r = simde_mm512_mask_test_epi32_mask(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_equal_mmask16(r, HEDLEY_STATIC_CAST(uint16_t, test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_test_epi64_mask(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask8 k; simde__m512i a; simde__m512i b; simde__mmask8 r; } test_vec[8] = { { UINT8_C( 51), simde_mm512_set_epi64(INT64_C(-1021777151925940720), INT64_C(-8606874489654438743), INT64_C( 2982642907250026668), INT64_C( 1121557266302837638), INT64_C( 2431134246593116065), INT64_C( 613583019816833791), INT64_C( 1064733599562411669), INT64_C( 2423175833785490495)), simde_mm512_set_epi64(INT64_C( 1320056489604494378), INT64_C( 8016850443050245285), INT64_C(-1985397303830803362), INT64_C( 3490054501014041612), INT64_C(-5459290507928993205), INT64_C( 7649116850053105819), INT64_C( 2306883418990405350), INT64_C( -533675138939657219)), UINT8_C( 51) }, { UINT8_C(107), simde_mm512_set_epi64(INT64_C(-5107242476853461048), INT64_C( 8011490560794608442), INT64_C( 6781928010904462143), INT64_C(-2849262341734469560), INT64_C( -680409599063304510), INT64_C(-5566534159487294326), INT64_C( 1845249206901740837), INT64_C( -995351966812894672)), simde_mm512_set_epi64(INT64_C( 5576021829988608395), INT64_C( 6950975376569962720), INT64_C(-6894298607029431267), INT64_C( 627810606408873629), INT64_C( 5554191263207946561), INT64_C(-8076655846804767103), INT64_C( 298038848695084217), INT64_C( 984575648746107146)), UINT8_C(107) }, { UINT8_C( 37), simde_mm512_set_epi64(INT64_C( 7893528652770105648), INT64_C( 6530680127391412513), INT64_C( 8552423929938430730), 
INT64_C(-2756847038438887047), INT64_C(-4669427049793346259), INT64_C(-1241821792536551210), INT64_C(-3299061013747874632), INT64_C(-6836802816564857048)), simde_mm512_set_epi64(INT64_C(-6002777026216974201), INT64_C(-4636334703117125394), INT64_C( 1607894826575669284), INT64_C( 4780984352817723804), INT64_C( 6091782245426739371), INT64_C( 678622961946838177), INT64_C(-5016020090857558708), INT64_C( 6626170086889501267)), UINT8_C( 37) }, { UINT8_C(219), simde_mm512_set_epi64(INT64_C(-2186517171747754940), INT64_C(-7709006576096545178), INT64_C(-4148769136791016488), INT64_C(-1135438727484114896), INT64_C( 8260953320246547916), INT64_C(-1100980776979386438), INT64_C(-5918460005278991975), INT64_C(-9003997367625569086)), simde_mm512_set_epi64(INT64_C( 4216784995205036162), INT64_C( -610629877628633270), INT64_C( 8989369503655563805), INT64_C( 7802074548335401303), INT64_C(-8877282827036376912), INT64_C( 503689220758847744), INT64_C(-1271186344370845002), INT64_C(-7863377575460447119)), UINT8_C(219) }, { UINT8_C(231), simde_mm512_set_epi64(INT64_C(-1315792135193853217), INT64_C( 645488177526442731), INT64_C(-9165154544452808856), INT64_C( 6660247336280542891), INT64_C( 6275701276122863077), INT64_C( 6727198969638040444), INT64_C(-7884988216343642316), INT64_C(-2772157409458686119)), simde_mm512_set_epi64(INT64_C(-3947691112861846019), INT64_C(-6027683744476655452), INT64_C( 7878786769508596925), INT64_C( 898779895364838727), INT64_C( 1691913248175202869), INT64_C( 925600519637433490), INT64_C(-7786666185996880531), INT64_C( 83985053119720048)), UINT8_C(231) }, { UINT8_C(185), simde_mm512_set_epi64(INT64_C(-1840026123275896761), INT64_C( 2958979742732699034), INT64_C( 8087751231081622093), INT64_C(-8600769129449664089), INT64_C( 4437078864330373252), INT64_C(-7780807660685149956), INT64_C(-2075154109213332653), INT64_C( 4181892367727099352)), simde_mm512_set_epi64(INT64_C( 774147715399030450), INT64_C( -164954981177608742), INT64_C( 2704996814905355295), 
INT64_C(-8428935692581869552), INT64_C( 9058580247051597222), INT64_C(-5265103403663862315), INT64_C( -26456365246944797), INT64_C(-2390663458830548776)), UINT8_C(185) }, { UINT8_C(101), simde_mm512_set_epi64(INT64_C( -500144007601202828), INT64_C( 1632600048776630049), INT64_C( -841897496147484648), INT64_C(-9027608573250153699), INT64_C( 5145549241007852287), INT64_C( 3253065934430492129), INT64_C(-7159974623801904746), INT64_C(-5093282550607071469)), simde_mm512_set_epi64(INT64_C(-5525231520825547752), INT64_C(-8701772324698157173), INT64_C(-8951932473020511764), INT64_C( 3775424668993203418), INT64_C( 6848385375992501495), INT64_C( 530028931531319450), INT64_C(-2966939250988120843), INT64_C( 8223727294178057916)), UINT8_C(101) }, { UINT8_C( 49), simde_mm512_set_epi64(INT64_C(-6089529524580685715), INT64_C(-7930651770137634148), INT64_C( 5513049874271089607), INT64_C( -418315818469941774), INT64_C( 5583867531021541749), INT64_C( 3646229040969122240), INT64_C( 298432255739992787), INT64_C( 8383322056459699927)), simde_mm512_set_epi64(INT64_C( 6173222450906561792), INT64_C(-2639921841557235108), INT64_C( 1632309106004579853), INT64_C(-5099037996367773534), INT64_C(-4229050909659913228), INT64_C(-6254506698641899913), INT64_C( 1797655404582044593), INT64_C( 7065183211262770455)), UINT8_C( 49) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__mmask8 r = simde_mm512_mask_test_epi64_mask(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_equal_mmask8(r, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_test_epi8_mask (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde__mmask64 k1; const int8_t a[64]; const int8_t b[64]; const simde__mmask64 r; } test_vec[] = { { UINT64_C(15709700008231627249), { INT8_C( 54), INT8_C( 59), INT8_C( 44), -INT8_C( 56), INT8_C( 94), -INT8_C( 10), INT8_C( 44), -INT8_C( 123), INT8_C( 39), INT8_C( 69), -INT8_C( 11), INT8_C( 99), INT8_C( 87), -INT8_C( 46), INT8_C( 7), 
-INT8_C( 48), -INT8_C( 115), -INT8_C( 104), INT8_C( 62), INT8_C( 125), INT8_C( 11), -INT8_C( 24), INT8_C( 106), -INT8_C( 84), -INT8_C( 99), INT8_C( 3), -INT8_C( 125), INT8_C( 4), -INT8_C( 100), -INT8_C( 121), -INT8_C( 105), INT8_C( 74), INT8_C( 40), -INT8_C( 2), -INT8_C( 94), -INT8_C( 95), INT8_C( 116), -INT8_C( 35), INT8_C( 38), -INT8_C( 101), INT8_C( 21), -INT8_C( 29), -INT8_C( 25), INT8_C( 113), INT8_C( 35), -INT8_C( 18), INT8_C( 34), -INT8_C( 80), -INT8_C( 122), INT8_C( 124), -INT8_C( 47), INT8_C( 111), INT8_C( 30), INT8_C( 95), -INT8_C( 113), -INT8_C( 54), INT8_C( 65), INT8_C( 18), -INT8_C( 49), -INT8_C( 34), -INT8_C( 103), -INT8_C( 94), -INT8_C( 72), -INT8_C( 92) }, { -INT8_C( 61), -INT8_C( 60), -INT8_C( 45), INT8_C( 55), -INT8_C( 95), -INT8_C( 7), -INT8_C( 45), -INT8_C( 103), INT8_C( 121), -INT8_C( 70), INT8_C( 10), -INT8_C( 100), -INT8_C( 88), INT8_C( 45), INT8_C( 77), INT8_C( 47), INT8_C( 99), INT8_C( 7), -INT8_C( 63), -INT8_C( 126), INT8_C( 43), INT8_C( 80), INT8_C( 113), INT8_C( 83), INT8_C( 98), INT8_C( 30), -INT8_C( 114), -INT8_C( 5), INT8_C( 99), INT8_C( 19), INT8_C( 45), INT8_C( 38), -INT8_C( 41), INT8_C( 1), INT8_C( 93), INT8_C( 120), -INT8_C( 6), INT8_C( 48), INT8_C( 17), INT8_C( 116), -INT8_C( 22), INT8_C( 28), INT8_C( 16), -INT8_C( 109), INT8_C( 73), INT8_C( 93), -INT8_C( 62), -INT8_C( 84), INT8_C( 100), -INT8_C( 125), INT8_C( 46), -INT8_C( 112), -INT8_C( 45), -INT8_C( 96), -INT8_C( 29), INT8_C( 53), -INT8_C( 66), INT8_C( 113), INT8_C( 48), INT8_C( 33), -INT8_C( 124), INT8_C( 93), INT8_C( 71), INT8_C( 91) }, UINT64_C( 1297055008672465313) }, { UINT64_C(16213930982914302159), { -INT8_C( 18), INT8_C( 103), -INT8_C( 84), INT8_C( 66), -INT8_C( 63), -INT8_C( 47), -INT8_C( 93), -INT8_C( 103), -INT8_C( 81), -INT8_C( 71), -INT8_C( 125), -INT8_C( 126), INT8_C( 60), INT8_C( 73), -INT8_C( 117), -INT8_C( 3), -INT8_C( 77), -INT8_C( 62), -INT8_C( 106), INT8_C( 116), INT8_C( 126), -INT8_C( 98), -INT8_C( 41), INT8_C( 68), -INT8_C( 59), INT8_C( 63), INT8_C( 19), 
INT8_C( 67), -INT8_C( 76), -INT8_C( 59), INT8_C( 104), -INT8_C( 94), INT8_C( 22), -INT8_C( 70), INT8_C( 30), INT8_C( 64), -INT8_C( 25), -INT8_C( 63), INT8_C( 63), -INT8_C( 106), -INT8_C( 61), INT8_C( 124), -INT8_C( 34), INT8_C( 0), -INT8_C( 10), INT8_C( 97), -INT8_C( 61), INT8_C( 22), INT8_C( 63), INT8_C( 97), -INT8_C( 117), INT8_C( 90), -INT8_C( 85), -INT8_C( 5), -INT8_C( 97), -INT8_C( 58), INT8_C( 58), -INT8_C( 11), -INT8_C( 32), -INT8_C( 18), -INT8_C( 17), INT8_C( 5), INT8_C( 102), INT8_C( 110) }, { -INT8_C( 42), -INT8_C( 81), -INT8_C( 82), -INT8_C( 67), INT8_C( 112), INT8_C( 46), INT8_C( 84), INT8_C( 102), INT8_C( 49), INT8_C( 70), -INT8_C( 62), INT8_C( 125), -INT8_C( 61), -INT8_C( 122), INT8_C( 124), INT8_C( 2), -INT8_C( 32), -INT8_C( 122), -INT8_C( 100), -INT8_C( 117), -INT8_C( 127), INT8_C( 97), INT8_C( 40), -INT8_C( 69), INT8_C( 58), INT8_C( 9), -INT8_C( 86), INT8_C( 41), INT8_C( 14), INT8_C( 58), -INT8_C( 105), -INT8_C( 27), -INT8_C( 23), INT8_C( 69), -INT8_C( 94), INT8_C( 90), INT8_C( 115), -INT8_C( 10), -INT8_C( 64), -INT8_C( 92), INT8_C( 60), -INT8_C( 125), INT8_C( 33), -INT8_C( 1), INT8_C( 9), -INT8_C( 98), INT8_C( 1), -INT8_C( 23), INT8_C( 36), -INT8_C( 98), INT8_C( 116), -INT8_C( 91), -INT8_C( 1), -INT8_C( 99), INT8_C( 96), INT8_C( 57), -INT8_C( 90), INT8_C( 10), INT8_C( 99), -INT8_C( 76), INT8_C( 69), -INT8_C( 6), -INT8_C( 103), INT8_C( 46) }, UINT64_C( 9295781545615163399) }, { UINT64_C( 481084291829640640), { INT8_C( 54), INT8_C( 23), -INT8_C( 121), -INT8_C( 112), -INT8_C( 73), INT8_C( 109), INT8_C( 1), -INT8_C( 16), -INT8_C( 31), -INT8_C( 77), -INT8_C( 30), -INT8_C( 65), INT8_C( 124), INT8_C( 73), -INT8_C( 24), INT8_C( 76), INT8_C( 104), -INT8_C( 94), -INT8_C( 81), INT8_C( 46), INT8_C( 27), -INT8_C( 34), -INT8_C( 126), -INT8_C( 32), INT8_C( 63), INT8_C( 0), INT8_C( 45), -INT8_C( 36), -INT8_C( 88), -INT8_C( 83), -INT8_C( 30), -INT8_C( 25), -INT8_C( 71), INT8_C( 106), INT8_C( 119), INT8_C( 125), -INT8_C( 52), INT8_C( 95), INT8_C( 120), -INT8_C( 
33), INT8_C( 62), INT8_C( 90), INT8_C( 78), -INT8_C( 87), -INT8_C( 93), INT8_C( 54), INT8_C( 77), INT8_C( 125), -INT8_C( 39), -INT8_C( 56), -INT8_C( 84), -INT8_C( 12), -INT8_C( 36), -INT8_C( 113), -INT8_C( 48), -INT8_C( 67), INT8_C( 55), -INT8_C( 3), -INT8_C( 57), INT8_C( 57), -INT8_C( 86), INT8_C( 63), INT8_C( 32), INT8_C( 111) }, { -INT8_C( 63), -INT8_C( 24), INT8_C( 25), INT8_C( 111), INT8_C( 72), -INT8_C( 110), -INT8_C( 2), -INT8_C( 122), -INT8_C( 20), INT8_C( 76), -INT8_C( 124), -INT8_C( 112), -INT8_C( 125), -INT8_C( 49), INT8_C( 13), INT8_C( 92), -INT8_C( 105), -INT8_C( 71), INT8_C( 80), -INT8_C( 15), -INT8_C( 24), INT8_C( 33), -INT8_C( 118), INT8_C( 31), INT8_C( 30), -INT8_C( 1), -INT8_C( 123), -INT8_C( 10), INT8_C( 87), INT8_C( 82), -INT8_C( 120), INT8_C( 24), INT8_C( 58), -INT8_C( 95), -INT8_C( 120), -INT8_C( 126), INT8_C( 51), -INT8_C( 122), INT8_C( 8), INT8_C( 32), -INT8_C( 45), -INT8_C( 115), -INT8_C( 80), INT8_C( 86), INT8_C( 92), -INT8_C( 67), -INT8_C( 78), -INT8_C( 12), INT8_C( 119), INT8_C( 2), -INT8_C( 27), INT8_C( 95), INT8_C( 35), INT8_C( 112), INT8_C( 126), INT8_C( 66), INT8_C( 111), INT8_C( 3), INT8_C( 56), -INT8_C( 58), INT8_C( 85), -INT8_C( 64), -INT8_C( 33), -INT8_C( 112) }, UINT64_C( 147812851065728384) }, { UINT64_C(10541233849462194310), { INT8_C( 12), -INT8_C( 125), -INT8_C( 15), -INT8_C( 107), -INT8_C( 15), -INT8_C( 22), INT8_C( 4), INT8_C( 58), INT8_C( 120), INT8_C( 20), INT8_C( 36), INT8_C( 66), -INT8_C( 25), INT8_C( 33), INT8_C( 43), INT8_C( 72), -INT8_C( 32), -INT8_C( 36), -INT8_C( 45), INT8_C( 49), -INT8_C( 121), -INT8_C( 75), -INT8_C( 80), INT8_C( 118), INT8_C( 39), INT8_C( 53), -INT8_C( 119), INT8_C( 8), INT8_C( 49), -INT8_C( 46), INT8_C( 17), INT8_C( 73), INT8_C( 85), INT8_C( 80), -INT8_C( 98), INT8_C( 78), -INT8_C( 66), -INT8_C( 94), -INT8_C( 119), INT8_C( 83), -INT8_C( 53), -INT8_C( 73), -INT8_C( 103), INT8_C( 12), INT8_C( 54), -INT8_C( 91), -INT8_C( 4), INT8_C( 28), -INT8_C( 41), -INT8_C( 33), -INT8_C( 6), -INT8_C( 114), 
INT8_C( 108), -INT8_C( 67), INT8_C( 4), -INT8_C( 108), INT8_C( 96), -INT8_C( 114), INT8_C( 92), -INT8_C( 99), INT8_C( 96), INT8_C( 102), -INT8_C( 51), -INT8_C( 74) }, { -INT8_C( 13), INT8_C( 108), -INT8_C( 63), INT8_C( 106), INT8_C( 14), -INT8_C( 110), -INT8_C( 67), -INT8_C( 59), -INT8_C( 121), INT8_C( 86), -INT8_C( 46), -INT8_C( 67), INT8_C( 24), -INT8_C( 34), -INT8_C( 44), -INT8_C( 73), -INT8_C( 67), -INT8_C( 50), INT8_C( 69), -INT8_C( 50), INT8_C( 120), INT8_C( 74), -INT8_C( 111), INT8_C( 88), -INT8_C( 40), INT8_C( 92), INT8_C( 105), INT8_C( 56), -INT8_C( 61), INT8_C( 54), -INT8_C( 18), -INT8_C( 74), -INT8_C( 94), -INT8_C( 81), INT8_C( 32), -INT8_C( 79), INT8_C( 65), -INT8_C( 35), INT8_C( 118), -INT8_C( 55), INT8_C( 52), INT8_C( 72), -INT8_C( 122), INT8_C( 76), INT8_C( 38), INT8_C( 90), INT8_C( 3), -INT8_C( 29), INT8_C( 40), INT8_C( 73), -INT8_C( 79), -INT8_C( 95), -INT8_C( 109), INT8_C( 66), -INT8_C( 7), INT8_C( 107), -INT8_C( 97), INT8_C( 98), -INT8_C( 93), INT8_C( 98), -INT8_C( 104), -INT8_C( 110), INT8_C( 24), INT8_C( 59) }, UINT64_C( 9369769811380535300) }, { UINT64_C(12422559118012878291), { -INT8_C( 70), -INT8_C( 66), -INT8_C( 24), INT8_C( 66), -INT8_C( 81), -INT8_C( 31), -INT8_C( 47), INT8_C( 18), -INT8_C( 64), INT8_C( 64), -INT8_C( 70), INT8_C( 33), INT8_C( 79), -INT8_C( 74), -INT8_C( 44), INT8_C( 5), INT8_C( 55), INT8_C( 60), -INT8_C( 17), -INT8_C( 59), INT8_C( 123), -INT8_C( 22), INT8_C( 40), INT8_C( 31), INT8_C( 46), -INT8_C( 102), INT8_C( 77), -INT8_C( 30), INT8_C( 23), INT8_MAX, -INT8_C( 114), INT8_C( 7), INT8_C( 7), INT8_C( 97), INT8_C( 97), -INT8_C( 18), INT8_C( 49), INT8_C( 25), INT8_C( 6), INT8_C( 111), -INT8_C( 99), INT8_C( 40), -INT8_C( 112), -INT8_C( 20), -INT8_C( 49), -INT8_C( 44), INT8_C( 59), -INT8_C( 44), -INT8_C( 42), INT8_C( 55), INT8_C( 47), -INT8_C( 116), INT8_C( 16), INT8_C( 23), INT8_C( 16), -INT8_C( 81), INT8_C( 91), INT8_C( 93), INT8_C( 45), -INT8_C( 9), -INT8_C( 116), -INT8_C( 107), -INT8_C( 33), -INT8_C( 53) }, { INT8_C( 16), 
INT8_C( 65), INT8_C( 23), -INT8_C( 65), INT8_C( 80), INT8_C( 30), INT8_C( 46), -INT8_C( 19), -INT8_C( 34), -INT8_C( 65), -INT8_C( 39), INT8_C( 85), INT8_C( 36), INT8_C( 89), INT8_C( 41), -INT8_C( 6), -INT8_C( 56), -INT8_C( 61), INT8_C( 4), -INT8_C( 40), -INT8_C( 124), INT8_C( 21), INT8_C( 23), -INT8_C( 32), INT8_C( 114), INT8_C( 56), -INT8_C( 96), -INT8_C( 126), -INT8_C( 24), INT8_MIN, -INT8_C( 104), -INT8_C( 8), -INT8_C( 63), -INT8_C( 80), -INT8_C( 72), INT8_C( 17), -INT8_C( 50), -INT8_C( 26), -INT8_C( 2), -INT8_C( 84), -INT8_C( 91), -INT8_C( 41), INT8_C( 2), -INT8_C( 55), INT8_C( 48), INT8_C( 43), -INT8_C( 60), -INT8_C( 8), -INT8_C( 18), -INT8_C( 56), -INT8_C( 48), INT8_C( 115), -INT8_C( 35), -INT8_C( 24), INT8_C( 83), INT8_C( 80), INT8_C( 32), -INT8_C( 13), -INT8_C( 46), INT8_C( 8), INT8_C( 115), INT8_C( 106), INT8_C( 1), INT8_C( 52) }, UINT64_C( 18447070327213313) }, { UINT64_C( 1677542360563664629), { -INT8_C( 92), INT8_C( 95), INT8_C( 14), INT8_C( 39), -INT8_C( 118), INT8_C( 104), INT8_C( 109), -INT8_C( 32), -INT8_C( 127), -INT8_C( 17), INT8_C( 49), INT8_C( 53), -INT8_C( 67), -INT8_C( 98), INT8_C( 112), INT8_C( 22), -INT8_C( 125), -INT8_C( 5), -INT8_C( 10), -INT8_C( 28), -INT8_C( 79), -INT8_C( 126), INT8_C( 119), -INT8_C( 114), INT8_C( 81), -INT8_C( 50), INT8_C( 31), INT8_C( 94), INT8_C( 92), INT8_C( 7), INT8_C( 118), INT8_C( 21), INT8_C( 103), -INT8_C( 124), INT8_C( 72), -INT8_C( 8), INT8_C( 95), -INT8_C( 86), INT8_C( 125), -INT8_C( 70), INT8_C( 12), INT8_C( 74), -INT8_C( 77), -INT8_C( 55), -INT8_C( 24), -INT8_C( 18), -INT8_C( 78), -INT8_C( 42), -INT8_C( 22), INT8_C( 70), -INT8_C( 5), -INT8_C( 125), -INT8_C( 56), INT8_C( 114), INT8_C( 17), INT8_C( 70), -INT8_C( 47), INT8_C( 49), -INT8_C( 92), INT8_C( 87), -INT8_C( 105), INT8_C( 26), -INT8_C( 102), INT8_C( 46) }, { -INT8_C( 98), -INT8_C( 96), INT8_C( 126), -INT8_C( 40), INT8_C( 111), -INT8_C( 105), -INT8_C( 110), INT8_C( 123), -INT8_C( 31), INT8_C( 16), INT8_C( 69), -INT8_C( 54), -INT8_C( 1), -INT8_C( 27), 
-INT8_C( 31), -INT8_C( 23), INT8_C( 124), -INT8_C( 36), INT8_C( 108), INT8_C( 71), INT8_C( 78), INT8_C( 125), -INT8_C( 115), INT8_C( 73), -INT8_C( 82), INT8_C( 49), -INT8_C( 96), INT8_C( 70), INT8_C( 76), -INT8_C( 8), -INT8_C( 117), -INT8_C( 22), -INT8_C( 104), INT8_C( 9), -INT8_C( 62), INT8_C( 7), -INT8_C( 96), INT8_C( 85), -INT8_C( 126), -INT8_C( 126), INT8_C( 101), -INT8_C( 57), INT8_C( 76), INT8_C( 100), -INT8_C( 83), INT8_C( 45), INT8_C( 77), INT8_C( 41), INT8_C( 9), -INT8_C( 71), INT8_C( 112), INT8_C( 87), INT8_C( 55), -INT8_C( 3), -INT8_C( 95), -INT8_C( 27), INT8_C( 46), INT8_C( 65), INT8_C( 43), INT8_C( 122), INT8_C( 57), -INT8_C( 73), INT8_C( 101), -INT8_C( 47) }, UINT64_C( 1604710282972644501) }, { UINT64_C( 4529592901992082943), { -INT8_C( 104), INT8_C( 19), INT8_C( 84), INT8_C( 110), INT8_C( 116), INT8_C( 91), -INT8_C( 115), INT8_C( 48), INT8_C( 40), INT8_C( 51), INT8_C( 58), INT8_C( 16), INT8_C( 67), INT8_C( 100), INT8_C( 91), INT8_C( 55), -INT8_C( 106), INT8_C( 93), INT8_C( 73), -INT8_C( 87), -INT8_C( 49), INT8_C( 95), INT8_C( 117), INT8_C( 82), INT8_C( 71), INT8_C( 60), -INT8_C( 113), INT8_C( 5), INT8_C( 70), -INT8_C( 18), INT8_C( 81), -INT8_C( 25), INT8_C( 96), INT8_C( 93), INT8_C( 74), INT8_C( 62), -INT8_C( 72), -INT8_C( 40), INT8_C( 121), INT8_C( 16), INT8_C( 11), -INT8_C( 77), -INT8_C( 69), INT8_C( 120), -INT8_C( 7), INT8_C( 113), INT8_C( 56), -INT8_C( 75), INT8_C( 116), -INT8_C( 7), INT8_C( 57), INT8_C( 68), INT8_C( 89), -INT8_C( 81), -INT8_C( 23), -INT8_C( 9), INT8_C( 124), INT8_C( 39), INT8_C( 32), -INT8_C( 61), INT8_C( 118), INT8_C( 124), -INT8_C( 109), INT8_C( 24) }, { -INT8_C( 39), -INT8_C( 20), -INT8_C( 85), -INT8_C( 111), -INT8_C( 60), INT8_C( 36), -INT8_C( 95), -INT8_C( 49), -INT8_C( 41), INT8_C( 101), INT8_C( 30), -INT8_C( 17), -INT8_C( 41), INT8_C( 86), -INT8_C( 92), -INT8_C( 56), INT8_C( 105), -INT8_C( 94), -INT8_C( 74), INT8_C( 86), INT8_C( 48), -INT8_C( 96), -INT8_C( 118), -INT8_C( 83), INT8_C( 97), -INT8_C( 61), INT8_C( 112), 
-INT8_C( 41), INT8_C( 63), INT8_C( 17), -INT8_C( 82), INT8_C( 24), -INT8_C( 3), INT8_C( 89), -INT8_C( 87), -INT8_C( 63), INT8_C( 125), INT8_C( 75), -INT8_C( 111), INT8_C( 84), -INT8_C( 80), -INT8_C( 81), INT8_C( 68), -INT8_C( 121), INT8_C( 6), -INT8_C( 24), INT8_C( 80), INT8_C( 111), -INT8_C( 117), INT8_C( 6), -INT8_C( 58), -INT8_C( 69), -INT8_C( 90), INT8_C( 80), INT8_C( 104), INT8_C( 8), INT8_C( 20), -INT8_C( 40), -INT8_C( 33), INT8_C( 83), -INT8_C( 22), -INT8_C( 115), INT8_C( 108), -INT8_C( 25) }, UINT64_C( 4053310815464334417) }, { UINT64_C( 6137157573880594433), { -INT8_C( 8), INT8_C( 37), INT8_C( 66), INT8_C( 73), INT8_C( 125), -INT8_C( 98), -INT8_C( 119), INT8_C( 124), INT8_C( 38), INT8_C( 95), -INT8_C( 18), INT8_C( 65), -INT8_C( 46), INT8_C( 82), -INT8_C( 56), -INT8_C( 62), INT8_C( 32), -INT8_C( 91), -INT8_C( 68), INT8_C( 69), -INT8_C( 99), INT8_C( 86), -INT8_C( 125), -INT8_C( 32), -INT8_C( 74), -INT8_C( 116), INT8_C( 107), INT8_C( 8), INT8_C( 11), INT8_C( 71), INT8_C( 40), -INT8_C( 65), INT8_C( 41), -INT8_C( 104), -INT8_C( 7), -INT8_C( 120), INT8_C( 106), -INT8_C( 125), -INT8_C( 85), -INT8_C( 112), -INT8_C( 30), -INT8_C( 103), -INT8_C( 47), -INT8_C( 4), INT8_C( 33), INT8_C( 19), INT8_C( 14), -INT8_C( 57), INT8_C( 70), INT8_C( 35), INT8_C( 46), -INT8_C( 3), -INT8_C( 38), INT8_C( 11), -INT8_C( 60), INT8_C( 48), -INT8_C( 67), -INT8_C( 96), -INT8_C( 9), -INT8_C( 93), INT8_C( 119), INT8_MIN, INT8_C( 99), -INT8_C( 32) }, { INT8_C( 76), INT8_C( 30), -INT8_C( 112), -INT8_C( 74), -INT8_C( 95), INT8_C( 59), INT8_C( 70), -INT8_C( 125), -INT8_C( 44), INT8_C( 23), INT8_C( 55), -INT8_C( 41), -INT8_C( 80), -INT8_C( 83), -INT8_C( 97), -INT8_C( 18), -INT8_C( 33), -INT8_C( 51), -INT8_C( 21), -INT8_C( 70), INT8_C( 98), -INT8_C( 87), INT8_C( 124), INT8_C( 31), INT8_C( 73), INT8_C( 115), -INT8_C( 108), -INT8_C( 9), -INT8_C( 12), -INT8_C( 72), -INT8_C( 41), INT8_C( 64), -INT8_C( 42), INT8_C( 103), -INT8_C( 10), INT8_C( 119), -INT8_C( 94), INT8_C( 60), -INT8_C( 6), INT8_C( 118), 
INT8_C( 83), INT8_C( 49), INT8_C( 77), INT8_C( 3), -INT8_C( 34), -INT8_C( 20), -INT8_C( 15), -INT8_C( 66), -INT8_C( 71), -INT8_C( 36), INT8_C( 120), INT8_C( 28), -INT8_C( 122), -INT8_C( 12), INT8_C( 59), -INT8_C( 49), INT8_C( 104), -INT8_C( 49), -INT8_C( 58), INT8_C( 92), -INT8_C( 120), -INT8_C( 99), -INT8_C( 100), INT8_C( 94) }, UINT64_C( 362680799549472769) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__mmask64 r = simde_mm512_mask_test_epi8_mask(test_vec[i].k1, a, b); simde_assert_equal_mmask64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int8_t a_[64]; int8_t b_[64]; simde__mmask64 k1 = simde_test_x86_random_mmask64(); simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < 64 ; j++) if (rand() & 1) a_[j] = ~b_[j]; simde__m512i a = simde_mm512_loadu_epi8(a_); simde__m512i b = simde_mm512_loadu_epi8(b_); simde__mmask64 r = simde_mm512_mask_test_epi8_mask(k1, a, b); simde_test_x86_write_mmask64(2, k1, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x64(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_test_epi16_mask (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[32]; const int16_t b[32]; const simde__mmask32 r; } test_vec[] = { { { -INT16_C( 10245), INT16_C( 18801), INT16_C( 31642), -INT16_C( 30834), INT16_C( 4021), -INT16_C( 24214), INT16_C( 3143), INT16_C( 21759), INT16_C( 5919), INT16_C( 28063), -INT16_C( 18558), INT16_C( 14004), -INT16_C( 15245), -INT16_C( 788), -INT16_C( 30313), -INT16_C( 3515), INT16_C( 13971), INT16_C( 19259), -INT16_C( 1420), 
-INT16_C( 29772), -INT16_C( 24822), INT16_C( 27851), -INT16_C( 21228), INT16_C( 13267), INT16_C( 6664), -INT16_C( 29948), INT16_C( 19325), INT16_C( 17857), INT16_C( 17484), -INT16_C( 6928), INT16_C( 12391), -INT16_C( 27800) }, { INT16_C( 10244), -INT16_C( 18802), INT16_C( 17119), INT16_C( 5953), -INT16_C( 4022), INT16_C( 24213), INT16_C( 26781), INT16_C( 25233), INT16_C( 13018), INT16_C( 22512), INT16_C( 18557), -INT16_C( 14005), INT16_C( 15244), INT16_C( 23213), -INT16_C( 21495), INT16_C( 3514), INT16_C( 18644), -INT16_C( 19260), INT16_C( 1419), -INT16_C( 10805), INT16_C( 24821), -INT16_C( 27852), -INT16_C( 14903), -INT16_C( 23563), -INT16_C( 6665), INT16_C( 29947), INT16_C( 17966), -INT16_C( 17858), -INT16_C( 5247), -INT16_C( 30188), -INT16_C( 12392), INT16_C( 27799) }, UINT32_C( 885613516) }, { { INT16_C( 4355), INT16_C( 22602), -INT16_C( 15849), INT16_C( 25519), -INT16_C( 26514), -INT16_C( 8328), INT16_C( 16186), INT16_C( 15176), INT16_C( 9956), -INT16_C( 8659), -INT16_C( 9919), INT16_C( 29603), -INT16_C( 27243), -INT16_C( 26945), INT16_C( 6837), -INT16_C( 3886), INT16_C( 9516), INT16_C( 18143), -INT16_C( 18121), INT16_C( 19610), INT16_C( 20839), -INT16_C( 23822), -INT16_C( 31087), INT16_C( 24898), INT16_C( 1812), -INT16_C( 9466), INT16_C( 24137), -INT16_C( 7388), INT16_C( 30497), -INT16_C( 19442), INT16_C( 20114), INT16_C( 7589) }, { -INT16_C( 4356), -INT16_C( 9644), -INT16_C( 4378), INT16_C( 30246), INT16_C( 20736), INT16_C( 17534), -INT16_C( 16187), INT16_C( 24428), INT16_C( 29232), INT16_C( 31035), INT16_C( 24528), -INT16_C( 29604), INT16_C( 27242), INT16_C( 26944), -INT16_C( 6838), INT16_C( 18054), -INT16_C( 9517), -INT16_C( 18144), INT16_C( 18120), -INT16_C( 14032), -INT16_C( 20840), INT16_C( 23821), INT16_C( 31086), -INT16_C( 24899), -INT16_C( 1813), -INT16_C( 17641), INT16_C( 29783), -INT16_C( 16057), -INT16_C( 30498), INT16_C( 10538), -INT16_C( 20115), INT16_C( 16751) }, UINT32_C(2919794622) }, { { -INT16_C( 15752), -INT16_C( 2825), INT16_C( 30044), 
INT16_C( 16469), INT16_C( 30696), INT16_C( 26898), -INT16_C( 3822), INT16_C( 3323), INT16_C( 5870), -INT16_C( 2908), INT16_C( 1191), INT16_C( 4091), INT16_C( 10184), INT16_C( 15686), INT16_C( 8528), -INT16_C( 30914), -INT16_C( 27952), -INT16_C( 17194), -INT16_C( 27468), -INT16_C( 24703), -INT16_C( 16373), INT16_C( 11998), -INT16_C( 21889), INT16_C( 14309), -INT16_C( 30159), -INT16_C( 7124), INT16_C( 10247), -INT16_C( 1753), INT16_C( 20718), -INT16_C( 19658), INT16_C( 29820), -INT16_C( 31607) }, { INT16_C( 4103), INT16_C( 2824), -INT16_C( 30045), -INT16_C( 16470), -INT16_C( 30697), -INT16_C( 26899), -INT16_C( 20941), -INT16_C( 3324), -INT16_C( 11814), INT16_C( 2907), -INT16_C( 32179), -INT16_C( 4092), INT16_C( 15088), INT16_C( 27811), -INT16_C( 8529), -INT16_C( 18760), -INT16_C( 15890), -INT16_C( 27967), INT16_C( 27467), INT16_C( 25169), INT16_C( 16372), INT16_C( 10233), -INT16_C( 531), -INT16_C( 14310), INT16_C( 30158), INT16_C( 7123), -INT16_C( 10248), -INT16_C( 6133), -INT16_C( 20719), -INT16_C( 16299), INT16_C( 3469), INT16_C( 31606) }, UINT32_C(1751889216) }, { { INT16_C( 2253), -INT16_C( 9518), -INT16_C( 2678), INT16_C( 18558), INT16_C( 17914), -INT16_C( 9036), -INT16_C( 9487), INT16_C( 9521), INT16_C( 30510), INT16_C( 11234), -INT16_C( 8024), -INT16_C( 3270), INT16_C( 2708), INT16_C( 9621), INT16_C( 30858), INT16_C( 22534), -INT16_C( 10111), -INT16_C( 6861), -INT16_C( 20199), INT16_C( 23698), INT16_C( 3575), -INT16_C( 5696), -INT16_C( 3353), -INT16_C( 9457), -INT16_C( 6197), INT16_C( 9687), -INT16_C( 7689), INT16_C( 7395), INT16_C( 25863), -INT16_C( 27929), -INT16_C( 4642), -INT16_C( 15351) }, { -INT16_C( 2254), INT16_C( 9517), -INT16_C( 16484), -INT16_C( 18559), -INT16_C( 17915), INT16_C( 21280), INT16_C( 9486), -INT16_C( 9522), INT16_C( 29964), INT16_C( 1155), INT16_C( 8023), INT16_C( 17039), INT16_C( 16464), INT16_C( 13694), -INT16_C( 30859), -INT16_C( 22535), INT16_C( 10110), INT16_C( 6860), INT16_C( 20198), -INT16_C( 4910), -INT16_C( 3576), INT16_C( 
5695), INT16_C( 3352), INT16_C( 9456), INT16_C( 29571), -INT16_C( 9688), -INT16_C( 18286), -INT16_C( 7396), -INT16_C( 25864), INT16_C( 27928), INT16_C( 4641), -INT16_C( 24812) }, UINT32_C(2231909156) }, { { INT16_C( 9041), INT16_C( 9642), -INT16_C( 19085), -INT16_C( 19117), INT16_C( 31931), -INT16_C( 15038), INT16_C( 17383), -INT16_C( 32600), -INT16_C( 6106), INT16_C( 24788), INT16_C( 1209), INT16_C( 2608), INT16_C( 27263), INT16_C( 6932), -INT16_C( 16763), INT16_C( 24863), -INT16_C( 29727), INT16_C( 9129), -INT16_C( 7553), -INT16_C( 18471), -INT16_C( 19573), INT16_C( 29655), INT16_C( 9555), INT16_C( 7436), -INT16_C( 5646), -INT16_C( 31998), INT16_C( 13549), INT16_C( 12497), -INT16_C( 31606), -INT16_C( 2619), -INT16_C( 6812), INT16_C( 17403) }, { -INT16_C( 9042), INT16_C( 11621), -INT16_C( 28482), INT16_C( 19116), -INT16_C( 31932), INT16_C( 15037), -INT16_C( 13939), INT16_C( 32599), INT16_C( 14002), -INT16_C( 24789), -INT16_C( 917), -INT16_C( 2609), -INT16_C( 27264), -INT16_C( 6933), INT16_C( 16762), INT16_C( 10255), INT16_C( 29726), -INT16_C( 9130), INT16_C( 516), INT16_C( 18470), -INT16_C( 7290), INT16_C( 4995), -INT16_C( 9556), INT16_C( 24211), -INT16_C( 16879), INT16_C( 31997), -INT16_C( 12870), INT16_C( 14961), INT16_C( 23650), -INT16_C( 9186), INT16_C( 11678), -INT16_C( 17404) }, UINT32_C(2108982598) }, { { -INT16_C( 12657), INT16_C( 29389), -INT16_C( 32627), -INT16_C( 1451), INT16_C( 1129), INT16_C( 23456), INT16_C( 9124), INT16_C( 7380), -INT16_C( 7801), -INT16_C( 28013), INT16_C( 23030), -INT16_C( 17430), -INT16_C( 21837), INT16_C( 14817), -INT16_C( 22330), INT16_C( 22062), INT16_C( 31863), -INT16_C( 11285), INT16_C( 16617), INT16_C( 5581), INT16_C( 6028), INT16_C( 24972), INT16_C( 28107), -INT16_C( 20355), INT16_C( 25422), INT16_C( 29414), -INT16_C( 16651), -INT16_C( 14291), INT16_C( 7387), -INT16_C( 23851), INT16_C( 16996), -INT16_C( 8378) }, { INT16_C( 12656), INT16_C( 22962), INT16_C( 32626), -INT16_C( 402), -INT16_C( 1130), -INT16_C( 23457), -INT16_C( 
9125), INT16_C( 26708), INT16_C( 7800), INT16_C( 28012), INT16_C( 3292), INT16_C( 17429), -INT16_C( 2418), -INT16_C( 3567), INT16_C( 22329), -INT16_C( 22063), -INT16_C( 31864), -INT16_C( 1533), INT16_C( 28931), -INT16_C( 26120), INT16_C( 22380), -INT16_C( 14274), -INT16_C( 28108), -INT16_C( 21456), -INT16_C( 25423), -INT16_C( 29415), INT16_C( 11945), INT16_C( 14290), -INT16_C( 7388), INT16_C( 23850), -INT16_C( 1222), -INT16_C( 15865) }, UINT32_C(3300799626) }, { { -INT16_C( 26169), INT16_C( 17028), INT16_C( 30871), -INT16_C( 1688), -INT16_C( 24805), INT16_C( 13473), INT16_C( 4179), INT16_C( 23485), -INT16_C( 21361), -INT16_C( 15522), -INT16_C( 17495), INT16_C( 1419), -INT16_C( 10043), -INT16_C( 14686), INT16_C( 32399), INT16_C( 22216), INT16_C( 19479), INT16_C( 13283), INT16_C( 19662), -INT16_C( 5587), INT16_C( 23965), INT16_C( 22729), INT16_C( 19431), INT16_C( 30352), -INT16_C( 4360), -INT16_C( 29061), INT16_C( 1768), -INT16_C( 4823), -INT16_C( 7201), -INT16_C( 18509), INT16_C( 18213), INT16_C( 24589) }, { -INT16_C( 23097), -INT16_C( 19770), -INT16_C( 30872), INT16_C( 1687), INT16_C( 24804), -INT16_C( 13474), -INT16_C( 4180), -INT16_C( 23486), INT16_C( 31965), -INT16_C( 30907), INT16_C( 28370), -INT16_C( 1420), INT16_C( 10042), -INT16_C( 31823), -INT16_C( 16734), INT16_C( 27108), -INT16_C( 21917), -INT16_C( 13284), -INT16_C( 19663), INT16_C( 5586), INT16_C( 12307), -INT16_C( 16415), INT16_C( 8991), -INT16_C( 669), -INT16_C( 22369), INT16_C( 29060), -INT16_C( 1769), INT16_C( 20843), INT16_C( 7200), -INT16_C( 15404), -INT16_C( 18214), INT16_C( 15660) }, UINT32_C(2851202819) }, { { INT16_C( 16382), -INT16_C( 26259), INT16_C( 28224), INT16_C( 19465), -INT16_C( 10248), -INT16_C( 23176), -INT16_C( 10938), -INT16_C( 6927), -INT16_C( 20915), -INT16_C( 28118), -INT16_C( 26651), INT16_C( 6842), INT16_C( 17199), INT16_C( 27442), -INT16_C( 27399), -INT16_C( 12782), INT16_C( 32675), INT16_C( 14834), -INT16_C( 23193), INT16_C( 23942), -INT16_C( 302), INT16_C( 9883), -INT16_C( 
2914), INT16_C( 1738), -INT16_C( 2694), INT16_C( 31199), INT16_C( 20364), INT16_C( 22633), -INT16_C( 25813), -INT16_C( 1085), INT16_C( 2912), -INT16_C( 23607) }, { -INT16_C( 16383), INT16_C( 26258), INT16_C( 6331), -INT16_C( 28989), -INT16_C( 15082), -INT16_C( 19094), INT16_C( 10937), INT16_C( 23713), -INT16_C( 32577), INT16_C( 28117), INT16_C( 26650), -INT16_C( 2231), INT16_C( 9268), -INT16_C( 27443), -INT16_C( 25552), INT16_C( 12781), -INT16_C( 32676), INT16_C( 6295), INT16_C( 23192), -INT16_C( 20570), INT16_C( 4128), -INT16_C( 9884), INT16_C( 1338), -INT16_C( 1739), INT16_C( 2693), -INT16_C( 24730), -INT16_C( 20365), -INT16_C( 22634), INT16_C( 25812), INT16_C( 1084), INT16_C( 10496), INT16_C( 23606) }, UINT32_C(1113217468) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__mmask32 r = simde_mm512_test_epi16_mask(a, b); simde_assert_equal_mmask32(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int16_t a_[32]; int16_t b_[32]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < 32 ; j++) if (rand() & 1) a_[j] = ~b_[j]; simde__m512i a = simde_mm512_loadu_epi16(a_); simde__m512i b = simde_mm512_loadu_epi16(b_); simde__mmask32 r = simde_mm512_test_epi16_mask(a, b); simde_test_x86_write_i16x32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_test_epi32_mask (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[16]; const int32_t b[16]; const simde__mmask16 r; } test_vec[] = { { { -INT32_C( 1791876952), INT32_C( 1844499212), -INT32_C( 365199811), -INT32_C( 1866534401), 
-INT32_C( 232364997), INT32_C( 1453189598), -INT32_C( 461789425), INT32_C( 971959440), -INT32_C( 355524340), -INT32_C( 2146953968), INT32_C( 864052768), INT32_C( 199938932), INT32_C( 54748553), INT32_C( 1624897104), INT32_C( 776295070), INT32_C( 1281897535) }, { INT32_C( 1791876951), -INT32_C( 491122398), INT32_C( 10679440), INT32_C( 1236071149), -INT32_C( 930854296), INT32_C( 709694853), INT32_C( 461789424), -INT32_C( 971959441), INT32_C( 355524339), INT32_C( 2146953967), -INT32_C( 864052769), -INT32_C( 199938933), -INT32_C( 54748554), -INT32_C( 1624897105), -INT32_C( 776295071), -INT32_C( 1281897536) }, UINT16_C( 62) }, { { INT32_C( 356677238), INT32_C( 6053022), -INT32_C( 909044215), -INT32_C( 745850423), -INT32_C( 656871898), INT32_C( 1601452805), INT32_C( 634323680), -INT32_C( 206023555), INT32_C( 1343095986), -INT32_C( 1117669651), INT32_C( 1664295297), -INT32_C( 1158762358), -INT32_C( 73592075), INT32_C( 542773838), INT32_C( 699834084), INT32_C( 1123018103) }, { -INT32_C( 356677239), INT32_C( 1968377588), -INT32_C( 1176992209), INT32_C( 745850422), INT32_C( 656871897), INT32_C( 122126627), -INT32_C( 634323681), INT32_C( 206023554), INT32_C( 703988789), INT32_C( 1117669650), INT32_C( 117143248), INT32_C( 355626812), -INT32_C( 1539548799), -INT32_C( 106134310), INT32_C( 80993409), -INT32_C( 1123018104) }, UINT16_C(32038) }, { { -INT32_C( 1256567730), -INT32_C( 1903775732), -INT32_C( 1021880878), -INT32_C( 2029233078), INT32_C( 646471133), -INT32_C( 1782033256), -INT32_C( 2056503897), INT32_C( 1206237333), INT32_C( 1562435178), -INT32_C( 258166754), INT32_C( 658140595), -INT32_C( 1212544241), -INT32_C( 22118461), INT32_C( 1800265398), -INT32_C( 506967100), -INT32_C( 733180567) }, { INT32_C( 1256567729), -INT32_C( 1049575411), INT32_C( 1021880877), INT32_C( 2029233077), -INT32_C( 646471134), INT32_C( 1782033255), -INT32_C( 1560143285), -INT32_C( 1808014877), -INT32_C( 1562435179), INT32_C( 258166753), -INT32_C( 1370796807), INT32_C( 287784943), INT32_C( 
1223335649), -INT32_C( 1800265399), INT32_C( 506967099), INT32_C( 733180566) }, UINT16_C( 7362) }, { { -INT32_C( 1774688435), INT32_C( 1689956846), INT32_C( 1778816600), INT32_C( 1734383060), -INT32_C( 1672529762), -INT32_C( 798399687), INT32_C( 1044867632), INT32_C( 1731118106), INT32_C( 1744780664), -INT32_C( 277403568), INT32_C( 1655951952), -INT32_C( 1295906541), INT32_C( 2085491266), INT32_C( 854392578), INT32_C( 199072177), INT32_C( 602644472) }, { INT32_C( 1440213253), -INT32_C( 1689956847), -INT32_C( 1778816601), -INT32_C( 1599422482), INT32_C( 1672529761), INT32_C( 798399686), -INT32_C( 927791152), -INT32_C( 1410621786), -INT32_C( 1744780665), -INT32_C( 1221311216), -INT32_C( 1655951953), INT32_C( 1295906540), -INT32_C( 2085491267), INT32_C( 1521698698), -INT32_C( 199072178), -INT32_C( 1952510716) }, UINT16_C(41673) }, { { INT32_C( 4120059), INT32_C( 88068023), INT32_C( 972644661), INT32_C( 1069848687), -INT32_C( 1495964580), -INT32_C( 995051713), INT32_C( 2000019868), -INT32_C( 1425017777), -INT32_C( 1985099141), -INT32_C( 312115224), INT32_C( 608280808), -INT32_C( 371249999), -INT32_C( 1090510320), INT32_C( 1377285814), -INT32_C( 406238057), -INT32_C( 1517102783) }, { -INT32_C( 4120060), INT32_C( 607052274), -INT32_C( 1396402181), -INT32_C( 678058813), INT32_C( 1495964579), -INT32_C( 391364271), -INT32_C( 808487538), INT32_C( 1400136014), INT32_C( 1985099140), INT32_C( 312115223), -INT32_C( 608280809), -INT32_C( 1498262525), INT32_C( 1090510319), -INT32_C( 1406620898), INT32_C( 1870395425), -INT32_C( 574427047) }, UINT16_C(59630) }, { { INT32_C( 1995142522), -INT32_C( 1025328991), INT32_C( 1237123720), INT32_C( 1944581197), -INT32_C( 1632683255), INT32_C( 2001403434), INT32_C( 1825100299), INT32_C( 599571980), INT32_C( 1448627380), INT32_C( 503137172), -INT32_C( 440448360), INT32_C( 2115725344), INT32_C( 1859488131), INT32_C( 293810182), INT32_C( 1990734309), -INT32_C( 125731111) }, { -INT32_C( 1995142523), INT32_C( 1547578820), -INT32_C( 1237123721), 
-INT32_C( 2030134620), -INT32_C( 1466768222), INT32_C( 398005525), INT32_C( 1797666401), -INT32_C( 198617233), -INT32_C( 1501675807), -INT32_C( 503137173), INT32_C( 1587102906), -INT32_C( 2115725345), -INT32_C( 1859488132), -INT32_C( 324476021), -INT32_C( 1990734310), INT32_C( 125731110) }, UINT16_C( 9722) }, { { -INT32_C( 1236631766), -INT32_C( 726892159), -INT32_C( 418972991), INT32_C( 988710011), INT32_C( 1318750596), -INT32_C( 1543400492), -INT32_C( 213622153), -INT32_C( 1249473866), INT32_C( 1242584808), INT32_C( 164082504), INT32_C( 686869933), -INT32_C( 1827472114), -INT32_C( 955660677), -INT32_C( 1191058623), INT32_C( 1621039920), INT32_C( 1824172932) }, { INT32_C( 1253559041), INT32_C( 726892158), -INT32_C( 1672264629), -INT32_C( 1636977373), -INT32_C( 1318750597), INT32_C( 1543400491), INT32_C( 213622152), INT32_C( 1249473865), -INT32_C( 1097584576), -INT32_C( 102111058), INT32_C( 1335180332), INT32_C( 1827472113), -INT32_C( 1558293896), INT32_C( 1191058622), INT32_C( 39041465), INT32_C( 1867369519) }, UINT16_C(55053) }, { { INT32_C( 2003202614), -INT32_C( 475412712), INT32_C( 1141350272), INT32_C( 1446086632), -INT32_C( 1605510442), INT32_C( 1184231666), INT32_C( 1653332520), -INT32_C( 1518195858), -INT32_C( 103716083), -INT32_C( 845194817), -INT32_C( 603306187), INT32_C( 310425404), INT32_C( 1591703846), -INT32_C( 423016258), INT32_C( 759780642), -INT32_C( 1598267758) }, { -INT32_C( 2003202615), INT32_C( 475412711), -INT32_C( 1141350273), -INT32_C( 1446086633), INT32_C( 70025286), -INT32_C( 353700152), -INT32_C( 585616565), INT32_C( 1518195857), INT32_C( 1239619426), -INT32_C( 815449777), -INT32_C( 1316332135), INT32_C( 475683030), -INT32_C( 1591703847), -INT32_C( 947123588), -INT32_C( 811228098), -INT32_C( 634772872) }, UINT16_C(61296) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__mmask16 r = 
simde_mm512_test_epi32_mask(a, b); simde_assert_equal_mmask16(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int32_t a_[16]; int32_t b_[16]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < 16 ; j++) if (rand() & 1) a_[j] = ~b_[j]; simde__m512i a = simde_mm512_loadu_epi32(a_); simde__m512i b = simde_mm512_loadu_epi32(b_); simde__mmask16 r = simde_mm512_test_epi32_mask(a, b); simde_test_x86_write_i32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_test_epi64_mask (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[8]; const int64_t b[8]; const simde__mmask8 r; } test_vec[] = { { { -INT64_C( 1935719795857156052), -INT64_C( 6142336982440005382), -INT64_C( 3755735260302931735), INT64_C( 8565924432478488344), -INT64_C( 7625521580720828296), INT64_C( 3735482225277759375), INT64_C( 1389743071009894742), INT64_C( 4211258168981445345) }, { -INT64_C( 118278527101448595), INT64_C( 530621773771449840), INT64_C( 3755735260302931734), -INT64_C( 8565924432478488345), INT64_C( 7625521580720828295), -INT64_C( 3735482225277759376), -INT64_C( 2277422861904188195), -INT64_C( 1416974286643090513) }, UINT8_C(195) }, { { INT64_C( 3457588026753764434), INT64_C( 1717706511224668770), INT64_C( 5859823844452325122), -INT64_C( 136148487240149447), -INT64_C( 4072292484336041004), INT64_C( 7336384931628364992), INT64_C( 899403616361728444), -INT64_C( 4966276177273367448) }, { -INT64_C( 3457588026753764435), -INT64_C( 1717706511224668771), INT64_C( 7682991252262311636), INT64_C( 136148487240149446), INT64_C( 7005422631704761958), -INT64_C( 7336384931628364993), -INT64_C( 899403616361728445), INT64_C( 4966276177273367447) }, UINT8_C( 20) 
}, { { INT64_C( 8111300885188067376), INT64_C( 8433345625914411573), -INT64_C( 1415332508365564871), -INT64_C( 6383252409235804746), INT64_C( 5059076272894766680), INT64_C( 3611479204207783805), -INT64_C( 2146346760320498508), INT64_C( 3145857604965282112) }, { -INT64_C( 5472754878176835303), -INT64_C( 8433345625914411574), -INT64_C( 8007106683827110144), INT64_C( 6383252409235804745), -INT64_C( 5059076272894766681), -INT64_C( 3611479204207783806), INT64_C( 659405438131680690), -INT64_C( 3145857604965282113) }, UINT8_C( 69) }, { { -INT64_C( 3849252375540350663), INT64_C( 3904726803647939769), -INT64_C( 216416808084101174), INT64_C( 8434442409886415556), INT64_C( 4350871313423223305), INT64_C( 8504883711772806009), -INT64_C( 6152271255572725614), -INT64_C( 8271005281003021549) }, { INT64_C( 3849252375540350662), -INT64_C( 3904726803647939770), INT64_C( 216416808084101173), -INT64_C( 5671441591776678087), -INT64_C( 4389237782810184900), -INT64_C( 8504883711772806010), INT64_C( 6152271255572725613), INT64_C( 7011924461457909682) }, UINT8_C(152) }, { { -INT64_C( 7428643418601908980), INT64_C( 2294962712527383143), -INT64_C( 5612589600369147080), -INT64_C( 5974249584702473137), INT64_C( 6472844327359209748), -INT64_C( 2618330417723800803), -INT64_C( 4939930458487302337), INT64_C( 137830299006646676) }, { INT64_C( 3349624863759605754), INT64_C( 467355425918448966), INT64_C( 5612589600369147079), INT64_C( 5974249584702473136), -INT64_C( 6472844327359209749), INT64_C( 908254077988168780), INT64_C( 4939930458487302336), -INT64_C( 137830299006646677) }, UINT8_C( 35) }, { { INT64_C( 3748689352051745012), INT64_C( 3765597295249760617), INT64_C( 8461435782336917084), -INT64_C( 7396666994511550337), -INT64_C( 6102301345150566269), INT64_C( 5468756357428140262), INT64_C( 3501708573596139009), -INT64_C( 4673070998841589811) }, { -INT64_C( 3748689352051745013), INT64_C( 2957844122011673298), INT64_C( 8976476619900634683), INT64_C( 6553670180077683509), INT64_C( 
7755375136595222255), -INT64_C( 5333888708520436925), -INT64_C( 3501708573596139010), INT64_C( 4673070998841589810) }, UINT8_C( 62) }, { { INT64_C( 3340205165359449056), INT64_C( 8961155326498102258), INT64_C( 5187257904649764459), -INT64_C( 7718806074294071207), INT64_C( 4351002021841475629), INT64_C( 2783331170132641387), -INT64_C( 6088879537201316956), -INT64_C( 8404271755238698236) }, { -INT64_C( 3340205165359449057), INT64_C( 4010264786319470234), -INT64_C( 7508926013520240691), INT64_C( 1795458474933612802), -INT64_C( 8222122719232016645), -INT64_C( 2783331170132641388), -INT64_C( 4653973519871652290), -INT64_C( 8879680603371353420) }, UINT8_C(222) }, { { -INT64_C( 7072750869937231124), -INT64_C( 1235259015713123945), -INT64_C( 4453778579372969693), -INT64_C( 6391677988780903072), -INT64_C( 5475290417517831931), INT64_C( 8023331291259204593), -INT64_C( 2934575014531591542), -INT64_C( 7984994343078542901) }, { INT64_C( 7072750869937231123), INT64_C( 1235259015713123944), -INT64_C( 6953033857279573993), INT64_C( 6391677988780903071), INT64_C( 159581583610422626), -INT64_C( 37474756131855391), -INT64_C( 8338930464116371428), INT64_C( 7984994343078542900) }, UINT8_C(116) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__mmask8 r = simde_mm512_test_epi64_mask(a, b); simde_assert_equal_mmask8(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int64_t a_[8]; int64_t b_[8]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < 8 ; j++) if (rand() & 1) a_[j] = ~b_[j]; simde__m512i a = simde_mm512_loadu_epi64(a_); simde__m512i b = simde_mm512_loadu_epi64(b_); simde__mmask8 r = simde_mm512_test_epi64_mask(a, b); simde_test_x86_write_i64x8(2, a, 
SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_test_epi8_mask (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[64]; const int8_t b[64]; const simde__mmask64 r; } test_vec[] = { { { -INT8_C( 41), INT8_C( 28), -INT8_C( 10), -INT8_C( 79), INT8_C( 87), INT8_C( 18), -INT8_C( 97), -INT8_C( 8), INT8_C( 52), INT8_C( 80), INT8_C( 67), INT8_C( 113), INT8_C( 122), INT8_C( 64), INT8_C( 57), -INT8_C( 57), INT8_C( 36), INT8_C( 82), -INT8_C( 12), -INT8_C( 67), -INT8_C( 18), INT8_C( 48), -INT8_C( 17), -INT8_C( 14), -INT8_C( 22), -INT8_C( 117), INT8_C( 115), INT8_C( 110), INT8_C( 49), INT8_C( 37), INT8_C( 92), INT8_C( 9), -INT8_C( 34), -INT8_C( 43), -INT8_C( 6), -INT8_C( 3), INT8_C( 54), -INT8_C( 103), INT8_C( 46), -INT8_C( 9), -INT8_C( 18), -INT8_C( 10), INT8_C( 105), -INT8_C( 103), INT8_C( 107), INT8_C( 110), INT8_C( 97), -INT8_C( 112), -INT8_C( 28), INT8_C( 10), INT8_C( 2), -INT8_C( 117), -INT8_C( 29), -INT8_C( 121), INT8_C( 67), -INT8_C( 50), -INT8_C( 90), -INT8_C( 80), INT8_C( 60), -INT8_C( 62), -INT8_C( 35), -INT8_C( 25), INT8_C( 77), -INT8_C( 58) }, { -INT8_C( 22), INT8_C( 71), -INT8_C( 73), INT8_C( 78), -INT8_C( 31), -INT8_C( 97), INT8_C( 70), -INT8_C( 49), -INT8_C( 53), -INT8_C( 81), INT8_C( 55), INT8_C( 54), INT8_C( 84), INT8_C( 84), -INT8_C( 58), INT8_C( 56), INT8_C( 94), -INT8_C( 83), INT8_C( 11), INT8_C( 66), INT8_C( 53), -INT8_C( 49), INT8_C( 16), INT8_C( 72), INT8_MAX, INT8_C( 76), -INT8_C( 116), -INT8_C( 84), -INT8_C( 28), -INT8_C( 38), INT8_C( 115), -INT8_C( 50), INT8_C( 33), INT8_C( 42), INT8_C( 28), INT8_C( 2), -INT8_C( 55), INT8_C( 98), -INT8_C( 47), -INT8_C( 108), INT8_C( 17), INT8_C( 9), -INT8_C( 53), INT8_C( 102), INT8_C( 93), -INT8_C( 111), -INT8_C( 98), -INT8_C( 69), INT8_C( 63), -INT8_C( 87), -INT8_C( 3), INT8_C( 116), INT8_C( 121), INT8_C( 13), -INT8_C( 68), -INT8_C( 8), INT8_C( 89), 
INT8_C( 72), -INT8_C( 91), INT8_C( 61), INT8_C( 34), INT8_C( 24), INT8_C( 11), INT8_C( 68) }, UINT64_C(14173835750605405431) }, { { INT8_C( 42), -INT8_C( 95), -INT8_C( 42), -INT8_C( 82), INT8_C( 100), INT8_C( 11), INT8_C( 61), INT8_C( 49), -INT8_C( 23), INT8_C( 85), -INT8_C( 2), -INT8_C( 25), -INT8_C( 121), INT8_C( 7), INT8_C( 61), INT8_C( 47), -INT8_C( 52), INT8_MAX, INT8_C( 88), INT8_C( 1), INT8_C( 29), INT8_C( 103), -INT8_C( 119), -INT8_C( 61), -INT8_C( 93), INT8_C( 53), INT8_C( 80), -INT8_C( 71), INT8_C( 51), -INT8_C( 13), INT8_C( 23), INT8_C( 93), INT8_C( 107), -INT8_C( 19), INT8_C( 32), -INT8_C( 7), INT8_C( 40), -INT8_C( 109), -INT8_C( 4), INT8_C( 60), -INT8_C( 110), -INT8_C( 6), INT8_C( 87), INT8_C( 54), INT8_C( 2), -INT8_C( 108), INT8_C( 7), INT8_C( 98), -INT8_C( 74), INT8_C( 100), -INT8_C( 90), -INT8_C( 68), -INT8_C( 53), -INT8_C( 18), INT8_C( 13), INT8_C( 120), -INT8_C( 52), INT8_C( 121), -INT8_C( 37), INT8_C( 0), INT8_C( 81), INT8_C( 64), INT8_C( 93), -INT8_C( 25) }, { INT8_C( 88), INT8_C( 94), INT8_C( 32), INT8_C( 81), -INT8_C( 101), INT8_C( 28), -INT8_C( 115), INT8_C( 46), INT8_C( 22), -INT8_C( 28), INT8_C( 103), INT8_C( 24), INT8_C( 120), -INT8_C( 49), -INT8_C( 38), INT8_C( 46), INT8_C( 51), INT8_MIN, -INT8_C( 89), -INT8_C( 2), -INT8_C( 30), -INT8_C( 29), INT8_C( 118), -INT8_C( 81), INT8_C( 92), -INT8_C( 54), -INT8_C( 81), INT8_C( 70), INT8_C( 53), INT8_C( 12), -INT8_C( 101), -INT8_C( 115), INT8_C( 106), -INT8_C( 69), -INT8_C( 33), INT8_C( 6), -INT8_C( 41), INT8_C( 108), INT8_C( 52), -INT8_C( 19), INT8_C( 81), -INT8_C( 101), INT8_C( 6), -INT8_C( 55), INT8_C( 106), -INT8_C( 32), -INT8_C( 8), -INT8_C( 99), INT8_C( 97), -INT8_C( 97), -INT8_C( 101), INT8_C( 67), -INT8_C( 126), INT8_C( 17), -INT8_C( 14), -INT8_C( 34), -INT8_C( 37), -INT8_C( 95), INT8_C( 36), INT8_C( 16), -INT8_C( 82), -INT8_C( 65), -INT8_C( 98), INT8_C( 24) }, UINT64_C( 4870422836182836961) }, { { INT8_C( 93), -INT8_C( 97), INT8_C( 109), -INT8_C( 63), INT8_C( 100), -INT8_C( 86), INT8_C( 
67), -INT8_C( 112), INT8_C( 41), INT8_C( 29), -INT8_C( 31), -INT8_C( 16), -INT8_C( 20), -INT8_C( 72), INT8_C( 60), INT8_C( 74), INT8_C( 89), -INT8_C( 64), INT8_C( 93), -INT8_C( 75), INT8_C( 96), -INT8_C( 60), INT8_C( 111), INT8_C( 116), INT8_C( 20), INT8_C( 53), INT8_C( 105), INT8_C( 22), -INT8_C( 7), INT8_C( 20), INT8_C( 110), INT8_C( 74), -INT8_C( 86), -INT8_C( 73), INT8_C( 11), INT8_C( 14), INT8_C( 98), INT8_C( 79), -INT8_C( 97), -INT8_C( 117), INT8_C( 115), INT8_C( 116), INT8_C( 125), -INT8_C( 57), INT8_C( 93), -INT8_C( 100), -INT8_C( 64), INT8_C( 90), INT8_C( 92), -INT8_C( 21), -INT8_C( 88), INT8_C( 84), INT8_C( 3), -INT8_C( 73), INT8_C( 120), INT8_C( 125), -INT8_C( 20), INT8_C( 50), INT8_C( 125), -INT8_C( 59), -INT8_C( 2), -INT8_C( 100), INT8_C( 28), -INT8_C( 88) }, { -INT8_C( 94), INT8_C( 96), -INT8_C( 74), INT8_C( 36), -INT8_C( 81), INT8_C( 85), -INT8_C( 81), INT8_C( 28), -INT8_C( 42), INT8_C( 15), INT8_C( 116), INT8_C( 15), -INT8_C( 85), INT8_C( 23), -INT8_C( 95), INT8_C( 7), INT8_C( 2), -INT8_C( 24), -INT8_C( 94), -INT8_C( 78), -INT8_C( 97), INT8_C( 26), INT8_C( 47), -INT8_C( 117), -INT8_C( 21), -INT8_C( 83), -INT8_C( 106), -INT8_C( 23), INT8_C( 6), -INT8_C( 21), -INT8_C( 111), -INT8_C( 87), INT8_C( 75), INT8_C( 72), -INT8_C( 51), -INT8_C( 5), -INT8_C( 99), INT8_C( 125), INT8_C( 23), INT8_C( 115), -INT8_C( 116), -INT8_C( 117), -INT8_C( 126), INT8_C( 56), -INT8_C( 94), INT8_C( 35), INT8_C( 63), -INT8_C( 91), INT8_C( 12), -INT8_C( 30), INT8_C( 87), -INT8_C( 85), -INT8_C( 4), -INT8_C( 121), INT8_C( 55), -INT8_C( 24), INT8_C( 52), -INT8_C( 51), -INT8_C( 47), INT8_C( 58), -INT8_C( 72), INT8_C( 99), -INT8_C( 29), INT8_C( 3) }, UINT64_C( 1577105314603005652) }, { { -INT8_C( 8), INT8_C( 28), INT8_C( 84), -INT8_C( 27), -INT8_C( 64), INT8_MAX, INT8_C( 115), -INT8_C( 37), INT8_C( 124), -INT8_C( 99), INT8_C( 88), INT8_C( 33), -INT8_C( 1), INT8_C( 48), INT8_C( 126), -INT8_C( 7), INT8_C( 111), INT8_C( 63), INT8_C( 63), -INT8_C( 100), -INT8_C( 43), INT8_C( 118), INT8_C( 
15), -INT8_C( 109), INT8_C( 103), INT8_C( 87), INT8_C( 126), INT8_C( 114), INT8_C( 52), -INT8_C( 94), -INT8_C( 106), -INT8_C( 53), INT8_C( 37), INT8_C( 19), -INT8_C( 49), INT8_C( 17), INT8_MAX, -INT8_C( 55), INT8_C( 86), -INT8_C( 74), -INT8_C( 69), -INT8_C( 115), INT8_C( 29), -INT8_C( 62), -INT8_C( 67), -INT8_C( 105), INT8_C( 80), -INT8_C( 94), -INT8_C( 36), -INT8_C( 112), INT8_C( 63), INT8_C( 78), -INT8_C( 18), INT8_C( 78), -INT8_C( 31), -INT8_C( 19), -INT8_C( 91), INT8_C( 96), INT8_C( 16), -INT8_C( 61), -INT8_C( 95), -INT8_C( 32), -INT8_C( 115), -INT8_C( 58) }, { INT8_C( 105), -INT8_C( 29), -INT8_C( 85), -INT8_C( 5), -INT8_C( 83), INT8_C( 107), -INT8_C( 79), -INT8_C( 69), -INT8_C( 125), INT8_C( 98), -INT8_C( 55), -INT8_C( 34), -INT8_C( 7), -INT8_C( 49), -INT8_C( 127), INT8_C( 114), -INT8_C( 112), -INT8_C( 64), -INT8_C( 64), -INT8_C( 58), INT8_C( 14), -INT8_C( 95), INT8_C( 100), -INT8_C( 77), INT8_C( 1), INT8_C( 116), -INT8_C( 48), -INT8_C( 93), -INT8_C( 53), INT8_C( 93), INT8_C( 105), INT8_C( 52), INT8_C( 65), INT8_C( 21), INT8_C( 48), -INT8_C( 18), INT8_MIN, -INT8_C( 31), -INT8_C( 87), INT8_C( 4), INT8_C( 68), INT8_C( 114), -INT8_C( 30), INT8_C( 61), INT8_C( 66), INT8_C( 99), -INT8_C( 81), -INT8_C( 46), INT8_C( 35), INT8_C( 111), -INT8_C( 104), INT8_C( 49), INT8_C( 17), -INT8_C( 3), -INT8_C( 28), INT8_C( 18), INT8_C( 113), -INT8_C( 76), -INT8_C( 75), INT8_C( 60), INT8_C( 18), INT8_C( 31), INT8_C( 113), INT8_C( 83) }, UINT64_C(14367785333426787577) }, { { INT8_C( 93), -INT8_C( 50), INT8_C( 5), INT8_C( 79), -INT8_C( 118), INT8_C( 116), INT8_C( 113), -INT8_C( 87), INT8_C( 126), INT8_C( 118), INT8_C( 54), INT8_C( 69), INT8_C( 75), INT8_C( 8), INT8_C( 47), -INT8_C( 45), INT8_C( 121), -INT8_C( 36), -INT8_C( 87), INT8_C( 8), INT8_C( 5), INT8_C( 109), INT8_C( 107), INT8_C( 18), INT8_C( 18), INT8_C( 89), -INT8_C( 114), INT8_C( 94), -INT8_C( 103), -INT8_C( 40), INT8_C( 54), INT8_C( 107), INT8_C( 38), -INT8_C( 52), -INT8_C( 70), -INT8_C( 80), -INT8_C( 80), -INT8_C( 68), 
-INT8_C( 61), INT8_C( 93), -INT8_C( 123), INT8_C( 92), -INT8_C( 5), INT8_C( 17), INT8_C( 118), INT8_C( 42), INT8_C( 82), -INT8_C( 17), INT8_C( 120), -INT8_C( 4), INT8_C( 108), -INT8_C( 67), -INT8_C( 11), INT8_C( 110), -INT8_C( 109), INT8_C( 13), INT8_C( 48), -INT8_C( 96), -INT8_C( 115), -INT8_C( 55), INT8_C( 121), -INT8_C( 61), INT8_C( 52), INT8_C( 100) }, { -INT8_C( 94), -INT8_C( 18), -INT8_C( 6), -INT8_C( 39), -INT8_C( 47), -INT8_C( 117), -INT8_C( 114), INT8_C( 86), INT8_C( 83), -INT8_C( 119), INT8_C( 103), -INT8_C( 55), -INT8_C( 76), -INT8_C( 9), -INT8_C( 72), INT8_C( 44), INT8_C( 1), INT8_C( 35), INT8_C( 86), -INT8_C( 9), -INT8_C( 6), -INT8_C( 110), -INT8_C( 2), INT8_C( 42), INT8_C( 93), INT8_C( 100), -INT8_C( 13), INT8_MIN, INT8_C( 58), INT8_C( 39), -INT8_C( 55), -INT8_C( 36), INT8_C( 21), -INT8_C( 61), -INT8_C( 75), -INT8_C( 26), INT8_C( 79), INT8_C( 67), INT8_C( 60), -INT8_C( 94), -INT8_C( 51), -INT8_C( 93), INT8_C( 108), -INT8_C( 127), -INT8_C( 102), INT8_C( 36), -INT8_C( 83), -INT8_C( 100), INT8_C( 72), INT8_C( 3), -INT8_C( 109), INT8_C( 66), -INT8_C( 107), -INT8_C( 111), INT8_C( 108), -INT8_C( 14), -INT8_C( 11), INT8_C( 95), INT8_C( 114), INT8_C( 47), -INT8_C( 122), INT8_C( 60), INT8_C( 11), -INT8_C( 101) }, UINT64_C( 653511295613619482) }, { { INT8_C( 91), -INT8_C( 55), -INT8_C( 18), -INT8_C( 31), INT8_C( 100), INT8_C( 100), INT8_C( 16), INT8_C( 95), INT8_C( 55), INT8_C( 118), -INT8_C( 117), INT8_C( 9), -INT8_C( 57), -INT8_C( 86), INT8_C( 52), INT8_C( 23), -INT8_C( 43), INT8_C( 118), -INT8_C( 30), -INT8_C( 55), -INT8_C( 58), -INT8_C( 108), INT8_C( 109), INT8_C( 80), -INT8_C( 20), INT8_C( 18), INT8_C( 90), -INT8_C( 121), -INT8_C( 109), -INT8_C( 67), INT8_C( 51), -INT8_C( 39), INT8_C( 46), INT8_C( 107), -INT8_C( 64), -INT8_C( 109), INT8_C( 30), -INT8_C( 15), INT8_C( 59), -INT8_C( 79), INT8_C( 5), -INT8_C( 58), -INT8_C( 70), -INT8_C( 52), INT8_C( 56), -INT8_C( 69), -INT8_C( 29), -INT8_C( 28), INT8_C( 49), INT8_C( 41), -INT8_C( 83), INT8_C( 29), -INT8_C( 
30), -INT8_C( 81), INT8_C( 73), INT8_C( 91), INT8_C( 118), -INT8_C( 88), INT8_C( 51), INT8_C( 9), INT8_C( 101), -INT8_C( 80), -INT8_C( 30), INT8_C( 11) }, { -INT8_C( 92), INT8_C( 54), INT8_C( 17), INT8_C( 30), -INT8_C( 101), INT8_C( 77), -INT8_C( 49), -INT8_C( 96), INT8_C( 19), -INT8_C( 119), INT8_C( 108), -INT8_C( 124), INT8_C( 32), INT8_C( 79), -INT8_C( 53), INT8_C( 0), INT8_C( 84), -INT8_C( 119), INT8_C( 29), INT8_C( 54), INT8_C( 57), INT8_C( 107), -INT8_C( 110), -INT8_C( 81), INT8_C( 19), -INT8_C( 19), -INT8_C( 72), INT8_C( 120), INT8_C( 124), -INT8_C( 101), -INT8_C( 125), INT8_C( 32), -INT8_C( 47), -INT8_C( 108), INT8_C( 63), INT8_C( 108), -INT8_C( 31), INT8_C( 14), INT8_C( 12), -INT8_C( 11), -INT8_C( 104), INT8_C( 120), INT8_C( 121), -INT8_C( 72), -INT8_C( 57), INT8_C( 68), -INT8_C( 72), INT8_C( 27), -INT8_C( 50), -INT8_C( 42), INT8_C( 82), INT8_C( 7), INT8_C( 65), -INT8_C( 28), -INT8_C( 74), INT8_C( 84), -INT8_C( 47), INT8_C( 110), -INT8_C( 52), INT8_C( 77), INT8_C( 9), INT8_C( 79), INT8_C( 110), -INT8_C( 37) }, UINT64_C(15832491078507898144) }, { { -INT8_C( 50), INT8_C( 11), INT8_C( 17), INT8_C( 45), -INT8_C( 72), -INT8_C( 66), INT8_C( 22), -INT8_C( 119), -INT8_C( 96), -INT8_C( 123), -INT8_C( 15), INT8_C( 121), -INT8_C( 30), -INT8_C( 83), -INT8_C( 54), -INT8_C( 44), INT8_C( 123), -INT8_C( 121), -INT8_C( 67), INT8_C( 106), INT8_C( 44), INT8_C( 63), -INT8_C( 77), -INT8_C( 40), INT8_C( 0), INT8_C( 41), INT8_C( 0), -INT8_C( 66), -INT8_C( 42), -INT8_C( 49), INT8_C( 59), -INT8_C( 92), -INT8_C( 38), INT8_C( 94), INT8_C( 76), -INT8_C( 33), -INT8_C( 28), -INT8_C( 30), INT8_C( 44), INT8_C( 23), -INT8_C( 23), INT8_C( 29), INT8_C( 121), -INT8_C( 52), -INT8_C( 83), -INT8_C( 93), -INT8_C( 124), INT8_C( 41), INT8_C( 123), -INT8_C( 26), -INT8_C( 109), INT8_C( 85), INT8_C( 114), -INT8_C( 114), INT8_C( 34), -INT8_C( 116), -INT8_C( 72), -INT8_C( 39), -INT8_C( 119), INT8_C( 11), -INT8_C( 88), -INT8_C( 42), INT8_C( 42), INT8_C( 85) }, { INT8_C( 52), -INT8_C( 31), INT8_C( 21), 
-INT8_C( 46), -INT8_C( 61), INT8_C( 65), -INT8_C( 23), INT8_C( 118), INT8_C( 95), INT8_C( 122), -INT8_C( 6), INT8_C( 12), INT8_C( 29), INT8_C( 82), INT8_C( 53), INT8_C( 71), INT8_C( 104), -INT8_C( 56), -INT8_C( 98), -INT8_C( 38), INT8_C( 37), -INT8_C( 64), INT8_C( 76), INT8_C( 39), -INT8_C( 103), -INT8_C( 42), -INT8_C( 1), INT8_C( 65), -INT8_C( 84), INT8_C( 124), -INT8_C( 60), -INT8_C( 31), INT8_C( 93), -INT8_C( 39), -INT8_C( 77), INT8_C( 32), INT8_C( 27), -INT8_C( 100), -INT8_C( 106), INT8_C( 122), INT8_C( 22), -INT8_C( 112), -INT8_C( 122), INT8_C( 51), -INT8_C( 29), -INT8_C( 68), INT8_C( 123), INT8_C( 75), -INT8_C( 124), INT8_C( 25), INT8_C( 37), -INT8_C( 86), -INT8_C( 39), INT8_C( 113), -INT8_C( 47), INT8_C( 115), INT8_C( 71), -INT8_C( 47), -INT8_C( 76), -INT8_C( 12), INT8_C( 77), INT8_C( 120), -INT8_C( 43), -INT8_C( 86) }, UINT64_C( 3896936268564499479) }, { { -INT8_C( 95), -INT8_C( 68), INT8_C( 116), INT8_C( 68), -INT8_C( 88), -INT8_C( 77), INT8_C( 3), INT8_C( 109), -INT8_C( 75), -INT8_C( 56), INT8_C( 38), INT8_C( 68), -INT8_C( 23), INT8_C( 102), INT8_C( 82), INT8_C( 61), INT8_C( 76), INT8_C( 69), INT8_C( 121), -INT8_C( 121), INT8_C( 101), INT8_C( 26), -INT8_C( 15), -INT8_C( 100), INT8_C( 107), INT8_C( 66), -INT8_C( 14), -INT8_C( 19), INT8_C( 94), -INT8_C( 48), -INT8_C( 30), INT8_C( 54), -INT8_C( 116), INT8_C( 110), INT8_C( 87), INT8_C( 106), INT8_C( 93), INT8_C( 0), -INT8_C( 41), -INT8_C( 40), -INT8_C( 55), -INT8_C( 2), INT8_C( 2), INT8_C( 101), -INT8_C( 117), INT8_C( 45), INT8_C( 46), -INT8_C( 40), INT8_C( 94), INT8_C( 28), -INT8_C( 103), -INT8_C( 61), -INT8_C( 125), -INT8_C( 118), INT8_C( 116), -INT8_C( 106), INT8_C( 31), -INT8_C( 82), -INT8_C( 112), -INT8_C( 58), INT8_C( 126), INT8_C( 35), -INT8_C( 4), INT8_C( 44) }, { INT8_C( 94), INT8_C( 40), INT8_C( 116), -INT8_C( 69), INT8_C( 87), INT8_C( 76), INT8_C( 87), INT8_C( 108), INT8_C( 74), INT8_C( 55), INT8_C( 107), -INT8_C( 43), INT8_C( 100), -INT8_C( 103), -INT8_C( 83), -INT8_C( 62), -INT8_C( 11), -INT8_C( 
70), -INT8_C( 122), INT8_C( 120), -INT8_C( 33), -INT8_C( 27), INT8_C( 14), INT8_C( 71), -INT8_C( 108), INT8_C( 107), INT8_C( 13), INT8_C( 18), -INT8_C( 114), INT8_C( 9), INT8_C( 29), -INT8_C( 19), INT8_C( 49), -INT8_C( 111), -INT8_C( 88), -INT8_C( 120), -INT8_C( 35), -INT8_C( 1), -INT8_C( 11), INT8_C( 39), INT8_C( 54), INT8_C( 96), -INT8_C( 3), -INT8_C( 102), -INT8_C( 7), -INT8_C( 86), INT8_C( 93), -INT8_C( 18), INT8_C( 101), -INT8_C( 29), INT8_C( 102), INT8_C( 68), -INT8_C( 56), INT8_C( 117), -INT8_C( 117), INT8_C( 92), -INT8_C( 32), -INT8_C( 104), INT8_C( 111), INT8_C( 110), -INT8_C( 94), -INT8_C( 116), INT8_C( 91), -INT8_C( 45) }, UINT64_C( 6528515597080272070) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__mmask64 r = simde_mm512_test_epi8_mask(a, b); simde_assert_equal_mmask64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { int8_t a_[64]; int8_t b_[64]; simde_test_codegen_random_memory(sizeof(a_), HEDLEY_REINTERPRET_CAST(uint8_t*, a_)); simde_test_codegen_random_memory(sizeof(b_), HEDLEY_REINTERPRET_CAST(uint8_t*, b_)); for (size_t j = 0 ; j < 64 ; j++) if (rand() & 1) a_[j] = ~b_[j]; simde__m512i a = simde_mm512_loadu_epi8(a_); simde__m512i b = simde_mm512_loadu_epi8(b_); simde__mmask64 r = simde_mm512_test_epi8_mask(a, b); simde_test_x86_write_i8x64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_test_epi16_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_test_epi32_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_test_epi64_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_test_epi8_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_test_epi16_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_test_epi32_mask) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm512_test_epi64_mask) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_test_epi8_mask) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/unpackhi.c000066400000000000000000007720351400333146700172400ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN unpackhi #include #include #include static int test_simde_mm512_unpackhi_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int8_t a[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { { { -INT8_C( 38), -INT8_C( 108), INT8_C( 78), -INT8_C( 75), INT8_C( 17), -INT8_C( 126), -INT8_C( 86), -INT8_C( 124), -INT8_C( 8), INT8_C( 83), -INT8_C( 101), -INT8_C( 3), INT8_C( 9), INT8_C( 117), INT8_C( 11), INT8_C( 58), -INT8_C( 88), INT8_C( 72), -INT8_C( 104), -INT8_C( 2), INT8_C( 22), INT8_C( 10), -INT8_C( 57), -INT8_C( 70), INT8_C( 115), -INT8_C( 6), -INT8_C( 90), -INT8_C( 111), -INT8_C( 44), INT8_C( 11), -INT8_C( 33), -INT8_C( 82), -INT8_C( 97), INT8_C( 45), INT8_C( 99), -INT8_C( 80), -INT8_C( 80), INT8_C( 13), INT8_C( 52), -INT8_C( 88), INT8_C( 96), -INT8_C( 49), -INT8_C( 91), INT8_C( 105), INT8_C( 68), -INT8_C( 80), -INT8_C( 92), -INT8_C( 19), -INT8_C( 8), INT8_C( 60), -INT8_C( 21), INT8_C( 14), INT8_C( 70), -INT8_C( 78), -INT8_C( 56), -INT8_C( 70), -INT8_C( 84), INT8_C( 111), INT8_C( 75), INT8_MIN, INT8_C( 122), INT8_C( 42), INT8_C( 46), INT8_C( 25) }, { INT8_C( 88), -INT8_C( 111), -INT8_C( 55), INT8_C( 8), -INT8_C( 97), -INT8_C( 3), -INT8_C( 80), -INT8_C( 1), -INT8_C( 52), INT8_C( 85), INT8_C( 105), INT8_C( 16), INT8_C( 6), INT8_C( 13), -INT8_C( 3), -INT8_C( 2), INT8_C( 73), -INT8_C( 24), INT8_C( 13), -INT8_C( 112), -INT8_C( 102), -INT8_C( 43), INT8_C( 74), INT8_C( 70), INT8_C( 68), -INT8_C( 107), -INT8_C( 58), -INT8_C( 66), -INT8_C( 64), -INT8_C( 12), -INT8_C( 41), INT8_C( 24), -INT8_C( 122), -INT8_C( 96), INT8_C( 32), INT8_C( 37), -INT8_C( 99), -INT8_C( 48), INT8_C( 36), INT8_C( 105), INT8_C( 37), -INT8_C( 115), INT8_C( 122), INT8_C( 43), -INT8_C( 102), INT8_C( 119), INT8_C( 42), -INT8_C( 28), INT8_C( 96), INT8_C( 55), INT8_C( 116), -INT8_C( 6), INT8_C( 12), -INT8_C( 66), INT8_C( 65), INT8_C( 81), INT8_C( 83), INT8_C( 7), INT8_C( 15), INT8_C( 19), -INT8_C( 4), 
-INT8_C( 25), INT8_C( 43), -INT8_C( 126) }, { -INT8_C( 8), -INT8_C( 52), INT8_C( 83), INT8_C( 85), -INT8_C( 101), INT8_C( 105), -INT8_C( 3), INT8_C( 16), INT8_C( 9), INT8_C( 6), INT8_C( 117), INT8_C( 13), INT8_C( 11), -INT8_C( 3), INT8_C( 58), -INT8_C( 2), INT8_C( 115), INT8_C( 68), -INT8_C( 6), -INT8_C( 107), -INT8_C( 90), -INT8_C( 58), -INT8_C( 111), -INT8_C( 66), -INT8_C( 44), -INT8_C( 64), INT8_C( 11), -INT8_C( 12), -INT8_C( 33), -INT8_C( 41), -INT8_C( 82), INT8_C( 24), INT8_C( 96), INT8_C( 37), -INT8_C( 49), -INT8_C( 115), -INT8_C( 91), INT8_C( 122), INT8_C( 105), INT8_C( 43), INT8_C( 68), -INT8_C( 102), -INT8_C( 80), INT8_C( 119), -INT8_C( 92), INT8_C( 42), -INT8_C( 19), -INT8_C( 28), -INT8_C( 84), INT8_C( 83), INT8_C( 111), INT8_C( 7), INT8_C( 75), INT8_C( 15), INT8_MIN, INT8_C( 19), INT8_C( 122), -INT8_C( 4), INT8_C( 42), -INT8_C( 25), INT8_C( 46), INT8_C( 43), INT8_C( 25), -INT8_C( 126) } }, { { -INT8_C( 121), INT8_C( 75), -INT8_C( 89), INT8_C( 37), INT8_C( 27), -INT8_C( 53), -INT8_C( 114), INT8_C( 65), INT8_C( 89), INT8_C( 8), INT8_C( 108), -INT8_C( 13), INT8_MIN, -INT8_C( 106), -INT8_C( 41), -INT8_C( 32), -INT8_C( 51), INT8_C( 75), -INT8_C( 38), -INT8_C( 38), INT8_C( 9), INT8_C( 27), INT8_C( 43), INT8_C( 93), INT8_C( 35), INT8_C( 58), INT8_C( 112), INT8_C( 31), INT8_C( 33), -INT8_C( 100), -INT8_C( 95), -INT8_C( 87), -INT8_C( 25), INT8_C( 72), -INT8_C( 50), INT8_C( 3), INT8_C( 19), INT8_C( 92), INT8_C( 68), INT8_C( 108), INT8_C( 101), -INT8_C( 80), INT8_C( 96), -INT8_C( 27), INT8_C( 71), INT8_C( 55), -INT8_C( 59), INT8_C( 20), -INT8_C( 125), -INT8_C( 97), -INT8_C( 18), -INT8_C( 116), -INT8_C( 69), INT8_C( 25), -INT8_C( 23), -INT8_C( 34), INT8_C( 84), INT8_C( 90), -INT8_C( 3), INT8_C( 117), -INT8_C( 10), -INT8_C( 98), INT8_C( 30), -INT8_C( 35) }, { -INT8_C( 26), -INT8_C( 20), -INT8_C( 32), -INT8_C( 7), INT8_C( 73), INT8_C( 36), INT8_C( 102), -INT8_C( 82), -INT8_C( 43), -INT8_C( 58), -INT8_C( 109), INT8_C( 28), -INT8_C( 3), INT8_C( 88), INT8_C( 48), 
INT8_MIN, -INT8_C( 9), INT8_C( 31), INT8_C( 13), -INT8_C( 78), INT8_C( 56), -INT8_C( 10), -INT8_C( 112), -INT8_C( 116), INT8_C( 80), -INT8_C( 115), INT8_C( 2), INT8_C( 70), INT8_C( 43), INT8_C( 32), INT8_C( 36), INT8_C( 17), INT8_C( 13), INT8_C( 4), INT8_C( 11), INT8_C( 86), INT8_C( 41), INT8_C( 113), INT8_C( 4), -INT8_C( 2), INT8_C( 55), -INT8_C( 105), INT8_C( 26), INT8_C( 52), -INT8_C( 17), INT8_C( 74), -INT8_C( 75), -INT8_C( 26), INT8_C( 105), -INT8_C( 62), -INT8_C( 103), -INT8_C( 94), -INT8_C( 72), INT8_C( 41), INT8_C( 46), INT8_C( 9), -INT8_C( 73), INT8_C( 48), INT8_C( 79), -INT8_C( 30), INT8_C( 81), INT8_C( 115), -INT8_C( 12), INT8_C( 94) }, { INT8_C( 89), -INT8_C( 43), INT8_C( 8), -INT8_C( 58), INT8_C( 108), -INT8_C( 109), -INT8_C( 13), INT8_C( 28), INT8_MIN, -INT8_C( 3), -INT8_C( 106), INT8_C( 88), -INT8_C( 41), INT8_C( 48), -INT8_C( 32), INT8_MIN, INT8_C( 35), INT8_C( 80), INT8_C( 58), -INT8_C( 115), INT8_C( 112), INT8_C( 2), INT8_C( 31), INT8_C( 70), INT8_C( 33), INT8_C( 43), -INT8_C( 100), INT8_C( 32), -INT8_C( 95), INT8_C( 36), -INT8_C( 87), INT8_C( 17), INT8_C( 101), INT8_C( 55), -INT8_C( 80), -INT8_C( 105), INT8_C( 96), INT8_C( 26), -INT8_C( 27), INT8_C( 52), INT8_C( 71), -INT8_C( 17), INT8_C( 55), INT8_C( 74), -INT8_C( 59), -INT8_C( 75), INT8_C( 20), -INT8_C( 26), INT8_C( 84), -INT8_C( 73), INT8_C( 90), INT8_C( 48), -INT8_C( 3), INT8_C( 79), INT8_C( 117), -INT8_C( 30), -INT8_C( 10), INT8_C( 81), -INT8_C( 98), INT8_C( 115), INT8_C( 30), -INT8_C( 12), -INT8_C( 35), INT8_C( 94) } }, { { INT8_C( 120), -INT8_C( 1), -INT8_C( 76), -INT8_C( 95), INT8_C( 112), -INT8_C( 72), -INT8_C( 97), -INT8_C( 89), INT8_C( 79), -INT8_C( 71), -INT8_C( 37), INT8_C( 62), INT8_C( 3), -INT8_C( 112), INT8_C( 36), INT8_C( 109), INT8_C( 82), -INT8_C( 67), INT8_C( 15), INT8_C( 11), -INT8_C( 25), INT8_C( 61), INT8_C( 20), -INT8_C( 98), INT8_C( 110), INT8_C( 99), INT8_MIN, -INT8_C( 65), -INT8_C( 41), INT8_C( 116), INT8_C( 29), INT8_C( 79), INT8_C( 115), -INT8_C( 47), -INT8_C( 16), 
-INT8_C( 29), -INT8_C( 119), -INT8_C( 113), -INT8_C( 118), -INT8_C( 40), INT8_C( 72), INT8_C( 102), INT8_C( 22), INT8_C( 75), -INT8_C( 10), INT8_C( 58), -INT8_C( 72), INT8_C( 73), -INT8_C( 8), -INT8_C( 57), INT8_C( 84), -INT8_C( 33), INT8_C( 5), INT8_C( 104), INT8_C( 125), INT8_C( 115), -INT8_C( 53), -INT8_C( 3), INT8_C( 50), -INT8_C( 94), INT8_C( 114), INT8_C( 79), -INT8_C( 15), -INT8_C( 27) }, { INT8_C( 32), -INT8_C( 31), -INT8_C( 55), -INT8_C( 87), INT8_C( 112), INT8_C( 83), -INT8_C( 127), -INT8_C( 72), -INT8_C( 71), -INT8_C( 105), INT8_C( 4), -INT8_C( 80), -INT8_C( 47), -INT8_C( 68), -INT8_C( 7), -INT8_C( 55), -INT8_C( 124), INT8_C( 77), -INT8_C( 88), -INT8_C( 119), -INT8_C( 75), INT8_C( 37), -INT8_C( 4), INT8_MIN, INT8_C( 35), INT8_C( 46), INT8_C( 35), -INT8_C( 107), INT8_C( 125), INT8_C( 20), INT8_C( 122), -INT8_C( 99), -INT8_C( 10), INT8_C( 67), INT8_C( 70), INT8_C( 102), -INT8_C( 105), -INT8_C( 57), INT8_C( 31), INT8_C( 80), INT8_C( 94), INT8_C( 35), INT8_C( 0), INT8_C( 47), -INT8_C( 33), -INT8_C( 7), -INT8_C( 7), INT8_C( 99), INT8_C( 70), -INT8_C( 95), -INT8_C( 20), -INT8_C( 5), -INT8_C( 57), -INT8_C( 24), INT8_C( 124), -INT8_C( 22), INT8_C( 22), -INT8_C( 97), INT8_MAX, -INT8_C( 109), -INT8_C( 77), -INT8_C( 7), INT8_C( 48), -INT8_C( 87) }, { INT8_C( 79), -INT8_C( 71), -INT8_C( 71), -INT8_C( 105), -INT8_C( 37), INT8_C( 4), INT8_C( 62), -INT8_C( 80), INT8_C( 3), -INT8_C( 47), -INT8_C( 112), -INT8_C( 68), INT8_C( 36), -INT8_C( 7), INT8_C( 109), -INT8_C( 55), INT8_C( 110), INT8_C( 35), INT8_C( 99), INT8_C( 46), INT8_MIN, INT8_C( 35), -INT8_C( 65), -INT8_C( 107), -INT8_C( 41), INT8_C( 125), INT8_C( 116), INT8_C( 20), INT8_C( 29), INT8_C( 122), INT8_C( 79), -INT8_C( 99), INT8_C( 72), INT8_C( 94), INT8_C( 102), INT8_C( 35), INT8_C( 22), INT8_C( 0), INT8_C( 75), INT8_C( 47), -INT8_C( 10), -INT8_C( 33), INT8_C( 58), -INT8_C( 7), -INT8_C( 72), -INT8_C( 7), INT8_C( 73), INT8_C( 99), -INT8_C( 53), INT8_C( 22), -INT8_C( 3), -INT8_C( 97), INT8_C( 50), INT8_MAX, -INT8_C( 
94), -INT8_C( 109), INT8_C( 114), -INT8_C( 77), INT8_C( 79), -INT8_C( 7), -INT8_C( 15), INT8_C( 48), -INT8_C( 27), -INT8_C( 87) } }, { { INT8_C( 61), INT8_C( 118), INT8_C( 16), -INT8_C( 44), INT8_C( 61), INT8_C( 47), INT8_C( 36), -INT8_C( 101), INT8_C( 82), INT8_C( 37), -INT8_C( 53), INT8_C( 49), INT8_C( 30), -INT8_C( 60), -INT8_C( 107), INT8_C( 101), INT8_C( 101), -INT8_C( 127), INT8_C( 96), INT8_C( 44), INT8_C( 106), -INT8_C( 36), INT8_C( 22), INT8_MIN, INT8_C( 123), -INT8_C( 107), INT8_C( 20), INT8_C( 47), -INT8_C( 113), INT8_C( 68), -INT8_C( 40), -INT8_C( 52), -INT8_C( 69), -INT8_C( 24), -INT8_C( 96), -INT8_C( 8), INT8_C( 23), -INT8_C( 60), -INT8_C( 108), INT8_C( 105), -INT8_C( 23), INT8_C( 95), -INT8_C( 101), INT8_C( 8), INT8_C( 35), INT8_C( 48), INT8_C( 109), -INT8_C( 120), -INT8_C( 79), -INT8_C( 51), -INT8_C( 75), INT8_C( 27), -INT8_C( 86), -INT8_C( 53), -INT8_C( 100), INT8_C( 37), INT8_C( 97), -INT8_C( 80), INT8_C( 84), -INT8_C( 16), -INT8_C( 12), INT8_C( 45), -INT8_C( 68), -INT8_C( 81) }, { INT8_C( 21), INT8_C( 92), -INT8_C( 88), INT8_C( 45), INT8_C( 32), INT8_C( 60), -INT8_C( 106), INT8_C( 10), -INT8_C( 101), INT8_C( 49), INT8_C( 18), -INT8_C( 66), INT8_C( 97), INT8_MAX, INT8_C( 70), INT8_C( 19), INT8_C( 76), -INT8_C( 5), INT8_C( 46), -INT8_C( 10), -INT8_C( 57), -INT8_C( 54), INT8_C( 28), INT8_C( 40), INT8_C( 122), INT8_C( 112), INT8_C( 24), INT8_C( 111), -INT8_C( 99), -INT8_C( 44), INT8_C( 30), -INT8_C( 77), INT8_C( 48), -INT8_C( 58), -INT8_C( 32), INT8_C( 80), INT8_C( 2), INT8_C( 118), INT8_C( 90), -INT8_C( 99), -INT8_C( 88), INT8_C( 108), INT8_C( 91), INT8_C( 9), -INT8_C( 21), -INT8_C( 94), INT8_C( 28), INT8_C( 56), -INT8_C( 99), INT8_C( 75), INT8_C( 46), INT8_C( 100), INT8_C( 21), INT8_C( 74), -INT8_C( 116), -INT8_C( 112), -INT8_C( 69), -INT8_C( 92), -INT8_C( 1), INT8_C( 88), INT8_C( 120), INT8_C( 29), INT8_C( 11), -INT8_C( 88) }, { INT8_C( 82), -INT8_C( 101), INT8_C( 37), INT8_C( 49), -INT8_C( 53), INT8_C( 18), INT8_C( 49), -INT8_C( 66), INT8_C( 30), 
INT8_C( 97), -INT8_C( 60), INT8_MAX, -INT8_C( 107), INT8_C( 70), INT8_C( 101), INT8_C( 19), INT8_C( 123), INT8_C( 122), -INT8_C( 107), INT8_C( 112), INT8_C( 20), INT8_C( 24), INT8_C( 47), INT8_C( 111), -INT8_C( 113), -INT8_C( 99), INT8_C( 68), -INT8_C( 44), -INT8_C( 40), INT8_C( 30), -INT8_C( 52), -INT8_C( 77), -INT8_C( 23), -INT8_C( 88), INT8_C( 95), INT8_C( 108), -INT8_C( 101), INT8_C( 91), INT8_C( 8), INT8_C( 9), INT8_C( 35), -INT8_C( 21), INT8_C( 48), -INT8_C( 94), INT8_C( 109), INT8_C( 28), -INT8_C( 120), INT8_C( 56), INT8_C( 97), -INT8_C( 69), -INT8_C( 80), -INT8_C( 92), INT8_C( 84), -INT8_C( 1), -INT8_C( 16), INT8_C( 88), -INT8_C( 12), INT8_C( 120), INT8_C( 45), INT8_C( 29), -INT8_C( 68), INT8_C( 11), -INT8_C( 81), -INT8_C( 88) } }, { { -INT8_C( 28), -INT8_C( 21), -INT8_C( 7), -INT8_C( 26), INT8_C( 98), INT8_C( 83), -INT8_C( 124), INT8_C( 10), -INT8_C( 64), -INT8_C( 33), INT8_C( 19), -INT8_C( 85), -INT8_C( 127), INT8_C( 48), -INT8_C( 29), INT8_C( 31), INT8_C( 123), INT8_C( 18), -INT8_C( 125), -INT8_C( 112), INT8_C( 92), INT8_C( 16), INT8_C( 32), INT8_C( 23), -INT8_C( 76), INT8_C( 31), INT8_C( 112), INT8_C( 45), INT8_C( 61), INT8_C( 123), -INT8_C( 43), INT8_C( 33), INT8_C( 103), -INT8_C( 50), INT8_C( 7), -INT8_C( 55), INT8_C( 34), -INT8_C( 117), -INT8_C( 45), -INT8_C( 30), INT8_C( 107), -INT8_C( 26), -INT8_C( 115), -INT8_C( 20), INT8_C( 22), INT8_C( 113), INT8_C( 11), -INT8_C( 111), -INT8_C( 125), -INT8_C( 113), INT8_C( 34), -INT8_C( 33), -INT8_C( 97), INT8_C( 66), -INT8_C( 9), INT8_C( 83), INT8_C( 98), INT8_C( 103), INT8_MIN, -INT8_C( 97), -INT8_C( 30), INT8_C( 86), -INT8_C( 64), INT8_C( 73) }, { INT8_C( 36), -INT8_C( 57), INT8_C( 18), INT8_C( 70), INT8_C( 83), -INT8_C( 27), INT8_C( 40), -INT8_C( 66), -INT8_C( 52), -INT8_C( 74), -INT8_C( 86), -INT8_C( 30), INT8_C( 39), -INT8_C( 74), INT8_C( 116), -INT8_C( 86), INT8_C( 69), -INT8_C( 106), -INT8_C( 119), -INT8_C( 28), -INT8_C( 40), INT8_MIN, INT8_C( 55), INT8_C( 58), -INT8_C( 25), -INT8_C( 72), -INT8_C( 39), 
-INT8_C( 54), INT8_C( 14), -INT8_C( 103), INT8_C( 19), INT8_C( 50), INT8_C( 97), INT8_C( 38), INT8_C( 121), -INT8_C( 76), INT8_C( 11), -INT8_C( 95), INT8_C( 114), -INT8_C( 41), INT8_C( 87), INT8_C( 28), -INT8_C( 70), INT8_C( 126), -INT8_C( 46), INT8_C( 46), INT8_C( 40), INT8_C( 23), -INT8_C( 60), -INT8_C( 78), -INT8_C( 5), -INT8_C( 100), INT8_C( 50), INT8_C( 51), -INT8_C( 41), INT8_C( 26), -INT8_C( 21), -INT8_C( 80), -INT8_C( 28), -INT8_C( 7), INT8_C( 74), -INT8_C( 9), INT8_C( 43), -INT8_C( 85) }, { -INT8_C( 64), -INT8_C( 52), -INT8_C( 33), -INT8_C( 74), INT8_C( 19), -INT8_C( 86), -INT8_C( 85), -INT8_C( 30), -INT8_C( 127), INT8_C( 39), INT8_C( 48), -INT8_C( 74), -INT8_C( 29), INT8_C( 116), INT8_C( 31), -INT8_C( 86), -INT8_C( 76), -INT8_C( 25), INT8_C( 31), -INT8_C( 72), INT8_C( 112), -INT8_C( 39), INT8_C( 45), -INT8_C( 54), INT8_C( 61), INT8_C( 14), INT8_C( 123), -INT8_C( 103), -INT8_C( 43), INT8_C( 19), INT8_C( 33), INT8_C( 50), INT8_C( 107), INT8_C( 87), -INT8_C( 26), INT8_C( 28), -INT8_C( 115), -INT8_C( 70), -INT8_C( 20), INT8_C( 126), INT8_C( 22), -INT8_C( 46), INT8_C( 113), INT8_C( 46), INT8_C( 11), INT8_C( 40), -INT8_C( 111), INT8_C( 23), INT8_C( 98), -INT8_C( 21), INT8_C( 103), -INT8_C( 80), INT8_MIN, -INT8_C( 28), -INT8_C( 97), -INT8_C( 7), -INT8_C( 30), INT8_C( 74), INT8_C( 86), -INT8_C( 9), -INT8_C( 64), INT8_C( 43), INT8_C( 73), -INT8_C( 85) } }, { { INT8_C( 29), -INT8_C( 92), INT8_C( 95), INT8_C( 41), INT8_C( 70), -INT8_C( 47), INT8_C( 0), -INT8_C( 99), -INT8_C( 19), -INT8_C( 70), INT8_C( 28), -INT8_C( 64), -INT8_C( 24), INT8_C( 68), -INT8_C( 41), -INT8_C( 84), -INT8_C( 10), -INT8_C( 45), INT8_C( 73), INT8_C( 41), INT8_C( 6), INT8_C( 32), INT8_C( 67), -INT8_C( 15), -INT8_C( 48), INT8_C( 39), -INT8_C( 22), INT8_C( 26), INT8_C( 30), INT8_C( 21), -INT8_C( 59), INT8_C( 60), -INT8_C( 70), INT8_C( 36), INT8_C( 101), INT8_C( 0), -INT8_C( 11), INT8_C( 101), -INT8_C( 99), -INT8_C( 29), INT8_C( 32), -INT8_C( 71), -INT8_C( 93), INT8_C( 8), -INT8_C( 2), INT8_C( 
122), -INT8_C( 75), -INT8_C( 12), INT8_C( 77), -INT8_C( 2), INT8_C( 29), INT8_C( 83), INT8_C( 30), INT8_C( 96), INT8_C( 68), -INT8_C( 18), -INT8_C( 121), INT8_C( 46), INT8_C( 9), -INT8_C( 90), INT8_C( 68), -INT8_C( 50), -INT8_C( 30), -INT8_C( 2) }, { -INT8_C( 13), INT8_C( 71), -INT8_C( 2), -INT8_C( 24), -INT8_C( 84), -INT8_C( 101), -INT8_C( 53), -INT8_C( 52), INT8_C( 85), INT8_C( 110), -INT8_C( 43), INT8_C( 83), -INT8_C( 23), -INT8_C( 118), INT8_C( 71), INT8_C( 54), -INT8_C( 120), INT8_C( 101), -INT8_C( 118), -INT8_C( 90), -INT8_C( 59), -INT8_C( 50), -INT8_C( 108), INT8_C( 77), -INT8_C( 3), -INT8_C( 99), -INT8_C( 13), INT8_C( 65), INT8_C( 108), -INT8_C( 43), INT8_C( 63), INT8_C( 95), INT8_C( 28), INT8_C( 61), INT8_C( 71), -INT8_C( 56), -INT8_C( 40), INT8_C( 19), -INT8_C( 107), INT8_C( 45), -INT8_C( 127), INT8_C( 106), INT8_MIN, INT8_C( 106), -INT8_C( 12), -INT8_C( 56), -INT8_C( 95), INT8_C( 124), INT8_C( 45), INT8_C( 43), INT8_C( 34), -INT8_C( 14), -INT8_C( 7), -INT8_C( 74), INT8_C( 63), -INT8_C( 10), INT8_C( 84), INT8_C( 50), INT8_C( 55), -INT8_C( 64), INT8_C( 7), INT8_C( 118), INT8_C( 31), INT8_C( 35) }, { -INT8_C( 19), INT8_C( 85), -INT8_C( 70), INT8_C( 110), INT8_C( 28), -INT8_C( 43), -INT8_C( 64), INT8_C( 83), -INT8_C( 24), -INT8_C( 23), INT8_C( 68), -INT8_C( 118), -INT8_C( 41), INT8_C( 71), -INT8_C( 84), INT8_C( 54), -INT8_C( 48), -INT8_C( 3), INT8_C( 39), -INT8_C( 99), -INT8_C( 22), -INT8_C( 13), INT8_C( 26), INT8_C( 65), INT8_C( 30), INT8_C( 108), INT8_C( 21), -INT8_C( 43), -INT8_C( 59), INT8_C( 63), INT8_C( 60), INT8_C( 95), INT8_C( 32), -INT8_C( 127), -INT8_C( 71), INT8_C( 106), -INT8_C( 93), INT8_MIN, INT8_C( 8), INT8_C( 106), -INT8_C( 2), -INT8_C( 12), INT8_C( 122), -INT8_C( 56), -INT8_C( 75), -INT8_C( 95), -INT8_C( 12), INT8_C( 124), -INT8_C( 121), INT8_C( 84), INT8_C( 46), INT8_C( 50), INT8_C( 9), INT8_C( 55), -INT8_C( 90), -INT8_C( 64), INT8_C( 68), INT8_C( 7), -INT8_C( 50), INT8_C( 118), -INT8_C( 30), INT8_C( 31), -INT8_C( 2), INT8_C( 35) } }, { { 
-INT8_C( 77), INT8_C( 102), -INT8_C( 20), -INT8_C( 116), INT8_C( 121), -INT8_C( 127), -INT8_C( 71), -INT8_C( 5), -INT8_C( 21), INT8_C( 58), INT8_C( 101), -INT8_C( 33), INT8_C( 2), INT8_C( 6), INT8_C( 91), INT8_C( 47), INT8_C( 49), INT8_C( 125), INT8_C( 33), INT8_C( 43), INT8_C( 51), INT8_C( 97), INT8_C( 33), -INT8_C( 121), -INT8_C( 109), INT8_C( 89), INT8_C( 71), -INT8_C( 101), -INT8_C( 49), INT8_C( 102), -INT8_C( 66), -INT8_C( 125), -INT8_C( 51), -INT8_C( 86), INT8_C( 15), INT8_C( 70), INT8_C( 43), -INT8_C( 56), INT8_C( 65), INT8_C( 22), INT8_C( 2), -INT8_C( 89), -INT8_C( 11), INT8_C( 4), -INT8_C( 83), INT8_C( 80), INT8_C( 51), -INT8_C( 33), -INT8_C( 51), INT8_C( 85), INT8_C( 10), INT8_C( 1), -INT8_C( 74), INT8_C( 43), -INT8_C( 120), INT8_C( 73), -INT8_C( 124), -INT8_C( 48), -INT8_C( 28), INT8_C( 84), INT8_C( 54), -INT8_C( 93), -INT8_C( 41), INT8_C( 3) }, { INT8_C( 77), -INT8_C( 26), INT8_C( 74), INT8_C( 121), -INT8_C( 82), -INT8_C( 117), -INT8_C( 113), -INT8_C( 79), INT8_C( 50), -INT8_C( 123), -INT8_C( 75), -INT8_C( 32), -INT8_C( 43), -INT8_C( 23), -INT8_C( 65), -INT8_C( 93), INT8_C( 62), -INT8_C( 55), -INT8_C( 92), -INT8_C( 12), -INT8_C( 12), INT8_C( 44), INT8_C( 61), INT8_C( 121), -INT8_C( 4), INT8_C( 34), -INT8_C( 51), INT8_C( 51), -INT8_C( 59), -INT8_C( 92), INT8_C( 54), INT8_C( 18), -INT8_C( 118), INT8_MIN, -INT8_C( 117), INT8_C( 56), INT8_C( 12), INT8_C( 27), -INT8_C( 23), INT8_C( 62), -INT8_C( 96), -INT8_C( 97), INT8_C( 30), INT8_C( 117), -INT8_C( 120), -INT8_C( 35), INT8_C( 24), -INT8_C( 58), -INT8_C( 90), -INT8_C( 68), -INT8_C( 70), -INT8_C( 101), -INT8_C( 23), -INT8_C( 9), INT8_C( 20), -INT8_C( 27), INT8_C( 25), -INT8_C( 31), INT8_C( 24), -INT8_C( 34), -INT8_C( 123), INT8_C( 79), -INT8_C( 15), INT8_C( 15) }, { -INT8_C( 21), INT8_C( 50), INT8_C( 58), -INT8_C( 123), INT8_C( 101), -INT8_C( 75), -INT8_C( 33), -INT8_C( 32), INT8_C( 2), -INT8_C( 43), INT8_C( 6), -INT8_C( 23), INT8_C( 91), -INT8_C( 65), INT8_C( 47), -INT8_C( 93), -INT8_C( 109), -INT8_C( 4), 
INT8_C( 89), INT8_C( 34), INT8_C( 71), -INT8_C( 51), -INT8_C( 101), INT8_C( 51), -INT8_C( 49), -INT8_C( 59), INT8_C( 102), -INT8_C( 92), -INT8_C( 66), INT8_C( 54), -INT8_C( 125), INT8_C( 18), INT8_C( 2), -INT8_C( 96), -INT8_C( 89), -INT8_C( 97), -INT8_C( 11), INT8_C( 30), INT8_C( 4), INT8_C( 117), -INT8_C( 83), -INT8_C( 120), INT8_C( 80), -INT8_C( 35), INT8_C( 51), INT8_C( 24), -INT8_C( 33), -INT8_C( 58), -INT8_C( 124), INT8_C( 25), -INT8_C( 48), -INT8_C( 31), -INT8_C( 28), INT8_C( 24), INT8_C( 84), -INT8_C( 34), INT8_C( 54), -INT8_C( 123), -INT8_C( 93), INT8_C( 79), -INT8_C( 41), -INT8_C( 15), INT8_C( 3), INT8_C( 15) } }, { { -INT8_C( 49), INT8_C( 124), INT8_C( 71), -INT8_C( 37), -INT8_C( 105), INT8_C( 49), INT8_C( 26), INT8_C( 55), -INT8_C( 48), INT8_C( 56), -INT8_C( 83), INT8_C( 88), INT8_C( 22), -INT8_C( 59), INT8_C( 30), -INT8_C( 68), -INT8_C( 126), -INT8_C( 40), INT8_C( 87), INT8_C( 107), -INT8_C( 49), INT8_C( 107), INT8_C( 80), -INT8_C( 23), INT8_C( 76), INT8_C( 105), -INT8_C( 57), -INT8_C( 47), -INT8_C( 72), -INT8_C( 72), -INT8_C( 32), -INT8_C( 121), INT8_C( 53), INT8_C( 40), INT8_C( 99), -INT8_C( 52), INT8_C( 89), INT8_C( 125), INT8_C( 4), INT8_C( 41), -INT8_C( 75), -INT8_C( 79), -INT8_C( 127), -INT8_C( 53), INT8_C( 118), -INT8_C( 97), -INT8_C( 120), -INT8_C( 8), INT8_C( 119), -INT8_C( 33), INT8_C( 99), INT8_C( 70), INT8_C( 75), -INT8_C( 76), INT8_C( 47), -INT8_C( 105), INT8_C( 29), -INT8_C( 9), INT8_C( 105), -INT8_C( 43), -INT8_C( 81), INT8_C( 73), INT8_C( 92), -INT8_C( 28) }, { INT8_C( 113), -INT8_C( 65), -INT8_C( 79), -INT8_C( 54), INT8_C( 60), -INT8_C( 75), -INT8_C( 13), -INT8_C( 14), INT8_C( 102), INT8_C( 116), -INT8_C( 67), -INT8_C( 36), INT8_C( 19), INT8_C( 69), -INT8_C( 43), -INT8_C( 118), INT8_C( 37), INT8_C( 56), -INT8_C( 47), INT8_C( 112), -INT8_C( 20), INT8_C( 0), INT8_C( 7), INT8_C( 9), -INT8_C( 9), INT8_C( 112), -INT8_C( 34), -INT8_C( 89), -INT8_C( 70), INT8_C( 59), -INT8_C( 117), INT8_C( 43), -INT8_C( 6), INT8_C( 60), -INT8_C( 10), INT8_C( 
55), -INT8_C( 15), -INT8_C( 23), INT8_C( 41), INT8_C( 87), INT8_C( 94), -INT8_C( 26), INT8_C( 52), INT8_C( 113), INT8_C( 44), INT8_C( 9), -INT8_C( 4), INT8_C( 81), INT8_C( 65), -INT8_C( 51), -INT8_C( 63), INT8_C( 46), -INT8_C( 51), -INT8_C( 56), INT8_C( 55), -INT8_C( 59), INT8_C( 57), INT8_C( 22), INT8_C( 108), -INT8_C( 13), INT8_C( 81), -INT8_C( 9), INT8_C( 30), INT8_C( 75) }, { -INT8_C( 48), INT8_C( 102), INT8_C( 56), INT8_C( 116), -INT8_C( 83), -INT8_C( 67), INT8_C( 88), -INT8_C( 36), INT8_C( 22), INT8_C( 19), -INT8_C( 59), INT8_C( 69), INT8_C( 30), -INT8_C( 43), -INT8_C( 68), -INT8_C( 118), INT8_C( 76), -INT8_C( 9), INT8_C( 105), INT8_C( 112), -INT8_C( 57), -INT8_C( 34), -INT8_C( 47), -INT8_C( 89), -INT8_C( 72), -INT8_C( 70), -INT8_C( 72), INT8_C( 59), -INT8_C( 32), -INT8_C( 117), -INT8_C( 121), INT8_C( 43), -INT8_C( 75), INT8_C( 94), -INT8_C( 79), -INT8_C( 26), -INT8_C( 127), INT8_C( 52), -INT8_C( 53), INT8_C( 113), INT8_C( 118), INT8_C( 44), -INT8_C( 97), INT8_C( 9), -INT8_C( 120), -INT8_C( 4), -INT8_C( 8), INT8_C( 81), INT8_C( 29), INT8_C( 57), -INT8_C( 9), INT8_C( 22), INT8_C( 105), INT8_C( 108), -INT8_C( 43), -INT8_C( 13), -INT8_C( 81), INT8_C( 81), INT8_C( 73), -INT8_C( 9), INT8_C( 92), INT8_C( 30), -INT8_C( 28), INT8_C( 75) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_unpackhi_epi8(a, b); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; }
/* Table-driven test for simde_mm512_mask_unpackhi_epi8.
 *
 * Each test_vec entry holds five fields, in this order:
 *   src - 64 int8_t values loaded and passed as the first argument
 *   k   - simde__mmask64 value passed as the second argument
 *   a   - 64 int8_t values loaded and passed as the third argument
 *   b   - 64 int8_t values loaded and passed as the fourth argument
 *   r   - 64 int8_t values the call is expected to produce
 *
 * Every entry is run through the function and the result is compared with r
 * via simde_test_x86_assert_equal_i8x64. The function returns 0 once the loop
 * has finished; how a mismatch is reported depends on that assert macro, which
 * is defined outside this chunk.
 *
 * NOTE(review): the vectors look consistent with merge-masking (lanes whose
 * bit in k is clear keep the value from src) -- confirm against the
 * implementation of simde_mm512_mask_unpackhi_epi8.
 */
static int test_simde_mm512_mask_unpackhi_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int8_t src[64]; const simde__mmask64 k; const int8_t a[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { { { -INT8_C( 106), -INT8_C( 100), INT8_C( 33), -INT8_C( 32), -INT8_C( 88), -INT8_C( 74), -INT8_C( 39), -INT8_C( 122), INT8_C( 117), -INT8_C( 7), -INT8_C( 83), INT8_C( 69), -INT8_C( 23), 
-INT8_C( 53), -INT8_C( 6), INT8_C( 78), -INT8_C( 78), INT8_C( 43), -INT8_C( 27), -INT8_C( 111), -INT8_C( 112), INT8_C( 0), -INT8_C( 107), INT8_C( 42), -INT8_C( 99), -INT8_C( 2), INT8_C( 93), INT8_C( 2), INT8_C( 121), -INT8_C( 15), INT8_C( 68), -INT8_C( 112), -INT8_C( 9), -INT8_C( 93), INT8_C( 60), -INT8_C( 87), -INT8_C( 69), -INT8_C( 81), -INT8_C( 55), INT8_C( 72), -INT8_C( 107), -INT8_C( 87), -INT8_C( 93), INT8_C( 61), INT8_C( 51), -INT8_C( 103), INT8_C( 13), -INT8_C( 35), -INT8_C( 15), -INT8_C( 3), -INT8_C( 3), -INT8_C( 79), INT8_C( 0), INT8_C( 54), INT8_C( 42), INT8_C( 31), INT8_C( 98), INT8_C( 29), -INT8_C( 117), INT8_C( 53), INT8_C( 47), INT8_C( 23), INT8_C( 76), -INT8_C( 12) }, UINT64_C(15624862570805900888), { INT8_C( 30), -INT8_C( 5), INT8_C( 117), INT8_C( 77), -INT8_C( 126), -INT8_C( 33), -INT8_C( 26), -INT8_C( 81), INT8_C( 7), -INT8_C( 96), -INT8_C( 38), INT8_C( 33), -INT8_C( 39), INT8_C( 47), INT8_C( 94), -INT8_C( 45), INT8_C( 0), INT8_C( 79), INT8_C( 78), -INT8_C( 47), INT8_C( 102), -INT8_C( 3), INT8_C( 33), -INT8_C( 89), -INT8_C( 103), INT8_C( 103), -INT8_C( 31), -INT8_C( 12), -INT8_C( 97), INT8_C( 72), -INT8_C( 59), -INT8_C( 105), -INT8_C( 58), INT8_C( 77), INT8_C( 31), INT8_C( 42), INT8_C( 29), -INT8_C( 67), INT8_C( 120), INT8_C( 114), INT8_C( 81), -INT8_C( 91), -INT8_C( 50), INT8_C( 76), -INT8_C( 107), -INT8_C( 99), -INT8_C( 105), INT8_C( 51), INT8_C( 113), -INT8_C( 68), INT8_C( 32), INT8_C( 12), -INT8_C( 106), INT8_C( 42), INT8_C( 5), -INT8_C( 2), -INT8_C( 76), INT8_C( 39), -INT8_C( 124), -INT8_C( 100), INT8_C( 99), -INT8_C( 71), -INT8_C( 14), -INT8_C( 46) }, { -INT8_C( 74), INT8_C( 38), INT8_C( 81), INT8_C( 18), -INT8_C( 32), INT8_C( 5), -INT8_C( 64), INT8_C( 28), -INT8_C( 35), INT8_C( 6), INT8_C( 41), INT8_C( 101), INT8_C( 71), -INT8_C( 120), INT8_C( 104), INT8_C( 1), -INT8_C( 35), INT8_C( 0), -INT8_C( 21), INT8_C( 41), -INT8_C( 99), -INT8_C( 25), -INT8_C( 85), -INT8_C( 94), INT8_C( 107), INT8_C( 125), INT8_C( 58), INT8_C( 88), INT8_C( 88), 
INT8_C( 93), INT8_C( 70), INT8_C( 24), -INT8_C( 126), -INT8_C( 99), INT8_C( 8), INT8_C( 47), -INT8_C( 114), -INT8_C( 79), INT8_C( 82), INT8_C( 8), -INT8_C( 122), -INT8_C( 63), -INT8_C( 54), -INT8_C( 81), -INT8_C( 69), INT8_C( 37), -INT8_C( 117), INT8_C( 124), -INT8_C( 28), INT8_C( 111), INT8_C( 74), -INT8_C( 123), -INT8_C( 45), -INT8_C( 69), INT8_C( 54), -INT8_C( 102), -INT8_C( 11), -INT8_C( 44), -INT8_C( 72), -INT8_C( 104), INT8_C( 88), INT8_C( 45), INT8_C( 75), INT8_C( 118) }, { -INT8_C( 106), -INT8_C( 100), INT8_C( 33), INT8_C( 6), -INT8_C( 38), -INT8_C( 74), INT8_C( 33), -INT8_C( 122), INT8_C( 117), INT8_C( 71), INT8_C( 47), INT8_C( 69), INT8_C( 94), INT8_C( 104), -INT8_C( 6), INT8_C( 1), -INT8_C( 78), INT8_C( 107), INT8_C( 103), INT8_C( 125), -INT8_C( 112), INT8_C( 58), -INT8_C( 107), INT8_C( 42), -INT8_C( 97), -INT8_C( 2), INT8_C( 72), INT8_C( 93), -INT8_C( 59), -INT8_C( 15), -INT8_C( 105), -INT8_C( 112), INT8_C( 81), -INT8_C( 93), INT8_C( 60), -INT8_C( 63), -INT8_C( 50), -INT8_C( 54), -INT8_C( 55), -INT8_C( 81), -INT8_C( 107), -INT8_C( 87), -INT8_C( 93), INT8_C( 37), INT8_C( 51), -INT8_C( 117), INT8_C( 13), INT8_C( 124), -INT8_C( 15), -INT8_C( 11), INT8_C( 39), -INT8_C( 79), -INT8_C( 124), INT8_C( 54), -INT8_C( 100), -INT8_C( 104), INT8_C( 98), INT8_C( 29), -INT8_C( 117), INT8_C( 45), -INT8_C( 14), INT8_C( 23), -INT8_C( 46), INT8_C( 118) } }, { { INT8_C( 46), -INT8_C( 20), INT8_C( 7), INT8_C( 75), -INT8_C( 99), -INT8_C( 90), -INT8_C( 83), -INT8_C( 45), INT8_C( 17), -INT8_C( 2), -INT8_C( 126), -INT8_C( 107), INT8_C( 107), INT8_C( 96), -INT8_C( 29), -INT8_C( 44), INT8_C( 11), -INT8_C( 50), -INT8_C( 59), -INT8_C( 64), -INT8_C( 62), INT8_C( 41), -INT8_C( 84), -INT8_C( 104), INT8_C( 76), INT8_C( 87), -INT8_C( 90), INT8_C( 34), INT8_C( 86), INT8_C( 54), INT8_C( 40), -INT8_C( 41), INT8_C( 23), -INT8_C( 34), INT8_C( 104), INT8_C( 120), INT8_C( 60), INT8_C( 90), INT8_C( 69), INT8_C( 99), INT8_C( 27), -INT8_C( 72), INT8_C( 24), -INT8_C( 117), INT8_C( 20), -INT8_C( 
18), INT8_C( 28), -INT8_C( 108), INT8_C( 70), INT8_C( 42), -INT8_C( 34), -INT8_C( 53), -INT8_C( 115), INT8_C( 60), INT8_C( 87), -INT8_C( 53), -INT8_C( 50), -INT8_C( 4), -INT8_C( 99), INT8_C( 15), -INT8_C( 103), INT8_C( 124), INT8_C( 16), -INT8_C( 50) }, UINT64_C(11122572813131943228), { -INT8_C( 61), -INT8_C( 92), INT8_C( 99), INT8_C( 1), -INT8_C( 86), INT8_C( 87), -INT8_C( 83), INT8_MAX, INT8_C( 121), -INT8_C( 102), INT8_C( 94), INT8_C( 71), -INT8_C( 84), INT8_C( 52), -INT8_C( 49), -INT8_C( 81), -INT8_C( 60), INT8_C( 32), INT8_C( 47), -INT8_C( 126), -INT8_C( 10), -INT8_C( 86), -INT8_C( 86), INT8_C( 68), INT8_C( 59), -INT8_C( 120), INT8_C( 47), INT8_C( 89), INT8_C( 69), -INT8_C( 100), INT8_C( 95), INT8_C( 47), -INT8_C( 3), -INT8_C( 91), -INT8_C( 108), INT8_C( 97), -INT8_C( 104), -INT8_C( 123), -INT8_C( 34), INT8_C( 79), -INT8_C( 26), -INT8_C( 38), INT8_C( 80), INT8_C( 23), INT8_C( 52), INT8_C( 86), INT8_C( 124), -INT8_C( 73), INT8_C( 41), INT8_C( 30), -INT8_C( 39), INT8_C( 33), INT8_C( 89), -INT8_C( 99), -INT8_C( 59), -INT8_C( 107), -INT8_C( 35), -INT8_C( 100), -INT8_C( 22), INT8_C( 55), -INT8_C( 36), INT8_C( 78), INT8_C( 6), INT8_C( 70) }, { -INT8_C( 104), -INT8_C( 73), INT8_C( 61), INT8_C( 62), INT8_C( 2), INT8_C( 25), -INT8_C( 112), INT8_C( 26), -INT8_C( 28), -INT8_C( 32), INT8_C( 106), INT8_C( 106), INT8_C( 53), -INT8_C( 87), -INT8_C( 8), INT8_C( 40), -INT8_C( 72), INT8_C( 112), INT8_C( 40), INT8_C( 53), INT8_C( 15), -INT8_C( 103), INT8_C( 80), -INT8_C( 22), -INT8_C( 34), -INT8_C( 21), -INT8_C( 19), -INT8_C( 98), -INT8_C( 13), -INT8_C( 93), INT8_C( 53), -INT8_C( 112), -INT8_C( 69), INT8_C( 80), INT8_C( 77), -INT8_C( 12), INT8_C( 30), INT8_C( 79), -INT8_C( 9), INT8_C( 119), INT8_C( 54), -INT8_C( 111), INT8_C( 113), -INT8_C( 99), -INT8_C( 118), INT8_C( 39), -INT8_C( 101), INT8_C( 8), INT8_C( 18), -INT8_C( 90), -INT8_C( 43), -INT8_C( 107), -INT8_C( 113), -INT8_C( 15), -INT8_C( 117), -INT8_C( 61), -INT8_C( 87), -INT8_C( 56), -INT8_C( 70), INT8_C( 55), INT8_C( 80), 
INT8_C( 119), -INT8_C( 84), INT8_C( 61) }, { INT8_C( 46), -INT8_C( 20), -INT8_C( 102), -INT8_C( 32), INT8_C( 94), INT8_C( 106), -INT8_C( 83), -INT8_C( 45), -INT8_C( 84), -INT8_C( 2), INT8_C( 52), -INT8_C( 87), INT8_C( 107), INT8_C( 96), -INT8_C( 29), INT8_C( 40), INT8_C( 59), -INT8_C( 50), -INT8_C( 120), -INT8_C( 64), -INT8_C( 62), -INT8_C( 19), -INT8_C( 84), -INT8_C( 104), INT8_C( 69), -INT8_C( 13), -INT8_C( 90), -INT8_C( 93), INT8_C( 86), INT8_C( 53), INT8_C( 40), -INT8_C( 41), -INT8_C( 26), INT8_C( 54), INT8_C( 104), -INT8_C( 111), INT8_C( 60), INT8_C( 90), INT8_C( 69), INT8_C( 99), INT8_C( 52), -INT8_C( 72), INT8_C( 24), -INT8_C( 117), INT8_C( 124), -INT8_C( 18), -INT8_C( 73), -INT8_C( 108), -INT8_C( 35), -INT8_C( 87), -INT8_C( 34), -INT8_C( 56), -INT8_C( 22), INT8_C( 60), INT8_C( 55), -INT8_C( 53), -INT8_C( 50), INT8_C( 80), -INT8_C( 99), INT8_C( 119), INT8_C( 6), INT8_C( 124), INT8_C( 16), INT8_C( 61) } }, { { INT8_C( 57), INT8_C( 66), INT8_C( 7), -INT8_C( 33), INT8_C( 19), INT8_C( 42), -INT8_C( 34), -INT8_C( 120), INT8_C( 31), -INT8_C( 107), -INT8_C( 91), -INT8_C( 52), -INT8_C( 123), INT8_C( 9), -INT8_C( 10), -INT8_C( 125), INT8_C( 59), -INT8_C( 90), -INT8_C( 126), -INT8_C( 74), INT8_C( 18), INT8_C( 118), -INT8_C( 15), -INT8_C( 37), -INT8_C( 108), -INT8_C( 83), -INT8_C( 88), -INT8_C( 82), INT8_C( 10), INT8_C( 74), -INT8_C( 99), -INT8_C( 71), -INT8_C( 85), INT8_C( 125), -INT8_C( 66), INT8_C( 72), -INT8_C( 79), -INT8_C( 111), INT8_C( 117), -INT8_C( 58), -INT8_C( 109), INT8_C( 99), -INT8_C( 116), -INT8_C( 114), -INT8_C( 21), -INT8_C( 36), INT8_C( 26), -INT8_C( 28), -INT8_C( 80), -INT8_C( 29), -INT8_C( 80), INT8_C( 51), -INT8_C( 106), INT8_C( 21), -INT8_C( 67), INT8_C( 85), INT8_C( 124), INT8_C( 114), -INT8_C( 34), -INT8_C( 16), INT8_C( 17), INT8_C( 107), INT8_C( 125), INT8_C( 65) }, UINT64_C( 4070353243830355645), { -INT8_C( 52), -INT8_C( 103), INT8_C( 13), -INT8_C( 64), INT8_C( 105), -INT8_C( 43), INT8_C( 104), INT8_C( 57), INT8_C( 85), INT8_C( 44), -INT8_C( 
108), INT8_C( 23), INT8_C( 1), INT8_C( 82), -INT8_C( 12), INT8_C( 126), -INT8_C( 73), -INT8_C( 87), -INT8_C( 18), -INT8_C( 37), INT8_C( 29), INT8_C( 12), -INT8_C( 88), -INT8_C( 60), -INT8_C( 10), -INT8_C( 33), -INT8_C( 62), INT8_C( 50), INT8_C( 108), INT8_C( 35), INT8_C( 6), -INT8_C( 69), -INT8_C( 83), INT8_C( 42), INT8_C( 108), -INT8_C( 2), INT8_C( 89), INT8_C( 93), -INT8_C( 99), -INT8_C( 42), -INT8_C( 44), INT8_C( 28), -INT8_C( 26), INT8_C( 113), INT8_C( 49), INT8_C( 106), -INT8_C( 10), INT8_C( 60), INT8_C( 30), INT8_MIN, INT8_C( 79), -INT8_C( 101), INT8_C( 46), -INT8_C( 45), INT8_C( 35), -INT8_C( 83), -INT8_C( 80), INT8_C( 25), INT8_C( 22), INT8_C( 92), -INT8_C( 89), INT8_C( 24), -INT8_C( 64), -INT8_C( 71) }, { INT8_C( 121), -INT8_C( 3), -INT8_C( 12), INT8_C( 68), INT8_C( 56), INT8_C( 101), INT8_C( 44), INT8_C( 0), INT8_C( 109), -INT8_C( 56), INT8_C( 67), INT8_C( 125), -INT8_C( 23), INT8_C( 13), INT8_C( 34), -INT8_C( 41), INT8_C( 25), INT8_C( 32), -INT8_C( 92), -INT8_C( 31), -INT8_C( 62), INT8_C( 80), INT8_C( 27), INT8_C( 77), INT8_C( 3), -INT8_C( 70), -INT8_C( 41), INT8_C( 38), INT8_C( 36), -INT8_C( 38), -INT8_C( 3), -INT8_C( 33), -INT8_C( 93), INT8_C( 3), INT8_C( 4), -INT8_C( 100), INT8_C( 19), INT8_MAX, -INT8_C( 43), INT8_C( 113), INT8_C( 20), INT8_C( 108), -INT8_C( 34), -INT8_C( 47), INT8_C( 81), -INT8_C( 116), INT8_C( 24), INT8_C( 1), -INT8_C( 33), -INT8_C( 93), -INT8_C( 37), -INT8_C( 97), INT8_C( 54), INT8_C( 35), -INT8_C( 108), INT8_C( 101), INT8_C( 93), INT8_C( 30), INT8_C( 76), INT8_C( 8), INT8_C( 56), INT8_C( 111), -INT8_C( 125), -INT8_C( 111) }, { INT8_C( 85), INT8_C( 66), INT8_C( 44), -INT8_C( 56), -INT8_C( 108), INT8_C( 67), -INT8_C( 34), INT8_C( 125), INT8_C( 31), -INT8_C( 23), INT8_C( 82), -INT8_C( 52), -INT8_C( 123), INT8_C( 9), -INT8_C( 10), -INT8_C( 125), INT8_C( 59), INT8_C( 3), -INT8_C( 33), -INT8_C( 70), -INT8_C( 62), INT8_C( 118), INT8_C( 50), INT8_C( 38), -INT8_C( 108), -INT8_C( 83), -INT8_C( 88), -INT8_C( 38), INT8_C( 10), -INT8_C( 3), 
-INT8_C( 99), -INT8_C( 33), -INT8_C( 44), INT8_C( 125), INT8_C( 28), INT8_C( 72), -INT8_C( 26), -INT8_C( 34), INT8_C( 117), -INT8_C( 47), -INT8_C( 109), INT8_C( 99), INT8_C( 106), -INT8_C( 116), -INT8_C( 21), -INT8_C( 36), INT8_C( 60), INT8_C( 1), -INT8_C( 80), -INT8_C( 29), INT8_C( 25), INT8_C( 30), INT8_C( 22), INT8_C( 76), INT8_C( 92), INT8_C( 85), INT8_C( 124), INT8_C( 114), -INT8_C( 34), INT8_C( 111), -INT8_C( 64), -INT8_C( 125), INT8_C( 125), INT8_C( 65) } }, { { -INT8_C( 78), -INT8_C( 78), INT8_C( 92), -INT8_C( 8), INT8_C( 88), INT8_C( 74), INT8_C( 77), INT8_C( 10), -INT8_C( 126), INT8_C( 59), -INT8_C( 38), -INT8_C( 120), -INT8_C( 111), INT8_C( 21), INT8_C( 113), INT8_C( 88), -INT8_C( 25), -INT8_C( 110), INT8_C( 95), -INT8_C( 64), -INT8_C( 23), INT8_C( 123), INT8_C( 73), INT8_C( 9), -INT8_C( 22), INT8_C( 105), INT8_C( 0), -INT8_C( 110), INT8_C( 81), -INT8_C( 55), INT8_C( 115), -INT8_C( 125), INT8_C( 102), INT8_C( 55), INT8_C( 81), -INT8_C( 62), -INT8_C( 34), -INT8_C( 115), INT8_C( 49), -INT8_C( 44), INT8_C( 49), INT8_C( 38), -INT8_C( 88), -INT8_C( 4), -INT8_C( 34), INT8_C( 95), INT8_C( 7), -INT8_C( 38), INT8_C( 78), -INT8_C( 13), INT8_C( 60), -INT8_C( 37), -INT8_C( 71), INT8_C( 26), INT8_C( 33), INT8_C( 17), INT8_C( 68), -INT8_C( 74), -INT8_C( 19), INT8_C( 100), INT8_C( 2), INT8_MIN, INT8_C( 62), -INT8_C( 16) }, UINT64_C(16768031963675583631), { -INT8_C( 80), -INT8_C( 6), -INT8_C( 75), INT8_C( 96), -INT8_C( 72), INT8_C( 35), -INT8_C( 6), -INT8_C( 47), -INT8_C( 121), -INT8_C( 44), INT8_MIN, INT8_C( 39), -INT8_C( 126), -INT8_C( 112), -INT8_C( 106), INT8_C( 107), INT8_C( 87), -INT8_C( 10), -INT8_C( 43), -INT8_C( 107), INT8_C( 6), -INT8_C( 2), -INT8_C( 18), -INT8_C( 1), INT8_C( 10), -INT8_C( 59), INT8_C( 42), -INT8_C( 82), INT8_C( 49), INT8_C( 114), INT8_C( 34), INT8_C( 61), -INT8_C( 5), -INT8_C( 6), INT8_C( 97), -INT8_C( 1), INT8_C( 61), -INT8_C( 33), INT8_C( 42), INT8_C( 55), -INT8_C( 101), -INT8_C( 103), -INT8_C( 100), -INT8_C( 71), -INT8_C( 83), INT8_C( 3), 
INT8_C( 95), INT8_C( 56), -INT8_C( 75), INT8_C( 108), INT8_C( 5), INT8_C( 58), INT8_C( 12), INT8_C( 116), INT8_C( 39), -INT8_C( 19), INT8_C( 13), -INT8_C( 21), INT8_C( 124), INT8_C( 70), INT8_C( 42), INT8_C( 90), -INT8_C( 77), INT8_C( 116) }, { INT8_C( 5), -INT8_C( 57), INT8_C( 73), INT8_C( 26), -INT8_C( 34), -INT8_C( 20), INT8_C( 30), INT8_C( 39), INT8_C( 4), -INT8_C( 63), INT8_C( 110), INT8_C( 63), INT8_C( 51), INT8_C( 92), INT8_C( 87), INT8_MIN, INT8_C( 117), INT8_C( 121), INT8_C( 31), INT8_C( 9), INT8_C( 66), INT8_C( 88), -INT8_C( 3), -INT8_C( 89), INT8_C( 31), INT8_C( 42), -INT8_C( 85), INT8_C( 48), INT8_C( 32), -INT8_C( 9), INT8_C( 41), INT8_C( 53), INT8_C( 100), INT8_C( 50), -INT8_C( 8), -INT8_C( 87), INT8_C( 3), -INT8_C( 116), INT8_C( 56), -INT8_C( 45), INT8_C( 110), -INT8_C( 10), INT8_C( 27), INT8_C( 10), -INT8_C( 52), INT8_MAX, -INT8_C( 91), INT8_C( 126), INT8_C( 112), -INT8_C( 40), INT8_C( 99), -INT8_C( 19), INT8_C( 103), -INT8_C( 118), -INT8_C( 19), -INT8_C( 99), INT8_C( 115), INT8_C( 73), INT8_C( 18), INT8_C( 45), INT8_C( 80), -INT8_C( 46), INT8_C( 26), INT8_C( 67) }, { -INT8_C( 121), INT8_C( 4), -INT8_C( 44), -INT8_C( 63), INT8_C( 88), INT8_C( 74), INT8_C( 77), INT8_C( 63), -INT8_C( 126), INT8_C( 59), -INT8_C( 112), -INT8_C( 120), -INT8_C( 111), INT8_C( 87), INT8_C( 107), INT8_C( 88), INT8_C( 10), INT8_C( 31), -INT8_C( 59), -INT8_C( 64), -INT8_C( 23), INT8_C( 123), -INT8_C( 82), INT8_C( 9), INT8_C( 49), INT8_C( 105), INT8_C( 0), -INT8_C( 9), INT8_C( 81), INT8_C( 41), INT8_C( 61), INT8_C( 53), INT8_C( 102), INT8_C( 110), INT8_C( 81), -INT8_C( 10), -INT8_C( 100), INT8_C( 27), INT8_C( 49), -INT8_C( 44), INT8_C( 49), INT8_C( 38), INT8_C( 3), -INT8_C( 4), -INT8_C( 34), INT8_C( 95), INT8_C( 7), -INT8_C( 38), INT8_C( 78), -INT8_C( 13), -INT8_C( 21), -INT8_C( 37), INT8_C( 124), INT8_C( 18), INT8_C( 33), INT8_C( 45), INT8_C( 68), -INT8_C( 74), -INT8_C( 19), -INT8_C( 46), INT8_C( 2), INT8_C( 26), INT8_C( 116), INT8_C( 67) } }, { { -INT8_C( 6), INT8_C( 99), 
INT8_C( 69), -INT8_C( 11), INT8_C( 48), INT8_C( 50), INT8_C( 13), INT8_C( 118), -INT8_C( 37), -INT8_C( 93), INT8_C( 79), -INT8_C( 88), INT8_C( 103), -INT8_C( 120), INT8_C( 11), INT8_C( 63), INT8_C( 61), INT8_C( 53), INT8_MIN, -INT8_C( 13), INT8_C( 114), INT8_C( 7), -INT8_C( 35), -INT8_C( 21), INT8_C( 109), -INT8_C( 36), -INT8_C( 54), -INT8_C( 122), INT8_C( 45), INT8_C( 53), -INT8_C( 79), -INT8_C( 74), INT8_C( 15), -INT8_C( 36), INT8_C( 104), INT8_C( 74), INT8_C( 44), INT8_C( 10), INT8_C( 47), -INT8_C( 114), -INT8_C( 92), INT8_C( 51), -INT8_C( 68), -INT8_C( 41), INT8_C( 9), INT8_C( 7), INT8_C( 126), INT8_C( 13), INT8_C( 20), INT8_C( 75), -INT8_C( 74), -INT8_C( 66), -INT8_C( 70), -INT8_C( 29), -INT8_C( 110), -INT8_C( 92), -INT8_C( 31), INT8_C( 1), -INT8_C( 21), INT8_C( 109), INT8_C( 13), -INT8_C( 10), INT8_C( 107), INT8_C( 88) }, UINT64_C(13330734482189192426), { -INT8_C( 73), -INT8_C( 14), INT8_C( 61), INT8_C( 24), -INT8_C( 21), -INT8_C( 22), -INT8_C( 37), INT8_C( 0), -INT8_C( 20), INT8_C( 40), INT8_C( 124), -INT8_C( 65), -INT8_C( 26), INT8_C( 67), INT8_C( 25), INT8_C( 89), -INT8_C( 71), INT8_C( 28), INT8_C( 110), INT8_C( 105), -INT8_C( 45), -INT8_C( 4), -INT8_C( 55), INT8_C( 66), INT8_C( 67), -INT8_C( 99), -INT8_C( 87), -INT8_C( 63), -INT8_C( 99), INT8_C( 44), INT8_C( 5), INT8_C( 54), INT8_C( 118), -INT8_C( 35), -INT8_C( 54), INT8_C( 86), INT8_C( 104), -INT8_C( 106), -INT8_C( 43), -INT8_C( 38), -INT8_C( 6), -INT8_C( 80), INT8_C( 37), -INT8_C( 102), INT8_C( 103), INT8_C( 20), -INT8_C( 62), -INT8_C( 23), -INT8_C( 79), INT8_C( 12), -INT8_C( 14), -INT8_C( 23), INT8_C( 29), INT8_C( 40), INT8_C( 47), INT8_C( 89), -INT8_C( 44), -INT8_C( 21), -INT8_C( 67), -INT8_C( 8), -INT8_C( 33), -INT8_C( 40), -INT8_C( 58), -INT8_C( 41) }, { INT8_C( 108), -INT8_C( 40), INT8_C( 19), -INT8_C( 19), -INT8_C( 67), -INT8_C( 39), INT8_C( 59), INT8_C( 125), -INT8_C( 40), -INT8_C( 127), -INT8_C( 91), -INT8_C( 96), -INT8_C( 49), INT8_C( 79), -INT8_C( 76), -INT8_C( 26), INT8_C( 71), INT8_C( 86), 
-INT8_C( 83), -INT8_C( 32), INT8_C( 37), INT8_C( 76), INT8_C( 40), -INT8_C( 125), -INT8_C( 91), -INT8_C( 116), INT8_C( 43), -INT8_C( 35), -INT8_C( 102), -INT8_C( 6), INT8_C( 105), INT8_C( 19), -INT8_C( 2), INT8_C( 36), INT8_C( 119), INT8_C( 5), -INT8_C( 53), INT8_C( 84), INT8_C( 126), -INT8_C( 50), INT8_C( 38), INT8_C( 9), INT8_C( 62), -INT8_C( 13), INT8_C( 2), INT8_C( 62), INT8_C( 48), -INT8_C( 101), -INT8_C( 30), -INT8_C( 120), -INT8_C( 107), -INT8_C( 72), -INT8_C( 75), INT8_C( 93), -INT8_C( 13), -INT8_C( 48), -INT8_C( 32), INT8_C( 33), -INT8_C( 72), -INT8_C( 78), INT8_MAX, INT8_C( 49), -INT8_C( 57), -INT8_C( 117) }, { -INT8_C( 6), -INT8_C( 40), INT8_C( 69), -INT8_C( 127), INT8_C( 48), -INT8_C( 91), -INT8_C( 65), -INT8_C( 96), -INT8_C( 37), -INT8_C( 93), INT8_C( 67), INT8_C( 79), INT8_C( 103), -INT8_C( 120), INT8_C( 11), INT8_C( 63), INT8_C( 61), INT8_C( 53), INT8_MIN, -INT8_C( 13), -INT8_C( 87), INT8_C( 43), -INT8_C( 63), -INT8_C( 35), -INT8_C( 99), -INT8_C( 36), INT8_C( 44), -INT8_C( 6), INT8_C( 5), INT8_C( 53), INT8_C( 54), INT8_C( 19), -INT8_C( 6), -INT8_C( 36), INT8_C( 104), INT8_C( 74), INT8_C( 44), INT8_C( 62), -INT8_C( 102), -INT8_C( 114), -INT8_C( 92), INT8_C( 51), -INT8_C( 68), INT8_C( 62), INT8_C( 9), INT8_C( 7), -INT8_C( 23), INT8_C( 13), INT8_C( 20), INT8_C( 75), -INT8_C( 74), -INT8_C( 66), -INT8_C( 70), -INT8_C( 29), -INT8_C( 110), -INT8_C( 92), -INT8_C( 33), INT8_C( 1), -INT8_C( 21), INT8_C( 49), -INT8_C( 58), -INT8_C( 57), INT8_C( 107), -INT8_C( 117) } }, { { -INT8_C( 98), INT8_C( 16), -INT8_C( 91), -INT8_C( 65), -INT8_C( 53), -INT8_C( 32), -INT8_C( 3), -INT8_C( 20), INT8_C( 40), INT8_C( 116), -INT8_C( 14), INT8_C( 122), -INT8_C( 42), -INT8_C( 14), INT8_C( 126), INT8_C( 87), -INT8_C( 95), -INT8_C( 19), INT8_C( 15), INT8_C( 44), -INT8_C( 119), INT8_C( 28), INT8_C( 34), -INT8_C( 14), -INT8_C( 81), -INT8_C( 117), -INT8_C( 78), INT8_C( 30), INT8_C( 59), -INT8_C( 34), INT8_C( 27), -INT8_C( 49), INT8_C( 109), INT8_C( 45), -INT8_C( 105), INT8_C( 15), 
INT8_C( 63), -INT8_C( 106), INT8_C( 73), -INT8_C( 59), INT8_C( 116), -INT8_C( 67), INT8_C( 55), -INT8_C( 98), -INT8_C( 120), -INT8_C( 42), -INT8_C( 23), INT8_C( 92), -INT8_C( 22), -INT8_C( 88), INT8_C( 117), INT8_C( 55), -INT8_C( 85), INT8_C( 61), -INT8_C( 96), -INT8_C( 77), INT8_C( 9), INT8_C( 60), -INT8_C( 42), INT8_C( 46), -INT8_C( 84), INT8_C( 11), -INT8_C( 75), INT8_C( 97) }, UINT64_C( 9673742337990292340), { -INT8_C( 113), -INT8_C( 54), -INT8_C( 59), -INT8_C( 15), INT8_C( 6), -INT8_C( 37), INT8_C( 124), INT8_C( 117), INT8_C( 92), -INT8_C( 50), INT8_C( 88), INT8_C( 41), -INT8_C( 60), -INT8_C( 15), -INT8_C( 35), -INT8_C( 112), -INT8_C( 88), -INT8_C( 94), INT8_C( 20), -INT8_C( 109), INT8_C( 95), INT8_C( 28), INT8_C( 30), -INT8_C( 113), INT8_C( 1), INT8_C( 120), -INT8_C( 73), INT8_C( 16), -INT8_C( 100), -INT8_C( 68), INT8_C( 117), INT8_C( 120), INT8_C( 105), INT8_C( 70), INT8_C( 110), -INT8_C( 118), INT8_C( 61), INT8_C( 81), INT8_C( 8), -INT8_C( 53), INT8_C( 39), -INT8_C( 49), INT8_C( 123), INT8_C( 7), INT8_C( 12), INT8_C( 78), INT8_C( 89), INT8_C( 101), INT8_C( 81), INT8_C( 38), -INT8_C( 68), INT8_C( 108), -INT8_C( 2), -INT8_C( 89), -INT8_C( 48), -INT8_C( 46), -INT8_C( 15), -INT8_C( 13), -INT8_C( 11), INT8_C( 91), INT8_C( 52), INT8_C( 31), INT8_C( 117), INT8_C( 64) }, { INT8_C( 104), INT8_C( 77), -INT8_C( 2), INT8_C( 73), -INT8_C( 7), -INT8_C( 76), -INT8_C( 99), INT8_C( 17), -INT8_C( 117), INT8_C( 50), INT8_C( 6), -INT8_C( 29), -INT8_C( 104), -INT8_C( 83), -INT8_C( 39), -INT8_C( 104), INT8_C( 42), INT8_C( 31), INT8_C( 17), INT8_C( 5), -INT8_C( 116), INT8_C( 124), INT8_C( 66), INT8_C( 59), INT8_C( 126), INT8_C( 89), -INT8_C( 27), INT8_C( 62), -INT8_C( 125), -INT8_C( 87), INT8_C( 124), INT8_C( 108), -INT8_C( 120), INT8_C( 47), INT8_C( 9), INT8_C( 23), -INT8_C( 107), INT8_C( 93), -INT8_C( 64), -INT8_C( 36), -INT8_C( 119), -INT8_C( 35), -INT8_C( 114), -INT8_C( 75), -INT8_C( 50), INT8_C( 41), -INT8_C( 70), INT8_C( 98), INT8_C( 72), INT8_C( 115), INT8_C( 21), INT8_C( 
8), -INT8_C( 16), -INT8_C( 108), -INT8_C( 25), -INT8_C( 92), -INT8_C( 14), INT8_C( 73), -INT8_C( 23), INT8_C( 8), INT8_C( 53), -INT8_C( 75), -INT8_C( 5), -INT8_C( 69) }, { -INT8_C( 98), INT8_C( 16), -INT8_C( 50), -INT8_C( 65), INT8_C( 88), INT8_C( 6), INT8_C( 41), -INT8_C( 20), -INT8_C( 60), -INT8_C( 104), -INT8_C( 14), INT8_C( 122), -INT8_C( 35), -INT8_C( 39), -INT8_C( 112), INT8_C( 87), INT8_C( 1), -INT8_C( 19), INT8_C( 15), INT8_C( 44), -INT8_C( 73), INT8_C( 28), INT8_C( 34), INT8_C( 62), -INT8_C( 81), -INT8_C( 117), -INT8_C( 78), -INT8_C( 87), INT8_C( 117), -INT8_C( 34), INT8_C( 27), -INT8_C( 49), INT8_C( 39), -INT8_C( 119), -INT8_C( 49), INT8_C( 15), INT8_C( 63), -INT8_C( 114), INT8_C( 7), -INT8_C( 59), INT8_C( 12), -INT8_C( 67), INT8_C( 55), INT8_C( 41), -INT8_C( 120), -INT8_C( 42), -INT8_C( 23), INT8_C( 92), -INT8_C( 22), -INT8_C( 88), INT8_C( 117), INT8_C( 55), -INT8_C( 85), INT8_C( 61), INT8_C( 91), -INT8_C( 77), INT8_C( 9), INT8_C( 53), INT8_C( 31), INT8_C( 46), -INT8_C( 84), INT8_C( 11), -INT8_C( 75), -INT8_C( 69) } }, { { INT8_C( 112), INT8_MAX, INT8_C( 23), INT8_C( 112), -INT8_C( 17), INT8_C( 98), INT8_C( 83), -INT8_C( 35), -INT8_C( 82), INT8_C( 91), -INT8_C( 2), INT8_C( 65), -INT8_C( 84), -INT8_C( 88), -INT8_C( 89), -INT8_C( 115), INT8_C( 48), INT8_C( 117), -INT8_C( 105), -INT8_C( 59), INT8_C( 116), -INT8_C( 18), -INT8_C( 72), INT8_C( 98), -INT8_C( 97), INT8_C( 27), -INT8_C( 73), -INT8_C( 75), INT8_C( 115), INT8_C( 43), -INT8_C( 12), INT8_C( 47), INT8_C( 49), INT8_C( 120), INT8_C( 103), -INT8_C( 56), INT8_C( 36), INT8_C( 44), -INT8_C( 20), INT8_C( 119), INT8_C( 60), -INT8_C( 6), INT8_C( 98), INT8_C( 9), INT8_C( 97), INT8_C( 6), INT8_C( 2), INT8_C( 41), -INT8_C( 69), INT8_C( 27), INT8_C( 106), -INT8_C( 44), -INT8_C( 37), INT8_C( 16), -INT8_C( 33), -INT8_C( 36), -INT8_C( 105), INT8_C( 73), INT8_C( 41), INT8_C( 11), INT8_C( 0), INT8_C( 34), -INT8_C( 112), -INT8_C( 42) }, UINT64_C(16524310519058897855), { INT8_C( 1), INT8_C( 78), INT8_C( 89), INT8_C( 46), 
INT8_C( 92), INT8_C( 88), INT8_C( 32), -INT8_C( 22), INT8_C( 24), INT8_C( 63), -INT8_C( 117), -INT8_C( 71), -INT8_C( 82), INT8_C( 84), -INT8_C( 58), INT8_C( 89), INT8_C( 85), -INT8_C( 34), INT8_C( 116), INT8_C( 29), -INT8_C( 16), INT8_C( 57), -INT8_C( 107), -INT8_C( 5), -INT8_C( 112), INT8_C( 106), -INT8_C( 117), -INT8_C( 88), INT8_C( 75), -INT8_C( 68), -INT8_C( 14), -INT8_C( 27), -INT8_C( 18), -INT8_C( 76), INT8_C( 77), -INT8_C( 3), INT8_C( 31), INT8_C( 95), INT8_C( 44), -INT8_C( 6), -INT8_C( 17), -INT8_C( 28), INT8_C( 105), -INT8_C( 66), INT8_C( 65), INT8_C( 108), INT8_C( 29), -INT8_C( 62), -INT8_C( 13), INT8_C( 54), INT8_C( 89), INT8_C( 34), -INT8_C( 102), INT8_C( 104), -INT8_C( 95), INT8_C( 15), INT8_C( 72), INT8_C( 49), INT8_C( 44), -INT8_C( 88), -INT8_C( 81), INT8_C( 69), -INT8_C( 99), -INT8_C( 122) }, { -INT8_C( 72), INT8_C( 123), -INT8_C( 12), -INT8_C( 11), -INT8_C( 54), -INT8_C( 52), -INT8_C( 119), -INT8_C( 19), -INT8_C( 56), INT8_C( 99), INT8_C( 45), -INT8_C( 17), INT8_C( 71), -INT8_C( 106), -INT8_C( 15), INT8_C( 107), INT8_C( 87), -INT8_C( 53), INT8_C( 77), INT8_C( 123), -INT8_C( 99), -INT8_C( 21), -INT8_C( 71), -INT8_C( 66), INT8_C( 90), INT8_C( 39), -INT8_C( 115), -INT8_C( 114), INT8_C( 58), -INT8_C( 80), -INT8_C( 92), INT8_C( 109), -INT8_C( 50), -INT8_C( 35), -INT8_C( 55), -INT8_C( 93), INT8_C( 71), -INT8_C( 120), INT8_C( 2), -INT8_C( 84), INT8_C( 108), INT8_C( 9), -INT8_C( 114), -INT8_C( 43), INT8_C( 87), -INT8_C( 101), -INT8_C( 18), INT8_C( 71), INT8_C( 47), -INT8_C( 105), INT8_C( 20), INT8_C( 6), -INT8_C( 42), -INT8_C( 89), -INT8_C( 112), INT8_C( 121), INT8_C( 65), INT8_C( 31), -INT8_C( 23), -INT8_C( 91), INT8_C( 97), INT8_C( 0), -INT8_C( 16), -INT8_C( 98) }, { INT8_C( 24), -INT8_C( 56), INT8_C( 63), INT8_C( 99), -INT8_C( 117), INT8_C( 45), INT8_C( 83), -INT8_C( 17), -INT8_C( 82), INT8_C( 71), -INT8_C( 2), -INT8_C( 106), -INT8_C( 58), -INT8_C( 88), -INT8_C( 89), INT8_C( 107), -INT8_C( 112), INT8_C( 117), -INT8_C( 105), -INT8_C( 59), INT8_C( 116), 
-INT8_C( 18), -INT8_C( 88), -INT8_C( 114), -INT8_C( 97), INT8_C( 58), -INT8_C( 68), -INT8_C( 75), -INT8_C( 14), INT8_C( 43), -INT8_C( 12), INT8_C( 47), INT8_C( 49), INT8_C( 108), -INT8_C( 28), INT8_C( 9), INT8_C( 105), INT8_C( 44), -INT8_C( 66), -INT8_C( 43), INT8_C( 60), -INT8_C( 6), INT8_C( 108), INT8_C( 9), INT8_C( 97), -INT8_C( 18), INT8_C( 2), INT8_C( 41), -INT8_C( 69), INT8_C( 65), INT8_C( 106), -INT8_C( 44), INT8_C( 44), INT8_C( 16), -INT8_C( 88), -INT8_C( 36), -INT8_C( 81), INT8_C( 73), INT8_C( 69), INT8_C( 11), INT8_C( 0), -INT8_C( 16), -INT8_C( 122), -INT8_C( 98) } }, { { INT8_C( 9), INT8_C( 116), INT8_C( 8), -INT8_C( 105), INT8_C( 30), INT8_C( 1), INT8_C( 40), INT8_C( 77), INT8_C( 3), INT8_C( 4), INT8_C( 74), -INT8_C( 58), INT8_C( 94), INT8_C( 104), INT8_C( 78), INT8_C( 1), -INT8_C( 22), -INT8_C( 16), -INT8_C( 36), -INT8_C( 125), -INT8_C( 22), INT8_C( 46), INT8_C( 40), INT8_C( 53), -INT8_C( 36), INT8_C( 111), INT8_C( 103), INT8_C( 17), -INT8_C( 81), INT8_C( 93), -INT8_C( 122), -INT8_C( 106), INT8_C( 118), INT8_C( 95), INT8_C( 68), -INT8_C( 44), -INT8_C( 50), -INT8_C( 72), -INT8_C( 67), INT8_C( 4), INT8_C( 11), INT8_C( 8), INT8_C( 18), -INT8_C( 28), INT8_C( 40), INT8_C( 26), INT8_C( 121), INT8_C( 71), INT8_C( 124), -INT8_C( 10), -INT8_C( 92), INT8_C( 66), INT8_C( 104), -INT8_C( 85), INT8_C( 75), -INT8_C( 52), INT8_C( 7), INT8_C( 92), -INT8_C( 26), -INT8_C( 93), INT8_C( 82), -INT8_C( 77), INT8_C( 63), -INT8_C( 77) }, UINT64_C( 6731250091584344763), { INT8_C( 80), -INT8_C( 111), -INT8_C( 69), -INT8_C( 9), -INT8_C( 117), INT8_C( 48), -INT8_C( 14), -INT8_C( 27), -INT8_C( 124), INT8_C( 23), INT8_C( 77), -INT8_C( 97), -INT8_C( 114), -INT8_C( 109), INT8_C( 45), INT8_C( 66), INT8_C( 28), INT8_C( 58), INT8_C( 72), -INT8_C( 32), INT8_C( 48), -INT8_C( 77), INT8_C( 98), INT8_C( 111), INT8_C( 97), INT8_C( 4), -INT8_C( 106), INT8_C( 30), INT8_C( 41), INT8_C( 103), INT8_C( 12), -INT8_C( 31), -INT8_C( 79), INT8_C( 75), -INT8_C( 80), INT8_C( 11), INT8_C( 87), -INT8_C( 
89), INT8_C( 34), -INT8_C( 73), -INT8_C( 59), INT8_C( 122), -INT8_C( 104), INT8_C( 110), -INT8_C( 117), INT8_C( 124), -INT8_C( 2), INT8_C( 47), -INT8_C( 44), -INT8_C( 33), INT8_C( 38), INT8_C( 32), INT8_C( 115), -INT8_C( 15), -INT8_C( 99), -INT8_C( 67), INT8_C( 52), INT8_C( 97), -INT8_C( 93), -INT8_C( 91), INT8_C( 76), -INT8_C( 59), -INT8_C( 39), INT8_C( 40) }, { INT8_C( 95), INT8_C( 113), INT8_C( 52), -INT8_C( 58), INT8_C( 111), -INT8_C( 46), INT8_C( 105), -INT8_C( 119), INT8_C( 101), -INT8_C( 65), INT8_C( 108), INT8_C( 106), INT8_C( 42), -INT8_C( 69), -INT8_C( 66), -INT8_C( 124), INT8_C( 108), -INT8_C( 9), -INT8_C( 31), -INT8_C( 68), -INT8_C( 50), INT8_C( 40), -INT8_C( 117), -INT8_C( 27), -INT8_C( 23), INT8_C( 3), -INT8_C( 50), -INT8_C( 70), INT8_C( 115), INT8_C( 47), -INT8_C( 61), INT8_C( 105), -INT8_C( 86), -INT8_C( 73), -INT8_C( 59), INT8_C( 63), -INT8_C( 105), -INT8_C( 104), INT8_C( 60), INT8_C( 117), INT8_C( 20), INT8_C( 28), INT8_C( 93), -INT8_C( 7), INT8_C( 75), -INT8_C( 94), -INT8_C( 21), INT8_C( 124), INT8_C( 83), -INT8_C( 41), INT8_C( 58), -INT8_C( 114), -INT8_C( 124), -INT8_C( 123), INT8_C( 111), INT8_C( 11), INT8_C( 27), INT8_C( 1), INT8_C( 114), INT8_C( 14), INT8_C( 11), -INT8_C( 61), -INT8_C( 115), INT8_C( 5) }, { -INT8_C( 124), INT8_C( 101), INT8_C( 8), -INT8_C( 65), INT8_C( 77), INT8_C( 108), INT8_C( 40), INT8_C( 106), INT8_C( 3), INT8_C( 42), INT8_C( 74), -INT8_C( 58), INT8_C( 45), INT8_C( 104), INT8_C( 66), INT8_C( 1), INT8_C( 97), -INT8_C( 23), -INT8_C( 36), -INT8_C( 125), -INT8_C( 106), -INT8_C( 50), INT8_C( 40), INT8_C( 53), INT8_C( 41), INT8_C( 115), INT8_C( 103), INT8_C( 47), -INT8_C( 81), -INT8_C( 61), -INT8_C( 31), -INT8_C( 106), -INT8_C( 59), INT8_C( 20), INT8_C( 68), INT8_C( 28), -INT8_C( 50), -INT8_C( 72), INT8_C( 110), INT8_C( 4), INT8_C( 11), INT8_C( 8), INT8_C( 124), -INT8_C( 28), -INT8_C( 2), -INT8_C( 21), INT8_C( 121), INT8_C( 71), INT8_C( 124), INT8_C( 27), -INT8_C( 92), INT8_C( 1), INT8_C( 104), INT8_C( 114), -INT8_C( 91), 
-INT8_C( 52), INT8_C( 76), INT8_C( 92), -INT8_C( 59), -INT8_C( 61), -INT8_C( 39), -INT8_C( 77), INT8_C( 40), -INT8_C( 77) } }, };
/* Run every vector: load src, a and b, apply the masked operation with
 * mask k, then compare the result against the expected bytes in r. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi8(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_mask_unpackhi_epi8(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_unpackhi_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask64 k; const int8_t a[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { { UINT64_C( 1842617825699503392), { -INT8_C( 16), INT8_C( 31), -INT8_C( 45), INT8_C( 14), INT8_C( 73), INT8_C( 111), INT8_C( 28), INT8_C( 3), -INT8_C( 127), INT8_C( 39), -INT8_C( 33), -INT8_C( 80), INT8_C( 106), INT8_C( 116), -INT8_C( 49), INT8_C( 40), INT8_C( 63), -INT8_C( 74), -INT8_C( 44), INT8_C( 32), -INT8_C( 92), INT8_C( 8), INT8_C( 21), -INT8_C( 80), INT8_C( 101), -INT8_C( 14), INT8_C( 103), INT8_C( 103), -INT8_C( 62), -INT8_C( 85), -INT8_C( 85), -INT8_C( 101), INT8_C( 74), INT8_C( 34), INT8_C( 59), -INT8_C( 50), INT8_C( 89), -INT8_C( 61), -INT8_C( 117), INT8_C( 110), INT8_C( 88), INT8_C( 31), -INT8_C( 103), INT8_C( 62), -INT8_C( 118), -INT8_C( 14), INT8_C( 63), -INT8_C( 35), INT8_C( 93), -INT8_C( 60), INT8_C( 115), -INT8_C( 92), INT8_C( 125), -INT8_C( 64), INT8_C( 21), -INT8_C( 19), -INT8_C( 103), -INT8_C( 62), INT8_C( 103), INT8_C( 61), INT8_C( 92), -INT8_C( 11), -INT8_C( 124), -INT8_C( 29) }, { -INT8_C( 90), INT8_C( 83), INT8_C( 65), INT8_C( 112), -INT8_C( 103), INT8_C( 105), INT8_C( 54), -INT8_C( 107), -INT8_C( 59), INT8_C( 107), INT8_C( 94), INT8_C( 125), INT8_C( 117), INT8_C( 102), -INT8_C( 66), -INT8_C( 98), -INT8_C( 75), INT8_C( 37), -INT8_C( 46), -INT8_C( 38), -INT8_C( 16), -INT8_C( 17), -INT8_C( 12), -INT8_C( 63), -INT8_C( 
110), -INT8_C( 38), INT8_C( 103), -INT8_C( 38), -INT8_C( 74), -INT8_C( 98), INT8_C( 38), -INT8_C( 39), -INT8_C( 119), -INT8_C( 37), -INT8_C( 25), -INT8_C( 43), -INT8_C( 63), INT8_C( 4), INT8_C( 125), -INT8_C( 114), -INT8_C( 124), -INT8_C( 61), INT8_C( 81), -INT8_C( 109), INT8_C( 72), INT8_C( 89), INT8_C( 72), -INT8_C( 3), INT8_C( 38), -INT8_C( 125), INT8_C( 75), -INT8_C( 25), INT8_C( 9), -INT8_C( 89), -INT8_C( 53), -INT8_C( 5), -INT8_C( 1), INT8_C( 99), INT8_C( 104), -INT8_C( 86), -INT8_C( 57), INT8_C( 51), INT8_C( 121), INT8_C( 117) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 94), INT8_C( 0), INT8_C( 0), INT8_C( 106), INT8_C( 0), INT8_C( 116), INT8_C( 102), -INT8_C( 49), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 101), -INT8_C( 110), -INT8_C( 14), -INT8_C( 38), INT8_C( 103), INT8_C( 0), INT8_C( 0), -INT8_C( 38), -INT8_C( 62), INT8_C( 0), -INT8_C( 85), -INT8_C( 98), -INT8_C( 85), INT8_C( 0), -INT8_C( 101), INT8_C( 0), INT8_C( 0), -INT8_C( 124), INT8_C( 31), INT8_C( 0), INT8_C( 0), INT8_C( 81), INT8_C( 0), INT8_C( 0), -INT8_C( 118), INT8_C( 72), INT8_C( 0), INT8_C( 89), INT8_C( 0), INT8_C( 0), -INT8_C( 35), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 103), INT8_C( 0), INT8_C( 0), -INT8_C( 86), INT8_C( 92), INT8_C( 0), INT8_C( 0), INT8_C( 51), -INT8_C( 124), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { UINT64_C( 9399397783548227804), { INT8_C( 121), INT8_C( 36), -INT8_C( 115), INT8_C( 4), -INT8_C( 20), -INT8_C( 8), -INT8_C( 107), INT8_C( 19), INT8_C( 100), -INT8_C( 39), INT8_C( 31), -INT8_C( 78), INT8_C( 28), INT8_C( 82), INT8_C( 122), -INT8_C( 47), INT8_C( 68), -INT8_C( 88), -INT8_C( 46), -INT8_C( 13), -INT8_C( 79), -INT8_C( 83), INT8_C( 120), INT8_C( 1), INT8_C( 79), -INT8_C( 109), INT8_C( 44), INT8_C( 5), -INT8_C( 81), INT8_C( 95), INT8_C( 39), -INT8_C( 87), INT8_C( 65), INT8_C( 108), -INT8_C( 27), INT8_C( 62), -INT8_C( 53), INT8_C( 83), INT8_C( 66), -INT8_C( 76), -INT8_C( 58), INT8_C( 117), INT8_C( 91), INT8_C( 
116), INT8_C( 107), -INT8_C( 88), -INT8_C( 2), INT8_C( 93), -INT8_C( 103), INT8_C( 46), INT8_C( 111), INT8_C( 35), -INT8_C( 51), -INT8_C( 50), -INT8_C( 127), -INT8_C( 91), -INT8_C( 52), -INT8_C( 51), INT8_C( 50), INT8_C( 22), -INT8_C( 67), INT8_C( 126), -INT8_C( 41), -INT8_C( 17) }, { INT8_C( 1), INT8_C( 54), INT8_C( 70), INT8_C( 4), -INT8_C( 68), -INT8_C( 34), -INT8_C( 87), INT8_C( 122), INT8_C( 49), INT8_C( 28), INT8_C( 102), -INT8_C( 15), INT8_C( 5), INT8_C( 57), INT8_C( 2), -INT8_C( 58), -INT8_C( 67), INT8_C( 86), INT8_C( 112), -INT8_C( 90), INT8_C( 99), -INT8_C( 45), INT8_C( 62), -INT8_C( 69), -INT8_C( 84), INT8_C( 6), -INT8_C( 111), INT8_C( 102), INT8_C( 81), INT8_C( 62), -INT8_C( 75), -INT8_C( 43), -INT8_C( 55), INT8_C( 122), -INT8_C( 49), INT8_C( 97), -INT8_C( 64), INT8_C( 107), -INT8_C( 31), -INT8_C( 41), INT8_C( 101), -INT8_C( 124), -INT8_C( 60), -INT8_C( 13), INT8_C( 48), INT8_C( 19), INT8_C( 6), -INT8_C( 80), INT8_C( 17), -INT8_C( 9), INT8_C( 45), INT8_C( 80), -INT8_C( 117), -INT8_C( 3), INT8_C( 68), INT8_C( 61), -INT8_C( 4), INT8_C( 32), INT8_C( 92), INT8_C( 74), -INT8_C( 21), -INT8_C( 68), INT8_C( 74), INT8_C( 108) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 39), INT8_C( 28), INT8_C( 31), INT8_C( 0), -INT8_C( 78), -INT8_C( 15), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 122), INT8_C( 2), -INT8_C( 47), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 109), INT8_C( 6), INT8_C( 44), -INT8_C( 111), INT8_C( 5), INT8_C( 102), INT8_C( 0), INT8_C( 81), INT8_C( 95), INT8_C( 0), INT8_C( 0), -INT8_C( 75), -INT8_C( 87), -INT8_C( 43), -INT8_C( 58), INT8_C( 101), INT8_C( 0), -INT8_C( 124), INT8_C( 91), -INT8_C( 60), INT8_C( 116), INT8_C( 0), INT8_C( 0), INT8_C( 48), -INT8_C( 88), INT8_C( 19), -INT8_C( 2), INT8_C( 0), INT8_C( 93), INT8_C( 0), -INT8_C( 52), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 50), INT8_C( 92), INT8_C( 22), INT8_C( 0), INT8_C( 0), -INT8_C( 21), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 108) } }, { 
UINT64_C(12562662648533992732), { INT8_C( 117), -INT8_C( 7), -INT8_C( 90), -INT8_C( 108), INT8_C( 118), -INT8_C( 119), INT8_C( 45), -INT8_C( 36), INT8_C( 8), -INT8_C( 87), -INT8_C( 41), INT8_C( 85), -INT8_C( 69), -INT8_C( 124), -INT8_C( 119), INT8_C( 10), INT8_C( 96), INT8_C( 103), INT8_C( 52), INT8_C( 24), INT8_C( 34), INT8_C( 121), INT8_C( 109), -INT8_C( 67), -INT8_C( 60), -INT8_C( 125), INT8_C( 22), -INT8_C( 22), -INT8_C( 48), INT8_C( 115), INT8_C( 64), -INT8_C( 79), INT8_C( 75), -INT8_C( 117), INT8_C( 13), INT8_C( 113), INT8_C( 51), -INT8_C( 109), INT8_C( 62), INT8_C( 51), -INT8_C( 60), INT8_C( 112), INT8_C( 60), INT8_C( 8), INT8_C( 33), -INT8_C( 50), INT8_C( 45), INT8_C( 44), INT8_C( 40), -INT8_C( 18), -INT8_C( 58), INT8_C( 75), -INT8_C( 66), INT8_C( 104), -INT8_C( 81), -INT8_C( 102), -INT8_C( 95), -INT8_C( 4), -INT8_C( 98), INT8_C( 24), INT8_C( 21), -INT8_C( 5), INT8_C( 115), INT8_C( 62) }, { -INT8_C( 55), INT8_C( 57), -INT8_C( 64), -INT8_C( 65), INT8_C( 123), INT8_C( 124), -INT8_C( 54), INT8_C( 1), -INT8_C( 105), INT8_C( 97), -INT8_C( 53), -INT8_C( 16), -INT8_C( 83), INT8_C( 51), -INT8_C( 122), -INT8_C( 36), INT8_C( 89), -INT8_C( 83), INT8_C( 30), -INT8_C( 91), INT8_C( 30), -INT8_C( 23), INT8_C( 115), -INT8_C( 19), INT8_C( 88), -INT8_C( 90), -INT8_C( 70), INT8_C( 21), -INT8_C( 61), -INT8_C( 87), -INT8_C( 7), -INT8_C( 70), -INT8_C( 75), -INT8_C( 27), -INT8_C( 107), -INT8_C( 39), INT8_C( 39), INT8_C( 66), INT8_C( 33), -INT8_C( 54), -INT8_C( 86), -INT8_C( 33), -INT8_C( 74), INT8_C( 111), INT8_C( 123), -INT8_C( 37), -INT8_C( 110), INT8_C( 96), -INT8_C( 13), -INT8_C( 72), -INT8_C( 69), -INT8_C( 6), -INT8_C( 31), INT8_C( 32), -INT8_C( 74), -INT8_C( 103), INT8_C( 71), INT8_C( 114), INT8_C( 48), -INT8_C( 72), -INT8_C( 80), INT8_C( 119), INT8_C( 119), -INT8_C( 80) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 87), INT8_C( 97), -INT8_C( 41), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 69), INT8_C( 0), -INT8_C( 124), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 10), -INT8_C( 
36), -INT8_C( 60), INT8_C( 88), -INT8_C( 125), -INT8_C( 90), INT8_C( 0), -INT8_C( 70), -INT8_C( 22), INT8_C( 21), INT8_C( 0), -INT8_C( 61), INT8_C( 115), INT8_C( 0), INT8_C( 64), -INT8_C( 7), -INT8_C( 79), INT8_C( 0), INT8_C( 0), -INT8_C( 86), INT8_C( 112), -INT8_C( 33), INT8_C( 60), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 33), INT8_C( 123), INT8_C( 0), -INT8_C( 37), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 96), -INT8_C( 95), INT8_C( 71), -INT8_C( 4), INT8_C( 0), -INT8_C( 98), INT8_C( 0), INT8_C( 24), INT8_C( 0), INT8_C( 0), -INT8_C( 80), -INT8_C( 5), INT8_C( 119), INT8_C( 0), INT8_C( 119), INT8_C( 0), -INT8_C( 80) } }, { UINT64_C(17893169858524891965), { INT8_C( 11), -INT8_C( 64), -INT8_C( 26), -INT8_C( 121), -INT8_C( 23), INT8_C( 47), INT8_C( 50), INT8_C( 8), -INT8_C( 102), -INT8_C( 109), -INT8_C( 3), INT8_C( 1), INT8_C( 93), -INT8_C( 108), -INT8_C( 124), INT8_C( 97), -INT8_C( 72), -INT8_C( 91), INT8_C( 30), -INT8_C( 109), -INT8_C( 9), -INT8_C( 50), -INT8_C( 45), INT8_C( 32), -INT8_C( 74), INT8_C( 8), -INT8_C( 125), INT8_C( 71), -INT8_C( 64), -INT8_C( 104), INT8_C( 73), -INT8_C( 41), INT8_C( 54), INT8_C( 97), INT8_C( 18), INT8_C( 0), INT8_C( 104), -INT8_C( 98), -INT8_C( 96), INT8_C( 54), INT8_C( 76), -INT8_C( 63), -INT8_C( 35), -INT8_C( 89), -INT8_C( 50), INT8_C( 28), -INT8_C( 40), INT8_C( 119), -INT8_C( 94), INT8_C( 126), -INT8_C( 63), -INT8_C( 42), -INT8_C( 70), -INT8_C( 72), -INT8_C( 1), INT8_C( 82), INT8_C( 2), -INT8_C( 2), -INT8_C( 29), INT8_C( 24), -INT8_C( 101), -INT8_C( 98), INT8_C( 12), -INT8_C( 64) }, { -INT8_C( 35), -INT8_C( 37), -INT8_C( 113), -INT8_C( 1), -INT8_C( 8), INT8_C( 10), INT8_C( 83), INT8_C( 14), -INT8_C( 33), INT8_C( 57), -INT8_C( 47), -INT8_C( 91), INT8_C( 57), INT8_C( 122), -INT8_C( 116), -INT8_C( 21), -INT8_C( 9), INT8_C( 13), -INT8_C( 4), INT8_C( 1), -INT8_C( 81), -INT8_C( 27), INT8_C( 113), INT8_C( 9), -INT8_C( 94), -INT8_C( 98), -INT8_C( 101), INT8_C( 52), -INT8_C( 73), INT8_C( 30), -INT8_C( 68), -INT8_C( 120), -INT8_C( 62), 
INT8_C( 54), INT8_C( 51), -INT8_C( 100), INT8_C( 88), -INT8_C( 81), -INT8_C( 47), INT8_C( 16), -INT8_C( 66), INT8_C( 126), INT8_C( 78), -INT8_C( 79), INT8_C( 51), INT8_C( 109), -INT8_C( 6), -INT8_C( 110), INT8_MAX, -INT8_C( 22), INT8_C( 75), INT8_C( 52), INT8_C( 9), INT8_C( 66), INT8_C( 45), -INT8_C( 114), INT8_C( 30), INT8_C( 4), -INT8_C( 24), -INT8_C( 4), INT8_C( 27), -INT8_C( 117), INT8_C( 2), INT8_C( 111) }, { -INT8_C( 102), INT8_C( 0), -INT8_C( 109), INT8_C( 57), -INT8_C( 3), -INT8_C( 47), INT8_C( 0), INT8_C( 0), INT8_C( 93), INT8_C( 57), INT8_C( 0), INT8_C( 122), INT8_C( 0), INT8_C( 0), INT8_C( 97), INT8_C( 0), INT8_C( 0), -INT8_C( 94), INT8_C( 0), -INT8_C( 98), -INT8_C( 125), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 64), -INT8_C( 73), INT8_C( 0), INT8_C( 30), INT8_C( 0), -INT8_C( 68), INT8_C( 0), INT8_C( 0), INT8_C( 76), -INT8_C( 66), -INT8_C( 63), INT8_C( 126), INT8_C( 0), INT8_C( 78), INT8_C( 0), INT8_C( 0), -INT8_C( 50), INT8_C( 51), INT8_C( 28), INT8_C( 109), INT8_C( 0), INT8_C( 0), INT8_C( 119), INT8_C( 0), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 29), INT8_C( 0), INT8_C( 24), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 117), INT8_C( 12), INT8_C( 2), -INT8_C( 64), INT8_C( 111) } }, { UINT64_C(11620352878424023674), { INT8_C( 64), INT8_C( 41), INT8_C( 40), -INT8_C( 58), -INT8_C( 31), INT8_C( 106), -INT8_C( 76), -INT8_C( 77), -INT8_C( 10), INT8_C( 26), -INT8_C( 102), INT8_C( 13), INT8_C( 4), -INT8_C( 65), -INT8_C( 31), INT8_C( 69), INT8_C( 38), INT8_C( 70), INT8_C( 44), INT8_C( 27), -INT8_C( 44), INT8_C( 7), -INT8_C( 106), -INT8_C( 27), INT8_C( 57), -INT8_C( 78), -INT8_C( 126), -INT8_C( 15), INT8_C( 63), INT8_C( 71), INT8_C( 104), -INT8_C( 30), -INT8_C( 122), INT8_C( 82), INT8_MIN, INT8_C( 117), INT8_C( 32), INT8_C( 74), INT8_C( 116), INT8_C( 76), INT8_C( 117), -INT8_C( 60), INT8_C( 63), -INT8_C( 106), INT8_C( 3), -INT8_C( 125), INT8_C( 54), -INT8_C( 5), INT8_C( 73), INT8_C( 65), INT8_C( 98), -INT8_C( 74), INT8_C( 105), 
INT8_C( 56), INT8_C( 92), INT8_C( 81), INT8_C( 66), -INT8_C( 48), -INT8_C( 9), -INT8_C( 93), -INT8_C( 28), -INT8_C( 117), INT8_C( 55), -INT8_C( 21) }, { -INT8_C( 79), INT8_C( 68), -INT8_C( 97), INT8_C( 102), -INT8_C( 67), -INT8_C( 34), -INT8_C( 117), -INT8_C( 93), INT8_C( 58), -INT8_C( 11), INT8_MIN, INT8_C( 34), INT8_C( 105), -INT8_C( 83), -INT8_C( 105), INT8_C( 84), INT8_C( 37), -INT8_C( 127), -INT8_C( 90), INT8_C( 41), INT8_C( 57), -INT8_C( 49), INT8_C( 94), -INT8_C( 19), INT8_C( 101), -INT8_C( 66), -INT8_C( 69), INT8_C( 118), -INT8_C( 22), INT8_C( 90), -INT8_C( 79), INT8_C( 7), -INT8_C( 95), INT8_C( 0), -INT8_C( 116), INT8_C( 5), -INT8_C( 84), -INT8_C( 106), -INT8_C( 11), -INT8_C( 29), -INT8_C( 68), INT8_C( 64), -INT8_C( 56), INT8_C( 11), INT8_C( 46), INT8_C( 28), -INT8_C( 36), -INT8_C( 15), INT8_C( 1), INT8_C( 44), -INT8_C( 81), INT8_C( 26), -INT8_C( 103), INT8_C( 27), -INT8_C( 85), -INT8_C( 77), -INT8_C( 119), -INT8_C( 9), -INT8_C( 75), -INT8_C( 79), INT8_C( 76), INT8_C( 1), -INT8_C( 90), INT8_C( 2) }, { INT8_C( 0), INT8_C( 58), INT8_C( 0), -INT8_C( 11), -INT8_C( 102), INT8_MIN, INT8_C( 13), INT8_C( 0), INT8_C( 0), INT8_C( 105), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 105), INT8_C( 69), INT8_C( 84), INT8_C( 0), INT8_C( 101), -INT8_C( 78), INT8_C( 0), INT8_C( 0), -INT8_C( 69), INT8_C( 0), INT8_C( 0), INT8_C( 63), -INT8_C( 22), INT8_C( 0), INT8_C( 90), INT8_C( 0), -INT8_C( 79), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 56), -INT8_C( 106), INT8_C( 0), INT8_C( 3), INT8_C( 0), INT8_C( 0), INT8_C( 28), INT8_C( 0), INT8_C( 0), -INT8_C( 5), -INT8_C( 15), INT8_C( 66), -INT8_C( 119), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 93), INT8_C( 0), -INT8_C( 28), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 90), INT8_C( 0), INT8_C( 2) } }, { UINT64_C(14978542943581159871), { INT8_C( 107), -INT8_C( 44), -INT8_C( 17), -INT8_C( 46), INT8_C( 75), INT8_C( 81), -INT8_C( 125), -INT8_C( 44), INT8_C( 4), 
INT8_C( 98), -INT8_C( 33), -INT8_C( 19), -INT8_C( 67), INT8_C( 81), -INT8_C( 50), -INT8_C( 23), -INT8_C( 54), -INT8_C( 37), INT8_C( 54), -INT8_C( 47), INT8_C( 124), INT8_C( 66), INT8_C( 109), INT8_C( 54), -INT8_C( 35), -INT8_C( 12), -INT8_C( 72), -INT8_C( 64), INT8_C( 47), -INT8_C( 107), INT8_C( 54), INT8_C( 48), INT8_C( 120), INT8_C( 50), INT8_C( 57), -INT8_C( 20), INT8_C( 76), INT8_C( 118), INT8_MAX, -INT8_C( 15), INT8_C( 49), -INT8_C( 68), -INT8_C( 59), -INT8_C( 37), INT8_C( 87), -INT8_C( 57), -INT8_C( 121), INT8_C( 60), -INT8_C( 89), -INT8_C( 3), INT8_C( 61), INT8_C( 29), -INT8_C( 3), -INT8_C( 71), INT8_C( 57), INT8_C( 63), -INT8_C( 111), -INT8_C( 100), -INT8_C( 37), INT8_C( 89), INT8_C( 91), INT8_C( 112), -INT8_C( 1), -INT8_C( 25) }, { INT8_C( 70), -INT8_C( 37), -INT8_C( 125), INT8_C( 91), INT8_C( 80), INT8_C( 112), INT8_C( 38), INT8_C( 101), -INT8_C( 75), -INT8_C( 56), -INT8_C( 18), -INT8_C( 82), -INT8_C( 77), INT8_C( 18), -INT8_C( 97), INT8_C( 115), -INT8_C( 11), -INT8_C( 124), INT8_C( 88), INT8_C( 31), -INT8_C( 92), -INT8_C( 68), -INT8_C( 40), INT8_C( 64), -INT8_C( 16), -INT8_C( 125), INT8_C( 49), INT8_C( 76), -INT8_C( 55), -INT8_C( 5), INT8_C( 43), -INT8_C( 35), -INT8_C( 17), INT8_C( 26), INT8_C( 33), -INT8_C( 113), INT8_C( 17), INT8_C( 75), INT8_C( 45), INT8_C( 108), -INT8_C( 75), -INT8_C( 26), -INT8_C( 40), -INT8_C( 127), INT8_C( 25), -INT8_C( 16), -INT8_C( 93), -INT8_C( 59), -INT8_C( 89), INT8_C( 21), -INT8_C( 46), INT8_C( 111), -INT8_C( 6), INT8_MIN, -INT8_C( 17), -INT8_C( 124), INT8_C( 41), INT8_C( 69), -INT8_C( 92), INT8_C( 123), -INT8_C( 44), INT8_C( 41), INT8_C( 61), INT8_C( 4) }, { INT8_C( 4), -INT8_C( 75), INT8_C( 98), -INT8_C( 56), -INT8_C( 33), -INT8_C( 18), INT8_C( 0), -INT8_C( 82), -INT8_C( 67), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 23), INT8_C( 115), INT8_C( 0), -INT8_C( 16), -INT8_C( 12), INT8_C( 0), -INT8_C( 72), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 47), INT8_C( 0), -INT8_C( 107), -INT8_C( 5), INT8_C( 
0), INT8_C( 0), INT8_C( 0), -INT8_C( 35), INT8_C( 0), -INT8_C( 75), INT8_C( 0), INT8_C( 0), -INT8_C( 59), -INT8_C( 40), -INT8_C( 37), INT8_C( 0), INT8_C( 87), INT8_C( 0), INT8_C( 0), -INT8_C( 16), -INT8_C( 121), -INT8_C( 93), INT8_C( 60), INT8_C( 0), INT8_C( 0), INT8_C( 41), -INT8_C( 100), INT8_C( 69), -INT8_C( 37), INT8_C( 0), INT8_C( 89), INT8_C( 123), INT8_C( 91), -INT8_C( 44), INT8_C( 112), INT8_C( 41), INT8_C( 0), INT8_C( 0), -INT8_C( 25), INT8_C( 4) } }, { UINT64_C( 4647964056426211003), { INT8_C( 83), -INT8_C( 114), -INT8_C( 31), -INT8_C( 103), INT8_C( 23), INT8_C( 85), INT8_C( 19), -INT8_C( 122), -INT8_C( 98), -INT8_C( 92), INT8_C( 117), INT8_C( 7), -INT8_C( 97), -INT8_C( 54), INT8_C( 87), -INT8_C( 38), INT8_C( 59), INT8_C( 32), INT8_C( 46), -INT8_C( 57), INT8_C( 27), INT8_C( 50), INT8_C( 96), -INT8_C( 70), -INT8_C( 86), -INT8_C( 71), -INT8_C( 19), -INT8_C( 73), -INT8_C( 58), INT8_C( 25), INT8_MAX, INT8_C( 102), INT8_C( 118), -INT8_C( 126), INT8_C( 36), INT8_C( 36), -INT8_C( 108), -INT8_C( 3), INT8_C( 26), INT8_C( 71), INT8_C( 111), -INT8_C( 19), -INT8_C( 39), -INT8_C( 41), INT8_C( 43), -INT8_C( 82), INT8_C( 8), -INT8_C( 91), INT8_C( 91), INT8_C( 109), -INT8_C( 47), -INT8_C( 32), -INT8_C( 113), -INT8_C( 9), INT8_C( 17), INT8_C( 19), INT8_C( 95), -INT8_C( 94), -INT8_C( 55), INT8_C( 92), INT8_C( 10), -INT8_C( 13), -INT8_C( 51), -INT8_C( 114) }, { -INT8_C( 80), -INT8_C( 39), INT8_C( 24), -INT8_C( 29), INT8_C( 36), INT8_C( 124), -INT8_C( 91), INT8_C( 119), INT8_C( 2), -INT8_C( 66), INT8_C( 51), INT8_C( 31), INT8_C( 106), -INT8_C( 20), INT8_C( 113), INT8_C( 47), -INT8_C( 126), -INT8_C( 119), -INT8_C( 94), INT8_C( 1), INT8_C( 32), INT8_C( 1), INT8_C( 75), INT8_C( 88), INT8_C( 38), INT8_C( 108), INT8_C( 0), INT8_C( 118), -INT8_C( 24), INT8_C( 108), -INT8_C( 36), -INT8_C( 117), INT8_C( 3), -INT8_C( 126), INT8_C( 2), INT8_C( 63), -INT8_C( 124), INT8_C( 86), INT8_C( 112), -INT8_C( 92), INT8_C( 114), -INT8_C( 77), -INT8_C( 10), INT8_C( 45), -INT8_C( 4), -INT8_C( 108), 
INT8_C( 34), INT8_C( 88), INT8_C( 114), -INT8_C( 62), -INT8_C( 35), -INT8_C( 72), -INT8_C( 67), -INT8_C( 4), -INT8_C( 11), -INT8_C( 28), INT8_C( 62), -INT8_C( 4), -INT8_C( 41), INT8_C( 22), -INT8_C( 92), INT8_C( 2), -INT8_C( 125), -INT8_C( 114) }, { -INT8_C( 98), INT8_C( 2), INT8_C( 0), -INT8_C( 66), INT8_C( 117), INT8_C( 51), INT8_C( 0), INT8_C( 31), INT8_C( 0), INT8_C( 106), -INT8_C( 54), INT8_C( 0), INT8_C( 0), INT8_C( 113), -INT8_C( 38), INT8_C( 0), -INT8_C( 86), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 19), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_MAX, -INT8_C( 36), INT8_C( 102), -INT8_C( 117), INT8_C( 0), INT8_C( 114), -INT8_C( 19), -INT8_C( 77), INT8_C( 0), -INT8_C( 10), INT8_C( 0), INT8_C( 45), INT8_C( 0), -INT8_C( 4), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 34), -INT8_C( 91), INT8_C( 88), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 22), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 114), INT8_C( 0) } }, { UINT64_C(15681621470748660046), { INT8_C( 84), -INT8_C( 3), INT8_C( 39), INT8_C( 86), INT8_C( 42), INT8_C( 106), INT8_C( 77), INT8_C( 121), INT8_C( 59), -INT8_C( 11), INT8_C( 40), -INT8_C( 25), INT8_C( 76), INT8_C( 122), INT8_C( 37), -INT8_C( 82), -INT8_C( 2), INT8_C( 9), -INT8_C( 118), -INT8_C( 40), INT8_C( 100), INT8_C( 118), -INT8_C( 96), -INT8_C( 29), INT8_C( 126), -INT8_C( 75), -INT8_C( 94), INT8_C( 60), -INT8_C( 31), -INT8_C( 88), INT8_C( 90), -INT8_C( 125), INT8_C( 53), -INT8_C( 71), INT8_C( 124), -INT8_C( 104), -INT8_C( 32), -INT8_C( 111), -INT8_C( 22), -INT8_C( 110), -INT8_C( 87), INT8_C( 81), INT8_C( 94), INT8_C( 107), -INT8_C( 58), -INT8_C( 57), INT8_C( 55), INT8_C( 46), -INT8_C( 57), INT8_C( 85), -INT8_C( 96), INT8_C( 72), -INT8_C( 18), -INT8_C( 54), -INT8_C( 116), -INT8_C( 123), INT8_C( 46), -INT8_C( 78), -INT8_C( 79), INT8_C( 99), INT8_C( 2), -INT8_C( 21), -INT8_C( 79), INT8_C( 34) }, { -INT8_C( 109), 
INT8_C( 105), INT8_C( 43), INT8_C( 103), INT8_C( 110), -INT8_C( 105), -INT8_C( 108), -INT8_C( 33), -INT8_C( 106), INT8_C( 47), -INT8_C( 6), INT8_C( 58), -INT8_C( 52), INT8_C( 85), -INT8_C( 43), INT8_C( 89), -INT8_C( 29), INT8_C( 88), -INT8_C( 127), INT8_C( 31), -INT8_C( 22), -INT8_C( 85), INT8_C( 100), -INT8_C( 24), INT8_C( 111), INT8_C( 8), INT8_C( 125), -INT8_C( 72), INT8_C( 37), -INT8_C( 25), INT8_C( 51), -INT8_C( 49), INT8_C( 5), -INT8_C( 54), -INT8_C( 61), -INT8_C( 86), INT8_C( 43), -INT8_C( 61), INT8_C( 108), INT8_C( 66), -INT8_C( 9), -INT8_C( 16), -INT8_C( 107), -INT8_C( 110), INT8_C( 12), INT8_C( 63), -INT8_C( 45), INT8_C( 86), INT8_C( 66), INT8_C( 19), INT8_C( 73), INT8_C( 43), -INT8_C( 125), -INT8_C( 75), INT8_C( 50), -INT8_C( 63), INT8_C( 6), INT8_C( 47), -INT8_C( 3), INT8_C( 114), INT8_C( 70), -INT8_C( 116), INT8_C( 15), -INT8_C( 26) }, { INT8_C( 0), -INT8_C( 106), -INT8_C( 11), INT8_C( 47), INT8_C( 0), INT8_C( 0), -INT8_C( 25), INT8_C( 0), INT8_C( 76), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 37), INT8_C( 0), -INT8_C( 82), INT8_C( 89), INT8_C( 0), INT8_C( 0), -INT8_C( 75), INT8_C( 8), -INT8_C( 94), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 37), INT8_C( 0), INT8_C( 0), INT8_C( 90), INT8_C( 0), INT8_C( 0), -INT8_C( 49), INT8_C( 0), INT8_C( 0), INT8_C( 81), INT8_C( 0), INT8_C( 0), -INT8_C( 107), INT8_C( 0), -INT8_C( 110), -INT8_C( 58), INT8_C( 12), -INT8_C( 57), INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 46), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 3), INT8_C( 0), INT8_C( 114), INT8_C( 2), INT8_C( 0), INT8_C( 0), -INT8_C( 116), -INT8_C( 79), INT8_C( 0), INT8_C( 34), -INT8_C( 26) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_maskz_unpackhi_epi8(test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x64(r, 
simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm512_unpackhi_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { -INT16_C( 31862), -INT16_C( 28008), -INT16_C( 17358), -INT16_C( 9367), -INT16_C( 8035), INT16_C( 28789), INT16_C( 9734), INT16_C( 32189), -INT16_C( 29888), INT16_C( 14371), INT16_C( 13788), -INT16_C( 27397), INT16_C( 24497), -INT16_C( 8132), -INT16_C( 27140), -INT16_C( 30909), -INT16_C( 9447), INT16_C( 19225), -INT16_C( 32105), INT16_C( 13350), -INT16_C( 25502), INT16_C( 26788), INT16_C( 25026), INT16_C( 741), INT16_C( 2541), -INT16_C( 14021), INT16_C( 13886), -INT16_C( 4003), -INT16_C( 26219), -INT16_C( 28208), INT16_C( 5167), INT16_C( 18456) }, { INT16_C( 12783), -INT16_C( 30829), -INT16_C( 17997), INT16_C( 5563), INT16_C( 24661), INT16_C( 6270), INT16_C( 25537), -INT16_C( 20966), INT16_C( 21868), -INT16_C( 21641), -INT16_C( 10869), INT16_C( 8347), INT16_C( 27502), -INT16_C( 25166), -INT16_C( 13697), INT16_C( 28645), INT16_C( 30972), -INT16_C( 20490), -INT16_C( 20174), -INT16_C( 30779), INT16_C( 17169), -INT16_C( 11361), -INT16_C( 17754), INT16_C( 4993), -INT16_C( 1777), -INT16_C( 25666), INT16_C( 22990), INT16_C( 15547), INT16_C( 28100), INT16_C( 17626), -INT16_C( 16584), INT16_C( 13491) }, { -INT16_C( 8035), INT16_C( 24661), INT16_C( 28789), INT16_C( 6270), INT16_C( 9734), INT16_C( 25537), INT16_C( 32189), -INT16_C( 20966), INT16_C( 24497), INT16_C( 27502), -INT16_C( 8132), -INT16_C( 25166), -INT16_C( 27140), -INT16_C( 13697), -INT16_C( 30909), INT16_C( 28645), -INT16_C( 25502), INT16_C( 17169), INT16_C( 26788), -INT16_C( 11361), INT16_C( 25026), -INT16_C( 17754), INT16_C( 741), INT16_C( 4993), -INT16_C( 26219), INT16_C( 28100), -INT16_C( 28208), INT16_C( 17626), INT16_C( 5167), -INT16_C( 16584), INT16_C( 18456), INT16_C( 13491) } }, { { -INT16_C( 22216), INT16_C( 27363), -INT16_C( 22438), INT16_C( 27889), -INT16_C( 28181), -INT16_C( 
28097), -INT16_C( 16309), INT16_C( 23205), INT16_C( 25529), -INT16_C( 30731), -INT16_C( 20036), -INT16_C( 32572), -INT16_C( 25058), INT16_C( 22212), INT16_C( 30557), -INT16_C( 27254), INT16_C( 28192), INT16_C( 31743), -INT16_C( 3818), INT16_C( 743), INT16_C( 9858), -INT16_C( 12908), INT16_C( 14822), -INT16_C( 24537), INT16_C( 7580), INT16_C( 22567), -INT16_C( 5170), -INT16_C( 4904), -INT16_C( 25207), -INT16_C( 6333), -INT16_C( 13036), INT16_C( 13692) }, { INT16_C( 31803), INT16_C( 21168), -INT16_C( 26771), -INT16_C( 4268), -INT16_C( 5955), -INT16_C( 23620), -INT16_C( 7391), -INT16_C( 17085), INT16_C( 27392), -INT16_C( 12779), -INT16_C( 4778), -INT16_C( 8005), -INT16_C( 374), -INT16_C( 24633), INT16_C( 17355), INT16_C( 2004), -INT16_C( 31553), INT16_C( 11353), -INT16_C( 21221), -INT16_C( 10213), -INT16_C( 10347), -INT16_C( 18821), -INT16_C( 16453), -INT16_C( 17549), -INT16_C( 30678), -INT16_C( 32630), INT16_C( 17781), INT16_C( 96), INT16_C( 10051), INT16_C( 3743), INT16_C( 29547), INT16_C( 10773) }, { -INT16_C( 28181), -INT16_C( 5955), -INT16_C( 28097), -INT16_C( 23620), -INT16_C( 16309), -INT16_C( 7391), INT16_C( 23205), -INT16_C( 17085), -INT16_C( 25058), -INT16_C( 374), INT16_C( 22212), -INT16_C( 24633), INT16_C( 30557), INT16_C( 17355), -INT16_C( 27254), INT16_C( 2004), INT16_C( 9858), -INT16_C( 10347), -INT16_C( 12908), -INT16_C( 18821), INT16_C( 14822), -INT16_C( 16453), -INT16_C( 24537), -INT16_C( 17549), -INT16_C( 25207), INT16_C( 10051), -INT16_C( 6333), INT16_C( 3743), -INT16_C( 13036), INT16_C( 29547), INT16_C( 13692), INT16_C( 10773) } }, { { INT16_C( 28407), INT16_C( 4695), INT16_C( 29211), -INT16_C( 20246), INT16_C( 25930), INT16_C( 1382), -INT16_C( 9948), INT16_C( 20160), INT16_C( 19041), -INT16_C( 10289), INT16_C( 12175), -INT16_C( 11561), INT16_C( 30295), -INT16_C( 15647), -INT16_C( 2327), -INT16_C( 7956), INT16_C( 17253), -INT16_C( 32526), -INT16_C( 9034), INT16_C( 49), -INT16_C( 26815), INT16_C( 26117), -INT16_C( 14991), -INT16_C( 11596), 
-INT16_C( 31984), -INT16_C( 24663), -INT16_C( 32589), INT16_C( 2674), INT16_C( 21494), -INT16_C( 8244), -INT16_C( 18359), -INT16_C( 20801) }, { -INT16_C( 19972), -INT16_C( 19921), INT16_C( 24717), -INT16_C( 12366), -INT16_C( 18441), INT16_C( 26677), -INT16_C( 5764), -INT16_C( 29637), -INT16_C( 7059), INT16_C( 8236), -INT16_C( 24987), INT16_C( 23338), -INT16_C( 2319), INT16_C( 14907), -INT16_C( 1362), -INT16_C( 21783), INT16_C( 6316), INT16_C( 14684), INT16_C( 3704), INT16_C( 28424), INT16_C( 15813), INT16_C( 17112), INT16_C( 4903), -INT16_C( 27442), -INT16_C( 1289), INT16_C( 23732), -INT16_C( 8552), -INT16_C( 30280), -INT16_C( 3116), -INT16_C( 32060), -INT16_C( 21011), -INT16_C( 26323) }, { INT16_C( 25930), -INT16_C( 18441), INT16_C( 1382), INT16_C( 26677), -INT16_C( 9948), -INT16_C( 5764), INT16_C( 20160), -INT16_C( 29637), INT16_C( 30295), -INT16_C( 2319), -INT16_C( 15647), INT16_C( 14907), -INT16_C( 2327), -INT16_C( 1362), -INT16_C( 7956), -INT16_C( 21783), -INT16_C( 26815), INT16_C( 15813), INT16_C( 26117), INT16_C( 17112), -INT16_C( 14991), INT16_C( 4903), -INT16_C( 11596), -INT16_C( 27442), INT16_C( 21494), -INT16_C( 3116), -INT16_C( 8244), -INT16_C( 32060), -INT16_C( 18359), -INT16_C( 21011), -INT16_C( 20801), -INT16_C( 26323) } }, { { -INT16_C( 30267), INT16_C( 15827), -INT16_C( 9320), INT16_C( 23980), -INT16_C( 31719), INT16_C( 16543), INT16_C( 28311), -INT16_C( 28716), -INT16_C( 30616), INT16_C( 491), -INT16_C( 23706), INT16_C( 14986), INT16_C( 20118), -INT16_C( 31556), -INT16_C( 5637), -INT16_C( 16355), -INT16_C( 3981), INT16_C( 3069), -INT16_C( 21812), -INT16_C( 6808), INT16_C( 2094), -INT16_C( 14811), -INT16_C( 1674), -INT16_C( 8619), INT16_C( 16513), -INT16_C( 6177), INT16_C( 27364), INT16_C( 31265), -INT16_C( 8776), -INT16_C( 19202), INT16_C( 7367), INT16_C( 14964) }, { INT16_C( 29196), -INT16_C( 10171), -INT16_C( 21220), INT16_C( 19133), -INT16_C( 7499), INT16_C( 11024), INT16_C( 26075), INT16_C( 23562), -INT16_C( 5722), -INT16_C( 30141), INT16_C( 
25683), INT16_C( 3076), INT16_C( 834), INT16_C( 2496), INT16_C( 13343), INT16_C( 11075), -INT16_C( 30554), -INT16_C( 15868), -INT16_C( 16075), -INT16_C( 5363), INT16_C( 7588), INT16_C( 32534), INT16_C( 8323), INT16_C( 10716), INT16_C( 7946), INT16_C( 23987), -INT16_C( 18556), -INT16_C( 14743), INT16_C( 10682), -INT16_C( 9777), INT16_C( 4702), INT16_C( 1029) }, { -INT16_C( 31719), -INT16_C( 7499), INT16_C( 16543), INT16_C( 11024), INT16_C( 28311), INT16_C( 26075), -INT16_C( 28716), INT16_C( 23562), INT16_C( 20118), INT16_C( 834), -INT16_C( 31556), INT16_C( 2496), -INT16_C( 5637), INT16_C( 13343), -INT16_C( 16355), INT16_C( 11075), INT16_C( 2094), INT16_C( 7588), -INT16_C( 14811), INT16_C( 32534), -INT16_C( 1674), INT16_C( 8323), -INT16_C( 8619), INT16_C( 10716), -INT16_C( 8776), INT16_C( 10682), -INT16_C( 19202), -INT16_C( 9777), INT16_C( 7367), INT16_C( 4702), INT16_C( 14964), INT16_C( 1029) } }, { { INT16_C( 2458), -INT16_C( 12345), -INT16_C( 11062), INT16_C( 28346), -INT16_C( 11791), INT16_C( 29934), -INT16_C( 13583), -INT16_C( 1123), INT16_C( 20713), INT16_C( 27993), -INT16_C( 15864), -INT16_C( 15821), INT16_C( 748), INT16_C( 19100), -INT16_C( 24300), -INT16_C( 20914), INT16_C( 5546), INT16_C( 29822), INT16_C( 14569), -INT16_C( 9245), -INT16_C( 12023), -INT16_C( 1201), -INT16_C( 4709), -INT16_C( 31498), INT16_C( 20285), INT16_C( 17906), INT16_C( 9490), -INT16_C( 504), -INT16_C( 23512), INT16_C( 15432), -INT16_C( 27067), -INT16_C( 4117) }, { INT16_C( 27052), -INT16_C( 27293), INT16_C( 18081), -INT16_C( 21648), -INT16_C( 16361), -INT16_C( 19802), -INT16_C( 25427), -INT16_C( 5577), INT16_C( 10732), -INT16_C( 464), INT16_C( 14414), INT16_C( 30460), INT16_C( 17628), INT16_C( 8627), -INT16_C( 24870), -INT16_C( 31216), INT16_C( 29447), -INT16_C( 22500), -INT16_C( 29510), -INT16_C( 11949), -INT16_C( 1716), -INT16_C( 1660), -INT16_C( 17514), -INT16_C( 32028), INT16_C( 5348), INT16_C( 12928), INT16_C( 31820), INT16_C( 10409), INT16_C( 23744), -INT16_C( 26039), INT16_C( 
23034), INT16_C( 289) }, { -INT16_C( 11791), -INT16_C( 16361), INT16_C( 29934), -INT16_C( 19802), -INT16_C( 13583), -INT16_C( 25427), -INT16_C( 1123), -INT16_C( 5577), INT16_C( 748), INT16_C( 17628), INT16_C( 19100), INT16_C( 8627), -INT16_C( 24300), -INT16_C( 24870), -INT16_C( 20914), -INT16_C( 31216), -INT16_C( 12023), -INT16_C( 1716), -INT16_C( 1201), -INT16_C( 1660), -INT16_C( 4709), -INT16_C( 17514), -INT16_C( 31498), -INT16_C( 32028), -INT16_C( 23512), INT16_C( 23744), INT16_C( 15432), -INT16_C( 26039), -INT16_C( 27067), INT16_C( 23034), -INT16_C( 4117), INT16_C( 289) } }, { { INT16_C( 15820), -INT16_C( 31063), -INT16_C( 567), INT16_C( 5720), -INT16_C( 8970), -INT16_C( 29681), -INT16_C( 3177), INT16_C( 31502), -INT16_C( 29177), INT16_C( 21421), INT16_C( 22026), -INT16_C( 13701), -INT16_C( 15182), -INT16_C( 21403), -INT16_C( 31203), -INT16_C( 5459), INT16_C( 22467), -INT16_C( 29584), -INT16_C( 14252), INT16_C( 19106), -INT16_C( 19804), INT16_C( 15319), -INT16_C( 6747), -INT16_C( 21066), INT16_C( 25716), INT16_C( 32256), INT16_C( 31930), INT16_C( 27977), -INT16_C( 20928), INT16_C( 24089), -INT16_C( 14540), -INT16_C( 2232) }, { -INT16_C( 18402), INT16_C( 29315), INT16_C( 9857), INT16_C( 9660), -INT16_C( 27688), INT16_C( 32097), INT16_C( 6009), -INT16_C( 4822), INT16_C( 11131), INT16_C( 13931), -INT16_C( 19289), -INT16_C( 6237), -INT16_C( 17310), -INT16_C( 27067), -INT16_C( 29309), -INT16_C( 24179), INT16_C( 4422), -INT16_C( 14573), -INT16_C( 12233), INT16_C( 4076), INT16_C( 19811), -INT16_C( 9076), -INT16_C( 18587), -INT16_C( 7991), INT16_C( 13794), -INT16_C( 30442), -INT16_C( 17943), INT16_C( 19568), -INT16_C( 18826), -INT16_C( 1566), INT16_C( 28739), -INT16_C( 30309) }, { -INT16_C( 8970), -INT16_C( 27688), -INT16_C( 29681), INT16_C( 32097), -INT16_C( 3177), INT16_C( 6009), INT16_C( 31502), -INT16_C( 4822), -INT16_C( 15182), -INT16_C( 17310), -INT16_C( 21403), -INT16_C( 27067), -INT16_C( 31203), -INT16_C( 29309), -INT16_C( 5459), -INT16_C( 24179), -INT16_C( 
19804), INT16_C( 19811), INT16_C( 15319), -INT16_C( 9076), -INT16_C( 6747), -INT16_C( 18587), -INT16_C( 21066), -INT16_C( 7991), -INT16_C( 20928), -INT16_C( 18826), INT16_C( 24089), -INT16_C( 1566), -INT16_C( 14540), INT16_C( 28739), -INT16_C( 2232), -INT16_C( 30309) } }, { { -INT16_C( 20863), -INT16_C( 18352), INT16_C( 15742), -INT16_C( 7481), INT16_C( 21386), -INT16_C( 4162), -INT16_C( 30710), -INT16_C( 4912), -INT16_C( 6467), -INT16_C( 22923), -INT16_C( 6496), INT16_C( 5874), -INT16_C( 10852), -INT16_C( 8433), -INT16_C( 21947), -INT16_C( 14743), -INT16_C( 18087), -INT16_C( 10370), INT16_C( 17910), -INT16_C( 32327), INT16_C( 30872), -INT16_C( 23696), INT16_C( 16384), -INT16_C( 17009), INT16_C( 1319), -INT16_C( 14493), INT16_C( 22251), -INT16_C( 30755), -INT16_C( 5077), INT16_C( 28774), -INT16_C( 12393), -INT16_C( 4042) }, { -INT16_C( 19319), INT16_C( 32711), -INT16_C( 32263), -INT16_C( 28416), INT16_C( 29177), -INT16_C( 1740), -INT16_C( 15183), -INT16_C( 10058), INT16_C( 6601), -INT16_C( 19297), INT16_C( 31855), -INT16_C( 26053), -INT16_C( 24215), INT16_C( 10), INT16_C( 16497), -INT16_C( 1296), -INT16_C( 18444), -INT16_C( 4743), INT16_C( 31288), INT16_C( 12671), -INT16_C( 19477), -INT16_C( 25558), -INT16_C( 8073), INT16_C( 16501), INT16_C( 5370), INT16_C( 27124), INT16_C( 12177), -INT16_C( 1532), INT16_C( 3793), INT16_C( 17146), -INT16_C( 5553), INT16_C( 17212) }, { INT16_C( 21386), INT16_C( 29177), -INT16_C( 4162), -INT16_C( 1740), -INT16_C( 30710), -INT16_C( 15183), -INT16_C( 4912), -INT16_C( 10058), -INT16_C( 10852), -INT16_C( 24215), -INT16_C( 8433), INT16_C( 10), -INT16_C( 21947), INT16_C( 16497), -INT16_C( 14743), -INT16_C( 1296), INT16_C( 30872), -INT16_C( 19477), -INT16_C( 23696), -INT16_C( 25558), INT16_C( 16384), -INT16_C( 8073), -INT16_C( 17009), INT16_C( 16501), -INT16_C( 5077), INT16_C( 3793), INT16_C( 28774), INT16_C( 17146), -INT16_C( 12393), -INT16_C( 5553), -INT16_C( 4042), INT16_C( 17212) } }, { { -INT16_C( 19039), -INT16_C( 9679), -INT16_C( 
20433), INT16_C( 6667), INT16_C( 13923), -INT16_C( 9289), INT16_C( 11286), INT16_C( 4123), INT16_C( 4160), -INT16_C( 11910), INT16_C( 32319), INT16_C( 4299), -INT16_C( 14964), -INT16_C( 9390), -INT16_C( 29009), INT16_C( 20767), INT16_C( 20548), INT16_C( 29483), INT16_C( 13824), INT16_C( 25486), INT16_C( 17772), -INT16_C( 31938), INT16_C( 23153), -INT16_C( 20077), INT16_C( 3434), -INT16_C( 22141), INT16_C( 20107), INT16_C( 6330), INT16_C( 3092), -INT16_C( 15373), INT16_C( 4763), -INT16_C( 8428) }, { INT16_C( 16226), INT16_C( 25170), -INT16_C( 8074), -INT16_C( 7482), INT16_C( 1061), -INT16_C( 27035), -INT16_C( 1698), -INT16_C( 14264), -INT16_C( 13562), -INT16_C( 28046), INT16_C( 11289), INT16_C( 11690), -INT16_C( 25288), -INT16_C( 11279), INT16_C( 1456), INT16_C( 4786), INT16_C( 1349), -INT16_C( 17547), INT16_C( 15333), INT16_C( 2973), INT16_C( 831), -INT16_C( 24927), -INT16_C( 5636), INT16_C( 614), -INT16_C( 10060), -INT16_C( 12652), INT16_C( 15876), INT16_C( 15867), -INT16_C( 4900), -INT16_C( 29680), -INT16_C( 15374), INT16_C( 14238) }, { INT16_C( 13923), INT16_C( 1061), -INT16_C( 9289), -INT16_C( 27035), INT16_C( 11286), -INT16_C( 1698), INT16_C( 4123), -INT16_C( 14264), -INT16_C( 14964), -INT16_C( 25288), -INT16_C( 9390), -INT16_C( 11279), -INT16_C( 29009), INT16_C( 1456), INT16_C( 20767), INT16_C( 4786), INT16_C( 17772), INT16_C( 831), -INT16_C( 31938), -INT16_C( 24927), INT16_C( 23153), -INT16_C( 5636), -INT16_C( 20077), INT16_C( 614), INT16_C( 3092), -INT16_C( 4900), -INT16_C( 15373), -INT16_C( 29680), INT16_C( 4763), -INT16_C( 15374), -INT16_C( 8428), INT16_C( 14238) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_unpackhi_epi16(a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_unpackhi_epi16 
(SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t src[32]; const simde__mmask32 k; const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { INT16_C( 27404), INT16_C( 14898), INT16_C( 956), INT16_C( 8286), -INT16_C( 9167), -INT16_C( 28939), -INT16_C( 23261), -INT16_C( 21730), -INT16_C( 18437), -INT16_C( 23643), -INT16_C( 7882), -INT16_C( 22201), -INT16_C( 1304), INT16_C( 6099), INT16_C( 6423), INT16_C( 26613), -INT16_C( 15814), -INT16_C( 495), -INT16_C( 30970), INT16_C( 11012), INT16_C( 10310), -INT16_C( 12798), INT16_C( 25997), INT16_C( 828), INT16_C( 17628), INT16_C( 16888), INT16_C( 20782), -INT16_C( 1807), -INT16_C( 19919), INT16_C( 23316), INT16_C( 20982), INT16_C( 9513) }, UINT32_C(3281474170), { -INT16_C( 10999), INT16_C( 15076), INT16_C( 4558), -INT16_C( 25737), INT16_C( 3094), -INT16_C( 11382), INT16_C( 21395), -INT16_C( 12020), INT16_C( 22329), -INT16_C( 18273), INT16_C( 28988), INT16_C( 3093), INT16_C( 10978), INT16_C( 30531), INT16_C( 14987), -INT16_C( 908), INT16_C( 1833), -INT16_C( 21846), INT16_C( 7965), -INT16_C( 32241), -INT16_C( 28677), INT16_C( 11658), -INT16_C( 12230), INT16_C( 25269), -INT16_C( 15334), INT16_C( 23931), -INT16_C( 21628), -INT16_C( 5755), -INT16_C( 32735), -INT16_C( 16868), -INT16_C( 5159), -INT16_C( 26551) }, { -INT16_C( 3747), -INT16_C( 32059), -INT16_C( 3828), INT16_C( 17090), -INT16_C( 2562), -INT16_C( 26588), INT16_C( 28868), -INT16_C( 1972), INT16_C( 11937), INT16_C( 152), INT16_C( 16414), INT16_C( 2506), INT16_C( 23138), INT16_C( 2357), INT16_C( 3285), INT16_C( 12967), -INT16_C( 6594), INT16_C( 20879), -INT16_C( 10456), -INT16_C( 1442), INT16_C( 13928), INT16_C( 32675), INT16_C( 19165), INT16_C( 2402), -INT16_C( 1724), INT16_C( 31131), -INT16_C( 31019), -INT16_C( 21839), INT16_C( 23239), -INT16_C( 15538), -INT16_C( 21852), INT16_C( 20575) }, { INT16_C( 27404), -INT16_C( 2562), INT16_C( 956), -INT16_C( 26588), INT16_C( 21395), INT16_C( 28868), -INT16_C( 12020), -INT16_C( 21730), 
-INT16_C( 18437), INT16_C( 23138), -INT16_C( 7882), -INT16_C( 22201), INT16_C( 14987), INT16_C( 6099), -INT16_C( 908), INT16_C( 26613), -INT16_C( 28677), INT16_C( 13928), INT16_C( 11658), INT16_C( 11012), -INT16_C( 12230), -INT16_C( 12798), INT16_C( 25997), INT16_C( 2402), -INT16_C( 32735), INT16_C( 23239), INT16_C( 20782), -INT16_C( 1807), -INT16_C( 19919), INT16_C( 23316), -INT16_C( 26551), INT16_C( 20575) } }, { { -INT16_C( 24230), -INT16_C( 25235), -INT16_C( 22849), -INT16_C( 17737), INT16_C( 12370), INT16_C( 20871), -INT16_C( 21136), INT16_C( 23210), -INT16_C( 6012), -INT16_C( 28647), -INT16_C( 25235), -INT16_C( 23120), -INT16_C( 21717), INT16_C( 24492), -INT16_C( 1920), INT16_C( 8052), INT16_C( 12436), INT16_C( 9523), INT16_C( 18908), INT16_C( 9488), INT16_C( 6234), -INT16_C( 25047), INT16_C( 17782), INT16_C( 15752), INT16_C( 32414), -INT16_C( 14616), INT16_C( 8792), -INT16_C( 17733), -INT16_C( 15138), -INT16_C( 26221), -INT16_C( 8581), -INT16_C( 17166) }, UINT32_C(2849954152), { INT16_C( 30818), INT16_C( 12844), -INT16_C( 24923), INT16_C( 18976), INT16_C( 16041), -INT16_C( 11355), -INT16_C( 14591), INT16_C( 16007), INT16_C( 28203), INT16_C( 3255), -INT16_C( 21685), -INT16_C( 5841), -INT16_C( 18691), -INT16_C( 3930), INT16_C( 7153), -INT16_C( 21391), -INT16_C( 26493), INT16_C( 16267), INT16_C( 5044), -INT16_C( 22293), INT16_C( 29810), -INT16_C( 2382), -INT16_C( 14482), -INT16_C( 3920), INT16_C( 31923), INT16_C( 22111), -INT16_C( 14985), -INT16_C( 32306), -INT16_C( 21470), INT16_C( 2866), -INT16_C( 7062), -INT16_C( 8230) }, { INT16_C( 24965), -INT16_C( 16372), -INT16_C( 11253), INT16_C( 3907), -INT16_C( 10867), INT16_C( 6102), INT16_C( 31893), -INT16_C( 1152), -INT16_C( 16188), INT16_C( 17787), -INT16_C( 11616), INT16_C( 16599), INT16_C( 15974), INT16_C( 16313), -INT16_C( 16172), -INT16_C( 21916), -INT16_C( 28521), INT16_C( 649), INT16_C( 22744), INT16_C( 16863), -INT16_C( 28302), INT16_C( 10253), INT16_C( 28528), -INT16_C( 18657), -INT16_C( 17549), -INT16_C( 
22343), INT16_C( 25721), -INT16_C( 12195), -INT16_C( 23631), -INT16_C( 15610), INT16_C( 5371), INT16_C( 18855) }, { -INT16_C( 24230), -INT16_C( 25235), -INT16_C( 22849), INT16_C( 6102), INT16_C( 12370), INT16_C( 31893), INT16_C( 16007), INT16_C( 23210), -INT16_C( 18691), -INT16_C( 28647), -INT16_C( 25235), INT16_C( 16313), INT16_C( 7153), INT16_C( 24492), -INT16_C( 21391), -INT16_C( 21916), INT16_C( 12436), -INT16_C( 28302), -INT16_C( 2382), INT16_C( 10253), -INT16_C( 14482), -INT16_C( 25047), -INT16_C( 3920), -INT16_C( 18657), -INT16_C( 21470), -INT16_C( 14616), INT16_C( 8792), -INT16_C( 15610), -INT16_C( 15138), INT16_C( 5371), -INT16_C( 8581), INT16_C( 18855) } }, { { -INT16_C( 8359), -INT16_C( 30047), -INT16_C( 28292), -INT16_C( 6980), -INT16_C( 10294), -INT16_C( 22686), INT16_C( 10785), INT16_C( 22251), INT16_C( 30350), -INT16_C( 845), INT16_C( 11361), INT16_C( 28292), -INT16_C( 29332), -INT16_C( 2188), INT16_C( 31028), -INT16_C( 28185), -INT16_C( 28936), -INT16_C( 25956), -INT16_C( 11312), INT16_C( 25754), -INT16_C( 16760), INT16_C( 10190), INT16_C( 5161), -INT16_C( 13729), -INT16_C( 4141), INT16_C( 10413), -INT16_C( 315), INT16_C( 7941), -INT16_C( 8026), INT16_C( 3476), INT16_C( 24408), -INT16_C( 8315) }, UINT32_C(2477708793), { -INT16_C( 23908), -INT16_C( 16752), INT16_C( 18317), -INT16_C( 12565), -INT16_C( 21776), -INT16_C( 19159), INT16_C( 18487), -INT16_C( 19919), INT16_C( 26763), -INT16_C( 29351), INT16_C( 8841), -INT16_C( 13846), -INT16_C( 16520), -INT16_C( 10625), -INT16_C( 3418), INT16_C( 12584), INT16_C( 20641), -INT16_C( 17565), -INT16_C( 10745), INT16_C( 16522), -INT16_C( 13515), INT16_C( 3233), INT16_C( 32025), -INT16_C( 2625), INT16_C( 2319), INT16_C( 15384), -INT16_C( 19012), -INT16_C( 11938), INT16_C( 3220), -INT16_C( 15552), -INT16_C( 2812), -INT16_C( 3672) }, { -INT16_C( 8644), -INT16_C( 3932), -INT16_C( 29702), -INT16_C( 32484), -INT16_C( 7171), INT16_C( 24408), -INT16_C( 3264), -INT16_C( 32013), INT16_C( 11313), -INT16_C( 11678), INT16_C( 
30398), INT16_C( 11431), INT16_C( 9304), -INT16_C( 6903), -INT16_C( 20628), -INT16_C( 3802), INT16_C( 528), -INT16_C( 28485), -INT16_C( 2619), -INT16_C( 17710), -INT16_C( 23889), INT16_C( 29661), -INT16_C( 21325), INT16_C( 5869), INT16_C( 16688), INT16_C( 31068), -INT16_C( 22030), INT16_C( 24313), -INT16_C( 2086), INT16_C( 26287), -INT16_C( 10861), -INT16_C( 26197) }, { -INT16_C( 21776), -INT16_C( 30047), -INT16_C( 28292), INT16_C( 24408), INT16_C( 18487), -INT16_C( 3264), -INT16_C( 19919), -INT16_C( 32013), -INT16_C( 16520), -INT16_C( 845), -INT16_C( 10625), INT16_C( 28292), -INT16_C( 3418), -INT16_C( 2188), INT16_C( 12584), -INT16_C( 3802), -INT16_C( 28936), -INT16_C( 23889), INT16_C( 3233), INT16_C( 29661), -INT16_C( 16760), -INT16_C( 21325), INT16_C( 5161), INT16_C( 5869), INT16_C( 3220), -INT16_C( 2086), -INT16_C( 315), INT16_C( 7941), -INT16_C( 2812), INT16_C( 3476), INT16_C( 24408), -INT16_C( 26197) } }, { { INT16_C( 2339), -INT16_C( 3730), -INT16_C( 2095), -INT16_C( 2880), INT16_C( 13424), INT16_C( 25522), -INT16_C( 28156), INT16_C( 2159), INT16_C( 18419), INT16_C( 2222), INT16_C( 8093), -INT16_C( 27361), -INT16_C( 19874), -INT16_C( 21784), INT16_C( 25037), -INT16_C( 27374), -INT16_C( 21444), INT16_C( 1472), -INT16_C( 30798), INT16_C( 16440), -INT16_C( 901), -INT16_C( 2601), INT16_C( 5056), -INT16_C( 8224), INT16_C( 31617), INT16_C( 30620), INT16_C( 16500), -INT16_C( 31288), -INT16_C( 15169), INT16_C( 15332), INT16_C( 30235), -INT16_C( 11) }, UINT32_C(4066153294), { -INT16_C( 29379), -INT16_C( 15697), -INT16_C( 22433), INT16_C( 3492), -INT16_C( 32064), INT16_C( 4085), INT16_C( 29009), INT16_C( 7647), INT16_C( 4718), -INT16_C( 22770), -INT16_C( 27249), -INT16_C( 18787), INT16_C( 10179), INT16_C( 12735), -INT16_C( 4464), INT16_C( 17191), INT16_C( 30180), -INT16_C( 25832), INT16_C( 7862), -INT16_C( 26743), -INT16_C( 19714), -INT16_C( 32650), INT16_C( 3214), INT16_C( 15516), INT16_C( 25593), -INT16_C( 8304), -INT16_C( 25382), INT16_C( 27607), INT16_C( 11867), 
-INT16_C( 448), INT16_C( 19993), INT16_C( 26374) }, { INT16_C( 18929), -INT16_C( 24781), INT16_C( 16785), INT16_C( 28792), INT16_C( 19031), INT16_C( 8368), INT16_C( 18256), -INT16_C( 11996), -INT16_C( 6969), -INT16_C( 19821), INT16_C( 15775), INT16_C( 22128), -INT16_C( 18885), -INT16_C( 9277), INT16_C( 15013), -INT16_C( 2507), -INT16_C( 12366), -INT16_C( 3651), INT16_C( 26471), INT16_C( 25183), INT16_C( 2806), INT16_C( 21581), INT16_C( 25029), -INT16_C( 25129), -INT16_C( 3278), -INT16_C( 12421), INT16_C( 3308), INT16_C( 9521), -INT16_C( 10772), -INT16_C( 21126), INT16_C( 13313), -INT16_C( 7247) }, { INT16_C( 2339), INT16_C( 19031), INT16_C( 4085), INT16_C( 8368), INT16_C( 13424), INT16_C( 25522), INT16_C( 7647), INT16_C( 2159), INT16_C( 10179), -INT16_C( 18885), INT16_C( 8093), -INT16_C( 27361), -INT16_C( 4464), -INT16_C( 21784), INT16_C( 25037), -INT16_C( 2507), -INT16_C( 21444), INT16_C( 1472), -INT16_C( 32650), INT16_C( 21581), INT16_C( 3214), -INT16_C( 2601), INT16_C( 15516), -INT16_C( 8224), INT16_C( 31617), -INT16_C( 10772), INT16_C( 16500), -INT16_C( 31288), INT16_C( 19993), INT16_C( 13313), INT16_C( 26374), -INT16_C( 7247) } }, { { -INT16_C( 29581), -INT16_C( 23827), -INT16_C( 26970), INT16_C( 7499), INT16_C( 12876), INT16_C( 13829), -INT16_C( 17581), -INT16_C( 31540), INT16_C( 9315), INT16_C( 5411), -INT16_C( 8990), -INT16_C( 12609), INT16_C( 26785), -INT16_C( 15610), -INT16_C( 2863), INT16_C( 29552), INT16_C( 10013), -INT16_C( 24346), -INT16_C( 7304), INT16_C( 31671), INT16_C( 19291), -INT16_C( 1156), -INT16_C( 6562), INT16_C( 24620), INT16_C( 21867), -INT16_C( 31118), -INT16_C( 10260), -INT16_C( 21379), INT16_C( 21627), -INT16_C( 19538), INT16_C( 19717), -INT16_C( 29048) }, UINT32_C(2775428591), { INT16_C( 14869), INT16_C( 26177), INT16_C( 24970), INT16_C( 14240), INT16_C( 30897), -INT16_C( 24567), -INT16_C( 2061), -INT16_C( 285), INT16_C( 32651), INT16_C( 29071), -INT16_C( 22083), INT16_C( 31401), INT16_C( 9639), -INT16_C( 27443), -INT16_C( 4834), 
INT16_C( 10547), -INT16_C( 18378), -INT16_C( 8629), -INT16_C( 26472), -INT16_C( 8609), -INT16_C( 11971), -INT16_C( 17018), -INT16_C( 18011), -INT16_C( 32331), -INT16_C( 29042), -INT16_C( 6714), -INT16_C( 14445), INT16_C( 27618), -INT16_C( 11596), INT16_C( 14483), -INT16_C( 11545), -INT16_C( 21711) }, { INT16_C( 10897), INT16_C( 22009), INT16_C( 32250), INT16_C( 31919), -INT16_C( 24691), -INT16_C( 29314), INT16_C( 15358), -INT16_C( 16122), INT16_C( 30957), -INT16_C( 26897), -INT16_C( 26794), INT16_C( 15455), -INT16_C( 30086), INT16_C( 30444), INT16_C( 18498), INT16_C( 23540), INT16_C( 13714), -INT16_C( 2546), -INT16_C( 31755), INT16_C( 11608), -INT16_C( 26841), INT16_C( 20209), -INT16_C( 17101), INT16_C( 4879), INT16_C( 27530), -INT16_C( 31433), INT16_C( 12389), -INT16_C( 2043), INT16_C( 16944), INT16_C( 19678), -INT16_C( 16909), -INT16_C( 27839) }, { INT16_C( 30897), -INT16_C( 24691), -INT16_C( 24567), -INT16_C( 29314), INT16_C( 12876), INT16_C( 15358), -INT16_C( 285), -INT16_C( 16122), INT16_C( 9639), INT16_C( 5411), -INT16_C( 27443), INT16_C( 30444), INT16_C( 26785), INT16_C( 18498), -INT16_C( 2863), INT16_C( 23540), -INT16_C( 11971), -INT16_C( 24346), -INT16_C( 17018), INT16_C( 20209), INT16_C( 19291), -INT16_C( 17101), -INT16_C( 32331), INT16_C( 24620), -INT16_C( 11596), -INT16_C( 31118), INT16_C( 14483), -INT16_C( 21379), INT16_C( 21627), -INT16_C( 16909), INT16_C( 19717), -INT16_C( 27839) } }, { { INT16_C( 29748), INT16_C( 6882), INT16_C( 21770), INT16_C( 23585), INT16_C( 19519), -INT16_C( 6274), -INT16_C( 2156), -INT16_C( 27355), INT16_C( 28989), -INT16_C( 21844), INT16_C( 11238), INT16_C( 261), INT16_C( 29367), -INT16_C( 32231), INT16_C( 93), -INT16_C( 15466), -INT16_C( 2026), INT16_C( 2614), -INT16_C( 4337), INT16_C( 25261), INT16_C( 23732), -INT16_C( 6375), INT16_C( 11133), INT16_C( 15341), -INT16_C( 15677), INT16_C( 24101), INT16_C( 17867), -INT16_C( 27503), INT16_C( 21790), -INT16_C( 23705), -INT16_C( 1777), -INT16_C( 20179) }, UINT32_C(1340282387), { 
INT16_C( 1264), INT16_C( 19542), INT16_C( 2084), INT16_C( 20074), -INT16_C( 24161), INT16_C( 26966), -INT16_C( 30376), -INT16_C( 8095), INT16_C( 29142), INT16_C( 15463), INT16_C( 5793), INT16_C( 20708), INT16_C( 18639), -INT16_C( 3555), -INT16_C( 24855), -INT16_C( 11775), INT16_C( 22356), INT16_C( 30867), -INT16_C( 25918), -INT16_C( 13445), -INT16_C( 16336), INT16_C( 20246), INT16_C( 10082), -INT16_C( 23619), INT16_C( 9828), INT16_C( 27450), -INT16_C( 21718), INT16_C( 22832), -INT16_C( 29575), INT16_C( 27182), -INT16_C( 6615), INT16_C( 15412) }, { INT16_C( 15650), INT16_C( 15480), -INT16_C( 6166), -INT16_C( 15856), -INT16_C( 29977), INT16_C( 18773), INT16_C( 5402), -INT16_C( 15078), INT16_C( 4395), INT16_C( 32337), -INT16_C( 16386), INT16_C( 23840), -INT16_C( 29836), -INT16_C( 29495), INT16_C( 19749), INT16_C( 19625), INT16_C( 23497), -INT16_C( 22002), -INT16_C( 15042), INT16_C( 21054), INT16_C( 15485), INT16_C( 28416), INT16_C( 12403), INT16_C( 25440), -INT16_C( 27314), -INT16_C( 26108), -INT16_C( 25836), INT16_C( 3350), -INT16_C( 16632), -INT16_C( 21393), -INT16_C( 22833), INT16_C( 20506) }, { -INT16_C( 24161), -INT16_C( 29977), INT16_C( 21770), INT16_C( 23585), -INT16_C( 30376), -INT16_C( 6274), -INT16_C( 2156), -INT16_C( 27355), INT16_C( 28989), -INT16_C( 29836), -INT16_C( 3555), INT16_C( 261), -INT16_C( 24855), -INT16_C( 32231), INT16_C( 93), -INT16_C( 15466), -INT16_C( 16336), INT16_C( 15485), -INT16_C( 4337), INT16_C( 25261), INT16_C( 23732), INT16_C( 12403), -INT16_C( 23619), INT16_C( 25440), -INT16_C( 29575), -INT16_C( 16632), INT16_C( 27182), -INT16_C( 21393), INT16_C( 21790), -INT16_C( 23705), INT16_C( 15412), -INT16_C( 20179) } }, { { -INT16_C( 8200), INT16_C( 7627), INT16_C( 14415), INT16_C( 28823), -INT16_C( 26145), INT16_C( 31619), INT16_C( 5373), -INT16_C( 7776), -INT16_C( 7221), INT16_C( 4552), -INT16_C( 15205), INT16_C( 26952), INT16_C( 1141), INT16_C( 2516), -INT16_C( 32052), INT16_C( 25973), INT16_C( 26326), -INT16_C( 10505), -INT16_C( 4938), 
INT16_C( 32521), -INT16_C( 3759), -INT16_C( 25166), -INT16_C( 20803), -INT16_C( 24534), -INT16_C( 30623), INT16_C( 949), INT16_C( 10548), INT16_C( 10924), INT16_C( 14178), -INT16_C( 31045), -INT16_C( 24906), INT16_C( 6122) }, UINT32_C(3928995989), { INT16_C( 12610), INT16_C( 23899), INT16_C( 23145), -INT16_C( 31816), INT16_C( 4410), -INT16_C( 13862), INT16_C( 3653), INT16_C( 13101), -INT16_C( 31324), INT16_C( 22071), -INT16_C( 14205), -INT16_C( 26962), INT16_C( 4142), INT16_C( 6793), INT16_C( 11541), INT16_C( 14781), INT16_C( 21984), INT16_C( 16940), INT16_C( 23416), INT16_C( 3744), -INT16_C( 20266), -INT16_C( 23140), INT16_C( 16081), INT16_C( 17494), -INT16_C( 16623), -INT16_C( 138), -INT16_C( 19484), INT16_C( 6584), -INT16_C( 751), INT16_C( 23181), INT16_C( 18927), -INT16_C( 3545) }, { -INT16_C( 7842), INT16_C( 11554), INT16_C( 795), -INT16_C( 21871), INT16_C( 6798), -INT16_C( 30861), -INT16_C( 8461), -INT16_C( 15264), INT16_C( 4397), -INT16_C( 7119), -INT16_C( 836), INT16_C( 13296), INT16_C( 24827), -INT16_C( 31668), INT16_C( 12714), INT16_C( 25970), -INT16_C( 27486), -INT16_C( 22471), INT16_C( 18128), -INT16_C( 5212), -INT16_C( 12935), INT16_C( 2848), -INT16_C( 19750), -INT16_C( 16551), -INT16_C( 20456), INT16_C( 6500), INT16_C( 17407), INT16_C( 5189), -INT16_C( 23069), -INT16_C( 14499), -INT16_C( 29111), INT16_C( 22692) }, { INT16_C( 4410), INT16_C( 7627), -INT16_C( 13862), INT16_C( 28823), INT16_C( 3653), INT16_C( 31619), INT16_C( 5373), -INT16_C( 15264), -INT16_C( 7221), INT16_C( 4552), -INT16_C( 15205), -INT16_C( 31668), INT16_C( 11541), INT16_C( 12714), -INT16_C( 32052), INT16_C( 25970), -INT16_C( 20266), -INT16_C( 12935), -INT16_C( 23140), INT16_C( 2848), -INT16_C( 3759), -INT16_C( 19750), -INT16_C( 20803), -INT16_C( 24534), -INT16_C( 30623), -INT16_C( 23069), INT16_C( 10548), -INT16_C( 14499), INT16_C( 14178), -INT16_C( 29111), -INT16_C( 3545), INT16_C( 22692) } }, { { INT16_C( 24111), INT16_C( 18895), INT16_C( 29112), -INT16_C( 20943), -INT16_C( 25302), 
INT16_C( 30083), -INT16_C( 20197), -INT16_C( 14464), INT16_C( 5444), INT16_C( 11860), INT16_C( 13339), INT16_C( 10750), -INT16_C( 4538), INT16_C( 30577), INT16_C( 4039), -INT16_C( 30598), INT16_C( 21399), INT16_C( 263), INT16_C( 14517), -INT16_C( 583), -INT16_C( 9112), -INT16_C( 21884), -INT16_C( 85), INT16_C( 692), INT16_C( 29968), INT16_C( 30809), -INT16_C( 2432), -INT16_C( 163), -INT16_C( 9975), INT16_C( 10666), -INT16_C( 21228), -INT16_C( 20122) }, UINT32_C(1303004863), { -INT16_C( 5605), -INT16_C( 25661), -INT16_C( 30067), INT16_C( 19149), -INT16_C( 15934), -INT16_C( 28138), -INT16_C( 30757), -INT16_C( 3057), INT16_C( 25047), INT16_C( 2921), -INT16_C( 1076), INT16_C( 19118), INT16_C( 12231), INT16_C( 16015), -INT16_C( 27113), INT16_C( 12252), INT16_C( 16740), -INT16_C( 6319), INT16_C( 9362), -INT16_C( 9743), INT16_C( 12426), -INT16_C( 15290), -INT16_C( 1684), -INT16_C( 30138), INT16_C( 22557), INT16_C( 8406), -INT16_C( 26406), INT16_C( 29196), INT16_C( 28663), -INT16_C( 25873), INT16_C( 29464), -INT16_C( 29841) }, { -INT16_C( 18976), -INT16_C( 2946), INT16_C( 24777), INT16_C( 7548), -INT16_C( 9637), -INT16_C( 15498), -INT16_C( 20265), -INT16_C( 12535), -INT16_C( 28114), -INT16_C( 13322), INT16_C( 24489), INT16_C( 25709), INT16_C( 6527), INT16_C( 4917), -INT16_C( 7806), -INT16_C( 28795), INT16_C( 11369), -INT16_C( 22810), -INT16_C( 22990), INT16_C( 19993), -INT16_C( 26169), INT16_C( 4116), -INT16_C( 492), INT16_C( 26923), INT16_C( 22644), INT16_C( 23617), INT16_C( 29621), -INT16_C( 7937), -INT16_C( 4740), INT16_C( 16787), INT16_C( 9956), INT16_C( 2215) }, { -INT16_C( 15934), -INT16_C( 9637), -INT16_C( 28138), -INT16_C( 15498), -INT16_C( 30757), -INT16_C( 20265), -INT16_C( 20197), -INT16_C( 12535), INT16_C( 5444), INT16_C( 6527), INT16_C( 16015), INT16_C( 10750), -INT16_C( 4538), INT16_C( 30577), INT16_C( 12252), -INT16_C( 30598), INT16_C( 21399), -INT16_C( 26169), INT16_C( 14517), INT16_C( 4116), -INT16_C( 9112), -INT16_C( 492), -INT16_C( 85), INT16_C( 26923), 
INT16_C( 28663), INT16_C( 30809), -INT16_C( 25873), INT16_C( 16787), -INT16_C( 9975), INT16_C( 10666), -INT16_C( 29841), -INT16_C( 20122) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi16(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_mask_unpackhi_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_unpackhi_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask32 k; const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { UINT32_C(1239030295), { INT16_C( 18650), -INT16_C( 27490), INT16_C( 19500), INT16_C( 28558), -INT16_C( 10413), INT16_C( 27919), INT16_C( 29877), INT16_C( 3112), -INT16_C( 10096), INT16_C( 3591), -INT16_C( 28859), -INT16_C( 17191), INT16_C( 29106), -INT16_C( 8568), -INT16_C( 14808), INT16_C( 2221), INT16_C( 9590), -INT16_C( 18732), INT16_C( 16210), -INT16_C( 2398), -INT16_C( 24718), INT16_C( 31150), -INT16_C( 2282), -INT16_C( 22376), INT16_C( 9645), -INT16_C( 14167), -INT16_C( 17121), -INT16_C( 28178), INT16_C( 29165), -INT16_C( 13923), INT16_C( 30269), INT16_C( 8518) }, { -INT16_C( 17641), INT16_C( 28773), -INT16_C( 18662), -INT16_C( 19754), INT16_C( 16072), INT16_C( 13954), INT16_C( 23812), INT16_C( 28020), -INT16_C( 16551), INT16_C( 28718), INT16_C( 5555), -INT16_C( 23903), -INT16_C( 32004), -INT16_C( 27079), INT16_C( 32220), -INT16_C( 27353), INT16_C( 16085), -INT16_C( 28131), INT16_C( 20981), INT16_C( 18839), -INT16_C( 27457), -INT16_C( 10586), INT16_C( 30781), INT16_C( 11499), -INT16_C( 5601), INT16_C( 23348), INT16_C( 5237), -INT16_C( 23168), -INT16_C( 30030), -INT16_C( 4840), -INT16_C( 1249), -INT16_C( 15536) }, { -INT16_C( 10413), INT16_C( 16072), INT16_C( 27919), INT16_C( 0), INT16_C( 29877), INT16_C( 
0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 32004), INT16_C( 0), -INT16_C( 27079), -INT16_C( 14808), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 27457), INT16_C( 0), -INT16_C( 10586), -INT16_C( 2282), INT16_C( 0), -INT16_C( 22376), INT16_C( 11499), INT16_C( 29165), INT16_C( 0), INT16_C( 0), -INT16_C( 4840), INT16_C( 0), INT16_C( 0), INT16_C( 8518), INT16_C( 0) } }, { UINT32_C(2476879138), { -INT16_C( 13928), INT16_C( 22709), -INT16_C( 20415), INT16_C( 29135), -INT16_C( 28928), INT16_C( 10196), INT16_C( 4989), -INT16_C( 22951), INT16_C( 4945), -INT16_C( 19359), -INT16_C( 16149), INT16_C( 26981), INT16_C( 14101), -INT16_C( 22358), INT16_C( 8981), INT16_C( 27845), -INT16_C( 20866), INT16_C( 22410), -INT16_C( 19029), -INT16_C( 9503), INT16_C( 26583), -INT16_C( 12089), INT16_C( 13822), -INT16_C( 26202), INT16_C( 23351), -INT16_C( 26141), INT16_C( 16546), -INT16_C( 10629), INT16_C( 8563), INT16_C( 15874), -INT16_C( 3643), INT16_C( 26061) }, { INT16_C( 1430), INT16_C( 30905), -INT16_C( 29800), -INT16_C( 23380), -INT16_C( 16086), INT16_C( 10258), -INT16_C( 13793), INT16_C( 9766), -INT16_C( 32436), -INT16_C( 13109), -INT16_C( 13828), -INT16_C( 1383), -INT16_C( 5989), -INT16_C( 18296), INT16_C( 3062), -INT16_C( 20793), -INT16_C( 27179), INT16_C( 26186), -INT16_C( 8999), -INT16_C( 32469), INT16_C( 5156), -INT16_C( 27072), INT16_C( 19904), -INT16_C( 20378), -INT16_C( 24107), -INT16_C( 30185), INT16_C( 23120), -INT16_C( 30483), INT16_C( 5694), -INT16_C( 30196), -INT16_C( 4996), -INT16_C( 3634) }, { INT16_C( 0), -INT16_C( 16086), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 13793), INT16_C( 0), INT16_C( 0), INT16_C( 14101), INT16_C( 0), -INT16_C( 22358), -INT16_C( 18296), INT16_C( 0), INT16_C( 3062), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 5156), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 19904), INT16_C( 0), -INT16_C( 20378), INT16_C( 8563), INT16_C( 5694), INT16_C( 0), INT16_C( 0), -INT16_C( 3643), INT16_C( 0), INT16_C( 0), -INT16_C( 
3634) } }, { UINT32_C(2616854452), { -INT16_C( 6224), -INT16_C( 6044), INT16_C( 20957), INT16_C( 30709), INT16_C( 24442), -INT16_C( 22992), INT16_C( 6688), INT16_C( 17993), -INT16_C( 7188), -INT16_C( 19183), INT16_C( 7974), -INT16_C( 15587), INT16_C( 13787), INT16_C( 31010), INT16_C( 18629), INT16_C( 31791), -INT16_C( 22719), -INT16_C( 29132), INT16_C( 6533), INT16_C( 31298), -INT16_C( 23353), INT16_C( 19020), INT16_C( 27128), -INT16_C( 7177), INT16_C( 11098), -INT16_C( 16155), -INT16_C( 9163), -INT16_C( 6902), INT16_C( 30564), INT16_C( 30453), -INT16_C( 13157), INT16_C( 24892) }, { -INT16_C( 13209), INT16_C( 17095), INT16_C( 21391), INT16_C( 1180), INT16_C( 52), -INT16_C( 31351), INT16_C( 32165), -INT16_C( 27346), INT16_C( 29682), INT16_C( 2942), INT16_C( 22791), -INT16_C( 3600), INT16_C( 6355), INT16_C( 27390), -INT16_C( 19855), INT16_C( 26669), -INT16_C( 20165), -INT16_C( 4615), -INT16_C( 1951), -INT16_C( 24895), INT16_C( 22661), -INT16_C( 27645), INT16_C( 17725), INT16_C( 10606), -INT16_C( 17849), -INT16_C( 25352), -INT16_C( 20940), INT16_C( 15978), -INT16_C( 3211), INT16_C( 22645), -INT16_C( 14905), -INT16_C( 2048) }, { INT16_C( 0), INT16_C( 0), -INT16_C( 22992), INT16_C( 0), INT16_C( 6688), INT16_C( 32165), INT16_C( 0), -INT16_C( 27346), INT16_C( 13787), INT16_C( 6355), INT16_C( 31010), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 22661), INT16_C( 0), -INT16_C( 27645), INT16_C( 27128), INT16_C( 17725), -INT16_C( 7177), INT16_C( 10606), INT16_C( 30564), -INT16_C( 3211), INT16_C( 0), INT16_C( 22645), -INT16_C( 13157), INT16_C( 0), INT16_C( 0), -INT16_C( 2048) } }, { UINT32_C(1125662070), { -INT16_C( 412), -INT16_C( 5067), -INT16_C( 18848), INT16_C( 16441), INT16_C( 30688), INT16_C( 2036), -INT16_C( 14512), INT16_C( 28922), -INT16_C( 10547), INT16_C( 5608), -INT16_C( 7725), INT16_C( 17161), -INT16_C( 6765), -INT16_C( 29831), INT16_C( 1640), INT16_C( 26806), INT16_C( 22), INT16_C( 8972), INT16_C( 2241), -INT16_C( 13548), 
INT16_C( 9690), -INT16_C( 1001), -INT16_C( 12679), -INT16_C( 17557), -INT16_C( 32063), INT16_C( 2739), -INT16_C( 25982), INT16_C( 11081), -INT16_C( 1493), INT16_C( 21446), -INT16_C( 15362), INT16_C( 17298) }, { -INT16_C( 26320), INT16_C( 57), INT16_C( 26068), -INT16_C( 12152), -INT16_C( 18814), INT16_C( 2112), -INT16_C( 27559), INT16_C( 16733), INT16_C( 12148), -INT16_C( 3954), INT16_C( 19441), -INT16_C( 29400), -INT16_C( 25953), INT16_C( 8028), INT16_C( 8534), INT16_C( 23644), INT16_C( 3071), -INT16_C( 23791), -INT16_C( 32560), -INT16_C( 4881), INT16_C( 24814), -INT16_C( 16893), INT16_C( 12500), -INT16_C( 29674), -INT16_C( 24798), INT16_C( 10955), INT16_C( 28778), INT16_C( 18077), -INT16_C( 10207), -INT16_C( 17224), INT16_C( 22407), INT16_C( 28033) }, { INT16_C( 0), -INT16_C( 18814), INT16_C( 2036), INT16_C( 0), -INT16_C( 14512), -INT16_C( 27559), INT16_C( 28922), INT16_C( 0), -INT16_C( 6765), INT16_C( 0), -INT16_C( 29831), INT16_C( 8028), INT16_C( 1640), INT16_C( 8534), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 16893), -INT16_C( 12679), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1493), -INT16_C( 10207), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 17298), INT16_C( 0) } }, { UINT32_C(1175988243), { -INT16_C( 9005), -INT16_C( 7450), INT16_C( 28818), INT16_C( 14716), -INT16_C( 680), -INT16_C( 11928), -INT16_C( 21347), INT16_C( 17832), INT16_C( 2261), -INT16_C( 4821), -INT16_C( 27107), -INT16_C( 14943), INT16_C( 3268), -INT16_C( 2897), -INT16_C( 21863), -INT16_C( 21633), INT16_C( 10018), INT16_C( 3736), -INT16_C( 30650), INT16_C( 1389), INT16_C( 7485), -INT16_C( 27070), INT16_C( 5612), INT16_C( 23158), INT16_C( 20757), INT16_C( 2285), -INT16_C( 9096), INT16_C( 19496), INT16_C( 7748), INT16_C( 1378), INT16_C( 6746), -INT16_C( 13405) }, { INT16_C( 11234), INT16_C( 5178), -INT16_C( 1805), INT16_C( 17722), -INT16_C( 7606), -INT16_C( 2186), INT16_C( 14976), INT16_C( 17712), -INT16_C( 13492), INT16_C( 19417), INT16_C( 
28099), -INT16_C( 16208), -INT16_C( 19743), INT16_C( 27100), -INT16_C( 27893), INT16_C( 22652), INT16_C( 27564), INT16_C( 24472), -INT16_C( 1796), -INT16_C( 8192), INT16_C( 28360), -INT16_C( 21536), -INT16_C( 2280), -INT16_C( 4441), INT16_C( 32616), -INT16_C( 23765), INT16_C( 21903), -INT16_C( 18635), -INT16_C( 16578), INT16_C( 17190), -INT16_C( 9068), -INT16_C( 1634) }, { -INT16_C( 680), -INT16_C( 7606), INT16_C( 0), INT16_C( 0), -INT16_C( 21347), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 27100), INT16_C( 0), -INT16_C( 27893), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 21536), INT16_C( 5612), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 16578), INT16_C( 1378), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 13405), INT16_C( 0) } }, { UINT32_C( 492850220), { -INT16_C( 6421), INT16_C( 9651), -INT16_C( 15038), -INT16_C( 16043), INT16_C( 14467), INT16_C( 1397), -INT16_C( 13665), INT16_C( 3847), -INT16_C( 15670), INT16_C( 27446), -INT16_C( 9279), INT16_C( 18896), INT16_C( 15069), INT16_C( 26093), -INT16_C( 4977), -INT16_C( 9289), INT16_C( 3171), INT16_C( 18546), INT16_C( 17619), -INT16_C( 15106), INT16_C( 22524), -INT16_C( 9283), INT16_C( 17567), INT16_C( 5881), -INT16_C( 3092), INT16_C( 23893), -INT16_C( 11481), INT16_C( 6826), INT16_C( 22622), -INT16_C( 26035), INT16_C( 4149), INT16_C( 27942) }, { INT16_C( 9952), INT16_C( 14905), INT16_C( 17708), INT16_C( 21677), INT16_C( 14033), -INT16_C( 1371), -INT16_C( 1973), -INT16_C( 12936), -INT16_C( 10402), INT16_C( 10957), INT16_C( 22312), -INT16_C( 15335), -INT16_C( 29213), INT16_C( 10775), INT16_C( 31022), INT16_C( 9040), INT16_C( 941), -INT16_C( 12366), INT16_C( 11995), -INT16_C( 29745), -INT16_C( 23005), INT16_C( 18614), -INT16_C( 27596), INT16_C( 28981), -INT16_C( 31805), INT16_C( 14931), -INT16_C( 16637), -INT16_C( 18756), -INT16_C( 6770), INT16_C( 15929), INT16_C( 1340), -INT16_C( 1642) }, { INT16_C( 0), INT16_C( 0), 
INT16_C( 1397), -INT16_C( 1371), INT16_C( 0), -INT16_C( 1973), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 26093), INT16_C( 10775), INT16_C( 0), INT16_C( 0), -INT16_C( 9289), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 27596), INT16_C( 5881), INT16_C( 0), INT16_C( 22622), INT16_C( 0), -INT16_C( 26035), INT16_C( 15929), INT16_C( 4149), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { UINT32_C(2267271795), { -INT16_C( 12085), -INT16_C( 8585), INT16_C( 30855), INT16_C( 24874), INT16_C( 3232), INT16_C( 29238), -INT16_C( 1435), -INT16_C( 19292), -INT16_C( 5792), INT16_C( 29321), INT16_C( 11320), -INT16_C( 29834), INT16_C( 30002), INT16_C( 32669), INT16_C( 9406), -INT16_C( 13005), INT16_C( 1258), -INT16_C( 6299), INT16_C( 30417), INT16_C( 31259), -INT16_C( 25277), INT16_C( 7318), -INT16_C( 5367), -INT16_C( 1708), INT16_C( 2786), INT16_C( 27534), INT16_C( 17718), -INT16_C( 26828), INT16_C( 20354), -INT16_C( 21621), -INT16_C( 17029), INT16_C( 16184) }, { -INT16_C( 4953), -INT16_C( 8159), -INT16_C( 28527), INT16_C( 16284), -INT16_C( 2163), -INT16_C( 9830), INT16_C( 1701), INT16_C( 14221), -INT16_C( 2244), INT16_C( 20579), INT16_C( 27833), INT16_C( 19485), INT16_C( 24228), -INT16_C( 19404), -INT16_C( 20424), -INT16_C( 13747), INT16_C( 11076), INT16_C( 26566), INT16_C( 26136), -INT16_C( 32462), -INT16_C( 1965), INT16_C( 31311), INT16_C( 12201), INT16_C( 17170), INT16_C( 4679), INT16_C( 17945), INT16_C( 6933), -INT16_C( 8928), INT16_C( 28785), INT16_C( 794), INT16_C( 12209), -INT16_C( 20596) }, { INT16_C( 3232), -INT16_C( 2163), INT16_C( 0), INT16_C( 0), -INT16_C( 1435), INT16_C( 1701), -INT16_C( 19292), INT16_C( 0), INT16_C( 0), INT16_C( 24228), INT16_C( 0), INT16_C( 0), INT16_C( 9406), INT16_C( 0), -INT16_C( 13005), -INT16_C( 13747), -INT16_C( 25277), -INT16_C( 1965), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 12201), INT16_C( 0), INT16_C( 0), INT16_C( 20354), INT16_C( 28785), -INT16_C( 21621), INT16_C( 0), 
INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 20596) } }, { UINT32_C(2249471327), { -INT16_C( 10567), -INT16_C( 17382), -INT16_C( 26866), INT16_C( 15773), -INT16_C( 21880), -INT16_C( 14047), -INT16_C( 20550), INT16_C( 7462), -INT16_C( 7264), -INT16_C( 19719), INT16_C( 28970), -INT16_C( 30391), -INT16_C( 23447), -INT16_C( 2532), -INT16_C( 24664), INT16_C( 10752), INT16_C( 2467), -INT16_C( 24149), -INT16_C( 16391), INT16_C( 22573), INT16_C( 1849), -INT16_C( 11064), INT16_C( 4072), INT16_C( 27994), -INT16_C( 10634), INT16_C( 27220), INT16_C( 31793), -INT16_C( 8341), INT16_C( 30308), INT16_C( 4744), INT16_C( 16730), -INT16_C( 7411) }, { INT16_C( 24565), -INT16_C( 5672), INT16_C( 2698), INT16_C( 21398), INT16_C( 19145), INT16_C( 10654), INT16_C( 10041), -INT16_C( 30578), INT16_C( 32656), INT16_C( 21473), -INT16_C( 29332), -INT16_C( 2363), -INT16_C( 7007), -INT16_C( 6653), INT16_C( 3737), INT16_C( 12134), INT16_C( 4630), -INT16_C( 4063), INT16_C( 29774), INT16_C( 2575), INT16_C( 21057), -INT16_C( 1641), -INT16_C( 2609), -INT16_C( 2207), -INT16_C( 2444), -INT16_C( 11782), INT16_C( 25268), INT16_C( 2644), -INT16_C( 27399), -INT16_C( 26284), -INT16_C( 20468), -INT16_C( 26930) }, { -INT16_C( 21880), INT16_C( 19145), -INT16_C( 14047), INT16_C( 10654), -INT16_C( 20550), INT16_C( 0), INT16_C( 7462), INT16_C( 0), -INT16_C( 23447), INT16_C( 0), -INT16_C( 2532), INT16_C( 0), -INT16_C( 24664), INT16_C( 3737), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 11064), INT16_C( 0), INT16_C( 4072), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 27399), INT16_C( 4744), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 26930) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_maskz_unpackhi_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x32(r, 
simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm512_unpackhi_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { { -INT32_C( 2071539538), -INT32_C( 1292562566), -INT32_C( 411127984), -INT32_C( 1435371435), INT32_C( 903717735), -INT32_C( 1003795278), -INT32_C( 469164256), -INT32_C( 618004435), INT32_C( 1247785168), INT32_C( 285038015), INT32_C( 1509391108), -INT32_C( 637245069), -INT32_C( 1106255604), INT32_C( 1300380205), -INT32_C( 2043573415), INT32_C( 1533107083) }, { -INT32_C( 878264308), INT32_C( 450601749), -INT32_C( 1854680290), INT32_C( 1215002428), -INT32_C( 2063173032), INT32_C( 231901364), -INT32_C( 1617755373), INT32_C( 1811608671), -INT32_C( 902389580), INT32_C( 1642336835), INT32_C( 569530341), INT32_C( 661216719), -INT32_C( 1951567913), INT32_C( 177766391), -INT32_C( 492229757), -INT32_C( 733109217) }, { -INT32_C( 411127984), -INT32_C( 1854680290), -INT32_C( 1435371435), INT32_C( 1215002428), -INT32_C( 469164256), -INT32_C( 1617755373), -INT32_C( 618004435), INT32_C( 1811608671), INT32_C( 1509391108), INT32_C( 569530341), -INT32_C( 637245069), INT32_C( 661216719), -INT32_C( 2043573415), -INT32_C( 492229757), INT32_C( 1533107083), -INT32_C( 733109217) } }, { { -INT32_C( 2002877628), INT32_C( 2062123669), -INT32_C( 1466180391), INT32_C( 298845241), INT32_C( 1788640627), INT32_C( 2138387964), -INT32_C( 2141118880), INT32_C( 122990274), -INT32_C( 946867662), INT32_C( 1296201844), -INT32_C( 1896424108), INT32_C( 1436534498), INT32_C( 1069562691), -INT32_C( 775998096), INT32_C( 374415443), INT32_C( 18720207) }, { INT32_C( 197700759), INT32_C( 2035812900), -INT32_C( 905490712), INT32_C( 1461691924), INT32_C( 1385684961), INT32_C( 1747146260), INT32_C( 1165915254), -INT32_C( 1337550055), INT32_C( 1807421255), INT32_C( 31724313), INT32_C( 1993075554), INT32_C( 1942940561), -INT32_C( 540711478), INT32_C( 826796219), INT32_C( 1970783580), -INT32_C( 
1490698912) }, { -INT32_C( 1466180391), -INT32_C( 905490712), INT32_C( 298845241), INT32_C( 1461691924), -INT32_C( 2141118880), INT32_C( 1165915254), INT32_C( 122990274), -INT32_C( 1337550055), -INT32_C( 1896424108), INT32_C( 1993075554), INT32_C( 1436534498), INT32_C( 1942940561), INT32_C( 374415443), INT32_C( 1970783580), INT32_C( 18720207), -INT32_C( 1490698912) } }, { { -INT32_C( 434970420), INT32_C( 1441265651), INT32_C( 1959572450), INT32_C( 1760008862), -INT32_C( 1169707777), -INT32_C( 252997996), -INT32_C( 1285201325), -INT32_C( 329610720), INT32_C( 1574071658), INT32_C( 1186183780), INT32_C( 196771693), INT32_C( 410231065), -INT32_C( 506283187), -INT32_C( 1647198903), INT32_C( 1078998560), INT32_C( 707570624) }, { INT32_C( 2089352984), INT32_C( 650263481), -INT32_C( 751731270), INT32_C( 1827382302), -INT32_C( 1454523041), -INT32_C( 1706680454), INT32_C( 383424085), INT32_C( 1497368385), -INT32_C( 1076508666), -INT32_C( 1109026813), INT32_C( 865080853), INT32_C( 446659514), -INT32_C( 1279005384), INT32_C( 1632438540), -INT32_C( 512284513), INT32_C( 893040687) }, { INT32_C( 1959572450), -INT32_C( 751731270), INT32_C( 1760008862), INT32_C( 1827382302), -INT32_C( 1285201325), INT32_C( 383424085), -INT32_C( 329610720), INT32_C( 1497368385), INT32_C( 196771693), INT32_C( 865080853), INT32_C( 410231065), INT32_C( 446659514), INT32_C( 1078998560), -INT32_C( 512284513), INT32_C( 707570624), INT32_C( 893040687) } }, { { -INT32_C( 2064379776), -INT32_C( 1119757912), -INT32_C( 1443835153), -INT32_C( 2034003891), -INT32_C( 1992718723), INT32_C( 803899023), -INT32_C( 586128722), -INT32_C( 1710077414), INT32_C( 35522138), -INT32_C( 826318625), INT32_C( 2138615858), -INT32_C( 1123730624), INT32_C( 1363558082), INT32_C( 1937780933), -INT32_C( 1403940718), INT32_C( 893871067) }, { INT32_C( 1228432746), -INT32_C( 149358651), -INT32_C( 411594585), -INT32_C( 1918599989), -INT32_C( 2132809029), -INT32_C( 1376559333), -INT32_C( 883276560), INT32_C( 302031272), -INT32_C( 
866437114), -INT32_C( 675056848), -INT32_C( 809551357), INT32_C( 1918657463), INT32_C( 1777482574), -INT32_C( 1961433701), -INT32_C( 766087126), INT32_C( 417617425) }, { -INT32_C( 1443835153), -INT32_C( 411594585), -INT32_C( 2034003891), -INT32_C( 1918599989), -INT32_C( 586128722), -INT32_C( 883276560), -INT32_C( 1710077414), INT32_C( 302031272), INT32_C( 2138615858), -INT32_C( 809551357), -INT32_C( 1123730624), INT32_C( 1918657463), -INT32_C( 1403940718), -INT32_C( 766087126), INT32_C( 893871067), INT32_C( 417617425) } }, { { -INT32_C( 1075560561), -INT32_C( 1231640653), -INT32_C( 1719314974), INT32_C( 118219449), -INT32_C( 1200554723), INT32_C( 205752034), INT32_C( 148806135), INT32_C( 2132853488), -INT32_C( 1254226942), -INT32_C( 1905535828), -INT32_C( 483921622), -INT32_C( 253087021), INT32_C( 296311343), -INT32_C( 685839136), INT32_C( 1994456198), -INT32_C( 1040908097) }, { -INT32_C( 1317653755), INT32_C( 826269959), -INT32_C( 1525389614), -INT32_C( 946405736), INT32_C( 970473304), -INT32_C( 1324288469), -INT32_C( 1306005261), -INT32_C( 160228111), INT32_C( 1470622031), -INT32_C( 1651972406), -INT32_C( 465396404), -INT32_C( 190064485), INT32_C( 1127056151), INT32_C( 1844723066), INT32_C( 522132526), -INT32_C( 2011852232) }, { -INT32_C( 1719314974), -INT32_C( 1525389614), INT32_C( 118219449), -INT32_C( 946405736), INT32_C( 148806135), -INT32_C( 1306005261), INT32_C( 2132853488), -INT32_C( 160228111), -INT32_C( 483921622), -INT32_C( 465396404), -INT32_C( 253087021), -INT32_C( 190064485), INT32_C( 1994456198), INT32_C( 522132526), -INT32_C( 1040908097), -INT32_C( 2011852232) } }, { { INT32_C( 1172290683), -INT32_C( 287152222), -INT32_C( 1596840700), INT32_C( 362053117), INT32_C( 2052636928), INT32_C( 753356030), -INT32_C( 1588918680), INT32_C( 321478808), -INT32_C( 1101461476), INT32_C( 1957444463), INT32_C( 1578401376), -INT32_C( 59528965), INT32_C( 1735838569), -INT32_C( 2137760233), -INT32_C( 48111772), INT32_C( 1544571456) }, { -INT32_C( 1055168174), INT32_C( 
87410597), INT32_C( 1097025862), INT32_C( 1514002161), -INT32_C( 1178422111), INT32_C( 1983469074), INT32_C( 1970494005), -INT32_C( 153975644), -INT32_C( 1833439763), -INT32_C( 90640972), INT32_C( 675085110), INT32_C( 1937930706), INT32_C( 1059865645), -INT32_C( 793352806), INT32_C( 1665477055), -INT32_C( 1688660051) }, { -INT32_C( 1596840700), INT32_C( 1097025862), INT32_C( 362053117), INT32_C( 1514002161), -INT32_C( 1588918680), INT32_C( 1970494005), INT32_C( 321478808), -INT32_C( 153975644), INT32_C( 1578401376), INT32_C( 675085110), -INT32_C( 59528965), INT32_C( 1937930706), -INT32_C( 48111772), INT32_C( 1665477055), INT32_C( 1544571456), -INT32_C( 1688660051) } }, { { -INT32_C( 1188228860), INT32_C( 884196862), -INT32_C( 1822625855), -INT32_C( 1777934487), -INT32_C( 1093258461), INT32_C( 1485737112), INT32_C( 1673253813), -INT32_C( 268560917), INT32_C( 615000870), -INT32_C( 1302831887), -INT32_C( 1270500021), -INT32_C( 1219802220), INT32_C( 393552254), INT32_C( 1651442605), -INT32_C( 1027265833), INT32_C( 1706148671) }, { -INT32_C( 527869201), INT32_C( 26403510), INT32_C( 733403031), -INT32_C( 1579024094), -INT32_C( 810002398), INT32_C( 842082139), -INT32_C( 1846216879), -INT32_C( 1443453254), -INT32_C( 1232502784), -INT32_C( 105440414), INT32_C( 354708978), -INT32_C( 1867118994), -INT32_C( 1168150946), -INT32_C( 420703851), INT32_C( 1115152776), -INT32_C( 2014548345) }, { -INT32_C( 1822625855), INT32_C( 733403031), -INT32_C( 1777934487), -INT32_C( 1579024094), INT32_C( 1673253813), -INT32_C( 1846216879), -INT32_C( 268560917), -INT32_C( 1443453254), -INT32_C( 1270500021), INT32_C( 354708978), -INT32_C( 1219802220), -INT32_C( 1867118994), -INT32_C( 1027265833), INT32_C( 1115152776), INT32_C( 1706148671), -INT32_C( 2014548345) } }, { { INT32_C( 1346205166), -INT32_C( 2092305263), -INT32_C( 795316894), -INT32_C( 765374861), INT32_C( 1368178876), -INT32_C( 650610607), -INT32_C( 534991015), INT32_C( 191301661), INT32_C( 240886909), -INT32_C( 74275687), -INT32_C( 
2050282991), INT32_C( 894905465), INT32_C( 1049093101), -INT32_C( 1256669349), -INT32_C( 1936378770), -INT32_C( 1181221572) }, { INT32_C( 986248097), -INT32_C( 1456121193), -INT32_C( 47316604), INT32_C( 439584045), -INT32_C( 1017529752), -INT32_C( 411537031), -INT32_C( 512553307), -INT32_C( 1399190773), -INT32_C( 1779997954), INT32_C( 1094589628), INT32_C( 1262382109), INT32_C( 1499820529), -INT32_C( 1541554645), -INT32_C( 728984273), -INT32_C( 1363804253), INT32_C( 140201994) }, { -INT32_C( 795316894), -INT32_C( 47316604), -INT32_C( 765374861), INT32_C( 439584045), -INT32_C( 534991015), -INT32_C( 512553307), INT32_C( 191301661), -INT32_C( 1399190773), -INT32_C( 2050282991), INT32_C( 1262382109), INT32_C( 894905465), INT32_C( 1499820529), -INT32_C( 1936378770), -INT32_C( 1363804253), -INT32_C( 1181221572), INT32_C( 140201994) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_unpackhi_epi32(a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_unpackhi_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t src[16]; const simde__mmask16 k; const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { { -INT32_C( 845938351), INT32_C( 181822741), -INT32_C( 904073692), INT32_C( 1797241754), -INT32_C( 1981644741), -INT32_C( 725414939), -INT32_C( 676171526), -INT32_C( 1255219612), -INT32_C( 192757025), INT32_C( 1291737128), -INT32_C( 518579385), INT32_C( 306984662), -INT32_C( 1667551562), INT32_C( 863002169), INT32_C( 839525070), -INT32_C( 538494976) }, UINT16_C(27131), { -INT32_C( 759028780), -INT32_C( 2031285905), INT32_C( 918406378), INT32_C( 1902408662), -INT32_C( 2133877489), -INT32_C( 626876976), -INT32_C( 1240226865), INT32_C( 1444875906), -INT32_C( 1607868111), -INT32_C( 735635734), 
-INT32_C( 1341463334), INT32_C( 1730244183), -INT32_C( 605556725), INT32_C( 1555466637), -INT32_C( 1357723348), INT32_C( 134558423) }, { -INT32_C( 22532589), INT32_C( 533909317), INT32_C( 198237619), INT32_C( 1467150667), INT32_C( 1865570786), INT32_C( 265087202), -INT32_C( 2000757071), INT32_C( 613466896), INT32_C( 924989938), -INT32_C( 1135152120), INT32_C( 499590865), -INT32_C( 93046504), INT32_C( 1953080978), INT32_C( 1082340751), INT32_C( 617169172), -INT32_C( 146253563) }, { INT32_C( 918406378), INT32_C( 198237619), -INT32_C( 904073692), INT32_C( 1467150667), -INT32_C( 1240226865), -INT32_C( 2000757071), INT32_C( 1444875906), INT32_C( 613466896), -INT32_C( 1341463334), INT32_C( 1291737128), -INT32_C( 518579385), -INT32_C( 93046504), -INT32_C( 1667551562), INT32_C( 617169172), INT32_C( 134558423), -INT32_C( 538494976) } }, { { -INT32_C( 1691456878), INT32_C( 811041887), -INT32_C( 1018356053), -INT32_C( 373440169), -INT32_C( 144890264), INT32_C( 1882710364), INT32_C( 664010786), -INT32_C( 333521574), -INT32_C( 1501082553), INT32_C( 2094522065), INT32_C( 1396647164), INT32_C( 1312619750), INT32_C( 2118490658), -INT32_C( 1645314949), -INT32_C( 674921603), -INT32_C( 1480334496) }, UINT16_C(19248), { INT32_C( 623444302), -INT32_C( 1136056707), -INT32_C( 1229443207), -INT32_C( 1034888579), INT32_C( 1195363161), INT32_C( 768261224), INT32_C( 1443965587), INT32_C( 547439058), -INT32_C( 1085945022), INT32_C( 1786482417), INT32_C( 991966142), INT32_C( 1727950861), -INT32_C( 1532084933), -INT32_C( 1932429063), INT32_C( 1977803427), INT32_C( 1704297507) }, { INT32_C( 1076157007), INT32_C( 648716136), -INT32_C( 530396206), INT32_C( 2001100859), -INT32_C( 1776552803), INT32_C( 270724205), -INT32_C( 226163505), -INT32_C( 682091896), INT32_C( 1545043188), -INT32_C( 293354980), -INT32_C( 925964915), -INT32_C( 499182267), INT32_C( 1987598857), INT32_C( 377920071), INT32_C( 654838686), INT32_C( 436101157) }, { -INT32_C( 1691456878), INT32_C( 811041887), -INT32_C( 1018356053), 
-INT32_C( 373440169), INT32_C( 1443965587), -INT32_C( 226163505), INT32_C( 664010786), -INT32_C( 333521574), INT32_C( 991966142), -INT32_C( 925964915), INT32_C( 1396647164), -INT32_C( 499182267), INT32_C( 2118490658), -INT32_C( 1645314949), INT32_C( 1704297507), -INT32_C( 1480334496) } }, { { -INT32_C( 109701411), INT32_C( 1709701592), INT32_C( 590198494), -INT32_C( 737841717), INT32_C( 239828423), -INT32_C( 1255878377), INT32_C( 47984093), INT32_C( 1780276109), -INT32_C( 916221199), INT32_C( 1764641675), -INT32_C( 863216895), -INT32_C( 1868525112), INT32_C( 631171854), -INT32_C( 1696939075), INT32_C( 2107422704), -INT32_C( 2081900398) }, UINT16_C(19274), { INT32_C( 2056705356), -INT32_C( 891971778), INT32_C( 73113187), -INT32_C( 856725202), INT32_C( 1771023502), -INT32_C( 484409530), -INT32_C( 442780931), -INT32_C( 2127501771), -INT32_C( 84162629), -INT32_C( 1027288994), -INT32_C( 1664737170), INT32_C( 409580937), -INT32_C( 1467877278), INT32_C( 1972085112), -INT32_C( 2007357613), -INT32_C( 938898931) }, { -INT32_C( 1329462191), INT32_C( 1131579348), INT32_C( 819935399), INT32_C( 1330137325), -INT32_C( 1174877887), -INT32_C( 1121025174), -INT32_C( 1220179798), INT32_C( 1686130194), INT32_C( 655639122), INT32_C( 1886029513), -INT32_C( 1415493186), -INT32_C( 738465390), INT32_C( 495776691), INT32_C( 568048246), INT32_C( 1423450178), -INT32_C( 1044883345) }, { -INT32_C( 109701411), INT32_C( 819935399), INT32_C( 590198494), INT32_C( 1330137325), INT32_C( 239828423), -INT32_C( 1255878377), -INT32_C( 2127501771), INT32_C( 1780276109), -INT32_C( 1664737170), -INT32_C( 1415493186), -INT32_C( 863216895), -INT32_C( 738465390), INT32_C( 631171854), -INT32_C( 1696939075), -INT32_C( 938898931), -INT32_C( 2081900398) } }, { { INT32_C( 1692978331), INT32_C( 282415698), INT32_C( 784103068), INT32_C( 302102367), INT32_C( 540052906), -INT32_C( 1992225977), -INT32_C( 1696785877), INT32_C( 224171634), -INT32_C( 1250802590), INT32_C( 868566935), INT32_C( 476152253), -INT32_C( 
500211144), INT32_C( 906190831), -INT32_C( 1782627222), -INT32_C( 785343394), -INT32_C( 1780577230) }, UINT16_C(20688), { INT32_C( 261580618), -INT32_C( 74361702), -INT32_C( 1604466319), -INT32_C( 1342223188), INT32_C( 1106471298), INT32_C( 786322174), INT32_C( 28971043), -INT32_C( 279802971), -INT32_C( 1912608270), -INT32_C( 1350004674), INT32_C( 89122393), -INT32_C( 1246474702), -INT32_C( 1242126153), INT32_C( 501535994), -INT32_C( 1977704731), INT32_C( 477786153) }, { -INT32_C( 1733723814), INT32_C( 1648832777), INT32_C( 1248368151), -INT32_C( 1677779740), -INT32_C( 1101990460), -INT32_C( 1344588598), -INT32_C( 63309101), -INT32_C( 1005014166), INT32_C( 895271212), INT32_C( 177775603), INT32_C( 492044345), -INT32_C( 508013796), INT32_C( 329189705), INT32_C( 281180989), -INT32_C( 552731787), -INT32_C( 609999441) }, { INT32_C( 1692978331), INT32_C( 282415698), INT32_C( 784103068), INT32_C( 302102367), INT32_C( 28971043), -INT32_C( 1992225977), -INT32_C( 279802971), -INT32_C( 1005014166), -INT32_C( 1250802590), INT32_C( 868566935), INT32_C( 476152253), -INT32_C( 500211144), -INT32_C( 1977704731), -INT32_C( 1782627222), INT32_C( 477786153), -INT32_C( 1780577230) } }, { { -INT32_C( 636419865), -INT32_C( 572216924), -INT32_C( 973391447), -INT32_C( 710495348), -INT32_C( 85440836), INT32_C( 906668737), INT32_C( 1427445670), INT32_C( 607172925), INT32_C( 1593721274), -INT32_C( 1824791830), -INT32_C( 1470548453), -INT32_C( 1501691926), INT32_C( 111175237), -INT32_C( 1237538032), -INT32_C( 16035134), -INT32_C( 987546613) }, UINT16_C( 8573), { INT32_C( 1594124323), INT32_C( 1419059195), INT32_C( 1163165639), -INT32_C( 961832666), -INT32_C( 596526176), INT32_C( 2100180082), INT32_C( 1471756851), INT32_C( 595081215), -INT32_C( 1702724449), INT32_C( 1676613532), -INT32_C( 1113046121), INT32_C( 2088981468), -INT32_C( 2124810993), INT32_C( 1560184617), -INT32_C( 1045186367), -INT32_C( 1897648913) }, { INT32_C( 1143498408), INT32_C( 346560381), INT32_C( 936464474), -INT32_C( 
1280092764), INT32_C( 1932790858), INT32_C( 1439642260), -INT32_C( 652835862), INT32_C( 1466432175), -INT32_C( 560164768), INT32_C( 49431719), INT32_C( 943309972), INT32_C( 1676405785), -INT32_C( 1915346951), INT32_C( 1021486418), -INT32_C( 686360280), INT32_C( 1412398579) }, { INT32_C( 1163165639), -INT32_C( 572216924), -INT32_C( 961832666), -INT32_C( 1280092764), INT32_C( 1471756851), -INT32_C( 652835862), INT32_C( 595081215), INT32_C( 607172925), -INT32_C( 1113046121), -INT32_C( 1824791830), -INT32_C( 1470548453), -INT32_C( 1501691926), INT32_C( 111175237), -INT32_C( 686360280), -INT32_C( 16035134), -INT32_C( 987546613) } }, { { -INT32_C( 1254962419), -INT32_C( 1548278769), INT32_C( 31256808), -INT32_C( 714815524), INT32_C( 962738919), INT32_C( 141903328), INT32_C( 836799294), INT32_C( 377818889), -INT32_C( 372525094), -INT32_C( 997424420), INT32_C( 1338402930), INT32_C( 388246064), INT32_C( 1162905445), INT32_C( 172869068), INT32_C( 1513827665), INT32_C( 376488252) }, UINT16_C(15480), { -INT32_C( 1933683457), -INT32_C( 537644775), -INT32_C( 1542904704), -INT32_C( 1960088005), INT32_C( 22149043), INT32_C( 1026466305), INT32_C( 1828613116), -INT32_C( 2119666047), -INT32_C( 468883509), INT32_C( 415433112), INT32_C( 1639762982), -INT32_C( 286398405), -INT32_C( 504414497), -INT32_C( 602005792), INT32_C( 172498057), INT32_C( 1569452434) }, { -INT32_C( 264136616), -INT32_C( 1073216358), INT32_C( 203539665), -INT32_C( 1929769300), INT32_C( 745400908), -INT32_C( 1861711096), INT32_C( 966480295), -INT32_C( 1701370046), INT32_C( 1502271679), -INT32_C( 1374055715), INT32_C( 62536534), -INT32_C( 1768967095), -INT32_C( 1497170786), INT32_C( 775408519), INT32_C( 1583862556), -INT32_C( 1174864134) }, { -INT32_C( 1254962419), -INT32_C( 1548278769), INT32_C( 31256808), -INT32_C( 1929769300), INT32_C( 1828613116), INT32_C( 966480295), -INT32_C( 2119666047), INT32_C( 377818889), -INT32_C( 372525094), -INT32_C( 997424420), -INT32_C( 286398405), -INT32_C( 1768967095), INT32_C( 
172498057), INT32_C( 1583862556), INT32_C( 1513827665), INT32_C( 376488252) } }, { { -INT32_C( 1273789737), INT32_C( 1784818708), -INT32_C( 1318249369), INT32_C( 1866988752), INT32_C( 2132085240), -INT32_C( 257077804), INT32_C( 441324832), -INT32_C( 355252717), -INT32_C( 593566008), INT32_C( 2051473427), -INT32_C( 315902948), -INT32_C( 1453559119), INT32_C( 1344893308), -INT32_C( 566110530), -INT32_C( 487445), -INT32_C( 1628844842) }, UINT16_C(34994), { -INT32_C( 1031223941), INT32_C( 1802937664), -INT32_C( 287430766), -INT32_C( 94414127), -INT32_C( 338682198), -INT32_C( 176440068), -INT32_C( 1530834501), INT32_C( 1781298159), INT32_C( 2032973113), -INT32_C( 320560294), -INT32_C( 1680162102), -INT32_C( 946521828), INT32_C( 1420977751), -INT32_C( 582406878), INT32_C( 1853950590), -INT32_C( 1227313539) }, { -INT32_C( 1120991901), INT32_C( 1923683496), -INT32_C( 217152554), INT32_C( 347775932), INT32_C( 728263689), INT32_C( 403222938), INT32_C( 948341435), -INT32_C( 1678811336), INT32_C( 207101540), INT32_C( 159252786), INT32_C( 1090292868), INT32_C( 961852976), -INT32_C( 1134183390), INT32_C( 685075821), INT32_C( 794909687), INT32_C( 516575418) }, { -INT32_C( 1273789737), -INT32_C( 217152554), -INT32_C( 1318249369), INT32_C( 1866988752), -INT32_C( 1530834501), INT32_C( 948341435), INT32_C( 441324832), -INT32_C( 1678811336), -INT32_C( 593566008), INT32_C( 2051473427), -INT32_C( 315902948), INT32_C( 961852976), INT32_C( 1344893308), -INT32_C( 566110530), -INT32_C( 487445), INT32_C( 516575418) } }, { { -INT32_C( 1591074194), -INT32_C( 1481987805), INT32_C( 1709745717), INT32_C( 2124364892), INT32_C( 1715143929), INT32_C( 1770983537), INT32_C( 647557227), -INT32_C( 1371249856), -INT32_C( 1454411899), INT32_C( 1297152280), -INT32_C( 72140641), INT32_C( 1853510261), -INT32_C( 942361258), INT32_C( 808477637), -INT32_C( 1823028909), -INT32_C( 1321034964) }, UINT16_C(37130), { -INT32_C( 1416945062), INT32_C( 585312879), -INT32_C( 1602987994), -INT32_C( 1688876346), -INT32_C( 
1023534446), -INT32_C( 1584704950), INT32_C( 658290661), -INT32_C( 1011267991), -INT32_C( 680639384), -INT32_C( 1795534226), INT32_C( 1899261610), -INT32_C( 888370887), -INT32_C( 292746589), INT32_C( 1116674396), INT32_C( 963234768), INT32_C( 2046632465) }, { -INT32_C( 716149914), INT32_C( 1751730877), -INT32_C( 220619079), -INT32_C( 876747481), INT32_C( 1287211759), INT32_C( 881739875), INT32_C( 611186451), -INT32_C( 2137167334), -INT32_C( 1823085098), -INT32_C( 235159752), -INT32_C( 2065443747), -INT32_C( 1454399303), INT32_C( 1324681450), INT32_C( 1669497680), -INT32_C( 1803030662), INT32_C( 806691930) }, { -INT32_C( 1591074194), -INT32_C( 220619079), INT32_C( 1709745717), -INT32_C( 876747481), INT32_C( 1715143929), INT32_C( 1770983537), INT32_C( 647557227), -INT32_C( 1371249856), INT32_C( 1899261610), INT32_C( 1297152280), -INT32_C( 72140641), INT32_C( 1853510261), INT32_C( 963234768), INT32_C( 808477637), -INT32_C( 1823028909), INT32_C( 806691930) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi32(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_mask_unpackhi_epi32(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_unpackhi_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { UINT16_C(51483), { INT32_C( 1016400561), INT32_C( 499657637), INT32_C( 914546290), INT32_C( 167035570), INT32_C( 2082096633), INT32_C( 1908618834), -INT32_C( 2117789817), INT32_C( 2001461702), -INT32_C( 1850482453), INT32_C( 2108586763), INT32_C( 196292697), -INT32_C( 350902286), INT32_C( 1063727085), -INT32_C( 307221915), INT32_C( 141456962), INT32_C( 1115732311) }, { -INT32_C( 1529662567), INT32_C( 
136413615), -INT32_C( 1542138446), INT32_C( 1771055484), -INT32_C( 1112934568), INT32_C( 1688885538), INT32_C( 644618703), INT32_C( 1818881234), -INT32_C( 821019616), INT32_C( 1893151422), -INT32_C( 2095781113), INT32_C( 1827513364), -INT32_C( 1104505188), -INT32_C( 1088236305), -INT32_C( 1058697491), -INT32_C( 1691595141) }, { INT32_C( 914546290), -INT32_C( 1542138446), INT32_C( 0), INT32_C( 1771055484), -INT32_C( 2117789817), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 196292697), INT32_C( 0), INT32_C( 0), INT32_C( 1827513364), INT32_C( 0), INT32_C( 0), INT32_C( 1115732311), -INT32_C( 1691595141) } }, { UINT16_C(15499), { INT32_C( 1097746794), -INT32_C( 852658759), -INT32_C( 428719623), -INT32_C( 662958418), -INT32_C( 290690100), INT32_C( 276601386), -INT32_C( 2040596646), -INT32_C( 54335086), -INT32_C( 348245710), -INT32_C( 1615238234), INT32_C( 1518676908), INT32_C( 87163193), -INT32_C( 1745625235), -INT32_C( 744001671), -INT32_C( 111606169), INT32_C( 586554351) }, { -INT32_C( 217238452), INT32_C( 1267910303), INT32_C( 715528433), -INT32_C( 2043684839), INT32_C( 807281335), -INT32_C( 117193326), -INT32_C( 1141744437), -INT32_C( 992089992), -INT32_C( 1145574884), -INT32_C( 1576580431), INT32_C( 2077076834), INT32_C( 1023605893), -INT32_C( 1318248417), -INT32_C( 1314229787), INT32_C( 1164745933), -INT32_C( 1593161339) }, { -INT32_C( 428719623), INT32_C( 715528433), INT32_C( 0), -INT32_C( 2043684839), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 992089992), INT32_C( 0), INT32_C( 0), INT32_C( 87163193), INT32_C( 1023605893), -INT32_C( 111606169), INT32_C( 1164745933), INT32_C( 0), INT32_C( 0) } }, { UINT16_C(49459), { INT32_C( 1678500957), INT32_C( 1410428295), -INT32_C( 347040023), INT32_C( 1091268563), -INT32_C( 894242784), INT32_C( 224886689), -INT32_C( 816386875), -INT32_C( 359626099), -INT32_C( 179397522), -INT32_C( 230072567), -INT32_C( 908223754), INT32_C( 705357833), INT32_C( 2062859481), INT32_C( 25713468), -INT32_C( 707731897), -INT32_C( 
675323800) }, { INT32_C( 114036476), INT32_C( 1677203053), -INT32_C( 1188178256), -INT32_C( 1746716738), INT32_C( 806541556), INT32_C( 2066848307), -INT32_C( 514850440), INT32_C( 1589120865), -INT32_C( 1956346851), INT32_C( 1257135258), -INT32_C( 251389134), INT32_C( 1200154451), -INT32_C( 210265409), -INT32_C( 1385256908), INT32_C( 177127081), -INT32_C( 345487667) }, { -INT32_C( 347040023), -INT32_C( 1188178256), INT32_C( 0), INT32_C( 0), -INT32_C( 816386875), -INT32_C( 514850440), INT32_C( 0), INT32_C( 0), -INT32_C( 908223754), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 675323800), -INT32_C( 345487667) } }, { UINT16_C(52426), { INT32_C( 1680434550), -INT32_C( 1283433553), -INT32_C( 711208116), -INT32_C( 1854973414), -INT32_C( 1153850291), -INT32_C( 545660335), INT32_C( 1428506604), -INT32_C( 1474171086), INT32_C( 67914324), -INT32_C( 222851930), INT32_C( 2043105887), -INT32_C( 99993683), INT32_C( 750142427), INT32_C( 302722853), -INT32_C( 1469632394), INT32_C( 1951434783) }, { INT32_C( 2037931475), INT32_C( 1231761385), INT32_C( 801256322), INT32_C( 1160367466), INT32_C( 913432336), -INT32_C( 2058847217), -INT32_C( 886132820), INT32_C( 171933239), -INT32_C( 981223461), INT32_C( 1762586599), -INT32_C( 1936142302), -INT32_C( 1361985123), -INT32_C( 1327218015), INT32_C( 1781869758), INT32_C( 305488859), -INT32_C( 1122142750) }, { INT32_C( 0), INT32_C( 801256322), INT32_C( 0), INT32_C( 1160367466), INT32_C( 0), INT32_C( 0), -INT32_C( 1474171086), INT32_C( 171933239), INT32_C( 0), INT32_C( 0), -INT32_C( 99993683), -INT32_C( 1361985123), INT32_C( 0), INT32_C( 0), INT32_C( 1951434783), -INT32_C( 1122142750) } }, { UINT16_C(41004), { -INT32_C( 1869671550), INT32_C( 341946748), INT32_C( 248970813), -INT32_C( 1873774676), INT32_C( 1555828263), -INT32_C( 1363110024), -INT32_C( 953965910), -INT32_C( 496545953), -INT32_C( 546113693), -INT32_C( 420162648), INT32_C( 2129971922), INT32_C( 1745831233), -INT32_C( 859518125), INT32_C( 226133091), 
-INT32_C( 2032886490), INT32_C( 1332231148) }, { -INT32_C( 617620942), -INT32_C( 2134826066), INT32_C( 788444653), INT32_C( 1318456826), INT32_C( 1008360153), INT32_C( 88708319), INT32_C( 495656241), -INT32_C( 1955728552), INT32_C( 2087099598), -INT32_C( 1392760897), -INT32_C( 673514788), -INT32_C( 517640184), -INT32_C( 1457700918), INT32_C( 78603987), -INT32_C( 618579325), -INT32_C( 60387794) }, { INT32_C( 0), INT32_C( 0), -INT32_C( 1873774676), INT32_C( 1318456826), INT32_C( 0), INT32_C( 495656241), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 618579325), INT32_C( 0), -INT32_C( 60387794) } }, { UINT16_C(52266), { INT32_C( 1962142072), INT32_C( 1869533333), -INT32_C( 857770329), INT32_C( 1946921559), INT32_C( 47898195), INT32_C( 54353378), -INT32_C( 1617794247), -INT32_C( 546522009), INT32_C( 995319718), -INT32_C( 693386961), -INT32_C( 1885173192), -INT32_C( 2012959436), INT32_C( 1837817483), INT32_C( 1970390844), -INT32_C( 1726741710), INT32_C( 1702396095) }, { INT32_C( 262261728), -INT32_C( 974762867), INT32_C( 190089430), -INT32_C( 1047308234), INT32_C( 1932467511), INT32_C( 401121509), INT32_C( 1655831715), INT32_C( 1573398909), -INT32_C( 2106824203), -INT32_C( 1958260043), INT32_C( 295082971), INT32_C( 735193588), INT32_C( 748552519), INT32_C( 1145276065), -INT32_C( 5770110), INT32_C( 324824862) }, { INT32_C( 0), INT32_C( 190089430), INT32_C( 0), -INT32_C( 1047308234), INT32_C( 0), INT32_C( 1655831715), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 2012959436), INT32_C( 735193588), INT32_C( 0), INT32_C( 0), INT32_C( 1702396095), INT32_C( 324824862) } }, { UINT16_C(51672), { -INT32_C( 602174059), -INT32_C( 1351092712), -INT32_C( 623351033), INT32_C( 886775702), -INT32_C( 1883603637), INT32_C( 1753431489), -INT32_C( 1713921732), INT32_C( 1231204276), INT32_C( 1428520252), INT32_C( 2063900020), -INT32_C( 1621763064), INT32_C( 1205023228), INT32_C( 1876332206), INT32_C( 131619531), -INT32_C( 
1331646469), -INT32_C( 1678179745) }, { -INT32_C( 185589888), -INT32_C( 999295812), -INT32_C( 849099311), -INT32_C( 1542178826), -INT32_C( 1877742651), INT32_C( 1083697989), -INT32_C( 84920165), -INT32_C( 1164580294), -INT32_C( 978418167), INT32_C( 1267277434), -INT32_C( 652677661), -INT32_C( 377672412), INT32_C( 1568247832), INT32_C( 396169340), -INT32_C( 2096001464), -INT32_C( 2126666120) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1542178826), -INT32_C( 1713921732), INT32_C( 0), INT32_C( 1231204276), -INT32_C( 1164580294), -INT32_C( 1621763064), INT32_C( 0), INT32_C( 0), -INT32_C( 377672412), INT32_C( 0), INT32_C( 0), -INT32_C( 1678179745), -INT32_C( 2126666120) } }, { UINT16_C(60460), { -INT32_C( 821385402), INT32_C( 196931058), INT32_C( 1127801030), INT32_C( 1121145033), INT32_C( 1246973869), INT32_C( 2010684262), INT32_C( 1545490462), INT32_C( 390613713), -INT32_C( 454601999), INT32_C( 99590975), INT32_C( 1296574340), INT32_C( 613423991), -INT32_C( 781261973), -INT32_C( 1656141954), INT32_C( 1777952663), -INT32_C( 1551875663) }, { -INT32_C( 762878061), -INT32_C( 1881704949), INT32_C( 349970333), -INT32_C( 1523028934), -INT32_C( 831019441), -INT32_C( 2056535827), -INT32_C( 655465433), INT32_C( 947613349), -INT32_C( 519437610), INT32_C( 359719288), INT32_C( 975784960), INT32_C( 148922809), -INT32_C( 170502392), INT32_C( 1048199447), INT32_C( 1242982565), -INT32_C( 1383951657) }, { INT32_C( 0), INT32_C( 0), INT32_C( 1121145033), -INT32_C( 1523028934), INT32_C( 0), -INT32_C( 655465433), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 613423991), INT32_C( 148922809), INT32_C( 0), INT32_C( 1242982565), -INT32_C( 1551875663), -INT32_C( 1383951657) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_maskz_unpackhi_epi32(test_vec[i].k, a, b); 
simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_unpackhi_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { { INT64_C( 5674973837736279222), INT64_C( 1704843706748687788), -INT64_C( 2673855454977685508), INT64_C( 7390821164380328012), -INT64_C( 5519861701786044300), -INT64_C( 8880554723753151143), -INT64_C( 1373376201226907275), INT64_C( 3598748551275955674) }, { -INT64_C( 8104881503415520309), INT64_C( 3417945947432930147), -INT64_C( 605674648809090292), INT64_C( 8604534647075985787), -INT64_C( 4401197301858979547), INT64_C( 3207437185291396506), INT64_C( 8899798961286072686), -INT64_C( 7139017439677512502) }, { INT64_C( 1704843706748687788), INT64_C( 3417945947432930147), INT64_C( 7390821164380328012), INT64_C( 8604534647075985787), -INT64_C( 8880554723753151143), INT64_C( 3207437185291396506), INT64_C( 3598748551275955674), -INT64_C( 7139017439677512502) } }, { { -INT64_C( 6993840614198987057), INT64_C( 2762244475499748360), INT64_C( 7546683454867451199), INT64_C( 6578294010817703736), INT64_C( 3360944426133832193), INT64_C( 5544964737207866902), INT64_C( 1287825918278739417), -INT64_C( 8530650538818159179) }, { -INT64_C( 5286887723346483380), INT64_C( 9016900953806534857), -INT64_C( 6267087682875839204), INT64_C( 2063926056140392082), INT64_C( 3388730460615947184), INT64_C( 2254375981441977128), -INT64_C( 6525123138314551595), -INT64_C( 7508235156986044703) }, { INT64_C( 2762244475499748360), INT64_C( 9016900953806534857), INT64_C( 6578294010817703736), INT64_C( 2063926056140392082), INT64_C( 5544964737207866902), INT64_C( 2254375981441977128), -INT64_C( 8530650538818159179), -INT64_C( 7508235156986044703) } }, { { -INT64_C( 5463729958601401194), INT64_C( 3358500432824512889), -INT64_C( 6070735980921901304), INT64_C( 8810495403391484103), -INT64_C( 7653970804518539898), INT64_C( 1899300861932292308), 
-INT64_C( 8948099700948053413), -INT64_C( 2390154280872890762) }, { -INT64_C( 4515780095567115612), INT64_C( 7867024461783786753), -INT64_C( 3397574104711994469), INT64_C( 2795044881987931026), INT64_C( 8428269494163847753), -INT64_C( 3954524210633120353), -INT64_C( 6321935581393735400), -INT64_C( 5355805335178023676) }, { INT64_C( 3358500432824512889), INT64_C( 7867024461783786753), INT64_C( 8810495403391484103), INT64_C( 2795044881987931026), INT64_C( 1899300861932292308), -INT64_C( 3954524210633120353), -INT64_C( 2390154280872890762), -INT64_C( 5355805335178023676) } }, { { INT64_C( 7781906569962950934), INT64_C( 6034324575617844679), -INT64_C( 2690891062124880405), -INT64_C( 6010321335520052362), INT64_C( 3158206844560114334), INT64_C( 1533438474208735417), -INT64_C( 8400130205098308421), INT64_C( 8591747521593593337) }, { INT64_C( 6923639730661847686), -INT64_C( 3537969947851082330), -INT64_C( 5456166221627982681), INT64_C( 4533858979574756506), INT64_C( 2880401063509806323), -INT64_C( 8046561543942812302), INT64_C( 1965984521606968538), INT64_C( 17867119812119624) }, { INT64_C( 6034324575617844679), -INT64_C( 3537969947851082330), -INT64_C( 6010321335520052362), INT64_C( 4533858979574756506), INT64_C( 1533438474208735417), -INT64_C( 8046561543942812302), INT64_C( 8591747521593593337), INT64_C( 17867119812119624) } }, { { INT64_C( 1796817116571723846), -INT64_C( 6964269516958341152), -INT64_C( 9060261440426629722), -INT64_C( 7920210737626885624), INT64_C( 2515339843687001898), INT64_C( 2131122330677249327), INT64_C( 5450911646426122218), INT64_C( 1528821352979572825) }, { INT64_C( 687588226762117413), -INT64_C( 6564117700413655725), INT64_C( 929550991278680920), -INT64_C( 2047191976181086315), -INT64_C( 7179347720035140742), INT64_C( 4875232268415031882), INT64_C( 8230545872734023800), INT64_C( 5669620523642120716) }, { -INT64_C( 6964269516958341152), -INT64_C( 6564117700413655725), -INT64_C( 7920210737626885624), -INT64_C( 2047191976181086315), INT64_C( 
2131122330677249327), INT64_C( 4875232268415031882), INT64_C( 1528821352979572825), INT64_C( 5669620523642120716) } }, { { INT64_C( 1795247188014725380), INT64_C( 1551707346594254681), INT64_C( 4072793568706586421), INT64_C( 397637285874727010), INT64_C( 9119855377055426218), -INT64_C( 6323814244269445748), INT64_C( 6419671222899670707), INT64_C( 8484240513725045432) }, { -INT64_C( 2676125203499773639), -INT64_C( 6969530513325121493), -INT64_C( 428522275801791810), -INT64_C( 1661421789014418256), -INT64_C( 8598740181978784125), -INT64_C( 6102819577427219005), INT64_C( 1273231586017463407), INT64_C( 6026518686306491914) }, { INT64_C( 1551707346594254681), -INT64_C( 6969530513325121493), INT64_C( 397637285874727010), -INT64_C( 1661421789014418256), -INT64_C( 6323814244269445748), -INT64_C( 6102819577427219005), INT64_C( 8484240513725045432), INT64_C( 6026518686306491914) } }, { { -INT64_C( 3584356179152410532), INT64_C( 4906051486565099388), -INT64_C( 1366101536430197353), INT64_C( 8224496603040749312), -INT64_C( 7923120276673706191), -INT64_C( 1446479750494385965), -INT64_C( 8910389985721130105), INT64_C( 8578925485341605510) }, { INT64_C( 8676113926111639998), -INT64_C( 178157761981226169), -INT64_C( 2163206275760428047), -INT64_C( 8900366555989324099), INT64_C( 3882596483972280260), INT64_C( 114375668867858979), INT64_C( 8790870050429468099), -INT64_C( 3815965023055020865) }, { INT64_C( 4906051486565099388), -INT64_C( 178157761981226169), INT64_C( 8224496603040749312), -INT64_C( 8900366555989324099), -INT64_C( 1446479750494385965), INT64_C( 114375668867858979), INT64_C( 8578925485341605510), -INT64_C( 3815965023055020865) } }, { { INT64_C( 5064883971355337511), INT64_C( 3975118758575282968), -INT64_C( 3339045211638226573), INT64_C( 5017679719933880016), INT64_C( 229020489171523268), -INT64_C( 762702069681615424), -INT64_C( 2028082101023817737), INT64_C( 6221543192684802548) }, { INT64_C( 1522616162351690034), -INT64_C( 7282123080597942920), -INT64_C( 
2924385492342205069), -INT64_C( 5505579620542896977), -INT64_C( 5089926017148922127), -INT64_C( 3783407034755209920), INT64_C( 5724845474327962296), -INT64_C( 8352723087043503136) }, { INT64_C( 3975118758575282968), -INT64_C( 7282123080597942920), INT64_C( 5017679719933880016), -INT64_C( 5505579620542896977), -INT64_C( 762702069681615424), -INT64_C( 3783407034755209920), INT64_C( 6221543192684802548), -INT64_C( 8352723087043503136) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_unpackhi_epi64(a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_unpackhi_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t src[8]; const simde__mmask8 k; const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { { INT64_C( 7069803644757198776), -INT64_C( 4615162892095733676), -INT64_C( 592481269516063786), -INT64_C( 1951860547106282793), -INT64_C( 4776033971357979552), INT64_C( 5002361671084263902), -INT64_C( 4358438887271442656), INT64_C( 2173316391371875352) }, UINT8_C(151), { -INT64_C( 709390414091619285), INT64_C( 1573069356133635006), INT64_C( 606233209243590810), INT64_C( 128886772494413063), INT64_C( 5007284282810091865), INT64_C( 2853255272115790405), INT64_C( 7429215763538876973), INT64_C( 2190546868698210446) }, { INT64_C( 7491494037575544876), -INT64_C( 1605388421232398332), -INT64_C( 6997445859961327587), INT64_C( 925324662345530868), -INT64_C( 6655275033583256551), INT64_C( 3336302082892533414), INT64_C( 5678071947195074077), INT64_C( 5907279296854478606) }, { INT64_C( 1573069356133635006), -INT64_C( 1605388421232398332), INT64_C( 128886772494413063), -INT64_C( 1951860547106282793), INT64_C( 2853255272115790405), INT64_C( 5002361671084263902), -INT64_C( 4358438887271442656), INT64_C( 
5907279296854478606) } }, { { INT64_C( 1485397804821887067), -INT64_C( 2594421111010111041), INT64_C( 5310541361681377801), -INT64_C( 851970217608758620), -INT64_C( 6094342348518540311), -INT64_C( 5694949449454066851), INT64_C( 8766475294402150436), -INT64_C( 500319502935400576) }, UINT8_C( 61), { -INT64_C( 6780779749143129604), -INT64_C( 581535033743376305), -INT64_C( 3219152390828831732), INT64_C( 8600428855462947994), -INT64_C( 6301129233037032402), -INT64_C( 2750750709133486620), -INT64_C( 9191284002811764566), INT64_C( 6799862801798878315) }, { INT64_C( 2629214055428676731), -INT64_C( 768756215722218067), INT64_C( 7771248126770479220), -INT64_C( 1592112153495505320), -INT64_C( 5229179405259165430), -INT64_C( 5231346292924190938), INT64_C( 6192989337208027527), -INT64_C( 380336530459311520) }, { -INT64_C( 581535033743376305), -INT64_C( 2594421111010111041), INT64_C( 8600428855462947994), -INT64_C( 1592112153495505320), -INT64_C( 2750750709133486620), -INT64_C( 5231346292924190938), INT64_C( 8766475294402150436), -INT64_C( 500319502935400576) } }, { { -INT64_C( 3046769058687715850), -INT64_C( 712503947079194334), INT64_C( 8645343478219091111), -INT64_C( 7810088800309880655), -INT64_C( 5334713237796298637), -INT64_C( 7811151688720826829), INT64_C( 3958718521792730723), -INT64_C( 6468731249084647927) }, UINT8_C( 84), { -INT64_C( 122831116445431603), -INT64_C( 7038706715525976393), -INT64_C( 1013211252603499851), -INT64_C( 6991614324525197775), INT64_C( 3770077475849560211), INT64_C( 8004712632414256368), -INT64_C( 4257513995879761346), INT64_C( 7875214485939097308) }, { -INT64_C( 940388094190813119), -INT64_C( 4073364536899791537), -INT64_C( 1219725549161624134), INT64_C( 2233757779520879544), INT64_C( 2439491752443101158), INT64_C( 9166411585423033573), INT64_C( 6373121923404920175), -INT64_C( 2785500613664174757) }, { -INT64_C( 3046769058687715850), -INT64_C( 712503947079194334), -INT64_C( 6991614324525197775), -INT64_C( 7810088800309880655), INT64_C( 
8004712632414256368), -INT64_C( 7811151688720826829), INT64_C( 7875214485939097308), -INT64_C( 6468731249084647927) } }, { { -INT64_C( 1613524498428840792), -INT64_C( 2943791305104767519), INT64_C( 3349213380578243459), -INT64_C( 674204802864421672), -INT64_C( 1954830502096509198), INT64_C( 5192843631966959569), INT64_C( 911105038106651042), -INT64_C( 4932148811623101623) }, UINT8_C(226), { -INT64_C( 3973360625745808899), -INT64_C( 8239945592549338029), -INT64_C( 3859920510259160257), -INT64_C( 8141295836826761818), INT64_C( 529019795796519679), -INT64_C( 488165872602782584), INT64_C( 8093966657315011530), INT64_C( 3725204992451339920) }, { INT64_C( 5132725608845101952), -INT64_C( 4633873112000936449), -INT64_C( 1485453012652994776), INT64_C( 858709281465037954), -INT64_C( 4102644257746080502), INT64_C( 6373210507922310079), -INT64_C( 2790634902909175040), INT64_C( 2104723718819304352) }, { -INT64_C( 1613524498428840792), -INT64_C( 4633873112000936449), INT64_C( 3349213380578243459), -INT64_C( 674204802864421672), -INT64_C( 1954830502096509198), INT64_C( 6373210507922310079), INT64_C( 3725204992451339920), INT64_C( 2104723718819304352) } }, { { -INT64_C( 5928997778794056664), INT64_C( 7530813971257964538), INT64_C( 1783153292805852302), -INT64_C( 897772364895137957), INT64_C( 7100377162194317805), INT64_C( 5521993398515264210), -INT64_C( 4076337739640773412), INT64_C( 1584632822595170368) }, UINT8_C( 48), { INT64_C( 7227990955471204833), -INT64_C( 8182076068310447992), -INT64_C( 1887076183540250465), -INT64_C( 5123086641714570505), INT64_C( 284433193656156180), INT64_C( 2360746577768354570), INT64_C( 8714605466890518230), INT64_C( 3812602887049051481) }, { -INT64_C( 6191124988141888106), -INT64_C( 7824543943707611192), -INT64_C( 1127994923696646048), INT64_C( 3810056961364428683), INT64_C( 2265972895572730663), INT64_C( 6006062423011250441), INT64_C( 1257221751015995370), -INT64_C( 4258543097014807789) }, { -INT64_C( 5928997778794056664), INT64_C( 
7530813971257964538), INT64_C( 1783153292805852302), -INT64_C( 897772364895137957), INT64_C( 2360746577768354570), INT64_C( 6006062423011250441), -INT64_C( 4076337739640773412), INT64_C( 1584632822595170368) } }, { { -INT64_C( 8018660757471482816), -INT64_C( 2623996657626898353), INT64_C( 7511669188945889447), -INT64_C( 2755077514440606878), -INT64_C( 7187137741158527189), -INT64_C( 1146883776033417144), -INT64_C( 6631450114154204612), INT64_C( 4451638983883100758) }, UINT8_C(181), { -INT64_C( 3915423366592073928), INT64_C( 7722832208524927870), -INT64_C( 3976589726295671265), -INT64_C( 4751834691028730125), INT64_C( 3386928682753790214), INT64_C( 3679808221115060908), INT64_C( 111320118149889548), -INT64_C( 9043112067202390724) }, { INT64_C( 1538746084056735866), -INT64_C( 3092621876665812020), INT64_C( 8906468869616943498), -INT64_C( 2844659382430600057), -INT64_C( 4261426724832330147), -INT64_C( 1335396026422093863), -INT64_C( 5173475773515493766), -INT64_C( 8338061567367286508) }, { INT64_C( 7722832208524927870), -INT64_C( 2623996657626898353), -INT64_C( 4751834691028730125), -INT64_C( 2755077514440606878), INT64_C( 3679808221115060908), -INT64_C( 1335396026422093863), -INT64_C( 6631450114154204612), -INT64_C( 8338061567367286508) } }, { { -INT64_C( 8918201848107479680), INT64_C( 1375178542360748229), -INT64_C( 3560939127798195285), INT64_C( 1309291379511284081), INT64_C( 8216191539417349846), INT64_C( 8382237705590418343), -INT64_C( 8010525915789385909), INT64_C( 8420424661542573110) }, UINT8_C(217), { INT64_C( 2797852451683038566), -INT64_C( 8823286804088155246), INT64_C( 8849349408075322035), -INT64_C( 2695249943931086748), -INT64_C( 6367005662361805439), -INT64_C( 6279857409327787283), INT64_C( 1650870196476411400), INT64_C( 1150508682674766249) }, { -INT64_C( 4231361816949948789), -INT64_C( 5881996165970841408), INT64_C( 6501517465147514737), INT64_C( 623026708646661896), INT64_C( 2236863357412454509), -INT64_C( 8614485485033282529), -INT64_C( 
6855070614581177902), INT64_C( 3493346093460509941) }, { -INT64_C( 8823286804088155246), INT64_C( 1375178542360748229), -INT64_C( 3560939127798195285), INT64_C( 623026708646661896), -INT64_C( 6279857409327787283), INT64_C( 8382237705590418343), INT64_C( 1150508682674766249), INT64_C( 3493346093460509941) } }, { { INT64_C( 3497748872912249356), INT64_C( 7296790800698269604), -INT64_C( 3948876406478803207), INT64_C( 5768466005105535001), -INT64_C( 6406294586842709033), INT64_C( 8042484957435792805), INT64_C( 7147267899479065747), -INT64_C( 635204218856117349) }, UINT8_C(208), { INT64_C( 348038273292296170), INT64_C( 3248759616153717980), INT64_C( 1781131515178417385), -INT64_C( 5821130368954558951), INT64_C( 7065219299381480208), INT64_C( 6382999729258858175), -INT64_C( 127741380596069515), -INT64_C( 5461286742673493055) }, { -INT64_C( 8750555892677929817), -INT64_C( 2299788635459980723), INT64_C( 8534106568873118093), INT64_C( 1731689519062570160), -INT64_C( 2840693246252432570), -INT64_C( 8029168252790406841), -INT64_C( 9196506772007709645), -INT64_C( 3039968717940958139) }, { INT64_C( 3497748872912249356), INT64_C( 7296790800698269604), -INT64_C( 3948876406478803207), INT64_C( 5768466005105535001), INT64_C( 6382999729258858175), INT64_C( 8042484957435792805), -INT64_C( 5461286742673493055), -INT64_C( 3039968717940958139) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi64(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_mask_unpackhi_epi64(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_unpackhi_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { UINT8_C(153), { -INT64_C( 
8112061316095643358), INT64_C( 6935063002730107714), -INT64_C( 8212087054093038310), -INT64_C( 2607746116477873009), INT64_C( 3144832950630399454), -INT64_C( 5137730184067198396), INT64_C( 4921647922986935914), -INT64_C( 8765048687182594872) }, { -INT64_C( 7702493116525469689), -INT64_C( 9014066700485133679), INT64_C( 1259476586114192513), INT64_C( 5114663378355418315), INT64_C( 7382897138880386447), INT64_C( 8711789030675930443), -INT64_C( 8941289340094502767), -INT64_C( 314705051237516003) }, { INT64_C( 6935063002730107714), INT64_C( 0), INT64_C( 0), INT64_C( 5114663378355418315), -INT64_C( 5137730184067198396), INT64_C( 0), INT64_C( 0), -INT64_C( 314705051237516003) } }, { UINT8_C(135), { -INT64_C( 5875859059498033961), -INT64_C( 1917092265710921897), -INT64_C( 6480081549246251315), -INT64_C( 1761174571067478505), INT64_C( 281901227435879401), INT64_C( 8934645429188398939), -INT64_C( 3293675335527130245), INT64_C( 6045918023986601201) }, { -INT64_C( 3096937802851485867), -INT64_C( 7768449762043438073), INT64_C( 1446924468239245447), -INT64_C( 5937155702654636322), -INT64_C( 6394068444791232269), -INT64_C( 6611974618886688368), -INT64_C( 3070053105838688980), INT64_C( 7633521611942754793) }, { -INT64_C( 1917092265710921897), -INT64_C( 7768449762043438073), -INT64_C( 1761174571067478505), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 7633521611942754793) } }, { UINT8_C(224), { INT64_C( 5540033489920907166), INT64_C( 4556251704725245059), INT64_C( 7626860197896948725), -INT64_C( 6359002367070889431), -INT64_C( 1546745235567460320), -INT64_C( 761050062070877593), -INT64_C( 2658604054839187776), INT64_C( 7171837558290971095) }, { -INT64_C( 4391248934469730115), -INT64_C( 140064324810700776), -INT64_C( 8603624947397496053), -INT64_C( 8185309017209743934), INT64_C( 5012719882948773489), -INT64_C( 862882530471279158), INT64_C( 4582777096366109792), INT64_C( 7205948465858413216) }, { INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), 
-INT64_C( 862882530471279158), INT64_C( 7171837558290971095), INT64_C( 7205948465858413216) } }, { UINT8_C(177), { INT64_C( 5061802720489189811), INT64_C( 165828678393022336), -INT64_C( 377810925094215978), -INT64_C( 1847230989765627135), -INT64_C( 2410944925150790782), -INT64_C( 5390499936422085918), INT64_C( 8099610054905737334), INT64_C( 2874557315520681163) }, { -INT64_C( 4904149681540743243), INT64_C( 5278813408176356271), INT64_C( 8983307628280486349), -INT64_C( 630400309860317069), INT64_C( 5706899244913896349), INT64_C( 491489641323306421), INT64_C( 1974245321395571000), -INT64_C( 2671070819006554187) }, { INT64_C( 165828678393022336), INT64_C( 0), INT64_C( 0), INT64_C( 0), -INT64_C( 5390499936422085918), INT64_C( 491489641323306421), INT64_C( 0), -INT64_C( 2671070819006554187) } }, { UINT8_C( 30), { -INT64_C( 6942565444498861832), -INT64_C( 8340139830770323222), INT64_C( 7981591915461356065), -INT64_C( 6767355895724917629), -INT64_C( 3749461226211191851), -INT64_C( 3315751378116882324), INT64_C( 233243253361619007), INT64_C( 4732736115633153889) }, { -INT64_C( 1473080054470065438), -INT64_C( 5268272069701115295), -INT64_C( 3960141369283656825), -INT64_C( 840903116746062937), -INT64_C( 2292537272392225083), INT64_C( 7481840039537838503), -INT64_C( 4626753622671206248), -INT64_C( 1693030055064461445) }, { INT64_C( 0), -INT64_C( 5268272069701115295), -INT64_C( 6767355895724917629), -INT64_C( 840903116746062937), -INT64_C( 3315751378116882324), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C( 20), { -INT64_C( 2007351597602755528), INT64_C( 7985773938750014054), INT64_C( 4507335351563900057), -INT64_C( 3147372254168278505), -INT64_C( 4560890390205241739), INT64_C( 4485249486166915513), INT64_C( 2414171336890893475), -INT64_C( 6960398803146907145) }, { -INT64_C( 1607182324373407336), INT64_C( 2197746152424123805), -INT64_C( 6708426467054661701), -INT64_C( 8030870343051295038), -INT64_C( 4796557763825959850), -INT64_C( 2775486076469499679), INT64_C( 
9195090110878505805), -INT64_C( 4299820068894879541) }, { INT64_C( 0), INT64_C( 0), -INT64_C( 3147372254168278505), INT64_C( 0), INT64_C( 4485249486166915513), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(244), { -INT64_C( 2095632079641097515), -INT64_C( 7089832664592529712), -INT64_C( 15524902365195400), -INT64_C( 5534588735990232252), -INT64_C( 1794783307867863429), INT64_C( 7659445327702071277), -INT64_C( 4426703642843175580), -INT64_C( 6247284131365912901) }, { -INT64_C( 7100499098819753354), -INT64_C( 2348460646870504691), -INT64_C( 4937067789841936324), INT64_C( 2511218222881405697), -INT64_C( 4973339936969148243), INT64_C( 1957695701676279760), -INT64_C( 6724628912605389327), INT64_C( 5951978209226654412) }, { INT64_C( 0), INT64_C( 0), -INT64_C( 5534588735990232252), INT64_C( 0), INT64_C( 7659445327702071277), INT64_C( 1957695701676279760), -INT64_C( 6247284131365912901), INT64_C( 5951978209226654412) } }, { UINT8_C(100), { -INT64_C( 990848356323315639), INT64_C( 6095881269656193925), INT64_C( 4169656188011907520), INT64_C( 4069620111938311530), -INT64_C( 2486296676270633759), -INT64_C( 397131255520211342), INT64_C( 8436624239100720037), INT64_C( 388723695290217278) }, { INT64_C( 7261790460150090898), INT64_C( 833741416274152285), -INT64_C( 4107569250562498946), INT64_C( 2528587095903275758), INT64_C( 1566080278840911972), -INT64_C( 3345543044703434720), INT64_C( 7145622119506409948), -INT64_C( 2767306435414581789) }, { INT64_C( 0), INT64_C( 0), INT64_C( 4069620111938311530), INT64_C( 0), INT64_C( 0), -INT64_C( 3345543044703434720), INT64_C( 388723695290217278), INT64_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_maskz_unpackhi_epi64(test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int 
test_simde_mm512_unpackhi_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 501.25), SIMDE_FLOAT32_C( 354.44), SIMDE_FLOAT32_C( -760.98), SIMDE_FLOAT32_C( 367.33), SIMDE_FLOAT32_C( 563.03), SIMDE_FLOAT32_C( 427.95), SIMDE_FLOAT32_C( 773.44), SIMDE_FLOAT32_C( 713.02), SIMDE_FLOAT32_C( 119.86), SIMDE_FLOAT32_C( -12.94), SIMDE_FLOAT32_C( 867.61), SIMDE_FLOAT32_C( -313.75), SIMDE_FLOAT32_C( 147.55), SIMDE_FLOAT32_C( -416.41), SIMDE_FLOAT32_C( -877.86), SIMDE_FLOAT32_C( -808.32) }, { SIMDE_FLOAT32_C( -972.10), SIMDE_FLOAT32_C( 298.29), SIMDE_FLOAT32_C( 288.84), SIMDE_FLOAT32_C( 810.71), SIMDE_FLOAT32_C( 626.39), SIMDE_FLOAT32_C( -563.97), SIMDE_FLOAT32_C( -14.08), SIMDE_FLOAT32_C( -444.31), SIMDE_FLOAT32_C( 614.59), SIMDE_FLOAT32_C( 98.84), SIMDE_FLOAT32_C( 586.04), SIMDE_FLOAT32_C( 326.60), SIMDE_FLOAT32_C( -948.81), SIMDE_FLOAT32_C( -681.79), SIMDE_FLOAT32_C( 949.12), SIMDE_FLOAT32_C( 552.44) }, { SIMDE_FLOAT32_C( -760.98), SIMDE_FLOAT32_C( 288.84), SIMDE_FLOAT32_C( 367.33), SIMDE_FLOAT32_C( 810.71), SIMDE_FLOAT32_C( 773.44), SIMDE_FLOAT32_C( -14.08), SIMDE_FLOAT32_C( 713.02), SIMDE_FLOAT32_C( -444.31), SIMDE_FLOAT32_C( 867.61), SIMDE_FLOAT32_C( 586.04), SIMDE_FLOAT32_C( -313.75), SIMDE_FLOAT32_C( 326.60), SIMDE_FLOAT32_C( -877.86), SIMDE_FLOAT32_C( 949.12), SIMDE_FLOAT32_C( -808.32), SIMDE_FLOAT32_C( 552.44) } }, { { SIMDE_FLOAT32_C( 672.66), SIMDE_FLOAT32_C( -811.86), SIMDE_FLOAT32_C( -80.22), SIMDE_FLOAT32_C( 235.69), SIMDE_FLOAT32_C( 616.10), SIMDE_FLOAT32_C( -306.79), SIMDE_FLOAT32_C( -51.30), SIMDE_FLOAT32_C( -264.04), SIMDE_FLOAT32_C( 680.27), SIMDE_FLOAT32_C( -183.69), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -172.18), SIMDE_FLOAT32_C( 399.90), SIMDE_FLOAT32_C( 544.35), SIMDE_FLOAT32_C( 19.50), SIMDE_FLOAT32_C( 427.80) }, { SIMDE_FLOAT32_C( -157.35), SIMDE_FLOAT32_C( -691.65), SIMDE_FLOAT32_C( 238.51), SIMDE_FLOAT32_C( -530.97), 
SIMDE_FLOAT32_C( -255.62), SIMDE_FLOAT32_C( -775.57), SIMDE_FLOAT32_C( 24.72), SIMDE_FLOAT32_C( -641.03), SIMDE_FLOAT32_C( 323.27), SIMDE_FLOAT32_C( -389.24), SIMDE_FLOAT32_C( 685.56), SIMDE_FLOAT32_C( 374.47), SIMDE_FLOAT32_C( -71.03), SIMDE_FLOAT32_C( 634.69), SIMDE_FLOAT32_C( -73.09), SIMDE_FLOAT32_C( -398.37) }, { SIMDE_FLOAT32_C( -80.22), SIMDE_FLOAT32_C( 238.51), SIMDE_FLOAT32_C( 235.69), SIMDE_FLOAT32_C( -530.97), SIMDE_FLOAT32_C( -51.30), SIMDE_FLOAT32_C( 24.72), SIMDE_FLOAT32_C( -264.04), SIMDE_FLOAT32_C( -641.03), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( 685.56), SIMDE_FLOAT32_C( -172.18), SIMDE_FLOAT32_C( 374.47), SIMDE_FLOAT32_C( 19.50), SIMDE_FLOAT32_C( -73.09), SIMDE_FLOAT32_C( 427.80), SIMDE_FLOAT32_C( -398.37) } }, { { SIMDE_FLOAT32_C( 822.83), SIMDE_FLOAT32_C( 846.69), SIMDE_FLOAT32_C( 837.31), SIMDE_FLOAT32_C( 438.93), SIMDE_FLOAT32_C( -460.10), SIMDE_FLOAT32_C( -213.99), SIMDE_FLOAT32_C( -825.11), SIMDE_FLOAT32_C( -779.83), SIMDE_FLOAT32_C( 602.32), SIMDE_FLOAT32_C( 597.10), SIMDE_FLOAT32_C( 47.99), SIMDE_FLOAT32_C( 2.23), SIMDE_FLOAT32_C( 141.46), SIMDE_FLOAT32_C( -932.50), SIMDE_FLOAT32_C( -569.97), SIMDE_FLOAT32_C( 984.10) }, { SIMDE_FLOAT32_C( -624.16), SIMDE_FLOAT32_C( 668.54), SIMDE_FLOAT32_C( -546.86), SIMDE_FLOAT32_C( 120.22), SIMDE_FLOAT32_C( 892.97), SIMDE_FLOAT32_C( 477.86), SIMDE_FLOAT32_C( 479.19), SIMDE_FLOAT32_C( 216.25), SIMDE_FLOAT32_C( -911.39), SIMDE_FLOAT32_C( 164.75), SIMDE_FLOAT32_C( -409.28), SIMDE_FLOAT32_C( 17.58), SIMDE_FLOAT32_C( -200.56), SIMDE_FLOAT32_C( 517.63), SIMDE_FLOAT32_C( 619.21), SIMDE_FLOAT32_C( -377.73) }, { SIMDE_FLOAT32_C( 837.31), SIMDE_FLOAT32_C( -546.86), SIMDE_FLOAT32_C( 438.93), SIMDE_FLOAT32_C( 120.22), SIMDE_FLOAT32_C( -825.11), SIMDE_FLOAT32_C( 479.19), SIMDE_FLOAT32_C( -779.83), SIMDE_FLOAT32_C( 216.25), SIMDE_FLOAT32_C( 47.99), SIMDE_FLOAT32_C( -409.28), SIMDE_FLOAT32_C( 2.23), SIMDE_FLOAT32_C( 17.58), SIMDE_FLOAT32_C( -569.97), SIMDE_FLOAT32_C( 619.21), SIMDE_FLOAT32_C( 984.10), 
SIMDE_FLOAT32_C( -377.73) } }, { { SIMDE_FLOAT32_C( 364.32), SIMDE_FLOAT32_C( 456.52), SIMDE_FLOAT32_C( -938.80), SIMDE_FLOAT32_C( 904.22), SIMDE_FLOAT32_C( -757.47), SIMDE_FLOAT32_C( -763.91), SIMDE_FLOAT32_C( -875.61), SIMDE_FLOAT32_C( 844.86), SIMDE_FLOAT32_C( 833.20), SIMDE_FLOAT32_C( 172.39), SIMDE_FLOAT32_C( -152.92), SIMDE_FLOAT32_C( -25.35), SIMDE_FLOAT32_C( 239.88), SIMDE_FLOAT32_C( 277.11), SIMDE_FLOAT32_C( -41.24), SIMDE_FLOAT32_C( 615.73) }, { SIMDE_FLOAT32_C( -54.35), SIMDE_FLOAT32_C( 411.89), SIMDE_FLOAT32_C( -264.06), SIMDE_FLOAT32_C( -161.38), SIMDE_FLOAT32_C( -110.25), SIMDE_FLOAT32_C( -784.87), SIMDE_FLOAT32_C( -945.13), SIMDE_FLOAT32_C( -21.64), SIMDE_FLOAT32_C( 379.88), SIMDE_FLOAT32_C( -354.42), SIMDE_FLOAT32_C( 995.95), SIMDE_FLOAT32_C( -820.68), SIMDE_FLOAT32_C( -836.79), SIMDE_FLOAT32_C( 615.15), SIMDE_FLOAT32_C( -198.41), SIMDE_FLOAT32_C( 527.53) }, { SIMDE_FLOAT32_C( -938.80), SIMDE_FLOAT32_C( -264.06), SIMDE_FLOAT32_C( 904.22), SIMDE_FLOAT32_C( -161.38), SIMDE_FLOAT32_C( -875.61), SIMDE_FLOAT32_C( -945.13), SIMDE_FLOAT32_C( 844.86), SIMDE_FLOAT32_C( -21.64), SIMDE_FLOAT32_C( -152.92), SIMDE_FLOAT32_C( 995.95), SIMDE_FLOAT32_C( -25.35), SIMDE_FLOAT32_C( -820.68), SIMDE_FLOAT32_C( -41.24), SIMDE_FLOAT32_C( -198.41), SIMDE_FLOAT32_C( 615.73), SIMDE_FLOAT32_C( 527.53) } }, { { SIMDE_FLOAT32_C( 71.67), SIMDE_FLOAT32_C( -137.20), SIMDE_FLOAT32_C( 431.75), SIMDE_FLOAT32_C( 314.20), SIMDE_FLOAT32_C( 98.89), SIMDE_FLOAT32_C( 556.14), SIMDE_FLOAT32_C( 159.06), SIMDE_FLOAT32_C( -67.91), SIMDE_FLOAT32_C( -271.47), SIMDE_FLOAT32_C( -993.86), SIMDE_FLOAT32_C( 906.74), SIMDE_FLOAT32_C( 968.41), SIMDE_FLOAT32_C( 283.25), SIMDE_FLOAT32_C( -134.50), SIMDE_FLOAT32_C( 584.13), SIMDE_FLOAT32_C( -771.10) }, { SIMDE_FLOAT32_C( -722.60), SIMDE_FLOAT32_C( -679.92), SIMDE_FLOAT32_C( 67.52), SIMDE_FLOAT32_C( 167.15), SIMDE_FLOAT32_C( -464.79), SIMDE_FLOAT32_C( 122.39), SIMDE_FLOAT32_C( -854.49), SIMDE_FLOAT32_C( 915.09), SIMDE_FLOAT32_C( 767.97), SIMDE_FLOAT32_C( 
-858.54), SIMDE_FLOAT32_C( -905.59), SIMDE_FLOAT32_C( 931.19), SIMDE_FLOAT32_C( 756.61), SIMDE_FLOAT32_C( -104.00), SIMDE_FLOAT32_C( 458.72), SIMDE_FLOAT32_C( -171.72) }, { SIMDE_FLOAT32_C( 431.75), SIMDE_FLOAT32_C( 67.52), SIMDE_FLOAT32_C( 314.20), SIMDE_FLOAT32_C( 167.15), SIMDE_FLOAT32_C( 159.06), SIMDE_FLOAT32_C( -854.49), SIMDE_FLOAT32_C( -67.91), SIMDE_FLOAT32_C( 915.09), SIMDE_FLOAT32_C( 906.74), SIMDE_FLOAT32_C( -905.59), SIMDE_FLOAT32_C( 968.41), SIMDE_FLOAT32_C( 931.19), SIMDE_FLOAT32_C( 584.13), SIMDE_FLOAT32_C( 458.72), SIMDE_FLOAT32_C( -771.10), SIMDE_FLOAT32_C( -171.72) } }, { { SIMDE_FLOAT32_C( 758.80), SIMDE_FLOAT32_C( -109.54), SIMDE_FLOAT32_C( -857.52), SIMDE_FLOAT32_C( -142.31), SIMDE_FLOAT32_C( -553.40), SIMDE_FLOAT32_C( 301.54), SIMDE_FLOAT32_C( 789.78), SIMDE_FLOAT32_C( 175.13), SIMDE_FLOAT32_C( 307.68), SIMDE_FLOAT32_C( 696.52), SIMDE_FLOAT32_C( 143.54), SIMDE_FLOAT32_C( -409.06), SIMDE_FLOAT32_C( -437.98), SIMDE_FLOAT32_C( -272.33), SIMDE_FLOAT32_C( -180.16), SIMDE_FLOAT32_C( -160.59) }, { SIMDE_FLOAT32_C( 47.75), SIMDE_FLOAT32_C( 887.36), SIMDE_FLOAT32_C( -993.44), SIMDE_FLOAT32_C( 582.95), SIMDE_FLOAT32_C( 9.75), SIMDE_FLOAT32_C( -847.93), SIMDE_FLOAT32_C( 498.04), SIMDE_FLOAT32_C( -222.27), SIMDE_FLOAT32_C( -706.47), SIMDE_FLOAT32_C( 592.44), SIMDE_FLOAT32_C( -291.09), SIMDE_FLOAT32_C( -949.86), SIMDE_FLOAT32_C( -511.56), SIMDE_FLOAT32_C( -832.37), SIMDE_FLOAT32_C( -121.58), SIMDE_FLOAT32_C( -752.76) }, { SIMDE_FLOAT32_C( -857.52), SIMDE_FLOAT32_C( -993.44), SIMDE_FLOAT32_C( -142.31), SIMDE_FLOAT32_C( 582.95), SIMDE_FLOAT32_C( 789.78), SIMDE_FLOAT32_C( 498.04), SIMDE_FLOAT32_C( 175.13), SIMDE_FLOAT32_C( -222.27), SIMDE_FLOAT32_C( 143.54), SIMDE_FLOAT32_C( -291.09), SIMDE_FLOAT32_C( -409.06), SIMDE_FLOAT32_C( -949.86), SIMDE_FLOAT32_C( -180.16), SIMDE_FLOAT32_C( -121.58), SIMDE_FLOAT32_C( -160.59), SIMDE_FLOAT32_C( -752.76) } }, { { SIMDE_FLOAT32_C( 58.09), SIMDE_FLOAT32_C( 20.91), SIMDE_FLOAT32_C( 104.93), SIMDE_FLOAT32_C( 504.70), 
SIMDE_FLOAT32_C( -677.55), SIMDE_FLOAT32_C( -105.30), SIMDE_FLOAT32_C( -320.17), SIMDE_FLOAT32_C( 630.13), SIMDE_FLOAT32_C( -408.78), SIMDE_FLOAT32_C( 823.36), SIMDE_FLOAT32_C( -778.93), SIMDE_FLOAT32_C( 153.24), SIMDE_FLOAT32_C( -448.97), SIMDE_FLOAT32_C( 40.91), SIMDE_FLOAT32_C( 992.65), SIMDE_FLOAT32_C( 598.78) }, { SIMDE_FLOAT32_C( -71.73), SIMDE_FLOAT32_C( 999.22), SIMDE_FLOAT32_C( 181.73), SIMDE_FLOAT32_C( 938.02), SIMDE_FLOAT32_C( -848.71), SIMDE_FLOAT32_C( -320.23), SIMDE_FLOAT32_C( -284.25), SIMDE_FLOAT32_C( -555.18), SIMDE_FLOAT32_C( -727.79), SIMDE_FLOAT32_C( 424.66), SIMDE_FLOAT32_C( -505.04), SIMDE_FLOAT32_C( -239.35), SIMDE_FLOAT32_C( 592.29), SIMDE_FLOAT32_C( 373.38), SIMDE_FLOAT32_C( 7.89), SIMDE_FLOAT32_C( -349.61) }, { SIMDE_FLOAT32_C( 104.93), SIMDE_FLOAT32_C( 181.73), SIMDE_FLOAT32_C( 504.70), SIMDE_FLOAT32_C( 938.02), SIMDE_FLOAT32_C( -320.17), SIMDE_FLOAT32_C( -284.25), SIMDE_FLOAT32_C( 630.13), SIMDE_FLOAT32_C( -555.18), SIMDE_FLOAT32_C( -778.93), SIMDE_FLOAT32_C( -505.04), SIMDE_FLOAT32_C( 153.24), SIMDE_FLOAT32_C( -239.35), SIMDE_FLOAT32_C( 992.65), SIMDE_FLOAT32_C( 7.89), SIMDE_FLOAT32_C( 598.78), SIMDE_FLOAT32_C( -349.61) } }, { { SIMDE_FLOAT32_C( -605.71), SIMDE_FLOAT32_C( -887.18), SIMDE_FLOAT32_C( -844.92), SIMDE_FLOAT32_C( -283.26), SIMDE_FLOAT32_C( 7.52), SIMDE_FLOAT32_C( -165.09), SIMDE_FLOAT32_C( -653.13), SIMDE_FLOAT32_C( 598.74), SIMDE_FLOAT32_C( -341.73), SIMDE_FLOAT32_C( -432.06), SIMDE_FLOAT32_C( -248.02), SIMDE_FLOAT32_C( 209.30), SIMDE_FLOAT32_C( 608.85), SIMDE_FLOAT32_C( -255.36), SIMDE_FLOAT32_C( -191.92), SIMDE_FLOAT32_C( -462.88) }, { SIMDE_FLOAT32_C( -256.15), SIMDE_FLOAT32_C( 989.81), SIMDE_FLOAT32_C( -524.85), SIMDE_FLOAT32_C( -104.86), SIMDE_FLOAT32_C( -330.42), SIMDE_FLOAT32_C( 190.90), SIMDE_FLOAT32_C( 339.96), SIMDE_FLOAT32_C( -58.21), SIMDE_FLOAT32_C( -384.44), SIMDE_FLOAT32_C( 834.93), SIMDE_FLOAT32_C( 702.44), SIMDE_FLOAT32_C( -792.14), SIMDE_FLOAT32_C( 208.31), SIMDE_FLOAT32_C( -289.67), SIMDE_FLOAT32_C( 
-141.76), SIMDE_FLOAT32_C( 602.60) }, { SIMDE_FLOAT32_C( -844.92), SIMDE_FLOAT32_C( -524.85), SIMDE_FLOAT32_C( -283.26), SIMDE_FLOAT32_C( -104.86), SIMDE_FLOAT32_C( -653.13), SIMDE_FLOAT32_C( 339.96), SIMDE_FLOAT32_C( 598.74), SIMDE_FLOAT32_C( -58.21), SIMDE_FLOAT32_C( -248.02), SIMDE_FLOAT32_C( 702.44), SIMDE_FLOAT32_C( 209.30), SIMDE_FLOAT32_C( -792.14), SIMDE_FLOAT32_C( -191.92), SIMDE_FLOAT32_C( -141.76), SIMDE_FLOAT32_C( -462.88), SIMDE_FLOAT32_C( 602.60) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_mm512_unpackhi_ps(a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_unpackhi_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -291.23), SIMDE_FLOAT32_C( -139.21), SIMDE_FLOAT32_C( -441.81), SIMDE_FLOAT32_C( 845.97), SIMDE_FLOAT32_C( 226.47), SIMDE_FLOAT32_C( 261.10), SIMDE_FLOAT32_C( -120.23), SIMDE_FLOAT32_C( -359.39), SIMDE_FLOAT32_C( 746.86), SIMDE_FLOAT32_C( 235.27), SIMDE_FLOAT32_C( -137.88), SIMDE_FLOAT32_C( -417.19), SIMDE_FLOAT32_C( -188.37), SIMDE_FLOAT32_C( -129.27), SIMDE_FLOAT32_C( 402.91), SIMDE_FLOAT32_C( 826.61) }, UINT8_C(205), { SIMDE_FLOAT32_C( 236.89), SIMDE_FLOAT32_C( -388.23), SIMDE_FLOAT32_C( 941.34), SIMDE_FLOAT32_C( -208.36), SIMDE_FLOAT32_C( -307.76), SIMDE_FLOAT32_C( -934.29), SIMDE_FLOAT32_C( -828.65), SIMDE_FLOAT32_C( -292.89), SIMDE_FLOAT32_C( 823.73), SIMDE_FLOAT32_C( -702.66), SIMDE_FLOAT32_C( -158.41), SIMDE_FLOAT32_C( -940.13), SIMDE_FLOAT32_C( 882.43), SIMDE_FLOAT32_C( -63.81), SIMDE_FLOAT32_C( -231.36), SIMDE_FLOAT32_C( -256.78) }, { SIMDE_FLOAT32_C( 494.38), SIMDE_FLOAT32_C( -385.40), SIMDE_FLOAT32_C( 969.69), 
SIMDE_FLOAT32_C( -244.52), SIMDE_FLOAT32_C( 494.37), SIMDE_FLOAT32_C( -389.71), SIMDE_FLOAT32_C( -497.66), SIMDE_FLOAT32_C( -270.36), SIMDE_FLOAT32_C( 472.42), SIMDE_FLOAT32_C( 85.15), SIMDE_FLOAT32_C( 541.28), SIMDE_FLOAT32_C( -656.85), SIMDE_FLOAT32_C( -511.95), SIMDE_FLOAT32_C( 367.89), SIMDE_FLOAT32_C( -73.71), SIMDE_FLOAT32_C( 724.95) }, { SIMDE_FLOAT32_C( 941.34), SIMDE_FLOAT32_C( -139.21), SIMDE_FLOAT32_C( -208.36), SIMDE_FLOAT32_C( -244.52), SIMDE_FLOAT32_C( 226.47), SIMDE_FLOAT32_C( 261.10), SIMDE_FLOAT32_C( -292.89), SIMDE_FLOAT32_C( -270.36), SIMDE_FLOAT32_C( 746.86), SIMDE_FLOAT32_C( 235.27), SIMDE_FLOAT32_C( -137.88), SIMDE_FLOAT32_C( -417.19), SIMDE_FLOAT32_C( -188.37), SIMDE_FLOAT32_C( -129.27), SIMDE_FLOAT32_C( 402.91), SIMDE_FLOAT32_C( 826.61) } }, { { SIMDE_FLOAT32_C( 979.66), SIMDE_FLOAT32_C( -132.37), SIMDE_FLOAT32_C( -483.42), SIMDE_FLOAT32_C( -328.09), SIMDE_FLOAT32_C( -66.66), SIMDE_FLOAT32_C( -312.07), SIMDE_FLOAT32_C( 379.02), SIMDE_FLOAT32_C( -242.93), SIMDE_FLOAT32_C( -14.73), SIMDE_FLOAT32_C( -779.39), SIMDE_FLOAT32_C( -183.06), SIMDE_FLOAT32_C( -132.30), SIMDE_FLOAT32_C( 156.80), SIMDE_FLOAT32_C( 585.58), SIMDE_FLOAT32_C( 610.92), SIMDE_FLOAT32_C( -348.82) }, UINT8_C(248), { SIMDE_FLOAT32_C( 580.60), SIMDE_FLOAT32_C( 406.66), SIMDE_FLOAT32_C( 694.56), SIMDE_FLOAT32_C( -809.10), SIMDE_FLOAT32_C( 908.99), SIMDE_FLOAT32_C( -575.80), SIMDE_FLOAT32_C( 663.32), SIMDE_FLOAT32_C( -5.86), SIMDE_FLOAT32_C( 965.47), SIMDE_FLOAT32_C( -993.54), SIMDE_FLOAT32_C( 482.19), SIMDE_FLOAT32_C( 333.36), SIMDE_FLOAT32_C( -67.24), SIMDE_FLOAT32_C( 207.14), SIMDE_FLOAT32_C( 313.03), SIMDE_FLOAT32_C( 800.39) }, { SIMDE_FLOAT32_C( 723.72), SIMDE_FLOAT32_C( 984.93), SIMDE_FLOAT32_C( -266.27), SIMDE_FLOAT32_C( -588.35), SIMDE_FLOAT32_C( 363.95), SIMDE_FLOAT32_C( 490.80), SIMDE_FLOAT32_C( 396.93), SIMDE_FLOAT32_C( 584.57), SIMDE_FLOAT32_C( -692.25), SIMDE_FLOAT32_C( -735.37), SIMDE_FLOAT32_C( -258.63), SIMDE_FLOAT32_C( 893.33), SIMDE_FLOAT32_C( 875.54), 
SIMDE_FLOAT32_C( 392.54), SIMDE_FLOAT32_C( -906.49), SIMDE_FLOAT32_C( 456.15) }, { SIMDE_FLOAT32_C( 979.66), SIMDE_FLOAT32_C( -132.37), SIMDE_FLOAT32_C( -483.42), SIMDE_FLOAT32_C( -588.35), SIMDE_FLOAT32_C( 663.32), SIMDE_FLOAT32_C( 396.93), SIMDE_FLOAT32_C( -5.86), SIMDE_FLOAT32_C( 584.57), SIMDE_FLOAT32_C( -14.73), SIMDE_FLOAT32_C( -779.39), SIMDE_FLOAT32_C( -183.06), SIMDE_FLOAT32_C( -132.30), SIMDE_FLOAT32_C( 156.80), SIMDE_FLOAT32_C( 585.58), SIMDE_FLOAT32_C( 610.92), SIMDE_FLOAT32_C( -348.82) } }, { { SIMDE_FLOAT32_C( -200.80), SIMDE_FLOAT32_C( 788.07), SIMDE_FLOAT32_C( 647.05), SIMDE_FLOAT32_C( -291.81), SIMDE_FLOAT32_C( -787.73), SIMDE_FLOAT32_C( 310.36), SIMDE_FLOAT32_C( 702.33), SIMDE_FLOAT32_C( -822.26), SIMDE_FLOAT32_C( 316.83), SIMDE_FLOAT32_C( 184.52), SIMDE_FLOAT32_C( 511.11), SIMDE_FLOAT32_C( -750.41), SIMDE_FLOAT32_C( -608.34), SIMDE_FLOAT32_C( -175.86), SIMDE_FLOAT32_C( -950.03), SIMDE_FLOAT32_C( -884.62) }, UINT8_C(108), { SIMDE_FLOAT32_C( -216.30), SIMDE_FLOAT32_C( -472.96), SIMDE_FLOAT32_C( -826.97), SIMDE_FLOAT32_C( -725.50), SIMDE_FLOAT32_C( 923.97), SIMDE_FLOAT32_C( 757.59), SIMDE_FLOAT32_C( -417.75), SIMDE_FLOAT32_C( -811.41), SIMDE_FLOAT32_C( -501.04), SIMDE_FLOAT32_C( -524.42), SIMDE_FLOAT32_C( -935.86), SIMDE_FLOAT32_C( 891.50), SIMDE_FLOAT32_C( -430.91), SIMDE_FLOAT32_C( 520.29), SIMDE_FLOAT32_C( -309.30), SIMDE_FLOAT32_C( -642.84) }, { SIMDE_FLOAT32_C( 167.33), SIMDE_FLOAT32_C( 398.89), SIMDE_FLOAT32_C( -430.57), SIMDE_FLOAT32_C( -522.30), SIMDE_FLOAT32_C( 101.22), SIMDE_FLOAT32_C( -252.83), SIMDE_FLOAT32_C( 794.53), SIMDE_FLOAT32_C( -714.26), SIMDE_FLOAT32_C( -741.72), SIMDE_FLOAT32_C( -955.89), SIMDE_FLOAT32_C( -322.60), SIMDE_FLOAT32_C( 82.42), SIMDE_FLOAT32_C( -905.92), SIMDE_FLOAT32_C( -207.21), SIMDE_FLOAT32_C( 891.49), SIMDE_FLOAT32_C( -122.21) }, { SIMDE_FLOAT32_C( -200.80), SIMDE_FLOAT32_C( 788.07), SIMDE_FLOAT32_C( -725.50), SIMDE_FLOAT32_C( -522.30), SIMDE_FLOAT32_C( -787.73), SIMDE_FLOAT32_C( 794.53), SIMDE_FLOAT32_C( 
-811.41), SIMDE_FLOAT32_C( -822.26), SIMDE_FLOAT32_C( 316.83), SIMDE_FLOAT32_C( 184.52), SIMDE_FLOAT32_C( 511.11), SIMDE_FLOAT32_C( -750.41), SIMDE_FLOAT32_C( -608.34), SIMDE_FLOAT32_C( -175.86), SIMDE_FLOAT32_C( -950.03), SIMDE_FLOAT32_C( -884.62) } }, { { SIMDE_FLOAT32_C( 319.83), SIMDE_FLOAT32_C( -935.48), SIMDE_FLOAT32_C( 152.29), SIMDE_FLOAT32_C( 243.79), SIMDE_FLOAT32_C( 822.11), SIMDE_FLOAT32_C( 734.54), SIMDE_FLOAT32_C( 432.39), SIMDE_FLOAT32_C( -678.93), SIMDE_FLOAT32_C( -789.89), SIMDE_FLOAT32_C( 496.52), SIMDE_FLOAT32_C( -787.43), SIMDE_FLOAT32_C( -220.80), SIMDE_FLOAT32_C( 16.81), SIMDE_FLOAT32_C( -96.74), SIMDE_FLOAT32_C( 136.36), SIMDE_FLOAT32_C( -815.85) }, UINT8_C(254), { SIMDE_FLOAT32_C( 705.79), SIMDE_FLOAT32_C( -338.16), SIMDE_FLOAT32_C( 403.37), SIMDE_FLOAT32_C( -547.04), SIMDE_FLOAT32_C( -543.63), SIMDE_FLOAT32_C( 689.11), SIMDE_FLOAT32_C( -288.75), SIMDE_FLOAT32_C( -499.52), SIMDE_FLOAT32_C( -633.49), SIMDE_FLOAT32_C( 793.66), SIMDE_FLOAT32_C( -405.44), SIMDE_FLOAT32_C( 159.30), SIMDE_FLOAT32_C( 685.15), SIMDE_FLOAT32_C( 472.35), SIMDE_FLOAT32_C( -520.87), SIMDE_FLOAT32_C( 749.67) }, { SIMDE_FLOAT32_C( -375.36), SIMDE_FLOAT32_C( 722.92), SIMDE_FLOAT32_C( 571.78), SIMDE_FLOAT32_C( -640.83), SIMDE_FLOAT32_C( 155.31), SIMDE_FLOAT32_C( 892.85), SIMDE_FLOAT32_C( -430.72), SIMDE_FLOAT32_C( -348.16), SIMDE_FLOAT32_C( -894.59), SIMDE_FLOAT32_C( 348.48), SIMDE_FLOAT32_C( 668.65), SIMDE_FLOAT32_C( 8.68), SIMDE_FLOAT32_C( -515.16), SIMDE_FLOAT32_C( 852.79), SIMDE_FLOAT32_C( 310.83), SIMDE_FLOAT32_C( -809.37) }, { SIMDE_FLOAT32_C( 319.83), SIMDE_FLOAT32_C( 571.78), SIMDE_FLOAT32_C( -547.04), SIMDE_FLOAT32_C( -640.83), SIMDE_FLOAT32_C( -288.75), SIMDE_FLOAT32_C( -430.72), SIMDE_FLOAT32_C( -499.52), SIMDE_FLOAT32_C( -348.16), SIMDE_FLOAT32_C( -789.89), SIMDE_FLOAT32_C( 496.52), SIMDE_FLOAT32_C( -787.43), SIMDE_FLOAT32_C( -220.80), SIMDE_FLOAT32_C( 16.81), SIMDE_FLOAT32_C( -96.74), SIMDE_FLOAT32_C( 136.36), SIMDE_FLOAT32_C( -815.85) } }, { { SIMDE_FLOAT32_C( 
-485.36), SIMDE_FLOAT32_C( -285.81), SIMDE_FLOAT32_C( -356.40), SIMDE_FLOAT32_C( -29.00), SIMDE_FLOAT32_C( -596.70), SIMDE_FLOAT32_C( 354.84), SIMDE_FLOAT32_C( 471.48), SIMDE_FLOAT32_C( -230.19), SIMDE_FLOAT32_C( 148.51), SIMDE_FLOAT32_C( -933.96), SIMDE_FLOAT32_C( 929.12), SIMDE_FLOAT32_C( -166.34), SIMDE_FLOAT32_C( 538.39), SIMDE_FLOAT32_C( -591.75), SIMDE_FLOAT32_C( -416.67), SIMDE_FLOAT32_C( -836.97) }, UINT8_C(155), { SIMDE_FLOAT32_C( -844.89), SIMDE_FLOAT32_C( -477.80), SIMDE_FLOAT32_C( 286.48), SIMDE_FLOAT32_C( -952.04), SIMDE_FLOAT32_C( 91.48), SIMDE_FLOAT32_C( 938.32), SIMDE_FLOAT32_C( -846.63), SIMDE_FLOAT32_C( -560.04), SIMDE_FLOAT32_C( 606.97), SIMDE_FLOAT32_C( 162.05), SIMDE_FLOAT32_C( -75.19), SIMDE_FLOAT32_C( 459.76), SIMDE_FLOAT32_C( -527.12), SIMDE_FLOAT32_C( 115.44), SIMDE_FLOAT32_C( 974.40), SIMDE_FLOAT32_C( 187.07) }, { SIMDE_FLOAT32_C( 759.04), SIMDE_FLOAT32_C( -54.60), SIMDE_FLOAT32_C( 590.37), SIMDE_FLOAT32_C( 113.88), SIMDE_FLOAT32_C( -583.12), SIMDE_FLOAT32_C( -639.81), SIMDE_FLOAT32_C( -737.62), SIMDE_FLOAT32_C( -517.08), SIMDE_FLOAT32_C( -710.70), SIMDE_FLOAT32_C( 96.05), SIMDE_FLOAT32_C( -978.69), SIMDE_FLOAT32_C( -302.45), SIMDE_FLOAT32_C( 679.38), SIMDE_FLOAT32_C( -815.66), SIMDE_FLOAT32_C( -171.28), SIMDE_FLOAT32_C( 834.49) }, { SIMDE_FLOAT32_C( 286.48), SIMDE_FLOAT32_C( 590.37), SIMDE_FLOAT32_C( -356.40), SIMDE_FLOAT32_C( 113.88), SIMDE_FLOAT32_C( -846.63), SIMDE_FLOAT32_C( 354.84), SIMDE_FLOAT32_C( 471.48), SIMDE_FLOAT32_C( -517.08), SIMDE_FLOAT32_C( 148.51), SIMDE_FLOAT32_C( -933.96), SIMDE_FLOAT32_C( 929.12), SIMDE_FLOAT32_C( -166.34), SIMDE_FLOAT32_C( 538.39), SIMDE_FLOAT32_C( -591.75), SIMDE_FLOAT32_C( -416.67), SIMDE_FLOAT32_C( -836.97) } }, { { SIMDE_FLOAT32_C( -293.47), SIMDE_FLOAT32_C( -884.79), SIMDE_FLOAT32_C( 882.45), SIMDE_FLOAT32_C( 798.01), SIMDE_FLOAT32_C( -946.47), SIMDE_FLOAT32_C( -964.17), SIMDE_FLOAT32_C( -762.02), SIMDE_FLOAT32_C( 660.49), SIMDE_FLOAT32_C( 197.88), SIMDE_FLOAT32_C( 162.78), SIMDE_FLOAT32_C( 
120.25), SIMDE_FLOAT32_C( 670.75), SIMDE_FLOAT32_C( -721.78), SIMDE_FLOAT32_C( 94.65), SIMDE_FLOAT32_C( -142.18), SIMDE_FLOAT32_C( -962.74) }, UINT8_C(128), { SIMDE_FLOAT32_C( -551.81), SIMDE_FLOAT32_C( 151.14), SIMDE_FLOAT32_C( -543.07), SIMDE_FLOAT32_C( -191.62), SIMDE_FLOAT32_C( 413.52), SIMDE_FLOAT32_C( -60.15), SIMDE_FLOAT32_C( 97.68), SIMDE_FLOAT32_C( -490.43), SIMDE_FLOAT32_C( -38.84), SIMDE_FLOAT32_C( 795.24), SIMDE_FLOAT32_C( -811.05), SIMDE_FLOAT32_C( 145.50), SIMDE_FLOAT32_C( -376.04), SIMDE_FLOAT32_C( -976.56), SIMDE_FLOAT32_C( 852.03), SIMDE_FLOAT32_C( -260.84) }, { SIMDE_FLOAT32_C( 905.89), SIMDE_FLOAT32_C( 650.04), SIMDE_FLOAT32_C( -207.31), SIMDE_FLOAT32_C( 941.71), SIMDE_FLOAT32_C( 888.02), SIMDE_FLOAT32_C( -546.82), SIMDE_FLOAT32_C( 139.59), SIMDE_FLOAT32_C( 50.80), SIMDE_FLOAT32_C( 573.44), SIMDE_FLOAT32_C( -189.66), SIMDE_FLOAT32_C( 329.03), SIMDE_FLOAT32_C( -331.91), SIMDE_FLOAT32_C( 668.17), SIMDE_FLOAT32_C( 366.29), SIMDE_FLOAT32_C( -291.87), SIMDE_FLOAT32_C( -883.64) }, { SIMDE_FLOAT32_C( -293.47), SIMDE_FLOAT32_C( -884.79), SIMDE_FLOAT32_C( 882.45), SIMDE_FLOAT32_C( 798.01), SIMDE_FLOAT32_C( -946.47), SIMDE_FLOAT32_C( -964.17), SIMDE_FLOAT32_C( -762.02), SIMDE_FLOAT32_C( 50.80), SIMDE_FLOAT32_C( 197.88), SIMDE_FLOAT32_C( 162.78), SIMDE_FLOAT32_C( 120.25), SIMDE_FLOAT32_C( 670.75), SIMDE_FLOAT32_C( -721.78), SIMDE_FLOAT32_C( 94.65), SIMDE_FLOAT32_C( -142.18), SIMDE_FLOAT32_C( -962.74) } }, { { SIMDE_FLOAT32_C( -482.57), SIMDE_FLOAT32_C( 165.06), SIMDE_FLOAT32_C( -75.26), SIMDE_FLOAT32_C( 930.95), SIMDE_FLOAT32_C( -895.09), SIMDE_FLOAT32_C( -977.57), SIMDE_FLOAT32_C( -559.48), SIMDE_FLOAT32_C( 66.06), SIMDE_FLOAT32_C( 817.66), SIMDE_FLOAT32_C( -370.53), SIMDE_FLOAT32_C( -788.44), SIMDE_FLOAT32_C( -558.38), SIMDE_FLOAT32_C( -347.09), SIMDE_FLOAT32_C( -936.41), SIMDE_FLOAT32_C( 180.79), SIMDE_FLOAT32_C( -441.20) }, UINT8_C(109), { SIMDE_FLOAT32_C( 973.48), SIMDE_FLOAT32_C( -499.49), SIMDE_FLOAT32_C( 601.65), SIMDE_FLOAT32_C( -573.34), 
SIMDE_FLOAT32_C( 640.10), SIMDE_FLOAT32_C( -347.55), SIMDE_FLOAT32_C( -999.90), SIMDE_FLOAT32_C( -549.55), SIMDE_FLOAT32_C( 981.48), SIMDE_FLOAT32_C( -331.82), SIMDE_FLOAT32_C( -881.39), SIMDE_FLOAT32_C( 347.77), SIMDE_FLOAT32_C( 376.32), SIMDE_FLOAT32_C( -765.03), SIMDE_FLOAT32_C( 865.20), SIMDE_FLOAT32_C( -458.63) }, { SIMDE_FLOAT32_C( 159.72), SIMDE_FLOAT32_C( 796.15), SIMDE_FLOAT32_C( -353.72), SIMDE_FLOAT32_C( 182.14), SIMDE_FLOAT32_C( -763.33), SIMDE_FLOAT32_C( 712.34), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( -133.87), SIMDE_FLOAT32_C( 923.90), SIMDE_FLOAT32_C( 441.43), SIMDE_FLOAT32_C( 519.04), SIMDE_FLOAT32_C( 987.49), SIMDE_FLOAT32_C( -377.78), SIMDE_FLOAT32_C( -922.16), SIMDE_FLOAT32_C( 701.12), SIMDE_FLOAT32_C( -404.31) }, { SIMDE_FLOAT32_C( 601.65), SIMDE_FLOAT32_C( 165.06), SIMDE_FLOAT32_C( -573.34), SIMDE_FLOAT32_C( 182.14), SIMDE_FLOAT32_C( -895.09), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( -549.55), SIMDE_FLOAT32_C( 66.06), SIMDE_FLOAT32_C( 817.66), SIMDE_FLOAT32_C( -370.53), SIMDE_FLOAT32_C( -788.44), SIMDE_FLOAT32_C( -558.38), SIMDE_FLOAT32_C( -347.09), SIMDE_FLOAT32_C( -936.41), SIMDE_FLOAT32_C( 180.79), SIMDE_FLOAT32_C( -441.20) } }, { { SIMDE_FLOAT32_C( -421.65), SIMDE_FLOAT32_C( 302.77), SIMDE_FLOAT32_C( 22.35), SIMDE_FLOAT32_C( -781.55), SIMDE_FLOAT32_C( 955.22), SIMDE_FLOAT32_C( 22.45), SIMDE_FLOAT32_C( -331.11), SIMDE_FLOAT32_C( 936.70), SIMDE_FLOAT32_C( 690.63), SIMDE_FLOAT32_C( -212.49), SIMDE_FLOAT32_C( 284.46), SIMDE_FLOAT32_C( 66.95), SIMDE_FLOAT32_C( 22.48), SIMDE_FLOAT32_C( 149.66), SIMDE_FLOAT32_C( 608.33), SIMDE_FLOAT32_C( -817.80) }, UINT8_C(168), { SIMDE_FLOAT32_C( -745.39), SIMDE_FLOAT32_C( 364.34), SIMDE_FLOAT32_C( 182.47), SIMDE_FLOAT32_C( 966.95), SIMDE_FLOAT32_C( -635.85), SIMDE_FLOAT32_C( -951.39), SIMDE_FLOAT32_C( 890.85), SIMDE_FLOAT32_C( 805.58), SIMDE_FLOAT32_C( 567.65), SIMDE_FLOAT32_C( 878.34), SIMDE_FLOAT32_C( -572.21), SIMDE_FLOAT32_C( 645.49), SIMDE_FLOAT32_C( 579.46), SIMDE_FLOAT32_C( 23.49), SIMDE_FLOAT32_C( 
-776.17), SIMDE_FLOAT32_C( -117.78) }, { SIMDE_FLOAT32_C( -954.16), SIMDE_FLOAT32_C( -557.72), SIMDE_FLOAT32_C( -162.56), SIMDE_FLOAT32_C( 68.29), SIMDE_FLOAT32_C( 111.17), SIMDE_FLOAT32_C( -225.86), SIMDE_FLOAT32_C( -241.07), SIMDE_FLOAT32_C( 898.68), SIMDE_FLOAT32_C( -941.40), SIMDE_FLOAT32_C( 825.88), SIMDE_FLOAT32_C( -78.84), SIMDE_FLOAT32_C( 208.26), SIMDE_FLOAT32_C( 434.20), SIMDE_FLOAT32_C( 103.36), SIMDE_FLOAT32_C( -845.93), SIMDE_FLOAT32_C( 688.81) }, { SIMDE_FLOAT32_C( -421.65), SIMDE_FLOAT32_C( 302.77), SIMDE_FLOAT32_C( 22.35), SIMDE_FLOAT32_C( 68.29), SIMDE_FLOAT32_C( 955.22), SIMDE_FLOAT32_C( -241.07), SIMDE_FLOAT32_C( -331.11), SIMDE_FLOAT32_C( 898.68), SIMDE_FLOAT32_C( 690.63), SIMDE_FLOAT32_C( -212.49), SIMDE_FLOAT32_C( 284.46), SIMDE_FLOAT32_C( 66.95), SIMDE_FLOAT32_C( 22.48), SIMDE_FLOAT32_C( 149.66), SIMDE_FLOAT32_C( 608.33), SIMDE_FLOAT32_C( -817.80) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_mm512_mask_unpackhi_ps(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_maskz_unpackhi_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { { UINT16_C(52453), { SIMDE_FLOAT32_C( -753.67), SIMDE_FLOAT32_C( -929.72), SIMDE_FLOAT32_C( 958.55), SIMDE_FLOAT32_C( -635.03), SIMDE_FLOAT32_C( 813.44), SIMDE_FLOAT32_C( -572.90), SIMDE_FLOAT32_C( -350.63), SIMDE_FLOAT32_C( -428.42), SIMDE_FLOAT32_C( -919.26), SIMDE_FLOAT32_C( -171.52), SIMDE_FLOAT32_C( 935.83), SIMDE_FLOAT32_C( 125.81), SIMDE_FLOAT32_C( 572.34), SIMDE_FLOAT32_C( -746.26), SIMDE_FLOAT32_C( 391.38), SIMDE_FLOAT32_C( -448.91) }, { SIMDE_FLOAT32_C( 293.95), 
SIMDE_FLOAT32_C( -500.41), SIMDE_FLOAT32_C( -983.39), SIMDE_FLOAT32_C( 919.70), SIMDE_FLOAT32_C( -282.02), SIMDE_FLOAT32_C( 527.32), SIMDE_FLOAT32_C( 354.40), SIMDE_FLOAT32_C( 182.05), SIMDE_FLOAT32_C( -816.83), SIMDE_FLOAT32_C( -162.74), SIMDE_FLOAT32_C( 223.66), SIMDE_FLOAT32_C( 371.74), SIMDE_FLOAT32_C( -962.96), SIMDE_FLOAT32_C( -571.14), SIMDE_FLOAT32_C( -839.06), SIMDE_FLOAT32_C( -716.63) }, { SIMDE_FLOAT32_C( 958.55), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -635.03), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 354.40), SIMDE_FLOAT32_C( -428.42), SIMDE_FLOAT32_C( 182.05), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 125.81), SIMDE_FLOAT32_C( 371.74), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -448.91), SIMDE_FLOAT32_C( -716.63) } }, { UINT16_C(19907), { SIMDE_FLOAT32_C( -351.66), SIMDE_FLOAT32_C( -687.42), SIMDE_FLOAT32_C( -453.41), SIMDE_FLOAT32_C( 297.70), SIMDE_FLOAT32_C( -115.84), SIMDE_FLOAT32_C( -372.67), SIMDE_FLOAT32_C( -873.82), SIMDE_FLOAT32_C( -180.01), SIMDE_FLOAT32_C( 753.15), SIMDE_FLOAT32_C( 698.52), SIMDE_FLOAT32_C( 73.73), SIMDE_FLOAT32_C( 144.52), SIMDE_FLOAT32_C( -750.38), SIMDE_FLOAT32_C( -632.32), SIMDE_FLOAT32_C( 644.11), SIMDE_FLOAT32_C( -733.77) }, { SIMDE_FLOAT32_C( -712.62), SIMDE_FLOAT32_C( -637.91), SIMDE_FLOAT32_C( 793.55), SIMDE_FLOAT32_C( 641.79), SIMDE_FLOAT32_C( 544.14), SIMDE_FLOAT32_C( 976.71), SIMDE_FLOAT32_C( -520.96), SIMDE_FLOAT32_C( -232.20), SIMDE_FLOAT32_C( 348.45), SIMDE_FLOAT32_C( -483.91), SIMDE_FLOAT32_C( 196.65), SIMDE_FLOAT32_C( 509.39), SIMDE_FLOAT32_C( -200.54), SIMDE_FLOAT32_C( 695.80), SIMDE_FLOAT32_C( 628.88), SIMDE_FLOAT32_C( 447.80) }, { SIMDE_FLOAT32_C( -453.41), SIMDE_FLOAT32_C( 793.55), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -180.01), SIMDE_FLOAT32_C( -232.20), SIMDE_FLOAT32_C( 73.73), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 144.52), SIMDE_FLOAT32_C( 509.39), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -733.77), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(11533), { SIMDE_FLOAT32_C( -254.50), SIMDE_FLOAT32_C( -107.46), SIMDE_FLOAT32_C( -197.20), SIMDE_FLOAT32_C( -128.31), SIMDE_FLOAT32_C( 712.53), SIMDE_FLOAT32_C( -444.05), SIMDE_FLOAT32_C( -429.79), SIMDE_FLOAT32_C( -213.73), SIMDE_FLOAT32_C( 700.47), SIMDE_FLOAT32_C( -180.18), SIMDE_FLOAT32_C( 153.95), SIMDE_FLOAT32_C( 344.58), SIMDE_FLOAT32_C( 86.05), SIMDE_FLOAT32_C( 441.33), SIMDE_FLOAT32_C( 706.67), SIMDE_FLOAT32_C( -120.40) }, { SIMDE_FLOAT32_C( 83.12), SIMDE_FLOAT32_C( 250.82), SIMDE_FLOAT32_C( -143.69), SIMDE_FLOAT32_C( 562.16), SIMDE_FLOAT32_C( -981.39), SIMDE_FLOAT32_C( -795.24), SIMDE_FLOAT32_C( -921.75), SIMDE_FLOAT32_C( 215.27), SIMDE_FLOAT32_C( 714.16), SIMDE_FLOAT32_C( -122.29), SIMDE_FLOAT32_C( -88.94), SIMDE_FLOAT32_C( 343.04), SIMDE_FLOAT32_C( -674.50), SIMDE_FLOAT32_C( -80.56), SIMDE_FLOAT32_C( 518.51), SIMDE_FLOAT32_C( 71.00) }, { SIMDE_FLOAT32_C( -197.20), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -128.31), SIMDE_FLOAT32_C( 562.16), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 153.95), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 344.58), SIMDE_FLOAT32_C( 343.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 518.51), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(60827), { SIMDE_FLOAT32_C( 942.69), SIMDE_FLOAT32_C( 524.51), SIMDE_FLOAT32_C( -122.74), SIMDE_FLOAT32_C( -487.10), SIMDE_FLOAT32_C( -689.22), SIMDE_FLOAT32_C( -422.26), SIMDE_FLOAT32_C( 332.72), SIMDE_FLOAT32_C( 464.73), SIMDE_FLOAT32_C( 922.32), SIMDE_FLOAT32_C( -581.23), SIMDE_FLOAT32_C( -93.94), SIMDE_FLOAT32_C( 629.00), SIMDE_FLOAT32_C( 298.37), SIMDE_FLOAT32_C( 989.17), SIMDE_FLOAT32_C( -120.19), SIMDE_FLOAT32_C( -845.32) }, { SIMDE_FLOAT32_C( 551.34), SIMDE_FLOAT32_C( -101.58), SIMDE_FLOAT32_C( -640.56), SIMDE_FLOAT32_C( 629.58), SIMDE_FLOAT32_C( -886.31), SIMDE_FLOAT32_C( -926.40), 
SIMDE_FLOAT32_C( -492.71), SIMDE_FLOAT32_C( 24.75), SIMDE_FLOAT32_C( 416.64), SIMDE_FLOAT32_C( -167.21), SIMDE_FLOAT32_C( 944.19), SIMDE_FLOAT32_C( -64.86), SIMDE_FLOAT32_C( 903.79), SIMDE_FLOAT32_C( 756.16), SIMDE_FLOAT32_C( 256.46), SIMDE_FLOAT32_C( 846.48) }, { SIMDE_FLOAT32_C( -122.74), SIMDE_FLOAT32_C( -640.56), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 629.58), SIMDE_FLOAT32_C( 332.72), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 24.75), SIMDE_FLOAT32_C( -93.94), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 629.00), SIMDE_FLOAT32_C( -64.86), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 256.46), SIMDE_FLOAT32_C( -845.32), SIMDE_FLOAT32_C( 846.48) } }, { UINT16_C( 9611), { SIMDE_FLOAT32_C( -640.62), SIMDE_FLOAT32_C( 591.45), SIMDE_FLOAT32_C( -288.54), SIMDE_FLOAT32_C( 692.10), SIMDE_FLOAT32_C( 56.18), SIMDE_FLOAT32_C( -366.22), SIMDE_FLOAT32_C( -889.14), SIMDE_FLOAT32_C( 962.24), SIMDE_FLOAT32_C( -737.23), SIMDE_FLOAT32_C( 409.23), SIMDE_FLOAT32_C( 951.41), SIMDE_FLOAT32_C( 142.58), SIMDE_FLOAT32_C( 563.90), SIMDE_FLOAT32_C( 502.75), SIMDE_FLOAT32_C( -959.00), SIMDE_FLOAT32_C( 923.35) }, { SIMDE_FLOAT32_C( 132.33), SIMDE_FLOAT32_C( -845.31), SIMDE_FLOAT32_C( 996.94), SIMDE_FLOAT32_C( 639.62), SIMDE_FLOAT32_C( 179.44), SIMDE_FLOAT32_C( 413.58), SIMDE_FLOAT32_C( -527.59), SIMDE_FLOAT32_C( 123.63), SIMDE_FLOAT32_C( -651.28), SIMDE_FLOAT32_C( -623.79), SIMDE_FLOAT32_C( -120.21), SIMDE_FLOAT32_C( 605.18), SIMDE_FLOAT32_C( -777.31), SIMDE_FLOAT32_C( -839.53), SIMDE_FLOAT32_C( 738.90), SIMDE_FLOAT32_C( -417.93) }, { SIMDE_FLOAT32_C( -288.54), SIMDE_FLOAT32_C( 996.94), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 639.62), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 123.63), SIMDE_FLOAT32_C( 951.41), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 142.58), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 738.90), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { UINT16_C(47427), { SIMDE_FLOAT32_C( 
-725.84), SIMDE_FLOAT32_C( -191.90), SIMDE_FLOAT32_C( 84.13), SIMDE_FLOAT32_C( -614.97), SIMDE_FLOAT32_C( -229.66), SIMDE_FLOAT32_C( 346.90), SIMDE_FLOAT32_C( 794.26), SIMDE_FLOAT32_C( -278.25), SIMDE_FLOAT32_C( -510.51), SIMDE_FLOAT32_C( 358.16), SIMDE_FLOAT32_C( -775.50), SIMDE_FLOAT32_C( -469.51), SIMDE_FLOAT32_C( 281.51), SIMDE_FLOAT32_C( 356.83), SIMDE_FLOAT32_C( -314.82), SIMDE_FLOAT32_C( 278.45) }, { SIMDE_FLOAT32_C( -3.55), SIMDE_FLOAT32_C( 864.62), SIMDE_FLOAT32_C( -307.97), SIMDE_FLOAT32_C( 468.87), SIMDE_FLOAT32_C( -11.75), SIMDE_FLOAT32_C( 40.75), SIMDE_FLOAT32_C( 845.07), SIMDE_FLOAT32_C( 868.04), SIMDE_FLOAT32_C( -354.07), SIMDE_FLOAT32_C( -932.24), SIMDE_FLOAT32_C( -971.49), SIMDE_FLOAT32_C( -615.17), SIMDE_FLOAT32_C( -350.17), SIMDE_FLOAT32_C( 780.43), SIMDE_FLOAT32_C( -164.81), SIMDE_FLOAT32_C( -76.00) }, { SIMDE_FLOAT32_C( 84.13), SIMDE_FLOAT32_C( -307.97), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -278.25), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -775.50), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -615.17), SIMDE_FLOAT32_C( -314.82), SIMDE_FLOAT32_C( -164.81), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -76.00) } }, { UINT16_C(61115), { SIMDE_FLOAT32_C( 309.02), SIMDE_FLOAT32_C( 358.87), SIMDE_FLOAT32_C( 266.23), SIMDE_FLOAT32_C( 103.28), SIMDE_FLOAT32_C( -919.38), SIMDE_FLOAT32_C( 755.71), SIMDE_FLOAT32_C( -538.56), SIMDE_FLOAT32_C( -694.88), SIMDE_FLOAT32_C( -713.79), SIMDE_FLOAT32_C( 742.95), SIMDE_FLOAT32_C( 661.95), SIMDE_FLOAT32_C( -28.61), SIMDE_FLOAT32_C( 21.40), SIMDE_FLOAT32_C( -341.60), SIMDE_FLOAT32_C( -163.99), SIMDE_FLOAT32_C( 713.43) }, { SIMDE_FLOAT32_C( -872.73), SIMDE_FLOAT32_C( 824.26), SIMDE_FLOAT32_C( -245.82), SIMDE_FLOAT32_C( 972.34), SIMDE_FLOAT32_C( 692.31), SIMDE_FLOAT32_C( 400.12), SIMDE_FLOAT32_C( -959.90), SIMDE_FLOAT32_C( 720.81), SIMDE_FLOAT32_C( 784.95), SIMDE_FLOAT32_C( -310.06), SIMDE_FLOAT32_C( 501.24), SIMDE_FLOAT32_C( 
-379.86), SIMDE_FLOAT32_C( 613.93), SIMDE_FLOAT32_C( -910.23), SIMDE_FLOAT32_C( -460.54), SIMDE_FLOAT32_C( -77.04) }, { SIMDE_FLOAT32_C( 266.23), SIMDE_FLOAT32_C( -245.82), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 972.34), SIMDE_FLOAT32_C( -538.56), SIMDE_FLOAT32_C( -959.90), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 720.81), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 501.24), SIMDE_FLOAT32_C( -28.61), SIMDE_FLOAT32_C( -379.86), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -460.54), SIMDE_FLOAT32_C( 713.43), SIMDE_FLOAT32_C( -77.04) } }, { UINT16_C(43528), { SIMDE_FLOAT32_C( -973.76), SIMDE_FLOAT32_C( 529.26), SIMDE_FLOAT32_C( 561.40), SIMDE_FLOAT32_C( -512.32), SIMDE_FLOAT32_C( 834.38), SIMDE_FLOAT32_C( 847.61), SIMDE_FLOAT32_C( -769.38), SIMDE_FLOAT32_C( 496.33), SIMDE_FLOAT32_C( -181.01), SIMDE_FLOAT32_C( 252.02), SIMDE_FLOAT32_C( -845.27), SIMDE_FLOAT32_C( 655.01), SIMDE_FLOAT32_C( -34.55), SIMDE_FLOAT32_C( -718.00), SIMDE_FLOAT32_C( 479.27), SIMDE_FLOAT32_C( 719.63) }, { SIMDE_FLOAT32_C( -745.66), SIMDE_FLOAT32_C( 171.58), SIMDE_FLOAT32_C( 119.74), SIMDE_FLOAT32_C( -705.55), SIMDE_FLOAT32_C( -107.61), SIMDE_FLOAT32_C( -95.31), SIMDE_FLOAT32_C( -15.62), SIMDE_FLOAT32_C( -606.37), SIMDE_FLOAT32_C( 524.83), SIMDE_FLOAT32_C( -401.68), SIMDE_FLOAT32_C( -516.59), SIMDE_FLOAT32_C( -935.71), SIMDE_FLOAT32_C( 521.28), SIMDE_FLOAT32_C( 932.05), SIMDE_FLOAT32_C( 869.98), SIMDE_FLOAT32_C( 547.51) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -705.55), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -516.59), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -935.71), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 869.98), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 547.51) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); 
simde__m512 r = simde_mm512_maskz_unpackhi_ps(test_vec[i].k, a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_unpackhi_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -303.04), SIMDE_FLOAT64_C( 484.86), SIMDE_FLOAT64_C( -578.12), SIMDE_FLOAT64_C( 269.18), SIMDE_FLOAT64_C( -655.06), SIMDE_FLOAT64_C( -192.80), SIMDE_FLOAT64_C( -504.95), SIMDE_FLOAT64_C( -13.86) }, { SIMDE_FLOAT64_C( -659.88), SIMDE_FLOAT64_C( -876.52), SIMDE_FLOAT64_C( 331.70), SIMDE_FLOAT64_C( 855.30), SIMDE_FLOAT64_C( -350.13), SIMDE_FLOAT64_C( -147.74), SIMDE_FLOAT64_C( 998.52), SIMDE_FLOAT64_C( 390.85) }, { SIMDE_FLOAT64_C( 484.86), SIMDE_FLOAT64_C( -876.52), SIMDE_FLOAT64_C( 269.18), SIMDE_FLOAT64_C( 855.30), SIMDE_FLOAT64_C( -192.80), SIMDE_FLOAT64_C( -147.74), SIMDE_FLOAT64_C( -13.86), SIMDE_FLOAT64_C( 390.85) } }, { { SIMDE_FLOAT64_C( -87.98), SIMDE_FLOAT64_C( 370.18), SIMDE_FLOAT64_C( -919.77), SIMDE_FLOAT64_C( 771.23), SIMDE_FLOAT64_C( 18.30), SIMDE_FLOAT64_C( 191.55), SIMDE_FLOAT64_C( -358.05), SIMDE_FLOAT64_C( 800.62) }, { SIMDE_FLOAT64_C( 678.98), SIMDE_FLOAT64_C( 604.48), SIMDE_FLOAT64_C( -562.21), SIMDE_FLOAT64_C( -868.39), SIMDE_FLOAT64_C( 766.01), SIMDE_FLOAT64_C( -121.09), SIMDE_FLOAT64_C( 212.43), SIMDE_FLOAT64_C( -537.02) }, { SIMDE_FLOAT64_C( 370.18), SIMDE_FLOAT64_C( 604.48), SIMDE_FLOAT64_C( 771.23), SIMDE_FLOAT64_C( -868.39), SIMDE_FLOAT64_C( 191.55), SIMDE_FLOAT64_C( -121.09), SIMDE_FLOAT64_C( 800.62), SIMDE_FLOAT64_C( -537.02) } }, { { SIMDE_FLOAT64_C( -636.22), SIMDE_FLOAT64_C( 634.32), SIMDE_FLOAT64_C( 732.16), SIMDE_FLOAT64_C( -291.28), SIMDE_FLOAT64_C( -558.48), SIMDE_FLOAT64_C( -772.79), SIMDE_FLOAT64_C( 694.86), SIMDE_FLOAT64_C( -218.36) }, { SIMDE_FLOAT64_C( -649.31), SIMDE_FLOAT64_C( 26.55), SIMDE_FLOAT64_C( -363.06), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 878.82), 
SIMDE_FLOAT64_C( -364.54), SIMDE_FLOAT64_C( -608.59), SIMDE_FLOAT64_C( -209.17) }, { SIMDE_FLOAT64_C( 634.32), SIMDE_FLOAT64_C( 26.55), SIMDE_FLOAT64_C( -291.28), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( -772.79), SIMDE_FLOAT64_C( -364.54), SIMDE_FLOAT64_C( -218.36), SIMDE_FLOAT64_C( -209.17) } }, { { SIMDE_FLOAT64_C( -994.36), SIMDE_FLOAT64_C( -528.36), SIMDE_FLOAT64_C( -437.93), SIMDE_FLOAT64_C( 23.94), SIMDE_FLOAT64_C( 663.19), SIMDE_FLOAT64_C( 204.02), SIMDE_FLOAT64_C( -175.44), SIMDE_FLOAT64_C( 342.17) }, { SIMDE_FLOAT64_C( -191.50), SIMDE_FLOAT64_C( 262.35), SIMDE_FLOAT64_C( 473.78), SIMDE_FLOAT64_C( -425.48), SIMDE_FLOAT64_C( -858.74), SIMDE_FLOAT64_C( -313.78), SIMDE_FLOAT64_C( 37.50), SIMDE_FLOAT64_C( -494.96) }, { SIMDE_FLOAT64_C( -528.36), SIMDE_FLOAT64_C( 262.35), SIMDE_FLOAT64_C( 23.94), SIMDE_FLOAT64_C( -425.48), SIMDE_FLOAT64_C( 204.02), SIMDE_FLOAT64_C( -313.78), SIMDE_FLOAT64_C( 342.17), SIMDE_FLOAT64_C( -494.96) } }, { { SIMDE_FLOAT64_C( -679.47), SIMDE_FLOAT64_C( -230.35), SIMDE_FLOAT64_C( 213.75), SIMDE_FLOAT64_C( -237.95), SIMDE_FLOAT64_C( -3.14), SIMDE_FLOAT64_C( -91.39), SIMDE_FLOAT64_C( 543.69), SIMDE_FLOAT64_C( 347.54) }, { SIMDE_FLOAT64_C( 935.16), SIMDE_FLOAT64_C( -819.37), SIMDE_FLOAT64_C( -651.90), SIMDE_FLOAT64_C( 813.98), SIMDE_FLOAT64_C( -183.91), SIMDE_FLOAT64_C( -260.49), SIMDE_FLOAT64_C( -395.18), SIMDE_FLOAT64_C( -178.27) }, { SIMDE_FLOAT64_C( -230.35), SIMDE_FLOAT64_C( -819.37), SIMDE_FLOAT64_C( -237.95), SIMDE_FLOAT64_C( 813.98), SIMDE_FLOAT64_C( -91.39), SIMDE_FLOAT64_C( -260.49), SIMDE_FLOAT64_C( 347.54), SIMDE_FLOAT64_C( -178.27) } }, { { SIMDE_FLOAT64_C( 211.15), SIMDE_FLOAT64_C( 166.89), SIMDE_FLOAT64_C( 845.67), SIMDE_FLOAT64_C( -125.66), SIMDE_FLOAT64_C( -629.09), SIMDE_FLOAT64_C( -329.77), SIMDE_FLOAT64_C( -783.49), SIMDE_FLOAT64_C( 179.41) }, { SIMDE_FLOAT64_C( 932.58), SIMDE_FLOAT64_C( 690.29), SIMDE_FLOAT64_C( 753.93), SIMDE_FLOAT64_C( -926.16), SIMDE_FLOAT64_C( -623.49), SIMDE_FLOAT64_C( -208.57), SIMDE_FLOAT64_C( 
-421.12), SIMDE_FLOAT64_C( -302.96) }, { SIMDE_FLOAT64_C( 166.89), SIMDE_FLOAT64_C( 690.29), SIMDE_FLOAT64_C( -125.66), SIMDE_FLOAT64_C( -926.16), SIMDE_FLOAT64_C( -329.77), SIMDE_FLOAT64_C( -208.57), SIMDE_FLOAT64_C( 179.41), SIMDE_FLOAT64_C( -302.96) } }, { { SIMDE_FLOAT64_C( 561.08), SIMDE_FLOAT64_C( 792.63), SIMDE_FLOAT64_C( 459.09), SIMDE_FLOAT64_C( -442.06), SIMDE_FLOAT64_C( -298.76), SIMDE_FLOAT64_C( 2.78), SIMDE_FLOAT64_C( 905.48), SIMDE_FLOAT64_C( -363.60) }, { SIMDE_FLOAT64_C( 183.42), SIMDE_FLOAT64_C( -746.41), SIMDE_FLOAT64_C( -549.61), SIMDE_FLOAT64_C( 999.50), SIMDE_FLOAT64_C( -6.90), SIMDE_FLOAT64_C( 55.20), SIMDE_FLOAT64_C( -178.77), SIMDE_FLOAT64_C( -795.75) }, { SIMDE_FLOAT64_C( 792.63), SIMDE_FLOAT64_C( -746.41), SIMDE_FLOAT64_C( -442.06), SIMDE_FLOAT64_C( 999.50), SIMDE_FLOAT64_C( 2.78), SIMDE_FLOAT64_C( 55.20), SIMDE_FLOAT64_C( -363.60), SIMDE_FLOAT64_C( -795.75) } }, { { SIMDE_FLOAT64_C( -777.91), SIMDE_FLOAT64_C( -333.10), SIMDE_FLOAT64_C( 78.59), SIMDE_FLOAT64_C( -407.00), SIMDE_FLOAT64_C( 337.13), SIMDE_FLOAT64_C( 295.09), SIMDE_FLOAT64_C( 772.42), SIMDE_FLOAT64_C( 269.71) }, { SIMDE_FLOAT64_C( -14.62), SIMDE_FLOAT64_C( 526.35), SIMDE_FLOAT64_C( 343.55), SIMDE_FLOAT64_C( 361.89), SIMDE_FLOAT64_C( -682.22), SIMDE_FLOAT64_C( 922.43), SIMDE_FLOAT64_C( -941.07), SIMDE_FLOAT64_C( 878.86) }, { SIMDE_FLOAT64_C( -333.10), SIMDE_FLOAT64_C( 526.35), SIMDE_FLOAT64_C( -407.00), SIMDE_FLOAT64_C( 361.89), SIMDE_FLOAT64_C( 295.09), SIMDE_FLOAT64_C( 922.43), SIMDE_FLOAT64_C( 269.71), SIMDE_FLOAT64_C( 878.86) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_mm512_unpackhi_pd(a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_unpackhi_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const 
simde__mmask8 k; const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 622.29), SIMDE_FLOAT64_C( -234.57), SIMDE_FLOAT64_C( 242.01), SIMDE_FLOAT64_C( 603.16), SIMDE_FLOAT64_C( -763.13), SIMDE_FLOAT64_C( -189.71), SIMDE_FLOAT64_C( -905.64), SIMDE_FLOAT64_C( -228.31) }, UINT8_C(111), { SIMDE_FLOAT64_C( -284.33), SIMDE_FLOAT64_C( 76.05), SIMDE_FLOAT64_C( 485.40), SIMDE_FLOAT64_C( 792.35), SIMDE_FLOAT64_C( 520.35), SIMDE_FLOAT64_C( -375.72), SIMDE_FLOAT64_C( 317.52), SIMDE_FLOAT64_C( -414.64) }, { SIMDE_FLOAT64_C( -632.01), SIMDE_FLOAT64_C( -915.99), SIMDE_FLOAT64_C( 85.58), SIMDE_FLOAT64_C( -240.99), SIMDE_FLOAT64_C( 54.79), SIMDE_FLOAT64_C( 838.88), SIMDE_FLOAT64_C( 324.71), SIMDE_FLOAT64_C( 651.03) }, { SIMDE_FLOAT64_C( 76.05), SIMDE_FLOAT64_C( -915.99), SIMDE_FLOAT64_C( 792.35), SIMDE_FLOAT64_C( -240.99), SIMDE_FLOAT64_C( -763.13), SIMDE_FLOAT64_C( 838.88), SIMDE_FLOAT64_C( -414.64), SIMDE_FLOAT64_C( -228.31) } }, { { SIMDE_FLOAT64_C( 707.10), SIMDE_FLOAT64_C( 330.16), SIMDE_FLOAT64_C( -750.77), SIMDE_FLOAT64_C( -538.69), SIMDE_FLOAT64_C( 746.68), SIMDE_FLOAT64_C( -52.02), SIMDE_FLOAT64_C( -916.39), SIMDE_FLOAT64_C( -487.89) }, UINT8_C(246), { SIMDE_FLOAT64_C( 686.77), SIMDE_FLOAT64_C( -251.02), SIMDE_FLOAT64_C( 0.28), SIMDE_FLOAT64_C( 781.13), SIMDE_FLOAT64_C( 520.67), SIMDE_FLOAT64_C( -181.67), SIMDE_FLOAT64_C( -503.21), SIMDE_FLOAT64_C( -403.28) }, { SIMDE_FLOAT64_C( -696.27), SIMDE_FLOAT64_C( -710.85), SIMDE_FLOAT64_C( -882.93), SIMDE_FLOAT64_C( -71.98), SIMDE_FLOAT64_C( 606.67), SIMDE_FLOAT64_C( -297.57), SIMDE_FLOAT64_C( 296.01), SIMDE_FLOAT64_C( 690.68) }, { SIMDE_FLOAT64_C( 707.10), SIMDE_FLOAT64_C( -710.85), SIMDE_FLOAT64_C( 781.13), SIMDE_FLOAT64_C( -538.69), SIMDE_FLOAT64_C( -181.67), SIMDE_FLOAT64_C( -297.57), SIMDE_FLOAT64_C( -403.28), SIMDE_FLOAT64_C( 690.68) } }, { { SIMDE_FLOAT64_C( 788.01), SIMDE_FLOAT64_C( -944.98), SIMDE_FLOAT64_C( -254.53), SIMDE_FLOAT64_C( 626.89), SIMDE_FLOAT64_C( 
379.74), SIMDE_FLOAT64_C( -603.50), SIMDE_FLOAT64_C( 333.99), SIMDE_FLOAT64_C( -290.11) }, UINT8_C( 94), { SIMDE_FLOAT64_C( 795.30), SIMDE_FLOAT64_C( -543.43), SIMDE_FLOAT64_C( 593.70), SIMDE_FLOAT64_C( 878.91), SIMDE_FLOAT64_C( -31.32), SIMDE_FLOAT64_C( 783.69), SIMDE_FLOAT64_C( 565.68), SIMDE_FLOAT64_C( 717.66) }, { SIMDE_FLOAT64_C( -216.03), SIMDE_FLOAT64_C( 346.80), SIMDE_FLOAT64_C( 238.33), SIMDE_FLOAT64_C( 602.30), SIMDE_FLOAT64_C( 843.60), SIMDE_FLOAT64_C( 835.06), SIMDE_FLOAT64_C( 906.03), SIMDE_FLOAT64_C( -867.26) }, { SIMDE_FLOAT64_C( 788.01), SIMDE_FLOAT64_C( 346.80), SIMDE_FLOAT64_C( 878.91), SIMDE_FLOAT64_C( 602.30), SIMDE_FLOAT64_C( 783.69), SIMDE_FLOAT64_C( -603.50), SIMDE_FLOAT64_C( 717.66), SIMDE_FLOAT64_C( -290.11) } }, { { SIMDE_FLOAT64_C( 952.12), SIMDE_FLOAT64_C( -165.95), SIMDE_FLOAT64_C( 739.41), SIMDE_FLOAT64_C( -345.45), SIMDE_FLOAT64_C( -869.94), SIMDE_FLOAT64_C( 430.09), SIMDE_FLOAT64_C( -557.44), SIMDE_FLOAT64_C( -814.92) }, UINT8_C(172), { SIMDE_FLOAT64_C( -930.55), SIMDE_FLOAT64_C( 564.82), SIMDE_FLOAT64_C( -427.95), SIMDE_FLOAT64_C( 403.44), SIMDE_FLOAT64_C( -725.29), SIMDE_FLOAT64_C( 217.77), SIMDE_FLOAT64_C( 198.74), SIMDE_FLOAT64_C( -268.72) }, { SIMDE_FLOAT64_C( -188.52), SIMDE_FLOAT64_C( 77.65), SIMDE_FLOAT64_C( 699.95), SIMDE_FLOAT64_C( -404.83), SIMDE_FLOAT64_C( -356.67), SIMDE_FLOAT64_C( 417.61), SIMDE_FLOAT64_C( 379.14), SIMDE_FLOAT64_C( 990.13) }, { SIMDE_FLOAT64_C( 952.12), SIMDE_FLOAT64_C( -165.95), SIMDE_FLOAT64_C( 403.44), SIMDE_FLOAT64_C( -404.83), SIMDE_FLOAT64_C( -869.94), SIMDE_FLOAT64_C( 417.61), SIMDE_FLOAT64_C( -557.44), SIMDE_FLOAT64_C( 990.13) } }, { { SIMDE_FLOAT64_C( -344.05), SIMDE_FLOAT64_C( -18.56), SIMDE_FLOAT64_C( 833.73), SIMDE_FLOAT64_C( -509.00), SIMDE_FLOAT64_C( -112.53), SIMDE_FLOAT64_C( 966.47), SIMDE_FLOAT64_C( -556.87), SIMDE_FLOAT64_C( 721.52) }, UINT8_C(219), { SIMDE_FLOAT64_C( 97.68), SIMDE_FLOAT64_C( 851.58), SIMDE_FLOAT64_C( 135.98), SIMDE_FLOAT64_C( 540.24), SIMDE_FLOAT64_C( -963.34), 
SIMDE_FLOAT64_C( 311.54), SIMDE_FLOAT64_C( 609.69), SIMDE_FLOAT64_C( 601.48) }, { SIMDE_FLOAT64_C( 883.59), SIMDE_FLOAT64_C( 13.12), SIMDE_FLOAT64_C( 876.19), SIMDE_FLOAT64_C( 101.36), SIMDE_FLOAT64_C( -788.13), SIMDE_FLOAT64_C( -392.54), SIMDE_FLOAT64_C( 912.84), SIMDE_FLOAT64_C( 289.52) }, { SIMDE_FLOAT64_C( 851.58), SIMDE_FLOAT64_C( 13.12), SIMDE_FLOAT64_C( 833.73), SIMDE_FLOAT64_C( 101.36), SIMDE_FLOAT64_C( 311.54), SIMDE_FLOAT64_C( 966.47), SIMDE_FLOAT64_C( 601.48), SIMDE_FLOAT64_C( 289.52) } }, { { SIMDE_FLOAT64_C( -692.58), SIMDE_FLOAT64_C( -491.99), SIMDE_FLOAT64_C( 932.85), SIMDE_FLOAT64_C( 725.03), SIMDE_FLOAT64_C( 887.14), SIMDE_FLOAT64_C( 922.98), SIMDE_FLOAT64_C( -619.02), SIMDE_FLOAT64_C( -131.42) }, UINT8_C(109), { SIMDE_FLOAT64_C( -128.02), SIMDE_FLOAT64_C( 756.05), SIMDE_FLOAT64_C( 723.19), SIMDE_FLOAT64_C( 315.11), SIMDE_FLOAT64_C( 477.57), SIMDE_FLOAT64_C( 429.08), SIMDE_FLOAT64_C( -587.21), SIMDE_FLOAT64_C( 329.15) }, { SIMDE_FLOAT64_C( -434.95), SIMDE_FLOAT64_C( 953.03), SIMDE_FLOAT64_C( 365.82), SIMDE_FLOAT64_C( 876.59), SIMDE_FLOAT64_C( 562.71), SIMDE_FLOAT64_C( -32.71), SIMDE_FLOAT64_C( 760.18), SIMDE_FLOAT64_C( -424.16) }, { SIMDE_FLOAT64_C( 756.05), SIMDE_FLOAT64_C( -491.99), SIMDE_FLOAT64_C( 315.11), SIMDE_FLOAT64_C( 876.59), SIMDE_FLOAT64_C( 887.14), SIMDE_FLOAT64_C( -32.71), SIMDE_FLOAT64_C( 329.15), SIMDE_FLOAT64_C( -131.42) } }, { { SIMDE_FLOAT64_C( -156.52), SIMDE_FLOAT64_C( -138.46), SIMDE_FLOAT64_C( -212.30), SIMDE_FLOAT64_C( 450.94), SIMDE_FLOAT64_C( -225.62), SIMDE_FLOAT64_C( -922.78), SIMDE_FLOAT64_C( 758.36), SIMDE_FLOAT64_C( 282.39) }, UINT8_C(216), { SIMDE_FLOAT64_C( 483.39), SIMDE_FLOAT64_C( 169.53), SIMDE_FLOAT64_C( 933.05), SIMDE_FLOAT64_C( 864.37), SIMDE_FLOAT64_C( -961.89), SIMDE_FLOAT64_C( 689.77), SIMDE_FLOAT64_C( -263.65), SIMDE_FLOAT64_C( 794.16) }, { SIMDE_FLOAT64_C( 412.95), SIMDE_FLOAT64_C( -948.54), SIMDE_FLOAT64_C( 271.73), SIMDE_FLOAT64_C( -157.97), SIMDE_FLOAT64_C( -535.75), SIMDE_FLOAT64_C( -399.12), 
SIMDE_FLOAT64_C( 407.08), SIMDE_FLOAT64_C( -582.72) }, { SIMDE_FLOAT64_C( -156.52), SIMDE_FLOAT64_C( -138.46), SIMDE_FLOAT64_C( -212.30), SIMDE_FLOAT64_C( -157.97), SIMDE_FLOAT64_C( 689.77), SIMDE_FLOAT64_C( -922.78), SIMDE_FLOAT64_C( 794.16), SIMDE_FLOAT64_C( -582.72) } }, { { SIMDE_FLOAT64_C( 966.70), SIMDE_FLOAT64_C( 283.67), SIMDE_FLOAT64_C( 979.99), SIMDE_FLOAT64_C( -66.01), SIMDE_FLOAT64_C( 43.85), SIMDE_FLOAT64_C( -444.18), SIMDE_FLOAT64_C( 777.47), SIMDE_FLOAT64_C( 905.40) }, UINT8_C( 28), { SIMDE_FLOAT64_C( 228.41), SIMDE_FLOAT64_C( -320.22), SIMDE_FLOAT64_C( 420.75), SIMDE_FLOAT64_C( -13.23), SIMDE_FLOAT64_C( 962.17), SIMDE_FLOAT64_C( 430.81), SIMDE_FLOAT64_C( -529.84), SIMDE_FLOAT64_C( 131.70) }, { SIMDE_FLOAT64_C( 363.87), SIMDE_FLOAT64_C( -665.47), SIMDE_FLOAT64_C( 169.81), SIMDE_FLOAT64_C( 53.63), SIMDE_FLOAT64_C( 70.89), SIMDE_FLOAT64_C( -36.03), SIMDE_FLOAT64_C( -533.41), SIMDE_FLOAT64_C( 122.35) }, { SIMDE_FLOAT64_C( 966.70), SIMDE_FLOAT64_C( 283.67), SIMDE_FLOAT64_C( -13.23), SIMDE_FLOAT64_C( 53.63), SIMDE_FLOAT64_C( 430.81), SIMDE_FLOAT64_C( -444.18), SIMDE_FLOAT64_C( 777.47), SIMDE_FLOAT64_C( 905.40) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_mm512_mask_unpackhi_pd(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_maskz_unpackhi_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { { UINT8_C(242), { SIMDE_FLOAT64_C( -414.51), SIMDE_FLOAT64_C( -12.29), SIMDE_FLOAT64_C( 160.05), SIMDE_FLOAT64_C( 653.04), SIMDE_FLOAT64_C( -28.81), SIMDE_FLOAT64_C( -415.64), SIMDE_FLOAT64_C( 403.48), SIMDE_FLOAT64_C( 868.73) }, { 
SIMDE_FLOAT64_C( -715.00), SIMDE_FLOAT64_C( -297.46), SIMDE_FLOAT64_C( -191.42), SIMDE_FLOAT64_C( 464.99), SIMDE_FLOAT64_C( 12.91), SIMDE_FLOAT64_C( 240.45), SIMDE_FLOAT64_C( 671.85), SIMDE_FLOAT64_C( 163.80) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -297.46), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -415.64), SIMDE_FLOAT64_C( 240.45), SIMDE_FLOAT64_C( 868.73), SIMDE_FLOAT64_C( 163.80) } }, { UINT8_C( 85), { SIMDE_FLOAT64_C( 676.07), SIMDE_FLOAT64_C( 842.78), SIMDE_FLOAT64_C( -171.45), SIMDE_FLOAT64_C( 899.43), SIMDE_FLOAT64_C( -868.32), SIMDE_FLOAT64_C( 497.70), SIMDE_FLOAT64_C( -644.76), SIMDE_FLOAT64_C( -32.60) }, { SIMDE_FLOAT64_C( -827.94), SIMDE_FLOAT64_C( -663.55), SIMDE_FLOAT64_C( 469.25), SIMDE_FLOAT64_C( 386.02), SIMDE_FLOAT64_C( -683.55), SIMDE_FLOAT64_C( 95.06), SIMDE_FLOAT64_C( 971.52), SIMDE_FLOAT64_C( 304.16) }, { SIMDE_FLOAT64_C( 842.78), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 899.43), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 497.70), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -32.60), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(113), { SIMDE_FLOAT64_C( 624.56), SIMDE_FLOAT64_C( -724.64), SIMDE_FLOAT64_C( -160.53), SIMDE_FLOAT64_C( 28.03), SIMDE_FLOAT64_C( -855.91), SIMDE_FLOAT64_C( 124.47), SIMDE_FLOAT64_C( 730.57), SIMDE_FLOAT64_C( -47.33) }, { SIMDE_FLOAT64_C( -410.54), SIMDE_FLOAT64_C( -256.52), SIMDE_FLOAT64_C( -806.88), SIMDE_FLOAT64_C( -738.68), SIMDE_FLOAT64_C( 907.28), SIMDE_FLOAT64_C( -267.36), SIMDE_FLOAT64_C( 937.39), SIMDE_FLOAT64_C( 750.06) }, { SIMDE_FLOAT64_C( -724.64), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 124.47), SIMDE_FLOAT64_C( -267.36), SIMDE_FLOAT64_C( -47.33), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(176), { SIMDE_FLOAT64_C( 836.82), SIMDE_FLOAT64_C( 881.74), SIMDE_FLOAT64_C( 58.89), SIMDE_FLOAT64_C( -807.94), SIMDE_FLOAT64_C( -150.85), SIMDE_FLOAT64_C( 230.95), SIMDE_FLOAT64_C( -471.49), SIMDE_FLOAT64_C( -681.61) }, { SIMDE_FLOAT64_C( -383.03), 
SIMDE_FLOAT64_C( -155.04), SIMDE_FLOAT64_C( 413.45), SIMDE_FLOAT64_C( -411.51), SIMDE_FLOAT64_C( -850.88), SIMDE_FLOAT64_C( 668.57), SIMDE_FLOAT64_C( -786.95), SIMDE_FLOAT64_C( -575.52) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 230.95), SIMDE_FLOAT64_C( 668.57), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -575.52) } }, { UINT8_C(127), { SIMDE_FLOAT64_C( 241.08), SIMDE_FLOAT64_C( -431.43), SIMDE_FLOAT64_C( 632.51), SIMDE_FLOAT64_C( -28.35), SIMDE_FLOAT64_C( 521.24), SIMDE_FLOAT64_C( -778.03), SIMDE_FLOAT64_C( 715.13), SIMDE_FLOAT64_C( 714.36) }, { SIMDE_FLOAT64_C( -516.71), SIMDE_FLOAT64_C( 622.41), SIMDE_FLOAT64_C( -553.00), SIMDE_FLOAT64_C( -579.33), SIMDE_FLOAT64_C( 372.48), SIMDE_FLOAT64_C( -991.81), SIMDE_FLOAT64_C( -742.51), SIMDE_FLOAT64_C( 254.22) }, { SIMDE_FLOAT64_C( -431.43), SIMDE_FLOAT64_C( 622.41), SIMDE_FLOAT64_C( -28.35), SIMDE_FLOAT64_C( -579.33), SIMDE_FLOAT64_C( -778.03), SIMDE_FLOAT64_C( -991.81), SIMDE_FLOAT64_C( 714.36), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(235), { SIMDE_FLOAT64_C( -550.44), SIMDE_FLOAT64_C( -896.63), SIMDE_FLOAT64_C( -701.96), SIMDE_FLOAT64_C( -21.93), SIMDE_FLOAT64_C( -578.24), SIMDE_FLOAT64_C( -84.99), SIMDE_FLOAT64_C( 823.02), SIMDE_FLOAT64_C( 835.22) }, { SIMDE_FLOAT64_C( 503.50), SIMDE_FLOAT64_C( 972.15), SIMDE_FLOAT64_C( 503.78), SIMDE_FLOAT64_C( 716.55), SIMDE_FLOAT64_C( -603.37), SIMDE_FLOAT64_C( -988.18), SIMDE_FLOAT64_C( -42.37), SIMDE_FLOAT64_C( -34.81) }, { SIMDE_FLOAT64_C( -896.63), SIMDE_FLOAT64_C( 972.15), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 716.55), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -988.18), SIMDE_FLOAT64_C( 835.22), SIMDE_FLOAT64_C( -34.81) } }, { UINT8_C( 35), { SIMDE_FLOAT64_C( 929.28), SIMDE_FLOAT64_C( -513.57), SIMDE_FLOAT64_C( 866.30), SIMDE_FLOAT64_C( 644.41), SIMDE_FLOAT64_C( -799.21), SIMDE_FLOAT64_C( -650.41), SIMDE_FLOAT64_C( 266.82), SIMDE_FLOAT64_C( -352.22) }, { SIMDE_FLOAT64_C( -229.74), SIMDE_FLOAT64_C( 
-360.70), SIMDE_FLOAT64_C( -344.02), SIMDE_FLOAT64_C( 27.76), SIMDE_FLOAT64_C( 893.52), SIMDE_FLOAT64_C( 723.06), SIMDE_FLOAT64_C( 477.32), SIMDE_FLOAT64_C( 996.89) }, { SIMDE_FLOAT64_C( -513.57), SIMDE_FLOAT64_C( -360.70), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 723.06), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { UINT8_C(169), { SIMDE_FLOAT64_C( -544.62), SIMDE_FLOAT64_C( -581.34), SIMDE_FLOAT64_C( -63.90), SIMDE_FLOAT64_C( -721.59), SIMDE_FLOAT64_C( -746.13), SIMDE_FLOAT64_C( -560.39), SIMDE_FLOAT64_C( -749.45), SIMDE_FLOAT64_C( 757.66) }, { SIMDE_FLOAT64_C( -843.85), SIMDE_FLOAT64_C( -352.82), SIMDE_FLOAT64_C( 769.48), SIMDE_FLOAT64_C( 113.78), SIMDE_FLOAT64_C( 612.37), SIMDE_FLOAT64_C( 413.81), SIMDE_FLOAT64_C( 43.06), SIMDE_FLOAT64_C( -901.20) }, { SIMDE_FLOAT64_C( -581.34), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 113.78), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 413.81), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -901.20) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_mm512_maskz_unpackhi_pd(test_vec[i].k, a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_unpackhi_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_unpackhi_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_unpackhi_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_unpackhi_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_unpackhi_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_unpackhi_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_unpackhi_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_unpackhi_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_unpackhi_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_unpackhi_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_unpackhi_epi64) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_unpackhi_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_unpackhi_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_unpackhi_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_unpackhi_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_unpackhi_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_unpackhi_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_unpackhi_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/unpacklo.c000066400000000000000000001217271400333146700172460ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN unpacklo #include #include static int test_simde_mm512_unpacklo_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int8_t a[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { { { INT8_C( 80), INT8_C( 87), INT8_C( 13), -INT8_C( 102), INT8_C( 22), -INT8_C( 121), -INT8_C( 61), -INT8_C( 74), INT8_C( 31), INT8_C( 47), -INT8_C( 90), -INT8_C( 58), -INT8_C( 19), -INT8_C( 27), INT8_C( 88), -INT8_C( 107), INT8_C( 92), INT8_C( 47), INT8_C( 105), -INT8_C( 121), INT8_C( 54), INT8_C( 74), -INT8_C( 28), -INT8_C( 100), INT8_C( 2), -INT8_C( 70), INT8_C( 98), INT8_C( 92), INT8_C( 28), INT8_C( 53), INT8_C( 83), INT8_C( 108), -INT8_C( 115), INT8_C( 96), INT8_C( 6), -INT8_C( 93), -INT8_C( 24), -INT8_C( 55), INT8_C( 89), INT8_C( 7), -INT8_C( 8), INT8_C( 0), -INT8_C( 51), -INT8_C( 27), -INT8_C( 27), INT8_C( 37), INT8_C( 122), INT8_C( 65), INT8_C( 85), -INT8_C( 29), -INT8_C( 56), -INT8_C( 117), INT8_C( 45), -INT8_C( 84), INT8_C( 39), INT8_C( 47), INT8_C( 103), -INT8_C( 119), -INT8_C( 116), -INT8_C( 125), -INT8_C( 66), -INT8_C( 33), -INT8_C( 17), INT8_C( 75) }, { INT8_C( 63), -INT8_C( 11), -INT8_C( 17), INT8_C( 39), -INT8_C( 66), INT8_C( 72), INT8_C( 47), -INT8_C( 74), INT8_C( 72), -INT8_C( 4), -INT8_C( 100), INT8_C( 45), INT8_C( 34), INT8_C( 22), INT8_C( 110), INT8_C( 119), -INT8_C( 6), INT8_C( 54), INT8_C( 2), INT8_C( 39), -INT8_C( 29), INT8_C( 41), INT8_C( 87), INT8_C( 74), -INT8_C( 78), -INT8_C( 29), -INT8_C( 51), INT8_C( 112), -INT8_C( 62), -INT8_C( 68), -INT8_C( 68), INT8_C( 1), -INT8_C( 79), -INT8_C( 85), INT8_C( 41), INT8_C( 111), -INT8_C( 13), INT8_C( 88), INT8_C( 37), INT8_C( 60), INT8_C( 84), -INT8_C( 63), INT8_C( 105), INT8_C( 118), -INT8_C( 40), -INT8_C( 40), -INT8_C( 19), -INT8_C( 46), INT8_C( 14), -INT8_C( 17), -INT8_C( 7), -INT8_C( 15), INT8_C( 24), INT8_C( 80), INT8_C( 59), -INT8_C( 54), INT8_C( 51), INT8_C( 8), INT8_C( 59), -INT8_C( 11), -INT8_C( 60), -INT8_C( 
9), -INT8_C( 9), INT8_C( 117) }, { INT8_C( 80), INT8_C( 63), INT8_C( 87), -INT8_C( 11), INT8_C( 13), -INT8_C( 17), -INT8_C( 102), INT8_C( 39), INT8_C( 22), -INT8_C( 66), -INT8_C( 121), INT8_C( 72), -INT8_C( 61), INT8_C( 47), -INT8_C( 74), -INT8_C( 74), INT8_C( 92), -INT8_C( 6), INT8_C( 47), INT8_C( 54), INT8_C( 105), INT8_C( 2), -INT8_C( 121), INT8_C( 39), INT8_C( 54), -INT8_C( 29), INT8_C( 74), INT8_C( 41), -INT8_C( 28), INT8_C( 87), -INT8_C( 100), INT8_C( 74), -INT8_C( 115), -INT8_C( 79), INT8_C( 96), -INT8_C( 85), INT8_C( 6), INT8_C( 41), -INT8_C( 93), INT8_C( 111), -INT8_C( 24), -INT8_C( 13), -INT8_C( 55), INT8_C( 88), INT8_C( 89), INT8_C( 37), INT8_C( 7), INT8_C( 60), INT8_C( 85), INT8_C( 14), -INT8_C( 29), -INT8_C( 17), -INT8_C( 56), -INT8_C( 7), -INT8_C( 117), -INT8_C( 15), INT8_C( 45), INT8_C( 24), -INT8_C( 84), INT8_C( 80), INT8_C( 39), INT8_C( 59), INT8_C( 47), -INT8_C( 54) } }, { { -INT8_C( 94), INT8_C( 32), -INT8_C( 28), -INT8_C( 107), INT8_C( 120), INT8_C( 10), -INT8_C( 47), -INT8_C( 52), -INT8_C( 53), INT8_C( 59), INT8_C( 67), -INT8_C( 93), INT8_C( 19), INT8_C( 48), INT8_C( 117), INT8_C( 33), INT8_C( 32), INT8_C( 111), INT8_C( 19), INT8_C( 56), -INT8_C( 65), INT8_C( 78), INT8_C( 3), -INT8_C( 13), INT8_C( 87), INT8_C( 62), -INT8_C( 24), INT8_C( 27), INT8_C( 53), -INT8_C( 33), -INT8_C( 111), -INT8_C( 41), -INT8_C( 1), INT8_C( 117), INT8_C( 108), INT8_C( 119), INT8_MAX, INT8_C( 62), INT8_C( 68), INT8_C( 75), INT8_C( 121), -INT8_C( 121), -INT8_C( 18), -INT8_C( 116), -INT8_C( 73), INT8_C( 100), -INT8_C( 83), -INT8_C( 41), -INT8_C( 45), -INT8_C( 64), INT8_C( 16), -INT8_C( 110), INT8_C( 15), INT8_C( 19), -INT8_C( 123), INT8_C( 102), INT8_C( 81), INT8_C( 110), -INT8_C( 127), -INT8_C( 122), INT8_C( 77), INT8_C( 18), INT8_C( 93), INT8_C( 77) }, { -INT8_C( 120), -INT8_C( 55), -INT8_C( 60), INT8_C( 7), INT8_C( 7), INT8_C( 8), INT8_C( 82), INT8_MIN, -INT8_C( 113), INT8_C( 65), INT8_C( 12), INT8_C( 71), -INT8_C( 91), -INT8_C( 70), INT8_C( 30), INT8_C( 120), INT8_C( 
122), INT8_C( 46), INT8_C( 10), -INT8_C( 119), INT8_C( 65), -INT8_C( 112), -INT8_C( 17), -INT8_C( 110), -INT8_C( 2), INT8_C( 113), INT8_C( 24), INT8_C( 75), -INT8_C( 125), INT8_C( 117), -INT8_C( 104), INT8_C( 11), INT8_C( 63), INT8_C( 93), INT8_C( 19), INT8_C( 70), INT8_C( 101), INT8_C( 101), -INT8_C( 57), -INT8_C( 11), -INT8_C( 90), -INT8_C( 45), INT8_C( 60), INT8_C( 75), -INT8_C( 115), INT8_C( 90), -INT8_C( 61), INT8_C( 8), -INT8_C( 119), -INT8_C( 50), -INT8_C( 111), -INT8_C( 54), INT8_C( 94), -INT8_C( 127), INT8_C( 93), INT8_C( 92), -INT8_C( 14), INT8_C( 117), -INT8_C( 89), INT8_C( 117), -INT8_C( 21), INT8_C( 64), -INT8_C( 127), INT8_C( 42) }, { -INT8_C( 94), -INT8_C( 120), INT8_C( 32), -INT8_C( 55), -INT8_C( 28), -INT8_C( 60), -INT8_C( 107), INT8_C( 7), INT8_C( 120), INT8_C( 7), INT8_C( 10), INT8_C( 8), -INT8_C( 47), INT8_C( 82), -INT8_C( 52), INT8_MIN, INT8_C( 32), INT8_C( 122), INT8_C( 111), INT8_C( 46), INT8_C( 19), INT8_C( 10), INT8_C( 56), -INT8_C( 119), -INT8_C( 65), INT8_C( 65), INT8_C( 78), -INT8_C( 112), INT8_C( 3), -INT8_C( 17), -INT8_C( 13), -INT8_C( 110), -INT8_C( 1), INT8_C( 63), INT8_C( 117), INT8_C( 93), INT8_C( 108), INT8_C( 19), INT8_C( 119), INT8_C( 70), INT8_MAX, INT8_C( 101), INT8_C( 62), INT8_C( 101), INT8_C( 68), -INT8_C( 57), INT8_C( 75), -INT8_C( 11), -INT8_C( 45), -INT8_C( 119), -INT8_C( 64), -INT8_C( 50), INT8_C( 16), -INT8_C( 111), -INT8_C( 110), -INT8_C( 54), INT8_C( 15), INT8_C( 94), INT8_C( 19), -INT8_C( 127), -INT8_C( 123), INT8_C( 93), INT8_C( 102), INT8_C( 92) } }, { { -INT8_C( 99), -INT8_C( 108), INT8_C( 112), INT8_C( 2), -INT8_C( 7), INT8_C( 55), -INT8_C( 9), -INT8_C( 96), INT8_C( 11), INT8_C( 51), -INT8_C( 21), -INT8_C( 104), -INT8_C( 114), -INT8_C( 81), -INT8_C( 96), INT8_C( 23), INT8_C( 125), INT8_C( 50), -INT8_C( 31), -INT8_C( 37), -INT8_C( 77), INT8_C( 62), INT8_C( 55), -INT8_C( 91), -INT8_C( 76), -INT8_C( 34), INT8_C( 26), -INT8_C( 97), INT8_C( 30), -INT8_C( 101), -INT8_C( 55), -INT8_C( 69), INT8_C( 47), INT8_C( 57), 
-INT8_C( 66), INT8_C( 41), INT8_C( 113), -INT8_C( 75), -INT8_C( 55), INT8_C( 124), -INT8_C( 23), -INT8_C( 76), INT8_C( 20), INT8_C( 119), INT8_C( 99), -INT8_C( 75), -INT8_C( 114), -INT8_C( 32), -INT8_C( 25), INT8_C( 111), -INT8_C( 69), -INT8_C( 102), -INT8_C( 82), -INT8_C( 14), INT8_C( 63), INT8_C( 98), -INT8_C( 47), INT8_C( 89), INT8_C( 1), -INT8_C( 17), -INT8_C( 11), -INT8_C( 54), -INT8_C( 85), INT8_C( 36) }, { INT8_C( 3), INT8_C( 105), INT8_C( 77), INT8_C( 116), INT8_C( 30), INT8_C( 22), -INT8_C( 16), INT8_C( 7), -INT8_C( 53), INT8_C( 5), INT8_C( 126), INT8_C( 46), -INT8_C( 70), INT8_C( 12), INT8_C( 15), -INT8_C( 95), INT8_C( 124), -INT8_C( 54), INT8_C( 59), INT8_C( 42), -INT8_C( 67), INT8_C( 122), -INT8_C( 116), -INT8_C( 114), -INT8_C( 45), -INT8_C( 115), INT8_C( 125), -INT8_C( 56), INT8_C( 87), INT8_C( 40), -INT8_C( 19), INT8_C( 90), -INT8_C( 111), INT8_C( 58), -INT8_C( 49), -INT8_C( 80), INT8_C( 81), -INT8_C( 65), -INT8_C( 73), INT8_C( 28), -INT8_C( 60), INT8_C( 54), INT8_C( 74), INT8_C( 126), INT8_C( 66), INT8_C( 89), INT8_C( 31), -INT8_C( 66), INT8_C( 36), INT8_C( 90), -INT8_C( 24), -INT8_C( 31), -INT8_C( 44), INT8_C( 116), INT8_C( 111), -INT8_C( 88), INT8_C( 1), -INT8_C( 20), INT8_C( 112), INT8_C( 88), INT8_C( 21), INT8_C( 93), -INT8_C( 77), -INT8_C( 90) }, { -INT8_C( 99), INT8_C( 3), -INT8_C( 108), INT8_C( 105), INT8_C( 112), INT8_C( 77), INT8_C( 2), INT8_C( 116), -INT8_C( 7), INT8_C( 30), INT8_C( 55), INT8_C( 22), -INT8_C( 9), -INT8_C( 16), -INT8_C( 96), INT8_C( 7), INT8_C( 125), INT8_C( 124), INT8_C( 50), -INT8_C( 54), -INT8_C( 31), INT8_C( 59), -INT8_C( 37), INT8_C( 42), -INT8_C( 77), -INT8_C( 67), INT8_C( 62), INT8_C( 122), INT8_C( 55), -INT8_C( 116), -INT8_C( 91), -INT8_C( 114), INT8_C( 47), -INT8_C( 111), INT8_C( 57), INT8_C( 58), -INT8_C( 66), -INT8_C( 49), INT8_C( 41), -INT8_C( 80), INT8_C( 113), INT8_C( 81), -INT8_C( 75), -INT8_C( 65), -INT8_C( 55), -INT8_C( 73), INT8_C( 124), INT8_C( 28), -INT8_C( 25), INT8_C( 36), INT8_C( 111), INT8_C( 90), 
-INT8_C( 69), -INT8_C( 24), -INT8_C( 102), -INT8_C( 31), -INT8_C( 82), -INT8_C( 44), -INT8_C( 14), INT8_C( 116), INT8_C( 63), INT8_C( 111), INT8_C( 98), -INT8_C( 88) } }, { { -INT8_C( 104), -INT8_C( 126), INT8_C( 86), -INT8_C( 23), INT8_C( 65), INT8_C( 14), INT8_C( 5), INT8_C( 6), INT8_C( 68), INT8_C( 79), -INT8_C( 124), -INT8_C( 122), -INT8_C( 87), -INT8_C( 92), INT8_C( 69), -INT8_C( 51), -INT8_C( 2), INT8_C( 45), -INT8_C( 82), -INT8_C( 45), -INT8_C( 94), INT8_C( 29), INT8_C( 123), -INT8_C( 93), INT8_C( 9), -INT8_C( 21), -INT8_C( 4), INT8_C( 30), INT8_C( 73), -INT8_C( 81), -INT8_C( 59), -INT8_C( 31), INT8_C( 49), INT8_C( 27), -INT8_C( 54), INT8_C( 114), INT8_C( 41), -INT8_C( 49), INT8_C( 120), INT8_C( 109), INT8_C( 30), -INT8_C( 3), -INT8_C( 12), -INT8_C( 57), -INT8_C( 95), INT8_C( 57), -INT8_C( 108), -INT8_C( 97), INT8_C( 102), INT8_C( 66), INT8_C( 114), INT8_C( 8), INT8_C( 95), -INT8_C( 19), -INT8_C( 84), INT8_C( 105), -INT8_C( 39), -INT8_C( 88), -INT8_C( 121), INT8_C( 34), INT8_C( 87), INT8_C( 76), INT8_C( 3), -INT8_C( 120) }, { INT8_C( 104), -INT8_C( 51), -INT8_C( 6), -INT8_C( 111), -INT8_C( 100), INT8_C( 115), -INT8_C( 1), -INT8_C( 70), INT8_C( 112), -INT8_C( 13), -INT8_C( 126), INT8_C( 17), INT8_C( 44), INT8_C( 22), -INT8_C( 80), -INT8_C( 110), INT8_C( 89), INT8_C( 35), -INT8_C( 101), -INT8_C( 72), INT8_C( 16), INT8_C( 71), INT8_C( 33), -INT8_C( 23), -INT8_C( 17), -INT8_C( 87), INT8_C( 11), INT8_C( 70), -INT8_C( 11), INT8_C( 14), -INT8_C( 50), INT8_C( 93), -INT8_C( 37), -INT8_C( 56), -INT8_C( 17), INT8_C( 119), INT8_C( 59), -INT8_C( 18), INT8_C( 50), -INT8_C( 85), -INT8_C( 31), -INT8_C( 76), -INT8_C( 68), INT8_C( 13), -INT8_C( 54), INT8_C( 109), -INT8_C( 97), INT8_C( 35), -INT8_C( 112), INT8_C( 58), -INT8_C( 36), -INT8_C( 96), -INT8_C( 127), -INT8_C( 3), -INT8_C( 118), INT8_C( 112), -INT8_C( 90), -INT8_C( 107), -INT8_C( 74), -INT8_C( 100), -INT8_C( 92), -INT8_C( 124), -INT8_C( 7), INT8_MAX }, { -INT8_C( 104), INT8_C( 104), -INT8_C( 126), -INT8_C( 51), 
INT8_C( 86), -INT8_C( 6), -INT8_C( 23), -INT8_C( 111), INT8_C( 65), -INT8_C( 100), INT8_C( 14), INT8_C( 115), INT8_C( 5), -INT8_C( 1), INT8_C( 6), -INT8_C( 70), -INT8_C( 2), INT8_C( 89), INT8_C( 45), INT8_C( 35), -INT8_C( 82), -INT8_C( 101), -INT8_C( 45), -INT8_C( 72), -INT8_C( 94), INT8_C( 16), INT8_C( 29), INT8_C( 71), INT8_C( 123), INT8_C( 33), -INT8_C( 93), -INT8_C( 23), INT8_C( 49), -INT8_C( 37), INT8_C( 27), -INT8_C( 56), -INT8_C( 54), -INT8_C( 17), INT8_C( 114), INT8_C( 119), INT8_C( 41), INT8_C( 59), -INT8_C( 49), -INT8_C( 18), INT8_C( 120), INT8_C( 50), INT8_C( 109), -INT8_C( 85), INT8_C( 102), -INT8_C( 112), INT8_C( 66), INT8_C( 58), INT8_C( 114), -INT8_C( 36), INT8_C( 8), -INT8_C( 96), INT8_C( 95), -INT8_C( 127), -INT8_C( 19), -INT8_C( 3), -INT8_C( 84), -INT8_C( 118), INT8_C( 105), INT8_C( 112) } }, { { INT8_C( 77), -INT8_C( 24), -INT8_C( 9), -INT8_C( 120), -INT8_C( 42), INT8_C( 41), INT8_C( 52), -INT8_C( 73), -INT8_C( 35), -INT8_C( 16), -INT8_C( 60), -INT8_C( 89), INT8_C( 93), INT8_C( 100), -INT8_C( 53), -INT8_C( 19), -INT8_C( 98), -INT8_C( 89), -INT8_C( 114), INT8_C( 32), -INT8_C( 92), INT8_C( 24), -INT8_C( 112), INT8_C( 75), -INT8_C( 83), INT8_C( 71), -INT8_C( 25), INT8_C( 81), -INT8_C( 53), -INT8_C( 32), -INT8_C( 47), INT8_C( 24), -INT8_C( 55), -INT8_C( 56), -INT8_C( 95), -INT8_C( 97), -INT8_C( 15), -INT8_C( 43), INT8_C( 87), -INT8_C( 50), -INT8_C( 59), INT8_C( 27), INT8_C( 117), INT8_C( 35), INT8_MAX, INT8_C( 64), INT8_C( 16), INT8_C( 30), -INT8_C( 25), -INT8_C( 98), INT8_C( 62), -INT8_C( 116), -INT8_C( 74), -INT8_C( 50), -INT8_C( 41), INT8_C( 100), INT8_C( 21), -INT8_C( 66), -INT8_C( 75), -INT8_C( 31), -INT8_C( 98), -INT8_C( 122), -INT8_C( 7), INT8_C( 103) }, { INT8_C( 78), -INT8_C( 102), INT8_C( 7), INT8_C( 63), INT8_C( 111), INT8_C( 94), INT8_C( 13), INT8_C( 53), INT8_C( 121), -INT8_C( 125), INT8_C( 88), -INT8_C( 7), -INT8_C( 61), INT8_C( 104), INT8_C( 23), -INT8_C( 85), INT8_C( 7), INT8_C( 85), INT8_C( 55), -INT8_C( 67), INT8_C( 35), INT8_C( 
14), INT8_C( 33), INT8_C( 57), -INT8_C( 52), -INT8_C( 41), INT8_C( 26), INT8_C( 106), INT8_C( 93), INT8_C( 19), -INT8_C( 46), -INT8_C( 84), -INT8_C( 82), -INT8_C( 39), -INT8_C( 21), INT8_C( 29), INT8_C( 55), -INT8_C( 7), INT8_C( 82), -INT8_C( 80), INT8_C( 124), -INT8_C( 86), -INT8_C( 87), INT8_C( 63), INT8_C( 19), -INT8_C( 64), -INT8_C( 22), INT8_C( 26), INT8_C( 21), INT8_C( 33), -INT8_C( 41), INT8_C( 57), INT8_C( 47), -INT8_C( 7), INT8_C( 114), -INT8_C( 5), -INT8_C( 48), -INT8_C( 116), INT8_C( 102), INT8_C( 45), -INT8_C( 97), INT8_C( 56), -INT8_C( 39), INT8_C( 77) }, { INT8_C( 77), INT8_C( 78), -INT8_C( 24), -INT8_C( 102), -INT8_C( 9), INT8_C( 7), -INT8_C( 120), INT8_C( 63), -INT8_C( 42), INT8_C( 111), INT8_C( 41), INT8_C( 94), INT8_C( 52), INT8_C( 13), -INT8_C( 73), INT8_C( 53), -INT8_C( 98), INT8_C( 7), -INT8_C( 89), INT8_C( 85), -INT8_C( 114), INT8_C( 55), INT8_C( 32), -INT8_C( 67), -INT8_C( 92), INT8_C( 35), INT8_C( 24), INT8_C( 14), -INT8_C( 112), INT8_C( 33), INT8_C( 75), INT8_C( 57), -INT8_C( 55), -INT8_C( 82), -INT8_C( 56), -INT8_C( 39), -INT8_C( 95), -INT8_C( 21), -INT8_C( 97), INT8_C( 29), -INT8_C( 15), INT8_C( 55), -INT8_C( 43), -INT8_C( 7), INT8_C( 87), INT8_C( 82), -INT8_C( 50), -INT8_C( 80), -INT8_C( 25), INT8_C( 21), -INT8_C( 98), INT8_C( 33), INT8_C( 62), -INT8_C( 41), -INT8_C( 116), INT8_C( 57), -INT8_C( 74), INT8_C( 47), -INT8_C( 50), -INT8_C( 7), -INT8_C( 41), INT8_C( 114), INT8_C( 100), -INT8_C( 5) } }, { { INT8_C( 17), -INT8_C( 59), INT8_C( 107), INT8_C( 72), -INT8_C( 66), -INT8_C( 67), -INT8_C( 8), INT8_C( 58), INT8_C( 104), -INT8_C( 94), INT8_C( 121), INT8_C( 123), INT8_C( 98), INT8_C( 100), -INT8_C( 107), INT8_C( 120), -INT8_C( 123), INT8_C( 108), -INT8_C( 79), -INT8_C( 75), INT8_C( 101), INT8_C( 35), -INT8_C( 80), INT8_C( 53), -INT8_C( 81), INT8_C( 22), INT8_C( 99), INT8_C( 78), INT8_C( 78), INT8_C( 60), -INT8_C( 100), INT8_C( 95), INT8_C( 1), INT8_C( 7), -INT8_C( 89), -INT8_C( 65), -INT8_C( 60), -INT8_C( 96), -INT8_C( 7), INT8_C( 44), 
INT8_C( 66), INT8_C( 115), -INT8_C( 89), -INT8_C( 92), -INT8_C( 41), INT8_C( 60), INT8_C( 28), INT8_C( 92), -INT8_C( 87), -INT8_C( 51), INT8_C( 17), INT8_C( 14), -INT8_C( 16), -INT8_C( 62), INT8_C( 68), -INT8_C( 97), -INT8_C( 40), -INT8_C( 89), -INT8_C( 18), INT8_C( 39), -INT8_C( 29), -INT8_C( 118), -INT8_C( 122), -INT8_C( 27) }, { -INT8_C( 111), INT8_C( 46), -INT8_C( 92), INT8_C( 85), -INT8_C( 50), -INT8_C( 98), -INT8_C( 126), INT8_C( 16), INT8_C( 17), INT8_C( 41), -INT8_C( 76), -INT8_C( 24), INT8_C( 102), -INT8_C( 47), INT8_C( 68), INT8_C( 15), -INT8_C( 98), INT8_C( 86), INT8_C( 29), -INT8_C( 113), INT8_C( 24), INT8_C( 97), INT8_C( 46), -INT8_C( 16), INT8_C( 8), INT8_C( 28), INT8_C( 23), -INT8_C( 20), -INT8_C( 90), -INT8_C( 98), -INT8_C( 47), INT8_C( 55), -INT8_C( 52), INT8_C( 117), -INT8_C( 115), -INT8_C( 102), INT8_C( 19), INT8_C( 15), -INT8_C( 86), INT8_C( 36), INT8_C( 56), INT8_C( 94), INT8_C( 12), -INT8_C( 98), INT8_C( 47), INT8_C( 81), -INT8_C( 83), -INT8_C( 50), -INT8_C( 89), -INT8_C( 53), INT8_C( 93), -INT8_C( 65), INT8_C( 44), -INT8_C( 117), -INT8_C( 81), INT8_C( 53), -INT8_C( 88), -INT8_C( 57), INT8_C( 33), INT8_C( 78), INT8_C( 101), -INT8_C( 14), -INT8_C( 122), INT8_C( 49) }, { INT8_C( 17), -INT8_C( 111), -INT8_C( 59), INT8_C( 46), INT8_C( 107), -INT8_C( 92), INT8_C( 72), INT8_C( 85), -INT8_C( 66), -INT8_C( 50), -INT8_C( 67), -INT8_C( 98), -INT8_C( 8), -INT8_C( 126), INT8_C( 58), INT8_C( 16), -INT8_C( 123), -INT8_C( 98), INT8_C( 108), INT8_C( 86), -INT8_C( 79), INT8_C( 29), -INT8_C( 75), -INT8_C( 113), INT8_C( 101), INT8_C( 24), INT8_C( 35), INT8_C( 97), -INT8_C( 80), INT8_C( 46), INT8_C( 53), -INT8_C( 16), INT8_C( 1), -INT8_C( 52), INT8_C( 7), INT8_C( 117), -INT8_C( 89), -INT8_C( 115), -INT8_C( 65), -INT8_C( 102), -INT8_C( 60), INT8_C( 19), -INT8_C( 96), INT8_C( 15), -INT8_C( 7), -INT8_C( 86), INT8_C( 44), INT8_C( 36), -INT8_C( 87), -INT8_C( 89), -INT8_C( 51), -INT8_C( 53), INT8_C( 17), INT8_C( 93), INT8_C( 14), -INT8_C( 65), -INT8_C( 16), INT8_C( 
44), -INT8_C( 62), -INT8_C( 117), INT8_C( 68), -INT8_C( 81), -INT8_C( 97), INT8_C( 53) } }, { { INT8_C( 103), INT8_C( 19), -INT8_C( 53), INT8_C( 123), INT8_C( 34), INT8_C( 117), -INT8_C( 97), INT8_C( 90), -INT8_C( 45), -INT8_C( 84), -INT8_C( 7), INT8_C( 3), -INT8_C( 3), -INT8_C( 90), -INT8_C( 47), -INT8_C( 92), INT8_C( 113), INT8_C( 46), INT8_C( 99), -INT8_C( 98), -INT8_C( 71), INT8_C( 18), -INT8_C( 45), INT8_C( 97), -INT8_C( 39), -INT8_C( 12), -INT8_C( 80), INT8_C( 62), -INT8_C( 26), INT8_C( 54), INT8_C( 111), INT8_C( 77), INT8_C( 73), INT8_C( 58), -INT8_C( 56), INT8_C( 107), -INT8_C( 81), INT8_C( 104), -INT8_C( 59), -INT8_C( 125), INT8_C( 20), -INT8_C( 66), -INT8_C( 122), INT8_C( 17), INT8_C( 101), INT8_C( 87), -INT8_C( 75), -INT8_C( 42), -INT8_C( 123), INT8_C( 24), INT8_C( 116), INT8_C( 62), INT8_C( 42), INT8_C( 71), -INT8_C( 96), INT8_C( 4), INT8_C( 59), INT8_C( 80), INT8_C( 66), INT8_C( 33), -INT8_C( 122), -INT8_C( 78), INT8_C( 111), -INT8_C( 49) }, { -INT8_C( 20), INT8_C( 55), INT8_C( 58), -INT8_C( 100), -INT8_C( 97), -INT8_C( 1), INT8_C( 31), -INT8_C( 77), -INT8_C( 66), -INT8_C( 91), -INT8_C( 60), INT8_C( 35), -INT8_C( 4), INT8_C( 121), -INT8_C( 7), -INT8_C( 127), -INT8_C( 111), INT8_C( 110), -INT8_C( 65), -INT8_C( 68), -INT8_C( 75), INT8_C( 95), -INT8_C( 64), -INT8_C( 15), -INT8_C( 81), INT8_C( 2), INT8_C( 18), INT8_C( 53), -INT8_C( 76), -INT8_C( 127), INT8_C( 4), -INT8_C( 95), -INT8_C( 71), INT8_C( 62), INT8_C( 61), INT8_C( 88), INT8_C( 62), INT8_C( 92), INT8_C( 12), -INT8_C( 4), INT8_C( 1), -INT8_C( 48), INT8_C( 31), -INT8_C( 3), INT8_C( 74), INT8_C( 24), INT8_C( 126), -INT8_C( 37), -INT8_C( 122), INT8_C( 61), -INT8_C( 105), INT8_C( 60), -INT8_C( 99), INT8_C( 87), INT8_C( 45), INT8_C( 76), INT8_C( 90), INT8_C( 63), -INT8_C( 126), INT8_C( 14), -INT8_C( 63), -INT8_C( 122), -INT8_C( 81), INT8_C( 122) }, { INT8_C( 103), -INT8_C( 20), INT8_C( 19), INT8_C( 55), -INT8_C( 53), INT8_C( 58), INT8_C( 123), -INT8_C( 100), INT8_C( 34), -INT8_C( 97), INT8_C( 117), 
-INT8_C( 1), -INT8_C( 97), INT8_C( 31), INT8_C( 90), -INT8_C( 77), INT8_C( 113), -INT8_C( 111), INT8_C( 46), INT8_C( 110), INT8_C( 99), -INT8_C( 65), -INT8_C( 98), -INT8_C( 68), -INT8_C( 71), -INT8_C( 75), INT8_C( 18), INT8_C( 95), -INT8_C( 45), -INT8_C( 64), INT8_C( 97), -INT8_C( 15), INT8_C( 73), -INT8_C( 71), INT8_C( 58), INT8_C( 62), -INT8_C( 56), INT8_C( 61), INT8_C( 107), INT8_C( 88), -INT8_C( 81), INT8_C( 62), INT8_C( 104), INT8_C( 92), -INT8_C( 59), INT8_C( 12), -INT8_C( 125), -INT8_C( 4), -INT8_C( 123), -INT8_C( 122), INT8_C( 24), INT8_C( 61), INT8_C( 116), -INT8_C( 105), INT8_C( 62), INT8_C( 60), INT8_C( 42), -INT8_C( 99), INT8_C( 71), INT8_C( 87), -INT8_C( 96), INT8_C( 45), INT8_C( 4), INT8_C( 76) } }, { { -INT8_C( 59), -INT8_C( 20), -INT8_C( 46), INT8_C( 3), INT8_C( 72), -INT8_C( 34), -INT8_C( 1), INT8_C( 73), -INT8_C( 81), INT8_C( 30), INT8_C( 70), -INT8_C( 7), INT8_C( 54), -INT8_C( 60), -INT8_C( 44), -INT8_C( 67), INT8_C( 2), INT8_C( 108), -INT8_C( 7), -INT8_C( 97), -INT8_C( 61), INT8_C( 38), -INT8_C( 21), INT8_C( 29), INT8_C( 101), INT8_C( 109), INT8_C( 44), INT8_C( 38), -INT8_C( 12), -INT8_C( 37), -INT8_C( 96), -INT8_C( 71), -INT8_C( 56), INT8_C( 115), -INT8_C( 68), INT8_C( 16), INT8_C( 81), -INT8_C( 69), INT8_C( 90), INT8_C( 0), -INT8_C( 39), -INT8_C( 96), -INT8_C( 7), INT8_C( 15), INT8_C( 101), -INT8_C( 50), -INT8_C( 52), INT8_C( 103), INT8_C( 58), -INT8_C( 59), INT8_C( 6), -INT8_C( 3), -INT8_C( 21), -INT8_C( 15), INT8_C( 27), INT8_C( 81), INT8_C( 95), INT8_C( 71), INT8_C( 119), INT8_C( 83), INT8_C( 34), INT8_C( 24), INT8_C( 12), -INT8_C( 22) }, { -INT8_C( 117), -INT8_C( 56), -INT8_C( 5), -INT8_C( 36), -INT8_C( 125), INT8_C( 85), -INT8_C( 35), INT8_C( 92), -INT8_C( 11), -INT8_C( 42), INT8_C( 107), INT8_C( 90), -INT8_C( 92), INT8_C( 56), -INT8_C( 63), -INT8_C( 34), -INT8_C( 3), -INT8_C( 57), -INT8_C( 36), -INT8_C( 23), -INT8_C( 71), -INT8_C( 9), INT8_C( 58), INT8_C( 24), INT8_C( 62), -INT8_C( 79), INT8_C( 107), INT8_C( 96), -INT8_C( 55), INT8_C( 
119), INT8_C( 75), INT8_C( 84), INT8_C( 63), INT8_C( 70), INT8_C( 49), -INT8_C( 62), -INT8_C( 101), INT8_C( 14), INT8_C( 30), -INT8_C( 112), -INT8_C( 28), -INT8_C( 119), -INT8_C( 21), -INT8_C( 119), -INT8_C( 63), -INT8_C( 84), INT8_C( 103), -INT8_C( 65), INT8_C( 116), INT8_C( 67), -INT8_C( 88), INT8_C( 45), INT8_C( 58), -INT8_C( 30), INT8_C( 69), INT8_C( 120), -INT8_C( 109), -INT8_C( 80), -INT8_C( 39), INT8_C( 93), INT8_C( 39), INT8_C( 36), -INT8_C( 79), INT8_C( 102) }, { -INT8_C( 59), -INT8_C( 117), -INT8_C( 20), -INT8_C( 56), -INT8_C( 46), -INT8_C( 5), INT8_C( 3), -INT8_C( 36), INT8_C( 72), -INT8_C( 125), -INT8_C( 34), INT8_C( 85), -INT8_C( 1), -INT8_C( 35), INT8_C( 73), INT8_C( 92), INT8_C( 2), -INT8_C( 3), INT8_C( 108), -INT8_C( 57), -INT8_C( 7), -INT8_C( 36), -INT8_C( 97), -INT8_C( 23), -INT8_C( 61), -INT8_C( 71), INT8_C( 38), -INT8_C( 9), -INT8_C( 21), INT8_C( 58), INT8_C( 29), INT8_C( 24), -INT8_C( 56), INT8_C( 63), INT8_C( 115), INT8_C( 70), -INT8_C( 68), INT8_C( 49), INT8_C( 16), -INT8_C( 62), INT8_C( 81), -INT8_C( 101), -INT8_C( 69), INT8_C( 14), INT8_C( 90), INT8_C( 30), INT8_C( 0), -INT8_C( 112), INT8_C( 58), INT8_C( 116), -INT8_C( 59), INT8_C( 67), INT8_C( 6), -INT8_C( 88), -INT8_C( 3), INT8_C( 45), -INT8_C( 21), INT8_C( 58), -INT8_C( 15), -INT8_C( 30), INT8_C( 27), INT8_C( 69), INT8_C( 81), INT8_C( 120) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_unpacklo_epi8(a, b); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm512_unpacklo_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { { { -INT16_C( 31331), -INT16_C( 31002), INT16_C( 23655), INT16_C( 30198), INT16_C( 10179), -INT16_C( 9689), INT16_C( 31352), INT16_C( 27403), INT16_C( 
9684), -INT16_C( 30117), -INT16_C( 8450), INT16_C( 25365), -INT16_C( 3020), -INT16_C( 21287), INT16_C( 13982), INT16_C( 15230), INT16_C( 26043), INT16_C( 8898), -INT16_C( 18239), -INT16_C( 31593), -INT16_C( 16417), INT16_C( 22367), INT16_C( 27193), INT16_C( 3779), INT16_C( 7824), -INT16_C( 29032), -INT16_C( 20996), INT16_C( 12530), -INT16_C( 13406), INT16_C( 16604), INT16_C( 23297), -INT16_C( 17029) }, { INT16_C( 15808), -INT16_C( 32289), INT16_C( 30709), -INT16_C( 11258), INT16_C( 25910), INT16_C( 28460), -INT16_C( 4145), INT16_C( 24445), INT16_C( 5645), INT16_C( 2798), -INT16_C( 7997), INT16_C( 25914), INT16_C( 6059), -INT16_C( 21083), INT16_C( 8562), INT16_C( 12906), INT16_C( 18782), INT16_C( 21683), -INT16_C( 17984), -INT16_C( 2520), INT16_C( 21534), -INT16_C( 4506), -INT16_C( 7357), INT16_C( 20813), INT16_C( 15353), -INT16_C( 17061), -INT16_C( 27365), -INT16_C( 14558), -INT16_C( 14164), INT16_C( 7796), -INT16_C( 8471), INT16_C( 18256) }, { -INT16_C( 31331), INT16_C( 15808), -INT16_C( 31002), -INT16_C( 32289), INT16_C( 23655), INT16_C( 30709), INT16_C( 30198), -INT16_C( 11258), INT16_C( 9684), INT16_C( 5645), -INT16_C( 30117), INT16_C( 2798), -INT16_C( 8450), -INT16_C( 7997), INT16_C( 25365), INT16_C( 25914), INT16_C( 26043), INT16_C( 18782), INT16_C( 8898), INT16_C( 21683), -INT16_C( 18239), -INT16_C( 17984), -INT16_C( 31593), -INT16_C( 2520), INT16_C( 7824), INT16_C( 15353), -INT16_C( 29032), -INT16_C( 17061), -INT16_C( 20996), -INT16_C( 27365), INT16_C( 12530), -INT16_C( 14558) } }, { { INT16_C( 1063), -INT16_C( 5989), -INT16_C( 15171), -INT16_C( 8994), INT16_C( 17432), INT16_C( 23754), INT16_C( 5928), INT16_C( 8621), INT16_C( 2131), INT16_C( 28382), INT16_C( 413), INT16_C( 18997), -INT16_C( 22071), -INT16_C( 19864), -INT16_C( 18041), -INT16_C( 20487), -INT16_C( 27203), INT16_C( 31383), INT16_C( 30041), INT16_C( 29014), INT16_C( 8378), -INT16_C( 7475), INT16_C( 31288), -INT16_C( 29949), -INT16_C( 7550), INT16_C( 8441), INT16_C( 12259), -INT16_C( 21398), 
-INT16_C( 11560), INT16_C( 24670), INT16_C( 22411), INT16_C( 18447) }, { -INT16_C( 22804), INT16_C( 17859), INT16_C( 6427), -INT16_C( 10825), -INT16_C( 31686), INT16_C( 29367), -INT16_C( 17409), -INT16_C( 32259), -INT16_C( 2403), -INT16_C( 32607), INT16_C( 2853), -INT16_C( 468), -INT16_C( 29986), INT16_C( 26974), INT16_C( 28129), -INT16_C( 12622), INT16_C( 29971), INT16_C( 11795), -INT16_C( 13682), -INT16_C( 14332), -INT16_C( 17585), INT16_C( 20026), INT16_C( 14198), INT16_C( 5071), INT16_C( 28974), INT16_C( 21395), -INT16_C( 16516), INT16_C( 23121), -INT16_C( 20663), INT16_C( 11204), INT16_C( 30236), INT16_C( 12281) }, { INT16_C( 1063), -INT16_C( 22804), -INT16_C( 5989), INT16_C( 17859), -INT16_C( 15171), INT16_C( 6427), -INT16_C( 8994), -INT16_C( 10825), INT16_C( 2131), -INT16_C( 2403), INT16_C( 28382), -INT16_C( 32607), INT16_C( 413), INT16_C( 2853), INT16_C( 18997), -INT16_C( 468), -INT16_C( 27203), INT16_C( 29971), INT16_C( 31383), INT16_C( 11795), INT16_C( 30041), -INT16_C( 13682), INT16_C( 29014), -INT16_C( 14332), -INT16_C( 7550), INT16_C( 28974), INT16_C( 8441), INT16_C( 21395), INT16_C( 12259), -INT16_C( 16516), -INT16_C( 21398), INT16_C( 23121) } }, { { INT16_C( 3307), INT16_C( 31070), INT16_C( 25303), INT16_C( 9794), INT16_C( 31773), -INT16_C( 27532), INT16_C( 17332), -INT16_C( 7513), INT16_C( 15284), INT16_C( 12597), -INT16_C( 30726), INT16_C( 17547), INT16_C( 20278), INT16_C( 21359), INT16_C( 26821), -INT16_C( 20350), -INT16_C( 8076), INT16_C( 19242), INT16_C( 27714), INT16_C( 24689), -INT16_C( 6680), -INT16_C( 25356), -INT16_C( 25815), -INT16_C( 8834), -INT16_C( 19242), -INT16_C( 12018), -INT16_C( 26053), INT16_C( 28949), -INT16_C( 31511), -INT16_C( 20540), INT16_C( 18412), INT16_C( 24671) }, { -INT16_C( 30425), INT16_C( 27308), INT16_C( 7669), -INT16_C( 8502), -INT16_C( 16893), INT16_C( 11386), -INT16_C( 1703), INT16_C( 12297), INT16_C( 6317), -INT16_C( 6143), INT16_C( 5810), -INT16_C( 25767), INT16_C( 7834), -INT16_C( 31158), -INT16_C( 21915), 
-INT16_C( 29466), -INT16_C( 28109), INT16_C( 10742), -INT16_C( 16208), -INT16_C( 19705), -INT16_C( 32386), -INT16_C( 10017), -INT16_C( 6022), INT16_C( 9992), INT16_C( 2304), -INT16_C( 19953), INT16_C( 26911), -INT16_C( 18098), -INT16_C( 26489), -INT16_C( 5057), INT16_C( 9538), INT16_C( 30328) }, { INT16_C( 3307), -INT16_C( 30425), INT16_C( 31070), INT16_C( 27308), INT16_C( 25303), INT16_C( 7669), INT16_C( 9794), -INT16_C( 8502), INT16_C( 15284), INT16_C( 6317), INT16_C( 12597), -INT16_C( 6143), -INT16_C( 30726), INT16_C( 5810), INT16_C( 17547), -INT16_C( 25767), -INT16_C( 8076), -INT16_C( 28109), INT16_C( 19242), INT16_C( 10742), INT16_C( 27714), -INT16_C( 16208), INT16_C( 24689), -INT16_C( 19705), -INT16_C( 19242), INT16_C( 2304), -INT16_C( 12018), -INT16_C( 19953), -INT16_C( 26053), INT16_C( 26911), INT16_C( 28949), -INT16_C( 18098) } }, { { INT16_C( 28600), INT16_C( 26783), -INT16_C( 22993), -INT16_C( 20965), -INT16_C( 1497), -INT16_C( 23930), -INT16_C( 28958), -INT16_C( 7223), -INT16_C( 9833), -INT16_C( 18795), -INT16_C( 7358), -INT16_C( 13969), -INT16_C( 20868), -INT16_C( 16715), INT16_C( 11731), -INT16_C( 29900), -INT16_C( 11364), -INT16_C( 13069), INT16_C( 3705), -INT16_C( 24198), INT16_C( 8), -INT16_C( 5309), INT16_C( 3214), INT16_C( 9678), INT16_C( 25573), INT16_C( 10203), INT16_C( 19015), -INT16_C( 15376), -INT16_C( 23048), -INT16_C( 13439), -INT16_C( 18733), INT16_C( 28503) }, { INT16_C( 19081), INT16_C( 827), -INT16_C( 19111), INT16_C( 24996), -INT16_C( 6219), INT16_C( 17228), INT16_C( 6899), -INT16_C( 9880), INT16_C( 17278), -INT16_C( 15104), -INT16_C( 3699), -INT16_C( 31352), INT16_C( 2454), INT16_C( 26961), -INT16_C( 22337), INT16_C( 18905), INT16_C( 5362), INT16_C( 19276), -INT16_C( 3894), INT16_C( 32685), -INT16_C( 1577), -INT16_C( 13629), INT16_C( 11028), -INT16_C( 27997), -INT16_C( 23441), -INT16_C( 937), -INT16_C( 8299), INT16_C( 11138), -INT16_C( 11288), -INT16_C( 22379), INT16_C( 28283), INT16_C( 28145) }, { INT16_C( 28600), INT16_C( 19081), 
INT16_C( 26783), INT16_C( 827), -INT16_C( 22993), -INT16_C( 19111), -INT16_C( 20965), INT16_C( 24996), -INT16_C( 9833), INT16_C( 17278), -INT16_C( 18795), -INT16_C( 15104), -INT16_C( 7358), -INT16_C( 3699), -INT16_C( 13969), -INT16_C( 31352), -INT16_C( 11364), INT16_C( 5362), -INT16_C( 13069), INT16_C( 19276), INT16_C( 3705), -INT16_C( 3894), -INT16_C( 24198), INT16_C( 32685), INT16_C( 25573), -INT16_C( 23441), INT16_C( 10203), -INT16_C( 937), INT16_C( 19015), -INT16_C( 8299), -INT16_C( 15376), INT16_C( 11138) } }, { { INT16_C( 15746), INT16_C( 19641), INT16_C( 26157), INT16_C( 1228), -INT16_C( 28833), INT16_C( 29646), INT16_C( 29370), INT16_C( 10501), INT16_C( 23574), -INT16_C( 21722), -INT16_C( 22469), INT16_C( 9430), INT16_C( 27515), -INT16_C( 2356), -INT16_C( 16935), INT16_C( 23651), INT16_C( 7418), INT16_C( 10152), INT16_C( 29826), -INT16_C( 7637), -INT16_C( 1789), -INT16_C( 16811), INT16_C( 23403), -INT16_C( 32281), INT16_C( 3511), -INT16_C( 3284), INT16_C( 949), INT16_C( 12311), -INT16_C( 7314), INT16_C( 18470), -INT16_C( 30048), -INT16_C( 25948) }, { INT16_C( 19622), INT16_C( 10689), -INT16_C( 4927), -INT16_C( 15349), INT16_C( 24805), INT16_C( 20866), INT16_C( 27323), INT16_C( 29650), -INT16_C( 137), INT16_C( 11622), INT16_C( 32002), INT16_C( 28765), -INT16_C( 31648), INT16_C( 184), INT16_C( 23566), -INT16_C( 19302), INT16_C( 23465), INT16_C( 27357), -INT16_C( 6073), INT16_C( 11310), -INT16_C( 20151), INT16_C( 1149), INT16_C( 20507), -INT16_C( 28041), -INT16_C( 8881), INT16_C( 20927), INT16_C( 7514), -INT16_C( 17727), INT16_C( 31393), -INT16_C( 20550), INT16_C( 21718), INT16_C( 32611) }, { INT16_C( 15746), INT16_C( 19622), INT16_C( 19641), INT16_C( 10689), INT16_C( 26157), -INT16_C( 4927), INT16_C( 1228), -INT16_C( 15349), INT16_C( 23574), -INT16_C( 137), -INT16_C( 21722), INT16_C( 11622), -INT16_C( 22469), INT16_C( 32002), INT16_C( 9430), INT16_C( 28765), INT16_C( 7418), INT16_C( 23465), INT16_C( 10152), INT16_C( 27357), INT16_C( 29826), -INT16_C( 6073), 
-INT16_C( 7637), INT16_C( 11310), INT16_C( 3511), -INT16_C( 8881), -INT16_C( 3284), INT16_C( 20927), INT16_C( 949), INT16_C( 7514), INT16_C( 12311), -INT16_C( 17727) } }, { { INT16_C( 16815), -INT16_C( 2327), INT16_C( 6185), INT16_C( 29219), -INT16_C( 24375), -INT16_C( 7049), -INT16_C( 4368), INT16_C( 16246), INT16_C( 14028), INT16_C( 9872), INT16_C( 21075), -INT16_C( 2847), -INT16_C( 25652), -INT16_C( 23901), INT16_C( 1776), -INT16_C( 24798), INT16_C( 2887), INT16_C( 29078), -INT16_C( 18141), -INT16_C( 4893), INT16_C( 23129), INT16_C( 19152), INT16_C( 18249), INT16_C( 5513), INT16_C( 6781), -INT16_C( 12229), INT16_C( 7276), INT16_C( 14532), INT16_C( 26552), -INT16_C( 22310), -INT16_C( 915), -INT16_C( 19129) }, { -INT16_C( 8952), INT16_C( 11046), INT16_C( 2454), -INT16_C( 4072), -INT16_C( 6044), -INT16_C( 21190), -INT16_C( 15569), -INT16_C( 21310), -INT16_C( 547), INT16_C( 18812), INT16_C( 16410), -INT16_C( 11647), INT16_C( 23719), INT16_C( 5498), -INT16_C( 16040), INT16_C( 24778), -INT16_C( 3937), INT16_C( 13708), -INT16_C( 23303), INT16_C( 23845), INT16_C( 24460), -INT16_C( 17398), -INT16_C( 13277), INT16_C( 104), -INT16_C( 6710), -INT16_C( 7094), -INT16_C( 13531), -INT16_C( 12874), INT16_C( 12327), -INT16_C( 32542), -INT16_C( 21263), -INT16_C( 28448) }, { INT16_C( 16815), -INT16_C( 8952), -INT16_C( 2327), INT16_C( 11046), INT16_C( 6185), INT16_C( 2454), INT16_C( 29219), -INT16_C( 4072), INT16_C( 14028), -INT16_C( 547), INT16_C( 9872), INT16_C( 18812), INT16_C( 21075), INT16_C( 16410), -INT16_C( 2847), -INT16_C( 11647), INT16_C( 2887), -INT16_C( 3937), INT16_C( 29078), INT16_C( 13708), -INT16_C( 18141), -INT16_C( 23303), -INT16_C( 4893), INT16_C( 23845), INT16_C( 6781), -INT16_C( 6710), -INT16_C( 12229), -INT16_C( 7094), INT16_C( 7276), -INT16_C( 13531), INT16_C( 14532), -INT16_C( 12874) } }, { { INT16_C( 27804), -INT16_C( 27194), -INT16_C( 5360), -INT16_C( 25101), -INT16_C( 693), INT16_C( 28249), -INT16_C( 15926), -INT16_C( 27538), -INT16_C( 18266), -INT16_C( 
13192), INT16_C( 11908), -INT16_C( 21607), INT16_C( 31582), INT16_C( 20267), INT16_C( 3111), -INT16_C( 15392), -INT16_C( 22920), -INT16_C( 30376), INT16_C( 19345), -INT16_C( 9178), INT16_C( 32585), INT16_C( 4938), -INT16_C( 18112), -INT16_C( 6233), INT16_C( 8049), -INT16_C( 2637), INT16_C( 19533), -INT16_C( 21599), -INT16_C( 13113), -INT16_C( 4358), -INT16_C( 9512), INT16_C( 20913) }, { INT16_C( 2432), INT16_C( 4826), INT16_C( 85), -INT16_C( 24850), INT16_C( 14719), -INT16_C( 16463), INT16_C( 22770), INT16_C( 25510), INT16_C( 22903), -INT16_C( 15271), -INT16_C( 1371), INT16_C( 27759), INT16_C( 27078), -INT16_C( 24742), INT16_C( 2884), -INT16_C( 15120), -INT16_C( 13803), INT16_C( 27350), -INT16_C( 14902), INT16_C( 18696), -INT16_C( 17922), -INT16_C( 4088), -INT16_C( 20719), -INT16_C( 30637), -INT16_C( 21496), -INT16_C( 20916), -INT16_C( 17498), INT16_C( 27930), INT16_C( 29988), INT16_C( 26636), -INT16_C( 896), -INT16_C( 27347) }, { INT16_C( 27804), INT16_C( 2432), -INT16_C( 27194), INT16_C( 4826), -INT16_C( 5360), INT16_C( 85), -INT16_C( 25101), -INT16_C( 24850), -INT16_C( 18266), INT16_C( 22903), -INT16_C( 13192), -INT16_C( 15271), INT16_C( 11908), -INT16_C( 1371), -INT16_C( 21607), INT16_C( 27759), -INT16_C( 22920), -INT16_C( 13803), -INT16_C( 30376), INT16_C( 27350), INT16_C( 19345), -INT16_C( 14902), -INT16_C( 9178), INT16_C( 18696), INT16_C( 8049), -INT16_C( 21496), -INT16_C( 2637), -INT16_C( 20916), INT16_C( 19533), -INT16_C( 17498), -INT16_C( 21599), INT16_C( 27930) } }, { { INT16_C( 966), -INT16_C( 28417), INT16_C( 1992), -INT16_C( 14631), -INT16_C( 7744), -INT16_C( 11850), INT16_C( 2704), -INT16_C( 26279), -INT16_C( 23114), INT16_C( 23879), INT16_C( 24928), -INT16_C( 31286), -INT16_C( 10538), INT16_C( 22509), INT16_C( 6866), -INT16_C( 26388), -INT16_C( 5090), -INT16_C( 6616), INT16_C( 499), -INT16_C( 19283), INT16_C( 25570), INT16_C( 29573), -INT16_C( 8339), INT16_C( 9228), INT16_C( 21380), -INT16_C( 6783), INT16_C( 19380), -INT16_C( 29846), INT16_C( 
22305), -INT16_C( 3102), -INT16_C( 12686), -INT16_C( 28533) }, { -INT16_C( 19526), -INT16_C( 20874), INT16_C( 9140), -INT16_C( 27038), -INT16_C( 6265), -INT16_C( 3063), INT16_C( 5574), INT16_C( 19224), -INT16_C( 26264), INT16_C( 7472), -INT16_C( 25884), INT16_C( 1448), -INT16_C( 29967), INT16_C( 25592), -INT16_C( 31912), INT16_C( 5107), INT16_C( 27190), -INT16_C( 5439), INT16_C( 9101), INT16_C( 5249), -INT16_C( 30198), -INT16_C( 12023), INT16_C( 8608), INT16_C( 2076), INT16_C( 19643), -INT16_C( 24795), -INT16_C( 12826), -INT16_C( 10331), -INT16_C( 25257), -INT16_C( 20421), INT16_C( 11809), INT16_C( 22467) }, { INT16_C( 966), -INT16_C( 19526), -INT16_C( 28417), -INT16_C( 20874), INT16_C( 1992), INT16_C( 9140), -INT16_C( 14631), -INT16_C( 27038), -INT16_C( 23114), -INT16_C( 26264), INT16_C( 23879), INT16_C( 7472), INT16_C( 24928), -INT16_C( 25884), -INT16_C( 31286), INT16_C( 1448), -INT16_C( 5090), INT16_C( 27190), -INT16_C( 6616), -INT16_C( 5439), INT16_C( 499), INT16_C( 9101), -INT16_C( 19283), INT16_C( 5249), INT16_C( 21380), INT16_C( 19643), -INT16_C( 6783), -INT16_C( 24795), INT16_C( 19380), -INT16_C( 12826), -INT16_C( 29846), -INT16_C( 10331) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_unpacklo_epi16(a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_unpacklo_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_unpacklo_epi16) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/xor.c000066400000000000000000002517241400333146700162430ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * 
restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson * 2020 Himanshi Mathur * 2020 Hidayat Khan */ #define SIMDE_TEST_X86_AVX512_INSN xor #include #include #include static int test_simde_mm512_xor_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { { INT32_C( 2140215653), INT32_C( 1293320897), -INT32_C( 1520002534), INT32_C( 294609697), INT32_C( 1091317370), INT32_C( 313266332), -INT32_C( 1143786207), INT32_C( 1969865590), INT32_C( 1062829004), -INT32_C( 597668763), INT32_C( 1683087224), INT32_C( 739479593), -INT32_C( 468073407), -INT32_C( 573950189), -INT32_C( 974763361), INT32_C( 527665652) }, { INT32_C( 816665849), INT32_C( 1611972509), -INT32_C( 1280024725), -INT32_C( 1519923382), -INT32_C( 1677919607), -INT32_C( 1493407047), INT32_C( 1584582304), -INT32_C( 1720041634), INT32_C( 70381333), INT32_C( 746736592), -INT32_C( 1868567415), INT32_C( 1303201364), INT32_C( 2110232181), -INT32_C( 1733384071), INT32_C( 69343053), -INT32_C( 1142043763) }, { INT32_C( 1329352092), INT32_C( 755121500), INT32_C( 382911857), -INT32_C( 1259819925), -INT32_C( 
621751053), -INT32_C( 1269794267), -INT32_C( 442369151), -INT32_C( 334234584), INT32_C( 996709593), -INT32_C( 253622347), -INT32_C( 187812879), INT32_C( 1639896701), -INT32_C( 1713483212), INT32_C( 1164222314), -INT32_C( 1044095534), -INT32_C( 1533132167) } }, { { -INT32_C( 1116734570), INT32_C( 1613310165), INT32_C( 813802444), INT32_C( 429161548), -INT32_C( 1674708291), INT32_C( 1572063288), INT32_C( 653388597), -INT32_C( 887480686), INT32_C( 1388041778), -INT32_C( 190487029), INT32_C( 304982209), INT32_C( 851454399), -INT32_C( 666906362), INT32_C( 1120951633), INT32_C( 2076068325), INT32_C( 1217737327) }, { -INT32_C( 807130602), INT32_C( 590835955), INT32_C( 594477440), -INT32_C( 617573334), INT32_C( 291751240), INT32_C( 1236534164), INT32_C( 789759175), -INT32_C( 74502962), -INT32_C( 134459010), -INT32_C( 1974206140), -INT32_C( 1593126896), -INT32_C( 699738723), -INT32_C( 399365675), -INT32_C( 1537291680), INT32_C( 615597302), -INT32_C( 1491990022) }, { INT32_C( 1921765760), INT32_C( 1126062118), INT32_C( 334406220), -INT32_C( 1029379994), -INT32_C( 1924251659), INT32_C( 336055212), INT32_C( 165883890), INT32_C( 815084124), -INT32_C( 1522038964), INT32_C( 2129749839), -INT32_C( 1289258799), -INT32_C( 460653534), INT32_C( 806222035), -INT32_C( 426862799), INT32_C( 1594821907), -INT32_C( 276350059) } }, { { -INT32_C( 289806681), INT32_C( 1144433863), INT32_C( 2078393105), -INT32_C( 621193477), INT32_C( 197942373), -INT32_C( 778195138), -INT32_C( 1500149552), -INT32_C( 376058582), INT32_C( 1008015921), -INT32_C( 817846632), -INT32_C( 1418259563), -INT32_C( 1728720625), -INT32_C( 875417463), -INT32_C( 433179879), -INT32_C( 401326522), -INT32_C( 293254336) }, { -INT32_C( 1611473168), INT32_C( 1779296220), INT32_C( 1625584338), INT32_C( 1628081346), INT32_C( 1516234414), -INT32_C( 67171629), INT32_C( 1608454162), -INT32_C( 114353618), -INT32_C( 353092690), INT32_C( 1984997008), INT32_C( 1607474741), -INT32_C( 1472407712), -INT32_C( 3064222), INT32_C( 279389725), 
-INT32_C( 1869192019), INT32_C( 302689758) }, { INT32_C( 1900755031), INT32_C( 775637787), INT32_C( 453358019), -INT32_C( 1141646791), INT32_C( 1368636107), INT32_C( 711108077), -INT32_C( 112551742), INT32_C( 280680196), -INT32_C( 689888865), -INT32_C( 1190133752), -INT32_C( 190378592), INT32_C( 818480751), INT32_C( 872615659), -INT32_C( 158788348), INT32_C( 2021808875), -INT32_C( 57678178) } }, { { INT32_C( 419761061), INT32_C( 1205817843), -INT32_C( 1808728463), -INT32_C( 270741600), INT32_C( 62568967), -INT32_C( 1758665902), -INT32_C( 1620063715), INT32_C( 975059798), INT32_C( 672464530), -INT32_C( 1296717020), INT32_C( 968463109), -INT32_C( 943239776), INT32_C( 1563835967), INT32_C( 1537408671), INT32_C( 771343793), INT32_C( 1593811067) }, { -INT32_C( 1519602963), INT32_C( 1238941430), -INT32_C( 210338261), -INT32_C( 240423445), -INT32_C( 1881846659), INT32_C( 210890163), INT32_C( 374811810), -INT32_C( 1712882298), INT32_C( 1497831658), INT32_C( 2098029338), -INT32_C( 368469787), INT32_C( 2040566000), -INT32_C( 1929731099), INT32_C( 1291609713), -INT32_C( 1096372677), -INT32_C( 147699721) }, { -INT32_C( 1133921976), INT32_C( 235400965), INT32_C( 1732673626), INT32_C( 511156811), -INT32_C( 1938819462), -INT32_C( 1682108703), -INT32_C( 1992759617), -INT32_C( 1543944496), INT32_C( 1901323896), -INT32_C( 809973698), -INT32_C( 743437344), -INT32_C( 1100489392), -INT32_C( 775119398), INT32_C( 392075502), -INT32_C( 1822463094), -INT32_C( 1446123636) } }, { { -INT32_C( 1891676286), -INT32_C( 1841226010), INT32_C( 1540983227), -INT32_C( 1986737150), INT32_C( 397242270), INT32_C( 823916557), -INT32_C( 1551338568), INT32_C( 1077412441), INT32_C( 1885334403), -INT32_C( 1567613993), -INT32_C( 1973232663), INT32_C( 1561190391), INT32_C( 194947553), INT32_C( 332812599), INT32_C( 1009120275), INT32_C( 1926064119) }, { -INT32_C( 1175618672), -INT32_C( 1361499621), INT32_C( 2080692609), INT32_C( 36764393), -INT32_C( 844078996), INT32_C( 35128981), -INT32_C( 1577126054), 
-INT32_C( 1409287093), INT32_C( 1241153522), INT32_C( 1667693632), -INT32_C( 2140278031), -INT32_C( 250677594), INT32_C( 771501117), INT32_C( 1049317354), -INT32_C( 683384889), INT32_C( 1735623030) }, { INT32_C( 919745554), INT32_C( 1016607997), INT32_C( 668815418), -INT32_C( 1952172309), -INT32_C( 635629582), INT32_C( 855899800), INT32_C( 41321698), -INT32_C( 339214830), INT32_C( 967152753), -INT32_C( 1040783465), INT32_C( 168765720), -INT32_C( 1409078959), INT32_C( 643991004), INT32_C( 761077469), -INT32_C( 345920556), INT32_C( 364832385) } }, { { -INT32_C( 1727546569), -INT32_C( 2058639326), INT32_C( 1338749765), INT32_C( 373465026), -INT32_C( 671124678), -INT32_C( 1919302723), -INT32_C( 1233004256), -INT32_C( 1559179697), -INT32_C( 107798480), -INT32_C( 385456720), -INT32_C( 898044456), -INT32_C( 1625696711), -INT32_C( 376937145), -INT32_C( 1132367764), INT32_C( 902481945), INT32_C( 792056806) }, { -INT32_C( 1841227009), INT32_C( 1539669343), -INT32_C( 709735776), INT32_C( 1652239698), -INT32_C( 1711680384), -INT32_C( 1429694270), INT32_C( 525789643), INT32_C( 1259471626), INT32_C( 1193841178), INT32_C( 812285822), INT32_C( 876026405), INT32_C( 332646634), -INT32_C( 1055303881), INT32_C( 1713064637), -INT32_C( 1819389369), INT32_C( 379883641) }, { INT32_C( 189178312), -INT32_C( 561064579), -INT32_C( 1703282203), INT32_C( 1949938320), INT32_C( 1309059002), INT32_C( 659643263), -INT32_C( 1445515029), -INT32_C( 402469563), -INT32_C( 1094995414), -INT32_C( 647226674), -INT32_C( 28313603), -INT32_C( 1932910893), INT32_C( 680604272), -INT32_C( 627426607), -INT32_C( 1505453474), INT32_C( 965821343) } }, { { -INT32_C( 349030303), INT32_C( 1785046111), -INT32_C( 1042757457), INT32_C( 1737927527), INT32_C( 1161594549), INT32_C( 1160192042), -INT32_C( 184434809), -INT32_C( 1720055340), -INT32_C( 339521427), -INT32_C( 1970209792), -INT32_C( 1248287430), -INT32_C( 360789525), INT32_C( 1913349951), INT32_C( 360975298), INT32_C( 1185309231), INT32_C( 1025588088) }, { 
-INT32_C( 388091286), INT32_C( 1475451470), -INT32_C( 582153313), -INT32_C( 1991214562), -INT32_C( 1091731760), -INT32_C( 868334684), -INT32_C( 1166550823), -INT32_C( 734386821), -INT32_C( 499790847), -INT32_C( 1911490173), INT32_C( 998569952), INT32_C( 1668173516), INT32_C( 240093475), INT32_C( 1396415836), INT32_C( 1050460006), -INT32_C( 1184904202) }, { INT32_C( 65800715), INT32_C( 1033125393), INT32_C( 479576880), -INT32_C( 288960135), -INT32_C( 70125467), -INT32_C( 1994824818), INT32_C( 1333159774), INT32_C( 1296051375), INT32_C( 167156844), INT32_C( 75498883), -INT32_C( 1910746406), -INT32_C( 1995405529), INT32_C( 2084892188), INT32_C( 1186961054), INT32_C( 2017123657), -INT32_C( 2072055666) } }, { { INT32_C( 1950700306), -INT32_C( 237283605), -INT32_C( 1190591724), INT32_C( 2981687), -INT32_C( 576818779), -INT32_C( 20979385), INT32_C( 750065778), -INT32_C( 830997516), -INT32_C( 852723094), INT32_C( 1566901338), INT32_C( 353305803), INT32_C( 6679193), INT32_C( 1342823370), -INT32_C( 1377161447), INT32_C( 1791982968), INT32_C( 243243187) }, { -INT32_C( 928073445), -INT32_C( 35749180), -INT32_C( 356403761), -INT32_C( 863827258), INT32_C( 822300177), INT32_C( 1598338091), -INT32_C( 1771112626), INT32_C( 117434161), INT32_C( 676273718), INT32_C( 693777629), INT32_C( 320965188), INT32_C( 1812143731), -INT32_C( 1549201693), -INT32_C( 1144932349), -INT32_C( 2083228063), INT32_C( 1723727495) }, { -INT32_C( 1125389815), INT32_C( 201708591), INT32_C( 1405662939), -INT32_C( 860981775), -INT32_C( 325238860), -INT32_C( 1577364116), -INT32_C( 1159991492), -INT32_C( 930603835), -INT32_C( 446471076), INT32_C( 1950298247), INT32_C( 103714447), INT32_C( 1818686698), -INT32_C( 207563479), INT32_C( 371955482), -INT32_C( 384103655), INT32_C( 1757570612) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = 
simde_mm512_castps_si512(simde_mm512_xor_ps(simde_mm512_castsi512_ps(a), simde_mm512_castsi512_ps(b))); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_xor_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { { -INT64_C( 8917272025905183984), -INT64_C( 8866677502414823733), INT64_C( 1464795012987523672), INT64_C( 9076492970404562796), -INT64_C( 471420776636169871), INT64_C( 3961263463326435322), -INT64_C( 4926361178749313089), INT64_C( 6187424904041771752) }, { -INT64_C( 6187074978812186357), INT64_C( 9129308580762170105), INT64_C( 6737626754639454484), INT64_C( 8553348640616316182), -INT64_C( 7747167415395374764), INT64_C( 2270104474263871152), INT64_C( 8997904634902970919), -INT64_C( 1284389140319940514) }, { INT64_C( 3322640854544675355), -INT64_C( 413510917362488270), INT64_C( 5319635821359362892), INT64_C( 812256522989615738), INT64_C( 7856992465972914213), INT64_C( 2988204169892698442), -INT64_C( 4072109430317720168), -INT64_C( 4903636269142318410) } }, { { -INT64_C( 6505531527298525732), INT64_C( 9073156322736741803), INT64_C( 3499178133975168607), INT64_C( 8184285759661098913), INT64_C( 3319482116807104653), INT64_C( 6307874133959206792), -INT64_C( 1590393193835026219), INT64_C( 3334991353612573190) }, { -INT64_C( 189158219624270114), INT64_C( 4609744475273820848), INT64_C( 3508270164975553796), -INT64_C( 7960726155283276816), INT64_C( 2082343515212321040), INT64_C( 4039683120892358364), -INT64_C( 7716078895957568331), INT64_C( 8874843879011272792) }, { INT64_C( 6406454113495114498), INT64_C( 4761230285051520795), INT64_C( 9133478509509467), -INT64_C( 2300849903224578991), INT64_C( 3671791598235879837), INT64_C( 8036045162057386324), INT64_C( 9009225591048387680), INT64_C( 6152343202689438302) } }, { { INT64_C( 1660001668875280251), -INT64_C( 6688666841194484293), INT64_C( 866481487179763680), -INT64_C( 
6014321076695304337), -INT64_C( 4943803188177761355), INT64_C( 8602557963703392155), INT64_C( 2348766465129802213), -INT64_C( 4226418528419391895) }, { -INT64_C( 276208556333754517), -INT64_C( 8793904538853466362), -INT64_C( 1578788849987643064), -INT64_C( 1646247322702001477), INT64_C( 2188768063495513738), INT64_C( 7993857117939491420), -INT64_C( 8683858962037236194), INT64_C( 8744573975962379253) }, { -INT64_C( 1503296891938387952), INT64_C( 2799218735084909245), -INT64_C( 1868607137191635288), INT64_C( 5021386112833600980), -INT64_C( 6556108815733478081), INT64_C( 1841284304668030407), -INT64_C( 6348735240143046661), -INT64_C( 4899273116385921636) } }, { { -INT64_C( 7645977387302788113), -INT64_C( 3352234231512672247), INT64_C( 7099980801611090558), INT64_C( 2943395267179314445), -INT64_C( 4607844101183449547), -INT64_C( 8977088685291950508), INT64_C( 143793199971752471), INT64_C( 1279544180712774249) }, { INT64_C( 5042683055231339073), INT64_C( 106425879644314133), -INT64_C( 7957364987157442167), INT64_C( 2375707624005304798), INT64_C( 1229740195508336126), -INT64_C( 4558434066425528247), -INT64_C( 6013628711494954681), INT64_C( 863595646972066585) }, { -INT64_C( 3449971071047439954), -INT64_C( 3458657327579413988), -INT64_C( 929447554171085833), INT64_C( 585818806074473683), -INT64_C( 3378460560296254517), INT64_C( 4888597167206592029), -INT64_C( 5947680971553895600), INT64_C( 1890879682201149808) } }, { { INT64_C( 19342552227360650), INT64_C( 9015339881571254999), INT64_C( 2111175543395945328), INT64_C( 1063893217915658645), INT64_C( 5113671542448273537), INT64_C( 294076048327577819), INT64_C( 4136299984689877214), INT64_C( 6998346057415234657) }, { -INT64_C( 5822897953418392471), -INT64_C( 2652605089542910307), -INT64_C( 6710896084442025600), INT64_C( 482490459503952710), -INT64_C( 4249188140389718191), INT64_C( 3686292135995040188), -INT64_C( 7820050545947625371), -INT64_C( 4155849880699296371) }, { -INT64_C( 5803909723175885853), -INT64_C( 
6472545252534039478), -INT64_C( 4642510373644240144), INT64_C( 608429854075970259), -INT64_C( 8939450874709434416), INT64_C( 3980218559741096295), -INT64_C( 6188350928172226373), -INT64_C( 6391617066844278292) } }, { { INT64_C( 6004850654787511453), INT64_C( 5940721718117239162), INT64_C( 8299177194198841098), INT64_C( 8739678760146743174), -INT64_C( 33468914264828954), INT64_C( 4810661481483717294), -INT64_C( 6371855048832433144), INT64_C( 2475934475524100073) }, { INT64_C( 6400425042904156857), INT64_C( 7634775463157369383), -INT64_C( 8012522724089233327), INT64_C( 3036153849740553193), -INT64_C( 1737102447742047799), -INT64_C( 2861167923869488012), INT64_C( 265573619744849241), INT64_C( 8622106189528170193) }, { INT64_C( 830741991547514404), INT64_C( 4288980697190663005), -INT64_C( 2026245280167086245), INT64_C( 6010905068804162671), INT64_C( 1760215543021156911), -INT64_C( 7311046646325205286), -INT64_C( 6612095300032300719), INT64_C( 6195711379304100152) } }, { { -INT64_C( 64316270072716246), INT64_C( 1935443446427172380), -INT64_C( 1091493333936354380), -INT64_C( 6895415197380722231), -INT64_C( 1805936298755591266), INT64_C( 8997027290326566875), -INT64_C( 7447454384865349272), INT64_C( 6845659238006585079) }, { -INT64_C( 3918924663269530680), -INT64_C( 5736519009328467646), INT64_C( 3896138832546275085), INT64_C( 114649980956301072), -INT64_C( 3878013855955832913), -INT64_C( 8817041431659451413), -INT64_C( 3144109803129131596), -INT64_C( 3898751961541556299) }, { INT64_C( 3929019560870856674), -INT64_C( 6142951854191587490), -INT64_C( 4121953400583433031), -INT64_C( 6784178172063836455), INT64_C( 3233162805184420913), -INT64_C( 470539099348898768), INT64_C( 5546380282716151004), -INT64_C( 7573801740836425406) } }, { { INT64_C( 4005776973137074199), INT64_C( 7761109657262433250), -INT64_C( 4087271545684412689), INT64_C( 8176212645445862943), INT64_C( 1663312718869912950), -INT64_C( 5987727983636667819), -INT64_C( 2064163572997672891), INT64_C( 
4576071772732176903) }, { INT64_C( 3480407709618747496), INT64_C( 2300011067054476999), INT64_C( 2573836082898378693), -INT64_C( 8237295633617103203), INT64_C( 8697230954808831974), -INT64_C( 7715133602037231102), INT64_C( 1008851380274702642), -INT64_C( 7795902896402923754) }, { INT64_C( 566192077266575487), INT64_C( 8385218711665306917), -INT64_C( 1945833864310642390), -INT64_C( 227289569249472382), INT64_C( 8045543724174291600), INT64_C( 4037767070791741527), -INT64_C( 1343560506274506377), -INT64_C( 6030853617239309039) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_castpd_si512(simde_mm512_xor_pd(simde_mm512_castsi512_pd(a), simde_mm512_castsi512_pd(b))); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_xor_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { { INT32_C( 116835021), INT32_C( 228055744), -INT32_C( 583287396), -INT32_C( 1948084724), -INT32_C( 539666230), -INT32_C( 47767560), INT32_C( 757520745), -INT32_C( 166532568), INT32_C( 1828456620), INT32_C( 2138739938), INT32_C( 1566356817), INT32_C( 384384587), -INT32_C( 1879720297), INT32_C( 1200365021), INT32_C( 527741942), INT32_C( 1226147485) }, { INT32_C( 1907757455), -INT32_C( 135253851), INT32_C( 810831333), INT32_C( 575028619), -INT32_C( 642630405), INT32_C( 1327513177), -INT32_C( 1905355535), -INT32_C( 1445428454), INT32_C( 974884501), -INT32_C( 1557066818), -INT32_C( 455899560), -INT32_C( 1090118973), -INT32_C( 1348945834), -INT32_C( 385959689), INT32_C( 1719102796), -INT32_C( 2062528528) }, { INT32_C( 2000737090), -INT32_C( 93853083), -INT32_C( 311431047), -INT32_C( 1448825977), INT32_C( 107439153), -INT32_C( 1308155487), -INT32_C( 1555542632), INT32_C( 1607100722), 
INT32_C( 1458013753), -INT32_C( 599023780), -INT32_C( 1181797623), -INT32_C( 1443945336), INT32_C( 544016065), -INT32_C( 1351441110), INT32_C( 2030296762), -INT32_C( 872035987) } }, { { -INT32_C( 1681904675), -INT32_C( 1891700426), INT32_C( 997396855), -INT32_C( 2114291413), INT32_C( 691049010), -INT32_C( 1777258678), -INT32_C( 1929607012), -INT32_C( 1273885481), INT32_C( 1850724919), INT32_C( 1006472643), -INT32_C( 898207585), INT32_C( 457994473), INT32_C( 1279556610), INT32_C( 1206015659), -INT32_C( 1227563298), INT32_C( 577431275) }, { INT32_C( 2073082552), -INT32_C( 424243897), -INT32_C( 407819011), -INT32_C( 1643971172), INT32_C( 636110713), INT32_C( 2070727837), -INT32_C( 1775157077), -INT32_C( 558326746), -INT32_C( 1655027370), -INT32_C( 729542442), -INT32_C( 642042563), -INT32_C( 1418150606), -INT32_C( 1580178940), -INT32_C( 635618001), -INT32_C( 1536143747), INT32_C( 1082271978) }, { -INT32_C( 531590811), INT32_C( 1770552433), -INT32_C( 591250038), INT32_C( 536380599), INT32_C( 215668043), -INT32_C( 310516777), INT32_C( 449693751), INT32_C( 1789563121), -INT32_C( 216687775), -INT32_C( 277242603), INT32_C( 332219810), -INT32_C( 1338716709), -INT32_C( 309060090), -INT32_C( 1644205180), INT32_C( 312774819), INT32_C( 1659424257) } }, { { INT32_C( 1222564977), INT32_C( 706503405), -INT32_C( 922495081), -INT32_C( 1720419436), INT32_C( 221922782), -INT32_C( 1615998), -INT32_C( 1868343386), -INT32_C( 237951361), -INT32_C( 281432318), -INT32_C( 1474734831), -INT32_C( 1066328788), INT32_C( 1985602968), -INT32_C( 1400662998), -INT32_C( 1850971413), INT32_C( 1092767681), INT32_C( 2016605045) }, { -INT32_C( 1285067870), -INT32_C( 329547328), INT32_C( 917294238), -INT32_C( 592640335), -INT32_C( 2054672230), INT32_C( 1545024667), -INT32_C( 107136636), -INT32_C( 831402196), -INT32_C( 92153542), -INT32_C( 119088038), INT32_C( 1496224680), INT32_C( 859167641), -INT32_C( 1481064948), INT32_C( 1979961330), INT32_C( 879796488), -INT32_C( 1425874576) }, { -INT32_C( 71714861), 
-INT32_C( 968367315), -INT32_C( 5301495), INT32_C( 1171881253), -INT32_C( 2001570492), -INT32_C( 1544527079), INT32_C( 1765699106), INT32_C( 1067510099), INT32_C( 364410424), INT32_C( 1358924107), -INT32_C( 1721790844), INT32_C( 1164721665), INT32_C( 188426790), -INT32_C( 407919335), INT32_C( 1968369353), -INT32_C( 751824379) } }, { { INT32_C( 346391482), INT32_C( 118262879), -INT32_C( 1201587425), INT32_C( 585864726), INT32_C( 1204396884), INT32_C( 2076036467), -INT32_C( 558879378), -INT32_C( 913722865), -INT32_C( 1780601034), -INT32_C( 627184965), INT32_C( 1016266277), -INT32_C( 379683436), -INT32_C( 1825560544), INT32_C( 1661988341), INT32_C( 708951835), -INT32_C( 1477194895) }, { -INT32_C( 1254239750), -INT32_C( 510666052), INT32_C( 1813848536), -INT32_C( 1101693794), -INT32_C( 1722645084), -INT32_C( 1896062605), -INT32_C( 1850196704), INT32_C( 37333768), INT32_C( 968324733), INT32_C( 672810576), INT32_C( 93599847), INT32_C( 1489300148), -INT32_C( 487516561), -INT32_C( 1754206857), INT32_C( 858269738), INT32_C( 1362452948) }, { -INT32_C( 1583852992), -INT32_C( 427600157), -INT32_C( 730064185), -INT32_C( 1665209720), -INT32_C( 560257296), -INT32_C( 180245504), INT32_C( 1325928526), -INT32_C( 877651705), -INT32_C( 1402382005), -INT32_C( 225989397), INT32_C( 956745282), -INT32_C( 1315268832), INT32_C( 1908489807), -INT32_C( 193003390), INT32_C( 426350385), -INT32_C( 154752347) } }, { { INT32_C( 663416279), -INT32_C( 1689279437), -INT32_C( 1868503844), INT32_C( 1038640334), -INT32_C( 249505414), -INT32_C( 242708282), -INT32_C( 1926975047), -INT32_C( 354526958), INT32_C( 2047961158), -INT32_C( 384474868), INT32_C( 326743365), -INT32_C( 1806671334), INT32_C( 42364987), -INT32_C( 1158476287), -INT32_C( 767092801), -INT32_C( 1212406416) }, { -INT32_C( 1724789363), INT32_C( 1937917486), INT32_C( 361168123), -INT32_C( 1716856994), INT32_C( 1218129991), -INT32_C( 33386946), INT32_C( 382749093), -INT32_C( 70415250), -INT32_C( 2003435943), INT32_C( 1073420100), INT32_C( 
1901429267), -INT32_C( 1609826471), INT32_C( 1844028975), -INT32_C( 630461643), -INT32_C( 1544537291), INT32_C( 564116935) }, { -INT32_C( 1095032742), -INT32_C( 389160419), -INT32_C( 2061101017), -INT32_C( 1539132528), -INT32_C( 1178867395), INT32_C( 260710136), -INT32_C( 1678444516), INT32_C( 286512508), -INT32_C( 226191841), -INT32_C( 689015224), INT32_C( 1647064918), INT32_C( 878419267), INT32_C( 1869600276), INT32_C( 1620632884), INT32_C( 1907829898), -INT32_C( 1776052041) } }, { { -INT32_C( 5688133), INT32_C( 1598006347), INT32_C( 2144375846), -INT32_C( 1038034029), -INT32_C( 1221654142), INT32_C( 697408500), -INT32_C( 1630698794), -INT32_C( 71340993), -INT32_C( 335910752), INT32_C( 860502284), INT32_C( 1622285261), INT32_C( 2015548150), -INT32_C( 802204965), -INT32_C( 1007042067), -INT32_C( 2107521469), -INT32_C( 763551694) }, { -INT32_C( 1782679416), INT32_C( 2143815857), INT32_C( 450853411), INT32_C( 680657485), INT32_C( 1106821716), -INT32_C( 972754301), -INT32_C( 347576648), INT32_C( 264095366), -INT32_C( 274433218), -INT32_C( 1485935484), INT32_C( 868306662), -INT32_C( 1537452976), -INT32_C( 1713023978), -INT32_C( 10491578), -INT32_C( 689264817), -INT32_C( 1411012755) }, { INT32_C( 1779911731), INT32_C( 553102586), INT32_C( 1695542789), -INT32_C( 357377570), -INT32_C( 153629738), -INT32_C( 275420809), INT32_C( 1971709038), -INT32_C( 201151815), INT32_C( 73274270), -INT32_C( 1809558136), INT32_C( 1400067371), -INT32_C( 595687770), INT32_C( 1237976781), INT32_C( 1017522347), INT32_C( 1418420492), INT32_C( 2040039263) } }, { { -INT32_C( 1482979037), -INT32_C( 615642635), -INT32_C( 1492185001), INT32_C( 2051763044), INT32_C( 101920959), INT32_C( 1761964570), -INT32_C( 2025853159), -INT32_C( 1187896170), -INT32_C( 1570714195), INT32_C( 780054487), INT32_C( 601263551), -INT32_C( 1214438920), INT32_C( 1824370770), INT32_C( 1003864610), INT32_C( 1220679089), -INT32_C( 419302087) }, { -INT32_C( 1719049534), -INT32_C( 792197359), -INT32_C( 1930191212), INT32_C( 
289706175), INT32_C( 1652359488), INT32_C( 1973244868), -INT32_C( 1581424536), INT32_C( 394837845), INT32_C( 850465313), -INT32_C( 1375569639), -INT32_C( 700779241), -INT32_C( 974619003), INT32_C( 1143432576), INT32_C( 549045432), INT32_C( 2042787620), INT32_C( 1469139510) }, { INT32_C( 1041427425), INT32_C( 193335012), INT32_C( 737971907), INT32_C( 1796209627), INT32_C( 1684943359), INT32_C( 479732190), INT32_C( 646090609), -INT32_C( 1363502141), -INT32_C( 1865293940), -INT32_C( 2139302194), -INT32_C( 169051992), INT32_C( 1920294781), INT32_C( 681235922), INT32_C( 460064410), INT32_C( 822108821), -INT32_C( 1332691185) } }, { { INT32_C( 1988706908), -INT32_C( 769356869), INT32_C( 94920320), INT32_C( 1573556445), -INT32_C( 1365118474), -INT32_C( 623945035), INT32_C( 122917329), INT32_C( 945743067), -INT32_C( 508631258), -INT32_C( 223096206), INT32_C( 234314800), -INT32_C( 496320020), INT32_C( 1754336178), INT32_C( 927096934), -INT32_C( 868248079), -INT32_C( 1610310278) }, { -INT32_C( 142429563), -INT32_C( 1242942076), INT32_C( 2126700945), INT32_C( 1432366499), -INT32_C( 1631719112), INT32_C( 1121386321), INT32_C( 252646805), INT32_C( 934220722), INT32_C( 1244606918), -INT32_C( 117499545), -INT32_C( 1653161222), INT32_C( 670291951), INT32_C( 449229000), INT32_C( 1163697328), INT32_C( 1666476977), INT32_C( 1167786879) }, { -INT32_C( 2129923879), INT32_C( 1741602367), INT32_C( 2070593297), INT32_C( 145407358), INT32_C( 807141582), -INT32_C( 1743149596), INT32_C( 140280900), INT32_C( 267515753), -INT32_C( 1417554208), INT32_C( 172804885), -INT32_C( 1870553398), -INT32_C( 979823101), INT32_C( 1918352250), INT32_C( 1914633430), -INT32_C( 1351877056), -INT32_C( 442524155) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_xor_epi32(a, b); simde_test_x86_assert_equal_i32x16(r, 
simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_xor_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t src[16]; const simde__mmask16 k; const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { { INT32_C( 153880592), -INT32_C( 1121219888), INT32_C( 485857388), -INT32_C( 948181383), INT32_C( 41449270), -INT32_C( 1756100706), INT32_C( 1579137812), INT32_C( 587505939), INT32_C( 2049716394), INT32_C( 641161146), INT32_C( 1765944559), INT32_C( 1211153938), -INT32_C( 733304523), -INT32_C( 999579984), INT32_C( 1814202969), -INT32_C( 695261652) }, UINT16_C(47959), { -INT32_C( 2028859056), INT32_C( 2058551864), -INT32_C( 1690778261), INT32_C( 1464102413), -INT32_C( 1376390078), -INT32_C( 617066567), INT32_C( 1258448059), -INT32_C( 1979295430), -INT32_C( 1575937686), -INT32_C( 2044869605), -INT32_C( 1776200311), INT32_C( 99444419), INT32_C( 330555994), -INT32_C( 303043791), -INT32_C( 1992756913), -INT32_C( 1273807286) }, { INT32_C( 1918313559), INT32_C( 1912108264), -INT32_C( 1928914231), -INT32_C( 627903360), INT32_C( 149833174), INT32_C( 2146819376), INT32_C( 403189198), -INT32_C( 1010033813), INT32_C( 657793855), INT32_C( 1637363351), -INT32_C( 923885752), INT32_C( 1789034643), -INT32_C( 160263994), INT32_C( 997615469), -INT32_C( 11305324), -INT32_C( 641589094) }, { -INT32_C( 180013817), INT32_C( 189494992), INT32_C( 373284770), -INT32_C( 948181383), -INT32_C( 1524911724), -INT32_C( 1756100706), INT32_C( 1393183093), INT32_C( 587505939), -INT32_C( 2061223339), -INT32_C( 410654068), INT32_C( 1765944559), INT32_C( 1867507280), -INT32_C( 440307044), -INT32_C( 694580132), INT32_C( 1814202969), INT32_C( 1842421456) } }, { { -INT32_C( 620627901), INT32_C( 1849465126), -INT32_C( 868865479), INT32_C( 1882642602), -INT32_C( 697849751), -INT32_C( 1559044849), -INT32_C( 173841061), -INT32_C( 909154939), -INT32_C( 2086350755), -INT32_C( 1561206679), -INT32_C( 1267783926), INT32_C( 1764074752), INT32_C( 
1564445774), -INT32_C( 1006546583), INT32_C( 1018864823), INT32_C( 1728416009) }, UINT16_C(43353), { -INT32_C( 594951446), -INT32_C( 737963163), INT32_C( 1836647240), -INT32_C( 1392916628), INT32_C( 654139941), -INT32_C( 523586522), -INT32_C( 160836623), INT32_C( 631292474), -INT32_C( 369022587), INT32_C( 96339133), INT32_C( 1953642248), INT32_C( 606104575), -INT32_C( 213246259), -INT32_C( 1009511214), INT32_C( 582630887), -INT32_C( 2058921472) }, { INT32_C( 1081034883), INT32_C( 1413819724), INT32_C( 1674164068), -INT32_C( 276305630), -INT32_C( 622604025), -INT32_C( 845301787), -INT32_C( 185640972), INT32_C( 897136305), -INT32_C( 898242434), INT32_C( 2048899861), -INT32_C( 1780619150), -INT32_C( 645634607), INT32_C( 481519414), INT32_C( 334057502), INT32_C( 1493686440), -INT32_C( 1936817906) }, { -INT32_C( 1662612887), INT32_C( 1849465126), -INT32_C( 868865479), INT32_C( 1132342862), -INT32_C( 65096926), -INT32_C( 1559044849), INT32_C( 42370053), -INT32_C( 909154939), INT32_C( 544522747), -INT32_C( 1561206679), -INT32_C( 1267783926), -INT32_C( 39580114), INT32_C( 1564445774), -INT32_C( 801291060), INT32_C( 1018864823), INT32_C( 164223758) } }, { { INT32_C( 2136343657), INT32_C( 838432191), INT32_C( 801560157), INT32_C( 1913146171), -INT32_C( 779174990), -INT32_C( 1276872949), INT32_C( 1561193295), -INT32_C( 689333396), INT32_C( 1582645151), INT32_C( 311447221), INT32_C( 1614894628), INT32_C( 1423067553), INT32_C( 270884868), INT32_C( 650316247), INT32_C( 1636028660), INT32_C( 188181612) }, UINT16_C(36011), { -INT32_C( 86351766), -INT32_C( 1269760397), INT32_C( 822014558), INT32_C( 1804599878), -INT32_C( 697014510), INT32_C( 279341197), INT32_C( 8131273), -INT32_C( 2004080610), -INT32_C( 75340152), -INT32_C( 1011821979), INT32_C( 183807684), -INT32_C( 1015708496), INT32_C( 2040130028), INT32_C( 462045010), INT32_C( 1880884818), -INT32_C( 1208440786) }, { INT32_C( 1957853711), INT32_C( 288842316), -INT32_C( 1055184112), -INT32_C( 1669033552), -INT32_C( 871031430), 
-INT32_C( 1360486564), -INT32_C( 736230234), -INT32_C( 1148512596), -INT32_C( 584106352), -INT32_C( 1309775968), INT32_C( 1114769810), INT32_C( 366933658), INT32_C( 1877079059), INT32_C( 958318995), INT32_C( 2047753421), -INT32_C( 483026605) }, { -INT32_C( 1905780123), -INT32_C( 1519936449), INT32_C( 801560157), -INT32_C( 149646346), -INT32_C( 779174990), -INT32_C( 1102121007), INT32_C( 1561193295), INT32_C( 856112818), INT32_C( 1582645151), INT32_C( 311447221), INT32_C( 1216784214), -INT32_C( 693407190), INT32_C( 270884868), INT32_C( 650316247), INT32_C( 1636028660), INT32_C( 1422733693) } }, { { INT32_C( 2025874903), INT32_C( 1579790028), INT32_C( 1386322872), -INT32_C( 1536721007), INT32_C( 118704499), -INT32_C( 532664046), -INT32_C( 1051046290), -INT32_C( 1079734296), -INT32_C( 1070111244), -INT32_C( 887136237), -INT32_C( 1944141573), -INT32_C( 1288665793), -INT32_C( 507886386), -INT32_C( 473826699), INT32_C( 832838473), -INT32_C( 1611642454) }, UINT16_C(10414), { INT32_C( 2122891615), -INT32_C( 1421966195), INT32_C( 1093697040), -INT32_C( 343539919), -INT32_C( 1578763552), -INT32_C( 2101596450), INT32_C( 1355507295), INT32_C( 1702394117), -INT32_C( 924647365), -INT32_C( 1787616636), -INT32_C( 774462560), -INT32_C( 2101584990), INT32_C( 857973077), INT32_C( 800514000), INT32_C( 1266647366), INT32_C( 917567483) }, { INT32_C( 2097059064), INT32_C( 1443983798), -INT32_C( 1222121708), -INT32_C( 1741036734), INT32_C( 1422613892), -INT32_C( 2105245380), -INT32_C( 20053245), -INT32_C( 214663429), -INT32_C( 932236782), -INT32_C( 1222737501), -INT32_C( 1418836632), -INT32_C( 1388074968), INT32_C( 1090588165), -INT32_C( 1815837296), -INT32_C( 2070834551), INT32_C( 595051792) }, { INT32_C( 2025874903), -INT32_C( 47188677), -INT32_C( 166225660), INT32_C( 1941707891), INT32_C( 118704499), INT32_C( 3715042), -INT32_C( 1051046290), -INT32_C( 1773402114), -INT32_C( 1070111244), -INT32_C( 887136237), -INT32_C( 1944141573), INT32_C( 805303178), -INT32_C( 507886386), -INT32_C( 
1133356480), INT32_C( 832838473), -INT32_C( 1611642454) } }, { { -INT32_C( 1695815945), -INT32_C( 799995289), INT32_C( 2021376079), INT32_C( 1831190120), INT32_C( 1554917068), INT32_C( 904884908), INT32_C( 364478468), INT32_C( 1010315333), INT32_C( 2127962902), INT32_C( 2102274093), INT32_C( 1358285288), INT32_C( 1404902023), -INT32_C( 323982528), -INT32_C( 501112866), INT32_C( 1710742048), INT32_C( 564277002) }, UINT16_C(30803), { -INT32_C( 308248417), -INT32_C( 222918403), -INT32_C( 1794359848), INT32_C( 1073827216), INT32_C( 1541463865), -INT32_C( 1187708735), INT32_C( 132661349), INT32_C( 8403809), -INT32_C( 1175641924), -INT32_C( 2119392343), INT32_C( 1880602848), INT32_C( 1068570629), -INT32_C( 1197829641), -INT32_C( 160313455), INT32_C( 1895717391), INT32_C( 1366326933) }, { INT32_C( 1191927198), -INT32_C( 523651328), INT32_C( 1968300143), -INT32_C( 273415432), INT32_C( 614944403), INT32_C( 756685085), INT32_C( 144513139), INT32_C( 895094422), INT32_C( 1820091755), -INT32_C( 1957935844), INT32_C( 486579493), INT32_C( 839693471), INT32_C( 542553090), INT32_C( 1078817229), INT32_C( 541649545), INT32_C( 1683333880) }, { -INT32_C( 1431577343), INT32_C( 310329341), INT32_C( 2021376079), INT32_C( 1831190120), INT32_C( 2135396778), INT32_C( 904884908), INT32_C( 259348502), INT32_C( 1010315333), INT32_C( 2127962902), INT32_C( 2102274093), INT32_C( 1358285288), INT32_C( 230534298), -INT32_C( 1731451403), -INT32_C( 1237533092), INT32_C( 1354150022), INT32_C( 564277002) } }, { { INT32_C( 600887559), INT32_C( 1018108951), INT32_C( 1499115450), INT32_C( 1720477284), -INT32_C( 410590694), -INT32_C( 584592557), -INT32_C( 1224904514), INT32_C( 437932562), INT32_C( 977136163), -INT32_C( 1049105401), INT32_C( 1757596), INT32_C( 1382458935), -INT32_C( 600183415), INT32_C( 2126078400), -INT32_C( 483019055), INT32_C( 754798344) }, UINT16_C(15162), { -INT32_C( 584629914), INT32_C( 481215234), INT32_C( 717481667), INT32_C( 1897352248), -INT32_C( 489498840), -INT32_C( 1952930986), 
-INT32_C( 2066046585), INT32_C( 868160717), INT32_C( 1460791125), INT32_C( 1853145002), -INT32_C( 577226843), -INT32_C( 1420841085), -INT32_C( 594730618), INT32_C( 1281828549), -INT32_C( 1798290745), -INT32_C( 1396207530) }, { INT32_C( 553900151), INT32_C( 1032812440), INT32_C( 823863214), INT32_C( 1574791894), INT32_C( 1345940107), INT32_C( 1469882768), INT32_C( 971730403), INT32_C( 1961210877), INT32_C( 597027211), INT32_C( 258024800), INT32_C( 591428684), INT32_C( 1904221670), INT32_C( 415414664), INT32_C( 1030774362), -INT32_C( 931701813), -INT32_C( 1707320306) }, { INT32_C( 600887559), INT32_C( 555857050), INT32_C( 1499115450), INT32_C( 751445742), -INT32_C( 1293173341), -INT32_C( 603716922), -INT32_C( 1224904514), INT32_C( 437932562), INT32_C( 1954811614), INT32_C( 1628805834), INT32_C( 1757596), -INT32_C( 623922587), -INT32_C( 1001416690), INT32_C( 1897363615), -INT32_C( 483019055), INT32_C( 754798344) } }, { { -INT32_C( 1497509307), INT32_C( 1135943415), -INT32_C( 2140736102), -INT32_C( 1678580205), -INT32_C( 88886112), -INT32_C( 566746350), -INT32_C( 1918455937), INT32_C( 1361568523), -INT32_C( 1393039947), -INT32_C( 1628460029), -INT32_C( 1273080159), -INT32_C( 598798276), -INT32_C( 673774652), -INT32_C( 1514860762), -INT32_C( 919381058), -INT32_C( 216376770) }, UINT16_C( 4415), { -INT32_C( 1883421793), -INT32_C( 1744927), INT32_C( 1662001427), -INT32_C( 714681090), INT32_C( 1625590955), INT32_C( 1706861106), -INT32_C( 2034238869), -INT32_C( 1919418386), INT32_C( 589059394), -INT32_C( 954072652), INT32_C( 556413475), -INT32_C( 1292464121), INT32_C( 1326701085), -INT32_C( 407580804), -INT32_C( 1217563703), -INT32_C( 1270545038) }, { INT32_C( 248996186), -INT32_C( 2032797085), INT32_C( 866582572), -INT32_C( 1377395312), -INT32_C( 201524873), -INT32_C( 1847938872), -INT32_C( 1773582300), -INT32_C( 1488220595), INT32_C( 1387602671), INT32_C( 1238927900), INT32_C( 461143946), -INT32_C( 1815584228), INT32_C( 596034907), -INT32_C( 1716166283), -INT32_C( 
164561239), INT32_C( 2073918348) }, { -INT32_C( 2123750715), INT32_C( 2033231234), INT32_C( 1354178879), INT32_C( 2021673326), -INT32_C( 1827113508), -INT32_C( 194637062), -INT32_C( 1918455937), INT32_C( 1361568523), INT32_C( 1906931629), -INT32_C( 1628460029), -INT32_C( 1273080159), -INT32_C( 598798276), INT32_C( 1821712198), -INT32_C( 1514860762), -INT32_C( 919381058), -INT32_C( 216376770) } }, { { -INT32_C( 1160883555), INT32_C( 1728292572), INT32_C( 1115848486), INT32_C( 1037454050), -INT32_C( 2057217009), INT32_C( 1713247933), -INT32_C( 1604563436), INT32_C( 1729886665), INT32_C( 673311051), -INT32_C( 1232132976), -INT32_C( 2047340125), INT32_C( 1807994459), -INT32_C( 386915285), INT32_C( 1313803834), INT32_C( 653175645), -INT32_C( 259192411) }, UINT16_C(44787), { -INT32_C( 1479376104), INT32_C( 834172473), -INT32_C( 1107290885), INT32_C( 1877093247), INT32_C( 1652366355), INT32_C( 1477368426), -INT32_C( 1906134271), -INT32_C( 1136831069), INT32_C( 291704792), -INT32_C( 2126373755), -INT32_C( 1354808784), -INT32_C( 2128731539), -INT32_C( 1444701378), INT32_C( 1996616054), INT32_C( 1208312740), -INT32_C( 1878768968) }, { -INT32_C( 677288110), -INT32_C( 1286019965), -INT32_C( 1805477850), -INT32_C( 132808518), -INT32_C( 1834878948), -INT32_C( 1911970838), -INT32_C( 1076490746), -INT32_C( 1555047855), -INT32_C( 981798590), -INT32_C( 75901995), INT32_C( 630184811), INT32_C( 2015208540), -INT32_C( 2029338723), INT32_C( 1746211682), INT32_C( 1915218977), INT32_C( 135624646) }, { INT32_C( 1886643274), -INT32_C( 2099211590), INT32_C( 1115848486), INT32_C( 1037454050), -INT32_C( 253959153), -INT32_C( 704153216), INT32_C( 833928455), INT32_C( 527601650), INT32_C( 673311051), INT32_C( 2050740048), -INT32_C( 1968137893), -INT32_C( 117209551), -INT32_C( 386915285), INT32_C( 521462292), INT32_C( 653175645), -INT32_C( 1743702658) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi32(test_vec[i].src); 
simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_mask_xor_epi32(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_xor_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask16 k; const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { { UINT16_C(31138), { -INT32_C( 268230814), -INT32_C( 127828744), -INT32_C( 652244455), -INT32_C( 178928494), -INT32_C( 1171138727), INT32_C( 1381371835), INT32_C( 1040805621), -INT32_C( 726160526), -INT32_C( 1027294518), INT32_C( 1337664822), INT32_C( 1093196463), -INT32_C( 147358050), INT32_C( 296904789), INT32_C( 1818429559), -INT32_C( 374707081), -INT32_C( 507682537) }, { INT32_C( 1386512668), INT32_C( 1453481894), -INT32_C( 661140935), -INT32_C( 1630548408), -INT32_C( 1364229833), INT32_C( 18551434), -INT32_C( 1762998913), INT32_C( 1131980583), -INT32_C( 829088728), -INT32_C( 1272694917), INT32_C( 1250737154), -INT32_C( 1041736566), INT32_C( 1735432157), INT32_C( 694717354), INT32_C( 2009027152), INT32_C( 565852153) }, { INT32_C( 0), -INT32_C( 1362943138), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1397562673), INT32_C( 0), -INT32_C( 1748038571), INT32_C( 206677730), INT32_C( 0), INT32_C( 0), INT32_C( 920592916), INT32_C( 1992490888), INT32_C( 1158382557), -INT32_C( 1642775001), INT32_C( 0) } }, { UINT16_C(20563), { INT32_C( 344444656), INT32_C( 265324931), -INT32_C( 1133814828), -INT32_C( 1940633316), INT32_C( 421002928), -INT32_C( 429168601), -INT32_C( 1709283873), INT32_C( 1995075974), -INT32_C( 1014271680), -INT32_C( 808297477), -INT32_C( 745849162), INT32_C( 929030023), INT32_C( 72382429), -INT32_C( 1091846945), -INT32_C( 1487402719), -INT32_C( 1172422022) }, { -INT32_C( 1350719052), -INT32_C( 1149349884), INT32_C( 361630094), -INT32_C( 968036887), INT32_C( 1137417572), INT32_C( 
2046998105), INT32_C( 958487231), INT32_C( 1374961565), -INT32_C( 318737944), INT32_C( 1336442561), INT32_C( 1885615751), -INT32_C( 2009616093), -INT32_C( 1479867826), INT32_C( 1998704056), -INT32_C( 978304472), INT32_C( 1779868802) }, { -INT32_C( 1141217468), -INT32_C( 1263609465), INT32_C( 0), INT32_C( 0), INT32_C( 1524392916), INT32_C( 0), -INT32_C( 1556136096), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1550092909), INT32_C( 0), INT32_C( 1659390729), INT32_C( 0) } }, { UINT16_C( 5654), { -INT32_C( 7022761), -INT32_C( 1959453913), -INT32_C( 1019455092), -INT32_C( 1396339999), INT32_C( 1383759153), -INT32_C( 1533697292), INT32_C( 2101942119), -INT32_C( 644653183), INT32_C( 1574446902), -INT32_C( 823587262), INT32_C( 1217471847), -INT32_C( 504080720), -INT32_C( 952865069), INT32_C( 2003618064), INT32_C( 1643427296), INT32_C( 1245349652) }, { -INT32_C( 240643154), -INT32_C( 2000711391), INT32_C( 1724928438), INT32_C( 2068366503), INT32_C( 1128430643), INT32_C( 633056837), INT32_C( 2005315427), -INT32_C( 440221385), -INT32_C( 170497324), -INT32_C( 1317169413), -INT32_C( 1911009817), INT32_C( 1141465105), INT32_C( 562580700), INT32_C( 1581663226), INT32_C( 701877490), INT32_C( 1628411789) }, { INT32_C( 0), INT32_C( 59410950), -INT32_C( 1511258054), INT32_C( 0), INT32_C( 288882946), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 2140444857), -INT32_C( 964073344), INT32_C( 0), -INT32_C( 423878129), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { UINT16_C(58625), { -INT32_C( 746849194), -INT32_C( 970956114), -INT32_C( 98159887), -INT32_C( 28966282), INT32_C( 1765884195), INT32_C( 1949643935), INT32_C( 1812775773), INT32_C( 2068974884), -INT32_C( 1202795254), INT32_C( 561934128), INT32_C( 387687585), -INT32_C( 921279834), INT32_C( 1110595491), -INT32_C( 373921909), INT32_C( 1331020330), -INT32_C( 624252976) }, { -INT32_C( 1517152139), INT32_C( 684068999), INT32_C( 1514201524), -INT32_C( 433891773), INT32_C( 958944685), -INT32_C( 
417144900), INT32_C( 1899394977), -INT32_C( 1807024097), -INT32_C( 1606820584), -INT32_C( 1580662803), INT32_C( 620431584), INT32_C( 201989726), INT32_C( 809841523), -INT32_C( 1290311918), -INT32_C( 47886882), INT32_C( 1704030285) }, { INT32_C( 1995039779), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 410456082), INT32_C( 0), INT32_C( 870362177), INT32_C( 0), INT32_C( 0), INT32_C( 1520500889), -INT32_C( 1301245964), -INT32_C( 1084500067) } }, { UINT16_C(51790), { -INT32_C( 825672955), -INT32_C( 657020195), -INT32_C( 654887474), -INT32_C( 2046072255), -INT32_C( 1309860454), -INT32_C( 151073839), INT32_C( 1516653513), -INT32_C( 1239042895), -INT32_C( 846926096), INT32_C( 1722112920), -INT32_C( 784360304), -INT32_C( 1588114937), INT32_C( 961758312), -INT32_C( 667987697), INT32_C( 1312003485), INT32_C( 973363274) }, { -INT32_C( 553154234), INT32_C( 1950723300), INT32_C( 1346733129), INT32_C( 938581199), -INT32_C( 277855008), INT32_C( 868786070), INT32_C( 2122447668), -INT32_C( 1715960493), -INT32_C( 226967794), -INT32_C( 1251557780), INT32_C( 285584194), INT32_C( 659093063), -INT32_C( 786974661), -INT32_C( 1945837737), INT32_C( 755664346), INT32_C( 432456458) }, { INT32_C( 0), -INT32_C( 1399650759), -INT32_C( 2001555065), -INT32_C( 1308952946), INT32_C( 0), INT32_C( 0), INT32_C( 619163901), INT32_C( 0), INT32_C( 0), -INT32_C( 742136332), INT32_C( 0), -INT32_C( 2044740544), INT32_C( 0), INT32_C( 0), INT32_C( 1664684103), INT32_C( 599956288) } }, { UINT16_C(16258), { INT32_C( 1929244171), -INT32_C( 1474478173), -INT32_C( 1717607087), -INT32_C( 1554851188), -INT32_C( 1350391381), INT32_C( 1077173301), INT32_C( 1342389897), INT32_C( 1670350167), INT32_C( 399871092), INT32_C( 482407115), -INT32_C( 457810089), -INT32_C( 460912583), -INT32_C( 409793871), -INT32_C( 265828506), INT32_C( 1564486149), INT32_C( 599838639) }, { INT32_C( 641439067), -INT32_C( 549192825), -INT32_C( 1815873190), -INT32_C( 1300805119), -INT32_C( 
1147598252), -INT32_C( 693387055), -INT32_C( 1707873302), INT32_C( 364770234), INT32_C( 272365704), INT32_C( 1307541235), INT32_C( 2027991671), INT32_C( 1361795068), INT32_C( 839697505), INT32_C( 1862907781), INT32_C( 1544109218), -INT32_C( 1200502992) }, { INT32_C( 0), INT32_C( 2002699300), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1983018733), INT32_C( 133068028), INT32_C( 1362070584), -INT32_C( 1672024800), -INT32_C( 1246994491), -INT32_C( 710947120), -INT32_C( 1624346397), INT32_C( 0), INT32_C( 0) } }, { UINT16_C(44223), { -INT32_C( 1221872696), -INT32_C( 513170944), INT32_C( 1178101275), -INT32_C( 1022715209), -INT32_C( 713388084), INT32_C( 135404799), -INT32_C( 372293000), -INT32_C( 1013608454), INT32_C( 1115340866), INT32_C( 2116281443), INT32_C( 29645898), -INT32_C( 1027289354), INT32_C( 1570258526), -INT32_C( 765089190), -INT32_C( 423938580), INT32_C( 111759556) }, { INT32_C( 1950884881), INT32_C( 1391619080), -INT32_C( 1101809976), -INT32_C( 494921852), -INT32_C( 1354819499), -INT32_C( 1367235134), -INT32_C( 1634386726), -INT32_C( 1650180468), INT32_C( 1779559522), INT32_C( 549192536), INT32_C( 1054740409), INT32_C( 2065719078), INT32_C( 975855735), -INT32_C( 538399995), INT32_C( 1954381287), INT32_C( 504439228) }, { -INT32_C( 1016883239), -INT32_C( 1281634808), -INT32_C( 127151917), INT32_C( 562728243), INT32_C( 2051381145), -INT32_C( 1500268227), INT32_C( 0), INT32_C( 1580314998), INT32_C( 0), INT32_C( 0), INT32_C( 1058690035), -INT32_C( 1176202800), INT32_C( 0), INT32_C( 227344735), INT32_C( 0), INT32_C( 414740856) } }, { UINT16_C( 8973), { INT32_C( 1160144521), INT32_C( 1700126854), INT32_C( 1053063966), INT32_C( 564018167), INT32_C( 1590535029), -INT32_C( 2378622), INT32_C( 975214376), INT32_C( 1063071414), INT32_C( 444892052), -INT32_C( 2122327709), INT32_C( 1287668565), -INT32_C( 194158977), -INT32_C( 2074985982), INT32_C( 394538735), INT32_C( 2068948165), INT32_C( 1723510482) }, { -INT32_C( 1786691790), INT32_C( 
1830223896), -INT32_C( 1011230908), INT32_C( 934750005), INT32_C( 1354500705), -INT32_C( 43499464), -INT32_C( 1216759324), -INT32_C( 1709362328), -INT32_C( 1959813518), -INT32_C( 470268257), INT32_C( 1403433501), INT32_C( 982146521), -INT32_C( 1618262425), INT32_C( 1805513606), INT32_C( 337778348), -INT32_C( 1129431223) }, { -INT32_C( 794338885), INT32_C( 0), -INT32_C( 42095526), INT32_C( 371793090), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1851056666), INT32_C( 1653055484), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 2082069865), INT32_C( 0), INT32_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_maskz_xor_epi32(test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm512_xor_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { { INT64_C( 7833673612016144483), -INT64_C( 6286797061584980339), -INT64_C( 1894311286508114473), INT64_C( 7457901623545148981), INT64_C( 7758400186936267162), -INT64_C( 7827311421232976810), INT64_C( 5569310173466725242), -INT64_C( 1852634981124308256) }, { -INT64_C( 6235004628972997840), -INT64_C( 3743688304300620355), -INT64_C( 8561205822545100854), INT64_C( 2103383255765864446), -INT64_C( 2114542116453788245), -INT64_C( 7978576687197911256), -INT64_C( 1001589171339167514), -INT64_C( 1694078873958054286) }, { -INT64_C( 4193382358810418349), INT64_C( 7263022192053547824), INT64_C( 7820062331317703197), INT64_C( 8813401457401952715), -INT64_C( 8571197845464267727), INT64_C( 151340619297975166), -INT64_C( 4660229538766687332), INT64_C( 1024413272115446930) } }, { { INT64_C( 8415412740149389505), INT64_C( 8136937495115801748), INT64_C( 8472142827390787501), -INT64_C( 
232688289166536801), -INT64_C( 3621830638293505029), -INT64_C( 8157403346233676848), -INT64_C( 8940555774217696396), INT64_C( 1209819125977694866) }, { INT64_C( 3014619373212577232), -INT64_C( 8706215558938034241), -INT64_C( 5583923536013140710), -INT64_C( 3039284310411517779), -INT64_C( 181353180207109384), INT64_C( 7516167267833216947), -INT64_C( 1874932065944318681), -INT64_C( 4710068369810294072) }, { INT64_C( 6710250289300899089), -INT64_C( 594058874992207061), -INT64_C( 4101950076832841033), INT64_C( 2960861935095328562), INT64_C( 3514804452886151427), -INT64_C( 1835816431532026781), INT64_C( 7356107569477813843), -INT64_C( 5879346165530970022) } }, { { -INT64_C( 1666178802879790940), INT64_C( 8017248771709566347), INT64_C( 6370254332653643850), INT64_C( 3219730174574923307), -INT64_C( 3070348517276991929), INT64_C( 6495017567146349163), -INT64_C( 5439991518983235431), -INT64_C( 5474315090744007085) }, { INT64_C( 8012065384011076881), INT64_C( 7498569038459756453), -INT64_C( 8909788908341826275), INT64_C( 7194894378406510264), INT64_C( 4751279587353037180), INT64_C( 8288320232820773582), INT64_C( 7109622136361818100), -INT64_C( 1430348694698849934) }, { -INT64_C( 8660395485888014923), INT64_C( 527688101943681582), -INT64_C( 2576724598806067881), INT64_C( 5726223408186454163), -INT64_C( 7742811636499433669), INT64_C( 2964766335513985189), -INT64_C( 3014254901916840083), INT64_C( 6350412050110132001) } }, { { -INT64_C( 6753046675043064470), INT64_C( 4194897748161412973), INT64_C( 3956704487311102429), INT64_C( 4426793051552903443), -INT64_C( 6594523062152668411), -INT64_C( 2500730345433285685), -INT64_C( 4041049702590534928), INT64_C( 851383767448348095) }, { INT64_C( 7741231053211505812), INT64_C( 7420181171167614491), INT64_C( 7776599092500323953), -INT64_C( 8935477080687233300), INT64_C( 6912388001218618338), -INT64_C( 6814099961521935981), INT64_C( 9145319435102817234), -INT64_C( 7467689775987562262) }, { -INT64_C( 3952406250137193986), INT64_C( 
6687425642990135670), INT64_C( 6702779462972608428), -INT64_C( 4714725391518644225), -INT64_C( 318013443086337817), INT64_C( 8945533351659557464), -INT64_C( 5115529358649122526), -INT64_C( 7814353725753193131) } }, { { -INT64_C( 363657528445588654), -INT64_C( 5313838659546157200), -INT64_C( 6481207919005857538), INT64_C( 4635854309387370247), -INT64_C( 1807691163233783929), INT64_C( 6833305750270831930), INT64_C( 3401057919711447504), INT64_C( 1561413775749067132) }, { -INT64_C( 8859444360416820816), INT64_C( 3069616949972669147), -INT64_C( 10721871889266815), -INT64_C( 5170953676681197927), INT64_C( 749826453865906582), INT64_C( 288756504667163261), INT64_C( 4172734175648855909), -INT64_C( 212340540276977075) }, { INT64_C( 9221376615234022114), -INT64_C( 7144947895540580949), INT64_C( 6473914840305078143), -INT64_C( 546965349575837282), -INT64_C( 1401145767694422511), INT64_C( 6545166095291246407), INT64_C( 1646764641365311157), -INT64_C( 1682414516424613071) } }, { { INT64_C( 6059952658790214623), -INT64_C( 6712846976013345200), INT64_C( 6538509831123994084), -INT64_C( 7337563655741956666), -INT64_C( 6786149137027429721), INT64_C( 1152722674958794016), -INT64_C( 4236109339647833136), INT64_C( 5314845287287874925) }, { INT64_C( 5484145387571211049), -INT64_C( 6138219535088906346), INT64_C( 2375857380799177524), INT64_C( 3786695004706696354), -INT64_C( 8939704712816601994), INT64_C( 8564742865895594328), -INT64_C( 4446283047571463305), INT64_C( 1643131752719750285) }, { INT64_C( 1730191473646272758), INT64_C( 578577205123273158), INT64_C( 8810670071267321552), -INT64_C( 5861795896640474780), INT64_C( 2467137782418923217), INT64_C( 8728925404920833144), INT64_C( 539853474565384359), INT64_C( 6849847759138550240) } }, { { -INT64_C( 7108746993929192473), INT64_C( 2952580909138746433), -INT64_C( 3318415959729659377), INT64_C( 8351705111930271794), -INT64_C( 8460680085403819854), INT64_C( 2384100826312238396), INT64_C( 4161117182544079274), INT64_C( 1385686332839962939) 
}, { INT64_C( 6874356853280244370), INT64_C( 2681720383917028356), INT64_C( 363617062994806731), -INT64_C( 7318342761473894660), INT64_C( 2164784578008873609), -INT64_C( 8795130151555541168), -INT64_C( 3115818213941246733), INT64_C( 2097447710665838201) }, { -INT64_C( 4450058466010859147), INT64_C( 994959937184262213), -INT64_C( 3100361591938341436), -INT64_C( 1614767496023839026), -INT64_C( 7737328937327470021), -INT64_C( 6564158462417554836), -INT64_C( 1333872736001888935), INT64_C( 1018187299377882946) } }, { { -INT64_C( 4803982436490178277), -INT64_C( 1771749786168752086), -INT64_C( 7193825152807419778), INT64_C( 1704113265784478708), INT64_C( 7923416425212768202), INT64_C( 8576185165226802729), INT64_C( 160712378890954874), INT64_C( 6585724530256992338) }, { INT64_C( 9029586771277755040), -INT64_C( 3064805812496326649), -INT64_C( 8261663046046104505), -INT64_C( 5408189745344027899), INT64_C( 1640407638281258483), INT64_C( 4951300961883020935), -INT64_C( 2550739608973727279), -INT64_C( 6368259520359861034) }, { -INT64_C( 4603986968108719173), INT64_C( 3611576490511247405), INT64_C( 1257290549943567417), -INT64_C( 6677578309627889423), INT64_C( 8878362109278874169), INT64_C( 3725100340635705518), -INT64_C( 2404072443576090197), -INT64_C( 217769111299019644) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_xor_epi64(a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_mask_xor_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t src[8]; const simde__mmask8 k; const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { { INT64_C( 45749859996612839), INT64_C( 2814572066373608668), -INT64_C( 6582897561231417342), -INT64_C( 8687305550852076307), INT64_C( 8500406415190396854), INT64_C( 
5093384555923498535), INT64_C( 8909703965306565750), INT64_C( 546313591099609965) }, UINT8_C(244), { -INT64_C( 2585626980132258187), INT64_C( 726882011601479497), -INT64_C( 8038622157332801693), -INT64_C( 8691410639997796728), INT64_C( 4087190315360841190), -INT64_C( 6654652016916703164), -INT64_C( 2859196078284653307), INT64_C( 229866223008298134) }, { -INT64_C( 7945590762679324780), -INT64_C( 8839772767949909393), -INT64_C( 6397194970787743473), -INT64_C( 5121297404723757619), -INT64_C( 3311000718894445522), INT64_C( 1769302264377325704), INT64_C( 4288746735372129484), -INT64_C( 9076703424935504851) }, { INT64_C( 45749859996612839), INT64_C( 2814572066373608668), INT64_C( 3983867818266324588), -INT64_C( 8687305550852076307), -INT64_C( 1534483321180642872), -INT64_C( 4960646585427500852), -INT64_C( 2029249597307622967), -INT64_C( 9135072113565048645) } }, { { -INT64_C( 4389873590959520546), INT64_C( 3847016815465829697), -INT64_C( 5127463024880002259), -INT64_C( 1350777454883832725), -INT64_C( 8622911936856637864), -INT64_C( 6379571547940445736), -INT64_C( 8262950684676879856), -INT64_C( 3957664130431951338) }, UINT8_C(203), { -INT64_C( 3288302065678132158), -INT64_C( 6867158103737196692), INT64_C( 29078381664489015), -INT64_C( 2093545317731299942), INT64_C( 800449542886982807), -INT64_C( 7919193623266236573), -INT64_C( 4301136478309459027), -INT64_C( 7828699826288344616) }, { -INT64_C( 1617647112863104542), INT64_C( 5065560676272060766), INT64_C( 5928115323832446807), -INT64_C( 1137963685561880410), INT64_C( 7301739104183587045), -INT64_C( 8568825949546695114), INT64_C( 3848130783034911386), INT64_C( 4286595986196214142) }, { INT64_C( 4310345289044696480), -INT64_C( 1801852656978844110), -INT64_C( 5127463024880002259), INT64_C( 1353085047476567356), -INT64_C( 8622911936856637864), -INT64_C( 6379571547940445736), -INT64_C( 1069598699435757257), -INT64_C( 6329859119562035034) } }, { { INT64_C( 2488796161233055261), -INT64_C( 6022150448521041999), INT64_C( 
4933174169979353009), INT64_C( 8640803886955777369), INT64_C( 4344324970652602684), -INT64_C( 898031332366467893), -INT64_C( 3887263497667196412), INT64_C( 7478337318462021367) }, UINT8_C(245), { INT64_C( 1608838238175268707), INT64_C( 319276987121676509), INT64_C( 2531375131368175488), -INT64_C( 6984319707087211591), INT64_C( 3257643290499388229), INT64_C( 6514463296540085930), -INT64_C( 2908017913228894916), -INT64_C( 1228629849485806824) }, { INT64_C( 4402061463343995290), -INT64_C( 1223561056477925679), INT64_C( 5177755488274713062), INT64_C( 8115643581275557640), -INT64_C( 4361610958211469656), INT64_C( 631194590693431583), INT64_C( 5869570511533257725), INT64_C( 5115705028593717175) }, { INT64_C( 3117896433371354873), -INT64_C( 6022150448521041999), INT64_C( 7276222062497414758), INT64_C( 8640803886955777369), -INT64_C( 1275353631281103379), INT64_C( 5956698770186540981), -INT64_C( 8732406741720305983), -INT64_C( 6337227441341793105) } }, { { -INT64_C( 1655642640894306224), -INT64_C( 1928527008934104302), -INT64_C( 1780464257288025679), INT64_C( 4645871961807080853), INT64_C( 396184772487084747), INT64_C( 7694247929610583034), -INT64_C( 876321354710525762), -INT64_C( 8490798505132667909) }, UINT8_C(153), { -INT64_C( 2797138209894817876), -INT64_C( 5764892471572950838), INT64_C( 1083398023113731424), -INT64_C( 3864555640558062810), INT64_C( 5039295415212501623), -INT64_C( 2395207474291048132), -INT64_C( 629670723305287568), -INT64_C( 2391099797848452888) }, { INT64_C( 4019150386831347461), INT64_C( 4521628108579294052), -INT64_C( 8456435593802240804), -INT64_C( 9148392511393210172), INT64_C( 5071836999787413040), -INT64_C( 7700616903254299628), INT64_C( 919268992504468373), -INT64_C( 2070738063835246265) }, { -INT64_C( 1231608716974305111), -INT64_C( 1928527008934104302), -INT64_C( 1780464257288025679), INT64_C( 5428023262223111138), INT64_C( 256112104962790471), INT64_C( 7694247929610583034), -INT64_C( 876321354710525762), INT64_C( 4436707353278300591) } }, { 
{ INT64_C( 2479355900382364993), INT64_C( 1111304417577308446), -INT64_C( 2590687349108723784), -INT64_C( 4039761275362786215), -INT64_C( 8189705280462062040), INT64_C( 4210251584504159459), INT64_C( 1509346594468311936), -INT64_C( 7144943817645321846) }, UINT8_C( 14), { -INT64_C( 8721207303514172734), INT64_C( 8557758222839600753), INT64_C( 6723388035183008125), -INT64_C( 395352253016219803), INT64_C( 9146538221902954815), -INT64_C( 5145499759254898594), INT64_C( 8396340341971405148), INT64_C( 170411947343828522) }, { -INT64_C( 3244744075203875731), -INT64_C( 7474133439378900910), -INT64_C( 5127834652165225900), -INT64_C( 7466837154034172005), INT64_C( 8659025868352572388), -INT64_C( 173585767618699581), INT64_C( 4130986808766912526), -INT64_C( 3838482065090187530) }, { INT64_C( 2479355900382364993), -INT64_C( 1259379519748575709), -INT64_C( 1902770539115504855), INT64_C( 7125567705071739134), -INT64_C( 8189705280462062040), INT64_C( 4210251584504159459), INT64_C( 1509346594468311936), -INT64_C( 7144943817645321846) } }, { { -INT64_C( 7322204454426325821), -INT64_C( 4462169255779407764), -INT64_C( 6962650059328556152), -INT64_C( 5561527802563267039), -INT64_C( 5928710453305019577), INT64_C( 782735190010253669), INT64_C( 1049778440287304264), INT64_C( 1524159294001730182) }, UINT8_C(217), { INT64_C( 4406147630084273090), INT64_C( 6172405503935594122), INT64_C( 9083848596339727098), INT64_C( 7864517138114643045), -INT64_C( 8017162673287577708), INT64_C( 4755687165104520592), -INT64_C( 658691503089303246), INT64_C( 1979444766424770400) }, { -INT64_C( 8774039058048637787), -INT64_C( 694063673707057962), INT64_C( 7347955965608798833), INT64_C( 5114374633758407510), -INT64_C( 4392929463379061619), -INT64_C( 2660713234140481191), -INT64_C( 1381242144590764183), -INT64_C( 4973730811908373486) }, { -INT64_C( 4964763279348885657), -INT64_C( 4462169255779407764), -INT64_C( 6962650059328556152), INT64_C( 3160832092535504691), INT64_C( 6031583149779954457), INT64_C( 
782735190010253669), INT64_C( 1877732794698114651), -INT64_C( 6808980108460137614) } }, { { INT64_C( 8216894590175538007), -INT64_C( 8015102044995065729), -INT64_C( 8319041892314537462), INT64_C( 8069950358681507095), INT64_C( 121240403729136420), -INT64_C( 1487121492127105372), INT64_C( 4995924851354721104), -INT64_C( 940257476621986452) }, UINT8_C(196), { INT64_C( 1947321805534736039), -INT64_C( 8561651653089111504), -INT64_C( 2092489041890510734), INT64_C( 1500721690142866054), -INT64_C( 8085793806064609071), INT64_C( 2163588935650821455), INT64_C( 4667027440343170452), -INT64_C( 1686560576050858730) }, { -INT64_C( 2582427849531715781), -INT64_C( 3968128441716664540), INT64_C( 5195024434345973022), -INT64_C( 4187110105943349020), -INT64_C( 81476173827179916), -INT64_C( 5323890722526513120), INT64_C( 1931383667822610259), INT64_C( 8313639412841224719) }, { INT64_C( 8216894590175538007), -INT64_C( 8015102044995065729), -INT64_C( 6130081342750364308), INT64_C( 8069950358681507095), INT64_C( 121240403729136420), -INT64_C( 1487121492127105372), INT64_C( 6487782212416142023), -INT64_C( 7221564986028855527) } }, { { INT64_C( 7515754918875348596), INT64_C( 3232135209222413451), INT64_C( 2229984471521781921), INT64_C( 8541946416010364953), -INT64_C( 2412501877195996971), -INT64_C( 4885037687150329554), INT64_C( 9039434068048209220), INT64_C( 1659050078198114957) }, UINT8_C( 33), { -INT64_C( 429157281734249269), INT64_C( 3203978284322067029), INT64_C( 7455724837908040293), -INT64_C( 2817354451018840157), INT64_C( 335448553716800499), -INT64_C( 917138322303370029), -INT64_C( 8711837563429726603), INT64_C( 8377595263254839645) }, { -INT64_C( 8340727286980282227), -INT64_C( 7636439070152493692), -INT64_C( 7697670767163939903), INT64_C( 3969997471685310498), -INT64_C( 8826859548180268741), -INT64_C( 3251989183356983769), -INT64_C( 6246875829788793216), -INT64_C( 3603330975078199780) }, { INT64_C( 8517603223431072838), INT64_C( 3232135209222413451), INT64_C( 
2229984471521781921), INT64_C( 8541946416010364953), -INT64_C( 2412501877195996971), INT64_C( 2421589282539805428), INT64_C( 9039434068048209220), INT64_C( 1659050078198114957) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi64(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_mask_xor_epi64(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_maskz_xor_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde__mmask8 k; const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { UINT8_C(152), { -INT64_C( 8982237529053997842), INT64_C( 3103674849943855424), INT64_C( 1982415417451867211), -INT64_C( 9083321110428971905), INT64_C( 7887070877457153635), INT64_C( 7936046385343671330), INT64_C( 1142105130860059500), -INT64_C( 2327418192463860577) }, { INT64_C( 9182915906006801623), -INT64_C( 3782010632694931727), -INT64_C( 4191618534024375243), INT64_C( 7829976987492367438), -INT64_C( 5664436787247809914), -INT64_C( 6275684222459520321), INT64_C( 2362774424294093278), -INT64_C( 972614327583719042) }, { INT64_C( 0), INT64_C( 0), INT64_C( 0), -INT64_C( 1344283778605633487), -INT64_C( 2587491794353060635), INT64_C( 0), INT64_C( 0), INT64_C( 3257172424768237025) } }, { UINT8_C( 14), { INT64_C( 1409767669188450032), INT64_C( 535417729420012870), -INT64_C( 117662871603022948), -INT64_C( 2123625407593156124), INT64_C( 2053434539035022643), -INT64_C( 3346989035654740496), INT64_C( 5028564073104500622), -INT64_C( 4300579678214802307) }, { INT64_C( 4002766890185262234), -INT64_C( 2181348245731657548), -INT64_C( 911567200057529934), -INT64_C( 648602612603414614), -INT64_C( 7910868655864926281), -INT64_C( 981726171945503701), INT64_C( 3172889850273926303), -INT64_C( 
2970011671702574219) }, { INT64_C( 0), -INT64_C( 1813713950618380814), INT64_C( 938028879597345326), INT64_C( 1475191305885214286), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(149), { -INT64_C( 122716449265167314), INT64_C( 3987427480104258177), -INT64_C( 2447171538408346827), -INT64_C( 3365926546413069491), INT64_C( 4949525000721972647), INT64_C( 6082763436880293385), INT64_C( 6273216357046166716), -INT64_C( 8772636185306919988) }, { INT64_C( 8870376520984048402), INT64_C( 2542591394626541015), -INT64_C( 1180399248176107203), INT64_C( 1283984387118967405), -INT64_C( 3908843060537718317), -INT64_C( 4206915619632812268), -INT64_C( 4662164243526913735), INT64_C( 2190117534882835284) }, { -INT64_C( 8838896490382629060), INT64_C( 0), INT64_C( 3573473610564262408), INT64_C( 0), -INT64_C( 8254885696627231628), INT64_C( 0), INT64_C( 0), -INT64_C( 7483423931756404584) } }, { UINT8_C(230), { -INT64_C( 7827641925076753334), INT64_C( 7539554054630529375), -INT64_C( 798410052431249940), -INT64_C( 1276666438538543227), INT64_C( 2340348715907686594), INT64_C( 7084207804521158000), INT64_C( 821033393377053478), -INT64_C( 4272309647950633948) }, { -INT64_C( 813422530811728427), -INT64_C( 8173422651522728262), -INT64_C( 7654040801051243466), -INT64_C( 4698359519177329658), INT64_C( 6158402678492002303), -INT64_C( 5386841171882060983), -INT64_C( 6509328055499932508), INT64_C( 1734864526403044972) }, { INT64_C( 0), -INT64_C( 1858918950121707547), INT64_C( 7001998150177485274), INT64_C( 0), INT64_C( 0), -INT64_C( 2923345196226853319), -INT64_C( 5850495760465381502), -INT64_C( 2547120826193042872) } }, { UINT8_C(241), { INT64_C( 7402477510674193255), -INT64_C( 1863195392533566616), -INT64_C( 1608300861362718454), INT64_C( 9026368236341560280), INT64_C( 2030747075921039797), -INT64_C( 5213830620423384300), INT64_C( 3226169786223897081), -INT64_C( 7548099076051449941) }, { -INT64_C( 1412565848515727723), -INT64_C( 5042819090609471071), INT64_C( 5906290208258063991), 
-INT64_C( 3559159576159619549), INT64_C( 3046259127309826945), -INT64_C( 5262319553509592963), INT64_C( 968688341824409210), INT64_C( 6482559857543185902) }, { -INT64_C( 8439907556581526030), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 3920617936044698164), INT64_C( 98160633604465513), INT64_C( 2428795596282832771), -INT64_C( 3546182036552269243) } }, { UINT8_C(168), { -INT64_C( 1654971735735160050), -INT64_C( 3618396837604997422), -INT64_C( 2321671188407392160), -INT64_C( 852569122218184746), -INT64_C( 8527980720679920445), INT64_C( 1070063051582525133), INT64_C( 3153274373031825801), INT64_C( 5582450922785503646) }, { INT64_C( 5365668961424373301), INT64_C( 8590950464161730174), INT64_C( 7872605299379151040), -INT64_C( 41541984007884651), -INT64_C( 6690553381164190369), INT64_C( 7788140668098870447), INT64_C( 2478321804687601704), INT64_C( 2196821751906784692) }, { INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 812751503155928899), INT64_C( 0), INT64_C( 7119249835665306210), INT64_C( 0), INT64_C( 5982033988820085802) } }, { UINT8_C( 34), { -INT64_C( 7099385906741118360), -INT64_C( 7139975430881601607), -INT64_C( 7228194637974216392), INT64_C( 1322782488553664140), INT64_C( 5057636448096527218), INT64_C( 7509173144721521260), INT64_C( 3869178845850853074), -INT64_C( 792411965939119037) }, { -INT64_C( 7115483786937574292), INT64_C( 849890848911280179), -INT64_C( 8527612870863474487), INT64_C( 5429785197312153057), -INT64_C( 3592881174692371884), -INT64_C( 8281559221398316595), -INT64_C( 1513871340195597267), INT64_C( 4748387116286637243) }, { INT64_C( 0), -INT64_C( 7556234139783962742), INT64_C( 0), INT64_C( 0), INT64_C( 0), -INT64_C( 1935407695096568927), INT64_C( 0), INT64_C( 0) } }, { UINT8_C(139), { INT64_C( 4935371154977638186), -INT64_C( 3940202631690419418), INT64_C( 3960841935430796437), -INT64_C( 6899968217622343414), -INT64_C( 9198094981434153942), -INT64_C( 8237377322867269296), INT64_C( 5165840138584555705), INT64_C( 7019203302184899410) }, { 
-INT64_C( 2992976723773572708), -INT64_C( 4056219280331761360), -INT64_C( 8039221936049258773), INT64_C( 4421622930838481093), -INT64_C( 3253945696776891781), -INT64_C( 1614628432426396734), -INT64_C( 7999378326465488595), -INT64_C( 7870629585987349017) }, { -INT64_C( 7923174882398929226), INT64_C( 1073260354067536406), INT64_C( 0), -INT64_C( 7105939320326063665), INT64_C( 0), INT64_C( 0), INT64_C( 0), -INT64_C( 888106185265150283) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_maskz_xor_epi64(test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm512_xor_si512(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu64(UINT64_C(0xedb78aa51009d043), UINT64_C(0xf8d6e1466c80412e), UINT64_C(0x8d2f88ccf8d072d2), UINT64_C(0xd42ce380801d56eb), UINT64_C(0x4f7a9f9f877cf207), UINT64_C(0x9ebf29784a068fec), UINT64_C(0x14e312298b327bb2), UINT64_C(0xa4cae715b44968c3)), simde_x_mm512_set_epu64(UINT64_C(0xf857a2af697b20a2), UINT64_C(0xaff5e6cb29617e18), UINT64_C(0x91849348f298760e), UINT64_C(0x1c2d26b7c60b0e1b), UINT64_C(0x27f2529cc5e0d6da), UINT64_C(0x947c2755a9d7153c), UINT64_C(0xab6596dc73591502), UINT64_C(0x6cb918d0cf4b1739)), simde_x_mm512_set_epu64(UINT64_C(0x15e0280a7972f0e1), UINT64_C(0x5723078d45e13f36), UINT64_C(0x1cab1b840a4804dc), UINT64_C(0xc801c537461658f0), UINT64_C(0x6888cd03429c24dd), UINT64_C(0x0ac30e2de3d19ad0), UINT64_C(0xbf8684f5f86b6eb0), UINT64_C(0xc873ffc57b027ffa)) }, { simde_x_mm512_set_epu64(UINT64_C(0x7fb6b2cc6cfe2095), UINT64_C(0x2b06563737a7554c), UINT64_C(0x20e62cd04a81579d), UINT64_C(0x0c22d8b7c6b9afd0), UINT64_C(0x7778227f653f51e3), UINT64_C(0x0c5d93feab9aa991), UINT64_C(0x45c2fff1a0832972), UINT64_C(0xc6fefc42436c6b46)), 
simde_x_mm512_set_epu64(UINT64_C(0xe44de61268819c50), UINT64_C(0xec0f232714f1db42), UINT64_C(0x1de6bf50b7448d81), UINT64_C(0x326b4ae523cd1df1), UINT64_C(0x89856b1e9b31c8bb), UINT64_C(0xb4ab21a1f0881bd7), UINT64_C(0x3d731298d95d6f21), UINT64_C(0xffc08a64375bf884)), simde_x_mm512_set_epu64(UINT64_C(0x9bfb54de047fbcc5), UINT64_C(0xc709751023568e0e), UINT64_C(0x3d009380fdc5da1c), UINT64_C(0x3e499252e574b221), UINT64_C(0xfefd4961fe0e9958), UINT64_C(0xb8f6b25f5b12b246), UINT64_C(0x78b1ed6979de4653), UINT64_C(0x393e7626743793c2)) }, { simde_x_mm512_set_epu64(UINT64_C(0xdcc26142d37fb5e3), UINT64_C(0x63c9856267e9999a), UINT64_C(0xbf8d48fd4246771e), UINT64_C(0xe34d12aa2d211228), UINT64_C(0x38f5235239303226), UINT64_C(0x264f8a7d4f0c7a44), UINT64_C(0x3e3aa9db569a6f79), UINT64_C(0x47f95a5dbaf7c3fe)), simde_x_mm512_set_epu64(UINT64_C(0x24f03a01ff0412a4), UINT64_C(0x5e69b3ae6e914583), UINT64_C(0x61a1b3291cf72225), UINT64_C(0x5b7b6dafc3dcc6f8), UINT64_C(0x643061d1edb662f2), UINT64_C(0x3eebdc3f8f4056db), UINT64_C(0x11c4727a73fc286e), UINT64_C(0x561b4fb689bf8f8c)), simde_x_mm512_set_epu64(UINT64_C(0xf8325b432c7ba747), UINT64_C(0x3da036cc0978dc19), UINT64_C(0xde2cfbd45eb1553b), UINT64_C(0xb8367f05eefdd4d0), UINT64_C(0x5cc54283d48650d4), UINT64_C(0x18a45642c04c2c9f), UINT64_C(0x2ffedba125664717), UINT64_C(0x11e215eb33484c72)) }, { simde_x_mm512_set_epu64(UINT64_C(0xa3db7414654163c1), UINT64_C(0x20295e6408f5e85e), UINT64_C(0x6544618e6bd8d65e), UINT64_C(0x5e62e456253a5970), UINT64_C(0x35200d7cedf89e63), UINT64_C(0x3a187ccb9bdbc4ff), UINT64_C(0x8c83780e03d2ec0a), UINT64_C(0x25da10ac4ca3d5f3)), simde_x_mm512_set_epu64(UINT64_C(0xb4da361a4ff83c1b), UINT64_C(0x7f54a2cd02321d93), UINT64_C(0x2222e01cb6f3b71d), UINT64_C(0x48d83f4fe210f439), UINT64_C(0x30b5939d74f88fb5), UINT64_C(0x08451aa5c5aafb71), UINT64_C(0x0908270bde506014), UINT64_C(0x14d2968168fbf701)), simde_x_mm512_set_epu64(UINT64_C(0x1701420e2ab95fda), UINT64_C(0x5f7dfca90ac7f5cd), UINT64_C(0x47668192dd2b6143), 
UINT64_C(0x16badb19c72aad49), UINT64_C(0x05959ee1990011d6), UINT64_C(0x325d666e5e713f8e), UINT64_C(0x858b5f05dd828c1e), UINT64_C(0x3108862d245822f2)) }, { simde_x_mm512_set_epu64(UINT64_C(0xb9888f8a15c6f599), UINT64_C(0xdae6980a3c15b8d5), UINT64_C(0x17114f3e96d162e1), UINT64_C(0xaa441d9be0eb3305), UINT64_C(0x7328bea0eddeb5b8), UINT64_C(0x38d955208ba6ab2c), UINT64_C(0xd5a6f9d82f72b047), UINT64_C(0x468d076219769ecc)), simde_x_mm512_set_epu64(UINT64_C(0xf5fce010c130811a), UINT64_C(0x5b4c8bc96595cc6f), UINT64_C(0x9ec90bdb77fd0d0f), UINT64_C(0xe13db6113bafebb0), UINT64_C(0xbe6dfb35371e254d), UINT64_C(0xee5939c207b9c26b), UINT64_C(0x7c3ef03f0a2d4864), UINT64_C(0xe807e98806d6b3fa)), simde_x_mm512_set_epu64(UINT64_C(0x4c746f9ad4f67483), UINT64_C(0x81aa13c3598074ba), UINT64_C(0x89d844e5e12c6fee), UINT64_C(0x4b79ab8adb44d8b5), UINT64_C(0xcd454595dac090f5), UINT64_C(0xd6806ce28c1f6947), UINT64_C(0xa99809e7255ff823), UINT64_C(0xae8aeeea1fa02d36)) }, { simde_x_mm512_set_epu64(UINT64_C(0x8234186be169c857), UINT64_C(0x6e3be8c42ba36d9a), UINT64_C(0x9eebbbe6bd8adb2a), UINT64_C(0x6ce901141909d2cf), UINT64_C(0x35459cc296fca858), UINT64_C(0x1a7d575fa8651237), UINT64_C(0x4b008fe37abafacd), UINT64_C(0xf35eba645c1d884d)), simde_x_mm512_set_epu64(UINT64_C(0x7ea8964c6c682a7c), UINT64_C(0x8b6605b470502155), UINT64_C(0x4b16327f96bf6e87), UINT64_C(0xae618aa0114ea6c6), UINT64_C(0x3c1572ee53b136fa), UINT64_C(0xacef14edc9d741a1), UINT64_C(0x96f4d64c8555893a), UINT64_C(0x1fb0ce0c9ed59cf4)), simde_x_mm512_set_epu64(UINT64_C(0xfc9c8e278d01e22b), UINT64_C(0xe55ded705bf34ccf), UINT64_C(0xd5fd89992b35b5ad), UINT64_C(0xc2888bb408477409), UINT64_C(0x0950ee2cc54d9ea2), UINT64_C(0xb69243b261b25396), UINT64_C(0xddf459afffef73f7), UINT64_C(0xecee7468c2c814b9)) }, { simde_x_mm512_set_epu64(UINT64_C(0xd4967d973e742c64), UINT64_C(0xcb3e880be1980939), UINT64_C(0xc418352686ff3548), UINT64_C(0xdb9cc81b4939caef), UINT64_C(0x99908ab055e14bf0), UINT64_C(0xd01deeb18277fd8f), UINT64_C(0xe1f43dbe1a24fb3a), 
UINT64_C(0xdaa3b7846091d1be)), simde_x_mm512_set_epu64(UINT64_C(0x475003e212ada19b), UINT64_C(0x490bdb33ee5d5470), UINT64_C(0x61249881556eac3a), UINT64_C(0xbf42ccd4a27e5259), UINT64_C(0x613173560a9ec8e6), UINT64_C(0x8736f836c78d1256), UINT64_C(0xa4248bd9dac1f2cc), UINT64_C(0x7008605d8072d787)), simde_x_mm512_set_epu64(UINT64_C(0x93c67e752cd98dff), UINT64_C(0x823553380fc55d49), UINT64_C(0xa53cada7d3919972), UINT64_C(0x64de04cfeb4798b6), UINT64_C(0xf8a1f9e65f7f8316), UINT64_C(0x572b168745faefd9), UINT64_C(0x45d0b667c0e509f6), UINT64_C(0xaaabd7d9e0e30639)) }, { simde_x_mm512_set_epu64(UINT64_C(0xc6500379d74d1915), UINT64_C(0x2deb735fa56e277e), UINT64_C(0xc2e0f463b67c41f4), UINT64_C(0x8f539a5e01d0c88f), UINT64_C(0x68e4935ea747c9c2), UINT64_C(0xdc21f9b373f8b465), UINT64_C(0xf3592239b25cb40f), UINT64_C(0xf4139e2d72ff74c8)), simde_x_mm512_set_epu64(UINT64_C(0x7e1193710ce44a9c), UINT64_C(0x253a368d6b9cc286), UINT64_C(0x310c01bdff0560df), UINT64_C(0xe73fc91eec559d39), UINT64_C(0xc18711aa058fbe1a), UINT64_C(0x80fe26999b91720a), UINT64_C(0x12959cadf8f60c1b), UINT64_C(0xae1ad9214abbd4ef)), simde_x_mm512_set_epu64(UINT64_C(0xb8419008dba95389), UINT64_C(0x08d145d2cef2e5f8), UINT64_C(0xf3ecf5de4979212b), UINT64_C(0x686c5340ed8555b6), UINT64_C(0xa96382f4a2c877d8), UINT64_C(0x5cdfdf2ae869c66f), UINT64_C(0xe1ccbe944aaab814), UINT64_C(0x5a09470c3844a027)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_xor_si512(test_vec[i].a, test_vec[i].b); simde_assert_m512i_u64(r, ==, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_xor_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_xor_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_xor_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_xor_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_xor_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_xor_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_xor_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_xor_epi64) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm512_xor_si512) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/avx512/xorsign.c000066400000000000000000000511561400333146700171210ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2020 Evan Nemerson */ #define SIMDE_TEST_X86_AVX512_INSN xorsign #include #include static int test_simde_x_mm512_xorsign_ps (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 171.14), SIMDE_FLOAT32_C( -720.88), SIMDE_FLOAT32_C( 91.20), SIMDE_FLOAT32_C( -603.00), SIMDE_FLOAT32_C( 989.54), SIMDE_FLOAT32_C( -571.77), SIMDE_FLOAT32_C( 86.22), SIMDE_FLOAT32_C( -501.69), SIMDE_FLOAT32_C( 265.51), SIMDE_FLOAT32_C( 768.92), SIMDE_FLOAT32_C( -453.43), SIMDE_FLOAT32_C( -713.07), SIMDE_FLOAT32_C( 899.54), SIMDE_FLOAT32_C( -39.26), SIMDE_FLOAT32_C( -227.05), SIMDE_FLOAT32_C( 101.70) }, { SIMDE_FLOAT32_C( 235.94), SIMDE_FLOAT32_C( 354.55), SIMDE_FLOAT32_C( 653.88), SIMDE_FLOAT32_C( 954.61), SIMDE_FLOAT32_C( 287.59), SIMDE_FLOAT32_C( -584.93), SIMDE_FLOAT32_C( 648.35), SIMDE_FLOAT32_C( -109.54), SIMDE_FLOAT32_C( 674.70), SIMDE_FLOAT32_C( 116.20), SIMDE_FLOAT32_C( 333.77), SIMDE_FLOAT32_C( -680.08), SIMDE_FLOAT32_C( -22.25), SIMDE_FLOAT32_C( -768.22), SIMDE_FLOAT32_C( 768.15), SIMDE_FLOAT32_C( -851.11) }, { SIMDE_FLOAT32_C( 171.14), SIMDE_FLOAT32_C( -720.88), SIMDE_FLOAT32_C( 91.20), SIMDE_FLOAT32_C( -603.00), SIMDE_FLOAT32_C( 989.54), SIMDE_FLOAT32_C( 571.77), SIMDE_FLOAT32_C( 86.22), SIMDE_FLOAT32_C( 501.69), SIMDE_FLOAT32_C( 265.51), SIMDE_FLOAT32_C( 768.92), SIMDE_FLOAT32_C( -453.43), SIMDE_FLOAT32_C( 713.07), SIMDE_FLOAT32_C( -899.54), SIMDE_FLOAT32_C( 39.26), SIMDE_FLOAT32_C( -227.05), SIMDE_FLOAT32_C( -101.70) } }, { { SIMDE_FLOAT32_C( -489.10), SIMDE_FLOAT32_C( -140.65), SIMDE_FLOAT32_C( -454.11), SIMDE_FLOAT32_C( -499.56), SIMDE_FLOAT32_C( 287.59), SIMDE_FLOAT32_C( 632.11), SIMDE_FLOAT32_C( -1.25), SIMDE_FLOAT32_C( -446.90), SIMDE_FLOAT32_C( 401.02), SIMDE_FLOAT32_C( 545.32), SIMDE_FLOAT32_C( -159.97), SIMDE_FLOAT32_C( 300.56), SIMDE_FLOAT32_C( -493.93), SIMDE_FLOAT32_C( 612.97), SIMDE_FLOAT32_C( -597.74), SIMDE_FLOAT32_C( 742.01) }, 
{ SIMDE_FLOAT32_C( -32.48), SIMDE_FLOAT32_C( -943.85), SIMDE_FLOAT32_C( 696.62), SIMDE_FLOAT32_C( -744.89), SIMDE_FLOAT32_C( -528.78), SIMDE_FLOAT32_C( 344.97), SIMDE_FLOAT32_C( 145.58), SIMDE_FLOAT32_C( -854.08), SIMDE_FLOAT32_C( -538.83), SIMDE_FLOAT32_C( -520.66), SIMDE_FLOAT32_C( -534.16), SIMDE_FLOAT32_C( 438.92), SIMDE_FLOAT32_C( -288.87), SIMDE_FLOAT32_C( -766.01), SIMDE_FLOAT32_C( 587.81), SIMDE_FLOAT32_C( 222.03) }, { SIMDE_FLOAT32_C( 489.10), SIMDE_FLOAT32_C( 140.65), SIMDE_FLOAT32_C( -454.11), SIMDE_FLOAT32_C( 499.56), SIMDE_FLOAT32_C( -287.59), SIMDE_FLOAT32_C( 632.11), SIMDE_FLOAT32_C( -1.25), SIMDE_FLOAT32_C( 446.90), SIMDE_FLOAT32_C( -401.02), SIMDE_FLOAT32_C( -545.32), SIMDE_FLOAT32_C( 159.97), SIMDE_FLOAT32_C( 300.56), SIMDE_FLOAT32_C( 493.93), SIMDE_FLOAT32_C( -612.97), SIMDE_FLOAT32_C( -597.74), SIMDE_FLOAT32_C( 742.01) } }, { { SIMDE_FLOAT32_C( 93.34), SIMDE_FLOAT32_C( -866.30), SIMDE_FLOAT32_C( 722.47), SIMDE_FLOAT32_C( -619.07), SIMDE_FLOAT32_C( 765.80), SIMDE_FLOAT32_C( -278.78), SIMDE_FLOAT32_C( -65.97), SIMDE_FLOAT32_C( 166.82), SIMDE_FLOAT32_C( -733.46), SIMDE_FLOAT32_C( 774.05), SIMDE_FLOAT32_C( -532.61), SIMDE_FLOAT32_C( -227.39), SIMDE_FLOAT32_C( 387.03), SIMDE_FLOAT32_C( -130.35), SIMDE_FLOAT32_C( -485.39), SIMDE_FLOAT32_C( -645.45) }, { SIMDE_FLOAT32_C( -74.20), SIMDE_FLOAT32_C( -788.76), SIMDE_FLOAT32_C( -390.34), SIMDE_FLOAT32_C( 397.02), SIMDE_FLOAT32_C( 556.21), SIMDE_FLOAT32_C( 755.24), SIMDE_FLOAT32_C( 542.94), SIMDE_FLOAT32_C( -982.62), SIMDE_FLOAT32_C( -765.42), SIMDE_FLOAT32_C( -991.23), SIMDE_FLOAT32_C( 456.30), SIMDE_FLOAT32_C( -54.29), SIMDE_FLOAT32_C( -757.24), SIMDE_FLOAT32_C( 44.10), SIMDE_FLOAT32_C( -832.27), SIMDE_FLOAT32_C( 336.11) }, { SIMDE_FLOAT32_C( -93.34), SIMDE_FLOAT32_C( 866.30), SIMDE_FLOAT32_C( -722.47), SIMDE_FLOAT32_C( -619.07), SIMDE_FLOAT32_C( 765.80), SIMDE_FLOAT32_C( -278.78), SIMDE_FLOAT32_C( -65.97), SIMDE_FLOAT32_C( -166.82), SIMDE_FLOAT32_C( 733.46), SIMDE_FLOAT32_C( -774.05), SIMDE_FLOAT32_C( 
-532.61), SIMDE_FLOAT32_C( 227.39), SIMDE_FLOAT32_C( -387.03), SIMDE_FLOAT32_C( -130.35), SIMDE_FLOAT32_C( 485.39), SIMDE_FLOAT32_C( -645.45) } }, { { SIMDE_FLOAT32_C( 177.80), SIMDE_FLOAT32_C( 890.20), SIMDE_FLOAT32_C( 717.03), SIMDE_FLOAT32_C( -56.40), SIMDE_FLOAT32_C( -388.58), SIMDE_FLOAT32_C( -348.94), SIMDE_FLOAT32_C( -889.57), SIMDE_FLOAT32_C( -122.04), SIMDE_FLOAT32_C( -574.89), SIMDE_FLOAT32_C( -422.18), SIMDE_FLOAT32_C( 650.57), SIMDE_FLOAT32_C( 812.14), SIMDE_FLOAT32_C( 447.47), SIMDE_FLOAT32_C( -834.82), SIMDE_FLOAT32_C( -833.31), SIMDE_FLOAT32_C( -626.73) }, { SIMDE_FLOAT32_C( -623.58), SIMDE_FLOAT32_C( -223.65), SIMDE_FLOAT32_C( 770.29), SIMDE_FLOAT32_C( 932.63), SIMDE_FLOAT32_C( -468.42), SIMDE_FLOAT32_C( 313.23), SIMDE_FLOAT32_C( 950.00), SIMDE_FLOAT32_C( -233.83), SIMDE_FLOAT32_C( 322.00), SIMDE_FLOAT32_C( 406.30), SIMDE_FLOAT32_C( 711.87), SIMDE_FLOAT32_C( 564.77), SIMDE_FLOAT32_C( -549.60), SIMDE_FLOAT32_C( 879.61), SIMDE_FLOAT32_C( -99.13), SIMDE_FLOAT32_C( 628.20) }, { SIMDE_FLOAT32_C( -177.80), SIMDE_FLOAT32_C( -890.20), SIMDE_FLOAT32_C( 717.03), SIMDE_FLOAT32_C( -56.40), SIMDE_FLOAT32_C( 388.58), SIMDE_FLOAT32_C( -348.94), SIMDE_FLOAT32_C( -889.57), SIMDE_FLOAT32_C( 122.04), SIMDE_FLOAT32_C( -574.89), SIMDE_FLOAT32_C( -422.18), SIMDE_FLOAT32_C( 650.57), SIMDE_FLOAT32_C( 812.14), SIMDE_FLOAT32_C( -447.47), SIMDE_FLOAT32_C( -834.82), SIMDE_FLOAT32_C( 833.31), SIMDE_FLOAT32_C( -626.73) } }, { { SIMDE_FLOAT32_C( 769.80), SIMDE_FLOAT32_C( -382.09), SIMDE_FLOAT32_C( -428.19), SIMDE_FLOAT32_C( -618.78), SIMDE_FLOAT32_C( 268.96), SIMDE_FLOAT32_C( -317.77), SIMDE_FLOAT32_C( 259.19), SIMDE_FLOAT32_C( 694.07), SIMDE_FLOAT32_C( 260.05), SIMDE_FLOAT32_C( -90.25), SIMDE_FLOAT32_C( 506.21), SIMDE_FLOAT32_C( -292.48), SIMDE_FLOAT32_C( 74.94), SIMDE_FLOAT32_C( 672.90), SIMDE_FLOAT32_C( 80.79), SIMDE_FLOAT32_C( 451.36) }, { SIMDE_FLOAT32_C( -550.76), SIMDE_FLOAT32_C( -148.92), SIMDE_FLOAT32_C( 383.98), SIMDE_FLOAT32_C( -19.17), SIMDE_FLOAT32_C( -835.69), 
SIMDE_FLOAT32_C( 333.99), SIMDE_FLOAT32_C( 746.99), SIMDE_FLOAT32_C( 486.31), SIMDE_FLOAT32_C( -259.72), SIMDE_FLOAT32_C( 458.87), SIMDE_FLOAT32_C( 51.08), SIMDE_FLOAT32_C( 190.69), SIMDE_FLOAT32_C( 338.47), SIMDE_FLOAT32_C( 951.96), SIMDE_FLOAT32_C( -181.11), SIMDE_FLOAT32_C( 108.28) }, { SIMDE_FLOAT32_C( -769.80), SIMDE_FLOAT32_C( 382.09), SIMDE_FLOAT32_C( -428.19), SIMDE_FLOAT32_C( 618.78), SIMDE_FLOAT32_C( -268.96), SIMDE_FLOAT32_C( -317.77), SIMDE_FLOAT32_C( 259.19), SIMDE_FLOAT32_C( 694.07), SIMDE_FLOAT32_C( -260.05), SIMDE_FLOAT32_C( -90.25), SIMDE_FLOAT32_C( 506.21), SIMDE_FLOAT32_C( -292.48), SIMDE_FLOAT32_C( 74.94), SIMDE_FLOAT32_C( 672.90), SIMDE_FLOAT32_C( -80.79), SIMDE_FLOAT32_C( 451.36) } }, { { SIMDE_FLOAT32_C( -430.14), SIMDE_FLOAT32_C( 390.70), SIMDE_FLOAT32_C( 489.50), SIMDE_FLOAT32_C( 838.83), SIMDE_FLOAT32_C( -927.07), SIMDE_FLOAT32_C( -251.31), SIMDE_FLOAT32_C( 532.90), SIMDE_FLOAT32_C( 332.98), SIMDE_FLOAT32_C( 658.44), SIMDE_FLOAT32_C( 39.11), SIMDE_FLOAT32_C( -959.50), SIMDE_FLOAT32_C( -266.62), SIMDE_FLOAT32_C( -287.99), SIMDE_FLOAT32_C( 121.29), SIMDE_FLOAT32_C( -815.27), SIMDE_FLOAT32_C( 161.25) }, { SIMDE_FLOAT32_C( 972.37), SIMDE_FLOAT32_C( 568.72), SIMDE_FLOAT32_C( -857.92), SIMDE_FLOAT32_C( -863.32), SIMDE_FLOAT32_C( -97.30), SIMDE_FLOAT32_C( 889.08), SIMDE_FLOAT32_C( 623.00), SIMDE_FLOAT32_C( 642.99), SIMDE_FLOAT32_C( 347.94), SIMDE_FLOAT32_C( -325.92), SIMDE_FLOAT32_C( -166.32), SIMDE_FLOAT32_C( -313.58), SIMDE_FLOAT32_C( -373.97), SIMDE_FLOAT32_C( 652.57), SIMDE_FLOAT32_C( 794.69), SIMDE_FLOAT32_C( 195.90) }, { SIMDE_FLOAT32_C( -430.14), SIMDE_FLOAT32_C( 390.70), SIMDE_FLOAT32_C( -489.50), SIMDE_FLOAT32_C( -838.83), SIMDE_FLOAT32_C( 927.07), SIMDE_FLOAT32_C( -251.31), SIMDE_FLOAT32_C( 532.90), SIMDE_FLOAT32_C( 332.98), SIMDE_FLOAT32_C( 658.44), SIMDE_FLOAT32_C( -39.11), SIMDE_FLOAT32_C( 959.50), SIMDE_FLOAT32_C( 266.62), SIMDE_FLOAT32_C( 287.99), SIMDE_FLOAT32_C( 121.29), SIMDE_FLOAT32_C( -815.27), SIMDE_FLOAT32_C( 161.25) } }, { 
{ SIMDE_FLOAT32_C( 43.27), SIMDE_FLOAT32_C( 284.19), SIMDE_FLOAT32_C( 34.72), SIMDE_FLOAT32_C( 116.20), SIMDE_FLOAT32_C( -967.12), SIMDE_FLOAT32_C( -432.37), SIMDE_FLOAT32_C( -550.81), SIMDE_FLOAT32_C( 691.32), SIMDE_FLOAT32_C( 606.74), SIMDE_FLOAT32_C( -510.31), SIMDE_FLOAT32_C( -575.30), SIMDE_FLOAT32_C( -681.25), SIMDE_FLOAT32_C( 610.98), SIMDE_FLOAT32_C( -390.57), SIMDE_FLOAT32_C( 480.00), SIMDE_FLOAT32_C( 583.36) }, { SIMDE_FLOAT32_C( -821.85), SIMDE_FLOAT32_C( 622.08), SIMDE_FLOAT32_C( 720.04), SIMDE_FLOAT32_C( 80.86), SIMDE_FLOAT32_C( 511.16), SIMDE_FLOAT32_C( 343.03), SIMDE_FLOAT32_C( -276.16), SIMDE_FLOAT32_C( -140.90), SIMDE_FLOAT32_C( -982.89), SIMDE_FLOAT32_C( 557.52), SIMDE_FLOAT32_C( 545.52), SIMDE_FLOAT32_C( -356.85), SIMDE_FLOAT32_C( 210.09), SIMDE_FLOAT32_C( 340.21), SIMDE_FLOAT32_C( 839.04), SIMDE_FLOAT32_C( -746.64) }, { SIMDE_FLOAT32_C( -43.27), SIMDE_FLOAT32_C( 284.19), SIMDE_FLOAT32_C( 34.72), SIMDE_FLOAT32_C( 116.20), SIMDE_FLOAT32_C( -967.12), SIMDE_FLOAT32_C( -432.37), SIMDE_FLOAT32_C( 550.81), SIMDE_FLOAT32_C( -691.32), SIMDE_FLOAT32_C( -606.74), SIMDE_FLOAT32_C( -510.31), SIMDE_FLOAT32_C( -575.30), SIMDE_FLOAT32_C( 681.25), SIMDE_FLOAT32_C( 610.98), SIMDE_FLOAT32_C( -390.57), SIMDE_FLOAT32_C( 480.00), SIMDE_FLOAT32_C( -583.36) } }, { { SIMDE_FLOAT32_C( -375.59), SIMDE_FLOAT32_C( -126.23), SIMDE_FLOAT32_C( 369.56), SIMDE_FLOAT32_C( -342.71), SIMDE_FLOAT32_C( 441.39), SIMDE_FLOAT32_C( 818.74), SIMDE_FLOAT32_C( -651.39), SIMDE_FLOAT32_C( 48.13), SIMDE_FLOAT32_C( -691.57), SIMDE_FLOAT32_C( -226.69), SIMDE_FLOAT32_C( 366.88), SIMDE_FLOAT32_C( 919.42), SIMDE_FLOAT32_C( 382.74), SIMDE_FLOAT32_C( -153.11), SIMDE_FLOAT32_C( 502.77), SIMDE_FLOAT32_C( 560.89) }, { SIMDE_FLOAT32_C( -531.03), SIMDE_FLOAT32_C( 222.81), SIMDE_FLOAT32_C( -358.25), SIMDE_FLOAT32_C( 980.13), SIMDE_FLOAT32_C( -434.15), SIMDE_FLOAT32_C( 365.59), SIMDE_FLOAT32_C( -160.76), SIMDE_FLOAT32_C( -417.04), SIMDE_FLOAT32_C( -76.89), SIMDE_FLOAT32_C( -615.25), SIMDE_FLOAT32_C( 
226.10), SIMDE_FLOAT32_C( -866.80), SIMDE_FLOAT32_C( 724.97), SIMDE_FLOAT32_C( 65.15), SIMDE_FLOAT32_C( -613.44), SIMDE_FLOAT32_C( -650.63) }, { SIMDE_FLOAT32_C( 375.59), SIMDE_FLOAT32_C( -126.23), SIMDE_FLOAT32_C( -369.56), SIMDE_FLOAT32_C( -342.71), SIMDE_FLOAT32_C( -441.39), SIMDE_FLOAT32_C( 818.74), SIMDE_FLOAT32_C( 651.39), SIMDE_FLOAT32_C( -48.13), SIMDE_FLOAT32_C( 691.57), SIMDE_FLOAT32_C( 226.69), SIMDE_FLOAT32_C( 366.88), SIMDE_FLOAT32_C( -919.42), SIMDE_FLOAT32_C( 382.74), SIMDE_FLOAT32_C( -153.11), SIMDE_FLOAT32_C( -502.77), SIMDE_FLOAT32_C( -560.89) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_x_mm512_xorsign_ps(a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512 a = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 b = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 r = simde_x_mm512_xorsign_ps(a, b); simde_test_x86_write_f32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_x_mm512_xorsign_pd (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 938.91), SIMDE_FLOAT64_C( 756.12), SIMDE_FLOAT64_C( 6.66), SIMDE_FLOAT64_C( 380.31), SIMDE_FLOAT64_C( 574.86), SIMDE_FLOAT64_C( 355.27), SIMDE_FLOAT64_C( -571.56), SIMDE_FLOAT64_C( 883.30) }, { SIMDE_FLOAT64_C( -871.43), SIMDE_FLOAT64_C( 795.32), SIMDE_FLOAT64_C( 802.72), SIMDE_FLOAT64_C( 511.32), SIMDE_FLOAT64_C( -357.79), SIMDE_FLOAT64_C( 305.49), SIMDE_FLOAT64_C( 72.21), 
SIMDE_FLOAT64_C( 111.18) }, { SIMDE_FLOAT64_C( -938.91), SIMDE_FLOAT64_C( 756.12), SIMDE_FLOAT64_C( 6.66), SIMDE_FLOAT64_C( 380.31), SIMDE_FLOAT64_C( -574.86), SIMDE_FLOAT64_C( 355.27), SIMDE_FLOAT64_C( -571.56), SIMDE_FLOAT64_C( 883.30) } }, { { SIMDE_FLOAT64_C( -471.70), SIMDE_FLOAT64_C( 713.96), SIMDE_FLOAT64_C( 91.31), SIMDE_FLOAT64_C( 94.15), SIMDE_FLOAT64_C( 79.55), SIMDE_FLOAT64_C( 930.55), SIMDE_FLOAT64_C( 677.11), SIMDE_FLOAT64_C( -997.33) }, { SIMDE_FLOAT64_C( -684.70), SIMDE_FLOAT64_C( -96.79), SIMDE_FLOAT64_C( -864.13), SIMDE_FLOAT64_C( -959.73), SIMDE_FLOAT64_C( 968.36), SIMDE_FLOAT64_C( -477.57), SIMDE_FLOAT64_C( -610.36), SIMDE_FLOAT64_C( 907.27) }, { SIMDE_FLOAT64_C( 471.70), SIMDE_FLOAT64_C( -713.96), SIMDE_FLOAT64_C( -91.31), SIMDE_FLOAT64_C( -94.15), SIMDE_FLOAT64_C( 79.55), SIMDE_FLOAT64_C( -930.55), SIMDE_FLOAT64_C( -677.11), SIMDE_FLOAT64_C( -997.33) } }, { { SIMDE_FLOAT64_C( -721.46), SIMDE_FLOAT64_C( 396.30), SIMDE_FLOAT64_C( 287.57), SIMDE_FLOAT64_C( 853.41), SIMDE_FLOAT64_C( -248.43), SIMDE_FLOAT64_C( 716.01), SIMDE_FLOAT64_C( 736.70), SIMDE_FLOAT64_C( -119.85) }, { SIMDE_FLOAT64_C( 511.33), SIMDE_FLOAT64_C( 539.42), SIMDE_FLOAT64_C( -608.54), SIMDE_FLOAT64_C( -846.46), SIMDE_FLOAT64_C( -155.09), SIMDE_FLOAT64_C( 463.67), SIMDE_FLOAT64_C( 264.73), SIMDE_FLOAT64_C( 373.21) }, { SIMDE_FLOAT64_C( -721.46), SIMDE_FLOAT64_C( 396.30), SIMDE_FLOAT64_C( -287.57), SIMDE_FLOAT64_C( -853.41), SIMDE_FLOAT64_C( 248.43), SIMDE_FLOAT64_C( 716.01), SIMDE_FLOAT64_C( 736.70), SIMDE_FLOAT64_C( -119.85) } }, { { SIMDE_FLOAT64_C( 177.63), SIMDE_FLOAT64_C( -643.96), SIMDE_FLOAT64_C( -532.64), SIMDE_FLOAT64_C( -742.82), SIMDE_FLOAT64_C( -713.41), SIMDE_FLOAT64_C( -855.54), SIMDE_FLOAT64_C( -740.15), SIMDE_FLOAT64_C( -398.11) }, { SIMDE_FLOAT64_C( 47.67), SIMDE_FLOAT64_C( -604.28), SIMDE_FLOAT64_C( -357.84), SIMDE_FLOAT64_C( 16.03), SIMDE_FLOAT64_C( -81.86), SIMDE_FLOAT64_C( 31.80), SIMDE_FLOAT64_C( -76.70), SIMDE_FLOAT64_C( 196.69) }, { SIMDE_FLOAT64_C( 177.63), 
SIMDE_FLOAT64_C( 643.96), SIMDE_FLOAT64_C( 532.64), SIMDE_FLOAT64_C( -742.82), SIMDE_FLOAT64_C( 713.41), SIMDE_FLOAT64_C( -855.54), SIMDE_FLOAT64_C( 740.15), SIMDE_FLOAT64_C( -398.11) } }, { { SIMDE_FLOAT64_C( -571.89), SIMDE_FLOAT64_C( -789.13), SIMDE_FLOAT64_C( 50.09), SIMDE_FLOAT64_C( 179.68), SIMDE_FLOAT64_C( 926.88), SIMDE_FLOAT64_C( -213.20), SIMDE_FLOAT64_C( -940.18), SIMDE_FLOAT64_C( 438.22) }, { SIMDE_FLOAT64_C( -673.78), SIMDE_FLOAT64_C( -548.71), SIMDE_FLOAT64_C( 591.76), SIMDE_FLOAT64_C( 171.13), SIMDE_FLOAT64_C( 914.96), SIMDE_FLOAT64_C( -143.51), SIMDE_FLOAT64_C( -455.66), SIMDE_FLOAT64_C( 92.59) }, { SIMDE_FLOAT64_C( 571.89), SIMDE_FLOAT64_C( 789.13), SIMDE_FLOAT64_C( 50.09), SIMDE_FLOAT64_C( 179.68), SIMDE_FLOAT64_C( 926.88), SIMDE_FLOAT64_C( 213.20), SIMDE_FLOAT64_C( 940.18), SIMDE_FLOAT64_C( 438.22) } }, { { SIMDE_FLOAT64_C( 212.52), SIMDE_FLOAT64_C( 11.70), SIMDE_FLOAT64_C( 349.77), SIMDE_FLOAT64_C( 499.11), SIMDE_FLOAT64_C( 156.16), SIMDE_FLOAT64_C( 609.62), SIMDE_FLOAT64_C( -899.00), SIMDE_FLOAT64_C( -796.17) }, { SIMDE_FLOAT64_C( -994.66), SIMDE_FLOAT64_C( -256.84), SIMDE_FLOAT64_C( 219.86), SIMDE_FLOAT64_C( -76.52), SIMDE_FLOAT64_C( 774.97), SIMDE_FLOAT64_C( -856.84), SIMDE_FLOAT64_C( -879.83), SIMDE_FLOAT64_C( -796.92) }, { SIMDE_FLOAT64_C( -212.52), SIMDE_FLOAT64_C( -11.70), SIMDE_FLOAT64_C( 349.77), SIMDE_FLOAT64_C( -499.11), SIMDE_FLOAT64_C( 156.16), SIMDE_FLOAT64_C( -609.62), SIMDE_FLOAT64_C( 899.00), SIMDE_FLOAT64_C( 796.17) } }, { { SIMDE_FLOAT64_C( -645.97), SIMDE_FLOAT64_C( 170.26), SIMDE_FLOAT64_C( 382.75), SIMDE_FLOAT64_C( -719.09), SIMDE_FLOAT64_C( 957.06), SIMDE_FLOAT64_C( 442.58), SIMDE_FLOAT64_C( 719.12), SIMDE_FLOAT64_C( -716.72) }, { SIMDE_FLOAT64_C( 893.87), SIMDE_FLOAT64_C( 310.88), SIMDE_FLOAT64_C( 454.40), SIMDE_FLOAT64_C( 808.82), SIMDE_FLOAT64_C( -832.63), SIMDE_FLOAT64_C( 998.74), SIMDE_FLOAT64_C( -98.58), SIMDE_FLOAT64_C( 379.89) }, { SIMDE_FLOAT64_C( -645.97), SIMDE_FLOAT64_C( 170.26), SIMDE_FLOAT64_C( 382.75), 
SIMDE_FLOAT64_C( -719.09), SIMDE_FLOAT64_C( -957.06), SIMDE_FLOAT64_C( 442.58), SIMDE_FLOAT64_C( -719.12), SIMDE_FLOAT64_C( -716.72) } }, { { SIMDE_FLOAT64_C( 10.44), SIMDE_FLOAT64_C( -748.81), SIMDE_FLOAT64_C( -120.99), SIMDE_FLOAT64_C( -833.40), SIMDE_FLOAT64_C( 860.81), SIMDE_FLOAT64_C( -19.99), SIMDE_FLOAT64_C( -629.57), SIMDE_FLOAT64_C( 866.15) }, { SIMDE_FLOAT64_C( 723.17), SIMDE_FLOAT64_C( 590.29), SIMDE_FLOAT64_C( -210.37), SIMDE_FLOAT64_C( 498.14), SIMDE_FLOAT64_C( 733.45), SIMDE_FLOAT64_C( -90.20), SIMDE_FLOAT64_C( 701.22), SIMDE_FLOAT64_C( -912.52) }, { SIMDE_FLOAT64_C( 10.44), SIMDE_FLOAT64_C( -748.81), SIMDE_FLOAT64_C( 120.99), SIMDE_FLOAT64_C( -833.40), SIMDE_FLOAT64_C( 860.81), SIMDE_FLOAT64_C( 19.99), SIMDE_FLOAT64_C( -629.57), SIMDE_FLOAT64_C( -866.15) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_x_mm512_xorsign_pd(a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512d a = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m512d b = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m512d r = simde_x_mm512_xorsign_pd(a, b); simde_test_x86_write_f64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(x_mm512_xorsign_ps) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm512_xorsign_pd) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/clmul.c000066400000000000000000000470241400333146700155150ustar00rootroot00000000000000/* Copyright (c) 2020 Evan Nemerson * * Permission is hereby granted, free of charge, to any person * obtaining a copy of 
this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define SIMDE_TESTS_CURRENT_ISAX clmul #include #include static int test_simde_x_clmul_u64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint64_t a; const uint64_t b; const uint64_t r; } test_vec[] = { { UINT64_C( 3172393302982392208), UINT64_C(13735374816641287390), UINT64_C( 3846784924301700320) }, { UINT64_C( 2044753197648351232), UINT64_C(17171186745849913133), UINT64_C( 9324690142177808384) }, { UINT64_C(15878177146981999432), UINT64_C(17725848129279761057), UINT64_C(14704261962833067592) }, { UINT64_C(14248111140186106732), UINT64_C(11051947085071581716), UINT64_C( 2350367993186272112) }, { UINT64_C( 3864970927616292810), UINT64_C( 3037243358930395708), UINT64_C( 4601354589070078104) }, { UINT64_C(14194084542956518303), UINT64_C( 5148061316303689350), UINT64_C(18148658662839280834) }, { UINT64_C(10905099652190648717), UINT64_C(16104009427115953300), UINT64_C( 6274968844062237796) }, { UINT64_C( 5809100127999444266), UINT64_C(17936880103521309735), UINT64_C( 9053755385840400022) } }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint64_t r = simde_x_clmul_u64(test_vec[i].a, test_vec[i].b); simde_assert_equal_u64(r, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { uint64_t a = simde_test_codegen_random_u64(); uint64_t b = simde_test_codegen_random_u64(); uint64_t r = simde_x_clmul_u64(a, b); simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_codegen_write_u64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_clmulepi64_epi128 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { { -INT64_C( 1223702322309085129), -INT64_C( 8879567886794638539) }, { INT64_C( 7036755598991405826), -INT64_C( 7046740992093250117) }, { INT64_C( 2473170614696794478), INT64_C( 2784948867630424147) } }, { { -INT64_C( 1917888373892700613), INT64_C( 8617134813468493061) }, { INT64_C( 304687083854804713), -INT64_C( 9179921235781725292) }, { -INT64_C( 3162169552144031667), INT64_C( 131895041431953025) } }, { { INT64_C( 3393838533477392516), -INT64_C( 6248506160320443813) }, { INT64_C( 2728261705044924814), -INT64_C( 1597447023443835378) }, { -INT64_C( 3677024356011843784), INT64_C( 1792451366425607224) } }, { { INT64_C( 8787708278673885338), -INT64_C( 7121999645218556224) }, { INT64_C( 8458129975209667500), -INT64_C( 2907745100206865833) }, { INT64_C( 4051687636460756032), INT64_C( 7169855552534900382) } }, { { -INT64_C( 4972660111729507483), -INT64_C( 8759108909232666683) }, { -INT64_C( 8220718353488812785), -INT64_C( 5279645577665465434) }, { INT64_C( 4586024153164530195), INT64_C( 6527978219423536898) } }, { { -INT64_C( 6168462655054260815), INT64_C( 1360068410154590078) }, { INT64_C( 7897188145811727061), -INT64_C( 9138636182688571098) }, { INT64_C( 8359736955429931494), INT64_C( 444600807865829376) } }, { { INT64_C( 
2157102855485155168), INT64_C( 3622676809066638896) }, { INT64_C( 7434551563239935736), -INT64_C( 1799456849400005194) }, { INT64_C( 7717179827338582336), INT64_C( 747598803828444825) } }, { { -INT64_C( 851711651099167885), -INT64_C( 1001364843725806454) }, { -INT64_C( 2024969269964351683), INT64_C( 4812501714981208922) }, { INT64_C( 3859857334761625956), INT64_C( 4404639048340928077) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m128i r; switch(i & 3) { case 0: r = simde_mm_clmulepi64_si128(a, b, 0); break; case 1: r = simde_mm_clmulepi64_si128(a, b, 1); break; case 2: r = simde_mm_clmulepi64_si128(a, b, 16); break; case 3: r = simde_mm_clmulepi64_si128(a, b, 17); break; default: HEDLEY_UNREACHABLE(); } simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i b = simde_test_x86_random_i64x2(); simde__m128i r; switch(i & 3) { case 0: r = simde_mm_clmulepi64_si128(a, b, 0); break; case 1: r = simde_mm_clmulepi64_si128(a, b, 1); break; case 2: r = simde_mm_clmulepi64_si128(a, b, 16); break; case 3: r = simde_mm_clmulepi64_si128(a, b, 17); break; } simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_clmulepi64_epi128 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[4]; const int64_t b[4]; const int64_t r[4]; } test_vec[] = { { { -INT64_C( 8861067646028006915), INT64_C( 8317570772367584624), -INT64_C( 8479226455593734885), INT64_C( 4655311630250670693) }, { -INT64_C( 24828561109850826), -INT64_C( 2740442853538738953), INT64_C( 6245039060616885418), -INT64_C( 
5596787430870122544) }, { -INT64_C( 3605505260195947138), INT64_C( 8993753863940455899), -INT64_C( 4734815501727457266), INT64_C( 2980667195098018359) } }, { { -INT64_C( 2798286907617568867), INT64_C( 7052730161524750920), INT64_C( 562435620224459400), INT64_C( 4673170309893549227) }, { INT64_C( 8305365776004329350), INT64_C( 8893019314550579025), -INT64_C( 4324931084003054661), INT64_C( 797917258246198574) }, { INT64_C( 7257084772150149552), INT64_C( 1360287739278401518), -INT64_C( 6290854813130530827), INT64_C( 3504988492777302678) } }, { { INT64_C( 7283082647590633076), -INT64_C( 6790626808863749036), -INT64_C( 6440553767362054461), -INT64_C( 4381007043721983580) }, { -INT64_C( 5656110618526963961), -INT64_C( 8392532913333670143), INT64_C( 5660748388734737265), -INT64_C( 7521386788528191859) }, { INT64_C( 1518845827335983732), INT64_C( 3548116382125449697), -INT64_C( 3695683971048001449), INT64_C( 6487700039540703498) } }, { { INT64_C( 8601371122160828753), -INT64_C( 8973451863074711772), INT64_C( 7996418085041790208), INT64_C( 7649900638574833409) }, { INT64_C( 3951475540046109929), INT64_C( 1829192730767668173), -INT64_C( 3835451934689210762), -INT64_C( 2328776664548635093) }, { INT64_C( 2331280469638464916), INT64_C( 909250411302452898), -INT64_C( 6456576299602598101), INT64_C( 3116995424408104456) } }, { { INT64_C( 624052268936643937), -INT64_C( 2704049405787911352), INT64_C( 2642477159451540759), -INT64_C( 1307707947028576508) }, { INT64_C( 8439474315985951195), INT64_C( 7499143535494825940), INT64_C( 4515407529790423948), INT64_C( 248844939888759290) }, { INT64_C( 9077383510885573499), INT64_C( 260944091473821904), -INT64_C( 1092040103143313820), INT64_C( 519807910688705558) } }, { { -INT64_C( 5562163387622042580), -INT64_C( 9080947942458940096), INT64_C( 3438692059224424769), INT64_C( 3597296407246467522) }, { -INT64_C( 9183020781049018626), -INT64_C( 5154639738173294080), -INT64_C( 1103123323737113012), -INT64_C( 8136941776144800035) }, { -INT64_C( 
7404742307908519552), INT64_C( 4664504263945087872), INT64_C( 4990989065081341336), INT64_C( 1275537234475318560) } }, { { -INT64_C( 392381640637440051), -INT64_C( 746357658714333324), -INT64_C( 6916359408935577627), INT64_C( 8191367829375868557) }, { -INT64_C( 8707415128020215756), INT64_C( 7298673085559019947), INT64_C( 8453956785101390063), -INT64_C( 7030048190026754982) }, { INT64_C( 41632217925301279), INT64_C( 2511585804835601581), -INT64_C( 3040775301231111182), INT64_C( 6698050587281837184) } }, { { -INT64_C( 6416513484366942376), INT64_C( 7995090548861593103), INT64_C( 4855289321808227333), -INT64_C( 8263908277331233721) }, { INT64_C( 2469075490216353186), -INT64_C( 8701335448767611566), INT64_C( 1819126401625972497), -INT64_C( 3059321611736088375) }, { -INT64_C( 7112732884917849042), INT64_C( 3926000826017084906), INT64_C( 606846234052846655), INT64_C( 8057559887502386125) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi64(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi64(test_vec[i].b); simde__m256i r; switch(i & 3) { case 0: r = simde_mm256_clmulepi64_epi128(a, b, 0); break; case 1: r = simde_mm256_clmulepi64_epi128(a, b, 1); break; case 2: r = simde_mm256_clmulepi64_epi128(a, b, 16); break; case 3: r = simde_mm256_clmulepi64_epi128(a, b, 17); break; default: HEDLEY_UNREACHABLE(); } simde_test_x86_assert_equal_i64x4(r, simde_x_mm256_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i64x4(); simde__m256i b = simde_test_x86_random_i64x4(); simde__m256i r; switch(i & 3) { case 0: r = simde_mm256_clmulepi64_epi128(a, b, 0); break; case 1: r = simde_mm256_clmulepi64_epi128(a, b, 1); break; case 2: r = simde_mm256_clmulepi64_epi128(a, b, 16); break; case 3: r = simde_mm256_clmulepi64_epi128(a, b, 17); break; } simde_test_x86_write_i64x4(2, a, SIMDE_TEST_VEC_POS_FIRST); 
simde_test_x86_write_i64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_clmulepi64_epi128 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { { { -INT64_C( 4508748162316205256), -INT64_C( 3099372905628098829), INT64_C( 1016139251664777007), INT64_C( 4077612542125204877), -INT64_C( 5671456079578199782), -INT64_C( 2619368007630660594), INT64_C( 1145311247069902226), -INT64_C( 6767389031831375652) }, { -INT64_C( 7502866647642040208), -INT64_C( 4910877245634443694), -INT64_C( 8712927427156658812), -INT64_C( 4787193028275804570), -INT64_C( 8992902091992483055), INT64_C( 3365977461916539858), INT64_C( 2612731813066721039), INT64_C( 3547044437640259119) }, { INT64_C( 7408876882598746752), INT64_C( 7975566437154259906), -INT64_C( 7050998394792826820), INT64_C( 515545422847956525), INT64_C( 480365564844410554), INT64_C( 6443112411602834649), INT64_C( 8842248389601172078), INT64_C( 126356363133991767) } }, { { -INT64_C( 4462225071124306595), -INT64_C( 112462523525131112), INT64_C( 2133857325112992955), -INT64_C( 5840373422947127646), INT64_C( 6101057788694085368), -INT64_C( 7507491933819344413), INT64_C( 2789888179682970027), INT64_C( 2114961351064965467) }, { -INT64_C( 3915684273188333928), -INT64_C( 1040776515624944102), -INT64_C( 6440357149699039500), -INT64_C( 372484428694190190), -INT64_C( 6417076916505879999), -INT64_C( 1410985712041489355), -INT64_C( 3838468523262871862), -INT64_C( 438725240223101812) }, { -INT64_C( 4547041686557986496), INT64_C( 5174280718535161140), -INT64_C( 4052619716788453016), INT64_C( 4691236899805083089), INT64_C( 2621818318605369123), INT64_C( 6491826105943833570), INT64_C( 6742861852404586798), INT64_C( 659489253161718155) } }, { { INT64_C( 7160742299907929903), -INT64_C( 900076532547060322), INT64_C( 8737198701753722943), INT64_C( 5008091942838911439), INT64_C( 
5771750718125940872), INT64_C( 9185769016342804318), INT64_C( 1492721472516751952), -INT64_C( 553068161141104649) }, { INT64_C( 1350737615534379964), INT64_C( 6736843039141635263), -INT64_C( 6778390594892172139), INT64_C( 7050670702599309936), INT64_C( 1160958178873018454), INT64_C( 6745141857603269937), -INT64_C( 626802408982891262), INT64_C( 6248657622998252976) }, { -INT64_C( 1487980720625885147), INT64_C( 2020763090915505301), -INT64_C( 3819958634599510576), INT64_C( 1236167621747222011), -INT64_C( 2121305814586519288), INT64_C( 1348880127149281018), INT64_C( 5474740827809109760), INT64_C( 294120293277450556) } }, { { INT64_C( 5015177187211445817), INT64_C( 4132367358880885043), INT64_C( 7409040097932578900), -INT64_C( 7744375673736877521), -INT64_C( 7542483310570706951), INT64_C( 5319011699478746720), INT64_C( 1700661811459962236), -INT64_C( 4426530695618800223) }, { -INT64_C( 4057705442362544204), -INT64_C( 6109362486595664903), -INT64_C( 944497729116152860), -INT64_C( 6027926296062907443), INT64_C( 4853882362087614255), -INT64_C( 3561668997202986057), -INT64_C( 5330150999214821181), -INT64_C( 6535105179831961942) }, { INT64_C( 6360605561114465723), INT64_C( 1874229146988567910), -INT64_C( 7830980124631947605), INT64_C( 6777172427082462138), INT64_C( 6513423505254783264), INT64_C( 3838992877777853549), INT64_C( 3476875994488591594), INT64_C( 8855849316839725126) } }, { { INT64_C( 4492507234734576331), -INT64_C( 5473360150391322286), -INT64_C( 3546918803188636377), -INT64_C( 9087772883149647162), INT64_C( 330715171927825361), -INT64_C( 7341065912809636999), INT64_C( 9143275604432054512), -INT64_C( 2037426853844986750) }, { -INT64_C( 5085177230146187082), -INT64_C( 1613903879242273139), INT64_C( 3918380942342636586), INT64_C( 7794648714629225014), -INT64_C( 4075403372112025736), INT64_C( 131556394091910384), -INT64_C( 1616998204104715867), -INT64_C( 7169627097723110049) }, { -INT64_C( 3555569070185312534), INT64_C( 1929572009916099035), -INT64_C( 
6395368968046142570), INT64_C( 1709120932256690080), INT64_C( 2523358017318337272), INT64_C( 247934416460539433), INT64_C( 244155990319815984), INT64_C( 3215872428597678272) } }, { { -INT64_C( 8590961978253224507), -INT64_C( 7840127111964488358), -INT64_C( 4993511927647037930), INT64_C( 664649571826066808), -INT64_C( 5925102877851218524), -INT64_C( 7864392723294439589), -INT64_C( 4955067607878416345), -INT64_C( 125379126724457739) }, { INT64_C( 8295348044106975774), INT64_C( 3694526249633119286), -INT64_C( 6386401476306689696), INT64_C( 7229159512343623164), -INT64_C( 4253663533883617684), -INT64_C( 6111460234302671889), -INT64_C( 3263512319597026211), INT64_C( 2525248044784915809) }, { INT64_C( 8089940058166097068), INT64_C( 4477169705139521861), -INT64_C( 1959136724352530176), INT64_C( 398152220925199002), -INT64_C( 2261768737661003756), INT64_C( 8063510882445894936), INT64_C( 8314094987896017049), INT64_C( 5675994356149893427) } }, { { -INT64_C( 4442063450844700663), -INT64_C( 1637078751344930479), -INT64_C( 6667107275842742831), INT64_C( 2674669911162921283), -INT64_C( 5346874038663002826), INT64_C( 5286550275917777639), INT64_C( 4977438571891807535), -INT64_C( 1563770842758750709) }, { INT64_C( 305612682310190883), INT64_C( 6272622752075124555), INT64_C( 8174837505515387946), INT64_C( 3735986652260643385), -INT64_C( 2806032233167860500), -INT64_C( 8860928630086400377), -INT64_C( 1982670522702245980), -INT64_C( 1938331273838651947) }, { -INT64_C( 5520935947472743149), INT64_C( 4537885097490984960), INT64_C( 8936024623814528617), INT64_C( 2290930245258479633), -INT64_C( 3134810519185361790), INT64_C( 6379269477255759439), INT64_C( 7072358411685850147), INT64_C( 4195183069244768317) } }, { { INT64_C( 6042983852705567862), -INT64_C( 4223521988775085949), INT64_C( 965824450707592408), INT64_C( 6359096840191948897), -INT64_C( 5764827233489571872), -INT64_C( 2047185047042326096), INT64_C( 1462207194453140997), INT64_C( 2972568486031604998) }, { -INT64_C( 
8882271837156434977), INT64_C( 4522998559612946646), -INT64_C( 137423594610545839), INT64_C( 1546465536708609539), -INT64_C( 713298132974154814), -INT64_C( 6153153657742856668), -INT64_C( 2396541879086868794), -INT64_C( 8316000295776967604) }, { INT64_C( 6427478446298047098), INT64_C( 1161562776606732481), INT64_C( 4521385688827072163), INT64_C( 334400680930988147), INT64_C( 2179120516870574272), INT64_C( 7717920184672985687), -INT64_C( 7275182691465067096), INT64_C( 1538449054100258674) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r; switch(i & 3) { case 0: r = simde_mm512_clmulepi64_epi128(a, b, 0); break; case 1: r = simde_mm512_clmulepi64_epi128(a, b, 1); break; case 2: r = simde_mm512_clmulepi64_epi128(a, b, 16); break; case 3: r = simde_mm512_clmulepi64_epi128(a, b, 17); break; default: HEDLEY_UNREACHABLE(); } simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i64x8(); simde__m512i b = simde_test_x86_random_i64x8(); simde__m512i r; switch(i & 3) { case 0: r = simde_mm512_clmulepi64_epi128(a, b, 0); break; case 1: r = simde_mm512_clmulepi64_epi128(a, b, 1); break; case 2: r = simde_mm512_clmulepi64_epi128(a, b, 16); break; case 3: r = simde_mm512_clmulepi64_epi128(a, b, 17); break; } simde_test_x86_write_i64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(x_clmul_u64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_clmulepi64_epi128) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_clmulepi64_epi128) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_clmulepi64_epi128) SIMDE_TEST_FUNC_LIST_END #include 
simde-0.7.2/test/x86/declare-suites.h000066400000000000000000000006631400333146700173150ustar00rootroot00000000000000SIMDE_TEST_DECLARE_SUITE(mmx) SIMDE_TEST_DECLARE_SUITE(sse) SIMDE_TEST_DECLARE_SUITE(sse2) SIMDE_TEST_DECLARE_SUITE(sse3) SIMDE_TEST_DECLARE_SUITE(ssse3) SIMDE_TEST_DECLARE_SUITE(sse4_1) SIMDE_TEST_DECLARE_SUITE(sse4_2) SIMDE_TEST_DECLARE_SUITE(avx) SIMDE_TEST_DECLARE_SUITE(avx2) SIMDE_TEST_DECLARE_SUITE(fma) SIMDE_TEST_DECLARE_SUITE(gfni) SIMDE_TEST_DECLARE_SUITE(clmul) SIMDE_TEST_DECLARE_SUITE(svml) SIMDE_TEST_DECLARE_SUITE(xop) simde-0.7.2/test/x86/fma.c000066400000000000000000005256361400333146700151560ustar00rootroot00000000000000/* Copyright (c) 2019 Evan Nemerson * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #define SIMDE_TESTS_CURRENT_ISAX fma #include #include static int test_simde_mm_fmadd_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d c; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -364.57), SIMDE_FLOAT64_C( -702.81)), simde_mm_set_pd(SIMDE_FLOAT64_C( -890.80), SIMDE_FLOAT64_C( -433.89)), simde_mm_set_pd(SIMDE_FLOAT64_C( 486.26), SIMDE_FLOAT64_C( -304.02)), simde_mm_set_pd(SIMDE_FLOAT64_C(325245.22), SIMDE_FLOAT64_C(304638.21)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 278.35), SIMDE_FLOAT64_C( -601.69)), simde_mm_set_pd(SIMDE_FLOAT64_C( -850.98), SIMDE_FLOAT64_C( 10.48)), simde_mm_set_pd(SIMDE_FLOAT64_C( -876.47), SIMDE_FLOAT64_C( -253.46)), simde_mm_set_pd(SIMDE_FLOAT64_C(-237746.75), SIMDE_FLOAT64_C(-6559.17)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -495.32), SIMDE_FLOAT64_C( 626.54)), simde_mm_set_pd(SIMDE_FLOAT64_C( 243.15), SIMDE_FLOAT64_C( -595.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( 652.27), SIMDE_FLOAT64_C( 684.47)), simde_mm_set_pd(SIMDE_FLOAT64_C(-119784.79), SIMDE_FLOAT64_C(-372526.61)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -675.35), SIMDE_FLOAT64_C( -855.85)), simde_mm_set_pd(SIMDE_FLOAT64_C( 863.96), SIMDE_FLOAT64_C( -244.88)), simde_mm_set_pd(SIMDE_FLOAT64_C( 266.15), SIMDE_FLOAT64_C( -217.90)), simde_mm_set_pd(SIMDE_FLOAT64_C(-583209.24), SIMDE_FLOAT64_C(209362.65)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -945.02), SIMDE_FLOAT64_C( -266.12)), simde_mm_set_pd(SIMDE_FLOAT64_C( 248.34), SIMDE_FLOAT64_C( -754.68)), simde_mm_set_pd(SIMDE_FLOAT64_C( 686.19), SIMDE_FLOAT64_C( 201.29)), simde_mm_set_pd(SIMDE_FLOAT64_C(-234000.08), SIMDE_FLOAT64_C(201036.73)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -923.40), SIMDE_FLOAT64_C( 347.92)), simde_mm_set_pd(SIMDE_FLOAT64_C( -299.09), SIMDE_FLOAT64_C( -322.35)), simde_mm_set_pd(SIMDE_FLOAT64_C( -172.16), SIMDE_FLOAT64_C( 792.83)), simde_mm_set_pd(SIMDE_FLOAT64_C(276007.55), SIMDE_FLOAT64_C(-111359.18)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -997.96), 
SIMDE_FLOAT64_C( -774.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( 336.57), SIMDE_FLOAT64_C( -666.28)), simde_mm_set_pd(SIMDE_FLOAT64_C( 18.66), SIMDE_FLOAT64_C( 857.72)), simde_mm_set_pd(SIMDE_FLOAT64_C(-335864.74), SIMDE_FLOAT64_C(516798.30)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 425.15), SIMDE_FLOAT64_C( -554.19)), simde_mm_set_pd(SIMDE_FLOAT64_C( -602.50), SIMDE_FLOAT64_C( -329.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( -208.43), SIMDE_FLOAT64_C( 819.37)), simde_mm_set_pd(SIMDE_FLOAT64_C(-256361.30), SIMDE_FLOAT64_C(183519.19)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_fmadd_pd(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_fmadd_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d c; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 463.71), SIMDE_FLOAT64_C( -551.83), SIMDE_FLOAT64_C( 568.05), SIMDE_FLOAT64_C( -826.17)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 440.29), SIMDE_FLOAT64_C( 762.39), SIMDE_FLOAT64_C( -806.23), SIMDE_FLOAT64_C( -848.48)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 222.93), SIMDE_FLOAT64_C( -604.06), SIMDE_FLOAT64_C( -844.49), SIMDE_FLOAT64_C( 221.50)), simde_mm256_set_pd(SIMDE_FLOAT64_C(204389.81), SIMDE_FLOAT64_C(-421313.73), SIMDE_FLOAT64_C(-458823.44), SIMDE_FLOAT64_C(701210.22)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 418.00), SIMDE_FLOAT64_C( -725.82), SIMDE_FLOAT64_C( -54.90), SIMDE_FLOAT64_C( -342.22)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 280.70), SIMDE_FLOAT64_C( 983.58), SIMDE_FLOAT64_C( -289.88), SIMDE_FLOAT64_C( 305.30)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -404.92), SIMDE_FLOAT64_C( -664.17), SIMDE_FLOAT64_C( 164.15), SIMDE_FLOAT64_C( -785.83)), simde_mm256_set_pd(SIMDE_FLOAT64_C(116927.68), SIMDE_FLOAT64_C(-714566.21), SIMDE_FLOAT64_C( 16078.56), SIMDE_FLOAT64_C(-105265.60)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 267.84), 
SIMDE_FLOAT64_C( 153.22), SIMDE_FLOAT64_C( 565.53), SIMDE_FLOAT64_C( 45.62)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 253.29), SIMDE_FLOAT64_C( -448.85), SIMDE_FLOAT64_C( -379.10), SIMDE_FLOAT64_C( 896.99)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 305.51), SIMDE_FLOAT64_C( -18.42), SIMDE_FLOAT64_C( 560.02), SIMDE_FLOAT64_C( -441.54)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 68146.70), SIMDE_FLOAT64_C(-68791.22), SIMDE_FLOAT64_C(-213832.40), SIMDE_FLOAT64_C( 40479.14)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 698.02), SIMDE_FLOAT64_C( -282.65), SIMDE_FLOAT64_C( -531.77), SIMDE_FLOAT64_C( -673.05)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -227.36), SIMDE_FLOAT64_C( 165.86), SIMDE_FLOAT64_C( -853.86), SIMDE_FLOAT64_C( 210.39)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -443.02), SIMDE_FLOAT64_C( -362.32), SIMDE_FLOAT64_C( 833.55), SIMDE_FLOAT64_C( 692.62)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-159144.85), SIMDE_FLOAT64_C(-47242.65), SIMDE_FLOAT64_C(454890.68), SIMDE_FLOAT64_C(-140910.37)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -601.68), SIMDE_FLOAT64_C( 654.88), SIMDE_FLOAT64_C( 957.42), SIMDE_FLOAT64_C( 563.37)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -958.65), SIMDE_FLOAT64_C( 523.00), SIMDE_FLOAT64_C( -211.18), SIMDE_FLOAT64_C( -889.28)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 180.36), SIMDE_FLOAT64_C( 481.63), SIMDE_FLOAT64_C( -222.77), SIMDE_FLOAT64_C( -51.21)), simde_mm256_set_pd(SIMDE_FLOAT64_C(576980.89), SIMDE_FLOAT64_C(342983.87), SIMDE_FLOAT64_C(-202410.73), SIMDE_FLOAT64_C(-501044.88)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 731.42), SIMDE_FLOAT64_C( -631.15), SIMDE_FLOAT64_C( -982.89), SIMDE_FLOAT64_C( -397.65)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 69.37), SIMDE_FLOAT64_C( -394.43), SIMDE_FLOAT64_C( -18.09), SIMDE_FLOAT64_C( 272.24)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 920.14), SIMDE_FLOAT64_C( -196.58), SIMDE_FLOAT64_C( 324.68), SIMDE_FLOAT64_C( -193.62)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 51658.75), SIMDE_FLOAT64_C(248747.91), SIMDE_FLOAT64_C( 18105.16), 
SIMDE_FLOAT64_C(-108449.86)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -561.20), SIMDE_FLOAT64_C( -459.54), SIMDE_FLOAT64_C( -681.08), SIMDE_FLOAT64_C( -49.66)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 197.69), SIMDE_FLOAT64_C( -813.71), SIMDE_FLOAT64_C( -990.48), SIMDE_FLOAT64_C( -180.87)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -453.27), SIMDE_FLOAT64_C( -557.45), SIMDE_FLOAT64_C( -780.15), SIMDE_FLOAT64_C( 693.73)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-111396.90), SIMDE_FLOAT64_C(373374.84), SIMDE_FLOAT64_C(673815.97), SIMDE_FLOAT64_C( 9675.73)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 297.04), SIMDE_FLOAT64_C( 950.40), SIMDE_FLOAT64_C( -454.41), SIMDE_FLOAT64_C( 419.22)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 303.25), SIMDE_FLOAT64_C( -917.33), SIMDE_FLOAT64_C( 128.78), SIMDE_FLOAT64_C( 208.96)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 735.53), SIMDE_FLOAT64_C( 976.90), SIMDE_FLOAT64_C( 803.26), SIMDE_FLOAT64_C( 610.54)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 90812.91), SIMDE_FLOAT64_C(-870853.53), SIMDE_FLOAT64_C(-57715.66), SIMDE_FLOAT64_C( 88210.75)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_fmadd_pd(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_fmadd_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 c; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 68.47), SIMDE_FLOAT32_C( -20.99), SIMDE_FLOAT32_C( -768.39), SIMDE_FLOAT32_C( 464.52)), simde_mm_set_ps(SIMDE_FLOAT32_C( 297.92), SIMDE_FLOAT32_C( 902.90), SIMDE_FLOAT32_C( 496.10), SIMDE_FLOAT32_C( -932.73)), simde_mm_set_ps(SIMDE_FLOAT32_C( -341.38), SIMDE_FLOAT32_C( -852.40), SIMDE_FLOAT32_C( 426.68), SIMDE_FLOAT32_C( 755.10)), simde_mm_set_ps(SIMDE_FLOAT32_C( 20057.20), SIMDE_FLOAT32_C(-19804.27), SIMDE_FLOAT32_C(-380771.59), SIMDE_FLOAT32_C(-432516.62)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 969.18), SIMDE_FLOAT32_C( 
318.32), SIMDE_FLOAT32_C( -273.65), SIMDE_FLOAT32_C( 39.39)), simde_mm_set_ps(SIMDE_FLOAT32_C( 995.10), SIMDE_FLOAT32_C( 620.67), SIMDE_FLOAT32_C( 664.82), SIMDE_FLOAT32_C( 711.85)), simde_mm_set_ps(SIMDE_FLOAT32_C( 473.88), SIMDE_FLOAT32_C( 360.15), SIMDE_FLOAT32_C( -250.82), SIMDE_FLOAT32_C( -88.76)), simde_mm_set_ps(SIMDE_FLOAT32_C(964904.88), SIMDE_FLOAT32_C(197931.83), SIMDE_FLOAT32_C(-182178.80), SIMDE_FLOAT32_C( 27951.01)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 164.14), SIMDE_FLOAT32_C( -848.02), SIMDE_FLOAT32_C( 235.35), SIMDE_FLOAT32_C( -999.97)), simde_mm_set_ps(SIMDE_FLOAT32_C( 466.54), SIMDE_FLOAT32_C( 41.59), SIMDE_FLOAT32_C( -619.09), SIMDE_FLOAT32_C( 332.19)), simde_mm_set_ps(SIMDE_FLOAT32_C( -720.05), SIMDE_FLOAT32_C( 91.37), SIMDE_FLOAT32_C( 3.41), SIMDE_FLOAT32_C( -151.75)), simde_mm_set_ps(SIMDE_FLOAT32_C( 75857.83), SIMDE_FLOAT32_C(-35177.78), SIMDE_FLOAT32_C(-145699.44), SIMDE_FLOAT32_C(-332331.78)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -664.98), SIMDE_FLOAT32_C( -765.11), SIMDE_FLOAT32_C( -950.95), SIMDE_FLOAT32_C( 967.68)), simde_mm_set_ps(SIMDE_FLOAT32_C( 463.42), SIMDE_FLOAT32_C( 310.01), SIMDE_FLOAT32_C( -859.78), SIMDE_FLOAT32_C( -247.59)), simde_mm_set_ps(SIMDE_FLOAT32_C( 372.25), SIMDE_FLOAT32_C( -546.43), SIMDE_FLOAT32_C( -18.65), SIMDE_FLOAT32_C( -608.78)), simde_mm_set_ps(SIMDE_FLOAT32_C(-307792.78), SIMDE_FLOAT32_C(-237738.19), SIMDE_FLOAT32_C(817589.19), SIMDE_FLOAT32_C(-240196.67)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 739.81), SIMDE_FLOAT32_C( -275.42), SIMDE_FLOAT32_C( -462.27), SIMDE_FLOAT32_C( -299.55)), simde_mm_set_ps(SIMDE_FLOAT32_C( -308.92), SIMDE_FLOAT32_C( 948.18), SIMDE_FLOAT32_C( -344.73), SIMDE_FLOAT32_C( -942.49)), simde_mm_set_ps(SIMDE_FLOAT32_C( 780.24), SIMDE_FLOAT32_C( 819.52), SIMDE_FLOAT32_C( -913.65), SIMDE_FLOAT32_C( 715.95)), simde_mm_set_ps(SIMDE_FLOAT32_C(-227761.88), SIMDE_FLOAT32_C(-260328.23), SIMDE_FLOAT32_C(158444.69), SIMDE_FLOAT32_C(283038.81)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -766.06), 
SIMDE_FLOAT32_C( -563.42), SIMDE_FLOAT32_C( -122.27), SIMDE_FLOAT32_C( -338.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 496.12), SIMDE_FLOAT32_C( -751.97), SIMDE_FLOAT32_C( 655.86), SIMDE_FLOAT32_C( 174.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( -985.27), SIMDE_FLOAT32_C( 574.75), SIMDE_FLOAT32_C( 212.10), SIMDE_FLOAT32_C( -683.32)), simde_mm_set_ps(SIMDE_FLOAT32_C(-381042.97), SIMDE_FLOAT32_C(424249.66), SIMDE_FLOAT32_C(-79979.90), SIMDE_FLOAT32_C(-59630.52)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -663.22), SIMDE_FLOAT32_C( 549.14), SIMDE_FLOAT32_C( 733.90), SIMDE_FLOAT32_C( 785.76)), simde_mm_set_ps(SIMDE_FLOAT32_C( -149.60), SIMDE_FLOAT32_C( -221.89), SIMDE_FLOAT32_C( -452.29), SIMDE_FLOAT32_C( -18.14)), simde_mm_set_ps(SIMDE_FLOAT32_C( 979.50), SIMDE_FLOAT32_C( -484.31), SIMDE_FLOAT32_C( -965.78), SIMDE_FLOAT32_C( -291.28)), simde_mm_set_ps(SIMDE_FLOAT32_C(100197.21), SIMDE_FLOAT32_C(-122332.99), SIMDE_FLOAT32_C(-332901.44), SIMDE_FLOAT32_C(-14544.97)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 82.89), SIMDE_FLOAT32_C( -639.53), SIMDE_FLOAT32_C( 680.97), SIMDE_FLOAT32_C( -745.76)), simde_mm_set_ps(SIMDE_FLOAT32_C( 377.68), SIMDE_FLOAT32_C( -229.15), SIMDE_FLOAT32_C( 986.42), SIMDE_FLOAT32_C( -430.87)), simde_mm_set_ps(SIMDE_FLOAT32_C( 506.24), SIMDE_FLOAT32_C( -791.48), SIMDE_FLOAT32_C( -896.55), SIMDE_FLOAT32_C( -775.82)), simde_mm_set_ps(SIMDE_FLOAT32_C( 31812.13), SIMDE_FLOAT32_C(145756.81), SIMDE_FLOAT32_C(670825.81), SIMDE_FLOAT32_C(320549.81)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_fmadd_ps(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm256_fmadd_ps.
 *
 * Each of the 8 entries holds three simde__m256 operands (a, b, c) and the
 * expected vector r. In the table, every element of r equals a*b + c for the
 * element at the same position (rounded to two decimals). The loop calls
 * simde_mm256_fmadd_ps on each entry and compares the result with r through
 * simde_assert_m256_close.
 *
 * NOTE(review): the trailing `1` passed to the assertion is presumably a
 * precision in decimal places -- confirm against the macro definition.
 * Returns 0 once every entry has been checked. */
static int test_simde_mm256_fmadd_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 c; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 39.90), SIMDE_FLOAT32_C( 46.80), SIMDE_FLOAT32_C( -90.30), SIMDE_FLOAT32_C( -57.20), SIMDE_FLOAT32_C( 71.50), 
SIMDE_FLOAT32_C( 75.00), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 14.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -90.60), SIMDE_FLOAT32_C( -15.30), SIMDE_FLOAT32_C( -46.70), SIMDE_FLOAT32_C( 73.50), SIMDE_FLOAT32_C( -27.40), SIMDE_FLOAT32_C( -79.00), SIMDE_FLOAT32_C( -14.10), SIMDE_FLOAT32_C( 22.30)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -19.50), SIMDE_FLOAT32_C( 61.50), SIMDE_FLOAT32_C( -38.80), SIMDE_FLOAT32_C( -19.20), SIMDE_FLOAT32_C( 54.40), SIMDE_FLOAT32_C( -71.00), SIMDE_FLOAT32_C( -11.30), SIMDE_FLOAT32_C( -2.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -3634.44), SIMDE_FLOAT32_C( -654.54), SIMDE_FLOAT32_C( 4178.21), SIMDE_FLOAT32_C( -4223.40), SIMDE_FLOAT32_C( -1904.70), SIMDE_FLOAT32_C( -5996.00), SIMDE_FLOAT32_C( -7.07), SIMDE_FLOAT32_C( 322.88)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 56.00), SIMDE_FLOAT32_C( -61.10), SIMDE_FLOAT32_C( -84.20), SIMDE_FLOAT32_C( -8.30), SIMDE_FLOAT32_C( 96.60), SIMDE_FLOAT32_C( 92.70), SIMDE_FLOAT32_C( -19.40), SIMDE_FLOAT32_C( -41.30)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -20.80), SIMDE_FLOAT32_C( -77.90), SIMDE_FLOAT32_C( 22.80), SIMDE_FLOAT32_C( -62.40), SIMDE_FLOAT32_C( 47.20), SIMDE_FLOAT32_C( 23.30), SIMDE_FLOAT32_C( -14.70), SIMDE_FLOAT32_C( 1.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -2.50), SIMDE_FLOAT32_C( -40.20), SIMDE_FLOAT32_C( -64.40), SIMDE_FLOAT32_C( 46.00), SIMDE_FLOAT32_C( 19.60), SIMDE_FLOAT32_C( 30.00), SIMDE_FLOAT32_C( 23.60), SIMDE_FLOAT32_C( 20.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -1167.30), SIMDE_FLOAT32_C( 4719.49), SIMDE_FLOAT32_C( -1984.16), SIMDE_FLOAT32_C( 563.92), SIMDE_FLOAT32_C( 4579.12), SIMDE_FLOAT32_C( 2189.91), SIMDE_FLOAT32_C( 308.78), SIMDE_FLOAT32_C( -53.74)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -73.60), SIMDE_FLOAT32_C( -63.70), SIMDE_FLOAT32_C( -7.10), SIMDE_FLOAT32_C( -70.90), SIMDE_FLOAT32_C( 23.30), SIMDE_FLOAT32_C( 22.20), SIMDE_FLOAT32_C( 4.90), SIMDE_FLOAT32_C( -85.30)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 75.60), SIMDE_FLOAT32_C( -6.90), SIMDE_FLOAT32_C( 73.70), 
SIMDE_FLOAT32_C( -85.70), SIMDE_FLOAT32_C( -25.90), SIMDE_FLOAT32_C( -59.90), SIMDE_FLOAT32_C( -56.20), SIMDE_FLOAT32_C( -30.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 54.00), SIMDE_FLOAT32_C( -79.70), SIMDE_FLOAT32_C( 71.20), SIMDE_FLOAT32_C( -74.20), SIMDE_FLOAT32_C( -48.90), SIMDE_FLOAT32_C( -7.20), SIMDE_FLOAT32_C( -59.10), SIMDE_FLOAT32_C( -84.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -5510.16), SIMDE_FLOAT32_C( 359.83), SIMDE_FLOAT32_C( -452.07), SIMDE_FLOAT32_C( 6001.93), SIMDE_FLOAT32_C( -652.37), SIMDE_FLOAT32_C( -1336.98), SIMDE_FLOAT32_C( -334.48), SIMDE_FLOAT32_C( 2534.01)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 57.50), SIMDE_FLOAT32_C( 93.40), SIMDE_FLOAT32_C( -2.20), SIMDE_FLOAT32_C( 77.20), SIMDE_FLOAT32_C( 79.40), SIMDE_FLOAT32_C( -81.10), SIMDE_FLOAT32_C( 25.80), SIMDE_FLOAT32_C( -5.40)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -36.80), SIMDE_FLOAT32_C( -46.10), SIMDE_FLOAT32_C( 57.50), SIMDE_FLOAT32_C( 47.70), SIMDE_FLOAT32_C( 38.00), SIMDE_FLOAT32_C( 48.30), SIMDE_FLOAT32_C( 86.60), SIMDE_FLOAT32_C( 85.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 92.60), SIMDE_FLOAT32_C( 68.60), SIMDE_FLOAT32_C( -48.10), SIMDE_FLOAT32_C( -53.80), SIMDE_FLOAT32_C( -45.80), SIMDE_FLOAT32_C( 33.60), SIMDE_FLOAT32_C( 47.80), SIMDE_FLOAT32_C( 61.30)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -2023.40), SIMDE_FLOAT32_C( -4237.14), SIMDE_FLOAT32_C( -174.60), SIMDE_FLOAT32_C( 3628.64), SIMDE_FLOAT32_C( 2971.40), SIMDE_FLOAT32_C( -3883.53), SIMDE_FLOAT32_C( 2282.08), SIMDE_FLOAT32_C( -400.94)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 39.30), SIMDE_FLOAT32_C( 47.70), SIMDE_FLOAT32_C( -46.40), SIMDE_FLOAT32_C( 22.40), SIMDE_FLOAT32_C( -47.70), SIMDE_FLOAT32_C( -87.50), SIMDE_FLOAT32_C( 56.70), SIMDE_FLOAT32_C( -98.30)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 47.80), SIMDE_FLOAT32_C( 25.10), SIMDE_FLOAT32_C( 86.40), SIMDE_FLOAT32_C( 20.80), SIMDE_FLOAT32_C( -68.30), SIMDE_FLOAT32_C( -7.70), SIMDE_FLOAT32_C( 87.10), SIMDE_FLOAT32_C( 24.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 30.50), 
SIMDE_FLOAT32_C( 80.40), SIMDE_FLOAT32_C( -81.20), SIMDE_FLOAT32_C( -60.10), SIMDE_FLOAT32_C( -62.20), SIMDE_FLOAT32_C( 51.30), SIMDE_FLOAT32_C( -56.00), SIMDE_FLOAT32_C( -52.90)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1909.04), SIMDE_FLOAT32_C( 1277.67), SIMDE_FLOAT32_C( -4090.16), SIMDE_FLOAT32_C( 405.82), SIMDE_FLOAT32_C( 3195.71), SIMDE_FLOAT32_C( 725.05), SIMDE_FLOAT32_C( 4882.57), SIMDE_FLOAT32_C( -2412.10)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 35.30), SIMDE_FLOAT32_C( -51.40), SIMDE_FLOAT32_C( -71.80), SIMDE_FLOAT32_C( 28.30), SIMDE_FLOAT32_C( 41.70), SIMDE_FLOAT32_C( -29.90), SIMDE_FLOAT32_C( 47.10), SIMDE_FLOAT32_C( -23.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -72.20), SIMDE_FLOAT32_C( 5.10), SIMDE_FLOAT32_C( 50.30), SIMDE_FLOAT32_C( 8.80), SIMDE_FLOAT32_C( 10.30), SIMDE_FLOAT32_C( 88.00), SIMDE_FLOAT32_C( -32.10), SIMDE_FLOAT32_C( -71.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 92.50), SIMDE_FLOAT32_C( -22.70), SIMDE_FLOAT32_C( -32.50), SIMDE_FLOAT32_C( -64.00), SIMDE_FLOAT32_C( 53.40), SIMDE_FLOAT32_C( 57.00), SIMDE_FLOAT32_C( 85.20), SIMDE_FLOAT32_C( 51.90)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -2456.16), SIMDE_FLOAT32_C( -284.84), SIMDE_FLOAT32_C( -3644.04), SIMDE_FLOAT32_C( 185.04), SIMDE_FLOAT32_C( 482.91), SIMDE_FLOAT32_C( -2574.20), SIMDE_FLOAT32_C( -1426.71), SIMDE_FLOAT32_C( 1739.20)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 62.00), SIMDE_FLOAT32_C( -58.50), SIMDE_FLOAT32_C( -89.10), SIMDE_FLOAT32_C( 51.50), SIMDE_FLOAT32_C( 2.30), SIMDE_FLOAT32_C( -87.50), SIMDE_FLOAT32_C( -72.60), SIMDE_FLOAT32_C( 96.30)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -25.70), SIMDE_FLOAT32_C( 80.90), SIMDE_FLOAT32_C( -77.80), SIMDE_FLOAT32_C( 4.90), SIMDE_FLOAT32_C( 70.20), SIMDE_FLOAT32_C( 32.70), SIMDE_FLOAT32_C( -60.70), SIMDE_FLOAT32_C( 68.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -99.00), SIMDE_FLOAT32_C( -12.20), SIMDE_FLOAT32_C( 41.70), SIMDE_FLOAT32_C( 9.80), SIMDE_FLOAT32_C( -34.40), SIMDE_FLOAT32_C( -50.10), SIMDE_FLOAT32_C( 35.40), SIMDE_FLOAT32_C( 62.60)), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( -1692.40), SIMDE_FLOAT32_C( -4744.85), SIMDE_FLOAT32_C( 6973.68), SIMDE_FLOAT32_C( 262.15), SIMDE_FLOAT32_C( 127.06), SIMDE_FLOAT32_C( -2911.35), SIMDE_FLOAT32_C( 4442.22), SIMDE_FLOAT32_C( 6611.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -40.00), SIMDE_FLOAT32_C( 62.80), SIMDE_FLOAT32_C( -40.00), SIMDE_FLOAT32_C( 16.60), SIMDE_FLOAT32_C( 60.10), SIMDE_FLOAT32_C( 22.60), SIMDE_FLOAT32_C( -12.40), SIMDE_FLOAT32_C( 91.30)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -98.70), SIMDE_FLOAT32_C( 17.00), SIMDE_FLOAT32_C( -23.90), SIMDE_FLOAT32_C( 29.60), SIMDE_FLOAT32_C( -52.60), SIMDE_FLOAT32_C( -30.60), SIMDE_FLOAT32_C( 43.40), SIMDE_FLOAT32_C( 76.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 61.00), SIMDE_FLOAT32_C( -10.10), SIMDE_FLOAT32_C( 48.20), SIMDE_FLOAT32_C( 50.20), SIMDE_FLOAT32_C( 12.20), SIMDE_FLOAT32_C( 64.80), SIMDE_FLOAT32_C( -68.90), SIMDE_FLOAT32_C( -86.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 4009.00), SIMDE_FLOAT32_C( 1057.50), SIMDE_FLOAT32_C( 1004.20), SIMDE_FLOAT32_C( 541.56), SIMDE_FLOAT32_C( -3149.06), SIMDE_FLOAT32_C( -626.76), SIMDE_FLOAT32_C( -607.06), SIMDE_FLOAT32_C( 6898.45)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_fmadd_ps(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm_fmadd_sd.
 *
 * Each of the 8 entries holds simde__m128d operands a, b, c and the expected
 * vector r, all built with simde_mm_set_pd. In the table, the second
 * set_pd argument of r equals a*b + c computed from the second arguments of
 * a, b and c, while the first argument of r is a copy of the first argument
 * of a. The loop calls simde_mm_fmadd_sd on each entry and compares the
 * result with r through simde_assert_m128d_close.
 *
 * NOTE(review): the trailing `1` passed to the assertion is presumably a
 * precision in decimal places -- confirm against the macro definition.
 * Returns 0 once every entry has been checked. */
static int test_simde_mm_fmadd_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d c; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 96.50), SIMDE_FLOAT64_C( -99.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( 16.90), SIMDE_FLOAT64_C( -76.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 53.90), SIMDE_FLOAT64_C( 6.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( 96.50), SIMDE_FLOAT64_C( 7648.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 53.00), SIMDE_FLOAT64_C( -2.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( -68.70), SIMDE_FLOAT64_C( -11.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -76.30), SIMDE_FLOAT64_C( 
62.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 53.00), SIMDE_FLOAT64_C( 87.47)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 34.00), SIMDE_FLOAT64_C( 30.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( -42.50), SIMDE_FLOAT64_C( 32.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -36.20), SIMDE_FLOAT64_C( 36.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( 34.00), SIMDE_FLOAT64_C( 1033.45)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -75.50), SIMDE_FLOAT64_C( -58.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( 29.70), SIMDE_FLOAT64_C( -42.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( 95.20), SIMDE_FLOAT64_C( 92.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -75.50), SIMDE_FLOAT64_C( 2545.40)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -59.60), SIMDE_FLOAT64_C( 12.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( -84.80), SIMDE_FLOAT64_C( 50.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( 42.20), SIMDE_FLOAT64_C( -77.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( -59.60), SIMDE_FLOAT64_C( 572.86)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 78.40), SIMDE_FLOAT64_C( -77.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -76.10), SIMDE_FLOAT64_C( -11.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( 5.80), SIMDE_FLOAT64_C( -75.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 78.40), SIMDE_FLOAT64_C( 771.10)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 6.70), SIMDE_FLOAT64_C( 47.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( 24.30), SIMDE_FLOAT64_C( 93.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( 93.40), SIMDE_FLOAT64_C( -50.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( 6.70), SIMDE_FLOAT64_C( 4405.26)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -59.60), SIMDE_FLOAT64_C( -73.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.20), SIMDE_FLOAT64_C( 10.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 45.00), SIMDE_FLOAT64_C( -86.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( -59.60), SIMDE_FLOAT64_C( -886.56)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_fmadd_sd(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm_fmadd_ss.
 *
 * Each of the 8 entries holds simde__m128 operands a, b, c and the expected
 * vector r, all built with simde_mm_set_ps. In the table, the last set_ps
 * argument of r equals a*b + c computed from the last arguments of a, b and
 * c, while the first three arguments of r are copies of the first three
 * arguments of a. The loop calls simde_mm_fmadd_ss on each entry and
 * compares the result with r through simde_assert_m128_close.
 *
 * NOTE(review): the trailing `1` passed to the assertion is presumably a
 * precision in decimal places -- confirm against the macro definition.
 * Returns 0 once every entry has been checked. */
static int 
test_simde_mm_fmadd_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 c; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 29.70), SIMDE_FLOAT32_C( -13.10), SIMDE_FLOAT32_C( -92.70), SIMDE_FLOAT32_C( 44.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 25.90), SIMDE_FLOAT32_C( 67.70), SIMDE_FLOAT32_C( -12.20), SIMDE_FLOAT32_C( 72.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 45.60), SIMDE_FLOAT32_C( 36.90), SIMDE_FLOAT32_C( -98.40), SIMDE_FLOAT32_C( -64.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 29.70), SIMDE_FLOAT32_C( -13.10), SIMDE_FLOAT32_C( -92.70), SIMDE_FLOAT32_C( 3169.76)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 16.70), SIMDE_FLOAT32_C( 85.50), SIMDE_FLOAT32_C( 89.70), SIMDE_FLOAT32_C( -23.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 70.80), SIMDE_FLOAT32_C( 99.80), SIMDE_FLOAT32_C( -87.00), SIMDE_FLOAT32_C( 9.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 42.40), SIMDE_FLOAT32_C( 38.10), SIMDE_FLOAT32_C( -58.60), SIMDE_FLOAT32_C( -71.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 16.70), SIMDE_FLOAT32_C( 85.50), SIMDE_FLOAT32_C( 89.70), SIMDE_FLOAT32_C( -285.90)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -85.70), SIMDE_FLOAT32_C( 66.60), SIMDE_FLOAT32_C( -84.60), SIMDE_FLOAT32_C( -90.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( 61.30), SIMDE_FLOAT32_C( -91.00), SIMDE_FLOAT32_C( -35.60), SIMDE_FLOAT32_C( -66.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( 76.30), SIMDE_FLOAT32_C( -46.00), SIMDE_FLOAT32_C( 54.10), SIMDE_FLOAT32_C( 17.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( -85.70), SIMDE_FLOAT32_C( 66.60), SIMDE_FLOAT32_C( -84.60), SIMDE_FLOAT32_C( 6017.75)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 14.40), SIMDE_FLOAT32_C( -25.60), SIMDE_FLOAT32_C( -65.60), SIMDE_FLOAT32_C( -71.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 65.40), SIMDE_FLOAT32_C( 95.90), SIMDE_FLOAT32_C( 51.70), SIMDE_FLOAT32_C( -84.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( -47.60), SIMDE_FLOAT32_C( -50.00), SIMDE_FLOAT32_C( 88.40), SIMDE_FLOAT32_C( -28.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 14.40), SIMDE_FLOAT32_C( 
-25.60), SIMDE_FLOAT32_C( -65.60), SIMDE_FLOAT32_C( 5970.70)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 29.50), SIMDE_FLOAT32_C( -26.70), SIMDE_FLOAT32_C( 8.30), SIMDE_FLOAT32_C( -34.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 76.70), SIMDE_FLOAT32_C( -34.90), SIMDE_FLOAT32_C( -78.80), SIMDE_FLOAT32_C( 84.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( 45.30), SIMDE_FLOAT32_C( -18.40), SIMDE_FLOAT32_C( -36.50), SIMDE_FLOAT32_C( -89.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 29.50), SIMDE_FLOAT32_C( -26.70), SIMDE_FLOAT32_C( 8.30), SIMDE_FLOAT32_C(-3021.15)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 64.00), SIMDE_FLOAT32_C( 46.60), SIMDE_FLOAT32_C( -17.50), SIMDE_FLOAT32_C( 24.10)), simde_mm_set_ps(SIMDE_FLOAT32_C( -67.40), SIMDE_FLOAT32_C( -16.40), SIMDE_FLOAT32_C( 38.30), SIMDE_FLOAT32_C( -92.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( -66.80), SIMDE_FLOAT32_C( 10.60), SIMDE_FLOAT32_C( -6.70), SIMDE_FLOAT32_C( -49.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 64.00), SIMDE_FLOAT32_C( 46.60), SIMDE_FLOAT32_C( -17.50), SIMDE_FLOAT32_C(-2273.43)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -98.50), SIMDE_FLOAT32_C( 15.30), SIMDE_FLOAT32_C( -33.40), SIMDE_FLOAT32_C( 4.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 97.00), SIMDE_FLOAT32_C( -35.60), SIMDE_FLOAT32_C( 63.50), SIMDE_FLOAT32_C( -94.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( -9.90), SIMDE_FLOAT32_C( -97.20), SIMDE_FLOAT32_C( -13.80), SIMDE_FLOAT32_C( 11.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( -98.50), SIMDE_FLOAT32_C( 15.30), SIMDE_FLOAT32_C( -33.40), SIMDE_FLOAT32_C( -441.04)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -11.00), SIMDE_FLOAT32_C( -65.00), SIMDE_FLOAT32_C( -76.20), SIMDE_FLOAT32_C( 54.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 37.10), SIMDE_FLOAT32_C( -97.90), SIMDE_FLOAT32_C( -36.50), SIMDE_FLOAT32_C( 50.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( -26.80), SIMDE_FLOAT32_C( -74.90), SIMDE_FLOAT32_C( -84.40), SIMDE_FLOAT32_C( 35.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( -11.00), SIMDE_FLOAT32_C( -65.00), SIMDE_FLOAT32_C( -76.20), SIMDE_FLOAT32_C( 2814.06)) } }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_fmadd_ss(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm_fmaddsub_pd.
 *
 * Each of the 8 entries holds simde__m128d operands a, b, c and the expected
 * vector r, all built with simde_mm_set_pd. In the table, the first set_pd
 * argument of r equals a*b + c and the second equals a*b - c, each computed
 * from the arguments at the same position. The loop calls
 * simde_mm_fmaddsub_pd on each entry and compares the result with r through
 * simde_assert_m128d_close.
 *
 * NOTE(review): the trailing `1` passed to the assertion is presumably a
 * precision in decimal places -- confirm against the macro definition.
 * Returns 0 once every entry has been checked. */
static int test_simde_mm_fmaddsub_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d c; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -72.20), SIMDE_FLOAT64_C( 74.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( 41.60), SIMDE_FLOAT64_C( -13.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( 18.90), SIMDE_FLOAT64_C( 65.30)), simde_mm_set_pd(SIMDE_FLOAT64_C(-2984.62), SIMDE_FLOAT64_C(-1069.70)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 14.70), SIMDE_FLOAT64_C( 97.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( 47.70), SIMDE_FLOAT64_C( 86.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 75.80), SIMDE_FLOAT64_C( 19.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 776.99), SIMDE_FLOAT64_C( 8443.10)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -24.00), SIMDE_FLOAT64_C( 39.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 42.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -66.10), SIMDE_FLOAT64_C( -55.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( -66.10), SIMDE_FLOAT64_C( 1731.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -45.20), SIMDE_FLOAT64_C( 65.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( -64.40), SIMDE_FLOAT64_C( 58.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( 79.80), SIMDE_FLOAT64_C( 19.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( 2990.68), SIMDE_FLOAT64_C( 3756.50)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 14.50), SIMDE_FLOAT64_C( -64.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 92.20), SIMDE_FLOAT64_C( -68.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( -2.50), SIMDE_FLOAT64_C( -96.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1334.40), SIMDE_FLOAT64_C( 4561.52)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 61.50), SIMDE_FLOAT64_C( 42.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( 77.20), SIMDE_FLOAT64_C( 23.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( 32.20), SIMDE_FLOAT64_C( 12.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( 
4780.00), SIMDE_FLOAT64_C( 975.82)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -38.20), SIMDE_FLOAT64_C( 8.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( -10.10), SIMDE_FLOAT64_C( 98.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( -14.20), SIMDE_FLOAT64_C( 22.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( 371.62), SIMDE_FLOAT64_C( 776.06)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 8.40), SIMDE_FLOAT64_C( -30.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -39.30), SIMDE_FLOAT64_C( 73.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( 25.30), SIMDE_FLOAT64_C( 2.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( -304.82), SIMDE_FLOAT64_C(-2256.28)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_fmaddsub_pd(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm256_fmaddsub_pd.
 *
 * Each of the 8 entries holds simde__m256d operands a, b, c and the expected
 * vector r, all built with simde_mm256_set_pd. In the table, the expected
 * values alternate in set_pd argument order between a*b + c (first and third
 * arguments) and a*b - c (second and fourth arguments). The loop calls
 * simde_mm256_fmaddsub_pd on each entry and compares the result with r
 * through simde_assert_m256d_close.
 *
 * NOTE(review): the trailing `1` passed to the assertion is presumably a
 * precision in decimal places -- confirm against the macro definition.
 * Returns 0 once every entry has been checked. */
static int test_simde_mm256_fmaddsub_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d c; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -52.10), SIMDE_FLOAT64_C( -92.00), SIMDE_FLOAT64_C( -82.90), SIMDE_FLOAT64_C( -49.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -49.30), SIMDE_FLOAT64_C( -97.40), SIMDE_FLOAT64_C( 58.80), SIMDE_FLOAT64_C( 67.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 23.30), SIMDE_FLOAT64_C( 87.10), SIMDE_FLOAT64_C( 71.70), SIMDE_FLOAT64_C( 97.10)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 2591.83), SIMDE_FLOAT64_C( 8873.70), SIMDE_FLOAT64_C(-4802.82), SIMDE_FLOAT64_C(-3409.50)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -83.80), SIMDE_FLOAT64_C( 50.40), SIMDE_FLOAT64_C( -94.80), SIMDE_FLOAT64_C( -86.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 3.10), SIMDE_FLOAT64_C( -46.80), SIMDE_FLOAT64_C( -3.10), SIMDE_FLOAT64_C( 83.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -11.70), SIMDE_FLOAT64_C( 76.10), SIMDE_FLOAT64_C( 44.50), SIMDE_FLOAT64_C( 28.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -271.48), SIMDE_FLOAT64_C(-2434.82), SIMDE_FLOAT64_C( 338.38), SIMDE_FLOAT64_C(-7232.40)) }, { 
simde_mm256_set_pd(SIMDE_FLOAT64_C( -64.40), SIMDE_FLOAT64_C( 40.90), SIMDE_FLOAT64_C( 36.80), SIMDE_FLOAT64_C( -1.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -57.50), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( -21.50), SIMDE_FLOAT64_C( -1.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 21.60), SIMDE_FLOAT64_C( -36.20), SIMDE_FLOAT64_C( -67.50), SIMDE_FLOAT64_C( -19.30)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 3724.60), SIMDE_FLOAT64_C( 240.70), SIMDE_FLOAT64_C( -858.70), SIMDE_FLOAT64_C( 21.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -37.10), SIMDE_FLOAT64_C( 2.20), SIMDE_FLOAT64_C( -99.10), SIMDE_FLOAT64_C( 78.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -60.20), SIMDE_FLOAT64_C( 29.30), SIMDE_FLOAT64_C( 2.50), SIMDE_FLOAT64_C( -40.10)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 59.60), SIMDE_FLOAT64_C( -28.40), SIMDE_FLOAT64_C( 58.10), SIMDE_FLOAT64_C( 96.90)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 2293.02), SIMDE_FLOAT64_C( 92.86), SIMDE_FLOAT64_C( -189.65), SIMDE_FLOAT64_C(-3232.72)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -88.70), SIMDE_FLOAT64_C( -20.50), SIMDE_FLOAT64_C( 28.00), SIMDE_FLOAT64_C( -13.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -49.60), SIMDE_FLOAT64_C( -13.90), SIMDE_FLOAT64_C( 71.80), SIMDE_FLOAT64_C( -29.40)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -95.70), SIMDE_FLOAT64_C( 48.30), SIMDE_FLOAT64_C( 78.20), SIMDE_FLOAT64_C( -6.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 4303.82), SIMDE_FLOAT64_C( 236.65), SIMDE_FLOAT64_C( 2088.60), SIMDE_FLOAT64_C( 408.78)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 74.60), SIMDE_FLOAT64_C( 40.20), SIMDE_FLOAT64_C( -4.40), SIMDE_FLOAT64_C( 51.30)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 21.60), SIMDE_FLOAT64_C( -83.50), SIMDE_FLOAT64_C( -2.00), SIMDE_FLOAT64_C( -6.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 13.70), SIMDE_FLOAT64_C( 39.10), SIMDE_FLOAT64_C( 92.60), SIMDE_FLOAT64_C( -41.90)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1625.06), SIMDE_FLOAT64_C(-3395.80), SIMDE_FLOAT64_C( 101.40), SIMDE_FLOAT64_C( -296.68)) }, { 
simde_mm256_set_pd(SIMDE_FLOAT64_C( -14.60), SIMDE_FLOAT64_C( -32.40), SIMDE_FLOAT64_C( 94.80), SIMDE_FLOAT64_C( -5.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 15.40), SIMDE_FLOAT64_C( -34.50), SIMDE_FLOAT64_C( 91.60), SIMDE_FLOAT64_C( -58.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -70.70), SIMDE_FLOAT64_C( -91.10), SIMDE_FLOAT64_C( -42.30), SIMDE_FLOAT64_C( 64.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -295.54), SIMDE_FLOAT64_C( 1208.90), SIMDE_FLOAT64_C( 8641.38), SIMDE_FLOAT64_C( 240.02)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 68.20), SIMDE_FLOAT64_C( -45.40), SIMDE_FLOAT64_C( 33.10), SIMDE_FLOAT64_C( 17.10)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 52.00), SIMDE_FLOAT64_C( 24.80), SIMDE_FLOAT64_C( 6.10), SIMDE_FLOAT64_C( 68.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 30.10), SIMDE_FLOAT64_C( 11.20), SIMDE_FLOAT64_C( -78.00), SIMDE_FLOAT64_C( -47.50)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 3576.50), SIMDE_FLOAT64_C(-1137.12), SIMDE_FLOAT64_C( 123.91), SIMDE_FLOAT64_C( 1223.98)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_fmaddsub_pd(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm_fmaddsub_ps.
 *
 * Each of the 8 entries holds simde__m128 operands a, b, c and the expected
 * vector r, all built with simde_mm_set_ps. In the table, the expected
 * values alternate in set_ps argument order between a*b + c (first and third
 * arguments) and a*b - c (second and fourth arguments). The loop calls
 * simde_mm_fmaddsub_ps on each entry and compares the result with r through
 * simde_assert_m128_close.
 *
 * NOTE(review): the trailing `1` passed to the assertion is presumably a
 * precision in decimal places -- confirm against the macro definition.
 * Returns 0 once every entry has been checked. */
static int test_simde_mm_fmaddsub_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 c; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -75.30), SIMDE_FLOAT32_C( 37.60), SIMDE_FLOAT32_C( 76.00), SIMDE_FLOAT32_C( -4.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( -15.80), SIMDE_FLOAT32_C( 64.20), SIMDE_FLOAT32_C( 50.90), SIMDE_FLOAT32_C( 26.10)), simde_mm_set_ps(SIMDE_FLOAT32_C( 64.80), SIMDE_FLOAT32_C( -10.00), SIMDE_FLOAT32_C( -97.40), SIMDE_FLOAT32_C( -90.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1254.54), SIMDE_FLOAT32_C( 2423.92), SIMDE_FLOAT32_C( 3771.00), SIMDE_FLOAT32_C( -19.32)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -48.70), SIMDE_FLOAT32_C( 50.40), SIMDE_FLOAT32_C( -22.00), SIMDE_FLOAT32_C( 76.40)), 
simde_mm_set_ps(SIMDE_FLOAT32_C( -80.30), SIMDE_FLOAT32_C( -99.30), SIMDE_FLOAT32_C( -86.10), SIMDE_FLOAT32_C( 30.10)), simde_mm_set_ps(SIMDE_FLOAT32_C( -41.10), SIMDE_FLOAT32_C( 57.20), SIMDE_FLOAT32_C( -41.90), SIMDE_FLOAT32_C( -88.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3869.51), SIMDE_FLOAT32_C(-5061.92), SIMDE_FLOAT32_C( 1852.30), SIMDE_FLOAT32_C( 2388.14)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 99.70), SIMDE_FLOAT32_C( 2.10), SIMDE_FLOAT32_C( 41.80), SIMDE_FLOAT32_C( -15.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 16.20), SIMDE_FLOAT32_C( -74.30), SIMDE_FLOAT32_C( -71.40), SIMDE_FLOAT32_C( 51.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 71.10), SIMDE_FLOAT32_C( -90.60), SIMDE_FLOAT32_C( -33.50), SIMDE_FLOAT32_C( -68.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1686.24), SIMDE_FLOAT32_C( -65.43), SIMDE_FLOAT32_C(-3018.02), SIMDE_FLOAT32_C( -717.44)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 39.90), SIMDE_FLOAT32_C( 12.10), SIMDE_FLOAT32_C( -93.10), SIMDE_FLOAT32_C( -73.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( -72.70), SIMDE_FLOAT32_C( -61.90), SIMDE_FLOAT32_C( 1.90), SIMDE_FLOAT32_C( 89.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( -63.40), SIMDE_FLOAT32_C( -46.10), SIMDE_FLOAT32_C( 50.20), SIMDE_FLOAT32_C( -74.10)), simde_mm_set_ps(SIMDE_FLOAT32_C(-2964.13), SIMDE_FLOAT32_C( -702.89), SIMDE_FLOAT32_C( -126.69), SIMDE_FLOAT32_C(-6494.10)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -37.90), SIMDE_FLOAT32_C( 16.10), SIMDE_FLOAT32_C( 65.80), SIMDE_FLOAT32_C( 65.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( -95.90), SIMDE_FLOAT32_C( 9.30), SIMDE_FLOAT32_C( 33.70), SIMDE_FLOAT32_C( -30.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 4.30), SIMDE_FLOAT32_C( -27.90), SIMDE_FLOAT32_C( -62.30), SIMDE_FLOAT32_C( -71.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3638.91), SIMDE_FLOAT32_C( 177.63), SIMDE_FLOAT32_C( 2155.16), SIMDE_FLOAT32_C(-1936.76)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 40.20), SIMDE_FLOAT32_C( -28.10), SIMDE_FLOAT32_C( -39.20), SIMDE_FLOAT32_C( 15.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( -33.70), SIMDE_FLOAT32_C( 
-55.90), SIMDE_FLOAT32_C( -9.80), SIMDE_FLOAT32_C( -88.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3.20), SIMDE_FLOAT32_C( -50.90), SIMDE_FLOAT32_C( 35.30), SIMDE_FLOAT32_C( -45.30)), simde_mm_set_ps(SIMDE_FLOAT32_C(-1351.54), SIMDE_FLOAT32_C( 1621.69), SIMDE_FLOAT32_C( 419.46), SIMDE_FLOAT32_C(-1277.70)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 2.60), SIMDE_FLOAT32_C( 70.50), SIMDE_FLOAT32_C( 56.20), SIMDE_FLOAT32_C( 5.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( -66.40), SIMDE_FLOAT32_C( 95.00), SIMDE_FLOAT32_C( 95.50), SIMDE_FLOAT32_C( -15.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( -60.10), SIMDE_FLOAT32_C( -25.30), SIMDE_FLOAT32_C( -69.10), SIMDE_FLOAT32_C( -77.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( -232.74), SIMDE_FLOAT32_C( 6722.80), SIMDE_FLOAT32_C( 5298.00), SIMDE_FLOAT32_C( -16.11)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -91.20), SIMDE_FLOAT32_C( 32.90), SIMDE_FLOAT32_C( -8.90), SIMDE_FLOAT32_C( -97.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( -78.50), SIMDE_FLOAT32_C( 49.50), SIMDE_FLOAT32_C( 63.70), SIMDE_FLOAT32_C( -83.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 10.30), SIMDE_FLOAT32_C( 73.30), SIMDE_FLOAT32_C( -68.20), SIMDE_FLOAT32_C( 60.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( 7169.50), SIMDE_FLOAT32_C( 1555.25), SIMDE_FLOAT32_C( -635.13), SIMDE_FLOAT32_C( 8015.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_fmaddsub_ps(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm256_fmaddsub_ps.
 *
 * Each of the 8 entries holds simde__m256 operands a, b, c and the expected
 * vector r, all built with simde_mm256_set_ps. In the table, the expected
 * values alternate in set_ps argument order, starting with a*b + c for the
 * first argument and a*b - c for the second. The loop calls
 * simde_mm256_fmaddsub_ps on each entry and compares the result with r
 * through simde_assert_m256_close.
 *
 * NOTE(review): the trailing `1` passed to the assertion is presumably a
 * precision in decimal places -- confirm against the macro definition.
 * Returns 0 once every entry has been checked. */
static int test_simde_mm256_fmaddsub_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 c; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -61.10), SIMDE_FLOAT32_C( -95.60), SIMDE_FLOAT32_C( 56.00), SIMDE_FLOAT32_C( 46.30), SIMDE_FLOAT32_C( -62.80), SIMDE_FLOAT32_C( 38.90), SIMDE_FLOAT32_C( -92.60), SIMDE_FLOAT32_C( 65.40)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -38.30), SIMDE_FLOAT32_C( -1.90), SIMDE_FLOAT32_C( -28.00), SIMDE_FLOAT32_C( -43.20), 
SIMDE_FLOAT32_C( -19.40), SIMDE_FLOAT32_C( 57.60), SIMDE_FLOAT32_C( -97.20), SIMDE_FLOAT32_C( 81.20)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 93.20), SIMDE_FLOAT32_C( -43.00), SIMDE_FLOAT32_C( -47.40), SIMDE_FLOAT32_C( -77.00), SIMDE_FLOAT32_C( -59.90), SIMDE_FLOAT32_C( 17.90), SIMDE_FLOAT32_C( -9.60), SIMDE_FLOAT32_C( -61.30)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 2433.33), SIMDE_FLOAT32_C( 224.64), SIMDE_FLOAT32_C(-1615.40), SIMDE_FLOAT32_C(-1923.16), SIMDE_FLOAT32_C( 1158.42), SIMDE_FLOAT32_C( 2222.74), SIMDE_FLOAT32_C( 8991.12), SIMDE_FLOAT32_C( 5371.78)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -23.40), SIMDE_FLOAT32_C( -24.60), SIMDE_FLOAT32_C( 35.70), SIMDE_FLOAT32_C( 59.90), SIMDE_FLOAT32_C( -91.00), SIMDE_FLOAT32_C( -25.40), SIMDE_FLOAT32_C( -88.30), SIMDE_FLOAT32_C( -99.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -30.70), SIMDE_FLOAT32_C( 97.10), SIMDE_FLOAT32_C( 86.90), SIMDE_FLOAT32_C( -81.10), SIMDE_FLOAT32_C( -71.30), SIMDE_FLOAT32_C( -61.20), SIMDE_FLOAT32_C( -26.10), SIMDE_FLOAT32_C( 31.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -74.30), SIMDE_FLOAT32_C( -19.40), SIMDE_FLOAT32_C( -70.80), SIMDE_FLOAT32_C( -13.00), SIMDE_FLOAT32_C( 82.90), SIMDE_FLOAT32_C( -75.70), SIMDE_FLOAT32_C( -31.50), SIMDE_FLOAT32_C( 73.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 644.08), SIMDE_FLOAT32_C(-2369.26), SIMDE_FLOAT32_C( 3031.53), SIMDE_FLOAT32_C(-4844.89), SIMDE_FLOAT32_C( 6571.20), SIMDE_FLOAT32_C( 1630.18), SIMDE_FLOAT32_C( 2273.13), SIMDE_FLOAT32_C(-3227.18)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 70.20), SIMDE_FLOAT32_C( -20.40), SIMDE_FLOAT32_C( -51.50), SIMDE_FLOAT32_C( 82.30), SIMDE_FLOAT32_C( 31.30), SIMDE_FLOAT32_C( 17.80), SIMDE_FLOAT32_C( -39.60), SIMDE_FLOAT32_C( 66.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -72.50), SIMDE_FLOAT32_C( 52.00), SIMDE_FLOAT32_C( -54.80), SIMDE_FLOAT32_C( 14.00), SIMDE_FLOAT32_C( 91.80), SIMDE_FLOAT32_C( -80.70), SIMDE_FLOAT32_C( -97.90), SIMDE_FLOAT32_C( -99.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -76.10), SIMDE_FLOAT32_C( 
26.90), SIMDE_FLOAT32_C( 24.90), SIMDE_FLOAT32_C( -50.60), SIMDE_FLOAT32_C( 66.90), SIMDE_FLOAT32_C( 82.40), SIMDE_FLOAT32_C( 98.50), SIMDE_FLOAT32_C( 9.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-5165.60), SIMDE_FLOAT32_C(-1087.70), SIMDE_FLOAT32_C( 2847.10), SIMDE_FLOAT32_C( 1202.80), SIMDE_FLOAT32_C( 2940.24), SIMDE_FLOAT32_C(-1518.86), SIMDE_FLOAT32_C( 3975.34), SIMDE_FLOAT32_C(-6622.80)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -28.40), SIMDE_FLOAT32_C( 22.80), SIMDE_FLOAT32_C( 16.40), SIMDE_FLOAT32_C( 80.20), SIMDE_FLOAT32_C( -24.10), SIMDE_FLOAT32_C( -83.00), SIMDE_FLOAT32_C( -74.10), SIMDE_FLOAT32_C( -49.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -73.70), SIMDE_FLOAT32_C( 59.00), SIMDE_FLOAT32_C( 36.90), SIMDE_FLOAT32_C( 7.50), SIMDE_FLOAT32_C( -74.80), SIMDE_FLOAT32_C( -84.40), SIMDE_FLOAT32_C( 79.60), SIMDE_FLOAT32_C( -90.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -76.40), SIMDE_FLOAT32_C( 28.10), SIMDE_FLOAT32_C( -13.60), SIMDE_FLOAT32_C( -71.50), SIMDE_FLOAT32_C( -52.20), SIMDE_FLOAT32_C( -30.20), SIMDE_FLOAT32_C( -62.60), SIMDE_FLOAT32_C( 2.30)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 2016.68), SIMDE_FLOAT32_C( 1317.10), SIMDE_FLOAT32_C( 591.56), SIMDE_FLOAT32_C( 673.00), SIMDE_FLOAT32_C( 1750.48), SIMDE_FLOAT32_C( 7035.40), SIMDE_FLOAT32_C(-5960.96), SIMDE_FLOAT32_C( 4496.42)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 74.80), SIMDE_FLOAT32_C( 79.60), SIMDE_FLOAT32_C( -91.30), SIMDE_FLOAT32_C( 86.60), SIMDE_FLOAT32_C( 41.70), SIMDE_FLOAT32_C( -74.30), SIMDE_FLOAT32_C( -75.60), SIMDE_FLOAT32_C( 28.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 96.00), SIMDE_FLOAT32_C( 10.10), SIMDE_FLOAT32_C( -63.40), SIMDE_FLOAT32_C( 96.90), SIMDE_FLOAT32_C( 66.20), SIMDE_FLOAT32_C( -75.30), SIMDE_FLOAT32_C( -11.80), SIMDE_FLOAT32_C( 30.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 31.70), SIMDE_FLOAT32_C( -47.90), SIMDE_FLOAT32_C( 27.70), SIMDE_FLOAT32_C( 40.70), SIMDE_FLOAT32_C( -22.80), SIMDE_FLOAT32_C( 35.80), SIMDE_FLOAT32_C( -30.10), SIMDE_FLOAT32_C( 88.00)), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( 7212.50), SIMDE_FLOAT32_C( 851.86), SIMDE_FLOAT32_C( 5816.12), SIMDE_FLOAT32_C( 8350.84), SIMDE_FLOAT32_C( 2737.74), SIMDE_FLOAT32_C( 5558.99), SIMDE_FLOAT32_C( 861.98), SIMDE_FLOAT32_C( 786.95)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 91.80), SIMDE_FLOAT32_C( -99.10), SIMDE_FLOAT32_C( -91.30), SIMDE_FLOAT32_C( 69.40), SIMDE_FLOAT32_C( 38.40), SIMDE_FLOAT32_C( -90.40), SIMDE_FLOAT32_C( 62.20), SIMDE_FLOAT32_C( -62.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 82.70), SIMDE_FLOAT32_C( -63.90), SIMDE_FLOAT32_C( 57.00), SIMDE_FLOAT32_C( -53.70), SIMDE_FLOAT32_C( -62.00), SIMDE_FLOAT32_C( 87.90), SIMDE_FLOAT32_C( -60.70), SIMDE_FLOAT32_C( -94.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 65.30), SIMDE_FLOAT32_C( 61.10), SIMDE_FLOAT32_C( -35.30), SIMDE_FLOAT32_C( -37.60), SIMDE_FLOAT32_C( 3.40), SIMDE_FLOAT32_C( 10.20), SIMDE_FLOAT32_C( 25.70), SIMDE_FLOAT32_C( 31.10)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 7657.16), SIMDE_FLOAT32_C( 6271.39), SIMDE_FLOAT32_C(-5239.40), SIMDE_FLOAT32_C(-3689.18), SIMDE_FLOAT32_C(-2377.40), SIMDE_FLOAT32_C(-7956.36), SIMDE_FLOAT32_C(-3749.84), SIMDE_FLOAT32_C( 5875.15)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -9.70), SIMDE_FLOAT32_C( 54.70), SIMDE_FLOAT32_C( -66.40), SIMDE_FLOAT32_C( -34.70), SIMDE_FLOAT32_C( -27.90), SIMDE_FLOAT32_C( 92.40), SIMDE_FLOAT32_C( -11.40), SIMDE_FLOAT32_C( 14.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( -71.50), SIMDE_FLOAT32_C( 67.00), SIMDE_FLOAT32_C( -56.30), SIMDE_FLOAT32_C( 74.40), SIMDE_FLOAT32_C( 9.80), SIMDE_FLOAT32_C( -5.30), SIMDE_FLOAT32_C( 63.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -47.10), SIMDE_FLOAT32_C( 81.20), SIMDE_FLOAT32_C( 31.00), SIMDE_FLOAT32_C( 11.50), SIMDE_FLOAT32_C( 67.80), SIMDE_FLOAT32_C( -14.20), SIMDE_FLOAT32_C( -62.80), SIMDE_FLOAT32_C( 84.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -38.37), SIMDE_FLOAT32_C(-3992.25), SIMDE_FLOAT32_C(-4417.80), SIMDE_FLOAT32_C( 1942.11), SIMDE_FLOAT32_C(-2007.96), SIMDE_FLOAT32_C( 919.72), 
SIMDE_FLOAT32_C( -2.38), SIMDE_FLOAT32_C( 846.78)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -14.10), SIMDE_FLOAT32_C( -90.60), SIMDE_FLOAT32_C( 37.70), SIMDE_FLOAT32_C( 63.50), SIMDE_FLOAT32_C( -67.90), SIMDE_FLOAT32_C( -75.70), SIMDE_FLOAT32_C( 48.30), SIMDE_FLOAT32_C( 69.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 21.20), SIMDE_FLOAT32_C( -56.80), SIMDE_FLOAT32_C( -51.20), SIMDE_FLOAT32_C( -55.60), SIMDE_FLOAT32_C( 65.10), SIMDE_FLOAT32_C( 21.30), SIMDE_FLOAT32_C( -29.20), SIMDE_FLOAT32_C( -61.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 55.80), SIMDE_FLOAT32_C( -16.50), SIMDE_FLOAT32_C( 90.30), SIMDE_FLOAT32_C( 10.50), SIMDE_FLOAT32_C( -35.10), SIMDE_FLOAT32_C( 8.40), SIMDE_FLOAT32_C( -35.70), SIMDE_FLOAT32_C( 70.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -243.12), SIMDE_FLOAT32_C( 5162.58), SIMDE_FLOAT32_C(-1839.94), SIMDE_FLOAT32_C(-3541.10), SIMDE_FLOAT32_C(-4455.39), SIMDE_FLOAT32_C(-1620.81), SIMDE_FLOAT32_C(-1446.06), SIMDE_FLOAT32_C(-4370.48)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_fmaddsub_ps(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm_fmsub_pd.
 *
 * Each of the 8 entries holds simde__m128d operands a, b, c and the expected
 * vector r, all built with simde_mm_set_pd. In the table, every element of r
 * equals a*b - c for the element at the same position. The loop calls
 * simde_mm_fmsub_pd on each entry and compares the result with r through
 * simde_assert_m128d_close.
 *
 * NOTE(review): the trailing `1` passed to the assertion is presumably a
 * precision in decimal places -- confirm against the macro definition.
 * Returns 0 once every entry has been checked. */
static int test_simde_mm_fmsub_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d c; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -86.00), SIMDE_FLOAT64_C( -88.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( 79.20), SIMDE_FLOAT64_C( -72.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( 69.30), SIMDE_FLOAT64_C( -94.80)), simde_mm_set_pd(SIMDE_FLOAT64_C(-6880.50), SIMDE_FLOAT64_C( 6543.29)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 29.00), SIMDE_FLOAT64_C( -23.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( 88.90), SIMDE_FLOAT64_C( 98.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( -27.80), SIMDE_FLOAT64_C( -64.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( 2605.90), SIMDE_FLOAT64_C(-2196.40)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -56.40), SIMDE_FLOAT64_C( 49.90)), 
simde_mm_set_pd(SIMDE_FLOAT64_C( 79.10), SIMDE_FLOAT64_C( -51.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -66.70), SIMDE_FLOAT64_C( 16.60)), simde_mm_set_pd(SIMDE_FLOAT64_C(-4394.54), SIMDE_FLOAT64_C(-2596.43)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -73.50), SIMDE_FLOAT64_C( 25.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( 95.60), SIMDE_FLOAT64_C( 38.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( 96.40), SIMDE_FLOAT64_C( 40.10)), simde_mm_set_pd(SIMDE_FLOAT64_C(-7123.00), SIMDE_FLOAT64_C( 939.01)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 96.70), SIMDE_FLOAT64_C( -25.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( -31.20), SIMDE_FLOAT64_C( -59.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( -18.60), SIMDE_FLOAT64_C( -15.90)), simde_mm_set_pd(SIMDE_FLOAT64_C(-2998.44), SIMDE_FLOAT64_C( 1567.31)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -63.20), SIMDE_FLOAT64_C( -69.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( -27.30), SIMDE_FLOAT64_C( 57.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( 17.60), SIMDE_FLOAT64_C( 32.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1707.76), SIMDE_FLOAT64_C(-4066.03)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -33.50), SIMDE_FLOAT64_C( 64.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -68.40), SIMDE_FLOAT64_C( -49.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( 44.70), SIMDE_FLOAT64_C( 88.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( 2246.70), SIMDE_FLOAT64_C(-3271.94)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -79.60), SIMDE_FLOAT64_C( -61.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( -29.20), SIMDE_FLOAT64_C( -21.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( -94.70), SIMDE_FLOAT64_C( -26.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( 2419.02), SIMDE_FLOAT64_C( 1325.96)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_fmsub_pd(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_fmsub_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d c; simde__m256d r; } test_vec[8] = { { 
simde_mm256_set_pd(SIMDE_FLOAT64_C( 34.80), SIMDE_FLOAT64_C( 57.60), SIMDE_FLOAT64_C( 21.20), SIMDE_FLOAT64_C( 58.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -15.50), SIMDE_FLOAT64_C( -85.90), SIMDE_FLOAT64_C( 76.40), SIMDE_FLOAT64_C( 37.40)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -67.00), SIMDE_FLOAT64_C( -15.40), SIMDE_FLOAT64_C( 94.00), SIMDE_FLOAT64_C( -95.50)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -472.40), SIMDE_FLOAT64_C(-4932.44), SIMDE_FLOAT64_C( 1525.68), SIMDE_FLOAT64_C( 2290.88)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 79.40), SIMDE_FLOAT64_C( -18.40), SIMDE_FLOAT64_C( -87.30), SIMDE_FLOAT64_C( -43.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 46.70), SIMDE_FLOAT64_C( -61.00), SIMDE_FLOAT64_C( 22.50), SIMDE_FLOAT64_C( -19.30)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -92.50), SIMDE_FLOAT64_C( 24.60), SIMDE_FLOAT64_C( 48.50), SIMDE_FLOAT64_C( 81.10)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 3800.48), SIMDE_FLOAT64_C( 1097.80), SIMDE_FLOAT64_C(-2012.75), SIMDE_FLOAT64_C( 762.31)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -53.30), SIMDE_FLOAT64_C( 37.50), SIMDE_FLOAT64_C( -12.20), SIMDE_FLOAT64_C( 77.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 44.30), SIMDE_FLOAT64_C( 68.70), SIMDE_FLOAT64_C( 45.00), SIMDE_FLOAT64_C( -94.90)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 10.90), SIMDE_FLOAT64_C( -78.60), SIMDE_FLOAT64_C( 59.40), SIMDE_FLOAT64_C( 54.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-2372.09), SIMDE_FLOAT64_C( 2654.85), SIMDE_FLOAT64_C( -608.40), SIMDE_FLOAT64_C(-7380.98)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -45.20), SIMDE_FLOAT64_C( -98.30), SIMDE_FLOAT64_C( 6.30), SIMDE_FLOAT64_C( -64.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 47.00), SIMDE_FLOAT64_C( -17.30), SIMDE_FLOAT64_C( 90.50), SIMDE_FLOAT64_C( 33.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -47.40), SIMDE_FLOAT64_C( -48.00), SIMDE_FLOAT64_C( 92.50), SIMDE_FLOAT64_C( -62.30)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-2077.00), SIMDE_FLOAT64_C( 1748.59), SIMDE_FLOAT64_C( 477.65), SIMDE_FLOAT64_C(-2069.14)) }, { 
simde_mm256_set_pd(SIMDE_FLOAT64_C( -66.50), SIMDE_FLOAT64_C( 50.50), SIMDE_FLOAT64_C( -60.50), SIMDE_FLOAT64_C( 97.50)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -77.70), SIMDE_FLOAT64_C( -31.10), SIMDE_FLOAT64_C( 56.50), SIMDE_FLOAT64_C( -49.90)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -78.10), SIMDE_FLOAT64_C( -33.20), SIMDE_FLOAT64_C( 60.50), SIMDE_FLOAT64_C( 91.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 5245.15), SIMDE_FLOAT64_C(-1537.35), SIMDE_FLOAT64_C(-3478.75), SIMDE_FLOAT64_C(-4956.45)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 97.80), SIMDE_FLOAT64_C( 3.10), SIMDE_FLOAT64_C( -8.70), SIMDE_FLOAT64_C( 56.90)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 88.60), SIMDE_FLOAT64_C( -73.80), SIMDE_FLOAT64_C( 92.30), SIMDE_FLOAT64_C( 21.50)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -81.80), SIMDE_FLOAT64_C( -53.80), SIMDE_FLOAT64_C( -76.80), SIMDE_FLOAT64_C( -90.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 8746.88), SIMDE_FLOAT64_C( -174.98), SIMDE_FLOAT64_C( -726.21), SIMDE_FLOAT64_C( 1313.55)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -20.60), SIMDE_FLOAT64_C( -46.30), SIMDE_FLOAT64_C( 51.00), SIMDE_FLOAT64_C( 60.50)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -26.30), SIMDE_FLOAT64_C( -65.50), SIMDE_FLOAT64_C( -31.40), SIMDE_FLOAT64_C( -0.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -79.80), SIMDE_FLOAT64_C( 98.80), SIMDE_FLOAT64_C( 31.60), SIMDE_FLOAT64_C( -29.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 621.58), SIMDE_FLOAT64_C( 2933.85), SIMDE_FLOAT64_C(-1633.00), SIMDE_FLOAT64_C( 16.90)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 73.70), SIMDE_FLOAT64_C( -28.30), SIMDE_FLOAT64_C( -1.90), SIMDE_FLOAT64_C( -61.50)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -19.60), SIMDE_FLOAT64_C( -92.40), SIMDE_FLOAT64_C( -22.30), SIMDE_FLOAT64_C( -53.90)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -75.90), SIMDE_FLOAT64_C( 72.50), SIMDE_FLOAT64_C( -50.10), SIMDE_FLOAT64_C( 18.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-1368.62), SIMDE_FLOAT64_C( 2542.42), SIMDE_FLOAT64_C( 92.47), SIMDE_FLOAT64_C( 3296.85)) } }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_fmsub_pd(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_fmsub_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 c; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -36.50), SIMDE_FLOAT32_C( 13.70), SIMDE_FLOAT32_C( -3.10), SIMDE_FLOAT32_C( 21.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( 80.60), SIMDE_FLOAT32_C( 11.30), SIMDE_FLOAT32_C( 96.80), SIMDE_FLOAT32_C( -38.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( -8.50), SIMDE_FLOAT32_C( -28.20), SIMDE_FLOAT32_C( -26.80), SIMDE_FLOAT32_C( -95.00)), simde_mm_set_ps(SIMDE_FLOAT32_C(-2933.40), SIMDE_FLOAT32_C( 183.01), SIMDE_FLOAT32_C( -273.28), SIMDE_FLOAT32_C( -722.48)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 59.20), SIMDE_FLOAT32_C( -6.20), SIMDE_FLOAT32_C( -52.90), SIMDE_FLOAT32_C( -75.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 61.80), SIMDE_FLOAT32_C( -76.10), SIMDE_FLOAT32_C( -87.70), SIMDE_FLOAT32_C( -40.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( 34.20), SIMDE_FLOAT32_C( 37.10), SIMDE_FLOAT32_C( 7.30), SIMDE_FLOAT32_C( 67.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3624.36), SIMDE_FLOAT32_C( 434.72), SIMDE_FLOAT32_C( 4632.03), SIMDE_FLOAT32_C( 2998.05)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -85.40), SIMDE_FLOAT32_C( 36.60), SIMDE_FLOAT32_C( -55.80), SIMDE_FLOAT32_C( 5.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( -37.10), SIMDE_FLOAT32_C( 37.80), SIMDE_FLOAT32_C( -6.30), SIMDE_FLOAT32_C( 90.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( 16.90), SIMDE_FLOAT32_C( -83.90), SIMDE_FLOAT32_C( 82.90), SIMDE_FLOAT32_C( 23.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3151.44), SIMDE_FLOAT32_C( 1467.38), SIMDE_FLOAT32_C( 268.64), SIMDE_FLOAT32_C( 510.36)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 59.30), SIMDE_FLOAT32_C( 97.10), SIMDE_FLOAT32_C( -5.30), SIMDE_FLOAT32_C( -37.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( -7.50), SIMDE_FLOAT32_C( 42.80), 
SIMDE_FLOAT32_C( -32.50), SIMDE_FLOAT32_C( -34.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( 25.50), SIMDE_FLOAT32_C( 87.80), SIMDE_FLOAT32_C( 95.90), SIMDE_FLOAT32_C( -68.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( -470.25), SIMDE_FLOAT32_C( 4068.08), SIMDE_FLOAT32_C( 76.35), SIMDE_FLOAT32_C( 1362.34)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -87.90), SIMDE_FLOAT32_C( -35.50), SIMDE_FLOAT32_C( -15.00), SIMDE_FLOAT32_C( 72.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 78.40), SIMDE_FLOAT32_C( 83.00), SIMDE_FLOAT32_C( 34.70), SIMDE_FLOAT32_C( -8.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( 99.60), SIMDE_FLOAT32_C( 96.00), SIMDE_FLOAT32_C( 45.40), SIMDE_FLOAT32_C( -79.90)), simde_mm_set_ps(SIMDE_FLOAT32_C(-6990.96), SIMDE_FLOAT32_C(-3042.50), SIMDE_FLOAT32_C( -565.90), SIMDE_FLOAT32_C( -538.90)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 26.30), SIMDE_FLOAT32_C( 69.80), SIMDE_FLOAT32_C( -48.50), SIMDE_FLOAT32_C( -58.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( 85.00), SIMDE_FLOAT32_C( -97.40), SIMDE_FLOAT32_C( 16.90), SIMDE_FLOAT32_C( -37.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( -22.30), SIMDE_FLOAT32_C( 21.90), SIMDE_FLOAT32_C( -79.20), SIMDE_FLOAT32_C( -99.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 2257.80), SIMDE_FLOAT32_C(-6820.42), SIMDE_FLOAT32_C( -740.45), SIMDE_FLOAT32_C( 2281.25)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 24.00), SIMDE_FLOAT32_C( 51.40), SIMDE_FLOAT32_C( -24.70), SIMDE_FLOAT32_C( -32.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( 34.10), SIMDE_FLOAT32_C( 90.10), SIMDE_FLOAT32_C( 39.10), SIMDE_FLOAT32_C( -33.10)), simde_mm_set_ps(SIMDE_FLOAT32_C( 63.90), SIMDE_FLOAT32_C( -54.20), SIMDE_FLOAT32_C( -27.60), SIMDE_FLOAT32_C( 31.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 754.50), SIMDE_FLOAT32_C( 4685.34), SIMDE_FLOAT32_C( -938.17), SIMDE_FLOAT32_C( 1044.05)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 45.70), SIMDE_FLOAT32_C( -95.60), SIMDE_FLOAT32_C( 14.60), SIMDE_FLOAT32_C( -3.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( -90.50), SIMDE_FLOAT32_C( -20.20), SIMDE_FLOAT32_C( 91.40), SIMDE_FLOAT32_C( 25.10)), 
simde_mm_set_ps(SIMDE_FLOAT32_C( -26.90), SIMDE_FLOAT32_C( 29.30), SIMDE_FLOAT32_C( 77.50), SIMDE_FLOAT32_C( -80.00)), simde_mm_set_ps(SIMDE_FLOAT32_C(-4108.95), SIMDE_FLOAT32_C( 1901.82), SIMDE_FLOAT32_C( 1256.94), SIMDE_FLOAT32_C( -5.34)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_fmsub_ps(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_fmsub_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 c; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 71.60), SIMDE_FLOAT32_C( 70.70), SIMDE_FLOAT32_C( 40.60), SIMDE_FLOAT32_C( -9.30), SIMDE_FLOAT32_C( -79.10), SIMDE_FLOAT32_C( 52.30), SIMDE_FLOAT32_C( -67.90), SIMDE_FLOAT32_C( 25.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -51.50), SIMDE_FLOAT32_C( 43.80), SIMDE_FLOAT32_C( 41.70), SIMDE_FLOAT32_C( -77.20), SIMDE_FLOAT32_C( -5.00), SIMDE_FLOAT32_C( 96.70), SIMDE_FLOAT32_C( -13.50), SIMDE_FLOAT32_C( -2.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -28.70), SIMDE_FLOAT32_C( -28.30), SIMDE_FLOAT32_C( 1.80), SIMDE_FLOAT32_C( -81.10), SIMDE_FLOAT32_C( -82.10), SIMDE_FLOAT32_C( -69.80), SIMDE_FLOAT32_C( 42.10), SIMDE_FLOAT32_C( 74.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-3658.70), SIMDE_FLOAT32_C( 3124.96), SIMDE_FLOAT32_C( 1691.22), SIMDE_FLOAT32_C( 799.06), SIMDE_FLOAT32_C( 477.60), SIMDE_FLOAT32_C( 5127.21), SIMDE_FLOAT32_C( 874.55), SIMDE_FLOAT32_C( -144.09)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -49.00), SIMDE_FLOAT32_C( -78.70), SIMDE_FLOAT32_C( -72.10), SIMDE_FLOAT32_C( 26.10), SIMDE_FLOAT32_C( -91.90), SIMDE_FLOAT32_C( 1.40), SIMDE_FLOAT32_C( 89.80), SIMDE_FLOAT32_C( 94.20)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -13.70), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 57.80), SIMDE_FLOAT32_C( 33.00), SIMDE_FLOAT32_C( -83.50), SIMDE_FLOAT32_C( -8.10), SIMDE_FLOAT32_C( 91.30), SIMDE_FLOAT32_C( 65.20)), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( -67.90), SIMDE_FLOAT32_C( -56.40), SIMDE_FLOAT32_C( 5.90), SIMDE_FLOAT32_C( 2.40), SIMDE_FLOAT32_C( 91.80), SIMDE_FLOAT32_C( 50.80), SIMDE_FLOAT32_C( 64.70), SIMDE_FLOAT32_C( -56.10)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 739.20), SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C(-4173.28), SIMDE_FLOAT32_C( 858.90), SIMDE_FLOAT32_C( 7581.85), SIMDE_FLOAT32_C( -62.14), SIMDE_FLOAT32_C( 8134.04), SIMDE_FLOAT32_C( 6197.94)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 40.00), SIMDE_FLOAT32_C( -5.30), SIMDE_FLOAT32_C( 85.00), SIMDE_FLOAT32_C( 83.70), SIMDE_FLOAT32_C( 96.80), SIMDE_FLOAT32_C( -59.70), SIMDE_FLOAT32_C( -72.50), SIMDE_FLOAT32_C( -8.10)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 77.50), SIMDE_FLOAT32_C( 50.00), SIMDE_FLOAT32_C( 72.40), SIMDE_FLOAT32_C( 98.40), SIMDE_FLOAT32_C( 69.10), SIMDE_FLOAT32_C( 35.80), SIMDE_FLOAT32_C( -92.90), SIMDE_FLOAT32_C( 63.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 73.80), SIMDE_FLOAT32_C( -94.30), SIMDE_FLOAT32_C( -79.50), SIMDE_FLOAT32_C( 64.60), SIMDE_FLOAT32_C( 63.40), SIMDE_FLOAT32_C( -65.00), SIMDE_FLOAT32_C( 75.20), SIMDE_FLOAT32_C( 48.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 3026.20), SIMDE_FLOAT32_C( -170.70), SIMDE_FLOAT32_C( 6233.50), SIMDE_FLOAT32_C( 8171.48), SIMDE_FLOAT32_C( 6625.48), SIMDE_FLOAT32_C(-2072.26), SIMDE_FLOAT32_C( 6660.05), SIMDE_FLOAT32_C( -564.67)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -82.20), SIMDE_FLOAT32_C( 93.30), SIMDE_FLOAT32_C( 9.70), SIMDE_FLOAT32_C( -2.70), SIMDE_FLOAT32_C( 86.00), SIMDE_FLOAT32_C( -20.80), SIMDE_FLOAT32_C( 67.70), SIMDE_FLOAT32_C( -47.20)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -83.70), SIMDE_FLOAT32_C( 12.00), SIMDE_FLOAT32_C( 23.10), SIMDE_FLOAT32_C( -42.00), SIMDE_FLOAT32_C( 46.30), SIMDE_FLOAT32_C( 48.20), SIMDE_FLOAT32_C( 86.90), SIMDE_FLOAT32_C( -91.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -48.60), SIMDE_FLOAT32_C( 92.30), SIMDE_FLOAT32_C( -12.70), SIMDE_FLOAT32_C( -48.20), SIMDE_FLOAT32_C( 60.90), SIMDE_FLOAT32_C( 43.20), SIMDE_FLOAT32_C( 
-71.30), SIMDE_FLOAT32_C( -56.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 6928.74), SIMDE_FLOAT32_C( 1027.30), SIMDE_FLOAT32_C( 236.77), SIMDE_FLOAT32_C( 161.60), SIMDE_FLOAT32_C( 3920.90), SIMDE_FLOAT32_C(-1045.76), SIMDE_FLOAT32_C( 5954.43), SIMDE_FLOAT32_C( 4375.40)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 32.60), SIMDE_FLOAT32_C( 90.30), SIMDE_FLOAT32_C( -31.90), SIMDE_FLOAT32_C( 33.60), SIMDE_FLOAT32_C( 47.40), SIMDE_FLOAT32_C( 49.30), SIMDE_FLOAT32_C( -73.00), SIMDE_FLOAT32_C( 55.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -72.60), SIMDE_FLOAT32_C( 34.70), SIMDE_FLOAT32_C( -8.30), SIMDE_FLOAT32_C( -47.40), SIMDE_FLOAT32_C( -91.00), SIMDE_FLOAT32_C( -99.10), SIMDE_FLOAT32_C( -84.60), SIMDE_FLOAT32_C( -13.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -84.20), SIMDE_FLOAT32_C( 35.00), SIMDE_FLOAT32_C( -58.10), SIMDE_FLOAT32_C( 81.70), SIMDE_FLOAT32_C( 1.20), SIMDE_FLOAT32_C( -33.20), SIMDE_FLOAT32_C( 36.00), SIMDE_FLOAT32_C( -80.90)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-2282.56), SIMDE_FLOAT32_C( 3098.41), SIMDE_FLOAT32_C( 322.87), SIMDE_FLOAT32_C(-1674.34), SIMDE_FLOAT32_C(-4314.60), SIMDE_FLOAT32_C(-4852.43), SIMDE_FLOAT32_C( 6139.80), SIMDE_FLOAT32_C( -672.40)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -58.90), SIMDE_FLOAT32_C( 53.10), SIMDE_FLOAT32_C( -76.60), SIMDE_FLOAT32_C( 83.00), SIMDE_FLOAT32_C( 91.20), SIMDE_FLOAT32_C( -33.50), SIMDE_FLOAT32_C( -65.20), SIMDE_FLOAT32_C( -55.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -47.40), SIMDE_FLOAT32_C( -20.10), SIMDE_FLOAT32_C( -89.40), SIMDE_FLOAT32_C( 87.90), SIMDE_FLOAT32_C( -65.50), SIMDE_FLOAT32_C( -20.70), SIMDE_FLOAT32_C( 88.30), SIMDE_FLOAT32_C( 20.40)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 50.40), SIMDE_FLOAT32_C( 75.40), SIMDE_FLOAT32_C( 79.80), SIMDE_FLOAT32_C( 5.10), SIMDE_FLOAT32_C( -6.50), SIMDE_FLOAT32_C( -47.90), SIMDE_FLOAT32_C( 48.50), SIMDE_FLOAT32_C( -69.90)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 2741.46), SIMDE_FLOAT32_C(-1142.71), SIMDE_FLOAT32_C( 6768.24), SIMDE_FLOAT32_C( 7290.60), 
SIMDE_FLOAT32_C(-5967.10), SIMDE_FLOAT32_C( 741.35), SIMDE_FLOAT32_C(-5805.66), SIMDE_FLOAT32_C(-1052.10)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 8.30), SIMDE_FLOAT32_C( 22.80), SIMDE_FLOAT32_C( -55.20), SIMDE_FLOAT32_C( -62.40), SIMDE_FLOAT32_C( -29.10), SIMDE_FLOAT32_C( 56.20), SIMDE_FLOAT32_C( 96.20), SIMDE_FLOAT32_C( 45.90)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 9.40), SIMDE_FLOAT32_C( -58.60), SIMDE_FLOAT32_C( -71.50), SIMDE_FLOAT32_C( 52.70), SIMDE_FLOAT32_C( -96.40), SIMDE_FLOAT32_C( 75.70), SIMDE_FLOAT32_C( -3.70), SIMDE_FLOAT32_C( 35.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -87.20), SIMDE_FLOAT32_C( -73.80), SIMDE_FLOAT32_C( -51.80), SIMDE_FLOAT32_C( 49.30), SIMDE_FLOAT32_C( 9.90), SIMDE_FLOAT32_C( 32.40), SIMDE_FLOAT32_C( -44.20), SIMDE_FLOAT32_C( 88.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 165.22), SIMDE_FLOAT32_C(-1262.28), SIMDE_FLOAT32_C( 3998.60), SIMDE_FLOAT32_C(-3337.78), SIMDE_FLOAT32_C( 2795.34), SIMDE_FLOAT32_C( 4221.94), SIMDE_FLOAT32_C( -311.74), SIMDE_FLOAT32_C( 1545.54)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -71.10), SIMDE_FLOAT32_C( -36.70), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -19.80), SIMDE_FLOAT32_C( -33.20), SIMDE_FLOAT32_C( 94.30), SIMDE_FLOAT32_C( 1.20), SIMDE_FLOAT32_C( 43.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 91.40), SIMDE_FLOAT32_C( 40.00), SIMDE_FLOAT32_C( 26.00), SIMDE_FLOAT32_C( 80.90), SIMDE_FLOAT32_C( -92.20), SIMDE_FLOAT32_C( -86.10), SIMDE_FLOAT32_C( 71.10), SIMDE_FLOAT32_C( 10.10)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 29.80), SIMDE_FLOAT32_C( -33.80), SIMDE_FLOAT32_C( -52.50), SIMDE_FLOAT32_C( 52.00), SIMDE_FLOAT32_C( -20.10), SIMDE_FLOAT32_C( -49.80), SIMDE_FLOAT32_C( 36.10), SIMDE_FLOAT32_C( 37.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-6528.34), SIMDE_FLOAT32_C(-1434.20), SIMDE_FLOAT32_C( 104.50), SIMDE_FLOAT32_C(-1653.82), SIMDE_FLOAT32_C( 3081.14), SIMDE_FLOAT32_C(-8069.43), SIMDE_FLOAT32_C( 49.22), SIMDE_FLOAT32_C( 402.35)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { 
simde__m256 r = simde_mm256_fmsub_ps(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_fmsub_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d c; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 43.20), SIMDE_FLOAT64_C( -60.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( -12.80), SIMDE_FLOAT64_C( 56.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( -27.10), SIMDE_FLOAT64_C( 60.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( 43.20), SIMDE_FLOAT64_C(-3461.40)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -79.30), SIMDE_FLOAT64_C( 88.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 44.30), SIMDE_FLOAT64_C( 37.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( 77.70), SIMDE_FLOAT64_C( 22.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -79.30), SIMDE_FLOAT64_C( 3302.86)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -1.70), SIMDE_FLOAT64_C( -49.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 87.10), SIMDE_FLOAT64_C( -41.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -71.70), SIMDE_FLOAT64_C( 16.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( -1.70), SIMDE_FLOAT64_C( 2025.60)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -81.20), SIMDE_FLOAT64_C( 22.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -90.90), SIMDE_FLOAT64_C( 95.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( -81.00), SIMDE_FLOAT64_C( -21.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -81.20), SIMDE_FLOAT64_C( 2113.20)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 86.00), SIMDE_FLOAT64_C( 69.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( -68.40), SIMDE_FLOAT64_C( -83.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( 95.80), SIMDE_FLOAT64_C( 94.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( 86.00), SIMDE_FLOAT64_C(-5903.08)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -19.30), SIMDE_FLOAT64_C( -49.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( -62.80), SIMDE_FLOAT64_C( 42.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( 30.00), SIMDE_FLOAT64_C( -69.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -19.30), SIMDE_FLOAT64_C(-2001.60)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 
23.40), SIMDE_FLOAT64_C( -19.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( 85.50), SIMDE_FLOAT64_C( 56.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 27.00), SIMDE_FLOAT64_C( -47.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( 23.40), SIMDE_FLOAT64_C(-1062.35)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -59.40), SIMDE_FLOAT64_C( 23.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -67.50), SIMDE_FLOAT64_C( 79.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( 98.40), SIMDE_FLOAT64_C( -48.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( -59.40), SIMDE_FLOAT64_C( 1870.40)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_fmsub_sd(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_fmsub_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 c; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 81.20), SIMDE_FLOAT32_C( 26.30), SIMDE_FLOAT32_C( 21.90), SIMDE_FLOAT32_C( 41.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 37.70), SIMDE_FLOAT32_C( 61.40), SIMDE_FLOAT32_C( 87.60), SIMDE_FLOAT32_C( -37.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 85.80), SIMDE_FLOAT32_C( -48.50), SIMDE_FLOAT32_C( 52.10), SIMDE_FLOAT32_C( 67.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 81.20), SIMDE_FLOAT32_C( 26.30), SIMDE_FLOAT32_C( 21.90), SIMDE_FLOAT32_C(-1622.66)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -23.60), SIMDE_FLOAT32_C( -82.80), SIMDE_FLOAT32_C( 55.80), SIMDE_FLOAT32_C( -90.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( 53.10), SIMDE_FLOAT32_C( -75.20), SIMDE_FLOAT32_C( -26.00), SIMDE_FLOAT32_C( 93.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 47.50), SIMDE_FLOAT32_C( 39.90), SIMDE_FLOAT32_C( -49.20), SIMDE_FLOAT32_C( -86.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( -23.60), SIMDE_FLOAT32_C( -82.80), SIMDE_FLOAT32_C( 55.80), SIMDE_FLOAT32_C(-8383.24)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( -99.10), SIMDE_FLOAT32_C( 26.00), SIMDE_FLOAT32_C( 32.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( 
29.60), SIMDE_FLOAT32_C( -93.20), SIMDE_FLOAT32_C( -96.10), SIMDE_FLOAT32_C( 87.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( 76.20), SIMDE_FLOAT32_C( -98.50), SIMDE_FLOAT32_C( 4.10), SIMDE_FLOAT32_C( -66.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( -99.10), SIMDE_FLOAT32_C( 26.00), SIMDE_FLOAT32_C( 2910.25)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -58.80), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( -50.10), SIMDE_FLOAT32_C( -58.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( 53.70), SIMDE_FLOAT32_C( -83.00), SIMDE_FLOAT32_C( -66.70), SIMDE_FLOAT32_C( 96.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( -97.40), SIMDE_FLOAT32_C( 97.80), SIMDE_FLOAT32_C( 93.40), SIMDE_FLOAT32_C( -82.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( -58.80), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( -50.10), SIMDE_FLOAT32_C(-5578.26)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 26.20), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 53.40), SIMDE_FLOAT32_C( 40.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( -60.30), SIMDE_FLOAT32_C( -94.00), SIMDE_FLOAT32_C( 14.10), SIMDE_FLOAT32_C( -94.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( 75.80), SIMDE_FLOAT32_C( 16.70), SIMDE_FLOAT32_C( -3.80), SIMDE_FLOAT32_C( -98.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( 26.20), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 53.40), SIMDE_FLOAT32_C(-3719.30)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 6.90), SIMDE_FLOAT32_C( 37.30), SIMDE_FLOAT32_C( 95.60), SIMDE_FLOAT32_C( 26.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 6.20), SIMDE_FLOAT32_C( 51.70), SIMDE_FLOAT32_C( -27.80), SIMDE_FLOAT32_C( 35.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( 96.60), SIMDE_FLOAT32_C( 16.30), SIMDE_FLOAT32_C( -87.40), SIMDE_FLOAT32_C( 51.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 6.90), SIMDE_FLOAT32_C( 37.30), SIMDE_FLOAT32_C( 95.60), SIMDE_FLOAT32_C( 881.72)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -41.80), SIMDE_FLOAT32_C( -50.90), SIMDE_FLOAT32_C( 94.30), SIMDE_FLOAT32_C( 92.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( -51.70), SIMDE_FLOAT32_C( 66.70), SIMDE_FLOAT32_C( 35.70), SIMDE_FLOAT32_C( 84.90)), 
simde_mm_set_ps(SIMDE_FLOAT32_C( -89.60), SIMDE_FLOAT32_C( -35.50), SIMDE_FLOAT32_C( -45.20), SIMDE_FLOAT32_C( -87.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( -41.80), SIMDE_FLOAT32_C( -50.90), SIMDE_FLOAT32_C( 94.30), SIMDE_FLOAT32_C( 7940.85)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 60.00), SIMDE_FLOAT32_C( 45.70), SIMDE_FLOAT32_C( 16.60), SIMDE_FLOAT32_C( 40.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( -13.60), SIMDE_FLOAT32_C( -11.50), SIMDE_FLOAT32_C( -61.10), SIMDE_FLOAT32_C( -64.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 96.70), SIMDE_FLOAT32_C( -80.10), SIMDE_FLOAT32_C( 37.00), SIMDE_FLOAT32_C( 74.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 60.00), SIMDE_FLOAT32_C( 45.70), SIMDE_FLOAT32_C( 16.60), SIMDE_FLOAT32_C(-2687.64)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_fmsub_ss(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_fmsubadd_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d c; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -50.60), SIMDE_FLOAT64_C( -67.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( -79.80), SIMDE_FLOAT64_C( -83.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( 34.70), SIMDE_FLOAT64_C( -10.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( 4003.18), SIMDE_FLOAT64_C( 5567.60)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -31.80), SIMDE_FLOAT64_C( -73.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -80.50), SIMDE_FLOAT64_C( 26.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( -10.90), SIMDE_FLOAT64_C( -36.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 2570.80), SIMDE_FLOAT64_C(-1982.58)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 7.90), SIMDE_FLOAT64_C( -20.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( 91.90), SIMDE_FLOAT64_C( -31.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( -17.90), SIMDE_FLOAT64_C( -72.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( 743.91), SIMDE_FLOAT64_C( 579.30)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( 
20.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 75.20), SIMDE_FLOAT64_C( -63.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( 33.00), SIMDE_FLOAT64_C( 76.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( 34.68), SIMDE_FLOAT64_C(-1240.34)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 45.60), SIMDE_FLOAT64_C( -62.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( -70.50), SIMDE_FLOAT64_C( 21.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -20.00), SIMDE_FLOAT64_C( 73.20)), simde_mm_set_pd(SIMDE_FLOAT64_C(-3194.80), SIMDE_FLOAT64_C(-1239.30)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -10.30), SIMDE_FLOAT64_C( -71.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( 63.70), SIMDE_FLOAT64_C( -56.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -15.40), SIMDE_FLOAT64_C( 29.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( -640.71), SIMDE_FLOAT64_C( 4083.15)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -6.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( -84.60), SIMDE_FLOAT64_C( -37.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( 53.20), SIMDE_FLOAT64_C( -28.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( 31.40), SIMDE_FLOAT64_C( 231.63)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -13.10), SIMDE_FLOAT64_C( -9.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -34.00), SIMDE_FLOAT64_C( -63.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 31.60), SIMDE_FLOAT64_C( -13.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 413.80), SIMDE_FLOAT64_C( 561.30)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_fmsubadd_pd(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_fmsubadd_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d c; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -49.40), SIMDE_FLOAT64_C( -57.60), SIMDE_FLOAT64_C( -73.20), SIMDE_FLOAT64_C( -70.10)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( 46.20), SIMDE_FLOAT64_C( -46.70), SIMDE_FLOAT64_C( -70.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 
79.00), SIMDE_FLOAT64_C( -79.60), SIMDE_FLOAT64_C( 19.80), SIMDE_FLOAT64_C( -16.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -74.06), SIMDE_FLOAT64_C(-2740.72), SIMDE_FLOAT64_C( 3398.64), SIMDE_FLOAT64_C( 4947.08)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -12.30), SIMDE_FLOAT64_C( 53.50), SIMDE_FLOAT64_C( -97.80), SIMDE_FLOAT64_C( -85.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 67.10), SIMDE_FLOAT64_C( -30.10), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( -23.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 49.80), SIMDE_FLOAT64_C( 87.30), SIMDE_FLOAT64_C( -23.10), SIMDE_FLOAT64_C( 15.90)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -875.13), SIMDE_FLOAT64_C(-1523.05), SIMDE_FLOAT64_C( 52.44), SIMDE_FLOAT64_C( 2043.66)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -27.20), SIMDE_FLOAT64_C( -72.40), SIMDE_FLOAT64_C( 53.20), SIMDE_FLOAT64_C( -9.50)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -71.40), SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( -61.10), SIMDE_FLOAT64_C( -97.30)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 85.60), SIMDE_FLOAT64_C( 27.60), SIMDE_FLOAT64_C( 19.30), SIMDE_FLOAT64_C( 46.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1856.48), SIMDE_FLOAT64_C( 13.12), SIMDE_FLOAT64_C(-3269.82), SIMDE_FLOAT64_C( 970.95)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 42.40), SIMDE_FLOAT64_C( -47.00), SIMDE_FLOAT64_C( 57.40), SIMDE_FLOAT64_C( -79.50)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -85.60), SIMDE_FLOAT64_C( -55.10), SIMDE_FLOAT64_C( 8.90), SIMDE_FLOAT64_C( -9.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 78.80), SIMDE_FLOAT64_C( 18.80), SIMDE_FLOAT64_C( -90.80), SIMDE_FLOAT64_C( 46.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-3708.24), SIMDE_FLOAT64_C( 2608.50), SIMDE_FLOAT64_C( 601.66), SIMDE_FLOAT64_C( 817.15)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 34.00), SIMDE_FLOAT64_C( 57.40), SIMDE_FLOAT64_C( 76.30), SIMDE_FLOAT64_C( 99.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 74.30), SIMDE_FLOAT64_C( 64.30), SIMDE_FLOAT64_C( -88.20), SIMDE_FLOAT64_C( -42.40)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -20.50), 
SIMDE_FLOAT64_C( 98.80), SIMDE_FLOAT64_C( -81.30), SIMDE_FLOAT64_C( 9.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 2546.70), SIMDE_FLOAT64_C( 3789.62), SIMDE_FLOAT64_C(-6648.36), SIMDE_FLOAT64_C(-4196.38)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 26.30), SIMDE_FLOAT64_C( -10.40), SIMDE_FLOAT64_C( -16.90), SIMDE_FLOAT64_C( -91.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -5.60), SIMDE_FLOAT64_C( -40.40), SIMDE_FLOAT64_C( 57.90), SIMDE_FLOAT64_C( 93.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -66.10), SIMDE_FLOAT64_C( -60.00), SIMDE_FLOAT64_C( -42.50), SIMDE_FLOAT64_C( -45.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -81.18), SIMDE_FLOAT64_C( 360.16), SIMDE_FLOAT64_C( -936.01), SIMDE_FLOAT64_C(-8628.72)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -77.70), SIMDE_FLOAT64_C( 79.90), SIMDE_FLOAT64_C( 16.20), SIMDE_FLOAT64_C( -77.30)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 18.40), SIMDE_FLOAT64_C( 71.60), SIMDE_FLOAT64_C( -95.70), SIMDE_FLOAT64_C( -21.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -83.90), SIMDE_FLOAT64_C( 14.30), SIMDE_FLOAT64_C( -44.90), SIMDE_FLOAT64_C( 72.40)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-1345.78), SIMDE_FLOAT64_C( 5735.14), SIMDE_FLOAT64_C(-1505.44), SIMDE_FLOAT64_C( 1757.54)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 68.20), SIMDE_FLOAT64_C( 18.60), SIMDE_FLOAT64_C( 38.50), SIMDE_FLOAT64_C( 98.50)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -61.10), SIMDE_FLOAT64_C( -31.60), SIMDE_FLOAT64_C( 70.50), SIMDE_FLOAT64_C( 85.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 94.30), SIMDE_FLOAT64_C( 91.40), SIMDE_FLOAT64_C( -28.70), SIMDE_FLOAT64_C( 64.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-4261.32), SIMDE_FLOAT64_C( -496.36), SIMDE_FLOAT64_C( 2742.95), SIMDE_FLOAT64_C( 8456.80)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_fmsubadd_pd(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_fmsubadd_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { 
simde__m128 a; simde__m128 b; simde__m128 c; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -29.90), SIMDE_FLOAT32_C( -50.10), SIMDE_FLOAT32_C( 13.10), SIMDE_FLOAT32_C( 52.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( -63.90), SIMDE_FLOAT32_C( -96.40), SIMDE_FLOAT32_C( 84.20), SIMDE_FLOAT32_C( -48.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 53.80), SIMDE_FLOAT32_C( -3.40), SIMDE_FLOAT32_C( 13.90), SIMDE_FLOAT32_C( -46.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1856.81), SIMDE_FLOAT32_C( 4826.24), SIMDE_FLOAT32_C( 1089.12), SIMDE_FLOAT32_C(-2566.86)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 35.60), SIMDE_FLOAT32_C( 8.10), SIMDE_FLOAT32_C( -35.10), SIMDE_FLOAT32_C( 22.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( 77.40), SIMDE_FLOAT32_C( -43.50), SIMDE_FLOAT32_C( -53.00), SIMDE_FLOAT32_C( 60.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 15.60), SIMDE_FLOAT32_C( -4.70), SIMDE_FLOAT32_C( 24.20), SIMDE_FLOAT32_C( -46.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 2739.84), SIMDE_FLOAT32_C( -357.05), SIMDE_FLOAT32_C( 1836.10), SIMDE_FLOAT32_C( 1309.14)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -92.90), SIMDE_FLOAT32_C( 31.90), SIMDE_FLOAT32_C( -29.90), SIMDE_FLOAT32_C( -95.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( 46.90), SIMDE_FLOAT32_C( -89.80), SIMDE_FLOAT32_C( 18.10), SIMDE_FLOAT32_C( -72.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 79.60), SIMDE_FLOAT32_C( -32.40), SIMDE_FLOAT32_C( -3.60), SIMDE_FLOAT32_C( -57.10)), simde_mm_set_ps(SIMDE_FLOAT32_C(-4436.61), SIMDE_FLOAT32_C(-2897.02), SIMDE_FLOAT32_C( -537.59), SIMDE_FLOAT32_C( 6871.21)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 68.50), SIMDE_FLOAT32_C( 48.90), SIMDE_FLOAT32_C( 86.30), SIMDE_FLOAT32_C( 72.10)), simde_mm_set_ps(SIMDE_FLOAT32_C( -80.00), SIMDE_FLOAT32_C( -44.60), SIMDE_FLOAT32_C( -3.60), SIMDE_FLOAT32_C( -91.10)), simde_mm_set_ps(SIMDE_FLOAT32_C( -57.30), SIMDE_FLOAT32_C( 2.10), SIMDE_FLOAT32_C( -33.70), SIMDE_FLOAT32_C( -13.60)), simde_mm_set_ps(SIMDE_FLOAT32_C(-5422.70), SIMDE_FLOAT32_C(-2178.84), SIMDE_FLOAT32_C( -276.98), 
SIMDE_FLOAT32_C(-6581.91)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -38.10), SIMDE_FLOAT32_C( -61.30), SIMDE_FLOAT32_C( 38.90), SIMDE_FLOAT32_C( -79.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( 64.20), SIMDE_FLOAT32_C( 71.60), SIMDE_FLOAT32_C( -99.30), SIMDE_FLOAT32_C( -87.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( -46.40), SIMDE_FLOAT32_C( 45.20), SIMDE_FLOAT32_C( -56.00), SIMDE_FLOAT32_C( 0.40)), simde_mm_set_ps(SIMDE_FLOAT32_C(-2399.62), SIMDE_FLOAT32_C(-4343.88), SIMDE_FLOAT32_C(-3806.77), SIMDE_FLOAT32_C( 6915.36)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -88.30), SIMDE_FLOAT32_C( -23.50), SIMDE_FLOAT32_C( 48.80), SIMDE_FLOAT32_C( -55.10)), simde_mm_set_ps(SIMDE_FLOAT32_C( -31.80), SIMDE_FLOAT32_C( 50.50), SIMDE_FLOAT32_C( -24.10), SIMDE_FLOAT32_C( -80.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( -18.70), SIMDE_FLOAT32_C( -24.70), SIMDE_FLOAT32_C( -56.50), SIMDE_FLOAT32_C( 57.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 2826.64), SIMDE_FLOAT32_C(-1211.45), SIMDE_FLOAT32_C(-1119.58), SIMDE_FLOAT32_C( 4476.72)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 48.60), SIMDE_FLOAT32_C( 33.60), SIMDE_FLOAT32_C( 8.60), SIMDE_FLOAT32_C( 57.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( 52.40), SIMDE_FLOAT32_C( 2.70), SIMDE_FLOAT32_C( 57.50), SIMDE_FLOAT32_C( -10.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( -26.60), SIMDE_FLOAT32_C( -67.20), SIMDE_FLOAT32_C( 5.80), SIMDE_FLOAT32_C( 75.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 2573.24), SIMDE_FLOAT32_C( 23.52), SIMDE_FLOAT32_C( 488.70), SIMDE_FLOAT32_C( -527.16)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -25.60), SIMDE_FLOAT32_C( 57.60), SIMDE_FLOAT32_C( -91.00), SIMDE_FLOAT32_C( 53.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( -92.00), SIMDE_FLOAT32_C( 35.10), SIMDE_FLOAT32_C( 8.60), SIMDE_FLOAT32_C( 0.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( -41.20), SIMDE_FLOAT32_C( -81.00), SIMDE_FLOAT32_C( -21.80), SIMDE_FLOAT32_C( -49.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 2396.40), SIMDE_FLOAT32_C( 1940.76), SIMDE_FLOAT32_C( -760.80), SIMDE_FLOAT32_C( -49.00)) } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_fmsubadd_ps(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_fmsubadd_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 c; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 80.60), SIMDE_FLOAT32_C( -80.20), SIMDE_FLOAT32_C( 25.10), SIMDE_FLOAT32_C( 54.40), SIMDE_FLOAT32_C( -94.50), SIMDE_FLOAT32_C( -99.70), SIMDE_FLOAT32_C( 67.30), SIMDE_FLOAT32_C( -5.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -98.10), SIMDE_FLOAT32_C( -47.30), SIMDE_FLOAT32_C( -82.80), SIMDE_FLOAT32_C( -26.80), SIMDE_FLOAT32_C( 87.80), SIMDE_FLOAT32_C( 71.10), SIMDE_FLOAT32_C( 92.80), SIMDE_FLOAT32_C( -97.90)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -36.00), SIMDE_FLOAT32_C( -59.40), SIMDE_FLOAT32_C( -69.40), SIMDE_FLOAT32_C( 50.50), SIMDE_FLOAT32_C( 70.50), SIMDE_FLOAT32_C( 26.60), SIMDE_FLOAT32_C( 29.70), SIMDE_FLOAT32_C( -14.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-7870.86), SIMDE_FLOAT32_C( 3734.06), SIMDE_FLOAT32_C(-2008.88), SIMDE_FLOAT32_C(-1407.42), SIMDE_FLOAT32_C(-8367.60), SIMDE_FLOAT32_C(-7062.07), SIMDE_FLOAT32_C( 6215.74), SIMDE_FLOAT32_C( 553.02)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -68.50), SIMDE_FLOAT32_C( -56.10), SIMDE_FLOAT32_C( 89.00), SIMDE_FLOAT32_C( -96.30), SIMDE_FLOAT32_C( 41.10), SIMDE_FLOAT32_C( -67.50), SIMDE_FLOAT32_C( 59.30), SIMDE_FLOAT32_C( -62.90)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -50.40), SIMDE_FLOAT32_C( -79.00), SIMDE_FLOAT32_C( 93.10), SIMDE_FLOAT32_C( -46.20), SIMDE_FLOAT32_C( -86.10), SIMDE_FLOAT32_C( 19.30), SIMDE_FLOAT32_C( -62.90), SIMDE_FLOAT32_C( -49.30)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -28.70), SIMDE_FLOAT32_C( -24.80), SIMDE_FLOAT32_C( 30.30), SIMDE_FLOAT32_C( -97.00), SIMDE_FLOAT32_C( -57.70), SIMDE_FLOAT32_C( -32.40), SIMDE_FLOAT32_C( -8.20), SIMDE_FLOAT32_C( 75.20)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 3481.10), 
SIMDE_FLOAT32_C( 4407.10), SIMDE_FLOAT32_C( 8255.60), SIMDE_FLOAT32_C( 4352.06), SIMDE_FLOAT32_C(-3481.01), SIMDE_FLOAT32_C(-1335.15), SIMDE_FLOAT32_C(-3721.77), SIMDE_FLOAT32_C( 3176.17)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -40.40), SIMDE_FLOAT32_C( 50.40), SIMDE_FLOAT32_C( 3.90), SIMDE_FLOAT32_C( -96.60), SIMDE_FLOAT32_C( 84.00), SIMDE_FLOAT32_C( 63.30), SIMDE_FLOAT32_C( 71.70), SIMDE_FLOAT32_C( -5.40)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 88.10), SIMDE_FLOAT32_C( 75.30), SIMDE_FLOAT32_C( -17.10), SIMDE_FLOAT32_C( -27.60), SIMDE_FLOAT32_C( 47.20), SIMDE_FLOAT32_C( -72.70), SIMDE_FLOAT32_C( -49.20), SIMDE_FLOAT32_C( -33.10)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 96.20), SIMDE_FLOAT32_C( 64.10), SIMDE_FLOAT32_C( 96.10), SIMDE_FLOAT32_C( -18.70), SIMDE_FLOAT32_C( -31.60), SIMDE_FLOAT32_C( 43.60), SIMDE_FLOAT32_C( -90.90), SIMDE_FLOAT32_C( -27.30)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-3655.44), SIMDE_FLOAT32_C( 3859.22), SIMDE_FLOAT32_C( -162.79), SIMDE_FLOAT32_C( 2647.46), SIMDE_FLOAT32_C( 3996.40), SIMDE_FLOAT32_C(-4558.31), SIMDE_FLOAT32_C(-3436.74), SIMDE_FLOAT32_C( 151.44)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -59.60), SIMDE_FLOAT32_C( -83.90), SIMDE_FLOAT32_C( 58.10), SIMDE_FLOAT32_C( -6.90), SIMDE_FLOAT32_C( 99.80), SIMDE_FLOAT32_C( -64.30), SIMDE_FLOAT32_C( 87.70), SIMDE_FLOAT32_C( 55.40)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 53.60), SIMDE_FLOAT32_C( 89.30), SIMDE_FLOAT32_C( -70.60), SIMDE_FLOAT32_C( 99.70), SIMDE_FLOAT32_C( -5.30), SIMDE_FLOAT32_C( 5.60), SIMDE_FLOAT32_C( 86.80), SIMDE_FLOAT32_C( -0.20)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 15.90), SIMDE_FLOAT32_C( -12.20), SIMDE_FLOAT32_C( 93.70), SIMDE_FLOAT32_C( -91.90), SIMDE_FLOAT32_C( 34.20), SIMDE_FLOAT32_C( -64.50), SIMDE_FLOAT32_C( 97.10), SIMDE_FLOAT32_C( -8.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-3210.46), SIMDE_FLOAT32_C(-7504.47), SIMDE_FLOAT32_C(-4195.56), SIMDE_FLOAT32_C( -779.83), SIMDE_FLOAT32_C( -563.14), SIMDE_FLOAT32_C( -424.58), SIMDE_FLOAT32_C( 7515.26), SIMDE_FLOAT32_C( 
-19.08)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 49.20), SIMDE_FLOAT32_C( -59.10), SIMDE_FLOAT32_C( -10.90), SIMDE_FLOAT32_C( -67.30), SIMDE_FLOAT32_C( 52.90), SIMDE_FLOAT32_C( -9.10), SIMDE_FLOAT32_C( -30.60), SIMDE_FLOAT32_C( -79.10)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -82.90), SIMDE_FLOAT32_C( 24.10), SIMDE_FLOAT32_C( 5.20), SIMDE_FLOAT32_C( -4.60), SIMDE_FLOAT32_C( -64.40), SIMDE_FLOAT32_C( -6.30), SIMDE_FLOAT32_C( 88.20), SIMDE_FLOAT32_C( 59.20)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 59.30), SIMDE_FLOAT32_C( -23.80), SIMDE_FLOAT32_C( 86.10), SIMDE_FLOAT32_C( 45.80), SIMDE_FLOAT32_C( -77.20), SIMDE_FLOAT32_C( 3.40), SIMDE_FLOAT32_C( 70.60), SIMDE_FLOAT32_C( -87.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-4137.98), SIMDE_FLOAT32_C(-1448.11), SIMDE_FLOAT32_C( -142.78), SIMDE_FLOAT32_C( 355.38), SIMDE_FLOAT32_C(-3329.56), SIMDE_FLOAT32_C( 60.73), SIMDE_FLOAT32_C(-2769.52), SIMDE_FLOAT32_C(-4769.72)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -41.90), SIMDE_FLOAT32_C( 60.40), SIMDE_FLOAT32_C( -79.60), SIMDE_FLOAT32_C( 95.50), SIMDE_FLOAT32_C( 31.30), SIMDE_FLOAT32_C( -95.40), SIMDE_FLOAT32_C( 27.30), SIMDE_FLOAT32_C( 96.90)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 46.90), SIMDE_FLOAT32_C( -42.30), SIMDE_FLOAT32_C( 95.50), SIMDE_FLOAT32_C( -75.00), SIMDE_FLOAT32_C( 48.70), SIMDE_FLOAT32_C( 76.90), SIMDE_FLOAT32_C( 81.90), SIMDE_FLOAT32_C( 70.10)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -69.40), SIMDE_FLOAT32_C( 89.00), SIMDE_FLOAT32_C( -88.50), SIMDE_FLOAT32_C( 76.60), SIMDE_FLOAT32_C( -55.90), SIMDE_FLOAT32_C( -98.10), SIMDE_FLOAT32_C( -24.00), SIMDE_FLOAT32_C( -35.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-1895.71), SIMDE_FLOAT32_C(-2465.92), SIMDE_FLOAT32_C(-7513.30), SIMDE_FLOAT32_C(-7085.90), SIMDE_FLOAT32_C( 1580.21), SIMDE_FLOAT32_C(-7434.36), SIMDE_FLOAT32_C( 2259.87), SIMDE_FLOAT32_C( 6756.89)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 12.90), SIMDE_FLOAT32_C( 65.20), SIMDE_FLOAT32_C( 56.70), SIMDE_FLOAT32_C( 39.40), SIMDE_FLOAT32_C( -25.60), SIMDE_FLOAT32_C( -1.40), 
SIMDE_FLOAT32_C( 44.70), SIMDE_FLOAT32_C( -72.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -49.00), SIMDE_FLOAT32_C( 97.00), SIMDE_FLOAT32_C( -63.50), SIMDE_FLOAT32_C( -40.00), SIMDE_FLOAT32_C( 48.40), SIMDE_FLOAT32_C( 30.20), SIMDE_FLOAT32_C( -73.80), SIMDE_FLOAT32_C( -79.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 5.30), SIMDE_FLOAT32_C( 28.90), SIMDE_FLOAT32_C( 61.30), SIMDE_FLOAT32_C( -5.70), SIMDE_FLOAT32_C( 39.10), SIMDE_FLOAT32_C( -88.70), SIMDE_FLOAT32_C( 17.20), SIMDE_FLOAT32_C( 0.40)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -637.40), SIMDE_FLOAT32_C( 6353.30), SIMDE_FLOAT32_C(-3661.75), SIMDE_FLOAT32_C(-1581.70), SIMDE_FLOAT32_C(-1278.14), SIMDE_FLOAT32_C( -130.98), SIMDE_FLOAT32_C(-3316.06), SIMDE_FLOAT32_C( 5724.40)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( -69.70), SIMDE_FLOAT32_C( -80.70), SIMDE_FLOAT32_C( 7.80), SIMDE_FLOAT32_C( -92.30), SIMDE_FLOAT32_C( 11.90), SIMDE_FLOAT32_C( 59.30), SIMDE_FLOAT32_C( -21.40)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -78.70), SIMDE_FLOAT32_C( -69.80), SIMDE_FLOAT32_C( 38.10), SIMDE_FLOAT32_C( 22.10), SIMDE_FLOAT32_C( -96.20), SIMDE_FLOAT32_C( 60.20), SIMDE_FLOAT32_C( 49.80), SIMDE_FLOAT32_C( -68.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 52.80), SIMDE_FLOAT32_C( 2.20), SIMDE_FLOAT32_C( -17.20), SIMDE_FLOAT32_C( 60.50), SIMDE_FLOAT32_C( -86.40), SIMDE_FLOAT32_C( -89.40), SIMDE_FLOAT32_C( -67.80), SIMDE_FLOAT32_C( 4.40)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -170.85), SIMDE_FLOAT32_C( 4867.26), SIMDE_FLOAT32_C(-3057.47), SIMDE_FLOAT32_C( 232.88), SIMDE_FLOAT32_C( 8965.66), SIMDE_FLOAT32_C( 626.98), SIMDE_FLOAT32_C( 3020.94), SIMDE_FLOAT32_C( 1470.30)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_fmsubadd_ps(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_fnmadd_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d c; simde__m128d r; 
} test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -93.70), SIMDE_FLOAT64_C( 14.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( 8.90), SIMDE_FLOAT64_C( -15.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( -87.90), SIMDE_FLOAT64_C( -34.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 746.03), SIMDE_FLOAT64_C( 194.16)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 38.10), SIMDE_FLOAT64_C( -13.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( 63.40), SIMDE_FLOAT64_C( -68.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 13.30), SIMDE_FLOAT64_C( -61.60)), simde_mm_set_pd(SIMDE_FLOAT64_C(-2402.24), SIMDE_FLOAT64_C( -969.76)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 94.40), SIMDE_FLOAT64_C( 89.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( -60.60), SIMDE_FLOAT64_C( -24.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -87.30), SIMDE_FLOAT64_C( 84.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 5633.34), SIMDE_FLOAT64_C( 2305.33)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -6.90), SIMDE_FLOAT64_C( 88.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 6.60), SIMDE_FLOAT64_C( -57.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 35.50), SIMDE_FLOAT64_C( 30.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 81.04), SIMDE_FLOAT64_C( 5163.44)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 51.80), SIMDE_FLOAT64_C( 95.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( -57.60), SIMDE_FLOAT64_C( -59.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( -97.40), SIMDE_FLOAT64_C( -60.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( 2886.28), SIMDE_FLOAT64_C( 5650.60)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 71.70), SIMDE_FLOAT64_C( -99.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( 27.40), SIMDE_FLOAT64_C( 37.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 62.10), SIMDE_FLOAT64_C( 17.90)), simde_mm_set_pd(SIMDE_FLOAT64_C(-1902.48), SIMDE_FLOAT64_C( 3785.16)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 43.60), SIMDE_FLOAT64_C( 78.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( -37.30), SIMDE_FLOAT64_C( -4.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( -23.90), SIMDE_FLOAT64_C( -9.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1602.38), SIMDE_FLOAT64_C( 369.24)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 
26.20), SIMDE_FLOAT64_C( -96.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( 57.90), SIMDE_FLOAT64_C( 91.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( 97.90), SIMDE_FLOAT64_C( 18.30)), simde_mm_set_pd(SIMDE_FLOAT64_C(-1419.08), SIMDE_FLOAT64_C( 8857.20)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_fnmadd_pd(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_fnmadd_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d c; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 17.60), SIMDE_FLOAT64_C( -99.20), SIMDE_FLOAT64_C( 64.80), SIMDE_FLOAT64_C( -66.40)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -84.50), SIMDE_FLOAT64_C( 62.70), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 62.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 6.20), SIMDE_FLOAT64_C( -52.40), SIMDE_FLOAT64_C( -54.70), SIMDE_FLOAT64_C( 93.30)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1493.40), SIMDE_FLOAT64_C( 6167.44), SIMDE_FLOAT64_C( 10.10), SIMDE_FLOAT64_C( 4210.10)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -70.30), SIMDE_FLOAT64_C( 67.00), SIMDE_FLOAT64_C( 26.40), SIMDE_FLOAT64_C( 52.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 8.30), SIMDE_FLOAT64_C( -6.70), SIMDE_FLOAT64_C( -38.30), SIMDE_FLOAT64_C( -42.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 77.70), SIMDE_FLOAT64_C( 26.30), SIMDE_FLOAT64_C( 10.50), SIMDE_FLOAT64_C( 36.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 661.19), SIMDE_FLOAT64_C( 475.20), SIMDE_FLOAT64_C( 1021.62), SIMDE_FLOAT64_C( 2231.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -57.10), SIMDE_FLOAT64_C( 58.80), SIMDE_FLOAT64_C( 93.20), SIMDE_FLOAT64_C( -86.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 59.10), SIMDE_FLOAT64_C( 76.50), SIMDE_FLOAT64_C( 45.10), SIMDE_FLOAT64_C( 67.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 60.20), SIMDE_FLOAT64_C( 65.10), SIMDE_FLOAT64_C( -17.00), SIMDE_FLOAT64_C( -84.40)), 
simde_mm256_set_pd(SIMDE_FLOAT64_C( 3434.81), SIMDE_FLOAT64_C(-4433.10), SIMDE_FLOAT64_C(-4220.32), SIMDE_FLOAT64_C( 5791.96)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 34.60), SIMDE_FLOAT64_C( -5.80), SIMDE_FLOAT64_C( 89.80), SIMDE_FLOAT64_C( -83.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 43.00), SIMDE_FLOAT64_C( 3.10), SIMDE_FLOAT64_C( -37.70), SIMDE_FLOAT64_C( -40.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 26.40), SIMDE_FLOAT64_C( -59.60), SIMDE_FLOAT64_C( -71.50), SIMDE_FLOAT64_C( 60.40)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-1461.40), SIMDE_FLOAT64_C( -41.62), SIMDE_FLOAT64_C( 3313.96), SIMDE_FLOAT64_C(-3317.52)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 40.20), SIMDE_FLOAT64_C( -24.50), SIMDE_FLOAT64_C( -31.60), SIMDE_FLOAT64_C( 3.30)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 39.80), SIMDE_FLOAT64_C( 32.70), SIMDE_FLOAT64_C( 20.30), SIMDE_FLOAT64_C( 49.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 69.00), SIMDE_FLOAT64_C( 7.80), SIMDE_FLOAT64_C( 99.70), SIMDE_FLOAT64_C( 49.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-1530.96), SIMDE_FLOAT64_C( 808.95), SIMDE_FLOAT64_C( 741.18), SIMDE_FLOAT64_C( -114.81)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -82.30), SIMDE_FLOAT64_C( -8.50), SIMDE_FLOAT64_C( -80.50), SIMDE_FLOAT64_C( 9.10)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 52.10), SIMDE_FLOAT64_C( -96.40), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( -86.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -93.70), SIMDE_FLOAT64_C( 8.90), SIMDE_FLOAT64_C( 46.10), SIMDE_FLOAT64_C( -50.90)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 4194.13), SIMDE_FLOAT64_C( -810.50), SIMDE_FLOAT64_C( 287.60), SIMDE_FLOAT64_C( 731.70)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 72.30), SIMDE_FLOAT64_C( 96.70), SIMDE_FLOAT64_C( -51.00), SIMDE_FLOAT64_C( -38.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 37.50), SIMDE_FLOAT64_C( 93.30), SIMDE_FLOAT64_C( 79.70), SIMDE_FLOAT64_C( 71.40)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 54.00), SIMDE_FLOAT64_C( 6.80), SIMDE_FLOAT64_C( -77.40), SIMDE_FLOAT64_C( -48.10)), 
simde_mm256_set_pd(SIMDE_FLOAT64_C(-2657.25), SIMDE_FLOAT64_C(-9015.31), SIMDE_FLOAT64_C( 3987.30), SIMDE_FLOAT64_C( 2665.10)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -52.40), SIMDE_FLOAT64_C( -75.40), SIMDE_FLOAT64_C( -96.00), SIMDE_FLOAT64_C( -23.10)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -21.90), SIMDE_FLOAT64_C( -53.30), SIMDE_FLOAT64_C( -90.50), SIMDE_FLOAT64_C( -18.30)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -63.30), SIMDE_FLOAT64_C( -23.10), SIMDE_FLOAT64_C( -88.90), SIMDE_FLOAT64_C( 67.10)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-1210.86), SIMDE_FLOAT64_C(-4041.92), SIMDE_FLOAT64_C(-8776.90), SIMDE_FLOAT64_C( -355.63)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_fnmadd_pd(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_fnmadd_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 c; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 5.20), SIMDE_FLOAT32_C( 59.60), SIMDE_FLOAT32_C( 87.70), SIMDE_FLOAT32_C( 47.10)), simde_mm_set_ps(SIMDE_FLOAT32_C( -48.20), SIMDE_FLOAT32_C( -88.00), SIMDE_FLOAT32_C( 90.80), SIMDE_FLOAT32_C( -22.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( -19.00), SIMDE_FLOAT32_C( 40.90), SIMDE_FLOAT32_C( 74.00), SIMDE_FLOAT32_C( 71.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 231.64), SIMDE_FLOAT32_C( 5285.70), SIMDE_FLOAT32_C(-7889.16), SIMDE_FLOAT32_C( 1150.39)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 23.70), SIMDE_FLOAT32_C( 46.10), SIMDE_FLOAT32_C( -5.90), SIMDE_FLOAT32_C( 49.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( 6.50), SIMDE_FLOAT32_C( 83.40), SIMDE_FLOAT32_C( -86.10), SIMDE_FLOAT32_C( 15.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( -10.40), SIMDE_FLOAT32_C( -37.00), SIMDE_FLOAT32_C( -97.90), SIMDE_FLOAT32_C( 43.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( -164.45), SIMDE_FLOAT32_C(-3881.74), SIMDE_FLOAT32_C( -605.89), SIMDE_FLOAT32_C( -725.28)) }, { 
simde_mm_set_ps(SIMDE_FLOAT32_C( -41.10), SIMDE_FLOAT32_C( 98.60), SIMDE_FLOAT32_C( -66.40), SIMDE_FLOAT32_C( 31.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( 80.90), SIMDE_FLOAT32_C( -40.10), SIMDE_FLOAT32_C( -24.70), SIMDE_FLOAT32_C( 7.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( 99.20), SIMDE_FLOAT32_C( -40.90), SIMDE_FLOAT32_C( -69.50), SIMDE_FLOAT32_C( 9.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3424.19), SIMDE_FLOAT32_C( 3912.96), SIMDE_FLOAT32_C(-1709.58), SIMDE_FLOAT32_C( -237.37)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 18.10), SIMDE_FLOAT32_C( -38.40), SIMDE_FLOAT32_C( -54.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( -5.80), SIMDE_FLOAT32_C( 84.90), SIMDE_FLOAT32_C( -77.80), SIMDE_FLOAT32_C( -32.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( -23.10), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 5.40), SIMDE_FLOAT32_C( 61.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( -21.36), SIMDE_FLOAT32_C(-1533.69), SIMDE_FLOAT32_C(-2982.12), SIMDE_FLOAT32_C(-1714.11)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -95.30), SIMDE_FLOAT32_C( -61.60), SIMDE_FLOAT32_C( -95.50), SIMDE_FLOAT32_C( -55.10)), simde_mm_set_ps(SIMDE_FLOAT32_C( -67.20), SIMDE_FLOAT32_C( 95.00), SIMDE_FLOAT32_C( 94.10), SIMDE_FLOAT32_C( 87.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( 45.20), SIMDE_FLOAT32_C( -12.10), SIMDE_FLOAT32_C( -17.00), SIMDE_FLOAT32_C( -48.70)), simde_mm_set_ps(SIMDE_FLOAT32_C(-6358.96), SIMDE_FLOAT32_C( 5839.90), SIMDE_FLOAT32_C( 8969.55), SIMDE_FLOAT32_C( 4767.04)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -85.20), SIMDE_FLOAT32_C( -17.40), SIMDE_FLOAT32_C( 5.50), SIMDE_FLOAT32_C( 51.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( 84.50), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 61.30), SIMDE_FLOAT32_C( -9.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( -44.50), SIMDE_FLOAT32_C( -83.00), SIMDE_FLOAT32_C( -17.60), SIMDE_FLOAT32_C( -95.10)), simde_mm_set_ps(SIMDE_FLOAT32_C( 7154.90), SIMDE_FLOAT32_C( -72.56), SIMDE_FLOAT32_C( -354.75), SIMDE_FLOAT32_C( 393.20)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 40.00), SIMDE_FLOAT32_C( 
-99.20), SIMDE_FLOAT32_C( -45.30), SIMDE_FLOAT32_C( 65.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3.60), SIMDE_FLOAT32_C( -27.50), SIMDE_FLOAT32_C( 92.40), SIMDE_FLOAT32_C( -74.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 91.90), SIMDE_FLOAT32_C( 62.60), SIMDE_FLOAT32_C( 33.10), SIMDE_FLOAT32_C( 17.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( -52.10), SIMDE_FLOAT32_C(-2665.40), SIMDE_FLOAT32_C( 4218.82), SIMDE_FLOAT32_C( 4842.10)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 46.00), SIMDE_FLOAT32_C( 7.80), SIMDE_FLOAT32_C( 62.40), SIMDE_FLOAT32_C( 98.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 60.60), SIMDE_FLOAT32_C( -96.70), SIMDE_FLOAT32_C( 86.60), SIMDE_FLOAT32_C( 94.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( 57.50), SIMDE_FLOAT32_C( -34.30), SIMDE_FLOAT32_C( -42.40), SIMDE_FLOAT32_C( -32.30)), simde_mm_set_ps(SIMDE_FLOAT32_C(-2730.10), SIMDE_FLOAT32_C( 719.96), SIMDE_FLOAT32_C(-5446.24), SIMDE_FLOAT32_C(-9302.38)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_fnmadd_ps(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_fnmadd_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 c; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -74.50), SIMDE_FLOAT32_C( 76.00), SIMDE_FLOAT32_C( -65.60), SIMDE_FLOAT32_C( -57.80), SIMDE_FLOAT32_C( 48.90), SIMDE_FLOAT32_C( 17.90), SIMDE_FLOAT32_C( 92.90), SIMDE_FLOAT32_C( 17.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -79.20), SIMDE_FLOAT32_C( 67.50), SIMDE_FLOAT32_C( -50.60), SIMDE_FLOAT32_C( 96.50), SIMDE_FLOAT32_C( -92.70), SIMDE_FLOAT32_C( 12.20), SIMDE_FLOAT32_C( -41.10), SIMDE_FLOAT32_C( -24.10)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -20.70), SIMDE_FLOAT32_C( 76.20), SIMDE_FLOAT32_C( -47.10), SIMDE_FLOAT32_C( -61.40), SIMDE_FLOAT32_C( 55.90), SIMDE_FLOAT32_C( 79.30), SIMDE_FLOAT32_C( -95.40), SIMDE_FLOAT32_C( 98.20)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-5921.10), 
SIMDE_FLOAT32_C(-5053.80), SIMDE_FLOAT32_C(-3366.46), SIMDE_FLOAT32_C( 5516.30), SIMDE_FLOAT32_C( 4588.93), SIMDE_FLOAT32_C( -139.08), SIMDE_FLOAT32_C( 3722.79), SIMDE_FLOAT32_C( 527.18)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 34.30), SIMDE_FLOAT32_C( 90.50), SIMDE_FLOAT32_C( -43.40), SIMDE_FLOAT32_C( -95.00), SIMDE_FLOAT32_C( -62.70), SIMDE_FLOAT32_C( -17.10), SIMDE_FLOAT32_C( 30.50), SIMDE_FLOAT32_C( 1.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -23.20), SIMDE_FLOAT32_C( 28.90), SIMDE_FLOAT32_C( 78.70), SIMDE_FLOAT32_C( 6.50), SIMDE_FLOAT32_C( -13.60), SIMDE_FLOAT32_C( 7.60), SIMDE_FLOAT32_C( -56.70), SIMDE_FLOAT32_C( 52.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -24.30), SIMDE_FLOAT32_C( 65.20), SIMDE_FLOAT32_C( 27.90), SIMDE_FLOAT32_C( -88.40), SIMDE_FLOAT32_C( -43.70), SIMDE_FLOAT32_C( 61.70), SIMDE_FLOAT32_C( -22.10), SIMDE_FLOAT32_C( -51.30)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 771.46), SIMDE_FLOAT32_C(-2550.25), SIMDE_FLOAT32_C( 3443.48), SIMDE_FLOAT32_C( 529.10), SIMDE_FLOAT32_C( -896.42), SIMDE_FLOAT32_C( 191.66), SIMDE_FLOAT32_C( 1707.25), SIMDE_FLOAT32_C( -104.10)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 95.30), SIMDE_FLOAT32_C( -81.70), SIMDE_FLOAT32_C( 51.00), SIMDE_FLOAT32_C( 6.50), SIMDE_FLOAT32_C( 46.00), SIMDE_FLOAT32_C( 76.10), SIMDE_FLOAT32_C( -72.70), SIMDE_FLOAT32_C( 10.10)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 67.00), SIMDE_FLOAT32_C( -43.80), SIMDE_FLOAT32_C( 5.70), SIMDE_FLOAT32_C( 9.00), SIMDE_FLOAT32_C( 39.70), SIMDE_FLOAT32_C( -47.40), SIMDE_FLOAT32_C( -89.40), SIMDE_FLOAT32_C( -69.10)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 5.10), SIMDE_FLOAT32_C( -27.10), SIMDE_FLOAT32_C( 24.30), SIMDE_FLOAT32_C( -90.10), SIMDE_FLOAT32_C( 48.70), SIMDE_FLOAT32_C( 91.00), SIMDE_FLOAT32_C( 80.80), SIMDE_FLOAT32_C( -24.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-6380.00), SIMDE_FLOAT32_C(-3605.56), SIMDE_FLOAT32_C( -266.40), SIMDE_FLOAT32_C( -148.60), SIMDE_FLOAT32_C(-1777.50), SIMDE_FLOAT32_C( 3698.14), SIMDE_FLOAT32_C(-6418.58), SIMDE_FLOAT32_C( 673.31)) 
}, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 5.80), SIMDE_FLOAT32_C( 2.80), SIMDE_FLOAT32_C( 37.80), SIMDE_FLOAT32_C( -55.50), SIMDE_FLOAT32_C( 60.80), SIMDE_FLOAT32_C( -46.40), SIMDE_FLOAT32_C( -53.70), SIMDE_FLOAT32_C( -55.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 76.00), SIMDE_FLOAT32_C( 65.10), SIMDE_FLOAT32_C( 67.70), SIMDE_FLOAT32_C( -84.20), SIMDE_FLOAT32_C( 63.00), SIMDE_FLOAT32_C( -82.10), SIMDE_FLOAT32_C( -55.20), SIMDE_FLOAT32_C( 20.10)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 60.20), SIMDE_FLOAT32_C( -85.50), SIMDE_FLOAT32_C( 58.00), SIMDE_FLOAT32_C( 40.40), SIMDE_FLOAT32_C( 31.70), SIMDE_FLOAT32_C( -6.20), SIMDE_FLOAT32_C( 83.70), SIMDE_FLOAT32_C( -68.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -380.60), SIMDE_FLOAT32_C( -267.78), SIMDE_FLOAT32_C(-2501.06), SIMDE_FLOAT32_C(-4632.70), SIMDE_FLOAT32_C(-3798.70), SIMDE_FLOAT32_C(-3815.64), SIMDE_FLOAT32_C(-2880.54), SIMDE_FLOAT32_C( 1051.07)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -30.40), SIMDE_FLOAT32_C( 81.70), SIMDE_FLOAT32_C( -68.60), SIMDE_FLOAT32_C( 46.50), SIMDE_FLOAT32_C( 53.40), SIMDE_FLOAT32_C( -1.10), SIMDE_FLOAT32_C( -70.80), SIMDE_FLOAT32_C( 10.20)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 99.90), SIMDE_FLOAT32_C( -78.30), SIMDE_FLOAT32_C( -52.60), SIMDE_FLOAT32_C( 28.60), SIMDE_FLOAT32_C( 62.90), SIMDE_FLOAT32_C( -65.50), SIMDE_FLOAT32_C( -51.00), SIMDE_FLOAT32_C( -0.20)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 25.50), SIMDE_FLOAT32_C( 2.70), SIMDE_FLOAT32_C( 99.80), SIMDE_FLOAT32_C( -76.10), SIMDE_FLOAT32_C( -4.50), SIMDE_FLOAT32_C( 7.40), SIMDE_FLOAT32_C( 81.50), SIMDE_FLOAT32_C( 1.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 3062.46), SIMDE_FLOAT32_C( 6399.81), SIMDE_FLOAT32_C(-3508.56), SIMDE_FLOAT32_C(-1406.00), SIMDE_FLOAT32_C(-3363.36), SIMDE_FLOAT32_C( -64.65), SIMDE_FLOAT32_C(-3529.30), SIMDE_FLOAT32_C( 3.54)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 78.00), SIMDE_FLOAT32_C( 45.70), SIMDE_FLOAT32_C( 59.30), SIMDE_FLOAT32_C( 35.50), SIMDE_FLOAT32_C( 91.10), SIMDE_FLOAT32_C( -96.00), SIMDE_FLOAT32_C( 
-93.90), SIMDE_FLOAT32_C( -0.10)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -80.10), SIMDE_FLOAT32_C( 37.30), SIMDE_FLOAT32_C( 94.60), SIMDE_FLOAT32_C( -45.10), SIMDE_FLOAT32_C( -34.70), SIMDE_FLOAT32_C( -33.50), SIMDE_FLOAT32_C( -17.00), SIMDE_FLOAT32_C( -46.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -72.60), SIMDE_FLOAT32_C( 47.00), SIMDE_FLOAT32_C( -93.40), SIMDE_FLOAT32_C( -34.90), SIMDE_FLOAT32_C( 77.40), SIMDE_FLOAT32_C( -96.40), SIMDE_FLOAT32_C( 74.90), SIMDE_FLOAT32_C( 16.90)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 6175.20), SIMDE_FLOAT32_C(-1657.61), SIMDE_FLOAT32_C(-5703.18), SIMDE_FLOAT32_C( 1566.15), SIMDE_FLOAT32_C( 3238.57), SIMDE_FLOAT32_C(-3312.40), SIMDE_FLOAT32_C(-1521.40), SIMDE_FLOAT32_C( 12.25)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -42.00), SIMDE_FLOAT32_C( 100.00), SIMDE_FLOAT32_C( -84.50), SIMDE_FLOAT32_C( 27.60), SIMDE_FLOAT32_C( 27.10), SIMDE_FLOAT32_C( -76.60), SIMDE_FLOAT32_C( -36.20), SIMDE_FLOAT32_C( 16.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 98.50), SIMDE_FLOAT32_C( -46.90), SIMDE_FLOAT32_C( -21.70), SIMDE_FLOAT32_C( 90.80), SIMDE_FLOAT32_C( 42.70), SIMDE_FLOAT32_C( 48.80), SIMDE_FLOAT32_C( 91.30), SIMDE_FLOAT32_C( 90.10)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 74.40), SIMDE_FLOAT32_C( -15.10), SIMDE_FLOAT32_C( 42.70), SIMDE_FLOAT32_C( -90.90), SIMDE_FLOAT32_C( -30.80), SIMDE_FLOAT32_C( 48.00), SIMDE_FLOAT32_C( 12.60), SIMDE_FLOAT32_C( 59.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 4211.40), SIMDE_FLOAT32_C( 4674.90), SIMDE_FLOAT32_C(-1790.95), SIMDE_FLOAT32_C(-2596.98), SIMDE_FLOAT32_C(-1187.97), SIMDE_FLOAT32_C( 3786.08), SIMDE_FLOAT32_C( 3317.66), SIMDE_FLOAT32_C(-1426.95)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -49.00), SIMDE_FLOAT32_C( 74.90), SIMDE_FLOAT32_C( -48.00), SIMDE_FLOAT32_C( 46.70), SIMDE_FLOAT32_C( 4.40), SIMDE_FLOAT32_C( 44.70), SIMDE_FLOAT32_C( -68.40), SIMDE_FLOAT32_C( 74.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 8.00), SIMDE_FLOAT32_C( -94.30), SIMDE_FLOAT32_C( -6.20), SIMDE_FLOAT32_C( -21.50), SIMDE_FLOAT32_C( 61.90), 
SIMDE_FLOAT32_C( 14.50), SIMDE_FLOAT32_C( -69.00), SIMDE_FLOAT32_C( -34.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 11.20), SIMDE_FLOAT32_C( 22.30), SIMDE_FLOAT32_C( -35.00), SIMDE_FLOAT32_C( 30.60), SIMDE_FLOAT32_C( 72.90), SIMDE_FLOAT32_C( 97.50), SIMDE_FLOAT32_C( 2.70), SIMDE_FLOAT32_C( 72.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 403.20), SIMDE_FLOAT32_C( 7085.37), SIMDE_FLOAT32_C( -332.60), SIMDE_FLOAT32_C( 1034.65), SIMDE_FLOAT32_C( -199.46), SIMDE_FLOAT32_C( -550.65), SIMDE_FLOAT32_C(-4716.90), SIMDE_FLOAT32_C( 2643.05)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_fnmadd_ps(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_fnmadd_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d c; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 47.80), SIMDE_FLOAT64_C( -80.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( 55.10), SIMDE_FLOAT64_C( 17.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( -52.00), SIMDE_FLOAT64_C( -7.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( 47.80), SIMDE_FLOAT64_C( 1388.51)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -16.50), SIMDE_FLOAT64_C( 77.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( -98.70), SIMDE_FLOAT64_C( -77.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( -99.70), SIMDE_FLOAT64_C( 69.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( -16.50), SIMDE_FLOAT64_C( 6099.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 40.70), SIMDE_FLOAT64_C( -56.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -41.00), SIMDE_FLOAT64_C( -43.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( -9.90), SIMDE_FLOAT64_C( 48.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 40.70), SIMDE_FLOAT64_C(-2370.30)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -21.60), SIMDE_FLOAT64_C( -51.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 91.50), SIMDE_FLOAT64_C( 24.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 83.10), SIMDE_FLOAT64_C( 15.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( -21.60), 
SIMDE_FLOAT64_C( 1303.02)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 36.60), SIMDE_FLOAT64_C( 90.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( 25.20), SIMDE_FLOAT64_C( -17.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( 21.60), SIMDE_FLOAT64_C( 29.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( 36.60), SIMDE_FLOAT64_C( 1606.04)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 28.70), SIMDE_FLOAT64_C( -13.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( -49.50), SIMDE_FLOAT64_C( 1.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 78.40), SIMDE_FLOAT64_C( 70.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( 28.70), SIMDE_FLOAT64_C( 95.08)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -83.30), SIMDE_FLOAT64_C( -83.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( 89.10), SIMDE_FLOAT64_C( 5.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( 56.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( -83.30), SIMDE_FLOAT64_C( 529.60)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -8.80), SIMDE_FLOAT64_C( 91.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( 82.10), SIMDE_FLOAT64_C( -69.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( 92.70), SIMDE_FLOAT64_C( -85.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( -8.80), SIMDE_FLOAT64_C( 6255.45)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_fnmadd_sd(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_fnmadd_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 c; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -92.60), SIMDE_FLOAT32_C( -98.70), SIMDE_FLOAT32_C( 10.90), SIMDE_FLOAT32_C( -61.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 48.20), SIMDE_FLOAT32_C( 4.60), SIMDE_FLOAT32_C( -98.40), SIMDE_FLOAT32_C( 56.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( -83.90), SIMDE_FLOAT32_C( 54.30), SIMDE_FLOAT32_C( 54.70), SIMDE_FLOAT32_C( 20.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( -92.60), SIMDE_FLOAT32_C( -98.70), SIMDE_FLOAT32_C( 10.90), SIMDE_FLOAT32_C( 3491.60)) }, { 
simde_mm_set_ps(SIMDE_FLOAT32_C( -61.10), SIMDE_FLOAT32_C( -33.00), SIMDE_FLOAT32_C( -47.10), SIMDE_FLOAT32_C( 31.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( 47.10), SIMDE_FLOAT32_C( -73.50), SIMDE_FLOAT32_C( -40.70), SIMDE_FLOAT32_C( -95.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( -73.00), SIMDE_FLOAT32_C( -68.20), SIMDE_FLOAT32_C( 35.20), SIMDE_FLOAT32_C( 48.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( -61.10), SIMDE_FLOAT32_C( -33.00), SIMDE_FLOAT32_C( -47.10), SIMDE_FLOAT32_C( 3053.48)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -17.60), SIMDE_FLOAT32_C( -75.20), SIMDE_FLOAT32_C( -94.50), SIMDE_FLOAT32_C( 95.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( 56.20), SIMDE_FLOAT32_C( -24.90), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( -33.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( -8.10), SIMDE_FLOAT32_C( 95.10), SIMDE_FLOAT32_C( -66.20), SIMDE_FLOAT32_C( 51.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( -17.60), SIMDE_FLOAT32_C( -75.20), SIMDE_FLOAT32_C( -94.50), SIMDE_FLOAT32_C( 3222.10)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 15.40), SIMDE_FLOAT32_C( -42.20), SIMDE_FLOAT32_C( -38.90), SIMDE_FLOAT32_C( -40.10)), simde_mm_set_ps(SIMDE_FLOAT32_C( -84.90), SIMDE_FLOAT32_C( -51.00), SIMDE_FLOAT32_C( -45.70), SIMDE_FLOAT32_C( 14.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( -82.20), SIMDE_FLOAT32_C( 60.00), SIMDE_FLOAT32_C( -19.40), SIMDE_FLOAT32_C( 90.10)), simde_mm_set_ps(SIMDE_FLOAT32_C( 15.40), SIMDE_FLOAT32_C( -42.20), SIMDE_FLOAT32_C( -38.90), SIMDE_FLOAT32_C( 667.54)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -29.00), SIMDE_FLOAT32_C( 45.90), SIMDE_FLOAT32_C( -65.60), SIMDE_FLOAT32_C( -2.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( -46.90), SIMDE_FLOAT32_C( 6.70), SIMDE_FLOAT32_C( -97.90), SIMDE_FLOAT32_C( -72.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( -7.70), SIMDE_FLOAT32_C( -29.90), SIMDE_FLOAT32_C( 69.80), SIMDE_FLOAT32_C( -66.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( -29.00), SIMDE_FLOAT32_C( 45.90), SIMDE_FLOAT32_C( -65.60), SIMDE_FLOAT32_C( -226.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -58.10), SIMDE_FLOAT32_C( -47.10), 
SIMDE_FLOAT32_C( 68.70), SIMDE_FLOAT32_C( 33.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 40.90), SIMDE_FLOAT32_C( -18.60), SIMDE_FLOAT32_C( -92.90), SIMDE_FLOAT32_C( 19.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( -14.90), SIMDE_FLOAT32_C( 50.40), SIMDE_FLOAT32_C( -64.40), SIMDE_FLOAT32_C( -4.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( -58.10), SIMDE_FLOAT32_C( -47.10), SIMDE_FLOAT32_C( 68.70), SIMDE_FLOAT32_C( -667.08)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -90.60), SIMDE_FLOAT32_C( -45.50), SIMDE_FLOAT32_C( -20.60), SIMDE_FLOAT32_C( -95.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( -94.80), SIMDE_FLOAT32_C( 21.50), SIMDE_FLOAT32_C( 77.40), SIMDE_FLOAT32_C( -58.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( -8.20), SIMDE_FLOAT32_C( 56.80), SIMDE_FLOAT32_C( 16.40), SIMDE_FLOAT32_C( -52.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( -90.60), SIMDE_FLOAT32_C( -45.50), SIMDE_FLOAT32_C( -20.60), SIMDE_FLOAT32_C(-5677.45)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 16.00), SIMDE_FLOAT32_C( -22.20), SIMDE_FLOAT32_C( -70.50), SIMDE_FLOAT32_C( -57.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( 86.70), SIMDE_FLOAT32_C( 31.60), SIMDE_FLOAT32_C( -15.30), SIMDE_FLOAT32_C( -77.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( -72.10), SIMDE_FLOAT32_C( 13.20), SIMDE_FLOAT32_C( 17.70), SIMDE_FLOAT32_C( -65.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( 16.00), SIMDE_FLOAT32_C( -22.20), SIMDE_FLOAT32_C( -70.50), SIMDE_FLOAT32_C(-4512.02)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_fnmadd_ss(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_fnmsub_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d c; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -85.20), SIMDE_FLOAT64_C( -77.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( 22.30), SIMDE_FLOAT64_C( 10.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( 40.90), SIMDE_FLOAT64_C( 66.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1859.06), 
SIMDE_FLOAT64_C( 709.30)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 12.10), SIMDE_FLOAT64_C( -42.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( -71.60), SIMDE_FLOAT64_C( -43.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( 73.80), SIMDE_FLOAT64_C( -65.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( 792.56), SIMDE_FLOAT64_C(-1809.03)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 47.90), SIMDE_FLOAT64_C( 8.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( -70.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( -58.60), SIMDE_FLOAT64_C( -21.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( -133.00), SIMDE_FLOAT64_C( 637.26)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 70.80), SIMDE_FLOAT64_C( -62.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 22.60), SIMDE_FLOAT64_C( -27.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 58.00), SIMDE_FLOAT64_C( 35.60)), simde_mm_set_pd(SIMDE_FLOAT64_C(-1658.08), SIMDE_FLOAT64_C(-1784.22)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -59.60), SIMDE_FLOAT64_C( -26.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( 10.40), SIMDE_FLOAT64_C( -3.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( -36.80), SIMDE_FLOAT64_C( -20.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( 656.64), SIMDE_FLOAT64_C( -78.96)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 75.80), SIMDE_FLOAT64_C( -40.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( 82.60), SIMDE_FLOAT64_C( 14.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( -69.00), SIMDE_FLOAT64_C( 52.40)), simde_mm_set_pd(SIMDE_FLOAT64_C(-6192.08), SIMDE_FLOAT64_C( 551.05)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -54.60), SIMDE_FLOAT64_C( 2.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( 17.90), SIMDE_FLOAT64_C( 72.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 95.70), SIMDE_FLOAT64_C( 56.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 881.64), SIMDE_FLOAT64_C( -202.40)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 21.40), SIMDE_FLOAT64_C( 40.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( 42.50), SIMDE_FLOAT64_C( 29.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( 24.40), SIMDE_FLOAT64_C( -57.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( -933.90), SIMDE_FLOAT64_C(-1112.22)) } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_fnmsub_pd(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_fnmsub_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d b; simde__m256d c; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -97.30), SIMDE_FLOAT64_C( 40.60), SIMDE_FLOAT64_C( -78.70), SIMDE_FLOAT64_C( 0.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 43.40), SIMDE_FLOAT64_C( -67.40), SIMDE_FLOAT64_C( 62.50), SIMDE_FLOAT64_C( -5.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 51.10), SIMDE_FLOAT64_C( 66.40), SIMDE_FLOAT64_C( 79.40), SIMDE_FLOAT64_C( 4.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 4171.72), SIMDE_FLOAT64_C( 2670.04), SIMDE_FLOAT64_C( 4839.35), SIMDE_FLOAT64_C( -0.78)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -37.90), SIMDE_FLOAT64_C( -91.10), SIMDE_FLOAT64_C( 99.40), SIMDE_FLOAT64_C( -64.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 55.70), SIMDE_FLOAT64_C( -31.70), SIMDE_FLOAT64_C( 33.10), SIMDE_FLOAT64_C( 94.30)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 18.80), SIMDE_FLOAT64_C( 56.10), SIMDE_FLOAT64_C( -19.80), SIMDE_FLOAT64_C( -98.50)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 2092.23), SIMDE_FLOAT64_C(-2943.97), SIMDE_FLOAT64_C(-3270.34), SIMDE_FLOAT64_C( 6190.28)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 44.20), SIMDE_FLOAT64_C( 98.00), SIMDE_FLOAT64_C( -20.60), SIMDE_FLOAT64_C( 99.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -36.50), SIMDE_FLOAT64_C( 37.70), SIMDE_FLOAT64_C( 27.10), SIMDE_FLOAT64_C( -85.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -54.80), SIMDE_FLOAT64_C( 46.70), SIMDE_FLOAT64_C( -59.70), SIMDE_FLOAT64_C( -80.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1668.10), SIMDE_FLOAT64_C(-3741.30), SIMDE_FLOAT64_C( 617.96), SIMDE_FLOAT64_C( 8512.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -13.40), SIMDE_FLOAT64_C( 16.00), SIMDE_FLOAT64_C( -82.10), SIMDE_FLOAT64_C( 27.40)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 
-41.30), SIMDE_FLOAT64_C( 84.40), SIMDE_FLOAT64_C( -52.10), SIMDE_FLOAT64_C( 16.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 7.30), SIMDE_FLOAT64_C( -49.40), SIMDE_FLOAT64_C( -31.90), SIMDE_FLOAT64_C( 69.30)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -560.72), SIMDE_FLOAT64_C(-1301.00), SIMDE_FLOAT64_C(-4245.51), SIMDE_FLOAT64_C( -524.14)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -16.80), SIMDE_FLOAT64_C( -78.00), SIMDE_FLOAT64_C( -43.90), SIMDE_FLOAT64_C( -53.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -11.30), SIMDE_FLOAT64_C( -83.60), SIMDE_FLOAT64_C( -78.30), SIMDE_FLOAT64_C( -1.10)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -94.20), SIMDE_FLOAT64_C( 36.20), SIMDE_FLOAT64_C( 66.40), SIMDE_FLOAT64_C( 12.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -95.64), SIMDE_FLOAT64_C(-6557.00), SIMDE_FLOAT64_C(-3503.77), SIMDE_FLOAT64_C( -71.66)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 94.80), SIMDE_FLOAT64_C( 27.60), SIMDE_FLOAT64_C( 5.70), SIMDE_FLOAT64_C( -73.90)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 11.80), SIMDE_FLOAT64_C( -83.40), SIMDE_FLOAT64_C( 89.00), SIMDE_FLOAT64_C( 39.10)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 61.70), SIMDE_FLOAT64_C( 98.90), SIMDE_FLOAT64_C( -6.00), SIMDE_FLOAT64_C( -89.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C(-1180.34), SIMDE_FLOAT64_C( 2202.94), SIMDE_FLOAT64_C( -501.30), SIMDE_FLOAT64_C( 2978.69)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -66.00), SIMDE_FLOAT64_C( -99.10), SIMDE_FLOAT64_C( -51.20), SIMDE_FLOAT64_C( 98.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 58.10), SIMDE_FLOAT64_C( -66.70), SIMDE_FLOAT64_C( -86.20), SIMDE_FLOAT64_C( 25.30)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -71.40), SIMDE_FLOAT64_C( 40.80), SIMDE_FLOAT64_C( -71.40), SIMDE_FLOAT64_C( 8.90)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 3906.00), SIMDE_FLOAT64_C(-6650.77), SIMDE_FLOAT64_C(-4342.04), SIMDE_FLOAT64_C(-2493.36)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 63.90), SIMDE_FLOAT64_C( 7.50), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -97.90)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -39.10), 
SIMDE_FLOAT64_C( -73.10), SIMDE_FLOAT64_C( -53.20), SIMDE_FLOAT64_C( 81.20)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -32.20), SIMDE_FLOAT64_C( 71.70), SIMDE_FLOAT64_C( 39.30), SIMDE_FLOAT64_C( -11.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 2530.69), SIMDE_FLOAT64_C( 476.55), SIMDE_FLOAT64_C( -39.30), SIMDE_FLOAT64_C( 7961.08)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_fnmsub_pd(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_fnmsub_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 c; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 18.60), SIMDE_FLOAT32_C( -96.60), SIMDE_FLOAT32_C( -17.10), SIMDE_FLOAT32_C( -50.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3.20), SIMDE_FLOAT32_C( -15.90), SIMDE_FLOAT32_C( 83.80), SIMDE_FLOAT32_C( -57.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( -50.90), SIMDE_FLOAT32_C( -53.70), SIMDE_FLOAT32_C( 66.30), SIMDE_FLOAT32_C( 53.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( -8.62), SIMDE_FLOAT32_C(-1482.24), SIMDE_FLOAT32_C( 1366.68), SIMDE_FLOAT32_C(-2967.35)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 19.40), SIMDE_FLOAT32_C( 81.00), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 26.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( -29.00), SIMDE_FLOAT32_C( 15.30), SIMDE_FLOAT32_C( -89.70), SIMDE_FLOAT32_C( -71.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 61.60), SIMDE_FLOAT32_C( -84.00), SIMDE_FLOAT32_C( -77.60), SIMDE_FLOAT32_C( 49.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( 501.00), SIMDE_FLOAT32_C(-1155.30), SIMDE_FLOAT32_C( 140.39), SIMDE_FLOAT32_C( 1831.86)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 10.90), SIMDE_FLOAT32_C( -73.80), SIMDE_FLOAT32_C( -37.10), SIMDE_FLOAT32_C( 92.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( 27.80), SIMDE_FLOAT32_C( 31.90), SIMDE_FLOAT32_C( -77.70), SIMDE_FLOAT32_C( -29.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 72.70), SIMDE_FLOAT32_C( 50.20), SIMDE_FLOAT32_C( -64.40), 
SIMDE_FLOAT32_C( 81.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( -375.72), SIMDE_FLOAT32_C( 2304.02), SIMDE_FLOAT32_C(-2818.27), SIMDE_FLOAT32_C( 2668.64)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -2.50), SIMDE_FLOAT32_C( -77.00), SIMDE_FLOAT32_C( -97.10), SIMDE_FLOAT32_C( -6.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.40), SIMDE_FLOAT32_C( 38.10), SIMDE_FLOAT32_C( 96.80), SIMDE_FLOAT32_C( -90.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( -31.70), SIMDE_FLOAT32_C( -86.40), SIMDE_FLOAT32_C( -62.20), SIMDE_FLOAT32_C( -64.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 35.20), SIMDE_FLOAT32_C( 3020.10), SIMDE_FLOAT32_C( 9461.48), SIMDE_FLOAT32_C( -495.78)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -29.50), SIMDE_FLOAT32_C( -45.60), SIMDE_FLOAT32_C( -87.90), SIMDE_FLOAT32_C( -82.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( -16.40), SIMDE_FLOAT32_C( -50.10), SIMDE_FLOAT32_C( -30.70), SIMDE_FLOAT32_C( -73.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( 49.20), SIMDE_FLOAT32_C( 55.00), SIMDE_FLOAT32_C( 57.30), SIMDE_FLOAT32_C( -33.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( -533.00), SIMDE_FLOAT32_C(-2339.56), SIMDE_FLOAT32_C(-2755.83), SIMDE_FLOAT32_C(-6001.90)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -90.10), SIMDE_FLOAT32_C( 83.90), SIMDE_FLOAT32_C( -87.40), SIMDE_FLOAT32_C( -87.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( -1.40), SIMDE_FLOAT32_C( -10.10), SIMDE_FLOAT32_C( 29.30), SIMDE_FLOAT32_C( -74.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( -21.90), SIMDE_FLOAT32_C( 46.80), SIMDE_FLOAT32_C( -76.50), SIMDE_FLOAT32_C( -94.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( -104.24), SIMDE_FLOAT32_C( 800.59), SIMDE_FLOAT32_C( 2637.32), SIMDE_FLOAT32_C(-6435.64)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -29.30), SIMDE_FLOAT32_C( -94.30), SIMDE_FLOAT32_C( -8.20), SIMDE_FLOAT32_C( -67.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( -39.50), SIMDE_FLOAT32_C( 47.60), SIMDE_FLOAT32_C( 50.70), SIMDE_FLOAT32_C( 19.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( 2.50), SIMDE_FLOAT32_C( 40.50), SIMDE_FLOAT32_C( -73.30), SIMDE_FLOAT32_C( 7.40)), 
simde_mm_set_ps(SIMDE_FLOAT32_C(-1159.85), SIMDE_FLOAT32_C( 4448.18), SIMDE_FLOAT32_C( 489.04), SIMDE_FLOAT32_C( 1309.86)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 15.30), SIMDE_FLOAT32_C( 14.60), SIMDE_FLOAT32_C( -68.80), SIMDE_FLOAT32_C( 92.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( -34.50), SIMDE_FLOAT32_C( 77.40), SIMDE_FLOAT32_C( 73.70), SIMDE_FLOAT32_C( -25.10)), simde_mm_set_ps(SIMDE_FLOAT32_C( -70.20), SIMDE_FLOAT32_C( -4.40), SIMDE_FLOAT32_C( -93.70), SIMDE_FLOAT32_C( 16.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( 598.05), SIMDE_FLOAT32_C(-1125.64), SIMDE_FLOAT32_C( 5164.26), SIMDE_FLOAT32_C( 2312.98)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_fnmsub_ps(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_fnmsub_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 c; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -91.80), SIMDE_FLOAT32_C( -53.10), SIMDE_FLOAT32_C( -79.10), SIMDE_FLOAT32_C( 50.50), SIMDE_FLOAT32_C( -81.20), SIMDE_FLOAT32_C( -11.90), SIMDE_FLOAT32_C( -72.60), SIMDE_FLOAT32_C( 13.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 11.90), SIMDE_FLOAT32_C( -8.00), SIMDE_FLOAT32_C( 73.10), SIMDE_FLOAT32_C( 73.00), SIMDE_FLOAT32_C( -15.70), SIMDE_FLOAT32_C( 33.70), SIMDE_FLOAT32_C( -36.30), SIMDE_FLOAT32_C( -25.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -90.00), SIMDE_FLOAT32_C( -13.00), SIMDE_FLOAT32_C( -28.10), SIMDE_FLOAT32_C( -49.40), SIMDE_FLOAT32_C( -74.60), SIMDE_FLOAT32_C( -32.00), SIMDE_FLOAT32_C( -63.50), SIMDE_FLOAT32_C( -18.90)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1182.42), SIMDE_FLOAT32_C( -411.80), SIMDE_FLOAT32_C( 5810.31), SIMDE_FLOAT32_C(-3637.10), SIMDE_FLOAT32_C(-1200.24), SIMDE_FLOAT32_C( 433.03), SIMDE_FLOAT32_C(-2571.88), SIMDE_FLOAT32_C( 369.78)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -57.30), SIMDE_FLOAT32_C( 71.50), SIMDE_FLOAT32_C( 39.90), 
SIMDE_FLOAT32_C( -77.10), SIMDE_FLOAT32_C( -9.90), SIMDE_FLOAT32_C( -16.00), SIMDE_FLOAT32_C( 74.80), SIMDE_FLOAT32_C( 77.40)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -53.60), SIMDE_FLOAT32_C( 49.60), SIMDE_FLOAT32_C( 94.20), SIMDE_FLOAT32_C( 1.20), SIMDE_FLOAT32_C( -56.30), SIMDE_FLOAT32_C( 26.10), SIMDE_FLOAT32_C( -23.40), SIMDE_FLOAT32_C( -47.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 82.40), SIMDE_FLOAT32_C( -13.50), SIMDE_FLOAT32_C( -97.40), SIMDE_FLOAT32_C( 84.50), SIMDE_FLOAT32_C( -48.30), SIMDE_FLOAT32_C( 98.50), SIMDE_FLOAT32_C( -91.50), SIMDE_FLOAT32_C( 24.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-3153.68), SIMDE_FLOAT32_C(-3532.90), SIMDE_FLOAT32_C(-3661.18), SIMDE_FLOAT32_C( 8.02), SIMDE_FLOAT32_C( -509.07), SIMDE_FLOAT32_C( 319.10), SIMDE_FLOAT32_C( 1841.82), SIMDE_FLOAT32_C( 3613.20)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 11.60), SIMDE_FLOAT32_C( 59.90), SIMDE_FLOAT32_C( -3.20), SIMDE_FLOAT32_C( 4.40), SIMDE_FLOAT32_C( -98.80), SIMDE_FLOAT32_C( 29.00), SIMDE_FLOAT32_C( -86.20), SIMDE_FLOAT32_C( 19.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 63.60), SIMDE_FLOAT32_C( -94.60), SIMDE_FLOAT32_C( -81.40), SIMDE_FLOAT32_C( 9.90), SIMDE_FLOAT32_C( -69.00), SIMDE_FLOAT32_C( -83.90), SIMDE_FLOAT32_C( 22.00), SIMDE_FLOAT32_C( -56.30)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 80.90), SIMDE_FLOAT32_C( -7.90), SIMDE_FLOAT32_C( -92.10), SIMDE_FLOAT32_C( 65.40), SIMDE_FLOAT32_C( -26.30), SIMDE_FLOAT32_C( -26.90), SIMDE_FLOAT32_C( -44.20), SIMDE_FLOAT32_C( -39.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -818.66), SIMDE_FLOAT32_C( 5674.44), SIMDE_FLOAT32_C( -168.38), SIMDE_FLOAT32_C( -108.96), SIMDE_FLOAT32_C(-6790.90), SIMDE_FLOAT32_C( 2460.00), SIMDE_FLOAT32_C( 1940.60), SIMDE_FLOAT32_C( 1137.45)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -73.70), SIMDE_FLOAT32_C( -39.20), SIMDE_FLOAT32_C( 40.90), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( -64.50), SIMDE_FLOAT32_C( 35.70), SIMDE_FLOAT32_C( -58.10), SIMDE_FLOAT32_C( -23.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 98.30), 
SIMDE_FLOAT32_C( 77.60), SIMDE_FLOAT32_C( 33.80), SIMDE_FLOAT32_C( 94.20), SIMDE_FLOAT32_C( 8.60), SIMDE_FLOAT32_C( -96.70), SIMDE_FLOAT32_C( -22.70), SIMDE_FLOAT32_C( -38.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -52.20), SIMDE_FLOAT32_C( -35.80), SIMDE_FLOAT32_C( 76.20), SIMDE_FLOAT32_C( -32.30), SIMDE_FLOAT32_C( -84.60), SIMDE_FLOAT32_C( 76.00), SIMDE_FLOAT32_C( -84.30), SIMDE_FLOAT32_C( 87.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 7296.91), SIMDE_FLOAT32_C( 3077.72), SIMDE_FLOAT32_C(-1458.62), SIMDE_FLOAT32_C( -24.22), SIMDE_FLOAT32_C( 639.30), SIMDE_FLOAT32_C( 3376.19), SIMDE_FLOAT32_C(-1234.57), SIMDE_FLOAT32_C( -992.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -10.70), SIMDE_FLOAT32_C( 52.50), SIMDE_FLOAT32_C( 95.50), SIMDE_FLOAT32_C( -35.90), SIMDE_FLOAT32_C( -55.60), SIMDE_FLOAT32_C( 1.10), SIMDE_FLOAT32_C( -20.80), SIMDE_FLOAT32_C( -55.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -20.50), SIMDE_FLOAT32_C( 25.80), SIMDE_FLOAT32_C( 85.10), SIMDE_FLOAT32_C( -30.10), SIMDE_FLOAT32_C( 98.50), SIMDE_FLOAT32_C( -42.90), SIMDE_FLOAT32_C( 14.30), SIMDE_FLOAT32_C( 52.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 81.70), SIMDE_FLOAT32_C( 21.40), SIMDE_FLOAT32_C( 41.10), SIMDE_FLOAT32_C( 65.30), SIMDE_FLOAT32_C( -66.60), SIMDE_FLOAT32_C( 6.20), SIMDE_FLOAT32_C( 29.60), SIMDE_FLOAT32_C( 47.20)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -301.05), SIMDE_FLOAT32_C(-1375.90), SIMDE_FLOAT32_C(-8168.15), SIMDE_FLOAT32_C(-1145.89), SIMDE_FLOAT32_C( 5543.20), SIMDE_FLOAT32_C( 40.99), SIMDE_FLOAT32_C( 267.84), SIMDE_FLOAT32_C( 2872.10)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -94.70), SIMDE_FLOAT32_C( 14.30), SIMDE_FLOAT32_C( 36.30), SIMDE_FLOAT32_C( -95.40), SIMDE_FLOAT32_C( -85.70), SIMDE_FLOAT32_C( 15.60), SIMDE_FLOAT32_C( -45.20), SIMDE_FLOAT32_C( -87.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -75.50), SIMDE_FLOAT32_C( 52.00), SIMDE_FLOAT32_C( 88.60), SIMDE_FLOAT32_C( -12.10), SIMDE_FLOAT32_C( -27.40), SIMDE_FLOAT32_C( 41.00), SIMDE_FLOAT32_C( -70.80), SIMDE_FLOAT32_C( 22.60)), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( -14.10), SIMDE_FLOAT32_C( -90.60), SIMDE_FLOAT32_C( 84.80), SIMDE_FLOAT32_C( -47.50), SIMDE_FLOAT32_C( -49.90), SIMDE_FLOAT32_C( 72.50), SIMDE_FLOAT32_C( 90.90), SIMDE_FLOAT32_C( -74.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-7135.75), SIMDE_FLOAT32_C( -653.00), SIMDE_FLOAT32_C(-3300.98), SIMDE_FLOAT32_C(-1106.84), SIMDE_FLOAT32_C(-2298.28), SIMDE_FLOAT32_C( -712.10), SIMDE_FLOAT32_C(-3291.06), SIMDE_FLOAT32_C( 2052.10)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -59.20), SIMDE_FLOAT32_C( -79.60), SIMDE_FLOAT32_C( 47.00), SIMDE_FLOAT32_C( -96.90), SIMDE_FLOAT32_C( -44.60), SIMDE_FLOAT32_C( 50.20), SIMDE_FLOAT32_C( 10.60), SIMDE_FLOAT32_C( -70.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -68.90), SIMDE_FLOAT32_C( 37.70), SIMDE_FLOAT32_C( 58.60), SIMDE_FLOAT32_C( -25.80), SIMDE_FLOAT32_C( 57.80), SIMDE_FLOAT32_C( -89.20), SIMDE_FLOAT32_C( 27.50), SIMDE_FLOAT32_C( 46.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 61.30), SIMDE_FLOAT32_C( -66.60), SIMDE_FLOAT32_C( 75.60), SIMDE_FLOAT32_C( -6.00), SIMDE_FLOAT32_C( -95.90), SIMDE_FLOAT32_C( 11.80), SIMDE_FLOAT32_C( 59.10), SIMDE_FLOAT32_C( 34.90)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-4140.18), SIMDE_FLOAT32_C( 3067.52), SIMDE_FLOAT32_C(-2829.80), SIMDE_FLOAT32_C(-2494.02), SIMDE_FLOAT32_C( 2673.78), SIMDE_FLOAT32_C( 4466.04), SIMDE_FLOAT32_C( -350.60), SIMDE_FLOAT32_C( 3264.38)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -53.00), SIMDE_FLOAT32_C( -46.10), SIMDE_FLOAT32_C( 53.90), SIMDE_FLOAT32_C( 19.20), SIMDE_FLOAT32_C( -73.10), SIMDE_FLOAT32_C( 23.40), SIMDE_FLOAT32_C( 67.90), SIMDE_FLOAT32_C( -74.50)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -80.10), SIMDE_FLOAT32_C( 56.30), SIMDE_FLOAT32_C( -45.20), SIMDE_FLOAT32_C( 32.20), SIMDE_FLOAT32_C( -17.90), SIMDE_FLOAT32_C( -44.50), SIMDE_FLOAT32_C( 62.10), SIMDE_FLOAT32_C( 8.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 30.70), SIMDE_FLOAT32_C( 11.70), SIMDE_FLOAT32_C( -61.10), SIMDE_FLOAT32_C( 76.30), SIMDE_FLOAT32_C( 57.20), SIMDE_FLOAT32_C( 36.40), 
SIMDE_FLOAT32_C( 67.50), SIMDE_FLOAT32_C( 2.90)), simde_mm256_set_ps(SIMDE_FLOAT32_C(-4276.00), SIMDE_FLOAT32_C( 2583.73), SIMDE_FLOAT32_C( 2497.38), SIMDE_FLOAT32_C( -694.54), SIMDE_FLOAT32_C(-1365.69), SIMDE_FLOAT32_C( 1004.90), SIMDE_FLOAT32_C(-4284.09), SIMDE_FLOAT32_C( 593.10)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_fnmsub_ps(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_fnmsub_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d c; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -60.10), SIMDE_FLOAT64_C( -84.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -22.80), SIMDE_FLOAT64_C( 63.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( 81.10), SIMDE_FLOAT64_C( -77.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( -60.10), SIMDE_FLOAT64_C( 5386.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 37.60), SIMDE_FLOAT64_C( 78.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( -10.00), SIMDE_FLOAT64_C( 53.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( -88.70), SIMDE_FLOAT64_C( 54.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 37.60), SIMDE_FLOAT64_C(-4283.94)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -38.20), SIMDE_FLOAT64_C( 72.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( -86.10), SIMDE_FLOAT64_C( 25.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( -52.90), SIMDE_FLOAT64_C( -86.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( -38.20), SIMDE_FLOAT64_C(-1752.25)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 7.50), SIMDE_FLOAT64_C( 35.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( 5.30), SIMDE_FLOAT64_C( 97.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -49.40), SIMDE_FLOAT64_C( -58.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( 7.50), SIMDE_FLOAT64_C(-3336.40)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 41.40), SIMDE_FLOAT64_C( -46.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( -70.70), SIMDE_FLOAT64_C( -78.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( 77.70), SIMDE_FLOAT64_C( -33.20)), 
simde_mm_set_pd(SIMDE_FLOAT64_C( 41.40), SIMDE_FLOAT64_C(-3629.69)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -63.30), SIMDE_FLOAT64_C( -78.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( 58.80), SIMDE_FLOAT64_C( 11.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( -18.00), SIMDE_FLOAT64_C( -49.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( -63.30), SIMDE_FLOAT64_C( 950.90)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -80.50), SIMDE_FLOAT64_C( 28.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( -48.00), SIMDE_FLOAT64_C( 38.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( -73.40), SIMDE_FLOAT64_C( -29.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -80.50), SIMDE_FLOAT64_C(-1056.85)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 42.80), SIMDE_FLOAT64_C( -10.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 8.60), SIMDE_FLOAT64_C( -39.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( -25.50), SIMDE_FLOAT64_C( 42.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( 42.80), SIMDE_FLOAT64_C( -473.74)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_fnmsub_sd(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_fnmsub_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 c; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 17.30), SIMDE_FLOAT32_C( 17.40), SIMDE_FLOAT32_C( 41.70), SIMDE_FLOAT32_C( 37.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 61.00), SIMDE_FLOAT32_C( 15.10), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 43.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( 28.00), SIMDE_FLOAT32_C( 83.70), SIMDE_FLOAT32_C( 43.30), SIMDE_FLOAT32_C( -38.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 17.30), SIMDE_FLOAT32_C( 17.40), SIMDE_FLOAT32_C( 41.70), SIMDE_FLOAT32_C(-1563.90)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -37.00), SIMDE_FLOAT32_C( -28.20), SIMDE_FLOAT32_C( 12.60), SIMDE_FLOAT32_C( -73.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( 5.90), SIMDE_FLOAT32_C( 68.10), SIMDE_FLOAT32_C( 57.10), SIMDE_FLOAT32_C( 23.80)), 
simde_mm_set_ps(SIMDE_FLOAT32_C( 17.40), SIMDE_FLOAT32_C( 89.40), SIMDE_FLOAT32_C( 38.60), SIMDE_FLOAT32_C( -36.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( -37.00), SIMDE_FLOAT32_C( -28.20), SIMDE_FLOAT32_C( 12.60), SIMDE_FLOAT32_C( 1785.80)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 35.60), SIMDE_FLOAT32_C( -64.00), SIMDE_FLOAT32_C( 95.10), SIMDE_FLOAT32_C( -83.40)), simde_mm_set_ps(SIMDE_FLOAT32_C( 90.40), SIMDE_FLOAT32_C( 58.10), SIMDE_FLOAT32_C( -8.40), SIMDE_FLOAT32_C( -87.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( 26.50), SIMDE_FLOAT32_C( -91.50), SIMDE_FLOAT32_C( 38.20), SIMDE_FLOAT32_C( 39.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 35.60), SIMDE_FLOAT32_C( -64.00), SIMDE_FLOAT32_C( 95.10), SIMDE_FLOAT32_C(-7370.06)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 26.00), SIMDE_FLOAT32_C( 35.10), SIMDE_FLOAT32_C( 90.70), SIMDE_FLOAT32_C( -77.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( -18.70), SIMDE_FLOAT32_C( 97.20), SIMDE_FLOAT32_C( -13.90), SIMDE_FLOAT32_C( 3.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( -66.00), SIMDE_FLOAT32_C( -38.90), SIMDE_FLOAT32_C( 92.90), SIMDE_FLOAT32_C( 44.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 26.00), SIMDE_FLOAT32_C( 35.10), SIMDE_FLOAT32_C( 90.70), SIMDE_FLOAT32_C( 233.72)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 63.90), SIMDE_FLOAT32_C( -84.10), SIMDE_FLOAT32_C( 20.70), SIMDE_FLOAT32_C( -87.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 2.30), SIMDE_FLOAT32_C( -39.50), SIMDE_FLOAT32_C( -17.30), SIMDE_FLOAT32_C( -98.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( -59.10), SIMDE_FLOAT32_C( -12.50), SIMDE_FLOAT32_C( 12.60), SIMDE_FLOAT32_C( 34.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( 63.90), SIMDE_FLOAT32_C( -84.10), SIMDE_FLOAT32_C( 20.70), SIMDE_FLOAT32_C(-8613.10)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -37.30), SIMDE_FLOAT32_C( -17.50), SIMDE_FLOAT32_C( -37.30), SIMDE_FLOAT32_C( -7.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( 85.10), SIMDE_FLOAT32_C( -93.00), SIMDE_FLOAT32_C( -6.70), SIMDE_FLOAT32_C( 16.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 17.50), SIMDE_FLOAT32_C( -83.60), 
SIMDE_FLOAT32_C( 98.60), SIMDE_FLOAT32_C( -20.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( -37.30), SIMDE_FLOAT32_C( -17.50), SIMDE_FLOAT32_C( -37.30), SIMDE_FLOAT32_C( 146.90)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -98.40), SIMDE_FLOAT32_C( 46.60), SIMDE_FLOAT32_C( -57.20), SIMDE_FLOAT32_C( -62.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 23.90), SIMDE_FLOAT32_C( 59.10), SIMDE_FLOAT32_C( 62.20), SIMDE_FLOAT32_C( 48.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( -80.80), SIMDE_FLOAT32_C( -51.00), SIMDE_FLOAT32_C( 63.40), SIMDE_FLOAT32_C( 30.10)), simde_mm_set_ps(SIMDE_FLOAT32_C( -98.40), SIMDE_FLOAT32_C( 46.60), SIMDE_FLOAT32_C( -57.20), SIMDE_FLOAT32_C( 3017.12)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -52.30), SIMDE_FLOAT32_C( 90.80), SIMDE_FLOAT32_C( 10.20), SIMDE_FLOAT32_C( 40.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 25.10), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 38.80), SIMDE_FLOAT32_C( 1.50)), simde_mm_set_ps(SIMDE_FLOAT32_C( -21.30), SIMDE_FLOAT32_C( -30.30), SIMDE_FLOAT32_C( 80.90), SIMDE_FLOAT32_C( -98.10)), simde_mm_set_ps(SIMDE_FLOAT32_C( -52.30), SIMDE_FLOAT32_C( 90.80), SIMDE_FLOAT32_C( 10.20), SIMDE_FLOAT32_C( 36.90)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_fnmsub_ss(test_vec[i].a, test_vec[i].b, test_vec[i].c); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_fmadd_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_fmadd_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_fmadd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_fmadd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_fmadd_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_fmadd_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_fmaddsub_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_fmaddsub_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_fmaddsub_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_fmaddsub_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_fmsub_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_fmsub_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_fmsub_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_fmsub_ps) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm_fmsub_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_fmsub_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_fmsubadd_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_fmsubadd_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_fmsubadd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_fmsubadd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_fnmadd_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_fnmadd_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_fnmadd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_fnmadd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_fnmadd_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_fnmadd_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_fnmsub_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_fnmsub_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_fnmsub_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_fnmsub_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_fnmsub_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_fnmsub_ss) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/gfni.c000066400000000000000000021441171400333146700153270ustar00rootroot00000000000000/* Copyright (c) 2020 Evan Nemerson * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #define SIMDE_TESTS_CURRENT_ISAX gfni #include #include static int test_simde_mm_gf2p8affine_epi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i x; simde__m128i A; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( 92), INT8_C(-112), INT8_C( 58), INT8_C(-121), INT8_C( -84), INT8_C( 106), INT8_C( -83), INT8_C( -57), INT8_C( -16), INT8_C( 29), INT8_C( -68), INT8_C( -73), INT8_C( 115), INT8_C( -71), INT8_C( 124), INT8_C(-123)), simde_mm_set_epi8(INT8_C(-125), INT8_C( 82), INT8_C( -93), INT8_C(-115), INT8_C( 48), INT8_C( 63), INT8_C( -3), INT8_C( 71), INT8_C( 31), INT8_C( -71), INT8_C(-118), INT8_C( 42), INT8_C(-101), INT8_C( 45), INT8_C( 94), INT8_C( 25)), simde_mm_set_epi8(INT8_C( 65), INT8_C( 78), INT8_C( -72), INT8_C( -98), INT8_C( -56), INT8_C( 8), INT8_C( 37), INT8_C( 92), INT8_C( -34), INT8_C(-113), INT8_C( 0), INT8_C( 17), INT8_C( 38), INT8_C( -46), INT8_C( 86), INT8_C( -75)) }, { simde_mm_set_epi8(INT8_C( -51), INT8_C( 87), INT8_C( -81), INT8_C( -31), INT8_C( -33), INT8_C( -54), INT8_C( 4), INT8_C( -53), INT8_C( 74), INT8_C( -65), INT8_C( -58), INT8_C( -69), INT8_C( 85), INT8_C( 106), INT8_C( -20), INT8_C( -61)), simde_mm_set_epi8(INT8_C( 91), INT8_C(-104), INT8_C( 75), INT8_C( 27), INT8_C( 22), INT8_C( 10), INT8_C( 2), INT8_C( -59), INT8_C( -6), INT8_C( -24), INT8_C( 10), INT8_C( 64), INT8_C( -9), INT8_C( -53), INT8_C( -12), INT8_C( 73)), simde_mm_set_epi8(INT8_C( 68), INT8_C(-113), INT8_C( -68), INT8_C( -5), INT8_C( 34), INT8_C( 36), INT8_C( -31), INT8_C( -87), INT8_C( 24), INT8_C( 114), INT8_C(-100), INT8_C( 34), INT8_C( 59), INT8_C( 75), INT8_C( 93), INT8_C( 124)) }, { simde_mm_set_epi8(INT8_C( 53), INT8_C( -39), INT8_C( 123), INT8_C( -73), INT8_C(-128), INT8_C( 112), INT8_C( -47), INT8_C( 125), INT8_C( -38), INT8_C(-114), INT8_C( 26), INT8_C( 58), INT8_C( 61), INT8_C( -20), INT8_C( -15), INT8_C( 106)), simde_mm_set_epi8(INT8_C( 118), INT8_C( 69), INT8_C( 100), INT8_C( 56), INT8_C( 50), INT8_C( 20), INT8_C(-109), INT8_C( 106), 
INT8_C( 101), INT8_C( 21), INT8_C( 4), INT8_C( -48), INT8_C( 106), INT8_C( -7), INT8_C( -14), INT8_C( -89)), simde_mm_set_epi8(INT8_C( -16), INT8_C( 5), INT8_C( 9), INT8_C( 97), INT8_C( 49), INT8_C( 18), INT8_C(-115), INT8_C( -1), INT8_C( 106), INT8_C( -2), INT8_C( -5), INT8_C( 10), INT8_C( -2), INT8_C( -90), INT8_C( -40), INT8_C( 25)) }, { simde_mm_set_epi8(INT8_C( -7), INT8_C( 100), INT8_C( 118), INT8_C( 67), INT8_C( 122), INT8_C( 119), INT8_C( 90), INT8_C( -55), INT8_C( -23), INT8_C( 110), INT8_C( 28), INT8_C( -91), INT8_C( 51), INT8_C( 59), INT8_C( -59), INT8_C( 53)), simde_mm_set_epi8(INT8_C(-101), INT8_C( 15), INT8_C( -63), INT8_C( 65), INT8_C( 7), INT8_C( -86), INT8_C( -61), INT8_C( -99), INT8_C( 72), INT8_C( 45), INT8_C( 23), INT8_C( 121), INT8_C(-107), INT8_C( 110), INT8_C( -55), INT8_C( -65)), simde_mm_set_epi8(INT8_C( 5), INT8_C(-113), INT8_C( 125), INT8_C(-111), INT8_C( 76), INT8_C( -94), INT8_C( 108), INT8_C( -92), INT8_C( 87), INT8_C( 75), INT8_C( -80), INT8_C( 99), INT8_C( 61), INT8_C( -42), INT8_C( -96), INT8_C( 47)) }, { simde_mm_set_epi8(INT8_C( 23), INT8_C( 51), INT8_C( -97), INT8_C(-120), INT8_C( 74), INT8_C( -72), INT8_C( 10), INT8_C( -6), INT8_C( 4), INT8_C( 92), INT8_C( 126), INT8_C( -75), INT8_C( -6), INT8_C( 29), INT8_C( 112), INT8_C(-128)), simde_mm_set_epi8(INT8_C( -67), INT8_C( -37), INT8_C(-104), INT8_C( -9), INT8_C(-118), INT8_C( 101), INT8_C( 85), INT8_C( -70), INT8_C( -55), INT8_C( 77), INT8_C(-100), INT8_C( 82), INT8_C( -87), INT8_C( 63), INT8_C( -31), INT8_C( 99)), simde_mm_set_epi8(INT8_C( 38), INT8_C( -26), INT8_C( 46), INT8_C( 121), INT8_C( 22), INT8_C( 31), INT8_C( 124), INT8_C( -17), INT8_C( 87), INT8_C(-121), INT8_C( -33), INT8_C( 45), INT8_C( -84), INT8_C( -65), INT8_C( 102), INT8_C( 36)) }, { simde_mm_set_epi8(INT8_C( -93), INT8_C( -32), INT8_C( 26), INT8_C( -58), INT8_C( 54), INT8_C( 13), INT8_C( -9), INT8_C( 107), INT8_C( 45), INT8_C( -14), INT8_C( -76), INT8_C( -47), INT8_C( -81), INT8_C( 125), INT8_C( 36), 
INT8_C(-128)), simde_mm_set_epi8(INT8_C( -18), INT8_C( 50), INT8_C(-118), INT8_C( 32), INT8_C( 64), INT8_C(-128), INT8_C( 45), INT8_C( -2), INT8_C( -74), INT8_C( 107), INT8_C( -62), INT8_C( 23), INT8_C( 71), INT8_C( -89), INT8_C(-114), INT8_C( 59)), simde_mm_set_epi8(INT8_C( -40), INT8_C(-114), INT8_C( -79), INT8_C( 3), INT8_C( 126), INT8_C( 53), INT8_C( 10), INT8_C( 41), INT8_C( -45), INT8_C( -41), INT8_C( 71), INT8_C( 49), INT8_C( 73), INT8_C( 76), INT8_C( -85), INT8_C( 20)) }, { simde_mm_set_epi8(INT8_C( -67), INT8_C( -83), INT8_C( 49), INT8_C(-106), INT8_C( -17), INT8_C(-100), INT8_C( 12), INT8_C( -4), INT8_C( 115), INT8_C( 51), INT8_C( -39), INT8_C( 71), INT8_C( 77), INT8_C( -21), INT8_C( -53), INT8_C( -85)), simde_mm_set_epi8(INT8_C( -61), INT8_C( 108), INT8_C( 88), INT8_C( -96), INT8_C( 69), INT8_C( -99), INT8_C( -76), INT8_C( 17), INT8_C( 11), INT8_C( -75), INT8_C( -45), INT8_C( 118), INT8_C( 33), INT8_C( -59), INT8_C( 86), INT8_C( 64)), simde_mm_set_epi8(INT8_C( 83), INT8_C( -73), INT8_C( 110), INT8_C(-113), INT8_C( -95), INT8_C( -88), INT8_C( 37), INT8_C( -11), INT8_C( -77), INT8_C( 95), INT8_C( -61), INT8_C(-115), INT8_C( -63), INT8_C( -38), INT8_C( -64), INT8_C( 54)) }, { simde_mm_set_epi8(INT8_C( 122), INT8_C( 121), INT8_C( 24), INT8_C( 62), INT8_C( 21), INT8_C( -43), INT8_C( 93), INT8_C( -47), INT8_C( 58), INT8_C(-128), INT8_C( 124), INT8_C( 87), INT8_C( 28), INT8_C( 96), INT8_C( -90), INT8_C( -42)), simde_mm_set_epi8(INT8_C( -56), INT8_C( 37), INT8_C(-105), INT8_C( -59), INT8_C( 68), INT8_C( 78), INT8_C(-104), INT8_C( -97), INT8_C(-118), INT8_C( 78), INT8_C( 55), INT8_C( 96), INT8_C( -17), INT8_C( -60), INT8_C( 41), INT8_C( -68)), simde_mm_set_epi8(INT8_C( -53), INT8_C( -31), INT8_C( 84), INT8_C( 76), INT8_C(-123), INT8_C( 113), INT8_C( 93), INT8_C( -49), INT8_C( -19), INT8_C( -64), INT8_C( 118), INT8_C( 58), INT8_C(-112), INT8_C(-105), INT8_C( -67), INT8_C( -33)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { 
simde__m128i r = simde_mm_gf2p8affine_epi64_epi8(test_vec[i].x, test_vec[i].A, INT8_C( 113)); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_gf2p8affine_epi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i x; simde__m256i A; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C(-125), INT8_C( 82), INT8_C( -93), INT8_C(-115), INT8_C( 48), INT8_C( 63), INT8_C( -3), INT8_C( 71), INT8_C( 31), INT8_C( -71), INT8_C(-118), INT8_C( 42), INT8_C(-101), INT8_C( 45), INT8_C( 94), INT8_C( 25), INT8_C( 92), INT8_C(-112), INT8_C( 58), INT8_C(-121), INT8_C( -84), INT8_C( 106), INT8_C( -83), INT8_C( -57), INT8_C( -16), INT8_C( 29), INT8_C( -68), INT8_C( -73), INT8_C( 115), INT8_C( -71), INT8_C( 124), INT8_C(-123)), simde_mm256_set_epi8(INT8_C( 91), INT8_C(-104), INT8_C( 75), INT8_C( 27), INT8_C( 22), INT8_C( 10), INT8_C( 2), INT8_C( -59), INT8_C( -6), INT8_C( -24), INT8_C( 10), INT8_C( 64), INT8_C( -9), INT8_C( -53), INT8_C( -12), INT8_C( 73), INT8_C( -51), INT8_C( 87), INT8_C( -81), INT8_C( -31), INT8_C( -33), INT8_C( -54), INT8_C( 4), INT8_C( -53), INT8_C( 74), INT8_C( -65), INT8_C( -58), INT8_C( -69), INT8_C( 85), INT8_C( 106), INT8_C( -20), INT8_C( -61)), simde_mm256_set_epi8(INT8_C( -74), INT8_C( 39), INT8_C( -74), INT8_C( 116), INT8_C( -33), INT8_C(-112), INT8_C( -22), INT8_C( 33), INT8_C( -25), INT8_C( -94), INT8_C( 37), INT8_C( 5), INT8_C( -60), INT8_C( -48), INT8_C( -84), INT8_C(-126), INT8_C(-113), INT8_C( 107), INT8_C( -39), INT8_C( 7), INT8_C(-105), INT8_C( 112), INT8_C( 8), INT8_C( -68), INT8_C(-113), INT8_C( 121), INT8_C( 71), INT8_C( 25), INT8_C( 116), INT8_C(-117), INT8_C( 124), INT8_C( -58)) }, { simde_mm256_set_epi8(INT8_C( 118), INT8_C( 69), INT8_C( 100), INT8_C( 56), INT8_C( 50), INT8_C( 20), INT8_C(-109), INT8_C( 106), INT8_C( 101), INT8_C( 21), INT8_C( 4), INT8_C( -48), INT8_C( 106), INT8_C( -7), INT8_C( -14), INT8_C( -89), INT8_C( 53), INT8_C( -39), INT8_C( 123), INT8_C( -73), INT8_C(-128), INT8_C( 
112), INT8_C( -47), INT8_C( 125), INT8_C( -38), INT8_C(-114), INT8_C( 26), INT8_C( 58), INT8_C( 61), INT8_C( -20), INT8_C( -15), INT8_C( 106)), simde_mm256_set_epi8(INT8_C(-101), INT8_C( 15), INT8_C( -63), INT8_C( 65), INT8_C( 7), INT8_C( -86), INT8_C( -61), INT8_C( -99), INT8_C( 72), INT8_C( 45), INT8_C( 23), INT8_C( 121), INT8_C(-107), INT8_C( 110), INT8_C( -55), INT8_C( -65), INT8_C( -7), INT8_C( 100), INT8_C( 118), INT8_C( 67), INT8_C( 122), INT8_C( 119), INT8_C( 90), INT8_C( -55), INT8_C( -23), INT8_C( 110), INT8_C( 28), INT8_C( -91), INT8_C( 51), INT8_C( 59), INT8_C( -59), INT8_C( 53)), simde_mm256_set_epi8(INT8_C( -56), INT8_C( -59), INT8_C( 58), INT8_C( -58), INT8_C( 22), INT8_C( -41), INT8_C( 12), INT8_C( 120), INT8_C( 111), INT8_C( 48), INT8_C( 114), INT8_C( -31), INT8_C( 72), INT8_C( 126), INT8_C( -17), INT8_C( 114), INT8_C( 9), INT8_C( -73), INT8_C( 125), INT8_C( -12), INT8_C( 69), INT8_C( 121), INT8_C( 102), INT8_C( 39), INT8_C( 111), INT8_C( 86), INT8_C( 101), INT8_C( -34), INT8_C( -37), INT8_C(-100), INT8_C( 56), INT8_C( 41)) }, { simde_mm256_set_epi8(INT8_C( -67), INT8_C( -37), INT8_C(-104), INT8_C( -9), INT8_C(-118), INT8_C( 101), INT8_C( 85), INT8_C( -70), INT8_C( -55), INT8_C( 77), INT8_C(-100), INT8_C( 82), INT8_C( -87), INT8_C( 63), INT8_C( -31), INT8_C( 99), INT8_C( 23), INT8_C( 51), INT8_C( -97), INT8_C(-120), INT8_C( 74), INT8_C( -72), INT8_C( 10), INT8_C( -6), INT8_C( 4), INT8_C( 92), INT8_C( 126), INT8_C( -75), INT8_C( -6), INT8_C( 29), INT8_C( 112), INT8_C(-128)), simde_mm256_set_epi8(INT8_C( -18), INT8_C( 50), INT8_C(-118), INT8_C( 32), INT8_C( 64), INT8_C(-128), INT8_C( 45), INT8_C( -2), INT8_C( -74), INT8_C( 107), INT8_C( -62), INT8_C( 23), INT8_C( 71), INT8_C( -89), INT8_C(-114), INT8_C( 59), INT8_C( -93), INT8_C( -32), INT8_C( 26), INT8_C( -58), INT8_C( 54), INT8_C( 13), INT8_C( -9), INT8_C( 107), INT8_C( 45), INT8_C( -14), INT8_C( -76), INT8_C( -47), INT8_C( -81), INT8_C( 125), INT8_C( 36), INT8_C(-128)), 
simde_mm256_set_epi8(INT8_C( 108), INT8_C( 112), INT8_C( 38), INT8_C( -65), INT8_C( 35), INT8_C( 31), INT8_C( 86), INT8_C( 106), INT8_C( -49), INT8_C( -45), INT8_C(-109), INT8_C( -92), INT8_C( 122), INT8_C( 16), INT8_C( -82), INT8_C( 52), INT8_C( -44), INT8_C( 127), INT8_C( 59), INT8_C( 43), INT8_C( 119), INT8_C( -84), INT8_C( -67), INT8_C( -69), INT8_C( -79), INT8_C(-124), INT8_C( -31), INT8_C( 79), INT8_C( 10), INT8_C(-105), INT8_C( -73), INT8_C( 90)) }, { simde_mm256_set_epi8(INT8_C( -61), INT8_C( 108), INT8_C( 88), INT8_C( -96), INT8_C( 69), INT8_C( -99), INT8_C( -76), INT8_C( 17), INT8_C( 11), INT8_C( -75), INT8_C( -45), INT8_C( 118), INT8_C( 33), INT8_C( -59), INT8_C( 86), INT8_C( 64), INT8_C( -67), INT8_C( -83), INT8_C( 49), INT8_C(-106), INT8_C( -17), INT8_C(-100), INT8_C( 12), INT8_C( -4), INT8_C( 115), INT8_C( 51), INT8_C( -39), INT8_C( 71), INT8_C( 77), INT8_C( -21), INT8_C( -53), INT8_C( -85)), simde_mm256_set_epi8(INT8_C( -56), INT8_C( 37), INT8_C(-105), INT8_C( -59), INT8_C( 68), INT8_C( 78), INT8_C(-104), INT8_C( -97), INT8_C(-118), INT8_C( 78), INT8_C( 55), INT8_C( 96), INT8_C( -17), INT8_C( -60), INT8_C( 41), INT8_C( -68), INT8_C( 122), INT8_C( 121), INT8_C( 24), INT8_C( 62), INT8_C( 21), INT8_C( -43), INT8_C( 93), INT8_C( -47), INT8_C( 58), INT8_C(-128), INT8_C( 124), INT8_C( 87), INT8_C( 28), INT8_C( 96), INT8_C( -90), INT8_C( -42)), simde_mm256_set_epi8(INT8_C( 26), INT8_C( -96), INT8_C( -40), INT8_C( 11), INT8_C( -51), INT8_C( 28), INT8_C( 113), INT8_C(-114), INT8_C( 84), INT8_C( -49), INT8_C(-120), INT8_C( 7), INT8_C( 76), INT8_C( -83), INT8_C( -37), INT8_C( -2), INT8_C( 85), INT8_C( -86), INT8_C( -62), INT8_C( -22), INT8_C( 64), INT8_C( -84), INT8_C( -13), INT8_C( 68), INT8_C( 81), INT8_C( -3), INT8_C( 42), INT8_C( 117), INT8_C( -87), INT8_C( 27), INT8_C( 126), INT8_C( -73)) }, { simde_mm256_set_epi8(INT8_C( -54), INT8_C( -58), INT8_C(-101), INT8_C( 125), INT8_C( -73), INT8_C( -39), INT8_C(-121), INT8_C( 30), INT8_C( -17), INT8_C( 12), 
INT8_C( -86), INT8_C( -34), INT8_C( 61), INT8_C( 53), INT8_C( 73), INT8_C( 85), INT8_C( 87), INT8_C( -21), INT8_C( -86), INT8_C( 121), INT8_C( 27), INT8_C( 115), INT8_C(-124), INT8_C( -40), INT8_C( -59), INT8_C( 52), INT8_C( 99), INT8_C( -34), INT8_C( 76), INT8_C( 41), INT8_C( -93), INT8_C(-112)), simde_mm256_set_epi8(INT8_C( 31), INT8_C( -32), INT8_C( -69), INT8_C(-120), INT8_C( 29), INT8_C( 35), INT8_C( -67), INT8_C( 30), INT8_C( -13), INT8_C( -62), INT8_C( 104), INT8_C( 56), INT8_C( -20), INT8_C( 50), INT8_C( 79), INT8_C( 105), INT8_C( -40), INT8_C(-109), INT8_C(-116), INT8_C( 88), INT8_C( 42), INT8_C( 110), INT8_C(-119), INT8_C( -31), INT8_C( -21), INT8_C( 111), INT8_C(-125), INT8_C(-103), INT8_C( -29), INT8_C( 95), INT8_C(-104), INT8_C( 45)), simde_mm256_set_epi8(INT8_C( -16), INT8_C( -4), INT8_C( 82), INT8_C( 12), INT8_C( 56), INT8_C( -11), INT8_C(-117), INT8_C( -72), INT8_C(-109), INT8_C( 72), INT8_C( -43), INT8_C( -58), INT8_C( 29), INT8_C( -63), INT8_C( 14), INT8_C( -85), INT8_C( -78), INT8_C(-105), INT8_C( -4), INT8_C( 105), INT8_C( 66), INT8_C( 38), INT8_C( 39), INT8_C( -36), INT8_C( -73), INT8_C( -99), INT8_C( -20), INT8_C( -68), INT8_C( -66), INT8_C( 3), INT8_C(-126), INT8_C( -15)) }, { simde_mm256_set_epi8(INT8_C( 112), INT8_C( 2), INT8_C( -69), INT8_C(-108), INT8_C( -48), INT8_C( 49), INT8_C( -97), INT8_C( 65), INT8_C( 82), INT8_C(-120), INT8_C( 44), INT8_C( -16), INT8_C( 69), INT8_C( 113), INT8_C( 37), INT8_C( -3), INT8_C(-126), INT8_C( 14), INT8_C( 120), INT8_C( 66), INT8_C( -26), INT8_C( 114), INT8_C( -17), INT8_C( 54), INT8_C(-115), INT8_C( -70), INT8_C( 81), INT8_C( -70), INT8_C(-100), INT8_C( 69), INT8_C( -73), INT8_C( 75)), simde_mm256_set_epi8(INT8_C( 13), INT8_C( 114), INT8_C( -55), INT8_C( -70), INT8_C( -86), INT8_C( -74), INT8_C( -53), INT8_C( 93), INT8_C(-107), INT8_C( 88), INT8_C(-116), INT8_C(-100), INT8_C( -14), INT8_C( -86), INT8_C( 18), INT8_C(-108), INT8_C( 24), INT8_C( -37), INT8_C( 81), INT8_C( 91), INT8_C( 122), INT8_C( 76), 
INT8_C( -21), INT8_C( -97), INT8_C( 13), INT8_C( -19), INT8_C( 87), INT8_C(-108), INT8_C( 39), INT8_C( 66), INT8_C( 79), INT8_C( -12)), simde_mm256_set_epi8(INT8_C(-110), INT8_C( -66), INT8_C( 74), INT8_C( -77), INT8_C( -44), INT8_C(-111), INT8_C( -47), INT8_C( -57), INT8_C( 125), INT8_C( 87), INT8_C( 87), INT8_C(-128), INT8_C( 90), INT8_C( 60), INT8_C( 120), INT8_C( 34), INT8_C( -36), INT8_C( 69), INT8_C(-114), INT8_C( 96), INT8_C( 82), INT8_C( -81), INT8_C( 103), INT8_C( 113), INT8_C(-123), INT8_C( 103), INT8_C( -7), INT8_C( 103), INT8_C( 94), INT8_C( -86), INT8_C( -84), INT8_C( 66)) }, { simde_mm256_set_epi8(INT8_C( -86), INT8_C( 40), INT8_C( 4), INT8_C(-124), INT8_C( 35), INT8_C( 115), INT8_C( 21), INT8_C( 14), INT8_C( -63), INT8_C(-123), INT8_C( -55), INT8_C( 82), INT8_C(-104), INT8_C( -96), INT8_C( -98), INT8_C(-116), INT8_C( 118), INT8_C( 105), INT8_C( -99), INT8_C( 73), INT8_C( -46), INT8_C( 60), INT8_C( 67), INT8_C( 75), INT8_C(-111), INT8_C( 102), INT8_C( -17), INT8_C( -13), INT8_C( 71), INT8_C( 80), INT8_C( 109), INT8_C( -40)), simde_mm256_set_epi8(INT8_C( 6), INT8_C( 112), INT8_C( -23), INT8_C( -13), INT8_C( 68), INT8_C( 65), INT8_C( 87), INT8_C( -31), INT8_C(-110), INT8_C( -3), INT8_C( 46), INT8_C( -24), INT8_C( 26), INT8_C(-108), INT8_C( -39), INT8_C( 72), INT8_C( 66), INT8_C( -83), INT8_C( -22), INT8_C( -71), INT8_C( -5), INT8_C( -66), INT8_C( 36), INT8_C( 92), INT8_C(-115), INT8_C( 63), INT8_C( -65), INT8_C( 54), INT8_C( -13), INT8_C( -99), INT8_C( 49), INT8_C( 36)), simde_mm256_set_epi8(INT8_C(-117), INT8_C( 78), INT8_C(-107), INT8_C( 25), INT8_C( -17), INT8_C( 91), INT8_C( 51), INT8_C( -40), INT8_C( 39), INT8_C( -53), INT8_C( -7), INT8_C( 104), INT8_C( 2), INT8_C( -95), INT8_C( 49), INT8_C( 87), INT8_C( 64), INT8_C(-117), INT8_C( 4), INT8_C( -11), INT8_C( -30), INT8_C( 94), INT8_C( 126), INT8_C( -64), INT8_C( -8), INT8_C( -69), INT8_C( -34), INT8_C( 40), INT8_C( 18), INT8_C( -86), INT8_C( -11), INT8_C( -72)) }, { simde_mm256_set_epi8(INT8_C( 96), 
INT8_C( -97), INT8_C( -92), INT8_C( 96), INT8_C( 90), INT8_C( -49), INT8_C(-117), INT8_C( 30), INT8_C(-114), INT8_C(-106), INT8_C( 10), INT8_C(-120), INT8_C( 27), INT8_C( -2), INT8_C( 89), INT8_C( 41), INT8_C( -25), INT8_C(-109), INT8_C( 120), INT8_C( -70), INT8_C( 60), INT8_C( -33), INT8_C( 121), INT8_C( 49), INT8_C( -80), INT8_C( 118), INT8_C( -64), INT8_C( 38), INT8_C( 20), INT8_C( -60), INT8_C( -96), INT8_C( -5)), simde_mm256_set_epi8(INT8_C( -77), INT8_C( 102), INT8_C( 95), INT8_C( -28), INT8_C( 18), INT8_C( -12), INT8_C( 60), INT8_C( -53), INT8_C( 29), INT8_C( 49), INT8_C( 11), INT8_C( -44), INT8_C( 116), INT8_C( 50), INT8_C(-112), INT8_C( 1), INT8_C( 6), INT8_C( 22), INT8_C( -56), INT8_C( -34), INT8_C(-113), INT8_C( -15), INT8_C( -61), INT8_C( 70), INT8_C( -23), INT8_C( 15), INT8_C( -49), INT8_C( -19), INT8_C( -17), INT8_C(-127), INT8_C( -34), INT8_C( 64)), simde_mm256_set_epi8(INT8_C( 1), INT8_C( -96), INT8_C( 104), INT8_C( 1), INT8_C( 76), INT8_C( 123), INT8_C( -69), INT8_C(-116), INT8_C( -76), INT8_C( -54), INT8_C( -27), INT8_C(-119), INT8_C( 25), INT8_C( -27), INT8_C( 37), INT8_C( 116), INT8_C( 68), INT8_C( 57), INT8_C( 62), INT8_C( 117), INT8_C( 73), INT8_C( 82), INT8_C( 78), INT8_C( -66), INT8_C( -32), INT8_C( 72), INT8_C( 100), INT8_C( -43), INT8_C( -38), INT8_C( 58), INT8_C( -96), INT8_C( 11)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_gf2p8affine_epi64_epi8(test_vec[i].x, test_vec[i].A, 196); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_gf2p8affine_epi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i x; simde__m512i A; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( 91), INT8_C(-104), INT8_C( 75), INT8_C( 27), INT8_C( 22), INT8_C( 10), INT8_C( 2), INT8_C( -59), INT8_C( -6), INT8_C( -24), INT8_C( 10), INT8_C( 64), INT8_C( -9), INT8_C( -53), INT8_C( -12), INT8_C( 73), INT8_C( -51), INT8_C( 87), INT8_C( -81), INT8_C( 
-31), INT8_C( -33), INT8_C( -54), INT8_C( 4), INT8_C( -53), INT8_C( 74), INT8_C( -65), INT8_C( -58), INT8_C( -69), INT8_C( 85), INT8_C( 106), INT8_C( -20), INT8_C( -61), INT8_C(-125), INT8_C( 82), INT8_C( -93), INT8_C(-115), INT8_C( 48), INT8_C( 63), INT8_C( -3), INT8_C( 71), INT8_C( 31), INT8_C( -71), INT8_C(-118), INT8_C( 42), INT8_C(-101), INT8_C( 45), INT8_C( 94), INT8_C( 25), INT8_C( 92), INT8_C(-112), INT8_C( 58), INT8_C(-121), INT8_C( -84), INT8_C( 106), INT8_C( -83), INT8_C( -57), INT8_C( -16), INT8_C( 29), INT8_C( -68), INT8_C( -73), INT8_C( 115), INT8_C( -71), INT8_C( 124), INT8_C(-123)), simde_mm512_set_epi8(INT8_C(-101), INT8_C( 15), INT8_C( -63), INT8_C( 65), INT8_C( 7), INT8_C( -86), INT8_C( -61), INT8_C( -99), INT8_C( 72), INT8_C( 45), INT8_C( 23), INT8_C( 121), INT8_C(-107), INT8_C( 110), INT8_C( -55), INT8_C( -65), INT8_C( -7), INT8_C( 100), INT8_C( 118), INT8_C( 67), INT8_C( 122), INT8_C( 119), INT8_C( 90), INT8_C( -55), INT8_C( -23), INT8_C( 110), INT8_C( 28), INT8_C( -91), INT8_C( 51), INT8_C( 59), INT8_C( -59), INT8_C( 53), INT8_C( 118), INT8_C( 69), INT8_C( 100), INT8_C( 56), INT8_C( 50), INT8_C( 20), INT8_C(-109), INT8_C( 106), INT8_C( 101), INT8_C( 21), INT8_C( 4), INT8_C( -48), INT8_C( 106), INT8_C( -7), INT8_C( -14), INT8_C( -89), INT8_C( 53), INT8_C( -39), INT8_C( 123), INT8_C( -73), INT8_C(-128), INT8_C( 112), INT8_C( -47), INT8_C( 125), INT8_C( -38), INT8_C(-114), INT8_C( 26), INT8_C( 58), INT8_C( 61), INT8_C( -20), INT8_C( -15), INT8_C( 106)), simde_mm512_set_epi8(INT8_C( -61), INT8_C( -58), INT8_C( 66), INT8_C(-113), INT8_C( 97), INT8_C( -47), INT8_C( 114), INT8_C( -27), INT8_C( -63), INT8_C( -7), INT8_C( 78), INT8_C( 104), INT8_C( 66), INT8_C( 41), INT8_C( 56), INT8_C( 93), INT8_C( 33), INT8_C( 120), INT8_C(-107), INT8_C( -31), INT8_C( 40), INT8_C( -46), INT8_C( 39), INT8_C( 123), INT8_C( 87), INT8_C( 101), INT8_C( -9), INT8_C( -85), INT8_C( -63), INT8_C( -20), INT8_C( 89), INT8_C( -64), INT8_C( -46), INT8_C( 46), INT8_C( 79), 
INT8_C( -84), INT8_C( -27), INT8_C( -39), INT8_C( -49), INT8_C( 50), INT8_C( -81), INT8_C( -31), INT8_C( 9), INT8_C( 16), INT8_C( -64), INT8_C( -28), INT8_C( 117), INT8_C( -8), INT8_C( 7), INT8_C( -76), INT8_C( -55), INT8_C( 17), INT8_C( -7), INT8_C( -64), INT8_C( 54), INT8_C( -9), INT8_C( 38), INT8_C(-127), INT8_C( 74), INT8_C( 42), INT8_C(-102), INT8_C( 40), INT8_C( -56), INT8_C( 0)) }, { simde_mm512_set_epi8(INT8_C( -18), INT8_C( 50), INT8_C(-118), INT8_C( 32), INT8_C( 64), INT8_C(-128), INT8_C( 45), INT8_C( -2), INT8_C( -74), INT8_C( 107), INT8_C( -62), INT8_C( 23), INT8_C( 71), INT8_C( -89), INT8_C(-114), INT8_C( 59), INT8_C( -93), INT8_C( -32), INT8_C( 26), INT8_C( -58), INT8_C( 54), INT8_C( 13), INT8_C( -9), INT8_C( 107), INT8_C( 45), INT8_C( -14), INT8_C( -76), INT8_C( -47), INT8_C( -81), INT8_C( 125), INT8_C( 36), INT8_C(-128), INT8_C( -67), INT8_C( -37), INT8_C(-104), INT8_C( -9), INT8_C(-118), INT8_C( 101), INT8_C( 85), INT8_C( -70), INT8_C( -55), INT8_C( 77), INT8_C(-100), INT8_C( 82), INT8_C( -87), INT8_C( 63), INT8_C( -31), INT8_C( 99), INT8_C( 23), INT8_C( 51), INT8_C( -97), INT8_C(-120), INT8_C( 74), INT8_C( -72), INT8_C( 10), INT8_C( -6), INT8_C( 4), INT8_C( 92), INT8_C( 126), INT8_C( -75), INT8_C( -6), INT8_C( 29), INT8_C( 112), INT8_C(-128)), simde_mm512_set_epi8(INT8_C( -56), INT8_C( 37), INT8_C(-105), INT8_C( -59), INT8_C( 68), INT8_C( 78), INT8_C(-104), INT8_C( -97), INT8_C(-118), INT8_C( 78), INT8_C( 55), INT8_C( 96), INT8_C( -17), INT8_C( -60), INT8_C( 41), INT8_C( -68), INT8_C( 122), INT8_C( 121), INT8_C( 24), INT8_C( 62), INT8_C( 21), INT8_C( -43), INT8_C( 93), INT8_C( -47), INT8_C( 58), INT8_C(-128), INT8_C( 124), INT8_C( 87), INT8_C( 28), INT8_C( 96), INT8_C( -90), INT8_C( -42), INT8_C( -61), INT8_C( 108), INT8_C( 88), INT8_C( -96), INT8_C( 69), INT8_C( -99), INT8_C( -76), INT8_C( 17), INT8_C( 11), INT8_C( -75), INT8_C( -45), INT8_C( 118), INT8_C( 33), INT8_C( -59), INT8_C( 86), INT8_C( 64), INT8_C( -67), INT8_C( -83), INT8_C( 49), 
INT8_C(-106), INT8_C( -17), INT8_C(-100), INT8_C( 12), INT8_C( -4), INT8_C( 115), INT8_C( 51), INT8_C( -39), INT8_C( 71), INT8_C( 77), INT8_C( -21), INT8_C( -53), INT8_C( -85)), simde_mm512_set_epi8(INT8_C( 12), INT8_C( 99), INT8_C(-119), INT8_C( 3), INT8_C( 56), INT8_C( -52), INT8_C( -46), INT8_C( -56), INT8_C( 73), INT8_C( 119), INT8_C( -99), INT8_C( 112), INT8_C( -50), INT8_C(-103), INT8_C( -62), INT8_C( -55), INT8_C( 81), INT8_C( 73), INT8_C( -72), INT8_C( 51), INT8_C(-124), INT8_C( -60), INT8_C( 53), INT8_C( 93), INT8_C( -91), INT8_C( 94), INT8_C( -25), INT8_C( -6), INT8_C( -82), INT8_C(-108), INT8_C( -72), INT8_C( -61), INT8_C( 35), INT8_C( 13), INT8_C( -86), INT8_C( 19), INT8_C( 79), INT8_C( -97), INT8_C( 49), INT8_C( -31), INT8_C( -3), INT8_C( -79), INT8_C( 2), INT8_C( -18), INT8_C( 11), INT8_C( 68), INT8_C( -26), INT8_C(-115), INT8_C( 88), INT8_C( 52), INT8_C( 16), INT8_C( 73), INT8_C( 122), INT8_C( 115), INT8_C( -22), INT8_C( -5), INT8_C( 25), INT8_C(-105), INT8_C( -33), INT8_C( -90), INT8_C( 35), INT8_C( 21), INT8_C( -40), INT8_C( -27)) }, { simde_mm512_set_epi8(INT8_C( 31), INT8_C( -32), INT8_C( -69), INT8_C(-120), INT8_C( 29), INT8_C( 35), INT8_C( -67), INT8_C( 30), INT8_C( -13), INT8_C( -62), INT8_C( 104), INT8_C( 56), INT8_C( -20), INT8_C( 50), INT8_C( 79), INT8_C( 105), INT8_C( -40), INT8_C(-109), INT8_C(-116), INT8_C( 88), INT8_C( 42), INT8_C( 110), INT8_C(-119), INT8_C( -31), INT8_C( -21), INT8_C( 111), INT8_C(-125), INT8_C(-103), INT8_C( -29), INT8_C( 95), INT8_C(-104), INT8_C( 45), INT8_C( -54), INT8_C( -58), INT8_C(-101), INT8_C( 125), INT8_C( -73), INT8_C( -39), INT8_C(-121), INT8_C( 30), INT8_C( -17), INT8_C( 12), INT8_C( -86), INT8_C( -34), INT8_C( 61), INT8_C( 53), INT8_C( 73), INT8_C( 85), INT8_C( 87), INT8_C( -21), INT8_C( -86), INT8_C( 121), INT8_C( 27), INT8_C( 115), INT8_C(-124), INT8_C( -40), INT8_C( -59), INT8_C( 52), INT8_C( 99), INT8_C( -34), INT8_C( 76), INT8_C( 41), INT8_C( -93), INT8_C(-112)), simde_mm512_set_epi8(INT8_C( 13), 
INT8_C( 114), INT8_C( -55), INT8_C( -70), INT8_C( -86), INT8_C( -74), INT8_C( -53), INT8_C( 93), INT8_C(-107), INT8_C( 88), INT8_C(-116), INT8_C(-100), INT8_C( -14), INT8_C( -86), INT8_C( 18), INT8_C(-108), INT8_C( 24), INT8_C( -37), INT8_C( 81), INT8_C( 91), INT8_C( 122), INT8_C( 76), INT8_C( -21), INT8_C( -97), INT8_C( 13), INT8_C( -19), INT8_C( 87), INT8_C(-108), INT8_C( 39), INT8_C( 66), INT8_C( 79), INT8_C( -12), INT8_C( 112), INT8_C( 2), INT8_C( -69), INT8_C(-108), INT8_C( -48), INT8_C( 49), INT8_C( -97), INT8_C( 65), INT8_C( 82), INT8_C(-120), INT8_C( 44), INT8_C( -16), INT8_C( 69), INT8_C( 113), INT8_C( 37), INT8_C( -3), INT8_C(-126), INT8_C( 14), INT8_C( 120), INT8_C( 66), INT8_C( -26), INT8_C( 114), INT8_C( -17), INT8_C( 54), INT8_C(-115), INT8_C( -70), INT8_C( 81), INT8_C( -70), INT8_C(-100), INT8_C( 69), INT8_C( -73), INT8_C( 75)), simde_mm512_set_epi8(INT8_C( 104), INT8_C(-127), INT8_C(-113), INT8_C( -96), INT8_C( 18), INT8_C(-124), INT8_C( 84), INT8_C( -83), INT8_C( 52), INT8_C( -34), INT8_C( 13), INT8_C( -60), INT8_C( 61), INT8_C(-102), INT8_C( -63), INT8_C( 12), INT8_C( -39), INT8_C( 72), INT8_C(-104), INT8_C( 27), INT8_C( 112), INT8_C( -82), INT8_C( -10), INT8_C( 35), INT8_C( -97), INT8_C( -54), INT8_C( -88), INT8_C( 19), INT8_C( -36), INT8_C( -44), INT8_C( 68), INT8_C( 88), INT8_C( -50), INT8_C( -62), INT8_C( -58), INT8_C( 32), INT8_C( -17), INT8_C( 17), INT8_C( -73), INT8_C( 54), INT8_C( 125), INT8_C( 83), INT8_C( -32), INT8_C( -56), INT8_C( -26), INT8_C( 96), INT8_C( -50), INT8_C( 53), INT8_C( -80), INT8_C( 37), INT8_C( 25), INT8_C( 43), INT8_C( 88), INT8_C(-106), INT8_C(-126), INT8_C( -50), INT8_C( 106), INT8_C( 100), INT8_C( -64), INT8_C(-128), INT8_C( 79), INT8_C( 53), INT8_C( 63), INT8_C( 4)) }, { simde_mm512_set_epi8(INT8_C( 6), INT8_C( 112), INT8_C( -23), INT8_C( -13), INT8_C( 68), INT8_C( 65), INT8_C( 87), INT8_C( -31), INT8_C(-110), INT8_C( -3), INT8_C( 46), INT8_C( -24), INT8_C( 26), INT8_C(-108), INT8_C( -39), INT8_C( 72), INT8_C( 66), 
INT8_C( -83), INT8_C( -22), INT8_C( -71), INT8_C( -5), INT8_C( -66), INT8_C( 36), INT8_C( 92), INT8_C(-115), INT8_C( 63), INT8_C( -65), INT8_C( 54), INT8_C( -13), INT8_C( -99), INT8_C( 49), INT8_C( 36), INT8_C( -86), INT8_C( 40), INT8_C( 4), INT8_C(-124), INT8_C( 35), INT8_C( 115), INT8_C( 21), INT8_C( 14), INT8_C( -63), INT8_C(-123), INT8_C( -55), INT8_C( 82), INT8_C(-104), INT8_C( -96), INT8_C( -98), INT8_C(-116), INT8_C( 118), INT8_C( 105), INT8_C( -99), INT8_C( 73), INT8_C( -46), INT8_C( 60), INT8_C( 67), INT8_C( 75), INT8_C(-111), INT8_C( 102), INT8_C( -17), INT8_C( -13), INT8_C( 71), INT8_C( 80), INT8_C( 109), INT8_C( -40)), simde_mm512_set_epi8(INT8_C( -77), INT8_C( 102), INT8_C( 95), INT8_C( -28), INT8_C( 18), INT8_C( -12), INT8_C( 60), INT8_C( -53), INT8_C( 29), INT8_C( 49), INT8_C( 11), INT8_C( -44), INT8_C( 116), INT8_C( 50), INT8_C(-112), INT8_C( 1), INT8_C( 6), INT8_C( 22), INT8_C( -56), INT8_C( -34), INT8_C(-113), INT8_C( -15), INT8_C( -61), INT8_C( 70), INT8_C( -23), INT8_C( 15), INT8_C( -49), INT8_C( -19), INT8_C( -17), INT8_C(-127), INT8_C( -34), INT8_C( 64), INT8_C( 96), INT8_C( -97), INT8_C( -92), INT8_C( 96), INT8_C( 90), INT8_C( -49), INT8_C(-117), INT8_C( 30), INT8_C(-114), INT8_C(-106), INT8_C( 10), INT8_C(-120), INT8_C( 27), INT8_C( -2), INT8_C( 89), INT8_C( 41), INT8_C( -25), INT8_C(-109), INT8_C( 120), INT8_C( -70), INT8_C( 60), INT8_C( -33), INT8_C( 121), INT8_C( 49), INT8_C( -80), INT8_C( 118), INT8_C( -64), INT8_C( 38), INT8_C( 20), INT8_C( -60), INT8_C( -96), INT8_C( -5)), simde_mm512_set_epi8(INT8_C( -8), INT8_C( -79), INT8_C( 44), INT8_C( 10), INT8_C( -63), INT8_C( 42), INT8_C( -90), INT8_C( -24), INT8_C( 22), INT8_C(-125), INT8_C( 11), INT8_C( 102), INT8_C( 91), INT8_C( 43), INT8_C( -88), INT8_C( 28), INT8_C( 54), INT8_C( -86), INT8_C( 118), INT8_C( 27), INT8_C( 44), INT8_C( 43), INT8_C( -70), INT8_C( 64), INT8_C( 66), INT8_C( 48), INT8_C( 77), INT8_C( 80), INT8_C(-111), INT8_C( 2), INT8_C( 103), INT8_C( 70), INT8_C( 106), INT8_C( 
-2), INT8_C( -89), INT8_C( -63), INT8_C(-100), INT8_C( 55), INT8_C( 87), INT8_C( -89), INT8_C(-102), INT8_C( -39), INT8_C( 103), INT8_C( 36), INT8_C( -91), INT8_C(-118), INT8_C( -79), INT8_C( -12), INT8_C( 93), INT8_C( 38), INT8_C( 122), INT8_C( -5), INT8_C(-102), INT8_C( 111), INT8_C( -84), INT8_C( -48), INT8_C( -9), INT8_C( -36), INT8_C( 57), INT8_C( 16), INT8_C(-105), INT8_C( 52), INT8_C( 86), INT8_C( 81)) }, { simde_mm512_set_epi8(INT8_C( 35), INT8_C( 81), INT8_C( 98), INT8_C( 19), INT8_C( 101), INT8_C( 68), INT8_C( -12), INT8_C( 76), INT8_C( 124), INT8_C(-120), INT8_C( -47), INT8_C( -64), INT8_C( 123), INT8_C( -63), INT8_C(-104), INT8_C( 112), INT8_C( 121), INT8_C( 5), INT8_C(-114), INT8_C( -67), INT8_C( -30), INT8_C( 49), INT8_C( -62), INT8_C( -64), INT8_C( 23), INT8_C( 30), INT8_C( 77), INT8_C( -34), INT8_C( -79), INT8_C( 41), INT8_C( 56), INT8_C( 80), INT8_C( 101), INT8_C( 101), INT8_C( 86), INT8_C( -92), INT8_C( 67), INT8_C( 78), INT8_C( 66), INT8_C( -34), INT8_C( -60), INT8_C( 101), INT8_C( -49), INT8_C( 58), INT8_C( 85), INT8_C( 98), INT8_C( -83), INT8_C( 120), INT8_C( -94), INT8_C( -69), INT8_C( 114), INT8_C( 51), INT8_C( -6), INT8_C( 41), INT8_C( 1), INT8_C( -18), INT8_C( 83), INT8_C( -45), INT8_C( 15), INT8_C( 29), INT8_C( 33), INT8_C( -73), INT8_C( 69), INT8_C( 101)), simde_mm512_set_epi8(INT8_C( -32), INT8_C(-108), INT8_C(-123), INT8_C( -29), INT8_C( 2), INT8_C( -87), INT8_C( -27), INT8_C( 55), INT8_C(-108), INT8_C( 107), INT8_C( 45), INT8_C( -42), INT8_C( -86), INT8_C(-117), INT8_C( 52), INT8_C(-105), INT8_C( -99), INT8_C(-127), INT8_C( 96), INT8_C( -45), INT8_C( -78), INT8_C( 74), INT8_C( -81), INT8_C(-117), INT8_C( -7), INT8_C( -62), INT8_C( -97), INT8_C( -59), INT8_C( -31), INT8_C(-122), INT8_C( 86), INT8_C( -86), INT8_C( -59), INT8_C( 56), INT8_C( -37), INT8_C( 107), INT8_C( -21), INT8_C( 121), INT8_C( 126), INT8_C( -35), INT8_C(-108), INT8_C( 116), INT8_C( 37), INT8_C( 25), INT8_C( 118), INT8_C(-106), INT8_C( 26), INT8_C( 84), INT8_C(-100), 
INT8_C( 47), INT8_C( 87), INT8_C( 88), INT8_C( 91), INT8_C( 11), INT8_C( 5), INT8_C(-103), INT8_C( 16), INT8_C( 32), INT8_C( 111), INT8_C( 4), INT8_C(-119), INT8_C( 25), INT8_C( 22), INT8_C( -98)), simde_mm512_set_epi8(INT8_C(-100), INT8_C( 38), INT8_C( 57), INT8_C( -9), INT8_C(-117), INT8_C(-114), INT8_C(-118), INT8_C( -82), INT8_C( 111), INT8_C(-114), INT8_C( -35), INT8_C( -78), INT8_C( -66), INT8_C( 20), INT8_C( 71), INT8_C(-108), INT8_C( 74), INT8_C(-117), INT8_C(-126), INT8_C( -4), INT8_C( 90), INT8_C(-121), INT8_C( 14), INT8_C( -10), INT8_C( -45), INT8_C( 75), INT8_C( -82), INT8_C( -81), INT8_C( 119), INT8_C( 8), INT8_C( 80), INT8_C( 31), INT8_C( -6), INT8_C( -6), INT8_C(-121), INT8_C( 47), INT8_C( 29), INT8_C( -97), INT8_C( -96), INT8_C( -20), INT8_C( 5), INT8_C( 62), INT8_C( 49), INT8_C( -44), INT8_C( -45), INT8_C( -11), INT8_C( -59), INT8_C( 54), INT8_C( -76), INT8_C( 100), INT8_C( -76), INT8_C( 94), INT8_C(-114), INT8_C( 78), INT8_C( -9), INT8_C( 84), INT8_C( 20), INT8_C(-124), INT8_C(-119), INT8_C( -84), INT8_C( 51), INT8_C( 74), INT8_C( -3), INT8_C( -5)) }, { simde_mm512_set_epi8(INT8_C( 66), INT8_C( -80), INT8_C( -63), INT8_C( 61), INT8_C( -93), INT8_C( -68), INT8_C( -6), INT8_C( 29), INT8_C( 23), INT8_C( -68), INT8_C( -72), INT8_C( -60), INT8_C( 43), INT8_C( -79), INT8_C( 121), INT8_C( -29), INT8_C( 97), INT8_C(-101), INT8_C( -12), INT8_C( -68), INT8_C( -5), INT8_C( 120), INT8_C( 60), INT8_C( 117), INT8_C(-113), INT8_C( 37), INT8_C( 65), INT8_C( 75), INT8_C( 114), INT8_C(-102), INT8_C( 95), INT8_C( -11), INT8_C( 60), INT8_C(-109), INT8_C( 126), INT8_C( -65), INT8_C( -87), INT8_C( 29), INT8_C( 126), INT8_C( 71), INT8_C( -89), INT8_C( 115), INT8_C( -78), INT8_C( 20), INT8_C( 106), INT8_C( 12), INT8_C( -71), INT8_C( -99), INT8_C( 22), INT8_C( -59), INT8_C( -40), INT8_C( 71), INT8_C( -27), INT8_C( -58), INT8_C( 93), INT8_C( 56), INT8_C( -74), INT8_C(-107), INT8_C( -37), INT8_C( -36), INT8_C( -90), INT8_C( 79), INT8_C( 105), INT8_C(-115)), 
simde_mm512_set_epi8(INT8_C( 97), INT8_C( -34), INT8_C( 16), INT8_C( 27), INT8_C( -61), INT8_C( 47), INT8_C( -46), INT8_C( -28), INT8_C( -1), INT8_C( 48), INT8_C( 49), INT8_C( -97), INT8_C( -82), INT8_C( -5), INT8_C(-101), INT8_C( 74), INT8_C( -12), INT8_C(-117), INT8_C( 2), INT8_C( 111), INT8_C( 103), INT8_C( 106), INT8_C( 49), INT8_C( 41), INT8_C( -15), INT8_C( -75), INT8_C( 18), INT8_C( 70), INT8_C( 6), INT8_C( -21), INT8_C( -83), INT8_C( -3), INT8_C( -80), INT8_C( -7), INT8_C( 18), INT8_C( 44), INT8_C( -49), INT8_C( -88), INT8_C( 47), INT8_C( -96), INT8_C( -84), INT8_C( -86), INT8_C( 82), INT8_C( -89), INT8_C( 103), INT8_C( 19), INT8_C( -12), INT8_C( 24), INT8_C( 26), INT8_C( 98), INT8_C( -35), INT8_C( -11), INT8_C( 90), INT8_C( -96), INT8_C( 20), INT8_C( 127), INT8_C(-100), INT8_C( 125), INT8_C( 105), INT8_C( -43), INT8_C( 115), INT8_C(-111), INT8_C( 110), INT8_C( 23)), simde_mm512_set_epi8(INT8_C( -88), INT8_C( 60), INT8_C( 57), INT8_C( 95), INT8_C( 49), INT8_C( -76), INT8_C( -65), INT8_C( -2), INT8_C( -29), INT8_C( -64), INT8_C( -39), INT8_C( -64), INT8_C( 91), INT8_C( 77), INT8_C( 108), INT8_C( 122), INT8_C( 27), INT8_C( 13), INT8_C(-102), INT8_C( 9), INT8_C( -51), INT8_C( 42), INT8_C( 10), INT8_C( 67), INT8_C( 7), INT8_C( -37), INT8_C( 75), INT8_C(-105), INT8_C( -16), INT8_C( -71), INT8_C( -54), INT8_C( 22), INT8_C( -49), INT8_C( -77), INT8_C(-119), INT8_C( 122), INT8_C( 113), INT8_C( 118), INT8_C(-119), INT8_C( 77), INT8_C( 78), INT8_C( -20), INT8_C( -53), INT8_C( -68), INT8_C( -77), INT8_C( -37), INT8_C( 78), INT8_C( 76), INT8_C(-125), INT8_C( -13), INT8_C( -5), INT8_C( 76), INT8_C( 89), INT8_C( -20), INT8_C(-105), INT8_C( -29), INT8_C( -34), INT8_C( -26), INT8_C( -28), INT8_C( 65), INT8_C( 101), INT8_C( -67), INT8_C( -16), INT8_C( 26)) }, { simde_mm512_set_epi8(INT8_C( 8), INT8_C( 123), INT8_C( 10), INT8_C( 2), INT8_C( 101), INT8_C( 99), INT8_C( 109), INT8_C( 10), INT8_C( -81), INT8_C(-128), INT8_C( -42), INT8_C( 28), INT8_C( -14), INT8_C( -60), INT8_C( 
95), INT8_C( 76), INT8_C( 34), INT8_C( -72), INT8_C( -83), INT8_C(-103), INT8_C( 124), INT8_C( -24), INT8_C( -55), INT8_C( 22), INT8_C(-115), INT8_C( 43), INT8_C( 53), INT8_C(-100), INT8_C( 126), INT8_C( 123), INT8_C( 70), INT8_C( 79), INT8_C( 47), INT8_C( 116), INT8_C( 38), INT8_C( 25), INT8_C( -54), INT8_C( 18), INT8_C( -58), INT8_C(-123), INT8_C( 40), INT8_C( -54), INT8_C(-122), INT8_C( -64), INT8_C( 111), INT8_C( 66), INT8_C( 57), INT8_C( -34), INT8_C( 67), INT8_C( -68), INT8_C( -84), INT8_C( -7), INT8_C( -99), INT8_C( 12), INT8_C(-115), INT8_C( 121), INT8_C( 67), INT8_C( 39), INT8_C(-116), INT8_C( -98), INT8_C( 21), INT8_C(-107), INT8_C( 14), INT8_C( 51)), simde_mm512_set_epi8(INT8_C( -90), INT8_C( -61), INT8_C( 119), INT8_C( -75), INT8_C( -77), INT8_C( -66), INT8_C( -5), INT8_C(-119), INT8_C( -73), INT8_C( 56), INT8_C( 64), INT8_C( 21), INT8_C( 66), INT8_C( 53), INT8_C( 117), INT8_C( -41), INT8_C( 120), INT8_C(-102), INT8_C( -22), INT8_C( 29), INT8_C( 18), INT8_C( -50), INT8_C(-111), INT8_C( 88), INT8_C( -27), INT8_C( 98), INT8_C( 39), INT8_C( 14), INT8_C( -13), INT8_C( -52), INT8_C( -33), INT8_C( 115), INT8_C( 79), INT8_C( 6), INT8_C( -29), INT8_C( 19), INT8_C( -11), INT8_C( -57), INT8_C( -61), INT8_C( 26), INT8_C( 90), INT8_C( 106), INT8_C( 71), INT8_C( -8), INT8_C(-111), INT8_C( 50), INT8_C( 2), INT8_C( 100), INT8_C( -12), INT8_C( 6), INT8_C( 19), INT8_C( 121), INT8_C( 91), INT8_C( -20), INT8_C( -24), INT8_C( 37), INT8_C( 9), INT8_C( 18), INT8_C( 3), INT8_C( 25), INT8_C( -7), INT8_C( 60), INT8_C( 12), INT8_C( 112)), simde_mm512_set_epi8(INT8_C( -31), INT8_C( 15), INT8_C(-106), INT8_C( 118), INT8_C( -55), INT8_C(-109), INT8_C( 41), INT8_C(-106), INT8_C( 112), INT8_C(-128), INT8_C( -57), INT8_C( 1), INT8_C( 77), INT8_C( -67), INT8_C( -83), INT8_C( 62), INT8_C( 50), INT8_C( 22), INT8_C( -83), INT8_C( 91), INT8_C( -3), INT8_C( 104), INT8_C( 37), INT8_C( -60), INT8_C( -96), INT8_C( -11), INT8_C( -2), INT8_C( -91), INT8_C( 110), INT8_C( -42), INT8_C( 65), 
INT8_C( -4), INT8_C( 53), INT8_C( -53), INT8_C( -55), INT8_C( 101), INT8_C( 110), INT8_C( 118), INT8_C( -36), INT8_C( 59), INT8_C( -96), INT8_C( -6), INT8_C( -6), INT8_C(-106), INT8_C( -40), INT8_C( -23), INT8_C(-115), INT8_C( 71), INT8_C( -14), INT8_C( 79), INT8_C( 82), INT8_C( 9), INT8_C( 58), INT8_C( -38), INT8_C( 39), INT8_C( 104), INT8_C(-118), INT8_C( -54), INT8_C( 8), INT8_C( -76), INT8_C( -58), INT8_C( -42), INT8_C( 30), INT8_C( 16)) }, { simde_mm512_set_epi8(INT8_C( -97), INT8_C( 120), INT8_C( -74), INT8_C( 62), INT8_C( 90), INT8_C( -96), INT8_C( 75), INT8_C( -78), INT8_C( -6), INT8_C( -71), INT8_C( -63), INT8_C( 18), INT8_C( 60), INT8_C( 34), INT8_C( -5), INT8_C( -95), INT8_C( 20), INT8_C( -75), INT8_C(-106), INT8_C( -37), INT8_C( 52), INT8_C( 127), INT8_C(-121), INT8_C( 89), INT8_C( 64), INT8_C( 32), INT8_C(-117), INT8_C( -70), INT8_C( 112), INT8_C( -57), INT8_C( 67), INT8_C( 101), INT8_C( -17), INT8_C( 32), INT8_C( 117), INT8_C( 37), INT8_C( -21), INT8_C( 53), INT8_C( 33), INT8_C(-118), INT8_C( -95), INT8_C( 107), INT8_C( 7), INT8_C( 73), INT8_C( 99), INT8_C( 101), INT8_C( 84), INT8_C( -74), INT8_C( -8), INT8_C( 64), INT8_C( 124), INT8_C( -76), INT8_C( 79), INT8_C( 2), INT8_C( 35), INT8_C( -37), INT8_C( -29), INT8_C( -61), INT8_C( -22), INT8_C( -1), INT8_C( -84), INT8_C( 62), INT8_C(-103), INT8_C( 64)), simde_mm512_set_epi8(INT8_C( -96), INT8_C( 30), INT8_C( 11), INT8_C( -55), INT8_C( -14), INT8_C(-106), INT8_C( 14), INT8_C( -64), INT8_C( 108), INT8_C( -88), INT8_C(-108), INT8_C( 18), INT8_C( 38), INT8_C( 83), INT8_C( 5), INT8_C( 37), INT8_C( -18), INT8_C( 57), INT8_C( -44), INT8_C( -90), INT8_C( 25), INT8_C(-119), INT8_C( 27), INT8_C( -2), INT8_C( -43), INT8_C( 122), INT8_C( 24), INT8_C( 50), INT8_C( 13), INT8_C( -1), INT8_C( 74), INT8_C( -40), INT8_C( 45), INT8_C(-118), INT8_C( -70), INT8_C( 117), INT8_C( -65), INT8_C( 72), INT8_C(-106), INT8_C( 16), INT8_C( -24), INT8_C( -94), INT8_C( 77), INT8_C( -48), INT8_C( -36), INT8_C( 38), INT8_C( -91), 
INT8_C( -47), INT8_C( 42), INT8_C(-103), INT8_C( -94), INT8_C( 107), INT8_C( -44), INT8_C( 48), INT8_C( -99), INT8_C( 117), INT8_C( 113), INT8_C( 88), INT8_C(-102), INT8_C( 23), INT8_C( -5), INT8_C( 85), INT8_C( -40), INT8_C( -45)), simde_mm512_set_epi8(INT8_C( -36), INT8_C( -12), INT8_C(-113), INT8_C( 120), INT8_C(-109), INT8_C( -87), INT8_C( -83), INT8_C( -19), INT8_C( -94), INT8_C( 91), INT8_C( -58), INT8_C( 21), INT8_C( 104), INT8_C( -86), INT8_C( 66), INT8_C( 116), INT8_C( 90), INT8_C( 14), INT8_C( 62), INT8_C( -73), INT8_C( -47), INT8_C( 28), INT8_C(-102), INT8_C( -45), INT8_C( -30), INT8_C( 43), INT8_C( 13), INT8_C( -71), INT8_C( 103), INT8_C( 41), INT8_C( -71), INT8_C( -56), INT8_C( 67), INT8_C( 28), INT8_C( -88), INT8_C( 92), INT8_C( 26), INT8_C(-128), INT8_C( 5), INT8_C( 54), INT8_C( 125), INT8_C( 12), INT8_C(-109), INT8_C( 77), INT8_C( 25), INT8_C( 79), INT8_C( 112), INT8_C( 119), INT8_C( -37), INT8_C(-103), INT8_C( 93), INT8_C( -40), INT8_C( -59), INT8_C( 12), INT8_C( 107), INT8_C( -79), INT8_C( 18), INT8_C( 3), INT8_C( -3), INT8_C(-109), INT8_C( -70), INT8_C( 13), INT8_C( -59), INT8_C( -14)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_gf2p8affine_epi64_epi8(test_vec[i].x, test_vec[i].A, INT8_C( 1)); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_mask_gf2p8affine_epi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i src; simde__mmask16 k; simde__m128i x; simde__m128i A; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( -53), INT8_C( 109), INT8_C( -72), INT8_C( -6), INT8_C( 112), INT8_C( 75), INT8_C( -92), INT8_C( 12), INT8_C( 98), INT8_C( -63), INT8_C( 64), INT8_C( -41), INT8_C( 43), INT8_C( -55), INT8_C( -46), INT8_C( 88)), UINT16_C( 5407), simde_mm_set_epi8(INT8_C( 3), INT8_C( -64), INT8_C( 77), INT8_C( -56), INT8_C(-127), INT8_C( -33), INT8_C( 83), INT8_C( 46), INT8_C( -91), INT8_C( 125), INT8_C( -59), INT8_C( -28), INT8_C(-110), 
INT8_C( 94), INT8_C( 67), INT8_C( 64)), simde_mm_set_epi8(INT8_C( -18), INT8_C( 56), INT8_C( -33), INT8_C( 5), INT8_C( -31), INT8_C( 37), INT8_C( 43), INT8_C( -26), INT8_C( -84), INT8_C( 85), INT8_C( 28), INT8_C( -96), INT8_C( -54), INT8_C( 79), INT8_C( 16), INT8_C( 119)), simde_mm_set_epi8(INT8_C( -53), INT8_C( 109), INT8_C( -72), INT8_C( 103), INT8_C( 112), INT8_C( 117), INT8_C( -92), INT8_C( -4), INT8_C( 98), INT8_C( -63), INT8_C( 64), INT8_C( -91), INT8_C( 79), INT8_C( 118), INT8_C(-128), INT8_C(-110)) }, { simde_mm_set_epi8(INT8_C( 117), INT8_C( -64), INT8_C( 56), INT8_C( -56), INT8_C( 44), INT8_C( -78), INT8_C( 20), INT8_C( -85), INT8_C( -38), INT8_C( -31), INT8_C( -26), INT8_C(-115), INT8_C(-118), INT8_C( 4), INT8_C( -21), INT8_C( 76)), UINT16_C(23105), simde_mm_set_epi8(INT8_C( 122), INT8_C( -86), INT8_C( 110), INT8_C( -49), INT8_C( -67), INT8_C( 44), INT8_C( -99), INT8_C(-105), INT8_C( 48), INT8_C( 110), INT8_C( -78), INT8_C( 59), INT8_C(-113), INT8_C(-109), INT8_C( 95), INT8_C( -41)), simde_mm_set_epi8(INT8_C( 96), INT8_C( 1), INT8_C( 107), INT8_C( 0), INT8_C( 79), INT8_C( -78), INT8_C( 84), INT8_C( 107), INT8_C( 99), INT8_C( 90), INT8_C( 101), INT8_C( -53), INT8_C( 26), INT8_C( 105), INT8_C( -25), INT8_C( 22)), simde_mm_set_epi8(INT8_C( 117), INT8_C(-123), INT8_C( 56), INT8_C( 51), INT8_C(-105), INT8_C( -78), INT8_C( 50), INT8_C( -85), INT8_C( -38), INT8_C( 15), INT8_C( -26), INT8_C(-115), INT8_C(-118), INT8_C( 4), INT8_C( -21), INT8_C( -25)) }, { simde_mm_set_epi8(INT8_C( 117), INT8_C( -4), INT8_C( -7), INT8_C( 29), INT8_C(-111), INT8_C( 111), INT8_C( -38), INT8_C( -85), INT8_C( -27), INT8_C( 107), INT8_C( -26), INT8_C( -23), INT8_C( -57), INT8_C( 127), INT8_C( -37), INT8_C( 116)), UINT16_C(46922), simde_mm_set_epi8(INT8_C( 35), INT8_C( 35), INT8_C( -52), INT8_C( 65), INT8_C( -57), INT8_C( 52), INT8_C(-105), INT8_C( -52), INT8_C( -3), INT8_C( -72), INT8_C( 12), INT8_C( -81), INT8_C( 102), INT8_C( 5), INT8_C( 69), INT8_C( -78)), simde_mm_set_epi8(INT8_C( 
36), INT8_C( -42), INT8_C( 126), INT8_C( 34), INT8_C( 123), INT8_C( 57), INT8_C( 98), INT8_C( 50), INT8_C( 11), INT8_C( 55), INT8_C( 39), INT8_C( -87), INT8_C( 121), INT8_C( -10), INT8_C( 48), INT8_C( -27)), simde_mm_set_epi8(INT8_C( 51), INT8_C( -4), INT8_C( 71), INT8_C( 70), INT8_C(-111), INT8_C( 108), INT8_C( 125), INT8_C( 71), INT8_C( -27), INT8_C( 29), INT8_C( -26), INT8_C( -23), INT8_C( -17), INT8_C( 127), INT8_C( -87), INT8_C( 116)) }, { simde_mm_set_epi8(INT8_C( 49), INT8_C( -42), INT8_C( 99), INT8_C( -33), INT8_C( 102), INT8_C( -69), INT8_C( 80), INT8_C( 28), INT8_C( -57), INT8_C(-105), INT8_C( -54), INT8_C( 29), INT8_C( 127), INT8_C(-103), INT8_C( -56), INT8_C( -9)), UINT16_C(13222), simde_mm_set_epi8(INT8_C( 63), INT8_C( 76), INT8_C( -21), INT8_C( 103), INT8_C( -99), INT8_C( -8), INT8_C( -71), INT8_C( 6), INT8_C( -27), INT8_C( 107), INT8_C( 23), INT8_C( 20), INT8_C( -71), INT8_C( 77), INT8_C( 119), INT8_C(-117)), simde_mm_set_epi8(INT8_C( 7), INT8_C( 81), INT8_C( -54), INT8_C(-116), INT8_C( 110), INT8_C(-127), INT8_C( 83), INT8_C( 47), INT8_C( -56), INT8_C(-107), INT8_C( -7), INT8_C( 36), INT8_C( -9), INT8_C( 1), INT8_C( 85), INT8_C( -61)), simde_mm_set_epi8(INT8_C( 49), INT8_C( -42), INT8_C( 96), INT8_C( 73), INT8_C( 102), INT8_C( -69), INT8_C( -95), INT8_C( 108), INT8_C( -46), INT8_C(-105), INT8_C( 74), INT8_C( 29), INT8_C( 127), INT8_C( 92), INT8_C(-125), INT8_C( -9)) }, { simde_mm_set_epi8(INT8_C( -28), INT8_C( 17), INT8_C( 126), INT8_C( 55), INT8_C( -19), INT8_C( 71), INT8_C( 38), INT8_C(-110), INT8_C( -94), INT8_C( 23), INT8_C( -88), INT8_C( -8), INT8_C( 30), INT8_C( -59), INT8_C( 51), INT8_C( -12)), UINT16_C(30769), simde_mm_set_epi8(INT8_C( -20), INT8_C( 56), INT8_C( 74), INT8_C( 73), INT8_C( 35), INT8_C( 7), INT8_C( 57), INT8_C( -59), INT8_C( -71), INT8_C( -43), INT8_C(-119), INT8_C( 46), INT8_C( -19), INT8_C( 31), INT8_C( 36), INT8_C( -80)), simde_mm_set_epi8(INT8_C(-105), INT8_C( 21), INT8_C( -18), INT8_C( 101), INT8_C(-105), INT8_C( -48), 
INT8_C( 19), INT8_C(-120), INT8_C( 94), INT8_C(-111), INT8_C( 11), INT8_C( -31), INT8_C(-128), INT8_C( 1), INT8_C(-123), INT8_C( 66)), simde_mm_set_epi8(INT8_C( -28), INT8_C( -37), INT8_C( -35), INT8_C( -45), INT8_C( 34), INT8_C( 71), INT8_C( 38), INT8_C(-110), INT8_C( -94), INT8_C( 23), INT8_C( 17), INT8_C( -23), INT8_C( 30), INT8_C( -59), INT8_C( 51), INT8_C( 113)) }, { simde_mm_set_epi8(INT8_C( -95), INT8_C( -71), INT8_C( 70), INT8_C( -61), INT8_C( -28), INT8_C( 74), INT8_C( 70), INT8_C( 59), INT8_C( -6), INT8_C( -54), INT8_C( 90), INT8_C( 108), INT8_C( -12), INT8_C( 40), INT8_C( 67), INT8_C( -19)), UINT16_C(59146), simde_mm_set_epi8(INT8_C( 38), INT8_C( 113), INT8_C( -51), INT8_C( 35), INT8_C( -49), INT8_C( 20), INT8_C( 32), INT8_C( 74), INT8_C( -76), INT8_C(-126), INT8_C( -53), INT8_C( -83), INT8_C( -23), INT8_C( 109), INT8_C( 7), INT8_C( 82)), simde_mm_set_epi8(INT8_C( -75), INT8_C( -32), INT8_C(-100), INT8_C( 104), INT8_C( -83), INT8_C( 7), INT8_C( 51), INT8_C( -62), INT8_C(-102), INT8_C( 127), INT8_C( 113), INT8_C( 115), INT8_C( 31), INT8_C( 20), INT8_C( 95), INT8_C( 34)), simde_mm_set_epi8(INT8_C( -82), INT8_C( -59), INT8_C( 101), INT8_C( -61), INT8_C( -28), INT8_C( 80), INT8_C( 123), INT8_C( 86), INT8_C( -6), INT8_C( -54), INT8_C( 90), INT8_C( 108), INT8_C( -20), INT8_C( 40), INT8_C( -41), INT8_C( -19)) }, { simde_mm_set_epi8(INT8_C( 17), INT8_C( 122), INT8_C( -7), INT8_C( -75), INT8_C( 76), INT8_C( 29), INT8_C( -90), INT8_C( 121), INT8_C( 121), INT8_C( 21), INT8_C( 69), INT8_C( 30), INT8_C( 36), INT8_C( -84), INT8_C( 26), INT8_C( 107)), UINT16_C(23704), simde_mm_set_epi8(INT8_C( -95), INT8_C( -68), INT8_C( -65), INT8_C( 63), INT8_C( -44), INT8_C( -41), INT8_C( -55), INT8_C( 46), INT8_C( -5), INT8_C( -8), INT8_C( 32), INT8_C( 101), INT8_C(-121), INT8_C( 60), INT8_C( -4), INT8_C( 40)), simde_mm_set_epi8(INT8_C( -4), INT8_C( -76), INT8_C( 74), INT8_C( -56), INT8_C( -36), INT8_C( 121), INT8_C(-105), INT8_C( 109), INT8_C( 83), INT8_C( 99), INT8_C( -62), 
INT8_C( -33), INT8_C( 15), INT8_C( 27), INT8_C(-106), INT8_C( 56)), simde_mm_set_epi8(INT8_C( 17), INT8_C( -59), INT8_C( -7), INT8_C( 58), INT8_C( 102), INT8_C( -62), INT8_C( -90), INT8_C( 121), INT8_C( -12), INT8_C( 21), INT8_C( 69), INT8_C( -50), INT8_C( 112), INT8_C( -84), INT8_C( 26), INT8_C( 107)) }, { simde_mm_set_epi8(INT8_C( -94), INT8_C( -39), INT8_C( 28), INT8_C( 17), INT8_C( 32), INT8_C( -23), INT8_C( -32), INT8_C( 102), INT8_C( 60), INT8_C( 10), INT8_C(-120), INT8_C( -82), INT8_C( -91), INT8_C( -44), INT8_C( 8), INT8_C( -86)), UINT16_C(18462), simde_mm_set_epi8(INT8_C( 121), INT8_C( -31), INT8_C(-109), INT8_C( -76), INT8_C( 27), INT8_C( -91), INT8_C( -72), INT8_C(-121), INT8_C( 100), INT8_C( 45), INT8_C( -84), INT8_C( 24), INT8_C( 66), INT8_C( 120), INT8_C( 55), INT8_C( 85)), simde_mm_set_epi8(INT8_C(-124), INT8_C( 103), INT8_C( -54), INT8_C( -95), INT8_C( -17), INT8_C( -76), INT8_C( 88), INT8_C(-114), INT8_C( 2), INT8_C( -31), INT8_C(-107), INT8_C( 94), INT8_C( 104), INT8_C( -6), INT8_C( 21), INT8_C( 36)), simde_mm_set_epi8(INT8_C( -94), INT8_C( -21), INT8_C( 28), INT8_C( 17), INT8_C( 24), INT8_C( -23), INT8_C( -32), INT8_C( 102), INT8_C( 60), INT8_C( 10), INT8_C(-120), INT8_C( 116), INT8_C( 51), INT8_C( -4), INT8_C( 93), INT8_C( -86)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_mask_gf2p8affine_epi64_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].x, test_vec[i].A, INT8_C( 32)); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_mask_gf2p8affine_epi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i src; simde__mmask32 k; simde__m256i x; simde__m256i A; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( -91), INT8_C( 125), INT8_C( -59), INT8_C( -28), INT8_C(-110), INT8_C( 94), INT8_C( 67), INT8_C( 64), INT8_C( -85), INT8_C( 127), INT8_C( -76), INT8_C(-117), INT8_C( 37), INT8_C(-123), INT8_C( 21), INT8_C( 31), INT8_C( -53), INT8_C( 
109), INT8_C( -72), INT8_C( -6), INT8_C( 112), INT8_C( 75), INT8_C( -92), INT8_C( 12), INT8_C( 98), INT8_C( -63), INT8_C( 64), INT8_C( -41), INT8_C( 43), INT8_C( -55), INT8_C( -46), INT8_C( 88)), UINT32_C(2178896686), simde_mm256_set_epi8(INT8_C( 117), INT8_C( -64), INT8_C( 56), INT8_C( -56), INT8_C( 44), INT8_C( -78), INT8_C( 20), INT8_C( -85), INT8_C( -38), INT8_C( -31), INT8_C( -26), INT8_C(-115), INT8_C(-118), INT8_C( 4), INT8_C( -21), INT8_C( 76), INT8_C( -18), INT8_C( 56), INT8_C( -33), INT8_C( 5), INT8_C( -31), INT8_C( 37), INT8_C( 43), INT8_C( -26), INT8_C( -84), INT8_C( 85), INT8_C( 28), INT8_C( -96), INT8_C( -54), INT8_C( 79), INT8_C( 16), INT8_C( 119)), simde_mm256_set_epi8(INT8_C( 99), INT8_C( 90), INT8_C( 101), INT8_C( -53), INT8_C( 26), INT8_C( 105), INT8_C( -25), INT8_C( 22), INT8_C( 122), INT8_C( -86), INT8_C( 110), INT8_C( -49), INT8_C( -67), INT8_C( 44), INT8_C( -99), INT8_C(-105), INT8_C( 48), INT8_C( 110), INT8_C( -78), INT8_C( 59), INT8_C(-113), INT8_C(-109), INT8_C( 95), INT8_C( -41), INT8_C( -26), INT8_C( 72), INT8_C( 78), INT8_C( 30), INT8_C( 13), INT8_C( 4), INT8_C( 90), INT8_C( 65)), simde_mm256_set_epi8(INT8_C( 24), INT8_C( 125), INT8_C( -59), INT8_C( -28), INT8_C(-110), INT8_C( 94), INT8_C( 67), INT8_C(-120), INT8_C( -33), INT8_C( 17), INT8_C( -76), INT8_C( -88), INT8_C( 3), INT8_C( -43), INT8_C( -31), INT8_C( -89), INT8_C( -53), INT8_C(-111), INT8_C( -72), INT8_C( 3), INT8_C( 112), INT8_C( 75), INT8_C( 122), INT8_C( 124), INT8_C( 98), INT8_C( -63), INT8_C( 2), INT8_C( -41), INT8_C( -4), INT8_C( 80), INT8_C( 97), INT8_C( 88)) }, { simde_mm256_set_epi8(INT8_C( -14), INT8_C( 79), INT8_C( 79), INT8_C( -73), INT8_C( 50), INT8_C( -99), INT8_C( -73), INT8_C( 74), INT8_C( 117), INT8_C( -4), INT8_C( -7), INT8_C( 29), INT8_C(-111), INT8_C( 111), INT8_C( -38), INT8_C( -85), INT8_C( -27), INT8_C( 107), INT8_C( -26), INT8_C( -23), INT8_C( -57), INT8_C( 127), INT8_C( -37), INT8_C( 116), INT8_C( 96), INT8_C( 1), INT8_C( 107), INT8_C( 0), INT8_C( 79), 
INT8_C( -78), INT8_C( 84), INT8_C( 107)), UINT32_C(1711621554), simde_mm256_set_epi8(INT8_C( -57), INT8_C(-105), INT8_C( -54), INT8_C( 29), INT8_C( 127), INT8_C(-103), INT8_C( -56), INT8_C( -9), INT8_C( 36), INT8_C( -42), INT8_C( 126), INT8_C( 34), INT8_C( 123), INT8_C( 57), INT8_C( 98), INT8_C( 50), INT8_C( 11), INT8_C( 55), INT8_C( 39), INT8_C( -87), INT8_C( 121), INT8_C( -10), INT8_C( 48), INT8_C( -27), INT8_C( 35), INT8_C( 35), INT8_C( -52), INT8_C( 65), INT8_C( -57), INT8_C( 52), INT8_C(-105), INT8_C( -52)), simde_mm256_set_epi8(INT8_C( 63), INT8_C( 76), INT8_C( -21), INT8_C( 103), INT8_C( -99), INT8_C( -8), INT8_C( -71), INT8_C( 6), INT8_C( -27), INT8_C( 107), INT8_C( 23), INT8_C( 20), INT8_C( -71), INT8_C( 77), INT8_C( 119), INT8_C(-117), INT8_C( -71), INT8_C( 102), INT8_C( -10), INT8_C( -14), INT8_C( 15), INT8_C( 106), INT8_C( 51), INT8_C( -90), INT8_C( 49), INT8_C( -42), INT8_C( 99), INT8_C( -33), INT8_C( 102), INT8_C( -69), INT8_C( 80), INT8_C( 28)), simde_mm256_set_epi8(INT8_C( -14), INT8_C( 103), INT8_C(-119), INT8_C( -73), INT8_C( 50), INT8_C( 6), INT8_C( 4), INT8_C( 74), INT8_C( 117), INT8_C( -4), INT8_C( -7), INT8_C( 29), INT8_C(-111), INT8_C( 99), INT8_C( -38), INT8_C( -32), INT8_C( -27), INT8_C( -78), INT8_C( -26), INT8_C( -23), INT8_C( -57), INT8_C( 64), INT8_C( -37), INT8_C( -94), INT8_C( 15), INT8_C( 1), INT8_C( 111), INT8_C( 90), INT8_C( 79), INT8_C( -78), INT8_C( 97), INT8_C( 107)) }, { simde_mm256_set_epi8(INT8_C( -28), INT8_C( 17), INT8_C( 126), INT8_C( 55), INT8_C( -19), INT8_C( 71), INT8_C( 38), INT8_C(-110), INT8_C( -94), INT8_C( 23), INT8_C( -88), INT8_C( -8), INT8_C( 30), INT8_C( -59), INT8_C( 51), INT8_C( -12), INT8_C( 7), INT8_C( 81), INT8_C( -54), INT8_C(-116), INT8_C( 110), INT8_C(-127), INT8_C( 83), INT8_C( 47), INT8_C( -56), INT8_C(-107), INT8_C( -7), INT8_C( 36), INT8_C( -9), INT8_C( 1), INT8_C( 85), INT8_C( -61)), UINT32_C(2266658865), simde_mm256_set_epi8(INT8_C(-105), INT8_C( 21), INT8_C( -18), INT8_C( 101), INT8_C(-105), 
INT8_C( -48), INT8_C( 19), INT8_C(-120), INT8_C( 94), INT8_C(-111), INT8_C( 11), INT8_C( -31), INT8_C(-128), INT8_C( 1), INT8_C(-123), INT8_C( 66), INT8_C( -20), INT8_C( 56), INT8_C( 74), INT8_C( 73), INT8_C( 35), INT8_C( 7), INT8_C( 57), INT8_C( -59), INT8_C( -71), INT8_C( -43), INT8_C(-119), INT8_C( 46), INT8_C( -19), INT8_C( 31), INT8_C( 36), INT8_C( -80)), simde_mm256_set_epi8(INT8_C( -76), INT8_C(-126), INT8_C( -53), INT8_C( -83), INT8_C( -23), INT8_C( 109), INT8_C( 7), INT8_C( 82), INT8_C( 96), INT8_C(-118), INT8_C( -29), INT8_C( 25), INT8_C( -59), INT8_C( -4), INT8_C( -25), INT8_C( 10), INT8_C( -95), INT8_C( -71), INT8_C( 70), INT8_C( -61), INT8_C( -28), INT8_C( 74), INT8_C( 70), INT8_C( 59), INT8_C( -6), INT8_C( -54), INT8_C( 90), INT8_C( 108), INT8_C( -12), INT8_C( 40), INT8_C( 67), INT8_C( -19)), simde_mm256_set_epi8(INT8_C( 100), INT8_C( 17), INT8_C( 126), INT8_C( 55), INT8_C( -19), INT8_C( 3), INT8_C( 18), INT8_C( 10), INT8_C( -94), INT8_C( 23), INT8_C( -88), INT8_C( 19), INT8_C( 95), INT8_C( -59), INT8_C( 115), INT8_C( -12), INT8_C( 7), INT8_C(-102), INT8_C( 27), INT8_C( 124), INT8_C( -35), INT8_C(-127), INT8_C( 83), INT8_C( 47), INT8_C( -56), INT8_C(-107), INT8_C( -43), INT8_C( -32), INT8_C( -9), INT8_C( 1), INT8_C( 85), INT8_C( 22)) }, { simde_mm256_set_epi8(INT8_C( 121), INT8_C( 21), INT8_C( 69), INT8_C( 30), INT8_C( 36), INT8_C( -84), INT8_C( 26), INT8_C( 107), INT8_C( -75), INT8_C( -32), INT8_C(-100), INT8_C( 104), INT8_C( -83), INT8_C( 7), INT8_C( 51), INT8_C( -62), INT8_C(-102), INT8_C( 127), INT8_C( 113), INT8_C( 115), INT8_C( 31), INT8_C( 20), INT8_C( 95), INT8_C( 34), INT8_C( 38), INT8_C( 113), INT8_C( -51), INT8_C( 35), INT8_C( -49), INT8_C( 20), INT8_C( 32), INT8_C( 74)), UINT32_C(1277011577), simde_mm256_set_epi8(INT8_C( 83), INT8_C( 99), INT8_C( -62), INT8_C( -33), INT8_C( 15), INT8_C( 27), INT8_C(-106), INT8_C( 56), INT8_C( -95), INT8_C( -68), INT8_C( -65), INT8_C( 63), INT8_C( -44), INT8_C( -41), INT8_C( -55), INT8_C( 46), INT8_C( -5), 
INT8_C( -8), INT8_C( 32), INT8_C( 101), INT8_C(-121), INT8_C( 60), INT8_C( -4), INT8_C( 40), INT8_C( 49), INT8_C(-121), INT8_C( -33), INT8_C( -15), INT8_C( 124), INT8_C( 101), INT8_C( 92), INT8_C(-104)), simde_mm256_set_epi8(INT8_C( 84), INT8_C( -52), INT8_C( 27), INT8_C( -95), INT8_C( -59), INT8_C( -38), INT8_C( 72), INT8_C( 30), INT8_C( -94), INT8_C( -39), INT8_C( 28), INT8_C( 17), INT8_C( 32), INT8_C( -23), INT8_C( -32), INT8_C( 102), INT8_C( 60), INT8_C( 10), INT8_C(-120), INT8_C( -82), INT8_C( -91), INT8_C( -44), INT8_C( 8), INT8_C( -86), INT8_C( -4), INT8_C( -76), INT8_C( 74), INT8_C( -56), INT8_C( -36), INT8_C( 121), INT8_C(-105), INT8_C( 109)), simde_mm256_set_epi8(INT8_C( 121), INT8_C( -22), INT8_C( 69), INT8_C( 30), INT8_C( -28), INT8_C( -46), INT8_C( 26), INT8_C( 107), INT8_C( -75), INT8_C( -32), INT8_C(-100), INT8_C( -33), INT8_C( 34), INT8_C(-119), INT8_C( 51), INT8_C( -5), INT8_C( 88), INT8_C( 127), INT8_C( -80), INT8_C( 115), INT8_C( 31), INT8_C( 103), INT8_C( -5), INT8_C( 34), INT8_C( 38), INT8_C( 5), INT8_C( 118), INT8_C( -1), INT8_C( 42), INT8_C( 20), INT8_C( 32), INT8_C( -68)) }, { simde_mm256_set_epi8(INT8_C(-124), INT8_C( 103), INT8_C( -54), INT8_C( -95), INT8_C( -17), INT8_C( -76), INT8_C( 88), INT8_C(-114), INT8_C( 2), INT8_C( -31), INT8_C(-107), INT8_C( 94), INT8_C( 104), INT8_C( -6), INT8_C( 21), INT8_C( 36), INT8_C( 121), INT8_C( -31), INT8_C(-109), INT8_C( -76), INT8_C( 27), INT8_C( -91), INT8_C( -72), INT8_C(-121), INT8_C( 100), INT8_C( 45), INT8_C( -84), INT8_C( 24), INT8_C( 66), INT8_C( 120), INT8_C( 55), INT8_C( 85)), UINT32_C(3331867243), simde_mm256_set_epi8(INT8_C( -14), INT8_C(-126), INT8_C( -83), INT8_C( 96), INT8_C( -66), INT8_C( 90), INT8_C( -90), INT8_C( -89), INT8_C( -40), INT8_C( -65), INT8_C( 123), INT8_C( -41), INT8_C( -49), INT8_C( 85), INT8_C( 43), INT8_C( -95), INT8_C( 5), INT8_C( -81), INT8_C( 110), INT8_C( -86), INT8_C( 44), INT8_C( -55), INT8_C( -28), INT8_C( -56), INT8_C( 76), INT8_C( -15), INT8_C(-120), INT8_C( 
-25), INT8_C( 79), INT8_C( 84), INT8_C( 15), INT8_C( 29)), simde_mm256_set_epi8(INT8_C( -17), INT8_C( 62), INT8_C( 30), INT8_C( 74), INT8_C( 10), INT8_C( 78), INT8_C( -4), INT8_C( -8), INT8_C( -3), INT8_C(-107), INT8_C( 8), INT8_C( 87), INT8_C(-103), INT8_C( 34), INT8_C( 54), INT8_C( 10), INT8_C( 84), INT8_C(-128), INT8_C(-105), INT8_C( 112), INT8_C( -50), INT8_C( -23), INT8_C( -84), INT8_C( -97), INT8_C( -21), INT8_C( -52), INT8_C( 24), INT8_C( -62), INT8_C( 125), INT8_C(-108), INT8_C( -58), INT8_C( 91)), simde_mm256_set_epi8(INT8_C( 59), INT8_C( -41), INT8_C( -54), INT8_C( -95), INT8_C( -17), INT8_C( -58), INT8_C( 115), INT8_C(-114), INT8_C( -3), INT8_C( -31), INT8_C(-107), INT8_C( -48), INT8_C( 30), INT8_C( -6), INT8_C( 21), INT8_C( 36), INT8_C( 121), INT8_C( -94), INT8_C(-109), INT8_C( -76), INT8_C( 27), INT8_C( -91), INT8_C( 91), INT8_C(-121), INT8_C( 100), INT8_C( -83), INT8_C( -43), INT8_C( 24), INT8_C( 79), INT8_C( 120), INT8_C(-108), INT8_C( -23)) }, { simde_mm256_set_epi8(INT8_C( -75), INT8_C( 81), INT8_C( 40), INT8_C( -29), INT8_C( -6), INT8_C( -70), INT8_C( -30), INT8_C( -15), INT8_C( 74), INT8_C( -68), INT8_C( -59), INT8_C( -94), INT8_C( 110), INT8_C( 112), INT8_C( -76), INT8_C( -57), INT8_C(-115), INT8_C( -95), INT8_C( 123), INT8_C( -26), INT8_C( -67), INT8_C( -55), INT8_C( 111), INT8_C( 39), INT8_C( 74), INT8_C( 65), INT8_C( 50), INT8_C( -20), INT8_C( -17), INT8_C( 24), INT8_C( 43), INT8_C( 11)), UINT32_C(2569613341), simde_mm256_set_epi8(INT8_C( 17), INT8_C( -34), INT8_C( 64), INT8_C( 47), INT8_C( 37), INT8_C( -59), INT8_C( 63), INT8_C( -84), INT8_C( 92), INT8_C(-115), INT8_C(-116), INT8_C( -81), INT8_C( 37), INT8_C(-120), INT8_C( -30), INT8_C( 9), INT8_C( 94), INT8_C( 16), INT8_C( 4), INT8_C(-128), INT8_C(-108), INT8_C(-101), INT8_C( -9), INT8_C( 51), INT8_C( 65), INT8_C( 86), INT8_C( -18), INT8_C( -72), INT8_C( 59), INT8_C(-108), INT8_C(-124), INT8_C( -72)), simde_mm256_set_epi8(INT8_C( -62), INT8_C( 100), INT8_C( 93), INT8_C( -47), INT8_C( 119), 
INT8_C( -97), INT8_C( 60), INT8_C( -11), INT8_C( -73), INT8_C( 49), INT8_C(-118), INT8_C( 27), INT8_C( -43), INT8_C( -52), INT8_C( 6), INT8_C( 113), INT8_C( 97), INT8_C( 119), INT8_C( -54), INT8_C( -41), INT8_C( 46), INT8_C( -26), INT8_C( 13), INT8_C(-110), INT8_C( -4), INT8_C( 30), INT8_C( 9), INT8_C( 116), INT8_C( -89), INT8_C( 7), INT8_C( 11), INT8_C( 83)), simde_mm256_set_epi8(INT8_C( 105), INT8_C( 81), INT8_C( 40), INT8_C( -28), INT8_C( -79), INT8_C( -70), INT8_C( -30), INT8_C( -64), INT8_C( 74), INT8_C( -68), INT8_C( 65), INT8_C( -94), INT8_C( 64), INT8_C( 112), INT8_C( -76), INT8_C( -98), INT8_C(-115), INT8_C( -95), INT8_C( 83), INT8_C(-123), INT8_C( -67), INT8_C( -55), INT8_C( 111), INT8_C( 39), INT8_C( 74), INT8_C( 65), INT8_C( 50), INT8_C( -19), INT8_C( -6), INT8_C(-120), INT8_C( 43), INT8_C( -19)) }, { simde_mm256_set_epi8(INT8_C( -10), INT8_C( 41), INT8_C( -97), INT8_C(-113), INT8_C( 11), INT8_C(-128), INT8_C( -95), INT8_C(-126), INT8_C( 71), INT8_C( 54), INT8_C( 41), INT8_C( 29), INT8_C( 68), INT8_C( -53), INT8_C( -78), INT8_C( 122), INT8_C(-110), INT8_C(-109), INT8_C( -52), INT8_C( -84), INT8_C( 98), INT8_C( -88), INT8_C(-117), INT8_C( 7), INT8_C(-106), INT8_C( -20), INT8_C( 29), INT8_C( -5), INT8_C( -21), INT8_C( 109), INT8_C( -97), INT8_C( -60)), UINT32_C(3674607040), simde_mm256_set_epi8(INT8_C( 51), INT8_C( 98), INT8_C(-100), INT8_C( 4), INT8_C( -58), INT8_C( 111), INT8_C( 124), INT8_C( -27), INT8_C( -60), INT8_C( -35), INT8_C( 13), INT8_C( -18), INT8_C( -5), INT8_C( 9), INT8_C( 15), INT8_C( 46), INT8_C(-105), INT8_C( -55), INT8_C( 100), INT8_C( 4), INT8_C( 11), INT8_C(-115), INT8_C( 82), INT8_C( -5), INT8_C( 51), INT8_C( 124), INT8_C( -80), INT8_C( 29), INT8_C( -61), INT8_C( 111), INT8_C(-115), INT8_C(-123)), simde_mm256_set_epi8(INT8_C( -91), INT8_C( -60), INT8_C( 62), INT8_C( -1), INT8_C( 19), INT8_C( 32), INT8_C( 116), INT8_C( 35), INT8_C(-124), INT8_C( 15), INT8_C(-113), INT8_C(-125), INT8_C( -98), INT8_C( 113), INT8_C( 93), INT8_C( 23), 
INT8_C( 127), INT8_C(-118), INT8_C( 21), INT8_C( 52), INT8_C(-111), INT8_C( 102), INT8_C( 71), INT8_C( -75), INT8_C( 102), INT8_C( 66), INT8_C( -33), INT8_C( -93), INT8_C( -36), INT8_C( -6), INT8_C( 49), INT8_C( 6)), simde_mm256_set_epi8(INT8_C( -99), INT8_C( 18), INT8_C( -97), INT8_C( 102), INT8_C( -69), INT8_C(-128), INT8_C(-111), INT8_C( 83), INT8_C( 71), INT8_C( 54), INT8_C( 41), INT8_C( 29), INT8_C( 68), INT8_C(-111), INT8_C( -40), INT8_C( 122), INT8_C(-110), INT8_C(-109), INT8_C( -52), INT8_C( -84), INT8_C(-100), INT8_C(-128), INT8_C(-117), INT8_C( 91), INT8_C( -41), INT8_C( -94), INT8_C( 29), INT8_C( -5), INT8_C( -21), INT8_C( 109), INT8_C( -97), INT8_C( -60)) }, { simde_mm256_set_epi8(INT8_C( -90), INT8_C( -71), INT8_C( 70), INT8_C(-109), INT8_C( -13), INT8_C( 98), INT8_C( 125), INT8_C( 124), INT8_C( -61), INT8_C( -67), INT8_C( 5), INT8_C( -92), INT8_C( 29), INT8_C( -5), INT8_C( 71), INT8_C( 99), INT8_C( 21), INT8_C( -93), INT8_C( 49), INT8_C( -93), INT8_C( 15), INT8_C( 16), INT8_C( 34), INT8_C( -44), INT8_C( -31), INT8_C( 58), INT8_C( 81), INT8_C( -54), INT8_C( -62), INT8_C( -59), INT8_C( 87), INT8_C( -96)), UINT32_C(2409021262), simde_mm256_set_epi8(INT8_C( -63), INT8_C( 90), INT8_C( 104), INT8_C( -31), INT8_C( 15), INT8_C( 24), INT8_C( -61), INT8_C( -4), INT8_C( -78), INT8_C( 93), INT8_C(-127), INT8_C( 105), INT8_C( 32), INT8_C( 40), INT8_C( 120), INT8_C( -39), INT8_C( 125), INT8_C( -19), INT8_C( -33), INT8_C( 93), INT8_C( 67), INT8_C( 66), INT8_C( -76), INT8_C( -84), INT8_C( 35), INT8_C( 117), INT8_C( -99), INT8_C( 91), INT8_C( -5), INT8_C( 18), INT8_C( -73), INT8_C( 54)), simde_mm256_set_epi8(INT8_C( 79), INT8_C( -59), INT8_C( -96), INT8_C( 42), INT8_C( -10), INT8_C( 69), INT8_C( -63), INT8_C( 86), INT8_C( 88), INT8_C( 57), INT8_C( -72), INT8_C( 125), INT8_C( 32), INT8_C( -66), INT8_C( -47), INT8_C( -11), INT8_C( 63), INT8_C( 94), INT8_C(-107), INT8_C( -5), INT8_C( -1), INT8_C( -3), INT8_C( -12), INT8_C( 101), INT8_C( -3), INT8_C(-119), INT8_C( -62), 
INT8_C( 38), INT8_C( 42), INT8_C(-111), INT8_C( -85), INT8_C( 30)), simde_mm256_set_epi8(INT8_C( -17), INT8_C( -71), INT8_C( 70), INT8_C(-109), INT8_C( 105), INT8_C( -80), INT8_C( 118), INT8_C( -70), INT8_C( -68), INT8_C( -67), INT8_C( 5), INT8_C( -69), INT8_C( 29), INT8_C( -72), INT8_C( -98), INT8_C( 99), INT8_C( 36), INT8_C( -93), INT8_C( -70), INT8_C( -35), INT8_C( 117), INT8_C( -56), INT8_C( 36), INT8_C( 96), INT8_C( -31), INT8_C( 62), INT8_C( 81), INT8_C( -54), INT8_C( -33), INT8_C( 84), INT8_C(-128), INT8_C( -96)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_mask_gf2p8affine_epi64_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].x, test_vec[i].A, INT8_C( 41)); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_gf2p8affine_epi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask64 k; simde__m512i x; simde__m512i A; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( -38), INT8_C( -31), INT8_C( -26), INT8_C(-115), INT8_C(-118), INT8_C( 4), INT8_C( -21), INT8_C( 76), INT8_C( -18), INT8_C( 56), INT8_C( -33), INT8_C( 5), INT8_C( -31), INT8_C( 37), INT8_C( 43), INT8_C( -26), INT8_C( -84), INT8_C( 85), INT8_C( 28), INT8_C( -96), INT8_C( -54), INT8_C( 79), INT8_C( 16), INT8_C( 119), INT8_C( 3), INT8_C( -64), INT8_C( 77), INT8_C( -56), INT8_C(-127), INT8_C( -33), INT8_C( 83), INT8_C( 46), INT8_C( -91), INT8_C( 125), INT8_C( -59), INT8_C( -28), INT8_C(-110), INT8_C( 94), INT8_C( 67), INT8_C( 64), INT8_C( -85), INT8_C( 127), INT8_C( -76), INT8_C(-117), INT8_C( 37), INT8_C(-123), INT8_C( 21), INT8_C( 31), INT8_C( -53), INT8_C( 109), INT8_C( -72), INT8_C( -6), INT8_C( 112), INT8_C( 75), INT8_C( -92), INT8_C( 12), INT8_C( 98), INT8_C( -63), INT8_C( 64), INT8_C( -41), INT8_C( 43), INT8_C( -55), INT8_C( -46), INT8_C( 88)), UINT64_C( 8484844130360497323), simde_mm512_set_epi8(INT8_C( -14), INT8_C( 79), INT8_C( 79), INT8_C( -73), INT8_C( 
50), INT8_C( -99), INT8_C( -73), INT8_C( 74), INT8_C( 117), INT8_C( -4), INT8_C( -7), INT8_C( 29), INT8_C(-111), INT8_C( 111), INT8_C( -38), INT8_C( -85), INT8_C( -27), INT8_C( 107), INT8_C( -26), INT8_C( -23), INT8_C( -57), INT8_C( 127), INT8_C( -37), INT8_C( 116), INT8_C( 96), INT8_C( 1), INT8_C( 107), INT8_C( 0), INT8_C( 79), INT8_C( -78), INT8_C( 84), INT8_C( 107), INT8_C( 99), INT8_C( 90), INT8_C( 101), INT8_C( -53), INT8_C( 26), INT8_C( 105), INT8_C( -25), INT8_C( 22), INT8_C( 122), INT8_C( -86), INT8_C( 110), INT8_C( -49), INT8_C( -67), INT8_C( 44), INT8_C( -99), INT8_C(-105), INT8_C( 48), INT8_C( 110), INT8_C( -78), INT8_C( 59), INT8_C(-113), INT8_C(-109), INT8_C( 95), INT8_C( -41), INT8_C( -26), INT8_C( 72), INT8_C( 78), INT8_C( 30), INT8_C( 13), INT8_C( 4), INT8_C( 90), INT8_C( 65)), simde_mm512_set_epi8(INT8_C( -27), INT8_C( 107), INT8_C( 23), INT8_C( 20), INT8_C( -71), INT8_C( 77), INT8_C( 119), INT8_C(-117), INT8_C( -71), INT8_C( 102), INT8_C( -10), INT8_C( -14), INT8_C( 15), INT8_C( 106), INT8_C( 51), INT8_C( -90), INT8_C( 49), INT8_C( -42), INT8_C( 99), INT8_C( -33), INT8_C( 102), INT8_C( -69), INT8_C( 80), INT8_C( 28), INT8_C( -57), INT8_C(-105), INT8_C( -54), INT8_C( 29), INT8_C( 127), INT8_C(-103), INT8_C( -56), INT8_C( -9), INT8_C( 36), INT8_C( -42), INT8_C( 126), INT8_C( 34), INT8_C( 123), INT8_C( 57), INT8_C( 98), INT8_C( 50), INT8_C( 11), INT8_C( 55), INT8_C( 39), INT8_C( -87), INT8_C( 121), INT8_C( -10), INT8_C( 48), INT8_C( -27), INT8_C( 35), INT8_C( 35), INT8_C( -52), INT8_C( 65), INT8_C( -57), INT8_C( 52), INT8_C(-105), INT8_C( -52), INT8_C( -3), INT8_C( -72), INT8_C( 12), INT8_C( -81), INT8_C( 102), INT8_C( 5), INT8_C( 69), INT8_C( -78)), simde_mm512_set_epi8(INT8_C( -38), INT8_C( -91), INT8_C( -91), INT8_C( -22), INT8_C(-118), INT8_C( -51), INT8_C( -21), INT8_C( 63), INT8_C( 99), INT8_C(-114), INT8_C( -33), INT8_C( 5), INT8_C( -31), INT8_C( 37), INT8_C( 43), INT8_C( -26), INT8_C( -84), INT8_C( 85), INT8_C( -51), INT8_C( -20), INT8_C( 
-43), INT8_C( 79), INT8_C( 16), INT8_C( 119), INT8_C( 109), INT8_C(-109), INT8_C( 77), INT8_C( -56), INT8_C( 54), INT8_C( -33), INT8_C( 83), INT8_C( 46), INT8_C( -91), INT8_C( 125), INT8_C( -76), INT8_C( -28), INT8_C( 116), INT8_C(-121), INT8_C( 67), INT8_C( 64), INT8_C( 42), INT8_C( 127), INT8_C( -2), INT8_C( 55), INT8_C( 37), INT8_C(-123), INT8_C( -46), INT8_C( 31), INT8_C( -53), INT8_C( 109), INT8_C( -72), INT8_C( -25), INT8_C( 112), INT8_C(-108), INT8_C( -92), INT8_C( 12), INT8_C(-116), INT8_C( -63), INT8_C(-109), INT8_C( -41), INT8_C( 51), INT8_C( -55), INT8_C( 109), INT8_C( 16)) }, { simde_mm512_set_epi8(INT8_C( -20), INT8_C( 56), INT8_C( 74), INT8_C( 73), INT8_C( 35), INT8_C( 7), INT8_C( 57), INT8_C( -59), INT8_C( -71), INT8_C( -43), INT8_C(-119), INT8_C( 46), INT8_C( -19), INT8_C( 31), INT8_C( 36), INT8_C( -80), INT8_C( -97), INT8_C( 23), INT8_C( 98), INT8_C( 40), INT8_C(-121), INT8_C( 26), INT8_C( 120), INT8_C( 49), INT8_C( -28), INT8_C( 17), INT8_C( 126), INT8_C( 55), INT8_C( -19), INT8_C( 71), INT8_C( 38), INT8_C(-110), INT8_C( -94), INT8_C( 23), INT8_C( -88), INT8_C( -8), INT8_C( 30), INT8_C( -59), INT8_C( 51), INT8_C( -12), INT8_C( 7), INT8_C( 81), INT8_C( -54), INT8_C(-116), INT8_C( 110), INT8_C(-127), INT8_C( 83), INT8_C( 47), INT8_C( -56), INT8_C(-107), INT8_C( -7), INT8_C( 36), INT8_C( -9), INT8_C( 1), INT8_C( 85), INT8_C( -61), INT8_C( 63), INT8_C( 76), INT8_C( -21), INT8_C( 103), INT8_C( -99), INT8_C( -8), INT8_C( -71), INT8_C( 6)), UINT64_C( 6814240774331401538), simde_mm512_set_epi8(INT8_C( -75), INT8_C( -32), INT8_C(-100), INT8_C( 104), INT8_C( -83), INT8_C( 7), INT8_C( 51), INT8_C( -62), INT8_C(-102), INT8_C( 127), INT8_C( 113), INT8_C( 115), INT8_C( 31), INT8_C( 20), INT8_C( 95), INT8_C( 34), INT8_C( 38), INT8_C( 113), INT8_C( -51), INT8_C( 35), INT8_C( -49), INT8_C( 20), INT8_C( 32), INT8_C( 74), INT8_C( -76), INT8_C(-126), INT8_C( -53), INT8_C( -83), INT8_C( -23), INT8_C( 109), INT8_C( 7), INT8_C( 82), INT8_C( 96), INT8_C(-118), INT8_C( 
-29), INT8_C( 25), INT8_C( -59), INT8_C( -4), INT8_C( -25), INT8_C( 10), INT8_C( -95), INT8_C( -71), INT8_C( 70), INT8_C( -61), INT8_C( -28), INT8_C( 74), INT8_C( 70), INT8_C( 59), INT8_C( -6), INT8_C( -54), INT8_C( 90), INT8_C( 108), INT8_C( -12), INT8_C( 40), INT8_C( 67), INT8_C( -19), INT8_C(-105), INT8_C( 21), INT8_C( -18), INT8_C( 101), INT8_C(-105), INT8_C( -48), INT8_C( 19), INT8_C(-120)), simde_mm512_set_epi8(INT8_C( 60), INT8_C( 10), INT8_C(-120), INT8_C( -82), INT8_C( -91), INT8_C( -44), INT8_C( 8), INT8_C( -86), INT8_C( -4), INT8_C( -76), INT8_C( 74), INT8_C( -56), INT8_C( -36), INT8_C( 121), INT8_C(-105), INT8_C( 109), INT8_C( 83), INT8_C( 99), INT8_C( -62), INT8_C( -33), INT8_C( 15), INT8_C( 27), INT8_C(-106), INT8_C( 56), INT8_C( -95), INT8_C( -68), INT8_C( -65), INT8_C( 63), INT8_C( -44), INT8_C( -41), INT8_C( -55), INT8_C( 46), INT8_C( -5), INT8_C( -8), INT8_C( 32), INT8_C( 101), INT8_C(-121), INT8_C( 60), INT8_C( -4), INT8_C( 40), INT8_C( 49), INT8_C(-121), INT8_C( -33), INT8_C( -15), INT8_C( 124), INT8_C( 101), INT8_C( 92), INT8_C(-104), INT8_C( 17), INT8_C( 122), INT8_C( -7), INT8_C( -75), INT8_C( 76), INT8_C( 29), INT8_C( -90), INT8_C( 121), INT8_C( 121), INT8_C( 21), INT8_C( 69), INT8_C( 30), INT8_C( 36), INT8_C( -84), INT8_C( 26), INT8_C( 107)), simde_mm512_set_epi8(INT8_C( -20), INT8_C( 45), INT8_C( 74), INT8_C( 94), INT8_C( -21), INT8_C(-117), INT8_C( 10), INT8_C( -59), INT8_C( -7), INT8_C( -43), INT8_C(-119), INT8_C( -31), INT8_C( -19), INT8_C( 31), INT8_C( 36), INT8_C( -49), INT8_C( -97), INT8_C( 23), INT8_C( 98), INT8_C( 40), INT8_C( -49), INT8_C( 26), INT8_C( -86), INT8_C( -32), INT8_C( 80), INT8_C( -13), INT8_C( 32), INT8_C( 55), INT8_C( -19), INT8_C( 71), INT8_C( 38), INT8_C( -54), INT8_C(-116), INT8_C( 23), INT8_C( -88), INT8_C( -8), INT8_C( 30), INT8_C( -59), INT8_C( 51), INT8_C( -12), INT8_C( 7), INT8_C( 81), INT8_C( -54), INT8_C(-116), INT8_C( 110), INT8_C(-127), INT8_C( 83), INT8_C( 49), INT8_C( 103), INT8_C(-107), INT8_C( -7), 
INT8_C( 36), INT8_C( -9), INT8_C( 80), INT8_C( 85), INT8_C( 95), INT8_C( 63), INT8_C( -38), INT8_C( -21), INT8_C( 103), INT8_C( -99), INT8_C( -8), INT8_C( 44), INT8_C( 6)) }, { simde_mm512_set_epi8(INT8_C( 76), INT8_C( -15), INT8_C(-120), INT8_C( -25), INT8_C( 79), INT8_C( 84), INT8_C( 15), INT8_C( 29), INT8_C( 123), INT8_C( 56), INT8_C( 84), INT8_C( 4), INT8_C( -58), INT8_C(-104), INT8_C( 66), INT8_C( 107), INT8_C(-124), INT8_C( 103), INT8_C( -54), INT8_C( -95), INT8_C( -17), INT8_C( -76), INT8_C( 88), INT8_C(-114), INT8_C( 2), INT8_C( -31), INT8_C(-107), INT8_C( 94), INT8_C( 104), INT8_C( -6), INT8_C( 21), INT8_C( 36), INT8_C( 121), INT8_C( -31), INT8_C(-109), INT8_C( -76), INT8_C( 27), INT8_C( -91), INT8_C( -72), INT8_C(-121), INT8_C( 100), INT8_C( 45), INT8_C( -84), INT8_C( 24), INT8_C( 66), INT8_C( 120), INT8_C( 55), INT8_C( 85), INT8_C( 84), INT8_C( -52), INT8_C( 27), INT8_C( -95), INT8_C( -59), INT8_C( -38), INT8_C( 72), INT8_C( 30), INT8_C( -94), INT8_C( -39), INT8_C( 28), INT8_C( 17), INT8_C( 32), INT8_C( -23), INT8_C( -32), INT8_C( 102)), UINT64_C( 409667768288928968), simde_mm512_set_epi8(INT8_C(-115), INT8_C( -95), INT8_C( 123), INT8_C( -26), INT8_C( -67), INT8_C( -55), INT8_C( 111), INT8_C( 39), INT8_C( 74), INT8_C( 65), INT8_C( 50), INT8_C( -20), INT8_C( -17), INT8_C( 24), INT8_C( 43), INT8_C( 11), INT8_C( -17), INT8_C( 62), INT8_C( 30), INT8_C( 74), INT8_C( 10), INT8_C( 78), INT8_C( -4), INT8_C( -8), INT8_C( -3), INT8_C(-107), INT8_C( 8), INT8_C( 87), INT8_C(-103), INT8_C( 34), INT8_C( 54), INT8_C( 10), INT8_C( 84), INT8_C(-128), INT8_C(-105), INT8_C( 112), INT8_C( -50), INT8_C( -23), INT8_C( -84), INT8_C( -97), INT8_C( -21), INT8_C( -52), INT8_C( 24), INT8_C( -62), INT8_C( 125), INT8_C(-108), INT8_C( -58), INT8_C( 91), INT8_C( -14), INT8_C(-126), INT8_C( -83), INT8_C( 96), INT8_C( -66), INT8_C( 90), INT8_C( -90), INT8_C( -89), INT8_C( -40), INT8_C( -65), INT8_C( 123), INT8_C( -41), INT8_C( -49), INT8_C( 85), INT8_C( 43), INT8_C( -95)), 
simde_mm512_set_epi8(INT8_C( -4), INT8_C( 30), INT8_C( 9), INT8_C( 116), INT8_C( -89), INT8_C( 7), INT8_C( 11), INT8_C( 83), INT8_C( 17), INT8_C( -34), INT8_C( 64), INT8_C( 47), INT8_C( 37), INT8_C( -59), INT8_C( 63), INT8_C( -84), INT8_C( 92), INT8_C(-115), INT8_C(-116), INT8_C( -81), INT8_C( 37), INT8_C(-120), INT8_C( -30), INT8_C( 9), INT8_C( 94), INT8_C( 16), INT8_C( 4), INT8_C(-128), INT8_C(-108), INT8_C(-101), INT8_C( -9), INT8_C( 51), INT8_C( 65), INT8_C( 86), INT8_C( -18), INT8_C( -72), INT8_C( 59), INT8_C(-108), INT8_C(-124), INT8_C( -72), INT8_C( -39), INT8_C( -73), INT8_C( 42), INT8_C( 8), INT8_C(-103), INT8_C( 41), INT8_C( 48), INT8_C( 29), INT8_C( -75), INT8_C( 81), INT8_C( 40), INT8_C( -29), INT8_C( -6), INT8_C( -70), INT8_C( -30), INT8_C( -15), INT8_C( 74), INT8_C( -68), INT8_C( -59), INT8_C( -94), INT8_C( 110), INT8_C( 112), INT8_C( -76), INT8_C( -57)), simde_mm512_set_epi8(INT8_C( 76), INT8_C( -15), INT8_C(-120), INT8_C( -25), INT8_C( 79), INT8_C( 3), INT8_C( 15), INT8_C( 12), INT8_C(-114), INT8_C( 56), INT8_C( -7), INT8_C( 4), INT8_C( 119), INT8_C( -95), INT8_C( 9), INT8_C( -47), INT8_C(-124), INT8_C(-119), INT8_C( -47), INT8_C( -95), INT8_C( -49), INT8_C(-111), INT8_C( -18), INT8_C(-114), INT8_C( -74), INT8_C( -31), INT8_C( 9), INT8_C( 94), INT8_C( 98), INT8_C( -6), INT8_C( -81), INT8_C( 36), INT8_C( 121), INT8_C( -31), INT8_C( 31), INT8_C( -76), INT8_C( 47), INT8_C( -46), INT8_C( -72), INT8_C(-121), INT8_C( 84), INT8_C( 5), INT8_C( -84), INT8_C( 24), INT8_C( 16), INT8_C( 120), INT8_C( 55), INT8_C( -14), INT8_C( 61), INT8_C( -87), INT8_C(-110), INT8_C( -95), INT8_C( -59), INT8_C( 13), INT8_C( 72), INT8_C( 30), INT8_C( 34), INT8_C( 38), INT8_C( 28), INT8_C( 17), INT8_C(-117), INT8_C( -23), INT8_C( -32), INT8_C( 102)) }, { simde_mm512_set_epi8(INT8_C( -11), INT8_C( 39), INT8_C( -6), INT8_C( 77), INT8_C( -37), INT8_C( 6), INT8_C( 13), INT8_C( -64), INT8_C( -10), INT8_C( 41), INT8_C( -97), INT8_C(-113), INT8_C( 11), INT8_C(-128), INT8_C( -95), 
INT8_C(-126), INT8_C( 71), INT8_C( 54), INT8_C( 41), INT8_C( 29), INT8_C( 68), INT8_C( -53), INT8_C( -78), INT8_C( 122), INT8_C(-110), INT8_C(-109), INT8_C( -52), INT8_C( -84), INT8_C( 98), INT8_C( -88), INT8_C(-117), INT8_C( 7), INT8_C(-106), INT8_C( -20), INT8_C( 29), INT8_C( -5), INT8_C( -21), INT8_C( 109), INT8_C( -97), INT8_C( -60), INT8_C( -62), INT8_C( 100), INT8_C( 93), INT8_C( -47), INT8_C( 119), INT8_C( -97), INT8_C( 60), INT8_C( -11), INT8_C( -73), INT8_C( 49), INT8_C(-118), INT8_C( 27), INT8_C( -43), INT8_C( -52), INT8_C( 6), INT8_C( 113), INT8_C( 97), INT8_C( 119), INT8_C( -54), INT8_C( -41), INT8_C( 46), INT8_C( -26), INT8_C( 13), INT8_C(-110)), UINT64_C( 3710033834925854085), simde_mm512_set_epi8(INT8_C( -31), INT8_C( 58), INT8_C( 81), INT8_C( -54), INT8_C( -62), INT8_C( -59), INT8_C( 87), INT8_C( -96), INT8_C( -91), INT8_C( -60), INT8_C( 62), INT8_C( -1), INT8_C( 19), INT8_C( 32), INT8_C( 116), INT8_C( 35), INT8_C(-124), INT8_C( 15), INT8_C(-113), INT8_C(-125), INT8_C( -98), INT8_C( 113), INT8_C( 93), INT8_C( 23), INT8_C( 127), INT8_C(-118), INT8_C( 21), INT8_C( 52), INT8_C(-111), INT8_C( 102), INT8_C( 71), INT8_C( -75), INT8_C( 102), INT8_C( 66), INT8_C( -33), INT8_C( -93), INT8_C( -36), INT8_C( -6), INT8_C( 49), INT8_C( 6), INT8_C( 51), INT8_C( 98), INT8_C(-100), INT8_C( 4), INT8_C( -58), INT8_C( 111), INT8_C( 124), INT8_C( -27), INT8_C( -60), INT8_C( -35), INT8_C( 13), INT8_C( -18), INT8_C( -5), INT8_C( 9), INT8_C( 15), INT8_C( 46), INT8_C(-105), INT8_C( -55), INT8_C( 100), INT8_C( 4), INT8_C( 11), INT8_C(-115), INT8_C( 82), INT8_C( -5)), simde_mm512_set_epi8(INT8_C( -63), INT8_C( 90), INT8_C( 104), INT8_C( -31), INT8_C( 15), INT8_C( 24), INT8_C( -61), INT8_C( -4), INT8_C( -78), INT8_C( 93), INT8_C(-127), INT8_C( 105), INT8_C( 32), INT8_C( 40), INT8_C( 120), INT8_C( -39), INT8_C( 125), INT8_C( -19), INT8_C( -33), INT8_C( 93), INT8_C( 67), INT8_C( 66), INT8_C( -76), INT8_C( -84), INT8_C( 35), INT8_C( 117), INT8_C( -99), INT8_C( 91), INT8_C( -5), 
INT8_C( 18), INT8_C( -73), INT8_C( 54), INT8_C( -20), INT8_C( 57), INT8_C( -83), INT8_C( -25), INT8_C(-113), INT8_C(-106), INT8_C( -65), INT8_C( 78), INT8_C( -90), INT8_C( -71), INT8_C( 70), INT8_C(-109), INT8_C( -13), INT8_C( 98), INT8_C( 125), INT8_C( 124), INT8_C( -61), INT8_C( -67), INT8_C( 5), INT8_C( -92), INT8_C( 29), INT8_C( -5), INT8_C( 71), INT8_C( 99), INT8_C( 21), INT8_C( -93), INT8_C( 49), INT8_C( -93), INT8_C( 15), INT8_C( 16), INT8_C( 34), INT8_C( -44)), simde_mm512_set_epi8(INT8_C( -11), INT8_C( 39), INT8_C( 28), INT8_C( -54), INT8_C( -37), INT8_C( 6), INT8_C( -34), INT8_C( 109), INT8_C( -10), INT8_C( 101), INT8_C( 123), INT8_C( -70), INT8_C( 100), INT8_C( 81), INT8_C( -95), INT8_C(-126), INT8_C( 33), INT8_C( 54), INT8_C(-123), INT8_C( -59), INT8_C( 68), INT8_C( -53), INT8_C( -78), INT8_C( 122), INT8_C(-110), INT8_C(-109), INT8_C( -52), INT8_C( -61), INT8_C( -35), INT8_C( -34), INT8_C(-117), INT8_C( -56), INT8_C( -21), INT8_C( 89), INT8_C( 29), INT8_C( -5), INT8_C( -21), INT8_C( 109), INT8_C( 91), INT8_C( 45), INT8_C( -62), INT8_C( 18), INT8_C( -18), INT8_C( -47), INT8_C( 63), INT8_C( 79), INT8_C( -14), INT8_C( -85), INT8_C( -68), INT8_C( 49), INT8_C(-118), INT8_C( 27), INT8_C( 94), INT8_C( -19), INT8_C( 6), INT8_C( 15), INT8_C( -45), INT8_C( 119), INT8_C( -54), INT8_C( -41), INT8_C( 46), INT8_C( 60), INT8_C( 13), INT8_C(-100)) }, { simde_mm512_set_epi8(INT8_C( -69), INT8_C( -81), INT8_C(-105), INT8_C( -85), INT8_C( -82), INT8_C( 108), INT8_C(-118), INT8_C( 109), INT8_C( 112), INT8_C(-112), INT8_C( -34), INT8_C( -5), INT8_C( -77), INT8_C(-118), INT8_C( -13), INT8_C( 64), INT8_C( 122), INT8_C(-122), INT8_C( 12), INT8_C(-116), INT8_C(-107), INT8_C( -96), INT8_C( 15), INT8_C( -28), INT8_C( -78), INT8_C( 119), INT8_C( 1), INT8_C( -6), INT8_C(-124), INT8_C( 118), INT8_C( 59), INT8_C( -58), INT8_C( 79), INT8_C( -59), INT8_C( -96), INT8_C( 42), INT8_C( -10), INT8_C( 69), INT8_C( -63), INT8_C( 86), INT8_C( 88), INT8_C( 57), INT8_C( -72), INT8_C( 125), 
INT8_C( 32), INT8_C( -66), INT8_C( -47), INT8_C( -11), INT8_C( 63), INT8_C( 94), INT8_C(-107), INT8_C( -5), INT8_C( -1), INT8_C( -3), INT8_C( -12), INT8_C( 101), INT8_C( -3), INT8_C(-119), INT8_C( -62), INT8_C( 38), INT8_C( 42), INT8_C(-111), INT8_C( -85), INT8_C( 30)), UINT64_C(13505615964757104331), simde_mm512_set_epi8(INT8_C( 119), INT8_C( -26), INT8_C( -34), INT8_C( -29), INT8_C( 61), INT8_C( -97), INT8_C( 121), INT8_C( -50), INT8_C( 115), INT8_C( 70), INT8_C(-126), INT8_C( 82), INT8_C( 49), INT8_C( -85), INT8_C( -85), INT8_C( 63), INT8_C( -92), INT8_C( 55), INT8_C( 97), INT8_C( 48), INT8_C( 61), INT8_C( 17), INT8_C( 44), INT8_C( 43), INT8_C( 34), INT8_C( 28), INT8_C( -52), INT8_C( 105), INT8_C( 4), INT8_C( 65), INT8_C( -75), INT8_C( 34), INT8_C( 39), INT8_C( 110), INT8_C( 14), INT8_C( 27), INT8_C( -36), INT8_C( -59), INT8_C( -76), INT8_C( 54), INT8_C( 38), INT8_C( -22), INT8_C( -75), INT8_C( -86), INT8_C( -68), INT8_C( 73), INT8_C( -18), INT8_C( -72), INT8_C( -76), INT8_C( -29), INT8_C(-118), INT8_C( 60), INT8_C( 27), INT8_C( 123), INT8_C( 54), INT8_C(-106), INT8_C( -24), INT8_C(-112), INT8_C( -63), INT8_C( 6), INT8_C( 30), INT8_C( -40), INT8_C( 40), INT8_C(-118)), simde_mm512_set_epi8(INT8_C(-125), INT8_C( 59), INT8_C( -7), INT8_C( -57), INT8_C( -54), INT8_C( -24), INT8_C(-127), INT8_C( 28), INT8_C( 72), INT8_C( 65), INT8_C( -12), INT8_C( -17), INT8_C(-122), INT8_C( 99), INT8_C( -39), INT8_C( -66), INT8_C(-112), INT8_C( 104), INT8_C( -8), INT8_C(-107), INT8_C( 107), INT8_C( -79), INT8_C( -16), INT8_C( 2), INT8_C( 85), INT8_C( -99), INT8_C( -15), INT8_C( 35), INT8_C(-123), INT8_C( -63), INT8_C( -50), INT8_C( 41), INT8_C( 40), INT8_C( -15), INT8_C( -56), INT8_C( 80), INT8_C( -27), INT8_C( -56), INT8_C( 15), INT8_C( 83), INT8_C( -33), INT8_C( -5), INT8_C( 78), INT8_C( 31), INT8_C( -93), INT8_C(-105), INT8_C( 63), INT8_C( 3), INT8_C( -15), INT8_C( 48), INT8_C(-120), INT8_C( -78), INT8_C( 93), INT8_C( 78), INT8_C(-110), INT8_C( 1), INT8_C( -12), INT8_C( 47), 
INT8_C( 72), INT8_C( 51), INT8_C( 89), INT8_C( 59), INT8_C( -98), INT8_C( 89)), simde_mm512_set_epi8(INT8_C( 104), INT8_C( -81), INT8_C( -54), INT8_C( 27), INT8_C( -7), INT8_C( 108), INT8_C( 77), INT8_C( 76), INT8_C( 112), INT8_C( 99), INT8_C( 76), INT8_C( -5), INT8_C( 42), INT8_C( 67), INT8_C( -13), INT8_C( -57), INT8_C( 59), INT8_C(-122), INT8_C( 12), INT8_C( 51), INT8_C(-107), INT8_C( 125), INT8_C( 64), INT8_C( -28), INT8_C( -78), INT8_C( 119), INT8_C( -86), INT8_C( -6), INT8_C( 123), INT8_C( 118), INT8_C( 57), INT8_C( -58), INT8_C( 79), INT8_C( -59), INT8_C( -96), INT8_C( 42), INT8_C( -10), INT8_C( 34), INT8_C( -63), INT8_C( 86), INT8_C( 88), INT8_C( 57), INT8_C( -76), INT8_C( -7), INT8_C( 32), INT8_C( -66), INT8_C( -47), INT8_C( 109), INT8_C( 63), INT8_C( 94), INT8_C(-107), INT8_C( 124), INT8_C( -1), INT8_C(-124), INT8_C( 32), INT8_C( 101), INT8_C( 33), INT8_C(-112), INT8_C( -62), INT8_C( 38), INT8_C( 14), INT8_C(-111), INT8_C( -11), INT8_C( -11)) }, { simde_mm512_set_epi8(INT8_C( -91), INT8_C( 33), INT8_C( -8), INT8_C( 37), INT8_C( 32), INT8_C( 45), INT8_C(-122), INT8_C( 70), INT8_C( 117), INT8_C( -19), INT8_C( -75), INT8_C( 13), INT8_C( -99), INT8_C( 93), INT8_C( -75), INT8_C( 20), INT8_C( 19), INT8_C( 8), INT8_C(-110), INT8_C( -98), INT8_C( 3), INT8_C( 62), INT8_C( 35), INT8_C( 2), INT8_C( 100), INT8_C( 93), INT8_C( 79), INT8_C( 89), INT8_C( -78), INT8_C( 31), INT8_C( 94), INT8_C( 113), INT8_C(-119), INT8_C( 14), INT8_C(-123), INT8_C( -25), INT8_C( 58), INT8_C( -66), INT8_C( -71), INT8_C( 92), INT8_C( 108), INT8_C( 124), INT8_C( -29), INT8_C( -77), INT8_C( -24), INT8_C(-128), INT8_C(-121), INT8_C( 28), INT8_C(-127), INT8_C( 21), INT8_C( 32), INT8_C( -79), INT8_C( -97), INT8_C( -30), INT8_C(-115), INT8_C( 53), INT8_C( 70), INT8_C( 75), INT8_C( -26), INT8_C( 77), INT8_C( -45), INT8_C(-100), INT8_C( 30), INT8_C( 32)), UINT64_C(15325554837812698489), simde_mm512_set_epi8(INT8_C( 122), INT8_C( 103), INT8_C( -34), INT8_C( 26), INT8_C( -92), INT8_C(-106), INT8_C( 
83), INT8_C( -36), INT8_C( 61), INT8_C( -16), INT8_C(-112), INT8_C( -39), INT8_C( 99), INT8_C( 110), INT8_C( 8), INT8_C( -91), INT8_C( 19), INT8_C( 65), INT8_C( 30), INT8_C( 69), INT8_C( -83), INT8_C( 78), INT8_C( 47), INT8_C( -80), INT8_C( 55), INT8_C( 16), INT8_C( 42), INT8_C(-127), INT8_C( 9), INT8_C(-125), INT8_C( 29), INT8_C(-101), INT8_C( 97), INT8_C( -40), INT8_C( 70), INT8_C( -20), INT8_C(-125), INT8_C(-105), INT8_C( -47), INT8_C( 87), INT8_C(-126), INT8_C( 27), INT8_C( 91), INT8_C( -52), INT8_C( -69), INT8_C( 76), INT8_C(-116), INT8_C( 24), INT8_C( -37), INT8_C( -95), INT8_C( -14), INT8_C( -88), INT8_C( -7), INT8_C( 96), INT8_C( 114), INT8_C( 40), INT8_C( -93), INT8_C( -16), INT8_C( -29), INT8_C( 102), INT8_C( -36), INT8_C( 111), INT8_C( -60), INT8_C( 50)), simde_mm512_set_epi8(INT8_C( 4), INT8_C( 6), INT8_C( 63), INT8_C( -47), INT8_C( -7), INT8_C( -11), INT8_C( -31), INT8_C( -27), INT8_C(-102), INT8_C( -51), INT8_C( -75), INT8_C( -30), INT8_C(-125), INT8_C(-103), INT8_C( 96), INT8_C( 41), INT8_C( -33), INT8_C( -83), INT8_C( 79), INT8_C( -21), INT8_C( -93), INT8_C( -15), INT8_C( 127), INT8_C( 107), INT8_C(-114), INT8_C( 54), INT8_C( -68), INT8_C(-124), INT8_C( -75), INT8_C( 94), INT8_C( 96), INT8_C( -35), INT8_C( -99), INT8_C( -46), INT8_C( -57), INT8_C( 42), INT8_C( 102), INT8_C( -83), INT8_C( -30), INT8_C( 107), INT8_C( 39), INT8_C( -54), INT8_C( -46), INT8_C( 23), INT8_C( 43), INT8_C( 67), INT8_C( 68), INT8_C( -29), INT8_C( 71), INT8_C( 38), INT8_C( -71), INT8_C( -2), INT8_C( -12), INT8_C( -12), INT8_C( 127), INT8_C( 43), INT8_C( 65), INT8_C( 114), INT8_C(-111), INT8_C( -99), INT8_C( 48), INT8_C( -90), INT8_C( -33), INT8_C( -6)), simde_mm512_set_epi8(INT8_C( 10), INT8_C( 121), INT8_C( -8), INT8_C( 6), INT8_C( 32), INT8_C( 77), INT8_C(-122), INT8_C( 70), INT8_C( -46), INT8_C( -19), INT8_C( 50), INT8_C( 13), INT8_C( 1), INT8_C( 18), INT8_C(-117), INT8_C( 107), INT8_C( 19), INT8_C( 58), INT8_C(-110), INT8_C( -98), INT8_C( -98), INT8_C(-112), INT8_C( 120), 
INT8_C( 2), INT8_C( -60), INT8_C( -98), INT8_C( -8), INT8_C( 89), INT8_C( 29), INT8_C( 6), INT8_C( 94), INT8_C( 21), INT8_C(-119), INT8_C( 14), INT8_C( 21), INT8_C( -3), INT8_C( 52), INT8_C( 2), INT8_C( -71), INT8_C( -77), INT8_C( 17), INT8_C( 124), INT8_C( -29), INT8_C( 19), INT8_C( 39), INT8_C(-128), INT8_C(-121), INT8_C( 28), INT8_C(-127), INT8_C( 47), INT8_C( 32), INT8_C( 38), INT8_C( -26), INT8_C( -81), INT8_C(-115), INT8_C( 26), INT8_C( 70), INT8_C( 107), INT8_C( 26), INT8_C( -45), INT8_C( 121), INT8_C(-100), INT8_C( 30), INT8_C( -90)) }, { simde_mm512_set_epi8(INT8_C( -62), INT8_C( -73), INT8_C( 0), INT8_C(-101), INT8_C( 116), INT8_C( 69), INT8_C( 92), INT8_C( 108), INT8_C( 36), INT8_C( -46), INT8_C( -54), INT8_C( -54), INT8_C( -20), INT8_C( 67), INT8_C( -51), INT8_C( -85), INT8_C( -23), INT8_C( 69), INT8_C(-114), INT8_C( 64), INT8_C( 81), INT8_C( 77), INT8_C( -92), INT8_C( 40), INT8_C( -30), INT8_C( -93), INT8_C( -92), INT8_C( 118), INT8_C( -39), INT8_C( -80), INT8_C( -72), INT8_C( -77), INT8_C( 65), INT8_C( 123), INT8_C( -53), INT8_C( 39), INT8_C( -96), INT8_C( 45), INT8_C( 84), INT8_C( -69), INT8_C( 0), INT8_C( -17), INT8_C( -85), INT8_C( 75), INT8_C( 94), INT8_C(-101), INT8_C(-110), INT8_C( 18), INT8_C( 15), INT8_C( 3), INT8_C( 69), INT8_C( -46), INT8_C( -17), INT8_C( -89), INT8_C(-105), INT8_C( 25), INT8_C( 75), INT8_C( -48), INT8_C( -24), INT8_C( 121), INT8_C( 24), INT8_C( -37), INT8_C( -70), INT8_C( 117)), UINT64_C( 8321631579246341407), simde_mm512_set_epi8(INT8_C(-108), INT8_C( -86), INT8_C( -86), INT8_C( 72), INT8_C( 33), INT8_C( 33), INT8_C( -55), INT8_C( 78), INT8_C( 32), INT8_C( 80), INT8_C( 63), INT8_C( -95), INT8_C( -12), INT8_C( 117), INT8_C( 6), INT8_C( -80), INT8_C( 125), INT8_C( -75), INT8_C( 125), INT8_C( -17), INT8_C( 20), INT8_C(-128), INT8_C( -32), INT8_C( -1), INT8_C( 85), INT8_C( -34), INT8_C( 19), INT8_C( 84), INT8_C( 40), INT8_C( 92), INT8_C( -28), INT8_C( -58), INT8_C( -25), INT8_C( 122), INT8_C( 47), INT8_C(-100), INT8_C(-128), 
INT8_C( -1), INT8_C( 114), INT8_C( -5), INT8_C( 125), INT8_C( 104), INT8_C( -10), INT8_C( -76), INT8_C( 115), INT8_C( 98), INT8_C( 54), INT8_C( 111), INT8_C(-112), INT8_C( 15), INT8_C( -43), INT8_C( -23), INT8_C(-121), INT8_C( 73), INT8_C( -23), INT8_C(-123), INT8_C( -93), INT8_C( -81), INT8_C( -81), INT8_C(-110), INT8_C( -89), INT8_C( -34), INT8_C( 8), INT8_C( 108)), simde_mm512_set_epi8(INT8_C( -28), INT8_C( 40), INT8_C( 110), INT8_C( -17), INT8_C( 44), INT8_C( 103), INT8_C( -7), INT8_C( 8), INT8_C( 70), INT8_C( 121), INT8_C( -7), INT8_C( -36), INT8_C( -32), INT8_C( 126), INT8_C( 91), INT8_C( 92), INT8_C( -4), INT8_C( 58), INT8_C( 0), INT8_C( -32), INT8_C(-125), INT8_C( -97), INT8_C( -73), INT8_C( -36), INT8_C( 101), INT8_C( -91), INT8_C( 92), INT8_C(-100), INT8_C( 92), INT8_C( -53), INT8_C( 4), INT8_C( 1), INT8_C( 13), INT8_C( 64), INT8_C( -32), INT8_C( -67), INT8_C( 106), INT8_C( 112), INT8_C( -54), INT8_C( 55), INT8_C( 64), INT8_C( -37), INT8_C( 81), INT8_C( 53), INT8_C( 79), INT8_C( 81), INT8_C( -60), INT8_C( -46), INT8_C( -11), INT8_C( 90), INT8_C( 97), INT8_C( 68), INT8_C(-127), INT8_C( 86), INT8_C( 32), INT8_C(-118), INT8_C( 76), INT8_C( -43), INT8_C( 41), INT8_C(-111), INT8_C( -3), INT8_C( -53), INT8_C( 102), INT8_C(-116)), simde_mm512_set_epi8(INT8_C( -62), INT8_C( -20), INT8_C( -20), INT8_C(-101), INT8_C( 116), INT8_C( 69), INT8_C( -70), INT8_C(-118), INT8_C( 36), INT8_C( 57), INT8_C(-112), INT8_C( 68), INT8_C( -70), INT8_C( -32), INT8_C( -51), INT8_C( -85), INT8_C( -23), INT8_C( -24), INT8_C(-114), INT8_C( 83), INT8_C( 42), INT8_C( -47), INT8_C( 19), INT8_C( -80), INT8_C( -3), INT8_C( 104), INT8_C( -92), INT8_C( 94), INT8_C( -39), INT8_C( 98), INT8_C( -72), INT8_C( -77), INT8_C( 126), INT8_C( -61), INT8_C( -53), INT8_C( 28), INT8_C( 100), INT8_C( 45), INT8_C( 84), INT8_C( -69), INT8_C( 13), INT8_C( -17), INT8_C( 113), INT8_C( 75), INT8_C( 94), INT8_C( 69), INT8_C( 68), INT8_C( 18), INT8_C(-102), INT8_C( 3), INT8_C(-119), INT8_C( -46), INT8_C( 39), 
INT8_C(-112), INT8_C(-105), INT8_C(-123), INT8_C( 75), INT8_C( -48), INT8_C( -24), INT8_C( -24), INT8_C( 75), INT8_C( -3), INT8_C( -99), INT8_C( 105)) }, { simde_mm512_set_epi8(INT8_C( -17), INT8_C( -70), INT8_C( -8), INT8_C( -6), INT8_C( -25), INT8_C( 67), INT8_C(-120), INT8_C( 82), INT8_C( 90), INT8_C( -2), INT8_C( 90), INT8_C( 114), INT8_C( -38), INT8_C( -75), INT8_C( 88), INT8_C(-108), INT8_C( -25), INT8_C(-124), INT8_C( -83), INT8_C( -55), INT8_C( -40), INT8_C( 90), INT8_C( 105), INT8_C( 22), INT8_C( 103), INT8_C( 26), INT8_C( -91), INT8_C( 114), INT8_C( 98), INT8_C( 100), INT8_C( 66), INT8_C( 64), INT8_C( -59), INT8_C( -34), INT8_C( 81), INT8_C( -52), INT8_C( -90), INT8_C( 63), INT8_C(-114), INT8_C( 90), INT8_C( 11), INT8_C( 25), INT8_C( 93), INT8_C( -33), INT8_C( 76), INT8_C( -7), INT8_C( -19), INT8_C( 23), INT8_C( -78), INT8_C( 66), INT8_C( 45), INT8_C( -6), INT8_C( -98), INT8_C( -55), INT8_C( 30), INT8_C( -34), INT8_C( 89), INT8_C( -71), INT8_C( -37), INT8_C( -43), INT8_C( -19), INT8_C( -25), INT8_C( -17), INT8_C( -4)), UINT64_C( 2599823307644986375), simde_mm512_set_epi8(INT8_C( -99), INT8_C( 38), INT8_C( 18), INT8_C(-109), INT8_C( 29), INT8_C( -64), INT8_C( 95), INT8_C( 126), INT8_C( 95), INT8_C( 35), INT8_C( 71), INT8_C( 92), INT8_C( 50), INT8_C( -1), INT8_C( -76), INT8_C( 25), INT8_C( 42), INT8_C( 60), INT8_C( -11), INT8_C( -16), INT8_C(-122), INT8_C( 72), INT8_C( 102), INT8_C( 38), INT8_C( 86), INT8_C( 14), INT8_C( 82), INT8_C( -45), INT8_C(-108), INT8_C(-114), INT8_C( 112), INT8_C(-118), INT8_C( 4), INT8_C( -43), INT8_C( 38), INT8_C( -4), INT8_C( 78), INT8_C( 122), INT8_C( 16), INT8_C( -58), INT8_C( -65), INT8_C(-101), INT8_C( -18), INT8_C( 14), INT8_C( 29), INT8_C( -4), INT8_C( 93), INT8_C( 106), INT8_C(-112), INT8_C( 63), INT8_C( -1), INT8_C( 100), INT8_C( 79), INT8_C( 72), INT8_C( -17), INT8_C( 88), INT8_C( -3), INT8_C( -83), INT8_C( -17), INT8_C( 122), INT8_C( 121), INT8_C( -91), INT8_C( 5), INT8_C( 38)), simde_mm512_set_epi8(INT8_C( -22), 
INT8_C(-124), INT8_C( -17), INT8_C( -97), INT8_C( 33), INT8_C( 121), INT8_C(-115), INT8_C( 31), INT8_C( 21), INT8_C( -5), INT8_C( 49), INT8_C( 61), INT8_C( -59), INT8_C( -39), INT8_C( 80), INT8_C( 122), INT8_C( -84), INT8_C( -34), INT8_C(-110), INT8_C(-125), INT8_C( 33), INT8_C( -5), INT8_C(-115), INT8_C( 79), INT8_C( 18), INT8_C( 37), INT8_C( 104), INT8_C( -81), INT8_C( 38), INT8_C( -46), INT8_C( 97), INT8_C( -2), INT8_C( 71), INT8_C( 69), INT8_C( -82), INT8_C( 125), INT8_C(-121), INT8_C( 79), INT8_C( 33), INT8_C(-125), INT8_C(-101), INT8_C( 49), INT8_C( 13), INT8_C( -58), INT8_C( -47), INT8_C( -76), INT8_C( -78), INT8_C( 97), INT8_C( 116), INT8_C( -3), INT8_C( 89), INT8_C( -19), INT8_C( 76), INT8_C( 18), INT8_C( -76), INT8_C( -28), INT8_C( 61), INT8_C(-124), INT8_C( 120), INT8_C(-125), INT8_C( 58), INT8_C( 0), INT8_C( 85), INT8_C( 8)), simde_mm512_set_epi8(INT8_C( -17), INT8_C( -70), INT8_C( 13), INT8_C( -6), INT8_C( -25), INT8_C( 66), INT8_C(-120), INT8_C( 82), INT8_C( 90), INT8_C( -2), INT8_C( 90), INT8_C(-122), INT8_C( -38), INT8_C(-121), INT8_C( 88), INT8_C(-108), INT8_C( -25), INT8_C( 31), INT8_C( -55), INT8_C( -55), INT8_C( 42), INT8_C( 105), INT8_C( 105), INT8_C( 116), INT8_C( 78), INT8_C( 26), INT8_C( -91), INT8_C( 114), INT8_C( 98), INT8_C( 47), INT8_C( -77), INT8_C( 64), INT8_C( -59), INT8_C( -34), INT8_C( 81), INT8_C( -64), INT8_C( -91), INT8_C( 63), INT8_C( 32), INT8_C( 90), INT8_C( 55), INT8_C( -7), INT8_C( 75), INT8_C( 72), INT8_C( 76), INT8_C( 113), INT8_C( -19), INT8_C( 30), INT8_C(-123), INT8_C( 111), INT8_C( 58), INT8_C( -6), INT8_C( -98), INT8_C( -87), INT8_C( 30), INT8_C( -34), INT8_C( 89), INT8_C( -71), INT8_C( -37), INT8_C( -43), INT8_C( -19), INT8_C( 61), INT8_C( 34), INT8_C( 102)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_gf2p8affine_epi64_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].x, test_vec[i].A, INT8_C( 40)); simde_assert_m512i_i8(r, ==, test_vec[i].r); } 
return 0; } /* Table-driven test for simde_mm_maskz_gf2p8affine_epi64_epi8 (simde__m128i operands, simde__mmask16 mask). Each of the 8 vectors below is passed to the function together with the constant INT8_C( 112) as last argument, and the result is checked against the stored expectation with simde_assert_m128i_i8. Returns 0 once every vector has been processed. */static int test_simde_mm_maskz_gf2p8affine_epi64_epi8(SIMDE_MUNIT_TEST_ARGS) { /* Per-vector fields: k is the mask passed as first argument, x and A are the two vector operands, r is the expected result. In the data below r is 0 in every lane whose bit in k is clear. */const struct { simde__mmask16 k; simde__m128i x; simde__m128i A; simde__m128i r; } test_vec[8] = { { UINT16_C(28514), simde_mm_set_epi8(INT8_C( -8), INT8_C( 96), INT8_C( 18), INT8_C( 58), INT8_C( 122), INT8_C( 7), INT8_C( 79), INT8_C(-100), INT8_C( -48), INT8_C( 91), INT8_C( -3), INT8_C(-127), INT8_C( 14), INT8_C(-111), INT8_C( 27), INT8_C( -76)), simde_mm_set_epi8(INT8_C(-117), INT8_C( -3), INT8_C( 125), INT8_C(-126), INT8_C( 35), INT8_C( 127), INT8_C( 14), INT8_C( 124), INT8_C( -32), INT8_C( 64), INT8_C( -6), INT8_C( 5), INT8_C( -90), INT8_C( 20), INT8_C( -31), INT8_C( 65)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 96), INT8_C( -81), INT8_C( 0), INT8_C( 88), INT8_C( -40), INT8_C(-103), INT8_C( -36), INT8_C( 0), INT8_C( 75), INT8_C( 103), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-116), INT8_C( 0)) }, { UINT16_C( 6767), simde_mm_set_epi8(INT8_C( 126), INT8_C( 73), INT8_C( -25), INT8_C( 25), INT8_C( -85), INT8_C( 30), INT8_C( 88), INT8_C( 126), INT8_C( -49), INT8_C( 27), INT8_C( -87), INT8_C( -78), INT8_C( 121), INT8_C( -77), INT8_C( 64), INT8_C(-105)), simde_mm_set_epi8(INT8_C( 117), INT8_C( 88), INT8_C( -38), INT8_C( -50), INT8_C( -20), INT8_C( -14), INT8_C( 13), INT8_C(-126), INT8_C( 54), INT8_C( 27), INT8_C( -19), INT8_C( 2), INT8_C( -62), INT8_C( -13), INT8_C( -21), INT8_C( 121)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 72), INT8_C( 78), INT8_C( 0), INT8_C( 54), INT8_C( 0), INT8_C( 0), INT8_C(-120), INT8_C( -63), INT8_C( 0), INT8_C( -30), INT8_C( -33), INT8_C(-124), INT8_C( 63)) }, { UINT16_C(22163), simde_mm_set_epi8(INT8_C( 7), INT8_C(-121), INT8_C( 35), INT8_C( -53), INT8_C( 31), INT8_C( -93), INT8_C( 71), INT8_C( -88), INT8_C( 118), INT8_C( 62), INT8_C( 61), INT8_C( 63), INT8_C( 14), INT8_C( 14), INT8_C( 31), INT8_C( 112)), simde_mm_set_epi8(INT8_C( 95), INT8_C( 63), INT8_C(-121), INT8_C( 59), INT8_C( -53), INT8_C( -35), INT8_C( 37), INT8_C( -31), 
INT8_C( -96), INT8_C( 81), INT8_C( 92), INT8_C( -51), INT8_C( 48), INT8_C( -43), INT8_C( 1), INT8_C( -58)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 67), INT8_C( 0), INT8_C( -82), INT8_C( 0), INT8_C( -18), INT8_C( 70), INT8_C( 0), INT8_C( -43), INT8_C( 0), INT8_C( 0), INT8_C( 29), INT8_C( 0), INT8_C( 0), INT8_C( 12), INT8_C( -7)) }, { UINT16_C(17293), simde_mm_set_epi8(INT8_C( 43), INT8_C( -29), INT8_C( -33), INT8_C(-101), INT8_C( -92), INT8_C( -95), INT8_C( 82), INT8_C( 35), INT8_C( 3), INT8_C( -99), INT8_C( 5), INT8_C( 91), INT8_C( 76), INT8_C( -65), INT8_C( -10), INT8_C( -12)), simde_mm_set_epi8(INT8_C( 56), INT8_C( 110), INT8_C( 34), INT8_C( 85), INT8_C( 25), INT8_C( -71), INT8_C(-127), INT8_C( 0), INT8_C( 99), INT8_C( 28), INT8_C( 1), INT8_C( -35), INT8_C( -39), INT8_C( -86), INT8_C( -44), INT8_C( -63)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 67), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 69), INT8_C( 41), INT8_C( -52), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -39), INT8_C( 63), INT8_C( 0), INT8_C( 96)) }, { UINT16_C(60810), simde_mm_set_epi8(INT8_C(-115), INT8_C( 123), INT8_C( -93), INT8_C( 59), INT8_C( 66), INT8_C( 53), INT8_C( -76), INT8_C( 34), INT8_C( 80), INT8_C( 112), INT8_C( -24), INT8_C( -52), INT8_C( 68), INT8_C( 70), INT8_C( 6), INT8_C( 61)), simde_mm_set_epi8(INT8_C( -92), INT8_C(-121), INT8_C( 78), INT8_C( 54), INT8_C( -41), INT8_C( 118), INT8_C( -81), INT8_C( 86), INT8_C(-103), INT8_C( 105), INT8_C(-114), INT8_C(-114), INT8_C( -29), INT8_C(-127), INT8_C( 122), INT8_C( 87)), simde_mm_set_epi8(INT8_C( -54), INT8_C( -3), INT8_C( -26), INT8_C( 0), INT8_C( 58), INT8_C( 12), INT8_C( 0), INT8_C( -25), INT8_C( 99), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 46), INT8_C( 0), INT8_C( 32), INT8_C( 0)) }, { UINT16_C(11150), simde_mm_set_epi8(INT8_C( 29), INT8_C( -45), INT8_C( -10), INT8_C( 88), INT8_C(-127), INT8_C( 63), INT8_C( -37), INT8_C( 20), INT8_C( -46), INT8_C( 91), INT8_C( -7), INT8_C( 17), INT8_C(-101), INT8_C( -78), INT8_C(-112), 
INT8_C( -57)), simde_mm_set_epi8(INT8_C( -1), INT8_C( 108), INT8_C( 6), INT8_C( 104), INT8_C( -14), INT8_C( 32), INT8_C( 24), INT8_C( 11), INT8_C( 24), INT8_C( 56), INT8_C(-122), INT8_C( 6), INT8_C( 113), INT8_C( -67), INT8_C(-122), INT8_C( -34)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C(-126), INT8_C( 0), INT8_C( -32), INT8_C( 0), INT8_C( -12), INT8_C( 38), INT8_C( 123), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 120), INT8_C( -39), INT8_C( 39), INT8_C( 0)) }, { UINT16_C(19041), simde_mm_set_epi8(INT8_C( 121), INT8_C( -69), INT8_C( -17), INT8_C( 9), INT8_C( -15), INT8_C( -58), INT8_C(-102), INT8_C( -34), INT8_C( -5), INT8_C( 28), INT8_C( 80), INT8_C( -95), INT8_C( -19), INT8_C( 42), INT8_C( 52), INT8_C( -49)), simde_mm_set_epi8(INT8_C( -28), INT8_C( -43), INT8_C( -70), INT8_C( -76), INT8_C( -94), INT8_C(-108), INT8_C( -89), INT8_C( -53), INT8_C( 93), INT8_C( 113), INT8_C( -28), INT8_C( 100), INT8_C( -99), INT8_C(-106), INT8_C(-107), INT8_C( -54)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 110), INT8_C( 0), INT8_C( 0), INT8_C( -67), INT8_C( 0), INT8_C( -15), INT8_C( 0), INT8_C( 0), INT8_C( -17), INT8_C(-116), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 20)) }, { UINT16_C(64646), simde_mm_set_epi8(INT8_C( -52), INT8_C(-101), INT8_C( 102), INT8_C(-127), INT8_C( -21), INT8_C( 125), INT8_C( 50), INT8_C( -75), INT8_C( 86), INT8_C( 40), INT8_C( -12), INT8_C( 70), INT8_C( -13), INT8_C( -31), INT8_C(-109), INT8_C( 62)), simde_mm_set_epi8(INT8_C( 114), INT8_C( 64), INT8_C( -41), INT8_C(-112), INT8_C( -98), INT8_C( -51), INT8_C( 83), INT8_C( 93), INT8_C( 63), INT8_C( -30), INT8_C( -83), INT8_C( 98), INT8_C( 110), INT8_C( -3), INT8_C( 105), INT8_C( -23)), simde_mm_set_epi8(INT8_C( -81), INT8_C(-112), INT8_C( 119), INT8_C( -88), INT8_C( -85), INT8_C( -85), INT8_C( 0), INT8_C( 0), INT8_C(-123), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 54), INT8_C( 9), INT8_C( 0)) }, }; /* Walk the whole table; the element count is derived from the array itself. */for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { 
simde__m128i r = simde_mm_maskz_gf2p8affine_epi64_epi8(test_vec[i].k, test_vec[i].x, test_vec[i].A, INT8_C( 112)); /* The computed vector must equal the stored expectation for this entry. */simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } /* Table-driven test for simde_mm256_maskz_gf2p8affine_epi64_epi8 (simde__m256i operands, simde__mmask32 mask). Each of the 8 vectors below is passed to the function together with the constant 217 as last argument, and the result is checked against the stored expectation with simde_assert_m256i_i8. Returns 0 once every vector has been processed. */static int test_simde_mm256_maskz_gf2p8affine_epi64_epi8(SIMDE_MUNIT_TEST_ARGS) { /* Per-vector fields: k is the mask passed as first argument, x and A are the two vector operands, r is the expected result. In the data below r is 0 in every lane whose bit in k is clear. */const struct { simde__mmask32 k; simde__m256i x; simde__m256i A; simde__m256i r; } test_vec[8] = { { UINT32_C(2667081570), simde_mm256_set_epi8(INT8_C(-117), INT8_C( -3), INT8_C( 125), INT8_C(-126), INT8_C( 35), INT8_C( 127), INT8_C( 14), INT8_C( 124), INT8_C( -32), INT8_C( 64), INT8_C( -6), INT8_C( 5), INT8_C( -90), INT8_C( 20), INT8_C( -31), INT8_C( 65), INT8_C( -8), INT8_C( 96), INT8_C( 18), INT8_C( 58), INT8_C( 122), INT8_C( 7), INT8_C( 79), INT8_C(-100), INT8_C( -48), INT8_C( 91), INT8_C( -3), INT8_C(-127), INT8_C( 14), INT8_C(-111), INT8_C( 27), INT8_C( -76)), simde_mm256_set_epi8(INT8_C( 54), INT8_C( 27), INT8_C( -19), INT8_C( 2), INT8_C( -62), INT8_C( -13), INT8_C( -21), INT8_C( 121), INT8_C( 126), INT8_C( 73), INT8_C( -25), INT8_C( 25), INT8_C( -85), INT8_C( 30), INT8_C( 88), INT8_C( 126), INT8_C( -49), INT8_C( 27), INT8_C( -87), INT8_C( -78), INT8_C( 121), INT8_C( -77), INT8_C( 64), INT8_C(-105), INT8_C( -59), INT8_C( 68), INT8_C( 37), INT8_C( 123), INT8_C( -35), INT8_C(-104), INT8_C( 26), INT8_C( 111)), simde_mm256_set_epi8(INT8_C( -10), INT8_C( 0), INT8_C( 0), INT8_C( -42), INT8_C( -95), INT8_C( 53), INT8_C( 97), INT8_C( 0), INT8_C( -97), INT8_C( 30), INT8_C( 56), INT8_C( 98), INT8_C( 72), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -76), INT8_C( -56), INT8_C( 0), INT8_C( -78), INT8_C( 68), INT8_C( 2), INT8_C( 88), INT8_C( 0), INT8_C(-105), INT8_C( 117), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 12), INT8_C( 0)) }, { UINT32_C(3975286146), simde_mm256_set_epi8(INT8_C( -96), INT8_C( 81), INT8_C( 92), INT8_C( -51), INT8_C( 48), INT8_C( -43), INT8_C( 1), INT8_C( -58), INT8_C( 7), INT8_C(-121), INT8_C( 35), INT8_C( -53), INT8_C( 31), INT8_C( -93), INT8_C( 71), INT8_C( -88), INT8_C( 118), 
INT8_C( 62), INT8_C( 61), INT8_C( 63), INT8_C( 14), INT8_C( 14), INT8_C( 31), INT8_C( 112), INT8_C(-116), INT8_C( -66), INT8_C( 120), INT8_C( 120), INT8_C( 41), INT8_C( 92), INT8_C( 86), INT8_C(-109)), simde_mm256_set_epi8(INT8_C( 43), INT8_C( -29), INT8_C( -33), INT8_C(-101), INT8_C( -92), INT8_C( -95), INT8_C( 82), INT8_C( 35), INT8_C( 3), INT8_C( -99), INT8_C( 5), INT8_C( 91), INT8_C( 76), INT8_C( -65), INT8_C( -10), INT8_C( -12), INT8_C( -73), INT8_C( 117), INT8_C(-114), INT8_C( -69), INT8_C(-117), INT8_C( -76), INT8_C( 67), INT8_C(-115), INT8_C( 95), INT8_C( 63), INT8_C(-121), INT8_C( 59), INT8_C( -53), INT8_C( -35), INT8_C( 37), INT8_C( -31)), simde_mm256_set_epi8(INT8_C( 84), INT8_C( 124), INT8_C( -54), INT8_C( 0), INT8_C( 38), INT8_C( 86), INT8_C( 0), INT8_C( 0), INT8_C( 105), INT8_C(-117), INT8_C( 127), INT8_C( -97), INT8_C( 0), INT8_C( 0), INT8_C( -79), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -65), INT8_C( -65), INT8_C( 0), INT8_C(-101), INT8_C( 49), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 59), INT8_C( 0)) }, { UINT32_C(3651851457), simde_mm256_set_epi8(INT8_C(-115), INT8_C( 123), INT8_C( -93), INT8_C( 59), INT8_C( 66), INT8_C( 53), INT8_C( -76), INT8_C( 34), INT8_C( 80), INT8_C( 112), INT8_C( -24), INT8_C( -52), INT8_C( 68), INT8_C( 70), INT8_C( 6), INT8_C( 61), INT8_C(-102), INT8_C( -31), INT8_C( -61), INT8_C( -1), INT8_C( 87), INT8_C( 86), INT8_C( -19), INT8_C(-118), INT8_C( 56), INT8_C( 110), INT8_C( 34), INT8_C( 85), INT8_C( 25), INT8_C( -71), INT8_C(-127), INT8_C( 0)), simde_mm256_set_epi8(INT8_C( -46), INT8_C( 91), INT8_C( -7), INT8_C( 17), INT8_C(-101), INT8_C( -78), INT8_C(-112), INT8_C( -57), INT8_C( -66), INT8_C( -70), INT8_C(-120), INT8_C( 26), INT8_C( -56), INT8_C(-100), INT8_C( 43), INT8_C(-114), INT8_C( -92), INT8_C(-121), INT8_C( 78), INT8_C( 54), INT8_C( -41), INT8_C( 118), INT8_C( -81), INT8_C( 86), INT8_C(-103), INT8_C( 105), INT8_C(-114), INT8_C(-114), INT8_C( -29), INT8_C(-127), 
INT8_C( 122), INT8_C( 87)), simde_mm256_set_epi8(INT8_C( 36), INT8_C( 62), INT8_C( 0), INT8_C( -71), INT8_C( -19), INT8_C( 0), INT8_C( 0), INT8_C( 78), INT8_C( -30), INT8_C( 0), INT8_C( -62), INT8_C( 0), INT8_C( 104), INT8_C( 0), INT8_C( -77), INT8_C( 0), INT8_C(-120), INT8_C( 5), INT8_C( 0), INT8_C( -8), INT8_C( 0), INT8_C( -44), INT8_C( 0), INT8_C( 0), INT8_C( 5), INT8_C( 70), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -39)) }, { UINT32_C(2168445716), simde_mm256_set_epi8(INT8_C( -5), INT8_C( 28), INT8_C( 80), INT8_C( -95), INT8_C( -19), INT8_C( 42), INT8_C( 52), INT8_C( -49), INT8_C( -78), INT8_C( 121), INT8_C( 126), INT8_C( 119), INT8_C( 15), INT8_C( 89), INT8_C( 74), INT8_C( 97), INT8_C( -1), INT8_C( 108), INT8_C( 6), INT8_C( 104), INT8_C( -14), INT8_C( 32), INT8_C( 24), INT8_C( 11), INT8_C( 24), INT8_C( 56), INT8_C(-122), INT8_C( 6), INT8_C( 113), INT8_C( -67), INT8_C(-122), INT8_C( -34)), simde_mm256_set_epi8(INT8_C( -49), INT8_C( 54), INT8_C(-125), INT8_C( 111), INT8_C( 64), INT8_C( -66), INT8_C( -4), INT8_C(-122), INT8_C( -28), INT8_C( -43), INT8_C( -70), INT8_C( -76), INT8_C( -94), INT8_C(-108), INT8_C( -89), INT8_C( -53), INT8_C( 93), INT8_C( 113), INT8_C( -28), INT8_C( 100), INT8_C( -99), INT8_C(-106), INT8_C(-107), INT8_C( -54), INT8_C( 121), INT8_C( -69), INT8_C( -17), INT8_C( 9), INT8_C( -15), INT8_C( -58), INT8_C(-102), INT8_C( -34)), simde_mm256_set_epi8(INT8_C( -90), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 69), INT8_C( 0), INT8_C( 0), INT8_C( 18), INT8_C( 84), INT8_C( 32), INT8_C( 50), INT8_C( 10), INT8_C( -59), INT8_C( -64), INT8_C( -76), INT8_C( 0), INT8_C( -55), INT8_C( 127), INT8_C( 0), INT8_C( 59), INT8_C( -69), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-101), INT8_C( 0), INT8_C( -97), INT8_C( 0), INT8_C( 0)) }, { UINT32_C(4091646782), simde_mm256_set_epi8(INT8_C( 107), INT8_C( 124), INT8_C( -95), INT8_C( 119), INT8_C( -70), INT8_C( 31), INT8_C(-107), INT8_C( 56), INT8_C( 114), 
INT8_C( 64), INT8_C( -41), INT8_C(-112), INT8_C( -98), INT8_C( -51), INT8_C( 83), INT8_C( 93), INT8_C( 63), INT8_C( -30), INT8_C( -83), INT8_C( 98), INT8_C( 110), INT8_C( -3), INT8_C( 105), INT8_C( -23), INT8_C( -52), INT8_C(-101), INT8_C( 102), INT8_C(-127), INT8_C( -21), INT8_C( 125), INT8_C( 50), INT8_C( -75)), simde_mm256_set_epi8(INT8_C( -33), INT8_C(-115), INT8_C( -46), INT8_C( 86), INT8_C( -86), INT8_C( -28), INT8_C( -72), INT8_C( 4), INT8_C( -56), INT8_C( -54), INT8_C( -60), INT8_C( 85), INT8_C( 73), INT8_C( 2), INT8_C( -40), INT8_C( -46), INT8_C( -45), INT8_C( 89), INT8_C( -24), INT8_C( 92), INT8_C( 7), INT8_C( 49), INT8_C( 106), INT8_C( -69), INT8_C( -61), INT8_C( -44), INT8_C( 92), INT8_C( 28), INT8_C( 21), INT8_C( -36), INT8_C( -96), INT8_C( 50)), simde_mm256_set_epi8(INT8_C( -55), INT8_C( 49), INT8_C( -35), INT8_C( 124), INT8_C( 0), INT8_C( 0), INT8_C( 75), INT8_C( -73), INT8_C( 108), INT8_C( 6), INT8_C( -65), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-119), INT8_C( 34), INT8_C( 0), INT8_C( 0), INT8_C( -93), INT8_C( 0), INT8_C( 0), INT8_C( 15), INT8_C(-118), INT8_C( 0), INT8_C( 0), INT8_C(-127), INT8_C( -85), INT8_C( -31), INT8_C(-125), INT8_C( 38), INT8_C( 0)) }, { UINT32_C( 484348831), simde_mm256_set_epi8(INT8_C( 103), INT8_C(-123), INT8_C( -94), INT8_C( -49), INT8_C(-110), INT8_C(-117), INT8_C( -20), INT8_C( -24), INT8_C(-118), INT8_C(-114), INT8_C( 58), INT8_C( 97), INT8_C(-121), INT8_C(-105), INT8_C( 15), INT8_C( 90), INT8_C( 56), INT8_C( -93), INT8_C( -6), INT8_C( -16), INT8_C( 110), INT8_C( 3), INT8_C( -8), INT8_C( -93), INT8_C( 96), INT8_C(-124), INT8_C( -43), INT8_C( 119), INT8_C( -95), INT8_C( 114), INT8_C(-109), INT8_C( -39)), simde_mm256_set_epi8(INT8_C( 95), INT8_C( -75), INT8_C( -52), INT8_C( 24), INT8_C( -86), INT8_C( 91), INT8_C( -23), INT8_C( -49), INT8_C( 111), INT8_C( -77), INT8_C( 6), INT8_C( 42), INT8_C( 120), INT8_C( -42), INT8_C( 70), INT8_C( 14), INT8_C( 84), INT8_C( 113), INT8_C( -38), INT8_C( 102), INT8_C( -38), 
INT8_C(-125), INT8_C( 39), INT8_C( 72), INT8_C( -14), INT8_C( 109), INT8_C( 110), INT8_C( 60), INT8_C( -93), INT8_C( 6), INT8_C( 127), INT8_C( 94)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -62), INT8_C(-107), INT8_C( -96), INT8_C( 0), INT8_C( 0), INT8_C(-115), INT8_C( 104), INT8_C( 0), INT8_C( -80), INT8_C( -14), INT8_C( -64), INT8_C( 73), INT8_C( 0), INT8_C( 16), INT8_C( 0), INT8_C( 0), INT8_C( 47), INT8_C( 0), INT8_C( 0), INT8_C( -69), INT8_C( -71), INT8_C( 65), INT8_C( 0), INT8_C( 0), INT8_C( -63), INT8_C( -59), INT8_C( 125), INT8_C( -90), INT8_C( 90)) }, { UINT32_C(1399212686), simde_mm256_set_epi8(INT8_C( 41), INT8_C( -37), INT8_C(-117), INT8_C( 95), INT8_C( -14), INT8_C(-122), INT8_C( -91), INT8_C( -89), INT8_C( -40), INT8_C( 17), INT8_C( -71), INT8_C( 29), INT8_C(-114), INT8_C( 39), INT8_C( -61), INT8_C(-105), INT8_C( -8), INT8_C( -35), INT8_C( 35), INT8_C( 20), INT8_C( 59), INT8_C( -64), INT8_C( 20), INT8_C( -16), INT8_C(-106), INT8_C( -49), INT8_C( -6), INT8_C( 106), INT8_C( -37), INT8_C( 21), INT8_C( -6), INT8_C( -23)), simde_mm256_set_epi8(INT8_C( 63), INT8_C( -90), INT8_C( 32), INT8_C( -78), INT8_C( -95), INT8_C( -2), INT8_C(-119), INT8_C( 67), INT8_C(-107), INT8_C( -81), INT8_C( 37), INT8_C( 116), INT8_C( -57), INT8_C( 58), INT8_C(-116), INT8_C( 117), INT8_C( 93), INT8_C( 117), INT8_C(-121), INT8_C( -82), INT8_C( -77), INT8_C(-126), INT8_C( -76), INT8_C( -31), INT8_C(-112), INT8_C(-125), INT8_C( -27), INT8_C( -87), INT8_C( -3), INT8_C( -45), INT8_C( 106), INT8_C( 28)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 49), INT8_C( 0), INT8_C( 104), INT8_C( 0), INT8_C( 0), INT8_C( 110), INT8_C( -59), INT8_C( 0), INT8_C( -25), INT8_C( 120), INT8_C( 0), INT8_C( 0), INT8_C( 13), INT8_C( -73), INT8_C( 0), INT8_C( 0), INT8_C( 36), INT8_C( 0), INT8_C( 0), INT8_C( -14), INT8_C( 0), INT8_C( -59), INT8_C( 0), INT8_C( -95), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -89), INT8_C( -62), INT8_C( -59), INT8_C( 0)) }, { UINT32_C(2838825552), 
simde_mm256_set_epi8(INT8_C( 16), INT8_C( -91), INT8_C( 46), INT8_C( -50), INT8_C( 114), INT8_C( -52), INT8_C( 74), INT8_C( -49), INT8_C( 12), INT8_C( 87), INT8_C( 122), INT8_C( 68), INT8_C( 75), INT8_C( 81), INT8_C( -37), INT8_C( -8), INT8_C( -53), INT8_C( 104), INT8_C( 84), INT8_C(-122), INT8_C( -31), INT8_C( -86), INT8_C( -25), INT8_C( 110), INT8_C( -6), INT8_C( 33), INT8_C( -77), INT8_C( -84), INT8_C( 8), INT8_C( 51), INT8_C(-125), INT8_C( -60)), simde_mm256_set_epi8(INT8_C( 104), INT8_C( -53), INT8_C( 118), INT8_C( 34), INT8_C( 19), INT8_C( -6), INT8_C(-103), INT8_C( 93), INT8_C( 101), INT8_C( 100), INT8_C( -55), INT8_C( 108), INT8_C( 6), INT8_C( -85), INT8_C( 4), INT8_C( -17), INT8_C(-116), INT8_C( 108), INT8_C( 39), INT8_C( 48), INT8_C( -56), INT8_C( -50), INT8_C( -30), INT8_C( -76), INT8_C( -28), INT8_C(-125), INT8_C( 119), INT8_C(-118), INT8_C( 52), INT8_C( 56), INT8_C( -74), INT8_C( 39)), simde_mm256_set_epi8(INT8_C( 45), INT8_C( 0), INT8_C( -83), INT8_C( 0), INT8_C(-103), INT8_C( 0), INT8_C( 0), INT8_C(-105), INT8_C( 0), INT8_C( 0), INT8_C( -31), INT8_C(-115), INT8_C( 0), INT8_C( -13), INT8_C( 0), INT8_C( -11), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-112), INT8_C( 0), INT8_C( 83), INT8_C( 0), INT8_C( 0), INT8_C( -86), INT8_C( 0), INT8_C(-102), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, }; /* Walk the whole table; the last argument 217 is written as a plain integer literal and is the same for every entry. NOTE(review): presumably this is the intrinsic's 8-bit immediate - confirm against the intrinsic's documentation. */for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_maskz_gf2p8affine_epi64_epi8(test_vec[i].k, test_vec[i].x, test_vec[i].A, 217); /* The computed vector must equal the stored expectation for this entry. */simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_gf2p8affine_epi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask64 k; simde__m512i x; simde__m512i A; simde__m512i r; } test_vec[8] = { { UINT64_C( 2216089611417448290), simde_mm512_set_epi8(INT8_C( 54), INT8_C( 27), INT8_C( -19), INT8_C( 2), INT8_C( -62), INT8_C( -13), INT8_C( -21), INT8_C( 121), INT8_C( 126), INT8_C( 73), INT8_C( -25), INT8_C( 25), INT8_C( -85), 
INT8_C( 30), INT8_C( 88), INT8_C( 126), INT8_C( -49), INT8_C( 27), INT8_C( -87), INT8_C( -78), INT8_C( 121), INT8_C( -77), INT8_C( 64), INT8_C(-105), INT8_C( -59), INT8_C( 68), INT8_C( 37), INT8_C( 123), INT8_C( -35), INT8_C(-104), INT8_C( 26), INT8_C( 111), INT8_C(-117), INT8_C( -3), INT8_C( 125), INT8_C(-126), INT8_C( 35), INT8_C( 127), INT8_C( 14), INT8_C( 124), INT8_C( -32), INT8_C( 64), INT8_C( -6), INT8_C( 5), INT8_C( -90), INT8_C( 20), INT8_C( -31), INT8_C( 65), INT8_C( -8), INT8_C( 96), INT8_C( 18), INT8_C( 58), INT8_C( 122), INT8_C( 7), INT8_C( 79), INT8_C(-100), INT8_C( -48), INT8_C( 91), INT8_C( -3), INT8_C(-127), INT8_C( 14), INT8_C(-111), INT8_C( 27), INT8_C( -76)), simde_mm512_set_epi8(INT8_C( 3), INT8_C( -99), INT8_C( 5), INT8_C( 91), INT8_C( 76), INT8_C( -65), INT8_C( -10), INT8_C( -12), INT8_C( -73), INT8_C( 117), INT8_C(-114), INT8_C( -69), INT8_C(-117), INT8_C( -76), INT8_C( 67), INT8_C(-115), INT8_C( 95), INT8_C( 63), INT8_C(-121), INT8_C( 59), INT8_C( -53), INT8_C( -35), INT8_C( 37), INT8_C( -31), INT8_C( -96), INT8_C( 81), INT8_C( 92), INT8_C( -51), INT8_C( 48), INT8_C( -43), INT8_C( 1), INT8_C( -58), INT8_C( 7), INT8_C(-121), INT8_C( 35), INT8_C( -53), INT8_C( 31), INT8_C( -93), INT8_C( 71), INT8_C( -88), INT8_C( 118), INT8_C( 62), INT8_C( 61), INT8_C( 63), INT8_C( 14), INT8_C( 14), INT8_C( 31), INT8_C( 112), INT8_C(-116), INT8_C( -66), INT8_C( 120), INT8_C( 120), INT8_C( 41), INT8_C( 92), INT8_C( 86), INT8_C(-109), INT8_C( 117), INT8_C( 88), INT8_C( -38), INT8_C( -50), INT8_C( -20), INT8_C( -14), INT8_C( 13), INT8_C(-126)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -42), INT8_C( -20), INT8_C( -55), INT8_C( 25), INT8_C( 0), INT8_C(-101), INT8_C( -70), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-101), INT8_C( 0), INT8_C( 0), INT8_C( 5), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -89), INT8_C( 126), INT8_C( -67), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 5), INT8_C( 0), 
INT8_C(-115), INT8_C( 0), INT8_C( 0), INT8_C( 106), INT8_C( 27), INT8_C(-120), INT8_C( 11), INT8_C( 0), INT8_C( -79), INT8_C( 62), INT8_C( 123), INT8_C(-116), INT8_C( 52), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -51), INT8_C(-109), INT8_C( 0), INT8_C( -34), INT8_C(-114), INT8_C( -35), INT8_C(-114), INT8_C( 0), INT8_C( 20), INT8_C( 108), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 43), INT8_C( 0)) }, { UINT64_C( 3162617222919180835), simde_mm512_set_epi8(INT8_C( -66), INT8_C( -70), INT8_C(-120), INT8_C( 26), INT8_C( -56), INT8_C(-100), INT8_C( 43), INT8_C(-114), INT8_C( -92), INT8_C(-121), INT8_C( 78), INT8_C( 54), INT8_C( -41), INT8_C( 118), INT8_C( -81), INT8_C( 86), INT8_C(-103), INT8_C( 105), INT8_C(-114), INT8_C(-114), INT8_C( -29), INT8_C(-127), INT8_C( 122), INT8_C( 87), INT8_C(-115), INT8_C( 123), INT8_C( -93), INT8_C( 59), INT8_C( 66), INT8_C( 53), INT8_C( -76), INT8_C( 34), INT8_C( 80), INT8_C( 112), INT8_C( -24), INT8_C( -52), INT8_C( 68), INT8_C( 70), INT8_C( 6), INT8_C( 61), INT8_C(-102), INT8_C( -31), INT8_C( -61), INT8_C( -1), INT8_C( 87), INT8_C( 86), INT8_C( -19), INT8_C(-118), INT8_C( 56), INT8_C( 110), INT8_C( 34), INT8_C( 85), INT8_C( 25), INT8_C( -71), INT8_C(-127), INT8_C( 0), INT8_C( 99), INT8_C( 28), INT8_C( 1), INT8_C( -35), INT8_C( -39), INT8_C( -86), INT8_C( -44), INT8_C( -63)), simde_mm512_set_epi8(INT8_C( 93), INT8_C( 113), INT8_C( -28), INT8_C( 100), INT8_C( -99), INT8_C(-106), INT8_C(-107), INT8_C( -54), INT8_C( 121), INT8_C( -69), INT8_C( -17), INT8_C( 9), INT8_C( -15), INT8_C( -58), INT8_C(-102), INT8_C( -34), INT8_C( -5), INT8_C( 28), INT8_C( 80), INT8_C( -95), INT8_C( -19), INT8_C( 42), INT8_C( 52), INT8_C( -49), INT8_C( -78), INT8_C( 121), INT8_C( 126), INT8_C( 119), INT8_C( 15), INT8_C( 89), INT8_C( 74), INT8_C( 97), INT8_C( -1), INT8_C( 108), INT8_C( 6), INT8_C( 104), INT8_C( -14), INT8_C( 32), INT8_C( 24), INT8_C( 11), INT8_C( 24), INT8_C( 56), INT8_C(-122), INT8_C( 6), INT8_C( 113), INT8_C( -67), INT8_C(-122), 
INT8_C( -34), INT8_C( 29), INT8_C( -45), INT8_C( -10), INT8_C( 88), INT8_C(-127), INT8_C( 63), INT8_C( -37), INT8_C( 20), INT8_C( -46), INT8_C( 91), INT8_C( -7), INT8_C( 17), INT8_C(-101), INT8_C( -78), INT8_C(-112), INT8_C( -57)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( -38), INT8_C( 0), INT8_C( 85), INT8_C( 0), INT8_C( -45), INT8_C( 7), INT8_C( -6), INT8_C( 20), INT8_C(-121), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -52), INT8_C(-101), INT8_C( 75), INT8_C( 121), INT8_C( 0), INT8_C( -26), INT8_C( -14), INT8_C( -65), INT8_C( 6), INT8_C(-121), INT8_C( 110), INT8_C( 0), INT8_C( 0), INT8_C(-114), INT8_C( 12), INT8_C( 0), INT8_C( 2), INT8_C( 109), INT8_C( -11), INT8_C( 0), INT8_C( 69), INT8_C( 0), INT8_C( 0), INT8_C( 54), INT8_C( 0), INT8_C( 0), INT8_C(-121), INT8_C( 0), INT8_C( 55), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 52), INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( -56), INT8_C( 0), INT8_C( 0), INT8_C(-102), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 33), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 50), INT8_C( 83)) }, { UINT64_C(16489290895671470027), simde_mm512_set_epi8(INT8_C( -45), INT8_C( 89), INT8_C( -24), INT8_C( 92), INT8_C( 7), INT8_C( 49), INT8_C( 106), INT8_C( -69), INT8_C( -61), INT8_C( -44), INT8_C( 92), INT8_C( 28), INT8_C( 21), INT8_C( -36), INT8_C( -96), INT8_C( 50), INT8_C( 107), INT8_C( 124), INT8_C( -95), INT8_C( 119), INT8_C( -70), INT8_C( 31), INT8_C(-107), INT8_C( 56), INT8_C( 114), INT8_C( 64), INT8_C( -41), INT8_C(-112), INT8_C( -98), INT8_C( -51), INT8_C( 83), INT8_C( 93), INT8_C( 63), INT8_C( -30), INT8_C( -83), INT8_C( 98), INT8_C( 110), INT8_C( -3), INT8_C( 105), INT8_C( -23), INT8_C( -52), INT8_C(-101), INT8_C( 102), INT8_C(-127), INT8_C( -21), INT8_C( 125), INT8_C( 50), INT8_C( -75), INT8_C( 86), INT8_C( 40), INT8_C( -12), INT8_C( 70), INT8_C( -13), INT8_C( -31), INT8_C(-109), INT8_C( 62), INT8_C( -49), INT8_C( 54), INT8_C(-125), INT8_C( 111), INT8_C( 64), INT8_C( -66), INT8_C( -4), INT8_C(-122)), 
simde_mm512_set_epi8(INT8_C( -14), INT8_C( 109), INT8_C( 110), INT8_C( 60), INT8_C( -93), INT8_C( 6), INT8_C( 127), INT8_C( 94), INT8_C( 103), INT8_C(-123), INT8_C( -94), INT8_C( -49), INT8_C(-110), INT8_C(-117), INT8_C( -20), INT8_C( -24), INT8_C(-118), INT8_C(-114), INT8_C( 58), INT8_C( 97), INT8_C(-121), INT8_C(-105), INT8_C( 15), INT8_C( 90), INT8_C( 56), INT8_C( -93), INT8_C( -6), INT8_C( -16), INT8_C( 110), INT8_C( 3), INT8_C( -8), INT8_C( -93), INT8_C( 96), INT8_C(-124), INT8_C( -43), INT8_C( 119), INT8_C( -95), INT8_C( 114), INT8_C(-109), INT8_C( -39), INT8_C( -1), INT8_C( 101), INT8_C( 115), INT8_C( 83), INT8_C( 28), INT8_C( -34), INT8_C(-109), INT8_C( -97), INT8_C( -33), INT8_C(-115), INT8_C( -46), INT8_C( 86), INT8_C( -86), INT8_C( -28), INT8_C( -72), INT8_C( 4), INT8_C( -56), INT8_C( -54), INT8_C( -60), INT8_C( 85), INT8_C( 73), INT8_C( 2), INT8_C( -40), INT8_C( -46)), simde_mm512_set_epi8(INT8_C( 7), INT8_C( 45), INT8_C( -8), INT8_C( 0), INT8_C( 0), INT8_C( 123), INT8_C( 0), INT8_C( 0), INT8_C( -98), INT8_C( -45), INT8_C( 0), INT8_C( 12), INT8_C( 0), INT8_C( 59), INT8_C( 0), INT8_C( 87), INT8_C( 115), INT8_C( 0), INT8_C( -8), INT8_C( 98), INT8_C( 20), INT8_C( 0), INT8_C( 34), INT8_C( 0), INT8_C( -57), INT8_C( 0), INT8_C( 100), INT8_C( 60), INT8_C( 0), INT8_C( -54), INT8_C( 0), INT8_C( 0), INT8_C( 80), INT8_C( 0), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 119), INT8_C( 0), INT8_C( 115), INT8_C( 0), INT8_C( 0), INT8_C(-111), INT8_C( 0), INT8_C( -89), INT8_C( 0), INT8_C( 0), INT8_C( 105), INT8_C( 0), INT8_C( 115), INT8_C( 0), INT8_C( 0), INT8_C(-106), INT8_C(-101), INT8_C( 103), INT8_C( 66), INT8_C( -39), INT8_C( 0), INT8_C( 0), INT8_C( 96), INT8_C( 0), INT8_C( 48), INT8_C( -42)) }, { UINT64_C( 6084884706841798472), simde_mm512_set_epi8(INT8_C(-112), INT8_C(-125), INT8_C( -27), INT8_C( -87), INT8_C( -3), INT8_C( -45), INT8_C( 106), INT8_C( 28), INT8_C( 41), INT8_C( -37), INT8_C(-117), INT8_C( 95), INT8_C( -14), INT8_C(-122), INT8_C( -91), 
INT8_C( -89), INT8_C( -40), INT8_C( 17), INT8_C( -71), INT8_C( 29), INT8_C(-114), INT8_C( 39), INT8_C( -61), INT8_C(-105), INT8_C( -8), INT8_C( -35), INT8_C( 35), INT8_C( 20), INT8_C( 59), INT8_C( -64), INT8_C( 20), INT8_C( -16), INT8_C(-106), INT8_C( -49), INT8_C( -6), INT8_C( 106), INT8_C( -37), INT8_C( 21), INT8_C( -6), INT8_C( -23), INT8_C( 118), INT8_C( -97), INT8_C(-119), INT8_C( -44), INT8_C( 83), INT8_C( 102), INT8_C( 74), INT8_C(-114), INT8_C( 95), INT8_C( -75), INT8_C( -52), INT8_C( 24), INT8_C( -86), INT8_C( 91), INT8_C( -23), INT8_C( -49), INT8_C( 111), INT8_C( -77), INT8_C( 6), INT8_C( 42), INT8_C( 120), INT8_C( -42), INT8_C( 70), INT8_C( 14)), simde_mm512_set_epi8(INT8_C( 16), INT8_C( -91), INT8_C( 46), INT8_C( -50), INT8_C( 114), INT8_C( -52), INT8_C( 74), INT8_C( -49), INT8_C( 12), INT8_C( 87), INT8_C( 122), INT8_C( 68), INT8_C( 75), INT8_C( 81), INT8_C( -37), INT8_C( -8), INT8_C( -53), INT8_C( 104), INT8_C( 84), INT8_C(-122), INT8_C( -31), INT8_C( -86), INT8_C( -25), INT8_C( 110), INT8_C( -6), INT8_C( 33), INT8_C( -77), INT8_C( -84), INT8_C( 8), INT8_C( 51), INT8_C(-125), INT8_C( -60), INT8_C( -65), INT8_C( 89), INT8_C( -90), INT8_C(-126), INT8_C( -87), INT8_C( 53), INT8_C( 10), INT8_C( 80), INT8_C( 63), INT8_C( -90), INT8_C( 32), INT8_C( -78), INT8_C( -95), INT8_C( -2), INT8_C(-119), INT8_C( 67), INT8_C(-107), INT8_C( -81), INT8_C( 37), INT8_C( 116), INT8_C( -57), INT8_C( 58), INT8_C(-116), INT8_C( 117), INT8_C( 93), INT8_C( 117), INT8_C(-121), INT8_C( -82), INT8_C( -77), INT8_C(-126), INT8_C( -76), INT8_C( -31)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 75), INT8_C( 0), INT8_C( 109), INT8_C( 0), INT8_C( -94), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-106), INT8_C(-114), INT8_C( 93), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -44), INT8_C( -74), INT8_C( -22), INT8_C( 0), INT8_C(-123), INT8_C( 64), INT8_C( 0), INT8_C( -87), INT8_C( 0), INT8_C( 0), INT8_C( 33), INT8_C(-109), INT8_C( 0), INT8_C( 0), INT8_C( -13), INT8_C( 18), INT8_C( 0), INT8_C( 
105), INT8_C( 40), INT8_C( 0), INT8_C( 22), INT8_C( -82), INT8_C( 0), INT8_C( -88), INT8_C( 0), INT8_C(-127), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -43), INT8_C( 44), INT8_C( 0), INT8_C( 0), INT8_C( -55), INT8_C( 0), INT8_C( 0), INT8_C( 73), INT8_C( 47), INT8_C( 108), INT8_C( 0), INT8_C( 97), INT8_C( 0), INT8_C( 0), INT8_C( -68), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { UINT64_C(16466136098061989415), simde_mm512_set_epi8(INT8_C( -81), INT8_C( 119), INT8_C( -63), INT8_C( -22), INT8_C(-118), INT8_C( -5), INT8_C( -43), INT8_C( -96), INT8_C(-103), INT8_C( 22), INT8_C( 19), INT8_C( 79), INT8_C( 40), INT8_C( 50), INT8_C( -52), INT8_C( -40), INT8_C( -71), INT8_C( 37), INT8_C( -98), INT8_C( 112), INT8_C( 106), INT8_C( 49), INT8_C( -36), INT8_C( -93), INT8_C( 66), INT8_C( -50), INT8_C(-100), INT8_C( -11), INT8_C( 123), INT8_C( 32), INT8_C( -64), INT8_C( -56), INT8_C( 95), INT8_C( -27), INT8_C( -37), INT8_C( 82), INT8_C( -65), INT8_C( 74), INT8_C( 101), INT8_C( -10), INT8_C( 104), INT8_C( -53), INT8_C( 118), INT8_C( 34), INT8_C( 19), INT8_C( -6), INT8_C(-103), INT8_C( 93), INT8_C( 101), INT8_C( 100), INT8_C( -55), INT8_C( 108), INT8_C( 6), INT8_C( -85), INT8_C( 4), INT8_C( -17), INT8_C(-116), INT8_C( 108), INT8_C( 39), INT8_C( 48), INT8_C( -56), INT8_C( -50), INT8_C( -30), INT8_C( -76)), simde_mm512_set_epi8(INT8_C( 39), INT8_C( 12), INT8_C( -93), INT8_C( -40), INT8_C( 15), INT8_C( 114), INT8_C( 37), INT8_C( -75), INT8_C( -90), INT8_C( 35), INT8_C( 103), INT8_C( -54), INT8_C( 73), INT8_C( -37), INT8_C( 9), INT8_C( 80), INT8_C( 26), INT8_C( -85), INT8_C( -3), INT8_C( 57), INT8_C( -52), INT8_C( -30), INT8_C( -59), INT8_C( 8), INT8_C( -56), INT8_C( 70), INT8_C( 58), INT8_C( -6), INT8_C( -31), INT8_C( 86), INT8_C( -56), INT8_C( -31), INT8_C( -79), INT8_C( 14), INT8_C( 39), INT8_C(-116), INT8_C( -35), INT8_C( 92), INT8_C( -7), INT8_C(-122), INT8_C( -44), INT8_C(-112), INT8_C( 127), INT8_C( 54), INT8_C( 84), INT8_C( 113), INT8_C( 62), INT8_C( -16), 
INT8_C( -9), INT8_C( -30), INT8_C(-125), INT8_C( 52), INT8_C( 95), INT8_C( 118), INT8_C( 32), INT8_C( -80), INT8_C( -77), INT8_C( -73), INT8_C( 106), INT8_C(-116), INT8_C( -44), INT8_C( 115), INT8_C( 124), INT8_C( 50)), simde_mm512_set_epi8(INT8_C( -1), INT8_C( -23), INT8_C( -50), INT8_C( 0), INT8_C( 0), INT8_C( -84), INT8_C( 0), INT8_C( 0), INT8_C( 56), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 87), INT8_C( -14), INT8_C( 0), INT8_C(-117), INT8_C( 44), INT8_C( -24), INT8_C( 0), INT8_C( -46), INT8_C( 123), INT8_C(-118), INT8_C( 106), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 7), INT8_C( 0), INT8_C( -99), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -50), INT8_C( 56), INT8_C( 0), INT8_C( 51), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( 31), INT8_C( 40), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 75), INT8_C( 0), INT8_C( 14), INT8_C( 78), INT8_C( 0), INT8_C( -31), INT8_C(-122), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-122), INT8_C( 0), INT8_C( 0), INT8_C( 97), INT8_C(-112), INT8_C( -22)) }, { UINT64_C(10141573627661406757), simde_mm512_set_epi8(INT8_C( 63), INT8_C( 28), INT8_C( -98), INT8_C( 53), INT8_C( 38), INT8_C( -97), INT8_C( 41), INT8_C( 23), INT8_C( -69), INT8_C( 29), INT8_C(-117), INT8_C( 81), INT8_C( 91), INT8_C( -84), INT8_C( 67), INT8_C( -26), INT8_C(-106), INT8_C( 41), INT8_C( -18), INT8_C( 77), INT8_C( 80), INT8_C( 122), INT8_C( 27), INT8_C( 7), INT8_C( -44), INT8_C( 117), INT8_C( 58), INT8_C( -92), INT8_C( 91), INT8_C(-116), INT8_C( -45), INT8_C( -65), INT8_C( 110), INT8_C( -99), INT8_C( 26), INT8_C( 95), INT8_C( -85), INT8_C(-102), INT8_C( 106), INT8_C( 71), INT8_C( -2), INT8_C( 90), INT8_C( 49), INT8_C(-107), INT8_C( 41), INT8_C( -92), INT8_C(-110), INT8_C( 38), INT8_C( 38), INT8_C( 15), INT8_C( -94), INT8_C( -90), INT8_C( 107), INT8_C( -97), INT8_C( 21), INT8_C( 27), INT8_C( -10), INT8_C(-108), INT8_C( -20), INT8_C( -86), INT8_C( 23), INT8_C( 68), INT8_C( -52), INT8_C(-120)), 
simde_mm512_set_epi8(INT8_C( 61), INT8_C( -61), INT8_C( 13), INT8_C( 2), INT8_C( -62), INT8_C(-117), INT8_C( 105), INT8_C( -23), INT8_C( 79), INT8_C( 54), INT8_C( -95), INT8_C( -18), INT8_C( 97), INT8_C( 38), INT8_C( -79), INT8_C( -93), INT8_C( 75), INT8_C( -4), INT8_C( -52), INT8_C( -36), INT8_C( 23), INT8_C( 21), INT8_C( -48), INT8_C( -79), INT8_C( 10), INT8_C( -23), INT8_C( -6), INT8_C( 57), INT8_C( 126), INT8_C( 10), INT8_C( -59), INT8_C( 45), INT8_C( 35), INT8_C( 45), INT8_C( 17), INT8_C( 32), INT8_C( 74), INT8_C( -19), INT8_C( -96), INT8_C( 111), INT8_C( 9), INT8_C( 3), INT8_C(-116), INT8_C( 122), INT8_C(-109), INT8_C( 90), INT8_C( -38), INT8_C( -19), INT8_C( 89), INT8_C( -28), INT8_C( 79), INT8_C( -48), INT8_C( 127), INT8_C( 39), INT8_C( -62), INT8_C( 57), INT8_C( -98), INT8_C( 124), INT8_C( -59), INT8_C(-117), INT8_C(-125), INT8_C( 101), INT8_C( 26), INT8_C( 113)), simde_mm512_set_epi8(INT8_C( 66), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 65), INT8_C( 49), INT8_C( 0), INT8_C( 0), INT8_C( -72), INT8_C( 0), INT8_C( 4), INT8_C( 49), INT8_C(-109), INT8_C( -81), INT8_C( -40), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 112), INT8_C( 10), INT8_C( 0), INT8_C( -22), INT8_C( 33), INT8_C( 0), INT8_C( 113), INT8_C( -73), INT8_C( -73), INT8_C( -75), INT8_C( 0), INT8_C( 76), INT8_C( -21), INT8_C( 0), INT8_C( 108), INT8_C(-104), INT8_C( 0), INT8_C( 0), INT8_C( -8), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -36), INT8_C( 4), INT8_C( 73), INT8_C( 0), INT8_C( 105), INT8_C( 0), INT8_C( 0), INT8_C( -35), INT8_C( 0), INT8_C( 5), INT8_C( 0), INT8_C( 0), INT8_C( -91), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -54), INT8_C( 0), INT8_C( 0), INT8_C( 62), INT8_C( 0), INT8_C( -23)) }, { UINT64_C( 9475610514207139221), simde_mm512_set_epi8(INT8_C(-118), INT8_C( 118), INT8_C( 91), INT8_C( 46), INT8_C(-114), INT8_C( -42), INT8_C( 89), INT8_C( 67), INT8_C( 116), INT8_C( 126), INT8_C( 1), INT8_C( -41), INT8_C( 65), INT8_C(-125), INT8_C( 78), INT8_C( 101), 
INT8_C(-117), INT8_C( -31), INT8_C( 78), INT8_C( 100), INT8_C( 23), INT8_C( 20), INT8_C( 26), INT8_C( 22), INT8_C( 127), INT8_C( -64), INT8_C(-101), INT8_C( 121), INT8_C( 1), INT8_C( 115), INT8_C( 26), INT8_C( 10), INT8_C(-106), INT8_C( 7), INT8_C( -61), INT8_C(-100), INT8_C( 98), INT8_C( 89), INT8_C( -63), INT8_C( -47), INT8_C( -25), INT8_C( -25), INT8_C( -72), INT8_C(-114), INT8_C( 10), INT8_C( 82), INT8_C( -92), INT8_C( -12), INT8_C( -47), INT8_C( -28), INT8_C( -48), INT8_C( 53), INT8_C( -55), INT8_C( 77), INT8_C( -44), INT8_C( 56), INT8_C( 116), INT8_C(-126), INT8_C( 6), INT8_C( 75), INT8_C( 121), INT8_C( -27), INT8_C( 70), INT8_C( -31)), simde_mm512_set_epi8(INT8_C( 78), INT8_C( 122), INT8_C( -84), INT8_C( -42), INT8_C( 39), INT8_C( 29), INT8_C(-115), INT8_C( -86), INT8_C( -59), INT8_C( -20), INT8_C( -12), INT8_C(-109), INT8_C( 101), INT8_C( -88), INT8_C( -86), INT8_C( 53), INT8_C( 71), INT8_C( 13), INT8_C( -92), INT8_C( -64), INT8_C( 27), INT8_C( 27), INT8_C(-118), INT8_C( 46), INT8_C( 77), INT8_C( -63), INT8_C( 15), INT8_C( 115), INT8_C(-127), INT8_C( 113), INT8_C(-122), INT8_C(-128), INT8_C( -15), INT8_C( 46), INT8_C( 1), INT8_C( -61), INT8_C( -47), INT8_C( -78), INT8_C( -25), INT8_C( -43), INT8_C( 27), INT8_C( -59), INT8_C( -58), INT8_C( 14), INT8_C( -30), INT8_C( 21), INT8_C( -79), INT8_C( 73), INT8_C( -11), INT8_C( 23), INT8_C( 2), INT8_C( 25), INT8_C( -11), INT8_C( 76), INT8_C(-126), INT8_C( -38), INT8_C( 7), INT8_C( -48), INT8_C(-125), INT8_C( -67), INT8_C( 74), INT8_C( -92), INT8_C( -44), INT8_C( 92)), simde_mm512_set_epi8(INT8_C( 15), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 9), INT8_C( 95), INT8_C( 69), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 50), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -7), INT8_C( -89), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-128), INT8_C( -25), INT8_C( -34), INT8_C( -10), INT8_C( 0), INT8_C( 0), INT8_C( 0), 
INT8_C( 0), INT8_C( 111), INT8_C( 0), INT8_C( 0), INT8_C( -13), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 85), INT8_C( 0), INT8_C( 96), INT8_C( 95), INT8_C( -3), INT8_C( 68), INT8_C( 0), INT8_C( 0), INT8_C(-114), INT8_C( 0), INT8_C( -99), INT8_C( 102), INT8_C( 0), INT8_C( 0), INT8_C( -19), INT8_C( 0), INT8_C( -49), INT8_C( 0), INT8_C( 38)) }, { UINT64_C( 3401470949633102944), simde_mm512_set_epi8(INT8_C(-104), INT8_C(-104), INT8_C(-124), INT8_C( -27), INT8_C( -3), INT8_C( 93), INT8_C( 108), INT8_C( 0), INT8_C( -69), INT8_C( -66), INT8_C( 77), INT8_C( 58), INT8_C( -19), INT8_C( 100), INT8_C(-128), INT8_C( -58), INT8_C( 7), INT8_C( -9), INT8_C( -26), INT8_C(-108), INT8_C( 35), INT8_C( 86), INT8_C( -43), INT8_C( -27), INT8_C( 28), INT8_C(-101), INT8_C( 36), INT8_C( -57), INT8_C( -58), INT8_C( -71), INT8_C( -70), INT8_C( -23), INT8_C( -36), INT8_C( -65), INT8_C( -77), INT8_C( 109), INT8_C( -18), INT8_C( 121), INT8_C( 64), INT8_C( 98), INT8_C( 123), INT8_C( -41), INT8_C(-125), INT8_C( -77), INT8_C( -97), INT8_C( -69), INT8_C( -26), INT8_C( -51), INT8_C( -68), INT8_C( -76), INT8_C( -28), INT8_C( -68), INT8_C( 123), INT8_C(-118), INT8_C( 117), INT8_C( -62), INT8_C( 121), INT8_C( 3), INT8_C( 62), INT8_C( -4), INT8_C(-118), INT8_C( -53), INT8_C(-107), INT8_C( 26)), simde_mm512_set_epi8(INT8_C( 127), INT8_C(-120), INT8_C( -7), INT8_C( -23), INT8_C( 43), INT8_C( 101), INT8_C( -13), INT8_C( -38), INT8_C( 26), INT8_C( -88), INT8_C( -66), INT8_C( 89), INT8_C( -49), INT8_C( -11), INT8_C( 84), INT8_C( 63), INT8_C( 60), INT8_C(-112), INT8_C( 94), INT8_C( 33), INT8_C( 90), INT8_C( 39), INT8_C( -66), INT8_C(-115), INT8_C( -83), INT8_C(-100), INT8_C( 33), INT8_C( 122), INT8_C( 63), INT8_C( -80), INT8_C( 28), INT8_C( -93), INT8_C( -72), INT8_C( 6), INT8_C( 39), INT8_C( -53), INT8_C( 31), INT8_C( -90), INT8_C(-102), INT8_C( -51), INT8_C( -61), INT8_C( -92), INT8_C( 73), INT8_C( 54), INT8_C( 120), INT8_C( 92), INT8_C( 79), INT8_C( 73), INT8_C( 26), INT8_C( -5), 
INT8_C(-126), INT8_C( 101), INT8_C( 59), INT8_C( 51), INT8_C( -54), INT8_C( -55), INT8_C( 1), INT8_C( 26), INT8_C( -69), INT8_C( 103), INT8_C( 37), INT8_C( 8), INT8_C( 21), INT8_C( -87)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 80), INT8_C( 0), INT8_C( -25), INT8_C( 84), INT8_C(-111), INT8_C( -65), INT8_C( 0), INT8_C( 0), INT8_C( 20), INT8_C( -2), INT8_C( 0), INT8_C(-107), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 110), INT8_C(-111), INT8_C( -49), INT8_C( 0), INT8_C( 0), INT8_C( 115), INT8_C( 77), INT8_C( -51), INT8_C( 48), INT8_C( 0), INT8_C( 0), INT8_C( -33), INT8_C( 21), INT8_C( 24), INT8_C( 0), INT8_C( -32), INT8_C( 0), INT8_C( 0), INT8_C( -31), INT8_C( 0), INT8_C( 0), INT8_C( 55), INT8_C( 0), INT8_C( 16), INT8_C( 0), INT8_C( 48), INT8_C( 18), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 18), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -85), INT8_C( 0), INT8_C( -35), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 108), INT8_C(-107), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_gf2p8affine_epi64_epi8(test_vec[i].k, test_vec[i].x, test_vec[i].A, 191); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_gf2p8affineinv_epi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i x; simde__m128i A; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( 58), INT8_C( -43), INT8_C( 69), INT8_C( 40), INT8_C( -27), INT8_C(-105), INT8_C( 74), INT8_C(-119), INT8_C( 24), INT8_C( -36), INT8_C( 85), INT8_C( 31), INT8_C( 10), INT8_C( -63), INT8_C( 1), INT8_C( 111)), simde_mm_set_epi8(INT8_C( -23), INT8_C( 64), INT8_C( 17), INT8_C(-118), INT8_C(-108), INT8_C(-111), INT8_C( 0), INT8_C( 45), INT8_C( 94), INT8_C( 64), INT8_C( 99), INT8_C( -32), INT8_C( 92), INT8_C( -94), INT8_C( 121), INT8_C( 115)), simde_mm_set_epi8(INT8_C( -73), INT8_C( 28), INT8_C( 38), INT8_C( -87), INT8_C( 39), INT8_C(-120), 
INT8_C( -86), INT8_C( 42), INT8_C( 105), INT8_C( 75), INT8_C( -53), INT8_C(-122), INT8_C( 79), INT8_C(-117), INT8_C( -14), INT8_C( 59)) }, { simde_mm_set_epi8(INT8_C( 125), INT8_C( 92), INT8_C( -40), INT8_C( 78), INT8_C( -48), INT8_C(-102), INT8_C( -48), INT8_C( -60), INT8_C(-104), INT8_C( 50), INT8_C( 111), INT8_C( -80), INT8_C( 75), INT8_C( 75), INT8_C( -50), INT8_C( 85)), simde_mm_set_epi8(INT8_C( -33), INT8_C( 19), INT8_C( -14), INT8_C( 38), INT8_C( -80), INT8_C( -72), INT8_C( 4), INT8_C( -22), INT8_C( 57), INT8_C( -46), INT8_C( 98), INT8_C( -35), INT8_C( 77), INT8_C(-102), INT8_C( 53), INT8_C( 14)), simde_mm_set_epi8(INT8_C( -93), INT8_C(-121), INT8_C( -3), INT8_C( 24), INT8_C( 22), INT8_C( -48), INT8_C( 22), INT8_C( 31), INT8_C( 108), INT8_C( -47), INT8_C( 94), INT8_C( 87), INT8_C( -94), INT8_C( -94), INT8_C( 56), INT8_C( -85)) }, { simde_mm_set_epi8(INT8_C( -18), INT8_C( -67), INT8_C( -39), INT8_C( -64), INT8_C( -69), INT8_C( -25), INT8_C( 53), INT8_C( -3), INT8_C( 35), INT8_C( -75), INT8_C(-109), INT8_C( 14), INT8_C( -96), INT8_C(-128), INT8_C( 82), INT8_C( 64)), simde_mm_set_epi8(INT8_C( 125), INT8_C( -15), INT8_C( -11), INT8_C( 2), INT8_C( 36), INT8_C( 95), INT8_C(-105), INT8_C(-112), INT8_C( -4), INT8_C( 76), INT8_C( 54), INT8_C( -65), INT8_C( 65), INT8_C( 18), INT8_C( -19), INT8_C( -94)), simde_mm_set_epi8(INT8_C( -19), INT8_C( 84), INT8_C( -34), INT8_C( 24), INT8_C( -11), INT8_C( -44), INT8_C(-128), INT8_C(-104), INT8_C( 20), INT8_C(-110), INT8_C( -12), INT8_C( 118), INT8_C( -13), INT8_C( 11), INT8_C( 33), INT8_C( 71)) }, { simde_mm_set_epi8(INT8_C( -1), INT8_C( 76), INT8_C(-126), INT8_C( 62), INT8_C( 70), INT8_C( -18), INT8_C(-123), INT8_C(-104), INT8_C( -12), INT8_C( -12), INT8_C( 120), INT8_C( -6), INT8_C( -20), INT8_C( 70), INT8_C( 51), INT8_C( -27)), simde_mm_set_epi8(INT8_C( -65), INT8_C( -81), INT8_C( -8), INT8_C( -60), INT8_C( 123), INT8_C( 65), INT8_C( -54), INT8_C( -19), INT8_C( 17), INT8_C( -96), INT8_C( 47), INT8_C( 16), INT8_C( 48), 
INT8_C( 113), INT8_C( -7), INT8_C( -55)), simde_mm_set_epi8(INT8_C( 127), INT8_C( 84), INT8_C( -12), INT8_C( -69), INT8_C( -65), INT8_C( 44), INT8_C( -88), INT8_C( 37), INT8_C( 100), INT8_C( 100), INT8_C( -5), INT8_C( -4), INT8_C(-118), INT8_C( -6), INT8_C( 96), INT8_C( -14)) }, { simde_mm_set_epi8(INT8_C( -4), INT8_C( 47), INT8_C( 110), INT8_C( -54), INT8_C( -29), INT8_C( 114), INT8_C( -26), INT8_C( -27), INT8_C( 82), INT8_C( -32), INT8_C( -46), INT8_C( 81), INT8_C( 113), INT8_C( -64), INT8_C( -33), INT8_C( 40)), simde_mm_set_epi8(INT8_C( 25), INT8_C( -38), INT8_C( 109), INT8_C( 24), INT8_C( -65), INT8_C( -78), INT8_C(-113), INT8_C( -16), INT8_C( 14), INT8_C( 38), INT8_C( 121), INT8_C( 52), INT8_C(-126), INT8_C( 39), INT8_C( 80), INT8_C( -84)), simde_mm_set_epi8(INT8_C( 28), INT8_C( 48), INT8_C( -41), INT8_C( 44), INT8_C(-114), INT8_C( 12), INT8_C( 110), INT8_C( 79), INT8_C( -71), INT8_C( 96), INT8_C( 21), INT8_C( 16), INT8_C( -8), INT8_C( -92), INT8_C( 78), INT8_C( -58)) }, { simde_mm_set_epi8(INT8_C( 83), INT8_C( 5), INT8_C( 114), INT8_C( 83), INT8_C( 22), INT8_C( 94), INT8_C( -39), INT8_C( -38), INT8_C( 30), INT8_C(-105), INT8_C( 103), INT8_C( -18), INT8_C( 92), INT8_C( 29), INT8_C( -79), INT8_C( -94)), simde_mm_set_epi8(INT8_C( 74), INT8_C( 100), INT8_C(-114), INT8_C( 59), INT8_C( 73), INT8_C( -61), INT8_C( 30), INT8_C(-124), INT8_C( 41), INT8_C( 101), INT8_C( -53), INT8_C( -66), INT8_C( 55), INT8_C( 15), INT8_C( 6), INT8_C( 114)), simde_mm_set_epi8(INT8_C(-111), INT8_C( 32), INT8_C( 73), INT8_C(-111), INT8_C( 15), INT8_C( 55), INT8_C(-102), INT8_C( 103), INT8_C(-116), INT8_C( 79), INT8_C( 123), INT8_C( 5), INT8_C( 31), INT8_C( -80), INT8_C( 39), INT8_C( 6)) }, { simde_mm_set_epi8(INT8_C( 37), INT8_C( 18), INT8_C( 127), INT8_C( -7), INT8_C( -59), INT8_C( -34), INT8_C( 4), INT8_C(-122), INT8_C( 76), INT8_C( -20), INT8_C(-120), INT8_C( 61), INT8_C( 105), INT8_C(-116), INT8_C( 95), INT8_C( 77)), simde_mm_set_epi8(INT8_C( -26), INT8_C( 45), INT8_C( 112), INT8_C( 
97), INT8_C( 60), INT8_C( 9), INT8_C( 86), INT8_C( -33), INT8_C( 14), INT8_C( 28), INT8_C(-121), INT8_C( -41), INT8_C(-101), INT8_C( 24), INT8_C( 0), INT8_C( 2)), simde_mm_set_epi8(INT8_C( 48), INT8_C( -37), INT8_C( 118), INT8_C( 2), INT8_C( 125), INT8_C( 99), INT8_C( -93), INT8_C( -36), INT8_C( 11), INT8_C( 36), INT8_C( -94), INT8_C( -94), INT8_C( -80), INT8_C(-106), INT8_C(-126), INT8_C( 37)) }, { simde_mm_set_epi8(INT8_C( 59), INT8_C( -71), INT8_C( 127), INT8_C( 88), INT8_C( -2), INT8_C( -99), INT8_C( 37), INT8_C(-101), INT8_C( 66), INT8_C( 123), INT8_C( -27), INT8_C( 88), INT8_C( -48), INT8_C( 84), INT8_C( -82), INT8_C( -53)), simde_mm_set_epi8(INT8_C( 112), INT8_C(-103), INT8_C( -33), INT8_C( -60), INT8_C( -26), INT8_C( 65), INT8_C( -62), INT8_C( 59), INT8_C( -87), INT8_C( 60), INT8_C(-118), INT8_C(-122), INT8_C( -40), INT8_C( 117), INT8_C( -94), INT8_C( -63)), simde_mm_set_epi8(INT8_C( 50), INT8_C( 38), INT8_C( -68), INT8_C( 55), INT8_C( -19), INT8_C( 8), INT8_C( 119), INT8_C( -18), INT8_C( -96), INT8_C( 80), INT8_C( 71), INT8_C( 19), INT8_C(-116), INT8_C( -69), INT8_C( 37), INT8_C( 28)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_gf2p8affineinv_epi64_epi8(test_vec[i].x, test_vec[i].A, INT8_C( 54)); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_gf2p8affineinv_epi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i x; simde__m256i A; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( -23), INT8_C( 64), INT8_C( 17), INT8_C(-118), INT8_C(-108), INT8_C(-111), INT8_C( 0), INT8_C( 45), INT8_C( 94), INT8_C( 64), INT8_C( 99), INT8_C( -32), INT8_C( 92), INT8_C( -94), INT8_C( 121), INT8_C( 115), INT8_C( 58), INT8_C( -43), INT8_C( 69), INT8_C( 40), INT8_C( -27), INT8_C(-105), INT8_C( 74), INT8_C(-119), INT8_C( 24), INT8_C( -36), INT8_C( 85), INT8_C( 31), INT8_C( 10), INT8_C( -63), INT8_C( 1), INT8_C( 111)), simde_mm256_set_epi8(INT8_C( -33), 
INT8_C( 19), INT8_C( -14), INT8_C( 38), INT8_C( -80), INT8_C( -72), INT8_C( 4), INT8_C( -22), INT8_C( 57), INT8_C( -46), INT8_C( 98), INT8_C( -35), INT8_C( 77), INT8_C(-102), INT8_C( 53), INT8_C( 14), INT8_C( 125), INT8_C( 92), INT8_C( -40), INT8_C( 78), INT8_C( -48), INT8_C(-102), INT8_C( -48), INT8_C( -60), INT8_C(-104), INT8_C( 50), INT8_C( 111), INT8_C( -80), INT8_C( 75), INT8_C( 75), INT8_C( -50), INT8_C( 85)), simde_mm256_set_epi8(INT8_C( 110), INT8_C( 80), INT8_C( -5), INT8_C( 68), INT8_C( 42), INT8_C(-101), INT8_C(-116), INT8_C( 64), INT8_C(-114), INT8_C( -33), INT8_C( 44), INT8_C( -47), INT8_C( -96), INT8_C( 14), INT8_C( -68), INT8_C( 39), INT8_C(-115), INT8_C( -42), INT8_C( -5), INT8_C( -90), INT8_C( 0), INT8_C( 13), INT8_C( 127), INT8_C(-125), INT8_C(-122), INT8_C( 75), INT8_C( 70), INT8_C( 54), INT8_C( 67), INT8_C( -9), INT8_C( 56), INT8_C( -66)) }, { simde_mm256_set_epi8(INT8_C( 125), INT8_C( -15), INT8_C( -11), INT8_C( 2), INT8_C( 36), INT8_C( 95), INT8_C(-105), INT8_C(-112), INT8_C( -4), INT8_C( 76), INT8_C( 54), INT8_C( -65), INT8_C( 65), INT8_C( 18), INT8_C( -19), INT8_C( -94), INT8_C( -18), INT8_C( -67), INT8_C( -39), INT8_C( -64), INT8_C( -69), INT8_C( -25), INT8_C( 53), INT8_C( -3), INT8_C( 35), INT8_C( -75), INT8_C(-109), INT8_C( 14), INT8_C( -96), INT8_C(-128), INT8_C( 82), INT8_C( 64)), simde_mm256_set_epi8(INT8_C( -65), INT8_C( -81), INT8_C( -8), INT8_C( -60), INT8_C( 123), INT8_C( 65), INT8_C( -54), INT8_C( -19), INT8_C( 17), INT8_C( -96), INT8_C( 47), INT8_C( 16), INT8_C( 48), INT8_C( 113), INT8_C( -7), INT8_C( -55), INT8_C( -1), INT8_C( 76), INT8_C(-126), INT8_C( 62), INT8_C( 70), INT8_C( -18), INT8_C(-123), INT8_C(-104), INT8_C( -12), INT8_C( -12), INT8_C( 120), INT8_C( -6), INT8_C( -20), INT8_C( 70), INT8_C( 51), INT8_C( -27)), simde_mm256_set_epi8(INT8_C( -71), INT8_C( -5), INT8_C( -88), INT8_C( -84), INT8_C( 93), INT8_C( -14), INT8_C( -95), INT8_C( -91), INT8_C(-117), INT8_C( 17), INT8_C( 26), INT8_C( -16), INT8_C( 101), INT8_C( -8), 
INT8_C( 21), INT8_C( 62), INT8_C( -24), INT8_C( 25), INT8_C( -66), INT8_C( 91), INT8_C( -67), INT8_C( -47), INT8_C( -58), INT8_C(-109), INT8_C( -8), INT8_C( -48), INT8_C(-125), INT8_C( 4), INT8_C(-116), INT8_C( -65), INT8_C( -1), INT8_C( -84)) }, { simde_mm256_set_epi8(INT8_C( 25), INT8_C( -38), INT8_C( 109), INT8_C( 24), INT8_C( -65), INT8_C( -78), INT8_C(-113), INT8_C( -16), INT8_C( 14), INT8_C( 38), INT8_C( 121), INT8_C( 52), INT8_C(-126), INT8_C( 39), INT8_C( 80), INT8_C( -84), INT8_C( -4), INT8_C( 47), INT8_C( 110), INT8_C( -54), INT8_C( -29), INT8_C( 114), INT8_C( -26), INT8_C( -27), INT8_C( 82), INT8_C( -32), INT8_C( -46), INT8_C( 81), INT8_C( 113), INT8_C( -64), INT8_C( -33), INT8_C( 40)), simde_mm256_set_epi8(INT8_C( 74), INT8_C( 100), INT8_C(-114), INT8_C( 59), INT8_C( 73), INT8_C( -61), INT8_C( 30), INT8_C(-124), INT8_C( 41), INT8_C( 101), INT8_C( -53), INT8_C( -66), INT8_C( 55), INT8_C( 15), INT8_C( 6), INT8_C( 114), INT8_C( 83), INT8_C( 5), INT8_C( 114), INT8_C( 83), INT8_C( 22), INT8_C( 94), INT8_C( -39), INT8_C( -38), INT8_C( 30), INT8_C(-105), INT8_C( 103), INT8_C( -18), INT8_C( 92), INT8_C( 29), INT8_C( -79), INT8_C( -94)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( -35), INT8_C( 53), INT8_C( -86), INT8_C( 100), INT8_C( 10), INT8_C( -28), INT8_C( -1), INT8_C( -48), INT8_C( 54), INT8_C( 9), INT8_C( -50), INT8_C(-107), INT8_C( 28), INT8_C( -3), INT8_C( -83), INT8_C( 56), INT8_C( 28), INT8_C( -61), INT8_C( 106), INT8_C( -77), INT8_C( 117), INT8_C(-123), INT8_C( -29), INT8_C( -43), INT8_C( -97), INT8_C( 3), INT8_C( -27), INT8_C( 47), INT8_C( 92), INT8_C(-116), INT8_C( 60)) }, { simde_mm256_set_epi8(INT8_C( -26), INT8_C( 45), INT8_C( 112), INT8_C( 97), INT8_C( 60), INT8_C( 9), INT8_C( 86), INT8_C( -33), INT8_C( 14), INT8_C( 28), INT8_C(-121), INT8_C( -41), INT8_C(-101), INT8_C( 24), INT8_C( 0), INT8_C( 2), INT8_C( 37), INT8_C( 18), INT8_C( 127), INT8_C( -7), INT8_C( -59), INT8_C( -34), INT8_C( 4), INT8_C(-122), INT8_C( 76), INT8_C( -20), INT8_C(-120), 
INT8_C( 61), INT8_C( 105), INT8_C(-116), INT8_C( 95), INT8_C( 77)), simde_mm256_set_epi8(INT8_C( 112), INT8_C(-103), INT8_C( -33), INT8_C( -60), INT8_C( -26), INT8_C( 65), INT8_C( -62), INT8_C( 59), INT8_C( -87), INT8_C( 60), INT8_C(-118), INT8_C(-122), INT8_C( -40), INT8_C( 117), INT8_C( -94), INT8_C( -63), INT8_C( 59), INT8_C( -71), INT8_C( 127), INT8_C( 88), INT8_C( -2), INT8_C( -99), INT8_C( 37), INT8_C(-101), INT8_C( 66), INT8_C( 123), INT8_C( -27), INT8_C( 88), INT8_C( -48), INT8_C( 84), INT8_C( -82), INT8_C( -53)), simde_mm256_set_epi8(INT8_C( 71), INT8_C( -19), INT8_C( -57), INT8_C( 56), INT8_C(-119), INT8_C( 25), INT8_C( -68), INT8_C(-108), INT8_C( 9), INT8_C( 96), INT8_C( 104), INT8_C( -39), INT8_C( 70), INT8_C( 25), INT8_C(-116), INT8_C( -51), INT8_C( -68), INT8_C( 67), INT8_C( -85), INT8_C( 74), INT8_C( -23), INT8_C(-127), INT8_C( -17), INT8_C(-120), INT8_C( 109), INT8_C( 33), INT8_C( -19), INT8_C( -85), INT8_C( 18), INT8_C( -70), INT8_C(-105), INT8_C( 40)) }, { simde_mm256_set_epi8(INT8_C( -97), INT8_C( 10), INT8_C( 84), INT8_C( 49), INT8_C( 94), INT8_C( -59), INT8_C( -83), INT8_C( 28), INT8_C( -40), INT8_C( 110), INT8_C( -8), INT8_C( 28), INT8_C( -35), INT8_C( -5), INT8_C(-119), INT8_C( -82), INT8_C( 99), INT8_C( 84), INT8_C( 122), INT8_C( 69), INT8_C( 115), INT8_C( 3), INT8_C(-108), INT8_C( -90), INT8_C( -26), INT8_C( -78), INT8_C( -27), INT8_C( -66), INT8_C( -90), INT8_C( -23), INT8_C(-125), INT8_C( -64)), simde_mm256_set_epi8(INT8_C( -20), INT8_C(-128), INT8_C( 30), INT8_C( -62), INT8_C(-115), INT8_C( 37), INT8_C( 44), INT8_C( 9), INT8_C( 62), INT8_C( 115), INT8_C( -32), INT8_C( 96), INT8_C( 97), INT8_C(-125), INT8_C( 100), INT8_C( -75), INT8_C( 118), INT8_C(-122), INT8_C( 110), INT8_C( 15), INT8_C( 92), INT8_C( -45), INT8_C( -12), INT8_C( 60), INT8_C(-110), INT8_C( 81), INT8_C( 58), INT8_C( -38), INT8_C( 114), INT8_C( -35), INT8_C( -67), INT8_C( 20)), simde_mm256_set_epi8(INT8_C( 74), INT8_C(-120), INT8_C( 37), INT8_C( 64), INT8_C( 84), INT8_C( 
-17), INT8_C( 54), INT8_C( -25), INT8_C( 106), INT8_C( -31), INT8_C(-121), INT8_C( 123), INT8_C( 43), INT8_C( -9), INT8_C( 72), INT8_C( -42), INT8_C( 109), INT8_C( -70), INT8_C( 106), INT8_C(-112), INT8_C( 25), INT8_C( 95), INT8_C( -10), INT8_C( -53), INT8_C( 44), INT8_C(-128), INT8_C( 29), INT8_C( 24), INT8_C( 96), INT8_C( 39), INT8_C( -27), INT8_C( -97)) }, { simde_mm256_set_epi8(INT8_C(-122), INT8_C(-125), INT8_C( 103), INT8_C( 110), INT8_C( 98), INT8_C( -78), INT8_C( -7), INT8_C( -29), INT8_C( -88), INT8_C( 88), INT8_C( 10), INT8_C( 24), INT8_C( 100), INT8_C( -11), INT8_C( -62), INT8_C( 52), INT8_C( 97), INT8_C( 31), INT8_C( -35), INT8_C( 120), INT8_C( 4), INT8_C( -5), INT8_C(-110), INT8_C( 0), INT8_C( -61), INT8_C(-127), INT8_C( -20), INT8_C( 85), INT8_C( -84), INT8_C( -4), INT8_C(-113), INT8_C(-106)), simde_mm256_set_epi8(INT8_C( 118), INT8_C( -60), INT8_C( -8), INT8_C( -47), INT8_C( -94), INT8_C( 14), INT8_C(-105), INT8_C( -53), INT8_C( 120), INT8_C( 100), INT8_C( 83), INT8_C( 27), INT8_C( -60), INT8_C( 75), INT8_C( 26), INT8_C( -28), INT8_C( 71), INT8_C( 30), INT8_C( 66), INT8_C( -19), INT8_C( -84), INT8_C( -56), INT8_C(-116), INT8_C( 117), INT8_C( 51), INT8_C( -44), INT8_C( 73), INT8_C( -1), INT8_C( 111), INT8_C( 113), INT8_C( -22), INT8_C( 64)), simde_mm256_set_epi8(INT8_C( 60), INT8_C( 82), INT8_C( 58), INT8_C( 81), INT8_C( -71), INT8_C( 63), INT8_C( -40), INT8_C( 85), INT8_C( -15), INT8_C( -88), INT8_C( 74), INT8_C( 31), INT8_C( 126), INT8_C( -59), INT8_C( -76), INT8_C( 37), INT8_C( 5), INT8_C( -23), INT8_C( 57), INT8_C( 50), INT8_C( -83), INT8_C( 108), INT8_C(-111), INT8_C(-116), INT8_C( -37), INT8_C( 63), INT8_C( 34), INT8_C( -17), INT8_C( 39), INT8_C( 67), INT8_C( -91), INT8_C( -36)) }, { simde_mm256_set_epi8(INT8_C( 14), INT8_C( -28), INT8_C( 119), INT8_C( -89), INT8_C( 93), INT8_C( 68), INT8_C( -8), INT8_C( -52), INT8_C( 70), INT8_C( -88), INT8_C( 112), INT8_C(-115), INT8_C(-117), INT8_C( -66), INT8_C( 98), INT8_C( -40), INT8_C( 80), INT8_C( 0), 
INT8_C( -25), INT8_C( -44), INT8_C( -35), INT8_C( 89), INT8_C( 84), INT8_C( 53), INT8_C( 2), INT8_C( -40), INT8_C( 71), INT8_C( 53), INT8_C( 10), INT8_C( -48), INT8_C( -64), INT8_C( -40)), simde_mm256_set_epi8(INT8_C( -12), INT8_C( -97), INT8_C(-116), INT8_C( 114), INT8_C( 34), INT8_C( 77), INT8_C( 118), INT8_C( -26), INT8_C(-124), INT8_C( -29), INT8_C(-117), INT8_C( -60), INT8_C( -21), INT8_C( 89), INT8_C( -92), INT8_C(-120), INT8_C( -47), INT8_C( 45), INT8_C( -68), INT8_C( 28), INT8_C( -35), INT8_C(-100), INT8_C( -85), INT8_C(-104), INT8_C( 112), INT8_C( 16), INT8_C( -41), INT8_C( 56), INT8_C( 84), INT8_C( 104), INT8_C(-121), INT8_C( -39)), simde_mm256_set_epi8(INT8_C( -2), INT8_C( -33), INT8_C( -33), INT8_C( 67), INT8_C( -6), INT8_C( -74), INT8_C( 74), INT8_C( 25), INT8_C( 100), INT8_C(-127), INT8_C( 70), INT8_C(-102), INT8_C( -53), INT8_C( 12), INT8_C( -36), INT8_C( 58), INT8_C( -67), INT8_C(-116), INT8_C( -84), INT8_C( 5), INT8_C( 109), INT8_C( -9), INT8_C( 93), INT8_C( -38), INT8_C( 112), INT8_C(-125), INT8_C( 124), INT8_C( 86), INT8_C( -55), INT8_C( 99), INT8_C( -92), INT8_C(-125)) }, { simde_mm256_set_epi8(INT8_C( 120), INT8_C( 79), INT8_C( -76), INT8_C( 90), INT8_C( 3), INT8_C(-113), INT8_C( 126), INT8_C( 10), INT8_C( 50), INT8_C(-103), INT8_C( 60), INT8_C(-119), INT8_C(-101), INT8_C( 54), INT8_C( -8), INT8_C( -9), INT8_C( -13), INT8_C( 119), INT8_C(-104), INT8_C( 81), INT8_C( -64), INT8_C( -40), INT8_C( 34), INT8_C( 104), INT8_C( 75), INT8_C(-123), INT8_C( -3), INT8_C( 12), INT8_C( 46), INT8_C( -83), INT8_C( 115), INT8_C( 41)), simde_mm256_set_epi8(INT8_C( -17), INT8_C(-103), INT8_C( 72), INT8_C(-128), INT8_C( 126), INT8_C( 35), INT8_C( -85), INT8_C( 118), INT8_C( 57), INT8_C( 125), INT8_C( -6), INT8_C( -57), INT8_C( -20), INT8_C( 11), INT8_C( -19), INT8_C( 80), INT8_C( -4), INT8_C( 76), INT8_C( 97), INT8_C( 6), INT8_C( 67), INT8_C(-100), INT8_C( 112), INT8_C( 10), INT8_C( -21), INT8_C( -47), INT8_C( 73), INT8_C( 57), INT8_C( -34), INT8_C( 45), INT8_C( 
49), INT8_C( 73)), simde_mm256_set_epi8(INT8_C( -60), INT8_C( -72), INT8_C( 125), INT8_C(-116), INT8_C( 81), INT8_C( -89), INT8_C( -92), INT8_C( 73), INT8_C( 123), INT8_C( 81), INT8_C( -97), INT8_C( 86), INT8_C( -89), INT8_C( 115), INT8_C( -49), INT8_C( -3), INT8_C(-125), INT8_C( 32), INT8_C( -14), INT8_C( 50), INT8_C( -93), INT8_C( -25), INT8_C(-127), INT8_C( -11), INT8_C( 40), INT8_C( -42), INT8_C( 122), INT8_C( -84), INT8_C( -25), INT8_C( -81), INT8_C( 64), INT8_C( 32)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_gf2p8affineinv_epi64_epi8(test_vec[i].x, test_vec[i].A, 140); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_gf2p8affineinv_epi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i x; simde__m512i A; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( -33), INT8_C( 19), INT8_C( -14), INT8_C( 38), INT8_C( -80), INT8_C( -72), INT8_C( 4), INT8_C( -22), INT8_C( 57), INT8_C( -46), INT8_C( 98), INT8_C( -35), INT8_C( 77), INT8_C(-102), INT8_C( 53), INT8_C( 14), INT8_C( 125), INT8_C( 92), INT8_C( -40), INT8_C( 78), INT8_C( -48), INT8_C(-102), INT8_C( -48), INT8_C( -60), INT8_C(-104), INT8_C( 50), INT8_C( 111), INT8_C( -80), INT8_C( 75), INT8_C( 75), INT8_C( -50), INT8_C( 85), INT8_C( -23), INT8_C( 64), INT8_C( 17), INT8_C(-118), INT8_C(-108), INT8_C(-111), INT8_C( 0), INT8_C( 45), INT8_C( 94), INT8_C( 64), INT8_C( 99), INT8_C( -32), INT8_C( 92), INT8_C( -94), INT8_C( 121), INT8_C( 115), INT8_C( 58), INT8_C( -43), INT8_C( 69), INT8_C( 40), INT8_C( -27), INT8_C(-105), INT8_C( 74), INT8_C(-119), INT8_C( 24), INT8_C( -36), INT8_C( 85), INT8_C( 31), INT8_C( 10), INT8_C( -63), INT8_C( 1), INT8_C( 111)), simde_mm512_set_epi8(INT8_C( -65), INT8_C( -81), INT8_C( -8), INT8_C( -60), INT8_C( 123), INT8_C( 65), INT8_C( -54), INT8_C( -19), INT8_C( 17), INT8_C( -96), INT8_C( 47), INT8_C( 16), INT8_C( 48), INT8_C( 113), INT8_C( -7), INT8_C( -55), INT8_C( 
-1), INT8_C( 76), INT8_C(-126), INT8_C( 62), INT8_C( 70), INT8_C( -18), INT8_C(-123), INT8_C(-104), INT8_C( -12), INT8_C( -12), INT8_C( 120), INT8_C( -6), INT8_C( -20), INT8_C( 70), INT8_C( 51), INT8_C( -27), INT8_C( 125), INT8_C( -15), INT8_C( -11), INT8_C( 2), INT8_C( 36), INT8_C( 95), INT8_C(-105), INT8_C(-112), INT8_C( -4), INT8_C( 76), INT8_C( 54), INT8_C( -65), INT8_C( 65), INT8_C( 18), INT8_C( -19), INT8_C( -94), INT8_C( -18), INT8_C( -67), INT8_C( -39), INT8_C( -64), INT8_C( -69), INT8_C( -25), INT8_C( 53), INT8_C( -3), INT8_C( 35), INT8_C( -75), INT8_C(-109), INT8_C( 14), INT8_C( -96), INT8_C(-128), INT8_C( 82), INT8_C( 64)), simde_mm512_set_epi8(INT8_C( -61), INT8_C( 84), INT8_C( -54), INT8_C( 16), INT8_C( -61), INT8_C( -1), INT8_C(-101), INT8_C( -46), INT8_C( 113), INT8_C( -17), INT8_C( 10), INT8_C( 118), INT8_C( 8), INT8_C( 5), INT8_C( -79), INT8_C( 42), INT8_C( 127), INT8_C( 100), INT8_C(-120), INT8_C(-118), INT8_C(-102), INT8_C( 95), INT8_C(-102), INT8_C( 86), INT8_C( 52), INT8_C( 35), INT8_C( -69), INT8_C( 48), INT8_C( 120), INT8_C( 120), INT8_C( 116), INT8_C( -13), INT8_C(-124), INT8_C( 75), INT8_C( -36), INT8_C( -84), INT8_C( -72), INT8_C( -26), INT8_C( -97), INT8_C( -51), INT8_C( 89), INT8_C( -18), INT8_C( -36), INT8_C( -18), INT8_C( -71), INT8_C( -6), INT8_C( 44), INT8_C( 65), INT8_C( 108), INT8_C( 11), INT8_C( 76), INT8_C( 123), INT8_C( -38), INT8_C( 38), INT8_C(-125), INT8_C( -77), INT8_C( 17), INT8_C( -22), INT8_C(-122), INT8_C( -79), INT8_C(-125), INT8_C(-124), INT8_C(-104), INT8_C(-120)) }, { simde_mm512_set_epi8(INT8_C( 74), INT8_C( 100), INT8_C(-114), INT8_C( 59), INT8_C( 73), INT8_C( -61), INT8_C( 30), INT8_C(-124), INT8_C( 41), INT8_C( 101), INT8_C( -53), INT8_C( -66), INT8_C( 55), INT8_C( 15), INT8_C( 6), INT8_C( 114), INT8_C( 83), INT8_C( 5), INT8_C( 114), INT8_C( 83), INT8_C( 22), INT8_C( 94), INT8_C( -39), INT8_C( -38), INT8_C( 30), INT8_C(-105), INT8_C( 103), INT8_C( -18), INT8_C( 92), INT8_C( 29), INT8_C( -79), INT8_C( -94), 
INT8_C( 25), INT8_C( -38), INT8_C( 109), INT8_C( 24), INT8_C( -65), INT8_C( -78), INT8_C(-113), INT8_C( -16), INT8_C( 14), INT8_C( 38), INT8_C( 121), INT8_C( 52), INT8_C(-126), INT8_C( 39), INT8_C( 80), INT8_C( -84), INT8_C( -4), INT8_C( 47), INT8_C( 110), INT8_C( -54), INT8_C( -29), INT8_C( 114), INT8_C( -26), INT8_C( -27), INT8_C( 82), INT8_C( -32), INT8_C( -46), INT8_C( 81), INT8_C( 113), INT8_C( -64), INT8_C( -33), INT8_C( 40)), simde_mm512_set_epi8(INT8_C( 112), INT8_C(-103), INT8_C( -33), INT8_C( -60), INT8_C( -26), INT8_C( 65), INT8_C( -62), INT8_C( 59), INT8_C( -87), INT8_C( 60), INT8_C(-118), INT8_C(-122), INT8_C( -40), INT8_C( 117), INT8_C( -94), INT8_C( -63), INT8_C( 59), INT8_C( -71), INT8_C( 127), INT8_C( 88), INT8_C( -2), INT8_C( -99), INT8_C( 37), INT8_C(-101), INT8_C( 66), INT8_C( 123), INT8_C( -27), INT8_C( 88), INT8_C( -48), INT8_C( 84), INT8_C( -82), INT8_C( -53), INT8_C( -26), INT8_C( 45), INT8_C( 112), INT8_C( 97), INT8_C( 60), INT8_C( 9), INT8_C( 86), INT8_C( -33), INT8_C( 14), INT8_C( 28), INT8_C(-121), INT8_C( -41), INT8_C(-101), INT8_C( 24), INT8_C( 0), INT8_C( 2), INT8_C( 37), INT8_C( 18), INT8_C( 127), INT8_C( -7), INT8_C( -59), INT8_C( -34), INT8_C( 4), INT8_C(-122), INT8_C( 76), INT8_C( -20), INT8_C(-120), INT8_C( 61), INT8_C( 105), INT8_C(-116), INT8_C( 95), INT8_C( 77)), simde_mm512_set_epi8(INT8_C( -92), INT8_C( -62), INT8_C( -9), INT8_C(-101), INT8_C( 111), INT8_C( 34), INT8_C( 99), INT8_C(-114), INT8_C( -60), INT8_C( 71), INT8_C( -75), INT8_C( 36), INT8_C( 99), INT8_C( 53), INT8_C(-124), INT8_C( -73), INT8_C( 27), INT8_C( -87), INT8_C(-108), INT8_C( 27), INT8_C( -44), INT8_C( 51), INT8_C( -32), INT8_C( 69), INT8_C( -33), INT8_C( -97), INT8_C( 101), INT8_C( -56), INT8_C(-100), INT8_C( 32), INT8_C( -78), INT8_C( -76), INT8_C( 94), INT8_C( 0), INT8_C( -95), INT8_C( 52), INT8_C( 62), INT8_C( 65), INT8_C( -46), INT8_C( 95), INT8_C(-104), INT8_C( -80), INT8_C( -83), INT8_C( 48), INT8_C( 16), INT8_C( -92), INT8_C( -85), INT8_C( 42), 
INT8_C( -33), INT8_C( -67), INT8_C(-113), INT8_C( 54), INT8_C(-127), INT8_C( 71), INT8_C( 121), INT8_C( -32), INT8_C( -84), INT8_C( 51), INT8_C( -9), INT8_C( 16), INT8_C(-104), INT8_C( -8), INT8_C( 49), INT8_C( -78)) }, { simde_mm512_set_epi8(INT8_C( -20), INT8_C(-128), INT8_C( 30), INT8_C( -62), INT8_C(-115), INT8_C( 37), INT8_C( 44), INT8_C( 9), INT8_C( 62), INT8_C( 115), INT8_C( -32), INT8_C( 96), INT8_C( 97), INT8_C(-125), INT8_C( 100), INT8_C( -75), INT8_C( 118), INT8_C(-122), INT8_C( 110), INT8_C( 15), INT8_C( 92), INT8_C( -45), INT8_C( -12), INT8_C( 60), INT8_C(-110), INT8_C( 81), INT8_C( 58), INT8_C( -38), INT8_C( 114), INT8_C( -35), INT8_C( -67), INT8_C( 20), INT8_C( -97), INT8_C( 10), INT8_C( 84), INT8_C( 49), INT8_C( 94), INT8_C( -59), INT8_C( -83), INT8_C( 28), INT8_C( -40), INT8_C( 110), INT8_C( -8), INT8_C( 28), INT8_C( -35), INT8_C( -5), INT8_C(-119), INT8_C( -82), INT8_C( 99), INT8_C( 84), INT8_C( 122), INT8_C( 69), INT8_C( 115), INT8_C( 3), INT8_C(-108), INT8_C( -90), INT8_C( -26), INT8_C( -78), INT8_C( -27), INT8_C( -66), INT8_C( -90), INT8_C( -23), INT8_C(-125), INT8_C( -64)), simde_mm512_set_epi8(INT8_C( 118), INT8_C( -60), INT8_C( -8), INT8_C( -47), INT8_C( -94), INT8_C( 14), INT8_C(-105), INT8_C( -53), INT8_C( 120), INT8_C( 100), INT8_C( 83), INT8_C( 27), INT8_C( -60), INT8_C( 75), INT8_C( 26), INT8_C( -28), INT8_C( 71), INT8_C( 30), INT8_C( 66), INT8_C( -19), INT8_C( -84), INT8_C( -56), INT8_C(-116), INT8_C( 117), INT8_C( 51), INT8_C( -44), INT8_C( 73), INT8_C( -1), INT8_C( 111), INT8_C( 113), INT8_C( -22), INT8_C( 64), INT8_C(-122), INT8_C(-125), INT8_C( 103), INT8_C( 110), INT8_C( 98), INT8_C( -78), INT8_C( -7), INT8_C( -29), INT8_C( -88), INT8_C( 88), INT8_C( 10), INT8_C( 24), INT8_C( 100), INT8_C( -11), INT8_C( -62), INT8_C( 52), INT8_C( 97), INT8_C( 31), INT8_C( -35), INT8_C( 120), INT8_C( 4), INT8_C( -5), INT8_C(-110), INT8_C( 0), INT8_C( -61), INT8_C(-127), INT8_C( -20), INT8_C( 85), INT8_C( -84), INT8_C( -4), INT8_C(-113), 
INT8_C(-106)), simde_mm512_set_epi8(INT8_C( 82), INT8_C( 120), INT8_C( -19), INT8_C( 116), INT8_C( 110), INT8_C( 31), INT8_C( -57), INT8_C( -18), INT8_C( 32), INT8_C( -79), INT8_C( -19), INT8_C( 44), INT8_C( -14), INT8_C( 15), INT8_C( 109), INT8_C( 88), INT8_C(-128), INT8_C( 91), INT8_C(-114), INT8_C( 31), INT8_C( 57), INT8_C( 36), INT8_C( -48), INT8_C( 125), INT8_C(-108), INT8_C( 12), INT8_C( -26), INT8_C( 49), INT8_C(-128), INT8_C( 37), INT8_C( -63), INT8_C( -97), INT8_C( -21), INT8_C( -19), INT8_C( 6), INT8_C(-120), INT8_C( 121), INT8_C( -51), INT8_C( 40), INT8_C( 0), INT8_C( -28), INT8_C( 14), INT8_C( -71), INT8_C( 76), INT8_C(-104), INT8_C( 79), INT8_C( -81), INT8_C( 98), INT8_C( -3), INT8_C(-118), INT8_C( -72), INT8_C( -1), INT8_C( -54), INT8_C( -27), INT8_C(-106), INT8_C( -86), INT8_C( 71), INT8_C( 53), INT8_C( -42), INT8_C( 85), INT8_C( 49), INT8_C( -5), INT8_C( 104), INT8_C( 97)) }, { simde_mm512_set_epi8(INT8_C( -12), INT8_C( -97), INT8_C(-116), INT8_C( 114), INT8_C( 34), INT8_C( 77), INT8_C( 118), INT8_C( -26), INT8_C(-124), INT8_C( -29), INT8_C(-117), INT8_C( -60), INT8_C( -21), INT8_C( 89), INT8_C( -92), INT8_C(-120), INT8_C( -47), INT8_C( 45), INT8_C( -68), INT8_C( 28), INT8_C( -35), INT8_C(-100), INT8_C( -85), INT8_C(-104), INT8_C( 112), INT8_C( 16), INT8_C( -41), INT8_C( 56), INT8_C( 84), INT8_C( 104), INT8_C(-121), INT8_C( -39), INT8_C( 14), INT8_C( -28), INT8_C( 119), INT8_C( -89), INT8_C( 93), INT8_C( 68), INT8_C( -8), INT8_C( -52), INT8_C( 70), INT8_C( -88), INT8_C( 112), INT8_C(-115), INT8_C(-117), INT8_C( -66), INT8_C( 98), INT8_C( -40), INT8_C( 80), INT8_C( 0), INT8_C( -25), INT8_C( -44), INT8_C( -35), INT8_C( 89), INT8_C( 84), INT8_C( 53), INT8_C( 2), INT8_C( -40), INT8_C( 71), INT8_C( 53), INT8_C( 10), INT8_C( -48), INT8_C( -64), INT8_C( -40)), simde_mm512_set_epi8(INT8_C( -17), INT8_C(-103), INT8_C( 72), INT8_C(-128), INT8_C( 126), INT8_C( 35), INT8_C( -85), INT8_C( 118), INT8_C( 57), INT8_C( 125), INT8_C( -6), INT8_C( -57), INT8_C( -20), 
INT8_C( 11), INT8_C( -19), INT8_C( 80), INT8_C( -4), INT8_C( 76), INT8_C( 97), INT8_C( 6), INT8_C( 67), INT8_C(-100), INT8_C( 112), INT8_C( 10), INT8_C( -21), INT8_C( -47), INT8_C( 73), INT8_C( 57), INT8_C( -34), INT8_C( 45), INT8_C( 49), INT8_C( 73), INT8_C( 120), INT8_C( 79), INT8_C( -76), INT8_C( 90), INT8_C( 3), INT8_C(-113), INT8_C( 126), INT8_C( 10), INT8_C( 50), INT8_C(-103), INT8_C( 60), INT8_C(-119), INT8_C(-101), INT8_C( 54), INT8_C( -8), INT8_C( -9), INT8_C( -13), INT8_C( 119), INT8_C(-104), INT8_C( 81), INT8_C( -64), INT8_C( -40), INT8_C( 34), INT8_C( 104), INT8_C( 75), INT8_C(-123), INT8_C( -3), INT8_C( 12), INT8_C( 46), INT8_C( -83), INT8_C( 115), INT8_C( 41)), simde_mm512_set_epi8(INT8_C( -84), INT8_C( -32), INT8_C( 33), INT8_C( 69), INT8_C( 62), INT8_C(-100), INT8_C( 17), INT8_C( 21), INT8_C( 50), INT8_C( 122), INT8_C(-122), INT8_C( -63), INT8_C( 13), INT8_C( 78), INT8_C( -87), INT8_C( 116), INT8_C( 56), INT8_C( -29), INT8_C( 6), INT8_C( -55), INT8_C( 110), INT8_C( 122), INT8_C( -13), INT8_C( -31), INT8_C( 105), INT8_C( 11), INT8_C( -34), INT8_C( 57), INT8_C(-123), INT8_C( 24), INT8_C( 115), INT8_C( -49), INT8_C( -31), INT8_C( 108), INT8_C( 26), INT8_C( 63), INT8_C( 56), INT8_C( 101), INT8_C( 2), INT8_C( -15), INT8_C( -87), INT8_C( 111), INT8_C(-119), INT8_C( 46), INT8_C( -74), INT8_C( 80), INT8_C( 113), INT8_C( 22), INT8_C( 127), INT8_C( -97), INT8_C( -60), INT8_C( 24), INT8_C( 89), INT8_C(-106), INT8_C(-126), INT8_C( -36), INT8_C( -35), INT8_C( -61), INT8_C( 116), INT8_C( 117), INT8_C( 49), INT8_C(-122), INT8_C(-108), INT8_C( -61)) }, { simde_mm512_set_epi8(INT8_C( -8), INT8_C( -52), INT8_C( 78), INT8_C( -24), INT8_C( -96), INT8_C( -43), INT8_C( 60), INT8_C( -80), INT8_C( 106), INT8_C( 106), INT8_C( -45), INT8_C(-107), INT8_C( -69), INT8_C( -94), INT8_C( 13), INT8_C( 102), INT8_C( -34), INT8_C( -8), INT8_C( 0), INT8_C( -97), INT8_C( 121), INT8_C( 10), INT8_C( -78), INT8_C( -54), INT8_C( -36), INT8_C( -63), INT8_C( -81), INT8_C( 10), INT8_C( 65), 
INT8_C( -80), INT8_C(-116), INT8_C(-125), INT8_C( -11), INT8_C( 15), INT8_C( -74), INT8_C( -56), INT8_C( -11), INT8_C( -46), INT8_C( 40), INT8_C( 101), INT8_C( -18), INT8_C( 80), INT8_C( 29), INT8_C( 31), INT8_C( 12), INT8_C( -5), INT8_C( -95), INT8_C( 18), INT8_C( 112), INT8_C(-106), INT8_C( 61), INT8_C( -64), INT8_C( 64), INT8_C( -70), INT8_C(-105), INT8_C( 77), INT8_C( -45), INT8_C( 65), INT8_C( 85), INT8_C( 87), INT8_C( 21), INT8_C( -16), INT8_C( 77), INT8_C( 33)), simde_mm512_set_epi8(INT8_C( 67), INT8_C( 45), INT8_C( -96), INT8_C( 46), INT8_C( -3), INT8_C( 10), INT8_C( -28), INT8_C( 72), INT8_C( 95), INT8_C( 17), INT8_C( -18), INT8_C( 35), INT8_C( -13), INT8_C( 31), INT8_C( -33), INT8_C( 72), INT8_C( -16), INT8_C( -88), INT8_C( 5), INT8_C( -29), INT8_C( -38), INT8_C( 19), INT8_C( 117), INT8_C( 47), INT8_C(-125), INT8_C( -23), INT8_C( 85), INT8_C(-124), INT8_C( 1), INT8_C( 20), INT8_C( -25), INT8_C(-100), INT8_C( -3), INT8_C( -75), INT8_C( 69), INT8_C( 119), INT8_C( 113), INT8_C( -53), INT8_C( -61), INT8_C( 67), INT8_C( 0), INT8_C(-115), INT8_C( 11), INT8_C( -86), INT8_C( 51), INT8_C(-124), INT8_C( 33), INT8_C(-127), INT8_C( 15), INT8_C( -42), INT8_C( -15), INT8_C( 69), INT8_C(-105), INT8_C( 93), INT8_C( 9), INT8_C( 127), INT8_C( -40), INT8_C( -93), INT8_C( -66), INT8_C( -79), INT8_C( -24), INT8_C( 26), INT8_C( 16), INT8_C( -79)), simde_mm512_set_epi8(INT8_C( -7), INT8_C( 15), INT8_C( -19), INT8_C( 37), INT8_C( -44), INT8_C(-118), INT8_C( 96), INT8_C( 127), INT8_C( -61), INT8_C( -61), INT8_C( 80), INT8_C( 83), INT8_C( 11), INT8_C( 126), INT8_C( 121), INT8_C( -24), INT8_C( -11), INT8_C( 22), INT8_C( -97), INT8_C( -33), INT8_C( 124), INT8_C( 42), INT8_C( -20), INT8_C( -29), INT8_C( -55), INT8_C( 95), INT8_C( -38), INT8_C( 8), INT8_C( -37), INT8_C( -15), INT8_C( 14), INT8_C( 84), INT8_C(-123), INT8_C( 25), INT8_C( 67), INT8_C( 57), INT8_C(-123), INT8_C( 33), INT8_C( -2), INT8_C( 0), INT8_C( -65), INT8_C(-105), INT8_C( -97), INT8_C( 97), INT8_C( 125), INT8_C( 
109), INT8_C( -5), INT8_C( 127), INT8_C( 31), INT8_C( 50), INT8_C( 52), INT8_C( 16), INT8_C(-114), INT8_C( 43), INT8_C(-112), INT8_C( 93), INT8_C( -68), INT8_C( 117), INT8_C( 5), INT8_C( -18), INT8_C(-104), INT8_C( -6), INT8_C(-113), INT8_C( 7)) }, { simde_mm512_set_epi8(INT8_C( 97), INT8_C( -65), INT8_C(-103), INT8_C( 115), INT8_C( 86), INT8_C( 81), INT8_C( 35), INT8_C( 115), INT8_C( 87), INT8_C( 60), INT8_C( 103), INT8_C( 16), INT8_C( 105), INT8_C(-103), INT8_C( 41), INT8_C( -70), INT8_C( -56), INT8_C( -81), INT8_C( 5), INT8_C( -52), INT8_C( 73), INT8_C(-113), INT8_C( 7), INT8_C( -33), INT8_C( 0), INT8_C( -58), INT8_C( -97), INT8_C( 16), INT8_C( -71), INT8_C( 101), INT8_C( 93), INT8_C(-124), INT8_C( 66), INT8_C( 101), INT8_C( -22), INT8_C( 62), INT8_C( 45), INT8_C( -13), INT8_C( 97), INT8_C( 6), INT8_C( -10), INT8_C( 81), INT8_C( -11), INT8_C( 50), INT8_C( 76), INT8_C(-105), INT8_C( -48), INT8_C( -19), INT8_C( 3), INT8_C( 85), INT8_C( 45), INT8_C( -62), INT8_C(-101), INT8_C( 125), INT8_C( -54), INT8_C( 33), INT8_C( -47), INT8_C( 78), INT8_C( 54), INT8_C( -7), INT8_C( 46), INT8_C( 104), INT8_C( 81), INT8_C( -1)), simde_mm512_set_epi8(INT8_C( -4), INT8_C( -11), INT8_C( -65), INT8_C( 94), INT8_C( -93), INT8_C( -39), INT8_C(-128), INT8_C( 35), INT8_C( -73), INT8_C( -74), INT8_C( 30), INT8_C( 126), INT8_C( 3), INT8_C( 4), INT8_C( -68), INT8_C( 86), INT8_C( 115), INT8_C(-119), INT8_C( -25), INT8_C( -5), INT8_C( 23), INT8_C( -82), INT8_C( 66), INT8_C( 77), INT8_C( 97), INT8_C( 3), INT8_C( -67), INT8_C( -81), INT8_C( 99), INT8_C( -85), INT8_C( 58), INT8_C( 54), INT8_C( -4), INT8_C( 74), INT8_C( 58), INT8_C( 19), INT8_C( -90), INT8_C( 92), INT8_C( 10), INT8_C( -48), INT8_C( 74), INT8_C( 86), INT8_C( 107), INT8_C( -99), INT8_C( 17), INT8_C( 28), INT8_C( 69), INT8_C( 73), INT8_C( 99), INT8_C( 107), INT8_C( -54), INT8_C( 4), INT8_C( -60), INT8_C(-105), INT8_C( 39), INT8_C( -97), INT8_C( -99), INT8_C( -57), INT8_C( -58), INT8_C( 117), INT8_C(-113), INT8_C( 51), INT8_C( 82), 
INT8_C( -16)), simde_mm512_set_epi8(INT8_C( 37), INT8_C( -66), INT8_C( -65), INT8_C( 81), INT8_C( -51), INT8_C( -71), INT8_C( -51), INT8_C( 81), INT8_C( 117), INT8_C( -14), INT8_C(-103), INT8_C( 124), INT8_C( 118), INT8_C( -65), INT8_C( 76), INT8_C( -29), INT8_C( -87), INT8_C( 2), INT8_C( 54), INT8_C( -50), INT8_C( -53), INT8_C( 40), INT8_C( -6), INT8_C( 55), INT8_C( -97), INT8_C( -45), INT8_C( -31), INT8_C( 59), INT8_C( -87), INT8_C( 56), INT8_C( -65), INT8_C( 1), INT8_C( 64), INT8_C( 116), INT8_C( 103), INT8_C( -2), INT8_C( 13), INT8_C( 22), INT8_C(-103), INT8_C( -75), INT8_C( 68), INT8_C( -91), INT8_C( 53), INT8_C( -86), INT8_C( 8), INT8_C( 97), INT8_C( -52), INT8_C( 98), INT8_C( -64), INT8_C( 36), INT8_C( 112), INT8_C( -90), INT8_C( -83), INT8_C( -66), INT8_C( 44), INT8_C( 82), INT8_C( -51), INT8_C( 68), INT8_C(-112), INT8_C( -17), INT8_C( -42), INT8_C(-104), INT8_C( -74), INT8_C( 120)) }, { simde_mm512_set_epi8(INT8_C(-114), INT8_C( 84), INT8_C( 109), INT8_C( 40), INT8_C( -55), INT8_C( -5), INT8_C( -28), INT8_C( 111), INT8_C( -61), INT8_C(-126), INT8_C( -9), INT8_C( 122), INT8_C( 121), INT8_C( 80), INT8_C( 116), INT8_C( 66), INT8_C( 56), INT8_C( 92), INT8_C( -15), INT8_C( 88), INT8_C( -82), INT8_C( 24), INT8_C( 22), INT8_C( 55), INT8_C( -92), INT8_C( -3), INT8_C( -13), INT8_C( 70), INT8_C( -88), INT8_C( -90), INT8_C( -47), INT8_C( -66), INT8_C( -40), INT8_C( 5), INT8_C( -96), INT8_C(-120), INT8_C( -65), INT8_C( -98), INT8_C( 120), INT8_C( 54), INT8_C( -63), INT8_C( 36), INT8_C(-107), INT8_C( 2), INT8_C( 82), INT8_C( -28), INT8_C( 7), INT8_C( -92), INT8_C( -8), INT8_C( -92), INT8_C( 58), INT8_C( -89), INT8_C( 15), INT8_C( -88), INT8_C( -89), INT8_C( 119), INT8_C( -67), INT8_C( -95), INT8_C( 25), INT8_C( 99), INT8_C( 100), INT8_C( 30), INT8_C( 28), INT8_C( 104)), simde_mm512_set_epi8(INT8_C( 28), INT8_C( -13), INT8_C(-115), INT8_C( -62), INT8_C( 120), INT8_C( -67), INT8_C(-119), INT8_C( -4), INT8_C( 94), INT8_C( -5), INT8_C( 64), INT8_C(-117), INT8_C( 28), 
INT8_C( 9), INT8_C( -53), INT8_C( -49), INT8_C( -43), INT8_C(-111), INT8_C( 22), INT8_C( -21), INT8_C( -86), INT8_C( -5), INT8_C( -70), INT8_C( -26), INT8_C( 45), INT8_C( 108), INT8_C( -27), INT8_C( 115), INT8_C( 92), INT8_C(-105), INT8_C(-116), INT8_C( 45), INT8_C(-102), INT8_C( 64), INT8_C( 16), INT8_C(-122), INT8_C( 30), INT8_C( -80), INT8_C( -75), INT8_C( 7), INT8_C( 86), INT8_C(-123), INT8_C( 35), INT8_C(-106), INT8_C( 49), INT8_C( -37), INT8_C(-112), INT8_C( 59), INT8_C( 117), INT8_C( 45), INT8_C( 79), INT8_C( -73), INT8_C( 102), INT8_C( 108), INT8_C( -22), INT8_C( -90), INT8_C(-117), INT8_C( 37), INT8_C( 47), INT8_C( 92), INT8_C( -73), INT8_C( 49), INT8_C( 93), INT8_C( -22)), simde_mm512_set_epi8(INT8_C( -29), INT8_C( 85), INT8_C( -82), INT8_C(-115), INT8_C( -28), INT8_C( -61), INT8_C( 68), INT8_C( 7), INT8_C( 118), INT8_C( 2), INT8_C( 63), INT8_C(-127), INT8_C( 73), INT8_C( 16), INT8_C(-116), INT8_C( 62), INT8_C( -82), INT8_C( 122), INT8_C( -80), INT8_C(-128), INT8_C( 86), INT8_C( 41), INT8_C( -50), INT8_C( -54), INT8_C( 90), INT8_C( 92), INT8_C( -33), INT8_C( 8), INT8_C( -49), INT8_C( 84), INT8_C( -19), INT8_C( 36), INT8_C( 91), INT8_C( 113), INT8_C( -87), INT8_C( -53), INT8_C( 105), INT8_C( 39), INT8_C( -94), INT8_C( -68), INT8_C( -85), INT8_C( -6), INT8_C( -8), INT8_C( -24), INT8_C( 34), INT8_C( 114), INT8_C(-101), INT8_C( 69), INT8_C( -3), INT8_C( 93), INT8_C( 100), INT8_C(-116), INT8_C( 78), INT8_C( 7), INT8_C(-116), INT8_C( -76), INT8_C( 83), INT8_C( 10), INT8_C( 32), INT8_C( 92), INT8_C( -19), INT8_C( 118), INT8_C( 121), INT8_C( 86)) }, { simde_mm512_set_epi8(INT8_C( -86), INT8_C(-116), INT8_C( -59), INT8_C( 47), INT8_C( -4), INT8_C( -72), INT8_C( 122), INT8_C( -93), INT8_C( 3), INT8_C( -4), INT8_C( 98), INT8_C( -88), INT8_C( 13), INT8_C( 87), INT8_C( -90), INT8_C( -13), INT8_C( -64), INT8_C( 35), INT8_C( -69), INT8_C( -60), INT8_C(-105), INT8_C(-107), INT8_C( 45), INT8_C(-115), INT8_C( -43), INT8_C( 92), INT8_C( 124), INT8_C( -91), INT8_C( -66), 
INT8_C( 92), INT8_C( -47), INT8_C( 62), INT8_C( -88), INT8_C( 45), INT8_C( 56), INT8_C( -57), INT8_C( 113), INT8_C( -35), INT8_C( 68), INT8_C( 64), INT8_C( -69), INT8_C( -19), INT8_C( 1), INT8_C( 9), INT8_C( 74), INT8_C( -29), INT8_C( 92), INT8_C( -4), INT8_C( 83), INT8_C( 90), INT8_C(-123), INT8_C( -68), INT8_C( -29), INT8_C( -84), INT8_C( 68), INT8_C( 70), INT8_C( -49), INT8_C( 62), INT8_C( 38), INT8_C( -13), INT8_C( 41), INT8_C( 104), INT8_C( 104), INT8_C( -63)), simde_mm512_set_epi8(INT8_C( -19), INT8_C( 22), INT8_C( 18), INT8_C( 11), INT8_C( 51), INT8_C( -67), INT8_C( -37), INT8_C( -51), INT8_C( -24), INT8_C( -36), INT8_C( 100), INT8_C( 37), INT8_C(-113), INT8_C( 10), INT8_C( -40), INT8_C( 117), INT8_C( 102), INT8_C( -9), INT8_C(-115), INT8_C( 91), INT8_C( 102), INT8_C( 70), INT8_C( -21), INT8_C( 80), INT8_C( 31), INT8_C( 56), INT8_C( -30), INT8_C(-124), INT8_C( 52), INT8_C( 118), INT8_C( 89), INT8_C( -37), INT8_C(-109), INT8_C( 4), INT8_C( 120), INT8_C( 53), INT8_C( 13), INT8_C( -50), INT8_C( -58), INT8_C( -53), INT8_C(-113), INT8_C(-126), INT8_C( -64), INT8_C( -5), INT8_C(-127), INT8_C( -16), INT8_C( -73), INT8_C( 19), INT8_C(-125), INT8_C( 29), INT8_C(-126), INT8_C( -33), INT8_C( 119), INT8_C( 57), INT8_C( 99), INT8_C( -29), INT8_C( -31), INT8_C( -11), INT8_C(-123), INT8_C( 105), INT8_C( -74), INT8_C(-113), INT8_C( -55), INT8_C( -21)), simde_mm512_set_epi8(INT8_C( -73), INT8_C( -4), INT8_C( 106), INT8_C( -31), INT8_C( 12), INT8_C( 21), INT8_C( -55), INT8_C( 24), INT8_C( -22), INT8_C( 126), INT8_C( 4), INT8_C( -68), INT8_C( 30), INT8_C( -58), INT8_C( -45), INT8_C( 78), INT8_C( -26), INT8_C( -75), INT8_C( 115), INT8_C( -97), INT8_C( -58), INT8_C( -18), INT8_C( 83), INT8_C( -28), INT8_C( -93), INT8_C( 73), INT8_C( -28), INT8_C( 21), INT8_C(-113), INT8_C( 73), INT8_C( -62), INT8_C(-118), INT8_C( 8), INT8_C( 1), INT8_C( 122), INT8_C( 41), INT8_C( 125), INT8_C( 47), INT8_C( -60), INT8_C( -59), INT8_C(-114), INT8_C( 91), INT8_C( 70), INT8_C( -23), INT8_C(-109), 
INT8_C( -65), INT8_C(-126), INT8_C( 93), INT8_C( 61), INT8_C( -78), INT8_C( -85), INT8_C( 19), INT8_C( 54), INT8_C( 39), INT8_C( -92), INT8_C( -31), INT8_C( -66), INT8_C( 65), INT8_C( 27), INT8_C( 32), INT8_C( -57), INT8_C( 28), INT8_C( 28), INT8_C( -20)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_gf2p8affineinv_epi64_epi8(test_vec[i].x, test_vec[i].A, 159); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_mask_gf2p8affineinv_epi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i src; simde__mmask16 k; simde__m128i x; simde__m128i A; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( 86), INT8_C(-109), INT8_C(-116), INT8_C( 52), INT8_C( -80), INT8_C( -39), INT8_C( -82), INT8_C( 33), INT8_C( 37), INT8_C( 31), INT8_C( 47), INT8_C( 64), INT8_C( 83), INT8_C( 59), INT8_C( -2), INT8_C(-123)), UINT16_C(11144), simde_mm_set_epi8(INT8_C( -42), INT8_C( 62), INT8_C( 88), INT8_C( 7), INT8_C( 61), INT8_C( 116), INT8_C( -89), INT8_C( 122), INT8_C( 102), INT8_C( -48), INT8_C( 124), INT8_C( 70), INT8_C( 47), INT8_C( 38), INT8_C( 113), INT8_C( -68)), simde_mm_set_epi8(INT8_C( -6), INT8_C( 43), INT8_C( 55), INT8_C( 12), INT8_C( 60), INT8_C( 72), INT8_C( 58), INT8_C( 119), INT8_C(-125), INT8_C(-116), INT8_C( -2), INT8_C( 35), INT8_C( 46), INT8_C( -63), INT8_C( -77), INT8_C( -15)), simde_mm_set_epi8(INT8_C( 86), INT8_C(-109), INT8_C( 64), INT8_C( 52), INT8_C( -41), INT8_C( -39), INT8_C( 52), INT8_C(-101), INT8_C( -67), INT8_C( 31), INT8_C( 47), INT8_C( 64), INT8_C( -16), INT8_C( 59), INT8_C( -2), INT8_C(-123)) }, { simde_mm_set_epi8(INT8_C( 78), INT8_C( -70), INT8_C( 8), INT8_C( -28), INT8_C( -17), INT8_C( 126), INT8_C(-111), INT8_C(-124), INT8_C( -60), INT8_C(-108), INT8_C( 94), INT8_C( -85), INT8_C( 51), INT8_C( 20), INT8_C( 52), INT8_C(-114)), UINT16_C(16002), simde_mm_set_epi8(INT8_C( 120), INT8_C( -68), INT8_C( -84), INT8_C( -49), INT8_C( 87), INT8_C( -79), 
INT8_C( -74), INT8_C( 92), INT8_C( 64), INT8_C(-128), INT8_C( -63), INT8_C( -93), INT8_C( 85), INT8_C( -44), INT8_C( 26), INT8_C(-115)), simde_mm_set_epi8(INT8_C( 58), INT8_C( -45), INT8_C( 126), INT8_C( -7), INT8_C( 69), INT8_C( 5), INT8_C( -82), INT8_C(-111), INT8_C( -84), INT8_C( 28), INT8_C( -27), INT8_C(-123), INT8_C( 82), INT8_C(-107), INT8_C( -80), INT8_C( 79)), simde_mm_set_epi8(INT8_C( 78), INT8_C( -70), INT8_C( 68), INT8_C( 68), INT8_C( 34), INT8_C( 119), INT8_C( 127), INT8_C(-124), INT8_C( 28), INT8_C(-108), INT8_C( 94), INT8_C( -85), INT8_C( 51), INT8_C( 20), INT8_C( -96), INT8_C(-114)) }, { simde_mm_set_epi8(INT8_C(-118), INT8_C( -5), INT8_C( 100), INT8_C( 20), INT8_C( 96), INT8_C(-121), INT8_C( 54), INT8_C( 113), INT8_C( -38), INT8_C( 70), INT8_C( -49), INT8_C( 4), INT8_C( -35), INT8_C( 89), INT8_C(-103), INT8_C( 21)), UINT16_C(17590), simde_mm_set_epi8(INT8_C( 27), INT8_C( -2), INT8_C( 62), INT8_C( 34), INT8_C( 118), INT8_C( 94), INT8_C( -81), INT8_C( 50), INT8_C( -73), INT8_C( 61), INT8_C( 67), INT8_C( -62), INT8_C( 2), INT8_C( 50), INT8_C( -54), INT8_C( -9)), simde_mm_set_epi8(INT8_C( 100), INT8_C( 45), INT8_C( -71), INT8_C(-119), INT8_C( 49), INT8_C( -65), INT8_C( 94), INT8_C( 96), INT8_C( -71), INT8_C( 118), INT8_C( -91), INT8_C( -10), INT8_C( 38), INT8_C( -84), INT8_C( -72), INT8_C( -61)), simde_mm_set_epi8(INT8_C(-118), INT8_C( 17), INT8_C( 100), INT8_C( 20), INT8_C( 96), INT8_C( -90), INT8_C( 54), INT8_C( 113), INT8_C( -43), INT8_C( 70), INT8_C( 58), INT8_C( -47), INT8_C( -35), INT8_C( -46), INT8_C( 48), INT8_C( 21)) }, { simde_mm_set_epi8(INT8_C( 103), INT8_C( -8), INT8_C( 63), INT8_C( 127), INT8_C( -97), INT8_C( 62), INT8_C( 26), INT8_C( 78), INT8_C( -69), INT8_C( -62), INT8_C(-105), INT8_C( 38), INT8_C( 99), INT8_C( -23), INT8_C( 54), INT8_C( -37)), UINT16_C( 3274), simde_mm_set_epi8(INT8_C( 56), INT8_C( -71), INT8_C( 70), INT8_C( 52), INT8_C( 105), INT8_C( 74), INT8_C( -52), INT8_C( 125), INT8_C( 71), INT8_C( 78), INT8_C( 102), INT8_C( 
-87), INT8_C( -42), INT8_C( 16), INT8_C( 120), INT8_C( -7)), simde_mm_set_epi8(INT8_C(-115), INT8_C( 83), INT8_C( -51), INT8_C(-117), INT8_C( -24), INT8_C( -79), INT8_C( 55), INT8_C(-125), INT8_C(-119), INT8_C(-109), INT8_C( -25), INT8_C( 26), INT8_C( -62), INT8_C( 72), INT8_C( 66), INT8_C(-110)), simde_mm_set_epi8(INT8_C( 103), INT8_C( -8), INT8_C( 63), INT8_C( 127), INT8_C(-104), INT8_C( 27), INT8_C( 26), INT8_C( 78), INT8_C( -80), INT8_C( 39), INT8_C(-105), INT8_C( 38), INT8_C( -41), INT8_C( -23), INT8_C( 45), INT8_C( -37)) }, { simde_mm_set_epi8(INT8_C( -26), INT8_C( 57), INT8_C( -91), INT8_C( 79), INT8_C( 72), INT8_C( -31), INT8_C(-110), INT8_C( 35), INT8_C( -42), INT8_C( 93), INT8_C( -26), INT8_C( 48), INT8_C( 88), INT8_C( 90), INT8_C( 73), INT8_C(-121)), UINT16_C(12470), simde_mm_set_epi8(INT8_C( 83), INT8_C( -70), INT8_C( -20), INT8_C( -26), INT8_C( 113), INT8_C( 81), INT8_C(-104), INT8_C( 87), INT8_C( 17), INT8_C( 59), INT8_C( -92), INT8_C( -92), INT8_C( -62), INT8_C( 84), INT8_C( -91), INT8_C( 84)), simde_mm_set_epi8(INT8_C( 105), INT8_C( 4), INT8_C( 0), INT8_C( 126), INT8_C( 17), INT8_C( -37), INT8_C( 124), INT8_C( 81), INT8_C( -49), INT8_C( -80), INT8_C( 50), INT8_C( 114), INT8_C( 103), INT8_C( 115), INT8_C( 11), INT8_C( 121)), simde_mm_set_epi8(INT8_C( -26), INT8_C( 57), INT8_C( 109), INT8_C( -99), INT8_C( 72), INT8_C( -31), INT8_C(-110), INT8_C( 35), INT8_C( -20), INT8_C( 93), INT8_C( -79), INT8_C( -79), INT8_C( 88), INT8_C(-121), INT8_C( 60), INT8_C(-121)) }, { simde_mm_set_epi8(INT8_C( -60), INT8_C( 101), INT8_C( 20), INT8_C( -20), INT8_C( 122), INT8_C( 120), INT8_C(-101), INT8_C( -57), INT8_C( 8), INT8_C( -41), INT8_C( 78), INT8_C( -24), INT8_C( -85), INT8_C( -99), INT8_C( -85), INT8_C( -95)), UINT16_C(17946), simde_mm_set_epi8(INT8_C(-126), INT8_C( 90), INT8_C( 106), INT8_C(-128), INT8_C( 45), INT8_C( 80), INT8_C(-116), INT8_C( 9), INT8_C( 97), INT8_C(-126), INT8_C( -49), INT8_C( 31), INT8_C( 80), INT8_C( -15), INT8_C(-103), INT8_C( -93)), 
simde_mm_set_epi8(INT8_C( -32), INT8_C(-111), INT8_C( -86), INT8_C( 81), INT8_C( -69), INT8_C( -40), INT8_C(-105), INT8_C( 25), INT8_C( 10), INT8_C( 111), INT8_C( 0), INT8_C( 102), INT8_C( -29), INT8_C( -79), INT8_C( -33), INT8_C( -18)), simde_mm_set_epi8(INT8_C( -60), INT8_C( -81), INT8_C( 20), INT8_C( -20), INT8_C( 122), INT8_C(-117), INT8_C(-111), INT8_C( -57), INT8_C( 8), INT8_C( -41), INT8_C( 78), INT8_C( 31), INT8_C( 5), INT8_C( -99), INT8_C( 68), INT8_C( -95)) }, { simde_mm_set_epi8(INT8_C( -56), INT8_C( -96), INT8_C(-107), INT8_C( 81), INT8_C( 28), INT8_C( 89), INT8_C(-117), INT8_C(-113), INT8_C( 93), INT8_C( 34), INT8_C( -81), INT8_C( 29), INT8_C( -48), INT8_C( -91), INT8_C(-101), INT8_C( 33)), UINT16_C(50993), simde_mm_set_epi8(INT8_C( -39), INT8_C( 81), INT8_C( -54), INT8_C( 103), INT8_C( -31), INT8_C( 76), INT8_C( -6), INT8_C( 19), INT8_C( 0), INT8_C( -53), INT8_C( -62), INT8_C( 52), INT8_C( -64), INT8_C( 34), INT8_C( -62), INT8_C( 62)), simde_mm_set_epi8(INT8_C( -50), INT8_C(-100), INT8_C( 91), INT8_C( -73), INT8_C(-128), INT8_C( -69), INT8_C( 75), INT8_C( 41), INT8_C( 62), INT8_C( -50), INT8_C( 30), INT8_C( -6), INT8_C( 85), INT8_C( -1), INT8_C( 20), INT8_C( -57)), simde_mm_set_epi8(INT8_C( -77), INT8_C( 105), INT8_C(-107), INT8_C( 81), INT8_C( 28), INT8_C(-114), INT8_C( 45), INT8_C( -51), INT8_C( 93), INT8_C( 34), INT8_C( 0), INT8_C( -75), INT8_C( -48), INT8_C( -91), INT8_C(-101), INT8_C( -74)) }, { simde_mm_set_epi8(INT8_C( 27), INT8_C( 42), INT8_C( -96), INT8_C( -57), INT8_C(-122), INT8_C( 20), INT8_C( 54), INT8_C( 94), INT8_C( 37), INT8_C( 113), INT8_C( -54), INT8_C( 15), INT8_C(-119), INT8_C(-119), INT8_C( 102), INT8_C( 12)), UINT16_C(59513), simde_mm_set_epi8(INT8_C( 20), INT8_C( -1), INT8_C( 126), INT8_C( -58), INT8_C(-122), INT8_C( -63), INT8_C( 0), INT8_C( 21), INT8_C( 125), INT8_C(-120), INT8_C( 11), INT8_C( 36), INT8_C( -16), INT8_C( 30), INT8_C( -96), INT8_C( 28)), simde_mm_set_epi8(INT8_C( -37), INT8_C( 0), INT8_C( 84), INT8_C( 112), 
INT8_C( 67), INT8_C(-128), INT8_C( -76), INT8_C( -90), INT8_C( -77), INT8_C( 21), INT8_C( 79), INT8_C( -2), INT8_C( 5), INT8_C( 83), INT8_C( 16), INT8_C( 106)), simde_mm_set_epi8(INT8_C( 82), INT8_C( 102), INT8_C( 30), INT8_C( -57), INT8_C( -34), INT8_C( 20), INT8_C( 54), INT8_C( 94), INT8_C( 37), INT8_C(-102), INT8_C( 75), INT8_C( 0), INT8_C( 63), INT8_C(-119), INT8_C( 102), INT8_C( -95)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_mask_gf2p8affineinv_epi64_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].x, test_vec[i].A, 238); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; }
/* Checks simde_mm256_mask_gf2p8affineinv_epi64_epi8 against 8 precomputed
 * test vectors.
 *
 * Each test_vec entry stores, in call order, the four variable arguments
 * (src, k, x, A) followed by the expected result r. The 256-bit values are
 * built with simde_mm256_set_epi8 and k is a 32-bit mask literal (UINT32_C).
 * The fifth argument of the call is the same literal, INT8_C( 78), for every
 * entry.
 *
 * Returns 0 after the loop over all entries has completed.
 *
 * NOTE(review): going by the function's name, k presumably selects per byte
 * between the computed value and the corresponding byte of src -- confirm
 * against the implementation. What happens on a mismatch is decided by
 * simde_assert_m256i_i8, which is defined outside this part of the file.
 */
static int test_simde_mm256_mask_gf2p8affineinv_epi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i src; simde__mmask32 k; simde__m256i x; simde__m256i A; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( 102), INT8_C( -48), INT8_C( 124), INT8_C( 70), INT8_C( 47), INT8_C( 38), INT8_C( 113), INT8_C( -68), INT8_C( -88), INT8_C( 75), INT8_C( 45), INT8_C( 101), INT8_C( -48), INT8_C( 40), INT8_C( 43), INT8_C(-120), INT8_C( 86), INT8_C(-109), INT8_C(-116), INT8_C( 52), INT8_C( -80), INT8_C( -39), INT8_C( -82), INT8_C( 33), INT8_C( 37), INT8_C( 31), INT8_C( 47), INT8_C( 64), INT8_C( 83), INT8_C( 59), INT8_C( -2), INT8_C(-123)), UINT32_C(1031055226), simde_mm256_set_epi8(INT8_C( 78), INT8_C( -70), INT8_C( 8), INT8_C( -28), INT8_C( -17), INT8_C( 126), INT8_C(-111), INT8_C(-124), INT8_C( -60), INT8_C(-108), INT8_C( 94), INT8_C( -85), INT8_C( 51), INT8_C( 20), INT8_C( 52), INT8_C(-114), INT8_C( -6), INT8_C( 43), INT8_C( 55), INT8_C( 12), INT8_C( 60), INT8_C( 72), INT8_C( 58), INT8_C( 119), INT8_C(-125), INT8_C(-116), INT8_C( -2), INT8_C( 35), INT8_C( 46), INT8_C( -63), INT8_C( -77), INT8_C( -15)), simde_mm256_set_epi8(INT8_C( -84), INT8_C( 28), INT8_C( -27), INT8_C(-123), INT8_C( 82), INT8_C(-107), INT8_C( -80), INT8_C( 79), INT8_C( 120), INT8_C( -68), INT8_C( -84), INT8_C( -49), INT8_C( 87), INT8_C( -79), 
INT8_C( -74), INT8_C( 92), INT8_C( 64), INT8_C(-128), INT8_C( -63), INT8_C( -93), INT8_C( 85), INT8_C( -44), INT8_C( 26), INT8_C(-115), INT8_C(-101), INT8_C( 76), INT8_C(-128), INT8_C( -17), INT8_C(-103), INT8_C( 2), INT8_C( 62), INT8_C(-126)), simde_mm256_set_epi8(INT8_C( 102), INT8_C( -48), INT8_C( 113), INT8_C(-120), INT8_C( 40), INT8_C(-113), INT8_C( 113), INT8_C( 110), INT8_C( -88), INT8_C( -59), INT8_C(-120), INT8_C( 0), INT8_C( -48), INT8_C( 100), INT8_C( 43), INT8_C(-120), INT8_C( -17), INT8_C(-109), INT8_C( 51), INT8_C( 52), INT8_C( -80), INT8_C(-116), INT8_C( 70), INT8_C( 70), INT8_C( 37), INT8_C( 122), INT8_C( 93), INT8_C( -39), INT8_C( 114), INT8_C( 59), INT8_C( 112), INT8_C(-123)) }, { simde_mm256_set_epi8(INT8_C( -22), INT8_C( -88), INT8_C(-110), INT8_C( -48), INT8_C( 76), INT8_C( -59), INT8_C( 68), INT8_C( -74), INT8_C(-118), INT8_C( -5), INT8_C( 100), INT8_C( 20), INT8_C( 96), INT8_C(-121), INT8_C( 54), INT8_C( 113), INT8_C( -38), INT8_C( 70), INT8_C( -49), INT8_C( 4), INT8_C( -35), INT8_C( 89), INT8_C(-103), INT8_C( 21), INT8_C( 58), INT8_C( -45), INT8_C( 126), INT8_C( -7), INT8_C( 69), INT8_C( 5), INT8_C( -82), INT8_C(-111)), UINT32_C( 36883191), simde_mm256_set_epi8(INT8_C( -69), INT8_C( -62), INT8_C(-105), INT8_C( 38), INT8_C( 99), INT8_C( -23), INT8_C( 54), INT8_C( -37), INT8_C( 100), INT8_C( 45), INT8_C( -71), INT8_C(-119), INT8_C( 49), INT8_C( -65), INT8_C( 94), INT8_C( 96), INT8_C( -71), INT8_C( 118), INT8_C( -91), INT8_C( -10), INT8_C( 38), INT8_C( -84), INT8_C( -72), INT8_C( -61), INT8_C( 27), INT8_C( -2), INT8_C( 62), INT8_C( 34), INT8_C( 118), INT8_C( 94), INT8_C( -81), INT8_C( 50)), simde_mm256_set_epi8(INT8_C( 56), INT8_C( -71), INT8_C( 70), INT8_C( 52), INT8_C( 105), INT8_C( 74), INT8_C( -52), INT8_C( 125), INT8_C( 71), INT8_C( 78), INT8_C( 102), INT8_C( -87), INT8_C( -42), INT8_C( 16), INT8_C( 120), INT8_C( -7), INT8_C( 86), INT8_C( 101), INT8_C( 24), INT8_C( 125), INT8_C( -69), INT8_C( 65), INT8_C( 12), INT8_C( -54), INT8_C( 103), 
INT8_C( -8), INT8_C( 63), INT8_C( 127), INT8_C( -97), INT8_C( 62), INT8_C( 26), INT8_C( 78)), simde_mm256_set_epi8(INT8_C( -22), INT8_C( -88), INT8_C(-110), INT8_C( -48), INT8_C( 76), INT8_C( -59), INT8_C( -55), INT8_C( -74), INT8_C(-118), INT8_C( -5), INT8_C( 28), INT8_C( -20), INT8_C( 96), INT8_C(-121), INT8_C( -36), INT8_C( 113), INT8_C( -40), INT8_C(-108), INT8_C( -49), INT8_C( 4), INT8_C( 24), INT8_C( 89), INT8_C( -75), INT8_C( 21), INT8_C(-108), INT8_C( -40), INT8_C( 88), INT8_C( -72), INT8_C( 69), INT8_C( -9), INT8_C( 23), INT8_C( -33)) }, { simde_mm256_set_epi8(INT8_C( -26), INT8_C( 57), INT8_C( -91), INT8_C( 79), INT8_C( 72), INT8_C( -31), INT8_C(-110), INT8_C( 35), INT8_C( -42), INT8_C( 93), INT8_C( -26), INT8_C( 48), INT8_C( 88), INT8_C( 90), INT8_C( 73), INT8_C(-121), INT8_C(-115), INT8_C( 83), INT8_C( -51), INT8_C(-117), INT8_C( -24), INT8_C( -79), INT8_C( 55), INT8_C(-125), INT8_C(-119), INT8_C(-109), INT8_C( -25), INT8_C( 26), INT8_C( -62), INT8_C( 72), INT8_C( 66), INT8_C(-110)), UINT32_C(1476407478), simde_mm256_set_epi8(INT8_C( 105), INT8_C( 4), INT8_C( 0), INT8_C( 126), INT8_C( 17), INT8_C( -37), INT8_C( 124), INT8_C( 81), INT8_C( -49), INT8_C( -80), INT8_C( 50), INT8_C( 114), INT8_C( 103), INT8_C( 115), INT8_C( 11), INT8_C( 121), INT8_C( 83), INT8_C( -70), INT8_C( -20), INT8_C( -26), INT8_C( 113), INT8_C( 81), INT8_C(-104), INT8_C( 87), INT8_C( 17), INT8_C( 59), INT8_C( -92), INT8_C( -92), INT8_C( -62), INT8_C( 84), INT8_C( -91), INT8_C( 84)), simde_mm256_set_epi8(INT8_C( 97), INT8_C(-126), INT8_C( -49), INT8_C( 31), INT8_C( 80), INT8_C( -15), INT8_C(-103), INT8_C( -93), INT8_C( 122), INT8_C( 28), INT8_C( 31), INT8_C( 41), INT8_C( -69), INT8_C( 33), INT8_C( 70), INT8_C( 26), INT8_C( -60), INT8_C( 101), INT8_C( 20), INT8_C( -20), INT8_C( 122), INT8_C( 120), INT8_C(-101), INT8_C( -57), INT8_C( 8), INT8_C( -41), INT8_C( 78), INT8_C( -24), INT8_C( -85), INT8_C( -99), INT8_C( -85), INT8_C( -95)), simde_mm256_set_epi8(INT8_C( -26), INT8_C( -78), 
INT8_C( -91), INT8_C( 69), INT8_C( 125), INT8_C( -31), INT8_C(-110), INT8_C( 35), INT8_C( -42), INT8_C( 93), INT8_C( -26), INT8_C( 48), INT8_C( 88), INT8_C( 90), INT8_C( 73), INT8_C(-121), INT8_C(-115), INT8_C( 83), INT8_C( -76), INT8_C( -39), INT8_C( -24), INT8_C( -79), INT8_C( 55), INT8_C(-125), INT8_C( 104), INT8_C(-109), INT8_C( 75), INT8_C( 75), INT8_C( -62), INT8_C( 27), INT8_C( 51), INT8_C(-110)) }, { simde_mm256_set_epi8(INT8_C( 93), INT8_C( 34), INT8_C( -81), INT8_C( 29), INT8_C( -48), INT8_C( -91), INT8_C(-101), INT8_C( 33), INT8_C( -32), INT8_C(-111), INT8_C( -86), INT8_C( 81), INT8_C( -69), INT8_C( -40), INT8_C(-105), INT8_C( 25), INT8_C( 10), INT8_C( 111), INT8_C( 0), INT8_C( 102), INT8_C( -29), INT8_C( -79), INT8_C( -33), INT8_C( -18), INT8_C(-126), INT8_C( 90), INT8_C( 106), INT8_C(-128), INT8_C( 45), INT8_C( 80), INT8_C(-116), INT8_C( 9)), UINT32_C( 475630479), simde_mm256_set_epi8(INT8_C( 62), INT8_C( -50), INT8_C( 30), INT8_C( -6), INT8_C( 85), INT8_C( -1), INT8_C( 20), INT8_C( -57), INT8_C( -39), INT8_C( 81), INT8_C( -54), INT8_C( 103), INT8_C( -31), INT8_C( 76), INT8_C( -6), INT8_C( 19), INT8_C( 0), INT8_C( -53), INT8_C( -62), INT8_C( 52), INT8_C( -64), INT8_C( 34), INT8_C( -62), INT8_C( 62), INT8_C( 85), INT8_C( -50), INT8_C(-124), INT8_C( -51), INT8_C( -15), INT8_C( 123), INT8_C( -57), INT8_C( 49)), simde_mm256_set_epi8(INT8_C( 10), INT8_C( 16), INT8_C( 30), INT8_C(-118), INT8_C( -14), INT8_C( 16), INT8_C( -24), INT8_C( 121), INT8_C( 27), INT8_C( 42), INT8_C( -96), INT8_C( -57), INT8_C(-122), INT8_C( 20), INT8_C( 54), INT8_C( 94), INT8_C( 37), INT8_C( 113), INT8_C( -54), INT8_C( 15), INT8_C(-119), INT8_C(-119), INT8_C( 102), INT8_C( 12), INT8_C( -50), INT8_C(-100), INT8_C( 91), INT8_C( -73), INT8_C(-128), INT8_C( -69), INT8_C( 75), INT8_C( 41)), simde_mm256_set_epi8(INT8_C( 93), INT8_C( 34), INT8_C( -81), INT8_C( -79), INT8_C(-102), INT8_C( 49), INT8_C(-101), INT8_C( 33), INT8_C( -32), INT8_C( 92), INT8_C( -86), INT8_C( 20), INT8_C( 60), 
INT8_C( -40), INT8_C(-105), INT8_C(-105), INT8_C( 78), INT8_C( 111), INT8_C( 0), INT8_C( 102), INT8_C(-123), INT8_C( -79), INT8_C( 15), INT8_C(-115), INT8_C( -19), INT8_C( 90), INT8_C( 106), INT8_C(-128), INT8_C( 103), INT8_C( 40), INT8_C( 35), INT8_C( -20)) }, { simde_mm256_set_epi8(INT8_C( -37), INT8_C( 0), INT8_C( 84), INT8_C( 112), INT8_C( 67), INT8_C(-128), INT8_C( -76), INT8_C( -90), INT8_C( -77), INT8_C( 21), INT8_C( 79), INT8_C( -2), INT8_C( 5), INT8_C( 83), INT8_C( 16), INT8_C( 106), INT8_C( 20), INT8_C( -1), INT8_C( 126), INT8_C( -58), INT8_C(-122), INT8_C( -63), INT8_C( 0), INT8_C( 21), INT8_C( 125), INT8_C(-120), INT8_C( 11), INT8_C( 36), INT8_C( -16), INT8_C( 30), INT8_C( -96), INT8_C( 28)), UINT32_C(1359096050), simde_mm256_set_epi8(INT8_C( -64), INT8_C( 109), INT8_C( 8), INT8_C( 18), INT8_C( 97), INT8_C( -33), INT8_C( 74), INT8_C( 101), INT8_C( 114), INT8_C( 73), INT8_C( 13), INT8_C( -15), INT8_C( -63), INT8_C( -87), INT8_C( 43), INT8_C( -66), INT8_C( 75), INT8_C( -79), INT8_C( 108), INT8_C( -46), INT8_C(-111), INT8_C( 109), INT8_C( 40), INT8_C( 40), INT8_C( -24), INT8_C( -34), INT8_C( 68), INT8_C( 73), INT8_C( 83), INT8_C( -82), INT8_C( 100), INT8_C( 122)), simde_mm256_set_epi8(INT8_C( 49), INT8_C( 63), INT8_C( -20), INT8_C( 36), INT8_C( 62), INT8_C( 14), INT8_C( -92), INT8_C( -1), INT8_C( 45), INT8_C( 43), INT8_C( -36), INT8_C( -24), INT8_C(-118), INT8_C( 66), INT8_C( -23), INT8_C( 1), INT8_C( 115), INT8_C( -91), INT8_C(-114), INT8_C( -33), INT8_C( 119), INT8_C( -27), INT8_C( -46), INT8_C( -37), INT8_C(-114), INT8_C( 33), INT8_C( 101), INT8_C( -50), INT8_C( 17), INT8_C( -4), INT8_C( 39), INT8_C( -76)), simde_mm256_set_epi8(INT8_C( -37), INT8_C( 40), INT8_C( 84), INT8_C( 81), INT8_C( 67), INT8_C(-128), INT8_C( -76), INT8_C( 25), INT8_C( -77), INT8_C( 21), INT8_C( 79), INT8_C( -2), INT8_C( 5), INT8_C( 83), INT8_C(-116), INT8_C( 106), INT8_C( 20), INT8_C( -1), INT8_C( -62), INT8_C( -58), INT8_C( -43), INT8_C( -63), INT8_C( 0), INT8_C( 21), INT8_C( 
103), INT8_C( 87), INT8_C( 58), INT8_C( 105), INT8_C( -16), INT8_C( 30), INT8_C( 29), INT8_C( 28)) }, { simde_mm256_set_epi8(INT8_C( -37), INT8_C( 29), INT8_C( 33), INT8_C( -67), INT8_C( -58), INT8_C( 64), INT8_C( -22), INT8_C( 127), INT8_C( 113), INT8_C( -3), INT8_C( -82), INT8_C( 61), INT8_C( 114), INT8_C( 95), INT8_C( 56), INT8_C( 11), INT8_C( 39), INT8_C( 125), INT8_C( 59), INT8_C( 11), INT8_C( 10), INT8_C( 91), INT8_C( 91), INT8_C(-118), INT8_C(-117), INT8_C( 125), INT8_C( -40), INT8_C( 88), INT8_C( 24), INT8_C( -96), INT8_C( 22), INT8_C(-107)), UINT32_C( 847147114), simde_mm256_set_epi8(INT8_C( 122), INT8_C( 119), INT8_C( 9), INT8_C( 26), INT8_C( 110), INT8_C( -42), INT8_C( -67), INT8_C( -7), INT8_C( 125), INT8_C( 120), INT8_C( -78), INT8_C( -83), INT8_C( -76), INT8_C( 43), INT8_C( -6), INT8_C( 63), INT8_C( -16), INT8_C( 16), INT8_C( -21), INT8_C(-128), INT8_C( 14), INT8_C( -28), INT8_C( 76), INT8_C(-108), INT8_C( -40), INT8_C( -35), INT8_C( 36), INT8_C( -38), INT8_C( 3), INT8_C( -17), INT8_C( 32), INT8_C( 103)), simde_mm256_set_epi8(INT8_C( -92), INT8_C( -10), INT8_C( -37), INT8_C( -55), INT8_C( 26), INT8_C( 99), INT8_C( 118), INT8_C( 104), INT8_C( 69), INT8_C( -79), INT8_C( 96), INT8_C( 14), INT8_C( 27), INT8_C( 115), INT8_C( -2), INT8_C( -45), INT8_C( 88), INT8_C( -52), INT8_C( 70), INT8_C(-120), INT8_C( 78), INT8_C( 99), INT8_C( 69), INT8_C(-111), INT8_C( 113), INT8_C( 76), INT8_C( 28), INT8_C( 7), INT8_C( -98), INT8_C( -99), INT8_C(-123), INT8_C( 88)), simde_mm256_set_epi8(INT8_C( -37), INT8_C( 29), INT8_C( 37), INT8_C( -23), INT8_C( -58), INT8_C( 64), INT8_C( 43), INT8_C( 127), INT8_C( 113), INT8_C( -87), INT8_C( -26), INT8_C( 13), INT8_C( 15), INT8_C( 70), INT8_C( -99), INT8_C( 11), INT8_C( 39), INT8_C( -50), INT8_C( 71), INT8_C( 16), INT8_C( 10), INT8_C( 91), INT8_C( 91), INT8_C(-118), INT8_C(-117), INT8_C( -65), INT8_C( 111), INT8_C( 88), INT8_C( 111), INT8_C( -96), INT8_C( 84), INT8_C(-107)) }, { simde_mm256_set_epi8(INT8_C( 54), INT8_C( -69), 
INT8_C( 72), INT8_C( 111), INT8_C( 33), INT8_C( -99), INT8_C( -4), INT8_C( 32), INT8_C(-118), INT8_C( 119), INT8_C( 126), INT8_C( -30), INT8_C( -60), INT8_C( -48), INT8_C( -93), INT8_C( 113), INT8_C( 44), INT8_C( 13), INT8_C( 40), INT8_C( 25), INT8_C( -54), INT8_C( -49), INT8_C(-104), INT8_C(-106), INT8_C( -8), INT8_C( 71), INT8_C( 97), INT8_C( -37), INT8_C( -97), INT8_C( 104), INT8_C( -72), INT8_C( -12)), UINT32_C(3956112043), simde_mm256_set_epi8(INT8_C( 51), INT8_C( 38), INT8_C( -90), INT8_C( 14), INT8_C( -23), INT8_C( 121), INT8_C( 31), INT8_C( -78), INT8_C( -80), INT8_C( -80), INT8_C( -11), INT8_C( 101), INT8_C( -38), INT8_C( -92), INT8_C( -62), INT8_C( 21), INT8_C( -98), INT8_C( 125), INT8_C( 126), INT8_C( 47), INT8_C( -61), INT8_C( 119), INT8_C( 16), INT8_C( -89), INT8_C( 113), INT8_C( -56), INT8_C( -82), INT8_C(-119), INT8_C( 18), INT8_C( -58), INT8_C( 99), INT8_C( -43)), simde_mm256_set_epi8(INT8_C( 113), INT8_C( -85), INT8_C( -34), INT8_C( 49), INT8_C( 43), INT8_C( -40), INT8_C( -68), INT8_C( 40), INT8_C( 117), INT8_C( 67), INT8_C( -53), INT8_C( 2), INT8_C( -36), INT8_C( 64), INT8_C( -49), INT8_C( 42), INT8_C( -73), INT8_C( 13), INT8_C( 54), INT8_C(-116), INT8_C(-102), INT8_C( -83), INT8_C( -29), INT8_C( 55), INT8_C( 57), INT8_C( 87), INT8_C( 73), INT8_C( -70), INT8_C(-103), INT8_C(-105), INT8_C( -99), INT8_C( 2)), simde_mm256_set_epi8(INT8_C( 2), INT8_C( 5), INT8_C( -17), INT8_C( 111), INT8_C( -49), INT8_C( -99), INT8_C(-120), INT8_C(-100), INT8_C( -53), INT8_C( -53), INT8_C( 126), INT8_C( -30), INT8_C( 60), INT8_C( 22), INT8_C( -93), INT8_C(-110), INT8_C( 44), INT8_C( -24), INT8_C( -44), INT8_C( -94), INT8_C( -28), INT8_C( -49), INT8_C( -47), INT8_C(-106), INT8_C( -5), INT8_C( 71), INT8_C( -31), INT8_C( -37), INT8_C( -56), INT8_C( 104), INT8_C(-106), INT8_C( -53)) }, { simde_mm256_set_epi8(INT8_C( 14), INT8_C( 10), INT8_C( 41), INT8_C( 17), INT8_C( -27), INT8_C(-110), INT8_C( 84), INT8_C( -17), INT8_C( 100), INT8_C( 28), INT8_C( -98), INT8_C( -75), 
INT8_C( 86), INT8_C( -13), INT8_C( -71), INT8_C( -97), INT8_C(-109), INT8_C( -82), INT8_C( 68), INT8_C( 69), INT8_C( 3), INT8_C( 91), INT8_C( -46), INT8_C( 55), INT8_C( -27), INT8_C( -27), INT8_C( -39), INT8_C( -77), INT8_C( 21), INT8_C( 50), INT8_C( 79), INT8_C( -83)), UINT32_C(2605025778), simde_mm256_set_epi8(INT8_C( 38), INT8_C( -41), INT8_C( 112), INT8_C( 117), INT8_C( 58), INT8_C( 17), INT8_C(-102), INT8_C( -45), INT8_C( 5), INT8_C( 37), INT8_C( 20), INT8_C( -10), INT8_C( 125), INT8_C( -30), INT8_C( -78), INT8_C(-119), INT8_C( -34), INT8_C( -69), INT8_C( 5), INT8_C( -4), INT8_C( 57), INT8_C( -46), INT8_C( 38), INT8_C(-103), INT8_C( 38), INT8_C( 94), INT8_C( 6), INT8_C( 108), INT8_C( 102), INT8_C( 123), INT8_C( 103), INT8_C( 112)), simde_mm256_set_epi8(INT8_C( 98), INT8_C( 31), INT8_C( -53), INT8_C( 122), INT8_C( -44), INT8_C( 121), INT8_C( -29), INT8_C( -24), INT8_C( 49), INT8_C(-103), INT8_C( -9), INT8_C(-119), INT8_C( -99), INT8_C( 40), INT8_C( -76), INT8_C( 102), INT8_C( -62), INT8_C( -47), INT8_C(-114), INT8_C( 87), INT8_C( -23), INT8_C( -34), INT8_C( -44), INT8_C( 6), INT8_C( 21), INT8_C( -94), INT8_C( -31), INT8_C( 89), INT8_C( -3), INT8_C( 101), INT8_C( 62), INT8_C( -72)), simde_mm256_set_epi8(INT8_C( -35), INT8_C( 10), INT8_C( 41), INT8_C( 61), INT8_C( -89), INT8_C(-110), INT8_C( 53), INT8_C( 115), INT8_C( 100), INT8_C( 59), INT8_C( -98), INT8_C( -75), INT8_C( 86), INT8_C(-109), INT8_C( -71), INT8_C( 45), INT8_C( 83), INT8_C( -82), INT8_C( 68), INT8_C( 69), INT8_C( -62), INT8_C( 91), INT8_C( -46), INT8_C( -56), INT8_C( -10), INT8_C( -71), INT8_C( -6), INT8_C( 30), INT8_C( 21), INT8_C( 50), INT8_C( 13), INT8_C( -83)) }, };
/* Run every test vector through the function under test; the element count is
 * derived from the array itself, and the final argument is a literal that is
 * identical for all entries. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_mask_gf2p8affineinv_epi64_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].x, test_vec[i].A, INT8_C( 78)); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; }
static int 
test_simde_mm512_mask_gf2p8affineinv_epi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask64 k; simde__m512i x; simde__m512i A; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( -60), INT8_C(-108), INT8_C( 94), INT8_C( -85), INT8_C( 51), INT8_C( 20), INT8_C( 52), INT8_C(-114), INT8_C( -6), INT8_C( 43), INT8_C( 55), INT8_C( 12), INT8_C( 60), INT8_C( 72), INT8_C( 58), INT8_C( 119), INT8_C(-125), INT8_C(-116), INT8_C( -2), INT8_C( 35), INT8_C( 46), INT8_C( -63), INT8_C( -77), INT8_C( -15), INT8_C( -42), INT8_C( 62), INT8_C( 88), INT8_C( 7), INT8_C( 61), INT8_C( 116), INT8_C( -89), INT8_C( 122), INT8_C( 102), INT8_C( -48), INT8_C( 124), INT8_C( 70), INT8_C( 47), INT8_C( 38), INT8_C( 113), INT8_C( -68), INT8_C( -88), INT8_C( 75), INT8_C( 45), INT8_C( 101), INT8_C( -48), INT8_C( 40), INT8_C( 43), INT8_C(-120), INT8_C( 86), INT8_C(-109), INT8_C(-116), INT8_C( 52), INT8_C( -80), INT8_C( -39), INT8_C( -82), INT8_C( 33), INT8_C( 37), INT8_C( 31), INT8_C( 47), INT8_C( 64), INT8_C( 83), INT8_C( 59), INT8_C( -2), INT8_C(-123)), UINT64_C( 5672856459990176132), simde_mm512_set_epi8(INT8_C( -22), INT8_C( -88), INT8_C(-110), INT8_C( -48), INT8_C( 76), INT8_C( -59), INT8_C( 68), INT8_C( -74), INT8_C(-118), INT8_C( -5), INT8_C( 100), INT8_C( 20), INT8_C( 96), INT8_C(-121), INT8_C( 54), INT8_C( 113), INT8_C( -38), INT8_C( 70), INT8_C( -49), INT8_C( 4), INT8_C( -35), INT8_C( 89), INT8_C(-103), INT8_C( 21), INT8_C( 58), INT8_C( -45), INT8_C( 126), INT8_C( -7), INT8_C( 69), INT8_C( 5), INT8_C( -82), INT8_C(-111), INT8_C( -84), INT8_C( 28), INT8_C( -27), INT8_C(-123), INT8_C( 82), INT8_C(-107), INT8_C( -80), INT8_C( 79), INT8_C( 120), INT8_C( -68), INT8_C( -84), INT8_C( -49), INT8_C( 87), INT8_C( -79), INT8_C( -74), INT8_C( 92), INT8_C( 64), INT8_C(-128), INT8_C( -63), INT8_C( -93), INT8_C( 85), INT8_C( -44), INT8_C( 26), INT8_C(-115), INT8_C(-101), INT8_C( 76), INT8_C(-128), INT8_C( -17), INT8_C(-103), INT8_C( 2), INT8_C( 62), INT8_C(-126)), 
simde_mm512_set_epi8(INT8_C( 71), INT8_C( 78), INT8_C( 102), INT8_C( -87), INT8_C( -42), INT8_C( 16), INT8_C( 120), INT8_C( -7), INT8_C( 86), INT8_C( 101), INT8_C( 24), INT8_C( 125), INT8_C( -69), INT8_C( 65), INT8_C( 12), INT8_C( -54), INT8_C( 103), INT8_C( -8), INT8_C( 63), INT8_C( 127), INT8_C( -97), INT8_C( 62), INT8_C( 26), INT8_C( 78), INT8_C( -69), INT8_C( -62), INT8_C(-105), INT8_C( 38), INT8_C( 99), INT8_C( -23), INT8_C( 54), INT8_C( -37), INT8_C( 100), INT8_C( 45), INT8_C( -71), INT8_C(-119), INT8_C( 49), INT8_C( -65), INT8_C( 94), INT8_C( 96), INT8_C( -71), INT8_C( 118), INT8_C( -91), INT8_C( -10), INT8_C( 38), INT8_C( -84), INT8_C( -72), INT8_C( -61), INT8_C( 27), INT8_C( -2), INT8_C( 62), INT8_C( 34), INT8_C( 118), INT8_C( 94), INT8_C( -81), INT8_C( 50), INT8_C( -73), INT8_C( 61), INT8_C( 67), INT8_C( -62), INT8_C( 2), INT8_C( 50), INT8_C( -54), INT8_C( -9)), simde_mm512_set_epi8(INT8_C( -60), INT8_C( 96), INT8_C( 94), INT8_C( -85), INT8_C(-100), INT8_C( 4), INT8_C( 52), INT8_C(-114), INT8_C( 80), INT8_C( 43), INT8_C( -31), INT8_C( -57), INT8_C( 107), INT8_C( 72), INT8_C( -57), INT8_C( 119), INT8_C(-125), INT8_C(-116), INT8_C( -2), INT8_C( 35), INT8_C(-102), INT8_C( -63), INT8_C( -77), INT8_C( -15), INT8_C( -43), INT8_C( 13), INT8_C( -66), INT8_C( 7), INT8_C( 61), INT8_C( 4), INT8_C( -89), INT8_C( 122), INT8_C( 44), INT8_C( -47), INT8_C( -63), INT8_C( 70), INT8_C( -15), INT8_C(-114), INT8_C( -95), INT8_C( -4), INT8_C( -88), INT8_C( -81), INT8_C( 14), INT8_C( 16), INT8_C( 53), INT8_C( -76), INT8_C( 115), INT8_C(-120), INT8_C( 75), INT8_C(-109), INT8_C(-116), INT8_C( 98), INT8_C( -80), INT8_C( -39), INT8_C( -82), INT8_C( 83), INT8_C( 39), INT8_C( 31), INT8_C( 47), INT8_C( 64), INT8_C( 83), INT8_C( 35), INT8_C( -2), INT8_C(-123)) }, { simde_mm512_set_epi8(INT8_C( 83), INT8_C( -70), INT8_C( -20), INT8_C( -26), INT8_C( 113), INT8_C( 81), INT8_C(-104), INT8_C( 87), INT8_C( 17), INT8_C( 59), INT8_C( -92), INT8_C( -92), INT8_C( -62), INT8_C( 84), INT8_C( -91), 
INT8_C( 84), INT8_C( 12), INT8_C( 33), INT8_C( -50), INT8_C( -22), INT8_C( 88), INT8_C( 0), INT8_C( 48), INT8_C( -74), INT8_C( -26), INT8_C( 57), INT8_C( -91), INT8_C( 79), INT8_C( 72), INT8_C( -31), INT8_C(-110), INT8_C( 35), INT8_C( -42), INT8_C( 93), INT8_C( -26), INT8_C( 48), INT8_C( 88), INT8_C( 90), INT8_C( 73), INT8_C(-121), INT8_C(-115), INT8_C( 83), INT8_C( -51), INT8_C(-117), INT8_C( -24), INT8_C( -79), INT8_C( 55), INT8_C(-125), INT8_C(-119), INT8_C(-109), INT8_C( -25), INT8_C( 26), INT8_C( -62), INT8_C( 72), INT8_C( 66), INT8_C(-110), INT8_C( 56), INT8_C( -71), INT8_C( 70), INT8_C( 52), INT8_C( 105), INT8_C( 74), INT8_C( -52), INT8_C( 125)), UINT64_C(14965517028695411577), simde_mm512_set_epi8(INT8_C( -32), INT8_C(-111), INT8_C( -86), INT8_C( 81), INT8_C( -69), INT8_C( -40), INT8_C(-105), INT8_C( 25), INT8_C( 10), INT8_C( 111), INT8_C( 0), INT8_C( 102), INT8_C( -29), INT8_C( -79), INT8_C( -33), INT8_C( -18), INT8_C(-126), INT8_C( 90), INT8_C( 106), INT8_C(-128), INT8_C( 45), INT8_C( 80), INT8_C(-116), INT8_C( 9), INT8_C( 97), INT8_C(-126), INT8_C( -49), INT8_C( 31), INT8_C( 80), INT8_C( -15), INT8_C(-103), INT8_C( -93), INT8_C( 122), INT8_C( 28), INT8_C( 31), INT8_C( 41), INT8_C( -69), INT8_C( 33), INT8_C( 70), INT8_C( 26), INT8_C( -60), INT8_C( 101), INT8_C( 20), INT8_C( -20), INT8_C( 122), INT8_C( 120), INT8_C(-101), INT8_C( -57), INT8_C( 8), INT8_C( -41), INT8_C( 78), INT8_C( -24), INT8_C( -85), INT8_C( -99), INT8_C( -85), INT8_C( -95), INT8_C( 105), INT8_C( 4), INT8_C( 0), INT8_C( 126), INT8_C( 17), INT8_C( -37), INT8_C( 124), INT8_C( 81)), simde_mm512_set_epi8(INT8_C( 37), INT8_C( 113), INT8_C( -54), INT8_C( 15), INT8_C(-119), INT8_C(-119), INT8_C( 102), INT8_C( 12), INT8_C( -50), INT8_C(-100), INT8_C( 91), INT8_C( -73), INT8_C(-128), INT8_C( -69), INT8_C( 75), INT8_C( 41), INT8_C( 62), INT8_C( -50), INT8_C( 30), INT8_C( -6), INT8_C( 85), INT8_C( -1), INT8_C( 20), INT8_C( -57), INT8_C( -39), INT8_C( 81), INT8_C( -54), INT8_C( 103), INT8_C( -31), 
INT8_C( 76), INT8_C( -6), INT8_C( 19), INT8_C( 0), INT8_C( -53), INT8_C( -62), INT8_C( 52), INT8_C( -64), INT8_C( 34), INT8_C( -62), INT8_C( 62), INT8_C( 85), INT8_C( -50), INT8_C(-124), INT8_C( -51), INT8_C( -15), INT8_C( 123), INT8_C( -57), INT8_C( 49), INT8_C( -56), INT8_C( -96), INT8_C(-107), INT8_C( 81), INT8_C( 28), INT8_C( 89), INT8_C(-117), INT8_C(-113), INT8_C( 93), INT8_C( 34), INT8_C( -81), INT8_C( 29), INT8_C( -48), INT8_C( -91), INT8_C(-101), INT8_C( 33)), simde_mm512_set_epi8(INT8_C( -30), INT8_C( 89), INT8_C( -20), INT8_C( -26), INT8_C( -93), INT8_C( 83), INT8_C( -25), INT8_C( -17), INT8_C( 15), INT8_C( 59), INT8_C( -84), INT8_C( 76), INT8_C( -62), INT8_C( 84), INT8_C( -91), INT8_C( 84), INT8_C( 12), INT8_C( 33), INT8_C( -53), INT8_C( 25), INT8_C( 88), INT8_C( 0), INT8_C( 0), INT8_C( -74), INT8_C( -26), INT8_C( 82), INT8_C( 58), INT8_C( -82), INT8_C( 72), INT8_C( -31), INT8_C( 71), INT8_C( 35), INT8_C( -42), INT8_C( 98), INT8_C( 60), INT8_C( 48), INT8_C( 88), INT8_C( -66), INT8_C( 6), INT8_C(-124), INT8_C(-115), INT8_C( 111), INT8_C(-112), INT8_C( -6), INT8_C( -24), INT8_C( -79), INT8_C( -40), INT8_C( 82), INT8_C(-119), INT8_C(-109), INT8_C( -25), INT8_C( 26), INT8_C( -76), INT8_C( 72), INT8_C( -76), INT8_C( -34), INT8_C( 56), INT8_C( 47), INT8_C( -84), INT8_C( 53), INT8_C( 10), INT8_C( 74), INT8_C( -52), INT8_C(-124)) }, { simde_mm512_set_epi8(INT8_C( -24), INT8_C( -34), INT8_C( 68), INT8_C( 73), INT8_C( 83), INT8_C( -82), INT8_C( 100), INT8_C( 122), INT8_C(-100), INT8_C(-111), INT8_C( 81), INT8_C( -18), INT8_C( 81), INT8_C( 2), INT8_C( 40), INT8_C( -14), INT8_C( -37), INT8_C( 0), INT8_C( 84), INT8_C( 112), INT8_C( 67), INT8_C(-128), INT8_C( -76), INT8_C( -90), INT8_C( -77), INT8_C( 21), INT8_C( 79), INT8_C( -2), INT8_C( 5), INT8_C( 83), INT8_C( 16), INT8_C( 106), INT8_C( 20), INT8_C( -1), INT8_C( 126), INT8_C( -58), INT8_C(-122), INT8_C( -63), INT8_C( 0), INT8_C( 21), INT8_C( 125), INT8_C(-120), INT8_C( 11), INT8_C( 36), INT8_C( -16), INT8_C( 30), 
INT8_C( -96), INT8_C( 28), INT8_C( 10), INT8_C( 16), INT8_C( 30), INT8_C(-118), INT8_C( -14), INT8_C( 16), INT8_C( -24), INT8_C( 121), INT8_C( 27), INT8_C( 42), INT8_C( -96), INT8_C( -57), INT8_C(-122), INT8_C( 20), INT8_C( 54), INT8_C( 94)), UINT64_C( 5454260275361163304), simde_mm512_set_epi8(INT8_C( 39), INT8_C( 125), INT8_C( 59), INT8_C( 11), INT8_C( 10), INT8_C( 91), INT8_C( 91), INT8_C(-118), INT8_C(-117), INT8_C( 125), INT8_C( -40), INT8_C( 88), INT8_C( 24), INT8_C( -96), INT8_C( 22), INT8_C(-107), INT8_C( 49), INT8_C( 63), INT8_C( -20), INT8_C( 36), INT8_C( 62), INT8_C( 14), INT8_C( -92), INT8_C( -1), INT8_C( 45), INT8_C( 43), INT8_C( -36), INT8_C( -24), INT8_C(-118), INT8_C( 66), INT8_C( -23), INT8_C( 1), INT8_C( 115), INT8_C( -91), INT8_C(-114), INT8_C( -33), INT8_C( 119), INT8_C( -27), INT8_C( -46), INT8_C( -37), INT8_C(-114), INT8_C( 33), INT8_C( 101), INT8_C( -50), INT8_C( 17), INT8_C( -4), INT8_C( 39), INT8_C( -76), INT8_C( -64), INT8_C( 109), INT8_C( 8), INT8_C( 18), INT8_C( 97), INT8_C( -33), INT8_C( 74), INT8_C( 101), INT8_C( 114), INT8_C( 73), INT8_C( 13), INT8_C( -15), INT8_C( -63), INT8_C( -87), INT8_C( 43), INT8_C( -66)), simde_mm512_set_epi8(INT8_C( 113), INT8_C( 76), INT8_C( 28), INT8_C( 7), INT8_C( -98), INT8_C( -99), INT8_C(-123), INT8_C( 88), INT8_C( 122), INT8_C( 119), INT8_C( 9), INT8_C( 26), INT8_C( 110), INT8_C( -42), INT8_C( -67), INT8_C( -7), INT8_C( 125), INT8_C( 120), INT8_C( -78), INT8_C( -83), INT8_C( -76), INT8_C( 43), INT8_C( -6), INT8_C( 63), INT8_C( -16), INT8_C( 16), INT8_C( -21), INT8_C(-128), INT8_C( 14), INT8_C( -28), INT8_C( 76), INT8_C(-108), INT8_C( -40), INT8_C( -35), INT8_C( 36), INT8_C( -38), INT8_C( 3), INT8_C( -17), INT8_C( 32), INT8_C( 103), INT8_C(-115), INT8_C( 83), INT8_C( -28), INT8_C( -74), INT8_C( 50), INT8_C( 126), INT8_C( 112), INT8_C( 106), INT8_C( -37), INT8_C( 29), INT8_C( 33), INT8_C( -67), INT8_C( -58), INT8_C( 64), INT8_C( -22), INT8_C( 127), INT8_C( 113), INT8_C( -3), INT8_C( -82), INT8_C( 61), 
INT8_C( 114), INT8_C( 95), INT8_C( 56), INT8_C( 11)), simde_mm512_set_epi8(INT8_C( -24), INT8_C( 69), INT8_C( 68), INT8_C( 73), INT8_C( 114), INT8_C( -82), INT8_C( -21), INT8_C( 126), INT8_C( 15), INT8_C(-111), INT8_C( -43), INT8_C(-102), INT8_C( 81), INT8_C( 2), INT8_C( 40), INT8_C( -86), INT8_C( -37), INT8_C( 57), INT8_C( -29), INT8_C( 112), INT8_C( 122), INT8_C( 124), INT8_C( -76), INT8_C( -90), INT8_C( 57), INT8_C( -37), INT8_C( 79), INT8_C( -8), INT8_C( 5), INT8_C( 83), INT8_C( 121), INT8_C( 106), INT8_C(-109), INT8_C( -1), INT8_C( 126), INT8_C( -62), INT8_C(-122), INT8_C( -63), INT8_C( 0), INT8_C( 51), INT8_C( 125), INT8_C(-128), INT8_C( -54), INT8_C( 36), INT8_C( 10), INT8_C( -56), INT8_C( -96), INT8_C( -43), INT8_C( 10), INT8_C( 16), INT8_C( 3), INT8_C(-118), INT8_C( 86), INT8_C( 16), INT8_C( -24), INT8_C( 121), INT8_C( 27), INT8_C( 42), INT8_C( 109), INT8_C( -57), INT8_C( 29), INT8_C( 20), INT8_C( 54), INT8_C( 94)) }, { simde_mm512_set_epi8(INT8_C( 63), INT8_C( 74), INT8_C( 76), INT8_C( -51), INT8_C( -21), INT8_C( -51), INT8_C( 122), INT8_C( -85), INT8_C( 54), INT8_C( -69), INT8_C( 72), INT8_C( 111), INT8_C( 33), INT8_C( -99), INT8_C( -4), INT8_C( 32), INT8_C(-118), INT8_C( 119), INT8_C( 126), INT8_C( -30), INT8_C( -60), INT8_C( -48), INT8_C( -93), INT8_C( 113), INT8_C( 44), INT8_C( 13), INT8_C( 40), INT8_C( 25), INT8_C( -54), INT8_C( -49), INT8_C(-104), INT8_C(-106), INT8_C( -8), INT8_C( 71), INT8_C( 97), INT8_C( -37), INT8_C( -97), INT8_C( 104), INT8_C( -72), INT8_C( -12), INT8_C( -92), INT8_C( -10), INT8_C( -37), INT8_C( -55), INT8_C( 26), INT8_C( 99), INT8_C( 118), INT8_C( 104), INT8_C( 69), INT8_C( -79), INT8_C( 96), INT8_C( 14), INT8_C( 27), INT8_C( 115), INT8_C( -2), INT8_C( -45), INT8_C( 88), INT8_C( -52), INT8_C( 70), INT8_C(-120), INT8_C( 78), INT8_C( 99), INT8_C( 69), INT8_C(-111)), UINT64_C( 8198995025376732117), simde_mm512_set_epi8(INT8_C( -27), INT8_C( -27), INT8_C( -39), INT8_C( -77), INT8_C( 21), INT8_C( 50), INT8_C( 79), INT8_C( -83), 
INT8_C( 113), INT8_C( -85), INT8_C( -34), INT8_C( 49), INT8_C( 43), INT8_C( -40), INT8_C( -68), INT8_C( 40), INT8_C( 117), INT8_C( 67), INT8_C( -53), INT8_C( 2), INT8_C( -36), INT8_C( 64), INT8_C( -49), INT8_C( 42), INT8_C( -73), INT8_C( 13), INT8_C( 54), INT8_C(-116), INT8_C(-102), INT8_C( -83), INT8_C( -29), INT8_C( 55), INT8_C( 57), INT8_C( 87), INT8_C( 73), INT8_C( -70), INT8_C(-103), INT8_C(-105), INT8_C( -99), INT8_C( 2), INT8_C( 51), INT8_C( 38), INT8_C( -90), INT8_C( 14), INT8_C( -23), INT8_C( 121), INT8_C( 31), INT8_C( -78), INT8_C( -80), INT8_C( -80), INT8_C( -11), INT8_C( 101), INT8_C( -38), INT8_C( -92), INT8_C( -62), INT8_C( 21), INT8_C( -98), INT8_C( 125), INT8_C( 126), INT8_C( 47), INT8_C( -61), INT8_C( 119), INT8_C( 16), INT8_C( -89)), simde_mm512_set_epi8(INT8_C( 38), INT8_C( -41), INT8_C( 112), INT8_C( 117), INT8_C( 58), INT8_C( 17), INT8_C(-102), INT8_C( -45), INT8_C( 5), INT8_C( 37), INT8_C( 20), INT8_C( -10), INT8_C( 125), INT8_C( -30), INT8_C( -78), INT8_C(-119), INT8_C( -34), INT8_C( -69), INT8_C( 5), INT8_C( -4), INT8_C( 57), INT8_C( -46), INT8_C( 38), INT8_C(-103), INT8_C( 38), INT8_C( 94), INT8_C( 6), INT8_C( 108), INT8_C( 102), INT8_C( 123), INT8_C( 103), INT8_C( 112), INT8_C( -27), INT8_C(-114), INT8_C(-101), INT8_C(-115), INT8_C(-101), INT8_C( 69), INT8_C(-119), INT8_C( -14), INT8_C( 14), INT8_C( 10), INT8_C( 41), INT8_C( 17), INT8_C( -27), INT8_C(-110), INT8_C( 84), INT8_C( -17), INT8_C( 100), INT8_C( 28), INT8_C( -98), INT8_C( -75), INT8_C( 86), INT8_C( -13), INT8_C( -71), INT8_C( -97), INT8_C(-109), INT8_C( -82), INT8_C( 68), INT8_C( 69), INT8_C( 3), INT8_C( 91), INT8_C( -46), INT8_C( 55)), simde_mm512_set_epi8(INT8_C( 63), INT8_C( 36), INT8_C( 71), INT8_C( -33), INT8_C( -21), INT8_C( -51), INT8_C( 122), INT8_C(-113), INT8_C(-122), INT8_C( 108), INT8_C( 72), INT8_C( 111), INT8_C( 124), INT8_C( -99), INT8_C( -4), INT8_C( 32), INT8_C( 61), INT8_C( 119), INT8_C( -31), INT8_C( -30), INT8_C( -4), INT8_C( 87), INT8_C( 90), INT8_C( 113), 
INT8_C( 109), INT8_C( 13), INT8_C( 40), INT8_C( 25), INT8_C( 108), INT8_C( -49), INT8_C(-104), INT8_C( 33), INT8_C( -8), INT8_C( 71), INT8_C( 97), INT8_C( -91), INT8_C( -97), INT8_C( 104), INT8_C( 51), INT8_C( -12), INT8_C( -66), INT8_C( 15), INT8_C( -37), INT8_C( -55), INT8_C( 26), INT8_C(-128), INT8_C( 67), INT8_C( 104), INT8_C( 69), INT8_C( -11), INT8_C( -74), INT8_C( 14), INT8_C( 27), INT8_C( 115), INT8_C( -64), INT8_C( 95), INT8_C( 116), INT8_C( 49), INT8_C( 70), INT8_C( 112), INT8_C( 78), INT8_C( 99), INT8_C( 69), INT8_C( -30)) }, { simde_mm512_set_epi8(INT8_C( 123), INT8_C( 45), INT8_C( 89), INT8_C( -95), INT8_C(-117), INT8_C( -47), INT8_C( 71), INT8_C( 107), INT8_C( -86), INT8_C( -74), INT8_C( 66), INT8_C( -36), INT8_C( -2), INT8_C( 91), INT8_C( -51), INT8_C( 23), INT8_C( -45), INT8_C( -60), INT8_C( -99), INT8_C( 65), INT8_C(-113), INT8_C( -7), INT8_C( -76), INT8_C( 51), INT8_C( 18), INT8_C( 27), INT8_C( -14), INT8_C( 124), INT8_C( -66), INT8_C( -18), INT8_C( -5), INT8_C( 0), INT8_C( 98), INT8_C( 31), INT8_C( -53), INT8_C( 122), INT8_C( -44), INT8_C( 121), INT8_C( -29), INT8_C( -24), INT8_C( 49), INT8_C(-103), INT8_C( -9), INT8_C(-119), INT8_C( -99), INT8_C( 40), INT8_C( -76), INT8_C( 102), INT8_C( -62), INT8_C( -47), INT8_C(-114), INT8_C( 87), INT8_C( -23), INT8_C( -34), INT8_C( -44), INT8_C( 6), INT8_C( 21), INT8_C( -94), INT8_C( -31), INT8_C( 89), INT8_C( -3), INT8_C( 101), INT8_C( 62), INT8_C( -72)), UINT64_C(14383600667434858867), simde_mm512_set_epi8(INT8_C( 7), INT8_C( 50), INT8_C(-110), INT8_C( 70), INT8_C( -40), INT8_C( 59), INT8_C( 127), INT8_C( -94), INT8_C( -96), INT8_C( -66), INT8_C( -54), INT8_C(-111), INT8_C( -88), INT8_C( -27), INT8_C( -51), INT8_C( -96), INT8_C( 118), INT8_C( -71), INT8_C( 125), INT8_C( 110), INT8_C(-115), INT8_C(-128), INT8_C( 71), INT8_C(-120), INT8_C(-113), INT8_C( 21), INT8_C( -93), INT8_C(-123), INT8_C( -55), INT8_C( -21), INT8_C( -23), INT8_C( 47), INT8_C( 101), INT8_C(-127), INT8_C( 66), INT8_C( -9), INT8_C( -88), 
INT8_C( -89), INT8_C( 79), INT8_C( -89), INT8_C(-125), INT8_C( 89), INT8_C( 66), INT8_C( 35), INT8_C( -61), INT8_C( 48), INT8_C( -39), INT8_C( 121), INT8_C( 7), INT8_C( -12), INT8_C( 120), INT8_C( 61), INT8_C( -91), INT8_C( 115), INT8_C( 9), INT8_C( 59), INT8_C( -83), INT8_C(-127), INT8_C( 36), INT8_C( -14), INT8_C( -98), INT8_C( 13), INT8_C( -23), INT8_C( -82)), simde_mm512_set_epi8(INT8_C( -30), INT8_C( 110), INT8_C( -97), INT8_C( -76), INT8_C( -96), INT8_C( 126), INT8_C( -48), INT8_C(-117), INT8_C( 20), INT8_C( 23), INT8_C( 95), INT8_C( -52), INT8_C( 15), INT8_C( -26), INT8_C( -88), INT8_C( -3), INT8_C( 69), INT8_C( -98), INT8_C(-100), INT8_C( 33), INT8_C( -17), INT8_C( -10), INT8_C( -53), INT8_C( 53), INT8_C( -25), INT8_C( 38), INT8_C( 75), INT8_C( 71), INT8_C( 106), INT8_C( -45), INT8_C( -51), INT8_C( 79), INT8_C( 54), INT8_C( -50), INT8_C( -94), INT8_C( 16), INT8_C( 3), INT8_C( -22), INT8_C( -34), INT8_C( 54), INT8_C( 100), INT8_C(-117), INT8_C( 118), INT8_C( 16), INT8_C( 34), INT8_C( -48), INT8_C( -90), INT8_C( 66), INT8_C( -43), INT8_C(-128), INT8_C( 122), INT8_C( -97), INT8_C( 32), INT8_C( -77), INT8_C( -10), INT8_C( -9), INT8_C( -5), INT8_C( -84), INT8_C(-115), INT8_C(-106), INT8_C( 40), INT8_C(-127), INT8_C( 47), INT8_C( -89)), simde_mm512_set_epi8(INT8_C( -6), INT8_C( -70), INT8_C( 89), INT8_C( -95), INT8_C(-117), INT8_C( 95), INT8_C( -42), INT8_C( -72), INT8_C( -13), INT8_C( -74), INT8_C( 66), INT8_C( 10), INT8_C( -59), INT8_C( -7), INT8_C( -51), INT8_C( 23), INT8_C( -32), INT8_C( 73), INT8_C( -99), INT8_C( -51), INT8_C(-113), INT8_C( -7), INT8_C( -76), INT8_C( 51), INT8_C( 21), INT8_C( 27), INT8_C( -14), INT8_C( 48), INT8_C( 38), INT8_C( -18), INT8_C( -5), INT8_C(-113), INT8_C( 98), INT8_C( -12), INT8_C( -28), INT8_C( 107), INT8_C( -44), INT8_C( -84), INT8_C( -29), INT8_C( -84), INT8_C( -50), INT8_C(-103), INT8_C( -9), INT8_C(-119), INT8_C( -99), INT8_C( -66), INT8_C( 24), INT8_C( 102), INT8_C( -58), INT8_C( -47), INT8_C( 123), INT8_C( 23), INT8_C( 
82), INT8_C( -34), INT8_C( -44), INT8_C( 29), INT8_C( 21), INT8_C( 39), INT8_C(-113), INT8_C( 33), INT8_C( -3), INT8_C( 101), INT8_C( -3), INT8_C( -62)) }, { simde_mm512_set_epi8(INT8_C( 29), INT8_C( 58), INT8_C( 95), INT8_C( 42), INT8_C( 15), INT8_C( 43), INT8_C( 119), INT8_C( 59), INT8_C(-127), INT8_C( -47), INT8_C( -59), INT8_C( -3), INT8_C( -51), INT8_C( 41), INT8_C( 7), INT8_C( -3), INT8_C( 80), INT8_C( 88), INT8_C( 103), INT8_C( -43), INT8_C(-102), INT8_C( 82), INT8_C( -24), INT8_C( -40), INT8_C( -58), INT8_C( 26), INT8_C( 95), INT8_C( -4), INT8_C( -1), INT8_C(-103), INT8_C( -99), INT8_C( -29), INT8_C( -5), INT8_C( 1), INT8_C( -57), INT8_C( 73), INT8_C( -83), INT8_C( 48), INT8_C( 42), INT8_C(-113), INT8_C( 102), INT8_C( 39), INT8_C( -51), INT8_C( 5), INT8_C( -36), INT8_C( -70), INT8_C( 38), INT8_C(-126), INT8_C( -24), INT8_C(-125), INT8_C( -94), INT8_C( -42), INT8_C( 104), INT8_C( -11), INT8_C( -43), INT8_C( 61), INT8_C( -61), INT8_C( -93), INT8_C( 105), INT8_C( 12), INT8_C( -70), INT8_C( 125), INT8_C( 23), INT8_C( -91)), UINT64_C( 6749609748297998815), simde_mm512_set_epi8(INT8_C( 127), INT8_C( -48), INT8_C( 24), INT8_C( 106), INT8_C( 125), INT8_C( 90), INT8_C( -32), INT8_C( -90), INT8_C( 74), INT8_C( 94), INT8_C( -67), INT8_C( 22), INT8_C( 81), INT8_C( -59), INT8_C( 46), INT8_C( 91), INT8_C(-123), INT8_C( 70), INT8_C( -77), INT8_C( 126), INT8_C( 3), INT8_C( 23), INT8_C( -31), INT8_C( -25), INT8_C( -96), INT8_C( 10), INT8_C( 22), INT8_C( 54), INT8_C( 56), INT8_C( -52), INT8_C(-102), INT8_C(-105), INT8_C( 107), INT8_C( 65), INT8_C( -61), INT8_C( -79), INT8_C( 123), INT8_C( -51), INT8_C( 17), INT8_C( 126), INT8_C( -83), INT8_C( 58), INT8_C( -81), INT8_C( 47), INT8_C( 44), INT8_C( 77), INT8_C( 115), INT8_C( -95), INT8_C( 123), INT8_C( -61), INT8_C( -18), INT8_C( 14), INT8_C( 36), INT8_C( -5), INT8_C( 29), INT8_C( 95), INT8_C( -38), INT8_C( -24), INT8_C( 52), INT8_C( -93), INT8_C(-118), INT8_C( -53), INT8_C( 97), INT8_C( -62)), simde_mm512_set_epi8(INT8_C( -63), 
INT8_C( -92), INT8_C( 23), INT8_C(-124), INT8_C( -91), INT8_C( -30), INT8_C( -20), INT8_C( -98), INT8_C( -66), INT8_C( 89), INT8_C( 27), INT8_C( -23), INT8_C(-113), INT8_C( -53), INT8_C( 27), INT8_C( 25), INT8_C( -63), INT8_C( 31), INT8_C( -46), INT8_C( 82), INT8_C( 109), INT8_C( 43), INT8_C( 6), INT8_C( 80), INT8_C( 22), INT8_C( 76), INT8_C( -79), INT8_C(-105), INT8_C( -5), INT8_C( 36), INT8_C( 67), INT8_C( -67), INT8_C(-115), INT8_C( 66), INT8_C(-116), INT8_C( -56), INT8_C( -74), INT8_C( -28), INT8_C( 19), INT8_C(-120), INT8_C( -8), INT8_C( -85), INT8_C( 1), INT8_C( 105), INT8_C( 112), INT8_C( 102), INT8_C(-119), INT8_C( -90), INT8_C( 31), INT8_C( -89), INT8_C( 26), INT8_C( -72), INT8_C( -9), INT8_C(-119), INT8_C( 92), INT8_C( 109), INT8_C( 41), INT8_C( 49), INT8_C( -25), INT8_C( 124), INT8_C( 121), INT8_C( -59), INT8_C( -23), INT8_C( -27)), simde_mm512_set_epi8(INT8_C( 29), INT8_C( 95), INT8_C( 95), INT8_C( -58), INT8_C( -92), INT8_C( 122), INT8_C( 119), INT8_C( 116), INT8_C( -24), INT8_C( -47), INT8_C( -75), INT8_C( -3), INT8_C( -81), INT8_C( 41), INT8_C( -23), INT8_C( 113), INT8_C( 80), INT8_C( -21), INT8_C( 57), INT8_C( -43), INT8_C( -74), INT8_C(-126), INT8_C( -1), INT8_C( -40), INT8_C( -58), INT8_C( 86), INT8_C( 95), INT8_C( -4), INT8_C( 18), INT8_C( 38), INT8_C( 17), INT8_C( -29), INT8_C( -5), INT8_C( 1), INT8_C( -57), INT8_C( 11), INT8_C( -53), INT8_C( 48), INT8_C( 68), INT8_C( 80), INT8_C( -95), INT8_C( 23), INT8_C( -51), INT8_C( -12), INT8_C( -36), INT8_C( -70), INT8_C( 38), INT8_C(-126), INT8_C( 104), INT8_C( -88), INT8_C( -40), INT8_C( -42), INT8_C( 104), INT8_C( -11), INT8_C( -43), INT8_C(-122), INT8_C( 24), INT8_C( -11), INT8_C( 105), INT8_C( 71), INT8_C( 9), INT8_C( 0), INT8_C( -69), INT8_C( 117)) }, { simde_mm512_set_epi8(INT8_C( 123), INT8_C( 59), INT8_C( -91), INT8_C( 35), INT8_C( -96), INT8_C( 111), INT8_C( 38), INT8_C( 53), INT8_C( -15), INT8_C( 111), INT8_C( -21), INT8_C( 77), INT8_C( -7), INT8_C( 35), INT8_C(-125), INT8_C(-119), INT8_C( -5), 
INT8_C( 114), INT8_C( 88), INT8_C( -19), INT8_C( -98), INT8_C( 73), INT8_C(-102), INT8_C( -44), INT8_C(-127), INT8_C( 61), INT8_C( -35), INT8_C( -36), INT8_C( 43), INT8_C(-126), INT8_C( 127), INT8_C( 23), INT8_C( 90), INT8_C( 73), INT8_C( 125), INT8_C( 90), INT8_C( 71), INT8_C( 84), INT8_C( 18), INT8_C( 112), INT8_C( -25), INT8_C( -21), INT8_C( 9), INT8_C( 8), INT8_C( -84), INT8_C( -30), INT8_C(-123), INT8_C( 33), INT8_C( 32), INT8_C( 18), INT8_C( 63), INT8_C( 60), INT8_C( 93), INT8_C( 67), INT8_C( 62), INT8_C( -65), INT8_C(-117), INT8_C( 119), INT8_C( -99), INT8_C( -90), INT8_C( -44), INT8_C( 30), INT8_C( 25), INT8_C( 39)), UINT64_C( 9640592840815503565), simde_mm512_set_epi8(INT8_C(-106), INT8_C( 126), INT8_C( 20), INT8_C( 89), INT8_C( 9), INT8_C( 9), INT8_C(-100), INT8_C( 107), INT8_C( -76), INT8_C( -35), INT8_C( 20), INT8_C( -62), INT8_C( 87), INT8_C( -82), INT8_C( 123), INT8_C( 1), INT8_C( 14), INT8_C( 13), INT8_C( 95), INT8_C( -21), INT8_C( 44), INT8_C( 107), INT8_C( -28), INT8_C( 63), INT8_C( 111), INT8_C( 92), INT8_C( 71), INT8_C( -91), INT8_C( -35), INT8_C( 39), INT8_C( 94), INT8_C( -48), INT8_C( -58), INT8_C( -10), INT8_C( -87), INT8_C( -15), INT8_C( -78), INT8_C( -53), INT8_C( -23), INT8_C( 66), INT8_C( 37), INT8_C( 69), INT8_C( 119), INT8_C( -65), INT8_C(-114), INT8_C( 56), INT8_C(-111), INT8_C( 59), INT8_C( -64), INT8_C( 122), INT8_C( 12), INT8_C( 76), INT8_C( -91), INT8_C( 97), INT8_C( 28), INT8_C( -82), INT8_C(-121), INT8_C( 25), INT8_C(-101), INT8_C( -69), INT8_C( 41), INT8_C( -61), INT8_C( 35), INT8_C( 67)), simde_mm512_set_epi8(INT8_C(-123), INT8_C( 79), INT8_C( -35), INT8_C( -70), INT8_C( 26), INT8_C( 99), INT8_C(-116), INT8_C( -30), INT8_C( 114), INT8_C( 115), INT8_C( -91), INT8_C( -62), INT8_C( -40), INT8_C( 109), INT8_C( 100), INT8_C( -48), INT8_C( -65), INT8_C( -78), INT8_C(-117), INT8_C(-106), INT8_C( 26), INT8_C(-101), INT8_C( -76), INT8_C( 6), INT8_C( 33), INT8_C( -13), INT8_C( -85), INT8_C( 120), INT8_C( 40), INT8_C( -3), INT8_C( 13), 
INT8_C( 24), INT8_C( 80), INT8_C( 72), INT8_C( 95), INT8_C( -4), INT8_C( 23), INT8_C( 83), INT8_C( -88), INT8_C(-123), INT8_C(-118), INT8_C( 82), INT8_C( 40), INT8_C( 10), INT8_C( 44), INT8_C( 43), INT8_C( 53), INT8_C( 7), INT8_C( 60), INT8_C( 40), INT8_C( 118), INT8_C( -43), INT8_C( 16), INT8_C( -92), INT8_C( 82), INT8_C( -97), INT8_C( -23), INT8_C( 21), INT8_C( 23), INT8_C( -52), INT8_C( 79), INT8_C( -51), INT8_C(-125), INT8_C( -68)), simde_mm512_set_epi8(INT8_C( 38), INT8_C( 59), INT8_C( -91), INT8_C( 35), INT8_C( -96), INT8_C(-114), INT8_C( 38), INT8_C( 95), INT8_C( 25), INT8_C( 15), INT8_C( -21), INT8_C( 77), INT8_C( -67), INT8_C( 35), INT8_C( -61), INT8_C(-119), INT8_C( -5), INT8_C( -91), INT8_C( 88), INT8_C( -19), INT8_C( -98), INT8_C( -34), INT8_C(-102), INT8_C( -44), INT8_C(-127), INT8_C( 61), INT8_C( -35), INT8_C( -29), INT8_C( -55), INT8_C( 59), INT8_C( 127), INT8_C( 23), INT8_C( 90), INT8_C( 44), INT8_C( 125), INT8_C( 100), INT8_C( 71), INT8_C( 48), INT8_C( 18), INT8_C( 112), INT8_C( -93), INT8_C( 122), INT8_C( 119), INT8_C( 119), INT8_C( 70), INT8_C( -30), INT8_C( 76), INT8_C( 124), INT8_C( 32), INT8_C(-107), INT8_C( -2), INT8_C( 60), INT8_C( 125), INT8_C( 67), INT8_C( 62), INT8_C( -65), INT8_C( -7), INT8_C(-113), INT8_C( -99), INT8_C( -90), INT8_C( 65), INT8_C( -25), INT8_C( 25), INT8_C(-119)) }, { simde_mm512_set_epi8(INT8_C( -57), INT8_C( 69), INT8_C( 98), INT8_C( -33), INT8_C( 62), INT8_C( 94), INT8_C( -38), INT8_C( -71), INT8_C( -19), INT8_C( 108), INT8_C( 21), INT8_C( -42), INT8_C( 46), INT8_C( 24), INT8_C(-124), INT8_C( 94), INT8_C( -15), INT8_C( 105), INT8_C( 125), INT8_C( 58), INT8_C( 33), INT8_C( -54), INT8_C( 16), INT8_C(-125), INT8_C( 45), INT8_C( 66), INT8_C(-109), INT8_C( -81), INT8_C( -67), INT8_C( 112), INT8_C( 1), INT8_C( -76), INT8_C( 64), INT8_C( -13), INT8_C( 77), INT8_C(-127), INT8_C( -63), INT8_C( -74), INT8_C( -63), INT8_C( -99), INT8_C( -45), INT8_C( 47), INT8_C( 113), INT8_C( 20), INT8_C(-123), INT8_C( 127), INT8_C( 16), 
INT8_C( -25), INT8_C( -9), INT8_C( 70), INT8_C( 76), INT8_C( -62), INT8_C(-112), INT8_C( 53), INT8_C( -85), INT8_C( 23), INT8_C(-107), INT8_C( 101), INT8_C( -95), INT8_C( 15), INT8_C( -98), INT8_C( 96), INT8_C( -14), INT8_C( 115)), UINT64_C( 7455883527348184901), simde_mm512_set_epi8(INT8_C( -76), INT8_C( -37), INT8_C( 101), INT8_C( 42), INT8_C( -10), INT8_C( 29), INT8_C( -32), INT8_C(-104), INT8_C( -52), INT8_C( 91), INT8_C( -27), INT8_C( 64), INT8_C( 61), INT8_C( 70), INT8_C( 125), INT8_C( 77), INT8_C( 50), INT8_C( -84), INT8_C(-108), INT8_C( 43), INT8_C( 26), INT8_C( 9), INT8_C( 13), INT8_C( -36), INT8_C( 39), INT8_C(-122), INT8_C( 26), INT8_C( -88), INT8_C( 96), INT8_C(-114), INT8_C( 52), INT8_C( 21), INT8_C( 98), INT8_C( -75), INT8_C( 31), INT8_C( 27), INT8_C( 37), INT8_C(-110), INT8_C( 11), INT8_C(-105), INT8_C( 12), INT8_C( -9), INT8_C( 19), INT8_C( -84), INT8_C(-104), INT8_C(-100), INT8_C( -12), INT8_C( -62), INT8_C( -91), INT8_C( 17), INT8_C( 1), INT8_C( 99), INT8_C( -18), INT8_C( 34), INT8_C( 14), INT8_C( 126), INT8_C( 91), INT8_C( -84), INT8_C(-123), INT8_C( -78), INT8_C( 66), INT8_C( 52), INT8_C( 99), INT8_C( 74)), simde_mm512_set_epi8(INT8_C( 121), INT8_C( -29), INT8_C( 49), INT8_C( 35), INT8_C( 77), INT8_C(-104), INT8_C( 57), INT8_C( 64), INT8_C( 121), INT8_C( 98), INT8_C( 15), INT8_C( -48), INT8_C(-126), INT8_C( 9), INT8_C( 84), INT8_C( 7), INT8_C( -35), INT8_C( -9), INT8_C( 12), INT8_C( -76), INT8_C( 70), INT8_C( 4), INT8_C( 117), INT8_C(-104), INT8_C( -90), INT8_C( 57), INT8_C( -33), INT8_C( 119), INT8_C(-114), INT8_C( -15), INT8_C( 50), INT8_C( 107), INT8_C( 64), INT8_C( 46), INT8_C( -20), INT8_C( 32), INT8_C( -25), INT8_C( 9), INT8_C( 82), INT8_C( 67), INT8_C( 95), INT8_C( 56), INT8_C( -84), INT8_C( 66), INT8_C( -2), INT8_C( -24), INT8_C( 69), INT8_C( -5), INT8_C( 107), INT8_C(-107), INT8_C( 112), INT8_C( -8), INT8_C( -8), INT8_C( 25), INT8_C( 107), INT8_C( -42), INT8_C( -78), INT8_C( 17), INT8_C(-102), INT8_C( -56), INT8_C( 67), INT8_C( -37), 
INT8_C( -38), INT8_C( -67)), simde_mm512_set_epi8(INT8_C( -57), INT8_C( 55), INT8_C( -37), INT8_C( -33), INT8_C( 62), INT8_C( 63), INT8_C( -5), INT8_C(-104), INT8_C( -19), INT8_C( -75), INT8_C( -37), INT8_C( -95), INT8_C( -24), INT8_C( 24), INT8_C(-124), INT8_C( 94), INT8_C( -2), INT8_C( 105), INT8_C( 125), INT8_C( 91), INT8_C( 76), INT8_C( 84), INT8_C( 125), INT8_C(-125), INT8_C( 13), INT8_C( 66), INT8_C(-109), INT8_C(-121), INT8_C( -67), INT8_C( 112), INT8_C( 1), INT8_C( -76), INT8_C( -12), INT8_C( -13), INT8_C( 52), INT8_C(-127), INT8_C( -7), INT8_C( 32), INT8_C( 109), INT8_C( -99), INT8_C( -45), INT8_C( -6), INT8_C( 113), INT8_C( 20), INT8_C(-123), INT8_C( -78), INT8_C( 84), INT8_C( -25), INT8_C( -9), INT8_C( 87), INT8_C( -49), INT8_C( -62), INT8_C(-112), INT8_C( 119), INT8_C( 87), INT8_C( 85), INT8_C(-107), INT8_C( 32), INT8_C( -95), INT8_C( 15), INT8_C( -98), INT8_C(-104), INT8_C( -14), INT8_C( -21)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_gf2p8affineinv_epi64_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].x, test_vec[i].A, 172); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_maskz_gf2p8affineinv_epi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask16 k; simde__m128i x; simde__m128i A; simde__m128i r; } test_vec[8] = { { UINT16_C(65157), simde_mm_set_epi8(INT8_C( -88), INT8_C( 75), INT8_C( 45), INT8_C( 101), INT8_C( -48), INT8_C( 40), INT8_C( 43), INT8_C(-120), INT8_C( 86), INT8_C(-109), INT8_C(-116), INT8_C( 52), INT8_C( -80), INT8_C( -39), INT8_C( -82), INT8_C( 33)), simde_mm_set_epi8(INT8_C( -42), INT8_C( 62), INT8_C( 88), INT8_C( 7), INT8_C( 61), INT8_C( 116), INT8_C( -89), INT8_C( 122), INT8_C( 102), INT8_C( -48), INT8_C( 124), INT8_C( 70), INT8_C( 47), INT8_C( 38), INT8_C( 113), INT8_C( -68)), simde_mm_set_epi8(INT8_C( 28), INT8_C( 122), INT8_C(-128), INT8_C( 93), INT8_C( -29), INT8_C( -30), INT8_C( -54), INT8_C( 0), INT8_C( 8), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 33), INT8_C( 0), INT8_C( -12)) }, { UINT16_C(46065), simde_mm_set_epi8(INT8_C( -60), INT8_C(-108), INT8_C( 94), INT8_C( -85), INT8_C( 51), INT8_C( 20), INT8_C( 52), INT8_C(-114), INT8_C( -6), INT8_C( 43), INT8_C( 55), INT8_C( 12), INT8_C( 60), INT8_C( 72), INT8_C( 58), INT8_C( 119)), simde_mm_set_epi8(INT8_C(-101), INT8_C( 76), INT8_C(-128), INT8_C( -17), INT8_C(-103), INT8_C( 2), INT8_C( 62), INT8_C(-126), INT8_C( 78), INT8_C( -70), INT8_C( 8), INT8_C( -28), INT8_C( -17), INT8_C( 126), INT8_C(-111), INT8_C(-124)), simde_mm_set_epi8(INT8_C( 42), INT8_C( 0), INT8_C( 5), INT8_C( -26), INT8_C( 0), INT8_C( 0), INT8_C( 32), INT8_C(-104), INT8_C( -31), INT8_C( -43), INT8_C( 84), INT8_C( -36), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-120)) }, { UINT16_C( 6797), simde_mm_set_epi8(INT8_C( -84), INT8_C( 28), INT8_C( -27), INT8_C(-123), INT8_C( 82), INT8_C(-107), INT8_C( -80), INT8_C( 79), INT8_C( 120), INT8_C( -68), INT8_C( -84), INT8_C( -49), INT8_C( 87), INT8_C( -79), INT8_C( -74), INT8_C( 92)), simde_mm_set_epi8(INT8_C( -38), INT8_C( 70), INT8_C( -49), INT8_C( 4), INT8_C( -35), INT8_C( 89), INT8_C(-103), INT8_C( 21), INT8_C( 58), INT8_C( -45), INT8_C( 126), INT8_C( -7), INT8_C( 69), INT8_C( 5), INT8_C( -82), INT8_C(-111)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 107), INT8_C( 52), INT8_C( 0), INT8_C( -75), INT8_C( 0), INT8_C( 101), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-110), INT8_C( -57), INT8_C( 0), INT8_C( 117)) }, { UINT16_C(13937), simde_mm_set_epi8(INT8_C( -73), INT8_C( 61), INT8_C( 67), INT8_C( -62), INT8_C( 2), INT8_C( 50), INT8_C( -54), INT8_C( -9), INT8_C( -22), INT8_C( -88), INT8_C(-110), INT8_C( -48), INT8_C( 76), INT8_C( -59), INT8_C( 68), INT8_C( -74)), simde_mm_set_epi8(INT8_C( -71), INT8_C( 118), INT8_C( -91), INT8_C( -10), INT8_C( 38), INT8_C( -84), INT8_C( -72), INT8_C( -61), INT8_C( 27), INT8_C( -2), INT8_C( 62), INT8_C( 34), INT8_C( 118), INT8_C( 94), INT8_C( -81), 
INT8_C( 50)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C(-118), INT8_C( 97), INT8_C( 0), INT8_C( 98), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 9), INT8_C( -56), INT8_C( -99), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 98)) }, { UINT16_C(24160), simde_mm_set_epi8(INT8_C( 103), INT8_C( -8), INT8_C( 63), INT8_C( 127), INT8_C( -97), INT8_C( 62), INT8_C( 26), INT8_C( 78), INT8_C( -69), INT8_C( -62), INT8_C(-105), INT8_C( 38), INT8_C( 99), INT8_C( -23), INT8_C( 54), INT8_C( -37)), simde_mm_set_epi8(INT8_C( 71), INT8_C( 78), INT8_C( 102), INT8_C( -87), INT8_C( -42), INT8_C( 16), INT8_C( 120), INT8_C( -7), INT8_C( 86), INT8_C( 101), INT8_C( 24), INT8_C( 125), INT8_C( -69), INT8_C( 65), INT8_C( 12), INT8_C( -54)), simde_mm_set_epi8(INT8_C( 0), INT8_C( -75), INT8_C( 0), INT8_C( -47), INT8_C( -21), INT8_C( 58), INT8_C( 121), INT8_C( 0), INT8_C( 0), INT8_C( 120), INT8_C( 99), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { UINT16_C(52349), simde_mm_set_epi8(INT8_C(-115), INT8_C( 83), INT8_C( -51), INT8_C(-117), INT8_C( -24), INT8_C( -79), INT8_C( 55), INT8_C(-125), INT8_C(-119), INT8_C(-109), INT8_C( -25), INT8_C( 26), INT8_C( -62), INT8_C( 72), INT8_C( 66), INT8_C(-110)), simde_mm_set_epi8(INT8_C( -26), INT8_C( 57), INT8_C( -91), INT8_C( 79), INT8_C( 72), INT8_C( -31), INT8_C(-110), INT8_C( 35), INT8_C( -42), INT8_C( 93), INT8_C( -26), INT8_C( 48), INT8_C( 88), INT8_C( 90), INT8_C( 73), INT8_C(-121)), simde_mm_set_epi8(INT8_C(-105), INT8_C( -47), INT8_C( 0), INT8_C( 0), INT8_C( 68), INT8_C( -91), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 18), INT8_C( -32), INT8_C( -84), INT8_C( -64), INT8_C( 55), INT8_C( 0), INT8_C( -52)) }, { UINT16_C(12470), simde_mm_set_epi8(INT8_C( 83), INT8_C( -70), INT8_C( -20), INT8_C( -26), INT8_C( 113), INT8_C( 81), INT8_C(-104), INT8_C( 87), INT8_C( 17), INT8_C( 59), INT8_C( -92), INT8_C( -92), INT8_C( -62), INT8_C( 84), INT8_C( -91), INT8_C( 84)), simde_mm_set_epi8(INT8_C( 105), INT8_C( 4), INT8_C( 0), INT8_C( 126), 
INT8_C( 17), INT8_C( -37), INT8_C( 124), INT8_C( 81), INT8_C( -49), INT8_C( -80), INT8_C( 50), INT8_C( 114), INT8_C( 103), INT8_C( 115), INT8_C( 11), INT8_C( 121)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( -35), INT8_C( 45), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 92), INT8_C( 0), INT8_C( 1), INT8_C( 1), INT8_C( 0), INT8_C( 55), INT8_C(-116), INT8_C( 0)) }, { UINT16_C(43937), simde_mm_set_epi8(INT8_C( 122), INT8_C( 28), INT8_C( 31), INT8_C( 41), INT8_C( -69), INT8_C( 33), INT8_C( 70), INT8_C( 26), INT8_C( -60), INT8_C( 101), INT8_C( 20), INT8_C( -20), INT8_C( 122), INT8_C( 120), INT8_C(-101), INT8_C( -57)), simde_mm_set_epi8(INT8_C(-126), INT8_C( 90), INT8_C( 106), INT8_C(-128), INT8_C( 45), INT8_C( 80), INT8_C(-116), INT8_C( 9), INT8_C( 97), INT8_C(-126), INT8_C( -49), INT8_C( 31), INT8_C( 80), INT8_C( -15), INT8_C(-103), INT8_C( -93)), simde_mm_set_epi8(INT8_C( 19), INT8_C( 0), INT8_C( 38), INT8_C( 0), INT8_C( 126), INT8_C( 0), INT8_C( -57), INT8_C( 17), INT8_C( 55), INT8_C( 0), INT8_C( 97), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 125)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_maskz_gf2p8affineinv_epi64_epi8(test_vec[i].k, test_vec[i].x, test_vec[i].A, INT8_C( 94)); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_maskz_gf2p8affineinv_epi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask32 k; simde__m256i x; simde__m256i A; simde__m256i r; } test_vec[8] = { { UINT32_C(1396440709), simde_mm256_set_epi8(INT8_C( -42), INT8_C( 62), INT8_C( 88), INT8_C( 7), INT8_C( 61), INT8_C( 116), INT8_C( -89), INT8_C( 122), INT8_C( 102), INT8_C( -48), INT8_C( 124), INT8_C( 70), INT8_C( 47), INT8_C( 38), INT8_C( 113), INT8_C( -68), INT8_C( -88), INT8_C( 75), INT8_C( 45), INT8_C( 101), INT8_C( -48), INT8_C( 40), INT8_C( 43), INT8_C(-120), INT8_C( 86), INT8_C(-109), INT8_C(-116), INT8_C( 52), INT8_C( -80), INT8_C( -39), INT8_C( -82), 
INT8_C( 33)), simde_mm256_set_epi8(INT8_C( 78), INT8_C( -70), INT8_C( 8), INT8_C( -28), INT8_C( -17), INT8_C( 126), INT8_C(-111), INT8_C(-124), INT8_C( -60), INT8_C(-108), INT8_C( 94), INT8_C( -85), INT8_C( 51), INT8_C( 20), INT8_C( 52), INT8_C(-114), INT8_C( -6), INT8_C( 43), INT8_C( 55), INT8_C( 12), INT8_C( 60), INT8_C( 72), INT8_C( 58), INT8_C( 119), INT8_C(-125), INT8_C(-116), INT8_C( -2), INT8_C( 35), INT8_C( 46), INT8_C( -63), INT8_C( -77), INT8_C( -15)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 66), INT8_C( 0), INT8_C( -81), INT8_C( 0), INT8_C( 0), INT8_C( 112), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -75), INT8_C( 33), INT8_C( 108), INT8_C( 0), INT8_C( -72), INT8_C( -88), INT8_C( -14), INT8_C( -22), INT8_C( 67), INT8_C( -13), INT8_C( 97), INT8_C( 88), INT8_C( -79), INT8_C( 0), INT8_C( 59), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 59), INT8_C( 0), INT8_C( 91)) }, { UINT32_C(2567061122), simde_mm256_set_epi8(INT8_C( 58), INT8_C( -45), INT8_C( 126), INT8_C( -7), INT8_C( 69), INT8_C( 5), INT8_C( -82), INT8_C(-111), INT8_C( -84), INT8_C( 28), INT8_C( -27), INT8_C(-123), INT8_C( 82), INT8_C(-107), INT8_C( -80), INT8_C( 79), INT8_C( 120), INT8_C( -68), INT8_C( -84), INT8_C( -49), INT8_C( 87), INT8_C( -79), INT8_C( -74), INT8_C( 92), INT8_C( 64), INT8_C(-128), INT8_C( -63), INT8_C( -93), INT8_C( 85), INT8_C( -44), INT8_C( 26), INT8_C(-115)), simde_mm256_set_epi8(INT8_C( -73), INT8_C( 61), INT8_C( 67), INT8_C( -62), INT8_C( 2), INT8_C( 50), INT8_C( -54), INT8_C( -9), INT8_C( -22), INT8_C( -88), INT8_C(-110), INT8_C( -48), INT8_C( 76), INT8_C( -59), INT8_C( 68), INT8_C( -74), INT8_C(-118), INT8_C( -5), INT8_C( 100), INT8_C( 20), INT8_C( 96), INT8_C(-121), INT8_C( 54), INT8_C( 113), INT8_C( -38), INT8_C( 70), INT8_C( -49), INT8_C( 4), INT8_C( -35), INT8_C( 89), INT8_C(-103), INT8_C( 21)), simde_mm256_set_epi8(INT8_C( -35), INT8_C( 0), INT8_C( 0), INT8_C( -43), INT8_C( -7), INT8_C( 0), INT8_C( 0), INT8_C( -82), INT8_C( 0), INT8_C( 0), INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -99), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -57), INT8_C( 18), INT8_C( -17), INT8_C( 29), INT8_C( -9), INT8_C( 0), INT8_C(-112), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -14), INT8_C( 0)) }, { UINT32_C(1985916722), simde_mm256_set_epi8(INT8_C( 103), INT8_C( -8), INT8_C( 63), INT8_C( 127), INT8_C( -97), INT8_C( 62), INT8_C( 26), INT8_C( 78), INT8_C( -69), INT8_C( -62), INT8_C(-105), INT8_C( 38), INT8_C( 99), INT8_C( -23), INT8_C( 54), INT8_C( -37), INT8_C( 100), INT8_C( 45), INT8_C( -71), INT8_C(-119), INT8_C( 49), INT8_C( -65), INT8_C( 94), INT8_C( 96), INT8_C( -71), INT8_C( 118), INT8_C( -91), INT8_C( -10), INT8_C( 38), INT8_C( -84), INT8_C( -72), INT8_C( -61)), simde_mm256_set_epi8(INT8_C(-119), INT8_C(-109), INT8_C( -25), INT8_C( 26), INT8_C( -62), INT8_C( 72), INT8_C( 66), INT8_C(-110), INT8_C( 56), INT8_C( -71), INT8_C( 70), INT8_C( 52), INT8_C( 105), INT8_C( 74), INT8_C( -52), INT8_C( 125), INT8_C( 71), INT8_C( 78), INT8_C( 102), INT8_C( -87), INT8_C( -42), INT8_C( 16), INT8_C( 120), INT8_C( -7), INT8_C( 86), INT8_C( 101), INT8_C( 24), INT8_C( 125), INT8_C( -69), INT8_C( 65), INT8_C( 12), INT8_C( -54)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 61), INT8_C( -38), INT8_C( 55), INT8_C( 0), INT8_C( -82), INT8_C( 57), INT8_C( 0), INT8_C( 0), INT8_C( 108), INT8_C( 0), INT8_C( 84), INT8_C( -11), INT8_C(-111), INT8_C( -7), INT8_C( 0), INT8_C( -22), INT8_C( 0), INT8_C( 44), INT8_C( 0), INT8_C( 55), INT8_C( -48), INT8_C( -20), INT8_C(-114), INT8_C( 0), INT8_C( 0), INT8_C( 53), INT8_C( -43), INT8_C( 0), INT8_C( 0), INT8_C(-123), INT8_C( 0)) }, { UINT32_C(3903928195), simde_mm256_set_epi8(INT8_C( 17), INT8_C( 59), INT8_C( -92), INT8_C( -92), INT8_C( -62), INT8_C( 84), INT8_C( -91), INT8_C( 84), INT8_C( 12), INT8_C( 33), INT8_C( -50), INT8_C( -22), INT8_C( 88), INT8_C( 0), INT8_C( 48), INT8_C( -74), INT8_C( -26), INT8_C( 57), INT8_C( -91), INT8_C( 79), INT8_C( 72), INT8_C( -31), INT8_C(-110), 
INT8_C( 35), INT8_C( -42), INT8_C( 93), INT8_C( -26), INT8_C( 48), INT8_C( 88), INT8_C( 90), INT8_C( 73), INT8_C(-121)), simde_mm256_set_epi8(INT8_C( 8), INT8_C( -41), INT8_C( 78), INT8_C( -24), INT8_C( -85), INT8_C( -99), INT8_C( -85), INT8_C( -95), INT8_C( 105), INT8_C( 4), INT8_C( 0), INT8_C( 126), INT8_C( 17), INT8_C( -37), INT8_C( 124), INT8_C( 81), INT8_C( -49), INT8_C( -80), INT8_C( 50), INT8_C( 114), INT8_C( 103), INT8_C( 115), INT8_C( 11), INT8_C( 121), INT8_C( 83), INT8_C( -70), INT8_C( -20), INT8_C( -26), INT8_C( 113), INT8_C( 81), INT8_C(-104), INT8_C( 87)), simde_mm256_set_epi8(INT8_C( 88), INT8_C( 87), INT8_C( 123), INT8_C( 0), INT8_C( 89), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -17), INT8_C( 0), INT8_C( 52), INT8_C(-100), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 79), INT8_C( 0), INT8_C( 0), INT8_C( -84), INT8_C( 78), INT8_C( 0), INT8_C( 95), INT8_C( 19), INT8_C( 37), INT8_C( 24), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 81), INT8_C( 55)) }, { UINT32_C(2054724551), simde_mm256_set_epi8(INT8_C( 10), INT8_C( 111), INT8_C( 0), INT8_C( 102), INT8_C( -29), INT8_C( -79), INT8_C( -33), INT8_C( -18), INT8_C(-126), INT8_C( 90), INT8_C( 106), INT8_C(-128), INT8_C( 45), INT8_C( 80), INT8_C(-116), INT8_C( 9), INT8_C( 97), INT8_C(-126), INT8_C( -49), INT8_C( 31), INT8_C( 80), INT8_C( -15), INT8_C(-103), INT8_C( -93), INT8_C( 122), INT8_C( 28), INT8_C( 31), INT8_C( 41), INT8_C( -69), INT8_C( 33), INT8_C( 70), INT8_C( 26)), simde_mm256_set_epi8(INT8_C( 85), INT8_C( -50), INT8_C(-124), INT8_C( -51), INT8_C( -15), INT8_C( 123), INT8_C( -57), INT8_C( 49), INT8_C( -56), INT8_C( -96), INT8_C(-107), INT8_C( 81), INT8_C( 28), INT8_C( 89), INT8_C(-117), INT8_C(-113), INT8_C( 93), INT8_C( 34), INT8_C( -81), INT8_C( 29), INT8_C( -48), INT8_C( -91), INT8_C(-101), INT8_C( 33), INT8_C( -32), INT8_C(-111), INT8_C( -86), INT8_C( 81), INT8_C( -69), INT8_C( -40), INT8_C(-105), INT8_C( 25)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( -50), INT8_C( 
126), INT8_C( 82), INT8_C( 90), INT8_C( 0), INT8_C( 4), INT8_C( 0), INT8_C( 0), INT8_C( -68), INT8_C( 105), INT8_C(-107), INT8_C( -61), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 16), INT8_C( 0), INT8_C( 0), INT8_C( -77), INT8_C( 48), INT8_C( 0), INT8_C( 10), INT8_C( -80), INT8_C( -38), INT8_C( -75), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -30), INT8_C( 85), INT8_C( -31)) }, { UINT32_C(3223503422), simde_mm256_set_epi8(INT8_C( 37), INT8_C( 113), INT8_C( -54), INT8_C( 15), INT8_C(-119), INT8_C(-119), INT8_C( 102), INT8_C( 12), INT8_C( -50), INT8_C(-100), INT8_C( 91), INT8_C( -73), INT8_C(-128), INT8_C( -69), INT8_C( 75), INT8_C( 41), INT8_C( 62), INT8_C( -50), INT8_C( 30), INT8_C( -6), INT8_C( 85), INT8_C( -1), INT8_C( 20), INT8_C( -57), INT8_C( -39), INT8_C( 81), INT8_C( -54), INT8_C( 103), INT8_C( -31), INT8_C( 76), INT8_C( -6), INT8_C( 19)), simde_mm256_set_epi8(INT8_C( 20), INT8_C( -1), INT8_C( 126), INT8_C( -58), INT8_C(-122), INT8_C( -63), INT8_C( 0), INT8_C( 21), INT8_C( 125), INT8_C(-120), INT8_C( 11), INT8_C( 36), INT8_C( -16), INT8_C( 30), INT8_C( -96), INT8_C( 28), INT8_C( 10), INT8_C( 16), INT8_C( 30), INT8_C(-118), INT8_C( -14), INT8_C( 16), INT8_C( -24), INT8_C( 121), INT8_C( 27), INT8_C( 42), INT8_C( -96), INT8_C( -57), INT8_C(-122), INT8_C( 20), INT8_C( 54), INT8_C( 94)), simde_mm256_set_epi8(INT8_C( 107), INT8_C( -26), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -43), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -18), INT8_C( 0), INT8_C( 85), INT8_C( 63), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -35), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -59), INT8_C( 36), INT8_C( 12), INT8_C( -17), INT8_C( 35), INT8_C( 0)) }, { UINT32_C( 89329770), simde_mm256_set_epi8(INT8_C( 75), INT8_C( -79), INT8_C( 108), INT8_C( -46), INT8_C(-111), INT8_C( 109), INT8_C( 40), INT8_C( 40), INT8_C( -24), INT8_C( -34), INT8_C( 68), INT8_C( 73), INT8_C( 83), INT8_C( -82), INT8_C( 100), INT8_C( 
122), INT8_C(-100), INT8_C(-111), INT8_C( 81), INT8_C( -18), INT8_C( 81), INT8_C( 2), INT8_C( 40), INT8_C( -14), INT8_C( -37), INT8_C( 0), INT8_C( 84), INT8_C( 112), INT8_C( 67), INT8_C(-128), INT8_C( -76), INT8_C( -90)), simde_mm256_set_epi8(INT8_C( 115), INT8_C( -91), INT8_C(-114), INT8_C( -33), INT8_C( 119), INT8_C( -27), INT8_C( -46), INT8_C( -37), INT8_C(-114), INT8_C( 33), INT8_C( 101), INT8_C( -50), INT8_C( 17), INT8_C( -4), INT8_C( 39), INT8_C( -76), INT8_C( -64), INT8_C( 109), INT8_C( 8), INT8_C( 18), INT8_C( 97), INT8_C( -33), INT8_C( 74), INT8_C( 101), INT8_C( 114), INT8_C( 73), INT8_C( 13), INT8_C( -15), INT8_C( -63), INT8_C( -87), INT8_C( 43), INT8_C( -66)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 47), INT8_C( 0), INT8_C( -46), INT8_C( 0), INT8_C( 103), INT8_C( 0), INT8_C( 89), INT8_C( 0), INT8_C( 0), INT8_C( 45), INT8_C( 75), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -6), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 126), INT8_C( 7), INT8_C( 0), INT8_C( -73), INT8_C( 0), INT8_C(-119), INT8_C( 0)) }, { UINT32_C(2319640833), simde_mm256_set_epi8(INT8_C( 113), INT8_C( -3), INT8_C( -82), INT8_C( 61), INT8_C( 114), INT8_C( 95), INT8_C( 56), INT8_C( 11), INT8_C( 39), INT8_C( 125), INT8_C( 59), INT8_C( 11), INT8_C( 10), INT8_C( 91), INT8_C( 91), INT8_C(-118), INT8_C(-117), INT8_C( 125), INT8_C( -40), INT8_C( 88), INT8_C( 24), INT8_C( -96), INT8_C( 22), INT8_C(-107), INT8_C( 49), INT8_C( 63), INT8_C( -20), INT8_C( 36), INT8_C( 62), INT8_C( 14), INT8_C( -92), INT8_C( -1)), simde_mm256_set_epi8(INT8_C( -16), INT8_C( 16), INT8_C( -21), INT8_C(-128), INT8_C( 14), INT8_C( -28), INT8_C( 76), INT8_C(-108), INT8_C( -40), INT8_C( -35), INT8_C( 36), INT8_C( -38), INT8_C( 3), INT8_C( -17), INT8_C( 32), INT8_C( 103), INT8_C(-115), INT8_C( 83), INT8_C( -28), INT8_C( -74), INT8_C( 50), INT8_C( 126), INT8_C( 112), INT8_C( 106), INT8_C( -37), INT8_C( 29), INT8_C( 33), INT8_C( -67), INT8_C( -58), 
INT8_C( 64), INT8_C( -22), INT8_C( 127)), simde_mm256_set_epi8(INT8_C(-107), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -80), INT8_C( 0), INT8_C( 4), INT8_C( 0), INT8_C( 0), INT8_C(-126), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 17), INT8_C( 0), INT8_C( 77), INT8_C( 8), INT8_C( 36), INT8_C( 0), INT8_C( 67), INT8_C( 0), INT8_C( 0), INT8_C( 104), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -92)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_maskz_gf2p8affineinv_epi64_epi8(test_vec[i].k, test_vec[i].x, test_vec[i].A, INT8_C( 126)); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_maskz_gf2p8affineinv_epi64_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__mmask64 k; simde__m512i x; simde__m512i A; simde__m512i r; } test_vec[8] = { { UINT64_C( 2674908657002217093), simde_mm512_set_epi8(INT8_C( 78), INT8_C( -70), INT8_C( 8), INT8_C( -28), INT8_C( -17), INT8_C( 126), INT8_C(-111), INT8_C(-124), INT8_C( -60), INT8_C(-108), INT8_C( 94), INT8_C( -85), INT8_C( 51), INT8_C( 20), INT8_C( 52), INT8_C(-114), INT8_C( -6), INT8_C( 43), INT8_C( 55), INT8_C( 12), INT8_C( 60), INT8_C( 72), INT8_C( 58), INT8_C( 119), INT8_C(-125), INT8_C(-116), INT8_C( -2), INT8_C( 35), INT8_C( 46), INT8_C( -63), INT8_C( -77), INT8_C( -15), INT8_C( -42), INT8_C( 62), INT8_C( 88), INT8_C( 7), INT8_C( 61), INT8_C( 116), INT8_C( -89), INT8_C( 122), INT8_C( 102), INT8_C( -48), INT8_C( 124), INT8_C( 70), INT8_C( 47), INT8_C( 38), INT8_C( 113), INT8_C( -68), INT8_C( -88), INT8_C( 75), INT8_C( 45), INT8_C( 101), INT8_C( -48), INT8_C( 40), INT8_C( 43), INT8_C(-120), INT8_C( 86), INT8_C(-109), INT8_C(-116), INT8_C( 52), INT8_C( -80), INT8_C( -39), INT8_C( -82), INT8_C( 33)), simde_mm512_set_epi8(INT8_C( -22), INT8_C( -88), INT8_C(-110), INT8_C( -48), INT8_C( 76), INT8_C( -59), INT8_C( 68), INT8_C( -74), INT8_C(-118), INT8_C( -5), INT8_C( 100), 
INT8_C( 20), INT8_C( 96), INT8_C(-121), INT8_C( 54), INT8_C( 113), INT8_C( -38), INT8_C( 70), INT8_C( -49), INT8_C( 4), INT8_C( -35), INT8_C( 89), INT8_C(-103), INT8_C( 21), INT8_C( 58), INT8_C( -45), INT8_C( 126), INT8_C( -7), INT8_C( 69), INT8_C( 5), INT8_C( -82), INT8_C(-111), INT8_C( -84), INT8_C( 28), INT8_C( -27), INT8_C(-123), INT8_C( 82), INT8_C(-107), INT8_C( -80), INT8_C( 79), INT8_C( 120), INT8_C( -68), INT8_C( -84), INT8_C( -49), INT8_C( 87), INT8_C( -79), INT8_C( -74), INT8_C( 92), INT8_C( 64), INT8_C(-128), INT8_C( -63), INT8_C( -93), INT8_C( 85), INT8_C( -44), INT8_C( 26), INT8_C(-115), INT8_C(-101), INT8_C( 76), INT8_C(-128), INT8_C( -17), INT8_C(-103), INT8_C( 2), INT8_C( 62), INT8_C(-126)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( -5), INT8_C( 0), INT8_C( 0), INT8_C( 50), INT8_C( 0), INT8_C( -21), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 75), INT8_C(-110), INT8_C( -11), INT8_C( -43), INT8_C( 35), INT8_C( 0), INT8_C( 0), INT8_C(-115), INT8_C( 0), INT8_C( 22), INT8_C(-123), INT8_C( -67), INT8_C( -89), INT8_C( 0), INT8_C( 34), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 116), INT8_C( 0), INT8_C(-102), INT8_C( 0), INT8_C( 0), INT8_C( -86), INT8_C( 54), INT8_C( 0), INT8_C( 0), INT8_C(-116), INT8_C( 56), INT8_C( 18), INT8_C( 0), INT8_C( -7), INT8_C( 46), INT8_C( 77), INT8_C( 25), INT8_C( 56), INT8_C( -29), INT8_C( 120), INT8_C( -70), INT8_C( -31), INT8_C( 0), INT8_C(-102), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-117), INT8_C( 0), INT8_C( 7)) }, { UINT64_C(13203784183059761911), simde_mm512_set_epi8(INT8_C( 56), INT8_C( -71), INT8_C( 70), INT8_C( 52), INT8_C( 105), INT8_C( 74), INT8_C( -52), INT8_C( 125), INT8_C( 71), INT8_C( 78), INT8_C( 102), INT8_C( -87), INT8_C( -42), INT8_C( 16), INT8_C( 120), INT8_C( -7), INT8_C( 86), INT8_C( 101), INT8_C( 24), INT8_C( 125), INT8_C( -69), INT8_C( 65), INT8_C( 12), INT8_C( -54), INT8_C( 103), INT8_C( -8), INT8_C( 63), INT8_C( 127), 
INT8_C( -97), INT8_C( 62), INT8_C( 26), INT8_C( 78), INT8_C( -69), INT8_C( -62), INT8_C(-105), INT8_C( 38), INT8_C( 99), INT8_C( -23), INT8_C( 54), INT8_C( -37), INT8_C( 100), INT8_C( 45), INT8_C( -71), INT8_C(-119), INT8_C( 49), INT8_C( -65), INT8_C( 94), INT8_C( 96), INT8_C( -71), INT8_C( 118), INT8_C( -91), INT8_C( -10), INT8_C( 38), INT8_C( -84), INT8_C( -72), INT8_C( -61), INT8_C( 27), INT8_C( -2), INT8_C( 62), INT8_C( 34), INT8_C( 118), INT8_C( 94), INT8_C( -81), INT8_C( 50)), simde_mm512_set_epi8(INT8_C( -49), INT8_C( -80), INT8_C( 50), INT8_C( 114), INT8_C( 103), INT8_C( 115), INT8_C( 11), INT8_C( 121), INT8_C( 83), INT8_C( -70), INT8_C( -20), INT8_C( -26), INT8_C( 113), INT8_C( 81), INT8_C(-104), INT8_C( 87), INT8_C( 17), INT8_C( 59), INT8_C( -92), INT8_C( -92), INT8_C( -62), INT8_C( 84), INT8_C( -91), INT8_C( 84), INT8_C( 12), INT8_C( 33), INT8_C( -50), INT8_C( -22), INT8_C( 88), INT8_C( 0), INT8_C( 48), INT8_C( -74), INT8_C( -26), INT8_C( 57), INT8_C( -91), INT8_C( 79), INT8_C( 72), INT8_C( -31), INT8_C(-110), INT8_C( 35), INT8_C( -42), INT8_C( 93), INT8_C( -26), INT8_C( 48), INT8_C( 88), INT8_C( 90), INT8_C( 73), INT8_C(-121), INT8_C(-115), INT8_C( 83), INT8_C( -51), INT8_C(-117), INT8_C( -24), INT8_C( -79), INT8_C( 55), INT8_C(-125), INT8_C(-119), INT8_C(-109), INT8_C( -25), INT8_C( 26), INT8_C( -62), INT8_C( 72), INT8_C( 66), INT8_C(-110)), simde_mm512_set_epi8(INT8_C( 106), INT8_C( 0), INT8_C( -9), INT8_C(-101), INT8_C( 0), INT8_C( 77), INT8_C( 94), INT8_C( -85), INT8_C( 0), INT8_C( 0), INT8_C( 87), INT8_C( 8), INT8_C( -37), INT8_C( 97), INT8_C( 0), INT8_C( -54), INT8_C( 0), INT8_C( 81), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 12), INT8_C( -1), INT8_C( 47), INT8_C( 103), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -83), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -25), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 104), INT8_C( 83), INT8_C( 0), INT8_C( 0), INT8_C( 4), INT8_C( 
0), INT8_C(-110), INT8_C( -59), INT8_C( 0), INT8_C( 0), INT8_C( 109), INT8_C( 0), INT8_C( -38), INT8_C( 0), INT8_C( 115), INT8_C( -50), INT8_C( 109), INT8_C( -76), INT8_C( 0), INT8_C( -54), INT8_C( 19), INT8_C( 126)) }, { UINT64_C( 7567173815354752081), simde_mm512_set_epi8(INT8_C( 93), INT8_C( 34), INT8_C( -81), INT8_C( 29), INT8_C( -48), INT8_C( -91), INT8_C(-101), INT8_C( 33), INT8_C( -32), INT8_C(-111), INT8_C( -86), INT8_C( 81), INT8_C( -69), INT8_C( -40), INT8_C(-105), INT8_C( 25), INT8_C( 10), INT8_C( 111), INT8_C( 0), INT8_C( 102), INT8_C( -29), INT8_C( -79), INT8_C( -33), INT8_C( -18), INT8_C(-126), INT8_C( 90), INT8_C( 106), INT8_C(-128), INT8_C( 45), INT8_C( 80), INT8_C(-116), INT8_C( 9), INT8_C( 97), INT8_C(-126), INT8_C( -49), INT8_C( 31), INT8_C( 80), INT8_C( -15), INT8_C(-103), INT8_C( -93), INT8_C( 122), INT8_C( 28), INT8_C( 31), INT8_C( 41), INT8_C( -69), INT8_C( 33), INT8_C( 70), INT8_C( 26), INT8_C( -60), INT8_C( 101), INT8_C( 20), INT8_C( -20), INT8_C( 122), INT8_C( 120), INT8_C(-101), INT8_C( -57), INT8_C( 8), INT8_C( -41), INT8_C( 78), INT8_C( -24), INT8_C( -85), INT8_C( -99), INT8_C( -85), INT8_C( -95)), simde_mm512_set_epi8(INT8_C( 27), INT8_C( 42), INT8_C( -96), INT8_C( -57), INT8_C(-122), INT8_C( 20), INT8_C( 54), INT8_C( 94), INT8_C( 37), INT8_C( 113), INT8_C( -54), INT8_C( 15), INT8_C(-119), INT8_C(-119), INT8_C( 102), INT8_C( 12), INT8_C( -50), INT8_C(-100), INT8_C( 91), INT8_C( -73), INT8_C(-128), INT8_C( -69), INT8_C( 75), INT8_C( 41), INT8_C( 62), INT8_C( -50), INT8_C( 30), INT8_C( -6), INT8_C( 85), INT8_C( -1), INT8_C( 20), INT8_C( -57), INT8_C( -39), INT8_C( 81), INT8_C( -54), INT8_C( 103), INT8_C( -31), INT8_C( 76), INT8_C( -6), INT8_C( 19), INT8_C( 0), INT8_C( -53), INT8_C( -62), INT8_C( 52), INT8_C( -64), INT8_C( 34), INT8_C( -62), INT8_C( 62), INT8_C( 85), INT8_C( -50), INT8_C(-124), INT8_C( -51), INT8_C( -15), INT8_C( 123), INT8_C( -57), INT8_C( 49), INT8_C( -56), INT8_C( -96), INT8_C(-107), INT8_C( 81), INT8_C( 28), INT8_C( 
89), INT8_C(-117), INT8_C(-113)), simde_mm512_set_epi8(INT8_C( 0), INT8_C(-116), INT8_C( -88), INT8_C( 0), INT8_C( -54), INT8_C( 0), INT8_C( 0), INT8_C( -45), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 66), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 59), INT8_C( -38), INT8_C( 8), INT8_C( -16), INT8_C( -20), INT8_C( 17), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -65), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -64), INT8_C( 53), INT8_C( 115), INT8_C( 0), INT8_C( -39), INT8_C(-107), INT8_C( 0), INT8_C( 23), INT8_C(-107), INT8_C( 0), INT8_C( 126), INT8_C(-127), INT8_C( -21), INT8_C( 41), INT8_C( -49), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 96), INT8_C( 0), INT8_C( 76), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -49)) }, { UINT64_C( 725113122122164345), simde_mm512_set_epi8(INT8_C( 114), INT8_C( 73), INT8_C( 13), INT8_C( -15), INT8_C( -63), INT8_C( -87), INT8_C( 43), INT8_C( -66), INT8_C( 75), INT8_C( -79), INT8_C( 108), INT8_C( -46), INT8_C(-111), INT8_C( 109), INT8_C( 40), INT8_C( 40), INT8_C( -24), INT8_C( -34), INT8_C( 68), INT8_C( 73), INT8_C( 83), INT8_C( -82), INT8_C( 100), INT8_C( 122), INT8_C(-100), INT8_C(-111), INT8_C( 81), INT8_C( -18), INT8_C( 81), INT8_C( 2), INT8_C( 40), INT8_C( -14), INT8_C( -37), INT8_C( 0), INT8_C( 84), INT8_C( 112), INT8_C( 67), INT8_C(-128), INT8_C( -76), INT8_C( -90), INT8_C( -77), INT8_C( 21), INT8_C( 79), INT8_C( -2), INT8_C( 5), INT8_C( 83), INT8_C( 16), INT8_C( 106), INT8_C( 20), INT8_C( -1), INT8_C( 126), INT8_C( -58), INT8_C(-122), INT8_C( -63), INT8_C( 0), INT8_C( 21), INT8_C( 125), INT8_C(-120), INT8_C( 11), INT8_C( 36), INT8_C( -16), INT8_C( 30), INT8_C( -96), INT8_C( 28)), simde_mm512_set_epi8(INT8_C( 113), INT8_C( -3), INT8_C( -82), INT8_C( 61), INT8_C( 114), INT8_C( 95), INT8_C( 56), INT8_C( 11), INT8_C( 39), INT8_C( 125), INT8_C( 59), INT8_C( 11), INT8_C( 10), INT8_C( 91), 
INT8_C( 91), INT8_C(-118), INT8_C(-117), INT8_C( 125), INT8_C( -40), INT8_C( 88), INT8_C( 24), INT8_C( -96), INT8_C( 22), INT8_C(-107), INT8_C( 49), INT8_C( 63), INT8_C( -20), INT8_C( 36), INT8_C( 62), INT8_C( 14), INT8_C( -92), INT8_C( -1), INT8_C( 45), INT8_C( 43), INT8_C( -36), INT8_C( -24), INT8_C(-118), INT8_C( 66), INT8_C( -23), INT8_C( 1), INT8_C( 115), INT8_C( -91), INT8_C(-114), INT8_C( -33), INT8_C( 119), INT8_C( -27), INT8_C( -46), INT8_C( -37), INT8_C(-114), INT8_C( 33), INT8_C( 101), INT8_C( -50), INT8_C( 17), INT8_C( -4), INT8_C( 39), INT8_C( -76), INT8_C( -64), INT8_C( 109), INT8_C( 8), INT8_C( 18), INT8_C( 97), INT8_C( -33), INT8_C( 74), INT8_C( 101)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 12), INT8_C( 0), INT8_C( 67), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 58), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 83), INT8_C( 72), INT8_C(-119), INT8_C( 47), INT8_C( 0), INT8_C(-124), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -30), INT8_C( 0), INT8_C( 126), INT8_C( 0), INT8_C( 79), INT8_C( -67), INT8_C(-117), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 122), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -37), INT8_C( -55), INT8_C( 66), INT8_C( 0), INT8_C( -49), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( 111), INT8_C( 118), INT8_C( -38), INT8_C( 0), INT8_C( 0), INT8_C( -53)) }, { UINT64_C(15788812967589505663), simde_mm512_set_epi8(INT8_C( 69), INT8_C( -79), INT8_C( 96), INT8_C( 14), INT8_C( 27), INT8_C( 115), INT8_C( -2), INT8_C( -45), INT8_C( 88), INT8_C( -52), INT8_C( 70), INT8_C(-120), INT8_C( 78), INT8_C( 99), INT8_C( 69), INT8_C(-111), INT8_C( 113), INT8_C( 76), INT8_C( 28), INT8_C( 7), INT8_C( -98), INT8_C( -99), INT8_C(-123), INT8_C( 88), INT8_C( 122), INT8_C( 119), INT8_C( 9), INT8_C( 26), INT8_C( 110), INT8_C( -42), INT8_C( -67), INT8_C( -7), INT8_C( 
125), INT8_C( 120), INT8_C( -78), INT8_C( -83), INT8_C( -76), INT8_C( 43), INT8_C( -6), INT8_C( 63), INT8_C( -16), INT8_C( 16), INT8_C( -21), INT8_C(-128), INT8_C( 14), INT8_C( -28), INT8_C( 76), INT8_C(-108), INT8_C( -40), INT8_C( -35), INT8_C( 36), INT8_C( -38), INT8_C( 3), INT8_C( -17), INT8_C( 32), INT8_C( 103), INT8_C(-115), INT8_C( 83), INT8_C( -28), INT8_C( -74), INT8_C( 50), INT8_C( 126), INT8_C( 112), INT8_C( 106)), simde_mm512_set_epi8(INT8_C( -98), INT8_C( 125), INT8_C( 126), INT8_C( 47), INT8_C( -61), INT8_C( 119), INT8_C( 16), INT8_C( -89), INT8_C( 113), INT8_C( -56), INT8_C( -82), INT8_C(-119), INT8_C( 18), INT8_C( -58), INT8_C( 99), INT8_C( -43), INT8_C( 63), INT8_C( 74), INT8_C( 76), INT8_C( -51), INT8_C( -21), INT8_C( -51), INT8_C( 122), INT8_C( -85), INT8_C( 54), INT8_C( -69), INT8_C( 72), INT8_C( 111), INT8_C( 33), INT8_C( -99), INT8_C( -4), INT8_C( 32), INT8_C(-118), INT8_C( 119), INT8_C( 126), INT8_C( -30), INT8_C( -60), INT8_C( -48), INT8_C( -93), INT8_C( 113), INT8_C( 44), INT8_C( 13), INT8_C( 40), INT8_C( 25), INT8_C( -54), INT8_C( -49), INT8_C(-104), INT8_C(-106), INT8_C( -8), INT8_C( 71), INT8_C( 97), INT8_C( -37), INT8_C( -97), INT8_C( 104), INT8_C( -72), INT8_C( -12), INT8_C( -92), INT8_C( -10), INT8_C( -37), INT8_C( -55), INT8_C( 26), INT8_C( 99), INT8_C( 118), INT8_C( 104)), simde_mm512_set_epi8(INT8_C( -50), INT8_C( -76), INT8_C( 0), INT8_C( -95), INT8_C( -70), INT8_C( 0), INT8_C( 49), INT8_C( 34), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 49), INT8_C( 114), INT8_C( -36), INT8_C( 0), INT8_C( 97), INT8_C( 0), INT8_C( 0), INT8_C( 83), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 3), INT8_C( -16), INT8_C( 0), INT8_C( -57), INT8_C( 22), INT8_C( 92), INT8_C( 67), INT8_C( 0), INT8_C( -69), INT8_C( 88), INT8_C( -11), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -49), INT8_C( -70), INT8_C( 0), INT8_C( 0), INT8_C( -29), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 47), INT8_C(-114), INT8_C( 71), 
INT8_C( 0), INT8_C( 127), INT8_C( 0), INT8_C( -30), INT8_C( 0), INT8_C( 0), INT8_C( -74), INT8_C( 105), INT8_C( 122), INT8_C(-110), INT8_C( -98), INT8_C( 86), INT8_C( -56)) }, { UINT64_C(12731945964385124885), simde_mm512_set_epi8(INT8_C( 100), INT8_C( 28), INT8_C( -98), INT8_C( -75), INT8_C( 86), INT8_C( -13), INT8_C( -71), INT8_C( -97), INT8_C(-109), INT8_C( -82), INT8_C( 68), INT8_C( 69), INT8_C( 3), INT8_C( 91), INT8_C( -46), INT8_C( 55), INT8_C( -27), INT8_C( -27), INT8_C( -39), INT8_C( -77), INT8_C( 21), INT8_C( 50), INT8_C( 79), INT8_C( -83), INT8_C( 113), INT8_C( -85), INT8_C( -34), INT8_C( 49), INT8_C( 43), INT8_C( -40), INT8_C( -68), INT8_C( 40), INT8_C( 117), INT8_C( 67), INT8_C( -53), INT8_C( 2), INT8_C( -36), INT8_C( 64), INT8_C( -49), INT8_C( 42), INT8_C( -73), INT8_C( 13), INT8_C( 54), INT8_C(-116), INT8_C(-102), INT8_C( -83), INT8_C( -29), INT8_C( 55), INT8_C( 57), INT8_C( 87), INT8_C( 73), INT8_C( -70), INT8_C(-103), INT8_C(-105), INT8_C( -99), INT8_C( 2), INT8_C( 51), INT8_C( 38), INT8_C( -90), INT8_C( 14), INT8_C( -23), INT8_C( 121), INT8_C( 31), INT8_C( -78)), simde_mm512_set_epi8(INT8_C( -62), INT8_C( -47), INT8_C(-114), INT8_C( 87), INT8_C( -23), INT8_C( -34), INT8_C( -44), INT8_C( 6), INT8_C( 21), INT8_C( -94), INT8_C( -31), INT8_C( 89), INT8_C( -3), INT8_C( 101), INT8_C( 62), INT8_C( -72), INT8_C( 38), INT8_C( -41), INT8_C( 112), INT8_C( 117), INT8_C( 58), INT8_C( 17), INT8_C(-102), INT8_C( -45), INT8_C( 5), INT8_C( 37), INT8_C( 20), INT8_C( -10), INT8_C( 125), INT8_C( -30), INT8_C( -78), INT8_C(-119), INT8_C( -34), INT8_C( -69), INT8_C( 5), INT8_C( -4), INT8_C( 57), INT8_C( -46), INT8_C( 38), INT8_C(-103), INT8_C( 38), INT8_C( 94), INT8_C( 6), INT8_C( 108), INT8_C( 102), INT8_C( 123), INT8_C( 103), INT8_C( 112), INT8_C( -27), INT8_C(-114), INT8_C(-101), INT8_C(-115), INT8_C(-101), INT8_C( 69), INT8_C(-119), INT8_C( -14), INT8_C( 14), INT8_C( 10), INT8_C( 41), INT8_C( 17), INT8_C( -27), INT8_C(-110), INT8_C( 84), INT8_C( -17)), 
simde_mm512_set_epi8(INT8_C( -24), INT8_C( 0), INT8_C( -28), INT8_C( 74), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -29), INT8_C( 0), INT8_C( -33), INT8_C( -81), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 53), INT8_C( 53), INT8_C( 86), INT8_C( -50), INT8_C( 0), INT8_C( 82), INT8_C( 0), INT8_C( -98), INT8_C( 0), INT8_C( 125), INT8_C( 9), INT8_C( 0), INT8_C( 0), INT8_C( 22), INT8_C( 0), INT8_C( -2), INT8_C( 44), INT8_C( 118), INT8_C( 0), INT8_C( 86), INT8_C( -19), INT8_C( 0), INT8_C( 75), INT8_C( 0), INT8_C( 124), INT8_C( 0), INT8_C(-106), INT8_C( 0), INT8_C( 0), INT8_C( -10), INT8_C( 0), INT8_C( 0), INT8_C( -2), INT8_C( -23), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 34), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 4), INT8_C( 0), INT8_C(-111), INT8_C( 0), INT8_C( -68)) }, { UINT64_C( 3574159949714469990), simde_mm512_set_epi8(INT8_C( 7), INT8_C( -12), INT8_C( 120), INT8_C( 61), INT8_C( -91), INT8_C( 115), INT8_C( 9), INT8_C( 59), INT8_C( -83), INT8_C(-127), INT8_C( 36), INT8_C( -14), INT8_C( -98), INT8_C( 13), INT8_C( -23), INT8_C( -82), INT8_C( -57), INT8_C(-100), INT8_C( -48), INT8_C(-103), INT8_C( 117), INT8_C(-122), INT8_C( -71), INT8_C( 115), INT8_C( 123), INT8_C( 45), INT8_C( 89), INT8_C( -95), INT8_C(-117), INT8_C( -47), INT8_C( 71), INT8_C( 107), INT8_C( -86), INT8_C( -74), INT8_C( 66), INT8_C( -36), INT8_C( -2), INT8_C( 91), INT8_C( -51), INT8_C( 23), INT8_C( -45), INT8_C( -60), INT8_C( -99), INT8_C( 65), INT8_C(-113), INT8_C( -7), INT8_C( -76), INT8_C( 51), INT8_C( 18), INT8_C( 27), INT8_C( -14), INT8_C( 124), INT8_C( -66), INT8_C( -18), INT8_C( -5), INT8_C( 0), INT8_C( 98), INT8_C( 31), INT8_C( -53), INT8_C( 122), INT8_C( -44), INT8_C( 121), INT8_C( -29), INT8_C( -24)), simde_mm512_set_epi8(INT8_C( -43), INT8_C(-128), INT8_C( 122), INT8_C( -97), INT8_C( 32), INT8_C( -77), INT8_C( -10), INT8_C( -9), INT8_C( -5), INT8_C( -84), INT8_C(-115), INT8_C(-106), INT8_C( 40), INT8_C(-127), INT8_C( 47), INT8_C( 
-89), INT8_C( 7), INT8_C( 50), INT8_C(-110), INT8_C( 70), INT8_C( -40), INT8_C( 59), INT8_C( 127), INT8_C( -94), INT8_C( -96), INT8_C( -66), INT8_C( -54), INT8_C(-111), INT8_C( -88), INT8_C( -27), INT8_C( -51), INT8_C( -96), INT8_C( 118), INT8_C( -71), INT8_C( 125), INT8_C( 110), INT8_C(-115), INT8_C(-128), INT8_C( 71), INT8_C(-120), INT8_C(-113), INT8_C( 21), INT8_C( -93), INT8_C(-123), INT8_C( -55), INT8_C( -21), INT8_C( -23), INT8_C( 47), INT8_C( 101), INT8_C(-127), INT8_C( 66), INT8_C( -9), INT8_C( -88), INT8_C( -89), INT8_C( 79), INT8_C( -89), INT8_C(-125), INT8_C( 89), INT8_C( 66), INT8_C( 35), INT8_C( -61), INT8_C( 48), INT8_C( -39), INT8_C( 121)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 106), INT8_C( 6), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 12), INT8_C( 34), INT8_C( 0), INT8_C( 0), INT8_C( 48), INT8_C( -96), INT8_C( 0), INT8_C( 0), INT8_C( -45), INT8_C( 10), INT8_C( -12), INT8_C( -18), INT8_C(-126), INT8_C( 0), INT8_C( 107), INT8_C( -1), INT8_C( 1), INT8_C( -39), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 18), INT8_C( 0), INT8_C( 0), INT8_C( 118), INT8_C( -13), INT8_C( 0), INT8_C( 0), INT8_C( -99), INT8_C( -90), INT8_C( 74), INT8_C( 0), INT8_C( 43), INT8_C( 0), INT8_C( 0), INT8_C( -56), INT8_C( 0), INT8_C( -81), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 2), INT8_C( 0), INT8_C( 92), INT8_C( 85), INT8_C( 0), INT8_C( -32), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -69), INT8_C( -67), INT8_C( 0), INT8_C( 0), INT8_C( 33), INT8_C(-106), INT8_C( 0)) }, { UINT64_C( 7245014237231228482), simde_mm512_set_epi8(INT8_C( 102), INT8_C( 39), INT8_C( -51), INT8_C( 5), INT8_C( -36), INT8_C( -70), INT8_C( 38), INT8_C(-126), INT8_C( -24), INT8_C(-125), INT8_C( -94), INT8_C( -42), INT8_C( 104), INT8_C( -11), INT8_C( -43), INT8_C( 61), INT8_C( -61), INT8_C( -93), INT8_C( 105), INT8_C( 12), INT8_C( -70), INT8_C( 125), INT8_C( 23), INT8_C( -91), INT8_C( -30), INT8_C( 110), INT8_C( -97), INT8_C( -76), INT8_C( -96), INT8_C( 126), INT8_C( -48), INT8_C(-117), 
INT8_C( 20), INT8_C( 23), INT8_C( 95), INT8_C( -52), INT8_C( 15), INT8_C( -26), INT8_C( -88), INT8_C( -3), INT8_C( 69), INT8_C( -98), INT8_C(-100), INT8_C( 33), INT8_C( -17), INT8_C( -10), INT8_C( -53), INT8_C( 53), INT8_C( -25), INT8_C( 38), INT8_C( 75), INT8_C( 71), INT8_C( 106), INT8_C( -45), INT8_C( -51), INT8_C( 79), INT8_C( 54), INT8_C( -50), INT8_C( -94), INT8_C( 16), INT8_C( 3), INT8_C( -22), INT8_C( -34), INT8_C( 54)), simde_mm512_set_epi8(INT8_C( 123), INT8_C( -61), INT8_C( -18), INT8_C( 14), INT8_C( 36), INT8_C( -5), INT8_C( 29), INT8_C( 95), INT8_C( -38), INT8_C( -24), INT8_C( 52), INT8_C( -93), INT8_C(-118), INT8_C( -53), INT8_C( 97), INT8_C( -62), INT8_C( 93), INT8_C( -85), INT8_C( 110), INT8_C( 78), INT8_C( 27), INT8_C( -48), INT8_C( -31), INT8_C( -33), INT8_C( 29), INT8_C( 58), INT8_C( 95), INT8_C( 42), INT8_C( 15), INT8_C( 43), INT8_C( 119), INT8_C( 59), INT8_C(-127), INT8_C( -47), INT8_C( -59), INT8_C( -3), INT8_C( -51), INT8_C( 41), INT8_C( 7), INT8_C( -3), INT8_C( 80), INT8_C( 88), INT8_C( 103), INT8_C( -43), INT8_C(-102), INT8_C( 82), INT8_C( -24), INT8_C( -40), INT8_C( -58), INT8_C( 26), INT8_C( 95), INT8_C( -4), INT8_C( -1), INT8_C(-103), INT8_C( -99), INT8_C( -29), INT8_C( -5), INT8_C( 1), INT8_C( -57), INT8_C( 73), INT8_C( -83), INT8_C( 48), INT8_C( 42), INT8_C(-113)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 50), INT8_C( -39), INT8_C( 0), INT8_C( 0), INT8_C( -67), INT8_C( 0), INT8_C( 0), INT8_C(-114), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -86), INT8_C( 0), INT8_C( 2), INT8_C( -81), INT8_C( 0), INT8_C( -1), INT8_C(-112), INT8_C( -88), INT8_C( 0), INT8_C( 68), INT8_C( -66), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-113), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 84), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-119), INT8_C( 0), INT8_C( 78), INT8_C( -69), INT8_C( 0), INT8_C( -4), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 11), INT8_C( 0), INT8_C( -96), INT8_C( 0), INT8_C( 
0), INT8_C( -37), INT8_C( 28), INT8_C( 0), INT8_C( 0), INT8_C( 20), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 9), INT8_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_gf2p8affineinv_epi64_epi8(test_vec[i].k, test_vec[i].x, test_vec[i].A, 189); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_gf2p8mul_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( -58), INT8_C( 123), INT8_C( 47), INT8_C( -69), INT8_C(-122), INT8_C(-128), INT8_C( 42), INT8_C( 103), INT8_C(-125), INT8_C( 0), INT8_C( 21), INT8_C( 57), INT8_C(-125), INT8_C( 27), INT8_C( 126), INT8_C( 26)), simde_mm_set_epi8(INT8_C( 125), INT8_C(-110), INT8_C( -61), INT8_C( -52), INT8_C( 91), INT8_C( -51), INT8_C( 46), INT8_C( 77), INT8_C( 126), INT8_C( 102), INT8_C( 36), INT8_C( 89), INT8_C( 53), INT8_C( 15), INT8_C( 47), INT8_C( -91)), simde_mm_set_epi8(INT8_C(-125), INT8_C( -50), INT8_C( 46), INT8_C( 100), INT8_C( -93), INT8_C( 13), INT8_C(-128), INT8_C( 93), INT8_C( -3), INT8_C( 0), INT8_C( -62), INT8_C(-107), INT8_C(-102), INT8_C(-103), INT8_C( -27), INT8_C( -80)) }, { simde_mm_set_epi8(INT8_C(-105), INT8_C( 84), INT8_C( 32), INT8_C( -1), INT8_C( -57), INT8_C( -96), INT8_C( 93), INT8_C( -24), INT8_C( 47), INT8_C( 26), INT8_C(-124), INT8_C( -74), INT8_C( 44), INT8_C(-100), INT8_C( 65), INT8_C( -86)), simde_mm_set_epi8(INT8_C( 74), INT8_C( 13), INT8_C( -32), INT8_C( -64), INT8_C( 89), INT8_C( 97), INT8_C( -84), INT8_C( 6), INT8_C( 20), INT8_C( 76), INT8_C( 95), INT8_C( 31), INT8_C( 84), INT8_C( 99), INT8_C(-109), INT8_C( -83)), simde_mm_set_epi8(INT8_C( 27), INT8_C(-119), INT8_C( 31), INT8_C( -38), INT8_C( 92), INT8_C( -14), INT8_C( -85), INT8_C( 70), INT8_C( 122), INT8_C( 98), INT8_C( 51), INT8_C( 125), INT8_C( -77), INT8_C( -75), INT8_C( 114), INT8_C( -55)) }, { simde_mm_set_epi8(INT8_C( -91), INT8_C( 
-71), INT8_C( 87), INT8_C( -92), INT8_C( 75), INT8_C( -44), INT8_C( 61), INT8_C( 96), INT8_C( -18), INT8_C( -37), INT8_C( -6), INT8_C( -31), INT8_C( 31), INT8_C( 58), INT8_C( 110), INT8_C( 64)), simde_mm_set_epi8(INT8_C( 47), INT8_C( -7), INT8_C( 97), INT8_C( 96), INT8_C( 79), INT8_C( 104), INT8_C( -1), INT8_C( 57), INT8_C( 34), INT8_C( 115), INT8_C( 45), INT8_C(-100), INT8_C( -72), INT8_C( 125), INT8_C( -10), INT8_C( 6)), simde_mm_set_epi8(INT8_C( 110), INT8_C( 35), INT8_C( 69), INT8_C( -55), INT8_C( -39), INT8_C( -82), INT8_C( 44), INT8_C( -93), INT8_C( 3), INT8_C( 43), INT8_C( -36), INT8_C(-114), INT8_C( -57), INT8_C(-105), INT8_C( -45), INT8_C(-101)) }, { simde_mm_set_epi8(INT8_C(-128), INT8_C( 25), INT8_C( -81), INT8_C( -53), INT8_C( -19), INT8_C( 25), INT8_C( -96), INT8_C( 81), INT8_C( 14), INT8_C( -65), INT8_C( 80), INT8_C( 40), INT8_C( -41), INT8_C( 42), INT8_C( 54), INT8_C( 95)), simde_mm_set_epi8(INT8_C( -67), INT8_C(-113), INT8_C( -30), INT8_C( -24), INT8_C(-126), INT8_C( -91), INT8_C( 113), INT8_C( 114), INT8_C( -22), INT8_C( 54), INT8_C( -86), INT8_C( 85), INT8_C( 50), INT8_C(-122), INT8_C( -72), INT8_C( 27)), simde_mm_set_epi8(INT8_C( 51), INT8_C( -77), INT8_C( -15), INT8_C( 58), INT8_C( 103), INT8_C( 68), INT8_C( 28), INT8_C(-116), INT8_C( 91), INT8_C( -96), INT8_C( 109), INT8_C( -48), INT8_C( -49), INT8_C( 32), INT8_C( 34), INT8_C( -88)) }, { simde_mm_set_epi8(INT8_C( 91), INT8_C( -42), INT8_C( 121), INT8_C( 102), INT8_C( -61), INT8_C( -31), INT8_C( 58), INT8_C( 119), INT8_C( 82), INT8_C( -13), INT8_C( -95), INT8_C( 125), INT8_C( -30), INT8_C( -60), INT8_C( 23), INT8_C( -52)), simde_mm_set_epi8(INT8_C(-125), INT8_C(-124), INT8_C( -84), INT8_C( -88), INT8_C(-108), INT8_C( -61), INT8_C( 56), INT8_C( 72), INT8_C( -2), INT8_C( 79), INT8_C( 1), INT8_C( -6), INT8_C( 116), INT8_C( 83), INT8_C( 48), INT8_C( -70)), simde_mm_set_epi8(INT8_C(-113), INT8_C( 87), INT8_C( 113), INT8_C( -94), INT8_C( -23), INT8_C( -70), INT8_C( 71), INT8_C( 81), INT8_C( 97), 
INT8_C( -92), INT8_C( -95), INT8_C( 1), INT8_C( -26), INT8_C( 94), INT8_C( -67), INT8_C( -88)) }, { simde_mm_set_epi8(INT8_C( -7), INT8_C( 123), INT8_C( -98), INT8_C( -17), INT8_C( 95), INT8_C( -15), INT8_C( 17), INT8_C( 113), INT8_C( -45), INT8_C( -59), INT8_C( 123), INT8_C( 108), INT8_C( -51), INT8_C( 102), INT8_C( -38), INT8_C( -29)), simde_mm_set_epi8(INT8_C( -21), INT8_C( -80), INT8_C( 86), INT8_C( 103), INT8_C( 12), INT8_C( -96), INT8_C( 24), INT8_C( 118), INT8_C( -7), INT8_C( -63), INT8_C( -34), INT8_C( -12), INT8_C( 117), INT8_C( 111), INT8_C(-126), INT8_C( -74)), simde_mm_set_epi8(INT8_C( -25), INT8_C( -63), INT8_C( -58), INT8_C(-116), INT8_C( -87), INT8_C( 45), INT8_C(-125), INT8_C(-111), INT8_C( 117), INT8_C( 25), INT8_C( 37), INT8_C( -4), INT8_C( -13), INT8_C( -88), INT8_C( -41), INT8_C( 86)) }, { simde_mm_set_epi8(INT8_C( 17), INT8_C( 14), INT8_C( -61), INT8_C( 9), INT8_C( -68), INT8_C( 42), INT8_C( -15), INT8_C(-123), INT8_C( -11), INT8_C( 1), INT8_C( 19), INT8_C( 11), INT8_C( 27), INT8_C( -47), INT8_C( -84), INT8_C( 127)), simde_mm_set_epi8(INT8_C( 102), INT8_C( 62), INT8_C(-100), INT8_C( 53), INT8_C( -45), INT8_C(-125), INT8_C( 97), INT8_C( -82), INT8_C( -10), INT8_C( -29), INT8_C( 9), INT8_C( 117), INT8_C( 31), INT8_C( -55), INT8_C( -99), INT8_C( 8)), simde_mm_set_epi8(INT8_C( 92), INT8_C( 111), INT8_C( -85), INT8_C(-122), INT8_C(-115), INT8_C( -94), INT8_C( -22), INT8_C( 80), INT8_C( 83), INT8_C( -29), INT8_C(-117), INT8_C( 26), INT8_C( 50), INT8_C( 70), INT8_C( 120), INT8_C( -43)) }, { simde_mm_set_epi8(INT8_C( -69), INT8_C( 107), INT8_C( -90), INT8_C( 49), INT8_C( 80), INT8_C( 122), INT8_C( -46), INT8_C(-115), INT8_C( 85), INT8_C(-120), INT8_C( -81), INT8_C( -30), INT8_C( -51), INT8_C( -64), INT8_C( -74), INT8_C(-110)), simde_mm_set_epi8(INT8_C( 112), INT8_C( 61), INT8_C( -18), INT8_C( -23), INT8_C( 3), INT8_C( -88), INT8_C( -95), INT8_C( 37), INT8_C( 16), INT8_C( -91), INT8_C( 91), INT8_C( -61), INT8_C( 41), INT8_C( -70), INT8_C( 71), INT8_C( 
106)), simde_mm_set_epi8(INT8_C( -64), INT8_C(-116), INT8_C( -85), INT8_C( -33), INT8_C( -16), INT8_C( 109), INT8_C( -61), INT8_C( -97), INT8_C( 39), INT8_C( -40), INT8_C(-125), INT8_C( -28), INT8_C( 55), INT8_C( -47), INT8_C( 77), INT8_C( -65)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_gf2p8mul_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_gf2p8mul_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( -64), INT8_C( 5), INT8_C( -52), INT8_C( 61), INT8_C( 44), INT8_C( 127), INT8_C( 41), INT8_C( 104), INT8_C(-104), INT8_C( 0), INT8_C( 31), INT8_C(-117), INT8_C( 23), INT8_C(-108), INT8_C( 112), INT8_C( 80), INT8_C( 49), INT8_C(-108), INT8_C( 117), INT8_C( -92), INT8_C( 104), INT8_C( 74), INT8_C(-105), INT8_C( 65), INT8_C( -42), INT8_C(-113), INT8_C( -11), INT8_C( 61), INT8_C( 123), INT8_C( -51), INT8_C( 60), INT8_C( -55)), simde_mm256_set_epi8(INT8_C( 9), INT8_C( 66), INT8_C( 63), INT8_C(-117), INT8_C( 98), INT8_C( -93), INT8_C( 123), INT8_C(-119), INT8_C( -37), INT8_C(-125), INT8_C( 28), INT8_C( 106), INT8_C( 9), INT8_C( -53), INT8_C( 11), INT8_C( -65), INT8_C(-105), INT8_C( -83), INT8_C( 35), INT8_C( -41), INT8_C( -68), INT8_C( 71), INT8_C( -97), INT8_C(-119), INT8_C( 76), INT8_C( 41), INT8_C( 47), INT8_C( -23), INT8_C( 107), INT8_C( 108), INT8_C( 6), INT8_C( 9)), simde_mm256_set_epi8(INT8_C(-102), INT8_C( 81), INT8_C( -12), INT8_C( 29), INT8_C( 90), INT8_C( 7), INT8_C( 119), INT8_C(-113), INT8_C( 101), INT8_C( 0), INT8_C( 111), INT8_C( 82), INT8_C( -81), INT8_C( 37), INT8_C( 61), INT8_C( 17), INT8_C( 89), INT8_C( 87), INT8_C( -67), INT8_C( 118), INT8_C( 41), INT8_C( -16), INT8_C( 64), INT8_C( -78), INT8_C(-114), INT8_C( -53), INT8_C( -99), INT8_C( 95), INT8_C( 104), INT8_C( 104), INT8_C(-120), INT8_C( -37)) }, { simde_mm256_set_epi8(INT8_C( 
109), INT8_C( 111), INT8_C( -4), INT8_C(-109), INT8_C( 87), INT8_C( -68), INT8_C( 77), INT8_C( 111), INT8_C(-125), INT8_C( -95), INT8_C( 5), INT8_C( 54), INT8_C( -26), INT8_C( -96), INT8_C( 42), INT8_C( 54), INT8_C( -13), INT8_C( 113), INT8_C( -23), INT8_C( 95), INT8_C( 52), INT8_C( 70), INT8_C( -41), INT8_C( 48), INT8_C( -32), INT8_C( -33), INT8_C( -47), INT8_C( 114), INT8_C( -7), INT8_C( -79), INT8_C( 124), INT8_C( 54)), simde_mm256_set_epi8(INT8_C( -13), INT8_C( 56), INT8_C( -1), INT8_C( -30), INT8_C( -15), INT8_C( -2), INT8_C( 36), INT8_C( 56), INT8_C( -23), INT8_C( 5), INT8_C( -38), INT8_C( -84), INT8_C( 22), INT8_C( -3), INT8_C(-108), INT8_C( 92), INT8_C( -79), INT8_C( 88), INT8_C( 56), INT8_C( 72), INT8_C(-105), INT8_C( -59), INT8_C( -78), INT8_C( 22), INT8_C( 108), INT8_C( 107), INT8_C( 87), INT8_C( -27), INT8_C( -6), INT8_C( 7), INT8_C( 111), INT8_C( 73)), simde_mm256_set_epi8(INT8_C( 16), INT8_C( -80), INT8_C( 9), INT8_C( -8), INT8_C( 122), INT8_C( 10), INT8_C( 76), INT8_C( -80), INT8_C( -80), INT8_C( 19), INT8_C( -97), INT8_C( -73), INT8_C(-128), INT8_C( -20), INT8_C( -30), INT8_C( 17), INT8_C( 44), INT8_C( -85), INT8_C( 63), INT8_C( -28), INT8_C( -92), INT8_C( 21), INT8_C( 109), INT8_C(-115), INT8_C( -51), INT8_C( 1), INT8_C( 116), INT8_C( -39), INT8_C( 23), INT8_C( 58), INT8_C(-119), INT8_C( -78)) }, { simde_mm256_set_epi8(INT8_C( -47), INT8_C( 94), INT8_C( -65), INT8_C( 47), INT8_C( -15), INT8_C( 76), INT8_C( 46), INT8_C( -47), INT8_C( 127), INT8_C( -95), INT8_C(-108), INT8_C( 25), INT8_C( 127), INT8_C( 20), INT8_C( 84), INT8_C( 14), INT8_C(-101), INT8_C( 118), INT8_C( 44), INT8_C( 105), INT8_C( 84), INT8_C( 8), INT8_C( -86), INT8_C( -72), INT8_C( 3), INT8_C( 97), INT8_C(-127), INT8_C( 72), INT8_C( -12), INT8_C( 62), INT8_C( 21), INT8_C(-118)), simde_mm256_set_epi8(INT8_C( -91), INT8_C( 95), INT8_C( 10), INT8_C( -45), INT8_C( -37), INT8_C( 13), INT8_C( -37), INT8_C( 29), INT8_C( 23), INT8_C( 79), INT8_C( -38), INT8_C( 123), INT8_C( -40), INT8_C( 80), 
INT8_C( 90), INT8_C( -3), INT8_C(-128), INT8_C( -44), INT8_C( 29), INT8_C( 89), INT8_C( 84), INT8_C( -98), INT8_C( 62), INT8_C( -67), INT8_C( 44), INT8_C( 35), INT8_C( 39), INT8_C( 48), INT8_C( -65), INT8_C( 11), INT8_C( 20), INT8_C( -83)), simde_mm256_set_epi8(INT8_C( 82), INT8_C( -70), INT8_C( -22), INT8_C( -24), INT8_C( 126), INT8_C( 49), INT8_C( 80), INT8_C( -43), INT8_C( -41), INT8_C( -32), INT8_C( 50), INT8_C( 127), INT8_C( 105), INT8_C( 44), INT8_C( -43), INT8_C(-111), INT8_C( -75), INT8_C( 117), INT8_C( 17), INT8_C( -61), INT8_C( -96), INT8_C(-100), INT8_C( 36), INT8_C( -61), INT8_C( 116), INT8_C( 55), INT8_C( 33), INT8_C( 47), INT8_C( -7), INT8_C( -87), INT8_C( 31), INT8_C( -75)) }, { simde_mm256_set_epi8(INT8_C( -87), INT8_C( 1), INT8_C( 86), INT8_C( 46), INT8_C( 82), INT8_C( -21), INT8_C(-103), INT8_C( 67), INT8_C( 41), INT8_C( -98), INT8_C( 111), INT8_C( 83), INT8_C( -84), INT8_C( 75), INT8_C( -53), INT8_C( 41), INT8_C( -83), INT8_C( 56), INT8_C(-112), INT8_C( -60), INT8_C( 31), INT8_C( -28), INT8_C(-106), INT8_C( 106), INT8_C( -91), INT8_C( -27), INT8_C( 55), INT8_C( 18), INT8_C(-100), INT8_C( 105), INT8_C( 73), INT8_C( -31)), simde_mm256_set_epi8(INT8_C( 28), INT8_C(-125), INT8_C( 109), INT8_C( 10), INT8_C( 55), INT8_C(-108), INT8_C( 122), INT8_C( 107), INT8_C( 115), INT8_C( 51), INT8_C( 105), INT8_C( -65), INT8_C( -95), INT8_C( -77), INT8_C( -14), INT8_C( -35), INT8_C( 117), INT8_C(-115), INT8_C( 85), INT8_C( -81), INT8_C( -75), INT8_C( 80), INT8_C( 61), INT8_C( 67), INT8_C( 49), INT8_C(-121), INT8_C(-126), INT8_C( 113), INT8_C( 118), INT8_C( 104), INT8_C(-114), INT8_C(-124)), simde_mm256_set_epi8(INT8_C( -45), INT8_C(-125), INT8_C( -31), INT8_C( 55), INT8_C( 92), INT8_C( 56), INT8_C( 47), INT8_C( 56), INT8_C( 36), INT8_C( -36), INT8_C( -21), INT8_C( -53), INT8_C(-101), INT8_C( 24), INT8_C( -79), INT8_C(-111), INT8_C(-107), INT8_C( 28), INT8_C( 4), INT8_C( 2), INT8_C( 92), INT8_C( -25), INT8_C( -38), INT8_C( 123), INT8_C( 124), INT8_C( 71), INT8_C( 
-80), INT8_C( -77), INT8_C( 108), INT8_C( -17), INT8_C( 114), INT8_C( 85)) }, { simde_mm256_set_epi8(INT8_C( 114), INT8_C(-109), INT8_C( 63), INT8_C( 126), INT8_C( -5), INT8_C( 116), INT8_C( 117), INT8_C( 7), INT8_C( -37), INT8_C(-120), INT8_C( 98), INT8_C( -61), INT8_C( -52), INT8_C( 72), INT8_C( -17), INT8_C( -96), INT8_C( 120), INT8_C(-106), INT8_C( -53), INT8_C(-128), INT8_C( 83), INT8_C( 90), INT8_C( -1), INT8_C( -38), INT8_C( 61), INT8_C( 90), INT8_C( 66), INT8_C( 11), INT8_C( 28), INT8_C( 59), INT8_C( 65), INT8_C( -43)), simde_mm256_set_epi8(INT8_C( 13), INT8_C( -5), INT8_C(-110), INT8_C( 28), INT8_C( -82), INT8_C( -14), INT8_C( 26), INT8_C( 50), INT8_C( -38), INT8_C( -44), INT8_C( -75), INT8_C( 65), INT8_C( -1), INT8_C( -39), INT8_C( -61), INT8_C( 107), INT8_C( 88), INT8_C( 116), INT8_C( 14), INT8_C( -78), INT8_C( 48), INT8_C( 107), INT8_C( 119), INT8_C( -26), INT8_C(-118), INT8_C( 36), INT8_C( 90), INT8_C( 95), INT8_C(-115), INT8_C( 31), INT8_C( 100), INT8_C( 59)), simde_mm256_set_epi8(INT8_C( 28), INT8_C( 108), INT8_C( 17), INT8_C( -97), INT8_C( -76), INT8_C( -25), INT8_C( 126), INT8_C( -98), INT8_C( -76), INT8_C( -61), INT8_C(-126), INT8_C( -27), INT8_C( -78), INT8_C( 110), INT8_C( 68), INT8_C( -34), INT8_C( 5), INT8_C( 42), INT8_C(-114), INT8_C( -14), INT8_C( -55), INT8_C( -52), INT8_C( -49), INT8_C( -80), INT8_C( 32), INT8_C( -58), INT8_C( -59), INT8_C( 47), INT8_C( 14), INT8_C( -1), INT8_C( 12), INT8_C(-116)) }, { simde_mm256_set_epi8(INT8_C( 124), INT8_C( -99), INT8_C( -16), INT8_C( -83), INT8_C( 4), INT8_C( 119), INT8_C(-113), INT8_C( -73), INT8_C( -61), INT8_C( -4), INT8_C( -56), INT8_C( 44), INT8_C(-102), INT8_C( -29), INT8_C( -70), INT8_C( 103), INT8_C( 10), INT8_C( 112), INT8_C( -76), INT8_C( -75), INT8_C( -63), INT8_C( -6), INT8_C( -93), INT8_C( 14), INT8_C( 64), INT8_C( -16), INT8_C( 98), INT8_C( 22), INT8_C( -15), INT8_C( 4), INT8_C( 109), INT8_C( 54)), simde_mm256_set_epi8(INT8_C( 83), INT8_C( -21), INT8_C( 47), INT8_C( 102), INT8_C( 74), 
INT8_C( 92), INT8_C( 76), INT8_C( -29), INT8_C( -56), INT8_C( 123), INT8_C( 82), INT8_C(-108), INT8_C( 122), INT8_C( 41), INT8_C(-112), INT8_C( 64), INT8_C( 20), INT8_C( 33), INT8_C(-101), INT8_C( -46), INT8_C( -38), INT8_C( 16), INT8_C(-109), INT8_C( -97), INT8_C( 53), INT8_C( 76), INT8_C( -8), INT8_C( -77), INT8_C(-106), INT8_C( 4), INT8_C( 38), INT8_C( 9)), simde_mm256_set_epi8(INT8_C( 55), INT8_C( -54), INT8_C( 14), INT8_C( 71), INT8_C( 51), INT8_C( -89), INT8_C( -66), INT8_C( -75), INT8_C( 40), INT8_C( 42), INT8_C( 111), INT8_C( -73), INT8_C( -95), INT8_C( -59), INT8_C( -53), INT8_C( -88), INT8_C(-120), INT8_C( -14), INT8_C( 122), INT8_C( 41), INT8_C( -98), INT8_C( 57), INT8_C(-118), INT8_C( -5), INT8_C( -17), INT8_C( 126), INT8_C( -67), INT8_C( 66), INT8_C( -67), INT8_C( 16), INT8_C( 122), INT8_C( -99)) }, { simde_mm256_set_epi8(INT8_C( -80), INT8_C( 32), INT8_C( -33), INT8_C( 47), INT8_C(-126), INT8_C(-105), INT8_C( 54), INT8_C( -79), INT8_C( -78), INT8_C(-125), INT8_C( -29), INT8_C( 27), INT8_C( 43), INT8_C( 111), INT8_C( -3), INT8_C( 66), INT8_C(-107), INT8_C( 82), INT8_C( -81), INT8_C( 118), INT8_C( -96), INT8_C( 44), INT8_C( 84), INT8_C( 19), INT8_C( -58), INT8_C( -34), INT8_C( 5), INT8_C( 7), INT8_C( 120), INT8_C( 104), INT8_C( 117), INT8_C( -13)), simde_mm256_set_epi8(INT8_C( -42), INT8_C( -83), INT8_C( -1), INT8_C( -91), INT8_C(-106), INT8_C( -87), INT8_C( -79), INT8_C(-103), INT8_C( -35), INT8_C( 0), INT8_C( 122), INT8_C( 25), INT8_C(-103), INT8_C( 107), INT8_C( -40), INT8_C( 90), INT8_C( -4), INT8_C( -20), INT8_C( -85), INT8_C(-125), INT8_C( -77), INT8_C( -60), INT8_C( -82), INT8_C( 16), INT8_C( 127), INT8_C( 8), INT8_C(-109), INT8_C( -71), INT8_C( -52), INT8_C( -33), INT8_C( 97), INT8_C( 66)), simde_mm256_set_epi8(INT8_C( 104), INT8_C( 124), INT8_C( -63), INT8_C( 110), INT8_C( 88), INT8_C( -47), INT8_C( -65), INT8_C( -62), INT8_C( -11), INT8_C( 0), INT8_C( -73), INT8_C( 104), INT8_C( -78), INT8_C( 53), INT8_C( -95), INT8_C( -59), INT8_C(-122), 
INT8_C(-110), INT8_C( 41), INT8_C(-119), INT8_C( -29), INT8_C( -76), INT8_C( -72), INT8_C( 43), INT8_C( 20), INT8_C( -86), INT8_C( -23), INT8_C( 2), INT8_C( 109), INT8_C( 123), INT8_C( 19), INT8_C( 111)) }, { simde_mm256_set_epi8(INT8_C( 103), INT8_C( 114), INT8_C( 5), INT8_C( -85), INT8_C( 92), INT8_C( 100), INT8_C( 42), INT8_C(-120), INT8_C( -89), INT8_C( 109), INT8_C( 5), INT8_C( 42), INT8_C( 40), INT8_C( 95), INT8_C( -31), INT8_C(-119), INT8_C( 31), INT8_C( 56), INT8_C( -94), INT8_C( 107), INT8_C(-116), INT8_C( 33), INT8_C( 0), INT8_C(-121), INT8_C( 119), INT8_C( -25), INT8_C( 42), INT8_C( 10), INT8_C( 65), INT8_C(-105), INT8_C( -79), INT8_C( 11)), simde_mm256_set_epi8(INT8_C( -74), INT8_C( -88), INT8_C( -86), INT8_C( 80), INT8_C( 68), INT8_C( 25), INT8_C( -82), INT8_C( 21), INT8_C( -25), INT8_C( 108), INT8_C( 70), INT8_C( -18), INT8_C( -29), INT8_C( 82), INT8_C( -24), INT8_C( -6), INT8_C( -97), INT8_C( 91), INT8_C( 91), INT8_C(-124), INT8_C(-115), INT8_C( -57), INT8_C( 83), INT8_C( -1), INT8_C(-120), INT8_C( 103), INT8_C( 40), INT8_C(-122), INT8_C( 121), INT8_C( 30), INT8_C( -79), INT8_C( -92)), simde_mm256_set_epi8(INT8_C( 124), INT8_C( 90), INT8_C( 52), INT8_C( 61), INT8_C(-127), INT8_C( 115), INT8_C( 92), INT8_C( -58), INT8_C( -83), INT8_C( -5), INT8_C( 69), INT8_C( 50), INT8_C( 38), INT8_C( 19), INT8_C( -12), INT8_C( -72), INT8_C( 87), INT8_C( -68), INT8_C( 65), INT8_C( -90), INT8_C( 70), INT8_C( 84), INT8_C( 0), INT8_C( -82), INT8_C( 6), INT8_C(-103), INT8_C( 124), INT8_C( 75), INT8_C( 16), INT8_C( 56), INT8_C( -20), INT8_C( -96)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_gf2p8mul_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; }
/*
 * Table-driven check of simde_mm512_gf2p8mul_epi8.
 *
 * test_vec holds 8 cases; each case is two 512-bit inputs (a, b) and the
 * expected output (r), all spelled out byte-by-byte with
 * simde_mm512_set_epi8.  After the table, a loop calls
 * simde_mm512_gf2p8mul_epi8(a, b) for every case and compares the result
 * with r through simde_assert_m512i_i8(..., ==, ...); the function then
 * returns 0.
 *
 * The vectors are literal constants; how they were produced is not visible
 * here (presumably reference output -- confirm before editing any value).
 * Sanity anchor: in the first case the byte where b is 0 also has r == 0.
 */
static int test_simde_mm512_gf2p8mul_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( -4), INT8_C( -99), INT8_C( -5), INT8_C( 
60), INT8_C( -45), INT8_C( 78), INT8_C( -33), INT8_C( 92), INT8_C( 53), INT8_C( -32), INT8_C( 109), INT8_C( -22), INT8_C( -84), INT8_C( 118), INT8_C( 86), INT8_C( -65), INT8_C(-122), INT8_C( 15), INT8_C( 72), INT8_C( 67), INT8_C( 66), INT8_C( 122), INT8_C( -51), INT8_C( 61), INT8_C( 85), INT8_C( 78), INT8_C( 46), INT8_C( -91), INT8_C( -77), INT8_C( -87), INT8_C( 21), INT8_C(-107), INT8_C(-117), INT8_C( 93), INT8_C( 83), INT8_C( -77), INT8_C(-119), INT8_C( -17), INT8_C( -95), INT8_C( 4), INT8_C( -62), INT8_C( -96), INT8_C( 27), INT8_C( -4), INT8_C( -68), INT8_C( -9), INT8_C( 59), INT8_C( 90), INT8_C( 45), INT8_C(-111), INT8_C( 29), INT8_C( 76), INT8_C( 12), INT8_C( -34), INT8_C( 9), INT8_C( -31), INT8_C( 87), INT8_C( 64), INT8_C( -55), INT8_C( -10), INT8_C( -99), INT8_C( -8), INT8_C( 49), INT8_C( 15)), simde_mm512_set_epi8(INT8_C( -73), INT8_C( -62), INT8_C(-104), INT8_C( 43), INT8_C( -14), INT8_C( -77), INT8_C(-110), INT8_C( -27), INT8_C( 39), INT8_C( -90), INT8_C( 61), INT8_C( 95), INT8_C( 0), INT8_C( -99), INT8_C( 111), INT8_C( -43), INT8_C( 93), INT8_C( 29), INT8_C( 80), INT8_C( -59), INT8_C( 4), INT8_C( 54), INT8_C( -23), INT8_C( 91), INT8_C( 35), INT8_C( 119), INT8_C( -89), INT8_C( -95), INT8_C( -31), INT8_C( -75), INT8_C( 30), INT8_C(-102), INT8_C( 31), INT8_C(-124), INT8_C( -19), INT8_C(-105), INT8_C(-127), INT8_C( -12), INT8_C( -14), INT8_C( -63), INT8_C( -34), INT8_C( 8), INT8_C( -37), INT8_C( 2), INT8_C( -57), INT8_C(-117), INT8_C( 33), INT8_C( 54), INT8_C( 92), INT8_C( 112), INT8_C(-110), INT8_C( 65), INT8_C( 118), INT8_C( -69), INT8_C( 91), INT8_C( -29), INT8_C( 115), INT8_C( 82), INT8_C( 92), INT8_C( -47), INT8_C( -89), INT8_C(-107), INT8_C( 77), INT8_C(-124)), simde_mm512_set_epi8(INT8_C( -41), INT8_C( -11), INT8_C( -29), INT8_C( 126), INT8_C( -39), INT8_C( 81), INT8_C( 52), INT8_C( 103), INT8_C( 113), INT8_C( 21), INT8_C( 2), INT8_C( 27), INT8_C( 0), INT8_C( 26), INT8_C( 77), INT8_C( 76), INT8_C(-102), INT8_C( -69), INT8_C( 113), INT8_C( -23), 
INT8_C( 19), INT8_C( 63), INT8_C( -79), INT8_C(-112), INT8_C( -79), INT8_C( 120), INT8_C(-105), INT8_C( 69), INT8_C( 105), INT8_C( 100), INT8_C( -99), INT8_C(-110), INT8_C( -64), INT8_C( 32), INT8_C( 45), INT8_C(-125), INT8_C( -1), INT8_C( 105), INT8_C( 36), INT8_C( 41), INT8_C( -50), INT8_C( 119), INT8_C( -21), INT8_C( -29), INT8_C( 126), INT8_C( -14), INT8_C( 26), INT8_C( -91), INT8_C(-108), INT8_C( 15), INT8_C( -13), INT8_C( -54), INT8_C( 94), INT8_C( 76), INT8_C( -75), INT8_C(-123), INT8_C( -20), INT8_C( 71), INT8_C( -81), INT8_C( 79), INT8_C( 64), INT8_C( -28), INT8_C(-110), INT8_C( -3)) }, { simde_mm512_set_epi8(INT8_C( -5), INT8_C( 74), INT8_C(-121), INT8_C( -14), INT8_C( 64), INT8_C( 79), INT8_C( -35), INT8_C( -56), INT8_C( -76), INT8_C( -75), INT8_C( 63), INT8_C( 84), INT8_C(-118), INT8_C( 38), INT8_C( 75), INT8_C( 74), INT8_C( 81), INT8_C(-120), INT8_C( 19), INT8_C( -37), INT8_C( -94), INT8_C( 16), INT8_C( 48), INT8_C( -62), INT8_C( -7), INT8_C( -62), INT8_C( 16), INT8_C( 74), INT8_C( -28), INT8_C( 37), INT8_C( 113), INT8_C( -95), INT8_C( 28), INT8_C( 57), INT8_C( -25), INT8_C( 99), INT8_C( 10), INT8_C( 126), INT8_C( -39), INT8_C( 71), INT8_C( 42), INT8_C( -81), INT8_C( -68), INT8_C( 111), INT8_C( -94), INT8_C( 58), INT8_C( 41), INT8_C( 41), INT8_C( 10), INT8_C(-105), INT8_C(-126), INT8_C(-109), INT8_C(-112), INT8_C( -69), INT8_C( 26), INT8_C( -60), INT8_C( 40), INT8_C( -74), INT8_C( 51), INT8_C( 95), INT8_C(-102), INT8_C( 114), INT8_C( 108), INT8_C(-102)), simde_mm512_set_epi8(INT8_C( 110), INT8_C( 25), INT8_C( -27), INT8_C( 65), INT8_C( 89), INT8_C(-115), INT8_C( 91), INT8_C( 61), INT8_C( 80), INT8_C( -68), INT8_C( 74), INT8_C( 103), INT8_C( -94), INT8_C( -54), INT8_C( -40), INT8_C( 56), INT8_C( -28), INT8_C( 49), INT8_C( 62), INT8_C( -41), INT8_C( -21), INT8_C( 30), INT8_C( -37), INT8_C( 117), INT8_C( 100), INT8_C( 44), INT8_C( 7), INT8_C( -14), INT8_C( -84), INT8_C(-105), INT8_C( 23), INT8_C( 2), INT8_C( 29), INT8_C( 100), INT8_C( 78), INT8_C( -50), 
INT8_C( -62), INT8_C( 98), INT8_C( 46), INT8_C( -26), INT8_C( 83), INT8_C(-123), INT8_C( 25), INT8_C( -45), INT8_C( 40), INT8_C( -21), INT8_C(-111), INT8_C(-102), INT8_C( -57), INT8_C( 24), INT8_C( 8), INT8_C( 108), INT8_C( 31), INT8_C( -15), INT8_C( 25), INT8_C( -63), INT8_C( -11), INT8_C( 23), INT8_C( -32), INT8_C( -46), INT8_C( 29), INT8_C( -51), INT8_C( -22), INT8_C( 42)), simde_mm512_set_epi8(INT8_C( 67), INT8_C( -32), INT8_C( 71), INT8_C( 32), INT8_C( -79), INT8_C( -86), INT8_C( 86), INT8_C(-125), INT8_C( 87), INT8_C(-106), INT8_C( -60), INT8_C( 5), INT8_C( 18), INT8_C( -94), INT8_C( 85), INT8_C( 41), INT8_C( 3), INT8_C( 96), INT8_C(-113), INT8_C( -84), INT8_C( 34), INT8_C( -5), INT8_C( 33), INT8_C( 38), INT8_C( -1), INT8_C( 92), INT8_C( 112), INT8_C( -4), INT8_C( 9), INT8_C(-128), INT8_C( 29), INT8_C( 89), INT8_C( 87), INT8_C( 71), INT8_C( -8), INT8_C( -86), INT8_C( -43), INT8_C( 23), INT8_C( 12), INT8_C( 33), INT8_C(-122), INT8_C( -43), INT8_C( 30), INT8_C( -94), INT8_C( -32), INT8_C(-126), INT8_C( -56), INT8_C( -32), INT8_C( -9), INT8_C( 103), INT8_C( 124), INT8_C(-110), INT8_C( -14), INT8_C( 101), INT8_C( 113), INT8_C( -40), INT8_C( 96), INT8_C( -70), INT8_C( -90), INT8_C( 71), INT8_C( 27), INT8_C( -90), INT8_C( 88), INT8_C( 85)) }, { simde_mm512_set_epi8(INT8_C( -51), INT8_C( -16), INT8_C( 20), INT8_C( 64), INT8_C( -10), INT8_C( 6), INT8_C( -17), INT8_C( 63), INT8_C( 42), INT8_C( 59), INT8_C( -61), INT8_C( -8), INT8_C( -94), INT8_C( -9), INT8_C( -35), INT8_C( -25), INT8_C( 77), INT8_C( 23), INT8_C( 81), INT8_C( -11), INT8_C( -38), INT8_C( -20), INT8_C( -57), INT8_C( 96), INT8_C( -2), INT8_C( 59), INT8_C( 52), INT8_C( -34), INT8_C(-128), INT8_C( 58), INT8_C( 64), INT8_C( 13), INT8_C( 8), INT8_C( 113), INT8_C( 70), INT8_C( 79), INT8_C( -45), INT8_C( 116), INT8_C( -81), INT8_C( -2), INT8_C( -75), INT8_C( -34), INT8_C( -53), INT8_C( 106), INT8_C( 39), INT8_C( 109), INT8_C( 44), INT8_C( 110), INT8_C( -32), INT8_C( 70), INT8_C( -97), INT8_C( -32), INT8_C( 18), 
INT8_C(-104), INT8_C( 24), INT8_C( -67), INT8_C( 84), INT8_C( 119), INT8_C( 4), INT8_C(-114), INT8_C( 117), INT8_C( -79), INT8_C( 117), INT8_C( -27)), simde_mm512_set_epi8(INT8_C( -58), INT8_C( -25), INT8_C( -39), INT8_C( -81), INT8_C( 1), INT8_C( -9), INT8_C( -77), INT8_C(-100), INT8_C( -25), INT8_C(-116), INT8_C( 50), INT8_C( 21), INT8_C( 8), INT8_C( 32), INT8_C( -55), INT8_C( -35), INT8_C( 101), INT8_C( 0), INT8_C( 8), INT8_C( 35), INT8_C( -69), INT8_C( -14), INT8_C( 1), INT8_C( 93), INT8_C( 103), INT8_C( -51), INT8_C( 23), INT8_C( -61), INT8_C(-106), INT8_C( -78), INT8_C( 61), INT8_C(-128), INT8_C( 8), INT8_C( 55), INT8_C( 50), INT8_C( -11), INT8_C( 30), INT8_C( -21), INT8_C( -39), INT8_C( 76), INT8_C( -43), INT8_C( 6), INT8_C( 126), INT8_C( 77), INT8_C( -11), INT8_C(-113), INT8_C( -24), INT8_C( -61), INT8_C( -88), INT8_C(-103), INT8_C( 36), INT8_C( 97), INT8_C( -1), INT8_C( -96), INT8_C( 98), INT8_C( 61), INT8_C( 121), INT8_C( 55), INT8_C( 80), INT8_C(-109), INT8_C( -86), INT8_C( 43), INT8_C( 15), INT8_C(-118)), simde_mm512_set_epi8(INT8_C( 30), INT8_C( 57), INT8_C( 118), INT8_C( 120), INT8_C( -10), INT8_C( 4), INT8_C( 1), INT8_C( 112), INT8_C( 83), INT8_C( -85), INT8_C( 10), INT8_C( 44), INT8_C( 103), INT8_C( -55), INT8_C( 93), INT8_C( -23), INT8_C( -57), INT8_C( 0), INT8_C( -66), INT8_C(-115), INT8_C(-106), INT8_C( 48), INT8_C( -57), INT8_C( -1), INT8_C( -63), INT8_C( -46), INT8_C( -31), INT8_C( 16), INT8_C( 111), INT8_C( -4), INT8_C( -39), INT8_C( -38), INT8_C( 64), INT8_C( -65), INT8_C(-104), INT8_C( 29), INT8_C(-127), INT8_C( 31), INT8_C( 54), INT8_C(-101), INT8_C( 15), INT8_C( -14), INT8_C(-110), INT8_C( 97), INT8_C( 116), INT8_C( 97), INT8_C(-120), INT8_C( -53), INT8_C( 34), INT8_C( 44), INT8_C( 44), INT8_C( -63), INT8_C(-116), INT8_C( -88), INT8_C( 71), INT8_C(-113), INT8_C( 71), INT8_C( 13), INT8_C( 91), INT8_C( 106), INT8_C( -59), INT8_C( -26), INT8_C( -43), INT8_C( 114)) }, { simde_mm512_set_epi8(INT8_C( -29), INT8_C( -63), INT8_C(-113), INT8_C( 
44), INT8_C( 114), INT8_C(-121), INT8_C( 65), INT8_C( 73), INT8_C( 93), INT8_C( -51), INT8_C( 47), INT8_C( -96), INT8_C( 16), INT8_C( 19), INT8_C( 61), INT8_C( -73), INT8_C( -90), INT8_C( 88), INT8_C( -56), INT8_C(-111), INT8_C( -21), INT8_C( 6), INT8_C( -9), INT8_C( 59), INT8_C( -52), INT8_C( 6), INT8_C( -14), INT8_C( -21), INT8_C(-113), INT8_C( 52), INT8_C( -7), INT8_C( 32), INT8_C( 62), INT8_C( -45), INT8_C( -81), INT8_C( -22), INT8_C( 43), INT8_C( -35), INT8_C( 67), INT8_C( -9), INT8_C( 37), INT8_C( 53), INT8_C( 30), INT8_C( 96), INT8_C( 33), INT8_C( 66), INT8_C( 34), INT8_C( 87), INT8_C(-107), INT8_C( -51), INT8_C( 57), INT8_C( -97), INT8_C( -38), INT8_C( -2), INT8_C( 93), INT8_C( -61), INT8_C( -43), INT8_C( 116), INT8_C( -51), INT8_C( 106), INT8_C( 79), INT8_C( 58), INT8_C( 49), INT8_C( 69)), simde_mm512_set_epi8(INT8_C( -68), INT8_C( 49), INT8_C( 86), INT8_C( -50), INT8_C(-126), INT8_C( -45), INT8_C( -71), INT8_C( -40), INT8_C( 83), INT8_C( 81), INT8_C( 91), INT8_C( -57), INT8_C( -48), INT8_C( -76), INT8_C( -92), INT8_C( 108), INT8_C(-106), INT8_C( -64), INT8_C(-101), INT8_C( 88), INT8_C( 100), INT8_C( -66), INT8_C( -52), INT8_C( -42), INT8_C( 121), INT8_C( -44), INT8_C( -99), INT8_C( 102), INT8_C( -99), INT8_C( -31), INT8_C( -96), INT8_C( 103), INT8_C( 107), INT8_C( 46), INT8_C( -99), INT8_C( -60), INT8_C( 10), INT8_C( 89), INT8_C( 120), INT8_C( -32), INT8_C( 120), INT8_C( 29), INT8_C( 34), INT8_C( 67), INT8_C( 84), INT8_C( 0), INT8_C( -12), INT8_C( -71), INT8_C(-123), INT8_C( 19), INT8_C( -10), INT8_C( 38), INT8_C(-123), INT8_C( 126), INT8_C( 39), INT8_C( -58), INT8_C( -71), INT8_C( -58), INT8_C( 99), INT8_C( -60), INT8_C( -68), INT8_C( -3), INT8_C( 75), INT8_C( 46)), simde_mm512_set_epi8(INT8_C( -46), INT8_C( 54), INT8_C(-121), INT8_C(-105), INT8_C( -63), INT8_C(-101), INT8_C( 54), INT8_C( -2), INT8_C( -22), INT8_C( 34), INT8_C( -31), INT8_C( -23), INT8_C( -81), INT8_C( 114), INT8_C( -68), INT8_C( 22), INT8_C( 107), INT8_C( 8), INT8_C( -42), INT8_C( 86), 
INT8_C( 45), INT8_C( -87), INT8_C(-120), INT8_C( -63), INT8_C( -95), INT8_C( -50), INT8_C( -64), INT8_C( -32), INT8_C( -30), INT8_C( 4), INT8_C( 90), INT8_C( 84), INT8_C( 49), INT8_C( 59), INT8_C( -60), INT8_C( 6), INT8_C( 21), INT8_C( -9), INT8_C( -95), INT8_C( 73), INT8_C( 26), INT8_C( 47), INT8_C( -47), INT8_C( -45), INT8_C( 58), INT8_C( 0), INT8_C( 90), INT8_C( -24), INT8_C( 3), INT8_C( 40), INT8_C( 23), INT8_C( 9), INT8_C( -25), INT8_C( -9), INT8_C( -35), INT8_C( -42), INT8_C(-124), INT8_C( 47), INT8_C( 15), INT8_C( -25), INT8_C( 90), INT8_C(-109), INT8_C( 52), INT8_C( -29)) }, { simde_mm512_set_epi8(INT8_C( 41), INT8_C( 68), INT8_C(-104), INT8_C( -96), INT8_C(-111), INT8_C( 125), INT8_C( 11), INT8_C( -6), INT8_C( -21), INT8_C( -87), INT8_C( -54), INT8_C( 82), INT8_C(-123), INT8_C(-127), INT8_C( 73), INT8_C( -34), INT8_C( 77), INT8_C( 87), INT8_C(-123), INT8_C(-102), INT8_C(-108), INT8_C( 93), INT8_C( 26), INT8_C( -3), INT8_C( 27), INT8_C( 18), INT8_C(-111), INT8_C( 105), INT8_C( 89), INT8_C( 55), INT8_C( 68), INT8_C( 113), INT8_C(-125), INT8_C( 61), INT8_C( 52), INT8_C( 73), INT8_C( 27), INT8_C( -42), INT8_C( -95), INT8_C( 45), INT8_C( 79), INT8_C( -40), INT8_C(-118), INT8_C( 34), INT8_C( -1), INT8_C( 75), INT8_C( -51), INT8_C( -19), INT8_C( 57), INT8_C( -23), INT8_C( 12), INT8_C( -71), INT8_C( 97), INT8_C( 34), INT8_C( -4), INT8_C( 18), INT8_C( 1), INT8_C( 13), INT8_C( -64), INT8_C( -42), INT8_C(-121), INT8_C( 33), INT8_C( -43), INT8_C( 31)), simde_mm512_set_epi8(INT8_C( 56), INT8_C( -91), INT8_C( -61), INT8_C( -63), INT8_C( -93), INT8_C( 89), INT8_C( -67), INT8_C(-116), INT8_C( -66), INT8_C( -94), INT8_C( 89), INT8_C( 102), INT8_C( -4), INT8_C( 48), INT8_C( -83), INT8_C( 66), INT8_C( 9), INT8_C(-126), INT8_C( -25), INT8_C( 16), INT8_C( 73), INT8_C( 103), INT8_C( -81), INT8_C(-128), INT8_C( 15), INT8_C( 22), INT8_C( 55), INT8_C( 80), INT8_C( -81), INT8_C( 0), INT8_C( -38), INT8_C( 82), INT8_C( 95), INT8_C( 92), INT8_C( 76), INT8_C(-103), INT8_C( -34), 
INT8_C( 124), INT8_C( 35), INT8_C( -89), INT8_C(-117), INT8_C( 39), INT8_C( -86), INT8_C( -79), INT8_C( 27), INT8_C(-117), INT8_C( 39), INT8_C( 98), INT8_C( 43), INT8_C( 114), INT8_C( -27), INT8_C( 105), INT8_C( 115), INT8_C( -73), INT8_C( -36), INT8_C( 88), INT8_C( 24), INT8_C( -46), INT8_C( -79), INT8_C( -20), INT8_C( 37), INT8_C( -42), INT8_C( 109), INT8_C(-108)), simde_mm512_set_epi8(INT8_C( -94), INT8_C( 108), INT8_C(-118), INT8_C( 4), INT8_C( -41), INT8_C( 91), INT8_C( 67), INT8_C(-121), INT8_C(-120), INT8_C( 104), INT8_C(-124), INT8_C( 30), INT8_C( -33), INT8_C( 67), INT8_C( 74), INT8_C(-128), INT8_C( 19), INT8_C(-106), INT8_C(-121), INT8_C( 99), INT8_C( 98), INT8_C( 119), INT8_C( 84), INT8_C( 126), INT8_C(-103), INT8_C( 87), INT8_C( -76), INT8_C( -49), INT8_C( -58), INT8_C( 0), INT8_C( 121), INT8_C( -20), INT8_C( -75), INT8_C( 35), INT8_C( -60), INT8_C( 106), INT8_C(-100), INT8_C( -60), INT8_C( 31), INT8_C( 101), INT8_C( 19), INT8_C( 96), INT8_C( 46), INT8_C( -88), INT8_C( -54), INT8_C( 9), INT8_C( -99), INT8_C( 54), INT8_C( -7), INT8_C( -73), INT8_C( -48), INT8_C( 67), INT8_C( 78), INT8_C( 100), INT8_C( -92), INT8_C( 71), INT8_C( 24), INT8_C( 125), INT8_C( -48), INT8_C( 41), INT8_C( -26), INT8_C( 83), INT8_C( 106), INT8_C(-114)) }, { simde_mm512_set_epi8(INT8_C( -17), INT8_C( -17), INT8_C( -13), INT8_C(-118), INT8_C( 63), INT8_C( -11), INT8_C( 118), INT8_C( 73), INT8_C( 99), INT8_C( 93), INT8_C( 61), INT8_C( 101), INT8_C( 102), INT8_C( 27), INT8_C( 7), INT8_C( 7), INT8_C( 97), INT8_C( -39), INT8_C( 109), INT8_C( 38), INT8_C( 28), INT8_C( 25), INT8_C( 88), INT8_C( -93), INT8_C( -85), INT8_C( -67), INT8_C( -93), INT8_C( -38), INT8_C( -92), INT8_C( 8), INT8_C( 92), INT8_C( 121), INT8_C( -63), INT8_C( 124), INT8_C( -30), INT8_C( 49), INT8_C( 16), INT8_C( -64), INT8_C( -63), INT8_C( 45), INT8_C( 71), INT8_C( 7), INT8_C( 60), INT8_C( -4), INT8_C( 35), INT8_C( -33), INT8_C( 103), INT8_C( -39), INT8_C( -52), INT8_C( -76), INT8_C(-111), INT8_C( -1), INT8_C( -81), 
INT8_C( -28), INT8_C( -54), INT8_C( -58), INT8_C( 29), INT8_C( -65), INT8_C( -28), INT8_C( 0), INT8_C( -86), INT8_C( 55), INT8_C( 112), INT8_C( 13)), simde_mm512_set_epi8(INT8_C( 82), INT8_C( 26), INT8_C( 80), INT8_C( 123), INT8_C( 3), INT8_C( -26), INT8_C( 118), INT8_C( 101), INT8_C( 0), INT8_C( 98), INT8_C( 62), INT8_C(-104), INT8_C( -29), INT8_C( 74), INT8_C( -77), INT8_C( 16), INT8_C( -28), INT8_C( -44), INT8_C( 127), INT8_C( -23), INT8_C( -3), INT8_C( 0), INT8_C( 50), INT8_C(-100), INT8_C(-126), INT8_C( 48), INT8_C( 112), INT8_C( -71), INT8_C( -63), INT8_C( 121), INT8_C( 13), INT8_C( -19), INT8_C( 121), INT8_C(-109), INT8_C( 50), INT8_C(-104), INT8_C( 42), INT8_C( -21), INT8_C( 94), INT8_C( 63), INT8_C( 17), INT8_C( 110), INT8_C( 23), INT8_C(-117), INT8_C( 45), INT8_C( 101), INT8_C( -36), INT8_C( -70), INT8_C( 46), INT8_C( -39), INT8_C( -42), INT8_C( -56), INT8_C(-104), INT8_C( 22), INT8_C( 125), INT8_C( -45), INT8_C( -28), INT8_C( -26), INT8_C( 115), INT8_C( 116), INT8_C( -70), INT8_C( 120), INT8_C( 67), INT8_C( -37)), simde_mm512_set_epi8(INT8_C( 100), INT8_C(-114), INT8_C( 59), INT8_C( -54), INT8_C( 65), INT8_C(-102), INT8_C( -56), INT8_C( 72), INT8_C( 0), INT8_C( 69), INT8_C( 97), INT8_C( 95), INT8_C( -3), INT8_C( 116), INT8_C( 52), INT8_C( 112), INT8_C( 94), INT8_C( 105), INT8_C( -58), INT8_C( 100), INT8_C( 57), INT8_C( 0), INT8_C( -78), INT8_C( -95), INT8_C(-117), INT8_C( 111), INT8_C( 44), INT8_C( 57), INT8_C( 45), INT8_C( -27), INT8_C( -31), INT8_C( 97), INT8_C( -62), INT8_C( 97), INT8_C( 34), INT8_C( 93), INT8_C(-106), INT8_C( 67), INT8_C( -32), INT8_C(-127), INT8_C( 91), INT8_C( 17), INT8_C( 89), INT8_C( 64), INT8_C( -96), INT8_C( 87), INT8_C( 91), INT8_C(-103), INT8_C( 76), INT8_C( -21), INT8_C( 59), INT8_C( 99), INT8_C( -31), INT8_C( -84), INT8_C( -87), INT8_C( 2), INT8_C( -6), INT8_C(-103), INT8_C( 79), INT8_C( 0), INT8_C( -4), INT8_C( 43), INT8_C(-113), INT8_C( 24)) }, { simde_mm512_set_epi8(INT8_C( 12), INT8_C( 35), INT8_C( -11), INT8_C( 12), 
INT8_C( 84), INT8_C( 110), INT8_C( -78), INT8_C( 50), INT8_C( 123), INT8_C( -38), INT8_C(-112), INT8_C( -95), INT8_C( 77), INT8_C( 100), INT8_C( -99), INT8_C( -64), INT8_C( -50), INT8_C( 118), INT8_C( 122), INT8_C( 6), INT8_C( 87), INT8_C( 75), INT8_C( 100), INT8_C(-128), INT8_C( 28), INT8_C( 71), INT8_C( 64), INT8_C( 100), INT8_C(-113), INT8_C( 80), INT8_C( -17), INT8_C( 21), INT8_C( 54), INT8_C( -61), INT8_C(-110), INT8_C( 25), INT8_C( -7), INT8_C( -32), INT8_C( -1), INT8_C( 38), INT8_C( 126), INT8_C( 80), INT8_C( -50), INT8_C(-119), INT8_C( -41), INT8_C( 126), INT8_C( -56), INT8_C( 43), INT8_C( -1), INT8_C( 73), INT8_C( 86), INT8_C( 16), INT8_C( 55), INT8_C(-107), INT8_C( 43), INT8_C( 20), INT8_C( 38), INT8_C( 123), INT8_C( 35), INT8_C( -39), INT8_C( 90), INT8_C( -87), INT8_C( -4), INT8_C( 113)), simde_mm512_set_epi8(INT8_C(-127), INT8_C( -25), INT8_C( 112), INT8_C( 97), INT8_C( -63), INT8_C( 24), INT8_C(-111), INT8_C( 37), INT8_C( -32), INT8_C( -41), INT8_C( -92), INT8_C( 72), INT8_C( -61), INT8_C(-126), INT8_C( 127), INT8_C( -11), INT8_C( 62), INT8_C( -28), INT8_C( -67), INT8_C( 70), INT8_C( -14), INT8_C( -57), INT8_C( -40), INT8_C( -89), INT8_C( -18), INT8_C( 113), INT8_C( 12), INT8_C( -64), INT8_C( 115), INT8_C(-102), INT8_C( -42), INT8_C( 40), INT8_C( 47), INT8_C( 110), INT8_C( 50), INT8_C( 34), INT8_C( 49), INT8_C( 108), INT8_C( -93), INT8_C( -73), INT8_C( -86), INT8_C( 37), INT8_C( -57), INT8_C( 32), INT8_C(-107), INT8_C( 51), INT8_C( 73), INT8_C( 69), INT8_C( 101), INT8_C( -91), INT8_C( 39), INT8_C( -46), INT8_C( 30), INT8_C( -85), INT8_C( -17), INT8_C( 56), INT8_C( 77), INT8_C( 36), INT8_C( -26), INT8_C( 37), INT8_C( -74), INT8_C( -56), INT8_C( -54), INT8_C( -42)), simde_mm512_set_epi8(INT8_C( 86), INT8_C( -51), INT8_C( 73), INT8_C( -70), INT8_C( 43), INT8_C( -25), INT8_C(-107), INT8_C( -32), INT8_C( -39), INT8_C( 123), INT8_C( -87), INT8_C( -86), INT8_C(-122), INT8_C( 24), INT8_C( 3), INT8_C( 27), INT8_C( 68), INT8_C( 22), INT8_C( 5), INT8_C(-113), 
INT8_C(-125), INT8_C( 13), INT8_C( -81), INT8_C(-100), INT8_C( -58), INT8_C( 19), INT8_C( 45), INT8_C( -72), INT8_C( 82), INT8_C( -12), INT8_C( 72), INT8_C( 62), INT8_C( -77), INT8_C( -53), INT8_C( 1), INT8_C( 63), INT8_C( -30), INT8_C( -51), INT8_C( -83), INT8_C(-114), INT8_C( 7), INT8_C( -27), INT8_C(-127), INT8_C(-112), INT8_C( -37), INT8_C( 122), INT8_C( 2), INT8_C( -87), INT8_C( 67), INT8_C( 52), INT8_C(-105), INT8_C(-113), INT8_C( 76), INT8_C( -87), INT8_C( -9), INT8_C( 77), INT8_C( -42), INT8_C( 14), INT8_C( -18), INT8_C( -18), INT8_C( 71), INT8_C( 1), INT8_C( -61), INT8_C(-115)) }, { simde_mm512_set_epi8(INT8_C( 101), INT8_C( 35), INT8_C( 11), INT8_C( 63), INT8_C( -2), INT8_C( 92), INT8_C( 46), INT8_C( -43), INT8_C( -37), INT8_C( -24), INT8_C(-107), INT8_C( 71), INT8_C( 69), INT8_C( 90), INT8_C( 122), INT8_C( -45), INT8_C( -15), INT8_C( 79), INT8_C( -81), INT8_C( -64), INT8_C( 120), INT8_C( -97), INT8_C( 103), INT8_C( 6), INT8_C( -33), INT8_C( 3), INT8_C( 25), INT8_C( -43), INT8_C( -10), INT8_C(-118), INT8_C( -19), INT8_C( -1), INT8_C( 90), INT8_C( 80), INT8_C( -78), INT8_C( -35), INT8_C(-116), INT8_C( -61), INT8_C( -53), INT8_C( 15), INT8_C( 16), INT8_C( -21), INT8_C( 25), INT8_C( -84), INT8_C( 73), INT8_C( 33), INT8_C(-115), INT8_C( 46), INT8_C( -54), INT8_C( -80), INT8_C(-111), INT8_C( -99), INT8_C( -32), INT8_C( 18), INT8_C( 19), INT8_C( 32), INT8_C( -40), INT8_C( -86), INT8_C( 56), INT8_C( 38), INT8_C(-101), INT8_C( 28), INT8_C( -24), INT8_C( -51)), simde_mm512_set_epi8(INT8_C( 122), INT8_C( -7), INT8_C(-121), INT8_C( -36), INT8_C( -76), INT8_C( 123), INT8_C(-107), INT8_C( -57), INT8_C( 81), INT8_C( -39), INT8_C(-114), INT8_C( 36), INT8_C( -55), INT8_C( 6), INT8_C( -15), INT8_C( 116), INT8_C( 18), INT8_C( 61), INT8_C( 63), INT8_C( 89), INT8_C(-120), INT8_C( 76), INT8_C( -4), INT8_C( -22), INT8_C( -49), INT8_C( -2), INT8_C( 62), INT8_C(-126), INT8_C( 34), INT8_C( -29), INT8_C( 86), INT8_C( -25), INT8_C( 22), INT8_C( 83), INT8_C( 105), INT8_C( -57), 
INT8_C( -20), INT8_C( 78), INT8_C( 88), INT8_C( 17), INT8_C( 75), INT8_C(-105), INT8_C( 98), INT8_C( 98), INT8_C( -70), INT8_C( -99), INT8_C( -8), INT8_C( 6), INT8_C( -22), INT8_C( 12), INT8_C( 105), INT8_C( 79), INT8_C( 94), INT8_C( -23), INT8_C( 126), INT8_C( -81), INT8_C( -52), INT8_C( 127), INT8_C(-121), INT8_C( 105), INT8_C( 22), INT8_C( 82), INT8_C( -12), INT8_C( 40)), simde_mm512_set_epi8(INT8_C( -7), INT8_C( 2), INT8_C( -58), INT8_C( 41), INT8_C( -69), INT8_C( 26), INT8_C( -86), INT8_C( 117), INT8_C( -72), INT8_C( -13), INT8_C( 99), INT8_C( 63), INT8_C( 80), INT8_C( -57), INT8_C( -94), INT8_C(-111), INT8_C( 112), INT8_C(-103), INT8_C( 77), INT8_C( -56), INT8_C( -65), INT8_C( 18), INT8_C( 15), INT8_C( 74), INT8_C( 32), INT8_C( 25), INT8_C( 24), INT8_C( 8), INT8_C( 30), INT8_C( 99), INT8_C( 89), INT8_C( -61), INT8_C( 16), INT8_C( 64), INT8_C( -99), INT8_C( 23), INT8_C(-102), INT8_C( -40), INT8_C( 22), INT8_C( -1), INT8_C( -36), INT8_C( 30), INT8_C( 37), INT8_C( -89), INT8_C( 82), INT8_C( -69), INT8_C( 124), INT8_C( -28), INT8_C( 93), INT8_C( 1), INT8_C( -87), INT8_C( 54), INT8_C(-117), INT8_C( -37), INT8_C( 35), INT8_C( 60), INT8_C( 8), INT8_C( -83), INT8_C( -73), INT8_C( -30), INT8_C( 4), INT8_C( -94), INT8_C(-109), INT8_C( -6)) }, };
/* Run every 512-bit case: compute a*b with the function under test and compare with the stored r. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_gf2p8mul_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; }
/*
 * Table-driven check of simde_mm_mask_gf2p8mul_epi8.
 *
 * test_vec holds 8 cases.  Each case provides:
 *   src - 128-bit vector passed as the first argument,
 *   k   - 16-bit mask (simde__mmask16),
 *   a,b - the two 128-bit operands,
 *   r   - the expected 128-bit result.
 * simde_mm_set_epi8 lists bytes from lane 15 down to lane 0.
 *
 * What the first case (k = 13630, bits 13,12,10,8,5..1 set) shows:
 *   - every lane whose mask bit is clear has r equal to src, i.e. the
 *     vectors are consistent with merge-masking from src;
 *   - lane 12 has b = 1 and r = a = 91;
 *   - lane 13 has a = 4, b = 92, r = 107, which matches a GF(2^8) product
 *     reduced by 0x11B.
 * NOTE(review): only the first case was checked this way; the remaining
 * vectors are assumed to follow the same rule -- confirm before editing.
 */
static int test_simde_mm_mask_gf2p8mul_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i src; simde__mmask16 k; simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( -78), INT8_C(-104), INT8_C( -68), INT8_C( -85), INT8_C( -44), INT8_C( -82), INT8_C(-123), INT8_C( -25), INT8_C( 17), INT8_C( -39), INT8_C(-122), INT8_C( -7), INT8_C( -27), INT8_C(-102), INT8_C( -45), INT8_C( -70)), UINT16_C(13630), simde_mm_set_epi8(INT8_C( -57), INT8_C( 80), INT8_C( 4), INT8_C( 91), INT8_C( 8), 
INT8_C( 3), INT8_C( 85), INT8_C( 32), INT8_C( 3), INT8_C(-125), INT8_C( -81), INT8_C( 47), INT8_C(-124), INT8_C( -92), INT8_C( -4), INT8_C( -78)), simde_mm_set_epi8(INT8_C( -92), INT8_C( -33), INT8_C( 92), INT8_C( 1), INT8_C( -2), INT8_C( -90), INT8_C( -22), INT8_C( -96), INT8_C(-104), INT8_C( -34), INT8_C(-102), INT8_C( 121), INT8_C( -13), INT8_C( 124), INT8_C( 118), INT8_C( -38)), simde_mm_set_epi8(INT8_C( -78), INT8_C(-104), INT8_C( 107), INT8_C( 91), INT8_C( -44), INT8_C( -15), INT8_C(-123), INT8_C( -57), INT8_C( 17), INT8_C( -39), INT8_C( -92), INT8_C( 40), INT8_C( -34), INT8_C(-106), INT8_C( -86), INT8_C( -70)) }, { simde_mm_set_epi8(INT8_C( 8), INT8_C( 16), INT8_C(-112), INT8_C( 88), INT8_C(-114), INT8_C( 116), INT8_C( -93), INT8_C(-113), INT8_C( 103), INT8_C( 3), INT8_C( 104), INT8_C( 32), INT8_C( 61), INT8_C( 121), INT8_C(-120), INT8_C( 66)), UINT16_C(12254), simde_mm_set_epi8(INT8_C( 8), INT8_C( 41), INT8_C( -4), INT8_C( 11), INT8_C( 102), INT8_C( -75), INT8_C( 105), INT8_C( 124), INT8_C( -63), INT8_C( -72), INT8_C( -35), INT8_C( -84), INT8_C( -54), INT8_C( 124), INT8_C( -28), INT8_C( -52)), simde_mm_set_epi8(INT8_C(-119), INT8_C( -4), INT8_C( -96), INT8_C( 111), INT8_C(-105), INT8_C( -93), INT8_C( -51), INT8_C( 60), INT8_C( 97), INT8_C( -89), INT8_C( 84), INT8_C( -74), INT8_C( 11), INT8_C( 54), INT8_C( -91), INT8_C( 93)), simde_mm_set_epi8(INT8_C( 8), INT8_C( 16), INT8_C( 76), INT8_C( 88), INT8_C( -40), INT8_C( -54), INT8_C( -68), INT8_C( -66), INT8_C( 52), INT8_C( 106), INT8_C( 104), INT8_C( -36), INT8_C( 79), INT8_C(-117), INT8_C(-116), INT8_C( 66)) }, { simde_mm_set_epi8(INT8_C( -84), INT8_C( 19), INT8_C(-119), INT8_C( -46), INT8_C( -34), INT8_C(-109), INT8_C( -78), INT8_C(-106), INT8_C(-116), INT8_C( 40), INT8_C( 112), INT8_C( 53), INT8_C( 44), INT8_C( 10), INT8_C( 106), INT8_C( 86)), UINT16_C(13667), simde_mm_set_epi8(INT8_C( -54), INT8_C( -74), INT8_C( 70), INT8_C( -82), INT8_C( -26), INT8_C( 44), INT8_C( 12), INT8_C( 103), INT8_C( 15), INT8_C( 
32), INT8_C( 3), INT8_C( -77), INT8_C( -94), INT8_C( 22), INT8_C( -50), INT8_C( -43)), simde_mm_set_epi8(INT8_C( -3), INT8_C( -12), INT8_C( 125), INT8_C( 88), INT8_C( -82), INT8_C( 40), INT8_C( 88), INT8_C( 17), INT8_C( 125), INT8_C(-128), INT8_C( -99), INT8_C(-101), INT8_C( -3), INT8_C( 16), INT8_C( 9), INT8_C( 121)), simde_mm_set_epi8(INT8_C( -84), INT8_C( 19), INT8_C( 103), INT8_C( 49), INT8_C( -34), INT8_C(-116), INT8_C( -78), INT8_C( 77), INT8_C(-116), INT8_C( -85), INT8_C( -68), INT8_C( 53), INT8_C( 44), INT8_C( 10), INT8_C( -28), INT8_C( -20)) }, { simde_mm_set_epi8(INT8_C( 121), INT8_C( -75), INT8_C( -88), INT8_C( 9), INT8_C( 92), INT8_C(-103), INT8_C( 113), INT8_C( 103), INT8_C( 78), INT8_C( 118), INT8_C(-117), INT8_C( 32), INT8_C(-109), INT8_C( 124), INT8_C(-123), INT8_C( -55)), UINT16_C(50954), simde_mm_set_epi8(INT8_C( 23), INT8_C( -20), INT8_C( 101), INT8_C( 49), INT8_C( -3), INT8_C( 66), INT8_C(-123), INT8_C( 122), INT8_C(-127), INT8_C( 22), INT8_C( 125), INT8_C( -47), INT8_C( 61), INT8_C( 11), INT8_C( 117), INT8_C( 23)), simde_mm_set_epi8(INT8_C( 30), INT8_C( 65), INT8_C( 84), INT8_C( 81), INT8_C(-101), INT8_C( -97), INT8_C( 50), INT8_C( -25), INT8_C( 95), INT8_C( 103), INT8_C( -13), INT8_C( -85), INT8_C( 77), INT8_C( 3), INT8_C( 29), INT8_C(-110)), simde_mm_set_epi8(INT8_C( -95), INT8_C( -1), INT8_C( -88), INT8_C( 9), INT8_C( 92), INT8_C( -23), INT8_C(-110), INT8_C( 68), INT8_C( 78), INT8_C( 118), INT8_C(-117), INT8_C( 32), INT8_C( -29), INT8_C( 124), INT8_C( 46), INT8_C( -55)) }, { simde_mm_set_epi8(INT8_C( 46), INT8_C( 44), INT8_C( 122), INT8_C( 80), INT8_C( -75), INT8_C( 74), INT8_C( 120), INT8_C( 80), INT8_C( -90), INT8_C( 48), INT8_C( -67), INT8_C( -1), INT8_C( 46), INT8_C( -13), INT8_C( 100), INT8_C(-128)), UINT16_C(60019), simde_mm_set_epi8(INT8_C( 67), INT8_C( 62), INT8_C( 20), INT8_C( 19), INT8_C( 63), INT8_C( -95), INT8_C(-114), INT8_C( -93), INT8_C( -46), INT8_C(-104), INT8_C( -3), INT8_C( -91), INT8_C( -90), INT8_C( -96), INT8_C( -86), 
INT8_C( 65)), simde_mm_set_epi8(INT8_C( 118), INT8_C( 124), INT8_C( 95), INT8_C( -87), INT8_C( 46), INT8_C(-105), INT8_C( 28), INT8_C( -77), INT8_C( 35), INT8_C( -75), INT8_C( -22), INT8_C( -22), INT8_C( -29), INT8_C( -57), INT8_C( -40), INT8_C(-108)), simde_mm_set_epi8(INT8_C( 30), INT8_C( 70), INT8_C( -32), INT8_C( 80), INT8_C( -64), INT8_C( 74), INT8_C( 42), INT8_C( 80), INT8_C( -90), INT8_C( 37), INT8_C(-101), INT8_C( 32), INT8_C( 46), INT8_C( -13), INT8_C( 12), INT8_C( -82)) }, { simde_mm_set_epi8(INT8_C( 73), INT8_C( -62), INT8_C( -6), INT8_C( 28), INT8_C( 122), INT8_C( 63), INT8_C( 33), INT8_C( 56), INT8_C( 124), INT8_C(-123), INT8_C( 40), INT8_C( -58), INT8_C( -75), INT8_C(-124), INT8_C( 56), INT8_C( 47)), UINT16_C(59040), simde_mm_set_epi8(INT8_C( -94), INT8_C( -16), INT8_C( 58), INT8_C( 115), INT8_C( 58), INT8_C( 92), INT8_C(-119), INT8_C( 20), INT8_C( -50), INT8_C( -33), INT8_C( 46), INT8_C( 102), INT8_C( 14), INT8_C( 87), INT8_C( -93), INT8_C( 26)), simde_mm_set_epi8(INT8_C(-108), INT8_C( -77), INT8_C( 121), INT8_C( 106), INT8_C( 57), INT8_C( -95), INT8_C(-114), INT8_C( 50), INT8_C( 28), INT8_C( -67), INT8_C( -23), INT8_C( 89), INT8_C( -64), INT8_C( -49), INT8_C( 76), INT8_C( -55)), simde_mm_set_epi8(INT8_C( 90), INT8_C( 31), INT8_C( 127), INT8_C( 28), INT8_C( 122), INT8_C( -26), INT8_C( 73), INT8_C( 56), INT8_C( 107), INT8_C(-123), INT8_C( 109), INT8_C( -58), INT8_C( -75), INT8_C(-124), INT8_C( 56), INT8_C( 47)) }, { simde_mm_set_epi8(INT8_C( 105), INT8_C( 65), INT8_C( 100), INT8_C( 127), INT8_C( -79), INT8_C( 71), INT8_C(-127), INT8_C( 57), INT8_C(-124), INT8_C( 74), INT8_C( -70), INT8_C( 66), INT8_C( 110), INT8_C( 68), INT8_C(-127), INT8_C( 15)), UINT16_C(28581), simde_mm_set_epi8(INT8_C( 115), INT8_C( 16), INT8_C( -82), INT8_C( 55), INT8_C( -37), INT8_C( 123), INT8_C( -34), INT8_C( 120), INT8_C( -28), INT8_C( -74), INT8_C( 93), INT8_C( -65), INT8_C( 18), INT8_C(-107), INT8_C(-126), INT8_C( -54)), simde_mm_set_epi8(INT8_C( -50), INT8_C( -60), INT8_C( 
-91), INT8_C( 71), INT8_C( 57), INT8_C( 2), INT8_C(-112), INT8_C( 2), INT8_C( -12), INT8_C( 47), INT8_C( -10), INT8_C( -44), INT8_C( 70), INT8_C( 121), INT8_C( -16), INT8_C( 28)), simde_mm_set_epi8(INT8_C( 105), INT8_C( -12), INT8_C( 76), INT8_C( 127), INT8_C( 120), INT8_C( -10), INT8_C( 1), INT8_C( -16), INT8_C(-113), INT8_C( 74), INT8_C( -62), INT8_C( 66), INT8_C( 110), INT8_C(-123), INT8_C(-127), INT8_C( 27)) }, { simde_mm_set_epi8(INT8_C( -16), INT8_C( 27), INT8_C( 91), INT8_C( -68), INT8_C( -69), INT8_C( -47), INT8_C( -9), INT8_C( -68), INT8_C( -93), INT8_C( 57), INT8_C( -55), INT8_C( -75), INT8_C( 14), INT8_C( -42), INT8_C( 18), INT8_C( 98)), UINT16_C(11737), simde_mm_set_epi8(INT8_C( 67), INT8_C( 65), INT8_C( 46), INT8_C( 78), INT8_C( 7), INT8_C( -97), INT8_C(-111), INT8_C( -83), INT8_C( 116), INT8_C( 51), INT8_C( -98), INT8_C( 85), INT8_C( -36), INT8_C( 121), INT8_C( 86), INT8_C( 14)), simde_mm_set_epi8(INT8_C( -29), INT8_C( 35), INT8_C( 25), INT8_C( -17), INT8_C( -88), INT8_C( 94), INT8_C( 10), INT8_C( 30), INT8_C( -99), INT8_C( -94), INT8_C( 3), INT8_C( 6), INT8_C( 13), INT8_C(-105), INT8_C( -54), INT8_C( 42)), simde_mm_set_epi8(INT8_C( -16), INT8_C( 27), INT8_C(-109), INT8_C( -68), INT8_C( 117), INT8_C( 4), INT8_C( -9), INT8_C( -30), INT8_C( 59), INT8_C( -76), INT8_C( -55), INT8_C( -27), INT8_C( 59), INT8_C( -42), INT8_C( 18), INT8_C( -73)) }, };
/* Run every masked 128-bit case: call the function with (src, k, a, b) and compare with the stored r. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_mask_gf2p8mul_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; }
static int test_simde_mm256_mask_gf2p8mul_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i src; simde__mmask32 k; simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( -91), INT8_C( -2), INT8_C( -76), INT8_C( 64), INT8_C( -37), INT8_C( 76), INT8_C( -74), INT8_C( 84), INT8_C( 46), INT8_C(-109), INT8_C( -48), INT8_C( -25), 
INT8_C( -53), INT8_C( 4), INT8_C( 47), INT8_C( -25), INT8_C( 127), INT8_C( 44), INT8_C(-128), INT8_C( -9), INT8_C( 2), INT8_C( -74), INT8_C( 109), INT8_C( -46), INT8_C( 68), INT8_C( -38), INT8_C( 39), INT8_C( -69), INT8_C( -24), INT8_C( -82), INT8_C( 77), INT8_C( 53)), UINT32_C( 192195297), simde_mm256_set_epi8(INT8_C( 25), INT8_C( 115), INT8_C( -10), INT8_C(-115), INT8_C( 42), INT8_C( 35), INT8_C( 71), INT8_C( 118), INT8_C( 2), INT8_C( 3), INT8_C( -13), INT8_C( 18), INT8_C( -70), INT8_C( 47), INT8_C( -37), INT8_C( -23), INT8_C( -75), INT8_C( 90), INT8_C( -46), INT8_C( -66), INT8_C( 88), INT8_C( 127), INT8_C( 1), INT8_C( 80), INT8_C(-110), INT8_C(-125), INT8_C( -55), INT8_C( 121), INT8_C( 27), INT8_C( 18), INT8_C( -47), INT8_C( -14)), simde_mm256_set_epi8(INT8_C( 44), INT8_C( -43), INT8_C( 19), INT8_C(-117), INT8_C( 72), INT8_C(-122), INT8_C( 32), INT8_C( -8), INT8_C( 49), INT8_C( 100), INT8_C(-117), INT8_C( -73), INT8_C( 86), INT8_C(-117), INT8_C( 93), INT8_C( -15), INT8_C( 32), INT8_C( 41), INT8_C( 52), INT8_C( 102), INT8_C(-116), INT8_C( 95), INT8_C(-110), INT8_C(-100), INT8_C( -46), INT8_C( -18), INT8_C( 18), INT8_C( 29), INT8_C( 59), INT8_C( -3), INT8_C(-126), INT8_C(-100)), simde_mm256_set_epi8(INT8_C( -91), INT8_C( -2), INT8_C( -76), INT8_C( 64), INT8_C( 37), INT8_C( 76), INT8_C( 56), INT8_C( 105), INT8_C( 46), INT8_C( -84), INT8_C( -24), INT8_C( -16), INT8_C( -53), INT8_C( 120), INT8_C( 47), INT8_C( -25), INT8_C( 81), INT8_C( 44), INT8_C( -17), INT8_C( -9), INT8_C( 116), INT8_C( -74), INT8_C(-110), INT8_C( -46), INT8_C( 36), INT8_C( 20), INT8_C( -83), INT8_C( -69), INT8_C( -24), INT8_C( -82), INT8_C( 77), INT8_C( 50)) }, { simde_mm256_set_epi8(INT8_C( -98), INT8_C( -77), INT8_C(-113), INT8_C( 36), INT8_C( -15), INT8_C( 69), INT8_C( 25), INT8_C( 84), INT8_C( -26), INT8_C( 60), INT8_C(-123), INT8_C( -24), INT8_C( 102), INT8_C( -21), INT8_C( -59), INT8_C( -55), INT8_C( 60), INT8_C( -75), INT8_C( 43), INT8_C( 127), INT8_C( -69), INT8_C( 6), INT8_C( -47), 
INT8_C( 20), INT8_C( -17), INT8_C( 115), INT8_C( -76), INT8_C( 83), INT8_C( 65), INT8_C( 21), INT8_C( -52), INT8_C( 111)), UINT32_C(4285006847), simde_mm256_set_epi8(INT8_C( -30), INT8_C( 116), INT8_C( -42), INT8_C( -27), INT8_C( 76), INT8_C( 18), INT8_C( 4), INT8_C( 119), INT8_C( -3), INT8_C( 32), INT8_C( -48), INT8_C( -28), INT8_C( -49), INT8_C( -35), INT8_C( -48), INT8_C( 36), INT8_C( 73), INT8_C( -87), INT8_C( 24), INT8_C( 25), INT8_C( -95), INT8_C( -9), INT8_C( 114), INT8_C( 29), INT8_C( -98), INT8_C( -68), INT8_C( -90), INT8_C( -92), INT8_C( 36), INT8_C( 3), INT8_C( -12), INT8_C( 68)), simde_mm256_set_epi8(INT8_C( 73), INT8_C( 22), INT8_C( -48), INT8_C(-104), INT8_C( 37), INT8_C( 121), INT8_C( 59), INT8_C( -39), INT8_C( 75), INT8_C( -53), INT8_C( 54), INT8_C( 71), INT8_C( 86), INT8_C( 79), INT8_C(-116), INT8_C(-116), INT8_C( 82), INT8_C( 60), INT8_C( -13), INT8_C( 91), INT8_C( 29), INT8_C( -30), INT8_C( -14), INT8_C( 53), INT8_C( 60), INT8_C( -17), INT8_C( 18), INT8_C( 121), INT8_C( 109), INT8_C( -61), INT8_C( -32), INT8_C( -37)), simde_mm256_set_epi8(INT8_C( 13), INT8_C( 34), INT8_C( -4), INT8_C( 113), INT8_C( 36), INT8_C( 35), INT8_C( -20), INT8_C(-124), INT8_C( -26), INT8_C( 8), INT8_C( 60), INT8_C( -24), INT8_C( -37), INT8_C( -21), INT8_C( -59), INT8_C( -55), INT8_C( 60), INT8_C( -75), INT8_C( 43), INT8_C( 127), INT8_C( -69), INT8_C( 6), INT8_C( -3), INT8_C( 47), INT8_C( -73), INT8_C(-125), INT8_C( -39), INT8_C(-108), INT8_C( -96), INT8_C( 94), INT8_C( 114), INT8_C( 61)) }, { simde_mm256_set_epi8(INT8_C( -22), INT8_C( -97), INT8_C( -20), INT8_C( -42), INT8_C( 103), INT8_C( -36), INT8_C( 60), INT8_C( 86), INT8_C( 45), INT8_C( 81), INT8_C(-106), INT8_C( -30), INT8_C(-122), INT8_C( -41), INT8_C( -20), INT8_C( 2), INT8_C( -90), INT8_C( -62), INT8_C( 50), INT8_C( 118), INT8_C( -23), INT8_C(-115), INT8_C( -58), INT8_C( 17), INT8_C( 34), INT8_C( -55), INT8_C( 56), INT8_C(-123), INT8_C( -62), INT8_C( -51), INT8_C( -75), INT8_C( -84)), UINT32_C( 326632141), 
simde_mm256_set_epi8(INT8_C( -1), INT8_C(-103), INT8_C( 74), INT8_C( 71), INT8_C( 102), INT8_C( 79), INT8_C( -88), INT8_C( -21), INT8_C( -55), INT8_C( -47), INT8_C( 60), INT8_C(-128), INT8_C( -92), INT8_C( 4), INT8_C(-127), INT8_C( 39), INT8_C( 48), INT8_C( -18), INT8_C( -62), INT8_C( -30), INT8_C( -55), INT8_C( 85), INT8_C( -74), INT8_C( -71), INT8_C( 33), INT8_C( -48), INT8_C( -45), INT8_C( 13), INT8_C( 65), INT8_C( -14), INT8_C( 34), INT8_C( -49)), simde_mm256_set_epi8(INT8_C( 59), INT8_C( 113), INT8_C( 22), INT8_C(-109), INT8_C( 95), INT8_C( 41), INT8_C( -94), INT8_C( 123), INT8_C( 74), INT8_C( -82), INT8_C( 77), INT8_C( -25), INT8_C( 83), INT8_C( -15), INT8_C( 28), INT8_C( -50), INT8_C(-128), INT8_C( -11), INT8_C( 57), INT8_C( -2), INT8_C(-108), INT8_C( 106), INT8_C( 35), INT8_C( 1), INT8_C( 5), INT8_C( -6), INT8_C(-104), INT8_C( -92), INT8_C( -39), INT8_C( 110), INT8_C( -82), INT8_C(-103)), simde_mm256_set_epi8(INT8_C( -22), INT8_C( -97), INT8_C( -20), INT8_C( 53), INT8_C( 103), INT8_C( -36), INT8_C( -54), INT8_C( -95), INT8_C( 45), INT8_C( -24), INT8_C( -82), INT8_C( -47), INT8_C( -41), INT8_C( -41), INT8_C( -20), INT8_C( 2), INT8_C( -90), INT8_C( -62), INT8_C( 50), INT8_C( 118), INT8_C( -23), INT8_C(-115), INT8_C( -16), INT8_C( 17), INT8_C( -91), INT8_C( 14), INT8_C( 56), INT8_C(-123), INT8_C( 37), INT8_C( 112), INT8_C( -75), INT8_C( -65)) }, { simde_mm256_set_epi8(INT8_C( -69), INT8_C( -5), INT8_C( 96), INT8_C( -19), INT8_C( -24), INT8_C( 46), INT8_C(-107), INT8_C( -57), INT8_C( 92), INT8_C( -53), INT8_C( -87), INT8_C( 121), INT8_C(-124), INT8_C( 40), INT8_C( 24), INT8_C(-107), INT8_C(-103), INT8_C( -6), INT8_C( -77), INT8_C( -88), INT8_C( 21), INT8_C( 39), INT8_C( 120), INT8_C( -43), INT8_C(-111), INT8_C(-107), INT8_C( 26), INT8_C( 43), INT8_C( 101), INT8_C( 24), INT8_C( -62), INT8_C( -16)), UINT32_C(4165065595), simde_mm256_set_epi8(INT8_C( -81), INT8_C( 75), INT8_C( 109), INT8_C( -86), INT8_C(-107), INT8_C( 7), INT8_C( -83), INT8_C( 113), INT8_C(-128), 
INT8_C( -49), INT8_C( -84), INT8_C( 113), INT8_C( -36), INT8_C( 21), INT8_C( 99), INT8_C( 100), INT8_C( 66), INT8_C( 114), INT8_C(-105), INT8_C( -62), INT8_C( -37), INT8_C( 48), INT8_C( 65), INT8_C(-118), INT8_C( -84), INT8_C( 37), INT8_C( -40), INT8_C( 48), INT8_C( -82), INT8_C( 127), INT8_C( 21), INT8_C( -35)), simde_mm256_set_epi8(INT8_C( 76), INT8_C( -66), INT8_C(-100), INT8_C( -14), INT8_C( -13), INT8_C( -55), INT8_C( 35), INT8_C( -56), INT8_C( 29), INT8_C( 22), INT8_C( 29), INT8_C( 96), INT8_C(-115), INT8_C( -69), INT8_C( -39), INT8_C( -34), INT8_C(-111), INT8_C( -78), INT8_C( 105), INT8_C( 29), INT8_C( 63), INT8_C( -14), INT8_C( 47), INT8_C( -81), INT8_C( -23), INT8_C( 40), INT8_C( 110), INT8_C( 29), INT8_C( -69), INT8_C( 86), INT8_C( 67), INT8_C( 123)), simde_mm256_set_epi8(INT8_C( -3), INT8_C( 10), INT8_C( 92), INT8_C( -8), INT8_C(-124), INT8_C( 46), INT8_C(-107), INT8_C( -57), INT8_C( 92), INT8_C( -48), INT8_C( -87), INT8_C( 121), INT8_C(-124), INT8_C( 40), INT8_C( 24), INT8_C( -20), INT8_C( 88), INT8_C( 98), INT8_C( -77), INT8_C( 57), INT8_C(-108), INT8_C( 39), INT8_C( 26), INT8_C( -70), INT8_C(-111), INT8_C( -1), INT8_C( -98), INT8_C( 70), INT8_C(-116), INT8_C( 24), INT8_C( 8), INT8_C( -88)) }, { simde_mm256_set_epi8(INT8_C(-114), INT8_C( -40), INT8_C( -54), INT8_C( -97), INT8_C( -71), INT8_C( 124), INT8_C( 89), INT8_C( -34), INT8_C( 70), INT8_C( -84), INT8_C( 86), INT8_C(-125), INT8_C( 53), INT8_C( 93), INT8_C( -15), INT8_C( -38), INT8_C( -17), INT8_C( -84), INT8_C( -87), INT8_C( -41), INT8_C( 4), INT8_C( 6), INT8_C( -55), INT8_C( 16), INT8_C( 79), INT8_C( 94), INT8_C( -49), INT8_C( 7), INT8_C( 36), INT8_C( 55), INT8_C( 3), INT8_C( -4)), UINT32_C( 494773061), simde_mm256_set_epi8(INT8_C( -26), INT8_C( 33), INT8_C( -10), INT8_C(-107), INT8_C( -30), INT8_C( 42), INT8_C( 123), INT8_C( -24), INT8_C( -15), INT8_C( 125), INT8_C( 24), INT8_C( 60), INT8_C( 111), INT8_C( -28), INT8_C( -11), INT8_C( -35), INT8_C( -64), INT8_C( 64), INT8_C( 89), INT8_C( -4), 
INT8_C(-116), INT8_C( 18), INT8_C( 31), INT8_C( 82), INT8_C( -22), INT8_C( 38), INT8_C( 77), INT8_C(-108), INT8_C( -27), INT8_C( -92), INT8_C( 86), INT8_C(-118)), simde_mm256_set_epi8(INT8_C( -6), INT8_C(-119), INT8_C( 65), INT8_C( -71), INT8_C( 49), INT8_C( -54), INT8_C( 9), INT8_C( 31), INT8_C( 64), INT8_C( 27), INT8_C(-127), INT8_C( -4), INT8_C( -16), INT8_C( -20), INT8_C( 22), INT8_C( -73), INT8_C( 107), INT8_C( -27), INT8_C( 46), INT8_C(-114), INT8_C( -84), INT8_C( -92), INT8_C( 17), INT8_C( -19), INT8_C( 85), INT8_C( 89), INT8_C( -54), INT8_C( -15), INT8_C( -27), INT8_C(-123), INT8_C( -97), INT8_C( 55)), simde_mm256_set_epi8(INT8_C(-114), INT8_C( -40), INT8_C( -54), INT8_C( 11), INT8_C( 31), INT8_C( -83), INT8_C( 89), INT8_C( -83), INT8_C( 70), INT8_C( -45), INT8_C( -84), INT8_C(-105), INT8_C( 92), INT8_C( 44), INT8_C( -15), INT8_C( 113), INT8_C(-108), INT8_C( -84), INT8_C( -26), INT8_C( -41), INT8_C( 4), INT8_C( 6), INT8_C( -12), INT8_C( -64), INT8_C( 79), INT8_C( 24), INT8_C( -49), INT8_C( 7), INT8_C( 36), INT8_C( 5), INT8_C( 3), INT8_C( 19)) }, { simde_mm256_set_epi8(INT8_C( 106), INT8_C( 11), INT8_C( 13), INT8_C( 35), INT8_C( 50), INT8_C( 77), INT8_C( -99), INT8_C( 106), INT8_C( 71), INT8_C( -97), INT8_C( 97), INT8_C( 115), INT8_C( 125), INT8_C( -47), INT8_C( -39), INT8_C( -88), INT8_C( 107), INT8_C( 0), INT8_C( 12), INT8_C( 1), INT8_C( -45), INT8_C( -47), INT8_C( 108), INT8_C( 7), INT8_C( -30), INT8_C( 56), INT8_C( -69), INT8_C( 109), INT8_C( 62), INT8_C( 124), INT8_C(-119), INT8_C( 13)), UINT32_C( 992532866), simde_mm256_set_epi8(INT8_C( 33), INT8_C( 26), INT8_C( 71), INT8_C( -5), INT8_C(-126), INT8_C(-109), INT8_C( 104), INT8_C( 16), INT8_C( 76), INT8_C( -14), INT8_C( -26), INT8_C( -63), INT8_C( 100), INT8_C( 82), INT8_C( 9), INT8_C( 30), INT8_C(-118), INT8_C( 84), INT8_C( -83), INT8_C( 95), INT8_C( 80), INT8_C( -48), INT8_C(-127), INT8_C( -15), INT8_C( -58), INT8_C( 36), INT8_C(-124), INT8_C( 71), INT8_C( -64), INT8_C( -55), INT8_C( 46), INT8_C( 5)), 
simde_mm256_set_epi8(INT8_C( 25), INT8_C( 72), INT8_C( -18), INT8_C( -20), INT8_C(-114), INT8_C( -17), INT8_C(-113), INT8_C( -97), INT8_C( 115), INT8_C( 3), INT8_C( 72), INT8_C( -46), INT8_C( -91), INT8_C( -72), INT8_C( -23), INT8_C( 44), INT8_C( -40), INT8_C(-122), INT8_C( 119), INT8_C( -9), INT8_C( -14), INT8_C( -27), INT8_C( 124), INT8_C( 49), INT8_C(-122), INT8_C( 72), INT8_C( 86), INT8_C( 3), INT8_C(-121), INT8_C( 78), INT8_C( -16), INT8_C( -20)), simde_mm256_set_epi8(INT8_C( 106), INT8_C( 11), INT8_C( 47), INT8_C( -3), INT8_C( -36), INT8_C( 77), INT8_C( -28), INT8_C( 51), INT8_C( 71), INT8_C( -97), INT8_C( -44), INT8_C( 115), INT8_C( 11), INT8_C( -47), INT8_C( -39), INT8_C( -88), INT8_C( 82), INT8_C( 64), INT8_C( 12), INT8_C( 106), INT8_C( 107), INT8_C( -47), INT8_C( 108), INT8_C( 113), INT8_C( 88), INT8_C( 56), INT8_C( -69), INT8_C( 109), INT8_C( 62), INT8_C( 124), INT8_C( -2), INT8_C( 13)) }, { simde_mm256_set_epi8(INT8_C( -51), INT8_C( 19), INT8_C( 98), INT8_C( -5), INT8_C( -37), INT8_C( -88), INT8_C( -92), INT8_C( -13), INT8_C( -69), INT8_C( -50), INT8_C( 32), INT8_C(-117), INT8_C( 116), INT8_C(-102), INT8_C(-124), INT8_C( -28), INT8_C( 73), INT8_C( 123), INT8_C( 43), INT8_C( -7), INT8_C( -30), INT8_C( -95), INT8_C( 90), INT8_C( 120), INT8_C( 95), INT8_C( 109), INT8_C( -55), INT8_C(-125), INT8_C( 104), INT8_C( -93), INT8_C( 0), INT8_C( 30)), UINT32_C( 261247883), simde_mm256_set_epi8(INT8_C( 49), INT8_C( 93), INT8_C( 31), INT8_C( -47), INT8_C( 118), INT8_C( 121), INT8_C(-116), INT8_C( 80), INT8_C( 41), INT8_C(-105), INT8_C( 111), INT8_C( -16), INT8_C( 123), INT8_C( 66), INT8_C( -25), INT8_C(-128), INT8_C( -10), INT8_C( -38), INT8_C( -67), INT8_C( -19), INT8_C( -21), INT8_C( 89), INT8_C( 36), INT8_C( 76), INT8_C( -50), INT8_C( 9), INT8_C( 35), INT8_C( 67), INT8_C( 104), INT8_C( -68), INT8_C( 121), INT8_C( -77)), simde_mm256_set_epi8(INT8_C( 35), INT8_C( 21), INT8_C( 83), INT8_C( -76), INT8_C( 58), INT8_C( -71), INT8_C( 4), INT8_C( 1), INT8_C( 112), INT8_C( 
-37), INT8_C( -98), INT8_C( 116), INT8_C(-115), INT8_C( -49), INT8_C( 48), INT8_C( 61), INT8_C( -87), INT8_C( -23), INT8_C( -78), INT8_C( 61), INT8_C(-103), INT8_C(-124), INT8_C( 22), INT8_C( -33), INT8_C(-126), INT8_C( 48), INT8_C( 116), INT8_C( 77), INT8_C( -81), INT8_C(-109), INT8_C( -93), INT8_C( 18)), simde_mm256_set_epi8(INT8_C( -51), INT8_C( 19), INT8_C( 98), INT8_C( -5), INT8_C( 18), INT8_C( 38), INT8_C( 6), INT8_C( 80), INT8_C( 95), INT8_C( -50), INT8_C( 32), INT8_C( 15), INT8_C( 116), INT8_C(-102), INT8_C( 13), INT8_C( -28), INT8_C( 73), INT8_C( 10), INT8_C( 43), INT8_C( -85), INT8_C( -30), INT8_C( -95), INT8_C( -82), INT8_C(-108), INT8_C( 17), INT8_C( 109), INT8_C( -55), INT8_C(-125), INT8_C( 75), INT8_C( -93), INT8_C( -32), INT8_C( -72)) }, { simde_mm256_set_epi8(INT8_C(-122), INT8_C( 7), INT8_C(-104), INT8_C( -70), INT8_C( 100), INT8_C( 82), INT8_C( 124), INT8_C(-114), INT8_C( -43), INT8_C( -26), INT8_C( 6), INT8_C( -56), INT8_C( 88), INT8_C( -11), INT8_C( 17), INT8_C( 70), INT8_C( -71), INT8_C( 77), INT8_C( 118), INT8_C( 52), INT8_C( 40), INT8_C( 53), INT8_C( -66), INT8_C( 96), INT8_C( 16), INT8_C( 115), INT8_C( 73), INT8_C(-110), INT8_C( 117), INT8_C( -92), INT8_C( -4), INT8_C( -36)), UINT32_C(3600672243), simde_mm256_set_epi8(INT8_C( 26), INT8_C( 11), INT8_C( 41), INT8_C( 33), INT8_C(-112), INT8_C( 22), INT8_C( 74), INT8_C( -31), INT8_C( 40), INT8_C( -36), INT8_C( 102), INT8_C( 98), INT8_C( -33), INT8_C( -94), INT8_C(-102), INT8_C( 123), INT8_C( 15), INT8_C( 121), INT8_C( -65), INT8_C( -97), INT8_C( -4), INT8_C( -34), INT8_C( -4), INT8_C( -26), INT8_C( 55), INT8_C( -78), INT8_C(-101), INT8_C( 114), INT8_C(-106), INT8_C( -60), INT8_C( -56), INT8_C( 45)), simde_mm256_set_epi8(INT8_C( -62), INT8_C( 113), INT8_C(-120), INT8_C( 75), INT8_C( -11), INT8_C( -23), INT8_C( -14), INT8_C( -91), INT8_C( 98), INT8_C( 84), INT8_C(-127), INT8_C( -73), INT8_C( -23), INT8_C( 19), INT8_C( -28), INT8_C( 10), INT8_C( -94), INT8_C( 52), INT8_C( -78), INT8_C( 109), 
INT8_C( -50), INT8_C( 112), INT8_C( -89), INT8_C( 81), INT8_C(-117), INT8_C( -78), INT8_C(-116), INT8_C( 16), INT8_C( 2), INT8_C( 54), INT8_C( 39), INT8_C( -22)), simde_mm256_set_epi8(INT8_C( 65), INT8_C( 54), INT8_C(-104), INT8_C( -24), INT8_C( 100), INT8_C( 82), INT8_C( -4), INT8_C(-114), INT8_C( -55), INT8_C( -26), INT8_C( 6), INT8_C( 70), INT8_C( 106), INT8_C( 51), INT8_C( 17), INT8_C( 3), INT8_C( 36), INT8_C(-105), INT8_C( 66), INT8_C( 52), INT8_C( 40), INT8_C( -10), INT8_C( -66), INT8_C( -95), INT8_C( 36), INT8_C( -23), INT8_C( 91), INT8_C( 97), INT8_C( 117), INT8_C( -92), INT8_C( 38), INT8_C( 58)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_mask_gf2p8mul_epi8(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_gf2p8mul_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask64 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( -60), INT8_C( 108), INT8_C( -83), INT8_C( -82), INT8_C( 83), INT8_C( 115), INT8_C( 91), INT8_C( 16), INT8_C( -37), INT8_C( -12), INT8_C( -15), INT8_C( -3), INT8_C( 11), INT8_C( -4), INT8_C( -51), INT8_C( 33), INT8_C( 47), INT8_C( 126), INT8_C(-125), INT8_C( -56), INT8_C( -7), INT8_C( 90), INT8_C( -35), INT8_C( -10), INT8_C(-123), INT8_C( -76), INT8_C( -34), INT8_C( 14), INT8_C( 81), INT8_C(-107), INT8_C( 67), INT8_C( 83), INT8_C( 111), INT8_C( -30), INT8_C( 29), INT8_C( -75), INT8_C( -31), INT8_C( -33), INT8_C( 84), INT8_C( 119), INT8_C(-127), INT8_C( -76), INT8_C( 60), INT8_C( 55), INT8_C( -47), INT8_C( -11), INT8_C( -2), INT8_C( -56), INT8_C( -92), INT8_C( 40), INT8_C( 0), INT8_C(-123), INT8_C( 2), INT8_C( 48), INT8_C(-127), INT8_C(-114), INT8_C( 34), INT8_C( 5), INT8_C( 111), INT8_C( -64), INT8_C( 86), INT8_C( 93), INT8_C( -59), INT8_C( 72)), UINT64_C( 9042485797202566251), simde_mm512_set_epi8(INT8_C( 
-52), INT8_C( 16), INT8_C( -41), INT8_C( 113), INT8_C( 80), INT8_C( -23), INT8_C( 111), INT8_C(-116), INT8_C( 66), INT8_C( 19), INT8_C( 99), INT8_C( 47), INT8_C(-121), INT8_C( 52), INT8_C( 55), INT8_C( 84), INT8_C(-118), INT8_C( -50), INT8_C( 104), INT8_C( 17), INT8_C( 60), INT8_C( 70), INT8_C( 15), INT8_C( -35), INT8_C(-110), INT8_C( -93), INT8_C( 9), INT8_C( -88), INT8_C( 112), INT8_C( -61), INT8_C( -39), INT8_C( 11), INT8_C( -74), INT8_C(-114), INT8_C( -45), INT8_C( 55), INT8_C( 62), INT8_C( 126), INT8_C( -20), INT8_C( 55), INT8_C( -72), INT8_C( 75), INT8_C(-120), INT8_C(-119), INT8_C( 123), INT8_C( -34), INT8_C( -73), INT8_C(-123), INT8_C(-101), INT8_C( -43), INT8_C(-116), INT8_C( -63), INT8_C( -10), INT8_C( -51), INT8_C( -93), INT8_C( -84), INT8_C( 47), INT8_C(-117), INT8_C( -67), INT8_C( -19), INT8_C( 61), INT8_C( 13), INT8_C( 12), INT8_C( 114)), simde_mm512_set_epi8(INT8_C( -47), INT8_C( 117), INT8_C( 127), INT8_C( 68), INT8_C( -69), INT8_C( 117), INT8_C( 102), INT8_C( -21), INT8_C( -16), INT8_C( -38), INT8_C( 121), INT8_C( 93), INT8_C( 29), INT8_C( -6), INT8_C( 16), INT8_C( -65), INT8_C( -80), INT8_C( 4), INT8_C( -36), INT8_C(-112), INT8_C( 9), INT8_C( -7), INT8_C( 94), INT8_C( -68), INT8_C( -5), INT8_C( -87), INT8_C( 68), INT8_C( 24), INT8_C( -32), INT8_C(-119), INT8_C( 106), INT8_C( 64), INT8_C( -62), INT8_C( -70), INT8_C( 123), INT8_C( 13), INT8_C( -44), INT8_C( -1), INT8_C(-119), INT8_C( -23), INT8_C( 42), INT8_C( -86), INT8_C( -57), INT8_C(-127), INT8_C( -75), INT8_C( 82), INT8_C( 117), INT8_C( 8), INT8_C( -85), INT8_C( 58), INT8_C( -87), INT8_C( 32), INT8_C( 52), INT8_C( -70), INT8_C( -17), INT8_C(-115), INT8_C( 97), INT8_C( 94), INT8_C( -99), INT8_C( 121), INT8_C( -25), INT8_C( -15), INT8_C( -40), INT8_C( -25)), simde_mm512_set_epi8(INT8_C( -60), INT8_C( 17), INT8_C( -38), INT8_C(-128), INT8_C( 74), INT8_C( 30), INT8_C( 91), INT8_C( 19), INT8_C( -37), INT8_C( 122), INT8_C( 91), INT8_C( 3), INT8_C( 81), INT8_C(-114), INT8_C( -51), INT8_C( -37), 
INT8_C( 47), INT8_C( 21), INT8_C(-125), INT8_C( 83), INT8_C( -57), INT8_C( 4), INT8_C( -35), INT8_C( -10), INT8_C(-105), INT8_C( -63), INT8_C( 82), INT8_C( 14), INT8_C( -93), INT8_C( -51), INT8_C( 67), INT8_C( 83), INT8_C( 111), INT8_C(-120), INT8_C( 92), INT8_C( -75), INT8_C( -31), INT8_C(-119), INT8_C( 84), INT8_C( 119), INT8_C( 54), INT8_C( -31), INT8_C( 60), INT8_C( 55), INT8_C( -47), INT8_C( -49), INT8_C( -21), INT8_C( -56), INT8_C( -92), INT8_C( 40), INT8_C( 0), INT8_C( 83), INT8_C( -27), INT8_C( 18), INT8_C(-127), INT8_C(-114), INT8_C( 34), INT8_C( -16), INT8_C( -10), INT8_C( -64), INT8_C( 34), INT8_C( 93), INT8_C( -41), INT8_C( 61)) }, { simde_mm512_set_epi8(INT8_C( 44), INT8_C( 82), INT8_C( 126), INT8_C( -9), INT8_C( -66), INT8_C( 2), INT8_C( -88), INT8_C( 110), INT8_C(-110), INT8_C( -36), INT8_C( 43), INT8_C( 92), INT8_C( 101), INT8_C( -61), INT8_C( 81), INT8_C( -12), INT8_C( 79), INT8_C( 58), INT8_C( 2), INT8_C(-105), INT8_C( 15), INT8_C(-109), INT8_C( -77), INT8_C( 32), INT8_C( 15), INT8_C( 8), INT8_C( -14), INT8_C(-128), INT8_C( -25), INT8_C(-113), INT8_C( 0), INT8_C( 20), INT8_C( 104), INT8_C(-104), INT8_C( 23), INT8_C( -53), INT8_C( -95), INT8_C( 23), INT8_C( -13), INT8_C(-106), INT8_C(-108), INT8_C(-108), INT8_C(-107), INT8_C( 102), INT8_C( 55), INT8_C( 5), INT8_C( -1), INT8_C( 122), INT8_C( 107), INT8_C( -68), INT8_C( 21), INT8_C( -60), INT8_C( 19), INT8_C( 79), INT8_C( 73), INT8_C( 69), INT8_C( -76), INT8_C( 116), INT8_C( -6), INT8_C( 121), INT8_C( 60), INT8_C( 67), INT8_C( 112), INT8_C( -18)), UINT64_C( 73569253378751484), simde_mm512_set_epi8(INT8_C( 6), INT8_C( -84), INT8_C( 92), INT8_C( -25), INT8_C( 18), INT8_C( 109), INT8_C( -63), INT8_C( -13), INT8_C( -9), INT8_C( -29), INT8_C( -53), INT8_C( -70), INT8_C( 94), INT8_C( 29), INT8_C( 26), INT8_C( 82), INT8_C( 92), INT8_C( -86), INT8_C( 48), INT8_C( 61), INT8_C( -99), INT8_C( 106), INT8_C(-120), INT8_C( 116), INT8_C( 75), INT8_C(-105), INT8_C(-107), INT8_C( 104), INT8_C( -27), INT8_C( -81), 
INT8_C( -23), INT8_C( -51), INT8_C( -4), INT8_C( 48), INT8_C( -8), INT8_C( -41), INT8_C( 5), INT8_C(-113), INT8_C( 112), INT8_C( -40), INT8_C( -87), INT8_C(-117), INT8_C( 117), INT8_C( 50), INT8_C( 100), INT8_C( 10), INT8_C(-102), INT8_C( -59), INT8_C( -1), INT8_C( 33), INT8_C( 78), INT8_C( 5), INT8_C( 44), INT8_C( 41), INT8_C( -90), INT8_C(-111), INT8_C( 108), INT8_C( 66), INT8_C( -27), INT8_C( 52), INT8_C( -58), INT8_C( -74), INT8_C( 9), INT8_C( -86)), simde_mm512_set_epi8(INT8_C( 75), INT8_C( 87), INT8_C( 76), INT8_C( 1), INT8_C( 102), INT8_C( 81), INT8_C( 11), INT8_C( 95), INT8_C( 68), INT8_C(-126), INT8_C( 16), INT8_C(-115), INT8_C(-103), INT8_C( 28), INT8_C( -85), INT8_C( -40), INT8_C( -73), INT8_C(-106), INT8_C( 13), INT8_C( 80), INT8_C( 3), INT8_C( 86), INT8_C( -66), INT8_C( 43), INT8_C( -82), INT8_C( 33), INT8_C( 75), INT8_C( 49), INT8_C( -19), INT8_C( 105), INT8_C( -42), INT8_C( 105), INT8_C( -47), INT8_C(-126), INT8_C( 47), INT8_C( -64), INT8_C( 100), INT8_C( -59), INT8_C( 87), INT8_C( -74), INT8_C( 110), INT8_C( -19), INT8_C( 46), INT8_C( 69), INT8_C( -19), INT8_C( 50), INT8_C( 107), INT8_C( -50), INT8_C( 109), INT8_C( -80), INT8_C( 36), INT8_C( -57), INT8_C( 37), INT8_C( 115), INT8_C(-117), INT8_C( 90), INT8_C( -63), INT8_C( -76), INT8_C( 78), INT8_C( -35), INT8_C( 32), INT8_C(-106), INT8_C( 123), INT8_C( -25)), simde_mm512_set_epi8(INT8_C( 44), INT8_C( 82), INT8_C( 126), INT8_C( -9), INT8_C( -66), INT8_C( 2), INT8_C( -88), INT8_C( 13), INT8_C(-110), INT8_C( -36), INT8_C( 43), INT8_C( 92), INT8_C( 101), INT8_C( 87), INT8_C( 81), INT8_C( 56), INT8_C( 79), INT8_C( -39), INT8_C( 2), INT8_C( 36), INT8_C( -68), INT8_C( 88), INT8_C( 47), INT8_C( 32), INT8_C( -42), INT8_C( -22), INT8_C( -14), INT8_C( 29), INT8_C( 37), INT8_C(-113), INT8_C( 0), INT8_C( 20), INT8_C( 36), INT8_C(-104), INT8_C( 23), INT8_C( -13), INT8_C( -17), INT8_C( 126), INT8_C( 21), INT8_C(-106), INT8_C(-108), INT8_C(-108), INT8_C(-107), INT8_C( 102), INT8_C( 55), INT8_C( -17), INT8_C( 88), 
INT8_C( 6), INT8_C( -6), INT8_C( 65), INT8_C( 32), INT8_C( -10), INT8_C( 107), INT8_C( 79), INT8_C( -86), INT8_C( 111), INT8_C(-114), INT8_C(-111), INT8_C( 100), INT8_C( -40), INT8_C( -77), INT8_C( -56), INT8_C( 112), INT8_C( -18)) }, { simde_mm512_set_epi8(INT8_C( 60), INT8_C(-120), INT8_C( -42), INT8_C(-124), INT8_C( 8), INT8_C( -37), INT8_C(-106), INT8_C( 14), INT8_C( 67), INT8_C( -50), INT8_C( 121), INT8_C( 84), INT8_C( -68), INT8_C( -98), INT8_C( -25), INT8_C(-109), INT8_C( 45), INT8_C( 109), INT8_C( 12), INT8_C( 96), INT8_C( -57), INT8_C( 107), INT8_C( -54), INT8_C(-115), INT8_C( 41), INT8_C( -50), INT8_C( 31), INT8_C( -53), INT8_C( 127), INT8_C( 89), INT8_C( -60), INT8_C(-105), INT8_C( 82), INT8_C( 80), INT8_C(-112), INT8_C( -28), INT8_C( 60), INT8_C( 46), INT8_C( 78), INT8_C( 54), INT8_C( 44), INT8_C( 100), INT8_C( 93), INT8_C( -69), INT8_C( 103), INT8_C( 22), INT8_C( -91), INT8_C(-117), INT8_C( 6), INT8_C( 11), INT8_C( 74), INT8_C( -33), INT8_C( 52), INT8_C( -33), INT8_C( 27), INT8_C(-116), INT8_C( 17), INT8_C( -16), INT8_C( 55), INT8_C( 89), INT8_C( -22), INT8_C(-105), INT8_C( 16), INT8_C( 53)), UINT64_C( 4895941386511560426), simde_mm512_set_epi8(INT8_C( -58), INT8_C( 107), INT8_C( 108), INT8_C( 44), INT8_C( 63), INT8_C( 23), INT8_C(-111), INT8_C( 45), INT8_C(-124), INT8_C( 15), INT8_C( 66), INT8_C( 41), INT8_C( -67), INT8_C( 85), INT8_C( -77), INT8_C(-107), INT8_C( -87), INT8_C( 8), INT8_C( -14), INT8_C( -36), INT8_C( -62), INT8_C( -81), INT8_C( -95), INT8_C( -56), INT8_C(-119), INT8_C( 121), INT8_C( 17), INT8_C( -21), INT8_C( 26), INT8_C( -3), INT8_C( 81), INT8_C( 19), INT8_C(-118), INT8_C( -71), INT8_C(-118), INT8_C( -42), INT8_C(-124), INT8_C( 94), INT8_C( 108), INT8_C(-112), INT8_C( -84), INT8_C( 75), INT8_C(-109), INT8_C( -61), INT8_C( 82), INT8_C( 103), INT8_C(-112), INT8_C( 60), INT8_C( -13), INT8_C( 104), INT8_C( 72), INT8_C( 14), INT8_C( -7), INT8_C( 8), INT8_C( -12), INT8_C( 40), INT8_C( 1), INT8_C(-117), INT8_C( 3), INT8_C(-113), INT8_C( 
-82), INT8_C( 28), INT8_C(-126), INT8_C( -51)), simde_mm512_set_epi8(INT8_C( 39), INT8_C( 69), INT8_C( 62), INT8_C( 39), INT8_C( 115), INT8_C( 118), INT8_C( -66), INT8_C( -78), INT8_C( -59), INT8_C(-115), INT8_C( 51), INT8_C( 110), INT8_C( 119), INT8_C(-101), INT8_C( 33), INT8_C( 56), INT8_C(-106), INT8_C( -7), INT8_C( -15), INT8_C( 2), INT8_C( 34), INT8_C( 71), INT8_C( 105), INT8_C(-101), INT8_C( -79), INT8_C( -68), INT8_C( -32), INT8_C( 37), INT8_C( -95), INT8_C( 31), INT8_C(-121), INT8_C( 120), INT8_C( 52), INT8_C( -13), INT8_C( 114), INT8_C( 9), INT8_C( 109), INT8_C( -67), INT8_C( -91), INT8_C( 125), INT8_C( 114), INT8_C( 94), INT8_C( -75), INT8_C( -31), INT8_C( 115), INT8_C( 0), INT8_C( -73), INT8_C( -89), INT8_C( 22), INT8_C(-127), INT8_C( 84), INT8_C( -21), INT8_C( -84), INT8_C( -59), INT8_C( -52), INT8_C( 86), INT8_C( 101), INT8_C( -65), INT8_C(-118), INT8_C( -28), INT8_C(-104), INT8_C(-109), INT8_C( 70), INT8_C( 82)), simde_mm512_set_epi8(INT8_C( 60), INT8_C( 89), INT8_C( -42), INT8_C(-124), INT8_C( 8), INT8_C( -37), INT8_C( 3), INT8_C( 26), INT8_C( 88), INT8_C(-118), INT8_C( 18), INT8_C( 92), INT8_C( -68), INT8_C( -98), INT8_C( -25), INT8_C( 106), INT8_C( 120), INT8_C(-119), INT8_C( 79), INT8_C( 96), INT8_C( -57), INT8_C( 107), INT8_C( -54), INT8_C(-115), INT8_C( 39), INT8_C( -96), INT8_C( 31), INT8_C( -53), INT8_C( 127), INT8_C( 89), INT8_C( -60), INT8_C(-105), INT8_C( 82), INT8_C( -11), INT8_C(-112), INT8_C( 60), INT8_C( 60), INT8_C( 46), INT8_C( 78), INT8_C( 54), INT8_C( 44), INT8_C( 100), INT8_C( 62), INT8_C( -69), INT8_C( 103), INT8_C( 0), INT8_C( -63), INT8_C( 92), INT8_C( -75), INT8_C( 11), INT8_C( 74), INT8_C( -33), INT8_C( 26), INT8_C( 114), INT8_C( -57), INT8_C(-116), INT8_C( 101), INT8_C( 125), INT8_C(-123), INT8_C( 89), INT8_C( 121), INT8_C(-105), INT8_C( -20), INT8_C( 53)) }, { simde_mm512_set_epi8(INT8_C( 93), INT8_C(-100), INT8_C( -64), INT8_C( -33), INT8_C( 14), INT8_C( -92), INT8_C( -89), INT8_C( -55), INT8_C( -10), INT8_C( 45), INT8_C( 
126), INT8_C( -55), INT8_C( -68), INT8_C( -21), INT8_C(-104), INT8_C( 8), INT8_C( 63), INT8_C(-125), INT8_C( -76), INT8_C(-105), INT8_C( 46), INT8_C( 21), INT8_C( 6), INT8_C( -49), INT8_C(-107), INT8_C( -51), INT8_C( -9), INT8_C( -74), INT8_C( 35), INT8_C( 7), INT8_C( -7), INT8_C( -91), INT8_C( 8), INT8_C( -32), INT8_C( -98), INT8_C( -11), INT8_C( 69), INT8_C( 57), INT8_C( 7), INT8_C( 43), INT8_C(-112), INT8_C( -37), INT8_C( -25), INT8_C(-127), INT8_C( -4), INT8_C( -92), INT8_C( -28), INT8_C( 117), INT8_C( 56), INT8_C( -85), INT8_C( -59), INT8_C(-123), INT8_C(-120), INT8_C( 53), INT8_C( -24), INT8_C( 7), INT8_C( 22), INT8_C( 42), INT8_C( -90), INT8_C( -17), INT8_C( 113), INT8_C( 37), INT8_C(-120), INT8_C(-104)), UINT64_C( 1522514442861711303), simde_mm512_set_epi8(INT8_C( 103), INT8_C( -59), INT8_C( 7), INT8_C(-120), INT8_C( 113), INT8_C( -69), INT8_C( 22), INT8_C( 38), INT8_C( 124), INT8_C( -61), INT8_C( -20), INT8_C( 109), INT8_C(-103), INT8_C(-127), INT8_C( -19), INT8_C( 82), INT8_C( -84), INT8_C( 88), INT8_C( -40), INT8_C( -35), INT8_C( -65), INT8_C( 11), INT8_C( -16), INT8_C( 117), INT8_C( -14), INT8_C( 32), INT8_C( 2), INT8_C( -30), INT8_C( 65), INT8_C( -22), INT8_C( -87), INT8_C( 2), INT8_C( 21), INT8_C( -49), INT8_C( 55), INT8_C( -48), INT8_C( -96), INT8_C( -37), INT8_C( 21), INT8_C( 40), INT8_C( 107), INT8_C(-118), INT8_C( 62), INT8_C( -71), INT8_C( 102), INT8_C( -54), INT8_C( 28), INT8_C( -89), INT8_C( 90), INT8_C( -88), INT8_C( 51), INT8_C(-123), INT8_C( 82), INT8_C( 7), INT8_C( -87), INT8_C( -10), INT8_C(-123), INT8_C( 90), INT8_C( -21), INT8_C( -40), INT8_C( -95), INT8_C( -75), INT8_C( 68), INT8_C( -36)), simde_mm512_set_epi8(INT8_C( -25), INT8_C( 126), INT8_C( 90), INT8_C(-102), INT8_C( -35), INT8_C( -55), INT8_C( -99), INT8_C( 35), INT8_C( 8), INT8_C( 42), INT8_C( 44), INT8_C( 115), INT8_C( 24), INT8_C(-103), INT8_C(-117), INT8_C( -37), INT8_C(-102), INT8_C( 116), INT8_C( -73), INT8_C( 117), INT8_C( -39), INT8_C( 21), INT8_C(-106), INT8_C( 54), 
INT8_C(-121), INT8_C( 20), INT8_C(-122), INT8_C( -82), INT8_C( 46), INT8_C( 40), INT8_C( 111), INT8_C( -44), INT8_C( -64), INT8_C( -62), INT8_C( -85), INT8_C(-117), INT8_C( 122), INT8_C( -22), INT8_C( 2), INT8_C( 102), INT8_C( 54), INT8_C( -43), INT8_C( -69), INT8_C( -40), INT8_C( 57), INT8_C( 5), INT8_C( -81), INT8_C( -27), INT8_C(-125), INT8_C( 110), INT8_C(-105), INT8_C(-103), INT8_C( -37), INT8_C(-124), INT8_C( -56), INT8_C(-126), INT8_C( 1), INT8_C(-116), INT8_C( 89), INT8_C( 18), INT8_C( 9), INT8_C( 115), INT8_C( 39), INT8_C(-126)), simde_mm512_set_epi8(INT8_C( 93), INT8_C(-100), INT8_C( -64), INT8_C(-119), INT8_C( 14), INT8_C( 5), INT8_C( -89), INT8_C( -58), INT8_C( -10), INT8_C( 45), INT8_C( 56), INT8_C( -55), INT8_C( -68), INT8_C( -21), INT8_C(-104), INT8_C( -50), INT8_C( 63), INT8_C(-125), INT8_C( -76), INT8_C(-105), INT8_C( 9), INT8_C(-105), INT8_C( 43), INT8_C( -49), INT8_C( 87), INT8_C( -74), INT8_C( -9), INT8_C( 19), INT8_C( 35), INT8_C( 7), INT8_C( -7), INT8_C( -77), INT8_C( 89), INT8_C( -32), INT8_C( -98), INT8_C( -66), INT8_C( 69), INT8_C(-107), INT8_C( 7), INT8_C( 43), INT8_C(-112), INT8_C( -37), INT8_C( -41), INT8_C(-127), INT8_C( 53), INT8_C( -49), INT8_C( -28), INT8_C( 117), INT8_C( 12), INT8_C( -85), INT8_C( -59), INT8_C(-123), INT8_C( -50), INT8_C( -79), INT8_C( 1), INT8_C( 126), INT8_C(-123), INT8_C( 119), INT8_C( -90), INT8_C( -17), INT8_C( 113), INT8_C(-110), INT8_C( -97), INT8_C( -10)) }, { simde_mm512_set_epi8(INT8_C( -16), INT8_C( -86), INT8_C( -49), INT8_C( -47), INT8_C( 51), INT8_C( -95), INT8_C(-106), INT8_C( 23), INT8_C(-116), INT8_C(-105), INT8_C( -79), INT8_C( -83), INT8_C( -9), INT8_C( 63), INT8_C( -98), INT8_C( -11), INT8_C( -9), INT8_C( -7), INT8_C( 24), INT8_C( 14), INT8_C( -52), INT8_C( 32), INT8_C( -30), INT8_C( 34), INT8_C( 22), INT8_C( 13), INT8_C( -76), INT8_C( -68), INT8_C( -68), INT8_C( 6), INT8_C( -62), INT8_C( -44), INT8_C( 101), INT8_C( -62), INT8_C( 117), INT8_C( -65), INT8_C( 70), INT8_C(-105), INT8_C(-122), 
INT8_C( 44), INT8_C( 57), INT8_C( -52), INT8_C( 104), INT8_C( -45), INT8_C( -74), INT8_C(-128), INT8_C( -55), INT8_C( 81), INT8_C( -35), INT8_C( 118), INT8_C(-122), INT8_C( -10), INT8_C( -27), INT8_C(-106), INT8_C( -23), INT8_C( 36), INT8_C( -96), INT8_C( -41), INT8_C( 31), INT8_C( -96), INT8_C( -28), INT8_C( 71), INT8_C( -69), INT8_C( 13)), UINT64_C(17791771045880406762), simde_mm512_set_epi8(INT8_C( -59), INT8_C( 122), INT8_C(-123), INT8_C( 104), INT8_C( -66), INT8_C(-122), INT8_C( -54), INT8_C( 109), INT8_C(-116), INT8_C( 25), INT8_C( 86), INT8_C( -75), INT8_C( 123), INT8_C( 2), INT8_C( -95), INT8_C( -53), INT8_C( 82), INT8_C(-124), INT8_C( 122), INT8_C( -24), INT8_C(-112), INT8_C( 6), INT8_C( -42), INT8_C( 57), INT8_C( -79), INT8_C( -67), INT8_C( 91), INT8_C( 25), INT8_C( 123), INT8_C( -63), INT8_C( 18), INT8_C( -11), INT8_C( 27), INT8_C( -47), INT8_C( 98), INT8_C( 75), INT8_C( 50), INT8_C( -38), INT8_C( -29), INT8_C( 78), INT8_C( 88), INT8_C(-122), INT8_C( 99), INT8_C( 4), INT8_C( -97), INT8_C( 44), INT8_C( -12), INT8_C( -32), INT8_C(-105), INT8_C( -57), INT8_C( -84), INT8_C(-105), INT8_C( 75), INT8_C( 16), INT8_C( 15), INT8_C( -88), INT8_C( 86), INT8_C( 94), INT8_C( 44), INT8_C( -31), INT8_C( 83), INT8_C( 56), INT8_C( 31), INT8_C(-109)), simde_mm512_set_epi8(INT8_C( 53), INT8_C( -74), INT8_C( 4), INT8_C( 7), INT8_C( 104), INT8_C(-117), INT8_C( 125), INT8_C( 47), INT8_C( 78), INT8_C( -53), INT8_C( 60), INT8_C( -62), INT8_C( 100), INT8_C( -7), INT8_C( -80), INT8_C( -18), INT8_C( 68), INT8_C( 12), INT8_C( 63), INT8_C( 27), INT8_C( 87), INT8_C( -25), INT8_C(-102), INT8_C(-117), INT8_C( -9), INT8_C( 7), INT8_C(-117), INT8_C( -5), INT8_C( 64), INT8_C( 111), INT8_C( 9), INT8_C( 20), INT8_C( -40), INT8_C( -35), INT8_C( -16), INT8_C( -81), INT8_C( 114), INT8_C( 80), INT8_C(-124), INT8_C( -58), INT8_C(-110), INT8_C( 113), INT8_C( -6), INT8_C( 59), INT8_C( 48), INT8_C( -34), INT8_C( 27), INT8_C( -87), INT8_C(-109), INT8_C(-112), INT8_C( -86), INT8_C(-108), INT8_C( -83), 
INT8_C( -63), INT8_C( -64), INT8_C(-125), INT8_C( 125), INT8_C( -33), INT8_C( 25), INT8_C( 49), INT8_C( -11), INT8_C( 39), INT8_C( 115), INT8_C( -12)), simde_mm512_set_epi8(INT8_C( -53), INT8_C( 118), INT8_C( 34), INT8_C( 3), INT8_C( 51), INT8_C( 122), INT8_C( -87), INT8_C( 23), INT8_C( 105), INT8_C( -51), INT8_C( -4), INT8_C( -83), INT8_C( -25), INT8_C( 63), INT8_C( -98), INT8_C( -74), INT8_C( -9), INT8_C( -7), INT8_C( 24), INT8_C( 32), INT8_C( -52), INT8_C( 32), INT8_C( -30), INT8_C( 7), INT8_C( -34), INT8_C( 13), INT8_C( -76), INT8_C( -68), INT8_C( -68), INT8_C( 6), INT8_C( -62), INT8_C( -44), INT8_C( 101), INT8_C(-112), INT8_C( 117), INT8_C( -99), INT8_C( 71), INT8_C(-105), INT8_C( 70), INT8_C( 44), INT8_C( -66), INT8_C(-125), INT8_C( 104), INT8_C( -45), INT8_C( 85), INT8_C(-128), INT8_C( -55), INT8_C( 81), INT8_C( -35), INT8_C( 118), INT8_C( 99), INT8_C( -10), INT8_C( -27), INT8_C(-106), INT8_C( 44), INT8_C( 36), INT8_C( -10), INT8_C( 110), INT8_C( -95), INT8_C( -96), INT8_C( -60), INT8_C( 71), INT8_C(-122), INT8_C( 13)) }, { simde_mm512_set_epi8(INT8_C( -82), INT8_C( 8), INT8_C( 44), INT8_C( 122), INT8_C( -73), INT8_C( -6), INT8_C(-102), INT8_C( 73), INT8_C( 123), INT8_C( 117), INT8_C( 75), INT8_C( 44), INT8_C( 67), INT8_C( -86), INT8_C( 16), INT8_C( -61), INT8_C( 119), INT8_C( -20), INT8_C( 21), INT8_C( 125), INT8_C( 12), INT8_C( 117), INT8_C( 57), INT8_C( -17), INT8_C( -31), INT8_C( -68), INT8_C( 117), INT8_C( -57), INT8_C(-106), INT8_C( 52), INT8_C( -9), INT8_C( -33), INT8_C( -99), INT8_C( -47), INT8_C( 84), INT8_C( 18), INT8_C(-122), INT8_C( 36), INT8_C( -89), INT8_C( -23), INT8_C( -91), INT8_C( -73), INT8_C( 2), INT8_C( 78), INT8_C( -22), INT8_C( 45), INT8_C(-119), INT8_C( 40), INT8_C( -56), INT8_C( 14), INT8_C( 48), INT8_C( -63), INT8_C(-122), INT8_C( 34), INT8_C(-110), INT8_C( 31), INT8_C( 96), INT8_C( 110), INT8_C( -90), INT8_C(-122), INT8_C( 73), INT8_C( 42), INT8_C( 46), INT8_C( 121)), UINT64_C(11267890912837098650), simde_mm512_set_epi8(INT8_C( 
11), INT8_C( -76), INT8_C( -53), INT8_C( -7), INT8_C( -81), INT8_C( 74), INT8_C( 93), INT8_C( -99), INT8_C( 127), INT8_C( -84), INT8_C( 117), INT8_C( -10), INT8_C( 62), INT8_C( -82), INT8_C( 5), INT8_C( -52), INT8_C( 44), INT8_C( 46), INT8_C( -12), INT8_C( 95), INT8_C( 126), INT8_C( 19), INT8_C( 86), INT8_C( -57), INT8_C( -74), INT8_C( 86), INT8_C( 91), INT8_C(-111), INT8_C( 29), INT8_C( 108), INT8_C( 6), INT8_C( -86), INT8_C( 68), INT8_C( -55), INT8_C(-117), INT8_C( -50), INT8_C(-116), INT8_C( 66), INT8_C( 103), INT8_C( -37), INT8_C( -58), INT8_C( 19), INT8_C( 36), INT8_C( -6), INT8_C( -57), INT8_C(-127), INT8_C( -70), INT8_C( 101), INT8_C( -73), INT8_C( 78), INT8_C( 49), INT8_C( -43), INT8_C( -26), INT8_C( 87), INT8_C( -63), INT8_C( -52), INT8_C( 105), INT8_C( 122), INT8_C( -13), INT8_C(-124), INT8_C( 79), INT8_C( 9), INT8_C( 64), INT8_C( -1)), simde_mm512_set_epi8(INT8_C( -36), INT8_C( -58), INT8_C( 45), INT8_C( 55), INT8_C( 4), INT8_C( 32), INT8_C( -82), INT8_C( 116), INT8_C( -74), INT8_C( 107), INT8_C(-116), INT8_C( 71), INT8_C( -85), INT8_C( 5), INT8_C( -6), INT8_C( -50), INT8_C(-118), INT8_C( -56), INT8_C( -39), INT8_C( -31), INT8_C(-118), INT8_C( -97), INT8_C( 25), INT8_C( 84), INT8_C( 85), INT8_C( 111), INT8_C( -62), INT8_C(-113), INT8_C( -52), INT8_C( -16), INT8_C( -45), INT8_C( 104), INT8_C( -73), INT8_C( 14), INT8_C( -17), INT8_C( 10), INT8_C( -32), INT8_C( -25), INT8_C( -32), INT8_C( 113), INT8_C( 23), INT8_C( -77), INT8_C( 6), INT8_C( -76), INT8_C( 66), INT8_C(-124), INT8_C( -41), INT8_C( 124), INT8_C( -67), INT8_C(-104), INT8_C( -20), INT8_C( -65), INT8_C( -26), INT8_C( 102), INT8_C( 0), INT8_C( -81), INT8_C( -33), INT8_C( 37), INT8_C( 70), INT8_C( 106), INT8_C( 114), INT8_C( 31), INT8_C(-116), INT8_C( -46)), simde_mm512_set_epi8(INT8_C( -59), INT8_C( 8), INT8_C( 44), INT8_C( -62), INT8_C(-118), INT8_C(-125), INT8_C(-102), INT8_C( 73), INT8_C( 123), INT8_C( 28), INT8_C( 75), INT8_C( 61), INT8_C( 26), INT8_C( 32), INT8_C( 63), INT8_C( -30), INT8_C( 
-46), INT8_C( -20), INT8_C( 21), INT8_C( 52), INT8_C( 12), INT8_C(-119), INT8_C( -57), INT8_C( -56), INT8_C( -31), INT8_C( -68), INT8_C( 117), INT8_C( -4), INT8_C(-106), INT8_C( 87), INT8_C( -9), INT8_C(-104), INT8_C( -56), INT8_C(-110), INT8_C( 84), INT8_C( -83), INT8_C(-112), INT8_C( 36), INT8_C( -73), INT8_C(-122), INT8_C( -80), INT8_C( -73), INT8_C( 2), INT8_C( 78), INT8_C( -22), INT8_C( 45), INT8_C(-119), INT8_C( -68), INT8_C( 66), INT8_C( -58), INT8_C( 48), INT8_C( 76), INT8_C( 73), INT8_C( -5), INT8_C(-110), INT8_C( 31), INT8_C( -92), INT8_C( 110), INT8_C( -90), INT8_C( 34), INT8_C( 103), INT8_C( 42), INT8_C( 96), INT8_C( 121)) }, { simde_mm512_set_epi8(INT8_C( 50), INT8_C( 69), INT8_C( -72), INT8_C( -43), INT8_C(-119), INT8_C( -95), INT8_C( 87), INT8_C( -97), INT8_C(-126), INT8_C( 126), INT8_C( 57), INT8_C( 112), INT8_C( 12), INT8_C(-105), INT8_C( -43), INT8_C( 27), INT8_C( 37), INT8_C( -53), INT8_C( 44), INT8_C( 2), INT8_C( -47), INT8_C( 117), INT8_C( 52), INT8_C( 37), INT8_C(-113), INT8_C( -95), INT8_C(-114), INT8_C( 121), INT8_C( 123), INT8_C( 15), INT8_C( 21), INT8_C( 26), INT8_C( 84), INT8_C( 25), INT8_C(-108), INT8_C( -88), INT8_C( -21), INT8_C( -63), INT8_C( 120), INT8_C( 2), INT8_C( -96), INT8_C( -76), INT8_C( -43), INT8_C( 25), INT8_C( 38), INT8_C( 117), INT8_C( -54), INT8_C( -27), INT8_C( 105), INT8_C( -3), INT8_C( 8), INT8_C( -91), INT8_C( 53), INT8_C( -99), INT8_C( 98), INT8_C( 127), INT8_C( 1), INT8_C( -55), INT8_C( -38), INT8_C(-120), INT8_C( 94), INT8_C( 108), INT8_C( -84), INT8_C( -34)), UINT64_C( 2897475998749673702), simde_mm512_set_epi8(INT8_C( -99), INT8_C( 11), INT8_C(-127), INT8_C( -35), INT8_C( -6), INT8_C(-124), INT8_C( 91), INT8_C( -85), INT8_C( 99), INT8_C( -25), INT8_C( -96), INT8_C( 99), INT8_C( 22), INT8_C( -94), INT8_C( -42), INT8_C( 18), INT8_C( -36), INT8_C( -57), INT8_C( -77), INT8_C( -35), INT8_C( 1), INT8_C( 112), INT8_C( 97), INT8_C(-108), INT8_C( -71), INT8_C( -6), INT8_C( 28), INT8_C( 80), INT8_C( 127), INT8_C( 29), 
INT8_C( -10), INT8_C( -91), INT8_C( 59), INT8_C( 67), INT8_C( -23), INT8_C( 23), INT8_C( -23), INT8_C( -25), INT8_C( 21), INT8_C( -90), INT8_C( 93), INT8_C( -29), INT8_C( 36), INT8_C( 92), INT8_C( -10), INT8_C( -93), INT8_C(-120), INT8_C( -83), INT8_C( -15), INT8_C(-128), INT8_C(-114), INT8_C( 66), INT8_C( -85), INT8_C( 31), INT8_C( -36), INT8_C( 77), INT8_C( -6), INT8_C( 99), INT8_C( 9), INT8_C( -69), INT8_C( 26), INT8_C( 7), INT8_C( 53), INT8_C(-125)), simde_mm512_set_epi8(INT8_C( 111), INT8_C( 123), INT8_C( -56), INT8_C( 52), INT8_C( -78), INT8_C( -44), INT8_C( 78), INT8_C( -71), INT8_C(-102), INT8_C( 70), INT8_C( -41), INT8_C( 107), INT8_C( 127), INT8_C(-122), INT8_C( -80), INT8_C( -63), INT8_C( 91), INT8_C( -71), INT8_C( 105), INT8_C( -59), INT8_C( -39), INT8_C( -58), INT8_C( -13), INT8_C( 24), INT8_C( -67), INT8_C( 96), INT8_C( -28), INT8_C( 36), INT8_C( -24), INT8_C( 107), INT8_C( -62), INT8_C( 91), INT8_C( 123), INT8_C( 68), INT8_C( 22), INT8_C( 64), INT8_C( -7), INT8_C( 62), INT8_C( 38), INT8_C( 117), INT8_C( 125), INT8_C( -93), INT8_C( -71), INT8_C( -13), INT8_C( -13), INT8_C( 0), INT8_C( -44), INT8_C( 38), INT8_C(-116), INT8_C( 64), INT8_C(-125), INT8_C( 76), INT8_C( 126), INT8_C( 125), INT8_C(-102), INT8_C( -49), INT8_C( 105), INT8_C( 100), INT8_C( 87), INT8_C( 85), INT8_C( 33), INT8_C( -82), INT8_C(-122), INT8_C(-115)), simde_mm512_set_epi8(INT8_C( 50), INT8_C( 69), INT8_C( 115), INT8_C( -43), INT8_C( 119), INT8_C( -95), INT8_C( 87), INT8_C( -97), INT8_C(-126), INT8_C( 126), INT8_C( 7), INT8_C( -9), INT8_C( 12), INT8_C( -53), INT8_C( -43), INT8_C( 61), INT8_C( 13), INT8_C(-120), INT8_C( -12), INT8_C( 2), INT8_C( -47), INT8_C( 26), INT8_C( 40), INT8_C( 79), INT8_C(-113), INT8_C( -95), INT8_C(-114), INT8_C( -75), INT8_C( 123), INT8_C( 15), INT8_C( 21), INT8_C( -37), INT8_C( 112), INT8_C( 124), INT8_C(-108), INT8_C( -88), INT8_C( -21), INT8_C( -63), INT8_C( -24), INT8_C( 2), INT8_C( -76), INT8_C( -76), INT8_C( 24), INT8_C( 3), INT8_C( 38), INT8_C( 117), 
INT8_C( -54), INT8_C( -65), INT8_C( 4), INT8_C( 77), INT8_C( 8), INT8_C( -91), INT8_C( 121), INT8_C( -99), INT8_C( 98), INT8_C( 127), INT8_C( -3), INT8_C( -16), INT8_C( -39), INT8_C(-120), INT8_C( 94), INT8_C( 103), INT8_C( 123), INT8_C( -34)) }, { simde_mm512_set_epi8(INT8_C( 42), INT8_C(-115), INT8_C( 123), INT8_C( 21), INT8_C( 107), INT8_C( 83), INT8_C( 8), INT8_C( -8), INT8_C( -95), INT8_C( 23), INT8_C( -44), INT8_C( 116), INT8_C( -12), INT8_C( 83), INT8_C( 52), INT8_C( 2), INT8_C( 15), INT8_C( 81), INT8_C( 76), INT8_C( 85), INT8_C( -35), INT8_C( 93), INT8_C( 27), INT8_C( -46), INT8_C( -42), INT8_C( 1), INT8_C( -14), INT8_C( 121), INT8_C( -47), INT8_C( -92), INT8_C( 21), INT8_C( 25), INT8_C( -50), INT8_C( 56), INT8_C( -1), INT8_C( -27), INT8_C(-104), INT8_C( -84), INT8_C( 97), INT8_C( 47), INT8_C( 24), INT8_C( 91), INT8_C( 112), INT8_C( -60), INT8_C( 30), INT8_C(-107), INT8_C(-111), INT8_C( 96), INT8_C( 30), INT8_C( -2), INT8_C(-109), INT8_C( 127), INT8_C( -64), INT8_C( -18), INT8_C( -75), INT8_C( -64), INT8_C( -48), INT8_C( 57), INT8_C( 27), INT8_C( 26), INT8_C(-119), INT8_C( -95), INT8_C( 15), INT8_C( -95)), UINT64_C(10629194982122454645), simde_mm512_set_epi8(INT8_C( 62), INT8_C( 73), INT8_C( -67), INT8_C( 27), INT8_C( -60), INT8_C(-109), INT8_C( 33), INT8_C( 36), INT8_C( -13), INT8_C( 42), INT8_C( 41), INT8_C( -70), INT8_C( 94), INT8_C( 39), INT8_C(-123), INT8_C( 38), INT8_C( -66), INT8_C(-110), INT8_C( 55), INT8_C( -62), INT8_C( -32), INT8_C( 61), INT8_C( 21), INT8_C(-111), INT8_C( 20), INT8_C(-117), INT8_C(-105), INT8_C(-126), INT8_C( -6), INT8_C( 47), INT8_C( -54), INT8_C( 116), INT8_C( -85), INT8_C( 57), INT8_C(-112), INT8_C( 31), INT8_C(-109), INT8_C( -19), INT8_C( 86), INT8_C(-126), INT8_C( -89), INT8_C( -72), INT8_C( -35), INT8_C( 82), INT8_C( -99), INT8_C( 1), INT8_C( -28), INT8_C( 124), INT8_C( 52), INT8_C( 115), INT8_C( 85), INT8_C( 40), INT8_C(-124), INT8_C( -25), INT8_C( 95), INT8_C( -49), INT8_C( 26), INT8_C( 78), INT8_C( -88), INT8_C( 95), 
INT8_C( -42), INT8_C(-119), INT8_C( -27), INT8_C( 46)), simde_mm512_set_epi8(INT8_C( -62), INT8_C( -7), INT8_C( -53), INT8_C( 59), INT8_C( 14), INT8_C( -30), INT8_C( 104), INT8_C( -55), INT8_C( 7), INT8_C( 102), INT8_C( -93), INT8_C( -26), INT8_C( -17), INT8_C(-127), INT8_C(-119), INT8_C( -98), INT8_C( 65), INT8_C( -48), INT8_C( 121), INT8_C( 66), INT8_C( -69), INT8_C( -56), INT8_C( 87), INT8_C( -4), INT8_C( -55), INT8_C( 111), INT8_C( 50), INT8_C( 124), INT8_C( 43), INT8_C( 104), INT8_C( 0), INT8_C( -55), INT8_C( 71), INT8_C( 44), INT8_C( -51), INT8_C( -76), INT8_C( 67), INT8_C( 119), INT8_C( -71), INT8_C( -67), INT8_C( -88), INT8_C( 53), INT8_C( -42), INT8_C( -53), INT8_C( -99), INT8_C( -89), INT8_C(-118), INT8_C( -2), INT8_C( -84), INT8_C( -65), INT8_C( -66), INT8_C( 121), INT8_C( -61), INT8_C( -77), INT8_C( 116), INT8_C( -70), INT8_C( 113), INT8_C( -63), INT8_C( -68), INT8_C( -78), INT8_C( -23), INT8_C( -40), INT8_C( 54), INT8_C( -12)), simde_mm512_set_epi8(INT8_C( 87), INT8_C(-115), INT8_C( 123), INT8_C( 19), INT8_C( 107), INT8_C( 83), INT8_C( -57), INT8_C( 65), INT8_C( -17), INT8_C( 23), INT8_C( -44), INT8_C( 116), INT8_C( -12), INT8_C( 83), INT8_C( -19), INT8_C( 2), INT8_C( 15), INT8_C( 27), INT8_C( 28), INT8_C( -7), INT8_C(-101), INT8_C(-125), INT8_C( 27), INT8_C( -46), INT8_C( 45), INT8_C( 1), INT8_C( -14), INT8_C( 121), INT8_C( -47), INT8_C( -63), INT8_C( 21), INT8_C( -11), INT8_C( -50), INT8_C( 56), INT8_C( 105), INT8_C( 67), INT8_C(-104), INT8_C( 16), INT8_C( 97), INT8_C( 82), INT8_C( -12), INT8_C( -15), INT8_C( 112), INT8_C( -60), INT8_C( 30), INT8_C( -89), INT8_C(-111), INT8_C( 16), INT8_C( -12), INT8_C( -63), INT8_C( 49), INT8_C( 92), INT8_C( 109), INT8_C( -18), INT8_C( -33), INT8_C( -64), INT8_C( -48), INT8_C( 68), INT8_C( 78), INT8_C( 120), INT8_C(-119), INT8_C( 33), INT8_C( 15), INT8_C( 70)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_gf2p8mul_epi8(test_vec[i].src, test_vec[i].k, 
test_vec[i].a, test_vec[i].b); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; }

/* Table-driven test for simde_mm_maskz_gf2p8mul_epi8 (128-bit vectors, 16 byte lanes).
 *
 * Each of the 8 cases holds a 16-bit mask `k`, two input vectors `a` and `b`,
 * and the expected result `r`. The loop calls the function under test once per
 * case and compares the result with `r` as signed 8-bit lanes.
 *
 * In the table data, lanes of `r` whose bit in `k` is clear are 0, which is
 * what a zero-masking ("maskz") variant is expected to produce. For example,
 * the first case has k = 29185 (0x7201) and only lanes 14, 13, 12, 9 and 0 of
 * `r` are non-zero (arguments of simde_mm_set_epi8 are listed from the highest
 * lane down to lane 0).
 *
 * NOTE(review): the non-zero expected bytes are presumably GF(2^8) products as
 * defined for the GFNI GF2P8MULB instruction -- confirm against the
 * implementation / reference output; this file only stores the values.
 *
 * Returns 0 once every case has been checked. */
static int test_simde_mm_maskz_gf2p8mul_epi8(SIMDE_MUNIT_TEST_ARGS) {
  /* One entry per test case: mask, operands, expected result. */
  const struct { simde__mmask16 k; simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = {
    { UINT16_C(29185),
      simde_mm_set_epi8(INT8_C( -10), INT8_C( 32), INT8_C( 126), INT8_C(-118), INT8_C( 97), INT8_C( -54), INT8_C( -78), INT8_C( 30), INT8_C( 6), INT8_C( 88), INT8_C( 8), INT8_C( -88), INT8_C( 37), INT8_C(-105), INT8_C( -43), INT8_C( 10)),
      simde_mm_set_epi8(INT8_C( 86), INT8_C( 34), INT8_C( 53), INT8_C(-104), INT8_C( 65), INT8_C( 51), INT8_C( 68), INT8_C( 58), INT8_C( 41), INT8_C( -52), INT8_C( 100), INT8_C( -28), INT8_C( -14), INT8_C( 92), INT8_C( 63), INT8_C(-111)),
      simde_mm_set_epi8(INT8_C( 0), INT8_C( 44), INT8_C( 101), INT8_C( -87), INT8_C( 0), INT8_C( 0), INT8_C(-121), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -35)) },
    { UINT16_C(59388),
      simde_mm_set_epi8(INT8_C(-109), INT8_C( 72), INT8_C( 43), INT8_C( 122), INT8_C( 102), INT8_C( -82), INT8_C( 18), INT8_C(-114), INT8_C( 68), INT8_C( -77), INT8_C(-112), INT8_C( -24), INT8_C( 48), INT8_C( -78), INT8_C( -9), INT8_C( 12)),
      simde_mm_set_epi8(INT8_C( 117), INT8_C( -80), INT8_C( 47), INT8_C( -43), INT8_C( 69), INT8_C( -22), INT8_C( 13), INT8_C( 76), INT8_C( -31), INT8_C( -18), INT8_C( 35), INT8_C( -68), INT8_C( -94), INT8_C( -30), INT8_C( -17), INT8_C( 44)),
      simde_mm_set_epi8(INT8_C( 6), INT8_C( 14), INT8_C(-123), INT8_C( 0), INT8_C( 0), INT8_C( 20), INT8_C( -54), INT8_C( -14), INT8_C( -41), INT8_C( -78), INT8_C( 54), INT8_C(-102), INT8_C( 73), INT8_C( 69), INT8_C( 0), INT8_C( 0)) },
    { UINT16_C(26979),
      simde_mm_set_epi8(INT8_C( -8), INT8_C( -14), INT8_C( -53), INT8_C(-117), INT8_C( -21), INT8_C( 27), INT8_C( 63), INT8_C( 11), INT8_C(-100), INT8_C( 101), INT8_C( -23), INT8_C( 95), INT8_C( 77), INT8_C( 86), INT8_C( 90), INT8_C( 79)),
      simde_mm_set_epi8(INT8_C( -69), INT8_C( 7), INT8_C( -57), INT8_C( 95), INT8_C( 119), INT8_C( -13), INT8_C( 52), INT8_C( -49), INT8_C( -57), INT8_C( 36), INT8_C( 17), INT8_C( -61), INT8_C( -43), INT8_C( -58), INT8_C( 107), INT8_C( 88)),
      simde_mm_set_epi8(INT8_C( 0), INT8_C( -24), INT8_C( 119), INT8_C( 0), INT8_C( 57), INT8_C( 0), INT8_C( 0), INT8_C( 104), INT8_C( 0), INT8_C(-101), INT8_C( -5), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -52), INT8_C(-108)) },
    { UINT16_C(42638),
      simde_mm_set_epi8(INT8_C( -29), INT8_C( -92), INT8_C( 29), INT8_C( -85), INT8_C( -19), INT8_C( -87), INT8_C( 68), INT8_C( -71), INT8_C( 11), INT8_C( 124), INT8_C( 70), INT8_C( -95), INT8_C( -56), INT8_C( 58), INT8_C( -18), INT8_C( 102)),
      simde_mm_set_epi8(INT8_C( 7), INT8_C( -64), INT8_C( 82), INT8_C( 103), INT8_C( 103), INT8_C( 41), INT8_C( -38), INT8_C( -41), INT8_C( 30), INT8_C( -59), INT8_C( 99), INT8_C( 31), INT8_C( -62), INT8_C( -42), INT8_C( -76), INT8_C( -24)),
      simde_mm_set_epi8(INT8_C( -97), INT8_C( 0), INT8_C( -16), INT8_C( 0), INT8_C( 0), INT8_C( 106), INT8_C( 121), INT8_C( 0), INT8_C( -46), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -32), INT8_C( 23), INT8_C( 14), INT8_C( 0)) },
    { UINT16_C( 7351),
      simde_mm_set_epi8(INT8_C( -85), INT8_C( -52), INT8_C( 38), INT8_C( 11), INT8_C( 18), INT8_C( -75), INT8_C( 112), INT8_C( -81), INT8_C( 105), INT8_C( 79), INT8_C( -70), INT8_C( 11), INT8_C( -86), INT8_C( 72), INT8_C( 43), INT8_C(-118)),
      simde_mm_set_epi8(INT8_C( -12), INT8_C( 82), INT8_C( -77), INT8_C( -57), INT8_C( 106), INT8_C( -53), INT8_C( -59), INT8_C( 69), INT8_C(-110), INT8_C( -9), INT8_C( 80), INT8_C( 7), INT8_C( 7), INT8_C( 113), INT8_C( -36), INT8_C( -3)),
      simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 48), INT8_C( 46), INT8_C( -26), INT8_C( 0), INT8_C( 0), INT8_C( 18), INT8_C( 0), INT8_C( 26), INT8_C( 49), INT8_C( 0), INT8_C( -6), INT8_C( 27), INT8_C( 54)) },
    { UINT16_C(26851),
      simde_mm_set_epi8(INT8_C(-118), INT8_C( 121), INT8_C(-118), INT8_C( 72), INT8_C( -13), INT8_C( -73), INT8_C( 99), INT8_C(-100), INT8_C( 70), INT8_C( -52), INT8_C( 55), INT8_C( 31), INT8_C( 122), INT8_C(-104), INT8_C( -45), INT8_C( -49)),
      simde_mm_set_epi8(INT8_C(-120), INT8_C(-116), INT8_C( -9), INT8_C( 25), INT8_C( 115), INT8_C( 123), INT8_C( 61), INT8_C(-108), INT8_C( 126), INT8_C( 107), INT8_C( -34), INT8_C( 121), INT8_C( -93), INT8_C( -91), INT8_C( -44), INT8_C( 120)),
      simde_mm_set_epi8(INT8_C( 0), INT8_C( -56), INT8_C( 5), INT8_C( 0), INT8_C( 124), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -83), INT8_C( 86), INT8_C( -3), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 32), INT8_C( -27)) },
    { UINT16_C(42651),
      simde_mm_set_epi8(INT8_C( 87), INT8_C( 3), INT8_C( 34), INT8_C( 16), INT8_C( 12), INT8_C( 64), INT8_C( 85), INT8_C( -66), INT8_C( 70), INT8_C( -63), INT8_C( -19), INT8_C( 10), INT8_C( 31), INT8_C( -86), INT8_C( -82), INT8_C( -92)),
      simde_mm_set_epi8(INT8_C( 16), INT8_C(-112), INT8_C( -76), INT8_C( 22), INT8_C( -61), INT8_C( 17), INT8_C( 14), INT8_C( 50), INT8_C(-126), INT8_C( 74), INT8_C( -39), INT8_C( 102), INT8_C( 3), INT8_C( 101), INT8_C( -19), INT8_C( 41)),
      simde_mm_set_epi8(INT8_C( 7), INT8_C( 0), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 44), INT8_C( 123), INT8_C( 0), INT8_C( -20), INT8_C( 0), INT8_C( 0), INT8_C( -47), INT8_C( 33), INT8_C( 0), INT8_C( 115), INT8_C( -76)) },
    { UINT16_C(47845),
      simde_mm_set_epi8(INT8_C( 87), INT8_C( 53), INT8_C( -89), INT8_C( -46), INT8_C( 26), INT8_C(-123), INT8_C( -52), INT8_C( -54), INT8_C(-106), INT8_C( -59), INT8_C(-103), INT8_C( -45), INT8_C( -29), INT8_C( 90), INT8_C( 23), INT8_C(-121)),
      simde_mm_set_epi8(INT8_C( 36), INT8_C( 38), INT8_C( 66), INT8_C( -47), INT8_C( -23), INT8_C( -52), INT8_C( -74), INT8_C( -39), INT8_C( 27), INT8_C( -34), INT8_C( -39), INT8_C( 18), INT8_C( -29), INT8_C( 16), INT8_C( 82), INT8_C(-107)),
      simde_mm_set_epi8(INT8_C( 73), INT8_C( 0), INT8_C( 27), INT8_C( 67), INT8_C( -46), INT8_C( 0), INT8_C(-113), INT8_C( 0), INT8_C( -34), INT8_C( -30), INT8_C(-105), INT8_C( 0), INT8_C( 0), INT8_C( -41), INT8_C( 0), INT8_C( 50)) },
  };

  /* Run every case; the element count is derived from the array itself. */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    simde__m128i r = simde_mm_maskz_gf2p8mul_epi8(test_vec[i].k, test_vec[i].a, test_vec[i].b);
    /* Compare actual and expected results as int8 lanes. */
    simde_assert_m128i_i8(r, ==, test_vec[i].r);
  }

  return 0;
}

/* Table-driven test for simde_mm256_maskz_gf2p8mul_epi8 (256-bit vectors, 32 byte lanes).
 *
 * Same layout as the 128-bit test: each of the 8 cases holds a 32-bit mask `k`,
 * inputs `a` and `b`, and the expected result `r`. The result of the function
 * under test is compared with `r` as signed 8-bit lanes.
 *
 * As in the table data for the first case (k = 2075432365, top byte 0x7B),
 * lanes of `r` whose mask bit is clear are 0.
 *
 * NOTE(review): expected non-zero bytes are presumably GF(2^8) products per the
 * GFNI definition -- confirm against the implementation.
 *
 * Returns 0 once every case has been checked. */
static int test_simde_mm256_maskz_gf2p8mul_epi8(SIMDE_MUNIT_TEST_ARGS) {
  /* One entry per test case: mask, operands, expected result. */
  const struct { simde__mmask32 k; simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = {
    { UINT32_C(2075432365),
      simde_mm256_set_epi8(INT8_C( 86), INT8_C( 126), INT8_C( -66), INT8_C( -91), INT8_C( -5), INT8_C(-106), INT8_C( -15), INT8_C( 89), INT8_C( -94), INT8_C( -2), INT8_C(-101), INT8_C( 60), INT8_C(-124), INT8_C( -68), INT8_C( -2), INT8_C( 111), INT8_C( -61), INT8_C( -18), INT8_C( 111), INT8_C( -99), INT8_C( -83), INT8_C( 74), INT8_C( 44), INT8_C( 121), INT8_C( 68), INT8_C( -43), INT8_C( 102), INT8_C( -12), INT8_C( -75), INT8_C( 115), INT8_C( 102), INT8_C( 30)),
      simde_mm256_set_epi8(INT8_C( 83), INT8_C( -80), INT8_C( -53), INT8_C( 73), INT8_C( 44), INT8_C( -89), INT8_C(-125), INT8_C( 21), INT8_C( 52), INT8_C(-114), INT8_C( 44), INT8_C( 76), INT8_C( 111), INT8_C( -49), INT8_C( -31), INT8_C( -69), INT8_C( -77), INT8_C( 69), INT8_C( -95), INT8_C(-121), INT8_C( -56), INT8_C( -77), INT8_C(-118), INT8_C( 83), INT8_C( 89), INT8_C(-108), INT8_C( -45), INT8_C( 75), INT8_C( 52), INT8_C( 44), INT8_C( -49), INT8_C( 106)),
      simde_mm256_set_epi8(INT8_C( 0), INT8_C(-121), INT8_C( -94), INT8_C( 52), INT8_C( 10), INT8_C( 0), INT8_C( 44), INT8_C( -63), INT8_C( -9), INT8_C( 0), INT8_C( 8), INT8_C(-110), INT8_C( 0), INT8_C( -23), INT8_C( 0), INT8_C( 0), INT8_C(-123), INT8_C( 0), INT8_C( 0), INT8_C( 102), INT8_C( 0), INT8_C( -85), INT8_C( 0), INT8_C( 51), INT8_C( -50), INT8_C( 0), INT8_C( -77), INT8_C( 0), INT8_C( 22), INT8_C(-128), INT8_C( 0), INT8_C( -32)) },
    { UINT32_C(1069894965),
      simde_mm256_set_epi8(INT8_C( -54), INT8_C( 16), INT8_C( 24), INT8_C( -22), INT8_C( 61), INT8_C( 108), INT8_C( 61), INT8_C( 52), INT8_C(-116), INT8_C( -40), INT8_C( 94), INT8_C(-119), INT8_C( -35), INT8_C( -8), INT8_C( 32), INT8_C( -5), INT8_C( -65), INT8_C( 21), INT8_C( -98), INT8_C(-108), INT8_C( 33), INT8_C( -44), INT8_C( 105), INT8_C( -50), INT8_C( 63), INT8_C( -8), INT8_C( 33), INT8_C( 91), INT8_C( -25), INT8_C( 114), INT8_C( 75), INT8_C( 17)),
      simde_mm256_set_epi8(INT8_C( -88), INT8_C( -18), INT8_C( -33), INT8_C( 59), INT8_C( 62), INT8_C( -54), INT8_C( -64), INT8_C( 55), INT8_C( 106), INT8_C( -53), INT8_C( 118), INT8_C( 100), INT8_C( -51), INT8_C( 119), INT8_C( 124), INT8_C(-117), INT8_C( 10), INT8_C(-115), INT8_C( 59), INT8_C( 50), INT8_C( -2), INT8_C( 21), INT8_C( 66), INT8_C( 92), INT8_C(-122), INT8_C( -88), INT8_C( -10), INT8_C( -64), INT8_C( -80), INT8_C( -20), INT8_C( 46), INT8_C( -74)),
      simde_mm256_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( -3), INT8_C( 82), INT8_C( 97), INT8_C( 119), INT8_C( 112), INT8_C( 59), INT8_C( 95), INT8_C( 54), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-111), INT8_C( 0), INT8_C( -36), INT8_C( 0), INT8_C(-121), INT8_C( 0), INT8_C( 0), INT8_C( 12), INT8_C( 70), INT8_C( 0), INT8_C( 32), INT8_C( 0), INT8_C( 0), INT8_C( 31), INT8_C( 83), INT8_C( 0), INT8_C( 22), INT8_C( 0), INT8_C( 35)) },
    { UINT32_C(4143594711),
      simde_mm256_set_epi8(INT8_C( -56), INT8_C( 41), INT8_C( 98), INT8_C( -20), INT8_C( 45), INT8_C( -36), INT8_C( -44), INT8_C( 43), INT8_C( 59), INT8_C( -52), INT8_C( 81), INT8_C(-111), INT8_C( 13), INT8_C( 79), INT8_C( -10), INT8_C( -28), INT8_C( 27), INT8_C( 109), INT8_C( -96), INT8_C( 29), INT8_C( -11), INT8_C( 6), INT8_C( -28), INT8_C( -2), INT8_C(-120), INT8_C( 13), INT8_C( -21), INT8_C(-118), INT8_C( 58), INT8_C( 122), INT8_C( 9), INT8_C( -90)),
      simde_mm256_set_epi8(INT8_C( -80), INT8_C( 23), INT8_C( 22), INT8_C( 112), INT8_C( -19), INT8_C( 5), INT8_C( 121), INT8_C(-111), INT8_C(-110), INT8_C( 102), INT8_C( -7), INT8_C( 1), INT8_C( -93), INT8_C( 15), INT8_C( 111), INT8_C(-121), INT8_C( -48), INT8_C(-113), INT8_C( 117), INT8_C( 42), INT8_C( -20), INT8_C( -35), INT8_C( 127), INT8_C( 40), INT8_C( -34), INT8_C( 91), INT8_C( -77), INT8_C( -48), INT8_C( -63), INT8_C( 115), INT8_C( 111), INT8_C( 25)),
      simde_mm256_set_epi8(INT8_C( -25), INT8_C( 121), INT8_C( 45), INT8_C( -43), INT8_C( 0), INT8_C(-127), INT8_C(-107), INT8_C( 0), INT8_C( 111), INT8_C( -67), INT8_C( -44), INT8_C(-111), INT8_C( 118), INT8_C( 0), INT8_C( 37), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -86), INT8_C( 95), INT8_C(-126), INT8_C( -8), INT8_C( 0), INT8_C( 0), INT8_C( -28), INT8_C( -62), INT8_C( 0), INT8_C( 110), INT8_C( 0), INT8_C( 31), INT8_C( 58), INT8_C( 111)) },
    { UINT32_C(2889346475),
      simde_mm256_set_epi8(INT8_C( -3), INT8_C( 41), INT8_C( 18), INT8_C( 80), INT8_C( -91), INT8_C( -20), INT8_C( 113), INT8_C( 8), INT8_C( 78), INT8_C( -34), INT8_C( 125), INT8_C( 84), INT8_C( 101), INT8_C( -30), INT8_C( -57), INT8_C(-112), INT8_C( 22), INT8_C(-119), INT8_C( 6), INT8_C( 60), INT8_C( 125), INT8_C( -3), INT8_C( -85), INT8_C( 72), INT8_C( 113), INT8_C(-117), INT8_C( 15), INT8_C( -1), INT8_C( -32), INT8_C( 51), INT8_C( 36), INT8_C( 98)),
      simde_mm256_set_epi8(INT8_C( -87), INT8_C( 102), INT8_C( -12), INT8_C( -17), INT8_C( -14), INT8_C( 39), INT8_C( -22), INT8_C(-103), INT8_C( -72), INT8_C( 33), INT8_C( -49), INT8_C( -96), INT8_C( 65), INT8_C( -66), INT8_C( -3), INT8_C(-104), INT8_C( 2), INT8_C( -22), INT8_C( 13), IN8_PLACEHOLDER
INT8_C( 31), INT8_C( 13), INT8_C( -86), INT8_C( 118), INT8_C( 118), INT8_C( 10), INT8_C( 111), INT8_C( -70), INT8_C( -98), INT8_C( -17), INT8_C( 17), INT8_C( -87), INT8_C( 72), INT8_C( 94), INT8_C( -67), INT8_C( 111), INT8_C(-117), INT8_C( 87), INT8_C( -37), INT8_C( 72), INT8_C(-111), INT8_C( 127), INT8_C( 114), INT8_C( -14), INT8_C( 52), INT8_C( -67), INT8_C(-107), INT8_C( 33), INT8_C( -76), INT8_C( -61), INT8_C( -70), INT8_C( -47), INT8_C( -46), INT8_C( -17), INT8_C( -5), INT8_C( 36), INT8_C(-122), INT8_C( -2), INT8_C( 118), INT8_C(-112), INT8_C( 86), INT8_C( 8), INT8_C( 36), INT8_C( -62), INT8_C( 114), INT8_C(-111), INT8_C( 6), INT8_C( -62), INT8_C(-114), INT8_C( -28), INT8_C( -63)), simde_mm512_set_epi8(INT8_C( 20), INT8_C( 112), INT8_C( -65), INT8_C( 63), INT8_C( -38), INT8_C( -92), INT8_C( 35), INT8_C( -68), INT8_C( 24), INT8_C(-112), INT8_C( 117), INT8_C( 120), INT8_C( -74), INT8_C( 16), INT8_C( 14), INT8_C( 12), INT8_C( -90), INT8_C( 66), INT8_C( 100), INT8_C( 3), INT8_C( -62), INT8_C( -98), INT8_C( 63), INT8_C( 104), INT8_C( -1), INT8_C( -89), INT8_C( -83), INT8_C( -86), INT8_C( -3), INT8_C( -25), INT8_C( 115), INT8_C( -60), INT8_C(-112), INT8_C( -70), INT8_C( 46), INT8_C( 98), INT8_C( -8), INT8_C( 89), INT8_C( 75), INT8_C( 90), INT8_C( -71), INT8_C( -73), INT8_C( 118), INT8_C( 84), INT8_C( -47), INT8_C( -14), INT8_C(-128), INT8_C( -85), INT8_C( 101), INT8_C( -88), INT8_C( -62), INT8_C( -52), INT8_C( 117), INT8_C(-103), INT8_C( 102), INT8_C( -43), INT8_C(-120), INT8_C( 111), INT8_C( -3), INT8_C( 122), INT8_C( -66), INT8_C( -65), INT8_C( -88), INT8_C( 107)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 69), INT8_C( 80), INT8_C( -7), INT8_C( 83), INT8_C( 35), INT8_C( 0), INT8_C( 78), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 126), INT8_C( 0), INT8_C( -59), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 20), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-121), INT8_C( -63), INT8_C( 0), INT8_C( 
0), INT8_C( 0), INT8_C( 0), INT8_C( 113), INT8_C( 0), INT8_C( 0), INT8_C( 108), INT8_C( 0), INT8_C( -41), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 13), INT8_C( -60), INT8_C( 43), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-101), INT8_C( 0), INT8_C( -3), INT8_C( -55), INT8_C(-103), INT8_C( 127), INT8_C( 0), INT8_C(-100), INT8_C(-122), INT8_C( 0), INT8_C( 0), INT8_C( 7), INT8_C( 0), INT8_C( 0), INT8_C( -76), INT8_C( 0)) }, { UINT64_C( 2181765975377201771), simde_mm512_set_epi8(INT8_C( -72), INT8_C( -88), INT8_C( -65), INT8_C( 59), INT8_C( 13), INT8_C( 91), INT8_C( -20), INT8_C( -40), INT8_C( -98), INT8_C( -43), INT8_C( 79), INT8_C( 111), INT8_C( 73), INT8_C( -73), INT8_C( 96), INT8_C( 51), INT8_C( -65), INT8_C( -70), INT8_C( 8), INT8_C( 49), INT8_C( 105), INT8_C( -14), INT8_C(-119), INT8_C( 105), INT8_C( 6), INT8_C( 79), INT8_C( -38), INT8_C( 73), INT8_C(-120), INT8_C( -3), INT8_C( 91), INT8_C( 88), INT8_C( -77), INT8_C( -81), INT8_C( -18), INT8_C( 72), INT8_C( -96), INT8_C( 15), INT8_C( -73), INT8_C( -59), INT8_C( -2), INT8_C( -52), INT8_C( -45), INT8_C(-122), INT8_C(-110), INT8_C( 47), INT8_C( 73), INT8_C( 50), INT8_C( 44), INT8_C( 40), INT8_C( 95), INT8_C( 89), INT8_C( -46), INT8_C( 90), INT8_C(-119), INT8_C( 106), INT8_C(-117), INT8_C( -78), INT8_C( -91), INT8_C( 30), INT8_C( 102), INT8_C(-102), INT8_C( 93), INT8_C( 8)), simde_mm512_set_epi8(INT8_C( 98), INT8_C( -34), INT8_C( 92), INT8_C( 106), INT8_C( -99), INT8_C( -55), INT8_C( 79), INT8_C( -90), INT8_C( -71), INT8_C( 74), INT8_C(-102), INT8_C( 119), INT8_C( -96), INT8_C( 34), INT8_C( 78), INT8_C( 73), INT8_C( 57), INT8_C( -86), INT8_C( -53), INT8_C( 60), INT8_C( -46), INT8_C( 9), INT8_C( 58), INT8_C( -42), INT8_C( -8), INT8_C( 70), INT8_C(-125), INT8_C( -42), INT8_C( 80), INT8_C( 123), INT8_C( 112), INT8_C( 80), INT8_C( -4), INT8_C( 54), INT8_C( 48), INT8_C( 2), INT8_C( -2), INT8_C(-106), INT8_C( 35), INT8_C( -73), INT8_C( 109), INT8_C(-128), INT8_C( 40), INT8_C( -16), INT8_C( 58), INT8_C(-108), 
INT8_C(-128), INT8_C( -80), INT8_C( -38), INT8_C( 49), INT8_C( 15), INT8_C(-105), INT8_C( 26), INT8_C( 1), INT8_C( 70), INT8_C( -26), INT8_C( -39), INT8_C( -82), INT8_C( -94), INT8_C( 41), INT8_C( -39), INT8_C( -70), INT8_C( -64), INT8_C( 69)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -42), INT8_C( 91), INT8_C( -26), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C(-110), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 100), INT8_C( 5), INT8_C( -60), INT8_C( 0), INT8_C( 0), INT8_C( 2), INT8_C( 16), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 38), INT8_C( 0), INT8_C( 0), INT8_C( 45), INT8_C( 35), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -19), INT8_C( 0), INT8_C(-112), INT8_C( 23), INT8_C( 0), INT8_C( 0), INT8_C( 59), INT8_C( 0), INT8_C(-115), INT8_C( -25), INT8_C( 0), INT8_C( 0), INT8_C( 16), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -23), INT8_C( 0), INT8_C( 0), INT8_C( -6), INT8_C( 0), INT8_C( 32), INT8_C( 0), INT8_C( 0), INT8_C( 37), INT8_C( -79), INT8_C( 0), INT8_C( 98), INT8_C( 0), INT8_C( -27), INT8_C( 30)) }, { UINT64_C(10345383608333278604), simde_mm512_set_epi8(INT8_C( -16), INT8_C(-104), INT8_C( 52), INT8_C( -18), INT8_C( 70), INT8_C( 121), INT8_C( 105), INT8_C(-111), INT8_C(-105), INT8_C( 24), INT8_C(-109), INT8_C( 107), INT8_C( -55), INT8_C( -18), INT8_C( 61), INT8_C( 62), INT8_C( 80), INT8_C( -60), INT8_C(-118), INT8_C( 49), INT8_C( -65), INT8_C( -46), INT8_C( -50), INT8_C( -7), INT8_C( -13), INT8_C( 7), INT8_C( 42), INT8_C( 64), INT8_C( 71), INT8_C( -32), INT8_C(-113), INT8_C( 118), INT8_C( 68), INT8_C( -74), INT8_C( 37), INT8_C( -72), INT8_C(-103), INT8_C( 12), INT8_C( 101), INT8_C( -17), INT8_C( 110), INT8_C( 57), INT8_C(-120), INT8_C( 103), INT8_C( 9), INT8_C( 66), INT8_C( -12), INT8_C( 117), INT8_C( 9), INT8_C( 24), INT8_C( 41), INT8_C( 102), INT8_C( 28), INT8_C( 72), INT8_C( 46), INT8_C( 91), INT8_C( -93), INT8_C( -58), INT8_C(-105), INT8_C(-111), INT8_C(-104), INT8_C( 20), INT8_C( -29), INT8_C( 92)), 
simde_mm512_set_epi8(INT8_C(-108), INT8_C( 121), INT8_C( 26), INT8_C( 74), INT8_C( 101), INT8_C( 96), INT8_C( -96), INT8_C( 102), INT8_C( -3), INT8_C( 34), INT8_C(-128), INT8_C( 1), INT8_C( -84), INT8_C( 34), INT8_C(-107), INT8_C( -40), INT8_C( -85), INT8_C( -12), INT8_C( -19), INT8_C( 68), INT8_C( 109), INT8_C( 112), INT8_C( -23), INT8_C( 53), INT8_C( 52), INT8_C( 15), INT8_C( -24), INT8_C( -97), INT8_C( 52), INT8_C( -11), INT8_C( -32), INT8_C( -69), INT8_C(-108), INT8_C( -89), INT8_C( 2), INT8_C( -73), INT8_C( 16), INT8_C( 33), INT8_C( 25), INT8_C( -54), INT8_C( 44), INT8_C( -79), INT8_C( 89), INT8_C( 43), INT8_C( 76), INT8_C( 64), INT8_C(-106), INT8_C( 24), INT8_C(-114), INT8_C( 120), INT8_C( 81), INT8_C(-120), INT8_C( 9), INT8_C( 118), INT8_C( 115), INT8_C( 125), INT8_C( 95), INT8_C( 78), INT8_C( 20), INT8_C( 120), INT8_C( 108), INT8_C( -55), INT8_C( 0), INT8_C( 49)), simde_mm512_set_epi8(INT8_C( -48), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 109), INT8_C( -48), INT8_C(-123), INT8_C(-105), INT8_C( -14), INT8_C( 0), INT8_C( 0), INT8_C( 107), INT8_C( 0), INT8_C( 0), INT8_C( -99), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 78), INT8_C( 0), INT8_C( -28), INT8_C(-128), INT8_C(-111), INT8_C( 43), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( -52), INT8_C( 0), INT8_C(-110), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -38), INT8_C( 0), INT8_C( 31), INT8_C( 83), INT8_C(-105), INT8_C( 106), INT8_C(-110), INT8_C(-111), INT8_C( 122), INT8_C( 0), INT8_C( -24), INT8_C( 26), INT8_C( 0), INT8_C( 69), INT8_C( 0), INT8_C(-110), INT8_C( 0), INT8_C( 0), INT8_C( -42), INT8_C( -4), INT8_C( 25), INT8_C( 0), INT8_C( -95), INT8_C( -96), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 107), INT8_C( 45), INT8_C( 0), INT8_C( 0)) }, { UINT64_C( 8690992657061241782), simde_mm512_set_epi8(INT8_C( 108), INT8_C( -46), INT8_C( -39), INT8_C( 38), INT8_C( 25), INT8_C( -15), INT8_C( -31), INT8_C( 123), INT8_C( -79), INT8_C( 58), INT8_C( 29), INT8_C( -40), INT8_C( 58), INT8_C( -58), INT8_C( 101), 
INT8_C( -19), INT8_C( -10), INT8_C( -73), INT8_C(-128), INT8_C( -74), INT8_C( -14), INT8_C( 95), INT8_C( 63), INT8_C( -17), INT8_C( -47), INT8_C( 19), INT8_C( -5), INT8_C( -48), INT8_C(-125), INT8_C( -6), INT8_C( 73), INT8_C( 9), INT8_C( 107), INT8_C( 124), INT8_C( 5), INT8_C( -88), INT8_C( 49), INT8_C( 112), INT8_C( -22), INT8_C( 107), INT8_C( 32), INT8_C(-108), INT8_C( 54), INT8_C(-109), INT8_C( 100), INT8_C( 59), INT8_C( -18), INT8_C( -24), INT8_C(-102), INT8_C( 122), INT8_C( 8), INT8_C( -47), INT8_C( 124), INT8_C( 30), INT8_C( -80), INT8_C( 14), INT8_C( -74), INT8_C( 37), INT8_C( -30), INT8_C( -99), INT8_C( -11), INT8_C( 8), INT8_C( 44), INT8_C( -78)), simde_mm512_set_epi8(INT8_C( -14), INT8_C( -68), INT8_C(-119), INT8_C( -42), INT8_C( -49), INT8_C(-118), INT8_C( -65), INT8_C( -79), INT8_C( -54), INT8_C( 103), INT8_C( 16), INT8_C( 75), INT8_C( 9), INT8_C( -19), INT8_C( 116), INT8_C(-100), INT8_C(-118), INT8_C( -38), INT8_C( -33), INT8_C( 20), INT8_C( -91), INT8_C( 3), INT8_C( -9), INT8_C(-109), INT8_C( -49), INT8_C( 87), INT8_C( 38), INT8_C( 52), INT8_C( -86), INT8_C( 11), INT8_C(-111), INT8_C( 41), INT8_C( -51), INT8_C( 101), INT8_C( -93), INT8_C( -14), INT8_C( 16), INT8_C( -59), INT8_C( 17), INT8_C( 83), INT8_C( 68), INT8_C( 119), INT8_C( -56), INT8_C( 85), INT8_C( 3), INT8_C( 25), INT8_C( 87), INT8_C( 34), INT8_C( -18), INT8_C( -1), INT8_C( 16), INT8_C( -68), INT8_C(-125), INT8_C( -4), INT8_C( -85), INT8_C( -94), INT8_C( -56), INT8_C(-108), INT8_C( -53), INT8_C( -32), INT8_C( 19), INT8_C( 29), INT8_C(-121), INT8_C( 55)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 49), INT8_C( -88), INT8_C( 71), INT8_C( -87), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 86), INT8_C( 0), INT8_C( 0), INT8_C( 85), INT8_C( -15), INT8_C( -83), INT8_C( 0), INT8_C( 0), INT8_C(-113), INT8_C( 0), INT8_C( 0), INT8_C( 123), INT8_C( -63), INT8_C( 0), INT8_C( 42), INT8_C( -27), INT8_C(-106), INT8_C( -2), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-124), INT8_C( 20), INT8_C( 0), INT8_C( 0), 
INT8_C( -68), INT8_C( 0), INT8_C( 7), INT8_C( 0), INT8_C(-118), INT8_C( 0), INT8_C( -46), INT8_C( 88), INT8_C( 0), INT8_C( -38), INT8_C( 0), INT8_C( -84), INT8_C( 0), INT8_C( 0), INT8_C( -49), INT8_C( -87), INT8_C( 88), INT8_C(-128), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 36), INT8_C(-122), INT8_C( 97), INT8_C( 0), INT8_C( -75), INT8_C( -14), INT8_C( 0), INT8_C( -24), INT8_C( 53), INT8_C( 0)) }, { UINT64_C( 524532266746496782), simde_mm512_set_epi8(INT8_C(-114), INT8_C( 83), INT8_C( 4), INT8_C( 120), INT8_C(-113), INT8_C( -9), INT8_C(-110), INT8_C( 26), INT8_C( -55), INT8_C(-124), INT8_C( 92), INT8_C( -58), INT8_C( -61), INT8_C( -48), INT8_C( 87), INT8_C(-111), INT8_C(-125), INT8_C( -44), INT8_C( -30), INT8_C( 87), INT8_C( -96), INT8_C( -49), INT8_C(-116), INT8_C( -48), INT8_C( 73), INT8_C( -23), INT8_C( 32), INT8_C( -54), INT8_C( -76), INT8_C( -15), INT8_C( 122), INT8_C( -84), INT8_C( 95), INT8_C(-105), INT8_C( 21), INT8_C( 44), INT8_C( 115), INT8_C( -40), INT8_C( 21), INT8_C(-128), INT8_C( -83), INT8_C( 22), INT8_C( -84), INT8_C( -21), INT8_C( -15), INT8_C( 15), INT8_C( -1), INT8_C( -79), INT8_C( 97), INT8_C( 116), INT8_C( -66), INT8_C( 17), INT8_C( 39), INT8_C( -92), INT8_C( 46), INT8_C( -87), INT8_C( 114), INT8_C( 77), INT8_C( 119), INT8_C( -79), INT8_C( 99), INT8_C( -92), INT8_C( -13), INT8_C( -32)), simde_mm512_set_epi8(INT8_C( 99), INT8_C( 77), INT8_C( -25), INT8_C( -30), INT8_C( 118), INT8_C( -65), INT8_C( 57), INT8_C( 84), INT8_C( 75), INT8_C( -7), INT8_C( 13), INT8_C( 86), INT8_C( 51), INT8_C( -2), INT8_C( 37), INT8_C( 103), INT8_C( -36), INT8_C( 90), INT8_C( -86), INT8_C( -68), INT8_C( 118), INT8_C( 76), INT8_C( -81), INT8_C( -69), INT8_C( -18), INT8_C( -3), INT8_C( -24), INT8_C( 44), INT8_C( 49), INT8_C( -57), INT8_C( 3), INT8_C( -46), INT8_C( -4), INT8_C(-105), INT8_C( -32), INT8_C( -12), INT8_C(-108), INT8_C( 10), INT8_C( 65), INT8_C( -56), INT8_C( -22), INT8_C(-125), INT8_C( 27), INT8_C( 91), INT8_C(-121), INT8_C( 13), INT8_C( 3), INT8_C( 85), 
INT8_C( -67), INT8_C( 62), INT8_C( 9), INT8_C( 59), INT8_C( -12), INT8_C(-126), INT8_C( 89), INT8_C( 92), INT8_C( 27), INT8_C( 109), INT8_C( 81), INT8_C( 124), INT8_C( -26), INT8_C(-124), INT8_C( 97), INT8_C( -76)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 35), INT8_C( 80), INT8_C( 9), INT8_C( 0), INT8_C(-127), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 99), INT8_C( 30), INT8_C( 6), INT8_C( 42), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 117), INT8_C( 84), INT8_C( 0), INT8_C(-121), INT8_C( 0), INT8_C( 39), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -66), INT8_C( 0), INT8_C(-108), INT8_C( 0), INT8_C( 0), INT8_C( 3), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 49), INT8_C( 0), INT8_C( -42), INT8_C( 0), INT8_C( 0), INT8_C( 75), INT8_C( 26), INT8_C( 31), INT8_C( 99), INT8_C( -83), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 84), INT8_C( -26), INT8_C( 48), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 75), INT8_C( -95), INT8_C( 40), INT8_C( 0)) }, { UINT64_C(11088747391775221819), simde_mm512_set_epi8(INT8_C( 18), INT8_C( 68), INT8_C( -36), INT8_C( 40), INT8_C( -7), INT8_C( 57), INT8_C( -45), INT8_C( -65), INT8_C( 90), INT8_C( -9), INT8_C( -50), INT8_C(-111), INT8_C( -42), INT8_C( 56), INT8_C( 101), INT8_C( -20), INT8_C( 109), INT8_C( 99), INT8_C( 45), INT8_C( -70), INT8_C( 109), INT8_C( 24), INT8_C( -93), INT8_C( 76), INT8_C( -69), INT8_C( 30), INT8_C( 82), INT8_C( 53), INT8_C( 109), INT8_C(-114), INT8_C( 13), INT8_C( 106), INT8_C( -49), INT8_C( -1), INT8_C( 83), INT8_C( 64), INT8_C( -87), INT8_C( 113), INT8_C(-107), INT8_C(-124), INT8_C( -84), INT8_C( -86), INT8_C( -69), INT8_C( 17), INT8_C( 21), INT8_C( 0), INT8_C(-112), INT8_C(-124), INT8_C( -36), INT8_C( 67), INT8_C( -3), INT8_C( -91), INT8_C( -58), INT8_C( 40), INT8_C( -55), INT8_C( -7), INT8_C( -26), INT8_C( 33), INT8_C( -53), INT8_C(-109), INT8_C( -74), INT8_C( -52), INT8_C( 35), INT8_C( 74)), simde_mm512_set_epi8(INT8_C( -26), 
INT8_C( -16), INT8_C( -61), INT8_C( 8), INT8_C( 123), INT8_C( -86), INT8_C( 101), INT8_C( 111), INT8_C( -22), INT8_C( 66), INT8_C( -59), INT8_C( 41), INT8_C( -88), INT8_C( 9), INT8_C( -2), INT8_C( 40), INT8_C( 16), INT8_C( 2), INT8_C( -72), INT8_C( 92), INT8_C( 5), INT8_C( -1), INT8_C( 68), INT8_C( 57), INT8_C( 119), INT8_C( -69), INT8_C( 106), INT8_C( 93), INT8_C( 103), INT8_C( -78), INT8_C( -70), INT8_C( -37), INT8_C( 86), INT8_C( 88), INT8_C( -70), INT8_C( 62), INT8_C( -82), INT8_C( -3), INT8_C( 127), INT8_C( -76), INT8_C(-120), INT8_C( 11), INT8_C( -6), INT8_C( 106), INT8_C( -38), INT8_C(-103), INT8_C( -23), INT8_C( -21), INT8_C( -37), INT8_C( 33), INT8_C( -5), INT8_C( -21), INT8_C(-105), INT8_C( -10), INT8_C( -36), INT8_C(-101), INT8_C( -57), INT8_C( -56), INT8_C( -24), INT8_C( 100), INT8_C( -47), INT8_C( 113), INT8_C( -88), INT8_C( -75)), simde_mm512_set_epi8(INT8_C( 53), INT8_C( 0), INT8_C( 0), INT8_C( 91), INT8_C( -90), INT8_C( 0), INT8_C( 0), INT8_C(-127), INT8_C( 116), INT8_C( 124), INT8_C( 6), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 38), INT8_C( -91), INT8_C( 0), INT8_C( 0), INT8_C( 51), INT8_C( 0), INT8_C( 0), INT8_C( -48), INT8_C( 0), INT8_C( -11), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -37), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -59), INT8_C( 0), INT8_C( -42), INT8_C( 0), INT8_C( 124), INT8_C( -62), INT8_C( 0), INT8_C( 0), INT8_C(-112), INT8_C( 0), INT8_C( 0), INT8_C( 10), INT8_C( 0), INT8_C( 0), INT8_C( 47), INT8_C( 0), INT8_C( 74), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 58), INT8_C( 102), INT8_C(-123), INT8_C( 0), INT8_C( 63), INT8_C( 12)) }, { UINT64_C( 8965231002922847986), simde_mm512_set_epi8(INT8_C( 41), INT8_C( -56), INT8_C( -91), INT8_C( 45), INT8_C( 74), INT8_C(-122), INT8_C( 117), INT8_C( -40), INT8_C( -25), INT8_C( -60), INT8_C( -35), INT8_C( 54), INT8_C( -44), INT8_C( -13), INT8_C( -32), INT8_C( 25), INT8_C( -25), INT8_C( -95), INT8_C( 
-11), INT8_C( 75), INT8_C( -45), INT8_C( -97), INT8_C(-128), INT8_C( 103), INT8_C( -59), INT8_C( -93), INT8_C( -10), INT8_C( -86), INT8_C(-109), INT8_C( 100), INT8_C( -78), INT8_C( 46), INT8_C( 68), INT8_C(-102), INT8_C(-120), INT8_C( 114), INT8_C(-115), INT8_C( -50), INT8_C( -78), INT8_C( -74), INT8_C( 15), INT8_C( 121), INT8_C(-126), INT8_C( 49), INT8_C(-101), INT8_C( 86), INT8_C( -39), INT8_C( -38), INT8_C( 88), INT8_C( 114), INT8_C( 112), INT8_C( 44), INT8_C( 69), INT8_C( -15), INT8_C( -95), INT8_C( -87), INT8_C( 66), INT8_C( 105), INT8_C( -16), INT8_C( -88), INT8_C( -15), INT8_C( -82), INT8_C( 102), INT8_C( 10)), simde_mm512_set_epi8(INT8_C( 7), INT8_C( -68), INT8_C(-100), INT8_C( -35), INT8_C( 107), INT8_C( 69), INT8_C( -57), INT8_C( 95), INT8_C( 88), INT8_C( -70), INT8_C(-117), INT8_C( 33), INT8_C(-104), INT8_C( -98), INT8_C( 31), INT8_C( -37), INT8_C(-115), INT8_C( 11), INT8_C( 44), INT8_C( 74), INT8_C( -24), INT8_C( 49), INT8_C( -72), INT8_C( -55), INT8_C( -44), INT8_C( -40), INT8_C( 16), INT8_C( -5), INT8_C( 8), INT8_C( 25), INT8_C( 46), INT8_C( 95), INT8_C( -76), INT8_C(-109), INT8_C( 22), INT8_C( -82), INT8_C( 81), INT8_C( -64), INT8_C( 26), INT8_C( -23), INT8_C( 89), INT8_C( 25), INT8_C( -6), INT8_C(-121), INT8_C( 17), INT8_C( -11), INT8_C( 3), INT8_C( 7), INT8_C( 119), INT8_C( -5), INT8_C( 23), INT8_C( -59), INT8_C( -12), INT8_C( 76), INT8_C( -47), INT8_C( 85), INT8_C( 94), INT8_C(-125), INT8_C( -3), INT8_C( -36), INT8_C( 97), INT8_C( -2), INT8_C( 33), INT8_C( 108)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( -16), INT8_C( -60), INT8_C( -56), INT8_C( 38), INT8_C( 126), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 15), INT8_C( 27), INT8_C( 0), INT8_C( 44), INT8_C( 0), INT8_C( 85), INT8_C( 0), INT8_C( -2), INT8_C(-121), INT8_C(-103), INT8_C( 0), INT8_C( 0), INT8_C( -54), INT8_C(-123), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 117), INT8_C( -12), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 4), INT8_C( -29), INT8_C( 69), INT8_C( 0), INT8_C( 
-91), INT8_C( 0), INT8_C( 77), INT8_C( 0), INT8_C( 106), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -24), INT8_C( 0), INT8_C( 0), INT8_C( 48), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 64), INT8_C( 50), INT8_C( 59), INT8_C( 0), INT8_C( -42), INT8_C( -79), INT8_C(-102), INT8_C( 49), INT8_C( 0), INT8_C( 0), INT8_C( 18), INT8_C( 0)) }, { UINT64_C(12281436803553867002), simde_mm512_set_epi8(INT8_C(-106), INT8_C( -68), INT8_C( 76), INT8_C( -42), INT8_C( -94), INT8_C( -89), INT8_C(-100), INT8_C( 77), INT8_C( 115), INT8_C( 103), INT8_C( 102), INT8_C( -67), INT8_C( 110), INT8_C( -50), INT8_C( -7), INT8_C( -51), INT8_C( 62), INT8_C( 120), INT8_C( 63), INT8_C( -65), INT8_C( -72), INT8_C( 111), INT8_C( -11), INT8_C( -35), INT8_C( 67), INT8_C( -48), INT8_C( -29), INT8_C( -80), INT8_C( 107), INT8_C( -37), INT8_C( 38), INT8_C( -54), INT8_C( 34), INT8_C( 80), INT8_C( -47), INT8_C( 84), INT8_C( 76), INT8_C(-111), INT8_C( 126), INT8_C( 94), INT8_C( 105), INT8_C( 103), INT8_C( 19), INT8_C( -95), INT8_C( 110), INT8_C( 48), INT8_C( 58), INT8_C( -16), INT8_C( 64), INT8_C( 97), INT8_C( 74), INT8_C( 117), INT8_C(-108), INT8_C( -89), INT8_C( 30), INT8_C( 8), INT8_C( -15), INT8_C( -36), INT8_C( -53), INT8_C( -86), INT8_C( 115), INT8_C( 102), INT8_C( 23), INT8_C( 50)), simde_mm512_set_epi8(INT8_C( 113), INT8_C( 57), INT8_C( -7), INT8_C( -33), INT8_C( -50), INT8_C( 52), INT8_C( -77), INT8_C( 103), INT8_C( 96), INT8_C( 14), INT8_C( 62), INT8_C(-117), INT8_C( 25), INT8_C( 126), INT8_C( 71), INT8_C( -60), INT8_C( 92), INT8_C( -34), INT8_C(-107), INT8_C( -83), INT8_C(-117), INT8_C(-107), INT8_C( 48), INT8_C( 123), INT8_C( -3), INT8_C( 100), INT8_C( -20), INT8_C( 48), INT8_C( -43), INT8_C( 120), INT8_C( 88), INT8_C( -88), INT8_C( 46), INT8_C( 112), INT8_C( -72), INT8_C( -77), INT8_C( 11), INT8_C( 21), INT8_C( 18), INT8_C( 38), INT8_C( 121), INT8_C( 83), INT8_C( 92), INT8_C( 66), INT8_C( 84), INT8_C( 8), INT8_C( -4), INT8_C(-110), INT8_C( -81), INT8_C( 108), INT8_C( -16), INT8_C( 
70), INT8_C( -25), INT8_C( 27), INT8_C( 14), INT8_C( 59), INT8_C( 104), INT8_C( 34), INT8_C( 111), INT8_C( 122), INT8_C( -81), INT8_C( -30), INT8_C( 115), INT8_C( -84)), simde_mm512_set_epi8(INT8_C( -46), INT8_C( 0), INT8_C( 100), INT8_C( 0), INT8_C( -71), INT8_C( 0), INT8_C( -94), INT8_C( 0), INT8_C( 0), INT8_C( 92), INT8_C( 28), INT8_C( 112), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 92), INT8_C( -84), INT8_C( 0), INT8_C( -31), INT8_C( 69), INT8_C( 0), INT8_C( -88), INT8_C( 35), INT8_C( 0), INT8_C( -98), INT8_C( 4), INT8_C( 0), INT8_C( 0), INT8_C( 62), INT8_C(-126), INT8_C( -21), INT8_C( 94), INT8_C(-121), INT8_C( 6), INT8_C( 0), INT8_C( 48), INT8_C( 93), INT8_C( -22), INT8_C( 0), INT8_C( 0), INT8_C( 83), INT8_C(-116), INT8_C( -57), INT8_C(-101), INT8_C( 0), INT8_C( 0), INT8_C( 120), INT8_C( 0), INT8_C( 104), INT8_C( 97), INT8_C( 0), INT8_C( 35), INT8_C( 0), INT8_C( 0), INT8_C( -46), INT8_C( 125), INT8_C( 93), INT8_C(-103), INT8_C( -80), INT8_C( 0), INT8_C( 51), INT8_C( 0)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_maskz_gf2p8mul_epi8(test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_gf2p8affine_epi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_gf2p8affine_epi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_gf2p8affine_epi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_gf2p8affine_epi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_gf2p8affine_epi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_gf2p8affine_epi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_gf2p8affine_epi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_gf2p8affine_epi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_gf2p8affine_epi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_gf2p8affineinv_epi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_gf2p8affineinv_epi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_gf2p8affineinv_epi64_epi8) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_gf2p8affineinv_epi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_gf2p8affineinv_epi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_gf2p8affineinv_epi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_gf2p8affineinv_epi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_gf2p8affineinv_epi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_gf2p8affineinv_epi64_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_gf2p8mul_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_gf2p8mul_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_gf2p8mul_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mask_gf2p8mul_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_mask_gf2p8mul_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_gf2p8mul_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskz_gf2p8mul_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_maskz_gf2p8mul_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_maskz_gf2p8mul_epi8) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/meson.build000066400000000000000000000022651400333146700163750ustar00rootroot00000000000000simde_test_x86_tests = [ 'mmx', 'sse', 'sse2', 'sse3', 'ssse3', 'sse4.1', 'sse4.2', 'avx', 'avx2', 'fma', 'gfni', 'clmul', 'svml', 'xop' ] subdir('avx512') simde_test_x86_sources = [] simde_test_x86_libs = [] foreach name : simde_test_x86_tests foreach lang : ['c', 'cpp'] source_file = name + '.c' if lang == 'cpp' source_file = configure_file(input: name + '.c', output: name + '.cpp', copy: true) endif simde_test_x86_sources += source_file foreach emul : ['emul', 'native'] extra_flags = ['-DSIMDE_TEST_BARE'] if emul == 'emul' extra_flags += '-DSIMDE_NO_NATIVE' endif x = executable(name + '-' + emul + '-' + lang, source_file, c_args: simde_c_args + simde_c_defs + simde_native_c_flags + extra_flags, cpp_args: simde_c_args + simde_c_defs + simde_native_c_flags + extra_flags, include_directories: simde_include_dir, dependencies: simde_deps) test('x86/' + name + '/' + emul + '/' + lang, x, protocol: 'tap', # Emscripten tests must be run from builddir workdir: meson.current_build_dir()) endforeach 
endforeach endforeach simde-0.7.2/test/x86/mmx.c000066400000000000000000010454541400333146700152100ustar00rootroot00000000000000/* Copyright (c) 2017 Evan Nemerson * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #define SIMDE_TESTS_CURRENT_ISAX mmx #include static int test_simde_mm_set1_pi8(SIMDE_MUNIT_TEST_ARGS) { const struct { int8_t a; simde__m64 r; } test_vec[8] = { { INT8_C( -16), simde_mm_set_pi8(INT8_C( -16), INT8_C( -16), INT8_C( -16), INT8_C( -16), INT8_C( -16), INT8_C( -16), INT8_C( -16), INT8_C( -16)) }, { INT8_C(-120), simde_mm_set_pi8(INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C(-120), INT8_C(-120)) }, { INT8_C( 86), simde_mm_set_pi8(INT8_C( 86), INT8_C( 86), INT8_C( 86), INT8_C( 86), INT8_C( 86), INT8_C( 86), INT8_C( 86), INT8_C( 86)) }, { INT8_C( -12), simde_mm_set_pi8(INT8_C( -12), INT8_C( -12), INT8_C( -12), INT8_C( -12), INT8_C( -12), INT8_C( -12), INT8_C( -12), INT8_C( -12)) }, { INT8_C( 3), simde_mm_set_pi8(INT8_C( 3), INT8_C( 3), INT8_C( 3), INT8_C( 3), INT8_C( 3), INT8_C( 3), INT8_C( 3), INT8_C( 3)) }, { INT8_C( 25), simde_mm_set_pi8(INT8_C( 25), INT8_C( 25), INT8_C( 25), INT8_C( 25), INT8_C( 25), INT8_C( 25), INT8_C( 25), INT8_C( 25)) }, { INT8_C( 40), simde_mm_set_pi8(INT8_C( 40), INT8_C( 40), INT8_C( 40), INT8_C( 40), INT8_C( 40), INT8_C( 40), INT8_C( 40), INT8_C( 40)) }, { INT8_C( -12), simde_mm_set_pi8(INT8_C( -12), INT8_C( -12), INT8_C( -12), INT8_C( -12), INT8_C( -12), INT8_C( -12), INT8_C( -12), INT8_C( -12)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_set1_pi8(test_vec[i].a); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } return 0; } static int test_simde_mm_set1_pi16(SIMDE_MUNIT_TEST_ARGS) { int16_t v; simde_test_codegen_random_memory(sizeof(v), HEDLEY_REINTERPRET_CAST(uint8_t*, &v)); simde__m64 x = simde_mm_set1_pi16(v); int16_t* r = HEDLEY_REINTERPRET_CAST(int16_t*, &x); simde_mm_empty(); simde_assert_equal_i16(r[0], v); simde_assert_equal_i16(r[1], v); simde_assert_equal_i16(r[2], v); simde_assert_equal_i16(r[3], v); return 0; } static int test_simde_mm_set1_pi32(SIMDE_MUNIT_TEST_ARGS) { int32_t v; 
simde_test_codegen_random_memory(sizeof(v), HEDLEY_REINTERPRET_CAST(uint8_t*, &v)); simde__m64 x = simde_mm_set1_pi32(v); int32_t* r = HEDLEY_REINTERPRET_CAST(int32_t*, &x); simde_mm_empty(); simde_assert_equal_i32(r[0], v); simde_assert_equal_i32(r[1], v); return 0; } static int test_simde_mm_setr_pi8(SIMDE_MUNIT_TEST_ARGS) { int8_t d[8 / sizeof(int8_t)]; simde_test_codegen_random_memory(sizeof(d), HEDLEY_REINTERPRET_CAST(uint8_t*, d)); simde__m64 x = simde_mm_setr_pi8(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]); int8_t* c = HEDLEY_REINTERPRET_CAST(int8_t*, &x); simde_mm_empty(); simde_assert_equal_i8(c[0], d[0]); simde_assert_equal_i8(c[1], d[1]); simde_assert_equal_i8(c[2], d[2]); simde_assert_equal_i8(c[3], d[3]); simde_assert_equal_i8(c[4], d[4]); simde_assert_equal_i8(c[5], d[5]); simde_assert_equal_i8(c[6], d[6]); simde_assert_equal_i8(c[7], d[7]); return 0; } static int test_simde_mm_setr_pi16(SIMDE_MUNIT_TEST_ARGS) { int16_t d[8 / sizeof(int16_t)]; simde_test_codegen_random_memory(sizeof(d), HEDLEY_REINTERPRET_CAST(uint8_t*, d)); simde__m64 x = simde_mm_setr_pi16(d[0], d[1], d[2], d[3]); int16_t* s = HEDLEY_REINTERPRET_CAST(int16_t*, &x); simde_mm_empty(); simde_assert_equal_i16(s[0], d[0]); simde_assert_equal_i16(s[1], d[1]); simde_assert_equal_i16(s[2], d[2]); simde_assert_equal_i16(s[3], d[3]); return 0; } static int test_simde_mm_setr_pi32(SIMDE_MUNIT_TEST_ARGS) { int32_t d[8 / sizeof(int32_t)]; simde_test_codegen_random_memory(sizeof(d), HEDLEY_REINTERPRET_CAST(uint8_t*, d)); simde__m64 x = simde_mm_setr_pi32(d[0], d[1]); int32_t* i = HEDLEY_REINTERPRET_CAST(int32_t*, &x); simde_mm_empty(); simde_assert_equal_i32(i[0], d[0]); simde_assert_equal_i32(i[1], d[1]); return 0; } static int test_simde_mm_setzero_si64(SIMDE_MUNIT_TEST_ARGS) { simde__m64 a = simde_mm_set1_pi32(0); simde__m64 r = simde_mm_setzero_si64(); simde_test_x86_assert_equal_i32x2(a, r); return 0; } static int test_simde_mm_add_pi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 
a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi8(INT8_C( 25), INT8_C(-106), INT8_C( 93), INT8_C( 86), INT8_C( -56), INT8_C( 101), INT8_C( 79), INT8_C( 83)), simde_mm_set_pi8(INT8_C( -38), INT8_C( -6), INT8_C( 47), INT8_C( 59), INT8_C( -67), INT8_C( -36), INT8_C( 127), INT8_C( 104)), simde_mm_set_pi8(INT8_C( -13), INT8_C(-112), INT8_C(-116), INT8_C(-111), INT8_C(-123), INT8_C( 65), INT8_C( -50), INT8_C( -69)) }, { simde_mm_set_pi8(INT8_C(-105), INT8_C( 113), INT8_C( 22), INT8_C( -91), INT8_C( 59), INT8_C( -4), INT8_C( 67), INT8_C( 43)), simde_mm_set_pi8(INT8_C( -13), INT8_C( 93), INT8_C( 81), INT8_C( 108), INT8_C(-104), INT8_C( 123), INT8_C( 105), INT8_C( 119)), simde_mm_set_pi8(INT8_C(-118), INT8_C( -50), INT8_C( 103), INT8_C( 17), INT8_C( -45), INT8_C( 119), INT8_C( -84), INT8_C( -94)) }, { simde_mm_set_pi8(INT8_C( -8), INT8_C( 52), INT8_C( 92), INT8_C( 121), INT8_C( 58), INT8_C(-104), INT8_C( 27), INT8_C( -80)), simde_mm_set_pi8(INT8_C( 62), INT8_C(-100), INT8_C( 5), INT8_C( -95), INT8_C( -16), INT8_C( 109), INT8_C( 127), INT8_C( 62)), simde_mm_set_pi8(INT8_C( 54), INT8_C( -48), INT8_C( 97), INT8_C( 26), INT8_C( 42), INT8_C( 5), INT8_C(-102), INT8_C( -18)) }, { simde_mm_set_pi8(INT8_C( 32), INT8_C( 124), INT8_C( 115), INT8_C( 3), INT8_C( 104), INT8_C( 27), INT8_C( 43), INT8_C( -11)), simde_mm_set_pi8(INT8_C( -22), INT8_C( 27), INT8_C( -47), INT8_C( 45), INT8_C( -96), INT8_C( -49), INT8_C( -74), INT8_C( -34)), simde_mm_set_pi8(INT8_C( 10), INT8_C(-105), INT8_C( 68), INT8_C( 48), INT8_C( 8), INT8_C( -22), INT8_C( -31), INT8_C( -45)) }, { simde_mm_set_pi8(INT8_C( -14), INT8_C( -79), INT8_C( -38), INT8_C( -93), INT8_C( -55), INT8_C( 83), INT8_C( 78), INT8_C( -90)), simde_mm_set_pi8(INT8_C( 91), INT8_C( -61), INT8_C(-124), INT8_C( -64), INT8_C( 76), INT8_C( -15), INT8_C(-117), INT8_C( 11)), simde_mm_set_pi8(INT8_C( 77), INT8_C( 116), INT8_C( 94), INT8_C( 99), INT8_C( 21), INT8_C( 68), INT8_C( -39), INT8_C( -79)) }, { simde_mm_set_pi8(INT8_C(-119), 
INT8_C( 33), INT8_C( -57), INT8_C( 54), INT8_C( -18), INT8_C( 79), INT8_C( 86), INT8_C( -25)), simde_mm_set_pi8(INT8_C(-115), INT8_C(-114), INT8_C( 72), INT8_C(-126), INT8_C( -80), INT8_C( 114), INT8_C(-126), INT8_C( 42)), simde_mm_set_pi8(INT8_C( 22), INT8_C( -81), INT8_C( 15), INT8_C( -72), INT8_C( -98), INT8_C( -63), INT8_C( -40), INT8_C( 17)) }, { simde_mm_set_pi8(INT8_C( 60), INT8_C( 109), INT8_C( 93), INT8_C( -45), INT8_C( -62), INT8_C(-104), INT8_C( -41), INT8_C( 72)), simde_mm_set_pi8(INT8_C( 72), INT8_C( -86), INT8_C( 21), INT8_C( 79), INT8_C( 43), INT8_C( 23), INT8_C( -74), INT8_C( -62)), simde_mm_set_pi8(INT8_C(-124), INT8_C( 23), INT8_C( 114), INT8_C( 34), INT8_C( -19), INT8_C( -81), INT8_C(-115), INT8_C( 10)) }, { simde_mm_set_pi8(INT8_C( 110), INT8_C( 106), INT8_C( -94), INT8_C( 102), INT8_C( -82), INT8_C( 108), INT8_C( -12), INT8_C( -48)), simde_mm_set_pi8(INT8_C( 108), INT8_C( 3), INT8_C( -91), INT8_C( 65), INT8_C( 30), INT8_C( 106), INT8_C( -1), INT8_C( 100)), simde_mm_set_pi8(INT8_C( -38), INT8_C( 109), INT8_C( 71), INT8_C( -89), INT8_C( -52), INT8_C( -42), INT8_C( -13), INT8_C( 52)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_add_pi8(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_paddb(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi8(INT8_C( 25), INT8_C(-106), INT8_C( 93), INT8_C( 86), INT8_C( -56), INT8_C( 101), INT8_C( 79), INT8_C( 83)), simde_mm_set_pi8(INT8_C( -38), INT8_C( -6), INT8_C( 47), INT8_C( 59), INT8_C( -67), INT8_C( -36), INT8_C( 127), INT8_C( 104)), simde_mm_set_pi8(INT8_C( -13), INT8_C(-112), INT8_C(-116), INT8_C(-111), INT8_C(-123), INT8_C( 65), INT8_C( -50), INT8_C( -69)) }, { simde_mm_set_pi8(INT8_C(-105), INT8_C( 113), INT8_C( 22), INT8_C( -91), INT8_C( 59), INT8_C( -4), INT8_C( 67), INT8_C( 
43)), simde_mm_set_pi8(INT8_C( -13), INT8_C( 93), INT8_C( 81), INT8_C( 108), INT8_C(-104), INT8_C( 123), INT8_C( 105), INT8_C( 119)), simde_mm_set_pi8(INT8_C(-118), INT8_C( -50), INT8_C( 103), INT8_C( 17), INT8_C( -45), INT8_C( 119), INT8_C( -84), INT8_C( -94)) }, { simde_mm_set_pi8(INT8_C( -8), INT8_C( 52), INT8_C( 92), INT8_C( 121), INT8_C( 58), INT8_C(-104), INT8_C( 27), INT8_C( -80)), simde_mm_set_pi8(INT8_C( 62), INT8_C(-100), INT8_C( 5), INT8_C( -95), INT8_C( -16), INT8_C( 109), INT8_C( 127), INT8_C( 62)), simde_mm_set_pi8(INT8_C( 54), INT8_C( -48), INT8_C( 97), INT8_C( 26), INT8_C( 42), INT8_C( 5), INT8_C(-102), INT8_C( -18)) }, { simde_mm_set_pi8(INT8_C( 32), INT8_C( 124), INT8_C( 115), INT8_C( 3), INT8_C( 104), INT8_C( 27), INT8_C( 43), INT8_C( -11)), simde_mm_set_pi8(INT8_C( -22), INT8_C( 27), INT8_C( -47), INT8_C( 45), INT8_C( -96), INT8_C( -49), INT8_C( -74), INT8_C( -34)), simde_mm_set_pi8(INT8_C( 10), INT8_C(-105), INT8_C( 68), INT8_C( 48), INT8_C( 8), INT8_C( -22), INT8_C( -31), INT8_C( -45)) }, { simde_mm_set_pi8(INT8_C( -14), INT8_C( -79), INT8_C( -38), INT8_C( -93), INT8_C( -55), INT8_C( 83), INT8_C( 78), INT8_C( -90)), simde_mm_set_pi8(INT8_C( 91), INT8_C( -61), INT8_C(-124), INT8_C( -64), INT8_C( 76), INT8_C( -15), INT8_C(-117), INT8_C( 11)), simde_mm_set_pi8(INT8_C( 77), INT8_C( 116), INT8_C( 94), INT8_C( 99), INT8_C( 21), INT8_C( 68), INT8_C( -39), INT8_C( -79)) }, { simde_mm_set_pi8(INT8_C(-119), INT8_C( 33), INT8_C( -57), INT8_C( 54), INT8_C( -18), INT8_C( 79), INT8_C( 86), INT8_C( -25)), simde_mm_set_pi8(INT8_C(-115), INT8_C(-114), INT8_C( 72), INT8_C(-126), INT8_C( -80), INT8_C( 114), INT8_C(-126), INT8_C( 42)), simde_mm_set_pi8(INT8_C( 22), INT8_C( -81), INT8_C( 15), INT8_C( -72), INT8_C( -98), INT8_C( -63), INT8_C( -40), INT8_C( 17)) }, { simde_mm_set_pi8(INT8_C( 60), INT8_C( 109), INT8_C( 93), INT8_C( -45), INT8_C( -62), INT8_C(-104), INT8_C( -41), INT8_C( 72)), simde_mm_set_pi8(INT8_C( 72), INT8_C( -86), INT8_C( 21), INT8_C( 79), 
INT8_C( 43), INT8_C( 23), INT8_C( -74), INT8_C( -62)), simde_mm_set_pi8(INT8_C(-124), INT8_C( 23), INT8_C( 114), INT8_C( 34), INT8_C( -19), INT8_C( -81), INT8_C(-115), INT8_C( 10)) }, { simde_mm_set_pi8(INT8_C( 110), INT8_C( 106), INT8_C( -94), INT8_C( 102), INT8_C( -82), INT8_C( 108), INT8_C( -12), INT8_C( -48)), simde_mm_set_pi8(INT8_C( 108), INT8_C( 3), INT8_C( -91), INT8_C( 65), INT8_C( 30), INT8_C( 106), INT8_C( -1), INT8_C( 100)), simde_mm_set_pi8(INT8_C( -38), INT8_C( 109), INT8_C( 71), INT8_C( -89), INT8_C( -52), INT8_C( -42), INT8_C( -13), INT8_C( 52)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_paddb(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_add_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -13258), INT16_C( -8776), INT16_C( 32365), INT16_C( -3887)), simde_mm_set_pi16(INT16_C( 20018), INT16_C( 23417), INT16_C( -774), INT16_C( 5810)), simde_mm_set_pi16(INT16_C( 6760), INT16_C( 14641), INT16_C( 31591), INT16_C( 1923)) }, { simde_mm_set_pi16(INT16_C( 11335), INT16_C( 29732), INT16_C( 26059), INT16_C( -15004)), simde_mm_set_pi16(INT16_C( -13772), INT16_C( -20922), INT16_C( 1993), INT16_C( -30395)), simde_mm_set_pi16(INT16_C( -2437), INT16_C( 8810), INT16_C( 28052), INT16_C( 20137)) }, { simde_mm_set_pi16(INT16_C( 159), INT16_C( 23628), INT16_C( -17224), INT16_C( -23288)), simde_mm_set_pi16(INT16_C( -18303), INT16_C( 7699), INT16_C( 22351), INT16_C( -16238)), simde_mm_set_pi16(INT16_C( -18144), INT16_C( 31327), INT16_C( 5127), INT16_C( 26010)) }, { simde_mm_set_pi16(INT16_C( 9097), INT16_C( -5982), INT16_C( 28191), INT16_C( -32707)), simde_mm_set_pi16(INT16_C( -16920), INT16_C( -18039), INT16_C( -32259), INT16_C( 10405)), simde_mm_set_pi16(INT16_C( -7823), INT16_C( -24021), INT16_C( -4068), INT16_C( 
-22302)) }, { simde_mm_set_pi16(INT16_C( 2097), INT16_C( 24451), INT16_C( 25533), INT16_C( -14205)), simde_mm_set_pi16(INT16_C( -28269), INT16_C( 4484), INT16_C( -22223), INT16_C( 17945)), simde_mm_set_pi16(INT16_C( -26172), INT16_C( 28935), INT16_C( 3310), INT16_C( 3740)) }, { simde_mm_set_pi16(INT16_C( -17654), INT16_C( 12451), INT16_C( 12325), INT16_C( 5198)), simde_mm_set_pi16(INT16_C( -26590), INT16_C( 31889), INT16_C( -14656), INT16_C( 6378)), simde_mm_set_pi16(INT16_C( 21292), INT16_C( -21196), INT16_C( -2331), INT16_C( 11576)) }, { simde_mm_set_pi16(INT16_C( 31498), INT16_C( -18726), INT16_C( -9720), INT16_C( -17042)), simde_mm_set_pi16(INT16_C( 17025), INT16_C( 13186), INT16_C( -25923), INT16_C( 15017)), simde_mm_set_pi16(INT16_C( -17013), INT16_C( -5540), INT16_C( 29893), INT16_C( -2025)) }, { simde_mm_set_pi16(INT16_C( 9904), INT16_C( -28061), INT16_C( -32123), INT16_C( -1285)), simde_mm_set_pi16(INT16_C( -7190), INT16_C( -1918), INT16_C( 26654), INT16_C( -31449)), simde_mm_set_pi16(INT16_C( 2714), INT16_C( -29979), INT16_C( -5469), INT16_C( -32734)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_add_pi16(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_paddw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -13258), INT16_C( -8776), INT16_C( 32365), INT16_C( -3887)), simde_mm_set_pi16(INT16_C( 20018), INT16_C( 23417), INT16_C( -774), INT16_C( 5810)), simde_mm_set_pi16(INT16_C( 6760), INT16_C( 14641), INT16_C( 31591), INT16_C( 1923)) }, { simde_mm_set_pi16(INT16_C( 11335), INT16_C( 29732), INT16_C( 26059), INT16_C( -15004)), simde_mm_set_pi16(INT16_C( -13772), INT16_C( -20922), INT16_C( 1993), INT16_C( -30395)), simde_mm_set_pi16(INT16_C( -2437), INT16_C( 8810), INT16_C( 28052), INT16_C( 20137)) }, { 
simde_mm_set_pi16(INT16_C( 159), INT16_C( 23628), INT16_C( -17224), INT16_C( -23288)), simde_mm_set_pi16(INT16_C( -18303), INT16_C( 7699), INT16_C( 22351), INT16_C( -16238)), simde_mm_set_pi16(INT16_C( -18144), INT16_C( 31327), INT16_C( 5127), INT16_C( 26010)) }, { simde_mm_set_pi16(INT16_C( 9097), INT16_C( -5982), INT16_C( 28191), INT16_C( -32707)), simde_mm_set_pi16(INT16_C( -16920), INT16_C( -18039), INT16_C( -32259), INT16_C( 10405)), simde_mm_set_pi16(INT16_C( -7823), INT16_C( -24021), INT16_C( -4068), INT16_C( -22302)) }, { simde_mm_set_pi16(INT16_C( 2097), INT16_C( 24451), INT16_C( 25533), INT16_C( -14205)), simde_mm_set_pi16(INT16_C( -28269), INT16_C( 4484), INT16_C( -22223), INT16_C( 17945)), simde_mm_set_pi16(INT16_C( -26172), INT16_C( 28935), INT16_C( 3310), INT16_C( 3740)) }, { simde_mm_set_pi16(INT16_C( -17654), INT16_C( 12451), INT16_C( 12325), INT16_C( 5198)), simde_mm_set_pi16(INT16_C( -26590), INT16_C( 31889), INT16_C( -14656), INT16_C( 6378)), simde_mm_set_pi16(INT16_C( 21292), INT16_C( -21196), INT16_C( -2331), INT16_C( 11576)) }, { simde_mm_set_pi16(INT16_C( 31498), INT16_C( -18726), INT16_C( -9720), INT16_C( -17042)), simde_mm_set_pi16(INT16_C( 17025), INT16_C( 13186), INT16_C( -25923), INT16_C( 15017)), simde_mm_set_pi16(INT16_C( -17013), INT16_C( -5540), INT16_C( 29893), INT16_C( -2025)) }, { simde_mm_set_pi16(INT16_C( 9904), INT16_C( -28061), INT16_C( -32123), INT16_C( -1285)), simde_mm_set_pi16(INT16_C( -7190), INT16_C( -1918), INT16_C( 26654), INT16_C( -31449)), simde_mm_set_pi16(INT16_C( 2714), INT16_C( -29979), INT16_C( -5469), INT16_C( -32734)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_paddw(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_add_pi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { 
simde_mm_set_pi32(INT32_C( -1528799955), INT32_C( -1825996932)), simde_mm_set_pi32(INT32_C( -1229665745), INT32_C( 989894561)), simde_mm_set_pi32(INT32_C( 1536501596), INT32_C( -836102371)) }, { simde_mm_set_pi32(INT32_C( 1936809596), INT32_C( 1331021923)), simde_mm_set_pi32(INT32_C( -505769092), INT32_C( 1471336810)), simde_mm_set_pi32(INT32_C( 1431040504), INT32_C( -1492608563)) }, { simde_mm_set_pi32(INT32_C( 783830780), INT32_C( 1923113282)), simde_mm_set_pi32(INT32_C( 1700161106), INT32_C( -175473923)), simde_mm_set_pi32(INT32_C( -1810975410), INT32_C( 1747639359)) }, { simde_mm_set_pi32(INT32_C( 1195975755), INT32_C( 1329173130)), simde_mm_set_pi32(INT32_C( -611537759), INT32_C( 787308680)), simde_mm_set_pi32(INT32_C( 584437996), INT32_C( 2116481810)) }, { simde_mm_set_pi32(INT32_C( 950103059), INT32_C( 570905377)), simde_mm_set_pi32(INT32_C( 1696944201), INT32_C( -1762697792)), simde_mm_set_pi32(INT32_C( -1647920036), INT32_C( -1191792415)) }, { simde_mm_set_pi32(INT32_C( 40870864), INT32_C( 149169565)), simde_mm_set_pi32(INT32_C( 1633277631), INT32_C( -224026523)), simde_mm_set_pi32(INT32_C( 1674148495), INT32_C( -74856958)) }, { simde_mm_set_pi32(INT32_C( -718937511), INT32_C( 1453252371)), simde_mm_set_pi32(INT32_C( 56683182), INT32_C( -594741944)), simde_mm_set_pi32(INT32_C( -662254329), INT32_C( 858510427)) }, { simde_mm_set_pi32(INT32_C( -950411567), INT32_C( -1493828)), simde_mm_set_pi32(INT32_C( -1680249611), INT32_C( 321011369)), simde_mm_set_pi32(INT32_C( 1664306118), INT32_C( 319517541)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_add_pi32(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_paddd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( -1528799955), INT32_C( -1825996932)), simde_mm_set_pi32(INT32_C( 
-1229665745), INT32_C( 989894561)), simde_mm_set_pi32(INT32_C( 1536501596), INT32_C( -836102371)) }, { simde_mm_set_pi32(INT32_C( 1936809596), INT32_C( 1331021923)), simde_mm_set_pi32(INT32_C( -505769092), INT32_C( 1471336810)), simde_mm_set_pi32(INT32_C( 1431040504), INT32_C( -1492608563)) }, { simde_mm_set_pi32(INT32_C( 783830780), INT32_C( 1923113282)), simde_mm_set_pi32(INT32_C( 1700161106), INT32_C( -175473923)), simde_mm_set_pi32(INT32_C( -1810975410), INT32_C( 1747639359)) }, { simde_mm_set_pi32(INT32_C( 1195975755), INT32_C( 1329173130)), simde_mm_set_pi32(INT32_C( -611537759), INT32_C( 787308680)), simde_mm_set_pi32(INT32_C( 584437996), INT32_C( 2116481810)) }, { simde_mm_set_pi32(INT32_C( 950103059), INT32_C( 570905377)), simde_mm_set_pi32(INT32_C( 1696944201), INT32_C( -1762697792)), simde_mm_set_pi32(INT32_C( -1647920036), INT32_C( -1191792415)) }, { simde_mm_set_pi32(INT32_C( 40870864), INT32_C( 149169565)), simde_mm_set_pi32(INT32_C( 1633277631), INT32_C( -224026523)), simde_mm_set_pi32(INT32_C( 1674148495), INT32_C( -74856958)) }, { simde_mm_set_pi32(INT32_C( -718937511), INT32_C( 1453252371)), simde_mm_set_pi32(INT32_C( 56683182), INT32_C( -594741944)), simde_mm_set_pi32(INT32_C( -662254329), INT32_C( 858510427)) }, { simde_mm_set_pi32(INT32_C( -950411567), INT32_C( -1493828)), simde_mm_set_pi32(INT32_C( -1680249611), INT32_C( 321011369)), simde_mm_set_pi32(INT32_C( 1664306118), INT32_C( 319517541)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_paddd(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_adds_pi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi8( 99, 16, -73, -73, 34, 32, 87, 42), simde_mm_set_pi8( -29, -82, -26, -38, 66, -51, 82, 53), simde_mm_set_pi8( 70, -66, -99, -111, 100, -19, 127, 95) }, { 
simde_mm_set_pi8( -63, -116, -41, -11, -99, -60, -36, -15), simde_mm_set_pi8( 84, -113, 107, 81, -28, -25, -90, -115), simde_mm_set_pi8( 21, -128, 66, 70, -127, -85, -126, -128) }, { simde_mm_set_pi8( -79, -104, -10, -65, 84, -40, -102, 75), simde_mm_set_pi8( 30, 54, 127, 16, -7, -31, -83, -89), simde_mm_set_pi8( -49, -50, 117, -49, 77, -71, -128, -14) }, { simde_mm_set_pi8(-115, -50, 111, 104, -19, -48, 122, 59), simde_mm_set_pi8( -74, -15, 43, 9, 94, -81, -68, 15), simde_mm_set_pi8(-128, -65, 127, 113, 75, -128, 54, 74) }, { simde_mm_set_pi8( 18, -79, 5, 80, 99, 108, 39, -27), simde_mm_set_pi8( 127, 44, 22, -80, -86, -11, 108, -95), simde_mm_set_pi8( 127, -35, 27, 0, 13, 97, 127, -122) }, { simde_mm_set_pi8( -35, 62, 102, -79, 117, 108, 56, -21), simde_mm_set_pi8( 68, 119, -10, 17, 40, -124, -75, -39), simde_mm_set_pi8( 33, 127, 92, -62, 127, -16, -19, -60) }, { simde_mm_set_pi8( 45, -5, -10, -4, -23, -76, -111, -38), simde_mm_set_pi8( 24, -15, -2, 75, 11, -108, -5, 124), simde_mm_set_pi8( 69, -20, -12, 71, -12, -128, -116, 86) }, { simde_mm_set_pi8( 116, 38, 87, 5, -25, -119, 117, -12), simde_mm_set_pi8( -51, 25, -122, 40, -111, -50, -55, -109), simde_mm_set_pi8( 65, 63, -35, 45, -128, -128, 62, -121) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_adds_pi8(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_paddsb(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi8( 99, 16, -73, -73, 34, 32, 87, 42), simde_mm_set_pi8( -29, -82, -26, -38, 66, -51, 82, 53), simde_mm_set_pi8( 70, -66, -99, -111, 100, -19, 127, 95) }, { simde_mm_set_pi8( -63, -116, -41, -11, -99, -60, -36, -15), simde_mm_set_pi8( 84, -113, 107, 81, -28, -25, -90, -115), simde_mm_set_pi8( 21, -128, 66, 70, -127, -85, -126, -128) }, { simde_mm_set_pi8( -79, -104, -10, -65, 84, 
-40, -102, 75), simde_mm_set_pi8( 30, 54, 127, 16, -7, -31, -83, -89), simde_mm_set_pi8( -49, -50, 117, -49, 77, -71, -128, -14) }, { simde_mm_set_pi8(-115, -50, 111, 104, -19, -48, 122, 59), simde_mm_set_pi8( -74, -15, 43, 9, 94, -81, -68, 15), simde_mm_set_pi8(-128, -65, 127, 113, 75, -128, 54, 74) }, { simde_mm_set_pi8( 18, -79, 5, 80, 99, 108, 39, -27), simde_mm_set_pi8( 127, 44, 22, -80, -86, -11, 108, -95), simde_mm_set_pi8( 127, -35, 27, 0, 13, 97, 127, -122) }, { simde_mm_set_pi8( -35, 62, 102, -79, 117, 108, 56, -21), simde_mm_set_pi8( 68, 119, -10, 17, 40, -124, -75, -39), simde_mm_set_pi8( 33, 127, 92, -62, 127, -16, -19, -60) }, { simde_mm_set_pi8( 45, -5, -10, -4, -23, -76, -111, -38), simde_mm_set_pi8( 24, -15, -2, 75, 11, -108, -5, 124), simde_mm_set_pi8( 69, -20, -12, 71, -12, -128, -116, 86) }, { simde_mm_set_pi8( 116, 38, 87, 5, -25, -119, 117, -12), simde_mm_set_pi8( -51, 25, -122, 40, -111, -50, -55, -109), simde_mm_set_pi8( 65, 63, -35, 45, -128, -128, 62, -121) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_paddsb(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_adds_pu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_x_mm_set_pu8(UINT8_C( 81), UINT8_C( 21), UINT8_C( 204), UINT8_C( 252), UINT8_C( 129), UINT8_C( 215), UINT8_C( 184), UINT8_C( 80)), simde_x_mm_set_pu8(UINT8_C( 23), UINT8_C( 216), UINT8_C( 110), UINT8_C( 125), UINT8_C( 171), UINT8_C( 145), UINT8_C( 61), UINT8_C( 141)), simde_x_mm_set_pu8(UINT8_C( 104), UINT8_C( 237), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 245), UINT8_C( 221)) }, { simde_x_mm_set_pu8(UINT8_C( 239), UINT8_C( 124), UINT8_C( 164), UINT8_C( 178), UINT8_C( 97), UINT8_C( 133), UINT8_C( 53), UINT8_C( 7)), simde_x_mm_set_pu8(UINT8_C( 55), UINT8_C( 60), UINT8_C( 93), UINT8_C( 
144), UINT8_C( 87), UINT8_C( 38), UINT8_C( 29), UINT8_C( 227)), simde_x_mm_set_pu8(UINT8_C( 255), UINT8_C( 184), UINT8_C( 255), UINT8_C( 255), UINT8_C( 184), UINT8_C( 171), UINT8_C( 82), UINT8_C( 234)) }, { simde_x_mm_set_pu8(UINT8_C( 2), UINT8_C( 239), UINT8_C( 120), UINT8_C( 239), UINT8_C( 57), UINT8_C( 159), UINT8_C( 235), UINT8_C( 22)), simde_x_mm_set_pu8(UINT8_C( 220), UINT8_C( 9), UINT8_C( 135), UINT8_C( 55), UINT8_C( 21), UINT8_C( 1), UINT8_C( 123), UINT8_C( 167)), simde_x_mm_set_pu8(UINT8_C( 222), UINT8_C( 248), UINT8_C( 255), UINT8_C( 255), UINT8_C( 78), UINT8_C( 160), UINT8_C( 255), UINT8_C( 189)) }, { simde_x_mm_set_pu8(UINT8_C( 169), UINT8_C( 122), UINT8_C( 209), UINT8_C( 107), UINT8_C( 53), UINT8_C( 194), UINT8_C( 157), UINT8_C( 250)), simde_x_mm_set_pu8(UINT8_C( 190), UINT8_C( 161), UINT8_C( 50), UINT8_C( 2), UINT8_C( 227), UINT8_C( 196), UINT8_C( 34), UINT8_C( 128)), simde_x_mm_set_pu8(UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 109), UINT8_C( 255), UINT8_C( 255), UINT8_C( 191), UINT8_C( 255)) }, { simde_x_mm_set_pu8(UINT8_C( 127), UINT8_C( 206), UINT8_C( 75), UINT8_C( 228), UINT8_C( 24), UINT8_C( 253), UINT8_C( 247), UINT8_C( 227)), simde_x_mm_set_pu8(UINT8_C( 199), UINT8_C( 181), UINT8_C( 197), UINT8_C( 15), UINT8_C( 201), UINT8_C( 118), UINT8_C( 220), UINT8_C( 22)), simde_x_mm_set_pu8(UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 243), UINT8_C( 225), UINT8_C( 255), UINT8_C( 255), UINT8_C( 249)) }, { simde_x_mm_set_pu8(UINT8_C( 160), UINT8_C( 45), UINT8_C( 121), UINT8_C( 199), UINT8_C( 155), UINT8_C( 201), UINT8_C( 54), UINT8_C( 92)), simde_x_mm_set_pu8(UINT8_C( 29), UINT8_C( 158), UINT8_C( 69), UINT8_C( 12), UINT8_C( 220), UINT8_C( 133), UINT8_C( 37), UINT8_C( 27)), simde_x_mm_set_pu8(UINT8_C( 189), UINT8_C( 203), UINT8_C( 190), UINT8_C( 211), UINT8_C( 255), UINT8_C( 255), UINT8_C( 91), UINT8_C( 119)) }, { simde_x_mm_set_pu8(UINT8_C( 173), UINT8_C( 130), UINT8_C( 79), UINT8_C( 240), UINT8_C( 183), UINT8_C( 112), UINT8_C( 65), 
UINT8_C( 13)), simde_x_mm_set_pu8(UINT8_C( 24), UINT8_C( 152), UINT8_C( 239), UINT8_C( 128), UINT8_C( 83), UINT8_C( 69), UINT8_C( 122), UINT8_C( 121)), simde_x_mm_set_pu8(UINT8_C( 197), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 181), UINT8_C( 187), UINT8_C( 134)) }, { simde_x_mm_set_pu8(UINT8_C( 242), UINT8_C( 255), UINT8_C( 149), UINT8_C( 159), UINT8_C( 60), UINT8_C( 134), UINT8_C( 24), UINT8_C( 232)), simde_x_mm_set_pu8(UINT8_C( 209), UINT8_C( 150), UINT8_C( 4), UINT8_C( 97), UINT8_C( 136), UINT8_C( 88), UINT8_C( 70), UINT8_C( 193)), simde_x_mm_set_pu8(UINT8_C( 255), UINT8_C( 255), UINT8_C( 153), UINT8_C( 255), UINT8_C( 196), UINT8_C( 222), UINT8_C( 94), UINT8_C( 255)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_adds_pu8(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_u8x8(r, test_vec[i].r); } return 0; } static int test_simde_m_paddusb(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_x_mm_set_pu8(UINT8_C( 81), UINT8_C( 21), UINT8_C( 204), UINT8_C( 252), UINT8_C( 129), UINT8_C( 215), UINT8_C( 184), UINT8_C( 80)), simde_x_mm_set_pu8(UINT8_C( 23), UINT8_C( 216), UINT8_C( 110), UINT8_C( 125), UINT8_C( 171), UINT8_C( 145), UINT8_C( 61), UINT8_C( 141)), simde_x_mm_set_pu8(UINT8_C( 104), UINT8_C( 237), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 245), UINT8_C( 221)) }, { simde_x_mm_set_pu8(UINT8_C( 239), UINT8_C( 124), UINT8_C( 164), UINT8_C( 178), UINT8_C( 97), UINT8_C( 133), UINT8_C( 53), UINT8_C( 7)), simde_x_mm_set_pu8(UINT8_C( 55), UINT8_C( 60), UINT8_C( 93), UINT8_C( 144), UINT8_C( 87), UINT8_C( 38), UINT8_C( 29), UINT8_C( 227)), simde_x_mm_set_pu8(UINT8_C( 255), UINT8_C( 184), UINT8_C( 255), UINT8_C( 255), UINT8_C( 184), UINT8_C( 171), UINT8_C( 82), UINT8_C( 234)) }, { simde_x_mm_set_pu8(UINT8_C( 2), UINT8_C( 239), UINT8_C( 120), UINT8_C( 239), UINT8_C( 57), UINT8_C( 159), UINT8_C( 
235), UINT8_C( 22)), simde_x_mm_set_pu8(UINT8_C( 220), UINT8_C( 9), UINT8_C( 135), UINT8_C( 55), UINT8_C( 21), UINT8_C( 1), UINT8_C( 123), UINT8_C( 167)), simde_x_mm_set_pu8(UINT8_C( 222), UINT8_C( 248), UINT8_C( 255), UINT8_C( 255), UINT8_C( 78), UINT8_C( 160), UINT8_C( 255), UINT8_C( 189)) }, { simde_x_mm_set_pu8(UINT8_C( 169), UINT8_C( 122), UINT8_C( 209), UINT8_C( 107), UINT8_C( 53), UINT8_C( 194), UINT8_C( 157), UINT8_C( 250)), simde_x_mm_set_pu8(UINT8_C( 190), UINT8_C( 161), UINT8_C( 50), UINT8_C( 2), UINT8_C( 227), UINT8_C( 196), UINT8_C( 34), UINT8_C( 128)), simde_x_mm_set_pu8(UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 109), UINT8_C( 255), UINT8_C( 255), UINT8_C( 191), UINT8_C( 255)) }, { simde_x_mm_set_pu8(UINT8_C( 127), UINT8_C( 206), UINT8_C( 75), UINT8_C( 228), UINT8_C( 24), UINT8_C( 253), UINT8_C( 247), UINT8_C( 227)), simde_x_mm_set_pu8(UINT8_C( 199), UINT8_C( 181), UINT8_C( 197), UINT8_C( 15), UINT8_C( 201), UINT8_C( 118), UINT8_C( 220), UINT8_C( 22)), simde_x_mm_set_pu8(UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 243), UINT8_C( 225), UINT8_C( 255), UINT8_C( 255), UINT8_C( 249)) }, { simde_x_mm_set_pu8(UINT8_C( 160), UINT8_C( 45), UINT8_C( 121), UINT8_C( 199), UINT8_C( 155), UINT8_C( 201), UINT8_C( 54), UINT8_C( 92)), simde_x_mm_set_pu8(UINT8_C( 29), UINT8_C( 158), UINT8_C( 69), UINT8_C( 12), UINT8_C( 220), UINT8_C( 133), UINT8_C( 37), UINT8_C( 27)), simde_x_mm_set_pu8(UINT8_C( 189), UINT8_C( 203), UINT8_C( 190), UINT8_C( 211), UINT8_C( 255), UINT8_C( 255), UINT8_C( 91), UINT8_C( 119)) }, { simde_x_mm_set_pu8(UINT8_C( 173), UINT8_C( 130), UINT8_C( 79), UINT8_C( 240), UINT8_C( 183), UINT8_C( 112), UINT8_C( 65), UINT8_C( 13)), simde_x_mm_set_pu8(UINT8_C( 24), UINT8_C( 152), UINT8_C( 239), UINT8_C( 128), UINT8_C( 83), UINT8_C( 69), UINT8_C( 122), UINT8_C( 121)), simde_x_mm_set_pu8(UINT8_C( 197), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 181), UINT8_C( 187), UINT8_C( 134)) }, { simde_x_mm_set_pu8(UINT8_C( 
242), UINT8_C( 255), UINT8_C( 149), UINT8_C( 159), UINT8_C( 60), UINT8_C( 134), UINT8_C( 24), UINT8_C( 232)), simde_x_mm_set_pu8(UINT8_C( 209), UINT8_C( 150), UINT8_C( 4), UINT8_C( 97), UINT8_C( 136), UINT8_C( 88), UINT8_C( 70), UINT8_C( 193)), simde_x_mm_set_pu8(UINT8_C( 255), UINT8_C( 255), UINT8_C( 153), UINT8_C( 255), UINT8_C( 196), UINT8_C( 222), UINT8_C( 94), UINT8_C( 255)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_paddusb(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_u8x8(r, test_vec[i].r); } return 0; } static int test_simde_mm_adds_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -31309), INT16_C( -5581), INT16_C( -13514), INT16_C( -24682)), simde_mm_set_pi16(INT16_C( 19892), INT16_C( -12160), INT16_C( 3266), INT16_C( 9002)), simde_mm_set_pi16(INT16_C( -11417), INT16_C( -17741), INT16_C( -10248), INT16_C( -15680)) }, { simde_mm_set_pi16(INT16_C( 20564), INT16_C( -25554), INT16_C( 18522), INT16_C( -107)), simde_mm_set_pi16(INT16_C( 12328), INT16_C( 12883), INT16_C( 2251), INT16_C( -19119)), simde_mm_set_pi16(INT16_C( 32767), INT16_C( -12671), INT16_C( 20773), INT16_C( -19226)) }, { simde_mm_set_pi16(INT16_C( 20106), INT16_C( -15513), INT16_C( -25552), INT16_C( -23751)), simde_mm_set_pi16(INT16_C( 11380), INT16_C( 4698), INT16_C( 16886), INT16_C( 11304)), simde_mm_set_pi16(INT16_C( 31486), INT16_C( -10815), INT16_C( -8666), INT16_C( -12447)) }, { simde_mm_set_pi16(INT16_C( -30807), INT16_C( -12488), INT16_C( 12150), INT16_C( 344)), simde_mm_set_pi16(INT16_C( -21735), INT16_C( 11424), INT16_C( 19342), INT16_C( -22640)), simde_mm_set_pi16(INT16_C( -32768), INT16_C( -1064), INT16_C( 31492), INT16_C( -22296)) }, { simde_mm_set_pi16(INT16_C( 23188), INT16_C( -20941), INT16_C( 26991), INT16_C( -11383)), simde_mm_set_pi16(INT16_C( 20582), INT16_C( 6628), INT16_C( 32097), INT16_C( 23397)), 
simde_mm_set_pi16(INT16_C( 32767), INT16_C( -14313), INT16_C( 32767), INT16_C( 12014)) }, { simde_mm_set_pi16(INT16_C( 1789), INT16_C( 28566), INT16_C( 18995), INT16_C( -32500)), simde_mm_set_pi16(INT16_C( -32609), INT16_C( -30393), INT16_C( 1798), INT16_C( 28485)), simde_mm_set_pi16(INT16_C( -30820), INT16_C( -1827), INT16_C( 20793), INT16_C( -4015)) }, { simde_mm_set_pi16(INT16_C( 18491), INT16_C( -11781), INT16_C( -27491), INT16_C( 337)), simde_mm_set_pi16(INT16_C( 420), INT16_C( 28774), INT16_C( -31111), INT16_C( 15256)), simde_mm_set_pi16(INT16_C( 18911), INT16_C( 16993), INT16_C( -32768), INT16_C( 15593)) }, { simde_mm_set_pi16(INT16_C( -15687), INT16_C( 25487), INT16_C( 23048), INT16_C( -8478)), simde_mm_set_pi16(INT16_C( 9271), INT16_C( -4756), INT16_C( -12087), INT16_C( -15383)), simde_mm_set_pi16(INT16_C( -6416), INT16_C( 20731), INT16_C( 10961), INT16_C( -23861)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_adds_pi16(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_paddsw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -31309), INT16_C( -5581), INT16_C( -13514), INT16_C( -24682)), simde_mm_set_pi16(INT16_C( 19892), INT16_C( -12160), INT16_C( 3266), INT16_C( 9002)), simde_mm_set_pi16(INT16_C( -11417), INT16_C( -17741), INT16_C( -10248), INT16_C( -15680)) }, { simde_mm_set_pi16(INT16_C( 20564), INT16_C( -25554), INT16_C( 18522), INT16_C( -107)), simde_mm_set_pi16(INT16_C( 12328), INT16_C( 12883), INT16_C( 2251), INT16_C( -19119)), simde_mm_set_pi16(INT16_C( 32767), INT16_C( -12671), INT16_C( 20773), INT16_C( -19226)) }, { simde_mm_set_pi16(INT16_C( 20106), INT16_C( -15513), INT16_C( -25552), INT16_C( -23751)), simde_mm_set_pi16(INT16_C( 11380), INT16_C( 4698), INT16_C( 16886), INT16_C( 11304)), 
simde_mm_set_pi16(INT16_C( 31486), INT16_C( -10815), INT16_C( -8666), INT16_C( -12447)) }, { simde_mm_set_pi16(INT16_C( -30807), INT16_C( -12488), INT16_C( 12150), INT16_C( 344)), simde_mm_set_pi16(INT16_C( -21735), INT16_C( 11424), INT16_C( 19342), INT16_C( -22640)), simde_mm_set_pi16(INT16_C( -32768), INT16_C( -1064), INT16_C( 31492), INT16_C( -22296)) }, { simde_mm_set_pi16(INT16_C( 23188), INT16_C( -20941), INT16_C( 26991), INT16_C( -11383)), simde_mm_set_pi16(INT16_C( 20582), INT16_C( 6628), INT16_C( 32097), INT16_C( 23397)), simde_mm_set_pi16(INT16_C( 32767), INT16_C( -14313), INT16_C( 32767), INT16_C( 12014)) }, { simde_mm_set_pi16(INT16_C( 1789), INT16_C( 28566), INT16_C( 18995), INT16_C( -32500)), simde_mm_set_pi16(INT16_C( -32609), INT16_C( -30393), INT16_C( 1798), INT16_C( 28485)), simde_mm_set_pi16(INT16_C( -30820), INT16_C( -1827), INT16_C( 20793), INT16_C( -4015)) }, { simde_mm_set_pi16(INT16_C( 18491), INT16_C( -11781), INT16_C( -27491), INT16_C( 337)), simde_mm_set_pi16(INT16_C( 420), INT16_C( 28774), INT16_C( -31111), INT16_C( 15256)), simde_mm_set_pi16(INT16_C( 18911), INT16_C( 16993), INT16_C( -32768), INT16_C( 15593)) }, { simde_mm_set_pi16(INT16_C( -15687), INT16_C( 25487), INT16_C( 23048), INT16_C( -8478)), simde_mm_set_pi16(INT16_C( 9271), INT16_C( -4756), INT16_C( -12087), INT16_C( -15383)), simde_mm_set_pi16(INT16_C( -6416), INT16_C( 20731), INT16_C( 10961), INT16_C( -23861)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_paddsw(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_adds_pu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_x_mm_set_pu16(UINT16_C(43150), UINT16_C( 5470), UINT16_C(60072), UINT16_C(50068)), simde_x_mm_set_pu16(UINT16_C( 7332), UINT16_C( 4270), UINT16_C(46463), UINT16_C( 9473)), 
simde_x_mm_set_pu16(UINT16_C(50482), UINT16_C( 9740), UINT16_C(65535), UINT16_C(59541)) }, { simde_x_mm_set_pu16(UINT16_C( 2434), UINT16_C(31906), UINT16_C( 3723), UINT16_C(47234)), simde_x_mm_set_pu16(UINT16_C(58902), UINT16_C(62845), UINT16_C(51771), UINT16_C(64034)), simde_x_mm_set_pu16(UINT16_C(61336), UINT16_C(65535), UINT16_C(55494), UINT16_C(65535)) }, { simde_x_mm_set_pu16(UINT16_C( 129), UINT16_C(16274), UINT16_C( 9343), UINT16_C(27425)), simde_x_mm_set_pu16(UINT16_C(21184), UINT16_C(38810), UINT16_C(32910), UINT16_C(34144)), simde_x_mm_set_pu16(UINT16_C(21313), UINT16_C(55084), UINT16_C(42253), UINT16_C(61569)) }, { simde_x_mm_set_pu16(UINT16_C(64726), UINT16_C(55325), UINT16_C( 5040), UINT16_C(34690)), simde_x_mm_set_pu16(UINT16_C(18928), UINT16_C(15762), UINT16_C(23760), UINT16_C(30303)), simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(65535), UINT16_C(28800), UINT16_C(64993)) }, { simde_x_mm_set_pu16(UINT16_C(12447), UINT16_C(56063), UINT16_C(19893), UINT16_C(38115)), simde_x_mm_set_pu16(UINT16_C(53854), UINT16_C( 9599), UINT16_C(53148), UINT16_C(47295)), simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(65535), UINT16_C(65535), UINT16_C(65535)) }, { simde_x_mm_set_pu16(UINT16_C(30591), UINT16_C(42550), UINT16_C(36715), UINT16_C(13411)), simde_x_mm_set_pu16(UINT16_C(46515), UINT16_C(57187), UINT16_C(46870), UINT16_C(44207)), simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(65535), UINT16_C(65535), UINT16_C(57618)) }, { simde_x_mm_set_pu16(UINT16_C(12664), UINT16_C(64378), UINT16_C(29354), UINT16_C(42615)), simde_x_mm_set_pu16(UINT16_C(62249), UINT16_C(64644), UINT16_C(45128), UINT16_C(47328)), simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(65535), UINT16_C(65535), UINT16_C(65535)) }, { simde_x_mm_set_pu16(UINT16_C(65124), UINT16_C( 3867), UINT16_C(20702), UINT16_C(63422)), simde_x_mm_set_pu16(UINT16_C(51381), UINT16_C(37432), UINT16_C(48951), UINT16_C(45184)), simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(41299), UINT16_C(65535), UINT16_C(65535)) } }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_adds_pu16(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } return 0; } static int test_simde_m_paddusw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_x_mm_set_pu16(UINT16_C(43150), UINT16_C( 5470), UINT16_C(60072), UINT16_C(50068)), simde_x_mm_set_pu16(UINT16_C( 7332), UINT16_C( 4270), UINT16_C(46463), UINT16_C( 9473)), simde_x_mm_set_pu16(UINT16_C(50482), UINT16_C( 9740), UINT16_C(65535), UINT16_C(59541)) }, { simde_x_mm_set_pu16(UINT16_C( 2434), UINT16_C(31906), UINT16_C( 3723), UINT16_C(47234)), simde_x_mm_set_pu16(UINT16_C(58902), UINT16_C(62845), UINT16_C(51771), UINT16_C(64034)), simde_x_mm_set_pu16(UINT16_C(61336), UINT16_C(65535), UINT16_C(55494), UINT16_C(65535)) }, { simde_x_mm_set_pu16(UINT16_C( 129), UINT16_C(16274), UINT16_C( 9343), UINT16_C(27425)), simde_x_mm_set_pu16(UINT16_C(21184), UINT16_C(38810), UINT16_C(32910), UINT16_C(34144)), simde_x_mm_set_pu16(UINT16_C(21313), UINT16_C(55084), UINT16_C(42253), UINT16_C(61569)) }, { simde_x_mm_set_pu16(UINT16_C(64726), UINT16_C(55325), UINT16_C( 5040), UINT16_C(34690)), simde_x_mm_set_pu16(UINT16_C(18928), UINT16_C(15762), UINT16_C(23760), UINT16_C(30303)), simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(65535), UINT16_C(28800), UINT16_C(64993)) }, { simde_x_mm_set_pu16(UINT16_C(12447), UINT16_C(56063), UINT16_C(19893), UINT16_C(38115)), simde_x_mm_set_pu16(UINT16_C(53854), UINT16_C( 9599), UINT16_C(53148), UINT16_C(47295)), simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(65535), UINT16_C(65535), UINT16_C(65535)) }, { simde_x_mm_set_pu16(UINT16_C(30591), UINT16_C(42550), UINT16_C(36715), UINT16_C(13411)), simde_x_mm_set_pu16(UINT16_C(46515), UINT16_C(57187), UINT16_C(46870), UINT16_C(44207)), simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(65535), UINT16_C(65535), UINT16_C(57618)) }, { simde_x_mm_set_pu16(UINT16_C(12664), 
UINT16_C(64378), UINT16_C(29354), UINT16_C(42615)), simde_x_mm_set_pu16(UINT16_C(62249), UINT16_C(64644), UINT16_C(45128), UINT16_C(47328)), simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(65535), UINT16_C(65535), UINT16_C(65535)) }, { simde_x_mm_set_pu16(UINT16_C(65124), UINT16_C( 3867), UINT16_C(20702), UINT16_C(63422)), simde_x_mm_set_pu16(UINT16_C(51381), UINT16_C(37432), UINT16_C(48951), UINT16_C(45184)), simde_x_mm_set_pu16(UINT16_C(65535), UINT16_C(41299), UINT16_C(65535), UINT16_C(65535)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_paddusw(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } return 0; } static int test_simde_mm_and_si64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( 340534654), INT32_C( 867835838)), simde_mm_set_pi32(INT32_C( -1715051141), INT32_C( 327376215)), simde_mm_set_pi32(INT32_C( 272901498), INT32_C( 327294230)) }, { simde_mm_set_pi32(INT32_C( 364465166), INT32_C( -1853449223)), simde_mm_set_pi32(INT32_C( 425932704), INT32_C( -538031667)), simde_mm_set_pi32(INT32_C( 287376384), INT32_C( -1853486647)) }, { simde_mm_set_pi32(INT32_C( 1222276268), INT32_C( -1950390417)), simde_mm_set_pi32(INT32_C( 104967923), INT32_C( 339992254)), simde_mm_set_pi32(INT32_C( 4203680), INT32_C( 214574)) }, { simde_mm_set_pi32(INT32_C( 678635361), INT32_C( 1353498548)), simde_mm_set_pi32(INT32_C( 1051418126), INT32_C( -1022663537)), simde_mm_set_pi32(INT32_C( 673383936), INT32_C( 1074275460)) }, { simde_mm_set_pi32(INT32_C( 1823492970), INT32_C( -1726291925)), simde_mm_set_pi32(INT32_C( 1139854805), INT32_C( 874111018)), simde_mm_set_pi32(INT32_C( 1085294912), INT32_C( 270065706)) }, { simde_mm_set_pi32(INT32_C( 188716107), INT32_C( 919243794)), simde_mm_set_pi32(INT32_C( -505381577), INT32_C( -1684778331)), simde_mm_set_pi32(INT32_C( 18879491), INT32_C( 310378496)) }, { 
simde_mm_set_pi32(INT32_C( -1486610662), INT32_C( 307692640)), simde_mm_set_pi32(INT32_C( -1793851837), INT32_C( 1963802755)), simde_mm_set_pi32(INT32_C( -2063589886), INT32_C( 268763136)) }, { simde_mm_set_pi32(INT32_C( -630259527), INT32_C( -82339396)), simde_mm_set_pi32(INT32_C( 1607040389), INT32_C( 867785548)), simde_mm_set_pi32(INT32_C( 1514733697), INT32_C( 856758540)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_and_si64(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_pand(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( 340534654), INT32_C( 867835838)), simde_mm_set_pi32(INT32_C( -1715051141), INT32_C( 327376215)), simde_mm_set_pi32(INT32_C( 272901498), INT32_C( 327294230)) }, { simde_mm_set_pi32(INT32_C( 364465166), INT32_C( -1853449223)), simde_mm_set_pi32(INT32_C( 425932704), INT32_C( -538031667)), simde_mm_set_pi32(INT32_C( 287376384), INT32_C( -1853486647)) }, { simde_mm_set_pi32(INT32_C( 1222276268), INT32_C( -1950390417)), simde_mm_set_pi32(INT32_C( 104967923), INT32_C( 339992254)), simde_mm_set_pi32(INT32_C( 4203680), INT32_C( 214574)) }, { simde_mm_set_pi32(INT32_C( 678635361), INT32_C( 1353498548)), simde_mm_set_pi32(INT32_C( 1051418126), INT32_C( -1022663537)), simde_mm_set_pi32(INT32_C( 673383936), INT32_C( 1074275460)) }, { simde_mm_set_pi32(INT32_C( 1823492970), INT32_C( -1726291925)), simde_mm_set_pi32(INT32_C( 1139854805), INT32_C( 874111018)), simde_mm_set_pi32(INT32_C( 1085294912), INT32_C( 270065706)) }, { simde_mm_set_pi32(INT32_C( 188716107), INT32_C( 919243794)), simde_mm_set_pi32(INT32_C( -505381577), INT32_C( -1684778331)), simde_mm_set_pi32(INT32_C( 18879491), INT32_C( 310378496)) }, { simde_mm_set_pi32(INT32_C( -1486610662), INT32_C( 307692640)), simde_mm_set_pi32(INT32_C( -1793851837), 
INT32_C( 1963802755)), simde_mm_set_pi32(INT32_C( -2063589886), INT32_C( 268763136)) }, { simde_mm_set_pi32(INT32_C( -630259527), INT32_C( -82339396)), simde_mm_set_pi32(INT32_C( 1607040389), INT32_C( 867785548)), simde_mm_set_pi32(INT32_C( 1514733697), INT32_C( 856758540)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_pand(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_andnot_si64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( 874898289), INT32_C( -802292997)), simde_mm_set_pi32(INT32_C( 329777422), INT32_C( 479831177)), simde_mm_set_pi32(INT32_C( 59244558), INT32_C( 210764800)) }, { simde_mm_set_pi32(INT32_C( -944824913), INT32_C( 1953730462)), simde_mm_set_pi32(INT32_C( -914930437), INT32_C( -556614726)), simde_mm_set_pi32(INT32_C( 139477072), INT32_C( -1971310560)) }, { simde_mm_set_pi32(INT32_C( -253535493), INT32_C( 1477705121)), simde_mm_set_pi32(INT32_C( -1581892884), INT32_C( -1606801005)), simde_mm_set_pi32(INT32_C( 18096132), INT32_C( -1607991278)) }, { simde_mm_set_pi32(INT32_C( -585861604), INT32_C( 825554783)), simde_mm_set_pi32(INT32_C( -1758500210), INT32_C( -643533489)), simde_mm_set_pi32(INT32_C( 36374658), INT32_C( -931135488)) }, { simde_mm_set_pi32(INT32_C( -5443449), INT32_C( 694842285)), simde_mm_set_pi32(INT32_C( -1613805192), INT32_C( 215848721)), simde_mm_set_pi32(INT32_C( 4393336), INT32_C( 76907536)) }, { simde_mm_set_pi32(INT32_C( 1431251288), INT32_C( 1009645294)), simde_mm_set_pi32(INT32_C( -1668167014), INT32_C( -733286899)), simde_mm_set_pi32(INT32_C( -2003778942), INT32_C( -1069414399)) }, { simde_mm_set_pi32(INT32_C( 1707128575), INT32_C( -1462185330)), simde_mm_set_pi32(INT32_C( -1016415616), INT32_C( -1881637541)), simde_mm_set_pi32(INT32_C( -2111174656), INT32_C( 117452113)) }, { 
simde_mm_set_pi32(INT32_C( 336066190), INT32_C( -2007360384)), simde_mm_set_pi32(INT32_C( -1959332116), INT32_C( -820920813)), simde_mm_set_pi32(INT32_C( -1959788448), INT32_C( 1191289363)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_andnot_si64(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_pandn(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( 874898289), INT32_C( -802292997)), simde_mm_set_pi32(INT32_C( 329777422), INT32_C( 479831177)), simde_mm_set_pi32(INT32_C( 59244558), INT32_C( 210764800)) }, { simde_mm_set_pi32(INT32_C( -944824913), INT32_C( 1953730462)), simde_mm_set_pi32(INT32_C( -914930437), INT32_C( -556614726)), simde_mm_set_pi32(INT32_C( 139477072), INT32_C( -1971310560)) }, { simde_mm_set_pi32(INT32_C( -253535493), INT32_C( 1477705121)), simde_mm_set_pi32(INT32_C( -1581892884), INT32_C( -1606801005)), simde_mm_set_pi32(INT32_C( 18096132), INT32_C( -1607991278)) }, { simde_mm_set_pi32(INT32_C( -585861604), INT32_C( 825554783)), simde_mm_set_pi32(INT32_C( -1758500210), INT32_C( -643533489)), simde_mm_set_pi32(INT32_C( 36374658), INT32_C( -931135488)) }, { simde_mm_set_pi32(INT32_C( -5443449), INT32_C( 694842285)), simde_mm_set_pi32(INT32_C( -1613805192), INT32_C( 215848721)), simde_mm_set_pi32(INT32_C( 4393336), INT32_C( 76907536)) }, { simde_mm_set_pi32(INT32_C( 1431251288), INT32_C( 1009645294)), simde_mm_set_pi32(INT32_C( -1668167014), INT32_C( -733286899)), simde_mm_set_pi32(INT32_C( -2003778942), INT32_C( -1069414399)) }, { simde_mm_set_pi32(INT32_C( 1707128575), INT32_C( -1462185330)), simde_mm_set_pi32(INT32_C( -1016415616), INT32_C( -1881637541)), simde_mm_set_pi32(INT32_C( -2111174656), INT32_C( 117452113)) }, { simde_mm_set_pi32(INT32_C( 336066190), INT32_C( -2007360384)), simde_mm_set_pi32(INT32_C( -1959332116), 
INT32_C( -820920813)), simde_mm_set_pi32(INT32_C( -1959788448), INT32_C( 1191289363)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_pandn(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_cmpeq_pi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi8(INT8_C( 61), INT8_C(-117), INT8_C(-117), INT8_C( -23), INT8_C( -19), INT8_C( 6), INT8_C( -24), INT8_C( 89)), simde_mm_set_pi8(INT8_C( 47), INT8_C( 71), INT8_C(-105), INT8_C( 13), INT8_C( -26), INT8_C( 93), INT8_C( 118), INT8_C( -58)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C( 78), INT8_C( 11), INT8_C( -2), INT8_C( 86), INT8_C( -50), INT8_C( -49), INT8_C( -1), INT8_C( 92)), simde_mm_set_pi8(INT8_C( -85), INT8_C( -99), INT8_C( -41), INT8_C( 116), INT8_C( 74), INT8_C( 114), INT8_C( -3), INT8_C( -98)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C( 60), INT8_C( 10), INT8_C( -34), INT8_C( 30), INT8_C( 48), INT8_C( -13), INT8_C(-106), INT8_C( 105)), simde_mm_set_pi8(INT8_C( 81), INT8_C( 108), INT8_C( -65), INT8_C( -58), INT8_C( -30), INT8_C( -90), INT8_C( 42), INT8_C( 0)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C(-113), INT8_C( -67), INT8_C( -55), INT8_C( 84), INT8_C( -92), INT8_C( -66), INT8_C( 7), INT8_C( 21)), simde_mm_set_pi8(INT8_C(-113), INT8_C( -67), INT8_C( -55), INT8_C( 84), INT8_C( -92), INT8_C( -66), INT8_C( 7), INT8_C( 21)), simde_mm_set_pi8(INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1)) }, { simde_mm_set_pi8(INT8_C( -72), INT8_C( -56), INT8_C(-104), 
INT8_C( -6), INT8_C( 37), INT8_C(-114), INT8_C( 84), INT8_C( 21)), simde_mm_set_pi8(INT8_C( 77), INT8_C( -25), INT8_C(-104), INT8_C( 0), INT8_C( -39), INT8_C( 38), INT8_C( -54), INT8_C( -90)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C( 46), INT8_C( 120), INT8_C( -13), INT8_C(-125), INT8_C( 50), INT8_C( 10), INT8_C( 120), INT8_C( -10)), simde_mm_set_pi8(INT8_C( 85), INT8_C( 89), INT8_C( 9), INT8_C( 65), INT8_C( -82), INT8_C( -80), INT8_C( 65), INT8_C( -65)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C( -12), INT8_C( -41), INT8_C( -54), INT8_C( 92), INT8_C( -87), INT8_C( -82), INT8_C(-120), INT8_C( 37)), simde_mm_set_pi8(INT8_C( 94), INT8_C( -21), INT8_C( 36), INT8_C(-121), INT8_C( -62), INT8_C( -4), INT8_C( 42), INT8_C(-119)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C( -8), INT8_C( -60), INT8_C( 35), INT8_C( -31), INT8_C(-103), INT8_C( -7), INT8_C( -39), INT8_C( 47)), simde_mm_set_pi8(INT8_C( 13), INT8_C( -84), INT8_C(-126), INT8_C(-127), INT8_C( -82), INT8_C( 37), INT8_C( 60), INT8_C( 30)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_cmpeq_pi8(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_pcmpeqb(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi8(INT8_C( 61), INT8_C(-117), INT8_C(-117), INT8_C( -23), INT8_C( -19), INT8_C( 6), INT8_C( -24), INT8_C( 89)), simde_mm_set_pi8(INT8_C( 47), INT8_C( 71), INT8_C(-105), INT8_C( 13), 
INT8_C( -26), INT8_C( 93), INT8_C( 118), INT8_C( -58)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C( 78), INT8_C( 11), INT8_C( -2), INT8_C( 86), INT8_C( -50), INT8_C( -49), INT8_C( -1), INT8_C( 92)), simde_mm_set_pi8(INT8_C( -85), INT8_C( -99), INT8_C( -41), INT8_C( 116), INT8_C( 74), INT8_C( 114), INT8_C( -3), INT8_C( -98)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C( 60), INT8_C( 10), INT8_C( -34), INT8_C( 30), INT8_C( 48), INT8_C( -13), INT8_C(-106), INT8_C( 105)), simde_mm_set_pi8(INT8_C( 81), INT8_C( 108), INT8_C( -65), INT8_C( -58), INT8_C( -30), INT8_C( -90), INT8_C( 42), INT8_C( 0)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C(-113), INT8_C( -67), INT8_C( -55), INT8_C( 84), INT8_C( -92), INT8_C( -66), INT8_C( 7), INT8_C( 21)), simde_mm_set_pi8(INT8_C(-113), INT8_C( -67), INT8_C( -55), INT8_C( 84), INT8_C( -92), INT8_C( -66), INT8_C( 7), INT8_C( 21)), simde_mm_set_pi8(INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1)) }, { simde_mm_set_pi8(INT8_C( -72), INT8_C( -56), INT8_C(-104), INT8_C( -6), INT8_C( 37), INT8_C(-114), INT8_C( 84), INT8_C( 21)), simde_mm_set_pi8(INT8_C( 77), INT8_C( -25), INT8_C(-104), INT8_C( 0), INT8_C( -39), INT8_C( 38), INT8_C( -54), INT8_C( -90)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C( 46), INT8_C( 120), INT8_C( -13), INT8_C(-125), INT8_C( 50), INT8_C( 10), INT8_C( 120), INT8_C( -10)), simde_mm_set_pi8(INT8_C( 85), INT8_C( 89), INT8_C( 9), INT8_C( 65), INT8_C( -82), INT8_C( -80), INT8_C( 65), INT8_C( -65)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 
0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C( -12), INT8_C( -41), INT8_C( -54), INT8_C( 92), INT8_C( -87), INT8_C( -82), INT8_C(-120), INT8_C( 37)), simde_mm_set_pi8(INT8_C( 94), INT8_C( -21), INT8_C( 36), INT8_C(-121), INT8_C( -62), INT8_C( -4), INT8_C( 42), INT8_C(-119)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C( -8), INT8_C( -60), INT8_C( 35), INT8_C( -31), INT8_C(-103), INT8_C( -7), INT8_C( -39), INT8_C( 47)), simde_mm_set_pi8(INT8_C( 13), INT8_C( -84), INT8_C(-126), INT8_C(-127), INT8_C( -82), INT8_C( 37), INT8_C( 60), INT8_C( 30)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_pcmpeqb(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_cmpeq_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -13903), INT16_C( -28259), INT16_C( 10786), INT16_C( 24518)), simde_mm_set_pi16(INT16_C( 5267), INT16_C( 1924), INT16_C( 13281), INT16_C( -25055)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( -21949), INT16_C( -13483), INT16_C( -390), INT16_C( 6377)), simde_mm_set_pi16(INT16_C( -9583), INT16_C( 6876), INT16_C( 23768), INT16_C( 6209)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( 11364), INT16_C( 28383), INT16_C( 13353), INT16_C( 14261)), simde_mm_set_pi16(INT16_C( 13422), INT16_C( 32033), INT16_C( 4055), INT16_C( 5623)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( 206), INT16_C( -1567), INT16_C( -17153), INT16_C( 18166)), 
simde_mm_set_pi16(INT16_C( 30519), INT16_C( 30643), INT16_C( 32735), INT16_C( -4195)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( 25406), INT16_C( -18343), INT16_C( -15870), INT16_C( -15505)), simde_mm_set_pi16(INT16_C( 25406), INT16_C( -18343), INT16_C( -15870), INT16_C( -15505)), simde_mm_set_pi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1)) }, { simde_mm_set_pi16(INT16_C( 21393), INT16_C( 22815), INT16_C( 322), INT16_C( 9608)), simde_mm_set_pi16(INT16_C( 23953), INT16_C( -31672), INT16_C( -7546), INT16_C( 31996)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( -16506), INT16_C( -921), INT16_C( -32189), INT16_C( 18444)), simde_mm_set_pi16(INT16_C( -10340), INT16_C( -28110), INT16_C( 24057), INT16_C( -7047)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( -1173), INT16_C( -25844), INT16_C( -10729), INT16_C( 22121)), simde_mm_set_pi16(INT16_C( 25970), INT16_C( 12718), INT16_C( 25424), INT16_C( 11867)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_cmpeq_pi16(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_u16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_pcmpeqw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -13903), INT16_C( -28259), INT16_C( 10786), INT16_C( 24518)), simde_mm_set_pi16(INT16_C( 5267), INT16_C( 1924), INT16_C( 13281), INT16_C( -25055)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( -21949), INT16_C( -13483), INT16_C( -390), INT16_C( 6377)), simde_mm_set_pi16(INT16_C( -9583), INT16_C( 6876), INT16_C( 23768), INT16_C( 6209)), simde_mm_set_pi16(INT16_C( 0), 
INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( 11364), INT16_C( 28383), INT16_C( 13353), INT16_C( 14261)), simde_mm_set_pi16(INT16_C( 13422), INT16_C( 32033), INT16_C( 4055), INT16_C( 5623)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( 206), INT16_C( -1567), INT16_C( -17153), INT16_C( 18166)), simde_mm_set_pi16(INT16_C( 30519), INT16_C( 30643), INT16_C( 32735), INT16_C( -4195)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( 25406), INT16_C( -18343), INT16_C( -15870), INT16_C( -15505)), simde_mm_set_pi16(INT16_C( 25406), INT16_C( -18343), INT16_C( -15870), INT16_C( -15505)), simde_mm_set_pi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1)) }, { simde_mm_set_pi16(INT16_C( 21393), INT16_C( 22815), INT16_C( 322), INT16_C( 9608)), simde_mm_set_pi16(INT16_C( 23953), INT16_C( -31672), INT16_C( -7546), INT16_C( 31996)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( -16506), INT16_C( -921), INT16_C( -32189), INT16_C( 18444)), simde_mm_set_pi16(INT16_C( -10340), INT16_C( -28110), INT16_C( 24057), INT16_C( -7047)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( -1173), INT16_C( -25844), INT16_C( -10729), INT16_C( 22121)), simde_mm_set_pi16(INT16_C( 25970), INT16_C( 12718), INT16_C( 25424), INT16_C( 11867)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_pcmpeqw(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_u16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; }

/* Table-driven check of simde_mm_cmpeq_pi32.
 *
 * Each row holds two 2x32-bit inputs (a, b) and the expected result r.  In
 * the rows below r is all -1 when a and b are identical and all 0 when no
 * lane matches.  Returns 0 after all rows have been checked. */
static int
test_simde_mm_cmpeq_pi32(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m64 a;
    simde__m64 b;
    simde__m64 r;
  } test_vec[8] = {
    { simde_mm_set_pi32(INT32_C( -883578301), INT32_C( 417988218)),
      simde_mm_set_pi32(INT32_C( 450681489), INT32_C( 406936792)),
      simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
    { simde_mm_set_pi32(INT32_C( 1860119652), INT32_C( 934622249)),
      simde_mm_set_pi32(INT32_C( 2099328110), INT32_C( 368512983)),
      simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
    { simde_mm_set_pi32(INT32_C( -102694706), INT32_C( 1190575359)),
      simde_mm_set_pi32(INT32_C( 2008250167), INT32_C( -274890785)),
      simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
    { simde_mm_set_pi32(INT32_C( 126096531), INT32_C( -1641991199)),
      simde_mm_set_pi32(INT32_C( 126096531), INT32_C( -1641991199)),
      simde_mm_set_pi32(INT32_C( -1), INT32_C( -1)) },
    { simde_mm_set_pi32(INT32_C( -1202101442), INT32_C( -1016086014)),
      simde_mm_set_pi32(INT32_C( -1034786090), INT32_C( -993100857)),
      simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
    { simde_mm_set_pi32(INT32_C( 1495225233), INT32_C( 629670210)),
      simde_mm_set_pi32(INT32_C( -2075632239), INT32_C( 2096947846)),
      simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
    { simde_mm_set_pi32(INT32_C( -60309626), INT32_C( 1208779331)),
      simde_mm_set_pi32(INT32_C( -1842161764), INT32_C( -461808135)),
      simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) },
    { simde_mm_set_pi32(INT32_C( -1693648021), INT32_C( 1449776663)),
      simde_mm_set_pi32(INT32_C( 833512818), INT32_C( 777741136)),
      simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) }
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0; i < vec_count; i++) {
    /* Compute the result, then call simde_mm_empty() before comparing. */
    simde__m64 r = simde_mm_cmpeq_pi32(test_vec[i].a, test_vec[i].b);
    simde_mm_empty();
    simde_test_x86_assert_equal_i32x2(r, test_vec[i].r);
  }

  simde_mm_empty();
  return 0;
}

static int test_simde_m_pcmpeqd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( -883578301), INT32_C( 417988218)), simde_mm_set_pi32(INT32_C( 450681489), INT32_C( 406936792)), simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_pi32(INT32_C( 1860119652), INT32_C( 934622249)), simde_mm_set_pi32(INT32_C( 2099328110), INT32_C(
368512983)), simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_pi32(INT32_C( -102694706), INT32_C( 1190575359)), simde_mm_set_pi32(INT32_C( 2008250167), INT32_C( -274890785)), simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_pi32(INT32_C( 126096531), INT32_C( -1641991199)), simde_mm_set_pi32(INT32_C( 126096531), INT32_C( -1641991199)), simde_mm_set_pi32(INT32_C( -1), INT32_C( -1)) }, { simde_mm_set_pi32(INT32_C( -1202101442), INT32_C( -1016086014)), simde_mm_set_pi32(INT32_C( -1034786090), INT32_C( -993100857)), simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_pi32(INT32_C( 1495225233), INT32_C( 629670210)), simde_mm_set_pi32(INT32_C( -2075632239), INT32_C( 2096947846)), simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_pi32(INT32_C( -60309626), INT32_C( 1208779331)), simde_mm_set_pi32(INT32_C( -1842161764), INT32_C( -461808135)), simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_pi32(INT32_C( -1693648021), INT32_C( 1449776663)), simde_mm_set_pi32(INT32_C( 833512818), INT32_C( 777741136)), simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_pcmpeqd(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; }

/* Table-driven check of simde_mm_cmpgt_pi8.
 *
 * Each row holds two 8x8-bit inputs (a, b) and the expected result r.  In
 * the rows below r has a lane of -1 where the signed lane of a is greater
 * than the matching lane of b, and 0 otherwise.  Returns 0 after all rows
 * have been checked. */
static int
test_simde_mm_cmpgt_pi8(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m64 a;
    simde__m64 b;
    simde__m64 r;
  } test_vec[8] = {
    { simde_mm_set_pi8(INT8_C( -77), INT8_C( 29), INT8_C( -34), INT8_C(-110), INT8_C( -78), INT8_C( -8), INT8_C( 92), INT8_C( 44)),
      simde_mm_set_pi8(INT8_C( -57), INT8_C( 99), INT8_C( -10), INT8_C( 28), INT8_C( 46), INT8_C( 79), INT8_C( -76), INT8_C( 59)),
      simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0)) },
    { simde_mm_set_pi8(INT8_C( 96), INT8_C( -9), INT8_C( -61), INT8_C( 46), INT8_C( 104), INT8_C(-105), INT8_C( 89), INT8_C( 48)),
      simde_mm_set_pi8(INT8_C( 109), INT8_C( 70), INT8_C( 13), INT8_C( 90), INT8_C(-116), INT8_C( -23), INT8_C( 10), INT8_C( -96)),
      simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1)) },
    { simde_mm_set_pi8(INT8_C( -24), INT8_C( -2), INT8_C( 73), INT8_C( 36), INT8_C( -29), INT8_C( -70), INT8_C( 73), INT8_C(-121)),
      simde_mm_set_pi8(INT8_C( 17), INT8_C( -17), INT8_C( 77), INT8_C( -2), INT8_C( 111), INT8_C(-111), INT8_C( -66), INT8_C( -30)),
      simde_mm_set_pi8(INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0)) },
    { simde_mm_set_pi8(INT8_C( 72), INT8_C(-102), INT8_C(-121), INT8_C( 41), INT8_C( -29), INT8_C(-100), INT8_C( -70), INT8_C( 82)),
      simde_mm_set_pi8(INT8_C( 101), INT8_C( 118), INT8_C(-110), INT8_C( -74), INT8_C( -57), INT8_C( -2), INT8_C( 89), INT8_C( -16)),
      simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1)) },
    { simde_mm_set_pi8(INT8_C( 64), INT8_C( 2), INT8_C(-118), INT8_C( 23), INT8_C( -88), INT8_C(-120), INT8_C( 61), INT8_C( 114)),
      simde_mm_set_pi8(INT8_C( 60), INT8_C( 91), INT8_C( 96), INT8_C( -22), INT8_C( 38), INT8_C( 49), INT8_C( 80), INT8_C( -29)),
      simde_mm_set_pi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1)) },
    { simde_mm_set_pi8(INT8_C( 31), INT8_C( -32), INT8_C(-121), INT8_C( 9), INT8_C( 80), INT8_C( 108), INT8_C( 29), INT8_C( 2)),
      simde_mm_set_pi8(INT8_C(-119), INT8_C( 33), INT8_C( 9), INT8_C( 101), INT8_C( 101), INT8_C( 79), INT8_C( 41), INT8_C( 87)),
      simde_mm_set_pi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0)) },
    { simde_mm_set_pi8(INT8_C( 96), INT8_C( -75), INT8_C(-121), INT8_C(-101), INT8_C( 10), INT8_C(-126), INT8_C( 58), INT8_C( 60)),
      simde_mm_set_pi8(INT8_C( 101), INT8_C( -73), INT8_C( 126), INT8_C( 105), INT8_C( -48), INT8_C(-119), INT8_C( -97), INT8_C( -90)),
      simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1)) },
    { simde_mm_set_pi8(INT8_C( 118), INT8_C( 118), INT8_C( -21), INT8_C( -49), INT8_C( 85), INT8_C( 69), INT8_C( 84), INT8_C( 111)),
      simde_mm_set_pi8(INT8_C( -96), INT8_C( 121), INT8_C(-110), INT8_C( -87), INT8_C( -73), INT8_C( 37), INT8_C( 45), INT8_C(-120)),
      simde_mm_set_pi8(INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1)) }
  };
  const size_t vec_count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0; i < vec_count; i++) {
    /* Compute the result, then call simde_mm_empty() before comparing. */
    simde__m64 r = simde_mm_cmpgt_pi8(test_vec[i].a, test_vec[i].b);
    simde_mm_empty();
    simde_test_x86_assert_equal_i8x8(r, test_vec[i].r);
  }

  simde_mm_empty();
  return 0;
}

static int test_simde_m_pcmpgtb(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi8(INT8_C( -77), INT8_C( 29), INT8_C( -34), INT8_C(-110), INT8_C( -78), INT8_C( -8), INT8_C( 92), INT8_C( 44)), simde_mm_set_pi8(INT8_C( -57), INT8_C( 99), INT8_C( -10), INT8_C( 28), INT8_C( 46), INT8_C( 79), INT8_C( -76), INT8_C( 59)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C( 96), INT8_C( -9), INT8_C( -61), INT8_C( 46), INT8_C( 104), INT8_C(-105), INT8_C( 89), INT8_C( 48)), simde_mm_set_pi8(INT8_C( 109), INT8_C( 70), INT8_C( 13), INT8_C( 90), INT8_C(-116), INT8_C( -23), INT8_C( 10), INT8_C( -96)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1)) }, { simde_mm_set_pi8(INT8_C( -24), INT8_C( -2), INT8_C( 73), INT8_C( 36), INT8_C( -29), INT8_C( -70), INT8_C( 73), INT8_C(-121)), simde_mm_set_pi8(INT8_C( 17), INT8_C( -17), INT8_C( 77), INT8_C( -2), INT8_C( 111), INT8_C(-111), INT8_C( -66), INT8_C( -30)), simde_mm_set_pi8(INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0)) }, {
simde_mm_set_pi8(INT8_C( 72), INT8_C(-102), INT8_C(-121), INT8_C( 41), INT8_C( -29), INT8_C(-100), INT8_C( -70), INT8_C( 82)), simde_mm_set_pi8(INT8_C( 101), INT8_C( 118), INT8_C(-110), INT8_C( -74), INT8_C( -57), INT8_C( -2), INT8_C( 89), INT8_C( -16)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1)) }, { simde_mm_set_pi8(INT8_C( 64), INT8_C( 2), INT8_C(-118), INT8_C( 23), INT8_C( -88), INT8_C(-120), INT8_C( 61), INT8_C( 114)), simde_mm_set_pi8(INT8_C( 60), INT8_C( 91), INT8_C( 96), INT8_C( -22), INT8_C( 38), INT8_C( 49), INT8_C( 80), INT8_C( -29)), simde_mm_set_pi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1)) }, { simde_mm_set_pi8(INT8_C( 31), INT8_C( -32), INT8_C(-121), INT8_C( 9), INT8_C( 80), INT8_C( 108), INT8_C( 29), INT8_C( 2)), simde_mm_set_pi8(INT8_C(-119), INT8_C( 33), INT8_C( 9), INT8_C( 101), INT8_C( 101), INT8_C( 79), INT8_C( 41), INT8_C( 87)), simde_mm_set_pi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C( 96), INT8_C( -75), INT8_C(-121), INT8_C(-101), INT8_C( 10), INT8_C(-126), INT8_C( 58), INT8_C( 60)), simde_mm_set_pi8(INT8_C( 101), INT8_C( -73), INT8_C( 126), INT8_C( 105), INT8_C( -48), INT8_C(-119), INT8_C( -97), INT8_C( -90)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1)) }, { simde_mm_set_pi8(INT8_C( 118), INT8_C( 118), INT8_C( -21), INT8_C( -49), INT8_C( 85), INT8_C( 69), INT8_C( 84), INT8_C( 111)), simde_mm_set_pi8(INT8_C( -96), INT8_C( 121), INT8_C(-110), INT8_C( -87), INT8_C( -73), INT8_C( 37), INT8_C( 45), INT8_C(-120)), simde_mm_set_pi8(INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_pcmpgtb(test_vec[i].a, 
test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_cmpgt_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( 27287), INT16_C( -17445), INT16_C( 7868), INT16_C( 17731)), simde_mm_set_pi16(INT16_C( -32130), INT16_C( -12389), INT16_C( -15721), INT16_C( -10529)), simde_mm_set_pi16(INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1)) }, { simde_mm_set_pi16(INT16_C( -23331), INT16_C( 19282), INT16_C( 27710), INT16_C( 4608)), simde_mm_set_pi16(INT16_C( -32646), INT16_C( -2319), INT16_C( 19710), INT16_C( 25425)), simde_mm_set_pi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( 29350), INT16_C( -12356), INT16_C( -18117), INT16_C( -29182)), simde_mm_set_pi16(INT16_C( 10015), INT16_C( -4879), INT16_C( 30741), INT16_C( -4144)), simde_mm_set_pi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( 30697), INT16_C( -4215), INT16_C( 31556), INT16_C( 11913)), simde_mm_set_pi16(INT16_C( -27176), INT16_C( 17667), INT16_C( -30447), INT16_C( -2179)), simde_mm_set_pi16(INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1)) }, { simde_mm_set_pi16(INT16_C( 9207), INT16_C( 4793), INT16_C( -24596), INT16_C( 10085)), simde_mm_set_pi16(INT16_C( -18727), INT16_C( -929), INT16_C( 7051), INT16_C( 8853)), simde_mm_set_pi16(INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1)) }, { simde_mm_set_pi16(INT16_C( 22734), INT16_C( 5890), INT16_C( -3490), INT16_C( -24930)), simde_mm_set_pi16(INT16_C( 23656), INT16_C( 14548), INT16_C( 31806), INT16_C( -18379)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( -28756), INT16_C( 2211), INT16_C( -15605), INT16_C( -32010)), simde_mm_set_pi16(INT16_C( -12192), INT16_C( -10879), INT16_C( 28731), INT16_C( 7911)), simde_mm_set_pi16(INT16_C( 0), INT16_C( -1), 
INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( -9646), INT16_C( -8544), INT16_C( -843), INT16_C( 12140)), simde_mm_set_pi16(INT16_C( 4324), INT16_C( 29706), INT16_C( 13667), INT16_C( -9123)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_cmpgt_pi16(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_pcmpgtw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( 27287), INT16_C( -17445), INT16_C( 7868), INT16_C( 17731)), simde_mm_set_pi16(INT16_C( -32130), INT16_C( -12389), INT16_C( -15721), INT16_C( -10529)), simde_mm_set_pi16(INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1)) }, { simde_mm_set_pi16(INT16_C( -23331), INT16_C( 19282), INT16_C( 27710), INT16_C( 4608)), simde_mm_set_pi16(INT16_C( -32646), INT16_C( -2319), INT16_C( 19710), INT16_C( 25425)), simde_mm_set_pi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( 29350), INT16_C( -12356), INT16_C( -18117), INT16_C( -29182)), simde_mm_set_pi16(INT16_C( 10015), INT16_C( -4879), INT16_C( 30741), INT16_C( -4144)), simde_mm_set_pi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( 30697), INT16_C( -4215), INT16_C( 31556), INT16_C( 11913)), simde_mm_set_pi16(INT16_C( -27176), INT16_C( 17667), INT16_C( -30447), INT16_C( -2179)), simde_mm_set_pi16(INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1)) }, { simde_mm_set_pi16(INT16_C( 9207), INT16_C( 4793), INT16_C( -24596), INT16_C( 10085)), simde_mm_set_pi16(INT16_C( -18727), INT16_C( -929), INT16_C( 7051), INT16_C( 8853)), simde_mm_set_pi16(INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1)) }, { simde_mm_set_pi16(INT16_C( 22734), INT16_C( 5890), INT16_C( -3490), INT16_C( 
-24930)), simde_mm_set_pi16(INT16_C( 23656), INT16_C( 14548), INT16_C( 31806), INT16_C( -18379)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( -28756), INT16_C( 2211), INT16_C( -15605), INT16_C( -32010)), simde_mm_set_pi16(INT16_C( -12192), INT16_C( -10879), INT16_C( 28731), INT16_C( 7911)), simde_mm_set_pi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( -9646), INT16_C( -8544), INT16_C( -843), INT16_C( 12140)), simde_mm_set_pi16(INT16_C( 4324), INT16_C( 29706), INT16_C( 13667), INT16_C( -9123)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_pcmpgtw(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_cmpgt_pi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( -1143248233), INT32_C( 1162026684)), simde_mm_set_pi32(INT32_C( -811892098), INT32_C( -689978729)), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_pi32(INT32_C( 1263707357), INT32_C( 302017598)), simde_mm_set_pi32(INT32_C( -151945094), INT32_C( 1666272510)), simde_mm_set_pi32(INT32_C( -1), INT32_C( 0)) }, { simde_mm_set_pi32(INT32_C( -809733466), INT32_C( -1912424133)), simde_mm_set_pi32(INT32_C( -319740129), INT32_C( -271550443)), simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_pi32(INT32_C( -276203543), INT32_C( 780761924)), simde_mm_set_pi32(INT32_C( 1157862872), INT32_C( -142767855)), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_pi32(INT32_C( 314123255), INT32_C( 660971500)), simde_mm_set_pi32(INT32_C( -60836135), INT32_C( 580197259)), simde_mm_set_pi32(INT32_C( -1), INT32_C( -1)) }, { simde_mm_set_pi32(INT32_C( 386029774), INT32_C( -1633750434)), 
simde_mm_set_pi32(INT32_C( 953441384), INT32_C( -1204454338)), simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_pi32(INT32_C( 144936876), INT32_C( -2097757429)), simde_mm_set_pi32(INT32_C( -712912800), INT32_C( 518484027)), simde_mm_set_pi32(INT32_C( -1), INT32_C( 0)) }, { simde_mm_set_pi32(INT32_C( -559883694), INT32_C( 795671733)), simde_mm_set_pi32(INT32_C( 1946816740), INT32_C( -597871261)), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_cmpgt_pi32(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_pcmpgtd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( -1143248233), INT32_C( 1162026684)), simde_mm_set_pi32(INT32_C( -811892098), INT32_C( -689978729)), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_pi32(INT32_C( 1263707357), INT32_C( 302017598)), simde_mm_set_pi32(INT32_C( -151945094), INT32_C( 1666272510)), simde_mm_set_pi32(INT32_C( -1), INT32_C( 0)) }, { simde_mm_set_pi32(INT32_C( -809733466), INT32_C( -1912424133)), simde_mm_set_pi32(INT32_C( -319740129), INT32_C( -271550443)), simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_pi32(INT32_C( -276203543), INT32_C( 780761924)), simde_mm_set_pi32(INT32_C( 1157862872), INT32_C( -142767855)), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_pi32(INT32_C( 314123255), INT32_C( 660971500)), simde_mm_set_pi32(INT32_C( -60836135), INT32_C( 580197259)), simde_mm_set_pi32(INT32_C( -1), INT32_C( -1)) }, { simde_mm_set_pi32(INT32_C( 386029774), INT32_C( -1633750434)), simde_mm_set_pi32(INT32_C( 953441384), INT32_C( -1204454338)), simde_mm_set_pi32(INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_pi32(INT32_C( 144936876), INT32_C( -2097757429)), simde_mm_set_pi32(INT32_C( -712912800), 
INT32_C( 518484027)), simde_mm_set_pi32(INT32_C( -1), INT32_C( 0)) }, { simde_mm_set_pi32(INT32_C( -559883694), INT32_C( 795671733)), simde_mm_set_pi32(INT32_C( 1946816740), INT32_C( -597871261)), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_pcmpgtd(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_cvtm64_si64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int64_t r; } test_vec[8] = { { simde_x_mm_set_pi64(INT64_C( 2133233461862191637)), INT64_C( 2133233461862191637) }, { simde_x_mm_set_pi64(INT64_C(-1973285463394951226)), INT64_C(-1973285463394951226) }, { simde_x_mm_set_pi64(INT64_C(-5080660655112358315)), INT64_C(-5080660655112358315) }, { simde_x_mm_set_pi64(INT64_C(-2729804181976621239)), INT64_C(-2729804181976621239) }, { simde_x_mm_set_pi64(INT64_C( 2995193706671491592)), INT64_C( 2995193706671491592) }, { simde_x_mm_set_pi64(INT64_C( 5468114770221852232)), INT64_C( 5468114770221852232) }, { simde_x_mm_set_pi64(INT64_C( 8741870191125799000)), INT64_C( 8741870191125799000) }, { simde_x_mm_set_pi64(INT64_C(-2719280269483103979)), INT64_C(-2719280269483103979) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int64_t r = simde_mm_cvtm64_si64(test_vec[i].a); simde_assert_equal_i64(r, test_vec[i].r); } return 0; } static int test_simde_mm_cvtsi32_si64(SIMDE_MUNIT_TEST_ARGS) { const struct { int32_t a; simde__m64 r; } test_vec[8] = { { INT32_C( -1348583717), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1348583717)) }, { INT32_C( -756715702), simde_mm_set_pi32(INT32_C( 0), INT32_C( -756715702)) }, { INT32_C( -1433924355), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1433924355)) }, { INT32_C( -1317069830), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1317069830)) }, { INT32_C( 1132090539), simde_mm_set_pi32(INT32_C( 0), INT32_C( 
1132090539)) }, { INT32_C( -1685122075), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1685122075)) }, { INT32_C( -782778794), simde_mm_set_pi32(INT32_C( 0), INT32_C( -782778794)) }, { INT32_C( -1603608856), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1603608856)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_cvtsi32_si64(test_vec[i].a); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_from_int(SIMDE_MUNIT_TEST_ARGS) { const struct { int32_t a; simde__m64 r; } test_vec[8] = { { INT32_C( -1348583717), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1348583717)) }, { INT32_C( -756715702), simde_mm_set_pi32(INT32_C( 0), INT32_C( -756715702)) }, { INT32_C( -1433924355), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1433924355)) }, { INT32_C( -1317069830), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1317069830)) }, { INT32_C( 1132090539), simde_mm_set_pi32(INT32_C( 0), INT32_C( 1132090539)) }, { INT32_C( -1685122075), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1685122075)) }, { INT32_C( -782778794), simde_mm_set_pi32(INT32_C( 0), INT32_C( -782778794)) }, { INT32_C( -1603608856), simde_mm_set_pi32(INT32_C( 0), INT32_C( -1603608856)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_from_int(test_vec[i].a); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_cvtsi64_m64(SIMDE_MUNIT_TEST_ARGS) { const struct { int64_t a; simde__m64 r; } test_vec[8] = { { INT64_C( 2448316468135826021), simde_x_mm_set_pi64(INT64_C( 2448316468135826021)) }, { INT64_C(-5945835882033612295), simde_x_mm_set_pi64(INT64_C(-5945835882033612295)) }, { INT64_C( 5992090895212857513), simde_x_mm_set_pi64(INT64_C( 5992090895212857513)) }, { INT64_C(-6796228402041923924), simde_x_mm_set_pi64(INT64_C(-6796228402041923924)) }, { INT64_C(-8511645703056027592), 
simde_x_mm_set_pi64(INT64_C(-8511645703056027592)) }, { INT64_C(-8723546203794185453), simde_x_mm_set_pi64(INT64_C(-8723546203794185453)) }, { INT64_C( 4345402151036158873), simde_x_mm_set_pi64(INT64_C( 4345402151036158873)) }, { INT64_C(-6661466122659936384), simde_x_mm_set_pi64(INT64_C(-6661466122659936384)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_cvtsi64_m64(test_vec[i].a); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_from_int64(SIMDE_MUNIT_TEST_ARGS) { const struct { int64_t a; simde__m64 r; } test_vec[8] = { { INT64_C( 2448316468135826021), simde_x_mm_set_pi64(INT64_C( 2448316468135826021)) }, { INT64_C(-5945835882033612295), simde_x_mm_set_pi64(INT64_C(-5945835882033612295)) }, { INT64_C( 5992090895212857513), simde_x_mm_set_pi64(INT64_C( 5992090895212857513)) }, { INT64_C(-6796228402041923924), simde_x_mm_set_pi64(INT64_C(-6796228402041923924)) }, { INT64_C(-8511645703056027592), simde_x_mm_set_pi64(INT64_C(-8511645703056027592)) }, { INT64_C(-8723546203794185453), simde_x_mm_set_pi64(INT64_C(-8723546203794185453)) }, { INT64_C( 4345402151036158873), simde_x_mm_set_pi64(INT64_C( 4345402151036158873)) }, { INT64_C(-6661466122659936384), simde_x_mm_set_pi64(INT64_C(-6661466122659936384)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_from_int64(test_vec[i].a); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_cvtsi64_si32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int32_t r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( 1382271190), INT32_C( -17653840)), INT32_C( -17653840), }, { simde_mm_set_pi32(INT32_C( 2132466748), INT32_C( -1483731059)), INT32_C( -1483731059), }, { simde_mm_set_pi32(INT32_C( -822228698), INT32_C( 1004225555)), INT32_C( 1004225555), }, { 
simde_mm_set_pi32(INT32_C( 558984757), INT32_C( -1886991323)), INT32_C( -1886991323), }, { simde_mm_set_pi32(INT32_C( 927499451), INT32_C( 1754078566)), INT32_C( 1754078566), }, { simde_mm_set_pi32(INT32_C( -1298862100), INT32_C( -1081030334)), INT32_C( -1081030334), }, { simde_mm_set_pi32(INT32_C( -2034437538), INT32_C( 1272751087)), INT32_C( 1272751087), }, { simde_mm_set_pi32(INT32_C( -1114400737), INT32_C( 1318901980)), INT32_C( 1318901980), } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int32_t r = simde_mm_cvtsi64_si32(test_vec[i].a); simde_mm_empty(); simde_assert_equal_i32(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_madd_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -30343), INT16_C( -26392), INT16_C( 12299), INT16_C( 4601)), simde_mm_set_pi16(INT16_C( 1486), INT16_C( 26809), INT16_C( 7836), INT16_C( -25805)), simde_mm_set_pi32(INT32_C( -752632826), INT32_C( -22353841)) }, { simde_mm_set_pi16(INT16_C( 1890), INT16_C( 31305), INT16_C( -30077), INT16_C( 2552)), simde_mm_set_pi16(INT16_C( -26920), INT16_C( -29540), INT16_C( 15300), INT16_C( 26578)), simde_mm_set_pi32(INT32_C( -975628500), INT32_C( -392351044)) }, { simde_mm_set_pi16(INT16_C( 22384), INT16_C( 696), INT16_C( 25907), INT16_C( -24876)), simde_mm_set_pi16(INT16_C( -11857), INT16_C( 27254), INT16_C( -31966), INT16_C( 7796)), simde_mm_set_pi32(INT32_C( -246438304), INT32_C( -1022076458)) }, { simde_mm_set_pi16(INT16_C( 29956), INT16_C( -2269), INT16_C( 6641), INT16_C( -23007)), simde_mm_set_pi16(INT16_C( 8143), INT16_C( 30485), INT16_C( 15411), INT16_C( -14515)), simde_mm_set_pi32(INT32_C( 174761243), INT32_C( 436291056)) }, { simde_mm_set_pi16(INT16_C( 7615), INT16_C( 20384), INT16_C( 5326), INT16_C( -12172)), simde_mm_set_pi16(INT16_C( 26893), INT16_C( 19452), INT16_C( 1570), INT16_C( -21018)), simde_mm_set_pi32(INT32_C( 601299763), INT32_C( 
264192916)) }, { simde_mm_set_pi16(INT16_C( 21548), INT16_C( 8299), INT16_C( -27943), INT16_C( -19629)), simde_mm_set_pi16(INT16_C( -7799), INT16_C( -19736), INT16_C( -28205), INT16_C( 18816)), simde_mm_set_pi32(INT32_C( -331841916), INT32_C( 418793051)) }, { simde_mm_set_pi16(INT16_C( -14814), INT16_C( -21565), INT16_C( 4061), INT16_C( 32148)), simde_mm_set_pi16(INT16_C( 26150), INT16_C( 16339), INT16_C( -29106), INT16_C( 3765)), simde_mm_set_pi32(INT32_C( -739736635), INT32_C( 2837754)) }, { simde_mm_set_pi16(INT16_C( -14349), INT16_C( 29040), INT16_C( 10943), INT16_C( -14909)), simde_mm_set_pi16(INT16_C( 4672), INT16_C( 28858), INT16_C( 1393), INT16_C( 4521)), simde_mm_set_pi32(INT32_C( 770997792), INT32_C( -52159990)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_madd_pi16(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_pmaddwd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -30343), INT16_C( -26392), INT16_C( 12299), INT16_C( 4601)), simde_mm_set_pi16(INT16_C( 1486), INT16_C( 26809), INT16_C( 7836), INT16_C( -25805)), simde_mm_set_pi32(INT32_C( -752632826), INT32_C( -22353841)) }, { simde_mm_set_pi16(INT16_C( 1890), INT16_C( 31305), INT16_C( -30077), INT16_C( 2552)), simde_mm_set_pi16(INT16_C( -26920), INT16_C( -29540), INT16_C( 15300), INT16_C( 26578)), simde_mm_set_pi32(INT32_C( -975628500), INT32_C( -392351044)) }, { simde_mm_set_pi16(INT16_C( 22384), INT16_C( 696), INT16_C( 25907), INT16_C( -24876)), simde_mm_set_pi16(INT16_C( -11857), INT16_C( 27254), INT16_C( -31966), INT16_C( 7796)), simde_mm_set_pi32(INT32_C( -246438304), INT32_C( -1022076458)) }, { simde_mm_set_pi16(INT16_C( 29956), INT16_C( -2269), INT16_C( 6641), INT16_C( -23007)), simde_mm_set_pi16(INT16_C( 8143), INT16_C( 30485), INT16_C( 15411), INT16_C( 
-14515)), simde_mm_set_pi32(INT32_C( 174761243), INT32_C( 436291056)) }, { simde_mm_set_pi16(INT16_C( 7615), INT16_C( 20384), INT16_C( 5326), INT16_C( -12172)), simde_mm_set_pi16(INT16_C( 26893), INT16_C( 19452), INT16_C( 1570), INT16_C( -21018)), simde_mm_set_pi32(INT32_C( 601299763), INT32_C( 264192916)) }, { simde_mm_set_pi16(INT16_C( 21548), INT16_C( 8299), INT16_C( -27943), INT16_C( -19629)), simde_mm_set_pi16(INT16_C( -7799), INT16_C( -19736), INT16_C( -28205), INT16_C( 18816)), simde_mm_set_pi32(INT32_C( -331841916), INT32_C( 418793051)) }, { simde_mm_set_pi16(INT16_C( -14814), INT16_C( -21565), INT16_C( 4061), INT16_C( 32148)), simde_mm_set_pi16(INT16_C( 26150), INT16_C( 16339), INT16_C( -29106), INT16_C( 3765)), simde_mm_set_pi32(INT32_C( -739736635), INT32_C( 2837754)) }, { simde_mm_set_pi16(INT16_C( -14349), INT16_C( 29040), INT16_C( 10943), INT16_C( -14909)), simde_mm_set_pi16(INT16_C( 4672), INT16_C( 28858), INT16_C( 1393), INT16_C( 4521)), simde_mm_set_pi32(INT32_C( 770997792), INT32_C( -52159990)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_pmaddwd(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_mulhi_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( 8979), INT16_C( 5154), INT16_C( -16978), INT16_C( 30928)), simde_mm_set_pi16(INT16_C( 131), INT16_C( -26526), INT16_C( -28508), INT16_C( 3156)), simde_mm_set_pi16(INT16_C( 17), INT16_C( -2087), INT16_C( 7385), INT16_C( 1489)) }, { simde_mm_set_pi16(INT16_C( -20724), INT16_C( -32562), INT16_C( -4287), INT16_C( -11994)), simde_mm_set_pi16(INT16_C( -1407), INT16_C( -20477), INT16_C( 2350), INT16_C( -5112)), simde_mm_set_pi16(INT16_C( 444), INT16_C( 10174), INT16_C( -154), INT16_C( 935)) }, { simde_mm_set_pi16(INT16_C( -19242), INT16_C( -20442), INT16_C( 
-24803), INT16_C( 26694)), simde_mm_set_pi16(INT16_C( 13233), INT16_C( -6736), INT16_C( 457), INT16_C( 16731)), simde_mm_set_pi16(INT16_C( -3886), INT16_C( 2101), INT16_C( -173), INT16_C( 6814)) }, { simde_mm_set_pi16(INT16_C( -7830), INT16_C( 18993), INT16_C( 2047), INT16_C( 32735)), simde_mm_set_pi16(INT16_C( 17045), INT16_C( -23188), INT16_C( -16247), INT16_C( -6369)), simde_mm_set_pi16(INT16_C( -2037), INT16_C( -6721), INT16_C( -508), INT16_C( -3182)) }, { simde_mm_set_pi16(INT16_C( -20331), INT16_C( -1771), INT16_C( 7319), INT16_C( -2172)), simde_mm_set_pi16(INT16_C( 27473), INT16_C( 3736), INT16_C( 26635), INT16_C( -24632)), simde_mm_set_pi16(INT16_C( -8523), INT16_C( -101), INT16_C( 2974), INT16_C( 816)) }, { simde_mm_set_pi16(INT16_C( 18863), INT16_C( 29355), INT16_C( 22063), INT16_C( 24992)), simde_mm_set_pi16(INT16_C( 31646), INT16_C( 10850), INT16_C( -1174), INT16_C( 6386)), simde_mm_set_pi16(INT16_C( 9108), INT16_C( 4859), INT16_C( -396), INT16_C( 2435)) }, { simde_mm_set_pi16(INT16_C( 12919), INT16_C( 27836), INT16_C( -15473), INT16_C( 31227)), simde_mm_set_pi16(INT16_C( -2051), INT16_C( 6265), INT16_C( -13839), INT16_C( 14795)), simde_mm_set_pi16(INT16_C( -405), INT16_C( 2661), INT16_C( 3267), INT16_C( 7049)) }, { simde_mm_set_pi16(INT16_C( -20265), INT16_C( -2387), INT16_C( 1893), INT16_C( 16606)), simde_mm_set_pi16(INT16_C( 31589), INT16_C( -8123), INT16_C( 26642), INT16_C( 6982)), simde_mm_set_pi16(INT16_C( -9768), INT16_C( 295), INT16_C( 769), INT16_C( 1769)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_mulhi_pi16(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_pmulhw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( 8979), INT16_C( 5154), INT16_C( -16978), INT16_C( 30928)), simde_mm_set_pi16(INT16_C( 
131), INT16_C( -26526), INT16_C( -28508), INT16_C( 3156)), simde_mm_set_pi16(INT16_C( 17), INT16_C( -2087), INT16_C( 7385), INT16_C( 1489)) }, { simde_mm_set_pi16(INT16_C( -20724), INT16_C( -32562), INT16_C( -4287), INT16_C( -11994)), simde_mm_set_pi16(INT16_C( -1407), INT16_C( -20477), INT16_C( 2350), INT16_C( -5112)), simde_mm_set_pi16(INT16_C( 444), INT16_C( 10174), INT16_C( -154), INT16_C( 935)) }, { simde_mm_set_pi16(INT16_C( -19242), INT16_C( -20442), INT16_C( -24803), INT16_C( 26694)), simde_mm_set_pi16(INT16_C( 13233), INT16_C( -6736), INT16_C( 457), INT16_C( 16731)), simde_mm_set_pi16(INT16_C( -3886), INT16_C( 2101), INT16_C( -173), INT16_C( 6814)) }, { simde_mm_set_pi16(INT16_C( -7830), INT16_C( 18993), INT16_C( 2047), INT16_C( 32735)), simde_mm_set_pi16(INT16_C( 17045), INT16_C( -23188), INT16_C( -16247), INT16_C( -6369)), simde_mm_set_pi16(INT16_C( -2037), INT16_C( -6721), INT16_C( -508), INT16_C( -3182)) }, { simde_mm_set_pi16(INT16_C( -20331), INT16_C( -1771), INT16_C( 7319), INT16_C( -2172)), simde_mm_set_pi16(INT16_C( 27473), INT16_C( 3736), INT16_C( 26635), INT16_C( -24632)), simde_mm_set_pi16(INT16_C( -8523), INT16_C( -101), INT16_C( 2974), INT16_C( 816)) }, { simde_mm_set_pi16(INT16_C( 18863), INT16_C( 29355), INT16_C( 22063), INT16_C( 24992)), simde_mm_set_pi16(INT16_C( 31646), INT16_C( 10850), INT16_C( -1174), INT16_C( 6386)), simde_mm_set_pi16(INT16_C( 9108), INT16_C( 4859), INT16_C( -396), INT16_C( 2435)) }, { simde_mm_set_pi16(INT16_C( 12919), INT16_C( 27836), INT16_C( -15473), INT16_C( 31227)), simde_mm_set_pi16(INT16_C( -2051), INT16_C( 6265), INT16_C( -13839), INT16_C( 14795)), simde_mm_set_pi16(INT16_C( -405), INT16_C( 2661), INT16_C( 3267), INT16_C( 7049)) }, { simde_mm_set_pi16(INT16_C( -20265), INT16_C( -2387), INT16_C( 1893), INT16_C( 16606)), simde_mm_set_pi16(INT16_C( 31589), INT16_C( -8123), INT16_C( 26642), INT16_C( 6982)), simde_mm_set_pi16(INT16_C( -9768), INT16_C( 295), INT16_C( 769), INT16_C( 1769)) } }; for (size_t i = 0 ; i 
< (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_pmulhw(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_mullo_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( 1243), INT16_C( 20416), INT16_C( 15667), INT16_C( 4430)), simde_mm_set_pi16(INT16_C( -5775), INT16_C( 26694), INT16_C( 17028), INT16_C( 23537)), simde_mm_set_pi16(INT16_C( 30635), INT16_C( -12672), INT16_C( -19380), INT16_C( 1134)) }, { simde_mm_set_pi16(INT16_C( -5230), INT16_C( -20726), INT16_C( -32301), INT16_C( 4324)), simde_mm_set_pi16(INT16_C( 31416), INT16_C( -24870), INT16_C( 28490), INT16_C( -28474)), simde_mm_set_pi16(INT16_C( -6928), INT16_C( 14980), INT16_C( 1022), INT16_C( 20568)) }, { simde_mm_set_pi16(INT16_C( 359), INT16_C( 28315), INT16_C( 30109), INT16_C( 30370)), simde_mm_set_pi16(INT16_C( 11362), INT16_C( -24534), INT16_C( -7779), INT16_C( -31174)), simde_mm_set_pi16(INT16_C( 15726), INT16_C( 1390), INT16_C( 7753), INT16_C( -21324)) }, { simde_mm_set_pi16(INT16_C( -7682), INT16_C( -17472), INT16_C( 1125), INT16_C( -30733)), simde_mm_set_pi16(INT16_C( 27323), INT16_C( 21286), INT16_C( 28332), INT16_C( -26848)), simde_mm_set_pi16(INT16_C( 16522), INT16_C( 7808), INT16_C( 23004), INT16_C( 21344)) }, { simde_mm_set_pi16(INT16_C( 28468), INT16_C( -4021), INT16_C( 23325), INT16_C( -24525)), simde_mm_set_pi16(INT16_C( 29242), INT16_C( -5135), INT16_C( 12241), INT16_C( -5671)), simde_mm_set_pi16(INT16_C( 22984), INT16_C( 3995), INT16_C( -19027), INT16_C( 13883)) }, { simde_mm_set_pi16(INT16_C( -11233), INT16_C( -9235), INT16_C( -23340), INT16_C( -55)), simde_mm_set_pi16(INT16_C( -21567), INT16_C( -13689), INT16_C( 21540), INT16_C( 32686)), simde_mm_set_pi16(INT16_C( -24481), INT16_C( -1029), INT16_C( -16944), INT16_C( -28258)) }, { simde_mm_set_pi16(INT16_C( 24703), INT16_C( 
-27133), INT16_C( 13289), INT16_C( 20833)), simde_mm_set_pi16(INT16_C( -32748), INT16_C( 15704), INT16_C( 10635), INT16_C( -13911)), simde_mm_set_pi16(INT16_C( 2540), INT16_C( 18440), INT16_C( -32637), INT16_C( -7671)) }, { simde_mm_set_pi16(INT16_C( -20397), INT16_C( -17293), INT16_C( -2038), INT16_C( -24305)), simde_mm_set_pi16(INT16_C( -25280), INT16_C( 2678), INT16_C( -17798), INT16_C( 10227)), simde_mm_set_pi16(INT16_C( -1088), INT16_C( 23298), INT16_C( 30916), INT16_C( 10813)), } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_mullo_pi16(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_pmullw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( 1243), INT16_C( 20416), INT16_C( 15667), INT16_C( 4430)), simde_mm_set_pi16(INT16_C( -5775), INT16_C( 26694), INT16_C( 17028), INT16_C( 23537)), simde_mm_set_pi16(INT16_C( 30635), INT16_C( -12672), INT16_C( -19380), INT16_C( 1134)) }, { simde_mm_set_pi16(INT16_C( -5230), INT16_C( -20726), INT16_C( -32301), INT16_C( 4324)), simde_mm_set_pi16(INT16_C( 31416), INT16_C( -24870), INT16_C( 28490), INT16_C( -28474)), simde_mm_set_pi16(INT16_C( -6928), INT16_C( 14980), INT16_C( 1022), INT16_C( 20568)) }, { simde_mm_set_pi16(INT16_C( 359), INT16_C( 28315), INT16_C( 30109), INT16_C( 30370)), simde_mm_set_pi16(INT16_C( 11362), INT16_C( -24534), INT16_C( -7779), INT16_C( -31174)), simde_mm_set_pi16(INT16_C( 15726), INT16_C( 1390), INT16_C( 7753), INT16_C( -21324)) }, { simde_mm_set_pi16(INT16_C( -7682), INT16_C( -17472), INT16_C( 1125), INT16_C( -30733)), simde_mm_set_pi16(INT16_C( 27323), INT16_C( 21286), INT16_C( 28332), INT16_C( -26848)), simde_mm_set_pi16(INT16_C( 16522), INT16_C( 7808), INT16_C( 23004), INT16_C( 21344)) }, { simde_mm_set_pi16(INT16_C( 28468), INT16_C( -4021), INT16_C( 23325), 
INT16_C( -24525)), simde_mm_set_pi16(INT16_C( 29242), INT16_C( -5135), INT16_C( 12241), INT16_C( -5671)), simde_mm_set_pi16(INT16_C( 22984), INT16_C( 3995), INT16_C( -19027), INT16_C( 13883)) }, { simde_mm_set_pi16(INT16_C( -11233), INT16_C( -9235), INT16_C( -23340), INT16_C( -55)), simde_mm_set_pi16(INT16_C( -21567), INT16_C( -13689), INT16_C( 21540), INT16_C( 32686)), simde_mm_set_pi16(INT16_C( -24481), INT16_C( -1029), INT16_C( -16944), INT16_C( -28258)) }, { simde_mm_set_pi16(INT16_C( 24703), INT16_C( -27133), INT16_C( 13289), INT16_C( 20833)), simde_mm_set_pi16(INT16_C( -32748), INT16_C( 15704), INT16_C( 10635), INT16_C( -13911)), simde_mm_set_pi16(INT16_C( 2540), INT16_C( 18440), INT16_C( -32637), INT16_C( -7671)) }, { simde_mm_set_pi16(INT16_C( -20397), INT16_C( -17293), INT16_C( -2038), INT16_C( -24305)), simde_mm_set_pi16(INT16_C( -25280), INT16_C( 2678), INT16_C( -17798), INT16_C( 10227)), simde_mm_set_pi16(INT16_C( -1088), INT16_C( 23298), INT16_C( 30916), INT16_C( 10813)), } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_pmullw(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_or_si64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a; const int64_t b; const int64_t r; } test_vec[] = { { INT64_C( 3312320436217201359), INT64_C( 662252913283123072), INT64_C( 3312393021315565519) }, { INT64_C( 3388857949796257675), INT64_C( 7457130408406401460), INT64_C( 8034330312552992703) }, { -INT64_C( 5449490564862084274), INT64_C( 7663705856399316172), -INT64_C( 117102388756643890) }, { -INT64_C( 8085327080866302142), -INT64_C( 439425686846071066), -INT64_C( 4505815857774618) }, { -INT64_C( 4234638224587525293), INT64_C( 3884478865390108574), -INT64_C( 721707359331680289) }, { INT64_C( 1302274952955758247), -INT64_C( 7359314694205575878), -INT64_C( 7215049796416868417) }, { INT64_C( 
3577930219192912362), -INT64_C( 3133495794381963941), -INT64_C( 745382297381913093) }, { INT64_C( 3087896977390130332), -INT64_C( 3767562487154644266), -INT64_C( 1441453207671832866) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m64 a = simde_mm_cvtsi64_m64(test_vec[i].a); simde__m64 b = simde_mm_cvtsi64_m64(test_vec[i].b); simde__m64 r = simde_mm_or_si64(a, b); simde_test_x86_assert_equal_u64x1(r, simde_mm_cvtsi64_m64(test_vec[i].r)); } return 0; } static int test_simde_m_por (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a; const int64_t b; const int64_t r; } test_vec[] = { { INT64_C( 3312320436217201359), INT64_C( 662252913283123072), INT64_C( 3312393021315565519) }, { INT64_C( 3388857949796257675), INT64_C( 7457130408406401460), INT64_C( 8034330312552992703) }, { -INT64_C( 5449490564862084274), INT64_C( 7663705856399316172), -INT64_C( 117102388756643890) }, { -INT64_C( 8085327080866302142), -INT64_C( 439425686846071066), -INT64_C( 4505815857774618) }, { -INT64_C( 4234638224587525293), INT64_C( 3884478865390108574), -INT64_C( 721707359331680289) }, { INT64_C( 1302274952955758247), -INT64_C( 7359314694205575878), -INT64_C( 7215049796416868417) }, { INT64_C( 3577930219192912362), -INT64_C( 3133495794381963941), -INT64_C( 745382297381913093) }, { INT64_C( 3087896977390130332), -INT64_C( 3767562487154644266), -INT64_C( 1441453207671832866) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m64 a = simde_mm_cvtsi64_m64(test_vec[i].a); simde__m64 b = simde_mm_cvtsi64_m64(test_vec[i].b); simde__m64 r = simde_m_por(a, b); simde_test_x86_assert_equal_u64x1(r, simde_mm_cvtsi64_m64(test_vec[i].r)); } return 0; } static int test_simde_mm_packs_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -17383), INT16_C( -12181), INT16_C( -2968), INT16_C( 26626)), simde_mm_set_pi16(INT16_C( -10040), INT16_C( 
13688), INT16_C( -30953), INT16_C( -4037)), simde_mm_set_pi8 (INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127)) }, { simde_mm_set_pi16(INT16_C( -20194), INT16_C( 12331), INT16_C( -23109), INT16_C( 25162)), simde_mm_set_pi16(INT16_C( -1071), INT16_C( 20521), INT16_C( 860), INT16_C( 5875)), simde_mm_set_pi8 (INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127)) }, { simde_mm_set_pi16(INT16_C( -12255), INT16_C( 13277), INT16_C( -28950), INT16_C( 5253)), simde_mm_set_pi16(INT16_C( 25343), INT16_C( -1252), INT16_C( 3561), INT16_C( 7538)), simde_mm_set_pi8 (INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127)) }, { simde_mm_set_pi16(INT16_C( -11251), INT16_C( -21118), INT16_C( -2077), INT16_C( -20336)), simde_mm_set_pi16(INT16_C( 23412), INT16_C( 7898), INT16_C( -3571), INT16_C( 9242)), simde_mm_set_pi8 (INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128)) }, { simde_mm_set_pi16(INT16_C( 28180), INT16_C( 25339), INT16_C( 20328), INT16_C( 3051)), simde_mm_set_pi16(INT16_C( 31135), INT16_C( 3581), INT16_C( 11552), INT16_C( 25034)), simde_mm_set_pi8 (INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127)) }, { simde_mm_set_pi16(INT16_C( 14129), INT16_C( -2982), INT16_C( -13260), INT16_C( -12225)), simde_mm_set_pi16(INT16_C( -557), INT16_C( -14564), INT16_C( -28065), INT16_C( 25636)), simde_mm_set_pi8 (INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128)) }, { simde_mm_set_pi16(INT16_C( 31333), INT16_C( 20796), INT16_C( 16795), INT16_C( -5127)), simde_mm_set_pi16(INT16_C( 22060), INT16_C( 10681), INT16_C( 28763), INT16_C( 2847)), simde_mm_set_pi8 (INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), 
INT8_C( 127), INT8_C(-128)) }, { simde_mm_set_pi16(INT16_C( 167), INT16_C( 233), INT16_C( 115), INT16_C( 126)), simde_mm_set_pi16(INT16_C( 10), INT16_C( 94), INT16_C( 181), INT16_C( 233)), simde_mm_set_pi8 (INT8_C( 10), INT8_C( 94), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 115), INT8_C( 126)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_packs_pi16(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_packsswb(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -17383), INT16_C( -12181), INT16_C( -2968), INT16_C( 26626)), simde_mm_set_pi16(INT16_C( -10040), INT16_C( 13688), INT16_C( -30953), INT16_C( -4037)), simde_mm_set_pi8 (INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127)) }, { simde_mm_set_pi16(INT16_C( -20194), INT16_C( 12331), INT16_C( -23109), INT16_C( 25162)), simde_mm_set_pi16(INT16_C( -1071), INT16_C( 20521), INT16_C( 860), INT16_C( 5875)), simde_mm_set_pi8 (INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127)) }, { simde_mm_set_pi16(INT16_C( -12255), INT16_C( 13277), INT16_C( -28950), INT16_C( 5253)), simde_mm_set_pi16(INT16_C( 25343), INT16_C( -1252), INT16_C( 3561), INT16_C( 7538)), simde_mm_set_pi8 (INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127)) }, { simde_mm_set_pi16(INT16_C( -11251), INT16_C( -21118), INT16_C( -2077), INT16_C( -20336)), simde_mm_set_pi16(INT16_C( 23412), INT16_C( 7898), INT16_C( -3571), INT16_C( 9242)), simde_mm_set_pi8 (INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C(-128)) }, { simde_mm_set_pi16(INT16_C( 28180), INT16_C( 25339), INT16_C( 
20328), INT16_C( 3051)), simde_mm_set_pi16(INT16_C( 31135), INT16_C( 3581), INT16_C( 11552), INT16_C( 25034)), simde_mm_set_pi8 (INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127)) }, { simde_mm_set_pi16(INT16_C( 14129), INT16_C( -2982), INT16_C( -13260), INT16_C( -12225)), simde_mm_set_pi16(INT16_C( -557), INT16_C( -14564), INT16_C( -28065), INT16_C( 25636)), simde_mm_set_pi8 (INT8_C(-128), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C(-128)) }, { simde_mm_set_pi16(INT16_C( 31333), INT16_C( 20796), INT16_C( 16795), INT16_C( -5127)), simde_mm_set_pi16(INT16_C( 22060), INT16_C( 10681), INT16_C( 28763), INT16_C( 2847)), simde_mm_set_pi8 (INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(-128)) }, { simde_mm_set_pi16(INT16_C( 167), INT16_C( 233), INT16_C( 115), INT16_C( 126)), simde_mm_set_pi16(INT16_C( 10), INT16_C( 94), INT16_C( 181), INT16_C( 233)), simde_mm_set_pi8 (INT8_C( 10), INT8_C( 94), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 115), INT8_C( 126)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_packsswb(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_packssdw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( -2875748), INT32_C( -4)), simde_mm_set_pi32(INT32_C( -53), INT32_C( 934884)), simde_mm_set_pi16(INT16_C( -53), INT16_C( 32767), INT16_C( -32768), INT16_C( -4)) }, { simde_mm_set_pi32(INT32_C( 1), INT32_C( -216790321)), simde_mm_set_pi32(INT32_C( 120), INT32_C( -379925)), simde_mm_set_pi16(INT16_C( 120), INT16_C( -32768), INT16_C( 1), INT16_C( -32768)) }, { simde_mm_set_pi32(INT32_C( -18), INT32_C( 281)), simde_mm_set_pi32(INT32_C( -33064), 
INT32_C( 130)), simde_mm_set_pi16(INT16_C( -32768), INT16_C( 130), INT16_C( -18), INT16_C( 281)) }, { simde_mm_set_pi32(INT32_C( -51729), INT32_C( 14)), simde_mm_set_pi32(INT32_C( 6852), INT32_C( -36)), simde_mm_set_pi16(INT16_C( 6852), INT16_C( -36), INT16_C( -32768), INT16_C( 14)) }, { simde_mm_set_pi32(INT32_C( -1), INT32_C( -210)), simde_mm_set_pi32(INT32_C( 3024991), INT32_C( 30957735)), simde_mm_set_pi16(INT16_C( 32767), INT16_C( 32767), INT16_C( -1), INT16_C( -210)) }, { simde_mm_set_pi32(INT32_C( 28), INT32_C( 890)), simde_mm_set_pi32(INT32_C( -2031601), INT32_C( -5309)), simde_mm_set_pi16(INT16_C( -32768), INT16_C( -5309), INT16_C( 28), INT16_C( 890)) }, { simde_mm_set_pi32(INT32_C( -80), INT32_C( 4267394)), simde_mm_set_pi32(INT32_C( 34757305), INT32_C( 127105)), simde_mm_set_pi16(INT16_C( 32767), INT16_C( 32767), INT16_C( -80), INT16_C( 32767)) }, { simde_mm_set_pi32(INT32_C( -2773123), INT32_C( -42)), simde_mm_set_pi32(INT32_C( 33), INT32_C( 3534549)), simde_mm_set_pi16(INT16_C( 33), INT16_C( 32767), INT16_C( -32768), INT16_C( -42)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_packssdw(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_packs_pi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( -2875748), INT32_C( -4)), simde_mm_set_pi32(INT32_C( -53), INT32_C( 934884)), simde_mm_set_pi16(INT16_C( -53), INT16_C( 32767), INT16_C( -32768), INT16_C( -4)) }, { simde_mm_set_pi32(INT32_C( 1), INT32_C( -216790321)), simde_mm_set_pi32(INT32_C( 120), INT32_C( -379925)), simde_mm_set_pi16(INT16_C( 120), INT16_C( -32768), INT16_C( 1), INT16_C( -32768)) }, { simde_mm_set_pi32(INT32_C( -18), INT32_C( 281)), simde_mm_set_pi32(INT32_C( -33064), INT32_C( 130)), simde_mm_set_pi16(INT16_C( -32768), INT16_C( 130), INT16_C( -18), 
INT16_C( 281)) }, { simde_mm_set_pi32(INT32_C( -51729), INT32_C( 14)), simde_mm_set_pi32(INT32_C( 6852), INT32_C( -36)), simde_mm_set_pi16(INT16_C( 6852), INT16_C( -36), INT16_C( -32768), INT16_C( 14)) }, { simde_mm_set_pi32(INT32_C( -1), INT32_C( -210)), simde_mm_set_pi32(INT32_C( 3024991), INT32_C( 30957735)), simde_mm_set_pi16(INT16_C( 32767), INT16_C( 32767), INT16_C( -1), INT16_C( -210)) }, { simde_mm_set_pi32(INT32_C( 28), INT32_C( 890)), simde_mm_set_pi32(INT32_C( -2031601), INT32_C( -5309)), simde_mm_set_pi16(INT16_C( -32768), INT16_C( -5309), INT16_C( 28), INT16_C( 890)) }, { simde_mm_set_pi32(INT32_C( -80), INT32_C( 4267394)), simde_mm_set_pi32(INT32_C( 34757305), INT32_C( 127105)), simde_mm_set_pi16(INT16_C( 32767), INT16_C( 32767), INT16_C( -80), INT16_C( 32767)) }, { simde_mm_set_pi32(INT32_C( -2773123), INT32_C( -42)), simde_mm_set_pi32(INT32_C( 33), INT32_C( 3534549)), simde_mm_set_pi16(INT16_C( 33), INT16_C( 32767), INT16_C( -32768), INT16_C( -42)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_packs_pi32(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_packs_pu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -2), INT16_C( 113), INT16_C( 49), INT16_C( -647)), simde_mm_set_pi16(INT16_C( 56), INT16_C( 5), INT16_C( 1), INT16_C( -54)), simde_mm_set_pi8 (INT8_C( 56), INT8_C( 5), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 113), INT8_C( 49), INT8_C( 0)) }, { simde_mm_set_pi16(INT16_C( -1), INT16_C( -206), INT16_C( -1650), INT16_C( -109)), simde_mm_set_pi16(INT16_C( -3828), INT16_C( 2), INT16_C( 471), INT16_C( 2)), simde_mm_set_pi8 (INT8_C( 0), INT8_C( 2), INT8_C( -1), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi16(INT16_C( 3), INT16_C( -2), INT16_C( 500), INT16_C( -100)), 
simde_mm_set_pi16(INT16_C( -1574), INT16_C( -1), INT16_C( -1), INT16_C( 2)), simde_mm_set_pi8 (INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 2), INT8_C( 3), INT8_C( 0), INT8_C( -1), INT8_C( 0)) }, { simde_mm_set_pi16(INT16_C( -13), INT16_C( -217), INT16_C( 3305), INT16_C( -10)), simde_mm_set_pi16(INT16_C( -370), INT16_C( 181), INT16_C( 1), INT16_C( -1434)), simde_mm_set_pi8 (INT8_C( 0), INT8_C( -75), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0)) }, { simde_mm_set_pi16(INT16_C( 867), INT16_C( -63), INT16_C( -1003), INT16_C( 13)), simde_mm_set_pi16(INT16_C( -29854), INT16_C( -6), INT16_C( 33), INT16_C( 5)), simde_mm_set_pi8 (INT8_C( 0), INT8_C( 0), INT8_C( 33), INT8_C( 5), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 13)) }, { simde_mm_set_pi16(INT16_C( 771), INT16_C( -1), INT16_C( -13), INT16_C( -2)), simde_mm_set_pi16(INT16_C( -65), INT16_C( 55), INT16_C( 295), INT16_C( 17510)), simde_mm_set_pi8 (INT8_C( 0), INT8_C( 55), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi16(INT16_C( 50), INT16_C( 32337), INT16_C( 13), INT16_C( 20449)), simde_mm_set_pi16(INT16_C( -897), INT16_C( -113), INT16_C( -3866), INT16_C( -15759)), simde_mm_set_pi8 (INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 50), INT8_C( -1), INT8_C( 13), INT8_C( -1)) }, { simde_mm_set_pi16(INT16_C( 0), INT16_C( 4501), INT16_C( 202), INT16_C( 9748)), simde_mm_set_pi16(INT16_C( -2), INT16_C( -1), INT16_C( -16348), INT16_C( -6302)), simde_mm_set_pi8 (INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -54), INT8_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_packs_pu16(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_u8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_packuswb(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { 
simde_mm_set_pi16(INT16_C( -2), INT16_C( 113), INT16_C( 49), INT16_C( -647)), simde_mm_set_pi16(INT16_C( 56), INT16_C( 5), INT16_C( 1), INT16_C( -54)), simde_mm_set_pi8 (INT8_C( 56), INT8_C( 5), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 113), INT8_C( 49), INT8_C( 0)) }, { simde_mm_set_pi16(INT16_C( -1), INT16_C( -206), INT16_C( -1650), INT16_C( -109)), simde_mm_set_pi16(INT16_C( -3828), INT16_C( 2), INT16_C( 471), INT16_C( 2)), simde_mm_set_pi8 (INT8_C( 0), INT8_C( 2), INT8_C( -1), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi16(INT16_C( 3), INT16_C( -2), INT16_C( 500), INT16_C( -100)), simde_mm_set_pi16(INT16_C( -1574), INT16_C( -1), INT16_C( -1), INT16_C( 2)), simde_mm_set_pi8 (INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 2), INT8_C( 3), INT8_C( 0), INT8_C( -1), INT8_C( 0)) }, { simde_mm_set_pi16(INT16_C( -13), INT16_C( -217), INT16_C( 3305), INT16_C( -10)), simde_mm_set_pi16(INT16_C( -370), INT16_C( 181), INT16_C( 1), INT16_C( -1434)), simde_mm_set_pi8 (INT8_C( 0), INT8_C( -75), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0)) }, { simde_mm_set_pi16(INT16_C( 867), INT16_C( -63), INT16_C( -1003), INT16_C( 13)), simde_mm_set_pi16(INT16_C( -29854), INT16_C( -6), INT16_C( 33), INT16_C( 5)), simde_mm_set_pi8 (INT8_C( 0), INT8_C( 0), INT8_C( 33), INT8_C( 5), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 13)) }, { simde_mm_set_pi16(INT16_C( 771), INT16_C( -1), INT16_C( -13), INT16_C( -2)), simde_mm_set_pi16(INT16_C( -65), INT16_C( 55), INT16_C( 295), INT16_C( 17510)), simde_mm_set_pi8 (INT8_C( 0), INT8_C( 55), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi16(INT16_C( 50), INT16_C( 32337), INT16_C( 13), INT16_C( 20449)), simde_mm_set_pi16(INT16_C( -897), INT16_C( -113), INT16_C( -3866), INT16_C( -15759)), simde_mm_set_pi8 (INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 50), INT8_C( -1), INT8_C( 13), INT8_C( -1)) }, { simde_mm_set_pi16(INT16_C( 0), 
INT16_C( 4501), INT16_C( 202), INT16_C( 9748)), simde_mm_set_pi16(INT16_C( -2), INT16_C( -1), INT16_C( -16348), INT16_C( -6302)), simde_mm_set_pi8 (INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -54), INT8_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_packuswb(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_u8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_sll_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -2612), INT16_C( -7275), INT16_C( 24980), INT16_C( 12744)), simde_mm_cvtsi64_m64(15), simde_mm_set_pi16(INT16_C( 0), INT16_C( -32768), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( 17143), INT16_C( -12000), INT16_C( 32255), INT16_C( 5448)), simde_mm_cvtsi64_m64(10), simde_mm_set_pi16(INT16_C( -9216), INT16_C( -32768), INT16_C( -1024), INT16_C( 8192)) }, { simde_mm_set_pi16(INT16_C( 1219), INT16_C( -18409), INT16_C( 24763), INT16_C( 13023)), simde_mm_cvtsi64_m64(3), simde_mm_set_pi16(INT16_C( 9752), INT16_C( -16200), INT16_C( 1496), INT16_C( -26888)) }, { simde_mm_set_pi16(INT16_C( -30853), INT16_C( -438), INT16_C( -13150), INT16_C( -2468)), simde_mm_cvtsi64_m64(10), simde_mm_set_pi16(INT16_C( -5120), INT16_C( 10240), INT16_C( -30720), INT16_C( 28672)) }, { simde_mm_set_pi16(INT16_C( -20343), INT16_C( 30713), INT16_C( 26566), INT16_C( 9213)), simde_mm_cvtsi64_m64(7), simde_mm_set_pi16(INT16_C( 17536), INT16_C( -896), INT16_C( -7424), INT16_C( -384)) }, { simde_mm_set_pi16(INT16_C( -14337), INT16_C( -4898), INT16_C( 32658), INT16_C( -4944)), simde_mm_cvtsi64_m64(5), simde_mm_set_pi16(INT16_C( -32), INT16_C( -25664), INT16_C( -3520), INT16_C( -27136)) }, { simde_mm_set_pi16(INT16_C( 21648), INT16_C( 25416), INT16_C( 19921), INT16_C( -16738)), simde_mm_cvtsi64_m64(0), simde_mm_set_pi16(INT16_C( 21648), INT16_C( 
25416), INT16_C( 19921), INT16_C( -16738)) }, { simde_mm_set_pi16(INT16_C( -10368), INT16_C( -19483), INT16_C( -15412), INT16_C( -29979)), simde_mm_cvtsi64_m64(9), simde_mm_set_pi16(INT16_C( 0), INT16_C( -13824), INT16_C( -26624), INT16_C( -13824)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_sll_pi16(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_psllw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -2612), INT16_C( -7275), INT16_C( 24980), INT16_C( 12744)), simde_mm_cvtsi64_m64(15), simde_mm_set_pi16(INT16_C( 0), INT16_C( -32768), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( 17143), INT16_C( -12000), INT16_C( 32255), INT16_C( 5448)), simde_mm_cvtsi64_m64(10), simde_mm_set_pi16(INT16_C( -9216), INT16_C( -32768), INT16_C( -1024), INT16_C( 8192)) }, { simde_mm_set_pi16(INT16_C( 1219), INT16_C( -18409), INT16_C( 24763), INT16_C( 13023)), simde_mm_cvtsi64_m64(3), simde_mm_set_pi16(INT16_C( 9752), INT16_C( -16200), INT16_C( 1496), INT16_C( -26888)) }, { simde_mm_set_pi16(INT16_C( -30853), INT16_C( -438), INT16_C( -13150), INT16_C( -2468)), simde_mm_cvtsi64_m64(10), simde_mm_set_pi16(INT16_C( -5120), INT16_C( 10240), INT16_C( -30720), INT16_C( 28672)) }, { simde_mm_set_pi16(INT16_C( -20343), INT16_C( 30713), INT16_C( 26566), INT16_C( 9213)), simde_mm_cvtsi64_m64(7), simde_mm_set_pi16(INT16_C( 17536), INT16_C( -896), INT16_C( -7424), INT16_C( -384)) }, { simde_mm_set_pi16(INT16_C( -14337), INT16_C( -4898), INT16_C( 32658), INT16_C( -4944)), simde_mm_cvtsi64_m64(5), simde_mm_set_pi16(INT16_C( -32), INT16_C( -25664), INT16_C( -3520), INT16_C( -27136)) }, { simde_mm_set_pi16(INT16_C( 21648), INT16_C( 25416), INT16_C( 19921), INT16_C( -16738)), simde_mm_cvtsi64_m64(0), simde_mm_set_pi16(INT16_C( 21648), 
INT16_C( 25416), INT16_C( 19921), INT16_C( -16738)) }, { simde_mm_set_pi16(INT16_C( -10368), INT16_C( -19483), INT16_C( -15412), INT16_C( -29979)), simde_mm_cvtsi64_m64(9), simde_mm_set_pi16(INT16_C( 0), INT16_C( -13824), INT16_C( -26624), INT16_C( -13824)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_psllw(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_sll_pi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( 2135609954), INT32_C( -1662756041)), simde_mm_cvtsi64_m64(10), simde_mm_set_pi32(INT32_C( 726239232), INT32_C( -1855136768)) }, { simde_mm_set_pi32(INT32_C( 1984991847), INT32_C( -75949890)), simde_mm_cvtsi64_m64(14), simde_mm_set_pi32(INT32_C( 614055936), INT32_C( 1177518080)) }, { simde_mm_set_pi32(INT32_C( -1315562518), INT32_C( -1717142831)), simde_mm_cvtsi64_m64(22), simde_mm_set_pi32(INT32_C( -92274688), INT32_C( -1270874112)) }, { simde_mm_set_pi32(INT32_C( -814215595), INT32_C( 805054469)), simde_mm_cvtsi64_m64(9), simde_mm_set_pi32(INT32_C( -266556928), INT32_C( -128972288)) }, { simde_mm_set_pi32(INT32_C( -1588862908), INT32_C( 2132697891)), simde_mm_cvtsi64_m64(1), simde_mm_set_pi32(INT32_C( 1117241480), INT32_C( -29571514)) }, { simde_mm_set_pi32(INT32_C( 782274620), INT32_C( -2120419106)), simde_mm_cvtsi64_m64(16), simde_mm_set_pi32(INT32_C( -1875116032), INT32_C( -119668736)) }, { simde_mm_set_pi32(INT32_C( -1687581332), INT32_C( -1263634481)), simde_mm_cvtsi64_m64(16), simde_mm_set_pi32(INT32_C( -1922301952), INT32_C( 2010054656)) }, { simde_mm_set_pi32(INT32_C( -1258319564), INT32_C( 975343739)), simde_mm_cvtsi64_m64(5), simde_mm_set_pi32(INT32_C( -1611520384), INT32_C( 1146228576)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = 
simde_mm_sll_pi32(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_pslld(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( 2135609954), INT32_C( -1662756041)), simde_mm_cvtsi64_m64(10), simde_mm_set_pi32(INT32_C( 726239232), INT32_C( -1855136768)) }, { simde_mm_set_pi32(INT32_C( 1984991847), INT32_C( -75949890)), simde_mm_cvtsi64_m64(14), simde_mm_set_pi32(INT32_C( 614055936), INT32_C( 1177518080)) }, { simde_mm_set_pi32(INT32_C( -1315562518), INT32_C( -1717142831)), simde_mm_cvtsi64_m64(22), simde_mm_set_pi32(INT32_C( -92274688), INT32_C( -1270874112)) }, { simde_mm_set_pi32(INT32_C( -814215595), INT32_C( 805054469)), simde_mm_cvtsi64_m64(9), simde_mm_set_pi32(INT32_C( -266556928), INT32_C( -128972288)) }, { simde_mm_set_pi32(INT32_C( -1588862908), INT32_C( 2132697891)), simde_mm_cvtsi64_m64(1), simde_mm_set_pi32(INT32_C( 1117241480), INT32_C( -29571514)) }, { simde_mm_set_pi32(INT32_C( 782274620), INT32_C( -2120419106)), simde_mm_cvtsi64_m64(16), simde_mm_set_pi32(INT32_C( -1875116032), INT32_C( -119668736)) }, { simde_mm_set_pi32(INT32_C( -1687581332), INT32_C( -1263634481)), simde_mm_cvtsi64_m64(16), simde_mm_set_pi32(INT32_C( -1922301952), INT32_C( 2010054656)) }, { simde_mm_set_pi32(INT32_C( -1258319564), INT32_C( 975343739)), simde_mm_cvtsi64_m64(5), simde_mm_set_pi32(INT32_C( -1611520384), INT32_C( 1146228576)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_pslld(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_sll_si64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 count; simde__m64 r; } test_vec[8] = { { simde_mm_cvtsi64_m64(INT64_C( 3171924675130206313)), simde_mm_cvtsi64_m64(26), 
simde_mm_cvtsi64_m64(INT64_C( -4688886433618853888)) }, { simde_mm_cvtsi64_m64(INT64_C( 8810857393431583130)), simde_mm_cvtsi64_m64(35), simde_mm_cvtsi64_m64(INT64_C( 8135977920570064896)) }, { simde_mm_cvtsi64_m64(INT64_C( 8253138385445189600)), simde_mm_cvtsi64_m64(60), simde_mm_cvtsi64_m64(INT64_C( 0)) }, { simde_mm_cvtsi64_m64(INT64_C( -109691783123384247)), simde_mm_cvtsi64_m64(35), simde_mm_cvtsi64_m64(INT64_C( 4699016138212769792)) }, { simde_mm_cvtsi64_m64(INT64_C( 797909880260215132)), simde_mm_cvtsi64_m64(19), simde_mm_cvtsi64_m64(INT64_C( -686801717540421632)) }, { simde_mm_cvtsi64_m64(INT64_C( -2366434973696685665)), simde_mm_cvtsi64_m64(28), simde_mm_cvtsi64_m64(INT64_C( -2410559835486552064)) }, { simde_mm_cvtsi64_m64(INT64_C( 3032641446696114060)), simde_mm_cvtsi64_m64(28), simde_mm_cvtsi64_m64(INT64_C( 4618209939532283904)) }, { simde_mm_cvtsi64_m64(INT64_C( 5741540145978860560)), simde_mm_cvtsi64_m64(44), simde_mm_cvtsi64_m64(INT64_C( 7944631217658265600)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_sll_si64(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_psllq(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 count; simde__m64 r; } test_vec[8] = { { simde_mm_cvtsi64_m64(INT64_C( 3171924675130206313)), simde_mm_cvtsi64_m64(26), simde_mm_cvtsi64_m64(INT64_C( -4688886433618853888)) }, { simde_mm_cvtsi64_m64(INT64_C( 8810857393431583130)), simde_mm_cvtsi64_m64(35), simde_mm_cvtsi64_m64(INT64_C( 8135977920570064896)) }, { simde_mm_cvtsi64_m64(INT64_C( 8253138385445189600)), simde_mm_cvtsi64_m64(60), simde_mm_cvtsi64_m64(INT64_C( 0)) }, { simde_mm_cvtsi64_m64(INT64_C( -109691783123384247)), simde_mm_cvtsi64_m64(35), simde_mm_cvtsi64_m64(INT64_C( 4699016138212769792)) }, { simde_mm_cvtsi64_m64(INT64_C( 797909880260215132)), simde_mm_cvtsi64_m64(19), 
simde_mm_cvtsi64_m64(INT64_C( -686801717540421632)) }, { simde_mm_cvtsi64_m64(INT64_C( -2366434973696685665)), simde_mm_cvtsi64_m64(28), simde_mm_cvtsi64_m64(INT64_C( -2410559835486552064)) }, { simde_mm_cvtsi64_m64(INT64_C( 3032641446696114060)), simde_mm_cvtsi64_m64(28), simde_mm_cvtsi64_m64(INT64_C( 4618209939532283904)) }, { simde_mm_cvtsi64_m64(INT64_C( 5741540145978860560)), simde_mm_cvtsi64_m64(44), simde_mm_cvtsi64_m64(INT64_C( 7944631217658265600)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_psllq(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_slli_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -13543), INT16_C( 6360), INT16_C( -1306), INT16_C( -5948)), 6, simde_mm_set_pi16(INT16_C( -14784), INT16_C( 13824), INT16_C( -18048), INT16_C( 12544)) }, { simde_mm_set_pi16(INT16_C( 6506), INT16_C( -28533), INT16_C( 3988), INT16_C( -31210)), 10, simde_mm_set_pi16(INT16_C( -22528), INT16_C( 11264), INT16_C( 20480), INT16_C( 22528)) }, { simde_mm_set_pi16(INT16_C( 19388), INT16_C( -4520), INT16_C( 9582), INT16_C( 11067)), 9, simde_mm_set_pi16(INT16_C( 30720), INT16_C( -20480), INT16_C( -9216), INT16_C( 30208)) }, { simde_mm_set_pi16(INT16_C( 12000), INT16_C( 28876), INT16_C( 29834), INT16_C( -13742)), 13, simde_mm_set_pi16(INT16_C( 0), INT16_C( -32768), INT16_C( 16384), INT16_C( 16384)) }, { simde_mm_set_pi16(INT16_C( 4648), INT16_C( -2151), INT16_C( -26641), INT16_C( -27659)), 13, simde_mm_set_pi16(INT16_C( 0), INT16_C( 8192), INT16_C( -8192), INT16_C( -24576)) }, { simde_mm_set_pi16(INT16_C( -2353), INT16_C( 20317), INT16_C( 7426), INT16_C( 24788)), 8, simde_mm_set_pi16(INT16_C( -12544), INT16_C( 23808), INT16_C( 512), INT16_C( -11264)) }, { simde_mm_set_pi16(INT16_C( -6174), INT16_C( 31492), INT16_C( 28575), 
INT16_C( -20383)), 1, simde_mm_set_pi16(INT16_C( -12348), INT16_C( -2552), INT16_C( -8386), INT16_C( 24770)) }, { simde_mm_set_pi16(INT16_C( -30371), INT16_C( 17334), INT16_C( 2428), INT16_C( -4558)), 5, simde_mm_set_pi16(INT16_C( 11168), INT16_C( 30400), INT16_C( 12160), INT16_C( -14784)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_slli_pi16(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_psllwi(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -13543), INT16_C( 6360), INT16_C( -1306), INT16_C( -5948)), 6, simde_mm_set_pi16(INT16_C( -14784), INT16_C( 13824), INT16_C( -18048), INT16_C( 12544)) }, { simde_mm_set_pi16(INT16_C( 6506), INT16_C( -28533), INT16_C( 3988), INT16_C( -31210)), 10, simde_mm_set_pi16(INT16_C( -22528), INT16_C( 11264), INT16_C( 20480), INT16_C( 22528)) }, { simde_mm_set_pi16(INT16_C( 19388), INT16_C( -4520), INT16_C( 9582), INT16_C( 11067)), 9, simde_mm_set_pi16(INT16_C( 30720), INT16_C( -20480), INT16_C( -9216), INT16_C( 30208)) }, { simde_mm_set_pi16(INT16_C( 12000), INT16_C( 28876), INT16_C( 29834), INT16_C( -13742)), 13, simde_mm_set_pi16(INT16_C( 0), INT16_C( -32768), INT16_C( 16384), INT16_C( 16384)) }, { simde_mm_set_pi16(INT16_C( 4648), INT16_C( -2151), INT16_C( -26641), INT16_C( -27659)), 13, simde_mm_set_pi16(INT16_C( 0), INT16_C( 8192), INT16_C( -8192), INT16_C( -24576)) }, { simde_mm_set_pi16(INT16_C( -2353), INT16_C( 20317), INT16_C( 7426), INT16_C( 24788)), 8, simde_mm_set_pi16(INT16_C( -12544), INT16_C( 23808), INT16_C( 512), INT16_C( -11264)) }, { simde_mm_set_pi16(INT16_C( -6174), INT16_C( 31492), INT16_C( 28575), INT16_C( -20383)), 1, simde_mm_set_pi16(INT16_C( -12348), INT16_C( -2552), INT16_C( -8386), INT16_C( 24770)) }, { simde_mm_set_pi16(INT16_C( -30371), INT16_C( 17334), 
INT16_C( 2428), INT16_C( -4558)), 5, simde_mm_set_pi16(INT16_C( 11168), INT16_C( 30400), INT16_C( 12160), INT16_C( -14784)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_psllwi(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_slli_pi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( 83881529), INT32_C( 1357951601)), 27, simde_mm_set_pi32(INT32_C( -939524096), INT32_C( -2013265920)) }, { simde_mm_set_pi32(INT32_C( -2138298674), INT32_C( -2019079679)), 21, simde_mm_set_pi32(INT32_C( -641728512), INT32_C( 1075838976)) }, { simde_mm_set_pi32(INT32_C( -281448798), INT32_C( -1557273316)), 27, simde_mm_set_pi32(INT32_C( 268435456), INT32_C( -536870912)) }, { simde_mm_set_pi32(INT32_C( -1207542290), INT32_C( -694741539)), 7, simde_mm_set_pi32(INT32_C( 53409536), INT32_C( 1267396224)) }, { simde_mm_set_pi32(INT32_C( 902716495), INT32_C( 943182057)), 20, simde_mm_set_pi32(INT32_C( -990904320), INT32_C( 244318208)) }, { simde_mm_set_pi32(INT32_C( 7423865), INT32_C( -1974692036)), 5, simde_mm_set_pi32(INT32_C( 237563680), INT32_C( 1234364288)) }, { simde_mm_set_pi32(INT32_C( 174727032), INT32_C( -891064659)), 23, simde_mm_set_pi32(INT32_C( -1140850688), INT32_C( 1451229184)) }, { simde_mm_set_pi32(INT32_C( 134754342), INT32_C( -1894000042)), 27, simde_mm_set_pi32(INT32_C( 805306368), INT32_C( -1342177280)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_slli_pi32(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_pslldi(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( 83881529), INT32_C( 
1357951601)), 27, simde_mm_set_pi32(INT32_C( -939524096), INT32_C( -2013265920)) }, { simde_mm_set_pi32(INT32_C( -2138298674), INT32_C( -2019079679)), 21, simde_mm_set_pi32(INT32_C( -641728512), INT32_C( 1075838976)) }, { simde_mm_set_pi32(INT32_C( -281448798), INT32_C( -1557273316)), 27, simde_mm_set_pi32(INT32_C( 268435456), INT32_C( -536870912)) }, { simde_mm_set_pi32(INT32_C( -1207542290), INT32_C( -694741539)), 7, simde_mm_set_pi32(INT32_C( 53409536), INT32_C( 1267396224)) }, { simde_mm_set_pi32(INT32_C( 902716495), INT32_C( 943182057)), 20, simde_mm_set_pi32(INT32_C( -990904320), INT32_C( 244318208)) }, { simde_mm_set_pi32(INT32_C( 7423865), INT32_C( -1974692036)), 5, simde_mm_set_pi32(INT32_C( 237563680), INT32_C( 1234364288)) }, { simde_mm_set_pi32(INT32_C( 174727032), INT32_C( -891064659)), 23, simde_mm_set_pi32(INT32_C( -1140850688), INT32_C( 1451229184)) }, { simde_mm_set_pi32(INT32_C( 134754342), INT32_C( -1894000042)), 27, simde_mm_set_pi32(INT32_C( 805306368), INT32_C( -1342177280)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_pslldi(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_slli_si64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int count; simde__m64 r; } test_vec[8] = { { simde_mm_cvtsi64_m64(INT64_C( -3655983719573882447)), 37, simde_mm_cvtsi64_m64(INT64_C( 5043809618745098240)) }, { simde_mm_cvtsi64_m64(INT64_C( 5373634195600553823)), 49, simde_mm_cvtsi64_m64(INT64_C( 7979815589747097600)) }, { simde_mm_cvtsi64_m64(INT64_C( 955832682335824267)), 11, simde_mm_cvtsi64_m64(INT64_C( 2190461610555627520)) }, { simde_mm_cvtsi64_m64(INT64_C( 4435237962953354472)), 32, simde_mm_cvtsi64_m64(INT64_C( -6041177681452597248)) }, { simde_mm_cvtsi64_m64(INT64_C( 509713568463920999)), 0, simde_mm_cvtsi64_m64(INT64_C( 509713568463920999)) }, { simde_mm_cvtsi64_m64(INT64_C( 
3092984209993521199)), 24, simde_mm_cvtsi64_m64(INT64_C( -4581130211545841664)) }, { simde_mm_cvtsi64_m64(INT64_C( -9034725437056781767)), 38, simde_mm_cvtsi64_m64(INT64_C( 4817882106908639232)) }, { simde_mm_cvtsi64_m64(INT64_C( 8352260709189542260)), 34, simde_mm_cvtsi64_m64(INT64_C( -8446635447710384128)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_slli_si64(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i64x1(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_psllqi(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int count; simde__m64 r; } test_vec[8] = { { simde_mm_cvtsi64_m64(INT64_C( -3655983719573882447)), 37, simde_mm_cvtsi64_m64(INT64_C( 5043809618745098240)) }, { simde_mm_cvtsi64_m64(INT64_C( 5373634195600553823)), 49, simde_mm_cvtsi64_m64(INT64_C( 7979815589747097600)) }, { simde_mm_cvtsi64_m64(INT64_C( 955832682335824267)), 11, simde_mm_cvtsi64_m64(INT64_C( 2190461610555627520)) }, { simde_mm_cvtsi64_m64(INT64_C( 4435237962953354472)), 32, simde_mm_cvtsi64_m64(INT64_C( -6041177681452597248)) }, { simde_mm_cvtsi64_m64(INT64_C( 509713568463920999)), 0, simde_mm_cvtsi64_m64(INT64_C( 509713568463920999)) }, { simde_mm_cvtsi64_m64(INT64_C( 3092984209993521199)), 24, simde_mm_cvtsi64_m64(INT64_C( -4581130211545841664)) }, { simde_mm_cvtsi64_m64(INT64_C( -9034725437056781767)), 38, simde_mm_cvtsi64_m64(INT64_C( 4817882106908639232)) }, { simde_mm_cvtsi64_m64(INT64_C( 8352260709189542260)), 34, simde_mm_cvtsi64_m64(INT64_C( -8446635447710384128)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_psllqi(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i64x1(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_srl_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 count; simde__m64 r; } test_vec[8] = { { 
simde_mm_set_pi16(INT16_C( -3979), INT16_C( -30013), INT16_C( 22836), INT16_C( 7438)), simde_mm_cvtsi64_m64(5), simde_mm_set_pi16(INT16_C( 1923), INT16_C( 1110), INT16_C( 713), INT16_C( 232)) }, { simde_mm_set_pi16(INT16_C( -17889), INT16_C( -31199), INT16_C( 2233), INT16_C( 29176)), simde_mm_cvtsi64_m64(9), simde_mm_set_pi16(INT16_C( 93), INT16_C( 67), INT16_C( 4), INT16_C( 56)) }, { simde_mm_set_pi16(INT16_C( -14320), INT16_C( -29349), INT16_C( -4712), INT16_C( 3031)), simde_mm_cvtsi64_m64(6), simde_mm_set_pi16(INT16_C( 800), INT16_C( 565), INT16_C( 950), INT16_C( 47)) }, { simde_mm_set_pi16(INT16_C( 28706), INT16_C( -15113), INT16_C( -3287), INT16_C( -13609)), simde_mm_cvtsi64_m64(13), simde_mm_set_pi16(INT16_C( 3), INT16_C( 6), INT16_C( 7), INT16_C( 6)) }, { simde_mm_set_pi16(INT16_C( -4348), INT16_C( 14324), INT16_C( 12491), INT16_C( -32763)), simde_mm_cvtsi64_m64(2), simde_mm_set_pi16(INT16_C( 15297), INT16_C( 3581), INT16_C( 3122), INT16_C( 8193)) }, { simde_mm_set_pi16(INT16_C( -1454), INT16_C( -3136), INT16_C( 16900), INT16_C( -26266)), simde_mm_cvtsi64_m64(11), simde_mm_set_pi16(INT16_C( 31), INT16_C( 30), INT16_C( 8), INT16_C( 19)) }, { simde_mm_set_pi16(INT16_C( 23032), INT16_C( 21033), INT16_C( 2074), INT16_C( -30320)), simde_mm_cvtsi64_m64(9), simde_mm_set_pi16(INT16_C( 44), INT16_C( 41), INT16_C( 4), INT16_C( 68)) }, { simde_mm_set_pi16(INT16_C( 2403), INT16_C( 6070), INT16_C( -16381), INT16_C( 15198)), simde_mm_cvtsi64_m64(10), simde_mm_set_pi16(INT16_C( 2), INT16_C( 5), INT16_C( 48), INT16_C( 14)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_srl_pi16(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_psrlw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -3979), INT16_C( -30013), INT16_C( 22836), 
INT16_C( 7438)), simde_mm_cvtsi64_m64(5), simde_mm_set_pi16(INT16_C( 1923), INT16_C( 1110), INT16_C( 713), INT16_C( 232)) }, { simde_mm_set_pi16(INT16_C( -17889), INT16_C( -31199), INT16_C( 2233), INT16_C( 29176)), simde_mm_cvtsi64_m64(9), simde_mm_set_pi16(INT16_C( 93), INT16_C( 67), INT16_C( 4), INT16_C( 56)) }, { simde_mm_set_pi16(INT16_C( -14320), INT16_C( -29349), INT16_C( -4712), INT16_C( 3031)), simde_mm_cvtsi64_m64(6), simde_mm_set_pi16(INT16_C( 800), INT16_C( 565), INT16_C( 950), INT16_C( 47)) }, { simde_mm_set_pi16(INT16_C( 28706), INT16_C( -15113), INT16_C( -3287), INT16_C( -13609)), simde_mm_cvtsi64_m64(13), simde_mm_set_pi16(INT16_C( 3), INT16_C( 6), INT16_C( 7), INT16_C( 6)) }, { simde_mm_set_pi16(INT16_C( -4348), INT16_C( 14324), INT16_C( 12491), INT16_C( -32763)), simde_mm_cvtsi64_m64(2), simde_mm_set_pi16(INT16_C( 15297), INT16_C( 3581), INT16_C( 3122), INT16_C( 8193)) }, { simde_mm_set_pi16(INT16_C( -1454), INT16_C( -3136), INT16_C( 16900), INT16_C( -26266)), simde_mm_cvtsi64_m64(11), simde_mm_set_pi16(INT16_C( 31), INT16_C( 30), INT16_C( 8), INT16_C( 19)) }, { simde_mm_set_pi16(INT16_C( 23032), INT16_C( 21033), INT16_C( 2074), INT16_C( -30320)), simde_mm_cvtsi64_m64(9), simde_mm_set_pi16(INT16_C( 44), INT16_C( 41), INT16_C( 4), INT16_C( 68)) }, { simde_mm_set_pi16(INT16_C( 2403), INT16_C( 6070), INT16_C( -16381), INT16_C( 15198)), simde_mm_cvtsi64_m64(10), simde_mm_set_pi16(INT16_C( 2), INT16_C( 5), INT16_C( 48), INT16_C( 14)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_psrlw(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_srl_pi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( 1162874425), INT32_C( 701403552)), simde_mm_cvtsi64_m64(12), simde_mm_set_pi32(INT32_C( 283904), INT32_C( 171241)) }, 
{ simde_mm_set_pi32(INT32_C( -1730008971), INT32_C( 1480718473)), simde_mm_cvtsi64_m64(7), simde_mm_set_pi32(INT32_C( 20038736), INT32_C( 11568113)) }, { simde_mm_set_pi32(INT32_C( -2020652937), INT32_C( -14094139)), simde_mm_cvtsi64_m64(9), simde_mm_set_pi32(INT32_C( 4442020), INT32_C( 8361080)) }, { simde_mm_set_pi32(INT32_C( 1211264864), INT32_C( -549692031)), simde_mm_cvtsi64_m64(13), simde_mm_set_pi32(INT32_C( 147859), INT32_C( 457186)) }, { simde_mm_set_pi32(INT32_C( 526771625), INT32_C( -1372326605)), simde_mm_cvtsi64_m64(6), simde_mm_set_pi32(INT32_C( 8230806), INT32_C( 45666260)) }, { simde_mm_set_pi32(INT32_C( 257774375), INT32_C( 1425803958)), simde_mm_cvtsi64_m64(0), simde_mm_set_pi32(INT32_C( 257774375), INT32_C( 1425803958)) }, { simde_mm_set_pi32(INT32_C( 751075720), INT32_C( -1937798467)), simde_mm_cvtsi64_m64(6), simde_mm_set_pi32(INT32_C( 11735558), INT32_C( 36830762)) }, { simde_mm_set_pi32(INT32_C( -703624712), INT32_C( 1484883517)), simde_mm_cvtsi64_m64(14), simde_mm_set_pi32(INT32_C( 219198), INT32_C( 90630)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_srl_pi32(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_psrld(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( 1162874425), INT32_C( 701403552)), simde_mm_cvtsi64_m64(12), simde_mm_set_pi32(INT32_C( 283904), INT32_C( 171241)) }, { simde_mm_set_pi32(INT32_C( -1730008971), INT32_C( 1480718473)), simde_mm_cvtsi64_m64(7), simde_mm_set_pi32(INT32_C( 20038736), INT32_C( 11568113)) }, { simde_mm_set_pi32(INT32_C( -2020652937), INT32_C( -14094139)), simde_mm_cvtsi64_m64(9), simde_mm_set_pi32(INT32_C( 4442020), INT32_C( 8361080)) }, { simde_mm_set_pi32(INT32_C( 1211264864), INT32_C( -549692031)), simde_mm_cvtsi64_m64(13), simde_mm_set_pi32(INT32_C( 
147859), INT32_C( 457186)) }, { simde_mm_set_pi32(INT32_C( 526771625), INT32_C( -1372326605)), simde_mm_cvtsi64_m64(6), simde_mm_set_pi32(INT32_C( 8230806), INT32_C( 45666260)) }, { simde_mm_set_pi32(INT32_C( 257774375), INT32_C( 1425803958)), simde_mm_cvtsi64_m64(0), simde_mm_set_pi32(INT32_C( 257774375), INT32_C( 1425803958)) }, { simde_mm_set_pi32(INT32_C( 751075720), INT32_C( -1937798467)), simde_mm_cvtsi64_m64(6), simde_mm_set_pi32(INT32_C( 11735558), INT32_C( 36830762)) }, { simde_mm_set_pi32(INT32_C( -703624712), INT32_C( 1484883517)), simde_mm_cvtsi64_m64(14), simde_mm_set_pi32(INT32_C( 219198), INT32_C( 90630)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_psrld(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_srl_si64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 count; simde__m64 r; } test_vec[8] = { { simde_mm_cvtsi64_m64(INT64_C( -1550745422537000797)), simde_mm_cvtsi64_m64(27), simde_mm_cvtsi64_m64(INT64_C( 125884999716)) }, { simde_mm_cvtsi64_m64(INT64_C( -4905487896917789484)), simde_mm_cvtsi64_m64(51), simde_mm_cvtsi64_m64(INT64_C( 6013)) }, { simde_mm_cvtsi64_m64(INT64_C( 784798283774789910)), simde_mm_cvtsi64_m64(61), simde_mm_cvtsi64_m64(INT64_C( 0)) }, { simde_mm_cvtsi64_m64(INT64_C( -7160969444731528566)), simde_mm_cvtsi64_m64(36), simde_mm_cvtsi64_m64(INT64_C( 164229635)) }, { simde_mm_cvtsi64_m64(INT64_C( -123534753035910002)), simde_mm_cvtsi64_m64(20), simde_mm_cvtsi64_m64(INT64_C( 17474374123262)) }, { simde_mm_cvtsi64_m64(INT64_C( 5720385725637272506)), simde_mm_cvtsi64_m64(33), simde_mm_cvtsi64_m64(INT64_C( 665940545)) }, { simde_mm_cvtsi64_m64(INT64_C( -3398235017645277558)), simde_mm_cvtsi64_m64(63), simde_mm_cvtsi64_m64(INT64_C( 1)) }, { simde_mm_cvtsi64_m64(INT64_C( -5355948413550293775)), simde_mm_cvtsi64_m64(7), simde_mm_cvtsi64_m64(INT64_C( 
102271841094994201)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_srl_si64(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i64x1(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_psrlq(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 count; simde__m64 r; } test_vec[8] = { { simde_mm_cvtsi64_m64(INT64_C( -1550745422537000797)), simde_mm_cvtsi64_m64(27), simde_mm_cvtsi64_m64(INT64_C( 125884999716)) }, { simde_mm_cvtsi64_m64(INT64_C( -4905487896917789484)), simde_mm_cvtsi64_m64(51), simde_mm_cvtsi64_m64(INT64_C( 6013)) }, { simde_mm_cvtsi64_m64(INT64_C( 784798283774789910)), simde_mm_cvtsi64_m64(61), simde_mm_cvtsi64_m64(INT64_C( 0)) }, { simde_mm_cvtsi64_m64(INT64_C( -7160969444731528566)), simde_mm_cvtsi64_m64(36), simde_mm_cvtsi64_m64(INT64_C( 164229635)) }, { simde_mm_cvtsi64_m64(INT64_C( -123534753035910002)), simde_mm_cvtsi64_m64(20), simde_mm_cvtsi64_m64(INT64_C( 17474374123262)) }, { simde_mm_cvtsi64_m64(INT64_C( 5720385725637272506)), simde_mm_cvtsi64_m64(33), simde_mm_cvtsi64_m64(INT64_C( 665940545)) }, { simde_mm_cvtsi64_m64(INT64_C( -3398235017645277558)), simde_mm_cvtsi64_m64(63), simde_mm_cvtsi64_m64(INT64_C( 1)) }, { simde_mm_cvtsi64_m64(INT64_C( -5355948413550293775)), simde_mm_cvtsi64_m64(7), simde_mm_cvtsi64_m64(INT64_C( 102271841094994201)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_psrlq(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i64x1(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_srli_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -15698), INT16_C( -32310), INT16_C( 339), INT16_C( 3496)), 10, simde_mm_set_pi16(INT16_C( 48), INT16_C( 32), INT16_C( 0), INT16_C( 3)) }, { simde_mm_set_pi16(INT16_C( -27263), INT16_C( 
-18160), INT16_C( -20487), INT16_C( -21173)), 6, simde_mm_set_pi16(INT16_C( 598), INT16_C( 740), INT16_C( 703), INT16_C( 693)) }, { simde_mm_set_pi16(INT16_C( 23805), INT16_C( -14941), INT16_C( 6558), INT16_C( -23896)), 6, simde_mm_set_pi16(INT16_C( 371), INT16_C( 790), INT16_C( 102), INT16_C( 650)) }, { simde_mm_set_pi16(INT16_C( 22534), INT16_C( -27358), INT16_C( -9489), INT16_C( -15972)), 7, simde_mm_set_pi16(INT16_C( 176), INT16_C( 298), INT16_C( 437), INT16_C( 387)) }, { simde_mm_set_pi16(INT16_C( 2212), INT16_C( -29223), INT16_C( -19783), INT16_C( -4105)), 0, simde_mm_set_pi16(INT16_C( 2212), INT16_C( -29223), INT16_C( -19783), INT16_C( -4105)) }, { simde_mm_set_pi16(INT16_C( 24559), INT16_C( -21850), INT16_C( -30646), INT16_C( 21423)), 14, simde_mm_set_pi16(INT16_C( 1), INT16_C( 2), INT16_C( 2), INT16_C( 1)) }, { simde_mm_set_pi16(INT16_C( -3241), INT16_C( -31506), INT16_C( 3662), INT16_C( 16805)), 5, simde_mm_set_pi16(INT16_C( 1946), INT16_C( 1063), INT16_C( 114), INT16_C( 525)) }, { simde_mm_set_pi16(INT16_C( -13677), INT16_C( 7117), INT16_C( -15559), INT16_C( -8368)), 14, simde_mm_set_pi16(INT16_C( 3), INT16_C( 0), INT16_C( 3), INT16_C( 3)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_srli_pi16(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_psrlwi(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -15698), INT16_C( -32310), INT16_C( 339), INT16_C( 3496)), 10, simde_mm_set_pi16(INT16_C( 48), INT16_C( 32), INT16_C( 0), INT16_C( 3)) }, { simde_mm_set_pi16(INT16_C( -27263), INT16_C( -18160), INT16_C( -20487), INT16_C( -21173)), 6, simde_mm_set_pi16(INT16_C( 598), INT16_C( 740), INT16_C( 703), INT16_C( 693)) }, { simde_mm_set_pi16(INT16_C( 23805), INT16_C( -14941), INT16_C( 6558), INT16_C( -23896)), 6, 
simde_mm_set_pi16(INT16_C( 371), INT16_C( 790), INT16_C( 102), INT16_C( 650)) }, { simde_mm_set_pi16(INT16_C( 22534), INT16_C( -27358), INT16_C( -9489), INT16_C( -15972)), 7, simde_mm_set_pi16(INT16_C( 176), INT16_C( 298), INT16_C( 437), INT16_C( 387)) }, { simde_mm_set_pi16(INT16_C( 2212), INT16_C( -29223), INT16_C( -19783), INT16_C( -4105)), 0, simde_mm_set_pi16(INT16_C( 2212), INT16_C( -29223), INT16_C( -19783), INT16_C( -4105)) }, { simde_mm_set_pi16(INT16_C( 24559), INT16_C( -21850), INT16_C( -30646), INT16_C( 21423)), 14, simde_mm_set_pi16(INT16_C( 1), INT16_C( 2), INT16_C( 2), INT16_C( 1)) }, { simde_mm_set_pi16(INT16_C( -3241), INT16_C( -31506), INT16_C( 3662), INT16_C( 16805)), 5, simde_mm_set_pi16(INT16_C( 1946), INT16_C( 1063), INT16_C( 114), INT16_C( 525)) }, { simde_mm_set_pi16(INT16_C( -13677), INT16_C( 7117), INT16_C( -15559), INT16_C( -8368)), 14, simde_mm_set_pi16(INT16_C( 3), INT16_C( 0), INT16_C( 3), INT16_C( 3)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_psrlwi(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_srli_pi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( 116105102), INT32_C( -612588364)), 14, simde_mm_set_pi32(INT32_C( 7086), INT32_C( 224754)) }, { simde_mm_set_pi32(INT32_C( -569249998), INT32_C( 1055993616)), 8, simde_mm_set_pi32(INT32_C( 14553583), INT32_C( 4124975)) }, { simde_mm_set_pi32(INT32_C( 851549428), INT32_C( -1334511981)), 15, simde_mm_set_pi32(INT32_C( 25987), INT32_C( 90345)) }, { simde_mm_set_pi32(INT32_C( -1526427094), INT32_C( 130645372)), 14, simde_mm_set_pi32(INT32_C( 168978), INT32_C( 7973)) }, { simde_mm_set_pi32(INT32_C( -1832776933), INT32_C( -28796512)), 0, simde_mm_set_pi32(INT32_C( -1832776933), INT32_C( -28796512)) }, { simde_mm_set_pi32(INT32_C( -1521422315), 
INT32_C( 230241179)), 4, simde_mm_set_pi32(INT32_C( 173346561), INT32_C( 14390073)) }, { simde_mm_set_pi32(INT32_C( 981909051), INT32_C( -764766890)), 15, simde_mm_set_pi32(INT32_C( 29965), INT32_C( 107733)) }, { simde_mm_set_pi32(INT32_C( -1889202569), INT32_C( 1472716773)), 10, simde_mm_set_pi32(INT32_C( 2349379), INT32_C( 1438199)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_srli_pi32(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_psrldi(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( 116105102), INT32_C( -612588364)), 14, simde_mm_set_pi32(INT32_C( 7086), INT32_C( 224754)) }, { simde_mm_set_pi32(INT32_C( -569249998), INT32_C( 1055993616)), 8, simde_mm_set_pi32(INT32_C( 14553583), INT32_C( 4124975)) }, { simde_mm_set_pi32(INT32_C( 851549428), INT32_C( -1334511981)), 15, simde_mm_set_pi32(INT32_C( 25987), INT32_C( 90345)) }, { simde_mm_set_pi32(INT32_C( -1526427094), INT32_C( 130645372)), 14, simde_mm_set_pi32(INT32_C( 168978), INT32_C( 7973)) }, { simde_mm_set_pi32(INT32_C( -1832776933), INT32_C( -28796512)), 0, simde_mm_set_pi32(INT32_C( -1832776933), INT32_C( -28796512)) }, { simde_mm_set_pi32(INT32_C( -1521422315), INT32_C( 230241179)), 4, simde_mm_set_pi32(INT32_C( 173346561), INT32_C( 14390073)) }, { simde_mm_set_pi32(INT32_C( 981909051), INT32_C( -764766890)), 15, simde_mm_set_pi32(INT32_C( 29965), INT32_C( 107733)) }, { simde_mm_set_pi32(INT32_C( -1889202569), INT32_C( 1472716773)), 10, simde_mm_set_pi32(INT32_C( 2349379), INT32_C( 1438199)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_psrldi(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int 
test_simde_mm_srli_si64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int count; simde__m64 r; } test_vec[8] = { { simde_mm_cvtsi64_m64(INT64_C( -8294501885901195762)), 62, simde_mm_cvtsi64_m64(INT64_C( 2)) }, { simde_mm_cvtsi64_m64(INT64_C( 7027314223871146181)), 7, simde_mm_cvtsi64_m64(INT64_C( 54900892373993329)) }, { simde_mm_cvtsi64_m64(INT64_C( 2649805052949317833)), 19, simde_mm_cvtsi64_m64(INT64_C( 5054102044962)) }, { simde_mm_cvtsi64_m64(INT64_C( 778555941675423413)), 12, simde_mm_cvtsi64_m64(INT64_C( 190077134198101)) }, { simde_mm_cvtsi64_m64(INT64_C( 1453695186595163432)), 17, simde_mm_cvtsi64_m64(INT64_C( 11090814106713)) }, { simde_mm_cvtsi64_m64(INT64_C( 834539484136231083)), 22, simde_mm_cvtsi64_m64(INT64_C( 198969718011)) }, { simde_mm_cvtsi64_m64(INT64_C( 1883775849744838333)), 12, simde_mm_cvtsi64_m64(INT64_C( 459906213316610)) }, { simde_mm_cvtsi64_m64(INT64_C( 7946503469684399228)), 61, simde_mm_cvtsi64_m64(INT64_C( 3)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_srli_si64(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i64x1(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_psrlqi(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int count; simde__m64 r; } test_vec[8] = { { simde_mm_cvtsi64_m64(INT64_C( -8294501885901195762)), 62, simde_mm_cvtsi64_m64(INT64_C( 2)) }, { simde_mm_cvtsi64_m64(INT64_C( 7027314223871146181)), 7, simde_mm_cvtsi64_m64(INT64_C( 54900892373993329)) }, { simde_mm_cvtsi64_m64(INT64_C( 2649805052949317833)), 19, simde_mm_cvtsi64_m64(INT64_C( 5054102044962)) }, { simde_mm_cvtsi64_m64(INT64_C( 778555941675423413)), 12, simde_mm_cvtsi64_m64(INT64_C( 190077134198101)) }, { simde_mm_cvtsi64_m64(INT64_C( 1453695186595163432)), 17, simde_mm_cvtsi64_m64(INT64_C( 11090814106713)) }, { simde_mm_cvtsi64_m64(INT64_C( 834539484136231083)), 22, simde_mm_cvtsi64_m64(INT64_C( 198969718011)) }, { 
simde_mm_cvtsi64_m64(INT64_C( 1883775849744838333)), 12, simde_mm_cvtsi64_m64(INT64_C( 459906213316610)) }, { simde_mm_cvtsi64_m64(INT64_C( 7946503469684399228)), 61, simde_mm_cvtsi64_m64(INT64_C( 3)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_psrlqi(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i64x1(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_srai_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -32259), INT16_C( -10390), INT16_C( 28627), INT16_C( 18747)), 6, simde_mm_set_pi16(INT16_C( -505), INT16_C( -163), INT16_C( 447), INT16_C( 292)) }, { simde_mm_set_pi16(INT16_C( -300), INT16_C( -3262), INT16_C( -2861), INT16_C( -11389)), 15, simde_mm_set_pi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1)) }, { simde_mm_set_pi16(INT16_C( 6480), INT16_C( -15684), INT16_C( 3587), INT16_C( 7844)), 2, simde_mm_set_pi16(INT16_C( 1620), INT16_C( -3921), INT16_C( 896), INT16_C( 1961)) }, { simde_mm_set_pi16(INT16_C( -6044), INT16_C( -15946), INT16_C( 1721), INT16_C( -30273)), 5, simde_mm_set_pi16(INT16_C( -189), INT16_C( -499), INT16_C( 53), INT16_C( -947)) }, { simde_mm_set_pi16(INT16_C( 24609), INT16_C( 14431), INT16_C( 1917), INT16_C( -13176)), 13, simde_mm_set_pi16(INT16_C( 3), INT16_C( 1), INT16_C( 0), INT16_C( -2)) }, { simde_mm_set_pi16(INT16_C( 13575), INT16_C( 32610), INT16_C( -4763), INT16_C( 10748)), 12, simde_mm_set_pi16(INT16_C( 3), INT16_C( 7), INT16_C( -2), INT16_C( 2)) }, { simde_mm_set_pi16(INT16_C( -2824), INT16_C( 28483), INT16_C( -23495), INT16_C( -17241)), 11, simde_mm_set_pi16(INT16_C( -2), INT16_C( 13), INT16_C( -12), INT16_C( -9)) }, { simde_mm_set_pi16(INT16_C( -5294), INT16_C( 29284), INT16_C( -3542), INT16_C( 21806)), 10, simde_mm_set_pi16(INT16_C( -6), INT16_C( 28), INT16_C( -4), INT16_C( 21)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_srai_pi16(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_psrawi(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -32259), INT16_C( -10390), INT16_C( 28627), INT16_C( 18747)), 6, simde_mm_set_pi16(INT16_C( -505), INT16_C( -163), INT16_C( 447), INT16_C( 292)) }, { simde_mm_set_pi16(INT16_C( -300), INT16_C( -3262), INT16_C( -2861), INT16_C( -11389)), 15, simde_mm_set_pi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1)) }, { simde_mm_set_pi16(INT16_C( 6480), INT16_C( -15684), INT16_C( 3587), INT16_C( 7844)), 2, simde_mm_set_pi16(INT16_C( 1620), INT16_C( -3921), INT16_C( 896), INT16_C( 1961)) }, { simde_mm_set_pi16(INT16_C( -6044), INT16_C( -15946), INT16_C( 1721), INT16_C( -30273)), 5, simde_mm_set_pi16(INT16_C( -189), INT16_C( -499), INT16_C( 53), INT16_C( -947)) }, { simde_mm_set_pi16(INT16_C( 24609), INT16_C( 14431), INT16_C( 1917), INT16_C( -13176)), 13, simde_mm_set_pi16(INT16_C( 3), INT16_C( 1), INT16_C( 0), INT16_C( -2)) }, { simde_mm_set_pi16(INT16_C( 13575), INT16_C( 32610), INT16_C( -4763), INT16_C( 10748)), 12, simde_mm_set_pi16(INT16_C( 3), INT16_C( 7), INT16_C( -2), INT16_C( 2)) }, { simde_mm_set_pi16(INT16_C( -2824), INT16_C( 28483), INT16_C( -23495), INT16_C( -17241)), 11, simde_mm_set_pi16(INT16_C( -2), INT16_C( 13), INT16_C( -12), INT16_C( -9)) }, { simde_mm_set_pi16(INT16_C( -5294), INT16_C( 29284), INT16_C( -3542), INT16_C( 21806)), 10, simde_mm_set_pi16(INT16_C( -6), INT16_C( 28), INT16_C( -4), INT16_C( 21)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_psrawi(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int 
test_simde_mm_srai_pi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( -2114070678), INT32_C( 1876117819)), 6, simde_mm_set_pi32(INT32_C( -33032355), INT32_C( 29314340)) }, { simde_mm_set_pi32(INT32_C( -19598526), INT32_C( -187444349)), 15, simde_mm_set_pi32(INT32_C( -599), INT32_C( -5721)) }, { simde_mm_set_pi32(INT32_C( 424723132), INT32_C( 235085476)), 2, simde_mm_set_pi32(INT32_C( 106180783), INT32_C( 58771369)) }, { simde_mm_set_pi32(INT32_C( -396049994), INT32_C( 112822719)), 5, simde_mm_set_pi32(INT32_C( -12376563), INT32_C( 3525709)) }, { simde_mm_set_pi32(INT32_C( 1612789855), INT32_C( 125684872)), 13, simde_mm_set_pi32(INT32_C( 196873), INT32_C( 15342)) }, { simde_mm_set_pi32(INT32_C( 889683810), INT32_C( -312137220)), 12, simde_mm_set_pi32(INT32_C( 217207), INT32_C( -76206)) }, { simde_mm_set_pi32(INT32_C( -185045181), INT32_C( -1539720025)), 11, simde_mm_set_pi32(INT32_C( -90355), INT32_C( -751817)) }, { simde_mm_set_pi32(INT32_C( -346918300), INT32_C( -232106706)), 10, simde_mm_set_pi32(INT32_C( -338788), INT32_C( -226667)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_srai_pi32(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_psradi(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( -2114070678), INT32_C( 1876117819)), 6, simde_mm_set_pi32(INT32_C( -33032355), INT32_C( 29314340)) }, { simde_mm_set_pi32(INT32_C( -19598526), INT32_C( -187444349)), 15, simde_mm_set_pi32(INT32_C( -599), INT32_C( -5721)) }, { simde_mm_set_pi32(INT32_C( 424723132), INT32_C( 235085476)), 2, simde_mm_set_pi32(INT32_C( 106180783), INT32_C( 58771369)) }, { simde_mm_set_pi32(INT32_C( -396049994), INT32_C( 112822719)), 5, simde_mm_set_pi32(INT32_C( -12376563), 
INT32_C( 3525709)) }, { simde_mm_set_pi32(INT32_C( 1612789855), INT32_C( 125684872)), 13, simde_mm_set_pi32(INT32_C( 196873), INT32_C( 15342)) }, { simde_mm_set_pi32(INT32_C( 889683810), INT32_C( -312137220)), 12, simde_mm_set_pi32(INT32_C( 217207), INT32_C( -76206)) }, { simde_mm_set_pi32(INT32_C( -185045181), INT32_C( -1539720025)), 11, simde_mm_set_pi32(INT32_C( -90355), INT32_C( -751817)) }, { simde_mm_set_pi32(INT32_C( -346918300), INT32_C( -232106706)), 10, simde_mm_set_pi32(INT32_C( -338788), INT32_C( -226667)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_psradi(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_sra_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( 17561), INT16_C( 10489), INT16_C( -28823), INT16_C( -32541)), simde_mm_cvtsi64_m64(11), simde_mm_set_pi16(INT16_C( 8), INT16_C( 5), INT16_C( -15), INT16_C( -16)) }, { simde_mm_set_pi16(INT16_C( -23916), INT16_C( 22319), INT16_C( -24731), INT16_C( -24948)), simde_mm_cvtsi64_m64(6), simde_mm_set_pi16(INT16_C( -374), INT16_C( 348), INT16_C( -387), INT16_C( -390)) }, { simde_mm_set_pi16(INT16_C( 10305), INT16_C( -29863), INT16_C( -25929), INT16_C( 26582)), simde_mm_cvtsi64_m64(4), simde_mm_set_pi16(INT16_C( 644), INT16_C( -1867), INT16_C( -1621), INT16_C( 1661)) }, { simde_mm_set_pi16(INT16_C( -11917), INT16_C( 7165), INT16_C( 860), INT16_C( -7108)), simde_mm_cvtsi64_m64(3), simde_mm_set_pi16(INT16_C( -1490), INT16_C( 895), INT16_C( 107), INT16_C( -889)) }, { simde_mm_set_pi16(INT16_C( 30600), INT16_C( 3146), INT16_C( -22841), INT16_C( -27601)), simde_mm_cvtsi64_m64(0), simde_mm_set_pi16(INT16_C( 30600), INT16_C( 3146), INT16_C( -22841), INT16_C( -27601)) }, { simde_mm_set_pi16(INT16_C( 7952), INT16_C( 8542), INT16_C( -27736), INT16_C( 20289)), 
simde_mm_cvtsi64_m64(1), simde_mm_set_pi16(INT16_C( 3976), INT16_C( 4271), INT16_C( -13868), INT16_C( 10144)) }, { simde_mm_set_pi16(INT16_C( -24594), INT16_C( -8796), INT16_C( -25195), INT16_C( 300)), simde_mm_cvtsi64_m64(10), simde_mm_set_pi16(INT16_C( -25), INT16_C( -9), INT16_C( -25), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( 9552), INT16_C( 20569), INT16_C( 1838), INT16_C( 26385)), simde_mm_cvtsi64_m64(9), simde_mm_set_pi16(INT16_C( 18), INT16_C( 40), INT16_C( 3), INT16_C( 51)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_sra_pi16(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_psraw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( 17561), INT16_C( 10489), INT16_C( -28823), INT16_C( -32541)), simde_mm_cvtsi64_m64(11), simde_mm_set_pi16(INT16_C( 8), INT16_C( 5), INT16_C( -15), INT16_C( -16)) }, { simde_mm_set_pi16(INT16_C( -23916), INT16_C( 22319), INT16_C( -24731), INT16_C( -24948)), simde_mm_cvtsi64_m64(6), simde_mm_set_pi16(INT16_C( -374), INT16_C( 348), INT16_C( -387), INT16_C( -390)) }, { simde_mm_set_pi16(INT16_C( 10305), INT16_C( -29863), INT16_C( -25929), INT16_C( 26582)), simde_mm_cvtsi64_m64(4), simde_mm_set_pi16(INT16_C( 644), INT16_C( -1867), INT16_C( -1621), INT16_C( 1661)) }, { simde_mm_set_pi16(INT16_C( -11917), INT16_C( 7165), INT16_C( 860), INT16_C( -7108)), simde_mm_cvtsi64_m64(3), simde_mm_set_pi16(INT16_C( -1490), INT16_C( 895), INT16_C( 107), INT16_C( -889)) }, { simde_mm_set_pi16(INT16_C( 30600), INT16_C( 3146), INT16_C( -22841), INT16_C( -27601)), simde_mm_cvtsi64_m64(0), simde_mm_set_pi16(INT16_C( 30600), INT16_C( 3146), INT16_C( -22841), INT16_C( -27601)) }, { simde_mm_set_pi16(INT16_C( 7952), INT16_C( 8542), INT16_C( -27736), INT16_C( 20289)), simde_mm_cvtsi64_m64(1), 
simde_mm_set_pi16(INT16_C( 3976), INT16_C( 4271), INT16_C( -13868), INT16_C( 10144)) }, { simde_mm_set_pi16(INT16_C( -24594), INT16_C( -8796), INT16_C( -25195), INT16_C( 300)), simde_mm_cvtsi64_m64(10), simde_mm_set_pi16(INT16_C( -25), INT16_C( -9), INT16_C( -25), INT16_C( 0)) }, { simde_mm_set_pi16(INT16_C( 9552), INT16_C( 20569), INT16_C( 1838), INT16_C( 26385)), simde_mm_cvtsi64_m64(9), simde_mm_set_pi16(INT16_C( 18), INT16_C( 40), INT16_C( 3), INT16_C( 51)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_psraw(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_sra_pi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( -1917317013), INT32_C( -1383526879)), simde_mm_cvtsi64_m64(15), simde_mm_set_pi32(INT32_C( -58512), INT32_C( -42222)) }, { simde_mm_set_pi32(INT32_C( -736945287), INT32_C( 858975517)), simde_mm_cvtsi64_m64(18), simde_mm_set_pi32(INT32_C( -2812), INT32_C( 3276)) }, { simde_mm_set_pi32(INT32_C( 1016725733), INT32_C( -1716419270)), simde_mm_cvtsi64_m64(20), simde_mm_set_pi32(INT32_C( 969), INT32_C( -1637)) }, { simde_mm_set_pi32(INT32_C( 884929023), INT32_C( -2109726169)), simde_mm_cvtsi64_m64(17), simde_mm_set_pi32(INT32_C( 6751), INT32_C( -16096)) }, { simde_mm_set_pi32(INT32_C( 1766981669), INT32_C( 1505895116)), simde_mm_cvtsi64_m64(8), simde_mm_set_pi32(INT32_C( 6902272), INT32_C( 5882402)) }, { simde_mm_set_pi32(INT32_C( 1732469741), INT32_C( -2109399559)), simde_mm_cvtsi64_m64(22), simde_mm_set_pi32(INT32_C( 413), INT32_C( -503)) }, { simde_mm_set_pi32(INT32_C( -1207208411), INT32_C( 962459192)), simde_mm_cvtsi64_m64(24), simde_mm_set_pi32(INT32_C( -72), INT32_C( 57)) }, { simde_mm_set_pi32(INT32_C( 519578965), INT32_C( 1181576220)), simde_mm_cvtsi64_m64(2), simde_mm_set_pi32(INT32_C( 129894741), 
INT32_C( 295394055)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_sra_pi32(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_psrad(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 count; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( -1917317013), INT32_C( -1383526879)), simde_mm_cvtsi64_m64(15), simde_mm_set_pi32(INT32_C( -58512), INT32_C( -42222)) }, { simde_mm_set_pi32(INT32_C( -736945287), INT32_C( 858975517)), simde_mm_cvtsi64_m64(18), simde_mm_set_pi32(INT32_C( -2812), INT32_C( 3276)) }, { simde_mm_set_pi32(INT32_C( 1016725733), INT32_C( -1716419270)), simde_mm_cvtsi64_m64(20), simde_mm_set_pi32(INT32_C( 969), INT32_C( -1637)) }, { simde_mm_set_pi32(INT32_C( 884929023), INT32_C( -2109726169)), simde_mm_cvtsi64_m64(17), simde_mm_set_pi32(INT32_C( 6751), INT32_C( -16096)) }, { simde_mm_set_pi32(INT32_C( 1766981669), INT32_C( 1505895116)), simde_mm_cvtsi64_m64(8), simde_mm_set_pi32(INT32_C( 6902272), INT32_C( 5882402)) }, { simde_mm_set_pi32(INT32_C( 1732469741), INT32_C( -2109399559)), simde_mm_cvtsi64_m64(22), simde_mm_set_pi32(INT32_C( 413), INT32_C( -503)) }, { simde_mm_set_pi32(INT32_C( -1207208411), INT32_C( 962459192)), simde_mm_cvtsi64_m64(24), simde_mm_set_pi32(INT32_C( -72), INT32_C( 57)) }, { simde_mm_set_pi32(INT32_C( 519578965), INT32_C( 1181576220)), simde_mm_cvtsi64_m64(2), simde_mm_set_pi32(INT32_C( 129894741), INT32_C( 295394055)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_psrad(test_vec[i].a, test_vec[i].count); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_sub_pi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi8(INT8_C( -68), INT8_C(-128), 
INT8_C(-110), INT8_C( -46), INT8_C( 64), INT8_C( -87), INT8_C( 123), INT8_C( 83)), simde_mm_set_pi8(INT8_C( -44), INT8_C( -27), INT8_C(-126), INT8_C( 47), INT8_C( -5), INT8_C( 124), INT8_C(-111), INT8_C( 88)), simde_mm_set_pi8(INT8_C( -24), INT8_C(-101), INT8_C( 16), INT8_C( -93), INT8_C( 69), INT8_C( 45), INT8_C( -22), INT8_C( -5)) }, { simde_mm_set_pi8(INT8_C( 7), INT8_C( 68), INT8_C( -53), INT8_C( -82), INT8_C( 50), INT8_C(-107), INT8_C( 109), INT8_C( 33)), simde_mm_set_pi8(INT8_C( 121), INT8_C( 50), INT8_C( 31), INT8_C( 80), INT8_C( 118), INT8_C( 8), INT8_C( -73), INT8_C( 38)), simde_mm_set_pi8(INT8_C(-114), INT8_C( 18), INT8_C( -84), INT8_C( 94), INT8_C( -68), INT8_C(-115), INT8_C( -74), INT8_C( -5)) }, { simde_mm_set_pi8(INT8_C( 60), INT8_C( 5), INT8_C( 4), INT8_C( -85), INT8_C( -61), INT8_C( 71), INT8_C( -19), INT8_C( -92)), simde_mm_set_pi8(INT8_C( -51), INT8_C( 118), INT8_C( 99), INT8_C( 14), INT8_C( 124), INT8_C(-115), INT8_C( 49), INT8_C( 19)), simde_mm_set_pi8(INT8_C( 111), INT8_C(-113), INT8_C( -95), INT8_C( -99), INT8_C( 71), INT8_C( -70), INT8_C( -68), INT8_C(-111)) }, { simde_mm_set_pi8(INT8_C( 80), INT8_C( 47), INT8_C( 46), INT8_C( -13), INT8_C( 94), INT8_C( -69), INT8_C( -72), INT8_C( -28)), simde_mm_set_pi8(INT8_C( 45), INT8_C( 99), INT8_C( 14), INT8_C( 4), INT8_C( 89), INT8_C( -77), INT8_C( -4), INT8_C( 109)), simde_mm_set_pi8(INT8_C( 35), INT8_C( -52), INT8_C( 32), INT8_C( -17), INT8_C( 5), INT8_C( 8), INT8_C( -68), INT8_C( 119)) }, { simde_mm_set_pi8(INT8_C( 117), INT8_C(-101), INT8_C( -54), INT8_C( -50), INT8_C( 55), INT8_C( -97), INT8_C( -74), INT8_C( 79)), simde_mm_set_pi8(INT8_C( 116), INT8_C( 19), INT8_C( 84), INT8_C( 90), INT8_C( -15), INT8_C( -49), INT8_C( 34), INT8_C(-124)), simde_mm_set_pi8(INT8_C( 1), INT8_C(-120), INT8_C( 118), INT8_C( 116), INT8_C( 70), INT8_C( -48), INT8_C(-108), INT8_C( -53)) }, { simde_mm_set_pi8(INT8_C( 43), INT8_C( -88), INT8_C( 7), INT8_C( -31), INT8_C( -45), INT8_C( -6), INT8_C( -61), INT8_C( -47)), 
simde_mm_set_pi8(INT8_C(-110), INT8_C( 87), INT8_C(-102), INT8_C( -63), INT8_C( -35), INT8_C( 78), INT8_C( 96), INT8_C( 51)), simde_mm_set_pi8(INT8_C(-103), INT8_C( 81), INT8_C( 109), INT8_C( 32), INT8_C( -10), INT8_C( -84), INT8_C( 99), INT8_C( -98)) }, { simde_mm_set_pi8(INT8_C(-113), INT8_C( -62), INT8_C(-117), INT8_C( 34), INT8_C( -40), INT8_C( 24), INT8_C( -20), INT8_C( 52)), simde_mm_set_pi8(INT8_C( 53), INT8_C( -16), INT8_C( 75), INT8_C( 38), INT8_C( 2), INT8_C( -75), INT8_C( -51), INT8_C( 92)), simde_mm_set_pi8(INT8_C( 90), INT8_C( -46), INT8_C( 64), INT8_C( -4), INT8_C( -42), INT8_C( 99), INT8_C( 31), INT8_C( -40)) }, { simde_mm_set_pi8(INT8_C( -94), INT8_C( -1), INT8_C( -70), INT8_C( 90), INT8_C(-105), INT8_C( -20), INT8_C( -71), INT8_C( -95)), simde_mm_set_pi8(INT8_C( -97), INT8_C( 49), INT8_C( 71), INT8_C( 69), INT8_C( -48), INT8_C( 31), INT8_C( -19), INT8_C( 28)), simde_mm_set_pi8(INT8_C( 3), INT8_C( -50), INT8_C( 115), INT8_C( 21), INT8_C( -57), INT8_C( -51), INT8_C( -52), INT8_C(-123)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_sub_pi8(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_psubb(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi8(INT8_C( -68), INT8_C(-128), INT8_C(-110), INT8_C( -46), INT8_C( 64), INT8_C( -87), INT8_C( 123), INT8_C( 83)), simde_mm_set_pi8(INT8_C( -44), INT8_C( -27), INT8_C(-126), INT8_C( 47), INT8_C( -5), INT8_C( 124), INT8_C(-111), INT8_C( 88)), simde_mm_set_pi8(INT8_C( -24), INT8_C(-101), INT8_C( 16), INT8_C( -93), INT8_C( 69), INT8_C( 45), INT8_C( -22), INT8_C( -5)) }, { simde_mm_set_pi8(INT8_C( 7), INT8_C( 68), INT8_C( -53), INT8_C( -82), INT8_C( 50), INT8_C(-107), INT8_C( 109), INT8_C( 33)), simde_mm_set_pi8(INT8_C( 121), INT8_C( 50), INT8_C( 31), INT8_C( 80), INT8_C( 118), INT8_C( 8), 
INT8_C( -73), INT8_C( 38)), simde_mm_set_pi8(INT8_C(-114), INT8_C( 18), INT8_C( -84), INT8_C( 94), INT8_C( -68), INT8_C(-115), INT8_C( -74), INT8_C( -5)) }, { simde_mm_set_pi8(INT8_C( 60), INT8_C( 5), INT8_C( 4), INT8_C( -85), INT8_C( -61), INT8_C( 71), INT8_C( -19), INT8_C( -92)), simde_mm_set_pi8(INT8_C( -51), INT8_C( 118), INT8_C( 99), INT8_C( 14), INT8_C( 124), INT8_C(-115), INT8_C( 49), INT8_C( 19)), simde_mm_set_pi8(INT8_C( 111), INT8_C(-113), INT8_C( -95), INT8_C( -99), INT8_C( 71), INT8_C( -70), INT8_C( -68), INT8_C(-111)) }, { simde_mm_set_pi8(INT8_C( 80), INT8_C( 47), INT8_C( 46), INT8_C( -13), INT8_C( 94), INT8_C( -69), INT8_C( -72), INT8_C( -28)), simde_mm_set_pi8(INT8_C( 45), INT8_C( 99), INT8_C( 14), INT8_C( 4), INT8_C( 89), INT8_C( -77), INT8_C( -4), INT8_C( 109)), simde_mm_set_pi8(INT8_C( 35), INT8_C( -52), INT8_C( 32), INT8_C( -17), INT8_C( 5), INT8_C( 8), INT8_C( -68), INT8_C( 119)) }, { simde_mm_set_pi8(INT8_C( 117), INT8_C(-101), INT8_C( -54), INT8_C( -50), INT8_C( 55), INT8_C( -97), INT8_C( -74), INT8_C( 79)), simde_mm_set_pi8(INT8_C( 116), INT8_C( 19), INT8_C( 84), INT8_C( 90), INT8_C( -15), INT8_C( -49), INT8_C( 34), INT8_C(-124)), simde_mm_set_pi8(INT8_C( 1), INT8_C(-120), INT8_C( 118), INT8_C( 116), INT8_C( 70), INT8_C( -48), INT8_C(-108), INT8_C( -53)) }, { simde_mm_set_pi8(INT8_C( 43), INT8_C( -88), INT8_C( 7), INT8_C( -31), INT8_C( -45), INT8_C( -6), INT8_C( -61), INT8_C( -47)), simde_mm_set_pi8(INT8_C(-110), INT8_C( 87), INT8_C(-102), INT8_C( -63), INT8_C( -35), INT8_C( 78), INT8_C( 96), INT8_C( 51)), simde_mm_set_pi8(INT8_C(-103), INT8_C( 81), INT8_C( 109), INT8_C( 32), INT8_C( -10), INT8_C( -84), INT8_C( 99), INT8_C( -98)) }, { simde_mm_set_pi8(INT8_C(-113), INT8_C( -62), INT8_C(-117), INT8_C( 34), INT8_C( -40), INT8_C( 24), INT8_C( -20), INT8_C( 52)), simde_mm_set_pi8(INT8_C( 53), INT8_C( -16), INT8_C( 75), INT8_C( 38), INT8_C( 2), INT8_C( -75), INT8_C( -51), INT8_C( 92)), simde_mm_set_pi8(INT8_C( 90), INT8_C( -46), INT8_C( 64), 
INT8_C( -4), INT8_C( -42), INT8_C( 99), INT8_C( 31), INT8_C( -40)) }, { simde_mm_set_pi8(INT8_C( -94), INT8_C( -1), INT8_C( -70), INT8_C( 90), INT8_C(-105), INT8_C( -20), INT8_C( -71), INT8_C( -95)), simde_mm_set_pi8(INT8_C( -97), INT8_C( 49), INT8_C( 71), INT8_C( 69), INT8_C( -48), INT8_C( 31), INT8_C( -19), INT8_C( 28)), simde_mm_set_pi8(INT8_C( 3), INT8_C( -50), INT8_C( 115), INT8_C( 21), INT8_C( -57), INT8_C( -51), INT8_C( -52), INT8_C(-123)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_psubb(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_sub_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -19579), INT16_C( 12561), INT16_C( 18345), INT16_C( 16319)), simde_mm_set_pi16(INT16_C( -28282), INT16_C( 12811), INT16_C( -17042), INT16_C( 32694)), simde_mm_set_pi16(INT16_C( 8703), INT16_C( -250), INT16_C( -30149), INT16_C( -16375)) }, { simde_mm_set_pi16(INT16_C( 26389), INT16_C( -16440), INT16_C( 31193), INT16_C( 17420)), simde_mm_set_pi16(INT16_C( -16772), INT16_C( -30407), INT16_C( 13204), INT16_C( -3950)), simde_mm_set_pi16(INT16_C( -22375), INT16_C( 13967), INT16_C( 17989), INT16_C( 21370)) }, { simde_mm_set_pi16(INT16_C( 27021), INT16_C( -21341), INT16_C( -29765), INT16_C( -27825)), simde_mm_set_pi16(INT16_C( 32255), INT16_C( -11881), INT16_C( -17239), INT16_C( 17727)), simde_mm_set_pi16(INT16_C( -5234), INT16_C( -9460), INT16_C( -12526), INT16_C( 19984)) }, { simde_mm_set_pi16(INT16_C( -1061), INT16_C( 10691), INT16_C( 5402), INT16_C( -29779)), simde_mm_set_pi16(INT16_C( -3105), INT16_C( 17443), INT16_C( 29683), INT16_C( -4669)), simde_mm_set_pi16(INT16_C( 2044), INT16_C( -6752), INT16_C( -24281), INT16_C( -25110)) }, { simde_mm_set_pi16(INT16_C( -27429), INT16_C( -24038), INT16_C( -27170), INT16_C( 23974)), 
simde_mm_set_pi16(INT16_C( 26527), INT16_C( -23757), INT16_C( 12822), INT16_C( 25106)), simde_mm_set_pi16(INT16_C( 11580), INT16_C( -281), INT16_C( 25544), INT16_C( -1132)) }, { simde_mm_set_pi16(INT16_C( -22000), INT16_C( 31301), INT16_C( 3019), INT16_C( 5319)), simde_mm_set_pi16(INT16_C( 17233), INT16_C( -4995), INT16_C( -32364), INT16_C( 13233)), simde_mm_set_pi16(INT16_C( 26303), INT16_C( -29240), INT16_C( -30153), INT16_C( -7914)) }, { simde_mm_set_pi16(INT16_C( -3486), INT16_C( -1801), INT16_C( 6573), INT16_C( -2443)), simde_mm_set_pi16(INT16_C( 12310), INT16_C( 34), INT16_C( -20082), INT16_C( -25128)), simde_mm_set_pi16(INT16_C( -15796), INT16_C( -1835), INT16_C( 26655), INT16_C( 22685)) }, { simde_mm_set_pi16(INT16_C( -4220), INT16_C( 17506), INT16_C( 6973), INT16_C( -8771)), simde_mm_set_pi16(INT16_C( -28953), INT16_C( 20334), INT16_C( 30681), INT16_C( -3329)), simde_mm_set_pi16(INT16_C( 24733), INT16_C( -2828), INT16_C( -23708), INT16_C( -5442)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_sub_pi16(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_psubw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -19579), INT16_C( 12561), INT16_C( 18345), INT16_C( 16319)), simde_mm_set_pi16(INT16_C( -28282), INT16_C( 12811), INT16_C( -17042), INT16_C( 32694)), simde_mm_set_pi16(INT16_C( 8703), INT16_C( -250), INT16_C( -30149), INT16_C( -16375)) }, { simde_mm_set_pi16(INT16_C( 26389), INT16_C( -16440), INT16_C( 31193), INT16_C( 17420)), simde_mm_set_pi16(INT16_C( -16772), INT16_C( -30407), INT16_C( 13204), INT16_C( -3950)), simde_mm_set_pi16(INT16_C( -22375), INT16_C( 13967), INT16_C( 17989), INT16_C( 21370)) }, { simde_mm_set_pi16(INT16_C( 27021), INT16_C( -21341), INT16_C( -29765), INT16_C( -27825)), 
simde_mm_set_pi16(INT16_C( 32255), INT16_C( -11881), INT16_C( -17239), INT16_C( 17727)), simde_mm_set_pi16(INT16_C( -5234), INT16_C( -9460), INT16_C( -12526), INT16_C( 19984)) }, { simde_mm_set_pi16(INT16_C( -1061), INT16_C( 10691), INT16_C( 5402), INT16_C( -29779)), simde_mm_set_pi16(INT16_C( -3105), INT16_C( 17443), INT16_C( 29683), INT16_C( -4669)), simde_mm_set_pi16(INT16_C( 2044), INT16_C( -6752), INT16_C( -24281), INT16_C( -25110)) }, { simde_mm_set_pi16(INT16_C( -27429), INT16_C( -24038), INT16_C( -27170), INT16_C( 23974)), simde_mm_set_pi16(INT16_C( 26527), INT16_C( -23757), INT16_C( 12822), INT16_C( 25106)), simde_mm_set_pi16(INT16_C( 11580), INT16_C( -281), INT16_C( 25544), INT16_C( -1132)) }, { simde_mm_set_pi16(INT16_C( -22000), INT16_C( 31301), INT16_C( 3019), INT16_C( 5319)), simde_mm_set_pi16(INT16_C( 17233), INT16_C( -4995), INT16_C( -32364), INT16_C( 13233)), simde_mm_set_pi16(INT16_C( 26303), INT16_C( -29240), INT16_C( -30153), INT16_C( -7914)) }, { simde_mm_set_pi16(INT16_C( -3486), INT16_C( -1801), INT16_C( 6573), INT16_C( -2443)), simde_mm_set_pi16(INT16_C( 12310), INT16_C( 34), INT16_C( -20082), INT16_C( -25128)), simde_mm_set_pi16(INT16_C( -15796), INT16_C( -1835), INT16_C( 26655), INT16_C( 22685)) }, { simde_mm_set_pi16(INT16_C( -4220), INT16_C( 17506), INT16_C( 6973), INT16_C( -8771)), simde_mm_set_pi16(INT16_C( -28953), INT16_C( 20334), INT16_C( 30681), INT16_C( -3329)), simde_mm_set_pi16(INT16_C( 24733), INT16_C( -2828), INT16_C( -23708), INT16_C( -5442)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_psubw(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_sub_pi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( 1529386227), INT32_C( -668097316)), simde_mm_set_pi32(INT32_C( 473233841), INT32_C( 
-90529672)), simde_mm_set_pi32(INT32_C( 1056152386), INT32_C( -577567644)) }, { simde_mm_set_pi32(INT32_C( -1931729107), INT32_C( -722204778)), simde_mm_set_pi32(INT32_C( -1308867233), INT32_C( -379543807)), simde_mm_set_pi32(INT32_C( -622861874), INT32_C( -342660971)) }, { simde_mm_set_pi32(INT32_C( -291860960), INT32_C( -190367090)), simde_mm_set_pi32(INT32_C( 135041259), INT32_C( 1788100299)), simde_mm_set_pi32(INT32_C( -426902219), INT32_C( -1978467389)) }, { simde_mm_set_pi32(INT32_C( 1883589163), INT32_C( 323765200)), simde_mm_set_pi32(INT32_C( 645555820), INT32_C( 651498122)), simde_mm_set_pi32(INT32_C( 1238033343), INT32_C( -327732922)) }, { simde_mm_set_pi32(INT32_C( 1636190981), INT32_C( -1768384078)), simde_mm_set_pi32(INT32_C( 292739084), INT32_C( -81452554)), simde_mm_set_pi32(INT32_C( 1343451897), INT32_C( -1686931524)) }, { simde_mm_set_pi32(INT32_C( -1203362066), INT32_C( 1430164168)), simde_mm_set_pi32(INT32_C( 1181972217), INT32_C( -1859714213)), simde_mm_set_pi32(INT32_C( 1909633013), INT32_C( -1005088915)) }, { simde_mm_set_pi32(INT32_C( -81132926), INT32_C( 156813953)), simde_mm_set_pi32(INT32_C( 1408689560), INT32_C( -1315494890)), simde_mm_set_pi32(INT32_C( -1489822486), INT32_C( 1472308843)) }, { simde_mm_set_pi32(INT32_C( -99259746), INT32_C( -1543487401)), simde_mm_set_pi32(INT32_C( 1211860803), INT32_C( 322815885)), simde_mm_set_pi32(INT32_C( -1311120549), INT32_C( -1866303286)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_sub_pi32(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_psubd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( 1529386227), INT32_C( -668097316)), simde_mm_set_pi32(INT32_C( 473233841), INT32_C( -90529672)), simde_mm_set_pi32(INT32_C( 1056152386), INT32_C( -577567644)) }, { 
simde_mm_set_pi32(INT32_C( -1931729107), INT32_C( -722204778)), simde_mm_set_pi32(INT32_C( -1308867233), INT32_C( -379543807)), simde_mm_set_pi32(INT32_C( -622861874), INT32_C( -342660971)) }, { simde_mm_set_pi32(INT32_C( -291860960), INT32_C( -190367090)), simde_mm_set_pi32(INT32_C( 135041259), INT32_C( 1788100299)), simde_mm_set_pi32(INT32_C( -426902219), INT32_C( -1978467389)) }, { simde_mm_set_pi32(INT32_C( 1883589163), INT32_C( 323765200)), simde_mm_set_pi32(INT32_C( 645555820), INT32_C( 651498122)), simde_mm_set_pi32(INT32_C( 1238033343), INT32_C( -327732922)) }, { simde_mm_set_pi32(INT32_C( 1636190981), INT32_C( -1768384078)), simde_mm_set_pi32(INT32_C( 292739084), INT32_C( -81452554)), simde_mm_set_pi32(INT32_C( 1343451897), INT32_C( -1686931524)) }, { simde_mm_set_pi32(INT32_C( -1203362066), INT32_C( 1430164168)), simde_mm_set_pi32(INT32_C( 1181972217), INT32_C( -1859714213)), simde_mm_set_pi32(INT32_C( 1909633013), INT32_C( -1005088915)) }, { simde_mm_set_pi32(INT32_C( -81132926), INT32_C( 156813953)), simde_mm_set_pi32(INT32_C( 1408689560), INT32_C( -1315494890)), simde_mm_set_pi32(INT32_C( -1489822486), INT32_C( 1472308843)) }, { simde_mm_set_pi32(INT32_C( -99259746), INT32_C( -1543487401)), simde_mm_set_pi32(INT32_C( 1211860803), INT32_C( 322815885)), simde_mm_set_pi32(INT32_C( -1311120549), INT32_C( -1866303286)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_psubd(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; }

/* The tests below come in pairs: one exercises an operation under its
 * simde_mm_* name, the other under its simde_m_p* name.  Both tests of a pair
 * run the same eight { a, b, r } cases, so each case table is written once as
 * a macro and expanded inside both test functions.  In every case `a` and `b`
 * are the operands handed to the function under test and `r` is the value the
 * result is asserted to equal.
 *
 * NOTE(review): simde_mm_empty() is invoked between computing the result and
 * asserting on it, and once more before returning; presumably this resets MMX
 * state before non-MMX code runs -- confirm against the simde_mm_empty docs. */

/* Cases shared by test_simde_mm_subs_pi8 and test_simde_m_psubsb. */
#define SIMDE_TEST_MMX_CASES_SUBS_PI8_ \
  { simde_mm_set_pi8(INT8_C(  47), INT8_C( -51), INT8_C(  -9), INT8_C(  46), INT8_C(  37), INT8_C( 125), INT8_C(-121), INT8_C(  88)), \
    simde_mm_set_pi8(INT8_C( -37), INT8_C( -59), INT8_C( -18), INT8_C( -39), INT8_C( -68), INT8_C( 127), INT8_C( -66), INT8_C( -14)), \
    simde_mm_set_pi8(INT8_C(  84), INT8_C(   8), INT8_C(   9), INT8_C(  85), INT8_C( 105), INT8_C(  -2), INT8_C( -55), INT8_C( 102)) }, \
  { simde_mm_set_pi8(INT8_C( -68), INT8_C( 111), INT8_C(  54), INT8_C(  -2), INT8_C( -96), INT8_C( -30), INT8_C(   7), INT8_C(  -1)), \
    simde_mm_set_pi8(INT8_C(  71), INT8_C( 109), INT8_C(  43), INT8_C( -28), INT8_C(-128), INT8_C( -98), INT8_C(  65), INT8_C( -86)), \
    simde_mm_set_pi8(INT8_C(-128), INT8_C(   2), INT8_C(  11), INT8_C(  26), INT8_C(  32), INT8_C(  68), INT8_C( -58), INT8_C(  85)) }, \
  { simde_mm_set_pi8(INT8_C(-124), INT8_C(-105), INT8_C(  39), INT8_C(  68), INT8_C( -44), INT8_C( -60), INT8_C( -44), INT8_C( -99)), \
    simde_mm_set_pi8(INT8_C(  -9), INT8_C(-127), INT8_C(  77), INT8_C( -14), INT8_C( -70), INT8_C( -39), INT8_C( -18), INT8_C( -40)), \
    simde_mm_set_pi8(INT8_C(-115), INT8_C(  22), INT8_C( -38), INT8_C(  82), INT8_C(  26), INT8_C( -21), INT8_C( -26), INT8_C( -59)) }, \
  { simde_mm_set_pi8(INT8_C(  52), INT8_C(  33), INT8_C(  97), INT8_C(  39), INT8_C(-126), INT8_C( -11), INT8_C(  17), INT8_C( 108)), \
    simde_mm_set_pi8(INT8_C(  65), INT8_C( 112), INT8_C( 108), INT8_C(  33), INT8_C(  68), INT8_C(-103), INT8_C( -45), INT8_C(   7)), \
    simde_mm_set_pi8(INT8_C( -13), INT8_C( -79), INT8_C( -11), INT8_C(   6), INT8_C(-128), INT8_C(  92), INT8_C(  62), INT8_C( 101)) }, \
  { simde_mm_set_pi8(INT8_C(-105), INT8_C(  75), INT8_C( 127), INT8_C( -57), INT8_C(  88), INT8_C( -25), INT8_C( -75), INT8_C( -74)), \
    simde_mm_set_pi8(INT8_C(-125), INT8_C( -81), INT8_C(  60), INT8_C(-108), INT8_C(  78), INT8_C( -60), INT8_C(  88), INT8_C(  30)), \
    simde_mm_set_pi8(INT8_C(  20), INT8_C( 127), INT8_C(  67), INT8_C(  51), INT8_C(  10), INT8_C(  35), INT8_C(-128), INT8_C(-104)) }, \
  { simde_mm_set_pi8(INT8_C( -28), INT8_C( -97), INT8_C(  80), INT8_C( -43), INT8_C( -70), INT8_C(  45), INT8_C(  10), INT8_C( -67)), \
    simde_mm_set_pi8(INT8_C(-109), INT8_C(  97), INT8_C(  25), INT8_C(  63), INT8_C( -65), INT8_C( -95), INT8_C(-111), INT8_C( -39)), \
    simde_mm_set_pi8(INT8_C(  81), INT8_C(-128), INT8_C(  55), INT8_C(-106), INT8_C(  -5), INT8_C( 127), INT8_C( 121), INT8_C( -28)) }, \
  { simde_mm_set_pi8(INT8_C(  52), INT8_C( -18), INT8_C( -86), INT8_C( -29), INT8_C(  69), INT8_C(  92), INT8_C(  89), INT8_C( -66)), \
    simde_mm_set_pi8(INT8_C(  16), INT8_C(   0), INT8_C(  95), INT8_C(  95), INT8_C( 115), INT8_C( -53), INT8_C(  55), INT8_C(  75)), \
    simde_mm_set_pi8(INT8_C(  36), INT8_C( -18), INT8_C(-128), INT8_C(-124), INT8_C( -46), INT8_C( 127), INT8_C(  34), INT8_C(-128)) }, \
  { simde_mm_set_pi8(INT8_C(  99), INT8_C( -48), INT8_C(  16), INT8_C( 126), INT8_C(-110), INT8_C(-111), INT8_C( -66), INT8_C(  83)), \
    simde_mm_set_pi8(INT8_C(-118), INT8_C( 118), INT8_C( 100), INT8_C(-121), INT8_C( -17), INT8_C(  74), INT8_C( -47), INT8_C( -77)), \
    simde_mm_set_pi8(INT8_C( 127), INT8_C(-128), INT8_C( -84), INT8_C( 127), INT8_C( -93), INT8_C(-128), INT8_C( -19), INT8_C( 127)) }

/* Runs simde_mm_subs_pi8 over the shared cases and checks every result as
 * eight 8-bit lanes.  Returns 0 once all cases have been checked. */
static int
test_simde_mm_subs_pi8(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m64 a;
    simde__m64 b;
    simde__m64 r;
  } test_vec[8] = {
    SIMDE_TEST_MMX_CASES_SUBS_PI8_
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m64 r = simde_mm_subs_pi8(test_vec[i].a, test_vec[i].b);
    simde_mm_empty();
    simde_test_x86_assert_equal_i8x8(r, test_vec[i].r);
  }

  simde_mm_empty();
  return 0;
}

/* Same cases as test_simde_mm_subs_pi8, driven through simde_m_psubsb. */
static int
test_simde_m_psubsb(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m64 a;
    simde__m64 b;
    simde__m64 r;
  } test_vec[8] = {
    SIMDE_TEST_MMX_CASES_SUBS_PI8_
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m64 r = simde_m_psubsb(test_vec[i].a, test_vec[i].b);
    simde_mm_empty();
    simde_test_x86_assert_equal_i8x8(r, test_vec[i].r);
  }

  simde_mm_empty();
  return 0;
}

#undef SIMDE_TEST_MMX_CASES_SUBS_PI8_

/* Cases shared by test_simde_mm_subs_pu8 and test_simde_m_psubusb.  The lanes
 * are spelled as signed 8-bit literals and compared with the i8x8 assertion,
 * exactly as in the original tables. */
#define SIMDE_TEST_MMX_CASES_SUBS_PU8_ \
  { simde_mm_set_pi8(INT8_C( 108), INT8_C(-104), INT8_C( 106), INT8_C(  91), INT8_C(  54), INT8_C(  95), INT8_C( -86), INT8_C( -68)), \
    simde_mm_set_pi8(INT8_C( -73), INT8_C( -12), INT8_C(  13), INT8_C(  -7), INT8_C(-102), INT8_C( -27), INT8_C( -93), INT8_C(  -1)), \
    simde_mm_set_pi8(INT8_C(   0), INT8_C(   0), INT8_C(  93), INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   7), INT8_C(   0)) }, \
  { simde_mm_set_pi8(INT8_C(  -3), INT8_C(-122), INT8_C(-110), INT8_C( -87), INT8_C( -28), INT8_C( -38), INT8_C(  30), INT8_C( -22)), \
    simde_mm_set_pi8(INT8_C( -24), INT8_C(  92), INT8_C( -83), INT8_C( -90), INT8_C(-108), INT8_C(-117), INT8_C( 101), INT8_C( -58)), \
    simde_mm_set_pi8(INT8_C(  21), INT8_C(  42), INT8_C(   0), INT8_C(   3), INT8_C(  80), INT8_C(  79), INT8_C(   0), INT8_C(  36)) }, \
  { simde_mm_set_pi8(INT8_C( -50), INT8_C(  51), INT8_C(  -7), INT8_C( -68), INT8_C(  -7), INT8_C(  11), INT8_C(  15), INT8_C(   2)), \
    simde_mm_set_pi8(INT8_C(  -4), INT8_C(-101), INT8_C(-106), INT8_C( -43), INT8_C(-124), INT8_C(   1), INT8_C( -19), INT8_C(  18)), \
    simde_mm_set_pi8(INT8_C(   0), INT8_C(   0), INT8_C(  99), INT8_C(   0), INT8_C( 117), INT8_C(  10), INT8_C(   0), INT8_C(   0)) }, \
  { simde_mm_set_pi8(INT8_C(  20), INT8_C( -33), INT8_C( -99), INT8_C(  -4), INT8_C(-119), INT8_C(  72), INT8_C( 104), INT8_C( -43)), \
    simde_mm_set_pi8(INT8_C( -27), INT8_C( 116), INT8_C( 127), INT8_C(  71), INT8_C( 110), INT8_C(  47), INT8_C(  56), INT8_C( -18)), \
    simde_mm_set_pi8(INT8_C(   0), INT8_C( 107), INT8_C(  30), INT8_C( -75), INT8_C(  27), INT8_C(  25), INT8_C(  48), INT8_C(   0)) }, \
  { simde_mm_set_pi8(INT8_C(  24), INT8_C(  44), INT8_C( 126), INT8_C( -16), INT8_C(  48), INT8_C( 119), INT8_C( 122), INT8_C(  92)), \
    simde_mm_set_pi8(INT8_C( -53), INT8_C(  93), INT8_C( 123), INT8_C(  43), INT8_C(  -1), INT8_C( -86), INT8_C(  12), INT8_C( -40)), \
    simde_mm_set_pi8(INT8_C(   0), INT8_C(   0), INT8_C(   3), INT8_C( -59), INT8_C(   0), INT8_C(   0), INT8_C( 110), INT8_C(   0)) }, \
  { simde_mm_set_pi8(INT8_C(  43), INT8_C( -29), INT8_C(  72), INT8_C( -16), INT8_C(  73), INT8_C(  36), INT8_C(  38), INT8_C(-122)), \
    simde_mm_set_pi8(INT8_C(  68), INT8_C(  17), INT8_C(-105), INT8_C( 112), INT8_C( 123), INT8_C(-118), INT8_C(  37), INT8_C(  35)), \
    simde_mm_set_pi8(INT8_C(   0), INT8_C( -46), INT8_C(   0), INT8_C(-128), INT8_C(   0), INT8_C(   0), INT8_C(   1), INT8_C(  99)) }, \
  { simde_mm_set_pi8(INT8_C(  78), INT8_C(  25), INT8_C(-123), INT8_C(-114), INT8_C(  56), INT8_C(  33), INT8_C( -54), INT8_C(  46)), \
    simde_mm_set_pi8(INT8_C( -71), INT8_C( 113), INT8_C( -52), INT8_C( -21), INT8_C(-112), INT8_C( -45), INT8_C( 117), INT8_C( -91)), \
    simde_mm_set_pi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  85), INT8_C(   0)) }, \
  { simde_mm_set_pi8(INT8_C(  35), INT8_C(  56), INT8_C( 106), INT8_C( 118), INT8_C( -12), INT8_C( -92), INT8_C( -24), INT8_C(  93)), \
    simde_mm_set_pi8(INT8_C(-118), INT8_C( -26), INT8_C( -47), INT8_C(  86), INT8_C( -69), INT8_C(  43), INT8_C( 117), INT8_C( 101)), \
    simde_mm_set_pi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  32), INT8_C(  57), INT8_C( 121), INT8_C( 115), INT8_C(   0)) }

/* Runs simde_mm_subs_pu8 over the shared cases and checks every result as
 * eight 8-bit lanes.  Returns 0 once all cases have been checked. */
static int
test_simde_mm_subs_pu8(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m64 a;
    simde__m64 b;
    simde__m64 r;
  } test_vec[8] = {
    SIMDE_TEST_MMX_CASES_SUBS_PU8_
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m64 r = simde_mm_subs_pu8(test_vec[i].a, test_vec[i].b);
    simde_mm_empty();
    simde_test_x86_assert_equal_i8x8(r, test_vec[i].r);
  }

  simde_mm_empty();
  return 0;
}

/* Same cases as test_simde_mm_subs_pu8, driven through simde_m_psubusb. */
static int
test_simde_m_psubusb(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m64 a;
    simde__m64 b;
    simde__m64 r;
  } test_vec[8] = {
    SIMDE_TEST_MMX_CASES_SUBS_PU8_
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m64 r = simde_m_psubusb(test_vec[i].a, test_vec[i].b);
    simde_mm_empty();
    simde_test_x86_assert_equal_i8x8(r, test_vec[i].r);
  }

  simde_mm_empty();
  return 0;
}

#undef SIMDE_TEST_MMX_CASES_SUBS_PU8_

/* Cases shared by test_simde_mm_subs_pi16 and test_simde_m_psubsw. */
#define SIMDE_TEST_MMX_CASES_SUBS_PI16_ \
  { simde_mm_set_pi16(INT16_C(    997), INT16_C(  -2676), INT16_C( -29256), INT16_C( -21534)), \
    simde_mm_set_pi16(INT16_C(  25057), INT16_C(  -8634), INT16_C(  14564), INT16_C(  23460)), \
    simde_mm_set_pi16(INT16_C( -24060), INT16_C(   5958), INT16_C( -32768), INT16_C( -32768)) }, \
  { simde_mm_set_pi16(INT16_C(  17773), INT16_C( -21379), INT16_C( -10016), INT16_C( -25057)), \
    simde_mm_set_pi16(INT16_C( -17494), INT16_C(  -5727), INT16_C( -23865), INT16_C( -12297)), \
    simde_mm_set_pi16(INT16_C(  32767), INT16_C( -15652), INT16_C(  13849), INT16_C( -12760)) }, \
  { simde_mm_set_pi16(INT16_C( -18595), INT16_C( -25519), INT16_C(  25647), INT16_C(  18081)), \
    simde_mm_set_pi16(INT16_C( -16730), INT16_C(   8578), INT16_C( -24195), INT16_C( -23138)), \
    simde_mm_set_pi16(INT16_C(  -1865), INT16_C( -32768), INT16_C(  32767), INT16_C(  32767)) }, \
  { simde_mm_set_pi16(INT16_C(  30835), INT16_C(  -1900), INT16_C( -12465), INT16_C( -32273)), \
    simde_mm_set_pi16(INT16_C(  22212), INT16_C(  29314), INT16_C(  30369), INT16_C(  -7474)), \
    simde_mm_set_pi16(INT16_C(   8623), INT16_C( -31214), INT16_C( -32768), INT16_C( -24799)) }, \
  { simde_mm_set_pi16(INT16_C(  -4511), INT16_C( -11707), INT16_C(   -456), INT16_C(   4939)), \
    simde_mm_set_pi16(INT16_C(   9564), INT16_C(  -6551), INT16_C(  15884), INT16_C(  25916)), \
    simde_mm_set_pi16(INT16_C( -14075), INT16_C(  -5156), INT16_C( -16340), INT16_C( -20977)) }, \
  { simde_mm_set_pi16(INT16_C(  16747), INT16_C(  26115), INT16_C(  28725), INT16_C(  -9489)), \
    simde_mm_set_pi16(INT16_C(  18589), INT16_C(  10790), INT16_C(  16046), INT16_C(   7670)), \
    simde_mm_set_pi16(INT16_C(  -1842), INT16_C(  15325), INT16_C(  12679), INT16_C( -17159)) }, \
  { simde_mm_set_pi16(INT16_C(  12230), INT16_C(  31818), INT16_C( -20400), INT16_C(  29194)), \
    simde_mm_set_pi16(INT16_C(  13624), INT16_C( -27762), INT16_C(  -3717), INT16_C(   9357)), \
    simde_mm_set_pi16(INT16_C(  -1394), INT16_C(  32767), INT16_C( -16683), INT16_C(  19837)) }, \
  { simde_mm_set_pi16(INT16_C(   4223), INT16_C(  22129), INT16_C(  27682), INT16_C(   6112)), \
    simde_mm_set_pi16(INT16_C(  25462), INT16_C(   1497), INT16_C( -20195), INT16_C( -31363)), \
    simde_mm_set_pi16(INT16_C( -21239), INT16_C(  20632), INT16_C(  32767), INT16_C(  32767)) }

/* Runs simde_mm_subs_pi16 over the shared cases and checks every result as
 * four 16-bit lanes.  Returns 0 once all cases have been checked. */
static int
test_simde_mm_subs_pi16(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m64 a;
    simde__m64 b;
    simde__m64 r;
  } test_vec[8] = {
    SIMDE_TEST_MMX_CASES_SUBS_PI16_
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m64 r = simde_mm_subs_pi16(test_vec[i].a, test_vec[i].b);
    simde_mm_empty();
    simde_test_x86_assert_equal_i16x4(r, test_vec[i].r);
  }

  simde_mm_empty();
  return 0;
}

/* Same cases as test_simde_mm_subs_pi16, driven through simde_m_psubsw. */
static int
test_simde_m_psubsw(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m64 a;
    simde__m64 b;
    simde__m64 r;
  } test_vec[8] = {
    SIMDE_TEST_MMX_CASES_SUBS_PI16_
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m64 r = simde_m_psubsw(test_vec[i].a, test_vec[i].b);
    simde_mm_empty();
    simde_test_x86_assert_equal_i16x4(r, test_vec[i].r);
  }

  simde_mm_empty();
  return 0;
}

#undef SIMDE_TEST_MMX_CASES_SUBS_PI16_

/* Cases shared by test_simde_mm_subs_pu16 and test_simde_m_psubusw.  The
 * lanes are spelled as signed 16-bit literals and compared with the i16x4
 * assertion, exactly as in the original tables. */
#define SIMDE_TEST_MMX_CASES_SUBS_PU16_ \
  { simde_mm_set_pi16(INT16_C( -14933), INT16_C(    874), INT16_C( -12812), INT16_C( -23674)), \
    simde_mm_set_pi16(INT16_C(  10199), INT16_C( -21634), INT16_C( -16349), INT16_C(  -2233)), \
    simde_mm_set_pi16(INT16_C( -25132), INT16_C(      0), INT16_C(   3537), INT16_C(      0)) }, \
  { simde_mm_set_pi16(INT16_C( -30411), INT16_C(  14403), INT16_C(  16019), INT16_C(  -7235)), \
    simde_mm_set_pi16(INT16_C(  20809), INT16_C(  30553), INT16_C( -13348), INT16_C(  -9019)), \
    simde_mm_set_pi16(INT16_C(  14316), INT16_C(      0), INT16_C(      0), INT16_C(   1784)) }, \
  { simde_mm_set_pi16(INT16_C(  -3263), INT16_C(  17129), INT16_C(   7120), INT16_C(  17541)), \
    simde_mm_set_pi16(INT16_C(  17758), INT16_C( -24273), INT16_C( -16817), INT16_C( -26381)), \
    simde_mm_set_pi16(INT16_C( -21021), INT16_C(      0), INT16_C(      0), INT16_C(      0)) }, \
  { simde_mm_set_pi16(INT16_C(  28253), INT16_C( -27429), INT16_C(  -2971), INT16_C( -25455)), \
    simde_mm_set_pi16(INT16_C( -28858), INT16_C(  23971), INT16_C(  30194), INT16_C(  29959)), \
    simde_mm_set_pi16(INT16_C(      0), INT16_C(  14136), INT16_C(  32371), INT16_C(  10122)) }, \
  { simde_mm_set_pi16(INT16_C(  -5264), INT16_C(  -5469), INT16_C(   2876), INT16_C(  12913)), \
    simde_mm_set_pi16(INT16_C( -25438), INT16_C( -13476), INT16_C( -20493), INT16_C(   9684)), \
    simde_mm_set_pi16(INT16_C(  20174), INT16_C(   8007), INT16_C(      0), INT16_C(   3229)) }, \
  { simde_mm_set_pi16(INT16_C(  -6406), INT16_C(  29502), INT16_C( -32502), INT16_C(  29440)), \
    simde_mm_set_pi16(INT16_C(  24669), INT16_C(  29936), INT16_C( -12635), INT16_C( -28492)), \
    simde_mm_set_pi16(INT16_C( -31075), INT16_C(      0), INT16_C(      0), INT16_C(      0)) }, \
  { simde_mm_set_pi16(INT16_C(   1295), INT16_C(  17975), INT16_C( -25873), INT16_C(  -8332)), \
    simde_mm_set_pi16(INT16_C( -30157), INT16_C(    122), INT16_C( -20762), INT16_C(  12983)), \
    simde_mm_set_pi16(INT16_C(      0), INT16_C(  17853), INT16_C(      0), INT16_C( -21315)) }, \
  { simde_mm_set_pi16(INT16_C( -17654), INT16_C( -28720), INT16_C( -25036), INT16_C(  -2408)), \
    simde_mm_set_pi16(INT16_C(  32575), INT16_C(  13887), INT16_C(  23741), INT16_C( -32273)), \
    simde_mm_set_pi16(INT16_C(  15307), INT16_C(  22929), INT16_C(  16759), INT16_C(  29865)) }

/* Runs simde_mm_subs_pu16 over the shared cases and checks every result as
 * four 16-bit lanes.  Returns 0 once all cases have been checked. */
static int
test_simde_mm_subs_pu16(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m64 a;
    simde__m64 b;
    simde__m64 r;
  } test_vec[8] = {
    SIMDE_TEST_MMX_CASES_SUBS_PU16_
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m64 r = simde_mm_subs_pu16(test_vec[i].a, test_vec[i].b);
    simde_mm_empty();
    simde_test_x86_assert_equal_i16x4(r, test_vec[i].r);
  }

  simde_mm_empty();
  return 0;
}

/* Same cases as test_simde_mm_subs_pu16, driven through simde_m_psubusw. */
static int
test_simde_m_psubusw(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m64 a;
    simde__m64 b;
    simde__m64 r;
  } test_vec[8] = {
    SIMDE_TEST_MMX_CASES_SUBS_PU16_
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m64 r = simde_m_psubusw(test_vec[i].a, test_vec[i].b);
    simde_mm_empty();
    simde_test_x86_assert_equal_i16x4(r, test_vec[i].r);
  }

  simde_mm_empty();
  return 0;
}

#undef SIMDE_TEST_MMX_CASES_SUBS_PU16_

/* Cases shared by test_simde_mm_unpackhi_pi8 and test_simde_m_punpckhbw.  In
 * each case the arguments of r alternate between the first four arguments of
 * b and the first four arguments of a (b first). */
#define SIMDE_TEST_MMX_CASES_UNPACKHI_PI8_ \
  { simde_mm_set_pi8(INT8_C(-127), INT8_C(  48), INT8_C(  42), INT8_C( 115), INT8_C( -77), INT8_C(   4), INT8_C(  25), INT8_C( -42)), \
    simde_mm_set_pi8(INT8_C(  57), INT8_C(  92), INT8_C( -39), INT8_C( -42), INT8_C(  73), INT8_C(   4), INT8_C(  41), INT8_C( 118)), \
    simde_mm_set_pi8(INT8_C(  57), INT8_C(-127), INT8_C(  92), INT8_C(  48), INT8_C( -39), INT8_C(  42), INT8_C( -42), INT8_C( 115)) }, \
  { simde_mm_set_pi8(INT8_C( -16), INT8_C( 120), INT8_C(  16), INT8_C( 116), INT8_C( -35), INT8_C(-100), INT8_C(   0), INT8_C( -39)), \
    simde_mm_set_pi8(INT8_C(  63), INT8_C( -73), INT8_C(  48), INT8_C( -66), INT8_C( -33), INT8_C(-102), INT8_C( -62), INT8_C( 118)), \
    simde_mm_set_pi8(INT8_C(  63), INT8_C( -16), INT8_C( -73), INT8_C( 120), INT8_C(  48), INT8_C(  16), INT8_C( -66), INT8_C( 116)) }, \
  { simde_mm_set_pi8(INT8_C( -24), INT8_C( -49), INT8_C(  20), INT8_C(  34), INT8_C(  -1), INT8_C(  63), INT8_C(  11), INT8_C( -36)), \
    simde_mm_set_pi8(INT8_C( -97), INT8_C(  52), INT8_C(  62), INT8_C( -48), INT8_C( -15), INT8_C(  24), INT8_C(  18), INT8_C( -28)), \
    simde_mm_set_pi8(INT8_C( -97), INT8_C( -24), INT8_C(  52), INT8_C( -49), INT8_C(  62), INT8_C(  20), INT8_C( -48), INT8_C(  34)) }, \
  { simde_mm_set_pi8(INT8_C(  34), INT8_C( -74), INT8_C( -88), INT8_C( -68), INT8_C(  80), INT8_C(  80), INT8_C( -27), INT8_C(-109)), \
    simde_mm_set_pi8(INT8_C( -14), INT8_C(  17), INT8_C( -50), INT8_C(  50), INT8_C( -72), INT8_C(-111), INT8_C( -32), INT8_C(-114)), \
    simde_mm_set_pi8(INT8_C( -14), INT8_C(  34), INT8_C(  17), INT8_C( -74), INT8_C( -50), INT8_C( -88), INT8_C(  50), INT8_C( -68)) }, \
  { simde_mm_set_pi8(INT8_C( -82), INT8_C(  34), INT8_C(  79), INT8_C(  75), INT8_C( -45), INT8_C(  43), INT8_C( -97), INT8_C(  55)), \
    simde_mm_set_pi8(INT8_C( 126), INT8_C( 126), INT8_C( 113), INT8_C( 122), INT8_C(   7), INT8_C(  69), INT8_C(  31), INT8_C(  83)), \
    simde_mm_set_pi8(INT8_C( 126), INT8_C( -82), INT8_C( 126), INT8_C(  34), INT8_C( 113), INT8_C(  79), INT8_C( 122), INT8_C(  75)) }, \
  { simde_mm_set_pi8(INT8_C(  -4), INT8_C( -98), INT8_C(   7), INT8_C(  88), INT8_C( -93), INT8_C(  56), INT8_C( -38), INT8_C( -15)), \
    simde_mm_set_pi8(INT8_C(  75), INT8_C(  97), INT8_C(  76), INT8_C(  26), INT8_C(-119), INT8_C( -96), INT8_C( -74), INT8_C( -24)), \
    simde_mm_set_pi8(INT8_C(  75), INT8_C(  -4), INT8_C(  97), INT8_C( -98), INT8_C(  76), INT8_C(   7), INT8_C(  26), INT8_C(  88)) }, \
  { simde_mm_set_pi8(INT8_C( 124), INT8_C(  71), INT8_C( -14), INT8_C(  19), INT8_C( -69), INT8_C( -31), INT8_C(  35), INT8_C( -82)), \
    simde_mm_set_pi8(INT8_C( -31), INT8_C( 125), INT8_C(  35), INT8_C(  84), INT8_C( 105), INT8_C(-115), INT8_C(  11), INT8_C( -12)), \
    simde_mm_set_pi8(INT8_C( -31), INT8_C( 124), INT8_C( 125), INT8_C(  71), INT8_C(  35), INT8_C( -14), INT8_C(  84), INT8_C(  19)) }, \
  { simde_mm_set_pi8(INT8_C(  45), INT8_C( -51), INT8_C( -71), INT8_C( -47), INT8_C( -27), INT8_C(  20), INT8_C(-117), INT8_C(  -5)), \
    simde_mm_set_pi8(INT8_C( -92), INT8_C( -74), INT8_C(  58), INT8_C( 117), INT8_C( -53), INT8_C(  43), INT8_C(  66), INT8_C( -55)), \
    simde_mm_set_pi8(INT8_C( -92), INT8_C(  45), INT8_C( -74), INT8_C( -51), INT8_C(  58), INT8_C( -71), INT8_C( 117), INT8_C( -47)) }

/* Runs simde_mm_unpackhi_pi8 over the shared cases and checks every result as
 * eight 8-bit lanes.  Returns 0 once all cases have been checked. */
static int
test_simde_mm_unpackhi_pi8(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m64 a;
    simde__m64 b;
    simde__m64 r;
  } test_vec[8] = {
    SIMDE_TEST_MMX_CASES_UNPACKHI_PI8_
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m64 r = simde_mm_unpackhi_pi8(test_vec[i].a, test_vec[i].b);
    simde_mm_empty();
    simde_test_x86_assert_equal_i8x8(r, test_vec[i].r);
  }

  simde_mm_empty();
  return 0;
}

/* Same cases as test_simde_mm_unpackhi_pi8, driven through simde_m_punpckhbw. */
static int
test_simde_m_punpckhbw(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m64 a;
    simde__m64 b;
    simde__m64 r;
  } test_vec[8] = {
    SIMDE_TEST_MMX_CASES_UNPACKHI_PI8_
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m64 r = simde_m_punpckhbw(test_vec[i].a, test_vec[i].b);
    simde_mm_empty();
    simde_test_x86_assert_equal_i8x8(r, test_vec[i].r);
  }

  simde_mm_empty();
  return 0;
}

#undef SIMDE_TEST_MMX_CASES_UNPACKHI_PI8_

/* Cases shared by test_simde_mm_unpackhi_pi16 and test_simde_m_punpckhwd.  In
 * each case the arguments of r alternate between the first two arguments of b
 * and the first two arguments of a (b first). */
#define SIMDE_TEST_MMX_CASES_UNPACKHI_PI16_ \
  { simde_mm_set_pi16(INT16_C( -14965), INT16_C(  28080), INT16_C( -15604), INT16_C( -10099)), \
    simde_mm_set_pi16(INT16_C(  16538), INT16_C( -18813), INT16_C(   -254), INT16_C( -23207)), \
    simde_mm_set_pi16(INT16_C(  16538), INT16_C( -14965), INT16_C( -18813), INT16_C(  28080)) }, \
  { simde_mm_set_pi16(INT16_C(  -4346), INT16_C( -29603), INT16_C(   1361), INT16_C(  16092)), \
    simde_mm_set_pi16(INT16_C(  -4428), INT16_C( -25960), INT16_C(   7111), INT16_C(  29823)), \
    simde_mm_set_pi16(INT16_C(  -4428), INT16_C(  -4346), INT16_C( -25960), INT16_C( -29603)) }, \
  { simde_mm_set_pi16(INT16_C( -22197), INT16_C( -13478), INT16_C(  29243), INT16_C(  -7146)), \
    simde_mm_set_pi16(INT16_C(  -6022), INT16_C( -10408), INT16_C(  -5121), INT16_C( -15640)), \
    simde_mm_set_pi16(INT16_C(  -6022), INT16_C( -22197), INT16_C( -10408), INT16_C( -13478)) }, \
  { simde_mm_set_pi16(INT16_C( -21336), INT16_C(  14878), INT16_C(  14164), INT16_C(   2727)), \
    simde_mm_set_pi16(INT16_C(  12579), INT16_C( -20797), INT16_C(  18011), INT16_C(   5438)), \
    simde_mm_set_pi16(INT16_C(  12579), INT16_C( -21336), INT16_C( -20797), INT16_C(  14878)) }, \
  { simde_mm_set_pi16(INT16_C( -20790), INT16_C( -21719), INT16_C( -12256), INT16_C( -17410)), \
    simde_mm_set_pi16(INT16_C(   4576), INT16_C(   6842), INT16_C( -12668), INT16_C( -11854)), \
    simde_mm_set_pi16(INT16_C(   4576), INT16_C( -20790), INT16_C(   6842), INT16_C( -21719)) }, \
  { simde_mm_set_pi16(INT16_C( -12751), INT16_C(  22951), INT16_C( -11466), INT16_C( -26387)), \
    simde_mm_set_pi16(INT16_C( -27771), INT16_C( -31462), INT16_C(  14453), INT16_C(  -2204)), \
    simde_mm_set_pi16(INT16_C( -27771), INT16_C( -12751), INT16_C( -31462), INT16_C(  22951)) }, \
  { simde_mm_set_pi16(INT16_C( -15685), INT16_C(  13196), INT16_C(  17198), INT16_C(  29713)), \
    simde_mm_set_pi16(INT16_C(  29600), INT16_C( -21832), INT16_C(  -7500), INT16_C(  31712)), \
    simde_mm_set_pi16(INT16_C(  29600), INT16_C( -15685), INT16_C( -21832), INT16_C(  13196)) }, \
  { simde_mm_set_pi16(INT16_C( -16681), INT16_C( -16529), INT16_C(  32728), INT16_C(  31459)), \
    simde_mm_set_pi16(INT16_C(  20407), INT16_C( -12854), INT16_C(  18433), INT16_C(   3119)), \
    simde_mm_set_pi16(INT16_C(  20407), INT16_C( -16681), INT16_C( -12854), INT16_C( -16529)) }

/* Runs simde_mm_unpackhi_pi16 over the shared cases and checks every result
 * as four 16-bit lanes.  Returns 0 once all cases have been checked. */
static int
test_simde_mm_unpackhi_pi16(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m64 a;
    simde__m64 b;
    simde__m64 r;
  } test_vec[8] = {
    SIMDE_TEST_MMX_CASES_UNPACKHI_PI16_
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m64 r = simde_mm_unpackhi_pi16(test_vec[i].a, test_vec[i].b);
    simde_mm_empty();
    simde_test_x86_assert_equal_i16x4(r, test_vec[i].r);
  }

  simde_mm_empty();
  return 0;
}

/* Same cases as test_simde_mm_unpackhi_pi16, driven through simde_m_punpckhwd. */
static int
test_simde_m_punpckhwd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m64 a;
    simde__m64 b;
    simde__m64 r;
  } test_vec[8] = {
    SIMDE_TEST_MMX_CASES_UNPACKHI_PI16_
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m64 r = simde_m_punpckhwd(test_vec[i].a, test_vec[i].b);
    simde_mm_empty();
    simde_test_x86_assert_equal_i16x4(r, test_vec[i].r);
  }

  simde_mm_empty();
  return 0;
}

#undef SIMDE_TEST_MMX_CASES_UNPACKHI_PI16_

/* Cases shared by test_simde_mm_unpackhi_pi32 and test_simde_m_punpckhdq.  In
 * each case r is built from the first argument of b followed by the first
 * argument of a. */
#define SIMDE_TEST_MMX_CASES_UNPACKHI_PI32_ \
  { simde_mm_set_pi32(INT32_C( -1658263771), INT32_C( -1249023590)), \
    simde_mm_set_pi32(INT32_C( -1692091894), INT32_C(   429039047)), \
    simde_mm_set_pi32(INT32_C( -1692091894), INT32_C( -1658263771)) }, \
  { simde_mm_set_pi32(INT32_C(   900819254), INT32_C( -1069899126)), \
    simde_mm_set_pi32(INT32_C(  -400543833), INT32_C( -2013963668)), \
    simde_mm_set_pi32(INT32_C(  -400543833), INT32_C(   900819254)) }, \
  { simde_mm_set_pi32(INT32_C( -1005749657), INT32_C(  -188276900)), \
    simde_mm_set_pi32(INT32_C(   810155385), INT32_C(  -436942778)), \
    simde_mm_set_pi32(INT32_C(   810155385), INT32_C( -1005749657)) }, \
  { simde_mm_set_pi32(INT32_C(    43596265), INT32_C( -1556778284)), \
    simde_mm_set_pi32(INT32_C( -1634766739), INT32_C(  -297104207)), \
    simde_mm_set_pi32(INT32_C( -1634766739), INT32_C(    43596265)) }, \
  { simde_mm_set_pi32(INT32_C(   820557065), INT32_C(        2171)), \
    simde_mm_set_pi32(INT32_C(  1748389432), INT32_C(  1779087168)), \
    simde_mm_set_pi32(INT32_C(  1748389432), INT32_C(   820557065)) }, \
  { simde_mm_set_pi32(INT32_C(  -106826552), INT32_C(  -791842435)), \
    simde_mm_set_pi32(INT32_C(  2006847448), INT32_C(   484681450)), \
    simde_mm_set_pi32(INT32_C(  2006847448), INT32_C(  -106826552)) }, \
  { simde_mm_set_pi32(INT32_C(  1892029634), INT32_C(  -899748289)), \
    simde_mm_set_pi32(INT32_C(  1496471605), INT32_C(   840905121)), \
    simde_mm_set_pi32(INT32_C(  1496471605), INT32_C(  1892029634)) }, \
  { simde_mm_set_pi32(INT32_C(  1293223526), INT32_C(  -574905244)), \
    simde_mm_set_pi32(INT32_C(    57909389), INT32_C(   -70830945)), \
    simde_mm_set_pi32(INT32_C(    57909389), INT32_C(  1293223526)) }

/* Runs simde_mm_unpackhi_pi32 over the shared cases and checks every result
 * as two 32-bit lanes.  Returns 0 once all cases have been checked. */
static int
test_simde_mm_unpackhi_pi32(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m64 a;
    simde__m64 b;
    simde__m64 r;
  } test_vec[8] = {
    SIMDE_TEST_MMX_CASES_UNPACKHI_PI32_
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m64 r = simde_mm_unpackhi_pi32(test_vec[i].a, test_vec[i].b);
    simde_mm_empty();
    simde_test_x86_assert_equal_i32x2(r, test_vec[i].r);
  }

  simde_mm_empty();
  return 0;
}

/* Same cases as test_simde_mm_unpackhi_pi32, driven through simde_m_punpckhdq. */
static int
test_simde_m_punpckhdq(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m64 a;
    simde__m64 b;
    simde__m64 r;
  } test_vec[8] = {
    SIMDE_TEST_MMX_CASES_UNPACKHI_PI32_
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; i++) {
    simde__m64 r = simde_m_punpckhdq(test_vec[i].a, test_vec[i].b);
    simde_mm_empty();
    simde_test_x86_assert_equal_i32x2(r, test_vec[i].r);
  }

  simde_mm_empty();
  return 0;
}

#undef SIMDE_TEST_MMX_CASES_UNPACKHI_PI32_

static int test_simde_mm_unpacklo_pi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi8(INT8_C( -15), INT8_C( -27), INT8_C( -29), INT8_C( 2), INT8_C( 11), INT8_C( 105), INT8_C( -49), INT8_C( 15)), simde_mm_set_pi8(INT8_C( -90), INT8_C( 43), INT8_C( 55), INT8_C( 50), INT8_C(-102), INT8_C( 25), INT8_C( -40), INT8_C( 47)), simde_mm_set_pi8(INT8_C(-102), INT8_C( 11), INT8_C( 25), INT8_C( 105), INT8_C( -40), INT8_C( -49), INT8_C( 47), INT8_C( 15)) }, { simde_mm_set_pi8(INT8_C( 1), INT8_C( 83), INT8_C(-101), INT8_C( 117), INT8_C( -52), INT8_C( -74), INT8_C( -59), INT8_C( 121)), simde_mm_set_pi8(INT8_C(-102), INT8_C( 12), INT8_C( -28), INT8_C( 82), INT8_C(-122), INT8_C( 94), INT8_C( 127), INT8_C( -48)), simde_mm_set_pi8(INT8_C(-122), INT8_C( -52), INT8_C( 94), INT8_C( -74), INT8_C( 127), INT8_C( -59), INT8_C( -48), INT8_C( 121)) }, { simde_mm_set_pi8(INT8_C( 
13), INT8_C( 67), INT8_C( -73), INT8_C( -36), INT8_C( -93), INT8_C( 101), INT8_C(-107), INT8_C( 118)), simde_mm_set_pi8(INT8_C( 46), INT8_C( -72), INT8_C( -50), INT8_C( 34), INT8_C(-111), INT8_C( -17), INT8_C(-128), INT8_C(-126)), simde_mm_set_pi8(INT8_C(-111), INT8_C( -93), INT8_C( -17), INT8_C( 101), INT8_C(-128), INT8_C(-107), INT8_C(-126), INT8_C( 118)) }, { simde_mm_set_pi8(INT8_C( 4), INT8_C( -40), INT8_C( -73), INT8_C( 122), INT8_C( 85), INT8_C( 7), INT8_C( -54), INT8_C(-119)), simde_mm_set_pi8(INT8_C( -37), INT8_C( -80), INT8_C(-128), INT8_C( 69), INT8_C( 112), INT8_C( 50), INT8_C( 44), INT8_C( -11)), simde_mm_set_pi8(INT8_C( 112), INT8_C( 85), INT8_C( 50), INT8_C( 7), INT8_C( 44), INT8_C( -54), INT8_C( -11), INT8_C(-119)) }, { simde_mm_set_pi8(INT8_C(-113), INT8_C( 30), INT8_C( 68), INT8_C( 96), INT8_C( -94), INT8_C( -13), INT8_C( -38), INT8_C( -63)), simde_mm_set_pi8(INT8_C( -9), INT8_C( 29), INT8_C( 5), INT8_C( -22), INT8_C( 66), INT8_C( 94), INT8_C( -79), INT8_C( -1)), simde_mm_set_pi8(INT8_C( 66), INT8_C( -94), INT8_C( 94), INT8_C( -13), INT8_C( -79), INT8_C( -38), INT8_C( -1), INT8_C( -63)) }, { simde_mm_set_pi8(INT8_C( -42), INT8_C( -42), INT8_C( 41), INT8_C( -13), INT8_C( -41), INT8_C( -33), INT8_C( -24), INT8_C( -5)), simde_mm_set_pi8(INT8_C( 85), INT8_C( 79), INT8_C( 19), INT8_C( -95), INT8_C( 42), INT8_C(-124), INT8_C( -96), INT8_C(-122)), simde_mm_set_pi8(INT8_C( 42), INT8_C( -41), INT8_C(-124), INT8_C( -33), INT8_C( -96), INT8_C( -24), INT8_C(-122), INT8_C( -5)) }, { simde_mm_set_pi8(INT8_C( 28), INT8_C( 99), INT8_C( -57), INT8_C( 79), INT8_C( 40), INT8_C( -97), INT8_C( -80), INT8_C( 16)), simde_mm_set_pi8(INT8_C( 60), INT8_C( 0), INT8_C( -13), INT8_C( -90), INT8_C( 17), INT8_C( 14), INT8_C(-115), INT8_C( 116)), simde_mm_set_pi8(INT8_C( 17), INT8_C( 40), INT8_C( 14), INT8_C( -97), INT8_C(-115), INT8_C( -80), INT8_C( 116), INT8_C( 16)) }, { simde_mm_set_pi8(INT8_C( -26), INT8_C( -43), INT8_C( -21), INT8_C( 73), INT8_C( 83), INT8_C( 33), INT8_C( 
105), INT8_C( 57)), simde_mm_set_pi8(INT8_C( -29), INT8_C( 84), INT8_C( 15), INT8_C( -83), INT8_C( -51), INT8_C( 60), INT8_C( -18), INT8_C( 19)), simde_mm_set_pi8(INT8_C( -51), INT8_C( 83), INT8_C( 60), INT8_C( 33), INT8_C( -18), INT8_C( 105), INT8_C( 19), INT8_C( 57)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_unpacklo_pi8(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_punpcklbw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi8(INT8_C( -15), INT8_C( -27), INT8_C( -29), INT8_C( 2), INT8_C( 11), INT8_C( 105), INT8_C( -49), INT8_C( 15)), simde_mm_set_pi8(INT8_C( -90), INT8_C( 43), INT8_C( 55), INT8_C( 50), INT8_C(-102), INT8_C( 25), INT8_C( -40), INT8_C( 47)), simde_mm_set_pi8(INT8_C(-102), INT8_C( 11), INT8_C( 25), INT8_C( 105), INT8_C( -40), INT8_C( -49), INT8_C( 47), INT8_C( 15)) }, { simde_mm_set_pi8(INT8_C( 1), INT8_C( 83), INT8_C(-101), INT8_C( 117), INT8_C( -52), INT8_C( -74), INT8_C( -59), INT8_C( 121)), simde_mm_set_pi8(INT8_C(-102), INT8_C( 12), INT8_C( -28), INT8_C( 82), INT8_C(-122), INT8_C( 94), INT8_C( 127), INT8_C( -48)), simde_mm_set_pi8(INT8_C(-122), INT8_C( -52), INT8_C( 94), INT8_C( -74), INT8_C( 127), INT8_C( -59), INT8_C( -48), INT8_C( 121)) }, { simde_mm_set_pi8(INT8_C( 13), INT8_C( 67), INT8_C( -73), INT8_C( -36), INT8_C( -93), INT8_C( 101), INT8_C(-107), INT8_C( 118)), simde_mm_set_pi8(INT8_C( 46), INT8_C( -72), INT8_C( -50), INT8_C( 34), INT8_C(-111), INT8_C( -17), INT8_C(-128), INT8_C(-126)), simde_mm_set_pi8(INT8_C(-111), INT8_C( -93), INT8_C( -17), INT8_C( 101), INT8_C(-128), INT8_C(-107), INT8_C(-126), INT8_C( 118)) }, { simde_mm_set_pi8(INT8_C( 4), INT8_C( -40), INT8_C( -73), INT8_C( 122), INT8_C( 85), INT8_C( 7), INT8_C( -54), INT8_C(-119)), simde_mm_set_pi8(INT8_C( -37), INT8_C( -80), INT8_C(-128), 
INT8_C( 69), INT8_C( 112), INT8_C( 50), INT8_C( 44), INT8_C( -11)), simde_mm_set_pi8(INT8_C( 112), INT8_C( 85), INT8_C( 50), INT8_C( 7), INT8_C( 44), INT8_C( -54), INT8_C( -11), INT8_C(-119)) }, { simde_mm_set_pi8(INT8_C(-113), INT8_C( 30), INT8_C( 68), INT8_C( 96), INT8_C( -94), INT8_C( -13), INT8_C( -38), INT8_C( -63)), simde_mm_set_pi8(INT8_C( -9), INT8_C( 29), INT8_C( 5), INT8_C( -22), INT8_C( 66), INT8_C( 94), INT8_C( -79), INT8_C( -1)), simde_mm_set_pi8(INT8_C( 66), INT8_C( -94), INT8_C( 94), INT8_C( -13), INT8_C( -79), INT8_C( -38), INT8_C( -1), INT8_C( -63)) }, { simde_mm_set_pi8(INT8_C( -42), INT8_C( -42), INT8_C( 41), INT8_C( -13), INT8_C( -41), INT8_C( -33), INT8_C( -24), INT8_C( -5)), simde_mm_set_pi8(INT8_C( 85), INT8_C( 79), INT8_C( 19), INT8_C( -95), INT8_C( 42), INT8_C(-124), INT8_C( -96), INT8_C(-122)), simde_mm_set_pi8(INT8_C( 42), INT8_C( -41), INT8_C(-124), INT8_C( -33), INT8_C( -96), INT8_C( -24), INT8_C(-122), INT8_C( -5)) }, { simde_mm_set_pi8(INT8_C( 28), INT8_C( 99), INT8_C( -57), INT8_C( 79), INT8_C( 40), INT8_C( -97), INT8_C( -80), INT8_C( 16)), simde_mm_set_pi8(INT8_C( 60), INT8_C( 0), INT8_C( -13), INT8_C( -90), INT8_C( 17), INT8_C( 14), INT8_C(-115), INT8_C( 116)), simde_mm_set_pi8(INT8_C( 17), INT8_C( 40), INT8_C( 14), INT8_C( -97), INT8_C(-115), INT8_C( -80), INT8_C( 116), INT8_C( 16)) }, { simde_mm_set_pi8(INT8_C( -26), INT8_C( -43), INT8_C( -21), INT8_C( 73), INT8_C( 83), INT8_C( 33), INT8_C( 105), INT8_C( 57)), simde_mm_set_pi8(INT8_C( -29), INT8_C( 84), INT8_C( 15), INT8_C( -83), INT8_C( -51), INT8_C( 60), INT8_C( -18), INT8_C( 19)), simde_mm_set_pi8(INT8_C( -51), INT8_C( 83), INT8_C( 60), INT8_C( 33), INT8_C( -18), INT8_C( 105), INT8_C( 19), INT8_C( 57)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_punpcklbw(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int 
test_simde_mm_unpacklo_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( 14920), INT16_C( -14108), INT16_C( -18371), INT16_C( 4165)), simde_mm_set_pi16(INT16_C( 24125), INT16_C( 29535), INT16_C( 14450), INT16_C( 764)), simde_mm_set_pi16(INT16_C( 14450), INT16_C( -18371), INT16_C( 764), INT16_C( 4165)) }, { simde_mm_set_pi16(INT16_C( -29305), INT16_C( -20968), INT16_C( -31863), INT16_C( 1945)), simde_mm_set_pi16(INT16_C( 22380), INT16_C( -9274), INT16_C( -14525), INT16_C( 28073)), simde_mm_set_pi16(INT16_C( -14525), INT16_C( -31863), INT16_C( 28073), INT16_C( 1945)) }, { simde_mm_set_pi16(INT16_C( 29396), INT16_C( -4481), INT16_C( 16009), INT16_C( -7692)), simde_mm_set_pi16(INT16_C( 19262), INT16_C( -10592), INT16_C( 1200), INT16_C( -22541)), simde_mm_set_pi16(INT16_C( 1200), INT16_C( 16009), INT16_C( -22541), INT16_C( -7692)) }, { simde_mm_set_pi16(INT16_C( 10778), INT16_C( -30276), INT16_C( 31580), INT16_C( 4144)), simde_mm_set_pi16(INT16_C( -15899), INT16_C( 20583), INT16_C( -12863), INT16_C( 13808)), simde_mm_set_pi16(INT16_C( -12863), INT16_C( 31580), INT16_C( 13808), INT16_C( 4144)) }, { simde_mm_set_pi16(INT16_C( -30267), INT16_C( -14054), INT16_C( 22036), INT16_C( -6987)), simde_mm_set_pi16(INT16_C( -22296), INT16_C( 22035), INT16_C( -11029), INT16_C( 3882)), simde_mm_set_pi16(INT16_C( -11029), INT16_C( 22036), INT16_C( 3882), INT16_C( -6987)) }, { simde_mm_set_pi16(INT16_C( 1373), INT16_C( 25788), INT16_C( -14639), INT16_C( 18996)), simde_mm_set_pi16(INT16_C( 6580), INT16_C( 13730), INT16_C( -12979), INT16_C( -26646)), simde_mm_set_pi16(INT16_C( -12979), INT16_C( -14639), INT16_C( -26646), INT16_C( 18996)) }, { simde_mm_set_pi16(INT16_C( 27110), INT16_C( 18497), INT16_C( -15879), INT16_C( -18233)), simde_mm_set_pi16(INT16_C( -26068), INT16_C( -29214), INT16_C( 32362), INT16_C( -26103)), simde_mm_set_pi16(INT16_C( 32362), INT16_C( -15879), INT16_C( -26103), INT16_C( -18233)) 
}, { simde_mm_set_pi16(INT16_C( -3448), INT16_C( 28151), INT16_C( 21394), INT16_C( 2546)), simde_mm_set_pi16(INT16_C( 30183), INT16_C( -1624), INT16_C( 11589), INT16_C( 23080)), simde_mm_set_pi16(INT16_C( 11589), INT16_C( 21394), INT16_C( 23080), INT16_C( 2546)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_unpacklo_pi16(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_punpcklwd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( 14920), INT16_C( -14108), INT16_C( -18371), INT16_C( 4165)), simde_mm_set_pi16(INT16_C( 24125), INT16_C( 29535), INT16_C( 14450), INT16_C( 764)), simde_mm_set_pi16(INT16_C( 14450), INT16_C( -18371), INT16_C( 764), INT16_C( 4165)) }, { simde_mm_set_pi16(INT16_C( -29305), INT16_C( -20968), INT16_C( -31863), INT16_C( 1945)), simde_mm_set_pi16(INT16_C( 22380), INT16_C( -9274), INT16_C( -14525), INT16_C( 28073)), simde_mm_set_pi16(INT16_C( -14525), INT16_C( -31863), INT16_C( 28073), INT16_C( 1945)) }, { simde_mm_set_pi16(INT16_C( 29396), INT16_C( -4481), INT16_C( 16009), INT16_C( -7692)), simde_mm_set_pi16(INT16_C( 19262), INT16_C( -10592), INT16_C( 1200), INT16_C( -22541)), simde_mm_set_pi16(INT16_C( 1200), INT16_C( 16009), INT16_C( -22541), INT16_C( -7692)) }, { simde_mm_set_pi16(INT16_C( 10778), INT16_C( -30276), INT16_C( 31580), INT16_C( 4144)), simde_mm_set_pi16(INT16_C( -15899), INT16_C( 20583), INT16_C( -12863), INT16_C( 13808)), simde_mm_set_pi16(INT16_C( -12863), INT16_C( 31580), INT16_C( 13808), INT16_C( 4144)) }, { simde_mm_set_pi16(INT16_C( -30267), INT16_C( -14054), INT16_C( 22036), INT16_C( -6987)), simde_mm_set_pi16(INT16_C( -22296), INT16_C( 22035), INT16_C( -11029), INT16_C( 3882)), simde_mm_set_pi16(INT16_C( -11029), INT16_C( 22036), INT16_C( 3882), INT16_C( -6987)) }, { 
simde_mm_set_pi16(INT16_C( 1373), INT16_C( 25788), INT16_C( -14639), INT16_C( 18996)), simde_mm_set_pi16(INT16_C( 6580), INT16_C( 13730), INT16_C( -12979), INT16_C( -26646)), simde_mm_set_pi16(INT16_C( -12979), INT16_C( -14639), INT16_C( -26646), INT16_C( 18996)) }, { simde_mm_set_pi16(INT16_C( 27110), INT16_C( 18497), INT16_C( -15879), INT16_C( -18233)), simde_mm_set_pi16(INT16_C( -26068), INT16_C( -29214), INT16_C( 32362), INT16_C( -26103)), simde_mm_set_pi16(INT16_C( 32362), INT16_C( -15879), INT16_C( -26103), INT16_C( -18233)) }, { simde_mm_set_pi16(INT16_C( -3448), INT16_C( 28151), INT16_C( 21394), INT16_C( 2546)), simde_mm_set_pi16(INT16_C( 30183), INT16_C( -1624), INT16_C( 11589), INT16_C( 23080)), simde_mm_set_pi16(INT16_C( 11589), INT16_C( 21394), INT16_C( 23080), INT16_C( 2546)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_punpcklwd(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_unpacklo_pi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( -996466818), INT32_C( 42237187)), simde_mm_set_pi32(INT32_C( -37002499), INT32_C( -1170856260)), simde_mm_set_pi32(INT32_C( -1170856260), INT32_C( 42237187)) }, { simde_mm_set_pi32(INT32_C( 2063937130), INT32_C( 491318053)), simde_mm_set_pi32(INT32_C( -1702472225), INT32_C( 404431239)), simde_mm_set_pi32(INT32_C( 404431239), INT32_C( 491318053)) }, { simde_mm_set_pi32(INT32_C( 482157619), INT32_C( 2096228641)), simde_mm_set_pi32(INT32_C( 1577000773), INT32_C( -1308575062)), simde_mm_set_pi32(INT32_C( -1308575062), INT32_C( 2096228641)) }, { simde_mm_set_pi32(INT32_C( -296283078), INT32_C( -1136099560)), simde_mm_set_pi32(INT32_C( 813050106), INT32_C( 140703223)), simde_mm_set_pi32(INT32_C( 140703223), INT32_C( -1136099560)) }, { simde_mm_set_pi32(INT32_C( -1874282519), 
INT32_C( 1046328641)), simde_mm_set_pi32(INT32_C( 1711474246), INT32_C( 663714514)), simde_mm_set_pi32(INT32_C( 663714514), INT32_C( 1046328641)) }, { simde_mm_set_pi32(INT32_C( 414254548), INT32_C( -1137400610)), simde_mm_set_pi32(INT32_C( 1336205549), INT32_C( -1985285725)), simde_mm_set_pi32(INT32_C( -1985285725), INT32_C( -1137400610)) }, { simde_mm_set_pi32(INT32_C( -1928184284), INT32_C( 711404402)), simde_mm_set_pi32(INT32_C( 894723783), INT32_C( -331643442)), simde_mm_set_pi32(INT32_C( -331643442), INT32_C( 711404402)) }, { simde_mm_set_pi32(INT32_C( -1171624194), INT32_C( -943645737)), simde_mm_set_pi32(INT32_C( -1212436628), INT32_C( -1787000320)), simde_mm_set_pi32(INT32_C( -1787000320), INT32_C( -943645737)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_unpacklo_pi32(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_punpckldq(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( -996466818), INT32_C( 42237187)), simde_mm_set_pi32(INT32_C( -37002499), INT32_C( -1170856260)), simde_mm_set_pi32(INT32_C( -1170856260), INT32_C( 42237187)) }, { simde_mm_set_pi32(INT32_C( 2063937130), INT32_C( 491318053)), simde_mm_set_pi32(INT32_C( -1702472225), INT32_C( 404431239)), simde_mm_set_pi32(INT32_C( 404431239), INT32_C( 491318053)) }, { simde_mm_set_pi32(INT32_C( 482157619), INT32_C( 2096228641)), simde_mm_set_pi32(INT32_C( 1577000773), INT32_C( -1308575062)), simde_mm_set_pi32(INT32_C( -1308575062), INT32_C( 2096228641)) }, { simde_mm_set_pi32(INT32_C( -296283078), INT32_C( -1136099560)), simde_mm_set_pi32(INT32_C( 813050106), INT32_C( 140703223)), simde_mm_set_pi32(INT32_C( 140703223), INT32_C( -1136099560)) }, { simde_mm_set_pi32(INT32_C( -1874282519), INT32_C( 1046328641)), simde_mm_set_pi32(INT32_C( 1711474246), INT32_C( 
663714514)), simde_mm_set_pi32(INT32_C( 663714514), INT32_C( 1046328641)) }, { simde_mm_set_pi32(INT32_C( 414254548), INT32_C( -1137400610)), simde_mm_set_pi32(INT32_C( 1336205549), INT32_C( -1985285725)), simde_mm_set_pi32(INT32_C( -1985285725), INT32_C( -1137400610)) }, { simde_mm_set_pi32(INT32_C( -1928184284), INT32_C( 711404402)), simde_mm_set_pi32(INT32_C( 894723783), INT32_C( -331643442)), simde_mm_set_pi32(INT32_C( -331643442), INT32_C( 711404402)) }, { simde_mm_set_pi32(INT32_C( -1171624194), INT32_C( -943645737)), simde_mm_set_pi32(INT32_C( -1212436628), INT32_C( -1787000320)), simde_mm_set_pi32(INT32_C( -1787000320), INT32_C( -943645737)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_punpckldq(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_xor_si64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_cvtsi64_m64(INT64_C( 3540462192578516470)), simde_mm_cvtsi64_m64(INT64_C( 7953957601195225655)), simde_mm_cvtsi64_m64(INT64_C( 6863518614534072257)) }, { simde_mm_cvtsi64_m64(INT64_C( 3280097856998777041)), simde_mm_cvtsi64_m64(INT64_C( 7227524436289590224)), simde_mm_cvtsi64_m64(INT64_C( 5316618871007982337)) }, { simde_mm_cvtsi64_m64(INT64_C( -73768962290391525)), simde_mm_cvtsi64_m64(INT64_C( -8786938381172726443)), simde_mm_cvtsi64_m64(INT64_C( 8716556128933069646)) }, { simde_mm_cvtsi64_m64(INT64_C( -3834999859910724293)), simde_mm_cvtsi64_m64(INT64_C( 1473106142712794056)), simde_mm_cvtsi64_m64(INT64_C( -2398499088890937613)) }, { simde_mm_cvtsi64_m64(INT64_C( -2129742113263669437)), simde_mm_cvtsi64_m64(INT64_C( 8747348426473787001)), simde_mm_cvtsi64_m64(INT64_C( -7271780848289947334)) }, { simde_mm_cvtsi64_m64(INT64_C( 3415454954475332549)), simde_mm_cvtsi64_m64(INT64_C( -4751919769270097997)), simde_mm_cvtsi64_m64(INT64_C( 
-7968019982084324234)) }, { simde_mm_cvtsi64_m64(INT64_C( 2939655727369393330)), simde_mm_cvtsi64_m64(INT64_C( -201574666518844870)), simde_mm_cvtsi64_m64(INT64_C( -3028638143195201912)) }, { simde_mm_cvtsi64_m64(INT64_C( 2745915445215058834)), simde_mm_cvtsi64_m64(INT64_C( 3063327936426889284)), simde_mm_cvtsi64_m64(INT64_C( 907566634544925654)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_xor_si64(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i64x1(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_pxor(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_cvtsi64_m64(INT64_C( 3540462192578516470)), simde_mm_cvtsi64_m64(INT64_C( 7953957601195225655)), simde_mm_cvtsi64_m64(INT64_C( 6863518614534072257)) }, { simde_mm_cvtsi64_m64(INT64_C( 3280097856998777041)), simde_mm_cvtsi64_m64(INT64_C( 7227524436289590224)), simde_mm_cvtsi64_m64(INT64_C( 5316618871007982337)) }, { simde_mm_cvtsi64_m64(INT64_C( -73768962290391525)), simde_mm_cvtsi64_m64(INT64_C( -8786938381172726443)), simde_mm_cvtsi64_m64(INT64_C( 8716556128933069646)) }, { simde_mm_cvtsi64_m64(INT64_C( -3834999859910724293)), simde_mm_cvtsi64_m64(INT64_C( 1473106142712794056)), simde_mm_cvtsi64_m64(INT64_C( -2398499088890937613)) }, { simde_mm_cvtsi64_m64(INT64_C( -2129742113263669437)), simde_mm_cvtsi64_m64(INT64_C( 8747348426473787001)), simde_mm_cvtsi64_m64(INT64_C( -7271780848289947334)) }, { simde_mm_cvtsi64_m64(INT64_C( 3415454954475332549)), simde_mm_cvtsi64_m64(INT64_C( -4751919769270097997)), simde_mm_cvtsi64_m64(INT64_C( -7968019982084324234)) }, { simde_mm_cvtsi64_m64(INT64_C( 2939655727369393330)), simde_mm_cvtsi64_m64(INT64_C( -201574666518844870)), simde_mm_cvtsi64_m64(INT64_C( -3028638143195201912)) }, { simde_mm_cvtsi64_m64(INT64_C( 2745915445215058834)), simde_mm_cvtsi64_m64(INT64_C( 3063327936426889284)), 
simde_mm_cvtsi64_m64(INT64_C( 907566634544925654)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_pxor(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_i64x1(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_to_int(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int32_t r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( 187717888), INT32_C( 752961943)), INT32_C( 752961943) }, { simde_mm_set_pi32(INT32_C( 1573710578), INT32_C( 101880394)), INT32_C( 101880394) }, { simde_mm_set_pi32(INT32_C( 1011596849), INT32_C( 885891666)), INT32_C( 885891666) }, { simde_mm_set_pi32(INT32_C( -1107434699), INT32_C( -838173825)), INT32_C( -838173825) }, { simde_mm_set_pi32(INT32_C( 1945069486), INT32_C( 466583902)), INT32_C( 466583902) }, { simde_mm_set_pi32(INT32_C( 458761181), INT32_C( 257379889)), INT32_C( 257379889) }, { simde_mm_set_pi32(INT32_C( 848486959), INT32_C( -1415343346)), INT32_C( -1415343346) }, { simde_mm_set_pi32(INT32_C( -1452285617), INT32_C( -1697816479)), INT32_C( -1697816479) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int32_t r = simde_m_to_int(test_vec[i].a); simde_mm_empty(); simde_assert_equal_i32(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_to_int64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int64_t r; } test_vec[8] = { { simde_mm_cvtsi64_m64(INT64_C( -2003895301208818234)), INT64_C( -2003895301208818234) }, { simde_mm_cvtsi64_m64(INT64_C( -372926738147273591)), INT64_C( -372926738147273591) }, { simde_mm_cvtsi64_m64(INT64_C( -3656592147926155100)), INT64_C( -3656592147926155100) }, { simde_mm_cvtsi64_m64(INT64_C( 5100863564862776395)), INT64_C( 5100863564862776395) }, { simde_mm_cvtsi64_m64(INT64_C( -214027610699488575)), INT64_C( -214027610699488575) }, { simde_mm_cvtsi64_m64(INT64_C( -7630939822071486777)), INT64_C( -7630939822071486777) }, { 
simde_mm_cvtsi64_m64(INT64_C( 9123236376678660233)), INT64_C( 9123236376678660233) }, { simde_mm_cvtsi64_m64(INT64_C( 3260252501062812952)), INT64_C( 3260252501062812952) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int64_t r = simde_m_to_int64(test_vec[i].a); simde_mm_empty(); simde_assert_equal_i64(r, test_vec[i].r); } simde_mm_empty(); return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_pi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_pi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_pi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_pi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_setzero_si64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_pi8) SIMDE_TEST_FUNC_LIST_ENTRY(m_paddb) SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(m_paddw) SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_pi32) SIMDE_TEST_FUNC_LIST_ENTRY(m_paddd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_adds_pi8) SIMDE_TEST_FUNC_LIST_ENTRY(m_paddsb) SIMDE_TEST_FUNC_LIST_ENTRY(mm_adds_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(m_paddsw) SIMDE_TEST_FUNC_LIST_ENTRY(mm_adds_pu8) SIMDE_TEST_FUNC_LIST_ENTRY(m_paddusb) SIMDE_TEST_FUNC_LIST_ENTRY(mm_adds_pu16) SIMDE_TEST_FUNC_LIST_ENTRY(m_paddusw) SIMDE_TEST_FUNC_LIST_ENTRY(mm_and_si64) SIMDE_TEST_FUNC_LIST_ENTRY(m_pand) SIMDE_TEST_FUNC_LIST_ENTRY(mm_andnot_si64) SIMDE_TEST_FUNC_LIST_ENTRY(m_pandn) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_pi8) SIMDE_TEST_FUNC_LIST_ENTRY(m_pcmpeqb) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(m_pcmpeqw) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_pi32) SIMDE_TEST_FUNC_LIST_ENTRY(m_pcmpeqd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_pi8) SIMDE_TEST_FUNC_LIST_ENTRY(m_pcmpgtb) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(m_pcmpgtw) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_pi32) SIMDE_TEST_FUNC_LIST_ENTRY(m_pcmpgtd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtm64_si64) SIMDE_TEST_FUNC_LIST_ENTRY(m_to_int64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi32_si64) 
/* Continuation of the test registration list that is opened by
 * SIMDE_TEST_FUNC_LIST_BEGIN earlier in this file; one entry per line.
 *
 * Each argument matches the suffix of a test_simde_<name> function name used
 * in this file (presumably the function the entry macro refers to -- confirm
 * against the macro definition).  Entries mostly come in pairs: an mm_* name
 * immediately followed by an m_* name, which appears to be its alias.  The
 * first entry below completes a pair started by the preceding entry, and the
 * last entry's partner follows this run. */
SIMDE_TEST_FUNC_LIST_ENTRY(m_from_int)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi64_m64)
SIMDE_TEST_FUNC_LIST_ENTRY(m_from_int64)
/* No m_* entry directly follows this one. */
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi64_si32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_madd_pi16)
SIMDE_TEST_FUNC_LIST_ENTRY(m_pmaddwd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_mulhi_pi16)
SIMDE_TEST_FUNC_LIST_ENTRY(m_pmulhw)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_mullo_pi16)
SIMDE_TEST_FUNC_LIST_ENTRY(m_pmullw)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_or_si64)
SIMDE_TEST_FUNC_LIST_ENTRY(m_por)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_packs_pi16)
SIMDE_TEST_FUNC_LIST_ENTRY(m_packsswb)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_packs_pi32)
SIMDE_TEST_FUNC_LIST_ENTRY(m_packssdw)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_packs_pu16)
SIMDE_TEST_FUNC_LIST_ENTRY(m_packuswb)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_sll_pi16)
SIMDE_TEST_FUNC_LIST_ENTRY(m_psllw)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_sll_pi32)
SIMDE_TEST_FUNC_LIST_ENTRY(m_pslld)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_sll_si64)
SIMDE_TEST_FUNC_LIST_ENTRY(m_psllq)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_slli_pi16)
SIMDE_TEST_FUNC_LIST_ENTRY(m_psllwi)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_slli_pi32)
SIMDE_TEST_FUNC_LIST_ENTRY(m_pslldi)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_slli_si64)
SIMDE_TEST_FUNC_LIST_ENTRY(m_psllqi)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_srl_pi16)
SIMDE_TEST_FUNC_LIST_ENTRY(m_psrlw)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_srl_pi32)
SIMDE_TEST_FUNC_LIST_ENTRY(m_psrld)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_srl_si64)
SIMDE_TEST_FUNC_LIST_ENTRY(m_psrlq)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_srli_pi16)
SIMDE_TEST_FUNC_LIST_ENTRY(m_psrlwi)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_srli_pi32)
SIMDE_TEST_FUNC_LIST_ENTRY(m_psrldi)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_srli_si64)
SIMDE_TEST_FUNC_LIST_ENTRY(m_psrlqi)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_srai_pi16)
SIMDE_TEST_FUNC_LIST_ENTRY(m_psrawi)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_srai_pi32)
SIMDE_TEST_FUNC_LIST_ENTRY(m_psradi)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_sra_pi16)
SIMDE_TEST_FUNC_LIST_ENTRY(m_psraw)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_sra_pi32)
SIMDE_TEST_FUNC_LIST_ENTRY(m_psrad)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_pi8)
SIMDE_TEST_FUNC_LIST_ENTRY(m_psubb) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(m_psubw) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_pi32) SIMDE_TEST_FUNC_LIST_ENTRY(m_psubd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_subs_pi8) SIMDE_TEST_FUNC_LIST_ENTRY(m_psubsb) SIMDE_TEST_FUNC_LIST_ENTRY(mm_subs_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(m_psubsw) SIMDE_TEST_FUNC_LIST_ENTRY(mm_subs_pu8) SIMDE_TEST_FUNC_LIST_ENTRY(m_psubusb) SIMDE_TEST_FUNC_LIST_ENTRY(mm_subs_pu16) SIMDE_TEST_FUNC_LIST_ENTRY(m_psubusw) SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_pi8) SIMDE_TEST_FUNC_LIST_ENTRY(m_punpckhbw) SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(m_punpckhwd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_pi32) SIMDE_TEST_FUNC_LIST_ENTRY(m_punpckhdq) SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_pi8) SIMDE_TEST_FUNC_LIST_ENTRY(m_punpcklbw) SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(m_punpcklwd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_pi32) SIMDE_TEST_FUNC_LIST_ENTRY(m_punpckldq) SIMDE_TEST_FUNC_LIST_ENTRY(mm_xor_si64) SIMDE_TEST_FUNC_LIST_ENTRY(m_pxor) SIMDE_TEST_FUNC_LIST_ENTRY(m_to_int) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/run-tests.c000066400000000000000000000022141400333146700163350ustar00rootroot00000000000000#include "test-x86.h" #include "run-tests.h" static MunitSuite suites[] = { #define SIMDE_TEST_DECLARE_SUITE(name) \ { NULL, NULL, NULL, 1, MUNIT_SUITE_OPTION_NONE }, \ { NULL, NULL, NULL, 1, MUNIT_SUITE_OPTION_NONE }, \ { NULL, NULL, NULL, 1, MUNIT_SUITE_OPTION_NONE }, \ { NULL, NULL, NULL, 1, MUNIT_SUITE_OPTION_NONE }, #include "declare-suites.h" #undef SIMDE_TEST_DECLARE_SUITE { NULL, NULL, NULL, 0, MUNIT_SUITE_OPTION_NONE }, { NULL, NULL, NULL, 0, MUNIT_SUITE_OPTION_NONE } }; static MunitSuite suite = { "/x86", NULL, suites, 1, MUNIT_SUITE_OPTION_NONE }; MunitSuite* simde_tests_x86_get_suite(void) { int i = 0; #define SIMDE_TEST_DECLARE_SUITE(name) \ suites[i++] = *HEDLEY_CONCAT3(simde_test_x86_get_suite_, name, 
_native_c)(); \ suites[i++] = *HEDLEY_CONCAT3(simde_test_x86_get_suite_, name, _native_cpp)(); \ suites[i++] = *HEDLEY_CONCAT3(simde_test_x86_get_suite_, name, _emul_c)(); \ suites[i++] = *HEDLEY_CONCAT3(simde_test_x86_get_suite_, name, _emul_cpp)(); #include "declare-suites.h" #undef SIMDE_TEST_DECLARE_SUITE suites[i++] = *simde_tests_x86_avx512_get_suite(); return &suite; } simde-0.7.2/test/x86/run-tests.h000066400000000000000000000003351400333146700163440ustar00rootroot00000000000000#if defined(SIMDE_TESTS_X86_RUN_TESTS_H) #error File already included. #endif #define SIMDE_TESTS_X86_RUN_TESTS_H #include "../munit/munit.h" #include "avx512/run-tests.h" MunitSuite* simde_tests_x86_get_suite(void); simde-0.7.2/test/x86/skel.c000066400000000000000000002027171400333146700153410ustar00rootroot00000000000000/* These are just some skeletons I've been using to speed up the process of creating new tests for SSE functions. */ static int test_simde_mm_xxx_ps (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde_float32 a[4]; const simde_float32 b[4]; const simde_float32 r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_mm_xxx_ps(a, b); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128 a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m128 b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m128 r = simde_mm_xxx_ps(a, b); simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_xxx_ps (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const 
simde_float32 a[4]; const simde_float32 b[4]; const simde_float32 r[4]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) #endif }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_mm_xxx_ps(a, b); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float32 values[8 * 2 * sizeof(simde__m128)]; simde_test_x86_random_f32x4_full(8, 2, values, -1000.0f, 1000.0f, SIMDE_TEST_VEC_FLOAT_NAN); for (size_t i = 0 ; i < 8 ; i++) { simde__m128 a = simde_test_x86_random_extract_f32x4(i, 2, 0, values); simde__m128 b = simde_test_x86_random_extract_f32x4(i, 2, 1, values); simde__m128 r = simde_mm_xxx_ps(a, b); simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_xxx_pd (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde_float64 a[2]; const simde_float64 b[2]; const simde_float64 r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d b = simde_mm_loadu_pd(test_vec[i].b); simde__m128d r = simde_mm_xxx_pd(a, b); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128d a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128d b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128d r = simde_mm_xxx_pd(a, b); simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } 
/* Skeleton: variant of the two-operand 128-bit float64 test whose random inputs come from simde_test_x86_random_f64x2_full with SIMDE_TEST_VEC_FLOAT_NAN; the test_vec table reserves an empty section guarded by !defined(SIMDE_FAST_NANS). Disabled branch checks simde_mm_xxx_pd(a, b) against each test_vec entry and returns 0; active branch writes 8 a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm_xxx_pd (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde_float64 a[2]; const simde_float64 b[2]; const simde_float64 r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) #endif }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d b = simde_mm_loadu_pd(test_vec[i].b); simde__m128d r = simde_mm_xxx_pd(a, b); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); /* NOTE(review): sizeof(simde__m128d) is a byte count rather than a lane count, so this array looks larger than the 8 sets of 2 vectors it presumably holds; harmless, but confirm against simde_test_x86_random_f64x2_full. */ simde_float64 values[8 * 2 * sizeof(simde__m128d)]; simde_test_x86_random_f64x2_full(8, 2, values, -1000.0, 1000.0, SIMDE_TEST_VEC_FLOAT_NAN); for (size_t i = 0 ; i < 8 ; i++) { simde__m128d a = simde_test_x86_random_extract_f64x2(i, 2, 0, values); simde__m128d b = simde_test_x86_random_extract_f64x2(i, 2, 1, values); simde__m128d r = simde_mm_xxx_pd(a, b); simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: two-operand test for a 128-bit int8 operation ("xxx" is a placeholder). Disabled branch checks simde_mm_xxx_epi8(a, b) against each test_vec entry and returns 0; active branch writes 8 random a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm_xxx_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_xxx_epi8(a, b); simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i b = simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_xxx_epi8(a, b); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static
/* Skeleton: mask-form test for a 128-bit int8 operation ("xxx" is a placeholder), taking src, a simde__mmask16 k, a and b. Disabled branch checks simde_mm_mask_xxx_epi8(src, k, a, b) against each test_vec entry and returns 0; active branch writes 8 random src/k/a/b/r sets via the write helpers and returns 1. NOTE(review): this skeleton loads with simde_mm_loadu_epi8 while the unmasked and maskz int8 skeletons use simde_x_mm_loadu_epi8 -- confirm which spelling is intended. */ int test_simde_mm_mask_xxx_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int8_t src[16]; const simde__mmask16 k; const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_mm_loadu_epi8(test_vec[i].src); simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_mask_xxx_epi8(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i src = simde_test_x86_random_i8x16(); simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i b = simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_mask_xxx_epi8(src, k, a, b); simde_test_x86_write_i8x16(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: maskz-form test for a 128-bit int8 operation ("xxx" is a placeholder), taking a simde__mmask16 k, a and b. Disabled branch checks simde_mm_maskz_xxx_epi8(k, a, b) against each test_vec entry and returns 0; active branch writes 8 random k/a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm_maskz_xxx_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde__mmask16 k; const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_maskz_xxx_epi8(test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i b = simde_test_x86_random_i8x16(); simde__m128i r =
simde_mm_maskz_xxx_epi8(k, a, b); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: two-operand test for a 128-bit int16 operation ("xxx" is a placeholder). Disabled branch checks simde_mm_xxx_epi16(a, b) against each test_vec entry and returns 0; active branch writes 8 random a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm_xxx_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_xxx_epi16(a, b); simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_xxx_epi16(a, b); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: mask-form test for a 128-bit int16 operation ("xxx" is a placeholder), taking src, a simde__mmask8 k, a and b. Disabled branch checks simde_mm_mask_xxx_epi16(src, k, a, b) against each test_vec entry and returns 0; active branch writes 8 random src/k/a/b/r sets via the write helpers and returns 1. NOTE(review): this skeleton loads with simde_mm_loadu_epi16 while the unmasked and maskz int16 skeletons use simde_x_mm_loadu_epi16 -- confirm which spelling is intended. */ static int test_simde_mm_mask_xxx_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int16_t src[8]; const simde__mmask8 k; const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_mm_loadu_epi16(test_vec[i].src); simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_mask_xxx_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i src = simde_test_x86_random_i16x8(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a =
simde_test_x86_random_i16x8(); simde__m128i b = simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_mask_xxx_epi16(src, k, a, b); simde_test_x86_write_i16x8(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: maskz-form test for a 128-bit int16 operation ("xxx" is a placeholder), taking a simde__mmask8 k, a and b. Disabled branch checks simde_mm_maskz_xxx_epi16(k, a, b) against each test_vec entry and returns 0; active branch writes 8 random k/a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm_maskz_xxx_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde__mmask8 k; const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_maskz_xxx_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_maskz_xxx_epi16(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: two-operand test for a 128-bit int32 operation ("xxx" is a placeholder). Disabled branch checks simde_mm_xxx_epi32(a, b) against each test_vec entry and returns 0; active branch writes 8 random a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm_xxx_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_xxx_epi32(a, b); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); }
return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_xxx_epi32(a, b); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: mask-form test for a 128-bit int32 operation ("xxx" is a placeholder), taking src, a simde__mmask8 k, a and b. Disabled branch checks simde_mm_mask_xxx_epi32(src, k, a, b) against each test_vec entry and returns 0; active branch writes 8 random src/k/a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm_mask_xxx_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int32_t src[4]; const simde__mmask8 k; const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi32(test_vec[i].src); simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_mask_xxx_epi32(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i src = simde_test_x86_random_i32x4(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_mask_xxx_epi32(src, k, a, b); simde_test_x86_write_i32x4(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: maskz-form test for a 128-bit int32 operation ("xxx" is a placeholder), taking a simde__mmask8 k, a and b. Disabled branch checks simde_mm_maskz_xxx_epi32(k, a, b) against each test_vec entry and returns 0; active branch writes 8 random k/a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm_maskz_xxx_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde__mmask8 k; const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a);
simde__m128i b = simde_x_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_maskz_xxx_epi32(test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_maskz_xxx_epi32(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: two-operand test for a 128-bit int64 operation ("xxx" is a placeholder). Disabled branch checks simde_mm_xxx_epi64(a, b) against each test_vec entry and returns 0; active branch writes 8 random a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm_xxx_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_xxx_epi64(a, b); simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i b = simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_xxx_epi64(a, b); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: mask-form test for a 128-bit int64 operation ("xxx" is a placeholder), taking src, a simde__mmask8 k, a and b. Disabled branch checks simde_mm_mask_xxx_epi64(src, k, a, b) against each test_vec entry and returns 0; active branch writes 8 random src/k/a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm_mask_xxx_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int64_t src[2]; const simde__mmask8 k; const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i src = simde_x_mm_loadu_epi64(test_vec[i].src); simde__m128i a =
simde_x_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_mask_xxx_epi64(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i src = simde_test_x86_random_i64x2(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i b = simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_mask_xxx_epi64(src, k, a, b); simde_test_x86_write_i64x2(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: maskz-form test for a 128-bit int64 operation ("xxx" is a placeholder), taking a simde__mmask8 k, a and b. Disabled branch checks simde_mm_maskz_xxx_epi64(k, a, b) against each test_vec entry and returns 0; active branch writes 8 random k/a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm_maskz_xxx_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde__mmask8 k; const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_maskz_xxx_epi64(test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i b = simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_maskz_xxx_epi64(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: two-operand test for a 256-bit float32 operation ("xxx" is a placeholder). Disabled branch checks simde_mm256_xxx_ps(a, b) against each test_vec entry and returns 0; active branch writes 8 random a/b/r sets (inputs drawn from -1000.0 to 1000.0) via the write helpers and returns 1. */ static int test_simde_mm256_xxx_ps
(SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde_float32 a[8]; const simde_float32 b[8]; const simde_float32 r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 b = simde_mm256_loadu_ps(test_vec[i].b); simde__m256 r = simde_mm256_xxx_ps(a, b); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256 a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m256 b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m256 r = simde_mm256_xxx_ps(a, b); simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: two-operand test for a 256-bit float64 operation ("xxx" is a placeholder). Disabled branch checks simde_mm256_xxx_pd(a, b) against each test_vec entry and returns 0; active branch writes 8 random a/b/r sets (inputs drawn from -1000.0 to 1000.0) via the write helpers and returns 1. */ static int test_simde_mm256_xxx_pd (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde_float64 a[4]; const simde_float64 b[4]; const simde_float64 r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d b = simde_mm256_loadu_pd(test_vec[i].b); simde__m256d r = simde_mm256_xxx_pd(a, b); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256d a = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256d b = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256d r = simde_mm256_xxx_pd(a, b); simde_test_x86_write_f64x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: two-operand test for a 256-bit int8 operation ("xxx" is a placeholder). Disabled branch checks simde_mm256_xxx_epi8(a, b) against each test_vec entry and returns 0; active branch writes 8 random a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm256_xxx_epi8
(SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int8_t a[32]; const int8_t b[32]; const int8_t r[32]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi8(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi8(test_vec[i].b); simde__m256i r = simde_mm256_xxx_epi8(a, b); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i8x32(); simde__m256i b = simde_test_x86_random_i8x32(); simde__m256i r = simde_mm256_xxx_epi8(a, b); simde_test_x86_write_i8x32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: mask-form test for a 256-bit int8 operation ("xxx" is a placeholder), taking src, a simde__mmask32 k, a and b. Disabled branch checks simde_mm256_mask_xxx_epi8(src, k, a, b) against each test_vec entry and returns 0; active branch writes 8 random src/k/a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm256_mask_xxx_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int8_t src[32]; const simde__mmask32 k; const int8_t a[32]; const int8_t b[32]; const int8_t r[32]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i src = simde_x_mm256_loadu_epi8(test_vec[i].src); simde__m256i a = simde_x_mm256_loadu_epi8(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi8(test_vec[i].b); simde__m256i r = simde_mm256_mask_xxx_epi8(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i src = simde_test_x86_random_i8x32(); simde__mmask32 k = simde_test_x86_random_mmask32(); simde__m256i a = simde_test_x86_random_i8x32(); simde__m256i b = simde_test_x86_random_i8x32(); simde__m256i r = simde_mm256_mask_xxx_epi8(src, k, a, b); simde_test_x86_write_i8x32(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE);
simde_test_x86_write_i8x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: maskz-form test for a 256-bit int8 operation ("xxx" is a placeholder), taking a simde__mmask32 k, a and b. Disabled branch checks simde_mm256_maskz_xxx_epi8(k, a, b) against each test_vec entry and returns 0; active branch writes 8 random k/a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm256_maskz_xxx_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde__mmask32 k; const int8_t a[32]; const int8_t b[32]; const int8_t r[32]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi8(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi8(test_vec[i].b); simde__m256i r = simde_mm256_maskz_xxx_epi8(test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x32(r, simde_x_mm256_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask32 k = simde_test_x86_random_mmask32(); simde__m256i a = simde_test_x86_random_i8x32(); simde__m256i b = simde_test_x86_random_i8x32(); simde__m256i r = simde_mm256_maskz_xxx_epi8(k, a, b); simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: two-operand test for a 256-bit int16 operation ("xxx" is a placeholder). Disabled branch checks simde_mm256_xxx_epi16(a, b) against each test_vec entry and returns 0; active branch writes 8 random a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm256_xxx_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int16_t a[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_xxx_epi16(a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i16x16(); simde__m256i b = simde_test_x86_random_i16x16(); simde__m256i r = simde_mm256_xxx_epi16(a, b); simde_test_x86_write_i16x16(2, a, SIMDE_TEST_VEC_POS_FIRST);
simde_test_x86_write_i16x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: mask-form test for a 256-bit int16 operation ("xxx" is a placeholder), taking src, a simde__mmask16 k, a and b. Disabled branch checks simde_mm256_mask_xxx_epi16(src, k, a, b) against each test_vec entry and returns 0; active branch writes 8 random src/k/a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm256_mask_xxx_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int16_t src[16]; const simde__mmask16 k; const int16_t a[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i src = simde_x_mm256_loadu_epi16(test_vec[i].src); simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_mask_xxx_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i src = simde_test_x86_random_i16x16(); simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m256i a = simde_test_x86_random_i16x16(); simde__m256i b = simde_test_x86_random_i16x16(); simde__m256i r = simde_mm256_mask_xxx_epi16(src, k, a, b); simde_test_x86_write_i16x16(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: maskz-form test for a 256-bit int16 operation ("xxx" is a placeholder), taking a simde__mmask16 k, a and b. Disabled branch checks simde_mm256_maskz_xxx_epi16(k, a, b) against each test_vec entry and returns 0; active branch writes 8 random k/a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm256_maskz_xxx_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde__mmask16 k; const int16_t a[16]; const int16_t b[16]; const int16_t r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi16(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi16(test_vec[i].b); simde__m256i r = simde_mm256_maskz_xxx_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x16(r, simde_x_mm256_loadu_epi16(test_vec[i].r)); } return 0;
#else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m256i a = simde_test_x86_random_i16x16(); simde__m256i b = simde_test_x86_random_i16x16(); simde__m256i r = simde_mm256_maskz_xxx_epi16(k, a, b); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: two-operand test for a 256-bit int32 operation ("xxx" is a placeholder). Disabled branch checks simde_mm256_xxx_epi32(a, b) against each test_vec entry and returns 0; active branch writes 8 random a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm256_xxx_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int32_t a[8]; const int32_t b[8]; const int32_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi32(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi32(test_vec[i].b); simde__m256i r = simde_mm256_xxx_epi32(a, b); simde_test_x86_assert_equal_i32x8(r, simde_x_mm256_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i32x8(); simde__m256i b = simde_test_x86_random_i32x8(); simde__m256i r = simde_mm256_xxx_epi32(a, b); simde_test_x86_write_i32x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: two-operand test for a 256-bit int64 operation ("xxx" is a placeholder). Disabled branch checks simde_mm256_xxx_epi64(a, b) against each test_vec entry and returns 0; active branch writes 8 random a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm256_xxx_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int64_t a[4]; const int64_t b[4]; const int64_t r[4]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_x_mm256_loadu_epi64(test_vec[i].a); simde__m256i b = simde_x_mm256_loadu_epi64(test_vec[i].b); simde__m256i r = simde_mm256_xxx_epi64(a, b); simde_test_x86_assert_equal_i64x4(r, simde_x_mm256_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) {
simde__m256i a = simde_test_x86_random_i64x4(); simde__m256i b = simde_test_x86_random_i64x4(); simde__m256i r = simde_mm256_xxx_epi64(a, b); simde_test_x86_write_i64x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_xxx_ps (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_mm512_xxx_ps(a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512 a = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 b = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 r = simde_mm512_xxx_ps(a, b); simde_test_x86_write_f32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_xxx_ps (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_mm512_mask_xxx_ps(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; #else 
fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512 src = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m512 a = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 b = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 r = simde_mm512_mask_xxx_ps(src, k, a, b); simde_test_x86_write_f32x16(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_maskz_xxx_ps (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde__mmask16 k; const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_mm512_maskz_xxx_ps(test_vec[i].k, a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m512 a = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 b = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m512 r = simde_mm512_maskz_xxx_ps(k, a, b); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_xxx_pd 
(SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_mm512_xxx_pd(a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512d a = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m512d b = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m512d r = simde_mm512_xxx_pd(a, b); simde_test_x86_write_f64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: mask-form test for a 512-bit float64 operation ("xxx" is a placeholder), taking src, a simde__mmask8 k, a and b. Disabled branch checks simde_mm512_mask_xxx_pd(src, k, a, b) against each test_vec entry and returns 0; active branch writes 8 random src/k/a/b/r sets (inputs drawn from -1000.0 to 1000.0) via the write helpers and returns 1. */ static int test_simde_mm512_mask_xxx_pd (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_mm512_mask_xxx_pd(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512d src = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m512d a = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m512d b = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0),
SIMDE_FLOAT64_C(1000.0)); simde__m512d r = simde_mm512_mask_xxx_pd(src, k, a, b); simde_test_x86_write_f64x8(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: maskz-form test for a 512-bit float64 operation ("xxx" is a placeholder), taking a simde__mmask8 k, a and b. Disabled branch checks simde_mm512_maskz_xxx_pd(k, a, b) against each test_vec entry and returns 0; active branch writes 8 random k/a/b/r sets (inputs drawn from -1000.0 to 1000.0) via the write helpers and returns 1. */ static int test_simde_mm512_maskz_xxx_pd (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_mm512_maskz_xxx_pd(test_vec[i].k, a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m512d a = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m512d b = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m512d r = simde_mm512_maskz_xxx_pd(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: two-operand test for a 512-bit int8 operation ("xxx" is a placeholder). Disabled branch checks simde_mm512_xxx_epi8(a, b) against each test_vec entry and returns 0; active branch writes 8 random a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm512_xxx_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int8_t a[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_xxx_epi8(a, b);
simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i8x64(); simde__m512i b = simde_test_x86_random_i8x64(); simde__m512i r = simde_mm512_xxx_epi8(a, b); simde_test_x86_write_i8x64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: mask-form test for a 512-bit int8 operation ("xxx" is a placeholder), taking src, a simde__mmask64 k, a and b. Disabled branch checks simde_mm512_mask_xxx_epi8(src, k, a, b) against each test_vec entry and returns 0; active branch writes 8 random src/k/a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm512_mask_xxx_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int8_t src[64]; const simde__mmask64 k; const int8_t a[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi8(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_mask_xxx_epi8(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i src = simde_test_x86_random_i8x64(); simde__mmask64 k = simde_test_x86_random_mmask64(); simde__m512i a = simde_test_x86_random_i8x64(); simde__m512i b = simde_test_x86_random_i8x64(); simde__m512i r = simde_mm512_mask_xxx_epi8(src, k, a, b); simde_test_x86_write_i8x64(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask64(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } /* Skeleton: maskz-form test for a 512-bit int8 operation ("xxx" is a placeholder), taking a simde__mmask64 k, a and b. Disabled branch checks simde_mm512_maskz_xxx_epi8(k, a, b) against each test_vec entry and returns 0; active branch writes 8 random k/a/b/r sets via the write helpers and returns 1. */ static int test_simde_mm512_maskz_xxx_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde__mmask64 k; const int8_t a[64]; const int8_t b[64]; const int8_t r[64]; } test_vec[] = { }; for (size_t i = 0 ; i <
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_maskz_xxx_epi8(test_vec[i].k, a, b); simde_test_x86_assert_equal_i8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask64 k = simde_test_x86_random_mmask64(); simde__m512i a = simde_test_x86_random_i8x64(); simde__m512i b = simde_test_x86_random_i8x64(); simde__m512i r = simde_mm512_maskz_xxx_epi8(k, a, b); simde_test_x86_write_mmask64(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x64(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_xxx_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_xxx_epi16(a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i16x32(); simde__m512i b = simde_test_x86_random_i16x32(); simde__m512i r = simde_mm512_xxx_epi16(a, b); simde_test_x86_write_i16x32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_xxx_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int16_t src[32]; const simde__mmask32 k; const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi16(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_mask_xxx_epi16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i src = simde_test_x86_random_i16x32(); simde__mmask32 k = simde_test_x86_random_mmask32(); simde__m512i a = simde_test_x86_random_i16x32(); simde__m512i b = simde_test_x86_random_i16x32(); simde__m512i r = simde_mm512_mask_xxx_epi16(src, k, a, b); simde_test_x86_write_i16x32(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_maskz_xxx_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde__mmask32 k; const int16_t a[32]; const int16_t b[32]; const int16_t r[32]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_maskz_xxx_epi16(test_vec[i].k, a, b); simde_test_x86_assert_equal_i16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask32 k = simde_test_x86_random_mmask32(); simde__m512i a = simde_test_x86_random_i16x32(); simde__m512i b = simde_test_x86_random_i16x32(); simde__m512i r = simde_mm512_maskz_xxx_epi16(k, a, b); simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_x86_write_i16x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_xxx_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_xxx_epi32(a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_i32x16(); simde__m512i b = simde_test_x86_random_i32x16(); simde__m512i r = simde_mm512_xxx_epi32(a, b); simde_test_x86_write_i32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_xxx_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int32_t src[16]; const simde__mmask16 k; const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi32(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_mask_xxx_epi32(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i src = simde_test_x86_random_i32x16(); simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m512i a = simde_test_x86_random_i32x16(); simde__m512i b = simde_test_x86_random_i32x16(); simde__m512i r = simde_mm512_mask_xxx_epi32(src, k, 
a, b); simde_test_x86_write_i32x16(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_maskz_xxx_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde__mmask16 k; const int32_t a[16]; const int32_t b[16]; const int32_t r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_maskz_xxx_epi32(test_vec[i].k, a, b); simde_test_x86_assert_equal_i32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m512i a = simde_test_x86_random_i32x16(); simde__m512i b = simde_test_x86_random_i32x16(); simde__m512i r = simde_mm512_maskz_xxx_epi32(k, a, b); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_xxx_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_xxx_epi64(a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = 
simde_test_x86_random_i64x8(); simde__m512i b = simde_test_x86_random_i64x8(); simde__m512i r = simde_mm512_xxx_epi64(a, b); simde_test_x86_write_i64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_xxx_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const int64_t src[8]; const simde__mmask8 k; const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi64(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_mask_xxx_epi64(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i src = simde_test_x86_random_i64x8(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m512i a = simde_test_x86_random_i64x8(); simde__m512i b = simde_test_x86_random_i64x8(); simde__m512i r = simde_mm512_mask_xxx_epi64(src, k, a, b); simde_test_x86_write_i64x8(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_maskz_xxx_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde__mmask8 k; const int64_t a[8]; const int64_t b[8]; const int64_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i 
r = simde_mm512_maskz_xxx_epi64(test_vec[i].k, a, b); simde_test_x86_assert_equal_i64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m512i a = simde_test_x86_random_i64x8(); simde__m512i b = simde_test_x86_random_i64x8(); simde__m512i r = simde_mm512_maskz_xxx_epi64(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_xxx_epu8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const uint8_t a[64]; const uint8_t b[64]; const uint8_t r[64]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_xxx_epu8(a, b); simde_test_x86_assert_equal_u8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_u8x64(); simde__m512i b = simde_test_x86_random_u8x64(); simde__m512i r = simde_mm512_xxx_epu8(a, b); simde_test_x86_write_u8x64(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u8x64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u8x64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_xxx_epu8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const uint8_t src[64]; const simde__mmask64 k; const uint8_t a[64]; const uint8_t b[64]; const uint8_t r[64]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi8(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = 
simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_mask_xxx_epu8(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_u8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i src = simde_test_x86_random_u8x64(); simde__mmask64 k = simde_test_x86_random_mmask64(); simde__m512i a = simde_test_x86_random_u8x64(); simde__m512i b = simde_test_x86_random_u8x64(); simde__m512i r = simde_mm512_mask_xxx_epu8(src, k, a, b); simde_test_x86_write_u8x64(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask64(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u8x64(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u8x64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u8x64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_maskz_xxx_epu8 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde__mmask64 k; const uint8_t a[64]; const uint8_t b[64]; const uint8_t r[64]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi8(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi8(test_vec[i].b); simde__m512i r = simde_mm512_maskz_xxx_epu8(test_vec[i].k, a, b); simde_test_x86_assert_equal_u8x64(r, simde_mm512_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask64 k = simde_test_x86_random_mmask64(); simde__m512i a = simde_test_x86_random_u8x64(); simde__m512i b = simde_test_x86_random_u8x64(); simde__m512i r = simde_mm512_maskz_xxx_epu8(k, a, b); simde_test_x86_write_mmask64(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u8x64(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u8x64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u8x64(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_xxx_epu16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct 
{ const uint16_t a[32]; const uint16_t b[32]; const uint16_t r[32]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_xxx_epu16(a, b); simde_test_x86_assert_equal_u16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_u16x32(); simde__m512i b = simde_test_x86_random_u16x32(); simde__m512i r = simde_mm512_xxx_epu16(a, b); simde_test_x86_write_u16x32(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u16x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_xxx_epu16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const uint16_t src[32]; const simde__mmask32 k; const uint16_t a[32]; const uint16_t b[32]; const uint16_t r[32]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi16(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_mask_xxx_epu16(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_u16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i src = simde_test_x86_random_u16x32(); simde__mmask32 k = simde_test_x86_random_mmask32(); simde__m512i a = simde_test_x86_random_u16x32(); simde__m512i b = simde_test_x86_random_u16x32(); simde__m512i r = simde_mm512_mask_xxx_epu16(src, k, a, b); simde_test_x86_write_u16x32(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u16x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u16x32(2, 
b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_maskz_xxx_epu16 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde__mmask32 k; const uint16_t a[32]; const uint16_t b[32]; const uint16_t r[32]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi16(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi16(test_vec[i].b); simde__m512i r = simde_mm512_maskz_xxx_epu16(test_vec[i].k, a, b); simde_test_x86_assert_equal_u16x32(r, simde_mm512_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask32 k = simde_test_x86_random_mmask32(); simde__m512i a = simde_test_x86_random_u16x32(); simde__m512i b = simde_test_x86_random_u16x32(); simde__m512i r = simde_mm512_maskz_xxx_epu16(k, a, b); simde_test_x86_write_mmask32(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u16x32(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u16x32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u16x32(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_xxx_epu32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const uint32_t a[16]; const uint32_t b[16]; const uint32_t r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_xxx_epu32(a, b); simde_test_x86_assert_equal_u32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_u32x16(); simde__m512i b = simde_test_x86_random_u32x16(); simde__m512i r = simde_mm512_xxx_epu32(a, b); simde_test_x86_write_u32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u32x16(2, b, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_xxx_epu32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const uint32_t src[16]; const simde__mmask16 k; const uint32_t a[16]; const uint32_t b[16]; const uint32_t r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi32(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_mask_xxx_epu32(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_u32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i src = simde_test_x86_random_u32x16(); simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m512i a = simde_test_x86_random_u32x16(); simde__m512i b = simde_test_x86_random_u32x16(); simde__m512i r = simde_mm512_mask_xxx_epu32(src, k, a, b); simde_test_x86_write_u32x16(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u32x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_maskz_xxx_epu32 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde__mmask16 k; const uint32_t a[16]; const uint32_t b[16]; const uint32_t r[16]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi32(test_vec[i].b); simde__m512i r = simde_mm512_maskz_xxx_epu32(test_vec[i].k, a, b); simde_test_x86_assert_equal_u32x16(r, simde_mm512_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; 
i < 8 ; i++) { simde__mmask16 k = simde_test_x86_random_mmask16(); simde__m512i a = simde_test_x86_random_u32x16(); simde__m512i b = simde_test_x86_random_u32x16(); simde__m512i r = simde_mm512_maskz_xxx_epu32(k, a, b); simde_test_x86_write_mmask16(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u32x16(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u32x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_xxx_epu64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const uint64_t a[8]; const uint64_t b[8]; const uint64_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_xxx_epu64(a, b); simde_test_x86_assert_equal_u64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i a = simde_test_x86_random_u64x8(); simde__m512i b = simde_test_x86_random_u64x8(); simde__m512i r = simde_mm512_xxx_epu64(a, b); simde_test_x86_write_u64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_mask_xxx_epu64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const uint64_t src[8]; const simde__mmask8 k; const uint64_t a[8]; const uint64_t b[8]; const uint64_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i src = simde_mm512_loadu_epi64(test_vec[i].src); simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_mask_xxx_epu64(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_u64x8(r, 
simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m512i src = simde_test_x86_random_u64x8(); simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m512i a = simde_test_x86_random_u64x8(); simde__m512i b = simde_test_x86_random_u64x8(); simde__m512i r = simde_mm512_mask_xxx_epu64(src, k, a, b); simde_test_x86_write_u64x8(2, src, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u64x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_maskz_xxx_epu64 (SIMDE_MUNIT_TEST_ARGS) { #if 0 static const struct { const simde__mmask8 k; const uint64_t a[8]; const uint64_t b[8]; const uint64_t r[8]; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); simde__m512i b = simde_mm512_loadu_epi64(test_vec[i].b); simde__m512i r = simde_mm512_maskz_xxx_epu64(test_vec[i].k, a, b); simde_test_x86_assert_equal_u64x8(r, simde_mm512_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__mmask8 k = simde_test_x86_random_mmask8(); simde__m512i a = simde_test_x86_random_u64x8(); simde__m512i b = simde_test_x86_random_u64x8(); simde__m512i r = simde_mm512_maskz_xxx_epu64(k, a, b); simde_test_x86_write_mmask8(2, k, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u64x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u64x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u64x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm512_xxx_ps_mask (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 a[16]; const simde_float32 b[16]; const simde__mmask16 r; } test_vec[] = { }; for (size_t i = 0 ; i < (sizeof(test_vec) 
/ sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__mmask16 r = simde_mm512_xxx_ps_mask(a, b); simde_assert_mmask16(r, ==, test_vec[i].r); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde_float32 a_[16]; simde_float32 b_[16]; simde_test_codegen_random_vf32(16, a_, SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde_test_codegen_random_vf32(16, b_, SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); for (size_t j = 0 ; j < 16 ; j++) if (!(simde_test_codegen_random_i32() & 1)) a_[j] = b_[j]; simde__m512 a = simde_mm512_loadu_ps(a_); simde__m512 b = simde_mm512_loadu_ps(b_); simde__mmask16 r = simde_mm512_xxx_ps_mask(a, b); simde_test_x86_write_f32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_mmask16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } simde-0.7.2/test/x86/sse.c000066400000000000000000012210361400333146700151710ustar00rootroot00000000000000/* Copyright (c) 2017, 2019 Evan Nemerson * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define SIMDE_TESTS_CURRENT_ISAX sse #include #include #if defined(HEDLEY_MSVC_VERSION) # pragma warning(disable:4223) #endif #if defined(HEDLEY_MSVC_VERSION) # pragma warning(disable:4324) #endif static int test_simde_mm_set_ps(SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[sizeof(simde__m128) / sizeof(simde_float32)]; simde_float32 r[sizeof(simde__m128) / sizeof(simde_float32)]; } test_vec[8] = { { { SIMDE_FLOAT32_C( -97.93), SIMDE_FLOAT32_C( 487.12), SIMDE_FLOAT32_C( 843.18), SIMDE_FLOAT32_C( 237.66) }, { SIMDE_FLOAT32_C( -97.93), SIMDE_FLOAT32_C( 487.12), SIMDE_FLOAT32_C( 843.18), SIMDE_FLOAT32_C( 237.66) } }, { { SIMDE_FLOAT32_C( 627.94), SIMDE_FLOAT32_C( -96.43), SIMDE_FLOAT32_C( 561.80), SIMDE_FLOAT32_C( 279.44) }, { SIMDE_FLOAT32_C( 627.94), SIMDE_FLOAT32_C( -96.43), SIMDE_FLOAT32_C( 561.80), SIMDE_FLOAT32_C( 279.44) } }, { { SIMDE_FLOAT32_C( 624.69), SIMDE_FLOAT32_C( 44.07), SIMDE_FLOAT32_C( 250.52), SIMDE_FLOAT32_C( 5.41) }, { SIMDE_FLOAT32_C( 624.69), SIMDE_FLOAT32_C( 44.07), SIMDE_FLOAT32_C( 250.52), SIMDE_FLOAT32_C( 5.41) } }, { { SIMDE_FLOAT32_C( 782.09), SIMDE_FLOAT32_C( 437.78), SIMDE_FLOAT32_C( 640.50), SIMDE_FLOAT32_C( -79.45) }, { SIMDE_FLOAT32_C( 782.09), SIMDE_FLOAT32_C( 437.78), SIMDE_FLOAT32_C( 640.50), SIMDE_FLOAT32_C( -79.45) } }, { { SIMDE_FLOAT32_C( 962.63), SIMDE_FLOAT32_C( -46.96), SIMDE_FLOAT32_C( 706.41), SIMDE_FLOAT32_C( 465.51) }, { SIMDE_FLOAT32_C( 962.63), SIMDE_FLOAT32_C( -46.96), SIMDE_FLOAT32_C( 706.41), SIMDE_FLOAT32_C( 465.51) } }, { { SIMDE_FLOAT32_C( 701.40), SIMDE_FLOAT32_C( 189.03), SIMDE_FLOAT32_C( 149.80), SIMDE_FLOAT32_C( 519.08) }, { SIMDE_FLOAT32_C( 701.40), SIMDE_FLOAT32_C( 189.03), SIMDE_FLOAT32_C( 149.80), SIMDE_FLOAT32_C( 519.08) } }, { { 
SIMDE_FLOAT32_C( 706.01), SIMDE_FLOAT32_C( 918.36), SIMDE_FLOAT32_C( 149.95), SIMDE_FLOAT32_C( 205.77) }, { SIMDE_FLOAT32_C( 706.01), SIMDE_FLOAT32_C( 918.36), SIMDE_FLOAT32_C( 149.95), SIMDE_FLOAT32_C( 205.77) } }, { { SIMDE_FLOAT32_C( -61.28), SIMDE_FLOAT32_C( 87.71), SIMDE_FLOAT32_C( 709.93), SIMDE_FLOAT32_C( -59.21) }, { SIMDE_FLOAT32_C( -61.28), SIMDE_FLOAT32_C( 87.71), SIMDE_FLOAT32_C( 709.93), SIMDE_FLOAT32_C( -59.21) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32* a = test_vec[i].a; simde__m128 r = simde_mm_set_ps(a[3], a[2], a[1], a[0]); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_set_ps1(SIMDE_MUNIT_TEST_ARGS) { const struct { float a; simde__m128 r; } test_vec[8] = { { 0.29f, simde_mm_set_ps(SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.29)) }, { 0.14f, simde_mm_set_ps(SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.14)) }, { 0.98f, simde_mm_set_ps(SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.98)) }, { 0.88f, simde_mm_set_ps(SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.88)) }, { 0.92f, simde_mm_set_ps(SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.92)) }, { 0.07f, simde_mm_set_ps(SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.07)) }, { 0.66f, simde_mm_set_ps(SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.66)) }, { 0.47f, simde_mm_set_ps(SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.47)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_set_ps1(test_vec[i].a); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static 
int test_simde_mm_set_ss(SIMDE_MUNIT_TEST_ARGS) { /* Test simde_mm_set_ss: each case holds a scalar a and expected lanes r, where r[0] repeats a and r[1..3] are 0.00. */ struct { simde_float32 a; simde_float32 r[sizeof(simde__m128) / sizeof(simde_float32)]; } test_vec[8] = { { SIMDE_FLOAT32_C(-521.61), { SIMDE_FLOAT32_C( -521.61), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { SIMDE_FLOAT32_C(992.45), { SIMDE_FLOAT32_C( 992.45), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { SIMDE_FLOAT32_C(274.16), { SIMDE_FLOAT32_C( 274.16), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { SIMDE_FLOAT32_C(897.70), { SIMDE_FLOAT32_C( 897.70), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { SIMDE_FLOAT32_C(-80.03), { SIMDE_FLOAT32_C( -80.03), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { SIMDE_FLOAT32_C(-294.04), { SIMDE_FLOAT32_C( -294.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { SIMDE_FLOAT32_C(421.54), { SIMDE_FLOAT32_C( 421.54), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { SIMDE_FLOAT32_C(458.33), { SIMDE_FLOAT32_C( 458.33), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } } }; /* Call simde_mm_set_ss(a) and compare with r loaded via simde_mm_loadu_ps. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 r = simde_mm_set_ss(test_vec[i].a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } /* Test simde_mm_set1_ps: each case lists a scalar a and an expected r holding that same value in all four lanes. */ static int test_simde_mm_set1_ps(SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a; simde_float32 r[sizeof(simde__m128) / sizeof(simde_float32)]; } test_vec[8] = { { SIMDE_FLOAT32_C(968.47), { SIMDE_FLOAT32_C( 968.47), SIMDE_FLOAT32_C( 968.47), SIMDE_FLOAT32_C( 968.47), SIMDE_FLOAT32_C( 968.47) } }, { SIMDE_FLOAT32_C(560.37), { SIMDE_FLOAT32_C( 560.37), SIMDE_FLOAT32_C( 560.37), SIMDE_FLOAT32_C( 560.37), SIMDE_FLOAT32_C( 560.37) } }, { SIMDE_FLOAT32_C(509.11), { SIMDE_FLOAT32_C( 509.11), SIMDE_FLOAT32_C( 509.11), SIMDE_FLOAT32_C( 509.11), SIMDE_FLOAT32_C( 509.11) } }, {
SIMDE_FLOAT32_C(34.32), { SIMDE_FLOAT32_C( 34.32), SIMDE_FLOAT32_C( 34.32), SIMDE_FLOAT32_C( 34.32), SIMDE_FLOAT32_C( 34.32) } }, { SIMDE_FLOAT32_C(-34.10), { SIMDE_FLOAT32_C( -34.10), SIMDE_FLOAT32_C( -34.10), SIMDE_FLOAT32_C( -34.10), SIMDE_FLOAT32_C( -34.10) } }, { SIMDE_FLOAT32_C(357.28), { SIMDE_FLOAT32_C( 357.28), SIMDE_FLOAT32_C( 357.28), SIMDE_FLOAT32_C( 357.28), SIMDE_FLOAT32_C( 357.28) } }, { SIMDE_FLOAT32_C(-650.83), { SIMDE_FLOAT32_C( -650.83), SIMDE_FLOAT32_C( -650.83), SIMDE_FLOAT32_C( -650.83), SIMDE_FLOAT32_C( -650.83) } }, { SIMDE_FLOAT32_C(429.31), { SIMDE_FLOAT32_C( 429.31), SIMDE_FLOAT32_C( 429.31), SIMDE_FLOAT32_C( 429.31), SIMDE_FLOAT32_C( 429.31) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 r = simde_mm_set1_ps(test_vec[i].a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_setr_ps(SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[sizeof(simde__m128) / sizeof(simde_float32)]; simde_float32 r[sizeof(simde__m128) / sizeof(simde_float32)]; } test_vec[8] = { { { SIMDE_FLOAT32_C( 955.73), SIMDE_FLOAT32_C( 396.29), SIMDE_FLOAT32_C( 305.27), SIMDE_FLOAT32_C( 896.93) }, { SIMDE_FLOAT32_C( 896.93), SIMDE_FLOAT32_C( 305.27), SIMDE_FLOAT32_C( 396.29), SIMDE_FLOAT32_C( 955.73) } }, { { SIMDE_FLOAT32_C( 982.30), SIMDE_FLOAT32_C( 209.24), SIMDE_FLOAT32_C( -25.84), SIMDE_FLOAT32_C( 23.25) }, { SIMDE_FLOAT32_C( 23.25), SIMDE_FLOAT32_C( -25.84), SIMDE_FLOAT32_C( 209.24), SIMDE_FLOAT32_C( 982.30) } }, { { SIMDE_FLOAT32_C( 860.95), SIMDE_FLOAT32_C( -26.58), SIMDE_FLOAT32_C( 238.44), SIMDE_FLOAT32_C( 340.72) }, { SIMDE_FLOAT32_C( 340.72), SIMDE_FLOAT32_C( 238.44), SIMDE_FLOAT32_C( -26.58), SIMDE_FLOAT32_C( 860.95) } }, { { SIMDE_FLOAT32_C( -48.15), SIMDE_FLOAT32_C( 640.42), SIMDE_FLOAT32_C( 675.86), SIMDE_FLOAT32_C( 647.82) }, { SIMDE_FLOAT32_C( 647.82), SIMDE_FLOAT32_C( 675.86), SIMDE_FLOAT32_C( 640.42), SIMDE_FLOAT32_C( -48.15) } }, { { 
SIMDE_FLOAT32_C( 453.33), SIMDE_FLOAT32_C( 576.55), SIMDE_FLOAT32_C( 828.03), SIMDE_FLOAT32_C( 770.37) }, { SIMDE_FLOAT32_C( 770.37), SIMDE_FLOAT32_C( 828.03), SIMDE_FLOAT32_C( 576.55), SIMDE_FLOAT32_C( 453.33) } }, { { SIMDE_FLOAT32_C( 438.60), SIMDE_FLOAT32_C( 313.90), SIMDE_FLOAT32_C( 397.19), SIMDE_FLOAT32_C( 64.15) }, { SIMDE_FLOAT32_C( 64.15), SIMDE_FLOAT32_C( 397.19), SIMDE_FLOAT32_C( 313.90), SIMDE_FLOAT32_C( 438.60) } }, { { SIMDE_FLOAT32_C( 430.55), SIMDE_FLOAT32_C( 275.82), SIMDE_FLOAT32_C( 482.54), SIMDE_FLOAT32_C( 85.82) }, { SIMDE_FLOAT32_C( 85.82), SIMDE_FLOAT32_C( 482.54), SIMDE_FLOAT32_C( 275.82), SIMDE_FLOAT32_C( 430.55) } }, { { SIMDE_FLOAT32_C( 486.66), SIMDE_FLOAT32_C( 277.28), SIMDE_FLOAT32_C( 62.44), SIMDE_FLOAT32_C( 442.39) }, { SIMDE_FLOAT32_C( 442.39), SIMDE_FLOAT32_C( 62.44), SIMDE_FLOAT32_C( 277.28), SIMDE_FLOAT32_C( 486.66) } } }; /* Pass a[3], a[2], a[1], a[0] to simde_mm_setr_ps and compare with r loaded via simde_mm_loadu_ps; the expected rows listed here are a in reverse order. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_float32* a = test_vec[i].a; simde__m128 r = simde_mm_setr_ps(a[3], a[2], a[1], a[0]); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } /* Test simde_mm_setzero_ps: the result must equal simde_mm_set1_ps(0.0f). */ static int test_simde_mm_setzero_ps(SIMDE_MUNIT_TEST_ARGS) { simde_test_x86_assert_equal_f32x4(simde_mm_setzero_ps(), simde_mm_set1_ps(0.0f), 1); return 0; } /* Test simde_x_mm_setone_ps: reinterpret the result as integers with simde_mm_castps_si128 and require every 32-bit lane to equal ~INT32_C(0), i.e. all bits set. */ static int test_simde_x_mm_setone_ps(SIMDE_MUNIT_TEST_ARGS) { simde__m128 r = simde_x_mm_setone_ps(); simde_test_x86_assert_equal_u32x4(simde_mm_castps_si128(r), simde_mm_set1_epi32(~INT32_C(0))); return 0; } /* Test simde_x_mm_abs_ps: in the rows visible here each r lane is the magnitude of the matching a lane. */ static int test_simde_x_mm_abs_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -421.46), SIMDE_FLOAT32_C( -889.21), SIMDE_FLOAT32_C( -223.16), SIMDE_FLOAT32_C( 165.23) }, { SIMDE_FLOAT32_C( 421.46), SIMDE_FLOAT32_C( 889.21), SIMDE_FLOAT32_C( 223.16), SIMDE_FLOAT32_C( 165.23) } }, { { SIMDE_FLOAT32_C( -519.69), SIMDE_FLOAT32_C( 704.36), SIMDE_FLOAT32_C( 72.76), SIMDE_FLOAT32_C( 522.49) }, { SIMDE_FLOAT32_C(
519.69), SIMDE_FLOAT32_C( 704.36), SIMDE_FLOAT32_C( 72.76), SIMDE_FLOAT32_C( 522.49) } }, { { SIMDE_FLOAT32_C( 870.86), SIMDE_FLOAT32_C( -918.56), SIMDE_FLOAT32_C( 997.87), SIMDE_FLOAT32_C( 143.92) }, { SIMDE_FLOAT32_C( 870.86), SIMDE_FLOAT32_C( 918.56), SIMDE_FLOAT32_C( 997.87), SIMDE_FLOAT32_C( 143.92) } }, { { SIMDE_FLOAT32_C( 368.61), SIMDE_FLOAT32_C( 537.92), SIMDE_FLOAT32_C( 637.07), SIMDE_FLOAT32_C( -512.16) }, { SIMDE_FLOAT32_C( 368.61), SIMDE_FLOAT32_C( 537.92), SIMDE_FLOAT32_C( 637.07), SIMDE_FLOAT32_C( 512.16) } }, { { SIMDE_FLOAT32_C( 125.42), SIMDE_FLOAT32_C( 861.61), SIMDE_FLOAT32_C( -914.90), SIMDE_FLOAT32_C( -887.65) }, { SIMDE_FLOAT32_C( 125.42), SIMDE_FLOAT32_C( 861.61), SIMDE_FLOAT32_C( 914.90), SIMDE_FLOAT32_C( 887.65) } }, { { SIMDE_FLOAT32_C( -429.10), SIMDE_FLOAT32_C( 351.84), SIMDE_FLOAT32_C( 840.63), SIMDE_FLOAT32_C( -928.69) }, { SIMDE_FLOAT32_C( 429.10), SIMDE_FLOAT32_C( 351.84), SIMDE_FLOAT32_C( 840.63), SIMDE_FLOAT32_C( 928.69) } }, { { SIMDE_FLOAT32_C( -715.59), SIMDE_FLOAT32_C( 906.22), SIMDE_FLOAT32_C( -64.85), SIMDE_FLOAT32_C( 686.85) }, { SIMDE_FLOAT32_C( 715.59), SIMDE_FLOAT32_C( 906.22), SIMDE_FLOAT32_C( 64.85), SIMDE_FLOAT32_C( 686.85) } }, { { SIMDE_FLOAT32_C( 445.44), SIMDE_FLOAT32_C( 794.89), SIMDE_FLOAT32_C( 44.90), SIMDE_FLOAT32_C( -976.02) }, { SIMDE_FLOAT32_C( 445.44), SIMDE_FLOAT32_C( 794.89), SIMDE_FLOAT32_C( 44.90), SIMDE_FLOAT32_C( 976.02) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_x_mm_abs_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_add_ps (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; simde_float32 b[4]; simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 83.02), SIMDE_FLOAT32_C( 389.11), SIMDE_FLOAT32_C( -551.93), SIMDE_FLOAT32_C( 499.43) }, { SIMDE_FLOAT32_C( 438.42), SIMDE_FLOAT32_C( 399.55), SIMDE_FLOAT32_C( 
-203.56), SIMDE_FLOAT32_C( 299.49) }, { SIMDE_FLOAT32_C( 521.44), SIMDE_FLOAT32_C( 788.66), SIMDE_FLOAT32_C( -755.49), SIMDE_FLOAT32_C( 798.92) } }, { { SIMDE_FLOAT32_C( -379.62), SIMDE_FLOAT32_C( 462.50), SIMDE_FLOAT32_C( -825.60), SIMDE_FLOAT32_C( 38.74) }, { SIMDE_FLOAT32_C( 149.18), SIMDE_FLOAT32_C( 29.45), SIMDE_FLOAT32_C( 149.99), SIMDE_FLOAT32_C( 766.00) }, { SIMDE_FLOAT32_C( -230.44), SIMDE_FLOAT32_C( 491.94), SIMDE_FLOAT32_C( -675.61), SIMDE_FLOAT32_C( 804.74) } }, { { SIMDE_FLOAT32_C( 856.05), SIMDE_FLOAT32_C( 987.49), SIMDE_FLOAT32_C( -70.35), SIMDE_FLOAT32_C( -659.83) }, { SIMDE_FLOAT32_C( -940.54), SIMDE_FLOAT32_C( 739.82), SIMDE_FLOAT32_C( 922.41), SIMDE_FLOAT32_C( -908.67) }, { SIMDE_FLOAT32_C( -84.49), SIMDE_FLOAT32_C( 1727.30), SIMDE_FLOAT32_C( 852.06), SIMDE_FLOAT32_C( -1568.50) } }, { { SIMDE_FLOAT32_C( -590.79), SIMDE_FLOAT32_C( -852.37), SIMDE_FLOAT32_C( -630.57), SIMDE_FLOAT32_C( -823.06) }, { SIMDE_FLOAT32_C( -434.22), SIMDE_FLOAT32_C( -548.87), SIMDE_FLOAT32_C( 5.97), SIMDE_FLOAT32_C( 648.80) }, { SIMDE_FLOAT32_C( -1025.02), SIMDE_FLOAT32_C( -1401.25), SIMDE_FLOAT32_C( -624.60), SIMDE_FLOAT32_C( -174.26) } }, { { SIMDE_FLOAT32_C( 840.24), SIMDE_FLOAT32_C( 454.04), SIMDE_FLOAT32_C( 148.23), SIMDE_FLOAT32_C( 278.66) }, { SIMDE_FLOAT32_C( -146.41), SIMDE_FLOAT32_C( 944.67), SIMDE_FLOAT32_C( -421.85), SIMDE_FLOAT32_C( 473.97) }, { SIMDE_FLOAT32_C( 693.83), SIMDE_FLOAT32_C( 1398.71), SIMDE_FLOAT32_C( -273.62), SIMDE_FLOAT32_C( 752.63) } }, { { SIMDE_FLOAT32_C( 407.16), SIMDE_FLOAT32_C( -247.45), SIMDE_FLOAT32_C( -487.29), SIMDE_FLOAT32_C( -443.65) }, { SIMDE_FLOAT32_C( 781.99), SIMDE_FLOAT32_C( 662.70), SIMDE_FLOAT32_C( -677.65), SIMDE_FLOAT32_C( 638.04) }, { SIMDE_FLOAT32_C( 1189.16), SIMDE_FLOAT32_C( 415.25), SIMDE_FLOAT32_C( -1164.94), SIMDE_FLOAT32_C( 194.39) } }, { { SIMDE_FLOAT32_C( 650.19), SIMDE_FLOAT32_C( 252.00), SIMDE_FLOAT32_C( 978.21), SIMDE_FLOAT32_C( 709.65) }, { SIMDE_FLOAT32_C( -8.18), SIMDE_FLOAT32_C( 900.62), SIMDE_FLOAT32_C( 
800.98), SIMDE_FLOAT32_C( 401.02) }, { SIMDE_FLOAT32_C( 642.00), SIMDE_FLOAT32_C( 1152.62), SIMDE_FLOAT32_C( 1779.19), SIMDE_FLOAT32_C( 1110.67) } }, { { SIMDE_FLOAT32_C( -951.75), SIMDE_FLOAT32_C( -829.59), SIMDE_FLOAT32_C( 577.96), SIMDE_FLOAT32_C( -385.97) }, { SIMDE_FLOAT32_C( -378.46), SIMDE_FLOAT32_C( -416.07), SIMDE_FLOAT32_C( -737.17), SIMDE_FLOAT32_C( -538.23) }, { SIMDE_FLOAT32_C( -1330.21), SIMDE_FLOAT32_C( -1245.67), SIMDE_FLOAT32_C( -159.21), SIMDE_FLOAT32_C( -924.20) } } }; /* Load a and b with simde_mm_loadu_ps, add them with simde_mm_add_ps, and compare with the stored r. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_mm_add_ps(a, b); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } /* Test simde_mm_add_ss: in the rows visible here the first three simde_mm_set_ps arguments of r are copied from a, and the last argument is the sum of the last arguments of a and b. */ static int test_simde_mm_add_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 807.64), SIMDE_FLOAT32_C( 618.52), SIMDE_FLOAT32_C( -396.84), SIMDE_FLOAT32_C( -297.31)), simde_mm_set_ps(SIMDE_FLOAT32_C( -471.66), SIMDE_FLOAT32_C( 713.91), SIMDE_FLOAT32_C( 549.43), SIMDE_FLOAT32_C( -169.45)), simde_mm_set_ps(SIMDE_FLOAT32_C( 807.64), SIMDE_FLOAT32_C( 618.52), SIMDE_FLOAT32_C( -396.84), SIMDE_FLOAT32_C( -466.76)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 544.68), SIMDE_FLOAT32_C( 42.73), SIMDE_FLOAT32_C( -460.22), SIMDE_FLOAT32_C( -292.61)), simde_mm_set_ps(SIMDE_FLOAT32_C( 187.56), SIMDE_FLOAT32_C( -798.97), SIMDE_FLOAT32_C( 272.51), SIMDE_FLOAT32_C( 51.52)), simde_mm_set_ps(SIMDE_FLOAT32_C( 544.68), SIMDE_FLOAT32_C( 42.73), SIMDE_FLOAT32_C( -460.22), SIMDE_FLOAT32_C( -241.09)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -110.27), SIMDE_FLOAT32_C( 680.50), SIMDE_FLOAT32_C( 761.66), SIMDE_FLOAT32_C( 277.48)), simde_mm_set_ps(SIMDE_FLOAT32_C( -67.06), SIMDE_FLOAT32_C( 934.00), SIMDE_FLOAT32_C( 502.27), SIMDE_FLOAT32_C( 358.64)), simde_mm_set_ps(SIMDE_FLOAT32_C( -110.27), SIMDE_FLOAT32_C( 680.50),
SIMDE_FLOAT32_C( 761.66), SIMDE_FLOAT32_C( 636.12)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 434.09), SIMDE_FLOAT32_C( 948.98), SIMDE_FLOAT32_C( -323.81), SIMDE_FLOAT32_C( -338.37)), simde_mm_set_ps(SIMDE_FLOAT32_C( -221.66), SIMDE_FLOAT32_C( -861.25), SIMDE_FLOAT32_C( 78.84), SIMDE_FLOAT32_C( -873.32)), simde_mm_set_ps(SIMDE_FLOAT32_C( 434.09), SIMDE_FLOAT32_C( 948.98), SIMDE_FLOAT32_C( -323.81), SIMDE_FLOAT32_C(-1211.69)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -878.90), SIMDE_FLOAT32_C( -860.71), SIMDE_FLOAT32_C( 840.94), SIMDE_FLOAT32_C( 25.49)), simde_mm_set_ps(SIMDE_FLOAT32_C( 571.90), SIMDE_FLOAT32_C( -703.66), SIMDE_FLOAT32_C( -53.73), SIMDE_FLOAT32_C( -560.77)), simde_mm_set_ps(SIMDE_FLOAT32_C( -878.90), SIMDE_FLOAT32_C( -860.71), SIMDE_FLOAT32_C( 840.94), SIMDE_FLOAT32_C( -535.28)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 496.95), SIMDE_FLOAT32_C( -413.78), SIMDE_FLOAT32_C( -24.10), SIMDE_FLOAT32_C( 918.19)), simde_mm_set_ps(SIMDE_FLOAT32_C( -8.88), SIMDE_FLOAT32_C( 751.51), SIMDE_FLOAT32_C( 315.02), SIMDE_FLOAT32_C( 734.82)), simde_mm_set_ps(SIMDE_FLOAT32_C( 496.95), SIMDE_FLOAT32_C( -413.78), SIMDE_FLOAT32_C( -24.10), SIMDE_FLOAT32_C( 1653.01)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 101.05), SIMDE_FLOAT32_C( -424.85), SIMDE_FLOAT32_C( -158.08), SIMDE_FLOAT32_C( 354.83)), simde_mm_set_ps(SIMDE_FLOAT32_C( 568.62), SIMDE_FLOAT32_C( -525.68), SIMDE_FLOAT32_C( 73.90), SIMDE_FLOAT32_C( -463.92)), simde_mm_set_ps(SIMDE_FLOAT32_C( 101.05), SIMDE_FLOAT32_C( -424.85), SIMDE_FLOAT32_C( -158.08), SIMDE_FLOAT32_C( -109.09)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -477.51), SIMDE_FLOAT32_C( 986.65), SIMDE_FLOAT32_C( -44.77), SIMDE_FLOAT32_C( 993.26)), simde_mm_set_ps(SIMDE_FLOAT32_C( 591.07), SIMDE_FLOAT32_C( 806.35), SIMDE_FLOAT32_C( -137.78), SIMDE_FLOAT32_C( -899.14)), simde_mm_set_ps(SIMDE_FLOAT32_C( -477.51), SIMDE_FLOAT32_C( 986.65), SIMDE_FLOAT32_C( -44.77), SIMDE_FLOAT32_C( 94.12)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 
r = simde_mm_add_ss(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } /* Test simde_mm_and_ps: operands a, b and the expected result r are stored as simde__m128i integer vectors built with simde_mm_set_epi32. */ static int test_simde_mm_and_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 230308132), INT32_C( -227837326), INT32_C( 2068647778), INT32_C( 339280616)), simde_mm_set_epi32(INT32_C(-2005788472), INT32_C( 635603096), INT32_C( 1918425762), INT32_C( -674479342)), simde_mm_set_epi32(INT32_C( 137500672), INT32_C( 543295504), INT32_C( 1917323810), INT32_C( 336069120)) }, { simde_mm_set_epi32(INT32_C( -291660154), INT32_C( 1180299090), INT32_C( 1302245871), INT32_C(-2053540867)), simde_mm_set_epi32(INT32_C( -87778126), INT32_C( -288172017), INT32_C( 1641035711), INT32_C( -987206598)), simde_mm_set_epi32(INT32_C( -360407934), INT32_C( 1179699202), INT32_C( 1099968943), INT32_C(-2063048648)) }, { simde_mm_set_epi32(INT32_C( 1329542878), INT32_C( 1207699188), INT32_C( 24647547), INT32_C( -417192016)), simde_mm_set_epi32(INT32_C(-1903170010), INT32_C( 1636014785), INT32_C(-1789298238), INT32_C( 431363012)), simde_mm_set_epi32(INT32_C( 235873798), INT32_C( 1098908352), INT32_C( 22548802), INT32_C( 19006336)) }, { simde_mm_set_epi32(INT32_C( 1291921239), INT32_C(-2088318277), INT32_C( 293687175), INT32_C( -70855120)), simde_mm_set_epi32(INT32_C( -181527647), INT32_C( 1885539289), INT32_C( 1200729803), INT32_C(-1998296563)), simde_mm_set_epi32(INT32_C( 1157628673), INT32_C( 133785), INT32_C( 25235075), INT32_C(-2000403968)) }, { simde_mm_set_epi32(INT32_C( -352637975), INT32_C( 1914022535), INT32_C( -883801519), INT32_C(-2099443995)), simde_mm_set_epi32(INT32_C( 82894436), INT32_C( 345641451), INT32_C(-1766434082), INT32_C( 545403119)), simde_mm_set_epi32(INT32_C( 15731296), INT32_C( 269484163), INT32_C(-2112732592), INT32_C( 8388837)) }, { simde_mm_set_epi32(INT32_C(-1731787377), INT32_C( -251709819), INT32_C( 707964452), INT32_C( 1634791391)),
simde_mm_set_epi32(INT32_C( -198581371), INT32_C( 1425724652), INT32_C( -85534714), INT32_C( 593903546)), simde_mm_set_epi32(INT32_C(-1878982267), INT32_C( 1358566532), INT32_C( 706906116), INT32_C( 559951770)) }, { simde_mm_set_epi32(INT32_C( 1475548270), INT32_C( 196183104), INT32_C( 1788414168), INT32_C( -339387422)), simde_mm_set_epi32(INT32_C( 372209195), INT32_C( 2023049541), INT32_C( 537336467), INT32_C(-1542274771)), simde_mm_set_epi32(INT32_C( 371397674), INT32_C( 143720512), INT32_C( 536938640), INT32_C(-1610596064)) }, { simde_mm_set_epi32(INT32_C( 1365641873), INT32_C(-1596735940), INT32_C( 1148782984), INT32_C( -513375328)), simde_mm_set_epi32(INT32_C(-1663652637), INT32_C(-1297596491), INT32_C(-1521550159), INT32_C(-2063966090)), simde_mm_set_epi32(INT32_C( 273025153), INT32_C(-1602208204), INT32_C( 71829632), INT32_C(-2141061088)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128(simde_mm_and_ps(simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(test_vec[i].r, r); } return 0; } static int test_simde_mm_andnot_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 1177189245), INT32_C( 1897888048), INT32_C(-1445410189), INT32_C(-2027773136)), simde_mm_set_epi32(INT32_C(-1799619754), INT32_C( 1146608002), INT32_C( -118138295), INT32_C( -396850997)), simde_mm_set_epi32(INT32_C(-1869512702), INT32_C( 71336066), INT32_C( 1344606216), INT32_C( 1750599883)) }, { simde_mm_set_epi32(INT32_C( 207339929), INT32_C(-1653197317), INT32_C( 1742328793), INT32_C(-1943741545)), simde_mm_set_epi32(INT32_C( 1475927203), INT32_C(-1393438167), INT32_C( -244601726), INT32_C(-1116772072)), simde_mm_set_epi32(INT32_C( 1403001890), INT32_C( 545374720), INT32_C(-1876808702), INT32_C( 827006984)) }, { simde_mm_set_epi32(INT32_C( 280787508), INT32_C( -476758383), 
INT32_C( -690500493), INT32_C( 1730225777)), simde_mm_set_epi32(INT32_C( 455229920), INT32_C( 254808493), INT32_C( -318322364), INT32_C( -744606195)), simde_mm_set_epi32(INT32_C( 184680896), INT32_C( 203424044), INT32_C( 687866116), INT32_C(-1868689396)) }, { simde_mm_set_epi32(INT32_C( 1314224819), INT32_C( -883715485), INT32_C( 1830060352), INT32_C( 484123993)), simde_mm_set_epi32(INT32_C(-1600415403), INT32_C( 1247040590), INT32_C( -903138997), INT32_C( 1339483582)), simde_mm_set_epi32(INT32_C(-1601535676), INT32_C( 279564), INT32_C(-2111098869), INT32_C( 1124384934)) }, { simde_mm_set_epi32(INT32_C( 491653802), INT32_C( 1332021673), INT32_C(-1558543881), INT32_C( 1668194718)), simde_mm_set_epi32(INT32_C( 1904424558), INT32_C(-1115627084), INT32_C(-1533509640), INT32_C( 1120379446)), simde_mm_set_epi32(INT32_C( 1619080260), INT32_C(-1333731308), INT32_C( 75529736), INT32_C( 8454688)) }, { simde_mm_set_epi32(INT32_C( 1894428518), INT32_C( 1907474957), INT32_C( -645595730), INT32_C( -336066790)), simde_mm_set_epi32(INT32_C( -923650038), INT32_C(-1468609389), INT32_C(-1710432847), INT32_C(-1709785329)), simde_mm_set_epi32(INT32_C(-2012213240), INT32_C(-2008675182), INT32_C( 34078737), INT32_C( 268875781)) }, { simde_mm_set_epi32(INT32_C( 194114005), INT32_C( 123455954), INT32_C(-1857684581), INT32_C( -281966329)), simde_mm_set_epi32(INT32_C( -794388211), INT32_C(-1374144398), INT32_C( 1808738256), INT32_C( -8964047)), simde_mm_set_epi32(INT32_C( -802813944), INT32_C(-1476382688), INT32_C( 1787429952), INT32_C( 273035312)) }, { simde_mm_set_epi32(INT32_C(-1545262354), INT32_C( 905989253), INT32_C( -414577725), INT32_C( 1431039599)), simde_mm_set_epi32(INT32_C( 1566256833), INT32_C(-1518668169), INT32_C(-1754223695), INT32_C( 1280260013)), simde_mm_set_epi32(INT32_C( 1545208833), INT32_C(-2122665358), INT32_C( 271622192), INT32_C( 134484352)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( 
simde_mm_andnot_ps( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); /* The result was cast back to integers, so the comparison is an exact 32-bit lane match. */ simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } /* Test simde_x_mm_not_ps: every expected r lane below equals -a - 1, which is the bitwise NOT of the matching int32_t a lane. */ static int test_simde_x_mm_not_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[4]; const int32_t r[4]; } test_vec[] = { { { -INT32_C( 1059535058), INT32_C( 2048808202), INT32_C( 683407848), INT32_C( 1249446257) }, { INT32_C( 1059535057), -INT32_C( 2048808203), -INT32_C( 683407849), -INT32_C( 1249446258) } }, { { -INT32_C( 1990686684), INT32_C( 961619650), INT32_C( 1292910096), INT32_C( 1808415805) }, { INT32_C( 1990686683), -INT32_C( 961619651), -INT32_C( 1292910097), -INT32_C( 1808415806) } }, { { INT32_C( 237740548), -INT32_C( 544650761), -INT32_C( 1274592190), INT32_C( 1912504653) }, { -INT32_C( 237740549), INT32_C( 544650760), INT32_C( 1274592189), -INT32_C( 1912504654) } }, { { -INT32_C( 872786423), -INT32_C( 1929098372), -INT32_C( 958786423), INT32_C( 1429382225) }, { INT32_C( 872786422), INT32_C( 1929098371), INT32_C( 958786422), -INT32_C( 1429382226) } }, { { INT32_C( 1029922118), -INT32_C( 383980377), INT32_C( 2107450160), -INT32_C( 1376871260) }, { -INT32_C( 1029922119), INT32_C( 383980376), -INT32_C( 2107450161), INT32_C( 1376871259) } }, { { INT32_C( 1870194930), -INT32_C( 1107526349), -INT32_C( 494676335), -INT32_C( 1070090886) }, { -INT32_C( 1870194931), INT32_C( 1107526348), INT32_C( 494676334), INT32_C( 1070090885) } }, { { -INT32_C( 1157784813), -INT32_C( 1197270649), -INT32_C( 516603588), -INT32_C( 812768035) }, { INT32_C( 1157784812), INT32_C( 1197270648), INT32_C( 516603587), INT32_C( 812768034) } }, { { INT32_C( 1077806860), INT32_C( 368917124), -INT32_C( 1963425776), INT32_C( 1229598518) }, { -INT32_C( 1077806861), -INT32_C( 368917125), INT32_C( 1963425775), -INT32_C( 1229598519) } } }; /* Load each case's integer lanes with simde_x_mm_loadu_epi32. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); simde__m128i r =
simde_mm_castps_si128(simde_x_mm_not_ps(simde_mm_castsi128_ps(a))); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_x_mm_select_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 b[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 689.01), SIMDE_FLOAT32_C( -11.14), SIMDE_FLOAT32_C( 334.10), SIMDE_FLOAT32_C( 442.90) }, { SIMDE_FLOAT32_C( -212.34), SIMDE_FLOAT32_C( -259.33), SIMDE_FLOAT32_C( 883.69), SIMDE_FLOAT32_C( 488.72) }, { SIMDE_FLOAT32_C( -212.34), SIMDE_FLOAT32_C( -259.33), SIMDE_FLOAT32_C( 334.10), SIMDE_FLOAT32_C( 442.90) } }, { { SIMDE_FLOAT32_C( -26.63), SIMDE_FLOAT32_C( -842.74), SIMDE_FLOAT32_C( -183.46), SIMDE_FLOAT32_C( -506.27) }, { SIMDE_FLOAT32_C( 624.94), SIMDE_FLOAT32_C( -269.87), SIMDE_FLOAT32_C( -399.29), SIMDE_FLOAT32_C( 452.04) }, { SIMDE_FLOAT32_C( -26.63), SIMDE_FLOAT32_C( -842.74), SIMDE_FLOAT32_C( -399.29), SIMDE_FLOAT32_C( -506.27) } }, { { SIMDE_FLOAT32_C( -903.49), SIMDE_FLOAT32_C( -810.21), SIMDE_FLOAT32_C( 457.27), SIMDE_FLOAT32_C( -144.98) }, { SIMDE_FLOAT32_C( -658.80), SIMDE_FLOAT32_C( -235.68), SIMDE_FLOAT32_C( 453.63), SIMDE_FLOAT32_C( -772.14) }, { SIMDE_FLOAT32_C( -903.49), SIMDE_FLOAT32_C( -810.21), SIMDE_FLOAT32_C( 453.63), SIMDE_FLOAT32_C( -772.14) } }, { { SIMDE_FLOAT32_C( 598.69), SIMDE_FLOAT32_C( 276.57), SIMDE_FLOAT32_C( 406.80), SIMDE_FLOAT32_C( 699.05) }, { SIMDE_FLOAT32_C( -976.76), SIMDE_FLOAT32_C( -581.80), SIMDE_FLOAT32_C( 38.73), SIMDE_FLOAT32_C( 712.25) }, { SIMDE_FLOAT32_C( -976.76), SIMDE_FLOAT32_C( -581.80), SIMDE_FLOAT32_C( 38.73), SIMDE_FLOAT32_C( 699.05) } }, { { SIMDE_FLOAT32_C( 407.06), SIMDE_FLOAT32_C( -627.17), SIMDE_FLOAT32_C( 155.16), SIMDE_FLOAT32_C( -805.28) }, { SIMDE_FLOAT32_C( 113.51), SIMDE_FLOAT32_C( 38.85), SIMDE_FLOAT32_C( 683.44), SIMDE_FLOAT32_C( -913.12) }, { SIMDE_FLOAT32_C( 113.51), SIMDE_FLOAT32_C( -627.17), SIMDE_FLOAT32_C( 155.16), SIMDE_FLOAT32_C( 
-913.12) } }, { { SIMDE_FLOAT32_C( 196.11), SIMDE_FLOAT32_C( -500.01), SIMDE_FLOAT32_C( -419.39), SIMDE_FLOAT32_C( -178.96) }, { SIMDE_FLOAT32_C( 230.12), SIMDE_FLOAT32_C( 181.32), SIMDE_FLOAT32_C( -726.92), SIMDE_FLOAT32_C( 326.63) }, { SIMDE_FLOAT32_C( 196.11), SIMDE_FLOAT32_C( -500.01), SIMDE_FLOAT32_C( -726.92), SIMDE_FLOAT32_C( -178.96) } }, { { SIMDE_FLOAT32_C( 371.12), SIMDE_FLOAT32_C( 730.35), SIMDE_FLOAT32_C( -818.35), SIMDE_FLOAT32_C( 712.32) }, { SIMDE_FLOAT32_C( -505.33), SIMDE_FLOAT32_C( 635.28), SIMDE_FLOAT32_C( 940.19), SIMDE_FLOAT32_C( -906.64) }, { SIMDE_FLOAT32_C( -505.33), SIMDE_FLOAT32_C( 635.28), SIMDE_FLOAT32_C( -818.35), SIMDE_FLOAT32_C( -906.64) } }, { { SIMDE_FLOAT32_C( -88.15), SIMDE_FLOAT32_C( 346.99), SIMDE_FLOAT32_C( 792.41), SIMDE_FLOAT32_C( -64.91) }, { SIMDE_FLOAT32_C( 765.19), SIMDE_FLOAT32_C( -168.85), SIMDE_FLOAT32_C( -352.65), SIMDE_FLOAT32_C( 172.25) }, { SIMDE_FLOAT32_C( -88.15), SIMDE_FLOAT32_C( -168.85), SIMDE_FLOAT32_C( -352.65), SIMDE_FLOAT32_C( -64.91) } } }; /* The selection mask is simde_mm_cmplt_ps(a, b); the expected rows on this line hold a where a < b and b otherwise. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_x_mm_select_ps(b, a, simde_mm_cmplt_ps(a, b)); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } /* Test simde_mm_avg_pu16: the rows visible here are consistent with a rounded-up average, (a + b + 1) >> 1, per unsigned 16-bit lane. */ static int test_simde_mm_avg_pu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_x_mm_set_pu16(UINT16_C( 984), UINT16_C(30280), UINT16_C(42568), UINT16_C(40084)), simde_x_mm_set_pu16(UINT16_C(27500), UINT16_C(27572), UINT16_C(25285), UINT16_C(55528)), simde_x_mm_set_pu16(UINT16_C(14242), UINT16_C(28926), UINT16_C(33927), UINT16_C(47806)) }, { simde_x_mm_set_pu16(UINT16_C(16799), UINT16_C(60083), UINT16_C(41622), UINT16_C(59771)), simde_x_mm_set_pu16(UINT16_C(44269), UINT16_C(15826), UINT16_C(52483), UINT16_C(45725)), simde_x_mm_set_pu16(UINT16_C(30534), UINT16_C(37955),
UINT16_C(47053), UINT16_C(52748)) }, { simde_x_mm_set_pu16(UINT16_C(41380), UINT16_C(64067), UINT16_C(33526), UINT16_C(10279)), simde_x_mm_set_pu16(UINT16_C(34327), UINT16_C(29328), UINT16_C( 8579), UINT16_C( 1111)), simde_x_mm_set_pu16(UINT16_C(37854), UINT16_C(46698), UINT16_C(21053), UINT16_C( 5695)) }, { simde_x_mm_set_pu16(UINT16_C(18628), UINT16_C(48112), UINT16_C( 3068), UINT16_C(50432)), simde_x_mm_set_pu16(UINT16_C( 746), UINT16_C(22132), UINT16_C(21846), UINT16_C(48900)), simde_x_mm_set_pu16(UINT16_C( 9687), UINT16_C(35122), UINT16_C(12457), UINT16_C(49666)) }, { simde_x_mm_set_pu16(UINT16_C(10018), UINT16_C( 4381), UINT16_C(59604), UINT16_C( 6897)), simde_x_mm_set_pu16(UINT16_C(44395), UINT16_C(45261), UINT16_C(45129), UINT16_C(12723)), simde_x_mm_set_pu16(UINT16_C(27207), UINT16_C(24821), UINT16_C(52367), UINT16_C( 9810)) }, { simde_x_mm_set_pu16(UINT16_C(25446), UINT16_C(46949), UINT16_C(46494), UINT16_C(27481)), simde_x_mm_set_pu16(UINT16_C(54442), UINT16_C(34061), UINT16_C(46613), UINT16_C(44299)), simde_x_mm_set_pu16(UINT16_C(39944), UINT16_C(40505), UINT16_C(46554), UINT16_C(35890)) }, { simde_x_mm_set_pu16(UINT16_C( 4562), UINT16_C(11032), UINT16_C(30725), UINT16_C(48961)), simde_x_mm_set_pu16(UINT16_C(11741), UINT16_C(52603), UINT16_C(11503), UINT16_C(62380)), simde_x_mm_set_pu16(UINT16_C( 8152), UINT16_C(31818), UINT16_C(21114), UINT16_C(55671)) }, { simde_x_mm_set_pu16(UINT16_C(25787), UINT16_C(46963), UINT16_C( 3756), UINT16_C(41201)), simde_x_mm_set_pu16(UINT16_C(64587), UINT16_C(22291), UINT16_C( 7901), UINT16_C(11300)), simde_x_mm_set_pu16(UINT16_C(45187), UINT16_C(34627), UINT16_C( 5829), UINT16_C(26251)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_avg_pu16(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_u16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_pavgw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } 
test_vec[8] = { { simde_x_mm_set_pu16(UINT16_C( 984), UINT16_C(30280), UINT16_C(42568), UINT16_C(40084)), simde_x_mm_set_pu16(UINT16_C(27500), UINT16_C(27572), UINT16_C(25285), UINT16_C(55528)), simde_x_mm_set_pu16(UINT16_C(14242), UINT16_C(28926), UINT16_C(33927), UINT16_C(47806)) }, { simde_x_mm_set_pu16(UINT16_C(16799), UINT16_C(60083), UINT16_C(41622), UINT16_C(59771)), simde_x_mm_set_pu16(UINT16_C(44269), UINT16_C(15826), UINT16_C(52483), UINT16_C(45725)), simde_x_mm_set_pu16(UINT16_C(30534), UINT16_C(37955), UINT16_C(47053), UINT16_C(52748)) }, { simde_x_mm_set_pu16(UINT16_C(41380), UINT16_C(64067), UINT16_C(33526), UINT16_C(10279)), simde_x_mm_set_pu16(UINT16_C(34327), UINT16_C(29328), UINT16_C( 8579), UINT16_C( 1111)), simde_x_mm_set_pu16(UINT16_C(37854), UINT16_C(46698), UINT16_C(21053), UINT16_C( 5695)) }, { simde_x_mm_set_pu16(UINT16_C(18628), UINT16_C(48112), UINT16_C( 3068), UINT16_C(50432)), simde_x_mm_set_pu16(UINT16_C( 746), UINT16_C(22132), UINT16_C(21846), UINT16_C(48900)), simde_x_mm_set_pu16(UINT16_C( 9687), UINT16_C(35122), UINT16_C(12457), UINT16_C(49666)) }, { simde_x_mm_set_pu16(UINT16_C(10018), UINT16_C( 4381), UINT16_C(59604), UINT16_C( 6897)), simde_x_mm_set_pu16(UINT16_C(44395), UINT16_C(45261), UINT16_C(45129), UINT16_C(12723)), simde_x_mm_set_pu16(UINT16_C(27207), UINT16_C(24821), UINT16_C(52367), UINT16_C( 9810)) }, { simde_x_mm_set_pu16(UINT16_C(25446), UINT16_C(46949), UINT16_C(46494), UINT16_C(27481)), simde_x_mm_set_pu16(UINT16_C(54442), UINT16_C(34061), UINT16_C(46613), UINT16_C(44299)), simde_x_mm_set_pu16(UINT16_C(39944), UINT16_C(40505), UINT16_C(46554), UINT16_C(35890)) }, { simde_x_mm_set_pu16(UINT16_C( 4562), UINT16_C(11032), UINT16_C(30725), UINT16_C(48961)), simde_x_mm_set_pu16(UINT16_C(11741), UINT16_C(52603), UINT16_C(11503), UINT16_C(62380)), simde_x_mm_set_pu16(UINT16_C( 8152), UINT16_C(31818), UINT16_C(21114), UINT16_C(55671)) }, { simde_x_mm_set_pu16(UINT16_C(25787), UINT16_C(46963), UINT16_C( 3756), 
UINT16_C(41201)), simde_x_mm_set_pu16(UINT16_C(64587), UINT16_C(22291), UINT16_C( 7901), UINT16_C(11300)), simde_x_mm_set_pu16(UINT16_C(45187), UINT16_C(34627), UINT16_C( 5829), UINT16_C(26251)) } }; /* Run simde_m_pavgw on each case and compare the four unsigned 16-bit lanes with r; simde_mm_empty() is called once after the loop. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_pavgw(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_u16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } /* Test simde_mm_avg_pu8: the rows visible here are consistent with a rounded-up average, (a + b + 1) >> 1, per unsigned 8-bit lane. */ static int test_simde_mm_avg_pu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_x_mm_set_pu8(UINT8_C( 188), UINT8_C( 166), UINT8_C( 84), UINT8_C( 155), UINT8_C( 198), UINT8_C( 220), UINT8_C( 172), UINT8_C( 150)), simde_x_mm_set_pu8(UINT8_C( 192), UINT8_C( 110), UINT8_C( 123), UINT8_C( 195), UINT8_C( 186), UINT8_C( 160), UINT8_C( 76), UINT8_C( 17)), simde_x_mm_set_pu8(UINT8_C( 190), UINT8_C( 138), UINT8_C( 104), UINT8_C( 175), UINT8_C( 192), UINT8_C( 190), UINT8_C( 124), UINT8_C( 84)) }, { simde_x_mm_set_pu8(UINT8_C( 81), UINT8_C( 231), UINT8_C( 17), UINT8_C( 205), UINT8_C( 110), UINT8_C( 170), UINT8_C( 68), UINT8_C( 253)), simde_x_mm_set_pu8(UINT8_C( 162), UINT8_C( 211), UINT8_C( 109), UINT8_C( 219), UINT8_C( 84), UINT8_C( 95), UINT8_C( 217), UINT8_C( 146)), simde_x_mm_set_pu8(UINT8_C( 122), UINT8_C( 221), UINT8_C( 63), UINT8_C( 212), UINT8_C( 97), UINT8_C( 133), UINT8_C( 143), UINT8_C( 200)) }, { simde_x_mm_set_pu8(UINT8_C( 65), UINT8_C( 248), UINT8_C( 129), UINT8_C( 144), UINT8_C( 4), UINT8_C( 42), UINT8_C( 191), UINT8_C( 186)), simde_x_mm_set_pu8(UINT8_C( 181), UINT8_C( 198), UINT8_C( 22), UINT8_C( 17), UINT8_C( 197), UINT8_C( 123), UINT8_C( 216), UINT8_C( 195)), simde_x_mm_set_pu8(UINT8_C( 123), UINT8_C( 223), UINT8_C( 76), UINT8_C( 81), UINT8_C( 101), UINT8_C( 83), UINT8_C( 204), UINT8_C( 191)) }, { simde_x_mm_set_pu8(UINT8_C( 149), UINT8_C( 191), UINT8_C( 152), UINT8_C( 239), UINT8_C( 227), UINT8_C( 104), UINT8_C( 122), UINT8_C( 140)), simde_x_mm_set_pu8(UINT8_C( 5), UINT8_C( 95), UINT8_C(
201), UINT8_C( 16), UINT8_C( 30), UINT8_C( 151), UINT8_C( 230), UINT8_C( 228)), simde_x_mm_set_pu8(UINT8_C( 77), UINT8_C( 143), UINT8_C( 177), UINT8_C( 128), UINT8_C( 129), UINT8_C( 128), UINT8_C( 176), UINT8_C( 184)) }, { simde_x_mm_set_pu8(UINT8_C( 99), UINT8_C( 248), UINT8_C( 192), UINT8_C( 96), UINT8_C( 9), UINT8_C( 180), UINT8_C( 230), UINT8_C( 75)), simde_x_mm_set_pu8(UINT8_C( 205), UINT8_C( 207), UINT8_C( 224), UINT8_C( 197), UINT8_C( 157), UINT8_C( 96), UINT8_C( 165), UINT8_C( 63)), simde_x_mm_set_pu8(UINT8_C( 152), UINT8_C( 228), UINT8_C( 208), UINT8_C( 147), UINT8_C( 83), UINT8_C( 138), UINT8_C( 198), UINT8_C( 69)) }, { simde_x_mm_set_pu8(UINT8_C( 207), UINT8_C( 50), UINT8_C( 87), UINT8_C( 142), UINT8_C( 106), UINT8_C( 190), UINT8_C( 31), UINT8_C( 212)), simde_x_mm_set_pu8(UINT8_C( 236), UINT8_C( 29), UINT8_C( 22), UINT8_C( 123), UINT8_C( 184), UINT8_C( 176), UINT8_C( 133), UINT8_C( 58)), simde_x_mm_set_pu8(UINT8_C( 222), UINT8_C( 40), UINT8_C( 55), UINT8_C( 133), UINT8_C( 145), UINT8_C( 183), UINT8_C( 82), UINT8_C( 135)) }, { simde_x_mm_set_pu8(UINT8_C( 31), UINT8_C( 172), UINT8_C( 185), UINT8_C( 135), UINT8_C( 147), UINT8_C( 121), UINT8_C( 19), UINT8_C( 169)), simde_x_mm_set_pu8(UINT8_C( 25), UINT8_C( 222), UINT8_C( 106), UINT8_C( 4), UINT8_C( 238), UINT8_C( 72), UINT8_C( 216), UINT8_C( 75)), simde_x_mm_set_pu8(UINT8_C( 28), UINT8_C( 197), UINT8_C( 146), UINT8_C( 70), UINT8_C( 193), UINT8_C( 97), UINT8_C( 118), UINT8_C( 122)) }, { simde_x_mm_set_pu8(UINT8_C( 126), UINT8_C( 117), UINT8_C( 85), UINT8_C( 61), UINT8_C( 8), UINT8_C( 204), UINT8_C( 178), UINT8_C( 71)), simde_x_mm_set_pu8(UINT8_C( 216), UINT8_C( 115), UINT8_C( 127), UINT8_C( 61), UINT8_C( 84), UINT8_C( 84), UINT8_C( 64), UINT8_C( 51)), simde_x_mm_set_pu8(UINT8_C( 171), UINT8_C( 116), UINT8_C( 106), UINT8_C( 61), UINT8_C( 46), UINT8_C( 144), UINT8_C( 121), UINT8_C( 61)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m64 r = 
simde_mm_avg_pu8(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_u8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_pavgb(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_x_mm_set_pu8(UINT8_C( 188), UINT8_C( 166), UINT8_C( 84), UINT8_C( 155), UINT8_C( 198), UINT8_C( 220), UINT8_C( 172), UINT8_C( 150)), simde_x_mm_set_pu8(UINT8_C( 192), UINT8_C( 110), UINT8_C( 123), UINT8_C( 195), UINT8_C( 186), UINT8_C( 160), UINT8_C( 76), UINT8_C( 17)), simde_x_mm_set_pu8(UINT8_C( 190), UINT8_C( 138), UINT8_C( 104), UINT8_C( 175), UINT8_C( 192), UINT8_C( 190), UINT8_C( 124), UINT8_C( 84)) }, { simde_x_mm_set_pu8(UINT8_C( 81), UINT8_C( 231), UINT8_C( 17), UINT8_C( 205), UINT8_C( 110), UINT8_C( 170), UINT8_C( 68), UINT8_C( 253)), simde_x_mm_set_pu8(UINT8_C( 162), UINT8_C( 211), UINT8_C( 109), UINT8_C( 219), UINT8_C( 84), UINT8_C( 95), UINT8_C( 217), UINT8_C( 146)), simde_x_mm_set_pu8(UINT8_C( 122), UINT8_C( 221), UINT8_C( 63), UINT8_C( 212), UINT8_C( 97), UINT8_C( 133), UINT8_C( 143), UINT8_C( 200)) }, { simde_x_mm_set_pu8(UINT8_C( 65), UINT8_C( 248), UINT8_C( 129), UINT8_C( 144), UINT8_C( 4), UINT8_C( 42), UINT8_C( 191), UINT8_C( 186)), simde_x_mm_set_pu8(UINT8_C( 181), UINT8_C( 198), UINT8_C( 22), UINT8_C( 17), UINT8_C( 197), UINT8_C( 123), UINT8_C( 216), UINT8_C( 195)), simde_x_mm_set_pu8(UINT8_C( 123), UINT8_C( 223), UINT8_C( 76), UINT8_C( 81), UINT8_C( 101), UINT8_C( 83), UINT8_C( 204), UINT8_C( 191)) }, { simde_x_mm_set_pu8(UINT8_C( 149), UINT8_C( 191), UINT8_C( 152), UINT8_C( 239), UINT8_C( 227), UINT8_C( 104), UINT8_C( 122), UINT8_C( 140)), simde_x_mm_set_pu8(UINT8_C( 5), UINT8_C( 95), UINT8_C( 201), UINT8_C( 16), UINT8_C( 30), UINT8_C( 151), UINT8_C( 230), UINT8_C( 228)), simde_x_mm_set_pu8(UINT8_C( 77), UINT8_C( 143), UINT8_C( 177), UINT8_C( 128), UINT8_C( 129), UINT8_C( 128), UINT8_C( 176), UINT8_C( 184)) }, { simde_x_mm_set_pu8(UINT8_C( 99), UINT8_C( 248), 
UINT8_C( 192), UINT8_C( 96), UINT8_C( 9), UINT8_C( 180), UINT8_C( 230), UINT8_C( 75)), simde_x_mm_set_pu8(UINT8_C( 205), UINT8_C( 207), UINT8_C( 224), UINT8_C( 197), UINT8_C( 157), UINT8_C( 96), UINT8_C( 165), UINT8_C( 63)), simde_x_mm_set_pu8(UINT8_C( 152), UINT8_C( 228), UINT8_C( 208), UINT8_C( 147), UINT8_C( 83), UINT8_C( 138), UINT8_C( 198), UINT8_C( 69)) }, { simde_x_mm_set_pu8(UINT8_C( 207), UINT8_C( 50), UINT8_C( 87), UINT8_C( 142), UINT8_C( 106), UINT8_C( 190), UINT8_C( 31), UINT8_C( 212)), simde_x_mm_set_pu8(UINT8_C( 236), UINT8_C( 29), UINT8_C( 22), UINT8_C( 123), UINT8_C( 184), UINT8_C( 176), UINT8_C( 133), UINT8_C( 58)), simde_x_mm_set_pu8(UINT8_C( 222), UINT8_C( 40), UINT8_C( 55), UINT8_C( 133), UINT8_C( 145), UINT8_C( 183), UINT8_C( 82), UINT8_C( 135)) }, { simde_x_mm_set_pu8(UINT8_C( 31), UINT8_C( 172), UINT8_C( 185), UINT8_C( 135), UINT8_C( 147), UINT8_C( 121), UINT8_C( 19), UINT8_C( 169)), simde_x_mm_set_pu8(UINT8_C( 25), UINT8_C( 222), UINT8_C( 106), UINT8_C( 4), UINT8_C( 238), UINT8_C( 72), UINT8_C( 216), UINT8_C( 75)), simde_x_mm_set_pu8(UINT8_C( 28), UINT8_C( 197), UINT8_C( 146), UINT8_C( 70), UINT8_C( 193), UINT8_C( 97), UINT8_C( 118), UINT8_C( 122)) }, { simde_x_mm_set_pu8(UINT8_C( 126), UINT8_C( 117), UINT8_C( 85), UINT8_C( 61), UINT8_C( 8), UINT8_C( 204), UINT8_C( 178), UINT8_C( 71)), simde_x_mm_set_pu8(UINT8_C( 216), UINT8_C( 115), UINT8_C( 127), UINT8_C( 61), UINT8_C( 84), UINT8_C( 84), UINT8_C( 64), UINT8_C( 51)), simde_x_mm_set_pu8(UINT8_C( 171), UINT8_C( 116), UINT8_C( 106), UINT8_C( 61), UINT8_C( 46), UINT8_C( 144), UINT8_C( 121), UINT8_C( 61)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m64 r = simde_m_pavgb(test_vec[i].a, test_vec[i].b); simde_mm_empty(); simde_test_x86_assert_equal_u8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_cmpeq_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { 
simde_mm_set_epi32(INT32_C( 1552099433), INT32_C(-1589398258), INT32_C( -34553673), INT32_C(-1178885563)), simde_mm_set_epi32(INT32_C( 1552099433), INT32_C( 2025844073), INT32_C( -34553673), INT32_C(-1178885563)), simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C(-1766041281), INT32_C(-1662168132), INT32_C( 997624458), INT32_C( 980393671)), simde_mm_set_epi32(INT32_C( 1122334686), INT32_C(-1662168132), INT32_C( 1502327165), INT32_C( 1228800964)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 1053856039), INT32_C( 189518021), INT32_C(-2093780846), INT32_C( 717465502)), simde_mm_set_epi32(INT32_C( 1053856039), INT32_C( 189518021), INT32_C( -730260217), INT32_C( 1570531267)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1211703701), INT32_C( 1792645956), INT32_C( 451470260), INT32_C( 72883505)), simde_mm_set_epi32(INT32_C(-1211703701), INT32_C( -873756537), INT32_C( 1700529530), INT32_C( 72883505)), simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 1123941451), INT32_C(-1499396317), INT32_C( -496690637), INT32_C( 857633319)), simde_mm_set_epi32(INT32_C(-1095910111), INT32_C(-1499396317), INT32_C( -59751101), INT32_C( -47922840)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1786351786), INT32_C( 618588335), INT32_C(-1648947504), INT32_C(-1160443929)), simde_mm_set_epi32(INT32_C( 1923711258), INT32_C( 775175059), INT32_C(-1633817987), INT32_C( 778628411)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 2132068337), INT32_C( 303020289), INT32_C(-1544950892), INT32_C(-2063582009)), simde_mm_set_epi32(INT32_C( 2132068337), INT32_C( 628767021), INT32_C(-1152607496), INT32_C(-1526726995)), simde_mm_set_epi32(INT32_C( -1), INT32_C( 
0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 2000861670), INT32_C(-1296714344), INT32_C( 682422961), INT32_C( -767128199)), simde_mm_set_epi32(INT32_C( 2000861670), INT32_C(-1296714344), INT32_C( 682422961), INT32_C(-1784377677)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmpeq_ps( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpeq_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 975888251), INT32_C( 1037275787), INT32_C( -206927716), INT32_C( 1719930130)), simde_mm_set_epi32(INT32_C(-1683768293), INT32_C( 1037275787), INT32_C( -206927716), INT32_C( 1719930130)), simde_mm_set_epi32(INT32_C( 975888251), INT32_C( 1037275787), INT32_C( -206927716), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C(-1524488806), INT32_C( -573553543), INT32_C( 42112282), INT32_C( 112731860)), simde_mm_set_epi32(INT32_C( 1557664843), INT32_C( -39848297), INT32_C( 42112282), INT32_C( 112731860)), simde_mm_set_epi32(INT32_C(-1524488806), INT32_C( -573553543), INT32_C( 42112282), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 1378184574), INT32_C(-1915421318), INT32_C( -131047892), INT32_C( -798352758)), simde_mm_set_epi32(INT32_C( 1378184574), INT32_C(-1967398858), INT32_C( -131047892), INT32_C(-1869734720)), simde_mm_set_epi32(INT32_C( 1378184574), INT32_C(-1915421318), INT32_C( -131047892), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 1981544552), INT32_C( 329373642), INT32_C( -104014353), INT32_C(-2092043281)), simde_mm_set_epi32(INT32_C( 2125102991), INT32_C( 978475086), INT32_C( 1426268882), INT32_C( -436875296)), simde_mm_set_epi32(INT32_C( 1981544552), INT32_C( 329373642), INT32_C( -104014353), 
INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 600612499), INT32_C(-1490414099), INT32_C(-2094604589), INT32_C( 126748780)), simde_mm_set_epi32(INT32_C( 600612499), INT32_C(-1490414099), INT32_C(-2094604589), INT32_C( 126748780)), simde_mm_set_epi32(INT32_C( 600612499), INT32_C(-1490414099), INT32_C(-2094604589), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 60156341), INT32_C( 1267841603), INT32_C(-1347475320), INT32_C(-1548408923)), simde_mm_set_epi32(INT32_C( 60156341), INT32_C( -387226227), INT32_C( -76120938), INT32_C(-1548408923)), simde_mm_set_epi32(INT32_C( 60156341), INT32_C( 1267841603), INT32_C(-1347475320), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C(-2114878036), INT32_C(-1987807687), INT32_C( 1314812087), INT32_C( -402159797)), simde_mm_set_epi32(INT32_C(-2114878036), INT32_C(-1987807687), INT32_C( -580165908), INT32_C( -311736955)), simde_mm_set_epi32(INT32_C(-2114878036), INT32_C(-1987807687), INT32_C( 1314812087), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 1498340262), INT32_C( 467089732), INT32_C( -352096361), INT32_C( -125007519)), simde_mm_set_epi32(INT32_C(-1919145018), INT32_C( 854922987), INT32_C( 767022324), INT32_C( -125007519)), simde_mm_set_epi32(INT32_C( 1498340262), INT32_C( 467089732), INT32_C( -352096361), INT32_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmpeq_ss( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpge_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C(-2117681573), INT32_C( 1744824135), INT32_C(-1062100051), INT32_C( -758867207)), simde_mm_set_epi32(INT32_C( 447802409), INT32_C(-1805122446), INT32_C(-1062100051), INT32_C( 180201744)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0)) }, { 
simde_mm_set_epi32(INT32_C( 501801973), INT32_C( -437596087), INT32_C( 96926344), INT32_C( 1479688678)), simde_mm_set_epi32(INT32_C( 2057298249), INT32_C( -899804986), INT32_C( -711059659), INT32_C( 886835596)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 927826628), INT32_C(-1620708712), INT32_C(-2141396655), INT32_C(-1921942403)), simde_mm_set_epi32(INT32_C( -440885672), INT32_C(-1620708712), INT32_C( 2101230656), INT32_C(-1921942403)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C(-1545828562), INT32_C( -731809091), INT32_C(-1137290929), INT32_C( -77584541)), simde_mm_set_epi32(INT32_C( 1117766142), INT32_C(-2055588954), INT32_C(-1137290929), INT32_C(-1359346144)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1420198491), INT32_C( 1600056611), INT32_C( -347234499), INT32_C( 1584151154)), simde_mm_set_epi32(INT32_C(-1420198491), INT32_C( 1600056611), INT32_C( 1047775179), INT32_C( 673458453)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( -594070283), INT32_C( -270758299), INT32_C( 185236064), INT32_C( 607788733)), simde_mm_set_epi32(INT32_C( -310503835), INT32_C( 1875198957), INT32_C( 185236064), INT32_C( 290211410)), simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 275701977), INT32_C(-1310829701), INT32_C(-1932286124), INT32_C( 124706827)), simde_mm_set_epi32(INT32_C( 275701977), INT32_C( 2044408096), INT32_C( 371148478), INT32_C( 1787635053)), simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 335124996), INT32_C(-2113521475), INT32_C( 960990723), INT32_C( -969567969)), simde_mm_set_epi32(INT32_C(-1587045050), INT32_C(-2113521475), INT32_C( -168625313), INT32_C( 1040258918)), simde_mm_set_epi32(INT32_C( -1), 
INT32_C( -1), INT32_C( -1), INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmpge_ps( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpge_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C(-1012979392), INT32_C(-1453212125), INT32_C( 815083508), INT32_C( 1351772430)), simde_mm_set_epi32(INT32_C( 1169877184), INT32_C(-1037467546), INT32_C( 1421795564), INT32_C( 905862977)), simde_mm_set_epi32(INT32_C(-1012979392), INT32_C(-1453212125), INT32_C( 815083508), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( -324333154), INT32_C( 1314427040), INT32_C( -483618994), INT32_C( 1502460142)), simde_mm_set_epi32(INT32_C( -202797776), INT32_C( 94174629), INT32_C( 926181510), INT32_C( 1502460142)), simde_mm_set_epi32(INT32_C( -324333154), INT32_C( 1314427040), INT32_C( -483618994), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 467112718), INT32_C(-1304161813), INT32_C(-1064952269), INT32_C( 631164600)), simde_mm_set_epi32(INT32_C( 467112718), INT32_C(-1304161813), INT32_C( 200476027), INT32_C( 1663275417)), simde_mm_set_epi32(INT32_C( 467112718), INT32_C(-1304161813), INT32_C(-1064952269), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( -347696391), INT32_C( 148936949), INT32_C( 1398613274), INT32_C( 1250010386)), simde_mm_set_epi32(INT32_C( -347696391), INT32_C( 148936949), INT32_C( 1202050063), INT32_C( 1250010386)), simde_mm_set_epi32(INT32_C( -347696391), INT32_C( 148936949), INT32_C( 1398613274), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C(-1684279233), INT32_C(-2118983098), INT32_C( 160153353), INT32_C( -113728462)), simde_mm_set_epi32(INT32_C(-1684279233), INT32_C(-2118983098), INT32_C( 160153353), INT32_C( 1908868579)), simde_mm_set_epi32(INT32_C(-1684279233), 
INT32_C(-2118983098), INT32_C( 160153353), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1844989631), INT32_C( 1725503201), INT32_C( 822658308), INT32_C( -958604095)), simde_mm_set_epi32(INT32_C( 1098943561), INT32_C( 1725503201), INT32_C( -755708322), INT32_C( -958604095)), simde_mm_set_epi32(INT32_C(-1844989631), INT32_C( 1725503201), INT32_C( 822658308), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 375717031), INT32_C( -882753921), INT32_C( 1130912475), INT32_C( 2045186948)), simde_mm_set_epi32(INT32_C( 375717031), INT32_C( -882753921), INT32_C( -800937725), INT32_C( 2045186948)), simde_mm_set_epi32(INT32_C( 375717031), INT32_C( -882753921), INT32_C( 1130912475), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 602871888), INT32_C( 470458702), INT32_C(-1169608009), INT32_C( -214741679)), simde_mm_set_epi32(INT32_C( 602871888), INT32_C( -493719927), INT32_C(-1169608009), INT32_C( 2044474163)), simde_mm_set_epi32(INT32_C( 602871888), INT32_C( 470458702), INT32_C(-1169608009), INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmpge_ss( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpgt_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 1960669177), INT32_C(-1800221672), INT32_C( -201138064), INT32_C( 1882570539)), simde_mm_set_epi32(INT32_C( 1960669177), INT32_C( 1636503467), INT32_C( 379370104), INT32_C( 1549588428)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 1497868169), INT32_C( 1587819932), INT32_C( 11597982), INT32_C( 367113544)), simde_mm_set_epi32(INT32_C( 1497868169), INT32_C( 1587819932), INT32_C( 11597982), INT32_C(-2087685948)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 
-1)) }, { simde_mm_set_epi32(INT32_C( -420177326), INT32_C( 1458568188), INT32_C( 1173908260), INT32_C(-1290366123)), simde_mm_set_epi32(INT32_C(-1452540627), INT32_C(-1072533390), INT32_C( 1463315378), INT32_C( 944710330)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 1987295390), INT32_C( -783471208), INT32_C( 1614612869), INT32_C( 941455851)), simde_mm_set_epi32(INT32_C( 776864087), INT32_C( -275776600), INT32_C( 1614612869), INT32_C( 941455851)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( -802004143), INT32_C(-2136990592), INT32_C( 1617943513), INT32_C( 1268400008)), simde_mm_set_epi32(INT32_C( -802004143), INT32_C(-2136990592), INT32_C( 1458980297), INT32_C( 1268400008)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1974256896), INT32_C( 1350058685), INT32_C(-1649890899), INT32_C(-2052839662)), simde_mm_set_epi32(INT32_C(-1974256896), INT32_C( 1388826354), INT32_C(-2033282795), INT32_C(-2052839662)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 42472091), INT32_C( 2006715626), INT32_C( 18778978), INT32_C( 1523053306)), simde_mm_set_epi32(INT32_C( 42472091), INT32_C(-1636528069), INT32_C( 18778978), INT32_C( 737106093)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 1244568026), INT32_C(-1381724018), INT32_C( -492895509), INT32_C( -783840348)), simde_mm_set_epi32(INT32_C( 567191695), INT32_C( -729265936), INT32_C( -492895509), INT32_C( -783840348)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmpgt_ps( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); 
simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpgt_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 1869056804), INT32_C(-1753986411), INT32_C( -585204320), INT32_C(-1910961055)), simde_mm_set_epi32(INT32_C( 1869056804), INT32_C(-1102496477), INT32_C( -20892909), INT32_C( -63499356)), simde_mm_set_epi32(INT32_C( 1869056804), INT32_C(-1753986411), INT32_C( -585204320), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 528987825), INT32_C( -236154839), INT32_C( 1847822202), INT32_C( 1073580784)), simde_mm_set_epi32(INT32_C( 301802145), INT32_C( -236154839), INT32_C( 1847822202), INT32_C( 1073580784)), simde_mm_set_epi32(INT32_C( 528987825), INT32_C( -236154839), INT32_C( 1847822202), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( -453663293), INT32_C( 374222760), INT32_C( 660253560), INT32_C(-1094975358)), simde_mm_set_epi32(INT32_C( 922033710), INT32_C(-1385400175), INT32_C( 281683050), INT32_C(-1094975358)), simde_mm_set_epi32(INT32_C( -453663293), INT32_C( 374222760), INT32_C( 660253560), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 56611761), INT32_C( -294357766), INT32_C( 221827642), INT32_C(-1086961889)), simde_mm_set_epi32(INT32_C(-2059704292), INT32_C( -294357766), INT32_C(-1642431615), INT32_C(-1453505032)), simde_mm_set_epi32(INT32_C( 56611761), INT32_C( -294357766), INT32_C( 221827642), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( -677567670), INT32_C(-1065179289), INT32_C( 1230662601), INT32_C( 1022052917)), simde_mm_set_epi32(INT32_C( -677567670), INT32_C( 692939036), INT32_C( 1108906800), INT32_C(-2115601689)), simde_mm_set_epi32(INT32_C( -677567670), INT32_C(-1065179289), INT32_C( 1230662601), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 658893489), INT32_C( -592874075), INT32_C( 90769968), INT32_C( 1780967347)), simde_mm_set_epi32(INT32_C( -745618317), INT32_C( -592874075), INT32_C( 90769968), INT32_C( 1901683344)), 
simde_mm_set_epi32(INT32_C( 658893489), INT32_C( -592874075), INT32_C( 90769968), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 596908542), INT32_C( -725878217), INT32_C( 43862031), INT32_C( 120397305)), simde_mm_set_epi32(INT32_C( 1105546030), INT32_C( -725878217), INT32_C( 43862031), INT32_C(-1679076026)), simde_mm_set_epi32(INT32_C( 596908542), INT32_C( -725878217), INT32_C( 43862031), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C(-2095835916), INT32_C( 2093817071), INT32_C(-1700253832), INT32_C( 1213385208)), simde_mm_set_epi32(INT32_C(-1122300334), INT32_C( 1314746582), INT32_C(-1700253832), INT32_C( 1213385208)), simde_mm_set_epi32(INT32_C(-2095835916), INT32_C( 2093817071), INT32_C(-1700253832), INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmpgt_ss( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmple_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C(-1214095902), INT32_C( -66909402), INT32_C( -553169346), INT32_C( 1655881585)), simde_mm_set_epi32(INT32_C( 1411879436), INT32_C( 1800787365), INT32_C( -553169346), INT32_C( -934991244)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 935354519), INT32_C( -241132058), INT32_C( 1879436355), INT32_C( -995448835)), simde_mm_set_epi32(INT32_C( 935354519), INT32_C( -241132058), INT32_C( 778854493), INT32_C( 1939053113)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C(-2000937674), INT32_C( 926691548), INT32_C( 1566199240), INT32_C(-2071387770)), simde_mm_set_epi32(INT32_C( -33682990), INT32_C( 926691548), INT32_C( -862341564), INT32_C(-2071387770)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), 
INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 1400009830), INT32_C( 2074443710), INT32_C(-1988154474), INT32_C(-1285973673)), simde_mm_set_epi32(INT32_C( 1431717555), INT32_C( 2074443710), INT32_C( -236063022), INT32_C(-1285973673)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( -969028248), INT32_C( 1961285965), INT32_C( 2034451315), INT32_C( 982408470)), simde_mm_set_epi32(INT32_C( -969028248), INT32_C( 737591133), INT32_C( 2034451315), INT32_C( 1972820242)), simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 689515799), INT32_C( 1226329239), INT32_C( -601248060), INT32_C( -246025007)), simde_mm_set_epi32(INT32_C( 689515799), INT32_C( 1567083199), INT32_C( -315367605), INT32_C( 895186883)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 1299136382), INT32_C( 66630204), INT32_C( 5361190), INT32_C( 1189577124)), simde_mm_set_epi32(INT32_C(-1645066809), INT32_C( 66630204), INT32_C( -985693313), INT32_C( 1189577124)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( -861186400), INT32_C(-1832454806), INT32_C( -43975819), INT32_C( 67490279)), simde_mm_set_epi32(INT32_C( -259498670), INT32_C(-1523721259), INT32_C( -43975819), INT32_C( 292054476)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmple_ps( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmple_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 577834131), INT32_C( 125783617), INT32_C(-1878005293), 
INT32_C(-2039280607)), simde_mm_set_epi32(INT32_C( 577834131), INT32_C( 125783617), INT32_C(-1878005293), INT32_C( 2032820947)), simde_mm_set_epi32(INT32_C( 577834131), INT32_C( 125783617), INT32_C(-1878005293), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C(-1229544726), INT32_C(-1899015779), INT32_C( 1583449198), INT32_C(-1177374746)), simde_mm_set_epi32(INT32_C( -10568623), INT32_C( 2105302200), INT32_C( 1583449198), INT32_C( 113127759)), simde_mm_set_epi32(INT32_C(-1229544726), INT32_C(-1899015779), INT32_C( 1583449198), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C(-1403164448), INT32_C(-1420499873), INT32_C(-1053362140), INT32_C(-1673889705)), simde_mm_set_epi32(INT32_C(-1403164448), INT32_C(-1420499873), INT32_C(-1053362140), INT32_C(-1409095555)), simde_mm_set_epi32(INT32_C(-1403164448), INT32_C(-1420499873), INT32_C(-1053362140), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1464154482), INT32_C( -512313078), INT32_C(-1671698744), INT32_C(-1338032665)), simde_mm_set_epi32(INT32_C(-1464154482), INT32_C( 350929661), INT32_C(-1095578259), INT32_C( -137690799)), simde_mm_set_epi32(INT32_C(-1464154482), INT32_C( -512313078), INT32_C(-1671698744), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( -876240227), INT32_C( 638830515), INT32_C( 1194125228), INT32_C( 1201799459)), simde_mm_set_epi32(INT32_C( 370717124), INT32_C( 575915100), INT32_C( 323333140), INT32_C(-1701764718)), simde_mm_set_epi32(INT32_C( -876240227), INT32_C( 638830515), INT32_C( 1194125228), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 1870075868), INT32_C( -538093027), INT32_C( 1796667049), INT32_C( 944394189)), simde_mm_set_epi32(INT32_C( 1870075868), INT32_C(-1718307451), INT32_C( 1796667049), INT32_C( 944394189)), simde_mm_set_epi32(INT32_C( 1870075868), INT32_C( -538093027), INT32_C( 1796667049), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 1976935464), INT32_C(-1126118718), INT32_C( 992307422), INT32_C( 1933654831)), simde_mm_set_epi32(INT32_C( 980618308), INT32_C(-1126118718), INT32_C( 
992307422), INT32_C( 1933654831)), simde_mm_set_epi32(INT32_C( 1976935464), INT32_C(-1126118718), INT32_C( 992307422), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 957949500), INT32_C( 241498261), INT32_C(-2057282883), INT32_C( 160259126)), simde_mm_set_epi32(INT32_C( 945346495), INT32_C( 241498261), INT32_C(-2057282883), INT32_C( 160259126)), simde_mm_set_epi32(INT32_C( 957949500), INT32_C( 241498261), INT32_C(-2057282883), INT32_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmple_ss( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmplt_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( -864121404), INT32_C( 820258235), INT32_C(-1696337178), INT32_C(-1482432629)), simde_mm_set_epi32(INT32_C( -864121404), INT32_C( 820258235), INT32_C(-1696337178), INT32_C(-1482432629)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1757897801), INT32_C(-1139623844), INT32_C( 396231785), INT32_C( 1433495183)), simde_mm_set_epi32(INT32_C(-1757897801), INT32_C( 1640940421), INT32_C( 923198702), INT32_C( 1433495183)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 1329593143), INT32_C( 1809390238), INT32_C( 1760230809), INT32_C( -549294944)), simde_mm_set_epi32(INT32_C( 1011105980), INT32_C( 1809390238), INT32_C( 1760230809), INT32_C( -549294944)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1918857994), INT32_C(-1864521589), INT32_C( 155537477), INT32_C( 1666430711)), simde_mm_set_epi32(INT32_C(-1918857994), INT32_C(-1864521589), INT32_C( 155537477), INT32_C( 735305870)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 
0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 197471455), INT32_C( -1555852), INT32_C(-1752379132), INT32_C( 240472065)), simde_mm_set_epi32(INT32_C( 579332444), INT32_C( -1555852), INT32_C(-1752379132), INT32_C( 240472065)), simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 1086999122), INT32_C( 1309092278), INT32_C( 365604292), INT32_C(-1138767432)), simde_mm_set_epi32(INT32_C( 1086999122), INT32_C( -886110099), INT32_C( 219975772), INT32_C(-1138767432)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 1954260003), INT32_C( 1696592095), INT32_C( 2078494375), INT32_C( 650856359)), simde_mm_set_epi32(INT32_C( 1954260003), INT32_C( 2098551061), INT32_C( 2078494375), INT32_C( 650856359)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1672271308), INT32_C(-1503351515), INT32_C(-1874414686), INT32_C( 435117874)), simde_mm_set_epi32(INT32_C(-1672271308), INT32_C(-1630328519), INT32_C(-1874414686), INT32_C( 435117874)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmplt_ps( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmplt_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 1231873664), INT32_C( 1036602093), INT32_C( -933713727), INT32_C( 2093671800)), simde_mm_set_epi32(INT32_C( 1231873664), INT32_C( 1036602093), INT32_C( 357806524), INT32_C( 2093671800)), simde_mm_set_epi32(INT32_C( 1231873664), INT32_C( 1036602093), INT32_C( -933713727), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 793534664), INT32_C( 376969487), INT32_C( 
721355231), INT32_C( 1023311191)), simde_mm_set_epi32(INT32_C(-1925750607), INT32_C(-1255391862), INT32_C( 721355231), INT32_C( 2118013683)), simde_mm_set_epi32(INT32_C( 793534664), INT32_C( 376969487), INT32_C( 721355231), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C(-1783953523), INT32_C( 1823399536), INT32_C( 1402581392), INT32_C(-1166205233)), simde_mm_set_epi32(INT32_C( 1218188972), INT32_C(-1327532162), INT32_C(-1033869830), INT32_C(-1166205233)), simde_mm_set_epi32(INT32_C(-1783953523), INT32_C( 1823399536), INT32_C( 1402581392), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 223867160), INT32_C(-1451057003), INT32_C( 2044754038), INT32_C( 605919704)), simde_mm_set_epi32(INT32_C( 223867160), INT32_C(-1334167478), INT32_C( 2044754038), INT32_C( 605919704)), simde_mm_set_epi32(INT32_C( 223867160), INT32_C(-1451057003), INT32_C( 2044754038), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 951875437), INT32_C( 1770306939), INT32_C(-1002738540), INT32_C( 1475715403)), simde_mm_set_epi32(INT32_C( 935489339), INT32_C( 1770306939), INT32_C(-1002738540), INT32_C( 867597907)), simde_mm_set_epi32(INT32_C( 951875437), INT32_C( 1770306939), INT32_C(-1002738540), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( -400845231), INT32_C(-1931717289), INT32_C( 187179269), INT32_C( 1693770206)), simde_mm_set_epi32(INT32_C( -400845231), INT32_C( 1409986791), INT32_C( 867295140), INT32_C( 1693770206)), simde_mm_set_epi32(INT32_C( -400845231), INT32_C(-1931717289), INT32_C( 187179269), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 1979013327), INT32_C( 1144749620), INT32_C( -915616748), INT32_C(-1688776910)), simde_mm_set_epi32(INT32_C( 1979013327), INT32_C( -522411693), INT32_C( -915616748), INT32_C(-1688776910)), simde_mm_set_epi32(INT32_C( 1979013327), INT32_C( 1144749620), INT32_C( -915616748), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1335736202), INT32_C( 434505386), INT32_C( 1706493603), INT32_C( 1636681283)), simde_mm_set_epi32(INT32_C(-1335736202), INT32_C( 434505386), 
INT32_C( 1706493603), INT32_C( 754767105)), simde_mm_set_epi32(INT32_C(-1335736202), INT32_C( 434505386), INT32_C( 1706493603), INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmplt_ss( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } /* Test for simde_mm_cmpneq_ps. The 8 cases store a, b and the expected r as 32-bit integer lanes; a and b are reinterpreted with simde_mm_castsi128_ps before the call and the result is cast back with simde_mm_castps_si128 so it can be compared against r with simde_test_x86_assert_equal_i32x4. Every expected lane is either 0 or -1. Returns 0 after all cases ran. */ static int test_simde_mm_cmpneq_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 532215048), INT32_C( 130142512), INT32_C(-2127364592), INT32_C( 1091895543)), simde_mm_set_epi32(INT32_C( 532215048), INT32_C( 130142512), INT32_C(-2127364592), INT32_C( 1091895543)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1046985290), INT32_C(-1731603099), INT32_C( 610751781), INT32_C( 1112478841)), simde_mm_set_epi32(INT32_C(-1046985290), INT32_C( 622608062), INT32_C(-1239630421), INT32_C(-1197092706)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 1281269663), INT32_C( 10288009), INT32_C( 351928123), INT32_C( 68696372)), simde_mm_set_epi32(INT32_C( 1281269663), INT32_C( 1321240790), INT32_C( 1472147540), INT32_C( 221255688)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 2082220393), INT32_C( 853275580), INT32_C( 1554259301), INT32_C( 1874019211)), simde_mm_set_epi32(INT32_C( 2082220393), INT32_C( 853275580), INT32_C( -464704033), INT32_C( 1874019211)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1607341285), INT32_C( 801224090), INT32_C( 790655246), INT32_C( 1500708888)), simde_mm_set_epi32(INT32_C( 1686421224), INT32_C( -848290084), INT32_C( 1521902022), INT32_C( 974050035)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C(
-1), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 209990814), INT32_C( 2072268417), INT32_C( -198384983), INT32_C( -634248146)), simde_mm_set_epi32(INT32_C(-1549476809), INT32_C( -306787617), INT32_C( -198384983), INT32_C( 461233263)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 2046647956), INT32_C( 403639600), INT32_C( -500490027), INT32_C( 21369987)), simde_mm_set_epi32(INT32_C( 2046647956), INT32_C( 1922885083), INT32_C( -500490027), INT32_C( 437329742)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C(-1569313979), INT32_C(-1644454001), INT32_C(-1155267042), INT32_C( -840725054)), simde_mm_set_epi32(INT32_C(-1569313979), INT32_C(-1403674957), INT32_C(-1155267042), INT32_C( -840725054)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmpneq_ps( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } /* Test for simde_mm_cmpneq_ss. Inputs and expected results are stored as 32-bit integer lanes and reinterpreted with simde_mm_castsi128_ps for the call. In every expected vector the first three simde_mm_set_epi32 arguments repeat those of a and the last one is 0 or -1. Returns 0 after all cases ran. */ static int test_simde_mm_cmpneq_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( -360810495), INT32_C(-1592808236), INT32_C( 688144434), INT32_C( 1729628761)), simde_mm_set_epi32(INT32_C(-1174522920), INT32_C( 592092453), INT32_C( 688144434), INT32_C(-1493258549)), simde_mm_set_epi32(INT32_C( -360810495), INT32_C(-1592808236), INT32_C( 688144434), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 1897806214), INT32_C( -836251000), INT32_C( 1879840962), INT32_C( 1741986879)), simde_mm_set_epi32(INT32_C( 1897806214), INT32_C( 1708676885), INT32_C( -829246870), INT32_C( 1436108855)), simde_mm_set_epi32(INT32_C( 1897806214), INT32_C( -836251000), INT32_C( 1879840962), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C(-1752241902),
INT32_C(-2071998685), INT32_C( -472663454), INT32_C( -626361712)), simde_mm_set_epi32(INT32_C( 2013622797), INT32_C(-2071998685), INT32_C(-1052835841), INT32_C( -626361712)), simde_mm_set_epi32(INT32_C(-1752241902), INT32_C(-2071998685), INT32_C( -472663454), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1426139517), INT32_C( 408854398), INT32_C( 1288359889), INT32_C(-1580268734)), simde_mm_set_epi32(INT32_C(-1426139517), INT32_C( 408854398), INT32_C( 1288359889), INT32_C(-1580268734)), simde_mm_set_epi32(INT32_C(-1426139517), INT32_C( 408854398), INT32_C( 1288359889), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 1447450397), INT32_C( 498817306), INT32_C( 48851994), INT32_C(-1751968553)), simde_mm_set_epi32(INT32_C( -142346191), INT32_C(-1158316806), INT32_C( 1062598271), INT32_C( -267767173)), simde_mm_set_epi32(INT32_C( 1447450397), INT32_C( 498817306), INT32_C( 48851994), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 777179606), INT32_C( 1922869133), INT32_C(-2009725956), INT32_C( 2047200466)), simde_mm_set_epi32(INT32_C( 778691217), INT32_C( 807594000), INT32_C(-2009725956), INT32_C( 2047200466)), simde_mm_set_epi32(INT32_C( 777179606), INT32_C( 1922869133), INT32_C(-2009725956), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1844022768), INT32_C( 257130431), INT32_C( 1749772003), INT32_C(-1329350641)), simde_mm_set_epi32(INT32_C( 1038626596), INT32_C( 257130431), INT32_C( 1749772003), INT32_C( 1329214996)), simde_mm_set_epi32(INT32_C(-1844022768), INT32_C( 257130431), INT32_C( 1749772003), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 381813049), INT32_C( -254051472), INT32_C(-1103075616), INT32_C(-1510860170)), simde_mm_set_epi32(INT32_C(-1489319643), INT32_C( -254051472), INT32_C(-1103075616), INT32_C( -696173092)), simde_mm_set_epi32(INT32_C( 381813049), INT32_C( -254051472), INT32_C(-1103075616), INT32_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmpneq_ss(
simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } /* Test for simde_mm_cmpnge_ps. The 8 cases store a, b and the expected r as 32-bit integer lanes; a and b are reinterpreted with simde_mm_castsi128_ps for the call and the result is cast back with simde_mm_castps_si128 and compared against r with simde_test_x86_assert_equal_i32x4. Every expected lane is either 0 or -1. Returns 0 after all cases ran. */ static int test_simde_mm_cmpnge_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 1888650448), INT32_C( 1952380142), INT32_C( -418024326), INT32_C( 954406307)), simde_mm_set_epi32(INT32_C( 1888650448), INT32_C( 1952380142), INT32_C( 1530340429), INT32_C( 315056976)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( -675143251), INT32_C( 71969785), INT32_C( -921847024), INT32_C( 22067797)), simde_mm_set_epi32(INT32_C( 72561527), INT32_C( 1005464844), INT32_C( -921847024), INT32_C( 22067797)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-2139620548), INT32_C(-1593174748), INT32_C( -604878160), INT32_C(-1601463094)), simde_mm_set_epi32(INT32_C(-2139620548), INT32_C( 492282745), INT32_C( -604878160), INT32_C( 1512309058)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C(-1508734054), INT32_C( 370976534), INT32_C( 2066803930), INT32_C( -701634011)), simde_mm_set_epi32(INT32_C( 1927420129), INT32_C( 370976534), INT32_C( 2066803930), INT32_C( -701634011)), simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 297952699), INT32_C( 2014428800), INT32_C( -102481167), INT32_C( 2086329997)), simde_mm_set_epi32(INT32_C( 1611455963), INT32_C( 2014428800), INT32_C( -102481167), INT32_C(-2102996894)), simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( -649625825), INT32_C( -985868832), INT32_C( 1191775411), INT32_C( 442065450)), simde_mm_set_epi32(INT32_C( -649625825), INT32_C( 1060858907), INT32_C( -423355707), INT32_C(-2066634583)), simde_mm_set_epi32(INT32_C( 0), INT32_C(
-1), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 1343578941), INT32_C( 107928701), INT32_C(-2043979132), INT32_C( 1583468645)), simde_mm_set_epi32(INT32_C( -744928720), INT32_C(-1908186697), INT32_C(-2043979132), INT32_C( -98145208)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 1297942286), INT32_C( 1312694015), INT32_C(-1476884375), INT32_C( 1085937493)), simde_mm_set_epi32(INT32_C( 1297942286), INT32_C( 1312694015), INT32_C(-1476884375), INT32_C( 811271927)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmpnge_ps( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } /* Test for simde_mm_cmpnge_ss. Inputs and expected results are stored as 32-bit integer lanes and reinterpreted with simde_mm_castsi128_ps for the call. In every expected vector the first three simde_mm_set_epi32 arguments repeat those of a and the last one is 0 or -1. Returns 0 after all cases ran. */ static int test_simde_mm_cmpnge_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 106734352), INT32_C(-1177695143), INT32_C( 1251867427), INT32_C(-1113436842)), simde_mm_set_epi32(INT32_C(-2016761807), INT32_C(-1602250813), INT32_C(-1502543805), INT32_C(-1113436842)), simde_mm_set_epi32(INT32_C( 106734352), INT32_C(-1177695143), INT32_C( 1251867427), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( -992010454), INT32_C( 875520944), INT32_C( -773626008), INT32_C( 297000538)), simde_mm_set_epi32(INT32_C( -992010454), INT32_C( 875520944), INT32_C(-1960267017), INT32_C( 267267504)), simde_mm_set_epi32(INT32_C( -992010454), INT32_C( 875520944), INT32_C( -773626008), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1423404927), INT32_C( -273294615), INT32_C( 1115052821), INT32_C( 1762207327)), simde_mm_set_epi32(INT32_C(-1423404927), INT32_C(-2004658753), INT32_C( 1115052821), INT32_C(-1955515955)), simde_mm_set_epi32(INT32_C(-1423404927), INT32_C( -273294615), INT32_C( 1115052821), INT32_C( 0)) }, {
simde_mm_set_epi32(INT32_C( 630570300), INT32_C( 1480720620), INT32_C(-2111634368), INT32_C( 381139912)), simde_mm_set_epi32(INT32_C( 91445288), INT32_C( 1480720620), INT32_C(-2111634368), INT32_C( 381139912)), simde_mm_set_epi32(INT32_C( 630570300), INT32_C( 1480720620), INT32_C(-2111634368), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-2140660333), INT32_C( -873371869), INT32_C(-1549425333), INT32_C( 1861722511)), simde_mm_set_epi32(INT32_C(-1410997069), INT32_C( -873371869), INT32_C(-1549425333), INT32_C( -149520118)), simde_mm_set_epi32(INT32_C(-2140660333), INT32_C( -873371869), INT32_C(-1549425333), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1896750546), INT32_C( 577432699), INT32_C( -255384715), INT32_C( 717895922)), simde_mm_set_epi32(INT32_C( 1233088565), INT32_C(-1957258082), INT32_C( 1913240068), INT32_C(-1565227266)), simde_mm_set_epi32(INT32_C(-1896750546), INT32_C( 577432699), INT32_C( -255384715), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 1577990746), INT32_C( 1577580126), INT32_C( -298703690), INT32_C( 397262282)), simde_mm_set_epi32(INT32_C(-2105366944), INT32_C( 1950034916), INT32_C( -298703690), INT32_C( 1632078127)), simde_mm_set_epi32(INT32_C( 1577990746), INT32_C( 1577580126), INT32_C( -298703690), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C(-1398991860), INT32_C( -868057052), INT32_C( 897239402), INT32_C( 478239630)), simde_mm_set_epi32(INT32_C(-1398991860), INT32_C(-1311190075), INT32_C(-1018578616), INT32_C(-1096405398)), simde_mm_set_epi32(INT32_C(-1398991860), INT32_C( -868057052), INT32_C( 897239402), INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmpnge_ss( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } /* Test for simde_mm_cmpngt_ps. The 8 cases store a, b and the expected r as 32-bit integer lanes; a and b are reinterpreted with simde_mm_castsi128_ps for the call and the result is cast back with simde_mm_castps_si128 and compared against r with simde_test_x86_assert_equal_i32x4. Every expected lane is either 0 or -1. Returns 0 after all cases ran. */ static int test_simde_mm_cmpngt_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] =
{ { simde_mm_set_epi32(INT32_C(-1123406769), INT32_C( -826046923), INT32_C( 1391137025), INT32_C( 225408485)), simde_mm_set_epi32(INT32_C(-1123406769), INT32_C( 1170001915), INT32_C( -443741078), INT32_C( 225408485)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 919359233), INT32_C( 121568041), INT32_C( 1196900513), INT32_C(-2068491895)), simde_mm_set_epi32(INT32_C( -229431781), INT32_C( 121568041), INT32_C( -495278912), INT32_C(-2068491895)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C(-1546227894), INT32_C( -80396000), INT32_C( 1691185200), INT32_C(-1185796610)), simde_mm_set_epi32(INT32_C( 930129185), INT32_C( 460197193), INT32_C( 1461329142), INT32_C( -46416365)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 801509529), INT32_C( 166196414), INT32_C( 1533458041), INT32_C(-2125985376)), simde_mm_set_epi32(INT32_C( 801509529), INT32_C( 166196414), INT32_C( 170298010), INT32_C( -614823370)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( -383558878), INT32_C( 1739426797), INT32_C(-1413157826), INT32_C(-1057748324)), simde_mm_set_epi32(INT32_C( -383558878), INT32_C( 561929576), INT32_C(-1413157826), INT32_C(-1057748324)), simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 303550578), INT32_C( 722713191), INT32_C(-1131128469), INT32_C( -492258520)), simde_mm_set_epi32(INT32_C( -260529955), INT32_C( 722713191), INT32_C(-1131128469), INT32_C( -492258520)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( -760270686), INT32_C(-1716923162), INT32_C( 225019073), INT32_C(-2108522398)), simde_mm_set_epi32(INT32_C( -103603972), INT32_C( 226706263), INT32_C( 225019073), INT32_C( -506819004)), simde_mm_set_epi32(INT32_C( 0),
INT32_C( -1), INT32_C( -1), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 44043689), INT32_C( 1891018514), INT32_C( 705898855), INT32_C( -899283598)), simde_mm_set_epi32(INT32_C( 44043689), INT32_C( 1891018514), INT32_C( -639205394), INT32_C( -899283598)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmpngt_ps( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } /* Test for simde_mm_cmpngt_ss. Inputs and expected results are stored as 32-bit integer lanes and reinterpreted with simde_mm_castsi128_ps for the call. In every expected vector the first three simde_mm_set_epi32 arguments repeat those of a and the last one is 0 or -1. Returns 0 after all cases ran. */ static int test_simde_mm_cmpngt_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 612778091), INT32_C( 1832771785), INT32_C( -622594968), INT32_C( 910095126)), simde_mm_set_epi32(INT32_C( 458628115), INT32_C( 689947875), INT32_C( -348460368), INT32_C( 227312121)), simde_mm_set_epi32(INT32_C( 612778091), INT32_C( 1832771785), INT32_C( -622594968), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1078825247), INT32_C(-1403039755), INT32_C( 1610931740), INT32_C( 175799384)), simde_mm_set_epi32(INT32_C( -601333689), INT32_C( 307533582), INT32_C( 1610931740), INT32_C( 175799384)), simde_mm_set_epi32(INT32_C(-1078825247), INT32_C(-1403039755), INT32_C( 1610931740), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( -473381137), INT32_C(-1853677516), INT32_C( 4434085), INT32_C( 177860717)), simde_mm_set_epi32(INT32_C( -473381137), INT32_C(-1429612088), INT32_C( 1023613916), INT32_C(-1567281359)), simde_mm_set_epi32(INT32_C( -473381137), INT32_C(-1853677516), INT32_C( 4434085), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1461690128), INT32_C( -186641995), INT32_C(-1876360628), INT32_C( 720924768)), simde_mm_set_epi32(INT32_C( 1256087859), INT32_C( -186641995), INT32_C(-1876360628), INT32_C( 720924768)), simde_mm_set_epi32(INT32_C(-1461690128), INT32_C( -186641995), INT32_C(-1876360628),
INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 2056917783), INT32_C( -907049588), INT32_C( 1171345475), INT32_C(-1386896922)), simde_mm_set_epi32(INT32_C( 2056917783), INT32_C( -907049588), INT32_C( 2132166674), INT32_C(-1386896922)), simde_mm_set_epi32(INT32_C( 2056917783), INT32_C( -907049588), INT32_C( 1171345475), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 333966351), INT32_C( 1310053758), INT32_C( 1466732857), INT32_C(-1121891710)), simde_mm_set_epi32(INT32_C( 333966351), INT32_C( 1310053758), INT32_C( 1572556388), INT32_C(-1121891710)), simde_mm_set_epi32(INT32_C( 333966351), INT32_C( 1310053758), INT32_C( 1466732857), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 860690730), INT32_C(-1667003350), INT32_C(-1337189707), INT32_C( 1913729946)), simde_mm_set_epi32(INT32_C( 860690730), INT32_C(-1667003350), INT32_C( -470676019), INT32_C( -895735920)), simde_mm_set_epi32(INT32_C( 860690730), INT32_C(-1667003350), INT32_C(-1337189707), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 1440878372), INT32_C( 1637362808), INT32_C( -984574857), INT32_C( 760105912)), simde_mm_set_epi32(INT32_C( 511504303), INT32_C( 1637362808), INT32_C( -984574857), INT32_C( 760105912)), simde_mm_set_epi32(INT32_C( 1440878372), INT32_C( 1637362808), INT32_C( -984574857), INT32_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmpngt_ss( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } /* Test for simde_mm_cmpnle_ps. The 8 cases store a, b and the expected r as 32-bit integer lanes; a and b are reinterpreted with simde_mm_castsi128_ps for the call and the result is cast back with simde_mm_castps_si128 and compared against r with simde_test_x86_assert_equal_i32x4. Every expected lane is either 0 or -1. Returns 0 after all cases ran. */ static int test_simde_mm_cmpnle_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 2084445991), INT32_C(-1270940367), INT32_C( -137999497), INT32_C( 1274474466)), simde_mm_set_epi32(INT32_C( 2084445991), INT32_C( 1003404174), INT32_C( -137999497), INT32_C( 1274474466)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) },
{ simde_mm_set_epi32(INT32_C(-1073857581), INT32_C( 297220883), INT32_C(-1118323043), INT32_C(-2009485771)), simde_mm_set_epi32(INT32_C(-1073857581), INT32_C(-1478175756), INT32_C(-1118323043), INT32_C(-2009485771)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1368751028), INT32_C( 1620987038), INT32_C( 1536890594), INT32_C( -889040581)), simde_mm_set_epi32(INT32_C( 2024237287), INT32_C( 1620987038), INT32_C( 809803938), INT32_C( -761648004)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 965653593), INT32_C( 1368656732), INT32_C(-2063999421), INT32_C( -170225342)), simde_mm_set_epi32(INT32_C( 965653593), INT32_C( 1368656732), INT32_C(-1629280761), INT32_C( -170225342)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1924982985), INT32_C( 388770010), INT32_C( 1672412411), INT32_C(-1198604482)), simde_mm_set_epi32(INT32_C( -502648824), INT32_C( 1358695998), INT32_C(-1024782381), INT32_C(-1198604482)), simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 1453860753), INT32_C(-1007750264), INT32_C( 2035232495), INT32_C(-1933616133)), simde_mm_set_epi32(INT32_C(-1419270643), INT32_C(-2082779635), INT32_C(-1193599699), INT32_C( 1986918969)), simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( -1), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 731183446), INT32_C( -811315535), INT32_C( -739483356), INT32_C( 137475709)), simde_mm_set_epi32(INT32_C(-1602128714), INT32_C( -811315535), INT32_C( -739483356), INT32_C( 137475709)), simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( -278168455), INT32_C(-1440405844), INT32_C( -351927257), INT32_C( -925114922)), simde_mm_set_epi32(INT32_C( 97237822), INT32_C(-1440405844), INT32_C( 1100655169), INT32_C( 181456962)), simde_mm_set_epi32(INT32_C( 0),
INT32_C( 0), INT32_C( 0), INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmpnle_ps( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } /* Test for simde_mm_cmpnle_ss. Inputs and expected results are stored as 32-bit integer lanes and reinterpreted with simde_mm_castsi128_ps for the call. In every expected vector the first three simde_mm_set_epi32 arguments repeat those of a and the last one is 0 or -1. Returns 0 after all cases ran. */ static int test_simde_mm_cmpnle_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 586573716), INT32_C( 2092571439), INT32_C(-1569791359), INT32_C( -771884114)), simde_mm_set_epi32(INT32_C(-1772197058), INT32_C( 2000867488), INT32_C( 1628528221), INT32_C( -771884114)), simde_mm_set_epi32(INT32_C( 586573716), INT32_C( 2092571439), INT32_C(-1569791359), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 1418270756), INT32_C(-1023444334), INT32_C( 1630284961), INT32_C( 1200999231)), simde_mm_set_epi32(INT32_C(-1887216511), INT32_C( -3136960), INT32_C( 1630284961), INT32_C( 1952383628)), simde_mm_set_epi32(INT32_C( 1418270756), INT32_C(-1023444334), INT32_C( 1630284961), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1506287819), INT32_C( 667997188), INT32_C( -179453871), INT32_C( 1445563364)), simde_mm_set_epi32(INT32_C(-1506287819), INT32_C( 832794192), INT32_C( -179453871), INT32_C( 1445563364)), simde_mm_set_epi32(INT32_C(-1506287819), INT32_C( 667997188), INT32_C( -179453871), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( -921490700), INT32_C( 767377840), INT32_C( 1198841751), INT32_C( 335888297)), simde_mm_set_epi32(INT32_C( -729465888), INT32_C( 767377840), INT32_C(-1211611524), INT32_C( 962373371)), simde_mm_set_epi32(INT32_C( -921490700), INT32_C( 767377840), INT32_C( 1198841751), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 279427637), INT32_C(-1356366148), INT32_C( -778146350), INT32_C( -407135484)), simde_mm_set_epi32(INT32_C( -396894639), INT32_C(-1356366148), INT32_C( -778146350), INT32_C( 1071745245)), simde_mm_set_epi32(INT32_C( 279427637),
INT32_C(-1356366148), INT32_C( -778146350), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 646913785), INT32_C(-1754178229), INT32_C( 419585954), INT32_C( -90816511)), simde_mm_set_epi32(INT32_C( 733359934), INT32_C(-1754178229), INT32_C( 1438707211), INT32_C( -90816511)), simde_mm_set_epi32(INT32_C( 646913785), INT32_C(-1754178229), INT32_C( 419585954), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( -929496881), INT32_C( -605733719), INT32_C( 626875427), INT32_C( 862216293)), simde_mm_set_epi32(INT32_C( 769282939), INT32_C( -605733719), INT32_C( 626875427), INT32_C( 862216293)), simde_mm_set_epi32(INT32_C( -929496881), INT32_C( -605733719), INT32_C( 626875427), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 1307976927), INT32_C( 1620554754), INT32_C( 1085448799), INT32_C( 1857805228)), simde_mm_set_epi32(INT32_C(-1751946539), INT32_C( 1620554754), INT32_C( -209862470), INT32_C(-1671218193)), simde_mm_set_epi32(INT32_C( 1307976927), INT32_C( 1620554754), INT32_C( 1085448799), INT32_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmpnle_ss( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } /* Test for simde_mm_cmpnlt_ps. The 8 cases store a, b and the expected r as 32-bit integer lanes; a and b are reinterpreted with simde_mm_castsi128_ps for the call and the result is cast back with simde_mm_castps_si128 and compared against r with simde_test_x86_assert_equal_i32x4. Every expected lane is either 0 or -1. Returns 0 after all cases ran. */ static int test_simde_mm_cmpnlt_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( -696394921), INT32_C( -747986358), INT32_C( -647555009), INT32_C( 1026098152)), simde_mm_set_epi32(INT32_C( -633562196), INT32_C( -747986358), INT32_C( -647555009), INT32_C( 1026098152)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( -452783834), INT32_C( -762534148), INT32_C( 527526057), INT32_C( 875267484)), simde_mm_set_epi32(INT32_C( 2135403130), INT32_C( -762534148), INT32_C( 527526057), INT32_C( -792882965)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1),
INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C(-1833897906), INT32_C(-1370978496), INT32_C(-1396909811), INT32_C( 612871018)), simde_mm_set_epi32(INT32_C(-1833897906), INT32_C(-1370978496), INT32_C( 1459624563), INT32_C( 612871018)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 1770608184), INT32_C(-1440913128), INT32_C( -375433996), INT32_C(-1167135078)), simde_mm_set_epi32(INT32_C( 1685329021), INT32_C( 1594341486), INT32_C( -749207466), INT32_C(-1647221832)), simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C(-2031323732), INT32_C( -384315017), INT32_C(-2075546059), INT32_C(-1034483494)), simde_mm_set_epi32(INT32_C( 261868471), INT32_C( -384315017), INT32_C(-2075546059), INT32_C(-1034483494)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 749959102), INT32_C( 1174344579), INT32_C( 1538886158), INT32_C( 1204052778)), simde_mm_set_epi32(INT32_C( 915491351), INT32_C( 1174344579), INT32_C( 1538886158), INT32_C( 1204052778)), simde_mm_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 702745450), INT32_C(-1148209866), INT32_C( 305771684), INT32_C( 10161619)), simde_mm_set_epi32(INT32_C( -437200102), INT32_C(-1148209866), INT32_C( 703243165), INT32_C( 10161619)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C(-1984623370), INT32_C( 1198216493), INT32_C( 1096311405), INT32_C( -697370337)), simde_mm_set_epi32(INT32_C(-1984623370), INT32_C( 1198216493), INT32_C( 1096311405), INT32_C( -697370337)), simde_mm_set_epi32(INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmpnlt_ps( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b)));
simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } /* Test for simde_mm_cmpnlt_ss. Inputs and expected results are stored as 32-bit integer lanes and reinterpreted with simde_mm_castsi128_ps for the call. In every expected vector the first three simde_mm_set_epi32 arguments repeat those of a and the last one is 0 or -1. Returns 0 after all cases ran. */ static int test_simde_mm_cmpnlt_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 1412010535), INT32_C(-1239400933), INT32_C( 594063422), INT32_C( 1188961914)), simde_mm_set_epi32(INT32_C( 1663850825), INT32_C(-1239400933), INT32_C( 594063422), INT32_C( 1188961914)), simde_mm_set_epi32(INT32_C( 1412010535), INT32_C(-1239400933), INT32_C( 594063422), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( -316965064), INT32_C(-1377591008), INT32_C(-1622896924), INT32_C(-1267585285)), simde_mm_set_epi32(INT32_C( -662180320), INT32_C(-1377591008), INT32_C( -505498051), INT32_C( -824079960)), simde_mm_set_epi32(INT32_C( -316965064), INT32_C(-1377591008), INT32_C(-1622896924), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 952041483), INT32_C( -759750393), INT32_C( 571126339), INT32_C(-1097881178)), simde_mm_set_epi32(INT32_C( 952041483), INT32_C( -759750393), INT32_C( 571126339), INT32_C( 206622744)), simde_mm_set_epi32(INT32_C( 952041483), INT32_C( -759750393), INT32_C( 571126339), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( -778057296), INT32_C(-1150074331), INT32_C( 485854915), INT32_C( -514788129)), simde_mm_set_epi32(INT32_C( 235219858), INT32_C(-1150074331), INT32_C( 485854915), INT32_C( -514788129)), simde_mm_set_epi32(INT32_C( -778057296), INT32_C(-1150074331), INT32_C( 485854915), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 1021281951), INT32_C( 386387638), INT32_C( 435001339), INT32_C( 317446933)), simde_mm_set_epi32(INT32_C(-1890218411), INT32_C(-1750380903), INT32_C( 435001339), INT32_C( 317446933)), simde_mm_set_epi32(INT32_C( 1021281951), INT32_C( 386387638), INT32_C( 435001339), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( -137019013), INT32_C( 1227598678), INT32_C( 1179331321), INT32_C(-2004841764)), simde_mm_set_epi32(INT32_C( 1473692950), INT32_C( 1227598678), INT32_C( 1179331321),
INT32_C(-2004841764)), simde_mm_set_epi32(INT32_C( -137019013), INT32_C( 1227598678), INT32_C( 1179331321), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( 196197184), INT32_C(-1956925508), INT32_C( 1585557819), INT32_C( 132702049)), simde_mm_set_epi32(INT32_C(-1510035164), INT32_C(-1956925508), INT32_C(-2054438272), INT32_C( 132702049)), simde_mm_set_epi32(INT32_C( 196197184), INT32_C(-1956925508), INT32_C( 1585557819), INT32_C( -1)) }, { simde_mm_set_epi32(INT32_C( -985194720), INT32_C( 1720892194), INT32_C( 1096426703), INT32_C(-1009397670)), simde_mm_set_epi32(INT32_C( -874151390), INT32_C( -332830918), INT32_C( 1096426703), INT32_C( 1294469295)), simde_mm_set_epi32(INT32_C( -985194720), INT32_C( 1720892194), INT32_C( 1096426703), INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128( simde_mm_cmpnlt_ss( simde_mm_castsi128_ps(test_vec[i].a), simde_mm_castsi128_ps(test_vec[i].b))); simde_test_x86_assert_equal_i32x4(r, test_vec[i].r); } return 0; } /* Test for simde_mm_cmpord_ps. Uses a single case whose inputs mix NAN and ordinary values; the expected vector is built with simde_mm_move_ss(simde_mm_setzero_ps(), simde_x_mm_setone_ps()). The comparison is done on float vectors with simde_test_x86_assert_equal_f32x4 (expected value passed first, last argument 1). Returns 0 after all cases ran. */ static int test_simde_mm_cmpord_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[] = { { simde_mm_set_ps(1.0f, NAN, NAN, 2.0f), simde_mm_set_ps( NAN, 3.0f, NAN, 4.0f), simde_mm_move_ss(simde_mm_setzero_ps(), simde_x_mm_setone_ps()) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m128 r = simde_mm_cmpord_ps(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(test_vec[i].r, r, 1); } return 0; } /* Test for simde_mm_cmpord_ss. Each of the 8 cases places nanf("") in some positions of b; the expected vector is a combined through simde_mm_move_ss with either simde_mm_setzero_ps() or simde_x_mm_setone_ps(). In the data, setzero is used exactly when the last simde_mm_set_ps argument of b is nanf(""). Results are compared with simde_test_x86_assert_equal_f32x4. Returns 0 after all cases ran. */ static int test_simde_mm_cmpord_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 458.45), SIMDE_FLOAT32_C( -42.89), SIMDE_FLOAT32_C( 680.45), SIMDE_FLOAT32_C( -622.11)), simde_mm_set_ps(SIMDE_FLOAT32_C( 343.10), SIMDE_FLOAT32_C( -612.18), SIMDE_FLOAT32_C( -268.76), nanf("")), simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 458.45), SIMDE_FLOAT32_C( -42.89),
SIMDE_FLOAT32_C( 680.45), SIMDE_FLOAT32_C( -622.11)), simde_mm_setzero_ps()) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -452.28), SIMDE_FLOAT32_C( -680.79), SIMDE_FLOAT32_C( 958.41), SIMDE_FLOAT32_C( 629.53)), simde_mm_set_ps(SIMDE_FLOAT32_C( -707.65), SIMDE_FLOAT32_C( 308.61), SIMDE_FLOAT32_C( 360.51), nanf("")), simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -452.28), SIMDE_FLOAT32_C( -680.79), SIMDE_FLOAT32_C( 958.41), SIMDE_FLOAT32_C( 629.53)), simde_mm_setzero_ps()) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -606.93), SIMDE_FLOAT32_C( -332.63), SIMDE_FLOAT32_C( -293.59), SIMDE_FLOAT32_C( 771.09)), simde_mm_set_ps(SIMDE_FLOAT32_C( 725.97), nanf(""), SIMDE_FLOAT32_C( -698.52), SIMDE_FLOAT32_C( -855.59)), simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -606.93), SIMDE_FLOAT32_C( -332.63), SIMDE_FLOAT32_C( -293.59), SIMDE_FLOAT32_C( 771.09)), simde_x_mm_setone_ps()) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -910.47), SIMDE_FLOAT32_C( -52.21), SIMDE_FLOAT32_C( 524.20), SIMDE_FLOAT32_C( 797.22)), simde_mm_set_ps(SIMDE_FLOAT32_C( 507.03), SIMDE_FLOAT32_C( 414.86), nanf(""), nanf("")), simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -910.47), SIMDE_FLOAT32_C( -52.21), SIMDE_FLOAT32_C( 524.20), SIMDE_FLOAT32_C( 797.22)), simde_mm_setzero_ps()) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -835.84), SIMDE_FLOAT32_C( -92.24), SIMDE_FLOAT32_C( 876.06), SIMDE_FLOAT32_C( 325.47)), simde_mm_set_ps( nanf(""), nanf(""), nanf(""), SIMDE_FLOAT32_C( 830.79)), simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -835.84), SIMDE_FLOAT32_C( -92.24), SIMDE_FLOAT32_C( 876.06), SIMDE_FLOAT32_C( 325.47)), simde_x_mm_setone_ps()) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 486.04), SIMDE_FLOAT32_C( -785.31), SIMDE_FLOAT32_C( -528.45), SIMDE_FLOAT32_C( 809.84)), simde_mm_set_ps( nanf(""), SIMDE_FLOAT32_C( -874.62), nanf(""), SIMDE_FLOAT32_C( -620.27)), simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 486.04), SIMDE_FLOAT32_C( -785.31), SIMDE_FLOAT32_C( -528.45), SIMDE_FLOAT32_C( 809.84)),
simde_x_mm_setone_ps()) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -705.98), SIMDE_FLOAT32_C( -135.25), SIMDE_FLOAT32_C( -941.10), SIMDE_FLOAT32_C( -105.40)), simde_mm_set_ps( nanf(""), nanf(""), nanf(""), SIMDE_FLOAT32_C( 424.16)), simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -705.98), SIMDE_FLOAT32_C( -135.25), SIMDE_FLOAT32_C( -941.10), SIMDE_FLOAT32_C( -105.40)), simde_x_mm_setone_ps()) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 692.24), SIMDE_FLOAT32_C( -983.30), SIMDE_FLOAT32_C( 728.96), SIMDE_FLOAT32_C( 658.52)), simde_mm_set_ps(SIMDE_FLOAT32_C( -329.64), SIMDE_FLOAT32_C( 60.61), nanf(""), SIMDE_FLOAT32_C( 824.02)), simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 692.24), SIMDE_FLOAT32_C( -983.30), SIMDE_FLOAT32_C( 728.96), SIMDE_FLOAT32_C( 658.52)), simde_x_mm_setone_ps()) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_cmpord_ss(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } /* Test for simde_mm_cmpunord_ps. Three cases: identical non-NAN inputs expecting simde_mm_setzero_ps(), all-NAN inputs expecting simde_x_mm_setone_ps(), and a single NAN as the last simde_mm_set_ps argument of a expecting simde_mm_move_ss(simde_mm_setzero_ps(), simde_x_mm_setone_ps()). Compared with simde_test_x86_assert_equal_f32x4 (expected value passed first). Returns 0 after all cases ran. */ static int test_simde_mm_cmpunord_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[] = { { simde_mm_set_ps(SIMDE_FLOAT32_C(1.0), SIMDE_FLOAT32_C(2.0), SIMDE_FLOAT32_C(3.0), SIMDE_FLOAT32_C(4.0)), simde_mm_set_ps(SIMDE_FLOAT32_C(1.0), SIMDE_FLOAT32_C(2.0), SIMDE_FLOAT32_C(3.0), SIMDE_FLOAT32_C(4.0)), simde_mm_setzero_ps() }, { simde_mm_set_ps( NAN, NAN, NAN, NAN), simde_mm_set_ps( NAN, NAN, NAN, NAN), simde_x_mm_setone_ps() }, { simde_mm_set_ps(SIMDE_FLOAT32_C(1.0), SIMDE_FLOAT32_C(2.0), SIMDE_FLOAT32_C(3.0), NAN), simde_mm_set_ps(SIMDE_FLOAT32_C(1.0), SIMDE_FLOAT32_C(2.0), SIMDE_FLOAT32_C(3.0), SIMDE_FLOAT32_C(4.0)), simde_mm_move_ss(simde_mm_setzero_ps(), simde_x_mm_setone_ps()) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m128 r = simde_mm_cmpunord_ps(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(test_vec[i].r, r, 1); } return 0; } /* Test for simde_mm_comieq_ss. Each of the 8 cases holds two float vectors and an expected int r; the int returned by simde_mm_comieq_ss(a, b) is checked with simde_assert_equal_i. In the data r is 1 exactly when the last simde_mm_set_ps argument of a equals that of b. Returns 0 after all cases ran. */ static int
test_simde_mm_comieq_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; int r; } test_vec[8] = { { simde_mm_set_ps(0.4193f, 0.8439f, 0.8100f, 0.2669f), simde_mm_set_ps(0.4193f, 0.8439f, 0.8100f, 0.2669f), 1 }, { simde_mm_set_ps(0.9426f, 0.1679f, 0.2845f, 0.1698f), simde_mm_set_ps(0.9426f, 0.6931f, 0.2845f, 0.1698f), 1 }, { simde_mm_set_ps(0.0547f, 0.2368f, 0.3365f, 0.9146f), simde_mm_set_ps(0.0547f, 0.5935f, 0.3365f, 0.5014f), 0 }, { simde_mm_set_ps(0.7179f, 0.8607f, 0.4372f, 0.6140f), simde_mm_set_ps(0.7179f, 0.7239f, 0.4372f, 0.6140f), 1 }, { simde_mm_set_ps(0.4474f, 0.6848f, 0.4305f, 0.8738f), simde_mm_set_ps(0.6059f, 0.8463f, 0.4305f, 0.1517f), 0 }, { simde_mm_set_ps(0.4537f, 0.7254f, 0.9987f, 0.9115f), simde_mm_set_ps(0.1771f, 0.2982f, 0.9987f, 0.9003f), 0 }, { simde_mm_set_ps(0.2162f, 0.6303f, 0.0602f, 0.9986f), simde_mm_set_ps(0.2162f, 0.5872f, 0.0602f, 0.2491f), 0 }, { simde_mm_set_ps(0.4836f, 0.9929f, 0.8942f, 0.2367f), simde_mm_set_ps(0.4836f, 0.9929f, 0.4202f, 0.2367f), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { int r = simde_mm_comieq_ss(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } /* Test for simde_mm_comige_ss. Each of the 8 cases holds two float vectors and an expected int r; the int returned by simde_mm_comige_ss(a, b) is checked with simde_assert_equal_i. In the data r is 1 exactly when the last simde_mm_set_ps argument of a is greater than or equal to that of b. Returns 0 after all cases ran. */ static int test_simde_mm_comige_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; int r; } test_vec[8] = { { simde_mm_set_ps(0.1174f, 0.9995f, 0.7362f, 0.9966f), simde_mm_set_ps(0.1174f, 0.5850f, 0.0225f, 0.0035f), 1 }, { simde_mm_set_ps(0.6820f, 0.9892f, 0.6235f, 0.1819f), simde_mm_set_ps(0.0898f, 0.9892f, 0.6235f, 0.3305f), 0 }, { simde_mm_set_ps(0.7152f, 0.9286f, 0.9635f, 0.8823f), simde_mm_set_ps(0.3403f, 0.4177f, 0.9635f, 0.8737f), 1 }, { simde_mm_set_ps(0.5619f, 0.6892f, 0.2137f, 0.5336f), simde_mm_set_ps(0.1340f, 0.0152f, 0.9280f, 0.5336f), 1 }, { simde_mm_set_ps(0.5476f, 0.8606f, 0.2177f, 0.5284f), simde_mm_set_ps(0.5476f, 0.6253f, 0.1285f, 0.7135f), 0 }, { simde_mm_set_ps(0.6649f, 0.2053f, 0.5053f, 0.0378f), simde_mm_set_ps(0.0308f, 0.2053f,
0.5053f, 0.8789f), 0 }, { simde_mm_set_ps(0.3714f, 0.8736f, 0.8711f, 0.9491f), simde_mm_set_ps(0.8296f, 0.2212f, 0.5986f, 0.9491f), 1 }, { simde_mm_set_ps(0.8791f, 0.5862f, 0.4977f, 0.0888f), simde_mm_set_ps(0.8669f, 0.7545f, 0.4977f, 0.0888f), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { int r = simde_mm_comige_ss(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_comigt_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; int r; } test_vec[8] = { { simde_mm_set_ps(0.1174f, 0.9995f, 0.7362f, 0.9966f), simde_mm_set_ps(0.1174f, 0.5850f, 0.0225f, 0.0035f), 1 }, { simde_mm_set_ps(0.6820f, 0.9892f, 0.6235f, 0.1819f), simde_mm_set_ps(0.0898f, 0.9892f, 0.6235f, 0.3305f), 0 }, { simde_mm_set_ps(0.7152f, 0.9286f, 0.9635f, 0.8823f), simde_mm_set_ps(0.3403f, 0.4177f, 0.9635f, 0.8737f), 1 }, { simde_mm_set_ps(0.5619f, 0.6892f, 0.2137f, 0.5336f), simde_mm_set_ps(0.1340f, 0.0152f, 0.9280f, 0.5336f), 0 }, { simde_mm_set_ps(0.5476f, 0.8606f, 0.2177f, 0.5284f), simde_mm_set_ps(0.5476f, 0.6253f, 0.1285f, 0.7135f), 0 }, { simde_mm_set_ps(0.6649f, 0.2053f, 0.5053f, 0.0378f), simde_mm_set_ps(0.0308f, 0.2053f, 0.5053f, 0.8789f), 0 }, { simde_mm_set_ps(0.3714f, 0.8736f, 0.8711f, 0.9491f), simde_mm_set_ps(0.8296f, 0.2212f, 0.5986f, 0.9491f), 0 }, { simde_mm_set_ps(0.8791f, 0.5862f, 0.4977f, 0.0888f), simde_mm_set_ps(0.8669f, 0.7545f, 0.4977f, 0.0888f), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { int r = simde_mm_comigt_ss(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_comile_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; int r; } test_vec[8] = { { simde_mm_set_ps(0.1174f, 0.9995f, 0.7362f, 0.9966f), simde_mm_set_ps(0.1174f, 0.5850f, 0.0225f, 0.0035f), 0 }, { simde_mm_set_ps(0.6820f, 0.9892f, 0.6235f, 0.1819f), simde_mm_set_ps(0.0898f, 0.9892f, 
0.6235f, 0.3305f), 1 }, { simde_mm_set_ps(0.7152f, 0.9286f, 0.9635f, 0.8823f), simde_mm_set_ps(0.3403f, 0.4177f, 0.9635f, 0.8737f), 0 }, { simde_mm_set_ps(0.5619f, 0.6892f, 0.2137f, 0.5336f), simde_mm_set_ps(0.1340f, 0.0152f, 0.9280f, 0.5336f), 1 }, { simde_mm_set_ps(0.5476f, 0.8606f, 0.2177f, 0.5284f), simde_mm_set_ps(0.5476f, 0.6253f, 0.1285f, 0.7135f), 1 }, { simde_mm_set_ps(0.6649f, 0.2053f, 0.5053f, 0.0378f), simde_mm_set_ps(0.0308f, 0.2053f, 0.5053f, 0.8789f), 1 }, { simde_mm_set_ps(0.3714f, 0.8736f, 0.8711f, 0.9491f), simde_mm_set_ps(0.8296f, 0.2212f, 0.5986f, 0.9491f), 1 }, { simde_mm_set_ps(0.8791f, 0.5862f, 0.4977f, 0.0888f), simde_mm_set_ps(0.8669f, 0.7545f, 0.4977f, 0.0888f), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { int r = simde_mm_comile_ss(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_comilt_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; int r; } test_vec[8] = { { simde_mm_set_ps(0.1174f, 0.9995f, 0.7362f, 0.9966f), simde_mm_set_ps(0.1174f, 0.5850f, 0.0225f, 0.0035f), 0 }, { simde_mm_set_ps(0.6820f, 0.9892f, 0.6235f, 0.1819f), simde_mm_set_ps(0.0898f, 0.9892f, 0.6235f, 0.3305f), 1 }, { simde_mm_set_ps(0.7152f, 0.9286f, 0.9635f, 0.8823f), simde_mm_set_ps(0.3403f, 0.4177f, 0.9635f, 0.8737f), 0 }, { simde_mm_set_ps(0.5619f, 0.6892f, 0.2137f, 0.5336f), simde_mm_set_ps(0.1340f, 0.0152f, 0.9280f, 0.5336f), 0 }, { simde_mm_set_ps(0.5476f, 0.8606f, 0.2177f, 0.5284f), simde_mm_set_ps(0.5476f, 0.6253f, 0.1285f, 0.7135f), 1 }, { simde_mm_set_ps(0.6649f, 0.2053f, 0.5053f, 0.0378f), simde_mm_set_ps(0.0308f, 0.2053f, 0.5053f, 0.8789f), 1 }, { simde_mm_set_ps(0.3714f, 0.8736f, 0.8711f, 0.9491f), simde_mm_set_ps(0.8296f, 0.2212f, 0.5986f, 0.9491f), 0 }, { simde_mm_set_ps(0.8791f, 0.5862f, 0.4977f, 0.0888f), simde_mm_set_ps(0.8669f, 0.7545f, 0.4977f, 0.0888f), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; 
i++) { int r = simde_mm_comilt_ss(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_comineq_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; int r; } test_vec[8] = { { simde_mm_set_ps(0.1174f, 0.9995f, 0.7362f, 0.9966f), simde_mm_set_ps(0.1174f, 0.5850f, 0.0225f, 0.0035f), 1 }, { simde_mm_set_ps(0.6820f, 0.9892f, 0.6235f, 0.1819f), simde_mm_set_ps(0.0898f, 0.9892f, 0.6235f, 0.3305f), 1 }, { simde_mm_set_ps(0.7152f, 0.9286f, 0.9635f, 0.8823f), simde_mm_set_ps(0.3403f, 0.4177f, 0.9635f, 0.8737f), 1 }, { simde_mm_set_ps(0.5619f, 0.6892f, 0.2137f, 0.5336f), simde_mm_set_ps(0.1340f, 0.0152f, 0.9280f, 0.5336f), 0 }, { simde_mm_set_ps(0.5476f, 0.8606f, 0.2177f, 0.5284f), simde_mm_set_ps(0.5476f, 0.6253f, 0.1285f, 0.7135f), 1 }, { simde_mm_set_ps(0.6649f, 0.2053f, 0.5053f, 0.0378f), simde_mm_set_ps(0.0308f, 0.2053f, 0.5053f, 0.8789f), 1 }, { simde_mm_set_ps(0.3714f, 0.8736f, 0.8711f, 0.9491f), simde_mm_set_ps(0.8296f, 0.2212f, 0.5986f, 0.9491f), 0 }, { simde_mm_set_ps(0.8791f, 0.5862f, 0.4977f, 0.0888f), simde_mm_set_ps(0.8669f, 0.7545f, 0.4977f, 0.0888f), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { int r = simde_mm_comineq_ss(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpunord_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -653.22), SIMDE_FLOAT32_C( 230.72), SIMDE_FLOAT32_C( -26.51), SIMDE_FLOAT32_C( -933.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( -424.17), SIMDE_FLOAT32_C( -842.33), SIMDE_FLOAT32_C( -110.12), SIMDE_FLOAT32_C( 206.71)), simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -653.22), SIMDE_FLOAT32_C( 230.72), SIMDE_FLOAT32_C( -26.51), SIMDE_FLOAT32_C( -933.90)), simde_mm_setzero_ps()) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -108.53), SIMDE_FLOAT32_C( 720.30), SIMDE_FLOAT32_C( 100.87), 
SIMDE_FLOAT32_C( 112.49)), simde_mm_set_ps(SIMDE_FLOAT32_C( 506.84), NAN , SIMDE_FLOAT32_C( 498.67), SIMDE_FLOAT32_C( -576.77)), simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -108.53), SIMDE_FLOAT32_C( 720.30), SIMDE_FLOAT32_C( 100.87), SIMDE_FLOAT32_C( 112.49)), simde_mm_setzero_ps()) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -877.11), SIMDE_FLOAT32_C( 574.64), SIMDE_FLOAT32_C( 434.50), SIMDE_FLOAT32_C( -728.82)), simde_mm_set_ps(SIMDE_FLOAT32_C( -677.16), NAN , SIMDE_FLOAT32_C( 284.18), SIMDE_FLOAT32_C( -923.70)), simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( -877.11), SIMDE_FLOAT32_C( 574.64), SIMDE_FLOAT32_C( 434.50), SIMDE_FLOAT32_C( -728.82)), simde_mm_setzero_ps()) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 876.32), SIMDE_FLOAT32_C( -475.29), SIMDE_FLOAT32_C( -377.05), SIMDE_FLOAT32_C( 152.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( 737.32), SIMDE_FLOAT32_C( -937.49), NAN , NAN ), simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 876.32), SIMDE_FLOAT32_C( -475.29), SIMDE_FLOAT32_C( -377.05), SIMDE_FLOAT32_C( 152.30)), simde_x_mm_setone_ps()) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 752.03), SIMDE_FLOAT32_C( -30.87), SIMDE_FLOAT32_C( -483.32), SIMDE_FLOAT32_C( 405.97)), simde_mm_set_ps(SIMDE_FLOAT32_C( 728.44), NAN , NAN , SIMDE_FLOAT32_C( 923.45)), simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 752.03), SIMDE_FLOAT32_C( -30.87), SIMDE_FLOAT32_C( -483.32), SIMDE_FLOAT32_C( 405.97)), simde_mm_setzero_ps()) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 354.09), SIMDE_FLOAT32_C( -692.72), SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( -654.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( 593.37), SIMDE_FLOAT32_C( 396.32), SIMDE_FLOAT32_C( -487.41), SIMDE_FLOAT32_C( -895.17)), simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 354.09), SIMDE_FLOAT32_C( -692.72), SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( -654.30)), simde_mm_setzero_ps()) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 964.75), SIMDE_FLOAT32_C( -220.47), SIMDE_FLOAT32_C( -310.10), SIMDE_FLOAT32_C( 557.03)), simde_mm_set_ps(SIMDE_FLOAT32_C( 
-517.95), NAN , SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 260.96)), simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 964.75), SIMDE_FLOAT32_C( -220.47), SIMDE_FLOAT32_C( -310.10), SIMDE_FLOAT32_C( 557.03)), simde_mm_setzero_ps()) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 73.48), SIMDE_FLOAT32_C( 303.75), SIMDE_FLOAT32_C( 206.16), SIMDE_FLOAT32_C( 533.85)), simde_mm_set_ps(SIMDE_FLOAT32_C( -948.94), SIMDE_FLOAT32_C( -599.14), SIMDE_FLOAT32_C( -836.27), SIMDE_FLOAT32_C( -409.07)), simde_mm_move_ss(simde_mm_set_ps(SIMDE_FLOAT32_C( 73.48), SIMDE_FLOAT32_C( 303.75), SIMDE_FLOAT32_C( 206.16), SIMDE_FLOAT32_C( 533.85)), simde_mm_setzero_ps()) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m128 r = simde_mm_cmpunord_ss(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(test_vec[i].r, r, 1); } return 0; } static int test_simde_x_mm_copysign_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 b[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -616.55), SIMDE_FLOAT32_C( -399.29), SIMDE_FLOAT32_C( 528.87), SIMDE_FLOAT32_C( -307.11) }, { SIMDE_FLOAT32_C( 907.35), SIMDE_FLOAT32_C( -230.71), SIMDE_FLOAT32_C( 874.68), SIMDE_FLOAT32_C( 549.16) }, { SIMDE_FLOAT32_C( 616.55), SIMDE_FLOAT32_C( -399.29), SIMDE_FLOAT32_C( 528.87), SIMDE_FLOAT32_C( 307.11) } }, { { SIMDE_FLOAT32_C( 446.77), SIMDE_FLOAT32_C( 35.68), SIMDE_FLOAT32_C( 883.50), SIMDE_FLOAT32_C( -871.02) }, { SIMDE_FLOAT32_C( 531.89), SIMDE_FLOAT32_C( -844.86), SIMDE_FLOAT32_C( -327.10), SIMDE_FLOAT32_C( 604.14) }, { SIMDE_FLOAT32_C( 446.77), SIMDE_FLOAT32_C( -35.68), SIMDE_FLOAT32_C( -883.50), SIMDE_FLOAT32_C( 871.02) } }, { { SIMDE_FLOAT32_C( -238.71), SIMDE_FLOAT32_C( 172.16), SIMDE_FLOAT32_C( 638.91), SIMDE_FLOAT32_C( 321.67) }, { SIMDE_FLOAT32_C( -441.31), SIMDE_FLOAT32_C( -457.17), SIMDE_FLOAT32_C( -768.83), SIMDE_FLOAT32_C( -416.48) }, { SIMDE_FLOAT32_C( -238.71), SIMDE_FLOAT32_C( -172.16), 
SIMDE_FLOAT32_C( -638.91), SIMDE_FLOAT32_C( -321.67) } }, { { SIMDE_FLOAT32_C( -568.75), SIMDE_FLOAT32_C( -736.25), SIMDE_FLOAT32_C( 662.43), SIMDE_FLOAT32_C( -206.91) }, { SIMDE_FLOAT32_C( 583.40), SIMDE_FLOAT32_C( 854.52), SIMDE_FLOAT32_C( 190.70), SIMDE_FLOAT32_C( 966.85) }, { SIMDE_FLOAT32_C( 568.75), SIMDE_FLOAT32_C( 736.25), SIMDE_FLOAT32_C( 662.43), SIMDE_FLOAT32_C( 206.91) } }, { { SIMDE_FLOAT32_C( -544.77), SIMDE_FLOAT32_C( -280.43), SIMDE_FLOAT32_C( -340.26), SIMDE_FLOAT32_C( -637.42) }, { SIMDE_FLOAT32_C( 488.86), SIMDE_FLOAT32_C( -465.58), SIMDE_FLOAT32_C( 911.74), SIMDE_FLOAT32_C( -64.37) }, { SIMDE_FLOAT32_C( 544.77), SIMDE_FLOAT32_C( -280.43), SIMDE_FLOAT32_C( 340.26), SIMDE_FLOAT32_C( -637.42) } }, { { SIMDE_FLOAT32_C( 570.10), SIMDE_FLOAT32_C( 795.24), SIMDE_FLOAT32_C( 64.61), SIMDE_FLOAT32_C( 101.99) }, { SIMDE_FLOAT32_C( 950.38), SIMDE_FLOAT32_C( 737.51), SIMDE_FLOAT32_C( -293.87), SIMDE_FLOAT32_C( -288.33) }, { SIMDE_FLOAT32_C( 570.10), SIMDE_FLOAT32_C( 795.24), SIMDE_FLOAT32_C( -64.61), SIMDE_FLOAT32_C( -101.99) } }, { { SIMDE_FLOAT32_C( -90.33), SIMDE_FLOAT32_C( -654.96), SIMDE_FLOAT32_C( -966.67), SIMDE_FLOAT32_C( 468.36) }, { SIMDE_FLOAT32_C( -112.14), SIMDE_FLOAT32_C( -735.49), SIMDE_FLOAT32_C( -948.11), SIMDE_FLOAT32_C( 319.11) }, { SIMDE_FLOAT32_C( -90.33), SIMDE_FLOAT32_C( -654.96), SIMDE_FLOAT32_C( -966.67), SIMDE_FLOAT32_C( 468.36) } }, { { SIMDE_FLOAT32_C( -471.74), SIMDE_FLOAT32_C( 714.32), SIMDE_FLOAT32_C( -887.80), SIMDE_FLOAT32_C( -888.34) }, { SIMDE_FLOAT32_C( 568.84), SIMDE_FLOAT32_C( 302.91), SIMDE_FLOAT32_C( -921.50), SIMDE_FLOAT32_C( -975.94) }, { SIMDE_FLOAT32_C( 471.74), SIMDE_FLOAT32_C( 714.32), SIMDE_FLOAT32_C( -887.80), SIMDE_FLOAT32_C( -888.34) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_x_mm_copysign_ps(a, b); simde_test_x86_assert_equal_f32x4(r, 
simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_x_mm_xorsign_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 b[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -256.75), SIMDE_FLOAT32_C( 58.01), SIMDE_FLOAT32_C( -152.81), SIMDE_FLOAT32_C( 780.53) }, { SIMDE_FLOAT32_C( -131.60), SIMDE_FLOAT32_C( -695.20), SIMDE_FLOAT32_C( -75.09), SIMDE_FLOAT32_C( -141.32) }, { SIMDE_FLOAT32_C( 256.75), SIMDE_FLOAT32_C( -58.01), SIMDE_FLOAT32_C( 152.81), SIMDE_FLOAT32_C( -780.53) } }, { { SIMDE_FLOAT32_C( 823.76), SIMDE_FLOAT32_C( -840.56), SIMDE_FLOAT32_C( -829.93), SIMDE_FLOAT32_C( -159.51) }, { SIMDE_FLOAT32_C( 664.20), SIMDE_FLOAT32_C( -222.93), SIMDE_FLOAT32_C( 30.99), SIMDE_FLOAT32_C( 36.00) }, { SIMDE_FLOAT32_C( 823.76), SIMDE_FLOAT32_C( 840.56), SIMDE_FLOAT32_C( -829.93), SIMDE_FLOAT32_C( -159.51) } }, { { SIMDE_FLOAT32_C( 637.35), SIMDE_FLOAT32_C( -689.71), SIMDE_FLOAT32_C( 543.62), SIMDE_FLOAT32_C( 362.05) }, { SIMDE_FLOAT32_C( 629.38), SIMDE_FLOAT32_C( -813.91), SIMDE_FLOAT32_C( 470.90), SIMDE_FLOAT32_C( 839.80) }, { SIMDE_FLOAT32_C( 637.35), SIMDE_FLOAT32_C( 689.71), SIMDE_FLOAT32_C( 543.62), SIMDE_FLOAT32_C( 362.05) } }, { { SIMDE_FLOAT32_C( -425.95), SIMDE_FLOAT32_C( -92.61), SIMDE_FLOAT32_C( -251.32), SIMDE_FLOAT32_C( 813.31) }, { SIMDE_FLOAT32_C( -430.82), SIMDE_FLOAT32_C( 216.76), SIMDE_FLOAT32_C( 838.12), SIMDE_FLOAT32_C( 312.43) }, { SIMDE_FLOAT32_C( 425.95), SIMDE_FLOAT32_C( -92.61), SIMDE_FLOAT32_C( -251.32), SIMDE_FLOAT32_C( 813.31) } }, { { SIMDE_FLOAT32_C( -725.23), SIMDE_FLOAT32_C( -314.69), SIMDE_FLOAT32_C( 92.96), SIMDE_FLOAT32_C( 143.17) }, { SIMDE_FLOAT32_C( -9.89), SIMDE_FLOAT32_C( -982.12), SIMDE_FLOAT32_C( -998.15), SIMDE_FLOAT32_C( -186.13) }, { SIMDE_FLOAT32_C( 725.23), SIMDE_FLOAT32_C( 314.69), SIMDE_FLOAT32_C( -92.96), SIMDE_FLOAT32_C( -143.17) } }, { { SIMDE_FLOAT32_C( -822.68), SIMDE_FLOAT32_C( -828.08), SIMDE_FLOAT32_C( 654.36), SIMDE_FLOAT32_C( 
841.52) }, { SIMDE_FLOAT32_C( -51.01), SIMDE_FLOAT32_C( -314.64), SIMDE_FLOAT32_C( -122.49), SIMDE_FLOAT32_C( -413.66) }, { SIMDE_FLOAT32_C( 822.68), SIMDE_FLOAT32_C( 828.08), SIMDE_FLOAT32_C( -654.36), SIMDE_FLOAT32_C( -841.52) } }, { { SIMDE_FLOAT32_C( -4.35), SIMDE_FLOAT32_C( -578.87), SIMDE_FLOAT32_C( 948.39), SIMDE_FLOAT32_C( -374.97) }, { SIMDE_FLOAT32_C( -392.78), SIMDE_FLOAT32_C( 419.28), SIMDE_FLOAT32_C( -535.17), SIMDE_FLOAT32_C( 181.27) }, { SIMDE_FLOAT32_C( 4.35), SIMDE_FLOAT32_C( -578.87), SIMDE_FLOAT32_C( -948.39), SIMDE_FLOAT32_C( -374.97) } }, { { SIMDE_FLOAT32_C( -673.33), SIMDE_FLOAT32_C( 213.51), SIMDE_FLOAT32_C( -5.42), SIMDE_FLOAT32_C( -104.15) }, { SIMDE_FLOAT32_C( -569.73), SIMDE_FLOAT32_C( -167.30), SIMDE_FLOAT32_C( -791.72), SIMDE_FLOAT32_C( -294.96) }, { SIMDE_FLOAT32_C( 673.33), SIMDE_FLOAT32_C( -213.51), SIMDE_FLOAT32_C( 5.42), SIMDE_FLOAT32_C( 104.15) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_x_mm_xorsign_ps(a, b); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_cvt_pi2ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m64 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -480.01), SIMDE_FLOAT32_C( 184.13), SIMDE_FLOAT32_C( 826.50), SIMDE_FLOAT32_C( -461.15)), simde_mm_set_pi32(INT32_C( 136), INT32_C( -836)), simde_mm_set_ps(SIMDE_FLOAT32_C( -480.01), SIMDE_FLOAT32_C( 184.13), SIMDE_FLOAT32_C( 136.00), SIMDE_FLOAT32_C( -836.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -265.13), SIMDE_FLOAT32_C( 660.82), SIMDE_FLOAT32_C( -962.73), SIMDE_FLOAT32_C( 878.13)), simde_mm_set_pi32(INT32_C( -766), INT32_C( -811)), simde_mm_set_ps(SIMDE_FLOAT32_C( -265.13), SIMDE_FLOAT32_C( 660.82), SIMDE_FLOAT32_C( -766.00), SIMDE_FLOAT32_C( -811.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 531.34), 
SIMDE_FLOAT32_C( -581.54), SIMDE_FLOAT32_C( -290.28), SIMDE_FLOAT32_C( 767.75)), simde_mm_set_pi32(INT32_C( 867), INT32_C( -379)), simde_mm_set_ps(SIMDE_FLOAT32_C( 531.34), SIMDE_FLOAT32_C( -581.54), SIMDE_FLOAT32_C( 867.00), SIMDE_FLOAT32_C( -379.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 10.42), SIMDE_FLOAT32_C( -706.07), SIMDE_FLOAT32_C( -747.94), SIMDE_FLOAT32_C( 462.62)), simde_mm_set_pi32(INT32_C( 463), INT32_C( -757)), simde_mm_set_ps(SIMDE_FLOAT32_C( 10.42), SIMDE_FLOAT32_C( -706.07), SIMDE_FLOAT32_C( 463.00), SIMDE_FLOAT32_C( -757.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -542.71), SIMDE_FLOAT32_C( 432.33), SIMDE_FLOAT32_C( 560.90), SIMDE_FLOAT32_C( 894.98)), simde_mm_set_pi32(INT32_C( -614), INT32_C( 188)), simde_mm_set_ps(SIMDE_FLOAT32_C( -542.71), SIMDE_FLOAT32_C( 432.33), SIMDE_FLOAT32_C( -614.00), SIMDE_FLOAT32_C( 188.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -4.09), SIMDE_FLOAT32_C( -652.62), SIMDE_FLOAT32_C( 136.92), SIMDE_FLOAT32_C( -919.87)), simde_mm_set_pi32(INT32_C( -944), INT32_C( -693)), simde_mm_set_ps(SIMDE_FLOAT32_C( -4.09), SIMDE_FLOAT32_C( -652.62), SIMDE_FLOAT32_C( -944.00), SIMDE_FLOAT32_C( -693.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -98.24), SIMDE_FLOAT32_C( -855.63), SIMDE_FLOAT32_C( 645.40), SIMDE_FLOAT32_C( -996.57)), simde_mm_set_pi32(INT32_C( -186), INT32_C( -809)), simde_mm_set_ps(SIMDE_FLOAT32_C( -98.24), SIMDE_FLOAT32_C( -855.63), SIMDE_FLOAT32_C( -186.00), SIMDE_FLOAT32_C( -809.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -305.17), SIMDE_FLOAT32_C( -762.98), SIMDE_FLOAT32_C( -655.45), SIMDE_FLOAT32_C( -521.87)), simde_mm_set_pi32(INT32_C( 481), INT32_C( -37)), simde_mm_set_ps(SIMDE_FLOAT32_C( -305.17), SIMDE_FLOAT32_C( -762.98), SIMDE_FLOAT32_C( 481.00), SIMDE_FLOAT32_C( -37.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_cvt_pi2ps(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int 
test_simde_mm_cvt_ps2pi(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m64 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 585.40), SIMDE_FLOAT32_C( -596.31), SIMDE_FLOAT32_C( -33.58), SIMDE_FLOAT32_C( -973.52)), simde_mm_set_pi32(INT32_C( -34), INT32_C( -974)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 461.40), SIMDE_FLOAT32_C( -885.65), SIMDE_FLOAT32_C( -130.70), SIMDE_FLOAT32_C( 767.11)), simde_mm_set_pi32(INT32_C( -131), INT32_C( 767)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 667.28), SIMDE_FLOAT32_C( 289.45), SIMDE_FLOAT32_C( 167.11), SIMDE_FLOAT32_C( -820.87)), simde_mm_set_pi32(INT32_C( 167), INT32_C( -821)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -445.67), SIMDE_FLOAT32_C( 116.25), SIMDE_FLOAT32_C( 836.77), SIMDE_FLOAT32_C( -145.00)), simde_mm_set_pi32(INT32_C( 837), INT32_C( -145)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 457.72), SIMDE_FLOAT32_C( -725.98), SIMDE_FLOAT32_C( -428.45), SIMDE_FLOAT32_C( 251.28)), simde_mm_set_pi32(INT32_C( -428), INT32_C( 251)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 776.17), SIMDE_FLOAT32_C( 285.46), SIMDE_FLOAT32_C( -148.62), SIMDE_FLOAT32_C( 251.11)), simde_mm_set_pi32(INT32_C( -149), INT32_C( 251)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -758.68), SIMDE_FLOAT32_C( -834.46), SIMDE_FLOAT32_C( 247.62), SIMDE_FLOAT32_C( 252.25)), simde_mm_set_pi32(INT32_C( 248), INT32_C( 252)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 185.27), SIMDE_FLOAT32_C( -33.00), SIMDE_FLOAT32_C( 323.39), SIMDE_FLOAT32_C( 851.62)), simde_mm_set_pi32(INT32_C( 323), INT32_C( 852)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_cvt_ps2pi(test_vec[i].a); simde_test_x86_assert_equal_i32x2(r, test_vec[i].r); } return 0; } static int test_simde_mm_cvt_si2ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; int32_t b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -896.28), SIMDE_FLOAT32_C( -653.23), SIMDE_FLOAT32_C( 62.87), SIMDE_FLOAT32_C( -477.72)), INT32_C(-172), 
simde_mm_set_ps(SIMDE_FLOAT32_C( -896.28), SIMDE_FLOAT32_C( -653.23), SIMDE_FLOAT32_C( 62.87), SIMDE_FLOAT32_C( -172.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 980.80), SIMDE_FLOAT32_C( 527.97), SIMDE_FLOAT32_C( -324.27), SIMDE_FLOAT32_C( 641.93)), INT32_C(-906), simde_mm_set_ps(SIMDE_FLOAT32_C( 980.80), SIMDE_FLOAT32_C( 527.97), SIMDE_FLOAT32_C( -324.27), SIMDE_FLOAT32_C( -906.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 6.14), SIMDE_FLOAT32_C( 647.61), SIMDE_FLOAT32_C( 446.77), SIMDE_FLOAT32_C( 149.50)), INT32_C(-312), simde_mm_set_ps(SIMDE_FLOAT32_C( 6.14), SIMDE_FLOAT32_C( 647.61), SIMDE_FLOAT32_C( 446.77), SIMDE_FLOAT32_C( -312.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 671.83), SIMDE_FLOAT32_C( 292.90), SIMDE_FLOAT32_C( 137.83), SIMDE_FLOAT32_C( 652.08)), INT32_C( 72), simde_mm_set_ps(SIMDE_FLOAT32_C( 671.83), SIMDE_FLOAT32_C( 292.90), SIMDE_FLOAT32_C( 137.83), SIMDE_FLOAT32_C( 72.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 428.68), SIMDE_FLOAT32_C( 323.04), SIMDE_FLOAT32_C( -369.14), SIMDE_FLOAT32_C( -143.52)), INT32_C( 648), simde_mm_set_ps(SIMDE_FLOAT32_C( 428.68), SIMDE_FLOAT32_C( 323.04), SIMDE_FLOAT32_C( -369.14), SIMDE_FLOAT32_C( 648.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -834.08), SIMDE_FLOAT32_C( 861.01), SIMDE_FLOAT32_C( -60.60), SIMDE_FLOAT32_C( -978.47)), INT32_C( 327), simde_mm_set_ps(SIMDE_FLOAT32_C( -834.08), SIMDE_FLOAT32_C( 861.01), SIMDE_FLOAT32_C( -60.60), SIMDE_FLOAT32_C( 327.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -462.87), SIMDE_FLOAT32_C( -82.73), SIMDE_FLOAT32_C( 641.71), SIMDE_FLOAT32_C( 690.58)), INT32_C( 78), simde_mm_set_ps(SIMDE_FLOAT32_C( -462.87), SIMDE_FLOAT32_C( -82.73), SIMDE_FLOAT32_C( 641.71), SIMDE_FLOAT32_C( 78.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -896.41), SIMDE_FLOAT32_C( -179.07), SIMDE_FLOAT32_C( -535.21), SIMDE_FLOAT32_C( 722.87)), INT32_C(-106), simde_mm_set_ps(SIMDE_FLOAT32_C( -896.41), SIMDE_FLOAT32_C( -179.07), SIMDE_FLOAT32_C( -535.21), SIMDE_FLOAT32_C( -106.00)) } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_cvt_si2ss(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cvt_ss2si (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const int32_t r; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -434.80), SIMDE_FLOAT32_C( 718.49), SIMDE_FLOAT32_C( -765.08) }, INT32_MIN }, { { -SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 610.10), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -238.39) }, INT32_MIN }, #endif #if !defined(SIMDE_FAST_CONVERSION_RANGE) { { HEDLEY_STATIC_CAST(float, INT32_MAX), SIMDE_FLOAT32_C( 264.19), SIMDE_FLOAT32_C( -247.70), SIMDE_FLOAT32_C( -466.34) }, INT32_MIN }, { { HEDLEY_STATIC_CAST(float, INT32_MIN), SIMDE_FLOAT32_C( -656.10), SIMDE_FLOAT32_C( 528.97), SIMDE_FLOAT32_C( -664.65) }, -INT32_C( 2147483648) }, { { HEDLEY_STATIC_CAST(float, INT32_MAX - 100), SIMDE_FLOAT32_C( -572.40), SIMDE_FLOAT32_C( 986.43), SIMDE_FLOAT32_C( 789.86) }, INT32_C( 2147483520) }, { { HEDLEY_STATIC_CAST(float, INT32_MIN + 100), SIMDE_FLOAT32_C( -226.55), SIMDE_FLOAT32_C( -926.20), SIMDE_FLOAT32_C( 130.01) }, -INT32_C( 2147483520) }, #endif { { SIMDE_FLOAT32_C( 555.67), SIMDE_FLOAT32_C( 330.04), SIMDE_FLOAT32_C( -110.18), SIMDE_FLOAT32_C( 679.85) }, INT32_C( 556) }, { { SIMDE_FLOAT32_C( 809.05), SIMDE_FLOAT32_C( -456.06), SIMDE_FLOAT32_C( 886.35), SIMDE_FLOAT32_C( 696.23) }, INT32_C( 809) }, { { SIMDE_FLOAT32_C( -676.99), SIMDE_FLOAT32_C( 517.95), SIMDE_FLOAT32_C( 287.44), SIMDE_FLOAT32_C( -885.31) }, -INT32_C( 677) }, { { SIMDE_FLOAT32_C( -202.79), SIMDE_FLOAT32_C( 24.50), SIMDE_FLOAT32_C( 108.39), SIMDE_FLOAT32_C( -810.69) }, -INT32_C( 203) }, { { SIMDE_FLOAT32_C( -214.44), SIMDE_FLOAT32_C( 248.05), SIMDE_FLOAT32_C( -729.85), SIMDE_FLOAT32_C( -886.40) }, -INT32_C( 214) }, { { SIMDE_FLOAT32_C( -238.22), SIMDE_FLOAT32_C( -190.61), SIMDE_FLOAT32_C( -31.58), SIMDE_FLOAT32_C( -490.90) }, -INT32_C( 
238) }, { { SIMDE_FLOAT32_C( 86.03), SIMDE_FLOAT32_C( 720.06), SIMDE_FLOAT32_C( 886.14), SIMDE_FLOAT32_C( -649.64) }, INT32_C( 86) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); int32_t r = simde_mm_cvt_ss2si(a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; } static int test_simde_mm_cvtpi16_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C(-11158), INT16_C(-16899), INT16_C( 32486), INT16_C( 25075)), simde_mm_set_ps(SIMDE_FLOAT32_C(-11158.00), SIMDE_FLOAT32_C(-16899.00), SIMDE_FLOAT32_C( 32486.00), SIMDE_FLOAT32_C( 25075.00)) }, { simde_mm_set_pi16(INT16_C(-26366), INT16_C(-24687), INT16_C(-32040), INT16_C( -8761)), simde_mm_set_ps(SIMDE_FLOAT32_C(-26366.00), SIMDE_FLOAT32_C(-24687.00), SIMDE_FLOAT32_C(-32040.00), SIMDE_FLOAT32_C( -8761.00)) }, { simde_mm_set_pi16(INT16_C( 472), INT16_C( 26905), INT16_C(-13169), INT16_C( 3569)), simde_mm_set_ps(SIMDE_FLOAT32_C( 472.00), SIMDE_FLOAT32_C( 26905.00), SIMDE_FLOAT32_C(-13169.00), SIMDE_FLOAT32_C( 3569.00)) }, { simde_mm_set_pi16(INT16_C( 29219), INT16_C( -2558), INT16_C( 15879), INT16_C( 28507)), simde_mm_set_ps(SIMDE_FLOAT32_C( 29219.00), SIMDE_FLOAT32_C( -2558.00), SIMDE_FLOAT32_C( 15879.00), SIMDE_FLOAT32_C( 28507.00)) }, { simde_mm_set_pi16(INT16_C( 29483), INT16_C( 27189), INT16_C( 5847), INT16_C( 7015)), simde_mm_set_ps(SIMDE_FLOAT32_C( 29483.00), SIMDE_FLOAT32_C( 27189.00), SIMDE_FLOAT32_C( 5847.00), SIMDE_FLOAT32_C( 7015.00)) }, { simde_mm_set_pi16(INT16_C(-26392), INT16_C(-23898), INT16_C( -5838), INT16_C(-12444)), simde_mm_set_ps(SIMDE_FLOAT32_C(-26392.00), SIMDE_FLOAT32_C(-23898.00), SIMDE_FLOAT32_C( -5838.00), SIMDE_FLOAT32_C(-12444.00)) }, { simde_mm_set_pi16(INT16_C( 16651), INT16_C(-25505), INT16_C(-23268), INT16_C( 15044)), simde_mm_set_ps(SIMDE_FLOAT32_C( 16651.00), SIMDE_FLOAT32_C(-25505.00), SIMDE_FLOAT32_C(-23268.00), SIMDE_FLOAT32_C( 
15044.00)) }, { simde_mm_set_pi16(INT16_C( 18794), INT16_C( 24463), INT16_C( -7002), INT16_C(-17845)), simde_mm_set_ps(SIMDE_FLOAT32_C( 18794.00), SIMDE_FLOAT32_C( 24463.00), SIMDE_FLOAT32_C( -7002.00), SIMDE_FLOAT32_C(-17845.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_cvtpi16_ps(test_vec[i].a); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cvtpi32_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m64 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 143.12), SIMDE_FLOAT32_C( 382.05), SIMDE_FLOAT32_C( -756.03), SIMDE_FLOAT32_C( 501.27)), simde_mm_set_pi32(INT32_C( 747), INT32_C( -200)), simde_mm_set_ps(SIMDE_FLOAT32_C( 143.12), SIMDE_FLOAT32_C( 382.05), SIMDE_FLOAT32_C( 747.00), SIMDE_FLOAT32_C( -200.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 972.78), SIMDE_FLOAT32_C( -402.40), SIMDE_FLOAT32_C( 516.01), SIMDE_FLOAT32_C( 710.18)), simde_mm_set_pi32(INT32_C( 533), INT32_C( -843)), simde_mm_set_ps(SIMDE_FLOAT32_C( 972.78), SIMDE_FLOAT32_C( -402.40), SIMDE_FLOAT32_C( 533.00), SIMDE_FLOAT32_C( -843.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 472.16), SIMDE_FLOAT32_C( -696.64), SIMDE_FLOAT32_C( -295.14), SIMDE_FLOAT32_C( 252.19)), simde_mm_set_pi32(INT32_C( -428), INT32_C( 182)), simde_mm_set_ps(SIMDE_FLOAT32_C( 472.16), SIMDE_FLOAT32_C( -696.64), SIMDE_FLOAT32_C( -428.00), SIMDE_FLOAT32_C( 182.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -566.65), SIMDE_FLOAT32_C( 623.99), SIMDE_FLOAT32_C( 879.56), SIMDE_FLOAT32_C( 610.97)), simde_mm_set_pi32(INT32_C( 176), INT32_C( 681)), simde_mm_set_ps(SIMDE_FLOAT32_C( -566.65), SIMDE_FLOAT32_C( 623.99), SIMDE_FLOAT32_C( 176.00), SIMDE_FLOAT32_C( 681.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 572.11), SIMDE_FLOAT32_C( -357.92), SIMDE_FLOAT32_C( 212.83), SIMDE_FLOAT32_C( 936.07)), simde_mm_set_pi32(INT32_C( -310), INT32_C( 515)), simde_mm_set_ps(SIMDE_FLOAT32_C( 572.11), SIMDE_FLOAT32_C( 
-357.92), SIMDE_FLOAT32_C( -310.00), SIMDE_FLOAT32_C( 515.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 696.01), SIMDE_FLOAT32_C( -960.55), SIMDE_FLOAT32_C( -478.31), SIMDE_FLOAT32_C( -831.25)), simde_mm_set_pi32(INT32_C( 324), INT32_C( -65)), simde_mm_set_ps(SIMDE_FLOAT32_C( 696.01), SIMDE_FLOAT32_C( -960.55), SIMDE_FLOAT32_C( 324.00), SIMDE_FLOAT32_C( -65.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -973.73), SIMDE_FLOAT32_C( 578.94), SIMDE_FLOAT32_C( 658.26), SIMDE_FLOAT32_C( 635.66)), simde_mm_set_pi32(INT32_C( -268), INT32_C( 691)), simde_mm_set_ps(SIMDE_FLOAT32_C( -973.73), SIMDE_FLOAT32_C( 578.94), SIMDE_FLOAT32_C( -268.00), SIMDE_FLOAT32_C( 691.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 654.54), SIMDE_FLOAT32_C( -615.74), SIMDE_FLOAT32_C( -430.24), SIMDE_FLOAT32_C( 224.63)), simde_mm_set_pi32(INT32_C( 370), INT32_C( -505)), simde_mm_set_ps(SIMDE_FLOAT32_C( 654.54), SIMDE_FLOAT32_C( -615.74), SIMDE_FLOAT32_C( 370.00), SIMDE_FLOAT32_C( -505.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_cvtpi32_ps(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cvtpi32x2_ps(SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[2]; int32_t b[2]; simde_float32 r[4]; } test_vec[] = { { { INT32_C( 46384), INT32_C( 57201) }, { INT32_C( 54113), INT32_C( 10228) }, { SIMDE_FLOAT32_C( 46384.00), SIMDE_FLOAT32_C( 57201.00), SIMDE_FLOAT32_C( 54113.00), SIMDE_FLOAT32_C( 10228.00) } }, { { INT32_C( 15587), INT32_C( 1419) }, { INT32_C( 22490), INT32_C( 4374) }, { SIMDE_FLOAT32_C( 15587.00), SIMDE_FLOAT32_C( 1419.00), SIMDE_FLOAT32_C( 22490.00), SIMDE_FLOAT32_C( 4374.00) } }, { { INT32_C( 18119), INT32_C( 48934) }, { INT32_C( 35218), INT32_C( 42416) }, { SIMDE_FLOAT32_C( 18119.00), SIMDE_FLOAT32_C( 48934.00), SIMDE_FLOAT32_C( 35218.00), SIMDE_FLOAT32_C( 42416.00) } }, { { INT32_C( 38881), INT32_C( 38044) }, { INT32_C( 13548), INT32_C( 56134) }, { SIMDE_FLOAT32_C( 
38881.00), SIMDE_FLOAT32_C( 38044.00), SIMDE_FLOAT32_C( 13548.00), SIMDE_FLOAT32_C( 56134.00) } }, { { INT32_C( 58145), INT32_C( 18338) }, { INT32_C( 47824), INT32_C( 39776) }, { SIMDE_FLOAT32_C( 58145.00), SIMDE_FLOAT32_C( 18338.00), SIMDE_FLOAT32_C( 47824.00), SIMDE_FLOAT32_C( 39776.00) } }, { { INT32_C( 29235), INT32_C( 49415) }, { INT32_C( 52982), INT32_C( 16041) }, { SIMDE_FLOAT32_C( 29235.00), SIMDE_FLOAT32_C( 49415.00), SIMDE_FLOAT32_C( 52982.00), SIMDE_FLOAT32_C( 16041.00) } }, { { INT32_C( 36386), INT32_C( 57813) }, { INT32_C( 46492), INT32_C( 15185) }, { SIMDE_FLOAT32_C( 36386.00), SIMDE_FLOAT32_C( 57813.00), SIMDE_FLOAT32_C( 46492.00), SIMDE_FLOAT32_C( 15185.00) } }, { { INT32_C( 60590), INT32_C( 39597) }, { INT32_C( 40296), INT32_C( 10460) }, { SIMDE_FLOAT32_C( 60590.00), SIMDE_FLOAT32_C( 39597.00), SIMDE_FLOAT32_C( 40296.00), SIMDE_FLOAT32_C( 10460.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m64 a, b; simde__m128 e; simde_memcpy(&a, test_vec[i].a, sizeof(a)); simde_memcpy(&b, test_vec[i].b, sizeof(a)); simde_memcpy(&e, test_vec[i].r, sizeof(e)); simde__m128 r = simde_mm_cvtpi32x2_ps(a, b); simde_test_x86_assert_equal_f32x4(r, e, 1); } simde_mm_empty(); return 0; } static int test_simde_mm_cvtpi8_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_pi8(INT8_C( -61), INT8_C( 15), INT8_C( 46), INT8_C( 20), INT8_C( 68), INT8_C( 82), INT8_C( 13), INT8_C( 113)), simde_mm_set_ps(SIMDE_FLOAT32_C( 68.00), SIMDE_FLOAT32_C( 82.00), SIMDE_FLOAT32_C( 13.00), SIMDE_FLOAT32_C( 113.00)) }, { simde_mm_set_pi8(INT8_C( 67), INT8_C( -77), INT8_C( 44), INT8_C( -51), INT8_C( 66), INT8_C( 66), INT8_C(-123), INT8_C( 31)), simde_mm_set_ps(SIMDE_FLOAT32_C( 66.00), SIMDE_FLOAT32_C( 66.00), SIMDE_FLOAT32_C( -123.00), SIMDE_FLOAT32_C( 31.00)) }, { simde_mm_set_pi8(INT8_C( -62), INT8_C( 28), INT8_C( -11), INT8_C( -61), INT8_C( -61), INT8_C(-110), INT8_C( -20), INT8_C( -51)), 
simde_mm_set_ps(SIMDE_FLOAT32_C( -61.00), SIMDE_FLOAT32_C( -110.00), SIMDE_FLOAT32_C( -20.00), SIMDE_FLOAT32_C( -51.00)) }, { simde_mm_set_pi8(INT8_C( -63), INT8_C( -73), INT8_C( 71), INT8_C( -82), INT8_C( 68), INT8_C( 25), INT8_C(-121), INT8_C( -82)), simde_mm_set_ps(SIMDE_FLOAT32_C( 68.00), SIMDE_FLOAT32_C( 25.00), SIMDE_FLOAT32_C( -121.00), SIMDE_FLOAT32_C( -82.00)) }, { simde_mm_set_pi8(INT8_C( 66), INT8_C( -52), INT8_C(-113), INT8_C( 92), INT8_C( 68), INT8_C( 36), INT8_C( -82), INT8_C( 20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 68.00), SIMDE_FLOAT32_C( 36.00), SIMDE_FLOAT32_C( -82.00), SIMDE_FLOAT32_C( 20.00)) }, { simde_mm_set_pi8(INT8_C( -62), INT8_C( 34), INT8_C( -41), INT8_C( 10), INT8_C( 68), INT8_C( 34), INT8_C( -68), INT8_C( -51)), simde_mm_set_ps(SIMDE_FLOAT32_C( 68.00), SIMDE_FLOAT32_C( 34.00), SIMDE_FLOAT32_C( -68.00), SIMDE_FLOAT32_C( -51.00)) }, { simde_mm_set_pi8(INT8_C( 67), INT8_C( 107), INT8_C(-121), INT8_C( -82), INT8_C( -60), INT8_C( 116), INT8_C( -88), INT8_C( -10)), simde_mm_set_ps(SIMDE_FLOAT32_C( -60.00), SIMDE_FLOAT32_C( 116.00), SIMDE_FLOAT32_C( -88.00), SIMDE_FLOAT32_C( -10.00)) }, { simde_mm_set_pi8(INT8_C( -61), INT8_C( 67), INT8_C( -6), INT8_C( -31), INT8_C( -61), INT8_C(-115), INT8_C( -82), INT8_C( 20)), simde_mm_set_ps(SIMDE_FLOAT32_C( -61.00), SIMDE_FLOAT32_C( -115.00), SIMDE_FLOAT32_C( -82.00), SIMDE_FLOAT32_C( 20.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_cvtpi8_ps(test_vec[i].a); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cvtps_pi16(SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; int16_t r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -263.01), SIMDE_FLOAT32_C( 371.44), SIMDE_FLOAT32_C( -589.91), SIMDE_FLOAT32_C( -375.58) }, { -INT16_C( 263), INT16_C( 371), -INT16_C( 590), -INT16_C( 376) } }, { { SIMDE_FLOAT32_C( 317.69), SIMDE_FLOAT32_C( -570.66), SIMDE_FLOAT32_C( -267.81), SIMDE_FLOAT32_C( 844.93) }, { INT16_C( 
318), -INT16_C( 571), -INT16_C( 268), INT16_C( 845) } }, { { SIMDE_FLOAT32_C( 295.60), SIMDE_FLOAT32_C( 531.31), SIMDE_FLOAT32_C( 524.76), SIMDE_FLOAT32_C( 178.54) }, { INT16_C( 296), INT16_C( 531), INT16_C( 525), INT16_C( 179) } }, { { SIMDE_FLOAT32_C( -332.79), SIMDE_FLOAT32_C( -873.21), SIMDE_FLOAT32_C( -995.14), SIMDE_FLOAT32_C( 991.60) }, { -INT16_C( 333), -INT16_C( 873), -INT16_C( 995), INT16_C( 992) } }, { { SIMDE_FLOAT32_C( -312.48), SIMDE_FLOAT32_C( -321.64), SIMDE_FLOAT32_C( 146.29), SIMDE_FLOAT32_C( 734.78) }, { -INT16_C( 312), -INT16_C( 322), INT16_C( 146), INT16_C( 735) } }, { { SIMDE_FLOAT32_C( -467.86), SIMDE_FLOAT32_C( -597.35), SIMDE_FLOAT32_C( 676.08), SIMDE_FLOAT32_C( 861.72) }, { -INT16_C( 468), -INT16_C( 597), INT16_C( 676), INT16_C( 862) } }, { { SIMDE_FLOAT32_C( -765.08), SIMDE_FLOAT32_C( -574.12), SIMDE_FLOAT32_C( 36.42), SIMDE_FLOAT32_C( 212.73) }, { -INT16_C( 765), -INT16_C( 574), INT16_C( 36), INT16_C( 213) } }, { { SIMDE_FLOAT32_C( 564.76), SIMDE_FLOAT32_C( -219.68), SIMDE_FLOAT32_C( 291.77), SIMDE_FLOAT32_C( -698.25) }, { INT16_C( 565), -INT16_C( 220), INT16_C( 292), -INT16_C( 698) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m64 r = simde_mm_cvtps_pi16(a); simde__m64 e; simde_memcpy(&e, &(test_vec[i].r), sizeof(e)); simde_test_x86_assert_equal_i16x4(r, e); } return 0; } static int test_simde_mm_cvtps_pi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const int32_t r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 718.49), SIMDE_FLOAT32_C( -765.08) }, { INT32_MIN, INT32_MIN } }, #endif #if !defined(SIMDE_FAST_CONVERSION_RANGE) { { HEDLEY_STATIC_CAST(float, INT32_MAX), HEDLEY_STATIC_CAST(float, INT32_MAX - 100), SIMDE_FLOAT32_C( 729.56), SIMDE_FLOAT32_C( -32.75) }, { INT32_MIN, INT32_C( 2147483520) } }, { { HEDLEY_STATIC_CAST(float, INT32_MIN), 
HEDLEY_STATIC_CAST(float, INT32_MIN + 100), SIMDE_FLOAT32_C( -402.31), SIMDE_FLOAT32_C( -757.67) }, { INT32_MIN, -INT32_C( 2147483520) } }, #endif { { SIMDE_FLOAT32_C( 954.39), SIMDE_FLOAT32_C( -602.35), SIMDE_FLOAT32_C( -802.59), SIMDE_FLOAT32_C( 470.56) }, { INT32_C( 954), -INT32_C( 602) } }, { { SIMDE_FLOAT32_C( -253.18), SIMDE_FLOAT32_C( 950.24), SIMDE_FLOAT32_C( -57.78), SIMDE_FLOAT32_C( 758.15) }, { -INT32_C( 253), INT32_C( 950) } }, { { SIMDE_FLOAT32_C( 856.62), SIMDE_FLOAT32_C( 127.52), SIMDE_FLOAT32_C( 286.18), SIMDE_FLOAT32_C( -453.57) }, { INT32_C( 857), INT32_C( 128) } }, { { SIMDE_FLOAT32_C( 420.35), SIMDE_FLOAT32_C( 737.11), SIMDE_FLOAT32_C( -767.49), SIMDE_FLOAT32_C( 960.69) }, { INT32_C( 420), INT32_C( 737) } }, { { SIMDE_FLOAT32_C( -116.84), SIMDE_FLOAT32_C( 629.86), SIMDE_FLOAT32_C( 88.62), SIMDE_FLOAT32_C( 485.45) }, { -INT32_C( 117), INT32_C( 630) } }, { { SIMDE_FLOAT32_C( -731.19), SIMDE_FLOAT32_C( 307.46), SIMDE_FLOAT32_C( 739.75), SIMDE_FLOAT32_C( -587.43) }, { -INT32_C( 731), INT32_C( 307) } }, { { SIMDE_FLOAT32_C( 269.00), SIMDE_FLOAT32_C( 633.12), SIMDE_FLOAT32_C( -560.38), SIMDE_FLOAT32_C( -888.86) }, { INT32_C( 269), INT32_C( 633) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m64 r = simde_mm_cvtps_pi32(a); simde_test_x86_assert_equal_i32x2(r, simde_x_mm_loadu_si64(test_vec[i].r)); } return 0; } static int test_simde_mm_cvtps_pi8(SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; int8_t r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( -89.74), SIMDE_FLOAT32_C( -2.76), SIMDE_FLOAT32_C( -28.75), SIMDE_FLOAT32_C( 92.62) }, { -INT8_C( 90), -INT8_C( 3), -INT8_C( 29), INT8_C( 93), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { SIMDE_FLOAT32_C( -146.68), SIMDE_FLOAT32_C( -116.78), SIMDE_FLOAT32_C( 176.97), SIMDE_FLOAT32_C( 5.88) }, { INT8_MIN, -INT8_C(117), INT8_MAX, INT8_C( 6), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { 
SIMDE_FLOAT32_C( 159.51), SIMDE_FLOAT32_C( 49.64), SIMDE_FLOAT32_C( 160.41), SIMDE_FLOAT32_C( -150.16) }, { INT8_MAX, INT8_C( 50), INT8_MAX, INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { SIMDE_FLOAT32_C( -127.96), SIMDE_FLOAT32_C( -30.84), SIMDE_FLOAT32_C( 119.71), SIMDE_FLOAT32_C( 85.16) }, { INT8_MIN, -INT8_C( 31), INT8_C(120), INT8_C( 85), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { SIMDE_FLOAT32_C( 102.72), SIMDE_FLOAT32_C( 170.20), SIMDE_FLOAT32_C( -183.68), SIMDE_FLOAT32_C( -37.32) }, { INT8_C(103), INT8_MAX, INT8_MIN, -INT8_C( 37), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { SIMDE_FLOAT32_C( 167.11), SIMDE_FLOAT32_C( 58.87), SIMDE_FLOAT32_C( 128.98), SIMDE_FLOAT32_C( -160.55) }, { INT8_MAX, INT8_C( 59), INT8_MAX, INT8_MIN, INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { SIMDE_FLOAT32_C( -151.25), SIMDE_FLOAT32_C( -18.20), SIMDE_FLOAT32_C( 157.11), SIMDE_FLOAT32_C( 162.11) }, { INT8_MIN, -INT8_C( 18), INT8_MAX, INT8_MAX, INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { SIMDE_FLOAT32_C( -188.54), SIMDE_FLOAT32_C( -194.06), SIMDE_FLOAT32_C( -85.40), SIMDE_FLOAT32_C( -78.27) }, { INT8_MIN, INT8_MIN, -INT8_C( 85), -INT8_C( 78), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m64 r = simde_mm_cvtps_pi8(simde_mm_loadu_ps(test_vec[i].a)); /* The upper half is undefined */ int8_t r_[8]; simde_memcpy(r_, &r, sizeof(r)); simde_assert_equal_vi8(4, r_, test_vec[i].r); } return 0; } static int test_simde_mm_cvtpu16_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[4]; const simde_float32 r[4]; } test_vec[] = { { { -INT16_C( 15210), -INT16_C( 656), INT16_C( 21841), -INT16_C( 5069) }, { SIMDE_FLOAT32_C( 50326.00), SIMDE_FLOAT32_C( 64880.00), SIMDE_FLOAT32_C( 21841.00), SIMDE_FLOAT32_C( 60467.00) } }, { { -INT16_C( 19160), -INT16_C( 18589), INT16_C( 31824), INT16_C( 28058) }, { SIMDE_FLOAT32_C( 46376.00), 
SIMDE_FLOAT32_C( 46947.00), SIMDE_FLOAT32_C( 31824.00), SIMDE_FLOAT32_C( 28058.00) } }, { { -INT16_C( 25235), -INT16_C( 26832), INT16_C( 23207), INT16_C( 24159) }, { SIMDE_FLOAT32_C( 40301.00), SIMDE_FLOAT32_C( 38704.00), SIMDE_FLOAT32_C( 23207.00), SIMDE_FLOAT32_C( 24159.00) } }, { { INT16_C( 19021), -INT16_C( 32629), INT16_C( 1472), INT16_C( 22326) }, { SIMDE_FLOAT32_C( 19021.00), SIMDE_FLOAT32_C( 32907.00), SIMDE_FLOAT32_C( 1472.00), SIMDE_FLOAT32_C( 22326.00) } }, { { -INT16_C( 22839), INT16_C( 6740), -INT16_C( 30725), INT16_C( 9223) }, { SIMDE_FLOAT32_C( 42697.00), SIMDE_FLOAT32_C( 6740.00), SIMDE_FLOAT32_C( 34811.00), SIMDE_FLOAT32_C( 9223.00) } }, { { INT16_C( 27197), -INT16_C( 29221), INT16_C( 30438), INT16_C( 21498) }, { SIMDE_FLOAT32_C( 27197.00), SIMDE_FLOAT32_C( 36315.00), SIMDE_FLOAT32_C( 30438.00), SIMDE_FLOAT32_C( 21498.00) } }, { { INT16_C( 11027), -INT16_C( 17685), INT16_C( 19077), -INT16_C( 11752) }, { SIMDE_FLOAT32_C( 11027.00), SIMDE_FLOAT32_C( 47851.00), SIMDE_FLOAT32_C( 19077.00), SIMDE_FLOAT32_C( 53784.00) } }, { { -INT16_C( 23660), INT16_C( 21842), -INT16_C( 30551), INT16_C( 29356) }, { SIMDE_FLOAT32_C( 41876.00), SIMDE_FLOAT32_C( 21842.00), SIMDE_FLOAT32_C( 34985.00), SIMDE_FLOAT32_C( 29356.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m64 a = simde_x_mm_loadu_si64(test_vec[i].a); simde__m128 r = simde_mm_cvtpu16_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_cvtpu8_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int8_t a[8]; const simde_float32 r[4]; } test_vec[] = { { { INT8_C( 101), INT8_C( 32), -INT8_C( 83), -INT8_C( 19), INT8_C( 16), INT8_C( 50), -INT8_C( 116), -INT8_C( 15) }, { SIMDE_FLOAT32_C( 101.00), SIMDE_FLOAT32_C( 32.00), SIMDE_FLOAT32_C( 173.00), SIMDE_FLOAT32_C( 237.00) } }, { { -INT8_C( 5), -INT8_C( 57), -INT8_C( 74), -INT8_C( 2), -INT8_C( 118), INT8_C( 93), -INT8_C( 14), -INT8_C( 86) }, { 
SIMDE_FLOAT32_C( 251.00), SIMDE_FLOAT32_C( 199.00), SIMDE_FLOAT32_C( 182.00), SIMDE_FLOAT32_C( 254.00) } }, { { INT8_C( 54), -INT8_C( 97), -INT8_C( 126), INT8_C( 51), -INT8_C( 9), INT8_C( 72), INT8_C( 9), -INT8_C( 127) }, { SIMDE_FLOAT32_C( 54.00), SIMDE_FLOAT32_C( 159.00), SIMDE_FLOAT32_C( 130.00), SIMDE_FLOAT32_C( 51.00) } }, { { -INT8_C( 100), -INT8_C( 20), INT8_C( 7), -INT8_C( 64), INT8_C( 114), INT8_C( 84), INT8_C( 108), -INT8_C( 41) }, { SIMDE_FLOAT32_C( 156.00), SIMDE_FLOAT32_C( 236.00), SIMDE_FLOAT32_C( 7.00), SIMDE_FLOAT32_C( 192.00) } }, { { INT8_C( 116), INT8_C( 26), -INT8_C( 60), -INT8_C( 123), INT8_C( 76), INT8_C( 80), INT8_C( 118), INT8_C( 71) }, { SIMDE_FLOAT32_C( 116.00), SIMDE_FLOAT32_C( 26.00), SIMDE_FLOAT32_C( 196.00), SIMDE_FLOAT32_C( 133.00) } }, { { INT8_C( 24), INT8_C( 44), INT8_C( 70), -INT8_C( 94), -INT8_C( 119), INT8_C( 56), INT8_C( 76), -INT8_C( 65) }, { SIMDE_FLOAT32_C( 24.00), SIMDE_FLOAT32_C( 44.00), SIMDE_FLOAT32_C( 70.00), SIMDE_FLOAT32_C( 162.00) } }, { { -INT8_C( 40), -INT8_C( 49), -INT8_C( 13), -INT8_C( 49), INT8_C( 23), -INT8_C( 4), INT8_C( 80), -INT8_C( 77) }, { SIMDE_FLOAT32_C( 216.00), SIMDE_FLOAT32_C( 207.00), SIMDE_FLOAT32_C( 243.00), SIMDE_FLOAT32_C( 207.00) } }, { { -INT8_C( 24), INT8_C( 88), INT8_C( 115), INT8_C( 91), -INT8_C( 84), -INT8_C( 32), INT8_C( 50), INT8_C( 32) }, { SIMDE_FLOAT32_C( 232.00), SIMDE_FLOAT32_C( 88.00), SIMDE_FLOAT32_C( 115.00), SIMDE_FLOAT32_C( 91.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m64 a = simde_x_mm_loadu_si64(test_vec[i].a); simde__m128 r = simde_mm_cvtpu8_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_cvtsi32_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; int32_t b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -368.26), SIMDE_FLOAT32_C( -772.15), SIMDE_FLOAT32_C( 700.78), SIMDE_FLOAT32_C( -416.87)), INT32_C( -93207), 
simde_mm_set_ps(SIMDE_FLOAT32_C( -368.26), SIMDE_FLOAT32_C( -772.15), SIMDE_FLOAT32_C( 700.78), SIMDE_FLOAT32_C(-93207.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 774.49), SIMDE_FLOAT32_C( 920.32), SIMDE_FLOAT32_C( 159.83), SIMDE_FLOAT32_C( -900.78)), INT32_C( -99810), simde_mm_set_ps(SIMDE_FLOAT32_C( 774.49), SIMDE_FLOAT32_C( 920.32), SIMDE_FLOAT32_C( 159.83), SIMDE_FLOAT32_C(-99810.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -606.72), SIMDE_FLOAT32_C( -127.65), SIMDE_FLOAT32_C( -336.22), SIMDE_FLOAT32_C( -528.09)), INT32_C( -24917), simde_mm_set_ps(SIMDE_FLOAT32_C( -606.72), SIMDE_FLOAT32_C( -127.65), SIMDE_FLOAT32_C( -336.22), SIMDE_FLOAT32_C(-24917.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 534.13), SIMDE_FLOAT32_C( -401.63), SIMDE_FLOAT32_C( -949.41), SIMDE_FLOAT32_C( -38.28)), INT32_C( -25377), simde_mm_set_ps(SIMDE_FLOAT32_C( 534.13), SIMDE_FLOAT32_C( -401.63), SIMDE_FLOAT32_C( -949.41), SIMDE_FLOAT32_C(-25377.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 704.87), SIMDE_FLOAT32_C( 236.14), SIMDE_FLOAT32_C( -91.25), SIMDE_FLOAT32_C( -708.13)), INT32_C( 83867), simde_mm_set_ps(SIMDE_FLOAT32_C( 704.87), SIMDE_FLOAT32_C( 236.14), SIMDE_FLOAT32_C( -91.25), SIMDE_FLOAT32_C( 83867.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 570.72), SIMDE_FLOAT32_C( -412.30), SIMDE_FLOAT32_C( -578.88), SIMDE_FLOAT32_C( 196.41)), INT32_C( 72066), simde_mm_set_ps(SIMDE_FLOAT32_C( 570.72), SIMDE_FLOAT32_C( -412.30), SIMDE_FLOAT32_C( -578.88), SIMDE_FLOAT32_C( 72066.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 891.77), SIMDE_FLOAT32_C( -473.67), SIMDE_FLOAT32_C( 332.65), SIMDE_FLOAT32_C( -615.45)), INT32_C( 12054), simde_mm_set_ps(SIMDE_FLOAT32_C( 891.77), SIMDE_FLOAT32_C( -473.67), SIMDE_FLOAT32_C( 332.65), SIMDE_FLOAT32_C( 12054.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -552.37), SIMDE_FLOAT32_C( -873.85), SIMDE_FLOAT32_C( 968.75), SIMDE_FLOAT32_C( -669.38)), INT32_C( 88818), simde_mm_set_ps(SIMDE_FLOAT32_C( -552.37), SIMDE_FLOAT32_C( -873.85), SIMDE_FLOAT32_C( 968.75), SIMDE_FLOAT32_C( 
88818.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_cvtsi32_ss(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cvtsi64_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; int64_t b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 253.23), SIMDE_FLOAT32_C( -834.96), SIMDE_FLOAT32_C( -59.37), SIMDE_FLOAT32_C( -234.88)), INT64_C( -400), simde_mm_set_ps(SIMDE_FLOAT32_C( 253.23), SIMDE_FLOAT32_C( -834.96), SIMDE_FLOAT32_C( -59.37), SIMDE_FLOAT32_C( -400.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -504.89), SIMDE_FLOAT32_C( 601.98), SIMDE_FLOAT32_C( 647.71), SIMDE_FLOAT32_C( 743.03)), INT64_C( 778), simde_mm_set_ps(SIMDE_FLOAT32_C( -504.89), SIMDE_FLOAT32_C( 601.98), SIMDE_FLOAT32_C( 647.71), SIMDE_FLOAT32_C( 778.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -54.05), SIMDE_FLOAT32_C( -259.82), SIMDE_FLOAT32_C( 622.33), SIMDE_FLOAT32_C( -585.76)), INT64_C( 469), simde_mm_set_ps(SIMDE_FLOAT32_C( -54.05), SIMDE_FLOAT32_C( -259.82), SIMDE_FLOAT32_C( 622.33), SIMDE_FLOAT32_C( 469.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -466.68), SIMDE_FLOAT32_C( 568.80), SIMDE_FLOAT32_C( -210.64), SIMDE_FLOAT32_C( 607.00)), INT64_C( -865), simde_mm_set_ps(SIMDE_FLOAT32_C( -466.68), SIMDE_FLOAT32_C( 568.80), SIMDE_FLOAT32_C( -210.64), SIMDE_FLOAT32_C( -865.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 796.90), SIMDE_FLOAT32_C( 289.94), SIMDE_FLOAT32_C( -605.55), SIMDE_FLOAT32_C( -696.91)), INT64_C( 55), simde_mm_set_ps(SIMDE_FLOAT32_C( 796.90), SIMDE_FLOAT32_C( 289.94), SIMDE_FLOAT32_C( -605.55), SIMDE_FLOAT32_C( 55.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -322.36), SIMDE_FLOAT32_C( -443.46), SIMDE_FLOAT32_C( 641.09), SIMDE_FLOAT32_C( 796.50)), INT64_C( 105), simde_mm_set_ps(SIMDE_FLOAT32_C( -322.36), SIMDE_FLOAT32_C( -443.46), SIMDE_FLOAT32_C( 641.09), SIMDE_FLOAT32_C( 105.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 797.45), 
SIMDE_FLOAT32_C( -128.18), SIMDE_FLOAT32_C( -171.74), SIMDE_FLOAT32_C( -508.28)), INT64_C( -475), simde_mm_set_ps(SIMDE_FLOAT32_C( 797.45), SIMDE_FLOAT32_C( -128.18), SIMDE_FLOAT32_C( -171.74), SIMDE_FLOAT32_C( -475.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 301.56), SIMDE_FLOAT32_C( -923.23), SIMDE_FLOAT32_C( 417.31), SIMDE_FLOAT32_C( 873.00)), INT64_C( 418), simde_mm_set_ps(SIMDE_FLOAT32_C( 301.56), SIMDE_FLOAT32_C( -923.23), SIMDE_FLOAT32_C( 417.31), SIMDE_FLOAT32_C( 418.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_cvtsi64_ss(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cvtss_f32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde_float32 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -625.65), SIMDE_FLOAT32_C( -39.83), SIMDE_FLOAT32_C( -796.10), SIMDE_FLOAT32_C( -156.62)), SIMDE_FLOAT32_C( -156.62) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 931.13), SIMDE_FLOAT32_C( -382.67), SIMDE_FLOAT32_C( 930.16), SIMDE_FLOAT32_C( 630.96)), SIMDE_FLOAT32_C( 630.96) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 353.46), SIMDE_FLOAT32_C( 159.93), SIMDE_FLOAT32_C( 833.30), SIMDE_FLOAT32_C( -880.80)), SIMDE_FLOAT32_C( -880.80) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 198.16), SIMDE_FLOAT32_C( 987.02), SIMDE_FLOAT32_C( 469.43), SIMDE_FLOAT32_C( -215.34)), SIMDE_FLOAT32_C( -215.34) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 929.58), SIMDE_FLOAT32_C( -288.70), SIMDE_FLOAT32_C( -524.51), SIMDE_FLOAT32_C( -342.93)), SIMDE_FLOAT32_C( -342.93) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 83.98), SIMDE_FLOAT32_C( 604.13), SIMDE_FLOAT32_C( 826.04), SIMDE_FLOAT32_C( -567.24)), SIMDE_FLOAT32_C( -567.24) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 328.58), SIMDE_FLOAT32_C( -935.74), SIMDE_FLOAT32_C( -805.95), SIMDE_FLOAT32_C( 456.28)), SIMDE_FLOAT32_C( 456.28) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 520.91), SIMDE_FLOAT32_C( 709.50), SIMDE_FLOAT32_C( 751.51), 
SIMDE_FLOAT32_C( -700.59)), SIMDE_FLOAT32_C( -700.59) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float32 r = simde_mm_cvtss_f32(test_vec[i].a); simde_assert_equal_f32(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cvtss_si32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde_float32 r; } test_vec[10] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -420.84), SIMDE_FLOAT32_C( 180.64), SIMDE_FLOAT32_C( -145.08), SIMDE_FLOAT32_C( 328.00)), SIMDE_FLOAT32_C(328.00) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 951.13), SIMDE_FLOAT32_C( 455.59), SIMDE_FLOAT32_C( 803.81), SIMDE_FLOAT32_C( -553.94)), -SIMDE_FLOAT32_C(553.94) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -596.13), SIMDE_FLOAT32_C( -629.54), SIMDE_FLOAT32_C( -358.00), SIMDE_FLOAT32_C( -173.08)), -SIMDE_FLOAT32_C(173.08) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 531.31), SIMDE_FLOAT32_C( -294.53), SIMDE_FLOAT32_C( 348.93), SIMDE_FLOAT32_C( -374.17)), -SIMDE_FLOAT32_C(374.17) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -5.22), SIMDE_FLOAT32_C( -902.50), SIMDE_FLOAT32_C( 534.84), SIMDE_FLOAT32_C( 611.14)), SIMDE_FLOAT32_C(611.14) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 437.42), SIMDE_FLOAT32_C( -64.33), SIMDE_FLOAT32_C( -167.86), SIMDE_FLOAT32_C( -495.17)), -SIMDE_FLOAT32_C(495.17) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 400.50), SIMDE_FLOAT32_C( 665.80), SIMDE_FLOAT32_C( 205.90), SIMDE_FLOAT32_C( 133.58)), SIMDE_FLOAT32_C(133.58) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 762.41), SIMDE_FLOAT32_C( -424.85), SIMDE_FLOAT32_C( 903.51), SIMDE_FLOAT32_C( -209.85)), -SIMDE_FLOAT32_C(209.85) }, { simde_mm_set_ps(-SIMDE_FLOAT32_C(2147483650.0), -SIMDE_FLOAT32_C(2147483650.0), -SIMDE_FLOAT32_C(2147483650.0), -SIMDE_FLOAT32_C(2147483650.0)), -SIMDE_FLOAT32_C(2147483648.0) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 2147483649.0), SIMDE_FLOAT32_C( 2147483649.0), SIMDE_FLOAT32_C( 2147483649.0), SIMDE_FLOAT32_C( 2147483649.0)), SIMDE_FLOAT32_C(2147483648.0) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])); i++) { simde_float32 r = simde_mm_cvtss_f32(test_vec[i].a); simde_assert_equal_f32(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cvtss_si64(SIMDE_MUNIT_TEST_ARGS) { struct { simde_float32 a[4]; int64_t r; } test_vec[] = { { { SIMDE_FLOAT32_C( 432.13), SIMDE_FLOAT32_C( 656.82), SIMDE_FLOAT32_C( -60.62), SIMDE_FLOAT32_C( 359.43) }, INT64_C( 432) }, { { SIMDE_FLOAT32_C( 849.69), SIMDE_FLOAT32_C( -168.26), SIMDE_FLOAT32_C( 534.64), SIMDE_FLOAT32_C( -909.91) }, INT64_C( 850) }, { { SIMDE_FLOAT32_C( -354.33), SIMDE_FLOAT32_C( 491.23), SIMDE_FLOAT32_C( 177.36), SIMDE_FLOAT32_C( -562.80) }, -INT64_C( 354) }, { { SIMDE_FLOAT32_C( -588.89), SIMDE_FLOAT32_C( 671.54), SIMDE_FLOAT32_C( -880.28), SIMDE_FLOAT32_C( 870.90) }, -INT64_C( 589) }, { { SIMDE_FLOAT32_C( 943.47), SIMDE_FLOAT32_C( -40.20), SIMDE_FLOAT32_C( -409.04), SIMDE_FLOAT32_C( 908.82) }, INT64_C( 943) }, { { SIMDE_FLOAT32_C( -851.90), SIMDE_FLOAT32_C( 774.67), SIMDE_FLOAT32_C( -782.48), SIMDE_FLOAT32_C( 624.28) }, -INT64_C( 852) }, { { SIMDE_FLOAT32_C( -583.19), SIMDE_FLOAT32_C( -593.84), SIMDE_FLOAT32_C( -529.68), SIMDE_FLOAT32_C( 960.44) }, -INT64_C( 583) }, { { SIMDE_FLOAT32_C( -977.60), SIMDE_FLOAT32_C( -925.70), SIMDE_FLOAT32_C( 101.18), SIMDE_FLOAT32_C( 454.53) }, -INT64_C( 978) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); int64_t r = simde_mm_cvtss_si64(a); simde_assert_equal_i64(r, test_vec[i].r); } return 0; } static int test_simde_mm_cvtt_ps2pi (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const int32_t r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, -SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 752.47), SIMDE_FLOAT32_C( -313.93) }, { INT32_MIN, INT32_MIN } }, #endif #if !defined(SIMDE_FAST_CONVERSION_RANGE) { { HEDLEY_STATIC_CAST(float, INT32_MAX), HEDLEY_STATIC_CAST(float, INT32_MAX - 100), SIMDE_FLOAT32_C( 635.26), SIMDE_FLOAT32_C( -364.72) 
}, { INT32_MIN, INT32_C( 2147483520) } }, { { HEDLEY_STATIC_CAST(float, INT32_MIN), HEDLEY_STATIC_CAST(float, INT32_MIN + 100), SIMDE_FLOAT32_C( -722.55), SIMDE_FLOAT32_C( -645.71) }, { INT32_MIN, -INT32_C( 2147483520) } }, #endif { { SIMDE_FLOAT32_C( 229.49), SIMDE_FLOAT32_C( -522.02), SIMDE_FLOAT32_C( 848.45), SIMDE_FLOAT32_C( 552.88) }, { INT32_C( 229), -INT32_C( 522) } }, { { SIMDE_FLOAT32_C( -356.77), SIMDE_FLOAT32_C( -271.27), SIMDE_FLOAT32_C( -421.48), SIMDE_FLOAT32_C( -727.39) }, { -INT32_C( 356), -INT32_C( 271) } }, { { SIMDE_FLOAT32_C( 269.10), SIMDE_FLOAT32_C( -977.88), SIMDE_FLOAT32_C( -217.05), SIMDE_FLOAT32_C( -590.05) }, { INT32_C( 269), -INT32_C( 977) } }, { { SIMDE_FLOAT32_C( -562.37), SIMDE_FLOAT32_C( -270.05), SIMDE_FLOAT32_C( 582.52), SIMDE_FLOAT32_C( -950.27) }, { -INT32_C( 562), -INT32_C( 270) } }, { { SIMDE_FLOAT32_C( -859.67), SIMDE_FLOAT32_C( 561.63), SIMDE_FLOAT32_C( -377.72), SIMDE_FLOAT32_C( 480.02) }, { -INT32_C( 859), INT32_C( 561) } }, { { SIMDE_FLOAT32_C( 582.73), SIMDE_FLOAT32_C( -146.67), SIMDE_FLOAT32_C( -649.38), SIMDE_FLOAT32_C( -147.99) }, { INT32_C( 582), -INT32_C( 146) } }, { { SIMDE_FLOAT32_C( -586.20), SIMDE_FLOAT32_C( 946.33), SIMDE_FLOAT32_C( 520.10), SIMDE_FLOAT32_C( -556.78) }, { -INT32_C( 586), INT32_C( 946) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m64 r = simde_mm_cvtt_ps2pi(a); simde_test_x86_assert_equal_i32x2(r, simde_x_mm_loadu_si64(test_vec[i].r)); } return 0; } static int test_simde_mm_cvtt_ss2si (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const int32_t r; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -434.80), SIMDE_FLOAT32_C( 718.49), SIMDE_FLOAT32_C( -765.08) }, INT32_MIN }, { { -SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 610.10), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -238.39) }, INT32_MIN }, #endif #if !defined(SIMDE_FAST_CONVERSION_RANGE) { { 
HEDLEY_STATIC_CAST(float, INT32_MAX), SIMDE_FLOAT32_C( 264.19), SIMDE_FLOAT32_C( -247.70), SIMDE_FLOAT32_C( -466.34) }, INT32_MIN }, { { HEDLEY_STATIC_CAST(float, INT32_MIN), SIMDE_FLOAT32_C( -656.10), SIMDE_FLOAT32_C( 528.97), SIMDE_FLOAT32_C( -664.65) }, -INT32_C( 2147483648) }, { { HEDLEY_STATIC_CAST(float, INT32_MAX - 100), SIMDE_FLOAT32_C( -572.40), SIMDE_FLOAT32_C( 986.43), SIMDE_FLOAT32_C( 789.86) }, INT32_C( 2147483520) }, { { HEDLEY_STATIC_CAST(float, INT32_MIN + 100), SIMDE_FLOAT32_C( -226.55), SIMDE_FLOAT32_C( -926.20), SIMDE_FLOAT32_C( 130.01) }, -INT32_C( 2147483520) }, #endif { { SIMDE_FLOAT32_C( 555.67), SIMDE_FLOAT32_C( 330.04), SIMDE_FLOAT32_C( -110.18), SIMDE_FLOAT32_C( 679.85) }, INT32_C( 555) }, { { SIMDE_FLOAT32_C( 809.05), SIMDE_FLOAT32_C( -456.06), SIMDE_FLOAT32_C( 886.35), SIMDE_FLOAT32_C( 696.23) }, INT32_C( 809) }, { { SIMDE_FLOAT32_C( -676.99), SIMDE_FLOAT32_C( 517.95), SIMDE_FLOAT32_C( 287.44), SIMDE_FLOAT32_C( -885.31) }, -INT32_C( 676) }, { { SIMDE_FLOAT32_C( -202.79), SIMDE_FLOAT32_C( 24.50), SIMDE_FLOAT32_C( 108.39), SIMDE_FLOAT32_C( -810.69) }, -INT32_C( 202) }, { { SIMDE_FLOAT32_C( -214.44), SIMDE_FLOAT32_C( 248.05), SIMDE_FLOAT32_C( -729.85), SIMDE_FLOAT32_C( -886.40) }, -INT32_C( 214) }, { { SIMDE_FLOAT32_C( -238.22), SIMDE_FLOAT32_C( -190.61), SIMDE_FLOAT32_C( -31.58), SIMDE_FLOAT32_C( -490.90) }, -INT32_C( 238) }, { { SIMDE_FLOAT32_C( 86.03), SIMDE_FLOAT32_C( 720.06), SIMDE_FLOAT32_C( 886.14), SIMDE_FLOAT32_C( -649.64) }, INT32_C( 86) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); int32_t r = simde_mm_cvtt_ss2si(a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; } static int test_simde_mm_cvttss_si64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; int64_t r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -26.30), SIMDE_FLOAT32_C( -753.04), SIMDE_FLOAT32_C( 939.86), SIMDE_FLOAT32_C( 229.58)), INT64_C( 229) }, { 
simde_mm_set_ps(SIMDE_FLOAT32_C( -756.13), SIMDE_FLOAT32_C( -951.35), SIMDE_FLOAT32_C( -40.11), SIMDE_FLOAT32_C( 621.33)), INT64_C( 621) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -451.41), SIMDE_FLOAT32_C( 91.05), SIMDE_FLOAT32_C( -819.88), SIMDE_FLOAT32_C( -387.32)), INT64_C(-387) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -882.02), SIMDE_FLOAT32_C( -524.02), SIMDE_FLOAT32_C( 80.52), SIMDE_FLOAT32_C( -645.48)), INT64_C(-645) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -474.88), SIMDE_FLOAT32_C( -59.96), SIMDE_FLOAT32_C( 536.08), SIMDE_FLOAT32_C( -612.35)), INT64_C(-612) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -6.13), SIMDE_FLOAT32_C( 143.04), SIMDE_FLOAT32_C( 295.14), SIMDE_FLOAT32_C( 637.76)), INT64_C( 637) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -573.26), SIMDE_FLOAT32_C( -332.75), SIMDE_FLOAT32_C( -589.80), SIMDE_FLOAT32_C( 18.82)), INT64_C( 18) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 447.07), SIMDE_FLOAT32_C( 103.62), SIMDE_FLOAT32_C( 276.09), SIMDE_FLOAT32_C( 287.14)), INT64_C( 287) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int64_t r = simde_mm_cvttss_si64(test_vec[i].a); simde_assert_equal_i64(r, test_vec[i].r); } return 0; } static int test_simde_mm_div_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 525.22), SIMDE_FLOAT32_C( 469.67), SIMDE_FLOAT32_C( 507.34), SIMDE_FLOAT32_C( -895.29)), simde_mm_set_ps(SIMDE_FLOAT32_C( -817.67), SIMDE_FLOAT32_C( 254.19), SIMDE_FLOAT32_C( 275.30), SIMDE_FLOAT32_C( 248.18)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( 1.85), SIMDE_FLOAT32_C( 1.84), SIMDE_FLOAT32_C( -3.61)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 718.98), SIMDE_FLOAT32_C( 567.01), SIMDE_FLOAT32_C( -547.97), SIMDE_FLOAT32_C( -853.48)), simde_mm_set_ps(SIMDE_FLOAT32_C( -930.62), SIMDE_FLOAT32_C( -977.41), SIMDE_FLOAT32_C( 357.59), SIMDE_FLOAT32_C( -240.75)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 
-1.53), SIMDE_FLOAT32_C( 3.55)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 987.48), SIMDE_FLOAT32_C( 42.26), SIMDE_FLOAT32_C( -181.92), SIMDE_FLOAT32_C( 184.12)), simde_mm_set_ps(SIMDE_FLOAT32_C( -876.49), SIMDE_FLOAT32_C( -490.31), SIMDE_FLOAT32_C( 841.85), SIMDE_FLOAT32_C( 60.02)), simde_mm_set_ps(SIMDE_FLOAT32_C( -1.13), SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( 3.07)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -613.04), SIMDE_FLOAT32_C( -465.73), SIMDE_FLOAT32_C( 556.95), SIMDE_FLOAT32_C( 817.86)), simde_mm_set_ps(SIMDE_FLOAT32_C( -116.73), SIMDE_FLOAT32_C( -977.12), SIMDE_FLOAT32_C( 568.77), SIMDE_FLOAT32_C( 558.83)), simde_mm_set_ps(SIMDE_FLOAT32_C( 5.25), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 1.46)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -672.76), SIMDE_FLOAT32_C( 393.57), SIMDE_FLOAT32_C( -393.40), SIMDE_FLOAT32_C( -853.35)), simde_mm_set_ps(SIMDE_FLOAT32_C( -888.73), SIMDE_FLOAT32_C( -177.78), SIMDE_FLOAT32_C( 921.78), SIMDE_FLOAT32_C( 898.09)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( -2.21), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( -0.95)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 128.49), SIMDE_FLOAT32_C( -969.45), SIMDE_FLOAT32_C( 460.39), SIMDE_FLOAT32_C( 323.29)), simde_mm_set_ps(SIMDE_FLOAT32_C( -32.22), SIMDE_FLOAT32_C( -509.48), SIMDE_FLOAT32_C( -582.48), SIMDE_FLOAT32_C( -592.44)), simde_mm_set_ps(SIMDE_FLOAT32_C( -3.99), SIMDE_FLOAT32_C( 1.90), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.55)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -938.19), SIMDE_FLOAT32_C( 629.18), SIMDE_FLOAT32_C( 564.19), SIMDE_FLOAT32_C( -570.62)), simde_mm_set_ps(SIMDE_FLOAT32_C( 505.64), SIMDE_FLOAT32_C( -444.97), SIMDE_FLOAT32_C( 38.59), SIMDE_FLOAT32_C( 692.28)), simde_mm_set_ps(SIMDE_FLOAT32_C( -1.86), SIMDE_FLOAT32_C( -1.41), SIMDE_FLOAT32_C( 14.62), SIMDE_FLOAT32_C( -0.82)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 37.78), SIMDE_FLOAT32_C( 840.67), SIMDE_FLOAT32_C( 915.86), SIMDE_FLOAT32_C( 911.39)), 
simde_mm_set_ps(SIMDE_FLOAT32_C( -720.31), SIMDE_FLOAT32_C( -591.88), SIMDE_FLOAT32_C( 194.42), SIMDE_FLOAT32_C( -372.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( -1.42), SIMDE_FLOAT32_C( 4.71), SIMDE_FLOAT32_C( -2.44)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_div_ps(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_div_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -793.82), SIMDE_FLOAT32_C( -34.36), SIMDE_FLOAT32_C( 80.43), SIMDE_FLOAT32_C( -203.82)), simde_mm_set_ps(SIMDE_FLOAT32_C( 824.19), SIMDE_FLOAT32_C( 444.15), SIMDE_FLOAT32_C( 477.20), SIMDE_FLOAT32_C( -757.29)), simde_mm_set_ps(SIMDE_FLOAT32_C( -793.82), SIMDE_FLOAT32_C( -34.36), SIMDE_FLOAT32_C( 80.43), SIMDE_FLOAT32_C( 0.27)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 305.50), SIMDE_FLOAT32_C( -288.99), SIMDE_FLOAT32_C( -230.29), SIMDE_FLOAT32_C( 214.35)), simde_mm_set_ps(SIMDE_FLOAT32_C( 811.96), SIMDE_FLOAT32_C( 942.32), SIMDE_FLOAT32_C( -733.60), SIMDE_FLOAT32_C( 612.28)), simde_mm_set_ps(SIMDE_FLOAT32_C( 305.50), SIMDE_FLOAT32_C( -288.99), SIMDE_FLOAT32_C( -230.29), SIMDE_FLOAT32_C( 0.35)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 273.05), SIMDE_FLOAT32_C( 372.05), SIMDE_FLOAT32_C( -656.44), SIMDE_FLOAT32_C( 200.77)), simde_mm_set_ps(SIMDE_FLOAT32_C( 71.50), SIMDE_FLOAT32_C( -992.70), SIMDE_FLOAT32_C( 826.54), SIMDE_FLOAT32_C( -872.88)), simde_mm_set_ps(SIMDE_FLOAT32_C( 273.05), SIMDE_FLOAT32_C( 372.05), SIMDE_FLOAT32_C( -656.44), SIMDE_FLOAT32_C( -0.23)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 246.96), SIMDE_FLOAT32_C( 324.32), SIMDE_FLOAT32_C( -964.89), SIMDE_FLOAT32_C( 778.58)), simde_mm_set_ps(SIMDE_FLOAT32_C( -432.74), SIMDE_FLOAT32_C( 520.15), SIMDE_FLOAT32_C( 952.89), SIMDE_FLOAT32_C( -146.52)), simde_mm_set_ps(SIMDE_FLOAT32_C( 246.96), SIMDE_FLOAT32_C( 
324.32), SIMDE_FLOAT32_C( -964.89), SIMDE_FLOAT32_C( -5.31)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 525.51), SIMDE_FLOAT32_C( 309.52), SIMDE_FLOAT32_C( -262.93), SIMDE_FLOAT32_C( -866.67)), simde_mm_set_ps(SIMDE_FLOAT32_C( 148.82), SIMDE_FLOAT32_C( 425.80), SIMDE_FLOAT32_C( 180.19), SIMDE_FLOAT32_C( 897.18)), simde_mm_set_ps(SIMDE_FLOAT32_C( 525.51), SIMDE_FLOAT32_C( 309.52), SIMDE_FLOAT32_C( -262.93), SIMDE_FLOAT32_C( -0.97)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 306.95), SIMDE_FLOAT32_C( 97.55), SIMDE_FLOAT32_C( 783.15), SIMDE_FLOAT32_C( 211.26)), simde_mm_set_ps(SIMDE_FLOAT32_C( -499.54), SIMDE_FLOAT32_C( 333.83), SIMDE_FLOAT32_C( 267.98), SIMDE_FLOAT32_C( -538.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 306.95), SIMDE_FLOAT32_C( 97.55), SIMDE_FLOAT32_C( 783.15), SIMDE_FLOAT32_C( -0.39)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -431.78), SIMDE_FLOAT32_C( -941.60), SIMDE_FLOAT32_C( 991.34), SIMDE_FLOAT32_C( -979.59)), simde_mm_set_ps(SIMDE_FLOAT32_C( 814.75), SIMDE_FLOAT32_C( 774.29), SIMDE_FLOAT32_C( 205.12), SIMDE_FLOAT32_C( 569.13)), simde_mm_set_ps(SIMDE_FLOAT32_C( -431.78), SIMDE_FLOAT32_C( -941.60), SIMDE_FLOAT32_C( 991.34), SIMDE_FLOAT32_C( -1.72)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -391.64), SIMDE_FLOAT32_C( 983.60), SIMDE_FLOAT32_C( 175.73), SIMDE_FLOAT32_C( -840.27)), simde_mm_set_ps(SIMDE_FLOAT32_C( 318.68), SIMDE_FLOAT32_C( -883.66), SIMDE_FLOAT32_C( -893.33), SIMDE_FLOAT32_C( 900.04)), simde_mm_set_ps(SIMDE_FLOAT32_C( -391.64), SIMDE_FLOAT32_C( 983.60), SIMDE_FLOAT32_C( 175.73), SIMDE_FLOAT32_C( -0.93)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_div_ss(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_extract_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int16_t r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C(-14353), INT16_C( 22942), INT16_C(-17535), INT16_C( 13021)), 13021 }, { 
simde_mm_set_pi16(INT16_C(-11477), INT16_C( 26597), INT16_C( 17199), INT16_C(-30275)), 17199 }, { simde_mm_set_pi16(INT16_C( 24440), INT16_C(-12101), INT16_C(-13621), INT16_C(-14757)), -12101 }, { simde_mm_set_pi16(INT16_C(-23205), INT16_C(-26140), INT16_C(-19797), INT16_C( 24953)), -23205 } }; int16_t r; r = simde_mm_extract_pi16(test_vec[0].a, 0); simde_assert_equal_i16(test_vec[0].r, r); r = simde_mm_extract_pi16(test_vec[1].a, 1); simde_assert_equal_i16(test_vec[1].r, r); r = simde_mm_extract_pi16(test_vec[2].a, 2); simde_assert_equal_i16(test_vec[2].r, r); r = simde_mm_extract_pi16(test_vec[3].a, 3); simde_assert_equal_i16(test_vec[3].r, r); return 0; } static int test_simde_m_pextrw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int16_t r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C(-14353), INT16_C( 22942), INT16_C(-17535), INT16_C( 13021)), 13021 }, { simde_mm_set_pi16(INT16_C(-11477), INT16_C( 26597), INT16_C( 17199), INT16_C(-30275)), 17199 }, { simde_mm_set_pi16(INT16_C( 24440), INT16_C(-12101), INT16_C(-13621), INT16_C(-14757)), -12101 }, { simde_mm_set_pi16(INT16_C(-23205), INT16_C(-26140), INT16_C(-19797), INT16_C( 24953)), -23205 } }; int16_t r; r = simde_m_pextrw(test_vec[0].a, 0); simde_assert_equal_i16(test_vec[0].r, r); r = simde_m_pextrw(test_vec[1].a, 1); simde_assert_equal_i16(test_vec[1].r, r); r = simde_m_pextrw(test_vec[2].a, 2); simde_assert_equal_i16(test_vec[2].r, r); r = simde_m_pextrw(test_vec[3].a, 3); simde_assert_equal_i16(test_vec[3].r, r); return 0; } static int test_simde_mm_insert_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int16_t b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C(-16831), INT16_C( 27027), INT16_C( -2166), INT16_C(-28596)), INT16_C( 2418), simde_mm_set_pi16(INT16_C(-16831), INT16_C( 27027), INT16_C( -2166), INT16_C( 2418)) }, { simde_mm_set_pi16(INT16_C(-28809), INT16_C( 15538), INT16_C( 32114), INT16_C( 2219)), INT16_C( 1863), simde_mm_set_pi16(INT16_C(-28809), INT16_C( 
15538), INT16_C( 1863), INT16_C( 2219)) }, { simde_mm_set_pi16(INT16_C(-29640), INT16_C(-13575), INT16_C( 25300), INT16_C( 9162)), INT16_C( 32619), simde_mm_set_pi16(INT16_C(-29640), INT16_C( 32619), INT16_C( 25300), INT16_C( 9162)) }, { simde_mm_set_pi16(INT16_C( 29435), INT16_C(-21501), INT16_C( 32589), INT16_C(-29370)), INT16_C( 6382), simde_mm_set_pi16(INT16_C( 6382), INT16_C(-21501), INT16_C( 32589), INT16_C(-29370)) } }; simde__m64 r; r = simde_mm_insert_pi16(test_vec[0].a, test_vec[0].b, 0); simde_test_x86_assert_equal_i16x4(r, test_vec[0].r); r = simde_mm_insert_pi16(test_vec[1].a, test_vec[1].b, 1); simde_test_x86_assert_equal_i16x4(r, test_vec[1].r); r = simde_mm_insert_pi16(test_vec[2].a, test_vec[2].b, 2); simde_test_x86_assert_equal_i16x4(r, test_vec[2].r); r = simde_mm_insert_pi16(test_vec[3].a, test_vec[3].b, 3); simde_test_x86_assert_equal_i16x4(r, test_vec[3].r); simde_mm_empty(); return 0; } static int test_simde_m_pinsrw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int16_t b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C(-16831), INT16_C( 27027), INT16_C( -2166), INT16_C(-28596)), INT16_C( 2418), simde_mm_set_pi16(INT16_C(-16831), INT16_C( 27027), INT16_C( -2166), INT16_C( 2418)) }, { simde_mm_set_pi16(INT16_C(-28809), INT16_C( 15538), INT16_C( 32114), INT16_C( 2219)), INT16_C( 1863), simde_mm_set_pi16(INT16_C(-28809), INT16_C( 15538), INT16_C( 1863), INT16_C( 2219)) }, { simde_mm_set_pi16(INT16_C(-29640), INT16_C(-13575), INT16_C( 25300), INT16_C( 9162)), INT16_C( 32619), simde_mm_set_pi16(INT16_C(-29640), INT16_C( 32619), INT16_C( 25300), INT16_C( 9162)) }, { simde_mm_set_pi16(INT16_C( 29435), INT16_C(-21501), INT16_C( 32589), INT16_C(-29370)), INT16_C( 6382), simde_mm_set_pi16(INT16_C( 6382), INT16_C(-21501), INT16_C( 32589), INT16_C(-29370)) } }; simde__m64 r; r = simde_m_pinsrw(test_vec[0].a, test_vec[0].b, 0); simde_test_x86_assert_equal_i16x4(r, test_vec[0].r); r = simde_m_pinsrw(test_vec[1].a, test_vec[1].b, 1); 
simde_test_x86_assert_equal_i16x4(r, test_vec[1].r); r = simde_m_pinsrw(test_vec[2].a, test_vec[2].b, 2); simde_test_x86_assert_equal_i16x4(r, test_vec[2].r); r = simde_m_pinsrw(test_vec[3].a, test_vec[3].b, 3); simde_test_x86_assert_equal_i16x4(r, test_vec[3].r); simde_mm_empty(); return 0; } static int test_simde_mm_load_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { SIMDE_ALIGN_LIKE_16(simde__m128) const simde_float32 a[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 796.55), SIMDE_FLOAT32_C( -42.53), SIMDE_FLOAT32_C( 966.47), SIMDE_FLOAT32_C( -905.61) } }, { { SIMDE_FLOAT32_C( 758.03), SIMDE_FLOAT32_C( 301.98), SIMDE_FLOAT32_C( 267.59), SIMDE_FLOAT32_C( -948.37) } }, { { SIMDE_FLOAT32_C( 433.23), SIMDE_FLOAT32_C( -486.34), SIMDE_FLOAT32_C( -487.77), SIMDE_FLOAT32_C( 180.47) } }, { { SIMDE_FLOAT32_C( -605.53), SIMDE_FLOAT32_C( -707.20), SIMDE_FLOAT32_C( 441.44), SIMDE_FLOAT32_C( -927.94) } }, { { SIMDE_FLOAT32_C( -275.54), SIMDE_FLOAT32_C( 31.37), SIMDE_FLOAT32_C( 841.81), SIMDE_FLOAT32_C( -508.63) } }, { { SIMDE_FLOAT32_C( 463.05), SIMDE_FLOAT32_C( 911.09), SIMDE_FLOAT32_C( -153.25), SIMDE_FLOAT32_C( 259.61) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde_test_x86_assert_equal_f32x4(simde_mm_load_ps(test_vec[i].a), simde_mm_loadu_ps(test_vec[i].a), 1); } return 0; } static int test_simde_mm_load_ps1(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float32 a; simde__m128 r; } test_vec[8] = { { SIMDE_FLOAT32_C( 109.27), simde_mm_set_ps(SIMDE_FLOAT32_C( 109.27), SIMDE_FLOAT32_C( 109.27), SIMDE_FLOAT32_C( 109.27), SIMDE_FLOAT32_C( 109.27)) }, { SIMDE_FLOAT32_C( -226.37), simde_mm_set_ps(SIMDE_FLOAT32_C( -226.37), SIMDE_FLOAT32_C( -226.37), SIMDE_FLOAT32_C( -226.37), SIMDE_FLOAT32_C( -226.37)) }, { SIMDE_FLOAT32_C( 574.72), simde_mm_set_ps(SIMDE_FLOAT32_C( 574.72), SIMDE_FLOAT32_C( 574.72), SIMDE_FLOAT32_C( 574.72), SIMDE_FLOAT32_C( 574.72)) }, { SIMDE_FLOAT32_C( -930.02), simde_mm_set_ps(SIMDE_FLOAT32_C( -930.02), SIMDE_FLOAT32_C( 
-930.02), SIMDE_FLOAT32_C( -930.02), SIMDE_FLOAT32_C( -930.02)) }, { SIMDE_FLOAT32_C( -710.77), simde_mm_set_ps(SIMDE_FLOAT32_C( -710.77), SIMDE_FLOAT32_C( -710.77), SIMDE_FLOAT32_C( -710.77), SIMDE_FLOAT32_C( -710.77)) }, { SIMDE_FLOAT32_C( 446.62), simde_mm_set_ps(SIMDE_FLOAT32_C( 446.62), SIMDE_FLOAT32_C( 446.62), SIMDE_FLOAT32_C( 446.62), SIMDE_FLOAT32_C( 446.62)) }, { SIMDE_FLOAT32_C( -974.29), simde_mm_set_ps(SIMDE_FLOAT32_C( -974.29), SIMDE_FLOAT32_C( -974.29), SIMDE_FLOAT32_C( -974.29), SIMDE_FLOAT32_C( -974.29)) }, { SIMDE_FLOAT32_C( -406.72), simde_mm_set_ps(SIMDE_FLOAT32_C( -406.72), SIMDE_FLOAT32_C( -406.72), SIMDE_FLOAT32_C( -406.72), SIMDE_FLOAT32_C( -406.72)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_load_ps1(&(test_vec[i].a)); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_load_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float32 a; simde__m128 r; } test_vec[8] = { { SIMDE_FLOAT32_C( 982.60), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 982.60)) }, { SIMDE_FLOAT32_C( -862.06), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -862.06)) }, { SIMDE_FLOAT32_C( 458.44), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 458.44)) }, { SIMDE_FLOAT32_C( -232.03), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -232.03)) }, { SIMDE_FLOAT32_C( -187.73), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -187.73)) }, { SIMDE_FLOAT32_C( 614.96), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 614.96)) }, { SIMDE_FLOAT32_C( -222.01), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( -222.01)) }, { SIMDE_FLOAT32_C( -65.37), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -65.37)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_load_ss(&(test_vec[i].a)); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_loadh_pi (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 b[2]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -931.10), SIMDE_FLOAT32_C( 146.10), SIMDE_FLOAT32_C( 323.97), SIMDE_FLOAT32_C( 666.93) }, { SIMDE_FLOAT32_C( 536.39), SIMDE_FLOAT32_C( -55.33) }, { SIMDE_FLOAT32_C( -931.10), SIMDE_FLOAT32_C( 146.10), SIMDE_FLOAT32_C( 536.39), SIMDE_FLOAT32_C( -55.33) } }, { { SIMDE_FLOAT32_C( 232.94), SIMDE_FLOAT32_C( 310.53), SIMDE_FLOAT32_C( 277.06), SIMDE_FLOAT32_C( 23.22) }, { SIMDE_FLOAT32_C( -820.93), SIMDE_FLOAT32_C( -298.75) }, { SIMDE_FLOAT32_C( 232.94), SIMDE_FLOAT32_C( 310.53), SIMDE_FLOAT32_C( -820.93), SIMDE_FLOAT32_C( -298.75) } }, { { SIMDE_FLOAT32_C( 212.42), SIMDE_FLOAT32_C( -85.39), SIMDE_FLOAT32_C( 335.83), SIMDE_FLOAT32_C( -627.07) }, { SIMDE_FLOAT32_C( -208.48), SIMDE_FLOAT32_C( -609.05) }, { SIMDE_FLOAT32_C( 212.42), SIMDE_FLOAT32_C( -85.39), SIMDE_FLOAT32_C( -208.48), SIMDE_FLOAT32_C( -609.05) } }, { { SIMDE_FLOAT32_C( 560.76), SIMDE_FLOAT32_C( -751.94), SIMDE_FLOAT32_C( -976.19), SIMDE_FLOAT32_C( -259.36) }, { SIMDE_FLOAT32_C( 981.44), SIMDE_FLOAT32_C( -348.97) }, { SIMDE_FLOAT32_C( 560.76), SIMDE_FLOAT32_C( -751.94), SIMDE_FLOAT32_C( 981.44), SIMDE_FLOAT32_C( -348.97) } }, { { SIMDE_FLOAT32_C( 282.19), SIMDE_FLOAT32_C( 412.73), SIMDE_FLOAT32_C( -115.39), SIMDE_FLOAT32_C( -766.32) }, { SIMDE_FLOAT32_C( -979.64), SIMDE_FLOAT32_C( 46.41) }, { SIMDE_FLOAT32_C( 282.19), SIMDE_FLOAT32_C( 412.73), SIMDE_FLOAT32_C( -979.64), SIMDE_FLOAT32_C( 46.41) } }, { { SIMDE_FLOAT32_C( 430.25), SIMDE_FLOAT32_C( 
-910.75), SIMDE_FLOAT32_C( -807.49), SIMDE_FLOAT32_C( -245.79) }, { SIMDE_FLOAT32_C( 756.18), SIMDE_FLOAT32_C( 728.90) }, { SIMDE_FLOAT32_C( 430.25), SIMDE_FLOAT32_C( -910.75), SIMDE_FLOAT32_C( 756.18), SIMDE_FLOAT32_C( 728.90) } }, { { SIMDE_FLOAT32_C( 698.88), SIMDE_FLOAT32_C( -10.88), SIMDE_FLOAT32_C( 39.43), SIMDE_FLOAT32_C( -24.06) }, { SIMDE_FLOAT32_C( -987.66), SIMDE_FLOAT32_C( 218.51) }, { SIMDE_FLOAT32_C( 698.88), SIMDE_FLOAT32_C( -10.88), SIMDE_FLOAT32_C( -987.66), SIMDE_FLOAT32_C( 218.51) } }, { { SIMDE_FLOAT32_C( 677.19), SIMDE_FLOAT32_C( 224.76), SIMDE_FLOAT32_C( -866.88), SIMDE_FLOAT32_C( 13.01) }, { SIMDE_FLOAT32_C( 597.69), SIMDE_FLOAT32_C( -75.37) }, { SIMDE_FLOAT32_C( 677.19), SIMDE_FLOAT32_C( 224.76), SIMDE_FLOAT32_C( 597.69), SIMDE_FLOAT32_C( -75.37) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_loadh_pi(a, SIMDE_ALIGN_CAST(const simde__m64*, test_vec[i].b)); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_loadl_pi(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde_float32 b[2]; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -759.67), SIMDE_FLOAT32_C( -161.20), SIMDE_FLOAT32_C( -647.60), SIMDE_FLOAT32_C( -354.59)), { SIMDE_FLOAT32_C( 151.83), SIMDE_FLOAT32_C( 650.74) }, simde_mm_set_ps(SIMDE_FLOAT32_C( -759.67), SIMDE_FLOAT32_C( -161.20), SIMDE_FLOAT32_C( 650.74), SIMDE_FLOAT32_C( 151.83)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 622.29), SIMDE_FLOAT32_C( 971.32), SIMDE_FLOAT32_C( 189.94), SIMDE_FLOAT32_C( -546.65)), { SIMDE_FLOAT32_C( 704.03), SIMDE_FLOAT32_C( -840.05) }, simde_mm_set_ps(SIMDE_FLOAT32_C( 622.29), SIMDE_FLOAT32_C( 971.32), SIMDE_FLOAT32_C( -840.05), SIMDE_FLOAT32_C( 704.03)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -740.17), SIMDE_FLOAT32_C( 779.38), SIMDE_FLOAT32_C( -852.02), SIMDE_FLOAT32_C( 399.85)), { SIMDE_FLOAT32_C( -182.16), 
SIMDE_FLOAT32_C( -228.45) }, simde_mm_set_ps(SIMDE_FLOAT32_C( -740.17), SIMDE_FLOAT32_C( 779.38), SIMDE_FLOAT32_C( -228.45), SIMDE_FLOAT32_C( -182.16)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -846.70), SIMDE_FLOAT32_C( 853.34), SIMDE_FLOAT32_C( -863.68), SIMDE_FLOAT32_C( -823.68)), { SIMDE_FLOAT32_C( -970.25), SIMDE_FLOAT32_C( 188.74) }, simde_mm_set_ps(SIMDE_FLOAT32_C( -846.70), SIMDE_FLOAT32_C( 853.34), SIMDE_FLOAT32_C( 188.74), SIMDE_FLOAT32_C( -970.25)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 199.88), SIMDE_FLOAT32_C( -915.58), SIMDE_FLOAT32_C( 899.92), SIMDE_FLOAT32_C( 424.76)), { SIMDE_FLOAT32_C( 674.47), SIMDE_FLOAT32_C( -152.19) }, simde_mm_set_ps(SIMDE_FLOAT32_C( 199.88), SIMDE_FLOAT32_C( -915.58), SIMDE_FLOAT32_C( -152.19), SIMDE_FLOAT32_C( 674.47)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 686.01), SIMDE_FLOAT32_C( 545.43), SIMDE_FLOAT32_C( -50.76), SIMDE_FLOAT32_C( 611.26)), { SIMDE_FLOAT32_C( -551.50), SIMDE_FLOAT32_C( -609.71) }, simde_mm_set_ps(SIMDE_FLOAT32_C( 686.01), SIMDE_FLOAT32_C( 545.43), SIMDE_FLOAT32_C( -609.71), SIMDE_FLOAT32_C( -551.50)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 29.24), SIMDE_FLOAT32_C( 172.70), SIMDE_FLOAT32_C( -27.31), SIMDE_FLOAT32_C( -586.10)), { SIMDE_FLOAT32_C( -893.37), SIMDE_FLOAT32_C( 126.78) }, simde_mm_set_ps(SIMDE_FLOAT32_C( 29.24), SIMDE_FLOAT32_C( 172.70), SIMDE_FLOAT32_C( 126.78), SIMDE_FLOAT32_C( -893.37)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 57.95), SIMDE_FLOAT32_C( 614.04), SIMDE_FLOAT32_C( 530.49), SIMDE_FLOAT32_C( 660.83)), { SIMDE_FLOAT32_C( -772.86), SIMDE_FLOAT32_C( 749.53) }, simde_mm_set_ps(SIMDE_FLOAT32_C( 57.95), SIMDE_FLOAT32_C( 614.04), SIMDE_FLOAT32_C( 749.53), SIMDE_FLOAT32_C( -772.86)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_loadl_pi(test_vec[i].a, SIMDE_ALIGN_CAST(simde__m64 const*, test_vec[i].b)); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_loadr_ps (SIMDE_MUNIT_TEST_ARGS) { static const 
struct { SIMDE_ALIGN_LIKE_16(simde__m128) const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -880.56), SIMDE_FLOAT32_C( -284.61), SIMDE_FLOAT32_C( -934.21), SIMDE_FLOAT32_C( 785.08) }, { SIMDE_FLOAT32_C( 785.08), SIMDE_FLOAT32_C( -934.21), SIMDE_FLOAT32_C( -284.61), SIMDE_FLOAT32_C( -880.56) } }, { { SIMDE_FLOAT32_C( -829.46), SIMDE_FLOAT32_C( -887.14), SIMDE_FLOAT32_C( 710.11), SIMDE_FLOAT32_C( -137.75) }, { SIMDE_FLOAT32_C( -137.75), SIMDE_FLOAT32_C( 710.11), SIMDE_FLOAT32_C( -887.14), SIMDE_FLOAT32_C( -829.46) } }, { { SIMDE_FLOAT32_C( 524.87), SIMDE_FLOAT32_C( -290.80), SIMDE_FLOAT32_C( 257.30), SIMDE_FLOAT32_C( -491.27) }, { SIMDE_FLOAT32_C( -491.27), SIMDE_FLOAT32_C( 257.30), SIMDE_FLOAT32_C( -290.80), SIMDE_FLOAT32_C( 524.87) } }, { { SIMDE_FLOAT32_C( -344.66), SIMDE_FLOAT32_C( -151.52), SIMDE_FLOAT32_C( -772.85), SIMDE_FLOAT32_C( -708.41) }, { SIMDE_FLOAT32_C( -708.41), SIMDE_FLOAT32_C( -772.85), SIMDE_FLOAT32_C( -151.52), SIMDE_FLOAT32_C( -344.66) } }, { { SIMDE_FLOAT32_C( 875.04), SIMDE_FLOAT32_C( -304.95), SIMDE_FLOAT32_C( 237.12), SIMDE_FLOAT32_C( 595.93) }, { SIMDE_FLOAT32_C( 595.93), SIMDE_FLOAT32_C( 237.12), SIMDE_FLOAT32_C( -304.95), SIMDE_FLOAT32_C( 875.04) } }, { { SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 875.01), SIMDE_FLOAT32_C( 682.00), SIMDE_FLOAT32_C( 758.45) }, { SIMDE_FLOAT32_C( 758.45), SIMDE_FLOAT32_C( 682.00), SIMDE_FLOAT32_C( 875.01), SIMDE_FLOAT32_C( -182.45) } }, { { SIMDE_FLOAT32_C( -461.05), SIMDE_FLOAT32_C( -321.67), SIMDE_FLOAT32_C( -167.95), SIMDE_FLOAT32_C( -232.59) }, { SIMDE_FLOAT32_C( -232.59), SIMDE_FLOAT32_C( -167.95), SIMDE_FLOAT32_C( -321.67), SIMDE_FLOAT32_C( -461.05) } }, { { SIMDE_FLOAT32_C( 323.21), SIMDE_FLOAT32_C( -420.37), SIMDE_FLOAT32_C( -966.28), SIMDE_FLOAT32_C( 442.65) }, { SIMDE_FLOAT32_C( 442.65), SIMDE_FLOAT32_C( -966.28), SIMDE_FLOAT32_C( -420.37), SIMDE_FLOAT32_C( 323.21) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 
r = simde_mm_loadr_ps(test_vec[i].a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_loadu_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float32 a[4]; simde__m128 r; } test_vec[8] = { { { SIMDE_FLOAT32_C( 658.17), SIMDE_FLOAT32_C( -216.72), SIMDE_FLOAT32_C( 812.08), SIMDE_FLOAT32_C( 746.73) }, simde_mm_set_ps(SIMDE_FLOAT32_C( 746.73), SIMDE_FLOAT32_C( 812.08), SIMDE_FLOAT32_C( -216.72), SIMDE_FLOAT32_C( 658.17)) }, { { SIMDE_FLOAT32_C( -626.80), SIMDE_FLOAT32_C( 899.16), SIMDE_FLOAT32_C( -494.66), SIMDE_FLOAT32_C( -802.94) }, simde_mm_set_ps(SIMDE_FLOAT32_C( -802.94), SIMDE_FLOAT32_C( -494.66), SIMDE_FLOAT32_C( 899.16), SIMDE_FLOAT32_C( -626.80)) }, { { SIMDE_FLOAT32_C( -126.83), SIMDE_FLOAT32_C( -920.21), SIMDE_FLOAT32_C( 37.07), SIMDE_FLOAT32_C( 514.70) }, simde_mm_set_ps(SIMDE_FLOAT32_C( 514.70), SIMDE_FLOAT32_C( 37.07), SIMDE_FLOAT32_C( -920.21), SIMDE_FLOAT32_C( -126.83)) }, { { SIMDE_FLOAT32_C( 591.39), SIMDE_FLOAT32_C( -2.44), SIMDE_FLOAT32_C( -874.39), SIMDE_FLOAT32_C( -396.41) }, simde_mm_set_ps(SIMDE_FLOAT32_C( -396.41), SIMDE_FLOAT32_C( -874.39), SIMDE_FLOAT32_C( -2.44), SIMDE_FLOAT32_C( 591.39)) }, { { SIMDE_FLOAT32_C( -253.20), SIMDE_FLOAT32_C( -832.85), SIMDE_FLOAT32_C( -949.34), SIMDE_FLOAT32_C( -940.41) }, simde_mm_set_ps(SIMDE_FLOAT32_C( -940.41), SIMDE_FLOAT32_C( -949.34), SIMDE_FLOAT32_C( -832.85), SIMDE_FLOAT32_C( -253.20)) }, { { SIMDE_FLOAT32_C( 693.54), SIMDE_FLOAT32_C( 223.92), SIMDE_FLOAT32_C( -939.11), SIMDE_FLOAT32_C( 355.93) }, simde_mm_set_ps(SIMDE_FLOAT32_C( 355.93), SIMDE_FLOAT32_C( -939.11), SIMDE_FLOAT32_C( 223.92), SIMDE_FLOAT32_C( 693.54)) }, { { SIMDE_FLOAT32_C( -8.12), SIMDE_FLOAT32_C( 751.96), SIMDE_FLOAT32_C( -621.91), SIMDE_FLOAT32_C( 106.31) }, simde_mm_set_ps(SIMDE_FLOAT32_C( 106.31), SIMDE_FLOAT32_C( -621.91), SIMDE_FLOAT32_C( 751.96), SIMDE_FLOAT32_C( -8.12)) }, { { SIMDE_FLOAT32_C( -14.41), SIMDE_FLOAT32_C( 565.48), SIMDE_FLOAT32_C( 361.01), 
SIMDE_FLOAT32_C( 431.24) }, simde_mm_set_ps(SIMDE_FLOAT32_C( 431.24), SIMDE_FLOAT32_C( 361.01), SIMDE_FLOAT32_C( 565.48), SIMDE_FLOAT32_C( -14.41)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_loadu_ps(test_vec[i].a); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_maskmove_si64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int8_t b[8]; simde__m64 mask; int8_t r[8]; } test_vec[8] = { { simde_mm_set_pi8(INT8_C( -5), INT8_C( 112), INT8_C( 109), INT8_C( -9), INT8_C( -10), INT8_C( -30), INT8_C(-103), INT8_C( -13)), { INT8_C( -67), INT8_C( -21), INT8_C( 107), INT8_C( 96), INT8_C( -47), INT8_C( 95), INT8_C( 34), INT8_C(-108) }, simde_mm_set_pi8(-112, 64, -92, 19, -70, -49, -42, 75), { INT8_C( -67), INT8_C(-103), INT8_C( -30), INT8_C( -10), INT8_C( -47), INT8_C( 109), INT8_C( 34), INT8_C( -5) } }, { simde_mm_set_pi8(INT8_C(-111), INT8_C( -8), INT8_C( -84), INT8_C( 80), INT8_C( 8), INT8_C( -61), INT8_C( -80), INT8_C( -85)), { INT8_C( 66), INT8_C( 19), INT8_C( 96), INT8_C( 6), INT8_C( -30), INT8_C( 85), INT8_C( 97), INT8_C( -44) }, simde_mm_set_pi8( -90, -11, -113, 11, -96, -125, 121, -113), { INT8_C( -85), INT8_C( 19), INT8_C( -61), INT8_C( 8), INT8_C( -30), INT8_C( -84), INT8_C( -8), INT8_C(-111) } }, { simde_mm_set_pi8(INT8_C( 120), INT8_C(-105), INT8_C( 2), INT8_C( 21), INT8_C( 93), INT8_C(-124), INT8_C( -2), INT8_C( 79)), { INT8_C( 87), INT8_C( 98), INT8_C( -52), INT8_C( 28), INT8_C( 37), INT8_C(-120), INT8_C( 109), INT8_C( 79) }, simde_mm_set_pi8( 34, 106, -74, 83, -114, -10, 67, 111), { INT8_C( 87), INT8_C( 98), INT8_C(-124), INT8_C( 93), INT8_C( 37), INT8_C( 2), INT8_C( 109), INT8_C( 79) } }, { simde_mm_set_pi8(INT8_C( -26), INT8_C( 96), INT8_C(-115), INT8_C( 78), INT8_C( 35), INT8_C( 49), INT8_C( 36), INT8_C( -25)), { INT8_C( 29), INT8_C( 28), INT8_C( 30), INT8_C( -80), INT8_C( -12), INT8_C( 81), INT8_C( -81), INT8_C( 120) }, simde_mm_set_pi8( -59, -55, 
-111, -119, 86, -5, 74, 2), { INT8_C( 29), INT8_C( 28), INT8_C( 49), INT8_C( -80), INT8_C( 78), INT8_C(-115), INT8_C( 96), INT8_C( -26) } }, { simde_mm_set_pi8(INT8_C( -37), INT8_C( 41), INT8_C( 123), INT8_C(-107), INT8_C(-123), INT8_C( 32), INT8_C( 33), INT8_C( 5)), { INT8_C(-126), INT8_C( 94), INT8_C( -30), INT8_C( -13), INT8_C( 99), INT8_C( 126), INT8_C( 16), INT8_C( 5) }, simde_mm_set_pi8( 113, 1, 66, 5, -18, 77, -41, -4), { INT8_C( 5), INT8_C( 33), INT8_C( -30), INT8_C(-123), INT8_C( 99), INT8_C( 126), INT8_C( 16), INT8_C( 5) } }, { simde_mm_set_pi8(INT8_C( -90), INT8_C(-113), INT8_C( 97), INT8_C( 73), INT8_C( 33), INT8_C( -3), INT8_C( 109), INT8_C( 63)), { INT8_C( -41), INT8_C( -87), INT8_C(-115), INT8_C( -26), INT8_C( 68), INT8_C( -33), INT8_C( 7), INT8_C( -69) }, simde_mm_set_pi8( 28, 106, -56, -23, -126, -119, 22, -23), { INT8_C( 63), INT8_C( -87), INT8_C( -3), INT8_C( 33), INT8_C( 73), INT8_C( 97), INT8_C( 7), INT8_C( -69) } }, { simde_mm_set_pi8(INT8_C( 12), INT8_C( 9), INT8_C( -66), INT8_C(-114), INT8_C( 19), INT8_C( 94), INT8_C(-103), INT8_C( 38)), { INT8_C( 102), INT8_C( -71), INT8_C( 55), INT8_C( 40), INT8_C(-122), INT8_C( 94), INT8_C( 71), INT8_C( -99) }, simde_mm_set_pi8( 78, 82, -126, 33, 118, 93, -125, 113), { INT8_C( 102), INT8_C(-103), INT8_C( 55), INT8_C( 40), INT8_C(-122), INT8_C( -66), INT8_C( 71), INT8_C( -99) } }, { simde_mm_set_pi8(INT8_C( 67), INT8_C( 108), INT8_C( 55), INT8_C( -68), INT8_C( -5), INT8_C( -18), INT8_C( 115), INT8_C( 126)), { INT8_C( 92), INT8_C( -82), INT8_C( 101), INT8_C( -48), INT8_C( 45), INT8_C( 101), INT8_C( 95), INT8_C( 51) }, simde_mm_set_pi8( -92, -27, 65, -21, 36, 126, 80, 50), { INT8_C( 92), INT8_C( -82), INT8_C( 101), INT8_C( -48), INT8_C( -68), INT8_C( 101), INT8_C( 108), INT8_C( 67) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int8_t r[8], e[8]; simde_memcpy(r, test_vec[i].b, sizeof(r)); simde_memcpy(e, test_vec[i].r, sizeof(e)); #if defined(SIMDE_X86_SSE_NATIVE) && 
defined(SIMDE_NATIVE_ALIASES_TESTING) simde_mm_maskmove_si64(test_vec[i].a, test_vec[i].mask, HEDLEY_REINTERPRET_CAST(char *, r)); #else simde_mm_maskmove_si64(test_vec[i].a, test_vec[i].mask, r); #endif simde_assert_equal_vi8(8, r, e); } simde_mm_empty(); return 0; } static int test_simde_m_maskmovq(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int8_t b[8]; simde__m64 mask; int8_t r[8]; } test_vec[8] = { { simde_mm_set_pi8(INT8_C( -5), INT8_C( 112), INT8_C( 109), INT8_C( -9), INT8_C( -10), INT8_C( -30), INT8_C(-103), INT8_C( -13)), { INT8_C( -67), INT8_C( -21), INT8_C( 107), INT8_C( 96), INT8_C( -47), INT8_C( 95), INT8_C( 34), INT8_C(-108) }, simde_mm_set_pi8(-112, 64, -92, 19, -70, -49, -42, 75), { INT8_C( -67), INT8_C(-103), INT8_C( -30), INT8_C( -10), INT8_C( -47), INT8_C( 109), INT8_C( 34), INT8_C( -5) } }, { simde_mm_set_pi8(INT8_C(-111), INT8_C( -8), INT8_C( -84), INT8_C( 80), INT8_C( 8), INT8_C( -61), INT8_C( -80), INT8_C( -85)), { INT8_C( 66), INT8_C( 19), INT8_C( 96), INT8_C( 6), INT8_C( -30), INT8_C( 85), INT8_C( 97), INT8_C( -44) }, simde_mm_set_pi8( -90, -11, -113, 11, -96, -125, 121, -113), { INT8_C( -85), INT8_C( 19), INT8_C( -61), INT8_C( 8), INT8_C( -30), INT8_C( -84), INT8_C( -8), INT8_C(-111) } }, { simde_mm_set_pi8(INT8_C( 120), INT8_C(-105), INT8_C( 2), INT8_C( 21), INT8_C( 93), INT8_C(-124), INT8_C( -2), INT8_C( 79)), { INT8_C( 87), INT8_C( 98), INT8_C( -52), INT8_C( 28), INT8_C( 37), INT8_C(-120), INT8_C( 109), INT8_C( 79) }, simde_mm_set_pi8( 34, 106, -74, 83, -114, -10, 67, 111), { INT8_C( 87), INT8_C( 98), INT8_C(-124), INT8_C( 93), INT8_C( 37), INT8_C( 2), INT8_C( 109), INT8_C( 79) } }, { simde_mm_set_pi8(INT8_C( -26), INT8_C( 96), INT8_C(-115), INT8_C( 78), INT8_C( 35), INT8_C( 49), INT8_C( 36), INT8_C( -25)), { INT8_C( 29), INT8_C( 28), INT8_C( 30), INT8_C( -80), INT8_C( -12), INT8_C( 81), INT8_C( -81), INT8_C( 120) }, simde_mm_set_pi8( -59, -55, -111, -119, 86, -5, 74, 2), { INT8_C( 29), INT8_C( 28), INT8_C( 49), INT8_C( -80), 
INT8_C( 78), INT8_C(-115), INT8_C( 96), INT8_C( -26) } }, { simde_mm_set_pi8(INT8_C( -37), INT8_C( 41), INT8_C( 123), INT8_C(-107), INT8_C(-123), INT8_C( 32), INT8_C( 33), INT8_C( 5)), { INT8_C(-126), INT8_C( 94), INT8_C( -30), INT8_C( -13), INT8_C( 99), INT8_C( 126), INT8_C( 16), INT8_C( 5) }, simde_mm_set_pi8( 113, 1, 66, 5, -18, 77, -41, -4), { INT8_C( 5), INT8_C( 33), INT8_C( -30), INT8_C(-123), INT8_C( 99), INT8_C( 126), INT8_C( 16), INT8_C( 5) } }, { simde_mm_set_pi8(INT8_C( -90), INT8_C(-113), INT8_C( 97), INT8_C( 73), INT8_C( 33), INT8_C( -3), INT8_C( 109), INT8_C( 63)), { INT8_C( -41), INT8_C( -87), INT8_C(-115), INT8_C( -26), INT8_C( 68), INT8_C( -33), INT8_C( 7), INT8_C( -69) }, simde_mm_set_pi8( 28, 106, -56, -23, -126, -119, 22, -23), { INT8_C( 63), INT8_C( -87), INT8_C( -3), INT8_C( 33), INT8_C( 73), INT8_C( 97), INT8_C( 7), INT8_C( -69) } }, { simde_mm_set_pi8(INT8_C( 12), INT8_C( 9), INT8_C( -66), INT8_C(-114), INT8_C( 19), INT8_C( 94), INT8_C(-103), INT8_C( 38)), { INT8_C( 102), INT8_C( -71), INT8_C( 55), INT8_C( 40), INT8_C(-122), INT8_C( 94), INT8_C( 71), INT8_C( -99) }, simde_mm_set_pi8( 78, 82, -126, 33, 118, 93, -125, 113), { INT8_C( 102), INT8_C(-103), INT8_C( 55), INT8_C( 40), INT8_C(-122), INT8_C( -66), INT8_C( 71), INT8_C( -99) } }, { simde_mm_set_pi8(INT8_C( 67), INT8_C( 108), INT8_C( 55), INT8_C( -68), INT8_C( -5), INT8_C( -18), INT8_C( 115), INT8_C( 126)), { INT8_C( 92), INT8_C( -82), INT8_C( 101), INT8_C( -48), INT8_C( 45), INT8_C( 101), INT8_C( 95), INT8_C( 51) }, simde_mm_set_pi8( -92, -27, 65, -21, 36, 126, 80, 50), { INT8_C( 92), INT8_C( -82), INT8_C( 101), INT8_C( -48), INT8_C( -68), INT8_C( 101), INT8_C( 108), INT8_C( 67) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int8_t r[8], e[8]; simde_memcpy(r, test_vec[i].b, sizeof(r)); simde_memcpy(e, test_vec[i].r, sizeof(e)); #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_NATIVE_ALIASES_TESTING) simde_m_maskmovq(test_vec[i].a, test_vec[i].mask, 
HEDLEY_REINTERPRET_CAST(char *, r)); #else simde_m_maskmovq(test_vec[i].a, test_vec[i].mask, r); #endif simde_assert_equal_vi8(8, r, e); } simde_mm_empty(); return 0; } static int test_simde_mm_max_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -3941), INT16_C(-29039), INT16_C( 9572), INT16_C( 17112)), simde_mm_set_pi16(INT16_C(-32765), INT16_C( 1912), INT16_C( 7655), INT16_C( -8077)), simde_mm_set_pi16(INT16_C( -3941), INT16_C( 1912), INT16_C( 9572), INT16_C( 17112)) }, { simde_mm_set_pi16(INT16_C( -1581), INT16_C( 31995), INT16_C( 26494), INT16_C(-18794)), simde_mm_set_pi16(INT16_C( 16922), INT16_C(-22945), INT16_C( 32135), INT16_C(-23580)), simde_mm_set_pi16(INT16_C( 16922), INT16_C( 31995), INT16_C( 32135), INT16_C(-18794)) }, { simde_mm_set_pi16(INT16_C( -5356), INT16_C(-22344), INT16_C( 30948), INT16_C(-20444)), simde_mm_set_pi16(INT16_C( 23575), INT16_C( 15204), INT16_C( -2804), INT16_C( 7606)), simde_mm_set_pi16(INT16_C( 23575), INT16_C( 15204), INT16_C( 30948), INT16_C( 7606)) }, { simde_mm_set_pi16(INT16_C(-15388), INT16_C( -4009), INT16_C(-12203), INT16_C( 19351)), simde_mm_set_pi16(INT16_C(-11390), INT16_C(-14248), INT16_C( 6877), INT16_C(-11224)), simde_mm_set_pi16(INT16_C(-11390), INT16_C( -4009), INT16_C( 6877), INT16_C( 19351)) }, { simde_mm_set_pi16(INT16_C( 9439), INT16_C(-12374), INT16_C( 28008), INT16_C(-15421)), simde_mm_set_pi16(INT16_C( 2278), INT16_C( 32415), INT16_C(-22150), INT16_C( 3793)), simde_mm_set_pi16(INT16_C( 9439), INT16_C( 32415), INT16_C( 28008), INT16_C( 3793)) }, { simde_mm_set_pi16(INT16_C( 8379), INT16_C(-13717), INT16_C( 19788), INT16_C(-10969)), simde_mm_set_pi16(INT16_C(-11192), INT16_C(-31648), INT16_C( 6691), INT16_C(-15431)), simde_mm_set_pi16(INT16_C( 8379), INT16_C(-13717), INT16_C( 19788), INT16_C(-10969)) }, { simde_mm_set_pi16(INT16_C( -9393), INT16_C( 10266), INT16_C( 19011), INT16_C( 29123)), simde_mm_set_pi16(INT16_C( -1050), 
INT16_C( 12724), INT16_C(-11587), INT16_C( 10160)), simde_mm_set_pi16(INT16_C( -1050), INT16_C( 12724), INT16_C( 19011), INT16_C( 29123)) }, { simde_mm_set_pi16(INT16_C( 3665), INT16_C(-28011), INT16_C(-19799), INT16_C( 10281)), simde_mm_set_pi16(INT16_C( 18116), INT16_C( 25642), INT16_C( 26951), INT16_C( 15276)), simde_mm_set_pi16(INT16_C( 18116), INT16_C( 25642), INT16_C( 26951), INT16_C( 15276)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_max_pi16(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } return 0; } static int test_simde_m_pmaxsw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -3941), INT16_C(-29039), INT16_C( 9572), INT16_C( 17112)), simde_mm_set_pi16(INT16_C(-32765), INT16_C( 1912), INT16_C( 7655), INT16_C( -8077)), simde_mm_set_pi16(INT16_C( -3941), INT16_C( 1912), INT16_C( 9572), INT16_C( 17112)) }, { simde_mm_set_pi16(INT16_C( -1581), INT16_C( 31995), INT16_C( 26494), INT16_C(-18794)), simde_mm_set_pi16(INT16_C( 16922), INT16_C(-22945), INT16_C( 32135), INT16_C(-23580)), simde_mm_set_pi16(INT16_C( 16922), INT16_C( 31995), INT16_C( 32135), INT16_C(-18794)) }, { simde_mm_set_pi16(INT16_C( -5356), INT16_C(-22344), INT16_C( 30948), INT16_C(-20444)), simde_mm_set_pi16(INT16_C( 23575), INT16_C( 15204), INT16_C( -2804), INT16_C( 7606)), simde_mm_set_pi16(INT16_C( 23575), INT16_C( 15204), INT16_C( 30948), INT16_C( 7606)) }, { simde_mm_set_pi16(INT16_C(-15388), INT16_C( -4009), INT16_C(-12203), INT16_C( 19351)), simde_mm_set_pi16(INT16_C(-11390), INT16_C(-14248), INT16_C( 6877), INT16_C(-11224)), simde_mm_set_pi16(INT16_C(-11390), INT16_C( -4009), INT16_C( 6877), INT16_C( 19351)) }, { simde_mm_set_pi16(INT16_C( 9439), INT16_C(-12374), INT16_C( 28008), INT16_C(-15421)), simde_mm_set_pi16(INT16_C( 2278), INT16_C( 32415), INT16_C(-22150), INT16_C( 3793)), simde_mm_set_pi16(INT16_C( 9439), 
INT16_C( 32415), INT16_C( 28008), INT16_C( 3793)) }, { simde_mm_set_pi16(INT16_C( 8379), INT16_C(-13717), INT16_C( 19788), INT16_C(-10969)), simde_mm_set_pi16(INT16_C(-11192), INT16_C(-31648), INT16_C( 6691), INT16_C(-15431)), simde_mm_set_pi16(INT16_C( 8379), INT16_C(-13717), INT16_C( 19788), INT16_C(-10969)) }, { simde_mm_set_pi16(INT16_C( -9393), INT16_C( 10266), INT16_C( 19011), INT16_C( 29123)), simde_mm_set_pi16(INT16_C( -1050), INT16_C( 12724), INT16_C(-11587), INT16_C( 10160)), simde_mm_set_pi16(INT16_C( -1050), INT16_C( 12724), INT16_C( 19011), INT16_C( 29123)) }, { simde_mm_set_pi16(INT16_C( 3665), INT16_C(-28011), INT16_C(-19799), INT16_C( 10281)), simde_mm_set_pi16(INT16_C( 18116), INT16_C( 25642), INT16_C( 26951), INT16_C( 15276)), simde_mm_set_pi16(INT16_C( 18116), INT16_C( 25642), INT16_C( 26951), INT16_C( 15276)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_pmaxsw(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } return 0; } static int test_simde_mm_max_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 b[4]; const simde_float32 r[4]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 353.79), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -20.53) }, { SIMDE_FLOAT32_C( -559.69), SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 542.61) }, { SIMDE_FLOAT32_C( -559.69), SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 542.61) } }, #endif { { SIMDE_FLOAT32_C( -584.82), SIMDE_FLOAT32_C( -771.96), SIMDE_FLOAT32_C( 713.78), SIMDE_FLOAT32_C( -25.19) }, { SIMDE_FLOAT32_C( -146.60), SIMDE_FLOAT32_C( -788.24), SIMDE_FLOAT32_C( 792.99), SIMDE_FLOAT32_C( -871.40) }, { SIMDE_FLOAT32_C( -146.60), SIMDE_FLOAT32_C( -771.96), SIMDE_FLOAT32_C( 792.99), SIMDE_FLOAT32_C( -25.19) } }, { { SIMDE_FLOAT32_C( -253.85), SIMDE_FLOAT32_C( 796.83), SIMDE_FLOAT32_C( 458.25), SIMDE_FLOAT32_C( 983.39) }, { SIMDE_FLOAT32_C( 
-205.15), SIMDE_FLOAT32_C( -738.14), SIMDE_FLOAT32_C( 103.55), SIMDE_FLOAT32_C( 362.69) }, { SIMDE_FLOAT32_C( -205.15), SIMDE_FLOAT32_C( 796.83), SIMDE_FLOAT32_C( 458.25), SIMDE_FLOAT32_C( 983.39) } }, { { SIMDE_FLOAT32_C( -535.82), SIMDE_FLOAT32_C( -110.18), SIMDE_FLOAT32_C( -688.86), SIMDE_FLOAT32_C( 231.16) }, { SIMDE_FLOAT32_C( -765.48), SIMDE_FLOAT32_C( -343.50), SIMDE_FLOAT32_C( 811.93), SIMDE_FLOAT32_C( 559.03) }, { SIMDE_FLOAT32_C( -535.82), SIMDE_FLOAT32_C( -110.18), SIMDE_FLOAT32_C( 811.93), SIMDE_FLOAT32_C( 559.03) } }, { { SIMDE_FLOAT32_C( -989.71), SIMDE_FLOAT32_C( -736.05), SIMDE_FLOAT32_C( -461.50), SIMDE_FLOAT32_C( -549.40) }, { SIMDE_FLOAT32_C( -316.97), SIMDE_FLOAT32_C( -791.36), SIMDE_FLOAT32_C( 993.20), SIMDE_FLOAT32_C( 98.21) }, { SIMDE_FLOAT32_C( -316.97), SIMDE_FLOAT32_C( -736.05), SIMDE_FLOAT32_C( 993.20), SIMDE_FLOAT32_C( 98.21) } }, { { SIMDE_FLOAT32_C( -563.32), SIMDE_FLOAT32_C( 706.98), SIMDE_FLOAT32_C( -926.98), SIMDE_FLOAT32_C( 290.07) }, { SIMDE_FLOAT32_C( 918.74), SIMDE_FLOAT32_C( 866.01), SIMDE_FLOAT32_C( 418.68), SIMDE_FLOAT32_C( -335.11) }, { SIMDE_FLOAT32_C( 918.74), SIMDE_FLOAT32_C( 866.01), SIMDE_FLOAT32_C( 418.68), SIMDE_FLOAT32_C( 290.07) } }, { { SIMDE_FLOAT32_C( 662.83), SIMDE_FLOAT32_C( -123.07), SIMDE_FLOAT32_C( -351.72), SIMDE_FLOAT32_C( -542.32) }, { SIMDE_FLOAT32_C( 138.79), SIMDE_FLOAT32_C( 751.84), SIMDE_FLOAT32_C( 820.37), SIMDE_FLOAT32_C( 602.97) }, { SIMDE_FLOAT32_C( 662.83), SIMDE_FLOAT32_C( 751.84), SIMDE_FLOAT32_C( 820.37), SIMDE_FLOAT32_C( 602.97) } }, { { SIMDE_FLOAT32_C( -358.34), SIMDE_FLOAT32_C( -868.49), SIMDE_FLOAT32_C( -165.86), SIMDE_FLOAT32_C( -123.83) }, { SIMDE_FLOAT32_C( -211.99), SIMDE_FLOAT32_C( -353.93), SIMDE_FLOAT32_C( -564.80), SIMDE_FLOAT32_C( -201.70) }, { SIMDE_FLOAT32_C( -211.99), SIMDE_FLOAT32_C( -353.93), SIMDE_FLOAT32_C( -165.86), SIMDE_FLOAT32_C( -123.83) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); 
simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_mm_max_ps(a, b); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_max_pu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_x_mm_set_pu8( 95, 35, 185, 136, 198, 90, 109, 138), simde_x_mm_set_pu8(165, 90, 139, 114, 195, 147, 62, 160), simde_x_mm_set_pu8(165, 90, 185, 136, 198, 147, 109, 160) }, { simde_x_mm_set_pu8(192, 104, 198, 106, 121, 208, 222, 80), simde_x_mm_set_pu8( 14, 66, 107, 237, 19, 244, 121, 240), simde_x_mm_set_pu8(192, 104, 198, 237, 121, 244, 222, 240) }, { simde_x_mm_set_pu8( 95, 71, 177, 202, 66, 32, 85, 206), simde_x_mm_set_pu8(206, 134, 86, 165, 246, 242, 112, 247), simde_x_mm_set_pu8(206, 134, 177, 202, 246, 242, 112, 247) }, { simde_x_mm_set_pu8(247, 220, 6, 72, 190, 176, 185, 129), simde_x_mm_set_pu8(102, 233, 69, 70, 65, 36, 164, 122), simde_x_mm_set_pu8(247, 233, 69, 72, 190, 176, 185, 129) }, { simde_x_mm_set_pu8(121, 66, 178, 31, 46, 35, 117, 91), simde_x_mm_set_pu8(162, 127, 145, 79, 214, 91, 102, 58), simde_x_mm_set_pu8(162, 127, 178, 79, 214, 91, 117, 91) }, { simde_x_mm_set_pu8( 75, 55, 102, 27, 144, 219, 63, 26), simde_x_mm_set_pu8(178, 228, 83, 88, 34, 43, 215, 34), simde_x_mm_set_pu8(178, 228, 102, 88, 144, 219, 215, 34) }, { simde_x_mm_set_pu8( 71, 199, 130, 210, 23, 163, 117, 223), simde_x_mm_set_pu8( 47, 138, 43, 60, 152, 77, 246, 8), simde_x_mm_set_pu8( 71, 199, 130, 210, 152, 163, 246, 223) }, { simde_x_mm_set_pu8( 65, 226, 26, 83, 148, 71, 8, 192), simde_x_mm_set_pu8( 48, 22, 250, 180, 93, 65, 44, 38), simde_x_mm_set_pu8( 65, 226, 250, 180, 148, 71, 44, 192) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m64 r = simde_mm_max_pu8(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_u8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_pmaxub(SIMDE_MUNIT_TEST_ARGS) 
{ const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_x_mm_set_pu8( 95, 35, 185, 136, 198, 90, 109, 138), simde_x_mm_set_pu8(165, 90, 139, 114, 195, 147, 62, 160), simde_x_mm_set_pu8(165, 90, 185, 136, 198, 147, 109, 160) }, { simde_x_mm_set_pu8(192, 104, 198, 106, 121, 208, 222, 80), simde_x_mm_set_pu8( 14, 66, 107, 237, 19, 244, 121, 240), simde_x_mm_set_pu8(192, 104, 198, 237, 121, 244, 222, 240) }, { simde_x_mm_set_pu8( 95, 71, 177, 202, 66, 32, 85, 206), simde_x_mm_set_pu8(206, 134, 86, 165, 246, 242, 112, 247), simde_x_mm_set_pu8(206, 134, 177, 202, 246, 242, 112, 247) }, { simde_x_mm_set_pu8(247, 220, 6, 72, 190, 176, 185, 129), simde_x_mm_set_pu8(102, 233, 69, 70, 65, 36, 164, 122), simde_x_mm_set_pu8(247, 233, 69, 72, 190, 176, 185, 129) }, { simde_x_mm_set_pu8(121, 66, 178, 31, 46, 35, 117, 91), simde_x_mm_set_pu8(162, 127, 145, 79, 214, 91, 102, 58), simde_x_mm_set_pu8(162, 127, 178, 79, 214, 91, 117, 91) }, { simde_x_mm_set_pu8( 75, 55, 102, 27, 144, 219, 63, 26), simde_x_mm_set_pu8(178, 228, 83, 88, 34, 43, 215, 34), simde_x_mm_set_pu8(178, 228, 102, 88, 144, 219, 215, 34) }, { simde_x_mm_set_pu8( 71, 199, 130, 210, 23, 163, 117, 223), simde_x_mm_set_pu8( 47, 138, 43, 60, 152, 77, 246, 8), simde_x_mm_set_pu8( 71, 199, 130, 210, 152, 163, 246, 223) }, { simde_x_mm_set_pu8( 65, 226, 26, 83, 148, 71, 8, 192), simde_x_mm_set_pu8( 48, 22, 250, 180, 93, 65, 44, 38), simde_x_mm_set_pu8( 65, 226, 250, 180, 148, 71, 44, 192) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m64 r = simde_m_pmaxub(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_u8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_max_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps( 275.98f, 244.68f, 294.03f, 414.26f), simde_mm_set_ps( 915.80f, -0.19f, 23.06f, 81.33f), simde_mm_set_ps( 275.98f, 244.68f, 294.03f, 
414.26f) }, { simde_mm_set_ps(-813.38f, 435.35f, 419.41f, -441.22f), simde_mm_set_ps( 389.17f, -536.41f, -137.18f, -787.72f), simde_mm_set_ps(-813.38f, 435.35f, 419.41f, -441.22f) }, { simde_mm_set_ps(-619.96f, -614.05f, -479.78f, -823.70f), simde_mm_set_ps(-814.29f, 295.27f, -132.00f, -70.04f), simde_mm_set_ps(-619.96f, -614.05f, -479.78f, -70.04f) }, { simde_mm_set_ps(-480.26f, -233.90f, 242.17f, -129.02f), simde_mm_set_ps(-777.79f, -728.41f, -33.93f, -163.52f), simde_mm_set_ps(-480.26f, -233.90f, 242.17f, -129.02f) }, { simde_mm_set_ps(-442.14f, 410.97f, 665.05f, -946.79f), simde_mm_set_ps(-545.42f, 47.51f, -78.24f, -648.70f), simde_mm_set_ps(-442.14f, 410.97f, 665.05f, -648.70f) }, { simde_mm_set_ps(-136.30f, -558.74f, 355.69f, 48.70f), simde_mm_set_ps(-820.06f, -448.36f, -48.18f, -396.98f), simde_mm_set_ps(-136.30f, -558.74f, 355.69f, 48.70f) }, { simde_mm_set_ps( 955.43f, -448.98f, -165.93f, 79.87f), simde_mm_set_ps(-380.33f, 295.42f, -77.30f, 721.77f), simde_mm_set_ps( 955.43f, -448.98f, -165.93f, 721.77f) }, { simde_mm_set_ps( -40.78f, 393.73f, -60.99f, -143.02f), simde_mm_set_ps(-232.14f, 77.20f, -606.64f, -624.14f), simde_mm_set_ps( -40.78f, 393.73f, -60.99f, -143.02f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m128 r = simde_mm_max_ss(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(test_vec[i].r, r, 1); } return 0; } static int test_simde_mm_min_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(-17712, 12551, -23434, -13133), simde_mm_set_pi16( 26266, -20490, 7912, 29803), simde_mm_set_pi16(-17712, -20490, -23434, -13133) }, { simde_mm_set_pi16(-24457, 29877, -26735, -29539), simde_mm_set_pi16( 11486, 18014, 20714, -14577), simde_mm_set_pi16(-24457, 18014, -26735, -29539) }, { simde_mm_set_pi16(-20528, 7690, 11233, -7462), simde_mm_set_pi16(-32519, 4248, -31417, -32471), simde_mm_set_pi16(-32519, 4248, -31417, -32471) 
}, { simde_mm_set_pi16(-31520, 3559, -26842, 21046), simde_mm_set_pi16( 13846, -3714, 16375, 18158), simde_mm_set_pi16(-31520, -3714, -26842, 18158) }, { simde_mm_set_pi16( 21922, 9874, 13654, 24031), simde_mm_set_pi16( 23732, 13322, 8641, -2491), simde_mm_set_pi16( 21922, 9874, 8641, -2491) }, { simde_mm_set_pi16( 14557, 3319, 16372, 28742), simde_mm_set_pi16(-29436, 20833, -11479, -29779), simde_mm_set_pi16(-29436, 3319, -11479, -29779) }, { simde_mm_set_pi16( 14514, 25528, 18329, 19467), simde_mm_set_pi16( 7807, -10832, -5002, -30632), simde_mm_set_pi16( 7807, -10832, -5002, -30632) }, { simde_mm_set_pi16( 10007, 31428, 28911, -29602), simde_mm_set_pi16( 29865, -25102, 11884, 9524), simde_mm_set_pi16( 10007, -25102, 11884, -29602) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m64 r = simde_mm_min_pi16(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_pminsw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(-17712, 12551, -23434, -13133), simde_mm_set_pi16( 26266, -20490, 7912, 29803), simde_mm_set_pi16(-17712, -20490, -23434, -13133) }, { simde_mm_set_pi16(-24457, 29877, -26735, -29539), simde_mm_set_pi16( 11486, 18014, 20714, -14577), simde_mm_set_pi16(-24457, 18014, -26735, -29539) }, { simde_mm_set_pi16(-20528, 7690, 11233, -7462), simde_mm_set_pi16(-32519, 4248, -31417, -32471), simde_mm_set_pi16(-32519, 4248, -31417, -32471) }, { simde_mm_set_pi16(-31520, 3559, -26842, 21046), simde_mm_set_pi16( 13846, -3714, 16375, 18158), simde_mm_set_pi16(-31520, -3714, -26842, 18158) }, { simde_mm_set_pi16( 21922, 9874, 13654, 24031), simde_mm_set_pi16( 23732, 13322, 8641, -2491), simde_mm_set_pi16( 21922, 9874, 8641, -2491) }, { simde_mm_set_pi16( 14557, 3319, 16372, 28742), simde_mm_set_pi16(-29436, 20833, -11479, -29779), simde_mm_set_pi16(-29436, 3319, -11479, -29779) 
}, { simde_mm_set_pi16( 14514, 25528, 18329, 19467), simde_mm_set_pi16( 7807, -10832, -5002, -30632), simde_mm_set_pi16( 7807, -10832, -5002, -30632) }, { simde_mm_set_pi16( 10007, 31428, 28911, -29602), simde_mm_set_pi16( 29865, -25102, 11884, 9524), simde_mm_set_pi16( 10007, -25102, 11884, -29602) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m64 r = simde_m_pminsw(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_min_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 b[4]; const simde_float32 r[4]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 52.40), SIMDE_MATH_NANF, SIMDE_MATH_INFINITYF }, { SIMDE_FLOAT32_C( 17.29), SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -535.67) }, { SIMDE_FLOAT32_C( 17.29), SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -535.67) } }, { { SIMDE_FLOAT32_C( 52.40), -SIMDE_MATH_INFINITYF, -SIMDE_MATH_INFINITYF, SIMDE_MATH_NANF }, { SIMDE_MATH_INFINITYF, SIMDE_FLOAT32_C( 299.86), SIMDE_MATH_NANF, SIMDE_MATH_INFINITYF }, { SIMDE_FLOAT32_C( 52.40), -SIMDE_MATH_INFINITYF, SIMDE_MATH_NANF, SIMDE_MATH_INFINITYF } }, #endif { { SIMDE_FLOAT32_C( -711.87), SIMDE_FLOAT32_C( 44.39), SIMDE_FLOAT32_C( -762.65), SIMDE_FLOAT32_C( -621.30) }, { SIMDE_FLOAT32_C( 258.76), SIMDE_FLOAT32_C( -263.48), SIMDE_FLOAT32_C( 915.20), SIMDE_FLOAT32_C( 737.73) }, { SIMDE_FLOAT32_C( -711.87), SIMDE_FLOAT32_C( -263.48), SIMDE_FLOAT32_C( -762.65), SIMDE_FLOAT32_C( -621.30) } }, { { SIMDE_FLOAT32_C( 263.09), SIMDE_FLOAT32_C( 2.29), SIMDE_FLOAT32_C( -485.34), SIMDE_FLOAT32_C( -760.23) }, { SIMDE_FLOAT32_C( -189.22), SIMDE_FLOAT32_C( 207.98), SIMDE_FLOAT32_C( 885.59), SIMDE_FLOAT32_C( -61.86) }, { SIMDE_FLOAT32_C( -189.22), SIMDE_FLOAT32_C( 2.29), SIMDE_FLOAT32_C( -485.34), SIMDE_FLOAT32_C( -760.23) } }, { { SIMDE_FLOAT32_C( -140.13), SIMDE_FLOAT32_C( 
-387.88), SIMDE_FLOAT32_C( -850.20), SIMDE_FLOAT32_C( 265.81) }, { SIMDE_FLOAT32_C( 267.32), SIMDE_FLOAT32_C( 453.87), SIMDE_FLOAT32_C( 754.87), SIMDE_FLOAT32_C( -301.67) }, { SIMDE_FLOAT32_C( -140.13), SIMDE_FLOAT32_C( -387.88), SIMDE_FLOAT32_C( -850.20), SIMDE_FLOAT32_C( -301.67) } }, { { SIMDE_FLOAT32_C( 478.04), SIMDE_FLOAT32_C( -504.48), SIMDE_FLOAT32_C( -579.57), SIMDE_FLOAT32_C( 714.80) }, { SIMDE_FLOAT32_C( 431.69), SIMDE_FLOAT32_C( -177.02), SIMDE_FLOAT32_C( -184.05), SIMDE_FLOAT32_C( 719.83) }, { SIMDE_FLOAT32_C( 431.69), SIMDE_FLOAT32_C( -504.48), SIMDE_FLOAT32_C( -579.57), SIMDE_FLOAT32_C( 714.80) } }, { { SIMDE_FLOAT32_C( 867.37), SIMDE_FLOAT32_C( 53.30), SIMDE_FLOAT32_C( -901.47), SIMDE_FLOAT32_C( 126.14) }, { SIMDE_FLOAT32_C( 789.82), SIMDE_FLOAT32_C( -986.27), SIMDE_FLOAT32_C( -136.13), SIMDE_FLOAT32_C( 52.91) }, { SIMDE_FLOAT32_C( 789.82), SIMDE_FLOAT32_C( -986.27), SIMDE_FLOAT32_C( -901.47), SIMDE_FLOAT32_C( 52.91) } }, { { SIMDE_FLOAT32_C( 16.02), SIMDE_FLOAT32_C( 378.53), SIMDE_FLOAT32_C( 292.68), SIMDE_FLOAT32_C( 826.80) }, { SIMDE_FLOAT32_C( -413.49), SIMDE_FLOAT32_C( 178.27), SIMDE_FLOAT32_C( -235.07), SIMDE_FLOAT32_C( 446.39) }, { SIMDE_FLOAT32_C( -413.49), SIMDE_FLOAT32_C( 178.27), SIMDE_FLOAT32_C( -235.07), SIMDE_FLOAT32_C( 446.39) } }, { { SIMDE_FLOAT32_C( 790.39), SIMDE_FLOAT32_C( -85.27), SIMDE_FLOAT32_C( -287.81), SIMDE_FLOAT32_C( 57.70) }, { SIMDE_FLOAT32_C( -631.40), SIMDE_FLOAT32_C( -532.94), SIMDE_FLOAT32_C( 756.03), SIMDE_FLOAT32_C( 846.64) }, { SIMDE_FLOAT32_C( -631.40), SIMDE_FLOAT32_C( -532.94), SIMDE_FLOAT32_C( -287.81), SIMDE_FLOAT32_C( 57.70) } }, { { SIMDE_FLOAT32_C( -37.42), SIMDE_FLOAT32_C( -823.54), SIMDE_FLOAT32_C( 561.44), SIMDE_FLOAT32_C( -605.72) }, { SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( -622.61), SIMDE_FLOAT32_C( -885.90), SIMDE_FLOAT32_C( -133.18) }, { SIMDE_FLOAT32_C( -37.42), SIMDE_FLOAT32_C( -823.54), SIMDE_FLOAT32_C( -885.90), SIMDE_FLOAT32_C( -605.72) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_mm_min_ps(a, b); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_min_pu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_x_mm_set_pu8( 3, 32, 201, 226, 126, 175, 31, 50), simde_x_mm_set_pu8(220, 148, 109, 216, 32, 219, 221, 116), simde_x_mm_set_pu8( 3, 32, 109, 216, 32, 175, 31, 50) }, { simde_x_mm_set_pu8(123, 186, 147, 62, 85, 163, 217, 248), simde_x_mm_set_pu8( 50, 187, 220, 240, 243, 231, 241, 209), simde_x_mm_set_pu8( 50, 186, 147, 62, 85, 163, 217, 209) }, { simde_x_mm_set_pu8(100, 27, 19, 67, 100, 214, 111, 154), simde_x_mm_set_pu8( 31, 16, 59, 138, 178, 43, 63, 213), simde_x_mm_set_pu8( 31, 16, 19, 67, 100, 43, 63, 154) }, { simde_x_mm_set_pu8(205, 136, 79, 245, 178, 167, 7, 9), simde_x_mm_set_pu8(103, 214, 180, 123, 12, 141, 59, 104), simde_x_mm_set_pu8(103, 136, 79, 123, 12, 141, 7, 9) }, { simde_x_mm_set_pu8(143, 111, 158, 95, 192, 18, 83, 18), simde_x_mm_set_pu8( 81, 138, 112, 76, 64, 169, 64, 35), simde_x_mm_set_pu8( 81, 111, 112, 76, 64, 18, 64, 18) }, { simde_x_mm_set_pu8(246, 106, 240, 187, 202, 248, 5, 105), simde_x_mm_set_pu8(184, 221, 161, 239, 162, 163, 17, 109), simde_x_mm_set_pu8(184, 106, 161, 187, 162, 163, 5, 105) }, { simde_x_mm_set_pu8(172, 59, 82, 1, 130, 31, 233, 87), simde_x_mm_set_pu8( 51, 153, 219, 33, 100, 204, 105, 228), simde_x_mm_set_pu8( 51, 59, 82, 1, 100, 31, 105, 87) }, { simde_x_mm_set_pu8(228, 182, 179, 248, 70, 35, 65, 84), simde_x_mm_set_pu8( 84, 159, 106, 2, 156, 107, 120, 67), simde_x_mm_set_pu8( 84, 159, 106, 2, 70, 35, 65, 67) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m64 r = simde_mm_min_pu8(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_u8x8(r, test_vec[i].r); } simde_mm_empty(); return 
0; } static int test_simde_m_pminub(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_x_mm_set_pu8( 3, 32, 201, 226, 126, 175, 31, 50), simde_x_mm_set_pu8(220, 148, 109, 216, 32, 219, 221, 116), simde_x_mm_set_pu8( 3, 32, 109, 216, 32, 175, 31, 50) }, { simde_x_mm_set_pu8(123, 186, 147, 62, 85, 163, 217, 248), simde_x_mm_set_pu8( 50, 187, 220, 240, 243, 231, 241, 209), simde_x_mm_set_pu8( 50, 186, 147, 62, 85, 163, 217, 209) }, { simde_x_mm_set_pu8(100, 27, 19, 67, 100, 214, 111, 154), simde_x_mm_set_pu8( 31, 16, 59, 138, 178, 43, 63, 213), simde_x_mm_set_pu8( 31, 16, 19, 67, 100, 43, 63, 154) }, { simde_x_mm_set_pu8(205, 136, 79, 245, 178, 167, 7, 9), simde_x_mm_set_pu8(103, 214, 180, 123, 12, 141, 59, 104), simde_x_mm_set_pu8(103, 136, 79, 123, 12, 141, 7, 9) }, { simde_x_mm_set_pu8(143, 111, 158, 95, 192, 18, 83, 18), simde_x_mm_set_pu8( 81, 138, 112, 76, 64, 169, 64, 35), simde_x_mm_set_pu8( 81, 111, 112, 76, 64, 18, 64, 18) }, { simde_x_mm_set_pu8(246, 106, 240, 187, 202, 248, 5, 105), simde_x_mm_set_pu8(184, 221, 161, 239, 162, 163, 17, 109), simde_x_mm_set_pu8(184, 106, 161, 187, 162, 163, 5, 105) }, { simde_x_mm_set_pu8(172, 59, 82, 1, 130, 31, 233, 87), simde_x_mm_set_pu8( 51, 153, 219, 33, 100, 204, 105, 228), simde_x_mm_set_pu8( 51, 59, 82, 1, 100, 31, 105, 87) }, { simde_x_mm_set_pu8(228, 182, 179, 248, 70, 35, 65, 84), simde_x_mm_set_pu8( 84, 159, 106, 2, 156, 107, 120, 67), simde_x_mm_set_pu8( 84, 159, 106, 2, 70, 35, 65, 67) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m64 r = simde_m_pminub(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_u8x8(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_min_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps( 905.52f, -330.94f, -743.29f, -10.28f), simde_mm_set_ps(-603.64f, -33.37f, 723.28f, -975.56f), 
simde_mm_set_ps( 905.52f, -330.94f, -743.29f, -975.56f) }, { simde_mm_set_ps(-901.94f, 395.72f, 391.94f, -212.21f), simde_mm_set_ps(-188.76f, 605.72f, -757.32f, -217.92f), simde_mm_set_ps(-901.94f, 395.72f, 391.94f, -217.92f) }, { simde_mm_set_ps( 270.60f, 585.69f, -494.83f, 500.56f), simde_mm_set_ps( 444.26f, 925.14f, -362.96f, 120.59f), simde_mm_set_ps( 270.60f, 585.69f, -494.83f, 120.59f) }, { simde_mm_set_ps( 222.03f, -452.05f, -212.51f, 16.60f), simde_mm_set_ps(-338.89f, 786.83f, -596.87f, 345.56f), simde_mm_set_ps( 222.03f, -452.05f, -212.51f, 16.60f) }, { simde_mm_set_ps( 130.17f, -389.05f, -693.23f, -558.79f), simde_mm_set_ps( 351.18f, 1.64f, 661.55f, 667.31f), simde_mm_set_ps( 130.17f, -389.05f, -693.23f, -558.79f) }, { simde_mm_set_ps(-492.87f, 857.67f, 99.45f, -129.44f), simde_mm_set_ps( 424.94f, 552.64f, 68.12f, 195.21f), simde_mm_set_ps(-492.87f, 857.67f, 99.45f, -129.44f) }, { simde_mm_set_ps( 213.75f, 969.89f, -341.00f, -19.41f), simde_mm_set_ps( 773.33f, -228.51f, 68.57f, -153.07f), simde_mm_set_ps( 213.75f, 969.89f, -341.00f, -153.07f) }, { simde_mm_set_ps(-482.05f, -169.03f, -647.88f, -151.80f), simde_mm_set_ps( 604.32f, -221.45f, 450.87f, -490.43f), simde_mm_set_ps(-482.05f, -169.03f, -647.88f, -490.43f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m128 r = simde_mm_min_ss(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(test_vec[i].r, r, 1); } return 0; } static int test_simde_mm_move_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(-358.23f, 557.58f, 318.93f, -758.38f), simde_mm_set_ps( 1.51f, 975.39f, -136.78f, -157.41f), simde_mm_set_ps(-358.23f, 557.58f, 318.93f, -157.41f) }, { simde_mm_set_ps(-944.08f, -768.00f, 457.67f, 835.58f), simde_mm_set_ps(-700.69f, 960.42f, 670.21f, -766.17f), simde_mm_set_ps(-944.08f, -768.00f, 457.67f, -766.17f) }, { simde_mm_set_ps(-640.75f, 79.11f, -809.30f, -582.60f), 
simde_mm_set_ps( 451.92f, 260.70f, -368.72f, -418.36f), simde_mm_set_ps(-640.75f, 79.11f, -809.30f, -418.36f) }, { simde_mm_set_ps(-265.37f, -906.15f, 463.48f, 857.51f), simde_mm_set_ps( 52.86f, -189.27f, -89.79f, 636.22f), simde_mm_set_ps(-265.37f, -906.15f, 463.48f, 636.22f) }, { simde_mm_set_ps( 627.30f, -419.51f, 242.55f, -669.89f), simde_mm_set_ps( 891.75f, 884.03f, 808.69f, 48.90f), simde_mm_set_ps( 627.30f, -419.51f, 242.55f, 48.90f) }, { simde_mm_set_ps( 162.10f, -144.97f, -36.34f, 747.42f), simde_mm_set_ps( 962.83f, 377.89f, -519.04f, -497.15f), simde_mm_set_ps( 162.10f, -144.97f, -36.34f, -497.15f) }, { simde_mm_set_ps(-230.32f, 536.55f, -396.11f, 274.97f), simde_mm_set_ps(-442.89f, 237.99f, -587.16f, 603.90f), simde_mm_set_ps(-230.32f, 536.55f, -396.11f, 603.90f) }, { simde_mm_set_ps(-213.83f, 999.36f, 795.03f, 885.52f), simde_mm_set_ps(-878.99f, -162.39f, 89.22f, -749.67f), simde_mm_set_ps(-213.83f, 999.36f, 795.03f, -749.67f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m128 r = simde_mm_move_ss(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(test_vec[i].r, r, 1); } return 0; } static int test_simde_mm_movehl_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps( 448.42f, 812.28f, -569.58f, -612.98f), simde_mm_set_ps(-260.91f, 851.49f, -543.31f, -452.37f), simde_mm_set_ps( 448.42f, 812.28f, -260.91f, 851.49f) }, { simde_mm_set_ps( 164.79f, -510.29f, -875.53f, 338.43f), simde_mm_set_ps( 195.22f, -222.31f, -247.28f, -544.38f), simde_mm_set_ps( 164.79f, -510.29f, 195.22f, -222.31f) }, { simde_mm_set_ps( 632.26f, -381.04f, -465.25f, -913.48f), simde_mm_set_ps(-790.33f, -504.12f, -321.51f, -760.29f), simde_mm_set_ps( 632.26f, -381.04f, -790.33f, -504.12f) }, { simde_mm_set_ps( 203.91f, 884.83f, -352.58f, -259.85f), simde_mm_set_ps( 92.13f, 448.80f, 494.59f, -13.35f), simde_mm_set_ps( 203.91f, 884.83f, 92.13f, 448.80f) }, { 
simde_mm_set_ps( 223.91f, -533.66f, 185.50f, -579.52f), simde_mm_set_ps(-316.81f, -862.60f, -895.66f, 129.46f), simde_mm_set_ps( 223.91f, -533.66f, -316.81f, -862.60f) }, { simde_mm_set_ps(-731.11f, 221.64f, 388.77f, -5.75f), simde_mm_set_ps(-220.42f, -24.67f, -629.56f, -668.90f), simde_mm_set_ps(-731.11f, 221.64f, -220.42f, -24.67f) }, { simde_mm_set_ps(-367.65f, 429.37f, 435.99f, 954.93f), simde_mm_set_ps( 382.29f, -511.24f, 874.66f, 450.20f), simde_mm_set_ps(-367.65f, 429.37f, 382.29f, -511.24f) }, { simde_mm_set_ps( 917.13f, -437.03f, -611.86f, -766.78f), simde_mm_set_ps( -43.20f, -568.30f, -68.56f, -878.32f), simde_mm_set_ps( 917.13f, -437.03f, -43.20f, -568.30f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m128 r = simde_mm_movehl_ps(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(test_vec[i].r, r, 1); } return 0; } static int test_simde_mm_movelh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(-126.61f, -366.61f, 342.01f, -290.15f), simde_mm_set_ps( 691.90f, -944.15f, 99.58f, 438.89f), simde_mm_set_ps( 99.58f, 438.89f, 342.01f, -290.15f) }, { simde_mm_set_ps(-879.79f, 930.29f, 951.33f, 492.78f), simde_mm_set_ps( 600.74f, -198.87f, 936.84f, 313.69f), simde_mm_set_ps( 936.84f, 313.69f, 951.33f, 492.78f) }, { simde_mm_set_ps(-467.81f, -802.41f, 821.13f, -596.55f), simde_mm_set_ps( 695.24f, 173.94f, -480.51f, -41.27f), simde_mm_set_ps(-480.51f, -41.27f, 821.13f, -596.55f) }, { simde_mm_set_ps( 645.54f, -99.35f, -669.53f, -518.75f), simde_mm_set_ps( 261.98f, -2.77f, -35.85f, -725.12f), simde_mm_set_ps( -35.85f, -725.12f, -669.53f, -518.75f) }, { simde_mm_set_ps( 851.93f, 376.50f, -125.53f, 315.67f), simde_mm_set_ps( 722.06f, -287.10f, 806.63f, -831.38f), simde_mm_set_ps( 806.63f, -831.38f, -125.53f, 315.67f) }, { simde_mm_set_ps(-180.42f, -861.51f, 293.97f, 929.27f), simde_mm_set_ps( -61.47f, -964.08f, -555.27f, 147.09f), 
simde_mm_set_ps(-555.27f, 147.09f, 293.97f, 929.27f) }, { simde_mm_set_ps( 294.20f, 18.46f, 779.53f, -177.14f), simde_mm_set_ps( 664.57f, 349.92f, 797.65f, 206.26f), simde_mm_set_ps( 797.65f, 206.26f, 779.53f, -177.14f) }, { simde_mm_set_ps(-737.06f, -946.48f, -251.45f, -808.64f), simde_mm_set_ps(-245.46f, 616.13f, -342.03f, 914.50f), simde_mm_set_ps(-342.03f, 914.50f, -251.45f, -808.64f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m128 r = simde_mm_movelh_ps(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(test_vec[i].r, r, 1); } return 0; } static int test_simde_mm_movemask_pi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int r; } test_vec[8] = { { simde_mm_set_pi8( 7, -33, 4, -58, -87, 16, 83, -97), 89 }, { simde_mm_set_pi8( 28, -40, -15, -114, -71, -97, -12, -53), 127 }, { simde_mm_set_pi8( -16, -80, 47, 37, 16, -111, 120, -12), 197 }, { simde_mm_set_pi8( 4, -17, -44, -3, -35, 81, -87, 97), 122 }, { simde_mm_set_pi8( -84, 23, 93, 30, 87, 114, 66, 94), 128 }, { simde_mm_set_pi8( -1, -24, -4, -87, 33, 91, 32, 43), 240 }, { simde_mm_set_pi8( -11, 96, -68, 84, 53, -120, 124, -4), 165 }, { simde_mm_set_pi8(-122, 91, -3, -17, -54, 62, 119, -40), 185 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { int r = simde_mm_movemask_pi8(test_vec[i].a); simde_assert_equal_i(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_pmovmskb(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; int r; } test_vec[8] = { { simde_mm_set_pi8( 7, -33, 4, -58, -87, 16, 83, -97), 89 }, { simde_mm_set_pi8( 28, -40, -15, -114, -71, -97, -12, -53), 127 }, { simde_mm_set_pi8( -16, -80, 47, 37, 16, -111, 120, -12), 197 }, { simde_mm_set_pi8( 4, -17, -44, -3, -35, 81, -87, 97), 122 }, { simde_mm_set_pi8( -84, 23, 93, 30, 87, 114, 66, 94), 128 }, { simde_mm_set_pi8( -1, -24, -4, -87, 33, 91, 32, 43), 240 }, { simde_mm_set_pi8( -11, 96, -68, 84, 53, -120, 124, -4), 165 }, { 
simde_mm_set_pi8(-122, 91, -3, -17, -54, 62, 119, -40), 185 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { int r = simde_m_pmovmskb(test_vec[i].a); simde_assert_equal_i(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_movemask_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; int r; } test_vec[8] = { { simde_mm_castsi128_ps(simde_x_mm_set_epu32(UINT32_C(0xa67d815d), UINT32_C(0x313ba9ba), UINT32_C(0x21c24eef), UINT32_C(0x423f8c9e))), 8 }, { simde_mm_castsi128_ps(simde_x_mm_set_epu32(UINT32_C(0x489edd7d), UINT32_C(0x67cd5a03), UINT32_C(0x615ae189), UINT32_C(0x97259ce3))), 1 }, { simde_mm_castsi128_ps(simde_x_mm_set_epu32(UINT32_C(0xe28d2b70), UINT32_C(0xe91a3281), UINT32_C(0x73d2c004), UINT32_C(0x7cc3587e))), 12 }, { simde_mm_castsi128_ps(simde_x_mm_set_epu32(UINT32_C(0xb38e9200), UINT32_C(0xd86e4d45), UINT32_C(0xd67c3858), UINT32_C(0x6dd9c655))), 14 }, { simde_mm_castsi128_ps(simde_x_mm_set_epu32(UINT32_C(0x3324bf52), UINT32_C(0x86f260cf), UINT32_C(0x1c6c8682), UINT32_C(0x53be68fe))), 4 }, { simde_mm_castsi128_ps(simde_x_mm_set_epu32(UINT32_C(0xf2018c61), UINT32_C(0x250c57a7), UINT32_C(0x0654d448), UINT32_C(0x8a06fe60))), 9 }, { simde_mm_castsi128_ps(simde_x_mm_set_epu32(UINT32_C(0x0430e063), UINT32_C(0x7ffc7ad3), UINT32_C(0x9306516d), UINT32_C(0x5896591c))), 2 }, { simde_mm_castsi128_ps(simde_x_mm_set_epu32(UINT32_C(0xfa68023e), UINT32_C(0x2e799bce), UINT32_C(0x88c4c4ea), UINT32_C(0x31bc8ed8))), 10 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { int r = simde_mm_movemask_ps(test_vec[i].a); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_mul_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 954.95), SIMDE_FLOAT32_C( 261.79), SIMDE_FLOAT32_C( -313.98), SIMDE_FLOAT32_C( 739.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( -996.12), SIMDE_FLOAT32_C( 311.86), 
SIMDE_FLOAT32_C( -571.44), SIMDE_FLOAT32_C( 595.57)), simde_mm_set_ps(SIMDE_FLOAT32_C(-951244.81), SIMDE_FLOAT32_C(81641.83), SIMDE_FLOAT32_C(179420.73), SIMDE_FLOAT32_C(440245.34)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -636.86), SIMDE_FLOAT32_C( -888.18), SIMDE_FLOAT32_C( 244.79), SIMDE_FLOAT32_C( -720.76)), simde_mm_set_ps(SIMDE_FLOAT32_C( -205.91), SIMDE_FLOAT32_C( -899.67), SIMDE_FLOAT32_C( -45.24), SIMDE_FLOAT32_C( 371.39)), simde_mm_set_ps(SIMDE_FLOAT32_C(131135.84), SIMDE_FLOAT32_C(799068.88), SIMDE_FLOAT32_C(-11074.30), SIMDE_FLOAT32_C(-267683.06)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -261.34), SIMDE_FLOAT32_C( 756.03), SIMDE_FLOAT32_C( -536.77), SIMDE_FLOAT32_C( 470.04)), simde_mm_set_ps(SIMDE_FLOAT32_C( -270.28), SIMDE_FLOAT32_C( -385.03), SIMDE_FLOAT32_C( -550.64), SIMDE_FLOAT32_C( -31.96)), simde_mm_set_ps(SIMDE_FLOAT32_C(70634.98), SIMDE_FLOAT32_C(-291094.25), SIMDE_FLOAT32_C(295567.06), SIMDE_FLOAT32_C(-15022.48)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 670.10), SIMDE_FLOAT32_C( 171.23), SIMDE_FLOAT32_C( -843.34), SIMDE_FLOAT32_C( -179.33)), simde_mm_set_ps(SIMDE_FLOAT32_C( 422.05), SIMDE_FLOAT32_C( 672.20), SIMDE_FLOAT32_C( 26.32), SIMDE_FLOAT32_C( -386.87)), simde_mm_set_ps(SIMDE_FLOAT32_C(282815.69), SIMDE_FLOAT32_C(115100.80), SIMDE_FLOAT32_C(-22196.71), SIMDE_FLOAT32_C(69377.40)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 312.99), SIMDE_FLOAT32_C( 740.76), SIMDE_FLOAT32_C( 751.62), SIMDE_FLOAT32_C( 667.74)), simde_mm_set_ps(SIMDE_FLOAT32_C( 773.99), SIMDE_FLOAT32_C( -645.18), SIMDE_FLOAT32_C( 712.40), SIMDE_FLOAT32_C( 206.98)), simde_mm_set_ps(SIMDE_FLOAT32_C(242251.12), SIMDE_FLOAT32_C(-477923.53), SIMDE_FLOAT32_C(535454.12), SIMDE_FLOAT32_C(138208.83)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 80.67), SIMDE_FLOAT32_C( 206.46), SIMDE_FLOAT32_C( 384.59), SIMDE_FLOAT32_C( -166.53)), simde_mm_set_ps(SIMDE_FLOAT32_C( 113.49), SIMDE_FLOAT32_C( -659.02), SIMDE_FLOAT32_C( 494.54), SIMDE_FLOAT32_C( 459.36)), simde_mm_set_ps(SIMDE_FLOAT32_C( 9155.24), 
SIMDE_FLOAT32_C(-136061.28), SIMDE_FLOAT32_C(190195.14), SIMDE_FLOAT32_C(-76497.22)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -139.02), SIMDE_FLOAT32_C( -702.78), SIMDE_FLOAT32_C( -728.15), SIMDE_FLOAT32_C( -708.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 433.72), SIMDE_FLOAT32_C( 494.77), SIMDE_FLOAT32_C( -283.59), SIMDE_FLOAT32_C( 890.69)), simde_mm_set_ps(SIMDE_FLOAT32_C(-60295.76), SIMDE_FLOAT32_C(-347714.47), SIMDE_FLOAT32_C(206496.06), SIMDE_FLOAT32_C(-631232.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 770.76), SIMDE_FLOAT32_C( -739.61), SIMDE_FLOAT32_C( 17.92), SIMDE_FLOAT32_C( 985.37)), simde_mm_set_ps(SIMDE_FLOAT32_C( 803.92), SIMDE_FLOAT32_C( -255.00), SIMDE_FLOAT32_C( 902.16), SIMDE_FLOAT32_C( -11.55)), simde_mm_set_ps(SIMDE_FLOAT32_C(619629.38), SIMDE_FLOAT32_C(188600.55), SIMDE_FLOAT32_C(16166.71), SIMDE_FLOAT32_C(-11381.02)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_mul_ps(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_mul_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -140.76), SIMDE_FLOAT32_C( 33.48), SIMDE_FLOAT32_C( -910.86), SIMDE_FLOAT32_C( 198.54)), simde_mm_set_ps(SIMDE_FLOAT32_C( -93.54), SIMDE_FLOAT32_C( 951.74), SIMDE_FLOAT32_C( 208.33), SIMDE_FLOAT32_C( -345.25)), simde_mm_set_ps(SIMDE_FLOAT32_C( -140.76), SIMDE_FLOAT32_C( 33.48), SIMDE_FLOAT32_C( -910.86), SIMDE_FLOAT32_C(-68545.93)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 609.37), SIMDE_FLOAT32_C( 468.64), SIMDE_FLOAT32_C( 18.66), SIMDE_FLOAT32_C( 58.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( -358.79), SIMDE_FLOAT32_C( 577.16), SIMDE_FLOAT32_C( 555.29), SIMDE_FLOAT32_C( -529.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 609.37), SIMDE_FLOAT32_C( 468.64), SIMDE_FLOAT32_C( 18.66), SIMDE_FLOAT32_C(-31205.22)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -332.93), SIMDE_FLOAT32_C( -686.13), 
SIMDE_FLOAT32_C( 337.69), SIMDE_FLOAT32_C( -988.14)), simde_mm_set_ps(SIMDE_FLOAT32_C( -523.52), SIMDE_FLOAT32_C( -948.77), SIMDE_FLOAT32_C( 316.79), SIMDE_FLOAT32_C( 243.63)), simde_mm_set_ps(SIMDE_FLOAT32_C( -332.93), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 337.69), SIMDE_FLOAT32_C(-240740.56)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 364.16), SIMDE_FLOAT32_C( 161.67), SIMDE_FLOAT32_C( 286.28), SIMDE_FLOAT32_C( -439.12)), simde_mm_set_ps(SIMDE_FLOAT32_C( 425.79), SIMDE_FLOAT32_C( -839.17), SIMDE_FLOAT32_C( -537.00), SIMDE_FLOAT32_C( 731.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( 364.16), SIMDE_FLOAT32_C( 161.67), SIMDE_FLOAT32_C( 286.28), SIMDE_FLOAT32_C(-321260.19)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -250.92), SIMDE_FLOAT32_C( -215.18), SIMDE_FLOAT32_C( 745.12), SIMDE_FLOAT32_C( 449.71)), simde_mm_set_ps(SIMDE_FLOAT32_C( 466.48), SIMDE_FLOAT32_C( -916.07), SIMDE_FLOAT32_C( -892.65), SIMDE_FLOAT32_C( 808.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( -250.92), SIMDE_FLOAT32_C( -215.18), SIMDE_FLOAT32_C( 745.12), SIMDE_FLOAT32_C(363635.50)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 423.52), SIMDE_FLOAT32_C( -740.87), SIMDE_FLOAT32_C( -85.09), SIMDE_FLOAT32_C( -24.81)), simde_mm_set_ps(SIMDE_FLOAT32_C( 458.98), SIMDE_FLOAT32_C( 772.91), SIMDE_FLOAT32_C( -603.60), SIMDE_FLOAT32_C( -319.67)), simde_mm_set_ps(SIMDE_FLOAT32_C( 423.52), SIMDE_FLOAT32_C( -740.87), SIMDE_FLOAT32_C( -85.09), SIMDE_FLOAT32_C( 7931.01)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 347.42), SIMDE_FLOAT32_C( 289.85), SIMDE_FLOAT32_C( 603.24), SIMDE_FLOAT32_C( 660.55)), simde_mm_set_ps(SIMDE_FLOAT32_C( -295.61), SIMDE_FLOAT32_C( -117.86), SIMDE_FLOAT32_C( -569.10), SIMDE_FLOAT32_C( 741.88)), simde_mm_set_ps(SIMDE_FLOAT32_C( 347.42), SIMDE_FLOAT32_C( 289.85), SIMDE_FLOAT32_C( 603.24), SIMDE_FLOAT32_C(490048.84)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 162.90), SIMDE_FLOAT32_C( -653.62), SIMDE_FLOAT32_C( 155.70), SIMDE_FLOAT32_C( -76.12)), simde_mm_set_ps(SIMDE_FLOAT32_C( -537.32), SIMDE_FLOAT32_C( 255.52), 
SIMDE_FLOAT32_C( -774.74), SIMDE_FLOAT32_C( 454.04)), simde_mm_set_ps(SIMDE_FLOAT32_C( 162.90), SIMDE_FLOAT32_C( -653.62), SIMDE_FLOAT32_C( 155.70), SIMDE_FLOAT32_C(-34561.53)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_mul_ss(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_mulhi_pu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_x_mm_set_pu16(UINT16_C(53192), UINT16_C(40647), UINT16_C(19455), UINT16_C( 638)), simde_x_mm_set_pu16(UINT16_C(25315), UINT16_C(56464), UINT16_C(55360), UINT16_C(30008)), simde_x_mm_set_pu16(UINT16_C(20546), UINT16_C(35020), UINT16_C(16434), UINT16_C( 292)) }, { simde_x_mm_set_pu16(UINT16_C(37591), UINT16_C(35425), UINT16_C(33771), UINT16_C(61304)), simde_x_mm_set_pu16(UINT16_C( 7821), UINT16_C( 6117), UINT16_C(18035), UINT16_C(21492)), simde_x_mm_set_pu16(UINT16_C( 4486), UINT16_C( 3306), UINT16_C( 9293), UINT16_C(20104)) }, { simde_x_mm_set_pu16(UINT16_C(56491), UINT16_C( 868), UINT16_C(60755), UINT16_C( 8677)), simde_x_mm_set_pu16(UINT16_C(58515), UINT16_C(60020), UINT16_C(23196), UINT16_C(54339)), simde_x_mm_set_pu16(UINT16_C(50439), UINT16_C( 794), UINT16_C(21503), UINT16_C( 7194)) }, { simde_x_mm_set_pu16(UINT16_C(20057), UINT16_C(59149), UINT16_C(59878), UINT16_C( 6128)), simde_x_mm_set_pu16(UINT16_C(47066), UINT16_C(54772), UINT16_C(43338), UINT16_C(21897)), simde_x_mm_set_pu16(UINT16_C(14404), UINT16_C(49434), UINT16_C(39596), UINT16_C( 2047)) }, { simde_x_mm_set_pu16(UINT16_C(59698), UINT16_C(43803), UINT16_C(12280), UINT16_C(21097)), simde_x_mm_set_pu16(UINT16_C(50904), UINT16_C(56386), UINT16_C(59528), UINT16_C( 6251)), simde_x_mm_set_pu16(UINT16_C(46369), UINT16_C(37687), UINT16_C(11154), UINT16_C( 2012)) }, { simde_x_mm_set_pu16(UINT16_C(20821), UINT16_C(22825), UINT16_C(58828), UINT16_C(28131)), 
simde_x_mm_set_pu16(UINT16_C(56256), UINT16_C(14992), UINT16_C( 5527), UINT16_C(36188)), simde_x_mm_set_pu16(UINT16_C(17872), UINT16_C( 5221), UINT16_C( 4961), UINT16_C(15533)) }, { simde_x_mm_set_pu16(UINT16_C(33433), UINT16_C(25342), UINT16_C(10000), UINT16_C(30758)), simde_x_mm_set_pu16(UINT16_C(22897), UINT16_C(28868), UINT16_C(29159), UINT16_C(17106)), simde_x_mm_set_pu16(UINT16_C(11680), UINT16_C(11162), UINT16_C( 4449), UINT16_C( 8028)) }, { simde_x_mm_set_pu16(UINT16_C(52226), UINT16_C(24593), UINT16_C(54243), UINT16_C(33338)), simde_x_mm_set_pu16(UINT16_C(55013), UINT16_C( 9352), UINT16_C(30196), UINT16_C(22000)), simde_x_mm_set_pu16(UINT16_C(43840), UINT16_C( 3509), UINT16_C(24992), UINT16_C(11191)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_mulhi_pu16(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_u16x4(r, test_vec[i].r); } return 0; } static int test_simde_m_pmulhuw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_x_mm_set_pu16(UINT16_C(53192), UINT16_C(40647), UINT16_C(19455), UINT16_C( 638)), simde_x_mm_set_pu16(UINT16_C(25315), UINT16_C(56464), UINT16_C(55360), UINT16_C(30008)), simde_x_mm_set_pu16(UINT16_C(20546), UINT16_C(35020), UINT16_C(16434), UINT16_C( 292)) }, { simde_x_mm_set_pu16(UINT16_C(37591), UINT16_C(35425), UINT16_C(33771), UINT16_C(61304)), simde_x_mm_set_pu16(UINT16_C( 7821), UINT16_C( 6117), UINT16_C(18035), UINT16_C(21492)), simde_x_mm_set_pu16(UINT16_C( 4486), UINT16_C( 3306), UINT16_C( 9293), UINT16_C(20104)) }, { simde_x_mm_set_pu16(UINT16_C(56491), UINT16_C( 868), UINT16_C(60755), UINT16_C( 8677)), simde_x_mm_set_pu16(UINT16_C(58515), UINT16_C(60020), UINT16_C(23196), UINT16_C(54339)), simde_x_mm_set_pu16(UINT16_C(50439), UINT16_C( 794), UINT16_C(21503), UINT16_C( 7194)) }, { simde_x_mm_set_pu16(UINT16_C(20057), UINT16_C(59149), UINT16_C(59878), UINT16_C( 6128)), simde_x_mm_set_pu16(UINT16_C(47066), 
UINT16_C(54772), UINT16_C(43338), UINT16_C(21897)), simde_x_mm_set_pu16(UINT16_C(14404), UINT16_C(49434), UINT16_C(39596), UINT16_C( 2047)) }, { simde_x_mm_set_pu16(UINT16_C(59698), UINT16_C(43803), UINT16_C(12280), UINT16_C(21097)), simde_x_mm_set_pu16(UINT16_C(50904), UINT16_C(56386), UINT16_C(59528), UINT16_C( 6251)), simde_x_mm_set_pu16(UINT16_C(46369), UINT16_C(37687), UINT16_C(11154), UINT16_C( 2012)) }, { simde_x_mm_set_pu16(UINT16_C(20821), UINT16_C(22825), UINT16_C(58828), UINT16_C(28131)), simde_x_mm_set_pu16(UINT16_C(56256), UINT16_C(14992), UINT16_C( 5527), UINT16_C(36188)), simde_x_mm_set_pu16(UINT16_C(17872), UINT16_C( 5221), UINT16_C( 4961), UINT16_C(15533)) }, { simde_x_mm_set_pu16(UINT16_C(33433), UINT16_C(25342), UINT16_C(10000), UINT16_C(30758)), simde_x_mm_set_pu16(UINT16_C(22897), UINT16_C(28868), UINT16_C(29159), UINT16_C(17106)), simde_x_mm_set_pu16(UINT16_C(11680), UINT16_C(11162), UINT16_C( 4449), UINT16_C( 8028)) }, { simde_x_mm_set_pu16(UINT16_C(52226), UINT16_C(24593), UINT16_C(54243), UINT16_C(33338)), simde_x_mm_set_pu16(UINT16_C(55013), UINT16_C( 9352), UINT16_C(30196), UINT16_C(22000)), simde_x_mm_set_pu16(UINT16_C(43840), UINT16_C( 3509), UINT16_C(24992), UINT16_C(11191)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_pmulhuw(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_u16x4(r, test_vec[i].r); } return 0; } static int test_simde_mm_or_ps(SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 24636191), -INT32_C( 568024895), INT32_C(1143850822), -INT32_C(1856938366) }, { -INT32_C( 367983280), -INT32_C( 385742397), -INT32_C( 219313137), INT32_C( 79432164) }, { -INT32_C( 344461473), -INT32_C( 14243389), -INT32_C( 152187057), -INT32_C(1778556954) } }, { { -INT32_C(1257949964), INT32_C( 445852116), INT32_C(1935655152), INT32_C(1862578206) }, { INT32_C(2019104181), INT32_C( 761355038), -INT32_C( 887074841), 
INT32_C(1288690776) }, { -INT32_C( 44222987), INT32_C(1072921566), -INT32_C( 75509769), INT32_C(1875901534) } }, { { -INT32_C( 452865008), -INT32_C( 285240067), INT32_C(1935760980), -INT32_C( 991795953) }, { -INT32_C(1724040325), INT32_C(2110234262), INT32_C(1162405869), -INT32_C( 728688445) }, { -INT32_C( 46268549), -INT32_C( 24833), INT32_C(2003435517), -INT32_C( 722240049) } }, { { -INT32_C( 356936980), INT32_C(2077800486), INT32_C( 653146391), INT32_C( 434819230) }, { -INT32_C(1582094581), -INT32_C(1289848122), INT32_C( 620324449), INT32_C(1811450494) }, { -INT32_C( 340019217), -INT32_C( 69272858), INT32_C( 654278519), INT32_C(2080037630) } }, { { INT32_C(1129689373), -INT32_C(2118242966), INT32_C( 78097510), -INT32_C(2028039812) }, { INT32_C(2133381561), -INT32_C(1405991349), INT32_C( 735062957), -INT32_C( 745092682) }, { INT32_C(2138960317), -INT32_C(1380028565), INT32_C( 804761583), -INT32_C( 677455362) } }, { { -INT32_C( 468259974), INT32_C(2120602904), -INT32_C( 24966015), INT32_C(1468375454) }, { -INT32_C(1110004110), -INT32_C(1586951948), -INT32_C( 355649228), INT32_C(2109563650) }, { -INT32_C( 36245638), -INT32_C( 9576964), -INT32_C( 19972683), INT32_C(2143151006) } }, { { INT32_C(1734464335), INT32_C( 719767464), INT32_C(1898473939), INT32_C(2093526282) }, { INT32_C(1345953371), -INT32_C( 604921177), -INT32_C( 557465893), INT32_C(1901822498) }, { INT32_C(2004475743), -INT32_C( 67639377), -INT32_C( 1179685), INT32_C(2111549226) } }, { { -INT32_C( 19350443), INT32_C(1462288003), INT32_C( 835211303), INT32_C(1504547325) }, { -INT32_C( 693508561), INT32_C(1689361032), INT32_C(2067953241), INT32_C(1324129784) }, { -INT32_C( 17170817), INT32_C(2008661643), INT32_C(2076866175), INT32_C(1609407997) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a), b = simde_x_mm_loadu_epi32(test_vec[i].b), r = simde_mm_castps_si128(simde_mm_or_ps(simde_mm_castsi128_ps(a), 
simde_mm_castsi128_ps(b))); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_x_mm_negate_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 609.12), SIMDE_FLOAT32_C( -335.30), SIMDE_FLOAT32_C( -903.57), SIMDE_FLOAT32_C( 535.52) }, { SIMDE_FLOAT32_C( -609.12), SIMDE_FLOAT32_C( 335.30), SIMDE_FLOAT32_C( 903.57), SIMDE_FLOAT32_C( -535.52) } }, { { SIMDE_FLOAT32_C( -719.54), SIMDE_FLOAT32_C( 357.37), SIMDE_FLOAT32_C( 467.60), SIMDE_FLOAT32_C( 606.73) }, { SIMDE_FLOAT32_C( 719.54), SIMDE_FLOAT32_C( -357.37), SIMDE_FLOAT32_C( -467.60), SIMDE_FLOAT32_C( -606.73) } }, { { SIMDE_FLOAT32_C( 150.40), SIMDE_FLOAT32_C( 368.42), SIMDE_FLOAT32_C( -425.94), SIMDE_FLOAT32_C( -208.19) }, { SIMDE_FLOAT32_C( -150.40), SIMDE_FLOAT32_C( -368.42), SIMDE_FLOAT32_C( 425.94), SIMDE_FLOAT32_C( 208.19) } }, { { SIMDE_FLOAT32_C( -450.08), SIMDE_FLOAT32_C( 276.04), SIMDE_FLOAT32_C( 89.04), SIMDE_FLOAT32_C( 692.08) }, { SIMDE_FLOAT32_C( 450.08), SIMDE_FLOAT32_C( -276.04), SIMDE_FLOAT32_C( -89.04), SIMDE_FLOAT32_C( -692.08) } }, { { SIMDE_FLOAT32_C( -956.32), SIMDE_FLOAT32_C( -75.40), SIMDE_FLOAT32_C( 701.34), SIMDE_FLOAT32_C( 998.74) }, { SIMDE_FLOAT32_C( 956.32), SIMDE_FLOAT32_C( 75.40), SIMDE_FLOAT32_C( -701.34), SIMDE_FLOAT32_C( -998.74) } }, { { SIMDE_FLOAT32_C( 827.21), SIMDE_FLOAT32_C( 940.48), SIMDE_FLOAT32_C( -609.72), SIMDE_FLOAT32_C( -903.80) }, { SIMDE_FLOAT32_C( -827.21), SIMDE_FLOAT32_C( -940.48), SIMDE_FLOAT32_C( 609.72), SIMDE_FLOAT32_C( 903.80) } }, { { SIMDE_FLOAT32_C( 575.73), SIMDE_FLOAT32_C( -655.11), SIMDE_FLOAT32_C( -276.39), SIMDE_FLOAT32_C( 475.22) }, { SIMDE_FLOAT32_C( -575.73), SIMDE_FLOAT32_C( 655.11), SIMDE_FLOAT32_C( 276.39), SIMDE_FLOAT32_C( -475.22) } }, { { SIMDE_FLOAT32_C( 571.07), SIMDE_FLOAT32_C( 422.77), SIMDE_FLOAT32_C( -914.98), SIMDE_FLOAT32_C( 180.19) }, { SIMDE_FLOAT32_C( -571.07), SIMDE_FLOAT32_C( 
-422.77), SIMDE_FLOAT32_C( 914.98), SIMDE_FLOAT32_C( -180.19) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_x_mm_negate_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_rcp_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(-8.23f, -5.18f, 3.66f, -3.55f), simde_mm_set_ps(-0.12f, -0.19f, 0.27f, -0.28f) }, { simde_mm_set_ps( 5.27f, 1.94f, -1.37f, 3.59f), simde_mm_set_ps( 0.19f, 0.52f, -0.73f, 0.28f) }, { simde_mm_set_ps( 1.70f, -6.14f, 4.86f, 1.14f), simde_mm_set_ps( 0.59f, -0.16f, 0.21f, 0.88f) }, { simde_mm_set_ps(-6.46f, 0.31f, 0.95f, 1.02f), simde_mm_set_ps(-0.15f, 3.25f, 1.05f, 0.98f) }, { simde_mm_set_ps( 0.27f, 3.55f, -8.33f, -7.65f), simde_mm_set_ps( 3.70f, 0.28f, -0.12f, -0.13f) }, { simde_mm_set_ps( 3.04f, -3.61f, 1.60f, -3.92f), simde_mm_set_ps( 0.33f, -0.28f, 0.62f, -0.25f) }, { simde_mm_set_ps( 4.81f, 3.50f, 6.45f, 9.52f), simde_mm_set_ps( 0.21f, 0.29f, 0.15f, 0.11f) }, { simde_mm_set_ps( 6.39f, 6.57f, -0.50f, 6.01f), simde_mm_set_ps( 0.16f, 0.15f, -2.00f, 0.17f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m128 r = simde_mm_rcp_ps(test_vec[i].a); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_rcp_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(-0.17f, -5.82f, 9.03f, 0.51f), simde_mm_set_ps(-0.17f, -5.82f, 9.03f, 1.97f) }, { simde_mm_set_ps( 3.71f, 8.82f, 3.74f, -1.45f), simde_mm_set_ps( 3.71f, 8.82f, 3.74f, -0.69f) }, { simde_mm_set_ps( 6.34f, -2.54f, -3.13f, -5.87f), simde_mm_set_ps( 6.34f, -2.54f, -3.13f, -0.17f) }, { simde_mm_set_ps(-3.12f, 8.32f, 6.67f, 6.69f), simde_mm_set_ps(-3.12f, 8.32f, 6.67f, 0.15f) }, { simde_mm_set_ps( 8.74f, 8.53f, 5.33f, 6.71f), 
simde_mm_set_ps( 8.74f, 8.53f, 5.33f, 0.15f) }, { simde_mm_set_ps( 8.56f, -4.33f, 4.16f, -1.33f), simde_mm_set_ps( 8.56f, -4.33f, 4.16f, -0.75f) }, { simde_mm_set_ps( 0.83f, -2.25f, -0.87f, 8.44f), simde_mm_set_ps( 0.83f, -2.25f, -0.87f, 0.12f) }, { simde_mm_set_ps( 0.99f, 5.65f, -2.23f, 1.17f), simde_mm_set_ps( 0.99f, 5.65f, -2.23f, 0.85f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m128 r = simde_mm_rcp_ss(test_vec[i].a); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_rsqrt_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps( 19.76f, 43.05f, 24.93f, 44.79f), simde_mm_set_ps( 0.22f, 0.15f, 0.20f, 0.15f) }, { simde_mm_set_ps( 53.16f, 37.63f, 14.64f, 91.38f), simde_mm_set_ps( 0.14f, 0.16f, 0.26f, 0.10f) }, { simde_mm_set_ps( 51.45f, 20.86f, 31.69f, 22.93f), simde_mm_set_ps( 0.14f, 0.22f, 0.18f, 0.21f) }, { simde_mm_set_ps( 70.34f, 27.96f, 47.70f, 68.63f), simde_mm_set_ps( 0.12f, 0.19f, 0.14f, 0.12f) }, { simde_mm_set_ps( 15.37f, 83.67f, 71.19f, 29.53f), simde_mm_set_ps( 0.26f, 0.11f, 0.12f, 0.18f) }, { simde_mm_set_ps( 54.38f, 5.48f, 29.73f, 69.45f), simde_mm_set_ps( 0.14f, 0.43f, 0.18f, 0.12f) }, { simde_mm_set_ps( 84.04f, 25.31f, 28.88f, 94.95f), simde_mm_set_ps( 0.11f, 0.20f, 0.19f, 0.10f) }, { simde_mm_set_ps( 62.50f, 1.25f, 73.97f, 57.92f), simde_mm_set_ps( 0.13f, 0.90f, 0.12f, 0.13f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m128 r = simde_mm_rsqrt_ps(test_vec[i].a); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_rsqrt_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps( 19.76f, 43.05f, 24.93f, 44.79f), simde_mm_set_ps( 19.76f, 43.05f, 24.93f, 0.15f) }, { simde_mm_set_ps( 53.16f, 37.63f, 14.64f, 91.38f), simde_mm_set_ps( 53.16f, 37.63f, 14.64f, 0.10f) }, { 
simde_mm_set_ps( 51.45f, 20.86f, 31.69f, 22.93f), simde_mm_set_ps( 51.45f, 20.86f, 31.69f, 0.21f) }, { simde_mm_set_ps( 70.34f, 27.96f, 47.70f, 68.63f), simde_mm_set_ps( 70.34f, 27.96f, 47.70f, 0.12f) }, { simde_mm_set_ps( 15.37f, 83.67f, 71.19f, 29.53f), simde_mm_set_ps( 15.37f, 83.67f, 71.19f, 0.18f) }, { simde_mm_set_ps( 54.38f, 5.48f, 29.73f, 69.45f), simde_mm_set_ps( 54.38f, 5.48f, 29.73f, 0.12f) }, { simde_mm_set_ps( 84.04f, 25.31f, 28.88f, 94.95f), simde_mm_set_ps( 84.04f, 25.31f, 28.88f, 0.10f) }, { simde_mm_set_ps( 62.50f, 1.25f, 73.97f, 57.92f), simde_mm_set_ps( 62.50f, 1.25f, 73.97f, 0.13f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m128 r = simde_mm_rsqrt_ss(test_vec[i].a); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_sad_pu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_x_mm_set_pu8(158, 38, 204, 230, 242, 108, 135, 100), simde_x_mm_set_pu8(130, 168, 102, 233, 237, 176, 22, 158), simde_x_mm_set_pu16(0, 0, 0, 507) }, { simde_x_mm_set_pu8( 15, 252, 176, 193, 115, 44, 0, 83), simde_x_mm_set_pu8( 99, 169, 76, 203, 218, 181, 138, 226), simde_x_mm_set_pu16(0, 0, 0, 798) }, { simde_x_mm_set_pu8(230, 50, 152, 234, 252, 79, 170, 145), simde_x_mm_set_pu8(225, 219, 116, 170, 250, 129, 102, 178), simde_x_mm_set_pu16(0, 0, 0, 427) }, { simde_x_mm_set_pu8( 77, 112, 20, 247, 206, 117, 128, 107), simde_x_mm_set_pu8(189, 223, 203, 181, 71, 239, 64, 186), simde_x_mm_set_pu16(0, 0, 0, 872) }, { simde_x_mm_set_pu8(128, 104, 93, 138, 250, 105, 219, 255), simde_x_mm_set_pu8(113, 248, 217, 59, 72, 4, 165, 83), simde_x_mm_set_pu16(0, 0, 0, 867) }, { simde_x_mm_set_pu8(143, 12, 71, 81, 251, 175, 44, 206), simde_x_mm_set_pu8( 80, 100, 129, 82, 59, 63, 26, 22), simde_x_mm_set_pu16(0, 0, 0, 716) }, { simde_x_mm_set_pu8( 7, 202, 222, 71, 138, 18, 223, 92), simde_x_mm_set_pu8(208, 174, 15, 221, 13, 93, 209, 116), 
simde_x_mm_set_pu16(0, 0, 0, 824) }, { simde_x_mm_set_pu8( 92, 133, 132, 0, 24, 132, 201, 186), simde_x_mm_set_pu8(194, 29, 160, 58, 50, 10, 65, 234), simde_x_mm_set_pu16(0, 0, 0, 624) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_sad_pu8(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_u16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_psadbw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_x_mm_set_pu8(158, 38, 204, 230, 242, 108, 135, 100), simde_x_mm_set_pu8(130, 168, 102, 233, 237, 176, 22, 158), simde_x_mm_set_pu16(0, 0, 0, 507) }, { simde_x_mm_set_pu8( 15, 252, 176, 193, 115, 44, 0, 83), simde_x_mm_set_pu8( 99, 169, 76, 203, 218, 181, 138, 226), simde_x_mm_set_pu16(0, 0, 0, 798) }, { simde_x_mm_set_pu8(230, 50, 152, 234, 252, 79, 170, 145), simde_x_mm_set_pu8(225, 219, 116, 170, 250, 129, 102, 178), simde_x_mm_set_pu16(0, 0, 0, 427) }, { simde_x_mm_set_pu8( 77, 112, 20, 247, 206, 117, 128, 107), simde_x_mm_set_pu8(189, 223, 203, 181, 71, 239, 64, 186), simde_x_mm_set_pu16(0, 0, 0, 872) }, { simde_x_mm_set_pu8(128, 104, 93, 138, 250, 105, 219, 255), simde_x_mm_set_pu8(113, 248, 217, 59, 72, 4, 165, 83), simde_x_mm_set_pu16(0, 0, 0, 867) }, { simde_x_mm_set_pu8(143, 12, 71, 81, 251, 175, 44, 206), simde_x_mm_set_pu8( 80, 100, 129, 82, 59, 63, 26, 22), simde_x_mm_set_pu16(0, 0, 0, 716) }, { simde_x_mm_set_pu8( 7, 202, 222, 71, 138, 18, 223, 92), simde_x_mm_set_pu8(208, 174, 15, 221, 13, 93, 209, 116), simde_x_mm_set_pu16(0, 0, 0, 824) }, { simde_x_mm_set_pu8( 92, 133, 132, 0, 24, 132, 201, 186), simde_x_mm_set_pu8(194, 29, 160, 58, 50, 10, 65, 234), simde_x_mm_set_pu16(0, 0, 0, 624) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_m_psadbw(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_u16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static 
int test_simde_mm_shuffle_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16( 20374, -8020, 9831, -21724), simde_mm_set_pi16(-21724, -21724, 9831, 9831) }, { simde_mm_set_pi16( 26825, 6867, -1457, 28819), simde_mm_set_pi16( 28819, 28819, -1457, -1457) }, { simde_mm_set_pi16( -4347, -12641, -8333, -18450), simde_mm_set_pi16(-18450, -18450, -8333, -8333) }, { simde_mm_set_pi16( 22439, 23179, -32421, -3266), simde_mm_set_pi16( -3266, -3266, -32421, -32421) }, { simde_mm_set_pi16( 9337, -3310, 22225, -14472), simde_mm_set_pi16(-14472, -14472, 22225, 22225) }, { simde_mm_set_pi16(-17114, -15656, 26827, -1486), simde_mm_set_pi16( -1486, -1486, 26827, 26827) }, { simde_mm_set_pi16( 8123, 8758, 31545, -8216), simde_mm_set_pi16( -8216, -8216, 31545, 31545) }, { simde_mm_set_pi16(-32324, 31163, -3386, 23646), simde_mm_set_pi16( 23646, 23646, -3386, -3386) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m64 r = simde_mm_shuffle_pi16(test_vec[i].a, 5); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_m_pshufw(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16( 20374, -8020, 9831, -21724), simde_mm_set_pi16(-21724, -21724, 9831, 9831) }, { simde_mm_set_pi16( 26825, 6867, -1457, 28819), simde_mm_set_pi16( 28819, 28819, -1457, -1457) }, { simde_mm_set_pi16( -4347, -12641, -8333, -18450), simde_mm_set_pi16(-18450, -18450, -8333, -8333) }, { simde_mm_set_pi16( 22439, 23179, -32421, -3266), simde_mm_set_pi16( -3266, -3266, -32421, -32421) }, { simde_mm_set_pi16( 9337, -3310, 22225, -14472), simde_mm_set_pi16(-14472, -14472, 22225, 22225) }, { simde_mm_set_pi16(-17114, -15656, 26827, -1486), simde_mm_set_pi16( -1486, -1486, 26827, 26827) }, { simde_mm_set_pi16( 8123, 8758, 31545, -8216), simde_mm_set_pi16( -8216, -8216, 31545, 31545) }, { simde_mm_set_pi16(-32324, 31163, 
-3386, 23646), simde_mm_set_pi16( 23646, 23646, -3386, -3386) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m64 r = simde_m_pshufw(test_vec[i].a, 5); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } static int test_simde_mm_shuffle_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps( 387.45f, -469.79f, 719.43f, 371.94f), simde_mm_set_ps( 641.56f, 341.35f, 292.84f, 441.22f), simde_mm_set_ps( 441.22f, 441.22f, -469.79f, 387.45f) }, { simde_mm_set_ps( 648.82f, 641.81f, -789.10f, 982.80f), simde_mm_set_ps( 472.27f, -304.33f, 524.09f, -589.31f), simde_mm_set_ps(-589.31f, -589.31f, 641.81f, 648.82f) }, { simde_mm_set_ps(-163.67f, -311.30f, -600.60f, 597.71f), simde_mm_set_ps(-247.76f, 246.42f, -742.25f, -20.93f), simde_mm_set_ps( -20.93f, -20.93f, -311.30f, -163.67f) }, { simde_mm_set_ps( 968.74f, 810.41f, -699.53f, 224.20f), simde_mm_set_ps(-966.41f, 917.94f, -300.26f, 64.06f), simde_mm_set_ps( 64.06f, 64.06f, 810.41f, 968.74f) }, { simde_mm_set_ps( 99.15f, 957.94f, 380.12f, -611.50f), simde_mm_set_ps( -77.49f, -255.84f, 787.35f, -671.91f), simde_mm_set_ps(-671.91f, -671.91f, 957.94f, 99.15f) }, { simde_mm_set_ps(-280.55f, -182.50f, 340.17f, 473.64f), simde_mm_set_ps( -3.29f, -413.78f, -406.24f, 521.82f), simde_mm_set_ps( 521.82f, 521.82f, -182.50f, -280.55f) }, { simde_mm_set_ps(-677.92f, 481.01f, 494.26f, 565.24f), simde_mm_set_ps( 205.66f, 769.40f, -900.58f, -847.82f), simde_mm_set_ps(-847.82f, -847.82f, 481.01f, -677.92f) }, { simde_mm_set_ps( 703.71f, 397.64f, 773.55f, -739.53f), simde_mm_set_ps( 99.18f, -932.61f, -902.04f, 169.61f), simde_mm_set_ps( 169.61f, 169.61f, 397.64f, 703.71f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m128 r = simde_mm_shuffle_ps(test_vec[i].a, test_vec[i].b, 11); simde_test_x86_assert_equal_f32x4(test_vec[i].r, r, 1); } return 0; } static int 
test_simde_mm_sqrt_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 942.33), SIMDE_FLOAT32_C( 666.40), SIMDE_FLOAT32_C( 694.68), SIMDE_FLOAT32_C( 693.95) }, { SIMDE_FLOAT32_C( 30.70), SIMDE_FLOAT32_C( 25.81), SIMDE_FLOAT32_C( 26.36), SIMDE_FLOAT32_C( 26.34) } }, { { SIMDE_FLOAT32_C( 724.41), SIMDE_FLOAT32_C( 941.55), SIMDE_FLOAT32_C( 455.77), SIMDE_FLOAT32_C( 201.95) }, { SIMDE_FLOAT32_C( 26.91), SIMDE_FLOAT32_C( 30.68), SIMDE_FLOAT32_C( 21.35), SIMDE_FLOAT32_C( 14.21) } }, { { SIMDE_FLOAT32_C( 512.68), SIMDE_FLOAT32_C( 412.12), SIMDE_FLOAT32_C( 807.00), SIMDE_FLOAT32_C( 845.18) }, { SIMDE_FLOAT32_C( 22.64), SIMDE_FLOAT32_C( 20.30), SIMDE_FLOAT32_C( 28.41), SIMDE_FLOAT32_C( 29.07) } }, { { SIMDE_FLOAT32_C( 136.62), SIMDE_FLOAT32_C( 19.19), SIMDE_FLOAT32_C( 410.53), SIMDE_FLOAT32_C( 868.07) }, { SIMDE_FLOAT32_C( 11.69), SIMDE_FLOAT32_C( 4.38), SIMDE_FLOAT32_C( 20.26), SIMDE_FLOAT32_C( 29.46) } }, { { SIMDE_FLOAT32_C( 174.19), SIMDE_FLOAT32_C( 221.68), SIMDE_FLOAT32_C( 554.52), SIMDE_FLOAT32_C( 172.63) }, { SIMDE_FLOAT32_C( 13.20), SIMDE_FLOAT32_C( 14.89), SIMDE_FLOAT32_C( 23.55), SIMDE_FLOAT32_C( 13.14) } }, { { SIMDE_FLOAT32_C( 352.23), SIMDE_FLOAT32_C( 782.29), SIMDE_FLOAT32_C( 336.58), SIMDE_FLOAT32_C( 708.85) }, { SIMDE_FLOAT32_C( 18.77), SIMDE_FLOAT32_C( 27.97), SIMDE_FLOAT32_C( 18.35), SIMDE_FLOAT32_C( 26.62) } }, { { SIMDE_FLOAT32_C( 102.08), SIMDE_FLOAT32_C( 957.70), SIMDE_FLOAT32_C( 725.58), SIMDE_FLOAT32_C( 821.06) }, { SIMDE_FLOAT32_C( 10.10), SIMDE_FLOAT32_C( 30.95), SIMDE_FLOAT32_C( 26.94), SIMDE_FLOAT32_C( 28.65) } }, { { SIMDE_FLOAT32_C( 939.60), SIMDE_FLOAT32_C( 907.66), SIMDE_FLOAT32_C( 921.23), SIMDE_FLOAT32_C( 881.93) }, { SIMDE_FLOAT32_C( 30.65), SIMDE_FLOAT32_C( 30.13), SIMDE_FLOAT32_C( 30.35), SIMDE_FLOAT32_C( 29.70) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); 
simde__m128 r = simde_mm_sqrt_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_sqrt_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps( 619.69f, 412.13f, 497.92f, 538.00f), simde_mm_set_ps( 619.69f, 412.13f, 497.92f, 23.19f) }, { simde_mm_set_ps( 620.13f, 731.84f, 667.03f, 801.31f), simde_mm_set_ps( 620.13f, 731.84f, 667.03f, 28.31f) }, { simde_mm_set_ps( 720.02f, 314.91f, 596.04f, 727.65f), simde_mm_set_ps( 720.02f, 314.91f, 596.04f, 26.97f) }, { simde_mm_set_ps( 888.04f, 213.48f, 907.68f, 515.93f), simde_mm_set_ps( 888.04f, 213.48f, 907.68f, 22.71f) }, { simde_mm_set_ps( 196.63f, 888.92f, 163.58f, 421.90f), simde_mm_set_ps( 196.63f, 888.92f, 163.58f, 20.54f) }, { simde_mm_set_ps( 966.24f, 260.28f, 707.39f, 467.81f), simde_mm_set_ps( 966.24f, 260.28f, 707.39f, 21.63f) }, { simde_mm_set_ps( 762.80f, 805.99f, 106.70f, 460.44f), simde_mm_set_ps( 762.80f, 805.99f, 106.70f, 21.46f) }, { simde_mm_set_ps( 398.96f, 926.80f, 642.37f, 392.46f), simde_mm_set_ps( 398.96f, 926.80f, 642.37f, 19.81f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m128 r = simde_mm_sqrt_ss(test_vec[i].a); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_store_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; SIMDE_ALIGN_LIKE_16(simde__m128) simde_float32 r[4]; } test_vec[8] = { { simde_mm_set_ps( 797.84f, 342.63f, 173.26f, 427.65f), { 427.65f, 173.26f, 342.63f, 797.84f } }, { simde_mm_set_ps( 911.58f, 127.96f, 79.17f, 4.20f), { 4.20f, 79.17f, 127.96f, 911.58f } }, { simde_mm_set_ps( 206.93f, 418.54f, 921.68f, 840.36f), { 840.36f, 921.68f, 418.54f, 206.93f } }, { simde_mm_set_ps( 692.59f, 90.67f, 787.67f, 591.30f), { 591.30f, 787.67f, 90.67f, 692.59f } }, { simde_mm_set_ps( 57.64f, 25.24f, 486.10f, 797.60f), { 797.60f, 486.10f, 25.24f, 57.64f } }, { simde_mm_set_ps( 987.53f, 
693.84f, 633.62f, 186.39f), { 186.39f, 633.62f, 693.84f, 987.53f } }, { simde_mm_set_ps( 772.05f, 227.52f, 175.91f, 879.34f), { 879.34f, 175.91f, 227.52f, 772.05f } }, { simde_mm_set_ps( 640.49f, 596.15f, 891.32f, 37.15f), { 37.15f, 891.32f, 596.15f, 640.49f } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { SIMDE_ALIGN_LIKE_16(simde__m128) simde_float32 r[4]; simde_mm_store_ps(r, test_vec[i].a); simde_assert_equal_vf32(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_store_ps1(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; SIMDE_ALIGN_LIKE_16(simde__m128) simde_float32 r[4]; } test_vec[8] = { { simde_mm_set_ps( 854.57f, 299.85f, 53.35f, 467.01f), { 467.01f, 467.01f, 467.01f, 467.01f } }, { simde_mm_set_ps( 477.02f, 854.94f, 677.55f, 339.79f), { 339.79f, 339.79f, 339.79f, 339.79f } }, { simde_mm_set_ps( 736.99f, 148.01f, 536.77f, 621.38f), { 621.38f, 621.38f, 621.38f, 621.38f } }, { simde_mm_set_ps( 947.26f, 901.13f, 502.63f, 117.54f), { 117.54f, 117.54f, 117.54f, 117.54f } }, { simde_mm_set_ps( 193.39f, 545.92f, 514.75f, 687.45f), { 687.45f, 687.45f, 687.45f, 687.45f } }, { simde_mm_set_ps( 844.08f, 252.82f, 783.61f, 372.00f), { 372.00f, 372.00f, 372.00f, 372.00f } }, { simde_mm_set_ps( 988.23f, 778.23f, 199.92f, 424.51f), { 424.51f, 424.51f, 424.51f, 424.51f } }, { simde_mm_set_ps( 515.70f, 944.74f, 777.26f, 470.44f), { 470.44f, 470.44f, 470.44f, 470.44f } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { SIMDE_ALIGN_LIKE_16(simde__m128) simde_float32 r[4]; simde_mm_store_ps1(r, test_vec[i].a); simde_assert_equal_vf32(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_store_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde_float32 r[4]; } test_vec[8] = { { simde_mm_set_ps( 854.57f, 299.85f, 53.35f, 467.01f), { 467.01f, 0.00f, 0.00f, 0.00f } }, { simde_mm_set_ps( 477.02f, 854.94f, 677.55f, 339.79f), { 
339.79f, 0.00f, 0.00f, 0.00f } }, { simde_mm_set_ps( 736.99f, 148.01f, 536.77f, 621.38f), { 621.38f, 0.00f, 0.00f, 0.00f } }, { simde_mm_set_ps( 947.26f, 901.13f, 502.63f, 117.54f), { 117.54f, 0.00f, 0.00f, 0.00f } }, { simde_mm_set_ps( 193.39f, 545.92f, 514.75f, 687.45f), { 687.45f, 0.00f, 0.00f, 0.00f } }, { simde_mm_set_ps( 844.08f, 252.82f, 783.61f, 372.00f), { 372.00f, 0.00f, 0.00f, 0.00f } }, { simde_mm_set_ps( 988.23f, 778.23f, 199.92f, 424.51f), { 424.51f, 0.00f, 0.00f, 0.00f } }, { simde_mm_set_ps( 515.70f, 944.74f, 777.26f, 470.44f), { 470.44f, 0.00f, 0.00f, 0.00f } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde_float32 r[4] = { SIMDE_FLOAT32_C(0.0), }; simde_mm_store_ss(r, test_vec[i].a); simde_assert_equal_vf32(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_store1_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; SIMDE_ALIGN_LIKE_16(simde__m128) simde_float32 r[4]; } test_vec[8] = { { simde_mm_set_ps( 274.23f, 89.27f, 784.72f, 646.53f), { 646.53f, 646.53f, 646.53f, 646.53f } }, { simde_mm_set_ps( 322.23f, 913.84f, 883.41f, 484.76f), { 484.76f, 484.76f, 484.76f, 484.76f } }, { simde_mm_set_ps( 272.81f, 739.51f, 776.03f, 57.85f), { 57.85f, 57.85f, 57.85f, 57.85f } }, { simde_mm_set_ps( 414.64f, 733.36f, 680.70f, 973.86f), { 973.86f, 973.86f, 973.86f, 973.86f } }, { simde_mm_set_ps( 541.43f, 860.27f, 162.49f, 726.46f), { 726.46f, 726.46f, 726.46f, 726.46f } }, { simde_mm_set_ps( 735.30f, 924.35f, 18.46f, 634.38f), { 634.38f, 634.38f, 634.38f, 634.38f } }, { simde_mm_set_ps( 676.23f, 61.60f, 24.97f, 803.79f), { 803.79f, 803.79f, 803.79f, 803.79f } }, { simde_mm_set_ps( 373.81f, 509.18f, 233.96f, 910.43f), { 910.43f, 910.43f, 910.43f, 910.43f } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { SIMDE_ALIGN_LIKE_16(simde__m128) simde_float32 r[4] = { 0, }; simde_mm_store1_ps(r, test_vec[i].a); simde_assert_equal_vf32(sizeof(r) / sizeof(r[0]), 
r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_storeh_pi(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m64 r; } test_vec[8] = { { simde_mm_set_ps( 575.54f, 220.03f, 206.01f, 870.81f), simde_x_mm_set_f32x2(SIMDE_FLOAT32_C( 575.54), SIMDE_FLOAT32_C( 220.03)) }, { simde_mm_set_ps( 289.58f, 629.51f, 767.25f, 704.01f), simde_x_mm_set_f32x2(SIMDE_FLOAT32_C( 289.58), SIMDE_FLOAT32_C( 629.51)) }, { simde_mm_set_ps( 627.14f, 949.08f, 581.33f, 434.65f), simde_x_mm_set_f32x2(SIMDE_FLOAT32_C( 627.14), SIMDE_FLOAT32_C( 949.08)) }, { simde_mm_set_ps( 369.75f, 459.24f, 702.99f, 90.66f), simde_x_mm_set_f32x2(SIMDE_FLOAT32_C( 369.75), SIMDE_FLOAT32_C( 459.24)) }, { simde_mm_set_ps( 57.99f, 910.00f, 605.74f, 76.21f), simde_x_mm_set_f32x2(SIMDE_FLOAT32_C( 57.99), SIMDE_FLOAT32_C( 910.00)) }, { simde_mm_set_ps( 918.98f, 456.74f, 224.68f, 627.20f), simde_x_mm_set_f32x2(SIMDE_FLOAT32_C( 918.98), SIMDE_FLOAT32_C( 456.74)) }, { simde_mm_set_ps( 963.09f, 356.12f, 33.23f, 995.23f), simde_x_mm_set_f32x2(SIMDE_FLOAT32_C( 963.09), SIMDE_FLOAT32_C( 356.12)) }, { simde_mm_set_ps( 967.89f, 231.25f, 230.84f, 538.31f), simde_x_mm_set_f32x2(SIMDE_FLOAT32_C( 967.89), SIMDE_FLOAT32_C( 231.25)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m64 r; simde_mm_storeh_pi(&r, test_vec[i].a); simde_test_x86_assert_equal_f32x2(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_storel_pi(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m64 r; } test_vec[8] = { { simde_mm_set_ps( 258.22f, 461.17f, 500.19f, 792.00f), simde_x_mm_set_f32x2(SIMDE_FLOAT32_C( 500.19), SIMDE_FLOAT32_C( 792.00)) }, { simde_mm_set_ps( 839.05f, 239.15f, 886.65f, 576.97f), simde_x_mm_set_f32x2(SIMDE_FLOAT32_C( 886.65), SIMDE_FLOAT32_C( 576.97)) }, { simde_mm_set_ps( 905.98f, 580.29f, 37.85f, 782.47f), simde_x_mm_set_f32x2(SIMDE_FLOAT32_C( 37.85), SIMDE_FLOAT32_C( 782.47)) }, { simde_mm_set_ps( 367.06f, 905.12f, 385.52f, 288.25f), 
simde_x_mm_set_f32x2(SIMDE_FLOAT32_C( 385.52), SIMDE_FLOAT32_C( 288.25)) }, { simde_mm_set_ps( 345.47f, 91.39f, 757.41f, 177.58f), simde_x_mm_set_f32x2(SIMDE_FLOAT32_C( 757.41), SIMDE_FLOAT32_C( 177.58)) }, { simde_mm_set_ps( 41.60f, 601.66f, 873.51f, 499.62f), simde_x_mm_set_f32x2(SIMDE_FLOAT32_C( 873.51), SIMDE_FLOAT32_C( 499.62)) }, { simde_mm_set_ps( 185.64f, 301.30f, 85.92f, 684.99f), simde_x_mm_set_f32x2(SIMDE_FLOAT32_C( 85.92), SIMDE_FLOAT32_C( 684.99)) }, { simde_mm_set_ps( 556.83f, 958.68f, 557.15f, 788.14f), simde_x_mm_set_f32x2(SIMDE_FLOAT32_C( 557.15), SIMDE_FLOAT32_C( 788.14)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m64 r; simde_mm_storel_pi(&r, test_vec[i].a); simde_test_x86_assert_equal_f32x2(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_storer_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; SIMDE_ALIGN_LIKE_16(simde__m128) simde_float32 r[4]; } test_vec[8] = { { simde_mm_set_ps( 709.97f, 746.23f, 453.60f, 303.28f), { 709.97f, 746.23f, 453.60f, 303.28f } }, { simde_mm_set_ps( 764.16f, 229.13f, 431.64f, 384.08f), { 764.16f, 229.13f, 431.64f, 384.08f } }, { simde_mm_set_ps( 237.88f, 156.25f, 582.89f, 91.67f), { 237.88f, 156.25f, 582.89f, 91.67f } }, { simde_mm_set_ps( 244.82f, 86.89f, 868.91f, 106.59f), { 244.82f, 86.89f, 868.91f, 106.59f } }, { simde_mm_set_ps( 570.53f, 397.46f, 850.83f, 961.86f), { 570.53f, 397.46f, 850.83f, 961.86f } }, { simde_mm_set_ps( 946.89f, 801.12f, 881.81f, 752.94f), { 946.89f, 801.12f, 881.81f, 752.94f } }, { simde_mm_set_ps( 779.85f, 31.75f, 218.17f, 299.71f), { 779.85f, 31.75f, 218.17f, 299.71f } }, { simde_mm_set_ps( 409.61f, 712.65f, 619.44f, 952.97f), { 409.61f, 712.65f, 619.44f, 952.97f } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { SIMDE_ALIGN_LIKE_16(simde__m128) simde_float32 r[4] = { 0, }; simde_mm_storer_ps(r, test_vec[i].a); simde_assert_equal_vf32(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 1); } return 
0; } static int test_simde_mm_storeu_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde_float32 r[4]; } test_vec[8] = { { simde_mm_set_ps( 304.73f, 535.11f, 34.80f, 759.84f), { 759.84f, 34.80f, 535.11f, 304.73f } }, { simde_mm_set_ps( 360.59f, 120.47f, 501.36f, 116.79f), { 116.79f, 501.36f, 120.47f, 360.59f } }, { simde_mm_set_ps( 709.31f, 269.06f, 586.46f, 622.65f), { 622.65f, 586.46f, 269.06f, 709.31f } }, { simde_mm_set_ps( 653.72f, 295.37f, 94.24f, 886.62f), { 886.62f, 94.24f, 295.37f, 653.72f } }, { simde_mm_set_ps( 4.93f, 676.49f, 303.89f, 920.73f), { 920.73f, 303.89f, 676.49f, 4.93f } }, { simde_mm_set_ps( 930.44f, 841.16f, 891.12f, 349.34f), { 349.34f, 891.12f, 841.16f, 930.44f } }, { simde_mm_set_ps( 899.13f, 242.56f, 161.77f, 99.90f), { 99.90f, 161.77f, 242.56f, 899.13f } }, { simde_mm_set_ps( 350.94f, 738.74f, 750.24f, 329.42f), { 329.42f, 750.24f, 738.74f, 350.94f } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde_float32 r[4] = { 0, }; simde_mm_storeu_ps(r, test_vec[i].a); simde_assert_equal_vf32(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_sub_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(-670.13f, -257.04f, 449.06f, 965.86f), simde_mm_set_ps(-637.34f, 741.89f, -14.02f, 918.72f), simde_mm_set_ps( -32.80f, -998.93f, 463.07f, 47.13f) }, { simde_mm_set_ps(-744.89f, -52.84f, 460.90f, 134.66f), simde_mm_set_ps( 177.89f, -904.94f, 443.55f, -742.83f), simde_mm_set_ps(-922.78f, 852.10f, 17.35f, 877.49f) }, { simde_mm_set_ps( 807.56f, 945.11f, 259.44f, 557.41f), simde_mm_set_ps( 404.99f, 399.93f, -68.79f, 957.61f), simde_mm_set_ps( 402.57f, 545.19f, 328.23f, -400.20f) }, { simde_mm_set_ps( 74.77f, 473.53f, -483.74f, 415.65f), simde_mm_set_ps(-608.38f, 553.08f, -146.88f, 64.98f), simde_mm_set_ps( 683.15f, -79.55f, -336.86f, 350.67f) }, { simde_mm_set_ps( 879.68f, -763.80f, -53.30f, 95.40f), 
simde_mm_set_ps( 164.85f, 427.62f, 174.59f, 925.02f), simde_mm_set_ps( 714.82f, -1191.42f, -227.89f, -829.62f) }, { simde_mm_set_ps( 571.32f, 207.75f, 439.31f, -973.48f), simde_mm_set_ps(-719.88f, -346.38f, -249.12f, -239.62f), simde_mm_set_ps(1291.20f, 554.13f, 688.43f, -733.86f) }, { simde_mm_set_ps( 995.34f, -761.80f, -600.36f, 665.89f), simde_mm_set_ps( 578.64f, -853.98f, 329.02f, -360.51f), simde_mm_set_ps( 416.70f, 92.18f, -929.38f, 1026.41f) }, { simde_mm_set_ps(-961.63f, 818.44f, -364.30f, -740.41f), simde_mm_set_ps( 86.59f, 540.64f, 243.58f, 458.04f), simde_mm_set_ps(-1048.22f, 277.80f, -607.89f, -1198.46f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m128 r = simde_mm_sub_ps(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } /* Checks simde_mm_sub_ss on 8 fixed vectors; in every expected r only the last set_ps argument differs from a. Returns 0 when the loop completes. */ static int test_simde_mm_sub_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(-670.13f, -257.04f, 449.06f, 965.86f), simde_mm_set_ps(-637.34f, 741.89f, -14.02f, 918.72f), simde_mm_set_ps(-670.13f, -257.04f, 449.06f, 47.13f) }, { simde_mm_set_ps(-744.89f, -52.84f, 460.90f, 134.66f), simde_mm_set_ps( 177.89f, -904.94f, 443.55f, -742.83f), simde_mm_set_ps(-744.89f, -52.84f, 460.90f, 877.49f) }, { simde_mm_set_ps( 807.56f, 945.11f, 259.44f, 557.41f), simde_mm_set_ps( 404.99f, 399.93f, -68.79f, 957.61f), simde_mm_set_ps( 807.56f, 945.11f, 259.44f, -400.20f) }, { simde_mm_set_ps( 74.77f, 473.53f, -483.74f, 415.65f), simde_mm_set_ps(-608.38f, 553.08f, -146.88f, 64.98f), simde_mm_set_ps( 74.77f, 473.53f, -483.74f, 350.67f) }, { simde_mm_set_ps( 879.68f, -763.80f, -53.30f, 95.40f), simde_mm_set_ps( 164.85f, 427.62f, 174.59f, 925.02f), simde_mm_set_ps( 879.68f, -763.80f, -53.30f, -829.62f) }, { simde_mm_set_ps( 571.32f, 207.75f, 439.31f, -973.48f), simde_mm_set_ps(-719.88f, -346.38f, -249.12f, -239.62f), simde_mm_set_ps( 571.32f, 207.75f, 439.31f, -733.86f) }, { simde_mm_set_ps( 
995.34f, -761.80f, -600.36f, 665.89f), simde_mm_set_ps( 578.64f, -853.98f, 329.02f, -360.51f), simde_mm_set_ps( 995.34f, -761.80f, -600.36f, 1026.41f) }, { simde_mm_set_ps(-961.63f, 818.44f, -364.30f, -740.41f), simde_mm_set_ps( 86.59f, 540.64f, 243.58f, 458.04f), simde_mm_set_ps(-961.63f, 818.44f, -364.30f, -1198.46f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m128 r = simde_mm_sub_ss(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } /* Checks the int result of simde_mm_ucomieq_ss; each expected r is 1 exactly when the last set_ps arguments of a and b are equal, whatever the other arguments are. */ static int test_simde_mm_ucomieq_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; int r; } test_vec[8] = { { simde_mm_set_ps(0.4193f, 0.8439f, 0.8100f, 0.2669f), simde_mm_set_ps(0.4193f, 0.8439f, 0.8100f, 0.2669f), 1 }, { simde_mm_set_ps(0.9426f, 0.1679f, 0.2845f, 0.1698f), simde_mm_set_ps(0.9426f, 0.6931f, 0.2845f, 0.1698f), 1 }, { simde_mm_set_ps(0.0547f, 0.2368f, 0.3365f, 0.9146f), simde_mm_set_ps(0.0547f, 0.5935f, 0.3365f, 0.5014f), 0 }, { simde_mm_set_ps(0.7179f, 0.8607f, 0.4372f, 0.6140f), simde_mm_set_ps(0.7179f, 0.7239f, 0.4372f, 0.6140f), 1 }, { simde_mm_set_ps(0.4474f, 0.6848f, 0.4305f, 0.8738f), simde_mm_set_ps(0.6059f, 0.8463f, 0.4305f, 0.1517f), 0 }, { simde_mm_set_ps(0.4537f, 0.7254f, 0.9987f, 0.9115f), simde_mm_set_ps(0.1771f, 0.2982f, 0.9987f, 0.9003f), 0 }, { simde_mm_set_ps(0.2162f, 0.6303f, 0.0602f, 0.9986f), simde_mm_set_ps(0.2162f, 0.5872f, 0.0602f, 0.2491f), 0 }, { simde_mm_set_ps(0.4836f, 0.9929f, 0.8942f, 0.2367f), simde_mm_set_ps(0.4836f, 0.9929f, 0.4202f, 0.2367f), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { int r = simde_mm_ucomieq_ss(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } /* Checks the int result of simde_mm_ucomige_ss; each expected r is 1 exactly when the last set_ps argument of a is >= that of b. */ static int test_simde_mm_ucomige_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; int r; } test_vec[8] = { { simde_mm_set_ps(0.1174f, 0.9995f, 0.7362f, 0.9966f), simde_mm_set_ps(0.1174f, 0.5850f, 0.0225f, 0.0035f), 
1 }, { simde_mm_set_ps(0.6820f, 0.9892f, 0.6235f, 0.1819f), simde_mm_set_ps(0.0898f, 0.9892f, 0.6235f, 0.3305f), 0 }, { simde_mm_set_ps(0.7152f, 0.9286f, 0.9635f, 0.8823f), simde_mm_set_ps(0.3403f, 0.4177f, 0.9635f, 0.8737f), 1 }, { simde_mm_set_ps(0.5619f, 0.6892f, 0.2137f, 0.5336f), simde_mm_set_ps(0.1340f, 0.0152f, 0.9280f, 0.5336f), 1 }, { simde_mm_set_ps(0.5476f, 0.8606f, 0.2177f, 0.5284f), simde_mm_set_ps(0.5476f, 0.6253f, 0.1285f, 0.7135f), 0 }, { simde_mm_set_ps(0.6649f, 0.2053f, 0.5053f, 0.0378f), simde_mm_set_ps(0.0308f, 0.2053f, 0.5053f, 0.8789f), 0 }, { simde_mm_set_ps(0.3714f, 0.8736f, 0.8711f, 0.9491f), simde_mm_set_ps(0.8296f, 0.2212f, 0.5986f, 0.9491f), 1 }, { simde_mm_set_ps(0.8791f, 0.5862f, 0.4977f, 0.0888f), simde_mm_set_ps(0.8669f, 0.7545f, 0.4977f, 0.0888f), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { int r = simde_mm_ucomige_ss(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } /* Checks the int result of simde_mm_ucomigt_ss; each expected r is 1 exactly when the last set_ps argument of a is > that of b (equal inputs expect 0). */ static int test_simde_mm_ucomigt_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; int r; } test_vec[8] = { { simde_mm_set_ps(0.1174f, 0.9995f, 0.7362f, 0.9966f), simde_mm_set_ps(0.1174f, 0.5850f, 0.0225f, 0.0035f), 1 }, { simde_mm_set_ps(0.6820f, 0.9892f, 0.6235f, 0.1819f), simde_mm_set_ps(0.0898f, 0.9892f, 0.6235f, 0.3305f), 0 }, { simde_mm_set_ps(0.7152f, 0.9286f, 0.9635f, 0.8823f), simde_mm_set_ps(0.3403f, 0.4177f, 0.9635f, 0.8737f), 1 }, { simde_mm_set_ps(0.5619f, 0.6892f, 0.2137f, 0.5336f), simde_mm_set_ps(0.1340f, 0.0152f, 0.9280f, 0.5336f), 0 }, { simde_mm_set_ps(0.5476f, 0.8606f, 0.2177f, 0.5284f), simde_mm_set_ps(0.5476f, 0.6253f, 0.1285f, 0.7135f), 0 }, { simde_mm_set_ps(0.6649f, 0.2053f, 0.5053f, 0.0378f), simde_mm_set_ps(0.0308f, 0.2053f, 0.5053f, 0.8789f), 0 }, { simde_mm_set_ps(0.3714f, 0.8736f, 0.8711f, 0.9491f), simde_mm_set_ps(0.8296f, 0.2212f, 0.5986f, 0.9491f), 0 }, { simde_mm_set_ps(0.8791f, 0.5862f, 0.4977f, 0.0888f), simde_mm_set_ps(0.8669f, 0.7545f, 
0.4977f, 0.0888f), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { int r = simde_mm_ucomigt_ss(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } /* Checks the int result of simde_mm_ucomile_ss; each expected r is 1 exactly when the last set_ps argument of a is <= that of b. */ static int test_simde_mm_ucomile_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; int r; } test_vec[8] = { { simde_mm_set_ps(0.1174f, 0.9995f, 0.7362f, 0.9966f), simde_mm_set_ps(0.1174f, 0.5850f, 0.0225f, 0.0035f), 0 }, { simde_mm_set_ps(0.6820f, 0.9892f, 0.6235f, 0.1819f), simde_mm_set_ps(0.0898f, 0.9892f, 0.6235f, 0.3305f), 1 }, { simde_mm_set_ps(0.7152f, 0.9286f, 0.9635f, 0.8823f), simde_mm_set_ps(0.3403f, 0.4177f, 0.9635f, 0.8737f), 0 }, { simde_mm_set_ps(0.5619f, 0.6892f, 0.2137f, 0.5336f), simde_mm_set_ps(0.1340f, 0.0152f, 0.9280f, 0.5336f), 1 }, { simde_mm_set_ps(0.5476f, 0.8606f, 0.2177f, 0.5284f), simde_mm_set_ps(0.5476f, 0.6253f, 0.1285f, 0.7135f), 1 }, { simde_mm_set_ps(0.6649f, 0.2053f, 0.5053f, 0.0378f), simde_mm_set_ps(0.0308f, 0.2053f, 0.5053f, 0.8789f), 1 }, { simde_mm_set_ps(0.3714f, 0.8736f, 0.8711f, 0.9491f), simde_mm_set_ps(0.8296f, 0.2212f, 0.5986f, 0.9491f), 1 }, { simde_mm_set_ps(0.8791f, 0.5862f, 0.4977f, 0.0888f), simde_mm_set_ps(0.8669f, 0.7545f, 0.4977f, 0.0888f), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { int r = simde_mm_ucomile_ss(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } /* Checks the int result of simde_mm_ucomilt_ss; each expected r is 1 exactly when the last set_ps argument of a is < that of b (equal inputs expect 0). */ static int test_simde_mm_ucomilt_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; int r; } test_vec[8] = { { simde_mm_set_ps(0.1174f, 0.9995f, 0.7362f, 0.9966f), simde_mm_set_ps(0.1174f, 0.5850f, 0.0225f, 0.0035f), 0 }, { simde_mm_set_ps(0.6820f, 0.9892f, 0.6235f, 0.1819f), simde_mm_set_ps(0.0898f, 0.9892f, 0.6235f, 0.3305f), 1 }, { simde_mm_set_ps(0.7152f, 0.9286f, 0.9635f, 0.8823f), simde_mm_set_ps(0.3403f, 0.4177f, 0.9635f, 0.8737f), 0 }, { simde_mm_set_ps(0.5619f, 0.6892f, 0.2137f, 0.5336f), simde_mm_set_ps(0.1340f, 0.0152f, 
0.9280f, 0.5336f), 0 }, { simde_mm_set_ps(0.5476f, 0.8606f, 0.2177f, 0.5284f), simde_mm_set_ps(0.5476f, 0.6253f, 0.1285f, 0.7135f), 1 }, { simde_mm_set_ps(0.6649f, 0.2053f, 0.5053f, 0.0378f), simde_mm_set_ps(0.0308f, 0.2053f, 0.5053f, 0.8789f), 1 }, { simde_mm_set_ps(0.3714f, 0.8736f, 0.8711f, 0.9491f), simde_mm_set_ps(0.8296f, 0.2212f, 0.5986f, 0.9491f), 0 }, { simde_mm_set_ps(0.8791f, 0.5862f, 0.4977f, 0.0888f), simde_mm_set_ps(0.8669f, 0.7545f, 0.4977f, 0.0888f), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { int r = simde_mm_ucomilt_ss(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } /* Checks the int result of simde_mm_ucomineq_ss; each expected r is 1 exactly when the last set_ps arguments of a and b differ. */ static int test_simde_mm_ucomineq_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; int r; } test_vec[8] = { { simde_mm_set_ps(0.1174f, 0.9995f, 0.7362f, 0.9966f), simde_mm_set_ps(0.1174f, 0.5850f, 0.0225f, 0.0035f), 1 }, { simde_mm_set_ps(0.6820f, 0.9892f, 0.6235f, 0.1819f), simde_mm_set_ps(0.0898f, 0.9892f, 0.6235f, 0.3305f), 1 }, { simde_mm_set_ps(0.7152f, 0.9286f, 0.9635f, 0.8823f), simde_mm_set_ps(0.3403f, 0.4177f, 0.9635f, 0.8737f), 1 }, { simde_mm_set_ps(0.5619f, 0.6892f, 0.2137f, 0.5336f), simde_mm_set_ps(0.1340f, 0.0152f, 0.9280f, 0.5336f), 0 }, { simde_mm_set_ps(0.5476f, 0.8606f, 0.2177f, 0.5284f), simde_mm_set_ps(0.5476f, 0.6253f, 0.1285f, 0.7135f), 1 }, { simde_mm_set_ps(0.6649f, 0.2053f, 0.5053f, 0.0378f), simde_mm_set_ps(0.0308f, 0.2053f, 0.5053f, 0.8789f), 1 }, { simde_mm_set_ps(0.3714f, 0.8736f, 0.8711f, 0.9491f), simde_mm_set_ps(0.8296f, 0.2212f, 0.5986f, 0.9491f), 0 }, { simde_mm_set_ps(0.8791f, 0.5862f, 0.4977f, 0.0888f), simde_mm_set_ps(0.8669f, 0.7545f, 0.4977f, 0.0888f), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { int r = simde_mm_ucomineq_ss(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } /* Checks simde_mm_unpackhi_ps; numbering the set_ps arguments 1..4, every expected r is (b1, a1, b2, a2). */ static int test_simde_mm_unpackhi_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 
b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(-756.38f, -450.47f, 901.78f, 859.60f), simde_mm_set_ps( -17.95f, -383.47f, 601.72f, 328.61f), simde_mm_set_ps( -17.95f, -756.38f, -383.47f, -450.47f) }, { simde_mm_set_ps(-696.67f, -318.67f, -746.84f, 486.97f), simde_mm_set_ps( 76.54f, -173.13f, 775.95f, 744.80f), simde_mm_set_ps( 76.54f, -696.67f, -173.13f, -318.67f) }, { simde_mm_set_ps(-181.38f, -64.52f, -525.50f, 383.47f), simde_mm_set_ps(-830.80f, 462.90f, 208.76f, -840.43f), simde_mm_set_ps(-830.80f, -181.38f, 462.90f, -64.52f) }, { simde_mm_set_ps(-437.50f, -751.51f, -255.04f, 713.97f), simde_mm_set_ps(-835.16f, 838.19f, -968.67f, -800.39f), simde_mm_set_ps(-835.16f, -437.50f, 838.19f, -751.51f) }, { simde_mm_set_ps( 252.51f, 219.43f, 234.16f, 718.42f), simde_mm_set_ps( 737.55f, -360.52f, 129.47f, 866.75f), simde_mm_set_ps( 737.55f, 252.51f, -360.52f, 219.43f) }, { simde_mm_set_ps(-543.89f, -12.39f, 996.39f, -521.27f), simde_mm_set_ps(-330.73f, -334.66f, 798.87f, -360.08f), simde_mm_set_ps(-330.73f, -543.89f, -334.66f, -12.39f) }, { simde_mm_set_ps( 633.16f, 239.27f, 625.35f, 330.43f), simde_mm_set_ps(-890.86f, 495.17f, -524.21f, 275.19f), simde_mm_set_ps(-890.86f, 633.16f, 495.17f, 239.27f) }, { simde_mm_set_ps(-443.30f, -851.22f, 842.49f, 697.77f), simde_mm_set_ps( 8.81f, -953.12f, -1.45f, -983.63f), simde_mm_set_ps( 8.81f, -443.30f, -953.12f, -851.22f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m128 r = simde_mm_unpackhi_ps(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } /* Checks simde_mm_unpacklo_ps; numbering the set_ps arguments 1..4, every expected r is (b3, a3, b4, a4). */ static int test_simde_mm_unpacklo_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(-756.38f, -450.47f, 901.78f, 859.60f), simde_mm_set_ps( -17.95f, -383.47f, 601.72f, 328.61f), simde_mm_set_ps( 601.72f, 901.78f, 328.61f, 859.60f) }, { simde_mm_set_ps(-696.67f, -318.67f, -746.84f, 486.97f), simde_mm_set_ps( 76.54f, 
-173.13f, 775.95f, 744.80f), simde_mm_set_ps( 775.95f, -746.84f, 744.80f, 486.97f) }, { simde_mm_set_ps(-181.38f, -64.52f, -525.50f, 383.47f), simde_mm_set_ps(-830.80f, 462.90f, 208.76f, -840.43f), simde_mm_set_ps( 208.76f, -525.50f, -840.43f, 383.47f) }, { simde_mm_set_ps(-437.50f, -751.51f, -255.04f, 713.97f), simde_mm_set_ps(-835.16f, 838.19f, -968.67f, -800.39f), simde_mm_set_ps(-968.67f, -255.04f, -800.39f, 713.97f) }, { simde_mm_set_ps( 252.51f, 219.43f, 234.16f, 718.42f), simde_mm_set_ps( 737.55f, -360.52f, 129.47f, 866.75f), simde_mm_set_ps( 129.47f, 234.16f, 866.75f, 718.42f) }, { simde_mm_set_ps(-543.89f, -12.39f, 996.39f, -521.27f), simde_mm_set_ps(-330.73f, -334.66f, 798.87f, -360.08f), simde_mm_set_ps( 798.87f, 996.39f, -360.08f, -521.27f) }, { simde_mm_set_ps( 633.16f, 239.27f, 625.35f, 330.43f), simde_mm_set_ps(-890.86f, 495.17f, -524.21f, 275.19f), simde_mm_set_ps(-524.21f, 625.35f, 275.19f, 330.43f) }, { simde_mm_set_ps(-443.30f, -851.22f, 842.49f, 697.77f), simde_mm_set_ps( 8.81f, -953.12f, -1.45f, -983.63f), simde_mm_set_ps( -1.45f, 842.49f, -983.63f, 697.77f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m128 r = simde_mm_unpacklo_ps(test_vec[i].a, test_vec[i].b); simde_test_x86_assert_equal_f32x4(r, test_vec[i].r, 1); } return 0; } /* XORs the value returned by simde_mm_undefined_ps with itself and asserts the result equals simde_mm_setzero_ps() when both are compared as i32x4; x ^ x is zero for any starting bits. */ static int test_simde_mm_undefined_ps(SIMDE_MUNIT_TEST_ARGS) { simde__m128 z = simde_mm_setzero_ps(); simde__m128 v = simde_mm_undefined_ps(); v = simde_mm_xor_ps(v, v); simde_test_x86_assert_equal_i32x4(simde_mm_castps_si128(z), simde_mm_castps_si128(v)); return 0; } /* Checks simde_mm_xor_ps on int32 bit patterns: a and b are loaded with simde_x_mm_loadu_epi32, cast to simde__m128, XORed, cast back to simde__m128i and compared lane by lane with r. */ static int test_simde_mm_xor_ps(SIMDE_MUNIT_TEST_ARGS) { struct { const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { { -INT32_C(1471749541), INT32_C( 594543369), INT32_C(1488726073), -INT32_C( 377183697) }, { -INT32_C(1298589699), -INT32_C( 480283399), -INT32_C( 106049451), -INT32_C(1090298013) }, { INT32_C( 450885030), -INT32_C(1070630928), -INT32_C(1592657812), INT32_C(1451752780) } 
}, { { INT32_C(1265060161), -INT32_C(2039556275), INT32_C( 803088896), -INT32_C( 904371252) }, { INT32_C(1937551738), INT32_C(2052578341), INT32_C( 259196076), -INT32_C(1513195677) }, { INT32_C( 941357115), -INT32_C( 63370392), INT32_C( 548220588), INT32_C(1876300975) } }, { { INT32_C( 233846208), INT32_C( 227761677), INT32_C(1413247624), INT32_C(1327387861) }, { INT32_C( 717396741), INT32_C( 598022519), -INT32_C(2127423459), INT32_C(1344668047) }, { INT32_C( 657632965), INT32_C( 775309178), -INT32_C( 720475499), INT32_C( 523785562) } }, { { INT32_C(1130174006), -INT32_C( 61804172), INT32_C( 944802915), -INT32_C( 444108832) }, { -INT32_C(2129639158), -INT32_C(2136689309), INT32_C(1560401613), INT32_C( 246228951) }, { -INT32_C(1035116740), INT32_C(2096382999), INT32_C(1699830446), -INT32_C( 349550537) } }, { { -INT32_C(1320089027), INT32_C(1588437755), INT32_C( 261553710), INT32_C(2012487021) }, { -INT32_C( 889715609), -INT32_C(2025153351), INT32_C(1256475762), -INT32_C(1319595661) }, { INT32_C(2074676826), -INT32_C( 639156670), INT32_C(1165144668), -INT32_C( 961770466) } }, { { -INT32_C(1755141733), INT32_C(2062880843), INT32_C(2072611854), INT32_C( 284327337) }, { INT32_C(1004202626), -INT32_C( 121494138), -INT32_C( 465394063), -INT32_C( 745170377) }, { -INT32_C(1397210343), -INT32_C(2110310963), -INT32_C(1614075265), -INT32_C(1016600674) } }, { { -INT32_C(1888815036), INT32_C( 369712904), -INT32_C(1785621781), -INT32_C(1834646768) }, { -INT32_C( 204636051), INT32_C( 384536741), INT32_C(1861889590), INT32_C( 205623496) }, { INT32_C(2091350057), INT32_C( 14864301), -INT32_C( 76825379), -INT32_C(1629219880) } }, { { -INT32_C(1868780664), -INT32_C( 156850934), INT32_C(1217083192), INT32_C( 668610746) }, { INT32_C(1427810224), INT32_C(1852507447), -INT32_C( 52664781), INT32_C(2114461174) }, { -INT32_C( 981070792), -INT32_C(1731354563), -INT32_C(1269346037), INT32_C(1506946380) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a), b = simde_x_mm_loadu_epi32(test_vec[i].b), r = simde_mm_castps_si128(simde_mm_xor_ps(simde_mm_castsi128_ps(a), simde_mm_castsi128_ps(b))), e = simde_x_mm_loadu_epi32(test_vec[i].r); simde_test_x86_assert_equal_i32x4(r, e); } return 0; } /* Stores each a into a local simde__m64 with simde_mm_stream_pi and asserts it equals r (r repeats a in every vector); simde_mm_empty() is called once after the loop. */ static int test_simde_mm_stream_pi(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16( 6761, -22445, -23476, -30705), simde_mm_set_pi16( 6761, -22445, -23476, -30705) }, { simde_mm_set_pi16( 19012, -30608, 30482, -20411), simde_mm_set_pi16( 19012, -30608, 30482, -20411) }, { simde_mm_set_pi16( 23487, -28638, -4660, -3430), simde_mm_set_pi16( 23487, -28638, -4660, -3430) }, { simde_mm_set_pi16( -9884, -9588, -4377, -27077), simde_mm_set_pi16( -9884, -9588, -4377, -27077) }, { simde_mm_set_pi16( 15488, -17321, 2333, 8745), simde_mm_set_pi16( 15488, -17321, 2333, 8745) }, { simde_mm_set_pi16( 30278, 5796, -17311, 13106), simde_mm_set_pi16( 30278, 5796, -17311, 13106) }, { simde_mm_set_pi16(-18652, 26507, -8174, 31557), simde_mm_set_pi16(-18652, 26507, -8174, 31557) }, { simde_mm_set_pi16(-16725, 22668, 9074, 1013), simde_mm_set_pi16(-16725, 22668, 9074, 1013) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde__m64 r; simde_mm_stream_pi(&r, test_vec[i].a); simde_test_x86_assert_equal_i16x4(r, test_vec[i].r); } simde_mm_empty(); return 0; } /* Stores each a into a local float[4] declared with SIMDE_ALIGN_LIKE_16 via simde_mm_stream_ps and compares it with r, which lists a's set_ps arguments in reverse order. */ static int test_simde_mm_stream_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; SIMDE_ALIGN_LIKE_16(simde__m128) simde_float32 r[4]; } test_vec[8] = { { simde_mm_set_ps(-386.97f, 492.19f, 318.83f, 345.85f), { 345.85f, 318.83f, 492.19f, -386.97f } }, { simde_mm_set_ps( 908.43f, 787.88f, -776.77f, -773.68f), { -773.68f, -776.77f, 787.88f, 908.43f } }, { simde_mm_set_ps( 241.81f, 684.64f, -474.83f, 614.26f), { 614.26f, -474.83f, 684.64f, 241.81f } }, { simde_mm_set_ps(-327.50f, -550.14f, -266.51f, -677.19f), { -677.19f, -266.51f, -550.14f, -327.50f } 
}, { simde_mm_set_ps( 706.39f, -425.59f, 678.55f, -877.83f), { -877.83f, 678.55f, -425.59f, 706.39f } }, { simde_mm_set_ps( 902.50f, 144.03f, -93.04f, 995.74f), { 995.74f, -93.04f, 144.03f, 902.50f } }, { simde_mm_set_ps( 898.99f, -437.71f, -170.25f, 875.61f), { 875.61f, -170.25f, -437.71f, 898.99f } }, { simde_mm_set_ps( 347.85f, -128.18f, 904.62f, 936.88f), { 936.88f, 904.62f, -128.18f, 347.85f } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { SIMDE_ALIGN_LIKE_16(simde__m128) simde_float32 r[4]; simde_mm_stream_ps(r, test_vec[i].a); simde_assert_equal_vf32(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 1); } return 0; } /* Calls simde_mm_prefetch on the address of each a with hints SIMDE_MM_HINT_T0, _T1 and _T2; nothing is asserted and the r field of the table is never read. */ static int test_simde_mm_prefetch(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; SIMDE_ALIGN_LIKE_16(simde__m128) simde_float32 r[4]; } test_vec[8] = { { simde_mm_set_ps(-386.97f, 492.19f, 318.83f, 345.85f), { 345.85f, 318.83f, 492.19f, -386.97f } }, { simde_mm_set_ps( 908.43f, 787.88f, -776.77f, -773.68f), { -773.68f, -776.77f, 787.88f, 908.43f } }, { simde_mm_set_ps( 241.81f, 684.64f, -474.83f, 614.26f), { 614.26f, -474.83f, 684.64f, 241.81f } }, { simde_mm_set_ps(-327.50f, -550.14f, -266.51f, -677.19f), { -677.19f, -266.51f, -550.14f, -327.50f } }, { simde_mm_set_ps( 706.39f, -425.59f, 678.55f, -877.83f), { -877.83f, 678.55f, -425.59f, 706.39f } }, { simde_mm_set_ps( 902.50f, 144.03f, -93.04f, 995.74f), { 995.74f, -93.04f, 144.03f, 902.50f } }, { simde_mm_set_ps( 898.99f, -437.71f, -170.25f, 875.61f), { 875.61f, -170.25f, -437.71f, 898.99f } }, { simde_mm_set_ps( 347.85f, -128.18f, 904.62f, 936.88f), { 936.88f, 904.62f, -128.18f, 347.85f } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { simde_mm_prefetch(HEDLEY_REINTERPRET_CAST(const char *, &test_vec[i].a), SIMDE_MM_HINT_T0); simde_mm_prefetch(HEDLEY_REINTERPRET_CAST(const char *, &test_vec[i].a), SIMDE_MM_HINT_T1); simde_mm_prefetch(HEDLEY_REINTERPRET_CAST(const char *, &test_vec[i].a), SIMDE_MM_HINT_T2); } return 0; }
SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_ps1) SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_setzero_ps) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_setone_ps) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_abs_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_and_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_andnot_ps) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_not_ps) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_select_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_avg_pu16) SIMDE_TEST_FUNC_LIST_ENTRY(m_pavgw) SIMDE_TEST_FUNC_LIST_ENTRY(mm_avg_pu8) SIMDE_TEST_FUNC_LIST_ENTRY(m_pavgb) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpge_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpge_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmple_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmple_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmplt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmplt_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpneq_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpneq_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnge_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnge_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpngt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpngt_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnle_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnle_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnlt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnlt_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpord_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpord_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpunord_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpunord_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comieq_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comige_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comigt_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comile_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comilt_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comineq_ss) 
SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_copysign_ps) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_xorsign_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvt_pi2ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvt_ps2pi) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvt_si2ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvt_ss2si) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtpi16_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtpi32_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtpi32x2_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtpi8_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtps_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtps_pi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtps_pi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtpu16_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtpu8_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi32_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi64_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtss_f32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtss_si32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtss_si64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtt_ps2pi) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtt_ss2si) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvttss_si64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_div_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_div_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_extract_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(m_pextrw) SIMDE_TEST_FUNC_LIST_ENTRY(mm_insert_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(m_pinsrw) SIMDE_TEST_FUNC_LIST_ENTRY(mm_load_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_load_ps1) SIMDE_TEST_FUNC_LIST_ENTRY(mm_load_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadh_pi) SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadl_pi) SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadr_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskmove_si64) SIMDE_TEST_FUNC_LIST_ENTRY(m_maskmovq) SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(m_pmaxsw) SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_pu8) SIMDE_TEST_FUNC_LIST_ENTRY(m_pmaxub) SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(m_pminsw) SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_pu8) SIMDE_TEST_FUNC_LIST_ENTRY(m_pminub) SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_ps) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_move_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_movehl_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_movelh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_movemask_pi8) SIMDE_TEST_FUNC_LIST_ENTRY(m_pmovmskb) SIMDE_TEST_FUNC_LIST_ENTRY(mm_movemask_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mul_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mul_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mulhi_pu16) SIMDE_TEST_FUNC_LIST_ENTRY(m_pmulhuw) SIMDE_TEST_FUNC_LIST_ENTRY(mm_or_ps) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_negate_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_rcp_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_rcp_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_rsqrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_rsqrt_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sad_pu8) SIMDE_TEST_FUNC_LIST_ENTRY(m_psadbw) SIMDE_TEST_FUNC_LIST_ENTRY(mm_shuffle_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(m_pshufw) SIMDE_TEST_FUNC_LIST_ENTRY(mm_shuffle_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sqrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sqrt_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_store_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_store_ps1) SIMDE_TEST_FUNC_LIST_ENTRY(mm_store_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_store1_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeh_pi) SIMDE_TEST_FUNC_LIST_ENTRY(mm_storel_pi) SIMDE_TEST_FUNC_LIST_ENTRY(mm_storer_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeu_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomieq_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomige_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomigt_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomile_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomilt_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomineq_ss) SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_undefined_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_xor_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_stream_pi) SIMDE_TEST_FUNC_LIST_ENTRY(mm_stream_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_prefetch) SIMDE_TEST_FUNC_LIST_END #include 
simde-0.7.2/test/x86/sse2.c000066400000000000000000022626371400333146700152700ustar00rootroot00000000000000/* Copyright (c) 2017, 2019 Evan Nemerson * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #include "test/test.h" #define SIMDE_TESTS_CURRENT_ISAX sse2 #include #include #if defined(HEDLEY_MSVC_VERSION) # pragma warning(disable:4324) #endif /* Checks simde_x_mm_abs_pd: a is loaded with simde_mm_loadu_pd and the result is compared with r, which holds a's values with any minus sign removed. */ static int test_simde_x_mm_abs_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 147.28), SIMDE_FLOAT64_C( 704.65) }, { SIMDE_FLOAT64_C( 147.28), SIMDE_FLOAT64_C( 704.65) } }, { { SIMDE_FLOAT64_C( 136.85), SIMDE_FLOAT64_C( -756.74) }, { SIMDE_FLOAT64_C( 136.85), SIMDE_FLOAT64_C( 756.74) } }, { { SIMDE_FLOAT64_C( 178.63), SIMDE_FLOAT64_C( -900.20) }, { SIMDE_FLOAT64_C( 178.63), SIMDE_FLOAT64_C( 900.20) } }, { { SIMDE_FLOAT64_C( -651.54), SIMDE_FLOAT64_C( -517.72) }, { SIMDE_FLOAT64_C( 651.54), SIMDE_FLOAT64_C( 517.72) } }, { { SIMDE_FLOAT64_C( 75.39), SIMDE_FLOAT64_C( -705.91) }, { SIMDE_FLOAT64_C( 75.39), SIMDE_FLOAT64_C( 705.91) } }, { { SIMDE_FLOAT64_C( -738.47), SIMDE_FLOAT64_C( -668.92) }, { SIMDE_FLOAT64_C( 738.47), SIMDE_FLOAT64_C( 668.92) } }, { { SIMDE_FLOAT64_C( 212.72), SIMDE_FLOAT64_C( -499.79) }, { SIMDE_FLOAT64_C( 212.72), SIMDE_FLOAT64_C( 499.79) } }, { { SIMDE_FLOAT64_C( 481.67), SIMDE_FLOAT64_C( 233.48) }, { SIMDE_FLOAT64_C( 481.67), SIMDE_FLOAT64_C( 233.48) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_x_mm_abs_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } /* Checks simde_mm_add_epi8 on vectors of 16 int8_t lanes loaded with simde_x_mm_loadu_epi8; the expected sums wrap around (e.g. -66 + -124 is listed as 66). */ static int test_simde_mm_add_epi8 (SIMDE_MUNIT_TEST_ARGS) { struct { int8_t a[16]; int8_t b[16]; int8_t r[16]; } test_vec[] = { { { INT8_C( 90), INT8_C( 118), -INT8_C( 35), -INT8_C( 66), INT8_C( 97), INT8_C( 96), INT8_C( 20), -INT8_C( 105), INT8_C( 78), -INT8_C( 32), INT8_C( 110), -INT8_C( 33), -INT8_C( 12), INT8_C( 9), INT8_C( 119), -INT8_C( 73) }, { -INT8_C( 117), -INT8_C( 121), INT8_C( 108), -INT8_C( 124), -INT8_C( 90), INT8_C( 100), -INT8_C( 121), -INT8_C( 115), INT8_C( 60), INT8_C( 124), INT8_C( 
32), -INT8_C( 86), INT8_C( 118), -INT8_C( 20), -INT8_C( 123), -INT8_C( 48) }, { -INT8_C( 27), -INT8_C( 3), INT8_C( 73), INT8_C( 66), INT8_C( 7), -INT8_C( 60), -INT8_C( 101), INT8_C( 36), -INT8_C( 118), INT8_C( 92), -INT8_C( 114), -INT8_C( 119), INT8_C( 106), -INT8_C( 11), -INT8_C( 4), -INT8_C( 121) } }, { { INT8_C( 99), INT8_C( 98), -INT8_C( 113), -INT8_C( 60), -INT8_C( 62), -INT8_C( 93), INT8_C( 92), INT8_C( 16), -INT8_C( 125), -INT8_C( 54), -INT8_C( 16), INT8_C( 119), -INT8_C( 45), INT8_C( 103), INT8_C( 47), INT8_C( 95) }, { -INT8_C( 18), -INT8_C( 101), -INT8_C( 29), -INT8_C( 108), -INT8_C( 1), INT8_C( 106), INT8_C( 34), INT8_C( 59), -INT8_C( 26), INT8_C( 66), -INT8_C( 27), INT8_C( 92), INT8_C( 47), INT8_C( 107), INT8_C( 44), -INT8_C( 110) }, { INT8_C( 81), -INT8_C( 3), INT8_C( 114), INT8_C( 88), -INT8_C( 63), INT8_C( 13), INT8_C( 126), INT8_C( 75), INT8_C( 105), INT8_C( 12), -INT8_C( 43), -INT8_C( 45), INT8_C( 2), -INT8_C( 46), INT8_C( 91), -INT8_C( 15) } }, { { -INT8_C( 51), -INT8_C( 69), INT8_C( 86), -INT8_C( 112), INT8_C( 94), -INT8_C( 78), -INT8_C( 96), -INT8_C( 31), INT8_C( 125), -INT8_C( 112), INT8_C( 89), INT8_C( 80), -INT8_C( 9), -INT8_C( 120), -INT8_C( 81), -INT8_C( 27) }, { INT8_C( 35), -INT8_C( 110), INT8_C( 122), INT8_C( 34), -INT8_C( 4), -INT8_C( 100), INT8_C( 94), -INT8_C( 30), -INT8_C( 34), INT8_C( 67), INT8_C( 62), INT8_C( 13), -INT8_C( 82), INT8_C( 107), -INT8_C( 97), INT8_C( 124) }, { -INT8_C( 16), INT8_C( 77), -INT8_C( 48), -INT8_C( 78), INT8_C( 90), INT8_C( 78), -INT8_C( 2), -INT8_C( 61), INT8_C( 91), -INT8_C( 45), -INT8_C( 105), INT8_C( 93), -INT8_C( 91), -INT8_C( 13), INT8_C( 78), INT8_C( 97) } }, { { INT8_C( 38), -INT8_C( 10), INT8_C( 12), -INT8_C( 123), -INT8_C( 88), -INT8_C( 84), INT8_C( 102), INT8_C( 37), INT8_C( 61), -INT8_C( 65), INT8_C( 118), INT8_C( 52), INT8_C( 71), INT8_C( 37), INT8_C( 26), INT8_C( 106) }, { -INT8_C( 72), -INT8_C( 108), -INT8_C( 115), -INT8_C( 76), INT8_C( 48), -INT8_C( 21), -INT8_C( 105), INT8_C( 14), INT8_C( 
46), -INT8_C( 43), INT8_C( 28), -INT8_C( 35), INT8_C( 64), -INT8_C( 69), INT8_C( 89), INT8_C( 103) }, { -INT8_C( 34), -INT8_C( 118), -INT8_C( 103), INT8_C( 57), -INT8_C( 40), -INT8_C( 105), -INT8_C( 3), INT8_C( 51), INT8_C( 107), -INT8_C( 108), -INT8_C( 110), INT8_C( 17), -INT8_C( 121), -INT8_C( 32), INT8_C( 115), -INT8_C( 47) } }, { { -INT8_C( 79), INT8_C( 101), -INT8_C( 20), INT8_C( 90), INT8_C( 17), INT8_C( 82), INT8_MAX, INT8_C( 78), INT8_C( 18), -INT8_C( 11), -INT8_C( 125), INT8_C( 89), INT8_C( 27), -INT8_C( 99), -INT8_C( 60), -INT8_C( 45) }, { INT8_C( 49), INT8_C( 81), -INT8_C( 121), INT8_C( 97), INT8_C( 60), INT8_C( 30), INT8_C( 111), INT8_C( 106), -INT8_C( 12), -INT8_C( 117), INT8_C( 71), INT8_C( 52), INT8_C( 71), -INT8_C( 96), -INT8_C( 101), -INT8_C( 8) }, { -INT8_C( 30), -INT8_C( 74), INT8_C( 115), -INT8_C( 69), INT8_C( 77), INT8_C( 112), -INT8_C( 18), -INT8_C( 72), INT8_C( 6), INT8_MIN, -INT8_C( 54), -INT8_C( 115), INT8_C( 98), INT8_C( 61), INT8_C( 95), -INT8_C( 53) } }, { { INT8_C( 5), -INT8_C( 121), INT8_C( 82), INT8_C( 23), -INT8_C( 38), -INT8_C( 46), INT8_C( 101), -INT8_C( 20), -INT8_C( 57), -INT8_C( 24), INT8_C( 69), -INT8_C( 30), -INT8_C( 123), INT8_C( 9), -INT8_C( 75), -INT8_C( 74) }, { INT8_C( 90), INT8_C( 61), INT8_C( 23), -INT8_C( 106), INT8_C( 91), -INT8_C( 121), INT8_C( 1), INT8_C( 79), INT8_C( 18), INT8_C( 72), -INT8_C( 124), INT8_C( 89), -INT8_C( 23), INT8_C( 31), INT8_C( 82), -INT8_C( 18) }, { INT8_C( 95), -INT8_C( 60), INT8_C( 105), -INT8_C( 83), INT8_C( 53), INT8_C( 89), INT8_C( 102), INT8_C( 59), -INT8_C( 39), INT8_C( 48), -INT8_C( 55), INT8_C( 59), INT8_C( 110), INT8_C( 40), INT8_C( 7), -INT8_C( 92) } }, { { -INT8_C( 89), -INT8_C( 92), INT8_C( 5), -INT8_C( 127), INT8_C( 118), INT8_C( 107), INT8_C( 109), INT8_C( 62), INT8_C( 83), -INT8_C( 78), INT8_C( 32), -INT8_C( 39), -INT8_C( 68), -INT8_C( 42), -INT8_C( 113), INT8_C( 22) }, { INT8_C( 19), -INT8_C( 89), -INT8_C( 83), INT8_C( 110), INT8_C( 46), -INT8_C( 82), -INT8_C( 66), INT8_C( 64), 
-INT8_C( 10), INT8_C( 66), -INT8_C( 102), -INT8_C( 33), INT8_C( 97), -INT8_C( 20), -INT8_C( 50), INT8_C( 8) }, { -INT8_C( 70), INT8_C( 75), -INT8_C( 78), -INT8_C( 17), -INT8_C( 92), INT8_C( 25), INT8_C( 43), INT8_C( 126), INT8_C( 73), -INT8_C( 12), -INT8_C( 70), -INT8_C( 72), INT8_C( 29), -INT8_C( 62), INT8_C( 93), INT8_C( 30) } }, { { -INT8_C( 112), -INT8_C( 45), -INT8_C( 119), INT8_C( 7), INT8_C( 62), -INT8_C( 10), INT8_C( 69), -INT8_C( 110), -INT8_C( 87), INT8_C( 101), INT8_C( 107), INT8_C( 101), INT8_C( 59), -INT8_C( 6), INT8_C( 123), INT8_C( 78) }, { -INT8_C( 95), INT8_C( 40), -INT8_C( 67), -INT8_C( 49), -INT8_C( 42), INT8_C( 123), INT8_C( 16), -INT8_C( 51), -INT8_C( 67), -INT8_C( 86), -INT8_C( 84), INT8_C( 30), -INT8_C( 106), INT8_C( 122), INT8_C( 39), INT8_C( 38) }, { INT8_C( 49), -INT8_C( 5), INT8_C( 70), -INT8_C( 42), INT8_C( 20), INT8_C( 113), INT8_C( 85), INT8_C( 95), INT8_C( 102), INT8_C( 15), INT8_C( 23), -INT8_C( 125), -INT8_C( 47), INT8_C( 116), -INT8_C( 94), INT8_C( 116) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_add_epi8(a, b); simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[i].r)); } return 0; } /* Checks simde_mm_add_epi16 on vectors of 8 int16_t lanes loaded with simde_x_mm_loadu_epi16; the expected sums wrap around (e.g. -4111 + -31999 is listed as 29426). */ static int test_simde_mm_add_epi16 (SIMDE_MUNIT_TEST_ARGS) { struct { int16_t a[8]; int16_t b[8]; int16_t r[8]; } test_vec[] = { { { -INT16_C( 4111), -INT16_C( 19283), INT16_C( 32346), INT16_C( 31529), INT16_C( 28909), -INT16_C( 11812), INT16_C( 20575), INT16_C( 29075) }, { -INT16_C( 31999), INT16_C( 11862), INT16_C( 4324), -INT16_C( 23595), INT16_C( 24767), -INT16_C( 10354), -INT16_C( 11824), -INT16_C( 16113) }, { INT16_C( 29426), -INT16_C( 7421), -INT16_C( 28866), INT16_C( 7934), -INT16_C( 11860), -INT16_C( 22166), INT16_C( 8751), INT16_C( 12962) } }, { { -INT16_C( 17215), INT16_C( 7029), -INT16_C( 24774), INT16_C( 10134), INT16_C( 29199), INT16_C( 28409), 
-INT16_C( 29502), -INT16_C( 15137) }, { INT16_C( 13584), -INT16_C( 2830), -INT16_C( 14522), INT16_C( 1431), INT16_C( 9512), -INT16_C( 1828), -INT16_C( 5129), -INT16_C( 18247) }, { -INT16_C( 3631), INT16_C( 4199), INT16_C( 26240), INT16_C( 11565), -INT16_C( 26825), INT16_C( 26581), INT16_C( 30905), INT16_C( 32152) } }, { { INT16_C( 11944), -INT16_C( 7469), INT16_C( 27085), -INT16_C( 9206), INT16_C( 987), -INT16_C( 25013), INT16_C( 10895), -INT16_C( 24734) }, { INT16_C( 21600), -INT16_C( 22892), INT16_C( 11036), INT16_C( 17579), -INT16_C( 30895), INT16_C( 18492), -INT16_C( 2701), INT16_C( 6912) }, { -INT16_C( 31992), -INT16_C( 30361), -INT16_C( 27415), INT16_C( 8373), -INT16_C( 29908), -INT16_C( 6521), INT16_C( 8194), -INT16_C( 17822) } }, { { -INT16_C( 11485), -INT16_C( 3587), INT16_C( 1852), INT16_C( 6093), INT16_C( 6154), -INT16_C( 25931), INT16_C( 5955), -INT16_C( 23751) }, { -INT16_C( 12948), -INT16_C( 30647), -INT16_C( 2823), INT16_C( 19148), INT16_C( 2171), -INT16_C( 4462), -INT16_C( 27907), INT16_C( 8201) }, { -INT16_C( 24433), INT16_C( 31302), -INT16_C( 971), INT16_C( 25241), INT16_C( 8325), -INT16_C( 30393), -INT16_C( 21952), -INT16_C( 15550) } }, { { INT16_C( 1893), -INT16_C( 24303), -INT16_C( 8434), INT16_C( 6584), INT16_C( 28407), INT16_C( 15027), -INT16_C( 4987), -INT16_C( 3619) }, { INT16_C( 9914), -INT16_C( 19591), INT16_C( 17690), -INT16_C( 26883), -INT16_C( 28851), INT16_C( 19076), -INT16_C( 29151), -INT16_C( 31125) }, { INT16_C( 11807), INT16_C( 21642), INT16_C( 9256), -INT16_C( 20299), -INT16_C( 444), -INT16_C( 31433), INT16_C( 31398), INT16_C( 30792) } }, { { INT16_C( 31893), -INT16_C( 23769), -INT16_C( 8357), INT16_C( 21436), INT16_C( 28493), -INT16_C( 11379), INT16_C( 27484), INT16_C( 5828) }, { INT16_C( 16017), -INT16_C( 21303), -INT16_C( 14717), -INT16_C( 11966), -INT16_C( 14763), INT16_C( 30235), -INT16_C( 31148), -INT16_C( 5636) }, { -INT16_C( 17626), INT16_C( 20464), -INT16_C( 23074), INT16_C( 9470), INT16_C( 13730), INT16_C( 18856), 
-INT16_C( 3664), INT16_C( 192) } }, { { INT16_C( 8963), INT16_C( 24205), INT16_C( 18690), INT16_C( 20657), INT16_C( 16313), INT16_C( 5411), -INT16_C( 6230), INT16_C( 15147) }, { -INT16_C( 3035), -INT16_C( 22041), INT16_C( 10682), INT16_C( 3962), -INT16_C( 27152), INT16_C( 17541), -INT16_C( 32484), INT16_C( 7982) }, { INT16_C( 5928), INT16_C( 2164), INT16_C( 29372), INT16_C( 24619), -INT16_C( 10839), INT16_C( 22952), INT16_C( 26822), INT16_C( 23129) } }, { { -INT16_C( 17500), -INT16_C( 22915), INT16_C( 12036), -INT16_C( 16906), INT16_C( 6510), INT16_C( 6354), -INT16_C( 767), INT16_C( 9811) }, { INT16_C( 15345), -INT16_C( 21553), INT16_C( 18788), INT16_C( 21690), INT16_C( 16351), -INT16_C( 1127), -INT16_C( 14400), INT16_C( 25626) }, { -INT16_C( 2155), INT16_C( 21068), INT16_C( 30824), INT16_C( 4784), INT16_C( 22861), INT16_C( 5227), -INT16_C( 15167), -INT16_C( 30099) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_add_epi16(a, b); simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm_add_epi32 (SIMDE_MUNIT_TEST_ARGS) { struct { int32_t a[4]; int32_t b[4]; int32_t r[4]; } test_vec[] = { { { INT32_C( 1587156417), INT32_C( 1768270179), -INT32_C( 1942404587), INT32_C( 346970517) }, { INT32_C( 2141391970), INT32_C( 1584534422), INT32_C( 1144809083), -INT32_C( 446909148) }, { -INT32_C( 566418909), -INT32_C( 942162695), -INT32_C( 797595504), -INT32_C( 99938631) } }, { { INT32_C( 776206027), -INT32_C( 1265129313), INT32_C( 2134954218), -INT32_C( 1953239511) }, { -INT32_C( 1861535750), -INT32_C( 974160566), INT32_C( 134884324), -INT32_C( 1393727775) }, { -INT32_C( 1085329723), INT32_C( 2055677417), -INT32_C( 2025128754), INT32_C( 948000010) } }, { { -INT32_C( 69586852), -INT32_C( 1011912232), INT32_C( 1782771777), -INT32_C( 638134562) }, { 
INT32_C( 1466564877), INT32_C( 1646090622), INT32_C( 1718232965), -INT32_C( 384673907) }, { INT32_C( 1396978025), INT32_C( 634178390), -INT32_C( 793962554), -INT32_C( 1022808469) } }, { { INT32_C( 1625615495), -INT32_C( 1641835683), INT32_C( 1644717443), INT32_C( 1211891259) }, { INT32_C( 2124457471), -INT32_C( 2082423298), INT32_C( 1911114724), INT32_C( 710605730) }, { -INT32_C( 544894330), INT32_C( 570708315), -INT32_C( 739135129), INT32_C( 1922496989) } }, { { INT32_C( 1149910759), INT32_C( 1440918993), INT32_C( 1320676114), -INT32_C( 375983383) }, { -INT32_C( 1788397929), -INT32_C( 686209037), INT32_C( 893911698), -INT32_C( 446717186) }, { -INT32_C( 638487170), INT32_C( 754709956), -INT32_C( 2080379484), -INT32_C( 822700569) } }, { { -INT32_C( 1305810464), -INT32_C( 1475933034), -INT32_C( 503922953), INT32_C( 1204456880) }, { -INT32_C( 1210306109), INT32_C( 193918328), -INT32_C( 163522568), INT32_C( 1524342649) }, { INT32_C( 1778850723), -INT32_C( 1282014706), -INT32_C( 667445521), -INT32_C( 1566167767) } }, { { INT32_C( 504104328), INT32_C( 163975954), -INT32_C( 2115322415), INT32_C( 231257162) }, { INT32_C( 1589945573), -INT32_C( 1838591078), -INT32_C( 1551324886), -INT32_C( 788700344) }, { INT32_C( 2094049901), -INT32_C( 1674615124), INT32_C( 628319995), -INT32_C( 557443182) } }, { { INT32_C( 2079197545), -INT32_C( 310070244), -INT32_C( 1150390415), INT32_C( 164181539) }, { INT32_C( 1969720795), INT32_C( 168284384), -INT32_C( 1045524615), INT32_C( 1536273394) }, { -INT32_C( 246048956), -INT32_C( 141785860), INT32_C( 2099052266), INT32_C( 1700454933) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_add_epi32(a, b); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm_add_epi64 (SIMDE_MUNIT_TEST_ARGS) { struct { int64_t a[2]; 
int64_t b[2]; int64_t r[2]; } test_vec[] = { { { -INT64_C( 6468439616558299793), INT64_C( 2325632228821341991) }, { -INT64_C( 612652056685655455), -INT64_C( 191691543793121214) }, { -INT64_C( 7081091673243955248), INT64_C( 2133940685028220777) } }, { { -INT64_C( 894566178211475330), INT64_C( 6756798005412736627) }, { -INT64_C( 3896691714656888127), INT64_C( 2845879868330258419) }, { -INT64_C( 4791257892868363457), -INT64_C( 8844066199966556570) } }, { { INT64_C( 7901755739001462504), INT64_C( 1347655258826955098) }, { INT64_C( 8953142355952099055), INT64_C( 248677757309780642) }, { -INT64_C( 1591845978755990057), INT64_C( 1596333016136735740) } }, { { -INT64_C( 8141839393087780454), -INT64_C( 2946030458831039558) }, { -INT64_C( 4972663281470790409), INT64_C( 1165720327465335311) }, { INT64_C( 5332241399150980753), -INT64_C( 1780310131365704247) } }, { { -INT64_C( 15861257455999742), INT64_C( 4357558393977351353) }, { INT64_C( 7214407425212598092), -INT64_C( 7045112387664469068) }, { INT64_C( 7198546167756598350), -INT64_C( 2687553993687117715) } }, { { INT64_C( 4532200698918854304), INT64_C( 7262715306804571977) }, { -INT64_C( 803639368974039520), -INT64_C( 4520672699422448119) }, { INT64_C( 3728561329944814784), INT64_C( 2742042607382123858) } }, { { -INT64_C( 73591731732932298), INT64_C( 6050399403914353275) }, { -INT64_C( 5903761005476331555), -INT64_C( 4762108524214604026) }, { -INT64_C( 5977352737209263853), INT64_C( 1288290879699749249) } }, { { -INT64_C( 7465715716457918288), INT64_C( 2653502295939739981) }, { INT64_C( 4698470722568297185), -INT64_C( 3402942170898265983) }, { -INT64_C( 2767244993889621103), -INT64_C( 749439874958526002) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_add_epi64(a, b); simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; } 
static int test_simde_mm_add_pd (SIMDE_MUNIT_TEST_ARGS) { struct { simde_float64 a[2]; simde_float64 b[2]; simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 755.33), SIMDE_FLOAT64_C( 721.25) }, { SIMDE_FLOAT64_C( 781.60), SIMDE_FLOAT64_C( -779.68) }, { SIMDE_FLOAT64_C( 1536.93), SIMDE_FLOAT64_C( -58.44) } }, { { SIMDE_FLOAT64_C( -566.45), SIMDE_FLOAT64_C( -614.54) }, { SIMDE_FLOAT64_C( 194.36), SIMDE_FLOAT64_C( -334.34) }, { SIMDE_FLOAT64_C( -372.09), SIMDE_FLOAT64_C( -948.88) } }, { { SIMDE_FLOAT64_C( 813.61), SIMDE_FLOAT64_C( -315.29) }, { SIMDE_FLOAT64_C( 361.18), SIMDE_FLOAT64_C( 614.31) }, { SIMDE_FLOAT64_C( 1174.78), SIMDE_FLOAT64_C( 299.02) } }, { { SIMDE_FLOAT64_C( 824.96), SIMDE_FLOAT64_C( -193.54) }, { SIMDE_FLOAT64_C( 701.59), SIMDE_FLOAT64_C( -521.55) }, { SIMDE_FLOAT64_C( 1526.55), SIMDE_FLOAT64_C( -715.09) } }, { { SIMDE_FLOAT64_C( -703.59), SIMDE_FLOAT64_C( 322.49) }, { SIMDE_FLOAT64_C( -26.00), SIMDE_FLOAT64_C( 910.61) }, { SIMDE_FLOAT64_C( -729.59), SIMDE_FLOAT64_C( 1233.10) } }, { { SIMDE_FLOAT64_C( -720.23), SIMDE_FLOAT64_C( 197.82) }, { SIMDE_FLOAT64_C( -770.39), SIMDE_FLOAT64_C( -888.99) }, { SIMDE_FLOAT64_C( -1490.62), SIMDE_FLOAT64_C( -691.16) } }, { { SIMDE_FLOAT64_C( 238.41), SIMDE_FLOAT64_C( -248.68) }, { SIMDE_FLOAT64_C( -805.44), SIMDE_FLOAT64_C( 805.25) }, { SIMDE_FLOAT64_C( -567.03), SIMDE_FLOAT64_C( 556.57) } }, { { SIMDE_FLOAT64_C( 13.85), SIMDE_FLOAT64_C( -859.57) }, { SIMDE_FLOAT64_C( 840.09), SIMDE_FLOAT64_C( -230.82) }, { SIMDE_FLOAT64_C( 853.93), SIMDE_FLOAT64_C( -1090.39) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d b = simde_mm_loadu_pd(test_vec[i].b); simde__m128d r = simde_mm_add_pd(a, b); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_add_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { 
simde_mm_set_pd(SIMDE_FLOAT64_C( -348.09), SIMDE_FLOAT64_C( -603.87)), simde_mm_set_pd(SIMDE_FLOAT64_C( 42.81), SIMDE_FLOAT64_C( -955.64)), simde_mm_set_pd(SIMDE_FLOAT64_C( -348.09), SIMDE_FLOAT64_C(-1559.51)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 600.30), SIMDE_FLOAT64_C( 362.82)), simde_mm_set_pd(SIMDE_FLOAT64_C( -245.13), SIMDE_FLOAT64_C( -144.52)), simde_mm_set_pd(SIMDE_FLOAT64_C( 600.30), SIMDE_FLOAT64_C( 218.30)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -909.82), SIMDE_FLOAT64_C( -28.51)), simde_mm_set_pd(SIMDE_FLOAT64_C( -141.49), SIMDE_FLOAT64_C( 174.41)), simde_mm_set_pd(SIMDE_FLOAT64_C( -909.82), SIMDE_FLOAT64_C( 145.90)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -402.79), SIMDE_FLOAT64_C( -225.69)), simde_mm_set_pd(SIMDE_FLOAT64_C( -114.28), SIMDE_FLOAT64_C( 118.74)), simde_mm_set_pd(SIMDE_FLOAT64_C( -402.79), SIMDE_FLOAT64_C( -106.95)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 476.58), SIMDE_FLOAT64_C( 189.13)), simde_mm_set_pd(SIMDE_FLOAT64_C( 158.24), SIMDE_FLOAT64_C( 133.22)), simde_mm_set_pd(SIMDE_FLOAT64_C( 476.58), SIMDE_FLOAT64_C( 322.35)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -902.16), SIMDE_FLOAT64_C( -720.35)), simde_mm_set_pd(SIMDE_FLOAT64_C( -496.01), SIMDE_FLOAT64_C( 563.52)), simde_mm_set_pd(SIMDE_FLOAT64_C( -902.16), SIMDE_FLOAT64_C( -156.83)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 32.48), SIMDE_FLOAT64_C( -172.74)), simde_mm_set_pd(SIMDE_FLOAT64_C( 435.61), SIMDE_FLOAT64_C( 209.72)), simde_mm_set_pd(SIMDE_FLOAT64_C( 32.48), SIMDE_FLOAT64_C( 36.98)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 322.78), SIMDE_FLOAT64_C( -415.13)), simde_mm_set_pd(SIMDE_FLOAT64_C( -49.82), SIMDE_FLOAT64_C( -195.58)), simde_mm_set_pd(SIMDE_FLOAT64_C( 322.78), SIMDE_FLOAT64_C( -610.71)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_add_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_add_si64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; 
simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_cvtsi64_m64(INT64_C( 793111073070173174)), simde_mm_cvtsi64_m64(INT64_C( 2108668061446341817)), simde_mm_cvtsi64_m64(INT64_C( 2901779134516514991)), }, { simde_mm_cvtsi64_m64(INT64_C( 8875506276833571865)), simde_mm_cvtsi64_m64(INT64_C(-8625831155966083456)), simde_mm_cvtsi64_m64(INT64_C( 249675120867488409)), }, { simde_mm_cvtsi64_m64(INT64_C( 2916092148231541839)), simde_mm_cvtsi64_m64(INT64_C( 7640479428881902755)), simde_mm_cvtsi64_m64(INT64_C(-7890172496596107022)), }, { simde_mm_cvtsi64_m64(INT64_C(-3448012693901819300)), simde_mm_cvtsi64_m64(INT64_C(-9198379985559078668)), simde_mm_cvtsi64_m64(INT64_C( 5800351394248653648)), }, { simde_mm_cvtsi64_m64(INT64_C( 3628113225825158935)), simde_mm_cvtsi64_m64(INT64_C(-1333669735654572042)), simde_mm_cvtsi64_m64(INT64_C( 2294443490170586893)), }, { simde_mm_cvtsi64_m64(INT64_C( 5048798289215441413)), simde_mm_cvtsi64_m64(INT64_C( -388036903570542302)), simde_mm_cvtsi64_m64(INT64_C( 4660761385644899111)), }, { simde_mm_cvtsi64_m64(INT64_C( 6446512717337269554)), simde_mm_cvtsi64_m64(INT64_C(-7669829270527021775)), simde_mm_cvtsi64_m64(INT64_C(-1223316553189752221)), }, { simde_mm_cvtsi64_m64(INT64_C( 6296531259101832881)), simde_mm_cvtsi64_m64(INT64_C( 5834912758815977701)), simde_mm_cvtsi64_m64(INT64_C(-6315300055791741034)), } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_add_si64(test_vec[i].a, test_vec[i].b); simde_assert_m64_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_adds_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( 100), INT8_C( 33), INT8_C( 67), INT8_C( 67), INT8_C( 104), INT8_C(-123), INT8_C( -86), INT8_C( 74), INT8_C( -93), INT8_C( -4), INT8_C( -12), INT8_C( 28), INT8_C( 9), INT8_C( 39), INT8_C( 83), INT8_C( -52)), simde_mm_set_epi8(INT8_C( 40), INT8_C( -64), INT8_C( -19), INT8_C( 
-17), INT8_C( 67), INT8_C( -93), INT8_C( -22), INT8_C( 98), INT8_C( -73), INT8_C( -83), INT8_C( 107), INT8_C( 95), INT8_C( 59), INT8_C( 84), INT8_C( -72), INT8_C(-115)), simde_mm_set_epi8(INT8_C( 127), INT8_C( -31), INT8_C( 48), INT8_C( 50), INT8_C( 127), INT8_C(-128), INT8_C(-108), INT8_C( 127), INT8_C(-128), INT8_C( -87), INT8_C( 95), INT8_C( 123), INT8_C( 68), INT8_C( 123), INT8_C( 11), INT8_C(-128)) }, { simde_mm_set_epi8(INT8_C( 76), INT8_C( 121), INT8_C( 98), INT8_C( 52), INT8_C( 50), INT8_C( -16), INT8_C( 53), INT8_C( 3), INT8_C( -57), INT8_C( -76), INT8_C( -42), INT8_C( 70), INT8_C(-122), INT8_C( 71), INT8_C( -56), INT8_C( -15)), simde_mm_set_epi8(INT8_C( 100), INT8_C( 124), INT8_C( 99), INT8_C( 11), INT8_C( -8), INT8_C( 5), INT8_C( 6), INT8_C( -54), INT8_C( 42), INT8_C( -99), INT8_C( 23), INT8_C(-128), INT8_C( 77), INT8_C( 14), INT8_C( 94), INT8_C( 53)), simde_mm_set_epi8(INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 63), INT8_C( 42), INT8_C( -11), INT8_C( 59), INT8_C( -51), INT8_C( -15), INT8_C(-128), INT8_C( -19), INT8_C( -58), INT8_C( -45), INT8_C( 85), INT8_C( 38), INT8_C( 38)) }, { simde_mm_set_epi8(INT8_C( -13), INT8_C( 29), INT8_C( 30), INT8_C( 13), INT8_C( 51), INT8_C( 11), INT8_C( -27), INT8_C( -12), INT8_C( 97), INT8_C( 87), INT8_C( 67), INT8_C( 70), INT8_C( 2), INT8_C( -40), INT8_C( 49), INT8_C( 116)), simde_mm_set_epi8(INT8_C( 42), INT8_C( -17), INT8_C( -77), INT8_C( 126), INT8_C(-125), INT8_C( -42), INT8_C( 45), INT8_C( -79), INT8_C( -23), INT8_C( 110), INT8_C( 117), INT8_C( -44), INT8_C( -92), INT8_C( -20), INT8_C(-121), INT8_C( 102)), simde_mm_set_epi8(INT8_C( 29), INT8_C( 12), INT8_C( -47), INT8_C( 127), INT8_C( -74), INT8_C( -31), INT8_C( 18), INT8_C( -91), INT8_C( 74), INT8_C( 127), INT8_C( 127), INT8_C( 26), INT8_C( -90), INT8_C( -60), INT8_C( -72), INT8_C( 127)) }, { simde_mm_set_epi8(INT8_C( 55), INT8_C(-106), INT8_C( -49), INT8_C( -49), INT8_C( -85), INT8_C( -58), INT8_C( -56), INT8_C( -25), INT8_C( 78), INT8_C( 18), INT8_C( 71), 
INT8_C( -12), INT8_C( 86), INT8_C( -84), INT8_C( -77), INT8_C(-116)), simde_mm_set_epi8(INT8_C(-103), INT8_C( 107), INT8_C( 33), INT8_C( -17), INT8_C( 106), INT8_C( 4), INT8_C( -98), INT8_C(-128), INT8_C( 53), INT8_C( 4), INT8_C( 120), INT8_C( -44), INT8_C( -99), INT8_C( 120), INT8_C( -27), INT8_C( 45)), simde_mm_set_epi8(INT8_C( -48), INT8_C( 1), INT8_C( -16), INT8_C( -66), INT8_C( 21), INT8_C( -54), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 22), INT8_C( 127), INT8_C( -56), INT8_C( -13), INT8_C( 36), INT8_C(-104), INT8_C( -71)) }, { simde_mm_set_epi8(INT8_C( 47), INT8_C( 15), INT8_C( 126), INT8_C(-115), INT8_C( -77), INT8_C( -27), INT8_C( -38), INT8_C( 32), INT8_C( -21), INT8_C( -80), INT8_C( 112), INT8_C( 75), INT8_C( -15), INT8_C( -92), INT8_C( 43), INT8_C( -22)), simde_mm_set_epi8(INT8_C( -33), INT8_C( 127), INT8_C( 123), INT8_C( 65), INT8_C( 63), INT8_C( 85), INT8_C( 75), INT8_C( 99), INT8_C( -2), INT8_C( 13), INT8_C( -46), INT8_C( -8), INT8_C( 127), INT8_C(-115), INT8_C(-109), INT8_C( 14)), simde_mm_set_epi8(INT8_C( 14), INT8_C( 127), INT8_C( 127), INT8_C( -50), INT8_C( -14), INT8_C( 58), INT8_C( 37), INT8_C( 127), INT8_C( -23), INT8_C( -67), INT8_C( 66), INT8_C( 67), INT8_C( 112), INT8_C(-128), INT8_C( -66), INT8_C( -8)) }, { simde_mm_set_epi8(INT8_C( 18), INT8_C( 75), INT8_C( 10), INT8_C( 29), INT8_C( 27), INT8_C( 101), INT8_C( -1), INT8_C( 78), INT8_C( -78), INT8_C( 110), INT8_C( 18), INT8_C( 82), INT8_C( -41), INT8_C( 85), INT8_C(-113), INT8_C( 126)), simde_mm_set_epi8(INT8_C( -90), INT8_C( 80), INT8_C(-103), INT8_C(-111), INT8_C( 86), INT8_C( 65), INT8_C( 89), INT8_C( 88), INT8_C( -83), INT8_C(-121), INT8_C( -2), INT8_C( 40), INT8_C( -96), INT8_C( -36), INT8_C( 64), INT8_C( -15)), simde_mm_set_epi8(INT8_C( -72), INT8_C( 127), INT8_C( -93), INT8_C( -82), INT8_C( 113), INT8_C( 127), INT8_C( 88), INT8_C( 127), INT8_C(-128), INT8_C( -11), INT8_C( 16), INT8_C( 122), INT8_C(-128), INT8_C( 49), INT8_C( -49), INT8_C( 111)) }, { simde_mm_set_epi8(INT8_C( 
-90), INT8_C( 48), INT8_C( -43), INT8_C( 22), INT8_C( 78), INT8_C( -17), INT8_C( -78), INT8_C( -64), INT8_C( -97), INT8_C( -80), INT8_C( -51), INT8_C( 72), INT8_C( 114), INT8_C( -11), INT8_C( -89), INT8_C( -93)), simde_mm_set_epi8(INT8_C( 8), INT8_C( 57), INT8_C( 66), INT8_C(-119), INT8_C( 79), INT8_C( -29), INT8_C( -49), INT8_C( 26), INT8_C( -12), INT8_C( -99), INT8_C(-101), INT8_C( 121), INT8_C(-112), INT8_C( -5), INT8_C( -19), INT8_C( -27)), simde_mm_set_epi8(INT8_C( -82), INT8_C( 105), INT8_C( 23), INT8_C( -97), INT8_C( 127), INT8_C( -46), INT8_C(-127), INT8_C( -38), INT8_C(-109), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 2), INT8_C( -16), INT8_C(-108), INT8_C(-120)) }, { simde_mm_set_epi8(INT8_C( 26), INT8_C( -15), INT8_C( 12), INT8_C( -66), INT8_C( -7), INT8_C(-115), INT8_C( -21), INT8_C( 27), INT8_C( 111), INT8_C(-126), INT8_C( -43), INT8_C( -94), INT8_C( -97), INT8_C( -34), INT8_C( -47), INT8_C( -79)), simde_mm_set_epi8(INT8_C(-124), INT8_C( -47), INT8_C(-123), INT8_C(-115), INT8_C( -15), INT8_C( -87), INT8_C(-121), INT8_C( -50), INT8_C( 103), INT8_C( 85), INT8_C( 34), INT8_C( -85), INT8_C(-124), INT8_C( 70), INT8_C( 14), INT8_C( -44)), simde_mm_set_epi8(INT8_C( -98), INT8_C( -62), INT8_C(-111), INT8_C(-128), INT8_C( -22), INT8_C(-128), INT8_C(-128), INT8_C( -23), INT8_C( 127), INT8_C( -41), INT8_C( -9), INT8_C(-128), INT8_C(-128), INT8_C( 36), INT8_C( -33), INT8_C(-123)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_adds_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_adds_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( -9187), INT16_C( 11150), INT16_C(-25711), INT16_C( 30917), INT16_C( 5637), INT16_C( 27391), INT16_C( 20667), INT16_C(-25552)), simde_mm_set_epi16(INT16_C( 15244), INT16_C(-26000), INT16_C(-24422), 
INT16_C(-28473), INT16_C( -7393), INT16_C( 32134), INT16_C( -161), INT16_C( -2948)), simde_mm_set_epi16(INT16_C( 6057), INT16_C(-14850), INT16_C(-32768), INT16_C( 2444), INT16_C( -1756), INT16_C( 32767), INT16_C( 20506), INT16_C(-28500)) }, { simde_mm_set_epi16(INT16_C(-27976), INT16_C( 8581), INT16_C( 17714), INT16_C(-15964), INT16_C(-24791), INT16_C( 29014), INT16_C( -8950), INT16_C(-19859)), simde_mm_set_epi16(INT16_C(-20491), INT16_C(-23795), INT16_C( 5770), INT16_C(-28365), INT16_C( -4266), INT16_C(-14588), INT16_C( 21498), INT16_C( 13063)), simde_mm_set_epi16(INT16_C(-32768), INT16_C(-15214), INT16_C( 23484), INT16_C(-32768), INT16_C(-29057), INT16_C( 14426), INT16_C( 12548), INT16_C( -6796)) }, { simde_mm_set_epi16(INT16_C(-24285), INT16_C(-16974), INT16_C( 21513), INT16_C( 30869), INT16_C(-30698), INT16_C( 2555), INT16_C(-20742), INT16_C(-26329)), simde_mm_set_epi16(INT16_C( 19660), INT16_C(-27596), INT16_C( 16650), INT16_C( 30694), INT16_C( 14408), INT16_C( 7632), INT16_C( 15232), INT16_C( -7024)), simde_mm_set_epi16(INT16_C( -4625), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C(-16290), INT16_C( 10187), INT16_C( -5510), INT16_C(-32768)) }, { simde_mm_set_epi16(INT16_C( -6143), INT16_C( 5190), INT16_C( -240), INT16_C(-14301), INT16_C( 12856), INT16_C( 32740), INT16_C(-13308), INT16_C( 31639)), simde_mm_set_epi16(INT16_C( 21047), INT16_C(-20544), INT16_C(-28076), INT16_C(-30442), INT16_C( 28180), INT16_C(-18015), INT16_C( 12870), INT16_C( 12342)), simde_mm_set_epi16(INT16_C( 14904), INT16_C(-15354), INT16_C(-28316), INT16_C(-32768), INT16_C( 32767), INT16_C( 14725), INT16_C( -438), INT16_C( 32767)) }, { simde_mm_set_epi16(INT16_C( 21004), INT16_C( 26590), INT16_C( -387), INT16_C( 5458), INT16_C( 28558), INT16_C( -1691), INT16_C( 13843), INT16_C( -2265)), simde_mm_set_epi16(INT16_C( 24548), INT16_C(-19288), INT16_C( 1056), INT16_C( 5037), INT16_C( 9790), INT16_C( 12391), INT16_C( -2983), INT16_C( 8158)), simde_mm_set_epi16(INT16_C( 32767), 
INT16_C( 7302), INT16_C( 669), INT16_C( 10495), INT16_C( 32767), INT16_C( 10700), INT16_C( 10860), INT16_C( 5893)) }, { simde_mm_set_epi16(INT16_C( 23035), INT16_C( 14493), INT16_C( 11060), INT16_C(-15265), INT16_C(-25751), INT16_C(-17380), INT16_C(-20209), INT16_C(-22539)), simde_mm_set_epi16(INT16_C(-10338), INT16_C( 26220), INT16_C( -6324), INT16_C( 16083), INT16_C(-20758), INT16_C( 28594), INT16_C(-27719), INT16_C(-21423)), simde_mm_set_epi16(INT16_C( 12697), INT16_C( 32767), INT16_C( 4736), INT16_C( 818), INT16_C(-32768), INT16_C( 11214), INT16_C(-32768), INT16_C(-32768)) }, { simde_mm_set_epi16(INT16_C( 1437), INT16_C( -1148), INT16_C( -7704), INT16_C( -3845), INT16_C( 5523), INT16_C( 32157), INT16_C( -3057), INT16_C( -2194)), simde_mm_set_epi16(INT16_C( 20255), INT16_C( 16313), INT16_C( 26265), INT16_C( -5377), INT16_C( 31904), INT16_C( 3795), INT16_C( 20716), INT16_C(-30035)), simde_mm_set_epi16(INT16_C( 21692), INT16_C( 15165), INT16_C( 18561), INT16_C( -9222), INT16_C( 32767), INT16_C( 32767), INT16_C( 17659), INT16_C(-32229)) }, { simde_mm_set_epi16(INT16_C( 856), INT16_C( 13772), INT16_C(-17603), INT16_C(-26424), INT16_C( 9957), INT16_C(-11801), INT16_C( 3067), INT16_C(-26950)), simde_mm_set_epi16(INT16_C(-26495), INT16_C(-22337), INT16_C(-30714), INT16_C( 24988), INT16_C(-24287), INT16_C( 11170), INT16_C(-20015), INT16_C( 26834)), simde_mm_set_epi16(INT16_C(-25639), INT16_C( -8565), INT16_C(-32768), INT16_C( -1436), INT16_C(-14330), INT16_C( -631), INT16_C(-16948), INT16_C( -116)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_adds_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_adds_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu8(243, 185, 175, 84, 114, 173, 219, 130, 80, 12, 6, 121, 58, 223, 94, 203), simde_x_mm_set_epu8( 46, 142, 32, 64, 
239, 92, 213, 158, 92, 20, 62, 216, 2, 162, 3, 226), simde_x_mm_set_epu8(255, 255, 207, 148, 255, 255, 255, 255, 172, 32, 68, 255, 60, 255, 97, 255) }, { simde_x_mm_set_epu8(200, 115, 63, 101, 233, 139, 164, 230, 4, 147, 7, 233, 110, 206, 178, 233), simde_x_mm_set_epu8( 87, 74, 19, 102, 136, 119, 164, 198, 113, 170, 154, 7, 191, 195, 220, 182), simde_x_mm_set_epu8(255, 189, 82, 203, 255, 255, 255, 255, 117, 255, 161, 240, 255, 255, 255, 255) }, { simde_x_mm_set_epu8( 35, 38, 142, 165, 104, 97, 151, 1, 79, 16, 160, 140, 19, 109, 210, 120), simde_x_mm_set_epu8( 2, 202, 138, 112, 199, 233, 201, 65, 233, 49, 101, 216, 62, 35, 235, 214), simde_x_mm_set_epu8( 37, 240, 255, 255, 255, 255, 255, 66, 255, 65, 255, 255, 81, 144, 255, 255) }, { simde_x_mm_set_epu8( 98, 74, 253, 101, 187, 74, 205, 52, 154, 226, 198, 148, 241, 174, 125, 62), simde_x_mm_set_epu8(163, 110, 1, 166, 233, 185, 220, 101, 190, 92, 121, 253, 238, 73, 61, 34), simde_x_mm_set_epu8(255, 184, 254, 255, 255, 255, 255, 153, 255, 255, 255, 255, 255, 247, 186, 96) }, { simde_x_mm_set_epu8( 91, 28, 52, 18, 175, 61, 49, 67, 76, 39, 238, 247, 137, 91, 133, 4), simde_x_mm_set_epu8(142, 255, 123, 14, 70, 48, 62, 186, 134, 31, 154, 34, 3, 30, 40, 184), simde_x_mm_set_epu8(233, 255, 175, 32, 245, 109, 111, 253, 210, 70, 255, 255, 140, 121, 173, 188) }, { simde_x_mm_set_epu8( 32, 230, 94, 17, 123, 186, 43, 67, 13, 45, 219, 214, 133, 19, 25, 150), simde_x_mm_set_epu8(114, 27, 244, 244, 84, 0, 108, 198, 239, 228, 225, 158, 4, 27, 84, 116), simde_x_mm_set_epu8(146, 255, 255, 255, 207, 186, 151, 255, 252, 255, 255, 255, 137, 46, 109, 255) }, { simde_x_mm_set_epu8( 66, 152, 8, 32, 7, 222, 46, 10, 116, 185, 69, 186, 194, 134, 55, 214), simde_x_mm_set_epu8(185, 11, 114, 201, 179, 122, 77, 244, 221, 175, 219, 12, 207, 104, 91, 252), simde_x_mm_set_epu8(251, 163, 122, 233, 186, 255, 123, 254, 255, 255, 255, 198, 255, 238, 146, 255) }, { simde_x_mm_set_epu8(149, 71, 22, 119, 62, 37, 103, 26, 193, 60, 234, 165, 97, 233, 187, 76), 
simde_x_mm_set_epu8(169, 9, 188, 18, 251, 187, 96, 167, 158, 238, 176, 160, 74, 18, 253, 103), simde_x_mm_set_epu8(255, 80, 210, 137, 255, 224, 199, 193, 255, 255, 255, 255, 171, 251, 255, 179) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_adds_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_adds_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu16(52397, 2628, 46614, 35162, 54536, 36456, 24004, 11160), simde_x_mm_set_epu16(41921, 12035, 29903, 58497, 1695, 15558, 14248, 61659), simde_x_mm_set_epu16(65535, 14663, 65535, 65535, 56231, 52014, 38252, 65535) }, { simde_x_mm_set_epu16(57345, 6650, 8556, 25986, 61163, 19076, 40550, 40920), simde_x_mm_set_epu16(62607, 15369, 35325, 28241, 54252, 5722, 23748, 36984), simde_x_mm_set_epu16(65535, 22019, 43881, 54227, 65535, 24798, 64298, 65535) }, { simde_x_mm_set_epu16(19370, 64323, 5781, 65431, 30915, 24348, 65190, 30074), simde_x_mm_set_epu16(34245, 57703, 60540, 40683, 24154, 18750, 32124, 33828), simde_x_mm_set_epu16(53615, 65535, 65535, 65535, 55069, 43098, 65535, 63902) }, { simde_x_mm_set_epu16( 1083, 62410, 53296, 45, 57969, 54778, 42038, 36216), simde_x_mm_set_epu16(47446, 36131, 44258, 13796, 53696, 55457, 27279, 19924), simde_x_mm_set_epu16(48529, 65535, 65535, 13841, 65535, 65535, 65535, 56140) }, { simde_x_mm_set_epu16(53022, 40173, 23284, 53830, 27939, 30100, 61471, 602), simde_x_mm_set_epu16(42952, 36449, 22644, 6670, 537, 5689, 73, 2247), simde_x_mm_set_epu16(65535, 65535, 45928, 60500, 28476, 35789, 61544, 2849) }, { simde_x_mm_set_epu16( 8441, 24815, 22801, 35056, 30653, 5655, 39135, 32848), simde_x_mm_set_epu16( 7115, 32196, 31449, 51212, 54481, 9348, 63499, 54202), simde_x_mm_set_epu16(15556, 57011, 54250, 65535, 65535, 15003, 65535, 65535) }, { simde_x_mm_set_epu16( 5059, 20924, 5143, 29698, 
39512, 42596, 50907, 48157), simde_x_mm_set_epu16(55259, 30633, 10948, 60956, 47288, 59136, 49334, 11432), simde_x_mm_set_epu16(60318, 51557, 16091, 65535, 65535, 65535, 65535, 59589) }, { simde_x_mm_set_epu16(53397, 1584, 56368, 64962, 35166, 11367, 24855, 22370), simde_x_mm_set_epu16( 5862, 9719, 15493, 14762, 25151, 48370, 30737, 29969), simde_x_mm_set_epu16(59259, 11303, 65535, 65535, 60317, 59737, 55592, 52339) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_adds_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_and_pd(SIMDE_MUNIT_TEST_ARGS) { simde__m128d all_set = simde_x_mm_setone_pd(), all_unset = simde_mm_setzero_pd(); simde_assert_m128d_equal(simde_mm_and_pd(all_set, all_unset), all_unset); simde_assert_m128d_equal(simde_mm_and_pd(all_set, all_set), all_set); simde_assert_m128d_equal(simde_mm_and_pd(all_unset, all_unset), all_unset); return 0; } static int test_simde_mm_and_si128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 1143386005), INT32_C( 255040004), INT32_C( 778825143), INT32_C( 1160880262)), simde_mm_set_epi32(INT32_C(-1598010564), INT32_C( 882577136), INT32_C( 1895747884), INT32_C( 78458499)), simde_mm_set_epi32(INT32_C( 2836), INT32_C( 68356608), INT32_C( 543867172), INT32_C( 69279874)) }, { simde_mm_set_epi32(INT32_C( 929630839), INT32_C( 1332223012), INT32_C( -595247247), INT32_C( 1607004091)), simde_mm_set_epi32(INT32_C( -96984995), INT32_C( -496201158), INT32_C( 1667897198), INT32_C( 329068048)), simde_mm_set_epi32(INT32_C( 841482325), INT32_C( 1114116128), INT32_C( 1073747808), INT32_C( 327691792)) }, { simde_mm_set_epi32(INT32_C( 1507410371), INT32_C(-1202228125), INT32_C( 213174798), INT32_C( 1712466479)), simde_mm_set_epi32(INT32_C( -416935364), INT32_C( 76821686), INT32_C( -895281725), INT32_C( 640856929)), 
simde_mm_set_epi32(INT32_C( 1090519040), INT32_C( 1324066), INT32_C( 144703490), INT32_C( 638722593)) }, { simde_mm_set_epi32(INT32_C(-1967400648), INT32_C( -398277023), INT32_C( 1276094966), INT32_C(-1580835262)), simde_mm_set_epi32(INT32_C( 185492863), INT32_C( 1265367516), INT32_C( -384438464), INT32_C( 1008626379)), simde_mm_set_epi32(INT32_C( 168444216), INT32_C( 1212334144), INT32_C( 1208330560), INT32_C( 537283138)) }, { simde_mm_set_epi32(INT32_C( 1287640091), INT32_C( -654000828), INT32_C( 597524546), INT32_C( 182360913)), simde_mm_set_epi32(INT32_C( 1236330411), INT32_C( 1010510657), INT32_C(-1874705697), INT32_C( -544222805)), simde_mm_set_epi32(INT32_C( 1219544075), INT32_C( 402663744), INT32_C( 66), INT32_C( 177115393)) }, { simde_mm_set_epi32(INT32_C( -783740762), INT32_C( 1592969400), INT32_C(-1896275639), INT32_C( 1398555518)), simde_mm_set_epi32(INT32_C( 618146080), INT32_C( -972493969), INT32_C( -440292799), INT32_C( 888342397)), simde_mm_set_epi32(INT32_C( 4718624), INT32_C( 1174456360), INT32_C(-2067718079), INT32_C( 273679228)) }, { simde_mm_set_epi32(INT32_C( 975551520), INT32_C( 223749592), INT32_C(-1022254731), INT32_C( -845311996)), simde_mm_set_epi32(INT32_C( 1522650069), INT32_C( 1767255815), INT32_C( 1217271913), INT32_C(-1365644996)), simde_mm_set_epi32(INT32_C( 436307968), INT32_C( 156640512), INT32_C( 1073741921), INT32_C(-1936097276)) }, { simde_mm_set_epi32(INT32_C(-1607852092), INT32_C( -146112938), INT32_C( 112326370), INT32_C( 971940993)), simde_mm_set_epi32(INT32_C( 1129446249), INT32_C( -367605030), INT32_C( 2031327443), INT32_C( -763011289)), simde_mm_set_epi32(INT32_C( 6976), INT32_C( -503166382), INT32_C( 1151170), INT32_C( 277087233)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_and_si128(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_andnot_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const 
int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { { -INT64_C( 2301068032488183266), INT64_C( 2211418302004999322) }, { -INT64_C( 6496218963460796338), -INT64_C( 1823812953617724359) }, { INT64_C( 416592882749870144), -INT64_C( 2305839985119459295) } }, { { -INT64_C( 4026537826130773906), -INT64_C( 4293586900818793454) }, { INT64_C( 3627658264586431853), INT64_C( 2793073689318142995) }, { INT64_C( 3620897194948822273), INT64_C( 2486233871494942721) } }, { { INT64_C( 9163842748139474741), INT64_C( 2569644122047224175) }, { -INT64_C( 969348282954885022), INT64_C( 811088657167341923) }, { -INT64_C( 9187294233813168062), INT64_C( 594616163615653888) } }, { { -INT64_C( 7634837049602759393), INT64_C( 7858258033422095925) }, { INT64_C( 8560872385946379772), INT64_C( 5408772763975523373) }, { INT64_C( 6972768561940938976), INT64_C( 144627715121612808) } }, { { INT64_C( 5707717806252392055), -INT64_C( 3300212919446621766) }, { INT64_C( 9059126230790306606), -INT64_C( 7813869476910184169) }, { INT64_C( 3497071796361199880), INT64_C( 111611459737241605) } }, { { INT64_C( 322122041068250894), INT64_C( 3008683809568371225) }, { INT64_C( 127266174305791736), -INT64_C( 2153613612017236628) }, { INT64_C( 109216282184321264), -INT64_C( 4459687314800229020) } }, { { -INT64_C( 3492115216109711814), INT64_C( 5213618401531810613) }, { INT64_C( 6185505972225623532), INT64_C( 2977154160444382105) }, { INT64_C( 1177221022203544004), INT64_C( 2378043193183766664) } }, { { INT64_C( 1499198256367688520), -INT64_C( 638210378185732981) }, { -INT64_C( 6727634533471112091), -INT64_C( 4478722654180704065) }, { -INT64_C( 6764261500459572187), INT64_C( 60904633988546612) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_castpd_si128(simde_mm_andnot_pd(simde_mm_castsi128_pd(a), simde_mm_castsi128_pd(b))); 
simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm_andnot_si128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( -560808079), INT32_C( -229809400), INT32_C(-1262424809), INT32_C( 39478984)), simde_mm_set_epi32(INT32_C(-2116907800), INT32_C( -478633369), INT32_C( -758500702), INT32_C( 774827765)), simde_mm_set_epi32(INT32_C( 20971656), INT32_C( 19955815), INT32_C( 1107956384), INT32_C( 740591669)) }, { simde_mm_set_epi32(INT32_C( -390101732), INT32_C( -878593643), INT32_C( -87858932), INT32_C( 1872700566)), simde_mm_set_epi32(INT32_C( 794830631), INT32_C( 1201718915), INT32_C( 1477008088), INT32_C( -178127418)), simde_mm_set_epi32(INT32_C( 121643555), INT32_C( 67126786), INT32_C( 531152), INT32_C(-1872701120)) }, { simde_mm_set_epi32(INT32_C( -969785513), INT32_C( 743154241), INT32_C( -944974936), INT32_C(-1136592248)), simde_mm_set_epi32(INT32_C( -909998602), INT32_C( 431643866), INT32_C( -708589890), INT32_C( -556429363)), simde_mm_set_epi32(INT32_C( 163610784), INT32_C( 296770714), INT32_C( 272827414), INT32_C( 1117062469)) }, { simde_mm_set_epi32(INT32_C( 1619650408), INT32_C( 861525694), INT32_C(-2058207417), INT32_C( 228720218)), simde_mm_set_epi32(INT32_C( 1416821078), INT32_C( 2107001565), INT32_C(-1248448269), INT32_C(-1204471361)), simde_mm_set_epi32(INT32_C( 343021590), INT32_C( 1283852353), INT32_C( 813957296), INT32_C(-1340866139)) }, { simde_mm_set_epi32(INT32_C( -343490394), INT32_C( 1846187115), INT32_C( -847771260), INT32_C( 97935165)), simde_mm_set_epi32(INT32_C( -69489865), INT32_C(-1109591795), INT32_C( 169478308), INT32_C( 1662522631)), simde_mm_set_epi32(INT32_C( 274279185), INT32_C(-1848350460), INT32_C( 33685536), INT32_C( 1644691458)) }, { simde_mm_set_epi32(INT32_C( 608096731), INT32_C( -775399847), INT32_C( -52780990), INT32_C( 459462722)), simde_mm_set_epi32(INT32_C(-1928888486), 
INT32_C(-1926941714), INT32_C(-1218438233), INT32_C( 195273416)), simde_mm_set_epi32(INT32_C(-1996411392), INT32_C( 203760038), INT32_C( 52435877), INT32_C( 8462984)) }, { simde_mm_set_epi32(INT32_C( 1829801526), INT32_C( 1678890728), INT32_C(-1629742565), INT32_C( 902941266)), simde_mm_set_epi32(INT32_C( 110066513), INT32_C( -591553870), INT32_C( -950259417), INT32_C( 810403185)), simde_mm_set_epi32(INT32_C( 42955073), INT32_C(-1733556206), INT32_C( 1090527524), INT32_C( 787745)) }, { simde_mm_set_epi32(INT32_C( 321441431), INT32_C(-1200267660), INT32_C( -313751420), INT32_C( 515761953)), simde_mm_set_epi32(INT32_C( -687838781), INT32_C( 1420638186), INT32_C(-1442242179), INT32_C( 1996838037)), simde_mm_set_epi32(INT32_C(-1006624448), INT32_C( 1149772170), INT32_C( 33628537), INT32_C( 1627394196)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_andnot_si128(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_avg_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu8( 22, 218, 216, 66, 82, 156, 47, 52, 255, 1, 136, 174, 147, 136, 106, 178), simde_x_mm_set_epu8( 66, 241, 223, 129, 96, 67, 0, 68, 242, 71, 233, 224, 227, 252, 191, 92), simde_x_mm_set_epu8( 44, 230, 220, 98, 89, 112, 24, 60, 249, 36, 185, 199, 187, 194, 149, 135) }, { simde_x_mm_set_epu8(219, 214, 26, 72, 63, 56, 200, 118, 196, 107, 88, 110, 187, 3, 64, 214), simde_x_mm_set_epu8( 44, 175, 103, 82, 87, 192, 180, 37, 0, 200, 53, 214, 25, 17, 19, 149), simde_x_mm_set_epu8(132, 195, 65, 77, 75, 124, 190, 78, 98, 154, 71, 162, 106, 10, 42, 182) }, { simde_x_mm_set_epu8(221, 9, 162, 208, 84, 84, 50, 140, 230, 69, 178, 12, 34, 173, 44, 58), simde_x_mm_set_epu8( 4, 110, 65, 218, 252, 108, 241, 136, 36, 109, 68, 2, 121, 10, 120, 101), simde_x_mm_set_epu8(113, 60, 114, 213, 168, 96, 146, 138, 133, 89, 123, 7, 78, 92, 
82, 80) }, { simde_x_mm_set_epu8(173, 38, 26, 251, 66, 136, 168, 132, 170, 244, 145, 27, 76, 168, 97, 129), simde_x_mm_set_epu8(211, 66, 29, 93, 231, 30, 149, 218, 72, 12, 231, 238, 124, 3, 127, 55), simde_x_mm_set_epu8(192, 52, 28, 172, 149, 83, 159, 175, 121, 128, 188, 133, 100, 86, 112, 92) }, { simde_x_mm_set_epu8( 33, 120, 41, 4, 226, 71, 169, 72, 92, 211, 80, 53, 22, 250, 136, 31), simde_x_mm_set_epu8(163, 237, 214, 178, 29, 194, 137, 109, 134, 197, 40, 228, 174, 101, 114, 162), simde_x_mm_set_epu8( 98, 179, 128, 91, 128, 133, 153, 91, 113, 204, 60, 141, 98, 176, 125, 97) }, { simde_x_mm_set_epu8(151, 241, 42, 96, 21, 167, 26, 188, 124, 136, 158, 144, 227, 152, 4, 152), simde_x_mm_set_epu8( 43, 216, 77, 147, 105, 127, 87, 93, 160, 103, 68, 85, 77, 41, 67, 189), simde_x_mm_set_epu8( 97, 229, 60, 122, 63, 147, 57, 141, 142, 120, 113, 115, 152, 97, 36, 171) }, { simde_x_mm_set_epu8(229, 241, 5, 141, 89, 37, 175, 184, 139, 113, 20, 221, 179, 130, 61, 16), simde_x_mm_set_epu8( 74, 70, 240, 235, 217, 244, 23, 139, 224, 48, 224, 137, 221, 180, 178, 80), simde_x_mm_set_epu8(152, 156, 123, 188, 153, 141, 99, 162, 182, 81, 122, 179, 200, 155, 120, 48) }, { simde_x_mm_set_epu8( 30, 40, 139, 23, 169, 60, 77, 114, 84, 55, 70, 122, 10, 27, 47, 237), simde_x_mm_set_epu8(133, 159, 246, 175, 239, 136, 111, 216, 173, 32, 117, 64, 231, 128, 162, 145), simde_x_mm_set_epu8( 82, 100, 193, 99, 204, 98, 94, 165, 129, 44, 94, 93, 121, 78, 105, 191) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_avg_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_avg_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu16( 5850, 55362, 21148, 12084, 65281, 34990, 37768, 27314), simde_x_mm_set_epu16(17137, 57217, 24643, 68, 62023, 59872, 58364, 48988), simde_x_mm_set_epu16(11494, 56290, 22896, 
6076, 63652, 47431, 48066, 38151) }, { simde_x_mm_set_epu16(56278, 6728, 16184, 51318, 50283, 22638, 47875, 16598), simde_x_mm_set_epu16(11439, 26450, 22464, 46117, 200, 13782, 6417, 5013), simde_x_mm_set_epu16(33859, 16589, 19324, 48718, 25242, 18210, 27146, 10806) }, { simde_x_mm_set_epu16(56585, 41680, 21588, 12940, 58949, 45580, 8877, 11322), simde_x_mm_set_epu16( 1134, 16858, 64620, 61832, 9325, 17410, 30986, 30821), simde_x_mm_set_epu16(28860, 29269, 43104, 37386, 34137, 31495, 19932, 21072) }, { simde_x_mm_set_epu16(44326, 6907, 17032, 43140, 43764, 37147, 19624, 24961), simde_x_mm_set_epu16(54082, 7517, 59166, 38362, 18444, 59374, 31747, 32567), simde_x_mm_set_epu16(49204, 7212, 38099, 40751, 31104, 48261, 25686, 28764) }, { simde_x_mm_set_epu16( 8568, 10500, 57927, 43336, 23763, 20533, 5882, 34847), simde_x_mm_set_epu16(41965, 54962, 7618, 35181, 34501, 10468, 44645, 29346), simde_x_mm_set_epu16(25267, 32731, 32773, 39259, 29132, 15501, 25264, 32097) }, { simde_x_mm_set_epu16(38897, 10848, 5543, 6844, 31880, 40592, 58264, 1176), simde_x_mm_set_epu16(11224, 19859, 27007, 22365, 41063, 17493, 19753, 17341), simde_x_mm_set_epu16(25061, 15354, 16275, 14605, 36472, 29043, 39009, 9259) }, { simde_x_mm_set_epu16(58865, 1421, 22821, 44984, 35697, 5341, 45954, 15632), simde_x_mm_set_epu16(19014, 61675, 55796, 6027, 57392, 57481, 56756, 45648), simde_x_mm_set_epu16(38940, 31548, 39309, 25506, 46545, 31411, 51355, 30640) }, { simde_x_mm_set_epu16( 7720, 35607, 43324, 19826, 21559, 18042, 2587, 12269), simde_x_mm_set_epu16(34207, 63151, 61320, 28632, 44320, 30016, 59264, 41617), simde_x_mm_set_epu16(20964, 49379, 52322, 24229, 32940, 24029, 30926, 26943) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_avg_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_bslli_si128(SIMDE_MUNIT_TEST_ARGS) { simde__m128i a, e, r; a = simde_mm_set_epi8(INT8_C( 
24), INT8_C( -55), INT8_C( -96), INT8_C( 87), INT8_C( -58), INT8_C(-112), INT8_C( 23), INT8_C(-126), INT8_C( -8), INT8_C( -11), INT8_C( 18), INT8_C( 30), INT8_C( 114), INT8_C( 65), INT8_C( 26), INT8_C(-121)); e = simde_mm_set_epi8(INT8_C( 24), INT8_C( -55), INT8_C( -96), INT8_C( 87), INT8_C( -58), INT8_C(-112), INT8_C( 23), INT8_C(-126), INT8_C( -8), INT8_C( -11), INT8_C( 18), INT8_C( 30), INT8_C( 114), INT8_C( 65), INT8_C( 26), INT8_C(-121)); r = simde_mm_bslli_si128(a, 0); simde_assert_m128i_i8(r, ==, e); e = simde_mm_set_epi8(INT8_C( 87), INT8_C( -58), INT8_C(-112), INT8_C( 23), INT8_C(-126), INT8_C( -8), INT8_C( -11), INT8_C( 18), INT8_C( 30), INT8_C( 114), INT8_C( 65), INT8_C( 26), INT8_C(-121), INT8_C( 0), INT8_C( 0), INT8_C( 0)); r = simde_mm_bslli_si128(a, 3); simde_assert_m128i_i8(r, ==, e); e = simde_mm_set_epi8(INT8_C( 65), INT8_C( 26), INT8_C(-121), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)); r = simde_mm_bslli_si128(a, 13); simde_assert_m128i_i8(r, ==, e); e = simde_mm_set_epi8(INT8_C( -96), INT8_C( 87), INT8_C( -58), INT8_C(-112), INT8_C( 23), INT8_C(-126), INT8_C( -8), INT8_C( -11), INT8_C( 18), INT8_C( 30), INT8_C( 114), INT8_C( 65), INT8_C( 26), INT8_C(-121), INT8_C( 0), INT8_C( 0)); r = simde_mm_bslli_si128(a, 2); simde_assert_m128i_i8(r, ==, e); e = simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)); r = simde_mm_bslli_si128(a, 19); simde_assert_m128i_i8(r, ==, e); return 0; } static int test_simde_mm_bsrli_si128(SIMDE_MUNIT_TEST_ARGS) { simde__m128i a, e, r; a = simde_mm_set_epi8(INT8_C(-121), INT8_C( -58), INT8_C( -15), INT8_C(-115), INT8_C( -97), INT8_C( -96), INT8_C( -74), INT8_C(-113), INT8_C(-121), INT8_C( 99), INT8_C( 126), INT8_C( 113), INT8_C( -29), INT8_C( 114), INT8_C( 
-65), INT8_C( 9)); e = simde_mm_set_epi8(INT8_C(-121), INT8_C( -58), INT8_C( -15), INT8_C(-115), INT8_C( -97), INT8_C( -96), INT8_C( -74), INT8_C(-113), INT8_C(-121), INT8_C( 99), INT8_C( 126), INT8_C( 113), INT8_C( -29), INT8_C( 114), INT8_C( -65), INT8_C( 9)); r = simde_mm_bsrli_si128(a, 0); simde_assert_m128i_i8(r, ==, e); e = simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-121), INT8_C( -58), INT8_C( -15), INT8_C(-115), INT8_C( -97), INT8_C( -96), INT8_C( -74), INT8_C(-113), INT8_C(-121), INT8_C( 99), INT8_C( 126), INT8_C( 113), INT8_C( -29)); r = simde_mm_bsrli_si128(a, 3); simde_assert_m128i_i8(r, ==, e); e = simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-121), INT8_C( -58), INT8_C( -15)); r = simde_mm_bsrli_si128(a, 13); simde_assert_m128i_i8(r, ==, e); e = simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C(-121), INT8_C( -58), INT8_C( -15), INT8_C(-115), INT8_C( -97), INT8_C( -96), INT8_C( -74), INT8_C(-113), INT8_C(-121), INT8_C( 99), INT8_C( 126), INT8_C( 113), INT8_C( -29), INT8_C( 114)); r = simde_mm_bsrli_si128(a, 2); simde_assert_m128i_i8(r, ==, e); e = simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)); r = simde_mm_bsrli_si128(a, 19); simde_assert_m128i_i8(r, ==, e); return 0; } static int test_simde_mm_castpd_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128 r; } test_vec[8] = { { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C( 499.48), SIMDE_FLOAT32_C( 72.83), SIMDE_FLOAT32_C(-420.10), SIMDE_FLOAT32_C( -361.15))), simde_mm_set_ps(SIMDE_FLOAT32_C( 499.48), SIMDE_FLOAT32_C( 72.83), SIMDE_FLOAT32_C(-420.10), SIMDE_FLOAT32_C( -361.15)) }, { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C(-412.40), SIMDE_FLOAT32_C(-314.35), 
SIMDE_FLOAT32_C(-851.03), SIMDE_FLOAT32_C( 128.01))), simde_mm_set_ps(SIMDE_FLOAT32_C(-412.40), SIMDE_FLOAT32_C(-314.35), SIMDE_FLOAT32_C(-851.03), SIMDE_FLOAT32_C( 128.01)) }, { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C(-411.93), SIMDE_FLOAT32_C( 780.67), SIMDE_FLOAT32_C(-928.22), SIMDE_FLOAT32_C( 762.24))), simde_mm_set_ps(SIMDE_FLOAT32_C(-411.93), SIMDE_FLOAT32_C( 780.67), SIMDE_FLOAT32_C(-928.22), SIMDE_FLOAT32_C( 762.24)) }, { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C(-614.18), SIMDE_FLOAT32_C( 644.19), SIMDE_FLOAT32_C( -41.15), SIMDE_FLOAT32_C( 871.68))), simde_mm_set_ps(SIMDE_FLOAT32_C(-614.18), SIMDE_FLOAT32_C( 644.19), SIMDE_FLOAT32_C( -41.15), SIMDE_FLOAT32_C( 871.68)) }, { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C( 795.82), SIMDE_FLOAT32_C( 486.26), SIMDE_FLOAT32_C(-686.59), SIMDE_FLOAT32_C( 277.69))), simde_mm_set_ps(SIMDE_FLOAT32_C( 795.82), SIMDE_FLOAT32_C( 486.26), SIMDE_FLOAT32_C(-686.59), SIMDE_FLOAT32_C( 277.69)) }, { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C( 221.74), SIMDE_FLOAT32_C(-655.22), SIMDE_FLOAT32_C(-366.90), SIMDE_FLOAT32_C( -245.25))), simde_mm_set_ps(SIMDE_FLOAT32_C( 221.74), SIMDE_FLOAT32_C(-655.22), SIMDE_FLOAT32_C(-366.90), SIMDE_FLOAT32_C( -245.25)) }, { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C( -83.75), SIMDE_FLOAT32_C( 862.26), SIMDE_FLOAT32_C( 55.37), SIMDE_FLOAT32_C( -26.83))), simde_mm_set_ps(SIMDE_FLOAT32_C( -83.75), SIMDE_FLOAT32_C( 862.26), SIMDE_FLOAT32_C( 55.37), SIMDE_FLOAT32_C( -26.83)) }, { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C(-557.26), SIMDE_FLOAT32_C(-554.56), SIMDE_FLOAT32_C(-507.07), SIMDE_FLOAT32_C( 395.47))), simde_mm_set_ps(SIMDE_FLOAT32_C(-557.26), SIMDE_FLOAT32_C(-554.56), SIMDE_FLOAT32_C(-507.07), SIMDE_FLOAT32_C( 395.47)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_castpd_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int 
test_simde_mm_castps_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -48.11), SIMDE_FLOAT32_C( 66.54), SIMDE_FLOAT32_C( -702.38), SIMDE_FLOAT32_C( -384.97)), simde_mm_set_ps(SIMDE_FLOAT32_C( -48.11), SIMDE_FLOAT32_C( 66.54), SIMDE_FLOAT32_C( -702.38), SIMDE_FLOAT32_C( -384.97)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 975.22), SIMDE_FLOAT32_C( -352.19), SIMDE_FLOAT32_C( -258.03), SIMDE_FLOAT32_C( 978.92)), simde_mm_set_ps(SIMDE_FLOAT32_C( 975.22), SIMDE_FLOAT32_C( -352.19), SIMDE_FLOAT32_C( -258.03), SIMDE_FLOAT32_C( 978.92)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 24.73), SIMDE_FLOAT32_C( -551.11), SIMDE_FLOAT32_C( -52.52), SIMDE_FLOAT32_C( 259.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( 24.73), SIMDE_FLOAT32_C( -551.11), SIMDE_FLOAT32_C( -52.52), SIMDE_FLOAT32_C( 259.60)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 614.82), SIMDE_FLOAT32_C( 711.79), SIMDE_FLOAT32_C( 715.74), SIMDE_FLOAT32_C( 872.89)), simde_mm_set_ps(SIMDE_FLOAT32_C( 614.82), SIMDE_FLOAT32_C( 711.79), SIMDE_FLOAT32_C( 715.74), SIMDE_FLOAT32_C( 872.89)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 434.09), SIMDE_FLOAT32_C( 97.43), SIMDE_FLOAT32_C( 836.69), SIMDE_FLOAT32_C( 490.93)), simde_mm_set_ps(SIMDE_FLOAT32_C( 434.09), SIMDE_FLOAT32_C( 97.43), SIMDE_FLOAT32_C( 836.69), SIMDE_FLOAT32_C( 490.93)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -964.09), SIMDE_FLOAT32_C( 616.34), SIMDE_FLOAT32_C( -267.39), SIMDE_FLOAT32_C( -457.57)), simde_mm_set_ps(SIMDE_FLOAT32_C( -964.09), SIMDE_FLOAT32_C( 616.34), SIMDE_FLOAT32_C( -267.39), SIMDE_FLOAT32_C( -457.57)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -14.24), SIMDE_FLOAT32_C( 802.19), SIMDE_FLOAT32_C( 741.42), SIMDE_FLOAT32_C( -211.48)), simde_mm_set_ps(SIMDE_FLOAT32_C( -14.24), SIMDE_FLOAT32_C( 802.19), SIMDE_FLOAT32_C( 741.42), SIMDE_FLOAT32_C( -211.48)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -739.71), SIMDE_FLOAT32_C( -918.58), SIMDE_FLOAT32_C( -598.92), SIMDE_FLOAT32_C( -924.03)), 
simde_mm_set_ps(SIMDE_FLOAT32_C( -739.71), SIMDE_FLOAT32_C( -918.58), SIMDE_FLOAT32_C( -598.92), SIMDE_FLOAT32_C( -924.03)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_castpd_ps(simde_mm_castps_pd(test_vec[i].a)); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_castsi128_pd(SIMDE_MUNIT_TEST_ARGS) { simde__m128i test_vec[8] = { simde_mm_set_epi32(INT32_C(-1784593785), INT32_C( 1037253725), INT32_C( 225827038), INT32_C(-2070942389)), simde_mm_set_epi32(INT32_C( 2006039830), INT32_C( 831495128), INT32_C( 1875760759), INT32_C( 315081037)), simde_mm_set_epi32(INT32_C( -305750616), INT32_C( 602617399), INT32_C( 1569354160), INT32_C(-1091905770)), simde_mm_set_epi32(INT32_C(-1852218105), INT32_C(-1464694454), INT32_C(-1287612023), INT32_C( 1418106957)), simde_mm_set_epi32(INT32_C( 1382189486), INT32_C( 561466363), INT32_C( -455563445), INT32_C( 733917325)), simde_mm_set_epi32(INT32_C( -187102213), INT32_C( -373894547), INT32_C( 335417846), INT32_C( 400855569)), simde_mm_set_epi32(INT32_C( 1405293845), INT32_C( -164981292), INT32_C( 180491437), INT32_C( 1551867928)), simde_mm_set_epi32(INT32_C( 458893421), INT32_C(-1960480477), INT32_C( 1264329759), INT32_C( 1663854158)) }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castpd_si128(simde_mm_castsi128_pd(test_vec[i])); simde_assert_m128i_equal(r, test_vec[i]); } return 0; } static int test_simde_mm_castsi128_ps(SIMDE_MUNIT_TEST_ARGS) { simde__m128i test_vec[8] = { simde_mm_set_epi32(INT32_C(-1036963898), INT32_C( 1847069037), INT32_C( 740321504), INT32_C( 778754840)), simde_mm_set_epi32(INT32_C( 975137998), INT32_C( -252397546), INT32_C( 1504697866), INT32_C(-1327032545)), simde_mm_set_epi32(INT32_C(-1494981423), INT32_C( -175189577), INT32_C( 2056595322), INT32_C( 1080531273)), simde_mm_set_epi32(INT32_C(-1391843620), INT32_C( 424327107), INT32_C( 948927709), INT32_C( 
-666077781)), simde_mm_set_epi32(INT32_C( 951847201), INT32_C( -299846327), INT32_C( 575809604), INT32_C(-1150359231)), simde_mm_set_epi32(INT32_C( 837564377), INT32_C( -933128035), INT32_C( -581372672), INT32_C( -490866291)), simde_mm_set_epi32(INT32_C( -169157316), INT32_C( 1521943175), INT32_C( 841770394), INT32_C( -192049832)), simde_mm_set_epi32(INT32_C( -848324384), INT32_C(-1699878899), INT32_C( -332340467), INT32_C( 934012294)) }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castps_si128(simde_mm_castsi128_ps(test_vec[i])); simde_assert_m128i_i64(r, ==, test_vec[i]); } return 0; } static int test_simde_mm_cmpeq_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( 48), INT8_C( -17), INT8_C( 87), INT8_C( -4), INT8_C( 4), INT8_C( -44), INT8_C( 121), INT8_C( 68), INT8_C( 49), INT8_C(-108), INT8_C( 49), INT8_C( -79), INT8_C( 51), INT8_C( -82), INT8_C( 23), INT8_C( -58)), simde_mm_set_epi8(INT8_C( 87), INT8_C( -42), INT8_C( 33), INT8_C( 126), INT8_C( 91), INT8_C( 115), INT8_C( -90), INT8_C( 48), INT8_C( -49), INT8_C(-119), INT8_C( 23), INT8_C( 50), INT8_C( -10), INT8_C( -15), INT8_C( -16), INT8_C( -58)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1)) }, { simde_mm_set_epi8(INT8_C( 90), INT8_C( -93), INT8_C( 121), INT8_C( 114), INT8_C( 21), INT8_C( -52), INT8_C( -45), INT8_C( -83), INT8_C(-123), INT8_C(-119), INT8_C( -53), INT8_C(-117), INT8_C( -60), INT8_C( -20), INT8_C(-100), INT8_C( 26)), simde_mm_set_epi8(INT8_C( 67), INT8_C( -34), INT8_C(-110), INT8_C( -79), INT8_C( -72), INT8_C( -43), INT8_C( 64), INT8_C( -74), INT8_C( 64), INT8_C( 85), INT8_C( -71), INT8_C( 89), INT8_C( 35), INT8_C( 81), INT8_C( 104), INT8_C( 111)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C( 33), INT8_C( 56), INT8_C( -20), INT8_C( 49), INT8_C( -77), INT8_C(-123), INT8_C( -77), INT8_C(-109), INT8_C( -13), INT8_C( 91), INT8_C( 105), INT8_C( 29), INT8_C( 35), INT8_C( -62), INT8_C( 39), INT8_C( -24)), simde_mm_set_epi8(INT8_C( -80), INT8_C( -37), INT8_C( 43), INT8_C( 121), INT8_C(-104), INT8_C( -93), INT8_C(-100), INT8_C( 55), INT8_C( -82), INT8_C( -92), INT8_C( -6), INT8_C( 2), INT8_C( -33), INT8_C( 114), INT8_C( -94), INT8_C( 58)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C( -50), INT8_C( 20), INT8_C( 99), INT8_C( 4), INT8_C( 66), INT8_C( -69), INT8_C(-117), INT8_C( 25), INT8_C( -96), INT8_C( -11), INT8_C( -75), INT8_C( -79), INT8_C( 88), INT8_C(-123), INT8_C( -55), INT8_C( 22)), simde_mm_set_epi8(INT8_C( 68), INT8_C(-117), INT8_C(-113), INT8_C( 30), INT8_C( 0), INT8_C( 65), INT8_C( -61), INT8_C( -31), INT8_C( -53), INT8_C( -2), INT8_C( -47), INT8_C( 20), INT8_C( -79), INT8_C(-126), INT8_C( 40), INT8_C( 81)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C( 100), INT8_C( -54), INT8_C( -62), INT8_C( -41), INT8_C(-110), INT8_C( -3), INT8_C(-102), INT8_C( -2), INT8_C( 26), INT8_C( -67), INT8_C( -67), INT8_C( -73), INT8_C( 18), INT8_C( 123), INT8_C( 122), INT8_C( 106)), simde_mm_set_epi8(INT8_C( -5), INT8_C( 2), INT8_C( 119), INT8_C( 28), INT8_C( -24), INT8_C( 12), INT8_C( 106), INT8_C( -55), INT8_C( 124), INT8_C( 69), INT8_C( 31), INT8_C(-126), INT8_C( -80), INT8_C( -78), INT8_C( 
-93), INT8_C( -23)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C( 48), INT8_C( -25), INT8_C( -80), INT8_C( 17), INT8_C( -70), INT8_C( -40), INT8_C( 83), INT8_C( 37), INT8_C( 22), INT8_C( -91), INT8_C( -79), INT8_C( 8), INT8_C( 9), INT8_C( -21), INT8_C( -51), INT8_C( -21)), simde_mm_set_epi8(INT8_C( 55), INT8_C( 114), INT8_C( -79), INT8_C( -59), INT8_C( 15), INT8_C( -50), INT8_C( -69), INT8_C( 7), INT8_C(-113), INT8_C( -95), INT8_C( 112), INT8_C( 5), INT8_C( -30), INT8_C( -68), INT8_C( -27), INT8_C( -43)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C( 34), INT8_C( 111), INT8_C( 52), INT8_C( 93), INT8_C( -12), INT8_C( 98), INT8_C( -88), INT8_C( 63), INT8_C( 64), INT8_C( -98), INT8_C( 18), INT8_C( 40), INT8_C( 119), INT8_C( 68), INT8_C( -90), INT8_C( -37)), simde_mm_set_epi8(INT8_C( 35), INT8_C( 97), INT8_C( 3), INT8_C( 88), INT8_C( -70), INT8_C( -12), INT8_C( -13), INT8_C( 52), INT8_C( 127), INT8_C( -5), INT8_C( -24), INT8_C( -10), INT8_C( -21), INT8_C(-112), INT8_C( -81), INT8_C( 86)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C( -25), INT8_C(-100), INT8_C( -66), INT8_C( 103), INT8_C(-103), INT8_C( 116), INT8_C( -97), INT8_C( -43), INT8_C( 123), INT8_C( -33), INT8_C( -71), INT8_C(-122), INT8_C( 100), INT8_C( 116), INT8_C( 67), INT8_C(-119)), simde_mm_set_epi8(INT8_C( 103), INT8_C( -84), INT8_C( 102), INT8_C( -67), INT8_C( -82), INT8_C( 14), INT8_C( -17), INT8_C( -71), INT8_C( -31), 
INT8_C(-109), INT8_C( -84), INT8_C( -22), INT8_C( 78), INT8_C(-120), INT8_C( -77), INT8_C( -6)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cmpeq_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cmpeq_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( 5875), INT16_C(-30240), INT16_C( 4973), INT16_C(-24835), INT16_C( -2682), INT16_C( 25733), INT16_C( 1837), INT16_C( -8035)), simde_mm_set_epi16(INT16_C( 5875), INT16_C(-30240), INT16_C( 9332), INT16_C(-24835), INT16_C( -8998), INT16_C( 25733), INT16_C( 1837), INT16_C(-18483)), simde_mm_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)) }, { simde_mm_set_epi16(INT16_C( -1458), INT16_C( 12290), INT16_C( 394), INT16_C( 6014), INT16_C( 25725), INT16_C( 16049), INT16_C( -659), INT16_C( 13250)), simde_mm_set_epi16(INT16_C( -1458), INT16_C( 12290), INT16_C( 394), INT16_C( 6014), INT16_C( 25725), INT16_C(-30312), INT16_C( -659), INT16_C( 20372)), simde_mm_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0)) }, { simde_mm_set_epi16(INT16_C( -4375), INT16_C( 1648), INT16_C( -8256), INT16_C( 10030), INT16_C( 20444), INT16_C( -7330), INT16_C( -7889), INT16_C( 23879)), simde_mm_set_epi16(INT16_C( -644), INT16_C( 1648), INT16_C( -8256), INT16_C( 10030), INT16_C( 4813), INT16_C( -7330), INT16_C(-10599), INT16_C(-13677)), simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0)) }, { 
simde_mm_set_epi16(INT16_C(-20962), INT16_C(-30592), INT16_C(-23740), INT16_C( -1360), INT16_C( 6756), INT16_C( 10080), INT16_C( 31194), INT16_C(-10248)), simde_mm_set_epi16(INT16_C(-20962), INT16_C(-19403), INT16_C( 31222), INT16_C( 12369), INT16_C( 10909), INT16_C( 10080), INT16_C( 31194), INT16_C(-10248)), simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1)) }, { simde_mm_set_epi16(INT16_C( -5540), INT16_C(-14756), INT16_C(-15994), INT16_C( 1795), INT16_C( 18849), INT16_C( 15779), INT16_C( 5314), INT16_C(-13448)), simde_mm_set_epi16(INT16_C( -5540), INT16_C( 14083), INT16_C(-16603), INT16_C( 1795), INT16_C( 28557), INT16_C(-32040), INT16_C( 5314), INT16_C( -4887)), simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0)) }, { simde_mm_set_epi16(INT16_C(-18621), INT16_C( 6869), INT16_C(-16161), INT16_C(-24568), INT16_C(-10576), INT16_C( 20065), INT16_C( -8241), INT16_C(-21658)), simde_mm_set_epi16(INT16_C(-18621), INT16_C( 6869), INT16_C(-10830), INT16_C(-24568), INT16_C(-10576), INT16_C( 20065), INT16_C( -8094), INT16_C(-21658)), simde_mm_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1)) }, { simde_mm_set_epi16(INT16_C(-20765), INT16_C( 27683), INT16_C( 13646), INT16_C( 26224), INT16_C(-12316), INT16_C( -2556), INT16_C( -1320), INT16_C(-15938)), simde_mm_set_epi16(INT16_C( -5976), INT16_C( 27683), INT16_C( -6395), INT16_C( 26224), INT16_C(-12316), INT16_C( -2556), INT16_C( -1320), INT16_C(-15143)), simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0)) }, { simde_mm_set_epi16(INT16_C( 25864), INT16_C( 17430), INT16_C( 25473), INT16_C( 24392), INT16_C( 27481), INT16_C( 2288), INT16_C( 24811), INT16_C( 18514)), simde_mm_set_epi16(INT16_C( 25864), INT16_C( 8829), INT16_C( 25473), 
INT16_C( 24392), INT16_C( 27481), INT16_C( 4599), INT16_C( 24811), INT16_C( 18514)), simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cmpeq_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cmpeq_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32( 385059296, 325951229, -175741819, 120447133), simde_mm_set_epi32( -411118693, 325951229, -175741819, -74467379), simde_mm_set_epi32( 0, -1, -1, 0) }, { simde_mm_set_epi32( 2086724423, 2071647391, 395772386, -878201179), simde_mm_set_epi32( 2086724423, 25827198, 1685929649, -43174974), simde_mm_set_epi32( -1, 0, 0, 0) }, { simde_mm_set_epi32(-1656549033, -529471298, -677159845, -1011499644), simde_mm_set_epi32(-1108138959, 2008596507, 36966751, -1011499644), simde_mm_set_epi32( 0, 0, 0, -1) }, { simde_mm_set_epi32( -42154427, 232395060, 315449676, -694564205), simde_mm_set_epi32(-1781616670, 232395060, 315449676, -694564205), simde_mm_set_epi32( 0, -1, -1, -1) }, { simde_mm_set_epi32(-1373730688, -1555760464, 442771296, 2044385272), simde_mm_set_epi32( -819547083, -1555760464, 442771296, 2044385272), simde_mm_set_epi32( 0, -1, -1, -1) }, { simde_mm_set_epi32( -285007987, 1222927916, -234086536, 711157928), simde_mm_set_epi32( -285007987, 1222927916, 1235303843, 711157928), simde_mm_set_epi32( -1, -1, 0, -1) }, { simde_mm_set_epi32( 1734698060, -250509290, -430142591, 970705024), simde_mm_set_epi32( 1734698060, -1399422252, -430142591, -1199939349), simde_mm_set_epi32( -1, 0, -1, 0) }, { simde_mm_set_epi32( 1285559999, -709744735, -1852486552, -530433851), simde_mm_set_epi32( 1285559999, -709744735, -1768521466, -530433851), simde_mm_set_epi32( -1, -1, 0, -1) } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cmpeq_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cmpeq_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128i r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 780.35), SIMDE_FLOAT64_C( 826.07)), simde_mm_set_pd(SIMDE_FLOAT64_C( 625.03), SIMDE_FLOAT64_C( 826.07)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C(-1), INT32_C(-1)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -334.66), SIMDE_FLOAT64_C( 476.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( -334.66), SIMDE_FLOAT64_C( 556.75)), simde_mm_set_epi32(INT32_C(-1), INT32_C(-1), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 324.97), SIMDE_FLOAT64_C( 726.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( -886.03), SIMDE_FLOAT64_C( 556.75)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 432.42), SIMDE_FLOAT64_C( 208.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( -886.03), SIMDE_FLOAT64_C( 556.75)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 361.87), SIMDE_FLOAT64_C( -173.19)), simde_mm_set_pd(SIMDE_FLOAT64_C( 190.30), SIMDE_FLOAT64_C( -730.35)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 187.80), SIMDE_FLOAT64_C( -428.45)), simde_mm_set_pd(SIMDE_FLOAT64_C( 754.99), SIMDE_FLOAT64_C( -730.35)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 34.65), SIMDE_FLOAT64_C( 814.87)), simde_mm_set_pd(SIMDE_FLOAT64_C( 105.60), SIMDE_FLOAT64_C( 292.36)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 221.70), SIMDE_FLOAT64_C( -277.53)), simde_mm_set_pd(SIMDE_FLOAT64_C( -578.28), SIMDE_FLOAT64_C( 292.36)), simde_mm_set_epi32(INT32_C( 0), 
INT32_C( 0), INT32_C( 0), INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castpd_si128(simde_mm_cmpeq_pd(test_vec[i].a, test_vec[i].b)); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cmpeq_sd(SIMDE_MUNIT_TEST_ARGS) { simde__m128d all_unset = simde_mm_setzero_pd(); simde__m128d all_set = simde_mm_cmpeq_pd(all_unset, all_unset); const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 386.34), SIMDE_FLOAT64_C( 460.38)), simde_mm_set_pd(SIMDE_FLOAT64_C( -116.45), SIMDE_FLOAT64_C( 460.38)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 386.34), SIMDE_FLOAT64_C( 460.38)), all_set) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -493.11), SIMDE_FLOAT64_C( 58.42)), simde_mm_set_pd(SIMDE_FLOAT64_C( 511.42), SIMDE_FLOAT64_C( 58.42)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -493.11), SIMDE_FLOAT64_C( 58.42)), all_set) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 156.83), SIMDE_FLOAT64_C( -432.98)), simde_mm_set_pd(SIMDE_FLOAT64_C( 156.83), SIMDE_FLOAT64_C( -422.70)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 156.83), SIMDE_FLOAT64_C( -432.98)), all_unset) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -577.06)), simde_mm_set_pd(SIMDE_FLOAT64_C( 404.92), SIMDE_FLOAT64_C( -577.06)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -577.06)), all_set) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -468.33), SIMDE_FLOAT64_C( -68.91)), simde_mm_set_pd(SIMDE_FLOAT64_C( -638.04), SIMDE_FLOAT64_C( 816.57)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -468.33), SIMDE_FLOAT64_C( -68.91)), all_unset) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 37.53), SIMDE_FLOAT64_C( 339.53)), simde_mm_set_pd(SIMDE_FLOAT64_C( 37.53), SIMDE_FLOAT64_C( 339.53)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 37.53), SIMDE_FLOAT64_C( 339.53)), all_set) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 
-872.05), SIMDE_FLOAT64_C( -696.39)), simde_mm_set_pd(SIMDE_FLOAT64_C( -872.05), SIMDE_FLOAT64_C( -696.39)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -872.05), SIMDE_FLOAT64_C( -696.39)), all_set) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 251.77), SIMDE_FLOAT64_C( -366.11)), simde_mm_set_pd(SIMDE_FLOAT64_C( 251.77), SIMDE_FLOAT64_C( -622.95)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 251.77), SIMDE_FLOAT64_C( -366.11)), all_unset) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cmpeq_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_equal(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpneq_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128i r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -17.42), SIMDE_FLOAT64_C( -471.42)), simde_mm_set_pd(SIMDE_FLOAT64_C( -120.90), SIMDE_FLOAT64_C( -471.42)), simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 174.13), SIMDE_FLOAT64_C( 302.06)), simde_mm_set_pd(SIMDE_FLOAT64_C( -462.00), SIMDE_FLOAT64_C( 302.06)), simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 456.61), SIMDE_FLOAT64_C( -31.59)), simde_mm_set_pd(SIMDE_FLOAT64_C( 456.61), SIMDE_FLOAT64_C( -31.59)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 863.75), SIMDE_FLOAT64_C( 743.68)), simde_mm_set_pd(SIMDE_FLOAT64_C( 863.75), SIMDE_FLOAT64_C( -940.38)), simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -207.80), SIMDE_FLOAT64_C( 181.86)), simde_mm_set_pd(SIMDE_FLOAT64_C( -207.80), SIMDE_FLOAT64_C( 980.93)), simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -330.41), SIMDE_FLOAT64_C( 936.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( -223.97), SIMDE_FLOAT64_C( 936.80)), simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -293.06), SIMDE_FLOAT64_C( 
-978.73)), simde_mm_set_pd(SIMDE_FLOAT64_C( -858.76), SIMDE_FLOAT64_C( -978.73)), simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 215.10), SIMDE_FLOAT64_C( -720.29)), simde_mm_set_pd(SIMDE_FLOAT64_C( -813.22), SIMDE_FLOAT64_C( 235.59)), simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castpd_si128(simde_mm_cmpneq_pd(test_vec[i].a, test_vec[i].b)); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cmpneq_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 489.91), SIMDE_FLOAT64_C( 496.15)), simde_mm_set_pd(SIMDE_FLOAT64_C( -40.59), SIMDE_FLOAT64_C( 496.15)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 489.91), SIMDE_FLOAT64_C( 496.15)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -981.34), SIMDE_FLOAT64_C( 944.87)), simde_mm_set_pd(SIMDE_FLOAT64_C( -433.21), SIMDE_FLOAT64_C( 882.20)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -981.34), SIMDE_FLOAT64_C( 944.87)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 983.93), SIMDE_FLOAT64_C( 764.39)), simde_mm_set_pd(SIMDE_FLOAT64_C( 621.75), SIMDE_FLOAT64_C( 764.39)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 983.93), SIMDE_FLOAT64_C( 764.39)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 274.45), SIMDE_FLOAT64_C( 789.62)), simde_mm_set_pd(SIMDE_FLOAT64_C( 274.45), SIMDE_FLOAT64_C( 789.62)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 274.45), SIMDE_FLOAT64_C( 789.62)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -35.27), SIMDE_FLOAT64_C( 92.02)), simde_mm_set_pd(SIMDE_FLOAT64_C( -35.27), SIMDE_FLOAT64_C( 92.02)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -35.27), SIMDE_FLOAT64_C( 92.02)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -842.06), 
SIMDE_FLOAT64_C( -358.82)), simde_mm_set_pd(SIMDE_FLOAT64_C( 290.56), SIMDE_FLOAT64_C( 859.30)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -842.06), SIMDE_FLOAT64_C( -358.82)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -575.93), SIMDE_FLOAT64_C( -661.58)), simde_mm_set_pd(SIMDE_FLOAT64_C( -462.75), SIMDE_FLOAT64_C( 732.75)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -575.93), SIMDE_FLOAT64_C( -661.58)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -876.76), SIMDE_FLOAT64_C( -235.41)), simde_mm_set_pd(SIMDE_FLOAT64_C( 264.94), SIMDE_FLOAT64_C( 767.34)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -876.76), SIMDE_FLOAT64_C( -235.41)), simde_x_mm_setone_pd()) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cmpneq_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_equal(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmplt_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( -98), INT8_C( 126), INT8_C( -78), INT8_C( -97), INT8_C( -35), INT8_C( -49), INT8_C( -62), INT8_C( -8), INT8_C( -88), INT8_C( 71), INT8_C( 16), INT8_C( -4), INT8_C( 69), INT8_C( -61), INT8_C( 47), INT8_C( 84)), simde_mm_set_epi8(INT8_C( 5), INT8_C(-114), INT8_C( -27), INT8_C( -61), INT8_C( 56), INT8_C( 115), INT8_C( -53), INT8_C( 16), INT8_C( -80), INT8_C( -18), INT8_C( 83), INT8_C( -9), INT8_C( -3), INT8_C( 36), INT8_C( -57), INT8_C( 89)), simde_mm_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1)) }, { simde_mm_set_epi8(INT8_C( 94), INT8_C( 78), INT8_C( -26), INT8_C(-126), INT8_C( -98), INT8_C( 65), INT8_C( 38), INT8_C( -71), INT8_C( -54), INT8_C( 20), INT8_C( -52), INT8_C( 55), INT8_C( -76), INT8_C( 37), INT8_C( -95), INT8_C( 
91)), simde_mm_set_epi8(INT8_C( -10), INT8_C( -43), INT8_C( 70), INT8_C( -4), INT8_C( -89), INT8_C( -31), INT8_C( -61), INT8_C( 81), INT8_C( 64), INT8_C( -78), INT8_C( 14), INT8_C( 125), INT8_C( 81), INT8_C( 62), INT8_C(-124), INT8_C( 39)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C( 103), INT8_C( 49), INT8_C( 24), INT8_C( 0), INT8_C( -50), INT8_C( 64), INT8_C( 11), INT8_C( 101), INT8_C( 39), INT8_C( 41), INT8_C(-111), INT8_C( -32), INT8_C( 91), INT8_C( 86), INT8_C(-117), INT8_C( 115)), simde_mm_set_epi8(INT8_C( -44), INT8_C( 47), INT8_C( -14), INT8_C( 109), INT8_C( 44), INT8_C( 97), INT8_C( -41), INT8_C( 53), INT8_C(-121), INT8_C( -57), INT8_C( 54), INT8_C( 124), INT8_C( 50), INT8_C( -73), INT8_C( -30), INT8_C( -62)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C(-110), INT8_C( 5), INT8_C( 41), INT8_C( -3), INT8_C(-114), INT8_C( 14), INT8_C(-117), INT8_C( -89), INT8_C( 52), INT8_C( 62), INT8_C( 41), INT8_C( -25), INT8_C( 114), INT8_C( 56), INT8_C( 58), INT8_C( -99)), simde_mm_set_epi8(INT8_C( -31), INT8_C( -36), INT8_C(-126), INT8_C( -69), INT8_C( 113), INT8_C( -30), INT8_C( -24), INT8_C( 69), INT8_C( -15), INT8_C(-110), INT8_C( 23), INT8_C( 87), INT8_C(-127), INT8_C( -64), INT8_C( -38), INT8_C( -83)), simde_mm_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1)) }, { simde_mm_set_epi8(INT8_C( -38), INT8_C( -67), INT8_C( -79), INT8_C( -41), INT8_C(-114), INT8_C( 37), INT8_C( -71), INT8_C( 11), INT8_C( 105), INT8_C( 
102), INT8_C( 48), INT8_C( 127), INT8_C( 84), INT8_C( 115), INT8_C(-102), INT8_C( -24)), simde_mm_set_epi8(INT8_C( 94), INT8_C( -20), INT8_C( -97), INT8_C( -2), INT8_C(-113), INT8_C( 46), INT8_C( 123), INT8_C( -9), INT8_C( 35), INT8_C( -47), INT8_C( 90), INT8_C( -73), INT8_C(-122), INT8_C( -3), INT8_C(-116), INT8_C( -4)), simde_mm_set_epi8(INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1)) }, { simde_mm_set_epi8(INT8_C( 27), INT8_C( 41), INT8_C( 36), INT8_C( -97), INT8_C( -84), INT8_C( 108), INT8_C( -37), INT8_C( -69), INT8_C( -29), INT8_C( 45), INT8_C( 101), INT8_C( 104), INT8_C( 102), INT8_C( -85), INT8_C( 3), INT8_C( 124)), simde_mm_set_epi8(INT8_C(-119), INT8_C( 16), INT8_C( -80), INT8_C( 97), INT8_C( 97), INT8_C( -44), INT8_C( 71), INT8_C( -43), INT8_C( 39), INT8_C( -54), INT8_C( 15), INT8_C( -61), INT8_C( 100), INT8_C( -92), INT8_C( 5), INT8_C( -93)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C( 53), INT8_C( 82), INT8_C( -6), INT8_C( 99), INT8_C( 95), INT8_C( -34), INT8_C( -90), INT8_C( -14), INT8_C( -43), INT8_C( -72), INT8_C( -83), INT8_C(-104), INT8_C( -1), INT8_C( -60), INT8_C( 103), INT8_C( -66)), simde_mm_set_epi8(INT8_C( 13), INT8_C( 118), INT8_C( 25), INT8_C( 60), INT8_C( -83), INT8_C( -43), INT8_C( 90), INT8_C( 54), INT8_C( -84), INT8_C(-125), INT8_C( -41), INT8_C( 52), INT8_C( 18), INT8_C( 46), INT8_C( 126), INT8_C( -65)), simde_mm_set_epi8(INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1)) }, { simde_mm_set_epi8(INT8_C( 43), INT8_C( 123), INT8_C( -13), 
INT8_C( 35), INT8_C(-119), INT8_C( 53), INT8_C( -35), INT8_C( -46), INT8_C( 44), INT8_C( 69), INT8_C( 50), INT8_C(-120), INT8_C( 2), INT8_C( 50), INT8_C( -95), INT8_C( 46)), simde_mm_set_epi8(INT8_C( -57), INT8_C( -76), INT8_C(-104), INT8_C(-127), INT8_C( -27), INT8_C( 127), INT8_C( 127), INT8_C(-109), INT8_C( 40), INT8_C( -63), INT8_C( 87), INT8_C( -27), INT8_C( -1), INT8_C(-101), INT8_C( 11), INT8_C( 44)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i r = simde_mm_cmplt_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cmplt_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-19152), INT16_C( 7219), INT16_C( 8875), INT16_C(-12109), INT16_C( -6164), INT16_C(-29571), INT16_C( 29544), INT16_C( 12828)), simde_mm_set_epi16(INT16_C(-19152), INT16_C( -1176), INT16_C(-32721), INT16_C( 28268), INT16_C( 28536), INT16_C(-24890), INT16_C(-20501), INT16_C( 12828)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_epi16(INT16_C( -385), INT16_C( 10411), INT16_C( -4671), INT16_C( 18534), INT16_C( 18234), INT16_C( 8064), INT16_C(-32746), INT16_C( 1460)), simde_mm_set_epi16(INT16_C(-11261), INT16_C( 19475), INT16_C( -4671), INT16_C(-23700), INT16_C( 8656), INT16_C( 8064), INT16_C(-28801), INT16_C( 5582)), simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1)) }, { simde_mm_set_epi16(INT16_C( 23112), INT16_C( 21760), INT16_C(-29652), INT16_C( -7707), INT16_C( 4438), INT16_C(-14112), INT16_C( 617), 
INT16_C(-29125)), simde_mm_set_epi16(INT16_C(-20847), INT16_C(-17750), INT16_C( 7413), INT16_C( 13270), INT16_C( 30220), INT16_C(-14112), INT16_C( -140), INT16_C( 23495)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1)) }, { simde_mm_set_epi16(INT16_C( 12262), INT16_C(-26458), INT16_C(-17793), INT16_C( 15097), INT16_C(-28884), INT16_C( -39), INT16_C( 29206), INT16_C( 24614)), simde_mm_set_epi16(INT16_C(-12392), INT16_C(-30769), INT16_C(-17793), INT16_C( 15097), INT16_C( 22525), INT16_C( 7510), INT16_C( 28529), INT16_C( -9470)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_epi16(INT16_C( 14581), INT16_C( 30465), INT16_C( 26611), INT16_C(-25355), INT16_C( 12222), INT16_C(-12322), INT16_C( 176), INT16_C( -4760)), simde_mm_set_epi16(INT16_C( 10242), INT16_C( 15750), INT16_C(-11513), INT16_C( 7111), INT16_C(-29171), INT16_C(-12322), INT16_C( 176), INT16_C( -4760)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_epi16(INT16_C(-12610), INT16_C( 30415), INT16_C(-22748), INT16_C( 8564), INT16_C(-28400), INT16_C(-22984), INT16_C(-31130), INT16_C( 2400)), simde_mm_set_epi16(INT16_C( 17489), INT16_C(-18807), INT16_C( 19401), INT16_C( -73), INT16_C(-28400), INT16_C( -7356), INT16_C( 31412), INT16_C( 2400)), simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)) }, { simde_mm_set_epi16(INT16_C( 5617), INT16_C( -8984), INT16_C( 20729), INT16_C( 15025), INT16_C(-12038), INT16_C(-32017), INT16_C(-24693), INT16_C( -3874)), simde_mm_set_epi16(INT16_C( -8219), INT16_C( 19022), INT16_C(-28515), INT16_C( 15025), INT16_C( -2982), INT16_C( -314), INT16_C( 16536), INT16_C(-17813)), simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), 
INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0)) }, { simde_mm_set_epi16(INT16_C( 3028), INT16_C( 25056), INT16_C(-30420), INT16_C( 3400), INT16_C( 27498), INT16_C(-24168), INT16_C(-10264), INT16_C( -5651)), simde_mm_set_epi16(INT16_C( 16763), INT16_C( 3971), INT16_C(-30420), INT16_C(-13950), INT16_C( 26793), INT16_C(-27284), INT16_C( 22512), INT16_C(-19434)), simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cmplt_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cmplt_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(-1255138253, 581685427, -403927939, 1936208412), simde_mm_set_epi32( 212007784, -2144375188, 1870175942, 1936208412), simde_mm_set_epi32( -1, 0, -1, 0) }, { simde_mm_set_epi32(-1412605706, -573136614, -789373589, 1859272017), simde_mm_set_epi32(-1412605706, -306100122, 1194991488, -2146040396), simde_mm_set_epi32( 0, -1, -1, 0) }, { simde_mm_set_epi32(-1857828629, -865462431, 1845130162, -790702535), simde_mm_set_epi32( 1020632409, -786544507, 219144900, 222814568), simde_mm_set_epi32( -1, -1, 0, -1) }, { simde_mm_set_epi32(-1366181206, 485831638, 1980524634, -9151545), simde_mm_set_epi32(-1932199485, 327347510, 706051828, -541415230), simde_mm_set_epi32( 0, 0, 0, 0) }, { simde_mm_set_epi32( 803641510, -1166066951, -1892876327, 1914069030), simde_mm_set_epi32( -812087345, -1002684270, 1476205910, 1869732610), simde_mm_set_epi32( 0, -1, -1, 0) }, { simde_mm_set_epi32(-1773657387, -1529382252, 1397468980, 1171964570), simde_mm_set_epi32( 955610881, 1744018677, 801034206, 1171964570), simde_mm_set_epi32( -1, -1, 0, 0) }, { simde_mm_set_epi32(-1807229965, -1210178631, 1522043695, -1735369601), 
simde_mm_set_epi32(-1560329504, 1101415557, 1311721597, 1371106332), simde_mm_set_epi32( -1, -1, 0, -1) }, { simde_mm_set_epi32( 1146205833, 1271529399, 1661264708, 2058651784), simde_mm_set_epi32( 624079870, 1320739553, -1066082248, -1119644266), simde_mm_set_epi32( 0, -1, 0, 0) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cmplt_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cmplt_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128i r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 415.53), SIMDE_FLOAT64_C( -98.38)), simde_mm_set_pd(SIMDE_FLOAT64_C( 415.53), SIMDE_FLOAT64_C( -729.13)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -102.02), SIMDE_FLOAT64_C( -129.13)), simde_mm_set_pd(SIMDE_FLOAT64_C( 345.59), SIMDE_FLOAT64_C( -901.28)), simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 733.11), SIMDE_FLOAT64_C( 268.99)), simde_mm_set_pd(SIMDE_FLOAT64_C( 733.11), SIMDE_FLOAT64_C( 632.42)), simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 988.26), SIMDE_FLOAT64_C( 0.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( -735.83), SIMDE_FLOAT64_C( 857.46)), simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 631.80), SIMDE_FLOAT64_C( -84.12)), simde_mm_set_pd(SIMDE_FLOAT64_C( 596.99), SIMDE_FLOAT64_C( -84.12)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 633.74), SIMDE_FLOAT64_C( 134.88)), simde_mm_set_pd(SIMDE_FLOAT64_C( -981.15), SIMDE_FLOAT64_C( -897.95)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 995.74), SIMDE_FLOAT64_C( -864.54)), simde_mm_set_pd(SIMDE_FLOAT64_C( -773.77), SIMDE_FLOAT64_C( -294.67)), simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 
-847.57), SIMDE_FLOAT64_C( 363.82)), simde_mm_set_pd(SIMDE_FLOAT64_C( 743.31), SIMDE_FLOAT64_C( -671.22)), simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castpd_si128(simde_mm_cmplt_pd(test_vec[i].a, test_vec[i].b)); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cmplt_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 884.89), SIMDE_FLOAT64_C( -700.86)), simde_mm_set_pd(SIMDE_FLOAT64_C( 194.09), SIMDE_FLOAT64_C( 342.08)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 884.89), SIMDE_FLOAT64_C( -700.86)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -552.29), SIMDE_FLOAT64_C( -477.43)), simde_mm_set_pd(SIMDE_FLOAT64_C( -288.53), SIMDE_FLOAT64_C( -439.96)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -552.29), SIMDE_FLOAT64_C( -477.43)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -25.82), SIMDE_FLOAT64_C( -940.19)), simde_mm_set_pd(SIMDE_FLOAT64_C( 251.57), SIMDE_FLOAT64_C( 618.81)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -25.82), SIMDE_FLOAT64_C( -940.19)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -905.83), SIMDE_FLOAT64_C( 120.16)), simde_mm_set_pd(SIMDE_FLOAT64_C( -235.64), SIMDE_FLOAT64_C( -293.77)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -905.83), SIMDE_FLOAT64_C( 120.16)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 804.90), SIMDE_FLOAT64_C( 266.33)), simde_mm_set_pd(SIMDE_FLOAT64_C( -104.58), SIMDE_FLOAT64_C( -965.81)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 804.90), SIMDE_FLOAT64_C( 266.33)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 44.09), SIMDE_FLOAT64_C( -365.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 534.45), SIMDE_FLOAT64_C( -718.87)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 44.09), 
SIMDE_FLOAT64_C( -365.90)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -108.03), SIMDE_FLOAT64_C( 233.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( -420.51), SIMDE_FLOAT64_C( -879.83)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -108.03), SIMDE_FLOAT64_C( 233.20)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -399.23), SIMDE_FLOAT64_C( 758.04)), simde_mm_set_pd(SIMDE_FLOAT64_C( -334.35), SIMDE_FLOAT64_C( -250.33)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -399.23), SIMDE_FLOAT64_C( 758.04)), simde_mm_setzero_pd()) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cmplt_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_equal(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpnlt_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128i r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 852.01), SIMDE_FLOAT64_C( -875.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( 852.01), SIMDE_FLOAT64_C( -124.49)), simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 396.43), SIMDE_FLOAT64_C( -754.13)), simde_mm_set_pd(SIMDE_FLOAT64_C( 396.43), SIMDE_FLOAT64_C( -446.22)), simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 253.13), SIMDE_FLOAT64_C( 198.68)), simde_mm_set_pd(SIMDE_FLOAT64_C( 253.13), SIMDE_FLOAT64_C( 828.60)), simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 137.84), SIMDE_FLOAT64_C( -995.54)), simde_mm_set_pd(SIMDE_FLOAT64_C( 137.84), SIMDE_FLOAT64_C( -366.89)), simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 981.94), SIMDE_FLOAT64_C( -371.83)), simde_mm_set_pd(SIMDE_FLOAT64_C( -999.24), SIMDE_FLOAT64_C( 567.77)), simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 471.23), SIMDE_FLOAT64_C( -984.85)), simde_mm_set_pd(SIMDE_FLOAT64_C( -365.65), SIMDE_FLOAT64_C( 
102.67)), simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -625.86), SIMDE_FLOAT64_C( -91.22)), simde_mm_set_pd(SIMDE_FLOAT64_C( -928.96), SIMDE_FLOAT64_C( -311.29)), simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -444.22), SIMDE_FLOAT64_C( 458.27)), simde_mm_set_pd(SIMDE_FLOAT64_C( 882.56), SIMDE_FLOAT64_C( 290.13)), simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castpd_si128(simde_mm_cmpnlt_pd(test_vec[i].a, test_vec[i].b)); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cmpnlt_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -713.31), SIMDE_FLOAT64_C( -162.56)), simde_mm_set_pd(SIMDE_FLOAT64_C( -134.78), SIMDE_FLOAT64_C( -333.93)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -713.31), SIMDE_FLOAT64_C( -162.56)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 903.93), SIMDE_FLOAT64_C( 249.58)), simde_mm_set_pd(SIMDE_FLOAT64_C( 300.72), SIMDE_FLOAT64_C( -642.46)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 903.93), SIMDE_FLOAT64_C( 249.58)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -697.34), SIMDE_FLOAT64_C( 79.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( -123.52), SIMDE_FLOAT64_C( -418.48)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -697.34), SIMDE_FLOAT64_C( 79.67)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 90.71), SIMDE_FLOAT64_C( -449.42)), simde_mm_set_pd(SIMDE_FLOAT64_C( 629.69), SIMDE_FLOAT64_C( 449.98)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 90.71), SIMDE_FLOAT64_C( -449.42)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 751.69), SIMDE_FLOAT64_C( -170.45)), simde_mm_set_pd(SIMDE_FLOAT64_C( -991.25), SIMDE_FLOAT64_C( 129.62)), 
simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 751.69), SIMDE_FLOAT64_C( -170.45)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 961.53), SIMDE_FLOAT64_C( -601.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( -458.00), SIMDE_FLOAT64_C( -521.61)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 961.53), SIMDE_FLOAT64_C( -601.03)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -697.78), SIMDE_FLOAT64_C( 908.22)), simde_mm_set_pd(SIMDE_FLOAT64_C( -418.87), SIMDE_FLOAT64_C( 253.38)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -697.78), SIMDE_FLOAT64_C( 908.22)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 897.84), SIMDE_FLOAT64_C( 98.86)), simde_mm_set_pd(SIMDE_FLOAT64_C( 743.55), SIMDE_FLOAT64_C( -417.08)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 897.84), SIMDE_FLOAT64_C( 98.86)), simde_x_mm_setone_pd()) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cmpnlt_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_equal(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmple_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128i r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 543.54), SIMDE_FLOAT64_C( -463.43)), simde_mm_set_pd(SIMDE_FLOAT64_C( 803.80), SIMDE_FLOAT64_C( -383.88)), simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -420.41), SIMDE_FLOAT64_C( 497.43)), simde_mm_set_pd(SIMDE_FLOAT64_C( -592.95), SIMDE_FLOAT64_C( -224.51)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -921.01), SIMDE_FLOAT64_C( -601.69)), simde_mm_set_pd(SIMDE_FLOAT64_C( -921.01), SIMDE_FLOAT64_C( -730.20)), simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -456.56), SIMDE_FLOAT64_C( 380.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( -456.56), SIMDE_FLOAT64_C( 380.21)), simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) }, { 
simde_mm_set_pd(SIMDE_FLOAT64_C( 317.08), SIMDE_FLOAT64_C( 136.54)), simde_mm_set_pd(SIMDE_FLOAT64_C( 944.53), SIMDE_FLOAT64_C( 370.42)), simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -193.09), SIMDE_FLOAT64_C( 515.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( -63.27), SIMDE_FLOAT64_C( 515.21)), simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 14.93), SIMDE_FLOAT64_C( 166.91)), simde_mm_set_pd(SIMDE_FLOAT64_C( 14.93), SIMDE_FLOAT64_C( -633.50)), simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 121.98), SIMDE_FLOAT64_C( -542.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( 121.98), SIMDE_FLOAT64_C( -244.93)), simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castpd_si128(simde_mm_cmple_pd(test_vec[i].a, test_vec[i].b)); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cmple_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -6.97), SIMDE_FLOAT64_C( -531.93)), simde_mm_set_pd(SIMDE_FLOAT64_C( 442.04), SIMDE_FLOAT64_C( 237.56)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -6.97), SIMDE_FLOAT64_C( -531.93)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 953.55), SIMDE_FLOAT64_C( -668.52)), simde_mm_set_pd(SIMDE_FLOAT64_C( 75.21), SIMDE_FLOAT64_C( -841.44)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 953.55), SIMDE_FLOAT64_C( -668.52)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -962.44), SIMDE_FLOAT64_C( 733.31)), simde_mm_set_pd(SIMDE_FLOAT64_C( 366.34), SIMDE_FLOAT64_C( 744.84)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -962.44), SIMDE_FLOAT64_C( 733.31)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 546.64), SIMDE_FLOAT64_C( 333.17)), simde_mm_set_pd(SIMDE_FLOAT64_C( 540.77), 
SIMDE_FLOAT64_C( -0.80)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 546.64), SIMDE_FLOAT64_C( 333.17)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -407.18), SIMDE_FLOAT64_C( -763.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( 973.34), SIMDE_FLOAT64_C( -496.03)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -407.18), SIMDE_FLOAT64_C( -763.20)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 407.82), SIMDE_FLOAT64_C( 479.81)), simde_mm_set_pd(SIMDE_FLOAT64_C( 198.41), SIMDE_FLOAT64_C( 710.05)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 407.82), SIMDE_FLOAT64_C( 479.81)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -282.72), SIMDE_FLOAT64_C( -348.78)), simde_mm_set_pd(SIMDE_FLOAT64_C( 165.84), SIMDE_FLOAT64_C( -951.18)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -282.72), SIMDE_FLOAT64_C( -348.78)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 642.13), SIMDE_FLOAT64_C( -574.77)), simde_mm_set_pd(SIMDE_FLOAT64_C( -633.14), SIMDE_FLOAT64_C( 741.95)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 642.13), SIMDE_FLOAT64_C( -574.77)), simde_x_mm_setone_pd()) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cmple_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_equal(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpnle_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128i r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -50.93), SIMDE_FLOAT64_C( -877.25)), simde_mm_set_pd(SIMDE_FLOAT64_C( -50.93), SIMDE_FLOAT64_C( 61.42)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 567.19), SIMDE_FLOAT64_C( 768.82)), simde_mm_set_pd(SIMDE_FLOAT64_C( -689.51), SIMDE_FLOAT64_C( 768.82)), simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 625.00), SIMDE_FLOAT64_C( 979.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( 59.83), 
SIMDE_FLOAT64_C( 979.36)), simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -994.58), SIMDE_FLOAT64_C( 130.45)), simde_mm_set_pd(SIMDE_FLOAT64_C( -720.49), SIMDE_FLOAT64_C( 130.45)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 889.79), SIMDE_FLOAT64_C( -677.25)), simde_mm_set_pd(SIMDE_FLOAT64_C( 889.79), SIMDE_FLOAT64_C( -677.25)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 802.11), SIMDE_FLOAT64_C( -926.46)), simde_mm_set_pd(SIMDE_FLOAT64_C( -136.48), SIMDE_FLOAT64_C( -926.46)), simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -579.79), SIMDE_FLOAT64_C( 368.31)), simde_mm_set_pd(SIMDE_FLOAT64_C( -579.79), SIMDE_FLOAT64_C( -736.86)), simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 408.86), SIMDE_FLOAT64_C( 63.85)), simde_mm_set_pd(SIMDE_FLOAT64_C( 408.86), SIMDE_FLOAT64_C( 878.02)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_castpd_si128(simde_mm_cmpnle_pd(test_vec[i].a, test_vec[i].b)); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cmpnle_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 863.30), SIMDE_FLOAT64_C( 817.71)), simde_mm_set_pd(SIMDE_FLOAT64_C( 465.11), SIMDE_FLOAT64_C( 402.99)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 863.30), SIMDE_FLOAT64_C( 817.71)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 189.40), SIMDE_FLOAT64_C( -607.91)), simde_mm_set_pd(SIMDE_FLOAT64_C( -476.72), SIMDE_FLOAT64_C( -670.93)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 189.40), SIMDE_FLOAT64_C( -607.91)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -333.27), SIMDE_FLOAT64_C( 662.88)), 
simde_mm_set_pd(SIMDE_FLOAT64_C( 741.44), SIMDE_FLOAT64_C( -212.71)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -333.27), SIMDE_FLOAT64_C( 662.88)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 426.15), SIMDE_FLOAT64_C( -964.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( 54.04), SIMDE_FLOAT64_C( 321.51)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 426.15), SIMDE_FLOAT64_C( -964.01)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -797.48), SIMDE_FLOAT64_C( 851.48)), simde_mm_set_pd(SIMDE_FLOAT64_C( 907.15), SIMDE_FLOAT64_C( 638.76)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -797.48), SIMDE_FLOAT64_C( 851.48)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 439.23), SIMDE_FLOAT64_C( 238.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( -23.09), SIMDE_FLOAT64_C( 160.20)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 439.23), SIMDE_FLOAT64_C( 238.01)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 537.28), SIMDE_FLOAT64_C( 982.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 303.40), SIMDE_FLOAT64_C( 928.78)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 537.28), SIMDE_FLOAT64_C( 982.90)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -169.84), SIMDE_FLOAT64_C( -696.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( -302.24), SIMDE_FLOAT64_C( -382.83)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -169.84), SIMDE_FLOAT64_C( -696.10)), simde_mm_setzero_pd()) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cmpnle_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_equal(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpgt_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( 13), INT8_C( -99), INT8_C(-128), INT8_C( 91), INT8_C( -96), INT8_C( 103), INT8_C(-104), INT8_C(-110), INT8_C( -46), INT8_C( -5), INT8_C( 62), INT8_C(-125), INT8_C( -51), 
INT8_C( -65), INT8_C(-102), INT8_C( -14)), simde_mm_set_epi8(INT8_C( 10), INT8_C( -84), INT8_C( 90), INT8_C(-110), INT8_C( 113), INT8_C( -34), INT8_C( -75), INT8_C(-110), INT8_C( -79), INT8_C(-114), INT8_C( 26), INT8_C(-127), INT8_C( -5), INT8_C( -9), INT8_C(-102), INT8_C( -38)), simde_mm_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1)) }, { simde_mm_set_epi8(INT8_C(-110), INT8_C( -93), INT8_C( 106), INT8_C( -55), INT8_C( 91), INT8_C( -78), INT8_C( 69), INT8_C( 62), INT8_C( 38), INT8_C(-101), INT8_C( 86), INT8_C(-107), INT8_C( 114), INT8_C( 120), INT8_C(-118), INT8_C( 101)), simde_mm_set_epi8(INT8_C( 58), INT8_C( -88), INT8_C( 75), INT8_C( -55), INT8_C( 92), INT8_C( 51), INT8_C(-109), INT8_C( 62), INT8_C( 123), INT8_C( -42), INT8_C( 0), INT8_C( 40), INT8_C( 114), INT8_C(-115), INT8_C( 34), INT8_C( 101)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C( 79), INT8_C( -2), INT8_C(-126), INT8_C(-121), INT8_C( 71), INT8_C( -59), INT8_C( 95), INT8_C( 38), INT8_C( -95), INT8_C( 103), INT8_C( -55), INT8_C( -42), INT8_C(-124), INT8_C( -82), INT8_C( 102), INT8_C( 97)), simde_mm_set_epi8(INT8_C( -39), INT8_C( -59), INT8_C(-126), INT8_C(-107), INT8_C(-111), INT8_C( 122), INT8_C( -55), INT8_C( 87), INT8_C( -95), INT8_C( -99), INT8_C( 56), INT8_C( 120), INT8_C( 107), INT8_C( -79), INT8_C( -9), INT8_C( -36)), simde_mm_set_epi8(INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1)) }, { simde_mm_set_epi8(INT8_C( -68), INT8_C( 7), INT8_C( -32), INT8_C( 120), INT8_C(-106), INT8_C(-127), 
INT8_C( 37), INT8_C( 95), INT8_C( -77), INT8_C(-126), INT8_C(-111), INT8_C( -96), INT8_C( 67), INT8_C( 43), INT8_C(-123), INT8_C( 21)), simde_mm_set_epi8(INT8_C( 72), INT8_C( 68), INT8_C( 76), INT8_C( -22), INT8_C( -11), INT8_C( 34), INT8_C( 112), INT8_C( 95), INT8_C( -77), INT8_C( 36), INT8_C( 119), INT8_C( -59), INT8_C( -49), INT8_C( -22), INT8_C(-125), INT8_C( 21)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C( -18), INT8_C( 13), INT8_C( 66), INT8_C( -52), INT8_C( -92), INT8_C( 28), INT8_C(-122), INT8_C( -12), INT8_C( -60), INT8_C( 125), INT8_C(-104), INT8_C(-118), INT8_C( -76), INT8_C( 42), INT8_C( -48), INT8_C(-120)), simde_mm_set_epi8(INT8_C( -17), INT8_C( 13), INT8_C( 66), INT8_C( -64), INT8_C( -92), INT8_C( 114), INT8_C(-119), INT8_C(-106), INT8_C( 78), INT8_C(-125), INT8_C( 88), INT8_C( -88), INT8_C( 101), INT8_C( 42), INT8_C( -58), INT8_C( -8)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C(-112), INT8_C( -16), INT8_C( 96), INT8_C( -64), INT8_C( 75), INT8_C( 56), INT8_C( -96), INT8_C( 96), INT8_C( -8), INT8_C( 16), INT8_C( 95), INT8_C( 41), INT8_C( 62), INT8_C( -2), INT8_C(-105), INT8_C(-101)), simde_mm_set_epi8(INT8_C( 42), INT8_C( 7), INT8_C( 90), INT8_C( -93), INT8_C( 75), INT8_C( 14), INT8_C( -5), INT8_C( 61), INT8_C( -8), INT8_C( -49), INT8_C( 95), INT8_C( 82), INT8_C( -93), INT8_C( -80), INT8_C( 6), INT8_C( -48)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C( 
-56), INT8_C( -85), INT8_C( 9), INT8_C( 3), INT8_C( 32), INT8_C(-105), INT8_C( 93), INT8_C( -78), INT8_C(-113), INT8_C( 96), INT8_C( 61), INT8_C( 14), INT8_C( -92), INT8_C( 53), INT8_C( 51), INT8_C( -7)), simde_mm_set_epi8(INT8_C( 15), INT8_C( 100), INT8_C( 9), INT8_C( 70), INT8_C(-115), INT8_C(-105), INT8_C( 14), INT8_C( -41), INT8_C(-113), INT8_C( -54), INT8_C( -38), INT8_C( 14), INT8_C( -53), INT8_C( 5), INT8_C(-127), INT8_C( -7)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C( 120), INT8_C( 38), INT8_C( 44), INT8_C( 103), INT8_C( 33), INT8_C( -93), INT8_C(-102), INT8_C( -46), INT8_C( 47), INT8_C( 7), INT8_C( 120), INT8_C( 102), INT8_C( -87), INT8_C( -84), INT8_C( 92), INT8_C( 87)), simde_mm_set_epi8(INT8_C( -11), INT8_C( 89), INT8_C( 26), INT8_C( 69), INT8_C( 108), INT8_C( 127), INT8_C(-102), INT8_C( 49), INT8_C( 53), INT8_C( 57), INT8_C( 120), INT8_C( -23), INT8_C( -87), INT8_C( -84), INT8_C( 113), INT8_C( -36)), simde_mm_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i r = simde_mm_cmpgt_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cmpgt_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( 11481), INT16_C(-31028), INT16_C(-28938), INT16_C( 3434), INT16_C( 2523), INT16_C(-16298), INT16_C(-20752), INT16_C( -3418)), simde_mm_set_epi16(INT16_C( 11481), INT16_C(-30562), INT16_C( 4762), INT16_C( -6519), INT16_C( 2523), INT16_C( 9845), INT16_C( -18), INT16_C( 
-5787)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1)) }, { simde_mm_set_epi16(INT16_C( -3487), INT16_C( -2281), INT16_C( 2722), INT16_C(-23699), INT16_C( -5087), INT16_C( 24907), INT16_C( 26126), INT16_C( 26357)), simde_mm_set_epi16(INT16_C( 32178), INT16_C(-24562), INT16_C( -3261), INT16_C(-23699), INT16_C( 2431), INT16_C(-16600), INT16_C( -5679), INT16_C(-12625)), simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1)) }, { simde_mm_set_epi16(INT16_C( 28908), INT16_C( -8639), INT16_C(-27999), INT16_C(-19726), INT16_C( 28446), INT16_C( -947), INT16_C( -9756), INT16_C(-32088)), simde_mm_set_epi16(INT16_C(-24056), INT16_C(-13026), INT16_C(-27999), INT16_C( 27584), INT16_C(-22292), INT16_C( 18403), INT16_C(-15329), INT16_C( 30515)), simde_mm_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0)) }, { simde_mm_set_epi16(INT16_C( 32500), INT16_C( 28770), INT16_C(-12789), INT16_C( 764), INT16_C(-17186), INT16_C( 5823), INT16_C( 5923), INT16_C(-14898)), simde_mm_set_epi16(INT16_C( 5264), INT16_C(-27897), INT16_C(-22472), INT16_C(-17764), INT16_C( 20191), INT16_C( 20077), INT16_C(-20539), INT16_C( -7345)), simde_mm_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0)) }, { simde_mm_set_epi16(INT16_C(-32420), INT16_C(-10018), INT16_C( 10034), INT16_C( 21195), INT16_C( 23576), INT16_C( 23578), INT16_C( 27261), INT16_C( 22728)), simde_mm_set_epi16(INT16_C(-22785), INT16_C( 9581), INT16_C( -7653), INT16_C(-22519), INT16_C( 2089), INT16_C( 10927), INT16_C( 31136), INT16_C( 28081)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_epi16(INT16_C( -8613), INT16_C( 14814), INT16_C( 25977), INT16_C(-32026), 
INT16_C(-14164), INT16_C( 15788), INT16_C( 26276), INT16_C(-23351)), simde_mm_set_epi16(INT16_C( 18907), INT16_C( 31050), INT16_C( 25483), INT16_C( -1544), INT16_C(-22377), INT16_C(-30002), INT16_C( 26276), INT16_C(-21368)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_epi16(INT16_C( -8616), INT16_C( 18300), INT16_C(-13448), INT16_C(-25384), INT16_C(-20778), INT16_C( 9404), INT16_C( 18457), INT16_C(-13013)), simde_mm_set_epi16(INT16_C( 28965), INT16_C(-22807), INT16_C( 20081), INT16_C(-25384), INT16_C( 21664), INT16_C(-19420), INT16_C(-10494), INT16_C( 8092)), simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)) }, { simde_mm_set_epi16(INT16_C(-19643), INT16_C( 19578), INT16_C(-31344), INT16_C(-10120), INT16_C( -1042), INT16_C( 26214), INT16_C( 7476), INT16_C( 19171)), simde_mm_set_epi16(INT16_C( 3338), INT16_C(-31811), INT16_C( 23264), INT16_C( 16135), INT16_C( 10963), INT16_C( 28585), INT16_C( 10267), INT16_C( 15982)), simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cmpgt_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cmpgt_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32( 752453324, -1896477334, 165396566, -1359940954), simde_mm_set_epi32( 752453324, 312141449, -1431099787, -1119899), simde_mm_set_epi32( 0, 0, -1, 0) }, { simde_mm_set_epi32( 107153560, 1681238316, -2021152487, -1327623679), simde_mm_set_epi32( -228460777, 178430829, -333356725, 1712219893), simde_mm_set_epi32( -1, -1, 0, 0) }, { simde_mm_set_epi32( -899341348, -1183976764, 50756911, 
-774436817), simde_mm_set_epi32( -899341348, -1675909702, 50756911, 393145285), simde_mm_set_epi32( 0, -1, 0, 0) }, { simde_mm_set_epi32(-1576481506, 693332928, -1460910109, -1004570829), simde_mm_set_epi32(-1038801032, -1159952439, -1460910109, -43665635), simde_mm_set_epi32( 0, -1, 0, 0) }, { simde_mm_set_epi32( 2129948770, -838139140, -1126295873, 388220366), simde_mm_set_epi32( 345019143, -1472677220, 1323257453, -1345985713), simde_mm_set_epi32( -1, -1, 0, -1) }, { simde_mm_set_epi32( 324758156, 1228690576, -1773311089, 254589418), simde_mm_set_epi32(-2124621602, 1228690576, 1545100314, 1786599624), simde_mm_set_epi32( -1, 0, 0, 0) }, { simde_mm_set_epi32(-1939857174, 351576089, 62939556, -1061610170), simde_mm_set_epi32(-1899113305, 1851167226, 62939556, -2109881445), simde_mm_set_epi32( 0, 0, 0, -1) }, { simde_mm_set_epi32( 1239120202, 1670117880, -1466463538, 1932307592), simde_mm_set_epi32( 1694384857, 79202881, -114087446, -617386644), simde_mm_set_epi32( 0, -1, 0, -1) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cmpgt_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cmpgt_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -649.61), SIMDE_FLOAT64_C( 366.73)), simde_mm_set_pd(SIMDE_FLOAT64_C( 333.59), SIMDE_FLOAT64_C( 116.88)), simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -619.22), SIMDE_FLOAT64_C( -854.65)), simde_mm_set_pd(SIMDE_FLOAT64_C( -854.79), SIMDE_FLOAT64_C( 863.33)), simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -950.10), SIMDE_FLOAT64_C( 381.78)), simde_mm_set_pd(SIMDE_FLOAT64_C( 844.77), SIMDE_FLOAT64_C( -217.11)), simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) }, { 
simde_mm_set_pd(SIMDE_FLOAT64_C( -925.79), SIMDE_FLOAT64_C( -916.91)), simde_mm_set_pd(SIMDE_FLOAT64_C( -17.99), SIMDE_FLOAT64_C( 826.72)), simde_mm_setzero_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 581.21), SIMDE_FLOAT64_C( 639.37)), simde_mm_set_pd(SIMDE_FLOAT64_C( 581.21), SIMDE_FLOAT64_C( 448.67)), simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 702.29), SIMDE_FLOAT64_C( -582.84)), simde_mm_set_pd(SIMDE_FLOAT64_C( 702.29), SIMDE_FLOAT64_C( 186.24)), simde_mm_setzero_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 532.21), SIMDE_FLOAT64_C( 145.56)), simde_mm_set_pd(SIMDE_FLOAT64_C( -677.14), SIMDE_FLOAT64_C( 145.56)), simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 459.86), SIMDE_FLOAT64_C( 265.89)), simde_mm_set_pd(SIMDE_FLOAT64_C( -130.43), SIMDE_FLOAT64_C( 334.48)), simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cmpgt_pd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_equal(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpgt_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 482.46), SIMDE_FLOAT64_C( 39.32)), simde_mm_set_pd(SIMDE_FLOAT64_C( 175.75), SIMDE_FLOAT64_C( -451.08)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 482.46), SIMDE_FLOAT64_C( 39.32)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 850.79), SIMDE_FLOAT64_C( 999.92)), simde_mm_set_pd(SIMDE_FLOAT64_C( -978.35), SIMDE_FLOAT64_C( 216.37)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 850.79), SIMDE_FLOAT64_C( 999.92)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -218.27), SIMDE_FLOAT64_C( 952.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( -402.87), SIMDE_FLOAT64_C( -852.22)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 
-218.27), SIMDE_FLOAT64_C( 952.36)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -324.97), SIMDE_FLOAT64_C( -18.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( -602.36), SIMDE_FLOAT64_C( 488.60)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -324.97), SIMDE_FLOAT64_C( -18.67)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -224.88), SIMDE_FLOAT64_C( 278.88)), simde_mm_set_pd(SIMDE_FLOAT64_C( 861.73), SIMDE_FLOAT64_C( -326.54)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -224.88), SIMDE_FLOAT64_C( 278.88)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -160.74), SIMDE_FLOAT64_C( 611.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( 370.13), SIMDE_FLOAT64_C( 18.16)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -160.74), SIMDE_FLOAT64_C( 611.30)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 914.20), SIMDE_FLOAT64_C( 278.69)), simde_mm_set_pd(SIMDE_FLOAT64_C( 703.64), SIMDE_FLOAT64_C( -975.84)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 914.20), SIMDE_FLOAT64_C( 278.69)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 392.89), SIMDE_FLOAT64_C( 45.41)), simde_mm_set_pd(SIMDE_FLOAT64_C( 713.78), SIMDE_FLOAT64_C( -6.71)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 392.89), SIMDE_FLOAT64_C( 45.41)), simde_x_mm_setone_pd()) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cmpgt_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_equal(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpngt_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 65.48), SIMDE_FLOAT64_C( -195.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( 65.48), SIMDE_FLOAT64_C( 18.27)), simde_mm_move_sd(simde_x_mm_setone_pd(), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -563.65), SIMDE_FLOAT64_C( 884.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( 467.71), 
SIMDE_FLOAT64_C( -906.63)), simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -553.85), SIMDE_FLOAT64_C( 49.09)), simde_mm_set_pd(SIMDE_FLOAT64_C( 731.88), SIMDE_FLOAT64_C( 974.91)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 492.98), SIMDE_FLOAT64_C( 64.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( -392.36), SIMDE_FLOAT64_C( -188.43)), simde_mm_setzero_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -248.75), SIMDE_FLOAT64_C( -404.39)), simde_mm_set_pd(SIMDE_FLOAT64_C( -495.92), SIMDE_FLOAT64_C( -819.81)), simde_mm_setzero_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -932.57), SIMDE_FLOAT64_C( 741.27)), simde_mm_set_pd(SIMDE_FLOAT64_C( -307.42), SIMDE_FLOAT64_C( 170.69)), simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -939.12), SIMDE_FLOAT64_C( -161.45)), simde_mm_set_pd(SIMDE_FLOAT64_C( -939.12), SIMDE_FLOAT64_C( -161.45)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -900.20), SIMDE_FLOAT64_C( -314.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( 138.12), SIMDE_FLOAT64_C( 517.19)), simde_x_mm_setone_pd() } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cmpngt_pd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_equal(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpngt_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -695.66), SIMDE_FLOAT64_C( 678.35)), simde_mm_set_pd(SIMDE_FLOAT64_C( 356.43), SIMDE_FLOAT64_C( 495.31)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -695.66), SIMDE_FLOAT64_C( 678.35)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -72.63), SIMDE_FLOAT64_C( 895.56)), simde_mm_set_pd(SIMDE_FLOAT64_C( -885.88), SIMDE_FLOAT64_C( 947.04)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -72.63), SIMDE_FLOAT64_C( 895.56)), simde_x_mm_setone_pd()) }, { 
simde_mm_set_pd(SIMDE_FLOAT64_C( 72.92), SIMDE_FLOAT64_C( -711.12)), simde_mm_set_pd(SIMDE_FLOAT64_C( -242.49), SIMDE_FLOAT64_C( -686.51)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 72.92), SIMDE_FLOAT64_C( -711.12)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 520.17), SIMDE_FLOAT64_C( 176.32)), simde_mm_set_pd(SIMDE_FLOAT64_C( -442.78), SIMDE_FLOAT64_C( -956.19)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 520.17), SIMDE_FLOAT64_C( 176.32)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 949.11), SIMDE_FLOAT64_C( 112.35)), simde_mm_set_pd(SIMDE_FLOAT64_C( -212.07), SIMDE_FLOAT64_C( 851.84)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 949.11), SIMDE_FLOAT64_C( 112.35)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -142.01), SIMDE_FLOAT64_C( -216.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -467.63), SIMDE_FLOAT64_C( 481.36)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -142.01), SIMDE_FLOAT64_C( -216.70)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 918.90), SIMDE_FLOAT64_C( 481.59)), simde_mm_set_pd(SIMDE_FLOAT64_C( -147.11), SIMDE_FLOAT64_C( 677.03)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 918.90), SIMDE_FLOAT64_C( 481.59)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -143.49), SIMDE_FLOAT64_C( 447.22)), simde_mm_set_pd(SIMDE_FLOAT64_C( 50.06), SIMDE_FLOAT64_C( 827.25)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -143.49), SIMDE_FLOAT64_C( 447.22)), simde_x_mm_setone_pd()) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cmpngt_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_equal(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpge_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -649.61), SIMDE_FLOAT64_C( 366.73)), simde_mm_set_pd(SIMDE_FLOAT64_C( 333.59), SIMDE_FLOAT64_C( 
116.88)), simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -619.22), SIMDE_FLOAT64_C( -854.65)), simde_mm_set_pd(SIMDE_FLOAT64_C( -854.79), SIMDE_FLOAT64_C( 863.33)), simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -950.10), SIMDE_FLOAT64_C( 381.78)), simde_mm_set_pd(SIMDE_FLOAT64_C( 844.77), SIMDE_FLOAT64_C( -217.11)), simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -925.79), SIMDE_FLOAT64_C( -916.91)), simde_mm_set_pd(SIMDE_FLOAT64_C( -17.99), SIMDE_FLOAT64_C( 826.72)), simde_mm_setzero_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 581.21), SIMDE_FLOAT64_C( 639.37)), simde_mm_set_pd(SIMDE_FLOAT64_C( 581.21), SIMDE_FLOAT64_C( 448.67)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 702.29), SIMDE_FLOAT64_C( -582.84)), simde_mm_set_pd(SIMDE_FLOAT64_C( 702.29), SIMDE_FLOAT64_C( 186.24)), simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 532.21), SIMDE_FLOAT64_C( 145.56)), simde_mm_set_pd(SIMDE_FLOAT64_C( -677.14), SIMDE_FLOAT64_C( 145.56)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 459.86), SIMDE_FLOAT64_C( 265.89)), simde_mm_set_pd(SIMDE_FLOAT64_C( -130.43), SIMDE_FLOAT64_C( 334.48)), simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cmpge_pd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_equal(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpge_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -315.45), SIMDE_FLOAT64_C( 193.79)), simde_mm_set_pd(SIMDE_FLOAT64_C( -204.45), SIMDE_FLOAT64_C( 887.13)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -315.45), SIMDE_FLOAT64_C( 193.79)), 
simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -670.47), SIMDE_FLOAT64_C( 937.31)), simde_mm_set_pd(SIMDE_FLOAT64_C( 343.22), SIMDE_FLOAT64_C( -308.01)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -670.47), SIMDE_FLOAT64_C( 937.31)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -903.63), SIMDE_FLOAT64_C( -850.53)), simde_mm_set_pd(SIMDE_FLOAT64_C( -838.64), SIMDE_FLOAT64_C( -936.46)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -903.63), SIMDE_FLOAT64_C( -850.53)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 236.33), SIMDE_FLOAT64_C( 126.98)), simde_mm_set_pd(SIMDE_FLOAT64_C( 872.82), SIMDE_FLOAT64_C( -512.42)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 236.33), SIMDE_FLOAT64_C( 126.98)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 811.87), SIMDE_FLOAT64_C( -15.62)), simde_mm_set_pd(SIMDE_FLOAT64_C( -983.99), SIMDE_FLOAT64_C( 351.32)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 811.87), SIMDE_FLOAT64_C( -15.62)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 42.47), SIMDE_FLOAT64_C( -523.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( 286.68), SIMDE_FLOAT64_C( 254.00)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 42.47), SIMDE_FLOAT64_C( -523.00)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -219.91), SIMDE_FLOAT64_C( -253.29)), simde_mm_set_pd(SIMDE_FLOAT64_C( -554.73), SIMDE_FLOAT64_C( 225.44)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -219.91), SIMDE_FLOAT64_C( -253.29)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -901.30), SIMDE_FLOAT64_C( -538.38)), simde_mm_set_pd(SIMDE_FLOAT64_C( -584.99), SIMDE_FLOAT64_C( 91.26)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -901.30), SIMDE_FLOAT64_C( -538.38)), simde_mm_setzero_pd()) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cmpge_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_equal(r, test_vec[i].r); } 
return 0; } static int test_simde_mm_cmpnge_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 65.48), SIMDE_FLOAT64_C( -195.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( 65.48), SIMDE_FLOAT64_C( 18.27)), simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -563.65), SIMDE_FLOAT64_C( 884.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( 467.71), SIMDE_FLOAT64_C( -906.63)), simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -553.85), SIMDE_FLOAT64_C( 49.09)), simde_mm_set_pd(SIMDE_FLOAT64_C( 731.88), SIMDE_FLOAT64_C( 974.91)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 492.98), SIMDE_FLOAT64_C( 64.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( -392.36), SIMDE_FLOAT64_C( -188.43)), simde_mm_setzero_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -248.75), SIMDE_FLOAT64_C( -404.39)), simde_mm_set_pd(SIMDE_FLOAT64_C( -495.92), SIMDE_FLOAT64_C( -819.81)), simde_mm_setzero_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -932.57), SIMDE_FLOAT64_C( 741.27)), simde_mm_set_pd(SIMDE_FLOAT64_C( -307.42), SIMDE_FLOAT64_C( 170.69)), simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -939.12), SIMDE_FLOAT64_C( -161.45)), simde_mm_set_pd(SIMDE_FLOAT64_C( -939.12), SIMDE_FLOAT64_C( -161.45)), simde_mm_setzero_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -900.20), SIMDE_FLOAT64_C( -314.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( 138.12), SIMDE_FLOAT64_C( 517.19)), simde_x_mm_setone_pd() } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cmpnge_pd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_equal(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpnge_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -695.66), 
SIMDE_FLOAT64_C( 678.35)), simde_mm_set_pd(SIMDE_FLOAT64_C( 356.43), SIMDE_FLOAT64_C( 495.31)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -695.66), SIMDE_FLOAT64_C( 678.35)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -72.63), SIMDE_FLOAT64_C( 895.56)), simde_mm_set_pd(SIMDE_FLOAT64_C( -885.88), SIMDE_FLOAT64_C( 947.04)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -72.63), SIMDE_FLOAT64_C( 895.56)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 72.92), SIMDE_FLOAT64_C( -711.12)), simde_mm_set_pd(SIMDE_FLOAT64_C( -242.49), SIMDE_FLOAT64_C( -686.51)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 72.92), SIMDE_FLOAT64_C( -711.12)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 520.17), SIMDE_FLOAT64_C( 176.32)), simde_mm_set_pd(SIMDE_FLOAT64_C( -442.78), SIMDE_FLOAT64_C( -956.19)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 520.17), SIMDE_FLOAT64_C( 176.32)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 949.11), SIMDE_FLOAT64_C( 112.35)), simde_mm_set_pd(SIMDE_FLOAT64_C( -212.07), SIMDE_FLOAT64_C( 851.84)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 949.11), SIMDE_FLOAT64_C( 112.35)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -142.01), SIMDE_FLOAT64_C( -216.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -467.63), SIMDE_FLOAT64_C( 481.36)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -142.01), SIMDE_FLOAT64_C( -216.70)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 918.90), SIMDE_FLOAT64_C( 481.59)), simde_mm_set_pd(SIMDE_FLOAT64_C( -147.11), SIMDE_FLOAT64_C( 677.03)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 918.90), SIMDE_FLOAT64_C( 481.59)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -143.49), SIMDE_FLOAT64_C( 447.22)), simde_mm_set_pd(SIMDE_FLOAT64_C( 50.06), SIMDE_FLOAT64_C( 827.25)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -143.49), SIMDE_FLOAT64_C( 447.22)), simde_x_mm_setone_pd()) } }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cmpnge_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_equal(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpord_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 107.72), SIMDE_FLOAT64_C( -915.48)), simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -303.84)), simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 173.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( -817.33), SIMDE_FLOAT64_C( 659.40)), simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_MATH_NAN), simde_mm_set_pd(SIMDE_FLOAT64_C( -425.32), SIMDE_FLOAT64_C( 993.95)), simde_mm_setzero_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -650.75), SIMDE_MATH_NAN), simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -971.81)), simde_mm_setzero_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -761.29), SIMDE_FLOAT64_C( -694.76)), simde_mm_set_pd(SIMDE_FLOAT64_C( -709.09), SIMDE_FLOAT64_C( 614.12)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 498.19), SIMDE_FLOAT64_C( -379.74)), simde_mm_set_pd(SIMDE_FLOAT64_C( -247.48), SIMDE_FLOAT64_C( -578.21)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 101.51), SIMDE_FLOAT64_C( 387.46)), simde_mm_set_pd(SIMDE_FLOAT64_C( 215.97), SIMDE_FLOAT64_C( 173.76)), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 729.13), SIMDE_FLOAT64_C( 771.13)), simde_mm_set_pd(SIMDE_FLOAT64_C( 902.43), SIMDE_FLOAT64_C( -416.43)), simde_x_mm_setone_pd() } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cmpord_pd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_equal(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpord_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d 
b; simde__m128d r; } test_vec[] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 602.71), SIMDE_FLOAT64_C( -732.62)), simde_mm_set_pd(SIMDE_FLOAT64_C( 116.21), SIMDE_FLOAT64_C( -560.07)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 602.71), SIMDE_FLOAT64_C( -732.62)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 947.95), SIMDE_MATH_NAN), simde_mm_set_pd(SIMDE_FLOAT64_C( -66.03), SIMDE_FLOAT64_C( -86.78)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 947.95), SIMDE_FLOAT64_C( 775.29)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -455.06), SIMDE_FLOAT64_C( 579.65)), simde_mm_set_pd(SIMDE_FLOAT64_C( -960.88), SIMDE_MATH_NAN), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -455.06), SIMDE_FLOAT64_C( 579.65)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 782.89), SIMDE_MATH_NAN), simde_mm_set_pd(SIMDE_FLOAT64_C( -540.96), SIMDE_MATH_NAN), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 782.89), SIMDE_FLOAT64_C( -266.22)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -750.41), SIMDE_FLOAT64_C( -624.09)), simde_mm_set_pd(SIMDE_FLOAT64_C( -599.13), SIMDE_FLOAT64_C( 704.00)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -750.41), SIMDE_FLOAT64_C( -624.09)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 149.22), SIMDE_FLOAT64_C( -876.24)), simde_mm_set_pd(SIMDE_FLOAT64_C( 871.40), SIMDE_FLOAT64_C( 321.55)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 149.22), SIMDE_FLOAT64_C( -876.24)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -822.79), SIMDE_FLOAT64_C( 890.31)), simde_mm_set_pd(SIMDE_FLOAT64_C( -260.78), SIMDE_FLOAT64_C( 386.76)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -822.79), SIMDE_FLOAT64_C( 890.31)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -370.89), SIMDE_FLOAT64_C( -622.25)), simde_mm_set_pd(SIMDE_FLOAT64_C( 587.16), SIMDE_FLOAT64_C( -811.86)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -370.89), 
SIMDE_FLOAT64_C( -622.25)), simde_x_mm_setone_pd()) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cmpord_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_equal(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpunord_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 670.49)), simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 826.75)), simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -774.95), SIMDE_MATH_NAN), simde_mm_set_pd(SIMDE_FLOAT64_C( 247.71), SIMDE_MATH_NAN), simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -88.77), SIMDE_FLOAT64_C( 116.09)), simde_mm_set_pd(SIMDE_FLOAT64_C( -32.79), SIMDE_FLOAT64_C( -442.07)), simde_mm_setzero_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 71.71), SIMDE_FLOAT64_C( 549.42)), simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -288.27)), simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -266.24), SIMDE_FLOAT64_C( -147.24)), simde_mm_set_pd(SIMDE_FLOAT64_C( 900.46), SIMDE_FLOAT64_C( -288.71)), simde_mm_setzero_pd() }, { simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_MATH_NAN), simde_mm_set_pd(SIMDE_FLOAT64_C( 196.30), SIMDE_MATH_NAN), simde_x_mm_setone_pd() }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -447.36), SIMDE_FLOAT64_C( 236.69)), simde_mm_set_pd(SIMDE_FLOAT64_C( -774.85), SIMDE_FLOAT64_C( -611.68)), simde_mm_setzero_pd() }, { simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_MATH_NAN), simde_mm_set_pd(SIMDE_FLOAT64_C( 711.66), SIMDE_FLOAT64_C( -751.40)), simde_x_mm_setone_pd() } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cmpunord_pd(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(test_vec[i].r)); } return 
0; } static int test_simde_mm_cmpunord_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -580.90), SIMDE_FLOAT64_C( 757.46)), simde_mm_set_pd(SIMDE_FLOAT64_C( -779.63), SIMDE_FLOAT64_C( 96.79)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -580.90), SIMDE_FLOAT64_C( 757.46)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -607.89), SIMDE_MATH_NAN), simde_mm_set_pd(SIMDE_FLOAT64_C( 751.46), SIMDE_FLOAT64_C( 753.64)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -607.89), SIMDE_FLOAT64_C( -882.75)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 177.62), SIMDE_FLOAT64_C( -618.39)), simde_mm_set_pd(SIMDE_FLOAT64_C( -958.41), SIMDE_MATH_NAN), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 177.62), SIMDE_FLOAT64_C( -618.39)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 910.88), SIMDE_MATH_NAN), simde_mm_set_pd(SIMDE_FLOAT64_C( -924.01), SIMDE_MATH_NAN), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 910.88), SIMDE_FLOAT64_C( 313.76)), simde_x_mm_setone_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -778.12), SIMDE_FLOAT64_C( -472.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( 400.92), SIMDE_FLOAT64_C( -453.41)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -778.12), SIMDE_FLOAT64_C( -472.40)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 933.47), SIMDE_FLOAT64_C( -426.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( 836.37), SIMDE_FLOAT64_C( 329.66)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 933.47), SIMDE_FLOAT64_C( -426.60)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -563.76), SIMDE_FLOAT64_C( 455.35)), simde_mm_set_pd(SIMDE_FLOAT64_C( -169.32), SIMDE_FLOAT64_C( -459.10)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -563.76), SIMDE_FLOAT64_C( 455.35)), simde_mm_setzero_pd()) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -117.48), SIMDE_FLOAT64_C( -934.82)), 
simde_mm_set_pd(SIMDE_FLOAT64_C( 177.09), SIMDE_FLOAT64_C( 194.89)), simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -117.48), SIMDE_FLOAT64_C( -934.82)), simde_mm_setzero_pd()) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cmpunord_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_equal(r, test_vec[i].r); } return 0; } static int test_simde_mm_comieq_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; int r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -969.61), SIMDE_FLOAT64_C( 839.23)), simde_mm_set_pd(SIMDE_FLOAT64_C( -969.61), SIMDE_FLOAT64_C( -432.69)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 281.50), SIMDE_FLOAT64_C( -752.55)), simde_mm_set_pd(SIMDE_FLOAT64_C( 281.50), SIMDE_FLOAT64_C( -752.55)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 590.09), SIMDE_FLOAT64_C( 270.42)), simde_mm_set_pd(SIMDE_FLOAT64_C( -206.33), SIMDE_FLOAT64_C( 270.42)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 412.70), SIMDE_FLOAT64_C( -500.58)), simde_mm_set_pd(SIMDE_FLOAT64_C( 145.06), SIMDE_FLOAT64_C( 763.45)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -374.23), SIMDE_FLOAT64_C( 380.82)), simde_mm_set_pd(SIMDE_FLOAT64_C( -374.23), SIMDE_FLOAT64_C( 380.82)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -875.64), SIMDE_FLOAT64_C( 30.13)), simde_mm_set_pd(SIMDE_FLOAT64_C( -823.83), SIMDE_FLOAT64_C( 30.13)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 890.78), SIMDE_FLOAT64_C( -652.66)), simde_mm_set_pd(SIMDE_FLOAT64_C( 719.69), SIMDE_FLOAT64_C( -685.53)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 740.88), SIMDE_FLOAT64_C( 116.37)), simde_mm_set_pd(SIMDE_FLOAT64_C( -528.65), SIMDE_FLOAT64_C( 536.46)), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_comieq_sd(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_comige_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; int r; } 
test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 105.66), SIMDE_FLOAT64_C( 552.43)), simde_mm_set_pd(SIMDE_FLOAT64_C( 105.66), SIMDE_FLOAT64_C( 267.88)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( -921.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( 330.81)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 865.75), SIMDE_FLOAT64_C( -938.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( 865.75), SIMDE_FLOAT64_C( 970.01)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -158.01), SIMDE_FLOAT64_C( 635.18)), simde_mm_set_pd(SIMDE_FLOAT64_C( -394.88), SIMDE_FLOAT64_C( -19.73)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -29.19), SIMDE_FLOAT64_C( -429.43)), simde_mm_set_pd(SIMDE_FLOAT64_C( -29.19), SIMDE_FLOAT64_C( -32.37)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 507.45), SIMDE_FLOAT64_C( -241.62)), simde_mm_set_pd(SIMDE_FLOAT64_C( 507.45), SIMDE_FLOAT64_C( 500.55)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -667.19), SIMDE_FLOAT64_C( 338.98)), simde_mm_set_pd(SIMDE_FLOAT64_C( 225.94), SIMDE_FLOAT64_C( 338.98)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 890.13), SIMDE_FLOAT64_C( -203.09)), simde_mm_set_pd(SIMDE_FLOAT64_C( -221.49), SIMDE_FLOAT64_C( 304.99)), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_comige_sd(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_comigt_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; int r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 105.66), SIMDE_FLOAT64_C( 552.43)), simde_mm_set_pd(SIMDE_FLOAT64_C( 105.66), SIMDE_FLOAT64_C( 267.88)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( -921.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( 330.81)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 865.75), SIMDE_FLOAT64_C( -938.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( 865.75), SIMDE_FLOAT64_C( 970.01)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -158.01), 
SIMDE_FLOAT64_C( 635.18)), simde_mm_set_pd(SIMDE_FLOAT64_C( -394.88), SIMDE_FLOAT64_C( -19.73)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -29.19), SIMDE_FLOAT64_C( -429.43)), simde_mm_set_pd(SIMDE_FLOAT64_C( -29.19), SIMDE_FLOAT64_C( -32.37)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 507.45), SIMDE_FLOAT64_C( -241.62)), simde_mm_set_pd(SIMDE_FLOAT64_C( 507.45), SIMDE_FLOAT64_C( 500.55)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -667.19), SIMDE_FLOAT64_C( 338.98)), simde_mm_set_pd(SIMDE_FLOAT64_C( 225.94), SIMDE_FLOAT64_C( 338.98)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 890.13), SIMDE_FLOAT64_C( -203.09)), simde_mm_set_pd(SIMDE_FLOAT64_C( -221.49), SIMDE_FLOAT64_C( 304.99)), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_comigt_sd(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_comile_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; int r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 105.66), SIMDE_FLOAT64_C( 552.43)), simde_mm_set_pd(SIMDE_FLOAT64_C( 105.66), SIMDE_FLOAT64_C( 267.88)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( -921.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( 330.81)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 865.75), SIMDE_FLOAT64_C( -938.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( 865.75), SIMDE_FLOAT64_C( 970.01)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -158.01), SIMDE_FLOAT64_C( 635.18)), simde_mm_set_pd(SIMDE_FLOAT64_C( -394.88), SIMDE_FLOAT64_C( -19.73)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -29.19), SIMDE_FLOAT64_C( -429.43)), simde_mm_set_pd(SIMDE_FLOAT64_C( -29.19), SIMDE_FLOAT64_C( -32.37)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 507.45), SIMDE_FLOAT64_C( -241.62)), simde_mm_set_pd(SIMDE_FLOAT64_C( 507.45), SIMDE_FLOAT64_C( 500.55)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -667.19), SIMDE_FLOAT64_C( 338.98)), simde_mm_set_pd(SIMDE_FLOAT64_C( 225.94), 
SIMDE_FLOAT64_C( 338.98)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 890.13), SIMDE_FLOAT64_C( -203.09)), simde_mm_set_pd(SIMDE_FLOAT64_C( -221.49), SIMDE_FLOAT64_C( 304.99)), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_comile_sd(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_comilt_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; int r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 766.54), SIMDE_FLOAT64_C( -69.58)), simde_mm_set_pd(SIMDE_FLOAT64_C( 185.38), SIMDE_FLOAT64_C( -69.58)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 489.65), SIMDE_FLOAT64_C( 372.98)), simde_mm_set_pd(SIMDE_FLOAT64_C( 489.65), SIMDE_FLOAT64_C( 372.98)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 79.48), SIMDE_FLOAT64_C( -168.45)), simde_mm_set_pd(SIMDE_FLOAT64_C( -648.03), SIMDE_FLOAT64_C( -710.04)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 907.60), SIMDE_FLOAT64_C( 955.73)), simde_mm_set_pd(SIMDE_FLOAT64_C( 907.60), SIMDE_FLOAT64_C( -965.39)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -237.33), SIMDE_FLOAT64_C( 558.83)), simde_mm_set_pd(SIMDE_FLOAT64_C( 415.12), SIMDE_FLOAT64_C( 558.83)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -796.13), SIMDE_FLOAT64_C( 18.69)), simde_mm_set_pd(SIMDE_FLOAT64_C( -796.13), SIMDE_FLOAT64_C( 18.69)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -380.36), SIMDE_FLOAT64_C( -737.73)), simde_mm_set_pd(SIMDE_FLOAT64_C( -380.36), SIMDE_FLOAT64_C( -737.73)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -975.15), SIMDE_FLOAT64_C( -296.93)), simde_mm_set_pd(SIMDE_FLOAT64_C( -975.15), SIMDE_FLOAT64_C( -296.93)), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_comilt_sd(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_comineq_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; int r; } test_vec[8] = { { 
simde_mm_set_pd(SIMDE_FLOAT64_C( 105.66), SIMDE_FLOAT64_C( 552.43)), simde_mm_set_pd(SIMDE_FLOAT64_C( 105.66), SIMDE_FLOAT64_C( 267.88)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( -921.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( 330.81)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 865.75), SIMDE_FLOAT64_C( -938.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( 865.75), SIMDE_FLOAT64_C( 970.01)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -158.01), SIMDE_FLOAT64_C( 635.18)), simde_mm_set_pd(SIMDE_FLOAT64_C( -394.88), SIMDE_FLOAT64_C( -19.73)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -29.19), SIMDE_FLOAT64_C( -429.43)), simde_mm_set_pd(SIMDE_FLOAT64_C( -29.19), SIMDE_FLOAT64_C( -32.37)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 507.45), SIMDE_FLOAT64_C( -241.62)), simde_mm_set_pd(SIMDE_FLOAT64_C( 507.45), SIMDE_FLOAT64_C( 500.55)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -667.19), SIMDE_FLOAT64_C( 338.98)), simde_mm_set_pd(SIMDE_FLOAT64_C( 225.94), SIMDE_FLOAT64_C( 338.98)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 890.13), SIMDE_FLOAT64_C( -203.09)), simde_mm_set_pd(SIMDE_FLOAT64_C( -221.49), SIMDE_FLOAT64_C( 304.99)), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_comineq_sd(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_x_mm_copysign_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 dest[2]; const simde_float64 src[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -182.01), SIMDE_FLOAT64_C( 309.30) }, { SIMDE_FLOAT64_C( -125.98), SIMDE_FLOAT64_C( -334.42) }, { SIMDE_FLOAT64_C( -182.01), SIMDE_FLOAT64_C( -309.30) } }, { { SIMDE_FLOAT64_C( -339.97), SIMDE_FLOAT64_C( -147.14) }, { SIMDE_FLOAT64_C( 534.39), SIMDE_FLOAT64_C( -377.91) }, { SIMDE_FLOAT64_C( 339.97), SIMDE_FLOAT64_C( -147.14) } }, { { SIMDE_FLOAT64_C( -466.30), SIMDE_FLOAT64_C( 794.64) }, { SIMDE_FLOAT64_C( 936.51), 
SIMDE_FLOAT64_C( -627.08) }, { SIMDE_FLOAT64_C( 466.30), SIMDE_FLOAT64_C( -794.64) } }, { { SIMDE_FLOAT64_C( 644.80), SIMDE_FLOAT64_C( 412.58) }, { SIMDE_FLOAT64_C( -738.56), SIMDE_FLOAT64_C( -987.18) }, { SIMDE_FLOAT64_C( -644.80), SIMDE_FLOAT64_C( -412.58) } }, { { SIMDE_FLOAT64_C( -54.12), SIMDE_FLOAT64_C( -858.45) }, { SIMDE_FLOAT64_C( -554.31), SIMDE_FLOAT64_C( 274.31) }, { SIMDE_FLOAT64_C( -54.12), SIMDE_FLOAT64_C( 858.45) } }, { { SIMDE_FLOAT64_C( -106.06), SIMDE_FLOAT64_C( -482.09) }, { SIMDE_FLOAT64_C( -505.26), SIMDE_FLOAT64_C( -310.15) }, { SIMDE_FLOAT64_C( -106.06), SIMDE_FLOAT64_C( -482.09) } }, { { SIMDE_FLOAT64_C( 726.18), SIMDE_FLOAT64_C( 941.28) }, { SIMDE_FLOAT64_C( -987.65), SIMDE_FLOAT64_C( -463.18) }, { SIMDE_FLOAT64_C( -726.18), SIMDE_FLOAT64_C( -941.28) } }, { { SIMDE_FLOAT64_C( -907.04), SIMDE_FLOAT64_C( -842.82) }, { SIMDE_FLOAT64_C( -124.70), SIMDE_FLOAT64_C( -89.06) }, { SIMDE_FLOAT64_C( -907.04), SIMDE_FLOAT64_C( -842.82) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d dest = simde_mm_loadu_pd(test_vec[i].dest); simde__m128d src = simde_mm_loadu_pd(test_vec[i].src); simde__m128d r = simde_x_mm_copysign_pd(dest, src); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_x_mm_xorsign_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 b[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -998.44), SIMDE_FLOAT64_C( -179.45) }, { SIMDE_FLOAT64_C( 34.66), SIMDE_FLOAT64_C( 254.98) }, { SIMDE_FLOAT64_C( -998.44), SIMDE_FLOAT64_C( -179.45) } }, { { SIMDE_FLOAT64_C( -220.74), SIMDE_FLOAT64_C( 718.77) }, { SIMDE_FLOAT64_C( -648.69), SIMDE_FLOAT64_C( -598.91) }, { SIMDE_FLOAT64_C( 220.74), SIMDE_FLOAT64_C( -718.77) } }, { { SIMDE_FLOAT64_C( 84.66), SIMDE_FLOAT64_C( -602.04) }, { SIMDE_FLOAT64_C( 631.55), SIMDE_FLOAT64_C( -486.59) }, { SIMDE_FLOAT64_C( 84.66), SIMDE_FLOAT64_C( 602.04) } 
}, { { SIMDE_FLOAT64_C( 570.81), SIMDE_FLOAT64_C( 368.00) }, { SIMDE_FLOAT64_C( 372.19), SIMDE_FLOAT64_C( -832.84) }, { SIMDE_FLOAT64_C( 570.81), SIMDE_FLOAT64_C( -368.00) } }, { { SIMDE_FLOAT64_C( -996.05), SIMDE_FLOAT64_C( 875.71) }, { SIMDE_FLOAT64_C( 198.29), SIMDE_FLOAT64_C( -187.87) }, { SIMDE_FLOAT64_C( -996.05), SIMDE_FLOAT64_C( -875.71) } }, { { SIMDE_FLOAT64_C( -462.20), SIMDE_FLOAT64_C( -277.60) }, { SIMDE_FLOAT64_C( 841.75), SIMDE_FLOAT64_C( 127.22) }, { SIMDE_FLOAT64_C( -462.20), SIMDE_FLOAT64_C( -277.60) } }, { { SIMDE_FLOAT64_C( -669.20), SIMDE_FLOAT64_C( -206.42) }, { SIMDE_FLOAT64_C( 600.14), SIMDE_FLOAT64_C( 65.01) }, { SIMDE_FLOAT64_C( -669.20), SIMDE_FLOAT64_C( -206.42) } }, { { SIMDE_FLOAT64_C( 159.77), SIMDE_FLOAT64_C( -896.78) }, { SIMDE_FLOAT64_C( 642.72), SIMDE_FLOAT64_C( 161.33) }, { SIMDE_FLOAT64_C( 159.77), SIMDE_FLOAT64_C( -896.78) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d b = simde_mm_loadu_pd(test_vec[i].b); simde__m128d r = simde_x_mm_xorsign_pd(a, b); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_cvtepi32_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128d r; } test_vec[8] = { { simde_mm_set_epi32( 1668601445, 8850426, 1726684816, -1842005323), simde_mm_set_pd(SIMDE_FLOAT64_C(1726684816.00), SIMDE_FLOAT64_C(-1842005323.00)) }, { simde_mm_set_epi32(-1162443511, 1098837378, -970075414, 1210551220), simde_mm_set_pd(SIMDE_FLOAT64_C(-970075414.00), SIMDE_FLOAT64_C(1210551220.00)) }, { simde_mm_set_epi32( 1014915875, 235168560, 691866984, -431325465), simde_mm_set_pd(SIMDE_FLOAT64_C(691866984.00), SIMDE_FLOAT64_C(-431325465.00)) }, { simde_mm_set_epi32( 1621419008, 1286931249, -1424446000, -169673917), simde_mm_set_pd(SIMDE_FLOAT64_C(-1424446000.00), SIMDE_FLOAT64_C(-169673917.00)) }, { simde_mm_set_epi32( 982570498, 31161721, 410129833, 1249524705), 
simde_mm_set_pd(SIMDE_FLOAT64_C(410129833.00), SIMDE_FLOAT64_C(1249524705.00)) }, { simde_mm_set_epi32(-1807976526, 584564543, 1386856775, -792093051), simde_mm_set_pd(SIMDE_FLOAT64_C(1386856775.00), SIMDE_FLOAT64_C(-792093051.00)) }, { simde_mm_set_epi32( 1927957259, 324939853, 1056227907, 960202603), simde_mm_set_pd(SIMDE_FLOAT64_C(1056227907.00), SIMDE_FLOAT64_C(960202603.00)) }, { simde_mm_set_epi32( 2096858414, 2117774841, 250894175, 1268045519), simde_mm_set_pd(SIMDE_FLOAT64_C(250894175.00), SIMDE_FLOAT64_C(1268045519.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cvtepi32_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cvtepi32_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128 r; } test_vec[8] = { { simde_mm_set_epi32( 332, -787, -79, -785), simde_mm_set_ps(SIMDE_FLOAT32_C( 332.00), SIMDE_FLOAT32_C(-787.00), SIMDE_FLOAT32_C( -79.00), SIMDE_FLOAT32_C(-785.00)) }, { simde_mm_set_epi32( 394, -936, -733, -136), simde_mm_set_ps(SIMDE_FLOAT32_C( 394.00), SIMDE_FLOAT32_C(-936.00), SIMDE_FLOAT32_C(-733.00), SIMDE_FLOAT32_C(-136.00)) }, { simde_mm_set_epi32( 618, -416, 310, 183), simde_mm_set_ps(SIMDE_FLOAT32_C( 618.00), SIMDE_FLOAT32_C(-416.00), SIMDE_FLOAT32_C( 310.00), SIMDE_FLOAT32_C( 183.00)) }, { simde_mm_set_epi32(-748, 245, 533, -152), simde_mm_set_ps(SIMDE_FLOAT32_C(-748.00), SIMDE_FLOAT32_C( 245.00), SIMDE_FLOAT32_C( 533.00), SIMDE_FLOAT32_C(-152.00)) }, { simde_mm_set_epi32( 42, 893, 849, -741), simde_mm_set_ps(SIMDE_FLOAT32_C( 42.00), SIMDE_FLOAT32_C( 893.00), SIMDE_FLOAT32_C( 849.00), SIMDE_FLOAT32_C(-741.00)) }, { simde_mm_set_epi32( 657, 222, -709, -177), simde_mm_set_ps(SIMDE_FLOAT32_C( 657.00), SIMDE_FLOAT32_C( 222.00), SIMDE_FLOAT32_C(-709.00), SIMDE_FLOAT32_C(-177.00)) }, { simde_mm_set_epi32( 762, -586, 196, 717), simde_mm_set_ps(SIMDE_FLOAT32_C( 762.00), SIMDE_FLOAT32_C(-586.00), SIMDE_FLOAT32_C( 196.00), 
SIMDE_FLOAT32_C( 717.00)) }, { simde_mm_set_epi32( 322, 178, 766, -110), simde_mm_set_ps(SIMDE_FLOAT32_C( 322.00), SIMDE_FLOAT32_C( 178.00), SIMDE_FLOAT32_C( 766.00), SIMDE_FLOAT32_C(-110.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_cvtepi32_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cvtpd_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const int32_t r[4]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN, -SIMDE_MATH_NAN }, { INT32_MIN, INT32_MIN, INT32_C( 0), INT32_C( 0) } }, #endif #if !defined(SIMDE_FAST_CONVERSION_RANGE) { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1), HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100) }, { INT32_MIN, INT32_C( 2147483547), INT32_C( 0), INT32_C( 0) } }, { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1), HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100 }, { INT32_MIN, -INT32_C( 2147483548), INT32_C( 0), INT32_C( 0) } }, #endif { { SIMDE_FLOAT64_C( -220.31), SIMDE_FLOAT64_C( 685.08) }, { -INT32_C( 220), INT32_C( 685), INT32_C( 0), INT32_C( 0) } }, { { SIMDE_FLOAT64_C( -164.88), SIMDE_FLOAT64_C( 725.51) }, { -INT32_C( 165), INT32_C( 726), INT32_C( 0), INT32_C( 0) } }, { { SIMDE_FLOAT64_C( 152.74), SIMDE_FLOAT64_C( 778.03) }, { INT32_C( 153), INT32_C( 778), INT32_C( 0), INT32_C( 0) } }, { { SIMDE_FLOAT64_C( -801.11), SIMDE_FLOAT64_C( -331.66) }, { -INT32_C( 801), -INT32_C( 332), INT32_C( 0), INT32_C( 0) } }, { { SIMDE_FLOAT64_C( -834.04), SIMDE_FLOAT64_C( -51.56) }, { -INT32_C( 834), -INT32_C( 52), INT32_C( 0), INT32_C( 0) } }, { { SIMDE_FLOAT64_C( 737.22), SIMDE_FLOAT64_C( 205.77) }, { INT32_C( 737), INT32_C( 206), INT32_C( 0), INT32_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); 
simde__m128i r = simde_mm_cvtpd_epi32(a); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm_cvtpd_pi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const int32_t r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN, -SIMDE_MATH_NAN }, { INT32_MIN, INT32_MIN } }, #endif #if !defined(SIMDE_FAST_CONVERSION_RANGE) { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1), HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100) }, { INT32_MIN, INT32_C( 2147483547) } }, { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1), HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100 }, { INT32_MIN, -INT32_C( 2147483548) } }, #endif { { SIMDE_FLOAT64_C( -220.31), SIMDE_FLOAT64_C( 685.08) }, { -INT32_C( 220), INT32_C( 685) } }, { { SIMDE_FLOAT64_C( -164.88), SIMDE_FLOAT64_C( 725.51) }, { -INT32_C( 165), INT32_C( 726) } }, { { SIMDE_FLOAT64_C( 152.74), SIMDE_FLOAT64_C( 778.03) }, { INT32_C( 153), INT32_C( 778) } }, { { SIMDE_FLOAT64_C( -801.11), SIMDE_FLOAT64_C( -331.66) }, { -INT32_C( 801), -INT32_C( 332) } }, { { SIMDE_FLOAT64_C( -834.04), SIMDE_FLOAT64_C( -51.56) }, { -INT32_C( 834), -INT32_C( 52) } }, { { SIMDE_FLOAT64_C( 737.22), SIMDE_FLOAT64_C( 205.77) }, { INT32_C( 737), INT32_C( 206) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m64 r = simde_mm_cvtpd_pi32(a); simde_test_x86_assert_equal_i32x2(r, simde_x_mm_loadu_si64(test_vec[i].r)); } return 0; } static int test_simde_mm_cvtpd_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128 r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 655.71), SIMDE_FLOAT64_C( 689.41)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 655.71), SIMDE_FLOAT32_C( 689.41)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -50.10), 
SIMDE_FLOAT64_C( -149.72)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -50.10), SIMDE_FLOAT32_C( -149.72)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 227.42), SIMDE_FLOAT64_C( 655.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 227.42), SIMDE_FLOAT32_C( 655.70)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -635.17), SIMDE_FLOAT64_C( 938.65)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -635.17), SIMDE_FLOAT32_C( 938.65)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 548.99), SIMDE_FLOAT64_C( -18.53)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 548.99), SIMDE_FLOAT32_C( -18.53)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -548.71), SIMDE_FLOAT64_C( 31.33)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -548.71), SIMDE_FLOAT32_C( 31.33)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -978.36), SIMDE_FLOAT64_C( -341.93)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -978.36), SIMDE_FLOAT32_C( -341.93)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 211.73), SIMDE_FLOAT64_C( 471.24)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 211.73), SIMDE_FLOAT32_C( 471.24)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_cvtpd_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cvtpi32_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pi32( -42, -579), simde_mm_set_pd(SIMDE_FLOAT64_C( -42.00), SIMDE_FLOAT64_C(-579.00)) }, { simde_mm_set_pi32( -633, 29), simde_mm_set_pd(SIMDE_FLOAT64_C(-633.00), SIMDE_FLOAT64_C( 29.00)) }, { simde_mm_set_pi32( -149, 196), simde_mm_set_pd(SIMDE_FLOAT64_C(-149.00), SIMDE_FLOAT64_C( 196.00)) }, { simde_mm_set_pi32( 308, -433), simde_mm_set_pd(SIMDE_FLOAT64_C( 308.00), 
SIMDE_FLOAT64_C(-433.00)) }, { simde_mm_set_pi32( -881, 358), simde_mm_set_pd(SIMDE_FLOAT64_C(-881.00), SIMDE_FLOAT64_C( 358.00)) }, { simde_mm_set_pi32( 723, 273), simde_mm_set_pd(SIMDE_FLOAT64_C( 723.00), SIMDE_FLOAT64_C( 273.00)) }, { simde_mm_set_pi32( -182, 457), simde_mm_set_pd(SIMDE_FLOAT64_C(-182.00), SIMDE_FLOAT64_C( 457.00)) }, { simde_mm_set_pi32( -239, -577), simde_mm_set_pd(SIMDE_FLOAT64_C(-239.00), SIMDE_FLOAT64_C(-577.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cvtpi32_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cvtps_epi32(SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const int32_t r[4]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, -SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 718.49), SIMDE_FLOAT32_C( -765.08) }, { INT32_MIN, INT32_MIN, INT32_C( 718), -INT32_C( 765) } }, #endif #if !defined(SIMDE_FAST_CONVERSION_RANGE) { { HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1), HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100), HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1), HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100), }, { INT32_MIN, INT32_C( 2147483520), INT32_MIN, -INT32_C( 2147483520) } }, #endif #if !defined(SIMDE_FAST_ROUND_TIES) { { SIMDE_FLOAT32_C( -1.50), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( -2.50), SIMDE_FLOAT32_C( 2.50) }, { -INT32_C( 2), INT32_C( 2), -INT32_C( 2), INT32_C( 2) } }, { { SIMDE_FLOAT32_C( -3.50), SIMDE_FLOAT32_C( 3.50), SIMDE_FLOAT32_C( -4.50), SIMDE_FLOAT32_C( 4.50) }, { -INT32_C( 4), INT32_C( 4), -INT32_C( 4), INT32_C( 4) } }, #endif { { SIMDE_FLOAT32_C( -95.52), SIMDE_FLOAT32_C( 603.57), SIMDE_FLOAT32_C( -810.91), SIMDE_FLOAT32_C( 527.98) }, { -INT32_C( 96), INT32_C( 604), -INT32_C( 811), INT32_C( 528) } }, { { SIMDE_FLOAT32_C( -768.18), 
SIMDE_FLOAT32_C( -162.82), SIMDE_FLOAT32_C( -159.43), SIMDE_FLOAT32_C( 588.60) }, { -INT32_C( 768), -INT32_C( 163), -INT32_C( 159), INT32_C( 589) } }, { { SIMDE_FLOAT32_C( 84.90), SIMDE_FLOAT32_C( -904.57), SIMDE_FLOAT32_C( -209.20), SIMDE_FLOAT32_C( 264.55) }, { INT32_C( 85), -INT32_C( 905), -INT32_C( 209), INT32_C( 265) } }, { { SIMDE_FLOAT32_C( -19.50), SIMDE_FLOAT32_C( -416.92), SIMDE_FLOAT32_C( -780.86), SIMDE_FLOAT32_C( -31.81) }, { -INT32_C( 20), -INT32_C( 417), -INT32_C( 781), -INT32_C( 32) } }, { { SIMDE_FLOAT32_C( -561.41), SIMDE_FLOAT32_C( -689.14), SIMDE_FLOAT32_C( 434.56), SIMDE_FLOAT32_C( 432.69) }, { -INT32_C( 561), -INT32_C( 689), INT32_C( 435), INT32_C( 433) } }, { { SIMDE_FLOAT32_C( 170.13), SIMDE_FLOAT32_C( 594.22), SIMDE_FLOAT32_C( -888.51), SIMDE_FLOAT32_C( 321.54) }, { INT32_C( 170), INT32_C( 594), -INT32_C( 889), INT32_C( 322) } }, { { SIMDE_FLOAT32_C( 660.47), SIMDE_FLOAT32_C( -124.04), SIMDE_FLOAT32_C( 493.83), SIMDE_FLOAT32_C( 250.16) }, { INT32_C( 660), -INT32_C( 124), INT32_C( 494), INT32_C( 250) } }, { { SIMDE_FLOAT32_C( -314.21), SIMDE_FLOAT32_C( -16.38), SIMDE_FLOAT32_C( 852.78), SIMDE_FLOAT32_C( 590.27) }, { -INT32_C( 314), -INT32_C( 16), INT32_C( 853), INT32_C( 590) } }, { { SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0) }, { -INT32_C( 2147483648), -INT32_C( 2147483648), -INT32_C( 2147483648), -INT32_C( 2147483648) } }, { { SIMDE_FLOAT32_C( 2147483649.0), SIMDE_FLOAT32_C( 2147483649.0), SIMDE_FLOAT32_C( 2147483649.0), SIMDE_FLOAT32_C( 2147483649.0) }, { -INT32_C( 2147483648), -INT32_C( 2147483648), -INT32_C( 2147483648), -INT32_C( 2147483648) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128i r = simde_mm_cvtps_epi32(a); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int 
test_simde_mm_cvtps_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128d r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 295.41), SIMDE_FLOAT32_C( -909.65), SIMDE_FLOAT32_C( 156.64), SIMDE_FLOAT32_C( -802.16)), simde_mm_set_pd(SIMDE_FLOAT64_C( 156.64), SIMDE_FLOAT64_C( -802.16)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 649.83), SIMDE_FLOAT32_C( -763.68), SIMDE_FLOAT32_C( 364.80), SIMDE_FLOAT32_C( 389.19)), simde_mm_set_pd(SIMDE_FLOAT64_C( 364.80), SIMDE_FLOAT64_C( 389.19)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 269.92), SIMDE_FLOAT32_C( -207.13), SIMDE_FLOAT32_C( 538.63), SIMDE_FLOAT32_C( 487.11)), simde_mm_set_pd(SIMDE_FLOAT64_C( 538.63), SIMDE_FLOAT64_C( 487.11)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -982.29), SIMDE_FLOAT32_C( 234.64), SIMDE_FLOAT32_C( -53.82), SIMDE_FLOAT32_C( 899.43)), simde_mm_set_pd(SIMDE_FLOAT64_C( -53.82), SIMDE_FLOAT64_C( 899.43)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 945.89), SIMDE_FLOAT32_C( -98.53), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 49.07)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 49.07)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -47.73), SIMDE_FLOAT32_C( 806.42), SIMDE_FLOAT32_C( 11.76), SIMDE_FLOAT32_C( -1.19)), simde_mm_set_pd(SIMDE_FLOAT64_C( 11.76), SIMDE_FLOAT64_C( -1.19)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -961.08), SIMDE_FLOAT32_C( -192.05), SIMDE_FLOAT32_C( 553.30), SIMDE_FLOAT32_C( -994.71)), simde_mm_set_pd(SIMDE_FLOAT64_C( 553.30), SIMDE_FLOAT64_C( -994.71)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 9.30), SIMDE_FLOAT32_C( -203.20), SIMDE_FLOAT32_C( -196.20), SIMDE_FLOAT32_C( 707.05)), simde_mm_set_pd(SIMDE_FLOAT64_C( -196.20), SIMDE_FLOAT64_C( 707.05)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cvtps_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cvtsd_f64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde_float64 r; } test_vec[8] = { { 
simde_mm_set_pd(SIMDE_FLOAT64_C( 298.96), SIMDE_FLOAT64_C( 39.67)), SIMDE_FLOAT64_C( 39.67) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -98.64), SIMDE_FLOAT64_C( -641.95)), SIMDE_FLOAT64_C(-641.95) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -307.30), SIMDE_FLOAT64_C( -193.04)), SIMDE_FLOAT64_C(-193.04) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -648.72), SIMDE_FLOAT64_C( 830.29)), SIMDE_FLOAT64_C( 830.29) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -701.20), SIMDE_FLOAT64_C( -501.79)), SIMDE_FLOAT64_C(-501.79) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 755.28), SIMDE_FLOAT64_C( 648.10)), SIMDE_FLOAT64_C( 648.10) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -664.63), SIMDE_FLOAT64_C( 220.54)), SIMDE_FLOAT64_C( 220.54) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -762.92), SIMDE_FLOAT64_C( -101.29)), SIMDE_FLOAT64_C(-101.29) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde_float64 r = simde_mm_cvtsd_f64(test_vec[i].a); simde_assert_equal_f64(r, test_vec[i].r, 2); } return 0; } static int test_simde_mm_cvtsd_si32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const int32_t r; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -162.87) }, INT32_MIN }, { { -SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -905.13) }, INT32_MIN }, #endif #if !defined(SIMDE_FAST_CONVERSION_RANGE) { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1), SIMDE_FLOAT64_C( 177.40) }, INT32_MIN }, { { HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100, SIMDE_FLOAT64_C( -906.88) }, INT32_C( 2147483547) }, { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1), SIMDE_FLOAT64_C( 676.90) }, INT32_MIN }, { { HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100, SIMDE_FLOAT64_C( -848.13) }, -INT32_C( 2147483548) }, #endif { { SIMDE_FLOAT64_C( 353.29), SIMDE_FLOAT64_C( -16.32) }, INT32_C( 353) }, { { SIMDE_FLOAT64_C( 477.70), SIMDE_FLOAT64_C( -131.04) }, INT32_C( 478) }, { { SIMDE_FLOAT64_C( -314.42), SIMDE_FLOAT64_C( 
-351.80) }, -INT32_C( 314) }, { { SIMDE_FLOAT64_C( -574.04), SIMDE_FLOAT64_C( -761.46) }, -INT32_C( 574) }, { { SIMDE_FLOAT64_C( -428.08), SIMDE_FLOAT64_C( 959.55) }, -INT32_C( 428) }, { { SIMDE_FLOAT64_C( 453.56), SIMDE_FLOAT64_C( -261.91) }, INT32_C( 454) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); int32_t r = simde_mm_cvtsd_si32(a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; } static int test_simde_mm_cvtsd_si64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; int64_t r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 793.30), SIMDE_FLOAT64_C( -706.75)), -707 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 29.13), SIMDE_FLOAT64_C( -309.00)), -309 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 21.24), SIMDE_FLOAT64_C( 368.17)), 368 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -595.30), SIMDE_FLOAT64_C( 351.60)), 352 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -640.13), SIMDE_FLOAT64_C( -466.84)), -467 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -237.20), SIMDE_FLOAT64_C( -994.72)), -995 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -983.23), SIMDE_FLOAT64_C( 645.14)), 645 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -89.10), SIMDE_FLOAT64_C( 585.69)), 586 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int64_t r = simde_mm_cvtsd_si64(test_vec[i].a); simde_assert_equal_i64(r, test_vec[i].r); } return 0; } static int test_simde_mm_cvtsd_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128d b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 522.41), SIMDE_FLOAT32_C( 122.44), SIMDE_FLOAT32_C( 708.76), SIMDE_FLOAT32_C( 910.97)), simde_mm_set_pd(SIMDE_FLOAT64_C( -52.04), SIMDE_FLOAT64_C( 228.75)), simde_mm_set_ps(SIMDE_FLOAT32_C( 522.41), SIMDE_FLOAT32_C( 122.44), SIMDE_FLOAT32_C( 708.76), SIMDE_FLOAT32_C( 228.75)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -311.44), SIMDE_FLOAT32_C( 267.00), SIMDE_FLOAT32_C( 965.23), SIMDE_FLOAT32_C( -248.92)), 
simde_mm_set_pd(SIMDE_FLOAT64_C( -89.48), SIMDE_FLOAT64_C( 178.71)), simde_mm_set_ps(SIMDE_FLOAT32_C( -311.44), SIMDE_FLOAT32_C( 267.00), SIMDE_FLOAT32_C( 965.23), SIMDE_FLOAT32_C( 178.71)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 252.50), SIMDE_FLOAT32_C( 744.35), SIMDE_FLOAT32_C( 237.50), SIMDE_FLOAT32_C( 713.77)), simde_mm_set_pd(SIMDE_FLOAT64_C( -913.96), SIMDE_FLOAT64_C( 935.45)), simde_mm_set_ps(SIMDE_FLOAT32_C( 252.50), SIMDE_FLOAT32_C( 744.35), SIMDE_FLOAT32_C( 237.50), SIMDE_FLOAT32_C( 935.45)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 781.08), SIMDE_FLOAT32_C( -50.03), SIMDE_FLOAT32_C( -658.11), SIMDE_FLOAT32_C( 945.59)), simde_mm_set_pd(SIMDE_FLOAT64_C( -556.84), SIMDE_FLOAT64_C( 452.90)), simde_mm_set_ps(SIMDE_FLOAT32_C( 781.08), SIMDE_FLOAT32_C( -50.03), SIMDE_FLOAT32_C( -658.11), SIMDE_FLOAT32_C( 452.90)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 459.13), SIMDE_FLOAT32_C( 794.72), SIMDE_FLOAT32_C( 105.91), SIMDE_FLOAT32_C( 688.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( -123.20), SIMDE_FLOAT64_C( 469.36)), simde_mm_set_ps(SIMDE_FLOAT32_C( 459.13), SIMDE_FLOAT32_C( 794.72), SIMDE_FLOAT32_C( 105.91), SIMDE_FLOAT32_C( 469.36)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -699.67), SIMDE_FLOAT32_C( 751.26), SIMDE_FLOAT32_C( 72.14), SIMDE_FLOAT32_C( -162.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( 868.66), SIMDE_FLOAT64_C( 138.18)), simde_mm_set_ps(SIMDE_FLOAT32_C( -699.67), SIMDE_FLOAT32_C( 751.26), SIMDE_FLOAT32_C( 72.14), SIMDE_FLOAT32_C( 138.18)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -144.80), SIMDE_FLOAT32_C( 372.44), SIMDE_FLOAT32_C( -878.31), SIMDE_FLOAT32_C( 984.43)), simde_mm_set_pd(SIMDE_FLOAT64_C( -559.54), SIMDE_FLOAT64_C( 112.58)), simde_mm_set_ps(SIMDE_FLOAT32_C( -144.80), SIMDE_FLOAT32_C( 372.44), SIMDE_FLOAT32_C( -878.31), SIMDE_FLOAT32_C( 112.58)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -958.65), SIMDE_FLOAT32_C( 333.33), SIMDE_FLOAT32_C( -940.30), SIMDE_FLOAT32_C( 396.81)), simde_mm_set_pd(SIMDE_FLOAT64_C( 263.65), SIMDE_FLOAT64_C( 199.76)), 
simde_mm_set_ps(SIMDE_FLOAT32_C( -958.65), SIMDE_FLOAT32_C( 333.33), SIMDE_FLOAT32_C( -940.30), SIMDE_FLOAT32_C( 199.76)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_cvtsd_ss(test_vec[i].a, test_vec[i].b); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_x_mm_cvtsi128_si16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[8]; const int16_t r; } test_vec[] = { { { -INT16_C( 30955), INT16_C( 704), -INT16_C( 12934), -INT16_C( 10158), INT16_C( 23505), INT16_C( 18623), -INT16_C( 30715), INT16_C( 30631) }, -INT16_C( 30955) }, { { -INT16_C( 18124), -INT16_C( 9599), -INT16_C( 23005), -INT16_C( 5882), -INT16_C( 24114), INT16_C( 22410), INT16_C( 23298), INT16_C( 6106) }, -INT16_C( 18124) }, { { -INT16_C( 25630), INT16_C( 23577), INT16_C( 27496), INT16_C( 14645), -INT16_C( 2874), -INT16_C( 13439), INT16_C( 10620), -INT16_C( 20158) }, -INT16_C( 25630) }, { { -INT16_C( 15390), INT16_C( 1675), -INT16_C( 28310), INT16_C( 14575), INT16_C( 31026), INT16_C( 13455), INT16_C( 27348), -INT16_C( 18613) }, -INT16_C( 15390) }, { { INT16_C( 25605), INT16_C( 27923), INT16_C( 18639), -INT16_C( 27226), INT16_C( 10301), -INT16_C( 18079), -INT16_C( 23727), INT16_C( 13162) }, INT16_C( 25605) }, { { -INT16_C( 2713), -INT16_C( 11975), INT16_C( 10630), -INT16_C( 18423), -INT16_C( 26206), INT16_C( 30700), INT16_C( 14083), INT16_C( 2094) }, -INT16_C( 2713) }, { { INT16_C( 16795), INT16_C( 27253), INT16_C( 7050), -INT16_C( 14592), INT16_C( 24899), -INT16_C( 27520), -INT16_C( 5372), INT16_C( 27592) }, INT16_C( 16795) }, { { INT16_C( 480), INT16_C( 26428), INT16_C( 17962), -INT16_C( 13025), INT16_C( 3295), -INT16_C( 7612), INT16_C( 29251), -INT16_C( 8214) }, INT16_C( 480) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a); int16_t r = simde_x_mm_cvtsi128_si16(a); simde_assert_equal_i16(r, test_vec[i].r); } return 0; } 
static int test_simde_mm_cvtsi128_si32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; int32_t r; } test_vec[8] = { { simde_mm_set_epi32( 26453550, -127780894, 765191664, -1527053336), -1527053336 }, { simde_mm_set_epi32(-2072408746, 654549051, -1035182329, -310311602), -310311602 }, { simde_mm_set_epi32(-1491944780, -848128842, 200170171, -471300206), -471300206 }, { simde_mm_set_epi32(-1218501110, 680592926, -869682471, -297305797), -297305797 }, { simde_mm_set_epi32(-1884581495, -571508262, -111379645, -1274133785), -1274133785 }, { simde_mm_set_epi32( 486988098, 416284528, 1359642222, 197671232), 197671232 }, { simde_mm_set_epi32( 296562088, -1151305617, -1413122888, -1640910233), -1640910233 }, { simde_mm_set_epi32(-1262725255, -1253335394, -91416000, -1892793314), -1892793314 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int32_t r = simde_mm_cvtsi128_si32(test_vec[i].a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; } static int test_simde_mm_cvtsi64_si128(SIMDE_MUNIT_TEST_ARGS) { const struct { int64_t a; simde__m128i r; } test_vec[8] = { { INT64_C( 6168135010467220065), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 6168135010467220065)) }, { INT64_C( 3895170522828645721), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 3895170522828645721)) }, { INT64_C( -3378210069702593578), simde_mm_set_epi64x(INT64_C( 0), INT64_C( -3378210069702593578)) }, { INT64_C( 2750396577149404222), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 2750396577149404222)) }, { INT64_C( 1438311486113044813), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 1438311486113044813)) }, { INT64_C( 3416877519561179684), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 3416877519561179684)) }, { INT64_C( 5633937201227624265), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 5633937201227624265)) }, { INT64_C( -3544191055453826903), simde_mm_set_epi64x(INT64_C( 0), INT64_C( -3544191055453826903)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { 
simde__m128i r = simde_mm_cvtsi64_si128(test_vec[i].a); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cvtsi128_si64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; int64_t r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C( 6773505374496819552), INT64_C( 4667650958864037640)), INT64_C( 4667650958864037640) }, { simde_mm_set_epi64x(INT64_C( 1327994882711935975), INT64_C( 6055234041306631062)), INT64_C( 6055234041306631062) }, { simde_mm_set_epi64x(INT64_C( 8972445642279437044), INT64_C( -4761409530754735793)), INT64_C( -4761409530754735793) }, { simde_mm_set_epi64x(INT64_C( 7460890732678939925), INT64_C( 5266150742597997743)), INT64_C( 5266150742597997743) }, { simde_mm_set_epi64x(INT64_C( -6075061397734634308), INT64_C( 487741331498539771)), INT64_C( 487741331498539771) }, { simde_mm_set_epi64x(INT64_C( 2874947710909797095), INT64_C( 2287065406213692181)), INT64_C( 2287065406213692181) }, { simde_mm_set_epi64x(INT64_C( 8598185467708417568), INT64_C( -2745610728130306920)), INT64_C( -2745610728130306920) }, { simde_mm_set_epi64x(INT64_C( 6122366414867950497), INT64_C( 614503884136124395)), INT64_C( 614503884136124395) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int64_t r = simde_mm_cvtsi128_si64(test_vec[i].a); simde_assert_equal_i64(r, test_vec[i].r); } return 0; } static int test_simde_x_mm_cvtsi16_si128 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a; const int16_t r[8]; } test_vec[] = { { -INT16_C( 17602), { -INT16_C( 17602), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { INT16_C( 26279), { INT16_C( 26279), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { -INT16_C( 15939), { -INT16_C( 15939), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { -INT16_C( 9973), { -INT16_C( 9973), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 
0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { -INT16_C( 7532), { -INT16_C( 7532), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { INT16_C( 4549), { INT16_C( 4549), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { INT16_C( 6325), { INT16_C( 6325), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { -INT16_C( 6958), { -INT16_C( 6958), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int16_t a = test_vec[i].a; simde__m128i r = simde_x_mm_cvtsi16_si128(a); simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm_cvtsi32_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; int b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -601.07), SIMDE_FLOAT64_C( 516.34)), -768, simde_mm_set_pd(SIMDE_FLOAT64_C( -601.07), SIMDE_FLOAT64_C( -768.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -939.83), SIMDE_FLOAT64_C( 135.41)), -383, simde_mm_set_pd(SIMDE_FLOAT64_C( -939.83), SIMDE_FLOAT64_C( -383.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 503.65), SIMDE_FLOAT64_C( 859.44)), 872, simde_mm_set_pd(SIMDE_FLOAT64_C( 503.65), SIMDE_FLOAT64_C( 872.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -76.44), SIMDE_FLOAT64_C( 854.87)), 613, simde_mm_set_pd(SIMDE_FLOAT64_C( -76.44), SIMDE_FLOAT64_C( 613.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 520.67), SIMDE_FLOAT64_C( -993.40)), 197, simde_mm_set_pd(SIMDE_FLOAT64_C( 520.67), SIMDE_FLOAT64_C( 197.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 461.59), SIMDE_FLOAT64_C( -572.51)), -157, simde_mm_set_pd(SIMDE_FLOAT64_C( 461.59), SIMDE_FLOAT64_C( -157.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -113.22), SIMDE_FLOAT64_C( 791.22)), -840, simde_mm_set_pd(SIMDE_FLOAT64_C( -113.22), SIMDE_FLOAT64_C( -840.00)) }, { 
simde_mm_set_pd(SIMDE_FLOAT64_C( 707.47), SIMDE_FLOAT64_C( 954.02)), -347, simde_mm_set_pd(SIMDE_FLOAT64_C( 707.47), SIMDE_FLOAT64_C( -347.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cvtsi32_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cvtsi32_si128(SIMDE_MUNIT_TEST_ARGS) { const struct { int32_t a; simde__m128i r; } test_vec[8] = { { 306582644, simde_mm_set_epi32(0, 0, 0, 306582644) }, { -365974780, simde_mm_set_epi32(0, 0, 0, -365974780) }, { -85065628, simde_mm_set_epi32(0, 0, 0, -85065628) }, { 1053254834, simde_mm_set_epi32(0, 0, 0, 1053254834) }, { -236294791, simde_mm_set_epi32(0, 0, 0, -236294791) }, { 1341442607, simde_mm_set_epi32(0, 0, 0, 1341442607) }, { 336976017, simde_mm_set_epi32(0, 0, 0, 336976017) }, { 1400276059, simde_mm_set_epi32(0, 0, 0, 1400276059) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cvtsi32_si128(test_vec[i].a); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cvtsi64_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; int64_t b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 229.24), SIMDE_FLOAT64_C( 177.04)), 637, simde_mm_set_pd(SIMDE_FLOAT64_C( 229.24), SIMDE_FLOAT64_C( 637.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 401.10), SIMDE_FLOAT64_C( 284.52)), -162, simde_mm_set_pd(SIMDE_FLOAT64_C( 401.10), SIMDE_FLOAT64_C( -162.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 499.46), SIMDE_FLOAT64_C( 321.47)), -540, simde_mm_set_pd(SIMDE_FLOAT64_C( 499.46), SIMDE_FLOAT64_C( -540.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -710.92), SIMDE_FLOAT64_C( 858.14)), -64, simde_mm_set_pd(SIMDE_FLOAT64_C( -710.92), SIMDE_FLOAT64_C( -64.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -289.27), SIMDE_FLOAT64_C( -887.54)), -238, simde_mm_set_pd(SIMDE_FLOAT64_C( -289.27), SIMDE_FLOAT64_C( -238.00)) }, 
{ simde_mm_set_pd(SIMDE_FLOAT64_C( 865.34), SIMDE_FLOAT64_C( 242.15)), 121, simde_mm_set_pd(SIMDE_FLOAT64_C( 865.34), SIMDE_FLOAT64_C( 121.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -376.10), SIMDE_FLOAT64_C( -965.52)), 315, simde_mm_set_pd(SIMDE_FLOAT64_C( -376.10), SIMDE_FLOAT64_C( 315.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 673.51), SIMDE_FLOAT64_C( -882.88)), -72, simde_mm_set_pd(SIMDE_FLOAT64_C( 673.51), SIMDE_FLOAT64_C( -72.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cvtsi64_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cvtss_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128 b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 546.48), SIMDE_FLOAT64_C( 729.20)), simde_mm_set_ps(SIMDE_FLOAT32_C( 142.68), SIMDE_FLOAT32_C( -75.76), SIMDE_FLOAT32_C(-648.72), SIMDE_FLOAT32_C( 148.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( 546.48), SIMDE_FLOAT64_C( 148.36)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 79.60), SIMDE_FLOAT64_C( 648.49)), simde_mm_set_ps(SIMDE_FLOAT32_C( 631.34), SIMDE_FLOAT32_C( 902.53), SIMDE_FLOAT32_C( -54.65), SIMDE_FLOAT32_C( 614.98)), simde_mm_set_pd(SIMDE_FLOAT64_C( 79.60), SIMDE_FLOAT64_C( 614.98)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 811.47), SIMDE_FLOAT64_C( -95.71)), simde_mm_set_ps(SIMDE_FLOAT32_C( 13.27), SIMDE_FLOAT32_C( 315.63), SIMDE_FLOAT32_C( 407.80), SIMDE_FLOAT32_C(-826.61)), simde_mm_set_pd(SIMDE_FLOAT64_C( 811.47), SIMDE_FLOAT64_C(-826.61)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 315.42), SIMDE_FLOAT64_C( -69.06)), simde_mm_set_ps(SIMDE_FLOAT32_C( 775.15), SIMDE_FLOAT32_C( 935.54), SIMDE_FLOAT32_C(-964.44), SIMDE_FLOAT32_C( 659.62)), simde_mm_set_pd(SIMDE_FLOAT64_C( 315.42), SIMDE_FLOAT64_C( 659.62)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C(-579.75), SIMDE_FLOAT64_C(-291.65)), simde_mm_set_ps(SIMDE_FLOAT32_C( 533.61), SIMDE_FLOAT32_C( 565.53), SIMDE_FLOAT32_C( 
-36.93), SIMDE_FLOAT32_C( 57.54)), simde_mm_set_pd(SIMDE_FLOAT64_C(-579.75), SIMDE_FLOAT64_C( 57.54)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 979.09), SIMDE_FLOAT64_C(-471.44)), simde_mm_set_ps(SIMDE_FLOAT32_C( 927.62), SIMDE_FLOAT32_C( 955.93), SIMDE_FLOAT32_C(-964.80), SIMDE_FLOAT32_C( 823.88)), simde_mm_set_pd(SIMDE_FLOAT64_C( 979.09), SIMDE_FLOAT64_C( 823.88)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 172.44), SIMDE_FLOAT64_C(-427.74)), simde_mm_set_ps(SIMDE_FLOAT32_C(-343.18), SIMDE_FLOAT32_C(-352.03), SIMDE_FLOAT32_C(-836.30), SIMDE_FLOAT32_C( -61.82)), simde_mm_set_pd(SIMDE_FLOAT64_C( 172.44), SIMDE_FLOAT64_C( -61.82)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 975.42), SIMDE_FLOAT64_C( 394.72)), simde_mm_set_ps(SIMDE_FLOAT32_C( 748.90), SIMDE_FLOAT32_C(-410.84), SIMDE_FLOAT32_C( 636.92), SIMDE_FLOAT32_C( 230.31)), simde_mm_set_pd(SIMDE_FLOAT64_C( 975.42), SIMDE_FLOAT64_C( 230.31)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cvtss_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 2); } return 0; } /* Test for simde_mm_cvttpd_epi32: each entry loads two float64 inputs with simde_mm_loadu_pd, converts them, and compares all four int32 lanes with the expected vector `r`. In every active entry the first two lanes are the inputs with the fraction dropped (e.g. 788.74 -> 788, -172.36 -> -172) and the last two lanes are 0. */ static int test_simde_mm_cvttpd_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const int32_t r[4]; } test_vec[] = { /* NaN inputs are expected to produce INT32_MIN; skipped when SIMDE_FAST_NANS is defined. */ #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN, -SIMDE_MATH_NAN }, { INT32_MIN, INT32_MIN, INT32_C( 0), INT32_C( 0) } }, #endif /* NOTE(review): the `&& 0` below compiles this group out unconditionally. Its last entry is truncated (inputs only, no expected `r` initializer and no closing brace), so it would not compile if the `&& 0` were removed, and every value in it is within int32 range. Presumably a placeholder for out-of-range vectors like the ones in test_simde_mm_cvttpd_pi32 -- confirm and either complete or delete it. */ #if !defined(SIMDE_FAST_CONVERSION_RANGE) && 0 { { SIMDE_FLOAT64_C( 524.21), SIMDE_FLOAT64_C( 51.51) }, { INT32_C( 524), INT32_C( 51), INT32_C( 0), INT32_C( 0) } }, { { SIMDE_FLOAT64_C( 146.80), SIMDE_FLOAT64_C( -434.11) }, { INT32_C( 146), -INT32_C( 434), INT32_C( 0), INT32_C( 0) } }, { { SIMDE_FLOAT64_C( -150.72), SIMDE_FLOAT64_C( 743.64) }, #endif { { SIMDE_FLOAT64_C( 788.74), SIMDE_FLOAT64_C( 212.17) }, { INT32_C( 788), INT32_C( 212), INT32_C( 0), INT32_C( 0) } }, { { SIMDE_FLOAT64_C( -172.36), SIMDE_FLOAT64_C( 455.86) }, { -INT32_C( 172), INT32_C( 455), INT32_C( 0), INT32_C( 0) } }, { { SIMDE_FLOAT64_C(
-728.09), SIMDE_FLOAT64_C( 893.73) }, { -INT32_C( 728), INT32_C( 893), INT32_C( 0), INT32_C( 0) } }, { { SIMDE_FLOAT64_C( 333.21), SIMDE_FLOAT64_C( -914.29) }, { INT32_C( 333), -INT32_C( 914), INT32_C( 0), INT32_C( 0) } }, { { SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 701.07) }, { INT32_C( 0), INT32_C( 701), INT32_C( 0), INT32_C( 0) } }, { { SIMDE_FLOAT64_C( 639.75), SIMDE_FLOAT64_C( -803.13) }, { INT32_C( 639), -INT32_C( 803), INT32_C( 0), INT32_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128i r = simde_mm_cvttpd_epi32(a); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm_cvttpd_pi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const int32_t r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN, -SIMDE_MATH_NAN }, { INT32_MIN, INT32_MIN } }, #endif #if !defined(SIMDE_FAST_CONVERSION_RANGE) { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1), HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100) }, { INT32_MIN, INT32_C( 2147483547) } }, { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1), HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100) }, { INT32_MIN, -INT32_C( 2147483548) } }, #endif { { SIMDE_FLOAT64_C( 788.74), SIMDE_FLOAT64_C( 212.17) }, { INT32_C( 788), INT32_C( 212) } }, { { SIMDE_FLOAT64_C( -172.36), SIMDE_FLOAT64_C( 455.86) }, { -INT32_C( 172), INT32_C( 455) } }, { { SIMDE_FLOAT64_C( -728.09), SIMDE_FLOAT64_C( 893.73) }, { -INT32_C( 728), INT32_C( 893) } }, { { SIMDE_FLOAT64_C( 333.21), SIMDE_FLOAT64_C( -914.29) }, { INT32_C( 333), -INT32_C( 914) } }, { { SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 701.07) }, { INT32_C( 0), INT32_C( 701) } }, { { SIMDE_FLOAT64_C( 639.75), SIMDE_FLOAT64_C( -803.13) }, { INT32_C( 639), -INT32_C( 803) }
} }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m64 r = simde_mm_cvttpd_pi32(a); simde_test_x86_assert_equal_i32x2(r, simde_x_mm_load_si64(test_vec[i].r)); } return 0; } static int test_simde_mm_cvttps_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const int32_t r[4]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -859.90), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -330.33) }, { INT32_MIN, -INT32_C( 859), INT32_MIN, -INT32_C( 330) } }, #endif #if !defined(SIMDE_FAST_CONVERSION_RANGE) { { HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1), HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100), HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1), HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100) }, { INT32_MIN, INT32_C( 2147483520), INT32_MIN, -INT32_C( 2147483520) } }, #endif { { SIMDE_FLOAT32_C( 884.61), SIMDE_FLOAT32_C( 424.21), SIMDE_FLOAT32_C( 434.23), SIMDE_FLOAT32_C( -865.32) }, { INT32_C( 884), INT32_C( 424), INT32_C( 434), -INT32_C( 865) } }, { { SIMDE_FLOAT32_C( 3.17), SIMDE_FLOAT32_C( -163.40), SIMDE_FLOAT32_C( -490.56), SIMDE_FLOAT32_C( 628.48) }, { INT32_C( 3), -INT32_C( 163), -INT32_C( 490), INT32_C( 628) } }, { { SIMDE_FLOAT32_C( 629.16), SIMDE_FLOAT32_C( 267.90), SIMDE_FLOAT32_C( 468.27), SIMDE_FLOAT32_C( 765.29) }, { INT32_C( 629), INT32_C( 267), INT32_C( 468), INT32_C( 765) } }, { { SIMDE_FLOAT32_C( -532.39), SIMDE_FLOAT32_C( 448.09), SIMDE_FLOAT32_C( 543.36), SIMDE_FLOAT32_C( -643.97) }, { -INT32_C( 532), INT32_C( 448), INT32_C( 543), -INT32_C( 643) } }, { { SIMDE_FLOAT32_C( -958.61), SIMDE_FLOAT32_C( -434.16), SIMDE_FLOAT32_C( 958.20), SIMDE_FLOAT32_C( 749.69) }, { -INT32_C( 958), -INT32_C( 434), INT32_C( 958), INT32_C( 749) } }, { { SIMDE_FLOAT32_C( 379.97), SIMDE_FLOAT32_C( -697.16), 
SIMDE_FLOAT32_C( 790.54), SIMDE_FLOAT32_C( -387.37) }, { INT32_C( 379), -INT32_C( 697), INT32_C( 790), -INT32_C( 387) } }, { { SIMDE_FLOAT32_C( -785.26), SIMDE_FLOAT32_C( 403.54), SIMDE_FLOAT32_C( -475.03), SIMDE_FLOAT32_C( -577.41) }, { -INT32_C( 785), INT32_C( 403), -INT32_C( 475), -INT32_C( 577) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128i r = simde_mm_cvttps_epi32(a); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm_cvttsd_si32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const int32_t r; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 248.78) }, INT32_MIN }, { { -SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 139.38) }, INT32_MIN }, #endif #if !defined(SIMDE_FAST_CONVERSION_RANGE) { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1), SIMDE_FLOAT64_C( -850.89) }, -INT32_C( 2147483648) }, { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1), SIMDE_FLOAT64_C( -30.56) }, -INT32_C( 2147483648) }, { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100), SIMDE_FLOAT64_C( -742.09) }, INT32_C( 2147483547) }, { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100), SIMDE_FLOAT64_C( -496.27) }, -INT32_C( 2147483548) }, #endif { { SIMDE_FLOAT64_C( -57.42), SIMDE_FLOAT64_C( -705.99) }, -INT32_C( 57) }, { { SIMDE_FLOAT64_C( 737.15), SIMDE_FLOAT64_C( -394.42) }, INT32_C( 737) }, { { SIMDE_FLOAT64_C( -193.78), SIMDE_FLOAT64_C( 0.85) }, -INT32_C( 193) }, { { SIMDE_FLOAT64_C( -61.02), SIMDE_FLOAT64_C( 247.60) }, -INT32_C( 61) }, { { SIMDE_FLOAT64_C( 396.64), SIMDE_FLOAT64_C( 103.10) }, INT32_C( 396) }, { { SIMDE_FLOAT64_C( 606.36), SIMDE_FLOAT64_C( -703.92) }, INT32_C( 606) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); int32_t r = simde_mm_cvttsd_si32(a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; } static int test_simde_mm_cvttsd_si64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; int64_t r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C(-345.97), SIMDE_FLOAT64_C( 664.87)), 664 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 648.27), SIMDE_FLOAT64_C(-390.19)), -390 }, { simde_mm_set_pd(SIMDE_FLOAT64_C(-500.63), SIMDE_FLOAT64_C(-258.15)), -258 }, { simde_mm_set_pd(SIMDE_FLOAT64_C(-109.22), SIMDE_FLOAT64_C(-784.27)), -784 }, { simde_mm_set_pd(SIMDE_FLOAT64_C(-688.79), SIMDE_FLOAT64_C(-698.22)), -698 }, { simde_mm_set_pd(SIMDE_FLOAT64_C(-914.25), SIMDE_FLOAT64_C(-650.88)), -650 }, { simde_mm_set_pd(SIMDE_FLOAT64_C(-228.35), SIMDE_FLOAT64_C(-603.46)), -603 }, { simde_mm_set_pd(SIMDE_FLOAT64_C(-556.94), SIMDE_FLOAT64_C( 694.64)), 694 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int64_t r = simde_mm_cvttsd_si64(test_vec[i].a); simde_assert_equal_i64(r, test_vec[i].r); } return 0; } static int test_simde_mm_div_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -184.40), SIMDE_FLOAT64_C( 992.45)), simde_mm_set_pd(SIMDE_FLOAT64_C( 155.72), SIMDE_FLOAT64_C( 856.52)), simde_mm_set_pd(SIMDE_FLOAT64_C( -1.18), SIMDE_FLOAT64_C( 1.16)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -34.37), SIMDE_FLOAT64_C( 596.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( -718.99), SIMDE_FLOAT64_C( -17.98)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( -33.19)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 797.64), SIMDE_FLOAT64_C( 669.98)), simde_mm_set_pd(SIMDE_FLOAT64_C( -872.55), SIMDE_FLOAT64_C( 857.06)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.91), SIMDE_FLOAT64_C( 0.78)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -864.62), SIMDE_FLOAT64_C( 635.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( -556.59), SIMDE_FLOAT64_C( 676.91)), 
simde_mm_set_pd(SIMDE_FLOAT64_C( 1.55), SIMDE_FLOAT64_C( 0.94)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -635.32), SIMDE_FLOAT64_C( 518.94)), simde_mm_set_pd(SIMDE_FLOAT64_C( -426.58), SIMDE_FLOAT64_C( -331.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.49), SIMDE_FLOAT64_C( -1.57)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -494.90), SIMDE_FLOAT64_C( -42.04)), simde_mm_set_pd(SIMDE_FLOAT64_C( 432.98), SIMDE_FLOAT64_C( 277.38)), simde_mm_set_pd(SIMDE_FLOAT64_C( -1.14), SIMDE_FLOAT64_C( -0.15)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -530.41), SIMDE_FLOAT64_C( 112.07)), simde_mm_set_pd(SIMDE_FLOAT64_C( -914.02), SIMDE_FLOAT64_C( 90.15)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 1.24)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -176.73), SIMDE_FLOAT64_C( -245.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( 315.38), SIMDE_FLOAT64_C( -747.83)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.56), SIMDE_FLOAT64_C( 0.33)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_div_pd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_div_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -164.12), SIMDE_FLOAT64_C( -192.56)), simde_mm_set_pd(SIMDE_FLOAT64_C( -917.87), SIMDE_FLOAT64_C( 429.05)), simde_mm_set_pd(SIMDE_FLOAT64_C( -164.12), SIMDE_FLOAT64_C( -0.45)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 658.48), SIMDE_FLOAT64_C( 164.94)), simde_mm_set_pd(SIMDE_FLOAT64_C( -29.79), SIMDE_FLOAT64_C( 356.73)), simde_mm_set_pd(SIMDE_FLOAT64_C( 658.48), SIMDE_FLOAT64_C( 0.46)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -450.41), SIMDE_FLOAT64_C( 587.65)), simde_mm_set_pd(SIMDE_FLOAT64_C( 553.54), SIMDE_FLOAT64_C( -684.22)), simde_mm_set_pd(SIMDE_FLOAT64_C( -450.41), SIMDE_FLOAT64_C( -0.86)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -742.91), SIMDE_FLOAT64_C( -966.41)), simde_mm_set_pd(SIMDE_FLOAT64_C( 180.37), 
SIMDE_FLOAT64_C( 175.93)), simde_mm_set_pd(SIMDE_FLOAT64_C( -742.91), SIMDE_FLOAT64_C( -5.49)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -280.67), SIMDE_FLOAT64_C( 415.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( 495.86), SIMDE_FLOAT64_C( -819.23)), simde_mm_set_pd(SIMDE_FLOAT64_C( -280.67), SIMDE_FLOAT64_C( -0.51)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 391.69), SIMDE_FLOAT64_C( 589.87)), simde_mm_set_pd(SIMDE_FLOAT64_C( -651.02), SIMDE_FLOAT64_C( -239.35)), simde_mm_set_pd(SIMDE_FLOAT64_C( 391.69), SIMDE_FLOAT64_C( -2.46)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 423.52), SIMDE_FLOAT64_C( 419.28)), simde_mm_set_pd(SIMDE_FLOAT64_C( 336.50), SIMDE_FLOAT64_C( 431.02)), simde_mm_set_pd(SIMDE_FLOAT64_C( 423.52), SIMDE_FLOAT64_C( 0.97)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 235.17), SIMDE_FLOAT64_C( -333.81)), simde_mm_set_pd(SIMDE_FLOAT64_C( -715.21), SIMDE_FLOAT64_C( 4.82)), simde_mm_set_pd(SIMDE_FLOAT64_C( 235.17), SIMDE_FLOAT64_C( -69.26)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_div_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_extract_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; int32_t r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-22888), INT16_C(-26241), INT16_C( 16094), INT16_C( 25365), INT16_C(-10975), INT16_C( -4323), INT16_C( 9478), INT16_C(-18966)), 54561 }, { simde_mm_set_epi16(INT16_C( -4494), INT16_C(-23544), INT16_C( 12313), INT16_C( 19220), INT16_C( 16921), INT16_C( 9248), INT16_C( -1076), INT16_C(-18617)), 16921 }, { simde_mm_set_epi16(INT16_C( 5051), INT16_C( 30913), INT16_C( 18404), INT16_C(-11820), INT16_C( 16495), INT16_C( 32647), INT16_C( 21150), INT16_C( 16664)), 16495 }, { simde_mm_set_epi16(INT16_C( 987), INT16_C( 32176), INT16_C(-17758), INT16_C( 21096), INT16_C( -945), INT16_C( 5537), INT16_C( 5495), INT16_C(-18130)), 64591 }, { simde_mm_set_epi16(INT16_C(-16046), INT16_C( 13714), INT16_C( 12272), 
INT16_C( 32151), INT16_C(-14156), INT16_C( 8851), INT16_C(-19624), INT16_C( -2653)), 51380 }, { simde_mm_set_epi16(INT16_C(-28172), INT16_C( 1666), INT16_C( 15569), INT16_C( -1622), INT16_C( 22048), INT16_C(-24364), INT16_C( 2478), INT16_C( 20826)), 22048 }, { simde_mm_set_epi16(INT16_C(-29653), INT16_C(-27750), INT16_C( 5027), INT16_C( -7816), INT16_C(-20852), INT16_C( 3178), INT16_C(-27881), INT16_C( 3156)), 44684 }, { simde_mm_set_epi16(INT16_C(-26280), INT16_C( 27067), INT16_C( 10815), INT16_C(-30178), INT16_C(-26852), INT16_C( 26399), INT16_C(-30202), INT16_C(-11030)), 38684 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int32_t r = simde_mm_extract_epi16(test_vec[i].a, 3); simde_assert_equal_i32(r, test_vec[i].r); } return 0; } static int test_simde_mm_insert_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; int16_t b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-18659), INT16_C( 19491), INT16_C(-30434), INT16_C( -6059), INT16_C( 11985), INT16_C( 5369), INT16_C(-14188), INT16_C( 9668)), 21712, simde_mm_set_epi16(INT16_C(-18659), INT16_C( 19491), INT16_C(-30434), INT16_C( -6059), INT16_C( 21712), INT16_C( 5369), INT16_C(-14188), INT16_C( 9668)) }, { simde_mm_set_epi16(INT16_C( 32684), INT16_C(-21716), INT16_C( 7657), INT16_C( 3627), INT16_C( 12377), INT16_C( 30609), INT16_C(-12611), INT16_C(-11955)), -27473, simde_mm_set_epi16(INT16_C( 32684), INT16_C(-21716), INT16_C( 7657), INT16_C( 3627), INT16_C(-27473), INT16_C( 30609), INT16_C(-12611), INT16_C(-11955)) }, { simde_mm_set_epi16(INT16_C(-18344), INT16_C( -4896), INT16_C(-19094), INT16_C( -638), INT16_C(-30376), INT16_C(-17556), INT16_C(-31358), INT16_C(-17530)), -19116, simde_mm_set_epi16(INT16_C(-18344), INT16_C( -4896), INT16_C(-19094), INT16_C( -638), INT16_C(-19116), INT16_C(-17556), INT16_C(-31358), INT16_C(-17530)) }, { simde_mm_set_epi16(INT16_C(-11121), INT16_C( 29288), INT16_C( -3915), INT16_C( 13306), INT16_C( 30582), INT16_C( 4374), 
INT16_C( -9323), INT16_C( -2317)), 5778, simde_mm_set_epi16(INT16_C(-11121), INT16_C( 29288), INT16_C( -3915), INT16_C( 13306), INT16_C( 5778), INT16_C( 4374), INT16_C( -9323), INT16_C( -2317)) }, { simde_mm_set_epi16(INT16_C( 7542), INT16_C(-16196), INT16_C(-24612), INT16_C( 8929), INT16_C(-16460), INT16_C( 17259), INT16_C( 672), INT16_C(-18076)), -411, simde_mm_set_epi16(INT16_C( 7542), INT16_C(-16196), INT16_C(-24612), INT16_C( 8929), INT16_C( -411), INT16_C( 17259), INT16_C( 672), INT16_C(-18076)) }, { simde_mm_set_epi16(INT16_C( 15913), INT16_C(-18873), INT16_C( 14978), INT16_C( 31946), INT16_C( -6939), INT16_C( 26150), INT16_C( 18499), INT16_C(-16752)), 1141, simde_mm_set_epi16(INT16_C( 15913), INT16_C(-18873), INT16_C( 14978), INT16_C( 31946), INT16_C( 1141), INT16_C( 26150), INT16_C( 18499), INT16_C(-16752)) }, { simde_mm_set_epi16(INT16_C( 1093), INT16_C( -6101), INT16_C(-30747), INT16_C(-18266), INT16_C( 4085), INT16_C(-14478), INT16_C( -6279), INT16_C(-25531)), -18605, simde_mm_set_epi16(INT16_C( 1093), INT16_C( -6101), INT16_C(-30747), INT16_C(-18266), INT16_C(-18605), INT16_C(-14478), INT16_C( -6279), INT16_C(-25531)) }, { simde_mm_set_epi16(INT16_C( -2463), INT16_C( -3389), INT16_C( 28311), INT16_C( -5667), INT16_C( 24886), INT16_C( 24368), INT16_C( 19484), INT16_C(-11581)), -17420, simde_mm_set_epi16(INT16_C( -2463), INT16_C( -3389), INT16_C( 28311), INT16_C( -5667), INT16_C(-17420), INT16_C( 24368), INT16_C( 19484), INT16_C(-11581)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_insert_epi16(test_vec[i].a, test_vec[i].b, 3); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; }

/* Test for simde_mm_load_pd (the aligned 128-bit double load).
 *
 * Each entry holds the same pair of values twice:
 *   a - the input, declared with SIMDE_ALIGN_LIKE_16(simde__m128d) so that it
 *       is a valid argument for the aligned load;
 *   r - the expected result, read back with the unaligned simde_mm_loadu_pd.
 *
 * Returns 0 when every entry matches; the assertion macro reports failures. */
static int
test_simde_mm_load_pd (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 a[2];
    const simde_float64 r[2];
  } test_vec[] = {
    { { SIMDE_FLOAT64_C( 319.94), SIMDE_FLOAT64_C( 593.75) },
      { SIMDE_FLOAT64_C( 319.94), SIMDE_FLOAT64_C( 593.75) } },
    { { SIMDE_FLOAT64_C( -220.38), SIMDE_FLOAT64_C( 646.62) },
      { SIMDE_FLOAT64_C( -220.38), SIMDE_FLOAT64_C( 646.62) } },
    { { SIMDE_FLOAT64_C( 769.82), SIMDE_FLOAT64_C( 960.64) },
      { SIMDE_FLOAT64_C( 769.82), SIMDE_FLOAT64_C( 960.64) } },
    { { SIMDE_FLOAT64_C( -283.67), SIMDE_FLOAT64_C( -795.53) },
      { SIMDE_FLOAT64_C( -283.67), SIMDE_FLOAT64_C( -795.53) } },
    { { SIMDE_FLOAT64_C( -643.22), SIMDE_FLOAT64_C( 246.89) },
      { SIMDE_FLOAT64_C( -643.22), SIMDE_FLOAT64_C( 246.89) } },
    { { SIMDE_FLOAT64_C( -842.54), SIMDE_FLOAT64_C( -513.83) },
      { SIMDE_FLOAT64_C( -842.54), SIMDE_FLOAT64_C( -513.83) } },
    { { SIMDE_FLOAT64_C( 635.37), SIMDE_FLOAT64_C( 836.97) },
      { SIMDE_FLOAT64_C( 635.37), SIMDE_FLOAT64_C( 836.97) } },
    { { SIMDE_FLOAT64_C( 838.72), SIMDE_FLOAT64_C( -197.92) },
      { SIMDE_FLOAT64_C( 838.72), SIMDE_FLOAT64_C( -197.92) } }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    /* The aligned load must read the aligned input `a`. Reading `r` here (as
     * the code previously did) compared `r` with itself, never touched `a`,
     * and passed a member with no declared alignment to an aligned load. */
    simde__m128d r = simde_mm_load_pd(test_vec[i].a);
    simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1);
  }

  return 0;
}

static int test_simde_mm_load_pd1(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float64 a; simde__m128d r; } test_vec[8] = { {SIMDE_FLOAT64_C( -639.28), simde_mm_set_pd(SIMDE_FLOAT64_C(-639.28), SIMDE_FLOAT64_C(-639.28)) }, {SIMDE_FLOAT64_C( 754.31), simde_mm_set_pd(SIMDE_FLOAT64_C( 754.31), SIMDE_FLOAT64_C( 754.31)) }, {SIMDE_FLOAT64_C( -143.09), simde_mm_set_pd(SIMDE_FLOAT64_C(-143.09), SIMDE_FLOAT64_C(-143.09)) }, {SIMDE_FLOAT64_C( -509.95), simde_mm_set_pd(SIMDE_FLOAT64_C(-509.95), SIMDE_FLOAT64_C(-509.95)) }, {SIMDE_FLOAT64_C( 357.11), simde_mm_set_pd(SIMDE_FLOAT64_C( 357.11), SIMDE_FLOAT64_C( 357.11)) }, {SIMDE_FLOAT64_C( 414.83), simde_mm_set_pd(SIMDE_FLOAT64_C( 414.83), SIMDE_FLOAT64_C( 414.83)) }, {SIMDE_FLOAT64_C( 416.46), simde_mm_set_pd(SIMDE_FLOAT64_C( 416.46), SIMDE_FLOAT64_C( 416.46)) }, {SIMDE_FLOAT64_C( 167.42), simde_mm_set_pd(SIMDE_FLOAT64_C( 167.42), SIMDE_FLOAT64_C( 167.42)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0]));
i++) { simde__m128d r = simde_mm_load_pd1(&test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_load_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float64 a; simde__m128d r; } test_vec[8] = { {SIMDE_FLOAT64_C( 883.59), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 883.59)) }, {SIMDE_FLOAT64_C( 719.08), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 719.08)) }, {SIMDE_FLOAT64_C( -82.94), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -82.94)) }, {SIMDE_FLOAT64_C( -87.79), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -87.79)) }, {SIMDE_FLOAT64_C( 309.31), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 309.31)) }, {SIMDE_FLOAT64_C( -987.67), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-987.67)) }, {SIMDE_FLOAT64_C( 196.18), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 196.18)) }, {SIMDE_FLOAT64_C( 313.82), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 313.82)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_load_sd(&test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_load_si128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C(-1485513264), INT32_C( 2130924320), INT32_C( 1226074611), INT32_C( -306486659)), simde_mm_set_epi32(INT32_C(-1485513264), INT32_C( 2130924320), INT32_C( 1226074611), INT32_C( -306486659)) }, { simde_mm_set_epi32(INT32_C( 952258085), INT32_C( -534886765), INT32_C( -354984724), INT32_C(-1450427500)), simde_mm_set_epi32(INT32_C( 952258085), INT32_C( -534886765), INT32_C( -354984724), INT32_C(-1450427500)) }, { simde_mm_set_epi32(INT32_C(-1417585996), INT32_C( 546041970), INT32_C(-1469146664), INT32_C(-2062567602)), simde_mm_set_epi32(INT32_C(-1417585996), INT32_C( 546041970), INT32_C(-1469146664), INT32_C(-2062567602)) }, { 
simde_mm_set_epi32(INT32_C(-1482230799), INT32_C(-1421432180), INT32_C(-1588201284), INT32_C(-1267673212)), simde_mm_set_epi32(INT32_C(-1482230799), INT32_C(-1421432180), INT32_C(-1588201284), INT32_C(-1267673212)) }, { simde_mm_set_epi32(INT32_C(-1153380991), INT32_C( 1838203743), INT32_C( -180063833), INT32_C( -699223421)), simde_mm_set_epi32(INT32_C(-1153380991), INT32_C( 1838203743), INT32_C( -180063833), INT32_C( -699223421)) }, { simde_mm_set_epi32(INT32_C( -711752348), INT32_C( 464328511), INT32_C( 1773807699), INT32_C( 849844772)), simde_mm_set_epi32(INT32_C( -711752348), INT32_C( 464328511), INT32_C( 1773807699), INT32_C( 849844772)) }, { simde_mm_set_epi32(INT32_C(-1083662155), INT32_C( -641783129), INT32_C(-1893537704), INT32_C( 1971283674)), simde_mm_set_epi32(INT32_C(-1083662155), INT32_C( -641783129), INT32_C(-1893537704), INT32_C( 1971283674)) }, { simde_mm_set_epi32(INT32_C(-1329936037), INT32_C( -364329957), INT32_C(-1886427840), INT32_C(-1935682760)), simde_mm_set_epi32(INT32_C(-1329936037), INT32_C( -364329957), INT32_C(-1886427840), INT32_C(-1935682760)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_load_si128(&test_vec[i].a); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_loadh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde_float64 b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -625.39), SIMDE_FLOAT64_C( -212.79)), SIMDE_FLOAT64_C( -544.03), simde_mm_set_pd(SIMDE_FLOAT64_C( -544.03), SIMDE_FLOAT64_C( -212.79)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 906.58), SIMDE_FLOAT64_C( -446.43)), SIMDE_FLOAT64_C( -955.62), simde_mm_set_pd(SIMDE_FLOAT64_C( -955.62), SIMDE_FLOAT64_C( -446.43)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 575.38), SIMDE_FLOAT64_C( -468.21)), SIMDE_FLOAT64_C( -790.22), simde_mm_set_pd(SIMDE_FLOAT64_C( -790.22), SIMDE_FLOAT64_C( -468.21)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 375.10), 
SIMDE_FLOAT64_C( -731.74)), SIMDE_FLOAT64_C( 857.52), simde_mm_set_pd(SIMDE_FLOAT64_C( 857.52), SIMDE_FLOAT64_C( -731.74)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -48.07), SIMDE_FLOAT64_C( -201.78)), SIMDE_FLOAT64_C( -122.99), simde_mm_set_pd(SIMDE_FLOAT64_C( -122.99), SIMDE_FLOAT64_C( -201.78)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 97.66), SIMDE_FLOAT64_C( -743.76)), SIMDE_FLOAT64_C( 123.61), simde_mm_set_pd(SIMDE_FLOAT64_C( 123.61), SIMDE_FLOAT64_C( -743.76)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 622.43), SIMDE_FLOAT64_C( -815.78)), SIMDE_FLOAT64_C( -884.62), simde_mm_set_pd(SIMDE_FLOAT64_C( -884.62), SIMDE_FLOAT64_C( -815.78)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 233.02), SIMDE_FLOAT64_C( 337.90)), SIMDE_FLOAT64_C( 566.08), simde_mm_set_pd(SIMDE_FLOAT64_C( 566.08), SIMDE_FLOAT64_C( 337.90)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_loadh_pd(test_vec[i].a, &test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 4); } return 0; } static int test_simde_mm_loadl_epi64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a; const int64_t r[2]; } test_vec[] = { { -INT64_C( 5374209034103506743), { -INT64_C( 5374209034103506743), INT64_C(0) } }, { -INT64_C( 8818261387786582106), { -INT64_C( 8818261387786582106), INT64_C(0) } }, { INT64_C( 8778417490344874118), { INT64_C( 8778417490344874118), INT64_C(0) } }, { INT64_C( 1040805703196854697), { INT64_C( 1040805703196854697), INT64_C(0) } }, { -INT64_C( 6883770744639848089), { -INT64_C( 6883770744639848089), INT64_C(0) } }, { -INT64_C( 6091281060752135947), { -INT64_C( 6091281060752135947), INT64_C(0) } }, { INT64_C( 7649374694561713533), { INT64_C( 7649374694561713533), INT64_C(0) } }, { INT64_C( 9018079017176557522), { INT64_C( 9018079017176557522), INT64_C(0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i r = simde_mm_loadl_epi64(SIMDE_ALIGN_CAST(simde__m128i const*, &test_vec[i].a)); 
simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm_loadl_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde_float64 b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 398.25), SIMDE_FLOAT64_C( 169.44)), SIMDE_FLOAT64_C( 512.14), simde_mm_set_pd(SIMDE_FLOAT64_C( 398.25), SIMDE_FLOAT64_C( 512.14)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 747.05), SIMDE_FLOAT64_C( 122.36)), SIMDE_FLOAT64_C( -219.24), simde_mm_set_pd(SIMDE_FLOAT64_C( 747.05), SIMDE_FLOAT64_C( -219.24)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 806.68), SIMDE_FLOAT64_C( 439.45)), SIMDE_FLOAT64_C( 545.31), simde_mm_set_pd(SIMDE_FLOAT64_C( 806.68), SIMDE_FLOAT64_C( 545.31)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -494.45), SIMDE_FLOAT64_C( 273.54)), SIMDE_FLOAT64_C( 233.72), simde_mm_set_pd(SIMDE_FLOAT64_C( -494.45), SIMDE_FLOAT64_C( 233.72)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -435.22), SIMDE_FLOAT64_C( -790.14)), SIMDE_FLOAT64_C( 334.56), simde_mm_set_pd(SIMDE_FLOAT64_C( -435.22), SIMDE_FLOAT64_C( 334.56)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -387.92), SIMDE_FLOAT64_C( 587.13)), SIMDE_FLOAT64_C( 782.99), simde_mm_set_pd(SIMDE_FLOAT64_C( -387.92), SIMDE_FLOAT64_C( 782.99)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -656.10), SIMDE_FLOAT64_C( -868.90)), SIMDE_FLOAT64_C( -241.17), simde_mm_set_pd(SIMDE_FLOAT64_C( -656.10), SIMDE_FLOAT64_C( -241.17)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 155.11), SIMDE_FLOAT64_C( -412.38)), SIMDE_FLOAT64_C( 606.64), simde_mm_set_pd(SIMDE_FLOAT64_C( 155.11), SIMDE_FLOAT64_C( 606.64)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_loadl_pd(test_vec[i].a, &test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 4); } return 0; } static int test_simde_mm_loadr_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 mem_addr[2]; simde__m128d r; } test_vec[8] = { { {SIMDE_FLOAT64_C( 808.22), 
SIMDE_FLOAT64_C( -538.55) }, simde_mm_set_pd(SIMDE_FLOAT64_C( 808.22), SIMDE_FLOAT64_C(-538.55)) }, { {SIMDE_FLOAT64_C( 475.76), SIMDE_FLOAT64_C( 878.69) }, simde_mm_set_pd(SIMDE_FLOAT64_C( 475.76), SIMDE_FLOAT64_C( 878.69)) }, { {SIMDE_FLOAT64_C( -400.00), SIMDE_FLOAT64_C( -135.07) }, simde_mm_set_pd(SIMDE_FLOAT64_C(-400.00), SIMDE_FLOAT64_C(-135.07)) }, { {SIMDE_FLOAT64_C( -32.33), SIMDE_FLOAT64_C( -148.19) }, simde_mm_set_pd(SIMDE_FLOAT64_C( -32.33), SIMDE_FLOAT64_C(-148.19)) }, { {SIMDE_FLOAT64_C( -971.23), SIMDE_FLOAT64_C( -835.90) }, simde_mm_set_pd(SIMDE_FLOAT64_C(-971.23), SIMDE_FLOAT64_C(-835.90)) }, { {SIMDE_FLOAT64_C( -891.74), SIMDE_FLOAT64_C( -424.87) }, simde_mm_set_pd(SIMDE_FLOAT64_C(-891.74), SIMDE_FLOAT64_C(-424.87)) }, { {SIMDE_FLOAT64_C( -199.77), SIMDE_FLOAT64_C( 631.45) }, simde_mm_set_pd(SIMDE_FLOAT64_C(-199.77), SIMDE_FLOAT64_C( 631.45)) }, { {SIMDE_FLOAT64_C( 410.30), SIMDE_FLOAT64_C( 721.68) }, simde_mm_set_pd(SIMDE_FLOAT64_C( 410.30), SIMDE_FLOAT64_C( 721.68)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_loadr_pd(test_vec[i].mem_addr); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_loadu_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float64 mem_addr[2]; simde__m128d r; } test_vec[8] = { { {SIMDE_FLOAT64_C( -578.02), SIMDE_FLOAT64_C( 20.66) }, simde_mm_set_pd(SIMDE_FLOAT64_C( 20.66), SIMDE_FLOAT64_C(-578.02)) }, { {SIMDE_FLOAT64_C( 370.06), SIMDE_FLOAT64_C( -720.89) }, simde_mm_set_pd(SIMDE_FLOAT64_C(-720.89), SIMDE_FLOAT64_C( 370.06)) }, { {SIMDE_FLOAT64_C( 584.38), SIMDE_FLOAT64_C( -849.44) }, simde_mm_set_pd(SIMDE_FLOAT64_C(-849.44), SIMDE_FLOAT64_C( 584.38)) }, { {SIMDE_FLOAT64_C( 636.90), SIMDE_FLOAT64_C( 349.95) }, simde_mm_set_pd(SIMDE_FLOAT64_C( 349.95), SIMDE_FLOAT64_C( 636.90)) }, { {SIMDE_FLOAT64_C( -617.52), SIMDE_FLOAT64_C( 599.47) }, simde_mm_set_pd(SIMDE_FLOAT64_C( 599.47), SIMDE_FLOAT64_C(-617.52)) }, { {SIMDE_FLOAT64_C( 
633.70), SIMDE_FLOAT64_C( 30.57) }, simde_mm_set_pd(SIMDE_FLOAT64_C( 30.57), SIMDE_FLOAT64_C( 633.70)) }, { {SIMDE_FLOAT64_C( -333.40), SIMDE_FLOAT64_C( 592.38) }, simde_mm_set_pd(SIMDE_FLOAT64_C( 592.38), SIMDE_FLOAT64_C(-333.40)) }, { {SIMDE_FLOAT64_C( -335.86), SIMDE_FLOAT64_C( 212.26) }, simde_mm_set_pd(SIMDE_FLOAT64_C( 212.26), SIMDE_FLOAT64_C(-335.86)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_loadu_pd(test_vec[i].mem_addr); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_loadu_si128 (SIMDE_MUNIT_TEST_ARGS) { #if !defined(__clang__) || !defined(SIMDE_ARCH_ARM) static const struct { const int32_t a[4]; } test_vec[] = { { { -INT32_C( 431648212), INT32_C( 784010008), -INT32_C( 1621020084), INT32_C( 1563595920) } }, { { -INT32_C( 2020743978), INT32_C( 642031476), -INT32_C( 1168838661), -INT32_C( 71485745) } }, { { -INT32_C( 505281848), INT32_C( 1510972686), INT32_C( 1626960080), -INT32_C( 608359675) } }, { { -INT32_C( 2073933297), -INT32_C( 441800983), INT32_C( 1688206997), -INT32_C( 44016587) } }, { { INT32_C( 1843282527), -INT32_C( 1345851937), INT32_C( 1661976670), -INT32_C( 79770388) } }, { { INT32_C( 8364054), -INT32_C( 605738426), INT32_C( 1564443688), INT32_C( 1079746529) } }, { { -INT32_C( 1045612063), -INT32_C( 2056227801), INT32_C( 552108084), INT32_C( 1662789196) } }, { { INT32_C( 207854534), -INT32_C( 286832443), -INT32_C( 1387583796), -INT32_C( 1477597498) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int32_t r[4]; simde_mm_storeu_si128(SIMDE_ALIGN_CAST(simde__m128i*, r), simde_mm_loadu_si128(SIMDE_ALIGN_CAST(const simde__m128i*, test_vec[i].a))); simde_assert_equal_vi32(sizeof(r) / sizeof(r[0]), r, test_vec[i].a); } #endif return 0; } static int test_simde_mm_loadu_si16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a; const int16_t r[8]; } test_vec[] = { { -INT16_C( 11138), { -INT16_C( 11138), 
INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { INT16_C( 23724), { INT16_C( 23724), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { INT16_C( 14484), { INT16_C( 14484), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { INT16_C( 13428), { INT16_C( 13428), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { -INT16_C( 4679), { -INT16_C( 4679), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { -INT16_C( 27444), { -INT16_C( 27444), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { INT16_C( 23920), { INT16_C( 23920), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { INT16_C( 10692), { INT16_C( 10692), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int16_t a = test_vec[i].a; simde__m128i r = HEDLEY_CONCAT(simde,_mm_loadu_si16)(&a); simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_mm_loadu_si32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a; const int32_t r[4]; } test_vec[] = { { INT32_C( 418822831), { INT32_C( 418822831), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { INT32_C( 1942173819), { INT32_C( 1942173819), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { INT32_C( 1655488478), { INT32_C( 1655488478), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { -INT32_C( 1203443910), { -INT32_C( 1203443910), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { INT32_C( 1326772667), { INT32_C( 1326772667), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { -INT32_C( 494630871), { -INT32_C( 494630871), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { -INT32_C( 445230987), { -INT32_C( 
445230987), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { INT32_C( 1544436653), { INT32_C( 1544436653), INT32_C( 0), INT32_C( 0), INT32_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int32_t a = test_vec[i].a; simde__m128i r = HEDLEY_CONCAT(simde,_mm_loadu_si32)(&a); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm_loadu_si64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a; const int64_t r[2]; } test_vec[] = { { INT64_C( 1937454096935355637), { INT64_C( 1937454096935355637), INT64_C( 0) } }, { INT64_C( 3668957564122271735), { INT64_C( 3668957564122271735), INT64_C( 0) } }, { -INT64_C( 235024424980250958), { -INT64_C( 235024424980250958), INT64_C( 0) } }, { INT64_C( 7233045361154208854), { INT64_C( 7233045361154208854), INT64_C( 0) } }, { INT64_C( 309823741680211445), { INT64_C( 309823741680211445), INT64_C( 0) } }, { INT64_C( 4463101911464528198), { INT64_C( 4463101911464528198), INT64_C( 0) } }, { -INT64_C( 3557326416991718882), { -INT64_C( 3557326416991718882), INT64_C( 0) } }, { INT64_C( 7628307720165229322), { INT64_C( 7628307720165229322), INT64_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { int64_t a = test_vec[i].a; simde__m128i r = HEDLEY_CONCAT(simde,_mm_loadu_si64)(&a); simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r)); } return 0; } static int test_simde_mm_madd_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( 24289), INT16_C( 22642), INT16_C( 24338), INT16_C( 21466), INT16_C(-21399), INT16_C(-25114), INT16_C( -38), INT16_C( 24157)), simde_mm_set_epi16(INT16_C( -9939), INT16_C(-13077), INT16_C(-13691), INT16_C(-22496), INT16_C( -7750), INT16_C( 2620), INT16_C(-25114), INT16_C( 6535)), simde_mm_set_epi32( -537497805, -816110694, 100043570, 158820327) }, { 
simde_mm_set_epi16(INT16_C( 13645), INT16_C( 20663), INT16_C( 21053), INT16_C( 5963), INT16_C( -9189), INT16_C( -1395), INT16_C( 25221), INT16_C( 27070)), simde_mm_set_epi16(INT16_C(-28936), INT16_C(-24038), INT16_C( 6837), INT16_C( 18195), INT16_C(-15316), INT16_C(-11637), INT16_C( 5200), INT16_C( 7885)), simde_mm_set_epi32( -891528914, 252436146, 156972339, 344596150) }, { simde_mm_set_epi16(INT16_C(-31367), INT16_C(-13886), INT16_C( 25125), INT16_C(-12503), INT16_C( 15451), INT16_C( -6370), INT16_C(-24113), INT16_C( 2653)), simde_mm_set_epi16(INT16_C( 5595), INT16_C(-23387), INT16_C(-11854), INT16_C( 264), INT16_C( 15071), INT16_C( 3868), INT16_C(-30127), INT16_C( 2383)), simde_mm_set_epi32( 149253517, -301132542, 208222861, 732774450) }, { simde_mm_set_epi16(INT16_C( 6250), INT16_C( 794), INT16_C( -7973), INT16_C( 27046), INT16_C( 13164), INT16_C( 16469), INT16_C( 1989), INT16_C(-24542)), simde_mm_set_epi16(INT16_C( 22452), INT16_C( -4402), INT16_C(-26550), INT16_C( 31252), INT16_C(-29251), INT16_C( -1650), INT16_C(-13867), INT16_C( 26387)), simde_mm_set_epi32( 136829812, 1056924742, -412234014, -675171217) }, { simde_mm_set_epi16(INT16_C(-17693), INT16_C(-18978), INT16_C( 22797), INT16_C( 31393), INT16_C( 32262), INT16_C(-21009), INT16_C( 9435), INT16_C( 20059)), simde_mm_set_epi16(INT16_C( 20064), INT16_C( 4406), INT16_C( 1105), INT16_C(-32185), INT16_C( 26331), INT16_C( -8672), INT16_C( 15113), INT16_C(-24381)), simde_mm_set_epi32( -438609420, -985193020, 1031680770, -346467324) }, { simde_mm_set_epi16(INT16_C( -9214), INT16_C(-31455), INT16_C(-14871), INT16_C( -8603), INT16_C( 17039), INT16_C(-27694), INT16_C( 18091), INT16_C( 27811)), simde_mm_set_epi16(INT16_C( 9903), INT16_C( 7626), INT16_C( -7009), INT16_C(-11696), INT16_C(-31989), INT16_C( 28434), INT16_C(-24743), INT16_C(-27058)), simde_mm_set_epi32( -331122072, 204851527, -1332511767, -1200135651) }, { simde_mm_set_epi16(INT16_C( 20741), INT16_C( 5382), INT16_C(-29692), INT16_C( 12589), INT16_C( 
21204), INT16_C( 3076), INT16_C(-24365), INT16_C( -1783)), simde_mm_set_epi16(INT16_C(-15203), INT16_C(-26894), INT16_C( -6878), INT16_C(-23472), INT16_C(-18994), INT16_C( 11044), INT16_C( 15739), INT16_C( -241)), simde_mm_set_epi32( -460068931, -91267432, -368777432, -383051032) }, { simde_mm_set_epi16(INT16_C( 24682), INT16_C( 17647), INT16_C(-19806), INT16_C(-13656), INT16_C( 26394), INT16_C( 4814), INT16_C( -4589), INT16_C( 17983)), simde_mm_set_epi16(INT16_C(-32304), INT16_C(-30224), INT16_C(-20430), INT16_C(-28018), INT16_C( 29012), INT16_C( 7494), INT16_C( -7871), INT16_C( 16228)), simde_mm_set_epi32(-1330690256, 787250388, 801818844, 327948143) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_madd_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_maskmoveu_si128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i mask; int8_t i[16]; int8_t r[16]; } test_vec[8] = { { simde_mm_set_epi8(INT8_C(-127), INT8_C( 121), INT8_C( 33), INT8_C( 92), INT8_C( 95), INT8_C( 30), INT8_C( 117), INT8_C( 103), INT8_C( -74), INT8_C( -18), INT8_C( 28), INT8_C( 15), INT8_C(-111), INT8_C( -65), INT8_C( 117), INT8_C( -8)), simde_mm_set_epi8(INT8_C( 42), INT8_C( 73), INT8_C( 1), INT8_C( -97), INT8_C( -45), INT8_C( 24), INT8_C( 88), INT8_C( -76), INT8_C(-109), INT8_C( 78), INT8_C( -80), INT8_C( -97), INT8_C( 85), INT8_C( 102), INT8_C( 36), INT8_C( -19)), { 0, -116, -92, 122, -68, 23, -43, 86, -50, -28, -43, -3, 19, -114, 122, 62 }, { -8, -116, -92, 122, 15, 28, -43, -74, 103, -28, -43, 95, 92, -114, 122, 62 } }, { simde_mm_set_epi8(INT8_C( 70), INT8_C( 102), INT8_C( -59), INT8_C( 6), INT8_C( -83), INT8_C( 40), INT8_C( -32), INT8_C( 43), INT8_C( 90), INT8_C(-118), INT8_C( 82), INT8_C( 24), INT8_C(-106), INT8_C( -61), INT8_C( -19), INT8_C(-101)), simde_mm_set_epi8(INT8_C( 17), INT8_C( -58), INT8_C( -68), INT8_C( 86), INT8_C( 20), INT8_C( 40), 
INT8_C( 60), INT8_C( 30), INT8_C( 31), INT8_C( 30), INT8_C( 18), INT8_C( 67), INT8_C( -9), INT8_C( 103), INT8_C( 21), INT8_C( -50)), { -125, 126, -10, -60, 19, 92, -8, -124, 81, -122, 122, 13, -58, 49, -122, 24 }, { -101, 126, -10, -106, 19, 92, -8, -124, 81, -122, 122, 13, -58, -59, 102, 24 } }, { simde_mm_set_epi8(INT8_C( -26), INT8_C(-127), INT8_C( 58), INT8_C( -79), INT8_C( -88), INT8_C(-105), INT8_C( -66), INT8_C( 41), INT8_C( 75), INT8_C( -34), INT8_C( 97), INT8_C( -55), INT8_C( -65), INT8_C( -30), INT8_C( 23), INT8_C( 28)), simde_mm_set_epi8(INT8_C(-107), INT8_C( 69), INT8_C( -28), INT8_C( -40), INT8_C( 105), INT8_C( 0), INT8_C( 114), INT8_C( 113), INT8_C( -65), INT8_C( -82), INT8_C( 87), INT8_C( -14), INT8_C( -36), INT8_C( 68), INT8_C(-120), INT8_C( 38)), { -114, 55, -16, 51, 110, -44, 59, -6, 43, -95, -82, 119, -56, 9, -47, -20 }, { -114, 23, -16, -65, -55, -44, -34, 75, 43, -95, -82, 119, -79, 58, -47, -26 } }, { simde_mm_set_epi8(INT8_C( 87), INT8_C( 99), INT8_C( 22), INT8_C( 78), INT8_C( 93), INT8_C( -44), INT8_C( -98), INT8_C( 62), INT8_C( -97), INT8_C( -50), INT8_C( -31), INT8_C(-109), INT8_C( 10), INT8_C( -86), INT8_C( -15), INT8_C( 7)), simde_mm_set_epi8(INT8_C( 96), INT8_C( 76), INT8_C( 14), INT8_C(-114), INT8_C( 84), INT8_C( -85), INT8_C( 61), INT8_C( -38), INT8_C( -45), INT8_C( -83), INT8_C( -13), INT8_C( -50), INT8_C( -35), INT8_C(-111), INT8_C(-107), INT8_C( -50)), { -8, 64, -68, 23, -101, 35, 126, 119, -47, 4, 79, 23, 113, 117, -76, 93 }, { 7, -15, -86, 10, -109, -31, -50, -97, 62, 4, -44, 23, 78, 117, -76, 93 } }, { simde_mm_set_epi8(INT8_C( -38), INT8_C(-119), INT8_C( -46), INT8_C( -13), INT8_C( -3), INT8_C(-122), INT8_C( 75), INT8_C( 123), INT8_C( 26), INT8_C( -71), INT8_C( -42), INT8_C(-124), INT8_C( 49), INT8_C( 1), INT8_C( 5), INT8_C( -90)), simde_mm_set_epi8(INT8_C( 116), INT8_C( -8), INT8_C( 33), INT8_C( 63), INT8_C(-110), INT8_C( -94), INT8_C( -34), INT8_C( 66), INT8_C( 51), INT8_C(-117), INT8_C( 28), INT8_C( -95), INT8_C( -52), 
INT8_C( 122), INT8_C( 118), INT8_C( -99)), { -45, 85, 12, 62, -89, -105, 90, -19, 48, 18, -59, -98, -30, -113, 13, 91 }, { -90, 85, 12, 49, -124, -105, -71, -19, 48, 75, -122, -3, -30, -113, -119, 91 } }, { simde_mm_set_epi8(INT8_C( 52), INT8_C( 36), INT8_C( 112), INT8_C( 70), INT8_C( 110), INT8_C( 75), INT8_C( -6), INT8_C(-101), INT8_C( 3), INT8_C( 113), INT8_C( -32), INT8_C( 119), INT8_C( -19), INT8_C( 117), INT8_C( 31), INT8_C( 119)), simde_mm_set_epi8(INT8_C( 79), INT8_C( 39), INT8_C( 6), INT8_C( 30), INT8_C( 120), INT8_C( -75), INT8_C( 26), INT8_C( 57), INT8_C( 123), INT8_C( 70), INT8_C( 40), INT8_C( 84), INT8_C( 111), INT8_C( -59), INT8_C( -79), INT8_C( -87)), { -74, -2, -124, 73, 44, 83, 18, -48, -78, 27, 30, -26, -3, 56, 89, 125 }, { 119, 31, 117, 73, 44, 83, 18, -48, -78, 27, 75, -26, -3, 56, 89, 125 } }, { simde_mm_set_epi8(INT8_C( -83), INT8_C( -42), INT8_C(-124), INT8_C( -81), INT8_C( -65), INT8_C( 46), INT8_C( -62), INT8_C( 102), INT8_C( 50), INT8_C( 6), INT8_C( -6), INT8_C( -21), INT8_C( -51), INT8_C(-114), INT8_C(-126), INT8_C( 74)), simde_mm_set_epi8(INT8_C( 127), INT8_C(-119), INT8_C( -85), INT8_C( 95), INT8_C( 70), INT8_C( -40), INT8_C( 17), INT8_C( 124), INT8_C( -61), INT8_C(-126), INT8_C( 117), INT8_C( -42), INT8_C( 62), INT8_C(-111), INT8_C(-103), INT8_C( 57)), { 108, 40, -118, -12, 90, -12, -24, 50, 104, 87, -20, -86, -53, 25, -94, -101 }, { 108, -126, -114, -12, -21, -12, 6, 50, 104, 87, 46, -86, -53, -124, -42, -101 } }, { simde_mm_set_epi8(INT8_C( -23), INT8_C( -92), INT8_C( 93), INT8_C( -78), INT8_C( -39), INT8_C( -72), INT8_C( -43), INT8_C( 1), INT8_C(-121), INT8_C( 103), INT8_C( 61), INT8_C( 82), INT8_C( 45), INT8_C(-120), INT8_C( -86), INT8_C( 51)), simde_mm_set_epi8(INT8_C( -82), INT8_C( 92), INT8_C( -46), INT8_C( -97), INT8_C( 59), INT8_C( -16), INT8_C( 95), INT8_C( -85), INT8_C( 38), INT8_C( 36), INT8_C(-125), INT8_C( 74), INT8_C( 14), INT8_C( 75), INT8_C( -74), INT8_C( -25)), { -53, 48, 63, -44, -103, 12, 49, -1, -58, 70, -18, -117, 
101, -90, 121, -31 }, { 51, -86, 63, -44, -103, 61, 49, -1, 1, 70, -72, -117, -78, 93, 121, -23 } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int8_t r[16]; simde_memcpy(r, test_vec[i].i, 16); #if defined SIMDE_X86_SSE2_NATIVE && defined SIMDE_NATIVE_ALIASES_TESTING simde_mm_maskmoveu_si128(test_vec[i].a, test_vec[i].mask, HEDLEY_REINTERPRET_CAST(char *, r)); #else simde_mm_maskmoveu_si128(test_vec[i].a, test_vec[i].mask, r); #endif simde_assert_equal_vi8(sizeof(r) / sizeof(r[0]), r, test_vec[i].r); } return 0; } static int test_simde_mm_min_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu8(164, 110, 116, 95, 43, 222, 122, 21, 12, 65, 208, 248, 108, 166, 116, 17), simde_x_mm_set_epu8( 56, 62, 214, 11, 248, 124, 69, 2, 198, 169, 227, 124, 192, 250, 140, 32), simde_x_mm_set_epu8( 56, 62, 116, 11, 43, 124, 69, 2, 12, 65, 208, 124, 108, 166, 116, 17) }, { simde_x_mm_set_epu8(136, 183, 233, 96, 11, 36, 213, 146, 238, 174, 59, 229, 56, 186, 203, 28), simde_x_mm_set_epu8( 82, 12, 9, 200, 196, 116, 127, 247, 1, 69, 178, 128, 65, 11, 179, 176), simde_x_mm_set_epu8( 82, 12, 9, 96, 11, 36, 127, 146, 1, 69, 59, 128, 56, 11, 179, 28) }, { simde_x_mm_set_epu8(164, 64, 195, 253, 35, 95, 119, 110, 106, 102, 21, 173, 49, 40, 101, 151), simde_x_mm_set_epu8(109, 25, 32, 203, 64, 71, 33, 98, 48, 43, 195, 67, 254, 158, 167, 217), simde_x_mm_set_epu8(109, 25, 32, 203, 35, 71, 33, 98, 48, 43, 21, 67, 49, 40, 101, 151) }, { simde_x_mm_set_epu8(233, 255, 136, 159, 118, 246, 37, 8, 195, 35, 70, 7, 91, 37, 20, 112), simde_x_mm_set_epu8( 4, 91, 243, 163, 160, 26, 137, 208, 146, 195, 124, 148, 53, 99, 21, 240), simde_x_mm_set_epu8( 4, 91, 136, 159, 118, 26, 37, 8, 146, 35, 70, 7, 53, 37, 20, 112) }, { simde_x_mm_set_epu8(196, 4, 110, 234, 88, 121, 133, 146, 127, 167, 173, 105, 205, 0, 197, 107), simde_x_mm_set_epu8(240, 105, 248, 55, 202, 217, 219, 230, 183, 240, 91, 164, 168, 6, 
75, 186), simde_x_mm_set_epu8(196, 4, 110, 55, 88, 121, 133, 146, 127, 167, 91, 105, 168, 0, 75, 107) }, { simde_x_mm_set_epu8(191, 108, 145, 178, 194, 118, 187, 175, 80, 196, 99, 239, 6, 206, 186, 130), simde_x_mm_set_epu8(109, 182, 208, 91, 232, 171, 41, 238, 121, 144, 203, 42, 182, 89, 69, 166), simde_x_mm_set_epu8(109, 108, 145, 91, 194, 118, 41, 175, 80, 144, 99, 42, 6, 89, 69, 130) }, { simde_x_mm_set_epu8( 5, 152, 184, 251, 233, 22, 184, 152, 12, 126, 120, 80, 191, 98, 37, 36), simde_x_mm_set_epu8(194, 116, 229, 250, 247, 241, 153, 192, 20, 172, 224, 148, 240, 246, 120, 0), simde_x_mm_set_epu8( 5, 116, 184, 250, 233, 22, 153, 152, 12, 126, 120, 80, 191, 98, 37, 0) }, { simde_x_mm_set_epu8(188, 102, 137, 134, 213, 1, 140, 166, 143, 171, 248, 89, 128, 81, 226, 136), simde_x_mm_set_epu8(246, 210, 83, 107, 44, 255, 100, 235, 99, 233, 199, 197, 1, 159, 61, 31), simde_x_mm_set_epu8(188, 102, 83, 107, 44, 1, 100, 166, 99, 171, 199, 89, 1, 81, 61, 31) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_min_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_min_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-23442), INT16_C( 29791), INT16_C( 11230), INT16_C( 31253), INT16_C( 3137), INT16_C(-12040), INT16_C( 27814), INT16_C( 29713)), simde_mm_set_epi16(INT16_C( 14398), INT16_C(-10741), INT16_C( -1924), INT16_C( 17666), INT16_C(-14679), INT16_C( -7300), INT16_C(-16134), INT16_C(-29664)), simde_mm_set_epi16(INT16_C(-23442), INT16_C(-10741), INT16_C( -1924), INT16_C( 17666), INT16_C(-14679), INT16_C(-12040), INT16_C(-16134), INT16_C(-29664)) }, { simde_mm_set_epi16(INT16_C(-30537), INT16_C( -5792), INT16_C( 2852), INT16_C(-10862), INT16_C( -4434), INT16_C( 15333), INT16_C( 14522), INT16_C(-13540)), simde_mm_set_epi16(INT16_C( 21004), INT16_C( 2504), 
INT16_C(-15244), INT16_C( 32759), INT16_C( 325), INT16_C(-19840), INT16_C( 16651), INT16_C(-19536)), simde_mm_set_epi16(INT16_C(-30537), INT16_C( -5792), INT16_C(-15244), INT16_C(-10862), INT16_C( -4434), INT16_C(-19840), INT16_C( 14522), INT16_C(-19536)) }, { simde_mm_set_epi16(INT16_C(-23488), INT16_C(-15363), INT16_C( 9055), INT16_C( 30574), INT16_C( 27238), INT16_C( 5549), INT16_C( 12584), INT16_C( 26007)), simde_mm_set_epi16(INT16_C( 27929), INT16_C( 8395), INT16_C( 16455), INT16_C( 8546), INT16_C( 12331), INT16_C(-15549), INT16_C( -354), INT16_C(-22567)), simde_mm_set_epi16(INT16_C(-23488), INT16_C(-15363), INT16_C( 9055), INT16_C( 8546), INT16_C( 12331), INT16_C(-15549), INT16_C( -354), INT16_C(-22567)) }, { simde_mm_set_epi16(INT16_C( -5633), INT16_C(-30561), INT16_C( 30454), INT16_C( 9480), INT16_C(-15581), INT16_C( 17927), INT16_C( 23333), INT16_C( 5232)), simde_mm_set_epi16(INT16_C( 1115), INT16_C( -3165), INT16_C(-24550), INT16_C(-30256), INT16_C(-27965), INT16_C( 31892), INT16_C( 13667), INT16_C( 5616)), simde_mm_set_epi16(INT16_C( -5633), INT16_C(-30561), INT16_C(-24550), INT16_C(-30256), INT16_C(-27965), INT16_C( 17927), INT16_C( 13667), INT16_C( 5232)) }, { simde_mm_set_epi16(INT16_C(-15356), INT16_C( 28394), INT16_C( 22649), INT16_C(-31342), INT16_C( 32679), INT16_C(-21143), INT16_C(-13056), INT16_C(-14997)), simde_mm_set_epi16(INT16_C( -3991), INT16_C( -1993), INT16_C(-13607), INT16_C( -9242), INT16_C(-18448), INT16_C( 23460), INT16_C(-22522), INT16_C( 19386)), simde_mm_set_epi16(INT16_C(-15356), INT16_C( -1993), INT16_C(-13607), INT16_C(-31342), INT16_C(-18448), INT16_C(-21143), INT16_C(-22522), INT16_C(-14997)) }, { simde_mm_set_epi16(INT16_C(-16532), INT16_C(-28238), INT16_C(-15754), INT16_C(-17489), INT16_C( 20676), INT16_C( 25583), INT16_C( 1742), INT16_C(-17790)), simde_mm_set_epi16(INT16_C( 28086), INT16_C(-12197), INT16_C( -5973), INT16_C( 10734), INT16_C( 31120), INT16_C(-13526), INT16_C(-18855), INT16_C( 17830)), 
simde_mm_set_epi16(INT16_C(-16532), INT16_C(-28238), INT16_C(-15754), INT16_C(-17489), INT16_C( 20676), INT16_C(-13526), INT16_C(-18855), INT16_C(-17790)) }, { simde_mm_set_epi16(INT16_C( 1432), INT16_C(-18181), INT16_C( -5866), INT16_C(-18280), INT16_C( 3198), INT16_C( 30800), INT16_C(-16542), INT16_C( 9508)), simde_mm_set_epi16(INT16_C(-15756), INT16_C( -6662), INT16_C( -2063), INT16_C(-26176), INT16_C( 5292), INT16_C( -8044), INT16_C( -3850), INT16_C( 30720)), simde_mm_set_epi16(INT16_C(-15756), INT16_C(-18181), INT16_C( -5866), INT16_C(-26176), INT16_C( 3198), INT16_C( -8044), INT16_C(-16542), INT16_C( 9508)) }, { simde_mm_set_epi16(INT16_C(-17306), INT16_C(-30330), INT16_C(-11007), INT16_C(-29530), INT16_C(-28757), INT16_C( -1959), INT16_C(-32687), INT16_C( -7544)), simde_mm_set_epi16(INT16_C( -2350), INT16_C( 21355), INT16_C( 11519), INT16_C( 25835), INT16_C( 25577), INT16_C(-14395), INT16_C( 415), INT16_C( 15647)), simde_mm_set_epi16(INT16_C(-17306), INT16_C(-30330), INT16_C(-11007), INT16_C(-29530), INT16_C(-28757), INT16_C(-14395), INT16_C(-32687), INT16_C( -7544)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_min_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_min_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[10] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -927.67), SIMDE_FLOAT64_C( -514.32)), simde_mm_set_pd(SIMDE_FLOAT64_C( 342.71), SIMDE_FLOAT64_C( 927.58)), simde_mm_set_pd(SIMDE_FLOAT64_C( -927.67), SIMDE_FLOAT64_C( -514.32)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -704.64), SIMDE_FLOAT64_C( 925.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( -589.60), SIMDE_FLOAT64_C( -498.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( -704.64), SIMDE_FLOAT64_C( -498.63)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -244.39), SIMDE_FLOAT64_C( 572.76)), simde_mm_set_pd(SIMDE_FLOAT64_C( -10.04), SIMDE_FLOAT64_C( 
293.99)), simde_mm_set_pd(SIMDE_FLOAT64_C( -244.39), SIMDE_FLOAT64_C( 293.99)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 321.70), SIMDE_FLOAT64_C( -283.39)), simde_mm_set_pd(SIMDE_FLOAT64_C( 60.35), SIMDE_FLOAT64_C( -248.75)), simde_mm_set_pd(SIMDE_FLOAT64_C( 60.35), SIMDE_FLOAT64_C( -283.39)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -67.84), SIMDE_FLOAT64_C( 763.91)), simde_mm_set_pd(SIMDE_FLOAT64_C( 150.47), SIMDE_FLOAT64_C( -773.85)), simde_mm_set_pd(SIMDE_FLOAT64_C( -67.84), SIMDE_FLOAT64_C( -773.85)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -42.86), SIMDE_FLOAT64_C( 169.28)), simde_mm_set_pd(SIMDE_FLOAT64_C( -820.89), SIMDE_FLOAT64_C( 325.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( -820.89), SIMDE_FLOAT64_C( 169.28)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -976.90), SIMDE_FLOAT64_C( 883.11)), simde_mm_set_pd(SIMDE_FLOAT64_C( -450.39), SIMDE_FLOAT64_C( -249.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( -976.90), SIMDE_FLOAT64_C( -249.21)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 495.76), SIMDE_FLOAT64_C( 415.93)), simde_mm_set_pd(SIMDE_FLOAT64_C( -979.87), SIMDE_FLOAT64_C( -567.72)), simde_mm_set_pd(SIMDE_FLOAT64_C( -979.87), SIMDE_FLOAT64_C( -567.72)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -567.72)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -567.72)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -567.72)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 495.76), SIMDE_FLOAT64_C( 0.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -979.87), SIMDE_FLOAT64_C( 0.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -979.87), SIMDE_FLOAT64_C( 0.00)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_min_pd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_min_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -927.67), SIMDE_FLOAT64_C( -514.32)), 
simde_mm_set_pd(SIMDE_FLOAT64_C( 342.71), SIMDE_FLOAT64_C( 927.58)), simde_mm_set_pd(SIMDE_FLOAT64_C( -927.67), SIMDE_FLOAT64_C( -514.32)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -704.64), SIMDE_FLOAT64_C( 925.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( -589.60), SIMDE_FLOAT64_C( -498.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( -704.64), SIMDE_FLOAT64_C( -498.63)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -244.39), SIMDE_FLOAT64_C( 572.76)), simde_mm_set_pd(SIMDE_FLOAT64_C( -10.04), SIMDE_FLOAT64_C( 293.99)), simde_mm_set_pd(SIMDE_FLOAT64_C( -244.39), SIMDE_FLOAT64_C( 293.99)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 321.70), SIMDE_FLOAT64_C( -283.39)), simde_mm_set_pd(SIMDE_FLOAT64_C( 60.35), SIMDE_FLOAT64_C( -248.75)), simde_mm_set_pd(SIMDE_FLOAT64_C( 321.70), SIMDE_FLOAT64_C( -283.39)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -67.84), SIMDE_FLOAT64_C( 763.91)), simde_mm_set_pd(SIMDE_FLOAT64_C( 150.47), SIMDE_FLOAT64_C( -773.85)), simde_mm_set_pd(SIMDE_FLOAT64_C( -67.84), SIMDE_FLOAT64_C( -773.85)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -42.86), SIMDE_FLOAT64_C( 169.28)), simde_mm_set_pd(SIMDE_FLOAT64_C( -820.89), SIMDE_FLOAT64_C( 325.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( -42.86), SIMDE_FLOAT64_C( 169.28)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -976.90), SIMDE_FLOAT64_C( 883.11)), simde_mm_set_pd(SIMDE_FLOAT64_C( -450.39), SIMDE_FLOAT64_C( -249.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( -976.90), SIMDE_FLOAT64_C( -249.21)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 495.76), SIMDE_FLOAT64_C( 415.93)), simde_mm_set_pd(SIMDE_FLOAT64_C( -979.87), SIMDE_FLOAT64_C( -567.72)), simde_mm_set_pd(SIMDE_FLOAT64_C( 495.76), SIMDE_FLOAT64_C( -567.72)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_min_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm_max_epu8.
 * Each of the 8 rows holds two inputs (a, b) built with simde_x_mm_set_epu8
 * and the expected result r; each byte of r is the larger of the matching
 * unsigned bytes of a and b.
 * The loop calls simde_mm_max_epu8(a, b) per row and requires exact equality
 * with r via simde_assert_m128i_u8. Returns 0 after the loop. */
static int test_simde_mm_max_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { {
simde_x_mm_set_epu8(168, 216, 116, 83, 89, 36, 70, 43, 213, 80, 178, 134, 66, 113, 243, 129), simde_x_mm_set_epu8(121, 26, 77, 212, 58, 167, 83, 180, 236, 50, 65, 112, 248, 228, 87, 231), simde_x_mm_set_epu8(168, 216, 116, 212, 89, 167, 83, 180, 236, 80, 178, 134, 248, 228, 243, 231) }, { simde_x_mm_set_epu8(157, 143, 244, 146, 49, 140, 145, 221, 80, 79, 154, 71, 49, 213, 105, 22), simde_x_mm_set_epu8( 81, 58, 115, 104, 26, 180, 63, 33, 16, 205, 98, 228, 235, 156, 147, 109), simde_x_mm_set_epu8(157, 143, 244, 146, 49, 180, 145, 221, 80, 205, 154, 228, 235, 213, 147, 109) }, { simde_x_mm_set_epu8(120, 153, 102, 244, 149, 171, 101, 141, 231, 205, 156, 11, 214, 255, 28, 215), simde_x_mm_set_epu8( 89, 227, 119, 48, 219, 88, 0, 68, 146, 196, 199, 34, 143, 246, 184, 31), simde_x_mm_set_epu8(120, 227, 119, 244, 219, 171, 101, 141, 231, 205, 199, 34, 214, 255, 184, 215) }, { simde_x_mm_set_epu8(201, 221, 39, 38, 119, 106, 89, 236, 8, 81, 136, 17, 62, 33, 200, 24), simde_x_mm_set_epu8( 76, 153, 167, 42, 171, 206, 46, 181, 37, 117, 72, 251, 153, 91, 107, 96), simde_x_mm_set_epu8(201, 221, 167, 42, 171, 206, 89, 236, 37, 117, 136, 251, 153, 91, 200, 96) }, { simde_x_mm_set_epu8( 34, 94, 125, 66, 238, 110, 110, 27, 90, 179, 184, 250, 202, 62, 132, 68), simde_x_mm_set_epu8(135, 208, 31, 76, 51, 5, 50, 220, 43, 120, 10, 131, 247, 241, 134, 232), simde_x_mm_set_epu8(135, 208, 125, 76, 238, 110, 110, 220, 90, 179, 184, 250, 247, 241, 134, 232) }, { simde_x_mm_set_epu8( 12, 112, 35, 12, 111, 1, 16, 229, 119, 199, 69, 96, 220, 123, 153, 230), simde_x_mm_set_epu8(147, 155, 56, 136, 236, 16, 93, 16, 43, 253, 136, 239, 147, 44, 146, 0), simde_x_mm_set_epu8(147, 155, 56, 136, 236, 16, 93, 229, 119, 253, 136, 239, 220, 123, 153, 230) }, { simde_x_mm_set_epu8(138, 177, 86, 183, 144, 112, 42, 67, 100, 123, 214, 234, 34, 240, 19, 10), simde_x_mm_set_epu8( 81, 53, 255, 195, 169, 127, 131, 109, 181, 161, 246, 113, 87, 20, 157, 194), simde_x_mm_set_epu8(138, 177, 255, 195, 169, 127, 131, 109,
181, 161, 246, 234, 87, 240, 157, 194) }, { simde_x_mm_set_epu8( 5, 26, 135, 12, 151, 226, 187, 12, 213, 244, 209, 245, 136, 13, 164, 249), simde_x_mm_set_epu8( 92, 57, 200, 208, 212, 214, 211, 217, 65, 228, 179, 64, 34, 236, 220, 208), simde_x_mm_set_epu8( 92, 57, 200, 208, 212, 226, 211, 217, 213, 244, 209, 245, 136, 236, 220, 249) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_max_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u8(r, ==, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm_max_epi16.
 * Each of the 8 rows holds two inputs (a, b) built with simde_mm_set_epi16 and
 * the expected result r; each 16-bit lane of r is the larger of the matching
 * signed lanes of a and b.
 * The loop calls simde_mm_max_epi16(a, b) per row and requires exact equality
 * with r via simde_assert_m128i_i16. Returns 0 after the loop. */
static int test_simde_mm_max_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-22312), INT16_C( 29779), INT16_C( 22820), INT16_C( 17963), INT16_C(-10928), INT16_C(-19834), INT16_C( 17009), INT16_C( -3199)), simde_mm_set_epi16(INT16_C( 31002), INT16_C( 19924), INT16_C( 15015), INT16_C( 21428), INT16_C( -5070), INT16_C( 16752), INT16_C( -1820), INT16_C( 22503)), simde_mm_set_epi16(INT16_C( 31002), INT16_C( 29779), INT16_C( 22820), INT16_C( 21428), INT16_C( -5070), INT16_C( 16752), INT16_C( 17009), INT16_C( 22503)) }, { simde_mm_set_epi16(INT16_C(-25201), INT16_C( -2926), INT16_C( 12684), INT16_C(-28195), INT16_C( 20559), INT16_C(-26041), INT16_C( 12757), INT16_C( 26902)), simde_mm_set_epi16(INT16_C( 20794), INT16_C( 29544), INT16_C( 6836), INT16_C( 16161), INT16_C( 4301), INT16_C( 25316), INT16_C( -5220), INT16_C(-27795)), simde_mm_set_epi16(INT16_C( 20794), INT16_C( 29544), INT16_C( 12684), INT16_C( 16161), INT16_C( 20559), INT16_C( 25316), INT16_C( 12757), INT16_C( 26902)) }, { simde_mm_set_epi16(INT16_C( 30873), INT16_C( 26356), INT16_C(-27221), INT16_C( 25997), INT16_C( -6195), INT16_C(-25589), INT16_C(-10497), INT16_C( 7383)), simde_mm_set_epi16(INT16_C( 23011), INT16_C( 30512), INT16_C( -9384), INT16_C( 68), INT16_C(-27964), INT16_C(-14558), INT16_C(-28682), INT16_C(-18401)), simde_mm_set_epi16(INT16_C( 30873), INT16_C( 30512), INT16_C(
-9384), INT16_C( 25997), INT16_C( -6195), INT16_C(-14558), INT16_C(-10497), INT16_C( 7383)) }, { simde_mm_set_epi16(INT16_C(-13859), INT16_C( 10022), INT16_C( 30570), INT16_C( 23020), INT16_C( 2129), INT16_C(-30703), INT16_C( 15905), INT16_C(-14312)), simde_mm_set_epi16(INT16_C( 19609), INT16_C(-22742), INT16_C(-21554), INT16_C( 11957), INT16_C( 9589), INT16_C( 18683), INT16_C(-26277), INT16_C( 27488)), simde_mm_set_epi16(INT16_C( 19609), INT16_C( 10022), INT16_C( 30570), INT16_C( 23020), INT16_C( 9589), INT16_C( 18683), INT16_C( 15905), INT16_C( 27488)) }, { simde_mm_set_epi16(INT16_C( 8798), INT16_C( 32066), INT16_C( -4498), INT16_C( 28187), INT16_C( 23219), INT16_C(-18182), INT16_C(-13762), INT16_C(-31676)), simde_mm_set_epi16(INT16_C(-30768), INT16_C( 8012), INT16_C( 13061), INT16_C( 13020), INT16_C( 11128), INT16_C( 2691), INT16_C( -2063), INT16_C(-31000)), simde_mm_set_epi16(INT16_C( 8798), INT16_C( 32066), INT16_C( 13061), INT16_C( 28187), INT16_C( 23219), INT16_C( 2691), INT16_C( -2063), INT16_C(-31000)) }, { simde_mm_set_epi16(INT16_C( 3184), INT16_C( 8972), INT16_C( 28417), INT16_C( 4325), INT16_C( 30663), INT16_C( 17760), INT16_C( -9093), INT16_C(-26138)), simde_mm_set_epi16(INT16_C(-27749), INT16_C( 14472), INT16_C( -5104), INT16_C( 23824), INT16_C( 11261), INT16_C(-30481), INT16_C(-27860), INT16_C(-28160)), simde_mm_set_epi16(INT16_C( 3184), INT16_C( 14472), INT16_C( 28417), INT16_C( 23824), INT16_C( 30663), INT16_C( 17760), INT16_C( -9093), INT16_C(-26138)) }, { simde_mm_set_epi16(INT16_C(-30031), INT16_C( 22199), INT16_C(-28560), INT16_C( 10819), INT16_C( 25723), INT16_C(-10518), INT16_C( 8944), INT16_C( 4874)), simde_mm_set_epi16(INT16_C( 20789), INT16_C( -61), INT16_C(-22145), INT16_C(-31891), INT16_C(-19039), INT16_C( -2447), INT16_C( 22292), INT16_C(-25150)), simde_mm_set_epi16(INT16_C( 20789), INT16_C( 22199), INT16_C(-22145), INT16_C( 10819), INT16_C( 25723), INT16_C( -2447), INT16_C( 22292), INT16_C( 4874)) }, { simde_mm_set_epi16(INT16_C( 
1306), INT16_C(-30964), INT16_C(-26654), INT16_C(-17652), INT16_C(-10764), INT16_C(-11787), INT16_C(-30707), INT16_C(-23303)), simde_mm_set_epi16(INT16_C( 23609), INT16_C(-14128), INT16_C(-11050), INT16_C(-11303), INT16_C( 16868), INT16_C(-19648), INT16_C( 8940), INT16_C( -9008)), simde_mm_set_epi16(INT16_C( 23609), INT16_C(-14128), INT16_C(-11050), INT16_C(-11303), INT16_C( 16868), INT16_C(-11787), INT16_C( 8940), INT16_C( -9008)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_max_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm_max_pd.
 * Each of the 8 rows holds two inputs (a, b) and the expected result r; each
 * lane of r is the larger of the matching lanes of a and b.
 * The loop calls simde_mm_max_pd(a, b) per row and compares against r with
 * simde_assert_m128d_close (precision argument 1). Returns 0 after the loop. */
static int test_simde_mm_max_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -303.58), SIMDE_FLOAT64_C( -480.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 319.11), SIMDE_FLOAT64_C( 666.53)), simde_mm_set_pd(SIMDE_FLOAT64_C( 319.11), SIMDE_FLOAT64_C( 666.53)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -541.77), SIMDE_FLOAT64_C( 944.47)), simde_mm_set_pd(SIMDE_FLOAT64_C( -53.88), SIMDE_FLOAT64_C( 845.28)), simde_mm_set_pd(SIMDE_FLOAT64_C( -53.88), SIMDE_FLOAT64_C( 944.47)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -612.90), SIMDE_FLOAT64_C( -610.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( 230.96), SIMDE_FLOAT64_C( -372.57)), simde_mm_set_pd(SIMDE_FLOAT64_C( 230.96), SIMDE_FLOAT64_C( -372.57)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -791.37), SIMDE_FLOAT64_C( 840.72)), simde_mm_set_pd(SIMDE_FLOAT64_C( -365.40), SIMDE_FLOAT64_C( -868.73)), simde_mm_set_pd(SIMDE_FLOAT64_C( -365.40), SIMDE_FLOAT64_C( 840.72)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 169.29), SIMDE_FLOAT64_C( 679.66)), simde_mm_set_pd(SIMDE_FLOAT64_C( -57.82), SIMDE_FLOAT64_C( 810.96)), simde_mm_set_pd(SIMDE_FLOAT64_C( 169.29), SIMDE_FLOAT64_C( 810.96)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 713.62), SIMDE_FLOAT64_C( 124.72)), simde_mm_set_pd(SIMDE_FLOAT64_C( -297.75), SIMDE_FLOAT64_C( 146.63)),
simde_mm_set_pd(SIMDE_FLOAT64_C( 713.62), SIMDE_FLOAT64_C( 146.63)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -67.07), SIMDE_FLOAT64_C( -514.59)), simde_mm_set_pd(SIMDE_FLOAT64_C( 577.06), SIMDE_FLOAT64_C( -935.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( 577.06), SIMDE_FLOAT64_C( -514.59)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 342.23), SIMDE_FLOAT64_C( 198.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( -401.56), SIMDE_FLOAT64_C( -707.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( 342.23), SIMDE_FLOAT64_C( 198.10)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_max_pd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm_max_sd.
 * Each of the 8 rows holds inputs (a, b) and the expected result r. In every
 * row the second simde_mm_set_pd argument of r is the larger of the second
 * arguments of a and b, while the first argument of r equals that of a.
 * The loop calls simde_mm_max_sd(a, b) per row and compares against r with
 * simde_assert_m128d_close (precision argument 1). Returns 0 after the loop. */
static int test_simde_mm_max_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -303.58), SIMDE_FLOAT64_C( -480.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 319.11), SIMDE_FLOAT64_C( 666.53)), simde_mm_set_pd(SIMDE_FLOAT64_C( -303.58), SIMDE_FLOAT64_C( 666.53)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -541.77), SIMDE_FLOAT64_C( 944.47)), simde_mm_set_pd(SIMDE_FLOAT64_C( -53.88), SIMDE_FLOAT64_C( 845.28)), simde_mm_set_pd(SIMDE_FLOAT64_C( -541.77), SIMDE_FLOAT64_C( 944.47)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -612.90), SIMDE_FLOAT64_C( -610.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( 230.96), SIMDE_FLOAT64_C( -372.57)), simde_mm_set_pd(SIMDE_FLOAT64_C( -612.90), SIMDE_FLOAT64_C( -372.57)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -791.37), SIMDE_FLOAT64_C( 840.72)), simde_mm_set_pd(SIMDE_FLOAT64_C( -365.40), SIMDE_FLOAT64_C( -868.73)), simde_mm_set_pd(SIMDE_FLOAT64_C( -791.37), SIMDE_FLOAT64_C( 840.72)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 169.29), SIMDE_FLOAT64_C( 679.66)), simde_mm_set_pd(SIMDE_FLOAT64_C( -57.82), SIMDE_FLOAT64_C( 810.96)), simde_mm_set_pd(SIMDE_FLOAT64_C( 169.29), SIMDE_FLOAT64_C( 810.96)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 713.62), SIMDE_FLOAT64_C( 124.72)),
simde_mm_set_pd(SIMDE_FLOAT64_C( -297.75), SIMDE_FLOAT64_C( 146.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( 713.62), SIMDE_FLOAT64_C( 146.63)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -67.07), SIMDE_FLOAT64_C( -514.59)), simde_mm_set_pd(SIMDE_FLOAT64_C( 577.06), SIMDE_FLOAT64_C( -935.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( -67.07), SIMDE_FLOAT64_C( -514.59)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 342.23), SIMDE_FLOAT64_C( 198.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( -401.56), SIMDE_FLOAT64_C( -707.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( 342.23), SIMDE_FLOAT64_C( 198.10)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_max_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm_move_epi64.
 * Each of the 8 rows holds one input a and the expected result r; r keeps the
 * second simde_mm_set_epi64x argument of a and has 0 as its first argument.
 * The loop calls simde_mm_move_epi64(a) per row and requires exact equality
 * with r via simde_assert_m128i_i64. Returns 0 after the loop. */
static int test_simde_mm_move_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C(-2982745844705455901), INT64_C( 4775804171231816037)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 4775804171231816037)) }, { simde_mm_set_epi64x(INT64_C( 5762346410957661033), INT64_C( 2977172799723381810)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 2977172799723381810)) }, { simde_mm_set_epi64x(INT64_C( 1008079402021318109), INT64_C( 2502061726771043310)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 2502061726771043310)) }, { simde_mm_set_epi64x(INT64_C( 5339677830223010942), INT64_C( 8124798084034539527)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 8124798084034539527)) }, { simde_mm_set_epi64x(INT64_C(-4521066662096167363), INT64_C( -947809468227977762)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( -947809468227977762)) }, { simde_mm_set_epi64x(INT64_C(-1218747510360922612), INT64_C( 3122441631876631480)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 3122441631876631480)) }, { simde_mm_set_epi64x(INT64_C(-3523922424397514946), INT64_C( -108841976580709576)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( -108841976580709576)) }, {
simde_mm_set_epi64x(INT64_C(-3961221708434347271), INT64_C(-1875395594913971276)), simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1875395594913971276)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_move_epi64(test_vec[i].a); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm_move_sd.
 * Each of the 8 rows holds inputs (a, b) and the expected result r; r takes
 * its first simde_mm_set_pd argument from a and its second from b.
 * The loop calls simde_mm_move_sd(a, b) per row and compares against r with
 * simde_assert_m128d_close (precision argument 1). Returns 0 after the loop. */
static int test_simde_mm_move_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 570.81), SIMDE_FLOAT64_C( -941.79)), simde_mm_set_pd(SIMDE_FLOAT64_C( -875.32), SIMDE_FLOAT64_C( -356.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( 570.81), SIMDE_FLOAT64_C( -356.30)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -816.08), SIMDE_FLOAT64_C( 126.72)), simde_mm_set_pd(SIMDE_FLOAT64_C( -886.56), SIMDE_FLOAT64_C( 985.06)), simde_mm_set_pd(SIMDE_FLOAT64_C( -816.08), SIMDE_FLOAT64_C( 985.06)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 358.01), SIMDE_FLOAT64_C( 59.13)), simde_mm_set_pd(SIMDE_FLOAT64_C( -61.40), SIMDE_FLOAT64_C( -717.39)), simde_mm_set_pd(SIMDE_FLOAT64_C( 358.01), SIMDE_FLOAT64_C( -717.39)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 110.04), SIMDE_FLOAT64_C( -44.09)), simde_mm_set_pd(SIMDE_FLOAT64_C( -614.96), SIMDE_FLOAT64_C( -267.93)), simde_mm_set_pd(SIMDE_FLOAT64_C( 110.04), SIMDE_FLOAT64_C( -267.93)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 571.79), SIMDE_FLOAT64_C( -816.23)), simde_mm_set_pd(SIMDE_FLOAT64_C( 917.45), SIMDE_FLOAT64_C( 287.41)), simde_mm_set_pd(SIMDE_FLOAT64_C( 571.79), SIMDE_FLOAT64_C( 287.41)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 827.50), SIMDE_FLOAT64_C( 261.09)), simde_mm_set_pd(SIMDE_FLOAT64_C( 478.77), SIMDE_FLOAT64_C( 33.99)), simde_mm_set_pd(SIMDE_FLOAT64_C( 827.50), SIMDE_FLOAT64_C( 33.99)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -335.82), SIMDE_FLOAT64_C( 465.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( -993.24), SIMDE_FLOAT64_C( 100.89)), simde_mm_set_pd(SIMDE_FLOAT64_C( -335.82), SIMDE_FLOAT64_C( 100.89)) }, {
simde_mm_set_pd(SIMDE_FLOAT64_C( 415.58), SIMDE_FLOAT64_C( -984.83)), simde_mm_set_pd(SIMDE_FLOAT64_C( 764.57), SIMDE_FLOAT64_C( 672.72)), simde_mm_set_pd(SIMDE_FLOAT64_C( 415.58), SIMDE_FLOAT64_C( 672.72)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_move_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm_movemask_epi8.
 * Each row holds a 16-byte input a (simde_mm_set_epi8) and the expected
 * int32_t mask r. In the table, a negative byte sets one bit of r: the first
 * simde_mm_set_epi8 argument maps to bit 15 and the last to bit 0.
 * The array is unsized (test_vec[]), so the row count comes from the
 * initializer. The loop calls simde_mm_movemask_epi8(a) per row and checks
 * the result with simde_assert_equal_i32. Returns 0 after the loop. */
static int test_simde_mm_movemask_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; int32_t r; } test_vec[] = { { simde_mm_set_epi8(INT8_C(-125), INT8_C( -40), INT8_C( -7), INT8_C( -71), INT8_C( -75), INT8_C( 99), INT8_C( 101), INT8_C( -5), INT8_C( -71), INT8_C( -91), INT8_C( -60), INT8_C( 9), INT8_C( -27), INT8_C( -81), INT8_C( 5), INT8_C( 97)), INT32_C(63980) }, { simde_mm_set_epi8(INT8_C( 44), INT8_C( -98), INT8_C( 82), INT8_C(-127), INT8_C( -28), INT8_C( 122), INT8_C( -22), INT8_C( 46), INT8_C( -41), INT8_C( -35), INT8_C( 15), INT8_C( 43), INT8_C( -37), INT8_C( -12), INT8_C( 17), INT8_C( -17)), INT32_C(23245) }, { simde_mm_set_epi8(INT8_C( -53), INT8_C( -99), INT8_C( 91), INT8_C( -56), INT8_C( 10), INT8_C( 114), INT8_C(-120), INT8_C( 67), INT8_C( -82), INT8_C( 13), INT8_C( 104), INT8_C( 1), INT8_C( 15), INT8_C(-115), INT8_C( 16), INT8_C( 33)), INT32_C(53892) }, { simde_mm_set_epi8(INT8_C( 109), INT8_C( -5), INT8_C( -45), INT8_C( 60), INT8_C( -20), INT8_C( -7), INT8_C( -24), INT8_C( 63), INT8_C( 61), INT8_C( -94), INT8_C(-110), INT8_C( 16), INT8_C( 117), INT8_C( -23), INT8_C( -49), INT8_C( -74)), INT32_C(28263) }, { simde_mm_set_epi8(INT8_C( -88), INT8_C( 110), INT8_C(-108), INT8_C( -88), INT8_C( 28), INT8_C( 110), INT8_C( 0), INT8_C( -12), INT8_C( -90), INT8_C( 44), INT8_C( -42), INT8_C( -87), INT8_C( -48), INT8_C( -87), INT8_C( -21), INT8_C( -64)), INT32_C(45503) }, { simde_mm_set_epi8(INT8_C( 121), INT8_C(-111), INT8_C( -1), INT8_C( -61), INT8_C( 67), INT8_C( 90), INT8_C( 10), INT8_C( 65), INT8_C( 36), INT8_C( -60), INT8_C( 93), INT8_C( -3),
INT8_C(-112), INT8_C( -8), INT8_C( 55), INT8_C( -49)), INT32_C(28765) }, { simde_mm_set_epi8(INT8_C( 92), INT8_C( -27), INT8_C( 37), INT8_C( -87), INT8_C( 58), INT8_C( 108), INT8_C( -50), INT8_C( -10), INT8_C( 5), INT8_C( 21), INT8_C( 14), INT8_C( 72), INT8_C( -76), INT8_C( 21), INT8_C(-104), INT8_C( 110)), INT32_C(21258) }, { simde_mm_set_epi8(INT8_C( -60), INT8_C( 23), INT8_C( -54), INT8_C( 54), INT8_C( 31), INT8_C( 13), INT8_C( -93), INT8_C( 18), INT8_C( -62), INT8_C(-128), INT8_C( 70), INT8_C( 59), INT8_C( 17), INT8_C( 49), INT8_C( 95), INT8_C( -96)), INT32_C(41665) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int32_t r = simde_mm_movemask_epi8(test_vec[i].a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm_movemask_pd.
 * Each of the 8 rows holds an input a and the expected int32_t mask r. In the
 * table, bit 1 of r is set when the first simde_mm_set_pd argument is negative
 * and bit 0 when the second is negative.
 * The loop calls simde_mm_movemask_pd(a) per row and checks the result with
 * simde_assert_equal_i32. Returns 0 after the loop. */
static int test_simde_mm_movemask_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; int32_t r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -532.45), SIMDE_FLOAT64_C( 863.01)), INT32_C(2) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -749.24), SIMDE_FLOAT64_C( -869.97)), INT32_C(3) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 994.23), SIMDE_FLOAT64_C( 351.47)), INT32_C(0) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 413.53), SIMDE_FLOAT64_C( -655.32)), INT32_C(1) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -253.74), SIMDE_FLOAT64_C( -2.37)), INT32_C(3) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 696.22), SIMDE_FLOAT64_C( -699.75)), INT32_C(1) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 55.24), SIMDE_FLOAT64_C( -722.45)), INT32_C(1) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -592.60), SIMDE_FLOAT64_C( 141.10)), INT32_C(2) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int32_t r = simde_mm_movemask_pd(test_vec[i].a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm_movepi64_pi64.
 * Each of the 8 rows holds a 128-bit input a and the expected simde__m64 r;
 * r carries the same value as the second simde_mm_set_epi64x argument of a.
 * The loop calls simde_mm_movepi64_pi64(a) per row and requires exact
 * equality with r via simde_assert_m64_i64. Returns 0 after the loop. */
static int test_simde_mm_movepi64_pi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m64 r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C(-3369091548753669372), INT64_C(-3862634862308997761)),
simde_x_mm_set_pi64(INT64_C(-3862634862308997761)) }, { simde_mm_set_epi64x(INT64_C( 1195923961730132400), INT64_C(-4203048506958717476)), simde_x_mm_set_pi64(INT64_C(-4203048506958717476)) }, { simde_mm_set_epi64x(INT64_C( 4316262850566382732), INT64_C( 743544812785944809)), simde_x_mm_set_pi64(INT64_C(743544812785944809)) }, { simde_mm_set_epi64x(INT64_C( 9153964415619232912), INT64_C( 7102186508934354546)), simde_x_mm_set_pi64(INT64_C(7102186508934354546)) }, { simde_mm_set_epi64x(INT64_C( -149536427124813706), INT64_C(-2645616526676309339)), simde_x_mm_set_pi64(INT64_C(-2645616526676309339)) }, { simde_mm_set_epi64x(INT64_C( 7660292028637459230), INT64_C(-4472173852492382560)), simde_x_mm_set_pi64(INT64_C(-4472173852492382560)) }, { simde_mm_set_epi64x(INT64_C( 2373412759770157312), INT64_C( -249935199655019513)), simde_x_mm_set_pi64(INT64_C(-249935199655019513)) }, { simde_mm_set_epi64x(INT64_C(-2495482311671930573), INT64_C( 7782795372632782061)), simde_x_mm_set_pi64(INT64_C(7782795372632782061)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_movepi64_pi64(test_vec[i].a); simde_assert_m64_i64(r, ==, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm_movpi64_epi64.
 * Each of the 8 rows holds a simde__m64 input a and the expected 128-bit r;
 * r has the value of a as its second simde_mm_set_epi64x argument and 0 as
 * its first.
 * The loop calls simde_mm_movpi64_epi64(a) per row and requires exact
 * equality with r via simde_assert_m128i_i64. Returns 0 after the loop. */
static int test_simde_mm_movpi64_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_pi64(INT64_C(8307669974137432024)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 8307669974137432024)) }, { simde_x_mm_set_pi64(INT64_C(-6174863101947913477)), simde_mm_set_epi64x(INT64_C( 0), INT64_C(-6174863101947913477)) }, { simde_x_mm_set_pi64(INT64_C(-3709498539865079997)), simde_mm_set_epi64x(INT64_C( 0), INT64_C(-3709498539865079997)) }, { simde_x_mm_set_pi64(INT64_C(-5655514474221449119)), simde_mm_set_epi64x(INT64_C( 0), INT64_C(-5655514474221449119)) }, { simde_x_mm_set_pi64(INT64_C(-4407711847161442183)), simde_mm_set_epi64x(INT64_C( 0), INT64_C(-4407711847161442183)) }, { simde_x_mm_set_pi64(INT64_C(-7730135383563833284)),
simde_mm_set_epi64x(INT64_C( 0), INT64_C(-7730135383563833284)) }, { simde_x_mm_set_pi64(INT64_C(1417829150564629578)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 1417829150564629578)) }, { simde_x_mm_set_pi64(INT64_C(5667864625160412978)), simde_mm_set_epi64x(INT64_C( 0), INT64_C( 5667864625160412978)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_movpi64_epi64(test_vec[i].a); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm_mul_epu32.
 * Each of the 8 rows holds two inputs (a, b) built with simde_x_mm_set_epu32
 * and the expected result r built with simde_x_mm_set_epu64x. In the table,
 * the first 64-bit value of r is the product of the second arguments of a and
 * b, and the second 64-bit value is the product of their fourth arguments.
 * The loop calls simde_mm_mul_epu32(a, b) per row and requires exact equality
 * with r via simde_assert_m128i_u64. Returns 0 after the loop. */
static int test_simde_mm_mul_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu32 (UINT32_C(1251634950), UINT32_C( 3010014325), UINT32_C( 3844926313), UINT32_C( 887004237)), simde_x_mm_set_epu32 (UINT32_C( 771412494), UINT32_C( 328239887), UINT32_C( 3360452571), UINT32_C( 298292680)), simde_x_mm_set_epu64x(UINT64_C( 988006761906381275), UINT64_C( 264586871026085160)) }, { simde_x_mm_set_epu32 (UINT32_C(4076207020), UINT32_C( 368393187), UINT32_C( 3498508084), UINT32_C( 981141316)), simde_x_mm_set_epu32 (UINT32_C( 81654802), UINT32_C( 140153335), UINT32_C( 2110173535), UINT32_C( 3134478151)), simde_x_mm_set_epu64x(UINT64_C( 51631533749328645), UINT64_C( 3075366018045386716)) }, { simde_x_mm_set_epu32 (UINT32_C(4261415154), UINT32_C( 846454649), UINT32_C( 108194122), UINT32_C( 4167432393)), simde_x_mm_set_epu32 (UINT32_C(2698880481), UINT32_C( 1287129030), UINT32_C( 2616406220), UINT32_C( 1248265871)), simde_x_mm_set_epu64x(UINT64_C( 1089496351306360470), UINT64_C( 5202063625881759303)) }, { simde_x_mm_set_epu32 (UINT32_C( 911193301), UINT32_C( 1110766386), UINT32_C( 3009613617), UINT32_C( 1645784878)), simde_x_mm_set_epu32 (UINT32_C(3094480659), UINT32_C( 3697181600), UINT32_C( 4236850839), UINT32_C( 2133678416)), simde_x_mm_set_epu64x(UINT64_C( 4106705044217697600), UINT64_C( 3511575671567793248)) }, { simde_x_mm_set_epu32 (UINT32_C(2154112155), UINT32_C( 2960710803), UINT32_C( 2851801912),
UINT32_C( 678710951)), simde_x_mm_set_epu32 (UINT32_C(4001207654), UINT32_C( 4056994829), UINT32_C( 1341523746), UINT32_C( 568161818)), simde_x_mm_set_epu64x(UINT64_C(12011588417935437687), UINT64_C( 385617647816668918)) }, { simde_x_mm_set_epu32 (UINT32_C(3273494172), UINT32_C( 3612698350), UINT32_C( 4103906203), UINT32_C( 1678207566)), simde_x_mm_set_epu32 (UINT32_C( 48120942), UINT32_C( 160747207), UINT32_C( 2820564214), UINT32_C( 1404181744)), simde_x_mm_set_epu64x(UINT64_C( 580731169496008450), UINT64_C( 2356508426819875104)) }, { simde_x_mm_set_epu32 (UINT32_C( 396392525), UINT32_C( 2486526122), UINT32_C( 1177281917), UINT32_C( 3038155803)), simde_x_mm_set_epu32 (UINT32_C( 760783698), UINT32_C( 1253190575), UINT32_C( 4064848310), UINT32_C( 1630883223)), simde_x_mm_set_epu64x(UINT64_C( 3116091100581700150), UINT64_C( 4954877327972793069)) }, { simde_x_mm_set_epu32 (UINT32_C(1438827395), UINT32_C( 1294325524), UINT32_C( 3245229436), UINT32_C( 122146781)), simde_x_mm_set_epu32 (UINT32_C(1030238038), UINT32_C( 4273209339), UINT32_C( 197838277), UINT32_C( 1151380764)), simde_x_mm_set_epu64x(UINT64_C( 5530923916862868636), UINT64_C( 140637454027920684)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_mul_epu32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u64(r, ==, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm_mul_pd.
 * Each of the 8 rows holds two inputs (a, b) and the expected result r; each
 * lane of r is the product of the matching lanes of a and b, written to two
 * decimal places.
 * The loop calls simde_mm_mul_pd(a, b) per row and compares against r with
 * simde_assert_m128d_close (precision argument 1). Returns 0 after the loop. */
static int test_simde_mm_mul_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 602.19), SIMDE_FLOAT64_C( -103.53)), simde_mm_set_pd(SIMDE_FLOAT64_C( -952.79), SIMDE_FLOAT64_C( -150.84)), simde_mm_set_pd(SIMDE_FLOAT64_C(-573760.61), SIMDE_FLOAT64_C( 15616.47)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -140.84), SIMDE_FLOAT64_C( -241.95)), simde_mm_set_pd(SIMDE_FLOAT64_C( 540.86), SIMDE_FLOAT64_C( -754.39)), simde_mm_set_pd(SIMDE_FLOAT64_C( -76174.72), SIMDE_FLOAT64_C( 182524.66)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -648.21),
SIMDE_FLOAT64_C( -612.11)), simde_mm_set_pd(SIMDE_FLOAT64_C( -327.08), SIMDE_FLOAT64_C( -865.34)), simde_mm_set_pd(SIMDE_FLOAT64_C( 212016.53), SIMDE_FLOAT64_C( 529683.27)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 518.61), SIMDE_FLOAT64_C( -573.43)), simde_mm_set_pd(SIMDE_FLOAT64_C( -650.79), SIMDE_FLOAT64_C( 196.03)), simde_mm_set_pd(SIMDE_FLOAT64_C(-337506.20), SIMDE_FLOAT64_C(-112409.48)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -750.40), SIMDE_FLOAT64_C( 324.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( 343.74), SIMDE_FLOAT64_C( -4.14)), simde_mm_set_pd(SIMDE_FLOAT64_C(-257942.50), SIMDE_FLOAT64_C( -1343.97)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -48.73), SIMDE_FLOAT64_C( 769.19)), simde_mm_set_pd(SIMDE_FLOAT64_C( 268.16), SIMDE_FLOAT64_C( -953.46)), simde_mm_set_pd(SIMDE_FLOAT64_C( -13067.44), SIMDE_FLOAT64_C(-733391.90)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 188.02), SIMDE_FLOAT64_C( 614.87)), simde_mm_set_pd(SIMDE_FLOAT64_C( 396.91), SIMDE_FLOAT64_C( -399.68)), simde_mm_set_pd(SIMDE_FLOAT64_C( 74627.02), SIMDE_FLOAT64_C(-245751.24)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 813.52), SIMDE_FLOAT64_C( 480.96)), simde_mm_set_pd(SIMDE_FLOAT64_C( 664.31), SIMDE_FLOAT64_C( 447.07)), simde_mm_set_pd(SIMDE_FLOAT64_C( 540429.47), SIMDE_FLOAT64_C( 215022.79)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_mul_pd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm_mul_sd.
 * Each of the 8 rows holds inputs (a, b) and the expected result r. In every
 * row the second simde_mm_set_pd argument of r is the product of the second
 * arguments of a and b, while the first argument of r equals that of a.
 * The loop calls simde_mm_mul_sd(a, b) per row and compares against r with
 * simde_assert_m128d_close (precision argument 1). Returns 0 after the loop. */
static int test_simde_mm_mul_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 815.66), SIMDE_FLOAT64_C( 839.23)), simde_mm_set_pd(SIMDE_FLOAT64_C( 748.66), SIMDE_FLOAT64_C( -52.12)), simde_mm_set_pd(SIMDE_FLOAT64_C( 815.66), SIMDE_FLOAT64_C( -43740.67)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -714.36), SIMDE_FLOAT64_C( -808.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( 401.75), SIMDE_FLOAT64_C( 319.13)), simde_mm_set_pd(SIMDE_FLOAT64_C( -714.36),
SIMDE_FLOAT64_C(-257857.04)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 453.36), SIMDE_FLOAT64_C( -764.44)), simde_mm_set_pd(SIMDE_FLOAT64_C( -934.41), SIMDE_FLOAT64_C( -454.88)), simde_mm_set_pd(SIMDE_FLOAT64_C( 453.36), SIMDE_FLOAT64_C( 347728.47)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 489.60), SIMDE_FLOAT64_C( 724.31)), simde_mm_set_pd(SIMDE_FLOAT64_C( -101.25), SIMDE_FLOAT64_C( 196.93)), simde_mm_set_pd(SIMDE_FLOAT64_C( 489.60), SIMDE_FLOAT64_C( 142638.37)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 187.03), SIMDE_FLOAT64_C( 665.07)), simde_mm_set_pd(SIMDE_FLOAT64_C( 487.15), SIMDE_FLOAT64_C( 851.16)), simde_mm_set_pd(SIMDE_FLOAT64_C( 187.03), SIMDE_FLOAT64_C( 566080.98)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 589.48), SIMDE_FLOAT64_C( 648.27)), simde_mm_set_pd(SIMDE_FLOAT64_C( -683.48), SIMDE_FLOAT64_C( -59.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( 589.48), SIMDE_FLOAT64_C( -38682.27)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 838.61), SIMDE_FLOAT64_C( 822.18)), simde_mm_set_pd(SIMDE_FLOAT64_C( -364.43), SIMDE_FLOAT64_C( 962.26)), simde_mm_set_pd(SIMDE_FLOAT64_C( 838.61), SIMDE_FLOAT64_C( 791150.93)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 530.83), SIMDE_FLOAT64_C( 379.76)), simde_mm_set_pd(SIMDE_FLOAT64_C( 27.92), SIMDE_FLOAT64_C( -56.09)), simde_mm_set_pd(SIMDE_FLOAT64_C( 530.83), SIMDE_FLOAT64_C( -21300.74)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_mul_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm_mul_su32.
 * Each of the 8 rows holds two simde__m64 inputs (a, b) built with
 * simde_x_mm_set_pu32 and the expected result r. In the table, r is the
 * 64-bit product of the second simde_x_mm_set_pu32 arguments of a and b. It
 * is written as a UINT64_C literal, cast to int64_t with HEDLEY_STATIC_CAST
 * and wrapped by simde_mm_cvtsi64_m64.
 * The loop calls simde_mm_mul_su32(a, b) per row and requires exact equality
 * with r via simde_assert_m64_u64. Returns 0 after the loop. */
static int test_simde_mm_mul_su32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_x_mm_set_pu32(UINT32_C(3055040779), UINT32_C( 899100968)), simde_x_mm_set_pu32(UINT32_C(1940650668), UINT32_C(3777451497)), simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 3396310297525749096))) }, { simde_x_mm_set_pu32(UINT32_C(2705843438), UINT32_C(2434885276)), simde_x_mm_set_pu32(UINT32_C(3024316392),
UINT32_C(3861898348)), simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 9403279424953924048))) }, { simde_x_mm_set_pu32(UINT32_C(3766308026), UINT32_C(1712773120)), simde_x_mm_set_pu32(UINT32_C( 817218479), UINT32_C(3651399110)), simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 6254018245999923200))) }, { simde_x_mm_set_pu32(UINT32_C( 434012470), UINT32_C(1054365092)), simde_x_mm_set_pu32(UINT32_C(2682784668), UINT32_C(2536059630)), simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 2673932745102435960))) }, { simde_x_mm_set_pu32(UINT32_C(3086788421), UINT32_C( 996821946)), simde_x_mm_set_pu32(UINT32_C(3201780597), UINT32_C(3958985305)), simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 3946403435915503530))) }, { simde_x_mm_set_pu32(UINT32_C(3277786031), UINT32_C(4257890741)), simde_x_mm_set_pu32(UINT32_C(1195509971), UINT32_C(2579552899)), simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C(10983454404571808159))) }, { simde_x_mm_set_pu32(UINT32_C(3106450314), UINT32_C(1125697671)), simde_x_mm_set_pu32(UINT32_C(2878635182), UINT32_C(3892244414)), simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 4381490471802559794))) }, { simde_x_mm_set_pu32(UINT32_C(2670515723), UINT32_C(3917703761)), simde_x_mm_set_pu32(UINT32_C(3656211314), UINT32_C(2327792170)), simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 9119600139235351370))) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_mul_su32(test_vec[i].a, test_vec[i].b); simde_assert_m64_u64(r, ==, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm_mulhi_epi16.
 * Each of the 8 rows holds two inputs (a, b) built with simde_mm_set_epi16 and
 * the expected result r; each 16-bit lane of r is the upper 16 bits of the
 * signed product of the matching lanes of a and b (e.g. -28198 * -2396 gives
 * 1030).
 * The loop calls simde_mm_mulhi_epi16(a, b) per row and requires exact
 * equality with r via simde_assert_m128i_i16. Returns 0 after the loop. */
static int test_simde_mm_mulhi_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-28198), INT16_C(-30713), INT16_C( 20992), INT16_C(-15285), INT16_C( 16558), INT16_C(-12771), INT16_C(-10872), INT16_C(-32584)), simde_mm_set_epi16(INT16_C( -2396), INT16_C(-16729), INT16_C( 31162), INT16_C(-10205),
INT16_C( 24928), INT16_C( 5223), INT16_C( 7262), INT16_C( 25352)), simde_mm_set_epi16(INT16_C( 1030), INT16_C( 7839), INT16_C( 9981), INT16_C( 2380), INT16_C( 6298), INT16_C( -1018), INT16_C( -1205), INT16_C(-12605)) }, { simde_mm_set_epi16(INT16_C(-29475), INT16_C( -4667), INT16_C( 18782), INT16_C(-15431), INT16_C(-27740), INT16_C( 28051), INT16_C( 4978), INT16_C( 1222)), simde_mm_set_epi16(INT16_C(-10541), INT16_C(-14468), INT16_C( 18685), INT16_C( 12375), INT16_C( -5884), INT16_C(-11112), INT16_C( 23337), INT16_C( 12576)), simde_mm_set_epi16(INT16_C( 4740), INT16_C( 1030), INT16_C( 5354), INT16_C( -2914), INT16_C( 2490), INT16_C( -4757), INT16_C( 1772), INT16_C( 234)) }, { simde_mm_set_epi16(INT16_C( 27783), INT16_C( 6960), INT16_C( 17513), INT16_C( -7755), INT16_C( 14695), INT16_C( 12404), INT16_C( -4129), INT16_C(-25366)), simde_mm_set_epi16(INT16_C( 29475), INT16_C( 25763), INT16_C( 29366), INT16_C( 12820), INT16_C( -5355), INT16_C( 7751), INT16_C(-24426), INT16_C( -6617)), simde_mm_set_epi16(INT16_C( 12495), INT16_C( 2736), INT16_C( 7847), INT16_C( -1518), INT16_C( -1201), INT16_C( 1467), INT16_C( 1538), INT16_C( 2561)) }, { simde_mm_set_epi16(INT16_C( 8852), INT16_C( 11654), INT16_C( 12030), INT16_C( 21843), INT16_C( 27012), INT16_C( 24122), INT16_C( -4121), INT16_C( 19864)), simde_mm_set_epi16(INT16_C(-24799), INT16_C(-30738), INT16_C( 19688), INT16_C(-21919), INT16_C( 23874), INT16_C( -4632), INT16_C(-21648), INT16_C(-28317)), simde_mm_set_epi16(INT16_C( -3350), INT16_C( -5467), INT16_C( 3613), INT16_C( -7306), INT16_C( 9840), INT16_C( -1705), INT16_C( 1361), INT16_C( -8583)) }, { simde_mm_set_epi16(INT16_C( 2959), INT16_C(-18532), INT16_C( 4909), INT16_C( 17932), INT16_C( 9150), INT16_C( 13660), INT16_C(-28547), INT16_C( 5006)), simde_mm_set_epi16(INT16_C( 16706), INT16_C(-30015), INT16_C(-32638), INT16_C( 13608), INT16_C( -7846), INT16_C( 14914), INT16_C(-15409), INT16_C(-27711)), simde_mm_set_epi16(INT16_C( 754), INT16_C( 8487), INT16_C( -2445), 
INT16_C( 3723), INT16_C( -1096), INT16_C( 3108), INT16_C( 6712), INT16_C( -2117)) }, { simde_mm_set_epi16(INT16_C( 23854), INT16_C(-13644), INT16_C(-14015), INT16_C(-13375), INT16_C(-26086), INT16_C( -6430), INT16_C( -5411), INT16_C( 7716)), simde_mm_set_epi16(INT16_C( -3281), INT16_C(-16733), INT16_C(-20310), INT16_C( 760), INT16_C(-18586), INT16_C( 1673), INT16_C(-25298), INT16_C(-31758)), simde_mm_set_epi16(INT16_C( -1195), INT16_C( 3483), INT16_C( 4343), INT16_C( -156), INT16_C( 7397), INT16_C( -165), INT16_C( 2088), INT16_C( -3740)) }, { simde_mm_set_epi16(INT16_C( 5449), INT16_C( 38), INT16_C( 6018), INT16_C( 10627), INT16_C( 20505), INT16_C( 28284), INT16_C( 4633), INT16_C(-26325)), simde_mm_set_epi16(INT16_C( 24784), INT16_C( 11314), INT16_C( 7455), INT16_C( 17813), INT16_C( -6570), INT16_C(-17283), INT16_C( 30512), INT16_C( 2646)), simde_mm_set_epi16(INT16_C( 2060), INT16_C( 6), INT16_C( 684), INT16_C( 2888), INT16_C( -2056), INT16_C( -7459), INT16_C( 2157), INT16_C( -1063)) }, { simde_mm_set_epi16(INT16_C(-21624), INT16_C( 5121), INT16_C( 20041), INT16_C( 13722), INT16_C(-24360), INT16_C(-19124), INT16_C(-16069), INT16_C( 19357)), simde_mm_set_epi16(INT16_C( -7842), INT16_C( 31372), INT16_C(-32681), INT16_C( 23520), INT16_C( -3879), INT16_C( -7485), INT16_C( 22256), INT16_C( 12396)), simde_mm_set_epi16(INT16_C( 2587), INT16_C( 2451), INT16_C( -9994), INT16_C( 4924), INT16_C( 1441), INT16_C( 2184), INT16_C( -5458), INT16_C( 3661)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_mulhi_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_mulhi_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu16(35566, 15689, 63042, 57362, 59041, 31224, 19546, 12829), simde_x_mm_set_epu16(51447, 14621, 39095, 25022, 7138, 40387, 23161, 61024), simde_x_mm_set_epu16(27919, 
3500, 37607, 21901, 6430, 19241, 6907, 11945) }, { simde_x_mm_set_epu16(38922, 8893, 7997, 20067, 60307, 12929, 44791, 36818), simde_x_mm_set_epu16(56115, 46352, 39645, 27986, 64864, 64084, 5079, 17389), simde_x_mm_set_epu16(33326, 6289, 4837, 8569, 59688, 12642, 3471, 9769) }, { simde_x_mm_set_epu16(15336, 63669, 63771, 21657, 12681, 61746, 3959, 20213), simde_x_mm_set_epu16(61649, 7462, 20857, 18418, 43120, 17135, 41045, 26167), simde_x_mm_set_epu16(14426, 7249, 20295, 6086, 8343, 16144, 2479, 8070) }, { simde_x_mm_set_epu16(18737, 50787, 58977, 18610, 8077, 2942, 26014, 51355), simde_x_mm_set_epu16( 1776, 1953, 55756, 22299, 19400, 25284, 34496, 57058), simde_x_mm_set_epu16( 507, 1513, 50175, 6332, 2390, 1135, 13692, 44711) }, { simde_x_mm_set_epu16(10154, 39850, 18306, 55081, 15606, 51707, 30878, 20967), simde_x_mm_set_epu16(43083, 50945, 49120, 63736, 15921, 64165, 33035, 50764), simde_x_mm_set_epu16( 6675, 30977, 13720, 53568, 3791, 50625, 15564, 16240) }, { simde_x_mm_set_epu16(12757, 5042, 57712, 50374, 33497, 44643, 9249, 27444), simde_x_mm_set_epu16( 5516, 28001, 37996, 50447, 2209, 25118, 63921, 7578), simde_x_mm_set_epu16( 1073, 2154, 33459, 38775, 1129, 17110, 9021, 3173) }, { simde_x_mm_set_epu16( 6520, 35794, 15094, 63136, 22779, 57672, 2423, 7676), simde_x_mm_set_epu16(20640, 11808, 58236, 53501, 38005, 59820, 7041, 59845), simde_x_mm_set_epu16( 2053, 6449, 13412, 51541, 13209, 52641, 260, 7009) }, { simde_x_mm_set_epu16(60138, 6017, 21659, 30716, 29807, 17606, 41408, 64807), simde_x_mm_set_epu16(25712, 8473, 49119, 61515, 61789, 54600, 37356, 34280), simde_x_mm_set_epu16(23594, 777, 16233, 28831, 28102, 14668, 23602, 33898) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_mulhi_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_mullo_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i 
r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( -7862), INT16_C( 26852), INT16_C( 10752), INT16_C( -9273), INT16_C( -9160), INT16_C( -7080), INT16_C(-16165), INT16_C( -8327)), simde_mm_set_epi16(INT16_C(-20410), INT16_C( 24193), INT16_C(-22278), INT16_C( -948), INT16_C(-31925), INT16_C( -8469), INT16_C( 5801), INT16_C( 10383)), simde_mm_set_epi16(INT16_C( 31292), INT16_C(-27932), INT16_C( 1024), INT16_C( 8980), INT16_C( 11368), INT16_C( -4920), INT16_C( 8851), INT16_C(-17257)) }, { simde_mm_set_epi16(INT16_C( 10435), INT16_C( 19268), INT16_C( 27420), INT16_C( 9542), INT16_C(-22355), INT16_C( 22255), INT16_C(-32016), INT16_C( 23304)), simde_mm_set_epi16(INT16_C( -3883), INT16_C( 14714), INT16_C(-16367), INT16_C( 4175), INT16_C( 13386), INT16_C( 20048), INT16_C(-30329), INT16_C(-26826)), simde_mm_set_epi16(INT16_C(-17857), INT16_C( 616), INT16_C( 7388), INT16_C( -8038), INT16_C( -6654), INT16_C( -848), INT16_C( 31888), INT16_C( -5200)) }, { simde_mm_set_epi16(INT16_C( 16747), INT16_C(-31494), INT16_C( -6008), INT16_C( 256), INT16_C( 13584), INT16_C( -2628), INT16_C( 32210), INT16_C(-21204)), simde_mm_set_epi16(INT16_C( 5844), INT16_C(-28058), INT16_C( -1961), INT16_C( -4057), INT16_C(-28767), INT16_C(-15421), INT16_C(-28399), INT16_C( 6019)), simde_mm_set_epi16(INT16_C( 24220), INT16_C(-28772), INT16_C(-14792), INT16_C( 9984), INT16_C( 20240), INT16_C( 25140), INT16_C( 19698), INT16_C(-28284)) }, { simde_mm_set_epi16(INT16_C( -6420), INT16_C( -8597), INT16_C( -3796), INT16_C( 23244), INT16_C(-31410), INT16_C( -804), INT16_C( 31623), INT16_C( -736)), simde_mm_set_epi16(INT16_C( -5973), INT16_C( -2870), INT16_C( -5873), INT16_C( -1641), INT16_C( -1760), INT16_C( 10653), INT16_C(-28567), INT16_C( 14335)), simde_mm_set_epi16(INT16_C( 8100), INT16_C( 31854), INT16_C( 11668), INT16_C( -1452), INT16_C(-30784), INT16_C( 20204), INT16_C(-26017), INT16_C( 736)) }, { simde_mm_set_epi16(INT16_C(-30942), INT16_C( 23208), INT16_C( -332), INT16_C(-26357), INT16_C( -4575), 
INT16_C( 25713), INT16_C(-11436), INT16_C(-20469)), simde_mm_set_epi16(INT16_C( 10752), INT16_C( -6855), INT16_C(-32031), INT16_C( 11523), INT16_C( 341), INT16_C( 13013), INT16_C( 12462), INT16_C(-19043)), simde_mm_set_epi16(INT16_C(-27648), INT16_C( 30568), INT16_C( 17460), INT16_C(-17887), INT16_C( 12789), INT16_C(-23547), INT16_C( 25368), INT16_C(-16961)) }, { simde_mm_set_epi16(INT16_C( -9419), INT16_C(-28719), INT16_C( 16604), INT16_C( 20761), INT16_C( 7656), INT16_C( 31821), INT16_C( 14202), INT16_C(-12774)), simde_mm_set_epi16(INT16_C(-24440), INT16_C( -4751), INT16_C(-13213), INT16_C( 10351), INT16_C( 25105), INT16_C( -3784), INT16_C( 2889), INT16_C( 15532)), simde_mm_set_epi16(INT16_C(-27608), INT16_C( -1983), INT16_C( 25876), INT16_C( 4567), INT16_C(-13208), INT16_C(-21032), INT16_C( 4042), INT16_C(-28296)) }, { simde_mm_set_epi16(INT16_C( 24274), INT16_C( 31467), INT16_C( 17654), INT16_C(-30184), INT16_C( -7163), INT16_C( 32482), INT16_C( 19535), INT16_C(-21227)), simde_mm_set_epi16(INT16_C( 18405), INT16_C(-30234), INT16_C( 7564), INT16_C(-18060), INT16_C( 16638), INT16_C(-17950), INT16_C( -411), INT16_C(-23904)), simde_mm_set_epi16(INT16_C( 4058), INT16_C( 12834), INT16_C(-27512), INT16_C( -5408), INT16_C( 31990), INT16_C( 21892), INT16_C( 32043), INT16_C( 30496)) }, { simde_mm_set_epi16(INT16_C(-10768), INT16_C(-21062), INT16_C( 22181), INT16_C( 31606), INT16_C( 16135), INT16_C(-14823), INT16_C(-19116), INT16_C(-13035)), simde_mm_set_epi16(INT16_C( 25288), INT16_C(-13107), INT16_C(-24173), INT16_C(-10010), INT16_C(-10251), INT16_C( 9523), INT16_C( 29977), INT16_C(-13646)), simde_mm_set_epi16(INT16_C( 896), INT16_C( 22002), INT16_C(-31297), INT16_C( 31748), INT16_C( 12979), INT16_C( 5115), INT16_C( 6452), INT16_C( 10906)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_mullo_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int 
test_simde_mm_or_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 724.92), SIMDE_FLOAT64_C( 616.22)), simde_mm_set_pd(SIMDE_FLOAT64_C( 797.85), SIMDE_FLOAT64_C( 484.18)), simde_mm_set_pd(SIMDE_FLOAT64_C( 989.98), SIMDE_FLOAT64_C( 128062.24)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 482.51), SIMDE_FLOAT64_C( 841.87)), simde_mm_set_pd(SIMDE_FLOAT64_C( -558.83), SIMDE_FLOAT64_C( 997.07)), simde_mm_set_pd(SIMDE_FLOAT64_C(-129002.75), SIMDE_FLOAT64_C( 1005.87)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 741.60), SIMDE_FLOAT64_C( -412.08)), simde_mm_set_pd(SIMDE_FLOAT64_C( -337.67), SIMDE_FLOAT64_C( -516.98)), simde_mm_set_pd(SIMDE_FLOAT64_C( -95215.80), SIMDE_FLOAT64_C(-106109.48)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 724.85), SIMDE_FLOAT64_C( -403.04)), simde_mm_set_pd(SIMDE_FLOAT64_C( -503.03), SIMDE_FLOAT64_C( -699.51)), simde_mm_set_pd(SIMDE_FLOAT64_C(-130927.93), SIMDE_FLOAT64_C(-122827.50)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 231.42), SIMDE_FLOAT64_C( 688.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( -373.50), SIMDE_FLOAT64_C( 983.44)), simde_mm_set_pd(SIMDE_FLOAT64_C( -511.84), SIMDE_FLOAT64_C( 1015.47)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 625.94), SIMDE_FLOAT64_C( -703.47)), simde_mm_set_pd(SIMDE_FLOAT64_C( -942.06), SIMDE_FLOAT64_C( 249.38)), simde_mm_set_pd(SIMDE_FLOAT64_C( -1024.00), SIMDE_FLOAT64_C( -65535.34)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -97.92), SIMDE_FLOAT64_C( -70.84)), simde_mm_set_pd(SIMDE_FLOAT64_C( -510.77), SIMDE_FLOAT64_C( -381.02)), simde_mm_set_pd(SIMDE_FLOAT64_C( -511.93), SIMDE_FLOAT64_C( -383.36)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -350.87), SIMDE_FLOAT64_C( -439.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( 66.40), SIMDE_FLOAT64_C( 195.88)), simde_mm_set_pd(SIMDE_FLOAT64_C( -351.87), SIMDE_FLOAT64_C( -439.86)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_or_pd(test_vec[i].a, test_vec[i].b); 
simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_or_si128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C( 3806780817851842454), INT64_C( 3002076500639794819)), simde_mm_set_epi64x(INT64_C( -1576369425501019200), INT64_C( 5863973371898850910)), simde_mm_set_epi64x(INT64_C( -81065909581643818), INT64_C( 8784698508288454367)) }, { simde_mm_set_epi64x(INT64_C( 4358272343769327172), INT64_C( -4254544166297055533)), simde_mm_set_epi64x(INT64_C( -3870591542062132163), INT64_C( 8365983368440196218)), simde_mm_set_epi64x(INT64_C( -109223286268234115), INT64_C( -793210092996038917)) }, { simde_mm_set_epi64x(INT64_C( -8935978336450140157), INT64_C( -4292132981830530492)), simde_mm_set_epi64x(INT64_C( -5275996428160709349), INT64_C( 1809702168782653061)), simde_mm_set_epi64x(INT64_C( -5188173984729010917), INT64_C( -2486163139644895547)) }, { simde_mm_set_epi64x(INT64_C( -3617483608260678394), INT64_C( -7299761588855953181)), simde_mm_set_epi64x(INT64_C( -3679366837934484296), INT64_C( 5342128716508209170)), simde_mm_set_epi64x(INT64_C( -3603900203459740226), INT64_C( -2687789418219853581)) }, { simde_mm_set_epi64x(INT64_C( 8613776548693408177), INT64_C( -1221094295236221778)), simde_mm_set_epi64x(INT64_C( 8491744443283364215), INT64_C( 4783609441494973751)), simde_mm_set_epi64x(INT64_C( 8636493096189557239), INT64_C( -1193507273608823361)) }, { simde_mm_set_epi64x(INT64_C( 2256952633337952767), INT64_C( -5574602856706714295)), simde_mm_set_epi64x(INT64_C( -7711313128986328449), INT64_C( -5631421726257218112)), simde_mm_set_epi64x(INT64_C( -6918672559143650305), INT64_C( -5477736148453327415)) }, { simde_mm_set_epi64x(INT64_C( 6915809581026069253), INT64_C( -4447049561909832301)), simde_mm_set_epi64x(INT64_C( 7606865206928880870), INT64_C( 526097040835303983)), simde_mm_set_epi64x(INT64_C( 9221656517182193639), INT64_C( -4085336622434885697)) }, { 
simde_mm_set_epi64x(INT64_C( 6255716227368614659), INT64_C( 3842255123517004943)), simde_mm_set_epi64x(INT64_C( -2544504471973996098), INT64_C( 7324902981920444710)), simde_mm_set_epi64x(INT64_C( -2382159098826458177), INT64_C( 8500384867471056303)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_or_si128(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_packs_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-22268), INT16_C( -16), INT16_C( -49), INT16_C( 8), INT16_C( 20029), INT16_C(-30901), INT16_C(-17364), INT16_C( -65)), simde_mm_set_epi16(INT16_C(-20429), INT16_C( 4131), INT16_C(-19140), INT16_C( 23907), INT16_C( -87), INT16_C(-15818), INT16_C( -93), INT16_C( -34)), simde_mm_set_epi8(INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( -87), INT8_C(-128), INT8_C( -93), INT8_C( -34), INT8_C(-128), INT8_C( -16), INT8_C( -49), INT8_C( 8), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( -65)) }, { simde_mm_set_epi16(INT16_C( -1320), INT16_C( 64), INT16_C( 7903), INT16_C( -86), INT16_C( 17775), INT16_C( -29), INT16_C(-24347), INT16_C( 20534)), simde_mm_set_epi16(INT16_C( -26), INT16_C( 32460), INT16_C( -35), INT16_C( 9), INT16_C( 97), INT16_C(-16116), INT16_C( 21908), INT16_C( 31051)), simde_mm_set_epi8(INT8_C( -26), INT8_C( 127), INT8_C( -35), INT8_C( 9), INT8_C( 97), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C( 64), INT8_C( 127), INT8_C( -86), INT8_C( 127), INT8_C( -29), INT8_C(-128), INT8_C( 127)) }, { simde_mm_set_epi16(INT16_C( 16), INT16_C(-23521), INT16_C( 107), INT16_C( 10693), INT16_C( 37), INT16_C( 32277), INT16_C( -120), INT16_C( -13)), simde_mm_set_epi16(INT16_C( 7912), INT16_C( 127), INT16_C(-27046), INT16_C( -104), INT16_C( 114), INT16_C( -54), INT16_C( -26), INT16_C( 29057)), simde_mm_set_epi8(INT8_C( 127), 
INT8_C( 127), INT8_C(-128), INT8_C(-104), INT8_C( 114), INT8_C( -54), INT8_C( -26), INT8_C( 127), INT8_C( 16), INT8_C(-128), INT8_C( 107), INT8_C( 127), INT8_C( 37), INT8_C( 127), INT8_C(-120), INT8_C( -13)) }, { simde_mm_set_epi16(INT16_C( 8), INT16_C( -84), INT16_C( 26), INT16_C( -1727), INT16_C( 53), INT16_C( 29056), INT16_C( -7932), INT16_C( 40)), simde_mm_set_epi16(INT16_C(-25560), INT16_C( 94), INT16_C( 19164), INT16_C( -119), INT16_C(-25450), INT16_C( 26043), INT16_C( -9549), INT16_C( 110)), simde_mm_set_epi8(INT8_C(-128), INT8_C( 94), INT8_C( 127), INT8_C(-119), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 110), INT8_C( 8), INT8_C( -84), INT8_C( 26), INT8_C(-128), INT8_C( 53), INT8_C( 127), INT8_C(-128), INT8_C( 40)) }, { simde_mm_set_epi16(INT16_C( 17087), INT16_C( 3), INT16_C( 26871), INT16_C( 126), INT16_C(-10072), INT16_C( 95), INT16_C( 117), INT16_C( 110)), simde_mm_set_epi16(INT16_C( 7667), INT16_C( -3918), INT16_C( -98), INT16_C( -77), INT16_C( 29383), INT16_C(-21060), INT16_C(-18775), INT16_C( 21121)), simde_mm_set_epi8(INT8_C( 127), INT8_C(-128), INT8_C( -98), INT8_C( -77), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 3), INT8_C( 127), INT8_C( 126), INT8_C(-128), INT8_C( 95), INT8_C( 117), INT8_C( 110)) }, { simde_mm_set_epi16(INT16_C( -120), INT16_C(-29564), INT16_C( -120), INT16_C( -79), INT16_C( -93), INT16_C(-23649), INT16_C( 25423), INT16_C(-23661)), simde_mm_set_epi16(INT16_C( 109), INT16_C(-30808), INT16_C( 45), INT16_C( -18), INT16_C( -4268), INT16_C( 30580), INT16_C( 77), INT16_C( -1896)), simde_mm_set_epi8(INT8_C( 109), INT8_C(-128), INT8_C( 45), INT8_C( -18), INT8_C(-128), INT8_C( 127), INT8_C( 77), INT8_C(-128), INT8_C(-120), INT8_C(-128), INT8_C(-120), INT8_C( -79), INT8_C( -93), INT8_C(-128), INT8_C( 127), INT8_C(-128)) }, { simde_mm_set_epi16(INT16_C( 75), INT16_C( -80), INT16_C( -5), INT16_C( -23), INT16_C( -9879), INT16_C( 116), INT16_C(-20199), INT16_C( 5095)), simde_mm_set_epi16(INT16_C( 
-3339), INT16_C( -48), INT16_C( -117), INT16_C( -2107), INT16_C( 11715), INT16_C(-13793), INT16_C(-31434), INT16_C( 25021)), simde_mm_set_epi8(INT8_C(-128), INT8_C( -48), INT8_C(-117), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 75), INT8_C( -80), INT8_C( -5), INT8_C( -23), INT8_C(-128), INT8_C( 116), INT8_C(-128), INT8_C( 127)) }, { simde_mm_set_epi16(INT16_C( -15), INT16_C( 110), INT16_C( -521), INT16_C( 75), INT16_C( 12019), INT16_C(-30116), INT16_C( 17702), INT16_C( 14401)), simde_mm_set_epi16(INT16_C(-15008), INT16_C( -80), INT16_C( -127), INT16_C(-29333), INT16_C( -7), INT16_C(-17846), INT16_C( 83), INT16_C( 25637)), simde_mm_set_epi8(INT8_C(-128), INT8_C( -80), INT8_C(-127), INT8_C(-128), INT8_C( -7), INT8_C(-128), INT8_C( 83), INT8_C( 127), INT8_C( -15), INT8_C( 110), INT8_C(-128), INT8_C( 75), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_packs_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_packs_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 1221393622), INT32_C( 1245122), INT32_C( -546439182), INT32_C( 1653967185)), simde_mm_set_epi32(INT32_C( -5570627), INT32_C( 1604714526), INT32_C( 3276815), INT32_C( -865960168)), simde_mm_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767)) }, { simde_mm_set_epi32(INT32_C(-1556742099), INT32_C( 3735602), INT32_C( -795913538), INT32_C( 5177351)), simde_mm_set_epi32(INT32_C( 230555532), INT32_C( -681902099), INT32_C(-1460947394), INT32_C( 1435959285)), simde_mm_set_epi16(INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767)) }, { 
simde_mm_set_epi32(INT32_C( 1058013130), INT32_C( 1801350196), INT32_C( 3735625), INT32_C( 393200)), simde_mm_set_epi32(INT32_C( -5046245), INT32_C( 1947557327), INT32_C( -390520293), INT32_C(-1060577736)), simde_mm_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767)) }, { simde_mm_set_epi32(INT32_C( 1625994666), INT32_C( 151157112), INT32_C( -6356918), INT32_C( 574958135)), simde_mm_set_epi32(INT32_C( -878149423), INT32_C( -1310820), INT32_C( 7694016), INT32_C( 1656093)), simde_mm_set_epi16(INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767)) }, { simde_mm_set_epi32(INT32_C( 906756004), INT32_C( 589883340), INT32_C(-1375993871), INT32_C( -5221415)), simde_mm_set_epi32(INT32_C(-1492628097), INT32_C( -7536518), INT32_C( 1834989), INT32_C(-2090880115)), simde_mm_set_epi16(INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768)) }, { simde_mm_set_epi32(INT32_C( 1759910713), INT32_C( 2028743221), INT32_C( 1203039561), INT32_C( 3735524)), simde_mm_set_epi32(INT32_C( 5505016), INT32_C( 68681650), INT32_C( 3895727), INT32_C(-1084227687)), simde_mm_set_epi16(INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767)) }, { simde_mm_set_epi32(INT32_C( 1249181759), INT32_C( 850460644), INT32_C( 643956807), INT32_C( 1402185830)), simde_mm_set_epi32(INT32_C( 503821785), INT32_C( -1966044), INT32_C( -1228291), INT32_C( 6420027)), simde_mm_set_epi16(INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767)) }, { simde_mm_set_epi32(INT32_C( 1562990695), INT32_C( 134021098), INT32_C(-1071906850), INT32_C( -558152330)), simde_mm_set_epi32(INT32_C(-1746927677), INT32_C( 7209004), 
INT32_C( 917512), INT32_C( 4155429)), simde_mm_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_packs_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_packus_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( 217), INT16_C(-10893), INT16_C( 10007), INT16_C(-11974), INT16_C( 134), INT16_C( 45), INT16_C( 21), INT16_C( 179)), simde_mm_set_epi16(INT16_C( 14829), INT16_C( 37), INT16_C( 2757), INT16_C(-26385), INT16_C( 26), INT16_C( 196), INT16_C( 2768), INT16_C( 221)), simde_x_mm_set_epu8(255, 37, 255, 0, 26, 196, 255, 221, 217, 0, 255, 0, 134, 45, 21, 179) }, { simde_mm_set_epi16(INT16_C( 84), INT16_C( 11197), INT16_C( 28), INT16_C(-18960), INT16_C( 0), INT16_C( 243), INT16_C( 209), INT16_C( 115)), simde_mm_set_epi16(INT16_C( 26800), INT16_C( 44), INT16_C( 244), INT16_C( 114), INT16_C( 234), INT16_C( 7269), INT16_C( 2441), INT16_C( -9419)), simde_x_mm_set_epu8(255, 44, 244, 114, 234, 255, 255, 0, 84, 255, 28, 0, 0, 243, 209, 115) }, { simde_mm_set_epi16(INT16_C( 26559), INT16_C(-13811), INT16_C( 141), INT16_C( 130), INT16_C(-24149), INT16_C( 185), INT16_C( 9120), INT16_C(-14604)), simde_mm_set_epi16(INT16_C( 190), INT16_C( 162), INT16_C( 3761), INT16_C(-10696), INT16_C( 15175), INT16_C( 6926), INT16_C( 19649), INT16_C( 79)), simde_x_mm_set_epu8(190, 162, 255, 0, 255, 255, 255, 79, 255, 0, 141, 130, 0, 185, 255, 0) }, { simde_mm_set_epi16(INT16_C( 250), INT16_C(-23643), INT16_C(-15994), INT16_C( 173), INT16_C( 97), INT16_C( 158), INT16_C( 82), INT16_C( 231)), simde_mm_set_epi16(INT16_C( 70), INT16_C( 30022), INT16_C( 0), INT16_C( -8717), INT16_C( 6), INT16_C( 206), INT16_C(-25401), INT16_C( 252)), 
simde_x_mm_set_epu8( 70, 255, 0, 0, 6, 206, 0, 252, 250, 0, 0, 173, 97, 158, 82, 231) }, { simde_mm_set_epi16(INT16_C( 92), INT16_C(-13839), INT16_C( 243), INT16_C( -3624), INT16_C( 252), INT16_C(-29405), INT16_C( 3), INT16_C( 6730)), simde_mm_set_epi16(INT16_C( 4496), INT16_C( 19200), INT16_C( 70), INT16_C( 128), INT16_C( 2496), INT16_C( 60), INT16_C( 18531), INT16_C(-20006)), simde_x_mm_set_epu8(255, 255, 70, 128, 255, 60, 255, 0, 92, 0, 243, 0, 252, 0, 3, 255) }, { simde_mm_set_epi16(INT16_C( 57), INT16_C(-14586), INT16_C( 21134), INT16_C( 7065), INT16_C( 3), INT16_C(-16049), INT16_C( 26223), INT16_C(-20721)), simde_mm_set_epi16(INT16_C( 129), INT16_C( 105), INT16_C( -1899), INT16_C( 221), INT16_C(-24446), INT16_C(-20297), INT16_C( 30906), INT16_C( 192)), simde_x_mm_set_epu8(129, 105, 0, 221, 0, 0, 255, 192, 57, 0, 255, 255, 3, 0, 255, 0) }, { simde_mm_set_epi16(INT16_C( 128), INT16_C( 22639), INT16_C( -9670), INT16_C( 8168), INT16_C( -1055), INT16_C(-24505), INT16_C( 32719), INT16_C( 16999)), simde_mm_set_epi16(INT16_C(-23185), INT16_C( 119), INT16_C( 108), INT16_C( 34), INT16_C(-15892), INT16_C( 2641), INT16_C( 242), INT16_C( -7325)), simde_x_mm_set_epu8( 0, 119, 108, 34, 0, 255, 242, 0, 128, 255, 0, 255, 0, 0, 255, 255) }, { simde_mm_set_epi16(INT16_C( 95), INT16_C( 145), INT16_C( 101), INT16_C( 5449), INT16_C( 163), INT16_C( 19185), INT16_C( 3025), INT16_C( 52)), simde_mm_set_epi16(INT16_C( 2870), INT16_C( 140), INT16_C( 144), INT16_C( 254), INT16_C( 8482), INT16_C( 4388), INT16_C( 201), INT16_C(-14867)), simde_x_mm_set_epu8(255, 140, 144, 254, 255, 255, 201, 0, 95, 145, 101, 255, 163, 255, 255, 52) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_packus_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_sad_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { 
simde_x_mm_set_epu8(215, 90, 59, 114, 199, 190, 5, 88, 189, 152, 95, 90, 71, 40, 85, 39), simde_x_mm_set_epu8( 54, 166, 154, 195, 131, 97, 225, 141, 107, 5, 50, 55, 194, 31, 223, 92), simde_mm_set_epi64x(INT64_C( 847), INT64_C( 632)) }, { simde_x_mm_set_epu8( 73, 8, 35, 63, 9, 118, 137, 154, 163, 61, 8, 4, 96, 39, 181, 31), simde_x_mm_set_epu8(244, 64, 21, 0, 73, 79, 47, 148, 227, 0, 217, 151, 241, 123, 179, 200), simde_mm_set_epi64x(INT64_C( 503), INT64_C( 881)) }, { simde_x_mm_set_epu8(188, 156, 164, 209, 37, 165, 186, 237, 157, 45, 141, 9, 227, 9, 6, 113), simde_x_mm_set_epu8(196, 12, 188, 136, 227, 14, 111, 188, 42, 252, 141, 251, 41, 42, 48, 10), simde_mm_set_epi64x(INT64_C( 714), INT64_C( 928)) }, { simde_x_mm_set_epu8(221, 210, 203, 74, 151, 53, 237, 96, 105, 62, 32, 146, 208, 27, 214, 15), simde_x_mm_set_epu8(106, 143, 238, 35, 165, 158, 48, 47, 51, 172, 84, 44, 119, 233, 73, 10), simde_mm_set_epi64x(INT64_C( 613), INT64_C( 759)) }, { simde_x_mm_set_epu8(158, 146, 218, 39, 84, 176, 15, 200, 114, 100, 110, 72, 37, 118, 124, 52), simde_x_mm_set_epu8(250, 173, 237, 165, 77, 193, 83, 68, 159, 214, 52, 182, 160, 117, 236, 237), simde_mm_set_epi64x(INT64_C( 488), INT64_C( 748)) }, { simde_x_mm_set_epu8(116, 17, 11, 212, 41, 247, 182, 55, 218, 151, 38, 248, 87, 3, 108, 3), simde_x_mm_set_epu8(178, 255, 4, 183, 81, 104, 79, 156, 178, 174, 55, 110, 255, 70, 179, 129), simde_mm_set_epi64x(INT64_C( 723), INT64_C( 650)) }, { simde_x_mm_set_epu8( 26, 112, 229, 82, 174, 243, 79, 54, 103, 25, 150, 156, 120, 47, 29, 212), simde_x_mm_set_epu8(155, 158, 100, 233, 190, 145, 4, 176, 236, 88, 45, 24, 159, 182, 83, 215), simde_mm_set_epi64x(INT64_C( 766), INT64_C( 664)) }, { simde_x_mm_set_epu8( 29, 127, 97, 34, 247, 17, 64, 73, 255, 69, 189, 150, 155, 84, 174, 88), simde_x_mm_set_epu8(224, 212, 67, 184, 190, 48, 118, 149, 209, 255, 9, 200, 126, 242, 201, 30), simde_mm_set_epi64x(INT64_C( 678), INT64_C( 734)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); 
i++) { simde__m128i r = simde_mm_sad_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_set_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { int8_t e15; int8_t e14; int8_t e13; int8_t e12; int8_t e11; int8_t e10; int8_t e9; int8_t e8; int8_t e7; int8_t e6; int8_t e5; int8_t e4; int8_t e3; int8_t e2; int8_t e1; int8_t e0; simde__m128i r; } test_vec[8] = { { -48, -30, 88, -96, 9, 89, 20, -95, 63, -76, 126, 67, 85, 88, -17, -107, simde_mm_set_epi8(INT8_C( -48), INT8_C( -30), INT8_C( 88), INT8_C( -96), INT8_C( 9), INT8_C( 89), INT8_C( 20), INT8_C( -95), INT8_C( 63), INT8_C( -76), INT8_C( 126), INT8_C( 67), INT8_C( 85), INT8_C( 88), INT8_C( -17), INT8_C(-107)) }, { 73, -68, -61, 58, -37, 5, -64, -56, -5, 33, -53, -34, -11, 57, 49, 12, simde_mm_set_epi8(INT8_C( 73), INT8_C( -68), INT8_C( -61), INT8_C( 58), INT8_C( -37), INT8_C( 5), INT8_C( -64), INT8_C( -56), INT8_C( -5), INT8_C( 33), INT8_C( -53), INT8_C( -34), INT8_C( -11), INT8_C( 57), INT8_C( 49), INT8_C( 12)) }, { -65, -108, 95, -117, 35, 45, 54, -43, -45, 123, 113, -6, 23, -66, 77, 94, simde_mm_set_epi8(INT8_C( -65), INT8_C(-108), INT8_C( 95), INT8_C(-117), INT8_C( 35), INT8_C( 45), INT8_C( 54), INT8_C( -43), INT8_C( -45), INT8_C( 123), INT8_C( 113), INT8_C( -6), INT8_C( 23), INT8_C( -66), INT8_C( 77), INT8_C( 94)) }, { -72, 95, 112, 68, 56, -74, -97, -55, 22, 53, -22, 68, -107, 99, -5, -94, simde_mm_set_epi8(INT8_C( -72), INT8_C( 95), INT8_C( 112), INT8_C( 68), INT8_C( 56), INT8_C( -74), INT8_C( -97), INT8_C( -55), INT8_C( 22), INT8_C( 53), INT8_C( -22), INT8_C( 68), INT8_C(-107), INT8_C( 99), INT8_C( -5), INT8_C( -94)) }, { -48, 6, 114, 89, -57, -104, -78, -72, -32, -41, -27, -58, -1, -100, -126, -52, simde_mm_set_epi8(INT8_C( -48), INT8_C( 6), INT8_C( 114), INT8_C( 89), INT8_C( -57), INT8_C(-104), INT8_C( -78), INT8_C( -72), INT8_C( -32), INT8_C( -41), INT8_C( -27), INT8_C( -58), INT8_C( -1), INT8_C(-100), INT8_C(-126), INT8_C( -52)) }, { 75, -127, 
-59, 90, 126, -9, 88, 22, 36, 75, -11, -10, 31, -72, 19, -30, simde_mm_set_epi8(INT8_C( 75), INT8_C(-127), INT8_C( -59), INT8_C( 90), INT8_C( 126), INT8_C( -9), INT8_C( 88), INT8_C( 22), INT8_C( 36), INT8_C( 75), INT8_C( -11), INT8_C( -10), INT8_C( 31), INT8_C( -72), INT8_C( 19), INT8_C( -30)) }, { -66, 57, 86, -24, -102, 97, 37, 79, 98, -52, 75, 113, -66, -45, -97, 50, simde_mm_set_epi8(INT8_C( -66), INT8_C( 57), INT8_C( 86), INT8_C( -24), INT8_C(-102), INT8_C( 97), INT8_C( 37), INT8_C( 79), INT8_C( 98), INT8_C( -52), INT8_C( 75), INT8_C( 113), INT8_C( -66), INT8_C( -45), INT8_C( -97), INT8_C( 50)) }, { -14, -31, -3, 35, 62, 73, 10, 46, 72, 110, -30, 71, -50, -46, 106, -75, simde_mm_set_epi8(INT8_C( -14), INT8_C( -31), INT8_C( -3), INT8_C( 35), INT8_C( 62), INT8_C( 73), INT8_C( 10), INT8_C( 46), INT8_C( 72), INT8_C( 110), INT8_C( -30), INT8_C( 71), INT8_C( -50), INT8_C( -46), INT8_C( 106), INT8_C( -75)) }, }; // printf("\n"); // for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { // int8_t e15 = munit_rand_uint32(); // int8_t e14 = munit_rand_uint32(); // int8_t e13 = munit_rand_uint32(); // int8_t e12 = munit_rand_uint32(); // int8_t e11 = munit_rand_uint32(); // int8_t e10 = munit_rand_uint32(); // int8_t e9 = munit_rand_uint32(); // int8_t e8 = munit_rand_uint32(); // int8_t e7 = munit_rand_uint32(); // int8_t e6 = munit_rand_uint32(); // int8_t e5 = munit_rand_uint32(); // int8_t e4 = munit_rand_uint32(); // int8_t e3 = munit_rand_uint32(); // int8_t e2 = munit_rand_uint32(); // int8_t e1 = munit_rand_uint32(); // int8_t e0 = munit_rand_uint32(); // simde__m128i_private r; // r = simde__m128i_to_private(simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0)); // printf(" { %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ",\n" // " %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ",\n", // e15, e14, 
e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0); // printf(" simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n" // " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n" // " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n" // " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")) },\n", // r.i8[15], r.i8[14], r.i8[13], r.i8[12], r.i8[11], r.i8[10], r.i8[ 9], r.i8[ 8], // r.i8[ 7], r.i8[ 6], r.i8[ 5], r.i8[ 4], r.i8[ 3], r.i8[ 2], r.i8[ 1], r.i8[ 0]); // } // return MUNIT_FAIL; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_set_epi8( test_vec[i].e15, test_vec[i].e14, test_vec[i].e13, test_vec[i].e12, test_vec[i].e11, test_vec[i].e10, test_vec[i].e9, test_vec[i].e8, test_vec[i].e7, test_vec[i].e6, test_vec[i].e5, test_vec[i].e4, test_vec[i].e3, test_vec[i].e2, test_vec[i].e1, test_vec[i].e0); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_set_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { int16_t e7; int16_t e6; int16_t e5; int16_t e4; int16_t e3; int16_t e2; int16_t e1; int16_t e0; simde__m128i r; } test_vec[8] = { { -12714, -18436, 19109, 27542, -4031, 11847, 32066, 4849, simde_mm_set_epi16(INT16_C(-12714), INT16_C(-18436), INT16_C( 19109), INT16_C( 27542), INT16_C( -4031), INT16_C( 11847), INT16_C( 32066), INT16_C( 4849)) }, { 20812, -18306, 32711, 2248, -22144, -30920, 20888, -23709, simde_mm_set_epi16(INT16_C( 20812), INT16_C(-18306), INT16_C( 32711), INT16_C( 2248), INT16_C(-22144), INT16_C(-30920), INT16_C( 20888), INT16_C(-23709)) }, { 8868, -14625, -5258, -12928, -11989, 31315, -9098, 19222, simde_mm_set_epi16(INT16_C( 8868), INT16_C(-14625), INT16_C( -5258), INT16_C(-12928), INT16_C(-11989), INT16_C( 31315), INT16_C( -9098), INT16_C( 19222)) }, { -5334, 23871, 3901, 14443, -13328, 23359, -24889, 28356, 
simde_mm_set_epi16(INT16_C( -5334), INT16_C( 23871), INT16_C( 3901), INT16_C( 14443), INT16_C(-13328), INT16_C( 23359), INT16_C(-24889), INT16_C( 28356)) }, { 10774, -19043, 31284, 4044, 862, -11938, -27554, -25119, simde_mm_set_epi16(INT16_C( 10774), INT16_C(-19043), INT16_C( 31284), INT16_C( 4044), INT16_C( 862), INT16_C(-11938), INT16_C(-27554), INT16_C(-25119)) }, { 20150, -31510, -29797, -3272, -18019, 16111, -15969, -11740, simde_mm_set_epi16(INT16_C( 20150), INT16_C(-31510), INT16_C(-29797), INT16_C( -3272), INT16_C(-18019), INT16_C( 16111), INT16_C(-15969), INT16_C(-11740)) }, { -3147, -24243, -28710, -5510, -20724, 13872, -9632, -7728, simde_mm_set_epi16(INT16_C( -3147), INT16_C(-24243), INT16_C(-28710), INT16_C( -5510), INT16_C(-20724), INT16_C( 13872), INT16_C( -9632), INT16_C( -7728)) }, { 6318, 11524, 30789, -2974, 3458, -10908, -25743, -20801, simde_mm_set_epi16(INT16_C( 6318), INT16_C( 11524), INT16_C( 30789), INT16_C( -2974), INT16_C( 3458), INT16_C(-10908), INT16_C(-25743), INT16_C(-20801)) }, }; // printf("\n"); // for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { // int16_t e7 = munit_rand_uint32(); // int16_t e6 = munit_rand_uint32(); // int16_t e5 = munit_rand_uint32(); // int16_t e4 = munit_rand_uint32(); // int16_t e3 = munit_rand_uint32(); // int16_t e2 = munit_rand_uint32(); // int16_t e1 = munit_rand_uint32(); // int16_t e0 = munit_rand_uint32(); // simde__m128i_private r; // r = simde__m128i_to_private(simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0)); // printf(" { %6" PRId16 ", %6" PRId16 ", %6" PRId16 ", %6" PRId16 ", %6" PRId16 ", %6" PRId16 ", %6" PRId16 ", %6" PRId16 ",\n", // e7, e6, e5, e4, e3, e2, e1, e0); // printf(" simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n" // " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")) },\n", // r.i16[7], r.i16[6], r.i16[5], r.i16[4], r.i16[3], r.i16[2], 
r.i16[1], r.i16[0]); // } // return MUNIT_FAIL; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_set_epi16( test_vec[i].e7, test_vec[i].e6, test_vec[i].e5, test_vec[i].e4, test_vec[i].e3, test_vec[i].e2, test_vec[i].e1, test_vec[i].e0); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_set_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { int32_t e3; int32_t e2; int32_t e1; int32_t e0; simde__m128i r; } test_vec[8] = { { 391721235, 1158362691, 2131167346, -1830589941, simde_mm_set_epi32(INT32_C( 391721235), INT32_C( 1158362691), INT32_C( 2131167346), INT32_C(-1830589941)) }, { 2141048609, 2073510589, 924258053, 594030571, simde_mm_set_epi32(INT32_C( 2141048609), INT32_C( 2073510589), INT32_C( 924258053), INT32_C( 594030571)) }, { 1247989717, 798714331, -1727766974, 1099259705, simde_mm_set_epi32(INT32_C( 1247989717), INT32_C( 798714331), INT32_C(-1727766974), INT32_C( 1099259705)) }, { 1870669627, 1775697551, -2027090738, -1897466045, simde_mm_set_epi32(INT32_C( 1870669627), INT32_C( 1775697551), INT32_C(-2027090738), INT32_C(-1897466045)) }, { -584467290, 2134946541, 565373055, -212717620, simde_mm_set_epi32(INT32_C( -584467290), INT32_C( 2134946541), INT32_C( 565373055), INT32_C( -212717620)) }, { 2072276971, 1968759191, 2049222745, 64876297, simde_mm_set_epi32(INT32_C( 2072276971), INT32_C( 1968759191), INT32_C( 2049222745), INT32_C( 64876297)) }, { -285499155, -775226349, 1401270915, -476575867, simde_mm_set_epi32(INT32_C( -285499155), INT32_C( -775226349), INT32_C( 1401270915), INT32_C( -476575867)) }, { -135350759, -1402535212, -799024597, 1171022108, simde_mm_set_epi32(INT32_C( -135350759), INT32_C(-1402535212), INT32_C( -799024597), INT32_C( 1171022108)) }, }; // printf("\n"); // for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { // int32_t e3 = munit_rand_uint32(); // int32_t e2 = munit_rand_uint32(); // int32_t e1 = munit_rand_uint32(); // int32_t e0 
= munit_rand_uint32(); // simde__m128i_private r; // r = simde__m128i_to_private(simde_mm_set_epi32(e3, e2, e1, e0)); // printf(" { %11" PRId32 ", %11" PRId32 ", %11" PRId32 ", %11" PRId32 ",\n", // e3, e2, e1, e0); // printf(" simde_mm_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")) },\n", // r.i32[3], r.i32[2], r.i32[1], r.i32[0]); // } // return MUNIT_FAIL; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_set_epi32( test_vec[i].e3, test_vec[i].e2, test_vec[i].e1, test_vec[i].e0); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_set_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_pi64(INT64_C( -664890281848034973)), simde_x_mm_set_pi64(INT64_C(-2789670716680390611)), simde_mm_set_epi64x(INT64_C( -664890281848034973), INT64_C(-2789670716680390611)) }, { simde_x_mm_set_pi64(INT64_C( 5148232775303872766)), simde_x_mm_set_pi64(INT64_C(-4313892930136448255)), simde_mm_set_epi64x(INT64_C( 5148232775303872766), INT64_C(-4313892930136448255)) }, { simde_x_mm_set_pi64(INT64_C(-1888312870737326599)), simde_x_mm_set_pi64(INT64_C( 5248373813564878857)), simde_mm_set_epi64x(INT64_C(-1888312870737326599), INT64_C( 5248373813564878857)) }, { simde_x_mm_set_pi64(INT64_C(-1560565807933837504)), simde_x_mm_set_pi64(INT64_C( 7268621988108136806)), simde_mm_set_epi64x(INT64_C(-1560565807933837504), INT64_C( 7268621988108136806)) }, { simde_x_mm_set_pi64(INT64_C(-1956110667393926378)), simde_x_mm_set_pi64(INT64_C( 345154446382384077)), simde_mm_set_epi64x(INT64_C(-1956110667393926378), INT64_C( 345154446382384077)) }, { simde_x_mm_set_pi64(INT64_C(-8505578167241709019)), simde_x_mm_set_pi64(INT64_C( 8252355195326597777)), simde_mm_set_epi64x(INT64_C(-8505578167241709019), INT64_C( 8252355195326597777)) }, { simde_x_mm_set_pi64(INT64_C( 1122841158674863793)), 
simde_x_mm_set_pi64(INT64_C(-5697643761898453242)), simde_mm_set_epi64x(INT64_C( 1122841158674863793), INT64_C(-5697643761898453242)) }, { simde_x_mm_set_pi64(INT64_C(-6130487997584440381)), simde_x_mm_set_pi64(INT64_C( 8349290391131198480)), simde_mm_set_epi64x(INT64_C(-6130487997584440381), INT64_C( 8349290391131198480)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_set_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_set_epi64x(SIMDE_MUNIT_TEST_ARGS) { const struct { int64_t e0; int64_t e1; simde__m128i r; } test_vec[8] = { { 4539993052502346892, 6550919315486945587, simde_mm_set_epi64x(INT64_C( 4539993052502346892), INT64_C( 6550919315486945587)) }, { -8973439144672590874, 1846200258209621581, simde_mm_set_epi64x(INT64_C(-8973439144672590874), INT64_C( 1846200258209621581)) }, { 771735515044186414, -5491872275643679405, simde_mm_set_epi64x(INT64_C( 771735515044186414), INT64_C(-5491872275643679405)) }, { 3535609691698693035, -2659398015885158473, simde_mm_set_epi64x(INT64_C( 3535609691698693035), INT64_C(-2659398015885158473)) }, { -5310489553719126486, -1326851720416490864, simde_mm_set_epi64x(INT64_C(-5310489553719126486), INT64_C(-1326851720416490864)) }, { -1132069192689462333, -3126474808030937011, simde_mm_set_epi64x(INT64_C(-1132069192689462333), INT64_C(-3126474808030937011)) }, { 3201360662826502659, 2894150994676591563, simde_mm_set_epi64x(INT64_C( 3201360662826502659), INT64_C( 2894150994676591563)) }, { 5657213110111307867, 4054595932996548594, simde_mm_set_epi64x(INT64_C( 5657213110111307867), INT64_C( 4054595932996548594)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_set_epi64x(test_vec[i].e0, test_vec[i].e1); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_set1_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const 
int8_t a; const int8_t r[16]; } test_vec[] = { { -INT8_C( 57), { -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57) } }, { -INT8_C( 62), { -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62) } }, { -INT8_C( 94), { -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94) } }, { -INT8_C( 11), { -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11) } }, { -INT8_C( 57), { -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57) } }, { INT8_C( 73), { INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73) } }, { INT8_C( 60), { INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60) } }, { -INT8_C( 6), { -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6) } } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i r = simde_mm_set1_epi8(test_vec[i].a); simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[i].r)); } return 0; } static int test_simde_mm_set1_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { int16_t a; simde__m128i r; } test_vec[8] = { { -22932, simde_mm_set_epi16(INT16_C(-22932), INT16_C(-22932), INT16_C(-22932), INT16_C(-22932), INT16_C(-22932), INT16_C(-22932), INT16_C(-22932), INT16_C(-22932)) }, { 23064, simde_mm_set_epi16(INT16_C( 23064), INT16_C( 23064), INT16_C( 23064), INT16_C( 23064), INT16_C( 23064), INT16_C( 23064), INT16_C( 23064), INT16_C( 23064)) }, { 29063, simde_mm_set_epi16(INT16_C( 29063), INT16_C( 29063), INT16_C( 29063), INT16_C( 29063), INT16_C( 29063), INT16_C( 29063), INT16_C( 29063), INT16_C( 29063)) }, { -6254, simde_mm_set_epi16(INT16_C( -6254), INT16_C( -6254), INT16_C( -6254), INT16_C( -6254), INT16_C( -6254), INT16_C( -6254), INT16_C( -6254), INT16_C( -6254)) }, { 23328, simde_mm_set_epi16(INT16_C( 23328), INT16_C( 23328), INT16_C( 23328), INT16_C( 23328), INT16_C( 23328), INT16_C( 23328), INT16_C( 23328), INT16_C( 23328)) }, { 12202, simde_mm_set_epi16(INT16_C( 12202), INT16_C( 12202), INT16_C( 12202), INT16_C( 12202), INT16_C( 12202), INT16_C( 12202), INT16_C( 12202), INT16_C( 12202)) }, { 26711, simde_mm_set_epi16(INT16_C( 26711), INT16_C( 26711), INT16_C( 26711), INT16_C( 26711), INT16_C( 26711), INT16_C( 26711), INT16_C( 26711), INT16_C( 26711)) }, { -9629, simde_mm_set_epi16(INT16_C( -9629), INT16_C( -9629), INT16_C( -9629), INT16_C( -9629), INT16_C( -9629), INT16_C( -9629), INT16_C( -9629), INT16_C( -9629)) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_set1_epi16(test_vec[i].a); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_set1_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a; const int32_t r[4]; } test_vec[] = { { -INT32_C( 
1379277210), { -INT32_C( 1379277210), -INT32_C( 1379277210), -INT32_C( 1379277210), -INT32_C( 1379277210) } }, { INT32_C( 1628685468), { INT32_C( 1628685468), INT32_C( 1628685468), INT32_C( 1628685468), INT32_C( 1628685468) } }, { INT32_C( 1687738541), { INT32_C( 1687738541), INT32_C( 1687738541), INT32_C( 1687738541), INT32_C( 1687738541) } }, { INT32_C( 1891425133), { INT32_C( 1891425133), INT32_C( 1891425133), INT32_C( 1891425133), INT32_C( 1891425133) } }, { INT32_C( 1695660386), { INT32_C( 1695660386), INT32_C( 1695660386), INT32_C( 1695660386), INT32_C( 1695660386) } }, { INT32_C( 1846447439), { INT32_C( 1846447439), INT32_C( 1846447439), INT32_C( 1846447439), INT32_C( 1846447439) } }, { INT32_C( 958687000), { INT32_C( 958687000), INT32_C( 958687000), INT32_C( 958687000), INT32_C( 958687000) } }, { -INT32_C( 1238079408), { -INT32_C( 1238079408), -INT32_C( 1238079408), -INT32_C( 1238079408), -INT32_C( 1238079408) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i r = simde_mm_set1_epi32(test_vec[i].a); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_mm_set1_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float64 a; simde__m128d r; } test_vec[8] = { { SIMDE_FLOAT64_C( 922.45), simde_mm_set_pd(SIMDE_FLOAT64_C( 922.45), SIMDE_FLOAT64_C( 922.45)) }, { SIMDE_FLOAT64_C( -599.83), simde_mm_set_pd(SIMDE_FLOAT64_C( -599.83), SIMDE_FLOAT64_C( -599.83)) }, { SIMDE_FLOAT64_C( -398.06), simde_mm_set_pd(SIMDE_FLOAT64_C( -398.06), SIMDE_FLOAT64_C( -398.06)) }, { SIMDE_FLOAT64_C( 758.75), simde_mm_set_pd(SIMDE_FLOAT64_C( 758.75), SIMDE_FLOAT64_C( 758.75)) }, { SIMDE_FLOAT64_C( -273.82), simde_mm_set_pd(SIMDE_FLOAT64_C( -273.82), SIMDE_FLOAT64_C( -273.82)) }, { SIMDE_FLOAT64_C( -320.64), simde_mm_set_pd(SIMDE_FLOAT64_C( -320.64), SIMDE_FLOAT64_C( -320.64)) }, { SIMDE_FLOAT64_C( 627.18), simde_mm_set_pd(SIMDE_FLOAT64_C( 627.18), SIMDE_FLOAT64_C( 627.18)) }, { 
SIMDE_FLOAT64_C( 433.85), simde_mm_set_pd(SIMDE_FLOAT64_C( 433.85), SIMDE_FLOAT64_C( 433.85)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_set1_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_set_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { double e0; double e1; simde__m128d r; } test_vec[8] = { { (1062807988.00), 4166063422.00, simde_mm_set_pd(SIMDE_FLOAT64_C(1062807988.00), SIMDE_FLOAT64_C(4166063422.00)) }, { (4089462150.00), 3301875355.00, simde_mm_set_pd(SIMDE_FLOAT64_C(4089462150.00), SIMDE_FLOAT64_C(3301875355.00)) }, { (2961047618.00), 1310362259.00, simde_mm_set_pd(SIMDE_FLOAT64_C(2961047618.00), SIMDE_FLOAT64_C(1310362259.00)) }, { (491413403.00), 2980697460.00, simde_mm_set_pd(SIMDE_FLOAT64_C(491413403.00), SIMDE_FLOAT64_C(2980697460.00)) }, { (3027292014.00), 1034055676.00, simde_mm_set_pd(SIMDE_FLOAT64_C(3027292014.00), SIMDE_FLOAT64_C(1034055676.00)) }, { (133655993.00), 2416999239.00, simde_mm_set_pd(SIMDE_FLOAT64_C(133655993.00), SIMDE_FLOAT64_C(2416999239.00)) }, { (2396615078.00), 517112175.00, simde_mm_set_pd(SIMDE_FLOAT64_C(2396615078.00), SIMDE_FLOAT64_C(517112175.00)) }, { (628434760.00), 1544868779.00, simde_mm_set_pd(SIMDE_FLOAT64_C(628434760.00), SIMDE_FLOAT64_C(1544868779.00)) }, }; // printf("\n"); // for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { // double e0 = munit_rand_uint32() ; // double e1 = munit_rand_uint32() ; // simde__m128d_private r; // r = simde__m128d_to_private(simde_mm_set_pd(e0, e1)); // printf(" { (%*.2f), %*.2f,\n", 8, e0, 8, e1); // printf(" simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n", 8, r.f64[1], 8, r.f64[0]); // } // return MUNIT_FAIL; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_set_pd(test_vec[i].e0, test_vec[i].e1); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int 
test_simde_mm_set_pd1(SIMDE_MUNIT_TEST_ARGS) { const struct { double a; simde__m128d r; } test_vec[8] = { { (983122077.00), simde_mm_set_pd(SIMDE_FLOAT64_C(983122077.00), SIMDE_FLOAT64_C(983122077.00)) }, { (2243688041.00), simde_mm_set_pd(SIMDE_FLOAT64_C(2243688041.00), SIMDE_FLOAT64_C(2243688041.00)) }, { (1259032742.00), simde_mm_set_pd(SIMDE_FLOAT64_C(1259032742.00), SIMDE_FLOAT64_C(1259032742.00)) }, { (945157531.00), simde_mm_set_pd(SIMDE_FLOAT64_C(945157531.00), SIMDE_FLOAT64_C(945157531.00)) }, { (2547177525.00), simde_mm_set_pd(SIMDE_FLOAT64_C(2547177525.00), SIMDE_FLOAT64_C(2547177525.00)) }, { (2112014239.00), simde_mm_set_pd(SIMDE_FLOAT64_C(2112014239.00), SIMDE_FLOAT64_C(2112014239.00)) }, { (1570949017.00), simde_mm_set_pd(SIMDE_FLOAT64_C(1570949017.00), SIMDE_FLOAT64_C(1570949017.00)) }, { (1215464208.00), simde_mm_set_pd(SIMDE_FLOAT64_C(1215464208.00), SIMDE_FLOAT64_C(1215464208.00)) }, }; // printf("\n"); // for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { // double a = munit_rand_uint32() ; // simde__m128d_private r; // r = simde__m128d_to_private(simde_mm_set_pd1(a)); // printf(" { (%*.2f),\n", 8, a); // printf(" simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n", 8, r.f64[1], 8, r.f64[0]); // } // return MUNIT_FAIL; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_set_pd1(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_set_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float64 a; simde__m128d r; } test_vec[8] = { { SIMDE_FLOAT64_C( -222.00), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -222.00)) }, { SIMDE_FLOAT64_C( 804.62), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 804.62)) }, { SIMDE_FLOAT64_C( 845.92), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 845.92)) }, { SIMDE_FLOAT64_C( 892.20), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 892.20)) }, 
{ SIMDE_FLOAT64_C( 233.47), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 233.47)) }, { SIMDE_FLOAT64_C( -916.51), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -916.51)) }, { SIMDE_FLOAT64_C( -0.11), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.11)) }, { SIMDE_FLOAT64_C( -843.72), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -843.72)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_set_sd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_set1_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_pi64(INT64_C(5509445936599134262)), simde_mm_set_epi64x(INT64_C(5509445936599134262), INT64_C(5509445936599134262)) }, { simde_x_mm_set_pi64(INT64_C(6533321325309895597)), simde_mm_set_epi64x(INT64_C(6533321325309895597), INT64_C(6533321325309895597)) }, { simde_x_mm_set_pi64(INT64_C(8570268616515205604)), simde_mm_set_epi64x(INT64_C(8570268616515205604), INT64_C(8570268616515205604)) }, { simde_x_mm_set_pi64(INT64_C(6893954556242409981)), simde_mm_set_epi64x(INT64_C(6893954556242409981), INT64_C(6893954556242409981)) }, { simde_x_mm_set_pi64(INT64_C( 479685313418970755)), simde_mm_set_epi64x(INT64_C( 479685313418970755), INT64_C( 479685313418970755)) }, { simde_x_mm_set_pi64(INT64_C(1310625044422752521)), simde_mm_set_epi64x(INT64_C(1310625044422752521), INT64_C(1310625044422752521)) }, { simde_x_mm_set_pi64(INT64_C(-9181800088333422881)), simde_mm_set_epi64x(INT64_C(-9181800088333422881), INT64_C(-9181800088333422881)) }, { simde_x_mm_set_pi64(INT64_C(-4247659939651135559)), simde_mm_set_epi64x(INT64_C(-4247659939651135559), INT64_C(-4247659939651135559)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_set1_epi64(test_vec[i].a); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int 
test_simde_mm_set1_epi64x(SIMDE_MUNIT_TEST_ARGS) { const struct { int64_t a; simde__m128i r; } test_vec[8] = { { INT64_C(-7342192307236287075), simde_mm_set_epi64x(INT64_C(-7342192307236287075), INT64_C(-7342192307236287075)) }, { INT64_C(-8079223173243549940), simde_mm_set_epi64x(INT64_C(-8079223173243549940), INT64_C(-8079223173243549940)) }, { INT64_C(8128959178680760661), simde_mm_set_epi64x(INT64_C(8128959178680760661), INT64_C(8128959178680760661)) }, { INT64_C(6271233176655491948), simde_mm_set_epi64x(INT64_C(6271233176655491948), INT64_C(6271233176655491948)) }, { INT64_C(3474926301195230116), simde_mm_set_epi64x(INT64_C(3474926301195230116), INT64_C(3474926301195230116)) }, { INT64_C(-5217363481586450008), simde_mm_set_epi64x(INT64_C(-5217363481586450008), INT64_C(-5217363481586450008)) }, { INT64_C(-7156667910834929798), simde_mm_set_epi64x(INT64_C(-7156667910834929798), INT64_C(-7156667910834929798)) }, { INT64_C(8467790055770652882), simde_mm_set_epi64x(INT64_C(8467790055770652882), INT64_C(8467790055770652882)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_set1_epi64x(test_vec[i].a); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_setr_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { int8_t a[16]; simde__m128i r; } test_vec[8] = { { { -117, 101, -68, -84, 57, -16, 14, 112, -4, -62, 47, -17, 21, 25, -74, 93 }, simde_mm_set_epi8(INT8_C( 93), INT8_C( -74), INT8_C( 25), INT8_C( 21), INT8_C( -17), INT8_C( 47), INT8_C( -62), INT8_C( -4), INT8_C( 112), INT8_C( 14), INT8_C( -16), INT8_C( 57), INT8_C( -84), INT8_C( -68), INT8_C( 101), INT8_C(-117)) }, { { -121, -99, 93, 75, -45, 61, -29, 21, 43, -55, -114, 120, 9, -25, 107, 106 }, simde_mm_set_epi8(INT8_C( 106), INT8_C( 107), INT8_C( -25), INT8_C( 9), INT8_C( 120), INT8_C(-114), INT8_C( -55), INT8_C( 43), INT8_C( 21), INT8_C( -29), INT8_C( 61), INT8_C( -45), INT8_C( 75), INT8_C( 93), INT8_C( -99), INT8_C(-121)) }, { { 17, 
120, 33, -15, -38, -48, 75, -19, 105, -73, -87, 91, 57, 125, 70, 11 }, simde_mm_set_epi8(INT8_C( 11), INT8_C( 70), INT8_C( 125), INT8_C( 57), INT8_C( 91), INT8_C( -87), INT8_C( -73), INT8_C( 105), INT8_C( -19), INT8_C( 75), INT8_C( -48), INT8_C( -38), INT8_C( -15), INT8_C( 33), INT8_C( 120), INT8_C( 17)) }, { { 56, -40, 93, 54, 0, -115, -62, 6, 10, -58, -12, 31, -96, 67, 12, 19 }, simde_mm_set_epi8(INT8_C( 19), INT8_C( 12), INT8_C( 67), INT8_C( -96), INT8_C( 31), INT8_C( -12), INT8_C( -58), INT8_C( 10), INT8_C( 6), INT8_C( -62), INT8_C(-115), INT8_C( 0), INT8_C( 54), INT8_C( 93), INT8_C( -40), INT8_C( 56)) }, { { 37, -21, 96, -83, 46, -81, -51, -14, 127, 26, -91, -48, 45, -55, -111, 109 }, simde_mm_set_epi8(INT8_C( 109), INT8_C(-111), INT8_C( -55), INT8_C( 45), INT8_C( -48), INT8_C( -91), INT8_C( 26), INT8_C( 127), INT8_C( -14), INT8_C( -51), INT8_C( -81), INT8_C( 46), INT8_C( -83), INT8_C( 96), INT8_C( -21), INT8_C( 37)) }, { { -77, 43, 114, -94, -36, -86, -18, 18, 14, -4, 99, 78, 44, 70, 105, -91 }, simde_mm_set_epi8(INT8_C( -91), INT8_C( 105), INT8_C( 70), INT8_C( 44), INT8_C( 78), INT8_C( 99), INT8_C( -4), INT8_C( 14), INT8_C( 18), INT8_C( -18), INT8_C( -86), INT8_C( -36), INT8_C( -94), INT8_C( 114), INT8_C( 43), INT8_C( -77)) }, { { 125, -73, -25, -106, -9, 112, -96, 59, 61, -50, 73, -71, 13, 0, -64, -15 }, simde_mm_set_epi8(INT8_C( -15), INT8_C( -64), INT8_C( 0), INT8_C( 13), INT8_C( -71), INT8_C( 73), INT8_C( -50), INT8_C( 61), INT8_C( 59), INT8_C( -96), INT8_C( 112), INT8_C( -9), INT8_C(-106), INT8_C( -25), INT8_C( -73), INT8_C( 125)) }, { { 76, 81, -62, 21, -3, 99, -61, 126, -15, -95, 99, -34, 78, 36, 56, -38 }, simde_mm_set_epi8(INT8_C( -38), INT8_C( 56), INT8_C( 36), INT8_C( 78), INT8_C( -34), INT8_C( 99), INT8_C( -95), INT8_C( -15), INT8_C( 126), INT8_C( -61), INT8_C( 99), INT8_C( -3), INT8_C( 21), INT8_C( -62), INT8_C( 81), INT8_C( 76)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = 
simde_mm_setr_epi8(test_vec[i].a[ 0], test_vec[i].a[ 1], test_vec[i].a[ 2], test_vec[i].a[ 3], test_vec[i].a[ 4], test_vec[i].a[ 5], test_vec[i].a[ 6], test_vec[i].a[ 7], test_vec[i].a[ 8], test_vec[i].a[ 9], test_vec[i].a[10], test_vec[i].a[11], test_vec[i].a[12], test_vec[i].a[13], test_vec[i].a[14], test_vec[i].a[15]); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_setr_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { int16_t a[8]; simde__m128i r; } test_vec[8] = { { { -10562, -1563, 3119, 8148, -20473, 28066, 19911, 32415 }, simde_mm_set_epi16(INT16_C( 32415), INT16_C( 19911), INT16_C( 28066), INT16_C(-20473), INT16_C( 8148), INT16_C( 3119), INT16_C( -1563), INT16_C(-10562)) }, { { -5842, -19524, 19809, -4522, -18693, -13515, 10296, -11468 }, simde_mm_set_epi16(INT16_C(-11468), INT16_C( 10296), INT16_C(-13515), INT16_C(-18693), INT16_C( -4522), INT16_C( 19809), INT16_C(-19524), INT16_C( -5842)) }, { { 21973, -10968, -22468, 4564, 15035, 4920, 15286, 10966 }, simde_mm_set_epi16(INT16_C( 10966), INT16_C( 15286), INT16_C( 4920), INT16_C( 15035), INT16_C( 4564), INT16_C(-22468), INT16_C(-10968), INT16_C( 21973)) }, { { -30861, 17137, 12124, 23736, -1854, 30822, -26631, 14095 }, simde_mm_set_epi16(INT16_C( 14095), INT16_C(-26631), INT16_C( 30822), INT16_C( -1854), INT16_C( 23736), INT16_C( 12124), INT16_C( 17137), INT16_C(-30861)) }, { { -8301, -14416, -32194, -4341, 1212, 26290, -16654, -9801 }, simde_mm_set_epi16(INT16_C( -9801), INT16_C(-16654), INT16_C( 26290), INT16_C( 1212), INT16_C( -4341), INT16_C(-32194), INT16_C(-14416), INT16_C( -8301)) }, { { -5842, 17831, 171, 10031, 7446, 23430, -5408, -23387 }, simde_mm_set_epi16(INT16_C(-23387), INT16_C( -5408), INT16_C( 23430), INT16_C( 7446), INT16_C( 10031), INT16_C( 171), INT16_C( 17831), INT16_C( -5842)) }, { { 3343, -24774, -5050, 25934, -13848, 27661, 13484, -5817 }, simde_mm_set_epi16(INT16_C( -5817), INT16_C( 13484), INT16_C( 27661), INT16_C(-13848), INT16_C( 25934), 
INT16_C( -5050), INT16_C(-24774), INT16_C( 3343)) }, { { 27516, -24147, -18268, 10553, 12061, -22335, 29977, -25416 }, simde_mm_set_epi16(INT16_C(-25416), INT16_C( 29977), INT16_C(-22335), INT16_C( 12061), INT16_C( 10553), INT16_C(-18268), INT16_C(-24147), INT16_C( 27516)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_setr_epi16(test_vec[i].a[ 0], test_vec[i].a[ 1], test_vec[i].a[ 2], test_vec[i].a[ 3], test_vec[i].a[ 4], test_vec[i].a[ 5], test_vec[i].a[ 6], test_vec[i].a[ 7]); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_setr_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { int32_t a[4]; simde__m128i r; } test_vec[8] = { { { INT32_C( 576930619), INT32_C(-1056617076), INT32_C( 1391020156), INT32_C( -119436850) }, simde_mm_set_epi32(INT32_C( -119436850), INT32_C( 1391020156), INT32_C(-1056617076), INT32_C( 576930619)) }, { { INT32_C(-2038323421), INT32_C(-1916700674), INT32_C( 1438851519), INT32_C( 1990196695) }, simde_mm_set_epi32(INT32_C( 1990196695), INT32_C( 1438851519), INT32_C(-1916700674), INT32_C(-2038323421)) }, { { INT32_C( 1146758814), INT32_C( 625179194), INT32_C(-1226824864), INT32_C(-1523319395) }, simde_mm_set_epi32(INT32_C(-1523319395), INT32_C(-1226824864), INT32_C( 625179194), INT32_C( 1146758814)) }, { { INT32_C( -276839793), INT32_C( 1178530072), INT32_C(-1956542830), INT32_C( -556652843) }, simde_mm_set_epi32(INT32_C( -556652843), INT32_C(-1956542830), INT32_C( 1178530072), INT32_C( -276839793)) }, { { INT32_C(-1720519476), INT32_C( 147115658), INT32_C( 736217848), INT32_C(-1149123643) }, simde_mm_set_epi32(INT32_C(-1149123643), INT32_C( 736217848), INT32_C( 147115658), INT32_C(-1720519476)) }, { { INT32_C( 1888725856), INT32_C( -696349459), INT32_C(-1872984731), INT32_C( 1198325431) }, simde_mm_set_epi32(INT32_C( 1198325431), INT32_C(-1872984731), INT32_C( -696349459), INT32_C( 1888725856)) }, { { INT32_C( 1999809110), INT32_C( -469856594), 
INT32_C(-1721902839), INT32_C(-1910021155) }, simde_mm_set_epi32(INT32_C(-1910021155), INT32_C(-1721902839), INT32_C( -469856594), INT32_C( 1999809110)) }, { { INT32_C( 57396463), INT32_C(-1219624618), INT32_C( -492678555), INT32_C(-1751286944) }, simde_mm_set_epi32(INT32_C(-1751286944), INT32_C( -492678555), INT32_C(-1219624618), INT32_C( 57396463)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_setr_epi32(test_vec[i].a[ 0], test_vec[i].a[ 1], test_vec[i].a[ 2], test_vec[i].a[ 3]); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_setr_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 e1; simde__m64 e0; simde__m128i r; } test_vec[8] = { { simde_mm_cvtsi64_m64(INT64_C(-4101257248168872649)), simde_mm_cvtsi64_m64(INT64_C(-2723834683478465794)), simde_mm_set_epi64x(INT64_C(-2723834683478465794), INT64_C(-4101257248168872649)) }, { simde_mm_cvtsi64_m64(INT64_C(-2051996013747413745)), simde_mm_cvtsi64_m64(INT64_C(-3184937756541660331)), simde_mm_set_epi64x(INT64_C(-3184937756541660331), INT64_C(-2051996013747413745)) }, { simde_mm_cvtsi64_m64(INT64_C(-1223296052051875883)), simde_mm_cvtsi64_m64(INT64_C( 3027248353112135930)), simde_mm_set_epi64x(INT64_C( 3027248353112135930), INT64_C(-1223296052051875883)) }, { simde_mm_cvtsi64_m64(INT64_C(-8279962275226206621)), simde_mm_cvtsi64_m64(INT64_C(-2814925648380381958)), simde_mm_set_epi64x(INT64_C(-2814925648380381958), INT64_C(-8279962275226206621)) }, { simde_mm_cvtsi64_m64(INT64_C( 6755033167475904984)), simde_mm_cvtsi64_m64(INT64_C(-8685825248847164354)), simde_mm_set_epi64x(INT64_C(-8685825248847164354), INT64_C( 6755033167475904984)) }, { simde_mm_cvtsi64_m64(INT64_C( 1859833649283237251)), simde_mm_cvtsi64_m64(INT64_C( 4744285272371342192)), simde_mm_set_epi64x(INT64_C( 4744285272371342192), INT64_C( 1859833649283237251)) }, { simde_mm_cvtsi64_m64(INT64_C(-2932310525767688549)), 
simde_mm_cvtsi64_m64(INT64_C(-5821145293930307405)), simde_mm_set_epi64x(INT64_C(-5821145293930307405), INT64_C(-2932310525767688549)) }, { simde_mm_cvtsi64_m64(INT64_C( 6748921357249852483)), simde_mm_cvtsi64_m64(INT64_C(-4633625703225321444)), simde_mm_set_epi64x(INT64_C(-4633625703225321444), INT64_C( 6748921357249852483)) }, }; // printf("\n"); // for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { // simde__m64_private a, b; // simde__m128i_private r; // munit_rand_memory(sizeof(a), (uint8_t*) &a); // munit_rand_memory(sizeof(b), (uint8_t*) &b); // r = simde__m128i_to_private(simde_mm_setr_epi64(simde__m64_from_private(a), simde__m64_from_private(b))); // printf(" { simde_mm_cvtsi64_m64(INT64_C(%20" PRId64 ")),\n", a.i64[0]); // printf(" simde_mm_cvtsi64_m64(INT64_C(%20" PRId64 ")),\n", b.i64[0]); // printf(" simde_mm_set_epi64x(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")) },\n", r.i64[1], r.i64[0]); // } // return MUNIT_FAIL; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_setr_epi64(test_vec[i].e1, test_vec[i].e0); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_setzero_pd(SIMDE_MUNIT_TEST_ARGS) { simde__m128d a = simde_mm_set1_pd(0); simde__m128d r = simde_mm_setzero_pd(); simde_assert_m128d_equal(a, r); return 0; } static int test_simde_mm_setzero_si128(SIMDE_MUNIT_TEST_ARGS) { simde__m128i a = simde_mm_set1_epi32(0); simde__m128i r = simde_mm_setzero_si128(); simde_assert_m128i_i32(a, ==, r); return 0; } static int test_simde_mm_shuffle_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( -749480461), INT32_C(-1872761030), INT32_C( 1690143325), INT32_C( -258848374)), simde_mm_set_epi32(INT32_C(-1872761030), INT32_C(-1872761030), INT32_C(-1872761030), INT32_C(-1872761030)) }, { simde_mm_set_epi32(INT32_C( 1030695986), INT32_C( 1932252260), INT32_C( 1962976759), 
INT32_C(-1621624916)), simde_mm_set_epi32(INT32_C( 1932252260), INT32_C( 1932252260), INT32_C( 1932252260), INT32_C( 1932252260)) }, { simde_mm_set_epi32(INT32_C( -897180326), INT32_C( 1675136548), INT32_C( 1746269378), INT32_C( 1984702409)), simde_mm_set_epi32(INT32_C( 1675136548), INT32_C( 1675136548), INT32_C( 1675136548), INT32_C( 1675136548)) }, { simde_mm_set_epi32(INT32_C( -11612835), INT32_C(-1878653813), INT32_C(-2135957543), INT32_C( -134555953)), simde_mm_set_epi32(INT32_C(-1878653813), INT32_C(-1878653813), INT32_C(-1878653813), INT32_C(-1878653813)) }, { simde_mm_set_epi32(INT32_C( 1051337342), INT32_C( 755742115), INT32_C( 338927136), INT32_C( 1410014436)), simde_mm_set_epi32(INT32_C( 755742115), INT32_C( 755742115), INT32_C( 755742115), INT32_C( 755742115)) }, { simde_mm_set_epi32(INT32_C(-1826960183), INT32_C( -119444047), INT32_C(-1224980361), INT32_C( 1323381864)), simde_mm_set_epi32(INT32_C( -119444047), INT32_C( -119444047), INT32_C( -119444047), INT32_C( -119444047)) }, { simde_mm_set_epi32(INT32_C( 1256541920), INT32_C( 1446192699), INT32_C( -117794523), INT32_C(-1904270778)), simde_mm_set_epi32(INT32_C( 1446192699), INT32_C( 1446192699), INT32_C( 1446192699), INT32_C( 1446192699)) }, { simde_mm_set_epi32(INT32_C( 542509546), INT32_C(-1970305999), INT32_C(-1492486994), INT32_C( 1078541043)), simde_mm_set_epi32(INT32_C(-1970305999), INT32_C(-1970305999), INT32_C(-1970305999), INT32_C(-1970305999)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_shuffle_epi32(test_vec[i].a, 0xaa); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_setr_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { double e1; double e0; simde__m128d r; } test_vec[8] = { { 0.74, 0.57, simde_mm_set_pd(SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.74)) }, { 0.52, 0.66, simde_mm_set_pd(SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.52)) }, { 0.54, 0.56, simde_mm_set_pd(SIMDE_FLOAT64_C( 0.56), 
SIMDE_FLOAT64_C( 0.54)) }, { 0.95, 0.43, simde_mm_set_pd(SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 0.95)) }, { 0.53, 0.46, simde_mm_set_pd(SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 0.53)) }, { 0.33, 0.39, simde_mm_set_pd(SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 0.33)) }, { 0.48, 0.63, simde_mm_set_pd(SIMDE_FLOAT64_C( 0.63), SIMDE_FLOAT64_C( 0.48)) }, { 0.08, 0.44, simde_mm_set_pd(SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.08)) }, }; // printf("\n"); // for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) { // double e1 = munit_rand_double(); // double e0 = munit_rand_double(); // simde__m128d_private r; // r = simde__m128d_to_private(simde_mm_setr_pd(e1, e0)); // printf(" { %*.2f, %*.2f,\n", 8, e1 , 8, e0); // printf(" simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n", 8, r.f64[1], 8, r.f64[0]); // } // return MUNIT_FAIL; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_setr_pd(test_vec[i].e1, test_vec[i].e0); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_shuffle_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 897.05), SIMDE_FLOAT64_C( 524.15)), simde_mm_set_pd(SIMDE_FLOAT64_C( -346.39), SIMDE_FLOAT64_C( -595.93)), simde_mm_set_pd(SIMDE_FLOAT64_C( -595.93), SIMDE_FLOAT64_C( 524.15)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -684.88), SIMDE_FLOAT64_C( 62.45)), simde_mm_set_pd(SIMDE_FLOAT64_C( 765.70), SIMDE_FLOAT64_C( -126.52)), simde_mm_set_pd(SIMDE_FLOAT64_C( -126.52), SIMDE_FLOAT64_C( 62.45)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -871.69), SIMDE_FLOAT64_C( -753.55)), simde_mm_set_pd(SIMDE_FLOAT64_C( -923.31), SIMDE_FLOAT64_C( -103.97)), simde_mm_set_pd(SIMDE_FLOAT64_C( -103.97), SIMDE_FLOAT64_C( -753.55)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -377.03), SIMDE_FLOAT64_C( 701.23)), simde_mm_set_pd(SIMDE_FLOAT64_C( -672.47), SIMDE_FLOAT64_C( 
-328.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( -328.63), SIMDE_FLOAT64_C( 701.23)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 238.70), SIMDE_FLOAT64_C( 837.56)), simde_mm_set_pd(SIMDE_FLOAT64_C( -429.19), SIMDE_FLOAT64_C( 106.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( 106.67), SIMDE_FLOAT64_C( 837.56)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 571.83), SIMDE_FLOAT64_C( -389.51)), simde_mm_set_pd(SIMDE_FLOAT64_C( 447.48), SIMDE_FLOAT64_C( -8.02)), simde_mm_set_pd(SIMDE_FLOAT64_C( -8.02), SIMDE_FLOAT64_C( -389.51)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -214.27), SIMDE_FLOAT64_C( 549.07)), simde_mm_set_pd(SIMDE_FLOAT64_C( -967.02), SIMDE_FLOAT64_C( -162.29)), simde_mm_set_pd(SIMDE_FLOAT64_C( -162.29), SIMDE_FLOAT64_C( 549.07)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -528.33), SIMDE_FLOAT64_C( 376.34)), simde_mm_set_pd(SIMDE_FLOAT64_C( -959.95), SIMDE_FLOAT64_C( -855.93)), simde_mm_set_pd(SIMDE_FLOAT64_C( -855.93), SIMDE_FLOAT64_C( 376.34)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_shuffle_pd(test_vec[i].a, test_vec[i].b, 0); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_shufflehi_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( 3588), INT16_C(-23598), INT16_C( -2669), INT16_C( -7880), INT16_C( 20391), INT16_C( 13327), INT16_C( 18868), INT16_C( 31239)), simde_mm_set_epi16(INT16_C( 3588), INT16_C( 3588), INT16_C( -2669), INT16_C( -2669), INT16_C( 20391), INT16_C( 13327), INT16_C( 18868), INT16_C( 31239)) }, { simde_mm_set_epi16(INT16_C( 5701), INT16_C( 15357), INT16_C( 27973), INT16_C(-26447), INT16_C(-18797), INT16_C(-27249), INT16_C( -9707), INT16_C( -1950)), simde_mm_set_epi16(INT16_C( 5701), INT16_C( 5701), INT16_C( 27973), INT16_C( 27973), INT16_C(-18797), INT16_C(-27249), INT16_C( -9707), INT16_C( -1950)) }, { simde_mm_set_epi16(INT16_C(-14544), INT16_C( 26887), INT16_C( -7591), INT16_C( 22567), INT16_C( 
-8366), INT16_C(-11381), INT16_C( 1736), INT16_C(-23069)), simde_mm_set_epi16(INT16_C(-14544), INT16_C(-14544), INT16_C( -7591), INT16_C( -7591), INT16_C( -8366), INT16_C(-11381), INT16_C( 1736), INT16_C(-23069)) }, { simde_mm_set_epi16(INT16_C( 31637), INT16_C( 12965), INT16_C(-23234), INT16_C(-12784), INT16_C( 364), INT16_C( 7338), INT16_C( 16998), INT16_C(-14384)), simde_mm_set_epi16(INT16_C( 31637), INT16_C( 31637), INT16_C(-23234), INT16_C(-23234), INT16_C( 364), INT16_C( 7338), INT16_C( 16998), INT16_C(-14384)) }, { simde_mm_set_epi16(INT16_C( 20104), INT16_C(-31033), INT16_C( 12782), INT16_C( -8281), INT16_C( 17249), INT16_C( -1757), INT16_C(-22510), INT16_C(-23902)), simde_mm_set_epi16(INT16_C( 20104), INT16_C( 20104), INT16_C( 12782), INT16_C( 12782), INT16_C( 17249), INT16_C( -1757), INT16_C(-22510), INT16_C(-23902)) }, { simde_mm_set_epi16(INT16_C( 28403), INT16_C(-26721), INT16_C( -6834), INT16_C(-28104), INT16_C( -6404), INT16_C( -5723), INT16_C(-30154), INT16_C( -4442)), simde_mm_set_epi16(INT16_C( 28403), INT16_C( 28403), INT16_C( -6834), INT16_C( -6834), INT16_C( -6404), INT16_C( -5723), INT16_C(-30154), INT16_C( -4442)) }, { simde_mm_set_epi16(INT16_C( 18671), INT16_C( -6207), INT16_C( 14078), INT16_C(-30976), INT16_C(-25644), INT16_C(-24126), INT16_C( 10939), INT16_C(-13801)), simde_mm_set_epi16(INT16_C( 18671), INT16_C( 18671), INT16_C( 14078), INT16_C( 14078), INT16_C(-25644), INT16_C(-24126), INT16_C( 10939), INT16_C(-13801)) }, { simde_mm_set_epi16(INT16_C(-28546), INT16_C( 12696), INT16_C(-10401), INT16_C( -8517), INT16_C( 29702), INT16_C(-10694), INT16_C( 25940), INT16_C( 28112)), simde_mm_set_epi16(INT16_C(-28546), INT16_C(-28546), INT16_C(-10401), INT16_C(-10401), INT16_C( 29702), INT16_C(-10694), INT16_C( 25940), INT16_C( 28112)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_shufflehi_epi16(test_vec[i].a, 245); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int 
test_simde_mm_shufflelo_epi16(SIMDE_MUNIT_TEST_ARGS) { simde__m128i a, r, e; a = simde_mm_set_epi16(INT16_C(-24821), INT16_C(-30256), INT16_C( 8570), INT16_C( 11360), INT16_C(-20759), INT16_C(-23279), INT16_C( 9158), INT16_C( -6205)); e = simde_mm_set_epi16(INT16_C(-24821), INT16_C(-30256), INT16_C( 8570), INT16_C( 11360), INT16_C( -6205), INT16_C(-23279), INT16_C( 9158), INT16_C(-20759)); r = simde_mm_shufflelo_epi16(a, 39); simde_assert_m128i_i16(r, ==, e); a = simde_mm_set_epi16(INT16_C(-26644), INT16_C( -8695), INT16_C( -9741), INT16_C(-14158), INT16_C( -3323), INT16_C( 7181), INT16_C( 10186), INT16_C(-16906)); e = simde_mm_set_epi16(INT16_C(-26644), INT16_C( -8695), INT16_C( -9741), INT16_C(-14158), INT16_C( 7181), INT16_C(-16906), INT16_C( 10186), INT16_C( -3323)); r = simde_mm_shufflelo_epi16(a, 135); simde_assert_m128i_i16(r, ==, e); a = simde_mm_set_epi16(INT16_C(-20225), INT16_C( 19920), INT16_C( -3607), INT16_C( 11889), INT16_C( 12271), INT16_C(-20589), INT16_C( 17338), INT16_C( -7507)); e = simde_mm_set_epi16(INT16_C(-20225), INT16_C( 19920), INT16_C( -3607), INT16_C( 11889), INT16_C( 12271), INT16_C( 17338), INT16_C( 12271), INT16_C( -7507)); r = simde_mm_shufflelo_epi16(a, 220); simde_assert_m128i_i16(r, ==, e); a = simde_mm_set_epi16(INT16_C( -8042), INT16_C(-18261), INT16_C( 20990), INT16_C(-18752), INT16_C( 26566), INT16_C(-27202), INT16_C( -3939), INT16_C( -1274)); e = simde_mm_set_epi16(INT16_C( -8042), INT16_C(-18261), INT16_C( 20990), INT16_C(-18752), INT16_C( -3939), INT16_C( 26566), INT16_C( -3939), INT16_C( -1274)); r = simde_mm_shufflelo_epi16(a, 116); simde_assert_m128i_i16(r, ==, e); a = simde_mm_set_epi16(INT16_C( 5383), INT16_C(-27918), INT16_C( 16559), INT16_C(-31608), INT16_C( 6504), INT16_C(-11225), INT16_C(-13396), INT16_C( 20261)); e = simde_mm_set_epi16(INT16_C( 5383), INT16_C(-27918), INT16_C( 16559), INT16_C(-31608), INT16_C(-13396), INT16_C(-13396), INT16_C(-13396), INT16_C(-13396)); r = simde_mm_shufflelo_epi16(a, 85); 
simde_assert_m128i_i16(r, ==, e); a = simde_mm_set_epi16(INT16_C( -8905), INT16_C( 30480), INT16_C( 20250), INT16_C( 30), INT16_C( 24188), INT16_C( 21861), INT16_C( -9955), INT16_C( 6282)); e = simde_mm_set_epi16(INT16_C( -8905), INT16_C( 30480), INT16_C( 20250), INT16_C( 30), INT16_C( 6282), INT16_C( 24188), INT16_C( 21861), INT16_C( 21861)); r = simde_mm_shufflelo_epi16(a, 58); simde_assert_m128i_i16(r, ==, e); a = simde_mm_set_epi16(INT16_C( 7654), INT16_C( 4685), INT16_C( 25749), INT16_C(-30088), INT16_C( -7783), INT16_C( 10182), INT16_C( 23640), INT16_C( 4937)); e = simde_mm_set_epi16(INT16_C( 7654), INT16_C( 4685), INT16_C( 25749), INT16_C(-30088), INT16_C( -7783), INT16_C( 4937), INT16_C( 23640), INT16_C( 4937)); r = simde_mm_shufflelo_epi16(a, 196); simde_assert_m128i_i16(r, ==, e); a = simde_mm_set_epi16(INT16_C(-26752), INT16_C( 9125), INT16_C(-14825), INT16_C( 13732), INT16_C( 15859), INT16_C(-32053), INT16_C(-12419), INT16_C( 17722)); e = simde_mm_set_epi16(INT16_C(-26752), INT16_C( 9125), INT16_C(-14825), INT16_C( 13732), INT16_C( 15859), INT16_C( 17722), INT16_C(-12419), INT16_C(-32053)); r = simde_mm_shufflelo_epi16(a, 198); simde_assert_m128i_i16(r, ==, e); return 0; } static int test_simde_mm_sra_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i count; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( 28258), INT16_C( 1159), INT16_C( 20634), INT16_C(-30158), INT16_C( 10049), INT16_C(-31721), INT16_C(-26691), INT16_C(-28181)), simde_mm_set_epi16(INT16_C( 11), INT16_C( 6), INT16_C( 10), INT16_C( 8), INT16_C( 15), INT16_C( 3), INT16_C( 8), INT16_C( 1)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1)) }, { simde_mm_set_epi16(INT16_C(-25682), INT16_C( 7964), INT16_C( 1259), INT16_C( 18017), INT16_C( 10765), INT16_C(-10649), INT16_C( -9400), INT16_C( 12110)), simde_mm_set_epi16(INT16_C( 3), INT16_C( 8), INT16_C( 3), INT16_C( 2), INT16_C( 
10), INT16_C( 5), INT16_C( 2), INT16_C( 2)), simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)) }, { simde_mm_set_epi16(INT16_C(-24685), INT16_C( 14370), INT16_C( 13079), INT16_C( -6409), INT16_C(-18776), INT16_C( 20941), INT16_C( 22692), INT16_C( 312)), simde_mm_set_epi16(INT16_C( 13), INT16_C( 13), INT16_C( 0), INT16_C( 7), INT16_C( 4), INT16_C( 10), INT16_C( 15), INT16_C( 11)), simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_epi16(INT16_C( 13442), INT16_C(-32489), INT16_C(-21378), INT16_C( 10156), INT16_C( 15393), INT16_C( 20131), INT16_C( 15138), INT16_C(-12589)), simde_mm_set_epi16(INT16_C( 1), INT16_C( 3), INT16_C( 1), INT16_C( 8), INT16_C( 12), INT16_C( 3), INT16_C( 4), INT16_C( 2)), simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1)) }, { simde_mm_set_epi16(INT16_C( -9561), INT16_C( 25554), INT16_C( -5305), INT16_C( -7173), INT16_C(-10064), INT16_C( 31075), INT16_C( 30218), INT16_C(-18929)), simde_mm_set_epi16(INT16_C( 5), INT16_C( 10), INT16_C( 8), INT16_C( 1), INT16_C( 12), INT16_C( 3), INT16_C( 10), INT16_C( 10)), simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1)) }, { simde_mm_set_epi16(INT16_C( 14091), INT16_C( 24202), INT16_C( -8543), INT16_C( -7482), INT16_C(-26143), INT16_C( 20277), INT16_C(-27984), INT16_C(-32658)), simde_mm_set_epi16(INT16_C( 5), INT16_C( 7), INT16_C( 6), INT16_C( 6), INT16_C( 10), INT16_C( 3), INT16_C( 7), INT16_C( 11)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1)) }, { simde_mm_set_epi16(INT16_C(-19726), INT16_C( 12311), INT16_C( 16279), INT16_C( -6277), INT16_C( 19874), INT16_C(-27089), INT16_C( 14524), INT16_C(-14305)), 
simde_mm_set_epi16(INT16_C( 7), INT16_C( 7), INT16_C( 3), INT16_C( 1), INT16_C( 12), INT16_C( 1), INT16_C( 10), INT16_C( 9)), simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1)) }, { simde_mm_set_epi16(INT16_C( 19801), INT16_C(-12786), INT16_C( 31632), INT16_C( 19030), INT16_C(-19420), INT16_C(-12406), INT16_C( 12426), INT16_C( 27612)), simde_mm_set_epi16(INT16_C( 2), INT16_C( 0), INT16_C( 10), INT16_C( 3), INT16_C( 9), INT16_C( 0), INT16_C( 1), INT16_C( 8)), simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_sra_epi16(test_vec[i].a, test_vec[i].count); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_sll_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i count; simde__m128i r; } test_vec[] = { { simde_mm_set_epi16(INT16_C(-11777), INT16_C( 26803), INT16_C(-29366), INT16_C(-28135), INT16_C( 26578), INT16_C(-22566), INT16_C(-18521), INT16_C( -1087)), simde_mm_set_epi64x(INT64_C(-1766274549416496901), ~INT64_C(0)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_epi16(INT16_C( 20694), INT16_C(-29451), INT16_C(-14684), INT16_C( 26977), INT16_C( 9617), INT16_C( 4798), INT16_C( 6081), INT16_C( 26243)), simde_mm_set_epi64x(INT64_C(4317315664183993059), INT64_C( 0)), simde_mm_set_epi16(INT16_C( 20694), INT16_C(-29451), INT16_C(-14684), INT16_C( 26977), INT16_C( 9617), INT16_C( 4798), INT16_C( 6081), INT16_C( 26243)) }, { simde_mm_set_epi16(INT16_C(-19602), INT16_C(-30869), INT16_C( -4506), INT16_C( 7721), INT16_C( 10990), INT16_C(-12116), INT16_C( 29998), INT16_C( -194)), simde_mm_set_epi64x(INT64_C(5323917981768999693), INT64_C( 1)), simde_mm_set_epi16(INT16_C( 26332), 
INT16_C( 3798), INT16_C( -9012), INT16_C( 15442), INT16_C( 21980), INT16_C(-24232), INT16_C( -5540), INT16_C( -388)) }, { simde_mm_set_epi16(INT16_C( -7669), INT16_C(-27334), INT16_C( 24496), INT16_C( 27065), INT16_C( 13859), INT16_C( 2295), INT16_C( 31737), INT16_C( -2884)), simde_mm_set_epi64x(INT64_C(4743197663988711830), INT64_C( 2)), simde_mm_set_epi16(INT16_C(-30676), INT16_C( 21736), INT16_C( 32448), INT16_C(-22812), INT16_C(-10100), INT16_C( 9180), INT16_C( -4124), INT16_C(-11536)) }, { simde_mm_set_epi16(INT16_C( -8360), INT16_C( 29662), INT16_C( 6226), INT16_C( 10396), INT16_C(-32749), INT16_C( 20802), INT16_C( 12391), INT16_C( 4472)), simde_mm_set_epi64x(INT64_C(-4440768506472940517), INT64_C( 3)), simde_mm_set_epi16(INT16_C( -1344), INT16_C(-24848), INT16_C(-15728), INT16_C( 17632), INT16_C( 152), INT16_C(-30192), INT16_C(-31944), INT16_C(-29760)) }, { simde_mm_set_epi16(INT16_C( 26979), INT16_C( -773), INT16_C( 29656), INT16_C( 12973), INT16_C(-28581), INT16_C( -1290), INT16_C( 25294), INT16_C( -882)), simde_mm_set_epi64x(INT64_C(-8434753600973098893), INT64_C( 4)), simde_mm_set_epi16(INT16_C(-27088), INT16_C(-12368), INT16_C( 15744), INT16_C( 10960), INT16_C( 1456), INT16_C(-20640), INT16_C( 11488), INT16_C(-14112)) }, { simde_mm_set_epi16(INT16_C(-20013), INT16_C( 14301), INT16_C(-17775), INT16_C(-12493), INT16_C(-22187), INT16_C( -2203), INT16_C( 22935), INT16_C( -5230)), simde_mm_set_epi64x(INT64_C(-718166367052449426), INT64_C( 13)), simde_mm_set_epi16(INT16_C( 24576), INT16_C(-24576), INT16_C( 8192), INT16_C( 24576), INT16_C(-24576), INT16_C(-24576), INT16_C( -8192), INT16_C( 16384)) }, { simde_mm_set_epi16(INT16_C( -9377), INT16_C(-13109), INT16_C( 2614), INT16_C(-17099), INT16_C(-13260), INT16_C( 21790), INT16_C( 8183), INT16_C( 12820)), simde_mm_set_epi64x(INT64_C(-3082182550035776352), INT64_C( 14)), simde_mm_set_epi16(INT16_C(-16384), INT16_C(-16384), INT16_C(-32768), INT16_C( 16384), INT16_C( 0), INT16_C(-32768), INT16_C(-16384), INT16_C( 
0)) }, { simde_mm_set_epi16(INT16_C( 21339), INT16_C(-22944), INT16_C( 30792), INT16_C(-23288), INT16_C(-13340), INT16_C( 7657), INT16_C( 8339), INT16_C( 10093)), simde_mm_set_epi64x(INT64_C(-8360903661682410487), INT64_C( 15)), simde_mm_set_epi16(INT16_C(-32768), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768)) }, { simde_mm_set_epi16(INT16_C(-12198), INT16_C( 1510), INT16_C( -3241), INT16_C(-10552), INT16_C(-10041), INT16_C( 23083), INT16_C( 11931), INT16_C( 10037)), simde_mm_set_epi64x(INT64_C(7382630779200792207), INT64_C( 16)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm_set_epi16(INT16_C( -4565), INT16_C(-19321), INT16_C( 29437), INT16_C( -8916), INT16_C( 18870), INT16_C(-29403), INT16_C( 667), INT16_C(-22848)), simde_mm_set_epi64x(INT64_C( 7403670930710815), INT64_C( 17)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_sll_epi16(test_vec[i].a, test_vec[i].count); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_sll_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i count; simde__m128i r; } test_vec[50] = { { simde_mm_set_epi32(INT32_C( 1847585989), INT32_C( -535718080), INT32_C(-1279093253), INT32_C( 656800013)), simde_x_mm_set_epu64x(UINT64_C( 2450913859380011969), UINT64_C(18446744073709551615)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 894927109), INT32_C( 930856884), INT32_C( 350764320), INT32_C( 435252602)), simde_x_mm_set_epu64x(UINT64_C( 4964670149549210828), UINT64_C( 0)), simde_mm_set_epi32(INT32_C( 894927109), INT32_C( 930856884), INT32_C( 350764320), INT32_C( 435252602)) }, { simde_mm_set_epi32(INT32_C( 
-264520264), INT32_C( 2022498436), INT32_C(-1437067245), INT32_C( 482847980)), simde_x_mm_set_epu64x(UINT64_C( 7326459959939805716), UINT64_C( 1)), simde_mm_set_epi32(INT32_C( -529040528), INT32_C( -249970424), INT32_C( 1420832806), INT32_C( 965695960)) }, { simde_mm_set_epi32(INT32_C( -73269821), INT32_C(-1137239147), INT32_C( 168132057), INT32_C( -131743227)), simde_x_mm_set_epu64x(UINT64_C( 1477135654656320870), UINT64_C( 2)), simde_mm_set_epi32(INT32_C( -293079284), INT32_C( -253989292), INT32_C( 672528228), INT32_C( -526972908)) }, { simde_mm_set_epi32(INT32_C( 676475770), INT32_C( 743649739), INT32_C( 1613393787), INT32_C( 257685631)), simde_x_mm_set_epu64x(UINT64_C(14989079754060836033), UINT64_C( 3)), simde_mm_set_epi32(INT32_C( 1116838864), INT32_C( 1654230616), INT32_C( 22248408), INT32_C( 2061485048)) }, { simde_mm_set_epi32(INT32_C( 1293905571), INT32_C(-1134008712), INT32_C(-1835354706), INT32_C( -173430307)), simde_x_mm_set_epu64x(UINT64_C(15716033284919086785), UINT64_C( 29)), simde_mm_set_epi32(INT32_C( 1610612736), INT32_C( 0), INT32_C(-1073741824), INT32_C(-1610612736)) }, { simde_mm_set_epi32(INT32_C(-1608827194), INT32_C( -758406839), INT32_C(-1895836042), INT32_C(-1122971027)), simde_x_mm_set_epu64x(UINT64_C( 240001894519477005), UINT64_C( 30)), simde_mm_set_epi32( INT32_MIN , INT32_C( 1073741824), INT32_MIN , INT32_C( 1073741824)) }, { simde_mm_set_epi32(INT32_C( 1629035853), INT32_C( 172553194), INT32_C( 533866060), INT32_C( 504662481)), simde_x_mm_set_epu64x(UINT64_C(16117634661514065169), UINT64_C( 31)), simde_mm_set_epi32( INT32_MIN , INT32_C( 0), INT32_C( 0), INT32_MIN ) }, { simde_mm_set_epi32(INT32_C(-1841013582), INT32_C(-1759681954), INT32_C(-1933278842), INT32_C( 1138123852)), simde_x_mm_set_epu64x(UINT64_C(16122278597987411920), UINT64_C( 32)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm_set_epi32(INT32_C( 1016164793), INT32_C( 934378122), INT32_C( 1851284098), INT32_C( 118468072)), 
simde_x_mm_set_epu64x(UINT64_C( 9847102169886565139), UINT64_C( 33)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_sll_epi32(test_vec[i].a, test_vec[i].count); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_sll_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i count; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C( 8055788268748421105), INT64_C(-9066834056558614160)), simde_mm_set_epi64x(INT64_C(-5262793264663215472), INT64_C(16)), simde_mm_set_epi64x(INT64_C(-1675408870841712640), INT64_C( 2483371706739064832)) }, { simde_mm_set_epi64x(INT64_C( 2441732847819780871), INT64_C( -124127278813603777)), simde_mm_set_epi64x(INT64_C(-8018169735231443299), INT64_C( 1)), simde_mm_set_epi64x(INT64_C( 4883465695639561742), INT64_C( -248254557627207554)) }, { simde_mm_set_epi64x(INT64_C(-2211386688605493428), INT64_C( -350563182553241755)), simde_mm_set_epi64x(INT64_C( 1150552132815785095), INT64_C(12)), simde_mm_set_epi64x(INT64_C( -488536336711237632), INT64_C( 2939242011266797568)) }, { simde_mm_set_epi64x(INT64_C( 2987527187015640759), INT64_C( 638426944527652749)), simde_mm_set_epi64x(INT64_C(-1714103729784977145), INT64_C( 5)), simde_mm_set_epi64x(INT64_C( 3367149615952746208), INT64_C( 1982918151175336352)) }, { simde_mm_set_epi64x(INT64_C( 4972525455608644218), INT64_C( 6137457836149854777)), simde_mm_set_epi64x(INT64_C(-8922909725876665702), INT64_C( 2)), simde_mm_set_epi64x(INT64_C( 1443357748725025256), INT64_C( 6103087270889867492)) }, { simde_mm_set_epi64x(INT64_C(-6484089245702098359), INT64_C( 413459708861121590)), simde_mm_set_epi64x(INT64_C( 7011241116916112587), INT64_C(15)), simde_mm_set_epi64x(INT64_C(-1038162179743514624), INT64_C( 8337589858421374976)) }, { simde_mm_set_epi64x(INT64_C(-1797418312522800237), INT64_C( 3481510514608785630)), 
simde_mm_set_epi64x(INT64_C( 4951339001913100627), INT64_C(13)), simde_mm_set_epi64x(INT64_C(-3949045366557351936), INT64_C( 1867797720205082624)) }, { simde_mm_set_epi64x(INT64_C( 7626804351806608498), INT64_C(-4244380112569402483)), simde_mm_set_epi64x(INT64_C( 1577848631857250403), INT64_C( 4)), simde_mm_set_epi64x(INT64_C(-7098338887061125344), INT64_C( 5876894493727766736)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_sll_epi64(test_vec[i].a, test_vec[i].count); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_sqrt_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 481.04), SIMDE_FLOAT64_C( 845.64) }, { SIMDE_FLOAT64_C( 21.93), SIMDE_FLOAT64_C( 29.08) } }, { { SIMDE_FLOAT64_C( 520.60), SIMDE_FLOAT64_C( 759.12) }, { SIMDE_FLOAT64_C( 22.82), SIMDE_FLOAT64_C( 27.55) } }, { { SIMDE_FLOAT64_C( 35.64), SIMDE_FLOAT64_C( 486.89) }, { SIMDE_FLOAT64_C( 5.97), SIMDE_FLOAT64_C( 22.07) } }, { { SIMDE_FLOAT64_C( -79.78), SIMDE_FLOAT64_C( 723.70) }, { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 26.90) } }, { { SIMDE_FLOAT64_C( 719.24), SIMDE_FLOAT64_C( 373.08) }, { SIMDE_FLOAT64_C( 26.82), SIMDE_FLOAT64_C( 19.32) } }, { { SIMDE_FLOAT64_C( 497.67), SIMDE_FLOAT64_C( 489.69) }, { SIMDE_FLOAT64_C( 22.31), SIMDE_FLOAT64_C( 22.13) } }, { { SIMDE_FLOAT64_C( 925.51), SIMDE_FLOAT64_C( 932.27) }, { SIMDE_FLOAT64_C( 30.42), SIMDE_FLOAT64_C( 30.53) } }, { { SIMDE_FLOAT64_C( -49.82), SIMDE_FLOAT64_C( 705.12) }, { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 26.55) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_sqrt_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_sqrt_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; 
simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 1.82), SIMDE_FLOAT64_C( 868.47)), simde_mm_set_pd(SIMDE_FLOAT64_C( 180.11), SIMDE_FLOAT64_C( 621.52)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.82), SIMDE_FLOAT64_C( 24.93)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 458.20), SIMDE_FLOAT64_C( 211.55)), simde_mm_set_pd(SIMDE_FLOAT64_C( 430.02), SIMDE_FLOAT64_C( 152.28)), simde_mm_set_pd(SIMDE_FLOAT64_C( 458.20), SIMDE_FLOAT64_C( 12.34)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 790.70), SIMDE_FLOAT64_C( 272.49)), simde_mm_set_pd(SIMDE_FLOAT64_C( 882.78), SIMDE_FLOAT64_C( 929.30)), simde_mm_set_pd(SIMDE_FLOAT64_C( 790.70), SIMDE_FLOAT64_C( 30.48)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 103.00), SIMDE_FLOAT64_C( 65.43)), simde_mm_set_pd(SIMDE_FLOAT64_C( 542.46), SIMDE_FLOAT64_C( 784.04)), simde_mm_set_pd(SIMDE_FLOAT64_C( 103.00), SIMDE_FLOAT64_C( 28.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 373.53), SIMDE_FLOAT64_C( 698.61)), simde_mm_set_pd(SIMDE_FLOAT64_C( 142.54), SIMDE_FLOAT64_C( 348.23)), simde_mm_set_pd(SIMDE_FLOAT64_C( 373.53), SIMDE_FLOAT64_C( 18.66)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 528.07), SIMDE_FLOAT64_C( 477.87)), simde_mm_set_pd(SIMDE_FLOAT64_C( 384.87), SIMDE_FLOAT64_C( 433.33)), simde_mm_set_pd(SIMDE_FLOAT64_C( 528.07), SIMDE_FLOAT64_C( 20.82)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 241.09), SIMDE_FLOAT64_C( 679.09)), simde_mm_set_pd(SIMDE_FLOAT64_C( 322.35), SIMDE_FLOAT64_C( 620.04)), simde_mm_set_pd(SIMDE_FLOAT64_C( 241.09), SIMDE_FLOAT64_C( 24.90)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 651.18), SIMDE_FLOAT64_C( 446.59)), simde_mm_set_pd(SIMDE_FLOAT64_C( 886.36), SIMDE_FLOAT64_C( 269.28)), simde_mm_set_pd(SIMDE_FLOAT64_C( 651.18), SIMDE_FLOAT64_C( 16.41)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_sqrt_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_srl_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i 
a; simde__m128i count; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( 1445), INT16_C( 14472), INT16_C(-18508), INT16_C( -4645), INT16_C(-24581), INT16_C(-12656), INT16_C( 1275), INT16_C(-25245)), simde_mm_set_epi64x(INT64_C( 4678230141678036905), INT64_C( 4)), simde_mm_set_epi16(INT16_C( 90), INT16_C( 904), INT16_C( 2939), INT16_C( 3805), INT16_C( 2559), INT16_C( 3305), INT16_C( 79), INT16_C( 2518)) }, { simde_mm_set_epi16(INT16_C( 986), INT16_C( 31796), INT16_C(-12770), INT16_C(-28401), INT16_C( 15186), INT16_C(-17595), INT16_C( 31992), INT16_C( 19329)), simde_mm_set_epi64x(INT64_C( 234386534661459961), INT64_C( 2)), simde_mm_set_epi16(INT16_C( 246), INT16_C( 7949), INT16_C( 13191), INT16_C( 9283), INT16_C( 3796), INT16_C( 11985), INT16_C( 7998), INT16_C( 4832)) }, { simde_mm_set_epi16(INT16_C(-23898), INT16_C( 7158), INT16_C( 21829), INT16_C(-16536), INT16_C( 2052), INT16_C( -6635), INT16_C( 18408), INT16_C( -3755)), simde_mm_set_epi64x(INT64_C( 8276161762185938564), INT64_C( 7)), simde_mm_set_epi16(INT16_C( 325), INT16_C( 55), INT16_C( 170), INT16_C( 382), INT16_C( 16), INT16_C( 460), INT16_C( 143), INT16_C( 482)) }, { simde_mm_set_epi16(INT16_C(-19513), INT16_C(-10508), INT16_C(-12500), INT16_C( 22379), INT16_C( 4775), INT16_C( 8063), INT16_C( 8132), INT16_C( 7840)), simde_mm_set_epi64x(INT64_C( 1101003055866698034), INT64_C( 6)), simde_mm_set_epi16(INT16_C( 719), INT16_C( 859), INT16_C( 828), INT16_C( 349), INT16_C( 74), INT16_C( 125), INT16_C( 127), INT16_C( 122)) }, { simde_mm_set_epi16(INT16_C( 9942), INT16_C( 29561), INT16_C( -4121), INT16_C(-26882), INT16_C(-17939), INT16_C( 13186), INT16_C( 6796), INT16_C( 14206)), simde_mm_set_epi64x(INT64_C( 735258903315099979), INT64_C( 1)), simde_mm_set_epi16(INT16_C( 4971), INT16_C( 14780), INT16_C( 30707), INT16_C( 19327), INT16_C( 23798), INT16_C( 6593), INT16_C( 3398), INT16_C( 7103)) }, { simde_mm_set_epi16(INT16_C( 5648), INT16_C(-13469), INT16_C(-23201), INT16_C( 7029), INT16_C(-28211), 
INT16_C(-14496), INT16_C( 31202), INT16_C(-32095)), simde_mm_set_epi64x(INT64_C( 4870695400140482879), INT64_C(13)), simde_mm_set_epi16(INT16_C( 0), INT16_C( 6), INT16_C( 5), INT16_C( 0), INT16_C( 4), INT16_C( 6), INT16_C( 3), INT16_C( 4)) }, { simde_mm_set_epi16(INT16_C( 11526), INT16_C( 20336), INT16_C( 18003), INT16_C( 21727), INT16_C(-28471), INT16_C(-32732), INT16_C(-25472), INT16_C( 12636)), simde_mm_set_epi64x(INT64_C(-6737308052137237000), INT64_C( 3)), simde_mm_set_epi16(INT16_C( 1440), INT16_C( 2542), INT16_C( 2250), INT16_C( 2715), INT16_C( 4633), INT16_C( 4100), INT16_C( 5008), INT16_C( 1579)) }, { simde_mm_set_epi16(INT16_C(-30386), INT16_C( -2761), INT16_C( 11467), INT16_C( 9929), INT16_C(-19380), INT16_C(-12818), INT16_C( -4584), INT16_C( -6145)), simde_mm_set_epi64x(INT64_C(-2450775638354168945), INT64_C( 3)), simde_mm_set_epi16(INT16_C( 4393), INT16_C( 7846), INT16_C( 1433), INT16_C( 1241), INT16_C( 5769), INT16_C( 6589), INT16_C( 7619), INT16_C( 7423)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_srl_epi16(test_vec[i].a, test_vec[i].count); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_srl_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i count; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 94713992), INT32_C(-1212879397), INT32_C(-1610887536), INT32_C( 83598691)), simde_mm_set_epi64x(INT64_C( 4678230141678036905), INT64_C( 4)), simde_mm_set_epi32(INT32_C( 5919624), INT32_C( 192630493), INT32_C( 167754985), INT32_C( 5224918)) }, { simde_mm_set_epi32(INT32_C( 64650292), INT32_C( -836857585), INT32_C( 995277637), INT32_C( 2096647041)), simde_mm_set_epi64x(INT64_C( 234386534661459961), INT64_C( 2)), simde_mm_set_epi32(INT32_C( 16162573), INT32_C( 864527427), INT32_C( 248819409), INT32_C( 524161760)) }, { simde_mm_set_epi32(INT32_C(-1566172170), INT32_C( 1430634344), INT32_C( 134538773), INT32_C( 1206448469)), 
simde_mm_set_epi64x(INT64_C( 8276161762185938564), INT64_C( 7)), simde_mm_set_epi32(INT32_C( 21318711), INT32_C( 11176830), INT32_C( 1051084), INT32_C( 9425378)) }, { simde_mm_set_epi32(INT32_C(-1278748940), INT32_C( -819177621), INT32_C( 312942463), INT32_C( 532946592)), simde_mm_set_epi64x(INT64_C( 1101003055866698034), INT64_C( 6)), simde_mm_set_epi32(INT32_C( 47128411), INT32_C( 54309213), INT32_C( 4889725), INT32_C( 8327290)) }, { simde_mm_set_epi32(INT32_C( 651588473), INT32_C( -270035202), INT32_C(-1175637118), INT32_C( 445396862)), simde_mm_set_epi64x(INT64_C( 735258903315099979), INT64_C( 1)), simde_mm_set_epi32(INT32_C( 325794236), INT32_C( 2012466047), INT32_C( 1559665089), INT32_C( 222698431)) }, { simde_mm_set_epi32(INT32_C( 370199395), INT32_C(-1520493707), INT32_C(-1848785056), INT32_C( 2044887713)), simde_mm_set_epi64x(INT64_C( 4870695400140482879), INT64_C(13)), simde_mm_set_epi32(INT32_C( 45190), INT32_C( 338680), INT32_C( 298606), INT32_C( 249620)) }, { simde_mm_set_epi32(INT32_C( 755388272), INT32_C( 1179866335), INT32_C(-1865842652), INT32_C(-1669320356)), simde_mm_set_epi64x(INT64_C(-6737308052137237000), INT64_C( 3)), simde_mm_set_epi32(INT32_C( 94423534), INT32_C( 147483291), INT32_C( 303640580), INT32_C( 328205867)) }, { simde_mm_set_epi32(INT32_C(-1991314121), INT32_C( 751511241), INT32_C(-1270034962), INT32_C( -300357633)), simde_mm_set_epi64x(INT64_C(-2450775638354168945), INT64_C( 3)), simde_mm_set_epi32(INT32_C( 287956646), INT32_C( 93938905), INT32_C( 378116541), INT32_C( 499326207)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_srl_epi32(test_vec[i].a, test_vec[i].count); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_srl_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i count; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C( 406793501195693531), INT64_C(-6918709284570423965)), 
simde_mm_set_epi64x(INT64_C( 4678230141678036905), INT64_C( 4)), simde_mm_set_epi64x(INT64_C( 25424593824730845), INT64_C( 720502174321195478)) }, { simde_mm_set_epi64x(INT64_C( 277670893274960143), INT64_C( 4274684903451806593)), simde_mm_set_epi64x(INT64_C( 234386534661459961), INT64_C( 2)), simde_mm_set_epi64x(INT64_C( 69417723318740035), INT64_C( 1068671225862951648)) }, { simde_mm_set_epi64x(INT64_C(-6726658248624717976), INT64_C( 577839631285416277)), simde_mm_set_epi64x(INT64_C( 8276161762185938564), INT64_C( 7)), simde_mm_set_epi64x(INT64_C( 91563170508475262), INT64_C( 4514372119417314)) }, { simde_mm_set_epi64x(INT64_C(-5492184873618876565), INT64_C( 1344077644647636640)), simde_mm_set_epi64x(INT64_C( 1101003055866698034), INT64_C( 6)), simde_mm_set_epi64x(INT64_C( 202414987501416797), INT64_C( 21001213197619322)) }, { simde_mm_set_epi64x(INT64_C( 2798551186010511102), INT64_C(-5049322973328296066)), simde_mm_set_epi64x(INT64_C( 735258903315099979), INT64_C( 1)), simde_mm_set_epi64x(INT64_C( 1399275593005255551), INT64_C( 6698710550190627775)) }, { simde_mm_set_epi64x(INT64_C( 1589994297298459509), INT64_C(-7940471350808640863)), simde_mm_set_epi64x(INT64_C( 4870695400140482879), INT64_C(13)), simde_mm_set_epi64x(INT64_C( 194091100744440), INT64_C( 1282503994494740)) }, { simde_mm_set_epi64x(INT64_C( 3244367925201818847), INT64_C(-8013733167196262052)), simde_mm_set_epi64x(INT64_C(-6737308052137237000), INT64_C( 3)), simde_mm_set_epi64x(INT64_C( 405545990650227355), INT64_C( 1304126363314161195)) }, { simde_mm_set_epi64x(INT64_C(-8552629025006475575), INT64_C(-5454758622571993089)), simde_mm_set_epi64x(INT64_C(-2450775638354168945), INT64_C( 3)), simde_mm_set_epi64x(INT64_C( 1236764381087884505), INT64_C( 1623998181392194815)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_srl_epi64(test_vec[i].a, test_vec[i].count); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int 
test_simde_mm_sra_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i count; simde__m128i r; } test_vec[] = { { simde_mm_set_epi32( -561103335, 1276754862, 1749589432, 517536789), simde_mm_set_epi64x(0, 3), simde_mm_set_epi32( -70137917, 159594357, 218698679, 64692098) }, { simde_mm_set_epi32( -159892315, -1509631224, -1642880399, 1227124763), simde_mm_set_epi64x(0, 31), simde_mm_set_epi32( -1, -1, -1, 0) }, { simde_mm_set_epi32(-1747665335, -1727232090, -1061986990, -1651964431), simde_mm_set_epi64x(0, 21), simde_mm_set_epi32( -834, -824, -507, -788) }, { simde_mm_set_epi32( -43034101, 1748997429, -1014034292, -471404994), simde_mm_set_epi64x(0, 23), simde_mm_set_epi32( -6, 208, -121, -57) }, { simde_mm_set_epi32( 663988211, 279391652, 930358665, 693100359), simde_mm_set_epi64x(0, 31), simde_mm_set_epi32( 0, 0, 0, 0) }, { simde_mm_set_epi32( 1596760027, -525985264, -1328341949, -1278585249), simde_mm_set_epi64x(0, 30), simde_mm_set_epi32( 1, -1, -2, -2) }, { simde_mm_set_epi32( 2099244913, -668946691, -1425692748, 1445785661), simde_mm_set_epi64x(0, 19), simde_mm_set_epi32( 4003, -1276, -2720, 2757) }, { simde_mm_set_epi32( -572539662, 1511976084, -2125946535, -1043884202), simde_mm_set_epi64x(0, 4), simde_mm_set_epi32( -35783729, 94498505, -132871659, -65242763) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_sra_epi32(test_vec[i].a, test_vec[i].count); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_srai_epi16(SIMDE_MUNIT_TEST_ARGS) { simde__m128i a, e, r; a = simde_mm_set_epi16(INT16_C( 11440), INT16_C( 15930), INT16_C( -6862), INT16_C(-12095), INT16_C( 2973), INT16_C(-25395), INT16_C(-12983), INT16_C(-25536)); e = simde_mm_set_epi16(INT16_C( 11440), INT16_C( 15930), INT16_C( -6862), INT16_C(-12095), INT16_C( 2973), INT16_C(-25395), INT16_C(-12983), INT16_C(-25536)); r = simde_mm_srai_epi16(a, 0); simde_assert_m128i_i16(r, ==, e); e = 
simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1)); r = simde_mm_srai_epi16(a, 16); simde_assert_m128i_i16(r, ==, e); e = simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1)); r = simde_mm_srai_epi16(a, 42); simde_assert_m128i_i16(r, ==, e); e = simde_mm_set_epi16(INT16_C( 89), INT16_C( 124), INT16_C( -54), INT16_C( -95), INT16_C( 23), INT16_C( -199), INT16_C( -102), INT16_C( -200)); r = simde_mm_srai_epi16(a, 7); simde_assert_m128i_i16(r, ==, e); e = simde_mm_set_epi16(INT16_C( 1), INT16_C( 1), INT16_C( -1), INT16_C( -2), INT16_C( 0), INT16_C( -4), INT16_C( -2), INT16_C( -4)); r = simde_mm_srai_epi16(a, 13); simde_assert_m128i_i16(r, ==, e); #if 0 e = simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1)); r = simde_mm_srai_epi16(a, -7); simde_assert_m128i_i16(r, ==, e); e = simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1)); r = simde_mm_srai_epi16(a, -42); simde_assert_m128i_i16(r, ==, e); e = simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1)); r = simde_mm_srai_epi16(a, 1729); simde_assert_m128i_i16(r, ==, e); #endif return 0; } static int test_simde_mm_srai_epi32(SIMDE_MUNIT_TEST_ARGS) { simde__m128i a, e, r; a = simde_mm_set_epi32(INT32_C(-1377123590), INT32_C( 1981969037), INT32_C( 1025592994), INT32_C( 1213959767)); e = simde_mm_set_epi32(INT32_C(-1377123590), INT32_C( 1981969037), INT32_C( 1025592994), INT32_C( 1213959767)); r = simde_mm_srai_epi32(a, 0); simde_assert_m128i_i16(r, ==, e); e = simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0)); r = simde_mm_srai_epi32(a, 32); simde_assert_m128i_i16(r, ==, e); e = simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), 
INT32_C( 0), INT32_C( 0)); r = simde_mm_srai_epi32(a, 42); simde_assert_m128i_i16(r, ==, e); e = simde_mm_set_epi32(INT32_C( -10758779), INT32_C( 15484133), INT32_C( 8012445), INT32_C( 9484060)); r = simde_mm_srai_epi32(a, 7); simde_assert_m128i_i16(r, ==, e); e = simde_mm_set_epi32(INT32_C( -168106), INT32_C( 241939), INT32_C( 125194), INT32_C( 148188)); r = simde_mm_srai_epi32(a, 13); simde_assert_m128i_i16(r, ==, e); #if 0 e = simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0)); r = simde_mm_srai_epi32(a, -7); simde_assert_m128i_i16(r, ==, e); e = simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0)); r = simde_mm_srai_epi32(a, -42); simde_assert_m128i_i16(r, ==, e); e = simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0)); r = simde_mm_srai_epi32(a, 1729); simde_assert_m128i_i16(r, ==, e); #endif return 0; } static int test_simde_mm_slli_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-29640), INT16_C( 27486), INT16_C(-30681), INT16_C( 22606), INT16_C(-21221), INT16_C( 7042), INT16_C( -7099), INT16_C(-13884)), simde_mm_set_epi16(INT16_C( 25024), INT16_C( 23280), INT16_C( 16696), INT16_C(-15760), INT16_C( 26840), INT16_C( -9200), INT16_C( 8744), INT16_C( 20000)) }, { simde_mm_set_epi16(INT16_C( 15230), INT16_C( 23269), INT16_C(-21546), INT16_C( 15633), INT16_C( 9645), INT16_C(-32001), INT16_C( -1446), INT16_C( -7049)), simde_mm_set_epi16(INT16_C( -9232), INT16_C(-10456), INT16_C( 24240), INT16_C( -6008), INT16_C( 11624), INT16_C( 6136), INT16_C(-11568), INT16_C( 9144)) }, { simde_mm_set_epi16(INT16_C( -4964), INT16_C( 29371), INT16_C( -7375), INT16_C( 7185), INT16_C(-25257), INT16_C( 29335), INT16_C( 15023), INT16_C( 23258)), simde_mm_set_epi16(INT16_C( 25824), INT16_C(-27176), INT16_C( 6536), INT16_C( -8056), INT16_C( -5448), INT16_C(-27464), INT16_C(-10888), INT16_C(-10544)) }, { simde_mm_set_epi16(INT16_C(-29984), 
INT16_C(-17481), INT16_C(-31241), INT16_C( 11397), INT16_C( 2926), INT16_C(-28904), INT16_C(-20560), INT16_C(-32448)), simde_mm_set_epi16(INT16_C( 22272), INT16_C( -8776), INT16_C( 12216), INT16_C( 25640), INT16_C( 23408), INT16_C( 30912), INT16_C( 32128), INT16_C( 2560)) }, { simde_mm_set_epi16(INT16_C(-18879), INT16_C( 5889), INT16_C(-27972), INT16_C( -4500), INT16_C(-12683), INT16_C( 25849), INT16_C( 24809), INT16_C( 26782)), simde_mm_set_epi16(INT16_C(-19960), INT16_C(-18424), INT16_C(-27168), INT16_C( 29536), INT16_C( 29608), INT16_C( 10184), INT16_C( 1864), INT16_C( 17648)) }, { simde_mm_set_epi16(INT16_C(-12553), INT16_C(-22953), INT16_C( 21946), INT16_C( -9017), INT16_C(-10462), INT16_C( -7608), INT16_C( 26015), INT16_C(-24893)), simde_mm_set_epi16(INT16_C( 30648), INT16_C( 12984), INT16_C(-21040), INT16_C( -6600), INT16_C(-18160), INT16_C( 4672), INT16_C( 11512), INT16_C( -2536)) }, { simde_mm_set_epi16(INT16_C( 23545), INT16_C( -728), INT16_C( 17963), INT16_C(-24889), INT16_C( 18443), INT16_C( 19433), INT16_C(-18886), INT16_C(-28120)), simde_mm_set_epi16(INT16_C( -8248), INT16_C( -5824), INT16_C( 12632), INT16_C( -2504), INT16_C( 16472), INT16_C( 24392), INT16_C(-20016), INT16_C(-28352)) }, { simde_mm_set_epi16(INT16_C( 1885), INT16_C(-18948), INT16_C(-21057), INT16_C( 636), INT16_C( -9667), INT16_C(-20298), INT16_C( 25111), INT16_C( 30554)), simde_mm_set_epi16(INT16_C( 15080), INT16_C(-20512), INT16_C( 28152), INT16_C( 5088), INT16_C(-11800), INT16_C(-31312), INT16_C( 4280), INT16_C(-17712)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0)); simde__m128i r = simde_mm_slli_epi16(test_vec[i].a, 3); simde_assert_m128i_i16(r, ==, test_vec[i].r); r = simde_mm_slli_epi16(test_vec[i].a, 0); simde_assert_m128i_i16(r, ==, test_vec[i].a); r = simde_mm_slli_epi16(test_vec[i].a, 32); simde_assert_m128i_i16(r, ==, zeros); r = simde_mm_slli_epi16(test_vec[i].a, 33); 
simde_assert_m128i_i16(r, ==, zeros); } return 0; } static int test_simde_mm_srli_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-29640), INT16_C( 27486), INT16_C(-30681), INT16_C( 22606), INT16_C(-21221), INT16_C( 7042), INT16_C( -7099), INT16_C(-13884)), simde_mm_set_epi16(INT16_C( 4487), INT16_C( 3435), INT16_C( 4356), INT16_C( 2825), INT16_C( 5539), INT16_C( 880), INT16_C( 7304), INT16_C( 6456)) }, { simde_mm_set_epi16(INT16_C( 15230), INT16_C( 23269), INT16_C(-21546), INT16_C( 15633), INT16_C( 9645), INT16_C(-32001), INT16_C( -1446), INT16_C( -7049)), simde_mm_set_epi16(INT16_C( 1903), INT16_C( 2908), INT16_C( 5498), INT16_C( 1954), INT16_C( 1205), INT16_C( 4191), INT16_C( 8011), INT16_C( 7310)) }, { simde_mm_set_epi16(INT16_C( -4964), INT16_C( 29371), INT16_C( -7375), INT16_C( 7185), INT16_C(-25257), INT16_C( 29335), INT16_C( 15023), INT16_C( 23258)), simde_mm_set_epi16(INT16_C( 7571), INT16_C( 3671), INT16_C( 7270), INT16_C( 898), INT16_C( 5034), INT16_C( 3666), INT16_C( 1877), INT16_C( 2907)) }, { simde_mm_set_epi16(INT16_C(-29984), INT16_C(-17481), INT16_C(-31241), INT16_C( 11397), INT16_C( 2926), INT16_C(-28904), INT16_C(-20560), INT16_C(-32448)), simde_mm_set_epi16(INT16_C( 4444), INT16_C( 6006), INT16_C( 4286), INT16_C( 1424), INT16_C( 365), INT16_C( 4579), INT16_C( 5622), INT16_C( 4136)) }, { simde_mm_set_epi16(INT16_C(-18879), INT16_C( 5889), INT16_C(-27972), INT16_C( -4500), INT16_C(-12683), INT16_C( 25849), INT16_C( 24809), INT16_C( 26782)), simde_mm_set_epi16(INT16_C( 5832), INT16_C( 736), INT16_C( 4695), INT16_C( 7629), INT16_C( 6606), INT16_C( 3231), INT16_C( 3101), INT16_C( 3347)) }, { simde_mm_set_epi16(INT16_C(-12553), INT16_C(-22953), INT16_C( 21946), INT16_C( -9017), INT16_C(-10462), INT16_C( -7608), INT16_C( 26015), INT16_C(-24893)), simde_mm_set_epi16(INT16_C( 6622), INT16_C( 5322), INT16_C( 2743), INT16_C( 7064), INT16_C( 6884), INT16_C( 7241), INT16_C( 3251), 
INT16_C( 5080)) }, { simde_mm_set_epi16(INT16_C( 23545), INT16_C( -728), INT16_C( 17963), INT16_C(-24889), INT16_C( 18443), INT16_C( 19433), INT16_C(-18886), INT16_C(-28120)), simde_mm_set_epi16(INT16_C( 2943), INT16_C( 8101), INT16_C( 2245), INT16_C( 5080), INT16_C( 2305), INT16_C( 2429), INT16_C( 5831), INT16_C( 4677)) }, { simde_mm_set_epi16(INT16_C( 1885), INT16_C(-18948), INT16_C(-21057), INT16_C( 636), INT16_C( -9667), INT16_C(-20298), INT16_C( 25111), INT16_C( 30554)), simde_mm_set_epi16(INT16_C( 235), INT16_C( 5823), INT16_C( 5559), INT16_C( 79), INT16_C( 6983), INT16_C( 5654), INT16_C( 3138), INT16_C( 3819)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0)); simde__m128i r = simde_mm_srli_epi16(test_vec[i].a, 3); simde_assert_m128i_i16(r, ==, test_vec[i].r); r = simde_mm_srli_epi16(test_vec[i].a, 0); simde_assert_m128i_i16(r, ==, test_vec[i].a); r = simde_mm_srli_epi16(test_vec[i].a, 16); simde_assert_m128i_i16(r, ==, zeros); r = simde_mm_srli_epi16(test_vec[i].a, 17); simde_assert_m128i_i16(r, ==, zeros); } return 0; } static int test_simde_mm_slli_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(-1285208672, 1618695439, -1484382898, 97979804), simde_mm_set_epi32( 1822995456, 258646496, -255612480, -1159613568) }, { simde_mm_set_epi32( -215274446, -1750972712, -2134111648, -338295419), simde_mm_set_epi32( 1701152320, -196551936, 427904000, 2059448480) }, { simde_mm_set_epi32( 406577052, -1272707531, -128013424, 1090211344), simde_mm_set_epi32( 125563776, -2071935328, 198537728, 527024640) }, { simde_mm_set_epi32( -547315834, 386023226, 789460810, -2097507270), simde_mm_set_epi32( -334237504, -532158656, -507057856, 1599244096) }, { simde_mm_set_epi32( 1943314584, 126005183, 199695502, -1431967820), simde_mm_set_epi32( 2056524544, -262801440, 2095288768, 1421670016) }, { simde_mm_set_epi32( 1899687789, 
-1162493730, 1537811436, 825134965), simde_mm_set_epi32( 660467104, 1454906304, 1965325696, 634515104) }, { simde_mm_set_epi32(-1452393292, -1781210226, -1307434085, -2039047771), simde_mm_set_epi32( 768054912, -1164152384, 1111782240, -825019232) }, { simde_mm_set_epi32(-1646930836, 816193989, -1662050152, 347461227), simde_mm_set_epi32(-1162179200, 348403872, -1645997312, -1766142624) } }; static const struct { const int32_t a[4]; const int32_t r[4]; } test_vec_18[] = { { { INT32_C( 2018447505), INT32_C( 2072485070), -INT32_C( 1063800373), INT32_C( 1619529499) }, { INT32_C( 1111752704), INT32_C( 1933049856), -INT32_C( 953417728), INT32_C( 1013710848) } }, { { INT32_C( 1312528525), -INT32_C( 1886008265), INT32_C( 615191858), INT32_C( 1445629892) }, { INT32_C( 1647575040), INT32_C( 819724288), INT32_C( 1422393344), INT32_C( 1058013184) } }, { { INT32_C( 1842248351), -INT32_C( 504867562), INT32_C( 564232198), INT32_C( 495004047) }, { -INT32_C( 360972288), INT32_C( 1415053312), INT32_C( 1572864), -INT32_C( 1506017280) } }, { { -INT32_C( 127157055), -INT32_C( 1148780408), -INT32_C( 622906602), INT32_C( 1630538178) }, { -INT32_C( 217841664), INT32_C( 35651584), -INT32_C( 866648064), INT32_C( 654835712) } }, { { -INT32_C( 1714487421), INT32_C( 1534834260), -INT32_C( 964944842), INT32_C( 132382278) }, { -INT32_C( 32768000), -INT32_C( 649068544), INT32_C( 1893203968), -INT32_C( 115867648) } }, { { INT32_C( 1124093626), INT32_C( 1711179599), INT32_C( 2084560314), INT32_C( 1792897254) }, { INT32_C( 988282880), INT32_C( 490471424), INT32_C( 1994915840), -INT32_C( 1013448704) } }, { { -INT32_C( 1023169681), -INT32_C( 1742832030), -INT32_C( 513893477), INT32_C( 1407730073) }, { -INT32_C( 1380188160), -INT32_C( 108527616), INT32_C( 1852571648), INT32_C( 107216896) } }, { { -INT32_C( 543758192), INT32_C( 709137520), INT32_C( 1487373169), INT32_C( 1656915187) }, { -INT32_C( 1572864000), INT32_C( 1371537408), INT32_C( 230948864), INT32_C( 332136448) } }, }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0)); simde__m128i r = simde_mm_slli_epi32(test_vec[i].a, 5); simde_assert_m128i_i32(r, ==, test_vec[i].r); r = simde_mm_slli_epi32(test_vec[i].a, 0); simde_assert_m128i_i32(r, ==, test_vec[i].a); r = simde_mm_slli_epi32(test_vec[i].a, 32); simde_assert_m128i_i32(r, ==, zeros); r = simde_mm_slli_epi32(test_vec[i].a, 33); simde_assert_m128i_i32(r, ==, zeros); } for (size_t i = 0 ; i < (sizeof(test_vec_18) / sizeof(test_vec_18[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi32(test_vec_18[i].a); simde__m128i r = simde_mm_slli_epi32(a, 18); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec_18[i].r)); } return 0; } static int test_simde_mm_srli_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32( 71624167, 617025209, -286267780, -1151099730), simde_mm_set_epi32( 2238255, 19282037, 125271859, 98245861) }, { simde_mm_set_epi32(-1660949423, 45505817, 1892774959, -917815961), simde_mm_set_epi32( 82313058, 1422056, 59149217, 105535979) }, { simde_mm_set_epi32( 1642659615, -757986143, -1891097222, 940303240), simde_mm_set_epi32( 51333112, 110530661, 75120939, 29384476) }, { simde_mm_set_epi32( 1761409447, 115333600, -589319110, -1530115830), simde_mm_set_epi32( 55044045, 3604175, 115801505, 86401608) }, { simde_mm_set_epi32( -502944468, -1500485927, 32222499, 1115657749), simde_mm_set_epi32( 118500713, 87327542, 1006953, 34864304) }, { simde_mm_set_epi32( -545012251, 924477372, -1883097200, 1327167226), simde_mm_set_epi32( 117186095, 28889917, 75370940, 41473975) }, { simde_mm_set_epi32( 995448668, 377764585, -1462273550, 1306007963), simde_mm_set_epi32( 31107770, 11805143, 88521679, 40812748) }, { simde_mm_set_epi32( 1991954175, 665906947, -606406775, 1678465696), simde_mm_set_epi32( 62248567, 20809592, 115267516, 52452053) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])); i++) { simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0)); simde__m128i r = simde_mm_srli_epi32(test_vec[i].a, 5); simde_assert_m128i_i32(r, ==, test_vec[i].r); r = simde_mm_srli_epi32(test_vec[i].a, 0); simde_assert_m128i_i32(r, ==, test_vec[i].a); r = simde_mm_srli_epi32(test_vec[i].a, 64); simde_assert_m128i_i32(r, ==, zeros); r = simde_mm_srli_epi32(test_vec[i].a, 65); simde_assert_m128i_i32(r, ==, zeros); } static const struct { const int32_t a[4]; const int32_t r[4]; } test_vec_18[] = { { { -INT32_C( 1359328745), -INT32_C( 408445706), INT32_C( 239121880), INT32_C( 748205077) }, { INT32_C( 11198), INT32_C( 14825), INT32_C( 912), INT32_C( 2854) } }, { { -INT32_C( 345859164), INT32_C( 1010393205), INT32_C( 1843309992), -INT32_C( 446698290) }, { INT32_C( 15064), INT32_C( 3854), INT32_C( 7031), INT32_C( 14679) } }, { { INT32_C( 764631350), -INT32_C( 837534730), INT32_C( 98325744), -INT32_C( 1405979384) }, { INT32_C( 2916), INT32_C( 13189), INT32_C( 375), INT32_C( 11020) } }, { { -INT32_C( 2053663728), -INT32_C( 1648176907), INT32_C( 1275764862), -INT32_C( 1020106099) }, { INT32_C( 8549), INT32_C( 10096), INT32_C( 4866), INT32_C( 12492) } }, { { -INT32_C( 1175403069), -INT32_C( 259586816), INT32_C( 1660314713), -INT32_C( 384948007) }, { INT32_C( 11900), INT32_C( 15393), INT32_C( 6333), INT32_C( 14915) } }, { { -INT32_C( 1318148420), -INT32_C( 196136842), INT32_C( 1581341137), -INT32_C( 2027850813) }, { INT32_C( 11355), INT32_C( 15635), INT32_C( 6032), INT32_C( 8648) } }, { { INT32_C( 960500280), INT32_C( 1881786391), INT32_C( 97656620), INT32_C( 82764103) }, { INT32_C( 3664), INT32_C( 7178), INT32_C( 372), INT32_C( 315) } }, { { -INT32_C( 38445945), INT32_C( 1592919181), INT32_C( 565982046), -INT32_C( 559358554) }, { INT32_C( 16237), INT32_C( 6076), INT32_C( 2159), INT32_C( 14250) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec_18) / sizeof(test_vec_18[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi32(test_vec_18[i].a); simde__m128i r 
= simde_mm_srli_epi32(a, 18); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec_18[i].r)); } return 0; } static int test_simde_mm_slli_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C(-2315072815474662386), INT64_C( 6072154117607221746)), simde_mm_set_epi64x(INT64_C(-1181415201403959552), INT64_C( 2472475957923215616)) }, { simde_mm_set_epi64x(INT64_C(-2150345518249743204), INT64_C( 3180241355952247476)), simde_mm_set_epi64x(INT64_C( 1456934769676144128), INT64_C( 1242523940277541376)) }, { simde_mm_set_epi64x(INT64_C( 6492638483912689614), INT64_C( 7722522576063149658)), simde_mm_set_epi64x(INT64_C( 954242623894447872), INT64_C(-7641290244232631040)) }, { simde_mm_set_epi64x(INT64_C(-3437371876454060839), INT64_C(-7472017034411611746)), simde_mm_set_epi64x(INT64_C( 2738257582909451392), INT64_C( 2812511428210380544)) }, { simde_mm_set_epi64x(INT64_C(-2994435188669454779), INT64_C(-4713226846452985822)), simde_mm_set_epi64x(INT64_C( 4093921398210372224), INT64_C( 5449518086433018112)) }, { simde_mm_set_epi64x(INT64_C(-1348831542752523511), INT64_C(-2794326256527200530)), simde_mm_set_epi64x(INT64_C(-6629740808937044864), INT64_C(-7185623435000187136)) }, { simde_mm_set_epi64x(INT64_C(-5602242705933140185), INT64_C( 6492190109232091873)), simde_mm_set_epi64x(INT64_C( 2335952515230569344), INT64_C( 896850664777937024)) }, { simde_mm_set_epi64x(INT64_C( 8003331601608352009), INT64_C(-5520322068937257120)), simde_mm_set_epi64x(INT64_C(-8591223121865833344), INT64_C(-5624950023005949952)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0)); simde__m128i r = simde_mm_slli_epi64(test_vec[i].a, 7); simde_assert_m128i_i32(r, ==, test_vec[i].r); r = simde_mm_slli_epi64(test_vec[i].a, 0); simde_assert_m128i_i32(r, ==, test_vec[i].a); r = simde_mm_slli_epi64(test_vec[i].a, 64); simde_assert_m128i_i32(r, 
==, zeros); r = simde_mm_slli_epi64(test_vec[i].a, 65); simde_assert_m128i_i32(r, ==, zeros); } return 0; } static int test_simde_mm_srli_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C(-2315072815474662386), INT64_C( 6072154117607221746)), simde_mm_set_epi64x(INT64_C( 126028681704960072), INT64_C( 47438704043806419)) }, { simde_mm_set_epi64x(INT64_C(-2150345518249743204), INT64_C( 3180241355952247476)), simde_mm_set_epi64x(INT64_C( 127315613714529753), INT64_C( 24845635593376933)) }, { simde_mm_set_epi64x(INT64_C( 6492638483912689614), INT64_C( 7722522576063149658)), simde_mm_set_epi64x(INT64_C( 50723738155567887), INT64_C( 60332207625493356)) }, { simde_mm_set_epi64x(INT64_C(-3437371876454060839), INT64_C(-7472017034411611746)), simde_mm_set_epi64x(INT64_C( 117260720291058521), INT64_C( 85740054994515155)) }, { simde_mm_set_epi64x(INT64_C(-2994435188669454779), INT64_C(-4713226846452985822)), simde_mm_set_epi64x(INT64_C( 120721163164375756), INT64_C( 107293103337941920)) }, { simde_mm_set_epi64x(INT64_C(-1348831542752523511), INT64_C(-2794326256527200530)), simde_mm_set_epi64x(INT64_C( 133577441648101782), INT64_C( 122284514196737117)) }, { simde_mm_set_epi64x(INT64_C(-5602242705933140185), INT64_C( 6492190109232091873)), simde_mm_set_epi64x(INT64_C( 100347666935753214), INT64_C( 50720235228375717)) }, { simde_mm_set_epi64x(INT64_C( 8003331601608352009), INT64_C(-5520322068937257120)), simde_mm_set_epi64x(INT64_C( 62526028137565250), INT64_C( 100987671912283550)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r; simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0)); /* r = simde_mm_srli_epi64(test_vec[i].a, -1); */ /* simde_assert_m128i_i32(r, ==, zeros); */ r = simde_mm_srli_epi64(test_vec[i].a, 0); simde_assert_m128i_i32(r, ==, test_vec[i].a); r = simde_mm_srli_epi64(test_vec[i].a, 7); simde_assert_m128i_i32(r, ==, test_vec[i].r); r = 
simde_mm_srli_epi64(test_vec[i].a, 64); simde_assert_m128i_i32(r, ==, zeros); r = simde_mm_srli_epi64(test_vec[i].a, 65); simde_assert_m128i_i32(r, ==, zeros); } return 0; } static int test_simde_mm_store_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2]; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 825.31), SIMDE_FLOAT64_C( 176.75)), {SIMDE_FLOAT64_C( 176.75), SIMDE_FLOAT64_C( 825.31) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -248.58), SIMDE_FLOAT64_C( -171.93)), {SIMDE_FLOAT64_C( -171.93), SIMDE_FLOAT64_C( -248.58) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 419.77), SIMDE_FLOAT64_C( 712.85)), {SIMDE_FLOAT64_C( 712.85), SIMDE_FLOAT64_C( 419.77) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 785.55), SIMDE_FLOAT64_C( 78.74)), {SIMDE_FLOAT64_C( 78.74), SIMDE_FLOAT64_C( 785.55) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -963.92), SIMDE_FLOAT64_C( 614.28)), {SIMDE_FLOAT64_C( 614.28), SIMDE_FLOAT64_C( -963.92) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 624.62), SIMDE_FLOAT64_C( -260.28)), {SIMDE_FLOAT64_C( -260.28), SIMDE_FLOAT64_C( 624.62) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -178.24), SIMDE_FLOAT64_C( 945.12)), {SIMDE_FLOAT64_C( 945.12), SIMDE_FLOAT64_C( -178.24) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -271.60), SIMDE_FLOAT64_C( -674.20)), {SIMDE_FLOAT64_C( -674.20), SIMDE_FLOAT64_C( -271.60) } } }; for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) { SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2]; simde_mm_store_pd(r, test_vec[i].a); simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4); } return 0; } static int test_simde_mm_store_pd1(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2]; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 278.50), SIMDE_FLOAT64_C( 554.87)), {SIMDE_FLOAT64_C( 554.87), SIMDE_FLOAT64_C( 554.87) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -348.28), SIMDE_FLOAT64_C( 361.13)), 
{SIMDE_FLOAT64_C( 361.13), SIMDE_FLOAT64_C( 361.13) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -701.38), SIMDE_FLOAT64_C( 708.23)), {SIMDE_FLOAT64_C( 708.23), SIMDE_FLOAT64_C( 708.23) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -362.77), SIMDE_FLOAT64_C( -574.16)), {SIMDE_FLOAT64_C( -574.16), SIMDE_FLOAT64_C( -574.16) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 420.63), SIMDE_FLOAT64_C( 850.70)), {SIMDE_FLOAT64_C( 850.70), SIMDE_FLOAT64_C( 850.70) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -223.78), SIMDE_FLOAT64_C( 845.58)), {SIMDE_FLOAT64_C( 845.58), SIMDE_FLOAT64_C( 845.58) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 948.70), SIMDE_FLOAT64_C( 544.62)), {SIMDE_FLOAT64_C( 544.62), SIMDE_FLOAT64_C( 544.62) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -216.79), SIMDE_FLOAT64_C( -830.24)), {SIMDE_FLOAT64_C( -830.24), SIMDE_FLOAT64_C( -830.24) } } }; for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) { SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2]; simde_mm_store_pd1(r, test_vec[i].a); simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4); } return 0; } static int test_simde_mm_store_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 b[2]; SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2]; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -380.32), SIMDE_FLOAT64_C( 589.10)), {SIMDE_FLOAT64_C( -886.38), SIMDE_FLOAT64_C( 706.27) }, {SIMDE_FLOAT64_C( 589.10), SIMDE_FLOAT64_C( 706.27) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 97.55), SIMDE_FLOAT64_C( -921.93)), {SIMDE_FLOAT64_C( 175.08), SIMDE_FLOAT64_C( -498.43) }, {SIMDE_FLOAT64_C( -921.93), SIMDE_FLOAT64_C( -498.43) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -962.76), SIMDE_FLOAT64_C( -267.73)), {SIMDE_FLOAT64_C( -505.37), SIMDE_FLOAT64_C( -729.92) }, {SIMDE_FLOAT64_C( -267.73), SIMDE_FLOAT64_C( -729.92) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 154.59), SIMDE_FLOAT64_C( -829.83)), {SIMDE_FLOAT64_C( 141.33), SIMDE_FLOAT64_C( 657.26) }, 
{SIMDE_FLOAT64_C( -829.83), SIMDE_FLOAT64_C( 657.26) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -623.49), SIMDE_FLOAT64_C( -306.50)), {SIMDE_FLOAT64_C( -540.89), SIMDE_FLOAT64_C( 213.61) }, {SIMDE_FLOAT64_C( -306.50), SIMDE_FLOAT64_C( 213.61) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 708.57), SIMDE_FLOAT64_C( -626.05)), {SIMDE_FLOAT64_C( -658.64), SIMDE_FLOAT64_C( 310.68) }, {SIMDE_FLOAT64_C( -626.05), SIMDE_FLOAT64_C( 310.68) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 7.10), SIMDE_FLOAT64_C( 84.59)), {SIMDE_FLOAT64_C( 191.88), SIMDE_FLOAT64_C( -258.06) }, {SIMDE_FLOAT64_C( 84.59), SIMDE_FLOAT64_C( -258.06) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 399.99), SIMDE_FLOAT64_C( -337.50)), {SIMDE_FLOAT64_C( 733.91), SIMDE_FLOAT64_C( -756.49) }, {SIMDE_FLOAT64_C( -337.50), SIMDE_FLOAT64_C( -756.49) } } }; for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) { SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2]; simde_memcpy(r, &(test_vec[i].b), sizeof(test_vec[i].b)); simde_mm_store_sd(r, test_vec[i].a); simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4); } return 0; } static int test_simde_mm_store_si128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C(-1969078312), INT32_C( 1646650233), INT32_C(-1190611301), INT32_C( 889904733)), simde_mm_set_epi32(INT32_C(-1969078312), INT32_C( 1646650233), INT32_C(-1190611301), INT32_C( 889904733)) }, { simde_mm_set_epi32(INT32_C( 361491951), INT32_C(-1497327260), INT32_C(-2092062445), INT32_C(-1242536811)), simde_mm_set_epi32(INT32_C( 361491951), INT32_C(-1497327260), INT32_C(-2092062445), INT32_C(-1242536811)) }, { simde_mm_set_epi32(INT32_C( 790325756), INT32_C( -295457696), INT32_C( 30297459), INT32_C( 860807687)), simde_mm_set_epi32(INT32_C( 790325756), INT32_C( -295457696), INT32_C( 30297459), INT32_C( 860807687)) }, { simde_mm_set_epi32(INT32_C(-1228048681), INT32_C( 1236867704), INT32_C(-1927827785), INT32_C(-1233913343)), 
simde_mm_set_epi32(INT32_C(-1228048681), INT32_C( 1236867704), INT32_C(-1927827785), INT32_C(-1233913343)) }, { simde_mm_set_epi32(INT32_C( 1007412231), INT32_C( -296710614), INT32_C(-1416317108), INT32_C( -839008134)), simde_mm_set_epi32(INT32_C( 1007412231), INT32_C( -296710614), INT32_C(-1416317108), INT32_C( -839008134)) }, { simde_mm_set_epi32(INT32_C( 1325410731), INT32_C( 2049780007), INT32_C( 190337706), INT32_C( 1948643128)), simde_mm_set_epi32(INT32_C( 1325410731), INT32_C( 2049780007), INT32_C( 190337706), INT32_C( 1948643128)) }, { simde_mm_set_epi32(INT32_C(-1295145224), INT32_C( -913388140), INT32_C(-1185110338), INT32_C( 127220065)), simde_mm_set_epi32(INT32_C(-1295145224), INT32_C( -913388140), INT32_C(-1185110338), INT32_C( 127220065)) }, { simde_mm_set_epi32(INT32_C( 479405479), INT32_C( 641965302), INT32_C(-1100092667), INT32_C( 1837148945)), simde_mm_set_epi32(INT32_C( 479405479), INT32_C( 641965302), INT32_C(-1100092667), INT32_C( 1837148945)) } }; for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) { simde__m128i r; simde_mm_store_si128(&r, test_vec[i].a); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_storeh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 b[2]; SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2]; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -154.79), SIMDE_FLOAT64_C( 689.59)), {SIMDE_FLOAT64_C( -986.30), SIMDE_FLOAT64_C( -463.82) }, {SIMDE_FLOAT64_C( -154.79), SIMDE_FLOAT64_C( -463.82) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 944.07), SIMDE_FLOAT64_C( -598.47)), {SIMDE_FLOAT64_C( -514.42), SIMDE_FLOAT64_C( 652.02) }, {SIMDE_FLOAT64_C( 944.07), SIMDE_FLOAT64_C( 652.02) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -34.72), SIMDE_FLOAT64_C( -771.52)), {SIMDE_FLOAT64_C( 343.91), SIMDE_FLOAT64_C( -171.75) }, {SIMDE_FLOAT64_C( -34.72), SIMDE_FLOAT64_C( -171.75) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 305.40), 
SIMDE_FLOAT64_C( -671.87)), {SIMDE_FLOAT64_C( -579.65), SIMDE_FLOAT64_C( -985.37) }, {SIMDE_FLOAT64_C( 305.40), SIMDE_FLOAT64_C( -985.37) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 173.51), SIMDE_FLOAT64_C( 643.06)), {SIMDE_FLOAT64_C( 794.84), SIMDE_FLOAT64_C( 233.08) }, {SIMDE_FLOAT64_C( 173.51), SIMDE_FLOAT64_C( 233.08) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -130.21), SIMDE_FLOAT64_C( -290.59)), {SIMDE_FLOAT64_C( 584.05), SIMDE_FLOAT64_C( -167.57) }, {SIMDE_FLOAT64_C( -130.21), SIMDE_FLOAT64_C( -167.57) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -759.48), SIMDE_FLOAT64_C( 428.70)), {SIMDE_FLOAT64_C( 36.98), SIMDE_FLOAT64_C( -189.97) }, {SIMDE_FLOAT64_C( -759.48), SIMDE_FLOAT64_C( -189.97) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 222.49), SIMDE_FLOAT64_C( 621.71)), {SIMDE_FLOAT64_C( -467.95), SIMDE_FLOAT64_C( -910.73) }, {SIMDE_FLOAT64_C( 222.49), SIMDE_FLOAT64_C( -910.73) } } }; for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) { SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2]; simde_memcpy(r, &(test_vec[i].b), sizeof(test_vec[i].b)); simde_mm_storeh_pd(r, test_vec[i].a); simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4); } return 0; } static int test_simde_mm_storel_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C(-8572402204481175152), INT64_C(-3565447379630862345)), simde_mm_set_epi64x(INT64_C(-5836787758646654491), INT64_C( 978262207997446536)), simde_mm_set_epi64x(INT64_C(-5836787758646654491), INT64_C(-3565447379630862345)) }, { simde_mm_set_epi64x(INT64_C( 883894259135204982), INT64_C(-6785295924552521928)), simde_mm_set_epi64x(INT64_C( 5751908210058630765), INT64_C(-7999305285706001942)), simde_mm_set_epi64x(INT64_C( 5751908210058630765), INT64_C(-6785295924552521928)) }, { simde_mm_set_epi64x(INT64_C( 4991496111910955453), INT64_C(-1947231678451890517)), simde_mm_set_epi64x(INT64_C( 1054715717267865334), 
INT64_C(-5199938312574175167)), simde_mm_set_epi64x(INT64_C( 1054715717267865334), INT64_C(-1947231678451890517)) }, { simde_mm_set_epi64x(INT64_C(-6916286228894702079), INT64_C(-7888320918323423602)), simde_mm_set_epi64x(INT64_C(-4560271213984560857), INT64_C( 1030486561279856923)), simde_mm_set_epi64x(INT64_C(-4560271213984560857), INT64_C(-7888320918323423602)) }, { simde_mm_set_epi64x(INT64_C(-5516402797122916761), INT64_C( 8516393373254709766)), simde_mm_set_epi64x(INT64_C(-8984432431227422893), INT64_C(-1285772213781786319)), simde_mm_set_epi64x(INT64_C(-8984432431227422893), INT64_C( 8516393373254709766)) }, { simde_mm_set_epi64x(INT64_C( 1537881028582424966), INT64_C( 3855597324285413517)), simde_mm_set_epi64x(INT64_C(-1087659369158402202), INT64_C( 5504181592152866903)), simde_mm_set_epi64x(INT64_C(-1087659369158402202), INT64_C( 3855597324285413517)) }, { simde_mm_set_epi64x(INT64_C(-1003754336566127903), INT64_C( 3155788073225494266)), simde_mm_set_epi64x(INT64_C( 7014294951579480267), INT64_C(-6777837266490471507)), simde_mm_set_epi64x(INT64_C( 7014294951579480267), INT64_C( 3155788073225494266)) }, { simde_mm_set_epi64x(INT64_C( 7343239871058385173), INT64_C(-8089093160963830084)), simde_mm_set_epi64x(INT64_C(-7180996141698966448), INT64_C( 1747758344108352756)), simde_mm_set_epi64x(INT64_C(-7180996141698966448), INT64_C(-8089093160963830084)) } }; for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) { simde__m128i r; simde_memcpy(&r, &(test_vec[i].b), sizeof(r)); simde_mm_storel_epi64(&r, test_vec[i].a); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_storel_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 b[2]; SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2]; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -887.08), SIMDE_FLOAT64_C( -520.70)), {SIMDE_FLOAT64_C( -258.49), SIMDE_FLOAT64_C( 913.00) }, {SIMDE_FLOAT64_C( -520.70), 
SIMDE_FLOAT64_C( 913.00) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 724.04), SIMDE_FLOAT64_C( -774.49)), {SIMDE_FLOAT64_C( 557.37), SIMDE_FLOAT64_C( -701.13) }, {SIMDE_FLOAT64_C( -774.49), SIMDE_FLOAT64_C( -701.13) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -366.90), SIMDE_FLOAT64_C( -168.25)), {SIMDE_FLOAT64_C( 485.14), SIMDE_FLOAT64_C( 500.94) }, {SIMDE_FLOAT64_C( -168.25), SIMDE_FLOAT64_C( 500.94) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -783.51), SIMDE_FLOAT64_C( -187.73)), {SIMDE_FLOAT64_C( -391.92), SIMDE_FLOAT64_C( -506.74) }, {SIMDE_FLOAT64_C( -187.73), SIMDE_FLOAT64_C( -506.74) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -50.27), SIMDE_FLOAT64_C( -405.84)), {SIMDE_FLOAT64_C( -733.12), SIMDE_FLOAT64_C( -697.37) }, {SIMDE_FLOAT64_C( -405.84), SIMDE_FLOAT64_C( -697.37) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -773.31), SIMDE_FLOAT64_C( -470.65)), {SIMDE_FLOAT64_C( 738.01), SIMDE_FLOAT64_C( -908.23) }, {SIMDE_FLOAT64_C( -470.65), SIMDE_FLOAT64_C( -908.23) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -43.07), SIMDE_FLOAT64_C( -143.29)), {SIMDE_FLOAT64_C( 985.95), SIMDE_FLOAT64_C( 19.70) }, {SIMDE_FLOAT64_C( -143.29), SIMDE_FLOAT64_C( 19.70) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 649.59), SIMDE_FLOAT64_C( -925.70)), {SIMDE_FLOAT64_C( 519.96), SIMDE_FLOAT64_C( 348.23) }, {SIMDE_FLOAT64_C( -925.70), SIMDE_FLOAT64_C( 348.23) } } }; for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) { SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2]; simde_memcpy(r, &(test_vec[i].b), sizeof(test_vec[i].b)); simde_mm_storel_pd(r, test_vec[i].a); simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4); } return 0; } static int test_simde_mm_storer_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 b[2]; SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2]; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 765.90), SIMDE_FLOAT64_C( -392.20)), {SIMDE_FLOAT64_C( -898.96), SIMDE_FLOAT64_C( 810.87) }, 
{SIMDE_FLOAT64_C( 765.90), SIMDE_FLOAT64_C( -392.20) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 848.80), SIMDE_FLOAT64_C( -20.45)), {SIMDE_FLOAT64_C( -298.33), SIMDE_FLOAT64_C( 199.86) }, {SIMDE_FLOAT64_C( 848.80), SIMDE_FLOAT64_C( -20.45) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -770.38), SIMDE_FLOAT64_C( 73.29)), {SIMDE_FLOAT64_C( -471.45), SIMDE_FLOAT64_C( 85.53) }, {SIMDE_FLOAT64_C( -770.38), SIMDE_FLOAT64_C( 73.29) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 181.32), SIMDE_FLOAT64_C( -528.68)), {SIMDE_FLOAT64_C( 925.12), SIMDE_FLOAT64_C( -79.25) }, {SIMDE_FLOAT64_C( 181.32), SIMDE_FLOAT64_C( -528.68) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 786.51), SIMDE_FLOAT64_C( -396.45)), {SIMDE_FLOAT64_C( -196.75), SIMDE_FLOAT64_C( -493.37) }, {SIMDE_FLOAT64_C( 786.51), SIMDE_FLOAT64_C( -396.45) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 379.82), SIMDE_FLOAT64_C( -482.63)), {SIMDE_FLOAT64_C( 356.61), SIMDE_FLOAT64_C( 6.76) }, {SIMDE_FLOAT64_C( 379.82), SIMDE_FLOAT64_C( -482.63) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -597.31), SIMDE_FLOAT64_C( -427.66)), {SIMDE_FLOAT64_C( -787.49), SIMDE_FLOAT64_C( 322.82) }, {SIMDE_FLOAT64_C( -597.31), SIMDE_FLOAT64_C( -427.66) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 515.42), SIMDE_FLOAT64_C( 801.05)), {SIMDE_FLOAT64_C( -892.50), SIMDE_FLOAT64_C( 794.29) }, {SIMDE_FLOAT64_C( 515.42), SIMDE_FLOAT64_C( 801.05) } } }; for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) { SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2]; simde_memcpy(r, &(test_vec[i].b), sizeof(test_vec[i].b)); simde_mm_storer_pd(r, test_vec[i].a); simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4); } return 0; } static int test_simde_mm_storeu_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde_float64 b[2]; simde_float64 r[2]; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -787.29), SIMDE_FLOAT64_C( 410.40)), {SIMDE_FLOAT64_C( 579.61), SIMDE_FLOAT64_C( -320.32) }, {SIMDE_FLOAT64_C( 410.40), SIMDE_FLOAT64_C( -787.29) } }, { 
simde_mm_set_pd(SIMDE_FLOAT64_C( 944.41), SIMDE_FLOAT64_C( -149.27)), {SIMDE_FLOAT64_C( 850.87), SIMDE_FLOAT64_C( -993.24) }, {SIMDE_FLOAT64_C( -149.27), SIMDE_FLOAT64_C( 944.41) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -415.98), SIMDE_FLOAT64_C( -916.88)), {SIMDE_FLOAT64_C( 966.39), SIMDE_FLOAT64_C( -183.52) }, {SIMDE_FLOAT64_C( -916.88), SIMDE_FLOAT64_C( -415.98) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 431.98), SIMDE_FLOAT64_C( -691.20)), {SIMDE_FLOAT64_C( -659.73), SIMDE_FLOAT64_C( -34.04) }, {SIMDE_FLOAT64_C( -691.20), SIMDE_FLOAT64_C( 431.98) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -483.35), SIMDE_FLOAT64_C( 766.13)), {SIMDE_FLOAT64_C( -638.61), SIMDE_FLOAT64_C( 157.38) }, {SIMDE_FLOAT64_C( 766.13), SIMDE_FLOAT64_C( -483.35) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 386.12), SIMDE_FLOAT64_C( 330.08)), {SIMDE_FLOAT64_C( 588.80), SIMDE_FLOAT64_C( -111.35) }, {SIMDE_FLOAT64_C( 330.08), SIMDE_FLOAT64_C( 386.12) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 45.12), SIMDE_FLOAT64_C( 964.86)), {SIMDE_FLOAT64_C( 199.95), SIMDE_FLOAT64_C( 998.07) }, {SIMDE_FLOAT64_C( 964.86), SIMDE_FLOAT64_C( 45.12) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -30.76), SIMDE_FLOAT64_C( -723.78)), {SIMDE_FLOAT64_C( -8.78), SIMDE_FLOAT64_C( 410.81) }, {SIMDE_FLOAT64_C( -723.78), SIMDE_FLOAT64_C( -30.76) } } }; for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) { SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2]; simde_memcpy(r, &(test_vec[i].b), sizeof(test_vec[i].b)); simde_mm_storeu_pd(r, test_vec[i].a); simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4); } return 0; } static int test_simde_mm_storeu_si128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 559775826), INT32_C( -953548695), INT32_C( 811731668), INT32_C( 717258119)), simde_mm_set_epi32(INT32_C( 559775826), INT32_C( -953548695), INT32_C( 811731668), INT32_C( 717258119)) }, { simde_mm_set_epi32(INT32_C( -819991397), 
INT32_C(-1367017296), INT32_C( 1998597245), INT32_C( -194600747)), simde_mm_set_epi32(INT32_C( -819991397), INT32_C(-1367017296), INT32_C( 1998597245), INT32_C( -194600747)) }, { simde_mm_set_epi32(INT32_C(-1983970353), INT32_C( 1036245224), INT32_C( 1208146280), INT32_C( 2086212378)), simde_mm_set_epi32(INT32_C(-1983970353), INT32_C( 1036245224), INT32_C( 1208146280), INT32_C( 2086212378)) }, { simde_mm_set_epi32(INT32_C(-1115487208), INT32_C( 1901412157), INT32_C( -373768038), INT32_C( 1379732008)), simde_mm_set_epi32(INT32_C(-1115487208), INT32_C( 1901412157), INT32_C( -373768038), INT32_C( 1379732008)) }, { simde_mm_set_epi32(INT32_C( -772363216), INT32_C( 1208166493), INT32_C( 2006133231), INT32_C( -567476934)), simde_mm_set_epi32(INT32_C( -772363216), INT32_C( 1208166493), INT32_C( 2006133231), INT32_C( -567476934)) }, { simde_mm_set_epi32(INT32_C( -117502444), INT32_C( 175751722), INT32_C(-1353399970), INT32_C( -281466966)), simde_mm_set_epi32(INT32_C( -117502444), INT32_C( 175751722), INT32_C(-1353399970), INT32_C( -281466966)) }, { simde_mm_set_epi32(INT32_C( 2118723593), INT32_C(-1657083210), INT32_C( 1907402314), INT32_C( 669913338)), simde_mm_set_epi32(INT32_C( 2118723593), INT32_C(-1657083210), INT32_C( 1907402314), INT32_C( 669913338)) }, { simde_mm_set_epi32(INT32_C( 372135232), INT32_C( 1779530333), INT32_C(-1088754891), INT32_C( 1773872281)), simde_mm_set_epi32(INT32_C( 372135232), INT32_C( 1779530333), INT32_C(-1088754891), INT32_C( 1773872281)) } }; for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) { simde__m128i r; simde_mm_storeu_si128(&r, test_vec[i].a); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_storeu_si16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[8]; const int16_t r; } test_vec[] = { { { -INT16_C( 6988), INT16_C( 26153), -INT16_C( 2289), -INT16_C( 8118), INT16_C( 29038), INT16_C( 9460), -INT16_C( 25910), -INT16_C( 2208) }, -INT16_C( 6988) }, { { INT16_C( 
4717), -INT16_C( 14623), INT16_C( 14017), -INT16_C( 21548), INT16_C( 18960), -INT16_C( 20965), INT16_C( 24253), INT16_C( 29067) }, INT16_C( 4717) }, { { -INT16_C( 19389), INT16_C( 21207), INT16_C( 8619), INT16_C( 6450), INT16_C( 9874), INT16_C( 23869), -INT16_C( 25152), INT16_C( 11604) }, -INT16_C( 19389) }, { { INT16_C( 14000), INT16_C( 29171), -INT16_C( 14484), INT16_C( 31772), INT16_C( 14353), -INT16_C( 12758), -INT16_C( 19050), -INT16_C( 9920) }, INT16_C( 14000) }, { { INT16_C( 5993), INT16_C( 5163), INT16_C( 23865), -INT16_C( 13523), INT16_C( 27523), INT16_C( 17448), INT16_C( 32008), -INT16_C( 18319) }, INT16_C( 5993) }, { { INT16_C( 26035), INT16_C( 7977), INT16_C( 17964), INT16_C( 16027), -INT16_C( 14722), INT16_C( 5132), INT16_C( 19579), -INT16_C( 6674) }, INT16_C( 26035) }, { { INT16_C( 6500), -INT16_C( 25095), INT16_C( 10103), -INT16_C( 1432), -INT16_C( 28270), -INT16_C( 26050), -INT16_C( 20466), -INT16_C( 16045) }, INT16_C( 6500) }, { { INT16_C( 31765), INT16_C( 16864), INT16_C( 31682), INT16_C( 16511), -INT16_C( 29631), -INT16_C( 17067), INT16_C( 17368), INT16_C( 15522) }, INT16_C( 31765) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a); int16_t r; HEDLEY_CONCAT(simde,_mm_storeu_si16)(&r, a); simde_assert_equal_i16(r, test_vec[i].r); } return 0; } static int test_simde_mm_storeu_si32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[4]; const int32_t r; } test_vec[] = { { { -INT32_C( 630341273), -INT32_C( 601100258), INT32_C( 527009452), INT32_C( 382213470) }, -INT32_C( 630341273) }, { { INT32_C( 733254901), INT32_C( 225181130), -INT32_C( 418546734), -INT32_C( 1459105470) }, INT32_C( 733254901) }, { { -INT32_C( 1333562222), INT32_C( 277655396), -INT32_C( 1825508043), INT32_C( 145356818) }, -INT32_C( 1333562222) }, { { INT32_C( 1446207116), INT32_C( 761503323), INT32_C( 1544843545), -INT32_C( 721085374) }, INT32_C( 1446207116) }, { { -INT32_C( 175797872), 
INT32_C( 1829048888), INT32_C( 436286727), -INT32_C( 1188910547) }, -INT32_C( 175797872) }, { { INT32_C( 1661949192), INT32_C( 227570676), INT32_C( 644457956), INT32_C( 1375432641) }, INT32_C( 1661949192) }, { { INT32_C( 809927160), -INT32_C( 1700967277), -INT32_C( 1347117439), INT32_C( 1365825097) }, INT32_C( 809927160) }, { { INT32_C( 548763692), -INT32_C( 819116565), -INT32_C( 1409968150), -INT32_C( 16912122) }, INT32_C( 548763692) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); int32_t r; HEDLEY_CONCAT(simde,_mm_storeu_si32)(&r, a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; } static int test_simde_mm_storeu_si64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int64_t a[2]; const int64_t r; } test_vec[] = { { { INT64_C( 1269957435069449074), INT64_C( 6198123151038108778) }, INT64_C( 1269957435069449074) }, { { -INT64_C( 1631810497504953952), -INT64_C( 5530541008416845765) }, -INT64_C( 1631810497504953952) }, { { -INT64_C( 6740103892576997931), -INT64_C( 59573331693324629) }, -INT64_C( 6740103892576997931) }, { { -INT64_C( 9008073061231320301), -INT64_C( 564917926918647499) }, -INT64_C( 9008073061231320301) }, { { -INT64_C( 1996551244505816721), INT64_C( 965994603972566793) }, -INT64_C( 1996551244505816721) }, { { INT64_C( 815745091936186761), -INT64_C( 8734544458042763860) }, INT64_C( 815745091936186761) }, { { INT64_C( 191535998296794507), -INT64_C( 3305974968983330281) }, INT64_C( 191535998296794507) }, { { INT64_C( 407001106525339075), INT64_C( 6676759969134880266) }, INT64_C( 407001106525339075) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a); int64_t r; HEDLEY_CONCAT(simde,_mm_storeu_si64)(&r, a); simde_assert_equal_i64(r, test_vec[i].r); } return 0; } static int test_simde_mm_store1_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; 
SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2]; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 291.96), SIMDE_FLOAT64_C( -70.45)), { SIMDE_FLOAT64_C( -70.45), SIMDE_FLOAT64_C( -70.45) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 896.84), SIMDE_FLOAT64_C( 840.00)), { SIMDE_FLOAT64_C( 840.00), SIMDE_FLOAT64_C( 840.00) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 792.86), SIMDE_FLOAT64_C( 559.02)), { SIMDE_FLOAT64_C( 559.02), SIMDE_FLOAT64_C( 559.02) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 401.40), SIMDE_FLOAT64_C( -245.84)), { SIMDE_FLOAT64_C( -245.84), SIMDE_FLOAT64_C( -245.84) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 441.21), SIMDE_FLOAT64_C( 731.20)), { SIMDE_FLOAT64_C( 731.20), SIMDE_FLOAT64_C( 731.20) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 569.26), SIMDE_FLOAT64_C( -434.33)), { SIMDE_FLOAT64_C( -434.33), SIMDE_FLOAT64_C( -434.33) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -796.24), SIMDE_FLOAT64_C( 534.91)), { SIMDE_FLOAT64_C( 534.91), SIMDE_FLOAT64_C( 534.91) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -221.59), SIMDE_FLOAT64_C( -372.35)), { SIMDE_FLOAT64_C( -372.35), SIMDE_FLOAT64_C( -372.35) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2] ; simde_mm_store1_pd(r, test_vec[i].a); simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4); } return 0; } static int test_simde_mm_stream_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2]; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -749.31), SIMDE_FLOAT64_C( -483.97)), {SIMDE_FLOAT64_C( -483.97), SIMDE_FLOAT64_C( -749.31) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 587.52), SIMDE_FLOAT64_C( -903.15)), {SIMDE_FLOAT64_C( -903.15), SIMDE_FLOAT64_C( 587.52) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -515.61), SIMDE_FLOAT64_C( 144.37)), {SIMDE_FLOAT64_C( 144.37), SIMDE_FLOAT64_C( -515.61) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -183.60), SIMDE_FLOAT64_C( 483.36)), 
{SIMDE_FLOAT64_C( 483.36), SIMDE_FLOAT64_C( -183.60) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 33.37), SIMDE_FLOAT64_C( -802.26)), {SIMDE_FLOAT64_C( -802.26), SIMDE_FLOAT64_C( 33.37) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -131.42), SIMDE_FLOAT64_C( -156.48)), {SIMDE_FLOAT64_C( -156.48), SIMDE_FLOAT64_C( -131.42) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -317.87), SIMDE_FLOAT64_C( 140.87)), {SIMDE_FLOAT64_C( 140.87), SIMDE_FLOAT64_C( -317.87) } }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 924.07), SIMDE_FLOAT64_C( 709.42)), {SIMDE_FLOAT64_C( 709.42), SIMDE_FLOAT64_C( 924.07) } } }; for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) { SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2]; simde_mm_stream_pd(r, test_vec[i].a); simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_stream_si128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 34091183), INT32_C( 572850908), INT32_C( 428781754), INT32_C(-1984722387)), simde_mm_set_epi32(INT32_C( 34091183), INT32_C( 572850908), INT32_C( 428781754), INT32_C(-1984722387)) }, { simde_mm_set_epi32(INT32_C( 2059236852), INT32_C( 436410728), INT32_C( 338757718), INT32_C( 1985336145)), simde_mm_set_epi32(INT32_C( 2059236852), INT32_C( 436410728), INT32_C( 338757718), INT32_C( 1985336145)) }, { simde_mm_set_epi32(INT32_C( -559686487), INT32_C( 981390363), INT32_C( 629822759), INT32_C( 26629572)), simde_mm_set_epi32(INT32_C( -559686487), INT32_C( 981390363), INT32_C( 629822759), INT32_C( 26629572)) }, { simde_mm_set_epi32(INT32_C( 1401959784), INT32_C( -900492538), INT32_C( -328421218), INT32_C( 452144845)), simde_mm_set_epi32(INT32_C( 1401959784), INT32_C( -900492538), INT32_C( -328421218), INT32_C( 452144845)) }, { simde_mm_set_epi32(INT32_C( 1914664610), INT32_C( 1467736241), INT32_C(-2062482935), INT32_C(-1765775255)), simde_mm_set_epi32(INT32_C( 1914664610), INT32_C( 1467736241), 
INT32_C(-2062482935), INT32_C(-1765775255)) }, { simde_mm_set_epi32(INT32_C( 659730578), INT32_C( 874862437), INT32_C( -487086426), INT32_C(-1161523548)), simde_mm_set_epi32(INT32_C( 659730578), INT32_C( 874862437), INT32_C( -487086426), INT32_C(-1161523548)) }, { simde_mm_set_epi32(INT32_C( 1195652072), INT32_C( -415424127), INT32_C( 77100736), INT32_C( 1699618155)), simde_mm_set_epi32(INT32_C( 1195652072), INT32_C( -415424127), INT32_C( 77100736), INT32_C( 1699618155)) }, { simde_mm_set_epi32(INT32_C( 1626943139), INT32_C( 1327578602), INT32_C(-1477047999), INT32_C( 1569415359)), simde_mm_set_epi32(INT32_C( 1626943139), INT32_C( 1327578602), INT32_C(-1477047999), INT32_C( 1569415359)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r; simde_mm_stream_si128(&r, test_vec[i].a); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_stream_si32(SIMDE_MUNIT_TEST_ARGS) { const struct { int32_t a; int32_t r; } test_vec[8] = { { -895547977, -895547977 }, { 1712937231, 1712937231 }, { -1086654689, -1086654689 }, { 1855506850, 1855506850 }, { 1870001810, 1870001810 }, { -396094407, -396094407 }, { -1262223993, -1262223993 }, { 2015532253, 2015532253 } }; for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) { int32_t r; simde_mm_stream_si32(&r, test_vec[i].a); simde_assert_equal_i32(r, test_vec[i].r); } return 0; } static int test_simde_mm_stream_si64(SIMDE_MUNIT_TEST_ARGS) { const struct { int64_t a; int64_t r; } test_vec[8] = { { INT64_C( -908741869362791955), INT64_C( -908741869362791955) }, { INT64_C( 6977779886002528513), INT64_C( 6977779886002528513) }, { INT64_C(-3803748866185605675), INT64_C(-3803748866185605675) }, { INT64_C( 9126491633461219066), INT64_C( 9126491633461219066) }, { INT64_C(-1680016917440909978), INT64_C(-1680016917440909978) }, { INT64_C( 9194247506078439345), INT64_C( 9194247506078439345) }, { INT64_C(-5911248664473270680), INT64_C(-5911248664473270680) }, { 
INT64_C(-9131883318362768052), INT64_C(-9131883318362768052) } }; for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) { int64_t r; #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_NATIVE_ALIASES_TESTING) simde_mm_stream_si64(HEDLEY_REINTERPRET_CAST(long long int*, &r), test_vec[i].a); #else simde_mm_stream_si64(&r, test_vec[i].a); #endif simde_assert_equal_i64(r, test_vec[i].r); } return 0; } static int test_simde_mm_sub_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C(-114), INT8_C(-102), INT8_C( -21), INT8_C( 93), INT8_C(-120), INT8_C( 125), INT8_C( -36), INT8_C( -23), INT8_C( -12), INT8_C( 11), INT8_C( 22), INT8_C( 78), INT8_C( -16), INT8_C( 57), INT8_C( 71), INT8_C( 112)), simde_mm_set_epi8(INT8_C( -15), INT8_C( -47), INT8_C( 8), INT8_C( 73), INT8_C( 45), INT8_C( 37), INT8_C( 27), INT8_C( -63), INT8_C( -74), INT8_C( -76), INT8_C( -34), INT8_C( 78), INT8_C( -50), INT8_C( 121), INT8_C(-113), INT8_C(-123)), simde_mm_set_epi8(INT8_C( -99), INT8_C( -55), INT8_C( -29), INT8_C( 20), INT8_C( 91), INT8_C( 88), INT8_C( -63), INT8_C( 40), INT8_C( 62), INT8_C( 87), INT8_C( 56), INT8_C( 0), INT8_C( 34), INT8_C( -64), INT8_C( -72), INT8_C( -21)) }, { simde_mm_set_epi8(INT8_C( -71), INT8_C( -77), INT8_C( -40), INT8_C( 99), INT8_C( -40), INT8_C( 24), INT8_C( 45), INT8_C( 125), INT8_C( 16), INT8_C( 82), INT8_C( -66), INT8_C( -93), INT8_C( 92), INT8_C( 60), INT8_C( 65), INT8_C( 70)), simde_mm_set_epi8(INT8_C( -15), INT8_C( -11), INT8_C( 41), INT8_C( 35), INT8_C( 87), INT8_C( -22), INT8_C( -28), INT8_C( -74), INT8_C( 88), INT8_C(-100), INT8_C( 28), INT8_C( -30), INT8_C( 122), INT8_C( -93), INT8_C( -11), INT8_C( 47)), simde_mm_set_epi8(INT8_C( -56), INT8_C( -66), INT8_C( -81), INT8_C( 64), INT8_C(-127), INT8_C( 46), INT8_C( 73), INT8_C( -57), INT8_C( -72), INT8_C( -74), INT8_C( -94), INT8_C( -63), INT8_C( -30), INT8_C(-103), INT8_C( 76), INT8_C( 23)) }, { 
simde_mm_set_epi8(INT8_C( 88), INT8_C( 7), INT8_C( 21), INT8_C( 40), INT8_C( -45), INT8_C( -52), INT8_C( 105), INT8_C( 9), INT8_C( -65), INT8_C( -48), INT8_C( 74), INT8_C( -11), INT8_C( 71), INT8_C( -73), INT8_C( -92), INT8_C(-128)), simde_mm_set_epi8(INT8_C( 51), INT8_C( -25), INT8_C( 14), INT8_C( -31), INT8_C( -75), INT8_C( 81), INT8_C( 123), INT8_C( -32), INT8_C( -73), INT8_C(-121), INT8_C( 36), INT8_C( -43), INT8_C( 95), INT8_C( -5), INT8_C( 71), INT8_C( -67)), simde_mm_set_epi8(INT8_C( 37), INT8_C( 32), INT8_C( 7), INT8_C( 71), INT8_C( 30), INT8_C( 123), INT8_C( -18), INT8_C( 41), INT8_C( 8), INT8_C( 73), INT8_C( 38), INT8_C( 32), INT8_C( -24), INT8_C( -68), INT8_C( 93), INT8_C( -61)) }, { simde_mm_set_epi8(INT8_C( -26), INT8_C( -30), INT8_C(-127), INT8_C( -96), INT8_C( -93), INT8_C( 85), INT8_C( -61), INT8_C( 31), INT8_C( 84), INT8_C( 86), INT8_C( 14), INT8_C( 51), INT8_C( -75), INT8_C( -80), INT8_C( 35), INT8_C( 49)), simde_mm_set_epi8(INT8_C(-102), INT8_C( 55), INT8_C( 103), INT8_C( 19), INT8_C(-107), INT8_C( -66), INT8_C(-128), INT8_C( 92), INT8_C(-108), INT8_C( -59), INT8_C( -55), INT8_C( 84), INT8_C( -42), INT8_C( 42), INT8_C( -85), INT8_C( -73)), simde_mm_set_epi8(INT8_C( 76), INT8_C( -85), INT8_C( 26), INT8_C(-115), INT8_C( 14), INT8_C(-105), INT8_C( 67), INT8_C( -61), INT8_C( -64), INT8_C(-111), INT8_C( 69), INT8_C( -33), INT8_C( -33), INT8_C(-122), INT8_C( 120), INT8_C( 122)) }, { simde_mm_set_epi8(INT8_C( -92), INT8_C( 56), INT8_C( -22), INT8_C( -76), INT8_C( -77), INT8_C(-116), INT8_C( -11), INT8_C( 34), INT8_C( -7), INT8_C( 37), INT8_C( -64), INT8_C( -72), INT8_C( 28), INT8_C(-107), INT8_C(-128), INT8_C(-117)), simde_mm_set_epi8(INT8_C( -56), INT8_C( -3), INT8_C( 32), INT8_C( 22), INT8_C( 49), INT8_C(-125), INT8_C( 122), INT8_C( -3), INT8_C(-111), INT8_C( 65), INT8_C( -17), INT8_C( 15), INT8_C( -83), INT8_C( -49), INT8_C( 13), INT8_C( 98)), simde_mm_set_epi8(INT8_C( -36), INT8_C( 59), INT8_C( -54), INT8_C( -98), INT8_C(-126), INT8_C( 9), INT8_C( 
123), INT8_C( 37), INT8_C( 104), INT8_C( -28), INT8_C( -47), INT8_C( -87), INT8_C( 111), INT8_C( -58), INT8_C( 115), INT8_C( 41)) }, { simde_mm_set_epi8(INT8_C(-104), INT8_C( 9), INT8_C( 90), INT8_C( -26), INT8_C(-114), INT8_C(-100), INT8_C( -19), INT8_C( 82), INT8_C( 96), INT8_C( 58), INT8_C( 39), INT8_C( 9), INT8_C( -4), INT8_C( 91), INT8_C( -93), INT8_C( -73)), simde_mm_set_epi8(INT8_C( 16), INT8_C( 2), INT8_C( -9), INT8_C( 107), INT8_C(-122), INT8_C(-106), INT8_C( -7), INT8_C( 11), INT8_C( 116), INT8_C( -40), INT8_C( -9), INT8_C( -94), INT8_C( 61), INT8_C( -90), INT8_C( 69), INT8_C( 0)), simde_mm_set_epi8(INT8_C(-120), INT8_C( 7), INT8_C( 99), INT8_C( 123), INT8_C( 8), INT8_C( 6), INT8_C( -12), INT8_C( 71), INT8_C( -20), INT8_C( 98), INT8_C( 48), INT8_C( 103), INT8_C( -65), INT8_C( -75), INT8_C( 94), INT8_C( -73)) }, { simde_mm_set_epi8(INT8_C( -61), INT8_C( -71), INT8_C( 103), INT8_C( 76), INT8_C( 44), INT8_C( 98), INT8_C( 70), INT8_C(-120), INT8_C( 17), INT8_C( 126), INT8_C( -43), INT8_C( 108), INT8_C( -31), INT8_C( 12), INT8_C( -92), INT8_C( -28)), simde_mm_set_epi8(INT8_C(-114), INT8_C( 71), INT8_C( -5), INT8_C( -9), INT8_C( -6), INT8_C( 117), INT8_C( -23), INT8_C( -62), INT8_C( -10), INT8_C( -22), INT8_C( 106), INT8_C( 35), INT8_C( -63), INT8_C( 18), INT8_C( 58), INT8_C( 22)), simde_mm_set_epi8(INT8_C( 53), INT8_C( 114), INT8_C( 108), INT8_C( 85), INT8_C( 50), INT8_C( -19), INT8_C( 93), INT8_C( -58), INT8_C( 27), INT8_C(-108), INT8_C( 107), INT8_C( 73), INT8_C( 32), INT8_C( -6), INT8_C( 106), INT8_C( -50)) }, { simde_mm_set_epi8(INT8_C( 19), INT8_C( -54), INT8_C( 71), INT8_C( 0), INT8_C( -13), INT8_C( 85), INT8_C( 113), INT8_C( 7), INT8_C( -78), INT8_C(-122), INT8_C( -69), INT8_C( -15), INT8_C( -57), INT8_C( -9), INT8_C(-125), INT8_C( 84)), simde_mm_set_epi8(INT8_C( -78), INT8_C( 106), INT8_C(-106), INT8_C( 60), INT8_C( 36), INT8_C( 103), INT8_C( -55), INT8_C( 69), INT8_C(-119), INT8_C( -53), INT8_C( 67), INT8_C( -86), INT8_C( -37), INT8_C( -20), INT8_C( 
-58), INT8_C( -28)), simde_mm_set_epi8(INT8_C( 97), INT8_C( 96), INT8_C( -79), INT8_C( -60), INT8_C( -49), INT8_C( -18), INT8_C( -88), INT8_C( -62), INT8_C( 41), INT8_C( -69), INT8_C( 120), INT8_C( 71), INT8_C( -20), INT8_C( 11), INT8_C( -67), INT8_C( 112)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_sub_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_sub_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( 4649), INT16_C(-10562), INT16_C(-25917), INT16_C( 9425), INT16_C( 27983), INT16_C( -7888), INT16_C( 3337), INT16_C(-19608)), simde_mm_set_epi16(INT16_C(-13637), INT16_C( -2631), INT16_C( 26607), INT16_C( 19784), INT16_C(-32723), INT16_C(-19066), INT16_C( 18245), INT16_C(-23690)), simde_mm_set_epi16(INT16_C( 18286), INT16_C( -7931), INT16_C( 13012), INT16_C(-10359), INT16_C( -4830), INT16_C( 11178), INT16_C(-14908), INT16_C( 4082)) }, { simde_mm_set_epi16(INT16_C( 708), INT16_C( 11434), INT16_C( -1239), INT16_C(-25521), INT16_C(-21333), INT16_C( 14389), INT16_C( 1705), INT16_C( 20680)), simde_mm_set_epi16(INT16_C(-28483), INT16_C( 8156), INT16_C(-22073), INT16_C( 17984), INT16_C( 20902), INT16_C( 3569), INT16_C( 31387), INT16_C( 7806)), simde_mm_set_epi16(INT16_C( 29191), INT16_C( 3278), INT16_C( 20834), INT16_C( 22031), INT16_C( 23301), INT16_C( 10820), INT16_C(-29682), INT16_C( 12874)) }, { simde_mm_set_epi16(INT16_C( -3626), INT16_C( 757), INT16_C( 189), INT16_C(-19968), INT16_C( 5676), INT16_C( 7663), INT16_C( 8524), INT16_C( 15372)), simde_mm_set_epi16(INT16_C( 20254), INT16_C(-31977), INT16_C( 18332), INT16_C(-14379), INT16_C( -7613), INT16_C( 19737), INT16_C( 22035), INT16_C( -6952)), simde_mm_set_epi16(INT16_C(-23880), INT16_C( 32734), INT16_C(-18143), INT16_C( -5589), INT16_C( 13289), INT16_C(-12074), INT16_C(-13511), INT16_C( 22324)) }, 
{ simde_mm_set_epi16(INT16_C(-12411), INT16_C( 25999), INT16_C( 8485), INT16_C( -8542), INT16_C( 21018), INT16_C(-31213), INT16_C( 15766), INT16_C( 18574)), simde_mm_set_epi16(INT16_C( 6484), INT16_C(-10154), INT16_C( 20175), INT16_C( 32085), INT16_C( 18950), INT16_C(-19405), INT16_C(-12089), INT16_C( 8199)), simde_mm_set_epi16(INT16_C(-18895), INT16_C(-29383), INT16_C(-11690), INT16_C( 24909), INT16_C( 2068), INT16_C(-11808), INT16_C( 27855), INT16_C( 10375)) }, { simde_mm_set_epi16(INT16_C( 7148), INT16_C(-25537), INT16_C( 5647), INT16_C(-25529), INT16_C( -5324), INT16_C(-12025), INT16_C( 27072), INT16_C(-30360)), simde_mm_set_epi16(INT16_C(-24506), INT16_C( -9630), INT16_C( 25801), INT16_C( 32734), INT16_C( 1516), INT16_C( 10059), INT16_C( 10693), INT16_C( 13623)), simde_mm_set_epi16(INT16_C( 31654), INT16_C(-15907), INT16_C(-20154), INT16_C( 7273), INT16_C( -6840), INT16_C(-22084), INT16_C( 16379), INT16_C( 21553)) }, { simde_mm_set_epi16(INT16_C(-24730), INT16_C(-23496), INT16_C(-16567), INT16_C(-13323), INT16_C(-12986), INT16_C(-31808), INT16_C( 27730), INT16_C( -2264)), simde_mm_set_epi16(INT16_C(-13737), INT16_C(-18451), INT16_C(-16289), INT16_C( 22307), INT16_C( -2961), INT16_C( 22412), INT16_C( 13917), INT16_C( 8259)), simde_mm_set_epi16(INT16_C(-10993), INT16_C( -5045), INT16_C( -278), INT16_C( 29906), INT16_C(-10025), INT16_C( 11316), INT16_C( 13813), INT16_C(-10523)) }, { simde_mm_set_epi16(INT16_C( 5718), INT16_C( 31027), INT16_C( 29094), INT16_C( 1906), INT16_C( -3938), INT16_C( -2339), INT16_C(-13536), INT16_C( 11931)), simde_mm_set_epi16(INT16_C(-23545), INT16_C( 2546), INT16_C( -2953), INT16_C( -8072), INT16_C( 28237), INT16_C(-11239), INT16_C(-13996), INT16_C( 29497)), simde_mm_set_epi16(INT16_C( 29263), INT16_C( 28481), INT16_C( 32047), INT16_C( 9978), INT16_C(-32175), INT16_C( 8900), INT16_C( 460), INT16_C(-17566)) }, { simde_mm_set_epi16(INT16_C( 29491), INT16_C(-30965), INT16_C( 4748), INT16_C(-28809), INT16_C(-21877), INT16_C(-21669), 
INT16_C(-28233), INT16_C(-28758)), simde_mm_set_epi16(INT16_C( 5029), INT16_C( 4694), INT16_C(-16956), INT16_C(-15561), INT16_C(-23049), INT16_C(-31774), INT16_C( 3835), INT16_C(-12557)), simde_mm_set_epi16(INT16_C( 24462), INT16_C( 29877), INT16_C( 21704), INT16_C(-13248), INT16_C( 1172), INT16_C( 10105), INT16_C(-32068), INT16_C(-16201)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_sub_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_sub_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32( 304731838, -1698487087, 1833951536, 218739560), simde_mm_set_epi32( -893651527, 1743736136, -2144488058, 1195746166), simde_mm_set_epi32( 1198383365, 852744073, -316527702, -977006606) }, { simde_mm_set_epi32( 46410922, -81159089, -1398065099, 111759560), simde_mm_set_epi32(-1866653732, -1446558144, 1369837041, 2056986238), simde_mm_set_epi32( 1913064654, 1365399055, 1527065156, -1945226678) }, { simde_mm_set_epi32( -237632779, 12431872, 371989999, 558644236), simde_mm_set_epi32( 1327399703, 1201457109, -498905831, 1444144344), simde_mm_set_epi32(-1565032482, -1189025237, 870895830, -885500108) }, { simde_mm_set_epi32( -813341297, 556129954, 1377469971, 1033259150), simde_mm_set_epi32( 424990806, 1322220885, 1241953331, -792256505), simde_mm_set_epi32(-1238332103, -766090931, 135516640, 1825515655) }, { simde_mm_set_epi32( 468491327, 370121799, -348860153, 1774225768), simde_mm_set_epi32(-1605969310, 1690927070, 99362635, 700790071), simde_mm_set_epi32( 2074460637, -1320805271, -448222788, 1073435697) }, { simde_mm_set_epi32(-1620663240, -1085682699, -851016768, 1817376552), simde_mm_set_epi32( -900220947, -1067493597, -194029684, 912072771), simde_mm_set_epi32( -720442293, -18189102, -656987084, 905303781) }, { simde_mm_set_epi32( 374765875, 1906706290, -258017571, 
-887083365), simde_mm_set_epi32(-1543042574, -193470344, 1850594329, -917212359), simde_mm_set_epi32( 1917808449, 2100176634, -2108611900, 30128994) }, { simde_mm_set_epi32( 1932756747, 311201655, -1433687205, -1850241110), simde_mm_set_epi32( 329585238, -1111178441, -1510505502, 251383539), simde_mm_set_epi32( 1603171509, 1422380096, 76818297, -2101624649) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_sub_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_sub_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C(-5763845342482697816), INT64_C( 2103077785434280804)), simde_mm_set_epi64x(INT64_C( -84933559585222060), INT64_C(-2626653918467514964)), simde_mm_set_epi64x(INT64_C(-5678911782897475756), INT64_C( 4729731703901795768)) }, { simde_mm_set_epi64x(INT64_C( 1527789798480118137), INT64_C( 8436112421047310932)), simde_mm_set_epi64x(INT64_C(-1700732467797798250), INT64_C(-3973336518996013340)), simde_mm_set_epi64x(INT64_C( 3228522266277916387), INT64_C(-6037295133666227344)) }, { simde_mm_set_epi64x(INT64_C( 4851345631989659335), INT64_C(-7206764788471565568)), simde_mm_set_epi64x(INT64_C( 5406657072094052149), INT64_C( 1553917979932899417)), simde_mm_set_epi64x(INT64_C( -555311440104392814), INT64_C(-8760682768404464985)) }, { simde_mm_set_epi64x(INT64_C( 4880585840903485916), INT64_C(-3214111508108965857)), simde_mm_set_epi64x(INT64_C(-9030069389987018552), INT64_C(-3395779442469856546)), simde_mm_set_epi64x(INT64_C(-4536088842819047148), INT64_C( 181667934360890689)) }, { simde_mm_set_epi64x(INT64_C( 5848110560047382754), INT64_C( 5491947693722128435)), simde_mm_set_epi64x(INT64_C( 213782131019667117), INT64_C( -937970910639813333)), simde_mm_set_epi64x(INT64_C( 5634328429027715637), INT64_C( 6429918604361941768)) }, { 
simde_mm_set_epi64x(INT64_C(-1470278109522038956), INT64_C(-7185464081677005028)), simde_mm_set_epi64x(INT64_C( 7512013344600346304), INT64_C(-1151368750409397152)), simde_mm_set_epi64x(INT64_C(-8982291454122385260), INT64_C(-6034095331267607876)) }, { simde_mm_set_epi64x(INT64_C(-1414880571892272072), INT64_C(-5464559564131319132)), simde_mm_set_epi64x(INT64_C(-8460263392275774431), INT64_C( 3444946385257741717)), simde_mm_set_epi64x(INT64_C( 7045382820383502359), INT64_C(-8909505949389060849)) }, { simde_mm_set_epi64x(INT64_C( 2041037654020608990), INT64_C(-5135476174064773616)), simde_mm_set_epi64x(INT64_C(-2250411574230731306), INT64_C( 6301008926808412830)), simde_mm_set_epi64x(INT64_C( 4291449228251340296), INT64_C( 7010258972836365170)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_sub_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_sub_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -989.09), SIMDE_FLOAT64_C( 415.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -630.71), SIMDE_FLOAT64_C( 755.53)), simde_mm_set_pd(SIMDE_FLOAT64_C( -358.38), SIMDE_FLOAT64_C( -339.83)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -609.69), SIMDE_FLOAT64_C( -266.09)), simde_mm_set_pd(SIMDE_FLOAT64_C( 904.74), SIMDE_FLOAT64_C( 704.00)), simde_mm_set_pd(SIMDE_FLOAT64_C(-1514.43), SIMDE_FLOAT64_C( -970.09)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -864.69), SIMDE_FLOAT64_C( -728.75)), simde_mm_set_pd(SIMDE_FLOAT64_C( -549.96), SIMDE_FLOAT64_C( 478.05)), simde_mm_set_pd(SIMDE_FLOAT64_C( -314.73), SIMDE_FLOAT64_C(-1206.80)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -607.45), SIMDE_FLOAT64_C( -593.32)), simde_mm_set_pd(SIMDE_FLOAT64_C( -648.70), SIMDE_FLOAT64_C( -195.04)), simde_mm_set_pd(SIMDE_FLOAT64_C( 41.24), SIMDE_FLOAT64_C( -398.28)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 
-442.58), SIMDE_FLOAT64_C( -296.11)), simde_mm_set_pd(SIMDE_FLOAT64_C( 195.46), SIMDE_FLOAT64_C( 287.25)), simde_mm_set_pd(SIMDE_FLOAT64_C( -638.04), SIMDE_FLOAT64_C( -583.37)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -930.71), SIMDE_FLOAT64_C( 996.22)), simde_mm_set_pd(SIMDE_FLOAT64_C( -786.74), SIMDE_FLOAT64_C( 77.74)), simde_mm_set_pd(SIMDE_FLOAT64_C( -143.98), SIMDE_FLOAT64_C( 918.47)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 702.75), SIMDE_FLOAT64_C( -28.87)), simde_mm_set_pd(SIMDE_FLOAT64_C( 970.37), SIMDE_FLOAT64_C( -443.97)), simde_mm_set_pd(SIMDE_FLOAT64_C( -267.62), SIMDE_FLOAT64_C( 415.10)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -369.79), SIMDE_FLOAT64_C( 539.64)), simde_mm_set_pd(SIMDE_FLOAT64_C( -404.57), SIMDE_FLOAT64_C( -587.93)), simde_mm_set_pd(SIMDE_FLOAT64_C( 34.78), SIMDE_FLOAT64_C( 1127.56)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_sub_pd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_sub_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -989.09), SIMDE_FLOAT64_C( 415.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( -630.71), SIMDE_FLOAT64_C( 755.53)), simde_mm_set_pd(SIMDE_FLOAT64_C( -989.09), SIMDE_FLOAT64_C( -339.83)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -609.69), SIMDE_FLOAT64_C( -266.09)), simde_mm_set_pd(SIMDE_FLOAT64_C( 904.74), SIMDE_FLOAT64_C( 704.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -609.69), SIMDE_FLOAT64_C( -970.09)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -864.69), SIMDE_FLOAT64_C( -728.75)), simde_mm_set_pd(SIMDE_FLOAT64_C( -549.96), SIMDE_FLOAT64_C( 478.05)), simde_mm_set_pd(SIMDE_FLOAT64_C( -864.69), SIMDE_FLOAT64_C(-1206.80)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -607.45), SIMDE_FLOAT64_C( -593.32)), simde_mm_set_pd(SIMDE_FLOAT64_C( -648.70), SIMDE_FLOAT64_C( -195.04)), simde_mm_set_pd(SIMDE_FLOAT64_C( -607.45), 
SIMDE_FLOAT64_C( -398.28)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -442.58), SIMDE_FLOAT64_C( -296.11)), simde_mm_set_pd(SIMDE_FLOAT64_C( 195.46), SIMDE_FLOAT64_C( 287.25)), simde_mm_set_pd(SIMDE_FLOAT64_C( -442.58), SIMDE_FLOAT64_C( -583.37)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -930.71), SIMDE_FLOAT64_C( 996.22)), simde_mm_set_pd(SIMDE_FLOAT64_C( -786.74), SIMDE_FLOAT64_C( 77.74)), simde_mm_set_pd(SIMDE_FLOAT64_C( -930.71), SIMDE_FLOAT64_C( 918.47)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 702.75), SIMDE_FLOAT64_C( -28.87)), simde_mm_set_pd(SIMDE_FLOAT64_C( 970.37), SIMDE_FLOAT64_C( -443.97)), simde_mm_set_pd(SIMDE_FLOAT64_C( 702.75), SIMDE_FLOAT64_C( 415.10)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -369.79), SIMDE_FLOAT64_C( 539.64)), simde_mm_set_pd(SIMDE_FLOAT64_C( -404.57), SIMDE_FLOAT64_C( -587.93)), simde_mm_set_pd(SIMDE_FLOAT64_C( -369.79), SIMDE_FLOAT64_C( 1127.56)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_sub_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_sub_si64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_cvtsi64_m64(INT64_C( -40015113898169895)), simde_mm_cvtsi64_m64(INT64_C( 3843942487505240466)), simde_mm_cvtsi64_m64(INT64_C(-3883957601403410361)), }, { simde_mm_cvtsi64_m64(INT64_C( 8317116700671824816)), simde_mm_cvtsi64_m64(INT64_C( 2891842609034633421)), simde_mm_cvtsi64_m64(INT64_C( 5425274091637191395)), }, { simde_mm_cvtsi64_m64(INT64_C( 922042182678065366)), simde_mm_cvtsi64_m64(INT64_C( 4937799652981992213)), simde_mm_cvtsi64_m64(INT64_C(-4015757470303926847)), }, { simde_mm_cvtsi64_m64(INT64_C( 297526191920431793)), simde_mm_cvtsi64_m64(INT64_C(-8568639315346032946)), simde_mm_cvtsi64_m64(INT64_C( 8866165507266464739)), }, { simde_mm_cvtsi64_m64(INT64_C( 944913740190663659)), simde_mm_cvtsi64_m64(INT64_C(-5569388163200780530)), 
simde_mm_cvtsi64_m64(INT64_C( 6514301903391444189)), }, { simde_mm_cvtsi64_m64(INT64_C( 2756927115722410076)), simde_mm_cvtsi64_m64(INT64_C( 1302679549898517242)), simde_mm_cvtsi64_m64(INT64_C( 1454247565823892834)), }, { simde_mm_cvtsi64_m64(INT64_C( 977005230827305840)), simde_mm_cvtsi64_m64(INT64_C( 4908563834369883454)), simde_mm_cvtsi64_m64(INT64_C(-3931558603542577614)), }, { simde_mm_cvtsi64_m64(INT64_C(-7062092201406124762)), simde_mm_cvtsi64_m64(INT64_C( 2377066878085823882)), simde_mm_cvtsi64_m64(INT64_C( 9007584994217602972)), } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_sub_si64(test_vec[i].a, test_vec[i].b); simde_assert_m64_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_subs_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( 77), INT8_C( -15), INT8_C( -74), INT8_C( 52), INT8_C( -61), INT8_C( 64), INT8_C( 59), INT8_C( 21), INT8_C( -60), INT8_C( -67), INT8_C( -73), INT8_C( 104), INT8_C( 30), INT8_C( 107), INT8_C( 83), INT8_C(-117)), simde_mm_set_epi8(INT8_C(-110), INT8_C(-112), INT8_C( -53), INT8_C( 89), INT8_C( 74), INT8_C( 81), INT8_C( -22), INT8_C( -97), INT8_C(-128), INT8_C( -54), INT8_C( 101), INT8_C( 96), INT8_C( 36), INT8_C( 87), INT8_C(-125), INT8_C( 28)), simde_mm_set_epi8(INT8_C( 127), INT8_C( 97), INT8_C( -21), INT8_C( -37), INT8_C(-128), INT8_C( -17), INT8_C( 81), INT8_C( 118), INT8_C( 68), INT8_C( -13), INT8_C(-128), INT8_C( 8), INT8_C( -6), INT8_C( 20), INT8_C( 127), INT8_C(-128)) }, { simde_mm_set_epi8(INT8_C( 57), INT8_C( 79), INT8_C( 101), INT8_C( 47), INT8_C( 60), INT8_C( 12), INT8_C( 0), INT8_C( -19), INT8_C( 63), INT8_C( 39), INT8_C(-108), INT8_C( 37), INT8_C( 92), INT8_C( 114), INT8_C(-110), INT8_C( 91)), simde_mm_set_epi8(INT8_C( -59), INT8_C( -81), INT8_C( 49), INT8_C( 126), INT8_C( 33), INT8_C( 120), INT8_C(-127), INT8_C( 80), INT8_C( 109), INT8_C(-100), INT8_C( 21), 
INT8_C(-125), INT8_C( 7), INT8_C( 60), INT8_C(-122), INT8_C( -61)), simde_mm_set_epi8(INT8_C( 116), INT8_C( 127), INT8_C( 52), INT8_C( -79), INT8_C( 27), INT8_C(-108), INT8_C( 127), INT8_C( -99), INT8_C( -46), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 85), INT8_C( 54), INT8_C( 12), INT8_C( 127)) }, { simde_mm_set_epi8(INT8_C( 84), INT8_C(-104), INT8_C( -82), INT8_C( 105), INT8_C( -43), INT8_C( -36), INT8_C( 16), INT8_C( -15), INT8_C( -34), INT8_C( 120), INT8_C(-110), INT8_C( 90), INT8_C( 78), INT8_C( 45), INT8_C(-124), INT8_C( -84)), simde_mm_set_epi8(INT8_C( -66), INT8_C( -1), INT8_C( 91), INT8_C( 74), INT8_C( 83), INT8_C( -91), INT8_C( -97), INT8_C( 115), INT8_C( -29), INT8_C( 67), INT8_C( -98), INT8_C( -51), INT8_C( 110), INT8_C( -43), INT8_C( 125), INT8_C( 63)), simde_mm_set_epi8(INT8_C( 127), INT8_C(-103), INT8_C(-128), INT8_C( 31), INT8_C(-126), INT8_C( 55), INT8_C( 113), INT8_C(-128), INT8_C( -5), INT8_C( 53), INT8_C( -12), INT8_C( 127), INT8_C( -32), INT8_C( 88), INT8_C(-128), INT8_C(-128)) }, { simde_mm_set_epi8(INT8_C( -75), INT8_C( 37), INT8_C( 126), INT8_C( 21), INT8_C( 92), INT8_C(-124), INT8_C( -81), INT8_C( -6), INT8_C(-117), INT8_C( -14), INT8_C( 38), INT8_C( -68), INT8_C( -45), INT8_C( 114), INT8_C( 32), INT8_C( -13)), simde_mm_set_epi8(INT8_C( 80), INT8_C(-123), INT8_C( -25), INT8_C( 71), INT8_C(-108), INT8_C( -31), INT8_C( 98), INT8_C( -67), INT8_C( -23), INT8_C(-112), INT8_C( -42), INT8_C( -16), INT8_C( -56), INT8_C( 107), INT8_C( 6), INT8_C( 16)), simde_mm_set_epi8(INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( -50), INT8_C( 127), INT8_C( -93), INT8_C(-128), INT8_C( 61), INT8_C( -94), INT8_C( 98), INT8_C( 80), INT8_C( -52), INT8_C( 11), INT8_C( 7), INT8_C( 26), INT8_C( -29)) }, { simde_mm_set_epi8(INT8_C( 85), INT8_C( 18), INT8_C( 15), INT8_C( 100), INT8_C( 107), INT8_C( -69), INT8_C( -96), INT8_C( -20), INT8_C( -18), INT8_C( 42), INT8_C( 98), INT8_C( 104), INT8_C( -70), INT8_C(-121), INT8_C( -91), INT8_C( 77)), 
simde_mm_set_epi8(INT8_C( 103), INT8_C( -62), INT8_C( 107), INT8_C(-125), INT8_C( -86), INT8_C(-112), INT8_C( -45), INT8_C( 3), INT8_C( -26), INT8_C( 96), INT8_C( 83), INT8_C( 23), INT8_C( 100), INT8_C( 127), INT8_C( -56), INT8_C( -52)), simde_mm_set_epi8(INT8_C( -18), INT8_C( 80), INT8_C( -92), INT8_C( 127), INT8_C( 127), INT8_C( 43), INT8_C( -51), INT8_C( -23), INT8_C( 8), INT8_C( -54), INT8_C( 15), INT8_C( 81), INT8_C(-128), INT8_C(-128), INT8_C( -35), INT8_C( 127)) }, { simde_mm_set_epi8(INT8_C( 63), INT8_C( 16), INT8_C( 100), INT8_C( -10), INT8_C( 78), INT8_C( 116), INT8_C( -91), INT8_C( 21), INT8_C( -10), INT8_C( -27), INT8_C( -92), INT8_C( 31), INT8_C( -23), INT8_C( -53), INT8_C( -1), INT8_C( -1)), simde_mm_set_epi8(INT8_C( 20), INT8_C(-123), INT8_C( 36), INT8_C( -10), INT8_C( 127), INT8_C(-111), INT8_C( -60), INT8_C( 54), INT8_C( 92), INT8_C( 101), INT8_C( -13), INT8_C( -31), INT8_C(-124), INT8_C( 112), INT8_C(-118), INT8_C( -29)), simde_mm_set_epi8(INT8_C( 43), INT8_C( 127), INT8_C( 64), INT8_C( 0), INT8_C( -49), INT8_C( 127), INT8_C( -31), INT8_C( -33), INT8_C(-102), INT8_C(-128), INT8_C( -79), INT8_C( 62), INT8_C( 101), INT8_C(-128), INT8_C( 117), INT8_C( 28)) }, { simde_mm_set_epi8(INT8_C( 1), INT8_C( -28), INT8_C( -45), INT8_C( -32), INT8_C(-103), INT8_C( 27), INT8_C( -38), INT8_C(-127), INT8_C( -89), INT8_C( -74), INT8_C( 47), INT8_C( 91), INT8_C( 46), INT8_C( -24), INT8_C( 60), INT8_C( 23)), simde_mm_set_epi8(INT8_C( -25), INT8_C( -68), INT8_C(-116), INT8_C( 92), INT8_C( 33), INT8_C( -5), INT8_C( -35), INT8_C( -44), INT8_C( -9), INT8_C( -90), INT8_C( 63), INT8_C( 108), INT8_C( 36), INT8_C( 27), INT8_C( 112), INT8_C( -11)), simde_mm_set_epi8(INT8_C( 26), INT8_C( 40), INT8_C( 71), INT8_C(-124), INT8_C(-128), INT8_C( 32), INT8_C( -3), INT8_C( -83), INT8_C( -80), INT8_C( 16), INT8_C( -16), INT8_C( -17), INT8_C( 10), INT8_C( -51), INT8_C( -52), INT8_C( 34)) }, { simde_mm_set_epi8(INT8_C( 29), INT8_C( 123), INT8_C( -8), INT8_C( -35), INT8_C( 3), INT8_C( 
-97), INT8_C( 124), INT8_C(-121), INT8_C( 52), INT8_C( 75), INT8_C( -93), INT8_C(-127), INT8_C( -78), INT8_C( 87), INT8_C( 102), INT8_C( 119)), simde_mm_set_epi8(INT8_C( 51), INT8_C( -89), INT8_C( -6), INT8_C( 8), INT8_C( -19), INT8_C( -88), INT8_C( 22), INT8_C( 21), INT8_C( -37), INT8_C( -42), INT8_C( -97), INT8_C( 58), INT8_C( 70), INT8_C( -92), INT8_C(-100), INT8_C(-124)), simde_mm_set_epi8(INT8_C( -22), INT8_C( 127), INT8_C( -2), INT8_C( -43), INT8_C( 22), INT8_C( -9), INT8_C( 102), INT8_C(-128), INT8_C( 89), INT8_C( 117), INT8_C( 4), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_subs_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_subs_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( 3087), INT16_C(-11046), INT16_C( 16009), INT16_C( -2784), INT16_C( 23836), INT16_C( 341), INT16_C( 25120), INT16_C( 792)), simde_mm_set_epi16(INT16_C(-13938), INT16_C( 11942), INT16_C( 18967), INT16_C(-24666), INT16_C(-12194), INT16_C(-15141), INT16_C( 27868), INT16_C( 7667)), simde_mm_set_epi16(INT16_C( 17025), INT16_C(-22988), INT16_C( -2958), INT16_C( 21882), INT16_C( 32767), INT16_C( 15482), INT16_C( -2748), INT16_C( -6875)) }, { simde_mm_set_epi16(INT16_C( 15944), INT16_C( 21174), INT16_C(-19487), INT16_C( 30166), INT16_C( 9880), INT16_C( 2293), INT16_C( 1544), INT16_C( 6216)), simde_mm_set_epi16(INT16_C(-22637), INT16_C( 27460), INT16_C( 16112), INT16_C(-21899), INT16_C( 28784), INT16_C( -234), INT16_C( -5361), INT16_C( 25377)), simde_mm_set_epi16(INT16_C( 32767), INT16_C( -6286), INT16_C(-32768), INT16_C( 32767), INT16_C(-18904), INT16_C( 2527), INT16_C( 6905), INT16_C(-19161)) }, { simde_mm_set_epi16(INT16_C( 25177), INT16_C( 16000), INT16_C(-30398), INT16_C(-17760), INT16_C( 16727), 
INT16_C( -4856), INT16_C(-10813), INT16_C( 11418)), simde_mm_set_epi16(INT16_C( 25832), INT16_C(-14964), INT16_C( 17267), INT16_C( -2360), INT16_C( 15960), INT16_C( 12601), INT16_C( 9707), INT16_C( 24108)), simde_mm_set_epi16(INT16_C( -655), INT16_C( 30964), INT16_C(-32768), INT16_C(-15400), INT16_C( 767), INT16_C(-17457), INT16_C(-20520), INT16_C(-12690)) }, { simde_mm_set_epi16(INT16_C(-19601), INT16_C(-21914), INT16_C(-30623), INT16_C( -8160), INT16_C( 24427), INT16_C(-16073), INT16_C( 14239), INT16_C( 20391)), simde_mm_set_epi16(INT16_C(-19582), INT16_C(-27440), INT16_C( -9450), INT16_C(-25104), INT16_C( 11842), INT16_C( 4749), INT16_C( 3094), INT16_C( 19163)), simde_mm_set_epi16(INT16_C( -19), INT16_C( 5526), INT16_C(-21173), INT16_C( 16944), INT16_C( 12585), INT16_C(-20822), INT16_C( 11145), INT16_C( 1228)) }, { simde_mm_set_epi16(INT16_C(-10118), INT16_C( 25388), INT16_C(-18110), INT16_C( -8312), INT16_C( 5249), INT16_C( 27800), INT16_C( 2023), INT16_C( 338)), simde_mm_set_epi16(INT16_C( 14501), INT16_C( 30804), INT16_C( 26885), INT16_C(-32444), INT16_C(-27012), INT16_C(-14925), INT16_C(-31013), INT16_C( 10807)), simde_mm_set_epi16(INT16_C(-24619), INT16_C( -5416), INT16_C(-32768), INT16_C( 24132), INT16_C( 32261), INT16_C( 32767), INT16_C( 32767), INT16_C(-10469)) }, { simde_mm_set_epi16(INT16_C(-17246), INT16_C(-28624), INT16_C( 13423), INT16_C( 27394), INT16_C( 7877), INT16_C(-20368), INT16_C(-24205), INT16_C(-15569)), simde_mm_set_epi16(INT16_C(-21987), INT16_C( -4056), INT16_C( 2917), INT16_C( 23573), INT16_C( -2283), INT16_C( 21821), INT16_C( 32369), INT16_C( 26504)), simde_mm_set_epi16(INT16_C( 4741), INT16_C(-24568), INT16_C( 10506), INT16_C( 3821), INT16_C( 10160), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768)) }, { simde_mm_set_epi16(INT16_C(-10290), INT16_C( 29918), INT16_C(-29258), INT16_C(-28749), INT16_C( 6048), INT16_C(-25677), INT16_C( 24207), INT16_C( 366)), simde_mm_set_epi16(INT16_C( 13339), INT16_C(-11229), INT16_C( 23811), INT16_C( 
-333), INT16_C(-29847), INT16_C( 21714), INT16_C( 2843), INT16_C( -2618)), simde_mm_set_epi16(INT16_C(-23629), INT16_C( 32767), INT16_C(-32768), INT16_C(-28416), INT16_C( 32767), INT16_C(-32768), INT16_C( 21364), INT16_C( 2984)) }, { simde_mm_set_epi16(INT16_C( 824), INT16_C( 19299), INT16_C(-14246), INT16_C(-19942), INT16_C( 17549), INT16_C( 5220), INT16_C(-11590), INT16_C(-29570)), simde_mm_set_epi16(INT16_C( 30144), INT16_C(-11230), INT16_C(-24828), INT16_C( 29586), INT16_C( 29999), INT16_C( 25519), INT16_C( 5645), INT16_C( 16976)), simde_mm_set_epi16(INT16_C(-29320), INT16_C( 30529), INT16_C( 10582), INT16_C(-32768), INT16_C(-12450), INT16_C(-20299), INT16_C(-17235), INT16_C(-32768)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_subs_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_subs_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu8( 29, 76, 238, 61, 229, 243, 175, 238, 75, 27, 166, 154, 166, 157, 121, 248), simde_x_mm_set_epu8(129, 19, 253, 149, 9, 247, 10, 249, 105, 205, 179, 225, 124, 146, 91, 221), simde_x_mm_set_epu8( 0, 57, 0, 0, 220, 0, 165, 0, 0, 0, 0, 0, 42, 11, 30, 27) }, { simde_x_mm_set_epu8(101, 150, 221, 18, 105, 115, 165, 92, 211, 64, 38, 72, 139, 6, 65, 201), simde_x_mm_set_epu8(124, 107, 110, 57, 116, 209, 153, 76, 122, 56, 60, 234, 120, 132, 4, 95), simde_x_mm_set_epu8( 0, 43, 111, 0, 0, 0, 12, 16, 89, 8, 0, 0, 19, 0, 61, 106) }, { simde_x_mm_set_epu8(198, 232, 134, 13, 155, 189, 203, 84, 209, 255, 163, 211, 57, 177, 19, 86), simde_x_mm_set_epu8(205, 92, 216, 169, 196, 192, 93, 101, 208, 230, 232, 36, 70, 151, 125, 72), simde_x_mm_set_epu8( 0, 140, 0, 0, 0, 0, 110, 0, 1, 25, 0, 175, 0, 26, 0, 14) }, { simde_x_mm_set_epu8(150, 141, 253, 10, 218, 100, 243, 17, 87, 99, 224, 222, 198, 181, 26, 41), simde_x_mm_set_epu8(221, 130, 146, 
56, 57, 169, 46, 50, 234, 43, 8, 172, 95, 74, 51, 101), simde_x_mm_set_epu8( 0, 11, 107, 0, 161, 0, 197, 0, 0, 56, 216, 50, 103, 107, 0, 0) }, { simde_x_mm_set_epu8( 91, 188, 127, 216, 55, 208, 83, 14, 153, 114, 48, 224, 59, 66, 100, 10), simde_x_mm_set_epu8( 88, 28, 13, 17, 78, 38, 8, 111, 57, 44, 184, 85, 188, 182, 235, 151), simde_x_mm_set_epu8( 3, 160, 114, 199, 0, 170, 75, 0, 96, 70, 0, 139, 0, 0, 0, 0) }, { simde_x_mm_set_epu8(116, 32, 155, 196, 56, 42, 17, 217, 51, 162, 4, 4, 150, 83, 16, 147), simde_x_mm_set_epu8(216, 235, 181, 255, 89, 143, 40, 48, 52, 24, 160, 9, 162, 223, 243, 117), simde_x_mm_set_epu8( 0, 0, 0, 0, 0, 0, 0, 169, 0, 138, 0, 0, 0, 0, 0, 30) }, { simde_x_mm_set_epu8(217, 238, 218, 168, 98, 146, 87, 217, 135, 103, 179, 182, 128, 74, 156, 3), simde_x_mm_set_epu8(157, 0, 179, 231, 176, 37, 226, 198, 145, 138, 239, 164, 0, 170, 52, 61), simde_x_mm_set_epu8( 60, 238, 39, 0, 0, 109, 0, 19, 0, 0, 0, 18, 128, 0, 104, 0) }, { simde_x_mm_set_epu8(181, 83, 160, 141, 77, 119, 160, 171, 112, 95, 47, 88, 0, 90, 237, 18), simde_x_mm_set_epu8(139, 146, 25, 173, 34, 31, 251, 200, 190, 131, 23, 41, 246, 91, 98, 221), simde_x_mm_set_epu8( 42, 0, 135, 0, 43, 88, 0, 0, 0, 0, 24, 47, 0, 0, 139, 0) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_subs_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_subs_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu16(55440, 59202, 42058, 53369, 32796, 7917, 33818, 17136), simde_x_mm_set_epu16(26104, 52689, 47050, 39249, 59785, 38246, 31610, 10518), simde_x_mm_set_epu16(29336, 6513, 0, 14120, 0, 0, 2208, 6618) }, { simde_x_mm_set_epu16(34216, 34652, 60066, 36214, 4826, 65416, 55052, 33573), simde_x_mm_set_epu16(26443, 15803, 4000, 33420, 50076, 27556, 5522, 41665), simde_x_mm_set_epu16( 7773, 18849, 56066, 2794, 0, 
37860, 49530, 0) }, { simde_x_mm_set_epu16(64499, 21603, 35445, 16287, 15728, 23400, 23336, 39270), simde_x_mm_set_epu16(56255, 54924, 45249, 41636, 27152, 13319, 19428, 768), simde_x_mm_set_epu16( 8244, 0, 0, 0, 0, 10081, 3908, 38502) }, { simde_x_mm_set_epu16( 1242, 22793, 21812, 57045, 22651, 26751, 59072, 30159), simde_x_mm_set_epu16(11521, 44413, 36849, 788, 57441, 54148, 2979, 46303), simde_x_mm_set_epu16( 0, 0, 0, 56257, 0, 0, 56093, 0) }, { simde_x_mm_set_epu16(37620, 40488, 64998, 40075, 44204, 34122, 59592, 65445), simde_x_mm_set_epu16(40351, 64891, 27393, 62063, 1981, 56033, 30691, 62006), simde_x_mm_set_epu16( 0, 0, 37605, 0, 42223, 0, 28901, 3439) }, { simde_x_mm_set_epu16(65230, 30209, 16765, 1470, 31101, 49860, 26882, 55440), simde_x_mm_set_epu16(49049, 44537, 10442, 42049, 271, 49034, 11746, 5994), simde_x_mm_set_epu16(16181, 0, 6323, 0, 30830, 826, 15136, 49446) }, { simde_x_mm_set_epu16(37013, 9547, 22144, 27612, 32177, 62691, 50927, 50782), simde_x_mm_set_epu16(18153, 2530, 10375, 48140, 7056, 62459, 20700, 31971), simde_x_mm_set_epu16(18860, 7017, 11769, 0, 25121, 232, 30227, 18811) }, { simde_x_mm_set_epu16( 9831, 28967, 28080, 17470, 59616, 18625, 64250, 31724), simde_x_mm_set_epu16(52094, 35298, 55420, 3659, 42707, 55727, 29250, 17787), simde_x_mm_set_epu16( 0, 0, 0, 13811, 16909, 0, 35000, 13937) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_subs_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_ucomieq_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; int r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 523.45), SIMDE_FLOAT64_C( -718.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 39.72), SIMDE_FLOAT64_C( 184.39)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 666.01), SIMDE_FLOAT64_C( -592.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( -592.10), SIMDE_FLOAT64_C( -592.10)), 1 }, { 
simde_mm_set_pd(SIMDE_FLOAT64_C( 840.01), SIMDE_FLOAT64_C( -550.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( -550.36), SIMDE_FLOAT64_C( -701.38)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -236.99), SIMDE_FLOAT64_C( 791.25)), simde_mm_set_pd(SIMDE_FLOAT64_C( 791.25), SIMDE_FLOAT64_C( 791.25)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 743.24), SIMDE_FLOAT64_C( 945.47)), simde_mm_set_pd(SIMDE_FLOAT64_C( 945.47), SIMDE_FLOAT64_C( 844.58)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 938.39), SIMDE_FLOAT64_C( -590.62)), simde_mm_set_pd(SIMDE_FLOAT64_C( -590.62), SIMDE_FLOAT64_C( -183.26)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 876.49), SIMDE_FLOAT64_C( 503.26)), simde_mm_set_pd(SIMDE_FLOAT64_C( 503.26), SIMDE_FLOAT64_C( 503.26)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 927.98), SIMDE_FLOAT64_C( -197.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( -197.60), SIMDE_FLOAT64_C( -197.60)), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_ucomieq_sd(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_ucomige_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; int r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 214.53), SIMDE_FLOAT64_C( 606.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 814.33), SIMDE_FLOAT64_C( 606.90)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -487.58), SIMDE_FLOAT64_C( 444.56)), simde_mm_set_pd(SIMDE_FLOAT64_C( -781.36), SIMDE_FLOAT64_C( 30.46)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 605.28), SIMDE_FLOAT64_C( -943.32)), simde_mm_set_pd(SIMDE_FLOAT64_C( -943.32), SIMDE_FLOAT64_C( -943.32)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -981.47), SIMDE_FLOAT64_C( 31.75)), simde_mm_set_pd(SIMDE_FLOAT64_C( 31.75), SIMDE_FLOAT64_C( 299.12)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 480.83), SIMDE_FLOAT64_C( 255.57)), simde_mm_set_pd(SIMDE_FLOAT64_C( 946.90), SIMDE_FLOAT64_C( 608.16)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 634.58), SIMDE_FLOAT64_C( 320.38)), 
simde_mm_set_pd(SIMDE_FLOAT64_C( 320.38), SIMDE_FLOAT64_C( 942.24)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 98.67), SIMDE_FLOAT64_C( 118.05)), simde_mm_set_pd(SIMDE_FLOAT64_C( 118.05), SIMDE_FLOAT64_C( 118.05)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 544.57), SIMDE_FLOAT64_C( 783.14)), simde_mm_set_pd(SIMDE_FLOAT64_C( 636.80), SIMDE_FLOAT64_C( 783.14)), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_ucomige_sd(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_ucomigt_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; int r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 523.45), SIMDE_FLOAT64_C( -718.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 39.72), SIMDE_FLOAT64_C( 184.39)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 666.01), SIMDE_FLOAT64_C( -592.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( -592.10), SIMDE_FLOAT64_C( -592.10)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 840.01), SIMDE_FLOAT64_C( -550.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( -550.36), SIMDE_FLOAT64_C( -701.38)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -236.99), SIMDE_FLOAT64_C( 791.25)), simde_mm_set_pd(SIMDE_FLOAT64_C( 791.25), SIMDE_FLOAT64_C( 791.25)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 743.24), SIMDE_FLOAT64_C( 945.47)), simde_mm_set_pd(SIMDE_FLOAT64_C( 945.47), SIMDE_FLOAT64_C( 844.58)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 938.39), SIMDE_FLOAT64_C( -590.62)), simde_mm_set_pd(SIMDE_FLOAT64_C( -590.62), SIMDE_FLOAT64_C( -183.26)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 876.49), SIMDE_FLOAT64_C( 503.26)), simde_mm_set_pd(SIMDE_FLOAT64_C( 503.26), SIMDE_FLOAT64_C( 503.26)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 927.98), SIMDE_FLOAT64_C( -197.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( -197.60), SIMDE_FLOAT64_C( -197.60)), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_ucomigt_sd(test_vec[i].a, test_vec[i].b); 
simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_ucomile_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; int r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 523.45), SIMDE_FLOAT64_C( -718.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 39.72), SIMDE_FLOAT64_C( 184.39)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 666.01), SIMDE_FLOAT64_C( -592.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( -592.10), SIMDE_FLOAT64_C( -592.10)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 840.01), SIMDE_FLOAT64_C( -550.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( -550.36), SIMDE_FLOAT64_C( -701.38)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -236.99), SIMDE_FLOAT64_C( 791.25)), simde_mm_set_pd(SIMDE_FLOAT64_C( 791.25), SIMDE_FLOAT64_C( 791.25)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 743.24), SIMDE_FLOAT64_C( 945.47)), simde_mm_set_pd(SIMDE_FLOAT64_C( 945.47), SIMDE_FLOAT64_C( 844.58)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 938.39), SIMDE_FLOAT64_C( -590.62)), simde_mm_set_pd(SIMDE_FLOAT64_C( -590.62), SIMDE_FLOAT64_C( -183.26)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 876.49), SIMDE_FLOAT64_C( 503.26)), simde_mm_set_pd(SIMDE_FLOAT64_C( 503.26), SIMDE_FLOAT64_C( 503.26)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 927.98), SIMDE_FLOAT64_C( -197.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( -197.60), SIMDE_FLOAT64_C( -197.60)), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_ucomile_sd(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_ucomilt_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; int r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 523.45), SIMDE_FLOAT64_C( -718.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 39.72), SIMDE_FLOAT64_C( 184.39)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 666.01), SIMDE_FLOAT64_C( -592.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( -592.10), SIMDE_FLOAT64_C( -592.10)), 0 }, { 
simde_mm_set_pd(SIMDE_FLOAT64_C( 840.01), SIMDE_FLOAT64_C( -550.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( -550.36), SIMDE_FLOAT64_C( -701.38)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -236.99), SIMDE_FLOAT64_C( 791.25)), simde_mm_set_pd(SIMDE_FLOAT64_C( 791.25), SIMDE_FLOAT64_C( 791.25)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 743.24), SIMDE_FLOAT64_C( 945.47)), simde_mm_set_pd(SIMDE_FLOAT64_C( 945.47), SIMDE_FLOAT64_C( 844.58)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 938.39), SIMDE_FLOAT64_C( -590.62)), simde_mm_set_pd(SIMDE_FLOAT64_C( -590.62), SIMDE_FLOAT64_C( -183.26)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 876.49), SIMDE_FLOAT64_C( 503.26)), simde_mm_set_pd(SIMDE_FLOAT64_C( 503.26), SIMDE_FLOAT64_C( 503.26)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 927.98), SIMDE_FLOAT64_C( -197.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( -197.60), SIMDE_FLOAT64_C( -197.60)), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_ucomilt_sd(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_undefined_pd(SIMDE_MUNIT_TEST_ARGS) { simde__m128d z = simde_mm_setzero_pd(); simde__m128d v = simde_mm_undefined_pd(); v = simde_mm_xor_pd(v, v); simde_assert_m128d_equal(v, z); return 0; } static int test_simde_mm_undefined_si128(SIMDE_MUNIT_TEST_ARGS) { simde__m128i z = simde_mm_setzero_si128(); simde__m128i v = simde_mm_undefined_si128(); v = simde_mm_xor_si128(v, v); simde_assert_m128i_equal(v, z); return 0; } static int test_simde_mm_ucomineq_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; int r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 523.45), SIMDE_FLOAT64_C( -718.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 39.72), SIMDE_FLOAT64_C( 184.39)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 666.01), SIMDE_FLOAT64_C( -592.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( -592.10), SIMDE_FLOAT64_C( -592.10)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 840.01), SIMDE_FLOAT64_C( 
-550.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( -550.36), SIMDE_FLOAT64_C( -701.38)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -236.99), SIMDE_FLOAT64_C( 791.25)), simde_mm_set_pd(SIMDE_FLOAT64_C( 791.25), SIMDE_FLOAT64_C( 791.25)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 743.24), SIMDE_FLOAT64_C( 945.47)), simde_mm_set_pd(SIMDE_FLOAT64_C( 945.47), SIMDE_FLOAT64_C( 844.58)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 938.39), SIMDE_FLOAT64_C( -590.62)), simde_mm_set_pd(SIMDE_FLOAT64_C( -590.62), SIMDE_FLOAT64_C( -183.26)), 1 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 876.49), SIMDE_FLOAT64_C( 503.26)), simde_mm_set_pd(SIMDE_FLOAT64_C( 503.26), SIMDE_FLOAT64_C( 503.26)), 0 }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 927.98), SIMDE_FLOAT64_C( -197.60)), simde_mm_set_pd(SIMDE_FLOAT64_C( -197.60), SIMDE_FLOAT64_C( -197.60)), 0 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_ucomineq_sd(test_vec[i].a, test_vec[i].b); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_unpackhi_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( -36), INT8_C( 95), INT8_C( 84), INT8_C(-106), INT8_C( 32), INT8_C( 120), INT8_C( 19), INT8_C( -16), INT8_C( 46), INT8_C(-123), INT8_C(-117), INT8_C( 46), INT8_C( 18), INT8_C( -72), INT8_C( -36), INT8_C( 78)), simde_mm_set_epi8(INT8_C( 10), INT8_C(-106), INT8_C( -91), INT8_C( -62), INT8_C( -61), INT8_C( -62), INT8_C( -83), INT8_C( -45), INT8_C( -36), INT8_C( 17), INT8_C( 58), INT8_C(-114), INT8_C( 96), INT8_C(-102), INT8_C( -89), INT8_C( -20)), simde_mm_set_epi8(INT8_C( 10), INT8_C( -36), INT8_C(-106), INT8_C( 95), INT8_C( -91), INT8_C( 84), INT8_C( -62), INT8_C(-106), INT8_C( -61), INT8_C( 32), INT8_C( -62), INT8_C( 120), INT8_C( -83), INT8_C( 19), INT8_C( -45), INT8_C( -16)) }, { simde_mm_set_epi8(INT8_C( -54), INT8_C(-123), INT8_C( 74), INT8_C( 62), INT8_C( 43), INT8_C( 85), INT8_C( -99), INT8_C( -95), 
INT8_C( -93), INT8_C( -92), INT8_C( 121), INT8_C( 82), INT8_C( 61), INT8_C(-110), INT8_C(-111), INT8_C( -40)), simde_mm_set_epi8(INT8_C( 61), INT8_C( -50), INT8_C( 88), INT8_C( -56), INT8_C( 14), INT8_C( -92), INT8_C(-109), INT8_C( -80), INT8_C( -22), INT8_C( -61), INT8_C(-108), INT8_C( 69), INT8_C( -82), INT8_C( 29), INT8_C( 38), INT8_C( -72)), simde_mm_set_epi8(INT8_C( 61), INT8_C( -54), INT8_C( -50), INT8_C(-123), INT8_C( 88), INT8_C( 74), INT8_C( -56), INT8_C( 62), INT8_C( 14), INT8_C( 43), INT8_C( -92), INT8_C( 85), INT8_C(-109), INT8_C( -99), INT8_C( -80), INT8_C( -95)) }, { simde_mm_set_epi8(INT8_C(-103), INT8_C( -78), INT8_C( -94), INT8_C( -12), INT8_C( -31), INT8_C( -92), INT8_C( -17), INT8_C( 16), INT8_C(-122), INT8_C( 113), INT8_C( -48), INT8_C( -99), INT8_C( 32), INT8_C( -67), INT8_C( 124), INT8_C( 107)), simde_mm_set_epi8(INT8_C( 42), INT8_C( 65), INT8_C( -45), INT8_C( -19), INT8_C( -55), INT8_C( -49), INT8_C( -54), INT8_C( 56), INT8_C( -67), INT8_C( -54), INT8_C(-109), INT8_C( -80), INT8_C( -85), INT8_C( 96), INT8_C( -36), INT8_C( -69)), simde_mm_set_epi8(INT8_C( 42), INT8_C(-103), INT8_C( 65), INT8_C( -78), INT8_C( -45), INT8_C( -94), INT8_C( -19), INT8_C( -12), INT8_C( -55), INT8_C( -31), INT8_C( -49), INT8_C( -92), INT8_C( -54), INT8_C( -17), INT8_C( 56), INT8_C( 16)) }, { simde_mm_set_epi8(INT8_C( -33), INT8_C( -6), INT8_C( -31), INT8_C( -33), INT8_C( -45), INT8_C( -71), INT8_C( 119), INT8_C( 79), INT8_C( 29), INT8_C( 8), INT8_C( -44), INT8_C( -42), INT8_C( 113), INT8_C( -23), INT8_C( 53), INT8_C(-118)), simde_mm_set_epi8(INT8_C( -4), INT8_C( -47), INT8_C( -67), INT8_C( 41), INT8_C( 84), INT8_C( 5), INT8_C( -24), INT8_C( 123), INT8_C( 102), INT8_C( -69), INT8_C( 66), INT8_C( 117), INT8_C(-128), INT8_C( 115), INT8_C( -2), INT8_C( -19)), simde_mm_set_epi8(INT8_C( -4), INT8_C( -33), INT8_C( -47), INT8_C( -6), INT8_C( -67), INT8_C( -31), INT8_C( 41), INT8_C( -33), INT8_C( 84), INT8_C( -45), INT8_C( 5), INT8_C( -71), INT8_C( -24), INT8_C( 119), INT8_C( 
123), INT8_C( 79)) }, { simde_mm_set_epi8(INT8_C(-100), INT8_C( -57), INT8_C( -5), INT8_C(-111), INT8_C( 124), INT8_C(-127), INT8_C( -90), INT8_C( -88), INT8_C( 23), INT8_C(-114), INT8_C( -41), INT8_C( -98), INT8_C( 73), INT8_C( 14), INT8_C( 5), INT8_C( 46)), simde_mm_set_epi8(INT8_C( 66), INT8_C(-115), INT8_C( -36), INT8_C( -25), INT8_C( -75), INT8_C(-124), INT8_C( 96), INT8_C( 16), INT8_C( 14), INT8_C( 103), INT8_C( -98), INT8_C(-105), INT8_C( -21), INT8_C( -89), INT8_C( -87), INT8_C( -43)), simde_mm_set_epi8(INT8_C( 66), INT8_C(-100), INT8_C(-115), INT8_C( -57), INT8_C( -36), INT8_C( -5), INT8_C( -25), INT8_C(-111), INT8_C( -75), INT8_C( 124), INT8_C(-124), INT8_C(-127), INT8_C( 96), INT8_C( -90), INT8_C( 16), INT8_C( -88)) }, { simde_mm_set_epi8(INT8_C( -66), INT8_C( -23), INT8_C( -71), INT8_C( 103), INT8_C( 67), INT8_C( -33), INT8_C(-118), INT8_C( -19), INT8_C( 25), INT8_C( -53), INT8_C( 56), INT8_C( 16), INT8_C(-126), INT8_C( 121), INT8_C( 96), INT8_C(-121)), simde_mm_set_epi8(INT8_C( -16), INT8_C( 18), INT8_C( 55), INT8_C(-104), INT8_C(-120), INT8_C( 39), INT8_C( -14), INT8_C( 76), INT8_C( 39), INT8_C( 41), INT8_C( -81), INT8_C( -9), INT8_C( -56), INT8_C(-103), INT8_C( 3), INT8_C( -27)), simde_mm_set_epi8(INT8_C( -16), INT8_C( -66), INT8_C( 18), INT8_C( -23), INT8_C( 55), INT8_C( -71), INT8_C(-104), INT8_C( 103), INT8_C(-120), INT8_C( 67), INT8_C( 39), INT8_C( -33), INT8_C( -14), INT8_C(-118), INT8_C( 76), INT8_C( -19)) }, { simde_mm_set_epi8(INT8_C( 114), INT8_C( -36), INT8_C( 60), INT8_C( -26), INT8_C( 24), INT8_C( -63), INT8_C( -29), INT8_C( 114), INT8_C( 74), INT8_C( -94), INT8_C( 33), INT8_C( -33), INT8_C( 38), INT8_C( 109), INT8_C( 31), INT8_C( -91)), simde_mm_set_epi8(INT8_C( -28), INT8_C( -92), INT8_C( 30), INT8_C(-101), INT8_C( -7), INT8_C( 1), INT8_C(-108), INT8_C( 29), INT8_C( 114), INT8_C( 44), INT8_C( -8), INT8_C(-107), INT8_C( -68), INT8_C( 90), INT8_C( 100), INT8_C( -37)), simde_mm_set_epi8(INT8_C( -28), INT8_C( 114), INT8_C( -92), INT8_C( 
-36), INT8_C( 30), INT8_C( 60), INT8_C(-101), INT8_C( -26), INT8_C( -7), INT8_C( 24), INT8_C( 1), INT8_C( -63), INT8_C(-108), INT8_C( -29), INT8_C( 29), INT8_C( 114)) }, { simde_mm_set_epi8(INT8_C( 83), INT8_C( -32), INT8_C( -17), INT8_C( -35), INT8_C( 52), INT8_C( -64), INT8_C( 46), INT8_C( 89), INT8_C( -65), INT8_C( -27), INT8_C(-104), INT8_C( 5), INT8_C( 84), INT8_C( 41), INT8_C( 88), INT8_C( 34)), simde_mm_set_epi8(INT8_C( -95), INT8_C( 93), INT8_C(-118), INT8_C( -44), INT8_C( 65), INT8_C( 114), INT8_C( 28), INT8_C( -90), INT8_C( -85), INT8_C( 102), INT8_C( 78), INT8_C( -99), INT8_C(-120), INT8_C( 43), INT8_C( -56), INT8_C( 25)), simde_mm_set_epi8(INT8_C( -95), INT8_C( 83), INT8_C( 93), INT8_C( -32), INT8_C(-118), INT8_C( -17), INT8_C( -44), INT8_C( -35), INT8_C( 65), INT8_C( 52), INT8_C( 114), INT8_C( -64), INT8_C( 28), INT8_C( 46), INT8_C( -90), INT8_C( 89)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_unpackhi_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_unpackhi_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( 18787), INT16_C( 30957), INT16_C( 6745), INT16_C( 5288), INT16_C(-10333), INT16_C( 29461), INT16_C( 961), INT16_C(-14007)), simde_mm_set_epi16(INT16_C(-29691), INT16_C( 32561), INT16_C(-16442), INT16_C( -4659), INT16_C( 21222), INT16_C(-21527), INT16_C( 30610), INT16_C( 14168)), simde_mm_set_epi16(INT16_C(-29691), INT16_C( 18787), INT16_C( 32561), INT16_C( 30957), INT16_C(-16442), INT16_C( 6745), INT16_C( -4659), INT16_C( 5288)) }, { simde_mm_set_epi16(INT16_C( 14241), INT16_C(-17353), INT16_C( 15871), INT16_C( 3653), INT16_C(-29200), INT16_C( -9979), INT16_C(-30607), INT16_C( 31741)), simde_mm_set_epi16(INT16_C( 16753), INT16_C( 10981), INT16_C( 24190), INT16_C( 25811), INT16_C( 6793), INT16_C( -6051), INT16_C( 1979), 
INT16_C(-14675)), simde_mm_set_epi16(INT16_C( 16753), INT16_C( 14241), INT16_C( 10981), INT16_C(-17353), INT16_C( 24190), INT16_C( 15871), INT16_C( 25811), INT16_C( 3653)) }, { simde_mm_set_epi16(INT16_C( 24118), INT16_C( -7950), INT16_C( 8813), INT16_C( 23815), INT16_C(-12880), INT16_C( 22441), INT16_C(-31736), INT16_C( 28417)), simde_mm_set_epi16(INT16_C( -2535), INT16_C(-21518), INT16_C( 10955), INT16_C(-16484), INT16_C(-17119), INT16_C( 5667), INT16_C( 5018), INT16_C( -9313)), simde_mm_set_epi16(INT16_C( -2535), INT16_C( 24118), INT16_C(-21518), INT16_C( -7950), INT16_C( 10955), INT16_C( 8813), INT16_C(-16484), INT16_C( 23815)) }, { simde_mm_set_epi16(INT16_C(-15717), INT16_C( 7765), INT16_C(-27156), INT16_C( 26721), INT16_C( -2021), INT16_C( -7166), INT16_C( 832), INT16_C( 3368)), simde_mm_set_epi16(INT16_C(-17604), INT16_C( -2433), INT16_C(-22343), INT16_C( -9047), INT16_C( -8009), INT16_C(-14884), INT16_C(-31015), INT16_C( 9072)), simde_mm_set_epi16(INT16_C(-17604), INT16_C(-15717), INT16_C( -2433), INT16_C( 7765), INT16_C(-22343), INT16_C(-27156), INT16_C( -9047), INT16_C( 26721)) }, { simde_mm_set_epi16(INT16_C( 9613), INT16_C(-25734), INT16_C(-29111), INT16_C( -6271), INT16_C( 28183), INT16_C( 5627), INT16_C( 23471), INT16_C(-31640)), simde_mm_set_epi16(INT16_C( 17448), INT16_C(-17387), INT16_C( 12535), INT16_C( 19499), INT16_C( 11772), INT16_C( 2463), INT16_C( 20494), INT16_C( -6320)), simde_mm_set_epi16(INT16_C( 17448), INT16_C( 9613), INT16_C(-17387), INT16_C(-25734), INT16_C( 12535), INT16_C(-29111), INT16_C( 19499), INT16_C( -6271)) }, { simde_mm_set_epi16(INT16_C(-23597), INT16_C(-19655), INT16_C(-17057), INT16_C( 18059), INT16_C( 9484), INT16_C( 5905), INT16_C( 26068), INT16_C( 7424)), simde_mm_set_epi16(INT16_C(-16983), INT16_C( -3720), INT16_C(-18613), INT16_C( 7615), INT16_C(-29369), INT16_C(-17019), INT16_C( 736), INT16_C( 23842)), simde_mm_set_epi16(INT16_C(-16983), INT16_C(-23597), INT16_C( -3720), INT16_C(-19655), INT16_C(-18613), 
INT16_C(-17057), INT16_C( 7615), INT16_C( 18059)) }, { simde_mm_set_epi16(INT16_C( 10339), INT16_C( 5875), INT16_C(-28772), INT16_C( 4220), INT16_C( 31801), INT16_C( 29049), INT16_C( 31270), INT16_C(-18878)), simde_mm_set_epi16(INT16_C(-18888), INT16_C( 24242), INT16_C(-31726), INT16_C(-29025), INT16_C( 845), INT16_C( -8031), INT16_C( 4992), INT16_C( -3599)), simde_mm_set_epi16(INT16_C(-18888), INT16_C( 10339), INT16_C( 24242), INT16_C( 5875), INT16_C(-31726), INT16_C(-28772), INT16_C(-29025), INT16_C( 4220)) }, { simde_mm_set_epi16(INT16_C(-14097), INT16_C( 31063), INT16_C(-25063), INT16_C( 16951), INT16_C(-20725), INT16_C( 5387), INT16_C( -3219), INT16_C(-20465)), simde_mm_set_epi16(INT16_C(-23465), INT16_C(-30434), INT16_C( 28479), INT16_C(-15276), INT16_C(-28694), INT16_C( -9228), INT16_C( 22420), INT16_C(-31453)), simde_mm_set_epi16(INT16_C(-23465), INT16_C(-14097), INT16_C(-30434), INT16_C( 31063), INT16_C( 28479), INT16_C(-25063), INT16_C(-15276), INT16_C( 16951)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_unpackhi_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_unpackhi_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 168291084), INT32_C( 803222516), INT32_C(-2059191165), INT32_C( 156619127)), simde_mm_set_epi32(INT32_C( 1247164255), INT32_C(-1585504202), INT32_C( 81979034), INT32_C(-1257437380)), simde_mm_set_epi32(INT32_C( 1247164255), INT32_C( 168291084), INT32_C(-1585504202), INT32_C( 803222516)) }, { simde_mm_set_epi32(INT32_C(-1229392695), INT32_C( -447420261), INT32_C( -26173961), INT32_C( 1549193795)), simde_mm_set_epi32(INT32_C(-1584985518), INT32_C(-1825626458), INT32_C( 1790250510), INT32_C( -280669042)), simde_mm_set_epi32(INT32_C(-1584985518), INT32_C(-1229392695), INT32_C(-1825626458), INT32_C( -447420261)) }, { 
simde_mm_set_epi32(INT32_C( -648698663), INT32_C( 1485053046), INT32_C(-2125470397), INT32_C( 507664294)), simde_mm_set_epi32(INT32_C( -735759218), INT32_C( -710175418), INT32_C(-1695159870), INT32_C(-1167064304)), simde_mm_set_epi32(INT32_C( -735759218), INT32_C( -648698663), INT32_C( -710175418), INT32_C( 1485053046)) }, { simde_mm_set_epi32(INT32_C( -103259786), INT32_C( -188357300), INT32_C( 452180145), INT32_C(-1396420115)), simde_mm_set_epi32(INT32_C( 1404727965), INT32_C( -804737565), INT32_C(-1054802326), INT32_C( 1642647928)), simde_mm_set_epi32(INT32_C( 1404727965), INT32_C( -103259786), INT32_C( -804737565), INT32_C( -188357300)) }, { simde_mm_set_epi32(INT32_C( 1212827068), INT32_C( 1189440629), INT32_C(-1547155816), INT32_C( 1839063433)), simde_mm_set_epi32(INT32_C( 796540528), INT32_C( -982269468), INT32_C( -40316418), INT32_C( -430354120)), simde_mm_set_epi32(INT32_C( 796540528), INT32_C( 1212827068), INT32_C( -982269468), INT32_C( 1189440629)) }, { simde_mm_set_epi32(INT32_C( 1356454008), INT32_C( -215878264), INT32_C(-1695191474), INT32_C( 378220333)), simde_mm_set_epi32(INT32_C( -864195447), INT32_C(-1443486627), INT32_C(-2133730470), INT32_C( 373467456)), simde_mm_set_epi32(INT32_C( -864195447), INT32_C( 1356454008), INT32_C(-1443486627), INT32_C( -215878264)) }, { simde_mm_set_epi32(INT32_C( 764442598), INT32_C( 1720554406), INT32_C( 1938751418), INT32_C( 1005471402)), simde_mm_set_epi32(INT32_C( 883878116), INT32_C( 255422854), INT32_C( 583152961), INT32_C( -594123403)), simde_mm_set_epi32(INT32_C( 883878116), INT32_C( 764442598), INT32_C( 255422854), INT32_C( 1720554406)) }, { simde_mm_set_epi32(INT32_C( -822423451), INT32_C( -180339328), INT32_C( -689601673), INT32_C(-1524838623)), simde_mm_set_epi32(INT32_C( -665157473), INT32_C(-2141208691), INT32_C(-1935796365), INT32_C( -482464349)), simde_mm_set_epi32(INT32_C( -665157473), INT32_C( -822423451), INT32_C(-2141208691), INT32_C( -180339328)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_unpackhi_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_unpackhi_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C( 722804702791611380), INT64_C(-8844158709730520713)), simde_mm_set_epi64x(INT64_C( 5356529690674667574), INT64_C( 352097273025201980)), simde_mm_set_epi64x(INT64_C( 5356529690674667574), INT64_C( 722804702791611380)) }, { simde_mm_set_epi64x(INT64_C(-5280201415118755685), INT64_C( -112416304952585661)), simde_mm_set_epi64x(INT64_C(-6807460961974278490), INT64_C( 7689067396111619214)), simde_mm_set_epi64x(INT64_C(-6807460961974278490), INT64_C(-5280201415118755685)) }, { simde_mm_set_epi64x(INT64_C(-2786139541058872202), INT64_C(-9128825843223472218)), simde_mm_set_epi64x(INT64_C(-3160061775455742650), INT64_C(-7280656200013708528)), simde_mm_set_epi64x(INT64_C(-3160061775455742650), INT64_C(-2786139541058872202)) }, { simde_mm_set_epi64x(INT64_C( -443497399755348660), INT64_C( 1942098937574085101)), simde_mm_set_epi64x(INT64_C( 6033260672941862371), INT64_C(-4530341492272082568)), simde_mm_set_epi64x(INT64_C( 6033260672941862371), INT64_C( -443497399755348660)) }, { simde_mm_set_epi64x(INT64_C( 5209052593953008757), INT64_C(-6644983629697130103)), simde_mm_set_epi64x(INT64_C( 3421115521011270116), INT64_C( -173157692937252552)), simde_mm_set_epi64x(INT64_C( 3421115521011270116), INT64_C( 5209052593953008757)) }, { simde_mm_set_epi64x(INT64_C( 5825925606967211400), INT64_C(-7280791940909813971)), simde_mm_set_epi64x(INT64_C(-3711691179365620643), INT64_C(-9164302586755241664)), simde_mm_set_epi64x(INT64_C(-3711691179365620643), INT64_C( 5825925606967211400)) }, { simde_mm_set_epi64x(INT64_C( 3283255959799829414), INT64_C( 8326873936389097130)), simde_mm_set_epi64x(INT64_C( 3796227602125517190), INT64_C( 2504622899761407349)), 
simde_mm_set_epi64x(INT64_C( 3796227602125517190), INT64_C( 3283255959799829414)) }, { simde_mm_set_epi64x(INT64_C(-3532281821393830528), INT64_C(-2961816630031757535)), simde_mm_set_epi64x(INT64_C(-2856829591071244403), INT64_C(-8314182075578176093)), simde_mm_set_epi64x(INT64_C(-2856829591071244403), INT64_C(-3532281821393830528)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_unpackhi_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_unpackhi_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -788.38), SIMDE_FLOAT64_C( -23.22)), simde_mm_set_pd(SIMDE_FLOAT64_C( -996.21), SIMDE_FLOAT64_C( 645.47)), simde_mm_set_pd(SIMDE_FLOAT64_C( -996.21), SIMDE_FLOAT64_C( -788.38)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -986.13), SIMDE_FLOAT64_C( 267.77)), simde_mm_set_pd(SIMDE_FLOAT64_C( 401.03), SIMDE_FLOAT64_C( 978.53)), simde_mm_set_pd(SIMDE_FLOAT64_C( 401.03), SIMDE_FLOAT64_C( -986.13)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -468.32), SIMDE_FLOAT64_C( -478.73)), simde_mm_set_pd(SIMDE_FLOAT64_C( -484.79), SIMDE_FLOAT64_C( -613.68)), simde_mm_set_pd(SIMDE_FLOAT64_C( -484.79), SIMDE_FLOAT64_C( -468.32)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 169.21), SIMDE_FLOAT64_C( 897.06)), simde_mm_set_pd(SIMDE_FLOAT64_C( -872.63), SIMDE_FLOAT64_C( -172.69)), simde_mm_set_pd(SIMDE_FLOAT64_C( -872.63), SIMDE_FLOAT64_C( 169.21)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 499.02), SIMDE_FLOAT64_C( 28.99)), simde_mm_set_pd(SIMDE_FLOAT64_C( 532.77), SIMDE_FLOAT64_C( -718.79)), simde_mm_set_pd(SIMDE_FLOAT64_C( 532.77), SIMDE_FLOAT64_C( 499.02)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 208.34), SIMDE_FLOAT64_C( 635.19)), simde_mm_set_pd(SIMDE_FLOAT64_C( -165.40), SIMDE_FLOAT64_C( 391.08)), simde_mm_set_pd(SIMDE_FLOAT64_C( -165.40), SIMDE_FLOAT64_C( 208.34)) }, { 
simde_mm_set_pd(SIMDE_FLOAT64_C( -371.80), SIMDE_FLOAT64_C( 698.49)), simde_mm_set_pd(SIMDE_FLOAT64_C( 603.26), SIMDE_FLOAT64_C( 962.25)), simde_mm_set_pd(SIMDE_FLOAT64_C( 603.26), SIMDE_FLOAT64_C( -371.80)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -939.32), SIMDE_FLOAT64_C( 149.18)), simde_mm_set_pd(SIMDE_FLOAT64_C( 349.36), SIMDE_FLOAT64_C( -60.66)), simde_mm_set_pd(SIMDE_FLOAT64_C( 349.36), SIMDE_FLOAT64_C( -939.32)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_unpackhi_pd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_unpacklo_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( -46), INT8_C( 11), INT8_C( -95), INT8_C( -6), INT8_C(-108), INT8_C(-107), INT8_C( -24), INT8_C( -94), INT8_C( -54), INT8_C( 2), INT8_C( 111), INT8_C( 78), INT8_C( 16), INT8_C( -54), INT8_C( -31), INT8_C( -19)), simde_mm_set_epi8(INT8_C( 40), INT8_C( -29), INT8_C( -79), INT8_C( -49), INT8_C( 12), INT8_C( -63), INT8_C( 87), INT8_C( 55), INT8_C( 121), INT8_C( 100), INT8_C( -21), INT8_C( -2), INT8_C( -22), INT8_C( 29), INT8_C( 110), INT8_C(-110)), simde_mm_set_epi8(INT8_C( 121), INT8_C( -54), INT8_C( 100), INT8_C( 2), INT8_C( -21), INT8_C( 111), INT8_C( -2), INT8_C( 78), INT8_C( -22), INT8_C( 16), INT8_C( 29), INT8_C( -54), INT8_C( 110), INT8_C( -31), INT8_C(-110), INT8_C( -19)) }, { simde_mm_set_epi8(INT8_C( 40), INT8_C( -52), INT8_C( -72), INT8_C( 9), INT8_C( -57), INT8_C( -62), INT8_C(-100), INT8_C( 119), INT8_C( 120), INT8_C( -83), INT8_C( 102), INT8_C( -39), INT8_C( -78), INT8_C( -92), INT8_C( -76), INT8_C( 121)), simde_mm_set_epi8(INT8_C( 7), INT8_C( -69), INT8_C(-112), INT8_C( 84), INT8_C( -8), INT8_C( 23), INT8_C( 71), INT8_C( -37), INT8_C( 104), INT8_C(-121), INT8_C( -93), INT8_C( 99), INT8_C( 47), INT8_C(-114), INT8_C( -52), INT8_C( 101)), simde_mm_set_epi8(INT8_C( 104), 
INT8_C( 120), INT8_C(-121), INT8_C( -83), INT8_C( -93), INT8_C( 102), INT8_C( 99), INT8_C( -39), INT8_C( 47), INT8_C( -78), INT8_C(-114), INT8_C( -92), INT8_C( -52), INT8_C( -76), INT8_C( 101), INT8_C( 121)) }, { simde_mm_set_epi8(INT8_C( 23), INT8_C( 31), INT8_C( -95), INT8_C( -23), INT8_C( -83), INT8_C( 40), INT8_C( -32), INT8_C( -4), INT8_C( 97), INT8_C( 107), INT8_C(-118), INT8_C( 28), INT8_C( 58), INT8_C( -42), INT8_C( 6), INT8_C( 14)), simde_mm_set_epi8(INT8_C( 87), INT8_C( -63), INT8_C( 17), INT8_C( -66), INT8_C( -73), INT8_C( -52), INT8_C( 21), INT8_C( -51), INT8_C( 77), INT8_C( 127), INT8_C(-123), INT8_C( 35), INT8_C( -87), INT8_C( 10), INT8_C(-116), INT8_C( -15)), simde_mm_set_epi8(INT8_C( 77), INT8_C( 97), INT8_C( 127), INT8_C( 107), INT8_C(-123), INT8_C(-118), INT8_C( 35), INT8_C( 28), INT8_C( -87), INT8_C( 58), INT8_C( 10), INT8_C( -42), INT8_C(-116), INT8_C( 6), INT8_C( -15), INT8_C( 14)) }, { simde_mm_set_epi8(INT8_C( 82), INT8_C( -82), INT8_C( 120), INT8_C(-117), INT8_C( 95), INT8_C( 34), INT8_C( 57), INT8_C(-126), INT8_C( 125), INT8_C( -41), INT8_C( 26), INT8_C( -67), INT8_C( -28), INT8_C( 110), INT8_C( 56), INT8_C( 8)), simde_mm_set_epi8(INT8_C( 43), INT8_C( 84), INT8_C( -22), INT8_C( -23), INT8_C(-118), INT8_C( 101), INT8_C( -61), INT8_C( 0), INT8_C( 102), INT8_C( 10), INT8_C( -14), INT8_C( -26), INT8_C( -16), INT8_C( -9), INT8_C(-102), INT8_C( -6)), simde_mm_set_epi8(INT8_C( 102), INT8_C( 125), INT8_C( 10), INT8_C( -41), INT8_C( -14), INT8_C( 26), INT8_C( -26), INT8_C( -67), INT8_C( -16), INT8_C( -28), INT8_C( -9), INT8_C( 110), INT8_C(-102), INT8_C( 56), INT8_C( -6), INT8_C( 8)) }, { simde_mm_set_epi8(INT8_C( -53), INT8_C( -22), INT8_C( 64), INT8_C( -17), INT8_C( -84), INT8_C(-128), INT8_C(-124), INT8_C( -98), INT8_C( -10), INT8_C( -24), INT8_C( 47), INT8_C( 109), INT8_C( 15), INT8_C( -93), INT8_C( -3), INT8_C( -83)), simde_mm_set_epi8(INT8_C( 102), INT8_C( 24), INT8_C( 10), INT8_C( 77), INT8_C( -47), INT8_C( 121), INT8_C( -9), INT8_C( 31), 
INT8_C( 5), INT8_C( 32), INT8_C( -40), INT8_C( 72), INT8_C(-114), INT8_C( -28), INT8_C( 76), INT8_C( 98)), simde_mm_set_epi8(INT8_C( 5), INT8_C( -10), INT8_C( 32), INT8_C( -24), INT8_C( -40), INT8_C( 47), INT8_C( 72), INT8_C( 109), INT8_C(-114), INT8_C( 15), INT8_C( -28), INT8_C( -93), INT8_C( 76), INT8_C( -3), INT8_C( 98), INT8_C( -83)) }, { simde_mm_set_epi8(INT8_C( 42), INT8_C(-126), INT8_C( -81), INT8_C( -3), INT8_C( 60), INT8_C( -79), INT8_C( 80), INT8_C( -92), INT8_C( -48), INT8_C( 40), INT8_C(-125), INT8_C( 24), INT8_C( 38), INT8_C( -84), INT8_C( 120), INT8_C( 92)), simde_mm_set_epi8(INT8_C(-118), INT8_C(-121), INT8_C( 29), INT8_C(-128), INT8_C(-101), INT8_C( 4), INT8_C( -66), INT8_C( 29), INT8_C( -3), INT8_C( 82), INT8_C( -7), INT8_C( -87), INT8_C( 76), INT8_C( 52), INT8_C(-124), INT8_C( 86)), simde_mm_set_epi8(INT8_C( -3), INT8_C( -48), INT8_C( 82), INT8_C( 40), INT8_C( -7), INT8_C(-125), INT8_C( -87), INT8_C( 24), INT8_C( 76), INT8_C( 38), INT8_C( 52), INT8_C( -84), INT8_C(-124), INT8_C( 120), INT8_C( 86), INT8_C( 92)) }, { simde_mm_set_epi8(INT8_C(-121), INT8_C( 102), INT8_C( -71), INT8_C(-105), INT8_C(-120), INT8_C( 124), INT8_C( -56), INT8_C( 80), INT8_C( -23), INT8_C( 26), INT8_C(-103), INT8_C( 31), INT8_C( -30), INT8_C( -86), INT8_C( 103), INT8_C( -93)), simde_mm_set_epi8(INT8_C(-114), INT8_C( 9), INT8_C( 28), INT8_C( -23), INT8_C( 125), INT8_C( 28), INT8_C( -55), INT8_C( -13), INT8_C( -41), INT8_C( 123), INT8_C( -52), INT8_C( 49), INT8_C( -94), INT8_C( -66), INT8_C( 69), INT8_C( 75)), simde_mm_set_epi8(INT8_C( -41), INT8_C( -23), INT8_C( 123), INT8_C( 26), INT8_C( -52), INT8_C(-103), INT8_C( 49), INT8_C( 31), INT8_C( -94), INT8_C( -30), INT8_C( -66), INT8_C( -86), INT8_C( 69), INT8_C( 103), INT8_C( 75), INT8_C( -93)) }, { simde_mm_set_epi8(INT8_C( -30), INT8_C( 56), INT8_C( -7), INT8_C( -85), INT8_C( -3), INT8_C( -30), INT8_C( 87), INT8_C( 101), INT8_C(-112), INT8_C( -18), INT8_C( 7), INT8_C( 45), INT8_C( 32), INT8_C( 103), INT8_C( -2), INT8_C( 
100)), simde_mm_set_epi8(INT8_C( 75), INT8_C( -55), INT8_C( 1), INT8_C( 61), INT8_C(-126), INT8_C( -76), INT8_C( 61), INT8_C( -69), INT8_C( -86), INT8_C( 110), INT8_C( -52), INT8_C( 110), INT8_C( 96), INT8_C( -55), INT8_C( 76), INT8_C( 15)), simde_mm_set_epi8(INT8_C( -86), INT8_C(-112), INT8_C( 110), INT8_C( -18), INT8_C( -52), INT8_C( 7), INT8_C( 110), INT8_C( 45), INT8_C( 96), INT8_C( 32), INT8_C( -55), INT8_C( 103), INT8_C( 76), INT8_C( -2), INT8_C( 15), INT8_C( 100)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_unpacklo_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_unpacklo_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-11765), INT16_C(-24070), INT16_C(-27499), INT16_C( -5982), INT16_C(-13822), INT16_C( 28494), INT16_C( 4298), INT16_C( -7699)), simde_mm_set_epi16(INT16_C( 10467), INT16_C(-20017), INT16_C( 3265), INT16_C( 22327), INT16_C( 31076), INT16_C( -5122), INT16_C( -5603), INT16_C( 28306)), simde_mm_set_epi16(INT16_C( 31076), INT16_C(-13822), INT16_C( -5122), INT16_C( 28494), INT16_C( -5603), INT16_C( 4298), INT16_C( 28306), INT16_C( -7699)) }, { simde_mm_set_epi16(INT16_C( 10444), INT16_C(-18423), INT16_C(-14398), INT16_C(-25481), INT16_C( 30893), INT16_C( 26329), INT16_C(-19804), INT16_C(-19335)), simde_mm_set_epi16(INT16_C( 1979), INT16_C(-28588), INT16_C( -2025), INT16_C( 18395), INT16_C( 26759), INT16_C(-23709), INT16_C( 12174), INT16_C(-13211)), simde_mm_set_epi16(INT16_C( 26759), INT16_C( 30893), INT16_C(-23709), INT16_C( 26329), INT16_C( 12174), INT16_C(-19804), INT16_C(-13211), INT16_C(-19335)) }, { simde_mm_set_epi16(INT16_C( 5919), INT16_C(-24087), INT16_C(-21208), INT16_C( -7940), INT16_C( 24939), INT16_C(-30180), INT16_C( 15062), INT16_C( 1550)), simde_mm_set_epi16(INT16_C( 22465), INT16_C( 4542), INT16_C(-18484), 
INT16_C( 5581), INT16_C( 19839), INT16_C(-31453), INT16_C(-22262), INT16_C(-29455)), simde_mm_set_epi16(INT16_C( 19839), INT16_C( 24939), INT16_C(-31453), INT16_C(-30180), INT16_C(-22262), INT16_C( 15062), INT16_C(-29455), INT16_C( 1550)) }, { simde_mm_set_epi16(INT16_C( 21166), INT16_C( 30859), INT16_C( 24354), INT16_C( 14722), INT16_C( 32215), INT16_C( 6845), INT16_C( -7058), INT16_C( 14344)), simde_mm_set_epi16(INT16_C( 11092), INT16_C( -5399), INT16_C(-30107), INT16_C(-15616), INT16_C( 26122), INT16_C( -3354), INT16_C( -3849), INT16_C(-25862)), simde_mm_set_epi16(INT16_C( 26122), INT16_C( 32215), INT16_C( -3354), INT16_C( 6845), INT16_C( -3849), INT16_C( -7058), INT16_C(-25862), INT16_C( 14344)) }, { simde_mm_set_epi16(INT16_C(-13334), INT16_C( 16623), INT16_C(-21376), INT16_C(-31586), INT16_C( -2328), INT16_C( 12141), INT16_C( 4003), INT16_C( -595)), simde_mm_set_epi16(INT16_C( 26136), INT16_C( 2637), INT16_C(-11911), INT16_C( -2273), INT16_C( 1312), INT16_C(-10168), INT16_C(-28956), INT16_C( 19554)), simde_mm_set_epi16(INT16_C( 1312), INT16_C( -2328), INT16_C(-10168), INT16_C( 12141), INT16_C(-28956), INT16_C( 4003), INT16_C( 19554), INT16_C( -595)) }, { simde_mm_set_epi16(INT16_C( 10882), INT16_C(-20483), INT16_C( 15537), INT16_C( 20644), INT16_C(-12248), INT16_C(-31976), INT16_C( 9900), INT16_C( 30812)), simde_mm_set_epi16(INT16_C(-30073), INT16_C( 7552), INT16_C(-25852), INT16_C(-16867), INT16_C( -686), INT16_C( -1623), INT16_C( 19508), INT16_C(-31658)), simde_mm_set_epi16(INT16_C( -686), INT16_C(-12248), INT16_C( -1623), INT16_C(-31976), INT16_C( 19508), INT16_C( 9900), INT16_C(-31658), INT16_C( 30812)) }, { simde_mm_set_epi16(INT16_C(-30874), INT16_C(-18025), INT16_C(-30596), INT16_C(-14256), INT16_C( -5862), INT16_C(-26337), INT16_C( -7510), INT16_C( 26531)), simde_mm_set_epi16(INT16_C(-29175), INT16_C( 7401), INT16_C( 32028), INT16_C(-13837), INT16_C(-10373), INT16_C(-13263), INT16_C(-23874), INT16_C( 17739)), simde_mm_set_epi16(INT16_C(-10373), 
INT16_C( -5862), INT16_C(-13263), INT16_C(-26337), INT16_C(-23874), INT16_C( -7510), INT16_C( 17739), INT16_C( 26531)) }, { simde_mm_set_epi16(INT16_C( -7624), INT16_C( -1621), INT16_C( -542), INT16_C( 22373), INT16_C(-28434), INT16_C( 1837), INT16_C( 8295), INT16_C( -412)), simde_mm_set_epi16(INT16_C( 19401), INT16_C( 317), INT16_C(-32076), INT16_C( 15803), INT16_C(-21906), INT16_C(-13202), INT16_C( 24777), INT16_C( 19471)), simde_mm_set_epi16(INT16_C(-21906), INT16_C(-28434), INT16_C(-13202), INT16_C( 1837), INT16_C( 24777), INT16_C( 8295), INT16_C( 19471), INT16_C( -412)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_unpacklo_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_unpacklo_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( -770989574), INT32_C(-1802114910), INT32_C( -905810098), INT32_C( 281731565)), simde_mm_set_epi32(INT32_C( 686010831), INT32_C( 213997367), INT32_C( 2036657150), INT32_C( -367169902)), simde_mm_set_epi32(INT32_C( 2036657150), INT32_C( -905810098), INT32_C( -367169902), INT32_C( 281731565)) }, { simde_mm_set_epi32(INT32_C( 684505097), INT32_C( -943547273), INT32_C( 2024629977), INT32_C(-1297828743)), simde_mm_set_epi32(INT32_C( 129732692), INT32_C( -132692005), INT32_C( 1753719651), INT32_C( 797887589)), simde_mm_set_epi32(INT32_C( 1753719651), INT32_C( 2024629977), INT32_C( 797887589), INT32_C(-1297828743)) }, { simde_mm_set_epi32(INT32_C( 387949033), INT32_C(-1389829892), INT32_C( 1634437660), INT32_C( 987104782)), simde_mm_set_epi32(INT32_C( 1472270782), INT32_C(-1211361843), INT32_C( 1300202787), INT32_C(-1458926351)), simde_mm_set_epi32(INT32_C( 1300202787), INT32_C( 1634437660), INT32_C(-1458926351), INT32_C( 987104782)) }, { simde_mm_set_epi32(INT32_C( 1387165835), INT32_C( 1596078466), INT32_C( 2111249085), 
INT32_C( -462538744)), simde_mm_set_epi32(INT32_C( 726985449), INT32_C(-1973042432), INT32_C( 1711993574), INT32_C( -252208390)), simde_mm_set_epi32(INT32_C( 1711993574), INT32_C( 2111249085), INT32_C( -252208390), INT32_C( -462538744)) }, { simde_mm_set_epi32(INT32_C( -873840401), INT32_C(-1400863586), INT32_C( -152555667), INT32_C( 262405549)), simde_mm_set_epi32(INT32_C( 1712851533), INT32_C( -780536033), INT32_C( 86038600), INT32_C(-1897640862)), simde_mm_set_epi32(INT32_C( 86038600), INT32_C( -152555667), INT32_C(-1897640862), INT32_C( 262405549)) }, { simde_mm_set_epi32(INT32_C( 713207805), INT32_C( 1018253476), INT32_C( -802651368), INT32_C( 648837212)), simde_mm_set_epi32(INT32_C(-1970856576), INT32_C(-1694188003), INT32_C( -44893783), INT32_C( 1278510166)), simde_mm_set_epi32(INT32_C( -44893783), INT32_C( -802651368), INT32_C( 1278510166), INT32_C( 648837212)) }, { simde_mm_set_epi32(INT32_C(-2023310953), INT32_C(-2005088176), INT32_C( -384132833), INT32_C( -492148829)), simde_mm_set_epi32(INT32_C(-1912005399), INT32_C( 2099038707), INT32_C( -679752655), INT32_C(-1564588725)), simde_mm_set_epi32(INT32_C( -679752655), INT32_C( -384132833), INT32_C(-1564588725), INT32_C( -492148829)) }, { simde_mm_set_epi32(INT32_C( -499582549), INT32_C( -35498139), INT32_C(-1863448787), INT32_C( 543686244)), simde_mm_set_epi32(INT32_C( 1271464253), INT32_C(-2102116933), INT32_C(-1435579282), INT32_C( 1623804943)), simde_mm_set_epi32(INT32_C(-1435579282), INT32_C(-1863448787), INT32_C( 1623804943), INT32_C( 543686244)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_unpacklo_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_unpacklo_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C(-3311375003394119518), INT64_C(-3890424747014823443)), 
simde_mm_set_epi64x(INT64_C( 2946394084060780343), INT64_C( 8747375856342363794)), simde_mm_set_epi64x(INT64_C( 8747375856342363794), INT64_C(-3890424747014823443)) }, { simde_mm_set_epi64x(INT64_C( 2939927008911727735), INT64_C( 8695719540713370745)), simde_mm_set_epi64x(INT64_C( 557197673524316123), INT64_C( 7532168548195421285)), simde_mm_set_epi64x(INT64_C( 7532168548195421285), INT64_C( 8695719540713370745)) }, { simde_mm_set_epi64x(INT64_C( 1666228412154962172), INT64_C( 7019856298037872142)), simde_mm_set_epi64x(INT64_C( 6323354862629950925), INT64_C( 5584328451169094897)), simde_mm_set_epi64x(INT64_C( 5584328451169094897), INT64_C( 7019856298037872142)) }, { simde_mm_set_epi64x(INT64_C( 5957831897049610626), INT64_C( 9067745777617352712)), simde_mm_set_epi64x(INT64_C( 3122378730444800768), INT64_C( 7352956415334914810)), simde_mm_set_epi64x(INT64_C( 7352956415334914810), INT64_C( 9067745777617352712)) }, { simde_mm_set_epi64x(INT64_C(-3753115941324421986), INT64_C( -655221600322060883)), simde_mm_set_epi64x(INT64_C( 7356641320652896031), INT64_C( 369532975590952034)), simde_mm_set_epi64x(INT64_C( 369532975590952034), INT64_C( -655221600322060883)) }, { simde_mm_set_epi64x(INT64_C( 3063204198745198756), INT64_C(-3447361375000823716)), simde_mm_set_epi64x(INT64_C(-8464764536425759203), INT64_C( -192817328500210602)), simde_mm_set_epi64x(INT64_C( -192817328500210602), INT64_C(-3447361375000823716)) }, { simde_mm_set_epi64x(INT64_C(-8690054370483713968), INT64_C(-1649837951252011101)), simde_mm_set_epi64x(INT64_C(-8212000656381392397), INT64_C(-2919515419863792309)), simde_mm_set_epi64x(INT64_C(-2919515419863792309), INT64_C(-1649837951252011101)) }, { simde_mm_set_epi64x(INT64_C(-2145690705347848347), INT64_C(-8003451597392183708)), simde_mm_set_epi64x(INT64_C( 5460897386860920251), INT64_C(-6165766065381356529)), simde_mm_set_epi64x(INT64_C(-6165766065381356529), INT64_C(-8003451597392183708)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_unpacklo_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_unpacklo_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 160.82), SIMDE_FLOAT64_C( -868.81)), simde_mm_set_pd(SIMDE_FLOAT64_C( 640.98), SIMDE_FLOAT64_C( 578.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( 578.20), SIMDE_FLOAT64_C( -868.81)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -900.35), SIMDE_FLOAT64_C( 829.02)), simde_mm_set_pd(SIMDE_FLOAT64_C( -680.55), SIMDE_FLOAT64_C( -51.61)), simde_mm_set_pd(SIMDE_FLOAT64_C( -51.61), SIMDE_FLOAT64_C( 829.02)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 560.63), SIMDE_FLOAT64_C( 395.65)), simde_mm_set_pd(SIMDE_FLOAT64_C( -681.25), SIMDE_FLOAT64_C( -57.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( -57.21), SIMDE_FLOAT64_C( 395.65)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 938.21), SIMDE_FLOAT64_C( -628.45)), simde_mm_set_pd(SIMDE_FLOAT64_C( -939.59), SIMDE_FLOAT64_C( -183.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( -183.36), SIMDE_FLOAT64_C( -628.45)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 352.81), SIMDE_FLOAT64_C( -540.34)), simde_mm_set_pd(SIMDE_FLOAT64_C( -819.35), SIMDE_FLOAT64_C( -238.91)), simde_mm_set_pd(SIMDE_FLOAT64_C( -238.91), SIMDE_FLOAT64_C( -540.34)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 435.92), SIMDE_FLOAT64_C( 320.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( -314.42), SIMDE_FLOAT64_C( -394.55)), simde_mm_set_pd(SIMDE_FLOAT64_C( -394.55), SIMDE_FLOAT64_C( 320.63)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -256.77), SIMDE_FLOAT64_C( 784.61)), simde_mm_set_pd(SIMDE_FLOAT64_C( -354.05), SIMDE_FLOAT64_C( -16.87)), simde_mm_set_pd(SIMDE_FLOAT64_C( -16.87), SIMDE_FLOAT64_C( 784.61)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 81.23), SIMDE_FLOAT64_C( 882.56)), simde_mm_set_pd(SIMDE_FLOAT64_C( -661.47), SIMDE_FLOAT64_C( -202.79)), simde_mm_set_pd(SIMDE_FLOAT64_C( -202.79), 
SIMDE_FLOAT64_C( 882.56)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_unpacklo_pd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_xor_pd(SIMDE_MUNIT_TEST_ARGS) { simde__m128d all_set = simde_x_mm_setone_pd(), all_unset = simde_mm_setzero_pd(); simde_assert_m128d_equal(simde_mm_xor_pd(all_set, all_unset), all_set); simde_assert_m128d_equal(simde_mm_xor_pd(all_set, all_set), all_unset); simde_assert_m128d_equal(simde_mm_xor_pd(all_unset, all_unset), all_unset); return 0; } static int test_simde_mm_xor_si128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 1623880239), INT32_C( 1318620160), INT32_C(-1283662193), INT32_C(-1453845482)), simde_mm_set_epi32(INT32_C(-1675083604), INT32_C( 603168286), INT32_C( 409798099), INT32_C( 632966287)), simde_mm_set_epi32(INT32_C( -52284797), INT32_C( 1835733534), INT32_C(-1424893092), INT32_C(-1931323239)) }, { simde_mm_set_epi32(INT32_C( 1509092554), INT32_C( 1648495442), INT32_C(-1486316171), INT32_C( 868417203)), simde_mm_set_epi32(INT32_C( 1183220554), INT32_C(-1650741405), INT32_C(-1277877547), INT32_C( -793058853)), simde_mm_set_epi32(INT32_C( 527724416), INT32_C( -2513871), INT32_C( 347979680), INT32_C( -478666904)) }, { simde_mm_set_epi32(INT32_C( 373711788), INT32_C(-1451210820), INT32_C( 1218370771), INT32_C( 1535794325)), simde_mm_set_epi32(INT32_C( -155546503), INT32_C(-2037105503), INT32_C( 1041195962), INT32_C(-1654529737)), simde_mm_set_epi32(INT32_C( -520294443), INT32_C( 789871389), INT32_C( 1989263209), INT32_C( -957629022)) }, { simde_mm_set_epi32(INT32_C(-1223418601), INT32_C( 332961755), INT32_C( 688173092), INT32_C( 352304516)), simde_mm_set_epi32(INT32_C( -734452212), INT32_C( -791801405), INT32_C( 114386244), INT32_C( 996038140)), simde_mm_set_epi32(INT32_C( 1663908635), INT32_C(-1021934056), 
INT32_C( 802542944), INT32_C( 799139960)) }, { simde_mm_set_epi32(INT32_C( 1204298996), INT32_C( 1777561493), INT32_C( 531158614), INT32_C(-1345218351)), simde_mm_set_epi32(INT32_C( 465699923), INT32_C(-1417149028), INT32_C(-1963684061), INT32_C( -837148929)), simde_mm_set_epi32(INT32_C( 1544167591), INT32_C(-1032099319), INT32_C(-1789109899), INT32_C( 1640728110)) }, { simde_mm_set_epi32(INT32_C( 1401162168), INT32_C( -922039657), INT32_C( 1348044504), INT32_C( 1592606181)), simde_mm_set_epi32(INT32_C(-1635510345), INT32_C(-1462861610), INT32_C(-1206905626), INT32_C( -326154944)), simde_mm_set_epi32(INT32_C( -855630321), INT32_C( 1640254017), INT32_C( -397831618), INT32_C(-1302169435)) }, { simde_mm_set_epi32(INT32_C( 882266138), INT32_C( 2140233068), INT32_C( -978476725), INT32_C( -962797184)), simde_mm_set_epi32(INT32_C( 1476434174), INT32_C( 732384170), INT32_C( 406886944), INT32_C( 1700501859)), simde_mm_set_epi32(INT32_C( 1821821156), INT32_C( 1412830918), INT32_C( -571655317), INT32_C(-1547208477)) }, { simde_mm_set_epi32(INT32_C( 782585313), INT32_C( 1758933973), INT32_C(-1583302414), INT32_C(-1602193751)), simde_mm_set_epi32(INT32_C( 760188951), INT32_C( 624290102), INT32_C( 378021852), INT32_C(-1714147587)), simde_mm_set_epi32(INT32_C( 65723894), INT32_C( 1306712803), INT32_C(-1222074578), INT32_C( 961828948)) } }; for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) { simde__m128i r = simde_mm_xor_si128(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_x_mm_not_si128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( -817965525), INT32_C( 2140859656), INT32_C( 142941694), INT32_C(-1061432158)), simde_mm_set_epi32(INT32_C( 817965524), INT32_C(-2140859657), INT32_C( -142941695), INT32_C( 1061432157)) }, { simde_mm_set_epi32(INT32_C( 1656377120), INT32_C( 1182756765), INT32_C( 499148047), INT32_C( 1939837842)), 
simde_mm_set_epi32(INT32_C(-1656377121), INT32_C(-1182756766), INT32_C( -499148048), INT32_C(-1939837843)) }, { simde_mm_set_epi32(INT32_C(-1391390683), INT32_C( -880299242), INT32_C( 1262346433), INT32_C(-1162276292)), simde_mm_set_epi32(INT32_C( 1391390682), INT32_C( 880299241), INT32_C(-1262346434), INT32_C( 1162276291)) }, { simde_mm_set_epi32(INT32_C( 402553699), INT32_C(-1406117325), INT32_C(-1620159472), INT32_C( 1950201834)), simde_mm_set_epi32(INT32_C( -402553700), INT32_C( 1406117324), INT32_C( 1620159471), INT32_C(-1950201835)) }, { simde_mm_set_epi32(INT32_C( 1201512664), INT32_C( -722158977), INT32_C(-1427673018), INT32_C(-1348620069)), simde_mm_set_epi32(INT32_C(-1201512665), INT32_C( 722158976), INT32_C( 1427673017), INT32_C( 1348620068)) }, { simde_mm_set_epi32(INT32_C( 2022239253), INT32_C( 336656978), INT32_C(-2043097029), INT32_C( 2060912582)), simde_mm_set_epi32(INT32_C(-2022239254), INT32_C( -336656979), INT32_C( 2043097028), INT32_C(-2060912583)) }, { simde_mm_set_epi32(INT32_C(-1767401405), INT32_C( 988173440), INT32_C( 653493949), INT32_C( 1545873213)), simde_mm_set_epi32(INT32_C( 1767401404), INT32_C( -988173441), INT32_C( -653493950), INT32_C(-1545873214)) }, { simde_mm_set_epi32(INT32_C( 164259681), INT32_C( 1625402133), INT32_C( 274817939), INT32_C( 1382941610)), simde_mm_set_epi32(INT32_C( -164259682), INT32_C(-1625402134), INT32_C( -274817940), INT32_C(-1382941611)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_x_mm_not_si128(test_vec[i].a); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_x_mm_mul_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C(-6673265146157132667), INT64_C(-8036865195274064518)), simde_mm_set_epi64x(INT64_C( 4763310881806863840), INT64_C(-2953190602401137090)), simde_mm_set_epi64x(INT64_C( 804621865193403744), INT64_C( 
7037306546512957324)) }, { simde_mm_set_epi64x(INT64_C( 4912321112367014754), INT64_C( 5506077972841640415)), simde_mm_set_epi64x(INT64_C( 5790159379234202843), INT64_C(-7860297575342104977)), simde_mm_set_epi64x(INT64_C(-6503632121046397738), INT64_C(-1366099594229104207)) }, { simde_mm_set_epi64x(INT64_C( 2749162021411530208), INT64_C( 408462426494202626)), simde_mm_set_epi64x(INT64_C( 8447492608754880299), INT64_C(-7046703966410124624)), simde_mm_set_epi64x(INT64_C(-4973831282761794400), INT64_C( 2599589224149726560)) }, { simde_mm_set_epi64x(INT64_C( -88834185851708236), INT64_C(-8089393205327952234)), simde_mm_set_epi64x(INT64_C( 381269932343520540), INT64_C( 2138325983301945876)), simde_mm_set_epi64x(INT64_C(-7088569628310845520), INT64_C( 1233235991476166584)) }, { simde_mm_set_epi64x(INT64_C( -822706701071313394), INT64_C(-2759012498076821456)), simde_mm_set_epi64x(INT64_C( 3465917358098376677), INT64_C(-7954598628423398790)), simde_mm_set_epi64x(INT64_C(-6698232051336684410), INT64_C(-6956668788971772192)) }, { simde_mm_set_epi64x(INT64_C( 8188114688325369058), INT64_C(-5073366312523094897)), simde_mm_set_epi64x(INT64_C(-8915693716470801407), INT64_C( 9186903668894606147)), simde_mm_set_epi64x(INT64_C( 3677373050832155874), INT64_C(-2924803137816977811)) }, { simde_mm_set_epi64x(INT64_C(-5966336380315033651), INT64_C( 8263120995643775133)), simde_mm_set_epi64x(INT64_C(-4262947749795433008), INT64_C(-8185205248719856231)), simde_mm_set_epi64x(INT64_C(-5369329972927887472), INT64_C(-4868166633591505195)) }, { simde_mm_set_epi64x(INT64_C( 2800078338557512603), INT64_C(-7382248080413965284)), simde_mm_set_epi64x(INT64_C( -645055313537887494), INT64_C( 2018860835012845242)), simde_mm_set_epi64x(INT64_C( 3796538949364005726), INT64_C(-1962708987484978088)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_x_mm_mul_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } 
static int test_simde_x_mm_sub_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu32(UINT32_C( 591915169), UINT32_C(1162556909), UINT32_C(2711661198), UINT32_C( 649386420)), simde_x_mm_set_epu32(UINT32_C(2963858433), UINT32_C( 387638488), UINT32_C(4276033779), UINT32_C(2372843734)), simde_x_mm_set_epu32(UINT32_C(1923024032), UINT32_C( 774918421), UINT32_C(2730594715), UINT32_C(2571509982)) }, { simde_x_mm_set_epu32(UINT32_C( 445936307), UINT32_C(1465838226), UINT32_C(3055798709), UINT32_C(2785403539)), simde_x_mm_set_epu32(UINT32_C(2376292101), UINT32_C(2800453656), UINT32_C(2012288479), UINT32_C(1067663469)), simde_x_mm_set_epu32(UINT32_C(2364611502), UINT32_C(2960351866), UINT32_C(1043510230), UINT32_C(1717740070)) }, { simde_x_mm_set_epu32(UINT32_C( 766825118), UINT32_C(3689178364), UINT32_C(1309713860), UINT32_C(1635279642)), simde_x_mm_set_epu32(UINT32_C(1287494965), UINT32_C(3931214929), UINT32_C( 130800549), UINT32_C(1579059128)), simde_x_mm_set_epu32(UINT32_C(3774297449), UINT32_C(4052930731), UINT32_C(1178913311), UINT32_C( 56220514)) }, { simde_x_mm_set_epu32(UINT32_C(1521150506), UINT32_C( 229274390), UINT32_C(2137370048), UINT32_C(1343959137)), simde_x_mm_set_epu32(UINT32_C( 919906837), UINT32_C(4230649021), UINT32_C(2105941239), UINT32_C(3460244161)), simde_x_mm_set_epu32(UINT32_C( 601243669), UINT32_C( 293592665), UINT32_C( 31428809), UINT32_C(2178682272)) }, { simde_x_mm_set_epu32(UINT32_C(1275529272), UINT32_C(2231818861), UINT32_C(2063802469), UINT32_C(3732401863)), simde_x_mm_set_epu32(UINT32_C(2896374047), UINT32_C(1493829257), UINT32_C(2939390855), UINT32_C(1941911553)), simde_x_mm_set_epu32(UINT32_C(2674122521), UINT32_C( 737989604), UINT32_C(3419378910), UINT32_C(1790490310)) }, { simde_x_mm_set_epu32(UINT32_C(3017205359), UINT32_C(2429422013), UINT32_C(3351841835), UINT32_C(2341203472)), simde_x_mm_set_epu32(UINT32_C(3000898366), UINT32_C(1136654732), 
UINT32_C(2535059098), UINT32_C( 90134778)), simde_x_mm_set_epu32(UINT32_C( 16306993), UINT32_C(1292767281), UINT32_C( 816782737), UINT32_C(2251068694)) }, { simde_x_mm_set_epu32(UINT32_C( 71842021), UINT32_C(1910901245), UINT32_C( 252676465), UINT32_C(3861146107)), simde_x_mm_set_epu32(UINT32_C(4061170475), UINT32_C(3890236125), UINT32_C(1645686841), UINT32_C(3708385897)), simde_x_mm_set_epu32(UINT32_C( 305638842), UINT32_C(2315632416), UINT32_C(2901956920), UINT32_C( 152760210)) }, { simde_x_mm_set_epu32(UINT32_C(1390785465), UINT32_C( 237201350), UINT32_C(3330556421), UINT32_C( 382557765)), simde_x_mm_set_epu32(UINT32_C( 919261037), UINT32_C(4138415457), UINT32_C( 812238579), UINT32_C( 103076353)), simde_x_mm_set_epu32(UINT32_C( 471524428), UINT32_C( 393753189), UINT32_C(2518317842), UINT32_C( 279481412)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_x_mm_sub_epu32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u32(r, ==, test_vec[i].r); } return 0; } static int test_simde_x_mm_mod_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C(-8053187774363015445), INT64_C( 9050551738356525681)), simde_mm_set_epi64x(INT64_C(-5432362900125533563), INT64_C( 4656333831414330662)), simde_mm_set_epi64x(INT64_C(-2620824874237481882), INT64_C( 4394217906942195019)) }, { simde_mm_set_epi64x(INT64_C( 3643434954039553447), INT64_C(-6234539097175065740)), simde_mm_set_epi64x(INT64_C(-1834126128625936904), INT64_C( 5974848154734978575)), simde_mm_set_epi64x(INT64_C( 1809308825413616543), INT64_C( -259690942440087165)) }, { simde_mm_set_epi64x(INT64_C( 9161306297850640165), INT64_C(-8306180370740150176)), simde_mm_set_epi64x(INT64_C( 2055562205091916701), INT64_C(-6680168448646461201)), simde_mm_set_epi64x(INT64_C( 939057477482973361), INT64_C(-1626011922093688975)) }, { simde_mm_set_epi64x(INT64_C(-8267679289606370918), 
INT64_C(-5928191487249150522)), simde_mm_set_epi64x(INT64_C( -367540592738432621), INT64_C(-9016984680455221058)), simde_mm_set_epi64x(INT64_C( -181786249360853256), INT64_C(-5928191487249150522)) }, { simde_mm_set_epi64x(INT64_C(-5911217161035399691), INT64_C(-1038656028139092449)), simde_mm_set_epi64x(INT64_C( 170272479168034452), INT64_C( 6507756447489319344)), simde_mm_set_epi64x(INT64_C( -121952869322228323), INT64_C(-1038656028139092449)) }, { simde_mm_set_epi64x(INT64_C(-8644627274378588029), INT64_C( 5613017538463476646)), simde_mm_set_epi64x(INT64_C(-8247421513208151154), INT64_C(-1150990985458942599)), simde_mm_set_epi64x(INT64_C( -397205761170436875), INT64_C( 1009053596627706250)) }, { simde_mm_set_epi64x(INT64_C( 8688276933216716257), INT64_C( -409477294924409172)), simde_mm_set_epi64x(INT64_C( 7651480072460119172), INT64_C( 5980691967331237074)), simde_mm_set_epi64x(INT64_C( 1036796860756597085), INT64_C( -409477294924409172)) }, { simde_mm_set_epi64x(INT64_C(-6308927419868714376), INT64_C( 6327163388033237975)), simde_mm_set_epi64x(INT64_C( 4310605020200368092), INT64_C( 1934689183910316990)), simde_mm_set_epi64x(INT64_C(-1998322399668346284), INT64_C( 523095836302287005)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_x_mm_mod_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_pd1) SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_epi64x) SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_epi64) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_epi64x) SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_setzero_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_setzero_si128) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_abs_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_si64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_adds_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_adds_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_adds_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_adds_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_and_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_and_si128) SIMDE_TEST_FUNC_LIST_ENTRY(mm_andnot_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_andnot_si128) SIMDE_TEST_FUNC_LIST_ENTRY(mm_avg_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_avg_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_bslli_si128) SIMDE_TEST_FUNC_LIST_ENTRY(mm_bsrli_si128) SIMDE_TEST_FUNC_LIST_ENTRY(mm_slli_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_slli_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_slli_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_srli_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_srli_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_srli_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sra_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sra_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_srai_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_srai_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_store_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_store_pd1) SIMDE_TEST_FUNC_LIST_ENTRY(mm_store_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_store_si128) SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_storel_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_storel_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_storer_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeu_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeu_si128) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeu_si16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeu_si32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeu_si64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_store1_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_stream_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_stream_si128) SIMDE_TEST_FUNC_LIST_ENTRY(mm_stream_si32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_stream_si64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_si64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_subs_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_subs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_subs_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_subs_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mul_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mul_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mul_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mul_su32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mulhi_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mulhi_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mullo_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpneq_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpneq_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmplt_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmplt_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmplt_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmplt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmplt_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnlt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnlt_sd) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmple_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmple_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnle_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnle_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpngt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpngt_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpge_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpge_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnge_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnge_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpord_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpord_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpunord_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpunord_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_castpd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_castps_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_castsi128_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_castsi128_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comieq_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comige_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comigt_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comile_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comilt_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comineq_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomieq_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomige_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomigt_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomile_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomilt_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomineq_sd) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_copysign_pd) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_xorsign_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtepi32_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtepi32_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtpd_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtpd_pi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtpd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtpi32_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtps_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtps_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsd_f64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsd_si32) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsd_si64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsd_ss) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_cvtsi128_si16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi128_si32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi128_si64) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_cvtsi16_si128) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi32_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi32_si128) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi64_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi64_si128) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtss_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvttpd_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvttpd_pi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvttps_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvttsd_si32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvttsd_si64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_div_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_div_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_extract_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_insert_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_load_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_load_pd1) SIMDE_TEST_FUNC_LIST_ENTRY(mm_load_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_load_si128) SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadl_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadl_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadr_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_si128) SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_si16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_si32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_si64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_movemask_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_movemask_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskmoveu_si128) SIMDE_TEST_FUNC_LIST_ENTRY(mm_move_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_move_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_movepi64_pi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_movpi64_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_or_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_or_si128) SIMDE_TEST_FUNC_LIST_ENTRY(mm_packs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_packs_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_packus_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_undefined_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_undefined_si128) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_shuffle_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_shuffle_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_shufflehi_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_shufflelo_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sll_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sll_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sll_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_srl_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_srl_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_srl_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sqrt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sqrt_sd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_madd_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sad_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_xor_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_xor_si128) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_not_si128) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_sub_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_mul_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_mod_epi64) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/sse3.c000066400000000000000000001364011400333146700152540ustar00rootroot00000000000000/* Copyright (c) 2017 Evan Nemerson * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in 
all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define SIMDE_TESTS_CURRENT_ISAX sse3 #include #include static int test_simde_x_mm_deinterleaveeven_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { { -INT16_C( 21262), -INT16_C( 27496), INT16_C( 1829), INT16_C( 1406), INT16_C( 23866), -INT16_C( 9078), -INT16_C( 20232), -INT16_C( 6581) }, { -INT16_C( 22365), INT16_C( 30750), -INT16_C( 18380), INT16_C( 19916), INT16_C( 20867), -INT16_C( 24), -INT16_C( 27548), INT16_C( 22045) }, { -INT16_C( 21262), INT16_C( 1829), INT16_C( 23866), -INT16_C( 20232), -INT16_C( 22365), -INT16_C( 18380), INT16_C( 20867), -INT16_C( 27548) } }, { { -INT16_C( 19136), INT16_C( 26090), INT16_C( 27069), -INT16_C( 2197), -INT16_C( 2618), -INT16_C( 16429), INT16_C( 7845), INT16_C( 18597) }, { -INT16_C( 15418), -INT16_C( 1344), -INT16_C( 29317), -INT16_C( 441), INT16_C( 12254), INT16_C( 17150), INT16_C( 7107), INT16_C( 920) }, { -INT16_C( 19136), INT16_C( 27069), -INT16_C( 2618), INT16_C( 7845), -INT16_C( 15418), -INT16_C( 29317), INT16_C( 12254), INT16_C( 7107) } }, { { -INT16_C( 31792), -INT16_C( 29335), -INT16_C( 11028), -INT16_C( 19836), INT16_C( 22473), INT16_C( 28273), INT16_C( 5749), INT16_C( 15286) }, { INT16_C( 30425), INT16_C( 21558), INT16_C( 32003), -INT16_C( 7853), INT16_C( 20909), INT16_C( 28707), -INT16_C( 17300), INT16_C( 15476) }, { -INT16_C( 31792), -INT16_C( 11028), INT16_C( 22473), INT16_C( 5749), INT16_C( 30425), 
INT16_C( 32003), INT16_C( 20909), -INT16_C( 17300) } }, { { -INT16_C( 8897), INT16_C( 11210), INT16_C( 20145), INT16_C( 31453), INT16_C( 20390), INT16_C( 7144), -INT16_C( 24987), INT16_C( 16215) }, { -INT16_C( 29420), INT16_C( 6291), -INT16_C( 6646), -INT16_C( 18439), INT16_C( 7479), -INT16_C( 23768), -INT16_C( 25383), INT16_C( 6368) }, { -INT16_C( 8897), INT16_C( 20145), INT16_C( 20390), -INT16_C( 24987), -INT16_C( 29420), -INT16_C( 6646), INT16_C( 7479), -INT16_C( 25383) } }, { { -INT16_C( 21895), INT16_C( 10819), INT16_C( 8440), -INT16_C( 24924), -INT16_C( 29585), -INT16_C( 10822), INT16_C( 4394), INT16_C( 15892) }, { -INT16_C( 22626), -INT16_C( 22442), INT16_C( 20622), -INT16_C( 15008), -INT16_C( 30611), INT16_C( 18025), INT16_C( 18724), -INT16_C( 25250) }, { -INT16_C( 21895), INT16_C( 8440), -INT16_C( 29585), INT16_C( 4394), -INT16_C( 22626), INT16_C( 20622), -INT16_C( 30611), INT16_C( 18724) } }, { { -INT16_C( 24077), -INT16_C( 5177), INT16_C( 27585), INT16_C( 12682), INT16_C( 17655), INT16_C( 8454), INT16_C( 6741), -INT16_C( 3233) }, { -INT16_C( 18751), INT16_C( 20379), -INT16_C( 1274), INT16_C( 29461), INT16_C( 32387), -INT16_C( 22599), INT16_C( 6087), -INT16_C( 17852) }, { -INT16_C( 24077), INT16_C( 27585), INT16_C( 17655), INT16_C( 6741), -INT16_C( 18751), -INT16_C( 1274), INT16_C( 32387), INT16_C( 6087) } }, { { INT16_C( 3000), INT16_C( 31141), INT16_C( 12150), INT16_C( 28074), -INT16_C( 20365), -INT16_C( 14194), -INT16_C( 4406), -INT16_C( 29509) }, { INT16_C( 22436), -INT16_C( 21797), -INT16_C( 4014), -INT16_C( 10723), -INT16_C( 10642), INT16_C( 13693), -INT16_C( 15635), -INT16_C( 23057) }, { INT16_C( 3000), INT16_C( 12150), -INT16_C( 20365), -INT16_C( 4406), INT16_C( 22436), -INT16_C( 4014), -INT16_C( 10642), -INT16_C( 15635) } }, { { -INT16_C( 27187), INT16_C( 17438), -INT16_C( 13884), INT16_C( 14513), INT16_C( 16505), INT16_C( 17408), -INT16_C( 17362), -INT16_C( 11568) }, { -INT16_C( 21741), INT16_C( 25980), -INT16_C( 26212), INT16_C( 2619), 
-INT16_C( 18065), INT16_C( 23616), INT16_C( 12155), INT16_C( 18433) }, { -INT16_C( 27187), -INT16_C( 13884), INT16_C( 16505), -INT16_C( 17362), -INT16_C( 21741), -INT16_C( 26212), -INT16_C( 18065), INT16_C( 12155) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_x_mm_deinterleaveeven_epi16(a, b); simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_x_mm_deinterleaveodd_epi16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { { -INT16_C( 10387), INT16_C( 10281), INT16_C( 8337), INT16_C( 25558), INT16_C( 3361), -INT16_C( 20206), INT16_C( 23483), -INT16_C( 23483) }, { INT16_C( 32228), -INT16_C( 11156), -INT16_C( 32507), INT16_C( 22207), -INT16_C( 8391), -INT16_C( 22722), -INT16_C( 8534), INT16_C( 6015) }, { INT16_C( 10281), INT16_C( 25558), -INT16_C( 20206), -INT16_C( 23483), -INT16_C( 11156), INT16_C( 22207), -INT16_C( 22722), INT16_C( 6015) } }, { { -INT16_C( 22347), INT16_C( 17983), INT16_C( 5833), -INT16_C( 5463), -INT16_C( 17373), -INT16_C( 8548), -INT16_C( 7913), -INT16_C( 1150) }, { -INT16_C( 4514), INT16_C( 25551), -INT16_C( 29073), -INT16_C( 22342), -INT16_C( 1939), INT16_C( 6223), -INT16_C( 12330), -INT16_C( 29905) }, { INT16_C( 17983), -INT16_C( 5463), -INT16_C( 8548), -INT16_C( 1150), INT16_C( 25551), -INT16_C( 22342), INT16_C( 6223), -INT16_C( 29905) } }, { { INT16_C( 28535), INT16_C( 16593), INT16_C( 31621), -INT16_C( 22485), -INT16_C( 14537), INT16_C( 20102), INT16_C( 2216), INT16_C( 1609) }, { INT16_C( 6646), INT16_C( 26218), INT16_C( 9383), INT16_C( 5390), INT16_C( 24092), -INT16_C( 3283), INT16_C( 23597), -INT16_C( 23426) }, { INT16_C( 16593), -INT16_C( 22485), INT16_C( 20102), INT16_C( 1609), INT16_C( 26218), INT16_C( 5390), -INT16_C( 3283), -INT16_C( 
23426) } }, { { INT16_C( 20683), INT16_C( 20709), INT16_C( 4299), INT16_C( 760), INT16_C( 32471), INT16_C( 32592), -INT16_C( 26234), INT16_C( 32133) }, { -INT16_C( 4174), INT16_C( 23267), -INT16_C( 3821), INT16_C( 12399), -INT16_C( 25521), INT16_C( 31779), -INT16_C( 24072), -INT16_C( 15327) }, { INT16_C( 20709), INT16_C( 760), INT16_C( 32592), INT16_C( 32133), INT16_C( 23267), INT16_C( 12399), INT16_C( 31779), -INT16_C( 15327) } }, { { INT16_C( 1777), -INT16_C( 17388), INT16_C( 3350), -INT16_C( 4674), INT16_C( 3723), INT16_C( 4716), -INT16_C( 3672), INT16_C( 23183) }, { INT16_C( 29409), -INT16_C( 2892), INT16_C( 9059), -INT16_C( 19676), INT16_C( 18367), -INT16_C( 18385), INT16_C( 20713), -INT16_C( 9604) }, { -INT16_C( 17388), -INT16_C( 4674), INT16_C( 4716), INT16_C( 23183), -INT16_C( 2892), -INT16_C( 19676), -INT16_C( 18385), -INT16_C( 9604) } }, { { -INT16_C( 28586), INT16_C( 27799), INT16_C( 21917), INT16_C( 10585), -INT16_C( 15004), INT16_C( 3131), -INT16_C( 13641), -INT16_C( 26522) }, { INT16_C( 6972), -INT16_C( 24692), -INT16_C( 20162), -INT16_C( 430), -INT16_C( 32008), -INT16_C( 7754), INT16_C( 13010), INT16_C( 10684) }, { INT16_C( 27799), INT16_C( 10585), INT16_C( 3131), -INT16_C( 26522), -INT16_C( 24692), -INT16_C( 430), -INT16_C( 7754), INT16_C( 10684) } }, { { INT16_C( 21442), INT16_C( 24725), -INT16_C( 4184), INT16_C( 3209), -INT16_C( 15180), INT16_C( 27416), INT16_C( 32654), -INT16_C( 13821) }, { -INT16_C( 28518), -INT16_C( 10135), -INT16_C( 17343), INT16_C( 14806), -INT16_C( 29634), INT16_C( 4123), -INT16_C( 10306), -INT16_C( 32455) }, { INT16_C( 24725), INT16_C( 3209), INT16_C( 27416), -INT16_C( 13821), -INT16_C( 10135), INT16_C( 14806), INT16_C( 4123), -INT16_C( 32455) } }, { { -INT16_C( 12502), -INT16_C( 11551), INT16_C( 27326), INT16_C( 29407), -INT16_C( 2258), -INT16_C( 17186), -INT16_C( 7818), INT16_C( 4230) }, { -INT16_C( 4239), -INT16_C( 19735), -INT16_C( 16469), -INT16_C( 5652), INT16_C( 1868), INT16_C( 2810), INT16_C( 13278), INT16_C( 2187) 
}, { -INT16_C( 11551), INT16_C( 29407), -INT16_C( 17186), INT16_C( 4230), -INT16_C( 19735), -INT16_C( 5652), INT16_C( 2810), INT16_C( 2187) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_x_mm_deinterleaveodd_epi16(a, b); simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r)); } return 0; } static int test_simde_x_mm_deinterleaveeven_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { { -INT32_C( 495461067), INT32_C( 380891071), -INT32_C( 1359195410), -INT32_C( 1086867746) }, { INT32_C( 1697186048), INT32_C( 908310888), -INT32_C( 1885575044), INT32_C( 1608021034) }, { -INT32_C( 495461067), -INT32_C( 1359195410), INT32_C( 1697186048), -INT32_C( 1885575044) } }, { { INT32_C( 289558610), INT32_C( 757593407), INT32_C( 635118407), -INT32_C( 622587174) }, { INT32_C( 2084572435), INT32_C( 1118987206), -INT32_C( 153989429), INT32_C( 341158594) }, { INT32_C( 289558610), INT32_C( 635118407), INT32_C( 2084572435), -INT32_C( 153989429) } }, { { INT32_C( 958830586), -INT32_C( 748270196), INT32_C( 1274560881), INT32_C( 1730534740) }, { -INT32_C( 1310497302), -INT32_C( 1795975736), -INT32_C( 1500854813), INT32_C( 1790631792) }, { INT32_C( 958830586), INT32_C( 1274560881), -INT32_C( 1310497302), -INT32_C( 1500854813) } }, { { INT32_C( 61137015), -INT32_C( 1613297106), -INT32_C( 1595224244), -INT32_C( 1761144916) }, { INT32_C( 1028189045), INT32_C( 1691433856), INT32_C( 1896504065), -INT32_C( 1294220229) }, { INT32_C( 61137015), -INT32_C( 1595224244), INT32_C( 1028189045), INT32_C( 1896504065) } }, { { -INT32_C( 743080027), -INT32_C( 697135990), INT32_C( 141974620), -INT32_C( 526418581) }, { -INT32_C( 383850648), INT32_C( 609087267), -INT32_C( 2037033141), -INT32_C( 1070043109) }, { -INT32_C( 743080027), INT32_C( 
141974620), -INT32_C( 383850648), -INT32_C( 2037033141) } }, { { INT32_C( 2056515056), -INT32_C( 699398790), -INT32_C( 841038239), -INT32_C( 1397916093) }, { -INT32_C( 2003448987), INT32_C( 111993531), INT32_C( 1418477881), -INT32_C( 1575631694) }, { INT32_C( 2056515056), -INT32_C( 841038239), -INT32_C( 2003448987), INT32_C( 1418477881) } }, { { INT32_C( 723298481), INT32_C( 251751598), INT32_C( 1977409586), -INT32_C( 1021212066) }, { INT32_C( 273462869), -INT32_C( 787023720), -INT32_C( 333012422), INT32_C( 411974502) }, { INT32_C( 723298481), INT32_C( 1977409586), INT32_C( 273462869), -INT32_C( 333012422) } }, { { -INT32_C( 1857836317), INT32_C( 1218528534), -INT32_C( 2084733659), INT32_C( 1564925703) }, { INT32_C( 778932885), -INT32_C( 973110133), -INT32_C( 1917770458), INT32_C( 1151680352) }, { -INT32_C( 1857836317), -INT32_C( 2084733659), INT32_C( 778932885), -INT32_C( 1917770458) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_x_mm_deinterleaveeven_epi32(a, b); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int test_simde_x_mm_deinterleaveodd_epi32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { { -INT32_C( 1143416908), -INT32_C( 314874252), -INT32_C( 564427947), INT32_C( 935545972) }, { INT32_C( 1975807146), INT32_C( 1788688421), -INT32_C( 540372237), INT32_C( 1581754026) }, { -INT32_C( 314874252), INT32_C( 935545972), INT32_C( 1788688421), INT32_C( 1581754026) } }, { { -INT32_C( 250011523), -INT32_C( 606186362), INT32_C( 1270430167), INT32_C( 813857926) }, { INT32_C( 228935656), INT32_C( 1937261439), INT32_C( 2136097493), INT32_C( 1709021928) }, { -INT32_C( 606186362), INT32_C( 813857926), INT32_C( 1937261439), INT32_C( 1709021928) } }, { { INT32_C( 1079441082), 
INT32_C( 555430986), -INT32_C( 210906003), INT32_C( 958656337) }, { -INT32_C( 1236809418), -INT32_C( 517357812), -INT32_C( 379553023), -INT32_C( 816956139) }, { INT32_C( 555430986), INT32_C( 958656337), -INT32_C( 517357812), -INT32_C( 816956139) } }, { { INT32_C( 2098177075), INT32_C( 1167993560), INT32_C( 1345915903), INT32_C( 831085819) }, { INT32_C( 837275685), -INT32_C( 1877864305), -INT32_C( 1585876340), -INT32_C( 495859793) }, { INT32_C( 1167993560), INT32_C( 831085819), -INT32_C( 1877864305), -INT32_C( 495859793) } }, { { INT32_C( 1130332267), -INT32_C( 1433796949), INT32_C( 83542537), INT32_C( 1144423198) }, { -INT32_C( 478864044), -INT32_C( 1166768082), -INT32_C( 1436815878), INT32_C( 546098357) }, { -INT32_C( 1433796949), INT32_C( 1144423198), -INT32_C( 1166768082), INT32_C( 546098357) } }, { { -INT32_C( 127603635), -INT32_C( 207426070), -INT32_C( 839344977), INT32_C( 1930505759) }, { INT32_C( 2035779403), INT32_C( 154389263), INT32_C( 1840484280), -INT32_C( 1467072421) }, { -INT32_C( 207426070), INT32_C( 1930505759), INT32_C( 154389263), -INT32_C( 1467072421) } }, { { INT32_C( 379646508), -INT32_C( 1911995681), -INT32_C( 27590178), INT32_C( 2071031087) }, { INT32_C( 66373876), INT32_C( 1275865235), INT32_C( 314163383), INT32_C( 750470912) }, { -INT32_C( 1911995681), INT32_C( 2071031087), INT32_C( 1275865235), INT32_C( 750470912) } }, { { INT32_C( 407001913), INT32_C( 2091273118), INT32_C( 2088370765), INT32_C( 1677192303) }, { INT32_C( 1214704820), -INT32_C( 879463916), INT32_C( 853364018), -INT32_C( 832661355) }, { INT32_C( 2091273118), INT32_C( 1677192303), -INT32_C( 879463916), -INT32_C( 832661355) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_x_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_x_mm_deinterleaveodd_epi32(a, b); simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r)); } return 0; } static int 
test_simde_x_mm_deinterleaveeven_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 b[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -473.93), SIMDE_FLOAT32_C( -118.41), SIMDE_FLOAT32_C( 999.42), SIMDE_FLOAT32_C( 207.80) }, { SIMDE_FLOAT32_C( -240.27), SIMDE_FLOAT32_C( -38.87), SIMDE_FLOAT32_C( -206.60), SIMDE_FLOAT32_C( -674.33) }, { SIMDE_FLOAT32_C( -473.93), SIMDE_FLOAT32_C( 999.42), SIMDE_FLOAT32_C( -240.27), SIMDE_FLOAT32_C( -206.60) } }, { { SIMDE_FLOAT32_C( 142.19), SIMDE_FLOAT32_C( 224.84), SIMDE_FLOAT32_C( 333.30), SIMDE_FLOAT32_C( -971.97) }, { SIMDE_FLOAT32_C( -728.25), SIMDE_FLOAT32_C( 611.12), SIMDE_FLOAT32_C( 607.96), SIMDE_FLOAT32_C( -422.86) }, { SIMDE_FLOAT32_C( 142.19), SIMDE_FLOAT32_C( 333.30), SIMDE_FLOAT32_C( -728.25), SIMDE_FLOAT32_C( 607.96) } }, { { SIMDE_FLOAT32_C( -141.30), SIMDE_FLOAT32_C( -687.40), SIMDE_FLOAT32_C( 669.82), SIMDE_FLOAT32_C( 768.18) }, { SIMDE_FLOAT32_C( 291.84), SIMDE_FLOAT32_C( -70.28), SIMDE_FLOAT32_C( -453.11), SIMDE_FLOAT32_C( 157.28) }, { SIMDE_FLOAT32_C( -141.30), SIMDE_FLOAT32_C( 669.82), SIMDE_FLOAT32_C( 291.84), SIMDE_FLOAT32_C( -453.11) } }, { { SIMDE_FLOAT32_C( -84.06), SIMDE_FLOAT32_C( -175.69), SIMDE_FLOAT32_C( -309.30), SIMDE_FLOAT32_C( 582.27) }, { SIMDE_FLOAT32_C( -646.52), SIMDE_FLOAT32_C( -858.00), SIMDE_FLOAT32_C( -765.38), SIMDE_FLOAT32_C( -120.45) }, { SIMDE_FLOAT32_C( -84.06), SIMDE_FLOAT32_C( -309.30), SIMDE_FLOAT32_C( -646.52), SIMDE_FLOAT32_C( -765.38) } }, { { SIMDE_FLOAT32_C( 23.59), SIMDE_FLOAT32_C( -765.97), SIMDE_FLOAT32_C( -912.65), SIMDE_FLOAT32_C( 783.32) }, { SIMDE_FLOAT32_C( 195.16), SIMDE_FLOAT32_C( -119.25), SIMDE_FLOAT32_C( -891.01), SIMDE_FLOAT32_C( -662.65) }, { SIMDE_FLOAT32_C( 23.59), SIMDE_FLOAT32_C( -912.65), SIMDE_FLOAT32_C( 195.16), SIMDE_FLOAT32_C( -891.01) } }, { { SIMDE_FLOAT32_C( -894.42), SIMDE_FLOAT32_C( 442.29), SIMDE_FLOAT32_C( -634.62), SIMDE_FLOAT32_C( -622.67) }, { SIMDE_FLOAT32_C( 53.41), 
SIMDE_FLOAT32_C( 973.34), SIMDE_FLOAT32_C( -45.53), SIMDE_FLOAT32_C( 912.11) }, { SIMDE_FLOAT32_C( -894.42), SIMDE_FLOAT32_C( -634.62), SIMDE_FLOAT32_C( 53.41), SIMDE_FLOAT32_C( -45.53) } }, { { SIMDE_FLOAT32_C( -714.06), SIMDE_FLOAT32_C( -375.71), SIMDE_FLOAT32_C( 680.29), SIMDE_FLOAT32_C( 577.78) }, { SIMDE_FLOAT32_C( 554.02), SIMDE_FLOAT32_C( -772.82), SIMDE_FLOAT32_C( -264.94), SIMDE_FLOAT32_C( -530.04) }, { SIMDE_FLOAT32_C( -714.06), SIMDE_FLOAT32_C( 680.29), SIMDE_FLOAT32_C( 554.02), SIMDE_FLOAT32_C( -264.94) } }, { { SIMDE_FLOAT32_C( 51.48), SIMDE_FLOAT32_C( 425.76), SIMDE_FLOAT32_C( -947.77), SIMDE_FLOAT32_C( 404.96) }, { SIMDE_FLOAT32_C( 567.76), SIMDE_FLOAT32_C( -713.15), SIMDE_FLOAT32_C( -715.49), SIMDE_FLOAT32_C( -408.66) }, { SIMDE_FLOAT32_C( 51.48), SIMDE_FLOAT32_C( -947.77), SIMDE_FLOAT32_C( 567.76), SIMDE_FLOAT32_C( -715.49) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_x_mm_deinterleaveeven_ps(a, b); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_x_mm_deinterleaveodd_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 b[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 319.96), SIMDE_FLOAT32_C( 735.94), SIMDE_FLOAT32_C( -176.73), SIMDE_FLOAT32_C( 47.10) }, { SIMDE_FLOAT32_C( -358.23), SIMDE_FLOAT32_C( -903.77), SIMDE_FLOAT32_C( 728.83), SIMDE_FLOAT32_C( -988.23) }, { SIMDE_FLOAT32_C( 735.94), SIMDE_FLOAT32_C( 47.10), SIMDE_FLOAT32_C( -903.77), SIMDE_FLOAT32_C( -988.23) } }, { { SIMDE_FLOAT32_C( 660.33), SIMDE_FLOAT32_C( 459.02), SIMDE_FLOAT32_C( 713.57), SIMDE_FLOAT32_C( 687.45) }, { SIMDE_FLOAT32_C( 238.78), SIMDE_FLOAT32_C( -573.22), SIMDE_FLOAT32_C( -177.47), SIMDE_FLOAT32_C( 830.16) }, { SIMDE_FLOAT32_C( 459.02), SIMDE_FLOAT32_C( 687.45), SIMDE_FLOAT32_C( -573.22), 
SIMDE_FLOAT32_C( 830.16) } }, { { SIMDE_FLOAT32_C( 997.19), SIMDE_FLOAT32_C( 897.57), SIMDE_FLOAT32_C( 555.92), SIMDE_FLOAT32_C( -485.67) }, { SIMDE_FLOAT32_C( 759.77), SIMDE_FLOAT32_C( 769.53), SIMDE_FLOAT32_C( -961.37), SIMDE_FLOAT32_C( 332.86) }, { SIMDE_FLOAT32_C( 897.57), SIMDE_FLOAT32_C( -485.67), SIMDE_FLOAT32_C( 769.53), SIMDE_FLOAT32_C( 332.86) } }, { { SIMDE_FLOAT32_C( -40.50), SIMDE_FLOAT32_C( 339.87), SIMDE_FLOAT32_C( -944.60), SIMDE_FLOAT32_C( 161.91) }, { SIMDE_FLOAT32_C( -435.47), SIMDE_FLOAT32_C( 115.93), SIMDE_FLOAT32_C( -481.55), SIMDE_FLOAT32_C( 884.50) }, { SIMDE_FLOAT32_C( 339.87), SIMDE_FLOAT32_C( 161.91), SIMDE_FLOAT32_C( 115.93), SIMDE_FLOAT32_C( 884.50) } }, { { SIMDE_FLOAT32_C( -148.13), SIMDE_FLOAT32_C( 341.72), SIMDE_FLOAT32_C( -68.40), SIMDE_FLOAT32_C( 493.64) }, { SIMDE_FLOAT32_C( 437.94), SIMDE_FLOAT32_C( -339.57), SIMDE_FLOAT32_C( 505.41), SIMDE_FLOAT32_C( 98.27) }, { SIMDE_FLOAT32_C( 341.72), SIMDE_FLOAT32_C( 493.64), SIMDE_FLOAT32_C( -339.57), SIMDE_FLOAT32_C( 98.27) } }, { { SIMDE_FLOAT32_C( -880.55), SIMDE_FLOAT32_C( 218.98), SIMDE_FLOAT32_C( -214.27), SIMDE_FLOAT32_C( 358.22) }, { SIMDE_FLOAT32_C( 645.75), SIMDE_FLOAT32_C( 608.25), SIMDE_FLOAT32_C( 188.38), SIMDE_FLOAT32_C( 642.94) }, { SIMDE_FLOAT32_C( 218.98), SIMDE_FLOAT32_C( 358.22), SIMDE_FLOAT32_C( 608.25), SIMDE_FLOAT32_C( 642.94) } }, { { SIMDE_FLOAT32_C( 505.82), SIMDE_FLOAT32_C( -255.70), SIMDE_FLOAT32_C( -842.73), SIMDE_FLOAT32_C( 265.59) }, { SIMDE_FLOAT32_C( -486.16), SIMDE_FLOAT32_C( -804.10), SIMDE_FLOAT32_C( -401.56), SIMDE_FLOAT32_C( 473.34) }, { SIMDE_FLOAT32_C( -255.70), SIMDE_FLOAT32_C( 265.59), SIMDE_FLOAT32_C( -804.10), SIMDE_FLOAT32_C( 473.34) } }, { { SIMDE_FLOAT32_C( 535.77), SIMDE_FLOAT32_C( -346.16), SIMDE_FLOAT32_C( -364.75), SIMDE_FLOAT32_C( -899.70) }, { SIMDE_FLOAT32_C( 769.77), SIMDE_FLOAT32_C( 153.70), SIMDE_FLOAT32_C( 984.80), SIMDE_FLOAT32_C( -378.36) }, { SIMDE_FLOAT32_C( -346.16), SIMDE_FLOAT32_C( -899.70), SIMDE_FLOAT32_C( 153.70), 
SIMDE_FLOAT32_C( -378.36) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_x_mm_deinterleaveodd_ps(a, b); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_x_mm_deinterleaveeven_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 b[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -213.92), SIMDE_FLOAT64_C( -523.36) }, { SIMDE_FLOAT64_C( 418.53), SIMDE_FLOAT64_C( -506.43) }, { SIMDE_FLOAT64_C( -213.92), SIMDE_FLOAT64_C( 418.53) } }, { { SIMDE_FLOAT64_C( 210.38), SIMDE_FLOAT64_C( 323.18) }, { SIMDE_FLOAT64_C( 0.07), SIMDE_FLOAT64_C( -306.56) }, { SIMDE_FLOAT64_C( 210.38), SIMDE_FLOAT64_C( 0.07) } }, { { SIMDE_FLOAT64_C( -309.86), SIMDE_FLOAT64_C( -739.34) }, { SIMDE_FLOAT64_C( -573.79), SIMDE_FLOAT64_C( 274.99) }, { SIMDE_FLOAT64_C( -309.86), SIMDE_FLOAT64_C( -573.79) } }, { { SIMDE_FLOAT64_C( -805.36), SIMDE_FLOAT64_C( -193.80) }, { SIMDE_FLOAT64_C( 247.86), SIMDE_FLOAT64_C( 790.63) }, { SIMDE_FLOAT64_C( -805.36), SIMDE_FLOAT64_C( 247.86) } }, { { SIMDE_FLOAT64_C( -820.99), SIMDE_FLOAT64_C( -241.09) }, { SIMDE_FLOAT64_C( -102.54), SIMDE_FLOAT64_C( -138.57) }, { SIMDE_FLOAT64_C( -820.99), SIMDE_FLOAT64_C( -102.54) } }, { { SIMDE_FLOAT64_C( -904.58), SIMDE_FLOAT64_C( -997.56) }, { SIMDE_FLOAT64_C( -833.83), SIMDE_FLOAT64_C( -291.82) }, { SIMDE_FLOAT64_C( -904.58), SIMDE_FLOAT64_C( -833.83) } }, { { SIMDE_FLOAT64_C( 823.76), SIMDE_FLOAT64_C( 62.64) }, { SIMDE_FLOAT64_C( 610.28), SIMDE_FLOAT64_C( -602.78) }, { SIMDE_FLOAT64_C( 823.76), SIMDE_FLOAT64_C( 610.28) } }, { { SIMDE_FLOAT64_C( -320.72), SIMDE_FLOAT64_C( 398.57) }, { SIMDE_FLOAT64_C( 140.12), SIMDE_FLOAT64_C( 465.37) }, { SIMDE_FLOAT64_C( -320.72), SIMDE_FLOAT64_C( 140.12) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d b = simde_mm_loadu_pd(test_vec[i].b); simde__m128d r = simde_x_mm_deinterleaveeven_pd(a, b); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_x_mm_deinterleaveodd_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 b[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 240.55), SIMDE_FLOAT64_C( 268.44) }, { SIMDE_FLOAT64_C( 124.13), SIMDE_FLOAT64_C( -764.47) }, { SIMDE_FLOAT64_C( 268.44), SIMDE_FLOAT64_C( -764.47) } }, { { SIMDE_FLOAT64_C( 714.48), SIMDE_FLOAT64_C( -430.05) }, { SIMDE_FLOAT64_C( 521.54), SIMDE_FLOAT64_C( -498.21) }, { SIMDE_FLOAT64_C( -430.05), SIMDE_FLOAT64_C( -498.21) } }, { { SIMDE_FLOAT64_C( -36.38), SIMDE_FLOAT64_C( 808.25) }, { SIMDE_FLOAT64_C( 307.62), SIMDE_FLOAT64_C( 363.39) }, { SIMDE_FLOAT64_C( 808.25), SIMDE_FLOAT64_C( 363.39) } }, { { SIMDE_FLOAT64_C( 592.99), SIMDE_FLOAT64_C( 317.46) }, { SIMDE_FLOAT64_C( -310.83), SIMDE_FLOAT64_C( 683.24) }, { SIMDE_FLOAT64_C( 317.46), SIMDE_FLOAT64_C( 683.24) } }, { { SIMDE_FLOAT64_C( 702.91), SIMDE_FLOAT64_C( -799.29) }, { SIMDE_FLOAT64_C( 54.16), SIMDE_FLOAT64_C( 571.93) }, { SIMDE_FLOAT64_C( -799.29), SIMDE_FLOAT64_C( 571.93) } }, { { SIMDE_FLOAT64_C( 355.14), SIMDE_FLOAT64_C( 815.61) }, { SIMDE_FLOAT64_C( -221.09), SIMDE_FLOAT64_C( -615.38) }, { SIMDE_FLOAT64_C( 815.61), SIMDE_FLOAT64_C( -615.38) } }, { { SIMDE_FLOAT64_C( -761.33), SIMDE_FLOAT64_C( 300.07) }, { SIMDE_FLOAT64_C( 74.48), SIMDE_FLOAT64_C( -935.14) }, { SIMDE_FLOAT64_C( 300.07), SIMDE_FLOAT64_C( -935.14) } }, { { SIMDE_FLOAT64_C( -919.14), SIMDE_FLOAT64_C( 156.94) }, { SIMDE_FLOAT64_C( 625.78), SIMDE_FLOAT64_C( 321.42) }, { SIMDE_FLOAT64_C( 156.94), SIMDE_FLOAT64_C( 321.42) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d b = 
simde_mm_loadu_pd(test_vec[i].b); simde__m128d r = simde_x_mm_deinterleaveodd_pd(a, b); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_addsub_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd( 476.02, -639.97), simde_mm_set_pd( 710.19, -41.14), simde_mm_set_pd( 1186.21, -598.83) }, { simde_mm_set_pd( 650.79, -848.27), simde_mm_set_pd( 773.15, 711.98), simde_mm_set_pd( 1423.94, -1560.25) }, { simde_mm_set_pd( -904.77, -447.30), simde_mm_set_pd( -414.59, -690.17), simde_mm_set_pd(-1319.36, 242.87) }, { simde_mm_set_pd( 727.10, -46.44), simde_mm_set_pd( -635.38, 20.27), simde_mm_set_pd( 91.72, -66.71) }, { simde_mm_set_pd( 74.87, -444.69), simde_mm_set_pd( -222.00, 809.16), simde_mm_set_pd( -147.13, -1253.85) }, { simde_mm_set_pd( 468.30, -546.58), simde_mm_set_pd( 629.89, 504.95), simde_mm_set_pd( 1098.19, -1051.53) }, { simde_mm_set_pd( 908.04, -977.41), simde_mm_set_pd( 521.23, -249.10), simde_mm_set_pd( 1429.27, -728.31) }, { simde_mm_set_pd( 107.41, -431.12), simde_mm_set_pd( 91.73, 142.37), simde_mm_set_pd( 199.14, -573.49) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_addsub_pd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_addsub_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps( 827.09f, 888.55f, 270.24f, 512.98f), simde_mm_set_ps( 691.09f, 805.07f, 343.35f, 695.79f), simde_mm_set_ps( 1518.18f, 83.48f, 613.59f, -182.81f) }, { simde_mm_set_ps( -122.09f, 678.17f, -910.24f, -995.98f), simde_mm_set_ps( -197.90f, 177.04f, -469.81f, -451.24f), simde_mm_set_ps( -319.99f, 501.13f, -1380.05f, -544.74f) }, { simde_mm_set_ps( 589.86f, -922.72f, 221.54f, -598.55f), simde_mm_set_ps( -751.93f, 480.30f, 218.06f, 103.71f), 
simde_mm_set_ps( -162.07f, -1403.02f, 439.60f, -702.26f) }, { simde_mm_set_ps( -375.10f, 590.75f, 672.39f, 216.94f), simde_mm_set_ps( 184.12f, 575.54f, -189.52f, 591.53f), simde_mm_set_ps( -190.98f, 15.21f, 482.87f, -374.59f) }, { simde_mm_set_ps( 838.92f, -777.48f, -357.82f, 473.60f), simde_mm_set_ps( 655.27f, -960.61f, 194.84f, 470.24f), simde_mm_set_ps( 1494.19f, 183.13f, -162.98f, 3.36f) }, { simde_mm_set_ps( 141.50f, 865.93f, 836.92f, 780.12f), simde_mm_set_ps( 237.78f, -664.15f, 934.51f, 175.34f), simde_mm_set_ps( 379.28f, 1530.08f, 1771.43f, 604.78f) }, { simde_mm_set_ps( -146.63f, 845.58f, -575.02f, -435.05f), simde_mm_set_ps( 46.98f, 315.33f, -622.74f, -392.97f), simde_mm_set_ps( -99.65f, 530.25f, -1197.76f, -42.08f) }, { simde_mm_set_ps( -588.54f, 208.80f, 44.42f, -534.81f), simde_mm_set_ps( 849.82f, -315.73f, -758.03f, 754.33f), simde_mm_set_ps( 261.28f, 524.53f, -713.61f, -1289.14f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_addsub_ps(test_vec[i].a, test_vec[i].b); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_hadd_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd( 44.10, -542.35), simde_mm_set_pd( -346.60, -427.89), simde_mm_set_pd( -774.49, -498.25) }, { simde_mm_set_pd( 716.10, 840.74), simde_mm_set_pd( -654.24, -672.74), simde_mm_set_pd(-1326.98, 1556.84) }, { simde_mm_set_pd( -397.69, 265.98), simde_mm_set_pd( -595.53, 562.15), simde_mm_set_pd( -33.38, -131.71) }, { simde_mm_set_pd( 416.44, 929.19), simde_mm_set_pd( -225.30, -546.63), simde_mm_set_pd( -771.93, 1345.63) }, { simde_mm_set_pd( 506.73, 886.11), simde_mm_set_pd( 344.49, 957.84), simde_mm_set_pd( 1302.33, 1392.84) }, { simde_mm_set_pd( 886.60, -404.84), simde_mm_set_pd( 386.06, -275.34), simde_mm_set_pd( 110.72, 481.76) }, { simde_mm_set_pd( 4.86, 401.30), simde_mm_set_pd( 316.75, 350.77), simde_mm_set_pd( 667.52, 
406.16) }, { simde_mm_set_pd( -409.95, 357.27), simde_mm_set_pd( -949.43, -786.56), simde_mm_set_pd(-1735.99, -52.68) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_hadd_pd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_hadd_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps( 732.68f, -915.27f, -191.77f, -862.58f), simde_mm_set_ps( 81.80f, 547.56f, 259.82f, 55.94f), simde_mm_set_ps( 629.36f, 315.76f, -182.59f, -1054.35f) }, { simde_mm_set_ps( 429.35f, -314.15f, -691.69f, -113.96f), simde_mm_set_ps( -636.15f, 881.85f, 515.05f, -694.57f), simde_mm_set_ps( 245.70f, -179.52f, 115.20f, -805.65f) }, { simde_mm_set_ps( 163.17f, 585.35f, 889.94f, 989.94f), simde_mm_set_ps( 558.88f, -287.71f, 978.54f, -729.07f), simde_mm_set_ps( 271.17f, 249.47f, 748.52f, 1879.88f) }, { simde_mm_set_ps( 396.52f, 255.51f, 531.47f, -510.49f), simde_mm_set_ps( -162.17f, 929.03f, -176.85f, 827.75f), simde_mm_set_ps( 766.86f, 650.90f, 652.03f, 20.98f) }, { simde_mm_set_ps( 348.14f, -946.97f, -177.74f, 520.68f), simde_mm_set_ps( 339.94f, 653.25f, 168.00f, 216.81f), simde_mm_set_ps( 993.19f, 384.81f, -598.83f, 342.94f) }, { simde_mm_set_ps( -341.20f, -395.72f, -751.71f, 483.71f), simde_mm_set_ps( 214.25f, 187.29f, 627.65f, -993.70f), simde_mm_set_ps( 401.54f, -366.05f, -736.92f, -268.00f) }, { simde_mm_set_ps( -117.08f, -155.79f, 327.94f, -604.45f), simde_mm_set_ps( -924.11f, -3.93f, -496.48f, -281.24f), simde_mm_set_ps( -928.04f, -777.72f, -272.87f, -276.51f) }, { simde_mm_set_ps( -207.92f, 955.09f, 949.83f, -476.81f), simde_mm_set_ps( -883.98f, 810.86f, 947.09f, -558.58f), simde_mm_set_ps( -73.12f, 388.51f, 747.17f, 473.02f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_hadd_ps(test_vec[i].a, test_vec[i].b); simde_assert_m128_close(r, 
test_vec[i].r, 1); } return 0; } static int test_simde_mm_hsub_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd( -15.50, 258.33), simde_mm_set_pd( 484.94, -432.56), simde_mm_set_pd( -917.50, 273.83) }, { simde_mm_set_pd( 50.11, -735.38), simde_mm_set_pd( 70.36, 538.50), simde_mm_set_pd( 468.14, -785.49) }, { simde_mm_set_pd( 140.13, -672.00), simde_mm_set_pd( -602.17, -745.12), simde_mm_set_pd( -142.95, -812.13) }, { simde_mm_set_pd( 1.89, -114.93), simde_mm_set_pd( 125.81, 137.32), simde_mm_set_pd( 11.51, -116.82) }, { simde_mm_set_pd( -579.13, -899.36), simde_mm_set_pd( 893.51, 328.15), simde_mm_set_pd( -565.36, -320.23) }, { simde_mm_set_pd( -275.68, -217.61), simde_mm_set_pd( 167.25, -93.39), simde_mm_set_pd( -260.64, 58.07) }, { simde_mm_set_pd( 312.59, 137.63), simde_mm_set_pd( 589.59, 751.69), simde_mm_set_pd( 162.10, -174.96) }, { simde_mm_set_pd( 359.94, -880.43), simde_mm_set_pd( 239.69, -581.16), simde_mm_set_pd( -820.85, -1240.37) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_hsub_pd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_hsub_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps( 50.11f, -735.38f, -15.50f, 258.33f), simde_mm_set_ps( 70.36f, 538.50f, 484.94f, -432.56f), simde_mm_set_ps( 468.14f, -917.50f, -785.49f, 273.83f) }, { simde_mm_set_ps( 1.89f, -114.93f, 140.13f, -672.00f), simde_mm_set_ps( 125.81f, 137.32f, -602.17f, -745.12f), simde_mm_set_ps( 11.51f, -142.95f, -116.82f, -812.13f) }, { simde_mm_set_ps( -275.68f, -217.61f, -579.13f, -899.36f), simde_mm_set_ps( 167.25f, -93.39f, 893.51f, 328.15f), simde_mm_set_ps( -260.64f, -565.36f, 58.07f, -320.23f) }, { simde_mm_set_ps( 359.94f, -880.43f, 312.59f, 137.63f), simde_mm_set_ps( 239.69f, -581.16f, 589.59f, 751.69f), 
simde_mm_set_ps( -820.85f, 162.10f, -1240.37f, -174.96f) }, { simde_mm_set_ps( 923.43f, 905.56f, -615.92f, 454.60f), simde_mm_set_ps( 375.63f, 326.29f, -819.79f, -550.42f), simde_mm_set_ps( -49.34f, 269.37f, -17.87f, 1070.52f) }, { simde_mm_set_ps( 344.96f, -84.73f, -925.77f, 984.26f), simde_mm_set_ps( 584.98f, 981.58f, -824.48f, 268.25f), simde_mm_set_ps( 396.60f, 1092.73f, -429.69f, 1910.03f) }, { simde_mm_set_ps( 405.32f, -74.19f, 712.30f, 820.93f), simde_mm_set_ps( -939.26f, -768.80f, -854.21f, -69.68f), simde_mm_set_ps( 170.46f, 784.53f, -479.51f, 108.63f) }, { simde_mm_set_ps( -199.94f, 783.57f, 779.03f, 578.25f), simde_mm_set_ps( 177.19f, -819.96f, -14.40f, 361.82f), simde_mm_set_ps( -997.15f, 376.22f, 983.51f, -200.78f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_hsub_ps(test_vec[i].a, test_vec[i].b); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_lddqu_si128(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C(-112), INT8_C( 117), INT8_C( -34), INT8_C( 65), INT8_C( -1), INT8_C( 38), INT8_C( 89), INT8_C(-126), INT8_C( 67), INT8_C( -47), INT8_C( -14), INT8_C( -14), INT8_C( -36), INT8_C( 93), INT8_C( 67), INT8_C( -57)), simde_mm_set_epi8(INT8_C(-112), INT8_C( 117), INT8_C( -34), INT8_C( 65), INT8_C( -1), INT8_C( 38), INT8_C( 89), INT8_C(-126), INT8_C( 67), INT8_C( -47), INT8_C( -14), INT8_C( -14), INT8_C( -36), INT8_C( 93), INT8_C( 67), INT8_C( -57)) }, { simde_mm_set_epi8(INT8_C( 35), INT8_C( -25), INT8_C( 29), INT8_C(-117), INT8_C( -37), INT8_C( 120), INT8_C(-105), INT8_C( 106), INT8_C( 4), INT8_C( 73), INT8_C( -55), INT8_C( -70), INT8_C( 11), INT8_C( -15), INT8_C( -35), INT8_C(-116)), simde_mm_set_epi8(INT8_C( 35), INT8_C( -25), INT8_C( 29), INT8_C(-117), INT8_C( -37), INT8_C( 120), INT8_C(-105), INT8_C( 106), INT8_C( 4), INT8_C( 73), INT8_C( -55), INT8_C( -70), INT8_C( 11), INT8_C( -15), INT8_C( 
-35), INT8_C(-116)) }, { simde_mm_set_epi8(INT8_C(-101), INT8_C(-119), INT8_C( 63), INT8_C(-115), INT8_C( -96), INT8_C( -31), INT8_C( -21), INT8_C( 40), INT8_C( 85), INT8_C( 109), INT8_C(-125), INT8_C( -15), INT8_C( 21), INT8_C( -59), INT8_C( -50), INT8_C( 101)), simde_mm_set_epi8(INT8_C(-101), INT8_C(-119), INT8_C( 63), INT8_C(-115), INT8_C( -96), INT8_C( -31), INT8_C( -21), INT8_C( 40), INT8_C( 85), INT8_C( 109), INT8_C(-125), INT8_C( -15), INT8_C( 21), INT8_C( -59), INT8_C( -50), INT8_C( 101)) }, { simde_mm_set_epi8(INT8_C( -59), INT8_C( 124), INT8_C( 14), INT8_C( -11), INT8_C( 3), INT8_C( -21), INT8_C( 36), INT8_C(-103), INT8_C( -34), INT8_C( -66), INT8_C( 35), INT8_C( 90), INT8_C( 43), INT8_C( -21), INT8_C( -53), INT8_C( -61)), simde_mm_set_epi8(INT8_C( -59), INT8_C( 124), INT8_C( 14), INT8_C( -11), INT8_C( 3), INT8_C( -21), INT8_C( 36), INT8_C(-103), INT8_C( -34), INT8_C( -66), INT8_C( 35), INT8_C( 90), INT8_C( 43), INT8_C( -21), INT8_C( -53), INT8_C( -61)) }, { simde_mm_set_epi8(INT8_C( -66), INT8_C( -33), INT8_C( 33), INT8_C( -43), INT8_C( 92), INT8_C( -19), INT8_C( -42), INT8_C(-112), INT8_C( -49), INT8_C( 23), INT8_C( 30), INT8_C( 67), INT8_C( -77), INT8_C( 104), INT8_C( 55), INT8_C( -77)), simde_mm_set_epi8(INT8_C( -66), INT8_C( -33), INT8_C( 33), INT8_C( -43), INT8_C( 92), INT8_C( -19), INT8_C( -42), INT8_C(-112), INT8_C( -49), INT8_C( 23), INT8_C( 30), INT8_C( 67), INT8_C( -77), INT8_C( 104), INT8_C( 55), INT8_C( -77)) }, { simde_mm_set_epi8(INT8_C(-109), INT8_C( -50), INT8_C(-103), INT8_C( -95), INT8_C( 10), INT8_C( 39), INT8_C( -20), INT8_C( -38), INT8_C( -87), INT8_C( -89), INT8_C(-100), INT8_C( -30), INT8_C( 0), INT8_C( 13), INT8_C( 36), INT8_C(-101)), simde_mm_set_epi8(INT8_C(-109), INT8_C( -50), INT8_C(-103), INT8_C( -95), INT8_C( 10), INT8_C( 39), INT8_C( -20), INT8_C( -38), INT8_C( -87), INT8_C( -89), INT8_C(-100), INT8_C( -30), INT8_C( 0), INT8_C( 13), INT8_C( 36), INT8_C(-101)) }, { simde_mm_set_epi8(INT8_C( 112), INT8_C( 112), INT8_C( -55), 
INT8_C( -93), INT8_C( -81), INT8_C( 57), INT8_C( 84), INT8_C( -3), INT8_C( -51), INT8_C( -7), INT8_C( 0), INT8_C(-102), INT8_C( 82), INT8_C( -68), INT8_C( 109), INT8_C( 126)), simde_mm_set_epi8(INT8_C( 112), INT8_C( 112), INT8_C( -55), INT8_C( -93), INT8_C( -81), INT8_C( 57), INT8_C( 84), INT8_C( -3), INT8_C( -51), INT8_C( -7), INT8_C( 0), INT8_C(-102), INT8_C( 82), INT8_C( -68), INT8_C( 109), INT8_C( 126)) }, { simde_mm_set_epi8(INT8_C( 85), INT8_C( 18), INT8_C( 96), INT8_C( -54), INT8_C( -78), INT8_C( 122), INT8_C(-109), INT8_C( 31), INT8_C( 104), INT8_C( -42), INT8_C( 93), INT8_C( -40), INT8_C( -73), INT8_C( 110), INT8_C( -72), INT8_C( -16)), simde_mm_set_epi8(INT8_C( 85), INT8_C( 18), INT8_C( 96), INT8_C( -54), INT8_C( -78), INT8_C( 122), INT8_C(-109), INT8_C( 31), INT8_C( 104), INT8_C( -42), INT8_C( 93), INT8_C( -40), INT8_C( -73), INT8_C( 110), INT8_C( -72), INT8_C( -16)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_lddqu_si128(&test_vec[i].a); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_loaddup_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde_float64 a; simde__m128d r; } test_vec[8] = { {SIMDE_FLOAT64_C( -639.28), simde_mm_set_pd(SIMDE_FLOAT64_C(-639.28), SIMDE_FLOAT64_C(-639.28)) }, {SIMDE_FLOAT64_C( 754.31), simde_mm_set_pd(SIMDE_FLOAT64_C( 754.31), SIMDE_FLOAT64_C( 754.31)) }, {SIMDE_FLOAT64_C( -143.09), simde_mm_set_pd(SIMDE_FLOAT64_C(-143.09), SIMDE_FLOAT64_C(-143.09)) }, {SIMDE_FLOAT64_C( -509.95), simde_mm_set_pd(SIMDE_FLOAT64_C(-509.95), SIMDE_FLOAT64_C(-509.95)) }, {SIMDE_FLOAT64_C( 357.11), simde_mm_set_pd(SIMDE_FLOAT64_C( 357.11), SIMDE_FLOAT64_C( 357.11)) }, {SIMDE_FLOAT64_C( 414.83), simde_mm_set_pd(SIMDE_FLOAT64_C( 414.83), SIMDE_FLOAT64_C( 414.83)) }, {SIMDE_FLOAT64_C( 416.46), simde_mm_set_pd(SIMDE_FLOAT64_C( 416.46), SIMDE_FLOAT64_C( 416.46)) }, {SIMDE_FLOAT64_C( 167.42), simde_mm_set_pd(SIMDE_FLOAT64_C( 167.42), SIMDE_FLOAT64_C( 167.42)) } }; 
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_loaddup_pd(&test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_movedup_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd( 850.06, 701.47), simde_mm_set_pd( 701.47, 701.47) }, { simde_mm_set_pd( -959.23, 823.21), simde_mm_set_pd( 823.21, 823.21) }, { simde_mm_set_pd( 37.96, 501.12), simde_mm_set_pd( 501.12, 501.12) }, { simde_mm_set_pd( 288.76, -831.45), simde_mm_set_pd( -831.45, -831.45) }, { simde_mm_set_pd( -93.81, 587.70), simde_mm_set_pd( 587.70, 587.70) }, { simde_mm_set_pd( 524.72, 282.96), simde_mm_set_pd( 282.96, 282.96) }, { simde_mm_set_pd( -824.72, 818.07), simde_mm_set_pd( 818.07, 818.07) }, { simde_mm_set_pd( 136.95, -565.46), simde_mm_set_pd( -565.46, -565.46) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_movedup_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_movehdup_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps( -122.14f, 610.86f, -649.87f, 155.05f), simde_mm_set_ps( -122.14f, -122.14f, -649.87f, -649.87f) }, { simde_mm_set_ps( 559.30f, 847.22f, 946.27f, 786.62f), simde_mm_set_ps( 559.30f, 559.30f, 946.27f, 946.27f) }, { simde_mm_set_ps( -110.32f, 87.26f, -69.05f, -39.46f), simde_mm_set_ps( -110.32f, -110.32f, -69.05f, -69.05f) }, { simde_mm_set_ps( -91.69f, -770.73f, 838.47f, 700.02f), simde_mm_set_ps( -91.69f, -91.69f, 838.47f, 838.47f) }, { simde_mm_set_ps( 54.77f, -632.77f, -6.45f, -696.48f), simde_mm_set_ps( 54.77f, 54.77f, -6.45f, -6.45f) }, { simde_mm_set_ps( -313.08f, 792.67f, -389.34f, -153.47f), simde_mm_set_ps( -313.08f, -313.08f, -389.34f, -389.34f) }, { simde_mm_set_ps( -873.54f, 935.41f, -178.48f, 320.54f), simde_mm_set_ps( -873.54f, -873.54f, -178.48f, 
-178.48f) }, { simde_mm_set_ps( 886.69f, -558.71f, 768.00f, 565.76f), simde_mm_set_ps( 886.69f, 886.69f, 768.00f, 768.00f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_movehdup_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_moveldup_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps( -122.14f, 610.86f, -649.87f, 155.05f), simde_mm_set_ps( 610.86f, 610.86f, 155.05f, 155.05f) }, { simde_mm_set_ps( 559.30f, 847.22f, 946.27f, 786.62f), simde_mm_set_ps( 847.22f, 847.22f, 786.62f, 786.62f) }, { simde_mm_set_ps( -110.32f, 87.26f, -69.05f, -39.46f), simde_mm_set_ps( 87.26f, 87.26f, -39.46f, -39.46f) }, { simde_mm_set_ps( -91.69f, -770.73f, 838.47f, 700.02f), simde_mm_set_ps( -770.73f, -770.73f, 700.02f, 700.02f) }, { simde_mm_set_ps( 54.77f, -632.77f, -6.45f, -696.48f), simde_mm_set_ps( -632.77f, -632.77f, -696.48f, -696.48f) }, { simde_mm_set_ps( -313.08f, 792.67f, -389.34f, -153.47f), simde_mm_set_ps( 792.67f, 792.67f, -153.47f, -153.47f) }, { simde_mm_set_ps( -873.54f, 935.41f, -178.48f, 320.54f), simde_mm_set_ps( 935.41f, 935.41f, 320.54f, 320.54f) }, { simde_mm_set_ps( 886.69f, -558.71f, 768.00f, 565.76f), simde_mm_set_ps( -558.71f, -558.71f, 565.76f, 565.76f) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_moveldup_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_deinterleaveeven_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_deinterleaveodd_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_deinterleaveeven_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_deinterleaveodd_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_deinterleaveeven_ps) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_deinterleaveodd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_deinterleaveeven_pd) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_deinterleaveodd_pd) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm_addsub_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_addsub_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_hadd_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_hadd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_hsub_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_hsub_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_lddqu_si128) SIMDE_TEST_FUNC_LIST_ENTRY(mm_loaddup_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_movedup_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_movehdup_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_moveldup_ps) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/sse4.1.c000066400000000000000000007165151400333146700154260ustar00rootroot00000000000000/* Copyright (c) 2017 Evan Nemerson * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #define SIMDE_TESTS_CURRENT_ISAX sse4_1 #include #include static int test_simde_mm_blendv_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i mask; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( -85), INT8_C( -39), INT8_C(-107), INT8_C( -76), INT8_C( -25), INT8_C( -32), INT8_C( -50), INT8_C(-115), INT8_C( -54), INT8_C( -26), INT8_C( 56), INT8_C( 16), INT8_C( -17), INT8_C( 98), INT8_C( -10), INT8_C( -86)), simde_mm_set_epi8(INT8_C( 111), INT8_C( -17), INT8_C( 70), INT8_C( -98), INT8_C( 38), INT8_C( 75), INT8_C( 17), INT8_C( -50), INT8_C( -64), INT8_C( 25), INT8_C( -4), INT8_C( 42), INT8_C( 13), INT8_C( 109), INT8_C( 122), INT8_C(-104)), simde_mm_set_epi8(INT8_C( -13), INT8_C( 21), INT8_C( 111), INT8_C( -8), INT8_C( 32), INT8_C( -14), INT8_C( 25), INT8_C( 45), INT8_C( 34), INT8_C( -43), INT8_C( -81), INT8_C( 26), INT8_C(-105), INT8_C( 63), INT8_C( -61), INT8_C( -38)), simde_mm_set_epi8(INT8_C( 111), INT8_C( -39), INT8_C(-107), INT8_C( -98), INT8_C( -25), INT8_C( 75), INT8_C( -50), INT8_C(-115), INT8_C( -54), INT8_C( 25), INT8_C( -4), INT8_C( 16), INT8_C( 13), INT8_C( 98), INT8_C( 122), INT8_C(-104)) }, { simde_mm_set_epi8(INT8_C( -79), INT8_C( 12), INT8_C( 105), INT8_C( -95), INT8_C( -27), INT8_C( 97), INT8_C( 123), INT8_C( -2), INT8_C( -75), INT8_C( -82), INT8_C( -63), INT8_C( 121), INT8_C( 39), INT8_C( -40), INT8_C(-103), INT8_C( -72)), simde_mm_set_epi8(INT8_C( -77), INT8_C( -58), INT8_C( -15), INT8_C( -99), INT8_C(-120), INT8_C( 124), INT8_C( 40), INT8_C( 82), INT8_C( 121), INT8_C( 50), INT8_C(-103), INT8_C( -45), INT8_C(-125), INT8_C( -55), INT8_C( -57), INT8_C( -95)), simde_mm_set_epi8(INT8_C( -97), INT8_C( -10), INT8_C( 107), INT8_C( 95), INT8_C( 99), INT8_C(-103), INT8_C( 112), INT8_C( -80), INT8_C( -21), INT8_C( 52), INT8_C(-128), INT8_C( 9), INT8_C( 47), INT8_C( -45), INT8_C( -98), INT8_C( 67)), simde_mm_set_epi8(INT8_C( -77), INT8_C( -58), INT8_C( 105), INT8_C( -95), INT8_C( -27), INT8_C( 124), 
INT8_C( 123), INT8_C( 82), INT8_C( 121), INT8_C( -82), INT8_C(-103), INT8_C( 121), INT8_C( 39), INT8_C( -55), INT8_C( -57), INT8_C( -72)) }, { simde_mm_set_epi8(INT8_C( 65), INT8_C( 33), INT8_C( -24), INT8_C( -68), INT8_C( -81), INT8_C( 103), INT8_C( -77), INT8_C( -49), INT8_C( 99), INT8_C( 55), INT8_C( 98), INT8_C( 36), INT8_C( 31), INT8_C( 86), INT8_C( -79), INT8_C( 106)), simde_mm_set_epi8(INT8_C( 101), INT8_C( 91), INT8_C( -24), INT8_C( 33), INT8_C( -68), INT8_C(-109), INT8_C( -92), INT8_C( -49), INT8_C( -88), INT8_C( 86), INT8_C( 106), INT8_C( 125), INT8_C( 81), INT8_C( -60), INT8_C( -32), INT8_C( 38)), simde_mm_set_epi8(INT8_C( 3), INT8_C( 58), INT8_C( 40), INT8_C( 64), INT8_C( 7), INT8_C(-123), INT8_C( 47), INT8_C( 24), INT8_C( -14), INT8_C( 119), INT8_C( 8), INT8_C( 126), INT8_C( 123), INT8_C( 25), INT8_C( 42), INT8_C(-118)), simde_mm_set_epi8(INT8_C( 65), INT8_C( 33), INT8_C( -24), INT8_C( -68), INT8_C( -81), INT8_C(-109), INT8_C( -77), INT8_C( -49), INT8_C( -88), INT8_C( 55), INT8_C( 98), INT8_C( 36), INT8_C( 31), INT8_C( 86), INT8_C( -79), INT8_C( 38)) }, { simde_mm_set_epi8(INT8_C( 34), INT8_C(-102), INT8_C( 26), INT8_C( 6), INT8_C( -98), INT8_C(-126), INT8_C( 34), INT8_C( 109), INT8_C( -65), INT8_C( -47), INT8_C( 54), INT8_C(-102), INT8_C( 52), INT8_C( 72), INT8_C( 95), INT8_C(-117)), simde_mm_set_epi8(INT8_C( -47), INT8_C( 2), INT8_C( 12), INT8_C( -2), INT8_C( 113), INT8_C( 15), INT8_C( -36), INT8_C( 63), INT8_C( 39), INT8_C( 47), INT8_C( -65), INT8_C( 45), INT8_C( -99), INT8_C( 91), INT8_C(-116), INT8_C( 94)), simde_mm_set_epi8(INT8_C( -98), INT8_C( 39), INT8_C( 27), INT8_C(-110), INT8_C( -56), INT8_C( 121), INT8_C( 15), INT8_C( 100), INT8_C( 94), INT8_C( 8), INT8_C(-121), INT8_C( 116), INT8_C( 47), INT8_C(-128), INT8_C( 82), INT8_C( 52)), simde_mm_set_epi8(INT8_C( -47), INT8_C(-102), INT8_C( 26), INT8_C( -2), INT8_C( 113), INT8_C(-126), INT8_C( 34), INT8_C( 109), INT8_C( -65), INT8_C( -47), INT8_C( -65), INT8_C(-102), INT8_C( 52), INT8_C( 91), 
INT8_C( 95), INT8_C(-117)) }, { simde_mm_set_epi8(INT8_C( 114), INT8_C(-111), INT8_C( -9), INT8_C( -74), INT8_C( 56), INT8_C( -88), INT8_C( 102), INT8_C( 9), INT8_C( 123), INT8_C(-124), INT8_C( 2), INT8_C( -90), INT8_C( -87), INT8_C(-122), INT8_C( -39), INT8_C(-114)), simde_mm_set_epi8(INT8_C( -81), INT8_C( -26), INT8_C( 59), INT8_C( 3), INT8_C( 113), INT8_C( -96), INT8_C(-108), INT8_C( 77), INT8_C( -17), INT8_C( 59), INT8_C( -35), INT8_C( 43), INT8_C( 117), INT8_C( 11), INT8_C(-103), INT8_C( -21)), simde_mm_set_epi8(INT8_C(-116), INT8_C( 124), INT8_C( -23), INT8_C( -24), INT8_C(-105), INT8_C( -39), INT8_C( 45), INT8_C( -32), INT8_C( 5), INT8_C( -60), INT8_C( -98), INT8_C( 111), INT8_C(-112), INT8_C( -60), INT8_C( 8), INT8_C( 13)), simde_mm_set_epi8(INT8_C( -81), INT8_C(-111), INT8_C( 59), INT8_C( 3), INT8_C( 113), INT8_C( -96), INT8_C( 102), INT8_C( 77), INT8_C( 123), INT8_C( 59), INT8_C( -35), INT8_C( -90), INT8_C( 117), INT8_C( 11), INT8_C( -39), INT8_C(-114)) }, { simde_mm_set_epi8(INT8_C( 53), INT8_C( -97), INT8_C( 79), INT8_C( -93), INT8_C( 55), INT8_C(-115), INT8_C( -80), INT8_C( -27), INT8_C( 104), INT8_C( 109), INT8_C( -16), INT8_C( -75), INT8_C( 125), INT8_C( 6), INT8_C( 38), INT8_C( -55)), simde_mm_set_epi8(INT8_C( 48), INT8_C( 25), INT8_C( 24), INT8_C( -17), INT8_C(-102), INT8_C( -66), INT8_C( -6), INT8_C( 22), INT8_C(-100), INT8_C( -56), INT8_C( 25), INT8_C( -26), INT8_C( -37), INT8_C( 49), INT8_C( 8), INT8_C( -6)), simde_mm_set_epi8(INT8_C(-114), INT8_C( 27), INT8_C( -68), INT8_C( 3), INT8_C( 88), INT8_C( 71), INT8_C( 77), INT8_C( 108), INT8_C(-112), INT8_C( 117), INT8_C(-100), INT8_C( -26), INT8_C( 86), INT8_C( -48), INT8_C( -34), INT8_C( -71)), simde_mm_set_epi8(INT8_C( 48), INT8_C( -97), INT8_C( 24), INT8_C( -93), INT8_C( 55), INT8_C(-115), INT8_C( -80), INT8_C( -27), INT8_C(-100), INT8_C( 109), INT8_C( 25), INT8_C( -26), INT8_C( 125), INT8_C( 49), INT8_C( 8), INT8_C( -6)) }, { simde_mm_set_epi8(INT8_C(-121), INT8_C( 29), INT8_C( -87), INT8_C( 80), 
INT8_C( 19), INT8_C( 64), INT8_C( 104), INT8_C( 67), INT8_C( -22), INT8_C( -16), INT8_C( -57), INT8_C( -78), INT8_C( -96), INT8_C(-113), INT8_C( 114), INT8_C( 15)), simde_mm_set_epi8(INT8_C( 99), INT8_C( -98), INT8_C( 122), INT8_C( 119), INT8_C( 80), INT8_C( -87), INT8_C( -77), INT8_C( -36), INT8_C( 10), INT8_C( 99), INT8_C( 114), INT8_C( -46), INT8_C( -79), INT8_C( -58), INT8_C( -87), INT8_C( 106)), simde_mm_set_epi8(INT8_C( -96), INT8_C( 67), INT8_C( -85), INT8_C( 80), INT8_C( 107), INT8_C( 9), INT8_C( 20), INT8_C( 91), INT8_C( -58), INT8_C( -6), INT8_C( 120), INT8_C( -24), INT8_C( 26), INT8_C( -94), INT8_C( -17), INT8_C( 25)), simde_mm_set_epi8(INT8_C( 99), INT8_C( 29), INT8_C( 122), INT8_C( 80), INT8_C( 19), INT8_C( 64), INT8_C( 104), INT8_C( 67), INT8_C( 10), INT8_C( 99), INT8_C( -57), INT8_C( -46), INT8_C( -96), INT8_C( -58), INT8_C( -87), INT8_C( 15)) }, { simde_mm_set_epi8(INT8_C( -13), INT8_C( 28), INT8_C( -62), INT8_C( 108), INT8_C( -15), INT8_C( 17), INT8_C( -48), INT8_C( 13), INT8_C( 32), INT8_C( -56), INT8_C( 83), INT8_C( -65), INT8_C( 73), INT8_C(-114), INT8_C( 47), INT8_C( 99)), simde_mm_set_epi8(INT8_C( 84), INT8_C( -32), INT8_C( -4), INT8_C( 8), INT8_C( 66), INT8_C( -71), INT8_C( -69), INT8_C( -41), INT8_C( -21), INT8_C( -40), INT8_C( 64), INT8_C( -47), INT8_C( 16), INT8_C( -61), INT8_C( 3), INT8_C( -15)), simde_mm_set_epi8(INT8_C( 27), INT8_C( 104), INT8_C( -26), INT8_C( -51), INT8_C( 114), INT8_C( -9), INT8_C( -56), INT8_C( 125), INT8_C( 41), INT8_C( -51), INT8_C( 26), INT8_C( 121), INT8_C( -16), INT8_C( -59), INT8_C( 93), INT8_C( -56)), simde_mm_set_epi8(INT8_C( -13), INT8_C( 28), INT8_C( -4), INT8_C( 8), INT8_C( -15), INT8_C( -71), INT8_C( -69), INT8_C( 13), INT8_C( 32), INT8_C( -40), INT8_C( 83), INT8_C( -65), INT8_C( 16), INT8_C( -61), INT8_C( 47), INT8_C( -15)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_blendv_epi8(test_vec[i].a, test_vec[i].b, test_vec[i].mask); 
simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_x_mm_blendv_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i mask; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( -5889), INT16_C( -9240), INT16_C( 13406), INT16_C( 23197), INT16_C( 25390), INT16_C( 20613), INT16_C( -3211), INT16_C( -5281)), simde_mm_set_epi16(INT16_C( -5787), INT16_C( 260), INT16_C(-28529), INT16_C( -341), INT16_C( 26074), INT16_C(-30339), INT16_C( -9562), INT16_C( -4010)), simde_mm_set_epi16(INT16_C( 11580), INT16_C(-26297), INT16_C( 8234), INT16_C( -6769), INT16_C( -3010), INT16_C( 26987), INT16_C( 26235), INT16_C(-23439)), simde_mm_set_epi16(INT16_C( -5889), INT16_C( 260), INT16_C( 13406), INT16_C( -341), INT16_C( 26074), INT16_C( 20613), INT16_C( -3211), INT16_C( -4010)) }, { simde_mm_set_epi16(INT16_C(-10477), INT16_C( 1551), INT16_C(-16362), INT16_C(-15016), INT16_C( -3374), INT16_C(-19084), INT16_C( -221), INT16_C(-19089)), simde_mm_set_epi16(INT16_C( 8603), INT16_C( 8519), INT16_C( 4642), INT16_C( 1129), INT16_C( 6053), INT16_C( 28257), INT16_C( 10987), INT16_C(-17611)), simde_mm_set_epi16(INT16_C(-14703), INT16_C( 23402), INT16_C( -5363), INT16_C( 25451), INT16_C( 4787), INT16_C( 6143), INT16_C( -8049), INT16_C(-25291)), simde_mm_set_epi16(INT16_C( 8603), INT16_C( 1551), INT16_C( 4642), INT16_C(-15016), INT16_C( -3374), INT16_C(-19084), INT16_C( 10987), INT16_C(-17611)) }, { simde_mm_set_epi16(INT16_C(-10958), INT16_C( 239), INT16_C(-29416), INT16_C( 25642), INT16_C(-31255), INT16_C( 18557), INT16_C( 17603), INT16_C( 9411)), simde_mm_set_epi16(INT16_C(-22204), INT16_C(-12104), INT16_C( 21385), INT16_C( 163), INT16_C(-13806), INT16_C( -4673), INT16_C(-31502), INT16_C( 30113)), simde_mm_set_epi16(INT16_C( 26590), INT16_C(-27173), INT16_C( 3052), INT16_C( 29423), INT16_C( 14159), INT16_C( 16581), INT16_C( 15114), INT16_C( 31350)), simde_mm_set_epi16(INT16_C(-10958), INT16_C(-12104), INT16_C(-29416), 
INT16_C( 25642), INT16_C(-31255), INT16_C( 18557), INT16_C( 17603), INT16_C( 9411)) }, { simde_mm_set_epi16(INT16_C( -8795), INT16_C(-24496), INT16_C(-21018), INT16_C(-23768), INT16_C( 25939), INT16_C( -2265), INT16_C( 13801), INT16_C( 28390)), simde_mm_set_epi16(INT16_C(-31776), INT16_C( 7886), INT16_C( 19773), INT16_C( 11337), INT16_C( -9683), INT16_C(-15212), INT16_C(-11745), INT16_C(-21367)), simde_mm_set_epi16(INT16_C(-30437), INT16_C( -4024), INT16_C(-15989), INT16_C( -4852), INT16_C( 20067), INT16_C( 28092), INT16_C(-20772), INT16_C( 17444)), simde_mm_set_epi16(INT16_C(-31776), INT16_C( 7886), INT16_C( 19773), INT16_C( 11337), INT16_C( 25939), INT16_C( -2265), INT16_C(-11745), INT16_C( 28390)) }, { simde_mm_set_epi16(INT16_C( 11712), INT16_C( 21433), INT16_C( -1195), INT16_C( 25259), INT16_C( 18957), INT16_C( 5262), INT16_C( -9688), INT16_C(-14506)), simde_mm_set_epi16(INT16_C( 24574), INT16_C( 2777), INT16_C(-18691), INT16_C( 4928), INT16_C( 32122), INT16_C(-10541), INT16_C( 2746), INT16_C( 8282)), simde_mm_set_epi16(INT16_C( 5301), INT16_C( 12242), INT16_C( 18664), INT16_C( 26110), INT16_C(-14367), INT16_C( -9137), INT16_C( -7209), INT16_C( 24768)), simde_mm_set_epi16(INT16_C( 11712), INT16_C( 21433), INT16_C( -1195), INT16_C( 25259), INT16_C( 32122), INT16_C(-10541), INT16_C( 2746), INT16_C(-14506)) }, { simde_mm_set_epi16(INT16_C(-20542), INT16_C( 3332), INT16_C( 24354), INT16_C(-23695), INT16_C(-10493), INT16_C( 32452), INT16_C(-26923), INT16_C( 16579)), simde_mm_set_epi16(INT16_C( 29156), INT16_C( 8347), INT16_C(-32604), INT16_C(-24707), INT16_C(-20393), INT16_C(-18566), INT16_C( 17032), INT16_C(-18520)), simde_mm_set_epi16(INT16_C(-23886), INT16_C( 30820), INT16_C( 12721), INT16_C(-28893), INT16_C( -7688), INT16_C( 11154), INT16_C(-11967), INT16_C( 31969)), simde_mm_set_epi16(INT16_C( 29156), INT16_C( 3332), INT16_C( 24354), INT16_C(-24707), INT16_C(-20393), INT16_C( 32452), INT16_C( 17032), INT16_C( 16579)) }, { simde_mm_set_epi16(INT16_C( 22583), 
INT16_C( 4415), INT16_C(-24878), INT16_C(-15401), INT16_C(-29840), INT16_C(-16999), INT16_C(-23406), INT16_C(-32259)), simde_mm_set_epi16(INT16_C( 31553), INT16_C( 23166), INT16_C(-15560), INT16_C( 31886), INT16_C( 7368), INT16_C(-15822), INT16_C(-17797), INT16_C( 1487)), simde_mm_set_epi16(INT16_C(-12147), INT16_C( 2989), INT16_C(-24661), INT16_C(-22664), INT16_C(-16476), INT16_C( 408), INT16_C(-13750), INT16_C(-15888)), simde_mm_set_epi16(INT16_C( 31553), INT16_C( 4415), INT16_C(-15560), INT16_C( 31886), INT16_C( 7368), INT16_C(-16999), INT16_C(-17797), INT16_C( 1487)) }, { simde_mm_set_epi16(INT16_C( 32299), INT16_C( 7492), INT16_C( 26066), INT16_C( 30675), INT16_C( 26958), INT16_C( 8712), INT16_C(-17582), INT16_C(-18062)), simde_mm_set_epi16(INT16_C( 25592), INT16_C( 2248), INT16_C(-21774), INT16_C( 7511), INT16_C( 6101), INT16_C( 18791), INT16_C(-31731), INT16_C( 22067)), simde_mm_set_epi16(INT16_C(-26320), INT16_C( 16537), INT16_C(-11326), INT16_C( 8393), INT16_C(-25832), INT16_C( 24810), INT16_C(-17206), INT16_C(-26982)), simde_mm_set_epi16(INT16_C( 25592), INT16_C( 7492), INT16_C(-21774), INT16_C( 30675), INT16_C( 6101), INT16_C( 8712), INT16_C(-31731), INT16_C( 22067)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_x_mm_blendv_epi16(test_vec[i].a, test_vec[i].b, test_vec[i].mask); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_x_mm_blendv_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i mask; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( -385885208), INT32_C( 878598813), INT32_C( 1663979653), INT32_C( -210375841)), simde_mm_set_epi32(INT32_C( -379256572), INT32_C(-1869611349), INT32_C( 1708820861), INT32_C( -626593706)), simde_mm_set_epi32(INT32_C( 758946119), INT32_C( 539682191), INT32_C( -197236373), INT32_C( 1719379057)), simde_mm_set_epi32(INT32_C( -385885208), INT32_C( 878598813), INT32_C( 1708820861), 
INT32_C( -210375841)) }, { simde_mm_set_epi32(INT32_C( -686619121), INT32_C(-1072249512), INT32_C( -221072012), INT32_C( -14437009)), simde_mm_set_epi32(INT32_C( 563814727), INT32_C( 304219241), INT32_C( 396717665), INT32_C( 720091957)), simde_mm_set_epi32(INT32_C( -963552406), INT32_C( -351444117), INT32_C( 313726975), INT32_C( -527459019)), simde_mm_set_epi32(INT32_C( 563814727), INT32_C( 304219241), INT32_C( -221072012), INT32_C( 720091957)) }, { simde_mm_set_epi32(INT32_C( -718143249), INT32_C(-1927781334), INT32_C(-2048309123), INT32_C( 1153639619)), simde_mm_set_epi32(INT32_C(-1455107912), INT32_C( 1401487523), INT32_C( -904729153), INT32_C(-2064484959)), simde_mm_set_epi32(INT32_C( 1742640603), INT32_C( 200045295), INT32_C( 927940805), INT32_C( 990542454)), simde_mm_set_epi32(INT32_C( -718143249), INT32_C(-1927781334), INT32_C(-2048309123), INT32_C( 1153639619)) }, { simde_mm_set_epi32(INT32_C( -576348080), INT32_C(-1377393880), INT32_C( 1700001575), INT32_C( 904490726)), simde_mm_set_epi32(INT32_C(-2082464050), INT32_C( 1295854665), INT32_C( -634534764), INT32_C( -769676151)), simde_mm_set_epi32(INT32_C(-1994657720), INT32_C(-1047794420), INT32_C( 1315139004), INT32_C(-1361296348)), simde_mm_set_epi32(INT32_C(-2082464050), INT32_C( 1295854665), INT32_C( 1700001575), INT32_C( -769676151)) }, { simde_mm_set_epi32(INT32_C( 767579065), INT32_C( -78290261), INT32_C( 1242371214), INT32_C( -634861738)), simde_mm_set_epi32(INT32_C( 1610484441), INT32_C(-1224928448), INT32_C( 2105202387), INT32_C( 179970138)), simde_mm_set_epi32(INT32_C( 347418578), INT32_C( 1223190014), INT32_C( -941499313), INT32_C( -472424256)), simde_mm_set_epi32(INT32_C( 767579065), INT32_C( -78290261), INT32_C( 2105202387), INT32_C( 179970138)) }, { simde_mm_set_epi32(INT32_C(-1346237180), INT32_C( 1596105585), INT32_C( -687636796), INT32_C(-1764409149)), simde_mm_set_epi32(INT32_C( 1910775963), INT32_C(-2136694915), INT32_C(-1336428678), INT32_C( 1116256168)), 
simde_mm_set_epi32(INT32_C(-1565362076), INT32_C( 833720099), INT32_C( -503829614), INT32_C( -784237343)), simde_mm_set_epi32(INT32_C( 1910775963), INT32_C( 1596105585), INT32_C(-1336428678), INT32_C( 1116256168)) }, { simde_mm_set_epi32(INT32_C( 1480003903), INT32_C(-1630354473), INT32_C(-1955545703), INT32_C(-1533902339)), simde_mm_set_epi32(INT32_C( 2067880574), INT32_C(-1019708274), INT32_C( 482918962), INT32_C(-1166342705)), simde_mm_set_epi32(INT32_C( -796062803), INT32_C(-1616140424), INT32_C(-1079770728), INT32_C( -901070352)), simde_mm_set_epi32(INT32_C( 2067880574), INT32_C(-1019708274), INT32_C( 482918962), INT32_C(-1166342705)) }, { simde_mm_set_epi32(INT32_C( 2116754756), INT32_C( 1708292051), INT32_C( 1766728200), INT32_C(-1152206478)), simde_mm_set_epi32(INT32_C( 1677199560), INT32_C(-1426973353), INT32_C( 399853927), INT32_C(-2079500749)), simde_mm_set_epi32(INT32_C(-1724890983), INT32_C( -742252343), INT32_C(-1692901142), INT32_C(-1127573862)), simde_mm_set_epi32(INT32_C( 1677199560), INT32_C(-1426973353), INT32_C( 399853927), INT32_C(-2079500749)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_x_mm_blendv_epi32(test_vec[i].a, test_vec[i].b, test_vec[i].mask); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_x_mm_blendv_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i mask; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C(-1657364347491558755), INT64_C( 7146738194929019743)), simde_mm_set_epi64x(INT64_C(-1628894571107713365), INT64_C( 7339329716385935446)), simde_mm_set_epi64x(INT64_C( 3259648761070806415), INT64_C( -847123769897278351)), simde_mm_set_epi64x(INT64_C(-1657364347491558755), INT64_C( 7339329716385935446)) }, { simde_mm_set_epi64x(INT64_C(-2949006666280549032), INT64_C( -949497057320389265)), simde_mm_set_epi64x(INT64_C( 2421565813772387433), INT64_C( 1703889397640575797)), 
simde_mm_set_epi64x(INT64_C(-4138426067808590997), INT64_C( 1347447101265517877)), simde_mm_set_epi64x(INT64_C( 2421565813772387433), INT64_C( -949497057320389265)) }, { simde_mm_set_epi64x(INT64_C(-3084401765930998742), INT64_C(-8797420694229801789)), simde_mm_set_epi64x(INT64_C(-6249640892789358429), INT64_C(-3885782121642297951)), simde_mm_set_epi64x(INT64_C( 7484584398766764783), INT64_C( 3985475411089455734)), simde_mm_set_epi64x(INT64_C(-3084401765930998742), INT64_C(-8797420694229801789)) }, { simde_mm_set_epi64x(INT64_C(-2475396151794818264), INT64_C( 7301451168677981926)), simde_mm_set_epi64x(INT64_C(-8944114988549854135), INT64_C(-2725306056029786999)), simde_mm_set_epi64x(INT64_C(-8566989670866752244), INT64_C( 5648479014807684132)), simde_mm_set_epi64x(INT64_C(-8944114988549854135), INT64_C( 7301451168677981926)) }, { simde_mm_set_epi64x(INT64_C( 3296726985485935275), INT64_C( 5335943737281922902)), simde_mm_set_epi64x(INT64_C( 6916978007881880384), INT64_C( 9041775403806105690)), simde_mm_set_epi64x(INT64_C( 1492151431756015102), INT64_C(-4043708754718924608)), simde_mm_set_epi64x(INT64_C( 3296726985485935275), INT64_C( 9041775403806105690)) }, { simde_mm_set_epi64x(INT64_C(-5782044659163159695), INT64_C(-2953377547815665469)), simde_mm_set_epi64x(INT64_C( 8206720273226178429), INT64_C(-5739917464330258520)), simde_mm_set_epi64x(INT64_C(-6723178921984946397), INT64_C(-2163931711375573791)), simde_mm_set_epi64x(INT64_C( 8206720273226178429), INT64_C(-5739917464330258520)) }, { simde_mm_set_epi64x(INT64_C( 6356568364001969111), INT64_C(-8399004837457264131)), simde_mm_set_epi64x(INT64_C( 8881479440638966926), INT64_C( 2074121151536891343)), simde_mm_set_epi64x(INT64_C(-3419063701768263816), INT64_C(-4637579960544214544)), simde_mm_set_epi64x(INT64_C( 8881479440638966926), INT64_C( 2074121151536891343)) }, { simde_mm_set_epi64x(INT64_C( 9091392452380751827), INT64_C( 7588039843063708018)), simde_mm_set_epi64x(INT64_C( 7203517261933583703), INT64_C( 
1717359541857637939)), simde_mm_set_epi64x(INT64_C(-7408350357597577015), INT64_C(-7270955037083658598)), simde_mm_set_epi64x(INT64_C( 7203517261933583703), INT64_C( 1717359541857637939)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_x_mm_blendv_epi64(test_vec[i].a, test_vec[i].b, test_vec[i].mask); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_blend_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( 13825), INT16_C(-17802), INT16_C( -1598), INT16_C( 25326), INT16_C( 14943), INT16_C( -8227), INT16_C( 21745), INT16_C( 9989)), simde_mm_set_epi16(INT16_C(-11058), INT16_C( 4192), INT16_C(-28252), INT16_C( 10795), INT16_C( 19700), INT16_C( 29704), INT16_C( -7052), INT16_C( 16432)), simde_mm_set_epi16(INT16_C( 13825), INT16_C(-17802), INT16_C( -1598), INT16_C( 25326), INT16_C( 14943), INT16_C( -8227), INT16_C( -7052), INT16_C( 16432)) }, { simde_mm_set_epi16(INT16_C(-17914), INT16_C(-32103), INT16_C( 17164), INT16_C( -187), INT16_C( 21942), INT16_C(-25306), INT16_C( -7849), INT16_C(-11297)), simde_mm_set_epi16(INT16_C( -9027), INT16_C(-14363), INT16_C( 16126), INT16_C( 30921), INT16_C( 3730), INT16_C(-20492), INT16_C( -5510), INT16_C( 32723)), simde_mm_set_epi16(INT16_C(-17914), INT16_C(-32103), INT16_C( 17164), INT16_C( -187), INT16_C( 21942), INT16_C(-25306), INT16_C( -5510), INT16_C( 32723)) }, { simde_mm_set_epi16(INT16_C(-25152), INT16_C( 30665), INT16_C( 26270), INT16_C( 23820), INT16_C(-24322), INT16_C( 706), INT16_C(-26336), INT16_C(-13296)), simde_mm_set_epi16(INT16_C(-24671), INT16_C(-19680), INT16_C( 10653), INT16_C( -9500), INT16_C(-16899), INT16_C( 495), INT16_C( 26780), INT16_C(-11315)), simde_mm_set_epi16(INT16_C(-25152), INT16_C( 30665), INT16_C( 26270), INT16_C( 23820), INT16_C(-24322), INT16_C( 706), INT16_C( 26780), INT16_C(-11315)) }, { simde_mm_set_epi16(INT16_C( 
20562), INT16_C(-19834), INT16_C( 5180), INT16_C( -5117), INT16_C( 16168), INT16_C( 23520), INT16_C(-16838), INT16_C( 28892)), simde_mm_set_epi16(INT16_C( 2860), INT16_C( -6670), INT16_C(-25365), INT16_C( -4954), INT16_C( 30912), INT16_C( 6045), INT16_C( 2749), INT16_C( 4401)), simde_mm_set_epi16(INT16_C( 20562), INT16_C(-19834), INT16_C( 5180), INT16_C( -5117), INT16_C( 16168), INT16_C( 23520), INT16_C( 2749), INT16_C( 4401)) }, { simde_mm_set_epi16(INT16_C( 25997), INT16_C( -1860), INT16_C( -9126), INT16_C( 12214), INT16_C( 7159), INT16_C( -2303), INT16_C( 7182), INT16_C(-20779)), simde_mm_set_epi16(INT16_C( -7432), INT16_C( 954), INT16_C(-26453), INT16_C( -2062), INT16_C( 14984), INT16_C( -7563), INT16_C( 13369), INT16_C( -4134)), simde_mm_set_epi16(INT16_C( 25997), INT16_C( -1860), INT16_C( -9126), INT16_C( 12214), INT16_C( 7159), INT16_C( -2303), INT16_C( 13369), INT16_C( -4134)) }, { simde_mm_set_epi16(INT16_C( -6104), INT16_C(-18564), INT16_C( 5298), INT16_C( -6001), INT16_C( -6122), INT16_C( 14804), INT16_C(-32440), INT16_C(-10960)), simde_mm_set_epi16(INT16_C( 8197), INT16_C( 14220), INT16_C(-10924), INT16_C( 17196), INT16_C( 21768), INT16_C( 7639), INT16_C( -4935), INT16_C( -993)), simde_mm_set_epi16(INT16_C( -6104), INT16_C(-18564), INT16_C( 5298), INT16_C( -6001), INT16_C( -6122), INT16_C( 14804), INT16_C( -4935), INT16_C( -993)) }, { simde_mm_set_epi16(INT16_C( 6003), INT16_C( -5880), INT16_C( 31967), INT16_C(-28984), INT16_C( -9503), INT16_C( 30320), INT16_C(-19146), INT16_C( 32146)), simde_mm_set_epi16(INT16_C( 4317), INT16_C( 22644), INT16_C(-25807), INT16_C(-30524), INT16_C( 223), INT16_C( -7098), INT16_C( 9867), INT16_C( -9190)), simde_mm_set_epi16(INT16_C( 6003), INT16_C( -5880), INT16_C( 31967), INT16_C(-28984), INT16_C( -9503), INT16_C( 30320), INT16_C( 9867), INT16_C( -9190)) }, { simde_mm_set_epi16(INT16_C( 12033), INT16_C( 14639), INT16_C( 28420), INT16_C( 14334), INT16_C( -3029), INT16_C(-20328), INT16_C( 8988), INT16_C( 23547)), 
simde_mm_set_epi16(INT16_C( 9966), INT16_C(-21867), INT16_C( 738), INT16_C(-29688), INT16_C( 9349), INT16_C( 2360), INT16_C(-12771), INT16_C(-15402)), simde_mm_set_epi16(INT16_C( 12033), INT16_C( 14639), INT16_C( 28420), INT16_C( 14334), INT16_C( -3029), INT16_C(-20328), INT16_C(-12771), INT16_C(-15402)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_blend_epi16(test_vec[i].a, test_vec[i].b, 3); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_blend_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 307.09), SIMDE_FLOAT64_C( -991.93)), simde_mm_set_pd(SIMDE_FLOAT64_C( -252.92), SIMDE_FLOAT64_C( -56.61)), simde_mm_set_pd(SIMDE_FLOAT64_C( 307.09), SIMDE_FLOAT64_C( -56.61)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 263.25), SIMDE_FLOAT64_C( 576.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( -398.47), SIMDE_FLOAT64_C( -845.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( 263.25), SIMDE_FLOAT64_C( -845.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -156.21), SIMDE_FLOAT64_C( 805.98)), simde_mm_set_pd(SIMDE_FLOAT64_C( -712.32), SIMDE_FLOAT64_C( -557.85)), simde_mm_set_pd(SIMDE_FLOAT64_C( -156.21), SIMDE_FLOAT64_C( -557.85)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 191.78), SIMDE_FLOAT64_C( -309.61)), simde_mm_set_pd(SIMDE_FLOAT64_C( -997.82), SIMDE_FLOAT64_C( -493.58)), simde_mm_set_pd(SIMDE_FLOAT64_C( 191.78), SIMDE_FLOAT64_C( -493.58)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -933.58), SIMDE_FLOAT64_C( -486.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( 880.67), SIMDE_FLOAT64_C( 791.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( -933.58), SIMDE_FLOAT64_C( 791.10)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 45.45), SIMDE_FLOAT64_C( -436.38)), simde_mm_set_pd(SIMDE_FLOAT64_C( 211.82), SIMDE_FLOAT64_C( 653.98)), simde_mm_set_pd(SIMDE_FLOAT64_C( 45.45), SIMDE_FLOAT64_C( 653.98)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 915.51), SIMDE_FLOAT64_C( 
-445.43)), simde_mm_set_pd(SIMDE_FLOAT64_C( -69.14), SIMDE_FLOAT64_C( -345.82)), simde_mm_set_pd(SIMDE_FLOAT64_C( 915.51), SIMDE_FLOAT64_C( -345.82)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -603.21), SIMDE_FLOAT64_C( -182.69)), simde_mm_set_pd(SIMDE_FLOAT64_C( -463.45), SIMDE_FLOAT64_C( 929.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( -603.21), SIMDE_FLOAT64_C( 929.21)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_blend_pd(test_vec[i].a, test_vec[i].b, 1); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_blend_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 263.25), SIMDE_FLOAT32_C( 576.67), SIMDE_FLOAT32_C( 307.09), SIMDE_FLOAT32_C( -991.93)), simde_mm_set_ps(SIMDE_FLOAT32_C( -398.47), SIMDE_FLOAT32_C( -845.00), SIMDE_FLOAT32_C( -252.92), SIMDE_FLOAT32_C( -56.61)), simde_mm_set_ps(SIMDE_FLOAT32_C( 263.25), SIMDE_FLOAT32_C( 576.67), SIMDE_FLOAT32_C( -252.92), SIMDE_FLOAT32_C( -991.93)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 191.78), SIMDE_FLOAT32_C( -309.61), SIMDE_FLOAT32_C( -156.21), SIMDE_FLOAT32_C( 805.98)), simde_mm_set_ps(SIMDE_FLOAT32_C( -997.82), SIMDE_FLOAT32_C( -493.58), SIMDE_FLOAT32_C( -712.32), SIMDE_FLOAT32_C( -557.85)), simde_mm_set_ps(SIMDE_FLOAT32_C( 191.78), SIMDE_FLOAT32_C( -309.61), SIMDE_FLOAT32_C( -712.32), SIMDE_FLOAT32_C( 805.98)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 45.45), SIMDE_FLOAT32_C( -436.38), SIMDE_FLOAT32_C( -933.58), SIMDE_FLOAT32_C( -486.36)), simde_mm_set_ps(SIMDE_FLOAT32_C( 211.82), SIMDE_FLOAT32_C( 653.98), SIMDE_FLOAT32_C( 880.67), SIMDE_FLOAT32_C( 791.10)), simde_mm_set_ps(SIMDE_FLOAT32_C( 45.45), SIMDE_FLOAT32_C( -436.38), SIMDE_FLOAT32_C( 880.67), SIMDE_FLOAT32_C( -486.36)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -603.21), SIMDE_FLOAT32_C( -182.69), SIMDE_FLOAT32_C( 915.51), SIMDE_FLOAT32_C( -445.43)), simde_mm_set_ps(SIMDE_FLOAT32_C( -463.45), 
SIMDE_FLOAT32_C( 929.21), SIMDE_FLOAT32_C( -69.14), SIMDE_FLOAT32_C( -345.82)), simde_mm_set_ps(SIMDE_FLOAT32_C( -603.21), SIMDE_FLOAT32_C( -182.69), SIMDE_FLOAT32_C( -69.14), SIMDE_FLOAT32_C( -445.43)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 898.89), SIMDE_FLOAT32_C( -75.69), SIMDE_FLOAT32_C( 396.94), SIMDE_FLOAT32_C( -959.19)), simde_mm_set_ps(SIMDE_FLOAT32_C( -419.62), SIMDE_FLOAT32_C( -299.33), SIMDE_FLOAT32_C( -147.28), SIMDE_FLOAT32_C( -458.75)), simde_mm_set_ps(SIMDE_FLOAT32_C( 898.89), SIMDE_FLOAT32_C( -75.69), SIMDE_FLOAT32_C( -147.28), SIMDE_FLOAT32_C( -959.19)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 480.97), SIMDE_FLOAT32_C( 316.97), SIMDE_FLOAT32_C( -700.74), SIMDE_FLOAT32_C( -81.35)), simde_mm_set_ps(SIMDE_FLOAT32_C( -930.08), SIMDE_FLOAT32_C( 58.79), SIMDE_FLOAT32_C( -729.84), SIMDE_FLOAT32_C( 572.31)), simde_mm_set_ps(SIMDE_FLOAT32_C( 480.97), SIMDE_FLOAT32_C( 316.97), SIMDE_FLOAT32_C( -729.84), SIMDE_FLOAT32_C( -81.35)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -51.53), SIMDE_FLOAT32_C( 742.30), SIMDE_FLOAT32_C( 440.93), SIMDE_FLOAT32_C( -885.24)), simde_mm_set_ps(SIMDE_FLOAT32_C( 883.38), SIMDE_FLOAT32_C( -314.94), SIMDE_FLOAT32_C( -805.26), SIMDE_FLOAT32_C( 843.17)), simde_mm_set_ps(SIMDE_FLOAT32_C( -51.53), SIMDE_FLOAT32_C( 742.30), SIMDE_FLOAT32_C( -805.26), SIMDE_FLOAT32_C( -885.24)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 812.23), SIMDE_FLOAT32_C( -335.37), SIMDE_FLOAT32_C( 369.92), SIMDE_FLOAT32_C( -137.19)), simde_mm_set_ps(SIMDE_FLOAT32_C( 119.89), SIMDE_FLOAT32_C( 901.62), SIMDE_FLOAT32_C( -234.15), SIMDE_FLOAT32_C( 494.86)), simde_mm_set_ps(SIMDE_FLOAT32_C( 812.23), SIMDE_FLOAT32_C( -335.37), SIMDE_FLOAT32_C( -234.15), SIMDE_FLOAT32_C( -137.19)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_blend_ps(test_vec[i].a, test_vec[i].b, 2); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; }

/* Checks simde_mm_blendv_pd.
 *
 * The variable blend picks each 64-bit lane from `b` when the
 * most-significant (sign) bit of the matching mask lane is set and from `a`
 * otherwise; all other mask bits are ignored.
 *
 * Vectors 0-2 keep every sign bit clear (including masks that only have
 * bit 0 set), so the result must equal `a`.  Vectors 3-7 set the sign bit
 * in one or both lanes so that the "take b" path is exercised as well. */
static int
test_simde_mm_blendv_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128d a;
    simde__m128d b;
    simde__m128i mask;
    simde__m128d r;
  } test_vec[8] = {
    /* sign bits clear: low mask bits must not select b */
    { simde_mm_set_pd(SIMDE_FLOAT64_C( 290.02), SIMDE_FLOAT64_C( -775.12)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( 567.65), SIMDE_FLOAT64_C( 339.87)),
      simde_x_mm_set_epu64x(UINT64_C( 0), UINT64_C( 1)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( 290.02), SIMDE_FLOAT64_C( -775.12)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( 702.24), SIMDE_FLOAT64_C( -371.99)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( 899.02), SIMDE_FLOAT64_C( -664.73)),
      simde_x_mm_set_epu64x(UINT64_C( 1), UINT64_C( 1)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( 702.24), SIMDE_FLOAT64_C( -371.99)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( 403.27), SIMDE_FLOAT64_C( -740.87)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( 975.40), SIMDE_FLOAT64_C( -480.24)),
      simde_x_mm_set_epu64x(UINT64_C( 0), UINT64_C( 0)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( 403.27), SIMDE_FLOAT64_C( -740.87)) },
    /* high lane from b, low lane from a */
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -417.67), SIMDE_FLOAT64_C( 711.92)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -897.21), SIMDE_FLOAT64_C( -402.59)),
      simde_x_mm_set_epu64x(UINT64_C(0x8000000000000000), UINT64_C( 0)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -897.21), SIMDE_FLOAT64_C( 711.92)) },
    /* high lane from a, low lane from b */
    { simde_mm_set_pd(SIMDE_FLOAT64_C( 578.89), SIMDE_FLOAT64_C( -895.99)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -800.50), SIMDE_FLOAT64_C( -144.28)),
      simde_x_mm_set_epu64x(UINT64_C( 0), UINT64_C(0x8000000000000000)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( 578.89), SIMDE_FLOAT64_C( -144.28)) },
    /* both lanes from b */
    { simde_mm_set_pd(SIMDE_FLOAT64_C( 546.78), SIMDE_FLOAT64_C( 883.57)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( 415.01), SIMDE_FLOAT64_C( 773.21)),
      simde_x_mm_set_epu64x(UINT64_C(0x8000000000000000), UINT64_C(0x8000000000000000)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( 415.01), SIMDE_FLOAT64_C( 773.21)) },
    /* sign bit in the high lane, only bit 0 in the low lane */
    { simde_mm_set_pd(SIMDE_FLOAT64_C( 26.01), SIMDE_FLOAT64_C( -59.50)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -530.26), SIMDE_FLOAT64_C( 580.89)),
      simde_x_mm_set_epu64x(UINT64_C(0x8000000000000000), UINT64_C( 1)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -530.26), SIMDE_FLOAT64_C( -59.50)) },
    /* only bit 0 in the high lane, sign bit in the low lane */
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -344.22), SIMDE_FLOAT64_C( 626.91)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( 752.48), SIMDE_FLOAT64_C( 453.28)),
      simde_x_mm_set_epu64x(UINT64_C( 1), UINT64_C(0x8000000000000000)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -344.22), SIMDE_FLOAT64_C( 453.28)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    /* The mask is stored as integers and reinterpreted, not converted. */
    simde__m128d r = simde_mm_blendv_pd(test_vec[i].a, test_vec[i].b, simde_mm_castsi128_pd(test_vec[i].mask));
    simde_assert_m128d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Checks simde_mm_blendv_ps.
 *
 * Same contract as the double-precision variant, per 32-bit lane: a lane
 * comes from `b` only when the sign bit of the matching mask lane is set.
 *
 * Vectors 0-2 keep every sign bit clear, so the result must equal `a`.
 * Vectors 3-7 set the sign bit in various lane combinations so that lanes
 * are actually taken from `b`. */
static int
test_simde_mm_blendv_ps(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128 a;
    simde__m128 b;
    simde__m128i mask;
    simde__m128 r;
  } test_vec[8] = {
    /* sign bits clear: low mask bits must not select b */
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -155.18), SIMDE_FLOAT32_C( -698.08), SIMDE_FLOAT32_C( -535.18), SIMDE_FLOAT32_C( -144.32)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -362.54), SIMDE_FLOAT32_C( -779.39), SIMDE_FLOAT32_C( 825.01), SIMDE_FLOAT32_C( -777.93)),
      simde_x_mm_set_epu32(UINT32_C( 1), UINT32_C( 0), UINT32_C( 1), UINT32_C( 0)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -155.18), SIMDE_FLOAT32_C( -698.08), SIMDE_FLOAT32_C( -535.18), SIMDE_FLOAT32_C( -144.32)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -284.42), SIMDE_FLOAT32_C( 692.24), SIMDE_FLOAT32_C( 101.85), SIMDE_FLOAT32_C( -819.28)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 327.10), SIMDE_FLOAT32_C( 184.40), SIMDE_FLOAT32_C( 440.54), SIMDE_FLOAT32_C( 732.07)),
      simde_x_mm_set_epu32(UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 1)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -284.42), SIMDE_FLOAT32_C( 692.24), SIMDE_FLOAT32_C( 101.85), SIMDE_FLOAT32_C( -819.28)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( 264.15), SIMDE_FLOAT32_C( -480.80), SIMDE_FLOAT32_C( -304.32), SIMDE_FLOAT32_C( 862.50)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 230.55), SIMDE_FLOAT32_C( -649.28), SIMDE_FLOAT32_C( 827.43), SIMDE_FLOAT32_C( -462.60)),
      simde_x_mm_set_epu32(UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 264.15), SIMDE_FLOAT32_C( -480.80), SIMDE_FLOAT32_C( -304.32), SIMDE_FLOAT32_C( 862.50)) },
    /* upper two lanes from b */
    { simde_mm_set_ps(SIMDE_FLOAT32_C( 235.66), SIMDE_FLOAT32_C( 478.16), SIMDE_FLOAT32_C( 480.58), SIMDE_FLOAT32_C( -53.52)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 982.67), SIMDE_FLOAT32_C( -772.62), SIMDE_FLOAT32_C( -150.10), SIMDE_FLOAT32_C( -737.86)),
      simde_x_mm_set_epu32(UINT32_C(0x80000000), UINT32_C(0x80000000), UINT32_C( 0), UINT32_C( 0)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 982.67), SIMDE_FLOAT32_C( -772.62), SIMDE_FLOAT32_C( 480.58), SIMDE_FLOAT32_C( -53.52)) },
    /* upper three lanes from b */
    { simde_mm_set_ps(SIMDE_FLOAT32_C( 121.52), SIMDE_FLOAT32_C( 798.57), SIMDE_FLOAT32_C( 563.90), SIMDE_FLOAT32_C( 845.42)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 593.41), SIMDE_FLOAT32_C( 201.43), SIMDE_FLOAT32_C( -263.22), SIMDE_FLOAT32_C( 41.51)),
      simde_x_mm_set_epu32(UINT32_C(0x80000000), UINT32_C(0x80000000), UINT32_C(0x80000000), UINT32_C( 0)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 593.41), SIMDE_FLOAT32_C( 201.43), SIMDE_FLOAT32_C( -263.22), SIMDE_FLOAT32_C( 845.42)) },
    /* a single lane from b */
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -938.86), SIMDE_FLOAT32_C( -190.10), SIMDE_FLOAT32_C( -729.05), SIMDE_FLOAT32_C( 956.23)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 574.98), SIMDE_FLOAT32_C( -82.92), SIMDE_FLOAT32_C( 247.07), SIMDE_FLOAT32_C( -802.43)),
      simde_x_mm_set_epu32(UINT32_C( 0), UINT32_C(0x80000000), UINT32_C( 0), UINT32_C( 0)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -938.86), SIMDE_FLOAT32_C( -82.92), SIMDE_FLOAT32_C( -729.05), SIMDE_FLOAT32_C( 956.23)) },
    /* sign-bit lanes mixed with a bit-0-only lane and a zero lane */
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -592.31), SIMDE_FLOAT32_C( -123.91), SIMDE_FLOAT32_C( 105.67), SIMDE_FLOAT32_C( 596.25)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -99.40), SIMDE_FLOAT32_C( 86.32), SIMDE_FLOAT32_C( -827.82), SIMDE_FLOAT32_C( 443.02)),
      simde_x_mm_set_epu32(UINT32_C(0x80000000), UINT32_C( 1), UINT32_C(0x80000000), UINT32_C( 0)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -99.40), SIMDE_FLOAT32_C( -123.91), SIMDE_FLOAT32_C( -827.82), SIMDE_FLOAT32_C( 596.25)) },
    /* every lane from b */
    { simde_mm_set_ps(SIMDE_FLOAT32_C( 59.00), SIMDE_FLOAT32_C( -93.37), SIMDE_FLOAT32_C( -934.05), SIMDE_FLOAT32_C( 930.21)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -592.32), SIMDE_FLOAT32_C( 420.31), SIMDE_FLOAT32_C( -235.36), SIMDE_FLOAT32_C( -335.98)),
      simde_x_mm_set_epu32(UINT32_C(0x80000000), UINT32_C(0x80000000), UINT32_C(0x80000000), UINT32_C(0x80000000)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -592.32), SIMDE_FLOAT32_C( 420.31), SIMDE_FLOAT32_C( -235.36), SIMDE_FLOAT32_C( -335.98)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    /* The mask is stored as integers and reinterpreted, not converted. */
    simde__m128 r = simde_mm_blendv_ps(test_vec[i].a, test_vec[i].b, simde_mm_castsi128_ps(test_vec[i].mask));
    simde_assert_m128_close(r, test_vec[i].r, 1);
  }

  return 0;
}

static int test_simde_mm_ceil_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 241.39), SIMDE_FLOAT64_C( 553.26)), simde_mm_set_pd(SIMDE_FLOAT64_C( 242.00), SIMDE_FLOAT64_C( 554.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 307.83), SIMDE_FLOAT64_C( 377.37)), simde_mm_set_pd(SIMDE_FLOAT64_C( 308.00), SIMDE_FLOAT64_C( 378.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 558.16), SIMDE_FLOAT64_C( -404.55)), simde_mm_set_pd(SIMDE_FLOAT64_C( 559.00), SIMDE_FLOAT64_C( -404.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 219.15), SIMDE_FLOAT64_C( 818.17)), simde_mm_set_pd(SIMDE_FLOAT64_C( 220.00), SIMDE_FLOAT64_C( 819.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 25.83), SIMDE_FLOAT64_C( -230.94)), simde_mm_set_pd(SIMDE_FLOAT64_C( 26.00), SIMDE_FLOAT64_C( -230.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 173.52), SIMDE_FLOAT64_C( 64.50)), simde_mm_set_pd(SIMDE_FLOAT64_C( 174.00), SIMDE_FLOAT64_C( 65.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -453.11), SIMDE_FLOAT64_C( -845.77)), simde_mm_set_pd(SIMDE_FLOAT64_C( -453.00), SIMDE_FLOAT64_C( -845.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -50.98), SIMDE_FLOAT64_C( -179.55)), simde_mm_set_pd(SIMDE_FLOAT64_C( -50.00), SIMDE_FLOAT64_C( -179.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_ceil_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_ceil_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; }
test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -624.94), SIMDE_FLOAT32_C( -243.25), SIMDE_FLOAT32_C( -61.63), SIMDE_FLOAT32_C( -948.73)), simde_mm_set_ps(SIMDE_FLOAT32_C( -624.00), SIMDE_FLOAT32_C( -243.00), SIMDE_FLOAT32_C( -61.00), SIMDE_FLOAT32_C( -948.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -906.63), SIMDE_FLOAT32_C( -236.42), SIMDE_FLOAT32_C( 571.51), SIMDE_FLOAT32_C( 624.62)), simde_mm_set_ps(SIMDE_FLOAT32_C( -906.00), SIMDE_FLOAT32_C( -236.00), SIMDE_FLOAT32_C( 572.00), SIMDE_FLOAT32_C( 625.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 503.85), SIMDE_FLOAT32_C( -317.51), SIMDE_FLOAT32_C( -529.30), SIMDE_FLOAT32_C( -871.89)), simde_mm_set_ps(SIMDE_FLOAT32_C( 504.00), SIMDE_FLOAT32_C( -317.00), SIMDE_FLOAT32_C( -529.00), SIMDE_FLOAT32_C( -871.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 571.49), SIMDE_FLOAT32_C( -129.77), SIMDE_FLOAT32_C( -739.71), SIMDE_FLOAT32_C( 413.18)), simde_mm_set_ps(SIMDE_FLOAT32_C( 572.00), SIMDE_FLOAT32_C( -129.00), SIMDE_FLOAT32_C( -739.00), SIMDE_FLOAT32_C( 414.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 438.35), SIMDE_FLOAT32_C( 517.03), SIMDE_FLOAT32_C( 278.46), SIMDE_FLOAT32_C( -656.36)), simde_mm_set_ps(SIMDE_FLOAT32_C( 439.00), SIMDE_FLOAT32_C( 518.00), SIMDE_FLOAT32_C( 279.00), SIMDE_FLOAT32_C( -656.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -159.40), SIMDE_FLOAT32_C( -802.82), SIMDE_FLOAT32_C( 663.71), SIMDE_FLOAT32_C( 186.86)), simde_mm_set_ps(SIMDE_FLOAT32_C( -159.00), SIMDE_FLOAT32_C( -802.00), SIMDE_FLOAT32_C( 664.00), SIMDE_FLOAT32_C( 187.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -163.74), SIMDE_FLOAT32_C( 485.54), SIMDE_FLOAT32_C( 138.17), SIMDE_FLOAT32_C( 794.67)), simde_mm_set_ps(SIMDE_FLOAT32_C( -163.00), SIMDE_FLOAT32_C( 486.00), SIMDE_FLOAT32_C( 139.00), SIMDE_FLOAT32_C( 795.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -469.36), SIMDE_FLOAT32_C( -418.24), SIMDE_FLOAT32_C( -419.25), SIMDE_FLOAT32_C( 390.46)), simde_mm_set_ps(SIMDE_FLOAT32_C( -469.00), SIMDE_FLOAT32_C( -418.00), SIMDE_FLOAT32_C( -419.00), 
SIMDE_FLOAT32_C( 391.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_ceil_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_ceil_sd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -610.18), SIMDE_FLOAT64_C( 172.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( 234.40), SIMDE_FLOAT64_C( -339.84)), simde_mm_set_pd(SIMDE_FLOAT64_C( -610.18), SIMDE_FLOAT64_C( -339.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -467.52), SIMDE_FLOAT64_C( 904.78)), simde_mm_set_pd(SIMDE_FLOAT64_C( -527.39), SIMDE_FLOAT64_C( -115.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( -467.52), SIMDE_FLOAT64_C( -115.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 191.24), SIMDE_FLOAT64_C( 823.78)), simde_mm_set_pd(SIMDE_FLOAT64_C( 835.27), SIMDE_FLOAT64_C( 744.81)), simde_mm_set_pd(SIMDE_FLOAT64_C( 191.24), SIMDE_FLOAT64_C( 745.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -49.25), SIMDE_FLOAT64_C( -695.95)), simde_mm_set_pd(SIMDE_FLOAT64_C( 800.62), SIMDE_FLOAT64_C( 853.84)), simde_mm_set_pd(SIMDE_FLOAT64_C( -49.25), SIMDE_FLOAT64_C( 854.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 668.83), SIMDE_FLOAT64_C( -182.73)), simde_mm_set_pd(SIMDE_FLOAT64_C( -875.46), SIMDE_FLOAT64_C( 752.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( 668.83), SIMDE_FLOAT64_C( 753.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 242.57), SIMDE_FLOAT64_C( -906.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -816.22), SIMDE_FLOAT64_C( 339.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( 242.57), SIMDE_FLOAT64_C( 340.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -745.15), SIMDE_FLOAT64_C( -744.04)), simde_mm_set_pd(SIMDE_FLOAT64_C( -802.81), SIMDE_FLOAT64_C( 890.79)), simde_mm_set_pd(SIMDE_FLOAT64_C( -745.15), SIMDE_FLOAT64_C( 891.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 238.48), SIMDE_FLOAT64_C( -167.28)), simde_mm_set_pd(SIMDE_FLOAT64_C( -484.51), SIMDE_FLOAT64_C( 175.95)), 
simde_mm_set_pd(SIMDE_FLOAT64_C( 238.48), SIMDE_FLOAT64_C( 176.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_ceil_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_ceil_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -124.45), SIMDE_FLOAT32_C( 911.12), SIMDE_FLOAT32_C( 900.52), SIMDE_FLOAT32_C( -468.92)), simde_mm_set_ps(SIMDE_FLOAT32_C( -31.56), SIMDE_FLOAT32_C( -29.76), SIMDE_FLOAT32_C( -939.35), SIMDE_FLOAT32_C( 459.71)), simde_mm_set_ps(SIMDE_FLOAT32_C( -124.45), SIMDE_FLOAT32_C( 911.12), SIMDE_FLOAT32_C( 900.52), SIMDE_FLOAT32_C( 460.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 514.22), SIMDE_FLOAT32_C( -972.44), SIMDE_FLOAT32_C( -908.76), SIMDE_FLOAT32_C( -817.52)), simde_mm_set_ps(SIMDE_FLOAT32_C( -711.78), SIMDE_FLOAT32_C( 489.62), SIMDE_FLOAT32_C( -109.52), SIMDE_FLOAT32_C( -2.01)), simde_mm_set_ps(SIMDE_FLOAT32_C( 514.22), SIMDE_FLOAT32_C( -972.44), SIMDE_FLOAT32_C( -908.76), SIMDE_FLOAT32_C( -2.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -269.88), SIMDE_FLOAT32_C( -875.85), SIMDE_FLOAT32_C( -139.27), SIMDE_FLOAT32_C( -841.56)), simde_mm_set_ps(SIMDE_FLOAT32_C( 501.05), SIMDE_FLOAT32_C( 280.43), SIMDE_FLOAT32_C( -173.91), SIMDE_FLOAT32_C( 644.42)), simde_mm_set_ps(SIMDE_FLOAT32_C( -269.88), SIMDE_FLOAT32_C( -875.85), SIMDE_FLOAT32_C( -139.27), SIMDE_FLOAT32_C( 645.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -643.26), SIMDE_FLOAT32_C( 616.48), SIMDE_FLOAT32_C( 835.28), SIMDE_FLOAT32_C( 527.52)), simde_mm_set_ps(SIMDE_FLOAT32_C( 636.48), SIMDE_FLOAT32_C( -46.56), SIMDE_FLOAT32_C( -891.84), SIMDE_FLOAT32_C( 948.43)), simde_mm_set_ps(SIMDE_FLOAT32_C( -643.26), SIMDE_FLOAT32_C( 616.48), SIMDE_FLOAT32_C( 835.28), SIMDE_FLOAT32_C( 949.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 3.13), SIMDE_FLOAT32_C( -654.36), SIMDE_FLOAT32_C( -994.98), 
SIMDE_FLOAT32_C( -148.95)), simde_mm_set_ps(SIMDE_FLOAT32_C( 806.15), SIMDE_FLOAT32_C( 529.29), SIMDE_FLOAT32_C( -804.42), SIMDE_FLOAT32_C( 408.19)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3.13), SIMDE_FLOAT32_C( -654.36), SIMDE_FLOAT32_C( -994.98), SIMDE_FLOAT32_C( 409.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -138.18), SIMDE_FLOAT32_C( -446.48), SIMDE_FLOAT32_C( -413.95), SIMDE_FLOAT32_C( -252.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( 957.68), SIMDE_FLOAT32_C( 373.35), SIMDE_FLOAT32_C( 271.98), SIMDE_FLOAT32_C( -653.93)), simde_mm_set_ps(SIMDE_FLOAT32_C( -138.18), SIMDE_FLOAT32_C( -446.48), SIMDE_FLOAT32_C( -413.95), SIMDE_FLOAT32_C( -653.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 124.40), SIMDE_FLOAT32_C( -866.66), SIMDE_FLOAT32_C( 288.57), SIMDE_FLOAT32_C( -708.46)), simde_mm_set_ps(SIMDE_FLOAT32_C( 685.14), SIMDE_FLOAT32_C( 776.00), SIMDE_FLOAT32_C( 453.56), SIMDE_FLOAT32_C( -112.81)), simde_mm_set_ps(SIMDE_FLOAT32_C( 124.40), SIMDE_FLOAT32_C( -866.66), SIMDE_FLOAT32_C( 288.57), SIMDE_FLOAT32_C( -112.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -232.75), SIMDE_FLOAT32_C( -137.55), SIMDE_FLOAT32_C( 50.26), SIMDE_FLOAT32_C( -189.71)), simde_mm_set_ps(SIMDE_FLOAT32_C( -288.67), SIMDE_FLOAT32_C( -112.95), SIMDE_FLOAT32_C( 792.47), SIMDE_FLOAT32_C( -200.27)), simde_mm_set_ps(SIMDE_FLOAT32_C( -232.75), SIMDE_FLOAT32_C( -137.55), SIMDE_FLOAT32_C( 50.26), SIMDE_FLOAT32_C( -200.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_ceil_ss(test_vec[i].a, test_vec[i].b); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; }

/* Table-driven check of simde_mm_cmpeq_epi64.
 *
 * Each entry holds two inputs (a, b) and the expected result r.  In the
 * entries below, a position of r is 18446744073709551615 (all bits set)
 * where the matching positions of a and b hold the same value, and 0 where
 * they differ.
 *
 * Returns 0 when the loop finishes; failure handling is left to the
 * assertion macro.
 */
static int
test_simde_mm_cmpeq_epi64(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;
    simde__m128i b;
    simde__m128i r;
  } test_vec[8] = {
    { simde_x_mm_set_epu64x(UINT64_C(13269709557652540150), UINT64_C( 5229646237911669536)),
      simde_x_mm_set_epu64x(UINT64_C(13269709557652540150), UINT64_C( 5229646237911669536)),
      simde_x_mm_set_epu64x(UINT64_C(18446744073709551615), UINT64_C(18446744073709551615)) },
    { simde_x_mm_set_epu64x(UINT64_C(14039735832537203737), UINT64_C(18065414586371585158)),
      simde_x_mm_set_epu64x(UINT64_C( 7896134184979754273), UINT64_C(18065414586371585158)),
      simde_x_mm_set_epu64x(UINT64_C( 0), UINT64_C(18446744073709551615)) },
    { simde_x_mm_set_epu64x(UINT64_C( 8088434365645432882), UINT64_C( 2110149590601129498)),
      simde_x_mm_set_epu64x(UINT64_C(12894308138858138936), UINT64_C( 7645444803376971084)),
      simde_x_mm_set_epu64x(UINT64_C( 0), UINT64_C( 0)) },
    { simde_x_mm_set_epu64x(UINT64_C( 6900998750312547586), UINT64_C(11115665826166806875)),
      simde_x_mm_set_epu64x(UINT64_C( 253910074237567150), UINT64_C(11115665826166806875)),
      simde_x_mm_set_epu64x(UINT64_C( 0), UINT64_C(18446744073709551615)) },
    { simde_x_mm_set_epu64x(UINT64_C(15513947591537518317), UINT64_C( 6346622963553980783)),
      simde_x_mm_set_epu64x(UINT64_C( 1894817647097843127), UINT64_C( 9019647896255166506)),
      simde_x_mm_set_epu64x(UINT64_C( 0), UINT64_C( 0)) },
    { simde_x_mm_set_epu64x(UINT64_C( 189073800069710221), UINT64_C( 5830713182261233236)),
      simde_x_mm_set_epu64x(UINT64_C( 189073800069710221), UINT64_C( 8422068610497871432)),
      simde_x_mm_set_epu64x(UINT64_C(18446744073709551615), UINT64_C( 0)) },
    { simde_x_mm_set_epu64x(UINT64_C(17115829668729212386), UINT64_C(11158226665026575121)),
      simde_x_mm_set_epu64x(UINT64_C(17115829668729212386), UINT64_C(11158226665026575121)),
      simde_x_mm_set_epu64x(UINT64_C(18446744073709551615), UINT64_C(18446744073709551615)) },
    { simde_x_mm_set_epu64x(UINT64_C( 9543887299291080046), UINT64_C(14913997234931691871)),
      simde_x_mm_set_epu64x(UINT64_C( 9543887299291080046), UINT64_C(12485648986158046833)),
      simde_x_mm_set_epu64x(UINT64_C(18446744073709551615), UINT64_C( 0)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i = 0;

  while (i < n_cases) {
    simde__m128i r = simde_mm_cmpeq_epi64(test_vec[i].a, test_vec[i].b);
    simde_assert_m128i_i64(r, ==, test_vec[i].r);
    i++;
  }

  return 0;
}

static int test_simde_mm_cvtepi16_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct {
simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(-15854, 0, -12807, 5951, 29318, -11078, -2252, 10096), simde_mm_set_epi32( 29318, -11078, -2252, 10096) }, { simde_mm_set_epi16( 24388, 32521, -30980, -12289, 27092, 24521, 12106, -27709), simde_mm_set_epi32( 27092, 24521, 12106, -27709) }, { simde_mm_set_epi16(-11871, 15750, -7986, 4594, 22346, 1400, -21386, 7662), simde_mm_set_epi32( 22346, 1400, -21386, 7662) }, { simde_mm_set_epi16(-15687, -15477, -26077, -28781, -29737, -30311, 16950, 14614), simde_mm_set_epi32(-29737, -30311, 16950, 14614) }, { simde_mm_set_epi16(-30646, 13049, -24166, 637, 7297, -627, 6143, -26200), simde_mm_set_epi32( 7297, -627, 6143, -26200) }, { simde_mm_set_epi16( 30171, 1922, -122, -17991, 30806, 12552, -28515, -9185), simde_mm_set_epi32( 30806, 12552, -28515, -9185) }, { simde_mm_set_epi16( 29385, 21267, -10152, 25553, 20674, 6661, -29222, -19848), simde_mm_set_epi32( 20674, 6661, -29222, -19848) }, { simde_mm_set_epi16( 24527, 18305, 30094, -5437, -26652, 20361, 10408, -4948), simde_mm_set_epi32(-26652, 20361, 10408, -4948) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cvtepi16_epi32(test_vec[i].a); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; }

/* Table-driven check of simde_mm_cvtepi16_epi64.
 *
 * Each entry pairs an input a, built from eight signed 16-bit values, with
 * the expected result r.  In the entries below, r is built from the last
 * two simde_mm_set_epi16 arguments of a, written as signed 64-bit values
 * with their sign kept.
 *
 * Returns 0 when the loop finishes; failure handling is left to the
 * assertion macro.
 */
static int
test_simde_mm_cvtepi16_epi64(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;
    simde__m128i r;
  } test_vec[8] = {
    { simde_mm_set_epi16(INT16_C(-25288), INT16_C( 737), INT16_C(-31366), INT16_C(-21360),
                         INT16_C( 1927), INT16_C( 32195), INT16_C( -4472), INT16_C( -2085)),
      simde_mm_set_epi64x(INT64_C( -4472), INT64_C( -2085)) },
    { simde_mm_set_epi16(INT16_C(-30817), INT16_C( 5734), INT16_C( -6209), INT16_C(-18846),
                         INT16_C( 14970), INT16_C( 30345), INT16_C( -8853), INT16_C(-31121)),
      simde_mm_set_epi64x(INT64_C( -8853), INT64_C( -31121)) },
    { simde_mm_set_epi16(INT16_C( -970), INT16_C(-11861), INT16_C(-11667), INT16_C( 14936),
                         INT16_C( 21732), INT16_C(-17006), INT16_C( -2668), INT16_C( 30170)),
      simde_mm_set_epi64x(INT64_C( -2668), INT64_C( 30170)) },
    { simde_mm_set_epi16(INT16_C( 2721), INT16_C(-13375), INT16_C( -1976), INT16_C( 30833),
                         INT16_C(-18247), INT16_C( 16242), INT16_C( 4569), INT16_C( 10132)),
      simde_mm_set_epi64x(INT64_C( 4569), INT64_C( 10132)) },
    { simde_mm_set_epi16(INT16_C(-22879), INT16_C(-24842), INT16_C(-29886), INT16_C(-13312),
                         INT16_C( 24862), INT16_C(-30679), INT16_C( 6258), INT16_C(-19461)),
      simde_mm_set_epi64x(INT64_C( 6258), INT64_C( -19461)) },
    { simde_mm_set_epi16(INT16_C( 15048), INT16_C( 5334), INT16_C( 21517), INT16_C( 10106),
                         INT16_C(-25890), INT16_C( -8759), INT16_C( 21361), INT16_C( 17584)),
      simde_mm_set_epi64x(INT64_C( 21361), INT64_C( 17584)) },
    { simde_mm_set_epi16(INT16_C( 18353), INT16_C( 9848), INT16_C( 20583), INT16_C(-28706),
                         INT16_C( 28353), INT16_C( 979), INT16_C(-15312), INT16_C( -4296)),
      simde_mm_set_epi64x(INT64_C( -15312), INT64_C( -4296)) },
    { simde_mm_set_epi16(INT16_C( 21066), INT16_C( 26185), INT16_C( 29520), INT16_C( -9848),
                         INT16_C(-20587), INT16_C( -254), INT16_C(-19969), INT16_C( -2307)),
      simde_mm_set_epi64x(INT64_C( -19969), INT64_C( -2307)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i = 0;

  while (i < n_cases) {
    simde__m128i r = simde_mm_cvtepi16_epi64(test_vec[i].a);
    simde_assert_m128i_i64(r, ==, test_vec[i].r);
    i++;
  }

  return 0;
}

static int test_simde_mm_cvtepi32_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C(-1750586859), INT32_C( 1743108010), INT32_C( -56370522), INT32_C( 214732685)), simde_mm_set_epi64x(INT64_C( -56370522), INT64_C( 214732685)) }, { simde_mm_set_epi32(INT32_C( 1019578476), INT32_C( 2006962808), INT32_C( 1356966666), INT32_C( 443191452)), simde_mm_set_epi64x(INT64_C( 1356966666), INT64_C( 443191452)) }, { simde_mm_set_epi32(INT32_C( 811952499), INT32_C( 1388789927), INT32_C( 1562076587), INT32_C( 1374574189)), simde_mm_set_epi64x(INT64_C( 1562076587), INT64_C( 1374574189)) }, {
simde_mm_set_epi32(INT32_C(-1130424581), INT32_C( -754503048), INT32_C( -175987570), INT32_C( 252857640)), simde_mm_set_epi64x(INT64_C( -175987570), INT64_C( 252857640)) }, { simde_mm_set_epi32(INT32_C( -662885345), INT32_C( -469769762), INT32_C( 1876427062), INT32_C(-1227144336)), simde_mm_set_epi64x(INT64_C( 1876427062), INT64_C( -1227144336)) }, { simde_mm_set_epi32(INT32_C(-1459145913), INT32_C( -624950308), INT32_C(-1740403350), INT32_C(-2023012954)), simde_mm_set_epi64x(INT64_C( -1740403350), INT64_C( -2023012954)) }, { simde_mm_set_epi32(INT32_C( 807996884), INT32_C( -721517626), INT32_C(-1345085365), INT32_C( 3500292)), simde_mm_set_epi64x(INT64_C( -1345085365), INT64_C( 3500292)) }, { simde_mm_set_epi32(INT32_C( 1631614297), INT32_C(-2082769218), INT32_C( 285385405), INT32_C(-1874296921)), simde_mm_set_epi64x(INT64_C( 285385405), INT64_C( -1874296921)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cvtepi32_epi64(test_vec[i].a); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cvtepi8_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( -34), INT8_C( 20), INT8_C( 53), INT8_C( 81), INT8_C( 37), INT8_C( -97), INT8_C( -59), INT8_C( 127), INT8_C( -43), INT8_C( -82), INT8_C( 84), INT8_C(-127), INT8_C( 11), INT8_C( 120), INT8_C( -98), INT8_C( -80)), simde_mm_set_epi16(INT16_C( -43), INT16_C( -82), INT16_C( 84), INT16_C( -127), INT16_C( 11), INT16_C( 120), INT16_C( -98), INT16_C( -80)) }, { simde_mm_set_epi8(INT8_C( 66), INT8_C( 57), INT8_C( 40), INT8_C(-113), INT8_C( 71), INT8_C( -5), INT8_C( 70), INT8_C( 22), INT8_C( -70), INT8_C( 14), INT8_C( -76), INT8_C(-101), INT8_C( 13), INT8_C( 64), INT8_C( -86), INT8_C( 34)), simde_mm_set_epi16(INT16_C( -70), INT16_C( 14), INT16_C( -76), INT16_C( -101), INT16_C( 13), INT16_C( 64), INT16_C( -86), INT16_C( 34)) }, { simde_mm_set_epi8(INT8_C( 87), INT8_C( 115), 
INT8_C( 108), INT8_C( -88), INT8_C( 113), INT8_C( 91), INT8_C( -95), INT8_C( -69), INT8_C( 32), INT8_C( 98), INT8_C( -16), INT8_C( -55), INT8_C( 11), INT8_C( -23), INT8_C( 72), INT8_C( 121)), simde_mm_set_epi16(INT16_C( 32), INT16_C( 98), INT16_C( -16), INT16_C( -55), INT16_C( 11), INT16_C( -23), INT16_C( 72), INT16_C( 121)) }, { simde_mm_set_epi8(INT8_C( 88), INT8_C( 68), INT8_C( -76), INT8_C( 35), INT8_C(-119), INT8_C( -67), INT8_C( 52), INT8_C( 58), INT8_C( 7), INT8_C( 70), INT8_C( -26), INT8_C( 55), INT8_C( 126), INT8_C( 69), INT8_C( 105), INT8_C( 40)), simde_mm_set_epi16(INT16_C( 7), INT16_C( 70), INT16_C( -26), INT16_C( 55), INT16_C( 126), INT16_C( 69), INT16_C( 105), INT16_C( 40)) }, { simde_mm_set_epi8(INT8_C( 110), INT8_C( -4), INT8_C( 41), INT8_C( -98), INT8_C( -91), INT8_C( 117), INT8_C( -33), INT8_C(-110), INT8_C( 126), INT8_C( -63), INT8_C( -65), INT8_C( -86), INT8_C(-108), INT8_C( -64), INT8_C( -4), INT8_C( 25)), simde_mm_set_epi16(INT16_C( 126), INT16_C( -63), INT16_C( -65), INT16_C( -86), INT16_C( -108), INT16_C( -64), INT16_C( -4), INT16_C( 25)) }, { simde_mm_set_epi8(INT8_C( 105), INT8_C( -31), INT8_C( -57), INT8_C( -74), INT8_C( 119), INT8_C( 56), INT8_C( -36), INT8_C(-128), INT8_C( 102), INT8_C( 57), INT8_C( -27), INT8_C( 19), INT8_C( -50), INT8_C( -15), INT8_C( 36), INT8_C(-124)), simde_mm_set_epi16(INT16_C( 102), INT16_C( 57), INT16_C( -27), INT16_C( 19), INT16_C( -50), INT16_C( -15), INT16_C( 36), INT16_C( -124)) }, { simde_mm_set_epi8(INT8_C( 44), INT8_C( -84), INT8_C( -61), INT8_C( 59), INT8_C( 18), INT8_C(-118), INT8_C( 6), INT8_C( 56), INT8_C( -11), INT8_C( 78), INT8_C( -64), INT8_C( -28), INT8_C( -85), INT8_C( -77), INT8_C( -25), INT8_C(-100)), simde_mm_set_epi16(INT16_C( -11), INT16_C( 78), INT16_C( -64), INT16_C( -28), INT16_C( -85), INT16_C( -77), INT16_C( -25), INT16_C( -100)) }, { simde_mm_set_epi8(INT8_C( -13), INT8_C( -24), INT8_C( 94), INT8_C( 58), INT8_C( 1), INT8_C( -14), INT8_C( -90), INT8_C(-123), INT8_C(-101), INT8_C( 71), 
INT8_C( -51), INT8_C( 87), INT8_C( -34), INT8_C( 77), INT8_C( 124), INT8_C( -63)), simde_mm_set_epi16(INT16_C( -101), INT16_C( 71), INT16_C( -51), INT16_C( 87), INT16_C( -34), INT16_C( 77), INT16_C( 124), INT16_C( -63)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cvtepi8_epi16(test_vec[i].a); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cvtepi8_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( -34), INT8_C( 20), INT8_C( 53), INT8_C( 81), INT8_C( 37), INT8_C( -97), INT8_C( -59), INT8_C( 127), INT8_C( -43), INT8_C( -82), INT8_C( 84), INT8_C(-127), INT8_C( 11), INT8_C( 120), INT8_C( -98), INT8_C( -80)), simde_mm_set_epi32(INT32_C( 11), INT32_C( 120), INT32_C( -98), INT32_C( -80)) }, { simde_mm_set_epi8(INT8_C( 66), INT8_C( 57), INT8_C( 40), INT8_C(-113), INT8_C( 71), INT8_C( -5), INT8_C( 70), INT8_C( 22), INT8_C( -70), INT8_C( 14), INT8_C( -76), INT8_C(-101), INT8_C( 13), INT8_C( 64), INT8_C( -86), INT8_C( 34)), simde_mm_set_epi32(INT32_C( 13), INT32_C( 64), INT32_C( -86), INT32_C( 34)) }, { simde_mm_set_epi8(INT8_C( 87), INT8_C( 115), INT8_C( 108), INT8_C( -88), INT8_C( 113), INT8_C( 91), INT8_C( -95), INT8_C( -69), INT8_C( 32), INT8_C( 98), INT8_C( -16), INT8_C( -55), INT8_C( 11), INT8_C( -23), INT8_C( 72), INT8_C( 121)), simde_mm_set_epi32(INT32_C( 11), INT32_C( -23), INT32_C( 72), INT32_C( 121)) }, { simde_mm_set_epi8(INT8_C( 88), INT8_C( 68), INT8_C( -76), INT8_C( 35), INT8_C(-119), INT8_C( -67), INT8_C( 52), INT8_C( 58), INT8_C( 7), INT8_C( 70), INT8_C( -26), INT8_C( 55), INT8_C( 126), INT8_C( 69), INT8_C( 105), INT8_C( 40)), simde_mm_set_epi32(INT32_C( 126), INT32_C( 69), INT32_C( 105), INT32_C( 40)) }, { simde_mm_set_epi8(INT8_C( 110), INT8_C( -4), INT8_C( 41), INT8_C( -98), INT8_C( -91), INT8_C( 117), INT8_C( -33), INT8_C(-110), INT8_C( 126), INT8_C( -63), INT8_C( -65), INT8_C( -86), 
INT8_C(-108), INT8_C( -64), INT8_C( -4), INT8_C( 25)), simde_mm_set_epi32(INT32_C( -108), INT32_C( -64), INT32_C( -4), INT32_C( 25)) }, { simde_mm_set_epi8(INT8_C( 105), INT8_C( -31), INT8_C( -57), INT8_C( -74), INT8_C( 119), INT8_C( 56), INT8_C( -36), INT8_C(-128), INT8_C( 102), INT8_C( 57), INT8_C( -27), INT8_C( 19), INT8_C( -50), INT8_C( -15), INT8_C( 36), INT8_C(-124)), simde_mm_set_epi32(INT32_C( -50), INT32_C( -15), INT32_C( 36), INT32_C( -124)) }, { simde_mm_set_epi8(INT8_C( 44), INT8_C( -84), INT8_C( -61), INT8_C( 59), INT8_C( 18), INT8_C(-118), INT8_C( 6), INT8_C( 56), INT8_C( -11), INT8_C( 78), INT8_C( -64), INT8_C( -28), INT8_C( -85), INT8_C( -77), INT8_C( -25), INT8_C(-100)), simde_mm_set_epi32(INT32_C( -85), INT32_C( -77), INT32_C( -25), INT32_C( -100)) }, { simde_mm_set_epi8(INT8_C( -13), INT8_C( -24), INT8_C( 94), INT8_C( 58), INT8_C( 1), INT8_C( -14), INT8_C( -90), INT8_C(-123), INT8_C(-101), INT8_C( 71), INT8_C( -51), INT8_C( 87), INT8_C( -34), INT8_C( 77), INT8_C( 124), INT8_C( -63)), simde_mm_set_epi32(INT32_C( -34), INT32_C( 77), INT32_C( 124), INT32_C( -63)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cvtepi8_epi32(test_vec[i].a); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cvtepi8_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( -34), INT8_C( 20), INT8_C( 53), INT8_C( 81), INT8_C( 37), INT8_C( -97), INT8_C( -59), INT8_C( 127), INT8_C( -43), INT8_C( -82), INT8_C( 84), INT8_C(-127), INT8_C( 11), INT8_C( 120), INT8_C( -98), INT8_C( -80)), simde_mm_set_epi64x(INT64_C( -98), INT64_C( -80)) }, { simde_mm_set_epi8(INT8_C( 66), INT8_C( 57), INT8_C( 40), INT8_C(-113), INT8_C( 71), INT8_C( -5), INT8_C( 70), INT8_C( 22), INT8_C( -70), INT8_C( 14), INT8_C( -76), INT8_C(-101), INT8_C( 13), INT8_C( 64), INT8_C( -86), INT8_C( 34)), simde_mm_set_epi64x(INT64_C( -86), INT64_C( 34)) }, { 
simde_mm_set_epi8(INT8_C( 87), INT8_C( 115), INT8_C( 108), INT8_C( -88), INT8_C( 113), INT8_C( 91), INT8_C( -95), INT8_C( -69), INT8_C( 32), INT8_C( 98), INT8_C( -16), INT8_C( -55), INT8_C( 11), INT8_C( -23), INT8_C( 72), INT8_C( 121)), simde_mm_set_epi64x(INT64_C( 72), INT64_C( 121)) }, { simde_mm_set_epi8(INT8_C( 88), INT8_C( 68), INT8_C( -76), INT8_C( 35), INT8_C(-119), INT8_C( -67), INT8_C( 52), INT8_C( 58), INT8_C( 7), INT8_C( 70), INT8_C( -26), INT8_C( 55), INT8_C( 126), INT8_C( 69), INT8_C( 105), INT8_C( 40)), simde_mm_set_epi64x(INT64_C( 105), INT64_C( 40)) }, { simde_mm_set_epi8(INT8_C( 110), INT8_C( -4), INT8_C( 41), INT8_C( -98), INT8_C( -91), INT8_C( 117), INT8_C( -33), INT8_C(-110), INT8_C( 126), INT8_C( -63), INT8_C( -65), INT8_C( -86), INT8_C(-108), INT8_C( -64), INT8_C( -4), INT8_C( 25)), simde_mm_set_epi64x(INT64_C( -4), INT64_C( 25)) }, { simde_mm_set_epi8(INT8_C( 105), INT8_C( -31), INT8_C( -57), INT8_C( -74), INT8_C( 119), INT8_C( 56), INT8_C( -36), INT8_C(-128), INT8_C( 102), INT8_C( 57), INT8_C( -27), INT8_C( 19), INT8_C( -50), INT8_C( -15), INT8_C( 36), INT8_C(-124)), simde_mm_set_epi64x(INT64_C( 36), INT64_C( -124)) }, { simde_mm_set_epi8(INT8_C( 44), INT8_C( -84), INT8_C( -61), INT8_C( 59), INT8_C( 18), INT8_C(-118), INT8_C( 6), INT8_C( 56), INT8_C( -11), INT8_C( 78), INT8_C( -64), INT8_C( -28), INT8_C( -85), INT8_C( -77), INT8_C( -25), INT8_C(-100)), simde_mm_set_epi64x(INT64_C( -25), INT64_C( -100)) }, { simde_mm_set_epi8(INT8_C( -13), INT8_C( -24), INT8_C( 94), INT8_C( 58), INT8_C( 1), INT8_C( -14), INT8_C( -90), INT8_C(-123), INT8_C(-101), INT8_C( 71), INT8_C( -51), INT8_C( 87), INT8_C( -34), INT8_C( 77), INT8_C( 124), INT8_C( -63)), simde_mm_set_epi64x(INT64_C( 124), INT64_C( -63)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cvtepi8_epi64(test_vec[i].a); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cvtepu8_epi16(SIMDE_MUNIT_TEST_ARGS) { 
const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu8(UINT8_C(133), UINT8_C( 55), UINT8_C(102), UINT8_C(130), UINT8_C(197), UINT8_C( 52), UINT8_C(225), UINT8_C( 87), UINT8_C(218), UINT8_C( 15), UINT8_C(214), UINT8_C(152), UINT8_C(108), UINT8_C( 18), UINT8_C(104), UINT8_C( 3)), simde_mm_set_epi16(INT16_C( 218), INT16_C( 15), INT16_C( 214), INT16_C( 152), INT16_C( 108), INT16_C( 18), INT16_C( 104), INT16_C( 3)) }, { simde_x_mm_set_epu8(UINT8_C( 51), UINT8_C(122), UINT8_C(167), UINT8_C(232), UINT8_C( 3), UINT8_C(167), UINT8_C( 89), UINT8_C(206), UINT8_C( 96), UINT8_C(235), UINT8_C(128), UINT8_C(134), UINT8_C(187), UINT8_C(190), UINT8_C(232), UINT8_C(112)), simde_mm_set_epi16(INT16_C( 96), INT16_C( 235), INT16_C( 128), INT16_C( 134), INT16_C( 187), INT16_C( 190), INT16_C( 232), INT16_C( 112)) }, { simde_x_mm_set_epu8(UINT8_C( 15), UINT8_C( 99), UINT8_C(228), UINT8_C( 12), UINT8_C(155), UINT8_C(138), UINT8_C( 87), UINT8_C( 96), UINT8_C( 98), UINT8_C( 66), UINT8_C( 96), UINT8_C( 53), UINT8_C( 65), UINT8_C(187), UINT8_C(183), UINT8_C(114)), simde_mm_set_epi16(INT16_C( 98), INT16_C( 66), INT16_C( 96), INT16_C( 53), INT16_C( 65), INT16_C( 187), INT16_C( 183), INT16_C( 114)) }, { simde_x_mm_set_epu8(UINT8_C( 58), UINT8_C( 51), UINT8_C(210), UINT8_C( 42), UINT8_C(132), UINT8_C(100), UINT8_C( 83), UINT8_C( 44), UINT8_C(115), UINT8_C(204), UINT8_C(195), UINT8_C( 11), UINT8_C( 48), UINT8_C( 39), UINT8_C( 37), UINT8_C(183)), simde_mm_set_epi16(INT16_C( 115), INT16_C( 204), INT16_C( 195), INT16_C( 11), INT16_C( 48), INT16_C( 39), INT16_C( 37), INT16_C( 183)) }, { simde_x_mm_set_epu8(UINT8_C( 56), UINT8_C(101), UINT8_C(162), UINT8_C(107), UINT8_C( 59), UINT8_C( 91), UINT8_C( 63), UINT8_C(129), UINT8_C(189), UINT8_C( 4), UINT8_C( 9), UINT8_C(155), UINT8_C(150), UINT8_C( 39), UINT8_C(140), UINT8_C( 87)), simde_mm_set_epi16(INT16_C( 189), INT16_C( 4), INT16_C( 9), INT16_C( 155), INT16_C( 150), INT16_C( 39), INT16_C( 140), INT16_C( 87)) }, { 
simde_x_mm_set_epu8(UINT8_C(203), UINT8_C(158), UINT8_C(175), UINT8_C( 0), UINT8_C( 27), UINT8_C( 64), UINT8_C(129), UINT8_C( 41), UINT8_C(208), UINT8_C(180), UINT8_C(174), UINT8_C( 83), UINT8_C(142), UINT8_C(198), UINT8_C( 36), UINT8_C(158)), simde_mm_set_epi16(INT16_C( 208), INT16_C( 180), INT16_C( 174), INT16_C( 83), INT16_C( 142), INT16_C( 198), INT16_C( 36), INT16_C( 158)) }, { simde_x_mm_set_epu8(UINT8_C( 48), UINT8_C(139), UINT8_C( 69), UINT8_C( 42), UINT8_C(239), UINT8_C(255), UINT8_C( 84), UINT8_C(226), UINT8_C(180), UINT8_C(204), UINT8_C( 81), UINT8_C(133), UINT8_C(230), UINT8_C( 25), UINT8_C(218), UINT8_C(139)), simde_mm_set_epi16(INT16_C( 180), INT16_C( 204), INT16_C( 81), INT16_C( 133), INT16_C( 230), INT16_C( 25), INT16_C( 218), INT16_C( 139)) }, { simde_x_mm_set_epu8(UINT8_C(213), UINT8_C( 5), UINT8_C( 37), UINT8_C(243), UINT8_C(196), UINT8_C(241), UINT8_C( 11), UINT8_C( 32), UINT8_C(137), UINT8_C( 5), UINT8_C(216), UINT8_C(243), UINT8_C( 25), UINT8_C( 23), UINT8_C(212), UINT8_C( 57)), simde_mm_set_epi16(INT16_C( 137), INT16_C( 5), INT16_C( 216), INT16_C( 243), INT16_C( 25), INT16_C( 23), INT16_C( 212), INT16_C( 57)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cvtepu8_epi16(test_vec[i].a); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cvtepu8_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu8(216, 58, 77, 216, 179, 254, 83, 42, 179, 211, 84, 116, 211, 142, 19, 247), simde_mm_set_epi32(211, 142, 19, 247) }, { simde_x_mm_set_epu8(161, 70, 71, 24, 62, 44, 28, 40, 34, 181, 114, 59, 33, 122, 68, 198), simde_mm_set_epi32( 33, 122, 68, 198) }, { simde_x_mm_set_epu8(145, 160, 60, 140, 53, 127, 68, 235, 225, 143, 244, 79, 20, 67, 173, 209), simde_mm_set_epi32( 20, 67, 173, 209) }, { simde_x_mm_set_epu8(117, 146, 120, 54, 143, 242, 141, 149, 73, 58, 87, 32, 199, 183, 202, 12), 
simde_mm_set_epi32(199, 183, 202, 12) }, { simde_x_mm_set_epu8( 40, 57, 16, 79, 103, 249, 143, 197, 214, 78, 201, 95, 85, 53, 247, 134), simde_mm_set_epi32( 85, 53, 247, 134) }, { simde_x_mm_set_epu8( 72, 163, 211, 103, 152, 114, 2, 140, 52, 64, 1, 9, 184, 145, 190, 245), simde_mm_set_epi32(184, 145, 190, 245) }, { simde_x_mm_set_epu8( 7, 28, 147, 3, 70, 73, 159, 19, 118, 122, 100, 52, 236, 17, 135, 121), simde_mm_set_epi32(236, 17, 135, 121) }, { simde_x_mm_set_epu8(100, 233, 199, 97, 158, 240, 121, 115, 142, 146, 159, 107, 202, 224, 156, 149), simde_mm_set_epi32(202, 224, 156, 149) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cvtepu8_epi32(test_vec[i].a); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cvtepu8_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu8(UINT8_C(165), UINT8_C(138), UINT8_C( 9), UINT8_C(193), UINT8_C(199), UINT8_C(213), UINT8_C(108), UINT8_C(127), UINT8_C( 76), UINT8_C( 44), UINT8_C( 97), UINT8_C( 43), UINT8_C( 10), UINT8_C( 41), UINT8_C(226), UINT8_C(134)), simde_mm_set_epi64x(INT64_C( 226), INT64_C( 134)) }, { simde_x_mm_set_epu8(UINT8_C(206), UINT8_C(216), UINT8_C(228), UINT8_C( 95), UINT8_C( 16), UINT8_C(235), UINT8_C(117), UINT8_C(249), UINT8_C(118), UINT8_C(169), UINT8_C( 43), UINT8_C(125), UINT8_C(139), UINT8_C( 7), UINT8_C( 78), UINT8_C( 91)), simde_mm_set_epi64x(INT64_C( 78), INT64_C( 91)) }, { simde_x_mm_set_epu8(UINT8_C(217), UINT8_C( 54), UINT8_C( 70), UINT8_C( 24), UINT8_C( 18), UINT8_C(185), UINT8_C( 8), UINT8_C( 83), UINT8_C(138), UINT8_C(127), UINT8_C(171), UINT8_C(232), UINT8_C( 25), UINT8_C( 28), UINT8_C(187), UINT8_C(172)), simde_mm_set_epi64x(INT64_C( 187), INT64_C( 172)) }, { simde_x_mm_set_epu8(UINT8_C(139), UINT8_C(164), UINT8_C(130), UINT8_C(202), UINT8_C(244), UINT8_C(121), UINT8_C( 41), UINT8_C(156), UINT8_C(187), UINT8_C( 51), UINT8_C( 98), UINT8_C( 32), 
UINT8_C(179), UINT8_C(192), UINT8_C(214), UINT8_C(180)), simde_mm_set_epi64x(INT64_C( 214), INT64_C( 180)) }, { simde_x_mm_set_epu8(UINT8_C(216), UINT8_C( 40), UINT8_C(253), UINT8_C(179), UINT8_C(134), UINT8_C(199), UINT8_C( 8), UINT8_C( 38), UINT8_C(169), UINT8_C(253), UINT8_C(114), UINT8_C( 51), UINT8_C(228), UINT8_C(119), UINT8_C(103), UINT8_C(155)), simde_mm_set_epi64x(INT64_C( 103), INT64_C( 155)) }, { simde_x_mm_set_epu8(UINT8_C(197), UINT8_C(169), UINT8_C( 70), UINT8_C(235), UINT8_C(100), UINT8_C(250), UINT8_C( 64), UINT8_C(127), UINT8_C(118), UINT8_C(162), UINT8_C( 19), UINT8_C(173), UINT8_C(116), UINT8_C(180), UINT8_C( 7), UINT8_C(216)), simde_mm_set_epi64x(INT64_C( 7), INT64_C( 216)) }, { simde_x_mm_set_epu8(UINT8_C( 50), UINT8_C( 69), UINT8_C( 69), UINT8_C(227), UINT8_C( 69), UINT8_C( 0), UINT8_C( 34), UINT8_C(232), UINT8_C( 24), UINT8_C( 46), UINT8_C(141), UINT8_C(128), UINT8_C(115), UINT8_C(157), UINT8_C(112), UINT8_C( 71)), simde_mm_set_epi64x(INT64_C( 112), INT64_C( 71)) }, { simde_x_mm_set_epu8(UINT8_C(120), UINT8_C( 3), UINT8_C(197), UINT8_C(139), UINT8_C( 31), UINT8_C(200), UINT8_C(250), UINT8_C(106), UINT8_C(219), UINT8_C( 4), UINT8_C(206), UINT8_C( 64), UINT8_C(217), UINT8_C(231), UINT8_C(229), UINT8_C( 14)), simde_mm_set_epi64x(INT64_C( 229), INT64_C( 14)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cvtepu8_epi64(test_vec[i].a); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; }

/* Table-driven check of simde_mm_cvtepu16_epi32.
 *
 * Each entry pairs an input a, built from eight unsigned 16-bit values,
 * with the expected result r.  In the entries below, r is built from the
 * last four simde_x_mm_set_epu16 arguments of a, written as 32-bit values
 * that stay non-negative even when the source is above 32767.
 *
 * Returns 0 when the loop finishes; failure handling is left to the
 * assertion macro.
 */
static int
test_simde_mm_cvtepu16_epi32(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;
    simde__m128i r;
  } test_vec[8] = {
    { simde_x_mm_set_epu16(UINT16_C(45997), UINT16_C(54881), UINT16_C( 8073), UINT16_C(50053),
                           UINT16_C(33513), UINT16_C(60494), UINT16_C(44453), UINT16_C(35449)),
      simde_mm_set_epi32(INT32_C( 33513), INT32_C( 60494), INT32_C( 44453), INT32_C( 35449)) },
    { simde_x_mm_set_epu16(UINT16_C(29322), UINT16_C(60312), UINT16_C(23894), UINT16_C( 5611),
                           UINT16_C(43169), UINT16_C(36178), UINT16_C(30857), UINT16_C(27437)),
      simde_mm_set_epi32(INT32_C( 43169), INT32_C( 36178), INT32_C( 30857), INT32_C( 27437)) },
    { simde_x_mm_set_epu16(UINT16_C(36165), UINT16_C(32250), UINT16_C(32420), UINT16_C(54083),
                           UINT16_C(37595), UINT16_C( 6323), UINT16_C(24868), UINT16_C(13806)),
      simde_mm_set_epi32(INT32_C( 37595), INT32_C( 6323), INT32_C( 24868), INT32_C( 13806)) },
    { simde_x_mm_set_epu16(UINT16_C(47473), UINT16_C(25142), UINT16_C(53974), UINT16_C(29717),
                           UINT16_C(38600), UINT16_C(37432), UINT16_C(33857), UINT16_C( 7799)),
      simde_mm_set_epi32(INT32_C( 38600), INT32_C( 37432), INT32_C( 33857), INT32_C( 7799)) },
    { simde_x_mm_set_epu16(UINT16_C( 9476), UINT16_C(49162), UINT16_C(59100), UINT16_C(46196),
                           UINT16_C(39403), UINT16_C(33179), UINT16_C(42032), UINT16_C(16407)),
      simde_mm_set_epi32(INT32_C( 39403), INT32_C( 33179), INT32_C( 42032), INT32_C( 16407)) },
    { simde_x_mm_set_epu16(UINT16_C(13947), UINT16_C(14884), UINT16_C(63463), UINT16_C(60885),
                           UINT16_C(44795), UINT16_C(57577), UINT16_C( 9399), UINT16_C(54740)),
      simde_mm_set_epi32(INT32_C( 44795), INT32_C( 57577), INT32_C( 9399), INT32_C( 54740)) },
    { simde_x_mm_set_epu16(UINT16_C(16348), UINT16_C(41263), UINT16_C(29933), UINT16_C( 7966),
                           UINT16_C(11747), UINT16_C(45808), UINT16_C(10076), UINT16_C(52634)),
      simde_mm_set_epi32(INT32_C( 11747), INT32_C( 45808), INT32_C( 10076), INT32_C( 52634)) },
    { simde_x_mm_set_epu16(UINT16_C(44921), UINT16_C(31426), UINT16_C(50653), UINT16_C(39011),
                           UINT16_C(15796), UINT16_C(28031), UINT16_C(50348), UINT16_C(35430)),
      simde_mm_set_epi32(INT32_C( 15796), INT32_C( 28031), INT32_C( 50348), INT32_C( 35430)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i = 0;

  while (i < n_cases) {
    simde__m128i r = simde_mm_cvtepu16_epi32(test_vec[i].a);
    simde_assert_m128i_i32(r, ==, test_vec[i].r);
    i++;
  }

  return 0;
}

static int test_simde_mm_cvtepu16_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu16(UINT16_C(36944),
UINT16_C(47640), UINT16_C( 2187), UINT16_C(10560), UINT16_C( 1923), UINT16_C(52017), UINT16_C(41190), UINT16_C(51381)), simde_mm_set_epi64x(INT64_C( 41190), INT64_C( 51381)) }, { simde_x_mm_set_epu16(UINT16_C(59110), UINT16_C( 781), UINT16_C( 2267), UINT16_C(32518), UINT16_C(16361), UINT16_C(14242), UINT16_C(28197), UINT16_C(27455)), simde_mm_set_epi64x(INT64_C( 28197), INT64_C( 27455)) }, { simde_x_mm_set_epu16(UINT16_C(45312), UINT16_C( 9417), UINT16_C(53316), UINT16_C(54914), UINT16_C( 6651), UINT16_C(32993), UINT16_C(51134), UINT16_C(31066)), simde_mm_set_epi64x(INT64_C( 51134), INT64_C( 31066)) }, { simde_x_mm_set_epu16(UINT16_C( 6198), UINT16_C(41996), UINT16_C(34257), UINT16_C( 1856), UINT16_C(54306), UINT16_C(57993), UINT16_C(28922), UINT16_C(25456)), simde_mm_set_epi64x(INT64_C( 28922), INT64_C( 25456)) }, { simde_x_mm_set_epu16(UINT16_C(52812), UINT16_C(10727), UINT16_C(10105), UINT16_C(31173), UINT16_C( 2196), UINT16_C(38149), UINT16_C(12797), UINT16_C(56721)), simde_mm_set_epi64x(INT64_C( 12797), INT64_C( 56721)) }, { simde_x_mm_set_epu16(UINT16_C(43893), UINT16_C(22212), UINT16_C(55173), UINT16_C(39507), UINT16_C(17371), UINT16_C( 1379), UINT16_C(37505), UINT16_C(51758)), simde_mm_set_epi64x(INT64_C( 37505), INT64_C( 51758)) }, { simde_x_mm_set_epu16(UINT16_C(10102), UINT16_C(59029), UINT16_C(11878), UINT16_C(57288), UINT16_C(63955), UINT16_C(62443), UINT16_C( 1106), UINT16_C(32843)), simde_mm_set_epi64x(INT64_C( 1106), INT64_C( 32843)) }, { simde_x_mm_set_epu16(UINT16_C( 7384), UINT16_C(18925), UINT16_C(61664), UINT16_C(62156), UINT16_C(64229), UINT16_C(12260), UINT16_C(17269), UINT16_C( 165)), simde_mm_set_epi64x(INT64_C( 17269), INT64_C( 165)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cvtepu16_epi64(test_vec[i].a); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; }

/* Table-driven check of simde_mm_cvtepu32_epi64.
 *
 * Each entry pairs an input a, built from four unsigned 32-bit values, with
 * the expected result r.  In the entries below, r is built from the last
 * two simde_x_mm_set_epu32 arguments of a, written as 64-bit values that
 * stay non-negative even when the source is above 2147483647.
 *
 * Returns 0 when the loop finishes; failure handling is left to the
 * assertion macro.
 */
static int
test_simde_mm_cvtepu32_epi64(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;
    simde__m128i r;
  } test_vec[8] = {
    { simde_x_mm_set_epu32(UINT32_C(1089356158), UINT32_C( 716812601), UINT32_C(2600352285), UINT32_C( 851998215)),
      simde_mm_set_epi64x(INT64_C( 2600352285), INT64_C( 851998215)) },
    { simde_x_mm_set_epu32(UINT32_C(2083471993), UINT32_C(3995663695), UINT32_C( 718227173), UINT32_C(3645469582)),
      simde_mm_set_epi64x(INT64_C( 718227173), INT64_C( 3645469582)) },
    { simde_x_mm_set_epu32(UINT32_C(3121544286), UINT32_C( 583537062), UINT32_C( 362422775), UINT32_C(3876080612)),
      simde_mm_set_epi64x(INT64_C( 362422775), INT64_C( 3876080612)) },
    { simde_x_mm_set_epu32(UINT32_C( 336299187), UINT32_C(1772325077), UINT32_C(4159021368), UINT32_C(3897849224)),
      simde_mm_set_epi64x(INT64_C( 4159021368), INT64_C( 3897849224)) },
    { simde_x_mm_set_epu32(UINT32_C(2614142766), UINT32_C(3735995755), UINT32_C(2880699788), UINT32_C( 410975381)),
      simde_mm_set_epi64x(INT64_C( 2880699788), INT64_C( 410975381)) },
    { simde_x_mm_set_epu32(UINT32_C(2584819359), UINT32_C(2278276482), UINT32_C(3087779260), UINT32_C(2573826343)),
      simde_mm_set_epi64x(INT64_C( 3087779260), INT64_C( 2573826343)) },
    { simde_x_mm_set_epu32(UINT32_C( 809379705), UINT32_C( 343015345), UINT32_C( 434582428), UINT32_C( 776037113)),
      simde_mm_set_epi64x(INT64_C( 434582428), INT64_C( 776037113)) },
    { simde_x_mm_set_epu32(UINT32_C(2474833343), UINT32_C(1439643533), UINT32_C(3207542234), UINT32_C(2907982947)),
      simde_mm_set_epi64x(INT64_C( 3207542234), INT64_C( 2907982947)) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i = 0;

  while (i < n_cases) {
    simde__m128i r = simde_mm_cvtepu32_epi64(test_vec[i].a);
    simde_assert_m128i_i64(r, ==, test_vec[i].r);
    i++;
  }

  return 0;
}

static int test_simde_mm_dp_pd(SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 b[2]; const simde_float64 rff[2]; const simde_float64 r13[2]; const simde_float64 r42[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -185.10), SIMDE_FLOAT64_C( -814.31) }, { SIMDE_FLOAT64_C( -633.40), SIMDE_FLOAT64_C( 264.07) }, {
SIMDE_FLOAT64_C(-97792.50), SIMDE_FLOAT64_C(-97792.50) }, { SIMDE_FLOAT64_C(117242.34), SIMDE_FLOAT64_C(117242.34) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -565.06), SIMDE_FLOAT64_C( 69.81) }, { SIMDE_FLOAT64_C( -630.02), SIMDE_FLOAT64_C( 685.88) }, { SIMDE_FLOAT64_C(403880.38), SIMDE_FLOAT64_C(403880.38) }, { SIMDE_FLOAT64_C(355999.10), SIMDE_FLOAT64_C(355999.10) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -912.18), SIMDE_FLOAT64_C( -668.17) }, { SIMDE_FLOAT64_C( 101.41), SIMDE_FLOAT64_C( -269.03) }, { SIMDE_FLOAT64_C( 87253.60), SIMDE_FLOAT64_C( 87253.60) }, { SIMDE_FLOAT64_C(-92504.17), SIMDE_FLOAT64_C(-92504.17) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 285.64), SIMDE_FLOAT64_C( 880.46) }, { SIMDE_FLOAT64_C( -816.65), SIMDE_FLOAT64_C( -835.72) }, { SIMDE_FLOAT64_C(-969085.94), SIMDE_FLOAT64_C(-969085.94) }, { SIMDE_FLOAT64_C(-233267.91), SIMDE_FLOAT64_C(-233267.91) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -436.25), SIMDE_FLOAT64_C( -256.84) }, { SIMDE_FLOAT64_C( -171.01), SIMDE_FLOAT64_C( -191.74) }, { SIMDE_FLOAT64_C(123849.61), SIMDE_FLOAT64_C(123849.61) }, { SIMDE_FLOAT64_C( 74603.11), SIMDE_FLOAT64_C( 74603.11) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -839.33), SIMDE_FLOAT64_C( 848.18) }, { SIMDE_FLOAT64_C( -470.04), SIMDE_FLOAT64_C( 103.69) }, { SIMDE_FLOAT64_C(482466.46), SIMDE_FLOAT64_C(482466.46) }, { SIMDE_FLOAT64_C(394518.67), SIMDE_FLOAT64_C(394518.67) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -686.56), SIMDE_FLOAT64_C( -717.77) }, { SIMDE_FLOAT64_C( 891.14), SIMDE_FLOAT64_C( 567.91) }, { SIMDE_FLOAT64_C(-1019449.84), SIMDE_FLOAT64_C(-1019449.84) }, { SIMDE_FLOAT64_C(-611821.08), SIMDE_FLOAT64_C(-611821.08) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 596.26), SIMDE_FLOAT64_C( 722.67) }, { 
SIMDE_FLOAT64_C( -828.42), SIMDE_FLOAT64_C( -588.84) }, { SIMDE_FLOAT64_C(-919490.71), SIMDE_FLOAT64_C(-919490.71) }, { SIMDE_FLOAT64_C(-493953.71), SIMDE_FLOAT64_C(-493953.71) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d b = simde_mm_loadu_pd(test_vec[i].b); simde__m128d r; r = simde_mm_dp_pd(a, b, 0xff); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].rff), 1); r = simde_mm_dp_pd(a, b, 0x13); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r13), 1); r = simde_mm_dp_pd(a, b, 0x42); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r42), 1); } return 0; } static int test_simde_mm_dp_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 b[4]; const simde_float32 rff[4]; const simde_float32 r7f[4]; const simde_float32 r2a[4]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -84.89), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 74.29) }, { SIMDE_FLOAT32_C( -51.70), SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 82.57) }, { SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_MATH_NANF }, { SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_MATH_NANF }, { SIMDE_FLOAT32_C( 0.00), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 0.00), SIMDE_MATH_NANF } }, #endif { { SIMDE_FLOAT32_C( 23.12), SIMDE_FLOAT32_C( 22.36), SIMDE_FLOAT32_C( -14.55), SIMDE_FLOAT32_C( 26.71) }, { SIMDE_FLOAT32_C( 54.23), SIMDE_FLOAT32_C( -31.26), SIMDE_FLOAT32_C( 26.29), SIMDE_FLOAT32_C( 36.85) }, { SIMDE_FLOAT32_C( 1156.57), SIMDE_FLOAT32_C( 1156.57), SIMDE_FLOAT32_C( 1156.57), SIMDE_FLOAT32_C( 1156.57) }, { SIMDE_FLOAT32_C( 172.30), SIMDE_FLOAT32_C( 172.30), SIMDE_FLOAT32_C( 172.30), SIMDE_FLOAT32_C( 172.30) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -698.97), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -698.97) } }, { { 
SIMDE_FLOAT32_C( -56.71), SIMDE_FLOAT32_C( 75.82), SIMDE_FLOAT32_C( 51.12), SIMDE_FLOAT32_C( -94.58) }, { SIMDE_FLOAT32_C( -30.60), SIMDE_FLOAT32_C( 39.38), SIMDE_FLOAT32_C( 88.71), SIMDE_FLOAT32_C( -29.94) }, { SIMDE_FLOAT32_C( 12087.70), SIMDE_FLOAT32_C( 12087.70), SIMDE_FLOAT32_C( 12087.70), SIMDE_FLOAT32_C( 12087.70) }, { SIMDE_FLOAT32_C( 9255.97), SIMDE_FLOAT32_C( 9255.97), SIMDE_FLOAT32_C( 9255.97), SIMDE_FLOAT32_C( 9255.97) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2985.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2985.79) } }, { { SIMDE_FLOAT32_C( -53.71), SIMDE_FLOAT32_C( -37.29), SIMDE_FLOAT32_C( 41.85), SIMDE_FLOAT32_C( -92.59) }, { SIMDE_FLOAT32_C( -16.40), SIMDE_FLOAT32_C( -27.78), SIMDE_FLOAT32_C( -72.83), SIMDE_FLOAT32_C( 3.70) }, { SIMDE_FLOAT32_C( -1473.76), SIMDE_FLOAT32_C( -1473.76), SIMDE_FLOAT32_C( -1473.76), SIMDE_FLOAT32_C( -1473.76) }, { SIMDE_FLOAT32_C( -1131.18), SIMDE_FLOAT32_C( -1131.18), SIMDE_FLOAT32_C( -1131.18), SIMDE_FLOAT32_C( -1131.18) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1035.92), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1035.92) } }, { { SIMDE_FLOAT32_C( -12.67), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( -22.01), SIMDE_FLOAT32_C( 35.63) }, { SIMDE_FLOAT32_C( 32.23), SIMDE_FLOAT32_C( 30.33), SIMDE_FLOAT32_C( 18.20), SIMDE_FLOAT32_C( -44.65) }, { SIMDE_FLOAT32_C( -2391.32), SIMDE_FLOAT32_C( -2391.32), SIMDE_FLOAT32_C( -2391.32), SIMDE_FLOAT32_C( -2391.32) }, { SIMDE_FLOAT32_C( -800.44), SIMDE_FLOAT32_C( -800.44), SIMDE_FLOAT32_C( -800.44), SIMDE_FLOAT32_C( -800.44) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 8.49), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 8.49) } }, { { SIMDE_FLOAT32_C( -47.31), SIMDE_FLOAT32_C( -96.35), SIMDE_FLOAT32_C( 82.06), SIMDE_FLOAT32_C( -93.08) }, { SIMDE_FLOAT32_C( -27.61), SIMDE_FLOAT32_C( 8.35), SIMDE_FLOAT32_C( 43.77), SIMDE_FLOAT32_C( 15.68) }, { SIMDE_FLOAT32_C( 2633.98), SIMDE_FLOAT32_C( 2633.98), SIMDE_FLOAT32_C( 2633.98), SIMDE_FLOAT32_C( 2633.98) }, { SIMDE_FLOAT32_C( 4093.47), 
SIMDE_FLOAT32_C( 4093.47), SIMDE_FLOAT32_C( 4093.47), SIMDE_FLOAT32_C( 4093.47) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -804.52), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -804.52) } }, { { SIMDE_FLOAT32_C( -15.82), SIMDE_FLOAT32_C( -5.11), SIMDE_FLOAT32_C( 21.10), SIMDE_FLOAT32_C( 53.57) }, { SIMDE_FLOAT32_C( -65.73), SIMDE_FLOAT32_C( 9.81), SIMDE_FLOAT32_C( -76.36), SIMDE_FLOAT32_C( -19.43) }, { SIMDE_FLOAT32_C( -1662.34), SIMDE_FLOAT32_C( -1662.34), SIMDE_FLOAT32_C( -1662.34), SIMDE_FLOAT32_C( -1662.34) }, { SIMDE_FLOAT32_C( -621.48), SIMDE_FLOAT32_C( -621.48), SIMDE_FLOAT32_C( -621.48), SIMDE_FLOAT32_C( -621.48) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -50.13), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -50.13) } }, { { SIMDE_FLOAT32_C( 72.52), SIMDE_FLOAT32_C( 65.48), SIMDE_FLOAT32_C( -12.02), SIMDE_FLOAT32_C( -43.88) }, { SIMDE_FLOAT32_C( -62.30), SIMDE_FLOAT32_C( 15.15), SIMDE_FLOAT32_C( 59.82), SIMDE_FLOAT32_C( 25.03) }, { SIMDE_FLOAT32_C( -5343.33), SIMDE_FLOAT32_C( -5343.33), SIMDE_FLOAT32_C( -5343.33), SIMDE_FLOAT32_C( -5343.33) }, { SIMDE_FLOAT32_C( -4245.01), SIMDE_FLOAT32_C( -4245.01), SIMDE_FLOAT32_C( -4245.01), SIMDE_FLOAT32_C( -4245.01) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 992.02), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 992.02) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde_test_x86_assert_equal_f32x4(simde_mm_dp_ps(a, b, 0xff), simde_mm_loadu_ps(test_vec[i].rff), 1); simde_test_x86_assert_equal_f32x4(simde_mm_dp_ps(a, b, 0x7f), simde_mm_loadu_ps(test_vec[i].r7f), 1); simde_test_x86_assert_equal_f32x4(simde_mm_dp_ps(a, b, 0x2a), simde_mm_loadu_ps(test_vec[i].r2a), 1); } return 0; } static int test_simde_mm_extract_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; int8_t r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C(-105), INT8_C(-107), INT8_C( -84), INT8_C( -57), INT8_C( 13), 
INT8_C(-101), INT8_C( -59), INT8_C( -93), INT8_C( 53), INT8_C( 83), INT8_C( -28), INT8_C( 126), INT8_C(-111), INT8_C( -97), INT8_C( 25), INT8_C( -72)), INT8_C(-97) }, { simde_mm_set_epi8(INT8_C( -76), INT8_C( -25), INT8_C( 36), INT8_C( -92), INT8_C( -6), INT8_C( 77), INT8_C( -49), INT8_C(-126), INT8_C( -50), INT8_C( 79), INT8_C( -93), INT8_C( -95), INT8_C( -54), INT8_C(-116), INT8_C( -84), INT8_C( 66)), INT8_C(-116) }, { simde_mm_set_epi8(INT8_C( 83), INT8_C( 123), INT8_C( 120), INT8_C( -66), INT8_C( -3), INT8_C( 66), INT8_C( 110), INT8_C( 76), INT8_C( 0), INT8_C( 83), INT8_C( 20), INT8_C( -12), INT8_C( 83), INT8_C( 4), INT8_C( -39), INT8_C( 118)), INT8_C(4) }, { simde_mm_set_epi8(INT8_C( 32), INT8_C( 30), INT8_C( 127), INT8_C( -79), INT8_C( -10), INT8_C( 64), INT8_C( -45), INT8_C( 101), INT8_C( 44), INT8_C( -25), INT8_C(-110), INT8_C( 105), INT8_C( 83), INT8_C( 27), INT8_C( 15), INT8_C( 3)), INT8_C(27) }, { simde_mm_set_epi8(INT8_C( 79), INT8_C( 95), INT8_C(-109), INT8_C( 26), INT8_C( -81), INT8_C( -5), INT8_C( -84), INT8_C( 115), INT8_C( -48), INT8_C( 104), INT8_C( 67), INT8_C( 26), INT8_C( -41), INT8_C( -30), INT8_C( 94), INT8_C( -19)), INT8_C(-30) }, { simde_mm_set_epi8(INT8_C( -81), INT8_C( -80), INT8_C(-109), INT8_C( -14), INT8_C( 65), INT8_C( 10), INT8_C( 95), INT8_C( 83), INT8_C( 123), INT8_C( 45), INT8_C( 39), INT8_C( 106), INT8_C( -16), INT8_C(-123), INT8_C( -27), INT8_C( 56)), INT8_C(-123) }, { simde_mm_set_epi8(INT8_C( 86), INT8_C( 79), INT8_C( 41), INT8_C( -51), INT8_C( 97), INT8_C( 42), INT8_C( 117), INT8_C(-119), INT8_C( -18), INT8_C(-127), INT8_C( 28), INT8_C( -21), INT8_C(-101), INT8_C( -61), INT8_C( -63), INT8_C( 13)), INT8_C(-61) }, { simde_mm_set_epi8(INT8_C( -22), INT8_C( -42), INT8_C( 109), INT8_C( -75), INT8_C( -84), INT8_C( 60), INT8_C( -44), INT8_C( 8), INT8_C( 118), INT8_C( 102), INT8_C( -6), INT8_C( 0), INT8_C( -28), INT8_C( -81), INT8_C( 2), INT8_C( 94)), INT8_C(-81) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); 
i++) { int8_t r = HEDLEY_STATIC_CAST(int8_t, simde_mm_extract_epi8(test_vec[i].a, 2)); simde_assert_equal_i8(r, test_vec[i].r); } return 0; } static int test_simde_mm_extract_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; int32_t r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( -989914365), INT32_C( -320568713), INT32_C( 764201252), INT32_C( -823076780)), -320568713 }, { simde_mm_set_epi32(INT32_C( 1872661147), INT32_C( 1704933787), INT32_C( -552294669), INT32_C(-1494809408)), 1704933787 }, { simde_mm_set_epi32(INT32_C( 1741332044), INT32_C( 170998046), INT32_C( 1368723621), INT32_C(-2071088920)), 170998046 }, { simde_mm_set_epi32(INT32_C(-1942216470), INT32_C(-1426359243), INT32_C( 862877610), INT32_C( 898094861)), -1426359243 }, { simde_mm_set_epi32(INT32_C( 998704029), INT32_C( 2083244310), INT32_C( 1997200410), INT32_C( -459833332)), 2083244310 }, { simde_mm_set_epi32(INT32_C( 1256369906), INT32_C( 953394288), INT32_C( 1499034833), INT32_C( 990189413)), 953394288 }, { simde_mm_set_epi32(INT32_C( 1704589159), INT32_C( -719174981), INT32_C( -821402820), INT32_C( -969954348)), -719174981 }, { simde_mm_set_epi32(INT32_C( 29062750), INT32_C( 1388055657), INT32_C( 1520861645), INT32_C(-1651761160)), 1388055657 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int32_t r = simde_mm_extract_epi32(test_vec[i].a, 2); simde_assert_equal_i32(r, test_vec[i].r); } return 0; } static int test_simde_mm_extract_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; int64_t r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C( 3272238239526171606), INT64_C( 1654850679014379440)), INT64_C( 3272238239526171606) }, { simde_mm_set_epi64x(INT64_C( -6129313439874067621), INT64_C( 8613384435853876616)), INT64_C( -6129313439874067621) }, { simde_mm_set_epi64x(INT64_C( -6049641799095753965), INT64_C( -2810493496227171537)), INT64_C( -6049641799095753965) }, { simde_mm_set_epi64x(INT64_C( 1486209521702266950), INT64_C( 
-7779883419844099734)), INT64_C( 1486209521702266950) }, { simde_mm_set_epi64x(INT64_C( -4910022048576872084), INT64_C( -5820696718361059251)), INT64_C( -4910022048576872084) }, { simde_mm_set_epi64x(INT64_C( -1923881977407946859), INT64_C( -5589720307210562868)), INT64_C( -1923881977407946859) }, { simde_mm_set_epi64x(INT64_C( -3527421069943215520), INT64_C( 4545940692351426251)), INT64_C( -3527421069943215520) }, { simde_mm_set_epi64x(INT64_C( 6123229719758625458), INT64_C( 1813312819011147191)), INT64_C( 6123229719758625458) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int64_t r = simde_mm_extract_epi64(test_vec[i].a, 1); simde_assert_equal_i64(r, test_vec[i].r); } return 0; } static int test_simde_mm_extract_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; int32_t r; } test_vec[] = { { { SIMDE_FLOAT32_C( -849.36), SIMDE_FLOAT32_C( -598.79), SIMDE_FLOAT32_C( -735.05), SIMDE_FLOAT32_C( 435.26) }, -INT32_C( 1001105654) }, { { SIMDE_FLOAT32_C( -991.80), SIMDE_FLOAT32_C( -606.95), SIMDE_FLOAT32_C( 532.00), SIMDE_FLOAT32_C( -243.65) }, -INT32_C( 998771917) }, { { SIMDE_FLOAT32_C( 652.00), SIMDE_FLOAT32_C( -803.65), SIMDE_FLOAT32_C( 853.23), SIMDE_FLOAT32_C( -479.16) }, INT32_C( 1143144448) }, { { SIMDE_FLOAT32_C( -496.01), SIMDE_FLOAT32_C( -449.26), SIMDE_FLOAT32_C( 824.10), SIMDE_FLOAT32_C( 206.32) }, -INT32_C( 1007156920) }, { { SIMDE_FLOAT32_C( -549.74), SIMDE_FLOAT32_C( 14.08), SIMDE_FLOAT32_C( 893.37), SIMDE_FLOAT32_C( 921.66) }, -INT32_C( 1006014628) }, { { SIMDE_FLOAT32_C( -296.23), SIMDE_FLOAT32_C( 133.06), SIMDE_FLOAT32_C( -137.87), SIMDE_FLOAT32_C( -577.55) }, -INT32_C( 1013703311) }, { { SIMDE_FLOAT32_C( -98.04), SIMDE_FLOAT32_C( 744.07), SIMDE_FLOAT32_C( 68.13), SIMDE_FLOAT32_C( 315.55) }, -INT32_C( 1027337093) }, { { SIMDE_FLOAT32_C( -258.15), SIMDE_FLOAT32_C( 588.77), SIMDE_FLOAT32_C( -722.13), SIMDE_FLOAT32_C( -107.51) }, -INT32_C( 1014951117) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); int32_t r = simde_mm_extract_ps(a, 0); simde_assert_equal_i32(r, test_vec[i].r); } return 0; } static int test_simde_mm_floor_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -638.24), SIMDE_FLOAT64_C( 771.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( -639.00), SIMDE_FLOAT64_C( 771.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -79.94), SIMDE_FLOAT64_C( 536.55)), simde_mm_set_pd(SIMDE_FLOAT64_C( -80.00), SIMDE_FLOAT64_C( 536.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 145.74), SIMDE_FLOAT64_C( 124.77)), simde_mm_set_pd(SIMDE_FLOAT64_C( 145.00), SIMDE_FLOAT64_C( 124.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 309.89), SIMDE_FLOAT64_C( -929.92)), simde_mm_set_pd(SIMDE_FLOAT64_C( 309.00), SIMDE_FLOAT64_C( -930.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 138.72), SIMDE_FLOAT64_C( 502.95)), simde_mm_set_pd(SIMDE_FLOAT64_C( 138.00), SIMDE_FLOAT64_C( 502.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 512.22), SIMDE_FLOAT64_C( 187.34)), simde_mm_set_pd(SIMDE_FLOAT64_C( 512.00), SIMDE_FLOAT64_C( 187.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 560.17), SIMDE_FLOAT64_C( 420.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( 560.00), SIMDE_FLOAT64_C( 420.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 367.30), SIMDE_FLOAT64_C( 619.17)), simde_mm_set_pd(SIMDE_FLOAT64_C( 367.00), SIMDE_FLOAT64_C( 619.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_floor_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_floor_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 50.80), SIMDE_FLOAT32_C( 459.30), SIMDE_FLOAT32_C( -834.21), SIMDE_FLOAT32_C( 808.45)), simde_mm_set_ps(SIMDE_FLOAT32_C( 50.00), SIMDE_FLOAT32_C( 459.00), SIMDE_FLOAT32_C( -835.00), SIMDE_FLOAT32_C( 808.00)) }, { 
simde_mm_set_ps(SIMDE_FLOAT32_C( -648.20), SIMDE_FLOAT32_C( 768.10), SIMDE_FLOAT32_C( -885.11), SIMDE_FLOAT32_C( -162.62)), simde_mm_set_ps(SIMDE_FLOAT32_C( -649.00), SIMDE_FLOAT32_C( 768.00), SIMDE_FLOAT32_C( -886.00), SIMDE_FLOAT32_C( -163.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -136.33), SIMDE_FLOAT32_C( 361.95), SIMDE_FLOAT32_C( -729.16), SIMDE_FLOAT32_C( -786.42)), simde_mm_set_ps(SIMDE_FLOAT32_C( -137.00), SIMDE_FLOAT32_C( 361.00), SIMDE_FLOAT32_C( -730.00), SIMDE_FLOAT32_C( -787.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -579.89), SIMDE_FLOAT32_C( 55.32), SIMDE_FLOAT32_C( -998.01), SIMDE_FLOAT32_C( 191.65)), simde_mm_set_ps(SIMDE_FLOAT32_C( -580.00), SIMDE_FLOAT32_C( 55.00), SIMDE_FLOAT32_C( -999.00), SIMDE_FLOAT32_C( 191.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 781.10), SIMDE_FLOAT32_C( 884.98), SIMDE_FLOAT32_C( 903.22), SIMDE_FLOAT32_C( 964.51)), simde_mm_set_ps(SIMDE_FLOAT32_C( 781.00), SIMDE_FLOAT32_C( 884.00), SIMDE_FLOAT32_C( 903.00), SIMDE_FLOAT32_C( 964.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 255.11), SIMDE_FLOAT32_C( 929.26), SIMDE_FLOAT32_C( 869.55), SIMDE_FLOAT32_C( -749.47)), simde_mm_set_ps(SIMDE_FLOAT32_C( 255.00), SIMDE_FLOAT32_C( 929.00), SIMDE_FLOAT32_C( 869.00), SIMDE_FLOAT32_C( -750.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -859.43), SIMDE_FLOAT32_C( 380.85), SIMDE_FLOAT32_C( -956.74), SIMDE_FLOAT32_C( -486.36)), simde_mm_set_ps(SIMDE_FLOAT32_C( -860.00), SIMDE_FLOAT32_C( 380.00), SIMDE_FLOAT32_C( -957.00), SIMDE_FLOAT32_C( -487.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 154.98), SIMDE_FLOAT32_C( 341.89), SIMDE_FLOAT32_C( -431.03), SIMDE_FLOAT32_C( -687.74)), simde_mm_set_ps(SIMDE_FLOAT32_C( 154.00), SIMDE_FLOAT32_C( 341.00), SIMDE_FLOAT32_C( -432.00), SIMDE_FLOAT32_C( -688.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_floor_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_floor_sd(SIMDE_MUNIT_TEST_ARGS) { const 
struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -651.87), SIMDE_FLOAT64_C( 624.42)), simde_mm_set_pd(SIMDE_FLOAT64_C( 634.56), SIMDE_FLOAT64_C( -443.27)), simde_mm_set_pd(SIMDE_FLOAT64_C( -651.87), SIMDE_FLOAT64_C( -444.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -650.57), SIMDE_FLOAT64_C( -388.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( 862.67), SIMDE_FLOAT64_C( -847.32)), simde_mm_set_pd(SIMDE_FLOAT64_C( -650.57), SIMDE_FLOAT64_C( -848.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -429.60), SIMDE_FLOAT64_C( -927.81)), simde_mm_set_pd(SIMDE_FLOAT64_C( 516.52), SIMDE_FLOAT64_C( -109.38)), simde_mm_set_pd(SIMDE_FLOAT64_C( -429.60), SIMDE_FLOAT64_C( -110.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 119.92), SIMDE_FLOAT64_C( 571.26)), simde_mm_set_pd(SIMDE_FLOAT64_C( 15.01), SIMDE_FLOAT64_C( 819.76)), simde_mm_set_pd(SIMDE_FLOAT64_C( 119.92), SIMDE_FLOAT64_C( 819.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -625.27), SIMDE_FLOAT64_C( 998.84)), simde_mm_set_pd(SIMDE_FLOAT64_C( -570.19), SIMDE_FLOAT64_C( -85.39)), simde_mm_set_pd(SIMDE_FLOAT64_C( -625.27), SIMDE_FLOAT64_C( -86.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 105.61), SIMDE_FLOAT64_C( -345.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( -705.02), SIMDE_FLOAT64_C( -93.90)), simde_mm_set_pd(SIMDE_FLOAT64_C( 105.61), SIMDE_FLOAT64_C( -94.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -824.97), SIMDE_FLOAT64_C( -396.57)), simde_mm_set_pd(SIMDE_FLOAT64_C( -840.00), SIMDE_FLOAT64_C( 609.38)), simde_mm_set_pd(SIMDE_FLOAT64_C( -824.97), SIMDE_FLOAT64_C( 609.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 488.09), SIMDE_FLOAT64_C( -64.70)), simde_mm_set_pd(SIMDE_FLOAT64_C( 92.99), SIMDE_FLOAT64_C( 531.79)), simde_mm_set_pd(SIMDE_FLOAT64_C( 488.09), SIMDE_FLOAT64_C( 531.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_floor_sd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int 
test_simde_mm_floor_ss(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 932.95), SIMDE_FLOAT32_C( 946.85), SIMDE_FLOAT32_C( -866.96), SIMDE_FLOAT32_C( 383.68)), simde_mm_set_ps(SIMDE_FLOAT32_C( 82.96), SIMDE_FLOAT32_C( 541.21), SIMDE_FLOAT32_C( 296.52), SIMDE_FLOAT32_C( 677.59)), simde_mm_set_ps(SIMDE_FLOAT32_C( 932.95), SIMDE_FLOAT32_C( 946.85), SIMDE_FLOAT32_C( -866.96), SIMDE_FLOAT32_C( 677.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -824.32), SIMDE_FLOAT32_C( -924.44), SIMDE_FLOAT32_C( -44.45), SIMDE_FLOAT32_C( 340.62)), simde_mm_set_ps(SIMDE_FLOAT32_C( 771.85), SIMDE_FLOAT32_C( -635.91), SIMDE_FLOAT32_C( 706.67), SIMDE_FLOAT32_C( 477.32)), simde_mm_set_ps(SIMDE_FLOAT32_C( -824.32), SIMDE_FLOAT32_C( -924.44), SIMDE_FLOAT32_C( -44.45), SIMDE_FLOAT32_C( 477.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 991.94), SIMDE_FLOAT32_C( 918.28), SIMDE_FLOAT32_C( -617.36), SIMDE_FLOAT32_C( 932.02)), simde_mm_set_ps(SIMDE_FLOAT32_C( -21.99), SIMDE_FLOAT32_C( 274.28), SIMDE_FLOAT32_C( 819.55), SIMDE_FLOAT32_C( -957.22)), simde_mm_set_ps(SIMDE_FLOAT32_C( 991.94), SIMDE_FLOAT32_C( 918.28), SIMDE_FLOAT32_C( -617.36), SIMDE_FLOAT32_C( -958.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 178.34), SIMDE_FLOAT32_C( -961.13), SIMDE_FLOAT32_C( 949.60), SIMDE_FLOAT32_C( -711.62)), simde_mm_set_ps(SIMDE_FLOAT32_C( -951.61), SIMDE_FLOAT32_C( 53.93), SIMDE_FLOAT32_C( 376.79), SIMDE_FLOAT32_C( -892.19)), simde_mm_set_ps(SIMDE_FLOAT32_C( 178.34), SIMDE_FLOAT32_C( -961.13), SIMDE_FLOAT32_C( 949.60), SIMDE_FLOAT32_C( -893.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -858.80), SIMDE_FLOAT32_C( 721.75), SIMDE_FLOAT32_C( 422.70), SIMDE_FLOAT32_C( -573.55)), simde_mm_set_ps(SIMDE_FLOAT32_C( 473.81), SIMDE_FLOAT32_C( -473.48), SIMDE_FLOAT32_C( 815.69), SIMDE_FLOAT32_C( 520.75)), simde_mm_set_ps(SIMDE_FLOAT32_C( -858.80), SIMDE_FLOAT32_C( 721.75), SIMDE_FLOAT32_C( 422.70), SIMDE_FLOAT32_C( 520.00)) }, { 
simde_mm_set_ps(SIMDE_FLOAT32_C( -849.38), SIMDE_FLOAT32_C( 6.45), SIMDE_FLOAT32_C( -927.45), SIMDE_FLOAT32_C( -143.78)), simde_mm_set_ps(SIMDE_FLOAT32_C( 593.59), SIMDE_FLOAT32_C( 314.52), SIMDE_FLOAT32_C( 115.80), SIMDE_FLOAT32_C( 369.25)), simde_mm_set_ps(SIMDE_FLOAT32_C( -849.38), SIMDE_FLOAT32_C( 6.45), SIMDE_FLOAT32_C( -927.45), SIMDE_FLOAT32_C( 369.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -22.36), SIMDE_FLOAT32_C( 891.64), SIMDE_FLOAT32_C( 742.10), SIMDE_FLOAT32_C( 13.68)), simde_mm_set_ps(SIMDE_FLOAT32_C( 900.76), SIMDE_FLOAT32_C( -733.09), SIMDE_FLOAT32_C( -137.10), SIMDE_FLOAT32_C( -90.09)), simde_mm_set_ps(SIMDE_FLOAT32_C( -22.36), SIMDE_FLOAT32_C( 891.64), SIMDE_FLOAT32_C( 742.10), SIMDE_FLOAT32_C( -91.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -977.22), SIMDE_FLOAT32_C( -98.98), SIMDE_FLOAT32_C( -253.27), SIMDE_FLOAT32_C( 664.01)), simde_mm_set_ps(SIMDE_FLOAT32_C( 252.97), SIMDE_FLOAT32_C( 266.57), SIMDE_FLOAT32_C( -16.11), SIMDE_FLOAT32_C( -404.33)), simde_mm_set_ps(SIMDE_FLOAT32_C( -977.22), SIMDE_FLOAT32_C( -98.98), SIMDE_FLOAT32_C( -253.27), SIMDE_FLOAT32_C( -405.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_floor_ss(test_vec[i].a, test_vec[i].b); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_insert_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; int b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( 10), INT8_C( -18), INT8_C(-117), INT8_C( 95), INT8_C(-108), INT8_C( 57), INT8_C( 54), INT8_C( -50), INT8_C( 60), INT8_C( -68), INT8_C( 115), INT8_C( 14), INT8_C( -71), INT8_C( -40), INT8_C( 34), INT8_C( 112)), 6, simde_mm_set_epi8(INT8_C( 10), INT8_C( -18), INT8_C(-117), INT8_C( 6), INT8_C(-108), INT8_C( 57), INT8_C( 54), INT8_C( -50), INT8_C( 60), INT8_C( -68), INT8_C( 115), INT8_C( 14), INT8_C( -71), INT8_C( -40), INT8_C( 34), INT8_C( 112)) }, { simde_mm_set_epi8(INT8_C(-117), INT8_C( 67), INT8_C(-105), INT8_C( 26), 
INT8_C( 118), INT8_C( 67), INT8_C( 72), INT8_C( 120), INT8_C( -74), INT8_C( -8), INT8_C(-126), INT8_C( -39), INT8_C( 53), INT8_C( 74), INT8_C( 64), INT8_C( -74)), 13, simde_mm_set_epi8(INT8_C(-117), INT8_C( 67), INT8_C(-105), INT8_C( 13), INT8_C( 118), INT8_C( 67), INT8_C( 72), INT8_C( 120), INT8_C( -74), INT8_C( -8), INT8_C(-126), INT8_C( -39), INT8_C( 53), INT8_C( 74), INT8_C( 64), INT8_C( -74)) }, { simde_mm_set_epi8(INT8_C( 10), INT8_C(-114), INT8_C( 116), INT8_C(-108), INT8_C( 36), INT8_C( -74), INT8_C( 76), INT8_C( -2), INT8_C( 118), INT8_C( -60), INT8_C( 3), INT8_C( 48), INT8_C( 76), INT8_C( -18), INT8_C( -69), INT8_C(-110)), 13, simde_mm_set_epi8(INT8_C( 10), INT8_C(-114), INT8_C( 116), INT8_C( 13), INT8_C( 36), INT8_C( -74), INT8_C( 76), INT8_C( -2), INT8_C( 118), INT8_C( -60), INT8_C( 3), INT8_C( 48), INT8_C( 76), INT8_C( -18), INT8_C( -69), INT8_C(-110)) }, { simde_mm_set_epi8(INT8_C( -48), INT8_C( -33), INT8_C( -9), INT8_C( -27), INT8_C( -99), INT8_C( 29), INT8_C( 111), INT8_C( 85), INT8_C( 104), INT8_C( -59), INT8_C( -17), INT8_C( 23), INT8_C( 113), INT8_C( 8), INT8_C( 94), INT8_C( -32)), 0, simde_mm_set_epi8(INT8_C( -48), INT8_C( -33), INT8_C( -9), INT8_C( 0), INT8_C( -99), INT8_C( 29), INT8_C( 111), INT8_C( 85), INT8_C( 104), INT8_C( -59), INT8_C( -17), INT8_C( 23), INT8_C( 113), INT8_C( 8), INT8_C( 94), INT8_C( -32)) }, { simde_mm_set_epi8(INT8_C( -15), INT8_C(-126), INT8_C( 63), INT8_C( 14), INT8_C( 2), INT8_C( 88), INT8_C( -73), INT8_C( 22), INT8_C( -36), INT8_C( 60), INT8_C( -75), INT8_C( -26), INT8_C( -12), INT8_C(-126), INT8_C( 44), INT8_C( -36)), 8, simde_mm_set_epi8(INT8_C( -15), INT8_C(-126), INT8_C( 63), INT8_C( 8), INT8_C( 2), INT8_C( 88), INT8_C( -73), INT8_C( 22), INT8_C( -36), INT8_C( 60), INT8_C( -75), INT8_C( -26), INT8_C( -12), INT8_C(-126), INT8_C( 44), INT8_C( -36)) }, { simde_mm_set_epi8(INT8_C( 41), INT8_C(-102), INT8_C( 115), INT8_C( 65), INT8_C( 22), INT8_C( -32), INT8_C( -29), INT8_C( 85), INT8_C( -61), INT8_C( 79), 
INT8_C(-107), INT8_C( -9), INT8_C( 89), INT8_C( -3), INT8_C(-124), INT8_C( 71)), 4, simde_mm_set_epi8(INT8_C( 41), INT8_C(-102), INT8_C( 115), INT8_C( 4), INT8_C( 22), INT8_C( -32), INT8_C( -29), INT8_C( 85), INT8_C( -61), INT8_C( 79), INT8_C(-107), INT8_C( -9), INT8_C( 89), INT8_C( -3), INT8_C(-124), INT8_C( 71)) }, { simde_mm_set_epi8(INT8_C( 109), INT8_C( -46), INT8_C( -71), INT8_C( 107), INT8_C( 105), INT8_C( 123), INT8_C( -72), INT8_C( 72), INT8_C( -43), INT8_C( 111), INT8_C( 75), INT8_C( 60), INT8_C( 47), INT8_C( 78), INT8_C( -45), INT8_C(-101)), 7, simde_mm_set_epi8(INT8_C( 109), INT8_C( -46), INT8_C( -71), INT8_C( 7), INT8_C( 105), INT8_C( 123), INT8_C( -72), INT8_C( 72), INT8_C( -43), INT8_C( 111), INT8_C( 75), INT8_C( 60), INT8_C( 47), INT8_C( 78), INT8_C( -45), INT8_C(-101)) }, { simde_mm_set_epi8(INT8_C( -15), INT8_C( -23), INT8_C( 43), INT8_C( 15), INT8_C(-126), INT8_C( 102), INT8_C( 109), INT8_C( 84), INT8_C( -63), INT8_C( 80), INT8_C(-127), INT8_C( -46), INT8_C(-119), INT8_C( 111), INT8_C( -57), INT8_C( 55)), 15, simde_mm_set_epi8(INT8_C( -15), INT8_C( -23), INT8_C( 43), INT8_C( 15), INT8_C(-126), INT8_C( 102), INT8_C( 109), INT8_C( 84), INT8_C( -63), INT8_C( 80), INT8_C(-127), INT8_C( -46), INT8_C(-119), INT8_C( 111), INT8_C( -57), INT8_C( 55)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_insert_epi8(test_vec[i].a, test_vec[i].b, 12); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_insert_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; int32_t b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 1714139829), INT32_C( 980372706), INT32_C(-2140490700), INT32_C( 1671335242)), INT32_C( -570572581), simde_mm_set_epi32(INT32_C( -570572581), INT32_C( 980372706), INT32_C(-2140490700), INT32_C( 1671335242)) }, { simde_mm_set_epi32(INT32_C( -385110226), INT32_C( 1048550831), INT32_C( -768686987), INT32_C( -197993270)), INT32_C( 
292040766), simde_mm_set_epi32(INT32_C( 292040766), INT32_C( 1048550831), INT32_C( -768686987), INT32_C( -197993270)) }, { simde_mm_set_epi32(INT32_C( 1804855856), INT32_C( -940421248), INT32_C(-2104492113), INT32_C( 529461525)), INT32_C( 514665012), simde_mm_set_epi32(INT32_C( 514665012), INT32_C( -940421248), INT32_C(-2104492113), INT32_C( 529461525)) }, { simde_mm_set_epi32(INT32_C( -85551175), INT32_C( 885869319), INT32_C( 2003605165), INT32_C( -219227805)), INT32_C( 1214139778), simde_mm_set_epi32(INT32_C( 1214139778), INT32_C( 885869319), INT32_C( 2003605165), INT32_C( -219227805)) }, { simde_mm_set_epi32(INT32_C( -682699624), INT32_C( -729535744), INT32_C( -714850455), INT32_C( 418841580)), INT32_C( -606767281), simde_mm_set_epi32(INT32_C( -606767281), INT32_C( -729535744), INT32_C( -714850455), INT32_C( 418841580)) }, { simde_mm_set_epi32(INT32_C( 1844877442), INT32_C(-1305620543), INT32_C( -803345723), INT32_C(-1617611400)), INT32_C( 166894477), simde_mm_set_epi32(INT32_C( 166894477), INT32_C(-1305620543), INT32_C( -803345723), INT32_C(-1617611400)) }, { simde_mm_set_epi32(INT32_C( 1121577587), INT32_C( 1868325196), INT32_C( 567002451), INT32_C(-2029289287)), INT32_C( 566028624), simde_mm_set_epi32(INT32_C( 566028624), INT32_C( 1868325196), INT32_C( 567002451), INT32_C(-2029289287)) }, { simde_mm_set_epi32(INT32_C( 1745932520), INT32_C( 83968460), INT32_C(-1677360074), INT32_C( -664327669)), INT32_C(-2041481189), simde_mm_set_epi32(INT32_C(-2041481189), INT32_C( 83968460), INT32_C(-1677360074), INT32_C( -664327669)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_insert_epi32(test_vec[i].a, test_vec[i].b, 3); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_insert_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; int64_t b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C( 7362174507306405090), INT64_C(-9193337552220811958)), INT64_C( 
-850374615753703205), simde_mm_set_epi64x(INT64_C( -850374615753703205), INT64_C(-9193337552220811958)) }, { simde_mm_set_epi64x(INT64_C( 1254305542978645806), INT64_C( 4503491530864903285)), INT64_C(-9038724799495474923), simde_mm_set_epi64x(INT64_C(-9038724799495474923), INT64_C( 4503491530864903285)) }, { simde_mm_set_epi64x(INT64_C( -941576252334200268), INT64_C( 7751796878868631424)), INT64_C( 3804779755638396589), simde_mm_set_epi64x(INT64_C( 3804779755638396589), INT64_C( 7751796878868631424)) }, { simde_mm_set_epi64x(INT64_C(-3070259325336878100), INT64_C( 5214690643492116409)), INT64_C(-2932172554506065152), simde_mm_set_epi64x(INT64_C(-2932172554506065152), INT64_C( 5214690643492116409)) }, { simde_mm_set_epi64x(INT64_C(-5607597529679140155), INT64_C(-6947588056948574385)), INT64_C( 716806322442901634), simde_mm_set_epi64x(INT64_C( 716806322442901634), INT64_C(-6947588056948574385)) }, { simde_mm_set_epi64x(INT64_C( 4817139057959919948), INT64_C( 2435256986062520505)), INT64_C(-2853265611616884400), simde_mm_set_epi64x(INT64_C(-2853265611616884400), INT64_C( 2435256986062520505)) }, { simde_mm_set_epi64x(INT64_C(-8768094940408262424), INT64_C( 360641792213091382)), INT64_C(-4200371864439390341), simde_mm_set_epi64x(INT64_C(-4200371864439390341), INT64_C( 360641792213091382)) }, { simde_mm_set_epi64x(INT64_C(-6597763380374804743), INT64_C( 8897544020142746680)), INT64_C( 6543676199726157119), simde_mm_set_epi64x(INT64_C( 6543676199726157119), INT64_C( 8897544020142746680)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_insert_epi64(test_vec[i].a, test_vec[i].b, 1); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_insert_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -841.30), SIMDE_FLOAT32_C( 235.25), SIMDE_FLOAT32_C( -673.83), SIMDE_FLOAT32_C( -700.28)), 
simde_mm_set_ps(SIMDE_FLOAT32_C( 92.06), SIMDE_FLOAT32_C( 735.95), SIMDE_FLOAT32_C( 171.76), SIMDE_FLOAT32_C( 644.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( -841.30), SIMDE_FLOAT32_C( 235.25), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 658.47), SIMDE_FLOAT32_C( -385.89), SIMDE_FLOAT32_C( 916.54), SIMDE_FLOAT32_C( -937.06)), simde_mm_set_ps(SIMDE_FLOAT32_C( -209.24), SIMDE_FLOAT32_C( -655.00), SIMDE_FLOAT32_C( -722.53), SIMDE_FLOAT32_C( -502.41)), simde_mm_set_ps(SIMDE_FLOAT32_C( 658.47), SIMDE_FLOAT32_C( -385.89), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 364.58), SIMDE_FLOAT32_C( -402.45), SIMDE_FLOAT32_C( -252.76), SIMDE_FLOAT32_C( 525.23)), simde_mm_set_ps(SIMDE_FLOAT32_C( -670.47), SIMDE_FLOAT32_C( 704.70), SIMDE_FLOAT32_C( -13.52), SIMDE_FLOAT32_C( -328.78)), simde_mm_set_ps(SIMDE_FLOAT32_C( 364.58), SIMDE_FLOAT32_C( -402.45), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 313.55), SIMDE_FLOAT32_C( 334.89), SIMDE_FLOAT32_C( -877.87), SIMDE_FLOAT32_C( -974.35)), simde_mm_set_ps(SIMDE_FLOAT32_C( 708.89), SIMDE_FLOAT32_C( 60.29), SIMDE_FLOAT32_C( 416.58), SIMDE_FLOAT32_C( -180.37)), simde_mm_set_ps(SIMDE_FLOAT32_C( 313.55), SIMDE_FLOAT32_C( 334.89), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 857.51), SIMDE_FLOAT32_C( 674.21), SIMDE_FLOAT32_C( 882.64), SIMDE_FLOAT32_C( 238.47)), simde_mm_set_ps(SIMDE_FLOAT32_C( 556.45), SIMDE_FLOAT32_C( -280.75), SIMDE_FLOAT32_C( -829.81), SIMDE_FLOAT32_C( 113.59)), simde_mm_set_ps(SIMDE_FLOAT32_C( 857.51), SIMDE_FLOAT32_C( 674.21), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 520.41), SIMDE_FLOAT32_C( -251.34), SIMDE_FLOAT32_C( 896.76), SIMDE_FLOAT32_C( -419.46)), simde_mm_set_ps(SIMDE_FLOAT32_C( 87.20), SIMDE_FLOAT32_C( 358.54), SIMDE_FLOAT32_C( 859.02), SIMDE_FLOAT32_C( -278.34)), simde_mm_set_ps(SIMDE_FLOAT32_C( 520.41), 
SIMDE_FLOAT32_C( -251.34), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 533.51), SIMDE_FLOAT32_C( 968.42), SIMDE_FLOAT32_C( -332.89), SIMDE_FLOAT32_C( 137.03)), simde_mm_set_ps(SIMDE_FLOAT32_C( -277.45), SIMDE_FLOAT32_C( 701.40), SIMDE_FLOAT32_C( -500.07), SIMDE_FLOAT32_C( -788.33)), simde_mm_set_ps(SIMDE_FLOAT32_C( 533.51), SIMDE_FLOAT32_C( 968.42), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -468.51), SIMDE_FLOAT32_C( -644.45), SIMDE_FLOAT32_C( -325.06), SIMDE_FLOAT32_C( 994.31)), simde_mm_set_ps(SIMDE_FLOAT32_C( 290.72), SIMDE_FLOAT32_C( 585.35), SIMDE_FLOAT32_C( 669.48), SIMDE_FLOAT32_C( 65.07)), simde_mm_set_ps(SIMDE_FLOAT32_C( -468.51), SIMDE_FLOAT32_C( -644.45), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_insert_ps(test_vec[i].a, test_vec[i].b, 3); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_max_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( -83), INT8_C( 29), INT8_C( 3), INT8_C( 84), INT8_C( 22), INT8_C( -7), INT8_C( 112), INT8_C(-107), INT8_C( -53), INT8_C(-125), INT8_C( -41), INT8_C( -27), INT8_C(-118), INT8_C( 43), INT8_C( 44), INT8_C( 16)), simde_mm_set_epi8(INT8_C( -80), INT8_C( -52), INT8_C( 93), INT8_C( 71), INT8_C( 45), INT8_C( -99), INT8_C( -14), INT8_C( -97), INT8_C( 11), INT8_C( 62), INT8_C( 111), INT8_C( 17), INT8_C(-108), INT8_C( -6), INT8_C( 95), INT8_C( 1)), simde_mm_set_epi8(INT8_C( -80), INT8_C( 29), INT8_C( 93), INT8_C( 84), INT8_C( 45), INT8_C( -7), INT8_C( 112), INT8_C( -97), INT8_C( 11), INT8_C( 62), INT8_C( 111), INT8_C( 17), INT8_C(-108), INT8_C( 43), INT8_C( 95), INT8_C( 16)) }, { simde_mm_set_epi8(INT8_C( 61), INT8_C(-100), INT8_C( 79), INT8_C( 19), INT8_C( -86), INT8_C( 116), INT8_C( -34), INT8_C( 96), INT8_C( 70), 
INT8_C( -48), INT8_C( -13), INT8_C( 103), INT8_C(-107), INT8_C( 79), INT8_C( 24), INT8_C( -71)), simde_mm_set_epi8(INT8_C( 39), INT8_C( -53), INT8_C(-117), INT8_C( 65), INT8_C( -55), INT8_C( 21), INT8_C( 49), INT8_C( -22), INT8_C( 118), INT8_C( -71), INT8_C(-113), INT8_C( 2), INT8_C( -83), INT8_C( -74), INT8_C( -11), INT8_C( 85)), simde_mm_set_epi8(INT8_C( 61), INT8_C( -53), INT8_C( 79), INT8_C( 65), INT8_C( -55), INT8_C( 116), INT8_C( 49), INT8_C( 96), INT8_C( 118), INT8_C( -48), INT8_C( -13), INT8_C( 103), INT8_C( -83), INT8_C( 79), INT8_C( 24), INT8_C( 85)) }, { simde_mm_set_epi8(INT8_C( -13), INT8_C(-124), INT8_C(-110), INT8_C( 22), INT8_C(-109), INT8_C( 127), INT8_C( -45), INT8_C( -62), INT8_C( 0), INT8_C( 51), INT8_C( -98), INT8_C( -11), INT8_C( -54), INT8_C( -75), INT8_C(-101), INT8_C(-109)), simde_mm_set_epi8(INT8_C( -28), INT8_C(-120), INT8_C( 90), INT8_C( -89), INT8_C( -72), INT8_C( 93), INT8_C( -68), INT8_C(-105), INT8_C( 39), INT8_C( 46), INT8_C( 2), INT8_C( 5), INT8_C( 16), INT8_C( 34), INT8_C( -99), INT8_C( 64)), simde_mm_set_epi8(INT8_C( -13), INT8_C(-120), INT8_C( 90), INT8_C( 22), INT8_C( -72), INT8_C( 127), INT8_C( -45), INT8_C( -62), INT8_C( 39), INT8_C( 51), INT8_C( 2), INT8_C( 5), INT8_C( 16), INT8_C( 34), INT8_C( -99), INT8_C( 64)) }, { simde_mm_set_epi8(INT8_C( -43), INT8_C(-102), INT8_C( 111), INT8_C( 97), INT8_C( -69), INT8_C( 104), INT8_C( -24), INT8_C( -6), INT8_C( 29), INT8_C( 43), INT8_C( 65), INT8_C( 47), INT8_C( -60), INT8_C( 52), INT8_C( 2), INT8_C( -40)), simde_mm_set_epi8(INT8_C( -7), INT8_C( 85), INT8_C( -63), INT8_C( 48), INT8_C(-117), INT8_C( 19), INT8_C( 99), INT8_C( 67), INT8_C( -50), INT8_C(-116), INT8_C( 5), INT8_C( 2), INT8_C( -96), INT8_C( 125), INT8_C( -95), INT8_C( 0)), simde_mm_set_epi8(INT8_C( -7), INT8_C( 85), INT8_C( 111), INT8_C( 97), INT8_C( -69), INT8_C( 104), INT8_C( 99), INT8_C( 67), INT8_C( 29), INT8_C( 43), INT8_C( 65), INT8_C( 47), INT8_C( -60), INT8_C( 125), INT8_C( 2), INT8_C( 0)) }, { 
simde_mm_set_epi8(INT8_C( 98), INT8_C( -7), INT8_C( -56), INT8_C( 9), INT8_C( 74), INT8_C( 50), INT8_C( -95), INT8_C( 127), INT8_C( 114), INT8_C( 118), INT8_C( -76), INT8_C( 110), INT8_C( -48), INT8_C( -50), INT8_C( 57), INT8_C( -47)), simde_mm_set_epi8(INT8_C( 47), INT8_C( -82), INT8_C( 66), INT8_C( -76), INT8_C( 6), INT8_C( 117), INT8_C( 80), INT8_C( -13), INT8_C( 80), INT8_C(-109), INT8_C( 126), INT8_C( 70), INT8_C( 60), INT8_C( 40), INT8_C( -54), INT8_C( 3)), simde_mm_set_epi8(INT8_C( 98), INT8_C( -7), INT8_C( 66), INT8_C( 9), INT8_C( 74), INT8_C( 117), INT8_C( 80), INT8_C( 127), INT8_C( 114), INT8_C( 118), INT8_C( 126), INT8_C( 110), INT8_C( 60), INT8_C( 40), INT8_C( 57), INT8_C( 3)) }, { simde_mm_set_epi8(INT8_C( -80), INT8_C( -39), INT8_C( 113), INT8_C( -58), INT8_C( 58), INT8_C( -88), INT8_C( 22), INT8_C( -99), INT8_C( 112), INT8_C( -21), INT8_C( 44), INT8_C( -31), INT8_C( -2), INT8_C( 109), INT8_C( 19), INT8_C( -74)), simde_mm_set_epi8(INT8_C( 71), INT8_C( -38), INT8_C( 47), INT8_C( -92), INT8_C( 8), INT8_C( 48), INT8_C( 105), INT8_C( -82), INT8_C( 35), INT8_C( 54), INT8_C( 25), INT8_C( 36), INT8_C( 111), INT8_C( -7), INT8_C( -12), INT8_C( -75)), simde_mm_set_epi8(INT8_C( 71), INT8_C( -38), INT8_C( 113), INT8_C( -58), INT8_C( 58), INT8_C( 48), INT8_C( 105), INT8_C( -82), INT8_C( 112), INT8_C( 54), INT8_C( 44), INT8_C( 36), INT8_C( 111), INT8_C( 109), INT8_C( 19), INT8_C( -74)) }, { simde_mm_set_epi8(INT8_C( 18), INT8_C( -41), INT8_C( -74), INT8_C( 60), INT8_C( -63), INT8_C( -55), INT8_C( -71), INT8_C( 118), INT8_C( 46), INT8_C( -19), INT8_C( 19), INT8_C( -7), INT8_C( -55), INT8_C( -75), INT8_C( 125), INT8_C( -82)), simde_mm_set_epi8(INT8_C(-125), INT8_C(-106), INT8_C(-127), INT8_C( -23), INT8_C( -63), INT8_C( 0), INT8_C( 56), INT8_C( -23), INT8_C( -34), INT8_C( -32), INT8_C( 33), INT8_C( 127), INT8_C( 39), INT8_C( -68), INT8_C(-103), INT8_C( 126)), simde_mm_set_epi8(INT8_C( 18), INT8_C( -41), INT8_C( -74), INT8_C( 60), INT8_C( -63), INT8_C( 0), INT8_C( 
56), INT8_C( 118), INT8_C( 46), INT8_C( -19), INT8_C( 33), INT8_C( 127), INT8_C( 39), INT8_C( -68), INT8_C( 125), INT8_C( 126)) }, { simde_mm_set_epi8(INT8_C( -90), INT8_C( -81), INT8_C( 72), INT8_C( -34), INT8_C( -85), INT8_C( 108), INT8_C( 42), INT8_C( 34), INT8_C( -79), INT8_C( -48), INT8_C( 3), INT8_C( 113), INT8_C( -62), INT8_C( 25), INT8_C( -74), INT8_C( 13)), simde_mm_set_epi8(INT8_C( -44), INT8_C( -22), INT8_C( 26), INT8_C( -52), INT8_C( 29), INT8_C( 96), INT8_C( -97), INT8_C( 88), INT8_C( -11), INT8_C(-125), INT8_C( -60), INT8_C( 88), INT8_C( -55), INT8_C( 40), INT8_C( 76), INT8_C( -80)), simde_mm_set_epi8(INT8_C( -44), INT8_C( -22), INT8_C( 72), INT8_C( -34), INT8_C( 29), INT8_C( 108), INT8_C( 42), INT8_C( 88), INT8_C( -11), INT8_C( -48), INT8_C( 3), INT8_C( 113), INT8_C( -55), INT8_C( 40), INT8_C( 76), INT8_C( 13)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_max_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_max_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 477039853), INT32_C( 1555162887), INT32_C( 960008067), INT32_C(-2078514865)), simde_mm_set_epi32(INT32_C( -346254872), INT32_C( 70690131), INT32_C( -249580279), INT32_C( -297974983)), simde_mm_set_epi32(INT32_C( 477039853), INT32_C( 1555162887), INT32_C( 960008067), INT32_C( -297974983)) }, { simde_mm_set_epi32(INT32_C( 1946076095), INT32_C( 1626099315), INT32_C( 367020053), INT32_C( -768900730)), simde_mm_set_epi32(INT32_C( -534331032), INT32_C(-1542646073), INT32_C( 900332023), INT32_C( 1625404528)), simde_mm_set_epi32(INT32_C( 1946076095), INT32_C( 1626099315), INT32_C( 900332023), INT32_C( 1625404528)) }, { simde_mm_set_epi32(INT32_C( 1785938855), INT32_C( 217054136), INT32_C( 6277613), INT32_C( 1323071612)), simde_mm_set_epi32(INT32_C(-1842447194), INT32_C( 1202045241), 
INT32_C( 391997517), INT32_C( 904511960)), simde_mm_set_epi32(INT32_C( 1785938855), INT32_C( 1202045241), INT32_C( 391997517), INT32_C( 1323071612)) }, { simde_mm_set_epi32(INT32_C( -303416903), INT32_C( 131247622), INT32_C(-2038528381), INT32_C(-1774779832)), simde_mm_set_epi32(INT32_C( 307942484), INT32_C(-1408988466), INT32_C(-1799017519), INT32_C( 1885804693)), simde_mm_set_epi32(INT32_C( 307942484), INT32_C( 131247622), INT32_C(-1799017519), INT32_C( 1885804693)) }, { simde_mm_set_epi32(INT32_C( -424176665), INT32_C( 531131808), INT32_C( -752351935), INT32_C( 650848232)), simde_mm_set_epi32(INT32_C(-1513144536), INT32_C( -875213417), INT32_C(-1592044131), INT32_C(-1714853090)), simde_mm_set_epi32(INT32_C( -424176665), INT32_C( 531131808), INT32_C( -752351935), INT32_C( 650848232)) }, { simde_mm_set_epi32(INT32_C( 1901471013), INT32_C(-1928782687), INT32_C(-1204237637), INT32_C(-1735607658)), simde_mm_set_epi32(INT32_C( 1526238167), INT32_C( 1155872155), INT32_C( -981128645), INT32_C(-1655952235)), simde_mm_set_epi32(INT32_C( 1901471013), INT32_C( 1155872155), INT32_C( -981128645), INT32_C(-1655952235)) }, { simde_mm_set_epi32(INT32_C( 2067002242), INT32_C( -967820279), INT32_C(-1842332164), INT32_C( 1466140749)), simde_mm_set_epi32(INT32_C( -25857443), INT32_C(-1417095277), INT32_C( -31302876), INT32_C(-1114441571)), simde_mm_set_epi32(INT32_C( 2067002242), INT32_C( -967820279), INT32_C( -31302876), INT32_C( 1466140749)) }, { simde_mm_set_epi32(INT32_C( -515864750), INT32_C( 1885657427), INT32_C(-1627411585), INT32_C( -455970760)), simde_mm_set_epi32(INT32_C( -505553594), INT32_C(-1363391302), INT32_C( 1929235496), INT32_C(-1212934452)), simde_mm_set_epi32(INT32_C( -505553594), INT32_C( 1885657427), INT32_C( 1929235496), INT32_C( -455970760)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_max_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int 
test_simde_mm_max_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu16(UINT16_C(31378), UINT16_C(24380), UINT16_C(40255), UINT16_C(13201), UINT16_C(58870), UINT16_C(49460), UINT16_C(64732), UINT16_C(36111)), simde_x_mm_set_epu16(UINT16_C(64055), UINT16_C(45511), UINT16_C(61004), UINT16_C(49058), UINT16_C(25195), UINT16_C(65082), UINT16_C( 861), UINT16_C( 2148)), simde_x_mm_set_epu16(UINT16_C(64055), UINT16_C(45511), UINT16_C(61004), UINT16_C(49058), UINT16_C(58870), UINT16_C(65082), UINT16_C(64732), UINT16_C(36111)) }, { simde_x_mm_set_epu16(UINT16_C(17034), UINT16_C(61660), UINT16_C(37691), UINT16_C(48256), UINT16_C(52466), UINT16_C(59150), UINT16_C(48524), UINT16_C(48932)), simde_x_mm_set_epu16(UINT16_C( 9311), UINT16_C(52954), UINT16_C(59922), UINT16_C(37439), UINT16_C(48521), UINT16_C(44576), UINT16_C(58557), UINT16_C( 4347)), simde_x_mm_set_epu16(UINT16_C(17034), UINT16_C(61660), UINT16_C(59922), UINT16_C(48256), UINT16_C(52466), UINT16_C(59150), UINT16_C(58557), UINT16_C(48932)) }, { simde_x_mm_set_epu16(UINT16_C( 3717), UINT16_C(65479), UINT16_C(47591), UINT16_C(37591), UINT16_C(56498), UINT16_C(64377), UINT16_C( 4742), UINT16_C(26061)), simde_x_mm_set_epu16(UINT16_C(53135), UINT16_C(25263), UINT16_C(35893), UINT16_C(43723), UINT16_C(55550), UINT16_C(13729), UINT16_C(44168), UINT16_C(18708)), simde_x_mm_set_epu16(UINT16_C(53135), UINT16_C(65479), UINT16_C(47591), UINT16_C(43723), UINT16_C(56498), UINT16_C(64377), UINT16_C(44168), UINT16_C(26061)) }, { simde_x_mm_set_epu16(UINT16_C(32738), UINT16_C(32324), UINT16_C(12257), UINT16_C( 4392), UINT16_C(48941), UINT16_C(15991), UINT16_C(56181), UINT16_C(64839)), simde_x_mm_set_epu16(UINT16_C(54182), UINT16_C(32645), UINT16_C(19389), UINT16_C(64736), UINT16_C(55568), UINT16_C(54741), UINT16_C(47863), UINT16_C(26870)), simde_x_mm_set_epu16(UINT16_C(54182), UINT16_C(32645), UINT16_C(19389), UINT16_C(64736), UINT16_C(55568), 
UINT16_C(54741), UINT16_C(56181), UINT16_C(64839)) }, { simde_x_mm_set_epu16(UINT16_C(25092), UINT16_C( 2337), UINT16_C(41136), UINT16_C(62054), UINT16_C(31692), UINT16_C(47896), UINT16_C(26455), UINT16_C(41876)), simde_x_mm_set_epu16(UINT16_C(57533), UINT16_C( 4966), UINT16_C(21312), UINT16_C(22488), UINT16_C(59621), UINT16_C( 4501), UINT16_C(18372), UINT16_C(62293)), simde_x_mm_set_epu16(UINT16_C(57533), UINT16_C( 4966), UINT16_C(41136), UINT16_C(62054), UINT16_C(59621), UINT16_C(47896), UINT16_C(26455), UINT16_C(62293)) }, { simde_x_mm_set_epu16(UINT16_C(48994), UINT16_C(54179), UINT16_C(26228), UINT16_C(61187), UINT16_C(44854), UINT16_C(57084), UINT16_C(16902), UINT16_C( 1404)), simde_x_mm_set_epu16(UINT16_C( 4826), UINT16_C(23694), UINT16_C(32546), UINT16_C(41184), UINT16_C( 725), UINT16_C(27358), UINT16_C(30444), UINT16_C(27027)), simde_x_mm_set_epu16(UINT16_C(48994), UINT16_C(54179), UINT16_C(32546), UINT16_C(61187), UINT16_C(44854), UINT16_C(57084), UINT16_C(30444), UINT16_C(27027)) }, { simde_x_mm_set_epu16(UINT16_C(60922), UINT16_C(51825), UINT16_C( 9165), UINT16_C(16534), UINT16_C( 2526), UINT16_C(29939), UINT16_C(61826), UINT16_C(45025)), simde_x_mm_set_epu16(UINT16_C(10446), UINT16_C(28319), UINT16_C(61630), UINT16_C(32182), UINT16_C(17860), UINT16_C(15017), UINT16_C(59067), UINT16_C(19548)), simde_x_mm_set_epu16(UINT16_C(60922), UINT16_C(51825), UINT16_C(61630), UINT16_C(32182), UINT16_C(17860), UINT16_C(29939), UINT16_C(61826), UINT16_C(45025)) }, { simde_x_mm_set_epu16(UINT16_C(34011), UINT16_C(36834), UINT16_C(12993), UINT16_C(21384), UINT16_C(62643), UINT16_C(55943), UINT16_C(12296), UINT16_C(62062)), simde_x_mm_set_epu16(UINT16_C(22645), UINT16_C(31782), UINT16_C(42377), UINT16_C(17805), UINT16_C(55262), UINT16_C(50550), UINT16_C(53828), UINT16_C(32401)), simde_x_mm_set_epu16(UINT16_C(34011), UINT16_C(36834), UINT16_C(42377), UINT16_C(21384), UINT16_C(62643), UINT16_C(55943), UINT16_C(53828), UINT16_C(62062)) } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_max_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_max_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu32(UINT32_C(2532888443), UINT32_C( 529927089), UINT32_C(4207752004), UINT32_C(3510207683)), simde_x_mm_set_epu32(UINT32_C( 574626150), UINT32_C(1395025810), UINT32_C(1034901368), UINT32_C(3021400990)), simde_x_mm_set_epu32(UINT32_C(2532888443), UINT32_C(1395025810), UINT32_C(4207752004), UINT32_C(3510207683)) }, { simde_x_mm_set_epu32(UINT32_C(2363706693), UINT32_C(3928702803), UINT32_C( 881772043), UINT32_C( 57971440)), simde_x_mm_set_epu32(UINT32_C(4216913656), UINT32_C(3303535239), UINT32_C( 568980555), UINT32_C( 308523788)), simde_x_mm_set_epu32(UINT32_C(4216913656), UINT32_C(3928702803), UINT32_C( 881772043), UINT32_C( 308523788)) }, { simde_x_mm_set_epu32(UINT32_C( 548389896), UINT32_C(2976724828), UINT32_C(3723033325), UINT32_C(1050716143)), simde_x_mm_set_epu32(UINT32_C(3591135284), UINT32_C( 856620101), UINT32_C( 833768684), UINT32_C( 203883279)), simde_x_mm_set_epu32(UINT32_C(3591135284), UINT32_C(2976724828), UINT32_C(3723033325), UINT32_C(1050716143)) }, { simde_x_mm_set_epu32(UINT32_C(3717602953), UINT32_C(2736045656), UINT32_C(2539339865), UINT32_C(2199825857)), simde_x_mm_set_epu32(UINT32_C(3331243307), UINT32_C(1452685186), UINT32_C(2337560284), UINT32_C(3592613020)), simde_x_mm_set_epu32(UINT32_C(3717602953), UINT32_C(2736045656), UINT32_C(2539339865), UINT32_C(3592613020)) }, { simde_x_mm_set_epu32(UINT32_C(2462641628), UINT32_C(4101106468), UINT32_C(2939293383), UINT32_C( 448332726)), simde_x_mm_set_epu32(UINT32_C(4091790581), UINT32_C( 227177150), UINT32_C(4294101921), UINT32_C(1459100736)), simde_x_mm_set_epu32(UINT32_C(4091790581), UINT32_C(4101106468), UINT32_C(4294101921), UINT32_C(1459100736)) }, { 
simde_x_mm_set_epu32(UINT32_C(1973671448), UINT32_C(1498045863), UINT32_C(1898038098), UINT32_C(1965060938)), simde_x_mm_set_epu32(UINT32_C( 29809850), UINT32_C( 861011023), UINT32_C( 63413124), UINT32_C(2176922492)), simde_x_mm_set_epu32(UINT32_C(1973671448), UINT32_C(1498045863), UINT32_C(1898038098), UINT32_C(2176922492)) }, { simde_x_mm_set_epu32(UINT32_C( 526896309), UINT32_C( 817143907), UINT32_C(1563490657), UINT32_C(3681037749)), simde_x_mm_set_epu32(UINT32_C(3002623714), UINT32_C(3298584662), UINT32_C(1905485247), UINT32_C( 108377847)), simde_x_mm_set_epu32(UINT32_C(3002623714), UINT32_C(3298584662), UINT32_C(1905485247), UINT32_C(3681037749)) }, { simde_x_mm_set_epu32(UINT32_C( 749402761), UINT32_C(3319236993), UINT32_C(3240012333), UINT32_C(1509228324)), simde_x_mm_set_epu32(UINT32_C(3285355124), UINT32_C(3206151128), UINT32_C(2987263147), UINT32_C(3706118926)), simde_x_mm_set_epu32(UINT32_C(3285355124), UINT32_C(3319236993), UINT32_C(3240012333), UINT32_C(3706118926)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_max_epu32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_min_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( -85), INT8_C( 106), INT8_C( -97), INT8_C( -26), INT8_C( -5), INT8_C( 83), INT8_C(-106), INT8_C(-116), INT8_C(-113), INT8_C( -34), INT8_C( -81), INT8_C( 109), INT8_C( -25), INT8_C( -16), INT8_C( 28), INT8_C( -76)), simde_mm_set_epi8(INT8_C( -69), INT8_C( -4), INT8_C( 73), INT8_C( -90), INT8_C( 93), INT8_C(-116), INT8_C( 19), INT8_C( 124), INT8_C( -67), INT8_C( 48), INT8_C( 38), INT8_C( 11), INT8_C( 21), INT8_C( 91), INT8_C( 118), INT8_C( 103)), simde_mm_set_epi8(INT8_C( -85), INT8_C( -4), INT8_C( -97), INT8_C( -90), INT8_C( -5), INT8_C(-116), INT8_C(-106), INT8_C(-116), INT8_C(-113), INT8_C( -34), INT8_C( -81), INT8_C( 11), 
INT8_C( -25), INT8_C( -16), INT8_C( 28), INT8_C( -76)) }, { simde_mm_set_epi8(INT8_C( -69), INT8_C( -84), INT8_C( 37), INT8_C( -17), INT8_C( 36), INT8_C( -34), INT8_C(-104), INT8_C( 112), INT8_C( 13), INT8_C( 47), INT8_C( -94), INT8_C( -38), INT8_C( 111), INT8_C( -44), INT8_C( -90), INT8_C( -86)), simde_mm_set_epi8(INT8_C(-125), INT8_C( 55), INT8_C( 5), INT8_C( 113), INT8_C( 100), INT8_C( -35), INT8_C( -4), INT8_C( 29), INT8_C( 60), INT8_C( 120), INT8_C( -36), INT8_C( 72), INT8_C( 92), INT8_C( 31), INT8_C( 11), INT8_C(-106)), simde_mm_set_epi8(INT8_C(-125), INT8_C( -84), INT8_C( 5), INT8_C( -17), INT8_C( 36), INT8_C( -35), INT8_C(-104), INT8_C( 29), INT8_C( 13), INT8_C( 47), INT8_C( -94), INT8_C( -38), INT8_C( 92), INT8_C( -44), INT8_C( -90), INT8_C(-106)) }, { simde_mm_set_epi8(INT8_C( 49), INT8_C(-113), INT8_C( -25), INT8_C( -86), INT8_C( 106), INT8_C( 71), INT8_C( 43), INT8_C( 81), INT8_C( -66), INT8_C( 111), INT8_C( 40), INT8_C( 35), INT8_C( 15), INT8_C( 51), INT8_C( 21), INT8_C( 60)), simde_mm_set_epi8(INT8_C( 37), INT8_C( -84), INT8_C( 110), INT8_C( -46), INT8_C( -27), INT8_C( -56), INT8_C( 104), INT8_C( 2), INT8_C(-105), INT8_C( 65), INT8_C( 54), INT8_C( 75), INT8_C( 113), INT8_C( 29), INT8_C( -47), INT8_C( 84)), simde_mm_set_epi8(INT8_C( 37), INT8_C(-113), INT8_C( -25), INT8_C( -86), INT8_C( -27), INT8_C( -56), INT8_C( 43), INT8_C( 2), INT8_C(-105), INT8_C( 65), INT8_C( 40), INT8_C( 35), INT8_C( 15), INT8_C( 29), INT8_C( -47), INT8_C( 60)) }, { simde_mm_set_epi8(INT8_C( -9), INT8_C( -51), INT8_C( -8), INT8_C( -23), INT8_C( 42), INT8_C( 26), INT8_C( -68), INT8_C( -93), INT8_C( -78), INT8_C( -45), INT8_C( -36), INT8_C( -34), INT8_C( -79), INT8_C( -94), INT8_C( 80), INT8_C( 99)), simde_mm_set_epi8(INT8_C( 27), INT8_C( 45), INT8_C( 48), INT8_C( -73), INT8_C( -21), INT8_C( 86), INT8_C( 54), INT8_C( -39), INT8_C( -2), INT8_C( 108), INT8_C( -19), INT8_C( -81), INT8_C( -52), INT8_C( 118), INT8_C( 61), INT8_C( -36)), simde_mm_set_epi8(INT8_C( -9), INT8_C( -51), 
INT8_C( -8), INT8_C( -73), INT8_C( -21), INT8_C( 26), INT8_C( -68), INT8_C( -93), INT8_C( -78), INT8_C( -45), INT8_C( -36), INT8_C( -81), INT8_C( -79), INT8_C( -94), INT8_C( 61), INT8_C( -36)) }, { simde_mm_set_epi8(INT8_C( 75), INT8_C( 2), INT8_C( -37), INT8_C( -22), INT8_C( 110), INT8_C(-125), INT8_C( 33), INT8_C( -94), INT8_C( -5), INT8_C( -67), INT8_C( -78), INT8_C( -46), INT8_C( 43), INT8_C( -89), INT8_C( 74), INT8_C( 18)), simde_mm_set_epi8(INT8_C( 84), INT8_C( -44), INT8_C(-115), INT8_C( 29), INT8_C(-121), INT8_C( 22), INT8_C( 43), INT8_C(-123), INT8_C( -63), INT8_C( -73), INT8_C( 36), INT8_C( -60), INT8_C( -96), INT8_C( -58), INT8_C( -42), INT8_C( 78)), simde_mm_set_epi8(INT8_C( 75), INT8_C( -44), INT8_C(-115), INT8_C( -22), INT8_C(-121), INT8_C(-125), INT8_C( 33), INT8_C(-123), INT8_C( -63), INT8_C( -73), INT8_C( -78), INT8_C( -60), INT8_C( -96), INT8_C( -89), INT8_C( -42), INT8_C( 18)) }, { simde_mm_set_epi8(INT8_C( -82), INT8_C( -34), INT8_C( 74), INT8_C( 86), INT8_C( -7), INT8_C( 93), INT8_C( -99), INT8_C( -7), INT8_C( 127), INT8_C( 26), INT8_C( -51), INT8_C( 41), INT8_C( 71), INT8_C( 67), INT8_C(-102), INT8_C(-119)), simde_mm_set_epi8(INT8_C( -8), INT8_C( 88), INT8_C( 46), INT8_C( 105), INT8_C( -43), INT8_C(-109), INT8_C( 38), INT8_C( -35), INT8_C( -17), INT8_C( -4), INT8_C(-122), INT8_C(-112), INT8_C( -62), INT8_C( 100), INT8_C( 89), INT8_C( -83)), simde_mm_set_epi8(INT8_C( -82), INT8_C( -34), INT8_C( 46), INT8_C( 86), INT8_C( -43), INT8_C(-109), INT8_C( -99), INT8_C( -35), INT8_C( -17), INT8_C( -4), INT8_C(-122), INT8_C(-112), INT8_C( -62), INT8_C( 67), INT8_C(-102), INT8_C(-119)) }, { simde_mm_set_epi8(INT8_C( 21), INT8_C( -83), INT8_C( -9), INT8_C( -66), INT8_C( -50), INT8_C( -47), INT8_C( -30), INT8_C( 4), INT8_C( -60), INT8_C( 23), INT8_C( 126), INT8_C( 69), INT8_C( 56), INT8_C( 4), INT8_C( 32), INT8_C( 31)), simde_mm_set_epi8(INT8_C( 76), INT8_C( -95), INT8_C( 81), INT8_C( 9), INT8_C(-106), INT8_C( -75), INT8_C( 46), INT8_C( 117), INT8_C(-119), 
INT8_C( -56), INT8_C(-115), INT8_C( -97), INT8_C( -52), INT8_C( -81), INT8_C(-123), INT8_C( 54)), simde_mm_set_epi8(INT8_C( 21), INT8_C( -95), INT8_C( -9), INT8_C( -66), INT8_C(-106), INT8_C( -75), INT8_C( -30), INT8_C( 4), INT8_C(-119), INT8_C( -56), INT8_C(-115), INT8_C( -97), INT8_C( -52), INT8_C( -81), INT8_C(-123), INT8_C( 31)) }, { simde_mm_set_epi8(INT8_C( -56), INT8_C(-107), INT8_C( 36), INT8_C( -66), INT8_C( -22), INT8_C(-112), INT8_C( 57), INT8_C( 12), INT8_C( -40), INT8_C(-109), INT8_C( -85), INT8_C( 86), INT8_C( -31), INT8_C( -66), INT8_C( 99), INT8_C( -10)), simde_mm_set_epi8(INT8_C( 57), INT8_C(-109), INT8_C( 45), INT8_C( 32), INT8_C( 94), INT8_C( -27), INT8_C( 11), INT8_C( 67), INT8_C(-114), INT8_C( -40), INT8_C(-125), INT8_C(-106), INT8_C( 72), INT8_C( -34), INT8_C(-102), INT8_C( 18)), simde_mm_set_epi8(INT8_C( -56), INT8_C(-109), INT8_C( 36), INT8_C( -66), INT8_C( -22), INT8_C(-112), INT8_C( 11), INT8_C( 12), INT8_C(-114), INT8_C(-109), INT8_C(-125), INT8_C(-106), INT8_C( -31), INT8_C( -66), INT8_C(-102), INT8_C( -10)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_min_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_min_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C(-1390607532), INT32_C( 385446037), INT32_C( -880551963), INT32_C(-1976882160)), simde_mm_set_epi32(INT32_C(-1328784057), INT32_C( 765325983), INT32_C( 188641041), INT32_C(-1795531007)), simde_mm_set_epi32(INT32_C(-1390607532), INT32_C( 385446037), INT32_C( -880551963), INT32_C(-1976882160)) }, { simde_mm_set_epi32(INT32_C( 1033654035), INT32_C(-1435181472), INT32_C( 1188098919), INT32_C(-1789978439)), simde_mm_set_epi32(INT32_C( 667650881), INT32_C( -921357846), INT32_C( 1991872258), INT32_C(-1380518571)), simde_mm_set_epi32(INT32_C( 667650881), INT32_C(-1435181472), 
INT32_C( 1188098919), INT32_C(-1789978439)) }, { simde_mm_set_epi32(INT32_C( -209415658), INT32_C(-1820339262), INT32_C( 3383029), INT32_C( -894067821)), simde_mm_set_epi32(INT32_C( -460825945), INT32_C(-1201816425), INT32_C( 657326597), INT32_C( 270703936)), simde_mm_set_epi32(INT32_C( -460825945), INT32_C(-1820339262), INT32_C( 3383029), INT32_C( -894067821)) }, { simde_mm_set_epi32(INT32_C( -711299231), INT32_C(-1150752518), INT32_C( 489373999), INT32_C(-1003224360)), simde_mm_set_epi32(INT32_C( -111820496), INT32_C(-1961663677), INT32_C( -829684478), INT32_C(-1602379520)), simde_mm_set_epi32(INT32_C( -711299231), INT32_C(-1961663677), INT32_C( -829684478), INT32_C(-1602379520)) }, { simde_mm_set_epi32(INT32_C( 1660536841), INT32_C( 1244832127), INT32_C( 1920382062), INT32_C( -791791151)), simde_mm_set_epi32(INT32_C( 799949492), INT32_C( 108351731), INT32_C( 1351843398), INT32_C( 1009306115)), simde_mm_set_epi32(INT32_C( 799949492), INT32_C( 108351731), INT32_C( 1351843398), INT32_C( -791791151)) }, { simde_mm_set_epi32(INT32_C(-1327926842), INT32_C( 984094365), INT32_C( 1894460641), INT32_C( -26405962)), simde_mm_set_epi32(INT32_C( 1205481380), INT32_C( 137390510), INT32_C( 590747940), INT32_C( 1878652085)), simde_mm_set_epi32(INT32_C(-1327926842), INT32_C( 137390510), INT32_C( 590747940), INT32_C( -26405962)) }, { simde_mm_set_epi32(INT32_C( 316126780), INT32_C(-1043744394), INT32_C( 787289081), INT32_C( -910852690)), simde_mm_set_epi32(INT32_C(-2087288343), INT32_C(-1056950039), INT32_C( -555736705), INT32_C( 666671486)), simde_mm_set_epi32(INT32_C(-2087288343), INT32_C(-1056950039), INT32_C( -555736705), INT32_C( -910852690)) }, { simde_mm_set_epi32(INT32_C(-1498461986), INT32_C(-1418974686), INT32_C(-1311767695), INT32_C(-1038502387)), simde_mm_set_epi32(INT32_C( -722855220), INT32_C( 492871512), INT32_C( -175913896), INT32_C( -920105808)), simde_mm_set_epi32(INT32_C(-1498461986), INT32_C(-1418974686), INT32_C(-1311767695), INT32_C(-1038502387)) } }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_min_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_min_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu16(UINT16_C(51368), UINT16_C(47248), UINT16_C( 2518), UINT16_C(59587), UINT16_C(64433), UINT16_C(45544), UINT16_C( 8944), UINT16_C(56380)), simde_x_mm_set_epu16(UINT16_C(35287), UINT16_C( 4953), UINT16_C(55160), UINT16_C(11599), UINT16_C( 1282), UINT16_C(22629), UINT16_C( 1908), UINT16_C(57232)), simde_x_mm_set_epu16(UINT16_C(35287), UINT16_C( 4953), UINT16_C( 2518), UINT16_C(11599), UINT16_C( 1282), UINT16_C(22629), UINT16_C( 1908), UINT16_C(56380)) }, { simde_x_mm_set_epu16(UINT16_C(51082), UINT16_C(41324), UINT16_C(63856), UINT16_C(49625), UINT16_C(47646), UINT16_C(40071), UINT16_C(43894), UINT16_C( 6845)), simde_x_mm_set_epu16(UINT16_C(29622), UINT16_C(57397), UINT16_C(58908), UINT16_C(15516), UINT16_C(57300), UINT16_C(16770), UINT16_C( 6179), UINT16_C(31976)), simde_x_mm_set_epu16(UINT16_C(29622), UINT16_C(41324), UINT16_C(58908), UINT16_C(15516), UINT16_C(47646), UINT16_C(16770), UINT16_C( 6179), UINT16_C( 6845)) }, { simde_x_mm_set_epu16(UINT16_C( 7791), UINT16_C( 9357), UINT16_C(51414), UINT16_C(32085), UINT16_C( 5803), UINT16_C(15112), UINT16_C( 4983), UINT16_C(62284)), simde_x_mm_set_epu16(UINT16_C(19191), UINT16_C(53664), UINT16_C(31350), UINT16_C(59818), UINT16_C(36846), UINT16_C(44912), UINT16_C(44597), UINT16_C(18257)), simde_x_mm_set_epu16(UINT16_C( 7791), UINT16_C( 9357), UINT16_C(31350), UINT16_C(32085), UINT16_C( 5803), UINT16_C(15112), UINT16_C( 4983), UINT16_C(18257)) }, { simde_x_mm_set_epu16(UINT16_C(23178), UINT16_C(52616), UINT16_C(54786), UINT16_C(18857), UINT16_C(64323), UINT16_C(44008), UINT16_C(12675), UINT16_C(13653)), simde_x_mm_set_epu16(UINT16_C(16010), UINT16_C(49432), 
UINT16_C(46679), UINT16_C(33525), UINT16_C(47161), UINT16_C(64878), UINT16_C(40685), UINT16_C(46136)), simde_x_mm_set_epu16(UINT16_C(16010), UINT16_C(49432), UINT16_C(46679), UINT16_C(18857), UINT16_C(47161), UINT16_C(44008), UINT16_C(12675), UINT16_C(13653)) }, { simde_x_mm_set_epu16(UINT16_C(33571), UINT16_C(12594), UINT16_C( 3662), UINT16_C(56380), UINT16_C(54926), UINT16_C(32164), UINT16_C( 3421), UINT16_C(38714)), simde_x_mm_set_epu16(UINT16_C(65175), UINT16_C(47536), UINT16_C(21879), UINT16_C(12881), UINT16_C(50915), UINT16_C(31304), UINT16_C(10941), UINT16_C(57984)), simde_x_mm_set_epu16(UINT16_C(33571), UINT16_C(12594), UINT16_C( 3662), UINT16_C(12881), UINT16_C(50915), UINT16_C(31304), UINT16_C( 3421), UINT16_C(38714)) }, { simde_x_mm_set_epu16(UINT16_C(17540), UINT16_C(62844), UINT16_C(46558), UINT16_C(45098), UINT16_C(32389), UINT16_C(20250), UINT16_C(19285), UINT16_C(40862)), simde_x_mm_set_epu16(UINT16_C(17361), UINT16_C( 8131), UINT16_C(50246), UINT16_C(30481), UINT16_C(30265), UINT16_C(57439), UINT16_C(42358), UINT16_C(24996)), simde_x_mm_set_epu16(UINT16_C(17361), UINT16_C( 8131), UINT16_C(46558), UINT16_C(30481), UINT16_C(30265), UINT16_C(20250), UINT16_C(19285), UINT16_C(24996)) }, { simde_x_mm_set_epu16(UINT16_C(11573), UINT16_C(56754), UINT16_C(38860), UINT16_C(26669), UINT16_C(21209), UINT16_C( 2072), UINT16_C(19855), UINT16_C(11067)), simde_x_mm_set_epu16(UINT16_C( 4235), UINT16_C(30280), UINT16_C(23335), UINT16_C(17584), UINT16_C( 3445), UINT16_C( 1117), UINT16_C(46968), UINT16_C(58704)), simde_x_mm_set_epu16(UINT16_C( 4235), UINT16_C(30280), UINT16_C(23335), UINT16_C(17584), UINT16_C( 3445), UINT16_C( 1117), UINT16_C(19855), UINT16_C(11067)) }, { simde_x_mm_set_epu16(UINT16_C( 1826), UINT16_C(34774), UINT16_C(17130), UINT16_C(44175), UINT16_C(60865), UINT16_C(54658), UINT16_C(62552), UINT16_C(62233)), simde_x_mm_set_epu16(UINT16_C(15183), UINT16_C(55598), UINT16_C( 5779), UINT16_C(28145), UINT16_C(24471), UINT16_C(50174), UINT16_C(53029), 
UINT16_C(43004)), simde_x_mm_set_epu16(UINT16_C( 1826), UINT16_C(34774), UINT16_C( 5779), UINT16_C(28145), UINT16_C(24471), UINT16_C(50174), UINT16_C(53029), UINT16_C(43004)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_min_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_min_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu32(UINT32_C( 788201309), UINT32_C(2071601612), UINT32_C(3123609508), UINT32_C(1344608341)), simde_x_mm_set_epu32(UINT32_C(3982697916), UINT32_C( 968486606), UINT32_C(1116860132), UINT32_C(2825138301)), simde_x_mm_set_epu32(UINT32_C( 788201309), UINT32_C( 968486606), UINT32_C(1116860132), UINT32_C(1344608341)) }, { simde_x_mm_set_epu32(UINT32_C( 384132137), UINT32_C(1977877021), UINT32_C(4289747748), UINT32_C(2515962526)), simde_x_mm_set_epu32(UINT32_C(3869012810), UINT32_C( 645527240), UINT32_C(4230349530), UINT32_C(1145244296)), simde_x_mm_set_epu32(UINT32_C( 384132137), UINT32_C( 645527240), UINT32_C(4230349530), UINT32_C(1145244296)) }, { simde_x_mm_set_epu32(UINT32_C(3974730728), UINT32_C(3242218332), UINT32_C(1720053534), UINT32_C(3159005428)), simde_x_mm_set_epu32(UINT32_C(3856942180), UINT32_C(4205396871), UINT32_C(2272972736), UINT32_C(4113553543)), simde_x_mm_set_epu32(UINT32_C(3856942180), UINT32_C(3242218332), UINT32_C(1720053534), UINT32_C(3159005428)) }, { simde_x_mm_set_epu32(UINT32_C(3254877076), UINT32_C(2399148141), UINT32_C(2938842657), UINT32_C(1961058722)), simde_x_mm_set_epu32(UINT32_C(1528033433), UINT32_C( 461232656), UINT32_C(1948639979), UINT32_C(1593317844)), simde_x_mm_set_epu32(UINT32_C(1528033433), UINT32_C( 461232656), UINT32_C(1948639979), UINT32_C(1593317844)) }, { simde_x_mm_set_epu32(UINT32_C(1721814381), UINT32_C(3279771329), UINT32_C(1032528892), UINT32_C(1770589845)), simde_x_mm_set_epu32(UINT32_C( 
338961476), UINT32_C(1015681646), UINT32_C(2529171351), UINT32_C(1474803551)), simde_x_mm_set_epu32(UINT32_C( 338961476), UINT32_C(1015681646), UINT32_C(1032528892), UINT32_C(1474803551)) }, { simde_x_mm_set_epu32(UINT32_C(2865994869), UINT32_C(3359283004), UINT32_C(2376950903), UINT32_C(2025629636)), simde_x_mm_set_epu32(UINT32_C(1089016862), UINT32_C(3251738064), UINT32_C(2393163457), UINT32_C( 147184464)), simde_x_mm_set_epu32(UINT32_C(1089016862), UINT32_C(3251738064), UINT32_C(2376950903), UINT32_C( 147184464)) }, { simde_x_mm_set_epu32(UINT32_C(1041113949), UINT32_C(2410530901), UINT32_C( 131686205), UINT32_C(3148398563)), simde_x_mm_set_epu32(UINT32_C(1539533337), UINT32_C(3899801708), UINT32_C(3357338729), UINT32_C( 465532279)), simde_x_mm_set_epu32(UINT32_C(1041113949), UINT32_C(2410530901), UINT32_C( 131686205), UINT32_C( 465532279)) }, { simde_x_mm_set_epu32(UINT32_C(2571079475), UINT32_C(4121171595), UINT32_C(3849859490), UINT32_C(2510281154)), simde_x_mm_set_epu32(UINT32_C(2596335814), UINT32_C(2240823510), UINT32_C(2609625051), UINT32_C(1259317116)), simde_x_mm_set_epu32(UINT32_C(2571079475), UINT32_C(2240823510), UINT32_C(2609625051), UINT32_C(1259317116)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_min_epu32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_minpos_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu16(UINT16_C(40818), UINT16_C(13812), UINT16_C(10872), UINT16_C(54124), UINT16_C(15100), UINT16_C(22810), UINT16_C(31798), UINT16_C(58017)), simde_x_mm_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 5), UINT16_C(10872)) }, { simde_x_mm_set_epu16(UINT16_C(37656), UINT16_C(38804), UINT16_C(35573), UINT16_C(33536), UINT16_C(16030), UINT16_C(50431), UINT16_C(35836), UINT16_C(37491)), 
simde_x_mm_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 3), UINT16_C(16030)) }, { simde_x_mm_set_epu16(UINT16_C(56214), UINT16_C(56676), UINT16_C(56044), UINT16_C(59619), UINT16_C(21150), UINT16_C(47982), UINT16_C( 92), UINT16_C(46668)), simde_x_mm_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 92)) }, { simde_x_mm_set_epu16(UINT16_C(30841), UINT16_C(41481), UINT16_C(22332), UINT16_C(20377), UINT16_C( 8461), UINT16_C(16174), UINT16_C(46168), UINT16_C(38746)), simde_x_mm_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 3), UINT16_C( 8461)) }, { simde_x_mm_set_epu16(UINT16_C(38550), UINT16_C(63031), UINT16_C(12859), UINT16_C(36699), UINT16_C(19386), UINT16_C(49588), UINT16_C(27067), UINT16_C(12812)), simde_x_mm_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(12812)) }, { simde_x_mm_set_epu16(UINT16_C(20747), UINT16_C(29576), UINT16_C( 5460), UINT16_C(38860), UINT16_C(52108), UINT16_C( 149), UINT16_C(63981), UINT16_C(19000)), simde_x_mm_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 2), UINT16_C( 149)) }, { simde_x_mm_set_epu16(UINT16_C(31686), UINT16_C( 5758), UINT16_C(64324), UINT16_C(55924), UINT16_C(43567), UINT16_C(17467), UINT16_C(18633), UINT16_C(20456)), simde_x_mm_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 6), UINT16_C( 5758)) }, { simde_x_mm_set_epu16(UINT16_C(13431), UINT16_C(48627), UINT16_C(23340), UINT16_C(23935), UINT16_C(12927), UINT16_C(48412), UINT16_C(33835), UINT16_C(15901)), simde_x_mm_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 3), UINT16_C(12927)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) 
{ simde__m128i r = simde_mm_minpos_epu16(test_vec[i].a); simde_assert_m128i_u16(r, ==, test_vec[i].r); } return 0; }/* test_simde_mm_mpsadbw_epu8: runs simde_mm_mpsadbw_epu8 with its third argument fixed at the literal 7 on eight (a, b) pairs built from sixteen unsigned bytes each, and asserts the result equals the stored r as eight unsigned 16-bit lanes. Only this one immediate value is exercised here. Returns 0 once all entries have been checked. */ static int test_simde_mm_mpsadbw_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu8(UINT8_C(226), UINT8_C(204), UINT8_C(132), UINT8_C(191), UINT8_C(109), UINT8_C( 5), UINT8_C( 22), UINT8_C(209), UINT8_C(243), UINT8_C(105), UINT8_C(186), UINT8_C(151), UINT8_C( 27), UINT8_C( 30), UINT8_C(223), UINT8_C( 88)), simde_x_mm_set_epu8(UINT8_C(139), UINT8_C(113), UINT8_C( 31), UINT8_C( 64), UINT8_C(155), UINT8_C( 28), UINT8_C(223), UINT8_C(195), UINT8_C(151), UINT8_C( 30), UINT8_C( 86), UINT8_C( 95), UINT8_C(158), UINT8_C( 54), UINT8_C( 10), UINT8_C( 66)), simde_x_mm_set_epu16(UINT16_C( 289), UINT16_C( 222), UINT16_C( 124), UINT16_C( 292), UINT16_C( 582), UINT16_C( 466), UINT16_C( 396), UINT16_C( 354)) }, { simde_x_mm_set_epu8(UINT8_C(171), UINT8_C( 80), UINT8_C(108), UINT8_C(236), UINT8_C(239), UINT8_C(191), UINT8_C(219), UINT8_C(208), UINT8_C(165), UINT8_C( 30), UINT8_C(251), UINT8_C(196), UINT8_C( 25), UINT8_C(255), UINT8_C( 78), UINT8_C( 81)), simde_x_mm_set_epu8(UINT8_C( 98), UINT8_C( 67), UINT8_C(141), UINT8_C(239), UINT8_C( 33), UINT8_C(138), UINT8_C(161), UINT8_C(132), UINT8_C( 23), UINT8_C( 84), UINT8_C(170), UINT8_C(230), UINT8_C(214), UINT8_C(115), UINT8_C( 58), UINT8_C(130)), simde_x_mm_set_epu16(UINT16_C( 154), UINT16_C( 325), UINT16_C( 380), UINT16_C( 374), UINT16_C( 386), UINT16_C( 495), UINT16_C( 331), UINT16_C( 257)) }, { simde_x_mm_set_epu8(UINT8_C( 36), UINT8_C( 1), UINT8_C( 5), UINT8_C( 22), UINT8_C( 73), UINT8_C(242), UINT8_C( 47), UINT8_C( 2), UINT8_C(204), UINT8_C(245), UINT8_C( 42), UINT8_C( 38), UINT8_C( 36), UINT8_C(136), UINT8_C( 36), UINT8_C(163)), simde_x_mm_set_epu8(UINT8_C(168), UINT8_C(213), UINT8_C( 2), UINT8_C( 30), UINT8_C( 25), UINT8_C(127), UINT8_C( 31), UINT8_C(133), UINT8_C(118), UINT8_C(121), UINT8_C(113), UINT8_C(231), UINT8_C( 34),
UINT8_C( 80), UINT8_C(116), UINT8_C( 25)), simde_x_mm_set_epu16(UINT16_C( 438), UINT16_C( 637), UINT16_C( 543), UINT16_C( 197), UINT16_C( 414), UINT16_C( 749), UINT16_C( 430), UINT16_C( 116)) }, { simde_x_mm_set_epu8(UINT8_C( 29), UINT8_C( 29), UINT8_C(199), UINT8_C(141), UINT8_C(153), UINT8_C(161), UINT8_C(202), UINT8_C( 57), UINT8_C( 56), UINT8_C( 10), UINT8_C(125), UINT8_C( 91), UINT8_C(212), UINT8_C(168), UINT8_C( 43), UINT8_C(108)), simde_x_mm_set_epu8(UINT8_C( 54), UINT8_C( 56), UINT8_C(193), UINT8_C(184), UINT8_C( 44), UINT8_C( 35), UINT8_C(133), UINT8_C( 78), UINT8_C( 34), UINT8_C(199), UINT8_C(192), UINT8_C(116), UINT8_C( 79), UINT8_C(137), UINT8_C(150), UINT8_C( 19)), simde_x_mm_set_epu16(UINT16_C( 251), UINT16_C( 293), UINT16_C( 234), UINT16_C( 340), UINT16_C( 517), UINT16_C( 460), UINT16_C( 245), UINT16_C( 209)) }, { simde_x_mm_set_epu8(UINT8_C(211), UINT8_C(227), UINT8_C(150), UINT8_C( 37), UINT8_C(162), UINT8_C(249), UINT8_C(215), UINT8_C(195), UINT8_C( 63), UINT8_C(183), UINT8_C(196), UINT8_C( 97), UINT8_C( 60), UINT8_C(192), UINT8_C(194), UINT8_C(164)), simde_x_mm_set_epu8(UINT8_C(120), UINT8_C( 70), UINT8_C(238), UINT8_C(106), UINT8_C( 29), UINT8_C( 66), UINT8_C( 80), UINT8_C(174), UINT8_C( 76), UINT8_C(151), UINT8_C( 56), UINT8_C( 77), UINT8_C( 36), UINT8_C(138), UINT8_C(170), UINT8_C(105)), simde_x_mm_set_epu16(UINT16_C( 444), UINT16_C( 282), UINT16_C( 295), UINT16_C( 333), UINT16_C( 360), UINT16_C( 472), UINT16_C( 227), UINT16_C( 221)) }, { simde_x_mm_set_epu8(UINT8_C(167), UINT8_C(231), UINT8_C(129), UINT8_C(236), UINT8_C(158), UINT8_C( 34), UINT8_C(219), UINT8_C( 52), UINT8_C(201), UINT8_C( 55), UINT8_C( 88), UINT8_C( 76), UINT8_C( 91), UINT8_C(192), UINT8_C( 80), UINT8_C(141)), simde_x_mm_set_epu8(UINT8_C(215), UINT8_C( 0), UINT8_C( 39), UINT8_C(166), UINT8_C( 94), UINT8_C(231), UINT8_C(224), UINT8_C(140), UINT8_C(153), UINT8_C( 38), UINT8_C(229), UINT8_C(239), UINT8_C(167), UINT8_C( 85), UINT8_C(242), UINT8_C(231)), 
simde_x_mm_set_epu16(UINT16_C( 350), UINT16_C( 573), UINT16_C( 237), UINT16_C( 385), UINT16_C( 448), UINT16_C( 329), UINT16_C( 458), UINT16_C( 208)) }, { simde_x_mm_set_epu8(UINT8_C(125), UINT8_C( 70), UINT8_C(248), UINT8_C(193), UINT8_C(244), UINT8_C( 76), UINT8_C(163), UINT8_C(150), UINT8_C(169), UINT8_C( 24), UINT8_C(114), UINT8_C( 21), UINT8_C(171), UINT8_C( 13), UINT8_C(157), UINT8_C(178)), simde_x_mm_set_epu8(UINT8_C(198), UINT8_C(153), UINT8_C(205), UINT8_C(219), UINT8_C(198), UINT8_C( 83), UINT8_C( 62), UINT8_C( 96), UINT8_C(119), UINT8_C(213), UINT8_C( 90), UINT8_C(147), UINT8_C(232), UINT8_C( 64), UINT8_C(164), UINT8_C(129)), simde_x_mm_set_epu16(UINT16_C( 260), UINT16_C( 272), UINT16_C( 281), UINT16_C( 234), UINT16_C( 237), UINT16_C( 269), UINT16_C( 350), UINT16_C( 447)) }, { simde_x_mm_set_epu8(UINT8_C( 35), UINT8_C(118), UINT8_C(198), UINT8_C( 85), UINT8_C( 8), UINT8_C(150), UINT8_C(121), UINT8_C(243), UINT8_C( 16), UINT8_C(221), UINT8_C(227), UINT8_C( 37), UINT8_C( 87), UINT8_C( 66), UINT8_C(216), UINT8_C(177)), simde_x_mm_set_epu8(UINT8_C(148), UINT8_C(250), UINT8_C( 9), UINT8_C(204), UINT8_C(136), UINT8_C(106), UINT8_C( 76), UINT8_C(162), UINT8_C(111), UINT8_C( 61), UINT8_C( 51), UINT8_C(174), UINT8_C(202), UINT8_C(248), UINT8_C( 64), UINT8_C(174)), simde_x_mm_set_epu16(UINT16_C( 354), UINT16_C( 270), UINT16_C( 529), UINT16_C( 391), UINT16_C( 553), UINT16_C( 58), UINT16_C( 564), UINT16_C( 546)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_mpsadbw_epu8(test_vec[i].a, test_vec[i].b, 7); simde_assert_m128i_u16(r, ==, test_vec[i].r); } return 0; }/* test_simde_mm_mul_epi32: runs simde_mm_mul_epi32 on eight fixed (a, b) pairs of four signed 32-bit values; each expected r is built from two signed 64-bit values with simde_mm_set_epi64x. NOTE(review): the comparison uses simde_assert_m128i_i32, so r is checked as four 32-bit lanes rather than two 64-bit ones; presumably the same bits are compared either way, but a 64-bit assertion (if the harness provides one - confirm) would make a failure report easier to read. Returns 0 once all entries have been checked. */ static int test_simde_mm_mul_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C(-1795240134), INT32_C( -225235276), INT32_C( 260214100), INT32_C(-1437997118)), simde_mm_set_epi32(INT32_C(-1930516827), INT32_C(-2112091397), INT32_C(-2111031164), INT32_C(
1337416900)), simde_mm_set_epi64x(INT64_C( 475717488740520572), INT64_C(-1923201647764494200)) }, { simde_mm_set_epi32(INT32_C(-2008734650), INT32_C( 1987866784), INT32_C( 1460586541), INT32_C( -308079008)), simde_mm_set_epi32(INT32_C( 1619497162), INT32_C( 1948444521), INT32_C(-1962326453), INT32_C( 355525826)), simde_mm_set_epi64x(INT64_C( 3873248143762690464), INT64_C( -109530043792460608)) }, { simde_mm_set_epi32(INT32_C( -967525151), INT32_C( 610102911), INT32_C( -82193087), INT32_C( 610685272)), simde_mm_set_epi32(INT32_C( -464555473), INT32_C( 1911406284), INT32_C( 1504403418), INT32_C( 1044708348)), simde_mm_set_epi64x(INT64_C( 1166154537972092724), INT64_C( 637988001659050656)) }, { simde_mm_set_epi32(INT32_C( 734959886), INT32_C( 727387476), INT32_C(-1178464916), INT32_C( 759159032)), simde_mm_set_epi32(INT32_C( 227151790), INT32_C( 2027589232), INT32_C( -510665157), INT32_C( 664410245)), simde_mm_set_epi64x(INT64_C( 1474843013829258432), INT64_C( 504393038445082840)) }, { simde_mm_set_epi32(INT32_C( -981875510), INT32_C(-1517853387), INT32_C( 645890048), INT32_C( 1483649404)), simde_mm_set_epi32(INT32_C(-1176179108), INT32_C( -884770028), INT32_C( 1667209942), INT32_C( -150484050)), simde_mm_set_epi64x(INT64_C( 1342951183715884836), INT64_C( -223265571094006200)) }, { simde_mm_set_epi32(INT32_C( -229613172), INT32_C( -220620915), INT32_C( 1178015644), INT32_C(-1087297921)), simde_mm_set_epi32(INT32_C(-1751226172), INT32_C(-1807182523), INT32_C( 804591673), INT32_C( -413406506)), simde_mm_set_epi64x(INT64_C( 398702261796268545), INT64_C( 449496034501674026)) }, { simde_mm_set_epi32(INT32_C(-1537480837), INT32_C( -185479936), INT32_C( 386155243), INT32_C( 479622311)), simde_mm_set_epi32(INT32_C( -579116298), INT32_C( -674452315), INT32_C( -142346909), INT32_C(-1350064613)), simde_mm_set_epi64x(INT64_C( 125097372221251840), INT64_C( -647521109686380643)) }, { simde_mm_set_epi32(INT32_C( 2146108348), INT32_C( 341648975), INT32_C( 901701854), INT32_C( 
-45673158)), simde_mm_set_epi32(INT32_C( 723948067), INT32_C( 1076365190), INT32_C( -693273363), INT32_C( 420311114)), simde_mm_set_epi64x(INT64_C( 367739063889180250), INT64_C( -19196935918878012)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_mul_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; }/* test_simde_mm_mullo_epi32: runs simde_mm_mullo_epi32 on eight fixed (a, b) pairs of four signed 32-bit values and asserts the result equals the stored r as signed 32-bit lanes. The expected values are 32-bit quantities (presumably the low half of each lane-wise product - TODO confirm against the intrinsic's reference). Returns 0 once all entries have been checked. */ static int test_simde_mm_mullo_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 1431531471), INT32_C(-1934656662), INT32_C( -638668877), INT32_C( 1966621031)), simde_mm_set_epi32(INT32_C( -646581607), INT32_C( 1232530244), INT32_C( 841047033), INT32_C(-1472432488)), simde_mm_set_epi32(INT32_C( 1865056439), INT32_C(-1849485784), INT32_C( 1840285211), INT32_C( 1940934440)) }, { simde_mm_set_epi32(INT32_C( 2119691862), INT32_C( 1656188631), INT32_C( 949015012), INT32_C( -645064268)), simde_mm_set_epi32(INT32_C( 1854664684), INT32_C( 1364588379), INT32_C( -185048322), INT32_C( 1546431794)), simde_mm_set_epi32(INT32_C( -314310328), INT32_C( -410462355), INT32_C( 887719992), INT32_C( -220354264)) }, { simde_mm_set_epi32(INT32_C( 390191893), INT32_C( 1246080619), INT32_C( 2032818940), INT32_C(-1664549536)), simde_mm_set_epi32(INT32_C( 1802476280), INT32_C(-1492538899), INT32_C( -866846002), INT32_C(-1929477089)), simde_mm_set_epi32(INT32_C( -357441960), INT32_C(-1318446065), INT32_C(-2019602744), INT32_C(-2076430688)) }, { simde_mm_set_epi32(INT32_C( -629988407), INT32_C(-1719063308), INT32_C( -231912988), INT32_C( 1462330866)), simde_mm_set_epi32(INT32_C(-1320599620), INT32_C( -769970788), INT32_C( -625134420), INT32_C( 293199422)), simde_mm_set_epi32(INT32_C( -273826148), INT32_C(-1841674064), INT32_C(-1631324880), INT32_C( 1137740956)) }, { simde_mm_set_epi32(INT32_C(-1221152076), INT32_C(-1223458578), INT32_C(-1263311025), INT32_C( 2089857790)),
simde_mm_set_epi32(INT32_C(-2068401725), INT32_C( 1531524331), INT32_C( 1064638153), INT32_C( -843878287)), simde_mm_set_epi32(INT32_C( 1009816348), INT32_C( 1660269178), INT32_C( -203954425), INT32_C( -674749922)) }, { simde_mm_set_epi32(INT32_C(-1283522478), INT32_C( 524522527), INT32_C( 1152886109), INT32_C( -438302719)), simde_mm_set_epi32(INT32_C(-2012975476), INT32_C( -67951228), INT32_C( 258710699), INT32_C( -25585755)), simde_mm_set_epi32(INT32_C( 1289709784), INT32_C( -508699908), INT32_C( 1615798559), INT32_C( 345423781)) }, { simde_mm_set_epi32(INT32_C( 199098300), INT32_C(-1941350427), INT32_C(-1170519288), INT32_C( 2117275260)), simde_mm_set_epi32(INT32_C( 1736533752), INT32_C( 668304389), INT32_C(-1871663356), INT32_C( -231105582)), simde_mm_set_epi32(INT32_C( 1957733920), INT32_C(-1823880327), INT32_C( -994597856), INT32_C( -515705416)) }, { simde_mm_set_epi32(INT32_C( 796248112), INT32_C( -74441002), INT32_C( 756194998), INT32_C( 128690934)), simde_mm_set_epi32(INT32_C( -934813484), INT32_C( 1495167051), INT32_C( 549239917), INT32_C( 1315900336)), simde_mm_set_epi32(INT32_C( 1644414912), INT32_C(-1010128718), INT32_C( 723729278), INT32_C(-1157096672)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_mullo_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; }/* test_simde_x_mm_mullo_epu32: runs simde_x_mm_mullo_epu32 on eight fixed (a, b) pairs of four unsigned 32-bit values and asserts the result equals the stored r as unsigned 32-bit lanes. The expected values are 32-bit quantities (presumably the low half of each lane-wise unsigned product - TODO confirm against the function's definition). Returns 0 once all entries have been checked. */ static int test_simde_x_mm_mullo_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu32(UINT32_C(3025185092), UINT32_C(1071139209), UINT32_C(4112016578), UINT32_C(2016123065)), simde_x_mm_set_epu32(UINT32_C( 290328727), UINT32_C(2226082336), UINT32_C( 501526514), UINT32_C(1649328035)), simde_x_mm_set_epu32(UINT32_C(2095060764), UINT32_C( 725488416), UINT32_C( 347594084), UINT32_C(1695696075)) }, { simde_x_mm_set_epu32(UINT32_C(3106040714), UINT32_C(3664680000), UINT32_C( 790276509), UINT32_C(3508971009)),
simde_x_mm_set_epu32(UINT32_C(1699381529), UINT32_C( 875841923), UINT32_C(3450881837), UINT32_C(1814797908)), simde_x_mm_set_epu32(UINT32_C(4101048954), UINT32_C(1809587392), UINT32_C( 27941785), UINT32_C(3377337940)) }, { simde_x_mm_set_epu32(UINT32_C(2893750485), UINT32_C(1249313952), UINT32_C(3305249000), UINT32_C(2880865177)), simde_x_mm_set_epu32(UINT32_C( 24592541), UINT32_C( 966747003), UINT32_C(3635146633), UINT32_C(4165399857)), simde_x_mm_set_epu32(UINT32_C(2814075553), UINT32_C(1297684704), UINT32_C(3327650856), UINT32_C(3182861641)) }, { simde_x_mm_set_epu32(UINT32_C( 189392542), UINT32_C(3720757090), UINT32_C(3597240564), UINT32_C(1088106434)), simde_x_mm_set_epu32(UINT32_C(3260872370), UINT32_C(1980368114), UINT32_C(4105157694), UINT32_C( 87964330)), simde_x_mm_set_epu32(UINT32_C(2043031004), UINT32_C(1681486500), UINT32_C(2632866584), UINT32_C( 358329044)) }, { simde_x_mm_set_epu32(UINT32_C(3751593359), UINT32_C( 552022460), UINT32_C(2369123294), UINT32_C(1803067683)), simde_x_mm_set_epu32(UINT32_C(3768762282), UINT32_C(1316515183), UINT32_C(3641532283), UINT32_C(3816235830)), simde_x_mm_set_epu32(UINT32_C(2583618038), UINT32_C(1284217988), UINT32_C(1857129898), UINT32_C( 431933026)) }, { simde_x_mm_set_epu32(UINT32_C(1492473270), UINT32_C(1074984006), UINT32_C(4252907716), UINT32_C(1202847242)), simde_x_mm_set_epu32(UINT32_C(3343508230), UINT32_C(2110123114), UINT32_C( 454026106), UINT32_C( 903743904)), simde_x_mm_set_epu32(UINT32_C(2342937668), UINT32_C( 686271740), UINT32_C(3120093544), UINT32_C(1876500544)) }, { simde_x_mm_set_epu32(UINT32_C( 998872003), UINT32_C(1287827774), UINT32_C( 838516167), UINT32_C( 572896344)), simde_x_mm_set_epu32(UINT32_C(1333196355), UINT32_C(3474739513), UINT32_C(2465428766), UINT32_C( 183148961)), simde_x_mm_set_epu32(UINT32_C( 299961865), UINT32_C( 909513934), UINT32_C( 369307730), UINT32_C(2022818648)) }, { simde_x_mm_set_epu32(UINT32_C(2381114005), UINT32_C( 17395766), UINT32_C(1942367476), UINT32_C( 903009655)), 
simde_x_mm_set_epu32(UINT32_C(2255462391), UINT32_C( 872948613), UINT32_C(1238590873), UINT32_C(2084494234)), simde_x_mm_set_epu32(UINT32_C(3387154627), UINT32_C(1307487758), UINT32_C( 38347220), UINT32_C( 339949206)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_x_mm_mullo_epu32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u32(r, ==, test_vec[i].r); } return 0; }/* test_simde_mm_packus_epi32: runs simde_mm_packus_epi32 on eight fixed (a, b) pairs of four signed 32-bit values and asserts the result equals the stored r as eight unsigned 16-bit lanes. In the table the last four simde_x_mm_set_epu16 arguments of r correspond to a and the first four to b, in the same argument order; negative inputs are expected as 0, inputs above 65535 as 65535, and in-range inputs such as 40707 unchanged. Returns 0 once all entries have been checked. */ static int test_simde_mm_packus_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C(-1174506975), INT32_C(-1622542395), INT32_C( -228565455), INT32_C( 1722092951)), simde_mm_set_epi32(INT32_C( -188735178), INT32_C( -859437923), INT32_C( 40707), INT32_C(-2078308106)), simde_x_mm_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C(40707), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(65535)) }, { simde_mm_set_epi32(INT32_C( 279157042), INT32_C( -664033471), INT32_C( 1210432777), INT32_C(-1162534366)), simde_mm_set_epi32(INT32_C( 330673804), INT32_C(-1666667776), INT32_C(-1924642427), INT32_C(-1118465206)), simde_x_mm_set_epu16(UINT16_C(65535), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(65535), UINT16_C( 0), UINT16_C(65535), UINT16_C( 0)) }, { simde_mm_set_epi32(INT32_C(-1675273454), INT32_C( 252374167), INT32_C(-1898193118), INT32_C(-2121147721)), simde_mm_set_epi32(INT32_C(-1175340609), INT32_C( 1859609595), INT32_C( 305301369), INT32_C( 1936442403)), simde_x_mm_set_epu16(UINT16_C( 0), UINT16_C(65535), UINT16_C(65535), UINT16_C(65535), UINT16_C( 0), UINT16_C(65535), UINT16_C( 0), UINT16_C( 0)) }, { simde_mm_set_epi32(INT32_C( -372493069), INT32_C(-1071911212), INT32_C( 122218483), INT32_C( 1644546153)), simde_mm_set_epi32(INT32_C( -603471664), INT32_C( -435236634), INT32_C(-1237577227), INT32_C( 40156)), simde_x_mm_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(40156), UINT16_C( 0), UINT16_C( 0), UINT16_C(65535),
UINT16_C(65535)) }, { simde_mm_set_epi32(INT32_C( -132052641), INT32_C( 1567528732), INT32_C( -1505776), INT32_C( 422872655)), simde_mm_set_epi32(INT32_C( 1525337323), INT32_C(-1409556892), INT32_C( -388396270), INT32_C(-2086963765)), simde_x_mm_set_epu16(UINT16_C(65535), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C(65535), UINT16_C( 0), UINT16_C(65535)) }, { simde_mm_set_epi32(INT32_C(-1206563380), INT32_C( 1557268555), INT32_C( -872999034), INT32_C( 234221095)), simde_mm_set_epi32(INT32_C( 723455818), INT32_C( 773436940), INT32_C( 326719262), INT32_C( -826281673)), simde_x_mm_set_epu16(UINT16_C(65535), UINT16_C(65535), UINT16_C(65535), UINT16_C( 0), UINT16_C( 0), UINT16_C(65535), UINT16_C( 0), UINT16_C(65535)) }, { simde_mm_set_epi32(INT32_C( -738729427), INT32_C( 1061139854), INT32_C(-2031157453), INT32_C( 2064098327)), simde_mm_set_epi32(INT32_C(-1034678142), INT32_C( 97345786), INT32_C( 1187222665), INT32_C( 61643)), simde_x_mm_set_epu16(UINT16_C( 0), UINT16_C(65535), UINT16_C(65535), UINT16_C(61643), UINT16_C( 0), UINT16_C(65535), UINT16_C( 0), UINT16_C(65535)) }, { simde_mm_set_epi32(INT32_C(-2037944282), INT32_C( -355400074), INT32_C( -341422900), INT32_C(-1605623445)), simde_mm_set_epi32(INT32_C( 629990551), INT32_C( 397893118), INT32_C(-1310491239), INT32_C(-2050362652)), simde_x_mm_set_epu16(UINT16_C(65535), UINT16_C(65535), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_packus_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u16(r, ==, test_vec[i].r); } return 0; }/* test_simde_mm_round_pd: checks simde_mm_round_pd on eight fixed vectors of two doubles. Each entry stores a, a rounding field and the expected r, but the rounding field is never read: four separate loops pass the mode as a literal, covering entries 0-1 (SIMDE_MM_FROUND_TO_NEAREST_INT), 2-3 (SIMDE_MM_FROUND_TO_NEG_INF), 4-5 (SIMDE_MM_FROUND_TO_POS_INF) and 6-7 (SIMDE_MM_FROUND_TO_ZERO), so the table order and those index ranges must be kept in step. NOTE(review): presumably the mode is hard-coded per loop because it has to be a compile-time constant - confirm. Results are compared with simde_assert_m128d_close using 1 as the third argument. Returns 0 once all entries have been checked. */ static int test_simde_mm_round_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; int rounding; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 959.46), SIMDE_FLOAT64_C( -646.56)), SIMDE_MM_FROUND_TO_NEAREST_INT, simde_mm_set_pd(SIMDE_FLOAT64_C( 959.00), SIMDE_FLOAT64_C( -647.00)) }, {
simde_mm_set_pd(SIMDE_FLOAT64_C( -813.43), SIMDE_FLOAT64_C( 515.75)), SIMDE_MM_FROUND_TO_NEAREST_INT, simde_mm_set_pd(SIMDE_FLOAT64_C( -813.00), SIMDE_FLOAT64_C( 516.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -365.43), SIMDE_FLOAT64_C( 840.28)), SIMDE_MM_FROUND_TO_NEG_INF, simde_mm_set_pd(SIMDE_FLOAT64_C( -366.00), SIMDE_FLOAT64_C( 840.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -843.69), SIMDE_FLOAT64_C( -641.36)), SIMDE_MM_FROUND_TO_NEG_INF, simde_mm_set_pd(SIMDE_FLOAT64_C( -844.00), SIMDE_FLOAT64_C( -642.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -542.64), SIMDE_FLOAT64_C( -757.34)), SIMDE_MM_FROUND_TO_POS_INF, simde_mm_set_pd(SIMDE_FLOAT64_C( -542.00), SIMDE_FLOAT64_C( -757.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -593.16), SIMDE_FLOAT64_C( 973.59)), SIMDE_MM_FROUND_TO_POS_INF, simde_mm_set_pd(SIMDE_FLOAT64_C( -593.00), SIMDE_FLOAT64_C( 974.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 54.67), SIMDE_FLOAT64_C( -54.55)), SIMDE_MM_FROUND_TO_ZERO, simde_mm_set_pd(SIMDE_FLOAT64_C( 54.00), SIMDE_FLOAT64_C( -54.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -31.89), SIMDE_FLOAT64_C( 136.86)), SIMDE_MM_FROUND_TO_ZERO, simde_mm_set_pd(SIMDE_FLOAT64_C( -31.00), SIMDE_FLOAT64_C( 136.00)) } }; for (size_t i = 0 ; i < 2 ; i++) { simde__m128d r = simde_mm_round_pd(test_vec[i].a, SIMDE_MM_FROUND_TO_NEAREST_INT); simde_assert_m128d_close(r, test_vec[i].r, 1); } for (size_t i = 2 ; i < 4 ; i++) { simde__m128d r = simde_mm_round_pd(test_vec[i].a, SIMDE_MM_FROUND_TO_NEG_INF); simde_assert_m128d_close(r, test_vec[i].r, 1); } for (size_t i = 4 ; i < 6 ; i++) { simde__m128d r = simde_mm_round_pd(test_vec[i].a, SIMDE_MM_FROUND_TO_POS_INF); simde_assert_m128d_close(r, test_vec[i].r, 1); } for (size_t i = 6 ; i < 8 ; i++) { simde__m128d r = simde_mm_round_pd(test_vec[i].a, SIMDE_MM_FROUND_TO_ZERO); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; }/* test_simde_mm_round_ps_nearest: checks simde_mm_round_ps with SIMDE_MM_FROUND_TO_NEAREST_INT. The active "#if 1" branch loads each a[4]/r[4] pair with simde_mm_loadu_ps and compares through simde_test_x86_assert_equal_f32x4 (third argument 1), then returns 0; inputs ending in .50 are expected to land on the even neighbour (942.50 -> 942.00, 909.50 -> 910.00). The disabled "#else" branch fills a buffer through simde_test_x86_random_f32x4_full (called with -1000.0f and 1000.0f), rounds eight extracted inputs, emits each input and result with simde_test_x86_write_f32x4 and returns 1; presumably it exists to regenerate the table - confirm before relying on it. */ static int test_simde_mm_round_ps_nearest (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 a[4]; const
simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 942.50), SIMDE_FLOAT32_C( -170.00), SIMDE_FLOAT32_C( -707.50), SIMDE_FLOAT32_C( -141.50) }, { SIMDE_FLOAT32_C( 942.00), SIMDE_FLOAT32_C( -170.00), SIMDE_FLOAT32_C( -708.00), SIMDE_FLOAT32_C( -142.00) } }, { { SIMDE_FLOAT32_C( 730.50), SIMDE_FLOAT32_C( -382.50), SIMDE_FLOAT32_C( -584.75), SIMDE_FLOAT32_C( 316.00) }, { SIMDE_FLOAT32_C( 730.00), SIMDE_FLOAT32_C( -382.00), SIMDE_FLOAT32_C( -585.00), SIMDE_FLOAT32_C( 316.00) } }, { { SIMDE_FLOAT32_C( 664.50), SIMDE_FLOAT32_C( 102.50), SIMDE_FLOAT32_C( -716.00), SIMDE_FLOAT32_C( 350.50) }, { SIMDE_FLOAT32_C( 664.00), SIMDE_FLOAT32_C( 102.00), SIMDE_FLOAT32_C( -716.00), SIMDE_FLOAT32_C( 350.00) } }, { { SIMDE_FLOAT32_C( 658.50), SIMDE_FLOAT32_C( 697.50), SIMDE_FLOAT32_C( -634.50), SIMDE_FLOAT32_C( -560.00) }, { SIMDE_FLOAT32_C( 658.00), SIMDE_FLOAT32_C( 698.00), SIMDE_FLOAT32_C( -634.00), SIMDE_FLOAT32_C( -560.00) } }, { { SIMDE_FLOAT32_C( 909.50), SIMDE_FLOAT32_C( 418.80), SIMDE_FLOAT32_C( -300.50), SIMDE_FLOAT32_C( 899.00) }, { SIMDE_FLOAT32_C( 910.00), SIMDE_FLOAT32_C( 419.00), SIMDE_FLOAT32_C( -300.00), SIMDE_FLOAT32_C( 899.00) } }, { { SIMDE_FLOAT32_C( -435.50), SIMDE_FLOAT32_C( -752.50), SIMDE_FLOAT32_C( 535.50), SIMDE_FLOAT32_C( -728.50) }, { SIMDE_FLOAT32_C( -436.00), SIMDE_FLOAT32_C( -752.00), SIMDE_FLOAT32_C( 536.00), SIMDE_FLOAT32_C( -728.00) } }, { { SIMDE_FLOAT32_C( 455.50), SIMDE_FLOAT32_C( -826.50), SIMDE_FLOAT32_C( 474.00), SIMDE_FLOAT32_C( 43.62) }, { SIMDE_FLOAT32_C( 456.00), SIMDE_FLOAT32_C( -826.00), SIMDE_FLOAT32_C( 474.00), SIMDE_FLOAT32_C( 44.00) } }, { { SIMDE_FLOAT32_C( -224.50), SIMDE_FLOAT32_C( 195.50), SIMDE_FLOAT32_C( 634.46), SIMDE_FLOAT32_C( -670.50) }, { SIMDE_FLOAT32_C( -224.00), SIMDE_FLOAT32_C( 196.00), SIMDE_FLOAT32_C( 634.00), SIMDE_FLOAT32_C( -670.00) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_round_ps(a, 
SIMDE_MM_FROUND_TO_NEAREST_INT); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float32 values[8 * 2 * sizeof(simde__m128)]; simde_test_x86_random_f32x4_full(8, 2, values, -1000.0f, 1000.0f, SIMDE_TEST_VEC_FLOAT_ROUND); for (size_t i = 0 ; i < 8 ; i++) { simde__m128 a = simde_test_x86_random_extract_f32x4(i, 1, 0, values); simde__m128 r = simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT); simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif }/* test_simde_mm_round_ps_ninf: checks simde_mm_round_ps with SIMDE_MM_FROUND_TO_NEG_INF. The active "#if 1" branch loads each a[4]/r[4] pair with simde_mm_loadu_ps and compares through simde_test_x86_assert_equal_f32x4 (third argument 1), then returns 0; every expected value is the nearest integer at or below the input (-429.50 -> -430.00, 0.50 -> 0.00). The disabled "#else" branch fills a buffer through simde_test_x86_random_f32x4_full (called with -1000.0f and 1000.0f), rounds eight extracted inputs, emits each input and result with simde_test_x86_write_f32x4 and returns 1; presumably it exists to regenerate the table - confirm before relying on it. */ static int test_simde_mm_round_ps_ninf (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 457.50), SIMDE_FLOAT32_C( 497.50), SIMDE_FLOAT32_C( -429.50), SIMDE_FLOAT32_C( 34.50) }, { SIMDE_FLOAT32_C( 457.00), SIMDE_FLOAT32_C( 497.00), SIMDE_FLOAT32_C( -430.00), SIMDE_FLOAT32_C( 34.00) } }, { { SIMDE_FLOAT32_C( 657.50), SIMDE_FLOAT32_C( 359.50), SIMDE_FLOAT32_C( -832.50), SIMDE_FLOAT32_C( -675.50) }, { SIMDE_FLOAT32_C( 657.00), SIMDE_FLOAT32_C( 359.00), SIMDE_FLOAT32_C( -833.00), SIMDE_FLOAT32_C( -676.00) } }, { { SIMDE_FLOAT32_C( -712.50), SIMDE_FLOAT32_C( -7.50), SIMDE_FLOAT32_C( 505.50), SIMDE_FLOAT32_C( -33.50) }, { SIMDE_FLOAT32_C( -713.00), SIMDE_FLOAT32_C( -8.00), SIMDE_FLOAT32_C( 505.00), SIMDE_FLOAT32_C( -34.00) } }, { { SIMDE_FLOAT32_C( 866.50), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( -890.50), SIMDE_FLOAT32_C( -118.50) }, { SIMDE_FLOAT32_C( 866.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -891.00), SIMDE_FLOAT32_C( -119.00) } }, { { SIMDE_FLOAT32_C( -593.50), SIMDE_FLOAT32_C( -471.50), SIMDE_FLOAT32_C( -382.50), SIMDE_FLOAT32_C( -230.50) }, { SIMDE_FLOAT32_C( -594.00), SIMDE_FLOAT32_C( -472.00), SIMDE_FLOAT32_C( -383.00), SIMDE_FLOAT32_C( -231.00) } }, { { SIMDE_FLOAT32_C( 438.44), SIMDE_FLOAT32_C( 337.97), SIMDE_FLOAT32_C( -5.50), SIMDE_FLOAT32_C( -139.50) }, {
SIMDE_FLOAT32_C( 438.00), SIMDE_FLOAT32_C( 337.00), SIMDE_FLOAT32_C( -6.00), SIMDE_FLOAT32_C( -140.00) } }, { { SIMDE_FLOAT32_C( 860.50), SIMDE_FLOAT32_C( 968.50), SIMDE_FLOAT32_C( -150.91), SIMDE_FLOAT32_C( 701.26) }, { SIMDE_FLOAT32_C( 860.00), SIMDE_FLOAT32_C( 968.00), SIMDE_FLOAT32_C( -151.00), SIMDE_FLOAT32_C( 701.00) } }, { { SIMDE_FLOAT32_C( 575.50), SIMDE_FLOAT32_C( -179.50), SIMDE_FLOAT32_C( -648.50), SIMDE_FLOAT32_C( -126.48) }, { SIMDE_FLOAT32_C( 575.00), SIMDE_FLOAT32_C( -180.00), SIMDE_FLOAT32_C( -649.00), SIMDE_FLOAT32_C( -127.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float32 values[8 * 2 * sizeof(simde__m128)]; simde_test_x86_random_f32x4_full(8, 2, values, -1000.0f, 1000.0f, SIMDE_TEST_VEC_FLOAT_ROUND); for (size_t i = 0 ; i < 8 ; i++) { simde__m128 a = simde_test_x86_random_extract_f32x4(i, 1, 0, values); simde__m128 r = simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif }/* test_simde_mm_round_ps_pinf: checks simde_mm_round_ps with SIMDE_MM_FROUND_TO_POS_INF. The active "#if 1" branch loads each a[4]/r[4] pair with simde_mm_loadu_ps and compares through simde_test_x86_assert_equal_f32x4 (third argument 1), then returns 0; every expected value is the nearest integer at or above the input (-941.50 -> -941.00, 506.50 -> 507.00, -181.00 unchanged). The disabled "#else" branch fills a buffer through simde_test_x86_random_f32x4_full (called with -1000.0f and 1000.0f), rounds eight extracted inputs, emits each input and result with simde_test_x86_write_f32x4 and returns 1; presumably it exists to regenerate the table - confirm before relying on it. */ static int test_simde_mm_round_ps_pinf (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -941.50), SIMDE_FLOAT32_C( -625.50), SIMDE_FLOAT32_C( 506.50), SIMDE_FLOAT32_C( -181.00) }, { SIMDE_FLOAT32_C( -941.00), SIMDE_FLOAT32_C( -625.00), SIMDE_FLOAT32_C( 507.00), SIMDE_FLOAT32_C( -181.00) } }, { { SIMDE_FLOAT32_C( 54.00), SIMDE_FLOAT32_C( 573.50), SIMDE_FLOAT32_C( 234.50), SIMDE_FLOAT32_C( -404.50) }, { SIMDE_FLOAT32_C( 54.00), SIMDE_FLOAT32_C( 574.00), SIMDE_FLOAT32_C( 235.00), SIMDE_FLOAT32_C( -404.00) } }, { { SIMDE_FLOAT32_C( 433.50), SIMDE_FLOAT32_C( -592.50),
SIMDE_FLOAT32_C( -199.50), SIMDE_FLOAT32_C( 615.00) }, { SIMDE_FLOAT32_C( 434.00), SIMDE_FLOAT32_C( -592.00), SIMDE_FLOAT32_C( -199.00), SIMDE_FLOAT32_C( 615.00) } }, { { SIMDE_FLOAT32_C( -78.50), SIMDE_FLOAT32_C( 36.50), SIMDE_FLOAT32_C( 27.00), SIMDE_FLOAT32_C( 205.50) }, { SIMDE_FLOAT32_C( -78.00), SIMDE_FLOAT32_C( 37.00), SIMDE_FLOAT32_C( 27.00), SIMDE_FLOAT32_C( 206.00) } }, { { SIMDE_FLOAT32_C( 34.50), SIMDE_FLOAT32_C( 775.75), SIMDE_FLOAT32_C( -628.50), SIMDE_FLOAT32_C( -753.50) }, { SIMDE_FLOAT32_C( 35.00), SIMDE_FLOAT32_C( 776.00), SIMDE_FLOAT32_C( -628.00), SIMDE_FLOAT32_C( -753.00) } }, { { SIMDE_FLOAT32_C( -492.50), SIMDE_FLOAT32_C( -172.50), SIMDE_FLOAT32_C( 519.50), SIMDE_FLOAT32_C( 718.50) }, { SIMDE_FLOAT32_C( -492.00), SIMDE_FLOAT32_C( -172.00), SIMDE_FLOAT32_C( 520.00), SIMDE_FLOAT32_C( 719.00) } }, { { SIMDE_FLOAT32_C( -261.50), SIMDE_FLOAT32_C( -189.00), SIMDE_FLOAT32_C( -542.50), SIMDE_FLOAT32_C( 337.00) }, { SIMDE_FLOAT32_C( -261.00), SIMDE_FLOAT32_C( -189.00), SIMDE_FLOAT32_C( -542.00), SIMDE_FLOAT32_C( 337.00) } }, { { SIMDE_FLOAT32_C( -186.50), SIMDE_FLOAT32_C( 720.50), SIMDE_FLOAT32_C( 36.50), SIMDE_FLOAT32_C( 150.50) }, { SIMDE_FLOAT32_C( -186.00), SIMDE_FLOAT32_C( 721.00), SIMDE_FLOAT32_C( 37.00), SIMDE_FLOAT32_C( 151.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float32 values[8 * 2 * sizeof(simde__m128)]; simde_test_x86_random_f32x4_full(8, 2, values, -1000.0f, 1000.0f, SIMDE_TEST_VEC_FLOAT_ROUND); for (size_t i = 0 ; i < 8 ; i++) { simde__m128 a = simde_test_x86_random_extract_f32x4(i, 1, 0, values); simde__m128 r = simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x4(2, r, 
SIMDE_TEST_VEC_POS_LAST); } return 1; #endif }/* test_simde_mm_round_ps_zero: checks simde_mm_round_ps with SIMDE_MM_FROUND_TO_ZERO. The active "#if 1" branch loads each a[4]/r[4] pair with simde_mm_loadu_ps and compares through simde_test_x86_assert_equal_f32x4 (third argument 1), then returns 0; every expected value is the input with its fractional part dropped (-732.50 -> -732.00, 255.50 -> 255.00). The disabled "#else" branch fills a buffer through simde_test_x86_random_f32x4_full (called with -1000.0f and 1000.0f), rounds eight extracted inputs, emits each input and result with simde_test_x86_write_f32x4 and returns 1; presumably it exists to regenerate the table - confirm before relying on it. */ static int test_simde_mm_round_ps_zero (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -732.50), SIMDE_FLOAT32_C( 255.50), SIMDE_FLOAT32_C( -865.00), SIMDE_FLOAT32_C( 272.50) }, { SIMDE_FLOAT32_C( -732.00), SIMDE_FLOAT32_C( 255.00), SIMDE_FLOAT32_C( -865.00), SIMDE_FLOAT32_C( 272.00) } }, { { SIMDE_FLOAT32_C( 850.50), SIMDE_FLOAT32_C( 506.50), SIMDE_FLOAT32_C( 215.50), SIMDE_FLOAT32_C( -237.54) }, { SIMDE_FLOAT32_C( 850.00), SIMDE_FLOAT32_C( 506.00), SIMDE_FLOAT32_C( 215.00), SIMDE_FLOAT32_C( -237.00) } }, { { SIMDE_FLOAT32_C( 585.52), SIMDE_FLOAT32_C( 750.50), SIMDE_FLOAT32_C( 284.50), SIMDE_FLOAT32_C( 551.50) }, { SIMDE_FLOAT32_C( 585.00), SIMDE_FLOAT32_C( 750.00), SIMDE_FLOAT32_C( 284.00), SIMDE_FLOAT32_C( 551.00) } }, { { SIMDE_FLOAT32_C( -191.50), SIMDE_FLOAT32_C( -57.50), SIMDE_FLOAT32_C( -785.50), SIMDE_FLOAT32_C( -934.50) }, { SIMDE_FLOAT32_C( -191.00), SIMDE_FLOAT32_C( -57.00), SIMDE_FLOAT32_C( -785.00), SIMDE_FLOAT32_C( -934.00) } }, { { SIMDE_FLOAT32_C( 571.50), SIMDE_FLOAT32_C( -212.50), SIMDE_FLOAT32_C( 548.50), SIMDE_FLOAT32_C( 205.50) }, { SIMDE_FLOAT32_C( 571.00), SIMDE_FLOAT32_C( -212.00), SIMDE_FLOAT32_C( 548.00), SIMDE_FLOAT32_C( 205.00) } }, { { SIMDE_FLOAT32_C( 646.50), SIMDE_FLOAT32_C( -543.50), SIMDE_FLOAT32_C( 793.50), SIMDE_FLOAT32_C( 926.50) }, { SIMDE_FLOAT32_C( 646.00), SIMDE_FLOAT32_C( -543.00), SIMDE_FLOAT32_C( 793.00), SIMDE_FLOAT32_C( 926.00) } }, { { SIMDE_FLOAT32_C( -551.50), SIMDE_FLOAT32_C( -790.50), SIMDE_FLOAT32_C( 174.00), SIMDE_FLOAT32_C( 230.50) }, { SIMDE_FLOAT32_C( -551.00), SIMDE_FLOAT32_C( -790.00), SIMDE_FLOAT32_C( 174.00), SIMDE_FLOAT32_C( 230.00) } }, { { SIMDE_FLOAT32_C( 725.50), SIMDE_FLOAT32_C( 805.50), SIMDE_FLOAT32_C( -665.00), SIMDE_FLOAT32_C( -370.50) }, { SIMDE_FLOAT32_C( 725.00), SIMDE_FLOAT32_C( 805.00), SIMDE_FLOAT32_C( -665.00), SIMDE_FLOAT32_C( -370.00) } } }; for
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_ZERO); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float32 values[8 * 2 * sizeof(simde__m128)]; simde_test_x86_random_f32x4_full(8, 2, values, -1000.0f, 1000.0f, SIMDE_TEST_VEC_FLOAT_ROUND); for (size_t i = 0 ; i < 8 ; i++) { simde__m128 a = simde_test_x86_random_extract_f32x4(i, 1, 0, values); simde__m128 r = simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_ZERO); simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif }/* test_simde_mm_round_ps: checks simde_mm_round_ps on eight fixed vectors of four floats. Each entry stores a, a rounding field and the expected r, but the rounding field is never read: four separate loops pass the mode as a literal, covering entries 0-1 (SIMDE_MM_FROUND_TO_NEAREST_INT), 2-3 (SIMDE_MM_FROUND_TO_NEG_INF), 4-5 (SIMDE_MM_FROUND_TO_POS_INF) and 6-7 (SIMDE_MM_FROUND_TO_ZERO), so the table order and those index ranges must be kept in step. NOTE(review): presumably the mode is hard-coded per loop because it has to be a compile-time constant - confirm. Results are compared with simde_assert_m128_close using 1 as the third argument. Returns 0 once all entries have been checked. */ static int test_simde_mm_round_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; int rounding; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 201.21), SIMDE_FLOAT32_C( -972.67), SIMDE_FLOAT32_C( -880.37), SIMDE_FLOAT32_C( 126.05)), SIMDE_MM_FROUND_TO_NEAREST_INT, simde_mm_set_ps(SIMDE_FLOAT32_C( 201.00), SIMDE_FLOAT32_C( -973.00), SIMDE_FLOAT32_C( -880.00), SIMDE_FLOAT32_C( 126.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -339.94), SIMDE_FLOAT32_C( 81.10), SIMDE_FLOAT32_C( 664.61), SIMDE_FLOAT32_C( 207.74)), SIMDE_MM_FROUND_TO_NEAREST_INT, simde_mm_set_ps(SIMDE_FLOAT32_C( -340.00), SIMDE_FLOAT32_C( 81.00), SIMDE_FLOAT32_C( 665.00), SIMDE_FLOAT32_C( 208.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -476.74), SIMDE_FLOAT32_C( 320.44), SIMDE_FLOAT32_C( -89.85), SIMDE_FLOAT32_C( -531.60)), SIMDE_MM_FROUND_TO_NEG_INF, simde_mm_set_ps(SIMDE_FLOAT32_C( -477.00), SIMDE_FLOAT32_C( 320.00), SIMDE_FLOAT32_C( -90.00), SIMDE_FLOAT32_C( -532.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -802.32), SIMDE_FLOAT32_C( -381.83), SIMDE_FLOAT32_C( -743.96), SIMDE_FLOAT32_C( -180.00)), SIMDE_MM_FROUND_TO_NEG_INF, simde_mm_set_ps(SIMDE_FLOAT32_C( -803.00), SIMDE_FLOAT32_C( -382.00), SIMDE_FLOAT32_C( -744.00), SIMDE_FLOAT32_C(
-180.00)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( 231.59), SIMDE_FLOAT32_C( 681.41), SIMDE_FLOAT32_C( 561.95), SIMDE_FLOAT32_C( -598.34)),
      SIMDE_MM_FROUND_TO_POS_INF,
      simde_mm_set_ps(SIMDE_FLOAT32_C( 232.00), SIMDE_FLOAT32_C( 682.00), SIMDE_FLOAT32_C( 562.00), SIMDE_FLOAT32_C( -598.00)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -440.45), SIMDE_FLOAT32_C( 343.18), SIMDE_FLOAT32_C( 88.66), SIMDE_FLOAT32_C( -98.54)),
      SIMDE_MM_FROUND_TO_POS_INF,
      simde_mm_set_ps(SIMDE_FLOAT32_C( -440.00), SIMDE_FLOAT32_C( 344.00), SIMDE_FLOAT32_C( 89.00), SIMDE_FLOAT32_C( -98.00)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -128.87), SIMDE_FLOAT32_C( -558.72), SIMDE_FLOAT32_C( 864.66), SIMDE_FLOAT32_C( -576.19)),
      SIMDE_MM_FROUND_TO_ZERO,
      simde_mm_set_ps(SIMDE_FLOAT32_C( -128.00), SIMDE_FLOAT32_C( -558.00), SIMDE_FLOAT32_C( 864.00), SIMDE_FLOAT32_C( -576.00)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( 669.69), SIMDE_FLOAT32_C( 902.50), SIMDE_FLOAT32_C( -479.42), SIMDE_FLOAT32_C( 565.85)),
      SIMDE_MM_FROUND_TO_ZERO,
      simde_mm_set_ps(SIMDE_FLOAT32_C( 669.00), SIMDE_FLOAT32_C( 902.00), SIMDE_FLOAT32_C( -479.00), SIMDE_FLOAT32_C( 565.00)) }
  };

  /* One loop per rounding mode; the index ranges must stay in step with the
   * row order of test_vec.
   * NOTE(review): the mode is written as a literal in each call, presumably
   * because the underlying intrinsic needs a compile-time immediate -
   * confirm before replacing it with test_vec[i].rounding. */
  for (size_t i = 0 ; i < 2 ; i++) {
    simde__m128 r = simde_mm_round_ps(test_vec[i].a, SIMDE_MM_FROUND_TO_NEAREST_INT);
    simde_assert_m128_close(r, test_vec[i].r, 1);
  }
  for (size_t i = 2 ; i < 4 ; i++) {
    simde__m128 r = simde_mm_round_ps(test_vec[i].a, SIMDE_MM_FROUND_TO_NEG_INF);
    simde_assert_m128_close(r, test_vec[i].r, 1);
  }
  for (size_t i = 4 ; i < 6 ; i++) {
    simde__m128 r = simde_mm_round_ps(test_vec[i].a, SIMDE_MM_FROUND_TO_POS_INF);
    simde_assert_m128_close(r, test_vec[i].r, 1);
  }
  for (size_t i = 6 ; i < 8 ; i++) {
    simde__m128 r = simde_mm_round_ps(test_vec[i].a, SIMDE_MM_FROUND_TO_ZERO);
    simde_assert_m128_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Checks simde_mm_round_sd(a, b, mode) under four rounding constants, two
 * rows per mode (NEAREST_INT, NEG_INF, POS_INF, ZERO).
 *
 * In every row the expected vector repeats the first-listed element of a
 * unchanged and carries the second-listed element of b rounded to an
 * integer (for example a = (-57.59, -91.48), b = (-114.01, 129.18) gives
 * r = (-57.59, 129.00)).  Returns 0 after all rows have been compared. */
static int
test_simde_mm_round_sd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128d a;  /* supplies the element that passes through unchanged */
    simde__m128d b;  /* supplies the element that gets rounded */
    int rounding;    /* mode this row was written for; not read by the loops */
    simde__m128d r;  /* expected result */
  } test_vec[8] = {
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -57.59), SIMDE_FLOAT64_C( -91.48)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -114.01), SIMDE_FLOAT64_C( 129.18)),
      SIMDE_MM_FROUND_TO_NEAREST_INT,
      simde_mm_set_pd(SIMDE_FLOAT64_C( -57.59), SIMDE_FLOAT64_C( 129.00)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( 367.02), SIMDE_FLOAT64_C( -228.73)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -106.57), SIMDE_FLOAT64_C( -248.85)),
      SIMDE_MM_FROUND_TO_NEAREST_INT,
      simde_mm_set_pd(SIMDE_FLOAT64_C( 367.02), SIMDE_FLOAT64_C( -249.00)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( 218.19), SIMDE_FLOAT64_C( -121.02)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -367.84), SIMDE_FLOAT64_C( -492.19)),
      SIMDE_MM_FROUND_TO_NEG_INF,
      simde_mm_set_pd(SIMDE_FLOAT64_C( 218.19), SIMDE_FLOAT64_C( -493.00)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( 706.26), SIMDE_FLOAT64_C( 710.85)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -391.17), SIMDE_FLOAT64_C( -834.44)),
      SIMDE_MM_FROUND_TO_NEG_INF,
      simde_mm_set_pd(SIMDE_FLOAT64_C( 706.26), SIMDE_FLOAT64_C( -835.00)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -784.90), SIMDE_FLOAT64_C( -810.73)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -554.44), SIMDE_FLOAT64_C( -463.60)),
      SIMDE_MM_FROUND_TO_POS_INF,
      simde_mm_set_pd(SIMDE_FLOAT64_C( -784.90), SIMDE_FLOAT64_C( -463.00)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -997.83), SIMDE_FLOAT64_C( 6.91)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -51.24), SIMDE_FLOAT64_C( -801.09)),
      SIMDE_MM_FROUND_TO_POS_INF,
      simde_mm_set_pd(SIMDE_FLOAT64_C( -997.83), SIMDE_FLOAT64_C( -801.00)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( -237.53), SIMDE_FLOAT64_C( 215.82)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( -338.32), SIMDE_FLOAT64_C( -289.00)),
      SIMDE_MM_FROUND_TO_ZERO,
      simde_mm_set_pd(SIMDE_FLOAT64_C( -237.53), SIMDE_FLOAT64_C( -289.00)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C( 964.37), SIMDE_FLOAT64_C( -304.03)),
      simde_mm_set_pd(SIMDE_FLOAT64_C( 237.14), SIMDE_FLOAT64_C( 621.16)),
      SIMDE_MM_FROUND_TO_ZERO,
      simde_mm_set_pd(SIMDE_FLOAT64_C( 964.37), SIMDE_FLOAT64_C( 621.00)) }
  };

  /* Rows 0-1: round to nearest. */
  for (size_t i = 0 ; i < 2 ; i++) {
    simde__m128d r = simde_mm_round_sd(test_vec[i].a, test_vec[i].b,
SIMDE_MM_FROUND_TO_NEAREST_INT);
    simde_assert_m128d_close(r, test_vec[i].r, 1);
  }
  /* Rows 2-3: round toward negative infinity. */
  for (size_t i = 2 ; i < 4 ; i++) {
    simde__m128d r = simde_mm_round_sd(test_vec[i].a, test_vec[i].b, SIMDE_MM_FROUND_TO_NEG_INF);
    simde_assert_m128d_close(r, test_vec[i].r, 1);
  }
  /* Rows 4-5: round toward positive infinity. */
  for (size_t i = 4 ; i < 6 ; i++) {
    simde__m128d r = simde_mm_round_sd(test_vec[i].a, test_vec[i].b, SIMDE_MM_FROUND_TO_POS_INF);
    simde_assert_m128d_close(r, test_vec[i].r, 1);
  }
  /* Rows 6-7: round toward zero. */
  for (size_t i = 6 ; i < 8 ; i++) {
    simde__m128d r = simde_mm_round_sd(test_vec[i].a, test_vec[i].b, SIMDE_MM_FROUND_TO_ZERO);
    simde_assert_m128d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Checks simde_mm_round_ss(a, b, mode) under four rounding constants, two
 * rows per mode (NEAREST_INT, NEG_INF, POS_INF, ZERO).
 *
 * In every row the expected vector repeats the first three listed elements
 * of a unchanged and carries the last-listed element of b rounded to an
 * integer (for example b's -666.61 becomes -667.00 under NEAREST_INT). */
static int
test_simde_mm_round_ss(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128 a;   /* supplies the three elements that pass through */
    simde__m128 b;   /* supplies the element that gets rounded */
    int rounding;    /* mode this row was written for; not read by the loops */
    simde__m128 r;   /* expected result */
  } test_vec[8] = {
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -963.12), SIMDE_FLOAT32_C( -638.96), SIMDE_FLOAT32_C( -855.75), SIMDE_FLOAT32_C( 789.49)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 969.82), SIMDE_FLOAT32_C( 14.36), SIMDE_FLOAT32_C( 60.33), SIMDE_FLOAT32_C( -666.61)),
      SIMDE_MM_FROUND_TO_NEAREST_INT,
      simde_mm_set_ps(SIMDE_FLOAT32_C( -963.12), SIMDE_FLOAT32_C( -638.96), SIMDE_FLOAT32_C( -855.75), SIMDE_FLOAT32_C( -667.00)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -53.45), SIMDE_FLOAT32_C( 397.58), SIMDE_FLOAT32_C( 386.64), SIMDE_FLOAT32_C( 779.00)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 497.83), SIMDE_FLOAT32_C( -657.35), SIMDE_FLOAT32_C( -712.13), SIMDE_FLOAT32_C( 600.92)),
      SIMDE_MM_FROUND_TO_NEAREST_INT,
      simde_mm_set_ps(SIMDE_FLOAT32_C( -53.45), SIMDE_FLOAT32_C( 397.58), SIMDE_FLOAT32_C( 386.64), SIMDE_FLOAT32_C( 601.00)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -527.69), SIMDE_FLOAT32_C( -487.47), SIMDE_FLOAT32_C( -52.23), SIMDE_FLOAT32_C( 18.71)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -912.96), SIMDE_FLOAT32_C( 897.40), SIMDE_FLOAT32_C( -873.59), SIMDE_FLOAT32_C( 52.18)),
      SIMDE_MM_FROUND_TO_NEG_INF,
      simde_mm_set_ps(SIMDE_FLOAT32_C( -527.69), SIMDE_FLOAT32_C( -487.47), SIMDE_FLOAT32_C( -52.23), SIMDE_FLOAT32_C( 52.00)) },
    {
      simde_mm_set_ps(SIMDE_FLOAT32_C( 857.48), SIMDE_FLOAT32_C( -849.82), SIMDE_FLOAT32_C( 971.09), SIMDE_FLOAT32_C( -929.13)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( 871.04), SIMDE_FLOAT32_C( 284.36), SIMDE_FLOAT32_C( 561.82), SIMDE_FLOAT32_C( -146.68)),
      SIMDE_MM_FROUND_TO_NEG_INF,
      simde_mm_set_ps(SIMDE_FLOAT32_C( 857.48), SIMDE_FLOAT32_C( -849.82), SIMDE_FLOAT32_C( 971.09), SIMDE_FLOAT32_C( -147.00)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -859.81), SIMDE_FLOAT32_C( -105.89), SIMDE_FLOAT32_C( -18.60), SIMDE_FLOAT32_C( -214.59)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -360.47), SIMDE_FLOAT32_C( 472.18), SIMDE_FLOAT32_C( 960.99), SIMDE_FLOAT32_C( -396.53)),
      SIMDE_MM_FROUND_TO_POS_INF,
      simde_mm_set_ps(SIMDE_FLOAT32_C( -859.81), SIMDE_FLOAT32_C( -105.89), SIMDE_FLOAT32_C( -18.60), SIMDE_FLOAT32_C( -396.00)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -657.82), SIMDE_FLOAT32_C( -922.27), SIMDE_FLOAT32_C( -130.03), SIMDE_FLOAT32_C( 877.75)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -413.60), SIMDE_FLOAT32_C( 824.69), SIMDE_FLOAT32_C( 124.72), SIMDE_FLOAT32_C( 312.34)),
      SIMDE_MM_FROUND_TO_POS_INF,
      simde_mm_set_ps(SIMDE_FLOAT32_C( -657.82), SIMDE_FLOAT32_C( -922.27), SIMDE_FLOAT32_C( -130.03), SIMDE_FLOAT32_C( 313.00)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( -636.42), SIMDE_FLOAT32_C( -978.69), SIMDE_FLOAT32_C( 787.73), SIMDE_FLOAT32_C( -705.33)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -179.55), SIMDE_FLOAT32_C( 391.06), SIMDE_FLOAT32_C( -805.63), SIMDE_FLOAT32_C( 831.10)),
      SIMDE_MM_FROUND_TO_ZERO,
      simde_mm_set_ps(SIMDE_FLOAT32_C( -636.42), SIMDE_FLOAT32_C( -978.69), SIMDE_FLOAT32_C( 787.73), SIMDE_FLOAT32_C( 831.00)) },
    { simde_mm_set_ps(SIMDE_FLOAT32_C( 529.08), SIMDE_FLOAT32_C( -665.00), SIMDE_FLOAT32_C( 786.13), SIMDE_FLOAT32_C( 843.11)),
      simde_mm_set_ps(SIMDE_FLOAT32_C( -295.93), SIMDE_FLOAT32_C( -148.90), SIMDE_FLOAT32_C( 483.10), SIMDE_FLOAT32_C( -790.07)),
      SIMDE_MM_FROUND_TO_ZERO,
      simde_mm_set_ps(SIMDE_FLOAT32_C( 529.08), SIMDE_FLOAT32_C( -665.00), SIMDE_FLOAT32_C( 786.13), SIMDE_FLOAT32_C( -790.00)) }
  };

  /* One loop per rounding mode; the index ranges follow the row order of
   * test_vec (two rows per mode). */
  for (size_t i = 0 ; i < 2 ; i++) {
    simde__m128 r = simde_mm_round_ss(test_vec[i].a, test_vec[i].b, SIMDE_MM_FROUND_TO_NEAREST_INT);
    simde_assert_m128_close(r, test_vec[i].r, 1);
  }
  for (size_t i = 2 ; i < 4 ; i++) {
    simde__m128 r = simde_mm_round_ss(test_vec[i].a, test_vec[i].b, SIMDE_MM_FROUND_TO_NEG_INF);
    simde_assert_m128_close(r, test_vec[i].r, 1);
  }
  for (size_t i = 4 ; i < 6 ; i++) {
    simde__m128 r = simde_mm_round_ss(test_vec[i].a, test_vec[i].b, SIMDE_MM_FROUND_TO_POS_INF);
    simde_assert_m128_close(r, test_vec[i].r, 1);
  }
  for (size_t i = 6 ; i < 8 ; i++) {
    simde__m128 r = simde_mm_round_ss(test_vec[i].a, test_vec[i].b, SIMDE_MM_FROUND_TO_ZERO);
    simde_assert_m128_close(r, test_vec[i].r, 1);
  }

  return 0;
}

/* Checks simde_mm_stream_load_si128(): each row stores the same four 32-bit
 * values in a and r, so the value loaded through a pointer to a must equal
 * r.  Returns 0 after all rows have been compared. */
static int
test_simde_mm_stream_load_si128(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;  /* vector that is loaded from memory */
    simde__m128i r;  /* expected value; identical to a in every row */
  } test_vec[8] = {
    { simde_mm_set_epi32(INT32_C( -547904967), INT32_C(-1362990942), INT32_C( 2066137598), INT32_C(-1778922116)),
      simde_mm_set_epi32(INT32_C( -547904967), INT32_C(-1362990942), INT32_C( 2066137598), INT32_C(-1778922116)) },
    { simde_mm_set_epi32(INT32_C(-2087163583), INT32_C( 2115433203), INT32_C(-1978815624), INT32_C( 1025228414)),
      simde_mm_set_epi32(INT32_C(-2087163583), INT32_C( 2115433203), INT32_C(-1978815624), INT32_C( 1025228414)) },
    { simde_mm_set_epi32(INT32_C(-1244490055), INT32_C( 1943548537), INT32_C( 589236062), INT32_C( -751161899)),
      simde_mm_set_epi32(INT32_C(-1244490055), INT32_C( 1943548537), INT32_C( 589236062), INT32_C( -751161899)) },
    { simde_mm_set_epi32(INT32_C(-1274215918), INT32_C(-1858225286), INT32_C( -472727069), INT32_C( 528679049)),
      simde_mm_set_epi32(INT32_C(-1274215918), INT32_C(-1858225286), INT32_C( -472727069), INT32_C( 528679049)) },
    { simde_mm_set_epi32(INT32_C(-1568552575), INT32_C( 411221897), INT32_C(-1334506552), INT32_C( 1554165859)),
      simde_mm_set_epi32(INT32_C(-1568552575), INT32_C( 411221897), INT32_C(-1334506552), INT32_C( 1554165859)) },
    { simde_mm_set_epi32(INT32_C( 1044144940),
INT32_C(-1916851863), INT32_C( -589120926), INT32_C( 651795910)),
      simde_mm_set_epi32(INT32_C( 1044144940), INT32_C(-1916851863), INT32_C( -589120926), INT32_C( 651795910)) },
    { simde_mm_set_epi32(INT32_C( -25575503), INT32_C(-1782121708), INT32_C( 751836326), INT32_C( 1005598033)),
      simde_mm_set_epi32(INT32_C( -25575503), INT32_C(-1782121708), INT32_C( 751836326), INT32_C( 1005598033)) },
    { simde_mm_set_epi32(INT32_C(-1873195901), INT32_C( -450900536), INT32_C( 904584108), INT32_C( 251416593)),
      simde_mm_set_epi32(INT32_C(-1873195901), INT32_C( -450900536), INT32_C( 904584108), INT32_C( 251416593)) }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    /* When both SIMDE_X86_SSE4_1_NATIVE and SIMDE_NATIVE_ALIASES_TESTING are
     * defined, the address is cast to a non-const __m128i pointer first.
     * NOTE(review): presumably because the native prototype takes a
     * non-const __m128i* on some compilers - confirm. */
    #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_NATIVE_ALIASES_TESTING)
      simde__m128i r = simde_mm_stream_load_si128((__m128i*)&(test_vec[i].a));
    #else
      simde__m128i r = simde_mm_stream_load_si128(&(test_vec[i].a));
    #endif
    simde_assert_m128i_i32(r, ==, test_vec[i].r);
  }

  return 0;
}

/* Checks simde_mm_test_all_ones(): the table sets one, two or three of the
 * four 32-bit words to ~0 (expected result 0) and, in the last row, all
 * four (expected result 1). */
static int
test_simde_mm_test_all_ones(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;  /* vector under test */
    int r;           /* expected return value */
  } test_vec[8] = {
    { simde_x_mm_set_epu32(~UINT32_C( 0), UINT32_C(1993838502), UINT32_C(1216384299), UINT32_C(2484321284)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C( 708002161), ~UINT32_C( 0), UINT32_C(1641125312), UINT32_C(1784735378)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C(3153393974), UINT32_C(3299961049), ~UINT32_C( 0), UINT32_C( 256198680)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C(3219291087), UINT32_C(2995285646), UINT32_C(3353269087), ~UINT32_C( 0)),
      0 },
    { simde_x_mm_set_epu32(~UINT32_C( 0), UINT32_C(1590943288), UINT32_C(4263015539), UINT32_C(1245312939)),
      0 },
    { simde_x_mm_set_epu32(~UINT32_C( 0), ~UINT32_C( 0), UINT32_C( 453365074), UINT32_C( 94225678)),
      0 },
    { simde_x_mm_set_epu32(~UINT32_C( 0), ~UINT32_C( 0), ~UINT32_C( 0), UINT32_C(3354153743)),
      0 },
    { simde_x_mm_set_epu32(~UINT32_C( 0), ~UINT32_C( 0), ~UINT32_C( 0), ~UINT32_C( 0)),
      1 }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++)
  {
    /* Compare the returned flag with the table's expected value. */
    int r = simde_mm_test_all_ones(test_vec[i].a);
    simde_assert_equal_i(r, test_vec[i].r);
  }

  return 0;
}

/* Checks simde_mm_test_all_zeros(a, b) against eight fixed (a, b) pairs with
 * expected results 0 or 1.
 * NOTE(review): going by the intrinsic's name, 1 should mean that no bit is
 * set in both a and b; the rows were not re-derived here - confirm against
 * the Intel Intrinsics Guide. */
static int
test_simde_mm_test_all_zeros(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;  /* first operand */
    simde__m128i b;  /* second operand */
    int r;           /* expected return value */
  } test_vec[8] = {
    { simde_x_mm_set_epu32(UINT32_C(1201894317), UINT32_C(2319231887), UINT32_C(1402528928), UINT32_C(3687328034)),
      simde_x_mm_set_epu32(UINT32_C(1967137742), UINT32_C(1472851836), UINT32_C(2181608867), UINT32_C(1889365185)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C(2135448036), UINT32_C(1619848109), UINT32_C(2289839916), UINT32_C(3523984647)),
      simde_x_mm_set_epu32(UINT32_C(2046676780), UINT32_C(1214034345), UINT32_C( 106141409), UINT32_C(4219215983)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C( 657893603), UINT32_C(3270868697), UINT32_C(1314977284), UINT32_C(2544509951)),
      simde_x_mm_set_epu32(UINT32_C(2424312328), UINT32_C( 687898658), UINT32_C(2425031026), UINT32_C( 672223232)),
      1 },
    { simde_x_mm_set_epu32(UINT32_C(3537640743), UINT32_C( 372152333), UINT32_C(4133234814), UINT32_C( 396143667)),
      simde_x_mm_set_epu32(UINT32_C( 671105736), UINT32_C(3909093360), UINT32_C( 8553473), UINT32_C( 541283144)),
      1 },
    { simde_x_mm_set_epu32(UINT32_C(2182578541), UINT32_C(4064427053), UINT32_C(2602515508), UINT32_C(4049235221)),
      simde_x_mm_set_epu32(UINT32_C( 203975314), UINT32_C( 227393856), UINT32_C( 536871690), UINT32_C( 75760138)),
      1 },
    { simde_x_mm_set_epu32(UINT32_C( 152354605), UINT32_C(2176274914), UINT32_C(3011079004), UINT32_C(3847789567)),
      simde_x_mm_set_epu32(UINT32_C(1562291537), UINT32_C( 354162251), UINT32_C(1134881489), UINT32_C(2956892685)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C(2096532328), UINT32_C(1165454747), UINT32_C(3275710596), UINT32_C(1593733565)),
      simde_x_mm_set_epu32(UINT32_C(1988174269), UINT32_C(1339821769), UINT32_C(1810810088), UINT32_C( 840755548)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C(3588833624), UINT32_C( 610042197), UINT32_C(1310204993), UINT32_C(1509811379)),
      simde_x_mm_set_epu32(UINT32_C( 571869223),
UINT32_C(1235317762), UINT32_C( 29362474), UINT32_C( 570561100)),
      1 }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    /* Compare the returned flag with the table's expected value. */
    int r = simde_mm_test_all_zeros(test_vec[i].a, test_vec[i].b);
    simde_assert_equal_i(r, test_vec[i].r);
  }

  return 0;
}

/* Checks simde_mm_test_mix_ones_zeros(a, b) against eight fixed (a, b) pairs
 * with expected results 0 or 1.
 * NOTE(review): the meaning of the flag is not visible in this file; see the
 * Intel Intrinsics Guide entry for _mm_test_mix_ones_zeros before editing
 * rows. */
static int
test_simde_mm_test_mix_ones_zeros(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;  /* first operand */
    simde__m128i b;  /* second operand */
    int r;           /* expected return value */
  } test_vec[8] = {
    { simde_x_mm_set_epu32(UINT32_C( 817555810), UINT32_C(3902491734), UINT32_C(3977937498), UINT32_C(2772762421)),
      simde_x_mm_set_epu32(UINT32_C(1924103348), UINT32_C(1357836992), UINT32_C(4064253767), UINT32_C(3196943489)),
      1 },
    { simde_x_mm_set_epu32(UINT32_C(1969146297), UINT32_C(3716202658), UINT32_C( 759743671), UINT32_C(2926294225)),
      simde_x_mm_set_epu32(UINT32_C(2325820998), UINT32_C( 578764637), UINT32_C(3535223624), UINT32_C(1368673070)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C(3794246328), UINT32_C(2295817460), UINT32_C(2963593754), UINT32_C(2249033119)),
      simde_x_mm_set_epu32(UINT32_C(1810964868), UINT32_C(3773017844), UINT32_C(1790481610), UINT32_C(4104297207)),
      1 },
    { simde_x_mm_set_epu32(UINT32_C(3721359930), UINT32_C(2092677361), UINT32_C(2672746089), UINT32_C(2784499405)),
      simde_x_mm_set_epu32(UINT32_C( 573607365), UINT32_C(2202289934), UINT32_C(1622221206), UINT32_C(1510467890)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C(1688049548), UINT32_C( 227731129), UINT32_C(2187006143), UINT32_C(2191704962)),
      simde_x_mm_set_epu32(UINT32_C(3739754335), UINT32_C(1136006144), UINT32_C(2036111421), UINT32_C(3950298731)),
      1 },
    { simde_x_mm_set_epu32(UINT32_C(2420605154), UINT32_C(1034465439), UINT32_C( 270210360), UINT32_C(1245331438)),
      simde_x_mm_set_epu32(UINT32_C(1874362141), UINT32_C(3260501856), UINT32_C(4024756935), UINT32_C(3049635857)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C(2202152506), UINT32_C( 285372976), UINT32_C(4113851867), UINT32_C(1775650196)),
      simde_x_mm_set_epu32(UINT32_C(2951443467), UINT32_C(1206063205), UINT32_C(2278810499), UINT32_C(1561806132)),
      1 },
    {
simde_x_mm_set_epu32(UINT32_C(2386514979), UINT32_C(2103226597), UINT32_C( 427456035), UINT32_C(2075844151)),
      simde_x_mm_set_epu32(UINT32_C(2028307641), UINT32_C(3795980391), UINT32_C(1321164258), UINT32_C(1543009672)),
      1 }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    /* Compare the returned flag with the table's expected value. */
    int r = simde_mm_test_mix_ones_zeros(test_vec[i].a, test_vec[i].b);
    simde_assert_equal_i(r, test_vec[i].r);
  }

  return 0;
}

/* Checks simde_mm_testc_si128(a, b) against eight fixed (a, b) pairs with
 * expected results 0 or 1.
 * NOTE(review): the meaning of the flag is not visible in this file; see the
 * Intel Intrinsics Guide entry for _mm_testc_si128 (PTEST carry flag) before
 * editing rows. */
static int
test_simde_mm_testc_si128(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;  /* first operand */
    simde__m128i b;  /* second operand */
    int r;           /* expected return value */
  } test_vec[8] = {
    { simde_x_mm_set_epu32(UINT32_C(2830619595), UINT32_C(3721899845), UINT32_C(1506484780), UINT32_C(3754634237)),
      simde_x_mm_set_epu32(UINT32_C( 135762121), UINT32_C(1485021188), UINT32_C( 66080), UINT32_C(1279789969)),
      1 },
    { simde_x_mm_set_epu32(UINT32_C( 193385482), UINT32_C(2729396552), UINT32_C(3295558360), UINT32_C(1810466520)),
      simde_x_mm_set_epu32(UINT32_C(2126857072), UINT32_C(2130569152), UINT32_C( 589776977), UINT32_C(3538187411)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C( 786883487), UINT32_C(1935834902), UINT32_C(1176572533), UINT32_C(2648243675)),
      simde_x_mm_set_epu32(UINT32_C( 782262300), UINT32_C(1617050388), UINT32_C( 33620084), UINT32_C( 294672713)),
      1 },
    { simde_x_mm_set_epu32(UINT32_C(1041307471), UINT32_C( 45744950), UINT32_C(1982345789), UINT32_C(3152632048)),
      simde_x_mm_set_epu32(UINT32_C(1387031193), UINT32_C( 524221074), UINT32_C(2963552394), UINT32_C( 26167124)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C(4096311220), UINT32_C(1013684968), UINT32_C(3348640547), UINT32_C(2194828263)),
      simde_x_mm_set_epu32(UINT32_C(3288368404), UINT32_C( 873007168), UINT32_C( 127402785), UINT32_C( 43000416)),
      1 },
    { simde_x_mm_set_epu32(UINT32_C(3620270306), UINT32_C( 102683939), UINT32_C(1921801133), UINT32_C(4285709000)),
      simde_x_mm_set_epu32(UINT32_C(2218828002), UINT32_C( 67671297), UINT32_C(1384665613), UINT32_C(2030217920)),
      1 },
    { simde_x_mm_set_epu32(UINT32_C( 948424904), UINT32_C(1962159425), UINT32_C(1599818052),
UINT32_C( 324039513)),
      simde_x_mm_set_epu32(UINT32_C( 671486144), UINT32_C(1888759104), UINT32_C(1191182592), UINT32_C( 319836488)),
      1 },
    { simde_x_mm_set_epu32(UINT32_C(2926329185), UINT32_C(3270382597), UINT32_C( 628918622), UINT32_C(3097066353)),
      simde_x_mm_set_epu32(UINT32_C(2282240352), UINT32_C(3265794053), UINT32_C( 71305480), UINT32_C( 948529457)),
      1 }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    /* Compare the returned flag with the table's expected value. */
    int r = simde_mm_testc_si128(test_vec[i].a, test_vec[i].b);
    simde_assert_equal_i(r, test_vec[i].r);
  }

  return 0;
}

/* Checks simde_mm_testnzc_si128(a, b) against eight fixed (a, b) pairs with
 * expected results 0 or 1.
 * NOTE(review): the meaning of the flag is not visible in this file; see the
 * Intel Intrinsics Guide entry for _mm_testnzc_si128 before editing rows. */
static int
test_simde_mm_testnzc_si128(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;  /* first operand */
    simde__m128i b;  /* second operand */
    int r;           /* expected return value */
  } test_vec[8] = {
    { simde_x_mm_set_epu32(UINT32_C(1875425415), UINT32_C(3970564046), UINT32_C(3151364481), UINT32_C(3646821095)),
      simde_x_mm_set_epu32(UINT32_C( 939391786), UINT32_C(1418582693), UINT32_C(3583579146), UINT32_C(2978542122)),
      1 },
    { simde_x_mm_set_epu32(UINT32_C(1948733162), UINT32_C(3713693944), UINT32_C(2694213253), UINT32_C(1663204587)),
      simde_x_mm_set_epu32(UINT32_C(1140998794), UINT32_C(1141393520), UINT32_C(2685534720), UINT32_C(1124213826)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C(1383702507), UINT32_C(4170492791), UINT32_C( 81283149), UINT32_C(2572004949)),
      simde_x_mm_set_epu32(UINT32_C(1383112482), UINT32_C( 1049365), UINT32_C( 67110920), UINT32_C( 33301)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C( 122650173), UINT32_C(3420397620), UINT32_C(2510350310), UINT32_C(1202770589)),
      simde_x_mm_set_epu32(UINT32_C(3675302012), UINT32_C( 291962269), UINT32_C(1109447557), UINT32_C( 739260950)),
      1 },
    { simde_x_mm_set_epu32(UINT32_C(1579102285), UINT32_C( 270245457), UINT32_C(1078977019), UINT32_C(2085912657)),
      simde_x_mm_set_epu32(UINT32_C(1242112064), UINT32_C( 1606145), UINT32_C( 688393), UINT32_C( 402685968)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C(3394408789), UINT32_C(2660724232), UINT32_C(2317277326), UINT32_C(3204989336)),
      simde_x_mm_set_epu32(UINT32_C(1819319362), UINT32_C( 329857933),
UINT32_C(3391127410), UINT32_C( 743662239)),
      1 },
    { simde_x_mm_set_epu32(UINT32_C(4125624985), UINT32_C( 2864942), UINT32_C( 875515634), UINT32_C(2091317004)),
      simde_x_mm_set_epu32(UINT32_C( 617089160), UINT32_C( 112418), UINT32_C( 606079728), UINT32_C(1881276672)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C(2421997128), UINT32_C(3685823722), UINT32_C(2588454762), UINT32_C(2067412407)),
      simde_x_mm_set_epu32(UINT32_C(3926740437), UINT32_C( 589344338), UINT32_C(2958265721), UINT32_C(1121386306)),
      1 }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    /* Compare the returned flag with the table's expected value. */
    int r = simde_mm_testnzc_si128(test_vec[i].a, test_vec[i].b);
    simde_assert_equal_i(r, test_vec[i].r);
  }

  return 0;
}

/* Checks simde_mm_testz_si128(a, b) against eight fixed (a, b) pairs with
 * expected results 0 or 1.
 * NOTE(review): the meaning of the flag is not visible in this file; see the
 * Intel Intrinsics Guide entry for _mm_testz_si128 (PTEST zero flag) before
 * editing rows. */
static int
test_simde_mm_testz_si128(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;  /* first operand */
    simde__m128i b;  /* second operand */
    int r;           /* expected return value */
  } test_vec[8] = {
    { simde_x_mm_set_epu32(UINT32_C(3990889899), UINT32_C(4230789057), UINT32_C(3808461042), UINT32_C(3371427410)),
      simde_x_mm_set_epu32(UINT32_C( 331792), UINT32_C( 63127558), UINT32_C( 483990789), UINT32_C( 530828)),
      1 },
    { simde_x_mm_set_epu32(UINT32_C(3603350831), UINT32_C(2431793515), UINT32_C(1875486201), UINT32_C(3530533257)),
      simde_x_mm_set_epu32(UINT32_C( 19933776), UINT32_C(1174733824), UINT32_C(2147880964), UINT32_C( 8393234)),
      1 },
    { simde_x_mm_set_epu32(UINT32_C(3124097223), UINT32_C(4066616999), UINT32_C(2076362673), UINT32_C( 124456258)),
      simde_x_mm_set_epu32(UINT32_C(1170213928), UINT32_C( 84416520), UINT32_C(2150893634), UINT32_C(3499413545)),
      1 },
    { simde_x_mm_set_epu32(UINT32_C(4156796004), UINT32_C(1994002854), UINT32_C(4034407880), UINT32_C( 600089084)),
      simde_x_mm_set_epu32(UINT32_C(2971079954), UINT32_C(3997808651), UINT32_C( 222740062), UINT32_C(3989806580)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C(4130552440), UINT32_C( 812777701), UINT32_C(2016424386), UINT32_C( 886379222)),
      simde_x_mm_set_epu32(UINT32_C( 70177905), UINT32_C(2046022589), UINT32_C( 670901459), UINT32_C(2978865170)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C( 192532312),
UINT32_C(1467133872), UINT32_C(1075139299), UINT32_C( 191167596)),
      simde_x_mm_set_epu32(UINT32_C(1026534956), UINT32_C(4130843248), UINT32_C(3619306010), UINT32_C(3598027302)),
      0 },
    { simde_x_mm_set_epu32(UINT32_C(2410417876), UINT32_C( 74289906), UINT32_C(4031840239), UINT32_C(2837881625)),
      simde_x_mm_set_epu32(UINT32_C( 542327083), UINT32_C( 680656909), UINT32_C( 184549392), UINT32_C(1421938882)),
      1 },
    { simde_x_mm_set_epu32(UINT32_C(3782150825), UINT32_C(3325635017), UINT32_C(1617333560), UINT32_C(3634437083)),
      simde_x_mm_set_epu32(UINT32_C( 167825730), UINT32_C( 294047748), UINT32_C( 126906945), UINT32_C( 17884164)),
      1 }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    /* Compare the returned flag with the table's expected value. */
    int r = simde_mm_testz_si128(test_vec[i].a, test_vec[i].b);
    simde_assert_equal_i(r, test_vec[i].r);
  }

  return 0;
}

/* List of the tests in this file.  Each entry names a test by the suffix
 * that follows `test_simde_` in the function definitions above.
 * NOTE(review): the SIMDE_TEST_FUNC_LIST_* macros are defined elsewhere;
 * presumably they build the table that the test runner iterates - confirm
 * in the test harness headers. */
SIMDE_TEST_FUNC_LIST_BEGIN
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_blend_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_blend_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_blend_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_blendv_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_blendv_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_blendv_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_blendv_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_blendv_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_blendv_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_ceil_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_ceil_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_ceil_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_ceil_ss)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtepi8_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtepi8_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtepi8_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtepu8_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtepu8_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtepu8_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtepi16_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtepi16_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtepu16_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtepu16_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtepi32_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtepu32_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_dp_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_dp_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_extract_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_extract_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_extract_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_extract_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_floor_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_floor_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_floor_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_floor_ss)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_insert_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_insert_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_insert_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_insert_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_epu16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_epu32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_epu16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_epu32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_minpos_epu16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_mpsadbw_epu8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_mul_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_mullo_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_mullo_epu32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_packus_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_round_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_round_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_round_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_round_ss)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_round_ps_nearest)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_round_ps_ninf)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_round_ps_pinf)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_round_ps_zero)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_stream_load_si128)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_test_all_ones)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_test_all_zeros)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_test_mix_ones_zeros)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_testc_si128)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_testnzc_si128)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_testz_si128)
SIMDE_TEST_FUNC_LIST_END

/* NOTE(review): in this copy the #include below has lost its target, and the
 * archive member header for simde-0.7.2/test/x86/sse4.2.c is fused onto the
 * same line, immediately followed by the opening of that file's license
 * comment.  Restore the include from the upstream file rather than guessing. */
#include simde-0.7.2/test/x86/sse4.2.c000066400000000000000000001612221400333146700154140ustar00rootroot00000000000000/* Copyright (c) 2017
Evan Nemerson * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/

#define SIMDE_TESTS_CURRENT_ISAX sse4_2
/* NOTE(review): both #include directives below have lost their targets in
 * this copy; restore them from the upstream file rather than guessing. */
#include
#include

/* Checks simde_mm_cmpestrs() with the control argument 0.
 *
 * Each row passes vector a with explicit length la and vector b with
 * explicit length lb.  In this table the expected result is 1 in every row
 * whose la is below 16 (0, 5, 8, 0, 5, 2, 0) and 0 in the one row whose la
 * is 16.  Returns 0 after all rows have been compared. */
static int
test_simde_mm_cmpestrs_8(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;  /* first operand */
    int la;          /* length passed for a */
    simde__m128i b;  /* second operand */
    int lb;          /* length passed for b */
    int r;           /* expected return value */
  } test_vec[8] = {
    { simde_mm_set_epi8(INT8_C(-105), INT8_C(-116), INT8_C( -45), INT8_C(-102), INT8_C( -3), INT8_C( 92), INT8_C( -99), INT8_C( 100), INT8_C( 30), INT8_C(-115), INT8_C( 82), INT8_C( 84), INT8_C(-106), INT8_C( 66), INT8_C(-107), INT8_C( 116)),
      0 ,
      simde_mm_set_epi8(INT8_C( -89), INT8_C( 65), INT8_C( 68), INT8_C( -29), INT8_C(-101), INT8_C( 113), INT8_C( -11), INT8_C( 53), INT8_C( -5), INT8_C( -76), INT8_C( 28), INT8_C(-120), INT8_C( 64), INT8_C( 43), INT8_C(-127), INT8_C( -44)),
      2 ,
      1 },
    { simde_mm_set_epi8(INT8_C( 103), INT8_C( 89), INT8_C( 106), INT8_C( -90), INT8_C( 18), INT8_C( 23), INT8_C( 117), INT8_C( 6), INT8_C( -91), INT8_C( -40), INT8_C( 108), INT8_C(-127), INT8_C( -29), INT8_C( -39), INT8_C( 49), INT8_C( -85)),
      5 ,
      simde_mm_set_epi8(INT8_C(-104), INT8_C( 100), INT8_C( -73), INT8_C( -23), INT8_C( -48), INT8_C( 87), INT8_C(-118), INT8_C( 66), INT8_C( -75), INT8_C( 35), INT8_C( -1), INT8_C( 111), INT8_C( -30), INT8_C( -6), INT8_C( 10), INT8_C( 91)),
      10 ,
      1 },
    { simde_mm_set_epi8(INT8_C( 84), INT8_C( 21), INT8_C( 91), INT8_C( -41), INT8_C( 25), INT8_C( -24), INT8_C( 93), INT8_C(-124), INT8_C( -97), INT8_C( -88), INT8_C( 113), INT8_C( 85), INT8_C( 42), INT8_C( -93), INT8_C( -37), INT8_C( -18)),
      8 ,
      simde_mm_set_epi8(INT8_C( 117), INT8_C( -42), INT8_C(-112), INT8_C( -67), INT8_C( -7), INT8_C( -85), INT8_C( -4), INT8_C( 125), INT8_C(-127), INT8_C( -75), INT8_C(-125), INT8_C( 109), INT8_C( 50), INT8_C( -16), INT8_C( 22), INT8_C( 86)),
      12 ,
      1 },
    { simde_mm_set_epi8(INT8_C( 109), INT8_C( 78), INT8_C( 15), INT8_C( 113), INT8_C(-118), INT8_C( -55), INT8_C(-119), INT8_C( -4), INT8_C( 29), INT8_C( 32), INT8_C(-107), INT8_C(-117), INT8_C( 79), INT8_C( 29), INT8_C( 126), INT8_C( -75)),
      16 ,
      simde_mm_set_epi8(INT8_C( -7), INT8_C( 48), INT8_C( 112), INT8_C( -3), INT8_C(
      35), INT8_C( -21), INT8_C( -53), INT8_C(-114), INT8_C( -78), INT8_C( -5), INT8_C( -11), INT8_C( 91), INT8_C( 53), INT8_C( -34), INT8_C( -19), INT8_C( 11)),
      8 ,
      0 },
    { simde_mm_set_epi8(INT8_C( 39), INT8_C( 98), INT8_C( -40), INT8_C( -94), INT8_C( -37), INT8_C( -39), INT8_C( -6), INT8_C( -18), INT8_C( -44), INT8_C( 119), INT8_C( -96), INT8_C( 81), INT8_C(-117), INT8_C(-126), INT8_C( 94), INT8_C( -52)),
      0 ,
      simde_mm_set_epi8(INT8_C( 52), INT8_C( -46), INT8_C( -6), INT8_C( -85), INT8_C( 63), INT8_C( 85), INT8_C( -29), INT8_C( -39), INT8_C( -42), INT8_C( 92), INT8_C( -15), INT8_C( -6), INT8_C( -75), INT8_C( -86), INT8_C( -68), INT8_C( 108)),
      3 ,
      1 },
    { simde_mm_set_epi8(INT8_C( 60), INT8_C( -84), INT8_C( 55), INT8_C( 82), INT8_C( -32), INT8_C( -86), INT8_C( -19), INT8_C( 6), INT8_C( -73), INT8_C( -96), INT8_C( 56), INT8_C(-116), INT8_C( 40), INT8_C( -91), INT8_C( -58), INT8_C( -53)),
      5 ,
      simde_mm_set_epi8(INT8_C(-125), INT8_C(-121), INT8_C( 94), INT8_C( -81), INT8_C( 51), INT8_C( -18), INT8_C( 57), INT8_C( 114), INT8_C( 65), INT8_C( 21), INT8_C( 1), INT8_C( 122), INT8_C( -29), INT8_C( -17), INT8_C( 114), INT8_C( 17)),
      6 ,
      1 },
    { simde_mm_set_epi8(INT8_C( 7), INT8_C(-112), INT8_C(-109), INT8_C( 25), INT8_C( 65), INT8_C( 3), INT8_C( 18), INT8_C( -17), INT8_C(-117), INT8_C( -64), INT8_C( 123), INT8_C( 112), INT8_C( -54), INT8_C( -32), INT8_C( -28), INT8_C( -54)),
      2 ,
      simde_mm_set_epi8(INT8_C( 20), INT8_C( -94), INT8_C( -95), INT8_C( -11), INT8_C( -10), INT8_C( 45), INT8_C( -14), INT8_C(-103), INT8_C(-109), INT8_C(-101), INT8_C( 112), INT8_C( -4), INT8_C( 62), INT8_C(-110), INT8_C( 100), INT8_C( 78)),
      14 ,
      1 },
    { simde_mm_set_epi8(INT8_C( 94), INT8_C( -96), INT8_C( -41), INT8_C(-127), INT8_C( 109), INT8_C( -92), INT8_C( 60), INT8_C( 85), INT8_C( -80), INT8_C( -69), INT8_C( -10), INT8_C( 113), INT8_C( -86), INT8_C( 12), INT8_C( -11), INT8_C( 93)),
      0 ,
      simde_mm_set_epi8(INT8_C( -1), INT8_C( -87), INT8_C( -78), INT8_C( 26), INT8_C( 30), INT8_C( 110), INT8_C( -36), INT8_C(
70), INT8_C(-126), INT8_C( -29), INT8_C( -65), INT8_C( -41), INT8_C( -71), INT8_C( 1), INT8_C( 121), INT8_C(-119)),
      10 ,
      1 }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    /* The last argument (0) is the control value for this variant of the
     * test; the returned flag must match the table. */
    int r = simde_mm_cmpestrs(test_vec[i].a, test_vec[i].la, test_vec[i].b, test_vec[i].lb, 0);
    simde_assert_equal_i(r, test_vec[i].r);
  }

  return 0;
}

/* Checks simde_mm_cmpestrs() with the control argument 1, using vectors
 * built from eight 16-bit values.
 *
 * Every row in this table has la below 8 (6, 5, 5, 7, 2, 7, 7, 7) and
 * expects 1.  Returns 0 after all rows have been compared.
 * NOTE(review): control value 1 presumably selects 16-bit elements - confirm
 * against the Intel documentation for the PCMPESTR* immediate. */
static int
test_simde_mm_cmpestrs_16(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;  /* first operand */
    int la;          /* length passed for a */
    simde__m128i b;  /* second operand */
    int lb;          /* length passed for b */
    int r;           /* expected return value */
  } test_vec[8] = {
    { simde_mm_set_epi16(INT16_C(-26740), INT16_C(-11366), INT16_C( -676), INT16_C(-25244), INT16_C( 7821), INT16_C( 21076), INT16_C(-27070), INT16_C(-27276)),
      6 ,
      simde_mm_set_epi16(INT16_C(-22719), INT16_C( 17635), INT16_C(-25743), INT16_C( -2763), INT16_C( -1100), INT16_C( 7304), INT16_C( 16427), INT16_C(-32300)),
      7 ,
      1 },
    { simde_mm_set_epi16(INT16_C( 26457), INT16_C( 27302), INT16_C( 4631), INT16_C( 29958), INT16_C(-23080), INT16_C( 27777), INT16_C( -7207), INT16_C( 12715)),
      5 ,
      simde_mm_set_epi16(INT16_C(-26524), INT16_C(-18455), INT16_C(-12201), INT16_C(-30142), INT16_C(-19165), INT16_C( -145), INT16_C( -7430), INT16_C( 2651)),
      7 ,
      1 },
    { simde_mm_set_epi16(INT16_C( 21525), INT16_C( 23511), INT16_C( 6632), INT16_C( 23940), INT16_C(-24664), INT16_C( 29013), INT16_C( 10915), INT16_C( -9234)),
      5 ,
      simde_mm_set_epi16(INT16_C( 30166), INT16_C(-28483), INT16_C( -1621), INT16_C( -899), INT16_C(-32331), INT16_C(-31891), INT16_C( 13040), INT16_C( 5718)),
      6 ,
      1 },
    { simde_mm_set_epi16(INT16_C( 27982), INT16_C( 3953), INT16_C(-30007), INT16_C(-30212), INT16_C( 7456), INT16_C(-27253), INT16_C( 20253), INT16_C( 32437)),
      7 ,
      simde_mm_set_epi16(INT16_C( -1744), INT16_C( 28925), INT16_C( 9195), INT16_C(-13426), INT16_C(-19717), INT16_C( -2725), INT16_C( 13790), INT16_C( -4853)),
      5 ,
      1 },
    { simde_mm_set_epi16(INT16_C( 10082), INT16_C(-10078), INT16_C( -9255), INT16_C( -1298), INT16_C(-11145), INT16_C(-24495), INT16_C(-29822), INT16_C( 24268)),
      2 ,
      simde_mm_set_epi16(INT16_C( 13522), INT16_C(
      -1365), INT16_C( 16213), INT16_C( -7207), INT16_C(-10660), INT16_C( -3590), INT16_C(-19030), INT16_C(-17300)),
      7 ,
      1 },
    { simde_mm_set_epi16(INT16_C( 15532), INT16_C( 14162), INT16_C( -8022), INT16_C( -4858), INT16_C(-18528), INT16_C( 14476), INT16_C( 10405), INT16_C(-14645)),
      7 ,
      simde_mm_set_epi16(INT16_C(-31865), INT16_C( 24239), INT16_C( 13294), INT16_C( 14706), INT16_C( 16661), INT16_C( 378), INT16_C( -7185), INT16_C( 29201)),
      4 ,
      1 },
    { simde_mm_set_epi16(INT16_C( 1936), INT16_C(-27879), INT16_C( 16643), INT16_C( 4847), INT16_C(-29760), INT16_C( 31600), INT16_C(-13600), INT16_C( -6966)),
      7 ,
      simde_mm_set_epi16(INT16_C( 5282), INT16_C(-24075), INT16_C( -2515), INT16_C( -3431), INT16_C(-27749), INT16_C( 28924), INT16_C( 16018), INT16_C( 25678)),
      0 ,
      1 },
    { simde_mm_set_epi16(INT16_C( 24224), INT16_C(-10367), INT16_C( 28068), INT16_C( 15445), INT16_C(-20293), INT16_C( -2447), INT16_C(-22004), INT16_C( -2723)),
      7 ,
      simde_mm_set_epi16(INT16_C( -87), INT16_C(-19942), INT16_C( 7790), INT16_C( -9146), INT16_C(-32029), INT16_C(-16425), INT16_C(-18175), INT16_C( 31113)),
      2 ,
      1 }
  };

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    /* Same call shape as the 8-bit test, with control value 1. */
    int r = simde_mm_cmpestrs(test_vec[i].a, test_vec[i].la, test_vec[i].b, test_vec[i].lb, 1);
    simde_assert_equal_i(r, test_vec[i].r);
  }

  return 0;
}

static int
test_simde_mm_cmpestrz_8(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128i a;
    int la;
    simde__m128i b;
    int lb;
    int r;
  } test_vec[8] = {
    { simde_mm_set_epi8(INT8_C( 91), INT8_C( 17), INT8_C( -35), INT8_C( -83), INT8_C( 65), INT8_C( -69), INT8_C( -33), INT8_C( -2), INT8_C( -59), INT8_C( -56), INT8_C( -20), INT8_C(-124), INT8_C( -68), INT8_C( -33), INT8_C( -98), INT8_C( 119)),
      9 ,
      simde_mm_set_epi8(INT8_C( -24), INT8_C( 33), INT8_C( 95), INT8_C( 8), INT8_C( 67), INT8_C( -46), INT8_C( 123), INT8_C( -89), INT8_C( -36), INT8_C( 19), INT8_C( -12), INT8_C( 108), INT8_C( 70), INT8_C( -86), INT8_C( 125), INT8_C( 63)),
      9 ,
      1 },
    { simde_mm_set_epi8(INT8_C( 31), INT8_C( -36),
INT8_C( 70), INT8_C( -37), INT8_C( 120), INT8_C( 70), INT8_C( 10), INT8_C( 73), INT8_C( 94), INT8_C( -22), INT8_C( 117), INT8_C(-123), INT8_C( -97), INT8_C( -97), INT8_C( 94), INT8_C( -19)), 15 , simde_mm_set_epi8(INT8_C(-111), INT8_C( 66), INT8_C( -59), INT8_C( 54), INT8_C( 102), INT8_C(-108), INT8_C(-128), INT8_C(-104), INT8_C( 81), INT8_C( 46), INT8_C(-110), INT8_C( 86), INT8_C( 82), INT8_C( 23), INT8_C( -59), INT8_C( 19)), 1 , 1 }, { simde_mm_set_epi8(INT8_C( 100), INT8_C( 86), INT8_C( 40), INT8_C( -10), INT8_C( -78), INT8_C( 38), INT8_C( 31), INT8_C( 81), INT8_C(-107), INT8_C( 114), INT8_C( 112), INT8_C( 93), INT8_C(-101), INT8_C( 10), INT8_C( 0), INT8_C(-128)), 6 , simde_mm_set_epi8(INT8_C( -95), INT8_C( 81), INT8_C( -72), INT8_C( -74), INT8_C( -66), INT8_C(-106), INT8_C( 76), INT8_C( -42), INT8_C(-123), INT8_C( -44), INT8_C(-103), INT8_C( 44), INT8_C( -40), INT8_C( 125), INT8_C( -32), INT8_C(-115)), 6 , 1 }, { simde_mm_set_epi8(INT8_C( 40), INT8_C( -63), INT8_C( 76), INT8_C( 45), INT8_C(-113), INT8_C( -94), INT8_C( -5), INT8_C( -14), INT8_C( -18), INT8_C( 63), INT8_C( -52), INT8_C( -78), INT8_C(-108), INT8_C( 41), INT8_C( 7), INT8_C( 43)), 0 , simde_mm_set_epi8(INT8_C( -66), INT8_C( 82), INT8_C( 59), INT8_C( 48), INT8_C( 110), INT8_C( 49), INT8_C( 62), INT8_C( -91), INT8_C( -57), INT8_C( 18), INT8_C( 30), INT8_C( 38), INT8_C( -3), INT8_C( -35), INT8_C( -6), INT8_C( -54)), 1 , 1 }, { simde_mm_set_epi8(INT8_C( 76), INT8_C( 37), INT8_C( -49), INT8_C( -67), INT8_C( 68), INT8_C(-123), INT8_C( 61), INT8_C( -77), INT8_C( 82), INT8_C( 19), INT8_C( 13), INT8_C( -91), INT8_C( -17), INT8_C( 115), INT8_C( -42), INT8_C(-127)), 7 , simde_mm_set_epi8(INT8_C( -99), INT8_C( -9), INT8_C( -89), INT8_C( 91), INT8_C(-125), INT8_C( -63), INT8_C( 83), INT8_C( 47), INT8_C( 61), INT8_C(-124), INT8_C( -87), INT8_C( -5), INT8_C( 94), INT8_C( -25), INT8_C( -16), INT8_C( -76)), 6 , 1 }, { simde_mm_set_epi8(INT8_C( -34), INT8_C( -22), INT8_C( -14), INT8_C( -6), INT8_C( -18), INT8_C( 91), 
INT8_C( -8), INT8_C( 121), INT8_C( 119), INT8_C( 123), INT8_C( 80), INT8_C( 126), INT8_C( -31), INT8_C( -48), INT8_C( 62), INT8_C( -34)), 11 , simde_mm_set_epi8(INT8_C( 31), INT8_C( -81), INT8_C( -83), INT8_C( 83), INT8_C( -41), INT8_C( 100), INT8_C( 3), INT8_C(-110), INT8_C( 111), INT8_C(-115), INT8_C( -38), INT8_C( 116), INT8_C( 30), INT8_C( 34), INT8_C( 109), INT8_C( 42)), 0 , 1 }, { simde_mm_set_epi8(INT8_C( -33), INT8_C(-111), INT8_C( -19), INT8_C(-122), INT8_C( -36), INT8_C( -20), INT8_C( 35), INT8_C( 47), INT8_C(-115), INT8_C( -67), INT8_C( 0), INT8_C( -15), INT8_C( -72), INT8_C( -50), INT8_C( -50), INT8_C( -72)), 8 , simde_mm_set_epi8(INT8_C(-110), INT8_C(-118), INT8_C( 33), INT8_C( 44), INT8_C( 69), INT8_C( -27), INT8_C( -37), INT8_C( -9), INT8_C( 64), INT8_C( -92), INT8_C( 60), INT8_C( 108), INT8_C( 106), INT8_C( 83), INT8_C( -30), INT8_C( 83)), 2 , 1 }, { simde_mm_set_epi8(INT8_C( 77), INT8_C(-108), INT8_C( 64), INT8_C( 98), INT8_C( -64), INT8_C( 49), INT8_C( -82), INT8_C( 37), INT8_C( 71), INT8_C( 88), INT8_C(-109), INT8_C( -84), INT8_C( 109), INT8_C( -36), INT8_C( -4), INT8_C( -89)), 3 , simde_mm_set_epi8(INT8_C( -71), INT8_C( -17), INT8_C( -84), INT8_C( 102), INT8_C( 127), INT8_C( 91), INT8_C( -22), INT8_C( 87), INT8_C( 2), INT8_C(-127), INT8_C( -31), INT8_C(-119), INT8_C( 31), INT8_C( -5), INT8_C( 114), INT8_C( -61)), 6 , 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_cmpestrz(test_vec[i].a, test_vec[i].la, test_vec[i].b, test_vec[i].lb, 0); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpestrz_16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; int la; simde__m128i b; int lb; int r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( 23313), INT16_C( -8787), INT16_C( 16827), INT16_C( -8194), INT16_C(-14904), INT16_C( -4988), INT16_C(-17185), INT16_C(-24969)), 8 , simde_mm_set_epi16(INT16_C( -6111), INT16_C( 24328), INT16_C( 17362), INT16_C( 31655), INT16_C( 
-9197), INT16_C( -2964), INT16_C( 18090), INT16_C( 32063)), 5 , 1 }, { simde_mm_set_epi16(INT16_C( 8156), INT16_C( 18139), INT16_C( 30790), INT16_C( 2633), INT16_C( 24298), INT16_C( 30085), INT16_C(-24673), INT16_C( 24301)), 0 , simde_mm_set_epi16(INT16_C(-28350), INT16_C(-15050), INT16_C( 26260), INT16_C(-32616), INT16_C( 20782), INT16_C(-28074), INT16_C( 21015), INT16_C(-15085)), 2 , 1 }, { simde_mm_set_epi16(INT16_C( 25686), INT16_C( 10486), INT16_C(-19930), INT16_C( 8017), INT16_C(-27278), INT16_C( 28765), INT16_C(-25846), INT16_C( 128)), 8 , simde_mm_set_epi16(INT16_C(-24239), INT16_C(-18250), INT16_C(-16746), INT16_C( 19670), INT16_C(-31276), INT16_C(-26324), INT16_C(-10115), INT16_C( -8051)), 1 , 1 }, { simde_mm_set_epi16(INT16_C( 10433), INT16_C( 19501), INT16_C(-28766), INT16_C( -1038), INT16_C( -4545), INT16_C(-13134), INT16_C(-27607), INT16_C( 1835)), 4 , simde_mm_set_epi16(INT16_C(-16814), INT16_C( 15152), INT16_C( 28209), INT16_C( 16037), INT16_C(-14574), INT16_C( 7718), INT16_C( -547), INT16_C( -1334)), 3 , 1 }, { simde_mm_set_epi16(INT16_C( 19493), INT16_C(-12355), INT16_C( 17541), INT16_C( 15795), INT16_C( 21011), INT16_C( 3493), INT16_C( -4237), INT16_C(-10623)), 2 , simde_mm_set_epi16(INT16_C(-25097), INT16_C(-22693), INT16_C(-31807), INT16_C( 21295), INT16_C( 15748), INT16_C(-22021), INT16_C( 24295), INT16_C( -3916)), 3 , 1 }, { simde_mm_set_epi16(INT16_C( -8470), INT16_C( -3334), INT16_C( -4517), INT16_C( -1927), INT16_C( 30587), INT16_C( 20606), INT16_C( -7728), INT16_C( 16094)), 2 , simde_mm_set_epi16(INT16_C( 8111), INT16_C(-21165), INT16_C(-10396), INT16_C( 914), INT16_C( 28557), INT16_C( -9612), INT16_C( 7714), INT16_C( 27946)), 5 , 1 }, { simde_mm_set_epi16(INT16_C( -8303), INT16_C( -4730), INT16_C( -8980), INT16_C( 9007), INT16_C(-29251), INT16_C( 241), INT16_C(-18226), INT16_C(-12616)), 0 , simde_mm_set_epi16(INT16_C(-28022), INT16_C( 8492), INT16_C( 17893), INT16_C( -9225), INT16_C( 16548), INT16_C( 15468), INT16_C( 27219), INT16_C( 
-7597)), 4 , 1 }, { simde_mm_set_epi16(INT16_C( 19860), INT16_C( 16482), INT16_C(-16335), INT16_C(-20955), INT16_C( 18264), INT16_C(-27732), INT16_C( 28124), INT16_C( -857)), 1 , simde_mm_set_epi16(INT16_C(-17937), INT16_C(-21402), INT16_C( 32603), INT16_C( -5545), INT16_C( 641), INT16_C( -7799), INT16_C( 8187), INT16_C( 29379)), 0 , 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_cmpestrz(test_vec[i].a, test_vec[i].la, test_vec[i].b, test_vec[i].lb, 1); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpgt_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi64x(INT64_C(-2149331112921330677), INT64_C( 3189038460188560982)), simde_mm_set_epi64x(INT64_C( -996047549682722220), INT64_C(-3995545326609437904)), simde_mm_set_epi64x( INT64_C(0), ~INT64_C(0)) }, { simde_mm_set_epi64x(INT64_C( 3213898448913237846), INT64_C( 9188286366666087308)), simde_mm_set_epi64x(INT64_C( 2918885787365950970), INT64_C( 6780053140456787494)), simde_mm_set_epi64x(~INT64_C(0), ~INT64_C(0)) }, { simde_mm_set_epi64x(INT64_C(-6480415937191367948), INT64_C( 6434069133602920016)), simde_mm_set_epi64x(INT64_C( 8054577307931165184), INT64_C( 2226222084862743618)), simde_mm_set_epi64x( INT64_C(0), ~INT64_C(0)) }, { simde_mm_set_epi64x(INT64_C(-6197561420805751907), INT64_C( 4778870285233423339)), simde_mm_set_epi64x(INT64_C( 1839658993612937599), INT64_C( -902367911293731861)), simde_mm_set_epi64x( INT64_C(0), ~INT64_C(0)) }, { simde_mm_set_epi64x(INT64_C( 5091127324004768664), INT64_C(-2002251908801446460)), simde_mm_set_epi64x(INT64_C(-9056506211008935561), INT64_C(-6487933609077704174)), simde_mm_set_epi64x(~INT64_C(0), ~INT64_C(0)) }, { simde_mm_set_epi64x(INT64_C(-4743149223868910453), INT64_C(-4137271544350199785)), simde_mm_set_epi64x(INT64_C( 4762909370147937560), INT64_C( 6560801355595049799)), simde_mm_set_epi64x( INT64_C(0), 
INT64_C(0)) }, { simde_mm_set_epi64x(INT64_C( 913044205052582612), INT64_C(-2362244502684338485)), simde_mm_set_epi64x(INT64_C( -603710511502052754), INT64_C(-3179203207537477667)), simde_mm_set_epi64x(~INT64_C(0), ~INT64_C(0)) }, { simde_mm_set_epi64x(INT64_C( 6753725813089147170), INT64_C( 7031124288307654085)), simde_mm_set_epi64x(INT64_C( 5046765831366456160), INT64_C( 6981054579474564569)), simde_mm_set_epi64x(~INT64_C(0), ~INT64_C(0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_cmpgt_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_cmpistrs_8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; int r; } test_vec[] = { { simde_mm_set_epi8(INT8_C( 25), INT8_C( 54), INT8_C( -66), INT8_C( -16), INT8_C( 66), INT8_C(-116), INT8_C( -35), INT8_C( 78), INT8_C( 107), INT8_C( 11), INT8_C(-110), INT8_C( 90), INT8_C( -2), INT8_C(-109), INT8_C( -34), INT8_C( 53)), simde_mm_set_epi8(INT8_C( -86), INT8_C(-125), INT8_C( -30), INT8_C( 1), INT8_C( 69), INT8_C( -79), INT8_C( -16), INT8_C( 34), INT8_C( 73), INT8_C( 71), INT8_C( -50), INT8_C( -27), INT8_C( -56), INT8_C(-106), INT8_C( -90), INT8_C( 104)), 0 }, { simde_mm_set_epi8(INT8_C(0), INT8_C( -93), INT8_C( -2), INT8_C( -97), INT8_C(-117), INT8_C( -46), INT8_C(-107), INT8_C(-101), INT8_C(-104), INT8_C( -97), INT8_C(-123), INT8_C( -15), INT8_C( 101), INT8_C( 123), INT8_C(-123), INT8_C( -2)), simde_mm_set_epi8(INT8_C( -6), INT8_C( 9), INT8_C( 43), INT8_C(-128), INT8_C( -64), INT8_C( 71), INT8_C( -48), INT8_C( 11), INT8_C( 61), INT8_C( -61), INT8_C( 55), INT8_C(-108), INT8_C( 95), INT8_C( -26), INT8_C( -76), INT8_C( 92)), 1 }, { simde_mm_set_epi8(INT8_C( 74), INT8_C(0), INT8_C( 48), INT8_C( 106), INT8_C( -25), INT8_C( 49), INT8_C( -66), INT8_C( 38), INT8_C( -18), INT8_C(-127), INT8_C( 20), INT8_C( -68), INT8_C( 117), INT8_C(-114), INT8_C( 113), INT8_C( -43)), 
simde_mm_set_epi8(INT8_C( 19), INT8_C( 27), INT8_C( 69), INT8_C( 3), INT8_C( 75), INT8_C( -73), INT8_C( 19), INT8_C( -16), INT8_C( -20), INT8_C( -75), INT8_C( -47), INT8_C( -90), INT8_C(-126), INT8_C( 82), INT8_C( -85), INT8_C( 65)), 1 }, { simde_mm_set_epi8(INT8_C( -36), INT8_C(-128), INT8_C( 0), INT8_C( -37), INT8_C(-116), INT8_C( 107), INT8_C( -26), INT8_C(-121), INT8_C( -65), INT8_C( 100), INT8_C( 78), INT8_C( 8), INT8_C(-100), INT8_C( -73), INT8_C( -59), INT8_C( -67)), simde_mm_set_epi8(INT8_C(-124), INT8_C( -83), INT8_C( -63), INT8_C( -32), INT8_C( 28), INT8_C( 100), INT8_C( 27), INT8_C( 38), INT8_C( -55), INT8_C( 20), INT8_C( -89), INT8_C( -37), INT8_C( 91), INT8_C( 56), INT8_C( -14), INT8_C( -98)), 1 }, { simde_mm_set_epi8(INT8_C(-111), INT8_C( -83), INT8_C( 125), INT8_C( 0), INT8_C( 53), INT8_C( 48), INT8_C( -61), INT8_C( -87), INT8_C( 65), INT8_C( 121), INT8_C( 71), INT8_C( 10), INT8_C( 118), INT8_C( -63), INT8_C( -96), INT8_C( 9)), simde_mm_set_epi8(INT8_C( -41), INT8_C( -1), INT8_C( -57), INT8_C( 113), INT8_C( 101), INT8_C( 39), INT8_C( 86), INT8_C( 5), INT8_C( 19), INT8_C( -8), INT8_C( 110), INT8_C( 44), INT8_C(-100), INT8_C( -52), INT8_C(-126), INT8_C( -3)), 1 }, { simde_mm_set_epi8(INT8_C( -18), INT8_C( 10), INT8_C( 22), INT8_C( -30), INT8_C( 0), INT8_C( 75), INT8_C( 26), INT8_C( 106), INT8_C( -59), INT8_C(-112), INT8_C( 62), INT8_C( 5), INT8_C( -4), INT8_C( -40), INT8_C( 68), INT8_C( 77)), simde_mm_set_epi8(INT8_C( -23), INT8_C( 71), INT8_C( 21), INT8_C(-100), INT8_C( 36), INT8_C( -96), INT8_C( -10), INT8_C( 20), INT8_C( -22), INT8_C( 110), INT8_C( 98), INT8_C( 67), INT8_C( 12), INT8_C( -74), INT8_C( -50), INT8_C( 32)), 1 }, { simde_mm_set_epi8(INT8_C( 106), INT8_C( -84), INT8_C( 30), INT8_C( 79), INT8_C( 124), INT8_C( 0), INT8_C( -53), INT8_C( -99), INT8_C( -15), INT8_C( 108), INT8_C( -91), INT8_C( 4), INT8_C( 21), INT8_C( 48), INT8_C( 29), INT8_C( -55)), simde_mm_set_epi8(INT8_C( 100), INT8_C( 100), INT8_C( 71), INT8_C( 90), INT8_C( -52), INT8_C( 
119), INT8_C( -64), INT8_C(-104), INT8_C( 16), INT8_C( -98), INT8_C( 37), INT8_C( -2), INT8_C( -6), INT8_C( -12), INT8_C( 117), INT8_C( 87)), 1 }, { simde_mm_set_epi8(INT8_C( -55), INT8_C( 16), INT8_C( -12), INT8_C(-128), INT8_C( -68), INT8_C( 111), INT8_C(0), INT8_C(-117), INT8_C( 102), INT8_C( -52), INT8_C( -52), INT8_C( -25), INT8_C( -6), INT8_C( 112), INT8_C( 116), INT8_C( 39)), simde_mm_set_epi8(INT8_C( 29), INT8_C( -72), INT8_C( 47), INT8_C( 93), INT8_C( -90), INT8_C( 115), INT8_C( 36), INT8_C( -93), INT8_C( 106), INT8_C( -6), INT8_C( -91), INT8_C( 34), INT8_C( -44), INT8_C( -69), INT8_C( 123), INT8_C( 51)), 1 }, { simde_mm_set_epi8(INT8_C( -55), INT8_C( 16), INT8_C( -12), INT8_C(-128), INT8_C( -68), INT8_C( 111), INT8_C(10), INT8_C(0), INT8_C( 102), INT8_C( -52), INT8_C( -52), INT8_C( -25), INT8_C( -6), INT8_C( 112), INT8_C( 116), INT8_C( 39)), simde_mm_set_epi8(INT8_C( 29), INT8_C( -72), INT8_C( 47), INT8_C( 93), INT8_C( -90), INT8_C( 115), INT8_C( 36), INT8_C( -93), INT8_C( 106), INT8_C( -6), INT8_C( -91), INT8_C( 34), INT8_C( -44), INT8_C( -69), INT8_C( 123), INT8_C( 51)), 1 }, { simde_mm_set_epi8(INT8_C( -55), INT8_C( 16), INT8_C( -12), INT8_C(-128), INT8_C( -68), INT8_C( 111), INT8_C(10), INT8_C(-117), INT8_C( 0), INT8_C( -52), INT8_C( -52), INT8_C( -25), INT8_C( -6), INT8_C( 112), INT8_C( 116), INT8_C( 39)), simde_mm_set_epi8(INT8_C( 29), INT8_C( -72), INT8_C( 47), INT8_C( 93), INT8_C( -90), INT8_C( 115), INT8_C( 36), INT8_C( -93), INT8_C( 106), INT8_C( -6), INT8_C( -91), INT8_C( 34), INT8_C( -44), INT8_C( -69), INT8_C( 123), INT8_C( 51)), 1 }, { simde_mm_set_epi8(INT8_C( -55), INT8_C( 16), INT8_C( -12), INT8_C(-128), INT8_C( -68), INT8_C( 111), INT8_C(10), INT8_C(-117), INT8_C( 102), INT8_C( 0), INT8_C( -52), INT8_C( -25), INT8_C( -6), INT8_C( 112), INT8_C( 116), INT8_C( 39)), simde_mm_set_epi8(INT8_C( 29), INT8_C( -72), INT8_C( 47), INT8_C( 93), INT8_C( -90), INT8_C( 115), INT8_C( 36), INT8_C( -93), INT8_C( 106), INT8_C( -6), INT8_C( -91), INT8_C( 
34), INT8_C( -44), INT8_C( -69), INT8_C( 123), INT8_C( 51)), 1 }, { simde_mm_set_epi8(INT8_C( -55), INT8_C( 16), INT8_C( -12), INT8_C(-128), INT8_C( -68), INT8_C( 111), INT8_C(10), INT8_C(-117), INT8_C( 102), INT8_C( -52), INT8_C( 0), INT8_C( -25), INT8_C( -6), INT8_C( 112), INT8_C( 116), INT8_C( 39)), simde_mm_set_epi8(INT8_C( 29), INT8_C( -72), INT8_C( 47), INT8_C( 93), INT8_C( -90), INT8_C( 115), INT8_C( 36), INT8_C( -93), INT8_C( 106), INT8_C( -6), INT8_C( -91), INT8_C( 34), INT8_C( -44), INT8_C( -69), INT8_C( 123), INT8_C( 51)), 1 }, { simde_mm_set_epi8(INT8_C( -55), INT8_C( 16), INT8_C( -12), INT8_C(-128), INT8_C( -68), INT8_C( 111), INT8_C(20), INT8_C(-117), INT8_C( 102), INT8_C( -52), INT8_C( -52), INT8_C( 0), INT8_C( -6), INT8_C( 112), INT8_C( 116), INT8_C( 39)), simde_mm_set_epi8(INT8_C( 29), INT8_C( -72), INT8_C( 47), INT8_C( 93), INT8_C( -90), INT8_C( 115), INT8_C( 36), INT8_C( -93), INT8_C( 106), INT8_C( -6), INT8_C( -91), INT8_C( 34), INT8_C( -44), INT8_C( -69), INT8_C( 123), INT8_C( 51)), 1 }, { simde_mm_set_epi8(INT8_C( -55), INT8_C( 16), INT8_C( -12), INT8_C(-128), INT8_C( -68), INT8_C( 111), INT8_C(50), INT8_C(-117), INT8_C( 102), INT8_C( -52), INT8_C( -52), INT8_C( -25), INT8_C( 0), INT8_C( 112), INT8_C( 116), INT8_C( 39)), simde_mm_set_epi8(INT8_C( 29), INT8_C( -72), INT8_C( 47), INT8_C( 93), INT8_C( -90), INT8_C( 115), INT8_C( 36), INT8_C( -93), INT8_C( 106), INT8_C( -6), INT8_C( -91), INT8_C( 34), INT8_C( -44), INT8_C( -69), INT8_C( 123), INT8_C( 51)), 1 }, { simde_mm_set_epi8(INT8_C( -55), INT8_C( 16), INT8_C( -12), INT8_C(-128), INT8_C( -68), INT8_C( 111), INT8_C(60), INT8_C(-117), INT8_C( 102), INT8_C( -52), INT8_C( -52), INT8_C( -25), INT8_C( -6), INT8_C( 0), INT8_C( 116), INT8_C( 39)), simde_mm_set_epi8(INT8_C( 29), INT8_C( -72), INT8_C( 47), INT8_C( 93), INT8_C( -90), INT8_C( 115), INT8_C( 36), INT8_C( -93), INT8_C( 106), INT8_C( -6), INT8_C( -91), INT8_C( 34), INT8_C( -44), INT8_C( -69), INT8_C( 123), INT8_C( 51)), 1 }, { 
simde_mm_set_epi8(INT8_C( -55), INT8_C( 16), INT8_C( -12), INT8_C(-128), INT8_C( -68), INT8_C( 111), INT8_C(70), INT8_C(-117), INT8_C( 102), INT8_C( -52), INT8_C( -52), INT8_C( -25), INT8_C( -6), INT8_C( 112), INT8_C( 0), INT8_C( 39)), simde_mm_set_epi8(INT8_C( 29), INT8_C( -72), INT8_C( 47), INT8_C( 93), INT8_C( -90), INT8_C( 115), INT8_C( 36), INT8_C( -93), INT8_C( 106), INT8_C( -6), INT8_C( -91), INT8_C( 34), INT8_C( -44), INT8_C( -69), INT8_C( 123), INT8_C( 51)), 1 }, { simde_mm_set_epi8(INT8_C( -55), INT8_C( 16), INT8_C( -12), INT8_C(-128), INT8_C( -68), INT8_C( 111), INT8_C(80), INT8_C(-117), INT8_C( 102), INT8_C( -52), INT8_C( -52), INT8_C( -25), INT8_C( -6), INT8_C( 112), INT8_C( 116), INT8_C( 0)), simde_mm_set_epi8(INT8_C( 29), INT8_C( -72), INT8_C( 47), INT8_C( 93), INT8_C( -90), INT8_C( 115), INT8_C( 36), INT8_C( -93), INT8_C( 106), INT8_C( -6), INT8_C( -91), INT8_C( 34), INT8_C( -44), INT8_C( -69), INT8_C( 123), INT8_C( 51)), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_cmpistrs(test_vec[i].a, test_vec[i].b, 0); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpistrs_16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; int r; } test_vec[] = { { simde_mm_set_epi16(INT16_C( 6454), INT16_C(-16656), INT16_C( 17036), INT16_C( -8882), INT16_C( 27403), INT16_C(-28070), INT16_C( -365), INT16_C( -8651)), simde_mm_set_epi16(INT16_C(-21885), INT16_C( -7679), INT16_C( 17841), INT16_C( -4062), INT16_C( 18759), INT16_C(-12571), INT16_C(-14186), INT16_C(-22936)), 0 }, { simde_mm_set_epi16(INT16_C(0), INT16_C( -353), INT16_C(-29742), INT16_C(-27237), INT16_C(-26465), INT16_C(-31247), INT16_C( 25979), INT16_C(-31234)), simde_mm_set_epi16(INT16_C( -1527), INT16_C( 11136), INT16_C(-16313), INT16_C(-12277), INT16_C( 15811), INT16_C( 14228), INT16_C( 24550), INT16_C(-19364)), 1 }, { simde_mm_set_epi16(INT16_C( 19077), INT16_C( 0), INT16_C( -6351), INT16_C(-16858), 
INT16_C( -4479), INT16_C( 5308), INT16_C( 30094), INT16_C( 29141)), simde_mm_set_epi16(INT16_C( 4891), INT16_C( 17667), INT16_C( 19383), INT16_C( 5104), INT16_C( -4939), INT16_C(-11866), INT16_C(-32174), INT16_C(-21695)), 1 }, { simde_mm_set_epi16(INT16_C( -9088), INT16_C( -7717), INT16_C(0), INT16_C( -6521), INT16_C(-16540), INT16_C( 19976), INT16_C(-25417), INT16_C(-14915)), simde_mm_set_epi16(INT16_C(-31571), INT16_C(-15904), INT16_C( 7268), INT16_C( 6950), INT16_C(-14060), INT16_C(-22565), INT16_C( 23352), INT16_C( -3426)), 1 }, { simde_mm_set_epi16(INT16_C(-28243), INT16_C( 32023), INT16_C( 13616), INT16_C(0), INT16_C( 16761), INT16_C( 18186), INT16_C( 30401), INT16_C(-24567)), simde_mm_set_epi16(INT16_C(-10241), INT16_C(-14479), INT16_C( 25895), INT16_C( 22021), INT16_C( 5112), INT16_C( 28204), INT16_C(-25396), INT16_C(-32003)), 1 }, { simde_mm_set_epi16(INT16_C( -4608), INT16_C( 5858), INT16_C(-12725), INT16_C( 6762), INT16_C(0), INT16_C( 15877), INT16_C( -808), INT16_C( 17485)), simde_mm_set_epi16(INT16_C( -5817), INT16_C( 5532), INT16_C( 9376), INT16_C( -2540), INT16_C( -5522), INT16_C( 25155), INT16_C( 3254), INT16_C(-12768)), 1 }, { simde_mm_set_epi16(INT16_C( 27308), INT16_C( 7759), INT16_C( 31856), INT16_C(-13411), INT16_C( -3732), INT16_C(0), INT16_C( 5424), INT16_C( 7625)), simde_mm_set_epi16(INT16_C( 25700), INT16_C( 18266), INT16_C(-13193), INT16_C(-16232), INT16_C( 4254), INT16_C( 9726), INT16_C( -1292), INT16_C( 30039)), 1 }, { simde_mm_set_epi16(INT16_C(-14064), INT16_C( -2944), INT16_C(-17297), INT16_C(-26741), INT16_C( 26316), INT16_C(-13081), INT16_C( 0), INT16_C( 29735)), simde_mm_set_epi16(INT16_C( 7608), INT16_C( 12125), INT16_C(-22925), INT16_C( 9379), INT16_C( 27386), INT16_C(-23262), INT16_C(-11077), INT16_C( 31539)), 1 }, { simde_mm_set_epi16(INT16_C(-14064), INT16_C( -2944), INT16_C(-17297), INT16_C(-26741), INT16_C( 26316), INT16_C(-13081), INT16_C( 70), INT16_C( 0)), simde_mm_set_epi16(INT16_C( 7608), INT16_C( 12125), 
INT16_C(-22925), INT16_C( 9379), INT16_C( 27386), INT16_C(-23262), INT16_C(-11077), INT16_C( 31539)), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_cmpistrs(test_vec[i].a, test_vec[i].b, 1); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpistrz_8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; int r; } test_vec[] = { { simde_mm_set_epi8(INT8_C( 1), INT8_C( 77), INT8_C( -64), INT8_C(-123), INT8_C( 49), INT8_C( -50), INT8_C( 60), INT8_C( 57), INT8_C( 64), INT8_C( -70), INT8_C( 56), INT8_C( -69), INT8_C(-103), INT8_C( -41), INT8_C( 82), INT8_C( -55)), simde_mm_set_epi8(INT8_C(-103), INT8_C( -36), INT8_C( -57), INT8_C( -38), INT8_C( 108), INT8_C( -48), INT8_C( -86), INT8_C( 99), INT8_C( 115), INT8_C( -51), INT8_C(-105), INT8_C( 30), INT8_C( 95), INT8_C( -27), INT8_C( 27), INT8_C(-118)), 0 }, { simde_mm_set_epi8(INT8_C( -20), INT8_C( -41), INT8_C( -11), INT8_C( 77), INT8_C( 7), INT8_C( -34), INT8_C( 46), INT8_C( -70), INT8_C( 58), INT8_C( 42), INT8_C( 57), INT8_C( 56), INT8_C( 69), INT8_C( -64), INT8_C(-121), INT8_C( 96)), simde_mm_set_epi8(INT8_C( 0), INT8_C( -55), INT8_C( -68), INT8_C( 5), INT8_C( 46), INT8_C( 24), INT8_C(-116), INT8_C( -73), INT8_C( 22), INT8_C( -42), INT8_C( -48), INT8_C( 76), INT8_C( 14), INT8_C( 67), INT8_C( 97), INT8_C(-116)), 1 }, { simde_mm_set_epi8(INT8_C( -50), INT8_C( 57), INT8_C( 48), INT8_C(-101), INT8_C( 3), INT8_C( 113), INT8_C( 104), INT8_C(-118), INT8_C( 74), INT8_C( -93), INT8_C( -56), INT8_C( 86), INT8_C( -14), INT8_C( -37), INT8_C( 55), INT8_C( -55)), simde_mm_set_epi8(INT8_C( 119), INT8_C( 0), INT8_C(-110), INT8_C( 99), INT8_C( 83), INT8_C( -37), INT8_C( -75), INT8_C( -18), INT8_C( 109), INT8_C( -9), INT8_C( 40), INT8_C( 86), INT8_C( -54), INT8_C( -27), INT8_C( -52), INT8_C( 75)), 1 }, { simde_mm_set_epi8(INT8_C(-109), INT8_C( 127), INT8_C( -99), INT8_C( -62), INT8_C( 99), INT8_C(-120), INT8_C( 41), INT8_C(-123), INT8_C( 
-92), INT8_C( 114), INT8_C( 53), INT8_C( 90), INT8_C( -5), INT8_C( -27), INT8_C( 98), INT8_C( -67)), simde_mm_set_epi8(INT8_C( 80), INT8_C( 26), INT8_C( 0), INT8_C(-117), INT8_C( -50), INT8_C( -38), INT8_C( -56), INT8_C( -22), INT8_C( 51), INT8_C( -76), INT8_C( 55), INT8_C( -49), INT8_C( 57), INT8_C( 60), INT8_C( -63), INT8_C(-107)), 1 }, { simde_mm_set_epi8(INT8_C( 21), INT8_C( 6), INT8_C( 94), INT8_C( 46), INT8_C( 20), INT8_C( -10), INT8_C( -62), INT8_C( -7), INT8_C( 32), INT8_C( -63), INT8_C( 113), INT8_C( -62), INT8_C( 0), INT8_C( 63), INT8_C( 77), INT8_C( -53)), simde_mm_set_epi8(INT8_C( 118), INT8_C( 90), INT8_C( 98), INT8_C(0), INT8_C( -82), INT8_C( 25), INT8_C( -11), INT8_C( 94), INT8_C( 100), INT8_C( 3), INT8_C(-109), INT8_C(-117), INT8_C( -61), INT8_C( 100), INT8_C(-120), INT8_C( -94)), 1 }, { simde_mm_set_epi8(INT8_C( 54), INT8_C( -82), INT8_C( 50), INT8_C( 20), INT8_C( -78), INT8_C( 25), INT8_C( -39), INT8_C( 113), INT8_C( -88), INT8_C( -49), INT8_C(-105), INT8_C( 11), INT8_C( 21), INT8_C( -81), INT8_C( -49), INT8_C( 113)), simde_mm_set_epi8(INT8_C( 7), INT8_C( -95), INT8_C( 34), INT8_C( -90), INT8_C( 0), INT8_C( 98), INT8_C( -10), INT8_C( 55), INT8_C( 125), INT8_C( 77), INT8_C( 23), INT8_C( 95), INT8_C( 75), INT8_C( 43), INT8_C( 52), INT8_C( 72)), 1 }, { simde_mm_set_epi8(INT8_C( -47), INT8_C( 15), INT8_C(-110), INT8_C( -19), INT8_C( -43), INT8_C( -27), INT8_C( 31), INT8_C( -52), INT8_C( 95), INT8_C( -61), INT8_C( 75), INT8_C( 103), INT8_C( -10), INT8_C( 24), INT8_C( 91), INT8_C( -50)), simde_mm_set_epi8(INT8_C(-116), INT8_C(-113), INT8_C( 47), INT8_C( -63), INT8_C( 35), INT8_C( 0), INT8_C( 63), INT8_C( 12), INT8_C( 7), INT8_C( 120), INT8_C( -97), INT8_C( 84), INT8_C( 125), INT8_C( -85), INT8_C(-110), INT8_C( -21)), 1 }, { simde_mm_set_epi8(INT8_C( 98), INT8_C( -51), INT8_C( 74), INT8_C( 114), INT8_C(-123), INT8_C( 80), INT8_C( 99), INT8_C( -50), INT8_C( 52), INT8_C( 86), INT8_C( -10), INT8_C( -16), INT8_C(-121), INT8_C( 99), INT8_C(-115), INT8_C( 
124)), simde_mm_set_epi8(INT8_C( -84), INT8_C(-104), INT8_C( 72), INT8_C( -97), INT8_C( 90), INT8_C( -38), INT8_C( 0), INT8_C( -55), INT8_C(-118), INT8_C(-106), INT8_C(-109), INT8_C( 101), INT8_C( 87), INT8_C(-102), INT8_C( -96), INT8_C( -13)), 1 }, { simde_mm_set_epi8(INT8_C( 1), INT8_C( 77), INT8_C( -64), INT8_C(-123), INT8_C( 49), INT8_C( -50), INT8_C( 60), INT8_C( 57), INT8_C( 64), INT8_C( -70), INT8_C( 56), INT8_C( -69), INT8_C(-103), INT8_C( -41), INT8_C( 82), INT8_C( -55)), simde_mm_set_epi8(INT8_C(-103), INT8_C( -36), INT8_C( -57), INT8_C( -38), INT8_C( 108), INT8_C( -48), INT8_C( -86), INT8_C( 0), INT8_C( 115), INT8_C( -51), INT8_C(-105), INT8_C( 30), INT8_C( 95), INT8_C( -27), INT8_C( 27), INT8_C(-118)), 1 }, { simde_mm_set_epi8(INT8_C( 1), INT8_C( 77), INT8_C( -64), INT8_C(-123), INT8_C( 49), INT8_C( -50), INT8_C( 60), INT8_C( 57), INT8_C( 64), INT8_C( -70), INT8_C( 56), INT8_C( -69), INT8_C(-103), INT8_C( -41), INT8_C( 82), INT8_C( -55)), simde_mm_set_epi8(INT8_C(-103), INT8_C( -36), INT8_C( -57), INT8_C( -38), INT8_C( 108), INT8_C( -48), INT8_C( -86), INT8_C( 99), INT8_C( 0), INT8_C( -51), INT8_C(-105), INT8_C( 30), INT8_C( 95), INT8_C( -27), INT8_C( 27), INT8_C(-118)), 1 }, { simde_mm_set_epi8(INT8_C( 1), INT8_C( 77), INT8_C( -64), INT8_C(-123), INT8_C( 49), INT8_C( -50), INT8_C( 60), INT8_C( 57), INT8_C( 64), INT8_C( -70), INT8_C( 56), INT8_C( -69), INT8_C(-103), INT8_C( -41), INT8_C( 82), INT8_C( -55)), simde_mm_set_epi8(INT8_C(-103), INT8_C( -36), INT8_C( -57), INT8_C( -38), INT8_C( 108), INT8_C( -48), INT8_C( -86), INT8_C( 99), INT8_C( 115), INT8_C( 0), INT8_C(-105), INT8_C( 30), INT8_C( 95), INT8_C( -27), INT8_C( 27), INT8_C(-118)), 1 }, { simde_mm_set_epi8(INT8_C( 1), INT8_C( 77), INT8_C( -64), INT8_C(-123), INT8_C( 49), INT8_C( -50), INT8_C( 60), INT8_C( 57), INT8_C( 64), INT8_C( -70), INT8_C( 56), INT8_C( -69), INT8_C(-103), INT8_C( -41), INT8_C( 82), INT8_C( -55)), simde_mm_set_epi8(INT8_C(-103), INT8_C( -36), INT8_C( -57), INT8_C( -38), 
INT8_C( 108), INT8_C( -48), INT8_C( -86), INT8_C( 99), INT8_C( 115), INT8_C( -51), INT8_C(0), INT8_C( 30), INT8_C( 95), INT8_C( -27), INT8_C( 27), INT8_C(-118)), 1 }, { simde_mm_set_epi8(INT8_C( 1), INT8_C( 77), INT8_C( -64), INT8_C(-123), INT8_C( 49), INT8_C( -50), INT8_C( 60), INT8_C( 57), INT8_C( 64), INT8_C( -70), INT8_C( 56), INT8_C( -69), INT8_C(-103), INT8_C( -41), INT8_C( 82), INT8_C( -55)), simde_mm_set_epi8(INT8_C(-103), INT8_C( -36), INT8_C( -57), INT8_C( -38), INT8_C( 108), INT8_C( -48), INT8_C( -86), INT8_C( 99), INT8_C( 115), INT8_C( -51), INT8_C(-105), INT8_C( 0), INT8_C( 95), INT8_C( -27), INT8_C( 27), INT8_C(-118)), 1 }, { simde_mm_set_epi8(INT8_C( 1), INT8_C( 77), INT8_C( -64), INT8_C(-123), INT8_C( 49), INT8_C( -50), INT8_C( 60), INT8_C( 57), INT8_C( 64), INT8_C( -70), INT8_C( 56), INT8_C( -69), INT8_C(-103), INT8_C( -41), INT8_C( 82), INT8_C( -55)), simde_mm_set_epi8(INT8_C(-103), INT8_C( -36), INT8_C( -57), INT8_C( -38), INT8_C( 108), INT8_C( -48), INT8_C( -86), INT8_C( 99), INT8_C( 115), INT8_C( -51), INT8_C(-105), INT8_C( 30), INT8_C( 0), INT8_C( -27), INT8_C( 27), INT8_C(-118)), 1 }, { simde_mm_set_epi8(INT8_C( 1), INT8_C( 77), INT8_C( -64), INT8_C(-123), INT8_C( 49), INT8_C( -50), INT8_C( 60), INT8_C( 57), INT8_C( 64), INT8_C( -70), INT8_C( 56), INT8_C( -69), INT8_C(-103), INT8_C( -41), INT8_C( 82), INT8_C( -55)), simde_mm_set_epi8(INT8_C(-103), INT8_C( -36), INT8_C( -57), INT8_C( -38), INT8_C( 108), INT8_C( -48), INT8_C( -86), INT8_C( 99), INT8_C( 115), INT8_C( -51), INT8_C(-105), INT8_C( 30), INT8_C( 95), INT8_C( 0), INT8_C( 27), INT8_C(-118)), 1 }, { simde_mm_set_epi8(INT8_C( 1), INT8_C( 77), INT8_C( -64), INT8_C(-123), INT8_C( 49), INT8_C( -50), INT8_C( 60), INT8_C( 57), INT8_C( 64), INT8_C( -70), INT8_C( 56), INT8_C( -69), INT8_C(-103), INT8_C( -41), INT8_C( 82), INT8_C( -55)), simde_mm_set_epi8(INT8_C(-103), INT8_C( -36), INT8_C( -57), INT8_C( -38), INT8_C( 108), INT8_C( -48), INT8_C( -86), INT8_C( 99), INT8_C( 115), INT8_C( -51), 
INT8_C(-105), INT8_C( 30), INT8_C( 95), INT8_C( -27), INT8_C( 0), INT8_C(-118)), 1 }, { simde_mm_set_epi8(INT8_C( 1), INT8_C( 77), INT8_C( -64), INT8_C(-123), INT8_C( 49), INT8_C( -50), INT8_C( 60), INT8_C( 57), INT8_C( 64), INT8_C( -70), INT8_C( 56), INT8_C( -69), INT8_C(-103), INT8_C( -41), INT8_C( 82), INT8_C( -55)), simde_mm_set_epi8(INT8_C(-103), INT8_C( -36), INT8_C( -57), INT8_C( -38), INT8_C( 108), INT8_C( -48), INT8_C( -86), INT8_C( 99), INT8_C( 115), INT8_C( -51), INT8_C(-105), INT8_C( 30), INT8_C( 95), INT8_C( -27), INT8_C( 27), INT8_C(0)), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_cmpistrz(test_vec[i].a, test_vec[i].b, 0); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_cmpistrz_16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; int r; } test_vec[] = { { simde_mm_set_epi16(INT16_C( 333), INT16_C(-16251), INT16_C( 12750), INT16_C( 15417), INT16_C( 16570), INT16_C( 14523), INT16_C(-26153), INT16_C( 21193)), simde_mm_set_epi16(INT16_C(-26148), INT16_C(-14374), INT16_C( 27856), INT16_C(-21917), INT16_C( 29645), INT16_C(-26850), INT16_C( 24549), INT16_C( 7050)), 0 }, { simde_mm_set_epi16(INT16_C( -4905), INT16_C( -2739), INT16_C( 2014), INT16_C( 11962), INT16_C( 14890), INT16_C( 14648), INT16_C( 17856), INT16_C(-30880)), simde_mm_set_epi16(INT16_C( 0), INT16_C(-17403), INT16_C( 11800), INT16_C(-29513), INT16_C( 5846), INT16_C(-12212), INT16_C( 3651), INT16_C( 24972)), 1 }, { simde_mm_set_epi16(INT16_C(-12743), INT16_C( 12443), INT16_C( 881), INT16_C( 26762), INT16_C( 19107), INT16_C(-14250), INT16_C( -3365), INT16_C( 14281)), simde_mm_set_epi16(INT16_C( 30693), INT16_C(0), INT16_C( 21467), INT16_C(-18962), INT16_C( 28151), INT16_C( 10326), INT16_C(-13595), INT16_C(-13237)), 1 }, { simde_mm_set_epi16(INT16_C(-27777), INT16_C(-25150), INT16_C( 25480), INT16_C( 10629), INT16_C(-23438), INT16_C( 13658), INT16_C( -1051), INT16_C( 25277)), 
simde_mm_set_epi16(INT16_C( 20506), INT16_C( 31627), INT16_C(0), INT16_C(-14102), INT16_C( 13236), INT16_C( 14287), INT16_C( 14652), INT16_C(-15979)), 1 }, { simde_mm_set_epi16(INT16_C( 5382), INT16_C( 24110), INT16_C( 5366), INT16_C(-15623), INT16_C( 8385), INT16_C( 29122), INT16_C( 63), INT16_C( 19915)), simde_mm_set_epi16(INT16_C( 30208), INT16_C( 25244), INT16_C(-20967), INT16_C( 0), INT16_C( 25603), INT16_C(-27765), INT16_C(-15516), INT16_C(-30558)), 1 }, { simde_mm_set_epi16(INT16_C( 13998), INT16_C( 12820), INT16_C(-19943), INT16_C( -9871), INT16_C(-22321), INT16_C(-26869), INT16_C( 5551), INT16_C(-12431)), simde_mm_set_epi16(INT16_C( 1953), INT16_C( 8870), INT16_C( -1694), INT16_C( -2505), INT16_C( 0), INT16_C( 5983), INT16_C( 19243), INT16_C( 13384)), 1 }, { simde_mm_set_epi16(INT16_C(-12017), INT16_C(-27923), INT16_C(-10779), INT16_C( 8140), INT16_C( 24515), INT16_C( 19303), INT16_C( -2536), INT16_C( 23502)), simde_mm_set_epi16(INT16_C(-29553), INT16_C( 12225), INT16_C( 9080), INT16_C( 16140), INT16_C( 1912), INT16_C(0), INT16_C( 32171), INT16_C(-27925)), 1 }, { simde_mm_set_epi16(INT16_C( 25293), INT16_C( 19058), INT16_C(-31408), INT16_C( 25550), INT16_C( 13398), INT16_C( -2320), INT16_C(-30877), INT16_C(-29316)), simde_mm_set_epi16(INT16_C(-21352), INT16_C( 18591), INT16_C( 23258), INT16_C(-24887), INT16_C(-30058), INT16_C(-27803), INT16_C( 0), INT16_C(-24333)), 1 }, { simde_mm_set_epi16(INT16_C( 25293), INT16_C( 19058), INT16_C(-31408), INT16_C( 25550), INT16_C( 13398), INT16_C( -2320), INT16_C(-30877), INT16_C(-29316)), simde_mm_set_epi16(INT16_C(-21352), INT16_C( 18591), INT16_C( 23258), INT16_C(-24887), INT16_C(-30058), INT16_C(-27803), INT16_C( 870), INT16_C(0)), 1 } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { int r = simde_mm_cmpistrz(test_vec[i].a, test_vec[i].b, 1); simde_assert_equal_i(r, test_vec[i].r); } return 0; } static int test_simde_mm_crc32_u8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { uint32_t crc; 
uint8_t v; uint32_t r; } test_vec[] = { { UINT32_C(3488119326), UINT8_C(233), UINT32_C( 661382116) }, { UINT32_C(4181338815), UINT8_C(106), UINT32_C(3873165213) }, { UINT32_C(3611029619), UINT8_C(190), UINT32_C(2087866855) }, { UINT32_C(3633137044), UINT8_C(206), UINT32_C( 975142830) }, { UINT32_C(3701195429), UINT8_C( 59), UINT32_C(1041029362) }, { UINT32_C(1574265292), UINT8_C( 54), UINT32_C(2563871276) }, { UINT32_C( 464550963), UINT8_C( 75), UINT32_C(4217027774) }, { UINT32_C(3547716249), UINT8_C(211), UINT32_C( 709509214) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint32_t crc = test_vec[i].crc; uint8_t v = test_vec[i].v; uint32_t r = simde_mm_crc32_u8(crc, v); simde_assert_equal_u32(r, test_vec[i].r); } return 0; } static int test_simde_mm_crc32_u16 (SIMDE_MUNIT_TEST_ARGS) { static const struct { uint32_t crc; uint16_t v; uint32_t r; } test_vec[] = { { UINT32_C( 728173782), UINT16_C(58051), UINT32_C( 765801584) }, { UINT32_C(2531395991), UINT16_C(57124), UINT32_C(2048446530) }, { UINT32_C( 297646163), UINT16_C( 4793), UINT32_C( 145203338) }, { UINT32_C(4018813906), UINT16_C( 4093), UINT32_C(1871435995) }, { UINT32_C(1176812284), UINT16_C(48677), UINT32_C(1916618632) }, { UINT32_C(1019935701), UINT16_C(36390), UINT32_C( 873790012) }, { UINT32_C( 26721567), UINT16_C(47956), UINT32_C(1883589466) }, { UINT32_C(2658379744), UINT16_C(11705), UINT32_C(2809192825) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint32_t crc = test_vec[i].crc; uint16_t v = test_vec[i].v; uint32_t r = simde_mm_crc32_u16(crc, v); simde_assert_equal_u32(r, test_vec[i].r); } return 0; } static int test_simde_mm_crc32_u32 (SIMDE_MUNIT_TEST_ARGS) { static const struct { uint32_t crc; uint32_t v; uint32_t r; } test_vec[] = { { UINT32_C(2436525653), UINT32_C(2335302948), UINT32_C(3283443050) }, { UINT32_C(1145760123), UINT32_C(3888075817), UINT32_C(1275307424) }, { UINT32_C(1404614118), UINT32_C(1676357820), 
UINT32_C(2140092727) }, { UINT32_C( 546365338), UINT32_C(2107344167), UINT32_C(3150313630) }, { UINT32_C( 386848243), UINT32_C( 899891386), UINT32_C(3310319573) }, { UINT32_C(1383787817), UINT32_C( 674838849), UINT32_C(4185068584) }, { UINT32_C(2877026799), UINT32_C(3155060257), UINT32_C(1654064964) }, { UINT32_C(1826397765), UINT32_C( 401176356), UINT32_C(1688688127) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint32_t crc = test_vec[i].crc; uint32_t v = test_vec[i].v; uint32_t r = simde_mm_crc32_u32(crc, v); simde_assert_equal_u32(r, test_vec[i].r); } return 0; } static int test_simde_mm_crc32_u64 (SIMDE_MUNIT_TEST_ARGS) { static const struct { uint64_t crc; uint64_t v; uint64_t r; } test_vec[] = { { UINT64_C(10964460371209988374), UINT64_C(14849487482734297659), UINT64_C( 2530609228) }, { UINT64_C(14906864906438122131), UINT64_C(10579630055528908036), UINT64_C( 2336937406) }, { UINT64_C( 8450238593151902479), UINT64_C(14846135117717324041), UINT64_C( 2389161291) }, { UINT64_C(15754071801993691947), UINT64_C(17187741549636385145), UINT64_C( 2628533589) }, { UINT64_C(17686444891285660866), UINT64_C(12477846746303524896), UINT64_C( 1813528429) }, { UINT64_C( 3308212454223314746), UINT64_C( 1686784245036627611), UINT64_C( 721365030) }, { UINT64_C( 157211343182889549), UINT64_C(14854147642213948918), UINT64_C( 1805070678) }, { UINT64_C( 7018798198485263495), UINT64_C( 9253000792826939901), UINT64_C( 1576406668) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { uint64_t crc = test_vec[i].crc; uint64_t v = test_vec[i].v; uint64_t r = simde_mm_crc32_u64(crc, v); simde_assert_equal_u64(r, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpestrs_8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpestrs_16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpestrz_8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpestrz_16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpistrs_8) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpistrs_16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpistrz_8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpistrz_16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_crc32_u8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_crc32_u16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_crc32_u32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_crc32_u64) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/ssse3.c000066400000000000000000003760161400333146700154470ustar00rootroot00000000000000/* Copyright (c) 2017 Evan Nemerson * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #define SIMDE_TESTS_CURRENT_ISAX ssse3 #include #include static int test_simde_mm_abs_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C(-128), INT8_C( 127), INT8_C( 0), INT8_C( -1), INT8_C( 125), INT8_C( -56), INT8_C(-120), INT8_C( -18), INT8_C( -45), INT8_C( 42), INT8_C( 62), INT8_C( -99), INT8_C( -57), INT8_C( 32), INT8_C( -68), INT8_C( 66)), simde_x_mm_set_epu8(UINT8_C( 128), UINT8_C( 127), UINT8_C( 0), UINT8_C( 1), UINT8_C( 125), UINT8_C( 56), UINT8_C( 120), UINT8_C( 18), UINT8_C( 45), UINT8_C( 42), UINT8_C( 62), UINT8_C( 99), UINT8_C( 57), UINT8_C( 32), UINT8_C( 68), UINT8_C( 66)) }, { simde_mm_set_epi8(INT8_C( 113), INT8_C( -60), INT8_C( 1), INT8_C( 32), INT8_C( 41), INT8_C( 40), INT8_C( 112), INT8_C( -39), INT8_C( -65), INT8_C( 54), INT8_C(-116), INT8_C( -97), INT8_C( -18), INT8_C( 78), INT8_C( -84), INT8_C( 94)), simde_x_mm_set_epu8(UINT8_C( 113), UINT8_C( 60), UINT8_C( 1), UINT8_C( 32), UINT8_C( 41), UINT8_C( 40), UINT8_C( 112), UINT8_C( 39), UINT8_C( 65), UINT8_C( 54), UINT8_C( 116), UINT8_C( 97), UINT8_C( 18), UINT8_C( 78), UINT8_C( 84), UINT8_C( 94)) }, { simde_mm_set_epi8(INT8_C( 126), INT8_C( -67), INT8_C( -75), INT8_C( 48), INT8_C( -49), INT8_C( -8), INT8_C( 105), INT8_C( -28), INT8_C(-100), INT8_C( -1), INT8_C( 112), INT8_C( -27), INT8_C( -35), INT8_C( 114), INT8_C( -81), INT8_C( 121)), simde_x_mm_set_epu8(UINT8_C( 126), UINT8_C( 67), UINT8_C( 75), UINT8_C( 48), UINT8_C( 49), UINT8_C( 8), UINT8_C( 105), UINT8_C( 28), UINT8_C( 100), UINT8_C( 1), UINT8_C( 112), UINT8_C( 27), UINT8_C( 35), UINT8_C( 114), UINT8_C( 81), UINT8_C( 121)) }, { simde_mm_set_epi8(INT8_C( 94), INT8_C( 74), INT8_C( 0), INT8_C( -58), INT8_C(-112), INT8_C( 29), INT8_C(-113), INT8_C( -48), INT8_C( 92), INT8_C( -26), INT8_C( -61), INT8_C( 19), INT8_C( -82), INT8_C( -78), INT8_C( -59), INT8_C( 102)), simde_x_mm_set_epu8(UINT8_C( 94), UINT8_C( 74), UINT8_C( 0), UINT8_C( 58), UINT8_C( 112), UINT8_C( 29), 
UINT8_C( 113), UINT8_C( 48), UINT8_C( 92), UINT8_C( 26), UINT8_C( 61), UINT8_C( 19), UINT8_C( 82), UINT8_C( 78), UINT8_C( 59), UINT8_C( 102)) }, { simde_mm_set_epi8(INT8_C( -47), INT8_C( 2), INT8_C( 14), INT8_C( 29), INT8_C( 46), INT8_C( 102), INT8_C(-121), INT8_C( 118), INT8_C( 113), INT8_C( 31), INT8_C( 96), INT8_C( -45), INT8_C( -4), INT8_C( 59), INT8_C( -14), INT8_C(-113)), simde_x_mm_set_epu8(UINT8_C( 47), UINT8_C( 2), UINT8_C( 14), UINT8_C( 29), UINT8_C( 46), UINT8_C( 102), UINT8_C( 121), UINT8_C( 118), UINT8_C( 113), UINT8_C( 31), UINT8_C( 96), UINT8_C( 45), UINT8_C( 4), UINT8_C( 59), UINT8_C( 14), UINT8_C( 113)) }, { simde_mm_set_epi8(INT8_C( -4), INT8_C( -36), INT8_C( -71), INT8_C( 103), INT8_C(-106), INT8_C( 36), INT8_C( -43), INT8_C( 119), INT8_C( 62), INT8_C( 74), INT8_C( 88), INT8_C( 28), INT8_C( 5), INT8_C( 31), INT8_C( -84), INT8_C( -65)), simde_x_mm_set_epu8(UINT8_C( 4), UINT8_C( 36), UINT8_C( 71), UINT8_C( 103), UINT8_C( 106), UINT8_C( 36), UINT8_C( 43), UINT8_C( 119), UINT8_C( 62), UINT8_C( 74), UINT8_C( 88), UINT8_C( 28), UINT8_C( 5), UINT8_C( 31), UINT8_C( 84), UINT8_C( 65)) }, { simde_mm_set_epi8(INT8_C( -76), INT8_C( 66), INT8_C(-116), INT8_C( 14), INT8_C( 42), INT8_C( -27), INT8_C( 102), INT8_C( 115), INT8_C( -18), INT8_C( 33), INT8_C( 48), INT8_C( 113), INT8_C( 64), INT8_C( 25), INT8_C(-128), INT8_C(-121)), simde_x_mm_set_epu8(UINT8_C( 76), UINT8_C( 66), UINT8_C( 116), UINT8_C( 14), UINT8_C( 42), UINT8_C( 27), UINT8_C( 102), UINT8_C( 115), UINT8_C( 18), UINT8_C( 33), UINT8_C( 48), UINT8_C( 113), UINT8_C( 64), UINT8_C( 25), UINT8_C( 128), UINT8_C( 121)) }, { simde_mm_set_epi8(INT8_C( 83), INT8_C( 29), INT8_C( -57), INT8_C(-122), INT8_C( -78), INT8_C( -6), INT8_C( 104), INT8_C( -66), INT8_C( -96), INT8_C( -79), INT8_C( -74), INT8_C( -82), INT8_C( -64), INT8_C( 4), INT8_C( 64), INT8_C( -63)), simde_x_mm_set_epu8(UINT8_C( 83), UINT8_C( 29), UINT8_C( 57), UINT8_C( 122), UINT8_C( 78), UINT8_C( 6), UINT8_C( 104), UINT8_C( 66), UINT8_C( 96), 
UINT8_C( 79), UINT8_C( 74), UINT8_C( 82), UINT8_C( 64), UINT8_C( 4), UINT8_C( 64), UINT8_C( 63)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_abs_epi8(test_vec[i].a); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_abs_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C(0), INT16_C(-1), INT16_C( 17002), INT16_C(-16782), INT16_C( 31724), INT16_C( 17895)), simde_x_mm_set_epu16(UINT16_C(32768), UINT16_C(32767), UINT16_C( 0), UINT16_C( 1), UINT16_C(17002), UINT16_C(16782), UINT16_C(31724), UINT16_C(17895)) }, { simde_mm_set_epi16(INT16_C( 516), INT16_C(-21666), INT16_C( 7667), INT16_C( 17413), INT16_C(-25469), INT16_C( 28873), INT16_C(-13553), INT16_C(-30319)), simde_x_mm_set_epu16(UINT16_C( 516), UINT16_C(21666), UINT16_C( 7667), UINT16_C(17413), UINT16_C(25469), UINT16_C(28873), UINT16_C(13553), UINT16_C(30319)) }, { simde_mm_set_epi16(INT16_C(-19076), INT16_C(-28644), INT16_C( -3095), INT16_C( -4676), INT16_C( 7446), INT16_C( -2630), INT16_C( 16197), INT16_C(-16562)), simde_x_mm_set_epu16(UINT16_C(19076), UINT16_C(28644), UINT16_C( 3095), UINT16_C( 4676), UINT16_C( 7446), UINT16_C( 2630), UINT16_C(16197), UINT16_C(16562)) }, { simde_mm_set_epi16(INT16_C( 17533), INT16_C( 20338), INT16_C( 8248), INT16_C( 6751), INT16_C( -3126), INT16_C( 26964), INT16_C( 14690), INT16_C(-25810)), simde_x_mm_set_epu16(UINT16_C(17533), UINT16_C(20338), UINT16_C( 8248), UINT16_C( 6751), UINT16_C( 3126), UINT16_C(26964), UINT16_C(14690), UINT16_C(25810)) }, { simde_mm_set_epi16(INT16_C( 18848), INT16_C( 6581), INT16_C(-32132), INT16_C( 14259), INT16_C( 20181), INT16_C( 8393), INT16_C( 8677), INT16_C( 24318)), simde_x_mm_set_epu16(UINT16_C(18848), UINT16_C( 6581), UINT16_C(32132), UINT16_C(14259), UINT16_C(20181), UINT16_C( 8393), UINT16_C( 8677), UINT16_C(24318)) }, { 
simde_mm_set_epi16(INT16_C(-16277), INT16_C(-19021), INT16_C( -2631), INT16_C( 6570), INT16_C( 17968), INT16_C(-24371), INT16_C(-26844), INT16_C( -2593)), simde_x_mm_set_epu16(UINT16_C(16277), UINT16_C(19021), UINT16_C( 2631), UINT16_C( 6570), UINT16_C(17968), UINT16_C(24371), UINT16_C(26844), UINT16_C( 2593)) }, { simde_mm_set_epi16(INT16_C( 23202), INT16_C(-30664), INT16_C( 14496), INT16_C(-10863), INT16_C(-12787), INT16_C( -4044), INT16_C( 13497), INT16_C( 6178)), simde_x_mm_set_epu16(UINT16_C(23202), UINT16_C(30664), UINT16_C(14496), UINT16_C(10863), UINT16_C(12787), UINT16_C( 4044), UINT16_C(13497), UINT16_C( 6178)) }, { simde_mm_set_epi16(INT16_C(-16084), INT16_C( 24093), INT16_C( -9776), INT16_C( 28468), INT16_C( -9561), INT16_C( -3016), INT16_C( -8976), INT16_C(-19890)), simde_x_mm_set_epu16(UINT16_C(16084), UINT16_C(24093), UINT16_C( 9776), UINT16_C(28468), UINT16_C( 9561), UINT16_C( 3016), UINT16_C( 8976), UINT16_C(19890)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_abs_epi16(test_vec[i].a); simde_assert_m128i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_abs_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32( INT32_MIN , INT32_C(2147483647), INT32_C(0), INT32_C(-1)), simde_x_mm_set_epu32(UINT32_C(2147483648), UINT32_C(2147483647), UINT32_C(0), UINT32_C(1)) }, { simde_mm_set_epi32(INT32_C(-1840848639), INT32_C( 1050450514), INT32_C( -157155149), INT32_C(-1343526078)), simde_x_mm_set_epu32(UINT32_C(1840848639), UINT32_C(1050450514), UINT32_C( 157155149), UINT32_C(1343526078)) }, { simde_mm_set_epi32(INT32_C( 1334116049), INT32_C( 2129925302), INT32_C( 23778640), INT32_C( 713371303)), simde_x_mm_set_epu32(UINT32_C(1334116049), UINT32_C(2129925302), UINT32_C( 23778640), UINT32_C( 713371303)) }, { simde_mm_set_epi32(INT32_C( -302860244), INT32_C( 2030687021), INT32_C( 1060978877), INT32_C( -670900580)), 
simde_x_mm_set_epu32(UINT32_C( 302860244), UINT32_C(2030687021), UINT32_C(1060978877), UINT32_C( 670900580)) }, { simde_mm_set_epi32(INT32_C( 2040528386), INT32_C( 1361895717), INT32_C( 147208745), INT32_C( 773158561)), simde_x_mm_set_epu32(UINT32_C(2040528386), UINT32_C(1361895717), UINT32_C( 147208745), UINT32_C( 773158561)) }, { simde_mm_set_epi32(INT32_C(-1860066775), INT32_C( 109120839), INT32_C( 825660888), INT32_C( 1402710636)), simde_x_mm_set_epu32(UINT32_C(1860066775), UINT32_C( 109120839), UINT32_C( 825660888), UINT32_C(1402710636)) }, { simde_mm_set_epi32(INT32_C( 1113257677), INT32_C( 2062218865), INT32_C( 1785064575), INT32_C( 1289174686)), simde_x_mm_set_epu32(UINT32_C(1113257677), UINT32_C(2062218865), UINT32_C(1785064575), UINT32_C(1289174686)) }, { simde_mm_set_epi32(INT32_C(-2072383870), INT32_C( 1611206266), INT32_C( 1414397723), INT32_C(-1863310079)), simde_x_mm_set_epu32(UINT32_C(2072383870), UINT32_C(1611206266), UINT32_C(1414397723), UINT32_C(1863310079)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_abs_epi32(test_vec[i].a); simde_assert_m128i_u32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_abs_pi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi8(INT8_C( 38), INT8_C( 28), INT8_C( -38), INT8_C(-113), INT8_C(-109), INT8_C( -88), INT8_C( 99), INT8_C( -40)), simde_x_mm_set_pu8(UINT8_C( 38), UINT8_C( 28), UINT8_C( 38), UINT8_C(113), UINT8_C(109), UINT8_C( 88), UINT8_C( 99), UINT8_C( 40)) }, { simde_mm_set_pi8(INT8_C( 57), INT8_C( 21), INT8_C( 63), INT8_C( 38), INT8_C( 75), INT8_C( -74), INT8_C( -71), INT8_C( 58)), simde_x_mm_set_pu8(UINT8_C( 57), UINT8_C( 21), UINT8_C( 63), UINT8_C( 38), UINT8_C( 75), UINT8_C( 74), UINT8_C( 71), UINT8_C( 58)) }, { simde_mm_set_pi8(INT8_C( 107), INT8_C(-123), INT8_C( -46), INT8_C( 116), INT8_C( 49), INT8_C(-110), INT8_C( -27), INT8_C( -14)), simde_x_mm_set_pu8(UINT8_C(107), 
UINT8_C(123), UINT8_C( 46), UINT8_C(116), UINT8_C( 49), UINT8_C(110), UINT8_C( 27), UINT8_C( 14)) }, { simde_mm_set_pi8(INT8_C( 94), INT8_C( -17), INT8_C(-121), INT8_C( -59), INT8_C( -39), INT8_C(-120), INT8_C( -6), INT8_C(-128)), simde_x_mm_set_pu8(UINT8_C( 94), UINT8_C( 17), UINT8_C(121), UINT8_C( 59), UINT8_C( 39), UINT8_C(120), UINT8_C( 6), UINT8_C(128)) }, { simde_mm_set_pi8(INT8_C(-113), INT8_C( -83), INT8_C( 56), INT8_C( 12), INT8_C( 114), INT8_C( 46), INT8_C( -44), INT8_C( 75)), simde_x_mm_set_pu8(UINT8_C(113), UINT8_C( 83), UINT8_C( 56), UINT8_C( 12), UINT8_C(114), UINT8_C( 46), UINT8_C( 44), UINT8_C( 75)) }, { simde_mm_set_pi8(INT8_C( -28), INT8_C( 63), INT8_C( 103), INT8_C(-127), INT8_C( 94), INT8_C( 94), INT8_C( 64), INT8_C( 107)), simde_x_mm_set_pu8(UINT8_C( 28), UINT8_C( 63), UINT8_C(103), UINT8_C(127), UINT8_C( 94), UINT8_C( 94), UINT8_C( 64), UINT8_C(107)) }, { simde_mm_set_pi8(INT8_C( -42), INT8_C( 122), INT8_C( 121), INT8_C( 5), INT8_C( 93), INT8_C( -41), INT8_C( -24), INT8_C( 13)), simde_x_mm_set_pu8(UINT8_C( 42), UINT8_C(122), UINT8_C(121), UINT8_C( 5), UINT8_C( 93), UINT8_C( 41), UINT8_C( 24), UINT8_C( 13)) }, { simde_mm_set_pi8(INT8_C( -78), INT8_C( -48), INT8_C( 35), INT8_C( 4), INT8_C( -62), INT8_C( -9), INT8_C( 70), INT8_C( 20)), simde_x_mm_set_pu8(UINT8_C( 78), UINT8_C( 48), UINT8_C( 35), UINT8_C( 4), UINT8_C( 62), UINT8_C( 9), UINT8_C( 70), UINT8_C( 20)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_abs_pi8(test_vec[i].a); simde_assert_m64_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_abs_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( 22656), INT16_C( -516), INT16_C( 11935), INT16_C(-27223)), simde_x_mm_set_pu16(UINT16_C(22656), UINT16_C( 516), UINT16_C(11935), UINT16_C(27223)) }, { simde_mm_set_pi16(INT16_C(-22991), INT16_C( -6351), INT16_C(-10389), INT16_C( -8080)), 
simde_x_mm_set_pu16(UINT16_C(22991), UINT16_C( 6351), UINT16_C(10389), UINT16_C( 8080)) }, { simde_mm_set_pi16(INT16_C( 30466), INT16_C(-32585), INT16_C( 19645), INT16_C(-10576)), simde_x_mm_set_pu16(UINT16_C(30466), UINT16_C(32585), UINT16_C(19645), UINT16_C(10576)) }, { simde_mm_set_pi16(INT16_C(-16502), INT16_C( -5192), INT16_C( 4129), INT16_C( 3864)), simde_x_mm_set_pu16(UINT16_C(16502), UINT16_C( 5192), UINT16_C( 4129), UINT16_C( 3864)) }, { simde_mm_set_pi16(INT16_C( 21069), INT16_C( 17958), INT16_C(-13493), INT16_C( 9609)), simde_x_mm_set_pu16(UINT16_C(21069), UINT16_C(17958), UINT16_C(13493), UINT16_C( 9609)) }, { simde_mm_set_pi16(INT16_C( 400), INT16_C( 20835), INT16_C( 20896), INT16_C( 11278)), simde_x_mm_set_pu16(UINT16_C( 400), UINT16_C(20835), UINT16_C(20896), UINT16_C(11278)) }, { simde_mm_set_pi16(INT16_C(-12492), INT16_C(-12858), INT16_C( 23414), INT16_C(-21576)), simde_x_mm_set_pu16(UINT16_C(12492), UINT16_C(12858), UINT16_C(23414), UINT16_C(21576)) }, { simde_mm_set_pi16(INT16_C( 6654), INT16_C(-24897), INT16_C(-24943), INT16_C(-25087)), simde_x_mm_set_pu16(UINT16_C( 6654), UINT16_C(24897), UINT16_C(24943), UINT16_C(25087)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_abs_pi16(test_vec[i].a); simde_assert_m64_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_abs_pi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( -13878279), INT32_C(-1713620712)), simde_x_mm_set_pu32(UINT32_C( 13878279), UINT32_C(1713620712)) }, { simde_mm_set_pi32(INT32_C( -727247206), INT32_C( -746817076)), simde_x_mm_set_pu32(UINT32_C( 727247206), UINT32_C( 746817076)) }, { simde_mm_set_pi32(INT32_C( 850085177), INT32_C( -729101966)), simde_x_mm_set_pu32(UINT32_C( 850085177), UINT32_C( 729101966)) }, { simde_mm_set_pi32(INT32_C( 64469638), INT32_C( 403976835)), simde_x_mm_set_pu32(UINT32_C( 64469638), UINT32_C( 403976835)) }, { 
simde_mm_set_pi32(INT32_C( 1585672991), INT32_C( 1784425824)), simde_x_mm_set_pu32(UINT32_C(1585672991), UINT32_C(1784425824)) }, { simde_mm_set_pi32(INT32_C( -137548456), INT32_C(-1080835717)), simde_x_mm_set_pu32(UINT32_C( 137548456), UINT32_C(1080835717)) }, { simde_mm_set_pi32(INT32_C( -942357541), INT32_C( 223746416)), simde_x_mm_set_pu32(UINT32_C( 942357541), UINT32_C( 223746416)) }, { simde_mm_set_pi32(INT32_C( -21393113), INT32_C( -293603855)), simde_x_mm_set_pu32(UINT32_C( 21393113), UINT32_C( 293603855)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_abs_pi32(test_vec[i].a); simde_assert_m64_u32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_alignr_epi8 (SIMDE_MUNIT_TEST_ARGS) { static const struct { const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { { { INT8_C( 75), -INT8_C( 8), INT8_C( 45), INT8_C( 23), -INT8_C( 117), -INT8_C( 4), INT8_C( 9), -INT8_C( 122), -INT8_C( 78), -INT8_C( 91), -INT8_C( 95), INT8_C( 41), -INT8_C( 56), -INT8_C( 87), -INT8_C( 81), -INT8_C( 26) }, { -INT8_C( 108), INT8_C( 106), INT8_C( 47), -INT8_C( 105), -INT8_C( 73), INT8_C( 46), -INT8_C( 108), -INT8_C( 127), INT8_C( 17), INT8_C( 109), -INT8_C( 60), INT8_C( 111), -INT8_C( 22), INT8_C( 82), -INT8_C( 65), INT8_C( 53) }, { -INT8_C( 108), INT8_C( 106), INT8_C( 47), -INT8_C( 105), -INT8_C( 73), INT8_C( 46), -INT8_C( 108), -INT8_C( 127), INT8_C( 17), INT8_C( 109), -INT8_C( 60), INT8_C( 111), -INT8_C( 22), INT8_C( 82), -INT8_C( 65), INT8_C( 53) } }, { { INT8_C( 74), -INT8_C( 20), INT8_C( 77), -INT8_C( 43), -INT8_C( 24), INT8_C( 86), INT8_C( 91), -INT8_C( 102), -INT8_C( 5), -INT8_C( 3), -INT8_C( 61), -INT8_C( 61), -INT8_C( 90), INT8_C( 115), -INT8_C( 87), INT8_C( 59) }, { -INT8_C( 35), -INT8_C( 40), -INT8_C( 46), -INT8_C( 108), INT8_C( 7), INT8_C( 102), INT8_C( 22), INT8_C( 24), -INT8_C( 45), -INT8_C( 38), -INT8_C( 120), -INT8_C( 67), INT8_C( 44), INT8_C( 71), -INT8_C( 13), INT8_C( 119) }, { 
INT8_C( 102), INT8_C( 22), INT8_C( 24), -INT8_C( 45), -INT8_C( 38), -INT8_C( 120), -INT8_C( 67), INT8_C( 44), INT8_C( 71), -INT8_C( 13), INT8_C( 119), INT8_C( 74), -INT8_C( 20), INT8_C( 77), -INT8_C( 43), -INT8_C( 24) } }, { { INT8_C( 51), INT8_C( 64), INT8_C( 76), INT8_C( 27), -INT8_C( 106), -INT8_C( 88), -INT8_C( 74), -INT8_C( 111), -INT8_C( 91), INT8_C( 121), INT8_C( 85), INT8_C( 75), -INT8_C( 20), -INT8_C( 2), -INT8_C( 122), -INT8_C( 55) }, { -INT8_C( 41), INT8_C( 88), INT8_C( 94), -INT8_C( 34), -INT8_C( 65), INT8_C( 116), -INT8_C( 10), -INT8_C( 110), INT8_C( 78), INT8_C( 126), INT8_C( 80), INT8_C( 123), -INT8_C( 59), INT8_C( 67), -INT8_C( 14), -INT8_C( 8) }, { INT8_C( 80), INT8_C( 123), -INT8_C( 59), INT8_C( 67), -INT8_C( 14), -INT8_C( 8), INT8_C( 51), INT8_C( 64), INT8_C( 76), INT8_C( 27), -INT8_C( 106), -INT8_C( 88), -INT8_C( 74), -INT8_C( 111), -INT8_C( 91), INT8_C( 121) } }, { { -INT8_C( 125), INT8_C( 62), INT8_C( 20), INT8_C( 25), -INT8_C( 26), -INT8_C( 54), -INT8_C( 86), -INT8_C( 117), INT8_C( 67), -INT8_C( 1), -INT8_C( 41), INT8_C( 48), -INT8_C( 2), INT8_C( 93), -INT8_C( 7), -INT8_C( 43) }, { -INT8_C( 74), INT8_C( 87), -INT8_C( 77), INT8_C( 117), -INT8_C( 53), -INT8_C( 87), INT8_C( 7), INT8_C( 26), INT8_C( 40), INT8_C( 87), -INT8_C( 107), -INT8_C( 19), -INT8_C( 102), -INT8_C( 121), -INT8_C( 26), INT8_C( 29) }, { INT8_C( 29), -INT8_C( 125), INT8_C( 62), INT8_C( 20), INT8_C( 25), -INT8_C( 26), -INT8_C( 54), -INT8_C( 86), -INT8_C( 117), INT8_C( 67), -INT8_C( 1), -INT8_C( 41), INT8_C( 48), -INT8_C( 2), INT8_C( 93), -INT8_C( 7) } }, { { -INT8_C( 59), -INT8_C( 6), INT8_C( 54), -INT8_C( 84), -INT8_C( 60), -INT8_C( 31), INT8_C( 55), INT8_C( 7), -INT8_C( 32), INT8_C( 14), INT8_C( 55), -INT8_C( 34), INT8_C( 108), INT8_C( 49), -INT8_C( 77), INT8_C( 34) }, { -INT8_C( 120), INT8_C( 102), -INT8_C( 105), INT8_C( 84), INT8_C( 16), -INT8_C( 98), INT8_C( 110), INT8_C( 56), -INT8_C( 10), INT8_C( 3), INT8_C( 37), -INT8_C( 112), -INT8_C( 118), INT8_C( 11), -INT8_C( 82), 
INT8_C( 79) }, { -INT8_C( 60), -INT8_C( 31), INT8_C( 55), INT8_C( 7), -INT8_C( 32), INT8_C( 14), INT8_C( 55), -INT8_C( 34), INT8_C( 108), INT8_C( 49), -INT8_C( 77), INT8_C( 34), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 5), -INT8_C( 28), -INT8_C( 5), -INT8_C( 55), -INT8_C( 59), INT8_C( 51), -INT8_C( 47), -INT8_C( 90), INT8_C( 65), INT8_C( 8), -INT8_C( 124), -INT8_C( 83), INT8_C( 57), INT8_C( 56), -INT8_C( 49), -INT8_C( 62) }, { -INT8_C( 98), INT8_C( 102), INT8_C( 22), -INT8_C( 82), INT8_C( 5), -INT8_C( 124), -INT8_C( 26), -INT8_C( 5), -INT8_C( 121), INT8_C( 12), -INT8_C( 117), INT8_C( 17), INT8_C( 23), INT8_C( 57), INT8_C( 96), INT8_C( 29) }, { INT8_C( 8), -INT8_C( 124), -INT8_C( 83), INT8_C( 57), INT8_C( 56), -INT8_C( 49), -INT8_C( 62), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 30), INT8_C( 92), -INT8_C( 26), -INT8_C( 29), -INT8_C( 113), -INT8_C( 73), -INT8_C( 119), -INT8_C( 48), -INT8_C( 64), INT8_C( 14), INT8_C( 126), -INT8_C( 7), INT8_C( 70), INT8_C( 77), -INT8_C( 69), -INT8_C( 28) }, { -INT8_C( 76), -INT8_C( 47), -INT8_C( 109), -INT8_C( 71), INT8_C( 85), INT8_C( 121), -INT8_C( 76), -INT8_C( 36), -INT8_C( 123), INT8_C( 63), -INT8_C( 19), -INT8_C( 99), INT8_C( 121), INT8_C( 78), -INT8_C( 70), -INT8_C( 105) }, { -INT8_C( 69), -INT8_C( 28), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, }; { simde__m128i a = simde_x_mm_loadu_epi8(test_vec[0].a); simde__m128i b = simde_x_mm_loadu_epi8(test_vec[0].b); simde__m128i r = simde_mm_alignr_epi8(a, b, 0); simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[0].r)); } { simde__m128i a = simde_x_mm_loadu_epi8(test_vec[1].a); simde__m128i b = simde_x_mm_loadu_epi8(test_vec[1].b); simde__m128i r = simde_mm_alignr_epi8(a, b, 1 * 5); simde_test_x86_assert_equal_i8x16(r, 
simde_x_mm_loadu_epi8(test_vec[1].r)); } { simde__m128i a = simde_x_mm_loadu_epi8(test_vec[2].a); simde__m128i b = simde_x_mm_loadu_epi8(test_vec[2].b); simde__m128i r = simde_mm_alignr_epi8(a, b, 2 * 5); simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[2].r)); } { simde__m128i a = simde_x_mm_loadu_epi8(test_vec[3].a); simde__m128i b = simde_x_mm_loadu_epi8(test_vec[3].b); simde__m128i r = simde_mm_alignr_epi8(a, b, 3 * 5); simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[3].r)); } { simde__m128i a = simde_x_mm_loadu_epi8(test_vec[4].a); simde__m128i b = simde_x_mm_loadu_epi8(test_vec[4].b); simde__m128i r = simde_mm_alignr_epi8(a, b, 4 * 5); simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[4].r)); } { simde__m128i a = simde_x_mm_loadu_epi8(test_vec[5].a); simde__m128i b = simde_x_mm_loadu_epi8(test_vec[5].b); simde__m128i r = simde_mm_alignr_epi8(a, b, 5 * 5); simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[5].r)); } { simde__m128i a = simde_x_mm_loadu_epi8(test_vec[6].a); simde__m128i b = simde_x_mm_loadu_epi8(test_vec[6].b); simde__m128i r = simde_mm_alignr_epi8(a, b, 6 * 5); simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[6].r)); } return 0; } static int test_simde_mm_alignr_pi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r3; simde__m64 r12; } test_vec[8] = { { simde_mm_set_pi8(INT8_C( -39), INT8_C(-110), INT8_C( 56), INT8_C( 87), INT8_C( 10), INT8_C( -78), INT8_C( 61), INT8_C( -21)), simde_mm_set_pi8(INT8_C( 13), INT8_C( -51), INT8_C( 6), INT8_C( -66), INT8_C( -73), INT8_C( 87), INT8_C( -77), INT8_C( 108)), simde_mm_set_pi8(INT8_C( -78), INT8_C( 61), INT8_C( -21), INT8_C( 13), INT8_C( -51), INT8_C( 6), INT8_C( -66), INT8_C( -73)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -39), INT8_C(-110), INT8_C( 56), INT8_C( 87)) }, { simde_mm_set_pi8(INT8_C( 51), INT8_C( -90), INT8_C(-118), INT8_C( -36), INT8_C( 
81), INT8_C( 52), INT8_C( 14), INT8_C( 46)), simde_mm_set_pi8(INT8_C( 26), INT8_C( -56), INT8_C( -35), INT8_C( -50), INT8_C( 106), INT8_C( 71), INT8_C( 68), INT8_C( 40)), simde_mm_set_pi8(INT8_C( 52), INT8_C( 14), INT8_C( 46), INT8_C( 26), INT8_C( -56), INT8_C( -35), INT8_C( -50), INT8_C( 106)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 51), INT8_C( -90), INT8_C(-118), INT8_C( -36)) }, { simde_mm_set_pi8(INT8_C( -8), INT8_C( -77), INT8_C(-125), INT8_C( -59), INT8_C( 8), INT8_C( -89), INT8_C( -90), INT8_C( -97)), simde_mm_set_pi8(INT8_C( -59), INT8_C( -51), INT8_C( -30), INT8_C( -57), INT8_C( 35), INT8_C(-105), INT8_C( -5), INT8_C( -3)), simde_mm_set_pi8(INT8_C( -89), INT8_C( -90), INT8_C( -97), INT8_C( -59), INT8_C( -51), INT8_C( -30), INT8_C( -57), INT8_C( 35)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -8), INT8_C( -77), INT8_C(-125), INT8_C( -59)) }, { simde_mm_set_pi8(INT8_C( 67), INT8_C( 48), INT8_C( -81), INT8_C( -50), INT8_C( 41), INT8_C( -92), INT8_C( -5), INT8_C( 14)), simde_mm_set_pi8(INT8_C( -86), INT8_C( -71), INT8_C( 17), INT8_C( 108), INT8_C( -44), INT8_C( 60), INT8_C( 44), INT8_C( 75)), simde_mm_set_pi8(INT8_C( -92), INT8_C( -5), INT8_C( 14), INT8_C( -86), INT8_C( -71), INT8_C( 17), INT8_C( 108), INT8_C( -44)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 67), INT8_C( 48), INT8_C( -81), INT8_C( -50)) }, { simde_mm_set_pi8(INT8_C( -89), INT8_C(-124), INT8_C(-127), INT8_C( 44), INT8_C( 127), INT8_C( 11), INT8_C(-119), INT8_C( -70)), simde_mm_set_pi8(INT8_C( 39), INT8_C( -13), INT8_C( 68), INT8_C( -96), INT8_C(-112), INT8_C(-118), INT8_C( 122), INT8_C( -32)), simde_mm_set_pi8(INT8_C( 11), INT8_C(-119), INT8_C( -70), INT8_C( 39), INT8_C( -13), INT8_C( 68), INT8_C( -96), INT8_C(-112)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -89), INT8_C(-124), INT8_C(-127), INT8_C( 44)) }, { simde_mm_set_pi8(INT8_C(-100), INT8_C( -55), INT8_C( 7), 
INT8_C( -95), INT8_C( -19), INT8_C(-101), INT8_C( 80), INT8_C( -82)), simde_mm_set_pi8(INT8_C( -54), INT8_C( 2), INT8_C( 109), INT8_C( 126), INT8_C(-123), INT8_C( -75), INT8_C( -35), INT8_C(-107)), simde_mm_set_pi8(INT8_C(-101), INT8_C( 80), INT8_C( -82), INT8_C( -54), INT8_C( 2), INT8_C( 109), INT8_C( 126), INT8_C(-123)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-100), INT8_C( -55), INT8_C( 7), INT8_C( -95)) }, { simde_mm_set_pi8(INT8_C( -17), INT8_C( 109), INT8_C(-102), INT8_C( -75), INT8_C( -61), INT8_C( 83), INT8_C( 8), INT8_C( -7)), simde_mm_set_pi8(INT8_C( 94), INT8_C(-110), INT8_C( 105), INT8_C( 1), INT8_C( 125), INT8_C( 57), INT8_C( -29), INT8_C( 60)), simde_mm_set_pi8(INT8_C( 83), INT8_C( 8), INT8_C( -7), INT8_C( 94), INT8_C(-110), INT8_C( 105), INT8_C( 1), INT8_C( 125)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -17), INT8_C( 109), INT8_C(-102), INT8_C( -75)) }, { simde_mm_set_pi8(INT8_C( 127), INT8_C(-126), INT8_C( -37), INT8_C( -53), INT8_C( 30), INT8_C( 85), INT8_C( -75), INT8_C( 62)), simde_mm_set_pi8(INT8_C( 72), INT8_C( 61), INT8_C(-110), INT8_C( 76), INT8_C( 26), INT8_C(-125), INT8_C( -54), INT8_C( -64)), simde_mm_set_pi8(INT8_C( 85), INT8_C( -75), INT8_C( 62), INT8_C( 72), INT8_C( 61), INT8_C(-110), INT8_C( 76), INT8_C( 26)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 127), INT8_C(-126), INT8_C( -37), INT8_C( -53)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_alignr_pi8(test_vec[i].a, test_vec[i].b, 3); simde_assert_m64_i8(r, ==, test_vec[i].r3); r = simde_mm_alignr_pi8(test_vec[i].a, test_vec[i].b, 12); simde_assert_m64_i8(r, ==, test_vec[i].r12); } return 0; } static int test_simde_mm_hadd_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[] = { { simde_mm_set_epi16(INT16_C(7), INT16_C(6), INT16_C(5), INT16_C(4), INT16_C(3), INT16_C(2), 
INT16_C(1), INT16_C(0)), simde_mm_set_epi16(INT16_C(15), INT16_C(14), INT16_C(13), INT16_C(12), INT16_C(11), INT16_C(10), INT16_C(9), INT16_C(8)), simde_mm_set_epi16(INT16_C(29), INT16_C(25), INT16_C(21), INT16_C(17), INT16_C(13), INT16_C(9), INT16_C(5), INT16_C(1)) }, { simde_mm_set_epi16(INT16_C( 16862), INT16_C(-22769), INT16_C( 1276), INT16_C(-11614), INT16_C( 27365), INT16_C(-21745), INT16_C(-20072), INT16_C( 24895)), simde_mm_set_epi16(INT16_C( 27022), INT16_C(-16957), INT16_C( -577), INT16_C( 5907), INT16_C( 27331), INT16_C(-14214), INT16_C( 23860), INT16_C( -4618)), simde_mm_set_epi16(INT16_C( 10065), INT16_C( 5330), INT16_C( 13117), INT16_C( 19242), INT16_C( -5907), INT16_C(-10338), INT16_C( 5620), INT16_C( 4823)) }, { simde_mm_set_epi16(INT16_C( 10296), INT16_C( 16929), INT16_C( -7697), INT16_C(-29772), INT16_C( 8760), INT16_C( 11055), INT16_C(-21639), INT16_C( -9735)), simde_mm_set_epi16(INT16_C( 17587), INT16_C( 2522), INT16_C( 12430), INT16_C(-26697), INT16_C( 10766), INT16_C( 15055), INT16_C(-19640), INT16_C( 28548)), simde_mm_set_epi16(INT16_C( 20109), INT16_C(-14267), INT16_C( 25821), INT16_C( 8908), INT16_C( 27225), INT16_C( 28067), INT16_C( 19815), INT16_C(-31374)) }, { simde_mm_set_epi16(INT16_C( 25001), INT16_C( 2984), INT16_C( 25634), INT16_C( 18284), INT16_C( 332), INT16_C( 30339), INT16_C( -8894), INT16_C( 21932)), simde_mm_set_epi16(INT16_C(-29538), INT16_C( -9241), INT16_C(-32628), INT16_C(-14450), INT16_C( 29835), INT16_C( 605), INT16_C( -3960), INT16_C( -9885)), simde_mm_set_epi16(INT16_C( 26757), INT16_C( 18458), INT16_C( 30440), INT16_C(-13845), INT16_C( 27985), INT16_C(-21618), INT16_C( 30671), INT16_C( 13038)) }, { simde_mm_set_epi16(INT16_C( 9544), INT16_C( 1869), INT16_C( 10876), INT16_C( 18425), INT16_C(-23507), INT16_C( -6113), INT16_C(-14498), INT16_C( 22949)), simde_mm_set_epi16(INT16_C(-18197), INT16_C(-29870), INT16_C(-22608), INT16_C( 17229), INT16_C(-25091), INT16_C( 26338), INT16_C( 15760), INT16_C(-13942)), 
simde_mm_set_epi16(INT16_C( 17469), INT16_C( -5379), INT16_C( 1247), INT16_C( 1818), INT16_C( 11413), INT16_C( 29301), INT16_C(-29620), INT16_C( 8451)) }, { simde_mm_set_epi16(INT16_C(-12014), INT16_C( -2859), INT16_C(-10534), INT16_C( -75), INT16_C( 11851), INT16_C( 10033), INT16_C(-13975), INT16_C( 29701)), simde_mm_set_epi16(INT16_C( 32129), INT16_C(-30871), INT16_C(-23818), INT16_C(-30018), INT16_C( -9498), INT16_C(-14851), INT16_C(-11614), INT16_C( -9352)), simde_mm_set_epi16(INT16_C( 1258), INT16_C( 11700), INT16_C(-24349), INT16_C(-20966), INT16_C(-14873), INT16_C(-10609), INT16_C( 21884), INT16_C( 15726)) }, { simde_mm_set_epi16(INT16_C(-21544), INT16_C(-15577), INT16_C(-26221), INT16_C( -9036), INT16_C( 27367), INT16_C( 25240), INT16_C( 27963), INT16_C( 16531)), simde_mm_set_epi16(INT16_C(-17413), INT16_C( -3083), INT16_C( 7975), INT16_C( 3251), INT16_C(-12476), INT16_C(-31198), INT16_C(-31819), INT16_C( 23479)), simde_mm_set_epi16(INT16_C(-20496), INT16_C( 11226), INT16_C( 21862), INT16_C( -8340), INT16_C( 28415), INT16_C( 30279), INT16_C(-12929), INT16_C(-21042)) }, { simde_mm_set_epi16(INT16_C(-26423), INT16_C( 20632), INT16_C(-27879), INT16_C( 26257), INT16_C(-14251), INT16_C(-18865), INT16_C( -651), INT16_C(-29238)), simde_mm_set_epi16(INT16_C( -3019), INT16_C( 26530), INT16_C(-15590), INT16_C( -2378), INT16_C( 9416), INT16_C(-20831), INT16_C(-30518), INT16_C(-20357)), simde_mm_set_epi16(INT16_C( 23511), INT16_C(-17968), INT16_C(-11415), INT16_C( 14661), INT16_C( -5791), INT16_C( -1622), INT16_C( 32420), INT16_C(-29889)) }, { simde_mm_set_epi16(INT16_C( 31117), INT16_C( 717), INT16_C( -4833), INT16_C(-22028), INT16_C(-11773), INT16_C( -2769), INT16_C(-29232), INT16_C( 7017)), simde_mm_set_epi16(INT16_C( 29029), INT16_C(-19941), INT16_C( 32677), INT16_C( 1632), INT16_C( 3095), INT16_C( 13355), INT16_C( 25984), INT16_C( 16468)), simde_mm_set_epi16(INT16_C( 9088), INT16_C(-31227), INT16_C( 16450), INT16_C(-23084), INT16_C( 31834), INT16_C(-26861), 
INT16_C(-14542), INT16_C(-22215)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_hadd_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_hadd_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[] = { { simde_mm_set_epi32(INT32_C(65535), INT32_C(0), INT32_C(1), INT32_C(-1)), simde_mm_set_epi32(INT32_C(-32), INT32_C(128), INT32_C(0), INT32_C(-65535)), simde_mm_set_epi32(INT32_C(96), INT32_C(-65535), INT32_C(65535), INT32_C(0)) }, { simde_mm_set_epi32(INT32_C( 935437342), INT32_C( -511860991), INT32_C(-1852765223), INT32_C( -899948884)), simde_mm_set_epi32(INT32_C( -691754121), INT32_C(-1808366785), INT32_C( 1428445569), INT32_C( 851103099)), simde_mm_set_epi32(INT32_C( 1794846390), INT32_C(-2015418628), INT32_C( 423576351), INT32_C( 1542253189)) }, { simde_mm_set_epi32(INT32_C( 1769801323), INT32_C( -534202186), INT32_C( 93006588), INT32_C( -39722690)), simde_mm_set_epi32(INT32_C(-1819301058), INT32_C( 1538855279), INT32_C(-1162552057), INT32_C( 1560248404)), simde_mm_set_epi32(INT32_C( -280445779), INT32_C( 397696347), INT32_C( 1235599137), INT32_C( 53283898)) }, { simde_mm_set_epi32(INT32_C( -236962758), INT32_C( 1371307856), INT32_C(-1291984296), INT32_C(-1633301517)), simde_mm_set_epi32(INT32_C( 553343851), INT32_C( 1511250694), INT32_C( 2041109339), INT32_C( 952253154)), simde_mm_set_epi32(INT32_C( 2064594545), INT32_C(-1301604803), INT32_C( 1134345098), INT32_C( 1369681483)) }, { simde_mm_set_epi32(INT32_C(-2133195983), INT32_C(-1476381094), INT32_C( -832093237), INT32_C( 2141200401)), simde_mm_set_epi32(INT32_C( -724061580), INT32_C( 1346631337), INT32_C( 1990809669), INT32_C( -817015526)), simde_mm_set_epi32(INT32_C( 622569757), INT32_C( 1173794143), INT32_C( 685390219), INT32_C( 1309107164)) }, { simde_mm_set_epi32(INT32_C(-1087680391), INT32_C( 1056661878), INT32_C( -847385783), 
INT32_C( 1172111556)), simde_mm_set_epi32(INT32_C( 1557644526), INT32_C( -640816363), INT32_C( 1498664548), INT32_C( -26761096)), simde_mm_set_epi32(INT32_C( 916828163), INT32_C( 1471903452), INT32_C( -31018513), INT32_C( 324725773)) }, { simde_mm_set_epi32(INT32_C( -253519263), INT32_C(-1665749954), INT32_C(-1108971746), INT32_C( -797234951)), simde_mm_set_epi32(INT32_C(-1588481692), INT32_C( 1983191492), INT32_C( 429969831), INT32_C( 285907454)), simde_mm_set_epi32(INT32_C( 394709800), INT32_C( 715877285), INT32_C(-1919269217), INT32_C(-1906206697)) }, { simde_mm_set_epi32(INT32_C( 1025048627), INT32_C( 475870360), INT32_C( 1357422197), INT32_C(-1008236470)), simde_mm_set_epi32(INT32_C(-1420053195), INT32_C( 1212624672), INT32_C( 209820777), INT32_C( -555572396)), simde_mm_set_epi32(INT32_C( -207428523), INT32_C( -345751619), INT32_C( 1500918987), INT32_C( 349185727)) }, { simde_mm_set_epi32(INT32_C( 64730366), INT32_C( 1169670008), INT32_C( 1098115199), INT32_C( -738462226)), simde_mm_set_epi32(INT32_C( -991739835), INT32_C( 26982665), INT32_C(-1747857410), INT32_C( -648157645)), simde_mm_set_epi32(INT32_C( -964757170), INT32_C( 1898952241), INT32_C( 1234400374), INT32_C( 359652973)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_hadd_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_hadd_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[] = { { simde_mm_set_pi16(INT16_C( 30239), INT16_C( 25845), INT16_C( -9936), INT16_C(-27213)), simde_mm_set_pi16(INT16_C( -1015), INT16_C(-20873), INT16_C( -7356), INT16_C(-31477)), simde_mm_set_pi16(INT16_C(-21888), INT16_C( 26703), INT16_C( -9452), INT16_C( 28387)) }, { simde_mm_set_pi16(INT16_C(-21918), INT16_C(-12935), INT16_C(-22245), INT16_C(-19824)), simde_mm_set_pi16(INT16_C(-12980), INT16_C( 31520), INT16_C(-30945), INT16_C( 24768)), 
simde_mm_set_pi16(INT16_C( 18540), INT16_C( -6177), INT16_C( 30683), INT16_C( 23467)) }, { simde_mm_set_pi16(INT16_C( 17846), INT16_C(-20510), INT16_C( 28205), INT16_C( 22502)), simde_mm_set_pi16(INT16_C( 30262), INT16_C( 19390), INT16_C(-31190), INT16_C(-31299)), simde_mm_set_pi16(INT16_C(-15884), INT16_C( 3047), INT16_C( -2664), INT16_C(-14829)) }, { simde_mm_set_pi16(INT16_C( 4995), INT16_C( 5024), INT16_C(-16313), INT16_C( 32041)), simde_mm_set_pi16(INT16_C( 28210), INT16_C( -6335), INT16_C( 24660), INT16_C( 8387)), simde_mm_set_pi16(INT16_C( 21875), INT16_C(-32489), INT16_C( 10019), INT16_C( 15728)) }, { simde_mm_set_pi16(INT16_C( 18074), INT16_C( 21929), INT16_C(-17147), INT16_C( 5980)), simde_mm_set_pi16(INT16_C(-29293), INT16_C(-14044), INT16_C( -7765), INT16_C( 15197)), simde_mm_set_pi16(INT16_C( 22199), INT16_C( 7432), INT16_C(-25533), INT16_C(-11167)) }, { simde_mm_set_pi16(INT16_C( 9742), INT16_C( 25346), INT16_C(-16677), INT16_C(-18703)), simde_mm_set_pi16(INT16_C( 12116), INT16_C( -6252), INT16_C(-29587), INT16_C( -2727)), simde_mm_set_pi16(INT16_C( 5864), INT16_C(-32314), INT16_C(-30448), INT16_C( 30156)) }, { simde_mm_set_pi16(INT16_C( -2031), INT16_C( 13829), INT16_C( 22178), INT16_C( 10932)), simde_mm_set_pi16(INT16_C( -3860), INT16_C(-21638), INT16_C( 11349), INT16_C( 24248)), simde_mm_set_pi16(INT16_C(-25498), INT16_C(-29939), INT16_C( 11798), INT16_C(-32426)) }, { simde_mm_set_pi16(INT16_C( 14725), INT16_C(-26631), INT16_C( 3352), INT16_C( -9709)), simde_mm_set_pi16(INT16_C(-29523), INT16_C( -5771), INT16_C(-32233), INT16_C( 27043)), simde_mm_set_pi16(INT16_C( 30242), INT16_C( -5190), INT16_C(-11906), INT16_C( -6357)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_hadd_pi16(test_vec[i].a, test_vec[i].b); simde_assert_m64_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_hadd_pi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] 
= { { simde_mm_set_pi32(INT32_C( 1862318516), INT32_C( -295748827)), simde_mm_set_pi32(INT32_C( -228228131), INT32_C( 379370564)), simde_mm_set_pi32(INT32_C( 151142433), INT32_C( 1566569689)) }, { simde_mm_set_pi32(INT32_C(-1716341852), INT32_C(-1567376242)), simde_mm_set_pi32(INT32_C( 627426320), INT32_C(-1742027445)), simde_mm_set_pi32(INT32_C(-1114601125), INT32_C( 1011249202)) }, { simde_mm_set_pi32(INT32_C( -8890841), INT32_C( -182169327)), simde_mm_set_pi32(INT32_C( 1909098453), INT32_C( -755712802)), simde_mm_set_pi32(INT32_C( 1153385651), INT32_C( -191060168)) }, { simde_mm_set_pi32(INT32_C( 1354356939), INT32_C( 781213984)), simde_mm_set_pi32(INT32_C( 1569601432), INT32_C( 249731348)), simde_mm_set_pi32(INT32_C( 1819332780), INT32_C( 2135570923)) }, { simde_mm_set_pi32(INT32_C( -205846038), INT32_C( 760290342)), simde_mm_set_pi32(INT32_C( 261312612), INT32_C( 953650902)), simde_mm_set_pi32(INT32_C( 1214963514), INT32_C( 554444304)) }, { simde_mm_set_pi32(INT32_C( 2091951994), INT32_C( -868981806)), simde_mm_set_pi32(INT32_C( 908827748), INT32_C( 1571868066)), simde_mm_set_pi32(INT32_C(-1814271482), INT32_C( 1222970188)) }, { simde_mm_set_pi32(INT32_C( -4189226), INT32_C( 874353707)), simde_mm_set_pi32(INT32_C( -220724007), INT32_C(-1561593917)), simde_mm_set_pi32(INT32_C(-1782317924), INT32_C( 870164481)) }, { simde_mm_set_pi32(INT32_C(-2051599335), INT32_C( 1030472719)), simde_mm_set_pi32(INT32_C( 98310968), INT32_C(-1800274139)), simde_mm_set_pi32(INT32_C(-1701963171), INT32_C(-1021126616)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_hadd_pi32(test_vec[i].a, test_vec[i].b); simde_assert_m64_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_hadds_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( 10807), INT16_C(-14198), INT16_C(-16689), INT16_C( 21115), INT16_C( -910), INT16_C( 23363), 
INT16_C( -393), INT16_C( 24341)), simde_mm_set_epi16(INT16_C( -1948), INT16_C( 14976), INT16_C(-17960), INT16_C(-14860), INT16_C( 77), INT16_C( 22746), INT16_C( 31556), INT16_C(-27912)), simde_mm_set_epi16(INT16_C( 13028), INT16_C(-32768), INT16_C( 22823), INT16_C( 3644), INT16_C( -3391), INT16_C( 4426), INT16_C( 22453), INT16_C( 23948)) }, { simde_mm_set_epi16(INT16_C( 16701), INT16_C( 18413), INT16_C( 19864), INT16_C(-30571), INT16_C( -296), INT16_C( 12833), INT16_C(-21580), INT16_C( 1624)), simde_mm_set_epi16(INT16_C( 8006), INT16_C( 10793), INT16_C( 9022), INT16_C( 6615), INT16_C( 21610), INT16_C( 15062), INT16_C( -1089), INT16_C( 11200)), simde_mm_set_epi16(INT16_C( 18799), INT16_C( 15637), INT16_C( 32767), INT16_C( 10111), INT16_C( 32767), INT16_C(-10707), INT16_C( 12537), INT16_C(-19956)) }, { simde_mm_set_epi16(INT16_C( 2796), INT16_C( 22044), INT16_C( -3038), INT16_C( -2824), INT16_C(-32407), INT16_C(-18333), INT16_C( 4913), INT16_C(-15422)), simde_mm_set_epi16(INT16_C(-26141), INT16_C(-23205), INT16_C( 4999), INT16_C( 14674), INT16_C( 14693), INT16_C( 30409), INT16_C( -432), INT16_C( -2349)), simde_mm_set_epi16(INT16_C(-32768), INT16_C( 19673), INT16_C( 32767), INT16_C( -2781), INT16_C( 24840), INT16_C( -5862), INT16_C(-32768), INT16_C(-10509)) }, { simde_mm_set_epi16(INT16_C( -4935), INT16_C( -6562), INT16_C( 11817), INT16_C(-25630), INT16_C( -5658), INT16_C( 9543), INT16_C( 31115), INT16_C(-17967)), simde_mm_set_epi16(INT16_C(-31577), INT16_C( 203), INT16_C( 2138), INT16_C(-23787), INT16_C( -59), INT16_C( 6347), INT16_C( -3043), INT16_C( 3327)), simde_mm_set_epi16(INT16_C(-31374), INT16_C(-21649), INT16_C( 6288), INT16_C( 284), INT16_C(-11497), INT16_C(-13813), INT16_C( 3885), INT16_C( 13148)) }, { simde_mm_set_epi16(INT16_C( 23859), INT16_C(-22406), INT16_C( 18603), INT16_C(-25617), INT16_C( 15903), INT16_C(-31522), INT16_C( 6447), INT16_C(-30595)), simde_mm_set_epi16(INT16_C(-12207), INT16_C(-15255), INT16_C( 20239), INT16_C(-16128), INT16_C( 1700), 
INT16_C( -5295), INT16_C(-26969), INT16_C( 1404)), simde_mm_set_epi16(INT16_C(-27462), INT16_C( 4111), INT16_C( -3595), INT16_C(-25565), INT16_C( 1453), INT16_C( -7014), INT16_C(-15619), INT16_C(-24148)) }, { simde_mm_set_epi16(INT16_C(-23882), INT16_C( 20710), INT16_C(-17743), INT16_C(-12929), INT16_C(-26672), INT16_C( 7004), INT16_C( -7681), INT16_C(-15327)), simde_mm_set_epi16(INT16_C( 9169), INT16_C(-20513), INT16_C( 32026), INT16_C(-32016), INT16_C( 11237), INT16_C( 13212), INT16_C( 5807), INT16_C(-22948)), simde_mm_set_epi16(INT16_C(-11344), INT16_C( 10), INT16_C( 24449), INT16_C(-17141), INT16_C( -3172), INT16_C(-30672), INT16_C(-19668), INT16_C(-23008)) }, { simde_mm_set_epi16(INT16_C( -1265), INT16_C(-31938), INT16_C( 205), INT16_C( -2042), INT16_C( 8951), INT16_C( -9923), INT16_C( -5120), INT16_C( 21641)), simde_mm_set_epi16(INT16_C( 29316), INT16_C(-11244), INT16_C(-12652), INT16_C( 22497), INT16_C(-29075), INT16_C(-14808), INT16_C(-17541), INT16_C( -2061)), simde_mm_set_epi16(INT16_C( 18072), INT16_C( 9845), INT16_C(-32768), INT16_C(-19602), INT16_C(-32768), INT16_C( -1837), INT16_C( -972), INT16_C( 16521)) }, { simde_mm_set_epi16(INT16_C( 4920), INT16_C( 25576), INT16_C(-20472), INT16_C( 4642), INT16_C( 22777), INT16_C( 17155), INT16_C(-22672), INT16_C( 7646)), simde_mm_set_epi16(INT16_C(-28362), INT16_C( 2256), INT16_C( 1158), INT16_C(-32564), INT16_C( 15997), INT16_C( 8308), INT16_C( -8252), INT16_C( 27950)), simde_mm_set_epi16(INT16_C(-26106), INT16_C(-31406), INT16_C( 24305), INT16_C( 19698), INT16_C( 30496), INT16_C(-15830), INT16_C( 32767), INT16_C(-15026)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_hadds_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_hadds_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( -2215), INT16_C( -3731), 
INT16_C( 30165), INT16_C(-21248)), simde_mm_set_pi16(INT16_C( 21370), INT16_C( 7316), INT16_C(-29830), INT16_C( -6216)), simde_mm_set_pi16(INT16_C( 28686), INT16_C(-32768), INT16_C( -5946), INT16_C( 8917)) }, { simde_mm_set_pi16(INT16_C( -8625), INT16_C( -7885), INT16_C( 3501), INT16_C( -8065)), simde_mm_set_pi16(INT16_C( -9522), INT16_C(-24178), INT16_C( 32056), INT16_C( 21007)), simde_mm_set_pi16(INT16_C(-32768), INT16_C( 32767), INT16_C(-16510), INT16_C( -4564)) }, { simde_mm_set_pi16(INT16_C(-17715), INT16_C( 16677), INT16_C( 21397), INT16_C(-29267)), simde_mm_set_pi16(INT16_C( -49), INT16_C(-24556), INT16_C( 4165), INT16_C( 5183)), simde_mm_set_pi16(INT16_C(-24605), INT16_C( 9348), INT16_C( -1038), INT16_C( -7870)) }, { simde_mm_set_pi16(INT16_C( 18073), INT16_C( 17217), INT16_C(-23152), INT16_C( -2700)), simde_mm_set_pi16(INT16_C( 24553), INT16_C(-26123), INT16_C( 21553), INT16_C(-14922)), simde_mm_set_pi16(INT16_C( -1570), INT16_C( 6631), INT16_C( 32767), INT16_C(-25852)) }, { simde_mm_set_pi16(INT16_C( -8686), INT16_C(-29702), INT16_C( 977), INT16_C( -5566)), simde_mm_set_pi16(INT16_C( 10007), INT16_C( 21523), INT16_C(-17042), INT16_C( 8204)), simde_mm_set_pi16(INT16_C( 31530), INT16_C( -8838), INT16_C(-32768), INT16_C( -4589)) }, { simde_mm_set_pi16(INT16_C( 20389), INT16_C( 12774), INT16_C( 24895), INT16_C(-10733)), simde_mm_set_pi16(INT16_C( 4070), INT16_C( 21710), INT16_C(-25629), INT16_C( 32624)), simde_mm_set_pi16(INT16_C( 25780), INT16_C( 6995), INT16_C( 32767), INT16_C( 14162)) }, { simde_mm_set_pi16(INT16_C(-26507), INT16_C(-18711), INT16_C( -30), INT16_C(-27258)), simde_mm_set_pi16(INT16_C( 30229), INT16_C(-20659), INT16_C(-12166), INT16_C( 11942)), simde_mm_set_pi16(INT16_C( 9570), INT16_C( -224), INT16_C(-32768), INT16_C(-27288)) }, { simde_mm_set_pi16(INT16_C( 8239), INT16_C( 4822), INT16_C( -7926), INT16_C( 5523)), simde_mm_set_pi16(INT16_C( 4807), INT16_C(-26347), INT16_C( 27248), INT16_C(-14552)), simde_mm_set_pi16(INT16_C(-21540), INT16_C( 
12696), INT16_C( 13061), INT16_C( -2403)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_hadds_pi16(test_vec[i].a, test_vec[i].b); simde_assert_m64_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_hsub_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-23858), INT16_C( -4873), INT16_C( 25529), INT16_C( 28813), INT16_C( 32687), INT16_C( 7141), INT16_C(-18881), INT16_C(-22018)), simde_mm_set_epi16(INT16_C( 18662), INT16_C( -3351), INT16_C(-22586), INT16_C( 17125), INT16_C( 13505), INT16_C( 8156), INT16_C(-22303), INT16_C( 260)), simde_mm_set_epi16(INT16_C(-22013), INT16_C(-25825), INT16_C( -5349), INT16_C( 22563), INT16_C( 18985), INT16_C( 3284), INT16_C(-25546), INT16_C( -3137)) }, { simde_mm_set_epi16(INT16_C(-16905), INT16_C(-23899), INT16_C( 22124), INT16_C( -9244), INT16_C( -8704), INT16_C(-14521), INT16_C( 29325), INT16_C( 9647)), simde_mm_set_epi16(INT16_C(-14947), INT16_C( 11007), INT16_C(-31280), INT16_C(-24736), INT16_C( 12124), INT16_C( 14146), INT16_C( 2823), INT16_C( 31264)), simde_mm_set_epi16(INT16_C( 25954), INT16_C( 6544), INT16_C( 2022), INT16_C( 28441), INT16_C( -6994), INT16_C(-31368), INT16_C( -5817), INT16_C(-19678)) }, { simde_mm_set_epi16(INT16_C( 20412), INT16_C(-23342), INT16_C(-11221), INT16_C( 22543), INT16_C( 31042), INT16_C( 25207), INT16_C( 22911), INT16_C( 28646)), simde_mm_set_epi16(INT16_C( 20001), INT16_C(-23713), INT16_C( -2107), INT16_C( -9501), INT16_C(-11898), INT16_C( 13884), INT16_C(-15609), INT16_C( 29959)), simde_mm_set_epi16(INT16_C( 21822), INT16_C( -7394), INT16_C( 25782), INT16_C(-19968), INT16_C( 21782), INT16_C(-31772), INT16_C( -5835), INT16_C( 5735)) }, { simde_mm_set_epi16(INT16_C(-21670), INT16_C( 22724), INT16_C(-29935), INT16_C(-11113), INT16_C(-27399), INT16_C( 1226), INT16_C(-32445), INT16_C( 25109)), simde_mm_set_epi16(INT16_C( 6376), INT16_C( 
14599), INT16_C(-22701), INT16_C( 30172), INT16_C( 12539), INT16_C(-29335), INT16_C(-14900), INT16_C( 5269)), simde_mm_set_epi16(INT16_C( 8223), INT16_C(-12663), INT16_C( 23662), INT16_C( 20169), INT16_C(-21142), INT16_C( 18822), INT16_C( 28625), INT16_C( -7982)) }, { simde_mm_set_epi16(INT16_C( 12440), INT16_C(-24086), INT16_C( 16668), INT16_C( -2312), INT16_C( 5277), INT16_C( 2451), INT16_C( 29149), INT16_C( 16634)), simde_mm_set_epi16(INT16_C(-13694), INT16_C( 20767), INT16_C( 26711), INT16_C(-18489), INT16_C( 7419), INT16_C( 20190), INT16_C( 6918), INT16_C(-26228)), simde_mm_set_epi16(INT16_C(-31075), INT16_C( 20336), INT16_C( 12771), INT16_C( 32390), INT16_C( 29010), INT16_C(-18980), INT16_C( -2826), INT16_C(-12515)) }, { simde_mm_set_epi16(INT16_C(-18064), INT16_C(-22080), INT16_C( 31211), INT16_C(-24234), INT16_C( 17815), INT16_C( 19504), INT16_C(-18918), INT16_C(-22478)), simde_mm_set_epi16(INT16_C( 1846), INT16_C( 29599), INT16_C( -8713), INT16_C(-21885), INT16_C(-32548), INT16_C( 8452), INT16_C( -6739), INT16_C(-17538)), simde_mm_set_epi16(INT16_C( 27753), INT16_C(-13172), INT16_C(-24536), INT16_C(-10799), INT16_C( -4016), INT16_C( 10091), INT16_C( 1689), INT16_C( -3560)) }, { simde_mm_set_epi16(INT16_C(-26649), INT16_C( 24453), INT16_C( 28697), INT16_C( 11094), INT16_C( 15458), INT16_C(-20509), INT16_C( 21060), INT16_C( 31323)), simde_mm_set_epi16(INT16_C( 13710), INT16_C(-23834), INT16_C( 8691), INT16_C( 6543), INT16_C( 24367), INT16_C( 6903), INT16_C( -2074), INT16_C(-12200)), simde_mm_set_epi16(INT16_C( 27992), INT16_C( -2148), INT16_C(-17464), INT16_C(-10126), INT16_C(-14434), INT16_C(-17603), INT16_C( 29569), INT16_C( 10263)) }, { simde_mm_set_epi16(INT16_C( 30864), INT16_C( 20241), INT16_C( 16902), INT16_C( 20663), INT16_C(-20841), INT16_C(-32594), INT16_C(-27087), INT16_C( 20516)), simde_mm_set_epi16(INT16_C( 20660), INT16_C( -7551), INT16_C(-22127), INT16_C( 27693), INT16_C( -9668), INT16_C(-19341), INT16_C( 24481), INT16_C(-15352)), 
simde_mm_set_epi16(INT16_C(-28211), INT16_C(-15716), INT16_C( -9673), INT16_C( 25703), INT16_C(-10623), INT16_C( 3761), INT16_C(-11753), INT16_C(-17933)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_hsub_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_hsub_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( -479907977), INT32_C( -696539570), INT32_C(-1398601783), INT32_C( 536165734)), simde_mm_set_epi32(INT32_C( 5565618), INT32_C( -381463958), INT32_C( 42537933), INT32_C( -711333282)), simde_mm_set_epi32(INT32_C( -387029576), INT32_C( -753871215), INT32_C( -216631593), INT32_C( 1934767517)) }, { simde_mm_set_epi32(INT32_C( 818650962), INT32_C( 824145104), INT32_C(-1844195506), INT32_C(-1825969675)), simde_mm_set_epi32(INT32_C( 2083359703), INT32_C(-2060086869), INT32_C( 1669409252), INT32_C(-1777354497)), simde_mm_set_epi32(INT32_C( 151520724), INT32_C( 848203547), INT32_C( 5494142), INT32_C( 18225831)) }, { simde_mm_set_epi32(INT32_C(-1587888977), INT32_C( -591401974), INT32_C( -147902824), INT32_C( 1830002670)), simde_mm_set_epi32(INT32_C( 399705498), INT32_C(-1859698091), INT32_C( 1482907200), INT32_C( 1392342146)), simde_mm_set_epi32(INT32_C( 2035563707), INT32_C( -90565054), INT32_C( 996487003), INT32_C( 1977905494)) }, { simde_mm_set_epi32(INT32_C(-1814888934), INT32_C(-1949362692), INT32_C( 1433235748), INT32_C( -537299867)), simde_mm_set_epi32(INT32_C( -273399868), INT32_C( 1316606109), INT32_C( 1234934819), INT32_C( 2143067132)), simde_mm_set_epi32(INT32_C( 1590005977), INT32_C( 908132313), INT32_C( -134473758), INT32_C(-1970535615)) }, { simde_mm_set_epi32(INT32_C(-2102962425), INT32_C( -70755375), INT32_C(-2145299060), INT32_C( 305166546)), simde_mm_set_epi32(INT32_C( -140541285), INT32_C(-1875101359), INT32_C(-1065221069), INT32_C( 
2018475568)), simde_mm_set_epi32(INT32_C(-1734560074), INT32_C(-1211270659), INT32_C( 2032207050), INT32_C(-1844501690)) }, { simde_mm_set_epi32(INT32_C( -996291163), INT32_C( -192227992), INT32_C(-1330314074), INT32_C( -761581524)), simde_mm_set_epi32(INT32_C(-1442240024), INT32_C(-1983945374), INT32_C( 1651588405), INT32_C(-2097842058)), simde_mm_set_epi32(INT32_C( -541705350), INT32_C( 545536833), INT32_C( 804063171), INT32_C( 568732550)) }, { simde_mm_set_epi32(INT32_C(-1780627386), INT32_C( 769411793), INT32_C( 1731523542), INT32_C( 660129568)), simde_mm_set_epi32(INT32_C( -469002090), INT32_C( -974572540), INT32_C( 829774662), INT32_C(-1194362973)), simde_mm_set_epi32(INT32_C( -505570450), INT32_C(-2024137635), INT32_C(-1744928117), INT32_C(-1071393974)) }, { simde_mm_set_epi32(INT32_C( -87824878), INT32_C(-2145465811), INT32_C( 968986944), INT32_C( 617458465)), simde_mm_set_epi32(INT32_C( 1888345900), INT32_C( 1243027853), INT32_C(-2115148095), INT32_C( -711680076)), simde_mm_set_epi32(INT32_C( -645318047), INT32_C( 1403468019), INT32_C(-2057640933), INT32_C( -351528479)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_hsub_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_hsub_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C(-21341), INT16_C( 1993), INT16_C( 8181), INT16_C( 15718)), simde_mm_set_pi16(INT16_C( -7323), INT16_C( 12151), INT16_C(-10629), INT16_C(-22962)), simde_mm_set_pi16(INT16_C( 19474), INT16_C(-12333), INT16_C( 23334), INT16_C( 7537)) }, { simde_mm_set_pi16(INT16_C( 649), INT16_C( 5069), INT16_C(-10855), INT16_C( -5538)), simde_mm_set_pi16(INT16_C( 84), INT16_C( -4942), INT16_C( -5821), INT16_C( 21098)), simde_mm_set_pi16(INT16_C( -5026), INT16_C( 26919), INT16_C( 4420), INT16_C( 5317)) }, { simde_mm_set_pi16(INT16_C(-28141), 
INT16_C(-12466), INT16_C(-27863), INT16_C( -5643)), simde_mm_set_pi16(INT16_C( 12491), INT16_C(-24750), INT16_C( 12575), INT16_C( 29904)), simde_mm_set_pi16(INT16_C( 28295), INT16_C( 17329), INT16_C( 15675), INT16_C( 22220)) }, { simde_mm_set_pi16(INT16_C( 25473), INT16_C( 10724), INT16_C(-27121), INT16_C(-18177)), simde_mm_set_pi16(INT16_C( 31789), INT16_C(-29737), INT16_C(-31435), INT16_C(-28245)), simde_mm_set_pi16(INT16_C( 4010), INT16_C( 3190), INT16_C(-14749), INT16_C( 8944)) }, { simde_mm_set_pi16(INT16_C( -2257), INT16_C( 11928), INT16_C( 27923), INT16_C(-24594)), simde_mm_set_pi16(INT16_C(-24230), INT16_C(-17233), INT16_C( -9025), INT16_C( -5110)), simde_mm_set_pi16(INT16_C( 6997), INT16_C( 3915), INT16_C( 14185), INT16_C( 13019)) }, { simde_mm_set_pi16(INT16_C( 22627), INT16_C( 24128), INT16_C( 21245), INT16_C( 29826)), simde_mm_set_pi16(INT16_C( 6099), INT16_C( 1434), INT16_C(-28377), INT16_C( 16981)), simde_mm_set_pi16(INT16_C( -4665), INT16_C(-20178), INT16_C( 1501), INT16_C( 8581)) }, { simde_mm_set_pi16(INT16_C( 21869), INT16_C( 28964), INT16_C( -8199), INT16_C( 29797)), simde_mm_set_pi16(INT16_C(-27694), INT16_C( -486), INT16_C(-29745), INT16_C( 5628)), simde_mm_set_pi16(INT16_C( 27208), INT16_C(-30163), INT16_C( 7095), INT16_C(-27540)) }, { simde_mm_set_pi16(INT16_C( 18843), INT16_C(-25565), INT16_C( 32700), INT16_C(-25604)), simde_mm_set_pi16(INT16_C( -4172), INT16_C( 16324), INT16_C( 20089), INT16_C(-12131)), simde_mm_set_pi16(INT16_C( 20496), INT16_C(-32220), INT16_C( 21128), INT16_C( 7232)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_hsub_pi16(test_vec[i].a, test_vec[i].b); simde_assert_m64_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_hsub_pi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C(-1398601783), INT32_C( 536165734)), simde_mm_set_pi32(INT32_C( -479907977), INT32_C( -696539570)), 
simde_mm_set_pi32(INT32_C( -216631593), INT32_C( 1934767517)) }, { simde_mm_set_pi32(INT32_C( 42537933), INT32_C( -711333282)), simde_mm_set_pi32(INT32_C( 5565618), INT32_C( -381463958)), simde_mm_set_pi32(INT32_C( -387029576), INT32_C( -753871215)) }, { simde_mm_set_pi32(INT32_C(-1844195506), INT32_C(-1825969675)), simde_mm_set_pi32(INT32_C( 818650962), INT32_C( 824145104)), simde_mm_set_pi32(INT32_C( 5494142), INT32_C( 18225831)) }, { simde_mm_set_pi32(INT32_C( 1669409252), INT32_C(-1777354497)), simde_mm_set_pi32(INT32_C( 2083359703), INT32_C(-2060086869)), simde_mm_set_pi32(INT32_C( 151520724), INT32_C( 848203547)) }, { simde_mm_set_pi32(INT32_C( -147902824), INT32_C( 1830002670)), simde_mm_set_pi32(INT32_C(-1587888977), INT32_C( -591401974)), simde_mm_set_pi32(INT32_C( 996487003), INT32_C( 1977905494)) }, { simde_mm_set_pi32(INT32_C( 1482907200), INT32_C( 1392342146)), simde_mm_set_pi32(INT32_C( 399705498), INT32_C(-1859698091)), simde_mm_set_pi32(INT32_C( 2035563707), INT32_C( -90565054)) }, { simde_mm_set_pi32(INT32_C( 1433235748), INT32_C( -537299867)), simde_mm_set_pi32(INT32_C(-1814888934), INT32_C(-1949362692)), simde_mm_set_pi32(INT32_C( -134473758), INT32_C(-1970535615)) }, { simde_mm_set_pi32(INT32_C( 1234934819), INT32_C( 2143067132)), simde_mm_set_pi32(INT32_C( -273399868), INT32_C( 1316606109)), simde_mm_set_pi32(INT32_C( 1590005977), INT32_C( 908132313)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_hsub_pi32(test_vec[i].a, test_vec[i].b); simde_assert_m64_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_hsubs_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( 13774), INT16_C(-29689), INT16_C( 10999), INT16_C( -6147), INT16_C(-29800), INT16_C( 18242), INT16_C( 25067), INT16_C( -9729)), simde_mm_set_epi16(INT16_C( 12949), INT16_C( 20521), INT16_C(-28557), INT16_C(-22012), 
INT16_C(-30915), INT16_C( -6461), INT16_C( 26181), INT16_C( 4500)), simde_mm_set_epi16(INT16_C( 7572), INT16_C( 6545), INT16_C( 24454), INT16_C(-21681), INT16_C(-32768), INT16_C(-17146), INT16_C( 32767), INT16_C(-32768)) }, { simde_mm_set_epi16(INT16_C( 29988), INT16_C(-16786), INT16_C( 24082), INT16_C(-25968), INT16_C( 12374), INT16_C( 28178), INT16_C( 2246), INT16_C(-16612)), simde_mm_set_epi16(INT16_C( 21001), INT16_C( 21933), INT16_C(-28339), INT16_C(-21262), INT16_C(-18820), INT16_C( -7213), INT16_C(-31232), INT16_C( -7091)), simde_mm_set_epi16(INT16_C( 932), INT16_C( 7077), INT16_C( 11607), INT16_C( 24141), INT16_C(-32768), INT16_C(-32768), INT16_C( 15804), INT16_C(-18858)) }, { simde_mm_set_epi16(INT16_C( -2178), INT16_C( 7231), INT16_C( 1749), INT16_C(-11837), INT16_C( 29652), INT16_C(-23237), INT16_C( -3549), INT16_C(-11367)), simde_mm_set_epi16(INT16_C(-28533), INT16_C( 19954), INT16_C( 15418), INT16_C( 23837), INT16_C( 27116), INT16_C( -2562), INT16_C(-12163), INT16_C( 27809)), simde_mm_set_epi16(INT16_C( 32767), INT16_C( 8419), INT16_C(-29678), INT16_C( 32767), INT16_C( 9409), INT16_C(-13586), INT16_C(-32768), INT16_C( -7818)) }, { simde_mm_set_epi16(INT16_C( 9359), INT16_C( 10457), INT16_C( -9481), INT16_C( 7337), INT16_C( -7865), INT16_C( -8059), INT16_C(-27714), INT16_C(-26625)), simde_mm_set_epi16(INT16_C( 1382), INT16_C( 7017), INT16_C( 1603), INT16_C( 12659), INT16_C(-15886), INT16_C( 13804), INT16_C( 23429), INT16_C(-30064)), simde_mm_set_epi16(INT16_C( 5635), INT16_C( 11056), INT16_C( 29690), INT16_C(-32768), INT16_C( 1098), INT16_C( 16818), INT16_C( -194), INT16_C( 1089)) }, { simde_mm_set_epi16(INT16_C( -6864), INT16_C( 32077), INT16_C(-12988), INT16_C(-19165), INT16_C(-26014), INT16_C( 8246), INT16_C( 27640), INT16_C( 25410)), simde_mm_set_epi16(INT16_C( 19800), INT16_C(-22857), INT16_C( -2668), INT16_C( 12159), INT16_C( 9895), INT16_C( -9099), INT16_C(-14776), INT16_C( 4666)), simde_mm_set_epi16(INT16_C(-32768), INT16_C( 14827), 
INT16_C(-18994), INT16_C( 19442), INT16_C( 32767), INT16_C( -6177), INT16_C( 32767), INT16_C( -2230)) }, { simde_mm_set_epi16(INT16_C( 19814), INT16_C( 25204), INT16_C( 1688), INT16_C(-25917), INT16_C( -4068), INT16_C(-22336), INT16_C( 14502), INT16_C( 27222)), simde_mm_set_epi16(INT16_C(-18197), INT16_C( 12530), INT16_C( 8023), INT16_C( 5629), INT16_C( 32454), INT16_C( 4791), INT16_C( -4481), INT16_C( 19744)), simde_mm_set_epi16(INT16_C( 30727), INT16_C( -2394), INT16_C(-27663), INT16_C( 24225), INT16_C( 5390), INT16_C(-27605), INT16_C(-18268), INT16_C( 12720)) }, { simde_mm_set_epi16(INT16_C( -7171), INT16_C( 12346), INT16_C( 491), INT16_C( -3389), INT16_C(-18032), INT16_C( -9295), INT16_C(-27339), INT16_C( 18234)), simde_mm_set_epi16(INT16_C( -7625), INT16_C( 17942), INT16_C(-16463), INT16_C( 19005), INT16_C(-15122), INT16_C( 15452), INT16_C( 28218), INT16_C( 7688)), simde_mm_set_epi16(INT16_C( 25567), INT16_C( 32767), INT16_C( 30574), INT16_C(-20530), INT16_C( 19517), INT16_C( -3880), INT16_C( 8737), INT16_C( 32767)) }, { simde_mm_set_epi16(INT16_C( 680), INT16_C( 3648), INT16_C( 9121), INT16_C( 26085), INT16_C( 21203), INT16_C( -8528), INT16_C( 17475), INT16_C(-10092)), simde_mm_set_epi16(INT16_C( 9701), INT16_C( 32227), INT16_C( -294), INT16_C(-17758), INT16_C( 13795), INT16_C(-28706), INT16_C( 28077), INT16_C(-10927)), simde_mm_set_epi16(INT16_C( 22526), INT16_C(-17464), INT16_C(-32768), INT16_C(-32768), INT16_C( 2968), INT16_C( 16964), INT16_C(-29731), INT16_C(-27567)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_hsubs_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_hsubs_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C(-29800), INT16_C( 18242), INT16_C( 25067), INT16_C( -9729)), simde_mm_set_pi16(INT16_C( 13774), INT16_C(-29689), INT16_C( 
10999), INT16_C( -6147)), simde_mm_set_pi16(INT16_C(-32768), INT16_C(-17146), INT16_C( 32767), INT16_C(-32768)) }, { simde_mm_set_pi16(INT16_C(-30915), INT16_C( -6461), INT16_C( 26181), INT16_C( 4500)), simde_mm_set_pi16(INT16_C( 12949), INT16_C( 20521), INT16_C(-28557), INT16_C(-22012)), simde_mm_set_pi16(INT16_C( 7572), INT16_C( 6545), INT16_C( 24454), INT16_C(-21681)) }, { simde_mm_set_pi16(INT16_C( 12374), INT16_C( 28178), INT16_C( 2246), INT16_C(-16612)), simde_mm_set_pi16(INT16_C( 29988), INT16_C(-16786), INT16_C( 24082), INT16_C(-25968)), simde_mm_set_pi16(INT16_C(-32768), INT16_C(-32768), INT16_C( 15804), INT16_C(-18858)) }, { simde_mm_set_pi16(INT16_C(-18820), INT16_C( -7213), INT16_C(-31232), INT16_C( -7091)), simde_mm_set_pi16(INT16_C( 21001), INT16_C( 21933), INT16_C(-28339), INT16_C(-21262)), simde_mm_set_pi16(INT16_C( 932), INT16_C( 7077), INT16_C( 11607), INT16_C( 24141)) }, { simde_mm_set_pi16(INT16_C( 29652), INT16_C(-23237), INT16_C( -3549), INT16_C(-11367)), simde_mm_set_pi16(INT16_C( -2178), INT16_C( 7231), INT16_C( 1749), INT16_C(-11837)), simde_mm_set_pi16(INT16_C( 9409), INT16_C(-13586), INT16_C(-32768), INT16_C( -7818)) }, { simde_mm_set_pi16(INT16_C( 27116), INT16_C( -2562), INT16_C(-12163), INT16_C( 27809)), simde_mm_set_pi16(INT16_C(-28533), INT16_C( 19954), INT16_C( 15418), INT16_C( 23837)), simde_mm_set_pi16(INT16_C( 32767), INT16_C( 8419), INT16_C(-29678), INT16_C( 32767)) }, { simde_mm_set_pi16(INT16_C( -7865), INT16_C( -8059), INT16_C(-27714), INT16_C(-26625)), simde_mm_set_pi16(INT16_C( 9359), INT16_C( 10457), INT16_C( -9481), INT16_C( 7337)), simde_mm_set_pi16(INT16_C( 1098), INT16_C( 16818), INT16_C( -194), INT16_C( 1089)) }, { simde_mm_set_pi16(INT16_C(-15886), INT16_C( 13804), INT16_C( 23429), INT16_C(-30064)), simde_mm_set_pi16(INT16_C( 1382), INT16_C( 7017), INT16_C( 1603), INT16_C( 12659)), simde_mm_set_pi16(INT16_C( 5635), INT16_C( 11056), INT16_C( 29690), INT16_C(-32768)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_hsubs_pi16(test_vec[i].a, test_vec[i].b); simde_assert_m64_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_maddubs_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu8(UINT8_C( 51), UINT8_C( 0), UINT8_C(149), UINT8_C(143), UINT8_C( 51), UINT8_C( 92), UINT8_C(224), UINT8_C( 61), UINT8_C(140), UINT8_C(247), UINT8_C( 97), UINT8_C( 44), UINT8_C(110), UINT8_C( 56), UINT8_C(160), UINT8_C( 0)), simde_mm_set_epi8 (INT8_C( -33), INT8_C( -19), INT8_C( 30), INT8_C(-109), INT8_C( -30), INT8_C( 17), INT8_C( 4), INT8_C( 76), INT8_C( 94), INT8_C( 61), INT8_C( 21), INT8_C( 25), INT8_C(-111), INT8_C( -97), INT8_C( 96), INT8_C( 46)), simde_mm_set_epi16(INT16_C( -1683), INT16_C(-11117), INT16_C( 34), INT16_C( 5532), INT16_C( 28227), INT16_C( 3137), INT16_C(-17642), INT16_C( 15360)) }, { simde_x_mm_set_epu8(UINT8_C(202), UINT8_C(213), UINT8_C( 43), UINT8_C( 75), UINT8_C(134), UINT8_C(185), UINT8_C(251), UINT8_C( 80), UINT8_C(190), UINT8_C( 88), UINT8_C(158), UINT8_C(164), UINT8_C(129), UINT8_C( 83), UINT8_C( 80), UINT8_C(190)), simde_mm_set_epi8 (INT8_C( -88), INT8_C( 38), INT8_C( 2), INT8_C( 99), INT8_C( 71), INT8_C( 74), INT8_C( 0), INT8_C(-104), INT8_C( -7), INT8_C( -22), INT8_C( -85), INT8_C( -84), INT8_C( 70), INT8_C( 111), INT8_C( -89), INT8_C( -1)), simde_mm_set_epi16(INT16_C( -9682), INT16_C( 7511), INT16_C( 23204), INT16_C( -8320), INT16_C( -3266), INT16_C(-27206), INT16_C( 18243), INT16_C( -7310)) }, { simde_x_mm_set_epu8(UINT8_C( 33), UINT8_C( 10), UINT8_C(112), UINT8_C(214), UINT8_C(240), UINT8_C( 83), UINT8_C(196), UINT8_C( 31), UINT8_C(227), UINT8_C( 73), UINT8_C(215), UINT8_C(104), UINT8_C(224), UINT8_C( 75), UINT8_C(136), UINT8_C( 7)), simde_mm_set_epi8 (INT8_C(-103), INT8_C( 58), INT8_C( -91), INT8_C( -44), INT8_C( 16), INT8_C( -73), INT8_C( -19), INT8_C( -50), INT8_C( 72), INT8_C( -54), INT8_C( -57), INT8_C(-103), 
INT8_C(-118), INT8_C( 121), INT8_C( 123), INT8_C( -74)), simde_mm_set_epi16(INT16_C( -2819), INT16_C(-19608), INT16_C( -2219), INT16_C( -5274), INT16_C( 12402), INT16_C(-22967), INT16_C(-17357), INT16_C( 16210)) }, { simde_x_mm_set_epu8(UINT8_C(252), UINT8_C( 31), UINT8_C(134), UINT8_C( 31), UINT8_C(122), UINT8_C(123), UINT8_C(150), UINT8_C(213), UINT8_C(153), UINT8_C(103), UINT8_C(181), UINT8_C(195), UINT8_C(170), UINT8_C( 58), UINT8_C(240), UINT8_C( 47)), simde_mm_set_epi8 (INT8_C( -25), INT8_C( 64), INT8_C(-100), INT8_C( -3), INT8_C( 4), INT8_C( -66), INT8_C( 118), INT8_C( 79), INT8_C(-101), INT8_C( -55), INT8_C( -4), INT8_C( -13), INT8_C( -64), INT8_C( 101), INT8_C( -30), INT8_C( 104)), simde_mm_set_epi16(INT16_C( -4316), INT16_C(-13493), INT16_C( -7630), INT16_C( 32767), INT16_C(-21118), INT16_C( -3259), INT16_C( -5022), INT16_C( -2312)) }, { simde_x_mm_set_epu8(UINT8_C(195), UINT8_C( 70), UINT8_C(169), UINT8_C( 25), UINT8_C( 44), UINT8_C(147), UINT8_C(212), UINT8_C(247), UINT8_C(193), UINT8_C(226), UINT8_C( 3), UINT8_C( 32), UINT8_C(176), UINT8_C(206), UINT8_C(162), UINT8_C(147)), simde_mm_set_epi8 (INT8_C( -49), INT8_C(-123), INT8_C( 64), INT8_C( -72), INT8_C( -48), INT8_C( 90), INT8_C( 7), INT8_C( 111), INT8_C( 51), INT8_C( -54), INT8_C( 46), INT8_C( 30), INT8_C( 16), INT8_C( 116), INT8_C( 86), INT8_C( 72)), simde_mm_set_epi16(INT16_C(-18165), INT16_C( 9016), INT16_C( 11118), INT16_C( 28901), INT16_C( -2361), INT16_C( 1098), INT16_C( 26712), INT16_C( 24516)) }, { simde_x_mm_set_epu8(UINT8_C(170), UINT8_C(171), UINT8_C(184), UINT8_C(175), UINT8_C(121), UINT8_C(154), UINT8_C(221), UINT8_C(215), UINT8_C( 81), UINT8_C(122), UINT8_C( 48), UINT8_C(175), UINT8_C(206), UINT8_C(142), UINT8_C( 87), UINT8_C(151)), simde_mm_set_epi8 (INT8_C( -17), INT8_C( 73), INT8_C( -38), INT8_C( -41), INT8_C( 82), INT8_C( 87), INT8_C( 108), INT8_C( 26), INT8_C( 57), INT8_C( 110), INT8_C( 70), INT8_C( 0), INT8_C( 68), INT8_C( -35), INT8_C( -9), INT8_C( -4)), 
simde_mm_set_epi16(INT16_C( 9593), INT16_C(-14167), INT16_C( 23320), INT16_C( 29458), INT16_C( 18037), INT16_C( 3360), INT16_C( 9038), INT16_C( -1387)) }, { simde_x_mm_set_epu8(UINT8_C(107), UINT8_C( 23), UINT8_C(200), UINT8_C(241), UINT8_C(184), UINT8_C( 99), UINT8_C(201), UINT8_C(184), UINT8_C( 6), UINT8_C(119), UINT8_C(156), UINT8_C(238), UINT8_C( 57), UINT8_C(220), UINT8_C( 31), UINT8_C( 63)), simde_mm_set_epi8 (INT8_C( 55), INT8_C( -43), INT8_C( -4), INT8_C( -8), INT8_C( 98), INT8_C(-128), INT8_C( 34), INT8_C( 78), INT8_C( -53), INT8_C( 85), INT8_C(-119), INT8_C(-112), INT8_C( 64), INT8_C( 97), INT8_C( -95), INT8_C( -65)), simde_mm_set_epi16(INT16_C( 4896), INT16_C( -2728), INT16_C( 5360), INT16_C( 21186), INT16_C( 9797), INT16_C(-32768), INT16_C( 24988), INT16_C( -7040)) }, { simde_x_mm_set_epu8(UINT8_C(240), UINT8_C(158), UINT8_C( 50), UINT8_C(200), UINT8_C(127), UINT8_C(117), UINT8_C(116), UINT8_C(126), UINT8_C( 28), UINT8_C( 15), UINT8_C(211), UINT8_C(171), UINT8_C( 26), UINT8_C(129), UINT8_C( 21), UINT8_C(147)), simde_mm_set_epi8 (INT8_C( 1), INT8_C( -35), INT8_C( 16), INT8_C( 63), INT8_C( 10), INT8_C( -86), INT8_C( -27), INT8_C( -9), INT8_C( -69), INT8_C( 85), INT8_C( -80), INT8_C( 80), INT8_C( 15), INT8_C( 55), INT8_C( 36), INT8_C( -33)), simde_mm_set_epi16(INT16_C( -5290), INT16_C( 13400), INT16_C( -8792), INT16_C( -4266), INT16_C( -657), INT16_C( -3200), INT16_C( 7485), INT16_C( -4095)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_maddubs_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_maddubs_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_x_mm_set_pu8(UINT8_C( 226), UINT8_C( 60), UINT8_C( 180), UINT8_C( 253), UINT8_C( 116), UINT8_C( 114), UINT8_C( 202), UINT8_C( 153)), simde_mm_set_pi8( INT8_C( -83), INT8_C( 11), INT8_C( 102), INT8_C( -8), INT8_C( 96), 
INT8_C( 7), INT8_C( -47), INT8_C( 11)), simde_mm_set_pi16(INT16_C(-18098), INT16_C( 16336), INT16_C( 11934), INT16_C( -7811)) }, { simde_x_mm_set_pu8(UINT8_C( 204), UINT8_C( 170), UINT8_C( 75), UINT8_C( 84), UINT8_C( 124), UINT8_C( 190), UINT8_C( 100), UINT8_C( 219)), simde_mm_set_pi8( INT8_C( -34), INT8_C( -62), INT8_C( -50), INT8_C( -16), INT8_C( 75), INT8_C( 6), INT8_C( 33), INT8_C( 1)), simde_mm_set_pi16(INT16_C(-17476), INT16_C( -5094), INT16_C( 10440), INT16_C( 3519)) }, { simde_x_mm_set_pu8(UINT8_C( 41), UINT8_C( 19), UINT8_C( 177), UINT8_C( 122), UINT8_C( 36), UINT8_C( 42), UINT8_C( 63), UINT8_C( 32)), simde_mm_set_pi8( INT8_C( 42), INT8_C( -60), INT8_C( 43), INT8_C( 108), INT8_C( -92), INT8_C( 62), INT8_C( -84), INT8_C(-109)), simde_mm_set_pi16(INT16_C( 582), INT16_C( 20787), INT16_C( -708), INT16_C( -8780)) }, { simde_x_mm_set_pu8(UINT8_C( 251), UINT8_C( 60), UINT8_C( 216), UINT8_C( 235), UINT8_C( 217), UINT8_C( 226), UINT8_C( 248), UINT8_C( 212)), simde_mm_set_pi8( INT8_C( 39), INT8_C( 2), INT8_C( -42), INT8_C(-128), INT8_C( 5), INT8_C( 35), INT8_C(-117), INT8_C( 123)), simde_mm_set_pi16(INT16_C( 9909), INT16_C(-32768), INT16_C( 8995), INT16_C( -2940)) }, { simde_x_mm_set_pu8(UINT8_C( 8), UINT8_C( 71), UINT8_C( 143), UINT8_C( 51), UINT8_C( 192), UINT8_C( 71), UINT8_C( 71), UINT8_C( 112)), simde_mm_set_pi8( INT8_C( 67), INT8_C( -27), INT8_C( 86), INT8_C( -45), INT8_C( -88), INT8_C( -88), INT8_C(-120), INT8_C( -56)), simde_mm_set_pi16(INT16_C( -1381), INT16_C( 10003), INT16_C(-23144), INT16_C(-14792)) }, { simde_x_mm_set_pu8(UINT8_C( 146), UINT8_C( 253), UINT8_C( 229), UINT8_C( 229), UINT8_C( 1), UINT8_C( 245), UINT8_C( 28), UINT8_C( 5)), simde_mm_set_pi8( INT8_C( 17), INT8_C( 111), INT8_C( -69), INT8_C(-110), INT8_C( -81), INT8_C( 80), INT8_C( 52), INT8_C( -25)), simde_mm_set_pi16(INT16_C( 30565), INT16_C(-32768), INT16_C( 19519), INT16_C( 1331)) }, { simde_x_mm_set_pu8(UINT8_C( 37), UINT8_C( 74), UINT8_C( 250), UINT8_C( 7), UINT8_C( 132), UINT8_C( 86), 
UINT8_C( 80), UINT8_C( 39)), simde_mm_set_pi8( INT8_C(-119), INT8_C( -25), INT8_C( 75), INT8_C( 100), INT8_C( -82), INT8_C( -30), INT8_C(-102), INT8_C( -17)), simde_mm_set_pi16(INT16_C( -6253), INT16_C( 19450), INT16_C(-13404), INT16_C( -8823)) }, { simde_x_mm_set_pu8(UINT8_C( 23), UINT8_C( 177), UINT8_C( 190), UINT8_C( 68), UINT8_C( 193), UINT8_C( 21), UINT8_C( 108), UINT8_C( 80)), simde_mm_set_pi8( INT8_C( -20), INT8_C(-112), INT8_C( -45), INT8_C( -43), INT8_C( 85), INT8_C( 92), INT8_C( -55), INT8_C( -79)), simde_mm_set_pi16(INT16_C(-20284), INT16_C(-11474), INT16_C( 18337), INT16_C(-12260)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_maddubs_pi16(test_vec[i].a, test_vec[i].b); simde_assert_m64_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_mulhrs_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-22170), INT16_C( 3338), INT16_C( 16927), INT16_C(-30825), INT16_C( -5333), INT16_C(-26742), INT16_C( 73), INT16_C( 12836)), simde_mm_set_epi16(INT16_C(-20552), INT16_C( 2947), INT16_C( 22103), INT16_C( -1959), INT16_C( 9399), INT16_C(-17978), INT16_C( 24358), INT16_C( -8616)), simde_mm_set_epi16(INT16_C( 13905), INT16_C( 300), INT16_C( 11418), INT16_C( 1843), INT16_C( -1530), INT16_C( 14672), INT16_C( 54), INT16_C( -3375)) }, { simde_mm_set_epi16(INT16_C( 13543), INT16_C(-11087), INT16_C( -705), INT16_C( -8016), INT16_C( 5997), INT16_C(-15738), INT16_C(-26326), INT16_C(-25662)), simde_mm_set_epi16(INT16_C( 6685), INT16_C(-23579), INT16_C( 14091), INT16_C( -6956), INT16_C(-18113), INT16_C( 23901), INT16_C( 31684), INT16_C( 2110)), simde_mm_set_epi16(INT16_C( 2763), INT16_C( 7978), INT16_C( -303), INT16_C( 1702), INT16_C( -3315), INT16_C(-11479), INT16_C(-25455), INT16_C( -1652)) }, { simde_mm_set_epi16(INT16_C( 2735), INT16_C(-11575), INT16_C(-10031), INT16_C( 17488), INT16_C(-28766), INT16_C( -754), 
INT16_C( 27755), INT16_C(-14828)), simde_mm_set_epi16(INT16_C(-18261), INT16_C( -2558), INT16_C( 20250), INT16_C(-32745), INT16_C( 26468), INT16_C( 12462), INT16_C( -600), INT16_C( 27533)), simde_mm_set_epi16(INT16_C( -1524), INT16_C( 904), INT16_C( -6199), INT16_C(-17476), INT16_C(-23235), INT16_C( -287), INT16_C( -508), INT16_C(-12459)) }, { simde_mm_set_epi16(INT16_C( 18118), INT16_C( 19684), INT16_C( 21218), INT16_C( 11012), INT16_C( 4581), INT16_C( 31051), INT16_C(-22370), INT16_C( 18948)), simde_mm_set_epi16(INT16_C( 4495), INT16_C(-26951), INT16_C( 10375), INT16_C( 11197), INT16_C(-22121), INT16_C( 27826), INT16_C(-13849), INT16_C( 15915)), simde_mm_set_epi16(INT16_C( 2485), INT16_C(-16190), INT16_C( 6718), INT16_C( 3763), INT16_C( -3093), INT16_C( 26368), INT16_C( 9454), INT16_C( 9203)) }, { simde_mm_set_epi16(INT16_C( -8749), INT16_C( 9365), INT16_C( -7001), INT16_C( 29368), INT16_C( 22035), INT16_C( 22575), INT16_C( 15151), INT16_C( 8924)), simde_mm_set_epi16(INT16_C( -5832), INT16_C(-10538), INT16_C(-26586), INT16_C(-26292), INT16_C( 24365), INT16_C( 31099), INT16_C( -6578), INT16_C(-26373)), simde_mm_set_epi16(INT16_C( 1557), INT16_C( -3012), INT16_C( 5680), INT16_C(-23564), INT16_C( 16384), INT16_C( 21425), INT16_C( -3041), INT16_C( -7182)) }, { simde_mm_set_epi16(INT16_C(-16986), INT16_C(-15026), INT16_C( 27907), INT16_C( 22865), INT16_C( 12487), INT16_C( -3271), INT16_C(-20289), INT16_C(-18773)), simde_mm_set_epi16(INT16_C( 12991), INT16_C( -159), INT16_C( -6884), INT16_C(-32273), INT16_C( 15955), INT16_C( 23229), INT16_C(-21266), INT16_C( 5578)), simde_mm_set_epi16(INT16_C( -6734), INT16_C( 73), INT16_C( -5863), INT16_C(-22520), INT16_C( 6080), INT16_C( -2319), INT16_C( 13167), INT16_C( -3196)) }, { simde_mm_set_epi16(INT16_C( -4913), INT16_C(-31702), INT16_C( -5693), INT16_C( 25724), INT16_C( 8769), INT16_C( -4014), INT16_C(-21883), INT16_C(-17971)), simde_mm_set_epi16(INT16_C( 5100), INT16_C( -4154), INT16_C( 5428), INT16_C( 15121), INT16_C( 
10050), INT16_C( -9982), INT16_C(-14810), INT16_C( 21413)), simde_mm_set_epi16(INT16_C( -765), INT16_C( 4019), INT16_C( -943), INT16_C( 11871), INT16_C( 2689), INT16_C( 1223), INT16_C( 9890), INT16_C(-11744)) }, { simde_mm_set_epi16(INT16_C(-31657), INT16_C(-25785), INT16_C( -931), INT16_C( 4611), INT16_C(-30993), INT16_C(-28215), INT16_C( 22556), INT16_C( 13103)), simde_mm_set_epi16(INT16_C( 16378), INT16_C( 14367), INT16_C(-20270), INT16_C( 6205), INT16_C( 3145), INT16_C( 1055), INT16_C( -2582), INT16_C(-27163)), simde_mm_set_epi16(INT16_C(-15823), INT16_C(-11305), INT16_C( 576), INT16_C( 873), INT16_C( -2975), INT16_C( -908), INT16_C( -1777), INT16_C(-10862)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_mulhrs_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_mulhrs_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( 10873), INT16_C( 20470), INT16_C(-22699), INT16_C( -8299)), simde_mm_set_pi16(INT16_C( 2803), INT16_C( 17070), INT16_C( 3401), INT16_C( 5228)), simde_mm_set_pi16(INT16_C( 930), INT16_C( 10664), INT16_C( -2356), INT16_C( -1324)) }, { simde_mm_set_pi16(INT16_C(-25757), INT16_C(-26678), INT16_C(-28851), INT16_C( -6466)), simde_mm_set_pi16(INT16_C( 14685), INT16_C(-28969), INT16_C(-23351), INT16_C(-21231)), simde_mm_set_pi16(INT16_C(-11543), INT16_C( 23585), INT16_C( 20560), INT16_C( 4189)) }, { simde_mm_set_pi16(INT16_C(-15507), INT16_C( 30038), INT16_C( 30487), INT16_C( 19821)), simde_mm_set_pi16(INT16_C( 12343), INT16_C(-21596), INT16_C(-10324), INT16_C(-29925)), simde_mm_set_pi16(INT16_C( -5841), INT16_C(-19797), INT16_C( -9605), INT16_C(-18101)) }, { simde_mm_set_pi16(INT16_C( -7595), INT16_C( 32328), INT16_C( -530), INT16_C( 25195)), simde_mm_set_pi16(INT16_C( 24804), INT16_C( 24789), INT16_C( 13974), INT16_C(-29149)), 
simde_mm_set_pi16(INT16_C( -5749), INT16_C( 24456), INT16_C( -226), INT16_C(-22412)) }, { simde_mm_set_pi16(INT16_C(-25620), INT16_C(-10236), INT16_C( -5862), INT16_C( -6331)), simde_mm_set_pi16(INT16_C( 850), INT16_C(-23039), INT16_C(-12194), INT16_C( 18653)), simde_mm_set_pi16(INT16_C( -665), INT16_C( 7197), INT16_C( 2181), INT16_C( -3604)) }, { simde_mm_set_pi16(INT16_C( -3270), INT16_C( 28976), INT16_C(-17448), INT16_C(-17812)), simde_mm_set_pi16(INT16_C( 22918), INT16_C(-11286), INT16_C(-17728), INT16_C( 18555)), simde_mm_set_pi16(INT16_C( -2287), INT16_C( -9980), INT16_C( 9440), INT16_C(-10086)) }, { simde_mm_set_pi16(INT16_C( -7562), INT16_C(-25114), INT16_C(-28747), INT16_C( 15932)), simde_mm_set_pi16(INT16_C( 19935), INT16_C( -3041), INT16_C( -4324), INT16_C( 19473)), simde_mm_set_pi16(INT16_C( -4600), INT16_C( 2331), INT16_C( 3793), INT16_C( 9468)) }, { simde_mm_set_pi16(INT16_C( 2783), INT16_C( -5706), INT16_C( 21220), INT16_C(-16928)), simde_mm_set_pi16(INT16_C( 5658), INT16_C( 25482), INT16_C( -693), INT16_C( 7606)), simde_mm_set_pi16(INT16_C( 481), INT16_C( -4437), INT16_C( -449), INT16_C( -3929)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_mulhrs_pi16(test_vec[i].a, test_vec[i].b); simde_assert_m64_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_shuffle_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( 44), INT8_C( -68), INT8_C( 109), INT8_C( -92), INT8_C( -84), INT8_C( 83), INT8_C( -49), INT8_C( -4), INT8_C( 73), INT8_C( -26), INT8_C( 49), INT8_C( 23), INT8_C( 14), INT8_C( 33), INT8_C( 30), INT8_C( 21)), simde_mm_set_epi8(INT8_C(-115), INT8_C( 102), INT8_C( 8), INT8_C(-108), INT8_C(-116), INT8_C( 49), INT8_C( 91), INT8_C( 2), INT8_C( 32), INT8_C( 37), INT8_C( -49), INT8_C( -84), INT8_C( -92), INT8_C( -12), INT8_C( 37), INT8_C( -66)), simde_mm_set_epi8(INT8_C( 0), INT8_C( -26), INT8_C( 
-4), INT8_C( 0), INT8_C( 0), INT8_C( 30), INT8_C( -84), INT8_C( 33), INT8_C( 21), INT8_C( 49), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 49), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C( 89), INT8_C( -43), INT8_C( 48), INT8_C( -87), INT8_C(-114), INT8_C( 41), INT8_C( 3), INT8_C( -2), INT8_C( 123), INT8_C( -90), INT8_C( 32), INT8_C( 61), INT8_C( 41), INT8_C( 90), INT8_C( 114), INT8_C( -84)), simde_mm_set_epi8(INT8_C( 51), INT8_C( 83), INT8_C( 120), INT8_C( 6), INT8_C( 44), INT8_C( -77), INT8_C( -77), INT8_C( -60), INT8_C(-100), INT8_C( -13), INT8_C( 38), INT8_C( 107), INT8_C( -16), INT8_C( 22), INT8_C( 88), INT8_C( -72)), simde_mm_set_epi8(INT8_C( 41), INT8_C( 41), INT8_C( -2), INT8_C( -90), INT8_C( -87), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -90), INT8_C(-114), INT8_C( 0), INT8_C( -90), INT8_C( -2), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C( -82), INT8_C( 4), INT8_C( -8), INT8_C( 75), INT8_C( -46), INT8_C( -97), INT8_C( 124), INT8_C( -42), INT8_C( -19), INT8_C( -88), INT8_C( 82), INT8_C( -56), INT8_C( -19), INT8_C( 12), INT8_C( -51), INT8_C( -26)), simde_mm_set_epi8(INT8_C(-105), INT8_C( -81), INT8_C( -16), INT8_C( -8), INT8_C( 85), INT8_C( 99), INT8_C( -25), INT8_C(-111), INT8_C(-109), INT8_C( -84), INT8_C( -46), INT8_C( 92), INT8_C( -64), INT8_C( 32), INT8_C( 127), INT8_C( 4)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 82), INT8_C( -19), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 75), INT8_C( 0), INT8_C( -26), INT8_C( -82), INT8_C( -56)) }, { simde_mm_set_epi8(INT8_C( 46), INT8_C( 56), INT8_C( 81), INT8_C( 110), INT8_C( -13), INT8_C( -23), INT8_C( -16), INT8_C( 99), INT8_C( 80), INT8_C( -49), INT8_C( 127), INT8_C( 115), INT8_C( -66), INT8_C( 50), INT8_C( 102), INT8_C(-123)), simde_mm_set_epi8(INT8_C(-104), INT8_C(-119), INT8_C( -63), INT8_C( 97), INT8_C( 38), INT8_C( 25), INT8_C( -72), INT8_C(-100), INT8_C( 24), INT8_C( -38), INT8_C( 119), INT8_C( -8), INT8_C( -44), 
INT8_C( -42), INT8_C( 68), INT8_C( -82)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 102), INT8_C( -49), INT8_C( -16), INT8_C( 0), INT8_C( 0), INT8_C( 99), INT8_C( 0), INT8_C( 80), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 115), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C(-115), INT8_C( -99), INT8_C( -23), INT8_C( -12), INT8_C( -38), INT8_C( -56), INT8_C( -78), INT8_C( -83), INT8_C( 114), INT8_C( 18), INT8_C( -67), INT8_C( -35), INT8_C( 83), INT8_C( -4), INT8_C(-107), INT8_C( 44)), simde_mm_set_epi8(INT8_C( 9), INT8_C(-115), INT8_C( -83), INT8_C( 52), INT8_C( -91), INT8_C( -50), INT8_C( -37), INT8_C( -26), INT8_C( -47), INT8_C( -5), INT8_C( 109), INT8_C( 26), INT8_C( 107), INT8_C( 65), INT8_C( -20), INT8_C(-121)), simde_mm_set_epi8(INT8_C( -78), INT8_C( 0), INT8_C( 0), INT8_C( -35), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -23), INT8_C( -56), INT8_C( -38), INT8_C(-107), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C(-115), INT8_C( -8), INT8_C( 39), INT8_C( -2), INT8_C( 29), INT8_C( 101), INT8_C( 79), INT8_C( 16), INT8_C( -89), INT8_C( 91), INT8_C( 104), INT8_C( -22), INT8_C( -92), INT8_C(-127), INT8_C( -33), INT8_C( -57)), simde_mm_set_epi8(INT8_C( 39), INT8_C( -83), INT8_C( -41), INT8_C( -20), INT8_C( 45), INT8_C( 94), INT8_C(-102), INT8_C( 66), INT8_C( -26), INT8_C( 50), INT8_C( -29), INT8_C( -46), INT8_C( -77), INT8_C( 42), INT8_C( 100), INT8_C( 57)), simde_mm_set_epi8(INT8_C( -89), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 39), INT8_C( -8), INT8_C( 0), INT8_C(-127), INT8_C( 0), INT8_C(-127), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 101), INT8_C( -22), INT8_C( 79)) }, { simde_mm_set_epi8(INT8_C(-112), INT8_C( 100), INT8_C( -34), INT8_C( -39), INT8_C( 81), INT8_C( -76), INT8_C( 99), INT8_C(-116), INT8_C( -50), INT8_C( -98), INT8_C( 118), INT8_C( -34), INT8_C( -35), INT8_C( -13), INT8_C( -56), INT8_C( -73)), simde_mm_set_epi8(INT8_C(-108), INT8_C( 119), INT8_C( 11), INT8_C( -79), INT8_C( 109), 
INT8_C( -42), INT8_C( 22), INT8_C( 52), INT8_C(-123), INT8_C( -66), INT8_C( 127), INT8_C( 2), INT8_C( -56), INT8_C( 51), INT8_C( 46), INT8_C(-126)), simde_mm_set_epi8(INT8_C( 0), INT8_C( -50), INT8_C( 81), INT8_C( 0), INT8_C( -34), INT8_C( 0), INT8_C( -98), INT8_C( -34), INT8_C( 0), INT8_C( 0), INT8_C(-112), INT8_C( -13), INT8_C( 0), INT8_C( -35), INT8_C( 100), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C(-112), INT8_C( -70), INT8_C( 75), INT8_C( 43), INT8_C( 119), INT8_C( -79), INT8_C( -68), INT8_C( 101), INT8_C( -26), INT8_C( 20), INT8_C( -43), INT8_C( -70), INT8_C( -5), INT8_C( 99), INT8_C( -4), INT8_C( -98)), simde_mm_set_epi8(INT8_C( 92), INT8_C( 83), INT8_C( 95), INT8_C( 6), INT8_C( -16), INT8_C( 77), INT8_C( -25), INT8_C( 16), INT8_C( 51), INT8_C( 91), INT8_C( 21), INT8_C(-110), INT8_C( -9), INT8_C( -12), INT8_C( -77), INT8_C(-112)), simde_mm_set_epi8(INT8_C( 43), INT8_C( -5), INT8_C(-112), INT8_C( 20), INT8_C( 0), INT8_C( 75), INT8_C( 0), INT8_C( -98), INT8_C( -5), INT8_C( 119), INT8_C( -43), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_shuffle_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_shuffle_pi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi8(INT8_C( 74), INT8_C(-101), INT8_C( 15), INT8_C( -66), INT8_C( -20), INT8_C( 87), INT8_C( 16), INT8_C( -79)), simde_mm_set_pi8(INT8_C( 29), INT8_C( -65), INT8_C( 38), INT8_C( -40), INT8_C( -70), INT8_C( 88), INT8_C( -38), INT8_C(-118)), simde_mm_set_pi8(INT8_C( 15), INT8_C( 0), INT8_C(-101), INT8_C( 0), INT8_C( 0), INT8_C( -79), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C( 72), INT8_C( -15), INT8_C( -72), INT8_C( -84), INT8_C( -21), INT8_C( -95), INT8_C( -76), INT8_C( -92)), simde_mm_set_pi8(INT8_C( -94), INT8_C( -81), INT8_C( 23), INT8_C( 85), 
INT8_C(-111), INT8_C( 24), INT8_C(-116), INT8_C( 34)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( 72), INT8_C( -72), INT8_C( 0), INT8_C( -92), INT8_C( 0), INT8_C( -95)) }, { simde_mm_set_pi8(INT8_C( 72), INT8_C( 95), INT8_C( 109), INT8_C( -45), INT8_C( 11), INT8_C( -2), INT8_C( -6), INT8_C( 80)), simde_mm_set_pi8(INT8_C( -77), INT8_C(-102), INT8_C( 57), INT8_C( -50), INT8_C( 85), INT8_C( -92), INT8_C( 102), INT8_C( -10)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C( -6), INT8_C( 0), INT8_C( 109), INT8_C( 0), INT8_C( 95), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C( -13), INT8_C( -1), INT8_C( -80), INT8_C( 110), INT8_C( -12), INT8_C( -38), INT8_C( 43), INT8_C(-126)), simde_mm_set_pi8(INT8_C( 94), INT8_C( -52), INT8_C( -58), INT8_C( 119), INT8_C( -62), INT8_C( 119), INT8_C( -96), INT8_C(-113)), simde_mm_set_pi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -13), INT8_C( 0), INT8_C( -13), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C(-107), INT8_C(-111), INT8_C( 58), INT8_C( -52), INT8_C( -31), INT8_C( -75), INT8_C(-114), INT8_C( 50)), simde_mm_set_pi8(INT8_C( -40), INT8_C( -66), INT8_C( 33), INT8_C( 101), INT8_C( 11), INT8_C( -41), INT8_C( 77), INT8_C( 71)), simde_mm_set_pi8(INT8_C( 0), INT8_C( 0), INT8_C(-114), INT8_C( 58), INT8_C( -31), INT8_C( 0), INT8_C( 58), INT8_C(-107)) }, { simde_mm_set_pi8(INT8_C( -92), INT8_C( -56), INT8_C( 22), INT8_C( -56), INT8_C( -63), INT8_C( -78), INT8_C( 106), INT8_C( -87)), simde_mm_set_pi8(INT8_C( 95), INT8_C( 65), INT8_C( 46), INT8_C( 64), INT8_C( 39), INT8_C( -81), INT8_C( 83), INT8_C( -54)), simde_mm_set_pi8(INT8_C( -92), INT8_C( 106), INT8_C( -56), INT8_C( -87), INT8_C( -92), INT8_C( 0), INT8_C( -63), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C( 33), INT8_C(-110), INT8_C( 35), INT8_C( -83), INT8_C( -76), INT8_C( 59), INT8_C( 45), INT8_C( -42)), simde_mm_set_pi8(INT8_C( 73), INT8_C( -44), INT8_C( 97), INT8_C( -65), INT8_C( -88), INT8_C( -50), INT8_C( 19), INT8_C( -79)), simde_mm_set_pi8(INT8_C( 45), INT8_C( 0), INT8_C( 
45), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -76), INT8_C( 0)) }, { simde_mm_set_pi8(INT8_C( -28), INT8_C( -77), INT8_C( 105), INT8_C( 105), INT8_C( 22), INT8_C( 1), INT8_C( 100), INT8_C( -15)), simde_mm_set_pi8(INT8_C( 115), INT8_C( -11), INT8_C( 20), INT8_C( 80), INT8_C( 40), INT8_C(-114), INT8_C( -49), INT8_C(-108)), simde_mm_set_pi8(INT8_C( 22), INT8_C( 0), INT8_C( 105), INT8_C( -15), INT8_C( -15), INT8_C( 0), INT8_C( 0), INT8_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_shuffle_pi8(test_vec[i].a, test_vec[i].b); simde_assert_m64_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_sign_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( 37), INT8_C( -69), INT8_C( -16), INT8_C( -99), INT8_C( 36), INT8_C( 10), INT8_C( 56), INT8_C( -63), INT8_C(-117), INT8_C( 66), INT8_C( 76), INT8_C( 31), INT8_C( 61), INT8_C( -83), INT8_C( 44), INT8_C( -66)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 30), INT8_C( 67), INT8_C( 75), INT8_C( 24), INT8_C( 56), INT8_C( 14), INT8_C( 75), INT8_C( -85), INT8_C(-103), INT8_C( 1), INT8_C( -80), INT8_C( -93), INT8_C( 37), INT8_C( -46), INT8_C( 95)), simde_mm_set_epi8(INT8_C( 0), INT8_C( -69), INT8_C( -16), INT8_C( -99), INT8_C( 36), INT8_C( 10), INT8_C( 56), INT8_C( -63), INT8_C( 117), INT8_C( -66), INT8_C( 76), INT8_C( -31), INT8_C( -61), INT8_C( -83), INT8_C( -44), INT8_C( -66)) }, { simde_mm_set_epi8(INT8_C( -16), INT8_C( 66), INT8_C( 6), INT8_C( 126), INT8_C( -12), INT8_C( -78), INT8_C(-121), INT8_C( -64), INT8_C( 49), INT8_C( -16), INT8_C( 89), INT8_C( -61), INT8_C( 60), INT8_C( 17), INT8_C( -94), INT8_C( 113)), simde_mm_set_epi8(INT8_C( 70), INT8_C( 27), INT8_C( 101), INT8_C( 119), INT8_C( -80), INT8_C( 103), INT8_C( -28), INT8_C( 79), INT8_C( 90), INT8_C(-127), INT8_C( -36), INT8_C( 57), INT8_C( -22), INT8_C( -74), INT8_C( 75), INT8_C( 106)), simde_mm_set_epi8(INT8_C( 
-16), INT8_C( 66), INT8_C( 6), INT8_C( 126), INT8_C( 12), INT8_C( -78), INT8_C( 121), INT8_C( -64), INT8_C( 49), INT8_C( 16), INT8_C( -89), INT8_C( -61), INT8_C( -60), INT8_C( -17), INT8_C( -94), INT8_C( 113)) }, { simde_mm_set_epi8(INT8_C( 99), INT8_C( 38), INT8_C(-110), INT8_C( 26), INT8_C( 106), INT8_C( 50), INT8_C( -36), INT8_C(-109), INT8_C( -69), INT8_C( -52), INT8_C( 61), INT8_C( -24), INT8_C( -63), INT8_C( 96), INT8_C( 45), INT8_C( 113)), simde_mm_set_epi8(INT8_C( 91), INT8_C( -69), INT8_C( 13), INT8_C( 48), INT8_C( -63), INT8_C( -35), INT8_C( 91), INT8_C(-109), INT8_C( -12), INT8_C( -94), INT8_C( 121), INT8_C( -64), INT8_C( -56), INT8_C( -95), INT8_C( 123), INT8_C( -38)), simde_mm_set_epi8(INT8_C( 99), INT8_C( -38), INT8_C(-110), INT8_C( 26), INT8_C(-106), INT8_C( -50), INT8_C( -36), INT8_C( 109), INT8_C( 69), INT8_C( 52), INT8_C( 61), INT8_C( 24), INT8_C( 63), INT8_C( -96), INT8_C( 45), INT8_C(-113)) }, { simde_mm_set_epi8(INT8_C( -46), INT8_C( -25), INT8_C( -91), INT8_C( -54), INT8_C( 77), INT8_C( -42), INT8_C( -7), INT8_C( -4), INT8_C( -52), INT8_C( 81), INT8_C( -53), INT8_C( -30), INT8_C( 73), INT8_C( 25), INT8_C( -34), INT8_C( 101)), simde_mm_set_epi8(INT8_C( 122), INT8_C( -35), INT8_C( -5), INT8_C( -48), INT8_C( -70), INT8_C( 5), INT8_C( 5), INT8_C( -1), INT8_C( 84), INT8_C( 42), INT8_C( 8), INT8_C( 11), INT8_C( -3), INT8_C(-102), INT8_C( -19), INT8_C( 36)), simde_mm_set_epi8(INT8_C( -46), INT8_C( 25), INT8_C( 91), INT8_C( 54), INT8_C( -77), INT8_C( -42), INT8_C( -7), INT8_C( 4), INT8_C( -52), INT8_C( 81), INT8_C( -53), INT8_C( -30), INT8_C( -73), INT8_C( -25), INT8_C( 34), INT8_C( 101)) }, { simde_mm_set_epi8(INT8_C( 100), INT8_C(-100), INT8_C( 72), INT8_C( 21), INT8_C( 116), INT8_C( 44), INT8_C(-111), INT8_C( 65), INT8_C( -91), INT8_C( 30), INT8_C( -90), INT8_C( -7), INT8_C( -19), INT8_C( 82), INT8_C(-121), INT8_C(-102)), simde_mm_set_epi8(INT8_C( 52), INT8_C( 93), INT8_C(-112), INT8_C( -21), INT8_C( 14), INT8_C( -56), INT8_C( -64), INT8_C( 86), 
INT8_C( 73), INT8_C( 115), INT8_C(-120), INT8_C( -32), INT8_C( 80), INT8_C( -83), INT8_C( 50), INT8_C( 58)), simde_mm_set_epi8(INT8_C( 100), INT8_C(-100), INT8_C( -72), INT8_C( -21), INT8_C( 116), INT8_C( -44), INT8_C( 111), INT8_C( 65), INT8_C( -91), INT8_C( 30), INT8_C( 90), INT8_C( 7), INT8_C( -19), INT8_C( -82), INT8_C(-121), INT8_C(-102)) }, { simde_mm_set_epi8(INT8_C(-106), INT8_C( 24), INT8_C( -64), INT8_C(-116), INT8_C( 54), INT8_C( 12), INT8_C( 9), INT8_C( 21), INT8_C( -21), INT8_C( 44), INT8_C( -75), INT8_C( 4), INT8_C(-124), INT8_C( -51), INT8_C( -45), INT8_C( 84)), simde_mm_set_epi8(INT8_C( -44), INT8_C( -17), INT8_C( 49), INT8_C( 10), INT8_C( 41), INT8_C( -6), INT8_C( -23), INT8_C( -29), INT8_C( 18), INT8_C( -37), INT8_C( -56), INT8_C( -37), INT8_C( -49), INT8_C( 7), INT8_C( 101), INT8_C( -47)), simde_mm_set_epi8(INT8_C( 106), INT8_C( -24), INT8_C( -64), INT8_C(-116), INT8_C( 54), INT8_C( -12), INT8_C( -9), INT8_C( -21), INT8_C( -21), INT8_C( -44), INT8_C( 75), INT8_C( -4), INT8_C( 124), INT8_C( -51), INT8_C( -45), INT8_C( -84)) }, { simde_mm_set_epi8(INT8_C( 54), INT8_C( -94), INT8_C( 102), INT8_C( 77), INT8_C( 43), INT8_C( 70), INT8_C( -80), INT8_C( 96), INT8_C( -60), INT8_C( -75), INT8_C( 42), INT8_C( -31), INT8_C( -2), INT8_C( 121), INT8_C( 29), INT8_C( 54)), simde_mm_set_epi8(INT8_C( 44), INT8_C( 49), INT8_C( 46), INT8_C( 120), INT8_C( -14), INT8_C( 89), INT8_C( 3), INT8_C(-114), INT8_C( -54), INT8_C( -45), INT8_C( 113), INT8_C( -76), INT8_C( 25), INT8_C( -90), INT8_C( -10), INT8_C( 65)), simde_mm_set_epi8(INT8_C( 54), INT8_C( -94), INT8_C( 102), INT8_C( 77), INT8_C( -43), INT8_C( 70), INT8_C( -80), INT8_C( -96), INT8_C( 60), INT8_C( 75), INT8_C( 42), INT8_C( 31), INT8_C( -2), INT8_C(-121), INT8_C( -29), INT8_C( 54)) }, { simde_mm_set_epi8(INT8_C( -18), INT8_C( -38), INT8_C( -9), INT8_C( -44), INT8_C( -53), INT8_C( -14), INT8_C( -61), INT8_C( 59), INT8_C( 58), INT8_C( -68), INT8_C( -23), INT8_C( -51), INT8_C( 43), INT8_C( -1), INT8_C( -6), INT8_C( 
20)), simde_mm_set_epi8(INT8_C( 9), INT8_C( -16), INT8_C( -71), INT8_C( -90), INT8_C( -92), INT8_C( 58), INT8_C( -15), INT8_C( 115), INT8_C( -11), INT8_C( 34), INT8_C( 122), INT8_C( -11), INT8_C( 46), INT8_C( -86), INT8_C( 51), INT8_C( -3)), simde_mm_set_epi8(INT8_C( -18), INT8_C( 38), INT8_C( 9), INT8_C( 44), INT8_C( 53), INT8_C( -14), INT8_C( 61), INT8_C( 59), INT8_C( -58), INT8_C( -68), INT8_C( -23), INT8_C( 51), INT8_C( 43), INT8_C( 1), INT8_C( -6), INT8_C( -20)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_sign_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_sign_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C(-15759), INT16_C( 25741), INT16_C( 3088), INT16_C(-12147), INT16_C( 11906), INT16_C( 15052), INT16_C( 20544), INT16_C(-28879)), simde_mm_set_epi16(INT16_C( 31434), INT16_C( 0), INT16_C( 15330), INT16_C(-15336), INT16_C(-27365), INT16_C( 7051), INT16_C(-15134), INT16_C( 30438)), simde_mm_set_epi16(INT16_C(-15759), INT16_C( 0), INT16_C( 3088), INT16_C( 12147), INT16_C(-11906), INT16_C( 15052), INT16_C(-20544), INT16_C(-28879)) }, { simde_mm_set_epi16(INT16_C( 25362), INT16_C( 8410), INT16_C( -5772), INT16_C( 11356), INT16_C( -5145), INT16_C( -528), INT16_C( -3800), INT16_C(-22538)), simde_mm_set_epi16(INT16_C( 28705), INT16_C( 22529), INT16_C(-24415), INT16_C( 10605), INT16_C( 8581), INT16_C( 348), INT16_C( 17147), INT16_C(-28679)), simde_mm_set_epi16(INT16_C( 25362), INT16_C( 8410), INT16_C( 5772), INT16_C( 11356), INT16_C( -5145), INT16_C( -528), INT16_C( -3800), INT16_C( 22538)) }, { simde_mm_set_epi16(INT16_C(-15793), INT16_C(-15692), INT16_C(-32639), INT16_C( 2140), INT16_C( -7199), INT16_C(-11564), INT16_C( 8190), INT16_C( 1872)), simde_mm_set_epi16(INT16_C(-14750), INT16_C( -5416), INT16_C( -2422), INT16_C(-28769), INT16_C( 
5810), INT16_C( 4853), INT16_C(-22556), INT16_C( 2950)), simde_mm_set_epi16(INT16_C( 15793), INT16_C( 15692), INT16_C( 32639), INT16_C( -2140), INT16_C( -7199), INT16_C(-11564), INT16_C( -8190), INT16_C( 1872)) }, { simde_mm_set_epi16(INT16_C( -6949), INT16_C( 14948), INT16_C(-19128), INT16_C( 99), INT16_C( -9508), INT16_C( 29377), INT16_C( 26021), INT16_C( 21262)), simde_mm_set_epi16(INT16_C( 31478), INT16_C(-23157), INT16_C( 27993), INT16_C( 18819), INT16_C( 21708), INT16_C( 11006), INT16_C(-32250), INT16_C( 28821)), simde_mm_set_epi16(INT16_C( -6949), INT16_C(-14948), INT16_C(-19128), INT16_C( 99), INT16_C( -9508), INT16_C( 29377), INT16_C(-26021), INT16_C( 21262)) }, { simde_mm_set_epi16(INT16_C(-27259), INT16_C( 15633), INT16_C( 24307), INT16_C( 25640), INT16_C( 23376), INT16_C(-30654), INT16_C( 19896), INT16_C(-14888)), simde_mm_set_epi16(INT16_C(-26725), INT16_C( -6818), INT16_C( 478), INT16_C( 25662), INT16_C( 26003), INT16_C( 21963), INT16_C( 1012), INT16_C( 16019)), simde_mm_set_epi16(INT16_C( 27259), INT16_C(-15633), INT16_C( 24307), INT16_C( 25640), INT16_C( 23376), INT16_C(-30654), INT16_C( 19896), INT16_C(-14888)) }, { simde_mm_set_epi16(INT16_C(-18809), INT16_C( 2505), INT16_C(-14233), INT16_C( 26092), INT16_C( 30746), INT16_C( 286), INT16_C(-10480), INT16_C( 18834)), simde_mm_set_epi16(INT16_C( 32423), INT16_C(-20791), INT16_C( -741), INT16_C( 17070), INT16_C( -8670), INT16_C( 1759), INT16_C(-27846), INT16_C( 12891)), simde_mm_set_epi16(INT16_C(-18809), INT16_C( -2505), INT16_C( 14233), INT16_C( 26092), INT16_C(-30746), INT16_C( 286), INT16_C( 10480), INT16_C( 18834)) }, { simde_mm_set_epi16(INT16_C( 10084), INT16_C(-24493), INT16_C( 7465), INT16_C( 3573), INT16_C(-29669), INT16_C( -5452), INT16_C( 25244), INT16_C( 30808)), simde_mm_set_epi16(INT16_C( 7390), INT16_C( 4062), INT16_C( -2410), INT16_C(-18994), INT16_C( 4689), INT16_C( 1376), INT16_C(-23142), INT16_C( 31884)), simde_mm_set_epi16(INT16_C( 10084), INT16_C(-24493), INT16_C( -7465), 
INT16_C( -3573), INT16_C(-29669), INT16_C( -5452), INT16_C(-25244), INT16_C( 30808)) }, { simde_mm_set_epi16(INT16_C( 11692), INT16_C( 22876), INT16_C(-18223), INT16_C(-22058), INT16_C(-12080), INT16_C( 30075), INT16_C( 16936), INT16_C(-31252)), simde_mm_set_epi16(INT16_C( -4320), INT16_C(-15410), INT16_C( 21240), INT16_C(-12323), INT16_C( -6944), INT16_C(-29167), INT16_C( 21220), INT16_C( 24483)), simde_mm_set_epi16(INT16_C(-11692), INT16_C(-22876), INT16_C(-18223), INT16_C( 22058), INT16_C( 12080), INT16_C(-30075), INT16_C( 16936), INT16_C(-31252)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_sign_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_sign_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 633073821), INT32_C( 604649665), INT32_C(-1958589409), INT32_C( 1034759358)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 406326859), INT32_C(-1416035920), INT32_C(-1557802401)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 604649665), INT32_C( 1958589409), INT32_C(-1034759358)) }, { simde_mm_set_epi32(INT32_C( -264108418), INT32_C( -189626432), INT32_C( 837835203), INT32_C( 1007788657)), simde_mm_set_epi32(INT32_C( 1176200567), INT32_C(-1335368625), INT32_C( 1518459961), INT32_C( -357151894)), simde_mm_set_epi32(INT32_C( -264108418), INT32_C( 189626432), INT32_C( 837835203), INT32_C(-1007788657)) }, { simde_mm_set_epi32(INT32_C( 1663472154), INT32_C( 1781718163), INT32_C(-1144242712), INT32_C(-1050661519)), simde_mm_set_epi32(INT32_C( 1538985264), INT32_C(-1042457709), INT32_C( -190678592), INT32_C( -928941094)), simde_mm_set_epi32(INT32_C( 1663472154), INT32_C(-1781718163), INT32_C( 1144242712), INT32_C( 1050661519)) }, { simde_mm_set_epi32(INT32_C( -756570678), INT32_C( 1305934332), INT32_C( -867054622), INT32_C( 1226432101)), 
simde_mm_set_epi32(INT32_C( 2061368272), INT32_C(-1174075905), INT32_C( 1412040715), INT32_C( -40178396)), simde_mm_set_epi32(INT32_C( -756570678), INT32_C(-1305934332), INT32_C( -867054622), INT32_C(-1226432101)) }, { simde_mm_set_epi32(INT32_C( 1687963669), INT32_C( 1949077825), INT32_C(-1524717831), INT32_C( -313358438)), simde_mm_set_epi32(INT32_C( 878547179), INT32_C( 248037462), INT32_C( 1232308448), INT32_C( 1353527866)), simde_mm_set_epi32(INT32_C( 1687963669), INT32_C( 1949077825), INT32_C(-1524717831), INT32_C( -313358438)) }, { simde_mm_set_epi32(INT32_C(-1776762740), INT32_C( 906758421), INT32_C( -349391612), INT32_C(-2066885804)), simde_mm_set_epi32(INT32_C( -722521846), INT32_C( 704309731), INT32_C( 316393691), INT32_C( -821598767)), simde_mm_set_epi32(INT32_C( 1776762740), INT32_C( 906758421), INT32_C( -349391612), INT32_C( 2066885804)) }, { simde_mm_set_epi32(INT32_C( 916612685), INT32_C( 726052960), INT32_C( -994759967), INT32_C( -25617098)), simde_mm_set_epi32(INT32_C( 741420664), INT32_C( -229047410), INT32_C( -892112460), INT32_C( 430372417)), simde_mm_set_epi32(INT32_C( 916612685), INT32_C( -726052960), INT32_C( 994759967), INT32_C( -25617098)) }, { simde_mm_set_epi32(INT32_C( -287639596), INT32_C( -873282757), INT32_C( 985459149), INT32_C( 738195988)), simde_mm_set_epi32(INT32_C( 166771110), INT32_C(-1539640973), INT32_C( -182289675), INT32_C( 782906365)), simde_mm_set_epi32(INT32_C( -287639596), INT32_C( 873282757), INT32_C( -985459149), INT32_C( 738195988)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_sign_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_sign_pi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi8(INT8_C( 41), INT8_C( -70), INT8_C( 22), INT8_C( -60), INT8_C( -48), INT8_C( -81), INT8_C( 97), INT8_C(-114)), simde_mm_set_pi8(INT8_C( 
0), INT8_C( 54), INT8_C( 116), INT8_C( 111), INT8_C( 3), INT8_C( 16), INT8_C(-115), INT8_C( 56)), simde_mm_set_pi8(INT8_C( 0), INT8_C( -70), INT8_C( 22), INT8_C( -60), INT8_C( -48), INT8_C( -81), INT8_C( -97), INT8_C(-114)) }, { simde_mm_set_pi8(INT8_C( -31), INT8_C( -22), INT8_C( 55), INT8_C(-115), INT8_C( -14), INT8_C( -2), INT8_C( 92), INT8_C( 11)), simde_mm_set_pi8(INT8_C( 67), INT8_C( -80), INT8_C( 19), INT8_C( -63), INT8_C( -35), INT8_C( -59), INT8_C( -4), INT8_C( 14)), simde_mm_set_pi8(INT8_C( -31), INT8_C( 22), INT8_C( 55), INT8_C( 115), INT8_C( 14), INT8_C( 2), INT8_C( -92), INT8_C( 11)) }, { simde_mm_set_pi8(INT8_C( 123), INT8_C( 72), INT8_C( 109), INT8_C( 92), INT8_C( -54), INT8_C( 74), INT8_C( 42), INT8_C( 113)), simde_mm_set_pi8(INT8_C( 49), INT8_C( -54), INT8_C( 108), INT8_C( 45), INT8_C( 63), INT8_C(-116), INT8_C( 46), INT8_C( 95)), simde_mm_set_pi8(INT8_C( 123), INT8_C( -72), INT8_C( 109), INT8_C( 92), INT8_C( -54), INT8_C( -74), INT8_C( 42), INT8_C( 113)) }, { simde_mm_set_pi8(INT8_C( 51), INT8_C( 95), INT8_C( 90), INT8_C( -33), INT8_C( 15), INT8_C(-121), INT8_C( -5), INT8_C( 54)), simde_mm_set_pi8(INT8_C( 88), INT8_C( 89), INT8_C( -32), INT8_C( 32), INT8_C( 7), INT8_C( 21), INT8_C( -69), INT8_C( 56)), simde_mm_set_pi8(INT8_C( 51), INT8_C( 95), INT8_C( -90), INT8_C( -33), INT8_C( 15), INT8_C(-121), INT8_C( 5), INT8_C( 54)) }, { simde_mm_set_pi8(INT8_C( 10), INT8_C( 79), INT8_C( -53), INT8_C( 15), INT8_C( 22), INT8_C( -53), INT8_C( -60), INT8_C( -88)), simde_mm_set_pi8(INT8_C( 40), INT8_C(-102), INT8_C( -67), INT8_C( 115), INT8_C( -77), INT8_C( -15), INT8_C( -36), INT8_C( -80)), simde_mm_set_pi8(INT8_C( 10), INT8_C( -79), INT8_C( 53), INT8_C( 15), INT8_C( -22), INT8_C( 53), INT8_C( 60), INT8_C( 88)) }, { simde_mm_set_pi8(INT8_C(-120), INT8_C(-122), INT8_C( -65), INT8_C( 22), INT8_C(-100), INT8_C( 44), INT8_C( -91), INT8_C( 42)), simde_mm_set_pi8(INT8_C( -87), INT8_C( 50), INT8_C( 14), INT8_C( 36), INT8_C( -95), INT8_C( 69), INT8_C( -70), INT8_C( 
38)), simde_mm_set_pi8(INT8_C( 120), INT8_C(-122), INT8_C( -65), INT8_C( 22), INT8_C( 100), INT8_C( 44), INT8_C( 91), INT8_C( 42)) }, { simde_mm_set_pi8(INT8_C( 71), INT8_C( -24), INT8_C( 115), INT8_C( 90), INT8_C( 52), INT8_C( 52), INT8_C( -42), INT8_C( 119)), simde_mm_set_pi8(INT8_C( -78), INT8_C( -10), INT8_C( 31), INT8_C( 106), INT8_C( -76), INT8_C( -74), INT8_C( 82), INT8_C( 103)), simde_mm_set_pi8(INT8_C( -71), INT8_C( 24), INT8_C( 115), INT8_C( 90), INT8_C( -52), INT8_C( -52), INT8_C( -42), INT8_C( 119)) }, { simde_mm_set_pi8(INT8_C( -29), INT8_C( -55), INT8_C(-107), INT8_C( -94), INT8_C(-100), INT8_C(-105), INT8_C( 110), INT8_C( 49)), simde_mm_set_pi8(INT8_C( 116), INT8_C( -73), INT8_C(-114), INT8_C( -3), INT8_C( 58), INT8_C( 101), INT8_C(-111), INT8_C(-116)), simde_mm_set_pi8(INT8_C( -29), INT8_C( 55), INT8_C( 107), INT8_C( 94), INT8_C(-100), INT8_C(-105), INT8_C(-110), INT8_C( -49)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_sign_pi8(test_vec[i].a, test_vec[i].b); simde_assert_m64_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_sign_pi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi16(INT16_C( 10682), INT16_C( 5828), INT16_C(-12113), INT16_C( 24974)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 29807), INT16_C( 784), INT16_C(-29384)), simde_mm_set_pi16(INT16_C( 0), INT16_C( 5828), INT16_C(-12113), INT16_C(-24974)) }, { simde_mm_set_pi16(INT16_C( -7702), INT16_C( 14221), INT16_C( -3330), INT16_C( 23563)), simde_mm_set_pi16(INT16_C( 17328), INT16_C( 5057), INT16_C( -8763), INT16_C( -1010)), simde_mm_set_pi16(INT16_C( -7702), INT16_C( 14221), INT16_C( 3330), INT16_C(-23563)) }, { simde_mm_set_pi16(INT16_C( 31560), INT16_C( 27996), INT16_C(-13750), INT16_C( 10865)), simde_mm_set_pi16(INT16_C( 12746), INT16_C( 27693), INT16_C( 16268), INT16_C( 11871)), simde_mm_set_pi16(INT16_C( 31560), INT16_C( 27996), 
INT16_C(-13750), INT16_C( 10865)) }, { simde_mm_set_pi16(INT16_C( 13151), INT16_C( 23263), INT16_C( 3975), INT16_C( -1226)), simde_mm_set_pi16(INT16_C( 22617), INT16_C( -8160), INT16_C( 1813), INT16_C(-17608)), simde_mm_set_pi16(INT16_C( 13151), INT16_C(-23263), INT16_C( 3975), INT16_C( 1226)) }, { simde_mm_set_pi16(INT16_C( 2639), INT16_C(-13553), INT16_C( 5835), INT16_C(-15192)), simde_mm_set_pi16(INT16_C( 10394), INT16_C(-17037), INT16_C(-19471), INT16_C( -9040)), simde_mm_set_pi16(INT16_C( 2639), INT16_C( 13553), INT16_C( -5835), INT16_C( 15192)) }, { simde_mm_set_pi16(INT16_C(-30586), INT16_C(-16618), INT16_C(-25556), INT16_C(-23254)), simde_mm_set_pi16(INT16_C(-22222), INT16_C( 3620), INT16_C(-24251), INT16_C(-17882)), simde_mm_set_pi16(INT16_C( 30586), INT16_C(-16618), INT16_C( 25556), INT16_C( 23254)) }, { simde_mm_set_pi16(INT16_C( 18408), INT16_C( 29530), INT16_C( 13364), INT16_C(-10633)), simde_mm_set_pi16(INT16_C(-19722), INT16_C( 8042), INT16_C(-19274), INT16_C( 21095)), simde_mm_set_pi16(INT16_C(-18408), INT16_C( 29530), INT16_C(-13364), INT16_C(-10633)) }, { simde_mm_set_pi16(INT16_C( -7223), INT16_C(-27230), INT16_C(-25449), INT16_C( 28209)), simde_mm_set_pi16(INT16_C( 29879), INT16_C(-28931), INT16_C( 14949), INT16_C(-28276)), simde_mm_set_pi16(INT16_C( -7223), INT16_C( 27230), INT16_C(-25449), INT16_C(-28209)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_sign_pi16(test_vec[i].a, test_vec[i].b); simde_assert_m64_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_sign_pi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m64 a; simde__m64 b; simde__m64 r; } test_vec[8] = { { simde_mm_set_pi32(INT32_C( 700061380), INT32_C( -793812594)), simde_mm_set_pi32(INT32_C( 0), INT32_C( 51416376)), simde_mm_set_pi32(INT32_C( 0), INT32_C( -793812594)) }, { simde_mm_set_pi32(INT32_C( -504744051), INT32_C( -218211317)), simde_mm_set_pi32(INT32_C( 1135612865), INT32_C( -574227442)), 
simde_mm_set_pi32(INT32_C( -504744051), INT32_C( 218211317)) }, { simde_mm_set_pi32(INT32_C( 2068344156), INT32_C( -901109135)), simde_mm_set_pi32(INT32_C( 835349549), INT32_C( 1066151519)), simde_mm_set_pi32(INT32_C( 2068344156), INT32_C( -901109135)) }, { simde_mm_set_pi32(INT32_C( 861887199), INT32_C( 260569910)), simde_mm_set_pi32(INT32_C( 1482285088), INT32_C( 118864696)), simde_mm_set_pi32(INT32_C( 861887199), INT32_C( 260569910)) }, { simde_mm_set_pi32(INT32_C( 173001487), INT32_C( 382452904)), simde_mm_set_pi32(INT32_C( 681229683), INT32_C(-1275994960)), simde_mm_set_pi32(INT32_C( 173001487), INT32_C( -382452904)) }, { simde_mm_set_pi32(INT32_C(-2004435178), INT32_C(-1674795734)), simde_mm_set_pi32(INT32_C(-1456337372), INT32_C(-1589265882)), simde_mm_set_pi32(INT32_C( 2004435178), INT32_C( 1674795734)) }, { simde_mm_set_pi32(INT32_C( 1206416218), INT32_C( 875878007)), simde_mm_set_pi32(INT32_C(-1292492950), INT32_C(-1263119769)), simde_mm_set_pi32(INT32_C(-1206416218), INT32_C( -875878007)) }, { simde_mm_set_pi32(INT32_C( -473328222), INT32_C(-1667797455)), simde_mm_set_pi32(INT32_C( 1958186749), INT32_C( 979734924)), simde_mm_set_pi32(INT32_C( -473328222), INT32_C(-1667797455)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m64 r = simde_mm_sign_pi32(test_vec[i].a, test_vec[i].b); simde_assert_m64_i32(r, ==, test_vec[i].r); } return 0; } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_abs_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_abs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_abs_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_abs_pi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_abs_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_abs_pi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_alignr_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_alignr_pi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_hadd_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_hadd_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_hadd_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_hadd_pi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_hadds_epi16) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm_hadds_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_hsub_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_hsub_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_hsub_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_hsub_pi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_hsubs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_hsubs_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maddubs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_maddubs_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mulhrs_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_mulhrs_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_shuffle_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_shuffle_pi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sign_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sign_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sign_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sign_pi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sign_pi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sign_pi32) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/svml.c000066400000000000000000121433321400333146700153640ustar00rootroot00000000000000/* Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ #define SIMDE_TESTS_CURRENT_ISAX svml #include #include static int test_simde_mm_acos_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.35)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 2.42), SIMDE_FLOAT32_C( 1.21)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.03)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.05), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 1.54)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.47)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 1.14), SIMDE_FLOAT32_C( 1.84), SIMDE_FLOAT32_C( 1.08)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.57)), simde_mm_set_ps(SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( 1.49), SIMDE_FLOAT32_C( 0.96)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.89), SIMDE_FLOAT32_C( 2.61), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.80)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.92)), simde_mm_set_ps(SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( 2.74)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.66)), 
simde_mm_set_ps(SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C( 1.78), SIMDE_FLOAT32_C( 2.94), SIMDE_FLOAT32_C( 2.29)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.69)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.39), SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 0.81)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_acos_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_acos_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.35)), simde_mm_set_pd(SIMDE_FLOAT64_C( 2.42), SIMDE_FLOAT64_C( 1.21)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.76), SIMDE_FLOAT64_C( 1.53)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.88), SIMDE_FLOAT64_C( 1.54)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 0.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.05), SIMDE_FLOAT64_C( 0.84)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.47)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.84), SIMDE_FLOAT64_C( 1.08)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.42)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( 1.14)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.57)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.49), SIMDE_FLOAT64_C( 0.96)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -0.69)), simde_mm_set_pd(SIMDE_FLOAT64_C( 2.32), SIMDE_FLOAT64_C( 2.33)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_acos_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_acos_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { 
simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.35)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.05), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 2.42), SIMDE_FLOAT32_C( 1.21)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.47)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( 1.49), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 1.14), SIMDE_FLOAT32_C( 1.84), SIMDE_FLOAT32_C( 1.08)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( 2.74), SIMDE_FLOAT32_C( 1.89), SIMDE_FLOAT32_C( 2.61), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.80)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.66)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.39), SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C( 1.78), SIMDE_FLOAT32_C( 2.94), SIMDE_FLOAT32_C( 2.29)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.84)), 
simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 1.12), SIMDE_FLOAT32_C( 2.19), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 0.57)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.03)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.97), SIMDE_FLOAT32_C( 1.16), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( 1.01), SIMDE_FLOAT32_C( 1.83), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 1.60)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( 0.40)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 1.65), SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( 2.79), SIMDE_FLOAT32_C( 2.42), SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 2.15), SIMDE_FLOAT32_C( 1.16)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.25)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 1.72), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 1.32)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_acos_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_acos_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.35)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.76), 
SIMDE_FLOAT64_C( 1.53), SIMDE_FLOAT64_C( 2.42), SIMDE_FLOAT64_C( 1.21)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.03)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.05), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( 1.88), SIMDE_FLOAT64_C( 1.54)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.47)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( 1.14), SIMDE_FLOAT64_C( 1.84), SIMDE_FLOAT64_C( 1.08)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.57)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 2.32), SIMDE_FLOAT64_C( 2.33), SIMDE_FLOAT64_C( 1.49), SIMDE_FLOAT64_C( 0.96)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.86), SIMDE_FLOAT64_C( -0.42), SIMDE_FLOAT64_C( 0.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.89), SIMDE_FLOAT64_C( 2.61), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.80)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.38), SIMDE_FLOAT64_C( -0.92)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 2.03), SIMDE_FLOAT64_C( 1.54), SIMDE_FLOAT64_C( 1.96), SIMDE_FLOAT64_C( 2.74)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( -0.66)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.31), SIMDE_FLOAT64_C( 1.78), SIMDE_FLOAT64_C( 2.94), SIMDE_FLOAT64_C( 2.29)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.69)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.39), SIMDE_FLOAT64_C( 2.04), SIMDE_FLOAT64_C( 1.34), SIMDE_FLOAT64_C( 0.81)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_acos_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_acos_ps(SIMDE_MUNIT_TEST_ARGS) { const 
struct { simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.35)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( 1.49), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 1.14), SIMDE_FLOAT32_C( 1.84), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 1.05), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 2.42), SIMDE_FLOAT32_C( 1.21)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.70)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.39), SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C( 1.78), SIMDE_FLOAT32_C( 2.94), SIMDE_FLOAT32_C( 2.29), SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( 2.74), SIMDE_FLOAT32_C( 1.89), SIMDE_FLOAT32_C( 2.61), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.80)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -0.58), 
SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.84)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.97), SIMDE_FLOAT32_C( 1.16), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( 1.01), SIMDE_FLOAT32_C( 1.83), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 1.60), SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 1.12), SIMDE_FLOAT32_C( 2.19), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 0.57)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( 0.40)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 1.72), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 1.32), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 1.65), SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( 2.79), SIMDE_FLOAT32_C( 2.42), SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 2.15), SIMDE_FLOAT32_C( 1.16)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.17)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( 2.63), SIMDE_FLOAT32_C( 2.50), SIMDE_FLOAT32_C( 1.91), SIMDE_FLOAT32_C( 2.13), SIMDE_FLOAT32_C( 1.76), 
SIMDE_FLOAT32_C( 2.53), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.45), SIMDE_FLOAT32_C( 1.74)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -0.74)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 1.59), SIMDE_FLOAT32_C( 2.20), SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 1.43), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 2.71), SIMDE_FLOAT32_C( 1.23), SIMDE_FLOAT32_C( 1.47), SIMDE_FLOAT32_C( 1.09), SIMDE_FLOAT32_C( 2.40)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.10)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 2.18), SIMDE_FLOAT32_C( 3.14), SIMDE_FLOAT32_C( 1.92), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 1.28), SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 2.19), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 2.43), SIMDE_FLOAT32_C( 1.14), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 1.67)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 
-0.33), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.70)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( 1.94), SIMDE_FLOAT32_C( 2.42), SIMDE_FLOAT32_C( 1.60), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( 1.91), SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 2.35)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_acos_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_acos_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.35)), UINT16_C(41466), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( -0.75)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.39), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.92), 
SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( 1.49), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 1.84), SIMDE_FLOAT32_C( 1.05), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 0.35)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.03)), UINT16_C(36797), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.99)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.74), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 1.32), SIMDE_FLOAT32_C( 1.65), SIMDE_FLOAT32_C( 2.79), SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 1.16), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( 1.83), SIMDE_FLOAT32_C( 1.60), SIMDE_FLOAT32_C( 1.12), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.14)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.54)), UINT16_C(16804), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.69), 
SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( -0.07)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( 1.23), SIMDE_FLOAT32_C( 1.09), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.54)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( -0.76)), UINT16_C( 2107), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.98)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 3.14), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 1.64), 
SIMDE_FLOAT32_C( 0.20)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.44)), UINT16_C(22274), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( 0.48)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 1.41), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 1.13), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 1.46), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( 2.79), SIMDE_FLOAT32_C( 0.44)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( -0.81), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.43)), UINT16_C(27396), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 
-0.31), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.49)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 2.40), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 2.53), SIMDE_FLOAT32_C( 1.41), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( 1.30), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.43)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( -0.89)), UINT16_C( 953), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( 0.09)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 1.45), SIMDE_FLOAT32_C( 2.19), SIMDE_FLOAT32_C( 1.61), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 1.48)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 
-0.82), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -0.20)), UINT16_C(12713), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.85)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 1.93), SIMDE_FLOAT32_C( 1.51), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.55)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_acos_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_acos_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.35)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.05), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( 1.88), SIMDE_FLOAT64_C( 1.54), SIMDE_FLOAT64_C( 1.76), SIMDE_FLOAT64_C( 1.53), SIMDE_FLOAT64_C( 2.42), SIMDE_FLOAT64_C( 1.21)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.42), 
SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.47)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.32), SIMDE_FLOAT64_C( 2.33), SIMDE_FLOAT64_C( 1.49), SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( 1.14), SIMDE_FLOAT64_C( 1.84), SIMDE_FLOAT64_C( 1.08)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.38), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.86), SIMDE_FLOAT64_C( -0.42), SIMDE_FLOAT64_C( 0.70)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.03), SIMDE_FLOAT64_C( 1.54), SIMDE_FLOAT64_C( 1.96), SIMDE_FLOAT64_C( 2.74), SIMDE_FLOAT64_C( 1.89), SIMDE_FLOAT64_C( 2.61), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.80)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( -0.66)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.39), SIMDE_FLOAT64_C( 2.04), SIMDE_FLOAT64_C( 1.34), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( 1.31), SIMDE_FLOAT64_C( 1.78), SIMDE_FLOAT64_C( 2.94), SIMDE_FLOAT64_C( 2.29)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.84)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.45), SIMDE_FLOAT64_C( 1.12), SIMDE_FLOAT64_C( 2.19), SIMDE_FLOAT64_C( 1.18), SIMDE_FLOAT64_C( 2.45), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 1.54), SIMDE_FLOAT64_C( 0.57)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( -0.03)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.97), SIMDE_FLOAT64_C( 1.16), SIMDE_FLOAT64_C( 0.85), SIMDE_FLOAT64_C( 1.22), SIMDE_FLOAT64_C( 1.01), SIMDE_FLOAT64_C( 1.83), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 1.60)) }, { 
simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.94), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.55), SIMDE_FLOAT64_C( 0.40)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.77), SIMDE_FLOAT64_C( 1.65), SIMDE_FLOAT64_C( 1.22), SIMDE_FLOAT64_C( 2.79), SIMDE_FLOAT64_C( 2.42), SIMDE_FLOAT64_C( 2.45), SIMDE_FLOAT64_C( 2.15), SIMDE_FLOAT64_C( 1.16)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( 0.25)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.08), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( 1.72), SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 1.32)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_acos_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_acos_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.35)), UINT8_C(139), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( -0.75)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.32), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 1.05), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 1.76), SIMDE_FLOAT64_C( 2.42)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( -0.98), 
SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( -0.38), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.42)), UINT8_C(229), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -0.66), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( -0.86)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 2.04), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 1.54), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( 2.61)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 0.99)), UINT8_C(253), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.55), SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( -0.77)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.15), SIMDE_FLOAT64_C( 1.97), SIMDE_FLOAT64_C( 0.85), SIMDE_FLOAT64_C( 1.01), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 2.45), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 2.45)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.75)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( -0.17), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( -0.94)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( 1.74), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 1.32), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 2.79)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( -0.74), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.34), 
SIMDE_FLOAT64_C( -0.80), SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( -0.82), SIMDE_FLOAT64_C( 0.66)), UINT8_C(145), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( -0.18), SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( -0.87), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.56)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.23), SIMDE_FLOAT64_C( -0.74), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 1.25), SIMDE_FLOAT64_C( -0.80), SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( -0.82), SIMDE_FLOAT64_C( 0.98)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.98)), UINT8_C( 75), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( -0.59), SIMDE_FLOAT64_C( 0.73), SIMDE_FLOAT64_C( 0.62), SIMDE_FLOAT64_C( 0.14)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 1.14), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( 2.20), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( 1.43)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( -0.70), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.07)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.57), SIMDE_FLOAT64_C( -0.34), SIMDE_FLOAT64_C( 0.29), SIMDE_FLOAT64_C( -0.58)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 1.56), SIMDE_FLOAT64_C( -0.70), SIMDE_FLOAT64_C( 2.45), SIMDE_FLOAT64_C( 2.18), SIMDE_FLOAT64_C( 1.92), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( 2.19)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( 0.94), SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( -0.44), 
SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( -0.18)), UINT8_C(213), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.78), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -0.36), SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( -0.13)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.47), SIMDE_FLOAT64_C( 1.12), SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( 1.77), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 1.60), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( 1.70)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_acos_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_acosh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 3.69), SIMDE_FLOAT32_C( 4.43), SIMDE_FLOAT32_C( 1.81), SIMDE_FLOAT32_C( 5.44)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.98), SIMDE_FLOAT32_C( 2.17), SIMDE_FLOAT32_C( 1.20), SIMDE_FLOAT32_C( 2.38)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 5.94), SIMDE_FLOAT32_C( 6.51), SIMDE_FLOAT32_C( 3.32), SIMDE_FLOAT32_C( 4.41)), simde_mm_set_ps(SIMDE_FLOAT32_C( 2.47), SIMDE_FLOAT32_C( 2.56), SIMDE_FLOAT32_C( 1.87), SIMDE_FLOAT32_C( 2.16)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 7.02), SIMDE_FLOAT32_C( 5.69), SIMDE_FLOAT32_C( 3.41), SIMDE_FLOAT32_C( 5.84)), simde_mm_set_ps(SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( 2.42), SIMDE_FLOAT32_C( 1.90), SIMDE_FLOAT32_C( 2.45)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 2.06), SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( 4.58), SIMDE_FLOAT32_C( 6.19)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 2.20), SIMDE_FLOAT32_C( 2.51)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 3.29), SIMDE_FLOAT32_C( 1.46), SIMDE_FLOAT32_C( 2.92), SIMDE_FLOAT32_C( 6.60)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.86), SIMDE_FLOAT32_C( 0.93), 
SIMDE_FLOAT32_C( 1.73), SIMDE_FLOAT32_C( 2.57)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 2.83), SIMDE_FLOAT32_C( 4.39), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( 1.25)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( 2.16), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 0.69)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 5.16), SIMDE_FLOAT32_C( 3.60), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 2.12)), simde_mm_set_ps(SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 1.38)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 4.89), SIMDE_FLOAT32_C( 2.81), SIMDE_FLOAT32_C( 5.07), SIMDE_FLOAT32_C( 6.57)), simde_mm_set_ps(SIMDE_FLOAT32_C( 2.27), SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( 2.31), SIMDE_FLOAT32_C( 2.57)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_acosh_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_acosh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 1.81), SIMDE_FLOAT64_C( 5.44)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.20), SIMDE_FLOAT64_C( 2.38)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 3.69), SIMDE_FLOAT64_C( 4.43)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.98), SIMDE_FLOAT64_C( 2.17)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 3.32), SIMDE_FLOAT64_C( 4.41)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.87), SIMDE_FLOAT64_C( 2.16)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 5.94), SIMDE_FLOAT64_C( 6.51)), simde_mm_set_pd(SIMDE_FLOAT64_C( 2.47), SIMDE_FLOAT64_C( 2.56)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 3.41), SIMDE_FLOAT64_C( 5.84)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.90), SIMDE_FLOAT64_C( 2.45)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 7.02), SIMDE_FLOAT64_C( 5.69)), simde_mm_set_pd(SIMDE_FLOAT64_C( 2.64), SIMDE_FLOAT64_C( 2.42)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 4.58), SIMDE_FLOAT64_C( 6.19)), simde_mm_set_pd(SIMDE_FLOAT64_C( 2.20), SIMDE_FLOAT64_C( 2.51)) }, { 
simde_mm_set_pd(SIMDE_FLOAT64_C( 2.06), SIMDE_FLOAT64_C( 2.04)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.35), SIMDE_FLOAT64_C( 1.34)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_acosh_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_acosh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 5.94), SIMDE_FLOAT32_C( 6.51), SIMDE_FLOAT32_C( 3.32), SIMDE_FLOAT32_C( 4.41), SIMDE_FLOAT32_C( 3.69), SIMDE_FLOAT32_C( 4.43), SIMDE_FLOAT32_C( 1.81), SIMDE_FLOAT32_C( 5.44)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.47), SIMDE_FLOAT32_C( 2.56), SIMDE_FLOAT32_C( 1.87), SIMDE_FLOAT32_C( 2.16), SIMDE_FLOAT32_C( 1.98), SIMDE_FLOAT32_C( 2.17), SIMDE_FLOAT32_C( 1.20), SIMDE_FLOAT32_C( 2.38)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.06), SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( 4.58), SIMDE_FLOAT32_C( 6.19), SIMDE_FLOAT32_C( 7.02), SIMDE_FLOAT32_C( 5.69), SIMDE_FLOAT32_C( 3.41), SIMDE_FLOAT32_C( 5.84)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 2.20), SIMDE_FLOAT32_C( 2.51), SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( 2.42), SIMDE_FLOAT32_C( 1.90), SIMDE_FLOAT32_C( 2.45)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.83), SIMDE_FLOAT32_C( 4.39), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( 3.29), SIMDE_FLOAT32_C( 1.46), SIMDE_FLOAT32_C( 2.92), SIMDE_FLOAT32_C( 6.60)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( 2.16), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 1.86), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 1.73), SIMDE_FLOAT32_C( 2.57)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 4.89), SIMDE_FLOAT32_C( 2.81), SIMDE_FLOAT32_C( 5.07), SIMDE_FLOAT32_C( 6.57), SIMDE_FLOAT32_C( 5.16), SIMDE_FLOAT32_C( 3.60), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 2.12)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.27), 
SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( 2.31), SIMDE_FLOAT32_C( 2.57), SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 1.38)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 5.76), SIMDE_FLOAT32_C( 2.37), SIMDE_FLOAT32_C( 5.56), SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 7.58), SIMDE_FLOAT32_C( 4.39), SIMDE_FLOAT32_C( 7.08)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 2.44), SIMDE_FLOAT32_C( 1.51), SIMDE_FLOAT32_C( 2.40), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 2.71), SIMDE_FLOAT32_C( 2.16), SIMDE_FLOAT32_C( 2.65)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 3.02), SIMDE_FLOAT32_C( 5.61), SIMDE_FLOAT32_C( 6.46), SIMDE_FLOAT32_C( 5.42), SIMDE_FLOAT32_C( 6.06), SIMDE_FLOAT32_C( 3.43), SIMDE_FLOAT32_C( 6.88), SIMDE_FLOAT32_C( 4.20)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 2.41), SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( 2.37), SIMDE_FLOAT32_C( 2.49), SIMDE_FLOAT32_C( 1.90), SIMDE_FLOAT32_C( 2.62), SIMDE_FLOAT32_C( 2.11)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 3.63), SIMDE_FLOAT32_C( 4.03), SIMDE_FLOAT32_C( 5.41), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 1.83), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 2.47), SIMDE_FLOAT32_C( 5.62)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( 2.07), SIMDE_FLOAT32_C( 2.37), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 1.21), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 1.55), SIMDE_FLOAT32_C( 2.41)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 5.85), SIMDE_FLOAT32_C( 6.54), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 7.00), SIMDE_FLOAT32_C( 7.30), SIMDE_FLOAT32_C( 6.28), SIMDE_FLOAT32_C( 6.91), SIMDE_FLOAT32_C( 5.14)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 2.57), SIMDE_FLOAT32_C( 2.01), SIMDE_FLOAT32_C( 2.63), SIMDE_FLOAT32_C( 2.68), SIMDE_FLOAT32_C( 2.52), SIMDE_FLOAT32_C( 2.62), SIMDE_FLOAT32_C( 2.32)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = 
simde_mm256_acosh_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_acosh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 3.69), SIMDE_FLOAT64_C( 4.43), SIMDE_FLOAT64_C( 1.81), SIMDE_FLOAT64_C( 5.44)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.98), SIMDE_FLOAT64_C( 2.17), SIMDE_FLOAT64_C( 1.20), SIMDE_FLOAT64_C( 2.38)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 5.94), SIMDE_FLOAT64_C( 6.51), SIMDE_FLOAT64_C( 3.32), SIMDE_FLOAT64_C( 4.41)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 2.47), SIMDE_FLOAT64_C( 2.56), SIMDE_FLOAT64_C( 1.87), SIMDE_FLOAT64_C( 2.16)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 7.02), SIMDE_FLOAT64_C( 5.69), SIMDE_FLOAT64_C( 3.41), SIMDE_FLOAT64_C( 5.84)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 2.64), SIMDE_FLOAT64_C( 2.42), SIMDE_FLOAT64_C( 1.90), SIMDE_FLOAT64_C( 2.45)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 2.06), SIMDE_FLOAT64_C( 2.04), SIMDE_FLOAT64_C( 4.58), SIMDE_FLOAT64_C( 6.19)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.35), SIMDE_FLOAT64_C( 1.34), SIMDE_FLOAT64_C( 2.20), SIMDE_FLOAT64_C( 2.51)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 3.29), SIMDE_FLOAT64_C( 1.46), SIMDE_FLOAT64_C( 2.92), SIMDE_FLOAT64_C( 6.60)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.86), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 1.73), SIMDE_FLOAT64_C( 2.57)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 2.83), SIMDE_FLOAT64_C( 4.39), SIMDE_FLOAT64_C( 3.03), SIMDE_FLOAT64_C( 1.25)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.70), SIMDE_FLOAT64_C( 2.16), SIMDE_FLOAT64_C( 1.77), SIMDE_FLOAT64_C( 0.69)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 5.16), SIMDE_FLOAT64_C( 3.60), SIMDE_FLOAT64_C( 1.08), SIMDE_FLOAT64_C( 2.12)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 2.32), SIMDE_FLOAT64_C( 1.95), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 1.38)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 4.89), SIMDE_FLOAT64_C( 2.81), SIMDE_FLOAT64_C( 5.07), SIMDE_FLOAT64_C( 6.57)), 
simde_mm256_set_pd(SIMDE_FLOAT64_C( 2.27), SIMDE_FLOAT64_C( 1.69), SIMDE_FLOAT64_C( 2.31), SIMDE_FLOAT64_C( 2.57)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_acosh_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_acosh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.06), SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( 4.58), SIMDE_FLOAT32_C( 6.19), SIMDE_FLOAT32_C( 7.02), SIMDE_FLOAT32_C( 5.69), SIMDE_FLOAT32_C( 3.41), SIMDE_FLOAT32_C( 5.84), SIMDE_FLOAT32_C( 5.94), SIMDE_FLOAT32_C( 6.51), SIMDE_FLOAT32_C( 3.32), SIMDE_FLOAT32_C( 4.41), SIMDE_FLOAT32_C( 3.69), SIMDE_FLOAT32_C( 4.43), SIMDE_FLOAT32_C( 1.81), SIMDE_FLOAT32_C( 5.44)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 2.20), SIMDE_FLOAT32_C( 2.51), SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( 2.42), SIMDE_FLOAT32_C( 1.90), SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 2.47), SIMDE_FLOAT32_C( 2.56), SIMDE_FLOAT32_C( 1.87), SIMDE_FLOAT32_C( 2.16), SIMDE_FLOAT32_C( 1.98), SIMDE_FLOAT32_C( 2.17), SIMDE_FLOAT32_C( 1.20), SIMDE_FLOAT32_C( 2.38)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 4.89), SIMDE_FLOAT32_C( 2.81), SIMDE_FLOAT32_C( 5.07), SIMDE_FLOAT32_C( 6.57), SIMDE_FLOAT32_C( 5.16), SIMDE_FLOAT32_C( 3.60), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 2.12), SIMDE_FLOAT32_C( 2.83), SIMDE_FLOAT32_C( 4.39), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( 3.29), SIMDE_FLOAT32_C( 1.46), SIMDE_FLOAT32_C( 2.92), SIMDE_FLOAT32_C( 6.60)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.27), SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( 2.31), SIMDE_FLOAT32_C( 2.57), SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 1.38), SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( 2.16), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 1.86), SIMDE_FLOAT32_C( 
0.93), SIMDE_FLOAT32_C( 1.73), SIMDE_FLOAT32_C( 2.57)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.02), SIMDE_FLOAT32_C( 5.61), SIMDE_FLOAT32_C( 6.46), SIMDE_FLOAT32_C( 5.42), SIMDE_FLOAT32_C( 6.06), SIMDE_FLOAT32_C( 3.43), SIMDE_FLOAT32_C( 6.88), SIMDE_FLOAT32_C( 4.20), SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 5.76), SIMDE_FLOAT32_C( 2.37), SIMDE_FLOAT32_C( 5.56), SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 7.58), SIMDE_FLOAT32_C( 4.39), SIMDE_FLOAT32_C( 7.08)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 2.41), SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( 2.37), SIMDE_FLOAT32_C( 2.49), SIMDE_FLOAT32_C( 1.90), SIMDE_FLOAT32_C( 2.62), SIMDE_FLOAT32_C( 2.11), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 2.44), SIMDE_FLOAT32_C( 1.51), SIMDE_FLOAT32_C( 2.40), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 2.71), SIMDE_FLOAT32_C( 2.16), SIMDE_FLOAT32_C( 2.65)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 5.85), SIMDE_FLOAT32_C( 6.54), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 7.00), SIMDE_FLOAT32_C( 7.30), SIMDE_FLOAT32_C( 6.28), SIMDE_FLOAT32_C( 6.91), SIMDE_FLOAT32_C( 5.14), SIMDE_FLOAT32_C( 3.63), SIMDE_FLOAT32_C( 4.03), SIMDE_FLOAT32_C( 5.41), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 1.83), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 2.47), SIMDE_FLOAT32_C( 5.62)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 2.57), SIMDE_FLOAT32_C( 2.01), SIMDE_FLOAT32_C( 2.63), SIMDE_FLOAT32_C( 2.68), SIMDE_FLOAT32_C( 2.52), SIMDE_FLOAT32_C( 2.62), SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( 2.07), SIMDE_FLOAT32_C( 2.37), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 1.21), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 1.55), SIMDE_FLOAT32_C( 2.41)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.71), SIMDE_FLOAT32_C( 6.80), SIMDE_FLOAT32_C( 5.37), SIMDE_FLOAT32_C( 5.43), SIMDE_FLOAT32_C( 1.41), SIMDE_FLOAT32_C( 1.67), SIMDE_FLOAT32_C( 3.22), SIMDE_FLOAT32_C( 2.56), SIMDE_FLOAT32_C( 3.67), SIMDE_FLOAT32_C( 1.59), SIMDE_FLOAT32_C( 6.15), SIMDE_FLOAT32_C( 6.46), 
SIMDE_FLOAT32_C( 4.07), SIMDE_FLOAT32_C( 6.09), SIMDE_FLOAT32_C( 4.70), SIMDE_FLOAT32_C( 3.73)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 2.60), SIMDE_FLOAT32_C( 2.37), SIMDE_FLOAT32_C( 2.38), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 1.10), SIMDE_FLOAT32_C( 1.84), SIMDE_FLOAT32_C( 1.59), SIMDE_FLOAT32_C( 1.97), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 2.50), SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( 2.08), SIMDE_FLOAT32_C( 2.49), SIMDE_FLOAT32_C( 2.23), SIMDE_FLOAT32_C( 1.99)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 6.58), SIMDE_FLOAT32_C( 7.07), SIMDE_FLOAT32_C( 4.23), SIMDE_FLOAT32_C( 2.35), SIMDE_FLOAT32_C( 2.82), SIMDE_FLOAT32_C( 6.71), SIMDE_FLOAT32_C( 5.97), SIMDE_FLOAT32_C( 6.36), SIMDE_FLOAT32_C( 7.04), SIMDE_FLOAT32_C( 4.76), SIMDE_FLOAT32_C( 7.53), SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C( 5.39), SIMDE_FLOAT32_C( 4.63), SIMDE_FLOAT32_C( 5.83), SIMDE_FLOAT32_C( 1.86)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.57), SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( 2.12), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( 2.59), SIMDE_FLOAT32_C( 2.47), SIMDE_FLOAT32_C( 2.54), SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( 2.24), SIMDE_FLOAT32_C( 2.71), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 2.37), SIMDE_FLOAT32_C( 2.21), SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 1.23)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 7.01), SIMDE_FLOAT32_C( 2.41), SIMDE_FLOAT32_C( 1.01), SIMDE_FLOAT32_C( 3.19), SIMDE_FLOAT32_C( 7.35), SIMDE_FLOAT32_C( 5.27), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 2.40), SIMDE_FLOAT32_C( 4.08), SIMDE_FLOAT32_C( 6.64), SIMDE_FLOAT32_C( 7.53), SIMDE_FLOAT32_C( 1.80), SIMDE_FLOAT32_C( 5.70), SIMDE_FLOAT32_C( 4.39), SIMDE_FLOAT32_C( 3.99)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 1.83), SIMDE_FLOAT32_C( 2.68), SIMDE_FLOAT32_C( 2.35), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 1.52), SIMDE_FLOAT32_C( 2.08), SIMDE_FLOAT32_C( 
2.58), SIMDE_FLOAT32_C( 2.71), SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 2.43), SIMDE_FLOAT32_C( 2.16), SIMDE_FLOAT32_C( 2.06)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.85), SIMDE_FLOAT32_C( 3.11), SIMDE_FLOAT32_C( 1.82), SIMDE_FLOAT32_C( 4.19), SIMDE_FLOAT32_C( 7.38), SIMDE_FLOAT32_C( 4.32), SIMDE_FLOAT32_C( 3.22), SIMDE_FLOAT32_C( 3.89), SIMDE_FLOAT32_C( 3.70), SIMDE_FLOAT32_C( 4.43), SIMDE_FLOAT32_C( 5.99), SIMDE_FLOAT32_C( 5.60), SIMDE_FLOAT32_C( 4.35), SIMDE_FLOAT32_C( 3.29), SIMDE_FLOAT32_C( 7.32), SIMDE_FLOAT32_C( 2.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.71), SIMDE_FLOAT32_C( 1.80), SIMDE_FLOAT32_C( 1.21), SIMDE_FLOAT32_C( 2.11), SIMDE_FLOAT32_C( 2.69), SIMDE_FLOAT32_C( 2.14), SIMDE_FLOAT32_C( 1.84), SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( 1.98), SIMDE_FLOAT32_C( 2.17), SIMDE_FLOAT32_C( 2.48), SIMDE_FLOAT32_C( 2.41), SIMDE_FLOAT32_C( 2.15), SIMDE_FLOAT32_C( 1.86), SIMDE_FLOAT32_C( 2.68), SIMDE_FLOAT32_C( 1.32)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_acosh_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_acosh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.81), SIMDE_FLOAT32_C( 6.57), SIMDE_FLOAT32_C( 3.60), SIMDE_FLOAT32_C( 2.12), SIMDE_FLOAT32_C( 4.39), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( 1.46), SIMDE_FLOAT32_C( 6.60), SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( 6.19), SIMDE_FLOAT32_C( 5.69), SIMDE_FLOAT32_C( 5.84), SIMDE_FLOAT32_C( 6.51), SIMDE_FLOAT32_C( 4.41), SIMDE_FLOAT32_C( 4.43), SIMDE_FLOAT32_C( 5.44)), UINT16_C(41466), simde_mm512_set_ps(SIMDE_FLOAT32_C( 4.89), SIMDE_FLOAT32_C( 5.07), SIMDE_FLOAT32_C( 5.16), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 2.83), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( 3.29), SIMDE_FLOAT32_C( 2.92), SIMDE_FLOAT32_C( 2.06), SIMDE_FLOAT32_C( 4.58), SIMDE_FLOAT32_C( 
7.02), SIMDE_FLOAT32_C( 3.41), SIMDE_FLOAT32_C( 5.94), SIMDE_FLOAT32_C( 3.32), SIMDE_FLOAT32_C( 3.69), SIMDE_FLOAT32_C( 1.81)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.27), SIMDE_FLOAT32_C( 6.57), SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( 2.12), SIMDE_FLOAT32_C( 4.39), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( 1.46), SIMDE_FLOAT32_C( 1.73), SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( 2.20), SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( 1.90), SIMDE_FLOAT32_C( 2.47), SIMDE_FLOAT32_C( 4.41), SIMDE_FLOAT32_C( 1.98), SIMDE_FLOAT32_C( 5.44)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 5.85), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 7.30), SIMDE_FLOAT32_C( 6.91), SIMDE_FLOAT32_C( 3.63), SIMDE_FLOAT32_C( 5.41), SIMDE_FLOAT32_C( 1.83), SIMDE_FLOAT32_C( 2.47), SIMDE_FLOAT32_C( 3.02), SIMDE_FLOAT32_C( 6.46), SIMDE_FLOAT32_C( 6.06), SIMDE_FLOAT32_C( 6.88), SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 2.37), SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 4.39)), UINT16_C(36797), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.73), SIMDE_FLOAT32_C( 6.54), SIMDE_FLOAT32_C( 7.00), SIMDE_FLOAT32_C( 6.28), SIMDE_FLOAT32_C( 5.14), SIMDE_FLOAT32_C( 4.03), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 5.62), SIMDE_FLOAT32_C( 5.61), SIMDE_FLOAT32_C( 5.42), SIMDE_FLOAT32_C( 3.43), SIMDE_FLOAT32_C( 4.20), SIMDE_FLOAT32_C( 5.76), SIMDE_FLOAT32_C( 5.56), SIMDE_FLOAT32_C( 7.58)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 7.30), SIMDE_FLOAT32_C( 6.91), SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( 2.07), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 2.41), SIMDE_FLOAT32_C( 6.46), SIMDE_FLOAT32_C( 2.37), SIMDE_FLOAT32_C( 1.90), SIMDE_FLOAT32_C( 2.11), SIMDE_FLOAT32_C( 2.44), SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 2.71)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.99), SIMDE_FLOAT32_C( 7.07), SIMDE_FLOAT32_C( 2.35), SIMDE_FLOAT32_C( 6.71), SIMDE_FLOAT32_C( 6.36), SIMDE_FLOAT32_C( 4.76), SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C( 4.63), SIMDE_FLOAT32_C( 
1.86), SIMDE_FLOAT32_C( 6.80), SIMDE_FLOAT32_C( 5.43), SIMDE_FLOAT32_C( 1.67), SIMDE_FLOAT32_C( 2.56), SIMDE_FLOAT32_C( 1.59), SIMDE_FLOAT32_C( 6.46), SIMDE_FLOAT32_C( 6.09)), UINT16_C(16804), simde_mm512_set_ps(SIMDE_FLOAT32_C( 4.39), SIMDE_FLOAT32_C( 6.58), SIMDE_FLOAT32_C( 4.23), SIMDE_FLOAT32_C( 2.82), SIMDE_FLOAT32_C( 5.97), SIMDE_FLOAT32_C( 7.04), SIMDE_FLOAT32_C( 7.53), SIMDE_FLOAT32_C( 5.39), SIMDE_FLOAT32_C( 5.83), SIMDE_FLOAT32_C( 3.71), SIMDE_FLOAT32_C( 5.37), SIMDE_FLOAT32_C( 1.41), SIMDE_FLOAT32_C( 3.22), SIMDE_FLOAT32_C( 3.67), SIMDE_FLOAT32_C( 6.15), SIMDE_FLOAT32_C( 4.07)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.99), SIMDE_FLOAT32_C( 2.57), SIMDE_FLOAT32_C( 2.35), SIMDE_FLOAT32_C( 6.71), SIMDE_FLOAT32_C( 6.36), SIMDE_FLOAT32_C( 4.76), SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C( 2.37), SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 6.80), SIMDE_FLOAT32_C( 2.37), SIMDE_FLOAT32_C( 1.67), SIMDE_FLOAT32_C( 2.56), SIMDE_FLOAT32_C( 1.97), SIMDE_FLOAT32_C( 6.46), SIMDE_FLOAT32_C( 6.09)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.15), SIMDE_FLOAT32_C( 2.85), SIMDE_FLOAT32_C( 1.82), SIMDE_FLOAT32_C( 7.38), SIMDE_FLOAT32_C( 3.22), SIMDE_FLOAT32_C( 3.70), SIMDE_FLOAT32_C( 5.99), SIMDE_FLOAT32_C( 4.35), SIMDE_FLOAT32_C( 7.32), SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 2.41), SIMDE_FLOAT32_C( 3.19), SIMDE_FLOAT32_C( 5.27), SIMDE_FLOAT32_C( 2.40), SIMDE_FLOAT32_C( 6.64), SIMDE_FLOAT32_C( 1.80)), UINT16_C( 2107), simde_mm512_set_ps(SIMDE_FLOAT32_C( 7.26), SIMDE_FLOAT32_C( 3.65), SIMDE_FLOAT32_C( 3.11), SIMDE_FLOAT32_C( 4.19), SIMDE_FLOAT32_C( 4.32), SIMDE_FLOAT32_C( 3.89), SIMDE_FLOAT32_C( 4.43), SIMDE_FLOAT32_C( 5.60), SIMDE_FLOAT32_C( 3.29), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 7.01), SIMDE_FLOAT32_C( 1.01), SIMDE_FLOAT32_C( 7.35), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 4.08), SIMDE_FLOAT32_C( 7.53)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.15), SIMDE_FLOAT32_C( 2.85), SIMDE_FLOAT32_C( 1.82), SIMDE_FLOAT32_C( 7.38), SIMDE_FLOAT32_C( 2.14), SIMDE_FLOAT32_C( 3.70), 
SIMDE_FLOAT32_C( 5.99), SIMDE_FLOAT32_C( 4.35), SIMDE_FLOAT32_C( 7.32), SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 2.68), SIMDE_FLOAT32_C( 2.40), SIMDE_FLOAT32_C( 2.08), SIMDE_FLOAT32_C( 2.71)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 4.25), SIMDE_FLOAT32_C( 1.87), SIMDE_FLOAT32_C( 3.26), SIMDE_FLOAT32_C( 4.89), SIMDE_FLOAT32_C( 5.44), SIMDE_FLOAT32_C( 7.23), SIMDE_FLOAT32_C( 7.32), SIMDE_FLOAT32_C( 4.74), SIMDE_FLOAT32_C( 5.90), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 5.03), SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( 1.74), SIMDE_FLOAT32_C( 5.75)), UINT16_C(22274), simde_mm512_set_ps(SIMDE_FLOAT32_C( 5.94), SIMDE_FLOAT32_C( 7.32), SIMDE_FLOAT32_C( 1.93), SIMDE_FLOAT32_C( 4.83), SIMDE_FLOAT32_C( 1.46), SIMDE_FLOAT32_C( 5.71), SIMDE_FLOAT32_C( 7.38), SIMDE_FLOAT32_C( 4.66), SIMDE_FLOAT32_C( 7.03), SIMDE_FLOAT32_C( 4.05), SIMDE_FLOAT32_C( 5.08), SIMDE_FLOAT32_C( 3.05), SIMDE_FLOAT32_C( 2.31), SIMDE_FLOAT32_C( 2.24), SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 5.87)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 4.25), SIMDE_FLOAT32_C( 2.68), SIMDE_FLOAT32_C( 3.26), SIMDE_FLOAT32_C( 2.26), SIMDE_FLOAT32_C( 5.44), SIMDE_FLOAT32_C( 2.43), SIMDE_FLOAT32_C( 2.69), SIMDE_FLOAT32_C( 2.22), SIMDE_FLOAT32_C( 5.90), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 5.03), SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 5.75)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 7.21), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( 4.07), SIMDE_FLOAT32_C( 1.71), SIMDE_FLOAT32_C( 4.61), SIMDE_FLOAT32_C( 4.98), SIMDE_FLOAT32_C( 7.05), SIMDE_FLOAT32_C( 4.08), SIMDE_FLOAT32_C( 3.36), SIMDE_FLOAT32_C( 3.60), SIMDE_FLOAT32_C( 3.25), SIMDE_FLOAT32_C( 6.89), SIMDE_FLOAT32_C( 2.22), SIMDE_FLOAT32_C( 6.14), SIMDE_FLOAT32_C( 5.75), SIMDE_FLOAT32_C( 5.73)), UINT16_C(27396), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.12), SIMDE_FLOAT32_C( 2.96), SIMDE_FLOAT32_C( 1.85), 
SIMDE_FLOAT32_C( 1.78), SIMDE_FLOAT32_C( 6.91), SIMDE_FLOAT32_C( 4.32), SIMDE_FLOAT32_C( 1.60), SIMDE_FLOAT32_C( 4.83), SIMDE_FLOAT32_C( 6.21), SIMDE_FLOAT32_C( 4.26), SIMDE_FLOAT32_C( 3.28), SIMDE_FLOAT32_C( 1.93), SIMDE_FLOAT32_C( 5.40), SIMDE_FLOAT32_C( 5.21), SIMDE_FLOAT32_C( 1.27), SIMDE_FLOAT32_C( 2.68)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 7.21), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( 1.23), SIMDE_FLOAT32_C( 1.71), SIMDE_FLOAT32_C( 2.62), SIMDE_FLOAT32_C( 4.98), SIMDE_FLOAT32_C( 1.05), SIMDE_FLOAT32_C( 2.26), SIMDE_FLOAT32_C( 3.36), SIMDE_FLOAT32_C( 3.60), SIMDE_FLOAT32_C( 3.25), SIMDE_FLOAT32_C( 6.89), SIMDE_FLOAT32_C( 2.22), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( 5.75), SIMDE_FLOAT32_C( 5.73)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 6.05), SIMDE_FLOAT32_C( 4.22), SIMDE_FLOAT32_C( 1.11), SIMDE_FLOAT32_C( 6.41), SIMDE_FLOAT32_C( 5.79), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( 4.65), SIMDE_FLOAT32_C( 6.25), SIMDE_FLOAT32_C( 4.40), SIMDE_FLOAT32_C( 6.40), SIMDE_FLOAT32_C( 4.02), SIMDE_FLOAT32_C( 4.56), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( 6.31), SIMDE_FLOAT32_C( 5.60), SIMDE_FLOAT32_C( 1.37)), UINT16_C( 953), simde_mm512_set_ps(SIMDE_FLOAT32_C( 4.36), SIMDE_FLOAT32_C( 6.97), SIMDE_FLOAT32_C( 4.78), SIMDE_FLOAT32_C( 2.89), SIMDE_FLOAT32_C( 5.32), SIMDE_FLOAT32_C( 3.72), SIMDE_FLOAT32_C( 2.79), SIMDE_FLOAT32_C( 6.54), SIMDE_FLOAT32_C( 4.52), SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( 4.69), SIMDE_FLOAT32_C( 2.40), SIMDE_FLOAT32_C( 4.17), SIMDE_FLOAT32_C( 3.47), SIMDE_FLOAT32_C( 4.12), SIMDE_FLOAT32_C( 4.60)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 6.05), SIMDE_FLOAT32_C( 4.22), SIMDE_FLOAT32_C( 1.11), SIMDE_FLOAT32_C( 6.41), SIMDE_FLOAT32_C( 5.79), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( 2.57), SIMDE_FLOAT32_C( 2.19), SIMDE_FLOAT32_C( 6.40), SIMDE_FLOAT32_C( 2.23), SIMDE_FLOAT32_C( 1.52), SIMDE_FLOAT32_C( 2.11), SIMDE_FLOAT32_C( 6.31), SIMDE_FLOAT32_C( 5.60), SIMDE_FLOAT32_C( 2.21)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.70), 
SIMDE_FLOAT32_C( 5.39), SIMDE_FLOAT32_C( 2.67), SIMDE_FLOAT32_C( 7.01), SIMDE_FLOAT32_C( 7.46), SIMDE_FLOAT32_C( 7.45), SIMDE_FLOAT32_C( 7.02), SIMDE_FLOAT32_C( 1.61), SIMDE_FLOAT32_C( 3.61), SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( 1.90), SIMDE_FLOAT32_C( 2.91), SIMDE_FLOAT32_C( 4.63), SIMDE_FLOAT32_C( 4.64), SIMDE_FLOAT32_C( 5.75), SIMDE_FLOAT32_C( 3.63)), UINT16_C(12713), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.52), SIMDE_FLOAT32_C( 4.25), SIMDE_FLOAT32_C( 7.02), SIMDE_FLOAT32_C( 6.92), SIMDE_FLOAT32_C( 1.87), SIMDE_FLOAT32_C( 3.28), SIMDE_FLOAT32_C( 6.71), SIMDE_FLOAT32_C( 3.14), SIMDE_FLOAT32_C( 4.50), SIMDE_FLOAT32_C( 4.66), SIMDE_FLOAT32_C( 6.66), SIMDE_FLOAT32_C( 3.47), SIMDE_FLOAT32_C( 7.42), SIMDE_FLOAT32_C( 5.49), SIMDE_FLOAT32_C( 4.26), SIMDE_FLOAT32_C( 7.11)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( 5.39), SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( 2.62), SIMDE_FLOAT32_C( 7.46), SIMDE_FLOAT32_C( 7.45), SIMDE_FLOAT32_C( 7.02), SIMDE_FLOAT32_C( 1.81), SIMDE_FLOAT32_C( 2.18), SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( 2.58), SIMDE_FLOAT32_C( 2.91), SIMDE_FLOAT32_C( 2.69), SIMDE_FLOAT32_C( 4.64), SIMDE_FLOAT32_C( 5.75), SIMDE_FLOAT32_C( 2.65)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_acosh_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm512_acosh_pd.
 *
 * Holds eight fixed (a, r) pairs of simde__m512d vectors. Each input a is
 * passed to simde_mm512_acosh_pd and the result is compared with the expected
 * vector r through simde_assert_m512d_close(..., 1); the function returns 0
 * after the last case.
 *
 * NOTE(review): the meaning of the trailing argument 1 is defined by the
 * assert macro elsewhere -- presumably a precision in decimal places;
 * confirm. */
static int test_simde_mm512_acosh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.94), SIMDE_FLOAT64_C( 6.51), SIMDE_FLOAT64_C( 3.32), SIMDE_FLOAT64_C( 4.41), SIMDE_FLOAT64_C( 3.69), SIMDE_FLOAT64_C( 4.43), SIMDE_FLOAT64_C( 1.81), SIMDE_FLOAT64_C( 5.44)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.47), SIMDE_FLOAT64_C( 2.56), SIMDE_FLOAT64_C( 1.87), SIMDE_FLOAT64_C( 2.16), SIMDE_FLOAT64_C( 1.98), SIMDE_FLOAT64_C( 2.17), SIMDE_FLOAT64_C( 1.20), SIMDE_FLOAT64_C( 2.38)) }, { 
simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.06), SIMDE_FLOAT64_C( 2.04), SIMDE_FLOAT64_C( 4.58), SIMDE_FLOAT64_C( 6.19), SIMDE_FLOAT64_C( 7.02), SIMDE_FLOAT64_C( 5.69), SIMDE_FLOAT64_C( 3.41), SIMDE_FLOAT64_C( 5.84)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.35), SIMDE_FLOAT64_C( 1.34), SIMDE_FLOAT64_C( 2.20), SIMDE_FLOAT64_C( 2.51), SIMDE_FLOAT64_C( 2.64), SIMDE_FLOAT64_C( 2.42), SIMDE_FLOAT64_C( 1.90), SIMDE_FLOAT64_C( 2.45)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.83), SIMDE_FLOAT64_C( 4.39), SIMDE_FLOAT64_C( 3.03), SIMDE_FLOAT64_C( 1.25), SIMDE_FLOAT64_C( 3.29), SIMDE_FLOAT64_C( 1.46), SIMDE_FLOAT64_C( 2.92), SIMDE_FLOAT64_C( 6.60)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.70), SIMDE_FLOAT64_C( 2.16), SIMDE_FLOAT64_C( 1.77), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( 1.86), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 1.73), SIMDE_FLOAT64_C( 2.57)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 4.89), SIMDE_FLOAT64_C( 2.81), SIMDE_FLOAT64_C( 5.07), SIMDE_FLOAT64_C( 6.57), SIMDE_FLOAT64_C( 5.16), SIMDE_FLOAT64_C( 3.60), SIMDE_FLOAT64_C( 1.08), SIMDE_FLOAT64_C( 2.12)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.27), SIMDE_FLOAT64_C( 1.69), SIMDE_FLOAT64_C( 2.31), SIMDE_FLOAT64_C( 2.57), SIMDE_FLOAT64_C( 2.32), SIMDE_FLOAT64_C( 1.95), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 1.38)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.76), SIMDE_FLOAT64_C( 5.76), SIMDE_FLOAT64_C( 2.37), SIMDE_FLOAT64_C( 5.56), SIMDE_FLOAT64_C( 1.76), SIMDE_FLOAT64_C( 7.58), SIMDE_FLOAT64_C( 4.39), SIMDE_FLOAT64_C( 7.08)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.17), SIMDE_FLOAT64_C( 2.44), SIMDE_FLOAT64_C( 1.51), SIMDE_FLOAT64_C( 2.40), SIMDE_FLOAT64_C( 1.17), SIMDE_FLOAT64_C( 2.71), SIMDE_FLOAT64_C( 2.16), SIMDE_FLOAT64_C( 2.65)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.02), SIMDE_FLOAT64_C( 5.61), SIMDE_FLOAT64_C( 6.46), SIMDE_FLOAT64_C( 5.42), SIMDE_FLOAT64_C( 6.06), SIMDE_FLOAT64_C( 3.43), SIMDE_FLOAT64_C( 6.88), SIMDE_FLOAT64_C( 4.20)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.77), SIMDE_FLOAT64_C( 2.41), 
SIMDE_FLOAT64_C( 2.55), SIMDE_FLOAT64_C( 2.37), SIMDE_FLOAT64_C( 2.49), SIMDE_FLOAT64_C( 1.90), SIMDE_FLOAT64_C( 2.62), SIMDE_FLOAT64_C( 2.11)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.63), SIMDE_FLOAT64_C( 4.03), SIMDE_FLOAT64_C( 5.41), SIMDE_FLOAT64_C( 1.18), SIMDE_FLOAT64_C( 1.83), SIMDE_FLOAT64_C( 1.77), SIMDE_FLOAT64_C( 2.47), SIMDE_FLOAT64_C( 5.62)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.96), SIMDE_FLOAT64_C( 2.07), SIMDE_FLOAT64_C( 2.37), SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( 1.21), SIMDE_FLOAT64_C( 1.17), SIMDE_FLOAT64_C( 1.55), SIMDE_FLOAT64_C( 2.41)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.85), SIMDE_FLOAT64_C( 6.54), SIMDE_FLOAT64_C( 3.81), SIMDE_FLOAT64_C( 7.00), SIMDE_FLOAT64_C( 7.30), SIMDE_FLOAT64_C( 6.28), SIMDE_FLOAT64_C( 6.91), SIMDE_FLOAT64_C( 5.14)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.45), SIMDE_FLOAT64_C( 2.57), SIMDE_FLOAT64_C( 2.01), SIMDE_FLOAT64_C( 2.63), SIMDE_FLOAT64_C( 2.68), SIMDE_FLOAT64_C( 2.52), SIMDE_FLOAT64_C( 2.62), SIMDE_FLOAT64_C( 2.32)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_acosh_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm512_mask_acosh_pd.
 *
 * Holds eight fixed cases, each made of a pass-through vector (src), a
 * simde__mmask8 mask (k), an input vector (a) and the expected output (r).
 * Every case is fed to simde_mm512_mask_acosh_pd(src, k, a) and the result is
 * compared with r through simde_assert_m512d_close(..., 1); the function
 * returns 0 after the last case.
 *
 * NOTE(review): in the tabulated data r repeats src in the lanes whose bit in
 * k is clear. The meaning of the trailing argument 1 is defined by the assert
 * macro elsewhere -- presumably a precision in decimal places; confirm. */
static int test_simde_mm512_mask_acosh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.04), SIMDE_FLOAT64_C( 6.19), SIMDE_FLOAT64_C( 5.69), SIMDE_FLOAT64_C( 5.84), SIMDE_FLOAT64_C( 6.51), SIMDE_FLOAT64_C( 4.41), SIMDE_FLOAT64_C( 4.43), SIMDE_FLOAT64_C( 5.44)), UINT8_C(139), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.06), SIMDE_FLOAT64_C( 4.58), SIMDE_FLOAT64_C( 7.02), SIMDE_FLOAT64_C( 3.41), SIMDE_FLOAT64_C( 5.94), SIMDE_FLOAT64_C( 3.32), SIMDE_FLOAT64_C( 3.69), SIMDE_FLOAT64_C( 1.81)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.35), SIMDE_FLOAT64_C( 6.19), SIMDE_FLOAT64_C( 5.69), SIMDE_FLOAT64_C( 5.84), SIMDE_FLOAT64_C( 2.47), SIMDE_FLOAT64_C( 4.41), SIMDE_FLOAT64_C( 1.98), 
SIMDE_FLOAT64_C( 1.20)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 4.89), SIMDE_FLOAT64_C( 5.07), SIMDE_FLOAT64_C( 5.16), SIMDE_FLOAT64_C( 1.08), SIMDE_FLOAT64_C( 2.83), SIMDE_FLOAT64_C( 3.03), SIMDE_FLOAT64_C( 3.29), SIMDE_FLOAT64_C( 2.92)), UINT8_C(229), simde_mm512_set_pd(SIMDE_FLOAT64_C( 7.08), SIMDE_FLOAT64_C( 2.81), SIMDE_FLOAT64_C( 6.57), SIMDE_FLOAT64_C( 3.60), SIMDE_FLOAT64_C( 2.12), SIMDE_FLOAT64_C( 4.39), SIMDE_FLOAT64_C( 1.25), SIMDE_FLOAT64_C( 1.46)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.65), SIMDE_FLOAT64_C( 1.69), SIMDE_FLOAT64_C( 2.57), SIMDE_FLOAT64_C( 1.08), SIMDE_FLOAT64_C( 2.83), SIMDE_FLOAT64_C( 2.16), SIMDE_FLOAT64_C( 3.29), SIMDE_FLOAT64_C( 0.93)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.62), SIMDE_FLOAT64_C( 5.61), SIMDE_FLOAT64_C( 5.42), SIMDE_FLOAT64_C( 3.43), SIMDE_FLOAT64_C( 4.20), SIMDE_FLOAT64_C( 5.76), SIMDE_FLOAT64_C( 5.56), SIMDE_FLOAT64_C( 7.58)), UINT8_C(253), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.47), SIMDE_FLOAT64_C( 3.02), SIMDE_FLOAT64_C( 6.46), SIMDE_FLOAT64_C( 6.06), SIMDE_FLOAT64_C( 6.88), SIMDE_FLOAT64_C( 1.76), SIMDE_FLOAT64_C( 2.37), SIMDE_FLOAT64_C( 1.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.55), SIMDE_FLOAT64_C( 1.77), SIMDE_FLOAT64_C( 2.55), SIMDE_FLOAT64_C( 2.49), SIMDE_FLOAT64_C( 2.62), SIMDE_FLOAT64_C( 1.17), SIMDE_FLOAT64_C( 5.56), SIMDE_FLOAT64_C( 1.17)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 4.70), SIMDE_FLOAT64_C( 5.85), SIMDE_FLOAT64_C( 3.81), SIMDE_FLOAT64_C( 7.30), SIMDE_FLOAT64_C( 6.91), SIMDE_FLOAT64_C( 3.63), SIMDE_FLOAT64_C( 5.41), SIMDE_FLOAT64_C( 1.83)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 6.09), SIMDE_FLOAT64_C( 3.73), SIMDE_FLOAT64_C( 6.54), SIMDE_FLOAT64_C( 7.00), SIMDE_FLOAT64_C( 6.28), SIMDE_FLOAT64_C( 5.14), SIMDE_FLOAT64_C( 4.03), SIMDE_FLOAT64_C( 1.18)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 4.70), SIMDE_FLOAT64_C( 1.99), SIMDE_FLOAT64_C( 3.81), SIMDE_FLOAT64_C( 2.63), SIMDE_FLOAT64_C( 2.52), SIMDE_FLOAT64_C( 2.32), SIMDE_FLOAT64_C( 5.41), SIMDE_FLOAT64_C( 0.59)) }, { 
simde_mm512_set_pd(SIMDE_FLOAT64_C( 4.63), SIMDE_FLOAT64_C( 1.86), SIMDE_FLOAT64_C( 6.80), SIMDE_FLOAT64_C( 5.43), SIMDE_FLOAT64_C( 1.67), SIMDE_FLOAT64_C( 2.56), SIMDE_FLOAT64_C( 1.59), SIMDE_FLOAT64_C( 6.46)), UINT8_C(145), simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.39), SIMDE_FLOAT64_C( 5.83), SIMDE_FLOAT64_C( 3.71), SIMDE_FLOAT64_C( 5.37), SIMDE_FLOAT64_C( 1.41), SIMDE_FLOAT64_C( 3.22), SIMDE_FLOAT64_C( 3.67), SIMDE_FLOAT64_C( 6.15)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.37), SIMDE_FLOAT64_C( 1.86), SIMDE_FLOAT64_C( 6.80), SIMDE_FLOAT64_C( 2.37), SIMDE_FLOAT64_C( 1.67), SIMDE_FLOAT64_C( 2.56), SIMDE_FLOAT64_C( 1.59), SIMDE_FLOAT64_C( 2.50)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.80), SIMDE_FLOAT64_C( 4.39), SIMDE_FLOAT64_C( 6.58), SIMDE_FLOAT64_C( 4.23), SIMDE_FLOAT64_C( 2.82), SIMDE_FLOAT64_C( 5.97), SIMDE_FLOAT64_C( 7.04), SIMDE_FLOAT64_C( 7.53)), UINT8_C( 75), simde_mm512_set_pd(SIMDE_FLOAT64_C( 7.53), SIMDE_FLOAT64_C( 5.70), SIMDE_FLOAT64_C( 3.99), SIMDE_FLOAT64_C( 7.07), SIMDE_FLOAT64_C( 2.35), SIMDE_FLOAT64_C( 6.71), SIMDE_FLOAT64_C( 6.36), SIMDE_FLOAT64_C( 4.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.80), SIMDE_FLOAT64_C( 2.43), SIMDE_FLOAT64_C( 6.58), SIMDE_FLOAT64_C( 4.23), SIMDE_FLOAT64_C( 1.50), SIMDE_FLOAT64_C( 5.97), SIMDE_FLOAT64_C( 2.54), SIMDE_FLOAT64_C( 2.24)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.60), SIMDE_FLOAT64_C( 3.29), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 7.01), SIMDE_FLOAT64_C( 1.01), SIMDE_FLOAT64_C( 7.35), SIMDE_FLOAT64_C( 1.77), SIMDE_FLOAT64_C( 4.08)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.99), SIMDE_FLOAT64_C( 4.35), SIMDE_FLOAT64_C( 7.32), SIMDE_FLOAT64_C( 1.76), SIMDE_FLOAT64_C( 2.41), SIMDE_FLOAT64_C( 3.19), SIMDE_FLOAT64_C( 5.27), SIMDE_FLOAT64_C( 2.40)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.60), SIMDE_FLOAT64_C( 2.15), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 1.17), SIMDE_FLOAT64_C( 1.53), SIMDE_FLOAT64_C( 1.83), SIMDE_FLOAT64_C( 1.77), SIMDE_FLOAT64_C( 1.52)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 
5.87), SIMDE_FLOAT64_C( 7.39), SIMDE_FLOAT64_C( 3.15), SIMDE_FLOAT64_C( 2.85), SIMDE_FLOAT64_C( 1.82), SIMDE_FLOAT64_C( 7.38), SIMDE_FLOAT64_C( 3.22), SIMDE_FLOAT64_C( 3.70)), UINT8_C(213), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.74), SIMDE_FLOAT64_C( 5.75), SIMDE_FLOAT64_C( 7.26), SIMDE_FLOAT64_C( 3.65), SIMDE_FLOAT64_C( 3.11), SIMDE_FLOAT64_C( 4.19), SIMDE_FLOAT64_C( 4.32), SIMDE_FLOAT64_C( 3.89)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.15), SIMDE_FLOAT64_C( 2.43), SIMDE_FLOAT64_C( 3.15), SIMDE_FLOAT64_C( 1.97), SIMDE_FLOAT64_C( 1.82), SIMDE_FLOAT64_C( 2.11), SIMDE_FLOAT64_C( 3.22), SIMDE_FLOAT64_C( 2.03)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_acosh_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm_asin_ps.
 *
 * Holds eight fixed (a, r) pairs of simde__m128 vectors. Each input a is
 * passed to simde_mm_asin_ps and the result is compared with the expected
 * vector r through simde_assert_m128_close(..., 1); the function returns 0
 * after the last case.
 *
 * NOTE(review): the meaning of the trailing argument 1 is defined by the
 * assert macro elsewhere -- presumably a precision in decimal places;
 * confirm. */
static int test_simde_mm_asin_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.35)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 0.36)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.03)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.03)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.47)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.49)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.57)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.61)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.42), 
SIMDE_FLOAT32_C( 0.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( -1.04), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.78)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.92)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( -1.17)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.66)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -1.37), SIMDE_FLOAT32_C( -0.72)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.69)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.76)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_asin_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm_asin_pd.
 *
 * Holds eight fixed (a, r) pairs of simde__m128d vectors. Each input a is
 * passed to simde_mm_asin_pd and the result is compared with the expected
 * vector r through simde_assert_m128d_close(..., 1); the function returns 0
 * after the last case.
 *
 * NOTE(review): the meaning of the trailing argument 1 is defined by the
 * assert macro elsewhere -- presumably a precision in decimal places;
 * confirm. */
static int test_simde_mm_asin_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.35)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( 0.36)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.03)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 0.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( 0.73)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.47)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.49)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.42)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 0.43)) }, { 
simde_mm_set_pd(SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.57)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.61)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -0.69)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( -0.76)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_asin_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm256_asin_ps.
 *
 * Holds eight fixed (a, r) pairs of simde__m256 vectors. Each input a is
 * passed to simde_mm256_asin_ps and the result is compared with the expected
 * vector r through simde_assert_m256_close(..., 1); the function returns 0
 * after the last case.
 *
 * NOTE(review): the meaning of the trailing argument 1 is defined by the
 * assert macro elsewhere -- presumably a precision in decimal places;
 * confirm. */
static int test_simde_mm256_asin_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.35)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 0.36)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.47)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.49)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( -1.17), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( -1.04), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.78)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 
0.69), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.66)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -1.37), SIMDE_FLOAT32_C( -0.72)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.84)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 1.43), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 1.00)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.03)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( -0.03)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( 0.40)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( -1.22), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 0.41)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.25)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 1.14), 
SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.25)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_asin_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm256_asin_pd.
 *
 * Holds eight fixed (a, r) pairs of simde__m256d vectors. Each input a is
 * passed to simde_mm256_asin_pd and the result is compared with the expected
 * vector r through simde_assert_m256d_close(..., 1); the function returns 0
 * after the last case.
 *
 * NOTE(review): the meaning of the trailing argument 1 is defined by the
 * assert macro elsewhere -- presumably a precision in decimal places;
 * confirm. */
static int test_simde_mm256_asin_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.35)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( 0.36)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.03)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( 0.73), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.03)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.47)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.49)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.57)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.61)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.86), SIMDE_FLOAT64_C( -0.42), SIMDE_FLOAT64_C( 0.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( -1.04), SIMDE_FLOAT64_C( -0.43), SIMDE_FLOAT64_C( 0.78)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.38), SIMDE_FLOAT64_C( -0.92)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.46), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( -1.17)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( -0.66)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.26), 
SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -1.37), SIMDE_FLOAT64_C( -0.72)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.69)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( -0.47), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.76)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_asin_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm512_asin_ps.
 *
 * Holds eight fixed (a, r) pairs of simde__m512 vectors. Each input a is
 * passed to simde_mm512_asin_ps and the result is compared with the expected
 * vector r through simde_assert_m512_close(..., 1); the function returns 0
 * after the last case.
 *
 * NOTE(review): the meaning of the trailing argument 1 is defined by the
 * assert macro elsewhere -- presumably a precision in decimal places;
 * confirm. */
static int test_simde_mm512_asin_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.35)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 0.36)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.70)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.76), 
SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -1.37), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( -1.17), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( -1.04), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.78)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.84)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 1.43), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 1.00)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( 0.40)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 1.14), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( -1.22), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 0.41)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.18), 
SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.17)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( -1.06), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.17)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -0.74)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 1.37), SIMDE_FLOAT32_C( -1.14), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( -0.83)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.03), 
SIMDE_FLOAT32_C( -0.10)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 1.37), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.10)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.70)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( -0.37), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( -0.78)) } }; /* Run simde_mm512_asin_ps on every table entry and compare the result with the expected vector r. The trailing 1 is passed to simde_assert_m512_close; presumably a precision/tolerance setting - confirm against that helper's definition. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_asin_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven test for simde_mm512_mask_asin_ps. Each of the 8 cases holds a 16-lane float vector src, a 16-bit mask k, a 16-lane input a and the expected result r. In the table data, r appears to be asin(a) rounded to two decimals in lanes whose bit in k is set and a copy of src in the other lanes (inferred from the values, not from the implementation). The loop after the table calls the function once per case, checks the result with simde_assert_m512_close, and the test then returns 0. */ static int test_simde_mm512_mask_asin_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.03),
SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.35)), UINT16_C(41466), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( -0.75)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.35)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.03)), UINT16_C(36797), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.99)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -1.22), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.66), 
SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 1.43)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.54)), UINT16_C(16804), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( -0.07)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.54)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( -0.76)), UINT16_C( 2107), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 
-0.13), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.98)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 1.37)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.44)), UINT16_C(22274), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( 0.48)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -1.22), SIMDE_FLOAT32_C( 0.44)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( -0.81), 
SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.43)), UINT16_C(27396), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.49)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.43)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( -0.89)), UINT16_C( 953), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( 
-0.05), SIMDE_FLOAT32_C( 0.09)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.09)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -0.20)), UINT16_C(12713), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.85)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 1.02)) } }; /* Apply simde_mm512_mask_asin_ps to each case (src, k, a) and compare the result with the expected vector r. The trailing 1 is passed to simde_assert_m512_close; presumably a precision/tolerance setting - confirm against that helper's definition. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_asin_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven test for simde_mm512_asin_pd. The table has 8 cases, each pairing an 8-lane double input a with the expected result r; the expected values appear to be asin(a) rounded to two decimals (inferred from the table data). The loop after the table calls the function once per case, checks the result with simde_assert_m512d_close, and the test then returns 0. */ static int test_simde_mm512_asin_pd(SIMDE_MUNIT_TEST_ARGS) { const struct {
simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.35)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( 0.73), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( 0.36)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.47)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.49)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.38), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.86), SIMDE_FLOAT64_C( -0.42), SIMDE_FLOAT64_C( 0.70)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.46), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( -1.17), SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( -1.04), SIMDE_FLOAT64_C( -0.43), SIMDE_FLOAT64_C( 0.78)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( -0.66)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( -0.47), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -1.37), SIMDE_FLOAT64_C( -0.72)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.03), 
SIMDE_FLOAT64_C( 0.84)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.88), SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( -0.62), SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( -0.88), SIMDE_FLOAT64_C( 1.43), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 1.00)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( -0.03)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.40), SIMDE_FLOAT64_C( 0.41), SIMDE_FLOAT64_C( 0.72), SIMDE_FLOAT64_C( 0.35), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( -0.03)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.94), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.55), SIMDE_FLOAT64_C( 0.40)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( 0.35), SIMDE_FLOAT64_C( -1.22), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( -0.88), SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( 0.41)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( 0.25)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.49), SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( 1.14), SIMDE_FLOAT64_C( 0.64), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( 0.25)) } }; /* Run simde_mm512_asin_pd on every table entry and compare the result with the expected vector r. The trailing 1 is passed to simde_assert_m512d_close; presumably a precision/tolerance setting - confirm against that helper's definition. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_asin_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven test for simde_mm512_mask_asin_pd. Each of the 8 cases holds an 8-lane double vector src, an 8-bit mask k, an 8-lane input a and the expected result r. In the table data, r appears to be asin(a) rounded to two decimals in lanes whose bit in k is set and a copy of src in the other lanes (inferred from the values, not from the implementation). The loop after the table calls the function once per case, checks the result with simde_assert_m512d_close, and the test then returns 0. */ static int test_simde_mm512_mask_asin_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.42),
SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.35)), UINT8_C(139), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( -0.75)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( -0.85)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( -0.38), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.42)), UINT8_C(229), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -0.66), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( -0.86)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -0.47), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -1.04)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 0.99)), UINT8_C(253), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.55), SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( -0.77)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( -0.40), SIMDE_FLOAT64_C( 0.72), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( -0.88), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( -0.88)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( -0.15), 
SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.75)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( -0.17), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( -0.94)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( -0.17), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( 0.64), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -1.22)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( -0.74), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.80), SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( -0.82), SIMDE_FLOAT64_C( 0.66)), UINT8_C(145), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( -0.18), SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( -0.87), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.56)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.74), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( -0.80), SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( -0.82), SIMDE_FLOAT64_C( 0.59)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.98)), UINT8_C( 75), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( -0.59), SIMDE_FLOAT64_C( 0.73), SIMDE_FLOAT64_C( 0.62), SIMDE_FLOAT64_C( 0.14)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( -0.63), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.14)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( -0.70), 
SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.07)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.57), SIMDE_FLOAT64_C( -0.34), SIMDE_FLOAT64_C( 0.29), SIMDE_FLOAT64_C( -0.58)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( -0.70), SIMDE_FLOAT64_C( -0.88), SIMDE_FLOAT64_C( -0.61), SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.62)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( 0.94), SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( -0.18)), UINT8_C(213), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.78), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -0.36), SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( -0.13)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.89), SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( -0.13)) } }; /* Apply simde_mm512_mask_asin_pd to each case (src, k, a) and compare the result with the expected vector r. The trailing 1 is passed to simde_assert_m512d_close; presumably a precision/tolerance setting - confirm against that helper's definition. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_asin_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven test for simde_mm_asinh_ps. The table has 8 cases, each pairing a 4-lane float input a with the expected result r; the expected values appear to be asinh(a) rounded to two decimals (inferred from the table data). The loop after the table calls the function once per case, checks the result with simde_assert_m128_close, and the test then returns 0. */ static int test_simde_mm_asinh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm_set_ps(SIMDE_FLOAT32_C( -5.92), SIMDE_FLOAT32_C( 4.36), SIMDE_FLOAT32_C( -7.32), SIMDE_FLOAT32_C( 6.54)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06)), simde_mm_set_ps(SIMDE_FLOAT32_C( 6.90),
SIMDE_FLOAT32_C( 7.20), SIMDE_FLOAT32_C( -6.39), SIMDE_FLOAT32_C( 4.22)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76)), simde_mm_set_ps(SIMDE_FLOAT32_C( 7.41), SIMDE_FLOAT32_C( 6.74), SIMDE_FLOAT32_C( -6.29), SIMDE_FLOAT32_C( 6.84)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46)), simde_mm_set_ps(SIMDE_FLOAT32_C( -7.21), SIMDE_FLOAT32_C( -7.22), SIMDE_FLOAT32_C( 5.13), SIMDE_FLOAT32_C( 7.04)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm_set_ps(SIMDE_FLOAT32_C( -6.41), SIMDE_FLOAT32_C( -7.45), SIMDE_FLOAT32_C( -6.73), SIMDE_FLOAT32_C( 7.24)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64)), simde_mm_set_ps(SIMDE_FLOAT32_C( -6.79), SIMDE_FLOAT32_C( 4.04), SIMDE_FLOAT32_C( -6.64), SIMDE_FLOAT32_C( -7.52)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 6.26), SIMDE_FLOAT32_C( -6.05), SIMDE_FLOAT32_C( -7.58), SIMDE_FLOAT32_C( -7.19)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09)), simde_mm_set_ps(SIMDE_FLOAT32_C( 5.88), SIMDE_FLOAT32_C( -6.80), SIMDE_FLOAT32_C( 6.15), SIMDE_FLOAT32_C( 7.23)) } }; /* Run simde_mm_asinh_ps on every table entry and compare the result with the expected vector r. The trailing 1 is passed to simde_assert_m128_close; presumably a precision/tolerance setting - confirm against that helper's definition. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_asinh_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven test for simde_mm_asinh_pd. The table has 8 cases, each pairing a 2-lane double input a with the expected result r; the expected values appear to be asinh(a) rounded to two decimals (inferred from the table data). The loop after the table calls the function once per case, checks the result with simde_assert_m128d_close, and the test then returns 0. */ static int test_simde_mm_asinh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( -7.32), SIMDE_FLOAT64_C( 6.54)) }, {
simde_mm_set_pd(SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( -5.92), SIMDE_FLOAT64_C( 4.36)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06)), simde_mm_set_pd(SIMDE_FLOAT64_C( -6.39), SIMDE_FLOAT64_C( 4.22)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24)), simde_mm_set_pd(SIMDE_FLOAT64_C( 6.90), SIMDE_FLOAT64_C( 7.20)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm_set_pd(SIMDE_FLOAT64_C( -6.29), SIMDE_FLOAT64_C( 6.84)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( 7.41), SIMDE_FLOAT64_C( 6.74)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46)), simde_mm_set_pd(SIMDE_FLOAT64_C( 5.13), SIMDE_FLOAT64_C( 7.04)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13)), simde_mm_set_pd(SIMDE_FLOAT64_C( -7.21), SIMDE_FLOAT64_C( -7.22)) } }; /* Run simde_mm_asinh_pd on every table entry and compare the result with the expected vector r. The trailing 1 is passed to simde_assert_m128d_close; presumably a precision/tolerance setting - confirm against that helper's definition. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_asinh_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven test for simde_mm256_asinh_ps. The table has 8 cases, each pairing an 8-lane float input a with the expected result r; the expected values appear to be asinh(a) rounded to two decimals (inferred from the table data). The loop after the table calls the function once per case, checks the result with simde_assert_m256_close, and the test then returns 0. */ static int test_simde_mm256_asinh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 6.90), SIMDE_FLOAT32_C( 7.20), SIMDE_FLOAT32_C( -6.39), SIMDE_FLOAT32_C( 4.22), SIMDE_FLOAT32_C( -5.92), SIMDE_FLOAT32_C( 4.36), SIMDE_FLOAT32_C( -7.32), SIMDE_FLOAT32_C( 6.54)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76)), simde_mm256_set_ps(SIMDE_FLOAT32_C(
-7.21), SIMDE_FLOAT32_C( -7.22), SIMDE_FLOAT32_C( 5.13), SIMDE_FLOAT32_C( 7.04), SIMDE_FLOAT32_C( 7.41), SIMDE_FLOAT32_C( 6.74), SIMDE_FLOAT32_C( -6.29), SIMDE_FLOAT32_C( 6.84)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -6.79), SIMDE_FLOAT32_C( 4.04), SIMDE_FLOAT32_C( -6.64), SIMDE_FLOAT32_C( -7.52), SIMDE_FLOAT32_C( -6.41), SIMDE_FLOAT32_C( -7.45), SIMDE_FLOAT32_C( -6.73), SIMDE_FLOAT32_C( 7.24)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 5.88), SIMDE_FLOAT32_C( -6.80), SIMDE_FLOAT32_C( 6.15), SIMDE_FLOAT32_C( 7.23), SIMDE_FLOAT32_C( 6.26), SIMDE_FLOAT32_C( -6.05), SIMDE_FLOAT32_C( -7.58), SIMDE_FLOAT32_C( -7.19)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 28.08), SIMDE_FLOAT32_C( 841.21)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -7.34), SIMDE_FLOAT32_C( 6.79), SIMDE_FLOAT32_C( -7.06), SIMDE_FLOAT32_C( 6.63), SIMDE_FLOAT32_C( -7.34), SIMDE_FLOAT32_C( 7.59), SIMDE_FLOAT32_C( 4.03), SIMDE_FLOAT32_C( 7.43)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -30.79)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -6.65), SIMDE_FLOAT32_C( 6.67), SIMDE_FLOAT32_C( 7.18), SIMDE_FLOAT32_C( 6.52), SIMDE_FLOAT32_C( 6.97), SIMDE_FLOAT32_C( -6.27), SIMDE_FLOAT32_C( 7.35), SIMDE_FLOAT32_C( -4.12)) }, { 
simde_mm256_set_ps(SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( 398.82)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -6.01), SIMDE_FLOAT32_C( -5.08), SIMDE_FLOAT32_C( 6.51), SIMDE_FLOAT32_C( -7.54), SIMDE_FLOAT32_C( -7.31), SIMDE_FLOAT32_C( -7.34), SIMDE_FLOAT32_C( -7.01), SIMDE_FLOAT32_C( 6.68)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 254.31)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 6.85), SIMDE_FLOAT32_C( 7.22), SIMDE_FLOAT32_C( -5.70), SIMDE_FLOAT32_C( 7.40), SIMDE_FLOAT32_C( 7.51), SIMDE_FLOAT32_C( 7.09), SIMDE_FLOAT32_C( 7.37), SIMDE_FLOAT32_C( 6.23)) } }; /* Run simde_mm256_asinh_ps on every table entry and compare the result with the expected vector r. The trailing 1 is passed to simde_assert_m256_close; presumably a precision/tolerance setting - confirm against that helper's definition. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_asinh_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven test for simde_mm256_asinh_pd. The table has 8 cases, each pairing a 4-lane double input a with the expected result r; the expected values appear to be asinh(a) rounded to two decimals (inferred from the table data). The loop after the table calls the function once per case, checks the result with simde_assert_m256d_close, and the test then returns 0. */ static int test_simde_mm256_asinh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -5.92), SIMDE_FLOAT64_C( 4.36), SIMDE_FLOAT64_C( -7.32), SIMDE_FLOAT64_C( 6.54)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 6.90), SIMDE_FLOAT64_C( 7.20), SIMDE_FLOAT64_C( -6.39), SIMDE_FLOAT64_C( 4.22)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 7.41), SIMDE_FLOAT64_C( 6.74), SIMDE_FLOAT64_C( -6.29), SIMDE_FLOAT64_C( 6.84)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -678.17),
SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -7.21), SIMDE_FLOAT64_C( -7.22), SIMDE_FLOAT64_C( 5.13), SIMDE_FLOAT64_C( 7.04)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( -417.54), SIMDE_FLOAT64_C( 696.87)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -6.41), SIMDE_FLOAT64_C( -7.45), SIMDE_FLOAT64_C( -6.73), SIMDE_FLOAT64_C( 7.24)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -923.64)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -6.79), SIMDE_FLOAT64_C( 4.04), SIMDE_FLOAT64_C( -6.64), SIMDE_FLOAT64_C( -7.52)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -660.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 6.26), SIMDE_FLOAT64_C( -6.05), SIMDE_FLOAT64_C( -7.58), SIMDE_FLOAT64_C( -7.19)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 687.09)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 5.88), SIMDE_FLOAT64_C( -6.80), SIMDE_FLOAT64_C( 6.15), SIMDE_FLOAT64_C( 7.23)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_asinh_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_asinh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -7.21), 
SIMDE_FLOAT32_C( -7.22), SIMDE_FLOAT32_C( 5.13), SIMDE_FLOAT32_C( 7.04), SIMDE_FLOAT32_C( 7.41), SIMDE_FLOAT32_C( 6.74), SIMDE_FLOAT32_C( -6.29), SIMDE_FLOAT32_C( 6.84), SIMDE_FLOAT32_C( 6.90), SIMDE_FLOAT32_C( 7.20), SIMDE_FLOAT32_C( -6.39), SIMDE_FLOAT32_C( 4.22), SIMDE_FLOAT32_C( -5.92), SIMDE_FLOAT32_C( 4.36), SIMDE_FLOAT32_C( -7.32), SIMDE_FLOAT32_C( 6.54)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 5.88), SIMDE_FLOAT32_C( -6.80), SIMDE_FLOAT32_C( 6.15), SIMDE_FLOAT32_C( 7.23), SIMDE_FLOAT32_C( 6.26), SIMDE_FLOAT32_C( -6.05), SIMDE_FLOAT32_C( -7.58), SIMDE_FLOAT32_C( -7.19), SIMDE_FLOAT32_C( -6.79), SIMDE_FLOAT32_C( 4.04), SIMDE_FLOAT32_C( -6.64), SIMDE_FLOAT32_C( -7.52), SIMDE_FLOAT32_C( -6.41), SIMDE_FLOAT32_C( -7.45), SIMDE_FLOAT32_C( -6.73), SIMDE_FLOAT32_C( 7.24)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 28.08), SIMDE_FLOAT32_C( 841.21)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -6.65), SIMDE_FLOAT32_C( 6.67), SIMDE_FLOAT32_C( 7.18), SIMDE_FLOAT32_C( 6.52), SIMDE_FLOAT32_C( 6.97), SIMDE_FLOAT32_C( -6.27), SIMDE_FLOAT32_C( 7.35), SIMDE_FLOAT32_C( -4.12), SIMDE_FLOAT32_C( -7.34), SIMDE_FLOAT32_C( 6.79), SIMDE_FLOAT32_C( -7.06), SIMDE_FLOAT32_C( 6.63), 
SIMDE_FLOAT32_C( -7.34), SIMDE_FLOAT32_C( 7.59), SIMDE_FLOAT32_C( 4.03), SIMDE_FLOAT32_C( 7.43)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 254.31), SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( 398.82)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 6.85), SIMDE_FLOAT32_C( 7.22), SIMDE_FLOAT32_C( -5.70), SIMDE_FLOAT32_C( 7.40), SIMDE_FLOAT32_C( 7.51), SIMDE_FLOAT32_C( 7.09), SIMDE_FLOAT32_C( 7.37), SIMDE_FLOAT32_C( 6.23), SIMDE_FLOAT32_C( -6.01), SIMDE_FLOAT32_C( -5.08), SIMDE_FLOAT32_C( 6.51), SIMDE_FLOAT32_C( -7.54), SIMDE_FLOAT32_C( -7.31), SIMDE_FLOAT32_C( -7.34), SIMDE_FLOAT32_C( -7.01), SIMDE_FLOAT32_C( 6.68)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -328.54), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( -70.91), SIMDE_FLOAT32_C( 543.35), SIMDE_FLOAT32_C( 120.65), SIMDE_FLOAT32_C( -171.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -5.88), SIMDE_FLOAT32_C( 7.32), SIMDE_FLOAT32_C( 6.48), SIMDE_FLOAT32_C( 6.53), SIMDE_FLOAT32_C( -7.47), SIMDE_FLOAT32_C( -7.38), SIMDE_FLOAT32_C( -6.49), SIMDE_FLOAT32_C( -6.96), SIMDE_FLOAT32_C( -5.95), SIMDE_FLOAT32_C( -7.41), SIMDE_FLOAT32_C( 7.02), SIMDE_FLOAT32_C( 7.18), SIMDE_FLOAT32_C( -4.95), SIMDE_FLOAT32_C( 6.99), SIMDE_FLOAT32_C( 5.49), SIMDE_FLOAT32_C( -5.84)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 731.49), 
SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( 977.36), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( -738.19)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 7.23), SIMDE_FLOAT32_C( 7.43), SIMDE_FLOAT32_C( -3.74), SIMDE_FLOAT32_C( -7.08), SIMDE_FLOAT32_C( -6.80), SIMDE_FLOAT32_C( 7.29), SIMDE_FLOAT32_C( 6.92), SIMDE_FLOAT32_C( 7.13), SIMDE_FLOAT32_C( 7.42), SIMDE_FLOAT32_C( 5.64), SIMDE_FLOAT32_C( 7.58), SIMDE_FLOAT32_C( -7.50), SIMDE_FLOAT32_C( 6.50), SIMDE_FLOAT32_C( 5.30), SIMDE_FLOAT32_C( 6.83), SIMDE_FLOAT32_C( -7.30)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( -337.60), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( 710.38), SIMDE_FLOAT32_C( 977.49), SIMDE_FLOAT32_C( -756.42), SIMDE_FLOAT32_C( 424.81), SIMDE_FLOAT32_C( 27.25), SIMDE_FLOAT32_C( -95.15)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -7.34), SIMDE_FLOAT32_C( 7.40), SIMDE_FLOAT32_C( -7.05), SIMDE_FLOAT32_C( -7.60), SIMDE_FLOAT32_C( -6.52), SIMDE_FLOAT32_C( 7.52), SIMDE_FLOAT32_C( 6.38), SIMDE_FLOAT32_C( -7.34), SIMDE_FLOAT32_C( -7.05), SIMDE_FLOAT32_C( -4.91), SIMDE_FLOAT32_C( 7.26), SIMDE_FLOAT32_C( 7.58), SIMDE_FLOAT32_C( -7.32), SIMDE_FLOAT32_C( 6.74), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( -5.25)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -696.69)), 
simde_mm512_set_ps(SIMDE_FLOAT32_C( -6.78), SIMDE_FLOAT32_C( -6.58), SIMDE_FLOAT32_C( -7.32), SIMDE_FLOAT32_C( -4.21), SIMDE_FLOAT32_C( 7.53), SIMDE_FLOAT32_C( 2.68), SIMDE_FLOAT32_C( -6.48), SIMDE_FLOAT32_C( -5.52), SIMDE_FLOAT32_C( -5.90), SIMDE_FLOAT32_C( 4.38), SIMDE_FLOAT32_C( 6.93), SIMDE_FLOAT32_C( 6.67), SIMDE_FLOAT32_C( 3.36), SIMDE_FLOAT32_C( -6.41), SIMDE_FLOAT32_C( 7.51), SIMDE_FLOAT32_C( -7.24)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_asinh_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_asinh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( 696.87), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( 346.63)), UINT16_C(41466), simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( -754.38)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 5.88), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 6.26), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -6.73), SIMDE_FLOAT32_C( -7.21), SIMDE_FLOAT32_C( 5.13), SIMDE_FLOAT32_C( 7.41), SIMDE_FLOAT32_C( -6.29), 
SIMDE_FLOAT32_C( 6.90), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -5.92), SIMDE_FLOAT32_C( 346.63)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 28.08)), UINT16_C(36797), simde_mm512_set_ps(SIMDE_FLOAT32_C( -171.51), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 254.31), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( 398.82), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( 993.90)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -5.84), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 6.23), SIMDE_FLOAT32_C( -5.08), SIMDE_FLOAT32_C( -7.54), SIMDE_FLOAT32_C( -7.34), SIMDE_FLOAT32_C( 6.68), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 6.52), SIMDE_FLOAT32_C( -6.27), SIMDE_FLOAT32_C( -4.12), SIMDE_FLOAT32_C( 6.79), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 7.59)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -95.15), SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( -738.19), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 543.35)), UINT16_C(16804), simde_mm512_set_ps(SIMDE_FLOAT32_C( 27.25), SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( -21.09), 
SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( 977.36), SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -328.54), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( -70.91)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -95.15), SIMDE_FLOAT32_C( 7.23), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 6.50), SIMDE_FLOAT32_C( 6.83), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 6.48), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -5.95), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 543.35)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -348.70), SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( -337.60), SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( 710.38), SIMDE_FLOAT32_C( -756.42)), UINT16_C( 2107), simde_mm512_set_ps(SIMDE_FLOAT32_C( 897.27), SIMDE_FLOAT32_C( -197.89), SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( -696.69), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( 977.49)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -348.70), SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( 2.68), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( 7.40), 
SIMDE_FLOAT32_C( -7.60), SIMDE_FLOAT32_C( 7.52), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( -4.91), SIMDE_FLOAT32_C( 7.58)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -15.61), SIMDE_FLOAT32_C( -737.13), SIMDE_FLOAT32_C( -314.93), SIMDE_FLOAT32_C( 177.92), SIMDE_FLOAT32_C( 345.93), SIMDE_FLOAT32_C( 888.71), SIMDE_FLOAT32_C( 915.71), SIMDE_FLOAT32_C( 133.52), SIMDE_FLOAT32_C( 484.94), SIMDE_FLOAT32_C( -598.06), SIMDE_FLOAT32_C( -791.07), SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( 221.37), SIMDE_FLOAT32_C( -788.36), SIMDE_FLOAT32_C( -775.04), SIMDE_FLOAT32_C( 440.64)), UINT16_C(22274), simde_mm512_set_ps(SIMDE_FLOAT32_C( 496.57), SIMDE_FLOAT32_C( 915.19), SIMDE_FLOAT32_C( -718.40), SIMDE_FLOAT32_C( 159.97), SIMDE_FLOAT32_C( -861.01), SIMDE_FLOAT32_C( 426.61), SIMDE_FLOAT32_C( 932.11), SIMDE_FLOAT32_C( 110.36), SIMDE_FLOAT32_C( 826.84), SIMDE_FLOAT32_C( -76.75), SIMDE_FLOAT32_C( 237.58), SIMDE_FLOAT32_C( -378.50), SIMDE_FLOAT32_C( -601.68), SIMDE_FLOAT32_C( -623.50), SIMDE_FLOAT32_C( -942.47), SIMDE_FLOAT32_C( 475.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -15.61), SIMDE_FLOAT32_C( 7.51), SIMDE_FLOAT32_C( -314.93), SIMDE_FLOAT32_C( 5.77), SIMDE_FLOAT32_C( 345.93), SIMDE_FLOAT32_C( 6.75), SIMDE_FLOAT32_C( 7.53), SIMDE_FLOAT32_C( 5.40), SIMDE_FLOAT32_C( 484.94), SIMDE_FLOAT32_C( -598.06), SIMDE_FLOAT32_C( -791.07), SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( 221.37), SIMDE_FLOAT32_C( -788.36), SIMDE_FLOAT32_C( -7.54), SIMDE_FLOAT32_C( 440.64)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 883.05), SIMDE_FLOAT32_C( -807.28), SIMDE_FLOAT32_C( -70.05), SIMDE_FLOAT32_C( -784.34), SIMDE_FLOAT32_C( 92.52), SIMDE_FLOAT32_C( 206.60), SIMDE_FLOAT32_C( 834.60), SIMDE_FLOAT32_C( -65.60), SIMDE_FLOAT32_C( -286.07), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -318.38), SIMDE_FLOAT32_C( 783.48), SIMDE_FLOAT32_C( -628.82), SIMDE_FLOAT32_C( 556.35), SIMDE_FLOAT32_C( 439.43), SIMDE_FLOAT32_C( 434.03)), UINT16_C(27396), simde_mm512_set_ps(SIMDE_FLOAT32_C( -964.25), SIMDE_FLOAT32_C( -406.33), 
SIMDE_FLOAT32_C( -743.66), SIMDE_FLOAT32_C( -764.58), SIMDE_FLOAT32_C( 789.89), SIMDE_FLOAT32_C( 4.83), SIMDE_FLOAT32_C( -818.54), SIMDE_FLOAT32_C( 161.06), SIMDE_FLOAT32_C( 579.25), SIMDE_FLOAT32_C( -11.78), SIMDE_FLOAT32_C( -308.52), SIMDE_FLOAT32_C( -719.57), SIMDE_FLOAT32_C( 334.00), SIMDE_FLOAT32_C( 274.71), SIMDE_FLOAT32_C( -916.82), SIMDE_FLOAT32_C( -490.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 883.05), SIMDE_FLOAT32_C( -6.70), SIMDE_FLOAT32_C( -7.30), SIMDE_FLOAT32_C( -784.34), SIMDE_FLOAT32_C( 7.37), SIMDE_FLOAT32_C( 206.60), SIMDE_FLOAT32_C( -7.40), SIMDE_FLOAT32_C( 5.77), SIMDE_FLOAT32_C( -286.07), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -318.38), SIMDE_FLOAT32_C( 783.48), SIMDE_FLOAT32_C( -628.82), SIMDE_FLOAT32_C( 6.31), SIMDE_FLOAT32_C( 439.43), SIMDE_FLOAT32_C( 434.03)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 529.63), SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -967.78), SIMDE_FLOAT32_C( 638.94), SIMDE_FLOAT32_C( 450.90), SIMDE_FLOAT32_C( -771.54), SIMDE_FLOAT32_C( 105.79), SIMDE_FLOAT32_C( 590.10), SIMDE_FLOAT32_C( 30.91), SIMDE_FLOAT32_C( 635.35), SIMDE_FLOAT32_C( -84.00), SIMDE_FLOAT32_C( 80.04), SIMDE_FLOAT32_C( -709.46), SIMDE_FLOAT32_C( 607.86), SIMDE_FLOAT32_C( 394.58), SIMDE_FLOAT32_C( -889.11)), UINT16_C( 953), simde_mm512_set_ps(SIMDE_FLOAT32_C( 18.75), SIMDE_FLOAT32_C( 809.05), SIMDE_FLOAT32_C( 144.05), SIMDE_FLOAT32_C( -427.72), SIMDE_FLOAT32_C( 308.28), SIMDE_FLOAT32_C( -177.05), SIMDE_FLOAT32_C( -457.77), SIMDE_FLOAT32_C( 678.24), SIMDE_FLOAT32_C( 66.05), SIMDE_FLOAT32_C( -267.71), SIMDE_FLOAT32_C( 117.28), SIMDE_FLOAT32_C( -576.80), SIMDE_FLOAT32_C( -38.39), SIMDE_FLOAT32_C( -250.14), SIMDE_FLOAT32_C( -53.92), SIMDE_FLOAT32_C( 91.94)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 529.63), SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -967.78), SIMDE_FLOAT32_C( 638.94), SIMDE_FLOAT32_C( 450.90), SIMDE_FLOAT32_C( -771.54), SIMDE_FLOAT32_C( -6.82), SIMDE_FLOAT32_C( 7.21), SIMDE_FLOAT32_C( 4.88), SIMDE_FLOAT32_C( 635.35), SIMDE_FLOAT32_C( 5.46), 
SIMDE_FLOAT32_C( -7.05), SIMDE_FLOAT32_C( -4.34), SIMDE_FLOAT32_C( 607.86), SIMDE_FLOAT32_C( 394.58), SIMDE_FLOAT32_C( 5.21)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -788.39), SIMDE_FLOAT32_C( 330.43), SIMDE_FLOAT32_C( -493.41), SIMDE_FLOAT32_C( 822.72), SIMDE_FLOAT32_C( 956.68), SIMDE_FLOAT32_C( 954.62), SIMDE_FLOAT32_C( 825.49), SIMDE_FLOAT32_C( -816.27), SIMDE_FLOAT32_C( -209.34), SIMDE_FLOAT32_C( -933.21), SIMDE_FLOAT32_C( -728.70), SIMDE_FLOAT32_C( -420.06), SIMDE_FLOAT32_C( 100.32), SIMDE_FLOAT32_C( 103.15), SIMDE_FLOAT32_C( 439.77), SIMDE_FLOAT32_C( -204.33)), UINT16_C(12713), simde_mm512_set_ps(SIMDE_FLOAT32_C( -841.43), SIMDE_FLOAT32_C( -14.16), SIMDE_FLOAT32_C( 824.88), SIMDE_FLOAT32_C( 793.63), SIMDE_FLOAT32_C( -736.75), SIMDE_FLOAT32_C( -310.57), SIMDE_FLOAT32_C( 728.87), SIMDE_FLOAT32_C( -350.72), SIMDE_FLOAT32_C( 60.89), SIMDE_FLOAT32_C( 109.81), SIMDE_FLOAT32_C( 715.94), SIMDE_FLOAT32_C( -250.60), SIMDE_FLOAT32_C( 944.14), SIMDE_FLOAT32_C( 361.85), SIMDE_FLOAT32_C( -13.07), SIMDE_FLOAT32_C( 852.60)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -788.39), SIMDE_FLOAT32_C( 330.43), SIMDE_FLOAT32_C( 7.41), SIMDE_FLOAT32_C( 7.37), SIMDE_FLOAT32_C( 956.68), SIMDE_FLOAT32_C( 954.62), SIMDE_FLOAT32_C( 825.49), SIMDE_FLOAT32_C( -6.55), SIMDE_FLOAT32_C( 4.80), SIMDE_FLOAT32_C( -933.21), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -420.06), SIMDE_FLOAT32_C( 7.54), SIMDE_FLOAT32_C( 103.15), SIMDE_FLOAT32_C( 439.77), SIMDE_FLOAT32_C( 7.44)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_asinh_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_asinh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01), 
SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 6.90), SIMDE_FLOAT64_C( 7.20), SIMDE_FLOAT64_C( -6.39), SIMDE_FLOAT64_C( 4.22), SIMDE_FLOAT64_C( -5.92), SIMDE_FLOAT64_C( 4.36), SIMDE_FLOAT64_C( -7.32), SIMDE_FLOAT64_C( 6.54)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -7.21), SIMDE_FLOAT64_C( -7.22), SIMDE_FLOAT64_C( 5.13), SIMDE_FLOAT64_C( 7.04), SIMDE_FLOAT64_C( 7.41), SIMDE_FLOAT64_C( 6.74), SIMDE_FLOAT64_C( -6.29), SIMDE_FLOAT64_C( 6.84)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -923.64), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( -417.54), SIMDE_FLOAT64_C( 696.87)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -6.79), SIMDE_FLOAT64_C( 4.04), SIMDE_FLOAT64_C( -6.64), SIMDE_FLOAT64_C( -7.52), SIMDE_FLOAT64_C( -6.41), SIMDE_FLOAT64_C( -7.45), SIMDE_FLOAT64_C( -6.73), SIMDE_FLOAT64_C( 7.24)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 687.09), SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -660.80)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.88), SIMDE_FLOAT64_C( -6.80), SIMDE_FLOAT64_C( 6.15), SIMDE_FLOAT64_C( 7.23), SIMDE_FLOAT64_C( 6.26), SIMDE_FLOAT64_C( -6.05), SIMDE_FLOAT64_C( -7.58), SIMDE_FLOAT64_C( -7.19)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -770.35), SIMDE_FLOAT64_C( 443.48), SIMDE_FLOAT64_C( -583.60), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( -770.72), SIMDE_FLOAT64_C( 993.90), SIMDE_FLOAT64_C( 28.08), SIMDE_FLOAT64_C( 841.21)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -7.34), SIMDE_FLOAT64_C( 6.79), SIMDE_FLOAT64_C( -7.06), SIMDE_FLOAT64_C( 6.63), SIMDE_FLOAT64_C( 
-7.34), SIMDE_FLOAT64_C( 7.59), SIMDE_FLOAT64_C( 4.03), SIMDE_FLOAT64_C( 7.43)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -387.90), SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 655.87), SIMDE_FLOAT64_C( 339.21), SIMDE_FLOAT64_C( 532.35), SIMDE_FLOAT64_C( -263.99), SIMDE_FLOAT64_C( 780.64), SIMDE_FLOAT64_C( -30.79)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -6.65), SIMDE_FLOAT64_C( 6.67), SIMDE_FLOAT64_C( 7.18), SIMDE_FLOAT64_C( 6.52), SIMDE_FLOAT64_C( 6.97), SIMDE_FLOAT64_C( -6.27), SIMDE_FLOAT64_C( 7.35), SIMDE_FLOAT64_C( -4.12)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -203.65), SIMDE_FLOAT64_C( -80.73), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -944.78), SIMDE_FLOAT64_C( -747.59), SIMDE_FLOAT64_C( -767.23), SIMDE_FLOAT64_C( -554.19), SIMDE_FLOAT64_C( 398.82)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -6.01), SIMDE_FLOAT64_C( -5.08), SIMDE_FLOAT64_C( 6.51), SIMDE_FLOAT64_C( -7.54), SIMDE_FLOAT64_C( -7.31), SIMDE_FLOAT64_C( -7.34), SIMDE_FLOAT64_C( -7.01), SIMDE_FLOAT64_C( 6.68)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( 791.23), SIMDE_FLOAT64_C( 254.31)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 6.85), SIMDE_FLOAT64_C( 7.22), SIMDE_FLOAT64_C( -5.70), SIMDE_FLOAT64_C( 7.40), SIMDE_FLOAT64_C( 7.51), SIMDE_FLOAT64_C( 7.09), SIMDE_FLOAT64_C( 7.37), SIMDE_FLOAT64_C( 6.23)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_asinh_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_asinh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( 467.76), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( 39.01), 
SIMDE_FLOAT64_C( 346.63)), UINT8_C(139), simde_mm512_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( -754.38)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -7.21), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( 467.76), SIMDE_FLOAT64_C( 6.90), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( -5.92), SIMDE_FLOAT64_C( -7.32)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -417.54)), UINT8_C(229), simde_mm512_set_pd(SIMDE_FLOAT64_C( 841.21), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 687.09), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -660.80), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -923.64), SIMDE_FLOAT64_C( -860.95)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 7.43), SIMDE_FLOAT64_C( -6.80), SIMDE_FLOAT64_C( 7.23), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 4.04), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -7.45)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 398.82), SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 339.21), SIMDE_FLOAT64_C( -263.99), SIMDE_FLOAT64_C( -30.79), SIMDE_FLOAT64_C( 443.48), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( 993.90)), UINT8_C(253), simde_mm512_set_pd(SIMDE_FLOAT64_C( -554.19), SIMDE_FLOAT64_C( -387.90), SIMDE_FLOAT64_C( 655.87), SIMDE_FLOAT64_C( 532.35), SIMDE_FLOAT64_C( 780.64), SIMDE_FLOAT64_C( -770.35), SIMDE_FLOAT64_C( -583.60), SIMDE_FLOAT64_C( -770.72)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -7.01), SIMDE_FLOAT64_C( -6.65), SIMDE_FLOAT64_C( 7.18), SIMDE_FLOAT64_C( 6.97), SIMDE_FLOAT64_C( 7.35), SIMDE_FLOAT64_C( -7.34), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( -7.34)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 120.65), SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( 
-148.69), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 791.23), SIMDE_FLOAT64_C( -203.65), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -747.59)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 543.35), SIMDE_FLOAT64_C( -171.51), SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( 254.31), SIMDE_FLOAT64_C( -80.73), SIMDE_FLOAT64_C( -944.78)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 120.65), SIMDE_FLOAT64_C( -5.84), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 7.40), SIMDE_FLOAT64_C( 7.09), SIMDE_FLOAT64_C( 6.23), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -7.54)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 99.93), SIMDE_FLOAT64_C( -738.19), SIMDE_FLOAT64_C( 758.79), SIMDE_FLOAT64_C( 343.48), SIMDE_FLOAT64_C( -797.92), SIMDE_FLOAT64_C( -525.83), SIMDE_FLOAT64_C( -822.65), SIMDE_FLOAT64_C( 655.67)), UINT8_C(145), simde_mm512_set_pd(SIMDE_FLOAT64_C( 331.34), SIMDE_FLOAT64_C( 462.95), SIMDE_FLOAT64_C( -178.99), SIMDE_FLOAT64_C( 324.62), SIMDE_FLOAT64_C( -874.31), SIMDE_FLOAT64_C( -328.54), SIMDE_FLOAT64_C( -192.31), SIMDE_FLOAT64_C( 561.36)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 6.50), SIMDE_FLOAT64_C( -738.19), SIMDE_FLOAT64_C( 758.79), SIMDE_FLOAT64_C( 6.48), SIMDE_FLOAT64_C( -797.92), SIMDE_FLOAT64_C( -525.83), SIMDE_FLOAT64_C( -822.65), SIMDE_FLOAT64_C( 7.02)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -756.42), SIMDE_FLOAT64_C( 27.25), SIMDE_FLOAT64_C( 690.12), SIMDE_FLOAT64_C( -21.09), SIMDE_FLOAT64_C( -448.89), SIMDE_FLOAT64_C( 505.79), SIMDE_FLOAT64_C( 831.02), SIMDE_FLOAT64_C( 977.36)), UINT8_C( 75), simde_mm512_set_pd(SIMDE_FLOAT64_C( 977.49), SIMDE_FLOAT64_C( 424.81), SIMDE_FLOAT64_C( -95.15), SIMDE_FLOAT64_C( 840.65), SIMDE_FLOAT64_C( -591.56), SIMDE_FLOAT64_C( 731.49), SIMDE_FLOAT64_C( 623.70), SIMDE_FLOAT64_C( 140.67)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -756.42), SIMDE_FLOAT64_C( 6.74), SIMDE_FLOAT64_C( 690.12), SIMDE_FLOAT64_C( -21.09), SIMDE_FLOAT64_C( -7.08), SIMDE_FLOAT64_C( 505.79), SIMDE_FLOAT64_C( 7.13), 
SIMDE_FLOAT64_C( 5.64)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 394.67), SIMDE_FLOAT64_C( -304.73), SIMDE_FLOAT64_C( -696.69), SIMDE_FLOAT64_C( 822.06), SIMDE_FLOAT64_C( -997.63), SIMDE_FLOAT64_C( 923.64), SIMDE_FLOAT64_C( -768.12), SIMDE_FLOAT64_C( -67.64)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 510.85), SIMDE_FLOAT64_C( 14.34), SIMDE_FLOAT64_C( 916.26), SIMDE_FLOAT64_C( -769.09), SIMDE_FLOAT64_C( -573.81), SIMDE_FLOAT64_C( -337.60), SIMDE_FLOAT64_C( 293.64), SIMDE_FLOAT64_C( -576.22)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 394.67), SIMDE_FLOAT64_C( 3.36), SIMDE_FLOAT64_C( -696.69), SIMDE_FLOAT64_C( -7.34), SIMDE_FLOAT64_C( -7.05), SIMDE_FLOAT64_C( -6.52), SIMDE_FLOAT64_C( -768.12), SIMDE_FLOAT64_C( -7.05)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 475.51), SIMDE_FLOAT64_C( 936.65), SIMDE_FLOAT64_C( -348.70), SIMDE_FLOAT64_C( -438.19), SIMDE_FLOAT64_C( -752.43), SIMDE_FLOAT64_C( 932.66), SIMDE_FLOAT64_C( -327.22), SIMDE_FLOAT64_C( -182.45)), UINT8_C(213), simde_mm512_set_pd(SIMDE_FLOAT64_C( -775.04), SIMDE_FLOAT64_C( 440.64), SIMDE_FLOAT64_C( 897.27), SIMDE_FLOAT64_C( -197.89), SIMDE_FLOAT64_C( -359.76), SIMDE_FLOAT64_C( -33.67), SIMDE_FLOAT64_C( 7.27), SIMDE_FLOAT64_C( -125.20)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -7.35), SIMDE_FLOAT64_C( 6.78), SIMDE_FLOAT64_C( -348.70), SIMDE_FLOAT64_C( -5.98), SIMDE_FLOAT64_C( -752.43), SIMDE_FLOAT64_C( -4.21), SIMDE_FLOAT64_C( -327.22), SIMDE_FLOAT64_C( -5.52)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_asinh_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_atan_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm_set_ps(SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.55), SIMDE_FLOAT32_C( -1.57), 
SIMDE_FLOAT32_C( 1.57)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.54)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46)), simde_mm_set_ps(SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( 1.57)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm_set_ps(SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64)), simde_mm_set_ps(SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_atan_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_atan_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; 
simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.55)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06)), simde_mm_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.54)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.56), SIMDE_FLOAT64_C( 1.57)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13)), simde_mm_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_atan_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_atan_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.55), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 
84.77), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 28.08), SIMDE_FLOAT32_C( 841.21)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 1.57)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -30.79)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -1.57), 
SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.54)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( 398.82)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.56), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 254.31)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.56), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_atan_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_atan_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.55), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.54)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 
467.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.56), SIMDE_FLOAT64_C( 1.57)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( -417.54), SIMDE_FLOAT64_C( 696.87)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -923.64)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.54), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -660.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 687.09)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_atan_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_atan_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), 
SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.55), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 28.08), SIMDE_FLOAT32_C( 841.21)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), 
SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.54), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 1.57)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 254.31), SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( 398.82)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.56), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.56), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -328.54), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( -70.91), SIMDE_FLOAT32_C( 543.35), SIMDE_FLOAT32_C( 120.65), SIMDE_FLOAT32_C( -171.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.56), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( -1.56)) }, { 
simde_mm512_set_ps(SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( 977.36), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( -738.19)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.52), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( -337.60), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( 710.38), SIMDE_FLOAT32_C( 977.49), SIMDE_FLOAT32_C( -756.42), SIMDE_FLOAT32_C( 424.81), SIMDE_FLOAT32_C( 27.25), SIMDE_FLOAT32_C( -95.15)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.56), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( -1.56)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 39.93), 
SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -696.69)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.54), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.43), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.56), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.55), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_atan_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_atan_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( 696.87), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( 346.63)), UINT16_C(41466), simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( -754.38)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), 
SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 346.63)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 28.08)), UINT16_C(36797), simde_mm512_set_ps(SIMDE_FLOAT32_C( -171.51), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 254.31), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( 398.82), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( 993.90)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.56), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.56), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.54), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 1.57)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -95.15), SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( -738.19), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( 655.67), 
SIMDE_FLOAT32_C( 543.35)), UINT16_C(16804), simde_mm512_set_ps(SIMDE_FLOAT32_C( 27.25), SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( 977.36), SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -328.54), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( -70.91)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -95.15), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 543.35)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -348.70), SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( -337.60), SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( 710.38), SIMDE_FLOAT32_C( -756.42)), UINT16_C( 2107), simde_mm512_set_ps(SIMDE_FLOAT32_C( 897.27), SIMDE_FLOAT32_C( -197.89), SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( -696.69), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( 977.49)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -348.70), SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( 1.43), SIMDE_FLOAT32_C( -182.45), 
SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( -1.56), SIMDE_FLOAT32_C( 1.57)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -15.61), SIMDE_FLOAT32_C( -737.13), SIMDE_FLOAT32_C( -314.93), SIMDE_FLOAT32_C( 177.92), SIMDE_FLOAT32_C( 345.93), SIMDE_FLOAT32_C( 888.71), SIMDE_FLOAT32_C( 915.71), SIMDE_FLOAT32_C( 133.52), SIMDE_FLOAT32_C( 484.94), SIMDE_FLOAT32_C( -598.06), SIMDE_FLOAT32_C( -791.07), SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( 221.37), SIMDE_FLOAT32_C( -788.36), SIMDE_FLOAT32_C( -775.04), SIMDE_FLOAT32_C( 440.64)), UINT16_C(22274), simde_mm512_set_ps(SIMDE_FLOAT32_C( 496.57), SIMDE_FLOAT32_C( 915.19), SIMDE_FLOAT32_C( -718.40), SIMDE_FLOAT32_C( 159.97), SIMDE_FLOAT32_C( -861.01), SIMDE_FLOAT32_C( 426.61), SIMDE_FLOAT32_C( 932.11), SIMDE_FLOAT32_C( 110.36), SIMDE_FLOAT32_C( 826.84), SIMDE_FLOAT32_C( -76.75), SIMDE_FLOAT32_C( 237.58), SIMDE_FLOAT32_C( -378.50), SIMDE_FLOAT32_C( -601.68), SIMDE_FLOAT32_C( -623.50), SIMDE_FLOAT32_C( -942.47), SIMDE_FLOAT32_C( 475.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -15.61), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -314.93), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( 345.93), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( 484.94), SIMDE_FLOAT32_C( -598.06), SIMDE_FLOAT32_C( -791.07), SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( 221.37), SIMDE_FLOAT32_C( -788.36), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 440.64)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 883.05), SIMDE_FLOAT32_C( -807.28), SIMDE_FLOAT32_C( -70.05), SIMDE_FLOAT32_C( -784.34), SIMDE_FLOAT32_C( 92.52), SIMDE_FLOAT32_C( 206.60), SIMDE_FLOAT32_C( 834.60), SIMDE_FLOAT32_C( -65.60), SIMDE_FLOAT32_C( -286.07), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -318.38), SIMDE_FLOAT32_C( 783.48), SIMDE_FLOAT32_C( -628.82), SIMDE_FLOAT32_C( 556.35), SIMDE_FLOAT32_C( 
439.43), SIMDE_FLOAT32_C( 434.03)), UINT16_C(27396), simde_mm512_set_ps(SIMDE_FLOAT32_C( -964.25), SIMDE_FLOAT32_C( -406.33), SIMDE_FLOAT32_C( -743.66), SIMDE_FLOAT32_C( -764.58), SIMDE_FLOAT32_C( 789.89), SIMDE_FLOAT32_C( 4.83), SIMDE_FLOAT32_C( -818.54), SIMDE_FLOAT32_C( 161.06), SIMDE_FLOAT32_C( 579.25), SIMDE_FLOAT32_C( -11.78), SIMDE_FLOAT32_C( -308.52), SIMDE_FLOAT32_C( -719.57), SIMDE_FLOAT32_C( 334.00), SIMDE_FLOAT32_C( 274.71), SIMDE_FLOAT32_C( -916.82), SIMDE_FLOAT32_C( -490.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 883.05), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -784.34), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 206.60), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( -286.07), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -318.38), SIMDE_FLOAT32_C( 783.48), SIMDE_FLOAT32_C( -628.82), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 439.43), SIMDE_FLOAT32_C( 434.03)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 529.63), SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -967.78), SIMDE_FLOAT32_C( 638.94), SIMDE_FLOAT32_C( 450.90), SIMDE_FLOAT32_C( -771.54), SIMDE_FLOAT32_C( 105.79), SIMDE_FLOAT32_C( 590.10), SIMDE_FLOAT32_C( 30.91), SIMDE_FLOAT32_C( 635.35), SIMDE_FLOAT32_C( -84.00), SIMDE_FLOAT32_C( 80.04), SIMDE_FLOAT32_C( -709.46), SIMDE_FLOAT32_C( 607.86), SIMDE_FLOAT32_C( 394.58), SIMDE_FLOAT32_C( -889.11)), UINT16_C( 953), simde_mm512_set_ps(SIMDE_FLOAT32_C( 18.75), SIMDE_FLOAT32_C( 809.05), SIMDE_FLOAT32_C( 144.05), SIMDE_FLOAT32_C( -427.72), SIMDE_FLOAT32_C( 308.28), SIMDE_FLOAT32_C( -177.05), SIMDE_FLOAT32_C( -457.77), SIMDE_FLOAT32_C( 678.24), SIMDE_FLOAT32_C( 66.05), SIMDE_FLOAT32_C( -267.71), SIMDE_FLOAT32_C( 117.28), SIMDE_FLOAT32_C( -576.80), SIMDE_FLOAT32_C( -38.39), SIMDE_FLOAT32_C( -250.14), SIMDE_FLOAT32_C( -53.92), SIMDE_FLOAT32_C( 91.94)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 529.63), SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -967.78), SIMDE_FLOAT32_C( 638.94), SIMDE_FLOAT32_C( 450.90), SIMDE_FLOAT32_C( 
-771.54), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( 635.35), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.54), SIMDE_FLOAT32_C( 607.86), SIMDE_FLOAT32_C( 394.58), SIMDE_FLOAT32_C( 1.56)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -788.39), SIMDE_FLOAT32_C( 330.43), SIMDE_FLOAT32_C( -493.41), SIMDE_FLOAT32_C( 822.72), SIMDE_FLOAT32_C( 956.68), SIMDE_FLOAT32_C( 954.62), SIMDE_FLOAT32_C( 825.49), SIMDE_FLOAT32_C( -816.27), SIMDE_FLOAT32_C( -209.34), SIMDE_FLOAT32_C( -933.21), SIMDE_FLOAT32_C( -728.70), SIMDE_FLOAT32_C( -420.06), SIMDE_FLOAT32_C( 100.32), SIMDE_FLOAT32_C( 103.15), SIMDE_FLOAT32_C( 439.77), SIMDE_FLOAT32_C( -204.33)), UINT16_C(12713), simde_mm512_set_ps(SIMDE_FLOAT32_C( -841.43), SIMDE_FLOAT32_C( -14.16), SIMDE_FLOAT32_C( 824.88), SIMDE_FLOAT32_C( 793.63), SIMDE_FLOAT32_C( -736.75), SIMDE_FLOAT32_C( -310.57), SIMDE_FLOAT32_C( 728.87), SIMDE_FLOAT32_C( -350.72), SIMDE_FLOAT32_C( 60.89), SIMDE_FLOAT32_C( 109.81), SIMDE_FLOAT32_C( 715.94), SIMDE_FLOAT32_C( -250.60), SIMDE_FLOAT32_C( 944.14), SIMDE_FLOAT32_C( 361.85), SIMDE_FLOAT32_C( -13.07), SIMDE_FLOAT32_C( 852.60)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -788.39), SIMDE_FLOAT32_C( 330.43), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 956.68), SIMDE_FLOAT32_C( 954.62), SIMDE_FLOAT32_C( 825.49), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.55), SIMDE_FLOAT32_C( -933.21), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -420.06), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 103.15), SIMDE_FLOAT32_C( 439.77), SIMDE_FLOAT32_C( 1.57)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_atan_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_atan_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 497.31), 
SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.54), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.55), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.56), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -923.64), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( -417.54), SIMDE_FLOAT64_C( 696.87)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.54), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 687.09), SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -660.80)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -770.35), SIMDE_FLOAT64_C( 443.48), SIMDE_FLOAT64_C( -583.60), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( -770.72), SIMDE_FLOAT64_C( 993.90), SIMDE_FLOAT64_C( 28.08), SIMDE_FLOAT64_C( 841.21)), 
simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.54), SIMDE_FLOAT64_C( 1.57)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -387.90), SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 655.87), SIMDE_FLOAT64_C( 339.21), SIMDE_FLOAT64_C( 532.35), SIMDE_FLOAT64_C( -263.99), SIMDE_FLOAT64_C( 780.64), SIMDE_FLOAT64_C( -30.79)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -1.54)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -203.65), SIMDE_FLOAT64_C( -80.73), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -944.78), SIMDE_FLOAT64_C( -747.59), SIMDE_FLOAT64_C( -767.23), SIMDE_FLOAT64_C( -554.19), SIMDE_FLOAT64_C( 398.82)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.56), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( 791.23), SIMDE_FLOAT64_C( 254.31)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -1.56), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_atan_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_atan_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 571.46), 
SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( 467.76), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( 346.63)), UINT8_C(139), simde_mm512_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( -754.38)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( 467.76), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -417.54)), UINT8_C(229), simde_mm512_set_pd(SIMDE_FLOAT64_C( 841.21), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 687.09), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -660.80), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -923.64), SIMDE_FLOAT64_C( -860.95)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 1.54), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -1.57)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 398.82), SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 339.21), SIMDE_FLOAT64_C( -263.99), SIMDE_FLOAT64_C( -30.79), SIMDE_FLOAT64_C( 443.48), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( 993.90)), UINT8_C(253), simde_mm512_set_pd(SIMDE_FLOAT64_C( -554.19), SIMDE_FLOAT64_C( -387.90), SIMDE_FLOAT64_C( 655.87), SIMDE_FLOAT64_C( 532.35), SIMDE_FLOAT64_C( 780.64), SIMDE_FLOAT64_C( -770.35), SIMDE_FLOAT64_C( -583.60), SIMDE_FLOAT64_C( -770.72)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 
380.46), SIMDE_FLOAT64_C( -1.57)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 120.65), SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 791.23), SIMDE_FLOAT64_C( -203.65), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -747.59)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 543.35), SIMDE_FLOAT64_C( -171.51), SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( 254.31), SIMDE_FLOAT64_C( -80.73), SIMDE_FLOAT64_C( -944.78)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 120.65), SIMDE_FLOAT64_C( -1.56), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -1.57)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 99.93), SIMDE_FLOAT64_C( -738.19), SIMDE_FLOAT64_C( 758.79), SIMDE_FLOAT64_C( 343.48), SIMDE_FLOAT64_C( -797.92), SIMDE_FLOAT64_C( -525.83), SIMDE_FLOAT64_C( -822.65), SIMDE_FLOAT64_C( 655.67)), UINT8_C(145), simde_mm512_set_pd(SIMDE_FLOAT64_C( 331.34), SIMDE_FLOAT64_C( 462.95), SIMDE_FLOAT64_C( -178.99), SIMDE_FLOAT64_C( 324.62), SIMDE_FLOAT64_C( -874.31), SIMDE_FLOAT64_C( -328.54), SIMDE_FLOAT64_C( -192.31), SIMDE_FLOAT64_C( 561.36)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -738.19), SIMDE_FLOAT64_C( 758.79), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -797.92), SIMDE_FLOAT64_C( -525.83), SIMDE_FLOAT64_C( -822.65), SIMDE_FLOAT64_C( 1.57)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -756.42), SIMDE_FLOAT64_C( 27.25), SIMDE_FLOAT64_C( 690.12), SIMDE_FLOAT64_C( -21.09), SIMDE_FLOAT64_C( -448.89), SIMDE_FLOAT64_C( 505.79), SIMDE_FLOAT64_C( 831.02), SIMDE_FLOAT64_C( 977.36)), UINT8_C( 75), simde_mm512_set_pd(SIMDE_FLOAT64_C( 977.49), SIMDE_FLOAT64_C( 424.81), SIMDE_FLOAT64_C( -95.15), SIMDE_FLOAT64_C( 840.65), SIMDE_FLOAT64_C( -591.56), SIMDE_FLOAT64_C( 731.49), SIMDE_FLOAT64_C( 623.70), SIMDE_FLOAT64_C( 140.67)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -756.42), SIMDE_FLOAT64_C( 1.57), 
SIMDE_FLOAT64_C( 690.12), SIMDE_FLOAT64_C( -21.09), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 505.79), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.56)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 394.67), SIMDE_FLOAT64_C( -304.73), SIMDE_FLOAT64_C( -696.69), SIMDE_FLOAT64_C( 822.06), SIMDE_FLOAT64_C( -997.63), SIMDE_FLOAT64_C( 923.64), SIMDE_FLOAT64_C( -768.12), SIMDE_FLOAT64_C( -67.64)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 510.85), SIMDE_FLOAT64_C( 14.34), SIMDE_FLOAT64_C( 916.26), SIMDE_FLOAT64_C( -769.09), SIMDE_FLOAT64_C( -573.81), SIMDE_FLOAT64_C( -337.60), SIMDE_FLOAT64_C( 293.64), SIMDE_FLOAT64_C( -576.22)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 394.67), SIMDE_FLOAT64_C( 1.50), SIMDE_FLOAT64_C( -696.69), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -768.12), SIMDE_FLOAT64_C( -1.57)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 475.51), SIMDE_FLOAT64_C( 936.65), SIMDE_FLOAT64_C( -348.70), SIMDE_FLOAT64_C( -438.19), SIMDE_FLOAT64_C( -752.43), SIMDE_FLOAT64_C( 932.66), SIMDE_FLOAT64_C( -327.22), SIMDE_FLOAT64_C( -182.45)), UINT8_C(213), simde_mm512_set_pd(SIMDE_FLOAT64_C( -775.04), SIMDE_FLOAT64_C( 440.64), SIMDE_FLOAT64_C( 897.27), SIMDE_FLOAT64_C( -197.89), SIMDE_FLOAT64_C( -359.76), SIMDE_FLOAT64_C( -33.67), SIMDE_FLOAT64_C( 7.27), SIMDE_FLOAT64_C( -125.20)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -348.70), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -752.43), SIMDE_FLOAT64_C( -1.54), SIMDE_FLOAT64_C( -327.22), SIMDE_FLOAT64_C( -1.56)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_atan_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_atan2_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 b; simde__m128 r; } test_vec[9] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( 34.06), 
SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( 346.63)), simde_mm_set_ps(SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( -754.38)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( 2.94), SIMDE_FLOAT32_C( 2.71)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( 467.76)), simde_mm_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( -269.45)), simde_mm_set_ps(SIMDE_FLOAT32_C( -2.35), SIMDE_FLOAT32_C( 1.42), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 2.09)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( 467.76)), simde_mm_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( -269.45)), simde_mm_set_ps(SIMDE_FLOAT32_C( -2.35), SIMDE_FLOAT32_C( 1.42), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 2.09)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( 696.87)), simde_mm_set_ps(SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -417.54)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3.08), SIMDE_FLOAT32_C( -1.96), SIMDE_FLOAT32_C( -1.91), SIMDE_FLOAT32_C( 2.11)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -660.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -976.55)), simde_mm_set_ps(SIMDE_FLOAT32_C( -1.19), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -2.55)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 841.21)), simde_mm_set_ps(SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 28.08)), 
simde_mm_set_ps(SIMDE_FLOAT32_C( 2.62), SIMDE_FLOAT32_C( 2.56), SIMDE_FLOAT32_C( 2.23), SIMDE_FLOAT32_C( 1.54)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( -30.79)), simde_mm_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( 780.64)), simde_mm_set_ps(SIMDE_FLOAT32_C( 2.35), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( -0.04)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( 398.82)), simde_mm_set_ps(SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -554.19)), simde_mm_set_ps(SIMDE_FLOAT32_C( -2.76), SIMDE_FLOAT32_C( -1.23), SIMDE_FLOAT32_C( -2.34), SIMDE_FLOAT32_C( 2.52)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 593.11), SIMDE_FLOAT32_C( 480.49), SIMDE_FLOAT32_C( -877.19), SIMDE_FLOAT32_C( -326.68)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_atan2_ps(test_vec[i].a, test_vec[i].b); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_atan2_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d b; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( 346.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( -754.38)), simde_mm_set_pd(SIMDE_FLOAT64_C( 2.94), SIMDE_FLOAT64_C( 2.71)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( 34.06)), simde_mm_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( -297.45)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 3.03)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 
422.21), SIMDE_FLOAT64_C( 467.76)), simde_mm_set_pd(SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( -269.45)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 2.09)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 571.46)), simde_mm_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( 84.77)), simde_mm_set_pd(SIMDE_FLOAT64_C( -2.35), SIMDE_FLOAT64_C( 1.42)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( 696.87)), simde_mm_set_pd(SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -417.54)), simde_mm_set_pd(SIMDE_FLOAT64_C( -1.91), SIMDE_FLOAT64_C( 2.11)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -923.64)), simde_mm_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( -384.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( 3.08), SIMDE_FLOAT64_C( -1.96)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -660.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -976.55)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -2.55)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), simde_mm_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_atan2_pd(test_vec[i].a, test_vec[i].b); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_atan2_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 b; simde__m256 r; } test_vec[9] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( 346.63)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( -186.21), 
SIMDE_FLOAT32_C( -754.38)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -2.35), SIMDE_FLOAT32_C( 1.42), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 2.09), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( 2.94), SIMDE_FLOAT32_C( 2.71)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( 696.87)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -417.54)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -1.19), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -2.55), SIMDE_FLOAT32_C( 3.08), SIMDE_FLOAT32_C( -1.96), SIMDE_FLOAT32_C( -1.91), SIMDE_FLOAT32_C( 2.11)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 841.21)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 28.08)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.35), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( 2.62), SIMDE_FLOAT32_C( 2.56), SIMDE_FLOAT32_C( 2.23), SIMDE_FLOAT32_C( 1.54)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 254.31), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( 398.82)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( -203.65), 
SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -554.19)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( -2.76), SIMDE_FLOAT32_C( -1.23), SIMDE_FLOAT32_C( -2.34), SIMDE_FLOAT32_C( 2.52)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 543.35), SIMDE_FLOAT32_C( -171.51)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -328.54), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( -70.91), SIMDE_FLOAT32_C( 120.65)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.80), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -2.40), SIMDE_FLOAT32_C( -2.13), SIMDE_FLOAT32_C( -1.80), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( -0.96)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( -738.19)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( 977.36), SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( 462.95)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( -1.61), SIMDE_FLOAT32_C( 2.12), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( -1.01)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( 977.49), SIMDE_FLOAT32_C( 424.81), SIMDE_FLOAT32_C( -95.15)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( -337.60), 
SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( 710.38), SIMDE_FLOAT32_C( -756.42), SIMDE_FLOAT32_C( 27.25)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( -2.09), SIMDE_FLOAT32_C( 1.92), SIMDE_FLOAT32_C( -1.21), SIMDE_FLOAT32_C( -3.02), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 2.63), SIMDE_FLOAT32_C( -1.29)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( -696.69)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( 916.26)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -2.45), SIMDE_FLOAT32_C( -3.10), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -2.78), SIMDE_FLOAT32_C( 2.93), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( -1.52), SIMDE_FLOAT32_C( -0.65)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_atan2_ps(test_vec[i].a, test_vec[i].b); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_atan2_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d 
b; simde__m256d r; } test_vec[9] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( 346.63)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( -754.38)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 3.03), SIMDE_FLOAT64_C( 2.94), SIMDE_FLOAT64_C( 2.71)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( 467.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( -269.45)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -2.35), SIMDE_FLOAT64_C( 1.42), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 2.09)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -923.64), SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( 696.87)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -417.54)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 3.08), SIMDE_FLOAT64_C( -1.96), SIMDE_FLOAT64_C( -1.91), SIMDE_FLOAT64_C( 2.11)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 687.09), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -660.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -976.55)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -1.19), SIMDE_FLOAT64_C( 1.24), SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -2.55)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 443.48), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( 993.90), SIMDE_FLOAT64_C( 841.21)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -770.35), SIMDE_FLOAT64_C( -583.60), SIMDE_FLOAT64_C( -770.72), SIMDE_FLOAT64_C( 28.08)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 2.62), SIMDE_FLOAT64_C( 2.56), SIMDE_FLOAT64_C( 2.23), SIMDE_FLOAT64_C( 1.54)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 339.21), SIMDE_FLOAT64_C( -263.99), 
SIMDE_FLOAT64_C( -30.79)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -387.90), SIMDE_FLOAT64_C( 655.87), SIMDE_FLOAT64_C( 532.35), SIMDE_FLOAT64_C( 780.64)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 2.35), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( -0.46), SIMDE_FLOAT64_C( -0.04)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -80.73), SIMDE_FLOAT64_C( -944.78), SIMDE_FLOAT64_C( -767.23), SIMDE_FLOAT64_C( 398.82)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -203.65), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -747.59), SIMDE_FLOAT64_C( -554.19)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -2.76), SIMDE_FLOAT64_C( -1.23), SIMDE_FLOAT64_C( -2.34), SIMDE_FLOAT64_C( 2.52)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( 254.31)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 791.23)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.97), SIMDE_FLOAT64_C( 1.75), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 0.31)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_atan2_pd(test_vec[i].a, test_vec[i].b); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_atan2_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 b; simde__m512 r; } test_vec[9] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( 696.87), SIMDE_FLOAT32_C( -686.13), 
SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( 346.63)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( -754.38)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.19), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -2.55), SIMDE_FLOAT32_C( 3.08), SIMDE_FLOAT32_C( -1.96), SIMDE_FLOAT32_C( -1.91), SIMDE_FLOAT32_C( 2.11), SIMDE_FLOAT32_C( -2.35), SIMDE_FLOAT32_C( 1.42), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 2.09), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( 2.94), SIMDE_FLOAT32_C( 2.71)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 254.31), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( 398.82), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 841.21)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 28.08)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 1.75), 
SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( -2.76), SIMDE_FLOAT32_C( -1.23), SIMDE_FLOAT32_C( -2.34), SIMDE_FLOAT32_C( 2.52), SIMDE_FLOAT32_C( 2.35), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( 2.62), SIMDE_FLOAT32_C( 2.56), SIMDE_FLOAT32_C( 2.23), SIMDE_FLOAT32_C( 1.54)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( -738.19), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 543.35), SIMDE_FLOAT32_C( -171.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( 977.36), SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -328.54), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( -70.91), SIMDE_FLOAT32_C( 120.65)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( -1.61), SIMDE_FLOAT32_C( 2.12), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( -1.01), SIMDE_FLOAT32_C( 1.80), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -2.40), SIMDE_FLOAT32_C( -2.13), SIMDE_FLOAT32_C( -1.80), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( -0.96)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( -696.69), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( 
-67.64), SIMDE_FLOAT32_C( 977.49), SIMDE_FLOAT32_C( 424.81), SIMDE_FLOAT32_C( -95.15)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( -337.60), SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( 710.38), SIMDE_FLOAT32_C( -756.42), SIMDE_FLOAT32_C( 27.25)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -2.45), SIMDE_FLOAT32_C( -3.10), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -2.78), SIMDE_FLOAT32_C( 2.93), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( -1.52), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( -2.09), SIMDE_FLOAT32_C( 1.92), SIMDE_FLOAT32_C( -1.21), SIMDE_FLOAT32_C( -3.02), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 2.63), SIMDE_FLOAT32_C( -1.29)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -314.93), SIMDE_FLOAT32_C( 177.92), SIMDE_FLOAT32_C( 345.93), SIMDE_FLOAT32_C( 888.71), SIMDE_FLOAT32_C( 915.71), SIMDE_FLOAT32_C( 133.52), SIMDE_FLOAT32_C( 484.94), SIMDE_FLOAT32_C( -598.06), SIMDE_FLOAT32_C( -791.07), SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( 221.37), SIMDE_FLOAT32_C( -788.36), SIMDE_FLOAT32_C( -775.04), SIMDE_FLOAT32_C( 440.64), SIMDE_FLOAT32_C( 897.27), SIMDE_FLOAT32_C( -197.89)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -718.40), SIMDE_FLOAT32_C( 159.97), SIMDE_FLOAT32_C( -861.01), SIMDE_FLOAT32_C( 426.61), SIMDE_FLOAT32_C( 932.11), SIMDE_FLOAT32_C( 110.36), SIMDE_FLOAT32_C( 826.84), SIMDE_FLOAT32_C( -76.75), SIMDE_FLOAT32_C( 237.58), SIMDE_FLOAT32_C( -378.50), SIMDE_FLOAT32_C( -601.68), SIMDE_FLOAT32_C( -623.50), SIMDE_FLOAT32_C( -942.47), SIMDE_FLOAT32_C( 475.51), SIMDE_FLOAT32_C( 936.65), SIMDE_FLOAT32_C( -348.70)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -2.73), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 2.76), SIMDE_FLOAT32_C( 1.12), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.88), 
SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -1.70), SIMDE_FLOAT32_C( -1.28), SIMDE_FLOAT32_C( -2.03), SIMDE_FLOAT32_C( 2.79), SIMDE_FLOAT32_C( -2.24), SIMDE_FLOAT32_C( -2.45), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( -2.63)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -764.58), SIMDE_FLOAT32_C( 789.89), SIMDE_FLOAT32_C( 4.83), SIMDE_FLOAT32_C( -818.54), SIMDE_FLOAT32_C( 161.06), SIMDE_FLOAT32_C( 579.25), SIMDE_FLOAT32_C( -11.78), SIMDE_FLOAT32_C( -308.52), SIMDE_FLOAT32_C( -719.57), SIMDE_FLOAT32_C( 334.00), SIMDE_FLOAT32_C( 274.71), SIMDE_FLOAT32_C( -916.82), SIMDE_FLOAT32_C( -490.00), SIMDE_FLOAT32_C( -799.40), SIMDE_FLOAT32_C( -15.61), SIMDE_FLOAT32_C( -737.13)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -70.05), SIMDE_FLOAT32_C( -784.34), SIMDE_FLOAT32_C( 92.52), SIMDE_FLOAT32_C( 206.60), SIMDE_FLOAT32_C( 834.60), SIMDE_FLOAT32_C( -65.60), SIMDE_FLOAT32_C( -286.07), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -318.38), SIMDE_FLOAT32_C( 783.48), SIMDE_FLOAT32_C( -628.82), SIMDE_FLOAT32_C( 556.35), SIMDE_FLOAT32_C( 439.43), SIMDE_FLOAT32_C( 434.03), SIMDE_FLOAT32_C( 496.57), SIMDE_FLOAT32_C( 915.19)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.66), SIMDE_FLOAT32_C( 2.35), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( -1.32), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 1.68), SIMDE_FLOAT32_C( -3.10), SIMDE_FLOAT32_C( -2.17), SIMDE_FLOAT32_C( -1.99), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 2.73), SIMDE_FLOAT32_C( -1.03), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -1.07), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( -0.68)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 638.94), SIMDE_FLOAT32_C( 450.90), SIMDE_FLOAT32_C( -771.54), SIMDE_FLOAT32_C( 105.79), SIMDE_FLOAT32_C( 590.10), SIMDE_FLOAT32_C( 30.91), SIMDE_FLOAT32_C( 635.35), SIMDE_FLOAT32_C( -84.00), SIMDE_FLOAT32_C( 80.04), SIMDE_FLOAT32_C( -709.46), SIMDE_FLOAT32_C( 607.86), SIMDE_FLOAT32_C( 394.58), SIMDE_FLOAT32_C( -889.11), SIMDE_FLOAT32_C( -964.25), SIMDE_FLOAT32_C( -406.33), SIMDE_FLOAT32_C( -743.66)), 
simde_mm512_set_ps(SIMDE_FLOAT32_C( -427.72), SIMDE_FLOAT32_C( 308.28), SIMDE_FLOAT32_C( -177.05), SIMDE_FLOAT32_C( -457.77), SIMDE_FLOAT32_C( 678.24), SIMDE_FLOAT32_C( 66.05), SIMDE_FLOAT32_C( -267.71), SIMDE_FLOAT32_C( 117.28), SIMDE_FLOAT32_C( -576.80), SIMDE_FLOAT32_C( -38.39), SIMDE_FLOAT32_C( -250.14), SIMDE_FLOAT32_C( -53.92), SIMDE_FLOAT32_C( 91.94), SIMDE_FLOAT32_C( -78.84), SIMDE_FLOAT32_C( 883.05), SIMDE_FLOAT32_C( -807.28)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.16), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( -1.80), SIMDE_FLOAT32_C( 2.91), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 1.97), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( -1.62), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( 1.71), SIMDE_FLOAT32_C( -1.47), SIMDE_FLOAT32_C( -1.65), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( -2.40)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -736.75), SIMDE_FLOAT32_C( -310.57), SIMDE_FLOAT32_C( 728.87), SIMDE_FLOAT32_C( -350.72), SIMDE_FLOAT32_C( 60.89), SIMDE_FLOAT32_C( 109.81), SIMDE_FLOAT32_C( 715.94), SIMDE_FLOAT32_C( -250.60), SIMDE_FLOAT32_C( 944.14), SIMDE_FLOAT32_C( 361.85), SIMDE_FLOAT32_C( -13.07), SIMDE_FLOAT32_C( 852.60), SIMDE_FLOAT32_C( -440.06), SIMDE_FLOAT32_C( 529.63), SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -967.78)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 822.72), SIMDE_FLOAT32_C( 956.68), SIMDE_FLOAT32_C( 954.62), SIMDE_FLOAT32_C( 825.49), SIMDE_FLOAT32_C( -816.27), SIMDE_FLOAT32_C( -209.34), SIMDE_FLOAT32_C( -933.21), SIMDE_FLOAT32_C( -728.70), SIMDE_FLOAT32_C( -420.06), SIMDE_FLOAT32_C( 100.32), SIMDE_FLOAT32_C( 103.15), SIMDE_FLOAT32_C( 439.77), SIMDE_FLOAT32_C( -204.33), SIMDE_FLOAT32_C( 18.75), SIMDE_FLOAT32_C( 809.05), SIMDE_FLOAT32_C( 144.05)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 3.07), SIMDE_FLOAT32_C( 2.66), SIMDE_FLOAT32_C( 2.49), SIMDE_FLOAT32_C( -2.81), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 
1.30), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 1.09), SIMDE_FLOAT32_C( -2.01), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( -1.42)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_atan2_ps(test_vec[i].a, test_vec[i].b); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_atan2_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 b; simde__m512 r; } test_vec[9] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( 28.08), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 
261.31), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 346.63)), UINT16_C(25611), simde_mm512_set_ps(SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( 696.87), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -754.38)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 841.21), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 39.01)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( 28.08), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( -1.52)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 977.36), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -328.54), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( -70.91), SIMDE_FLOAT32_C( -171.51), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( -944.78), 
SIMDE_FLOAT32_C( -554.19)), UINT16_C(63749), simde_mm512_set_ps(SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( -738.19), SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 120.65), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 254.31), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -767.23)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -95.15), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( 543.35), SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( -747.59)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.71), SIMDE_FLOAT32_C( -1.61), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 2.79), SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -2.40), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( -70.91), SIMDE_FLOAT32_C( -171.51), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -2.34)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 237.58), SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( -623.50), SIMDE_FLOAT32_C( -775.04), SIMDE_FLOAT32_C( 936.65), SIMDE_FLOAT32_C( -197.89), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -337.60), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( 710.38), SIMDE_FLOAT32_C( 424.81)), UINT16_C(23119), simde_mm512_set_ps(SIMDE_FLOAT32_C( -598.06), SIMDE_FLOAT32_C( -378.50), SIMDE_FLOAT32_C( 221.37), SIMDE_FLOAT32_C( -942.47), SIMDE_FLOAT32_C( 440.64), SIMDE_FLOAT32_C( -348.70), 
SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( -756.42)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -76.75), SIMDE_FLOAT32_C( -791.07), SIMDE_FLOAT32_C( -601.68), SIMDE_FLOAT32_C( -788.36), SIMDE_FLOAT32_C( 475.51), SIMDE_FLOAT32_C( 897.27), SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( -696.69), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( 977.49)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 237.58), SIMDE_FLOAT32_C( -2.70), SIMDE_FLOAT32_C( -623.50), SIMDE_FLOAT32_C( -2.27), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( -197.89), SIMDE_FLOAT32_C( -2.45), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 1.49), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -2.09), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( -3.02), SIMDE_FLOAT32_C( -0.66)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -743.66), SIMDE_FLOAT32_C( -784.34), SIMDE_FLOAT32_C( 4.83), SIMDE_FLOAT32_C( 834.60), SIMDE_FLOAT32_C( 579.25), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -719.57), SIMDE_FLOAT32_C( -628.82), SIMDE_FLOAT32_C( -916.82), SIMDE_FLOAT32_C( 434.03), SIMDE_FLOAT32_C( -15.61), SIMDE_FLOAT32_C( -718.40), SIMDE_FLOAT32_C( 177.92), SIMDE_FLOAT32_C( 426.61), SIMDE_FLOAT32_C( 915.71), SIMDE_FLOAT32_C( 826.84)), UINT16_C(57786), simde_mm512_set_ps(SIMDE_FLOAT32_C( -807.28), SIMDE_FLOAT32_C( -764.58), SIMDE_FLOAT32_C( 92.52), SIMDE_FLOAT32_C( -818.54), SIMDE_FLOAT32_C( -65.60), SIMDE_FLOAT32_C( -11.78), SIMDE_FLOAT32_C( -318.38), SIMDE_FLOAT32_C( 334.00), SIMDE_FLOAT32_C( 556.35), SIMDE_FLOAT32_C( -490.00), SIMDE_FLOAT32_C( 496.57), SIMDE_FLOAT32_C( -737.13), SIMDE_FLOAT32_C( 159.97), SIMDE_FLOAT32_C( 345.93), 
SIMDE_FLOAT32_C( 932.11), SIMDE_FLOAT32_C( 133.52)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -406.33), SIMDE_FLOAT32_C( -70.05), SIMDE_FLOAT32_C( 789.89), SIMDE_FLOAT32_C( 206.60), SIMDE_FLOAT32_C( 161.06), SIMDE_FLOAT32_C( -286.07), SIMDE_FLOAT32_C( -308.52), SIMDE_FLOAT32_C( 783.48), SIMDE_FLOAT32_C( 274.71), SIMDE_FLOAT32_C( 439.43), SIMDE_FLOAT32_C( -799.40), SIMDE_FLOAT32_C( 915.19), SIMDE_FLOAT32_C( -314.93), SIMDE_FLOAT32_C( -861.01), SIMDE_FLOAT32_C( 888.71), SIMDE_FLOAT32_C( 110.36)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -2.04), SIMDE_FLOAT32_C( -1.66), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 834.60), SIMDE_FLOAT32_C( 579.25), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -719.57), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 1.11), SIMDE_FLOAT32_C( 434.03), SIMDE_FLOAT32_C( 2.59), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( 2.67), SIMDE_FLOAT32_C( 426.61), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 826.84)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -728.70), SIMDE_FLOAT32_C( 944.14), SIMDE_FLOAT32_C( 103.15), SIMDE_FLOAT32_C( 852.60), SIMDE_FLOAT32_C( 18.75), SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -427.72), SIMDE_FLOAT32_C( 450.90), SIMDE_FLOAT32_C( -457.77), SIMDE_FLOAT32_C( 590.10), SIMDE_FLOAT32_C( -267.71), SIMDE_FLOAT32_C( -84.00), SIMDE_FLOAT32_C( -38.39), SIMDE_FLOAT32_C( 607.86), SIMDE_FLOAT32_C( 91.94), SIMDE_FLOAT32_C( -964.25)), UINT16_C(25589), simde_mm512_set_ps(SIMDE_FLOAT32_C( 715.94), SIMDE_FLOAT32_C( -420.06), SIMDE_FLOAT32_C( 361.85), SIMDE_FLOAT32_C( 439.77), SIMDE_FLOAT32_C( -440.06), SIMDE_FLOAT32_C( 809.05), SIMDE_FLOAT32_C( -967.78), SIMDE_FLOAT32_C( 308.28), SIMDE_FLOAT32_C( -771.54), SIMDE_FLOAT32_C( 678.24), SIMDE_FLOAT32_C( 30.91), SIMDE_FLOAT32_C( 117.28), SIMDE_FLOAT32_C( 80.04), SIMDE_FLOAT32_C( -250.14), SIMDE_FLOAT32_C( 394.58), SIMDE_FLOAT32_C( -78.84)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -933.21), SIMDE_FLOAT32_C( -250.60), SIMDE_FLOAT32_C( 100.32), SIMDE_FLOAT32_C( -13.07), SIMDE_FLOAT32_C( -204.33), SIMDE_FLOAT32_C( 529.63), 
SIMDE_FLOAT32_C( 144.05), SIMDE_FLOAT32_C( 638.94), SIMDE_FLOAT32_C( -177.05), SIMDE_FLOAT32_C( 105.79), SIMDE_FLOAT32_C( 66.05), SIMDE_FLOAT32_C( 635.35), SIMDE_FLOAT32_C( -576.80), SIMDE_FLOAT32_C( -709.46), SIMDE_FLOAT32_C( -53.92), SIMDE_FLOAT32_C( -889.11)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -728.70), SIMDE_FLOAT32_C( -2.11), SIMDE_FLOAT32_C( 1.30), SIMDE_FLOAT32_C( 852.60), SIMDE_FLOAT32_C( 18.75), SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -1.42), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( -1.80), SIMDE_FLOAT32_C( 1.42), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -38.39), SIMDE_FLOAT32_C( -2.80), SIMDE_FLOAT32_C( 91.94), SIMDE_FLOAT32_C( -3.05)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 195.04), SIMDE_FLOAT32_C( 266.59), SIMDE_FLOAT32_C( 227.06), SIMDE_FLOAT32_C( 410.49), SIMDE_FLOAT32_C( -523.93), SIMDE_FLOAT32_C( 762.39), SIMDE_FLOAT32_C( 112.81), SIMDE_FLOAT32_C( 686.52), SIMDE_FLOAT32_C( 719.98), SIMDE_FLOAT32_C( 766.36), SIMDE_FLOAT32_C( -14.16), SIMDE_FLOAT32_C( -493.41), SIMDE_FLOAT32_C( -736.75), SIMDE_FLOAT32_C( 954.62), SIMDE_FLOAT32_C( -350.72), SIMDE_FLOAT32_C( -209.34)), UINT16_C(43196), simde_mm512_set_ps(SIMDE_FLOAT32_C( -658.72), SIMDE_FLOAT32_C( -177.76), SIMDE_FLOAT32_C( -265.00), SIMDE_FLOAT32_C( -554.31), SIMDE_FLOAT32_C( 533.87), SIMDE_FLOAT32_C( 51.67), SIMDE_FLOAT32_C( -492.25), SIMDE_FLOAT32_C( 777.74), SIMDE_FLOAT32_C( 793.81), SIMDE_FLOAT32_C( 15.12), SIMDE_FLOAT32_C( -788.39), SIMDE_FLOAT32_C( 824.88), SIMDE_FLOAT32_C( 822.72), SIMDE_FLOAT32_C( -310.57), SIMDE_FLOAT32_C( 825.49), SIMDE_FLOAT32_C( 60.89)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -371.53), SIMDE_FLOAT32_C( 353.46), SIMDE_FLOAT32_C( -605.99), SIMDE_FLOAT32_C( -513.13), SIMDE_FLOAT32_C( -390.22), SIMDE_FLOAT32_C( -973.72), SIMDE_FLOAT32_C( -469.41), SIMDE_FLOAT32_C( 31.72), SIMDE_FLOAT32_C( -35.27), SIMDE_FLOAT32_C( -851.21), SIMDE_FLOAT32_C( -841.43), SIMDE_FLOAT32_C( 330.43), SIMDE_FLOAT32_C( 793.63), SIMDE_FLOAT32_C( 956.68), SIMDE_FLOAT32_C( 
728.87), SIMDE_FLOAT32_C( -816.27)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -2.08), SIMDE_FLOAT32_C( 266.59), SIMDE_FLOAT32_C( -2.73), SIMDE_FLOAT32_C( 410.49), SIMDE_FLOAT32_C( 2.20), SIMDE_FLOAT32_C( 762.39), SIMDE_FLOAT32_C( 112.81), SIMDE_FLOAT32_C( 686.52), SIMDE_FLOAT32_C( 1.62), SIMDE_FLOAT32_C( 766.36), SIMDE_FLOAT32_C( -2.39), SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -350.72), SIMDE_FLOAT32_C( -209.34)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -858.24), SIMDE_FLOAT32_C( -559.04), SIMDE_FLOAT32_C( -867.90), SIMDE_FLOAT32_C( -91.47), SIMDE_FLOAT32_C( -996.53), SIMDE_FLOAT32_C( 7.89), SIMDE_FLOAT32_C( 519.91), SIMDE_FLOAT32_C( -788.90), SIMDE_FLOAT32_C( 494.45), SIMDE_FLOAT32_C( 338.97), SIMDE_FLOAT32_C( 858.03), SIMDE_FLOAT32_C( -607.40), SIMDE_FLOAT32_C( 289.29), SIMDE_FLOAT32_C( 618.46), SIMDE_FLOAT32_C( 413.47), SIMDE_FLOAT32_C( -978.77)), UINT16_C( 4768), simde_mm512_set_ps(SIMDE_FLOAT32_C( 740.49), SIMDE_FLOAT32_C( -751.81), SIMDE_FLOAT32_C( 13.69), SIMDE_FLOAT32_C( 786.36), SIMDE_FLOAT32_C( -616.97), SIMDE_FLOAT32_C( 500.34), SIMDE_FLOAT32_C( -906.43), SIMDE_FLOAT32_C( 690.06), SIMDE_FLOAT32_C( -252.06), SIMDE_FLOAT32_C( 828.60), SIMDE_FLOAT32_C( -203.59), SIMDE_FLOAT32_C( 933.39), SIMDE_FLOAT32_C( -10.85), SIMDE_FLOAT32_C( -429.78), SIMDE_FLOAT32_C( 190.25), SIMDE_FLOAT32_C( 546.67)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -969.00), SIMDE_FLOAT32_C( 251.09), SIMDE_FLOAT32_C( 109.97), SIMDE_FLOAT32_C( 792.28), SIMDE_FLOAT32_C( -643.59), SIMDE_FLOAT32_C( 926.98), SIMDE_FLOAT32_C( -815.02), SIMDE_FLOAT32_C( 181.20), SIMDE_FLOAT32_C( -206.24), SIMDE_FLOAT32_C( 378.12), SIMDE_FLOAT32_C( -36.10), SIMDE_FLOAT32_C( -538.28), SIMDE_FLOAT32_C( 894.04), SIMDE_FLOAT32_C( 72.41), SIMDE_FLOAT32_C( 681.48), SIMDE_FLOAT32_C( 677.82)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -858.24), SIMDE_FLOAT32_C( -559.04), SIMDE_FLOAT32_C( -867.90), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -996.53), SIMDE_FLOAT32_C( 7.89), SIMDE_FLOAT32_C( 
-2.30), SIMDE_FLOAT32_C( -788.90), SIMDE_FLOAT32_C( -2.26), SIMDE_FLOAT32_C( 338.97), SIMDE_FLOAT32_C( -1.75), SIMDE_FLOAT32_C( -607.40), SIMDE_FLOAT32_C( 289.29), SIMDE_FLOAT32_C( 618.46), SIMDE_FLOAT32_C( 413.47), SIMDE_FLOAT32_C( -978.77)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -69.61), SIMDE_FLOAT32_C( -548.92), SIMDE_FLOAT32_C( 625.99), SIMDE_FLOAT32_C( 381.43), SIMDE_FLOAT32_C( 949.66), SIMDE_FLOAT32_C( -196.91), SIMDE_FLOAT32_C( 28.28), SIMDE_FLOAT32_C( -181.88), SIMDE_FLOAT32_C( 536.29), SIMDE_FLOAT32_C( -985.19), SIMDE_FLOAT32_C( 77.09), SIMDE_FLOAT32_C( 315.82), SIMDE_FLOAT32_C( 11.44), SIMDE_FLOAT32_C( -742.19), SIMDE_FLOAT32_C( 808.07), SIMDE_FLOAT32_C( -406.94)), UINT16_C(49835), simde_mm512_set_ps(SIMDE_FLOAT32_C( -137.31), SIMDE_FLOAT32_C( -142.23), SIMDE_FLOAT32_C( 35.44), SIMDE_FLOAT32_C( -260.69), SIMDE_FLOAT32_C( -868.51), SIMDE_FLOAT32_C( -878.61), SIMDE_FLOAT32_C( 777.12), SIMDE_FLOAT32_C( 132.77), SIMDE_FLOAT32_C( -396.93), SIMDE_FLOAT32_C( 836.29), SIMDE_FLOAT32_C( -770.09), SIMDE_FLOAT32_C( 911.50), SIMDE_FLOAT32_C( 393.21), SIMDE_FLOAT32_C( -291.56), SIMDE_FLOAT32_C( 446.83), SIMDE_FLOAT32_C( 802.68)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -821.75), SIMDE_FLOAT32_C( -892.28), SIMDE_FLOAT32_C( -852.69), SIMDE_FLOAT32_C( 9.54), SIMDE_FLOAT32_C( -850.83), SIMDE_FLOAT32_C( 144.77), SIMDE_FLOAT32_C( 932.71), SIMDE_FLOAT32_C( -565.94), SIMDE_FLOAT32_C( -821.82), SIMDE_FLOAT32_C( -929.08), SIMDE_FLOAT32_C( -624.00), SIMDE_FLOAT32_C( -595.23), SIMDE_FLOAT32_C( 666.07), SIMDE_FLOAT32_C( -246.97), SIMDE_FLOAT32_C( -517.48), SIMDE_FLOAT32_C( 645.83)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -2.98), SIMDE_FLOAT32_C( -2.98), SIMDE_FLOAT32_C( 625.99), SIMDE_FLOAT32_C( 381.43), SIMDE_FLOAT32_C( 949.66), SIMDE_FLOAT32_C( -196.91), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( -181.88), SIMDE_FLOAT32_C( -2.69), SIMDE_FLOAT32_C( -985.19), SIMDE_FLOAT32_C( -2.25), SIMDE_FLOAT32_C( 315.82), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -742.19), SIMDE_FLOAT32_C( 2.43), 
SIMDE_FLOAT32_C( 0.89)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( 696.87), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( 346.63)), UINT16_C(41466), simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( -754.38)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 346.63)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_atan2_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } 
/* Test for simde_mm512_atan2_pd: each of the 9 cases supplies two 8-lane double vectors a and b plus the expected vector r, whose lanes hold atan2(a, b) written to two decimals (a is the first, i.e. y, argument). The computed vector is checked with simde_assert_m512d_close(r, expected, 1). NOTE(review): the trailing 1 is presumably a precision in decimal places -- confirm against the macro definition. Returns 0 once every case has been checked. */
static int test_simde_mm512_atan2_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d b; simde__m512d r; } test_vec[9] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( 467.76), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( 346.63)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( -754.38)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -2.35), SIMDE_FLOAT64_C( 1.42), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 2.09), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 3.03), SIMDE_FLOAT64_C( 2.94), SIMDE_FLOAT64_C( 2.71)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 687.09), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -660.80), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -923.64), SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( 696.87)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -417.54)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.19), SIMDE_FLOAT64_C( 1.24), SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -2.55), SIMDE_FLOAT64_C( 3.08), SIMDE_FLOAT64_C( -1.96), SIMDE_FLOAT64_C( -1.91), SIMDE_FLOAT64_C( 2.11)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 339.21), SIMDE_FLOAT64_C( -263.99), SIMDE_FLOAT64_C( -30.79), SIMDE_FLOAT64_C( 443.48), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( 993.90), SIMDE_FLOAT64_C( 841.21)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -387.90), SIMDE_FLOAT64_C( 655.87), SIMDE_FLOAT64_C( 532.35), SIMDE_FLOAT64_C( 780.64), SIMDE_FLOAT64_C( -770.35), SIMDE_FLOAT64_C( -583.60), SIMDE_FLOAT64_C( -770.72), SIMDE_FLOAT64_C( 28.08)), 
simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.35), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( -0.46), SIMDE_FLOAT64_C( -0.04), SIMDE_FLOAT64_C( 2.62), SIMDE_FLOAT64_C( 2.56), SIMDE_FLOAT64_C( 2.23), SIMDE_FLOAT64_C( 1.54)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( 254.31), SIMDE_FLOAT64_C( -80.73), SIMDE_FLOAT64_C( -944.78), SIMDE_FLOAT64_C( -767.23), SIMDE_FLOAT64_C( 398.82)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 791.23), SIMDE_FLOAT64_C( -203.65), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -747.59), SIMDE_FLOAT64_C( -554.19)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.97), SIMDE_FLOAT64_C( 1.75), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 0.31), SIMDE_FLOAT64_C( -2.76), SIMDE_FLOAT64_C( -1.23), SIMDE_FLOAT64_C( -2.34), SIMDE_FLOAT64_C( 2.52)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 758.79), SIMDE_FLOAT64_C( 343.48), SIMDE_FLOAT64_C( -797.92), SIMDE_FLOAT64_C( -525.83), SIMDE_FLOAT64_C( -822.65), SIMDE_FLOAT64_C( 655.67), SIMDE_FLOAT64_C( 543.35), SIMDE_FLOAT64_C( -171.51)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -178.99), SIMDE_FLOAT64_C( 324.62), SIMDE_FLOAT64_C( -874.31), SIMDE_FLOAT64_C( -328.54), SIMDE_FLOAT64_C( -192.31), SIMDE_FLOAT64_C( 561.36), SIMDE_FLOAT64_C( -70.91), SIMDE_FLOAT64_C( 120.65)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.80), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( -2.40), SIMDE_FLOAT64_C( -2.13), SIMDE_FLOAT64_C( -1.80), SIMDE_FLOAT64_C( 0.86), SIMDE_FLOAT64_C( 1.70), SIMDE_FLOAT64_C( -0.96)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 840.65), SIMDE_FLOAT64_C( -591.56), SIMDE_FLOAT64_C( 731.49), SIMDE_FLOAT64_C( 623.70), SIMDE_FLOAT64_C( 140.67), SIMDE_FLOAT64_C( -906.16), SIMDE_FLOAT64_C( 99.93), SIMDE_FLOAT64_C( -738.19)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 690.12), SIMDE_FLOAT64_C( -21.09), SIMDE_FLOAT64_C( -448.89), SIMDE_FLOAT64_C( 505.79), SIMDE_FLOAT64_C( 831.02), SIMDE_FLOAT64_C( 977.36), 
SIMDE_FLOAT64_C( 331.34), SIMDE_FLOAT64_C( 462.95)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.88), SIMDE_FLOAT64_C( -1.61), SIMDE_FLOAT64_C( 2.12), SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( 0.17), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.29), SIMDE_FLOAT64_C( -1.01)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 822.06), SIMDE_FLOAT64_C( -997.63), SIMDE_FLOAT64_C( 923.64), SIMDE_FLOAT64_C( -768.12), SIMDE_FLOAT64_C( -67.64), SIMDE_FLOAT64_C( 977.49), SIMDE_FLOAT64_C( 424.81), SIMDE_FLOAT64_C( -95.15)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -769.09), SIMDE_FLOAT64_C( -573.81), SIMDE_FLOAT64_C( -337.60), SIMDE_FLOAT64_C( 293.64), SIMDE_FLOAT64_C( -576.22), SIMDE_FLOAT64_C( 710.38), SIMDE_FLOAT64_C( -756.42), SIMDE_FLOAT64_C( 27.25)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.32), SIMDE_FLOAT64_C( -2.09), SIMDE_FLOAT64_C( 1.92), SIMDE_FLOAT64_C( -1.21), SIMDE_FLOAT64_C( -3.02), SIMDE_FLOAT64_C( 0.94), SIMDE_FLOAT64_C( 2.63), SIMDE_FLOAT64_C( -1.29)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -359.76), SIMDE_FLOAT64_C( -33.67), SIMDE_FLOAT64_C( 7.27), SIMDE_FLOAT64_C( -125.20), SIMDE_FLOAT64_C( 39.93), SIMDE_FLOAT64_C( 394.67), SIMDE_FLOAT64_C( -304.73), SIMDE_FLOAT64_C( -696.69)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -438.19), SIMDE_FLOAT64_C( -752.43), SIMDE_FLOAT64_C( 932.66), SIMDE_FLOAT64_C( -327.22), SIMDE_FLOAT64_C( -182.45), SIMDE_FLOAT64_C( 510.85), SIMDE_FLOAT64_C( 14.34), SIMDE_FLOAT64_C( 916.26)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -2.45), SIMDE_FLOAT64_C( -3.10), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( -2.78), SIMDE_FLOAT64_C( 2.93), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( -1.52), SIMDE_FLOAT64_C( -0.65)) }, /* b is 0.00 in every lane: expected r is 1.57 or -1.57, following the sign of a. */ { simde_mm512_set_pd(SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( 791.23), SIMDE_FLOAT64_C( 254.31)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 
0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 1.57)) } }; /* Evaluate every tabulated case; sizeof(test_vec) / sizeof(test_vec[0]) is the number of cases. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_atan2_pd(test_vec[i].a, test_vec[i].b); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } /* Test for simde_mm512_mask_atan2_pd: each of the 9 cases supplies src, an 8-bit mask k, inputs a and b, and the expected vector r. In the table a lane of r holds atan2(a, b) written to two decimals where the matching bit of k is 1, and the src value where it is 0; bit 0 of k pairs with the LAST argument of simde_mm512_set_pd. Results are checked with simde_assert_m512d_close(r, expected, 1). NOTE(review): the trailing 1 is presumably a precision in decimal places -- confirm against the macro definition. Returns 0 once every case has been checked. */ static int test_simde_mm512_mask_atan2_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d b; simde__m512d r; } test_vec[9] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 346.63)), UINT8_C(212), simde_mm512_set_pd(SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( 696.87), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( -754.38)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( -923.64), SIMDE_FLOAT64_C( -417.54), SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 467.76), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 39.01)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.08), SIMDE_FLOAT64_C( -2.82), SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( 3.02), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 346.63)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 532.35), SIMDE_FLOAT64_C( -30.79), SIMDE_FLOAT64_C( -583.60), SIMDE_FLOAT64_C( 993.90), SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( 687.09), SIMDE_FLOAT64_C( -976.55)), UINT8_C(126), simde_mm512_set_pd(SIMDE_FLOAT64_C( -387.90), SIMDE_FLOAT64_C( 339.21), SIMDE_FLOAT64_C( 780.64), SIMDE_FLOAT64_C( 443.48), 
SIMDE_FLOAT64_C( -770.72), SIMDE_FLOAT64_C( 841.21), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( -212.54)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 398.82), SIMDE_FLOAT64_C( 655.87), SIMDE_FLOAT64_C( -263.99), SIMDE_FLOAT64_C( -770.35), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( 28.08), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 261.31)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( 1.90), SIMDE_FLOAT64_C( 2.62), SIMDE_FLOAT64_C( -1.11), SIMDE_FLOAT64_C( 1.54), SIMDE_FLOAT64_C( 2.66), SIMDE_FLOAT64_C( -976.55)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -192.31), SIMDE_FLOAT64_C( 655.67), SIMDE_FLOAT64_C( 120.65), SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 254.31), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -767.23)), UINT8_C( 39), simde_mm512_set_pd(SIMDE_FLOAT64_C( -525.83), SIMDE_FLOAT64_C( 561.36), SIMDE_FLOAT64_C( 543.35), SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 791.23), SIMDE_FLOAT64_C( -80.73), SIMDE_FLOAT64_C( -747.59)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -328.54), SIMDE_FLOAT64_C( -822.65), SIMDE_FLOAT64_C( -70.91), SIMDE_FLOAT64_C( -171.51), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( -203.65), SIMDE_FLOAT64_C( -944.78)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -192.31), SIMDE_FLOAT64_C( 655.67), SIMDE_FLOAT64_C( 1.70), SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( -2.76), SIMDE_FLOAT64_C( -2.47)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -95.15), SIMDE_FLOAT64_C( -21.09), SIMDE_FLOAT64_C( 731.49), SIMDE_FLOAT64_C( 831.02), SIMDE_FLOAT64_C( -906.16), SIMDE_FLOAT64_C( 462.95), SIMDE_FLOAT64_C( 758.79), SIMDE_FLOAT64_C( -874.31)), UINT8_C( 45), simde_mm512_set_pd(SIMDE_FLOAT64_C( 27.25), SIMDE_FLOAT64_C( 840.65), SIMDE_FLOAT64_C( -448.89), SIMDE_FLOAT64_C( 623.70), SIMDE_FLOAT64_C( 977.36), SIMDE_FLOAT64_C( 99.93), SIMDE_FLOAT64_C( -178.99), SIMDE_FLOAT64_C( 343.48)), 
simde_mm512_set_pd(SIMDE_FLOAT64_C( 424.81), SIMDE_FLOAT64_C( 690.12), SIMDE_FLOAT64_C( -591.56), SIMDE_FLOAT64_C( 505.79), SIMDE_FLOAT64_C( 140.67), SIMDE_FLOAT64_C( 331.34), SIMDE_FLOAT64_C( -738.19), SIMDE_FLOAT64_C( 324.62)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -95.15), SIMDE_FLOAT64_C( -21.09), SIMDE_FLOAT64_C( -2.49), SIMDE_FLOAT64_C( 831.02), SIMDE_FLOAT64_C( 1.43), SIMDE_FLOAT64_C( 0.29), SIMDE_FLOAT64_C( 758.79), SIMDE_FLOAT64_C( 0.81)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -327.22), SIMDE_FLOAT64_C( 39.93), SIMDE_FLOAT64_C( 14.34), SIMDE_FLOAT64_C( -696.69), SIMDE_FLOAT64_C( -573.81), SIMDE_FLOAT64_C( 923.64), SIMDE_FLOAT64_C( -576.22), SIMDE_FLOAT64_C( 977.49)), UINT8_C(108), simde_mm512_set_pd(SIMDE_FLOAT64_C( 7.27), SIMDE_FLOAT64_C( -182.45), SIMDE_FLOAT64_C( 394.67), SIMDE_FLOAT64_C( 916.26), SIMDE_FLOAT64_C( 822.06), SIMDE_FLOAT64_C( -337.60), SIMDE_FLOAT64_C( -768.12), SIMDE_FLOAT64_C( 710.38)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 932.66), SIMDE_FLOAT64_C( -125.20), SIMDE_FLOAT64_C( 510.85), SIMDE_FLOAT64_C( -304.73), SIMDE_FLOAT64_C( -769.09), SIMDE_FLOAT64_C( -997.63), SIMDE_FLOAT64_C( 293.64), SIMDE_FLOAT64_C( -67.64)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -327.22), SIMDE_FLOAT64_C( -2.17), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( -696.69), SIMDE_FLOAT64_C( 2.32), SIMDE_FLOAT64_C( -2.82), SIMDE_FLOAT64_C( -576.22), SIMDE_FLOAT64_C( 977.49)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 484.94), SIMDE_FLOAT64_C( 237.58), SIMDE_FLOAT64_C( -765.93), SIMDE_FLOAT64_C( -623.50), SIMDE_FLOAT64_C( -775.04), SIMDE_FLOAT64_C( 936.65), SIMDE_FLOAT64_C( -197.89), SIMDE_FLOAT64_C( -752.43)), UINT8_C(214), simde_mm512_set_pd(SIMDE_FLOAT64_C( 826.84), SIMDE_FLOAT64_C( -598.06), SIMDE_FLOAT64_C( -378.50), SIMDE_FLOAT64_C( 221.37), SIMDE_FLOAT64_C( -942.47), SIMDE_FLOAT64_C( 440.64), SIMDE_FLOAT64_C( -348.70), SIMDE_FLOAT64_C( -359.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 133.52), SIMDE_FLOAT64_C( -76.75), SIMDE_FLOAT64_C( -791.07), SIMDE_FLOAT64_C( -601.68), 
SIMDE_FLOAT64_C( -788.36), SIMDE_FLOAT64_C( 475.51), SIMDE_FLOAT64_C( 897.27), SIMDE_FLOAT64_C( -438.19)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.41), SIMDE_FLOAT64_C( -1.70), SIMDE_FLOAT64_C( -765.93), SIMDE_FLOAT64_C( 2.79), SIMDE_FLOAT64_C( -775.04), SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( -0.37), SIMDE_FLOAT64_C( -752.43)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -628.82), SIMDE_FLOAT64_C( -916.82), SIMDE_FLOAT64_C( 434.03), SIMDE_FLOAT64_C( -15.61), SIMDE_FLOAT64_C( -718.40), SIMDE_FLOAT64_C( 177.92), SIMDE_FLOAT64_C( 426.61), SIMDE_FLOAT64_C( 915.71)), UINT8_C( 31), simde_mm512_set_pd(SIMDE_FLOAT64_C( 334.00), SIMDE_FLOAT64_C( 556.35), SIMDE_FLOAT64_C( -490.00), SIMDE_FLOAT64_C( 496.57), SIMDE_FLOAT64_C( -737.13), SIMDE_FLOAT64_C( 159.97), SIMDE_FLOAT64_C( 345.93), SIMDE_FLOAT64_C( 932.11)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 783.48), SIMDE_FLOAT64_C( 274.71), SIMDE_FLOAT64_C( 439.43), SIMDE_FLOAT64_C( -799.40), SIMDE_FLOAT64_C( 915.19), SIMDE_FLOAT64_C( -314.93), SIMDE_FLOAT64_C( -861.01), SIMDE_FLOAT64_C( 888.71)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -628.82), SIMDE_FLOAT64_C( -916.82), SIMDE_FLOAT64_C( 434.03), SIMDE_FLOAT64_C( 2.59), SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( 2.67), SIMDE_FLOAT64_C( 2.76), SIMDE_FLOAT64_C( 0.81)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -964.25), SIMDE_FLOAT64_C( -807.28), SIMDE_FLOAT64_C( -764.58), SIMDE_FLOAT64_C( 92.52), SIMDE_FLOAT64_C( -818.54), SIMDE_FLOAT64_C( -65.60), SIMDE_FLOAT64_C( -11.78), SIMDE_FLOAT64_C( -318.38)), UINT8_C( 46), simde_mm512_set_pd(SIMDE_FLOAT64_C( -78.84), SIMDE_FLOAT64_C( -406.33), SIMDE_FLOAT64_C( -70.05), SIMDE_FLOAT64_C( 789.89), SIMDE_FLOAT64_C( 206.60), SIMDE_FLOAT64_C( 161.06), SIMDE_FLOAT64_C( -286.07), SIMDE_FLOAT64_C( -308.52)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -889.11), SIMDE_FLOAT64_C( 883.05), SIMDE_FLOAT64_C( -743.66), SIMDE_FLOAT64_C( -784.34), SIMDE_FLOAT64_C( 4.83), SIMDE_FLOAT64_C( 834.60), SIMDE_FLOAT64_C( 579.25), SIMDE_FLOAT64_C( -212.86)), 
simde_mm512_set_pd(SIMDE_FLOAT64_C( -964.25), SIMDE_FLOAT64_C( -807.28), SIMDE_FLOAT64_C( -3.05), SIMDE_FLOAT64_C( 92.52), SIMDE_FLOAT64_C( 1.55), SIMDE_FLOAT64_C( 0.19), SIMDE_FLOAT64_C( -0.46), SIMDE_FLOAT64_C( -318.38)) }, /* b is 0.00 in every lane: lanes selected by k expect 1.57 or -1.57, following the sign of a. */ { simde_mm512_set_pd(SIMDE_FLOAT64_C( -686.00), SIMDE_FLOAT64_C( 571.00), SIMDE_FLOAT64_C( 422.00), SIMDE_FLOAT64_C( 468.00), SIMDE_FLOAT64_C( 670.00), SIMDE_FLOAT64_C( 34.00), SIMDE_FLOAT64_C( 39.00), SIMDE_FLOAT64_C( 347.00)), UINT8_C(139), simde_mm512_set_pd(SIMDE_FLOAT64_C( -678.00), SIMDE_FLOAT64_C( 85.00), SIMDE_FLOAT64_C( 826.00), SIMDE_FLOAT64_C( -269.00), SIMDE_FLOAT64_C( 497.00), SIMDE_FLOAT64_C( -297.00), SIMDE_FLOAT64_C( -186.00), SIMDE_FLOAT64_C( -754.00)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( 571.00), SIMDE_FLOAT64_C( 422.00), SIMDE_FLOAT64_C( 468.00), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 34.00), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -1.57)) } }; /* Evaluate every tabulated case; sizeof(test_vec) / sizeof(test_vec[0]) is the number of cases. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_atan2_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } /* Test for simde_mm_atanh_ps: 8 cases, each a 4-lane float input a and an expected vector r holding atanh(a) written to two decimals; every tabulated input lies strictly inside (-1, 1). Results are checked with simde_assert_m128_close(r, expected, 1). NOTE(review): the trailing 1 is presumably a precision in decimal places -- confirm against the macro definition. Returns 0 once every case has been checked. */ static int test_simde_mm_atanh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.35)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( 0.37)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.03)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 0.03)) }, { 
simde_mm_set_ps(SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.47)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( 0.51)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.57)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.65)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( -1.29), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.87)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.92)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( -1.59)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.66)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -2.30), SIMDE_FLOAT32_C( -0.79)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.69)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.85)) } }; /* Evaluate every tabulated case; sizeof(test_vec) / sizeof(test_vec[0]) is the number of cases. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_atanh_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } /* Test for simde_mm_atanh_pd: 8 cases, each a 2-lane double input a and an expected vector r holding atanh(a) written to two decimals; every tabulated input lies strictly inside (-1, 1). Results are checked with simde_assert_m128d_close(r, expected, 1). NOTE(review): the trailing 1 is presumably a precision in decimal places -- confirm against the macro definition. Returns 0 once every case has been checked. */ static int test_simde_mm_atanh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.35)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.97), SIMDE_FLOAT64_C( 0.37)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04)) }, { 
simde_mm_set_pd(SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( 0.03)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 0.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 0.81)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.47)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.28), SIMDE_FLOAT64_C( 0.51)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.42)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.19), SIMDE_FLOAT64_C( 0.45)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.57)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.65)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -0.69)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.83), SIMDE_FLOAT64_C( -0.85)) } }; /* Evaluate every tabulated case; sizeof(test_vec) / sizeof(test_vec[0]) is the number of cases. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_atanh_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } /* Test for simde_mm256_atanh_ps: 8 cases, each an 8-lane float input a and an expected vector r holding atanh(a) written to two decimals; every tabulated input lies strictly inside (-1, 1). Results are checked with simde_assert_m256_close(r, expected, 1). NOTE(review): the trailing 1 is presumably a precision in decimal places -- confirm against the macro definition. Returns 0 once every case has been checked. */ static int test_simde_mm256_atanh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.35)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( 0.37)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.47)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( 0.51)) }, { 
simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( -1.59), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( -1.29), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.87)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.66)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -2.30), SIMDE_FLOAT32_C( -0.79)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.84)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -1.02), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( -1.02), SIMDE_FLOAT32_C( 2.65), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 1.22)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.03)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 1.05), SIMDE_FLOAT32_C( -0.03)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( 0.40)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 
-0.20), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( -1.74), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( -1.02), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( 0.42)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.25)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 1.16), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 1.07), SIMDE_FLOAT32_C( 0.26)) } }; /* Evaluate every tabulated case; sizeof(test_vec) / sizeof(test_vec[0]) is the number of cases. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_atanh_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } /* Test for simde_mm256_atanh_pd: 8 cases, each a 4-lane double input a and an expected vector r holding atanh(a) written to two decimals; every tabulated input lies strictly inside (-1, 1). Results are checked with simde_assert_m256d_close(r, expected, 1). NOTE(review): the trailing 1 is presumably a precision in decimal places -- confirm against the macro definition. Returns 0 once every case has been checked. */ static int test_simde_mm256_atanh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.35)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( -0.97), SIMDE_FLOAT64_C( 0.37)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.03)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( 0.03)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.47)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.19), SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( -0.28), SIMDE_FLOAT64_C( 0.51)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.57)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.83), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.65)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.86), SIMDE_FLOAT64_C( -0.42), SIMDE_FLOAT64_C( 
0.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( -1.29), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.87)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.38), SIMDE_FLOAT64_C( -0.92)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.47), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.40), SIMDE_FLOAT64_C( -1.59)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( -0.66)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -2.30), SIMDE_FLOAT64_C( -0.79)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.69)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( -0.48), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.85)) } }; /* Evaluate every tabulated case; sizeof(test_vec) / sizeof(test_vec[0]) is the number of cases. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_atanh_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } /* Test for simde_mm512_atanh_ps: 8 cases, each a 16-lane float input a and an expected vector r holding atanh(a) written to two decimals; every tabulated input lies in the range 0.00 to 0.99. Results are checked with simde_assert_m512_close(r, expected, 1). NOTE(review): the trailing 1 is presumably a precision in decimal places -- confirm against the macro definition. Returns 0 once every case has been checked. */ static int test_simde_mm512_atanh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.67)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 1.05), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.81)) }, { 
simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.85)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 1.26)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.92)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 1.42), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 2.65), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 1.59)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 
0.22), SIMDE_FLOAT32_C( 0.70)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 1.83), SIMDE_FLOAT32_C( 1.10), SIMDE_FLOAT32_C( 1.42), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.87)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.41)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 1.38), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 1.05), SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 1.02), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.44)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.13)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( 1.59), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 1.29), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 1.13), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 2.65), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.79), 
SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.13)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.45)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 1.26), SIMDE_FLOAT32_C( 2.65), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.48)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.15)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 0.15)) } }; /* Evaluate every tabulated case; sizeof(test_vec) / sizeof(test_vec[0]) is the number of cases. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_atanh_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_atanh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; 
simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.67)), UINT16_C(41466), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.12)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.67)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.51)), UINT16_C(36797), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 
0.67), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.99)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 2.65)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.77)), UINT16_C(16804), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.46)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.77)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 
0.96), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.12)), UINT16_C( 2107), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.99)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 2.65)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.72)), UINT16_C(22274), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.74)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.89), 
SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.72)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.72)), UINT16_C(27396), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.25)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 1.42), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.72)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 0.06)), UINT16_C( 953), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.57), 
SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.54)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 0.60)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.40)), UINT16_C(12713), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.92)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 1.47), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 1.29), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 2.09), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 1.59)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_atanh_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_atanh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.35)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( -0.97), SIMDE_FLOAT64_C( 0.37)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.47)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.83), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( 1.19), SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( -0.28), SIMDE_FLOAT64_C( 0.51)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.38), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.86), SIMDE_FLOAT64_C( -0.42), SIMDE_FLOAT64_C( 0.70)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.47), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.40), SIMDE_FLOAT64_C( -1.59), SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( -1.29), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.87)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( -0.66)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( -0.48), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.85), SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( -0.21), 
SIMDE_FLOAT64_C( -2.30), SIMDE_FLOAT64_C( -0.79)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.84)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.02), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( -0.66), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( -1.02), SIMDE_FLOAT64_C( 2.65), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 1.22)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( -0.03)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.41), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( 0.35), SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 1.05), SIMDE_FLOAT64_C( -0.03)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.94), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.55), SIMDE_FLOAT64_C( 0.40)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( 0.35), SIMDE_FLOAT64_C( -1.74), SIMDE_FLOAT64_C( -0.97), SIMDE_FLOAT64_C( -1.02), SIMDE_FLOAT64_C( -0.62), SIMDE_FLOAT64_C( 0.42)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( 0.25)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 1.16), SIMDE_FLOAT64_C( 1.53), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( 1.07), SIMDE_FLOAT64_C( 0.26)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_atanh_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int 
test_simde_mm512_mask_atanh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.35)), UINT8_C(139), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( -0.75)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.83), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( -0.97)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( -0.38), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.42)), UINT8_C(229), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -0.66), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( -0.86)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.22), SIMDE_FLOAT64_C( -0.48), SIMDE_FLOAT64_C( 0.85), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -1.29)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 0.99)), UINT8_C(253), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.55), SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( -0.77)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.62), SIMDE_FLOAT64_C( 
-0.41), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( 1.05), SIMDE_FLOAT64_C( -1.02), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( -1.02)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.75)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( -0.17), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( -0.94)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( -0.17), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 1.16), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -1.74)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( -0.74), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.80), SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( -0.82), SIMDE_FLOAT64_C( 0.66)), UINT8_C(145), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( -0.18), SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( -0.87), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.56)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.74), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( -0.80), SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( -0.82), SIMDE_FLOAT64_C( 0.63)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.98)), UINT8_C( 75), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( -0.59), SIMDE_FLOAT64_C( 0.73), SIMDE_FLOAT64_C( 0.62), SIMDE_FLOAT64_C( 0.14)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.45), 
SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.73), SIMDE_FLOAT64_C( 0.14)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( -0.70), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.07)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.57), SIMDE_FLOAT64_C( -0.34), SIMDE_FLOAT64_C( 0.29), SIMDE_FLOAT64_C( -0.58)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( -0.70), SIMDE_FLOAT64_C( -1.02), SIMDE_FLOAT64_C( -0.65), SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.66)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( 0.94), SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( -0.18)), UINT8_C(213), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -0.36), SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( -0.13)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.02), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( -0.13)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_atanh_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cdfnorm_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -993.83), SIMDE_FLOAT32_C( 92.27), SIMDE_FLOAT32_C( 208.35), SIMDE_FLOAT32_C( 761.44) }, { SIMDE_FLOAT32_C( 
0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( -963.46), SIMDE_FLOAT32_C( 429.93), SIMDE_FLOAT32_C( 318.99), SIMDE_FLOAT32_C( 532.75) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( 677.31), SIMDE_FLOAT32_C( -552.55), SIMDE_FLOAT32_C( 344.89), SIMDE_FLOAT32_C( -275.73) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -396.40), SIMDE_FLOAT32_C( 319.50), SIMDE_FLOAT32_C( 348.88), SIMDE_FLOAT32_C( -732.73) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 638.44), SIMDE_FLOAT32_C( -14.16), SIMDE_FLOAT32_C( -165.87), SIMDE_FLOAT32_C( 843.45) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( -841.80), SIMDE_FLOAT32_C( -382.17), SIMDE_FLOAT32_C( -889.98), SIMDE_FLOAT32_C( 238.69) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( -193.56), SIMDE_FLOAT32_C( 381.13), SIMDE_FLOAT32_C( -623.80), SIMDE_FLOAT32_C( -46.41) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 798.25), SIMDE_FLOAT32_C( -366.96), SIMDE_FLOAT32_C( 249.70), SIMDE_FLOAT32_C( 804.43) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_cdfnorm_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_cdfnorm_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { 
SIMDE_FLOAT64_C( -954.47), SIMDE_FLOAT64_C( -900.72) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 375.82), SIMDE_FLOAT64_C( 323.80) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( -882.15), SIMDE_FLOAT64_C( -872.83) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -880.22), SIMDE_FLOAT64_C( 404.86) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( 587.17), SIMDE_FLOAT64_C( 674.97) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( -509.08), SIMDE_FLOAT64_C( -152.91) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -296.61), SIMDE_FLOAT64_C( 576.29) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( -858.64), SIMDE_FLOAT64_C( -995.64) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_cdfnorm_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_cdfnorm_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 818.12), SIMDE_FLOAT32_C( 842.04), SIMDE_FLOAT32_C( -990.82), SIMDE_FLOAT32_C( -180.40), SIMDE_FLOAT32_C( -703.48), SIMDE_FLOAT32_C( -658.67), SIMDE_FLOAT32_C( -675.01), SIMDE_FLOAT32_C( -213.67) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -312.75), SIMDE_FLOAT32_C( -440.95), SIMDE_FLOAT32_C( 40.83), SIMDE_FLOAT32_C( -601.56), SIMDE_FLOAT32_C( 516.51), SIMDE_FLOAT32_C( 64.68), SIMDE_FLOAT32_C( 765.54), SIMDE_FLOAT32_C( 383.86) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( -264.08), SIMDE_FLOAT32_C( -961.69), SIMDE_FLOAT32_C( 776.59), SIMDE_FLOAT32_C( -476.70), SIMDE_FLOAT32_C( 398.19), SIMDE_FLOAT32_C( 561.61), SIMDE_FLOAT32_C( -253.27), SIMDE_FLOAT32_C( 994.83) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( -614.21), SIMDE_FLOAT32_C( 933.12), SIMDE_FLOAT32_C( 521.15), SIMDE_FLOAT32_C( 87.99), SIMDE_FLOAT32_C( 511.16), SIMDE_FLOAT32_C( 278.58), SIMDE_FLOAT32_C( -327.57), SIMDE_FLOAT32_C( 329.28) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( 120.61), SIMDE_FLOAT32_C( -318.39), SIMDE_FLOAT32_C( -851.12), SIMDE_FLOAT32_C( 417.13), SIMDE_FLOAT32_C( 22.95), SIMDE_FLOAT32_C( -526.13), SIMDE_FLOAT32_C( -796.54), SIMDE_FLOAT32_C( 710.20) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( 32.92), SIMDE_FLOAT32_C( 244.29), SIMDE_FLOAT32_C( -891.36), SIMDE_FLOAT32_C( -450.57), SIMDE_FLOAT32_C( -691.03), SIMDE_FLOAT32_C( 874.17), SIMDE_FLOAT32_C( 933.29), SIMDE_FLOAT32_C( 44.89) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( 912.48), SIMDE_FLOAT32_C( 709.88), SIMDE_FLOAT32_C( 568.19), SIMDE_FLOAT32_C( 310.67), SIMDE_FLOAT32_C( 271.49), SIMDE_FLOAT32_C( -685.08), SIMDE_FLOAT32_C( 305.50), 
SIMDE_FLOAT32_C( 657.28) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( -751.96), SIMDE_FLOAT32_C( -173.35), SIMDE_FLOAT32_C( -254.73), SIMDE_FLOAT32_C( 759.20), SIMDE_FLOAT32_C( -894.77), SIMDE_FLOAT32_C( 417.70), SIMDE_FLOAT32_C( 88.48), SIMDE_FLOAT32_C( 225.84) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_cdfnorm_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_cdfnorm_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( -924.75), SIMDE_FLOAT64_C( -974.37), SIMDE_FLOAT64_C( -748.27), SIMDE_FLOAT64_C( -367.36) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -632.95), SIMDE_FLOAT64_C( 220.99), SIMDE_FLOAT64_C( 820.62), SIMDE_FLOAT64_C( -652.24) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -811.15), SIMDE_FLOAT64_C( -815.96), SIMDE_FLOAT64_C( 903.78), SIMDE_FLOAT64_C( 978.99) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( -359.97), SIMDE_FLOAT64_C( -262.68), SIMDE_FLOAT64_C( -977.31), SIMDE_FLOAT64_C( -241.69) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 96.53), SIMDE_FLOAT64_C( 838.57), SIMDE_FLOAT64_C( 
179.14), SIMDE_FLOAT64_C( 108.78) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( -69.02), SIMDE_FLOAT64_C( -39.14), SIMDE_FLOAT64_C( 24.34), SIMDE_FLOAT64_C( -579.34) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 73.79), SIMDE_FLOAT64_C( 99.84), SIMDE_FLOAT64_C( 430.49), SIMDE_FLOAT64_C( 713.26) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( -127.22), SIMDE_FLOAT64_C( -439.34), SIMDE_FLOAT64_C( -849.37), SIMDE_FLOAT64_C( -51.97) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_cdfnorm_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_cdfnorm_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -171.83), SIMDE_FLOAT32_C( -16.40), SIMDE_FLOAT32_C( -352.71), SIMDE_FLOAT32_C( -355.76), SIMDE_FLOAT32_C( -532.92), SIMDE_FLOAT32_C( -657.24), SIMDE_FLOAT32_C( -31.51), SIMDE_FLOAT32_C( -403.96), SIMDE_FLOAT32_C( 10.99), SIMDE_FLOAT32_C( -120.77), SIMDE_FLOAT32_C( 317.51), SIMDE_FLOAT32_C( 262.42), SIMDE_FLOAT32_C( 830.85), SIMDE_FLOAT32_C( -503.76), SIMDE_FLOAT32_C( 762.65), SIMDE_FLOAT32_C( -301.62) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), 
SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 330.53), SIMDE_FLOAT32_C( 478.14), SIMDE_FLOAT32_C( -836.82), SIMDE_FLOAT32_C( 378.71), SIMDE_FLOAT32_C( 784.61), SIMDE_FLOAT32_C( 602.57), SIMDE_FLOAT32_C( 441.59), SIMDE_FLOAT32_C( -912.33), SIMDE_FLOAT32_C( -474.27), SIMDE_FLOAT32_C( 991.91), SIMDE_FLOAT32_C( 893.21), SIMDE_FLOAT32_C( 55.17), SIMDE_FLOAT32_C( -251.62), SIMDE_FLOAT32_C( 632.38), SIMDE_FLOAT32_C( 573.89), SIMDE_FLOAT32_C( 576.55) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( -384.02), SIMDE_FLOAT32_C( -778.82), SIMDE_FLOAT32_C( -779.21), SIMDE_FLOAT32_C( 83.07), SIMDE_FLOAT32_C( -436.06), SIMDE_FLOAT32_C( 189.28), SIMDE_FLOAT32_C( 679.10), SIMDE_FLOAT32_C( 574.93), SIMDE_FLOAT32_C( -931.49), SIMDE_FLOAT32_C( -3.39), SIMDE_FLOAT32_C( -162.65), SIMDE_FLOAT32_C( 899.36), SIMDE_FLOAT32_C( 492.85), SIMDE_FLOAT32_C( -399.99), SIMDE_FLOAT32_C( -402.27), SIMDE_FLOAT32_C( -176.62) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -921.85), SIMDE_FLOAT32_C( -239.09), SIMDE_FLOAT32_C( -797.90), SIMDE_FLOAT32_C( 862.75), SIMDE_FLOAT32_C( -636.52), SIMDE_FLOAT32_C( 643.69), SIMDE_FLOAT32_C( 950.42), SIMDE_FLOAT32_C( -110.78), SIMDE_FLOAT32_C( 635.59), SIMDE_FLOAT32_C( 843.63), SIMDE_FLOAT32_C( 944.39), SIMDE_FLOAT32_C( -616.03), 
SIMDE_FLOAT32_C( 476.02), SIMDE_FLOAT32_C( 518.27), SIMDE_FLOAT32_C( 960.52), SIMDE_FLOAT32_C( -908.00) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 739.45), SIMDE_FLOAT32_C( -818.69), SIMDE_FLOAT32_C( 175.06), SIMDE_FLOAT32_C( -696.61), SIMDE_FLOAT32_C( 370.60), SIMDE_FLOAT32_C( -145.84), SIMDE_FLOAT32_C( 878.31), SIMDE_FLOAT32_C( 439.11), SIMDE_FLOAT32_C( 850.77), SIMDE_FLOAT32_C( -284.33), SIMDE_FLOAT32_C( 338.47), SIMDE_FLOAT32_C( 343.62), SIMDE_FLOAT32_C( 315.67), SIMDE_FLOAT32_C( 936.20), SIMDE_FLOAT32_C( -832.99), SIMDE_FLOAT32_C( 393.82) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( -302.88), SIMDE_FLOAT32_C( -630.90), SIMDE_FLOAT32_C( 256.57), SIMDE_FLOAT32_C( 60.60), SIMDE_FLOAT32_C( -987.21), SIMDE_FLOAT32_C( 206.99), SIMDE_FLOAT32_C( 949.82), SIMDE_FLOAT32_C( 648.38), SIMDE_FLOAT32_C( 50.62), SIMDE_FLOAT32_C( 894.21), SIMDE_FLOAT32_C( -967.65), SIMDE_FLOAT32_C( -473.36), SIMDE_FLOAT32_C( 412.48), SIMDE_FLOAT32_C( 992.88), SIMDE_FLOAT32_C( -381.36), SIMDE_FLOAT32_C( 151.93) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 
0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( -825.81), SIMDE_FLOAT32_C( 793.70), SIMDE_FLOAT32_C( 455.32), SIMDE_FLOAT32_C( 544.79), SIMDE_FLOAT32_C( -352.14), SIMDE_FLOAT32_C( 333.63), SIMDE_FLOAT32_C( -16.10), SIMDE_FLOAT32_C( -501.36), SIMDE_FLOAT32_C( -950.70), SIMDE_FLOAT32_C( -677.63), SIMDE_FLOAT32_C( 842.26), SIMDE_FLOAT32_C( 364.97), SIMDE_FLOAT32_C( -741.43), SIMDE_FLOAT32_C( -990.74), SIMDE_FLOAT32_C( -241.21), SIMDE_FLOAT32_C( -44.31) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -621.64), SIMDE_FLOAT32_C( -984.64), SIMDE_FLOAT32_C( -983.70), SIMDE_FLOAT32_C( -608.85), SIMDE_FLOAT32_C( 222.35), SIMDE_FLOAT32_C( 966.12), SIMDE_FLOAT32_C( -960.47), SIMDE_FLOAT32_C( -727.02), SIMDE_FLOAT32_C( 860.32), SIMDE_FLOAT32_C( -928.11), SIMDE_FLOAT32_C( -200.38), SIMDE_FLOAT32_C( 272.80), SIMDE_FLOAT32_C( -935.24), SIMDE_FLOAT32_C( 418.26), SIMDE_FLOAT32_C( -575.27), SIMDE_FLOAT32_C( -761.04) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_cdfnorm_ps(a); simde_test_x86_assert_equal_f32x16(r, 
simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_cdfnorm_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 742.28), SIMDE_FLOAT32_C( -10.25), SIMDE_FLOAT32_C( -827.23), SIMDE_FLOAT32_C( 995.37), SIMDE_FLOAT32_C( 256.37), SIMDE_FLOAT32_C( 283.72), SIMDE_FLOAT32_C( -388.62), SIMDE_FLOAT32_C( -979.71), SIMDE_FLOAT32_C( -680.17), SIMDE_FLOAT32_C( -749.87), SIMDE_FLOAT32_C( -71.05), SIMDE_FLOAT32_C( -60.71), SIMDE_FLOAT32_C( -405.48), SIMDE_FLOAT32_C( 786.24), SIMDE_FLOAT32_C( -561.14), SIMDE_FLOAT32_C( 561.28) }, UINT8_C(133), { SIMDE_FLOAT32_C( 409.19), SIMDE_FLOAT32_C( -492.65), SIMDE_FLOAT32_C( 57.95), SIMDE_FLOAT32_C( -250.60), SIMDE_FLOAT32_C( -403.16), SIMDE_FLOAT32_C( 437.65), SIMDE_FLOAT32_C( 509.49), SIMDE_FLOAT32_C( -69.63), SIMDE_FLOAT32_C( 308.33), SIMDE_FLOAT32_C( 780.29), SIMDE_FLOAT32_C( -943.64), SIMDE_FLOAT32_C( 322.23), SIMDE_FLOAT32_C( 242.19), SIMDE_FLOAT32_C( 643.12), SIMDE_FLOAT32_C( 64.51), SIMDE_FLOAT32_C( -768.06) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -10.25), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 995.37), SIMDE_FLOAT32_C( 256.37), SIMDE_FLOAT32_C( 283.72), SIMDE_FLOAT32_C( -388.62), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -680.17), SIMDE_FLOAT32_C( -749.87), SIMDE_FLOAT32_C( -71.05), SIMDE_FLOAT32_C( -60.71), SIMDE_FLOAT32_C( -405.48), SIMDE_FLOAT32_C( 786.24), SIMDE_FLOAT32_C( -561.14), SIMDE_FLOAT32_C( 561.28) } }, { { SIMDE_FLOAT32_C( 815.89), SIMDE_FLOAT32_C( 59.87), SIMDE_FLOAT32_C( 488.31), SIMDE_FLOAT32_C( 99.61), SIMDE_FLOAT32_C( 671.25), SIMDE_FLOAT32_C( 508.61), SIMDE_FLOAT32_C( 419.45), SIMDE_FLOAT32_C( 921.38), SIMDE_FLOAT32_C( -562.45), SIMDE_FLOAT32_C( -641.27), SIMDE_FLOAT32_C( -484.11), SIMDE_FLOAT32_C( -776.21), SIMDE_FLOAT32_C( -202.41), SIMDE_FLOAT32_C( -922.83), SIMDE_FLOAT32_C( -317.45), SIMDE_FLOAT32_C( -793.22) }, UINT8_C(110), 
{ SIMDE_FLOAT32_C( 740.50), SIMDE_FLOAT32_C( -43.82), SIMDE_FLOAT32_C( 181.36), SIMDE_FLOAT32_C( 178.15), SIMDE_FLOAT32_C( -534.33), SIMDE_FLOAT32_C( -888.27), SIMDE_FLOAT32_C( -513.52), SIMDE_FLOAT32_C( -754.04), SIMDE_FLOAT32_C( -831.91), SIMDE_FLOAT32_C( 808.71), SIMDE_FLOAT32_C( 488.15), SIMDE_FLOAT32_C( 811.21), SIMDE_FLOAT32_C( -126.78), SIMDE_FLOAT32_C( 720.09), SIMDE_FLOAT32_C( 627.10), SIMDE_FLOAT32_C( 933.09) }, { SIMDE_FLOAT32_C( 815.89), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 671.25), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 921.38), SIMDE_FLOAT32_C( -562.45), SIMDE_FLOAT32_C( -641.27), SIMDE_FLOAT32_C( -484.11), SIMDE_FLOAT32_C( -776.21), SIMDE_FLOAT32_C( -202.41), SIMDE_FLOAT32_C( -922.83), SIMDE_FLOAT32_C( -317.45), SIMDE_FLOAT32_C( -793.22) } }, { { SIMDE_FLOAT32_C( 208.40), SIMDE_FLOAT32_C( -273.28), SIMDE_FLOAT32_C( 604.34), SIMDE_FLOAT32_C( -282.99), SIMDE_FLOAT32_C( -853.84), SIMDE_FLOAT32_C( 525.72), SIMDE_FLOAT32_C( 154.57), SIMDE_FLOAT32_C( -495.10), SIMDE_FLOAT32_C( -958.39), SIMDE_FLOAT32_C( 378.36), SIMDE_FLOAT32_C( 302.49), SIMDE_FLOAT32_C( -881.22), SIMDE_FLOAT32_C( -939.09), SIMDE_FLOAT32_C( 509.27), SIMDE_FLOAT32_C( -296.70), SIMDE_FLOAT32_C( 801.40) }, UINT8_C(108), { SIMDE_FLOAT32_C( 884.66), SIMDE_FLOAT32_C( -20.45), SIMDE_FLOAT32_C( -68.88), SIMDE_FLOAT32_C( 996.39), SIMDE_FLOAT32_C( 466.03), SIMDE_FLOAT32_C( 177.08), SIMDE_FLOAT32_C( -835.52), SIMDE_FLOAT32_C( 274.74), SIMDE_FLOAT32_C( -334.77), SIMDE_FLOAT32_C( 975.69), SIMDE_FLOAT32_C( -852.04), SIMDE_FLOAT32_C( -614.68), SIMDE_FLOAT32_C( 602.80), SIMDE_FLOAT32_C( -918.95), SIMDE_FLOAT32_C( 593.73), SIMDE_FLOAT32_C( -670.48) }, { SIMDE_FLOAT32_C( 208.40), SIMDE_FLOAT32_C( -273.28), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -853.84), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -495.10), SIMDE_FLOAT32_C( -958.39), SIMDE_FLOAT32_C( 378.36), SIMDE_FLOAT32_C( 302.49), 
SIMDE_FLOAT32_C( -881.22), SIMDE_FLOAT32_C( -939.09), SIMDE_FLOAT32_C( 509.27), SIMDE_FLOAT32_C( -296.70), SIMDE_FLOAT32_C( 801.40) } }, { { SIMDE_FLOAT32_C( 685.39), SIMDE_FLOAT32_C( -689.26), SIMDE_FLOAT32_C( -524.32), SIMDE_FLOAT32_C( 211.10), SIMDE_FLOAT32_C( 465.30), SIMDE_FLOAT32_C( -19.43), SIMDE_FLOAT32_C( 252.72), SIMDE_FLOAT32_C( -156.34), SIMDE_FLOAT32_C( -716.94), SIMDE_FLOAT32_C( 371.50), SIMDE_FLOAT32_C( -95.43), SIMDE_FLOAT32_C( 792.33), SIMDE_FLOAT32_C( -925.20), SIMDE_FLOAT32_C( -294.03), SIMDE_FLOAT32_C( -742.21), SIMDE_FLOAT32_C( 959.46) }, UINT8_C(216), { SIMDE_FLOAT32_C( 188.91), SIMDE_FLOAT32_C( 955.85), SIMDE_FLOAT32_C( 151.56), SIMDE_FLOAT32_C( -634.01), SIMDE_FLOAT32_C( -879.66), SIMDE_FLOAT32_C( -573.70), SIMDE_FLOAT32_C( 31.23), SIMDE_FLOAT32_C( -903.97), SIMDE_FLOAT32_C( -425.74), SIMDE_FLOAT32_C( 416.55), SIMDE_FLOAT32_C( 698.83), SIMDE_FLOAT32_C( -344.69), SIMDE_FLOAT32_C( 10.28), SIMDE_FLOAT32_C( -971.65), SIMDE_FLOAT32_C( -659.31), SIMDE_FLOAT32_C( 321.02) }, { SIMDE_FLOAT32_C( 685.39), SIMDE_FLOAT32_C( -689.26), SIMDE_FLOAT32_C( -524.32), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -19.43), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -716.94), SIMDE_FLOAT32_C( 371.50), SIMDE_FLOAT32_C( -95.43), SIMDE_FLOAT32_C( 792.33), SIMDE_FLOAT32_C( -925.20), SIMDE_FLOAT32_C( -294.03), SIMDE_FLOAT32_C( -742.21), SIMDE_FLOAT32_C( 959.46) } }, { { SIMDE_FLOAT32_C( -495.97), SIMDE_FLOAT32_C( 551.80), SIMDE_FLOAT32_C( -213.68), SIMDE_FLOAT32_C( 484.60), SIMDE_FLOAT32_C( -195.49), SIMDE_FLOAT32_C( 629.98), SIMDE_FLOAT32_C( 767.66), SIMDE_FLOAT32_C( -823.99), SIMDE_FLOAT32_C( -465.45), SIMDE_FLOAT32_C( 560.00), SIMDE_FLOAT32_C( -749.18), SIMDE_FLOAT32_C( 240.52), SIMDE_FLOAT32_C( 817.78), SIMDE_FLOAT32_C( -789.72), SIMDE_FLOAT32_C( -73.95), SIMDE_FLOAT32_C( 6.69) }, UINT8_C(202), { SIMDE_FLOAT32_C( -922.39), SIMDE_FLOAT32_C( 372.68), SIMDE_FLOAT32_C( -713.53), SIMDE_FLOAT32_C( -496.09), SIMDE_FLOAT32_C( 
-596.09), SIMDE_FLOAT32_C( -617.49), SIMDE_FLOAT32_C( 78.17), SIMDE_FLOAT32_C( 820.46), SIMDE_FLOAT32_C( -918.66), SIMDE_FLOAT32_C( 733.47), SIMDE_FLOAT32_C( -169.26), SIMDE_FLOAT32_C( -890.32), SIMDE_FLOAT32_C( -925.83), SIMDE_FLOAT32_C( -848.24), SIMDE_FLOAT32_C( -386.29), SIMDE_FLOAT32_C( 625.96) }, { SIMDE_FLOAT32_C( -495.97), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -213.68), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -195.49), SIMDE_FLOAT32_C( 629.98), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -465.45), SIMDE_FLOAT32_C( 560.00), SIMDE_FLOAT32_C( -749.18), SIMDE_FLOAT32_C( 240.52), SIMDE_FLOAT32_C( 817.78), SIMDE_FLOAT32_C( -789.72), SIMDE_FLOAT32_C( -73.95), SIMDE_FLOAT32_C( 6.69) } }, { { SIMDE_FLOAT32_C( -61.91), SIMDE_FLOAT32_C( -901.69), SIMDE_FLOAT32_C( -569.52), SIMDE_FLOAT32_C( -431.93), SIMDE_FLOAT32_C( 865.97), SIMDE_FLOAT32_C( -393.51), SIMDE_FLOAT32_C( 102.62), SIMDE_FLOAT32_C( 425.97), SIMDE_FLOAT32_C( -142.69), SIMDE_FLOAT32_C( -656.86), SIMDE_FLOAT32_C( 243.75), SIMDE_FLOAT32_C( 67.59), SIMDE_FLOAT32_C( 269.19), SIMDE_FLOAT32_C( -749.56), SIMDE_FLOAT32_C( 233.72), SIMDE_FLOAT32_C( 346.79) }, UINT8_C(117), { SIMDE_FLOAT32_C( 520.19), SIMDE_FLOAT32_C( 850.70), SIMDE_FLOAT32_C( -972.96), SIMDE_FLOAT32_C( 902.70), SIMDE_FLOAT32_C( -71.13), SIMDE_FLOAT32_C( 847.50), SIMDE_FLOAT32_C( 984.04), SIMDE_FLOAT32_C( -337.66), SIMDE_FLOAT32_C( -321.75), SIMDE_FLOAT32_C( -906.28), SIMDE_FLOAT32_C( -263.49), SIMDE_FLOAT32_C( -169.99), SIMDE_FLOAT32_C( -292.57), SIMDE_FLOAT32_C( -637.53), SIMDE_FLOAT32_C( 768.10), SIMDE_FLOAT32_C( -194.26) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -901.69), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -431.93), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 425.97), SIMDE_FLOAT32_C( -142.69), SIMDE_FLOAT32_C( -656.86), SIMDE_FLOAT32_C( 243.75), SIMDE_FLOAT32_C( 67.59), SIMDE_FLOAT32_C( 269.19), SIMDE_FLOAT32_C( -749.56), SIMDE_FLOAT32_C( 233.72), SIMDE_FLOAT32_C( 
346.79) } }, { { SIMDE_FLOAT32_C( -207.05), SIMDE_FLOAT32_C( -663.84), SIMDE_FLOAT32_C( -328.29), SIMDE_FLOAT32_C( 399.44), SIMDE_FLOAT32_C( 438.78), SIMDE_FLOAT32_C( -902.33), SIMDE_FLOAT32_C( -743.25), SIMDE_FLOAT32_C( 781.93), SIMDE_FLOAT32_C( 341.42), SIMDE_FLOAT32_C( 324.33), SIMDE_FLOAT32_C( 51.11), SIMDE_FLOAT32_C( 591.87), SIMDE_FLOAT32_C( -441.94), SIMDE_FLOAT32_C( -602.09), SIMDE_FLOAT32_C( 214.99), SIMDE_FLOAT32_C( -921.75) }, UINT8_MAX, { SIMDE_FLOAT32_C( 242.04), SIMDE_FLOAT32_C( 980.95), SIMDE_FLOAT32_C( 177.48), SIMDE_FLOAT32_C( 89.54), SIMDE_FLOAT32_C( 964.99), SIMDE_FLOAT32_C( 839.82), SIMDE_FLOAT32_C( 767.79), SIMDE_FLOAT32_C( -941.29), SIMDE_FLOAT32_C( -423.68), SIMDE_FLOAT32_C( -402.20), SIMDE_FLOAT32_C( -233.86), SIMDE_FLOAT32_C( -61.21), SIMDE_FLOAT32_C( -634.11), SIMDE_FLOAT32_C( 571.87), SIMDE_FLOAT32_C( 731.74), SIMDE_FLOAT32_C( -297.94) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 341.42), SIMDE_FLOAT32_C( 324.33), SIMDE_FLOAT32_C( 51.11), SIMDE_FLOAT32_C( 591.87), SIMDE_FLOAT32_C( -441.94), SIMDE_FLOAT32_C( -602.09), SIMDE_FLOAT32_C( 214.99), SIMDE_FLOAT32_C( -921.75) } }, { { SIMDE_FLOAT32_C( -756.42), SIMDE_FLOAT32_C( 131.18), SIMDE_FLOAT32_C( -859.16), SIMDE_FLOAT32_C( -658.75), SIMDE_FLOAT32_C( 387.93), SIMDE_FLOAT32_C( 922.77), SIMDE_FLOAT32_C( 682.68), SIMDE_FLOAT32_C( -287.73), SIMDE_FLOAT32_C( -26.12), SIMDE_FLOAT32_C( 274.55), SIMDE_FLOAT32_C( 270.32), SIMDE_FLOAT32_C( 371.79), SIMDE_FLOAT32_C( -510.46), SIMDE_FLOAT32_C( 348.57), SIMDE_FLOAT32_C( 620.40), SIMDE_FLOAT32_C( 731.58) }, UINT8_C(111), { SIMDE_FLOAT32_C( -202.12), SIMDE_FLOAT32_C( -178.88), SIMDE_FLOAT32_C( 294.51), SIMDE_FLOAT32_C( -362.30), SIMDE_FLOAT32_C( -411.10), SIMDE_FLOAT32_C( 353.22), SIMDE_FLOAT32_C( 214.02), SIMDE_FLOAT32_C( 186.70), SIMDE_FLOAT32_C( -880.64), SIMDE_FLOAT32_C( -847.18), 
SIMDE_FLOAT32_C( 552.59), SIMDE_FLOAT32_C( 691.24), SIMDE_FLOAT32_C( 884.56), SIMDE_FLOAT32_C( -745.35), SIMDE_FLOAT32_C( 934.82), SIMDE_FLOAT32_C( 15.74) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 387.93), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -287.73), SIMDE_FLOAT32_C( -26.12), SIMDE_FLOAT32_C( 274.55), SIMDE_FLOAT32_C( 270.32), SIMDE_FLOAT32_C( 371.79), SIMDE_FLOAT32_C( -510.46), SIMDE_FLOAT32_C( 348.57), SIMDE_FLOAT32_C( 620.40), SIMDE_FLOAT32_C( 731.58) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_cdfnorm_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_cdfnorm_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 515.78), SIMDE_FLOAT64_C( -190.13), SIMDE_FLOAT64_C( -905.08), SIMDE_FLOAT64_C( 734.43), SIMDE_FLOAT64_C( -737.45), SIMDE_FLOAT64_C( 98.47), SIMDE_FLOAT64_C( -95.41), SIMDE_FLOAT64_C( -675.32) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -274.83), SIMDE_FLOAT64_C( 838.86), SIMDE_FLOAT64_C( -796.42), SIMDE_FLOAT64_C( 478.49), SIMDE_FLOAT64_C( 554.96), SIMDE_FLOAT64_C( -640.77), SIMDE_FLOAT64_C( -29.13), SIMDE_FLOAT64_C( -94.09) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 398.68), SIMDE_FLOAT64_C( 316.09), SIMDE_FLOAT64_C( 332.14), 
SIMDE_FLOAT64_C( 590.41), SIMDE_FLOAT64_C( -417.40), SIMDE_FLOAT64_C( -789.19), SIMDE_FLOAT64_C( -493.08), SIMDE_FLOAT64_C( 967.90) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( -877.90), SIMDE_FLOAT64_C( 49.76), SIMDE_FLOAT64_C( 604.59), SIMDE_FLOAT64_C( -550.52), SIMDE_FLOAT64_C( -548.72), SIMDE_FLOAT64_C( 124.59), SIMDE_FLOAT64_C( 499.19), SIMDE_FLOAT64_C( 967.06) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( 934.46), SIMDE_FLOAT64_C( 594.11), SIMDE_FLOAT64_C( 701.49), SIMDE_FLOAT64_C( -802.98), SIMDE_FLOAT64_C( -307.42), SIMDE_FLOAT64_C( -393.92), SIMDE_FLOAT64_C( -478.30), SIMDE_FLOAT64_C( 417.75) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( -555.06), SIMDE_FLOAT64_C( -274.72), SIMDE_FLOAT64_C( -103.76), SIMDE_FLOAT64_C( 999.90), SIMDE_FLOAT64_C( 84.51), SIMDE_FLOAT64_C( 867.11), SIMDE_FLOAT64_C( -94.19), SIMDE_FLOAT64_C( -516.80) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 183.20), SIMDE_FLOAT64_C( -762.05), SIMDE_FLOAT64_C( -926.39), SIMDE_FLOAT64_C( 765.80), SIMDE_FLOAT64_C( -551.23), SIMDE_FLOAT64_C( -419.47), SIMDE_FLOAT64_C( 733.70), SIMDE_FLOAT64_C( -429.13) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00) } }, { { 
SIMDE_FLOAT64_C( 630.29), SIMDE_FLOAT64_C( 338.28), SIMDE_FLOAT64_C( 20.35), SIMDE_FLOAT64_C( -918.43), SIMDE_FLOAT64_C( -537.13), SIMDE_FLOAT64_C( -480.46), SIMDE_FLOAT64_C( -951.37), SIMDE_FLOAT64_C( -602.66) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_cdfnorm_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_cdfnorm_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -488.95), SIMDE_FLOAT64_C( 602.82), SIMDE_FLOAT64_C( 180.74), SIMDE_FLOAT64_C( -325.95), SIMDE_FLOAT64_C( -721.92), SIMDE_FLOAT64_C( 512.04), SIMDE_FLOAT64_C( 182.27), SIMDE_FLOAT64_C( -392.39) }, UINT8_C( 25), { SIMDE_FLOAT64_C( -174.69), SIMDE_FLOAT64_C( 219.93), SIMDE_FLOAT64_C( 649.77), SIMDE_FLOAT64_C( -892.75), SIMDE_FLOAT64_C( -136.71), SIMDE_FLOAT64_C( -906.14), SIMDE_FLOAT64_C( 643.57), SIMDE_FLOAT64_C( 669.62) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 602.82), SIMDE_FLOAT64_C( 180.74), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 512.04), SIMDE_FLOAT64_C( 182.27), SIMDE_FLOAT64_C( -392.39) } }, { { SIMDE_FLOAT64_C( -655.46), SIMDE_FLOAT64_C( 837.15), SIMDE_FLOAT64_C( 772.04), SIMDE_FLOAT64_C( 272.82), SIMDE_FLOAT64_C( 490.61), SIMDE_FLOAT64_C( 38.88), SIMDE_FLOAT64_C( -668.93), SIMDE_FLOAT64_C( -501.66) }, UINT8_C(232), { SIMDE_FLOAT64_C( -130.58), SIMDE_FLOAT64_C( 219.17), SIMDE_FLOAT64_C( 309.61), SIMDE_FLOAT64_C( -572.70), SIMDE_FLOAT64_C( 851.68), SIMDE_FLOAT64_C( 820.66), SIMDE_FLOAT64_C( -969.88), SIMDE_FLOAT64_C( 32.42) }, { 
SIMDE_FLOAT64_C( -655.46), SIMDE_FLOAT64_C( 837.15), SIMDE_FLOAT64_C( 772.04), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 490.61), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( -505.29), SIMDE_FLOAT64_C( -691.80), SIMDE_FLOAT64_C( -455.53), SIMDE_FLOAT64_C( 676.98), SIMDE_FLOAT64_C( -84.19), SIMDE_FLOAT64_C( -340.34), SIMDE_FLOAT64_C( -497.71), SIMDE_FLOAT64_C( -864.27) }, UINT8_C(183), { SIMDE_FLOAT64_C( -390.46), SIMDE_FLOAT64_C( -0.97), SIMDE_FLOAT64_C( -596.71), SIMDE_FLOAT64_C( -746.89), SIMDE_FLOAT64_C( -331.35), SIMDE_FLOAT64_C( -252.17), SIMDE_FLOAT64_C( -909.75), SIMDE_FLOAT64_C( -559.31) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.17), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 676.98), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -497.71), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -979.36), SIMDE_FLOAT64_C( 580.86), SIMDE_FLOAT64_C( 479.57), SIMDE_FLOAT64_C( -648.29), SIMDE_FLOAT64_C( -920.80), SIMDE_FLOAT64_C( 377.46), SIMDE_FLOAT64_C( 221.14), SIMDE_FLOAT64_C( 298.38) }, UINT8_C(194), { SIMDE_FLOAT64_C( 648.44), SIMDE_FLOAT64_C( 150.06), SIMDE_FLOAT64_C( -492.27), SIMDE_FLOAT64_C( 678.56), SIMDE_FLOAT64_C( -817.52), SIMDE_FLOAT64_C( 2.44), SIMDE_FLOAT64_C( 986.76), SIMDE_FLOAT64_C( -273.05) }, { SIMDE_FLOAT64_C( -979.36), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 479.57), SIMDE_FLOAT64_C( -648.29), SIMDE_FLOAT64_C( -920.80), SIMDE_FLOAT64_C( 377.46), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -320.57), SIMDE_FLOAT64_C( -97.43), SIMDE_FLOAT64_C( 386.61), SIMDE_FLOAT64_C( 181.71), SIMDE_FLOAT64_C( 38.30), SIMDE_FLOAT64_C( 696.05), SIMDE_FLOAT64_C( 791.25), SIMDE_FLOAT64_C( -962.67) }, UINT8_C(160), { SIMDE_FLOAT64_C( -955.64), SIMDE_FLOAT64_C( -294.02), SIMDE_FLOAT64_C( -152.83), SIMDE_FLOAT64_C( -865.39), SIMDE_FLOAT64_C( 146.67), SIMDE_FLOAT64_C( -132.19), SIMDE_FLOAT64_C( 715.47), SIMDE_FLOAT64_C( -373.76) }, { SIMDE_FLOAT64_C( -320.57), 
SIMDE_FLOAT64_C( -97.43), SIMDE_FLOAT64_C( 386.61), SIMDE_FLOAT64_C( 181.71), SIMDE_FLOAT64_C( 38.30), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 791.25), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 219.52), SIMDE_FLOAT64_C( 794.68), SIMDE_FLOAT64_C( -996.30), SIMDE_FLOAT64_C( -559.34), SIMDE_FLOAT64_C( 93.05), SIMDE_FLOAT64_C( -309.23), SIMDE_FLOAT64_C( -910.90), SIMDE_FLOAT64_C( -756.89) }, UINT8_C( 25), { SIMDE_FLOAT64_C( 767.66), SIMDE_FLOAT64_C( -574.40), SIMDE_FLOAT64_C( -799.05), SIMDE_FLOAT64_C( 754.42), SIMDE_FLOAT64_C( 152.54), SIMDE_FLOAT64_C( -119.63), SIMDE_FLOAT64_C( -343.01), SIMDE_FLOAT64_C( -460.84) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 794.68), SIMDE_FLOAT64_C( -996.30), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -309.23), SIMDE_FLOAT64_C( -910.90), SIMDE_FLOAT64_C( -756.89) } }, { { SIMDE_FLOAT64_C( -937.91), SIMDE_FLOAT64_C( 695.30), SIMDE_FLOAT64_C( -764.79), SIMDE_FLOAT64_C( 853.34), SIMDE_FLOAT64_C( 732.63), SIMDE_FLOAT64_C( -665.45), SIMDE_FLOAT64_C( 897.70), SIMDE_FLOAT64_C( -561.39) }, UINT8_C(185), { SIMDE_FLOAT64_C( -967.69), SIMDE_FLOAT64_C( 585.27), SIMDE_FLOAT64_C( -950.48), SIMDE_FLOAT64_C( 747.78), SIMDE_FLOAT64_C( -788.49), SIMDE_FLOAT64_C( 269.05), SIMDE_FLOAT64_C( 542.46), SIMDE_FLOAT64_C( -784.79) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 695.30), SIMDE_FLOAT64_C( -764.79), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 897.70), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 709.71), SIMDE_FLOAT64_C( -364.49), SIMDE_FLOAT64_C( -94.02), SIMDE_FLOAT64_C( 798.81), SIMDE_FLOAT64_C( -121.37), SIMDE_FLOAT64_C( -895.52), SIMDE_FLOAT64_C( 566.47), SIMDE_FLOAT64_C( 304.22) }, UINT8_C(190), { SIMDE_FLOAT64_C( 320.89), SIMDE_FLOAT64_C( -543.23), SIMDE_FLOAT64_C( 185.80), SIMDE_FLOAT64_C( 977.88), SIMDE_FLOAT64_C( -4.07), SIMDE_FLOAT64_C( 247.88), SIMDE_FLOAT64_C( 673.18), SIMDE_FLOAT64_C( 231.13) }, { SIMDE_FLOAT64_C( 709.71), SIMDE_FLOAT64_C( 0.00), 
SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 566.47), SIMDE_FLOAT64_C( 1.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_cdfnorm_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_cdfnorminv_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 0.90) }, { SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 1.28) } }, { { SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.57) }, { SIMDE_FLOAT32_C( -1.41), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.18) } }, { { SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.19) }, { SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -0.88) } }, { { SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.53) }, { SIMDE_FLOAT32_C( -0.23), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( -2.05), SIMDE_FLOAT32_C( 0.08) } }, { { SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.99) }, { SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( 2.33) } }, { { SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.81) }, { SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( 0.88) } }, { { SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.92) }, { 
SIMDE_FLOAT32_C( -1.34), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 1.41) } }, { { SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.84) }, { SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.99) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_cdfnorminv_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_cdfnorminv_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( 0.77) }, { SIMDE_FLOAT64_C( 0.77), SIMDE_FLOAT64_C( 0.74) } }, { { SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 0.34) }, { SIMDE_FLOAT64_C( 1.48), SIMDE_FLOAT64_C( -0.41) } }, { { SIMDE_FLOAT64_C( 0.02), SIMDE_FLOAT64_C( 0.32) }, { SIMDE_FLOAT64_C( -2.05), SIMDE_FLOAT64_C( -0.47) } }, { { SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( 0.80) }, { SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( 0.84) } }, { { SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 0.03) }, { SIMDE_FLOAT64_C( 0.15), SIMDE_FLOAT64_C( -1.88) } }, { { SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( 0.02) }, { SIMDE_FLOAT64_C( 0.28), SIMDE_FLOAT64_C( -2.05) } }, { { SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 0.81) }, { SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.88) } }, { { SIMDE_FLOAT64_C( 0.77), SIMDE_FLOAT64_C( 0.04) }, { SIMDE_FLOAT64_C( 0.74), SIMDE_FLOAT64_C( -1.75) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_cdfnorminv_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_cdfnorminv_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } 
test_vec[] = { { { SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.19) }, { SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.23), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.88) } }, { { SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.16) }, { SIMDE_FLOAT32_C( -1.55), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.99) } }, { { SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.62) }, { SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 1.55), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( -1.55), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( 0.31) } }, { { SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.22) }, { SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -1.41), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.77) } }, { { SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.55) }, { SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -1.48), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.77), 
SIMDE_FLOAT32_C( 0.13) } }, { { SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.32) }, { SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( -1.88), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.47) } }, { { SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.75) }, { SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -1.55), SIMDE_FLOAT32_C( 1.23), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( -1.75), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.67) } }, { { SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.85) }, { SIMDE_FLOAT32_C( -1.17), SIMDE_FLOAT32_C( 1.55), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( 1.04) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); // simde__m256 b = simde_mm256_loadu_ps(test_vec[i].b); simde__m256 r = simde_mm256_cdfnorminv_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_cdfnorminv_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( 0.90) }, { SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 1.28) } }, { { SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( 0.68), 
SIMDE_FLOAT64_C( 0.88) }, { SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 1.17) } }, { { SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.78) }, { SIMDE_FLOAT64_C( -0.74), SIMDE_FLOAT64_C( -0.25), SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 0.77) } }, { { SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( 0.63), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.44) }, { SIMDE_FLOAT64_C( -0.67), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( 0.71), SIMDE_FLOAT64_C( -0.15) } }, { { SIMDE_FLOAT64_C( 0.41), SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 0.53) }, { SIMDE_FLOAT64_C( -0.23), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( -0.28), SIMDE_FLOAT64_C( 0.08) } }, { { SIMDE_FLOAT64_C( 0.36), SIMDE_FLOAT64_C( 0.41), SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 0.63) }, { SIMDE_FLOAT64_C( -0.36), SIMDE_FLOAT64_C( -0.23), SIMDE_FLOAT64_C( -0.28), SIMDE_FLOAT64_C( 0.33) } }, { { SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( 0.94), SIMDE_FLOAT64_C( 0.41) }, { SIMDE_FLOAT64_C( -0.52), SIMDE_FLOAT64_C( 1.34), SIMDE_FLOAT64_C( 1.55), SIMDE_FLOAT64_C( -0.23) } }, { { SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( 0.67) }, { SIMDE_FLOAT64_C( -1.34), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.44) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_cdfnorminv_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_cdfnorminv_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.80), 
SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.81) }, { SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 1.55), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -1.13), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( -1.64), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 0.88) } }, { { SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.08) }, { SIMDE_FLOAT32_C( -1.28), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -1.13), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( -0.81), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 1.48), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -1.41) } }, { { SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.01) }, { SIMDE_FLOAT32_C( 1.13), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.81), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -2.33), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 0.03), 
SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( -1.88), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -2.33) } }, { { SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.34) }, { SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( -1.13), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( -1.75), SIMDE_FLOAT32_C( -1.17), SIMDE_FLOAT32_C( -1.04), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( -1.55), SIMDE_FLOAT32_C( -1.34), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( -0.41) } }, { { SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.04) }, { SIMDE_FLOAT32_C( -2.33), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( -1.88), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.23), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( -1.75) } }, { { SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 
0.92), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.22) }, { SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( -1.48), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 1.41), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( -1.48), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( -0.23), SIMDE_FLOAT32_C( -0.77) } }, { { SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.60) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( 1.55), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 1.48), SIMDE_FLOAT32_C( -1.55), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -1.28), SIMDE_FLOAT32_C( -1.23), SIMDE_FLOAT32_C( 0.25) } }, { { SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.56) }, { SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( -1.55), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -1.13), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( -1.41), SIMDE_FLOAT32_C( -1.04), SIMDE_FLOAT32_C( 
0.88), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -1.55), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 0.15) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_cdfnorminv_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_cdfnorminv_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.03) }, UINT8_C(249), { SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.81) }, { SIMDE_FLOAT32_C( -1.08), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -2.33), SIMDE_FLOAT32_C( -1.88), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.81), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.03) } }, { { SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 
0.20), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.65) }, UINT8_C(209), { SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.00) }, { SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.65) } }, { { SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.10) }, UINT8_C(123), { SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.10) }, { SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( 0.65), 
SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.10) } }, { { SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.95) }, UINT8_C( 43), { SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.36) }, { SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( -1.41), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.95) } }, { { SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.70) }, UINT8_C( 66), { SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 
0.21), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.72) }, { SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.70) } }, { { SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.02) }, UINT8_C(157), { SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 0.39) }, { SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 1.41), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.02) } }, { { SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.34), 
SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.95) }, UINT8_C( 65), { SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.41) }, { SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( -1.08), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.95) } }, { { SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.52) }, UINT8_C(240), { SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.26) }, { SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 1.41), SIMDE_FLOAT32_C( -1.08), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.74), 
SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.52) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_cdfnorminv_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_cdfnorminv_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 0.88), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( 0.89) }, { SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( 0.77), SIMDE_FLOAT64_C( -0.05), SIMDE_FLOAT64_C( -0.28), SIMDE_FLOAT64_C( 1.17), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 1.23) } }, { { SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 0.28), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( 0.87), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.51) }, { SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( 0.88), SIMDE_FLOAT64_C( 1.13), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 2.05), SIMDE_FLOAT64_C( -0.41), SIMDE_FLOAT64_C( 0.03) } }, { { SIMDE_FLOAT64_C( 0.85), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( 0.28), SIMDE_FLOAT64_C( 0.73), SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 0.96) }, { SIMDE_FLOAT64_C( 1.04), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( 1.64), SIMDE_FLOAT64_C( 1.75) } }, { { SIMDE_FLOAT64_C( 0.86), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( 0.62), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 
0.60), SIMDE_FLOAT64_C( 0.95) }, { SIMDE_FLOAT64_C( 1.08), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( -1.88), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.31), SIMDE_FLOAT64_C( -1.75), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( 1.64) } }, { { SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( 0.28), SIMDE_FLOAT64_C( 0.71), SIMDE_FLOAT64_C( 0.83) }, { SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( -0.64), SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 0.95) } }, { { SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( 0.70), SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 0.35) }, { SIMDE_FLOAT64_C( 0.15), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( 1.23), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -1.75), SIMDE_FLOAT64_C( -0.28), SIMDE_FLOAT64_C( -0.39) } }, { { SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.15), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( 0.77) }, { SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( -1.04), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 1.34), SIMDE_FLOAT64_C( -1.28), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( 0.74) } }, { { SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( 0.86), SIMDE_FLOAT64_C( 0.21), SIMDE_FLOAT64_C( 0.15), SIMDE_FLOAT64_C( 0.28) }, { SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 0.28), SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( 1.08), SIMDE_FLOAT64_C( -0.81), SIMDE_FLOAT64_C( -1.04), SIMDE_FLOAT64_C( -0.58) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = 
simde_mm512_cdfnorminv_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_cdfnorminv_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( 0.15), SIMDE_FLOAT64_C( 0.36), SIMDE_FLOAT64_C( 0.19) }, UINT8_C( 53), { SIMDE_FLOAT64_C( 0.28), SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( 0.36), SIMDE_FLOAT64_C( 0.21), SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( 0.24) }, { SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( -0.36), SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( -1.23), SIMDE_FLOAT64_C( 0.36), SIMDE_FLOAT64_C( 0.19) } }, { { SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( 0.49), SIMDE_FLOAT64_C( 0.16), SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( 0.49), SIMDE_FLOAT64_C( 0.21), SIMDE_FLOAT64_C( 0.47) }, UINT8_C( 92), { SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( 0.17), SIMDE_FLOAT64_C( 0.21), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( 0.19), SIMDE_FLOAT64_C( 0.46) }, { SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( -0.81), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( -0.18), SIMDE_FLOAT64_C( 0.49), SIMDE_FLOAT64_C( -0.88), SIMDE_FLOAT64_C( 0.47) } }, { { SIMDE_FLOAT64_C( 0.28), SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 0.15), SIMDE_FLOAT64_C( 0.31), SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( 0.18) }, UINT8_C(232), { SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( 0.19), SIMDE_FLOAT64_C( 0.19), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.39) }, { SIMDE_FLOAT64_C( 0.28), 
SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( -0.88), SIMDE_FLOAT64_C( 0.15), SIMDE_FLOAT64_C( -0.28), SIMDE_FLOAT64_C( -0.41), SIMDE_FLOAT64_C( -0.28) } }, { { SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 0.17), SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( 0.25) }, UINT8_C(135), { SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.35), SIMDE_FLOAT64_C( 0.15) }, { SIMDE_FLOAT64_C( -0.52), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( -0.13), SIMDE_FLOAT64_C( 0.17), SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( -1.04) } }, { { SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 0.36), SIMDE_FLOAT64_C( 0.49), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.41), SIMDE_FLOAT64_C( 0.25) }, UINT8_C(111), { SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 0.15), SIMDE_FLOAT64_C( 0.41), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.31) }, { SIMDE_FLOAT64_C( -1.28), SIMDE_FLOAT64_C( -1.04), SIMDE_FLOAT64_C( -0.23), SIMDE_FLOAT64_C( -0.41), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.25) } }, { { SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( 0.41), SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.34) }, UINT8_C( 67), { SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 0.31), SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( 0.29) }, { SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( 0.41), SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( -1.17), SIMDE_FLOAT64_C( 
0.34) } }, { { SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 0.30) }, UINT8_C(205), { SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 0.16), SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.36) }, { SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( -0.18), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( -0.36) } }, { { SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.16), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 0.46) }, UINT8_C( 64), { SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( 0.31) }, { SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.16), SIMDE_FLOAT64_C( -1.13), SIMDE_FLOAT64_C( 0.46) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_cdfnorminv_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_cexp_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 8.03), SIMDE_FLOAT32_C( 6.08), SIMDE_FLOAT32_C( 9.10) }, { SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 2.90), SIMDE_FLOAT32_C( -414.18), SIMDE_FLOAT32_C( 139.46) } }, { { SIMDE_FLOAT32_C( 1.28), SIMDE_FLOAT32_C( 5.24), 
SIMDE_FLOAT32_C( 3.60), SIMDE_FLOAT32_C( 4.31) }, { SIMDE_FLOAT32_C( 1.81), SIMDE_FLOAT32_C( -3.11), SIMDE_FLOAT32_C( -14.33), SIMDE_FLOAT32_C( -33.68) } }, { { SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 1.48), SIMDE_FLOAT32_C( 6.66), SIMDE_FLOAT32_C( 7.44) }, { SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 1.15), SIMDE_FLOAT32_C( 313.98), SIMDE_FLOAT32_C( 714.61) } }, { { SIMDE_FLOAT32_C( 5.32), SIMDE_FLOAT32_C( 1.63), SIMDE_FLOAT32_C( 3.75), SIMDE_FLOAT32_C( 1.94) }, { SIMDE_FLOAT32_C( -12.09), SIMDE_FLOAT32_C( 204.03), SIMDE_FLOAT32_C( -15.34), SIMDE_FLOAT32_C( 39.66) } }, { { SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 4.84), SIMDE_FLOAT32_C( 7.08), SIMDE_FLOAT32_C( 8.24) }, { SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( -2.54), SIMDE_FLOAT32_C( -447.27), SIMDE_FLOAT32_C( 1100.55) } }, { { SIMDE_FLOAT32_C( 3.07), SIMDE_FLOAT32_C( 7.57), SIMDE_FLOAT32_C( 8.46), SIMDE_FLOAT32_C( 6.20) }, { SIMDE_FLOAT32_C( 6.04), SIMDE_FLOAT32_C( 20.68), SIMDE_FLOAT32_C( 4705.73), SIMDE_FLOAT32_C( -392.35) } }, { { SIMDE_FLOAT32_C( 6.65), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 5.84), SIMDE_FLOAT32_C( 9.64) }, { SIMDE_FLOAT32_C( 596.01), SIMDE_FLOAT32_C( 491.91), SIMDE_FLOAT32_C( -335.85), SIMDE_FLOAT32_C( -73.42) } }, { { SIMDE_FLOAT32_C( 5.18), SIMDE_FLOAT32_C( 4.56), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 6.26) }, { SIMDE_FLOAT32_C( -26.97), SIMDE_FLOAT32_C( -175.62), SIMDE_FLOAT32_C( 2.83), SIMDE_FLOAT32_C( -0.07) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_cexp_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_cexp_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 5.22), SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( 9.44), SIMDE_FLOAT32_C( 1.49), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 9.55), 
SIMDE_FLOAT32_C( 7.98) }, { SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( -1.33), SIMDE_FLOAT32_C( -3.86), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( -1.81), SIMDE_FLOAT32_C( 4.05), SIMDE_FLOAT32_C( -1765.21), SIMDE_FLOAT32_C( 13933.33) } }, { { SIMDE_FLOAT32_C( 9.68), SIMDE_FLOAT32_C( 3.77), SIMDE_FLOAT32_C( 6.40), SIMDE_FLOAT32_C( 1.44), SIMDE_FLOAT32_C( 7.91), SIMDE_FLOAT32_C( 7.80), SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( 3.48) }, { SIMDE_FLOAT32_C(-12938.99), SIMDE_FLOAT32_C( -9402.48), SIMDE_FLOAT32_C( 78.49), SIMDE_FLOAT32_C( 596.70), SIMDE_FLOAT32_C( 147.00), SIMDE_FLOAT32_C( 2720.42), SIMDE_FLOAT32_C( -3.20), SIMDE_FLOAT32_C( -1.12) } }, { { SIMDE_FLOAT32_C( 2.89), SIMDE_FLOAT32_C( 8.55), SIMDE_FLOAT32_C( 4.24), SIMDE_FLOAT32_C( 4.12), SIMDE_FLOAT32_C( 7.15), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 6.80), SIMDE_FLOAT32_C( 3.92) }, { SIMDE_FLOAT32_C( -11.54), SIMDE_FLOAT32_C( 13.81), SIMDE_FLOAT32_C( -38.75), SIMDE_FLOAT32_C( -57.58), SIMDE_FLOAT32_C( 761.70), SIMDE_FLOAT32_C( 1021.35), SIMDE_FLOAT32_C( -639.30), SIMDE_FLOAT32_C( -630.42) } }, { { SIMDE_FLOAT32_C( 4.44), SIMDE_FLOAT32_C( 7.17), SIMDE_FLOAT32_C( 7.74), SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( 3.91), SIMDE_FLOAT32_C( 7.33), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 4.33) }, { SIMDE_FLOAT32_C( 53.57), SIMDE_FLOAT32_C( 65.71), SIMDE_FLOAT32_C( -1565.39), SIMDE_FLOAT32_C( 1683.01), SIMDE_FLOAT32_C( 24.97), SIMDE_FLOAT32_C( 43.20), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( -1.12) } }, { { SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 3.76), SIMDE_FLOAT32_C( 4.04), SIMDE_FLOAT32_C( 3.54), SIMDE_FLOAT32_C( 3.32), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( 3.21) }, { SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 12.80), SIMDE_FLOAT32_C( -26.75), SIMDE_FLOAT32_C( -33.60), SIMDE_FLOAT32_C( -33.92), SIMDE_FLOAT32_C( -6.12), SIMDE_FLOAT32_C( -7.52), SIMDE_FLOAT32_C( -0.52) } }, { { SIMDE_FLOAT32_C( 7.08), SIMDE_FLOAT32_C( 8.42), SIMDE_FLOAT32_C( 4.66), SIMDE_FLOAT32_C( 4.99), SIMDE_FLOAT32_C( 
6.22), SIMDE_FLOAT32_C( 5.87), SIMDE_FLOAT32_C( 8.47), SIMDE_FLOAT32_C( 9.11) }, { SIMDE_FLOAT32_C( -637.08), SIMDE_FLOAT32_C( 1002.70), SIMDE_FLOAT32_C( 28.95), SIMDE_FLOAT32_C( -101.59), SIMDE_FLOAT32_C( 460.40), SIMDE_FLOAT32_C( -201.85), SIMDE_FLOAT32_C( -4535.17), SIMDE_FLOAT32_C( 1476.67) } }, { { SIMDE_FLOAT32_C( 4.42), SIMDE_FLOAT32_C( 2.71), SIMDE_FLOAT32_C( 3.23), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 3.64), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 5.49), SIMDE_FLOAT32_C( 8.08) }, { SIMDE_FLOAT32_C( -75.48), SIMDE_FLOAT32_C( 34.76), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 25.28), SIMDE_FLOAT32_C( 38.07), SIMDE_FLOAT32_C( 1.14), SIMDE_FLOAT32_C( -54.29), SIMDE_FLOAT32_C( 236.10) } }, { { SIMDE_FLOAT32_C( 7.19), SIMDE_FLOAT32_C( 3.23), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 1.10), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 5.43), SIMDE_FLOAT32_C( 3.11) }, { SIMDE_FLOAT32_C( -1320.92), SIMDE_FLOAT32_C( -117.08), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 1.33), SIMDE_FLOAT32_C( 1.45), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( -228.04), SIMDE_FLOAT32_C( 7.21) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_cexp_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_clog_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 467.27), SIMDE_FLOAT32_C( -810.49), SIMDE_FLOAT32_C( -408.53), SIMDE_FLOAT32_C( -463.46) }, { SIMDE_FLOAT32_C( 6.84), SIMDE_FLOAT32_C( -1.05), SIMDE_FLOAT32_C( 6.43), SIMDE_FLOAT32_C( -2.29) } }, { { SIMDE_FLOAT32_C( -597.00), SIMDE_FLOAT32_C( 144.37), SIMDE_FLOAT32_C( 819.91), SIMDE_FLOAT32_C( 258.51) }, { SIMDE_FLOAT32_C( 6.42), SIMDE_FLOAT32_C( 2.90), SIMDE_FLOAT32_C( 6.76), SIMDE_FLOAT32_C( 0.31) } }, { { SIMDE_FLOAT32_C( -690.61), 
SIMDE_FLOAT32_C( -496.03), SIMDE_FLOAT32_C( -379.26), SIMDE_FLOAT32_C( 822.50) }, { SIMDE_FLOAT32_C( 6.75), SIMDE_FLOAT32_C( -2.52), SIMDE_FLOAT32_C( 6.81), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( 369.47), SIMDE_FLOAT32_C( 917.67), SIMDE_FLOAT32_C( 917.67), SIMDE_FLOAT32_C( 649.13) }, { SIMDE_FLOAT32_C( 6.90), SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 7.02), SIMDE_FLOAT32_C( 0.62) } }, { { SIMDE_FLOAT32_C( -165.00), SIMDE_FLOAT32_C( -18.10), SIMDE_FLOAT32_C( 943.19), SIMDE_FLOAT32_C( 635.72) }, { SIMDE_FLOAT32_C( 5.11), SIMDE_FLOAT32_C( -3.03), SIMDE_FLOAT32_C( 7.04), SIMDE_FLOAT32_C( 0.59) } }, { { SIMDE_FLOAT32_C( -21.66), SIMDE_FLOAT32_C( 494.23), SIMDE_FLOAT32_C( -734.58), SIMDE_FLOAT32_C( 417.20) }, { SIMDE_FLOAT32_C( 6.20), SIMDE_FLOAT32_C( 1.61), SIMDE_FLOAT32_C( 6.74), SIMDE_FLOAT32_C( 2.63) } }, { { SIMDE_FLOAT32_C( 812.64), SIMDE_FLOAT32_C( -983.61), SIMDE_FLOAT32_C( 15.40), SIMDE_FLOAT32_C( 505.51) }, { SIMDE_FLOAT32_C( 7.15), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 6.23), SIMDE_FLOAT32_C( 1.54) } }, { { SIMDE_FLOAT32_C( -497.22), SIMDE_FLOAT32_C( 590.38), SIMDE_FLOAT32_C( 600.11), SIMDE_FLOAT32_C( 970.05) }, { SIMDE_FLOAT32_C( 6.65), SIMDE_FLOAT32_C( 2.27), SIMDE_FLOAT32_C( 7.04), SIMDE_FLOAT32_C( 1.02) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_clog_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_clog_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 312.27), SIMDE_FLOAT32_C( 505.55), SIMDE_FLOAT32_C( 862.46), SIMDE_FLOAT32_C( 31.99), SIMDE_FLOAT32_C( 800.53), SIMDE_FLOAT32_C( 181.00), SIMDE_FLOAT32_C( 161.95), SIMDE_FLOAT32_C( -71.19) }, { SIMDE_FLOAT32_C( 6.39), SIMDE_FLOAT32_C( 1.02), SIMDE_FLOAT32_C( 6.76), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 6.71), 
SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 5.18), SIMDE_FLOAT32_C( -0.41) } }, { { SIMDE_FLOAT32_C( 183.06), SIMDE_FLOAT32_C( 131.57), SIMDE_FLOAT32_C( 568.96), SIMDE_FLOAT32_C( 107.92), SIMDE_FLOAT32_C( 898.15), SIMDE_FLOAT32_C( 154.17), SIMDE_FLOAT32_C( 262.39), SIMDE_FLOAT32_C( 850.07) }, { SIMDE_FLOAT32_C( 5.42), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 6.36), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 6.81), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 6.79), SIMDE_FLOAT32_C( 1.27) } }, { { SIMDE_FLOAT32_C( 459.40), SIMDE_FLOAT32_C( 479.25), SIMDE_FLOAT32_C( 503.31), SIMDE_FLOAT32_C( 451.65), SIMDE_FLOAT32_C( 353.11), SIMDE_FLOAT32_C( 438.44), SIMDE_FLOAT32_C( 777.37), SIMDE_FLOAT32_C( 20.59) }, { SIMDE_FLOAT32_C( 6.50), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 6.52), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 6.33), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 6.66), SIMDE_FLOAT32_C( 0.03) } }, { { SIMDE_FLOAT32_C( -35.16), SIMDE_FLOAT32_C( 449.22), SIMDE_FLOAT32_C( -48.41), SIMDE_FLOAT32_C( 925.44), SIMDE_FLOAT32_C( 309.83), SIMDE_FLOAT32_C( 130.15), SIMDE_FLOAT32_C( 38.89), SIMDE_FLOAT32_C( 722.10) }, { SIMDE_FLOAT32_C( 6.11), SIMDE_FLOAT32_C( 1.65), SIMDE_FLOAT32_C( 6.83), SIMDE_FLOAT32_C( 1.62), SIMDE_FLOAT32_C( 5.82), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 6.58), SIMDE_FLOAT32_C( 1.52) } }, { { SIMDE_FLOAT32_C( 735.70), SIMDE_FLOAT32_C( -98.65), SIMDE_FLOAT32_C( 854.09), SIMDE_FLOAT32_C( 536.23), SIMDE_FLOAT32_C( 182.34), SIMDE_FLOAT32_C( 16.04), SIMDE_FLOAT32_C( 565.04), SIMDE_FLOAT32_C( 465.40) }, { SIMDE_FLOAT32_C( 6.61), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 6.92), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 5.21), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 6.60), SIMDE_FLOAT32_C( 0.69) } }, { { SIMDE_FLOAT32_C( 247.61), SIMDE_FLOAT32_C( 134.00), SIMDE_FLOAT32_C( 673.33), SIMDE_FLOAT32_C( 145.76), SIMDE_FLOAT32_C( 388.17), SIMDE_FLOAT32_C( -64.29), SIMDE_FLOAT32_C( -4.17), SIMDE_FLOAT32_C( 947.57) }, { SIMDE_FLOAT32_C( 5.64), SIMDE_FLOAT32_C( 0.50), 
SIMDE_FLOAT32_C( 6.54), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 5.97), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( 6.85), SIMDE_FLOAT32_C( 1.58) } }, { { SIMDE_FLOAT32_C( 514.96), SIMDE_FLOAT32_C( 599.14), SIMDE_FLOAT32_C( 399.22), SIMDE_FLOAT32_C( 968.07), SIMDE_FLOAT32_C( 37.59), SIMDE_FLOAT32_C( 176.60), SIMDE_FLOAT32_C( -11.35), SIMDE_FLOAT32_C( 102.43) }, { SIMDE_FLOAT32_C( 6.67), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 6.95), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 5.20), SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( 4.64), SIMDE_FLOAT32_C( 1.68) } }, { { SIMDE_FLOAT32_C( 725.82), SIMDE_FLOAT32_C( 40.24), SIMDE_FLOAT32_C( 27.87), SIMDE_FLOAT32_C( 35.65), SIMDE_FLOAT32_C( 270.39), SIMDE_FLOAT32_C( 166.76), SIMDE_FLOAT32_C( 857.75), SIMDE_FLOAT32_C( 6.09) }, { SIMDE_FLOAT32_C( 6.59), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 5.76), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 6.75), SIMDE_FLOAT32_C( 0.01) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_clog_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_csqrt_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 467.84), SIMDE_FLOAT32_C( 803.96), SIMDE_FLOAT32_C( 261.38), SIMDE_FLOAT32_C( -142.34) }, { SIMDE_FLOAT32_C( 26.44), SIMDE_FLOAT32_C( 15.20), SIMDE_FLOAT32_C( 16.72), SIMDE_FLOAT32_C( -4.26) } }, { { SIMDE_FLOAT32_C( 742.87), SIMDE_FLOAT32_C( 79.67), SIMDE_FLOAT32_C( 840.90), SIMDE_FLOAT32_C( -323.18) }, { SIMDE_FLOAT32_C( 27.29), SIMDE_FLOAT32_C( 1.46), SIMDE_FLOAT32_C( 29.51), SIMDE_FLOAT32_C( -5.48) } }, { { SIMDE_FLOAT32_C( -240.48), SIMDE_FLOAT32_C( -541.73), SIMDE_FLOAT32_C( 989.55), SIMDE_FLOAT32_C( 570.06) }, { SIMDE_FLOAT32_C( 13.27), SIMDE_FLOAT32_C( -20.41), SIMDE_FLOAT32_C( 
32.65), SIMDE_FLOAT32_C( 8.73) } }, { { SIMDE_FLOAT32_C( 83.09), SIMDE_FLOAT32_C( -1.32), SIMDE_FLOAT32_C( 106.90), SIMDE_FLOAT32_C( -376.28) }, { SIMDE_FLOAT32_C( 9.12), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 15.78), SIMDE_FLOAT32_C( -11.92) } }, { { SIMDE_FLOAT32_C( -403.08), SIMDE_FLOAT32_C( 970.42), SIMDE_FLOAT32_C( -962.81), SIMDE_FLOAT32_C( 736.64) }, { SIMDE_FLOAT32_C( 18.00), SIMDE_FLOAT32_C( 26.96), SIMDE_FLOAT32_C( 11.17), SIMDE_FLOAT32_C( 32.98) } }, { { SIMDE_FLOAT32_C( 711.24), SIMDE_FLOAT32_C( -757.45), SIMDE_FLOAT32_C( 634.59), SIMDE_FLOAT32_C( -16.19) }, { SIMDE_FLOAT32_C( 29.58), SIMDE_FLOAT32_C( -12.80), SIMDE_FLOAT32_C( 25.19), SIMDE_FLOAT32_C( -0.32) } }, { { SIMDE_FLOAT32_C( 81.29), SIMDE_FLOAT32_C( -815.58), SIMDE_FLOAT32_C( -317.77), SIMDE_FLOAT32_C( -90.40) }, { SIMDE_FLOAT32_C( 21.22), SIMDE_FLOAT32_C( -19.21), SIMDE_FLOAT32_C( 2.51), SIMDE_FLOAT32_C( -18.00) } }, { { SIMDE_FLOAT32_C( -84.58), SIMDE_FLOAT32_C( 322.77), SIMDE_FLOAT32_C( 454.95), SIMDE_FLOAT32_C( -616.74) }, { SIMDE_FLOAT32_C( 11.16), SIMDE_FLOAT32_C( 14.46), SIMDE_FLOAT32_C( 24.71), SIMDE_FLOAT32_C( -12.48) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_csqrt_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_csqrt_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 374.45), SIMDE_FLOAT32_C( -986.58), SIMDE_FLOAT32_C( -44.44), SIMDE_FLOAT32_C( -160.79), SIMDE_FLOAT32_C( -660.98), SIMDE_FLOAT32_C( -996.70), SIMDE_FLOAT32_C( -22.70), SIMDE_FLOAT32_C( -74.73) }, { SIMDE_FLOAT32_C( 26.74), SIMDE_FLOAT32_C( -18.45), SIMDE_FLOAT32_C( 7.82), SIMDE_FLOAT32_C( -10.28), SIMDE_FLOAT32_C( 16.36), SIMDE_FLOAT32_C( -30.47), SIMDE_FLOAT32_C( 5.26), SIMDE_FLOAT32_C( -7.10) } }, { { SIMDE_FLOAT32_C( -335.08), 
SIMDE_FLOAT32_C( -387.45), SIMDE_FLOAT32_C( 992.50), SIMDE_FLOAT32_C( 334.99), SIMDE_FLOAT32_C( -373.08), SIMDE_FLOAT32_C( -939.30), SIMDE_FLOAT32_C( 219.57), SIMDE_FLOAT32_C( -565.96) }, { SIMDE_FLOAT32_C( 9.41), SIMDE_FLOAT32_C( -20.58), SIMDE_FLOAT32_C( 31.94), SIMDE_FLOAT32_C( 5.24), SIMDE_FLOAT32_C( 17.85), SIMDE_FLOAT32_C( -26.30), SIMDE_FLOAT32_C( 20.33), SIMDE_FLOAT32_C( -13.92) } }, { { SIMDE_FLOAT32_C( 626.25), SIMDE_FLOAT32_C( -390.81), SIMDE_FLOAT32_C( 653.44), SIMDE_FLOAT32_C( 423.64), SIMDE_FLOAT32_C( 320.72), SIMDE_FLOAT32_C( 749.19), SIMDE_FLOAT32_C( -605.94), SIMDE_FLOAT32_C( 183.09) }, { SIMDE_FLOAT32_C( 26.12), SIMDE_FLOAT32_C( -7.48), SIMDE_FLOAT32_C( 26.76), SIMDE_FLOAT32_C( 7.92), SIMDE_FLOAT32_C( 23.83), SIMDE_FLOAT32_C( 15.72), SIMDE_FLOAT32_C( 3.68), SIMDE_FLOAT32_C( 24.89) } }, { { SIMDE_FLOAT32_C( 911.79), SIMDE_FLOAT32_C( 134.97), SIMDE_FLOAT32_C( -550.62), SIMDE_FLOAT32_C( -842.16), SIMDE_FLOAT32_C( 650.87), SIMDE_FLOAT32_C( -128.95), SIMDE_FLOAT32_C( 295.76), SIMDE_FLOAT32_C( 25.32) }, { SIMDE_FLOAT32_C( 30.28), SIMDE_FLOAT32_C( 2.23), SIMDE_FLOAT32_C( 15.09), SIMDE_FLOAT32_C( -27.90), SIMDE_FLOAT32_C( 25.64), SIMDE_FLOAT32_C( -2.52), SIMDE_FLOAT32_C( 17.21), SIMDE_FLOAT32_C( 0.74) } }, { { SIMDE_FLOAT32_C( -115.53), SIMDE_FLOAT32_C( -748.68), SIMDE_FLOAT32_C( 864.53), SIMDE_FLOAT32_C( 223.49), SIMDE_FLOAT32_C( -745.38), SIMDE_FLOAT32_C( -158.17), SIMDE_FLOAT32_C( -851.24), SIMDE_FLOAT32_C( -80.46) }, { SIMDE_FLOAT32_C( 17.92), SIMDE_FLOAT32_C( -20.89), SIMDE_FLOAT32_C( 29.64), SIMDE_FLOAT32_C( 3.77), SIMDE_FLOAT32_C( 2.88), SIMDE_FLOAT32_C( -27.45), SIMDE_FLOAT32_C( 1.38), SIMDE_FLOAT32_C( -29.21) } }, { { SIMDE_FLOAT32_C( 454.37), SIMDE_FLOAT32_C( -858.75), SIMDE_FLOAT32_C( -745.47), SIMDE_FLOAT32_C( -918.71), SIMDE_FLOAT32_C( -798.04), SIMDE_FLOAT32_C( 474.10), SIMDE_FLOAT32_C( -484.67), SIMDE_FLOAT32_C( 828.20) }, { SIMDE_FLOAT32_C( 26.70), SIMDE_FLOAT32_C( -16.08), SIMDE_FLOAT32_C( 14.79), SIMDE_FLOAT32_C( -31.05), 
SIMDE_FLOAT32_C( 8.07), SIMDE_FLOAT32_C( 29.38), SIMDE_FLOAT32_C( 15.41), SIMDE_FLOAT32_C( 26.87) } }, { { SIMDE_FLOAT32_C( -916.70), SIMDE_FLOAT32_C( -831.23), SIMDE_FLOAT32_C( 251.85), SIMDE_FLOAT32_C( 404.02), SIMDE_FLOAT32_C( 917.96), SIMDE_FLOAT32_C( 645.91), SIMDE_FLOAT32_C( -412.89), SIMDE_FLOAT32_C( 829.74) }, { SIMDE_FLOAT32_C( 12.66), SIMDE_FLOAT32_C( -32.82), SIMDE_FLOAT32_C( 19.08), SIMDE_FLOAT32_C( 10.59), SIMDE_FLOAT32_C( 31.94), SIMDE_FLOAT32_C( 10.11), SIMDE_FLOAT32_C( 16.03), SIMDE_FLOAT32_C( 25.88) } }, { { SIMDE_FLOAT32_C( -219.12), SIMDE_FLOAT32_C( 36.49), SIMDE_FLOAT32_C( 987.58), SIMDE_FLOAT32_C( -568.25), SIMDE_FLOAT32_C( 907.54), SIMDE_FLOAT32_C( 283.34), SIMDE_FLOAT32_C( 457.07), SIMDE_FLOAT32_C( -207.99) }, { SIMDE_FLOAT32_C( 1.23), SIMDE_FLOAT32_C( 14.85), SIMDE_FLOAT32_C( 32.61), SIMDE_FLOAT32_C( -8.71), SIMDE_FLOAT32_C( 30.48), SIMDE_FLOAT32_C( 4.65), SIMDE_FLOAT32_C( 21.90), SIMDE_FLOAT32_C( -4.75) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_csqrt_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_cos_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.49)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -0.88)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.33), 
SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( -0.94)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.95)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( 0.85)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 1.00)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 0.48)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( -0.61)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_cos_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cos_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 0.49)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.66), SIMDE_FLOAT64_C( 0.26)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.54), SIMDE_FLOAT64_C( -0.88)) }, { 
simde_mm_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( -0.47)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( -0.94)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.33)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46)), simde_mm_set_pd(SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 0.95)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 0.30)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cos_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_cos_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.49)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( -0.94)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), 
SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( 0.85)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 0.48)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 28.08), SIMDE_FLOAT32_C( 841.21)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 0.74)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -30.79)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.81)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( 398.82)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.85), 
SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( -0.99)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 254.31)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( -0.99)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_cos_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_cos_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.66), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 0.49)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( -0.47), SIMDE_FLOAT64_C( -0.54), SIMDE_FLOAT64_C( -0.88)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( -0.94)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 0.95)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -860.95), 
SIMDE_FLOAT64_C( -417.54), SIMDE_FLOAT64_C( 696.87)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.94), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( -0.96), SIMDE_FLOAT64_C( 0.85)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -923.64)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( 0.73), SIMDE_FLOAT64_C( 1.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -660.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( -0.88), SIMDE_FLOAT64_C( 0.48)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 687.09)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.64), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.63), SIMDE_FLOAT64_C( -0.61)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_cos_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cbrt_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -233.95), SIMDE_FLOAT32_C( 484.12), SIMDE_FLOAT32_C( -627.08), SIMDE_FLOAT32_C( -978.93) }, { SIMDE_FLOAT32_C( -6.16), SIMDE_FLOAT32_C( 7.85), SIMDE_FLOAT32_C( -8.56), SIMDE_FLOAT32_C( -9.93) } }, { { SIMDE_FLOAT32_C( -749.83), SIMDE_FLOAT32_C( 484.28), SIMDE_FLOAT32_C( 749.02), SIMDE_FLOAT32_C( 850.44) }, { SIMDE_FLOAT32_C( -9.08), SIMDE_FLOAT32_C( 7.85), SIMDE_FLOAT32_C( 9.08), SIMDE_FLOAT32_C( 9.47) } }, { { SIMDE_FLOAT32_C( -517.39), SIMDE_FLOAT32_C( -725.46), SIMDE_FLOAT32_C( -558.90), SIMDE_FLOAT32_C( -267.33) }, { SIMDE_FLOAT32_C( -8.03), SIMDE_FLOAT32_C( -8.99), SIMDE_FLOAT32_C( -8.24), SIMDE_FLOAT32_C( -6.44) } }, { { SIMDE_FLOAT32_C( 569.35), SIMDE_FLOAT32_C( 995.62), SIMDE_FLOAT32_C( 709.27), 
SIMDE_FLOAT32_C( -107.57) }, { SIMDE_FLOAT32_C( 8.29), SIMDE_FLOAT32_C( 9.99), SIMDE_FLOAT32_C( 8.92), SIMDE_FLOAT32_C( -4.76) } }, { { SIMDE_FLOAT32_C( 350.06), SIMDE_FLOAT32_C( 89.99), SIMDE_FLOAT32_C( 267.98), SIMDE_FLOAT32_C( -152.18) }, { SIMDE_FLOAT32_C( 7.05), SIMDE_FLOAT32_C( 4.48), SIMDE_FLOAT32_C( 6.45), SIMDE_FLOAT32_C( -5.34) } }, { { SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 317.87), SIMDE_FLOAT32_C( -435.79), SIMDE_FLOAT32_C( -295.24) }, { SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 6.82), SIMDE_FLOAT32_C( -7.58), SIMDE_FLOAT32_C( -6.66) } }, { { SIMDE_FLOAT32_C( 382.46), SIMDE_FLOAT32_C( 327.49), SIMDE_FLOAT32_C( -186.96), SIMDE_FLOAT32_C( 913.54) }, { SIMDE_FLOAT32_C( 7.26), SIMDE_FLOAT32_C( 6.89), SIMDE_FLOAT32_C( -5.72), SIMDE_FLOAT32_C( 9.70) } }, { { SIMDE_FLOAT32_C( 619.00), SIMDE_FLOAT32_C( 936.03), SIMDE_FLOAT32_C( 27.91), SIMDE_FLOAT32_C( -614.95) }, { SIMDE_FLOAT32_C( 8.52), SIMDE_FLOAT32_C( 9.78), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( -8.50) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_cbrt_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_cbrt_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -517.18), SIMDE_FLOAT64_C( 744.08) }, { SIMDE_FLOAT64_C( -8.03), SIMDE_FLOAT64_C( 9.06) } }, { { SIMDE_FLOAT64_C( 664.94), SIMDE_FLOAT64_C( 255.05) }, { SIMDE_FLOAT64_C( 8.73), SIMDE_FLOAT64_C( 6.34) } }, { { SIMDE_FLOAT64_C( 38.42), SIMDE_FLOAT64_C( 432.02) }, { SIMDE_FLOAT64_C( 3.37), SIMDE_FLOAT64_C( 7.56) } }, { { SIMDE_FLOAT64_C( -843.35), SIMDE_FLOAT64_C( -957.81) }, { SIMDE_FLOAT64_C( -9.45), SIMDE_FLOAT64_C( -9.86) } }, { { SIMDE_FLOAT64_C( -560.27), SIMDE_FLOAT64_C( 292.64) }, { SIMDE_FLOAT64_C( -8.24), SIMDE_FLOAT64_C( 6.64) } }, { { SIMDE_FLOAT64_C( 329.56), 
SIMDE_FLOAT64_C( 633.90) }, { SIMDE_FLOAT64_C( 6.91), SIMDE_FLOAT64_C( 8.59) } }, { { SIMDE_FLOAT64_C( -774.56), SIMDE_FLOAT64_C( 892.85) }, { SIMDE_FLOAT64_C( -9.18), SIMDE_FLOAT64_C( 9.63) } }, { { SIMDE_FLOAT64_C( 705.03), SIMDE_FLOAT64_C( -332.78) }, { SIMDE_FLOAT64_C( 8.90), SIMDE_FLOAT64_C( -6.93) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_cbrt_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_cbrt_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 14.66), SIMDE_FLOAT32_C( -346.78), SIMDE_FLOAT32_C( 608.16), SIMDE_FLOAT32_C( -175.40), SIMDE_FLOAT32_C( -696.64), SIMDE_FLOAT32_C( -645.46), SIMDE_FLOAT32_C( -765.98), SIMDE_FLOAT32_C( 391.25) }, { SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( -7.03), SIMDE_FLOAT32_C( 8.47), SIMDE_FLOAT32_C( -5.60), SIMDE_FLOAT32_C( -8.86), SIMDE_FLOAT32_C( -8.64), SIMDE_FLOAT32_C( -9.15), SIMDE_FLOAT32_C( 7.31) } }, { { SIMDE_FLOAT32_C( -27.85), SIMDE_FLOAT32_C( 887.61), SIMDE_FLOAT32_C( -720.32), SIMDE_FLOAT32_C( -702.24), SIMDE_FLOAT32_C( -320.58), SIMDE_FLOAT32_C( -360.38), SIMDE_FLOAT32_C( -53.29), SIMDE_FLOAT32_C( 251.62) }, { SIMDE_FLOAT32_C( -3.03), SIMDE_FLOAT32_C( 9.61), SIMDE_FLOAT32_C( -8.96), SIMDE_FLOAT32_C( -8.89), SIMDE_FLOAT32_C( -6.84), SIMDE_FLOAT32_C( -7.12), SIMDE_FLOAT32_C( -3.76), SIMDE_FLOAT32_C( 6.31) } }, { { SIMDE_FLOAT32_C( 677.19), SIMDE_FLOAT32_C( 865.20), SIMDE_FLOAT32_C( -346.98), SIMDE_FLOAT32_C( -605.62), SIMDE_FLOAT32_C( -498.20), SIMDE_FLOAT32_C( 696.85), SIMDE_FLOAT32_C( -203.22), SIMDE_FLOAT32_C( -909.19) }, { SIMDE_FLOAT32_C( 8.78), SIMDE_FLOAT32_C( 9.53), SIMDE_FLOAT32_C( -7.03), SIMDE_FLOAT32_C( -8.46), SIMDE_FLOAT32_C( -7.93), SIMDE_FLOAT32_C( 8.87), SIMDE_FLOAT32_C( -5.88), SIMDE_FLOAT32_C( -9.69) } }, { { 
SIMDE_FLOAT32_C( 46.70), SIMDE_FLOAT32_C( -557.66), SIMDE_FLOAT32_C( -327.34), SIMDE_FLOAT32_C( -489.40), SIMDE_FLOAT32_C( -78.90), SIMDE_FLOAT32_C( -843.63), SIMDE_FLOAT32_C( -527.77), SIMDE_FLOAT32_C( 935.75) }, { SIMDE_FLOAT32_C( 3.60), SIMDE_FLOAT32_C( -8.23), SIMDE_FLOAT32_C( -6.89), SIMDE_FLOAT32_C( -7.88), SIMDE_FLOAT32_C( -4.29), SIMDE_FLOAT32_C( -9.45), SIMDE_FLOAT32_C( -8.08), SIMDE_FLOAT32_C( 9.78) } }, { { SIMDE_FLOAT32_C( -190.41), SIMDE_FLOAT32_C( -919.61), SIMDE_FLOAT32_C( -239.64), SIMDE_FLOAT32_C( 112.95), SIMDE_FLOAT32_C( -565.07), SIMDE_FLOAT32_C( -5.63), SIMDE_FLOAT32_C( -495.80), SIMDE_FLOAT32_C( 407.08) }, { SIMDE_FLOAT32_C( -5.75), SIMDE_FLOAT32_C( -9.72), SIMDE_FLOAT32_C( -6.21), SIMDE_FLOAT32_C( 4.83), SIMDE_FLOAT32_C( -8.27), SIMDE_FLOAT32_C( -1.78), SIMDE_FLOAT32_C( -7.91), SIMDE_FLOAT32_C( 7.41) } }, { { SIMDE_FLOAT32_C( -118.02), SIMDE_FLOAT32_C( -216.12), SIMDE_FLOAT32_C( 704.84), SIMDE_FLOAT32_C( 561.40), SIMDE_FLOAT32_C( 423.50), SIMDE_FLOAT32_C( -348.46), SIMDE_FLOAT32_C( -186.97), SIMDE_FLOAT32_C( 100.69) }, { SIMDE_FLOAT32_C( -4.91), SIMDE_FLOAT32_C( -6.00), SIMDE_FLOAT32_C( 8.90), SIMDE_FLOAT32_C( 8.25), SIMDE_FLOAT32_C( 7.51), SIMDE_FLOAT32_C( -7.04), SIMDE_FLOAT32_C( -5.72), SIMDE_FLOAT32_C( 4.65) } }, { { SIMDE_FLOAT32_C( -483.26), SIMDE_FLOAT32_C( 466.05), SIMDE_FLOAT32_C( 495.07), SIMDE_FLOAT32_C( 18.54), SIMDE_FLOAT32_C( 162.90), SIMDE_FLOAT32_C( -708.15), SIMDE_FLOAT32_C( 109.34), SIMDE_FLOAT32_C( -790.40) }, { SIMDE_FLOAT32_C( -7.85), SIMDE_FLOAT32_C( 7.75), SIMDE_FLOAT32_C( 7.91), SIMDE_FLOAT32_C( 2.65), SIMDE_FLOAT32_C( 5.46), SIMDE_FLOAT32_C( -8.91), SIMDE_FLOAT32_C( 4.78), SIMDE_FLOAT32_C( -9.25) } }, { { SIMDE_FLOAT32_C( -265.81), SIMDE_FLOAT32_C( 782.01), SIMDE_FLOAT32_C( -279.80), SIMDE_FLOAT32_C( 655.29), SIMDE_FLOAT32_C( 938.38), SIMDE_FLOAT32_C( 192.43), SIMDE_FLOAT32_C( 591.04), SIMDE_FLOAT32_C( -252.03) }, { SIMDE_FLOAT32_C( -6.43), SIMDE_FLOAT32_C( 9.21), SIMDE_FLOAT32_C( -6.54), SIMDE_FLOAT32_C( 8.69), 
SIMDE_FLOAT32_C( 9.79), SIMDE_FLOAT32_C( 5.77), SIMDE_FLOAT32_C( 8.39), SIMDE_FLOAT32_C( -6.32) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_cbrt_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_cbrt_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 286.65), SIMDE_FLOAT64_C( -385.66), SIMDE_FLOAT64_C( 84.84), SIMDE_FLOAT64_C( 116.45) }, { SIMDE_FLOAT64_C( 6.59), SIMDE_FLOAT64_C( -7.28), SIMDE_FLOAT64_C( 4.39), SIMDE_FLOAT64_C( 4.88) } }, { { SIMDE_FLOAT64_C( 443.79), SIMDE_FLOAT64_C( 321.91), SIMDE_FLOAT64_C( -219.08), SIMDE_FLOAT64_C( -924.57) }, { SIMDE_FLOAT64_C( 7.63), SIMDE_FLOAT64_C( 6.85), SIMDE_FLOAT64_C( -6.03), SIMDE_FLOAT64_C( -9.74) } }, { { SIMDE_FLOAT64_C( 745.74), SIMDE_FLOAT64_C( 694.64), SIMDE_FLOAT64_C( 266.38), SIMDE_FLOAT64_C( 138.63) }, { SIMDE_FLOAT64_C( 9.07), SIMDE_FLOAT64_C( 8.86), SIMDE_FLOAT64_C( 6.43), SIMDE_FLOAT64_C( 5.18) } }, { { SIMDE_FLOAT64_C( 417.51), SIMDE_FLOAT64_C( 27.01), SIMDE_FLOAT64_C( -921.58), SIMDE_FLOAT64_C( 56.73) }, { SIMDE_FLOAT64_C( 7.47), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( -9.73), SIMDE_FLOAT64_C( 3.84) } }, { { SIMDE_FLOAT64_C( 568.89), SIMDE_FLOAT64_C( 355.21), SIMDE_FLOAT64_C( -243.68), SIMDE_FLOAT64_C( 232.84) }, { SIMDE_FLOAT64_C( 8.29), SIMDE_FLOAT64_C( 7.08), SIMDE_FLOAT64_C( -6.25), SIMDE_FLOAT64_C( 6.15) } }, { { SIMDE_FLOAT64_C( -964.92), SIMDE_FLOAT64_C( -649.34), SIMDE_FLOAT64_C( -100.47), SIMDE_FLOAT64_C( -303.39) }, { SIMDE_FLOAT64_C( -9.88), SIMDE_FLOAT64_C( -8.66), SIMDE_FLOAT64_C( -4.65), SIMDE_FLOAT64_C( -6.72) } }, { { SIMDE_FLOAT64_C( -56.31), SIMDE_FLOAT64_C( -696.56), SIMDE_FLOAT64_C( -500.81), SIMDE_FLOAT64_C( 866.34) }, { SIMDE_FLOAT64_C( -3.83), SIMDE_FLOAT64_C( -8.86), SIMDE_FLOAT64_C( -7.94), 
SIMDE_FLOAT64_C( 9.53) } }, { { SIMDE_FLOAT64_C( 560.33), SIMDE_FLOAT64_C( 808.06), SIMDE_FLOAT64_C( 566.38), SIMDE_FLOAT64_C( -153.02) }, { SIMDE_FLOAT64_C( 8.24), SIMDE_FLOAT64_C( 9.31), SIMDE_FLOAT64_C( 8.27), SIMDE_FLOAT64_C( -5.35) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_cbrt_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_cbrt_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -13.67), SIMDE_FLOAT32_C( -56.84), SIMDE_FLOAT32_C( -51.43), SIMDE_FLOAT32_C( 570.17), SIMDE_FLOAT32_C( 282.97), SIMDE_FLOAT32_C( -935.16), SIMDE_FLOAT32_C( 302.89), SIMDE_FLOAT32_C( -720.37), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -98.04), SIMDE_FLOAT32_C( -1.14), SIMDE_FLOAT32_C( -24.91), SIMDE_FLOAT32_C( 315.22), SIMDE_FLOAT32_C( -790.04), SIMDE_FLOAT32_C( -92.39), SIMDE_FLOAT32_C( -624.42) }, { SIMDE_FLOAT32_C( -2.39), SIMDE_FLOAT32_C( -3.84), SIMDE_FLOAT32_C( -3.72), SIMDE_FLOAT32_C( 8.29), SIMDE_FLOAT32_C( 6.57), SIMDE_FLOAT32_C( -9.78), SIMDE_FLOAT32_C( 6.72), SIMDE_FLOAT32_C( -8.96), SIMDE_FLOAT32_C( -6.89), SIMDE_FLOAT32_C( -4.61), SIMDE_FLOAT32_C( -1.05), SIMDE_FLOAT32_C( -2.92), SIMDE_FLOAT32_C( 6.81), SIMDE_FLOAT32_C( -9.24), SIMDE_FLOAT32_C( -4.52), SIMDE_FLOAT32_C( -8.55) } }, { { SIMDE_FLOAT32_C( 534.24), SIMDE_FLOAT32_C( 480.60), SIMDE_FLOAT32_C( -464.10), SIMDE_FLOAT32_C( 924.79), SIMDE_FLOAT32_C( 691.98), SIMDE_FLOAT32_C( 368.05), SIMDE_FLOAT32_C( 181.75), SIMDE_FLOAT32_C( 967.37), SIMDE_FLOAT32_C( -837.71), SIMDE_FLOAT32_C( -61.77), SIMDE_FLOAT32_C( -702.36), SIMDE_FLOAT32_C( 76.18), SIMDE_FLOAT32_C( 549.27), SIMDE_FLOAT32_C( 36.35), SIMDE_FLOAT32_C( -116.93), SIMDE_FLOAT32_C( -464.40) }, { SIMDE_FLOAT32_C( 8.11), SIMDE_FLOAT32_C( 7.83), SIMDE_FLOAT32_C( -7.74), 
SIMDE_FLOAT32_C( 9.74), SIMDE_FLOAT32_C( 8.84), SIMDE_FLOAT32_C( 7.17), SIMDE_FLOAT32_C( 5.66), SIMDE_FLOAT32_C( 9.89), SIMDE_FLOAT32_C( -9.43), SIMDE_FLOAT32_C( -3.95), SIMDE_FLOAT32_C( -8.89), SIMDE_FLOAT32_C( 4.24), SIMDE_FLOAT32_C( 8.19), SIMDE_FLOAT32_C( 3.31), SIMDE_FLOAT32_C( -4.89), SIMDE_FLOAT32_C( -7.74) } }, { { SIMDE_FLOAT32_C( 979.51), SIMDE_FLOAT32_C( 831.64), SIMDE_FLOAT32_C( -894.23), SIMDE_FLOAT32_C( 262.49), SIMDE_FLOAT32_C( 896.48), SIMDE_FLOAT32_C( 408.65), SIMDE_FLOAT32_C( 542.11), SIMDE_FLOAT32_C( -430.74), SIMDE_FLOAT32_C( -689.38), SIMDE_FLOAT32_C( -459.03), SIMDE_FLOAT32_C( 544.35), SIMDE_FLOAT32_C( 625.84), SIMDE_FLOAT32_C( -249.07), SIMDE_FLOAT32_C( -548.04), SIMDE_FLOAT32_C( -998.58), SIMDE_FLOAT32_C( -714.83) }, { SIMDE_FLOAT32_C( 9.93), SIMDE_FLOAT32_C( 9.40), SIMDE_FLOAT32_C( -9.63), SIMDE_FLOAT32_C( 6.40), SIMDE_FLOAT32_C( 9.64), SIMDE_FLOAT32_C( 7.42), SIMDE_FLOAT32_C( 8.15), SIMDE_FLOAT32_C( -7.55), SIMDE_FLOAT32_C( -8.83), SIMDE_FLOAT32_C( -7.71), SIMDE_FLOAT32_C( 8.17), SIMDE_FLOAT32_C( 8.55), SIMDE_FLOAT32_C( -6.29), SIMDE_FLOAT32_C( -8.18), SIMDE_FLOAT32_C( -10.00), SIMDE_FLOAT32_C( -8.94) } }, { { SIMDE_FLOAT32_C( 932.56), SIMDE_FLOAT32_C( -462.68), SIMDE_FLOAT32_C( -790.04), SIMDE_FLOAT32_C( 624.53), SIMDE_FLOAT32_C( 905.37), SIMDE_FLOAT32_C( 391.72), SIMDE_FLOAT32_C( 591.90), SIMDE_FLOAT32_C( -932.34), SIMDE_FLOAT32_C( -670.05), SIMDE_FLOAT32_C( 889.54), SIMDE_FLOAT32_C( 143.84), SIMDE_FLOAT32_C( 879.22), SIMDE_FLOAT32_C( -74.11), SIMDE_FLOAT32_C( -973.09), SIMDE_FLOAT32_C( -585.18), SIMDE_FLOAT32_C( -94.60) }, { SIMDE_FLOAT32_C( 9.77), SIMDE_FLOAT32_C( -7.73), SIMDE_FLOAT32_C( -9.24), SIMDE_FLOAT32_C( 8.55), SIMDE_FLOAT32_C( 9.67), SIMDE_FLOAT32_C( 7.32), SIMDE_FLOAT32_C( 8.40), SIMDE_FLOAT32_C( -9.77), SIMDE_FLOAT32_C( -8.75), SIMDE_FLOAT32_C( 9.62), SIMDE_FLOAT32_C( 5.24), SIMDE_FLOAT32_C( 9.58), SIMDE_FLOAT32_C( -4.20), SIMDE_FLOAT32_C( -9.91), SIMDE_FLOAT32_C( -8.36), SIMDE_FLOAT32_C( -4.56) } }, { { SIMDE_FLOAT32_C( 
858.55), SIMDE_FLOAT32_C( -479.41), SIMDE_FLOAT32_C( -832.11), SIMDE_FLOAT32_C( 755.02), SIMDE_FLOAT32_C( 929.24), SIMDE_FLOAT32_C( 710.00), SIMDE_FLOAT32_C( -675.72), SIMDE_FLOAT32_C( -760.15), SIMDE_FLOAT32_C( -749.03), SIMDE_FLOAT32_C( 868.63), SIMDE_FLOAT32_C( 865.69), SIMDE_FLOAT32_C( 1.90), SIMDE_FLOAT32_C( -679.42), SIMDE_FLOAT32_C( 867.11), SIMDE_FLOAT32_C( 287.07), SIMDE_FLOAT32_C( -746.86) }, { SIMDE_FLOAT32_C( 9.50), SIMDE_FLOAT32_C( -7.83), SIMDE_FLOAT32_C( -9.41), SIMDE_FLOAT32_C( 9.11), SIMDE_FLOAT32_C( 9.76), SIMDE_FLOAT32_C( 8.92), SIMDE_FLOAT32_C( -8.78), SIMDE_FLOAT32_C( -9.13), SIMDE_FLOAT32_C( -9.08), SIMDE_FLOAT32_C( 9.54), SIMDE_FLOAT32_C( 9.53), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( -8.79), SIMDE_FLOAT32_C( 9.54), SIMDE_FLOAT32_C( 6.60), SIMDE_FLOAT32_C( -9.07) } }, { { SIMDE_FLOAT32_C( -595.56), SIMDE_FLOAT32_C( 497.03), SIMDE_FLOAT32_C( 877.67), SIMDE_FLOAT32_C( -690.19), SIMDE_FLOAT32_C( -111.25), SIMDE_FLOAT32_C( 469.57), SIMDE_FLOAT32_C( -622.53), SIMDE_FLOAT32_C( 218.70), SIMDE_FLOAT32_C( 359.11), SIMDE_FLOAT32_C( 521.31), SIMDE_FLOAT32_C( 97.92), SIMDE_FLOAT32_C( -714.99), SIMDE_FLOAT32_C( 548.22), SIMDE_FLOAT32_C( 512.74), SIMDE_FLOAT32_C( 190.41), SIMDE_FLOAT32_C( 406.77) }, { SIMDE_FLOAT32_C( -8.41), SIMDE_FLOAT32_C( 7.92), SIMDE_FLOAT32_C( 9.57), SIMDE_FLOAT32_C( -8.84), SIMDE_FLOAT32_C( -4.81), SIMDE_FLOAT32_C( 7.77), SIMDE_FLOAT32_C( -8.54), SIMDE_FLOAT32_C( 6.02), SIMDE_FLOAT32_C( 7.11), SIMDE_FLOAT32_C( 8.05), SIMDE_FLOAT32_C( 4.61), SIMDE_FLOAT32_C( -8.94), SIMDE_FLOAT32_C( 8.18), SIMDE_FLOAT32_C( 8.00), SIMDE_FLOAT32_C( 5.75), SIMDE_FLOAT32_C( 7.41) } }, { { SIMDE_FLOAT32_C( -966.68), SIMDE_FLOAT32_C( 358.30), SIMDE_FLOAT32_C( 161.79), SIMDE_FLOAT32_C( 962.56), SIMDE_FLOAT32_C( 68.29), SIMDE_FLOAT32_C( 486.07), SIMDE_FLOAT32_C( -797.58), SIMDE_FLOAT32_C( 319.26), SIMDE_FLOAT32_C( 354.70), SIMDE_FLOAT32_C( -931.89), SIMDE_FLOAT32_C( -678.84), SIMDE_FLOAT32_C( 675.28), SIMDE_FLOAT32_C( 935.22), SIMDE_FLOAT32_C( 608.23), 
SIMDE_FLOAT32_C( 928.43), SIMDE_FLOAT32_C( -660.34) }, { SIMDE_FLOAT32_C( -9.89), SIMDE_FLOAT32_C( 7.10), SIMDE_FLOAT32_C( 5.45), SIMDE_FLOAT32_C( 9.87), SIMDE_FLOAT32_C( 4.09), SIMDE_FLOAT32_C( 7.86), SIMDE_FLOAT32_C( -9.27), SIMDE_FLOAT32_C( 6.83), SIMDE_FLOAT32_C( 7.08), SIMDE_FLOAT32_C( -9.77), SIMDE_FLOAT32_C( -8.79), SIMDE_FLOAT32_C( 8.77), SIMDE_FLOAT32_C( 9.78), SIMDE_FLOAT32_C( 8.47), SIMDE_FLOAT32_C( 9.76), SIMDE_FLOAT32_C( -8.71) } }, { { SIMDE_FLOAT32_C( 105.27), SIMDE_FLOAT32_C( 806.10), SIMDE_FLOAT32_C( -350.53), SIMDE_FLOAT32_C( 994.02), SIMDE_FLOAT32_C( 275.67), SIMDE_FLOAT32_C( 26.95), SIMDE_FLOAT32_C( 212.72), SIMDE_FLOAT32_C( -365.21), SIMDE_FLOAT32_C( -451.74), SIMDE_FLOAT32_C( -689.36), SIMDE_FLOAT32_C( -80.21), SIMDE_FLOAT32_C( -903.52), SIMDE_FLOAT32_C( 823.38), SIMDE_FLOAT32_C( -889.80), SIMDE_FLOAT32_C( 503.25), SIMDE_FLOAT32_C( 856.70) }, { SIMDE_FLOAT32_C( 4.72), SIMDE_FLOAT32_C( 9.31), SIMDE_FLOAT32_C( -7.05), SIMDE_FLOAT32_C( 9.98), SIMDE_FLOAT32_C( 6.51), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 5.97), SIMDE_FLOAT32_C( -7.15), SIMDE_FLOAT32_C( -7.67), SIMDE_FLOAT32_C( -8.83), SIMDE_FLOAT32_C( -4.31), SIMDE_FLOAT32_C( -9.67), SIMDE_FLOAT32_C( 9.37), SIMDE_FLOAT32_C( -9.62), SIMDE_FLOAT32_C( 7.95), SIMDE_FLOAT32_C( 9.50) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_cbrt_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_cbrt_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -399.60), SIMDE_FLOAT64_C( 73.77), SIMDE_FLOAT64_C( 137.22), SIMDE_FLOAT64_C( -676.98), SIMDE_FLOAT64_C( -304.40), SIMDE_FLOAT64_C( -35.84), SIMDE_FLOAT64_C( -298.40), SIMDE_FLOAT64_C( -909.21) }, { SIMDE_FLOAT64_C( -7.37), SIMDE_FLOAT64_C( 4.19), SIMDE_FLOAT64_C( 5.16), SIMDE_FLOAT64_C( 
-8.78), SIMDE_FLOAT64_C( -6.73), SIMDE_FLOAT64_C( -3.30), SIMDE_FLOAT64_C( -6.68), SIMDE_FLOAT64_C( -9.69) } }, { { SIMDE_FLOAT64_C( -369.66), SIMDE_FLOAT64_C( -37.98), SIMDE_FLOAT64_C( 225.69), SIMDE_FLOAT64_C( 708.35), SIMDE_FLOAT64_C( 411.81), SIMDE_FLOAT64_C( -32.59), SIMDE_FLOAT64_C( 605.95), SIMDE_FLOAT64_C( -309.62) }, { SIMDE_FLOAT64_C( -7.18), SIMDE_FLOAT64_C( -3.36), SIMDE_FLOAT64_C( 6.09), SIMDE_FLOAT64_C( 8.91), SIMDE_FLOAT64_C( 7.44), SIMDE_FLOAT64_C( -3.19), SIMDE_FLOAT64_C( 8.46), SIMDE_FLOAT64_C( -6.77) } }, { { SIMDE_FLOAT64_C( 644.51), SIMDE_FLOAT64_C( -178.16), SIMDE_FLOAT64_C( -305.15), SIMDE_FLOAT64_C( 654.50), SIMDE_FLOAT64_C( -229.06), SIMDE_FLOAT64_C( -577.20), SIMDE_FLOAT64_C( 549.91), SIMDE_FLOAT64_C( -450.26) }, { SIMDE_FLOAT64_C( 8.64), SIMDE_FLOAT64_C( -5.63), SIMDE_FLOAT64_C( -6.73), SIMDE_FLOAT64_C( 8.68), SIMDE_FLOAT64_C( -6.12), SIMDE_FLOAT64_C( -8.33), SIMDE_FLOAT64_C( 8.19), SIMDE_FLOAT64_C( -7.66) } }, { { SIMDE_FLOAT64_C( 336.68), SIMDE_FLOAT64_C( -367.59), SIMDE_FLOAT64_C( 113.01), SIMDE_FLOAT64_C( -952.73), SIMDE_FLOAT64_C( 958.03), SIMDE_FLOAT64_C( 319.98), SIMDE_FLOAT64_C( -626.30), SIMDE_FLOAT64_C( -441.56) }, { SIMDE_FLOAT64_C( 6.96), SIMDE_FLOAT64_C( -7.16), SIMDE_FLOAT64_C( 4.83), SIMDE_FLOAT64_C( -9.84), SIMDE_FLOAT64_C( 9.86), SIMDE_FLOAT64_C( 6.84), SIMDE_FLOAT64_C( -8.56), SIMDE_FLOAT64_C( -7.61) } }, { { SIMDE_FLOAT64_C( -606.25), SIMDE_FLOAT64_C( 510.93), SIMDE_FLOAT64_C( -118.54), SIMDE_FLOAT64_C( 89.36), SIMDE_FLOAT64_C( -524.91), SIMDE_FLOAT64_C( 583.06), SIMDE_FLOAT64_C( 180.15), SIMDE_FLOAT64_C( 105.43) }, { SIMDE_FLOAT64_C( -8.46), SIMDE_FLOAT64_C( 7.99), SIMDE_FLOAT64_C( -4.91), SIMDE_FLOAT64_C( 4.47), SIMDE_FLOAT64_C( -8.07), SIMDE_FLOAT64_C( 8.35), SIMDE_FLOAT64_C( 5.65), SIMDE_FLOAT64_C( 4.72) } }, { { SIMDE_FLOAT64_C( -454.92), SIMDE_FLOAT64_C( -594.16), SIMDE_FLOAT64_C( -186.22), SIMDE_FLOAT64_C( 956.89), SIMDE_FLOAT64_C( 373.25), SIMDE_FLOAT64_C( -580.27), SIMDE_FLOAT64_C( -352.73), SIMDE_FLOAT64_C( 
17.77) }, { SIMDE_FLOAT64_C( -7.69), SIMDE_FLOAT64_C( -8.41), SIMDE_FLOAT64_C( -5.71), SIMDE_FLOAT64_C( 9.85), SIMDE_FLOAT64_C( 7.20), SIMDE_FLOAT64_C( -8.34), SIMDE_FLOAT64_C( -7.07), SIMDE_FLOAT64_C( 2.61) } }, { { SIMDE_FLOAT64_C( 241.57), SIMDE_FLOAT64_C( 342.12), SIMDE_FLOAT64_C( -327.73), SIMDE_FLOAT64_C( -987.48), SIMDE_FLOAT64_C( 764.92), SIMDE_FLOAT64_C( -777.82), SIMDE_FLOAT64_C( -437.75), SIMDE_FLOAT64_C( 101.60) }, { SIMDE_FLOAT64_C( 6.23), SIMDE_FLOAT64_C( 6.99), SIMDE_FLOAT64_C( -6.89), SIMDE_FLOAT64_C( -9.96), SIMDE_FLOAT64_C( 9.15), SIMDE_FLOAT64_C( -9.20), SIMDE_FLOAT64_C( -7.59), SIMDE_FLOAT64_C( 4.67) } }, { { SIMDE_FLOAT64_C( -145.41), SIMDE_FLOAT64_C( 675.27), SIMDE_FLOAT64_C( 148.87), SIMDE_FLOAT64_C( -187.38), SIMDE_FLOAT64_C( -4.75), SIMDE_FLOAT64_C( 522.57), SIMDE_FLOAT64_C( 371.06), SIMDE_FLOAT64_C( 389.00) }, { SIMDE_FLOAT64_C( -5.26), SIMDE_FLOAT64_C( 8.77), SIMDE_FLOAT64_C( 5.30), SIMDE_FLOAT64_C( -5.72), SIMDE_FLOAT64_C( -1.68), SIMDE_FLOAT64_C( 8.05), SIMDE_FLOAT64_C( 7.19), SIMDE_FLOAT64_C( 7.30) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_cbrt_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_cbrt_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 98.98), SIMDE_FLOAT32_C( 913.15), SIMDE_FLOAT32_C( 690.53), SIMDE_FLOAT32_C( -536.23), SIMDE_FLOAT32_C( -147.17), SIMDE_FLOAT32_C( 971.04), SIMDE_FLOAT32_C( -365.05), SIMDE_FLOAT32_C( 728.65), SIMDE_FLOAT32_C( 340.02), SIMDE_FLOAT32_C( -288.54), SIMDE_FLOAT32_C( 298.61), SIMDE_FLOAT32_C( -421.40), SIMDE_FLOAT32_C( 62.04), SIMDE_FLOAT32_C( 962.27), SIMDE_FLOAT32_C( -847.19), SIMDE_FLOAT32_C( -983.83) }, UINT8_C( 93), { SIMDE_FLOAT32_C( -474.95), 
SIMDE_FLOAT32_C( -938.65), SIMDE_FLOAT32_C( -941.09), SIMDE_FLOAT32_C( 980.71), SIMDE_FLOAT32_C( -613.85), SIMDE_FLOAT32_C( 304.68), SIMDE_FLOAT32_C( -395.19), SIMDE_FLOAT32_C( -357.37), SIMDE_FLOAT32_C( 667.44), SIMDE_FLOAT32_C( 353.93), SIMDE_FLOAT32_C( 659.42), SIMDE_FLOAT32_C( -91.27), SIMDE_FLOAT32_C( -203.61), SIMDE_FLOAT32_C( -908.72), SIMDE_FLOAT32_C( -992.29), SIMDE_FLOAT32_C( -290.45) }, { SIMDE_FLOAT32_C( -7.80), SIMDE_FLOAT32_C( 913.15), SIMDE_FLOAT32_C( -9.80), SIMDE_FLOAT32_C( 9.94), SIMDE_FLOAT32_C( -8.50), SIMDE_FLOAT32_C( 971.04), SIMDE_FLOAT32_C( -7.34), SIMDE_FLOAT32_C( 728.65), SIMDE_FLOAT32_C( 340.02), SIMDE_FLOAT32_C( -288.54), SIMDE_FLOAT32_C( 298.61), SIMDE_FLOAT32_C( -421.40), SIMDE_FLOAT32_C( 62.04), SIMDE_FLOAT32_C( 962.27), SIMDE_FLOAT32_C( -847.19), SIMDE_FLOAT32_C( -983.83) } }, { { SIMDE_FLOAT32_C( 781.81), SIMDE_FLOAT32_C( -528.52), SIMDE_FLOAT32_C( 562.38), SIMDE_FLOAT32_C( 752.86), SIMDE_FLOAT32_C( 106.43), SIMDE_FLOAT32_C( 291.03), SIMDE_FLOAT32_C( 92.88), SIMDE_FLOAT32_C( 817.89), SIMDE_FLOAT32_C( -410.36), SIMDE_FLOAT32_C( 671.48), SIMDE_FLOAT32_C( -120.07), SIMDE_FLOAT32_C( -448.09), SIMDE_FLOAT32_C( 824.29), SIMDE_FLOAT32_C( -103.90), SIMDE_FLOAT32_C( -767.52), SIMDE_FLOAT32_C( -650.66) }, UINT8_C( 13), { SIMDE_FLOAT32_C( -708.61), SIMDE_FLOAT32_C( -669.94), SIMDE_FLOAT32_C( 343.60), SIMDE_FLOAT32_C( 596.08), SIMDE_FLOAT32_C( -65.13), SIMDE_FLOAT32_C( 986.24), SIMDE_FLOAT32_C( 263.52), SIMDE_FLOAT32_C( -711.20), SIMDE_FLOAT32_C( 645.65), SIMDE_FLOAT32_C( -827.76), SIMDE_FLOAT32_C( 85.19), SIMDE_FLOAT32_C( 736.94), SIMDE_FLOAT32_C( -820.04), SIMDE_FLOAT32_C( 794.74), SIMDE_FLOAT32_C( 518.75), SIMDE_FLOAT32_C( -348.56) }, { SIMDE_FLOAT32_C( -8.92), SIMDE_FLOAT32_C( -528.52), SIMDE_FLOAT32_C( 7.00), SIMDE_FLOAT32_C( 8.42), SIMDE_FLOAT32_C( 106.43), SIMDE_FLOAT32_C( 291.03), SIMDE_FLOAT32_C( 92.88), SIMDE_FLOAT32_C( 817.89), SIMDE_FLOAT32_C( -410.36), SIMDE_FLOAT32_C( 671.48), SIMDE_FLOAT32_C( -120.07), SIMDE_FLOAT32_C( -448.09), 
SIMDE_FLOAT32_C( 824.29), SIMDE_FLOAT32_C( -103.90), SIMDE_FLOAT32_C( -767.52), SIMDE_FLOAT32_C( -650.66) } }, { { SIMDE_FLOAT32_C( 357.12), SIMDE_FLOAT32_C( 271.61), SIMDE_FLOAT32_C( 757.87), SIMDE_FLOAT32_C( -351.85), SIMDE_FLOAT32_C( -635.52), SIMDE_FLOAT32_C( 575.76), SIMDE_FLOAT32_C( 237.78), SIMDE_FLOAT32_C( -964.04), SIMDE_FLOAT32_C( -544.31), SIMDE_FLOAT32_C( 789.69), SIMDE_FLOAT32_C( 860.25), SIMDE_FLOAT32_C( 351.79), SIMDE_FLOAT32_C( -977.83), SIMDE_FLOAT32_C( -790.40), SIMDE_FLOAT32_C( -690.76), SIMDE_FLOAT32_C( -686.43) }, UINT8_C( 57), { SIMDE_FLOAT32_C( 652.85), SIMDE_FLOAT32_C( 909.64), SIMDE_FLOAT32_C( 474.52), SIMDE_FLOAT32_C( 639.08), SIMDE_FLOAT32_C( 173.16), SIMDE_FLOAT32_C( 763.32), SIMDE_FLOAT32_C( 284.74), SIMDE_FLOAT32_C( 345.41), SIMDE_FLOAT32_C( -151.49), SIMDE_FLOAT32_C( 21.68), SIMDE_FLOAT32_C( 525.36), SIMDE_FLOAT32_C( -356.75), SIMDE_FLOAT32_C( -459.57), SIMDE_FLOAT32_C( -823.20), SIMDE_FLOAT32_C( -999.64), SIMDE_FLOAT32_C( 812.03) }, { SIMDE_FLOAT32_C( 8.68), SIMDE_FLOAT32_C( 271.61), SIMDE_FLOAT32_C( 757.87), SIMDE_FLOAT32_C( 8.61), SIMDE_FLOAT32_C( 5.57), SIMDE_FLOAT32_C( 9.14), SIMDE_FLOAT32_C( 237.78), SIMDE_FLOAT32_C( -964.04), SIMDE_FLOAT32_C( -544.31), SIMDE_FLOAT32_C( 789.69), SIMDE_FLOAT32_C( 860.25), SIMDE_FLOAT32_C( 351.79), SIMDE_FLOAT32_C( -977.83), SIMDE_FLOAT32_C( -790.40), SIMDE_FLOAT32_C( -690.76), SIMDE_FLOAT32_C( -686.43) } }, { { SIMDE_FLOAT32_C( 934.67), SIMDE_FLOAT32_C( -351.49), SIMDE_FLOAT32_C( -823.49), SIMDE_FLOAT32_C( 510.43), SIMDE_FLOAT32_C( 886.29), SIMDE_FLOAT32_C( -787.53), SIMDE_FLOAT32_C( 966.12), SIMDE_FLOAT32_C( 675.98), SIMDE_FLOAT32_C( -927.28), SIMDE_FLOAT32_C( 317.91), SIMDE_FLOAT32_C( 698.16), SIMDE_FLOAT32_C( -717.68), SIMDE_FLOAT32_C( 627.15), SIMDE_FLOAT32_C( -988.28), SIMDE_FLOAT32_C( -178.03), SIMDE_FLOAT32_C( 279.99) }, UINT8_C( 81), { SIMDE_FLOAT32_C( -703.51), SIMDE_FLOAT32_C( -80.92), SIMDE_FLOAT32_C( 94.53), SIMDE_FLOAT32_C( -940.19), SIMDE_FLOAT32_C( -796.18), SIMDE_FLOAT32_C( 
-560.07), SIMDE_FLOAT32_C( -91.68), SIMDE_FLOAT32_C( 225.49), SIMDE_FLOAT32_C( 965.29), SIMDE_FLOAT32_C( 551.56), SIMDE_FLOAT32_C( 765.92), SIMDE_FLOAT32_C( -857.91), SIMDE_FLOAT32_C( 551.93), SIMDE_FLOAT32_C( 577.95), SIMDE_FLOAT32_C( -923.23), SIMDE_FLOAT32_C( -799.56) }, { SIMDE_FLOAT32_C( -8.89), SIMDE_FLOAT32_C( -351.49), SIMDE_FLOAT32_C( -823.49), SIMDE_FLOAT32_C( 510.43), SIMDE_FLOAT32_C( -9.27), SIMDE_FLOAT32_C( -787.53), SIMDE_FLOAT32_C( -4.51), SIMDE_FLOAT32_C( 675.98), SIMDE_FLOAT32_C( -927.28), SIMDE_FLOAT32_C( 317.91), SIMDE_FLOAT32_C( 698.16), SIMDE_FLOAT32_C( -717.68), SIMDE_FLOAT32_C( 627.15), SIMDE_FLOAT32_C( -988.28), SIMDE_FLOAT32_C( -178.03), SIMDE_FLOAT32_C( 279.99) } }, { { SIMDE_FLOAT32_C( 754.46), SIMDE_FLOAT32_C( 587.20), SIMDE_FLOAT32_C( -913.27), SIMDE_FLOAT32_C( 966.93), SIMDE_FLOAT32_C( 553.32), SIMDE_FLOAT32_C( 762.71), SIMDE_FLOAT32_C( -960.34), SIMDE_FLOAT32_C( -128.78), SIMDE_FLOAT32_C( 460.87), SIMDE_FLOAT32_C( -678.02), SIMDE_FLOAT32_C( -501.63), SIMDE_FLOAT32_C( 472.59), SIMDE_FLOAT32_C( 143.95), SIMDE_FLOAT32_C( 778.36), SIMDE_FLOAT32_C( 393.95), SIMDE_FLOAT32_C( 440.44) }, UINT8_C(131), { SIMDE_FLOAT32_C( -511.52), SIMDE_FLOAT32_C( 500.25), SIMDE_FLOAT32_C( -98.74), SIMDE_FLOAT32_C( -71.59), SIMDE_FLOAT32_C( -591.44), SIMDE_FLOAT32_C( -873.25), SIMDE_FLOAT32_C( -106.29), SIMDE_FLOAT32_C( 960.13), SIMDE_FLOAT32_C( 892.67), SIMDE_FLOAT32_C( 35.80), SIMDE_FLOAT32_C( 512.05), SIMDE_FLOAT32_C( 470.62), SIMDE_FLOAT32_C( 112.57), SIMDE_FLOAT32_C( 712.49), SIMDE_FLOAT32_C( 225.08), SIMDE_FLOAT32_C( -300.23) }, { SIMDE_FLOAT32_C( -8.00), SIMDE_FLOAT32_C( 7.94), SIMDE_FLOAT32_C( -913.27), SIMDE_FLOAT32_C( 966.93), SIMDE_FLOAT32_C( 553.32), SIMDE_FLOAT32_C( 762.71), SIMDE_FLOAT32_C( -960.34), SIMDE_FLOAT32_C( 9.87), SIMDE_FLOAT32_C( 460.87), SIMDE_FLOAT32_C( -678.02), SIMDE_FLOAT32_C( -501.63), SIMDE_FLOAT32_C( 472.59), SIMDE_FLOAT32_C( 143.95), SIMDE_FLOAT32_C( 778.36), SIMDE_FLOAT32_C( 393.95), SIMDE_FLOAT32_C( 440.44) } }, { { 
SIMDE_FLOAT32_C( 799.22), SIMDE_FLOAT32_C( 192.01), SIMDE_FLOAT32_C( -746.92), SIMDE_FLOAT32_C( 561.93), SIMDE_FLOAT32_C( 231.67), SIMDE_FLOAT32_C( 124.30), SIMDE_FLOAT32_C( 22.80), SIMDE_FLOAT32_C( 553.64), SIMDE_FLOAT32_C( 622.67), SIMDE_FLOAT32_C( -504.61), SIMDE_FLOAT32_C( -302.41), SIMDE_FLOAT32_C( 401.04), SIMDE_FLOAT32_C( 889.34), SIMDE_FLOAT32_C( -861.97), SIMDE_FLOAT32_C( -901.52), SIMDE_FLOAT32_C( -622.17) }, UINT8_C( 8), { SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 306.24), SIMDE_FLOAT32_C( -953.16), SIMDE_FLOAT32_C( 126.49), SIMDE_FLOAT32_C( -800.06), SIMDE_FLOAT32_C( -993.04), SIMDE_FLOAT32_C( 19.16), SIMDE_FLOAT32_C( 235.74), SIMDE_FLOAT32_C( 519.02), SIMDE_FLOAT32_C( -510.22), SIMDE_FLOAT32_C( -651.69), SIMDE_FLOAT32_C( 231.50), SIMDE_FLOAT32_C( 714.86), SIMDE_FLOAT32_C( 48.08), SIMDE_FLOAT32_C( 30.72), SIMDE_FLOAT32_C( -93.13) }, { SIMDE_FLOAT32_C( 799.22), SIMDE_FLOAT32_C( 192.01), SIMDE_FLOAT32_C( -746.92), SIMDE_FLOAT32_C( 5.02), SIMDE_FLOAT32_C( 231.67), SIMDE_FLOAT32_C( 124.30), SIMDE_FLOAT32_C( 22.80), SIMDE_FLOAT32_C( 553.64), SIMDE_FLOAT32_C( 622.67), SIMDE_FLOAT32_C( -504.61), SIMDE_FLOAT32_C( -302.41), SIMDE_FLOAT32_C( 401.04), SIMDE_FLOAT32_C( 889.34), SIMDE_FLOAT32_C( -861.97), SIMDE_FLOAT32_C( -901.52), SIMDE_FLOAT32_C( -622.17) } }, { { SIMDE_FLOAT32_C( 301.16), SIMDE_FLOAT32_C( -407.35), SIMDE_FLOAT32_C( -861.46), SIMDE_FLOAT32_C( -574.54), SIMDE_FLOAT32_C( 615.45), SIMDE_FLOAT32_C( 692.19), SIMDE_FLOAT32_C( -951.86), SIMDE_FLOAT32_C( -889.16), SIMDE_FLOAT32_C( -610.22), SIMDE_FLOAT32_C( 449.17), SIMDE_FLOAT32_C( -999.81), SIMDE_FLOAT32_C( -472.20), SIMDE_FLOAT32_C( 547.65), SIMDE_FLOAT32_C( -621.98), SIMDE_FLOAT32_C( -833.92), SIMDE_FLOAT32_C( -452.61) }, UINT8_C( 61), { SIMDE_FLOAT32_C( -787.08), SIMDE_FLOAT32_C( 673.88), SIMDE_FLOAT32_C( 884.20), SIMDE_FLOAT32_C( -780.12), SIMDE_FLOAT32_C( -306.96), SIMDE_FLOAT32_C( 119.94), SIMDE_FLOAT32_C( 738.89), SIMDE_FLOAT32_C( 182.83), SIMDE_FLOAT32_C( 468.25), SIMDE_FLOAT32_C( -29.60), 
SIMDE_FLOAT32_C( -102.31), SIMDE_FLOAT32_C( -483.67), SIMDE_FLOAT32_C( -998.88), SIMDE_FLOAT32_C( 804.56), SIMDE_FLOAT32_C( 817.49), SIMDE_FLOAT32_C( -406.23) }, { SIMDE_FLOAT32_C( -9.23), SIMDE_FLOAT32_C( -407.35), SIMDE_FLOAT32_C( 9.60), SIMDE_FLOAT32_C( -9.21), SIMDE_FLOAT32_C( -6.75), SIMDE_FLOAT32_C( 4.93), SIMDE_FLOAT32_C( -951.86), SIMDE_FLOAT32_C( -889.16), SIMDE_FLOAT32_C( -610.22), SIMDE_FLOAT32_C( 449.17), SIMDE_FLOAT32_C( -999.81), SIMDE_FLOAT32_C( -472.20), SIMDE_FLOAT32_C( 547.65), SIMDE_FLOAT32_C( -621.98), SIMDE_FLOAT32_C( -833.92), SIMDE_FLOAT32_C( -452.61) } }, { { SIMDE_FLOAT32_C( 943.11), SIMDE_FLOAT32_C( -757.05), SIMDE_FLOAT32_C( -790.77), SIMDE_FLOAT32_C( 635.29), SIMDE_FLOAT32_C( -708.91), SIMDE_FLOAT32_C( -679.93), SIMDE_FLOAT32_C( -974.93), SIMDE_FLOAT32_C( 740.26), SIMDE_FLOAT32_C( -679.74), SIMDE_FLOAT32_C( -447.13), SIMDE_FLOAT32_C( 287.91), SIMDE_FLOAT32_C( -301.72), SIMDE_FLOAT32_C( -281.05), SIMDE_FLOAT32_C( 835.30), SIMDE_FLOAT32_C( -617.47), SIMDE_FLOAT32_C( -68.13) }, UINT8_C(116), { SIMDE_FLOAT32_C( -733.27), SIMDE_FLOAT32_C( 151.75), SIMDE_FLOAT32_C( -797.77), SIMDE_FLOAT32_C( 386.67), SIMDE_FLOAT32_C( -109.36), SIMDE_FLOAT32_C( 385.06), SIMDE_FLOAT32_C( -145.07), SIMDE_FLOAT32_C( 861.04), SIMDE_FLOAT32_C( -717.26), SIMDE_FLOAT32_C( 371.26), SIMDE_FLOAT32_C( 862.16), SIMDE_FLOAT32_C( -912.69), SIMDE_FLOAT32_C( 188.75), SIMDE_FLOAT32_C( -544.07), SIMDE_FLOAT32_C( -969.58), SIMDE_FLOAT32_C( 431.70) }, { SIMDE_FLOAT32_C( 943.11), SIMDE_FLOAT32_C( -757.05), SIMDE_FLOAT32_C( -9.27), SIMDE_FLOAT32_C( 635.29), SIMDE_FLOAT32_C( -4.78), SIMDE_FLOAT32_C( 7.28), SIMDE_FLOAT32_C( -5.25), SIMDE_FLOAT32_C( 740.26), SIMDE_FLOAT32_C( -679.74), SIMDE_FLOAT32_C( -447.13), SIMDE_FLOAT32_C( 287.91), SIMDE_FLOAT32_C( -301.72), SIMDE_FLOAT32_C( -281.05), SIMDE_FLOAT32_C( 835.30), SIMDE_FLOAT32_C( -617.47), SIMDE_FLOAT32_C( -68.13) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = 
simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_cbrt_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_cbrt_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -759.76), SIMDE_FLOAT64_C( 815.00), SIMDE_FLOAT64_C( -816.92), SIMDE_FLOAT64_C( 967.48), SIMDE_FLOAT64_C( -635.21), SIMDE_FLOAT64_C( 789.99), SIMDE_FLOAT64_C( -526.03), SIMDE_FLOAT64_C( -914.28) }, UINT8_C( 48), { SIMDE_FLOAT64_C( 53.18), SIMDE_FLOAT64_C( 207.38), SIMDE_FLOAT64_C( -889.97), SIMDE_FLOAT64_C( -694.52), SIMDE_FLOAT64_C( 45.75), SIMDE_FLOAT64_C( 94.09), SIMDE_FLOAT64_C( -391.74), SIMDE_FLOAT64_C( 959.63) }, { SIMDE_FLOAT64_C( -759.76), SIMDE_FLOAT64_C( 815.00), SIMDE_FLOAT64_C( -816.92), SIMDE_FLOAT64_C( 967.48), SIMDE_FLOAT64_C( 3.58), SIMDE_FLOAT64_C( 4.55), SIMDE_FLOAT64_C( -526.03), SIMDE_FLOAT64_C( -914.28) } }, { { SIMDE_FLOAT64_C( 378.72), SIMDE_FLOAT64_C( -982.35), SIMDE_FLOAT64_C( -413.18), SIMDE_FLOAT64_C( 706.92), SIMDE_FLOAT64_C( 679.73), SIMDE_FLOAT64_C( 156.25), SIMDE_FLOAT64_C( 267.05), SIMDE_FLOAT64_C( -563.13) }, UINT8_C( 62), { SIMDE_FLOAT64_C( -595.59), SIMDE_FLOAT64_C( -667.14), SIMDE_FLOAT64_C( -678.76), SIMDE_FLOAT64_C( -24.40), SIMDE_FLOAT64_C( 817.42), SIMDE_FLOAT64_C( -438.52), SIMDE_FLOAT64_C( -209.40), SIMDE_FLOAT64_C( -999.49) }, { SIMDE_FLOAT64_C( 378.72), SIMDE_FLOAT64_C( -8.74), SIMDE_FLOAT64_C( -8.79), SIMDE_FLOAT64_C( -2.90), SIMDE_FLOAT64_C( 9.35), SIMDE_FLOAT64_C( -7.60), SIMDE_FLOAT64_C( 267.05), SIMDE_FLOAT64_C( -563.13) } }, { { SIMDE_FLOAT64_C( -471.03), SIMDE_FLOAT64_C( 155.40), SIMDE_FLOAT64_C( 790.50), SIMDE_FLOAT64_C( 2.94), SIMDE_FLOAT64_C( 241.12), SIMDE_FLOAT64_C( 295.11), SIMDE_FLOAT64_C( -943.89), SIMDE_FLOAT64_C( -551.50) }, 
UINT8_C(142), { SIMDE_FLOAT64_C( -638.40), SIMDE_FLOAT64_C( 494.25), SIMDE_FLOAT64_C( -500.77), SIMDE_FLOAT64_C( -30.15), SIMDE_FLOAT64_C( 453.88), SIMDE_FLOAT64_C( 877.94), SIMDE_FLOAT64_C( -12.50), SIMDE_FLOAT64_C( -959.30) }, { SIMDE_FLOAT64_C( -471.03), SIMDE_FLOAT64_C( 7.91), SIMDE_FLOAT64_C( -7.94), SIMDE_FLOAT64_C( -3.11), SIMDE_FLOAT64_C( 241.12), SIMDE_FLOAT64_C( 295.11), SIMDE_FLOAT64_C( -943.89), SIMDE_FLOAT64_C( -9.86) } }, { { SIMDE_FLOAT64_C( 584.87), SIMDE_FLOAT64_C( -332.77), SIMDE_FLOAT64_C( 196.95), SIMDE_FLOAT64_C( -148.09), SIMDE_FLOAT64_C( 104.11), SIMDE_FLOAT64_C( -809.90), SIMDE_FLOAT64_C( 256.33), SIMDE_FLOAT64_C( 436.96) }, UINT8_C(231), { SIMDE_FLOAT64_C( -768.07), SIMDE_FLOAT64_C( 254.39), SIMDE_FLOAT64_C( 72.83), SIMDE_FLOAT64_C( 22.53), SIMDE_FLOAT64_C( 254.89), SIMDE_FLOAT64_C( 601.79), SIMDE_FLOAT64_C( -822.07), SIMDE_FLOAT64_C( 45.39) }, { SIMDE_FLOAT64_C( -9.16), SIMDE_FLOAT64_C( 6.34), SIMDE_FLOAT64_C( 4.18), SIMDE_FLOAT64_C( -148.09), SIMDE_FLOAT64_C( 104.11), SIMDE_FLOAT64_C( 8.44), SIMDE_FLOAT64_C( -9.37), SIMDE_FLOAT64_C( 3.57) } }, { { SIMDE_FLOAT64_C( -395.27), SIMDE_FLOAT64_C( 419.05), SIMDE_FLOAT64_C( -659.50), SIMDE_FLOAT64_C( -339.16), SIMDE_FLOAT64_C( 867.55), SIMDE_FLOAT64_C( 745.64), SIMDE_FLOAT64_C( 22.44), SIMDE_FLOAT64_C( 361.79) }, UINT8_C( 20), { SIMDE_FLOAT64_C( 992.29), SIMDE_FLOAT64_C( -184.33), SIMDE_FLOAT64_C( -877.19), SIMDE_FLOAT64_C( -20.21), SIMDE_FLOAT64_C( -143.62), SIMDE_FLOAT64_C( 707.68), SIMDE_FLOAT64_C( 647.03), SIMDE_FLOAT64_C( -946.67) }, { SIMDE_FLOAT64_C( -395.27), SIMDE_FLOAT64_C( 419.05), SIMDE_FLOAT64_C( -9.57), SIMDE_FLOAT64_C( -339.16), SIMDE_FLOAT64_C( -5.24), SIMDE_FLOAT64_C( 745.64), SIMDE_FLOAT64_C( 22.44), SIMDE_FLOAT64_C( 361.79) } }, { { SIMDE_FLOAT64_C( -440.41), SIMDE_FLOAT64_C( -248.87), SIMDE_FLOAT64_C( -756.57), SIMDE_FLOAT64_C( 815.92), SIMDE_FLOAT64_C( -811.90), SIMDE_FLOAT64_C( -245.23), SIMDE_FLOAT64_C( -952.16), SIMDE_FLOAT64_C( 442.48) }, UINT8_C( 34), { SIMDE_FLOAT64_C( 
70.37), SIMDE_FLOAT64_C( -302.63), SIMDE_FLOAT64_C( 429.40), SIMDE_FLOAT64_C( 248.30), SIMDE_FLOAT64_C( 742.77), SIMDE_FLOAT64_C( -965.87), SIMDE_FLOAT64_C( -332.65), SIMDE_FLOAT64_C( -916.73) }, { SIMDE_FLOAT64_C( -440.41), SIMDE_FLOAT64_C( -6.71), SIMDE_FLOAT64_C( -756.57), SIMDE_FLOAT64_C( 815.92), SIMDE_FLOAT64_C( -811.90), SIMDE_FLOAT64_C( -9.88), SIMDE_FLOAT64_C( -952.16), SIMDE_FLOAT64_C( 442.48) } }, { { SIMDE_FLOAT64_C( -305.03), SIMDE_FLOAT64_C( -465.11), SIMDE_FLOAT64_C( 828.91), SIMDE_FLOAT64_C( 717.41), SIMDE_FLOAT64_C( 896.69), SIMDE_FLOAT64_C( -926.23), SIMDE_FLOAT64_C( 709.70), SIMDE_FLOAT64_C( -287.64) }, UINT8_C( 68), { SIMDE_FLOAT64_C( -310.50), SIMDE_FLOAT64_C( 568.74), SIMDE_FLOAT64_C( 904.26), SIMDE_FLOAT64_C( -663.47), SIMDE_FLOAT64_C( 622.07), SIMDE_FLOAT64_C( -536.15), SIMDE_FLOAT64_C( 87.66), SIMDE_FLOAT64_C( 865.50) }, { SIMDE_FLOAT64_C( -305.03), SIMDE_FLOAT64_C( -465.11), SIMDE_FLOAT64_C( 9.67), SIMDE_FLOAT64_C( 717.41), SIMDE_FLOAT64_C( 896.69), SIMDE_FLOAT64_C( -926.23), SIMDE_FLOAT64_C( 4.44), SIMDE_FLOAT64_C( -287.64) } }, { { SIMDE_FLOAT64_C( -720.23), SIMDE_FLOAT64_C( 275.76), SIMDE_FLOAT64_C( -379.73), SIMDE_FLOAT64_C( -672.39), SIMDE_FLOAT64_C( -281.76), SIMDE_FLOAT64_C( -552.12), SIMDE_FLOAT64_C( 397.98), SIMDE_FLOAT64_C( 415.61) }, UINT8_C(204), { SIMDE_FLOAT64_C( -353.72), SIMDE_FLOAT64_C( 158.38), SIMDE_FLOAT64_C( 911.40), SIMDE_FLOAT64_C( 313.63), SIMDE_FLOAT64_C( 241.65), SIMDE_FLOAT64_C( -393.63), SIMDE_FLOAT64_C( 848.52), SIMDE_FLOAT64_C( 70.56) }, { SIMDE_FLOAT64_C( -720.23), SIMDE_FLOAT64_C( 275.76), SIMDE_FLOAT64_C( 9.70), SIMDE_FLOAT64_C( 6.79), SIMDE_FLOAT64_C( -281.76), SIMDE_FLOAT64_C( -552.12), SIMDE_FLOAT64_C( 9.47), SIMDE_FLOAT64_C( 4.13) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_cbrt_pd(src, test_vec[i].k, a); 
simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_cos_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.49)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( 0.85)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 
395.92), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 28.08), SIMDE_FLOAT32_C( 841.21)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 0.74)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 254.31), SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( 398.82)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( -0.99)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -328.54), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( 655.67), 
SIMDE_FLOAT32_C( -70.91), SIMDE_FLOAT32_C( 543.35), SIMDE_FLOAT32_C( 120.65), SIMDE_FLOAT32_C( -171.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( -0.29)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( 977.36), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( -738.19)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( -1.00)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( -337.60), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( 710.38), SIMDE_FLOAT32_C( 977.49), SIMDE_FLOAT32_C( -756.42), SIMDE_FLOAT32_C( 424.81), SIMDE_FLOAT32_C( 27.25), SIMDE_FLOAT32_C( -95.15)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( 1.00), 
SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.62)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -696.69)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.74)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_cos_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_cos_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( 696.87), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( 346.63)), UINT16_C(41466), simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( 233.37), 
SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( -754.38)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( 346.63)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 28.08)), UINT16_C(36797), simde_mm512_set_ps(SIMDE_FLOAT32_C( -171.51), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 254.31), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( 398.82), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( 993.90)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 1.00), 
SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 0.40)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -95.15), SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( -738.19), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 543.35)), UINT16_C(16804), simde_mm512_set_ps(SIMDE_FLOAT32_C( 27.25), SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( 977.36), SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -328.54), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( -70.91)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -95.15), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 543.35)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -348.70), SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( -337.60), SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( 710.38), SIMDE_FLOAT32_C( -756.42)), UINT16_C( 2107), simde_mm512_set_ps(SIMDE_FLOAT32_C( 897.27), SIMDE_FLOAT32_C( -197.89), 
SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( -696.69), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( 977.49)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -348.70), SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( -0.90)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -15.61), SIMDE_FLOAT32_C( -737.13), SIMDE_FLOAT32_C( -314.93), SIMDE_FLOAT32_C( 177.92), SIMDE_FLOAT32_C( 345.93), SIMDE_FLOAT32_C( 888.71), SIMDE_FLOAT32_C( 915.71), SIMDE_FLOAT32_C( 133.52), SIMDE_FLOAT32_C( 484.94), SIMDE_FLOAT32_C( -598.06), SIMDE_FLOAT32_C( -791.07), SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( 221.37), SIMDE_FLOAT32_C( -788.36), SIMDE_FLOAT32_C( -775.04), SIMDE_FLOAT32_C( 440.64)), UINT16_C(22274), simde_mm512_set_ps(SIMDE_FLOAT32_C( 496.57), SIMDE_FLOAT32_C( 915.19), SIMDE_FLOAT32_C( -718.40), SIMDE_FLOAT32_C( 159.97), SIMDE_FLOAT32_C( -861.01), SIMDE_FLOAT32_C( 426.61), SIMDE_FLOAT32_C( 932.11), SIMDE_FLOAT32_C( 110.36), SIMDE_FLOAT32_C( 826.84), SIMDE_FLOAT32_C( -76.75), SIMDE_FLOAT32_C( 237.58), SIMDE_FLOAT32_C( -378.50), SIMDE_FLOAT32_C( -601.68), SIMDE_FLOAT32_C( -623.50), SIMDE_FLOAT32_C( -942.47), SIMDE_FLOAT32_C( 475.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -15.61), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( -314.93), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( 345.93), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( 484.94), SIMDE_FLOAT32_C( -598.06), SIMDE_FLOAT32_C( 
-791.07), SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( 221.37), SIMDE_FLOAT32_C( -788.36), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 440.64)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 883.05), SIMDE_FLOAT32_C( -807.28), SIMDE_FLOAT32_C( -70.05), SIMDE_FLOAT32_C( -784.34), SIMDE_FLOAT32_C( 92.52), SIMDE_FLOAT32_C( 206.60), SIMDE_FLOAT32_C( 834.60), SIMDE_FLOAT32_C( -65.60), SIMDE_FLOAT32_C( -286.07), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -318.38), SIMDE_FLOAT32_C( 783.48), SIMDE_FLOAT32_C( -628.82), SIMDE_FLOAT32_C( 556.35), SIMDE_FLOAT32_C( 439.43), SIMDE_FLOAT32_C( 434.03)), UINT16_C(27396), simde_mm512_set_ps(SIMDE_FLOAT32_C( -964.25), SIMDE_FLOAT32_C( -406.33), SIMDE_FLOAT32_C( -743.66), SIMDE_FLOAT32_C( -764.58), SIMDE_FLOAT32_C( 789.89), SIMDE_FLOAT32_C( 4.83), SIMDE_FLOAT32_C( -818.54), SIMDE_FLOAT32_C( 161.06), SIMDE_FLOAT32_C( 579.25), SIMDE_FLOAT32_C( -11.78), SIMDE_FLOAT32_C( -308.52), SIMDE_FLOAT32_C( -719.57), SIMDE_FLOAT32_C( 334.00), SIMDE_FLOAT32_C( 274.71), SIMDE_FLOAT32_C( -916.82), SIMDE_FLOAT32_C( -490.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 883.05), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( -784.34), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( 206.60), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( -286.07), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -318.38), SIMDE_FLOAT32_C( 783.48), SIMDE_FLOAT32_C( -628.82), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 439.43), SIMDE_FLOAT32_C( 434.03)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 529.63), SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -967.78), SIMDE_FLOAT32_C( 638.94), SIMDE_FLOAT32_C( 450.90), SIMDE_FLOAT32_C( -771.54), SIMDE_FLOAT32_C( 105.79), SIMDE_FLOAT32_C( 590.10), SIMDE_FLOAT32_C( 30.91), SIMDE_FLOAT32_C( 635.35), SIMDE_FLOAT32_C( -84.00), SIMDE_FLOAT32_C( 80.04), SIMDE_FLOAT32_C( -709.46), SIMDE_FLOAT32_C( 607.86), SIMDE_FLOAT32_C( 394.58), SIMDE_FLOAT32_C( -889.11)), UINT16_C( 953), simde_mm512_set_ps(SIMDE_FLOAT32_C( 18.75), SIMDE_FLOAT32_C( 
809.05), SIMDE_FLOAT32_C( 144.05), SIMDE_FLOAT32_C( -427.72), SIMDE_FLOAT32_C( 308.28), SIMDE_FLOAT32_C( -177.05), SIMDE_FLOAT32_C( -457.77), SIMDE_FLOAT32_C( 678.24), SIMDE_FLOAT32_C( 66.05), SIMDE_FLOAT32_C( -267.71), SIMDE_FLOAT32_C( 117.28), SIMDE_FLOAT32_C( -576.80), SIMDE_FLOAT32_C( -38.39), SIMDE_FLOAT32_C( -250.14), SIMDE_FLOAT32_C( -53.92), SIMDE_FLOAT32_C( 91.94)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 529.63), SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -967.78), SIMDE_FLOAT32_C( 638.94), SIMDE_FLOAT32_C( 450.90), SIMDE_FLOAT32_C( -771.54), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 635.35), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 607.86), SIMDE_FLOAT32_C( 394.58), SIMDE_FLOAT32_C( -0.67)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -788.39), SIMDE_FLOAT32_C( 330.43), SIMDE_FLOAT32_C( -493.41), SIMDE_FLOAT32_C( 822.72), SIMDE_FLOAT32_C( 956.68), SIMDE_FLOAT32_C( 954.62), SIMDE_FLOAT32_C( 825.49), SIMDE_FLOAT32_C( -816.27), SIMDE_FLOAT32_C( -209.34), SIMDE_FLOAT32_C( -933.21), SIMDE_FLOAT32_C( -728.70), SIMDE_FLOAT32_C( -420.06), SIMDE_FLOAT32_C( 100.32), SIMDE_FLOAT32_C( 103.15), SIMDE_FLOAT32_C( 439.77), SIMDE_FLOAT32_C( -204.33)), UINT16_C(12713), simde_mm512_set_ps(SIMDE_FLOAT32_C( -841.43), SIMDE_FLOAT32_C( -14.16), SIMDE_FLOAT32_C( 824.88), SIMDE_FLOAT32_C( 793.63), SIMDE_FLOAT32_C( -736.75), SIMDE_FLOAT32_C( -310.57), SIMDE_FLOAT32_C( 728.87), SIMDE_FLOAT32_C( -350.72), SIMDE_FLOAT32_C( 60.89), SIMDE_FLOAT32_C( 109.81), SIMDE_FLOAT32_C( 715.94), SIMDE_FLOAT32_C( -250.60), SIMDE_FLOAT32_C( 944.14), SIMDE_FLOAT32_C( 361.85), SIMDE_FLOAT32_C( -13.07), SIMDE_FLOAT32_C( 852.60)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -788.39), SIMDE_FLOAT32_C( 330.43), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.37), SIMDE_FLOAT32_C( 956.68), SIMDE_FLOAT32_C( 954.62), SIMDE_FLOAT32_C( 825.49), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -933.21), 
SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( -420.06), SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( 103.15), SIMDE_FLOAT32_C( 439.77), SIMDE_FLOAT32_C( -0.34)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_cos_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm512_cos_pd.
 *
 * Each of the 8 test_vec entries pairs an input vector `a` with the expected
 * result `r`.  Every case calls simde_mm512_cos_pd(a) and compares the result
 * with `r` through simde_assert_m512d_close(..., 1).  Returns 0 once every
 * case has been evaluated.
 *
 * NOTE(review): the expected values are consistent with cos() of the inputs
 * taken as radians and written to two decimals (e.g. 497.31 -> 0.59).  The
 * trailing `1` is presumably the comparison precision -- confirm against the
 * definition of simde_assert_m512d_close. */
static int test_simde_mm512_cos_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( -0.47), SIMDE_FLOAT64_C( -0.54), SIMDE_FLOAT64_C( -0.88), SIMDE_FLOAT64_C( -0.66), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 0.49)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( -0.94)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -923.64), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( -417.54), SIMDE_FLOAT64_C( 696.87)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( 0.73), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -0.94), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( -0.96), SIMDE_FLOAT64_C( 0.85)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 687.09), SIMDE_FLOAT64_C(
261.31), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -660.80)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.64), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.63), SIMDE_FLOAT64_C( -0.61), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( -0.88), SIMDE_FLOAT64_C( 0.48)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -770.35), SIMDE_FLOAT64_C( 443.48), SIMDE_FLOAT64_C( -583.60), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( -770.72), SIMDE_FLOAT64_C( 993.90), SIMDE_FLOAT64_C( 28.08), SIMDE_FLOAT64_C( 841.21)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.79), SIMDE_FLOAT64_C( -0.87), SIMDE_FLOAT64_C( 0.74), SIMDE_FLOAT64_C( -0.95), SIMDE_FLOAT64_C( -0.51), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( 0.74)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -387.90), SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 655.87), SIMDE_FLOAT64_C( 339.21), SIMDE_FLOAT64_C( 532.35), SIMDE_FLOAT64_C( -263.99), SIMDE_FLOAT64_C( 780.64), SIMDE_FLOAT64_C( -30.79)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.09), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.81)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -203.65), SIMDE_FLOAT64_C( -80.73), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -944.78), SIMDE_FLOAT64_C( -747.59), SIMDE_FLOAT64_C( -767.23), SIMDE_FLOAT64_C( -554.19), SIMDE_FLOAT64_C( 398.82)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( -0.67), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( -0.99)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( 791.23), SIMDE_FLOAT64_C( 254.31)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( -0.51), 
SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( -0.91), SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( -0.99)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_cos_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm512_mask_cos_pd.
 *
 * Each of the 8 test_vec entries holds a vector `src`, an 8-bit mask `k`
 * (simde__mmask8), an input vector `a` and the expected result `r`.  Every
 * case calls simde_mm512_mask_cos_pd(src, k, a) and compares the result with
 * `r` through simde_assert_m512d_close(..., 1).  Returns 0 once every case has
 * been evaluated.
 *
 * NOTE(review): in the vectors below, lanes whose bit in `k` is clear carry
 * the corresponding `src` value unchanged, and the remaining lanes carry what
 * looks like cos(a) in radians written to two decimals -- confirm against the
 * generator of this table.  The trailing `1` is presumably the comparison
 * precision. */
static int test_simde_mm512_mask_cos_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( 467.76), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( 346.63)), UINT8_C(139), simde_mm512_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( -754.38)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( 467.76), SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( -0.66), SIMDE_FLOAT64_C( 0.92)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -417.54)), UINT8_C(229), simde_mm512_set_pd(SIMDE_FLOAT64_C( 841.21), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 687.09), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -660.80), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -923.64), SIMDE_FLOAT64_C( -860.95)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.74), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( -0.61), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( 0.99)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 398.82), SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 339.21), SIMDE_FLOAT64_C(
-263.99), SIMDE_FLOAT64_C( -30.79), SIMDE_FLOAT64_C( 443.48), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( 993.90)), UINT8_C(253), simde_mm512_set_pd(SIMDE_FLOAT64_C( -554.19), SIMDE_FLOAT64_C( -387.90), SIMDE_FLOAT64_C( 655.87), SIMDE_FLOAT64_C( 532.35), SIMDE_FLOAT64_C( 780.64), SIMDE_FLOAT64_C( -770.35), SIMDE_FLOAT64_C( -583.60), SIMDE_FLOAT64_C( -770.72)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( -0.09), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( -0.79), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( -0.51)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 120.65), SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 791.23), SIMDE_FLOAT64_C( -203.65), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -747.59)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 543.35), SIMDE_FLOAT64_C( -171.51), SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( 254.31), SIMDE_FLOAT64_C( -80.73), SIMDE_FLOAT64_C( -944.78)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 120.65), SIMDE_FLOAT64_C( -0.29), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( -0.91), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -0.67)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 99.93), SIMDE_FLOAT64_C( -738.19), SIMDE_FLOAT64_C( 758.79), SIMDE_FLOAT64_C( 343.48), SIMDE_FLOAT64_C( -797.92), SIMDE_FLOAT64_C( -525.83), SIMDE_FLOAT64_C( -822.65), SIMDE_FLOAT64_C( 655.67)), UINT8_C(145), simde_mm512_set_pd(SIMDE_FLOAT64_C( 331.34), SIMDE_FLOAT64_C( 462.95), SIMDE_FLOAT64_C( -178.99), SIMDE_FLOAT64_C( 324.62), SIMDE_FLOAT64_C( -874.31), SIMDE_FLOAT64_C( -328.54), SIMDE_FLOAT64_C( -192.31), SIMDE_FLOAT64_C( 561.36)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( -738.19), SIMDE_FLOAT64_C( 758.79), SIMDE_FLOAT64_C( -0.51), SIMDE_FLOAT64_C( -797.92), SIMDE_FLOAT64_C( -525.83), SIMDE_FLOAT64_C( -822.65), SIMDE_FLOAT64_C( -0.55)) }, { 
simde_mm512_set_pd(SIMDE_FLOAT64_C( -756.42), SIMDE_FLOAT64_C( 27.25), SIMDE_FLOAT64_C( 690.12), SIMDE_FLOAT64_C( -21.09), SIMDE_FLOAT64_C( -448.89), SIMDE_FLOAT64_C( 505.79), SIMDE_FLOAT64_C( 831.02), SIMDE_FLOAT64_C( 977.36)), UINT8_C( 75), simde_mm512_set_pd(SIMDE_FLOAT64_C( 977.49), SIMDE_FLOAT64_C( 424.81), SIMDE_FLOAT64_C( -95.15), SIMDE_FLOAT64_C( 840.65), SIMDE_FLOAT64_C( -591.56), SIMDE_FLOAT64_C( 731.49), SIMDE_FLOAT64_C( 623.70), SIMDE_FLOAT64_C( 140.67)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -756.42), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( 690.12), SIMDE_FLOAT64_C( -21.09), SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( 505.79), SIMDE_FLOAT64_C( -0.09), SIMDE_FLOAT64_C( -0.76)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 394.67), SIMDE_FLOAT64_C( -304.73), SIMDE_FLOAT64_C( -696.69), SIMDE_FLOAT64_C( 822.06), SIMDE_FLOAT64_C( -997.63), SIMDE_FLOAT64_C( 923.64), SIMDE_FLOAT64_C( -768.12), SIMDE_FLOAT64_C( -67.64)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 510.85), SIMDE_FLOAT64_C( 14.34), SIMDE_FLOAT64_C( 916.26), SIMDE_FLOAT64_C( -769.09), SIMDE_FLOAT64_C( -573.81), SIMDE_FLOAT64_C( -337.60), SIMDE_FLOAT64_C( 293.64), SIMDE_FLOAT64_C( -576.22)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 394.67), SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -696.69), SIMDE_FLOAT64_C( -0.83), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( -0.12), SIMDE_FLOAT64_C( -768.12), SIMDE_FLOAT64_C( -0.26)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 475.51), SIMDE_FLOAT64_C( 936.65), SIMDE_FLOAT64_C( -348.70), SIMDE_FLOAT64_C( -438.19), SIMDE_FLOAT64_C( -752.43), SIMDE_FLOAT64_C( 932.66), SIMDE_FLOAT64_C( -327.22), SIMDE_FLOAT64_C( -182.45)), UINT8_C(213), simde_mm512_set_pd(SIMDE_FLOAT64_C( -775.04), SIMDE_FLOAT64_C( 440.64), SIMDE_FLOAT64_C( 897.27), SIMDE_FLOAT64_C( -197.89), SIMDE_FLOAT64_C( -359.76), SIMDE_FLOAT64_C( -33.67), SIMDE_FLOAT64_C( 7.27), SIMDE_FLOAT64_C( -125.20)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.60), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( -348.70), SIMDE_FLOAT64_C( 
-1.00), SIMDE_FLOAT64_C( -752.43), SIMDE_FLOAT64_C( -0.63), SIMDE_FLOAT64_C( -327.22), SIMDE_FLOAT64_C( 0.89)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_cos_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm_cosd_ps.
 *
 * Each of the 8 test_vec entries pairs an input vector `a` with the expected
 * result `r`, both built with simde_mm_set_ps.  Every case calls
 * simde_mm_cosd_ps(a) and compares the result with `r` through
 * simde_assert_m128_close(..., 1).  Returns 0 once every case has been
 * evaluated.
 *
 * NOTE(review): the expected values are consistent with the cosine of the
 * inputs interpreted as degrees, written to two decimals (e.g. 346.63 ->
 * 0.97).  The trailing `1` is presumably the comparison precision -- confirm
 * against the definition of simde_assert_m128_close. */
static int test_simde_mm_cosd_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.97)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 0.83)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -0.31)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( -0.85)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.92)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( -0.92)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C(
-976.55), SIMDE_FLOAT32_C( -660.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -0.23), SIMDE_FLOAT32_C( 0.51)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09)), simde_mm_set_ps(SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 0.84)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_cosd_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm_cosd_pd.
 *
 * Each of the 8 test_vec entries pairs an input vector `a` with the expected
 * result `r`, both built with simde_mm_set_pd.  Every case calls
 * simde_mm_cosd_pd(a) and compares the result with `r` through
 * simde_assert_m128d_close(..., 1).  Returns 0 once every case has been
 * evaluated.
 *
 * NOTE(review): the expected values are consistent with the cosine of the
 * inputs interpreted as degrees, written to two decimals (e.g. 346.63 ->
 * 0.97).  The trailing `1` is presumably the comparison precision -- confirm
 * against the definition of simde_assert_m128d_close. */
static int test_simde_mm_cosd_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.97)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( 0.78)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 0.83)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.74), SIMDE_FLOAT64_C( 0.65)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( -0.31)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.47)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( -0.85)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( 0.83)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cosd_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return
0; }
/* Table-driven test for simde_mm256_cosd_ps.
 *
 * Each of the 8 test_vec entries pairs an input vector `a` with the expected
 * result `r`, both built with simde_mm256_set_ps.  Every case calls
 * simde_mm256_cosd_ps(a) and compares the result with `r` through
 * simde_assert_m256_close(..., 1).  Returns 0 once every case has been
 * evaluated.
 *
 * NOTE(review): the expected values are consistent with the cosine of the
 * inputs interpreted as degrees, written to two decimals (e.g. 346.63 ->
 * 0.97).  The trailing `1` is presumably the comparison precision -- confirm
 * against the definition of simde_assert_m256_close. */
static int test_simde_mm256_cosd_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.97)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -0.31)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.92)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -0.23), SIMDE_FLOAT32_C( 0.51)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C(
443.48), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 28.08), SIMDE_FLOAT32_C( 841.21)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( -0.52)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -30.79)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.86)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( 398.82)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( 0.78)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 254.31)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( -0.27)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_cosd_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm256_cosd_pd.
 *
 * Each of the 8 test_vec entries pairs an input vector `a` with the expected
 * result `r`, both built with simde_mm256_set_pd.  Every case calls
 * simde_mm256_cosd_pd(a) and compares the result with `r` through
 * simde_assert_m256d_close(..., 1).  Returns 0 once every case has been
 * evaluated.
 *
 * NOTE(review): the expected values are consistent with the cosine of the
 * inputs interpreted as degrees, written to two decimals (e.g. 346.63 ->
 * 0.97).  The trailing `1` is presumably the comparison precision -- confirm
 * against the definition of simde_assert_m256d_close. */
static int test_simde_mm256_cosd_pd(SIMDE_MUNIT_TEST_ARGS) { const struct {
simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.97)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.74), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 0.83)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( -0.31)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( -0.85)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( -417.54), SIMDE_FLOAT64_C( 696.87)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( -0.78), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( 0.92)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -923.64)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( 0.88), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( -0.92)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -660.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( -0.23), SIMDE_FLOAT64_C( 0.51)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 687.09)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( 
-0.60), SIMDE_FLOAT64_C( 0.84)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_cosd_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm512_cosd_ps.
 *
 * Each of the 8 test_vec entries pairs an input vector `a` with the expected
 * result `r`, both built with simde_mm512_set_ps.  Every case calls
 * simde_mm512_cosd_ps(a) and compares the result with `r` through
 * simde_assert_m512_close(..., 1).  Returns 0 once every case has been
 * evaluated.
 *
 * NOTE(review): the expected values are consistent with the cosine of the
 * inputs interpreted as degrees, written to two decimals (e.g. 346.63 ->
 * 0.97).  The trailing `1` is presumably the comparison precision -- confirm
 * against the definition of simde_assert_m512_close. */
static int test_simde_mm512_cosd_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.97)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -0.23), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( 0.57),
SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.92)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 28.08), SIMDE_FLOAT32_C( 841.21)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( -0.52)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 254.31), SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( 398.82)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( 0.78)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -328.54), 
SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( -70.91), SIMDE_FLOAT32_C( 543.35), SIMDE_FLOAT32_C( 120.65), SIMDE_FLOAT32_C( -171.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -0.99)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( 977.36), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( -738.19)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( 0.95)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( -337.60), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( 710.38), SIMDE_FLOAT32_C( 977.49), SIMDE_FLOAT32_C( -756.42), SIMDE_FLOAT32_C( 424.81), SIMDE_FLOAT32_C( 27.25), SIMDE_FLOAT32_C( -95.15)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.65), 
SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.81), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( -0.09)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -696.69)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( 0.92)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_cosd_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_cosd_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( 696.87), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( 39.01), 
SIMDE_FLOAT32_C( 346.63)), UINT16_C(41466), simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( -754.38)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 346.63)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 28.08)), UINT16_C(36797), simde_mm512_set_ps(SIMDE_FLOAT32_C( -171.51), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 254.31), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( 398.82), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( 993.90)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.16), 
SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 0.07)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -95.15), SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( -738.19), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 543.35)), UINT16_C(16804), simde_mm512_set_ps(SIMDE_FLOAT32_C( 27.25), SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( 977.36), SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -328.54), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( -70.91)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -95.15), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 543.35)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -348.70), SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( -337.60), SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( 
710.38), SIMDE_FLOAT32_C( -756.42)), UINT16_C( 2107), simde_mm512_set_ps(SIMDE_FLOAT32_C( 897.27), SIMDE_FLOAT32_C( -197.89), SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( -696.69), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( 977.49)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -348.70), SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( -0.22)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -15.61), SIMDE_FLOAT32_C( -737.13), SIMDE_FLOAT32_C( -314.93), SIMDE_FLOAT32_C( 177.92), SIMDE_FLOAT32_C( 345.93), SIMDE_FLOAT32_C( 888.71), SIMDE_FLOAT32_C( 915.71), SIMDE_FLOAT32_C( 133.52), SIMDE_FLOAT32_C( 484.94), SIMDE_FLOAT32_C( -598.06), SIMDE_FLOAT32_C( -791.07), SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( 221.37), SIMDE_FLOAT32_C( -788.36), SIMDE_FLOAT32_C( -775.04), SIMDE_FLOAT32_C( 440.64)), UINT16_C(22274), simde_mm512_set_ps(SIMDE_FLOAT32_C( 496.57), SIMDE_FLOAT32_C( 915.19), SIMDE_FLOAT32_C( -718.40), SIMDE_FLOAT32_C( 159.97), SIMDE_FLOAT32_C( -861.01), SIMDE_FLOAT32_C( 426.61), SIMDE_FLOAT32_C( 932.11), SIMDE_FLOAT32_C( 110.36), SIMDE_FLOAT32_C( 826.84), SIMDE_FLOAT32_C( -76.75), SIMDE_FLOAT32_C( 237.58), SIMDE_FLOAT32_C( -378.50), SIMDE_FLOAT32_C( -601.68), SIMDE_FLOAT32_C( -623.50), SIMDE_FLOAT32_C( -942.47), SIMDE_FLOAT32_C( 475.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -15.61), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( -314.93), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( 345.93), SIMDE_FLOAT32_C( 
0.40), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( 484.94), SIMDE_FLOAT32_C( -598.06), SIMDE_FLOAT32_C( -791.07), SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( 221.37), SIMDE_FLOAT32_C( -788.36), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 440.64)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 883.05), SIMDE_FLOAT32_C( -807.28), SIMDE_FLOAT32_C( -70.05), SIMDE_FLOAT32_C( -784.34), SIMDE_FLOAT32_C( 92.52), SIMDE_FLOAT32_C( 206.60), SIMDE_FLOAT32_C( 834.60), SIMDE_FLOAT32_C( -65.60), SIMDE_FLOAT32_C( -286.07), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -318.38), SIMDE_FLOAT32_C( 783.48), SIMDE_FLOAT32_C( -628.82), SIMDE_FLOAT32_C( 556.35), SIMDE_FLOAT32_C( 439.43), SIMDE_FLOAT32_C( 434.03)), UINT16_C(27396), simde_mm512_set_ps(SIMDE_FLOAT32_C( -964.25), SIMDE_FLOAT32_C( -406.33), SIMDE_FLOAT32_C( -743.66), SIMDE_FLOAT32_C( -764.58), SIMDE_FLOAT32_C( 789.89), SIMDE_FLOAT32_C( 4.83), SIMDE_FLOAT32_C( -818.54), SIMDE_FLOAT32_C( 161.06), SIMDE_FLOAT32_C( 579.25), SIMDE_FLOAT32_C( -11.78), SIMDE_FLOAT32_C( -308.52), SIMDE_FLOAT32_C( -719.57), SIMDE_FLOAT32_C( 334.00), SIMDE_FLOAT32_C( 274.71), SIMDE_FLOAT32_C( -916.82), SIMDE_FLOAT32_C( -490.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 883.05), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -784.34), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 206.60), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( -286.07), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -318.38), SIMDE_FLOAT32_C( 783.48), SIMDE_FLOAT32_C( -628.82), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 439.43), SIMDE_FLOAT32_C( 434.03)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 529.63), SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -967.78), SIMDE_FLOAT32_C( 638.94), SIMDE_FLOAT32_C( 450.90), SIMDE_FLOAT32_C( -771.54), SIMDE_FLOAT32_C( 105.79), SIMDE_FLOAT32_C( 590.10), SIMDE_FLOAT32_C( 30.91), SIMDE_FLOAT32_C( 635.35), SIMDE_FLOAT32_C( -84.00), SIMDE_FLOAT32_C( 80.04), SIMDE_FLOAT32_C( -709.46), SIMDE_FLOAT32_C( 607.86), 
SIMDE_FLOAT32_C( 394.58), SIMDE_FLOAT32_C( -889.11)), UINT16_C( 953), simde_mm512_set_ps(SIMDE_FLOAT32_C( 18.75), SIMDE_FLOAT32_C( 809.05), SIMDE_FLOAT32_C( 144.05), SIMDE_FLOAT32_C( -427.72), SIMDE_FLOAT32_C( 308.28), SIMDE_FLOAT32_C( -177.05), SIMDE_FLOAT32_C( -457.77), SIMDE_FLOAT32_C( 678.24), SIMDE_FLOAT32_C( 66.05), SIMDE_FLOAT32_C( -267.71), SIMDE_FLOAT32_C( 117.28), SIMDE_FLOAT32_C( -576.80), SIMDE_FLOAT32_C( -38.39), SIMDE_FLOAT32_C( -250.14), SIMDE_FLOAT32_C( -53.92), SIMDE_FLOAT32_C( 91.94)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 529.63), SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -967.78), SIMDE_FLOAT32_C( 638.94), SIMDE_FLOAT32_C( 450.90), SIMDE_FLOAT32_C( -771.54), SIMDE_FLOAT32_C( -0.14), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 635.35), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 607.86), SIMDE_FLOAT32_C( 394.58), SIMDE_FLOAT32_C( -0.03)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -788.39), SIMDE_FLOAT32_C( 330.43), SIMDE_FLOAT32_C( -493.41), SIMDE_FLOAT32_C( 822.72), SIMDE_FLOAT32_C( 956.68), SIMDE_FLOAT32_C( 954.62), SIMDE_FLOAT32_C( 825.49), SIMDE_FLOAT32_C( -816.27), SIMDE_FLOAT32_C( -209.34), SIMDE_FLOAT32_C( -933.21), SIMDE_FLOAT32_C( -728.70), SIMDE_FLOAT32_C( -420.06), SIMDE_FLOAT32_C( 100.32), SIMDE_FLOAT32_C( 103.15), SIMDE_FLOAT32_C( 439.77), SIMDE_FLOAT32_C( -204.33)), UINT16_C(12713), simde_mm512_set_ps(SIMDE_FLOAT32_C( -841.43), SIMDE_FLOAT32_C( -14.16), SIMDE_FLOAT32_C( 824.88), SIMDE_FLOAT32_C( 793.63), SIMDE_FLOAT32_C( -736.75), SIMDE_FLOAT32_C( -310.57), SIMDE_FLOAT32_C( 728.87), SIMDE_FLOAT32_C( -350.72), SIMDE_FLOAT32_C( 60.89), SIMDE_FLOAT32_C( 109.81), SIMDE_FLOAT32_C( 715.94), SIMDE_FLOAT32_C( -250.60), SIMDE_FLOAT32_C( 944.14), SIMDE_FLOAT32_C( 361.85), SIMDE_FLOAT32_C( -13.07), SIMDE_FLOAT32_C( 852.60)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -788.39), SIMDE_FLOAT32_C( 330.43), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 956.68), 
SIMDE_FLOAT32_C( 954.62), SIMDE_FLOAT32_C( 825.49), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( -933.21), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -420.06), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 103.15), SIMDE_FLOAT32_C( 439.77), SIMDE_FLOAT32_C( -0.68)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_cosd_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_cosd_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.74), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.97)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( -0.31)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -923.64), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( -417.54), SIMDE_FLOAT64_C( 696.87)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( 0.88), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( -0.78), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( 0.92)) }, { 
simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 687.09), SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -660.80)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( -0.60), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( -0.23), SIMDE_FLOAT64_C( 0.51)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -770.35), SIMDE_FLOAT64_C( 443.48), SIMDE_FLOAT64_C( -583.60), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( -770.72), SIMDE_FLOAT64_C( 993.90), SIMDE_FLOAT64_C( 28.08), SIMDE_FLOAT64_C( 841.21)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.64), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( -0.72), SIMDE_FLOAT64_C( 0.94), SIMDE_FLOAT64_C( 0.63), SIMDE_FLOAT64_C( 0.07), SIMDE_FLOAT64_C( 0.88), SIMDE_FLOAT64_C( -0.52)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -387.90), SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 655.87), SIMDE_FLOAT64_C( 339.21), SIMDE_FLOAT64_C( 532.35), SIMDE_FLOAT64_C( -263.99), SIMDE_FLOAT64_C( 780.64), SIMDE_FLOAT64_C( -30.79)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.88), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( 0.49), SIMDE_FLOAT64_C( 0.86)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -203.65), SIMDE_FLOAT64_C( -80.73), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -944.78), SIMDE_FLOAT64_C( -747.59), SIMDE_FLOAT64_C( -767.23), SIMDE_FLOAT64_C( -554.19), SIMDE_FLOAT64_C( 398.82)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( 0.16), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( -0.71), SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( -0.97), SIMDE_FLOAT64_C( 0.78)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( 
791.23), SIMDE_FLOAT64_C( 254.31)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.34), SIMDE_FLOAT64_C( 0.77), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( -0.49), SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( -0.27)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_cosd_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_cosd_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( 467.76), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( 346.63)), UINT8_C(139), simde_mm512_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( -754.38)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( 467.76), SIMDE_FLOAT64_C( -0.74), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( 0.83)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -417.54)), UINT8_C(229), simde_mm512_set_pd(SIMDE_FLOAT64_C( 841.21), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 687.09), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -660.80), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -923.64), SIMDE_FLOAT64_C( -860.95)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.52), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 0.88), SIMDE_FLOAT64_C( -305.07), 
SIMDE_FLOAT64_C( -0.78)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 398.82), SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 339.21), SIMDE_FLOAT64_C( -263.99), SIMDE_FLOAT64_C( -30.79), SIMDE_FLOAT64_C( 443.48), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( 993.90)), UINT8_C(253), simde_mm512_set_pd(SIMDE_FLOAT64_C( -554.19), SIMDE_FLOAT64_C( -387.90), SIMDE_FLOAT64_C( 655.87), SIMDE_FLOAT64_C( 532.35), SIMDE_FLOAT64_C( 780.64), SIMDE_FLOAT64_C( -770.35), SIMDE_FLOAT64_C( -583.60), SIMDE_FLOAT64_C( -770.72)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.97), SIMDE_FLOAT64_C( 0.88), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( 0.49), SIMDE_FLOAT64_C( 0.64), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( 0.63)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 120.65), SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 791.23), SIMDE_FLOAT64_C( -203.65), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -747.59)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 543.35), SIMDE_FLOAT64_C( -171.51), SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( 254.31), SIMDE_FLOAT64_C( -80.73), SIMDE_FLOAT64_C( -944.78)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 120.65), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( -0.49), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -0.71)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 99.93), SIMDE_FLOAT64_C( -738.19), SIMDE_FLOAT64_C( 758.79), SIMDE_FLOAT64_C( 343.48), SIMDE_FLOAT64_C( -797.92), SIMDE_FLOAT64_C( -525.83), SIMDE_FLOAT64_C( -822.65), SIMDE_FLOAT64_C( 655.67)), UINT8_C(145), simde_mm512_set_pd(SIMDE_FLOAT64_C( 331.34), SIMDE_FLOAT64_C( 462.95), SIMDE_FLOAT64_C( -178.99), SIMDE_FLOAT64_C( 324.62), SIMDE_FLOAT64_C( -874.31), SIMDE_FLOAT64_C( -328.54), SIMDE_FLOAT64_C( -192.31), SIMDE_FLOAT64_C( 561.36)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.88), SIMDE_FLOAT64_C( -738.19), SIMDE_FLOAT64_C( 
758.79), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( -797.92), SIMDE_FLOAT64_C( -525.83), SIMDE_FLOAT64_C( -822.65), SIMDE_FLOAT64_C( -0.93)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -756.42), SIMDE_FLOAT64_C( 27.25), SIMDE_FLOAT64_C( 690.12), SIMDE_FLOAT64_C( -21.09), SIMDE_FLOAT64_C( -448.89), SIMDE_FLOAT64_C( 505.79), SIMDE_FLOAT64_C( 831.02), SIMDE_FLOAT64_C( 977.36)), UINT8_C( 75), simde_mm512_set_pd(SIMDE_FLOAT64_C( 977.49), SIMDE_FLOAT64_C( 424.81), SIMDE_FLOAT64_C( -95.15), SIMDE_FLOAT64_C( 840.65), SIMDE_FLOAT64_C( -591.56), SIMDE_FLOAT64_C( 731.49), SIMDE_FLOAT64_C( 623.70), SIMDE_FLOAT64_C( 140.67)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -756.42), SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 690.12), SIMDE_FLOAT64_C( -21.09), SIMDE_FLOAT64_C( -0.62), SIMDE_FLOAT64_C( 505.79), SIMDE_FLOAT64_C( -0.11), SIMDE_FLOAT64_C( -0.77)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 394.67), SIMDE_FLOAT64_C( -304.73), SIMDE_FLOAT64_C( -696.69), SIMDE_FLOAT64_C( 822.06), SIMDE_FLOAT64_C( -997.63), SIMDE_FLOAT64_C( 923.64), SIMDE_FLOAT64_C( -768.12), SIMDE_FLOAT64_C( -67.64)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 510.85), SIMDE_FLOAT64_C( 14.34), SIMDE_FLOAT64_C( 916.26), SIMDE_FLOAT64_C( -769.09), SIMDE_FLOAT64_C( -573.81), SIMDE_FLOAT64_C( -337.60), SIMDE_FLOAT64_C( 293.64), SIMDE_FLOAT64_C( -576.22)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 394.67), SIMDE_FLOAT64_C( 0.97), SIMDE_FLOAT64_C( -696.69), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( -0.83), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( -768.12), SIMDE_FLOAT64_C( -0.81)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 475.51), SIMDE_FLOAT64_C( 936.65), SIMDE_FLOAT64_C( -348.70), SIMDE_FLOAT64_C( -438.19), SIMDE_FLOAT64_C( -752.43), SIMDE_FLOAT64_C( 932.66), SIMDE_FLOAT64_C( -327.22), SIMDE_FLOAT64_C( -182.45)), UINT8_C(213), simde_mm512_set_pd(SIMDE_FLOAT64_C( -775.04), SIMDE_FLOAT64_C( 440.64), SIMDE_FLOAT64_C( 897.27), SIMDE_FLOAT64_C( -197.89), SIMDE_FLOAT64_C( -359.76), SIMDE_FLOAT64_C( -33.67), SIMDE_FLOAT64_C( 7.27), 
SIMDE_FLOAT64_C( -125.20)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.16), SIMDE_FLOAT64_C( -348.70), SIMDE_FLOAT64_C( -0.95), SIMDE_FLOAT64_C( -752.43), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( -327.22), SIMDE_FLOAT64_C( -0.58)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_cosd_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cosh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 3.48), SIMDE_FLOAT32_C( 4.71), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 6.41)), simde_mm_set_ps(SIMDE_FLOAT32_C( 16.25), SIMDE_FLOAT32_C( 55.53), SIMDE_FLOAT32_C( 1.06), SIMDE_FLOAT32_C( 303.95)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 7.24), SIMDE_FLOAT32_C( 8.19), SIMDE_FLOAT32_C( 2.86), SIMDE_FLOAT32_C( 4.69)), simde_mm_set_ps(SIMDE_FLOAT32_C( 697.05), SIMDE_FLOAT32_C( 1802.36), SIMDE_FLOAT32_C( 8.76), SIMDE_FLOAT32_C( 54.43)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 9.04), SIMDE_FLOAT32_C( 6.82), SIMDE_FLOAT32_C( 3.02), SIMDE_FLOAT32_C( 7.07)), simde_mm_set_ps(SIMDE_FLOAT32_C( 4216.89), SIMDE_FLOAT32_C( 457.99), SIMDE_FLOAT32_C( 10.27), SIMDE_FLOAT32_C( 588.07)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 4.97), SIMDE_FLOAT32_C( 7.64)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C( 1.28), SIMDE_FLOAT32_C( 72.02), SIMDE_FLOAT32_C( 1039.87)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 2.82), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( 2.20), SIMDE_FLOAT32_C( 8.33)), simde_mm_set_ps(SIMDE_FLOAT32_C( 8.42), SIMDE_FLOAT32_C( 1.03), SIMDE_FLOAT32_C( 4.57), SIMDE_FLOAT32_C( 2073.21)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( 4.66), SIMDE_FLOAT32_C( 2.39), SIMDE_FLOAT32_C( -0.58)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3.95), SIMDE_FLOAT32_C( 52.82), SIMDE_FLOAT32_C( 5.50), SIMDE_FLOAT32_C( 
1.17)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 5.94), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( 0.87)), simde_mm_set_ps(SIMDE_FLOAT32_C( 189.97), SIMDE_FLOAT32_C( 13.99), SIMDE_FLOAT32_C( 1.40), SIMDE_FLOAT32_C( 1.40)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 5.48), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( 5.78), SIMDE_FLOAT32_C( 8.28)), simde_mm_set_ps(SIMDE_FLOAT32_C( 119.93), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( 161.88), SIMDE_FLOAT32_C( 1972.10)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_cosh_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_cosh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( 3.04)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.03), SIMDE_FLOAT64_C( 10.48)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 1.44), SIMDE_FLOAT64_C( 2.12)), simde_mm_set_pd(SIMDE_FLOAT64_C( 2.23), SIMDE_FLOAT64_C( 4.23)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 1.11), SIMDE_FLOAT64_C( 2.10)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.68), SIMDE_FLOAT64_C( 4.14)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 3.49), SIMDE_FLOAT64_C( 4.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( 16.41), SIMDE_FLOAT64_C( 27.58)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 1.19), SIMDE_FLOAT64_C( 3.40)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.80), SIMDE_FLOAT64_C( 15.00)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 4.48), SIMDE_FLOAT64_C( 3.27)), simde_mm_set_pd(SIMDE_FLOAT64_C( 44.12), SIMDE_FLOAT64_C( 13.17)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 2.25), SIMDE_FLOAT64_C( 3.71)), simde_mm_set_pd(SIMDE_FLOAT64_C( 4.80), SIMDE_FLOAT64_C( 20.44)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( -0.06)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_cosh_pd(test_vec[i].a); 
simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_cosh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 7.24), SIMDE_FLOAT32_C( 8.19), SIMDE_FLOAT32_C( 2.86), SIMDE_FLOAT32_C( 4.69), SIMDE_FLOAT32_C( 3.48), SIMDE_FLOAT32_C( 4.71), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 6.41)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 697.05), SIMDE_FLOAT32_C( 1802.36), SIMDE_FLOAT32_C( 8.76), SIMDE_FLOAT32_C( 54.43), SIMDE_FLOAT32_C( 16.25), SIMDE_FLOAT32_C( 55.53), SIMDE_FLOAT32_C( 1.06), SIMDE_FLOAT32_C( 303.95)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 4.97), SIMDE_FLOAT32_C( 7.64), SIMDE_FLOAT32_C( 9.04), SIMDE_FLOAT32_C( 6.82), SIMDE_FLOAT32_C( 3.02), SIMDE_FLOAT32_C( 7.07)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C( 1.28), SIMDE_FLOAT32_C( 72.02), SIMDE_FLOAT32_C( 1039.87), SIMDE_FLOAT32_C( 4216.89), SIMDE_FLOAT32_C( 457.99), SIMDE_FLOAT32_C( 10.27), SIMDE_FLOAT32_C( 588.07)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( 4.66), SIMDE_FLOAT32_C( 2.39), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 2.82), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( 2.20), SIMDE_FLOAT32_C( 8.33)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 3.95), SIMDE_FLOAT32_C( 52.82), SIMDE_FLOAT32_C( 5.50), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 8.42), SIMDE_FLOAT32_C( 1.03), SIMDE_FLOAT32_C( 4.57), SIMDE_FLOAT32_C( 2073.21)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 5.48), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( 5.78), SIMDE_FLOAT32_C( 8.28), SIMDE_FLOAT32_C( 5.94), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( 0.87)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 119.93), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( 161.88), SIMDE_FLOAT32_C( 1972.10), SIMDE_FLOAT32_C( 189.97), SIMDE_FLOAT32_C( 13.99), SIMDE_FLOAT32_C( 1.40), SIMDE_FLOAT32_C( 1.40)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 
6.94), SIMDE_FLOAT32_C( 1.29), SIMDE_FLOAT32_C( 6.59), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 9.97), SIMDE_FLOAT32_C( 4.65), SIMDE_FLOAT32_C( 9.13)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.03), SIMDE_FLOAT32_C( 516.39), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 363.89), SIMDE_FLOAT32_C( 1.03), SIMDE_FLOAT32_C( 10687.75), SIMDE_FLOAT32_C( 52.30), SIMDE_FLOAT32_C( 4614.01)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.37), SIMDE_FLOAT32_C( 6.68), SIMDE_FLOAT32_C( 8.11), SIMDE_FLOAT32_C( 6.37), SIMDE_FLOAT32_C( 7.43), SIMDE_FLOAT32_C( 3.05), SIMDE_FLOAT32_C( 8.79), SIMDE_FLOAT32_C( 4.33)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 5.40), SIMDE_FLOAT32_C( 398.16), SIMDE_FLOAT32_C( 1663.79), SIMDE_FLOAT32_C( 292.03), SIMDE_FLOAT32_C( 842.90), SIMDE_FLOAT32_C( 10.58), SIMDE_FLOAT32_C( 3284.12), SIMDE_FLOAT32_C( 37.98)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 3.38), SIMDE_FLOAT32_C( 4.06), SIMDE_FLOAT32_C( 6.35), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 1.45), SIMDE_FLOAT32_C( 6.69)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 14.70), SIMDE_FLOAT32_C( 29.00), SIMDE_FLOAT32_C( 286.25), SIMDE_FLOAT32_C( 1.26), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 2.25), SIMDE_FLOAT32_C( 402.16)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 7.08), SIMDE_FLOAT32_C( 8.24), SIMDE_FLOAT32_C( 3.68), SIMDE_FLOAT32_C( 9.00), SIMDE_FLOAT32_C( 9.51), SIMDE_FLOAT32_C( 7.80), SIMDE_FLOAT32_C( 8.85), SIMDE_FLOAT32_C( 5.90)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 593.98), SIMDE_FLOAT32_C( 1894.77), SIMDE_FLOAT32_C( 19.84), SIMDE_FLOAT32_C( 4051.54), SIMDE_FLOAT32_C( 6747.00), SIMDE_FLOAT32_C( 1220.30), SIMDE_FLOAT32_C( 3487.20), SIMDE_FLOAT32_C( 182.52)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_cosh_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_cosh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } 
test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.44), SIMDE_FLOAT64_C( 2.12), SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( 3.04)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 2.23), SIMDE_FLOAT64_C( 4.23), SIMDE_FLOAT64_C( 1.03), SIMDE_FLOAT64_C( 10.48)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 3.49), SIMDE_FLOAT64_C( 4.01), SIMDE_FLOAT64_C( 1.11), SIMDE_FLOAT64_C( 2.10)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 16.41), SIMDE_FLOAT64_C( 27.58), SIMDE_FLOAT64_C( 1.68), SIMDE_FLOAT64_C( 4.14)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 4.48), SIMDE_FLOAT64_C( 3.27), SIMDE_FLOAT64_C( 1.19), SIMDE_FLOAT64_C( 3.40)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 44.12), SIMDE_FLOAT64_C( 13.17), SIMDE_FLOAT64_C( 1.80), SIMDE_FLOAT64_C( 15.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( -0.06), SIMDE_FLOAT64_C( 2.25), SIMDE_FLOAT64_C( 3.71)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 4.80), SIMDE_FLOAT64_C( 20.44)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.08), SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( 4.09)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.64), SIMDE_FLOAT64_C( 1.17), SIMDE_FLOAT64_C( 1.29), SIMDE_FLOAT64_C( 29.88)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 2.09), SIMDE_FLOAT64_C( 0.85), SIMDE_FLOAT64_C( -0.77)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.23), SIMDE_FLOAT64_C( 4.10), SIMDE_FLOAT64_C( 1.38), SIMDE_FLOAT64_C( 1.31)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 2.78), SIMDE_FLOAT64_C( 1.36), SIMDE_FLOAT64_C( -0.93), SIMDE_FLOAT64_C( 0.02)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 8.09), SIMDE_FLOAT64_C( 2.08), SIMDE_FLOAT64_C( 1.46), SIMDE_FLOAT64_C( 1.00)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 2.53), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( 2.70), SIMDE_FLOAT64_C( 4.06)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 6.32), SIMDE_FLOAT64_C( 1.22), SIMDE_FLOAT64_C( 7.47), SIMDE_FLOAT64_C( 29.00)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { 
simde__m256d r = simde_mm256_cosh_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_cosh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 4.97), SIMDE_FLOAT32_C( 7.64), SIMDE_FLOAT32_C( 9.04), SIMDE_FLOAT32_C( 6.82), SIMDE_FLOAT32_C( 3.02), SIMDE_FLOAT32_C( 7.07), SIMDE_FLOAT32_C( 7.24), SIMDE_FLOAT32_C( 8.19), SIMDE_FLOAT32_C( 2.86), SIMDE_FLOAT32_C( 4.69), SIMDE_FLOAT32_C( 3.48), SIMDE_FLOAT32_C( 4.71), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 6.41)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C( 1.28), SIMDE_FLOAT32_C( 72.02), SIMDE_FLOAT32_C( 1039.87), SIMDE_FLOAT32_C( 4216.89), SIMDE_FLOAT32_C( 457.99), SIMDE_FLOAT32_C( 10.27), SIMDE_FLOAT32_C( 588.07), SIMDE_FLOAT32_C( 697.05), SIMDE_FLOAT32_C( 1802.36), SIMDE_FLOAT32_C( 8.76), SIMDE_FLOAT32_C( 54.43), SIMDE_FLOAT32_C( 16.25), SIMDE_FLOAT32_C( 55.53), SIMDE_FLOAT32_C( 1.06), SIMDE_FLOAT32_C( 303.95)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 5.48), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( 5.78), SIMDE_FLOAT32_C( 8.28), SIMDE_FLOAT32_C( 5.94), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( 4.66), SIMDE_FLOAT32_C( 2.39), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 2.82), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( 2.20), SIMDE_FLOAT32_C( 8.33)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 119.93), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( 161.88), SIMDE_FLOAT32_C( 1972.10), SIMDE_FLOAT32_C( 189.97), SIMDE_FLOAT32_C( 13.99), SIMDE_FLOAT32_C( 1.40), SIMDE_FLOAT32_C( 1.40), SIMDE_FLOAT32_C( 3.95), SIMDE_FLOAT32_C( 52.82), SIMDE_FLOAT32_C( 5.50), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 8.42), SIMDE_FLOAT32_C( 1.03), SIMDE_FLOAT32_C( 4.57), SIMDE_FLOAT32_C( 2073.21)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.37), SIMDE_FLOAT32_C( 6.68), SIMDE_FLOAT32_C( 8.11), 
SIMDE_FLOAT32_C( 6.37), SIMDE_FLOAT32_C( 7.43), SIMDE_FLOAT32_C( 3.05), SIMDE_FLOAT32_C( 8.79), SIMDE_FLOAT32_C( 4.33), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 6.94), SIMDE_FLOAT32_C( 1.29), SIMDE_FLOAT32_C( 6.59), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 9.97), SIMDE_FLOAT32_C( 4.65), SIMDE_FLOAT32_C( 9.13)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 5.40), SIMDE_FLOAT32_C( 398.16), SIMDE_FLOAT32_C( 1663.79), SIMDE_FLOAT32_C( 292.03), SIMDE_FLOAT32_C( 842.90), SIMDE_FLOAT32_C( 10.58), SIMDE_FLOAT32_C( 3284.12), SIMDE_FLOAT32_C( 37.98), SIMDE_FLOAT32_C( 1.03), SIMDE_FLOAT32_C( 516.39), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 363.89), SIMDE_FLOAT32_C( 1.03), SIMDE_FLOAT32_C( 10687.75), SIMDE_FLOAT32_C( 52.30), SIMDE_FLOAT32_C( 4614.01)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 7.08), SIMDE_FLOAT32_C( 8.24), SIMDE_FLOAT32_C( 3.68), SIMDE_FLOAT32_C( 9.00), SIMDE_FLOAT32_C( 9.51), SIMDE_FLOAT32_C( 7.80), SIMDE_FLOAT32_C( 8.85), SIMDE_FLOAT32_C( 5.90), SIMDE_FLOAT32_C( 3.38), SIMDE_FLOAT32_C( 4.06), SIMDE_FLOAT32_C( 6.35), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 1.45), SIMDE_FLOAT32_C( 6.69)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 593.98), SIMDE_FLOAT32_C( 1894.77), SIMDE_FLOAT32_C( 19.84), SIMDE_FLOAT32_C( 4051.54), SIMDE_FLOAT32_C( 6747.00), SIMDE_FLOAT32_C( 1220.30), SIMDE_FLOAT32_C( 3487.20), SIMDE_FLOAT32_C( 182.52), SIMDE_FLOAT32_C( 14.70), SIMDE_FLOAT32_C( 29.00), SIMDE_FLOAT32_C( 286.25), SIMDE_FLOAT32_C( 1.26), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 2.25), SIMDE_FLOAT32_C( 402.16)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.52), SIMDE_FLOAT32_C( 8.67), SIMDE_FLOAT32_C( 6.29), SIMDE_FLOAT32_C( 6.39), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 2.69), SIMDE_FLOAT32_C( 1.61), SIMDE_FLOAT32_C( 3.44), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( 7.59), SIMDE_FLOAT32_C( 8.11), SIMDE_FLOAT32_C( 4.11), SIMDE_FLOAT32_C( 7.49), SIMDE_FLOAT32_C( 5.16), SIMDE_FLOAT32_C( 3.56)), 
simde_mm512_set_ps(SIMDE_FLOAT32_C( 16.91), SIMDE_FLOAT32_C( 2912.75), SIMDE_FLOAT32_C( 269.58), SIMDE_FLOAT32_C( 297.93), SIMDE_FLOAT32_C( 1.05), SIMDE_FLOAT32_C( 1.01), SIMDE_FLOAT32_C( 7.40), SIMDE_FLOAT32_C( 2.60), SIMDE_FLOAT32_C( 15.61), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 989.16), SIMDE_FLOAT32_C( 1663.79), SIMDE_FLOAT32_C( 30.48), SIMDE_FLOAT32_C( 895.03), SIMDE_FLOAT32_C( 87.09), SIMDE_FLOAT32_C( 17.60)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 8.30), SIMDE_FLOAT32_C( 9.12), SIMDE_FLOAT32_C( 4.38), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( 8.52), SIMDE_FLOAT32_C( 7.28), SIMDE_FLOAT32_C( 7.93), SIMDE_FLOAT32_C( 9.07), SIMDE_FLOAT32_C( 5.27), SIMDE_FLOAT32_C( 9.88), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( 6.32), SIMDE_FLOAT32_C( 5.05), SIMDE_FLOAT32_C( 7.05), SIMDE_FLOAT32_C( 0.44)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2011.94), SIMDE_FLOAT32_C( 4568.10), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 1.89), SIMDE_FLOAT32_C( 3.87), SIMDE_FLOAT32_C( 2507.03), SIMDE_FLOAT32_C( 725.49), SIMDE_FLOAT32_C( 1389.71), SIMDE_FLOAT32_C( 4345.31), SIMDE_FLOAT32_C( 97.21), SIMDE_FLOAT32_C( 9767.86), SIMDE_FLOAT32_C( 1.12), SIMDE_FLOAT32_C( 277.79), SIMDE_FLOAT32_C( 78.01), SIMDE_FLOAT32_C( 576.43), SIMDE_FLOAT32_C( 1.10)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 9.02), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( 9.58), SIMDE_FLOAT32_C( 6.12), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 1.33), SIMDE_FLOAT32_C( 4.13), SIMDE_FLOAT32_C( 8.41), SIMDE_FLOAT32_C( 9.88), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 6.84), SIMDE_FLOAT32_C( 4.65), SIMDE_FLOAT32_C( 3.98)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 4133.39), SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 7.04), SIMDE_FLOAT32_C( 7236.21), SIMDE_FLOAT32_C( 227.43), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( 31.10), SIMDE_FLOAT32_C( 2245.88), SIMDE_FLOAT32_C( 9767.86), 
SIMDE_FLOAT32_C( 1.06), SIMDE_FLOAT32_C( 467.25), SIMDE_FLOAT32_C( 52.30), SIMDE_FLOAT32_C( 26.77)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.09), SIMDE_FLOAT32_C( 2.52), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 4.31), SIMDE_FLOAT32_C( 9.63), SIMDE_FLOAT32_C( 4.54), SIMDE_FLOAT32_C( 2.70), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 3.50), SIMDE_FLOAT32_C( 4.72), SIMDE_FLOAT32_C( 7.31), SIMDE_FLOAT32_C( 6.67), SIMDE_FLOAT32_C( 4.58), SIMDE_FLOAT32_C( 2.82), SIMDE_FLOAT32_C( 9.54), SIMDE_FLOAT32_C( 0.67)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 4.10), SIMDE_FLOAT32_C( 6.25), SIMDE_FLOAT32_C( 1.07), SIMDE_FLOAT32_C( 37.23), SIMDE_FLOAT32_C( 7607.22), SIMDE_FLOAT32_C( 46.85), SIMDE_FLOAT32_C( 7.47), SIMDE_FLOAT32_C( 22.59), SIMDE_FLOAT32_C( 16.57), SIMDE_FLOAT32_C( 56.09), SIMDE_FLOAT32_C( 747.59), SIMDE_FLOAT32_C( 394.20), SIMDE_FLOAT32_C( 48.76), SIMDE_FLOAT32_C( 8.42), SIMDE_FLOAT32_C( 6952.47), SIMDE_FLOAT32_C( 1.23)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_cosh_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_cosh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( 8.28), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 4.66), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( 8.33), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 7.64), SIMDE_FLOAT32_C( 6.82), SIMDE_FLOAT32_C( 7.07), SIMDE_FLOAT32_C( 8.19), SIMDE_FLOAT32_C( 4.69), SIMDE_FLOAT32_C( 4.71), SIMDE_FLOAT32_C( 6.41)), UINT16_C(41466), simde_mm512_set_ps(SIMDE_FLOAT32_C( 5.48), SIMDE_FLOAT32_C( 5.78), SIMDE_FLOAT32_C( 5.94), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( 2.39), SIMDE_FLOAT32_C( 2.82), SIMDE_FLOAT32_C( 2.20), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 4.97), SIMDE_FLOAT32_C( 9.04), 
SIMDE_FLOAT32_C( 3.02), SIMDE_FLOAT32_C( 7.24), SIMDE_FLOAT32_C( 2.86), SIMDE_FLOAT32_C( 3.48), SIMDE_FLOAT32_C( 0.35)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 119.93), SIMDE_FLOAT32_C( 8.28), SIMDE_FLOAT32_C( 189.97), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 4.66), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( 4.57), SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C( 72.02), SIMDE_FLOAT32_C( 4216.89), SIMDE_FLOAT32_C( 10.27), SIMDE_FLOAT32_C( 697.05), SIMDE_FLOAT32_C( 4.69), SIMDE_FLOAT32_C( 16.25), SIMDE_FLOAT32_C( 6.41)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 7.08), SIMDE_FLOAT32_C( 3.68), SIMDE_FLOAT32_C( 9.51), SIMDE_FLOAT32_C( 8.85), SIMDE_FLOAT32_C( 3.38), SIMDE_FLOAT32_C( 6.35), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 1.45), SIMDE_FLOAT32_C( 2.37), SIMDE_FLOAT32_C( 8.11), SIMDE_FLOAT32_C( 7.43), SIMDE_FLOAT32_C( 8.79), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 1.29), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 4.65)), UINT16_C(36797), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.56), SIMDE_FLOAT32_C( 8.24), SIMDE_FLOAT32_C( 9.00), SIMDE_FLOAT32_C( 7.80), SIMDE_FLOAT32_C( 5.90), SIMDE_FLOAT32_C( 4.06), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 6.69), SIMDE_FLOAT32_C( 6.68), SIMDE_FLOAT32_C( 6.37), SIMDE_FLOAT32_C( 3.05), SIMDE_FLOAT32_C( 4.33), SIMDE_FLOAT32_C( 6.94), SIMDE_FLOAT32_C( 6.59), SIMDE_FLOAT32_C( 9.97)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 17.60), SIMDE_FLOAT32_C( 3.68), SIMDE_FLOAT32_C( 9.51), SIMDE_FLOAT32_C( 8.85), SIMDE_FLOAT32_C( 182.52), SIMDE_FLOAT32_C( 29.00), SIMDE_FLOAT32_C( 1.26), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 402.16), SIMDE_FLOAT32_C( 8.11), SIMDE_FLOAT32_C( 292.03), SIMDE_FLOAT32_C( 10.58), SIMDE_FLOAT32_C( 37.98), SIMDE_FLOAT32_C( 516.39), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 10687.75)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.98), SIMDE_FLOAT32_C( 9.12), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( 8.52), SIMDE_FLOAT32_C( 7.93), SIMDE_FLOAT32_C( 5.27), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( 
5.05), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 8.67), SIMDE_FLOAT32_C( 6.39), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 1.61), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( 8.11), SIMDE_FLOAT32_C( 7.49)), UINT16_C(16804), simde_mm512_set_ps(SIMDE_FLOAT32_C( 4.65), SIMDE_FLOAT32_C( 8.30), SIMDE_FLOAT32_C( 4.38), SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( 7.28), SIMDE_FLOAT32_C( 9.07), SIMDE_FLOAT32_C( 9.88), SIMDE_FLOAT32_C( 6.32), SIMDE_FLOAT32_C( 7.05), SIMDE_FLOAT32_C( 3.52), SIMDE_FLOAT32_C( 6.29), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 2.69), SIMDE_FLOAT32_C( 3.44), SIMDE_FLOAT32_C( 7.59), SIMDE_FLOAT32_C( 4.11)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.98), SIMDE_FLOAT32_C( 2011.94), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( 8.52), SIMDE_FLOAT32_C( 7.93), SIMDE_FLOAT32_C( 5.27), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( 277.79), SIMDE_FLOAT32_C( 576.43), SIMDE_FLOAT32_C( 8.67), SIMDE_FLOAT32_C( 269.58), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 1.61), SIMDE_FLOAT32_C( 15.61), SIMDE_FLOAT32_C( 8.11), SIMDE_FLOAT32_C( 7.49)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.58), SIMDE_FLOAT32_C( 2.09), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 9.63), SIMDE_FLOAT32_C( 2.70), SIMDE_FLOAT32_C( 3.50), SIMDE_FLOAT32_C( 7.31), SIMDE_FLOAT32_C( 4.58), SIMDE_FLOAT32_C( 9.54), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( 6.12), SIMDE_FLOAT32_C( 1.33), SIMDE_FLOAT32_C( 8.41), SIMDE_FLOAT32_C( 0.34)), UINT16_C( 2107), simde_mm512_set_ps(SIMDE_FLOAT32_C( 9.43), SIMDE_FLOAT32_C( 3.41), SIMDE_FLOAT32_C( 2.52), SIMDE_FLOAT32_C( 4.31), SIMDE_FLOAT32_C( 4.54), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 4.72), SIMDE_FLOAT32_C( 6.67), SIMDE_FLOAT32_C( 2.82), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 9.02), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 9.58), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 4.13), SIMDE_FLOAT32_C( 9.88)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.58), SIMDE_FLOAT32_C( 2.09), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 9.63), SIMDE_FLOAT32_C( 
46.85), SIMDE_FLOAT32_C( 3.50), SIMDE_FLOAT32_C( 7.31), SIMDE_FLOAT32_C( 4.58), SIMDE_FLOAT32_C( 9.54), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 4133.39), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 7236.21), SIMDE_FLOAT32_C( 1.33), SIMDE_FLOAT32_C( 31.10), SIMDE_FLOAT32_C( 9767.86)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 4.41), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 2.77), SIMDE_FLOAT32_C( 5.48), SIMDE_FLOAT32_C( 6.40), SIMDE_FLOAT32_C( 9.39), SIMDE_FLOAT32_C( 9.54), SIMDE_FLOAT32_C( 5.23), SIMDE_FLOAT32_C( 7.17), SIMDE_FLOAT32_C( 1.21), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 5.72), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 6.92)), UINT16_C(22274), simde_mm512_set_ps(SIMDE_FLOAT32_C( 7.23), SIMDE_FLOAT32_C( 9.53), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 5.38), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( 6.85), SIMDE_FLOAT32_C( 9.63), SIMDE_FLOAT32_C( 5.11), SIMDE_FLOAT32_C( 9.05), SIMDE_FLOAT32_C( 4.08), SIMDE_FLOAT32_C( 5.81), SIMDE_FLOAT32_C( 2.42), SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 1.07), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( 7.12)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 4.41), SIMDE_FLOAT32_C( 6883.29), SIMDE_FLOAT32_C( 2.77), SIMDE_FLOAT32_C( 108.51), SIMDE_FLOAT32_C( 6.40), SIMDE_FLOAT32_C( 471.94), SIMDE_FLOAT32_C( 7607.22), SIMDE_FLOAT32_C( 82.84), SIMDE_FLOAT32_C( 7.17), SIMDE_FLOAT32_C( 1.21), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 5.72), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( 6.92)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 9.36), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 4.11), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 5.01), SIMDE_FLOAT32_C( 5.64), SIMDE_FLOAT32_C( 9.09), SIMDE_FLOAT32_C( 4.14), SIMDE_FLOAT32_C( 2.93), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( 2.75), SIMDE_FLOAT32_C( 8.81), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 7.56), SIMDE_FLOAT32_C( 6.92), SIMDE_FLOAT32_C( 6.89)), UINT16_C(27396), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.80), 
SIMDE_FLOAT32_C( 2.27), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 8.84), SIMDE_FLOAT32_C( 4.53), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 5.39), SIMDE_FLOAT32_C( 7.69), SIMDE_FLOAT32_C( 4.44), SIMDE_FLOAT32_C( 2.80), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 6.34), SIMDE_FLOAT32_C( 6.01), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( 1.81)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 9.36), SIMDE_FLOAT32_C( 4.89), SIMDE_FLOAT32_C( 1.09), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 3452.50), SIMDE_FLOAT32_C( 5.64), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 109.60), SIMDE_FLOAT32_C( 2.93), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( 2.75), SIMDE_FLOAT32_C( 8.81), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 203.74), SIMDE_FLOAT32_C( 6.92), SIMDE_FLOAT32_C( 6.89)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 7.41), SIMDE_FLOAT32_C( 4.36), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( 8.01), SIMDE_FLOAT32_C( 6.98), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 5.08), SIMDE_FLOAT32_C( 7.75), SIMDE_FLOAT32_C( 4.67), SIMDE_FLOAT32_C( 7.99), SIMDE_FLOAT32_C( 4.04), SIMDE_FLOAT32_C( 4.94), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 7.84), SIMDE_FLOAT32_C( 6.67), SIMDE_FLOAT32_C( -0.39)), UINT16_C( 953), simde_mm512_set_ps(SIMDE_FLOAT32_C( 4.60), SIMDE_FLOAT32_C( 8.95), SIMDE_FLOAT32_C( 5.29), SIMDE_FLOAT32_C( 2.15), SIMDE_FLOAT32_C( 6.20), SIMDE_FLOAT32_C( 3.53), SIMDE_FLOAT32_C( 1.98), SIMDE_FLOAT32_C( 8.23), SIMDE_FLOAT32_C( 4.86), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( 5.15), SIMDE_FLOAT32_C( 1.33), SIMDE_FLOAT32_C( 4.29), SIMDE_FLOAT32_C( 3.12), SIMDE_FLOAT32_C( 4.20), SIMDE_FLOAT32_C( 5.01)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 7.41), SIMDE_FLOAT32_C( 4.36), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( 8.01), SIMDE_FLOAT32_C( 6.98), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 3.69), SIMDE_FLOAT32_C( 1875.92), SIMDE_FLOAT32_C( 64.52), SIMDE_FLOAT32_C( 7.99), SIMDE_FLOAT32_C( 86.22), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( 36.49), SIMDE_FLOAT32_C( 7.84), SIMDE_FLOAT32_C( 6.67), 
SIMDE_FLOAT32_C( 74.96)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 6.32), SIMDE_FLOAT32_C( 1.79), SIMDE_FLOAT32_C( 9.02), SIMDE_FLOAT32_C( 9.76), SIMDE_FLOAT32_C( 9.75), SIMDE_FLOAT32_C( 9.04), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 3.35), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 2.19), SIMDE_FLOAT32_C( 5.05), SIMDE_FLOAT32_C( 5.07), SIMDE_FLOAT32_C( 6.92), SIMDE_FLOAT32_C( 3.38)), UINT16_C(12713), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 4.42), SIMDE_FLOAT32_C( 9.04), SIMDE_FLOAT32_C( 8.86), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 2.79), SIMDE_FLOAT32_C( 8.51), SIMDE_FLOAT32_C( 2.57), SIMDE_FLOAT32_C( 4.83), SIMDE_FLOAT32_C( 5.10), SIMDE_FLOAT32_C( 8.44), SIMDE_FLOAT32_C( 3.12), SIMDE_FLOAT32_C( 9.69), SIMDE_FLOAT32_C( 6.49), SIMDE_FLOAT32_C( 4.43), SIMDE_FLOAT32_C( 9.19)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 6.32), SIMDE_FLOAT32_C( 4216.89), SIMDE_FLOAT32_C( 3522.24), SIMDE_FLOAT32_C( 9.76), SIMDE_FLOAT32_C( 9.75), SIMDE_FLOAT32_C( 9.04), SIMDE_FLOAT32_C( 6.57), SIMDE_FLOAT32_C( 62.61), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( 2314.28), SIMDE_FLOAT32_C( 2.19), SIMDE_FLOAT32_C( 8077.62), SIMDE_FLOAT32_C( 5.07), SIMDE_FLOAT32_C( 6.92), SIMDE_FLOAT32_C( 4899.32)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_cosh_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_cosh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.49), SIMDE_FLOAT64_C( 4.01), SIMDE_FLOAT64_C( 1.11), SIMDE_FLOAT64_C( 2.10), SIMDE_FLOAT64_C( 1.44), SIMDE_FLOAT64_C( 2.12), SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( 3.04)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 16.41), SIMDE_FLOAT64_C( 27.58), SIMDE_FLOAT64_C( 1.68), SIMDE_FLOAT64_C( 4.14), SIMDE_FLOAT64_C( 2.23), 
SIMDE_FLOAT64_C( 4.23), SIMDE_FLOAT64_C( 1.03), SIMDE_FLOAT64_C( 10.48)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( -0.06), SIMDE_FLOAT64_C( 2.25), SIMDE_FLOAT64_C( 3.71), SIMDE_FLOAT64_C( 4.48), SIMDE_FLOAT64_C( 3.27), SIMDE_FLOAT64_C( 1.19), SIMDE_FLOAT64_C( 3.40)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 4.80), SIMDE_FLOAT64_C( 20.44), SIMDE_FLOAT64_C( 44.12), SIMDE_FLOAT64_C( 13.17), SIMDE_FLOAT64_C( 1.80), SIMDE_FLOAT64_C( 15.00)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 2.09), SIMDE_FLOAT64_C( 0.85), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( 1.08), SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( 4.09)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.23), SIMDE_FLOAT64_C( 4.10), SIMDE_FLOAT64_C( 1.38), SIMDE_FLOAT64_C( 1.31), SIMDE_FLOAT64_C( 1.64), SIMDE_FLOAT64_C( 1.17), SIMDE_FLOAT64_C( 1.29), SIMDE_FLOAT64_C( 29.88)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.53), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( 2.70), SIMDE_FLOAT64_C( 4.06), SIMDE_FLOAT64_C( 2.78), SIMDE_FLOAT64_C( 1.36), SIMDE_FLOAT64_C( -0.93), SIMDE_FLOAT64_C( 0.02)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 6.32), SIMDE_FLOAT64_C( 1.22), SIMDE_FLOAT64_C( 7.47), SIMDE_FLOAT64_C( 29.00), SIMDE_FLOAT64_C( 8.09), SIMDE_FLOAT64_C( 2.08), SIMDE_FLOAT64_C( 1.46), SIMDE_FLOAT64_C( 1.00)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( 3.33), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( 3.14), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( 4.98), SIMDE_FLOAT64_C( 2.08), SIMDE_FLOAT64_C( 4.52)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.05), SIMDE_FLOAT64_C( 13.99), SIMDE_FLOAT64_C( 1.03), SIMDE_FLOAT64_C( 11.57), SIMDE_FLOAT64_C( 1.05), SIMDE_FLOAT64_C( 72.74), SIMDE_FLOAT64_C( 4.06), SIMDE_FLOAT64_C( 45.92)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( 3.19), SIMDE_FLOAT64_C( 3.97), SIMDE_FLOAT64_C( 3.02), SIMDE_FLOAT64_C( 3.60), SIMDE_FLOAT64_C( 1.21), SIMDE_FLOAT64_C( 4.34), 
SIMDE_FLOAT64_C( 1.91)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.37), SIMDE_FLOAT64_C( 12.16), SIMDE_FLOAT64_C( 26.50), SIMDE_FLOAT64_C( 10.27), SIMDE_FLOAT64_C( 18.31), SIMDE_FLOAT64_C( 1.83), SIMDE_FLOAT64_C( 38.36), SIMDE_FLOAT64_C( 3.45)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.39), SIMDE_FLOAT64_C( 1.76), SIMDE_FLOAT64_C( 3.01), SIMDE_FLOAT64_C( -0.83), SIMDE_FLOAT64_C( -0.24), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 3.20)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.13), SIMDE_FLOAT64_C( 2.99), SIMDE_FLOAT64_C( 10.17), SIMDE_FLOAT64_C( 1.36), SIMDE_FLOAT64_C( 1.03), SIMDE_FLOAT64_C( 1.05), SIMDE_FLOAT64_C( 1.06), SIMDE_FLOAT64_C( 12.29)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.41), SIMDE_FLOAT64_C( 4.04), SIMDE_FLOAT64_C( 1.55), SIMDE_FLOAT64_C( 4.46), SIMDE_FLOAT64_C( 4.73), SIMDE_FLOAT64_C( 3.80), SIMDE_FLOAT64_C( 4.37), SIMDE_FLOAT64_C( 2.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 15.15), SIMDE_FLOAT64_C( 28.42), SIMDE_FLOAT64_C( 2.46), SIMDE_FLOAT64_C( 43.25), SIMDE_FLOAT64_C( 56.65), SIMDE_FLOAT64_C( 22.36), SIMDE_FLOAT64_C( 39.53), SIMDE_FLOAT64_C( 7.93)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_cosh_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_cosh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.06), SIMDE_FLOAT64_C( 3.71), SIMDE_FLOAT64_C( 3.27), SIMDE_FLOAT64_C( 3.40), SIMDE_FLOAT64_C( 4.01), SIMDE_FLOAT64_C( 2.10), SIMDE_FLOAT64_C( 2.12), SIMDE_FLOAT64_C( 3.04)), UINT8_C(139), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( 2.25), SIMDE_FLOAT64_C( 4.48), SIMDE_FLOAT64_C( 1.19), SIMDE_FLOAT64_C( 3.49), SIMDE_FLOAT64_C( 1.11), SIMDE_FLOAT64_C( 1.44), SIMDE_FLOAT64_C( -0.26)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 3.71), SIMDE_FLOAT64_C( 3.27), 
SIMDE_FLOAT64_C( 3.40), SIMDE_FLOAT64_C( 16.41), SIMDE_FLOAT64_C( 2.10), SIMDE_FLOAT64_C( 2.23), SIMDE_FLOAT64_C( 1.03)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.53), SIMDE_FLOAT64_C( 2.70), SIMDE_FLOAT64_C( 2.78), SIMDE_FLOAT64_C( -0.93), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.85), SIMDE_FLOAT64_C( 1.08), SIMDE_FLOAT64_C( 0.75)), UINT8_C(229), simde_mm512_set_pd(SIMDE_FLOAT64_C( 4.52), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( 4.06), SIMDE_FLOAT64_C( 1.36), SIMDE_FLOAT64_C( 0.02), SIMDE_FLOAT64_C( 2.09), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.58)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 45.92), SIMDE_FLOAT64_C( 1.22), SIMDE_FLOAT64_C( 29.00), SIMDE_FLOAT64_C( -0.93), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 4.10), SIMDE_FLOAT64_C( 1.08), SIMDE_FLOAT64_C( 1.17)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.20), SIMDE_FLOAT64_C( 3.19), SIMDE_FLOAT64_C( 3.02), SIMDE_FLOAT64_C( 1.21), SIMDE_FLOAT64_C( 1.91), SIMDE_FLOAT64_C( 3.33), SIMDE_FLOAT64_C( 3.14), SIMDE_FLOAT64_C( 4.98)), UINT8_C(253), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( 3.97), SIMDE_FLOAT64_C( 3.60), SIMDE_FLOAT64_C( 4.34), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( -0.31)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.06), SIMDE_FLOAT64_C( 1.37), SIMDE_FLOAT64_C( 26.50), SIMDE_FLOAT64_C( 18.31), SIMDE_FLOAT64_C( 38.36), SIMDE_FLOAT64_C( 1.05), SIMDE_FLOAT64_C( 3.14), SIMDE_FLOAT64_C( 1.05)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.36), SIMDE_FLOAT64_C( 3.41), SIMDE_FLOAT64_C( 1.55), SIMDE_FLOAT64_C( 4.73), SIMDE_FLOAT64_C( 4.37), SIMDE_FLOAT64_C( 1.39), SIMDE_FLOAT64_C( 3.01), SIMDE_FLOAT64_C( -0.24)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.63), SIMDE_FLOAT64_C( 1.49), SIMDE_FLOAT64_C( 4.04), SIMDE_FLOAT64_C( 4.46), SIMDE_FLOAT64_C( 3.80), SIMDE_FLOAT64_C( 2.76), SIMDE_FLOAT64_C( 1.76), SIMDE_FLOAT64_C( -0.83)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.36), SIMDE_FLOAT64_C( 2.33), SIMDE_FLOAT64_C( 1.55), SIMDE_FLOAT64_C( 
43.25), SIMDE_FLOAT64_C( 22.36), SIMDE_FLOAT64_C( 7.93), SIMDE_FLOAT64_C( 3.01), SIMDE_FLOAT64_C( 1.36)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.30), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( 4.28), SIMDE_FLOAT64_C( 3.03), SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( -0.47), SIMDE_FLOAT64_C( 3.97)), UINT8_C(145), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.99), SIMDE_FLOAT64_C( 3.39), SIMDE_FLOAT64_C( 1.46), SIMDE_FLOAT64_C( 2.97), SIMDE_FLOAT64_C( -0.62), SIMDE_FLOAT64_C( 1.01), SIMDE_FLOAT64_C( 1.42), SIMDE_FLOAT64_C( 3.68)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 9.97), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( 4.28), SIMDE_FLOAT64_C( 9.77), SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( -0.47), SIMDE_FLOAT64_C( 19.84)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 2.08), SIMDE_FLOAT64_C( 4.07), SIMDE_FLOAT64_C( 1.94), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( 3.52), SIMDE_FLOAT64_C( 4.49), SIMDE_FLOAT64_C( 4.93)), UINT8_C( 75), simde_mm512_set_pd(SIMDE_FLOAT64_C( 4.93), SIMDE_FLOAT64_C( 3.27), SIMDE_FLOAT64_C( 1.71), SIMDE_FLOAT64_C( 4.52), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 4.19), SIMDE_FLOAT64_C( 3.87), SIMDE_FLOAT64_C( 2.42)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 13.17), SIMDE_FLOAT64_C( 4.07), SIMDE_FLOAT64_C( 1.94), SIMDE_FLOAT64_C( 1.03), SIMDE_FLOAT64_C( 3.52), SIMDE_FLOAT64_C( 23.98), SIMDE_FLOAT64_C( 5.67)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.18), SIMDE_FLOAT64_C( 1.09), SIMDE_FLOAT64_C( -0.09), SIMDE_FLOAT64_C( 4.47), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( 4.77), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 1.80)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.53), SIMDE_FLOAT64_C( 2.04), SIMDE_FLOAT64_C( 4.75), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( 0.28), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 2.88), SIMDE_FLOAT64_C( 0.27)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.18), SIMDE_FLOAT64_C( 3.91), SIMDE_FLOAT64_C( -0.09), SIMDE_FLOAT64_C( 1.05), 
SIMDE_FLOAT64_C( 1.04), SIMDE_FLOAT64_C( 1.53), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 1.04)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.43), SIMDE_FLOAT64_C( 4.81), SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( 4.80), SIMDE_FLOAT64_C( 1.02), SIMDE_FLOAT64_C( 1.45)), UINT8_C(213), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( 3.32), SIMDE_FLOAT64_C( 4.69), SIMDE_FLOAT64_C( 1.41), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 1.90), SIMDE_FLOAT64_C( 2.02), SIMDE_FLOAT64_C( 1.62)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.05), SIMDE_FLOAT64_C( 13.85), SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 2.17), SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( 3.42), SIMDE_FLOAT64_C( 1.02), SIMDE_FLOAT64_C( 2.63)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_cosh_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_x_mm_deg2rad_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 954.59), SIMDE_FLOAT32_C( -212.53), SIMDE_FLOAT32_C( -73.32), SIMDE_FLOAT32_C( -280.66) }, { SIMDE_FLOAT32_C( 16.66), SIMDE_FLOAT32_C( -3.71), SIMDE_FLOAT32_C( -1.28), SIMDE_FLOAT32_C( -4.90) } }, { { SIMDE_FLOAT32_C( 908.48), SIMDE_FLOAT32_C( 789.59), SIMDE_FLOAT32_C( 675.09), SIMDE_FLOAT32_C( 164.25) }, { SIMDE_FLOAT32_C( 15.86), SIMDE_FLOAT32_C( 13.78), SIMDE_FLOAT32_C( 11.78), SIMDE_FLOAT32_C( 2.87) } }, { { SIMDE_FLOAT32_C( 515.80), SIMDE_FLOAT32_C( -965.27), SIMDE_FLOAT32_C( 659.44), SIMDE_FLOAT32_C( -806.83) }, { SIMDE_FLOAT32_C( 9.00), SIMDE_FLOAT32_C( -16.85), SIMDE_FLOAT32_C( 11.51), SIMDE_FLOAT32_C( -14.08) } }, { { SIMDE_FLOAT32_C( -402.30), SIMDE_FLOAT32_C( 576.73), SIMDE_FLOAT32_C( -978.47), SIMDE_FLOAT32_C( 782.95) }, { SIMDE_FLOAT32_C( -7.02), SIMDE_FLOAT32_C( 10.07), SIMDE_FLOAT32_C( -17.08), SIMDE_FLOAT32_C( 
13.67) } }, { { SIMDE_FLOAT32_C( -948.47), SIMDE_FLOAT32_C( 987.01), SIMDE_FLOAT32_C( 630.41), SIMDE_FLOAT32_C( -637.23) }, { SIMDE_FLOAT32_C( -16.55), SIMDE_FLOAT32_C( 17.23), SIMDE_FLOAT32_C( 11.00), SIMDE_FLOAT32_C( -11.12) } }, { { SIMDE_FLOAT32_C( 66.92), SIMDE_FLOAT32_C( 674.00), SIMDE_FLOAT32_C( -52.88), SIMDE_FLOAT32_C( -732.15) }, { SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 11.76), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -12.78) } }, { { SIMDE_FLOAT32_C( 750.47), SIMDE_FLOAT32_C( -906.63), SIMDE_FLOAT32_C( 205.33), SIMDE_FLOAT32_C( -941.95) }, { SIMDE_FLOAT32_C( 13.10), SIMDE_FLOAT32_C( -15.82), SIMDE_FLOAT32_C( 3.58), SIMDE_FLOAT32_C( -16.44) } }, { { SIMDE_FLOAT32_C( 705.35), SIMDE_FLOAT32_C( 774.66), SIMDE_FLOAT32_C( -289.06), SIMDE_FLOAT32_C( -214.64) }, { SIMDE_FLOAT32_C( 12.31), SIMDE_FLOAT32_C( 13.52), SIMDE_FLOAT32_C( -5.05), SIMDE_FLOAT32_C( -3.75) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_x_mm_deg2rad_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_x_mm_deg2rad_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -666.18), SIMDE_FLOAT64_C( -415.97) }, { SIMDE_FLOAT64_C( -11.63), SIMDE_FLOAT64_C( -7.26) } }, { { SIMDE_FLOAT64_C( 793.43), SIMDE_FLOAT64_C( -853.65) }, { SIMDE_FLOAT64_C( 13.85), SIMDE_FLOAT64_C( -14.90) } }, { { SIMDE_FLOAT64_C( 738.56), SIMDE_FLOAT64_C( 967.23) }, { SIMDE_FLOAT64_C( 12.89), SIMDE_FLOAT64_C( 16.88) } }, { { SIMDE_FLOAT64_C( 309.17), SIMDE_FLOAT64_C( 265.53) }, { SIMDE_FLOAT64_C( 5.40), SIMDE_FLOAT64_C( 4.63) } }, { { SIMDE_FLOAT64_C( 844.47), SIMDE_FLOAT64_C( 938.60) }, { SIMDE_FLOAT64_C( 14.74), SIMDE_FLOAT64_C( 16.38) } }, { { SIMDE_FLOAT64_C( -902.86), SIMDE_FLOAT64_C( -334.71) }, { SIMDE_FLOAT64_C( -15.76), SIMDE_FLOAT64_C( -5.84) } }, { { 
SIMDE_FLOAT64_C( 582.46), SIMDE_FLOAT64_C( -651.74) }, { SIMDE_FLOAT64_C( 10.17), SIMDE_FLOAT64_C( -11.38) } }, { { SIMDE_FLOAT64_C( 196.36), SIMDE_FLOAT64_C( 200.15) }, { SIMDE_FLOAT64_C( 3.43), SIMDE_FLOAT64_C( 3.49) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_x_mm_deg2rad_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_x_mm256_deg2rad_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 442.73), SIMDE_FLOAT32_C( -968.72), SIMDE_FLOAT32_C( 679.13), SIMDE_FLOAT32_C( 114.21), SIMDE_FLOAT32_C( -467.66), SIMDE_FLOAT32_C( -37.81), SIMDE_FLOAT32_C( 579.12), SIMDE_FLOAT32_C( -687.98) }, { SIMDE_FLOAT32_C( 7.73), SIMDE_FLOAT32_C( -16.91), SIMDE_FLOAT32_C( 11.85), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( -8.16), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( 10.11), SIMDE_FLOAT32_C( -12.01) } }, { { SIMDE_FLOAT32_C( -896.03), SIMDE_FLOAT32_C( 496.82), SIMDE_FLOAT32_C( 46.75), SIMDE_FLOAT32_C( -189.63), SIMDE_FLOAT32_C( 888.19), SIMDE_FLOAT32_C( -178.85), SIMDE_FLOAT32_C( 106.49), SIMDE_FLOAT32_C( -266.59) }, { SIMDE_FLOAT32_C( -15.64), SIMDE_FLOAT32_C( 8.67), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( -3.31), SIMDE_FLOAT32_C( 15.50), SIMDE_FLOAT32_C( -3.12), SIMDE_FLOAT32_C( 1.86), SIMDE_FLOAT32_C( -4.65) } }, { { SIMDE_FLOAT32_C( -577.36), SIMDE_FLOAT32_C( 319.48), SIMDE_FLOAT32_C( -568.91), SIMDE_FLOAT32_C( 369.60), SIMDE_FLOAT32_C( -195.78), SIMDE_FLOAT32_C( -445.13), SIMDE_FLOAT32_C( 676.76), SIMDE_FLOAT32_C( 270.74) }, { SIMDE_FLOAT32_C( -10.08), SIMDE_FLOAT32_C( 5.58), SIMDE_FLOAT32_C( -9.93), SIMDE_FLOAT32_C( 6.45), SIMDE_FLOAT32_C( -3.42), SIMDE_FLOAT32_C( -7.77), SIMDE_FLOAT32_C( 11.81), SIMDE_FLOAT32_C( 4.73) } }, { { SIMDE_FLOAT32_C( 386.69), SIMDE_FLOAT32_C( -818.31), SIMDE_FLOAT32_C( 697.61), 
SIMDE_FLOAT32_C( 731.13), SIMDE_FLOAT32_C( 89.36), SIMDE_FLOAT32_C( -163.03), SIMDE_FLOAT32_C( 9.17), SIMDE_FLOAT32_C( 76.19) }, { SIMDE_FLOAT32_C( 6.75), SIMDE_FLOAT32_C( -14.28), SIMDE_FLOAT32_C( 12.18), SIMDE_FLOAT32_C( 12.76), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( -2.85), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 1.33) } }, { { SIMDE_FLOAT32_C( 522.23), SIMDE_FLOAT32_C( -876.19), SIMDE_FLOAT32_C( -206.90), SIMDE_FLOAT32_C( 647.79), SIMDE_FLOAT32_C( -633.72), SIMDE_FLOAT32_C( -908.37), SIMDE_FLOAT32_C( 944.64), SIMDE_FLOAT32_C( 520.31) }, { SIMDE_FLOAT32_C( 9.11), SIMDE_FLOAT32_C( -15.29), SIMDE_FLOAT32_C( -3.61), SIMDE_FLOAT32_C( 11.31), SIMDE_FLOAT32_C( -11.06), SIMDE_FLOAT32_C( -15.85), SIMDE_FLOAT32_C( 16.49), SIMDE_FLOAT32_C( 9.08) } }, { { SIMDE_FLOAT32_C( 907.89), SIMDE_FLOAT32_C( 849.63), SIMDE_FLOAT32_C( -208.12), SIMDE_FLOAT32_C( 68.74), SIMDE_FLOAT32_C( -670.75), SIMDE_FLOAT32_C( 677.18), SIMDE_FLOAT32_C( -644.75), SIMDE_FLOAT32_C( -292.10) }, { SIMDE_FLOAT32_C( 15.85), SIMDE_FLOAT32_C( 14.83), SIMDE_FLOAT32_C( -3.63), SIMDE_FLOAT32_C( 1.20), SIMDE_FLOAT32_C( -11.71), SIMDE_FLOAT32_C( 11.82), SIMDE_FLOAT32_C( -11.25), SIMDE_FLOAT32_C( -5.10) } }, { { SIMDE_FLOAT32_C( 675.40), SIMDE_FLOAT32_C( -616.47), SIMDE_FLOAT32_C( 962.11), SIMDE_FLOAT32_C( 134.41), SIMDE_FLOAT32_C( -905.98), SIMDE_FLOAT32_C( -860.48), SIMDE_FLOAT32_C( -24.28), SIMDE_FLOAT32_C( -121.44) }, { SIMDE_FLOAT32_C( 11.79), SIMDE_FLOAT32_C( -10.76), SIMDE_FLOAT32_C( 16.79), SIMDE_FLOAT32_C( 2.35), SIMDE_FLOAT32_C( -15.81), SIMDE_FLOAT32_C( -15.02), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( -2.12) } }, { { SIMDE_FLOAT32_C( -960.63), SIMDE_FLOAT32_C( 687.26), SIMDE_FLOAT32_C( 788.74), SIMDE_FLOAT32_C( 386.45), SIMDE_FLOAT32_C( -901.72), SIMDE_FLOAT32_C( 856.65), SIMDE_FLOAT32_C( -345.73), SIMDE_FLOAT32_C( -616.97) }, { SIMDE_FLOAT32_C( -16.77), SIMDE_FLOAT32_C( 11.99), SIMDE_FLOAT32_C( 13.77), SIMDE_FLOAT32_C( 6.74), SIMDE_FLOAT32_C( -15.74), SIMDE_FLOAT32_C( 14.95), SIMDE_FLOAT32_C( 
-6.03), SIMDE_FLOAT32_C( -10.77) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_x_mm256_deg2rad_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_x_mm256_deg2rad_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( -467.83), SIMDE_FLOAT64_C( -838.03), SIMDE_FLOAT64_C( -852.25), SIMDE_FLOAT64_C( 261.37) }, { SIMDE_FLOAT64_C( -8.17), SIMDE_FLOAT64_C( -14.63), SIMDE_FLOAT64_C( -14.87), SIMDE_FLOAT64_C( 4.56) } }, { { SIMDE_FLOAT64_C( 838.67), SIMDE_FLOAT64_C( -424.12), SIMDE_FLOAT64_C( -236.36), SIMDE_FLOAT64_C( -471.04) }, { SIMDE_FLOAT64_C( 14.64), SIMDE_FLOAT64_C( -7.40), SIMDE_FLOAT64_C( -4.13), SIMDE_FLOAT64_C( -8.22) } }, { { SIMDE_FLOAT64_C( -834.32), SIMDE_FLOAT64_C( -357.08), SIMDE_FLOAT64_C( 596.48), SIMDE_FLOAT64_C( 991.10) }, { SIMDE_FLOAT64_C( -14.56), SIMDE_FLOAT64_C( -6.23), SIMDE_FLOAT64_C( 10.41), SIMDE_FLOAT64_C( 17.30) } }, { { SIMDE_FLOAT64_C( -638.79), SIMDE_FLOAT64_C( -95.57), SIMDE_FLOAT64_C( -262.62), SIMDE_FLOAT64_C( 117.35) }, { SIMDE_FLOAT64_C( -11.15), SIMDE_FLOAT64_C( -1.67), SIMDE_FLOAT64_C( -4.58), SIMDE_FLOAT64_C( 2.05) } }, { { SIMDE_FLOAT64_C( 253.25), SIMDE_FLOAT64_C( 332.14), SIMDE_FLOAT64_C( 311.92), SIMDE_FLOAT64_C( 451.40) }, { SIMDE_FLOAT64_C( 4.42), SIMDE_FLOAT64_C( 5.80), SIMDE_FLOAT64_C( 5.44), SIMDE_FLOAT64_C( 7.88) } }, { { SIMDE_FLOAT64_C( 635.16), SIMDE_FLOAT64_C( -795.05), SIMDE_FLOAT64_C( -458.24), SIMDE_FLOAT64_C( 422.17) }, { SIMDE_FLOAT64_C( 11.09), SIMDE_FLOAT64_C( -13.88), SIMDE_FLOAT64_C( -8.00), SIMDE_FLOAT64_C( 7.37) } }, { { SIMDE_FLOAT64_C( -505.84), SIMDE_FLOAT64_C( 400.55), SIMDE_FLOAT64_C( 54.12), SIMDE_FLOAT64_C( -409.93) }, { SIMDE_FLOAT64_C( -8.83), SIMDE_FLOAT64_C( 6.99), SIMDE_FLOAT64_C( 0.94), SIMDE_FLOAT64_C( -7.15) } }, { { 
SIMDE_FLOAT64_C( 241.03), SIMDE_FLOAT64_C( -950.08), SIMDE_FLOAT64_C( 5.55), SIMDE_FLOAT64_C( -683.44) }, { SIMDE_FLOAT64_C( 4.21), SIMDE_FLOAT64_C( -16.58), SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( -11.93) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_x_mm256_deg2rad_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_x_mm512_deg2rad_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -204.97), SIMDE_FLOAT32_C( -943.14), SIMDE_FLOAT32_C( 662.36), SIMDE_FLOAT32_C( 286.89), SIMDE_FLOAT32_C( -272.57), SIMDE_FLOAT32_C( 978.11), SIMDE_FLOAT32_C( -911.94), SIMDE_FLOAT32_C( -924.18), SIMDE_FLOAT32_C( -626.92), SIMDE_FLOAT32_C( -721.73), SIMDE_FLOAT32_C( -41.73), SIMDE_FLOAT32_C( 615.09), SIMDE_FLOAT32_C( -253.85), SIMDE_FLOAT32_C( -484.20), SIMDE_FLOAT32_C( 130.81), SIMDE_FLOAT32_C( 548.86) }, { SIMDE_FLOAT32_C( -3.58), SIMDE_FLOAT32_C( -16.46), SIMDE_FLOAT32_C( 11.56), SIMDE_FLOAT32_C( 5.01), SIMDE_FLOAT32_C( -4.76), SIMDE_FLOAT32_C( 17.07), SIMDE_FLOAT32_C( -15.92), SIMDE_FLOAT32_C( -16.13), SIMDE_FLOAT32_C( -10.94), SIMDE_FLOAT32_C( -12.60), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( 10.74), SIMDE_FLOAT32_C( -4.43), SIMDE_FLOAT32_C( -8.45), SIMDE_FLOAT32_C( 2.28), SIMDE_FLOAT32_C( 9.58) } }, { { SIMDE_FLOAT32_C( 759.71), SIMDE_FLOAT32_C( 445.37), SIMDE_FLOAT32_C( -639.90), SIMDE_FLOAT32_C( -816.54), SIMDE_FLOAT32_C( 349.70), SIMDE_FLOAT32_C( -526.35), SIMDE_FLOAT32_C( -291.02), SIMDE_FLOAT32_C( 855.10), SIMDE_FLOAT32_C( -382.23), SIMDE_FLOAT32_C( -58.28), SIMDE_FLOAT32_C( 435.56), SIMDE_FLOAT32_C( 388.92), SIMDE_FLOAT32_C( 616.34), SIMDE_FLOAT32_C( 879.74), SIMDE_FLOAT32_C( -205.65), SIMDE_FLOAT32_C( -284.03) }, { SIMDE_FLOAT32_C( 13.26), SIMDE_FLOAT32_C( 7.77), SIMDE_FLOAT32_C( -11.17), 
SIMDE_FLOAT32_C( -14.25), SIMDE_FLOAT32_C( 6.10), SIMDE_FLOAT32_C( -9.19), SIMDE_FLOAT32_C( -5.08), SIMDE_FLOAT32_C( 14.92), SIMDE_FLOAT32_C( -6.67), SIMDE_FLOAT32_C( -1.02), SIMDE_FLOAT32_C( 7.60), SIMDE_FLOAT32_C( 6.79), SIMDE_FLOAT32_C( 10.76), SIMDE_FLOAT32_C( 15.35), SIMDE_FLOAT32_C( -3.59), SIMDE_FLOAT32_C( -4.96) } }, { { SIMDE_FLOAT32_C( 252.00), SIMDE_FLOAT32_C( -672.50), SIMDE_FLOAT32_C( -750.03), SIMDE_FLOAT32_C( 219.53), SIMDE_FLOAT32_C( -348.40), SIMDE_FLOAT32_C( 510.16), SIMDE_FLOAT32_C( 308.72), SIMDE_FLOAT32_C( 669.84), SIMDE_FLOAT32_C( 1.09), SIMDE_FLOAT32_C( 327.67), SIMDE_FLOAT32_C( -780.79), SIMDE_FLOAT32_C( -790.56), SIMDE_FLOAT32_C( 999.19), SIMDE_FLOAT32_C( -674.94), SIMDE_FLOAT32_C( 338.16), SIMDE_FLOAT32_C( -623.42) }, { SIMDE_FLOAT32_C( 4.40), SIMDE_FLOAT32_C( -11.74), SIMDE_FLOAT32_C( -13.09), SIMDE_FLOAT32_C( 3.83), SIMDE_FLOAT32_C( -6.08), SIMDE_FLOAT32_C( 8.90), SIMDE_FLOAT32_C( 5.39), SIMDE_FLOAT32_C( 11.69), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 5.72), SIMDE_FLOAT32_C( -13.63), SIMDE_FLOAT32_C( -13.80), SIMDE_FLOAT32_C( 17.44), SIMDE_FLOAT32_C( -11.78), SIMDE_FLOAT32_C( 5.90), SIMDE_FLOAT32_C( -10.88) } }, { { SIMDE_FLOAT32_C( 210.99), SIMDE_FLOAT32_C( 133.74), SIMDE_FLOAT32_C( -196.68), SIMDE_FLOAT32_C( 412.53), SIMDE_FLOAT32_C( -531.14), SIMDE_FLOAT32_C( -816.95), SIMDE_FLOAT32_C( -550.15), SIMDE_FLOAT32_C( -344.98), SIMDE_FLOAT32_C( -32.75), SIMDE_FLOAT32_C( -439.61), SIMDE_FLOAT32_C( -503.00), SIMDE_FLOAT32_C( 19.70), SIMDE_FLOAT32_C( -850.81), SIMDE_FLOAT32_C( 392.70), SIMDE_FLOAT32_C( 36.21), SIMDE_FLOAT32_C( 667.59) }, { SIMDE_FLOAT32_C( 3.68), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( -3.43), SIMDE_FLOAT32_C( 7.20), SIMDE_FLOAT32_C( -9.27), SIMDE_FLOAT32_C( -14.26), SIMDE_FLOAT32_C( -9.60), SIMDE_FLOAT32_C( -6.02), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -7.67), SIMDE_FLOAT32_C( -8.78), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -14.85), SIMDE_FLOAT32_C( 6.85), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 11.65) } }, { { 
SIMDE_FLOAT32_C( 226.81), SIMDE_FLOAT32_C( -68.31), SIMDE_FLOAT32_C( -92.58), SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( 617.13), SIMDE_FLOAT32_C( 53.88), SIMDE_FLOAT32_C( -383.79), SIMDE_FLOAT32_C( -333.97), SIMDE_FLOAT32_C( 936.36), SIMDE_FLOAT32_C( -516.23), SIMDE_FLOAT32_C( -313.77), SIMDE_FLOAT32_C( 516.09), SIMDE_FLOAT32_C( -12.76), SIMDE_FLOAT32_C( -491.30), SIMDE_FLOAT32_C( 729.84), SIMDE_FLOAT32_C( 483.88) }, { SIMDE_FLOAT32_C( 3.96), SIMDE_FLOAT32_C( -1.19), SIMDE_FLOAT32_C( -1.62), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 10.77), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( -6.70), SIMDE_FLOAT32_C( -5.83), SIMDE_FLOAT32_C( 16.34), SIMDE_FLOAT32_C( -9.01), SIMDE_FLOAT32_C( -5.48), SIMDE_FLOAT32_C( 9.01), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( -8.57), SIMDE_FLOAT32_C( 12.74), SIMDE_FLOAT32_C( 8.45) } }, { { SIMDE_FLOAT32_C( 619.03), SIMDE_FLOAT32_C( -43.28), SIMDE_FLOAT32_C( 522.00), SIMDE_FLOAT32_C( -713.37), SIMDE_FLOAT32_C( 394.03), SIMDE_FLOAT32_C( 425.58), SIMDE_FLOAT32_C( 710.40), SIMDE_FLOAT32_C( -291.67), SIMDE_FLOAT32_C( -116.91), SIMDE_FLOAT32_C( -890.48), SIMDE_FLOAT32_C( -316.42), SIMDE_FLOAT32_C( -26.59), SIMDE_FLOAT32_C( -918.69), SIMDE_FLOAT32_C( -397.83), SIMDE_FLOAT32_C( -284.98), SIMDE_FLOAT32_C( 339.56) }, { SIMDE_FLOAT32_C( 10.80), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 9.11), SIMDE_FLOAT32_C( -12.45), SIMDE_FLOAT32_C( 6.88), SIMDE_FLOAT32_C( 7.43), SIMDE_FLOAT32_C( 12.40), SIMDE_FLOAT32_C( -5.09), SIMDE_FLOAT32_C( -2.04), SIMDE_FLOAT32_C( -15.54), SIMDE_FLOAT32_C( -5.52), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( -16.03), SIMDE_FLOAT32_C( -6.94), SIMDE_FLOAT32_C( -4.97), SIMDE_FLOAT32_C( 5.93) } }, { { SIMDE_FLOAT32_C( -935.68), SIMDE_FLOAT32_C( 109.78), SIMDE_FLOAT32_C( -972.99), SIMDE_FLOAT32_C( 894.31), SIMDE_FLOAT32_C( 633.79), SIMDE_FLOAT32_C( 41.84), SIMDE_FLOAT32_C( -852.93), SIMDE_FLOAT32_C( 776.08), SIMDE_FLOAT32_C( -443.88), SIMDE_FLOAT32_C( -301.71), SIMDE_FLOAT32_C( -808.76), SIMDE_FLOAT32_C( -785.15), SIMDE_FLOAT32_C( 
-67.76), SIMDE_FLOAT32_C( -895.91), SIMDE_FLOAT32_C( 478.10), SIMDE_FLOAT32_C( -636.03) }, { SIMDE_FLOAT32_C( -16.33), SIMDE_FLOAT32_C( 1.92), SIMDE_FLOAT32_C( -16.98), SIMDE_FLOAT32_C( 15.61), SIMDE_FLOAT32_C( 11.06), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( -14.89), SIMDE_FLOAT32_C( 13.55), SIMDE_FLOAT32_C( -7.75), SIMDE_FLOAT32_C( -5.27), SIMDE_FLOAT32_C( -14.12), SIMDE_FLOAT32_C( -13.70), SIMDE_FLOAT32_C( -1.18), SIMDE_FLOAT32_C( -15.64), SIMDE_FLOAT32_C( 8.34), SIMDE_FLOAT32_C( -11.10) } }, { { SIMDE_FLOAT32_C( 320.10), SIMDE_FLOAT32_C( 3.69), SIMDE_FLOAT32_C( -21.63), SIMDE_FLOAT32_C( 500.34), SIMDE_FLOAT32_C( -733.82), SIMDE_FLOAT32_C( 741.17), SIMDE_FLOAT32_C( 921.80), SIMDE_FLOAT32_C( 676.47), SIMDE_FLOAT32_C( -545.48), SIMDE_FLOAT32_C( 136.48), SIMDE_FLOAT32_C( -243.90), SIMDE_FLOAT32_C( 744.83), SIMDE_FLOAT32_C( 297.50), SIMDE_FLOAT32_C( 109.44), SIMDE_FLOAT32_C( -667.13), SIMDE_FLOAT32_C( -475.76) }, { SIMDE_FLOAT32_C( 5.59), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( 8.73), SIMDE_FLOAT32_C( -12.81), SIMDE_FLOAT32_C( 12.94), SIMDE_FLOAT32_C( 16.09), SIMDE_FLOAT32_C( 11.81), SIMDE_FLOAT32_C( -9.52), SIMDE_FLOAT32_C( 2.38), SIMDE_FLOAT32_C( -4.26), SIMDE_FLOAT32_C( 13.00), SIMDE_FLOAT32_C( 5.19), SIMDE_FLOAT32_C( 1.91), SIMDE_FLOAT32_C( -11.64), SIMDE_FLOAT32_C( -8.30) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_x_mm512_deg2rad_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_x_mm512_deg2rad_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 984.73), SIMDE_FLOAT64_C( 383.87), SIMDE_FLOAT64_C( -286.43), SIMDE_FLOAT64_C( 18.78), SIMDE_FLOAT64_C( -399.99), SIMDE_FLOAT64_C( -675.58), SIMDE_FLOAT64_C( -438.55), SIMDE_FLOAT64_C( -737.71) }, { SIMDE_FLOAT64_C( 17.19), 
SIMDE_FLOAT64_C( 6.70), SIMDE_FLOAT64_C( -5.00), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( -6.98), SIMDE_FLOAT64_C( -11.79), SIMDE_FLOAT64_C( -7.65), SIMDE_FLOAT64_C( -12.88) } }, { { SIMDE_FLOAT64_C( -671.93), SIMDE_FLOAT64_C( 826.99), SIMDE_FLOAT64_C( -830.65), SIMDE_FLOAT64_C( -694.10), SIMDE_FLOAT64_C( 255.50), SIMDE_FLOAT64_C( 118.40), SIMDE_FLOAT64_C( -39.28), SIMDE_FLOAT64_C( -160.67) }, { SIMDE_FLOAT64_C( -11.73), SIMDE_FLOAT64_C( 14.43), SIMDE_FLOAT64_C( -14.50), SIMDE_FLOAT64_C( -12.11), SIMDE_FLOAT64_C( 4.46), SIMDE_FLOAT64_C( 2.07), SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( -2.80) } }, { { SIMDE_FLOAT64_C( -422.40), SIMDE_FLOAT64_C( 720.88), SIMDE_FLOAT64_C( -179.50), SIMDE_FLOAT64_C( -877.62), SIMDE_FLOAT64_C( -132.27), SIMDE_FLOAT64_C( 998.68), SIMDE_FLOAT64_C( 784.22), SIMDE_FLOAT64_C( 465.33) }, { SIMDE_FLOAT64_C( -7.37), SIMDE_FLOAT64_C( 12.58), SIMDE_FLOAT64_C( -3.13), SIMDE_FLOAT64_C( -15.32), SIMDE_FLOAT64_C( -2.31), SIMDE_FLOAT64_C( 17.43), SIMDE_FLOAT64_C( 13.69), SIMDE_FLOAT64_C( 8.12) } }, { { SIMDE_FLOAT64_C( 844.52), SIMDE_FLOAT64_C( -91.48), SIMDE_FLOAT64_C( 575.23), SIMDE_FLOAT64_C( -167.13), SIMDE_FLOAT64_C( -906.69), SIMDE_FLOAT64_C( -808.01), SIMDE_FLOAT64_C( -191.68), SIMDE_FLOAT64_C( 439.44) }, { SIMDE_FLOAT64_C( 14.74), SIMDE_FLOAT64_C( -1.60), SIMDE_FLOAT64_C( 10.04), SIMDE_FLOAT64_C( -2.92), SIMDE_FLOAT64_C( -15.82), SIMDE_FLOAT64_C( -14.10), SIMDE_FLOAT64_C( -3.35), SIMDE_FLOAT64_C( 7.67) } }, { { SIMDE_FLOAT64_C( -327.12), SIMDE_FLOAT64_C( 74.58), SIMDE_FLOAT64_C( -612.17), SIMDE_FLOAT64_C( -701.50), SIMDE_FLOAT64_C( -128.00), SIMDE_FLOAT64_C( 625.20), SIMDE_FLOAT64_C( -218.65), SIMDE_FLOAT64_C( -917.42) }, { SIMDE_FLOAT64_C( -5.71), SIMDE_FLOAT64_C( 1.30), SIMDE_FLOAT64_C( -10.68), SIMDE_FLOAT64_C( -12.24), SIMDE_FLOAT64_C( -2.23), SIMDE_FLOAT64_C( 10.91), SIMDE_FLOAT64_C( -3.82), SIMDE_FLOAT64_C( -16.01) } }, { { SIMDE_FLOAT64_C( -997.92), SIMDE_FLOAT64_C( -38.58), SIMDE_FLOAT64_C( -337.38), SIMDE_FLOAT64_C( -285.85), 
SIMDE_FLOAT64_C( -318.88), SIMDE_FLOAT64_C( 574.80), SIMDE_FLOAT64_C( 587.94), SIMDE_FLOAT64_C( -489.48) }, { SIMDE_FLOAT64_C( -17.42), SIMDE_FLOAT64_C( -0.67), SIMDE_FLOAT64_C( -5.89), SIMDE_FLOAT64_C( -4.99), SIMDE_FLOAT64_C( -5.57), SIMDE_FLOAT64_C( 10.03), SIMDE_FLOAT64_C( 10.26), SIMDE_FLOAT64_C( -8.54) } }, { { SIMDE_FLOAT64_C( -699.61), SIMDE_FLOAT64_C( -288.00), SIMDE_FLOAT64_C( -454.37), SIMDE_FLOAT64_C( -597.58), SIMDE_FLOAT64_C( 496.99), SIMDE_FLOAT64_C( 888.51), SIMDE_FLOAT64_C( -818.76), SIMDE_FLOAT64_C( -819.32) }, { SIMDE_FLOAT64_C( -12.21), SIMDE_FLOAT64_C( -5.03), SIMDE_FLOAT64_C( -7.93), SIMDE_FLOAT64_C( -10.43), SIMDE_FLOAT64_C( 8.67), SIMDE_FLOAT64_C( 15.51), SIMDE_FLOAT64_C( -14.29), SIMDE_FLOAT64_C( -14.30) } }, { { SIMDE_FLOAT64_C( -315.95), SIMDE_FLOAT64_C( -109.61), SIMDE_FLOAT64_C( -186.03), SIMDE_FLOAT64_C( -677.21), SIMDE_FLOAT64_C( 98.17), SIMDE_FLOAT64_C( -43.95), SIMDE_FLOAT64_C( -639.89), SIMDE_FLOAT64_C( -591.44) }, { SIMDE_FLOAT64_C( -5.51), SIMDE_FLOAT64_C( -1.91), SIMDE_FLOAT64_C( -3.25), SIMDE_FLOAT64_C( -11.82), SIMDE_FLOAT64_C( 1.71), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -11.17), SIMDE_FLOAT64_C( -10.32) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_x_mm512_deg2rad_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_div_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( 80), INT8_C( 26), INT8_C( -96), INT8_C( 63), INT8_C( 84), INT8_C( 0), INT8_C( 86), INT8_C( -92), INT8_C( 19), INT8_C( 73), INT8_C( 49), INT8_C( 84), INT8_C( 93), INT8_C( -26), INT8_C( 48), INT8_C( -85)), simde_mm_set_epi8(INT8_C( 4), INT8_C( 4), INT8_C( 3), INT8_C( 27), INT8_C( 44), INT8_C( 48), INT8_C( 3), INT8_C( 53), INT8_C( 11), INT8_C( 6), INT8_C( 2), INT8_C( 14), INT8_C( 89), INT8_C( 
10), INT8_C( 3), INT8_C( 1)), simde_mm_set_epi8(INT8_C( 20), INT8_C( 6), INT8_C( -32), INT8_C( 2), INT8_C( 1), INT8_C( 0), INT8_C( 28), INT8_C( -1), INT8_C( 1), INT8_C( 12), INT8_C( 24), INT8_C( 6), INT8_C( 1), INT8_C( -2), INT8_C( 16), INT8_C( -85)) }, { simde_mm_set_epi8(INT8_C( -53), INT8_C(-123), INT8_C( 83), INT8_C( 82), INT8_C( -17), INT8_C( 32), INT8_C( -32), INT8_C( 68), INT8_C( -20), INT8_C( 5), INT8_C( -1), INT8_C( -23), INT8_C( 118), INT8_C(-101), INT8_C( 53), INT8_C( 4)), simde_mm_set_epi8(INT8_C( 9), INT8_C( 1), INT8_C( -68), INT8_C( 1), INT8_C( 1), INT8_C( 1), INT8_C( 22), INT8_C( 17), INT8_C( 4), INT8_C( 8), INT8_C( 6), INT8_C( 10), INT8_C( 55), INT8_C( 3), INT8_C( 14), INT8_C( 14)), simde_mm_set_epi8(INT8_C( -5), INT8_C(-123), INT8_C( -1), INT8_C( 82), INT8_C( -17), INT8_C( 32), INT8_C( -1), INT8_C( 4), INT8_C( -5), INT8_C( 0), INT8_C( 0), INT8_C( -2), INT8_C( 2), INT8_C( -33), INT8_C( 3), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C( 122), INT8_C( 103), INT8_C( 28), INT8_C(-102), INT8_C( -41), INT8_C(-105), INT8_C( -14), INT8_C(-120), INT8_C( -71), INT8_C( 84), INT8_C( 90), INT8_C( 8), INT8_C( 84), INT8_C( 120), INT8_C( -59), INT8_C( 9)), simde_mm_set_epi8(INT8_C( 59), INT8_C( -21), INT8_C( 22), INT8_C( 53), INT8_C( 22), INT8_C( 3), INT8_C( 5), INT8_C( 6), INT8_C( 2), INT8_C( 21), INT8_C( 3), INT8_C( 3), INT8_C( 2), INT8_C( 10), INT8_C( 10), INT8_C( 3)), simde_mm_set_epi8(INT8_C( 2), INT8_C( -4), INT8_C( 1), INT8_C( -1), INT8_C( -1), INT8_C( -35), INT8_C( -2), INT8_C( -20), INT8_C( -35), INT8_C( 4), INT8_C( 30), INT8_C( 2), INT8_C( 42), INT8_C( 12), INT8_C( -5), INT8_C( 3)) }, { simde_mm_set_epi8(INT8_C( 121), INT8_C( -15), INT8_C(-123), INT8_C( 80), INT8_C( 43), INT8_C( 58), INT8_C( 119), INT8_C( -49), INT8_C( 107), INT8_C( -94), INT8_C( 51), INT8_C(-118), INT8_C( 68), INT8_C( 112), INT8_C( -56), INT8_C(-103)), simde_mm_set_epi8(INT8_C( 44), INT8_C( 13), INT8_C( 14), INT8_C( 8), INT8_C( -24), INT8_C( 77), INT8_C( 118), INT8_C( 21), INT8_C( 1), 
INT8_C( -34), INT8_C( 2), INT8_C( 29), INT8_C( 14), INT8_C( 53), INT8_C( 1), INT8_C( 54)), simde_mm_set_epi8(INT8_C( 2), INT8_C( -1), INT8_C( -8), INT8_C( 10), INT8_C( -1), INT8_C( 0), INT8_C( 1), INT8_C( -2), INT8_C( 107), INT8_C( 2), INT8_C( 25), INT8_C( -4), INT8_C( 4), INT8_C( 2), INT8_C( -56), INT8_C( -1)) }, { simde_mm_set_epi8(INT8_C( -42), INT8_C( 14), INT8_C(-113), INT8_C( 62), INT8_C( -34), INT8_C( -16), INT8_C(-103), INT8_C(-122), INT8_C(-128), INT8_C( -77), INT8_C( -15), INT8_C( -38), INT8_C( 87), INT8_C( -72), INT8_C( 57), INT8_C( -40)), simde_mm_set_epi8(INT8_C( 30), INT8_C( 124), INT8_C( -94), INT8_C( 4), INT8_C( 46), INT8_C( 11), INT8_C( 3), INT8_C( -54), INT8_C( 11), INT8_C( 8), INT8_C(-114), INT8_C( 3), INT8_C( 6), INT8_C( 1), INT8_C(-121), INT8_C( 4)), simde_mm_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( 1), INT8_C( 15), INT8_C( 0), INT8_C( -1), INT8_C( -34), INT8_C( 2), INT8_C( -11), INT8_C( -9), INT8_C( 0), INT8_C( -12), INT8_C( 14), INT8_C( -72), INT8_C( 0), INT8_C( -10)) }, { simde_mm_set_epi8(INT8_C( -13), INT8_C( -82), INT8_C( 64), INT8_C( -67), INT8_C(-120), INT8_C( 26), INT8_C(-105), INT8_C( 40), INT8_C( 59), INT8_C( -83), INT8_C( 64), INT8_C( -39), INT8_C( 99), INT8_C( -73), INT8_C( -97), INT8_C( -1)), simde_mm_set_epi8(INT8_C( -27), INT8_C( 114), INT8_C(-109), INT8_C( 8), INT8_C( 12), INT8_C( 4), INT8_C( 2), INT8_C( 2), INT8_C( 3), INT8_C( 11), INT8_C( 3), INT8_C( 11), INT8_C( 82), INT8_C( 14), INT8_C( 120), INT8_C(-107)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -8), INT8_C( -10), INT8_C( 6), INT8_C( -52), INT8_C( 20), INT8_C( 19), INT8_C( -7), INT8_C( 21), INT8_C( -3), INT8_C( 1), INT8_C( -5), INT8_C( 0), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C( -57), INT8_C( 53), INT8_C( 114), INT8_C( -35), INT8_C( -22), INT8_C( -59), INT8_C( 52), INT8_C( 113), INT8_C( 25), INT8_C( 16), INT8_C( -8), INT8_C( -67), INT8_C( 7), INT8_C( -33), INT8_C( 51), INT8_C( 118)), simde_mm_set_epi8(INT8_C( 14), INT8_C( 15), INT8_C( 24), INT8_C( 
83), INT8_C( 4), INT8_C( 45), INT8_C( 4), INT8_C( 34), INT8_C( 9), INT8_C( 19), INT8_C( 4), INT8_C( 11), INT8_C( 8), INT8_C( 14), INT8_C( 102), INT8_C( -88)), simde_mm_set_epi8(INT8_C( -4), INT8_C( 3), INT8_C( 4), INT8_C( 0), INT8_C( -5), INT8_C( -1), INT8_C( 13), INT8_C( 3), INT8_C( 2), INT8_C( 0), INT8_C( -2), INT8_C( -6), INT8_C( 0), INT8_C( -2), INT8_C( 0), INT8_C( -1)) }, { simde_mm_set_epi8(INT8_C( -69), INT8_C( 57), INT8_C( 3), INT8_C( 127), INT8_C( -28), INT8_C( -47), INT8_C(-127), INT8_C( -14), INT8_C( -28), INT8_C( 68), INT8_C( -27), INT8_C( -44), INT8_C( -16), INT8_C( 1), INT8_C( -44), INT8_C( 112)), simde_mm_set_epi8(INT8_C( 57), INT8_C( 1), INT8_C( -43), INT8_C( 103), INT8_C( 4), INT8_C( 1), INT8_C( 2), INT8_C( 96), INT8_C( 9), INT8_C( 57), INT8_C( 54), INT8_C( 105), INT8_C( 1), INT8_C( 31), INT8_C( -85), INT8_C( 104)), simde_mm_set_epi8(INT8_C( -1), INT8_C( 57), INT8_C( 0), INT8_C( 1), INT8_C( -7), INT8_C( -47), INT8_C( -63), INT8_C( 0), INT8_C( -3), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( -16), INT8_C( 0), INT8_C( 0), INT8_C( 1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_div_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_div_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( 7569), INT16_C(-21774), INT16_C( 5125), INT16_C( 21356), INT16_C( 9222), INT16_C( 7511), INT16_C(-21561), INT16_C( 29102)), simde_mm_set_epi16(INT16_C( 6450), INT16_C( -2), INT16_C( 190), INT16_C( -44), INT16_C( -3), INT16_C( -9), INT16_C( -911), INT16_C( 3)), simde_mm_set_epi16(INT16_C( 1), INT16_C( 10887), INT16_C( 26), INT16_C( -485), INT16_C( -3074), INT16_C( -834), INT16_C( 23), INT16_C( 9700)) }, { simde_mm_set_epi16(INT16_C( 14790), INT16_C(-17845), INT16_C( 12471), INT16_C( 16666), INT16_C( -4541), INT16_C( 18926), INT16_C( 4112), INT16_C( 
26905)), simde_mm_set_epi16(INT16_C( -1), INT16_C( -8), INT16_C( 15), INT16_C( -16), INT16_C( -1), INT16_C( -28), INT16_C( -3387), INT16_C( -5)), simde_mm_set_epi16(INT16_C(-14790), INT16_C( 2230), INT16_C( 831), INT16_C( -1041), INT16_C( 4541), INT16_C( -675), INT16_C( -1), INT16_C( -5381)) }, { simde_mm_set_epi16(INT16_C( 24700), INT16_C( 18820), INT16_C( -6493), INT16_C(-11852), INT16_C( 7293), INT16_C( 18330), INT16_C(-13423), INT16_C( 30834)), simde_mm_set_epi16(INT16_C( 9411), INT16_C( -2), INT16_C( -2), INT16_C( -10), INT16_C( 942), INT16_C( 5062), INT16_C( 3712), INT16_C(-24297)), simde_mm_set_epi16(INT16_C( 2), INT16_C( -9410), INT16_C( 3246), INT16_C( 1185), INT16_C( 7), INT16_C( 3), INT16_C( -3), INT16_C( -1)) }, { simde_mm_set_epi16(INT16_C( -8188), INT16_C( -5752), INT16_C( -6400), INT16_C(-18754), INT16_C( 26203), INT16_C( 11990), INT16_C( 27655), INT16_C( 30479)), simde_mm_set_epi16(INT16_C( -2891), INT16_C( -9), INT16_C( 1), INT16_C( 24), INT16_C( 1410), INT16_C( -7348), INT16_C( 56), INT16_C( -8)), simde_mm_set_epi16(INT16_C( 2), INT16_C( 639), INT16_C( -6400), INT16_C( -781), INT16_C( 18), INT16_C( -1), INT16_C( 493), INT16_C( -3809)) }, { simde_mm_set_epi16(INT16_C( 27464), INT16_C( 30742), INT16_C(-17463), INT16_C( 5584), INT16_C( 16882), INT16_C(-13221), INT16_C(-30009), INT16_C( 27529)), simde_mm_set_epi16(INT16_C( 92), INT16_C( -245), INT16_C( 87), INT16_C( 2027), INT16_C( -218), INT16_C( 181), INT16_C( 1), INT16_C( -448)), simde_mm_set_epi16(INT16_C( 298), INT16_C( -125), INT16_C( -200), INT16_C( 2), INT16_C( -77), INT16_C( -73), INT16_C(-30009), INT16_C( -61)) }, { simde_mm_set_epi16(INT16_C(-28312), INT16_C( -6464), INT16_C( 7438), INT16_C(-24771), INT16_C( 27969), INT16_C( 18884), INT16_C( 17235), INT16_C( 31019)), simde_mm_set_epi16(INT16_C( -3989), INT16_C( 8), INT16_C( -1), INT16_C( -27), INT16_C( 53), INT16_C( -58), INT16_C( 2274), INT16_C( -9)), simde_mm_set_epi16(INT16_C( 7), INT16_C( -808), INT16_C( -7438), INT16_C( 917), INT16_C( 
527), INT16_C( -325), INT16_C( 7), INT16_C( -3446)) }, { simde_mm_set_epi16(INT16_C(-31090), INT16_C( 20346), INT16_C( 14276), INT16_C(-27653), INT16_C( 19203), INT16_C(-24798), INT16_C(-17826), INT16_C( 16379)), simde_mm_set_epi16(INT16_C( 3), INT16_C( 8), INT16_C( -60), INT16_C( 14), INT16_C( -435), INT16_C( -1), INT16_C( -395), INT16_C( -1532)), simde_mm_set_epi16(INT16_C(-10363), INT16_C( 2543), INT16_C( -237), INT16_C( -1975), INT16_C( -44), INT16_C( 24798), INT16_C( 45), INT16_C( -10)) }, { simde_mm_set_epi16(INT16_C( -4012), INT16_C( 17981), INT16_C( 26341), INT16_C(-11451), INT16_C(-22746), INT16_C(-13246), INT16_C( -6273), INT16_C( 15936)), simde_mm_set_epi16(INT16_C( -5), INT16_C( 325), INT16_C( 10), INT16_C( -2018), INT16_C(-26192), INT16_C( -15), INT16_C( -29), INT16_C( 2009)), simde_mm_set_epi16(INT16_C( 802), INT16_C( 55), INT16_C( 2634), INT16_C( 5), INT16_C( 0), INT16_C( 883), INT16_C( 216), INT16_C( 7)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_div_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_div_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C(-2101284579), INT32_C( 1788896628), INT32_C( 742774378), INT32_C( -512831871)), simde_mm_set_epi32(INT32_C( -173), INT32_C( -20613654), INT32_C( 28772), INT32_C( 118)), simde_mm_set_epi32(INT32_C( 12146153), INT32_C( -86), INT32_C( 25815), INT32_C( -4346032)) }, { simde_mm_set_epi32(INT32_C( 505370509), INT32_C( -307733024), INT32_C( -192358019), INT32_C( -299231491)), simde_mm_set_epi32(INT32_C( 34268), INT32_C( -6), INT32_C( 6850), INT32_C( 1214711)), simde_mm_set_epi32(INT32_C( 14747), INT32_C( 51288837), INT32_C( -28081), INT32_C( -246)) }, { simde_mm_set_epi32(INT32_C(-1154189768), INT32_C( 94538029), INT32_C( 423884488), INT32_C( 1619435962)), simde_mm_set_epi32(INT32_C( 
-565), INT32_C( -128659), INT32_C( -59), INT32_C( -208397178)), simde_mm_set_epi32(INT32_C( 2042813), INT32_C( -734), INT32_C( -7184482), INT32_C( -7)) }, { simde_mm_set_epi32(INT32_C(-1938127942), INT32_C( -553846699), INT32_C( 685427224), INT32_C( -86375451)), simde_mm_set_epi32(INT32_C( 1223981911), INT32_C( -108113), INT32_C( 3), INT32_C( -3698)), simde_mm_set_epi32(INT32_C( -1), INT32_C( 5122), INT32_C( 228475741), INT32_C( 23357)) }, { simde_mm_set_epi32(INT32_C(-1690889220), INT32_C( -667367235), INT32_C( 1220206139), INT32_C(-1217543723)), simde_mm_set_epi32(INT32_C( 299), INT32_C( 7724), INT32_C( -1), INT32_C( 173051558)), simde_mm_set_epi32(INT32_C( -5655147), INT32_C( -86401), INT32_C(-1220206139), INT32_C( -7)) }, { simde_mm_set_epi32(INT32_C( 93323521), INT32_C( 1996592708), INT32_C( 2087305602), INT32_C( 27568495)), simde_mm_set_epi32(INT32_C( -2), INT32_C( 15626723), INT32_C( 1507), INT32_C( 5412)), simde_mm_set_epi32(INT32_C( -46661760), INT32_C( 127), INT32_C( 1385073), INT32_C( 5093)) }, { simde_mm_set_epi32(INT32_C( 1825211631), INT32_C( 1750705004), INT32_C( 1935103134), INT32_C(-1042289581)), simde_mm_set_epi32(INT32_C( -20153), INT32_C( -109992928), INT32_C( -4), INT32_C( 3)), simde_mm_set_epi32(INT32_C( -90567), INT32_C( -15), INT32_C( -483775783), INT32_C( -347429860)) }, { simde_mm_set_epi32(INT32_C( -836927167), INT32_C(-2031963629), INT32_C( 1244477192), INT32_C( 662038781)), simde_mm_set_epi32(INT32_C( -226), INT32_C( 320), INT32_C( 17085036), INT32_C( -883)), simde_mm_set_epi32(INT32_C( 3703217), INT32_C( -6349886), INT32_C( 72), INT32_C( -749760)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_div_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_div_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { 
simde_mm_set_epi64x(INT64_C(-8762915026342605517), INT64_C( 6327019035084041530)), simde_mm_set_epi64x(INT64_C( 1040172869250133860), INT64_C( -3393154419)), simde_mm_set_epi64x(INT64_C( -8), INT64_C( -1864642233)) }, { simde_mm_set_epi64x(INT64_C( 7086115847005357544), INT64_C( 7169462887889416879)), simde_mm_set_epi64x(INT64_C( -402272), INT64_C( -6362438)), simde_mm_set_epi64x(INT64_C( -17615235082246), INT64_C( -1126842082844)) }, { simde_mm_set_epi64x(INT64_C( 3227829673356714047), INT64_C( 5122063021698718134)), simde_mm_set_epi64x(INT64_C( 290796), INT64_C( -647054)), simde_mm_set_epi64x(INT64_C( 11099979619240), INT64_C( -7915974588981)) }, { simde_mm_set_epi64x(INT64_C( -712959233727550094), INT64_C( 8175697730423622547)), simde_mm_set_epi64x(INT64_C( -114108996), INT64_C( 727492806)), simde_mm_set_epi64x(INT64_C( 6248054568), INT64_C( 11238183612)) }, { simde_mm_set_epi64x(INT64_C( 7475816922473172733), INT64_C(-1631503293395556188)), simde_mm_set_epi64x(INT64_C( 5), INT64_C( -24770378177)), simde_mm_set_epi64x(INT64_C( 1495163384494634546), INT64_C( 65865094)) }, { simde_mm_set_epi64x(INT64_C(-7220293124938945390), INT64_C( 5345879758546587877)), simde_mm_set_epi64x(INT64_C( -716), INT64_C( 1692902)), simde_mm_set_epi64x(INT64_C( 10084208275054393), INT64_C( 3157819979270)) }, { simde_mm_set_epi64x(INT64_C(-2100788141468237692), INT64_C( 1869244361192362281)), simde_mm_set_epi64x(INT64_C( -1), INT64_C( 27867346395)), simde_mm_set_epi64x(INT64_C( 2100788141468237692), INT64_C( 67076510)) }, { simde_mm_set_epi64x(INT64_C(-4218200756000910912), INT64_C( 8429274423139369867)), simde_mm_set_epi64x(INT64_C( 25), INT64_C( -63869567732)), simde_mm_set_epi64x(INT64_C( -168728030240036436), INT64_C( -131976381)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_div_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int 
test_simde_mm_div_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu8(UINT8_C( 15), UINT8_C( 75), UINT8_C(224), UINT8_C(156), UINT8_C( 1), UINT8_C( 34), UINT8_C( 35), UINT8_C(127), UINT8_C(127), UINT8_C(120), UINT8_C(177), UINT8_C( 31), UINT8_C(136), UINT8_C(180), UINT8_C(141), UINT8_C(206)), simde_x_mm_set_epu8(UINT8_C( 45), UINT8_C( 8), UINT8_C( 9), UINT8_C( 13), UINT8_C(246), UINT8_C( 1), UINT8_C( 15), UINT8_C( 2), UINT8_C(152), UINT8_C( 45), UINT8_C( 56), UINT8_C( 26), UINT8_C( 1), UINT8_C( 1), UINT8_C( 16), UINT8_C( 15)), simde_x_mm_set_epu8(UINT8_C( 0), UINT8_C( 9), UINT8_C( 24), UINT8_C( 12), UINT8_C( 0), UINT8_C( 34), UINT8_C( 2), UINT8_C( 63), UINT8_C( 0), UINT8_C( 2), UINT8_C( 3), UINT8_C( 1), UINT8_C(136), UINT8_C(180), UINT8_C( 8), UINT8_C( 13)) }, { simde_x_mm_set_epu8(UINT8_C( 75), UINT8_C(233), UINT8_C(186), UINT8_C(216), UINT8_C(224), UINT8_C( 45), UINT8_C( 40), UINT8_C(134), UINT8_C( 1), UINT8_C( 47), UINT8_C( 23), UINT8_C(119), UINT8_C(229), UINT8_C(107), UINT8_C(175), UINT8_C( 79)), simde_x_mm_set_epu8(UINT8_C( 9), UINT8_C( 12), UINT8_C( 46), UINT8_C( 39), UINT8_C( 11), UINT8_C( 15), UINT8_C( 32), UINT8_C( 13), UINT8_C( 21), UINT8_C(239), UINT8_C( 5), UINT8_C( 2), UINT8_C( 1), UINT8_C( 26), UINT8_C(182), UINT8_C( 29)), simde_x_mm_set_epu8(UINT8_C( 8), UINT8_C( 19), UINT8_C( 4), UINT8_C( 5), UINT8_C( 20), UINT8_C( 3), UINT8_C( 1), UINT8_C( 10), UINT8_C( 0), UINT8_C( 0), UINT8_C( 4), UINT8_C( 59), UINT8_C(229), UINT8_C( 4), UINT8_C( 0), UINT8_C( 2)) }, { simde_x_mm_set_epu8(UINT8_C( 75), UINT8_C(109), UINT8_C( 28), UINT8_C(204), UINT8_C( 53), UINT8_C(255), UINT8_C(143), UINT8_C(254), UINT8_C( 82), UINT8_C(109), UINT8_C(205), UINT8_C( 21), UINT8_C( 16), UINT8_C( 18), UINT8_C(221), UINT8_C(119)), simde_x_mm_set_epu8(UINT8_C(210), UINT8_C(108), UINT8_C( 89), UINT8_C( 21), UINT8_C(154), UINT8_C( 52), UINT8_C( 17), UINT8_C( 8), UINT8_C( 90), UINT8_C( 6), UINT8_C( 1), 
UINT8_C( 5), UINT8_C( 1), UINT8_C(201), UINT8_C( 23), UINT8_C( 2)), simde_x_mm_set_epu8(UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 9), UINT8_C( 0), UINT8_C( 4), UINT8_C( 8), UINT8_C( 31), UINT8_C( 0), UINT8_C( 18), UINT8_C(205), UINT8_C( 4), UINT8_C( 16), UINT8_C( 0), UINT8_C( 9), UINT8_C( 59)) }, { simde_x_mm_set_epu8(UINT8_C( 23), UINT8_C(229), UINT8_C(200), UINT8_C( 62), UINT8_C(169), UINT8_C(116), UINT8_C(131), UINT8_C(205), UINT8_C(117), UINT8_C( 49), UINT8_C(130), UINT8_C( 21), UINT8_C( 91), UINT8_C(138), UINT8_C(101), UINT8_C(205)), simde_x_mm_set_epu8(UINT8_C( 43), UINT8_C( 65), UINT8_C( 28), UINT8_C( 61), UINT8_C( 12), UINT8_C( 4), UINT8_C( 37), UINT8_C( 4), UINT8_C(237), UINT8_C( 25), UINT8_C( 38), UINT8_C( 15), UINT8_C( 9), UINT8_C( 6), UINT8_C(140), UINT8_C( 10)), simde_x_mm_set_epu8(UINT8_C( 0), UINT8_C( 3), UINT8_C( 7), UINT8_C( 1), UINT8_C( 14), UINT8_C( 29), UINT8_C( 3), UINT8_C( 51), UINT8_C( 0), UINT8_C( 1), UINT8_C( 3), UINT8_C( 1), UINT8_C( 10), UINT8_C( 23), UINT8_C( 0), UINT8_C( 20)) }, { simde_x_mm_set_epu8(UINT8_C(140), UINT8_C(170), UINT8_C(150), UINT8_C(208), UINT8_C( 64), UINT8_C( 6), UINT8_C(116), UINT8_C(102), UINT8_C(200), UINT8_C(110), UINT8_C(136), UINT8_C(125), UINT8_C(201), UINT8_C( 22), UINT8_C(166), UINT8_C(235)), simde_x_mm_set_epu8(UINT8_C( 1), UINT8_C( 7), UINT8_C( 23), UINT8_C( 2), UINT8_C( 12), UINT8_C(103), UINT8_C( 24), UINT8_C( 18), UINT8_C(234), UINT8_C( 11), UINT8_C( 6), UINT8_C( 2), UINT8_C( 5), UINT8_C( 34), UINT8_C( 60), UINT8_C( 13)), simde_x_mm_set_epu8(UINT8_C(140), UINT8_C( 24), UINT8_C( 6), UINT8_C(104), UINT8_C( 5), UINT8_C( 0), UINT8_C( 4), UINT8_C( 5), UINT8_C( 0), UINT8_C( 10), UINT8_C( 22), UINT8_C( 62), UINT8_C( 40), UINT8_C( 0), UINT8_C( 2), UINT8_C( 18)) }, { simde_x_mm_set_epu8(UINT8_C(143), UINT8_C( 77), UINT8_C(114), UINT8_C( 66), UINT8_C( 82), UINT8_C(133), UINT8_C( 93), UINT8_C(122), UINT8_C(225), UINT8_C(230), UINT8_C(202), UINT8_C(147), UINT8_C(170), UINT8_C(252), UINT8_C(163), UINT8_C(161)), 
simde_x_mm_set_epu8(UINT8_C( 5), UINT8_C( 8), UINT8_C( 15), UINT8_C( 99), UINT8_C( 10), UINT8_C( 4), UINT8_C( 1), UINT8_C( 1), UINT8_C( 15), UINT8_C( 21), UINT8_C( 3), UINT8_C( 1), UINT8_C( 2), UINT8_C( 18), UINT8_C( 18), UINT8_C( 2)), simde_x_mm_set_epu8(UINT8_C( 28), UINT8_C( 9), UINT8_C( 7), UINT8_C( 0), UINT8_C( 8), UINT8_C( 33), UINT8_C( 93), UINT8_C(122), UINT8_C( 15), UINT8_C( 10), UINT8_C( 67), UINT8_C(147), UINT8_C( 85), UINT8_C( 14), UINT8_C( 9), UINT8_C( 80)) }, { simde_x_mm_set_epu8(UINT8_C(125), UINT8_C(134), UINT8_C(114), UINT8_C( 16), UINT8_C(101), UINT8_C( 75), UINT8_C( 71), UINT8_C(136), UINT8_C(137), UINT8_C(104), UINT8_C(249), UINT8_C(115), UINT8_C(110), UINT8_C(132), UINT8_C(229), UINT8_C( 48)), simde_x_mm_set_epu8(UINT8_C( 69), UINT8_C( 11), UINT8_C( 3), UINT8_C( 2), UINT8_C( 2), UINT8_C( 21), UINT8_C( 3), UINT8_C( 1), UINT8_C( 5), UINT8_C( 1), UINT8_C( 3), UINT8_C( 2), UINT8_C( 1), UINT8_C(163), UINT8_C( 1), UINT8_C( 2)), simde_x_mm_set_epu8(UINT8_C( 1), UINT8_C( 12), UINT8_C( 38), UINT8_C( 8), UINT8_C( 50), UINT8_C( 3), UINT8_C( 23), UINT8_C(136), UINT8_C( 27), UINT8_C(104), UINT8_C( 83), UINT8_C( 57), UINT8_C(110), UINT8_C( 0), UINT8_C(229), UINT8_C( 24)) }, { simde_x_mm_set_epu8(UINT8_C( 72), UINT8_C(139), UINT8_C(120), UINT8_C(127), UINT8_C(102), UINT8_C(165), UINT8_C( 82), UINT8_C( 63), UINT8_C(192), UINT8_C( 18), UINT8_C(103), UINT8_C(151), UINT8_C( 81), UINT8_C(222), UINT8_C(212), UINT8_C( 1)), simde_x_mm_set_epu8(UINT8_C( 7), UINT8_C( 26), UINT8_C( 32), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 3), UINT8_C( 2), UINT8_C( 65), UINT8_C( 24), UINT8_C( 1), UINT8_C( 97), UINT8_C( 14), UINT8_C( 8), UINT8_C( 89), UINT8_C( 11)), simde_x_mm_set_epu8(UINT8_C( 10), UINT8_C( 5), UINT8_C( 3), UINT8_C(127), UINT8_C(102), UINT8_C(165), UINT8_C( 27), UINT8_C( 31), UINT8_C( 2), UINT8_C( 0), UINT8_C(103), UINT8_C( 1), UINT8_C( 5), UINT8_C( 27), UINT8_C( 2), UINT8_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { 
simde__m128i r = simde_mm_div_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_div_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu16(UINT16_C(27566), UINT16_C(40504), UINT16_C( 4629), UINT16_C(53715), UINT16_C( 9716), UINT16_C( 9411), UINT16_C(47476), UINT16_C(41385)), simde_x_mm_set_epu16(UINT16_C( 13), UINT16_C( 6506), UINT16_C( 2031), UINT16_C( 2041), UINT16_C( 41), UINT16_C( 3089), UINT16_C( 4707), UINT16_C( 3)), simde_x_mm_set_epu16(UINT16_C( 2120), UINT16_C( 6), UINT16_C( 2), UINT16_C( 26), UINT16_C( 236), UINT16_C( 3), UINT16_C( 10), UINT16_C(13795)) }, { simde_x_mm_set_epu16(UINT16_C( 9353), UINT16_C( 761), UINT16_C( 3256), UINT16_C(15648), UINT16_C(54529), UINT16_C(37909), UINT16_C( 6524), UINT16_C(24806)), simde_x_mm_set_epu16(UINT16_C(17088), UINT16_C( 3660), UINT16_C( 3), UINT16_C( 9), UINT16_C( 186), UINT16_C( 2), UINT16_C( 7), UINT16_C( 1856)), simde_x_mm_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C( 1085), UINT16_C( 1738), UINT16_C( 293), UINT16_C(18954), UINT16_C( 932), UINT16_C( 13)) }, { simde_x_mm_set_epu16(UINT16_C(19795), UINT16_C(45332), UINT16_C(60579), UINT16_C(32327), UINT16_C(25905), UINT16_C(63671), UINT16_C( 930), UINT16_C(32017)), simde_x_mm_set_epu16(UINT16_C( 8), UINT16_C(30488), UINT16_C( 26), UINT16_C( 3397), UINT16_C( 1518), UINT16_C( 2), UINT16_C( 20), UINT16_C( 6)), simde_x_mm_set_epu16(UINT16_C( 2474), UINT16_C( 1), UINT16_C( 2329), UINT16_C( 9), UINT16_C( 17), UINT16_C(31835), UINT16_C( 46), UINT16_C( 5336)) }, { simde_x_mm_set_epu16(UINT16_C(29801), UINT16_C(62435), UINT16_C(31106), UINT16_C(58247), UINT16_C(47275), UINT16_C(34875), UINT16_C(63847), UINT16_C( 8602)), simde_x_mm_set_epu16(UINT16_C( 5), UINT16_C( 1), UINT16_C( 842), UINT16_C( 1634), UINT16_C( 11), UINT16_C( 25), UINT16_C( 3640), UINT16_C( 932)), simde_x_mm_set_epu16(UINT16_C( 5960), UINT16_C(62435), 
UINT16_C( 36), UINT16_C( 35), UINT16_C( 4297), UINT16_C( 1395), UINT16_C( 17), UINT16_C( 9)) }, { simde_x_mm_set_epu16(UINT16_C(41564), UINT16_C(16940), UINT16_C(39647), UINT16_C(59460), UINT16_C(17425), UINT16_C(59711), UINT16_C(30880), UINT16_C(42139)), simde_x_mm_set_epu16(UINT16_C(25139), UINT16_C( 3416), UINT16_C( 43), UINT16_C( 6), UINT16_C( 4), UINT16_C( 1256), UINT16_C( 60), UINT16_C( 129)), simde_x_mm_set_epu16(UINT16_C( 1), UINT16_C( 4), UINT16_C( 922), UINT16_C( 9910), UINT16_C( 4356), UINT16_C( 47), UINT16_C( 514), UINT16_C( 326)) }, { simde_x_mm_set_epu16(UINT16_C(39593), UINT16_C(41522), UINT16_C(58894), UINT16_C( 6383), UINT16_C(39956), UINT16_C( 2820), UINT16_C(20260), UINT16_C(57360)), simde_x_mm_set_epu16(UINT16_C( 1), UINT16_C(10468), UINT16_C( 2), UINT16_C( 79), UINT16_C( 5), UINT16_C( 1166), UINT16_C( 2), UINT16_C( 3)), simde_x_mm_set_epu16(UINT16_C(39593), UINT16_C( 3), UINT16_C(29447), UINT16_C( 80), UINT16_C( 7991), UINT16_C( 2), UINT16_C(10130), UINT16_C(19120)) }, { simde_x_mm_set_epu16(UINT16_C(58633), UINT16_C(30014), UINT16_C(57061), UINT16_C(60439), UINT16_C(22536), UINT16_C(20868), UINT16_C(20870), UINT16_C(13916)), simde_x_mm_set_epu16(UINT16_C( 15), UINT16_C( 490), UINT16_C( 2338), UINT16_C( 64), UINT16_C( 876), UINT16_C( 706), UINT16_C( 65), UINT16_C( 320)), simde_x_mm_set_epu16(UINT16_C( 3908), UINT16_C( 61), UINT16_C( 24), UINT16_C( 944), UINT16_C( 25), UINT16_C( 29), UINT16_C( 321), UINT16_C( 43)) }, { simde_x_mm_set_epu16(UINT16_C( 6697), UINT16_C(21906), UINT16_C(59582), UINT16_C(44845), UINT16_C(35883), UINT16_C(64682), UINT16_C(55100), UINT16_C(57711)), simde_x_mm_set_epu16(UINT16_C( 7058), UINT16_C( 10), UINT16_C(60566), UINT16_C( 1), UINT16_C( 1), UINT16_C( 872), UINT16_C( 109), UINT16_C( 1)), simde_x_mm_set_epu16(UINT16_C( 0), UINT16_C( 2190), UINT16_C( 0), UINT16_C(44845), UINT16_C(35883), UINT16_C( 74), UINT16_C( 505), UINT16_C(57711)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { 
simde__m128i r = simde_mm_div_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_div_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu32(UINT32_C(3152261024), UINT32_C(2598586578), UINT32_C(1610828679), UINT32_C(3536337768)), simde_x_mm_set_epu32(UINT32_C( 14157), UINT32_C( 947), UINT32_C(1043337665), UINT32_C( 97937)), simde_x_mm_set_epu32(UINT32_C( 222664), UINT32_C( 2744019), UINT32_C( 1), UINT32_C( 36108)) }, { simde_x_mm_set_epu32(UINT32_C( 75140339), UINT32_C(1941562012), UINT32_C( 857740081), UINT32_C(1336535286)), simde_x_mm_set_epu32(UINT32_C( 22), UINT32_C( 1682), UINT32_C( 11), UINT32_C( 2)), simde_x_mm_set_epu32(UINT32_C( 3415469), UINT32_C( 1154317), UINT32_C( 77976371), UINT32_C( 668267643)) }, { simde_x_mm_set_epu32(UINT32_C( 948661264), UINT32_C(1195769225), UINT32_C( 694120276), UINT32_C(3517239447)), simde_x_mm_set_epu32(UINT32_C( 3949), UINT32_C( 275), UINT32_C( 12430067), UINT32_C( 15794)), simde_x_mm_set_epu32(UINT32_C( 240228), UINT32_C( 4348251), UINT32_C( 55), UINT32_C( 222694)) }, { simde_x_mm_set_epu32(UINT32_C(3023938951), UINT32_C(4109050401), UINT32_C( 287757059), UINT32_C(2648669825)), simde_x_mm_set_epu32(UINT32_C( 57756), UINT32_C( 40), UINT32_C(1080216164), UINT32_C( 173312)), simde_x_mm_set_epu32(UINT32_C( 52357), UINT32_C( 102726260), UINT32_C( 0), UINT32_C( 15282)) }, { simde_x_mm_set_epu32(UINT32_C( 864299658), UINT32_C(2427378437), UINT32_C( 823539242), UINT32_C(1758563044)), simde_x_mm_set_epu32(UINT32_C( 225), UINT32_C( 75), UINT32_C( 11529), UINT32_C( 119418298)), simde_x_mm_set_epu32(UINT32_C( 3841331), UINT32_C( 32365045), UINT32_C( 71431), UINT32_C( 14)) }, { simde_x_mm_set_epu32(UINT32_C(2662820398), UINT32_C(1208068616), UINT32_C(2158211537), UINT32_C(3417661837)), simde_x_mm_set_epu32(UINT32_C( 2367), UINT32_C( 126619), UINT32_C( 55203), UINT32_C( 155)), 
simde_x_mm_set_epu32(UINT32_C( 1124976), UINT32_C( 9540), UINT32_C( 39095), UINT32_C( 22049431)) }, { simde_x_mm_set_epu32(UINT32_C(1097247740), UINT32_C(3448507951), UINT32_C(4106436665), UINT32_C(3017338787)), simde_x_mm_set_epu32(UINT32_C( 61963115), UINT32_C( 238397327), UINT32_C( 245318), UINT32_C( 3312135)), simde_x_mm_set_epu32(UINT32_C( 17), UINT32_C( 14), UINT32_C( 16739), UINT32_C( 910)) }, { simde_x_mm_set_epu32(UINT32_C(3006363325), UINT32_C(2983927188), UINT32_C(2177891039), UINT32_C(1117727917)), simde_x_mm_set_epu32(UINT32_C( 24), UINT32_C( 12), UINT32_C(1067413818), UINT32_C( 206)), simde_x_mm_set_epu32(UINT32_C( 125265138), UINT32_C( 248660599), UINT32_C( 2), UINT32_C( 5425863)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_div_epu32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_div_epu64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu64x(UINT64_C(14823946846053138543), UINT64_C( 2773213006356142856)), simde_x_mm_set_epu64x(UINT64_C( 22806630538915743), UINT64_C( 1295)), simde_x_mm_set_epu64x(UINT64_C( 649), UINT64_C( 2141477224985438)) }, { simde_x_mm_set_epu64x(UINT64_C(16338394746286416599), UINT64_C( 4395568244008230294)), simde_x_mm_set_epu64x(UINT64_C( 1610), UINT64_C( 68247035008)), simde_x_mm_set_epu64x(UINT64_C( 10148071270985351), UINT64_C( 64406728)) }, { simde_x_mm_set_epu64x(UINT64_C( 6431957656146818365), UINT64_C(14710883493083458909)), simde_x_mm_set_epu64x(UINT64_C( 2399266305377), UINT64_C( 16092627197291141)), simde_x_mm_set_epu64x(UINT64_C( 2680801), UINT64_C( 914)) }, { simde_x_mm_set_epu64x(UINT64_C( 7920700281052633117), UINT64_C(15482760419196872328)), simde_x_mm_set_epu64x(UINT64_C( 45928957131), UINT64_C( 837231)), simde_x_mm_set_epu64x(UINT64_C( 172455478), UINT64_C( 18492817895176)) }, { 
simde_x_mm_set_epu64x(UINT64_C( 230158309193392347), UINT64_C(18390356791266391163)), simde_x_mm_set_epu64x(UINT64_C( 2253), UINT64_C( 1691141090999)), simde_x_mm_set_epu64x(UINT64_C( 102156373365908), UINT64_C( 10874525)) }, { simde_x_mm_set_epu64x(UINT64_C(12307531484633875995), UINT64_C(16695234188854570094)), simde_x_mm_set_epu64x(UINT64_C( 131150029), UINT64_C( 516657134296053652)), simde_x_mm_set_epu64x(UINT64_C( 93843147260), UINT64_C( 32)) }, { simde_x_mm_set_epu64x(UINT64_C(11764896934406933200), UINT64_C(18439918542668248477)), simde_x_mm_set_epu64x(UINT64_C( 306481550847), UINT64_C( 776223621938168297)), simde_x_mm_set_epu64x(UINT64_C( 38386966), UINT64_C( 23)) }, { simde_x_mm_set_epu64x(UINT64_C(15338454595408931369), UINT64_C(14530768559531423502)), simde_x_mm_set_epu64x(UINT64_C( 3408), UINT64_C( 2)), simde_x_mm_set_epu64x(UINT64_C( 4500720245131728), UINT64_C( 7265384279765711751)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_div_epu64(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_div_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( -27), INT8_C( 46), INT8_C(-122), INT8_C( 87), INT8_C( 34), INT8_C( -53), INT8_C( 64), INT8_C( -70), INT8_C( 25), INT8_C( -17), INT8_C( 56), INT8_C( 3), INT8_C( -75), INT8_C( -17), INT8_C( -12), INT8_C( 60), INT8_C( 100), INT8_C( -7), INT8_C(-102), INT8_C( -6), INT8_C( -10), INT8_C(-111), INT8_C( 106), INT8_C( -43), INT8_C( -28), INT8_C( -46), INT8_C( 42), INT8_C( -58), INT8_C( 85), INT8_C( -33), INT8_C(-106), INT8_C(-106)), simde_mm256_set_epi8(INT8_C( 1), INT8_C( 4), INT8_C( -31), INT8_C( 6), INT8_C( 13), INT8_C( 15), INT8_C( 20), INT8_C( 3), INT8_C( -77), INT8_C( 32), INT8_C( 5), INT8_C( 55), INT8_C( 5), INT8_C( 1), INT8_C( 16), INT8_C( 49), INT8_C( 43), INT8_C( 83), INT8_C( 5), INT8_C( 16), INT8_C( 
34), INT8_C( 20), INT8_C( 2), INT8_C( 13), INT8_C( 8), INT8_C( 2), INT8_C( 90), INT8_C( 2), INT8_C( 23), INT8_C( 12), INT8_C( 2), INT8_C( 5)), simde_mm256_set_epi8(INT8_C( -27), INT8_C( 11), INT8_C( 3), INT8_C( 14), INT8_C( 2), INT8_C( -3), INT8_C( 3), INT8_C( -23), INT8_C( 0), INT8_C( 0), INT8_C( 11), INT8_C( 0), INT8_C( -15), INT8_C( -17), INT8_C( 0), INT8_C( 1), INT8_C( 2), INT8_C( 0), INT8_C( -20), INT8_C( 0), INT8_C( 0), INT8_C( -5), INT8_C( 53), INT8_C( -3), INT8_C( -3), INT8_C( -23), INT8_C( 0), INT8_C( -29), INT8_C( 3), INT8_C( -2), INT8_C( -53), INT8_C( -21)) }, { simde_mm256_set_epi8(INT8_C( 64), INT8_C(-114), INT8_C( 66), INT8_C( -73), INT8_C( -80), INT8_C( 97), INT8_C( 103), INT8_C( -46), INT8_C( -83), INT8_C( 104), INT8_C( 22), INT8_C( -39), INT8_C( 114), INT8_C( -82), INT8_C( 83), INT8_C( 122), INT8_C( 1), INT8_C( 51), INT8_C( 75), INT8_C(-100), INT8_C( 17), INT8_C( 37), INT8_C( 53), INT8_C( -57), INT8_C( 121), INT8_C( -35), INT8_C( 108), INT8_C( -68), INT8_C( 25), INT8_C( -78), INT8_C( -54), INT8_C(-104)), simde_mm256_set_epi8(INT8_C( 91), INT8_C( 10), INT8_C( -96), INT8_C( 14), INT8_C( 21), INT8_C( 23), INT8_C( 1), INT8_C( 8), INT8_C( 9), INT8_C( 2), INT8_C( 8), INT8_C( 30), INT8_C( 1), INT8_C( -75), INT8_C( 15), INT8_C( 1), INT8_C( 27), INT8_C( 5), INT8_C( 104), INT8_C( 48), INT8_C( 11), INT8_C( 4), INT8_C( 31), INT8_C( 3), INT8_C( 20), INT8_C( 118), INT8_C( 1), INT8_C( 18), INT8_C( 1), INT8_C( 22), INT8_C( 20), INT8_C( 33)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( -11), INT8_C( 0), INT8_C( -5), INT8_C( -3), INT8_C( 4), INT8_C( 103), INT8_C( -5), INT8_C( -9), INT8_C( 52), INT8_C( 2), INT8_C( -1), INT8_C( 114), INT8_C( 1), INT8_C( 5), INT8_C( 122), INT8_C( 0), INT8_C( 10), INT8_C( 0), INT8_C( -2), INT8_C( 1), INT8_C( 9), INT8_C( 1), INT8_C( -19), INT8_C( 6), INT8_C( 0), INT8_C( 108), INT8_C( -3), INT8_C( 25), INT8_C( -3), INT8_C( -2), INT8_C( -3)) }, { simde_mm256_set_epi8(INT8_C( 123), INT8_C( 92), INT8_C( -58), INT8_C( 47), INT8_C( 51), INT8_C( 
47), INT8_C( 69), INT8_C( 12), INT8_C( 68), INT8_C( -99), INT8_C( 76), INT8_C( 32), INT8_C( 85), INT8_C( -81), INT8_C( -3), INT8_C( -4), INT8_C( -35), INT8_C( -48), INT8_C( 17), INT8_C( -73), INT8_C( 109), INT8_C( 88), INT8_C( -56), INT8_C( -99), INT8_C(-114), INT8_C( 127), INT8_C( 26), INT8_C( -29), INT8_C( -48), INT8_C( -28), INT8_C( 93), INT8_C( -85)), simde_mm256_set_epi8(INT8_C( 86), INT8_C( 12), INT8_C( 90), INT8_C( 46), INT8_C( 10), INT8_C( 18), INT8_C( 1), INT8_C( 58), INT8_C( -94), INT8_C( 4), INT8_C( 2), INT8_C( 1), INT8_C( 20), INT8_C( 20), INT8_C( 1), INT8_C( 10), INT8_C( 4), INT8_C( 13), INT8_C( 1), INT8_C( 1), INT8_C( 1), INT8_C( 3), INT8_C( 16), INT8_C( 4), INT8_C( 4), INT8_C( 2), INT8_C( 8), INT8_C( -96), INT8_C( 1), INT8_C( 5), INT8_C( -98), INT8_C( 11)), simde_mm256_set_epi8(INT8_C( 1), INT8_C( 7), INT8_C( 0), INT8_C( 1), INT8_C( 5), INT8_C( 2), INT8_C( 69), INT8_C( 0), INT8_C( 0), INT8_C( -24), INT8_C( 38), INT8_C( 32), INT8_C( 4), INT8_C( -4), INT8_C( -3), INT8_C( 0), INT8_C( -8), INT8_C( -3), INT8_C( 17), INT8_C( -73), INT8_C( 109), INT8_C( 29), INT8_C( -3), INT8_C( -24), INT8_C( -28), INT8_C( 63), INT8_C( 3), INT8_C( 0), INT8_C( -48), INT8_C( -5), INT8_C( 0), INT8_C( -7)) }, { simde_mm256_set_epi8(INT8_C( -83), INT8_C( 8), INT8_C( 39), INT8_C( 32), INT8_C( -68), INT8_C( 0), INT8_C( 93), INT8_C( 7), INT8_C( -26), INT8_C( -37), INT8_C( 3), INT8_C( -23), INT8_C( 38), INT8_C( -61), INT8_C( 87), INT8_C( 32), INT8_C( 65), INT8_C( 24), INT8_C( -17), INT8_C( -19), INT8_C( 113), INT8_C( -25), INT8_C( 58), INT8_C( 4), INT8_C(-127), INT8_C( 41), INT8_C( -74), INT8_C( 113), INT8_C( 49), INT8_C( -39), INT8_C( -48), INT8_C( 114)), simde_mm256_set_epi8(INT8_C(-102), INT8_C( 1), INT8_C( 22), INT8_C( 1), INT8_C( 15), INT8_C( 2), INT8_C( 19), INT8_C( 69), INT8_C( 1), INT8_C( 49), INT8_C( 66), INT8_C( 2), INT8_C( 1), INT8_C( 2), INT8_C( 10), INT8_C( 8), INT8_C( 1), INT8_C( 1), INT8_C( 4), INT8_C( 66), INT8_C( 11), INT8_C( 22), INT8_C(-126), INT8_C( 49), INT8_C( 
1), INT8_C( 38), INT8_C( 1), INT8_C( 3), INT8_C( 7), INT8_C( 3), INT8_C( 21), INT8_C( 21)), simde_mm256_set_epi8(INT8_C( 0), INT8_C( 8), INT8_C( 1), INT8_C( 32), INT8_C( -4), INT8_C( 0), INT8_C( 4), INT8_C( 0), INT8_C( -26), INT8_C( 0), INT8_C( 0), INT8_C( -11), INT8_C( 38), INT8_C( -30), INT8_C( 8), INT8_C( 4), INT8_C( 65), INT8_C( 24), INT8_C( -4), INT8_C( 0), INT8_C( 10), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C(-127), INT8_C( 1), INT8_C( -74), INT8_C( 37), INT8_C( 7), INT8_C( -13), INT8_C( -2), INT8_C( 5)) }, { simde_mm256_set_epi8(INT8_C( 66), INT8_C( 127), INT8_C( 41), INT8_C(-124), INT8_C( -90), INT8_C( 28), INT8_C(-118), INT8_C( 18), INT8_C( 79), INT8_C( 17), INT8_C( 126), INT8_C( -43), INT8_C( -78), INT8_C( 78), INT8_C( 76), INT8_C( 46), INT8_C( 60), INT8_C(-126), INT8_C( -41), INT8_C( -77), INT8_C( -62), INT8_C(-116), INT8_C(-115), INT8_C( 55), INT8_C( 19), INT8_C( 104), INT8_C(-104), INT8_C( -29), INT8_C( 54), INT8_C(-118), INT8_C( -40), INT8_C( -58)), simde_mm256_set_epi8(INT8_C( 3), INT8_C( 53), INT8_C( 28), INT8_C( -96), INT8_C( 1), INT8_C( 91), INT8_C( 7), INT8_C( 1), INT8_C( 29), INT8_C( 30), INT8_C( 1), INT8_C( 10), INT8_C( 1), INT8_C( 36), INT8_C( 7), INT8_C( 1), INT8_C(-101), INT8_C( 5), INT8_C( 13), INT8_C( 5), INT8_C( 85), INT8_C( 11), INT8_C( 34), INT8_C( 48), INT8_C( 17), INT8_C( 42), INT8_C( 3), INT8_C( 87), INT8_C( 1), INT8_C( 2), INT8_C( 74), INT8_C( 8)), simde_mm256_set_epi8(INT8_C( 22), INT8_C( 2), INT8_C( 1), INT8_C( 1), INT8_C( -90), INT8_C( 0), INT8_C( -16), INT8_C( 18), INT8_C( 2), INT8_C( 0), INT8_C( 126), INT8_C( -4), INT8_C( -78), INT8_C( 2), INT8_C( 10), INT8_C( 46), INT8_C( 0), INT8_C( -25), INT8_C( -3), INT8_C( -15), INT8_C( 0), INT8_C( -10), INT8_C( -3), INT8_C( 1), INT8_C( 1), INT8_C( 2), INT8_C( -34), INT8_C( 0), INT8_C( 54), INT8_C( -59), INT8_C( 0), INT8_C( -7)) }, { simde_mm256_set_epi8(INT8_C( 79), INT8_C( -60), INT8_C( 106), INT8_C( -93), INT8_C(-111), INT8_C( 118), INT8_C( -87), INT8_C( -78), INT8_C( -28), INT8_C( 
107), INT8_C( -12), INT8_C( -54), INT8_C( 101), INT8_C( -62), INT8_C( 4), INT8_C( -51), INT8_C( -90), INT8_C(-114), INT8_C( 14), INT8_C( 124), INT8_C( -67), INT8_C( 47), INT8_C( 41), INT8_C( 37), INT8_C( 126), INT8_C( -20), INT8_C( 119), INT8_C( 105), INT8_C( -17), INT8_C( 95), INT8_C( -41), INT8_C( 19)), simde_mm256_set_epi8(INT8_C( -34), INT8_C( 4), INT8_C( 32), INT8_C( 1), INT8_C( 4), INT8_C( 10), INT8_C( 7), INT8_C( 5), INT8_C( 120), INT8_C( 1), INT8_C( 1), INT8_C( 1), INT8_C( 26), INT8_C( 6), INT8_C( 44), INT8_C( 2), INT8_C( 55), INT8_C( 14), INT8_C( 4), INT8_C( 41), INT8_C( 41), INT8_C( 6), INT8_C( 10), INT8_C( 7), INT8_C( 7), INT8_C( 21), INT8_C( 126), INT8_C( 59), INT8_C( 13), INT8_C( 8), INT8_C( 2), INT8_C( 6)), simde_mm256_set_epi8(INT8_C( -2), INT8_C( -15), INT8_C( 3), INT8_C( -93), INT8_C( -27), INT8_C( 11), INT8_C( -12), INT8_C( -15), INT8_C( 0), INT8_C( 107), INT8_C( -12), INT8_C( -54), INT8_C( 3), INT8_C( -10), INT8_C( 0), INT8_C( -25), INT8_C( -1), INT8_C( -8), INT8_C( 3), INT8_C( 3), INT8_C( -1), INT8_C( 7), INT8_C( 4), INT8_C( 5), INT8_C( 18), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( -1), INT8_C( 11), INT8_C( -20), INT8_C( 3)) }, { simde_mm256_set_epi8(INT8_C( -48), INT8_C( -29), INT8_C( 23), INT8_C( 39), INT8_C( 106), INT8_C( -37), INT8_C( 1), INT8_C( 62), INT8_C( -21), INT8_C( -4), INT8_C( -92), INT8_C( -12), INT8_C( 78), INT8_C( -93), INT8_C( 36), INT8_C( -10), INT8_C( -84), INT8_C( 102), INT8_C( 9), INT8_C( 70), INT8_C( -16), INT8_C( -90), INT8_C( 82), INT8_C(-124), INT8_C( -78), INT8_C( 58), INT8_C( 35), INT8_C( 108), INT8_C(-105), INT8_C( -72), INT8_C( -16), INT8_C(-103)), simde_mm256_set_epi8(INT8_C( 2), INT8_C( 4), INT8_C( 28), INT8_C( 120), INT8_C( 1), INT8_C( 5), INT8_C( 2), INT8_C( 61), INT8_C( 1), INT8_C( 33), INT8_C( 110), INT8_C( 1), INT8_C( 102), INT8_C( 3), INT8_C( 3), INT8_C( 1), INT8_C( 1), INT8_C( 26), INT8_C( 11), INT8_C( 7), INT8_C( 75), INT8_C( 3), INT8_C( 5), INT8_C( 19), INT8_C( 3), INT8_C( -26), INT8_C( 56), INT8_C( 5), 
INT8_C( 7), INT8_C( 6), INT8_C( 2), INT8_C( 5)), simde_mm256_set_epi8(INT8_C( -24), INT8_C( -7), INT8_C( 0), INT8_C( 0), INT8_C( 106), INT8_C( -7), INT8_C( 0), INT8_C( 1), INT8_C( -21), INT8_C( 0), INT8_C( 0), INT8_C( -12), INT8_C( 0), INT8_C( -31), INT8_C( 12), INT8_C( -10), INT8_C( -84), INT8_C( 3), INT8_C( 0), INT8_C( 10), INT8_C( 0), INT8_C( -30), INT8_C( 16), INT8_C( -6), INT8_C( -26), INT8_C( -2), INT8_C( 0), INT8_C( 21), INT8_C( -15), INT8_C( -12), INT8_C( -8), INT8_C( -20)) }, { simde_mm256_set_epi8(INT8_C( 110), INT8_C( 56), INT8_C(-120), INT8_C( -32), INT8_C( -22), INT8_C( 97), INT8_C( -56), INT8_C( 55), INT8_C( -90), INT8_C( 33), INT8_C( 92), INT8_C( 89), INT8_C(-107), INT8_C( 55), INT8_C( -50), INT8_C( -88), INT8_C( 35), INT8_C( 21), INT8_C( 54), INT8_C( 26), INT8_C(-122), INT8_C( 103), INT8_C( 76), INT8_C( 38), INT8_C(-110), INT8_C( 11), INT8_C( 26), INT8_C( -11), INT8_C( 0), INT8_C( 3), INT8_C( 30), INT8_C( 59)), simde_mm256_set_epi8(INT8_C( -31), INT8_C( -83), INT8_C( 101), INT8_C( 17), INT8_C( 8), INT8_C( 15), INT8_C( 2), INT8_C( 7), INT8_C( 37), INT8_C( 84), INT8_C( -52), INT8_C( 25), INT8_C( 42), INT8_C( -27), INT8_C( 1), INT8_C( 10), INT8_C( 7), INT8_C( 37), INT8_C( 54), INT8_C( 31), INT8_C( 54), INT8_C( 62), INT8_C( 11), INT8_C( 54), INT8_C( 43), INT8_C( 1), INT8_C( 4), INT8_C( 5), INT8_C( 93), INT8_C( 124), INT8_C( 2), INT8_C( 3)), simde_mm256_set_epi8(INT8_C( -3), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -2), INT8_C( 6), INT8_C( -28), INT8_C( 7), INT8_C( -2), INT8_C( 0), INT8_C( -1), INT8_C( 3), INT8_C( -2), INT8_C( -2), INT8_C( -50), INT8_C( -8), INT8_C( 5), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( -2), INT8_C( 1), INT8_C( 6), INT8_C( 0), INT8_C( -2), INT8_C( 11), INT8_C( 6), INT8_C( -2), INT8_C( 0), INT8_C( 0), INT8_C( 15), INT8_C( 19)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_div_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 
0; } static int test_simde_mm256_div_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C(-29867), INT16_C( 9314), INT16_C( 7980), INT16_C( 8102), INT16_C(-24663), INT16_C( 4367), INT16_C(-15443), INT16_C( -5657), INT16_C(-20080), INT16_C(-10092), INT16_C(-31734), INT16_C( 6262), INT16_C( 3510), INT16_C(-31811), INT16_C( -4053), INT16_C( -6124)), simde_mm256_set_epi16(INT16_C( 1), INT16_C( 1438), INT16_C( -9), INT16_C( 435), INT16_C( -11), INT16_C( 2), INT16_C( -496), INT16_C( 10321), INT16_C( -1000), INT16_C( -27), INT16_C( -4), INT16_C( 453), INT16_C( -2), INT16_C( 19741), INT16_C( -615), INT16_C( -3265)), simde_mm256_set_epi16(INT16_C(-29867), INT16_C( 6), INT16_C( -886), INT16_C( 18), INT16_C( 2242), INT16_C( 2183), INT16_C( 31), INT16_C( 0), INT16_C( 20), INT16_C( 373), INT16_C( 7933), INT16_C( 13), INT16_C( -1755), INT16_C( -1), INT16_C( 6), INT16_C( 1)) }, { simde_mm256_set_epi16(INT16_C( -6800), INT16_C( 13259), INT16_C( -2233), INT16_C( 1354), INT16_C( -8106), INT16_C(-17039), INT16_C( 9504), INT16_C( 22255), INT16_C( 12402), INT16_C( -2677), INT16_C( 4463), INT16_C( 28303), INT16_C(-12322), INT16_C(-19201), INT16_C( 30668), INT16_C( 15284)), simde_mm256_set_epi16(INT16_C( 16270), INT16_C(-26534), INT16_C( -13), INT16_C( -20), INT16_C( -12), INT16_C( -182), INT16_C( -13), INT16_C( -2), INT16_C( 399), INT16_C( -245), INT16_C( -1), INT16_C( -1), INT16_C( -3), INT16_C( 59), INT16_C( 11), INT16_C( -9799)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 171), INT16_C( -67), INT16_C( 675), INT16_C( 93), INT16_C( -731), INT16_C(-11127), INT16_C( 31), INT16_C( 10), INT16_C( -4463), INT16_C(-28303), INT16_C( 4107), INT16_C( -325), INT16_C( 2788), INT16_C( -1)) }, { simde_mm256_set_epi16(INT16_C( 23535), INT16_C( 10930), INT16_C( 30193), INT16_C( -8194), INT16_C( -8688), INT16_C( 2183), INT16_C(-14596), INT16_C(-28144), INT16_C(-10670), INT16_C( 1107), INT16_C( 
31427), INT16_C( -7322), INT16_C( 17038), INT16_C(-32679), INT16_C( 23368), INT16_C(-24524)), simde_mm256_set_epi16(INT16_C( 19), INT16_C( -388), INT16_C( -1), INT16_C( -2261), INT16_C( -7651), INT16_C( 1639), INT16_C( -50), INT16_C( -2059), INT16_C( -25), INT16_C( -57), INT16_C( -952), INT16_C( 17), INT16_C( -4528), INT16_C( -764), INT16_C( -925), INT16_C( -20)), simde_mm256_set_epi16(INT16_C( 1238), INT16_C( -28), INT16_C(-30193), INT16_C( 3), INT16_C( 1), INT16_C( 1), INT16_C( 291), INT16_C( 13), INT16_C( 426), INT16_C( -19), INT16_C( -33), INT16_C( -430), INT16_C( -3), INT16_C( 42), INT16_C( -25), INT16_C( 1226)) }, { simde_mm256_set_epi16(INT16_C( 22767), INT16_C( 28543), INT16_C(-30401), INT16_C( 25623), INT16_C( 2206), INT16_C(-16640), INT16_C(-13607), INT16_C(-30899), INT16_C( -2384), INT16_C( -1714), INT16_C( 12691), INT16_C( 9427), INT16_C( 11864), INT16_C( 29526), INT16_C( 8259), INT16_C( 6808)), simde_mm256_set_epi16(INT16_C( 15244), INT16_C( 1), INT16_C( -1), INT16_C( -3), INT16_C( -18), INT16_C( -10), INT16_C(-15299), INT16_C( -824), INT16_C( 2005), INT16_C( 471), INT16_C( 2069), INT16_C( 204), INT16_C( 25), INT16_C( -13), INT16_C( -3), INT16_C( 11)), simde_mm256_set_epi16(INT16_C( 1), INT16_C( 28543), INT16_C( 30401), INT16_C( -8541), INT16_C( -122), INT16_C( 1664), INT16_C( 0), INT16_C( 37), INT16_C( -1), INT16_C( -3), INT16_C( 6), INT16_C( 46), INT16_C( 474), INT16_C( -2271), INT16_C( -2753), INT16_C( 618)) }, { simde_mm256_set_epi16(INT16_C(-16585), INT16_C(-25277), INT16_C( -4139), INT16_C(-27065), INT16_C(-28777), INT16_C( -9487), INT16_C(-18713), INT16_C(-30387), INT16_C(-14811), INT16_C( 24102), INT16_C(-10162), INT16_C( 7921), INT16_C( 29417), INT16_C( 15464), INT16_C( 24785), INT16_C( -1285)), simde_mm256_set_epi16(INT16_C( -121), INT16_C( 328), INT16_C( 10), INT16_C( -385), INT16_C( -1), INT16_C( 4), INT16_C( 388), INT16_C( -1), INT16_C( 1), INT16_C( 4863), INT16_C( -499), INT16_C( 3), INT16_C( -226), INT16_C(-15244), INT16_C( 5), INT16_C( 
-5)), simde_mm256_set_epi16(INT16_C( 137), INT16_C( -77), INT16_C( -413), INT16_C( 70), INT16_C( 28777), INT16_C( -2371), INT16_C( -48), INT16_C( 30387), INT16_C(-14811), INT16_C( 4), INT16_C( 20), INT16_C( 2640), INT16_C( -130), INT16_C( -1), INT16_C( 4957), INT16_C( 257)) }, { simde_mm256_set_epi16(INT16_C( -8831), INT16_C(-12421), INT16_C( 28092), INT16_C(-15215), INT16_C( 5495), INT16_C( 15560), INT16_C( 8747), INT16_C( 22186), INT16_C(-22634), INT16_C(-23262), INT16_C( 360), INT16_C(-18340), INT16_C(-15939), INT16_C(-18429), INT16_C(-10641), INT16_C(-25953)), simde_mm256_set_epi16(INT16_C( 6646), INT16_C( -440), INT16_C( 5), INT16_C( 9), INT16_C( 5230), INT16_C( 14027), INT16_C( -115), INT16_C( -1), INT16_C( -118), INT16_C( -466), INT16_C( -288), INT16_C( -9), INT16_C( 114), INT16_C( -2656), INT16_C( -2539), INT16_C( 1803)), simde_mm256_set_epi16(INT16_C( -1), INT16_C( 28), INT16_C( 5618), INT16_C( -1690), INT16_C( 1), INT16_C( 1), INT16_C( -76), INT16_C(-22186), INT16_C( 191), INT16_C( 49), INT16_C( -1), INT16_C( 2037), INT16_C( -139), INT16_C( 6), INT16_C( 4), INT16_C( -14)) }, { simde_mm256_set_epi16(INT16_C( 2118), INT16_C( 26269), INT16_C( 31059), INT16_C( 17912), INT16_C(-28141), INT16_C( 5202), INT16_C( 30957), INT16_C(-32121), INT16_C( -2609), INT16_C(-12316), INT16_C(-10959), INT16_C( 17018), INT16_C( 4376), INT16_C( 1963), INT16_C( 14912), INT16_C( 8031)), simde_mm256_set_epi16(INT16_C( -2197), INT16_C( 11), INT16_C( -18), INT16_C( -3745), INT16_C( -1), INT16_C( -3), INT16_C( 4), INT16_C( 3362), INT16_C( -1965), INT16_C( 2), INT16_C( 574), INT16_C( 1347), INT16_C( -888), INT16_C( -15), INT16_C( 1260), INT16_C( -640)), simde_mm256_set_epi16(INT16_C( 0), INT16_C( 2388), INT16_C( -1725), INT16_C( -4), INT16_C( 28141), INT16_C( -1734), INT16_C( 7739), INT16_C( -9), INT16_C( 1), INT16_C( -6158), INT16_C( -19), INT16_C( 12), INT16_C( -4), INT16_C( -130), INT16_C( 11), INT16_C( -12)) }, { simde_mm256_set_epi16(INT16_C(-28159), INT16_C( 7162), 
INT16_C(-24830), INT16_C( 4589), INT16_C( 7038), INT16_C( 3178), INT16_C( 4246), INT16_C( -8357), INT16_C( -4695), INT16_C( -9928), INT16_C( -5517), INT16_C(-27023), INT16_C( 18843), INT16_C( 726), INT16_C( 30135), INT16_C( -4871)), simde_mm256_set_epi16(INT16_C( -48), INT16_C( 767), INT16_C( 10), INT16_C( 14), INT16_C( -2039), INT16_C( -2), INT16_C( -53), INT16_C( -1), INT16_C( -1865), INT16_C( -5344), INT16_C( 63), INT16_C( -505), INT16_C( 2993), INT16_C(-14674), INT16_C( 3), INT16_C( -2)), simde_mm256_set_epi16(INT16_C( 586), INT16_C( 9), INT16_C( -2483), INT16_C( 327), INT16_C( -3), INT16_C( -1589), INT16_C( -80), INT16_C( 8357), INT16_C( 2), INT16_C( 1), INT16_C( -87), INT16_C( 53), INT16_C( 6), INT16_C( 0), INT16_C( 10045), INT16_C( 2435)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_div_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_div_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C( 1220357195), INT32_C( 1053623553), INT32_C( 1487300768), INT32_C(-1113593972), INT32_C( -270466921), INT32_C( 1339961381), INT32_C( 586340423), INT32_C( 1641199948)), simde_mm256_set_epi32(INT32_C( 119685834), INT32_C( 18), INT32_C( 13175516), INT32_C( 2634495), INT32_C( 17), INT32_C( 43789), INT32_C( -89), INT32_C( 14)), simde_mm256_set_epi32(INT32_C( 10), INT32_C( 58534641), INT32_C( 112), INT32_C( -422), INT32_C( -15909818), INT32_C( 30600), INT32_C( -6588094), INT32_C( 117228567)) }, { simde_mm256_set_epi32(INT32_C( 1446174898), INT32_C( 1812297946), INT32_C(-2020316623), INT32_C( 843765864), INT32_C(-1892632155), INT32_C( -473868741), INT32_C( -150363910), INT32_C(-1673359813)), simde_mm256_set_epi32(INT32_C( 2569135), INT32_C( 8168), INT32_C( -4111977), INT32_C( -322), INT32_C( -34091386), INT32_C( 6306), INT32_C( 363174), INT32_C( 
-37460)), simde_mm256_set_epi32(INT32_C( 562), INT32_C( 221877), INT32_C( 491), INT32_C( -2620390), INT32_C( 55), INT32_C( -75145), INT32_C( -414), INT32_C( 44670)) }, { simde_mm256_set_epi32(INT32_C( 1015973964), INT32_C( -637033789), INT32_C(-1269659180), INT32_C(-1847076164), INT32_C( 841308417), INT32_C(-1365136816), INT32_C( -621262370), INT32_C( -734285761)), simde_mm256_set_epi32(INT32_C( -1597720), INT32_C( 192391), INT32_C( 2145556), INT32_C( -4054), INT32_C( -1), INT32_C( 63753), INT32_C( 24015328), INT32_C( 267)), simde_mm256_set_epi32(INT32_C( -635), INT32_C( -3311), INT32_C( -591), INT32_C( 455618), INT32_C( -841308417), INT32_C( -21412), INT32_C( -25), INT32_C( -2750133)) }, { simde_mm256_set_epi32(INT32_C( 55709148), INT32_C( 1036348942), INT32_C( 1622954205), INT32_C( 1464937075), INT32_C( 309602207), INT32_C( 765487752), INT32_C(-1883826060), INT32_C( 396580110)), simde_mm256_set_epi32(INT32_C( 81348), INT32_C( 130432), INT32_C( -2896201), INT32_C( 130033), INT32_C( 2659), INT32_C( 12656), INT32_C( -49), INT32_C( -3976)), simde_mm256_set_epi32(INT32_C( 684), INT32_C( 7945), INT32_C( -560), INT32_C( 11265), INT32_C( 116435), INT32_C( 60484), INT32_C( 38445429), INT32_C( -99743)) }, { simde_mm256_set_epi32(INT32_C( -679308904), INT32_C( 1402916027), INT32_C( -568259373), INT32_C( -151984025), INT32_C(-1276596492), INT32_C( 897258790), INT32_C( 1125465930), INT32_C(-1843912592)), simde_mm256_set_epi32(INT32_C( -32), INT32_C( -3810), INT32_C( -77), INT32_C( -56604), INT32_C( 2670), INT32_C( -7949), INT32_C( 3200), INT32_C( 22045)), simde_mm256_set_epi32(INT32_C( 21228403), INT32_C( -368219), INT32_C( 7379991), INT32_C( 2685), INT32_C( -478126), INT32_C( -112876), INT32_C( 351708), INT32_C( -83643)) }, { simde_mm256_set_epi32(INT32_C(-2128829075), INT32_C( -944286219), INT32_C(-1801390937), INT32_C( 1597729863), INT32_C( -919883082), INT32_C( 243529930), INT32_C(-1346833089), INT32_C( -703593878)), simde_mm256_set_epi32(INT32_C( -702474), INT32_C( 
-505), INT32_C( -33538370), INT32_C( 98), INT32_C( -989384), INT32_C( -3405840), INT32_C( 1441037), INT32_C( 13)), simde_mm256_set_epi32(INT32_C( 3030), INT32_C( 1869873), INT32_C( 53), INT32_C( 16303365), INT32_C( 929), INT32_C( -71), INT32_C( -934), INT32_C( -54122606)) }, { simde_mm256_set_epi32(INT32_C( 2104898600), INT32_C( 1858378377), INT32_C( 427610695), INT32_C( 1702051599), INT32_C( 1832473397), INT32_C( 333005662), INT32_C( 2145787203), INT32_C(-1223503753)), simde_mm256_set_epi32(INT32_C( -558822192), INT32_C( -1119473), INT32_C( 71), INT32_C( -1), INT32_C( 83208), INT32_C( -24), INT32_C( 490), INT32_C( 1423105)), simde_mm256_set_epi32(INT32_C( -3), INT32_C( -1660), INT32_C( 6022685), INT32_C(-1702051599), INT32_C( 22022), INT32_C( -13875235), INT32_C( 4379157), INT32_C( -859)) }, { simde_mm256_set_epi32(INT32_C( 1485879823), INT32_C( -139198096), INT32_C( 325243915), INT32_C( 1406493107), INT32_C( 631640676), INT32_C( -221831503), INT32_C(-1100348538), INT32_C(-1615759789)), simde_mm256_set_epi32(INT32_C( -5), INT32_C( 6019751), INT32_C( 240957918), INT32_C( -11512), INT32_C( 598), INT32_C( -2086), INT32_C( -398), INT32_C( 57524929)), simde_mm256_set_epi32(INT32_C( -297175964), INT32_C( -23), INT32_C( 1), INT32_C( -122176), INT32_C( 1056255), INT32_C( 106343), INT32_C( 2764694), INT32_C( -28)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_div_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_div_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi64x(INT64_C(-3334573923423752375), INT64_C( 5523377417165557950), INT64_C( 8907494989684855351), INT64_C(-7237415305059575746)), simde_mm256_set_epi64x(INT64_C( -9171626596647), INT64_C( -528646059918), INT64_C( -547414), INT64_C( -408)), simde_mm256_set_epi64x(INT64_C( 363574), INT64_C( 
-10448157), INT64_C( -16271953201205), INT64_C( 17738763002596999)) }, { simde_mm256_set_epi64x(INT64_C( 1061533355853207499), INT64_C(-6945701440990101118), INT64_C( 2574461366811200995), INT64_C( 5644549884645175906)), simde_mm256_set_epi64x(INT64_C( -7767261), INT64_C( 10), INT64_C( 703320391), INT64_C( 12482)), simde_mm256_set_epi64x(INT64_C( -136667656185), INT64_C( -694570144099010111), INT64_C( 3660438968), INT64_C( 452215180631723)) }, { simde_mm256_set_epi64x(INT64_C( 6574854431853233270), INT64_C(-4435882974713226150), INT64_C(-7281891715377237835), INT64_C( 5757222003030846963)), simde_mm256_set_epi64x(INT64_C( -6789037658203169), INT64_C( -17570), INT64_C( 13607885161437703), INT64_C( -3435095)), simde_mm256_set_epi64x(INT64_C( -968), INT64_C( 252469150524372), INT64_C( -535), INT64_C( -1676000810175)) }, { simde_mm256_set_epi64x(INT64_C( 8744553519166698091), INT64_C( 1287292031192317940), INT64_C( 3174243940922689145), INT64_C( 1491394686146555130)), simde_mm256_set_epi64x(INT64_C( 4922490686897444762), INT64_C( 39224412374), INT64_C( 408105256075342), INT64_C( -123591096713)), simde_mm256_set_epi64x(INT64_C( 1), INT64_C( 32818644), INT64_C( 7778), INT64_C( -12067169)) }, { simde_mm256_set_epi64x(INT64_C( 7799483112595335323), INT64_C(-7884857912053188380), INT64_C( 7107489308993436793), INT64_C( 8695475100908985079)), simde_mm256_set_epi64x(INT64_C( 87), INT64_C( 9826793), INT64_C( -161255109), INT64_C( -1858599442623445)), simde_mm256_set_epi64x(INT64_C( 89649231179256727), INT64_C( -802383637474), INT64_C( -44076056585), INT64_C( -4678)) }, { simde_mm256_set_epi64x(INT64_C(-7825910496387937639), INT64_C( -900763466419687908), INT64_C(-4456690812175475739), INT64_C(-5053240277275181299)), simde_mm256_set_epi64x(INT64_C( -6606649764768), INT64_C( -57398), INT64_C( -568604113828926107), INT64_C( 4737239)), simde_mm256_set_epi64x(INT64_C( 1184550), INT64_C( 15693290121950), INT64_C( 7), INT64_C( -1066705791553)) }, { 
simde_mm256_set_epi64x(INT64_C(-3221953081539923764), INT64_C(-1956032791701614517), INT64_C( 7374977017813000944), INT64_C( 1124803906659433418)), simde_mm256_set_epi64x(INT64_C( -339969907608416876), INT64_C( -15370), INT64_C( -1321351535), INT64_C( -7)), simde_mm256_set_epi64x(INT64_C( 9), INT64_C( 127263031340378), INT64_C( -5581389072), INT64_C( -160686272379919059)) }, { simde_mm256_set_epi64x(INT64_C( 2535418176622027197), INT64_C(-1425521063377864898), INT64_C( 5027060343823160394), INT64_C(-2416798548878703366)), simde_mm256_set_epi64x(INT64_C( -250), INT64_C( 51), INT64_C( 3355), INT64_C( 22043462023905)), simde_mm256_set_epi64x(INT64_C( -10141672706488108), INT64_C( -27951393399565978), INT64_C( 1498378641974116), INT64_C( -109637)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_div_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_div_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu8(UINT8_C(236), UINT8_C(194), UINT8_C(120), UINT8_C( 0), UINT8_C(238), UINT8_C(197), UINT8_C(223), UINT8_C( 50), UINT8_C(177), UINT8_C( 51), UINT8_C( 14), UINT8_C(208), UINT8_C(118), UINT8_C(136), UINT8_C(234), UINT8_C(162), UINT8_C( 34), UINT8_C(152), UINT8_C( 32), UINT8_C( 62), UINT8_C( 35), UINT8_C(101), UINT8_C( 72), UINT8_C( 2), UINT8_C(139), UINT8_C(150), UINT8_C(255), UINT8_C( 2), UINT8_C( 37), UINT8_C(232), UINT8_C( 3), UINT8_C(210)), simde_x_mm256_set_epu8(UINT8_C(218), UINT8_C( 43), UINT8_C( 2), UINT8_C( 2), UINT8_C( 29), UINT8_C( 90), UINT8_C( 30), UINT8_C( 31), UINT8_C( 20), UINT8_C( 1), UINT8_C( 24), UINT8_C( 92), UINT8_C( 3), UINT8_C( 1), UINT8_C( 33), UINT8_C( 6), UINT8_C( 14), UINT8_C( 38), UINT8_C( 5), UINT8_C( 4), UINT8_C( 13), UINT8_C( 2), UINT8_C( 11), UINT8_C( 1), UINT8_C( 1), UINT8_C( 25), UINT8_C(242), UINT8_C( 3), UINT8_C( 12), 
UINT8_C( 59), UINT8_C( 75), UINT8_C(192)), simde_x_mm256_set_epu8(UINT8_C( 1), UINT8_C( 4), UINT8_C( 60), UINT8_C( 0), UINT8_C( 8), UINT8_C( 2), UINT8_C( 7), UINT8_C( 1), UINT8_C( 8), UINT8_C( 51), UINT8_C( 0), UINT8_C( 2), UINT8_C( 39), UINT8_C(136), UINT8_C( 7), UINT8_C( 27), UINT8_C( 2), UINT8_C( 4), UINT8_C( 6), UINT8_C( 15), UINT8_C( 2), UINT8_C( 50), UINT8_C( 6), UINT8_C( 2), UINT8_C(139), UINT8_C( 6), UINT8_C( 1), UINT8_C( 0), UINT8_C( 3), UINT8_C( 3), UINT8_C( 0), UINT8_C( 1)) }, { simde_x_mm256_set_epu8(UINT8_C(223), UINT8_C(136), UINT8_C(181), UINT8_C(189), UINT8_C(144), UINT8_C(162), UINT8_C( 60), UINT8_C(122), UINT8_C(180), UINT8_C(157), UINT8_C(255), UINT8_C( 4), UINT8_C(248), UINT8_C( 71), UINT8_C( 45), UINT8_C(231), UINT8_C(108), UINT8_C(100), UINT8_C( 13), UINT8_C(181), UINT8_C(158), UINT8_C(251), UINT8_C(141), UINT8_C( 49), UINT8_C(175), UINT8_C( 90), UINT8_C(251), UINT8_C( 13), UINT8_C(151), UINT8_C(233), UINT8_C(181), UINT8_C(181)), simde_x_mm256_set_epu8(UINT8_C( 2), UINT8_C( 7), UINT8_C( 2), UINT8_C( 7), UINT8_C( 6), UINT8_C( 23), UINT8_C( 1), UINT8_C( 22), UINT8_C( 9), UINT8_C( 21), UINT8_C( 6), UINT8_C( 1), UINT8_C( 1), UINT8_C( 27), UINT8_C( 1), UINT8_C(254), UINT8_C( 30), UINT8_C( 92), UINT8_C( 8), UINT8_C( 13), UINT8_C( 7), UINT8_C( 4), UINT8_C( 29), UINT8_C( 24), UINT8_C( 1), UINT8_C( 15), UINT8_C( 31), UINT8_C( 1), UINT8_C(190), UINT8_C( 1), UINT8_C( 20), UINT8_C( 8)), simde_x_mm256_set_epu8(UINT8_C(111), UINT8_C( 19), UINT8_C( 90), UINT8_C( 27), UINT8_C( 24), UINT8_C( 7), UINT8_C( 60), UINT8_C( 5), UINT8_C( 20), UINT8_C( 7), UINT8_C( 42), UINT8_C( 4), UINT8_C(248), UINT8_C( 2), UINT8_C( 45), UINT8_C( 0), UINT8_C( 3), UINT8_C( 1), UINT8_C( 1), UINT8_C( 13), UINT8_C( 22), UINT8_C( 62), UINT8_C( 4), UINT8_C( 2), UINT8_C(175), UINT8_C( 6), UINT8_C( 8), UINT8_C( 13), UINT8_C( 0), UINT8_C(233), UINT8_C( 9), UINT8_C( 22)) }, { simde_x_mm256_set_epu8(UINT8_C(162), UINT8_C( 7), UINT8_C(145), UINT8_C(154), UINT8_C(168), UINT8_C(175), UINT8_C( 
61), UINT8_C( 3), UINT8_C( 93), UINT8_C( 6), UINT8_C(114), UINT8_C( 59), UINT8_C( 17), UINT8_C(165), UINT8_C(240), UINT8_C(189), UINT8_C(201), UINT8_C( 90), UINT8_C( 72), UINT8_C( 56), UINT8_C( 98), UINT8_C(155), UINT8_C( 93), UINT8_C(190), UINT8_C( 59), UINT8_C(174), UINT8_C(136), UINT8_C( 6), UINT8_C(153), UINT8_C(172), UINT8_C(102), UINT8_C(120)), simde_x_mm256_set_epu8(UINT8_C(110), UINT8_C( 41), UINT8_C( 3), UINT8_C( 12), UINT8_C(210), UINT8_C( 1), UINT8_C( 5), UINT8_C( 6), UINT8_C( 47), UINT8_C( 58), UINT8_C( 48), UINT8_C( 20), UINT8_C(109), UINT8_C( 3), UINT8_C( 34), UINT8_C( 3), UINT8_C( 8), UINT8_C( 5), UINT8_C( 3), UINT8_C( 1), UINT8_C( 20), UINT8_C( 14), UINT8_C( 1), UINT8_C( 6), UINT8_C( 15), UINT8_C( 3), UINT8_C( 95), UINT8_C( 1), UINT8_C( 4), UINT8_C( 1), UINT8_C( 7), UINT8_C( 1)), simde_x_mm256_set_epu8(UINT8_C( 1), UINT8_C( 0), UINT8_C( 48), UINT8_C( 12), UINT8_C( 0), UINT8_C(175), UINT8_C( 12), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 2), UINT8_C( 2), UINT8_C( 0), UINT8_C( 55), UINT8_C( 7), UINT8_C( 63), UINT8_C( 25), UINT8_C( 18), UINT8_C( 24), UINT8_C( 56), UINT8_C( 4), UINT8_C( 11), UINT8_C( 93), UINT8_C( 31), UINT8_C( 3), UINT8_C( 58), UINT8_C( 1), UINT8_C( 6), UINT8_C( 38), UINT8_C(172), UINT8_C( 14), UINT8_C(120)) }, { simde_x_mm256_set_epu8(UINT8_C( 3), UINT8_C( 62), UINT8_C(201), UINT8_C( 91), UINT8_C( 81), UINT8_C(108), UINT8_C(219), UINT8_C(124), UINT8_C(107), UINT8_C(229), UINT8_C(194), UINT8_C( 6), UINT8_C(247), UINT8_C(122), UINT8_C( 69), UINT8_C(216), UINT8_C(192), UINT8_C(132), UINT8_C( 14), UINT8_C(210), UINT8_C(242), UINT8_C(228), UINT8_C( 76), UINT8_C(247), UINT8_C(164), UINT8_C(249), UINT8_C(124), UINT8_C(200), UINT8_C(141), UINT8_C(206), UINT8_C(142), UINT8_C(235)), simde_x_mm256_set_epu8(UINT8_C(182), UINT8_C( 3), UINT8_C( 13), UINT8_C( 91), UINT8_C( 12), UINT8_C( 10), UINT8_C( 1), UINT8_C( 3), UINT8_C( 4), UINT8_C( 8), UINT8_C( 93), UINT8_C( 1), UINT8_C( 2), UINT8_C( 38), UINT8_C( 3), UINT8_C(172), UINT8_C( 38), 
UINT8_C( 15), UINT8_C( 55), UINT8_C( 26), UINT8_C( 4), UINT8_C( 16), UINT8_C( 28), UINT8_C( 54), UINT8_C( 21), UINT8_C( 30), UINT8_C( 3), UINT8_C( 39), UINT8_C( 14), UINT8_C(171), UINT8_C( 2), UINT8_C( 4)), simde_x_mm256_set_epu8(UINT8_C( 0), UINT8_C( 20), UINT8_C( 15), UINT8_C( 1), UINT8_C( 6), UINT8_C( 10), UINT8_C(219), UINT8_C( 41), UINT8_C( 26), UINT8_C( 28), UINT8_C( 2), UINT8_C( 6), UINT8_C(123), UINT8_C( 3), UINT8_C( 23), UINT8_C( 1), UINT8_C( 5), UINT8_C( 8), UINT8_C( 0), UINT8_C( 8), UINT8_C( 60), UINT8_C( 14), UINT8_C( 2), UINT8_C( 4), UINT8_C( 7), UINT8_C( 8), UINT8_C( 41), UINT8_C( 5), UINT8_C( 10), UINT8_C( 1), UINT8_C( 71), UINT8_C( 58)) }, { simde_x_mm256_set_epu8(UINT8_C(168), UINT8_C( 0), UINT8_C(141), UINT8_C(215), UINT8_C( 23), UINT8_C(105), UINT8_C(153), UINT8_C(228), UINT8_C(144), UINT8_C(204), UINT8_C(214), UINT8_C(202), UINT8_C(227), UINT8_C(255), UINT8_C( 22), UINT8_C(115), UINT8_C(131), UINT8_C(142), UINT8_C( 73), UINT8_C(133), UINT8_C( 47), UINT8_C(243), UINT8_C(254), UINT8_C(234), UINT8_C( 91), UINT8_C(217), UINT8_C(119), UINT8_C(247), UINT8_C(245), UINT8_C( 31), UINT8_C( 46), UINT8_C( 19)), simde_x_mm256_set_epu8(UINT8_C( 1), UINT8_C(248), UINT8_C( 3), UINT8_C( 9), UINT8_C( 3), UINT8_C( 87), UINT8_C(117), UINT8_C( 58), UINT8_C( 18), UINT8_C( 9), UINT8_C( 7), UINT8_C( 77), UINT8_C( 11), UINT8_C( 11), UINT8_C( 28), UINT8_C( 49), UINT8_C( 64), UINT8_C( 46), UINT8_C( 5), UINT8_C( 1), UINT8_C(115), UINT8_C( 2), UINT8_C( 1), UINT8_C( 1), UINT8_C( 86), UINT8_C( 10), UINT8_C( 3), UINT8_C( 12), UINT8_C( 49), UINT8_C(155), UINT8_C( 1), UINT8_C( 3)), simde_x_mm256_set_epu8(UINT8_C(168), UINT8_C( 0), UINT8_C( 47), UINT8_C( 23), UINT8_C( 7), UINT8_C( 1), UINT8_C( 1), UINT8_C( 3), UINT8_C( 8), UINT8_C( 22), UINT8_C( 30), UINT8_C( 2), UINT8_C( 20), UINT8_C( 23), UINT8_C( 0), UINT8_C( 2), UINT8_C( 2), UINT8_C( 3), UINT8_C( 14), UINT8_C(133), UINT8_C( 0), UINT8_C(121), UINT8_C(254), UINT8_C(234), UINT8_C( 1), UINT8_C( 21), UINT8_C( 39), UINT8_C( 20), 
UINT8_C( 5), UINT8_C( 0), UINT8_C( 46), UINT8_C( 6)) }, { simde_x_mm256_set_epu8(UINT8_C(163), UINT8_C(117), UINT8_C( 13), UINT8_C( 71), UINT8_C(173), UINT8_C(230), UINT8_C(206), UINT8_C( 2), UINT8_C( 15), UINT8_C(252), UINT8_C( 14), UINT8_C(197), UINT8_C(249), UINT8_C(198), UINT8_C( 30), UINT8_C(180), UINT8_C(128), UINT8_C( 78), UINT8_C(184), UINT8_C(254), UINT8_C(184), UINT8_C(231), UINT8_C(238), UINT8_C( 30), UINT8_C(194), UINT8_C( 37), UINT8_C(226), UINT8_C( 86), UINT8_C(140), UINT8_C( 24), UINT8_C(144), UINT8_C( 16)), simde_x_mm256_set_epu8(UINT8_C( 48), UINT8_C( 1), UINT8_C( 7), UINT8_C( 6), UINT8_C(119), UINT8_C( 41), UINT8_C(111), UINT8_C( 8), UINT8_C(135), UINT8_C( 2), UINT8_C( 23), UINT8_C( 1), UINT8_C( 88), UINT8_C( 15), UINT8_C( 65), UINT8_C( 79), UINT8_C( 29), UINT8_C( 5), UINT8_C( 5), UINT8_C( 6), UINT8_C( 44), UINT8_C( 21), UINT8_C( 2), UINT8_C( 3), UINT8_C( 15), UINT8_C( 1), UINT8_C( 3), UINT8_C( 3), UINT8_C( 1), UINT8_C( 10), UINT8_C( 1), UINT8_C( 55)), simde_x_mm256_set_epu8(UINT8_C( 3), UINT8_C(117), UINT8_C( 1), UINT8_C( 11), UINT8_C( 1), UINT8_C( 5), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C(126), UINT8_C( 0), UINT8_C(197), UINT8_C( 2), UINT8_C( 13), UINT8_C( 0), UINT8_C( 2), UINT8_C( 4), UINT8_C( 15), UINT8_C( 36), UINT8_C( 42), UINT8_C( 4), UINT8_C( 11), UINT8_C(119), UINT8_C( 10), UINT8_C( 12), UINT8_C( 37), UINT8_C( 75), UINT8_C( 28), UINT8_C(140), UINT8_C( 2), UINT8_C(144), UINT8_C( 0)) }, { simde_x_mm256_set_epu8(UINT8_C(239), UINT8_C(204), UINT8_C( 51), UINT8_C(246), UINT8_C( 77), UINT8_C(149), UINT8_C( 40), UINT8_C( 86), UINT8_C( 29), UINT8_C( 8), UINT8_C(140), UINT8_C(202), UINT8_C(138), UINT8_C(208), UINT8_C(142), UINT8_C( 95), UINT8_C(247), UINT8_C(102), UINT8_C( 63), UINT8_C(232), UINT8_C(115), UINT8_C(187), UINT8_C(122), UINT8_C(179), UINT8_C( 81), UINT8_C(192), UINT8_C( 47), UINT8_C( 34), UINT8_C( 24), UINT8_C(133), UINT8_C( 98), UINT8_C(208)), simde_x_mm256_set_epu8(UINT8_C( 11), UINT8_C( 8), UINT8_C( 2), UINT8_C( 10), 
UINT8_C( 3), UINT8_C( 7), UINT8_C( 38), UINT8_C( 21), UINT8_C(247), UINT8_C( 14), UINT8_C( 4), UINT8_C( 3), UINT8_C( 85), UINT8_C( 59), UINT8_C( 41), UINT8_C( 1), UINT8_C( 1), UINT8_C(250), UINT8_C( 1), UINT8_C( 2), UINT8_C( 6), UINT8_C( 8), UINT8_C( 6), UINT8_C( 40), UINT8_C(136), UINT8_C( 10), UINT8_C( 29), UINT8_C( 7), UINT8_C( 36), UINT8_C( 8), UINT8_C( 1), UINT8_C( 7)), simde_x_mm256_set_epu8(UINT8_C( 21), UINT8_C( 25), UINT8_C( 25), UINT8_C( 24), UINT8_C( 25), UINT8_C( 21), UINT8_C( 1), UINT8_C( 4), UINT8_C( 0), UINT8_C( 0), UINT8_C( 35), UINT8_C( 67), UINT8_C( 1), UINT8_C( 3), UINT8_C( 3), UINT8_C( 95), UINT8_C(247), UINT8_C( 0), UINT8_C( 63), UINT8_C(116), UINT8_C( 19), UINT8_C( 23), UINT8_C( 20), UINT8_C( 4), UINT8_C( 0), UINT8_C( 19), UINT8_C( 1), UINT8_C( 4), UINT8_C( 0), UINT8_C( 16), UINT8_C( 98), UINT8_C( 29)) }, { simde_x_mm256_set_epu8(UINT8_C(179), UINT8_C(197), UINT8_C(124), UINT8_C(228), UINT8_C(210), UINT8_C(205), UINT8_C(251), UINT8_C( 37), UINT8_C( 37), UINT8_C( 57), UINT8_C( 27), UINT8_C( 38), UINT8_C( 13), UINT8_C(212), UINT8_C(201), UINT8_C(125), UINT8_C( 84), UINT8_C(229), UINT8_C( 76), UINT8_C(128), UINT8_C(139), UINT8_C(203), UINT8_C(238), UINT8_C(218), UINT8_C( 40), UINT8_C( 95), UINT8_C(243), UINT8_C(110), UINT8_C( 74), UINT8_C( 0), UINT8_C(215), UINT8_C( 43)), simde_x_mm256_set_epu8(UINT8_C( 2), UINT8_C( 2), UINT8_C( 4), UINT8_C( 5), UINT8_C( 7), UINT8_C( 2), UINT8_C(195), UINT8_C( 2), UINT8_C( 30), UINT8_C( 1), UINT8_C( 9), UINT8_C( 24), UINT8_C( 6), UINT8_C( 7), UINT8_C( 28), UINT8_C( 58), UINT8_C( 3), UINT8_C( 77), UINT8_C( 90), UINT8_C( 51), UINT8_C( 13), UINT8_C( 12), UINT8_C( 7), UINT8_C( 91), UINT8_C(243), UINT8_C( 40), UINT8_C( 1), UINT8_C( 45), UINT8_C( 77), UINT8_C( 45), UINT8_C( 60), UINT8_C( 3)), simde_x_mm256_set_epu8(UINT8_C( 89), UINT8_C( 98), UINT8_C( 31), UINT8_C( 45), UINT8_C( 30), UINT8_C(102), UINT8_C( 1), UINT8_C( 18), UINT8_C( 1), UINT8_C( 57), UINT8_C( 3), UINT8_C( 1), UINT8_C( 2), UINT8_C( 30), UINT8_C( 7), 
UINT8_C( 2), UINT8_C( 28), UINT8_C( 2), UINT8_C( 0), UINT8_C( 2), UINT8_C( 10), UINT8_C( 16), UINT8_C( 34), UINT8_C( 2), UINT8_C( 0), UINT8_C( 2), UINT8_C(243), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 3), UINT8_C( 14)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_div_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_div_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu16(UINT16_C( 50042), UINT16_C( 33648), UINT16_C( 7535), UINT16_C( 12279), UINT16_C( 36071), UINT16_C( 18107), UINT16_C( 48674), UINT16_C( 48206), UINT16_C( 9011), UINT16_C( 45275), UINT16_C( 7845), UINT16_C( 54048), UINT16_C( 27322), UINT16_C( 31657), UINT16_C( 43497), UINT16_C( 33598)), simde_x_mm256_set_epu16(UINT16_C( 12011), UINT16_C( 249), UINT16_C( 5), UINT16_C( 2), UINT16_C( 1870), UINT16_C( 2904), UINT16_C( 1530), UINT16_C( 42479), UINT16_C( 63442), UINT16_C( 1039), UINT16_C( 54), UINT16_C( 1), UINT16_C( 98), UINT16_C( 7948), UINT16_C( 2053), UINT16_C( 29)), simde_x_mm256_set_epu16(UINT16_C( 4), UINT16_C( 135), UINT16_C( 1507), UINT16_C( 6139), UINT16_C( 19), UINT16_C( 6), UINT16_C( 31), UINT16_C( 1), UINT16_C( 0), UINT16_C( 43), UINT16_C( 145), UINT16_C( 54048), UINT16_C( 278), UINT16_C( 3), UINT16_C( 21), UINT16_C( 1158)) }, { simde_x_mm256_set_epu16(UINT16_C( 31411), UINT16_C( 55001), UINT16_C( 38051), UINT16_C( 20389), UINT16_C( 61351), UINT16_C( 22045), UINT16_C( 61939), UINT16_C( 10168), UINT16_C( 65482), UINT16_C( 32951), UINT16_C( 59114), UINT16_C( 9472), UINT16_C( 21787), UINT16_C( 1387), UINT16_C( 60519), UINT16_C( 39038)), simde_x_mm256_set_epu16(UINT16_C( 11771), UINT16_C( 1), UINT16_C( 490), UINT16_C( 32408), UINT16_C( 2225), UINT16_C( 134), UINT16_C( 13968), UINT16_C( 1), UINT16_C( 387), UINT16_C( 14591), UINT16_C( 24), UINT16_C( 46), UINT16_C( 8450), 
UINT16_C( 1053), UINT16_C( 908), UINT16_C( 5686)), simde_x_mm256_set_epu16(UINT16_C( 2), UINT16_C( 55001), UINT16_C( 77), UINT16_C( 0), UINT16_C( 27), UINT16_C( 164), UINT16_C( 4), UINT16_C( 10168), UINT16_C( 169), UINT16_C( 2), UINT16_C( 2463), UINT16_C( 205), UINT16_C( 2), UINT16_C( 1), UINT16_C( 66), UINT16_C( 6)) }, { simde_x_mm256_set_epu16(UINT16_C( 22899), UINT16_C( 630), UINT16_C( 34558), UINT16_C( 7884), UINT16_C( 39724), UINT16_C( 33230), UINT16_C( 54475), UINT16_C( 22805), UINT16_C( 61755), UINT16_C( 34661), UINT16_C( 28373), UINT16_C( 58279), UINT16_C( 22187), UINT16_C( 56981), UINT16_C( 43877), UINT16_C( 3469)), simde_x_mm256_set_epu16(UINT16_C( 12306), UINT16_C( 182), UINT16_C( 29239), UINT16_C( 4194), UINT16_C( 818), UINT16_C( 16), UINT16_C( 5), UINT16_C( 38), UINT16_C( 42688), UINT16_C( 8), UINT16_C( 1), UINT16_C( 96), UINT16_C( 3), UINT16_C( 1), UINT16_C( 508), UINT16_C( 1)), simde_x_mm256_set_epu16(UINT16_C( 1), UINT16_C( 3), UINT16_C( 1), UINT16_C( 1), UINT16_C( 48), UINT16_C( 2076), UINT16_C( 10895), UINT16_C( 600), UINT16_C( 1), UINT16_C( 4332), UINT16_C( 28373), UINT16_C( 607), UINT16_C( 7395), UINT16_C( 56981), UINT16_C( 86), UINT16_C( 3469)) }, { simde_x_mm256_set_epu16(UINT16_C( 29363), UINT16_C( 50584), UINT16_C( 56168), UINT16_C( 44370), UINT16_C( 62910), UINT16_C( 23255), UINT16_C( 39479), UINT16_C( 21044), UINT16_C( 7491), UINT16_C( 25737), UINT16_C( 6938), UINT16_C( 40142), UINT16_C( 22210), UINT16_C( 63545), UINT16_C( 33358), UINT16_C( 9014)), simde_x_mm256_set_epu16(UINT16_C( 61), UINT16_C( 274), UINT16_C( 365), UINT16_C( 58937), UINT16_C( 2), UINT16_C( 172), UINT16_C( 432), UINT16_C( 2), UINT16_C( 957), UINT16_C( 351), UINT16_C( 18), UINT16_C( 12717), UINT16_C( 4), UINT16_C( 417), UINT16_C( 1), UINT16_C( 10550)), simde_x_mm256_set_epu16(UINT16_C( 481), UINT16_C( 184), UINT16_C( 153), UINT16_C( 0), UINT16_C( 31455), UINT16_C( 135), UINT16_C( 91), UINT16_C( 10522), UINT16_C( 7), UINT16_C( 73), UINT16_C( 385), UINT16_C( 3), UINT16_C( 
5552), UINT16_C( 152), UINT16_C( 33358), UINT16_C( 0)) }, { simde_x_mm256_set_epu16(UINT16_C( 22208), UINT16_C( 58940), UINT16_C( 24739), UINT16_C( 29405), UINT16_C( 9863), UINT16_C( 41917), UINT16_C( 30045), UINT16_C( 40634), UINT16_C( 50211), UINT16_C( 4668), UINT16_C( 42314), UINT16_C( 29370), UINT16_C( 57744), UINT16_C( 37787), UINT16_C( 17171), UINT16_C( 34222)), simde_x_mm256_set_epu16(UINT16_C( 4256), UINT16_C( 23971), UINT16_C( 171), UINT16_C( 12), UINT16_C( 8070), UINT16_C( 2906), UINT16_C( 22), UINT16_C( 107), UINT16_C( 3), UINT16_C( 1), UINT16_C( 28355), UINT16_C( 2210), UINT16_C( 1), UINT16_C( 1161), UINT16_C( 613), UINT16_C( 51426)), simde_x_mm256_set_epu16(UINT16_C( 5), UINT16_C( 2), UINT16_C( 144), UINT16_C( 2450), UINT16_C( 1), UINT16_C( 14), UINT16_C( 1365), UINT16_C( 379), UINT16_C( 16737), UINT16_C( 4668), UINT16_C( 1), UINT16_C( 13), UINT16_C( 57744), UINT16_C( 32), UINT16_C( 28), UINT16_C( 0)) }, { simde_x_mm256_set_epu16(UINT16_C( 9143), UINT16_C( 55963), UINT16_C( 46820), UINT16_C( 55354), UINT16_C( 21540), UINT16_C( 21596), UINT16_C( 49435), UINT16_C( 42142), UINT16_C( 28170), UINT16_C( 3714), UINT16_C( 39462), UINT16_C( 28043), UINT16_C( 45359), UINT16_C( 22609), UINT16_C( 55149), UINT16_C( 21886)), simde_x_mm256_set_epu16(UINT16_C( 3121), UINT16_C( 103), UINT16_C( 1), UINT16_C( 283), UINT16_C( 201), UINT16_C( 53), UINT16_C( 25996), UINT16_C( 3169), UINT16_C( 1), UINT16_C( 2), UINT16_C( 38), UINT16_C( 24), UINT16_C( 55), UINT16_C( 25444), UINT16_C( 5182), UINT16_C( 9)), simde_x_mm256_set_epu16(UINT16_C( 2), UINT16_C( 543), UINT16_C( 46820), UINT16_C( 195), UINT16_C( 107), UINT16_C( 407), UINT16_C( 1), UINT16_C( 13), UINT16_C( 28170), UINT16_C( 1857), UINT16_C( 1038), UINT16_C( 1168), UINT16_C( 824), UINT16_C( 0), UINT16_C( 10), UINT16_C( 2431)) }, { simde_x_mm256_set_epu16(UINT16_C( 51894), UINT16_C( 1840), UINT16_C( 33552), UINT16_C( 50070), UINT16_C( 16848), UINT16_C( 13340), UINT16_C( 25356), UINT16_C( 34016), UINT16_C( 61275), UINT16_C( 
22886), UINT16_C( 28292), UINT16_C( 37845), UINT16_C( 1481), UINT16_C( 559), UINT16_C( 12899), UINT16_C( 38851)), simde_x_mm256_set_epu16(UINT16_C( 16266), UINT16_C( 376), UINT16_C( 62048), UINT16_C( 8), UINT16_C( 53), UINT16_C( 1573), UINT16_C( 8), UINT16_C( 212), UINT16_C( 15505), UINT16_C( 1), UINT16_C( 10), UINT16_C( 2744), UINT16_C( 2), UINT16_C( 5), UINT16_C( 4478), UINT16_C( 12656)), simde_x_mm256_set_epu16(UINT16_C( 3), UINT16_C( 4), UINT16_C( 0), UINT16_C( 6258), UINT16_C( 317), UINT16_C( 8), UINT16_C( 3169), UINT16_C( 160), UINT16_C( 3), UINT16_C( 22886), UINT16_C( 2829), UINT16_C( 13), UINT16_C( 740), UINT16_C( 111), UINT16_C( 2), UINT16_C( 3)) }, { simde_x_mm256_set_epu16(UINT16_C( 40946), UINT16_C( 11832), UINT16_C( 52869), UINT16_C( 41324), UINT16_C( 41064), UINT16_C( 57085), UINT16_C( 14204), UINT16_C( 23869), UINT16_C( 30467), UINT16_C( 20149), UINT16_C( 58844), UINT16_C( 49602), UINT16_C( 36092), UINT16_C( 39146), UINT16_C( 62840), UINT16_C( 19573)), simde_x_mm256_set_epu16(UINT16_C( 7725), UINT16_C( 5897), UINT16_C( 81), UINT16_C( 199), UINT16_C( 33008), UINT16_C( 55443), UINT16_C( 925), UINT16_C( 4043), UINT16_C( 362), UINT16_C( 156), UINT16_C( 2592), UINT16_C( 29), UINT16_C( 213), UINT16_C( 14), UINT16_C( 39), UINT16_C( 178)), simde_x_mm256_set_epu16(UINT16_C( 5), UINT16_C( 2), UINT16_C( 652), UINT16_C( 207), UINT16_C( 1), UINT16_C( 1), UINT16_C( 15), UINT16_C( 5), UINT16_C( 84), UINT16_C( 129), UINT16_C( 22), UINT16_C( 1710), UINT16_C( 169), UINT16_C( 2796), UINT16_C( 1611), UINT16_C( 109)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_div_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_div_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu32(UINT32_C( 621216267), UINT32_C(2973447507), UINT32_C(1814279233), 
UINT32_C(3673557536), UINT32_C(4015780858), UINT32_C(1070914538), UINT32_C(2707640519), UINT32_C(3041291274)), simde_x_mm256_set_epu32(UINT32_C( 122731), UINT32_C( 51630147), UINT32_C( 152670), UINT32_C( 7731229), UINT32_C( 711400), UINT32_C( 1744981), UINT32_C( 164943127), UINT32_C( 169494)), simde_x_mm256_set_epu32(UINT32_C( 5061), UINT32_C( 57), UINT32_C( 11883), UINT32_C( 475), UINT32_C( 5644), UINT32_C( 613), UINT32_C( 16), UINT32_C( 17943)) }, { simde_x_mm256_set_epu32(UINT32_C(1084014678), UINT32_C(1666523830), UINT32_C(3454667769), UINT32_C(4029614313), UINT32_C(3425016021), UINT32_C(2449839571), UINT32_C(1601532569), UINT32_C(1519388398)), simde_x_mm256_set_epu32(UINT32_C( 130157), UINT32_C( 5585515), UINT32_C( 62691231), UINT32_C( 37123), UINT32_C( 2515600), UINT32_C( 106484982), UINT32_C(4168501606), UINT32_C( 2781814)), simde_x_mm256_set_epu32(UINT32_C( 8328), UINT32_C( 298), UINT32_C( 55), UINT32_C( 108547), UINT32_C( 1361), UINT32_C( 23), UINT32_C( 0), UINT32_C( 546)) }, { simde_x_mm256_set_epu32(UINT32_C(2187853776), UINT32_C( 131263503), UINT32_C( 20338031), UINT32_C(3062800456), UINT32_C(1802896354), UINT32_C( 22231847), UINT32_C(3438214155), UINT32_C(1776513196)), simde_x_mm256_set_epu32(UINT32_C( 28353115), UINT32_C( 92496104), UINT32_C( 15335526), UINT32_C( 99105532), UINT32_C( 5905009), UINT32_C( 27824), UINT32_C( 28986), UINT32_C( 12459911)), simde_x_mm256_set_epu32(UINT32_C( 77), UINT32_C( 1), UINT32_C( 1), UINT32_C( 30), UINT32_C( 305), UINT32_C( 799), UINT32_C( 118616), UINT32_C( 142)) }, { simde_x_mm256_set_epu32(UINT32_C( 524596333), UINT32_C(3965897825), UINT32_C(1593754725), UINT32_C( 694203496), UINT32_C(1917650066), UINT32_C(2692610113), UINT32_C(1620259645), UINT32_C( 607116294)), simde_x_mm256_set_epu32(UINT32_C( 29757558), UINT32_C( 80117), UINT32_C( 412054571), UINT32_C( 878110), UINT32_C(4124070325), UINT32_C( 8250706), UINT32_C( 7930575), UINT32_C( 51813)), simde_x_mm256_set_epu32(UINT32_C( 17), UINT32_C( 49501), UINT32_C( 3), 
UINT32_C( 790), UINT32_C( 0), UINT32_C( 326), UINT32_C( 204), UINT32_C( 11717)) }, { simde_x_mm256_set_epu32(UINT32_C( 625862951), UINT32_C( 793130310), UINT32_C(2489185635), UINT32_C(2468815203), UINT32_C(3079066921), UINT32_C( 802958712), UINT32_C(1537818066), UINT32_C(1678295724)), simde_x_mm256_set_epu32(UINT32_C( 8259237), UINT32_C( 229091), UINT32_C( 7899398), UINT32_C( 41009690), UINT32_C( 26030333), UINT32_C( 228627), UINT32_C(1200021710), UINT32_C( 186204)), simde_x_mm256_set_epu32(UINT32_C( 75), UINT32_C( 3462), UINT32_C( 315), UINT32_C( 60), UINT32_C( 118), UINT32_C( 3512), UINT32_C( 1), UINT32_C( 9013)) }, { simde_x_mm256_set_epu32(UINT32_C(3334078645), UINT32_C(2226952893), UINT32_C(1901933944), UINT32_C(3456551705), UINT32_C(3394846076), UINT32_C(2592342753), UINT32_C(1822000161), UINT32_C(3060682219)), simde_x_mm256_set_epu32(UINT32_C( 55529), UINT32_C( 95077), UINT32_C( 61849330), UINT32_C( 77269), UINT32_C( 181901), UINT32_C( 66287), UINT32_C( 46407), UINT32_C( 1962)), simde_x_mm256_set_epu32(UINT32_C( 60042), UINT32_C( 23422), UINT32_C( 30), UINT32_C( 44734), UINT32_C( 18663), UINT32_C( 39107), UINT32_C( 39261), UINT32_C( 1559980)) }, { simde_x_mm256_set_epu32(UINT32_C(2418478797), UINT32_C(3856569345), UINT32_C(2562700829), UINT32_C(2670510577), UINT32_C(3958231909), UINT32_C(3386864730), UINT32_C(2249491002), UINT32_C( 367242130)), simde_x_mm256_set_epu32(UINT32_C( 106591767), UINT32_C( 591565864), UINT32_C( 241208), UINT32_C( 384474), UINT32_C( 63569588), UINT32_C(1007016971), UINT32_C( 701090048), UINT32_C( 4482965)), simde_x_mm256_set_epu32(UINT32_C( 22), UINT32_C( 6), UINT32_C( 10624), UINT32_C( 6945), UINT32_C( 62), UINT32_C( 3), UINT32_C( 3), UINT32_C( 81)) }, { simde_x_mm256_set_epu32(UINT32_C(3497551851), UINT32_C(3538232808), UINT32_C(3581222707), UINT32_C(2092274030), UINT32_C(1202922035), UINT32_C(3381143079), UINT32_C(1645890362), UINT32_C(2497764821)), simde_x_mm256_set_epu32(UINT32_C( 7255461), UINT32_C( 387871), UINT32_C( 
216379987), UINT32_C( 1108325), UINT32_C( 9779926), UINT32_C( 265173482), UINT32_C( 305369), UINT32_C(1628979148)), simde_x_mm256_set_epu32(UINT32_C( 482), UINT32_C( 9122), UINT32_C( 16), UINT32_C( 1887), UINT32_C( 122), UINT32_C( 12), UINT32_C( 5389), UINT32_C( 1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_div_epu32(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_div_epu64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu64x(UINT64_C(10385902570114433083), UINT64_C(14228451038995253976), UINT64_C( 3524803476344021799), UINT64_C( 9008088981795720991)), simde_x_mm256_set_epu64x(UINT64_C( 11435629647830), UINT64_C( 134705148152), UINT64_C( 1685), UINT64_C( 72468903699)), simde_x_mm256_set_epu64x(UINT64_C(18446744073708846728), UINT64_C(18446744073678236607), UINT64_C( 2091871499313959), UINT64_C( 124302818)) }, { simde_x_mm256_set_epu64x(UINT64_C( 2776707612149100363), UINT64_C(15446686956822865619), UINT64_C( 8116027459326381863), UINT64_C(10577862568627142107)), simde_x_mm256_set_epu64x(UINT64_C( 160900), UINT64_C( 876), UINT64_C( 6656645), UINT64_C( 198)), simde_x_mm256_set_epu64x(UINT64_C( 17257349982281), UINT64_C(18443319350973379601), UINT64_C( 1219236936824), UINT64_C(18407002247926307124)) }, { simde_x_mm256_set_epu64x(UINT64_C(17966513918331168112), UINT64_C(15404442576328540960), UINT64_C( 1544001744444053712), UINT64_C(12311626015854130554)), simde_x_mm256_set_epu64x(UINT64_C( 73453582701), UINT64_C( 2241703492778), UINT64_C( 149), UINT64_C( 1898802076338580)), simde_x_mm256_set_epu64x(UINT64_C(18446744073703013744), UINT64_C(18446744073708194478), UINT64_C( 10362427815060763), UINT64_C(18446744073709548385)) }, { simde_x_mm256_set_epu64x(UINT64_C( 4996618049503500636), UINT64_C( 3587306346705364576), UINT64_C( 1416661578746677042), 
UINT64_C(18012200189266188151)), simde_x_mm256_set_epu64x(UINT64_C( 9141117518131), UINT64_C( 259684114065326460), UINT64_C( 3735868918), UINT64_C( 13028085907926)), simde_x_mm256_set_epu64x(UINT64_C( 546609), UINT64_C( 13), UINT64_C( 379205376), UINT64_C(18446744073709518262)) }, { simde_x_mm256_set_epu64x(UINT64_C(17900245410321819662), UINT64_C( 86463307544105486), UINT64_C( 7004808110937624000), UINT64_C( 5352056724630121100)), simde_x_mm256_set_epu64x(UINT64_C( 574976069), UINT64_C( 26168849408611714), UINT64_C( 479458176), UINT64_C( 85883846687)), simde_x_mm256_set_epu64x(UINT64_C(18446744072759079601), UINT64_C( 3), UINT64_C( 14609841820), UINT64_C( 62317384)) }, { simde_x_mm256_set_epu64x(UINT64_C(18191047755947595201), UINT64_C(11274709867061747164), UINT64_C( 4957427800472277352), UINT64_C( 2636046644056480855)), simde_x_mm256_set_epu64x(UINT64_C( 455513034), UINT64_C( 4176708352330988763), UINT64_C( 255407), UINT64_C( 77468887445572755)), simde_x_mm256_set_epu64x(UINT64_C(18446744073148214621), UINT64_C(18446744073709551615), UINT64_C( 19409913590748), UINT64_C( 34)) }, { simde_x_mm256_set_epu64x(UINT64_C(17236629464649076584), UINT64_C( 6716520602983844465), UINT64_C(12794135593178656259), UINT64_C( 3865374743078695737)), simde_x_mm256_set_epu64x(UINT64_C( 13893724010244), UINT64_C( 1), UINT64_C( 142890905), UINT64_C( 135073488234)), simde_x_mm256_set_epu64x(UINT64_C(18446744073709464519), UINT64_C( 6716520602983844465), UINT64_C(18446744034150641408), UINT64_C( 28616827)) }, { simde_x_mm256_set_epu64x(UINT64_C( 3248934010021333275), UINT64_C( 8464322280604302303), UINT64_C(10783963704762759650), UINT64_C(14288989654597257942)), simde_x_mm256_set_epu64x(UINT64_C( 37187973814779), UINT64_C( 988730192), UINT64_C( 9409064941619), UINT64_C( 554649997)), simde_x_mm256_set_epu64x(UINT64_C( 87365), UINT64_C( 8560800862), UINT64_C(18446744073708737212), UINT64_C(18446744066213374853)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { 
simde__m256i r = simde_mm256_div_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_div_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( 114), INT8_C( 89), INT8_C( 1), INT8_C( 122), INT8_C( 12), INT8_C( 107), INT8_C( 92), INT8_C(-102), INT8_C( -63), INT8_C( 120), INT8_C( 107), INT8_C( -43), INT8_C(-119), INT8_C( -10), INT8_C( 98), INT8_C( -26), INT8_C( 122), INT8_C( 1), INT8_C( -83), INT8_C( 43), INT8_C( 82), INT8_C( -59), INT8_C( -43), INT8_C( -10), INT8_C( 77), INT8_C( -22), INT8_C( -72), INT8_C( -94), INT8_C( 75), INT8_C( -23), INT8_C( -92), INT8_C( -69), INT8_C( 108), INT8_C( 26), INT8_C( 71), INT8_C( -21), INT8_C( 15), INT8_C( 107), INT8_C(-112), INT8_C( -22), INT8_C( -24), INT8_C( 35), INT8_C( 87), INT8_C( 75), INT8_C( 27), INT8_C( -73), INT8_C( 9), INT8_C( -72), INT8_C( 35), INT8_C( -9), INT8_C( -68), INT8_C( 73), INT8_C( -61), INT8_C( 118), INT8_C( 78), INT8_C( -20), INT8_C( -42), INT8_C( -19), INT8_C(-125), INT8_C( 51), INT8_C( -14), INT8_C( 17), INT8_C( -24), INT8_C( -72)), simde_mm512_set_epi8(INT8_C( 14), INT8_C(-123), INT8_C( 73), INT8_C( -6), INT8_C( -78), INT8_C( -38), INT8_C( -82), INT8_C( -80), INT8_C( 31), INT8_C( -9), INT8_C( 35), INT8_C(-110), INT8_C( -7), INT8_C( 74), INT8_C( -30), INT8_C( 100), INT8_C( 10), INT8_C( 23), INT8_C( -11), INT8_C( 90), INT8_C( 71), INT8_C(-126), INT8_C( -11), INT8_C( -5), INT8_C( 26), INT8_C( 58), INT8_C(-123), INT8_C( 125), INT8_C(-104), INT8_C( 39), INT8_C( 75), INT8_C( 69), INT8_C( 5), INT8_C(-119), INT8_C( 20), INT8_C( 6), INT8_C( -18), INT8_C( -87), INT8_C( 95), INT8_C( 24), INT8_C( 15), INT8_C( -48), INT8_C( -40), INT8_C( 79), INT8_C(-107), INT8_C( -73), INT8_C(-108), INT8_C( -43), INT8_C( 53), INT8_C( -95), INT8_C( 75), INT8_C(-123), INT8_C( 61), INT8_C( 28), INT8_C( 20), INT8_C( -5), INT8_C(-127), INT8_C( -90), INT8_C( 94), INT8_C( -61), 
INT8_C( 91), INT8_C( -70), INT8_C(-111), INT8_C( 30)), simde_mm512_set_epi8(INT8_C( 8), INT8_C( 0), INT8_C( 0), INT8_C( -20), INT8_C( 0), INT8_C( -2), INT8_C( -1), INT8_C( 1), INT8_C( -2), INT8_C( -13), INT8_C( 3), INT8_C( 0), INT8_C( 17), INT8_C( 0), INT8_C( -3), INT8_C( 0), INT8_C( 12), INT8_C( 0), INT8_C( 7), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 3), INT8_C( 2), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 21), INT8_C( 0), INT8_C( 3), INT8_C( -3), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -2), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 4), INT8_C( 3), INT8_C( 4), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -2)) }, { simde_mm512_set_epi8(INT8_C( 12), INT8_C( -52), INT8_C( -7), INT8_C( 17), INT8_C(-122), INT8_C( 53), INT8_C( -15), INT8_C(-121), INT8_C( -47), INT8_C(-109), INT8_C( -20), INT8_C( -5), INT8_C( -34), INT8_C( 6), INT8_C( 3), INT8_C( -49), INT8_C( 63), INT8_C( 48), INT8_C( -18), INT8_C( 117), INT8_C( -63), INT8_C( 63), INT8_C( 77), INT8_C( -90), INT8_C( -12), INT8_C( 83), INT8_C( 69), INT8_C( 113), INT8_C( 28), INT8_C( 104), INT8_C( -69), INT8_C( -69), INT8_C(-128), INT8_C( 96), INT8_C( 18), INT8_C( 9), INT8_C( 99), INT8_C(-100), INT8_C( -63), INT8_C( 74), INT8_C( -69), INT8_C( 22), INT8_C( 126), INT8_C( 62), INT8_C( 46), INT8_C( 88), INT8_C( 24), INT8_C( 21), INT8_C( 121), INT8_C( 64), INT8_C( 24), INT8_C(-125), INT8_C(-125), INT8_C( -56), INT8_C( -13), INT8_C( 51), INT8_C( 53), INT8_C( -41), INT8_C( -85), INT8_C(-121), INT8_C( -44), INT8_C( -43), INT8_C( -24), INT8_C( 102)), simde_mm512_set_epi8(INT8_C( 109), INT8_C(-119), INT8_C( 12), INT8_C( 72), INT8_C( -36), INT8_C(-115), INT8_C( 98), INT8_C(-110), INT8_C( 58), INT8_C( -6), INT8_C( -54), INT8_C( 39), INT8_C( -42), INT8_C( -8), INT8_C( -77), INT8_C( -22), 
INT8_C( -49), INT8_C( 4), INT8_C( 119), INT8_C( 82), INT8_C( 112), INT8_C( 3), INT8_C( 74), INT8_C( 94), INT8_C( -27), INT8_C( 90), INT8_C( 17), INT8_C( 13), INT8_C( 5), INT8_C( 89), INT8_C(-121), INT8_C( 56), INT8_C( 46), INT8_C( -66), INT8_C( 124), INT8_C( -23), INT8_C( 38), INT8_C( 53), INT8_C( 18), INT8_C( -68), INT8_C( -6), INT8_C( -62), INT8_C( -9), INT8_C( 11), INT8_C( -6), INT8_C( 56), INT8_C( -81), INT8_C( 41), INT8_C( 112), INT8_C( 58), INT8_C( -21), INT8_C( 108), INT8_C( 17), INT8_C( 40), INT8_C( 4), INT8_C( 80), INT8_C( 75), INT8_C( 35), INT8_C( 80), INT8_C( -85), INT8_C( 88), INT8_C( -11), INT8_C( 23), INT8_C( 51)), simde_mm512_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 3), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 18), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 2), INT8_C( -1), INT8_C( 12), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 21), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 4), INT8_C( 8), INT8_C( 5), INT8_C( 1), INT8_C( 0), INT8_C( -1), INT8_C( -2), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 2), INT8_C( -1), INT8_C( -3), INT8_C( -1), INT8_C( 11), INT8_C( 0), INT8_C( -14), INT8_C( 5), INT8_C( -7), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 1), INT8_C( -1), INT8_C( -1), INT8_C( -7), INT8_C( -1), INT8_C( -3), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 1), INT8_C( 0), INT8_C( 3), INT8_C( -1), INT8_C( 2)) }, { simde_mm512_set_epi8(INT8_C(-111), INT8_C( -3), INT8_C( 110), INT8_C( -96), INT8_C( 117), INT8_C( -29), INT8_C(-127), INT8_C( 101), INT8_C(-120), INT8_C( 11), INT8_C( 87), INT8_C( 17), INT8_C(-108), INT8_C( 87), INT8_C( 4), INT8_C( -21), INT8_C( 98), INT8_C( 2), INT8_C( -60), INT8_C( -28), INT8_C( 66), INT8_C(-109), INT8_C( 8), INT8_C( -58), INT8_C( 13), INT8_C( -66), INT8_C( -49), INT8_C( 93), INT8_C(-119), INT8_C( 58), INT8_C( 30), INT8_C( 10), INT8_C( -11), INT8_C( 78), INT8_C( 76), INT8_C( 108), INT8_C( -34), INT8_C( -94), INT8_C( -77), 
INT8_C(-122), INT8_C( 37), INT8_C( -32), INT8_C( -97), INT8_C( 121), INT8_C( -95), INT8_C( -80), INT8_C( -87), INT8_C( -89), INT8_C( -4), INT8_C( 115), INT8_C( -42), INT8_C( -55), INT8_C( 95), INT8_C( -63), INT8_C( 31), INT8_C( -74), INT8_C( -45), INT8_C( 119), INT8_C( 57), INT8_C( -52), INT8_C( -69), INT8_C(-123), INT8_C( 106), INT8_C( 119)), simde_mm512_set_epi8(INT8_C( -74), INT8_C( -32), INT8_C( 89), INT8_C( 50), INT8_C(-105), INT8_C( 85), INT8_C( -71), INT8_C( 105), INT8_C( -37), INT8_C( -78), INT8_C(-107), INT8_C( -67), INT8_C( 9), INT8_C( 2), INT8_C( 83), INT8_C( 67), INT8_C( 25), INT8_C(-103), INT8_C( -90), INT8_C( 30), INT8_C( 69), INT8_C(-127), INT8_C( 114), INT8_C( -99), INT8_C( -97), INT8_C( -52), INT8_C( 120), INT8_C( 78), INT8_C( 97), INT8_C( 124), INT8_C( 31), INT8_C( 72), INT8_C( -6), INT8_C( 19), INT8_C( -4), INT8_C( -65), INT8_C( 107), INT8_C( -15), INT8_C(-116), INT8_C( -13), INT8_C( 106), INT8_C( -71), INT8_C( -14), INT8_C( -87), INT8_C(-122), INT8_C( -59), INT8_C( -65), INT8_C( -58), INT8_C( -26), INT8_C( 55), INT8_C( 28), INT8_C( -31), INT8_C( -20), INT8_C( -40), INT8_C( -47), INT8_C( 58), INT8_C( -3), INT8_C( 67), INT8_C( -47), INT8_C( 93), INT8_C( -77), INT8_C( 21), INT8_C( 49), INT8_C( -54)), simde_mm512_set_epi8(INT8_C( 1), INT8_C( 0), INT8_C( 1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 3), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -12), INT8_C( 43), INT8_C( 0), INT8_C( 0), INT8_C( 3), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 4), INT8_C( -19), INT8_C( -1), INT8_C( 0), INT8_C( 6), INT8_C( 0), INT8_C( 9), INT8_C( 0), INT8_C( 0), INT8_C( 6), INT8_C( -1), INT8_C( 0), INT8_C( 1), INT8_C( 1), INT8_C( 1), INT8_C( 0), INT8_C( 2), INT8_C( -1), INT8_C( 1), INT8_C( -4), INT8_C( 1), INT8_C( 0), INT8_C( -1), INT8_C( 15), INT8_C( 1), INT8_C( -1), INT8_C( 0), 
INT8_C( 0), INT8_C( -5), INT8_C( 2), INT8_C( -2)) }, { simde_mm512_set_epi8(INT8_C( -91), INT8_C( 110), INT8_C( 126), INT8_C( 44), INT8_C( 21), INT8_C( -84), INT8_C( 100), INT8_C( -15), INT8_C( -61), INT8_C( -53), INT8_C( 75), INT8_C( -30), INT8_C( -56), INT8_C( -86), INT8_C( 52), INT8_C( 108), INT8_C( 96), INT8_C( 6), INT8_C(-100), INT8_C(-109), INT8_C( -7), INT8_C( -22), INT8_C( 109), INT8_C( 124), INT8_C( 85), INT8_C( 53), INT8_C( -45), INT8_C( 122), INT8_C( 7), INT8_C( -21), INT8_C(-123), INT8_C( 4), INT8_C( 3), INT8_C( 94), INT8_C(-127), INT8_C( 73), INT8_C( 65), INT8_C( -69), INT8_C( -91), INT8_C(-115), INT8_C( 117), INT8_C(-104), INT8_C( 66), INT8_C( 79), INT8_C( -63), INT8_C(-115), INT8_C( -77), INT8_C( -89), INT8_C(-113), INT8_C( 34), INT8_C( 100), INT8_C( 96), INT8_C(-101), INT8_C( -34), INT8_C( 64), INT8_C( -59), INT8_C( -53), INT8_C( 87), INT8_C( 48), INT8_C( 95), INT8_C( -53), INT8_C( 61), INT8_C( 63), INT8_C( 106)), simde_mm512_set_epi8(INT8_C( -1), INT8_C( 95), INT8_C( 91), INT8_C( 117), INT8_C( 15), INT8_C( -50), INT8_C( -39), INT8_C( 74), INT8_C( 36), INT8_C( 100), INT8_C( -62), INT8_C(-111), INT8_C( 9), INT8_C( 41), INT8_C( 36), INT8_C( -21), INT8_C( 71), INT8_C( -85), INT8_C( 120), INT8_C( -33), INT8_C( 125), INT8_C( 38), INT8_C(-127), INT8_C( 39), INT8_C( 28), INT8_C(-118), INT8_C( 31), INT8_C( 92), INT8_C( 22), INT8_C( 48), INT8_C( 122), INT8_C( -6), INT8_C( 107), INT8_C(-101), INT8_C( 14), INT8_C( -17), INT8_C( 26), INT8_C( -4), INT8_C( -71), INT8_C( 13), INT8_C( -39), INT8_C( -26), INT8_C( -37), INT8_C( 110), INT8_C( 36), INT8_C( 78), INT8_C( -24), INT8_C( -52), INT8_C(-117), INT8_C( -27), INT8_C( 113), INT8_C(-111), INT8_C( -59), INT8_C( 38), INT8_C( -10), INT8_C( -53), INT8_C( 110), INT8_C( 62), INT8_C( -4), INT8_C( 19), INT8_C( -15), INT8_C( 42), INT8_C( 122), INT8_C( 105)), simde_mm512_set_epi8(INT8_C( 91), INT8_C( 1), INT8_C( 1), INT8_C( 0), INT8_C( 1), INT8_C( 1), INT8_C( -2), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 
0), INT8_C( -6), INT8_C( -2), INT8_C( 1), INT8_C( -5), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 3), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 3), INT8_C( 3), INT8_C( 0), INT8_C( -1), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -9), INT8_C( -4), INT8_C( 2), INT8_C( 17), INT8_C( 1), INT8_C( -8), INT8_C( -3), INT8_C( 4), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 3), INT8_C( 1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( -6), INT8_C( 1), INT8_C( 0), INT8_C( 1), INT8_C( -12), INT8_C( 5), INT8_C( 3), INT8_C( 1), INT8_C( 0), INT8_C( 1)) }, { simde_mm512_set_epi8(INT8_C( -55), INT8_C( -14), INT8_C( 9), INT8_C(-109), INT8_C( 77), INT8_C( -36), INT8_C( 82), INT8_C( -60), INT8_C( -11), INT8_C( 52), INT8_C( 95), INT8_C( 118), INT8_C( 124), INT8_C( 103), INT8_C( 108), INT8_C( 5), INT8_C( -7), INT8_C( 55), INT8_C( 1), INT8_C( -90), INT8_C( 89), INT8_C( 106), INT8_C( -80), INT8_C(-113), INT8_C( -97), INT8_C( 113), INT8_C( 100), INT8_C( 9), INT8_C( 122), INT8_C( -51), INT8_C(-121), INT8_C( 78), INT8_C(-100), INT8_C( 26), INT8_C( -23), INT8_C( -89), INT8_C( 20), INT8_C( 19), INT8_C( -91), INT8_C( -38), INT8_C( -59), INT8_C( 10), INT8_C(-121), INT8_C( -30), INT8_C( 79), INT8_C( 49), INT8_C( 104), INT8_C( 55), INT8_C( 2), INT8_C( -2), INT8_C( -24), INT8_C( -48), INT8_C( -25), INT8_C( -39), INT8_C( 89), INT8_C( 19), INT8_C( -33), INT8_C( 101), INT8_C( 31), INT8_C( -59), INT8_C(-123), INT8_C( 38), INT8_C( 124), INT8_C( 108)), simde_mm512_set_epi8(INT8_C( -47), INT8_C( -85), INT8_C( 13), INT8_C( -86), INT8_C( 92), INT8_C( 23), INT8_C( 69), INT8_C( -53), INT8_C( 11), INT8_C( -74), INT8_C( 93), INT8_C( 45), INT8_C( 123), INT8_C( -37), INT8_C( 6), INT8_C( -51), INT8_C( 52), INT8_C( -77), INT8_C( -79), INT8_C( -50), INT8_C( -32), INT8_C( 4), INT8_C( -47), INT8_C( -53), INT8_C( -18), INT8_C( -18), INT8_C( 115), INT8_C( 117), INT8_C( -67), INT8_C( -53), INT8_C( -72), INT8_C( 83), 
INT8_C( -37), INT8_C( 34), INT8_C( 127), INT8_C( -10), INT8_C( 126), INT8_C( -99), INT8_C(-106), INT8_C( 33), INT8_C( 106), INT8_C( -41), INT8_C( -43), INT8_C( -4), INT8_C(-104), INT8_C( 77), INT8_C(-107), INT8_C( -78), INT8_C( 126), INT8_C( 37), INT8_C(-124), INT8_C( -92), INT8_C( -30), INT8_C( -11), INT8_C( -49), INT8_C( 22), INT8_C( 41), INT8_C( 82), INT8_C( -75), INT8_C( 81), INT8_C( 39), INT8_C( -91), INT8_C( 65), INT8_C( -12)), simde_mm512_set_epi8(INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( -1), INT8_C( 1), INT8_C( 1), INT8_C( -1), INT8_C( 0), INT8_C( 1), INT8_C( 2), INT8_C( 1), INT8_C( -2), INT8_C( 18), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( -2), INT8_C( 26), INT8_C( 1), INT8_C( 2), INT8_C( 5), INT8_C( -6), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 8), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 2), INT8_C( 7), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 3), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( -3), INT8_C( 0), INT8_C( 1), INT8_C( -9)) }, { simde_mm512_set_epi8(INT8_C( 101), INT8_C( 62), INT8_C( -23), INT8_C( 48), INT8_C( 118), INT8_C( 51), INT8_C( -2), INT8_C(-103), INT8_C( 110), INT8_C( -27), INT8_C( 109), INT8_C( 60), INT8_C( 81), INT8_C( 82), INT8_C( 61), INT8_C( -96), INT8_C( -57), INT8_C( 116), INT8_C( -5), INT8_C( 0), INT8_C( 28), INT8_C( 71), INT8_C( -24), INT8_C( 46), INT8_C( -73), INT8_C( 2), INT8_C( -88), INT8_C( 76), INT8_C( 95), INT8_C( -58), INT8_C( 94), INT8_C( 46), INT8_C( 20), INT8_C( 112), INT8_C( -69), INT8_C( 111), INT8_C( -44), INT8_C( -74), INT8_C( -18), INT8_C( 53), INT8_C( 127), INT8_C( 36), INT8_C( 79), INT8_C( -48), INT8_C( 114), INT8_C( 84), INT8_C( 65), INT8_C(-112), INT8_C(-112), INT8_C( 23), INT8_C( 37), INT8_C( 63), INT8_C( -88), INT8_C( -57), INT8_C( 
100), INT8_C( 121), INT8_C( 97), INT8_C( 122), INT8_C( 12), INT8_C( -79), INT8_C( 47), INT8_C( 60), INT8_C( -36), INT8_C( -83)), simde_mm512_set_epi8(INT8_C( -6), INT8_C( 53), INT8_C( 88), INT8_C( -36), INT8_C( 96), INT8_C( 32), INT8_C( 77), INT8_C( 2), INT8_C( -8), INT8_C( -42), INT8_C( -69), INT8_C( 40), INT8_C( -69), INT8_C( 97), INT8_C( 30), INT8_C( 102), INT8_C( -84), INT8_C( -54), INT8_C(-126), INT8_C( 91), INT8_C( 69), INT8_C( 35), INT8_C( 100), INT8_C(-118), INT8_C( -93), INT8_C( 108), INT8_C( 21), INT8_C( -16), INT8_C( 32), INT8_C( 106), INT8_C( -36), INT8_C( -46), INT8_C( -28), INT8_C( -81), INT8_C( 80), INT8_C( 14), INT8_C( -78), INT8_C( 3), INT8_C( 82), INT8_C(-104), INT8_C( 13), INT8_C( -56), INT8_C(-106), INT8_C( 89), INT8_C( -24), INT8_C( 42), INT8_C( 41), INT8_C( 68), INT8_C( -88), INT8_C(-107), INT8_C( -36), INT8_C( 52), INT8_C( 32), INT8_C( -59), INT8_C( -33), INT8_C( 120), INT8_C( 47), INT8_C(-127), INT8_C( 64), INT8_C( 114), INT8_C( 107), INT8_C( -75), INT8_C( 127), INT8_C( 23)), simde_mm512_set_epi8(INT8_C( -16), INT8_C( 1), INT8_C( 0), INT8_C( -1), INT8_C( 1), INT8_C( 1), INT8_C( 0), INT8_C( -51), INT8_C( -13), INT8_C( 0), INT8_C( -1), INT8_C( 1), INT8_C( -1), INT8_C( 0), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( -2), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -4), INT8_C( -4), INT8_C( 2), INT8_C( 0), INT8_C( -2), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 7), INT8_C( 0), INT8_C( -24), INT8_C( 0), INT8_C( 0), INT8_C( 9), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -4), INT8_C( 2), INT8_C( 1), INT8_C( -1), INT8_C( 1), INT8_C( 0), INT8_C( -1), INT8_C( 1), INT8_C( -2), INT8_C( 0), INT8_C( -3), INT8_C( 1), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -3)) }, { simde_mm512_set_epi8(INT8_C( 106), INT8_C( -71), INT8_C( 61), INT8_C( 19), INT8_C( 29), INT8_C( 79), INT8_C( 45), INT8_C( 94), INT8_C(-112), INT8_C( 60), INT8_C( 2), 
INT8_C( 77), INT8_C( 30), INT8_C( -34), INT8_C( 102), INT8_C( 43), INT8_C( -87), INT8_C( 52), INT8_C(-104), INT8_C( -8), INT8_C(-103), INT8_C( 79), INT8_C( -22), INT8_C( 31), INT8_C( 11), INT8_C( 124), INT8_C( 70), INT8_C( -64), INT8_C( -91), INT8_C( 88), INT8_C( -70), INT8_C( -61), INT8_C( -84), INT8_C(-108), INT8_C( -57), INT8_C( 13), INT8_C( -58), INT8_C( -7), INT8_C( 39), INT8_C( 66), INT8_C( 50), INT8_C( -61), INT8_C( -9), INT8_C( -41), INT8_C( 25), INT8_C( -31), INT8_C( 64), INT8_C( 18), INT8_C( 73), INT8_C( 60), INT8_C( -53), INT8_C( 42), INT8_C( -1), INT8_C( 50), INT8_C( 95), INT8_C( 78), INT8_C( 39), INT8_C( -9), INT8_C(-121), INT8_C( -72), INT8_C( 48), INT8_C( 20), INT8_C( 76), INT8_C( -48)), simde_mm512_set_epi8(INT8_C( 12), INT8_C( 55), INT8_C(-111), INT8_C( -85), INT8_C( -94), INT8_C( -11), INT8_C( 57), INT8_C( 93), INT8_C( 32), INT8_C( 57), INT8_C( 61), INT8_C( -21), INT8_C(-102), INT8_C( 75), INT8_C( -15), INT8_C(-114), INT8_C( 26), INT8_C( 71), INT8_C(-127), INT8_C( -52), INT8_C( -57), INT8_C( -26), INT8_C( -36), INT8_C( -4), INT8_C( -7), INT8_C( 40), INT8_C( 60), INT8_C( 82), INT8_C( 6), INT8_C( -12), INT8_C( 52), INT8_C( -37), INT8_C( -96), INT8_C(-117), INT8_C( 104), INT8_C( -99), INT8_C( -1), INT8_C( 95), INT8_C( 81), INT8_C( -70), INT8_C( -22), INT8_C( -86), INT8_C( 114), INT8_C( -43), INT8_C(-120), INT8_C( 109), INT8_C( -86), INT8_C( -33), INT8_C( -23), INT8_C( 69), INT8_C( -80), INT8_C( 61), INT8_C( -35), INT8_C( 107), INT8_C( -31), INT8_C( 11), INT8_C( -45), INT8_C( 125), INT8_C( -53), INT8_C( -7), INT8_C( 88), INT8_C(-111), INT8_C( 86), INT8_C(-105)), simde_mm512_set_epi8(INT8_C( 8), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -7), INT8_C( 0), INT8_C( 1), INT8_C( -3), INT8_C( 1), INT8_C( 0), INT8_C( -3), INT8_C( 0), INT8_C( 0), INT8_C( -6), INT8_C( 0), INT8_C( -3), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( -3), INT8_C( 0), INT8_C( -7), INT8_C( -1), INT8_C( 3), INT8_C( 1), INT8_C( 0), INT8_C( -15), INT8_C( -7), 
INT8_C( -1), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 58), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -2), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -3), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -3), INT8_C( 7), INT8_C( 0), INT8_C( 0), INT8_C( 2), INT8_C( 10), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }, { simde_mm512_set_epi8(INT8_C( 102), INT8_C( 35), INT8_C( 43), INT8_C( -33), INT8_C( -74), INT8_C( 81), INT8_C( 81), INT8_C( 115), INT8_C( -81), INT8_C( 72), INT8_C(-127), INT8_C( 118), INT8_C(-113), INT8_C( 106), INT8_C( 25), INT8_C( 84), INT8_C( -82), INT8_C( 58), INT8_C( 13), INT8_C( -38), INT8_C( -3), INT8_C( 104), INT8_C( 85), INT8_C(-112), INT8_C( -4), INT8_C( 52), INT8_C( -2), INT8_C( -64), INT8_C( -23), INT8_C( 5), INT8_C( 33), INT8_C( -11), INT8_C( 116), INT8_C( 110), INT8_C( 21), INT8_C( 84), INT8_C( 42), INT8_C( 77), INT8_C( 25), INT8_C( 68), INT8_C( 71), INT8_C( 60), INT8_C( -51), INT8_C( -46), INT8_C( -1), INT8_C( -12), INT8_C( 88), INT8_C( 19), INT8_C( -70), INT8_C( 27), INT8_C( -6), INT8_C( 61), INT8_C( -48), INT8_C( 119), INT8_C(-107), INT8_C(-115), INT8_C( 90), INT8_C( 64), INT8_C( 19), INT8_C( 64), INT8_C( -19), INT8_C( -7), INT8_C( 40), INT8_C( -68)), simde_mm512_set_epi8(INT8_C( 66), INT8_C( 58), INT8_C( 74), INT8_C( -51), INT8_C( -69), INT8_C( -59), INT8_C( 84), INT8_C( 27), INT8_C( 43), INT8_C( -40), INT8_C( -56), INT8_C( 125), INT8_C( 1), INT8_C( 92), INT8_C( -82), INT8_C( 49), INT8_C( -14), INT8_C( 14), INT8_C( 52), INT8_C( -25), INT8_C( 47), INT8_C( -55), INT8_C( -54), INT8_C( -50), INT8_C( -40), INT8_C(-118), INT8_C( 97), INT8_C( -86), INT8_C( 93), INT8_C( 116), INT8_C( -54), INT8_C(-127), INT8_C( 17), INT8_C( -57), INT8_C( -81), INT8_C( -49), INT8_C( 73), INT8_C( 79), INT8_C( -43), INT8_C( 61), INT8_C( -14), INT8_C( 18), INT8_C( 125), INT8_C( -11), INT8_C( -70), INT8_C( 81), INT8_C(-107), INT8_C( -13), INT8_C( -75), INT8_C( 46), 
INT8_C( 17), INT8_C( -39), INT8_C( -35), INT8_C( 57), INT8_C( -8), INT8_C( -62), INT8_C( -61), INT8_C( 118), INT8_C( -33), INT8_C( 116), INT8_C( -5), INT8_C( 120), INT8_C( 126), INT8_C( -48)), simde_mm512_set_epi8(INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( -1), INT8_C( 0), INT8_C( 4), INT8_C( -1), INT8_C( -1), INT8_C( 2), INT8_C( 0), INT8_C(-113), INT8_C( 1), INT8_C( 0), INT8_C( 1), INT8_C( 5), INT8_C( 4), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 6), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( -5), INT8_C( 3), INT8_C( 0), INT8_C( 4), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 1), INT8_C( 2), INT8_C( 13), INT8_C( 1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 3), INT8_C( 0), INT8_C( 0), INT8_C( 1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_div_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_div_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi16(INT16_C(-20040), INT16_C( 8356), INT16_C(-32332), INT16_C( 10333), INT16_C( -5915), INT16_C( 26879), INT16_C( 2532), INT16_C( 21861), INT16_C(-27724), INT16_C(-13980), INT16_C(-30566), INT16_C(-12851), INT16_C( 30608), INT16_C( 27665), INT16_C( 548), INT16_C( 7224), INT16_C(-23312), INT16_C( -9410), INT16_C( 2838), INT16_C(-28448), INT16_C( 30003), INT16_C(-15914), INT16_C(-27549), INT16_C( 6027), INT16_C( 28687), INT16_C(-19881), INT16_C( 5735), INT16_C( 9519), INT16_C( -3746), INT16_C(-25453), INT16_C(-16345), INT16_C(-27291)), simde_mm512_set_epi16(INT16_C( 4335), INT16_C( -8694), INT16_C( 20589), INT16_C( -2761), 
INT16_C( -3216), INT16_C(-24783), INT16_C(-17777), INT16_C( -501), INT16_C( 25504), INT16_C( 26559), INT16_C( 27843), INT16_C( 31769), INT16_C(-18807), INT16_C( 5762), INT16_C(-26736), INT16_C( 14349), INT16_C(-15519), INT16_C( 4924), INT16_C(-19685), INT16_C( 31074), INT16_C(-20201), INT16_C( -4452), INT16_C( 11125), INT16_C( 19762), INT16_C(-31890), INT16_C(-20519), INT16_C(-27796), INT16_C( 4844), INT16_C( 1980), INT16_C(-25222), INT16_C(-27366), INT16_C( 20455)), simde_mm512_set_epi16(INT16_C( -4), INT16_C( 0), INT16_C( -1), INT16_C( -3), INT16_C( 1), INT16_C( -1), INT16_C( 0), INT16_C( -43), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 4), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 3), INT16_C( -2), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( -1), INT16_C( 1), INT16_C( 0), INT16_C( -1)) }, { simde_mm512_set_epi16(INT16_C( 30542), INT16_C(-21686), INT16_C(-12987), INT16_C(-10637), INT16_C( -1601), INT16_C(-28302), INT16_C( 15211), INT16_C(-14111), INT16_C( 25976), INT16_C( 21242), INT16_C(-23929), INT16_C(-19059), INT16_C(-25081), INT16_C( 5942), INT16_C(-21376), INT16_C( 4770), INT16_C( -1129), INT16_C(-19990), INT16_C( 26476), INT16_C(-29290), INT16_C(-16617), INT16_C(-24641), INT16_C( 13060), INT16_C(-26392), INT16_C(-31122), INT16_C( 1166), INT16_C(-13169), INT16_C( 10959), INT16_C( 3043), INT16_C(-24353), INT16_C(-25618), INT16_C( 3998)), simde_mm512_set_epi16(INT16_C( 8697), INT16_C( 4862), INT16_C(-26319), INT16_C(-11370), INT16_C( 4314), INT16_C(-16926), INT16_C( 26882), INT16_C( 8784), INT16_C(-23412), INT16_C( 6784), INT16_C( 27807), INT16_C( 29358), INT16_C( 28774), INT16_C( -1248), INT16_C( 14871), INT16_C( 4639), INT16_C( 17536), INT16_C( -3921), INT16_C(-31860), INT16_C( 18313), INT16_C( 13025), INT16_C(-15494), INT16_C( -6838), INT16_C(-31563), INT16_C( 10488), INT16_C( 29317), INT16_C( 5913), INT16_C( -5447), INT16_C( 11124), 
INT16_C(-18588), INT16_C(-20055), INT16_C( 31068)), simde_mm512_set_epi16(INT16_C( 3), INT16_C( -4), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 3), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -4), INT16_C( -1), INT16_C( 1), INT16_C( 0), INT16_C( 5), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 1), INT16_C( -1), INT16_C( 0), INT16_C( -2), INT16_C( 0), INT16_C( -2), INT16_C( -2), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( 0)) }, { simde_mm512_set_epi16(INT16_C( 10506), INT16_C( 27276), INT16_C( 10689), INT16_C( 7669), INT16_C( -9146), INT16_C(-17193), INT16_C( 7411), INT16_C( 5177), INT16_C( 18940), INT16_C(-16405), INT16_C( 3246), INT16_C( 3104), INT16_C( -7140), INT16_C( 31568), INT16_C( -2399), INT16_C(-28909), INT16_C( 26564), INT16_C(-28507), INT16_C( 3797), INT16_C( -9359), INT16_C(-12946), INT16_C( 18074), INT16_C( -6465), INT16_C( 3679), INT16_C( 17483), INT16_C( -5905), INT16_C( 3591), INT16_C(-20227), INT16_C( -6079), INT16_C( -1639), INT16_C(-29076), INT16_C( 29393)), simde_mm512_set_epi16(INT16_C( 11630), INT16_C( 9206), INT16_C(-15696), INT16_C( 3180), INT16_C( 12868), INT16_C(-30976), INT16_C( -5774), INT16_C(-11992), INT16_C(-18085), INT16_C( 32470), INT16_C( 17470), INT16_C(-31399), INT16_C( 9368), INT16_C( 3571), INT16_C( 7161), INT16_C(-27278), INT16_C( 9802), INT16_C( 20270), INT16_C(-19501), INT16_C( 19621), INT16_C( 14613), INT16_C( -6394), INT16_C( -6716), INT16_C( -8239), INT16_C(-25839), INT16_C( 28062), INT16_C( -8851), INT16_C(-12431), INT16_C( -8955), INT16_C( -676), INT16_C( 10256), INT16_C( 15625)), simde_mm512_set_epi16(INT16_C( 0), INT16_C( 2), INT16_C( 0), INT16_C( 2), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 8), INT16_C( 0), INT16_C( 1), INT16_C( 2), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -2), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 
0), INT16_C( 1), INT16_C( 0), INT16_C( 2), INT16_C( -2), INT16_C( 1)) }, { simde_mm512_set_epi16(INT16_C( 14453), INT16_C(-27323), INT16_C( 14069), INT16_C(-15038), INT16_C( 29890), INT16_C(-32496), INT16_C( -8033), INT16_C( 2034), INT16_C( 28252), INT16_C(-12993), INT16_C(-12172), INT16_C( 21268), INT16_C(-19693), INT16_C( -3590), INT16_C( -7723), INT16_C(-15496), INT16_C( -5494), INT16_C( 10297), INT16_C( 10325), INT16_C( 32003), INT16_C(-11357), INT16_C( 14609), INT16_C(-13537), INT16_C( 17128), INT16_C( 6812), INT16_C( 32194), INT16_C( 287), INT16_C( 5824), INT16_C( 13352), INT16_C(-19334), INT16_C( 8294), INT16_C(-20267)), simde_mm512_set_epi16(INT16_C(-10192), INT16_C(-26586), INT16_C( 32452), INT16_C( 4989), INT16_C(-13693), INT16_C(-13838), INT16_C( 2151), INT16_C( 31183), INT16_C(-12217), INT16_C( 28038), INT16_C( 27497), INT16_C(-25404), INT16_C(-25184), INT16_C(-12134), INT16_C( 25347), INT16_C( -5075), INT16_C( 19038), INT16_C( 9321), INT16_C(-20974), INT16_C( 22487), INT16_C( -3253), INT16_C(-14033), INT16_C( 24624), INT16_C( 14772), INT16_C( 16067), INT16_C(-16101), INT16_C( 12034), INT16_C( 11420), INT16_C(-30652), INT16_C(-30195), INT16_C(-10496), INT16_C( 32407)), simde_mm512_set_epi16(INT16_C( -1), INT16_C( 1), INT16_C( 0), INT16_C( -3), INT16_C( -2), INT16_C( 2), INT16_C( -3), INT16_C( 0), INT16_C( -2), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 3), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 1), INT16_C( 3), INT16_C( -1), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }, { simde_mm512_set_epi16(INT16_C(-12762), INT16_C( -143), INT16_C( 24201), INT16_C( 27500), INT16_C(-21606), INT16_C(-10954), INT16_C( 30460), INT16_C( 28331), INT16_C(-22171), INT16_C(-30589), INT16_C( 16765), INT16_C(-17393), INT16_C( 31673), INT16_C( 13306), INT16_C( -8624), INT16_C( -3653), INT16_C(-23812), INT16_C( 2378), INT16_C( -6069), 
INT16_C( -8645), INT16_C( 9750), INT16_C( 6252), INT16_C(-30407), INT16_C(-28082), INT16_C(-14686), INT16_C( -5840), INT16_C( 24502), INT16_C( 12329), INT16_C( -5959), INT16_C(-16932), INT16_C( -4867), INT16_C( 10388)), simde_mm512_set_epi16(INT16_C(-30203), INT16_C(-31292), INT16_C( 7054), INT16_C( 31766), INT16_C(-23643), INT16_C( -7634), INT16_C( 23958), INT16_C(-19164), INT16_C( 32358), INT16_C( 32485), INT16_C( -8137), INT16_C( 2854), INT16_C( 443), INT16_C( 3757), INT16_C(-31602), INT16_C( 26770), INT16_C( 1434), INT16_C(-26880), INT16_C(-13137), INT16_C(-25600), INT16_C( 3310), INT16_C( 31739), INT16_C( 22782), INT16_C( 27721), INT16_C(-28215), INT16_C( 10286), INT16_C( 11994), INT16_C(-23317), INT16_C(-11843), INT16_C( 6466), INT16_C( 8900), INT16_C( 11867)), simde_mm512_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 3), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -2), INT16_C( -6), INT16_C( 71), INT16_C( 3), INT16_C( 0), INT16_C( 0), INT16_C( -16), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 2), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 2), INT16_C( 0), INT16_C( 0), INT16_C( -2), INT16_C( 0), INT16_C( 0)) }, { simde_mm512_set_epi16(INT16_C(-29408), INT16_C( 7369), INT16_C( -5051), INT16_C( 7942), INT16_C( 18019), INT16_C(-25065), INT16_C( -8302), INT16_C( 17011), INT16_C( 2762), INT16_C( 27559), INT16_C( 18647), INT16_C( 22035), INT16_C(-10618), INT16_C( -3223), INT16_C( 25352), INT16_C(-32696), INT16_C( -1859), INT16_C(-20090), INT16_C( 18297), INT16_C(-27701), INT16_C(-31478), INT16_C(-13300), INT16_C(-15493), INT16_C(-16792), INT16_C(-23954), INT16_C(-14239), INT16_C(-15716), INT16_C( 12103), INT16_C(-30330), INT16_C( -2111), INT16_C(-26781), INT16_C( 25851)), simde_mm512_set_epi16(INT16_C( 11252), INT16_C(-25669), INT16_C(-31001), INT16_C( 13518), INT16_C( 30845), INT16_C(-14200), INT16_C(-30880), INT16_C( 22795), INT16_C(-15552), INT16_C( -1554), INT16_C( 
29162), INT16_C( -8371), INT16_C( 5731), INT16_C( 22086), INT16_C( 7870), INT16_C(-26229), INT16_C( 19406), INT16_C(-22832), INT16_C(-14386), INT16_C( 22375), INT16_C( -8274), INT16_C( -9174), INT16_C(-24184), INT16_C( 24847), INT16_C( 26808), INT16_C( -2235), INT16_C( 4293), INT16_C(-30072), INT16_C( 23713), INT16_C( 20910), INT16_C( 6378), INT16_C(-18450)), simde_mm512_set_epi16(INT16_C( -2), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -17), INT16_C( 0), INT16_C( -2), INT16_C( -1), INT16_C( 0), INT16_C( 3), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 3), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 6), INT16_C( -3), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -4), INT16_C( -1)) }, { simde_mm512_set_epi16(INT16_C( -8644), INT16_C( 4438), INT16_C( 1025), INT16_C(-26642), INT16_C( 18378), INT16_C(-13976), INT16_C( 21110), INT16_C( 14955), INT16_C( 2525), INT16_C(-19773), INT16_C( 28133), INT16_C(-32693), INT16_C( 12259), INT16_C(-21141), INT16_C(-27294), INT16_C( 16198), INT16_C( -2640), INT16_C( 31144), INT16_C(-15827), INT16_C( 20747), INT16_C(-19791), INT16_C( 30374), INT16_C( -9055), INT16_C(-20334), INT16_C( 28339), INT16_C( 29800), INT16_C( 32312), INT16_C(-19316), INT16_C(-15043), INT16_C(-27434), INT16_C( 29424), INT16_C(-25521)), simde_mm512_set_epi16(INT16_C(-24272), INT16_C( -9025), INT16_C(-17538), INT16_C(-13789), INT16_C( 3646), INT16_C( 17578), INT16_C( -9614), INT16_C(-11054), INT16_C( 23757), INT16_C( -5736), INT16_C( 8067), INT16_C( 10531), INT16_C(-24488), INT16_C( 16639), INT16_C(-22179), INT16_C( -8704), INT16_C( -927), INT16_C(-31517), INT16_C( 10091), INT16_C( 19448), INT16_C( 12069), INT16_C( 8742), INT16_C( 16653), INT16_C( 31958), INT16_C(-18440), INT16_C(-30513), INT16_C( -3426), INT16_C( -7330), INT16_C( 24804), INT16_C( 18228), INT16_C( 16072), INT16_C(-15326)), simde_mm512_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 
0), INT16_C( 1), INT16_C( 5), INT16_C( 0), INT16_C( -2), INT16_C( -1), INT16_C( 0), INT16_C( 3), INT16_C( 3), INT16_C( -3), INT16_C( 0), INT16_C( -1), INT16_C( 1), INT16_C( -1), INT16_C( 2), INT16_C( 0), INT16_C( -1), INT16_C( 1), INT16_C( -1), INT16_C( 3), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -9), INT16_C( 2), INT16_C( 0), INT16_C( -1), INT16_C( 1), INT16_C( 1)) }, { simde_mm512_set_epi16(INT16_C( 23232), INT16_C(-29257), INT16_C( 1254), INT16_C( -9317), INT16_C(-20336), INT16_C( 10081), INT16_C( 18681), INT16_C( 12677), INT16_C( 17973), INT16_C(-10276), INT16_C(-23503), INT16_C( 18772), INT16_C( 8312), INT16_C( 15138), INT16_C( -9415), INT16_C(-23183), INT16_C( 4065), INT16_C( 14928), INT16_C( -9505), INT16_C( -3213), INT16_C( -8135), INT16_C(-17864), INT16_C(-23451), INT16_C( -2372), INT16_C( 14548), INT16_C(-10992), INT16_C( 6282), INT16_C(-22066), INT16_C(-11858), INT16_C( 14867), INT16_C( -6173), INT16_C( 24146)), simde_mm512_set_epi16(INT16_C(-20244), INT16_C( 14874), INT16_C( 7829), INT16_C( 32218), INT16_C( 17818), INT16_C( 309), INT16_C( 27668), INT16_C( 9211), INT16_C( 15166), INT16_C( 4076), INT16_C( 28109), INT16_C(-30601), INT16_C( 4803), INT16_C(-19074), INT16_C(-23287), INT16_C(-27917), INT16_C( 7634), INT16_C(-13255), INT16_C( 14290), INT16_C( -8590), INT16_C(-11602), INT16_C( 9361), INT16_C(-18559), INT16_C( 3976), INT16_C( 20763), INT16_C( 17266), INT16_C( 8709), INT16_C(-30498), INT16_C( 31994), INT16_C(-17983), INT16_C( 25233), INT16_C( 29991)), simde_mm512_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 32), INT16_C( 0), INT16_C( 1), INT16_C( 1), INT16_C( -2), INT16_C( 0), INT16_C( 0), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_div_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_div_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C(-1425964510), INT32_C( 1884851068), INT32_C( -245085200), INT32_C( 312441627), INT32_C( 1361020823), INT32_C( -269027644), INT32_C( 2046290516), INT32_C( 253262419), INT32_C(-1435031175), INT32_C( -983397284), INT32_C( 1158205006), INT32_C( 2142968427), INT32_C( -610621785), INT32_C(-1874018384), INT32_C( 408084487), INT32_C( 314643093)), simde_mm512_set_epi32(INT32_C(-1816447538), INT32_C( 1352799684), INT32_C( 437452333), INT32_C(-2106809533), INT32_C( 850823800), INT32_C(-1580883911), INT32_C(-2115707304), INT32_C( 1577531711), INT32_C( 801246884), INT32_C( 59025302), INT32_C( 905783489), INT32_C(-1645941779), INT32_C( 962943312), INT32_C( 2128170875), INT32_C(-1348448230), INT32_C( -975134432)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 1), INT32_C( 0), INT32_C( 0), INT32_C( 1), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -16), INT32_C( 1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_mm512_set_epi32(INT32_C( 1427225802), INT32_C(-1035302594), INT32_C( -199744603), INT32_C( 1376388625), INT32_C(-2114897409), INT32_C( 1679349706), INT32_C(-1031333846), INT32_C(-1198347443), INT32_C( -637748341), INT32_C( 1314591131), INT32_C( 282479090), INT32_C( 1660196054), INT32_C(-1167126507), INT32_C(-1998854068), INT32_C( 933881032), INT32_C( -624384653)), simde_mm512_set_epi32(INT32_C( 1612321322), INT32_C( 2051698478), INT32_C( 1596883036), INT32_C(-1369467325), INT32_C( 1851004364), INT32_C( 1092388812), INT32_C( 828772877), INT32_C( -259189725), INT32_C( -849691191), INT32_C(-1191458488), INT32_C( 801339023), INT32_C( -104328386), INT32_C( 757083857), 
INT32_C(-1236967236), INT32_C( -850146114), INT32_C( 1258625824)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( 1), INT32_C( -1), INT32_C( 4), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -15), INT32_C( -1), INT32_C( 1), INT32_C( -1), INT32_C( 0)) }, { simde_mm512_set_epi32(INT32_C( 237418199), INT32_C( -70579339), INT32_C(-2042257710), INT32_C( 1462546998), INT32_C( -202189538), INT32_C(-1353367648), INT32_C( 304511606), INT32_C( -539003093), INT32_C( 1923205305), INT32_C( 464427515), INT32_C( -694421636), INT32_C(-1729085762), INT32_C( 1377800186), INT32_C( -626233146), INT32_C(-2090091895), INT32_C( 1314335058)), simde_mm512_set_epi32(INT32_C( 38009422), INT32_C( -855531694), INT32_C( 1096529400), INT32_C( 740723389), INT32_C( -703601695), INT32_C(-1082310854), INT32_C( 120520136), INT32_C( 494300544), INT32_C(-1280011607), INT32_C(-1943894617), INT32_C( -321878744), INT32_C( -690430536), INT32_C( 1135419008), INT32_C( 1818004981), INT32_C( 1471877533), INT32_C( 559240384)), simde_mm512_set_epi32(INT32_C( 6), INT32_C( 0), INT32_C( -1), INT32_C( 1), INT32_C( 0), INT32_C( 1), INT32_C( 2), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 2), INT32_C( 2), INT32_C( 1), INT32_C( 0), INT32_C( -1), INT32_C( 2)) }, { simde_mm512_set_epi32(INT32_C(-1724745069), INT32_C( 1135206576), INT32_C( 1179583658), INT32_C(-1966673560), INT32_C( 876279100), INT32_C( -587502732), INT32_C( -149418425), INT32_C( -921830900), INT32_C( 17215575), INT32_C(-1719497158), INT32_C(-1349196793), INT32_C( 1245762398), INT32_C( 813297065), INT32_C( -835921648), INT32_C(-1975778091), INT32_C( 2110087211)), simde_mm512_set_epi32(INT32_C(-1421142882), INT32_C( -720107087), INT32_C( -533473336), INT32_C(-1235553858), INT32_C( 1997884077), INT32_C(-1507361050), INT32_C( 21786729), INT32_C( 743816821), INT32_C( 150690827), INT32_C(-1210873139), INT32_C( 1036977320), INT32_C( -399295069), INT32_C(-1569884506), INT32_C( -616191901), 
INT32_C(-1839631465), INT32_C( -912247900)), simde_mm512_set_epi32(INT32_C( 1), INT32_C( -1), INT32_C( -2), INT32_C( 1), INT32_C( 0), INT32_C( 0), INT32_C( -6), INT32_C( -1), INT32_C( 0), INT32_C( 1), INT32_C( -1), INT32_C( -3), INT32_C( 0), INT32_C( 1), INT32_C( 1), INT32_C( -2)) }, { simde_mm512_set_epi32(INT32_C( -788754092), INT32_C( 1871593252), INT32_C(-1494005905), INT32_C(-1673341020), INT32_C( -802349852), INT32_C( 1483795222), INT32_C( -482009835), INT32_C( -91245467), INT32_C( 1580169915), INT32_C( 692091070), INT32_C( 1863695169), INT32_C( -863865867), INT32_C(-1394651654), INT32_C( -860864123), INT32_C( 684761994), INT32_C(-1721896503)), simde_mm512_set_epi32(INT32_C(-1337054377), INT32_C( 66234694), INT32_C(-1856118156), INT32_C(-1127800230), INT32_C( 814009506), INT32_C(-2034345199), INT32_C( 1765405247), INT32_C(-1048066647), INT32_C( -423083536), INT32_C(-1848382006), INT32_C( -152706477), INT32_C(-1375856509), INT32_C( -23675804), INT32_C( -242644348), INT32_C( 1836148713), INT32_C( -17324905)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 28), INT32_C( 0), INT32_C( 1), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -3), INT32_C( 0), INT32_C( -12), INT32_C( 0), INT32_C( 58), INT32_C( 3), INT32_C( 0), INT32_C( 99)) }, { simde_mm512_set_epi32(INT32_C( -463247298), INT32_C( -951467140), INT32_C( 1433027324), INT32_C(-1349535490), INT32_C( -916446608), INT32_C(-1679952824), INT32_C( 515026148), INT32_C( -79374441), INT32_C(-1055204414), INT32_C( 1214763982), INT32_C( -351626877), INT32_C( 427209663), INT32_C( 1651021910), INT32_C( -181051643), INT32_C(-1481830173), INT32_C( 1285378207)), simde_mm512_set_epi32(INT32_C( -895026020), INT32_C(-2124493776), INT32_C( -806312731), INT32_C( 721610054), INT32_C( 677519448), INT32_C( 1470235459), INT32_C(-2123699180), INT32_C( 883454038), INT32_C(-2020088518), INT32_C( -300465294), INT32_C( 1493254397), INT32_C( 2062995345), INT32_C( -10095941), INT32_C(-1400374264), INT32_C( 1068728589), INT32_C( 
234142625)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( -4), INT32_C( 0), INT32_C( 0), INT32_C( -163), INT32_C( 0), INT32_C( -1), INT32_C( 5)) }, { simde_mm512_set_epi32(INT32_C( -939190848), INT32_C(-2083825761), INT32_C( 2014997186), INT32_C( 790185633), INT32_C(-1507225536), INT32_C( -384122450), INT32_C(-1588213257), INT32_C(-1040817544), INT32_C( 1965628193), INT32_C(-2067530457), INT32_C( 1204204418), INT32_C( -39160501), INT32_C( -605764870), INT32_C( 561973657), INT32_C( 1912174450), INT32_C( 1415728252)), simde_mm512_set_epi32(INT32_C( -927506034), INT32_C( 155586444), INT32_C( -406884871), INT32_C( -252994257), INT32_C( 1219028873), INT32_C(-1972688074), INT32_C( -597390303), INT32_C( 291669377), INT32_C( -695882735), INT32_C( 879590202), INT32_C( 1348714758), INT32_C( 1712617745), INT32_C( -236530514), INT32_C( 1880792230), INT32_C( 1810070042), INT32_C(-1599785869)), simde_mm512_set_epi32(INT32_C( 1), INT32_C( -13), INT32_C( -4), INT32_C( -3), INT32_C( -1), INT32_C( 0), INT32_C( 2), INT32_C( -3), INT32_C( -2), INT32_C( -2), INT32_C( 0), INT32_C( 0), INT32_C( 2), INT32_C( 0), INT32_C( 1), INT32_C( 0)) }, { simde_mm512_set_epi32(INT32_C(-1601700614), INT32_C( 1985924496), INT32_C( -342633815), INT32_C(-2007999861), INT32_C( 297828713), INT32_C( 1383645848), INT32_C(-2056044415), INT32_C( 373512753), INT32_C( -26545593), INT32_C( -328575199), INT32_C( -462276628), INT32_C( 1976153041), INT32_C( 1430984961), INT32_C(-1934079238), INT32_C( 399344654), INT32_C( 1569206763)), simde_mm512_set_epi32(INT32_C( 102595444), INT32_C( 731375272), INT32_C(-1673993680), INT32_C( -406822977), INT32_C( -578959028), INT32_C( 1173139127), INT32_C(-1295304556), INT32_C( 955166905), INT32_C( 270270084), INT32_C( 134608446), INT32_C( -519669996), INT32_C( -265658570), INT32_C(-1584344142), INT32_C( 1279036686), INT32_C(-1076842770), INT32_C( -44502324)), 
simde_mm512_set_epi32(INT32_C( -15), INT32_C( 2), INT32_C( 0), INT32_C( 4), INT32_C( 0), INT32_C( 1), INT32_C( 1), INT32_C( 0), INT32_C( 0), INT32_C( -2), INT32_C( 0), INT32_C( -7), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -35)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_div_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_div_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask16 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 691121094), INT32_C( 674034227), INT32_C(-1965434887), INT32_C( -920286947), INT32_C( -374673026), INT32_C(-1240805178), INT32_C( 1568850865), INT32_C(-1142977539), INT32_C(-1079516608), INT32_C( -708153743), INT32_C( 1508722402), INT32_C(-2074345640), INT32_C( 1747596798), INT32_C(-2063703989), INT32_C( 527472553), INT32_C(-1403096998)), UINT16_C(63371), simde_mm512_set_epi32(INT32_C( -341007878), INT32_C(-1764810870), INT32_C( 1179683687), INT32_C(-1646326602), INT32_C( -671967289), INT32_C(-1586327268), INT32_C( 1691051285), INT32_C( 50347892), INT32_C( 728425428), INT32_C( 1192263444), INT32_C(-2086343723), INT32_C( 1322777130), INT32_C( 163989560), INT32_C( 1492341726), INT32_C( 298608154), INT32_C( 1250819173)), simde_mm512_set_epi32(INT32_C(-1291033589), INT32_C( 1314482530), INT32_C(-1297250617), INT32_C( -739008036), INT32_C(-1419039999), INT32_C(-1004264650), INT32_C( 1580565751), INT32_C( -471064457), INT32_C( 2081361826), INT32_C( 493161721), INT32_C(-1195115819), INT32_C( 894221337), INT32_C(-1330460172), INT32_C( 492373082), INT32_C( -13096811), INT32_C(-2087181083)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 2), INT32_C( -374673026), INT32_C( 1), INT32_C( 1), INT32_C( 0), INT32_C( 0), INT32_C( -708153743), INT32_C( 1508722402), INT32_C(-2074345640), INT32_C( 0), 
INT32_C(-2063703989), INT32_C( -22), INT32_C( 0)) }, { simde_mm512_set_epi32(INT32_C( 1779168063), INT32_C(-1138893231), INT32_C( -687161637), INT32_C( 1828175063), INT32_C( -389420023), INT32_C( -193211433), INT32_C( -857989172), INT32_C( -448329300), INT32_C(-1601364212), INT32_C( 1710148738), INT32_C( 1974123080), INT32_C(-1424367196), INT32_C( 118588227), INT32_C( 542053192), INT32_C( 499863549), INT32_C( 957375358)), UINT16_C(36797), simde_mm512_set_epi32(INT32_C(-1153303869), INT32_C( 562234020), INT32_C( 1763100483), INT32_C( -518004559), INT32_C(-1450358898), INT32_C(-1409866198), INT32_C( 269910347), INT32_C( 433971495), INT32_C( 1441956227), INT32_C( 1018271575), INT32_C( 1734496959), INT32_C( 380846712), INT32_C( -941967689), INT32_C( -739443621), INT32_C( 1995198557), INT32_C( -980655097)), simde_mm512_set_epi32(INT32_C(-2088961787), INT32_C( 1943141679), INT32_C( -665465241), INT32_C( -342195833), INT32_C( 2102184556), INT32_C( 877111492), INT32_C( 1183491905), INT32_C( -576610979), INT32_C(-1061316197), INT32_C( -808097400), INT32_C( -362876916), INT32_C(-1845390533), INT32_C( -48621016), INT32_C( 201516689), INT32_C(-1435930720), INT32_C(-1932876068)), simde_mm512_set_epi32(INT32_C( 0), INT32_C(-1138893231), INT32_C( -687161637), INT32_C( 1828175063), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 1710148738), INT32_C( -4), INT32_C( 0), INT32_C( 19), INT32_C( -3), INT32_C( 499863549), INT32_C( 0)) }, { simde_mm512_set_epi32(INT32_C( -179829877), INT32_C( 651362699), INT32_C( 495870887), INT32_C( -382126427), INT32_C( 915244711), INT32_C( 5081424), INT32_C( 1422501384), INT32_C( -163979724), INT32_C(-1516900265), INT32_C( 497965579), INT32_C( 910061584), INT32_C( 2002226944), INT32_C( -621963189), INT32_C( -48343218), INT32_C( 523093293), INT32_C(-1235205724)), UINT16_C(46902), simde_mm512_set_epi32(INT32_C( -220620904), INT32_C( 1398655610), INT32_C( 1722520923), INT32_C( 1206471293), INT32_C( 1374915518), INT32_C( 
531653117), INT32_C( 2075187308), INT32_C( -144618549), INT32_C(-2131865715), INT32_C( 1444783055), INT32_C( 1878625233), INT32_C( 1755684145), INT32_C(-2061726371), INT32_C(-1050443653), INT32_C(-1299940555), INT32_C(-2116696545)), simde_mm512_set_epi32(INT32_C(-1106093489), INT32_C( 1982658188), INT32_C( 863153207), INT32_C(-1637276628), INT32_C( 448681074), INT32_C( 1334667053), INT32_C( 502667641), INT32_C( 855395764), INT32_C(-1672092948), INT32_C( 808531712), INT32_C( 454488139), INT32_C( 123547093), INT32_C( 483090439), INT32_C(-1126329757), INT32_C(-1201220189), INT32_C( -136050629)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 651362699), INT32_C( 1), INT32_C( 0), INT32_C( 915244711), INT32_C( 0), INT32_C( 4), INT32_C( 0), INT32_C(-1516900265), INT32_C( 497965579), INT32_C( 4), INT32_C( 14), INT32_C( -621963189), INT32_C( 0), INT32_C( 1), INT32_C(-1235205724)) }, { simde_mm512_set_epi32(INT32_C( 2113970745), INT32_C( -182128842), INT32_C( 564512596), INT32_C( 604721400), INT32_C( 1471174399), INT32_C(-1803940708), INT32_C(-1765392929), INT32_C( 298473775), INT32_C(-1404600737), INT32_C(-1231334921), INT32_C( -238983338), INT32_C( -145797796), INT32_C( -181019162), INT32_C(-1910480170), INT32_C(-1860760170), INT32_C( -371855625)), UINT16_C(38914), simde_mm512_set_epi32(INT32_C( 1533151625), INT32_C( 2122196136), INT32_C( 1690360675), INT32_C( 1484935627), INT32_C( 1463758672), INT32_C( 602211615), INT32_C( -464964305), INT32_C(-1430226195), INT32_C( 797104998), INT32_C(-1557543977), INT32_C( -952737410), INT32_C( 178625368), INT32_C(-1203806300), INT32_C( 1095216728), INT32_C(-1215405554), INT32_C( 430790402)), simde_mm512_set_epi32(INT32_C( -251141702), INT32_C( 1274901810), INT32_C( 413860084), INT32_C( 550494320), INT32_C( 1997049765), INT32_C( 505563651), INT32_C( 463125220), INT32_C( -451213519), INT32_C(-1948793453), INT32_C(-2137102362), INT32_C(-1703809327), INT32_C( 389679318), INT32_C( -355192167), INT32_C(-1801602389), INT32_C( 2006619059), 
INT32_C( -903558132)), simde_mm512_set_epi32(INT32_C( -6), INT32_C( -182128842), INT32_C( 564512596), INT32_C( 2), INT32_C( 0), INT32_C(-1803940708), INT32_C(-1765392929), INT32_C( 298473775), INT32_C(-1404600737), INT32_C(-1231334921), INT32_C( -238983338), INT32_C( -145797796), INT32_C( -181019162), INT32_C(-1910480170), INT32_C( 0), INT32_C( -371855625)) }, { simde_mm512_set_epi32(INT32_C( 1572579389), INT32_C( -783078337), INT32_C(-1895621282), INT32_C( 1967093325), INT32_C( 908815803), INT32_C(-1975591270), INT32_C( 2065037155), INT32_C( 623932649), INT32_C( 1610322797), INT32_C( -842122991), INT32_C( 2031682359), INT32_C(-1300130353), INT32_C(-1950048210), INT32_C( 238137788), INT32_C( 1978166020), INT32_C( 76768592)), UINT16_C( 883), simde_mm512_set_epi32(INT32_C(-1010119490), INT32_C( -410070063), INT32_C( 2094036024), INT32_C(-1838133114), INT32_C( 69201629), INT32_C( 1228958503), INT32_C( -775379327), INT32_C(-1485462767), INT32_C(-1179177847), INT32_C( 1767270276), INT32_C( 490610321), INT32_C( 1164436618), INT32_C(-1920297499), INT32_C( -690964678), INT32_C( -880248267), INT32_C(-2005634277)), simde_mm512_set_epi32(INT32_C(-1911659531), INT32_C( 143428987), INT32_C( -610024215), INT32_C( 582607980), INT32_C( 1609326889), INT32_C( 1245407235), INT32_C( -119962198), INT32_C(-1932052969), INT32_C(-1370414254), INT32_C(-1925960308), INT32_C( 2119408419), INT32_C(-1203088886), INT32_C( -316530353), INT32_C( 1708684203), INT32_C( 1202455481), INT32_C(-2107221827)), simde_mm512_set_epi32(INT32_C( 1572579389), INT32_C( -783078337), INT32_C(-1895621282), INT32_C( 1967093325), INT32_C( 908815803), INT32_C(-1975591270), INT32_C( 6), INT32_C( 0), INT32_C( 1610322797), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C(-1950048210), INT32_C( 238137788), INT32_C( 0), INT32_C( 0)) }, { simde_mm512_set_epi32(INT32_C( 2117071873), INT32_C(-1437889529), INT32_C( -376074104), INT32_C( 1087893388), INT32_C( -443183285), INT32_C( -380695552), INT32_C( 565328458), INT32_C( 
-93024748), INT32_C( 1480532604), INT32_C( -97460760), INT32_C( -582247600), INT32_C( -374749470), INT32_C( 1394313506), INT32_C( 394553965), INT32_C(-2016714120), INT32_C( 1697927724)), UINT16_C(12254), simde_mm512_set_epi32(INT32_C( 56443211), INT32_C(-2036514643), INT32_C( -510270824), INT32_C( 1139427205), INT32_C( 1090384090), INT32_C(-1905231405), INT32_C(-2079359983), INT32_C( -477294891), INT32_C( -673197028), INT32_C( 2071747620), INT32_C( -442789099), INT32_C( -601334711), INT32_C( 319530416), INT32_C(-2115012481), INT32_C( -501730903), INT32_C( 340519338)), simde_mm512_set_epi32(INT32_C( 1219537084), INT32_C( 1349635715), INT32_C( 732887738), INT32_C(-1728641921), INT32_C(-1388433411), INT32_C( 1765754685), INT32_C(-1574983663), INT32_C( 846129112), INT32_C( 1578410935), INT32_C(-1659872458), INT32_C( 1045536663), INT32_C( 957117985), INT32_C(-1265958651), INT32_C( 1309498779), INT32_C(-1001015299), INT32_C( 1022360677)), simde_mm512_set_epi32(INT32_C( 2117071873), INT32_C(-1437889529), INT32_C( 0), INT32_C( 1087893388), INT32_C( 0), INT32_C( -1), INT32_C( 1), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( -582247600), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( 1697927724)) }, { simde_mm512_set_epi32(INT32_C( -304885978), INT32_C( 991545752), INT32_C( -143034937), INT32_C( 843112042), INT32_C( -227554783), INT32_C( 2124182542), INT32_C(-1526246088), INT32_C(-1991977382), INT32_C( 1224533822), INT32_C( -819361196), INT32_C( -684010252), INT32_C(-1738921185), INT32_C(-1259570772), INT32_C( -691865929), INT32_C( -973523371), INT32_C( 45581573)), UINT16_C(42669), simde_mm512_set_epi32(INT32_C( -156799603), INT32_C(-1073012339), INT32_C(-2130532125), INT32_C( 397240391), INT32_C( 200936922), INT32_C(-1030980309), INT32_C(-1758363174), INT32_C( -665586367), INT32_C( 453331046), INT32_C( 1704580573), INT32_C( 1606190487), INT32_C(-1085658047), INT32_C(-1335469644), INT32_C( -368070561), INT32_C(-1419559633), INT32_C( 2069966669)), 
simde_mm512_set_epi32(INT32_C( 1379668640), INT32_C( 66581512), INT32_C( -557301797), INT32_C( 304428974), INT32_C(-1608262788), INT32_C( 532978979), INT32_C( 946958552), INT32_C(-1911324669), INT32_C(-2118093156), INT32_C( 283691898), INT32_C( -446072631), INT32_C( -458781294), INT32_C( 1951055651), INT32_C( 765387914), INT32_C( 822559116), INT32_C( 7445617)), simde_mm512_set_epi32(INT32_C( 0), INT32_C( 991545752), INT32_C( 3), INT32_C( 843112042), INT32_C( -227554783), INT32_C( -1), INT32_C( -1), INT32_C(-1991977382), INT32_C( 0), INT32_C( -819361196), INT32_C( -3), INT32_C(-1738921185), INT32_C( 0), INT32_C( 0), INT32_C( -973523371), INT32_C( 278)) }, { simde_mm512_set_epi32(INT32_C(-1981938926), INT32_C( 869237081), INT32_C( -190053534), INT32_C(-1469275330), INT32_C( -717100794), INT32_C(-1303072888), INT32_C(-2122918671), INT32_C( 1617119933), INT32_C( 1521363431), INT32_C( 553638116), INT32_C( 1036201367), INT32_C(-1187933851), INT32_C( -412155886), INT32_C( -760582943), INT32_C( -423751457), INT32_C( 1273589632)), UINT16_C(35103), simde_mm512_set_epi32(INT32_C(-1836595644), INT32_C( 260676470), INT32_C( 1724614860), INT32_C( -144514633), INT32_C( -478630580), INT32_C(-2086755061), INT32_C( 932145867), INT32_C(-1862372735), INT32_C( 1756892633), INT32_C( 382632965), INT32_C( 1295078740), INT32_C( -995802034), INT32_C( 152308919), INT32_C( -351555508), INT32_C( 31813624), INT32_C( 807463845)), simde_mm512_set_epi32(INT32_C( 615301803), INT32_C( 382786341), INT32_C( 1852603705), INT32_C( 1998007730), INT32_C( 231325888), INT32_C( 1842039329), INT32_C( 968682756), INT32_C( 316335394), INT32_C(-2071382094), INT32_C( -803185337), INT32_C(-2126995500), INT32_C( 1587647099), INT32_C(-1328358584), INT32_C( 320339033), INT32_C( 282380179), INT32_C( -108102092)), simde_mm512_set_epi32(INT32_C( -2), INT32_C( 869237081), INT32_C( -190053534), INT32_C(-1469275330), INT32_C( -2), INT32_C(-1303072888), INT32_C(-2122918671), INT32_C( -5), INT32_C( 1521363431), INT32_C( 
553638116), INT32_C( 1036201367), INT32_C( 0), INT32_C( 0), INT32_C( -1), INT32_C( 0), INT32_C( -7)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_div_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_div_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C(-7120494377185439159), INT64_C( 5095015079852768951), INT64_C( -719755322986504865), INT64_C( 1195398499335632561), INT64_C( 4232475372952240435), INT64_C(-1117570177728981140), INT64_C(-4721763859644106046), INT64_C( 6636524825657073074)), simde_mm512_set_epi64(INT64_C( 6283111750805844985), INT64_C(-7772496718970349305), INT64_C(-6967007030435791671), INT64_C( 2761331052478409707), INT64_C(-5439727342880208313), INT64_C(-6280010522852202514), INT64_C(-2361957704355445009), INT64_C(-3413538286934776973)), simde_mm512_set_epi64(INT64_C( -1), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 1), INT64_C( -1)) }, { simde_mm512_set_epi64(INT64_C( 7047516970419020428), INT64_C( 2488576598769637001), INT64_C( 4233591199077735008), INT64_C( 1735409980007662056), INT64_C(-2964306467966319268), INT64_C(-6472988581173317799), INT64_C( 1870256929123231698), INT64_C(-5453281473672019922)), simde_mm512_set_epi64(INT64_C(-6026337221937727695), INT64_C( 8654798725117969005), INT64_C( 743584473088107844), INT64_C( 5114866458456107677), INT64_C( 1917095392115883075), INT64_C( 8815346252210924017), INT64_C(-1666651333186431127), INT64_C( 4973081304470687258)), simde_mm512_set_epi64(INT64_C( -1), INT64_C( 0), INT64_C( 5), INT64_C( 0), INT64_C( -1), INT64_C( 0), INT64_C( -1), INT64_C( -1)) }, { simde_mm512_set_epi64(INT64_C(-1433819957247000466), INT64_C(-7270540428235491436), INT64_C( 3506767658669433751), INT64_C(-6269164040512613371), 
INT64_C(-2703740818469134807), INT64_C( 3442758576787517783), INT64_C(-4507715808807193748), INT64_C( 4997387685805642122)), simde_mm512_set_epi64(INT64_C(-3375611624029359751), INT64_C( 155579560497872257), INT64_C( 4346579001240147982), INT64_C( 8478054430600792515), INT64_C( 7917529543412977905), INT64_C( 6077094839460323156), INT64_C(-3234198817213444484), INT64_C( 5455426772165090925)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( -46), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 1), INT64_C( 0)) }, { simde_mm512_set_epi64(INT64_C( 5060007040297057440), INT64_C(-6547486212696877775), INT64_C( 4083773956347780040), INT64_C(-7582952476466356489), INT64_C( -533799245190218148), INT64_C( 6528011672062484486), INT64_C( 8505594160370567764), INT64_C(-7955306051941505966)), simde_mm512_set_epi64(INT64_C( 8381795236484256749), INT64_C(-8094121819208130597), INT64_C(-4463810942012697177), INT64_C( 1695569373680370472), INT64_C( 6457800057248167752), INT64_C( 2509734679188915375), INT64_C(-1817858424181439867), INT64_C(-1140679629593449988)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( -4), INT64_C( 0), INT64_C( 2), INT64_C( -4), INT64_C( 6)) }, { simde_mm512_set_epi64(INT64_C(-3727073512330556719), INT64_C( 1145199535931310009), INT64_C( 6618746106828964781), INT64_C( -318594899546127361), INT64_C(-8348228873903822999), INT64_C( 6522300981577637255), INT64_C(-2123306667443487570), INT64_C(-4210181406724347525)), simde_mm512_set_epi64(INT64_C(-5833250200550208329), INT64_C( 8217300129052611844), INT64_C( -649664904511148711), INT64_C( 3231016623164402124), INT64_C( 8024018119100712605), INT64_C( 4306653136982574157), INT64_C(-5380031023357226466), INT64_C( 2544237471105729967)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( -10), INT64_C( 0), INT64_C( -1), INT64_C( 1), INT64_C( 0), INT64_C( -1)) }, { simde_mm512_set_epi64(INT64_C(-6427790700478275098), INT64_C(-3168480089241839861), INT64_C(-5000559488767708993), 
INT64_C( 2755885615249137538), INT64_C( -821966059249139816), INT64_C( 1089871025732147351), INT64_C( 4566772594003817295), INT64_C(-9114574651084812253)), simde_mm512_set_epi64(INT64_C( 1778890864282373370), INT64_C( 5911759041868723302), INT64_C( 4553617065988887085), INT64_C( -523178035921802922), INT64_C( 8875040781716651384), INT64_C( 2040058868339841473), INT64_C(-2732208005963885166), INT64_C(-4435516374878659804)), simde_mm512_set_epi64(INT64_C( -3), INT64_C( 0), INT64_C( -1), INT64_C( -5), INT64_C( 0), INT64_C( 0), INT64_C( -1), INT64_C( 2)) }, { simde_mm512_set_epi64(INT64_C( 423237589908350744), INT64_C( 2795901596537384901), INT64_C( 1719109459006160254), INT64_C(-9093479824318774446), INT64_C(-4511267031708830231), INT64_C(-3402553166296368495), INT64_C( 1216620777318406949), INT64_C( -836102980820378689)), simde_mm512_set_epi64(INT64_C( 7782115963838117574), INT64_C(-6846698536887599933), INT64_C( 4072223690207540333), INT64_C(-1026965696159348843), INT64_C( 4340400659569160523), INT64_C(-8299269241811916492), INT64_C( 7360887374546597504), INT64_C(-6651085920823128052)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 8), INT64_C( -1), INT64_C( 0), INT64_C( 0), INT64_C( 0)) }, { simde_mm512_set_epi64(INT64_C( 453211281016332666), INT64_C( 5434252921191502101), INT64_C(-6060319301844209563), INT64_C(-5254139409542070482), INT64_C(-8624885551201065882), INT64_C( 8329149627836272144), INT64_C( 8516875663163240125), INT64_C(-4575460702098419673)), simde_mm512_set_epi64(INT64_C(-5051260979279221837), INT64_C( 6222948671724306809), INT64_C( 6742741209152957138), INT64_C( 5958951964162816685), INT64_C( 2981515940173974322), INT64_C( 3752367916961311345), INT64_C(-2840979297342041250), INT64_C(-2506264265844715430)), simde_mm512_set_epi64(INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( 0), INT64_C( -2), INT64_C( 2), INT64_C( -2), INT64_C( 1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = 
simde_mm512_div_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_div_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu8(UINT8_C( 41), UINT8_C( 49), UINT8_C(171), UINT8_C(198), UINT8_C( 40), UINT8_C( 44), UINT8_C(242), UINT8_C( 51), UINT8_C(138), UINT8_C(217), UINT8_C(215), UINT8_C(249), UINT8_C(201), UINT8_C( 37), UINT8_C(137), UINT8_C( 29), UINT8_C(233), UINT8_C(170), UINT8_C(241), UINT8_C(126), UINT8_C(182), UINT8_C( 10), UINT8_C(208), UINT8_C(198), UINT8_C( 93), UINT8_C(130), UINT8_C(195), UINT8_C(177), UINT8_C(187), UINT8_C(223), UINT8_C(139), UINT8_C(253), UINT8_C(191), UINT8_C(167), UINT8_C(226), UINT8_C( 64), UINT8_C(213), UINT8_C(202), UINT8_C(110), UINT8_C(113), UINT8_C( 89), UINT8_C(237), UINT8_C( 70), UINT8_C(226), UINT8_C(132), UINT8_C( 91), UINT8_C(255), UINT8_C( 88), UINT8_C(104), UINT8_C( 42), UINT8_C( 53), UINT8_C(254), UINT8_C(132), UINT8_C(254), UINT8_C( 96), UINT8_C( 75), UINT8_C( 31), UINT8_C(112), UINT8_C(151), UINT8_C(169), UINT8_C(172), UINT8_C( 94), UINT8_C(112), UINT8_C( 90)), simde_x_mm512_set_epu8(UINT8_C(195), UINT8_C( 49), UINT8_C( 14), UINT8_C(170), UINT8_C(203), UINT8_C(167), UINT8_C( 3), UINT8_C(215), UINT8_C( 63), UINT8_C(248), UINT8_C( 55), UINT8_C(219), UINT8_C(221), UINT8_C(135), UINT8_C( 61), UINT8_C(191), UINT8_C(209), UINT8_C( 91), UINT8_C( 87), UINT8_C(137), UINT8_C( 87), UINT8_C( 76), UINT8_C( 44), UINT8_C(140), UINT8_C( 2), UINT8_C(200), UINT8_C( 36), UINT8_C(195), UINT8_C(200), UINT8_C(125), UINT8_C(254), UINT8_C(139), UINT8_C(226), UINT8_C( 71), UINT8_C( 92), UINT8_C(129), UINT8_C(182), UINT8_C(119), UINT8_C(247), UINT8_C( 34), UINT8_C(121), UINT8_C( 85), UINT8_C(153), UINT8_C(116), UINT8_C(218), UINT8_C( 21), UINT8_C(101), UINT8_C(122), UINT8_C( 10), UINT8_C(231), UINT8_C( 54), UINT8_C( 71), UINT8_C(156), UINT8_C(149), UINT8_C(244), UINT8_C( 84), UINT8_C(148), 
UINT8_C( 85), UINT8_C(170), UINT8_C(184), UINT8_C( 94), UINT8_C(154), UINT8_C(229), UINT8_C( 11)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 1), UINT8_C( 12), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 80), UINT8_C( 0), UINT8_C( 2), UINT8_C( 0), UINT8_C( 3), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 2), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 2), UINT8_C( 0), UINT8_C( 2), UINT8_C( 0), UINT8_C( 4), UINT8_C( 1), UINT8_C( 46), UINT8_C( 0), UINT8_C( 5), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 2), UINT8_C( 2), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 3), UINT8_C( 0), UINT8_C( 2), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 4), UINT8_C( 2), UINT8_C( 0), UINT8_C( 10), UINT8_C( 0), UINT8_C( 0), UINT8_C( 3), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 8)) }, { simde_x_mm512_set_epu8(UINT8_C(216), UINT8_C( 85), UINT8_C(206), UINT8_C(103), UINT8_C(235), UINT8_C(154), UINT8_C(129), UINT8_C(135), UINT8_C(125), UINT8_C( 76), UINT8_C(202), UINT8_C(108), UINT8_C( 52), UINT8_C( 71), UINT8_C(168), UINT8_C(196), UINT8_C( 70), UINT8_C(138), UINT8_C(167), UINT8_C( 65), UINT8_C(221), UINT8_C(161), UINT8_C(157), UINT8_C( 93), UINT8_C(192), UINT8_C(189), UINT8_C(153), UINT8_C(155), UINT8_C(207), UINT8_C(213), UINT8_C(105), UINT8_C(136), UINT8_C(234), UINT8_C( 94), UINT8_C(240), UINT8_C( 12), UINT8_C(146), UINT8_C( 1), UINT8_C(147), UINT8_C( 59), UINT8_C(253), UINT8_C( 26), UINT8_C( 26), UINT8_C( 40), UINT8_C( 12), UINT8_C( 2), UINT8_C(230), UINT8_C(145), UINT8_C(170), UINT8_C(105), UINT8_C(111), UINT8_C(160), UINT8_C(140), UINT8_C(202), UINT8_C(166), UINT8_C(220), UINT8_C(187), UINT8_C( 65), UINT8_C(250), UINT8_C(195), UINT8_C( 33), UINT8_C(131), UINT8_C( 2), UINT8_C(164)), simde_x_mm512_set_epu8(UINT8_C(120), UINT8_C(127), UINT8_C( 28), UINT8_C( 95), UINT8_C(175), UINT8_C(223), 
UINT8_C(119), UINT8_C(214), UINT8_C(220), UINT8_C(102), UINT8_C( 86), UINT8_C( 22), UINT8_C(119), UINT8_C(207), UINT8_C( 12), UINT8_C(183), UINT8_C(172), UINT8_C(242), UINT8_C(173), UINT8_C(249), UINT8_C( 52), UINT8_C(108), UINT8_C(128), UINT8_C(203), UINT8_C( 85), UINT8_C(135), UINT8_C(227), UINT8_C( 35), UINT8_C(187), UINT8_C( 24), UINT8_C(250), UINT8_C(219), UINT8_C(253), UINT8_C( 62), UINT8_C(125), UINT8_C(236), UINT8_C( 75), UINT8_C( 13), UINT8_C( 79), UINT8_C( 81), UINT8_C(177), UINT8_C(221), UINT8_C(251), UINT8_C(181), UINT8_C(159), UINT8_C(182), UINT8_C( 11), UINT8_C( 11), UINT8_C( 39), UINT8_C( 37), UINT8_C( 39), UINT8_C(208), UINT8_C(136), UINT8_C(180), UINT8_C(215), UINT8_C(139), UINT8_C(144), UINT8_C(128), UINT8_C(203), UINT8_C(206), UINT8_C(173), UINT8_C( 36), UINT8_C(133), UINT8_C(175)), simde_x_mm512_set_epu8(UINT8_C( 1), UINT8_C( 0), UINT8_C( 7), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 2), UINT8_C( 4), UINT8_C( 0), UINT8_C( 0), UINT8_C( 14), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 4), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 2), UINT8_C( 1), UINT8_C( 0), UINT8_C( 4), UINT8_C( 1), UINT8_C( 8), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 20), UINT8_C( 13), UINT8_C( 4), UINT8_C( 2), UINT8_C( 2), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 3), UINT8_C( 0), UINT8_C( 0)) }, { simde_x_mm512_set_epu8(UINT8_C( 87), UINT8_C( 63), UINT8_C( 47), UINT8_C( 80), UINT8_C( 35), UINT8_C(229), UINT8_C( 5), UINT8_C( 31), UINT8_C(228), UINT8_C( 73), UINT8_C( 53), UINT8_C( 47), UINT8_C(170), UINT8_C(192), UINT8_C(122), UINT8_C(237), UINT8_C( 47), UINT8_C(130), UINT8_C(219), UINT8_C(102), UINT8_C(163), 
UINT8_C( 41), UINT8_C(195), UINT8_C(215), UINT8_C(199), UINT8_C( 54), UINT8_C( 97), UINT8_C(126), UINT8_C( 10), UINT8_C(165), UINT8_C(155), UINT8_C( 88), UINT8_C(184), UINT8_C( 63), UINT8_C( 95), UINT8_C(164), UINT8_C( 65), UINT8_C( 71), UINT8_C(174), UINT8_C( 88), UINT8_C(183), UINT8_C(142), UINT8_C( 98), UINT8_C( 14), UINT8_C( 25), UINT8_C(173), UINT8_C( 87), UINT8_C( 2), UINT8_C(191), UINT8_C(143), UINT8_C(152), UINT8_C( 2), UINT8_C(126), UINT8_C( 0), UINT8_C(162), UINT8_C( 57), UINT8_C(245), UINT8_C( 36), UINT8_C(239), UINT8_C( 54), UINT8_C( 33), UINT8_C(165), UINT8_C(199), UINT8_C( 84)), simde_x_mm512_set_epu8(UINT8_C(131), UINT8_C( 42), UINT8_C(151), UINT8_C(210), UINT8_C( 12), UINT8_C(163), UINT8_C(138), UINT8_C(207), UINT8_C( 43), UINT8_C( 57), UINT8_C( 61), UINT8_C( 62), UINT8_C( 81), UINT8_C(184), UINT8_C( 6), UINT8_C( 93), UINT8_C(167), UINT8_C( 1), UINT8_C(145), UINT8_C( 9), UINT8_C( 4), UINT8_C( 17), UINT8_C( 10), UINT8_C(101), UINT8_C(186), UINT8_C(181), UINT8_C(155), UINT8_C(243), UINT8_C(189), UINT8_C(191), UINT8_C(222), UINT8_C(205), UINT8_C( 59), UINT8_C( 26), UINT8_C(227), UINT8_C(105), UINT8_C(237), UINT8_C(145), UINT8_C(183), UINT8_C( 79), UINT8_C(174), UINT8_C( 60), UINT8_C(132), UINT8_C(208), UINT8_C( 58), UINT8_C(178), UINT8_C(116), UINT8_C(240), UINT8_C( 37), UINT8_C(131), UINT8_C(100), UINT8_C(177), UINT8_C( 19), UINT8_C(102), UINT8_C( 81), UINT8_C( 86), UINT8_C( 25), UINT8_C( 43), UINT8_C( 51), UINT8_C(140), UINT8_C( 9), UINT8_C( 40), UINT8_C(227), UINT8_C( 75)), simde_x_mm512_set_epu8(UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 2), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 5), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 2), UINT8_C( 1), UINT8_C( 20), UINT8_C( 2), UINT8_C( 0), UINT8_C(130), UINT8_C( 1), UINT8_C( 11), UINT8_C( 40), UINT8_C( 2), UINT8_C( 19), UINT8_C( 2), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 3), UINT8_C( 2), UINT8_C( 0), 
UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 5), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 6), UINT8_C( 0), UINT8_C( 2), UINT8_C( 0), UINT8_C( 9), UINT8_C( 0), UINT8_C( 4), UINT8_C( 0), UINT8_C( 3), UINT8_C( 4), UINT8_C( 0), UINT8_C( 1)) }, { simde_x_mm512_set_epu8(UINT8_C(233), UINT8_C( 79), UINT8_C( 12), UINT8_C( 0), UINT8_C( 33), UINT8_C(178), UINT8_C( 58), UINT8_C( 74), UINT8_C(250), UINT8_C(116), UINT8_C(142), UINT8_C( 20), UINT8_C( 88), UINT8_C( 63), UINT8_C( 34), UINT8_C(124), UINT8_C(250), UINT8_C( 48), UINT8_C(221), UINT8_C(232), UINT8_C(221), UINT8_C( 75), UINT8_C(155), UINT8_C( 80), UINT8_C(233), UINT8_C(169), UINT8_C(198), UINT8_C(226), UINT8_C( 83), UINT8_C( 27), UINT8_C(137), UINT8_C( 34), UINT8_C( 23), UINT8_C(132), UINT8_C(106), UINT8_C(109), UINT8_C(135), UINT8_C(203), UINT8_C( 98), UINT8_C(120), UINT8_C(101), UINT8_C( 52), UINT8_C( 82), UINT8_C( 44), UINT8_C(142), UINT8_C( 14), UINT8_C( 99), UINT8_C(245), UINT8_C( 8), UINT8_C(140), UINT8_C(141), UINT8_C(123), UINT8_C(219), UINT8_C(163), UINT8_C(196), UINT8_C(233), UINT8_C( 34), UINT8_C(185), UINT8_C(228), UINT8_C(108), UINT8_C( 95), UINT8_C(236), UINT8_C( 97), UINT8_C( 41)), simde_x_mm512_set_epu8(UINT8_C(193), UINT8_C(230), UINT8_C( 93), UINT8_C( 23), UINT8_C(193), UINT8_C( 52), UINT8_C(223), UINT8_C(175), UINT8_C(205), UINT8_C( 45), UINT8_C(166), UINT8_C( 24), UINT8_C( 71), UINT8_C(234), UINT8_C(161), UINT8_C(142), UINT8_C(184), UINT8_C(218), UINT8_C(190), UINT8_C(212), UINT8_C(116), UINT8_C(159), UINT8_C( 44), UINT8_C( 55), UINT8_C(213), UINT8_C(133), UINT8_C( 60), UINT8_C( 3), UINT8_C( 58), UINT8_C(255), UINT8_C(125), UINT8_C(189), UINT8_C(145), UINT8_C( 88), UINT8_C( 55), UINT8_C(182), UINT8_C( 23), UINT8_C(161), UINT8_C(133), UINT8_C( 27), UINT8_C(125), UINT8_C(229), UINT8_C(203), UINT8_C( 45), UINT8_C( 24), UINT8_C( 5), UINT8_C( 90), UINT8_C( 83), 
UINT8_C(145), UINT8_C( 85), UINT8_C(156), UINT8_C(164), UINT8_C(149), UINT8_C(201), UINT8_C( 48), UINT8_C(255), UINT8_C( 41), UINT8_C( 42), UINT8_C( 94), UINT8_C(129), UINT8_C(135), UINT8_C( 8), UINT8_C( 12), UINT8_C(203)), simde_x_mm512_set_epu8(UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 3), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 3), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 3), UINT8_C( 75), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 5), UINT8_C( 1), UINT8_C( 0), UINT8_C( 4), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 5), UINT8_C( 2), UINT8_C( 1), UINT8_C( 2), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 4), UINT8_C( 0), UINT8_C( 0), UINT8_C( 4), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 29), UINT8_C( 8), UINT8_C( 0)) }, { simde_x_mm512_set_epu8(UINT8_C(142), UINT8_C( 19), UINT8_C(128), UINT8_C( 3), UINT8_C(129), UINT8_C(192), UINT8_C(118), UINT8_C(156), UINT8_C( 16), UINT8_C(232), UINT8_C(203), UINT8_C(122), UINT8_C(229), UINT8_C(105), UINT8_C(120), UINT8_C(201), UINT8_C(228), UINT8_C(167), UINT8_C(141), UINT8_C(146), UINT8_C(116), UINT8_C( 74), UINT8_C(191), UINT8_C( 35), UINT8_C( 45), UINT8_C(158), UINT8_C(228), UINT8_C(138), UINT8_C( 49), UINT8_C( 7), UINT8_C( 65), UINT8_C(140), UINT8_C( 0), UINT8_C(113), UINT8_C(156), UINT8_C(113), UINT8_C(246), UINT8_C(167), UINT8_C(109), UINT8_C(141), UINT8_C(192), UINT8_C( 11), UINT8_C( 33), UINT8_C(141), UINT8_C(129), UINT8_C( 2), UINT8_C(168), UINT8_C(227), UINT8_C( 23), UINT8_C(173), UINT8_C(104), UINT8_C( 71), UINT8_C( 11), UINT8_C(250), UINT8_C( 13), UINT8_C(218), UINT8_C(194), UINT8_C(140), UINT8_C(125), UINT8_C( 43), UINT8_C(151), UINT8_C( 49), UINT8_C(129), 
UINT8_C(218)), simde_x_mm512_set_epu8(UINT8_C( 8), UINT8_C( 25), UINT8_C(147), UINT8_C(220), UINT8_C(173), UINT8_C(138), UINT8_C( 38), UINT8_C(150), UINT8_C( 35), UINT8_C( 43), UINT8_C(165), UINT8_C(185), UINT8_C( 50), UINT8_C( 64), UINT8_C(161), UINT8_C(132), UINT8_C(162), UINT8_C( 50), UINT8_C(199), UINT8_C( 84), UINT8_C(251), UINT8_C(200), UINT8_C(217), UINT8_C( 19), UINT8_C(180), UINT8_C(196), UINT8_C(246), UINT8_C( 76), UINT8_C( 55), UINT8_C(204), UINT8_C(139), UINT8_C( 75), UINT8_C( 1), UINT8_C( 89), UINT8_C(133), UINT8_C(212), UINT8_C(206), UINT8_C( 55), UINT8_C(204), UINT8_C(120), UINT8_C( 37), UINT8_C(159), UINT8_C(146), UINT8_C(217), UINT8_C(226), UINT8_C(190), UINT8_C(134), UINT8_C( 8), UINT8_C(113), UINT8_C( 61), UINT8_C(103), UINT8_C(100), UINT8_C( 23), UINT8_C(229), UINT8_C(146), UINT8_C( 97), UINT8_C( 95), UINT8_C( 32), UINT8_C(136), UINT8_C( 91), UINT8_C( 46), UINT8_C(252), UINT8_C(163), UINT8_C( 88)), simde_x_mm512_set_epu8(UINT8_C( 17), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 3), UINT8_C( 1), UINT8_C( 0), UINT8_C( 5), UINT8_C( 1), UINT8_C( 0), UINT8_C( 4), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 3), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 3), UINT8_C( 0), UINT8_C( 1), UINT8_C( 5), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 28), UINT8_C( 0), UINT8_C( 2), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 2), UINT8_C( 2), UINT8_C( 4), UINT8_C( 0), UINT8_C( 0), UINT8_C( 3), UINT8_C( 0), UINT8_C( 0), UINT8_C( 2)) }, { simde_x_mm512_set_epu8(UINT8_C( 46), UINT8_C( 43), UINT8_C(246), UINT8_C(157), UINT8_C( 80), UINT8_C(154), UINT8_C( 27), UINT8_C(118), UINT8_C(176), UINT8_C(216), UINT8_C( 46), UINT8_C(142), UINT8_C(198), 
UINT8_C(248), UINT8_C( 88), UINT8_C( 29), UINT8_C(176), UINT8_C( 25), UINT8_C(101), UINT8_C( 54), UINT8_C(103), UINT8_C(120), UINT8_C( 94), UINT8_C( 16), UINT8_C(197), UINT8_C(205), UINT8_C( 71), UINT8_C(246), UINT8_C(158), UINT8_C(176), UINT8_C(218), UINT8_C( 43), UINT8_C(235), UINT8_C(249), UINT8_C(116), UINT8_C(137), UINT8_C( 89), UINT8_C(212), UINT8_C(132), UINT8_C( 56), UINT8_C(230), UINT8_C(137), UINT8_C( 66), UINT8_C( 41), UINT8_C( 44), UINT8_C( 35), UINT8_C(189), UINT8_C(155), UINT8_C(125), UINT8_C(130), UINT8_C(123), UINT8_C(117), UINT8_C(123), UINT8_C(127), UINT8_C(151), UINT8_C( 60), UINT8_C(153), UINT8_C(185), UINT8_C(250), UINT8_C(100), UINT8_C( 83), UINT8_C(112), UINT8_C( 33), UINT8_C(140)), simde_x_mm512_set_epu8(UINT8_C( 36), UINT8_C( 33), UINT8_C( 42), UINT8_C( 75), UINT8_C(179), UINT8_C(172), UINT8_C(126), UINT8_C(171), UINT8_C(110), UINT8_C(150), UINT8_C(107), UINT8_C(180), UINT8_C(134), UINT8_C( 73), UINT8_C(207), UINT8_C( 15), UINT8_C(241), UINT8_C(103), UINT8_C(103), UINT8_C(150), UINT8_C(103), UINT8_C( 58), UINT8_C(104), UINT8_C( 35), UINT8_C(249), UINT8_C( 79), UINT8_C(113), UINT8_C( 97), UINT8_C(189), UINT8_C(197), UINT8_C(174), UINT8_C(222), UINT8_C(224), UINT8_C(104), UINT8_C(123), UINT8_C(124), UINT8_C( 49), UINT8_C(226), UINT8_C( 37), UINT8_C( 22), UINT8_C(105), UINT8_C(157), UINT8_C(110), UINT8_C( 52), UINT8_C(254), UINT8_C(103), UINT8_C(162), UINT8_C(210), UINT8_C(202), UINT8_C( 39), UINT8_C(193), UINT8_C(151), UINT8_C(183), UINT8_C( 73), UINT8_C( 97), UINT8_C(187), UINT8_C(102), UINT8_C(195), UINT8_C( 68), UINT8_C(190), UINT8_C( 65), UINT8_C( 60), UINT8_C(165), UINT8_C(126)), simde_x_mm512_set_epu8(UINT8_C( 1), UINT8_C( 1), UINT8_C( 5), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 3), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 2), 
UINT8_C( 0), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 2), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 3), UINT8_C( 2), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 3), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 3), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1)) }, { simde_x_mm512_set_epu8(UINT8_C(240), UINT8_C(169), UINT8_C( 8), UINT8_C( 54), UINT8_C( 66), UINT8_C( 99), UINT8_C( 14), UINT8_C( 32), UINT8_C(148), UINT8_C( 92), UINT8_C(122), UINT8_C(200), UINT8_C(192), UINT8_C(186), UINT8_C(225), UINT8_C( 52), UINT8_C(182), UINT8_C(244), UINT8_C(253), UINT8_C(228), UINT8_C(141), UINT8_C(228), UINT8_C(148), UINT8_C(168), UINT8_C(231), UINT8_C(107), UINT8_C( 47), UINT8_C(205), UINT8_C(126), UINT8_C( 7), UINT8_C(182), UINT8_C(245), UINT8_C(165), UINT8_C(186), UINT8_C(213), UINT8_C( 84), UINT8_C( 19), UINT8_C(131), UINT8_C( 54), UINT8_C( 13), UINT8_C(185), UINT8_C(182), UINT8_C( 72), UINT8_C( 61), UINT8_C(125), UINT8_C(104), UINT8_C(147), UINT8_C( 11), UINT8_C( 89), UINT8_C(204), UINT8_C( 62), UINT8_C(163), UINT8_C(198), UINT8_C(162), UINT8_C(205), UINT8_C( 9), UINT8_C(182), UINT8_C(123), UINT8_C( 65), UINT8_C(208), UINT8_C(145), UINT8_C(179), UINT8_C( 34), UINT8_C(195)), simde_x_mm512_set_epu8(UINT8_C(141), UINT8_C(103), UINT8_C(116), UINT8_C( 12), UINT8_C(174), UINT8_C(226), UINT8_C(193), UINT8_C(175), UINT8_C(155), UINT8_C(174), UINT8_C( 73), UINT8_C( 6), UINT8_C(141), UINT8_C(140), UINT8_C(254), UINT8_C(193), UINT8_C(100), UINT8_C(151), UINT8_C( 14), UINT8_C( 19), UINT8_C( 38), UINT8_C(115), UINT8_C(201), UINT8_C(118), UINT8_C( 74), UINT8_C(186), UINT8_C( 89), UINT8_C(183), UINT8_C( 65), UINT8_C(138), UINT8_C( 64), UINT8_C( 90), UINT8_C(152), UINT8_C(241), UINT8_C(229), UINT8_C(218), UINT8_C(126), UINT8_C( 38), UINT8_C(159), UINT8_C( 27), 
UINT8_C(164), UINT8_C(199), UINT8_C( 25), UINT8_C(253), UINT8_C(181), UINT8_C(104), UINT8_C( 6), UINT8_C(183), UINT8_C( 36), UINT8_C(203), UINT8_C(138), UINT8_C(145), UINT8_C(116), UINT8_C(155), UINT8_C(218), UINT8_C( 24), UINT8_C(205), UINT8_C(238), UINT8_C(242), UINT8_C( 26), UINT8_C(226), UINT8_C( 76), UINT8_C(226), UINT8_C(214)), simde_x_mm512_set_epu8(UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 4), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 33), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 18), UINT8_C( 12), UINT8_C( 3), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 3), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 2), UINT8_C( 2), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 3), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 24), UINT8_C( 0), UINT8_C( 2), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 8), UINT8_C( 0), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0)) }, { simde_x_mm512_set_epu8(UINT8_C(197), UINT8_C( 52), UINT8_C(145), UINT8_C( 20), UINT8_C( 26), UINT8_C(178), UINT8_C(121), UINT8_C( 16), UINT8_C( 45), UINT8_C(229), UINT8_C( 11), UINT8_C(230), UINT8_C( 53), UINT8_C( 2), UINT8_C(234), UINT8_C( 7), UINT8_C(207), UINT8_C(146), UINT8_C(169), UINT8_C(233), UINT8_C(206), UINT8_C(116), UINT8_C( 55), UINT8_C(156), UINT8_C(180), UINT8_C( 91), UINT8_C( 56), UINT8_C(146), UINT8_C( 55), UINT8_C(137), UINT8_C(200), UINT8_C( 76), UINT8_C( 43), UINT8_C(245), UINT8_C(138), UINT8_C( 3), UINT8_C(213), UINT8_C(156), UINT8_C(166), UINT8_C(234), UINT8_C(199), UINT8_C( 2), UINT8_C( 86), UINT8_C( 72), UINT8_C( 93), UINT8_C(254), UINT8_C(190), UINT8_C(121), UINT8_C(119), UINT8_C( 75), UINT8_C(159), UINT8_C( 76), UINT8_C( 70), UINT8_C(218), UINT8_C( 17), 
UINT8_C(239), UINT8_C( 43), UINT8_C(152), UINT8_C(222), UINT8_C( 80), UINT8_C(197), UINT8_C(113), UINT8_C(112), UINT8_C( 81)), simde_x_mm512_set_epu8(UINT8_C(193), UINT8_C(162), UINT8_C(178), UINT8_C( 36), UINT8_C(178), UINT8_C( 86), UINT8_C( 79), UINT8_C(167), UINT8_C(179), UINT8_C( 45), UINT8_C( 18), UINT8_C(231), UINT8_C(113), UINT8_C(127), UINT8_C(211), UINT8_C(181), UINT8_C(121), UINT8_C(171), UINT8_C( 76), UINT8_C(135), UINT8_C( 15), UINT8_C(133), UINT8_C(247), UINT8_C( 32), UINT8_C(181), UINT8_C(168), UINT8_C(236), UINT8_C( 99), UINT8_C( 85), UINT8_C(151), UINT8_C( 36), UINT8_C( 99), UINT8_C(101), UINT8_C( 42), UINT8_C( 63), UINT8_C( 96), UINT8_C(210), UINT8_C(198), UINT8_C(202), UINT8_C(105), UINT8_C(214), UINT8_C( 74), UINT8_C(199), UINT8_C( 17), UINT8_C(234), UINT8_C( 22), UINT8_C(134), UINT8_C(112), UINT8_C( 62), UINT8_C(141), UINT8_C(156), UINT8_C( 91), UINT8_C( 99), UINT8_C( 24), UINT8_C(198), UINT8_C(131), UINT8_C( 88), UINT8_C(136), UINT8_C( 61), UINT8_C( 94), UINT8_C(189), UINT8_C(213), UINT8_C(249), UINT8_C(131)), simde_x_mm512_set_epu8(UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 2), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 5), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 2), UINT8_C( 1), UINT8_C( 13), UINT8_C( 0), UINT8_C( 0), UINT8_C( 4), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 5), UINT8_C( 0), UINT8_C( 0), UINT8_C( 5), UINT8_C( 2), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 4), UINT8_C( 0), UINT8_C( 11), UINT8_C( 1), UINT8_C( 1), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 9), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 1), UINT8_C( 3), UINT8_C( 0), UINT8_C( 1), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = 
simde_mm512_div_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m512i_u8(r, ==, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm512_div_epu16.
 *
 * test_vec holds 8 cases. Each case has three 512-bit vectors built with
 * simde_x_mm512_set_epu16 from 32 UINT16_C literals:
 *   a - first operand passed to simde_mm512_div_epu16
 *   b - second operand passed to simde_mm512_div_epu16
 *   r - expected result the call is compared against
 * Spot checks of the table show r holding the truncated unsigned quotient
 * of the matching a and b lanes (e.g. 43974 / 8074 = 5, 23938 / 768 = 31).
 *
 * Returns 0 once every case has been run through the assertion.
 * NOTE(review): SIMDE_MUNIT_TEST_ARGS and simde_assert_m512i_u16 are defined
 * elsewhere; the parameter list and what happens on a mismatch are not
 * visible from this function. */
static int test_simde_mm512_div_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu16(UINT16_C( 10545), UINT16_C( 43974), UINT16_C( 10284), UINT16_C( 62003), UINT16_C( 35545), UINT16_C( 55289), UINT16_C( 51493), UINT16_C( 35101), UINT16_C( 59818), UINT16_C( 61822), UINT16_C( 46602), UINT16_C( 53446), UINT16_C( 23938), UINT16_C( 50097), UINT16_C( 48095), UINT16_C( 35837), UINT16_C( 49063), UINT16_C( 57920), UINT16_C( 54730), UINT16_C( 28273), UINT16_C( 23021), UINT16_C( 18146), UINT16_C( 33883), UINT16_C( 65368), UINT16_C( 26666), UINT16_C( 13822), UINT16_C( 34046), UINT16_C( 24651), UINT16_C( 8048), UINT16_C( 38825), UINT16_C( 44126), UINT16_C( 28762)), simde_x_mm512_set_epu16(UINT16_C( 38607), UINT16_C( 8074), UINT16_C( 18000), UINT16_C( 35687), UINT16_C( 40415), UINT16_C( 3254), UINT16_C( 55282), UINT16_C( 38855), UINT16_C( 41330), UINT16_C( 37148), UINT16_C( 25803), UINT16_C( 25877), UINT16_C( 768), UINT16_C( 16244), UINT16_C( 11114), UINT16_C( 58324), UINT16_C( 18192), UINT16_C( 32532), UINT16_C( 33700), UINT16_C( 60373), UINT16_C( 20183), UINT16_C( 64042), UINT16_C( 2502), UINT16_C( 18488), UINT16_C( 22771), UINT16_C( 21470), UINT16_C( 4556), UINT16_C( 26138), UINT16_C( 19085), UINT16_C( 64613), UINT16_C( 55602), UINT16_C( 63371)), simde_x_mm512_set_epu16(UINT16_C( 0), UINT16_C( 5), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 16), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 1), UINT16_C( 1), UINT16_C( 2), UINT16_C( 31), UINT16_C( 3), UINT16_C( 4), UINT16_C( 0), UINT16_C( 2), UINT16_C( 1), UINT16_C( 1), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 13), UINT16_C( 3), UINT16_C( 1), UINT16_C( 0), UINT16_C( 7), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0)) }, { simde_x_mm512_set_epu16(UINT16_C( 20057), UINT16_C( 26978), 
UINT16_C( 45741), UINT16_C( 34503), UINT16_C( 54259), UINT16_C( 41436), UINT16_C( 43883), UINT16_C( 11009), UINT16_C( 50212), UINT16_C( 9014), UINT16_C( 24117), UINT16_C( 34039), UINT16_C( 58348), UINT16_C( 8311), UINT16_C( 31759), UINT16_C( 4002), UINT16_C( 7525), UINT16_C( 3321), UINT16_C( 47299), UINT16_C( 64213), UINT16_C( 13644), UINT16_C( 48153), UINT16_C( 45234), UINT16_C( 51700), UINT16_C( 7513), UINT16_C( 1114), UINT16_C( 65336), UINT16_C( 10389), UINT16_C( 33688), UINT16_C( 9445), UINT16_C( 60332), UINT16_C( 41466)), simde_x_mm512_set_epu16(UINT16_C( 48157), UINT16_C( 56913), UINT16_C( 55050), UINT16_C( 48859), UINT16_C( 27895), UINT16_C( 48343), UINT16_C( 59593), UINT16_C( 60425), UINT16_C( 62587), UINT16_C( 54231), UINT16_C( 52444), UINT16_C( 8140), UINT16_C( 58695), UINT16_C( 2476), UINT16_C( 41101), UINT16_C( 7948), UINT16_C( 26094), UINT16_C( 52354), UINT16_C( 30122), UINT16_C( 47688), UINT16_C( 43801), UINT16_C( 57764), UINT16_C( 1809), UINT16_C( 33603), UINT16_C( 8271), UINT16_C( 4936), UINT16_C( 7627), UINT16_C( 20477), UINT16_C( 14608), UINT16_C( 25470), UINT16_C( 45836), UINT16_C( 25611)), simde_x_mm512_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 4), UINT16_C( 0), UINT16_C( 3), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 25), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 8), UINT16_C( 0), UINT16_C( 2), UINT16_C( 0), UINT16_C( 1), UINT16_C( 1)) }, { simde_x_mm512_set_epu16(UINT16_C( 26902), UINT16_C( 51011), UINT16_C( 57631), UINT16_C( 57521), UINT16_C( 43405), UINT16_C( 18318), UINT16_C( 44023), UINT16_C( 9770), UINT16_C( 4118), UINT16_C( 33099), UINT16_C( 6621), UINT16_C( 57639), UINT16_C( 22002), UINT16_C( 33155), UINT16_C( 15537), UINT16_C( 38743), UINT16_C( 26466), UINT16_C( 21183), UINT16_C( 5811), UINT16_C( 17016), UINT16_C( 
51162), UINT16_C( 46775), UINT16_C( 54252), UINT16_C( 64603), UINT16_C( 30444), UINT16_C( 20573), UINT16_C( 50572), UINT16_C( 25607), UINT16_C( 36721), UINT16_C( 36797), UINT16_C( 27147), UINT16_C( 62271)), simde_x_mm512_set_epu16(UINT16_C( 55381), UINT16_C( 52839), UINT16_C( 60314), UINT16_C( 33159), UINT16_C( 32076), UINT16_C( 51820), UINT16_C( 13383), UINT16_C( 43204), UINT16_C( 18058), UINT16_C( 42817), UINT16_C( 56737), UINT16_C( 40285), UINT16_C( 49341), UINT16_C( 39323), UINT16_C( 53205), UINT16_C( 27016), UINT16_C( 59998), UINT16_C( 61452), UINT16_C( 37377), UINT16_C( 37691), UINT16_C( 64794), UINT16_C( 6696), UINT16_C( 3074), UINT16_C( 59025), UINT16_C( 43625), UINT16_C( 28576), UINT16_C( 36042), UINT16_C( 42716), UINT16_C( 47937), UINT16_C( 64195), UINT16_C( 8579), UINT16_C( 676)), simde_x_mm512_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 1), UINT16_C( 0), UINT16_C( 3), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 6), UINT16_C( 17), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 3), UINT16_C( 92)) }, { simde_x_mm512_set_epu16(UINT16_C( 7566), UINT16_C( 25511), UINT16_C( 59705), UINT16_C( 13989), UINT16_C( 13965), UINT16_C( 34471), UINT16_C( 77), UINT16_C( 35152), UINT16_C( 21705), UINT16_C( 42504), UINT16_C( 63033), UINT16_C( 56884), UINT16_C( 42389), UINT16_C( 61527), UINT16_C( 7598), UINT16_C( 23051), UINT16_C( 13886), UINT16_C( 28688), UINT16_C( 30551), UINT16_C( 36608), UINT16_C( 56045), UINT16_C( 38987), UINT16_C( 64798), UINT16_C( 22350), UINT16_C( 7981), UINT16_C( 50477), UINT16_C( 46688), UINT16_C( 16804), UINT16_C( 33660), UINT16_C( 63749), UINT16_C( 29649), UINT16_C( 64815)), simde_x_mm512_set_epu16(UINT16_C( 18409), UINT16_C( 19069), UINT16_C( 20979), UINT16_C( 35774), UINT16_C( 8112), UINT16_C( 
25085), UINT16_C( 31664), UINT16_C( 55404), UINT16_C( 63329), UINT16_C( 19403), UINT16_C( 33006), UINT16_C( 20365), UINT16_C( 22045), UINT16_C( 41935), UINT16_C( 28665), UINT16_C( 35793), UINT16_C( 26789), UINT16_C( 40241), UINT16_C( 34076), UINT16_C( 36189), UINT16_C( 49507), UINT16_C( 32891), UINT16_C( 45700), UINT16_C( 31541), UINT16_C( 33237), UINT16_C( 50719), UINT16_C( 22782), UINT16_C( 46902), UINT16_C( 62792), UINT16_C( 907), UINT16_C( 9939), UINT16_C( 395)), simde_x_mm512_set_epu16(UINT16_C( 0), UINT16_C( 1), UINT16_C( 2), UINT16_C( 0), UINT16_C( 1), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 2), UINT16_C( 1), UINT16_C( 2), UINT16_C( 1), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 1), UINT16_C( 1), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 2), UINT16_C( 0), UINT16_C( 0), UINT16_C( 70), UINT16_C( 2), UINT16_C( 164)) }, { simde_x_mm512_set_epu16(UINT16_C( 40553), UINT16_C( 9260), UINT16_C( 6846), UINT16_C( 21618), UINT16_C( 20365), UINT16_C( 26413), UINT16_C( 7670), UINT16_C( 6521), UINT16_C( 13052), UINT16_C( 19892), UINT16_C( 40021), UINT16_C( 58092), UINT16_C( 12337), UINT16_C( 14080), UINT16_C( 6934), UINT16_C( 61515), UINT16_C( 1885), UINT16_C( 11733), UINT16_C( 7371), UINT16_C( 24583), UINT16_C( 48349), UINT16_C( 37475), UINT16_C( 47206), UINT16_C( 54691), UINT16_C( 63460), UINT16_C( 2107), UINT16_C( 62169), UINT16_C( 38808), UINT16_C( 21341), UINT16_C( 51834), UINT16_C( 26283), UINT16_C( 38235)), simde_x_mm512_set_epu16(UINT16_C( 9227), UINT16_C( 20728), UINT16_C( 22448), UINT16_C( 22271), UINT16_C( 38010), UINT16_C( 3228), UINT16_C( 38598), UINT16_C( 15839), UINT16_C( 4554), UINT16_C( 22831), UINT16_C( 44103), UINT16_C( 32351), UINT16_C( 46747), UINT16_C( 20983), UINT16_C( 61889), UINT16_C( 26454), UINT16_C( 63311), UINT16_C( 19804), UINT16_C( 62773), UINT16_C( 56806), UINT16_C( 36384), UINT16_C( 25302), UINT16_C( 37143), UINT16_C( 3478), 
UINT16_C( 59861), UINT16_C( 61175), UINT16_C( 48658), UINT16_C( 23119), UINT16_C( 30252), UINT16_C( 63116), UINT16_C( 13170), UINT16_C( 44087)), simde_x_mm512_set_epu16(UINT16_C( 4), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 8), UINT16_C( 0), UINT16_C( 0), UINT16_C( 2), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 2), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 1), UINT16_C( 1), UINT16_C( 15), UINT16_C( 1), UINT16_C( 0), UINT16_C( 1), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0)) }, { simde_x_mm512_set_epu16(UINT16_C( 22335), UINT16_C( 12112), UINT16_C( 9189), UINT16_C( 1311), UINT16_C( 58441), UINT16_C( 13615), UINT16_C( 43712), UINT16_C( 31469), UINT16_C( 12162), UINT16_C( 56166), UINT16_C( 41769), UINT16_C( 50135), UINT16_C( 50998), UINT16_C( 24958), UINT16_C( 2725), UINT16_C( 39768), UINT16_C( 47167), UINT16_C( 24484), UINT16_C( 16711), UINT16_C( 44632), UINT16_C( 46990), UINT16_C( 25102), UINT16_C( 6573), UINT16_C( 22274), UINT16_C( 49039), UINT16_C( 38914), UINT16_C( 32256), UINT16_C( 41529), UINT16_C( 62756), UINT16_C( 61238), UINT16_C( 8613), UINT16_C( 51028)), simde_x_mm512_set_epu16(UINT16_C( 30472), UINT16_C( 36773), UINT16_C( 7714), UINT16_C( 18947), UINT16_C( 7066), UINT16_C( 47844), UINT16_C( 58651), UINT16_C( 1841), UINT16_C( 35799), UINT16_C( 50579), UINT16_C( 32926), UINT16_C( 26598), UINT16_C( 39537), UINT16_C( 61137), UINT16_C( 5946), UINT16_C( 2262), UINT16_C( 60116), UINT16_C( 12953), UINT16_C( 38045), UINT16_C( 47787), UINT16_C( 30618), UINT16_C( 37811), UINT16_C( 51748), UINT16_C( 52236), UINT16_C( 23394), UINT16_C( 2441), UINT16_C( 32382), UINT16_C( 9384), UINT16_C( 25792), UINT16_C( 56163), UINT16_C( 22658), UINT16_C( 20939)), simde_x_mm512_set_epu16(UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 8), UINT16_C( 0), UINT16_C( 0), UINT16_C( 17), UINT16_C( 0), UINT16_C( 1), UINT16_C( 1), 
UINT16_C( 1), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 17), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 2), UINT16_C( 15), UINT16_C( 0), UINT16_C( 4), UINT16_C( 2), UINT16_C( 1), UINT16_C( 0), UINT16_C( 2)) }, { simde_x_mm512_set_epu16(UINT16_C( 13867), UINT16_C( 28091), UINT16_C( 35390), UINT16_C( 56986), UINT16_C( 31509), UINT16_C( 63331), UINT16_C( 9520), UINT16_C( 29929), UINT16_C( 24571), UINT16_C( 37741), UINT16_C( 52686), UINT16_C( 14609), UINT16_C( 31001), UINT16_C( 823), UINT16_C( 45697), UINT16_C( 38351), UINT16_C( 35780), UINT16_C( 41006), UINT16_C( 3633), UINT16_C( 45500), UINT16_C( 30184), UINT16_C( 27396), UINT16_C( 1171), UINT16_C( 25936), UINT16_C( 61703), UINT16_C( 57786), UINT16_C( 19453), UINT16_C( 30002), UINT16_C( 6315), UINT16_C( 244), UINT16_C( 8399), UINT16_C( 57456)), simde_x_mm512_set_epu16(UINT16_C( 18752), UINT16_C( 27431), UINT16_C( 53704), UINT16_C( 42625), UINT16_C( 42869), UINT16_C( 41745), UINT16_C( 47543), UINT16_C( 11401), UINT16_C( 26966), UINT16_C( 26500), UINT16_C( 7486), UINT16_C( 7825), UINT16_C( 17767), UINT16_C( 58506), UINT16_C( 36234), UINT16_C( 38373), UINT16_C( 54992), UINT16_C( 46906), UINT16_C( 52104), UINT16_C( 31285), UINT16_C( 34932), UINT16_C( 29467), UINT16_C( 33781), UINT16_C( 883), UINT16_C( 23995), UINT16_C( 43069), UINT16_C( 53587), UINT16_C( 11327), UINT16_C( 36611), UINT16_C( 7518), UINT16_C( 30015), UINT16_C( 30285)), simde_x_mm512_set_epu16(UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 2), UINT16_C( 0), UINT16_C( 1), UINT16_C( 7), UINT16_C( 1), UINT16_C( 1), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 29), UINT16_C( 2), UINT16_C( 1), UINT16_C( 0), UINT16_C( 2), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1)) }, { 
simde_x_mm512_set_epu16(UINT16_C( 19003), UINT16_C( 26627), UINT16_C( 63705), UINT16_C( 34218), UINT16_C( 36055), UINT16_C( 13847), UINT16_C( 44625), UINT16_C( 9042), UINT16_C( 36148), UINT16_C( 11660), UINT16_C( 32339), UINT16_C( 39715), UINT16_C( 47178), UINT16_C( 21002), UINT16_C( 60706), UINT16_C( 8527), UINT16_C( 26072), UINT16_C( 29611), UINT16_C( 18348), UINT16_C( 953), UINT16_C( 33382), UINT16_C( 22717), UINT16_C( 50122), UINT16_C( 52414), UINT16_C( 59278), UINT16_C( 54225), UINT16_C( 31952), UINT16_C( 29752), UINT16_C( 37488), UINT16_C( 20614), UINT16_C( 1055), UINT16_C( 61149)), simde_x_mm512_set_epu16(UINT16_C( 59727), UINT16_C( 3072), UINT16_C( 8626), UINT16_C( 14922), UINT16_C( 64116), UINT16_C( 36372), UINT16_C( 22591), UINT16_C( 8828), UINT16_C( 64048), UINT16_C( 56808), UINT16_C( 56651), UINT16_C( 39760), UINT16_C( 59817), UINT16_C( 50914), UINT16_C( 21275), UINT16_C( 35106), UINT16_C( 6020), UINT16_C( 27245), UINT16_C( 34763), UINT16_C( 25208), UINT16_C( 25908), UINT16_C( 21036), UINT16_C( 36366), UINT16_C( 25589), UINT16_C( 2188), UINT16_C( 36219), UINT16_C( 56227), UINT16_C( 50409), UINT16_C( 8889), UINT16_C( 58476), UINT16_C( 24556), UINT16_C( 24873)), simde_x_mm512_set_epu16(UINT16_C( 0), UINT16_C( 8), UINT16_C( 7), UINT16_C( 2), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 2), UINT16_C( 0), UINT16_C( 4), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 1), UINT16_C( 1), UINT16_C( 1), UINT16_C( 2), UINT16_C( 27), UINT16_C( 1), UINT16_C( 0), UINT16_C( 0), UINT16_C( 4), UINT16_C( 0), UINT16_C( 0), UINT16_C( 2)) } };
/* Run simde_mm512_div_epu16 on every table entry (the entry count is derived
 * from the array size) and compare the result with the entry's expected r. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_div_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m512i_u16(r, ==, test_vec[i].r); } return 0; }
/* Table-driven test for simde_mm512_div_epu32.
 *
 * test_vec holds 8 cases of three 512-bit vectors, each built with
 * simde_x_mm512_set_epu32 from 16 UINT32_C literals: a and b are the operands
 * passed to simde_mm512_div_epu32 and r is the expected result, checked with
 * simde_assert_m512i_u32. Spot checks of the table show r holding the
 * truncated unsigned quotient of the matching lanes
 * (e.g. 1568850865 / 50347892 = 31). Returns 0 after all cases have run.
 * NOTE(review): SIMDE_MUNIT_TEST_ARGS and simde_assert_m512i_u32 are defined
 * elsewhere; failure behaviour is not visible from this function. */
static int test_simde_mm512_div_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } 
test_vec[8] = { { simde_x_mm512_set_epu32(UINT32_C( 691121094), UINT32_C( 674034227), UINT32_C(2329532409), UINT32_C(3374680349), UINT32_C(3920294270), UINT32_C(3054162118), UINT32_C(1568850865), UINT32_C(3151989757), UINT32_C(3215450688), UINT32_C(3586813553), UINT32_C(1508722402), UINT32_C(2220621656), UINT32_C(1747596798), UINT32_C(2231263307), UINT32_C( 527472553), UINT32_C(2891870298)), simde_x_mm512_set_epu32(UINT32_C(2530156426), UINT32_C(1179683687), UINT32_C(2648640694), UINT32_C(3623000007), UINT32_C(2708640028), UINT32_C(1691051285), UINT32_C( 50347892), UINT32_C( 728425428), UINT32_C(1192263444), UINT32_C(2208623573), UINT32_C(1322777130), UINT32_C( 163989560), UINT32_C(1492341726), UINT32_C( 298608154), UINT32_C(1250819173), UINT32_C(3643996043)), simde_x_mm512_set_epu32(UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 1), UINT32_C( 1), UINT32_C( 31), UINT32_C( 4), UINT32_C( 2), UINT32_C( 1), UINT32_C( 1), UINT32_C( 13), UINT32_C( 1), UINT32_C( 7), UINT32_C( 0), UINT32_C( 0)) }, { simde_x_mm512_set_epu32(UINT32_C(1314482530), UINT32_C(2997716679), UINT32_C(3555959260), UINT32_C(2875927297), UINT32_C(3290702646), UINT32_C(1580565751), UINT32_C(3823902839), UINT32_C(2081361826), UINT32_C( 493161721), UINT32_C(3099851477), UINT32_C( 894221337), UINT32_C(2964507124), UINT32_C( 492373082), UINT32_C(4281870485), UINT32_C(2207786213), UINT32_C(3953959418)), simde_x_mm512_set_epu32(UINT32_C(3156074065), UINT32_C(3607805659), UINT32_C(1828175063), UINT32_C(3905547273), UINT32_C(4101755863), UINT32_C(3436978124), UINT32_C(3846637996), UINT32_C(2693603084), UINT32_C(1710148738), UINT32_C(1974123080), UINT32_C(2870600100), UINT32_C( 118588227), UINT32_C( 542053192), UINT32_C( 499863549), UINT32_C( 957375358), UINT32_C(3003933707)), simde_x_mm512_set_epu32(UINT32_C( 0), UINT32_C( 0), UINT32_C( 1), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 1), UINT32_C( 0), UINT32_C( 24), UINT32_C( 0), UINT32_C( 
8), UINT32_C( 2), UINT32_C( 1)) }, { simde_x_mm512_set_epu32(UINT32_C(1763100483), UINT32_C(3776962737), UINT32_C(2844608398), UINT32_C(2885101098), UINT32_C( 269910347), UINT32_C( 433971495), UINT32_C(1441956227), UINT32_C(1018271575), UINT32_C(1734496959), UINT32_C( 380846712), UINT32_C(3352999607), UINT32_C(3555523675), UINT32_C(1995198557), UINT32_C(3314312199), UINT32_C(2406584253), UINT32_C(1779168063)), simde_x_mm512_set_epu32(UINT32_C(3629502055), UINT32_C(3952771463), UINT32_C(2102184556), UINT32_C( 877111492), UINT32_C(1183491905), UINT32_C(3718356317), UINT32_C(3233651099), UINT32_C(3486869896), UINT32_C(3932090380), UINT32_C(2449576763), UINT32_C(4246346280), UINT32_C( 201516689), UINT32_C(2859036576), UINT32_C(2362091228), UINT32_C(3141663427), UINT32_C( 562234020)), simde_x_mm512_set_epu32(UINT32_C( 0), UINT32_C( 0), UINT32_C( 1), UINT32_C( 3), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 17), UINT32_C( 0), UINT32_C( 1), UINT32_C( 0), UINT32_C( 3)) }, { simde_x_mm512_set_epu32(UINT32_C( 495870887), UINT32_C(3912840869), UINT32_C( 915244711), UINT32_C( 5081424), UINT32_C(1422501384), UINT32_C(4130987572), UINT32_C(2778067031), UINT32_C( 497965579), UINT32_C( 910061584), UINT32_C(2002226944), UINT32_C(3673004107), UINT32_C(4246624078), UINT32_C( 523093293), UINT32_C(3059761572), UINT32_C(2206005509), UINT32_C(1943141679)), simde_x_mm512_set_epu32(UINT32_C(1206471293), UINT32_C(1374915518), UINT32_C( 531653117), UINT32_C(2075187308), UINT32_C(4150348747), UINT32_C(2163101581), UINT32_C(1444783055), UINT32_C(1878625233), UINT32_C(1755684145), UINT32_C(2233240925), UINT32_C(3244523643), UINT32_C(2995026741), UINT32_C(2178270751), UINT32_C(1493088054), UINT32_C(4115137419), UINT32_C( 651362699)), simde_x_mm512_set_epu32(UINT32_C( 0), UINT32_C( 2), UINT32_C( 1), UINT32_C( 0), UINT32_C( 0), UINT32_C( 1), UINT32_C( 1), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 1), UINT32_C( 1), UINT32_C( 
0), UINT32_C( 2), UINT32_C( 0), UINT32_C( 2)) }, { simde_x_mm512_set_epu32(UINT32_C(2657690668), UINT32_C( 448681074), UINT32_C(1334667053), UINT32_C( 502667641), UINT32_C( 855395764), UINT32_C(2622874348), UINT32_C( 808531712), UINT32_C( 454488139), UINT32_C( 123547093), UINT32_C( 483090439), UINT32_C(3168637539), UINT32_C(3093747107), UINT32_C(4158916667), UINT32_C(4074346392), UINT32_C(1398655610), UINT32_C(1722520923)), simde_x_mm512_set_epu32(UINT32_C( 604721400), UINT32_C(1471174399), UINT32_C(2491026588), UINT32_C(2529574367), UINT32_C( 298473775), UINT32_C(2890366559), UINT32_C(3063632375), UINT32_C(4055983958), UINT32_C(4149169500), UINT32_C(4113948134), UINT32_C(2384487126), UINT32_C(2434207126), UINT32_C(3923111671), UINT32_C(3188873807), UINT32_C(1982658188), UINT32_C( 863153207)), simde_x_mm512_set_epu32(UINT32_C( 4), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 2), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 1), UINT32_C( 1), UINT32_C( 1), UINT32_C( 1), UINT32_C( 0), UINT32_C( 1)) }, { simde_x_mm512_set_epu32(UINT32_C(1463758672), UINT32_C( 602211615), UINT32_C(3830002991), UINT32_C(2864741101), UINT32_C( 797104998), UINT32_C(2737423319), UINT32_C(3342229886), UINT32_C( 178625368), UINT32_C(3091160996), UINT32_C(1095216728), UINT32_C(3079561742), UINT32_C( 430790402), UINT32_C(3213858818), UINT32_C(2113970745), UINT32_C(4112838454), UINT32_C( 564512596)), simde_x_mm512_set_epu32(UINT32_C(1997049765), UINT32_C( 505563651), UINT32_C( 463125220), UINT32_C(3843753777), UINT32_C(2346173843), UINT32_C(2157864934), UINT32_C(2591157969), UINT32_C( 389679318), UINT32_C(3939775129), UINT32_C(2493364907), UINT32_C(2006619059), UINT32_C(3391409164), UINT32_C(1533151625), UINT32_C(2122196136), UINT32_C(1690360675), UINT32_C(1484935627)), simde_x_mm512_set_epu32(UINT32_C( 0), UINT32_C( 1), UINT32_C( 8), UINT32_C( 0), UINT32_C( 0), UINT32_C( 1), UINT32_C( 1), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 1), UINT32_C( 
0), UINT32_C( 2), UINT32_C( 0), UINT32_C( 2), UINT32_C( 0)) }, { simde_x_mm512_set_epu32(UINT32_C( 908815803), UINT32_C(2319376026), UINT32_C(2065037155), UINT32_C( 623932649), UINT32_C(1610322797), UINT32_C(3452844305), UINT32_C(2031682359), UINT32_C(2994836943), UINT32_C(2344919086), UINT32_C( 238137788), UINT32_C(1978166020), UINT32_C( 76768592), UINT32_C(4043825594), UINT32_C(1274901810), UINT32_C( 413860084), UINT32_C( 550494320)), simde_x_mm512_set_epu32(UINT32_C(1228958503), UINT32_C(3519587969), UINT32_C(2809504529), UINT32_C(3115789449), UINT32_C(1767270276), UINT32_C( 490610321), UINT32_C(1164436618), UINT32_C(2374669797), UINT32_C(3604002618), UINT32_C(3414719029), UINT32_C(2289333019), UINT32_C(2213872499), UINT32_C(1572579389), UINT32_C(3511888959), UINT32_C(2399346014), UINT32_C(1967093325)), simde_x_mm512_set_epu32(UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 7), UINT32_C( 1), UINT32_C( 1), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 2), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0)) }, { simde_x_mm512_set_epu32(UINT32_C(1245407235), UINT32_C(4175005098), UINT32_C(2362914327), UINT32_C(2924553042), UINT32_C(2369006988), UINT32_C(2119408419), UINT32_C(3091878410), UINT32_C(3978436943), UINT32_C(1708684203), UINT32_C(1202455481), UINT32_C(2187745469), UINT32_C(3284847806), UINT32_C(3884897233), UINT32_C(2094036024), UINT32_C(2456834182), UINT32_C( 69201629)), simde_x_mm512_set_epu32(UINT32_C(3914271744), UINT32_C( 565328458), UINT32_C(4201942548), UINT32_C(1480532604), UINT32_C(4197506536), UINT32_C(3712719696), UINT32_C(3920217826), UINT32_C(1394313506), UINT32_C( 394553965), UINT32_C(2278253176), UINT32_C(1697927724), UINT32_C(2383307765), UINT32_C( 143428987), UINT32_C(3684943081), UINT32_C( 582607980), UINT32_C(1609326889)), simde_x_mm512_set_epu32(UINT32_C( 0), UINT32_C( 7), UINT32_C( 0), UINT32_C( 1), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 2), UINT32_C( 4), UINT32_C( 0), UINT32_C( 
1), UINT32_C( 1), UINT32_C( 27), UINT32_C( 0), UINT32_C( 4), UINT32_C( 0)) } };
/* Run simde_mm512_div_epu32 on every table entry (the entry count is derived
 * from the array size) and compare the result with the entry's expected r
 * using simde_assert_m512i_u32; return 0 once all entries have been checked. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_div_epu32(test_vec[i].a, test_vec[i].b); simde_assert_m512i_u32(r, ==, test_vec[i].r); } return 0; }
static int test_simde_mm512_mask_div_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask16 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu32(UINT32_C( 691121094), UINT32_C( 674034227), UINT32_C(2329532409), UINT32_C(3374680349), UINT32_C(3920294270), UINT32_C(3054162118), UINT32_C(1568850865), UINT32_C(3151989757), UINT32_C(3215450688), UINT32_C(3586813553), UINT32_C(1508722402), UINT32_C(2220621656), UINT32_C(1747596798), UINT32_C(2231263307), UINT32_C( 527472553), UINT32_C(2891870298)), UINT16_C(63371), simde_x_mm512_set_epu32(UINT32_C(3953959418), UINT32_C(2530156426), UINT32_C(1179683687), UINT32_C(2648640694), UINT32_C(3623000007), UINT32_C(2708640028), UINT32_C(1691051285), UINT32_C( 50347892), UINT32_C( 728425428), UINT32_C(1192263444), UINT32_C(2208623573), UINT32_C(1322777130), UINT32_C( 163989560), UINT32_C(1492341726), UINT32_C( 298608154), UINT32_C(1250819173)), simde_x_mm512_set_epu32(UINT32_C(3003933707), UINT32_C(1314482530), UINT32_C(2997716679), UINT32_C(3555959260), UINT32_C(2875927297), UINT32_C(3290702646), UINT32_C(1580565751), UINT32_C(3823902839), UINT32_C(2081361826), UINT32_C( 493161721), UINT32_C(3099851477), UINT32_C( 894221337), UINT32_C(2964507124), UINT32_C( 492373082), UINT32_C(4281870485), UINT32_C(2207786213)), simde_x_mm512_set_epu32(UINT32_C( 1), UINT32_C( 1), UINT32_C( 0), UINT32_C( 0), UINT32_C(3920294270), UINT32_C( 0), UINT32_C( 1), UINT32_C( 0), UINT32_C( 0), UINT32_C(3586813553), UINT32_C(1508722402), UINT32_C(2220621656), UINT32_C( 0), UINT32_C(2231263307), UINT32_C( 0), UINT32_C( 0)) }, { simde_x_mm512_set_epu32(UINT32_C(1779168063), UINT32_C(3156074065), UINT32_C(3607805659), 
UINT32_C(1828175063), UINT32_C(3905547273), UINT32_C(4101755863), UINT32_C(3436978124), UINT32_C(3846637996), UINT32_C(2693603084), UINT32_C(1710148738), UINT32_C(1974123080), UINT32_C(2870600100), UINT32_C( 118588227), UINT32_C( 542053192), UINT32_C( 499863549), UINT32_C( 957375358)), UINT16_C(36797), simde_x_mm512_set_epu32(UINT32_C(3141663427), UINT32_C( 562234020), UINT32_C(1763100483), UINT32_C(3776962737), UINT32_C(2844608398), UINT32_C(2885101098), UINT32_C( 269910347), UINT32_C( 433971495), UINT32_C(1441956227), UINT32_C(1018271575), UINT32_C(1734496959), UINT32_C( 380846712), UINT32_C(3352999607), UINT32_C(3555523675), UINT32_C(1995198557), UINT32_C(3314312199)), simde_x_mm512_set_epu32(UINT32_C(2206005509), UINT32_C(1943141679), UINT32_C(3629502055), UINT32_C(3952771463), UINT32_C(2102184556), UINT32_C( 877111492), UINT32_C(1183491905), UINT32_C(3718356317), UINT32_C(3233651099), UINT32_C(3486869896), UINT32_C(3932090380), UINT32_C(2449576763), UINT32_C(4246346280), UINT32_C( 201516689), UINT32_C(2859036576), UINT32_C(2362091228)), simde_x_mm512_set_epu32(UINT32_C( 1), UINT32_C(3156074065), UINT32_C(3607805659), UINT32_C(1828175063), UINT32_C( 1), UINT32_C( 3), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C(1710148738), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 17), UINT32_C( 499863549), UINT32_C( 1)) }, { simde_x_mm512_set_epu32(UINT32_C(4115137419), UINT32_C( 651362699), UINT32_C( 495870887), UINT32_C(3912840869), UINT32_C( 915244711), UINT32_C( 5081424), UINT32_C(1422501384), UINT32_C(4130987572), UINT32_C(2778067031), UINT32_C( 497965579), UINT32_C( 910061584), UINT32_C(2002226944), UINT32_C(3673004107), UINT32_C(4246624078), UINT32_C( 523093293), UINT32_C(3059761572)), UINT16_C(46902), simde_x_mm512_set_epu32(UINT32_C(4074346392), UINT32_C(1398655610), UINT32_C(1722520923), UINT32_C(1206471293), UINT32_C(1374915518), UINT32_C( 531653117), UINT32_C(2075187308), UINT32_C(4150348747), UINT32_C(2163101581), UINT32_C(1444783055), 
UINT32_C(1878625233), UINT32_C(1755684145), UINT32_C(2233240925), UINT32_C(3244523643), UINT32_C(2995026741), UINT32_C(2178270751)), simde_x_mm512_set_epu32(UINT32_C(3188873807), UINT32_C(1982658188), UINT32_C( 863153207), UINT32_C(2657690668), UINT32_C( 448681074), UINT32_C(1334667053), UINT32_C( 502667641), UINT32_C( 855395764), UINT32_C(2622874348), UINT32_C( 808531712), UINT32_C( 454488139), UINT32_C( 123547093), UINT32_C( 483090439), UINT32_C(3168637539), UINT32_C(3093747107), UINT32_C(4158916667)), simde_x_mm512_set_epu32(UINT32_C( 1), UINT32_C( 651362699), UINT32_C( 1), UINT32_C( 0), UINT32_C( 915244711), UINT32_C( 0), UINT32_C( 4), UINT32_C( 4), UINT32_C(2778067031), UINT32_C( 497965579), UINT32_C( 4), UINT32_C( 14), UINT32_C(3673004107), UINT32_C( 1), UINT32_C( 0), UINT32_C(3059761572)) }, { simde_x_mm512_set_epu32(UINT32_C(2113970745), UINT32_C(4112838454), UINT32_C( 564512596), UINT32_C( 604721400), UINT32_C(1471174399), UINT32_C(2491026588), UINT32_C(2529574367), UINT32_C( 298473775), UINT32_C(2890366559), UINT32_C(3063632375), UINT32_C(4055983958), UINT32_C(4149169500), UINT32_C(4113948134), UINT32_C(2384487126), UINT32_C(2434207126), UINT32_C(3923111671)), UINT16_C(38914), simde_x_mm512_set_epu32(UINT32_C(1533151625), UINT32_C(2122196136), UINT32_C(1690360675), UINT32_C(1484935627), UINT32_C(1463758672), UINT32_C( 602211615), UINT32_C(3830002991), UINT32_C(2864741101), UINT32_C( 797104998), UINT32_C(2737423319), UINT32_C(3342229886), UINT32_C( 178625368), UINT32_C(3091160996), UINT32_C(1095216728), UINT32_C(3079561742), UINT32_C( 430790402)), simde_x_mm512_set_epu32(UINT32_C(4043825594), UINT32_C(1274901810), UINT32_C( 413860084), UINT32_C( 550494320), UINT32_C(1997049765), UINT32_C( 505563651), UINT32_C( 463125220), UINT32_C(3843753777), UINT32_C(2346173843), UINT32_C(2157864934), UINT32_C(2591157969), UINT32_C( 389679318), UINT32_C(3939775129), UINT32_C(2493364907), UINT32_C(2006619059), UINT32_C(3391409164)), simde_x_mm512_set_epu32(UINT32_C( 0), 
UINT32_C(4112838454), UINT32_C( 564512596), UINT32_C( 2), UINT32_C( 0), UINT32_C(2491026588), UINT32_C(2529574367), UINT32_C( 298473775), UINT32_C(2890366559), UINT32_C(3063632375), UINT32_C(4055983958), UINT32_C(4149169500), UINT32_C(4113948134), UINT32_C(2384487126), UINT32_C( 1), UINT32_C(3923111671)) }, { simde_x_mm512_set_epu32(UINT32_C(1572579389), UINT32_C(3511888959), UINT32_C(2399346014), UINT32_C(1967093325), UINT32_C( 908815803), UINT32_C(2319376026), UINT32_C(2065037155), UINT32_C( 623932649), UINT32_C(1610322797), UINT32_C(3452844305), UINT32_C(2031682359), UINT32_C(2994836943), UINT32_C(2344919086), UINT32_C( 238137788), UINT32_C(1978166020), UINT32_C( 76768592)), UINT16_C( 883), simde_x_mm512_set_epu32(UINT32_C(3284847806), UINT32_C(3884897233), UINT32_C(2094036024), UINT32_C(2456834182), UINT32_C( 69201629), UINT32_C(1228958503), UINT32_C(3519587969), UINT32_C(2809504529), UINT32_C(3115789449), UINT32_C(1767270276), UINT32_C( 490610321), UINT32_C(1164436618), UINT32_C(2374669797), UINT32_C(3604002618), UINT32_C(3414719029), UINT32_C(2289333019)), simde_x_mm512_set_epu32(UINT32_C(2383307765), UINT32_C( 143428987), UINT32_C(3684943081), UINT32_C( 582607980), UINT32_C(1609326889), UINT32_C(1245407235), UINT32_C(4175005098), UINT32_C(2362914327), UINT32_C(2924553042), UINT32_C(2369006988), UINT32_C(2119408419), UINT32_C(3091878410), UINT32_C(3978436943), UINT32_C(1708684203), UINT32_C(1202455481), UINT32_C(2187745469)), simde_x_mm512_set_epu32(UINT32_C(1572579389), UINT32_C(3511888959), UINT32_C(2399346014), UINT32_C(1967093325), UINT32_C( 908815803), UINT32_C(2319376026), UINT32_C( 0), UINT32_C( 1), UINT32_C(1610322797), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C(2344919086), UINT32_C( 238137788), UINT32_C( 2), UINT32_C( 1)) }, { simde_x_mm512_set_epu32(UINT32_C(2117071873), UINT32_C(2857077767), UINT32_C(3918893192), UINT32_C(1087893388), UINT32_C(3851784011), UINT32_C(3914271744), UINT32_C( 565328458), UINT32_C(4201942548), 
UINT32_C(1480532604), UINT32_C(4197506536), UINT32_C(3712719696), UINT32_C(3920217826), UINT32_C(1394313506), UINT32_C( 394553965), UINT32_C(2278253176), UINT32_C(1697927724)), UINT16_C(12254), simde_x_mm512_set_epu32(UINT32_C( 56443211), UINT32_C(2258452653), UINT32_C(3784696472), UINT32_C(1139427205), UINT32_C(1090384090), UINT32_C(2389735891), UINT32_C(2215607313), UINT32_C(3817672405), UINT32_C(3621770268), UINT32_C(2071747620), UINT32_C(3852178197), UINT32_C(3693632585), UINT32_C( 319530416), UINT32_C(2179954815), UINT32_C(3793236393), UINT32_C( 340519338)), simde_x_mm512_set_epu32(UINT32_C(1219537084), UINT32_C(1349635715), UINT32_C( 732887738), UINT32_C(2566325375), UINT32_C(2906533885), UINT32_C(1765754685), UINT32_C(2719983633), UINT32_C( 846129112), UINT32_C(1578410935), UINT32_C(2635094838), UINT32_C(1045536663), UINT32_C( 957117985), UINT32_C(3029008645), UINT32_C(1309498779), UINT32_C(3293951997), UINT32_C(1022360677)), simde_x_mm512_set_epu32(UINT32_C(2117071873), UINT32_C(2857077767), UINT32_C( 5), UINT32_C(1087893388), UINT32_C( 0), UINT32_C( 1), UINT32_C( 0), UINT32_C( 4), UINT32_C( 2), UINT32_C( 0), UINT32_C(3712719696), UINT32_C( 3), UINT32_C( 0), UINT32_C( 1), UINT32_C( 1), UINT32_C(1697927724)) }, { simde_x_mm512_set_epu32(UINT32_C(3990081318), UINT32_C( 991545752), UINT32_C(4151932359), UINT32_C( 843112042), UINT32_C(4067412513), UINT32_C(2124182542), UINT32_C(2768721208), UINT32_C(2302989914), UINT32_C(1224533822), UINT32_C(3475606100), UINT32_C(3610957044), UINT32_C(2556046111), UINT32_C(3035396524), UINT32_C(3603101367), UINT32_C(3321443925), UINT32_C( 45581573)), UINT16_C(42669), simde_x_mm512_set_epu32(UINT32_C(4138167693), UINT32_C(3221954957), UINT32_C(2164435171), UINT32_C( 397240391), UINT32_C( 200936922), UINT32_C(3263986987), UINT32_C(2536604122), UINT32_C(3629380929), UINT32_C( 453331046), UINT32_C(1704580573), UINT32_C(1606190487), UINT32_C(3209309249), UINT32_C(2959497652), UINT32_C(3926896735), UINT32_C(2875407663), 
UINT32_C(2069966669)), simde_x_mm512_set_epu32(UINT32_C(1379668640), UINT32_C( 66581512), UINT32_C(3737665499), UINT32_C( 304428974), UINT32_C(2686704508), UINT32_C( 532978979), UINT32_C( 946958552), UINT32_C(2383642627), UINT32_C(2176874140), UINT32_C( 283691898), UINT32_C(3848894665), UINT32_C(3836186002), UINT32_C(1951055651), UINT32_C( 765387914), UINT32_C( 822559116), UINT32_C( 7445617)), simde_x_mm512_set_epu32(UINT32_C( 2), UINT32_C( 991545752), UINT32_C( 0), UINT32_C( 843112042), UINT32_C(4067412513), UINT32_C( 6), UINT32_C( 2), UINT32_C(2302989914), UINT32_C( 0), UINT32_C(3475606100), UINT32_C( 0), UINT32_C(2556046111), UINT32_C( 1), UINT32_C( 5), UINT32_C(3321443925), UINT32_C( 278)) }, { simde_x_mm512_set_epu32(UINT32_C(2313028370), UINT32_C( 869237081), UINT32_C(4104913762), UINT32_C(2825691966), UINT32_C(3577866502), UINT32_C(2991894408), UINT32_C(2172048625), UINT32_C(1617119933), UINT32_C(1521363431), UINT32_C( 553638116), UINT32_C(1036201367), UINT32_C(3107033445), UINT32_C(3882811410), UINT32_C(3534384353), UINT32_C(3871215839), UINT32_C(1273589632)), UINT16_C(35103), simde_x_mm512_set_epu32(UINT32_C(2458371652), UINT32_C( 260676470), UINT32_C(1724614860), UINT32_C(4150452663), UINT32_C(3816336716), UINT32_C(2208212235), UINT32_C( 932145867), UINT32_C(2432594561), UINT32_C(1756892633), UINT32_C( 382632965), UINT32_C(1295078740), UINT32_C(3299165262), UINT32_C( 152308919), UINT32_C(3943411788), UINT32_C( 31813624), UINT32_C( 807463845)), simde_x_mm512_set_epu32(UINT32_C( 615301803), UINT32_C( 382786341), UINT32_C(1852603705), UINT32_C(1998007730), UINT32_C( 231325888), UINT32_C(1842039329), UINT32_C( 968682756), UINT32_C( 316335394), UINT32_C(2223585202), UINT32_C(3491781959), UINT32_C(2167971796), UINT32_C(1587647099), UINT32_C(2966608712), UINT32_C( 320339033), UINT32_C( 282380179), UINT32_C(4186865204)), simde_x_mm512_set_epu32(UINT32_C( 3), UINT32_C( 869237081), UINT32_C(4104913762), UINT32_C(2825691966), UINT32_C( 16), UINT32_C(2991894408), 
UINT32_C(2172048625), UINT32_C( 7), UINT32_C(1521363431), UINT32_C( 553638116), UINT32_C(1036201367), UINT32_C( 2), UINT32_C( 0), UINT32_C( 12), UINT32_C( 0), UINT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_div_epu32(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_u32(r, ==, test_vec[i].r); } return 0; }
/* Table-driven check of simde_mm512_div_epu64.
 *
 * test_vec holds 8 cases, each with two operands (a, b) and an expected
 * result (r), all built with simde_x_mm512_set_epu64. The loop that follows
 * the table passes every a/b pair to simde_mm512_div_epu64 and compares the
 * result with r through simde_assert_m512i_u64(r, ==, ...); the function then
 * returns 0.
 *
 * The listed r values agree with unsigned per-lane division that discards the
 * remainder (e.g. 6738163160628300797 / 216242550290965460 -> 31). */
static int test_simde_mm512_div_epu64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu64(UINT64_C( 2968342496979776051), UINT64_C(10005265515001776413), UINT64_C(16837535683400356038), UINT64_C( 6738163160628300797), UINT64_C(13810255550447513201), UINT64_C( 6479913377553186648), UINT64_C( 7505871096235581515), UINT64_C( 2265477367564496986)), simde_x_mm512_set_epu64(UINT64_C(10866939104613927783), UINT64_C(11375825163207743431), UINT64_C(11633520338587575573), UINT64_C( 216242550290965460), UINT64_C( 5120732502404950997), UINT64_C( 5681284513410730040), UINT64_C( 6409558907924801050), UINT64_C( 5372227444888762251)), simde_x_mm512_set_epu64(UINT64_C( 0), UINT64_C( 0), UINT64_C( 1), UINT64_C( 31), UINT64_C( 2), UINT64_C( 1), UINT64_C( 1), UINT64_C( 0)) }, { simde_x_mm512_set_epu64(UINT64_C( 5645659480511055559), UINT64_C(15272728730484288257), UINT64_C(14133460247011230967), UINT64_C(16423537638667915170), UINT64_C( 2118113466433927893), UINT64_C( 3840651400764901876), UINT64_C( 2114726288902596757), UINT64_C( 9482369585348649466)), simde_x_mm512_set_epu64(UINT64_C(13555234896536583899), UINT64_C( 7851952110853286921), UINT64_C(17616907291198234572), UINT64_C(16521184395064581900), UINT64_C( 7345032902979795528), UINT64_C(12329133549512917827), UINT64_C( 2328100732832272381), UINT64_C( 4111895855610225675)), simde_x_mm512_set_epu64(UINT64_C( 0), UINT64_C( 1), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 2)) }, { 
simde_x_mm512_set_epu64(UINT64_C( 7572458917823766705), UINT64_C(12217500042222052906), UINT64_C( 1159256113650983207), UINT64_C( 6193154838246823767), UINT64_C( 7449607714297299576), UINT64_C(14401023659121376347), UINT64_C( 8569312554655704071), UINT64_C(10336200663482757951)), simde_x_mm512_set_epu64(UINT64_C(15588592630942564743), UINT64_C( 9028813919053392068), UINT64_C( 5083059030774095197), UINT64_C(13888425720366328200), UINT64_C(16888199589465789243), UINT64_C(18237918400292775569), UINT64_C(12279468594349909724), UINT64_C(13493341674566517412)), simde_x_mm512_set_epu64(UINT64_C( 0), UINT64_C( 1), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0)) }, { simde_x_mm512_set_epu64(UINT64_C( 2129749246616352421), UINT64_C( 3930946101587052880), UINT64_C( 6109596926925725236), UINT64_C(11931707044738783755), UINT64_C( 3908684742628183808), UINT64_C(15775432521885308750), UINT64_C( 2246668589251707300), UINT64_C( 9474721517893975343)), simde_x_mm512_set_epu64(UINT64_C( 5181754748372749246), UINT64_C( 2283432752406648940), UINT64_C(17825612137522679693), UINT64_C( 6205295972918594513), UINT64_C( 7540605987113962845), UINT64_C(13935122940778806069), UINT64_C( 9355601638871447350), UINT64_C(17674380633802211723)), simde_x_mm512_set_epu64(UINT64_C( 0), UINT64_C( 1), UINT64_C( 0), UINT64_C( 1), UINT64_C( 0), UINT64_C( 1), UINT64_C( 0), UINT64_C( 0)) }, { simde_x_mm512_set_epu64(UINT64_C(11414694502393074802), UINT64_C( 5732351344186366329), UINT64_C( 3673896834139808492), UINT64_C( 3472617261273378891), UINT64_C( 530630724433960967), UINT64_C(13609194605976671651), UINT64_C(17862411075628668824), UINT64_C( 6007180105039451483)), simde_x_mm512_set_epu64(UINT64_C( 2597258637662508799), UINT64_C(10698877731456040415), UINT64_C( 1281935105229028959), UINT64_C(13158200861647791958), UINT64_C(17820547312174620134), UINT64_C(10241294226337238422), UINT64_C(16849636328689785423), UINT64_C( 8515452077469772855)), 
simde_x_mm512_set_epu64(UINT64_C( 4), UINT64_C( 0), UINT64_C( 2), UINT64_C( 0), UINT64_C( 0), UINT64_C( 1), UINT64_C( 1), UINT64_C( 0)) }, { simde_x_mm512_set_epu64(UINT64_C( 6286795626078602527), UINT64_C(16449737592791923437), UINT64_C( 3423539900625568727), UINT64_C(14354768056262433624), UINT64_C(13276435385586003544), UINT64_C(13226616968333580034), UINT64_C(13803418519385186873), UINT64_C(17664506654225712980)), simde_x_mm512_set_epu64(UINT64_C( 8577263429665049091), UINT64_C( 1989107677696558897), UINT64_C(10076739928573503462), UINT64_C(11128938736014461142), UINT64_C(16921205335142546091), UINT64_C( 8618363237326703628), UINT64_C( 6584836091306452136), UINT64_C( 7260043819054420427)), simde_x_mm512_set_epu64(UINT64_C( 0), UINT64_C( 8), UINT64_C( 0), UINT64_C( 1), UINT64_C( 0), UINT64_C( 1), UINT64_C( 2), UINT64_C( 2)) }, { simde_x_mm512_set_epu64(UINT64_C( 3903334154292354714), UINT64_C( 8869267046373815529), UINT64_C( 6916283752571091217), UINT64_C( 8726009290759968207), UINT64_C(10071350786374349244), UINT64_C( 8496158362035250512), UINT64_C(17368098678232675634), UINT64_C( 1777515526450307184)), simde_x_mm512_set_epu64(UINT64_C( 5278336582045705857), UINT64_C(12066730073134673033), UINT64_C( 7590368039103504017), UINT64_C( 5001217194949514725), UINT64_C(15479073382423099957), UINT64_C( 9832610448471819123), UINT64_C( 6754177049630551103), UINT64_C(10305112663885051469)), simde_x_mm512_set_epu64(UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 1), UINT64_C( 0), UINT64_C( 0), UINT64_C( 2), UINT64_C( 0)) }, { simde_x_mm512_set_epu64(UINT64_C( 5348983348701791658), UINT64_C(10148639760639402834), UINT64_C(10174807539574872867), UINT64_C(13279516658136916303), UINT64_C( 7338742772279280569), UINT64_C( 9396295244612029630), UINT64_C(16685506566149927992), UINT64_C(10552022463454113501)), simde_x_mm512_set_epu64(UINT64_C(16811669128702212682), UINT64_C(18047205824811442812), UINT64_C(18028153300578966352), UINT64_C(16837207357260532002), UINT64_C( 
1694596378460381816), UINT64_C( 7292544047935022069), UINT64_C( 616022812148352233), UINT64_C( 2502282222097948969)), simde_x_mm512_set_epu64(UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 0), UINT64_C( 4), UINT64_C( 1), UINT64_C( 27), UINT64_C( 4)) } };
/* Run every case: call simde_mm512_div_epu64 on a and b and require the
 * result to equal the stored r vector. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_div_epu64(test_vec[i].a, test_vec[i].b); simde_assert_m512i_u64(r, ==, test_vec[i].r); } return 0; }
/* Table-driven check of simde_mm_erf_ps.
 *
 * Each test_vec entry pairs four float inputs (a) with four expected outputs
 * (r). Every input in this table has a magnitude of at least 29.18, and every
 * expected value is 1.00 or -1.00 with the same sign as its input. The loop
 * after the table loads a with simde_mm_loadu_ps, calls simde_mm_erf_ps and
 * compares against r using simde_test_x86_assert_equal_f32x4.
 *
 * NOTE(review): the trailing argument 1 passed to that helper is presumably a
 * precision/tolerance setting -- confirm against its definition. */
static int test_simde_mm_erf_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 449.73), SIMDE_FLOAT32_C( -898.83), SIMDE_FLOAT32_C( 193.72), SIMDE_FLOAT32_C( -793.70) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00) } }, { { SIMDE_FLOAT32_C( 434.26), SIMDE_FLOAT32_C( 437.61), SIMDE_FLOAT32_C( -29.18), SIMDE_FLOAT32_C( -288.39) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00) } }, { { SIMDE_FLOAT32_C( -989.93), SIMDE_FLOAT32_C( -799.36), SIMDE_FLOAT32_C( 150.13), SIMDE_FLOAT32_C( 690.23) }, { SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( -667.63), SIMDE_FLOAT32_C( -368.07), SIMDE_FLOAT32_C( 316.47), SIMDE_FLOAT32_C( 916.61) }, { SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( 256.26), SIMDE_FLOAT32_C( -321.94), SIMDE_FLOAT32_C( 111.81), SIMDE_FLOAT32_C( -665.54) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00) } }, { { SIMDE_FLOAT32_C( 169.01), SIMDE_FLOAT32_C( -375.29), SIMDE_FLOAT32_C( -768.83), SIMDE_FLOAT32_C( 166.33) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( 327.83), SIMDE_FLOAT32_C( -583.11), SIMDE_FLOAT32_C( 452.18), 
SIMDE_FLOAT32_C( -922.36) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00) } }, { { SIMDE_FLOAT32_C( 33.53), SIMDE_FLOAT32_C( -944.72), SIMDE_FLOAT32_C( -608.58), SIMDE_FLOAT32_C( -516.73) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00) } } };
/* Run every case: load the four inputs, apply simde_mm_erf_ps, and compare
 * with the expected lanes loaded from r. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_erf_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; }
/* Table-driven check of simde_mm_erf_pd.
 *
 * Each test_vec entry pairs two double inputs (a) with two expected outputs
 * (r). The smallest input magnitude in the table is 5.07, and every expected
 * value is 1.00 or -1.00 with the same sign as its input.
 *
 * NOTE(review): the trailing argument 1 passed to
 * simde_test_x86_assert_equal_f64x2 is presumably a precision/tolerance
 * setting -- confirm against its definition. */
static int test_simde_mm_erf_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -733.03), SIMDE_FLOAT64_C( -222.93) }, { SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( -762.35), SIMDE_FLOAT64_C( -559.95) }, { SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( -868.93), SIMDE_FLOAT64_C( -580.21) }, { SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( 299.67), SIMDE_FLOAT64_C( -439.96) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( -152.35), SIMDE_FLOAT64_C( 5.07) }, { SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( 40.68), SIMDE_FLOAT64_C( -726.52) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( 642.06), SIMDE_FLOAT64_C( -970.77) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( 563.08), SIMDE_FLOAT64_C( -718.61) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00) } } };
/* Run every case: load the two inputs, apply simde_mm_erf_pd, and compare
 * with the expected lanes loaded from r. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_erf_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; }
/* Table-driven check of simde_mm256_erf_ps.
 *
 * Each test_vec entry pairs eight float inputs (a) with eight expected
 * outputs (r). All expected values in the table are 1.00 or -1.00 except for
 * one lane whose input is 0.84 and whose expected output is 0.77. */
static int test_simde_mm256_erf_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct 
{ const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 374.20), SIMDE_FLOAT32_C( -943.32), SIMDE_FLOAT32_C( -503.43), SIMDE_FLOAT32_C( -980.91), SIMDE_FLOAT32_C( 588.09), SIMDE_FLOAT32_C( 116.98), SIMDE_FLOAT32_C( 159.00), SIMDE_FLOAT32_C( 60.92) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( 517.69), SIMDE_FLOAT32_C( 565.06), SIMDE_FLOAT32_C( 410.42), SIMDE_FLOAT32_C( 802.07), SIMDE_FLOAT32_C( -337.69), SIMDE_FLOAT32_C( 790.63), SIMDE_FLOAT32_C( 48.57), SIMDE_FLOAT32_C( 385.99) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( 695.57), SIMDE_FLOAT32_C( -950.00), SIMDE_FLOAT32_C( 565.77), SIMDE_FLOAT32_C( -123.23), SIMDE_FLOAT32_C( 205.87), SIMDE_FLOAT32_C( -194.42), SIMDE_FLOAT32_C( 803.30), SIMDE_FLOAT32_C( -901.24) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00) } }, { { SIMDE_FLOAT32_C( 429.62), SIMDE_FLOAT32_C( -530.89), SIMDE_FLOAT32_C( 279.94), SIMDE_FLOAT32_C( 445.55), SIMDE_FLOAT32_C( 34.20), SIMDE_FLOAT32_C( 333.48), SIMDE_FLOAT32_C( 841.52), SIMDE_FLOAT32_C( -591.60) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00) } }, { { SIMDE_FLOAT32_C( 390.15), SIMDE_FLOAT32_C( -661.91), SIMDE_FLOAT32_C( -572.50), SIMDE_FLOAT32_C( -21.76), SIMDE_FLOAT32_C( 455.07), SIMDE_FLOAT32_C( 586.50), SIMDE_FLOAT32_C( -960.84), SIMDE_FLOAT32_C( -27.24) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 
-1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00) } }, { { SIMDE_FLOAT32_C( 151.56), SIMDE_FLOAT32_C( 449.58), SIMDE_FLOAT32_C( -225.17), SIMDE_FLOAT32_C( 813.87), SIMDE_FLOAT32_C( 240.21), SIMDE_FLOAT32_C( 823.40), SIMDE_FLOAT32_C( 199.87), SIMDE_FLOAT32_C( -64.22) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00) } }, { { SIMDE_FLOAT32_C( 873.40), SIMDE_FLOAT32_C( -234.36), SIMDE_FLOAT32_C( 812.55), SIMDE_FLOAT32_C( 79.27), SIMDE_FLOAT32_C( 571.22), SIMDE_FLOAT32_C( 615.85), SIMDE_FLOAT32_C( 178.03), SIMDE_FLOAT32_C( 0.84) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.77) } }, { { SIMDE_FLOAT32_C( -915.04), SIMDE_FLOAT32_C( -542.03), SIMDE_FLOAT32_C( -553.61), SIMDE_FLOAT32_C( 119.16), SIMDE_FLOAT32_C( 791.44), SIMDE_FLOAT32_C( -712.09), SIMDE_FLOAT32_C( 527.56), SIMDE_FLOAT32_C( 181.60) }, { SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00) } } };
/* Run every case of the simde_mm256_erf_ps table: load the eight inputs,
 * apply simde_mm256_erf_ps, and compare with the expected lanes loaded from r.
 * NOTE(review): the trailing argument 1 passed to
 * simde_test_x86_assert_equal_f32x8 is presumably a precision/tolerance
 * setting -- confirm against its definition. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_erf_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; }
/* Table-driven check of simde_mm256_erf_pd.
 *
 * Each test_vec entry pairs four double inputs (a) with four expected outputs
 * (r). Every expected value in the table is 1.00 or -1.00 with the same sign
 * as its input. */
static int test_simde_mm256_erf_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( -313.70), SIMDE_FLOAT64_C( 714.53), SIMDE_FLOAT64_C( 927.20), SIMDE_FLOAT64_C( -898.10) }, { SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 
1.00), SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( 921.61), SIMDE_FLOAT64_C( 406.65), SIMDE_FLOAT64_C( 519.73), SIMDE_FLOAT64_C( -550.92) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( 655.77), SIMDE_FLOAT64_C( -305.99), SIMDE_FLOAT64_C( -29.82), SIMDE_FLOAT64_C( -266.26) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( 47.11), SIMDE_FLOAT64_C( 991.16), SIMDE_FLOAT64_C( -298.84), SIMDE_FLOAT64_C( 426.24) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( -122.46), SIMDE_FLOAT64_C( 928.48), SIMDE_FLOAT64_C( -151.69), SIMDE_FLOAT64_C( -677.70) }, { SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( -184.81), SIMDE_FLOAT64_C( -799.82), SIMDE_FLOAT64_C( 978.74), SIMDE_FLOAT64_C( -554.85) }, { SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( 83.95), SIMDE_FLOAT64_C( -400.78), SIMDE_FLOAT64_C( -165.64), SIMDE_FLOAT64_C( -926.09) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( 941.89), SIMDE_FLOAT64_C( 862.77), SIMDE_FLOAT64_C( 150.41), SIMDE_FLOAT64_C( -371.81) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00) } } };
/* Run every case of the simde_mm256_erf_pd table: load the four inputs, apply
 * simde_mm256_erf_pd, and compare with the expected lanes loaded from r.
 * NOTE(review): the trailing argument 1 passed to
 * simde_test_x86_assert_equal_f64x4 is presumably a precision/tolerance
 * setting -- confirm against its definition. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_erf_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; }
/* Table-driven check of simde_mm512_erf_ps.
 *
 * Each test_vec entry pairs sixteen float inputs (a) with sixteen expected
 * outputs (r). Every expected value listed in the table is 1.00 or -1.00. */
static int test_simde_mm512_erf_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -838.40), 
SIMDE_FLOAT32_C( 872.70), SIMDE_FLOAT32_C( 438.38), SIMDE_FLOAT32_C( -298.62), SIMDE_FLOAT32_C( 781.61), SIMDE_FLOAT32_C( 970.11), SIMDE_FLOAT32_C( 78.85), SIMDE_FLOAT32_C( 723.02), SIMDE_FLOAT32_C( -818.83), SIMDE_FLOAT32_C( -579.07), SIMDE_FLOAT32_C( 251.53), SIMDE_FLOAT32_C( -753.80), SIMDE_FLOAT32_C( 319.82), SIMDE_FLOAT32_C( 967.37), SIMDE_FLOAT32_C( 725.05), SIMDE_FLOAT32_C( 873.27) }, { SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( -304.80), SIMDE_FLOAT32_C( 941.81), SIMDE_FLOAT32_C( -83.14), SIMDE_FLOAT32_C( -799.93), SIMDE_FLOAT32_C( -339.09), SIMDE_FLOAT32_C( 125.84), SIMDE_FLOAT32_C( 891.08), SIMDE_FLOAT32_C( -989.54), SIMDE_FLOAT32_C( 253.61), SIMDE_FLOAT32_C( 980.01), SIMDE_FLOAT32_C( 634.54), SIMDE_FLOAT32_C( 449.90), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( 271.95), SIMDE_FLOAT32_C( 654.57), SIMDE_FLOAT32_C( 624.56) }, { SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( 144.65), SIMDE_FLOAT32_C( 92.95), SIMDE_FLOAT32_C( -674.06), SIMDE_FLOAT32_C( -73.74), SIMDE_FLOAT32_C( 63.06), SIMDE_FLOAT32_C( 404.78), SIMDE_FLOAT32_C( -350.71), SIMDE_FLOAT32_C( 244.23), SIMDE_FLOAT32_C( 825.71), SIMDE_FLOAT32_C( 900.82), SIMDE_FLOAT32_C( 490.43), SIMDE_FLOAT32_C( 145.53), SIMDE_FLOAT32_C( 868.18), SIMDE_FLOAT32_C( 215.47), SIMDE_FLOAT32_C( 
18.80), SIMDE_FLOAT32_C( -436.61) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00) } }, { { SIMDE_FLOAT32_C( 157.28), SIMDE_FLOAT32_C( 935.67), SIMDE_FLOAT32_C( -236.55), SIMDE_FLOAT32_C( 818.19), SIMDE_FLOAT32_C( 61.50), SIMDE_FLOAT32_C( -345.47), SIMDE_FLOAT32_C( 828.65), SIMDE_FLOAT32_C( -684.89), SIMDE_FLOAT32_C( -365.46), SIMDE_FLOAT32_C( 463.19), SIMDE_FLOAT32_C( 765.01), SIMDE_FLOAT32_C( -902.51), SIMDE_FLOAT32_C( -264.87), SIMDE_FLOAT32_C( 419.58), SIMDE_FLOAT32_C( 722.05), SIMDE_FLOAT32_C( 879.78) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( -487.47), SIMDE_FLOAT32_C( -952.01), SIMDE_FLOAT32_C( -193.96), SIMDE_FLOAT32_C( 575.59), SIMDE_FLOAT32_C( 452.77), SIMDE_FLOAT32_C( 455.33), SIMDE_FLOAT32_C( -180.18), SIMDE_FLOAT32_C( 278.48), SIMDE_FLOAT32_C( 356.14), SIMDE_FLOAT32_C( -689.76), SIMDE_FLOAT32_C( -575.99), SIMDE_FLOAT32_C( 224.33), SIMDE_FLOAT32_C( 525.72), SIMDE_FLOAT32_C( 442.82), SIMDE_FLOAT32_C( 787.71), SIMDE_FLOAT32_C( -317.01) }, { SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 
1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00) } }, { { SIMDE_FLOAT32_C( 378.48), SIMDE_FLOAT32_C( -448.83), SIMDE_FLOAT32_C( -498.82), SIMDE_FLOAT32_C( -560.02), SIMDE_FLOAT32_C( 205.70), SIMDE_FLOAT32_C( -670.17), SIMDE_FLOAT32_C( -244.90), SIMDE_FLOAT32_C( 840.24), SIMDE_FLOAT32_C( 793.02), SIMDE_FLOAT32_C( -479.90), SIMDE_FLOAT32_C( 937.74), SIMDE_FLOAT32_C( -471.85), SIMDE_FLOAT32_C( 939.68), SIMDE_FLOAT32_C( 659.79), SIMDE_FLOAT32_C( -592.07), SIMDE_FLOAT32_C( -547.79) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00) } }, { { SIMDE_FLOAT32_C( 707.78), SIMDE_FLOAT32_C( 213.97), SIMDE_FLOAT32_C( -972.20), SIMDE_FLOAT32_C( 160.55), SIMDE_FLOAT32_C( -330.70), SIMDE_FLOAT32_C( -152.38), SIMDE_FLOAT32_C( -560.98), SIMDE_FLOAT32_C( -974.56), SIMDE_FLOAT32_C( 157.86), SIMDE_FLOAT32_C( -136.96), SIMDE_FLOAT32_C( 249.77), SIMDE_FLOAT32_C( -316.43), SIMDE_FLOAT32_C( -694.15), SIMDE_FLOAT32_C( 37.48), SIMDE_FLOAT32_C( 366.57), SIMDE_FLOAT32_C( 684.33) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00) } }, { { SIMDE_FLOAT32_C( 588.65), SIMDE_FLOAT32_C( 867.75), SIMDE_FLOAT32_C( -875.68), SIMDE_FLOAT32_C( -205.65), SIMDE_FLOAT32_C( -802.42), SIMDE_FLOAT32_C( -120.59), SIMDE_FLOAT32_C( -365.41), SIMDE_FLOAT32_C( 990.60), SIMDE_FLOAT32_C( 399.52), 
SIMDE_FLOAT32_C( -427.67), SIMDE_FLOAT32_C( -481.25), SIMDE_FLOAT32_C( 339.20), SIMDE_FLOAT32_C( -767.88), SIMDE_FLOAT32_C( -73.32), SIMDE_FLOAT32_C( 791.41), SIMDE_FLOAT32_C( 939.89) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00) } } };
/* Run every case of the simde_mm512_erf_ps table: load the sixteen inputs,
 * apply simde_mm512_erf_ps, and compare with the expected lanes loaded from r.
 * NOTE(review): the trailing argument 1 passed to
 * simde_test_x86_assert_equal_f32x16 is presumably a precision/tolerance
 * setting -- confirm against its definition. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_erf_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; }
static int test_simde_mm512_mask_erf_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 60.80), SIMDE_FLOAT32_C( 224.62), SIMDE_FLOAT32_C( -945.35), SIMDE_FLOAT32_C( -219.00), SIMDE_FLOAT32_C( 891.11), SIMDE_FLOAT32_C( 761.94), SIMDE_FLOAT32_C( 992.65), SIMDE_FLOAT32_C( 332.00), SIMDE_FLOAT32_C( 387.85), SIMDE_FLOAT32_C( -689.44), SIMDE_FLOAT32_C( 195.76), SIMDE_FLOAT32_C( -335.77), SIMDE_FLOAT32_C( -349.96), SIMDE_FLOAT32_C( -675.36), SIMDE_FLOAT32_C( 298.19), SIMDE_FLOAT32_C( 171.46) }, UINT8_C( 43), { SIMDE_FLOAT32_C( -593.03), SIMDE_FLOAT32_C( 241.03), SIMDE_FLOAT32_C( 550.96), SIMDE_FLOAT32_C( 496.03), SIMDE_FLOAT32_C( -94.31), SIMDE_FLOAT32_C( -581.85), SIMDE_FLOAT32_C( -755.59), SIMDE_FLOAT32_C( 80.74), SIMDE_FLOAT32_C( 755.01), SIMDE_FLOAT32_C( 520.11), SIMDE_FLOAT32_C( 62.41), SIMDE_FLOAT32_C( -580.00), SIMDE_FLOAT32_C( 448.06), SIMDE_FLOAT32_C( -303.73), SIMDE_FLOAT32_C( 480.80), SIMDE_FLOAT32_C( -327.32) }, { SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 
-945.35), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 891.11), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 992.65), SIMDE_FLOAT32_C( 332.00), SIMDE_FLOAT32_C( 387.85), SIMDE_FLOAT32_C( -689.44), SIMDE_FLOAT32_C( 195.76), SIMDE_FLOAT32_C( -335.77), SIMDE_FLOAT32_C( -349.96), SIMDE_FLOAT32_C( -675.36), SIMDE_FLOAT32_C( 298.19), SIMDE_FLOAT32_C( 171.46) } }, { { SIMDE_FLOAT32_C( -249.08), SIMDE_FLOAT32_C( -738.20), SIMDE_FLOAT32_C( -436.21), SIMDE_FLOAT32_C( -487.13), SIMDE_FLOAT32_C( -745.54), SIMDE_FLOAT32_C( 895.79), SIMDE_FLOAT32_C( 900.71), SIMDE_FLOAT32_C( -434.99), SIMDE_FLOAT32_C( 91.55), SIMDE_FLOAT32_C( -435.06), SIMDE_FLOAT32_C( 215.05), SIMDE_FLOAT32_C( 416.20), SIMDE_FLOAT32_C( 863.14), SIMDE_FLOAT32_C( -613.49), SIMDE_FLOAT32_C( -739.87), SIMDE_FLOAT32_C( -729.89) }, UINT8_C(228), { SIMDE_FLOAT32_C( 811.10), SIMDE_FLOAT32_C( 766.14), SIMDE_FLOAT32_C( -466.77), SIMDE_FLOAT32_C( -770.76), SIMDE_FLOAT32_C( -989.45), SIMDE_FLOAT32_C( 613.97), SIMDE_FLOAT32_C( 984.25), SIMDE_FLOAT32_C( 530.66), SIMDE_FLOAT32_C( -323.62), SIMDE_FLOAT32_C( -595.75), SIMDE_FLOAT32_C( -21.28), SIMDE_FLOAT32_C( 372.65), SIMDE_FLOAT32_C( 885.05), SIMDE_FLOAT32_C( 651.40), SIMDE_FLOAT32_C( -876.43), SIMDE_FLOAT32_C( -853.15) }, { SIMDE_FLOAT32_C( -249.08), SIMDE_FLOAT32_C( -738.20), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -487.13), SIMDE_FLOAT32_C( -745.54), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 91.55), SIMDE_FLOAT32_C( -435.06), SIMDE_FLOAT32_C( 215.05), SIMDE_FLOAT32_C( 416.20), SIMDE_FLOAT32_C( 863.14), SIMDE_FLOAT32_C( -613.49), SIMDE_FLOAT32_C( -739.87), SIMDE_FLOAT32_C( -729.89) } }, { { SIMDE_FLOAT32_C( -784.80), SIMDE_FLOAT32_C( -363.56), SIMDE_FLOAT32_C( -598.70), SIMDE_FLOAT32_C( -889.01), SIMDE_FLOAT32_C( -462.85), SIMDE_FLOAT32_C( -33.68), SIMDE_FLOAT32_C( 202.54), SIMDE_FLOAT32_C( 102.09), SIMDE_FLOAT32_C( -818.63), SIMDE_FLOAT32_C( -381.26), SIMDE_FLOAT32_C( -34.77), SIMDE_FLOAT32_C( -432.12), SIMDE_FLOAT32_C( -121.13), 
SIMDE_FLOAT32_C( 235.34), SIMDE_FLOAT32_C( -804.58), SIMDE_FLOAT32_C( -310.04) }, UINT8_C(218), { SIMDE_FLOAT32_C( -271.35), SIMDE_FLOAT32_C( -80.79), SIMDE_FLOAT32_C( 12.03), SIMDE_FLOAT32_C( -657.38), SIMDE_FLOAT32_C( -96.55), SIMDE_FLOAT32_C( -457.32), SIMDE_FLOAT32_C( 19.00), SIMDE_FLOAT32_C( 307.70), SIMDE_FLOAT32_C( 521.41), SIMDE_FLOAT32_C( -608.35), SIMDE_FLOAT32_C( 192.75), SIMDE_FLOAT32_C( 172.81), SIMDE_FLOAT32_C( -484.78), SIMDE_FLOAT32_C( 339.60), SIMDE_FLOAT32_C( 388.01), SIMDE_FLOAT32_C( 151.65) }, { SIMDE_FLOAT32_C( -784.80), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -598.70), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -33.68), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -818.63), SIMDE_FLOAT32_C( -381.26), SIMDE_FLOAT32_C( -34.77), SIMDE_FLOAT32_C( -432.12), SIMDE_FLOAT32_C( -121.13), SIMDE_FLOAT32_C( 235.34), SIMDE_FLOAT32_C( -804.58), SIMDE_FLOAT32_C( -310.04) } }, { { SIMDE_FLOAT32_C( 740.90), SIMDE_FLOAT32_C( 498.99), SIMDE_FLOAT32_C( 688.80), SIMDE_FLOAT32_C( -292.78), SIMDE_FLOAT32_C( -298.47), SIMDE_FLOAT32_C( -209.10), SIMDE_FLOAT32_C( -111.42), SIMDE_FLOAT32_C( 320.27), SIMDE_FLOAT32_C( 756.13), SIMDE_FLOAT32_C( 456.46), SIMDE_FLOAT32_C( -800.86), SIMDE_FLOAT32_C( -8.53), SIMDE_FLOAT32_C( 651.88), SIMDE_FLOAT32_C( -110.90), SIMDE_FLOAT32_C( 992.95), SIMDE_FLOAT32_C( -619.48) }, UINT8_C(168), { SIMDE_FLOAT32_C( 4.98), SIMDE_FLOAT32_C( -276.86), SIMDE_FLOAT32_C( -288.24), SIMDE_FLOAT32_C( 547.66), SIMDE_FLOAT32_C( 742.14), SIMDE_FLOAT32_C( -980.53), SIMDE_FLOAT32_C( 69.07), SIMDE_FLOAT32_C( -866.21), SIMDE_FLOAT32_C( 212.21), SIMDE_FLOAT32_C( -758.12), SIMDE_FLOAT32_C( -351.00), SIMDE_FLOAT32_C( -448.19), SIMDE_FLOAT32_C( 629.88), SIMDE_FLOAT32_C( 800.65), SIMDE_FLOAT32_C( -707.29), SIMDE_FLOAT32_C( 128.87) }, { SIMDE_FLOAT32_C( 740.90), SIMDE_FLOAT32_C( 498.99), SIMDE_FLOAT32_C( 688.80), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -298.47), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -111.42), 
SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 756.13), SIMDE_FLOAT32_C( 456.46), SIMDE_FLOAT32_C( -800.86), SIMDE_FLOAT32_C( -8.53), SIMDE_FLOAT32_C( 651.88), SIMDE_FLOAT32_C( -110.90), SIMDE_FLOAT32_C( 992.95), SIMDE_FLOAT32_C( -619.48) } }, { { SIMDE_FLOAT32_C( 489.46), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 830.41), SIMDE_FLOAT32_C( -719.65), SIMDE_FLOAT32_C( 888.51), SIMDE_FLOAT32_C( 150.68), SIMDE_FLOAT32_C( -963.52), SIMDE_FLOAT32_C( 344.97), SIMDE_FLOAT32_C( 349.82), SIMDE_FLOAT32_C( 27.95), SIMDE_FLOAT32_C( -3.15), SIMDE_FLOAT32_C( -761.08), SIMDE_FLOAT32_C( 20.90), SIMDE_FLOAT32_C( 377.37), SIMDE_FLOAT32_C( -952.77), SIMDE_FLOAT32_C( -974.12) }, UINT8_C( 30), { SIMDE_FLOAT32_C( -241.01), SIMDE_FLOAT32_C( 573.54), SIMDE_FLOAT32_C( 842.66), SIMDE_FLOAT32_C( -221.54), SIMDE_FLOAT32_C( -357.39), SIMDE_FLOAT32_C( 976.44), SIMDE_FLOAT32_C( 990.67), SIMDE_FLOAT32_C( -115.52), SIMDE_FLOAT32_C( -374.55), SIMDE_FLOAT32_C( -457.51), SIMDE_FLOAT32_C( -485.63), SIMDE_FLOAT32_C( -573.90), SIMDE_FLOAT32_C( -164.80), SIMDE_FLOAT32_C( 643.24), SIMDE_FLOAT32_C( 915.55), SIMDE_FLOAT32_C( 835.12) }, { SIMDE_FLOAT32_C( 489.46), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 150.68), SIMDE_FLOAT32_C( -963.52), SIMDE_FLOAT32_C( 344.97), SIMDE_FLOAT32_C( 349.82), SIMDE_FLOAT32_C( 27.95), SIMDE_FLOAT32_C( -3.15), SIMDE_FLOAT32_C( -761.08), SIMDE_FLOAT32_C( 20.90), SIMDE_FLOAT32_C( 377.37), SIMDE_FLOAT32_C( -952.77), SIMDE_FLOAT32_C( -974.12) } }, { { SIMDE_FLOAT32_C( 473.65), SIMDE_FLOAT32_C( -804.09), SIMDE_FLOAT32_C( 723.64), SIMDE_FLOAT32_C( -375.67), SIMDE_FLOAT32_C( -767.61), SIMDE_FLOAT32_C( 68.61), SIMDE_FLOAT32_C( 974.15), SIMDE_FLOAT32_C( 260.34), SIMDE_FLOAT32_C( -934.54), SIMDE_FLOAT32_C( -786.93), SIMDE_FLOAT32_C( -718.76), SIMDE_FLOAT32_C( 442.83), SIMDE_FLOAT32_C( -739.70), SIMDE_FLOAT32_C( -692.88), SIMDE_FLOAT32_C( 543.35), SIMDE_FLOAT32_C( 19.29) }, UINT8_C(120), { SIMDE_FLOAT32_C( 386.01), 
SIMDE_FLOAT32_C( 797.75), SIMDE_FLOAT32_C( -476.73), SIMDE_FLOAT32_C( 362.46), SIMDE_FLOAT32_C( 788.43), SIMDE_FLOAT32_C( 407.75), SIMDE_FLOAT32_C( 987.90), SIMDE_FLOAT32_C( -669.09), SIMDE_FLOAT32_C( 922.12), SIMDE_FLOAT32_C( -586.00), SIMDE_FLOAT32_C( 166.11), SIMDE_FLOAT32_C( 565.36), SIMDE_FLOAT32_C( -670.44), SIMDE_FLOAT32_C( 1.23), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -474.54) }, { SIMDE_FLOAT32_C( 473.65), SIMDE_FLOAT32_C( -804.09), SIMDE_FLOAT32_C( 723.64), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 260.34), SIMDE_FLOAT32_C( -934.54), SIMDE_FLOAT32_C( -786.93), SIMDE_FLOAT32_C( -718.76), SIMDE_FLOAT32_C( 442.83), SIMDE_FLOAT32_C( -739.70), SIMDE_FLOAT32_C( -692.88), SIMDE_FLOAT32_C( 543.35), SIMDE_FLOAT32_C( 19.29) } }, { { SIMDE_FLOAT32_C( -275.13), SIMDE_FLOAT32_C( 663.34), SIMDE_FLOAT32_C( -242.15), SIMDE_FLOAT32_C( 793.48), SIMDE_FLOAT32_C( 637.49), SIMDE_FLOAT32_C( -981.81), SIMDE_FLOAT32_C( 858.94), SIMDE_FLOAT32_C( 850.55), SIMDE_FLOAT32_C( -700.57), SIMDE_FLOAT32_C( 301.77), SIMDE_FLOAT32_C( -889.15), SIMDE_FLOAT32_C( -393.45), SIMDE_FLOAT32_C( -154.87), SIMDE_FLOAT32_C( 130.14), SIMDE_FLOAT32_C( -512.79), SIMDE_FLOAT32_C( -768.86) }, UINT8_C( 73), { SIMDE_FLOAT32_C( 10.48), SIMDE_FLOAT32_C( 593.59), SIMDE_FLOAT32_C( -283.68), SIMDE_FLOAT32_C( -581.77), SIMDE_FLOAT32_C( 581.50), SIMDE_FLOAT32_C( 47.23), SIMDE_FLOAT32_C( -659.65), SIMDE_FLOAT32_C( 995.50), SIMDE_FLOAT32_C( -786.66), SIMDE_FLOAT32_C( 905.71), SIMDE_FLOAT32_C( -674.95), SIMDE_FLOAT32_C( 214.58), SIMDE_FLOAT32_C( -55.28), SIMDE_FLOAT32_C( -149.49), SIMDE_FLOAT32_C( 939.45), SIMDE_FLOAT32_C( -391.94) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 663.34), SIMDE_FLOAT32_C( -242.15), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 637.49), SIMDE_FLOAT32_C( -981.81), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 850.55), SIMDE_FLOAT32_C( -700.57), SIMDE_FLOAT32_C( 301.77), SIMDE_FLOAT32_C( -889.15), SIMDE_FLOAT32_C( -393.45), 
SIMDE_FLOAT32_C( -154.87), SIMDE_FLOAT32_C( 130.14), SIMDE_FLOAT32_C( -512.79), SIMDE_FLOAT32_C( -768.86) } }, { { SIMDE_FLOAT32_C( 608.36), SIMDE_FLOAT32_C( 732.93), SIMDE_FLOAT32_C( -754.45), SIMDE_FLOAT32_C( 626.55), SIMDE_FLOAT32_C( 591.86), SIMDE_FLOAT32_C( -903.90), SIMDE_FLOAT32_C( 925.98), SIMDE_FLOAT32_C( -106.36), SIMDE_FLOAT32_C( -793.05), SIMDE_FLOAT32_C( -467.47), SIMDE_FLOAT32_C( 738.77), SIMDE_FLOAT32_C( 337.09), SIMDE_FLOAT32_C( 19.74), SIMDE_FLOAT32_C( 969.90), SIMDE_FLOAT32_C( -735.01), SIMDE_FLOAT32_C( -969.78) }, UINT8_C(189), { SIMDE_FLOAT32_C( -18.69), SIMDE_FLOAT32_C( -551.55), SIMDE_FLOAT32_C( 144.99), SIMDE_FLOAT32_C( -971.46), SIMDE_FLOAT32_C( -211.20), SIMDE_FLOAT32_C( 140.49), SIMDE_FLOAT32_C( -758.11), SIMDE_FLOAT32_C( -305.49), SIMDE_FLOAT32_C( 465.54), SIMDE_FLOAT32_C( 456.46), SIMDE_FLOAT32_C( 639.24), SIMDE_FLOAT32_C( -683.94), SIMDE_FLOAT32_C( 395.91), SIMDE_FLOAT32_C( -752.70), SIMDE_FLOAT32_C( 924.42), SIMDE_FLOAT32_C( 128.84) }, { SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 732.93), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 925.98), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -793.05), SIMDE_FLOAT32_C( -467.47), SIMDE_FLOAT32_C( 738.77), SIMDE_FLOAT32_C( 337.09), SIMDE_FLOAT32_C( 19.74), SIMDE_FLOAT32_C( 969.90), SIMDE_FLOAT32_C( -735.01), SIMDE_FLOAT32_C( -969.78) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_erf_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_erf_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 287.12), SIMDE_FLOAT64_C( 923.43), SIMDE_FLOAT64_C( -235.47), SIMDE_FLOAT64_C( 
-270.63), SIMDE_FLOAT64_C( 872.91), SIMDE_FLOAT64_C( 62.22), SIMDE_FLOAT64_C( -259.06), SIMDE_FLOAT64_C( 509.74) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( -381.16), SIMDE_FLOAT64_C( -659.69), SIMDE_FLOAT64_C( 397.49), SIMDE_FLOAT64_C( -803.01), SIMDE_FLOAT64_C( -467.01), SIMDE_FLOAT64_C( -777.46), SIMDE_FLOAT64_C( -995.46), SIMDE_FLOAT64_C( -455.46) }, { SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( 412.93), SIMDE_FLOAT64_C( 31.33), SIMDE_FLOAT64_C( 675.90), SIMDE_FLOAT64_C( 842.14), SIMDE_FLOAT64_C( 999.42), SIMDE_FLOAT64_C( -210.59), SIMDE_FLOAT64_C( 469.06), SIMDE_FLOAT64_C( -204.67) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( 194.13), SIMDE_FLOAT64_C( 752.63), SIMDE_FLOAT64_C( 950.43), SIMDE_FLOAT64_C( 627.80), SIMDE_FLOAT64_C( 3.93), SIMDE_FLOAT64_C( -80.48), SIMDE_FLOAT64_C( -738.99), SIMDE_FLOAT64_C( -708.95) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( -157.05), SIMDE_FLOAT64_C( 25.54), SIMDE_FLOAT64_C( 20.42), SIMDE_FLOAT64_C( -284.15), SIMDE_FLOAT64_C( -912.24), SIMDE_FLOAT64_C( 761.36), SIMDE_FLOAT64_C( -774.41), SIMDE_FLOAT64_C( -293.40) }, { SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00) } }, { { 
SIMDE_FLOAT64_C( -898.33), SIMDE_FLOAT64_C( 623.08), SIMDE_FLOAT64_C( -96.41), SIMDE_FLOAT64_C( -365.34), SIMDE_FLOAT64_C( 845.62), SIMDE_FLOAT64_C( -91.87), SIMDE_FLOAT64_C( 179.19), SIMDE_FLOAT64_C( 258.55) }, { SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( 939.45), SIMDE_FLOAT64_C( -144.90), SIMDE_FLOAT64_C( 100.69), SIMDE_FLOAT64_C( 938.87), SIMDE_FLOAT64_C( 644.51), SIMDE_FLOAT64_C( -430.25), SIMDE_FLOAT64_C( -265.80), SIMDE_FLOAT64_C( -161.37) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( -677.63), SIMDE_FLOAT64_C( -315.37), SIMDE_FLOAT64_C( -533.56), SIMDE_FLOAT64_C( 326.31), SIMDE_FLOAT64_C( 604.15), SIMDE_FLOAT64_C( -272.55), SIMDE_FLOAT64_C( 617.36), SIMDE_FLOAT64_C( -552.90) }, { SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_erf_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_erf_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -475.71), SIMDE_FLOAT64_C( -480.68), SIMDE_FLOAT64_C( 251.56), SIMDE_FLOAT64_C( 974.57), SIMDE_FLOAT64_C( -654.33), SIMDE_FLOAT64_C( 974.69), SIMDE_FLOAT64_C( -443.19), SIMDE_FLOAT64_C( 343.95) }, UINT8_C(224), { SIMDE_FLOAT64_C( -493.29), 
SIMDE_FLOAT64_C( -325.36), SIMDE_FLOAT64_C( -887.40), SIMDE_FLOAT64_C( -727.34), SIMDE_FLOAT64_C( -936.73), SIMDE_FLOAT64_C( 654.69), SIMDE_FLOAT64_C( 988.04), SIMDE_FLOAT64_C( -361.17) }, { SIMDE_FLOAT64_C( -475.71), SIMDE_FLOAT64_C( -480.68), SIMDE_FLOAT64_C( 251.56), SIMDE_FLOAT64_C( 974.57), SIMDE_FLOAT64_C( -654.33), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( 370.27), SIMDE_FLOAT64_C( 594.68), SIMDE_FLOAT64_C( -149.62), SIMDE_FLOAT64_C( -535.38), SIMDE_FLOAT64_C( 277.92), SIMDE_FLOAT64_C( -615.67), SIMDE_FLOAT64_C( -531.54), SIMDE_FLOAT64_C( 583.79) }, UINT8_C(113), { SIMDE_FLOAT64_C( -420.19), SIMDE_FLOAT64_C( -624.33), SIMDE_FLOAT64_C( -915.05), SIMDE_FLOAT64_C( -155.08), SIMDE_FLOAT64_C( 757.99), SIMDE_FLOAT64_C( -390.77), SIMDE_FLOAT64_C( 364.24), SIMDE_FLOAT64_C( 9.55) }, { SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 594.68), SIMDE_FLOAT64_C( -149.62), SIMDE_FLOAT64_C( -535.38), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 583.79) } }, { { SIMDE_FLOAT64_C( -416.20), SIMDE_FLOAT64_C( 709.91), SIMDE_FLOAT64_C( -15.76), SIMDE_FLOAT64_C( 140.62), SIMDE_FLOAT64_C( 53.86), SIMDE_FLOAT64_C( -954.63), SIMDE_FLOAT64_C( 647.32), SIMDE_FLOAT64_C( 728.50) }, UINT8_C(252), { SIMDE_FLOAT64_C( 919.98), SIMDE_FLOAT64_C( 791.78), SIMDE_FLOAT64_C( 812.66), SIMDE_FLOAT64_C( 908.02), SIMDE_FLOAT64_C( -569.39), SIMDE_FLOAT64_C( 182.93), SIMDE_FLOAT64_C( 502.70), SIMDE_FLOAT64_C( 280.99) }, { SIMDE_FLOAT64_C( -416.20), SIMDE_FLOAT64_C( 709.91), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( 647.55), SIMDE_FLOAT64_C( -219.38), SIMDE_FLOAT64_C( 665.32), SIMDE_FLOAT64_C( -883.99), SIMDE_FLOAT64_C( -635.59), SIMDE_FLOAT64_C( -276.35), SIMDE_FLOAT64_C( -304.18), SIMDE_FLOAT64_C( -259.92) }, UINT8_C( 7), { SIMDE_FLOAT64_C( 540.74), SIMDE_FLOAT64_C( -501.92), 
SIMDE_FLOAT64_C( 417.83), SIMDE_FLOAT64_C( -95.02), SIMDE_FLOAT64_C( 507.63), SIMDE_FLOAT64_C( -998.37), SIMDE_FLOAT64_C( -385.10), SIMDE_FLOAT64_C( -508.13) }, { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -883.99), SIMDE_FLOAT64_C( -635.59), SIMDE_FLOAT64_C( -276.35), SIMDE_FLOAT64_C( -304.18), SIMDE_FLOAT64_C( -259.92) } }, { { SIMDE_FLOAT64_C( 142.25), SIMDE_FLOAT64_C( 668.76), SIMDE_FLOAT64_C( -462.76), SIMDE_FLOAT64_C( -210.42), SIMDE_FLOAT64_C( 397.27), SIMDE_FLOAT64_C( -304.79), SIMDE_FLOAT64_C( -290.44), SIMDE_FLOAT64_C( 189.04) }, UINT8_C(184), { SIMDE_FLOAT64_C( -382.42), SIMDE_FLOAT64_C( 619.65), SIMDE_FLOAT64_C( 690.79), SIMDE_FLOAT64_C( -879.72), SIMDE_FLOAT64_C( -99.35), SIMDE_FLOAT64_C( 338.34), SIMDE_FLOAT64_C( -99.10), SIMDE_FLOAT64_C( -434.03) }, { SIMDE_FLOAT64_C( 142.25), SIMDE_FLOAT64_C( 668.76), SIMDE_FLOAT64_C( -462.76), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -290.44), SIMDE_FLOAT64_C( -1.00) } }, { { SIMDE_FLOAT64_C( 454.35), SIMDE_FLOAT64_C( 265.31), SIMDE_FLOAT64_C( 289.62), SIMDE_FLOAT64_C( -849.83), SIMDE_FLOAT64_C( -994.61), SIMDE_FLOAT64_C( -901.78), SIMDE_FLOAT64_C( 690.91), SIMDE_FLOAT64_C( -496.53) }, UINT8_C( 88), { SIMDE_FLOAT64_C( -404.11), SIMDE_FLOAT64_C( -988.90), SIMDE_FLOAT64_C( 517.68), SIMDE_FLOAT64_C( 210.79), SIMDE_FLOAT64_C( -497.03), SIMDE_FLOAT64_C( -340.06), SIMDE_FLOAT64_C( -120.45), SIMDE_FLOAT64_C( 40.21) }, { SIMDE_FLOAT64_C( 454.35), SIMDE_FLOAT64_C( 265.31), SIMDE_FLOAT64_C( 289.62), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -901.78), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -496.53) } }, { { SIMDE_FLOAT64_C( 449.51), SIMDE_FLOAT64_C( -723.18), SIMDE_FLOAT64_C( 735.42), SIMDE_FLOAT64_C( -840.92), SIMDE_FLOAT64_C( 465.86), SIMDE_FLOAT64_C( -756.71), SIMDE_FLOAT64_C( -223.34), SIMDE_FLOAT64_C( 85.52) }, UINT8_C(226), { SIMDE_FLOAT64_C( -103.06), SIMDE_FLOAT64_C( 986.16), SIMDE_FLOAT64_C( 
272.42), SIMDE_FLOAT64_C( 797.84), SIMDE_FLOAT64_C( -447.86), SIMDE_FLOAT64_C( -273.23), SIMDE_FLOAT64_C( 63.15), SIMDE_FLOAT64_C( 841.76) }, { SIMDE_FLOAT64_C( 449.51), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 735.42), SIMDE_FLOAT64_C( -840.92), SIMDE_FLOAT64_C( 465.86), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00) } }, { { SIMDE_FLOAT64_C( -123.06), SIMDE_FLOAT64_C( 68.54), SIMDE_FLOAT64_C( 939.98), SIMDE_FLOAT64_C( -432.16), SIMDE_FLOAT64_C( 572.01), SIMDE_FLOAT64_C( 456.03), SIMDE_FLOAT64_C( 163.74), SIMDE_FLOAT64_C( 583.10) }, UINT8_C(247), { SIMDE_FLOAT64_C( -625.47), SIMDE_FLOAT64_C( -913.93), SIMDE_FLOAT64_C( 633.64), SIMDE_FLOAT64_C( 254.08), SIMDE_FLOAT64_C( 126.28), SIMDE_FLOAT64_C( 83.16), SIMDE_FLOAT64_C( 530.89), SIMDE_FLOAT64_C( -138.30) }, { SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -432.16), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -1.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_erf_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_erfinv_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.29) }, { SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.26) } }, { { SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( 0.67) }, { SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( -0.14), SIMDE_FLOAT32_C( 0.69) } }, { { SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( -0.01), 
SIMDE_FLOAT32_C( 0.25) }, { SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.23) } }, { { SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.13) }, { SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 0.12) } }, { { SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( 0.35) }, { SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( -1.20), SIMDE_FLOAT32_C( -1.13), SIMDE_FLOAT32_C( 0.32) } }, { { SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( -0.27) }, { SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.24) } }, { { SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.47) }, { SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( -0.44) } }, { { SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.73) }, { SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( 0.78) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_erfinv_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_erfinv_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( -0.59) }, { SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( -0.58) } }, { { SIMDE_FLOAT64_C( 0.15), SIMDE_FLOAT64_C( -0.15) }, { SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( -0.13) } }, { { SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( -0.24) }, { SIMDE_FLOAT64_C( 0.72), SIMDE_FLOAT64_C( -0.22) } }, { { SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( 0.81) }, { SIMDE_FLOAT64_C( -0.02), 
SIMDE_FLOAT64_C( 0.93) } }, { { SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( 0.36) }, { SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 0.33) } }, { { SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.21) }, { SIMDE_FLOAT64_C( -0.81), SIMDE_FLOAT64_C( 0.19) } }, { { SIMDE_FLOAT64_C( -0.67), SIMDE_FLOAT64_C( -0.11) }, { SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( -0.10) } }, { { SIMDE_FLOAT64_C( -0.54), SIMDE_FLOAT64_C( -0.85) }, { SIMDE_FLOAT64_C( -0.52), SIMDE_FLOAT64_C( -1.02) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_erfinv_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_erfinv_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.30) }, { SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( -1.20), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( 0.27) } }, { { SIMDE_FLOAT32_C( -0.37), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.54) }, { SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 1.16), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.52) } }, { { SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( -0.20) }, { SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( -0.14), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( -0.66), 
SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( -0.18) } }, { { SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.71) }, { SIMDE_FLOAT32_C( -1.16), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( 0.75) } }, { { SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( -0.26) }, { SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( -1.10), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( -0.23) } }, { { SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.64) }, { SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( 1.39), SIMDE_FLOAT32_C( -1.04), SIMDE_FLOAT32_C( -1.39), SIMDE_FLOAT32_C( -1.13), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( -0.65) } }, { { SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.16) }, { SIMDE_FLOAT32_C( -0.37), SIMDE_FLOAT32_C( 1.28), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( -1.28), SIMDE_FLOAT32_C( 1.20), SIMDE_FLOAT32_C( 0.14) } }, { { SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( -0.08) }, { SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 
0.55), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -1.45), SIMDE_FLOAT32_C( -1.10), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( -0.07) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_erfinv_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_erfinv_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( 0.01) }, { SIMDE_FLOAT64_C( 0.41), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( 0.01) } }, { { SIMDE_FLOAT64_C( -0.06), SIMDE_FLOAT64_C( 0.07), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( -0.30) }, { SIMDE_FLOAT64_C( -0.05), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( -1.02), SIMDE_FLOAT64_C( -0.27) } }, { { SIMDE_FLOAT64_C( -0.05), SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 0.71), SIMDE_FLOAT64_C( 0.05) }, { SIMDE_FLOAT64_C( -0.04), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( 0.04) } }, { { SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( 0.86) }, { SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 1.04) } }, { { SIMDE_FLOAT64_C( -0.22), SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( -0.94), SIMDE_FLOAT64_C( -0.31) }, { SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -0.29), SIMDE_FLOAT64_C( -1.33), SIMDE_FLOAT64_C( -0.28) } }, { { SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( -0.14), SIMDE_FLOAT64_C( 0.26) }, { SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 0.87), SIMDE_FLOAT64_C( -0.12), SIMDE_FLOAT64_C( 0.23) } }, { { SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( 0.71), SIMDE_FLOAT64_C( -0.56) }, { SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( -0.29), SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( 
-0.55) } }, { { SIMDE_FLOAT64_C( -0.59), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.85) }, { SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( 0.21), SIMDE_FLOAT64_C( 1.02) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_erfinv_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_erfinv_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.81), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.05) }, { SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( -1.10), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( -0.04) } }, { { SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( -0.91) }, { SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( -1.82), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 1.39), 
SIMDE_FLOAT32_C( -0.23), SIMDE_FLOAT32_C( 1.02), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -1.16), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( -1.20) } }, { { SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( -0.14), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.97) }, { SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 1.82), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( 1.02), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( -1.39), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 1.53) } }, { { SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.70) }, { SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -1.02), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( -1.82), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( -1.13), SIMDE_FLOAT32_C( -0.23), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( -0.73) } }, { { SIMDE_FLOAT32_C( -0.81), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -0.81), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.99), 
SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 0.03) }, { SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( -1.13), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( -1.82), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( -1.33), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.03) } }, { { SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 0.14) }, { SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 1.28), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 1.02), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( -1.02), SIMDE_FLOAT32_C( 0.12) } }, { { SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( -0.85) }, { SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( -0.25), 
SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 1.33), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( -1.02) } }, { { SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.11) }, { SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 1.10), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.10) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_erfinv_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_erfinv_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.41), SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( -0.46), SIMDE_FLOAT64_C( -0.63), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( -0.93), SIMDE_FLOAT64_C( -0.78) }, { SIMDE_FLOAT64_C( -0.13), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( -0.43), SIMDE_FLOAT64_C( -0.63), SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( -1.28), SIMDE_FLOAT64_C( -0.87) } }, { { SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( -0.28), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( -0.14), 
SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( -0.87), SIMDE_FLOAT64_C( 0.63) }, { SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( -0.25), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( -0.12), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( -1.07), SIMDE_FLOAT64_C( 0.63) } }, { { SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( -0.49), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 0.24), SIMDE_FLOAT64_C( 0.35), SIMDE_FLOAT64_C( -0.11), SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( -0.23) }, { SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( -0.47), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( -0.29), SIMDE_FLOAT64_C( -0.21) } }, { { SIMDE_FLOAT64_C( -0.23), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( -0.66), SIMDE_FLOAT64_C( -0.24) }, { SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( -0.83), SIMDE_FLOAT64_C( 0.16), SIMDE_FLOAT64_C( 1.20), SIMDE_FLOAT64_C( -0.72), SIMDE_FLOAT64_C( -0.67), SIMDE_FLOAT64_C( -0.22) } }, { { SIMDE_FLOAT64_C( 0.72), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( -0.91), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -0.09) }, { SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( -1.20), SIMDE_FLOAT64_C( 0.31), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.70), SIMDE_FLOAT64_C( -0.08) } }, { { SIMDE_FLOAT64_C( -0.91), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( -0.05), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( -0.97), SIMDE_FLOAT64_C( -0.41), SIMDE_FLOAT64_C( -0.43) }, { SIMDE_FLOAT64_C( -1.20), SIMDE_FLOAT64_C( 1.20), SIMDE_FLOAT64_C( 1.13), SIMDE_FLOAT64_C( -0.04), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( -1.53), SIMDE_FLOAT64_C( -0.38), SIMDE_FLOAT64_C( -0.40) } }, { { SIMDE_FLOAT64_C( -0.46), SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( 0.89), 
SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( -0.51), SIMDE_FLOAT64_C( -0.34), SIMDE_FLOAT64_C( 0.82) }, { SIMDE_FLOAT64_C( -0.43), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 1.13), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( -0.49), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( 0.95) } }, { { SIMDE_FLOAT64_C( 0.49), SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 0.21), SIMDE_FLOAT64_C( 0.35), SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( -0.07) }, { SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( -0.09), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( 0.19), SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( -0.06) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_erfinv_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_erfinv_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.82) }, UINT8_C(161), { SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( -0.81), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( -0.03) }, { 
SIMDE_FLOAT32_C( -1.28), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.82) } }, { { SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.19) }, UINT8_C( 98), { SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( -0.37), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( 0.10) }, { SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( -1.82), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( -1.04), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.19) } }, { { SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( -0.87) 
}, UINT8_C( 32), { SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.81), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( -0.57) }, { SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( -0.87) } }, { { SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( 0.89) }, UINT8_C(177), { SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( -0.81) }, { SIMDE_FLOAT32_C( 1.28), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( 
0.89) } }, { { SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.63) }, UINT8_C( 55), { SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.18) }, { SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 1.45), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( -0.23), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.63) } }, { { SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.58) }, UINT8_C( 30), { SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( -0.23), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( 0.03) }, { SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( -0.23), SIMDE_FLOAT32_C( -0.14), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.58) } }, { { SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( 0.84) }, UINT8_C( 89), { SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( 0.53) }, { SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( 0.84) } }, { { SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( 
0.72), SIMDE_FLOAT32_C( 0.84) }, UINT8_C(239), { SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( 0.05) }, { SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( -1.33), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 1.13), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.84) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_erfinv_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_erfinv_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -0.86), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.87), SIMDE_FLOAT64_C( 0.31), SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 0.70) }, UINT8_C(108), { SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -0.78), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.50) }, { SIMDE_FLOAT64_C( -0.86), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( -0.87), SIMDE_FLOAT64_C( 0.72), SIMDE_FLOAT64_C( 0.31), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.31), 
SIMDE_FLOAT64_C( 0.70) } }, { { SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( -0.89), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( 0.35), SIMDE_FLOAT64_C( -0.47), SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( -0.79), SIMDE_FLOAT64_C( -0.77) }, UINT8_C(112), { SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( -0.95), SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( -0.01) }, { SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( -0.89), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( 0.35), SIMDE_FLOAT64_C( 0.21), SIMDE_FLOAT64_C( -1.39), SIMDE_FLOAT64_C( -0.29), SIMDE_FLOAT64_C( -0.77) } }, { { SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( -0.06), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.51) }, UINT8_C(248), { SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.74), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( -0.96), SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( 0.79) }, { SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( -0.06), SIMDE_FLOAT64_C( -1.45), SIMDE_FLOAT64_C( -0.83), SIMDE_FLOAT64_C( -0.29), SIMDE_FLOAT64_C( -1.02), SIMDE_FLOAT64_C( 0.89) } }, { { SIMDE_FLOAT64_C( -0.97), SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( -0.09), SIMDE_FLOAT64_C( -0.63), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( -0.66) }, UINT8_C( 18), { SIMDE_FLOAT64_C( -0.04), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( -0.67), SIMDE_FLOAT64_C( 0.64), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.63), SIMDE_FLOAT64_C( 0.50) }, { SIMDE_FLOAT64_C( -0.97), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( -0.41), SIMDE_FLOAT64_C( -0.63), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( -0.66) } }, { { SIMDE_FLOAT64_C( 0.72), SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( -0.28), 
SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( 0.55) }, UINT8_C( 45), { SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 0.80), SIMDE_FLOAT64_C( 0.28), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 0.31), SIMDE_FLOAT64_C( -0.72), SIMDE_FLOAT64_C( -0.73) }, { SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( 0.28), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( 0.55) } }, { { SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 0.19), SIMDE_FLOAT64_C( -0.36), SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 0.11) }, UINT8_C( 61), { SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( 0.16), SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( -0.49), SIMDE_FLOAT64_C( 0.31) }, { SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 0.19), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 0.11) } }, { { SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( -0.67), SIMDE_FLOAT64_C( -0.66), SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( 0.88), SIMDE_FLOAT64_C( -0.06), SIMDE_FLOAT64_C( -0.17), SIMDE_FLOAT64_C( 0.67) }, UINT8_C(215), { SIMDE_FLOAT64_C( -0.57), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( -0.29), SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( -0.83) }, { SIMDE_FLOAT64_C( -0.56), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( -0.06), SIMDE_FLOAT64_C( 1.20), SIMDE_FLOAT64_C( -0.97) } }, { { SIMDE_FLOAT64_C( -0.94), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( -0.65), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( -0.79) }, UINT8_C(161), { SIMDE_FLOAT64_C( -0.24), SIMDE_FLOAT64_C( 
0.38), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( 0.03) }, { SIMDE_FLOAT64_C( -0.22), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 0.03) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_erfinv_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_erfc_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -315.30), SIMDE_FLOAT32_C( -413.87), SIMDE_FLOAT32_C( -345.31), SIMDE_FLOAT32_C( -228.93) }, { SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( 600.65), SIMDE_FLOAT32_C( -112.11), SIMDE_FLOAT32_C( -98.86), SIMDE_FLOAT32_C( 20.55) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -949.84), SIMDE_FLOAT32_C( -802.03), SIMDE_FLOAT32_C( 212.71), SIMDE_FLOAT32_C( -757.84) }, { SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( -651.52), SIMDE_FLOAT32_C( -363.93), SIMDE_FLOAT32_C( 876.28), SIMDE_FLOAT32_C( -203.61) }, { SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( 527.04), SIMDE_FLOAT32_C( 57.60), SIMDE_FLOAT32_C( -839.49), SIMDE_FLOAT32_C( 826.28) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 974.10), SIMDE_FLOAT32_C( 325.71), SIMDE_FLOAT32_C( 
-535.87), SIMDE_FLOAT32_C( 230.83) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 348.57), SIMDE_FLOAT32_C( 534.66), SIMDE_FLOAT32_C( 231.47), SIMDE_FLOAT32_C( 673.78) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 954.08), SIMDE_FLOAT32_C( 495.36), SIMDE_FLOAT32_C( 387.10), SIMDE_FLOAT32_C( -361.22) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_erfc_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_erfc_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -645.17), SIMDE_FLOAT64_C( 211.72) }, { SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 715.58), SIMDE_FLOAT64_C( 471.86) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 209.41), SIMDE_FLOAT64_C( -887.34) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( -326.89), SIMDE_FLOAT64_C( 772.60) }, { SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 574.21), SIMDE_FLOAT64_C( 504.70) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -447.93), SIMDE_FLOAT64_C( -208.36) }, { SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( 404.62), SIMDE_FLOAT64_C( -998.91) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( -193.72), SIMDE_FLOAT64_C( 660.84) }, { SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); 
simde__m128d r = simde_mm_erfc_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_erfc_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 496.19), SIMDE_FLOAT32_C( -675.69), SIMDE_FLOAT32_C( -153.22), SIMDE_FLOAT32_C( -88.71), SIMDE_FLOAT32_C( 381.12), SIMDE_FLOAT32_C( -119.60), SIMDE_FLOAT32_C( 255.09), SIMDE_FLOAT32_C( -509.70) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( -168.05), SIMDE_FLOAT32_C( -24.56), SIMDE_FLOAT32_C( -778.51), SIMDE_FLOAT32_C( 349.90), SIMDE_FLOAT32_C( 925.97), SIMDE_FLOAT32_C( 439.36), SIMDE_FLOAT32_C( -180.81), SIMDE_FLOAT32_C( 678.48) }, { SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -580.27), SIMDE_FLOAT32_C( -258.04), SIMDE_FLOAT32_C( -62.98), SIMDE_FLOAT32_C( -953.83), SIMDE_FLOAT32_C( 354.49), SIMDE_FLOAT32_C( 914.71), SIMDE_FLOAT32_C( -173.05), SIMDE_FLOAT32_C( -256.98) }, { SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( 277.83), SIMDE_FLOAT32_C( 49.94), SIMDE_FLOAT32_C( -710.16), SIMDE_FLOAT32_C( 556.77), SIMDE_FLOAT32_C( -300.30), SIMDE_FLOAT32_C( 375.96), SIMDE_FLOAT32_C( 468.75), SIMDE_FLOAT32_C( -804.12) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( 700.27), SIMDE_FLOAT32_C( -684.46), 
SIMDE_FLOAT32_C( 107.18), SIMDE_FLOAT32_C( 81.39), SIMDE_FLOAT32_C( 195.94), SIMDE_FLOAT32_C( -637.73), SIMDE_FLOAT32_C( 571.69), SIMDE_FLOAT32_C( -972.11) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( 337.71), SIMDE_FLOAT32_C( 793.18), SIMDE_FLOAT32_C( 377.79), SIMDE_FLOAT32_C( 263.68), SIMDE_FLOAT32_C( 232.54), SIMDE_FLOAT32_C( -803.02), SIMDE_FLOAT32_C( -57.84), SIMDE_FLOAT32_C( 652.27) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -61.06), SIMDE_FLOAT32_C( 879.18), SIMDE_FLOAT32_C( 698.44), SIMDE_FLOAT32_C( -706.57), SIMDE_FLOAT32_C( 793.88), SIMDE_FLOAT32_C( -474.61), SIMDE_FLOAT32_C( 36.44), SIMDE_FLOAT32_C( 71.71) }, { SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 575.33), SIMDE_FLOAT32_C( 326.28), SIMDE_FLOAT32_C( -371.52), SIMDE_FLOAT32_C( -724.97), SIMDE_FLOAT32_C( -297.76), SIMDE_FLOAT32_C( -902.77), SIMDE_FLOAT32_C( -529.09), SIMDE_FLOAT32_C( -597.49) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_erfc_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_erfc_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } 
test_vec[] = { { { SIMDE_FLOAT64_C( 461.51), SIMDE_FLOAT64_C( -571.50), SIMDE_FLOAT64_C( 241.15), SIMDE_FLOAT64_C( 521.48) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -695.16), SIMDE_FLOAT64_C( -842.41), SIMDE_FLOAT64_C( 799.26), SIMDE_FLOAT64_C( 685.42) }, { SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 3.40), SIMDE_FLOAT64_C( -776.18), SIMDE_FLOAT64_C( -325.62), SIMDE_FLOAT64_C( 7.02) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 948.46), SIMDE_FLOAT64_C( 348.12), SIMDE_FLOAT64_C( 741.43), SIMDE_FLOAT64_C( -182.81) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( 319.42), SIMDE_FLOAT64_C( 46.64), SIMDE_FLOAT64_C( 792.19), SIMDE_FLOAT64_C( -94.82) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( -364.65), SIMDE_FLOAT64_C( -718.98), SIMDE_FLOAT64_C( 201.33), SIMDE_FLOAT64_C( 634.78) }, { SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 348.43), SIMDE_FLOAT64_C( 374.84), SIMDE_FLOAT64_C( -48.84), SIMDE_FLOAT64_C( -910.34) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( -513.67), SIMDE_FLOAT64_C( -235.62), SIMDE_FLOAT64_C( -80.01), SIMDE_FLOAT64_C( 947.84) }, { SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_erfc_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int 
test_simde_mm512_erfc_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 430.03), SIMDE_FLOAT32_C( -494.11), SIMDE_FLOAT32_C( -522.83), SIMDE_FLOAT32_C( -160.68), SIMDE_FLOAT32_C( -217.51), SIMDE_FLOAT32_C( 364.22), SIMDE_FLOAT32_C( -906.03), SIMDE_FLOAT32_C( 335.92), SIMDE_FLOAT32_C( -779.46), SIMDE_FLOAT32_C( -248.95), SIMDE_FLOAT32_C( -22.71), SIMDE_FLOAT32_C( -802.66), SIMDE_FLOAT32_C( -495.02), SIMDE_FLOAT32_C( -618.65), SIMDE_FLOAT32_C( -592.74), SIMDE_FLOAT32_C( 774.33) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -819.68), SIMDE_FLOAT32_C( -841.87), SIMDE_FLOAT32_C( 969.10), SIMDE_FLOAT32_C( -855.15), SIMDE_FLOAT32_C( -473.12), SIMDE_FLOAT32_C( 203.71), SIMDE_FLOAT32_C( -640.23), SIMDE_FLOAT32_C( -593.80), SIMDE_FLOAT32_C( -307.51), SIMDE_FLOAT32_C( 246.67), SIMDE_FLOAT32_C( -893.51), SIMDE_FLOAT32_C( 533.63), SIMDE_FLOAT32_C( 217.68), SIMDE_FLOAT32_C( 100.04), SIMDE_FLOAT32_C( 228.82), SIMDE_FLOAT32_C( -352.29) }, { SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( 605.93), SIMDE_FLOAT32_C( 705.99), SIMDE_FLOAT32_C( 487.03), SIMDE_FLOAT32_C( -611.58), SIMDE_FLOAT32_C( 70.21), SIMDE_FLOAT32_C( 581.00), SIMDE_FLOAT32_C( 724.34), SIMDE_FLOAT32_C( 290.75), 
SIMDE_FLOAT32_C( -667.95), SIMDE_FLOAT32_C( -298.37), SIMDE_FLOAT32_C( 488.09), SIMDE_FLOAT32_C( -162.97), SIMDE_FLOAT32_C( 82.98), SIMDE_FLOAT32_C( 895.36), SIMDE_FLOAT32_C( -388.63), SIMDE_FLOAT32_C( 263.30) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -946.51), SIMDE_FLOAT32_C( -419.53), SIMDE_FLOAT32_C( 408.15), SIMDE_FLOAT32_C( -419.64), SIMDE_FLOAT32_C( 784.18), SIMDE_FLOAT32_C( 767.92), SIMDE_FLOAT32_C( -13.43), SIMDE_FLOAT32_C( -523.33), SIMDE_FLOAT32_C( 14.59), SIMDE_FLOAT32_C( 93.06), SIMDE_FLOAT32_C( -989.70), SIMDE_FLOAT32_C( -767.74), SIMDE_FLOAT32_C( -806.91), SIMDE_FLOAT32_C( 239.11), SIMDE_FLOAT32_C( -120.03), SIMDE_FLOAT32_C( 799.02) }, { SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -54.90), SIMDE_FLOAT32_C( -633.00), SIMDE_FLOAT32_C( -812.56), SIMDE_FLOAT32_C( -984.69), SIMDE_FLOAT32_C( 948.00), SIMDE_FLOAT32_C( 911.78), SIMDE_FLOAT32_C( 306.06), SIMDE_FLOAT32_C( -719.95), SIMDE_FLOAT32_C( -386.59), SIMDE_FLOAT32_C( -205.84), SIMDE_FLOAT32_C( 117.08), SIMDE_FLOAT32_C( 696.39), SIMDE_FLOAT32_C( -310.49), SIMDE_FLOAT32_C( 728.45), SIMDE_FLOAT32_C( -40.32), SIMDE_FLOAT32_C( -257.00) }, { SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( -691.08), SIMDE_FLOAT32_C( -632.17), SIMDE_FLOAT32_C( 323.36), SIMDE_FLOAT32_C( -906.91), SIMDE_FLOAT32_C( -864.25), SIMDE_FLOAT32_C( -690.07), SIMDE_FLOAT32_C( -430.23), SIMDE_FLOAT32_C( 150.34), SIMDE_FLOAT32_C( 402.99), SIMDE_FLOAT32_C( -419.93), SIMDE_FLOAT32_C( 382.60), SIMDE_FLOAT32_C( 596.09), SIMDE_FLOAT32_C( 819.18), SIMDE_FLOAT32_C( -737.43), SIMDE_FLOAT32_C( 395.11), SIMDE_FLOAT32_C( -235.72) }, { SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( -370.43), SIMDE_FLOAT32_C( 582.55), SIMDE_FLOAT32_C( -220.40), SIMDE_FLOAT32_C( -422.43), SIMDE_FLOAT32_C( 494.33), SIMDE_FLOAT32_C( -914.34), SIMDE_FLOAT32_C( -142.39), SIMDE_FLOAT32_C( -892.26), SIMDE_FLOAT32_C( -120.19), SIMDE_FLOAT32_C( 974.69), SIMDE_FLOAT32_C( 804.12), SIMDE_FLOAT32_C( 569.33), SIMDE_FLOAT32_C( 703.14), SIMDE_FLOAT32_C( -236.19), SIMDE_FLOAT32_C( -687.67), SIMDE_FLOAT32_C( -987.95) }, { SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00) } }, { { SIMDE_FLOAT32_C( 131.64), SIMDE_FLOAT32_C( 635.69), SIMDE_FLOAT32_C( -894.85), SIMDE_FLOAT32_C( 267.39), 
SIMDE_FLOAT32_C( 945.62), SIMDE_FLOAT32_C( -325.08), SIMDE_FLOAT32_C( -582.27), SIMDE_FLOAT32_C( 348.62), SIMDE_FLOAT32_C( 254.98), SIMDE_FLOAT32_C( 800.33), SIMDE_FLOAT32_C( -55.30), SIMDE_FLOAT32_C( 74.16), SIMDE_FLOAT32_C( -937.10), SIMDE_FLOAT32_C( -660.19), SIMDE_FLOAT32_C( 838.44), SIMDE_FLOAT32_C( -307.53) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_erfc_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_erfc_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 956.61), SIMDE_FLOAT32_C( 234.13), SIMDE_FLOAT32_C( 892.38), SIMDE_FLOAT32_C( 414.62), SIMDE_FLOAT32_C( -352.76), SIMDE_FLOAT32_C( 66.22), SIMDE_FLOAT32_C( -611.87), SIMDE_FLOAT32_C( 409.12), SIMDE_FLOAT32_C( -59.49), SIMDE_FLOAT32_C( 561.33), SIMDE_FLOAT32_C( -922.08), SIMDE_FLOAT32_C( 538.83), SIMDE_FLOAT32_C( -425.54), SIMDE_FLOAT32_C( -342.56), SIMDE_FLOAT32_C( -597.87), SIMDE_FLOAT32_C( 992.17) }, UINT8_C(125), { SIMDE_FLOAT32_C( 513.40), SIMDE_FLOAT32_C( -248.97), SIMDE_FLOAT32_C( -181.44), SIMDE_FLOAT32_C( 317.13), SIMDE_FLOAT32_C( 267.53), SIMDE_FLOAT32_C( 935.63), SIMDE_FLOAT32_C( 584.65), SIMDE_FLOAT32_C( 221.64), SIMDE_FLOAT32_C( -188.28), SIMDE_FLOAT32_C( 142.72), SIMDE_FLOAT32_C( 400.07), SIMDE_FLOAT32_C( 778.58), SIMDE_FLOAT32_C( 216.90), SIMDE_FLOAT32_C( 410.27), 
SIMDE_FLOAT32_C( 735.18), SIMDE_FLOAT32_C( -548.98) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 234.13), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 409.12), SIMDE_FLOAT32_C( -59.49), SIMDE_FLOAT32_C( 561.33), SIMDE_FLOAT32_C( -922.08), SIMDE_FLOAT32_C( 538.83), SIMDE_FLOAT32_C( -425.54), SIMDE_FLOAT32_C( -342.56), SIMDE_FLOAT32_C( -597.87), SIMDE_FLOAT32_C( 992.17) } }, { { SIMDE_FLOAT32_C( 302.65), SIMDE_FLOAT32_C( 149.80), SIMDE_FLOAT32_C( 98.26), SIMDE_FLOAT32_C( -631.12), SIMDE_FLOAT32_C( 537.93), SIMDE_FLOAT32_C( -492.62), SIMDE_FLOAT32_C( 309.39), SIMDE_FLOAT32_C( 99.26), SIMDE_FLOAT32_C( -414.70), SIMDE_FLOAT32_C( -151.78), SIMDE_FLOAT32_C( 673.72), SIMDE_FLOAT32_C( 242.74), SIMDE_FLOAT32_C( 250.35), SIMDE_FLOAT32_C( 665.88), SIMDE_FLOAT32_C( 646.74), SIMDE_FLOAT32_C( -236.25) }, UINT8_C(226), { SIMDE_FLOAT32_C( -534.70), SIMDE_FLOAT32_C( -919.12), SIMDE_FLOAT32_C( 684.44), SIMDE_FLOAT32_C( -599.07), SIMDE_FLOAT32_C( 665.53), SIMDE_FLOAT32_C( -93.93), SIMDE_FLOAT32_C( 212.65), SIMDE_FLOAT32_C( -191.74), SIMDE_FLOAT32_C( -693.86), SIMDE_FLOAT32_C( -8.77), SIMDE_FLOAT32_C( -974.85), SIMDE_FLOAT32_C( 716.41), SIMDE_FLOAT32_C( -273.59), SIMDE_FLOAT32_C( -523.82), SIMDE_FLOAT32_C( 19.06), SIMDE_FLOAT32_C( 876.21) }, { SIMDE_FLOAT32_C( 302.65), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 98.26), SIMDE_FLOAT32_C( -631.12), SIMDE_FLOAT32_C( 537.93), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -414.70), SIMDE_FLOAT32_C( -151.78), SIMDE_FLOAT32_C( 673.72), SIMDE_FLOAT32_C( 242.74), SIMDE_FLOAT32_C( 250.35), SIMDE_FLOAT32_C( 665.88), SIMDE_FLOAT32_C( 646.74), SIMDE_FLOAT32_C( -236.25) } }, { { SIMDE_FLOAT32_C( 574.44), SIMDE_FLOAT32_C( 387.93), SIMDE_FLOAT32_C( 414.13), SIMDE_FLOAT32_C( -918.18), SIMDE_FLOAT32_C( -302.68), SIMDE_FLOAT32_C( -486.61), SIMDE_FLOAT32_C( -332.89), SIMDE_FLOAT32_C( 545.53), SIMDE_FLOAT32_C( -812.89), 
SIMDE_FLOAT32_C( 909.85), SIMDE_FLOAT32_C( -204.12), SIMDE_FLOAT32_C( 852.99), SIMDE_FLOAT32_C( 556.59), SIMDE_FLOAT32_C( 559.63), SIMDE_FLOAT32_C( -730.10), SIMDE_FLOAT32_C( -978.11) }, UINT8_C( 0), { SIMDE_FLOAT32_C( 954.34), SIMDE_FLOAT32_C( -577.18), SIMDE_FLOAT32_C( 306.05), SIMDE_FLOAT32_C( -139.59), SIMDE_FLOAT32_C( 635.48), SIMDE_FLOAT32_C( -885.69), SIMDE_FLOAT32_C( 166.55), SIMDE_FLOAT32_C( -373.29), SIMDE_FLOAT32_C( -860.54), SIMDE_FLOAT32_C( -117.04), SIMDE_FLOAT32_C( 353.12), SIMDE_FLOAT32_C( -384.37), SIMDE_FLOAT32_C( 902.02), SIMDE_FLOAT32_C( 229.33), SIMDE_FLOAT32_C( -809.93), SIMDE_FLOAT32_C( 289.95) }, { SIMDE_FLOAT32_C( 574.44), SIMDE_FLOAT32_C( 387.93), SIMDE_FLOAT32_C( 414.13), SIMDE_FLOAT32_C( -918.18), SIMDE_FLOAT32_C( -302.68), SIMDE_FLOAT32_C( -486.61), SIMDE_FLOAT32_C( -332.89), SIMDE_FLOAT32_C( 545.53), SIMDE_FLOAT32_C( -812.89), SIMDE_FLOAT32_C( 909.85), SIMDE_FLOAT32_C( -204.12), SIMDE_FLOAT32_C( 852.99), SIMDE_FLOAT32_C( 556.59), SIMDE_FLOAT32_C( 559.63), SIMDE_FLOAT32_C( -730.10), SIMDE_FLOAT32_C( -978.11) } }, { { SIMDE_FLOAT32_C( -356.54), SIMDE_FLOAT32_C( -728.11), SIMDE_FLOAT32_C( 987.27), SIMDE_FLOAT32_C( 156.85), SIMDE_FLOAT32_C( -61.00), SIMDE_FLOAT32_C( 532.80), SIMDE_FLOAT32_C( 343.96), SIMDE_FLOAT32_C( -151.15), SIMDE_FLOAT32_C( -671.32), SIMDE_FLOAT32_C( 196.95), SIMDE_FLOAT32_C( -594.56), SIMDE_FLOAT32_C( 888.32), SIMDE_FLOAT32_C( 466.85), SIMDE_FLOAT32_C( -572.66), SIMDE_FLOAT32_C( 528.83), SIMDE_FLOAT32_C( 421.19) }, UINT8_C(129), { SIMDE_FLOAT32_C( -165.12), SIMDE_FLOAT32_C( -718.39), SIMDE_FLOAT32_C( -514.36), SIMDE_FLOAT32_C( -50.81), SIMDE_FLOAT32_C( 448.16), SIMDE_FLOAT32_C( 112.35), SIMDE_FLOAT32_C( 88.64), SIMDE_FLOAT32_C( -668.88), SIMDE_FLOAT32_C( -534.54), SIMDE_FLOAT32_C( 704.28), SIMDE_FLOAT32_C( -766.86), SIMDE_FLOAT32_C( 694.79), SIMDE_FLOAT32_C( 894.35), SIMDE_FLOAT32_C( 523.08), SIMDE_FLOAT32_C( -661.75), SIMDE_FLOAT32_C( -833.77) }, { SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -728.11), SIMDE_FLOAT32_C( 
987.27), SIMDE_FLOAT32_C( 156.85), SIMDE_FLOAT32_C( -61.00), SIMDE_FLOAT32_C( 532.80), SIMDE_FLOAT32_C( 343.96), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -671.32), SIMDE_FLOAT32_C( 196.95), SIMDE_FLOAT32_C( -594.56), SIMDE_FLOAT32_C( 888.32), SIMDE_FLOAT32_C( 466.85), SIMDE_FLOAT32_C( -572.66), SIMDE_FLOAT32_C( 528.83), SIMDE_FLOAT32_C( 421.19) } }, { { SIMDE_FLOAT32_C( 510.35), SIMDE_FLOAT32_C( 495.10), SIMDE_FLOAT32_C( 105.23), SIMDE_FLOAT32_C( 43.15), SIMDE_FLOAT32_C( -160.94), SIMDE_FLOAT32_C( 954.08), SIMDE_FLOAT32_C( 371.83), SIMDE_FLOAT32_C( -963.98), SIMDE_FLOAT32_C( -640.48), SIMDE_FLOAT32_C( 260.15), SIMDE_FLOAT32_C( 502.87), SIMDE_FLOAT32_C( -213.14), SIMDE_FLOAT32_C( -211.02), SIMDE_FLOAT32_C( -75.94), SIMDE_FLOAT32_C( 637.02), SIMDE_FLOAT32_C( 623.86) }, UINT8_C( 36), { SIMDE_FLOAT32_C( -877.34), SIMDE_FLOAT32_C( -426.95), SIMDE_FLOAT32_C( -346.17), SIMDE_FLOAT32_C( 235.01), SIMDE_FLOAT32_C( 661.70), SIMDE_FLOAT32_C( -15.05), SIMDE_FLOAT32_C( 700.47), SIMDE_FLOAT32_C( 365.98), SIMDE_FLOAT32_C( 218.09), SIMDE_FLOAT32_C( 395.26), SIMDE_FLOAT32_C( 260.32), SIMDE_FLOAT32_C( -258.83), SIMDE_FLOAT32_C( 733.51), SIMDE_FLOAT32_C( 426.55), SIMDE_FLOAT32_C( -748.48), SIMDE_FLOAT32_C( 228.61) }, { SIMDE_FLOAT32_C( 510.35), SIMDE_FLOAT32_C( 495.10), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 43.15), SIMDE_FLOAT32_C( -160.94), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 371.83), SIMDE_FLOAT32_C( -963.98), SIMDE_FLOAT32_C( -640.48), SIMDE_FLOAT32_C( 260.15), SIMDE_FLOAT32_C( 502.87), SIMDE_FLOAT32_C( -213.14), SIMDE_FLOAT32_C( -211.02), SIMDE_FLOAT32_C( -75.94), SIMDE_FLOAT32_C( 637.02), SIMDE_FLOAT32_C( 623.86) } }, { { SIMDE_FLOAT32_C( -468.22), SIMDE_FLOAT32_C( 294.67), SIMDE_FLOAT32_C( -932.33), SIMDE_FLOAT32_C( -514.14), SIMDE_FLOAT32_C( -333.50), SIMDE_FLOAT32_C( -896.31), SIMDE_FLOAT32_C( -154.62), SIMDE_FLOAT32_C( 926.65), SIMDE_FLOAT32_C( 606.56), SIMDE_FLOAT32_C( 632.24), SIMDE_FLOAT32_C( -284.37), SIMDE_FLOAT32_C( -469.38), SIMDE_FLOAT32_C( 269.27), 
SIMDE_FLOAT32_C( -660.50), SIMDE_FLOAT32_C( 736.29), SIMDE_FLOAT32_C( 391.93) }, UINT8_C(251), { SIMDE_FLOAT32_C( -609.88), SIMDE_FLOAT32_C( -373.06), SIMDE_FLOAT32_C( -425.75), SIMDE_FLOAT32_C( 375.07), SIMDE_FLOAT32_C( -672.58), SIMDE_FLOAT32_C( 940.22), SIMDE_FLOAT32_C( -406.85), SIMDE_FLOAT32_C( 722.68), SIMDE_FLOAT32_C( 200.54), SIMDE_FLOAT32_C( 334.32), SIMDE_FLOAT32_C( 456.19), SIMDE_FLOAT32_C( -372.90), SIMDE_FLOAT32_C( 585.84), SIMDE_FLOAT32_C( -315.20), SIMDE_FLOAT32_C( 158.88), SIMDE_FLOAT32_C( -119.49) }, { SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -932.33), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 606.56), SIMDE_FLOAT32_C( 632.24), SIMDE_FLOAT32_C( -284.37), SIMDE_FLOAT32_C( -469.38), SIMDE_FLOAT32_C( 269.27), SIMDE_FLOAT32_C( -660.50), SIMDE_FLOAT32_C( 736.29), SIMDE_FLOAT32_C( 391.93) } }, { { SIMDE_FLOAT32_C( -247.53), SIMDE_FLOAT32_C( 644.74), SIMDE_FLOAT32_C( 547.01), SIMDE_FLOAT32_C( -143.84), SIMDE_FLOAT32_C( -509.87), SIMDE_FLOAT32_C( 473.66), SIMDE_FLOAT32_C( -537.28), SIMDE_FLOAT32_C( -877.63), SIMDE_FLOAT32_C( -810.70), SIMDE_FLOAT32_C( -6.66), SIMDE_FLOAT32_C( 391.64), SIMDE_FLOAT32_C( -471.21), SIMDE_FLOAT32_C( -270.37), SIMDE_FLOAT32_C( -216.43), SIMDE_FLOAT32_C( 441.34), SIMDE_FLOAT32_C( 119.74) }, UINT8_C(113), { SIMDE_FLOAT32_C( -984.41), SIMDE_FLOAT32_C( -505.19), SIMDE_FLOAT32_C( 737.93), SIMDE_FLOAT32_C( 955.81), SIMDE_FLOAT32_C( 87.96), SIMDE_FLOAT32_C( 460.61), SIMDE_FLOAT32_C( 156.35), SIMDE_FLOAT32_C( -577.72), SIMDE_FLOAT32_C( -83.20), SIMDE_FLOAT32_C( 783.45), SIMDE_FLOAT32_C( -991.87), SIMDE_FLOAT32_C( 601.60), SIMDE_FLOAT32_C( -57.67), SIMDE_FLOAT32_C( -111.36), SIMDE_FLOAT32_C( -645.93), SIMDE_FLOAT32_C( -412.93) }, { SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 644.74), SIMDE_FLOAT32_C( 547.01), SIMDE_FLOAT32_C( -143.84), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 
-877.63), SIMDE_FLOAT32_C( -810.70), SIMDE_FLOAT32_C( -6.66), SIMDE_FLOAT32_C( 391.64), SIMDE_FLOAT32_C( -471.21), SIMDE_FLOAT32_C( -270.37), SIMDE_FLOAT32_C( -216.43), SIMDE_FLOAT32_C( 441.34), SIMDE_FLOAT32_C( 119.74) } }, { { SIMDE_FLOAT32_C( -564.35), SIMDE_FLOAT32_C( 210.23), SIMDE_FLOAT32_C( 77.20), SIMDE_FLOAT32_C( 909.32), SIMDE_FLOAT32_C( 672.96), SIMDE_FLOAT32_C( 199.57), SIMDE_FLOAT32_C( -901.39), SIMDE_FLOAT32_C( -333.70), SIMDE_FLOAT32_C( -408.79), SIMDE_FLOAT32_C( -372.60), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 374.78), SIMDE_FLOAT32_C( -931.26), SIMDE_FLOAT32_C( -484.33), SIMDE_FLOAT32_C( -214.70), SIMDE_FLOAT32_C( -915.67) }, UINT8_C(139), { SIMDE_FLOAT32_C( -476.78), SIMDE_FLOAT32_C( -959.86), SIMDE_FLOAT32_C( -901.56), SIMDE_FLOAT32_C( 983.83), SIMDE_FLOAT32_C( 196.49), SIMDE_FLOAT32_C( -479.28), SIMDE_FLOAT32_C( -99.37), SIMDE_FLOAT32_C( -20.06), SIMDE_FLOAT32_C( -471.16), SIMDE_FLOAT32_C( -497.78), SIMDE_FLOAT32_C( 922.27), SIMDE_FLOAT32_C( 417.48), SIMDE_FLOAT32_C( -143.71), SIMDE_FLOAT32_C( -490.66), SIMDE_FLOAT32_C( 853.13), SIMDE_FLOAT32_C( -933.47) }, { SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 77.20), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 672.96), SIMDE_FLOAT32_C( 199.57), SIMDE_FLOAT32_C( -901.39), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -408.79), SIMDE_FLOAT32_C( -372.60), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 374.78), SIMDE_FLOAT32_C( -931.26), SIMDE_FLOAT32_C( -484.33), SIMDE_FLOAT32_C( -214.70), SIMDE_FLOAT32_C( -915.67) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_erfc_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_erfc_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } 
test_vec[] = { { { SIMDE_FLOAT64_C( 926.55), SIMDE_FLOAT64_C( 763.10), SIMDE_FLOAT64_C( 6.18), SIMDE_FLOAT64_C( 453.38), SIMDE_FLOAT64_C( 184.79), SIMDE_FLOAT64_C( 608.12), SIMDE_FLOAT64_C( 303.22), SIMDE_FLOAT64_C( 429.75) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 610.63), SIMDE_FLOAT64_C( -505.99), SIMDE_FLOAT64_C( -566.70), SIMDE_FLOAT64_C( -890.86), SIMDE_FLOAT64_C( -469.61), SIMDE_FLOAT64_C( -65.43), SIMDE_FLOAT64_C( -190.70), SIMDE_FLOAT64_C( 797.08) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 883.79), SIMDE_FLOAT64_C( -999.64), SIMDE_FLOAT64_C( 928.39), SIMDE_FLOAT64_C( -465.63), SIMDE_FLOAT64_C( -214.31), SIMDE_FLOAT64_C( 650.21), SIMDE_FLOAT64_C( 880.22), SIMDE_FLOAT64_C( -127.39) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( 687.46), SIMDE_FLOAT64_C( -738.40), SIMDE_FLOAT64_C( -655.58), SIMDE_FLOAT64_C( -737.41), SIMDE_FLOAT64_C( -335.05), SIMDE_FLOAT64_C( -354.48), SIMDE_FLOAT64_C( -302.30), SIMDE_FLOAT64_C( -408.50) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( -591.38), SIMDE_FLOAT64_C( 703.88), SIMDE_FLOAT64_C( -955.11), SIMDE_FLOAT64_C( 593.41), SIMDE_FLOAT64_C( 311.99), SIMDE_FLOAT64_C( 348.11), SIMDE_FLOAT64_C( 23.16), SIMDE_FLOAT64_C( -77.38) }, { SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00), 
SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( 842.12), SIMDE_FLOAT64_C( 456.45), SIMDE_FLOAT64_C( 31.76), SIMDE_FLOAT64_C( -627.49), SIMDE_FLOAT64_C( -608.98), SIMDE_FLOAT64_C( 841.06), SIMDE_FLOAT64_C( -830.41), SIMDE_FLOAT64_C( -725.19) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( 841.43), SIMDE_FLOAT64_C( -902.02), SIMDE_FLOAT64_C( -190.81), SIMDE_FLOAT64_C( -372.89), SIMDE_FLOAT64_C( 748.18), SIMDE_FLOAT64_C( -310.59), SIMDE_FLOAT64_C( 499.72), SIMDE_FLOAT64_C( 435.64) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -48.99), SIMDE_FLOAT64_C( 844.14), SIMDE_FLOAT64_C( 698.23), SIMDE_FLOAT64_C( 615.96), SIMDE_FLOAT64_C( -510.34), SIMDE_FLOAT64_C( -604.07), SIMDE_FLOAT64_C( -792.54), SIMDE_FLOAT64_C( -101.72) }, { SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_erfc_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_erfc_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -683.28), SIMDE_FLOAT64_C( 804.20), SIMDE_FLOAT64_C( -404.66), SIMDE_FLOAT64_C( -472.79), SIMDE_FLOAT64_C( -863.69), SIMDE_FLOAT64_C( -237.69), 
SIMDE_FLOAT64_C( -919.11), SIMDE_FLOAT64_C( 998.91) }, UINT8_C( 80), { SIMDE_FLOAT64_C( 291.91), SIMDE_FLOAT64_C( -572.21), SIMDE_FLOAT64_C( 220.68), SIMDE_FLOAT64_C( -193.99), SIMDE_FLOAT64_C( -17.57), SIMDE_FLOAT64_C( 493.29), SIMDE_FLOAT64_C( 557.85), SIMDE_FLOAT64_C( 412.26) }, { SIMDE_FLOAT64_C( -683.28), SIMDE_FLOAT64_C( 804.20), SIMDE_FLOAT64_C( -404.66), SIMDE_FLOAT64_C( -472.79), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( -237.69), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 998.91) } }, { { SIMDE_FLOAT64_C( 986.63), SIMDE_FLOAT64_C( -515.33), SIMDE_FLOAT64_C( -32.91), SIMDE_FLOAT64_C( -333.09), SIMDE_FLOAT64_C( -321.96), SIMDE_FLOAT64_C( 468.63), SIMDE_FLOAT64_C( 439.22), SIMDE_FLOAT64_C( -104.11) }, UINT8_C( 73), { SIMDE_FLOAT64_C( 199.74), SIMDE_FLOAT64_C( 522.47), SIMDE_FLOAT64_C( 516.01), SIMDE_FLOAT64_C( -942.26), SIMDE_FLOAT64_C( -623.61), SIMDE_FLOAT64_C( 832.73), SIMDE_FLOAT64_C( 861.94), SIMDE_FLOAT64_C( -28.27) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -515.33), SIMDE_FLOAT64_C( -32.91), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( -321.96), SIMDE_FLOAT64_C( 468.63), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -104.11) } }, { { SIMDE_FLOAT64_C( -640.06), SIMDE_FLOAT64_C( 998.25), SIMDE_FLOAT64_C( 734.04), SIMDE_FLOAT64_C( -559.17), SIMDE_FLOAT64_C( 997.17), SIMDE_FLOAT64_C( -856.00), SIMDE_FLOAT64_C( 732.74), SIMDE_FLOAT64_C( -575.04) }, UINT8_C(158), { SIMDE_FLOAT64_C( -461.24), SIMDE_FLOAT64_C( 407.39), SIMDE_FLOAT64_C( -142.02), SIMDE_FLOAT64_C( -903.39), SIMDE_FLOAT64_C( -180.35), SIMDE_FLOAT64_C( -155.40), SIMDE_FLOAT64_C( -418.72), SIMDE_FLOAT64_C( 786.74) }, { SIMDE_FLOAT64_C( -640.06), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( -856.00), SIMDE_FLOAT64_C( 732.74), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 511.51), SIMDE_FLOAT64_C( 259.32), SIMDE_FLOAT64_C( 255.37), SIMDE_FLOAT64_C( -49.27), SIMDE_FLOAT64_C( -844.79), SIMDE_FLOAT64_C( 939.27), SIMDE_FLOAT64_C( -849.53), 
SIMDE_FLOAT64_C( 677.68) }, UINT8_C(184), { SIMDE_FLOAT64_C( -791.79), SIMDE_FLOAT64_C( -945.93), SIMDE_FLOAT64_C( 288.01), SIMDE_FLOAT64_C( -929.85), SIMDE_FLOAT64_C( 25.80), SIMDE_FLOAT64_C( 647.95), SIMDE_FLOAT64_C( -931.60), SIMDE_FLOAT64_C( -240.16) }, { SIMDE_FLOAT64_C( 511.51), SIMDE_FLOAT64_C( 259.32), SIMDE_FLOAT64_C( 255.37), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -849.53), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( -911.22), SIMDE_FLOAT64_C( -934.43), SIMDE_FLOAT64_C( -96.16), SIMDE_FLOAT64_C( 821.52), SIMDE_FLOAT64_C( -509.47), SIMDE_FLOAT64_C( -731.47), SIMDE_FLOAT64_C( -639.72), SIMDE_FLOAT64_C( 897.92) }, UINT8_C(176), { SIMDE_FLOAT64_C( -543.12), SIMDE_FLOAT64_C( -282.43), SIMDE_FLOAT64_C( 971.11), SIMDE_FLOAT64_C( 38.16), SIMDE_FLOAT64_C( -495.70), SIMDE_FLOAT64_C( 482.61), SIMDE_FLOAT64_C( -702.52), SIMDE_FLOAT64_C( 759.67) }, { SIMDE_FLOAT64_C( -911.22), SIMDE_FLOAT64_C( -934.43), SIMDE_FLOAT64_C( -96.16), SIMDE_FLOAT64_C( 821.52), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -639.72), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -566.66), SIMDE_FLOAT64_C( -547.31), SIMDE_FLOAT64_C( 698.94), SIMDE_FLOAT64_C( -416.19), SIMDE_FLOAT64_C( -869.63), SIMDE_FLOAT64_C( 154.22), SIMDE_FLOAT64_C( -207.98), SIMDE_FLOAT64_C( -815.57) }, UINT8_C(142), { SIMDE_FLOAT64_C( -137.83), SIMDE_FLOAT64_C( 210.23), SIMDE_FLOAT64_C( -909.82), SIMDE_FLOAT64_C( -69.43), SIMDE_FLOAT64_C( 970.07), SIMDE_FLOAT64_C( -821.05), SIMDE_FLOAT64_C( -3.87), SIMDE_FLOAT64_C( -126.08) }, { SIMDE_FLOAT64_C( -566.66), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( -869.63), SIMDE_FLOAT64_C( 154.22), SIMDE_FLOAT64_C( -207.98), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( -999.53), SIMDE_FLOAT64_C( 486.66), SIMDE_FLOAT64_C( 142.44), SIMDE_FLOAT64_C( -639.25), SIMDE_FLOAT64_C( 384.58), SIMDE_FLOAT64_C( -731.05), SIMDE_FLOAT64_C( -182.37), SIMDE_FLOAT64_C( -897.86) 
}, UINT8_C(227), { SIMDE_FLOAT64_C( 855.79), SIMDE_FLOAT64_C( -393.55), SIMDE_FLOAT64_C( 722.67), SIMDE_FLOAT64_C( -846.73), SIMDE_FLOAT64_C( -633.88), SIMDE_FLOAT64_C( -843.99), SIMDE_FLOAT64_C( -394.03), SIMDE_FLOAT64_C( -934.94) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 142.44), SIMDE_FLOAT64_C( -639.25), SIMDE_FLOAT64_C( 384.58), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( -260.18), SIMDE_FLOAT64_C( -263.67), SIMDE_FLOAT64_C( 219.28), SIMDE_FLOAT64_C( 531.84), SIMDE_FLOAT64_C( -79.23), SIMDE_FLOAT64_C( 661.51), SIMDE_FLOAT64_C( -605.99), SIMDE_FLOAT64_C( -869.00) }, UINT8_C( 64), { SIMDE_FLOAT64_C( 324.57), SIMDE_FLOAT64_C( -898.93), SIMDE_FLOAT64_C( 930.64), SIMDE_FLOAT64_C( -679.29), SIMDE_FLOAT64_C( -25.01), SIMDE_FLOAT64_C( 931.11), SIMDE_FLOAT64_C( 807.37), SIMDE_FLOAT64_C( -882.57) }, { SIMDE_FLOAT64_C( -260.18), SIMDE_FLOAT64_C( -263.67), SIMDE_FLOAT64_C( 219.28), SIMDE_FLOAT64_C( 531.84), SIMDE_FLOAT64_C( -79.23), SIMDE_FLOAT64_C( 661.51), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -869.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_erfc_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; }

/* Table-driven check of simde_mm_erfcinv_ps: for each case the four
 * floats in `a` are loaded, passed through the function, and the result
 * is compared with the four floats in `r` using precision argument 1.
 * Returns 0 once every case has been checked. */
static int
test_simde_mm_erfcinv_ps (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float32 a[4];
    const simde_float32 r[4];
  } test_vec[] = {
    { { SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( 1.16) },
      { SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 1.28), SIMDE_FLOAT32_C( -0.23), SIMDE_FLOAT32_C( -0.14) } },
    { { SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 1.53) },
      { SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 1.33), SIMDE_FLOAT32_C( -0.51) } },
    { { SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 1.68) },
      { SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.70) } },
    { { SIMDE_FLOAT32_C( 1.28), SIMDE_FLOAT32_C( 1.66), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 1.42) },
      { SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( 1.10), SIMDE_FLOAT32_C( -0.39) } },
    { { SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 1.20), SIMDE_FLOAT32_C( 1.51) },
      { SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( -0.49) } },
    { { SIMDE_FLOAT32_C( 1.44), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.49) },
      { SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 1.16), SIMDE_FLOAT32_C( 0.49) } },
    { { SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 1.35) },
      { SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( -1.82), SIMDE_FLOAT32_C( -0.32) } },
    { { SIMDE_FLOAT32_C( 1.94), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 1.62) },
      { SIMDE_FLOAT32_C( -1.33), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( -0.62) } }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i = 0;

  while (i < n_cases) {
    simde__m128 r = simde_mm_erfcinv_ps(simde_mm_loadu_ps(test_vec[i].a));
    /* The assertion statement is kept textually unchanged. */
    simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1);
    i++;
  }

  return 0;
}

/* Table-driven check of simde_mm_erfcinv_pd: same scheme as the
 * single-precision variant, with two doubles per input and per expected
 * result.  Returns 0 once every case has been checked. */
static int
test_simde_mm_erfcinv_pd (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float64 a[2];
    const simde_float64 r[2];
  } test_vec[] = {
    { { SIMDE_FLOAT64_C( 0.73), SIMDE_FLOAT64_C( 0.13) }, { SIMDE_FLOAT64_C( 0.24), SIMDE_FLOAT64_C( 1.07) } },
    { { SIMDE_FLOAT64_C( 1.09), SIMDE_FLOAT64_C( 0.70) }, { SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( 0.27) } },
    { { SIMDE_FLOAT64_C( 1.13), SIMDE_FLOAT64_C( 0.97) }, { SIMDE_FLOAT64_C( -0.12), SIMDE_FLOAT64_C( 0.03) } },
    { { SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( 1.72) }, { SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( -0.76) } },
    { { SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( 0.82) }, { SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( 0.16) } },
    { { SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 1.88) }, { SIMDE_FLOAT64_C( 1.82), SIMDE_FLOAT64_C( -1.10) } },
    { { SIMDE_FLOAT64_C( 1.11), SIMDE_FLOAT64_C( 0.87) }, { SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( 0.12) } },
    { { SIMDE_FLOAT64_C( 1.13), SIMDE_FLOAT64_C( 0.05) }, { SIMDE_FLOAT64_C( -0.12), SIMDE_FLOAT64_C( 1.39) } }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i = 0;

  while (i < n_cases) {
    simde__m128d r = simde_mm_erfcinv_pd(simde_mm_loadu_pd(test_vec[i].a));
    /* The assertion statement is kept textually unchanged. */
    simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1);
    i++;
  }

  return 0;
}

static int test_simde_mm256_erfcinv_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 1.58), SIMDE_FLOAT32_C( 1.39), SIMDE_FLOAT32_C( 1.73), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 1.58), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.73) }, { SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 0.24) } }, { { SIMDE_FLOAT32_C( 1.93), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 1.80), SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 1.44), SIMDE_FLOAT32_C( 1.26) }, { SIMDE_FLOAT32_C( -1.28), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( -1.10), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( -0.23) } }, { { SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 1.32), SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 1.97), SIMDE_FLOAT32_C( 1.77) }, { SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C(
0.91), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( -1.53), SIMDE_FLOAT32_C( -0.85) } }, { { SIMDE_FLOAT32_C( 1.71), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 1.55), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 1.58), SIMDE_FLOAT32_C( 0.11) }, { SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( -1.82), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( 1.13) } }, { { SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 1.30), SIMDE_FLOAT32_C( 1.06), SIMDE_FLOAT32_C( 1.66), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 1.60), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.81) }, { SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.17) } }, { { SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 1.05), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 1.23) }, { SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 1.07), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( -0.21) } }, { { SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 1.07), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 1.48), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 1.80), SIMDE_FLOAT32_C( 1.19) }, { SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( -0.17) } }, { { SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 1.92), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 1.75) }, { SIMDE_FLOAT32_C( -1.09), SIMDE_FLOAT32_C( 
-0.32), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( -1.24), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( -0.81) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_erfcinv_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_erfcinv_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 1.66), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( 1.56) }, { SIMDE_FLOAT64_C( -0.67), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 1.07), SIMDE_FLOAT64_C( -0.55) } }, { { SIMDE_FLOAT64_C( 1.89), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( 0.46) }, { SIMDE_FLOAT64_C( -1.13), SIMDE_FLOAT64_C( 0.80), SIMDE_FLOAT64_C( 0.36), SIMDE_FLOAT64_C( 0.52) } }, { { SIMDE_FLOAT64_C( 1.50), SIMDE_FLOAT64_C( 1.78), SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 0.70) }, { SIMDE_FLOAT64_C( -0.48), SIMDE_FLOAT64_C( -0.87), SIMDE_FLOAT64_C( 0.02), SIMDE_FLOAT64_C( 0.27) } }, { { SIMDE_FLOAT64_C( 1.88), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 1.75) }, { SIMDE_FLOAT64_C( -1.10), SIMDE_FLOAT64_C( 0.15), SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( -0.81) } }, { { SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( 1.15), SIMDE_FLOAT64_C( 0.52) }, { SIMDE_FLOAT64_C( 0.85), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( -0.13), SIMDE_FLOAT64_C( 0.45) } }, { { SIMDE_FLOAT64_C( 0.16), SIMDE_FLOAT64_C( 1.48), SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( 1.38) }, { SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 1.10), SIMDE_FLOAT64_C( -0.35) } }, { { SIMDE_FLOAT64_C( 1.88), SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 1.09), SIMDE_FLOAT64_C( 0.47) }, { SIMDE_FLOAT64_C( -1.09), SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( -0.08), 
SIMDE_FLOAT64_C( 0.51) } }, { { SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 1.43), SIMDE_FLOAT64_C( 1.79), SIMDE_FLOAT64_C( 0.34) }, { SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( -0.40), SIMDE_FLOAT64_C( -0.89), SIMDE_FLOAT64_C( 0.67) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_erfcinv_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_erfcinv_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 1.41), SIMDE_FLOAT32_C( 1.20), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 1.33), SIMDE_FLOAT32_C( 1.84), SIMDE_FLOAT32_C( 1.37), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 1.44), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 1.79) }, { SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.89) } }, { { SIMDE_FLOAT32_C( 1.30), SIMDE_FLOAT32_C( 1.40), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 1.60), SIMDE_FLOAT32_C( 1.33), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 1.72), SIMDE_FLOAT32_C( 1.10), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 1.37), SIMDE_FLOAT32_C( 1.67), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.13) }, { SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( -0.37), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.67), 
SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 1.16), SIMDE_FLOAT32_C( 1.07) } }, { { SIMDE_FLOAT32_C( 1.59), SIMDE_FLOAT32_C( 1.51), SIMDE_FLOAT32_C( 1.33), SIMDE_FLOAT32_C( 1.97), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 1.16), SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( 1.41), SIMDE_FLOAT32_C( 1.51), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 1.79), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 1.58), SIMDE_FLOAT32_C( 0.30) }, { SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -1.53), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( -0.14), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( 0.73) } }, { { SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 1.11), SIMDE_FLOAT32_C( 1.90), SIMDE_FLOAT32_C( 1.78), SIMDE_FLOAT32_C( 1.46), SIMDE_FLOAT32_C( 1.62), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.95) }, { SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( -1.16), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.04) } }, { { SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 1.11), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 0.31), 
SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 1.16), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 1.48), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 1.78), SIMDE_FLOAT32_C( 0.64) }, { SIMDE_FLOAT32_C( 1.28), SIMDE_FLOAT32_C( -1.39), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( -0.14), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 1.82), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( 0.33) } }, { { SIMDE_FLOAT32_C( 1.12), SIMDE_FLOAT32_C( 1.68), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 1.30), SIMDE_FLOAT32_C( 1.29), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 1.84), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 1.20), SIMDE_FLOAT32_C( 1.14), SIMDE_FLOAT32_C( 1.41), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 1.21) }, { SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 1.13), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( 1.02), SIMDE_FLOAT32_C( -0.19) } }, { { SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( 1.07), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 1.10), SIMDE_FLOAT32_C( 1.81), SIMDE_FLOAT32_C( 1.59), SIMDE_FLOAT32_C( 1.52), SIMDE_FLOAT32_C( 1.29), SIMDE_FLOAT32_C( 1.79), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 1.07), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 0.65) }, { SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( 1.10), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( -0.93), 
SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.32) } }, { { SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 1.92), SIMDE_FLOAT32_C( 1.13), SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 1.06), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 1.90) }, { SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( 1.39), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -1.82), SIMDE_FLOAT32_C( -1.24), SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( -1.16) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_erfcinv_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_erfcinv_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 1.62), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 1.14), SIMDE_FLOAT32_C( 1.94), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 1.33), SIMDE_FLOAT32_C( 1.30), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.58) }, UINT8_C(239), { SIMDE_FLOAT32_C( 1.92), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( 1.73), SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C( 1.27), 
SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 1.98), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 0.47) }, { SIMDE_FLOAT32_C( -1.24), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 1.33), SIMDE_FLOAT32_C( 1.30), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.58) } }, { { SIMDE_FLOAT32_C( 1.93), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 1.33), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 1.18) }, UINT8_C( 23), { SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 1.72), SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 1.94), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 1.92), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 1.85), SIMDE_FLOAT32_C( 0.67) }, { SIMDE_FLOAT32_C( -1.82), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 1.18) } }, { { SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 1.32), SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.29), 
SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 1.49), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 1.93), SIMDE_FLOAT32_C( 0.96) }, UINT8_C( 91), { SIMDE_FLOAT32_C( 1.65), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 1.01), SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 1.74), SIMDE_FLOAT32_C( 1.73), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 1.64) }, { SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 1.32), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 1.49), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 1.93), SIMDE_FLOAT32_C( 0.96) } }, { { SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 1.73), SIMDE_FLOAT32_C( 1.98), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 1.61), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 1.68), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( 1.65) }, UINT8_C(144), { SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 1.07), SIMDE_FLOAT32_C( 1.61), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 1.20), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C( 1.60), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.88) }, { SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 1.73), SIMDE_FLOAT32_C( 1.98), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.58), 
SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 1.68), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( 1.65) } }, { { SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 1.03), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 1.52), SIMDE_FLOAT32_C( 1.62), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 1.20), SIMDE_FLOAT32_C( 1.61), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 1.26), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.08) }, UINT8_C(233), { SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 1.15), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 1.46), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 1.15), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 1.71), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 1.69) }, { SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( -1.82), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 1.20), SIMDE_FLOAT32_C( 1.61), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 1.26), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.08) } }, { { SIMDE_FLOAT32_C( 1.44), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( 1.85), SIMDE_FLOAT32_C( 1.09), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 1.37), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 1.74), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 1.67), SIMDE_FLOAT32_C( 0.31) }, UINT8_C(221), { SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 
1.88), SIMDE_FLOAT32_C( 1.92), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 0.47) }, { SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( -1.10), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 1.37), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 1.74), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 1.67), SIMDE_FLOAT32_C( 0.31) } }, { { SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 1.85), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 1.86), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 1.62), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( 1.48) }, UINT8_C(108), { SIMDE_FLOAT32_C( 1.72), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 1.40), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 1.38), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 1.15), SIMDE_FLOAT32_C( 1.38), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 1.22) }, { SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 1.85), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 1.62), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( 1.48) } }, { { SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 1.92), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( 1.98), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.13), 
SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 1.93) }, UINT8_C(110), { SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( 1.46), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 1.01), SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 1.51), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 1.61), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 1.26) }, { SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 1.33), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 1.93) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_erfcinv_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_erfcinv_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 0.07), SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 1.62), SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( 1.24), SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 1.25) }, { SIMDE_FLOAT64_C( 1.28), SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( -0.62), SIMDE_FLOAT64_C( 0.73), SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( -0.22), SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( -0.23) } }, { { SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 1.75), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( 0.43), 
SIMDE_FLOAT64_C( 1.42), SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 0.26) }, { SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( -0.81), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 1.20), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( 1.16), SIMDE_FLOAT64_C( 0.80) } }, { { SIMDE_FLOAT64_C( 1.39), SIMDE_FLOAT64_C( 1.04), SIMDE_FLOAT64_C( 1.66), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 1.47), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( 1.28), SIMDE_FLOAT64_C( 0.25) }, { SIMDE_FLOAT64_C( -0.36), SIMDE_FLOAT64_C( -0.04), SIMDE_FLOAT64_C( -0.67), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( -0.25), SIMDE_FLOAT64_C( 0.81) } }, { { SIMDE_FLOAT64_C( 1.45), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 1.98), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 1.21), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 1.28) }, { SIMDE_FLOAT64_C( -0.42), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( -1.64), SIMDE_FLOAT64_C( 0.62), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( -0.25) } }, { { SIMDE_FLOAT64_C( 0.74), SIMDE_FLOAT64_C( 0.29), SIMDE_FLOAT64_C( 1.58), SIMDE_FLOAT64_C( 1.25), SIMDE_FLOAT64_C( 1.53), SIMDE_FLOAT64_C( 1.76), SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 0.21) }, { SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( -0.57), SIMDE_FLOAT64_C( -0.23), SIMDE_FLOAT64_C( -0.51), SIMDE_FLOAT64_C( -0.83), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( 0.89) } }, { { SIMDE_FLOAT64_C( 1.51), SIMDE_FLOAT64_C( 1.01), SIMDE_FLOAT64_C( 0.29), SIMDE_FLOAT64_C( 1.94), SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( 1.83) }, { SIMDE_FLOAT64_C( -0.49), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( -1.33), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( -0.97) } }, { { SIMDE_FLOAT64_C( 1.43), SIMDE_FLOAT64_C( 1.86), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( 0.64), 
SIMDE_FLOAT64_C( 1.62), SIMDE_FLOAT64_C( 1.15), SIMDE_FLOAT64_C( 0.09) }, { SIMDE_FLOAT64_C( -0.40), SIMDE_FLOAT64_C( -1.04), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( -0.62), SIMDE_FLOAT64_C( -0.13), SIMDE_FLOAT64_C( 1.20) } }, { { SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 1.13), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 1.76), SIMDE_FLOAT64_C( 1.48), SIMDE_FLOAT64_C( 1.14), SIMDE_FLOAT64_C( 1.04), SIMDE_FLOAT64_C( 0.21) }, { SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( -0.12), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( -0.83), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( -0.12), SIMDE_FLOAT64_C( -0.04), SIMDE_FLOAT64_C( 0.89) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_erfcinv_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_erfcinv_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 0.49), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 1.36), SIMDE_FLOAT64_C( 1.68), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 0.17), SIMDE_FLOAT64_C( 1.10), SIMDE_FLOAT64_C( 0.08) }, UINT8_C(117), { SIMDE_FLOAT64_C( 0.85), SIMDE_FLOAT64_C( 1.24), SIMDE_FLOAT64_C( 1.32), SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 1.15), SIMDE_FLOAT64_C( 1.13), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( 0.74) }, { SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( -0.29), SIMDE_FLOAT64_C( 1.68), SIMDE_FLOAT64_C( -0.13), SIMDE_FLOAT64_C( -0.12), SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( 0.08) } }, { { SIMDE_FLOAT64_C( 1.75), SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( 1.37), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 1.22), SIMDE_FLOAT64_C( 1.43), SIMDE_FLOAT64_C( 0.91) }, UINT8_C( 90), { SIMDE_FLOAT64_C( 
0.35), SIMDE_FLOAT64_C( 1.48), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 1.46), SIMDE_FLOAT64_C( 1.53), SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.89) }, { SIMDE_FLOAT64_C( 1.75), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( -0.43), SIMDE_FLOAT64_C( -0.51), SIMDE_FLOAT64_C( 1.22), SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 0.91) } }, { { SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( 1.01), SIMDE_FLOAT64_C( 1.06), SIMDE_FLOAT64_C( 1.33), SIMDE_FLOAT64_C( 1.08), SIMDE_FLOAT64_C( 1.27), SIMDE_FLOAT64_C( 0.17), SIMDE_FLOAT64_C( 0.33) }, UINT8_C(134), { SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 1.48), SIMDE_FLOAT64_C( 1.71), SIMDE_FLOAT64_C( 1.22), SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( 1.46), SIMDE_FLOAT64_C( 1.67), SIMDE_FLOAT64_C( 1.01) }, { SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 1.33), SIMDE_FLOAT64_C( 1.08), SIMDE_FLOAT64_C( 1.27), SIMDE_FLOAT64_C( 0.17), SIMDE_FLOAT64_C( -0.01) } }, { { SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( 1.34), SIMDE_FLOAT64_C( 1.77), SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( 0.83) }, UINT8_C(179), { SIMDE_FLOAT64_C( 0.07), SIMDE_FLOAT64_C( 0.36), SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 1.25), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( 1.52), SIMDE_FLOAT64_C( 0.31) }, { SIMDE_FLOAT64_C( 1.28), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( -0.23), SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( 0.72) } }, { { SIMDE_FLOAT64_C( 1.92), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( 1.58), SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 0.16), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.41) }, UINT8_C(115), { SIMDE_FLOAT64_C( 1.87), SIMDE_FLOAT64_C( 0.63), SIMDE_FLOAT64_C( 1.34), SIMDE_FLOAT64_C( 1.54), SIMDE_FLOAT64_C( 1.64), SIMDE_FLOAT64_C( 0.17), SIMDE_FLOAT64_C( 1.97), SIMDE_FLOAT64_C( 1.86) 
}, { SIMDE_FLOAT64_C( -1.07), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 1.58), SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( -0.65), SIMDE_FLOAT64_C( 0.97), SIMDE_FLOAT64_C( -1.53), SIMDE_FLOAT64_C( 0.41) } }, { { SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 1.32), SIMDE_FLOAT64_C( 1.63), SIMDE_FLOAT64_C( 1.04), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 1.46), SIMDE_FLOAT64_C( 1.11), SIMDE_FLOAT64_C( 0.50) }, UINT8_C( 31), { SIMDE_FLOAT64_C( 1.62), SIMDE_FLOAT64_C( 1.75), SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 1.14), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 0.36), SIMDE_FLOAT64_C( 1.74), SIMDE_FLOAT64_C( 1.64) }, { SIMDE_FLOAT64_C( -0.62), SIMDE_FLOAT64_C( -0.81), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( -0.12), SIMDE_FLOAT64_C( 1.33), SIMDE_FLOAT64_C( 1.46), SIMDE_FLOAT64_C( 1.11), SIMDE_FLOAT64_C( 0.50) } }, { { SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 1.79), SIMDE_FLOAT64_C( 1.11), SIMDE_FLOAT64_C( 1.08), SIMDE_FLOAT64_C( 1.67), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 1.71) }, UINT8_C(152), { SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( 1.35), SIMDE_FLOAT64_C( 1.17), SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 1.21), SIMDE_FLOAT64_C( 1.60), SIMDE_FLOAT64_C( 1.82), SIMDE_FLOAT64_C( 0.85) }, { SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 1.79), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 1.67), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.13) } }, { { SIMDE_FLOAT64_C( 0.64), SIMDE_FLOAT64_C( 1.96), SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( 1.74), SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 1.37), SIMDE_FLOAT64_C( 0.21) }, UINT8_C( 95), { SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( 1.91), SIMDE_FLOAT64_C( 1.38), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 1.70) }, { SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( 0.07), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( -1.20), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 0.07), 
SIMDE_FLOAT64_C( 0.21) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_erfcinv_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_exp_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( -1.86), SIMDE_FLOAT32_C( 1.40), SIMDE_FLOAT32_C( 3.13) }, { SIMDE_FLOAT32_C( 2.66), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 4.06), SIMDE_FLOAT32_C( 22.87) } }, { { SIMDE_FLOAT32_C( -1.01), SIMDE_FLOAT32_C( -1.34), SIMDE_FLOAT32_C( 2.77), SIMDE_FLOAT32_C( -0.13) }, { SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 15.96), SIMDE_FLOAT32_C( 0.88) } }, { { SIMDE_FLOAT32_C( -2.37), SIMDE_FLOAT32_C( 2.01), SIMDE_FLOAT32_C( -3.83), SIMDE_FLOAT32_C( -3.05) }, { SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 7.46), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.05) } }, { { SIMDE_FLOAT32_C( 1.11), SIMDE_FLOAT32_C( -1.44), SIMDE_FLOAT32_C( 3.85), SIMDE_FLOAT32_C( 2.66) }, { SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 46.99), SIMDE_FLOAT32_C( 14.30) } }, { { SIMDE_FLOAT32_C( -1.62), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 2.09) }, { SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 1.32), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 8.08) } }, { { SIMDE_FLOAT32_C( -1.46), SIMDE_FLOAT32_C( -3.87), SIMDE_FLOAT32_C( -1.51), SIMDE_FLOAT32_C( -0.90) }, { SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.41) } }, { { SIMDE_FLOAT32_C( -1.48), SIMDE_FLOAT32_C( 3.26), SIMDE_FLOAT32_C( 3.11), SIMDE_FLOAT32_C( 2.62) }, { SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 26.05), SIMDE_FLOAT32_C( 22.42), SIMDE_FLOAT32_C( 13.74) } }, { { SIMDE_FLOAT32_C( 
2.92), SIMDE_FLOAT32_C( 2.52), SIMDE_FLOAT32_C( -1.27), SIMDE_FLOAT32_C( -0.09) }, { SIMDE_FLOAT32_C( 18.54), SIMDE_FLOAT32_C( 12.43), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.91) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_exp_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_exp_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -2.66), SIMDE_FLOAT64_C( -2.80) }, { SIMDE_FLOAT64_C( 0.07), SIMDE_FLOAT64_C( 0.06) } }, { { SIMDE_FLOAT64_C( -3.89), SIMDE_FLOAT64_C( -1.37) }, { SIMDE_FLOAT64_C( 0.02), SIMDE_FLOAT64_C( 0.25) } }, { { SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( -2.64) }, { SIMDE_FLOAT64_C( 1.25), SIMDE_FLOAT64_C( 0.07) } }, { { SIMDE_FLOAT64_C( -3.57), SIMDE_FLOAT64_C( -2.12) }, { SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.12) } }, { { SIMDE_FLOAT64_C( 1.63), SIMDE_FLOAT64_C( 1.90) }, { SIMDE_FLOAT64_C( 5.10), SIMDE_FLOAT64_C( 6.69) } }, { { SIMDE_FLOAT64_C( -3.29), SIMDE_FLOAT64_C( 2.38) }, { SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 10.80) } }, { { SIMDE_FLOAT64_C( 2.98), SIMDE_FLOAT64_C( -3.59) }, { SIMDE_FLOAT64_C( 19.69), SIMDE_FLOAT64_C( 0.03) } }, { { SIMDE_FLOAT64_C( 1.60), SIMDE_FLOAT64_C( 3.03) }, { SIMDE_FLOAT64_C( 4.95), SIMDE_FLOAT64_C( 20.70) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_exp_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_exp_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( 3.31), SIMDE_FLOAT32_C( -0.33), 
SIMDE_FLOAT32_C( 1.48), SIMDE_FLOAT32_C( -1.86), SIMDE_FLOAT32_C( -0.07) }, { SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 27.39), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 4.39), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.93) } }, { { SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( 3.55), SIMDE_FLOAT32_C( 2.79), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( -2.09), SIMDE_FLOAT32_C( 1.92), SIMDE_FLOAT32_C( -2.83) }, { SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 34.81), SIMDE_FLOAT32_C( 16.28), SIMDE_FLOAT32_C( 1.84), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 6.82), SIMDE_FLOAT32_C( 0.06) } }, { { SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( 3.78), SIMDE_FLOAT32_C( 1.32), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 3.60), SIMDE_FLOAT32_C( 3.24), SIMDE_FLOAT32_C( -3.08), SIMDE_FLOAT32_C( 1.67) }, { SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 43.82), SIMDE_FLOAT32_C( 3.74), SIMDE_FLOAT32_C( 1.51), SIMDE_FLOAT32_C( 36.60), SIMDE_FLOAT32_C( 25.53), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 5.31) } }, { { SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -3.11), SIMDE_FLOAT32_C( -2.54), SIMDE_FLOAT32_C( -2.91) }, { SIMDE_FLOAT32_C( 30.57), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 1.86), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 1.60), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.05) } }, { { SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -3.60), SIMDE_FLOAT32_C( -3.49), SIMDE_FLOAT32_C( -1.85), SIMDE_FLOAT32_C( -1.46), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 1.26) }, { SIMDE_FLOAT32_C( 2.29), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 3.53) } }, { { SIMDE_FLOAT32_C( 2.29), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( -1.11), SIMDE_FLOAT32_C( 
1.89), SIMDE_FLOAT32_C( -2.02), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( -2.75) }, { SIMDE_FLOAT32_C( 9.87), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 1.05), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 6.62), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 1.07), SIMDE_FLOAT32_C( 0.06) } }, { { SIMDE_FLOAT32_C( -2.25), SIMDE_FLOAT32_C( -2.61), SIMDE_FLOAT32_C( 1.66), SIMDE_FLOAT32_C( -2.65), SIMDE_FLOAT32_C( -3.37), SIMDE_FLOAT32_C( 2.59), SIMDE_FLOAT32_C( 3.02), SIMDE_FLOAT32_C( -3.95) }, { SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 5.26), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 13.33), SIMDE_FLOAT32_C( 20.49), SIMDE_FLOAT32_C( 0.02) } }, { { SIMDE_FLOAT32_C( -1.74), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 2.73), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 3.82), SIMDE_FLOAT32_C( -2.64) }, { SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 15.33), SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( 2.83), SIMDE_FLOAT32_C( 45.60), SIMDE_FLOAT32_C( 0.07) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_exp_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_exp_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 3.29), SIMDE_FLOAT64_C( -2.77), SIMDE_FLOAT64_C( 3.69), SIMDE_FLOAT64_C( -0.61) }, { SIMDE_FLOAT64_C( 26.84), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 40.04), SIMDE_FLOAT64_C( 0.54) } }, { { SIMDE_FLOAT64_C( -1.69), SIMDE_FLOAT64_C( 0.88), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 1.60) }, { SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 2.41), SIMDE_FLOAT64_C( 2.29), SIMDE_FLOAT64_C( 4.95) } }, { { SIMDE_FLOAT64_C( -2.30), SIMDE_FLOAT64_C( 2.39), SIMDE_FLOAT64_C( -1.55), 
SIMDE_FLOAT64_C( -3.39) }, { SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 10.91), SIMDE_FLOAT64_C( 0.21), SIMDE_FLOAT64_C( 0.03) } }, { { SIMDE_FLOAT64_C( 3.91), SIMDE_FLOAT64_C( -3.26), SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( -1.96) }, { SIMDE_FLOAT64_C( 49.90), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 0.14) } }, { { SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 2.77), SIMDE_FLOAT64_C( -1.45), SIMDE_FLOAT64_C( -1.25) }, { SIMDE_FLOAT64_C( 1.08), SIMDE_FLOAT64_C( 15.96), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.29) } }, { { SIMDE_FLOAT64_C( -1.13), SIMDE_FLOAT64_C( 2.76), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 2.44) }, { SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( 15.80), SIMDE_FLOAT64_C( 2.69), SIMDE_FLOAT64_C( 11.47) } }, { { SIMDE_FLOAT64_C( -1.89), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( 0.58) }, { SIMDE_FLOAT64_C( 0.15), SIMDE_FLOAT64_C( 1.01), SIMDE_FLOAT64_C( 1.72), SIMDE_FLOAT64_C( 1.79) } }, { { SIMDE_FLOAT64_C( 1.40), SIMDE_FLOAT64_C( -0.80), SIMDE_FLOAT64_C( 1.70), SIMDE_FLOAT64_C( 0.69) }, { SIMDE_FLOAT64_C( 4.06), SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( 5.47), SIMDE_FLOAT64_C( 1.99) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_exp_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_exp_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 1.28), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -2.09), SIMDE_FLOAT32_C( -2.90), SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( 3.88), SIMDE_FLOAT32_C( -2.98), SIMDE_FLOAT32_C( -3.94), SIMDE_FLOAT32_C( -1.92), SIMDE_FLOAT32_C( 2.94), SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( 2.19), SIMDE_FLOAT32_C( -2.64), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( 3.50) }, { SIMDE_FLOAT32_C( 
3.60), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 48.42), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 18.92), SIMDE_FLOAT32_C( 6.55), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 8.94), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 33.12) } }, { { SIMDE_FLOAT32_C( 3.07), SIMDE_FLOAT32_C( 2.28), SIMDE_FLOAT32_C( -3.90), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( -1.46), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( 2.83), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( -2.52), SIMDE_FLOAT32_C( 2.98), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( -2.99), SIMDE_FLOAT32_C( -2.11), SIMDE_FLOAT32_C( 1.16), SIMDE_FLOAT32_C( 2.29) }, { SIMDE_FLOAT32_C( 21.54), SIMDE_FLOAT32_C( 9.78), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 3.25), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 16.95), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 19.69), SIMDE_FLOAT32_C( 7.10), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 3.19), SIMDE_FLOAT32_C( 9.87) } }, { { SIMDE_FLOAT32_C( 1.94), SIMDE_FLOAT32_C( 3.07), SIMDE_FLOAT32_C( 3.39), SIMDE_FLOAT32_C( -2.62), SIMDE_FLOAT32_C( 2.95), SIMDE_FLOAT32_C( -3.59), SIMDE_FLOAT32_C( -2.56), SIMDE_FLOAT32_C( -2.97), SIMDE_FLOAT32_C( 3.35), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( -3.32), SIMDE_FLOAT32_C( -3.62), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 3.75) }, { SIMDE_FLOAT32_C( 6.96), SIMDE_FLOAT32_C( 21.54), SIMDE_FLOAT32_C( 29.67), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 19.11), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 28.50), SIMDE_FLOAT32_C( 27.94), SIMDE_FLOAT32_C( 2.12), SIMDE_FLOAT32_C( 4.66), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 2.83), SIMDE_FLOAT32_C( 42.52) } }, { { SIMDE_FLOAT32_C( 2.66), 
SIMDE_FLOAT32_C( 1.14), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( -1.93), SIMDE_FLOAT32_C( 3.69), SIMDE_FLOAT32_C( -3.45), SIMDE_FLOAT32_C( -3.09), SIMDE_FLOAT32_C( -0.37), SIMDE_FLOAT32_C( -1.97), SIMDE_FLOAT32_C( 3.89), SIMDE_FLOAT32_C( -2.41), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( -2.21), SIMDE_FLOAT32_C( 2.75), SIMDE_FLOAT32_C( -2.67), SIMDE_FLOAT32_C( 3.72) }, { SIMDE_FLOAT32_C( 14.30), SIMDE_FLOAT32_C( 3.13), SIMDE_FLOAT32_C( 2.56), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 40.04), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 48.91), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 15.64), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 41.26) } }, { { SIMDE_FLOAT32_C( 1.82), SIMDE_FLOAT32_C( -3.27), SIMDE_FLOAT32_C( -2.90), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( -2.86), SIMDE_FLOAT32_C( -1.45), SIMDE_FLOAT32_C( 1.79), SIMDE_FLOAT32_C( -3.51), SIMDE_FLOAT32_C( -2.13), SIMDE_FLOAT32_C( -1.46), SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( -1.45), SIMDE_FLOAT32_C( -1.08), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( -1.69), SIMDE_FLOAT32_C( -2.41) }, { SIMDE_FLOAT32_C( 6.17), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 2.16), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 5.99), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 7.61), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.09) } }, { { SIMDE_FLOAT32_C( -3.79), SIMDE_FLOAT32_C( 3.24), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 3.90), SIMDE_FLOAT32_C( 3.80), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( -2.17), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 1.13), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 2.24), SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( 2.20), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( -2.30) }, { SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 
25.53), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 49.40), SIMDE_FLOAT32_C( 44.70), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 3.10), SIMDE_FLOAT32_C( 2.39), SIMDE_FLOAT32_C( 9.39), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 9.03), SIMDE_FLOAT32_C( 7.10), SIMDE_FLOAT32_C( 0.10) } }, { { SIMDE_FLOAT32_C( 2.93), SIMDE_FLOAT32_C( 3.07), SIMDE_FLOAT32_C( 2.46), SIMDE_FLOAT32_C( -3.93), SIMDE_FLOAT32_C( -2.39), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -3.44), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 2.80), SIMDE_FLOAT32_C( 2.60), SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( -2.28), SIMDE_FLOAT32_C( -2.33), SIMDE_FLOAT32_C( -3.65), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( -2.12) }, { SIMDE_FLOAT32_C( 18.73), SIMDE_FLOAT32_C( 21.54), SIMDE_FLOAT32_C( 11.70), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 1.30), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 16.44), SIMDE_FLOAT32_C( 13.46), SIMDE_FLOAT32_C( 7.69), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.12) } }, { { SIMDE_FLOAT32_C( 3.59), SIMDE_FLOAT32_C( 2.96), SIMDE_FLOAT32_C( -2.22), SIMDE_FLOAT32_C( 3.39), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( 1.32), SIMDE_FLOAT32_C( -2.78), SIMDE_FLOAT32_C( 3.98), SIMDE_FLOAT32_C( -1.55), SIMDE_FLOAT32_C( 2.09), SIMDE_FLOAT32_C( 2.22), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -3.98), SIMDE_FLOAT32_C( -0.78) }, { SIMDE_FLOAT32_C( 36.23), SIMDE_FLOAT32_C( 19.30), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 29.67), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 3.74), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 53.52), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 8.08), SIMDE_FLOAT32_C( 9.21), SIMDE_FLOAT32_C( 10.28), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 1.20), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.46) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_exp_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_exp_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 3.92), SIMDE_FLOAT32_C( -2.75), SIMDE_FLOAT32_C( 3.98), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( 1.89), SIMDE_FLOAT32_C( 2.66), SIMDE_FLOAT32_C( 1.91), SIMDE_FLOAT32_C( 3.79), SIMDE_FLOAT32_C( 2.42), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 2.11), SIMDE_FLOAT32_C( -1.61), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( 3.46) }, UINT8_C( 98), { SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( 2.14), SIMDE_FLOAT32_C( 1.44), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 3.48), SIMDE_FLOAT32_C( -2.98), SIMDE_FLOAT32_C( -3.66), SIMDE_FLOAT32_C( -2.38), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 1.62), SIMDE_FLOAT32_C( -3.73), SIMDE_FLOAT32_C( 3.17), SIMDE_FLOAT32_C( -2.33), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 3.09) }, { SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 8.50), SIMDE_FLOAT32_C( -2.75), SIMDE_FLOAT32_C( 3.98), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 32.46), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 2.66), SIMDE_FLOAT32_C( 1.91), SIMDE_FLOAT32_C( 3.79), SIMDE_FLOAT32_C( 2.42), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 2.11), SIMDE_FLOAT32_C( -1.61), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( 3.46) } }, { { SIMDE_FLOAT32_C( -1.08), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -3.83), SIMDE_FLOAT32_C( -2.58), SIMDE_FLOAT32_C( -1.71), SIMDE_FLOAT32_C( 2.08), SIMDE_FLOAT32_C( -2.80), SIMDE_FLOAT32_C( -3.29), SIMDE_FLOAT32_C( -1.38), SIMDE_FLOAT32_C( 3.32), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( -1.54), SIMDE_FLOAT32_C( 2.77), SIMDE_FLOAT32_C( 1.45), 
SIMDE_FLOAT32_C( 2.08) }, UINT8_C(254), { SIMDE_FLOAT32_C( -1.11), SIMDE_FLOAT32_C( -2.14), SIMDE_FLOAT32_C( -2.35), SIMDE_FLOAT32_C( -1.63), SIMDE_FLOAT32_C( -1.11), SIMDE_FLOAT32_C( -2.01), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 3.59), SIMDE_FLOAT32_C( 3.61), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 2.76), SIMDE_FLOAT32_C( -2.73), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 1.85), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( -0.66) }, { SIMDE_FLOAT32_C( -1.08), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 36.23), SIMDE_FLOAT32_C( -3.29), SIMDE_FLOAT32_C( -1.38), SIMDE_FLOAT32_C( 3.32), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( -1.54), SIMDE_FLOAT32_C( 2.77), SIMDE_FLOAT32_C( 1.45), SIMDE_FLOAT32_C( 2.08) } }, { { SIMDE_FLOAT32_C( -2.52), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( -1.56), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 1.06), SIMDE_FLOAT32_C( 1.27), SIMDE_FLOAT32_C( 3.57), SIMDE_FLOAT32_C( 3.52), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 1.02), SIMDE_FLOAT32_C( 1.60), SIMDE_FLOAT32_C( -3.04), SIMDE_FLOAT32_C( 3.91) }, UINT8_C(140), { SIMDE_FLOAT32_C( -1.39), SIMDE_FLOAT32_C( -1.72), SIMDE_FLOAT32_C( -1.65), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 2.27), SIMDE_FLOAT32_C( -2.06), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( -1.47), SIMDE_FLOAT32_C( -3.30), SIMDE_FLOAT32_C( 1.48), SIMDE_FLOAT32_C( -1.67), SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( -2.33), SIMDE_FLOAT32_C( 1.67), SIMDE_FLOAT32_C( -3.97), SIMDE_FLOAT32_C( 2.04) }, { SIMDE_FLOAT32_C( -2.52), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 1.80), SIMDE_FLOAT32_C( -1.56), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 1.27), SIMDE_FLOAT32_C( 3.57), SIMDE_FLOAT32_C( 3.52), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 1.02), SIMDE_FLOAT32_C( 1.60), SIMDE_FLOAT32_C( 
-3.04), SIMDE_FLOAT32_C( 3.91) } }, { { SIMDE_FLOAT32_C( -1.58), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -3.52), SIMDE_FLOAT32_C( -3.63), SIMDE_FLOAT32_C( -3.74), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( 3.83), SIMDE_FLOAT32_C( 1.07), SIMDE_FLOAT32_C( -2.32), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( -1.33), SIMDE_FLOAT32_C( -1.36), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( -1.87), SIMDE_FLOAT32_C( 1.25) }, UINT8_C(221), { SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -2.16), SIMDE_FLOAT32_C( 1.30), SIMDE_FLOAT32_C( 2.41), SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( 3.83), SIMDE_FLOAT32_C( 3.11), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( -1.84), SIMDE_FLOAT32_C( 1.66), SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 3.83), SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -1.75), SIMDE_FLOAT32_C( -2.52) }, { SIMDE_FLOAT32_C( 1.60), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( 3.67), SIMDE_FLOAT32_C( 11.13), SIMDE_FLOAT32_C( 7.69), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 22.42), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 1.07), SIMDE_FLOAT32_C( -2.32), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( -1.33), SIMDE_FLOAT32_C( -1.36), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( -1.87), SIMDE_FLOAT32_C( 1.25) } }, { { SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( -1.37), SIMDE_FLOAT32_C( -2.26), SIMDE_FLOAT32_C( -2.75), SIMDE_FLOAT32_C( -3.73), SIMDE_FLOAT32_C( -2.43), SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( -2.05), SIMDE_FLOAT32_C( 2.41), SIMDE_FLOAT32_C( -3.02), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( -2.16), SIMDE_FLOAT32_C( -1.80), SIMDE_FLOAT32_C( 3.58) }, UINT8_C(165), { SIMDE_FLOAT32_C( 3.50), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( -2.28), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( 1.10), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( -2.51), SIMDE_FLOAT32_C( -1.23), SIMDE_FLOAT32_C( -1.56), SIMDE_FLOAT32_C( -2.68), SIMDE_FLOAT32_C( -3.54), SIMDE_FLOAT32_C( 1.66), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 
-2.06), SIMDE_FLOAT32_C( -2.63), SIMDE_FLOAT32_C( 2.20) }, { SIMDE_FLOAT32_C( 33.12), SIMDE_FLOAT32_C( -1.37), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( -2.75), SIMDE_FLOAT32_C( -3.73), SIMDE_FLOAT32_C( 3.46), SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 2.41), SIMDE_FLOAT32_C( -3.02), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( -2.16), SIMDE_FLOAT32_C( -1.80), SIMDE_FLOAT32_C( 3.58) } }, { { SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -1.38), SIMDE_FLOAT32_C( 2.46), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( -3.06), SIMDE_FLOAT32_C( -3.58), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( -2.07), SIMDE_FLOAT32_C( 1.01), SIMDE_FLOAT32_C( 2.83), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 2.85), SIMDE_FLOAT32_C( -2.97), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( -1.47), SIMDE_FLOAT32_C( -3.47) }, UINT8_C(155), { SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 3.85), SIMDE_FLOAT32_C( 3.72), SIMDE_FLOAT32_C( -2.50), SIMDE_FLOAT32_C( -2.66), SIMDE_FLOAT32_C( -1.52), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( -1.34), SIMDE_FLOAT32_C( -1.06), SIMDE_FLOAT32_C( -2.40), SIMDE_FLOAT32_C( 2.23), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( -1.03), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( -3.45), SIMDE_FLOAT32_C( 1.60) }, { SIMDE_FLOAT32_C( 1.30), SIMDE_FLOAT32_C( 46.99), SIMDE_FLOAT32_C( 2.46), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( -3.58), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 1.01), SIMDE_FLOAT32_C( 2.83), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 2.85), SIMDE_FLOAT32_C( -2.97), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( -1.47), SIMDE_FLOAT32_C( -3.47) } }, { { SIMDE_FLOAT32_C( -1.11), SIMDE_FLOAT32_C( 1.79), SIMDE_FLOAT32_C( 2.54), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( -2.55), SIMDE_FLOAT32_C( -3.53), SIMDE_FLOAT32_C( -3.68), SIMDE_FLOAT32_C( -3.72), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 3.17), SIMDE_FLOAT32_C( -2.70), SIMDE_FLOAT32_C( -1.88), SIMDE_FLOAT32_C( -2.30), 
SIMDE_FLOAT32_C( -2.17), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 1.96) }, UINT8_C(151), { SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 3.47), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( 2.93), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( 1.68), SIMDE_FLOAT32_C( -2.13), SIMDE_FLOAT32_C( 1.01), SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( 2.75), SIMDE_FLOAT32_C( 3.99), SIMDE_FLOAT32_C( -3.67), SIMDE_FLOAT32_C( 3.30), SIMDE_FLOAT32_C( 1.59), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 1.09) }, { SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 32.14), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( -3.53), SIMDE_FLOAT32_C( -3.68), SIMDE_FLOAT32_C( 2.75), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 3.17), SIMDE_FLOAT32_C( -2.70), SIMDE_FLOAT32_C( -1.88), SIMDE_FLOAT32_C( -2.30), SIMDE_FLOAT32_C( -2.17), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 1.96) } }, { { SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 2.53), SIMDE_FLOAT32_C( 2.54), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 2.84), SIMDE_FLOAT32_C( 2.82), SIMDE_FLOAT32_C( -1.91), SIMDE_FLOAT32_C( 2.01), SIMDE_FLOAT32_C( -3.88), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 3.72), SIMDE_FLOAT32_C( -2.04), SIMDE_FLOAT32_C( -3.05), SIMDE_FLOAT32_C( 1.68), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( 1.40) }, UINT8_C( 24), { SIMDE_FLOAT32_C( 2.67), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -3.44), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 2.21), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -3.74), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( -3.40), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( -2.65), SIMDE_FLOAT32_C( 3.27), SIMDE_FLOAT32_C( -1.65) }, { SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 2.53), SIMDE_FLOAT32_C( 2.54), SIMDE_FLOAT32_C( 1.42), SIMDE_FLOAT32_C( 9.12), SIMDE_FLOAT32_C( 2.82), SIMDE_FLOAT32_C( -1.91), SIMDE_FLOAT32_C( 2.01), SIMDE_FLOAT32_C( -3.88), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 3.72), SIMDE_FLOAT32_C( -2.04), SIMDE_FLOAT32_C( 
-3.05), SIMDE_FLOAT32_C( 1.68), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( 1.40) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_exp_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_exp_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 3.06), SIMDE_FLOAT64_C( -0.78), SIMDE_FLOAT64_C( 1.53), SIMDE_FLOAT64_C( 2.94), SIMDE_FLOAT64_C( -3.88), SIMDE_FLOAT64_C( 3.46), SIMDE_FLOAT64_C( 1.02), SIMDE_FLOAT64_C( -3.05) }, { SIMDE_FLOAT64_C( 21.33), SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 4.62), SIMDE_FLOAT64_C( 18.92), SIMDE_FLOAT64_C( 0.02), SIMDE_FLOAT64_C( 31.82), SIMDE_FLOAT64_C( 2.77), SIMDE_FLOAT64_C( 0.05) } }, { { SIMDE_FLOAT64_C( 1.99), SIMDE_FLOAT64_C( -3.10), SIMDE_FLOAT64_C( 1.58), SIMDE_FLOAT64_C( 2.87), SIMDE_FLOAT64_C( -2.25), SIMDE_FLOAT64_C( -0.61), SIMDE_FLOAT64_C( -0.12), SIMDE_FLOAT64_C( 3.71) }, { SIMDE_FLOAT64_C( 7.32), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 4.85), SIMDE_FLOAT64_C( 17.64), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( 40.85) } }, { { SIMDE_FLOAT64_C( -3.09), SIMDE_FLOAT64_C( 1.38), SIMDE_FLOAT64_C( -1.35), SIMDE_FLOAT64_C( -3.35), SIMDE_FLOAT64_C( 2.49), SIMDE_FLOAT64_C( -1.09), SIMDE_FLOAT64_C( -3.89), SIMDE_FLOAT64_C( 0.92) }, { SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 3.97), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 12.06), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.02), SIMDE_FLOAT64_C( 2.51) } }, { { SIMDE_FLOAT64_C( -1.13), SIMDE_FLOAT64_C( -1.04), SIMDE_FLOAT64_C( -0.11), SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( 1.47), SIMDE_FLOAT64_C( -3.30), SIMDE_FLOAT64_C( -0.41), SIMDE_FLOAT64_C( 0.53) }, { 
SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( 0.35), SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( 1.45), SIMDE_FLOAT64_C( 4.35), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 1.70) } }, { { SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( -2.87), SIMDE_FLOAT64_C( -0.54), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( -3.41), SIMDE_FLOAT64_C( -3.51), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 2.57) }, { SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 1.04), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 2.69), SIMDE_FLOAT64_C( 13.07) } }, { { SIMDE_FLOAT64_C( -2.62), SIMDE_FLOAT64_C( -1.43), SIMDE_FLOAT64_C( 1.44), SIMDE_FLOAT64_C( -0.87), SIMDE_FLOAT64_C( 1.96), SIMDE_FLOAT64_C( -2.68), SIMDE_FLOAT64_C( -1.16), SIMDE_FLOAT64_C( 2.87) }, { SIMDE_FLOAT64_C( 0.07), SIMDE_FLOAT64_C( 0.24), SIMDE_FLOAT64_C( 4.22), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( 7.10), SIMDE_FLOAT64_C( 0.07), SIMDE_FLOAT64_C( 0.31), SIMDE_FLOAT64_C( 17.64) } }, { { SIMDE_FLOAT64_C( 2.70), SIMDE_FLOAT64_C( 1.49), SIMDE_FLOAT64_C( 3.52), SIMDE_FLOAT64_C( 1.19), SIMDE_FLOAT64_C( -3.59), SIMDE_FLOAT64_C( 3.63), SIMDE_FLOAT64_C( -1.89), SIMDE_FLOAT64_C( -0.72) }, { SIMDE_FLOAT64_C( 14.88), SIMDE_FLOAT64_C( 4.44), SIMDE_FLOAT64_C( 33.78), SIMDE_FLOAT64_C( 3.29), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 37.71), SIMDE_FLOAT64_C( 0.15), SIMDE_FLOAT64_C( 0.49) } }, { { SIMDE_FLOAT64_C( -1.41), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 3.65), SIMDE_FLOAT64_C( -3.94), SIMDE_FLOAT64_C( 2.70), SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( -1.37) }, { SIMDE_FLOAT64_C( 0.24), SIMDE_FLOAT64_C( 7.39), SIMDE_FLOAT64_C( 38.47), SIMDE_FLOAT64_C( 0.02), SIMDE_FLOAT64_C( 14.88), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 1.80), SIMDE_FLOAT64_C( 0.25) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_exp_pd(a); 
simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_exp_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -3.51), SIMDE_FLOAT64_C( -3.98), SIMDE_FLOAT64_C( 3.54), SIMDE_FLOAT64_C( -1.79), SIMDE_FLOAT64_C( -1.83), SIMDE_FLOAT64_C( -3.73), SIMDE_FLOAT64_C( -3.51), SIMDE_FLOAT64_C( 3.71) }, UINT8_C(199), { SIMDE_FLOAT64_C( 2.33), SIMDE_FLOAT64_C( -1.17), SIMDE_FLOAT64_C( -1.77), SIMDE_FLOAT64_C( -2.21), SIMDE_FLOAT64_C( 2.46), SIMDE_FLOAT64_C( 1.54), SIMDE_FLOAT64_C( -1.07), SIMDE_FLOAT64_C( -0.25) }, { SIMDE_FLOAT64_C( 10.28), SIMDE_FLOAT64_C( 0.31), SIMDE_FLOAT64_C( 0.17), SIMDE_FLOAT64_C( -1.79), SIMDE_FLOAT64_C( -1.83), SIMDE_FLOAT64_C( -3.73), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.78) } }, { { SIMDE_FLOAT64_C( -2.63), SIMDE_FLOAT64_C( 1.07), SIMDE_FLOAT64_C( -1.37), SIMDE_FLOAT64_C( -0.96), SIMDE_FLOAT64_C( -3.82), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( -2.76), SIMDE_FLOAT64_C( -2.64) }, UINT8_C(126), { SIMDE_FLOAT64_C( 0.87), SIMDE_FLOAT64_C( 0.29), SIMDE_FLOAT64_C( 1.08), SIMDE_FLOAT64_C( 2.46), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 1.56), SIMDE_FLOAT64_C( 2.48), SIMDE_FLOAT64_C( -0.72) }, { SIMDE_FLOAT64_C( -2.63), SIMDE_FLOAT64_C( 1.34), SIMDE_FLOAT64_C( 2.94), SIMDE_FLOAT64_C( 11.70), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 4.76), SIMDE_FLOAT64_C( 11.94), SIMDE_FLOAT64_C( -2.64) } }, { { SIMDE_FLOAT64_C( 3.77), SIMDE_FLOAT64_C( -3.36), SIMDE_FLOAT64_C( -0.46), SIMDE_FLOAT64_C( -3.74), SIMDE_FLOAT64_C( -3.65), SIMDE_FLOAT64_C( 1.21), SIMDE_FLOAT64_C( 2.59), SIMDE_FLOAT64_C( -0.82) }, UINT8_C( 39), { SIMDE_FLOAT64_C( -3.62), SIMDE_FLOAT64_C( -2.35), SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( -0.70), SIMDE_FLOAT64_C( 1.40), SIMDE_FLOAT64_C( 2.35), SIMDE_FLOAT64_C( -3.63), SIMDE_FLOAT64_C( -3.97) }, { SIMDE_FLOAT64_C( 0.03), 
SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 2.66), SIMDE_FLOAT64_C( -3.74), SIMDE_FLOAT64_C( -3.65), SIMDE_FLOAT64_C( 10.49), SIMDE_FLOAT64_C( 2.59), SIMDE_FLOAT64_C( -0.82) } }, { { SIMDE_FLOAT64_C( -2.61), SIMDE_FLOAT64_C( -3.45), SIMDE_FLOAT64_C( 0.02), SIMDE_FLOAT64_C( -1.37), SIMDE_FLOAT64_C( -2.08), SIMDE_FLOAT64_C( -3.79), SIMDE_FLOAT64_C( 3.50), SIMDE_FLOAT64_C( 2.21) }, UINT8_C(165), { SIMDE_FLOAT64_C( 1.96), SIMDE_FLOAT64_C( -2.06), SIMDE_FLOAT64_C( -1.15), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 1.22), SIMDE_FLOAT64_C( -1.37), SIMDE_FLOAT64_C( 1.08), SIMDE_FLOAT64_C( -3.24) }, { SIMDE_FLOAT64_C( 7.10), SIMDE_FLOAT64_C( -3.45), SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( -1.37), SIMDE_FLOAT64_C( -2.08), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( 3.50), SIMDE_FLOAT64_C( 0.04) } }, { { SIMDE_FLOAT64_C( -1.11), SIMDE_FLOAT64_C( 1.43), SIMDE_FLOAT64_C( 1.97), SIMDE_FLOAT64_C( -2.52), SIMDE_FLOAT64_C( -3.38), SIMDE_FLOAT64_C( 1.41), SIMDE_FLOAT64_C( -2.14), SIMDE_FLOAT64_C( -1.73) }, UINT8_C(202), { SIMDE_FLOAT64_C( 1.17), SIMDE_FLOAT64_C( 3.67), SIMDE_FLOAT64_C( -3.26), SIMDE_FLOAT64_C( 1.54), SIMDE_FLOAT64_C( 3.70), SIMDE_FLOAT64_C( -1.87), SIMDE_FLOAT64_C( 2.10), SIMDE_FLOAT64_C( -0.28) }, { SIMDE_FLOAT64_C( -1.11), SIMDE_FLOAT64_C( 39.25), SIMDE_FLOAT64_C( 1.97), SIMDE_FLOAT64_C( 4.66), SIMDE_FLOAT64_C( -3.38), SIMDE_FLOAT64_C( 1.41), SIMDE_FLOAT64_C( 8.17), SIMDE_FLOAT64_C( 0.76) } }, { { SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( -3.99), SIMDE_FLOAT64_C( -0.06), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( 2.22), SIMDE_FLOAT64_C( -2.77), SIMDE_FLOAT64_C( -1.78), SIMDE_FLOAT64_C( -3.84) }, UINT8_C(172), { SIMDE_FLOAT64_C( 2.66), SIMDE_FLOAT64_C( 1.38), SIMDE_FLOAT64_C( 2.71), SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( 2.13), SIMDE_FLOAT64_C( -2.39), SIMDE_FLOAT64_C( -2.83), SIMDE_FLOAT64_C( 0.11) }, { SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( -3.99), SIMDE_FLOAT64_C( 15.03), SIMDE_FLOAT64_C( 0.77), SIMDE_FLOAT64_C( 2.22), SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( -1.78), 
SIMDE_FLOAT64_C( 1.12) } }, { { SIMDE_FLOAT64_C( -0.91), SIMDE_FLOAT64_C( -2.21), SIMDE_FLOAT64_C( -2.48), SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( -0.09), SIMDE_FLOAT64_C( -1.88), SIMDE_FLOAT64_C( -0.27) }, UINT8_C(244), { SIMDE_FLOAT64_C( 3.66), SIMDE_FLOAT64_C( -0.57), SIMDE_FLOAT64_C( 2.78), SIMDE_FLOAT64_C( 1.75), SIMDE_FLOAT64_C( 3.15), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 1.76), SIMDE_FLOAT64_C( -0.91) }, { SIMDE_FLOAT64_C( -0.91), SIMDE_FLOAT64_C( -2.21), SIMDE_FLOAT64_C( 16.12), SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 23.34), SIMDE_FLOAT64_C( 0.64), SIMDE_FLOAT64_C( 5.81), SIMDE_FLOAT64_C( 0.40) } }, { { SIMDE_FLOAT64_C( 3.81), SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( -1.97), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( -3.60), SIMDE_FLOAT64_C( -3.30), SIMDE_FLOAT64_C( -2.48) }, UINT8_C(117), { SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 3.65), SIMDE_FLOAT64_C( -3.28), SIMDE_FLOAT64_C( 1.61), SIMDE_FLOAT64_C( -0.24), SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( 3.40), SIMDE_FLOAT64_C( 1.27) }, { SIMDE_FLOAT64_C( 1.55), SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( -1.97), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( 29.96), SIMDE_FLOAT64_C( -2.48) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_exp_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_expm1_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 6.33), SIMDE_FLOAT32_C( 1.68), SIMDE_FLOAT32_C( 8.16), SIMDE_FLOAT32_C( 5.04) }, { SIMDE_FLOAT32_C( 560.16), SIMDE_FLOAT32_C( 4.37), SIMDE_FLOAT32_C( 3497.19), SIMDE_FLOAT32_C( 153.47) } }, { { SIMDE_FLOAT32_C( 0.86), 
SIMDE_FLOAT32_C( 8.63), SIMDE_FLOAT32_C( 5.23), SIMDE_FLOAT32_C( 4.43) }, { SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( 5596.08), SIMDE_FLOAT32_C( 185.79), SIMDE_FLOAT32_C( 82.93) } }, { { SIMDE_FLOAT32_C( 7.85), SIMDE_FLOAT32_C( 6.92), SIMDE_FLOAT32_C( 2.77), SIMDE_FLOAT32_C( 5.34) }, { SIMDE_FLOAT32_C( 2564.73), SIMDE_FLOAT32_C( 1011.32), SIMDE_FLOAT32_C( 14.96), SIMDE_FLOAT32_C( 207.51) } }, { { SIMDE_FLOAT32_C( 6.60), SIMDE_FLOAT32_C( 1.06), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( 0.13) }, { SIMDE_FLOAT32_C( 734.10), SIMDE_FLOAT32_C( 1.89), SIMDE_FLOAT32_C( 6.54), SIMDE_FLOAT32_C( 0.14) } }, { { SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 7.36), SIMDE_FLOAT32_C( 9.70), SIMDE_FLOAT32_C( 5.19) }, { SIMDE_FLOAT32_C( 19.09), SIMDE_FLOAT32_C( 1570.84), SIMDE_FLOAT32_C( 16316.60), SIMDE_FLOAT32_C( 178.47) } }, { { SIMDE_FLOAT32_C( 2.21), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 8.65), SIMDE_FLOAT32_C( 9.58) }, { SIMDE_FLOAT32_C( 8.12), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 5709.14), SIMDE_FLOAT32_C( 14471.42) } }, { { SIMDE_FLOAT32_C( 3.70), SIMDE_FLOAT32_C( 4.96), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 7.49) }, { SIMDE_FLOAT32_C( 39.45), SIMDE_FLOAT32_C( 141.59), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 1789.05) } }, { { SIMDE_FLOAT32_C( 7.91), SIMDE_FLOAT32_C( 1.05), SIMDE_FLOAT32_C( 4.16), SIMDE_FLOAT32_C( 4.24) }, { SIMDE_FLOAT32_C( 2723.39), SIMDE_FLOAT32_C( 1.86), SIMDE_FLOAT32_C( 63.07), SIMDE_FLOAT32_C( 68.41) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_expm1_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_expm1_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 0.24), SIMDE_FLOAT64_C( 7.18) }, { SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( 1311.91) } }, { { SIMDE_FLOAT64_C( 9.69), 
SIMDE_FLOAT64_C( 1.13) }, { SIMDE_FLOAT64_C( 16154.24), SIMDE_FLOAT64_C( 2.10) } }, { { SIMDE_FLOAT64_C( 6.24), SIMDE_FLOAT64_C( 8.67) }, { SIMDE_FLOAT64_C( 511.86), SIMDE_FLOAT64_C( 5824.50) } }, { { SIMDE_FLOAT64_C( 9.69), SIMDE_FLOAT64_C( 7.67) }, { SIMDE_FLOAT64_C( 16154.24), SIMDE_FLOAT64_C( 2142.08) } }, { { SIMDE_FLOAT64_C( 4.67), SIMDE_FLOAT64_C( 1.83) }, { SIMDE_FLOAT64_C( 105.70), SIMDE_FLOAT64_C( 5.23) } }, { { SIMDE_FLOAT64_C( 2.80), SIMDE_FLOAT64_C( 6.65) }, { SIMDE_FLOAT64_C( 15.44), SIMDE_FLOAT64_C( 771.78) } }, { { SIMDE_FLOAT64_C( 8.11), SIMDE_FLOAT64_C( 9.49) }, { SIMDE_FLOAT64_C( 3326.58), SIMDE_FLOAT64_C( 13225.80) } }, { { SIMDE_FLOAT64_C( 1.48), SIMDE_FLOAT64_C( 7.85) }, { SIMDE_FLOAT64_C( 3.39), SIMDE_FLOAT64_C( 2564.73) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_expm1_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_expm1_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 8.24), SIMDE_FLOAT32_C( 2.23), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 4.38), SIMDE_FLOAT32_C( 3.85), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 8.49), SIMDE_FLOAT32_C( 5.32) }, { SIMDE_FLOAT32_C( 3788.54), SIMDE_FLOAT32_C( 8.30), SIMDE_FLOAT32_C( 44.15), SIMDE_FLOAT32_C( 78.84), SIMDE_FLOAT32_C( 45.99), SIMDE_FLOAT32_C( 1.01), SIMDE_FLOAT32_C( 4864.86), SIMDE_FLOAT32_C( 203.38) } }, { { SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 4.59), SIMDE_FLOAT32_C( 9.56), SIMDE_FLOAT32_C( 9.67), SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( 3.74), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 0.14) }, { SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 97.49), SIMDE_FLOAT32_C( 14184.85), SIMDE_FLOAT32_C( 15834.35), SIMDE_FLOAT32_C( 29.57), SIMDE_FLOAT32_C( 41.10), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.15) } }, { { 
SIMDE_FLOAT32_C( 6.62), SIMDE_FLOAT32_C( 4.91), SIMDE_FLOAT32_C( 3.11), SIMDE_FLOAT32_C( 8.04), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 9.84), SIMDE_FLOAT32_C( 7.16), SIMDE_FLOAT32_C( 7.09) }, { SIMDE_FLOAT32_C( 748.95), SIMDE_FLOAT32_C( 134.64), SIMDE_FLOAT32_C( 21.42), SIMDE_FLOAT32_C( 3101.61), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 18768.72), SIMDE_FLOAT32_C( 1285.91), SIMDE_FLOAT32_C( 1198.91) } }, { { SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 1.93), SIMDE_FLOAT32_C( 9.95), SIMDE_FLOAT32_C( 7.75), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 3.96), SIMDE_FLOAT32_C( 4.01), SIMDE_FLOAT32_C( 9.02) }, { SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 5.89), SIMDE_FLOAT32_C( 20951.22), SIMDE_FLOAT32_C( 2320.57), SIMDE_FLOAT32_C( 1.20), SIMDE_FLOAT32_C( 51.46), SIMDE_FLOAT32_C( 54.15), SIMDE_FLOAT32_C( 8265.78) } }, { { SIMDE_FLOAT32_C( 6.19), SIMDE_FLOAT32_C( 7.82), SIMDE_FLOAT32_C( 3.40), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 8.52), SIMDE_FLOAT32_C( 1.89), SIMDE_FLOAT32_C( 5.37), SIMDE_FLOAT32_C( 9.06) }, { SIMDE_FLOAT32_C( 486.85), SIMDE_FLOAT32_C( 2488.91), SIMDE_FLOAT32_C( 28.96), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 5013.06), SIMDE_FLOAT32_C( 5.62), SIMDE_FLOAT32_C( 213.86), SIMDE_FLOAT32_C( 8603.15) } }, { { SIMDE_FLOAT32_C( 6.48), SIMDE_FLOAT32_C( 4.92), SIMDE_FLOAT32_C( 8.72), SIMDE_FLOAT32_C( 9.90), SIMDE_FLOAT32_C( 8.66), SIMDE_FLOAT32_C( 8.99), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 5.28) }, { SIMDE_FLOAT32_C( 650.97), SIMDE_FLOAT32_C( 136.00), SIMDE_FLOAT32_C( 6123.18), SIMDE_FLOAT32_C( 19929.36), SIMDE_FLOAT32_C( 5766.53), SIMDE_FLOAT32_C( 8021.46), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 195.37) } }, { { SIMDE_FLOAT32_C( 3.90), SIMDE_FLOAT32_C( 3.15), SIMDE_FLOAT32_C( 3.32), SIMDE_FLOAT32_C( 4.49), SIMDE_FLOAT32_C( 2.99), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 3.12) }, { SIMDE_FLOAT32_C( 48.40), SIMDE_FLOAT32_C( 22.34), SIMDE_FLOAT32_C( 26.66), SIMDE_FLOAT32_C( 88.12), SIMDE_FLOAT32_C( 18.89), SIMDE_FLOAT32_C( 0.62), 
SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 21.65) } }, { { SIMDE_FLOAT32_C( 2.41), SIMDE_FLOAT32_C( 1.52), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 3.20), SIMDE_FLOAT32_C( 5.48), SIMDE_FLOAT32_C( 4.88), SIMDE_FLOAT32_C( 2.22), SIMDE_FLOAT32_C( 1.67) }, { SIMDE_FLOAT32_C( 10.13), SIMDE_FLOAT32_C( 3.57), SIMDE_FLOAT32_C( 1.39), SIMDE_FLOAT32_C( 23.53), SIMDE_FLOAT32_C( 238.85), SIMDE_FLOAT32_C( 130.63), SIMDE_FLOAT32_C( 8.21), SIMDE_FLOAT32_C( 4.31) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_expm1_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_expm1_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 6.68), SIMDE_FLOAT64_C( 7.67), SIMDE_FLOAT64_C( 2.13), SIMDE_FLOAT64_C( 3.50) }, { SIMDE_FLOAT64_C( 795.32), SIMDE_FLOAT64_C( 2142.08), SIMDE_FLOAT64_C( 7.41), SIMDE_FLOAT64_C( 32.12) } }, { { SIMDE_FLOAT64_C( 4.83), SIMDE_FLOAT64_C( 1.25), SIMDE_FLOAT64_C( 4.74), SIMDE_FLOAT64_C( 8.00) }, { SIMDE_FLOAT64_C( 124.21), SIMDE_FLOAT64_C( 2.49), SIMDE_FLOAT64_C( 113.43), SIMDE_FLOAT64_C( 2979.96) } }, { { SIMDE_FLOAT64_C( 9.68), SIMDE_FLOAT64_C( 1.62), SIMDE_FLOAT64_C( 7.69), SIMDE_FLOAT64_C( 7.36) }, { SIMDE_FLOAT64_C( 15993.50), SIMDE_FLOAT64_C( 4.05), SIMDE_FLOAT64_C( 2185.37), SIMDE_FLOAT64_C( 1570.84) } }, { { SIMDE_FLOAT64_C( 8.87), SIMDE_FLOAT64_C( 3.50), SIMDE_FLOAT64_C( 7.63), SIMDE_FLOAT64_C( 8.66) }, { SIMDE_FLOAT64_C( 7114.28), SIMDE_FLOAT64_C( 32.12), SIMDE_FLOAT64_C( 2058.05), SIMDE_FLOAT64_C( 5766.53) } }, { { SIMDE_FLOAT64_C( 5.89), SIMDE_FLOAT64_C( 2.15), SIMDE_FLOAT64_C( 8.77), SIMDE_FLOAT64_C( 4.86) }, { SIMDE_FLOAT64_C( 360.41), SIMDE_FLOAT64_C( 7.58), SIMDE_FLOAT64_C( 6437.17), SIMDE_FLOAT64_C( 128.02) } }, { { SIMDE_FLOAT64_C( 2.27), SIMDE_FLOAT64_C( 7.65), SIMDE_FLOAT64_C( 
5.22), SIMDE_FLOAT64_C( 9.35) }, { SIMDE_FLOAT64_C( 8.68), SIMDE_FLOAT64_C( 2099.65), SIMDE_FLOAT64_C( 183.93), SIMDE_FLOAT64_C( 11497.82) } }, { { SIMDE_FLOAT64_C( 3.29), SIMDE_FLOAT64_C( 3.19), SIMDE_FLOAT64_C( 2.91), SIMDE_FLOAT64_C( 3.13) }, { SIMDE_FLOAT64_C( 25.84), SIMDE_FLOAT64_C( 23.29), SIMDE_FLOAT64_C( 17.36), SIMDE_FLOAT64_C( 21.87) } }, { { SIMDE_FLOAT64_C( 5.79), SIMDE_FLOAT64_C( 1.89), SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 2.47) }, { SIMDE_FLOAT64_C( 326.01), SIMDE_FLOAT64_C( 5.62), SIMDE_FLOAT64_C( 1.66), SIMDE_FLOAT64_C( 10.82) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_expm1_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_expm1_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 2.06), SIMDE_FLOAT32_C( 8.37), SIMDE_FLOAT32_C( 4.10), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 5.30), SIMDE_FLOAT32_C( 6.13), SIMDE_FLOAT32_C( 1.82), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 1.44), SIMDE_FLOAT32_C( 6.24), SIMDE_FLOAT32_C( 8.36), SIMDE_FLOAT32_C( 5.93), SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( 8.82), SIMDE_FLOAT32_C( 8.89), SIMDE_FLOAT32_C( 5.58) }, { SIMDE_FLOAT32_C( 6.85), SIMDE_FLOAT32_C( 4314.64), SIMDE_FLOAT32_C( 59.34), SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( 199.34), SIMDE_FLOAT32_C( 458.44), SIMDE_FLOAT32_C( 5.17), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 3.22), SIMDE_FLOAT32_C( 511.86), SIMDE_FLOAT32_C( 4271.69), SIMDE_FLOAT32_C( 375.15), SIMDE_FLOAT32_C( 9.18), SIMDE_FLOAT32_C( 6767.26), SIMDE_FLOAT32_C( 7258.02), SIMDE_FLOAT32_C( 264.07) } }, { { SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( 7.28), SIMDE_FLOAT32_C( 6.53), SIMDE_FLOAT32_C( 8.60), SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( 1.74), SIMDE_FLOAT32_C( 4.08), SIMDE_FLOAT32_C( 7.80), SIMDE_FLOAT32_C( 
1.34), SIMDE_FLOAT32_C( 7.90), SIMDE_FLOAT32_C( 9.34), SIMDE_FLOAT32_C( 7.60), SIMDE_FLOAT32_C( 3.95), SIMDE_FLOAT32_C( 5.46), SIMDE_FLOAT32_C( 8.74), SIMDE_FLOAT32_C( 6.01) }, { SIMDE_FLOAT32_C( 6.10), SIMDE_FLOAT32_C( 1449.99), SIMDE_FLOAT32_C( 684.40), SIMDE_FLOAT32_C( 5430.66), SIMDE_FLOAT32_C( 2.86), SIMDE_FLOAT32_C( 4.70), SIMDE_FLOAT32_C( 58.15), SIMDE_FLOAT32_C( 2439.60), SIMDE_FLOAT32_C( 2.82), SIMDE_FLOAT32_C( 2696.28), SIMDE_FLOAT32_C( 11383.41), SIMDE_FLOAT32_C( 1997.20), SIMDE_FLOAT32_C( 50.94), SIMDE_FLOAT32_C( 234.10), SIMDE_FLOAT32_C( 6246.89), SIMDE_FLOAT32_C( 406.48) } }, { { SIMDE_FLOAT32_C( 3.83), SIMDE_FLOAT32_C( 2.84), SIMDE_FLOAT32_C( 6.87), SIMDE_FLOAT32_C( 9.14), SIMDE_FLOAT32_C( 8.97), SIMDE_FLOAT32_C( 8.69), SIMDE_FLOAT32_C( 9.51), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 4.93), SIMDE_FLOAT32_C( 7.87), SIMDE_FLOAT32_C( 6.35), SIMDE_FLOAT32_C( 7.25), SIMDE_FLOAT32_C( 6.69), SIMDE_FLOAT32_C( 5.24), SIMDE_FLOAT32_C( 2.83), SIMDE_FLOAT32_C( 8.65) }, { SIMDE_FLOAT32_C( 45.06), SIMDE_FLOAT32_C( 16.12), SIMDE_FLOAT32_C( 961.95), SIMDE_FLOAT32_C( 9319.77), SIMDE_FLOAT32_C( 7862.60), SIMDE_FLOAT32_C( 5942.18), SIMDE_FLOAT32_C( 13493.00), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 137.38), SIMDE_FLOAT32_C( 2616.57), SIMDE_FLOAT32_C( 571.49), SIMDE_FLOAT32_C( 1407.10), SIMDE_FLOAT32_C( 803.32), SIMDE_FLOAT32_C( 187.67), SIMDE_FLOAT32_C( 15.95), SIMDE_FLOAT32_C( 5709.14) } }, { { SIMDE_FLOAT32_C( 2.51), SIMDE_FLOAT32_C( 9.36), SIMDE_FLOAT32_C( 7.24), SIMDE_FLOAT32_C( 3.86), SIMDE_FLOAT32_C( 1.10), SIMDE_FLOAT32_C( 1.32), SIMDE_FLOAT32_C( 1.66), SIMDE_FLOAT32_C( 2.44), SIMDE_FLOAT32_C( 9.22), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 3.17), SIMDE_FLOAT32_C( 6.46), SIMDE_FLOAT32_C( 8.77), SIMDE_FLOAT32_C( 9.18), SIMDE_FLOAT32_C( 0.30) }, { SIMDE_FLOAT32_C( 11.30), SIMDE_FLOAT32_C( 11613.38), SIMDE_FLOAT32_C( 1393.09), SIMDE_FLOAT32_C( 46.47), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 2.74), SIMDE_FLOAT32_C( 4.26), SIMDE_FLOAT32_C( 
10.47), SIMDE_FLOAT32_C( 10096.07), SIMDE_FLOAT32_C( 1.72), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 22.81), SIMDE_FLOAT32_C( 638.06), SIMDE_FLOAT32_C( 6437.18), SIMDE_FLOAT32_C( 9700.16), SIMDE_FLOAT32_C( 0.35) } }, { { SIMDE_FLOAT32_C( 1.62), SIMDE_FLOAT32_C( 6.06), SIMDE_FLOAT32_C( 9.43), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 4.74), SIMDE_FLOAT32_C( 8.94), SIMDE_FLOAT32_C( 1.01), SIMDE_FLOAT32_C( 9.67), SIMDE_FLOAT32_C( 6.81), SIMDE_FLOAT32_C( 7.35), SIMDE_FLOAT32_C( 6.92), SIMDE_FLOAT32_C( 3.50), SIMDE_FLOAT32_C( 2.59), SIMDE_FLOAT32_C( 9.75), SIMDE_FLOAT32_C( 2.14), SIMDE_FLOAT32_C( 5.10) }, { SIMDE_FLOAT32_C( 4.05), SIMDE_FLOAT32_C( 427.38), SIMDE_FLOAT32_C( 12455.53), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 113.43), SIMDE_FLOAT32_C( 7630.19), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( 15834.35), SIMDE_FLOAT32_C( 905.87), SIMDE_FLOAT32_C( 1555.20), SIMDE_FLOAT32_C( 1011.32), SIMDE_FLOAT32_C( 32.12), SIMDE_FLOAT32_C( 12.33), SIMDE_FLOAT32_C( 17153.23), SIMDE_FLOAT32_C( 7.50), SIMDE_FLOAT32_C( 163.02) } }, { { SIMDE_FLOAT32_C( 9.11), SIMDE_FLOAT32_C( 9.39), SIMDE_FLOAT32_C( 8.97), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( 9.92), SIMDE_FLOAT32_C( 1.63), SIMDE_FLOAT32_C( 2.67), SIMDE_FLOAT32_C( 3.10), SIMDE_FLOAT32_C( 8.09), SIMDE_FLOAT32_C( 1.45), SIMDE_FLOAT32_C( 2.28), SIMDE_FLOAT32_C( 8.38), SIMDE_FLOAT32_C( 3.06) }, { SIMDE_FLOAT32_C( 9044.29), SIMDE_FLOAT32_C( 11967.10), SIMDE_FLOAT32_C( 7862.60), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 1.03), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 13.01), SIMDE_FLOAT32_C( 20331.99), SIMDE_FLOAT32_C( 4.10), SIMDE_FLOAT32_C( 13.44), SIMDE_FLOAT32_C( 21.20), SIMDE_FLOAT32_C( 3260.69), SIMDE_FLOAT32_C( 3.26), SIMDE_FLOAT32_C( 8.78), SIMDE_FLOAT32_C( 4358.01), SIMDE_FLOAT32_C( 20.33) } }, { { SIMDE_FLOAT32_C( 8.34), SIMDE_FLOAT32_C( 7.81), SIMDE_FLOAT32_C( 3.65), SIMDE_FLOAT32_C( 3.08), SIMDE_FLOAT32_C( 6.75), SIMDE_FLOAT32_C( 4.66), SIMDE_FLOAT32_C( 
2.75), SIMDE_FLOAT32_C( 3.56), SIMDE_FLOAT32_C( 2.01), SIMDE_FLOAT32_C( 9.67), SIMDE_FLOAT32_C( 7.06), SIMDE_FLOAT32_C( 4.60), SIMDE_FLOAT32_C( 9.42), SIMDE_FLOAT32_C( 9.20), SIMDE_FLOAT32_C( 9.71), SIMDE_FLOAT32_C( 8.53) }, { SIMDE_FLOAT32_C( 4187.09), SIMDE_FLOAT32_C( 2464.13), SIMDE_FLOAT32_C( 37.47), SIMDE_FLOAT32_C( 20.76), SIMDE_FLOAT32_C( 853.06), SIMDE_FLOAT32_C( 104.64), SIMDE_FLOAT32_C( 14.64), SIMDE_FLOAT32_C( 34.16), SIMDE_FLOAT32_C( 6.46), SIMDE_FLOAT32_C( 15834.35), SIMDE_FLOAT32_C( 1163.45), SIMDE_FLOAT32_C( 98.48), SIMDE_FLOAT32_C( 12331.58), SIMDE_FLOAT32_C( 9896.13), SIMDE_FLOAT32_C( 16480.60), SIMDE_FLOAT32_C( 5063.44) } }, { { SIMDE_FLOAT32_C( 8.59), SIMDE_FLOAT32_C( 8.67), SIMDE_FLOAT32_C( 8.74), SIMDE_FLOAT32_C( 9.29), SIMDE_FLOAT32_C( 9.30), SIMDE_FLOAT32_C( 1.38), SIMDE_FLOAT32_C( 9.22), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 4.05), SIMDE_FLOAT32_C( 2.31), SIMDE_FLOAT32_C( 9.01), SIMDE_FLOAT32_C( 5.49), SIMDE_FLOAT32_C( 4.59), SIMDE_FLOAT32_C( 7.39), SIMDE_FLOAT32_C( 8.56), SIMDE_FLOAT32_C( 2.93) }, { SIMDE_FLOAT32_C( 5376.61), SIMDE_FLOAT32_C( 5824.50), SIMDE_FLOAT32_C( 6246.89), SIMDE_FLOAT32_C( 10828.18), SIMDE_FLOAT32_C( 10937.02), SIMDE_FLOAT32_C( 2.97), SIMDE_FLOAT32_C( 10096.07), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 56.40), SIMDE_FLOAT32_C( 9.07), SIMDE_FLOAT32_C( 8183.52), SIMDE_FLOAT32_C( 241.26), SIMDE_FLOAT32_C( 97.49), SIMDE_FLOAT32_C( 1618.71), SIMDE_FLOAT32_C( 5217.68), SIMDE_FLOAT32_C( 17.73) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_expm1_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_expm1_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 9.71), SIMDE_FLOAT32_C( 3.42), 
SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 8.23), SIMDE_FLOAT32_C( 6.49), SIMDE_FLOAT32_C( 5.78), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( 3.67), SIMDE_FLOAT32_C( 6.70), SIMDE_FLOAT32_C( 7.95), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 7.57), SIMDE_FLOAT32_C( 6.39), SIMDE_FLOAT32_C( 7.96) }, UINT8_C(170), { SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 4.36), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 5.69), SIMDE_FLOAT32_C( 4.94), SIMDE_FLOAT32_C( 9.39), SIMDE_FLOAT32_C( 3.52), SIMDE_FLOAT32_C( 4.73), SIMDE_FLOAT32_C( 2.21), SIMDE_FLOAT32_C( 5.91), SIMDE_FLOAT32_C( 7.57), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 8.30), SIMDE_FLOAT32_C( 3.13), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( 1.72) }, { SIMDE_FLOAT32_C( 9.71), SIMDE_FLOAT32_C( 77.26), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 294.89), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 11967.10), SIMDE_FLOAT32_C( 6.49), SIMDE_FLOAT32_C( 112.30), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( 3.67), SIMDE_FLOAT32_C( 6.70), SIMDE_FLOAT32_C( 7.95), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 7.57), SIMDE_FLOAT32_C( 6.39), SIMDE_FLOAT32_C( 7.96) } }, { { SIMDE_FLOAT32_C( 3.18), SIMDE_FLOAT32_C( 2.06), SIMDE_FLOAT32_C( 1.94), SIMDE_FLOAT32_C( 1.40), SIMDE_FLOAT32_C( 8.55), SIMDE_FLOAT32_C( 7.72), SIMDE_FLOAT32_C( 4.74), SIMDE_FLOAT32_C( 2.22), SIMDE_FLOAT32_C( 4.42), SIMDE_FLOAT32_C( 2.69), SIMDE_FLOAT32_C( 3.40), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 9.08), SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( 1.11), SIMDE_FLOAT32_C( 1.07) }, UINT8_C( 91), { SIMDE_FLOAT32_C( 2.44), SIMDE_FLOAT32_C( 6.76), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 1.84), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 5.39), SIMDE_FLOAT32_C( 4.05), SIMDE_FLOAT32_C( 6.19), SIMDE_FLOAT32_C( 2.97), SIMDE_FLOAT32_C( 5.59), SIMDE_FLOAT32_C( 4.49), SIMDE_FLOAT32_C( 6.09), SIMDE_FLOAT32_C( 6.84), SIMDE_FLOAT32_C( 6.20), SIMDE_FLOAT32_C( 9.27), SIMDE_FLOAT32_C( 8.90) }, { SIMDE_FLOAT32_C( 10.47), SIMDE_FLOAT32_C( 861.64), 
SIMDE_FLOAT32_C( 1.94), SIMDE_FLOAT32_C( 5.30), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 7.72), SIMDE_FLOAT32_C( 56.40), SIMDE_FLOAT32_C( 2.22), SIMDE_FLOAT32_C( 4.42), SIMDE_FLOAT32_C( 2.69), SIMDE_FLOAT32_C( 3.40), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 9.08), SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( 1.11), SIMDE_FLOAT32_C( 1.07) } }, { { SIMDE_FLOAT32_C( 8.15), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 7.45), SIMDE_FLOAT32_C( 5.87), SIMDE_FLOAT32_C( 5.41), SIMDE_FLOAT32_C( 9.67), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 8.10), SIMDE_FLOAT32_C( 3.07), SIMDE_FLOAT32_C( 2.28), SIMDE_FLOAT32_C( 7.18), SIMDE_FLOAT32_C( 4.44), SIMDE_FLOAT32_C( 3.39), SIMDE_FLOAT32_C( 8.25), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 5.83) }, UINT8_C( 10), { SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 7.67), SIMDE_FLOAT32_C( 5.29), SIMDE_FLOAT32_C( 6.22), SIMDE_FLOAT32_C( 1.72), SIMDE_FLOAT32_C( 1.47), SIMDE_FLOAT32_C( 9.19), SIMDE_FLOAT32_C( 7.31), SIMDE_FLOAT32_C( 5.96), SIMDE_FLOAT32_C( 5.28), SIMDE_FLOAT32_C( 4.15), SIMDE_FLOAT32_C( 2.16), SIMDE_FLOAT32_C( 4.55), SIMDE_FLOAT32_C( 3.05), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 5.23) }, { SIMDE_FLOAT32_C( 8.15), SIMDE_FLOAT32_C( 2142.08), SIMDE_FLOAT32_C( 7.45), SIMDE_FLOAT32_C( 501.70), SIMDE_FLOAT32_C( 5.41), SIMDE_FLOAT32_C( 9.67), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 8.10), SIMDE_FLOAT32_C( 3.07), SIMDE_FLOAT32_C( 2.28), SIMDE_FLOAT32_C( 7.18), SIMDE_FLOAT32_C( 4.44), SIMDE_FLOAT32_C( 3.39), SIMDE_FLOAT32_C( 8.25), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 5.83) } }, { { SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 6.17), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 6.46), SIMDE_FLOAT32_C( 8.74), SIMDE_FLOAT32_C( 3.24), SIMDE_FLOAT32_C( 8.75), SIMDE_FLOAT32_C( 5.92), SIMDE_FLOAT32_C( 7.68), SIMDE_FLOAT32_C( 2.13), SIMDE_FLOAT32_C( 4.17), SIMDE_FLOAT32_C( 7.84), SIMDE_FLOAT32_C( 7.97), SIMDE_FLOAT32_C( 9.18), SIMDE_FLOAT32_C( 8.67) }, UINT8_C(236), { SIMDE_FLOAT32_C( 4.47), SIMDE_FLOAT32_C( 4.89), SIMDE_FLOAT32_C( 
7.36), SIMDE_FLOAT32_C( 5.94), SIMDE_FLOAT32_C( 4.08), SIMDE_FLOAT32_C( 4.67), SIMDE_FLOAT32_C( 1.90), SIMDE_FLOAT32_C( 9.36), SIMDE_FLOAT32_C( 8.82), SIMDE_FLOAT32_C( 4.06), SIMDE_FLOAT32_C( 3.91), SIMDE_FLOAT32_C( 1.86), SIMDE_FLOAT32_C( 4.37), SIMDE_FLOAT32_C( 9.13), SIMDE_FLOAT32_C( 2.36), SIMDE_FLOAT32_C( 0.55) }, { SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 6.17), SIMDE_FLOAT32_C( 1570.84), SIMDE_FLOAT32_C( 378.93), SIMDE_FLOAT32_C( 6.46), SIMDE_FLOAT32_C( 105.70), SIMDE_FLOAT32_C( 5.69), SIMDE_FLOAT32_C( 11613.38), SIMDE_FLOAT32_C( 5.92), SIMDE_FLOAT32_C( 7.68), SIMDE_FLOAT32_C( 2.13), SIMDE_FLOAT32_C( 4.17), SIMDE_FLOAT32_C( 7.84), SIMDE_FLOAT32_C( 7.97), SIMDE_FLOAT32_C( 9.18), SIMDE_FLOAT32_C( 8.67) } }, { { SIMDE_FLOAT32_C( 9.77), SIMDE_FLOAT32_C( 2.53), SIMDE_FLOAT32_C( 7.01), SIMDE_FLOAT32_C( 8.51), SIMDE_FLOAT32_C( 5.77), SIMDE_FLOAT32_C( 5.76), SIMDE_FLOAT32_C( 4.43), SIMDE_FLOAT32_C( 3.45), SIMDE_FLOAT32_C( 7.89), SIMDE_FLOAT32_C( 8.61), SIMDE_FLOAT32_C( 1.29), SIMDE_FLOAT32_C( 5.86), SIMDE_FLOAT32_C( 7.79), SIMDE_FLOAT32_C( 9.96), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 2.25) }, UINT8_C( 16), { SIMDE_FLOAT32_C( 8.86), SIMDE_FLOAT32_C( 8.20), SIMDE_FLOAT32_C( 8.92), SIMDE_FLOAT32_C( 3.53), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 8.28), SIMDE_FLOAT32_C( 2.35), SIMDE_FLOAT32_C( 4.16), SIMDE_FLOAT32_C( 2.18), SIMDE_FLOAT32_C( 4.21), SIMDE_FLOAT32_C( 8.53), SIMDE_FLOAT32_C( 1.32), SIMDE_FLOAT32_C( 6.57), SIMDE_FLOAT32_C( 9.08), SIMDE_FLOAT32_C( 1.09), SIMDE_FLOAT32_C( 9.10) }, { SIMDE_FLOAT32_C( 9.77), SIMDE_FLOAT32_C( 2.53), SIMDE_FLOAT32_C( 7.01), SIMDE_FLOAT32_C( 8.51), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 5.76), SIMDE_FLOAT32_C( 4.43), SIMDE_FLOAT32_C( 3.45), SIMDE_FLOAT32_C( 7.89), SIMDE_FLOAT32_C( 8.61), SIMDE_FLOAT32_C( 1.29), SIMDE_FLOAT32_C( 5.86), SIMDE_FLOAT32_C( 7.79), SIMDE_FLOAT32_C( 9.96), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 2.25) } }, { { SIMDE_FLOAT32_C( 6.09), SIMDE_FLOAT32_C( 9.60), SIMDE_FLOAT32_C( 4.88), SIMDE_FLOAT32_C( 
1.85), SIMDE_FLOAT32_C( 4.03), SIMDE_FLOAT32_C( 8.33), SIMDE_FLOAT32_C( 9.74), SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( 9.62), SIMDE_FLOAT32_C( 5.60), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 9.57), SIMDE_FLOAT32_C( 7.10), SIMDE_FLOAT32_C( 2.68), SIMDE_FLOAT32_C( 4.42), SIMDE_FLOAT32_C( 5.97) }, UINT8_C( 45), { SIMDE_FLOAT32_C( 3.34), SIMDE_FLOAT32_C( 9.50), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 1.61), SIMDE_FLOAT32_C( 1.85), SIMDE_FLOAT32_C( 5.13), SIMDE_FLOAT32_C( 3.80), SIMDE_FLOAT32_C( 6.06), SIMDE_FLOAT32_C( 3.66), SIMDE_FLOAT32_C( 5.11), SIMDE_FLOAT32_C( 2.63), SIMDE_FLOAT32_C( 2.74), SIMDE_FLOAT32_C( 6.20), SIMDE_FLOAT32_C( 1.73), SIMDE_FLOAT32_C( 8.83), SIMDE_FLOAT32_C( 5.80) }, { SIMDE_FLOAT32_C( 27.22), SIMDE_FLOAT32_C( 9.60), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 4.03), SIMDE_FLOAT32_C( 168.02), SIMDE_FLOAT32_C( 9.74), SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( 9.62), SIMDE_FLOAT32_C( 5.60), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 9.57), SIMDE_FLOAT32_C( 7.10), SIMDE_FLOAT32_C( 2.68), SIMDE_FLOAT32_C( 4.42), SIMDE_FLOAT32_C( 5.97) } }, { { SIMDE_FLOAT32_C( 6.61), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 9.83), SIMDE_FLOAT32_C( 4.94), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 2.47), SIMDE_FLOAT32_C( 4.55), SIMDE_FLOAT32_C( 6.02), SIMDE_FLOAT32_C( 2.90), SIMDE_FLOAT32_C( 4.12), SIMDE_FLOAT32_C( 3.12), SIMDE_FLOAT32_C( 5.58), SIMDE_FLOAT32_C( 8.54), SIMDE_FLOAT32_C( 9.08), SIMDE_FLOAT32_C( 6.46), SIMDE_FLOAT32_C( 1.88) }, UINT8_C(126), { SIMDE_FLOAT32_C( 7.43), SIMDE_FLOAT32_C( 3.49), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 2.56), SIMDE_FLOAT32_C( 7.28), SIMDE_FLOAT32_C( 6.48), SIMDE_FLOAT32_C( 6.22), SIMDE_FLOAT32_C( 2.40), SIMDE_FLOAT32_C( 9.11), SIMDE_FLOAT32_C( 8.96), SIMDE_FLOAT32_C( 8.60), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 7.79), SIMDE_FLOAT32_C( 4.40), SIMDE_FLOAT32_C( 7.45), SIMDE_FLOAT32_C( 8.47) }, { SIMDE_FLOAT32_C( 6.61), SIMDE_FLOAT32_C( 31.79), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 11.94), 
SIMDE_FLOAT32_C( 1449.99), SIMDE_FLOAT32_C( 650.97), SIMDE_FLOAT32_C( 501.70), SIMDE_FLOAT32_C( 6.02), SIMDE_FLOAT32_C( 2.90), SIMDE_FLOAT32_C( 4.12), SIMDE_FLOAT32_C( 3.12), SIMDE_FLOAT32_C( 5.58), SIMDE_FLOAT32_C( 8.54), SIMDE_FLOAT32_C( 9.08), SIMDE_FLOAT32_C( 6.46), SIMDE_FLOAT32_C( 1.88) } }, { { SIMDE_FLOAT32_C( 4.23), SIMDE_FLOAT32_C( 2.39), SIMDE_FLOAT32_C( 8.88), SIMDE_FLOAT32_C( 6.71), SIMDE_FLOAT32_C( 6.94), SIMDE_FLOAT32_C( 4.90), SIMDE_FLOAT32_C( 9.61), SIMDE_FLOAT32_C( 1.06), SIMDE_FLOAT32_C( 8.02), SIMDE_FLOAT32_C( 5.19), SIMDE_FLOAT32_C( 9.60), SIMDE_FLOAT32_C( 7.10), SIMDE_FLOAT32_C( 1.65), SIMDE_FLOAT32_C( 1.48), SIMDE_FLOAT32_C( 5.68), SIMDE_FLOAT32_C( 9.08) }, UINT8_C( 53), { SIMDE_FLOAT32_C( 6.11), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( 2.25), SIMDE_FLOAT32_C( 2.59), SIMDE_FLOAT32_C( 7.86), SIMDE_FLOAT32_C( 4.65), SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( 6.82), SIMDE_FLOAT32_C( 3.25), SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( 4.61), SIMDE_FLOAT32_C( 7.65), SIMDE_FLOAT32_C( 10.00), SIMDE_FLOAT32_C( 3.07), SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 2.39) }, { SIMDE_FLOAT32_C( 449.34), SIMDE_FLOAT32_C( 2.39), SIMDE_FLOAT32_C( 8.49), SIMDE_FLOAT32_C( 6.71), SIMDE_FLOAT32_C( 2590.52), SIMDE_FLOAT32_C( 103.58), SIMDE_FLOAT32_C( 9.61), SIMDE_FLOAT32_C( 1.06), SIMDE_FLOAT32_C( 8.02), SIMDE_FLOAT32_C( 5.19), SIMDE_FLOAT32_C( 9.60), SIMDE_FLOAT32_C( 7.10), SIMDE_FLOAT32_C( 1.65), SIMDE_FLOAT32_C( 1.48), SIMDE_FLOAT32_C( 5.68), SIMDE_FLOAT32_C( 9.08) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_expm1_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_expm1_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { 
SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 1.40), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 7.27), SIMDE_FLOAT64_C( 9.13), SIMDE_FLOAT64_C( 1.31), SIMDE_FLOAT64_C( 2.56), SIMDE_FLOAT64_C( 1.21) }, { SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 3.06), SIMDE_FLOAT64_C( 0.49), SIMDE_FLOAT64_C( 1435.55), SIMDE_FLOAT64_C( 9227.02), SIMDE_FLOAT64_C( 2.71), SIMDE_FLOAT64_C( 11.94), SIMDE_FLOAT64_C( 2.35) } }, { { SIMDE_FLOAT64_C( 6.72), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 3.99), SIMDE_FLOAT64_C( 2.10), SIMDE_FLOAT64_C( 2.80), SIMDE_FLOAT64_C( 5.43), SIMDE_FLOAT64_C( 3.71), SIMDE_FLOAT64_C( 6.65) }, { SIMDE_FLOAT64_C( 827.82), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 53.05), SIMDE_FLOAT64_C( 7.17), SIMDE_FLOAT64_C( 15.44), SIMDE_FLOAT64_C( 227.15), SIMDE_FLOAT64_C( 39.85), SIMDE_FLOAT64_C( 771.78) } }, { { SIMDE_FLOAT64_C( 3.81), SIMDE_FLOAT64_C( 4.42), SIMDE_FLOAT64_C( 8.46), SIMDE_FLOAT64_C( 3.88), SIMDE_FLOAT64_C( 7.48), SIMDE_FLOAT64_C( 9.11), SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( 5.94) }, { SIMDE_FLOAT64_C( 44.15), SIMDE_FLOAT64_C( 82.10), SIMDE_FLOAT64_C( 4721.06), SIMDE_FLOAT64_C( 47.42), SIMDE_FLOAT64_C( 1771.24), SIMDE_FLOAT64_C( 9044.29), SIMDE_FLOAT64_C( 0.80), SIMDE_FLOAT64_C( 378.93) } }, { { SIMDE_FLOAT64_C( 7.31), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 9.76), SIMDE_FLOAT64_C( 8.87), SIMDE_FLOAT64_C( 7.78), SIMDE_FLOAT64_C( 3.26), SIMDE_FLOAT64_C( 6.27), SIMDE_FLOAT64_C( 8.12) }, { SIMDE_FLOAT64_C( 1494.18), SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( 17325.63), SIMDE_FLOAT64_C( 7114.28), SIMDE_FLOAT64_C( 2391.27), SIMDE_FLOAT64_C( 25.05), SIMDE_FLOAT64_C( 527.48), SIMDE_FLOAT64_C( 3360.02) } }, { { SIMDE_FLOAT64_C( 4.67), SIMDE_FLOAT64_C( 6.67), SIMDE_FLOAT64_C( 5.39), SIMDE_FLOAT64_C( 3.79), SIMDE_FLOAT64_C( 7.97), SIMDE_FLOAT64_C( 7.95), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 4.69) }, { SIMDE_FLOAT64_C( 105.70), SIMDE_FLOAT64_C( 787.40), SIMDE_FLOAT64_C( 218.20), SIMDE_FLOAT64_C( 43.26), SIMDE_FLOAT64_C( 2891.86), SIMDE_FLOAT64_C( 2834.57), 
SIMDE_FLOAT64_C( 147.41), SIMDE_FLOAT64_C( 107.85) } }, { { SIMDE_FLOAT64_C( 8.47), SIMDE_FLOAT64_C( 9.00), SIMDE_FLOAT64_C( 6.79), SIMDE_FLOAT64_C( 1.27), SIMDE_FLOAT64_C( 4.42), SIMDE_FLOAT64_C( 0.49), SIMDE_FLOAT64_C( 7.92), SIMDE_FLOAT64_C( 8.23) }, { SIMDE_FLOAT64_C( 4768.52), SIMDE_FLOAT64_C( 8102.08), SIMDE_FLOAT64_C( 887.91), SIMDE_FLOAT64_C( 2.56), SIMDE_FLOAT64_C( 82.10), SIMDE_FLOAT64_C( 0.63), SIMDE_FLOAT64_C( 2750.77), SIMDE_FLOAT64_C( 3750.83) } }, { { SIMDE_FLOAT64_C( 4.92), SIMDE_FLOAT64_C( 6.38), SIMDE_FLOAT64_C( 2.12), SIMDE_FLOAT64_C( 2.40), SIMDE_FLOAT64_C( 5.49), SIMDE_FLOAT64_C( 2.70), SIMDE_FLOAT64_C( 8.35), SIMDE_FLOAT64_C( 2.80) }, { SIMDE_FLOAT64_C( 136.00), SIMDE_FLOAT64_C( 588.93), SIMDE_FLOAT64_C( 7.33), SIMDE_FLOAT64_C( 10.02), SIMDE_FLOAT64_C( 241.26), SIMDE_FLOAT64_C( 13.88), SIMDE_FLOAT64_C( 4229.18), SIMDE_FLOAT64_C( 15.44) } }, { { SIMDE_FLOAT64_C( 3.27), SIMDE_FLOAT64_C( 8.10), SIMDE_FLOAT64_C( 1.67), SIMDE_FLOAT64_C( 1.04), SIMDE_FLOAT64_C( 1.36), SIMDE_FLOAT64_C( 7.94), SIMDE_FLOAT64_C( 9.16), SIMDE_FLOAT64_C( 6.03) }, { SIMDE_FLOAT64_C( 25.31), SIMDE_FLOAT64_C( 3293.47), SIMDE_FLOAT64_C( 4.31), SIMDE_FLOAT64_C( 1.83), SIMDE_FLOAT64_C( 2.90), SIMDE_FLOAT64_C( 2806.36), SIMDE_FLOAT64_C( 9508.06), SIMDE_FLOAT64_C( 414.72) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_expm1_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_expm1_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 4.06), SIMDE_FLOAT64_C( 9.24), SIMDE_FLOAT64_C( 8.55), SIMDE_FLOAT64_C( 9.59), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( 2.26), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 4.06) }, UINT8_C(110), { SIMDE_FLOAT64_C( 5.74), 
SIMDE_FLOAT64_C( 3.51), SIMDE_FLOAT64_C( 5.07), SIMDE_FLOAT64_C( 6.58), SIMDE_FLOAT64_C( 8.73), SIMDE_FLOAT64_C( 4.57), SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 7.96) }, { SIMDE_FLOAT64_C( 4.06), SIMDE_FLOAT64_C( 32.45), SIMDE_FLOAT64_C( 158.17), SIMDE_FLOAT64_C( 719.54), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( 95.54), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 4.06) } }, { { SIMDE_FLOAT64_C( 1.32), SIMDE_FLOAT64_C( 1.91), SIMDE_FLOAT64_C( 7.33), SIMDE_FLOAT64_C( 4.66), SIMDE_FLOAT64_C( 3.27), SIMDE_FLOAT64_C( 4.31), SIMDE_FLOAT64_C( 0.71), SIMDE_FLOAT64_C( 9.20) }, UINT8_C(124), { SIMDE_FLOAT64_C( 5.28), SIMDE_FLOAT64_C( 1.80), SIMDE_FLOAT64_C( 4.85), SIMDE_FLOAT64_C( 0.49), SIMDE_FLOAT64_C( 1.99), SIMDE_FLOAT64_C( 8.91), SIMDE_FLOAT64_C( 9.72), SIMDE_FLOAT64_C( 0.53) }, { SIMDE_FLOAT64_C( 1.32), SIMDE_FLOAT64_C( 1.91), SIMDE_FLOAT64_C( 126.74), SIMDE_FLOAT64_C( 0.63), SIMDE_FLOAT64_C( 6.32), SIMDE_FLOAT64_C( 7404.66), SIMDE_FLOAT64_C( 16646.24), SIMDE_FLOAT64_C( 9.20) } }, { { SIMDE_FLOAT64_C( 8.50), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( 2.80), SIMDE_FLOAT64_C( 9.06), SIMDE_FLOAT64_C( 4.48), SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( 4.80), SIMDE_FLOAT64_C( 7.99) }, UINT8_C( 51), { SIMDE_FLOAT64_C( 1.38), SIMDE_FLOAT64_C( 6.72), SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( 1.85), SIMDE_FLOAT64_C( 4.68), SIMDE_FLOAT64_C( 1.54), SIMDE_FLOAT64_C( 3.76), SIMDE_FLOAT64_C( 2.01) }, { SIMDE_FLOAT64_C( 2.97), SIMDE_FLOAT64_C( 827.82), SIMDE_FLOAT64_C( 2.80), SIMDE_FLOAT64_C( 9.06), SIMDE_FLOAT64_C( 106.77), SIMDE_FLOAT64_C( 3.66), SIMDE_FLOAT64_C( 4.80), SIMDE_FLOAT64_C( 7.99) } }, { { SIMDE_FLOAT64_C( 6.20), SIMDE_FLOAT64_C( 7.03), SIMDE_FLOAT64_C( 6.32), SIMDE_FLOAT64_C( 6.91), SIMDE_FLOAT64_C( 6.23), SIMDE_FLOAT64_C( 3.88), SIMDE_FLOAT64_C( 2.18), SIMDE_FLOAT64_C( 8.02) }, UINT8_C(179), { SIMDE_FLOAT64_C( 2.67), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 7.63), SIMDE_FLOAT64_C( 2.40), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( 6.13), SIMDE_FLOAT64_C( 2.81), SIMDE_FLOAT64_C( 
3.34) }, { SIMDE_FLOAT64_C( 13.44), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 6.32), SIMDE_FLOAT64_C( 6.91), SIMDE_FLOAT64_C( 0.72), SIMDE_FLOAT64_C( 458.44), SIMDE_FLOAT64_C( 2.18), SIMDE_FLOAT64_C( 27.22) } }, { { SIMDE_FLOAT64_C( 5.19), SIMDE_FLOAT64_C( 7.29), SIMDE_FLOAT64_C( 3.93), SIMDE_FLOAT64_C( 10.00), SIMDE_FLOAT64_C( 5.28), SIMDE_FLOAT64_C( 9.58), SIMDE_FLOAT64_C( 1.38), SIMDE_FLOAT64_C( 2.00) }, UINT8_C(216), { SIMDE_FLOAT64_C( 3.23), SIMDE_FLOAT64_C( 6.68), SIMDE_FLOAT64_C( 1.35), SIMDE_FLOAT64_C( 6.99), SIMDE_FLOAT64_C( 8.69), SIMDE_FLOAT64_C( 7.55), SIMDE_FLOAT64_C( 4.02), SIMDE_FLOAT64_C( 5.01) }, { SIMDE_FLOAT64_C( 5.19), SIMDE_FLOAT64_C( 7.29), SIMDE_FLOAT64_C( 3.93), SIMDE_FLOAT64_C( 1084.72), SIMDE_FLOAT64_C( 5942.18), SIMDE_FLOAT64_C( 9.58), SIMDE_FLOAT64_C( 54.70), SIMDE_FLOAT64_C( 148.90) } }, { { SIMDE_FLOAT64_C( 4.45), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( 8.89), SIMDE_FLOAT64_C( 6.64), SIMDE_FLOAT64_C( 8.27), SIMDE_FLOAT64_C( 7.61), SIMDE_FLOAT64_C( 9.31), SIMDE_FLOAT64_C( 8.28) }, UINT8_C(185), { SIMDE_FLOAT64_C( 1.71), SIMDE_FLOAT64_C( 8.83), SIMDE_FLOAT64_C( 1.38), SIMDE_FLOAT64_C( 4.52), SIMDE_FLOAT64_C( 2.17), SIMDE_FLOAT64_C( 6.57), SIMDE_FLOAT64_C( 1.81), SIMDE_FLOAT64_C( 6.09) }, { SIMDE_FLOAT64_C( 4.53), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( 8.89), SIMDE_FLOAT64_C( 90.84), SIMDE_FLOAT64_C( 7.76), SIMDE_FLOAT64_C( 712.37), SIMDE_FLOAT64_C( 9.31), SIMDE_FLOAT64_C( 440.42) } }, { { SIMDE_FLOAT64_C( 6.57), SIMDE_FLOAT64_C( 7.09), SIMDE_FLOAT64_C( 5.68), SIMDE_FLOAT64_C( 7.95), SIMDE_FLOAT64_C( 9.09), SIMDE_FLOAT64_C( 5.48), SIMDE_FLOAT64_C( 1.18), SIMDE_FLOAT64_C( 5.77) }, UINT8_C(171), { SIMDE_FLOAT64_C( 8.17), SIMDE_FLOAT64_C( 4.46), SIMDE_FLOAT64_C( 4.37), SIMDE_FLOAT64_C( 2.19), SIMDE_FLOAT64_C( 9.47), SIMDE_FLOAT64_C( 8.83), SIMDE_FLOAT64_C( 2.44), SIMDE_FLOAT64_C( 8.36) }, { SIMDE_FLOAT64_C( 3532.34), SIMDE_FLOAT64_C( 85.49), SIMDE_FLOAT64_C( 5.68), SIMDE_FLOAT64_C( 7.94), SIMDE_FLOAT64_C( 9.09), SIMDE_FLOAT64_C( 6835.29), 
SIMDE_FLOAT64_C( 1.18), SIMDE_FLOAT64_C( 4271.69) } }, { { SIMDE_FLOAT64_C( 5.47), SIMDE_FLOAT64_C( 0.71), SIMDE_FLOAT64_C( 5.97), SIMDE_FLOAT64_C( 4.78), SIMDE_FLOAT64_C( 9.00), SIMDE_FLOAT64_C( 1.22), SIMDE_FLOAT64_C( 6.48), SIMDE_FLOAT64_C( 7.82) }, UINT8_C(171), { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 9.99), SIMDE_FLOAT64_C( 9.17), SIMDE_FLOAT64_C( 2.81), SIMDE_FLOAT64_C( 6.09), SIMDE_FLOAT64_C( 5.74), SIMDE_FLOAT64_C( 9.89), SIMDE_FLOAT64_C( 1.76) }, { SIMDE_FLOAT64_C( 1.72), SIMDE_FLOAT64_C( 21806.30), SIMDE_FLOAT64_C( 5.97), SIMDE_FLOAT64_C( 15.61), SIMDE_FLOAT64_C( 9.00), SIMDE_FLOAT64_C( 310.06), SIMDE_FLOAT64_C( 6.48), SIMDE_FLOAT64_C( 4.81) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_expm1_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_exp2_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -2.08), SIMDE_FLOAT32_C( 1.71), SIMDE_FLOAT32_C( 2.58), SIMDE_FLOAT32_C( -1.10) }, { SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 3.27), SIMDE_FLOAT32_C( 5.98), SIMDE_FLOAT32_C( 0.47) } }, { { SIMDE_FLOAT32_C( 1.65), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( -2.92), SIMDE_FLOAT32_C( -3.15) }, { SIMDE_FLOAT32_C( 3.14), SIMDE_FLOAT32_C( 1.21), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.11) } }, { { SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -3.95), SIMDE_FLOAT32_C( -1.01) }, { SIMDE_FLOAT32_C( 4.11), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.50) } }, { { SIMDE_FLOAT32_C( -2.84), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -3.08), SIMDE_FLOAT32_C( 0.96) }, { SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 1.95) } }, { { 
SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -1.38), SIMDE_FLOAT32_C( -3.16), SIMDE_FLOAT32_C( 0.33) }, { SIMDE_FLOAT32_C( 1.09), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 1.26) } }, { { SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( -3.70), SIMDE_FLOAT32_C( -0.75) }, { SIMDE_FLOAT32_C( 1.21), SIMDE_FLOAT32_C( 2.25), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.59) } }, { { SIMDE_FLOAT32_C( -1.25), SIMDE_FLOAT32_C( -2.03), SIMDE_FLOAT32_C( -1.41), SIMDE_FLOAT32_C( -1.44) }, { SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.37) } }, { { SIMDE_FLOAT32_C( -2.57), SIMDE_FLOAT32_C( -1.64), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( -0.66) }, { SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 2.36), SIMDE_FLOAT32_C( 0.63) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_exp2_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_exp2_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -1.05), SIMDE_FLOAT64_C( -3.96) }, { SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( 0.06) } }, { { SIMDE_FLOAT64_C( -3.17), SIMDE_FLOAT64_C( -0.18) }, { SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( 0.88) } }, { { SIMDE_FLOAT64_C( 2.75), SIMDE_FLOAT64_C( -3.78) }, { SIMDE_FLOAT64_C( 6.73), SIMDE_FLOAT64_C( 0.07) } }, { { SIMDE_FLOAT64_C( -3.43), SIMDE_FLOAT64_C( 0.85) }, { SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( 1.80) } }, { { SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( 1.23) }, { SIMDE_FLOAT64_C( 1.29), SIMDE_FLOAT64_C( 2.35) } }, { { SIMDE_FLOAT64_C( 1.92), SIMDE_FLOAT64_C( -0.38) }, { SIMDE_FLOAT64_C( 3.78), SIMDE_FLOAT64_C( 0.77) } }, { { SIMDE_FLOAT64_C( 3.87), SIMDE_FLOAT64_C( 2.98) }, { SIMDE_FLOAT64_C( 14.62), SIMDE_FLOAT64_C( 7.89) } }, { { 
SIMDE_FLOAT64_C( -1.16), SIMDE_FLOAT64_C( 1.76) }, { SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( 3.39) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_exp2_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_exp2_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 2.88), SIMDE_FLOAT32_C( 3.98), SIMDE_FLOAT32_C( 2.77), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( -1.90), SIMDE_FLOAT32_C( -1.78), SIMDE_FLOAT32_C( -1.91), SIMDE_FLOAT32_C( -1.34) }, { SIMDE_FLOAT32_C( 7.36), SIMDE_FLOAT32_C( 15.78), SIMDE_FLOAT32_C( 6.82), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 0.40) } }, { { SIMDE_FLOAT32_C( -2.07), SIMDE_FLOAT32_C( -3.29), SIMDE_FLOAT32_C( -3.96), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -3.42), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 0.63) }, { SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 1.10), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 1.37), SIMDE_FLOAT32_C( 1.55) } }, { { SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( -2.58), SIMDE_FLOAT32_C( 1.40), SIMDE_FLOAT32_C( -1.19), SIMDE_FLOAT32_C( 2.84), SIMDE_FLOAT32_C( 2.74), SIMDE_FLOAT32_C( -3.03) }, { SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 1.46), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 7.16), SIMDE_FLOAT32_C( 6.68), SIMDE_FLOAT32_C( 0.12) } }, { { SIMDE_FLOAT32_C( 2.79), SIMDE_FLOAT32_C( -3.44), SIMDE_FLOAT32_C( -3.79), SIMDE_FLOAT32_C( 3.43), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( -3.35), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( 2.71) }, { SIMDE_FLOAT32_C( 6.92), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.07), 
SIMDE_FLOAT32_C( 10.78), SIMDE_FLOAT32_C( 14.32), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 6.54) } }, { { SIMDE_FLOAT32_C( -3.37), SIMDE_FLOAT32_C( -2.06), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( -1.27), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 1.44), SIMDE_FLOAT32_C( 1.39), SIMDE_FLOAT32_C( 2.10) }, { SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 1.13), SIMDE_FLOAT32_C( 2.71), SIMDE_FLOAT32_C( 2.62), SIMDE_FLOAT32_C( 4.29) } }, { { SIMDE_FLOAT32_C( 2.15), SIMDE_FLOAT32_C( 1.43), SIMDE_FLOAT32_C( -1.76), SIMDE_FLOAT32_C( 2.73), SIMDE_FLOAT32_C( -2.98), SIMDE_FLOAT32_C( 2.69), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( 0.75) }, { SIMDE_FLOAT32_C( 4.44), SIMDE_FLOAT32_C( 2.69), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 6.63), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 6.45), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 1.68) } }, { { SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -1.85), SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -3.11), SIMDE_FLOAT32_C( 3.02), SIMDE_FLOAT32_C( -1.59) }, { SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 1.72), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 4.14), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 8.11), SIMDE_FLOAT32_C( 0.33) } }, { { SIMDE_FLOAT32_C( -2.54), SIMDE_FLOAT32_C( 3.23), SIMDE_FLOAT32_C( -2.16), SIMDE_FLOAT32_C( -2.71), SIMDE_FLOAT32_C( 3.88), SIMDE_FLOAT32_C( 1.02), SIMDE_FLOAT32_C( -4.00), SIMDE_FLOAT32_C( -3.49) }, { SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 9.38), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 14.72), SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.09) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_exp2_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int 
test_simde_mm256_exp2_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 1.66), SIMDE_FLOAT64_C( -0.38), SIMDE_FLOAT64_C( 1.40), SIMDE_FLOAT64_C( 3.84) }, { SIMDE_FLOAT64_C( 3.16), SIMDE_FLOAT64_C( 0.77), SIMDE_FLOAT64_C( 2.64), SIMDE_FLOAT64_C( 14.32) } }, { { SIMDE_FLOAT64_C( -2.15), SIMDE_FLOAT64_C( -0.83), SIMDE_FLOAT64_C( -2.32), SIMDE_FLOAT64_C( 1.94) }, { SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( 3.84) } }, { { SIMDE_FLOAT64_C( 3.43), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -0.88), SIMDE_FLOAT64_C( 0.76) }, { SIMDE_FLOAT64_C( 10.78), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( 1.69) } }, { { SIMDE_FLOAT64_C( 1.69), SIMDE_FLOAT64_C( 2.74), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( -2.45) }, { SIMDE_FLOAT64_C( 3.23), SIMDE_FLOAT64_C( 6.68), SIMDE_FLOAT64_C( 1.99), SIMDE_FLOAT64_C( 0.18) } }, { { SIMDE_FLOAT64_C( 2.22), SIMDE_FLOAT64_C( 1.74), SIMDE_FLOAT64_C( 3.15), SIMDE_FLOAT64_C( 0.54) }, { SIMDE_FLOAT64_C( 4.66), SIMDE_FLOAT64_C( 3.34), SIMDE_FLOAT64_C( 8.88), SIMDE_FLOAT64_C( 1.45) } }, { { SIMDE_FLOAT64_C( 1.30), SIMDE_FLOAT64_C( -1.80), SIMDE_FLOAT64_C( 2.76), SIMDE_FLOAT64_C( -4.00) }, { SIMDE_FLOAT64_C( 2.46), SIMDE_FLOAT64_C( 0.29), SIMDE_FLOAT64_C( 6.77), SIMDE_FLOAT64_C( 0.06) } }, { { SIMDE_FLOAT64_C( -2.49), SIMDE_FLOAT64_C( -1.07), SIMDE_FLOAT64_C( 1.81), SIMDE_FLOAT64_C( 0.86) }, { SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( 3.51), SIMDE_FLOAT64_C( 1.82) } }, { { SIMDE_FLOAT64_C( -2.31), SIMDE_FLOAT64_C( -2.25), SIMDE_FLOAT64_C( 2.43), SIMDE_FLOAT64_C( 3.36) }, { SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( 0.21), SIMDE_FLOAT64_C( 5.39), SIMDE_FLOAT64_C( 10.27) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_exp2_pd(a); simde_test_x86_assert_equal_f64x4(r, 
simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_exp2_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( -1.43), SIMDE_FLOAT32_C( 3.90), SIMDE_FLOAT32_C( 2.40), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 1.74), SIMDE_FLOAT32_C( 2.25), SIMDE_FLOAT32_C( -1.28), SIMDE_FLOAT32_C( 1.46), SIMDE_FLOAT32_C( 2.43), SIMDE_FLOAT32_C( -3.47), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( 3.90) }, { SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 14.93), SIMDE_FLOAT32_C( 5.28), SIMDE_FLOAT32_C( 1.16), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 1.66), SIMDE_FLOAT32_C( 3.34), SIMDE_FLOAT32_C( 4.76), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 2.75), SIMDE_FLOAT32_C( 5.39), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 14.93) } }, { { SIMDE_FLOAT32_C( -3.96), SIMDE_FLOAT32_C( 2.44), SIMDE_FLOAT32_C( -3.40), SIMDE_FLOAT32_C( -2.09), SIMDE_FLOAT32_C( -2.19), SIMDE_FLOAT32_C( 1.78), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( -3.80), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( -1.05), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 2.82), SIMDE_FLOAT32_C( -1.74), SIMDE_FLOAT32_C( 2.82), SIMDE_FLOAT32_C( 3.23), SIMDE_FLOAT32_C( 2.11) }, { SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 5.43), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 3.43), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 2.97), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 1.28), SIMDE_FLOAT32_C( 7.06), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 7.06), SIMDE_FLOAT32_C( 9.38), SIMDE_FLOAT32_C( 4.32) } }, { { SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( -2.20), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( -2.54), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( -2.24), SIMDE_FLOAT32_C( 2.19), SIMDE_FLOAT32_C( -0.24), 
SIMDE_FLOAT32_C( -3.99), SIMDE_FLOAT32_C( -3.09), SIMDE_FLOAT32_C( -2.77), SIMDE_FLOAT32_C( 2.43), SIMDE_FLOAT32_C( -2.56), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( -2.52) }, { SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 4.06), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 4.06), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 4.56), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 5.39), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 1.71), SIMDE_FLOAT32_C( 5.03), SIMDE_FLOAT32_C( 0.17) } }, { { SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( 2.93), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 1.02), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 3.30), SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( -1.72), SIMDE_FLOAT32_C( -1.76), SIMDE_FLOAT32_C( -2.42), SIMDE_FLOAT32_C( -2.90), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( -3.60), SIMDE_FLOAT32_C( -3.67), SIMDE_FLOAT32_C( -1.38), SIMDE_FLOAT32_C( -0.54) }, { SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 7.62), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( 9.85), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 1.41), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.69) } }, { { SIMDE_FLOAT32_C( -1.87), SIMDE_FLOAT32_C( -3.37), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -3.85), SIMDE_FLOAT32_C( -1.61), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( -1.60), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 1.14), SIMDE_FLOAT32_C( -3.17), SIMDE_FLOAT32_C( 1.48), SIMDE_FLOAT32_C( -2.09), SIMDE_FLOAT32_C( 3.16), SIMDE_FLOAT32_C( 2.96), SIMDE_FLOAT32_C( 1.12) }, { SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 1.91), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 1.03), 
SIMDE_FLOAT32_C( 2.20), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 2.79), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 8.94), SIMDE_FLOAT32_C( 7.78), SIMDE_FLOAT32_C( 2.17) } }, { { SIMDE_FLOAT32_C( 2.09), SIMDE_FLOAT32_C( -1.64), SIMDE_FLOAT32_C( -1.86), SIMDE_FLOAT32_C( -1.20), SIMDE_FLOAT32_C( -2.34), SIMDE_FLOAT32_C( 3.36), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( -3.05), SIMDE_FLOAT32_C( 2.18), SIMDE_FLOAT32_C( -3.59), SIMDE_FLOAT32_C( -2.65), SIMDE_FLOAT32_C( 2.52), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -3.36) }, { SIMDE_FLOAT32_C( 4.26), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 10.27), SIMDE_FLOAT32_C( 2.11), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 4.53), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 5.74), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( 0.10) } }, { { SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( -2.26), SIMDE_FLOAT32_C( -3.21), SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( -3.55), SIMDE_FLOAT32_C( -3.10), SIMDE_FLOAT32_C( -2.16), SIMDE_FLOAT32_C( -2.72), SIMDE_FLOAT32_C( 2.38), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( -3.56), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( -3.13), SIMDE_FLOAT32_C( 2.53) }, { SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 4.14), SIMDE_FLOAT32_C( 1.82), SIMDE_FLOAT32_C( 1.62), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 5.21), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 2.53), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 5.78) } }, { { SIMDE_FLOAT32_C( 3.70), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -2.68), SIMDE_FLOAT32_C( -2.64), SIMDE_FLOAT32_C( -1.63), SIMDE_FLOAT32_C( 2.40), SIMDE_FLOAT32_C( 1.26), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 
1.67), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( -3.31), SIMDE_FLOAT32_C( -2.52), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( 0.35) }, { SIMDE_FLOAT32_C( 13.00), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 5.28), SIMDE_FLOAT32_C( 2.39), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 1.51), SIMDE_FLOAT32_C( 3.18), SIMDE_FLOAT32_C( 1.59), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 1.27) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_exp2_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_exp2_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 1.94), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 3.64), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( -2.53), SIMDE_FLOAT32_C( -1.72), SIMDE_FLOAT32_C( -1.12), SIMDE_FLOAT32_C( -3.88), SIMDE_FLOAT32_C( 1.32), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 3.54), SIMDE_FLOAT32_C( -2.21) }, UINT8_C(173), { SIMDE_FLOAT32_C( -3.02), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 3.49), SIMDE_FLOAT32_C( -2.99), SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( -3.69), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 2.30), SIMDE_FLOAT32_C( 3.57), SIMDE_FLOAT32_C( 2.44), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 3.28), SIMDE_FLOAT32_C( 1.86), SIMDE_FLOAT32_C( -2.25), SIMDE_FLOAT32_C( -1.70) }, { SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 11.24), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( -1.72), 
SIMDE_FLOAT32_C( 4.92), SIMDE_FLOAT32_C( -3.88), SIMDE_FLOAT32_C( 1.32), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 3.54), SIMDE_FLOAT32_C( -2.21) } }, { { SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 3.52), SIMDE_FLOAT32_C( -3.95), SIMDE_FLOAT32_C( 2.97), SIMDE_FLOAT32_C( -2.20), SIMDE_FLOAT32_C( -1.07), SIMDE_FLOAT32_C( 3.09), SIMDE_FLOAT32_C( 3.12), SIMDE_FLOAT32_C( 3.97), SIMDE_FLOAT32_C( -1.59), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( -2.05), SIMDE_FLOAT32_C( 1.79), SIMDE_FLOAT32_C( -3.38), SIMDE_FLOAT32_C( -1.07) }, UINT8_C(225), { SIMDE_FLOAT32_C( -3.89), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( -2.82), SIMDE_FLOAT32_C( -3.58), SIMDE_FLOAT32_C( -3.89), SIMDE_FLOAT32_C( 3.49), SIMDE_FLOAT32_C( 3.99), SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( 1.05), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( 1.83), SIMDE_FLOAT32_C( -1.10), SIMDE_FLOAT32_C( 1.55), SIMDE_FLOAT32_C( -3.87), SIMDE_FLOAT32_C( -3.60), SIMDE_FLOAT32_C( 1.07) }, { SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 3.52), SIMDE_FLOAT32_C( -3.95), SIMDE_FLOAT32_C( 2.97), SIMDE_FLOAT32_C( -2.20), SIMDE_FLOAT32_C( 11.24), SIMDE_FLOAT32_C( 15.89), SIMDE_FLOAT32_C( 5.86), SIMDE_FLOAT32_C( 3.97), SIMDE_FLOAT32_C( -1.59), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( -2.05), SIMDE_FLOAT32_C( 1.79), SIMDE_FLOAT32_C( -3.38), SIMDE_FLOAT32_C( -1.07) } }, { { SIMDE_FLOAT32_C( -3.82), SIMDE_FLOAT32_C( 3.38), SIMDE_FLOAT32_C( 2.88), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( 2.46), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -3.13), SIMDE_FLOAT32_C( -1.99), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( -1.18), SIMDE_FLOAT32_C( 3.80), SIMDE_FLOAT32_C( -3.53), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( -1.07), SIMDE_FLOAT32_C( -3.42) }, UINT8_C(147), { SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( -3.00), SIMDE_FLOAT32_C( -2.20), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( -3.00), SIMDE_FLOAT32_C( -3.65), 
SIMDE_FLOAT32_C( -3.35), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 2.18), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( -1.64), SIMDE_FLOAT32_C( 2.31), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( 3.43), SIMDE_FLOAT32_C( 2.50), SIMDE_FLOAT32_C( -0.67) }, { SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 2.88), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( 1.74), SIMDE_FLOAT32_C( -1.99), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( -1.18), SIMDE_FLOAT32_C( 3.80), SIMDE_FLOAT32_C( -3.53), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( -1.07), SIMDE_FLOAT32_C( -3.42) } }, { { SIMDE_FLOAT32_C( 2.31), SIMDE_FLOAT32_C( -2.39), SIMDE_FLOAT32_C( -2.21), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -1.33), SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 1.49), SIMDE_FLOAT32_C( 2.12), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( -2.95), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 1.17) }, UINT8_C( 16), { SIMDE_FLOAT32_C( 2.73), SIMDE_FLOAT32_C( -3.23), SIMDE_FLOAT32_C( 3.57), SIMDE_FLOAT32_C( 3.08), SIMDE_FLOAT32_C( -2.59), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 1.26), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 2.73), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( -3.08), SIMDE_FLOAT32_C( 2.17), SIMDE_FLOAT32_C( -1.93), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( -0.33) }, { SIMDE_FLOAT32_C( 2.31), SIMDE_FLOAT32_C( -2.39), SIMDE_FLOAT32_C( -2.21), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( -1.33), SIMDE_FLOAT32_C( 2.32), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 1.49), SIMDE_FLOAT32_C( 2.12), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( -2.95), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 1.17) } }, { { SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( -3.21), SIMDE_FLOAT32_C( -3.65), SIMDE_FLOAT32_C( -3.29), SIMDE_FLOAT32_C( 3.11), SIMDE_FLOAT32_C( 0.88), 
SIMDE_FLOAT32_C( 2.21), SIMDE_FLOAT32_C( 1.23), SIMDE_FLOAT32_C( -2.13), SIMDE_FLOAT32_C( -2.55), SIMDE_FLOAT32_C( 2.29), SIMDE_FLOAT32_C( 3.44), SIMDE_FLOAT32_C( 2.38), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( 2.01), SIMDE_FLOAT32_C( 1.11) }, UINT8_C(254), { SIMDE_FLOAT32_C( 1.58), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 1.63), SIMDE_FLOAT32_C( -2.04), SIMDE_FLOAT32_C( -2.55), SIMDE_FLOAT32_C( -1.40), SIMDE_FLOAT32_C( -3.31), SIMDE_FLOAT32_C( 1.02), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( 2.86), SIMDE_FLOAT32_C( 3.09), SIMDE_FLOAT32_C( 3.77), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( -1.23), SIMDE_FLOAT32_C( 1.81), SIMDE_FLOAT32_C( 0.13) }, { SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( 1.14), SIMDE_FLOAT32_C( 3.10), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( -2.13), SIMDE_FLOAT32_C( -2.55), SIMDE_FLOAT32_C( 2.29), SIMDE_FLOAT32_C( 3.44), SIMDE_FLOAT32_C( 2.38), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( 2.01), SIMDE_FLOAT32_C( 1.11) } }, { { SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 2.53), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( -3.52), SIMDE_FLOAT32_C( -2.14), SIMDE_FLOAT32_C( 2.18), SIMDE_FLOAT32_C( 2.77), SIMDE_FLOAT32_C( -2.70), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( -1.78), SIMDE_FLOAT32_C( 3.31), SIMDE_FLOAT32_C( -2.32), SIMDE_FLOAT32_C( 2.44), SIMDE_FLOAT32_C( 0.89) }, UINT8_C(128), { SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 2.85), SIMDE_FLOAT32_C( 3.32), SIMDE_FLOAT32_C( 2.67), SIMDE_FLOAT32_C( 3.55), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -1.81), SIMDE_FLOAT32_C( 2.41), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -2.03), SIMDE_FLOAT32_C( -2.25), SIMDE_FLOAT32_C( 2.20), SIMDE_FLOAT32_C( 3.78), SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( -2.68), SIMDE_FLOAT32_C( 2.31) }, { SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 2.53), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( -3.52), 
SIMDE_FLOAT32_C( -2.14), SIMDE_FLOAT32_C( 5.31), SIMDE_FLOAT32_C( 2.77), SIMDE_FLOAT32_C( -2.70), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( -1.78), SIMDE_FLOAT32_C( 3.31), SIMDE_FLOAT32_C( -2.32), SIMDE_FLOAT32_C( 2.44), SIMDE_FLOAT32_C( 0.89) } }, { { SIMDE_FLOAT32_C( -2.87), SIMDE_FLOAT32_C( -2.68), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( -2.39), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( -2.77), SIMDE_FLOAT32_C( -3.62), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 1.79), SIMDE_FLOAT32_C( -1.40), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( 3.47), SIMDE_FLOAT32_C( -2.97), SIMDE_FLOAT32_C( -3.32), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 1.11) }, UINT8_C( 84), { SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( 3.09), SIMDE_FLOAT32_C( -3.00), SIMDE_FLOAT32_C( 1.98), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 3.94), SIMDE_FLOAT32_C( 3.25), SIMDE_FLOAT32_C( -1.37), SIMDE_FLOAT32_C( 3.72), SIMDE_FLOAT32_C( 1.13), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( 2.26), SIMDE_FLOAT32_C( 1.26) }, { SIMDE_FLOAT32_C( -2.87), SIMDE_FLOAT32_C( -2.68), SIMDE_FLOAT32_C( 8.51), SIMDE_FLOAT32_C( -2.39), SIMDE_FLOAT32_C( 3.94), SIMDE_FLOAT32_C( -2.77), SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 1.79), SIMDE_FLOAT32_C( -1.40), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( 3.47), SIMDE_FLOAT32_C( -2.97), SIMDE_FLOAT32_C( -3.32), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 1.11) } }, { { SIMDE_FLOAT32_C( -2.93), SIMDE_FLOAT32_C( 3.87), SIMDE_FLOAT32_C( -3.56), SIMDE_FLOAT32_C( -1.70), SIMDE_FLOAT32_C( -3.75), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -3.91), SIMDE_FLOAT32_C( -1.16), SIMDE_FLOAT32_C( -3.29), SIMDE_FLOAT32_C( 3.56), SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( -2.61), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( -3.02), SIMDE_FLOAT32_C( -3.07), SIMDE_FLOAT32_C( -2.45) }, UINT8_C(114), { SIMDE_FLOAT32_C( -3.99), SIMDE_FLOAT32_C( -1.45), SIMDE_FLOAT32_C( -1.26), SIMDE_FLOAT32_C( 1.51), SIMDE_FLOAT32_C( 2.98), 
SIMDE_FLOAT32_C( -1.31), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( -2.40), SIMDE_FLOAT32_C( -1.59), SIMDE_FLOAT32_C( -2.11), SIMDE_FLOAT32_C( 1.55), SIMDE_FLOAT32_C( -3.56), SIMDE_FLOAT32_C( -3.85), SIMDE_FLOAT32_C( -1.19), SIMDE_FLOAT32_C( -2.49), SIMDE_FLOAT32_C( -3.98) }, { SIMDE_FLOAT32_C( -2.93), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( -3.56), SIMDE_FLOAT32_C( -1.70), SIMDE_FLOAT32_C( 7.89), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( -1.16), SIMDE_FLOAT32_C( -3.29), SIMDE_FLOAT32_C( 3.56), SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( -2.61), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( -3.02), SIMDE_FLOAT32_C( -3.07), SIMDE_FLOAT32_C( -2.45) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_exp2_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_exp2_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 3.48), SIMDE_FLOAT64_C( -0.87), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( -1.18), SIMDE_FLOAT64_C( -0.93), SIMDE_FLOAT64_C( -0.41), SIMDE_FLOAT64_C( -1.58), SIMDE_FLOAT64_C( -1.72) }, { SIMDE_FLOAT64_C( 11.16), SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 1.45), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( 0.30) } }, { { SIMDE_FLOAT64_C( -0.88), SIMDE_FLOAT64_C( 1.04), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( -3.45), SIMDE_FLOAT64_C( 3.01), SIMDE_FLOAT64_C( -3.59), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 3.12) }, { SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( 2.06), SIMDE_FLOAT64_C( 1.79), SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( 8.06), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 1.27), SIMDE_FLOAT64_C( 8.69) } }, { { SIMDE_FLOAT64_C( 
-1.74), SIMDE_FLOAT64_C( -2.12), SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( -1.12), SIMDE_FLOAT64_C( -1.89), SIMDE_FLOAT64_C( 2.97), SIMDE_FLOAT64_C( 2.38) }, { SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 1.06), SIMDE_FLOAT64_C( 1.43), SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( 7.84), SIMDE_FLOAT64_C( 5.21) } }, { { SIMDE_FLOAT64_C( 2.06), SIMDE_FLOAT64_C( 2.07), SIMDE_FLOAT64_C( -3.17), SIMDE_FLOAT64_C( 1.53), SIMDE_FLOAT64_C( -1.34), SIMDE_FLOAT64_C( 1.50), SIMDE_FLOAT64_C( -0.18), SIMDE_FLOAT64_C( -1.86) }, { SIMDE_FLOAT64_C( 4.17), SIMDE_FLOAT64_C( 4.20), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( 2.89), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 2.83), SIMDE_FLOAT64_C( 0.88), SIMDE_FLOAT64_C( 0.28) } }, { { SIMDE_FLOAT64_C( -3.38), SIMDE_FLOAT64_C( -3.65), SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.06), SIMDE_FLOAT64_C( 3.37), SIMDE_FLOAT64_C( 1.97), SIMDE_FLOAT64_C( 3.07) }, { SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 1.95), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( 10.34), SIMDE_FLOAT64_C( 3.92), SIMDE_FLOAT64_C( 8.40) } }, { { SIMDE_FLOAT64_C( 0.41), SIMDE_FLOAT64_C( -1.19), SIMDE_FLOAT64_C( 3.61), SIMDE_FLOAT64_C( -0.57), SIMDE_FLOAT64_C( -0.78), SIMDE_FLOAT64_C( -0.04), SIMDE_FLOAT64_C( -1.46), SIMDE_FLOAT64_C( 1.48) }, { SIMDE_FLOAT64_C( 1.33), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 12.21), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 0.97), SIMDE_FLOAT64_C( 0.36), SIMDE_FLOAT64_C( 2.79) } }, { { SIMDE_FLOAT64_C( 1.84), SIMDE_FLOAT64_C( 2.63), SIMDE_FLOAT64_C( -1.99), SIMDE_FLOAT64_C( -3.28), SIMDE_FLOAT64_C( -3.26), SIMDE_FLOAT64_C( -3.02), SIMDE_FLOAT64_C( 3.10), SIMDE_FLOAT64_C( 2.79) }, { SIMDE_FLOAT64_C( 3.58), SIMDE_FLOAT64_C( 6.19), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( 8.57), SIMDE_FLOAT64_C( 6.92) } }, { { 
SIMDE_FLOAT64_C( 3.05), SIMDE_FLOAT64_C( 3.93), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( -2.28), SIMDE_FLOAT64_C( 1.42), SIMDE_FLOAT64_C( -3.86), SIMDE_FLOAT64_C( -0.14), SIMDE_FLOAT64_C( 2.05) }, { SIMDE_FLOAT64_C( 8.28), SIMDE_FLOAT64_C( 15.24), SIMDE_FLOAT64_C( 1.26), SIMDE_FLOAT64_C( 0.21), SIMDE_FLOAT64_C( 2.68), SIMDE_FLOAT64_C( 0.07), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( 4.14) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_exp2_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_exp2_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 1.08), SIMDE_FLOAT64_C( -1.78), SIMDE_FLOAT64_C( -3.94), SIMDE_FLOAT64_C( 2.91), SIMDE_FLOAT64_C( -3.39), SIMDE_FLOAT64_C( -0.34), SIMDE_FLOAT64_C( -1.05), SIMDE_FLOAT64_C( -1.87) }, UINT8_C( 59), { SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 3.70), SIMDE_FLOAT64_C( -0.34), SIMDE_FLOAT64_C( -2.49), SIMDE_FLOAT64_C( -3.69), SIMDE_FLOAT64_C( 1.16), SIMDE_FLOAT64_C( -0.71), SIMDE_FLOAT64_C( 3.16) }, { SIMDE_FLOAT64_C( 1.39), SIMDE_FLOAT64_C( 13.00), SIMDE_FLOAT64_C( -3.94), SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 2.23), SIMDE_FLOAT64_C( -1.05), SIMDE_FLOAT64_C( -1.87) } }, { { SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( -1.70), SIMDE_FLOAT64_C( -1.78), SIMDE_FLOAT64_C( -0.42), SIMDE_FLOAT64_C( -3.00), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( 3.64) }, UINT8_C(181), { SIMDE_FLOAT64_C( -3.64), SIMDE_FLOAT64_C( 1.07), SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( 2.92), SIMDE_FLOAT64_C( -2.83), SIMDE_FLOAT64_C( 1.23), SIMDE_FLOAT64_C( 2.98) }, { SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( 1.06), SIMDE_FLOAT64_C( 
-1.78), SIMDE_FLOAT64_C( 7.57), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( 7.89) } }, { { SIMDE_FLOAT64_C( -3.92), SIMDE_FLOAT64_C( 1.84), SIMDE_FLOAT64_C( -1.36), SIMDE_FLOAT64_C( -0.97), SIMDE_FLOAT64_C( 3.97), SIMDE_FLOAT64_C( -2.62), SIMDE_FLOAT64_C( 3.51), SIMDE_FLOAT64_C( 3.67) }, UINT8_C( 39), { SIMDE_FLOAT64_C( -2.98), SIMDE_FLOAT64_C( 3.98), SIMDE_FLOAT64_C( -1.79), SIMDE_FLOAT64_C( 0.31), SIMDE_FLOAT64_C( 3.14), SIMDE_FLOAT64_C( 2.73), SIMDE_FLOAT64_C( -2.90), SIMDE_FLOAT64_C( -2.56) }, { SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( 15.78), SIMDE_FLOAT64_C( 0.29), SIMDE_FLOAT64_C( -0.97), SIMDE_FLOAT64_C( 3.97), SIMDE_FLOAT64_C( 6.63), SIMDE_FLOAT64_C( 3.51), SIMDE_FLOAT64_C( 3.67) } }, { { SIMDE_FLOAT64_C( -3.05), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( -1.56), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( -3.35), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( -2.62) }, UINT8_C(222), { SIMDE_FLOAT64_C( 3.48), SIMDE_FLOAT64_C( -3.70), SIMDE_FLOAT64_C( 1.91), SIMDE_FLOAT64_C( 0.71), SIMDE_FLOAT64_C( 3.28), SIMDE_FLOAT64_C( 1.99), SIMDE_FLOAT64_C( -1.45), SIMDE_FLOAT64_C( -2.07) }, { SIMDE_FLOAT64_C( -3.05), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 3.76), SIMDE_FLOAT64_C( 1.64), SIMDE_FLOAT64_C( 9.71), SIMDE_FLOAT64_C( -3.35), SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( 0.24) } }, { { SIMDE_FLOAT64_C( -2.98), SIMDE_FLOAT64_C( -1.47), SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( -3.47), SIMDE_FLOAT64_C( -1.80), SIMDE_FLOAT64_C( -3.64), SIMDE_FLOAT64_C( -2.45), SIMDE_FLOAT64_C( -1.83) }, UINT8_C(173), { SIMDE_FLOAT64_C( 1.86), SIMDE_FLOAT64_C( -2.68), SIMDE_FLOAT64_C( -2.71), SIMDE_FLOAT64_C( 2.96), SIMDE_FLOAT64_C( -1.24), SIMDE_FLOAT64_C( -1.76), SIMDE_FLOAT64_C( -0.37), SIMDE_FLOAT64_C( 1.20) }, { SIMDE_FLOAT64_C( 3.63), SIMDE_FLOAT64_C( -1.47), SIMDE_FLOAT64_C( 0.15), SIMDE_FLOAT64_C( 7.78), SIMDE_FLOAT64_C( -1.80), SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( -2.45), SIMDE_FLOAT64_C( 2.30) } }, { { SIMDE_FLOAT64_C( 2.35), 
SIMDE_FLOAT64_C( 3.95), SIMDE_FLOAT64_C( 1.85), SIMDE_FLOAT64_C( -1.18), SIMDE_FLOAT64_C( -2.67), SIMDE_FLOAT64_C( -1.41), SIMDE_FLOAT64_C( -1.70), SIMDE_FLOAT64_C( -2.37) }, UINT8_C(128), { SIMDE_FLOAT64_C( 3.01), SIMDE_FLOAT64_C( -3.08), SIMDE_FLOAT64_C( 2.48), SIMDE_FLOAT64_C( -2.44), SIMDE_FLOAT64_C( -1.16), SIMDE_FLOAT64_C( 3.50), SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( 2.16) }, { SIMDE_FLOAT64_C( 2.35), SIMDE_FLOAT64_C( 3.95), SIMDE_FLOAT64_C( 1.85), SIMDE_FLOAT64_C( -1.18), SIMDE_FLOAT64_C( -2.67), SIMDE_FLOAT64_C( -1.41), SIMDE_FLOAT64_C( -1.70), SIMDE_FLOAT64_C( 4.47) } }, { { SIMDE_FLOAT64_C( -3.97), SIMDE_FLOAT64_C( 2.28), SIMDE_FLOAT64_C( 2.51), SIMDE_FLOAT64_C( -2.42), SIMDE_FLOAT64_C( -3.54), SIMDE_FLOAT64_C( -2.92), SIMDE_FLOAT64_C( 3.44), SIMDE_FLOAT64_C( -2.23) }, UINT8_C( 29), { SIMDE_FLOAT64_C( 2.39), SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( -1.97), SIMDE_FLOAT64_C( -2.27), SIMDE_FLOAT64_C( -1.04), SIMDE_FLOAT64_C( -2.02), SIMDE_FLOAT64_C( 3.58) }, { SIMDE_FLOAT64_C( 5.24), SIMDE_FLOAT64_C( 2.28), SIMDE_FLOAT64_C( 1.53), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( 0.21), SIMDE_FLOAT64_C( -2.92), SIMDE_FLOAT64_C( 3.44), SIMDE_FLOAT64_C( -2.23) } }, { { SIMDE_FLOAT64_C( 1.78), SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( -1.84), SIMDE_FLOAT64_C( -3.92), SIMDE_FLOAT64_C( 0.94), SIMDE_FLOAT64_C( -1.34), SIMDE_FLOAT64_C( 3.09), SIMDE_FLOAT64_C( 1.86) }, UINT8_C(207), { SIMDE_FLOAT64_C( -3.35), SIMDE_FLOAT64_C( -3.29), SIMDE_FLOAT64_C( -3.36), SIMDE_FLOAT64_C( 0.74), SIMDE_FLOAT64_C( 2.86), SIMDE_FLOAT64_C( -3.33), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( 1.37) }, { SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 1.67), SIMDE_FLOAT64_C( 0.94), SIMDE_FLOAT64_C( -1.34), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 2.58) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = 
simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_exp2_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_exp10_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -1.28), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( 3.28), SIMDE_FLOAT32_C( -3.13) }, { SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 1905.46), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 1.43), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 1.40), SIMDE_FLOAT32_C( -2.59) }, { SIMDE_FLOAT32_C( 26.92), SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 25.12), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( 1.67) }, { SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 1071.52), SIMDE_FLOAT32_C( 46.77) } }, { { SIMDE_FLOAT32_C( -3.68), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 1.43) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.69), SIMDE_FLOAT32_C( 33.88), SIMDE_FLOAT32_C( 26.92) } }, { { SIMDE_FLOAT32_C( -1.86), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 3.72), SIMDE_FLOAT32_C( -1.56) }, { SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 1.91), SIMDE_FLOAT32_C( 5248.07), SIMDE_FLOAT32_C( 0.03) } }, { { SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 2.62), SIMDE_FLOAT32_C( -1.43), SIMDE_FLOAT32_C( 0.99) }, { SIMDE_FLOAT32_C( 3.55), SIMDE_FLOAT32_C( 416.87), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 9.77) } }, { { SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 3.09), SIMDE_FLOAT32_C( 2.38), SIMDE_FLOAT32_C( -3.37) }, { SIMDE_FLOAT32_C( 97.72), SIMDE_FLOAT32_C( 1230.27), SIMDE_FLOAT32_C( 239.88), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -2.15), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.56) }, { SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 1.66), SIMDE_FLOAT32_C( 2.57), 
SIMDE_FLOAT32_C( 3.63) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_exp10_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; }
/* Table-driven test for simde_mm_exp10_pd.
 *
 * Every entry of test_vec is loaded into a 2-lane double vector, passed
 * through simde_mm_exp10_pd, and compared against the expected lanes stored
 * next to the input.  Returns 0 once all entries have been checked.
 *
 * NOTE(review): the expected values look like 10**a rounded to two decimal
 * places (e.g. 2.71 -> 512.86, 0.82 -> 6.61); regenerate them the same way
 * if vectors are ever added -- confirm against the generator.
 */
static int test_simde_mm_exp10_pd (SIMDE_MUNIT_TEST_ARGS) {
  /* a: the two input lanes; r: the two expected output lanes. */
  static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 2.71), SIMDE_FLOAT64_C( -2.06) }, { SIMDE_FLOAT64_C( 512.86), SIMDE_FLOAT64_C( 0.01) } }, { { SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( 2.37) }, { SIMDE_FLOAT64_C( 6.61), SIMDE_FLOAT64_C( 234.42) } }, { { SIMDE_FLOAT64_C( -1.27), SIMDE_FLOAT64_C( -2.72) }, { SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( 1.72) }, { SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 52.48) } }, { { SIMDE_FLOAT64_C( -2.59), SIMDE_FLOAT64_C( -1.62) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.02) } }, { { SIMDE_FLOAT64_C( 1.83), SIMDE_FLOAT64_C( 3.25) }, { SIMDE_FLOAT64_C( 67.61), SIMDE_FLOAT64_C( 1778.28) } }, { { SIMDE_FLOAT64_C( -2.12), SIMDE_FLOAT64_C( 3.99) }, { SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 9772.37) } }, { { SIMDE_FLOAT64_C( -3.59), SIMDE_FLOAT64_C( 0.94) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 8.71) } } };
  /* The element count is derived from the array itself, so adding or
   * removing vectors needs no other change. */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
    simde__m128d r = simde_mm_exp10_pd(a);
    /* NOTE(review): the trailing 1 is presumably the comparison precision in
     * decimal digits, which would explain the coarsely rounded expected
     * values -- confirm against the assert helper's definition. */
    simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1);
  }
  return 0;
}
static int test_simde_mm256_exp10_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( -2.69), SIMDE_FLOAT32_C( 3.91), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( 2.51), SIMDE_FLOAT32_C( -1.38), SIMDE_FLOAT32_C( -3.31), SIMDE_FLOAT32_C( -0.75) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 8128.31), SIMDE_FLOAT32_C(
4.07), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 323.59), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.18) } }, { { SIMDE_FLOAT32_C( 3.51), SIMDE_FLOAT32_C( -3.93), SIMDE_FLOAT32_C( -3.82), SIMDE_FLOAT32_C( 1.46), SIMDE_FLOAT32_C( -3.04), SIMDE_FLOAT32_C( 3.29), SIMDE_FLOAT32_C( -3.04), SIMDE_FLOAT32_C( -3.66) }, { SIMDE_FLOAT32_C( 3235.94), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 28.84), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1949.84), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -2.34), SIMDE_FLOAT32_C( -3.98), SIMDE_FLOAT32_C( -1.70), SIMDE_FLOAT32_C( -1.23), SIMDE_FLOAT32_C( -3.97), SIMDE_FLOAT32_C( -3.62), SIMDE_FLOAT32_C( 3.06), SIMDE_FLOAT32_C( -1.19) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1148.15), SIMDE_FLOAT32_C( 0.06) } }, { { SIMDE_FLOAT32_C( 3.70), SIMDE_FLOAT32_C( -1.22), SIMDE_FLOAT32_C( 2.16), SIMDE_FLOAT32_C( 3.83), SIMDE_FLOAT32_C( -3.41), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( -2.66), SIMDE_FLOAT32_C( -2.09) }, { SIMDE_FLOAT32_C( 5011.87), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 144.54), SIMDE_FLOAT32_C( 6760.83), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.01) } }, { { SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 1.11), SIMDE_FLOAT32_C( -2.06), SIMDE_FLOAT32_C( -3.42), SIMDE_FLOAT32_C( 1.79), SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( -3.92) }, { SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 89.13), SIMDE_FLOAT32_C( 12.88), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 61.66), SIMDE_FLOAT32_C( 15.49), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 1.86), SIMDE_FLOAT32_C( 1.37), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 2.82), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 2.50), SIMDE_FLOAT32_C( 3.16), SIMDE_FLOAT32_C( 2.33) }, { SIMDE_FLOAT32_C( 72.44), 
SIMDE_FLOAT32_C( 23.44), SIMDE_FLOAT32_C( 34.67), SIMDE_FLOAT32_C( 660.69), SIMDE_FLOAT32_C( 4.57), SIMDE_FLOAT32_C( 316.23), SIMDE_FLOAT32_C( 1445.44), SIMDE_FLOAT32_C( 213.80) } }, { { SIMDE_FLOAT32_C( 2.52), SIMDE_FLOAT32_C( -2.54), SIMDE_FLOAT32_C( -2.90), SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( -2.16), SIMDE_FLOAT32_C( -3.84), SIMDE_FLOAT32_C( -2.64), SIMDE_FLOAT32_C( -2.46) }, { SIMDE_FLOAT32_C( 331.13), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 354.81), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -1.06), SIMDE_FLOAT32_C( 3.52), SIMDE_FLOAT32_C( -2.64), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( -1.29), SIMDE_FLOAT32_C( 1.44), SIMDE_FLOAT32_C( 2.48) }, { SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 3311.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 27.54), SIMDE_FLOAT32_C( 302.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_exp10_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_exp10_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( -3.01), SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( -0.62) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 2.34), SIMDE_FLOAT64_C( 1.26), SIMDE_FLOAT64_C( 0.24) } }, { { SIMDE_FLOAT64_C( 1.29), SIMDE_FLOAT64_C( 2.86), SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( -3.99) }, { SIMDE_FLOAT64_C( 19.50), SIMDE_FLOAT64_C( 724.44), SIMDE_FLOAT64_C( 5.62), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -2.93), SIMDE_FLOAT64_C( 3.81), SIMDE_FLOAT64_C( 3.34), SIMDE_FLOAT64_C( 3.21) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 6456.54), SIMDE_FLOAT64_C( 
2187.76), SIMDE_FLOAT64_C( 1621.81) } }, { { SIMDE_FLOAT64_C( -2.76), SIMDE_FLOAT64_C( -1.49), SIMDE_FLOAT64_C( 3.76), SIMDE_FLOAT64_C( -1.66) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 5754.40), SIMDE_FLOAT64_C( 0.02) } }, { { SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( -2.70), SIMDE_FLOAT64_C( 2.90), SIMDE_FLOAT64_C( -0.73) }, { SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 794.33), SIMDE_FLOAT64_C( 0.19) } }, { { SIMDE_FLOAT64_C( -1.67), SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( -2.21), SIMDE_FLOAT64_C( -3.15) }, { SIMDE_FLOAT64_C( 0.02), SIMDE_FLOAT64_C( 1.58), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 2.30), SIMDE_FLOAT64_C( 3.98), SIMDE_FLOAT64_C( -0.86), SIMDE_FLOAT64_C( -1.96) }, { SIMDE_FLOAT64_C( 199.53), SIMDE_FLOAT64_C( 9549.93), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 0.01) } }, { { SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( 1.49) }, { SIMDE_FLOAT64_C( 3.16), SIMDE_FLOAT64_C( 3.98), SIMDE_FLOAT64_C( 4.47), SIMDE_FLOAT64_C( 30.90) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_exp10_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_exp10_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 2.15), SIMDE_FLOAT32_C( 3.90), SIMDE_FLOAT32_C( -3.06), SIMDE_FLOAT32_C( -3.99), SIMDE_FLOAT32_C( -1.49), SIMDE_FLOAT32_C( 3.34), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 3.24), SIMDE_FLOAT32_C( 2.10), SIMDE_FLOAT32_C( -1.61), SIMDE_FLOAT32_C( -3.33), SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( -2.51), SIMDE_FLOAT32_C( 3.50), SIMDE_FLOAT32_C( -1.30) }, { SIMDE_FLOAT32_C( 141.25), SIMDE_FLOAT32_C( 7943.28), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 
0.00), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 2187.76), SIMDE_FLOAT32_C( 1.20), SIMDE_FLOAT32_C( 1.41), SIMDE_FLOAT32_C( 1737.80), SIMDE_FLOAT32_C( 125.89), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 48.98), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 3162.28), SIMDE_FLOAT32_C( 0.05) } }, { { SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -1.13), SIMDE_FLOAT32_C( -1.51), SIMDE_FLOAT32_C( 1.48), SIMDE_FLOAT32_C( -3.11), SIMDE_FLOAT32_C( -2.56), SIMDE_FLOAT32_C( -2.35), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( -1.51), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -3.88), SIMDE_FLOAT32_C( -2.12), SIMDE_FLOAT32_C( 3.49), SIMDE_FLOAT32_C( -2.42), SIMDE_FLOAT32_C( -3.98) }, { SIMDE_FLOAT32_C( 3.39), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 30.20), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 107.15), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 6.92), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 3090.30), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 3.38), SIMDE_FLOAT32_C( -1.48), SIMDE_FLOAT32_C( -3.96), SIMDE_FLOAT32_C( -2.11), SIMDE_FLOAT32_C( -2.14), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( -2.89), SIMDE_FLOAT32_C( -1.78), SIMDE_FLOAT32_C( -3.57), SIMDE_FLOAT32_C( -2.23), SIMDE_FLOAT32_C( 3.90), SIMDE_FLOAT32_C( -2.08), SIMDE_FLOAT32_C( -2.73), SIMDE_FLOAT32_C( -1.40), SIMDE_FLOAT32_C( 2.46) }, { SIMDE_FLOAT32_C( 2398.83), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 1.29), SIMDE_FLOAT32_C( 109.65), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 7943.28), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 288.40) } }, { { SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 1.09), SIMDE_FLOAT32_C( -0.06), 
SIMDE_FLOAT32_C( 1.03), SIMDE_FLOAT32_C( 2.53), SIMDE_FLOAT32_C( 1.59), SIMDE_FLOAT32_C( -3.59), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( -3.91), SIMDE_FLOAT32_C( 1.26), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -2.04), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 2.26), SIMDE_FLOAT32_C( -2.02), SIMDE_FLOAT32_C( 0.13) }, { SIMDE_FLOAT32_C( 1.41), SIMDE_FLOAT32_C( 12.30), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 10.72), SIMDE_FLOAT32_C( 338.84), SIMDE_FLOAT32_C( 38.90), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 3.63), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 18.20), SIMDE_FLOAT32_C( 4.79), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 5.50), SIMDE_FLOAT32_C( 181.97), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 1.35) } }, { { SIMDE_FLOAT32_C( -3.22), SIMDE_FLOAT32_C( -1.98), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( -1.36), SIMDE_FLOAT32_C( 2.14), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( -3.65), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 1.52), SIMDE_FLOAT32_C( -3.75), SIMDE_FLOAT32_C( 2.41), SIMDE_FLOAT32_C( 2.80), SIMDE_FLOAT32_C( -1.15), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( -1.06) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 104.71), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 138.04), SIMDE_FLOAT32_C( 1.15), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 3.09), SIMDE_FLOAT32_C( 33.11), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 257.04), SIMDE_FLOAT32_C( 630.96), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 7.41), SIMDE_FLOAT32_C( 0.09) } }, { { SIMDE_FLOAT32_C( 3.94), SIMDE_FLOAT32_C( -3.19), SIMDE_FLOAT32_C( 3.97), SIMDE_FLOAT32_C( 2.47), SIMDE_FLOAT32_C( 2.41), SIMDE_FLOAT32_C( -3.62), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 2.49), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( 3.70), SIMDE_FLOAT32_C( -3.55), SIMDE_FLOAT32_C( -1.62), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( -1.56), SIMDE_FLOAT32_C( 2.51), SIMDE_FLOAT32_C( 2.74) }, { SIMDE_FLOAT32_C( 8709.64), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 9332.54), 
SIMDE_FLOAT32_C( 295.12), SIMDE_FLOAT32_C( 257.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 309.03), SIMDE_FLOAT32_C( 43.65), SIMDE_FLOAT32_C( 5011.87), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 91.20), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 323.59), SIMDE_FLOAT32_C( 549.54) } }, { { SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -2.61), SIMDE_FLOAT32_C( -1.40), SIMDE_FLOAT32_C( -3.41), SIMDE_FLOAT32_C( 1.14), SIMDE_FLOAT32_C( -1.05), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( -1.34), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -2.54), SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( -3.64), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 2.00) }, { SIMDE_FLOAT32_C( 2.88), SIMDE_FLOAT32_C( 3.39), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 13.80), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 12.02), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 112.20), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.51), SIMDE_FLOAT32_C( 100.00) } }, { { SIMDE_FLOAT32_C( -2.83), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 3.58), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 3.48), SIMDE_FLOAT32_C( 2.07), SIMDE_FLOAT32_C( -1.60), SIMDE_FLOAT32_C( 3.19), SIMDE_FLOAT32_C( 2.53), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 1.15), SIMDE_FLOAT32_C( -3.03), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 1.43) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.34), SIMDE_FLOAT32_C( 2.88), SIMDE_FLOAT32_C( 3801.89), SIMDE_FLOAT32_C( 5.75), SIMDE_FLOAT32_C( 3019.95), SIMDE_FLOAT32_C( 117.49), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 1548.82), SIMDE_FLOAT32_C( 338.84), SIMDE_FLOAT32_C( 6.03), SIMDE_FLOAT32_C( 14.13), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 26.92) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) 
{ simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_exp10_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_exp10_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( 2.53), SIMDE_FLOAT32_C( -3.09), SIMDE_FLOAT32_C( 2.30), SIMDE_FLOAT32_C( -3.02), SIMDE_FLOAT32_C( -1.71), SIMDE_FLOAT32_C( -2.65), SIMDE_FLOAT32_C( 2.34), SIMDE_FLOAT32_C( 3.56), SIMDE_FLOAT32_C( -1.53), SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( 2.13), SIMDE_FLOAT32_C( 1.98), SIMDE_FLOAT32_C( -3.96), SIMDE_FLOAT32_C( -3.24), SIMDE_FLOAT32_C( -2.96) }, UINT8_C( 58), { SIMDE_FLOAT32_C( 2.80), SIMDE_FLOAT32_C( 1.59), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( -1.26), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( -2.77), SIMDE_FLOAT32_C( 3.35), SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( 2.21), SIMDE_FLOAT32_C( -1.15), SIMDE_FLOAT32_C( -1.25), SIMDE_FLOAT32_C( 0.74) }, { SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( 38.90), SIMDE_FLOAT32_C( -3.09), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 6.46), SIMDE_FLOAT32_C( 56.23), SIMDE_FLOAT32_C( -2.65), SIMDE_FLOAT32_C( 2.34), SIMDE_FLOAT32_C( 3.56), SIMDE_FLOAT32_C( -1.53), SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( 2.13), SIMDE_FLOAT32_C( 1.98), SIMDE_FLOAT32_C( -3.96), SIMDE_FLOAT32_C( -3.24), SIMDE_FLOAT32_C( -2.96) } }, { { SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( -2.95), SIMDE_FLOAT32_C( 1.72), SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( -1.60), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 1.61), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( -2.25), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( -1.15), SIMDE_FLOAT32_C( -2.21), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -3.54), SIMDE_FLOAT32_C( 
-0.71) }, UINT8_C(193), { SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( 2.29), SIMDE_FLOAT32_C( -1.46), SIMDE_FLOAT32_C( -1.68), SIMDE_FLOAT32_C( 3.52), SIMDE_FLOAT32_C( -2.11), SIMDE_FLOAT32_C( -3.63), SIMDE_FLOAT32_C( 1.85), SIMDE_FLOAT32_C( -2.78), SIMDE_FLOAT32_C( 2.58), SIMDE_FLOAT32_C( -3.29), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 1.02) }, { SIMDE_FLOAT32_C( 6.17), SIMDE_FLOAT32_C( -2.95), SIMDE_FLOAT32_C( 1.72), SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( -1.60), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -2.25), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( -1.15), SIMDE_FLOAT32_C( -2.21), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -3.54), SIMDE_FLOAT32_C( -0.71) } }, { { SIMDE_FLOAT32_C( -2.96), SIMDE_FLOAT32_C( -1.49), SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( 1.10), SIMDE_FLOAT32_C( -3.88), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 2.86), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 3.14), SIMDE_FLOAT32_C( -3.35), SIMDE_FLOAT32_C( -3.66), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( -2.89), SIMDE_FLOAT32_C( -0.37), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 1.89) }, UINT8_C(215), { SIMDE_FLOAT32_C( -1.19), SIMDE_FLOAT32_C( -3.57), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -1.67), SIMDE_FLOAT32_C( -1.68), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( -3.82), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( -1.07), SIMDE_FLOAT32_C( -3.11), SIMDE_FLOAT32_C( 3.52), SIMDE_FLOAT32_C( 2.25), SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 3.29), SIMDE_FLOAT32_C( 3.86) }, { SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 1.10), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 3.14), SIMDE_FLOAT32_C( -3.35), SIMDE_FLOAT32_C( -3.66), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( -2.89), SIMDE_FLOAT32_C( -0.37), SIMDE_FLOAT32_C( 0.51), 
SIMDE_FLOAT32_C( 1.89) } }, { { SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 3.98), SIMDE_FLOAT32_C( -3.74), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( 3.40), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( -1.56), SIMDE_FLOAT32_C( 1.01), SIMDE_FLOAT32_C( 3.80), SIMDE_FLOAT32_C( 2.95), SIMDE_FLOAT32_C( -1.09), SIMDE_FLOAT32_C( -2.53), SIMDE_FLOAT32_C( -2.24) }, UINT8_C(253), { SIMDE_FLOAT32_C( 1.45), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 1.66), SIMDE_FLOAT32_C( -2.19), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( -2.79), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 1.16), SIMDE_FLOAT32_C( -3.28), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.49), SIMDE_FLOAT32_C( 1.26), SIMDE_FLOAT32_C( -1.71), SIMDE_FLOAT32_C( -1.63), SIMDE_FLOAT32_C( -2.77), SIMDE_FLOAT32_C( 2.69) }, { SIMDE_FLOAT32_C( 28.18), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 45.71), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 1.86), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 5.62), SIMDE_FLOAT32_C( 14.45), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( -1.56), SIMDE_FLOAT32_C( 1.01), SIMDE_FLOAT32_C( 3.80), SIMDE_FLOAT32_C( 2.95), SIMDE_FLOAT32_C( -1.09), SIMDE_FLOAT32_C( -2.53), SIMDE_FLOAT32_C( -2.24) } }, { { SIMDE_FLOAT32_C( -1.65), SIMDE_FLOAT32_C( -2.52), SIMDE_FLOAT32_C( -2.06), SIMDE_FLOAT32_C( 2.18), SIMDE_FLOAT32_C( -3.11), SIMDE_FLOAT32_C( 1.85), SIMDE_FLOAT32_C( -1.65), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -1.14), SIMDE_FLOAT32_C( -1.85), SIMDE_FLOAT32_C( -1.73), SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -2.90), SIMDE_FLOAT32_C( -2.93) }, UINT8_C(202), { SIMDE_FLOAT32_C( 2.76), SIMDE_FLOAT32_C( -1.12), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 3.97), SIMDE_FLOAT32_C( 3.63), SIMDE_FLOAT32_C( -2.46), SIMDE_FLOAT32_C( -3.31), SIMDE_FLOAT32_C( -1.37), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 2.42), SIMDE_FLOAT32_C( 3.18), SIMDE_FLOAT32_C( -0.39), 
SIMDE_FLOAT32_C( -3.23), SIMDE_FLOAT32_C( -3.34) }, { SIMDE_FLOAT32_C( -1.65), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( -2.06), SIMDE_FLOAT32_C( 9332.54), SIMDE_FLOAT32_C( -3.11), SIMDE_FLOAT32_C( 1.85), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -1.14), SIMDE_FLOAT32_C( -1.85), SIMDE_FLOAT32_C( -1.73), SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -2.90), SIMDE_FLOAT32_C( -2.93) } }, { { SIMDE_FLOAT32_C( 1.55), SIMDE_FLOAT32_C( 2.95), SIMDE_FLOAT32_C( -2.45), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( -2.70), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 2.25), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( 3.14), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 3.08), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( 1.11), SIMDE_FLOAT32_C( -3.85), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( -0.12) }, UINT8_C( 4), { SIMDE_FLOAT32_C( 3.68), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( -1.34), SIMDE_FLOAT32_C( -2.78), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 1.29), SIMDE_FLOAT32_C( 1.28), SIMDE_FLOAT32_C( -1.51), SIMDE_FLOAT32_C( -1.79), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -2.34), SIMDE_FLOAT32_C( 1.81), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -1.67), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.57) }, { SIMDE_FLOAT32_C( 1.55), SIMDE_FLOAT32_C( 2.95), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( -2.70), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 2.25), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( 3.14), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 3.08), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( 1.11), SIMDE_FLOAT32_C( -3.85), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( -0.12) } }, { { SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 2.76), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( -3.26), SIMDE_FLOAT32_C( 1.01), SIMDE_FLOAT32_C( -3.81), SIMDE_FLOAT32_C( 3.89), SIMDE_FLOAT32_C( -2.98), SIMDE_FLOAT32_C( 3.27), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( 2.14), SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( 2.35), SIMDE_FLOAT32_C( 
-1.99), SIMDE_FLOAT32_C( -1.55), SIMDE_FLOAT32_C( 2.03) }, UINT8_C( 74), { SIMDE_FLOAT32_C( 1.11), SIMDE_FLOAT32_C( 3.25), SIMDE_FLOAT32_C( -1.61), SIMDE_FLOAT32_C( -1.60), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( -3.77), SIMDE_FLOAT32_C( 2.54), SIMDE_FLOAT32_C( -1.58), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( -3.14), SIMDE_FLOAT32_C( 1.78), SIMDE_FLOAT32_C( -3.87), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.53) }, { SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 1778.28), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 1.01), SIMDE_FLOAT32_C( -3.81), SIMDE_FLOAT32_C( 3.98), SIMDE_FLOAT32_C( -2.98), SIMDE_FLOAT32_C( 3.27), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( 2.14), SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( 2.35), SIMDE_FLOAT32_C( -1.99), SIMDE_FLOAT32_C( -1.55), SIMDE_FLOAT32_C( 2.03) } }, { { SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 1.48), SIMDE_FLOAT32_C( -2.46), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 1.37), SIMDE_FLOAT32_C( -1.44), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( -3.58), SIMDE_FLOAT32_C( -3.30), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 2.77), SIMDE_FLOAT32_C( -1.29), SIMDE_FLOAT32_C( 2.18), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( -3.44), SIMDE_FLOAT32_C( -0.71) }, UINT8_C(120), { SIMDE_FLOAT32_C( -1.05), SIMDE_FLOAT32_C( 1.68), SIMDE_FLOAT32_C( -3.41), SIMDE_FLOAT32_C( 3.82), SIMDE_FLOAT32_C( -1.71), SIMDE_FLOAT32_C( -3.18), SIMDE_FLOAT32_C( 2.36), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 1.52), SIMDE_FLOAT32_C( 3.22), SIMDE_FLOAT32_C( -1.52), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( 3.01), SIMDE_FLOAT32_C( -1.50), SIMDE_FLOAT32_C( -2.56) }, { SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 1.48), SIMDE_FLOAT32_C( -2.46), SIMDE_FLOAT32_C( 6606.93), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 229.09), SIMDE_FLOAT32_C( -3.58), SIMDE_FLOAT32_C( -3.30), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 2.77), SIMDE_FLOAT32_C( -1.29), SIMDE_FLOAT32_C( 2.18), 
SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( -3.44), SIMDE_FLOAT32_C( -0.71) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_exp10_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_exp10_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( -1.02), SIMDE_FLOAT64_C( -3.98), SIMDE_FLOAT64_C( 3.95), SIMDE_FLOAT64_C( 0.31), SIMDE_FLOAT64_C( -2.47), SIMDE_FLOAT64_C( -3.25) }, { SIMDE_FLOAT64_C( 8.51), SIMDE_FLOAT64_C( 7.94), SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 8912.51), SIMDE_FLOAT64_C( 2.04), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -3.33), SIMDE_FLOAT64_C( 1.18), SIMDE_FLOAT64_C( -1.87), SIMDE_FLOAT64_C( 0.97), SIMDE_FLOAT64_C( 2.34), SIMDE_FLOAT64_C( -3.33), SIMDE_FLOAT64_C( -0.73), SIMDE_FLOAT64_C( 2.80) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 15.14), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 9.33), SIMDE_FLOAT64_C( 218.78), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.19), SIMDE_FLOAT64_C( 630.96) } }, { { SIMDE_FLOAT64_C( 2.86), SIMDE_FLOAT64_C( 2.64), SIMDE_FLOAT64_C( -2.88), SIMDE_FLOAT64_C( 3.99), SIMDE_FLOAT64_C( 2.91), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( -2.79), SIMDE_FLOAT64_C( 3.08) }, { SIMDE_FLOAT64_C( 724.44), SIMDE_FLOAT64_C( 436.52), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 9772.37), SIMDE_FLOAT64_C( 812.83), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1202.26) } }, { { SIMDE_FLOAT64_C( 3.79), SIMDE_FLOAT64_C( 1.10), SIMDE_FLOAT64_C( -2.75), SIMDE_FLOAT64_C( 2.52), SIMDE_FLOAT64_C( 1.05), SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( -2.28), SIMDE_FLOAT64_C( -2.02) }, { 
SIMDE_FLOAT64_C( 6165.95), SIMDE_FLOAT64_C( 12.59), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 331.13), SIMDE_FLOAT64_C( 11.22), SIMDE_FLOAT64_C( 2.34), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 0.01) } }, { { SIMDE_FLOAT64_C( -2.73), SIMDE_FLOAT64_C( 0.70), SIMDE_FLOAT64_C( -2.00), SIMDE_FLOAT64_C( -2.78), SIMDE_FLOAT64_C( -2.99), SIMDE_FLOAT64_C( -0.48), SIMDE_FLOAT64_C( -2.02), SIMDE_FLOAT64_C( -2.32) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 5.01), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -3.30), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( 2.65), SIMDE_FLOAT64_C( 3.04), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( -2.08), SIMDE_FLOAT64_C( 1.84), SIMDE_FLOAT64_C( -0.36) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.29), SIMDE_FLOAT64_C( 446.68), SIMDE_FLOAT64_C( 1096.48), SIMDE_FLOAT64_C( 6.03), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 69.18), SIMDE_FLOAT64_C( 0.44) } }, { { SIMDE_FLOAT64_C( -3.45), SIMDE_FLOAT64_C( 2.96), SIMDE_FLOAT64_C( -0.37), SIMDE_FLOAT64_C( 3.46), SIMDE_FLOAT64_C( -1.89), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( 2.54), SIMDE_FLOAT64_C( -2.10) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 912.01), SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 2884.03), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 6.92), SIMDE_FLOAT64_C( 346.74), SIMDE_FLOAT64_C( 0.01) } }, { { SIMDE_FLOAT64_C( -2.06), SIMDE_FLOAT64_C( 3.79), SIMDE_FLOAT64_C( -3.58), SIMDE_FLOAT64_C( 2.98), SIMDE_FLOAT64_C( 0.16), SIMDE_FLOAT64_C( -1.86), SIMDE_FLOAT64_C( -3.04), SIMDE_FLOAT64_C( 1.43) }, { SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 6165.95), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 954.99), SIMDE_FLOAT64_C( 1.45), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 26.92) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_exp10_pd(a); 
simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_exp10_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 2.35), SIMDE_FLOAT64_C( 3.51), SIMDE_FLOAT64_C( -2.45), SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( -2.12), SIMDE_FLOAT64_C( -1.70), SIMDE_FLOAT64_C( 3.27), SIMDE_FLOAT64_C( -3.97) }, UINT8_C( 85), { SIMDE_FLOAT64_C( -1.97), SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( -3.87), SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( -1.78), SIMDE_FLOAT64_C( 2.41), SIMDE_FLOAT64_C( 3.67) }, { SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 3.51), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 1.51), SIMDE_FLOAT64_C( -1.70), SIMDE_FLOAT64_C( 257.04), SIMDE_FLOAT64_C( -3.97) } }, { { SIMDE_FLOAT64_C( -1.74), SIMDE_FLOAT64_C( -3.97), SIMDE_FLOAT64_C( 3.52), SIMDE_FLOAT64_C( -3.35), SIMDE_FLOAT64_C( -1.31), SIMDE_FLOAT64_C( 1.64), SIMDE_FLOAT64_C( 3.64), SIMDE_FLOAT64_C( 1.35) }, UINT8_C(237), { SIMDE_FLOAT64_C( -3.09), SIMDE_FLOAT64_C( 0.21), SIMDE_FLOAT64_C( -0.49), SIMDE_FLOAT64_C( 1.71), SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( -2.14), SIMDE_FLOAT64_C( 1.22), SIMDE_FLOAT64_C( 1.16) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -3.97), SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( 51.29), SIMDE_FLOAT64_C( -1.31), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 16.60), SIMDE_FLOAT64_C( 14.45) } }, { { SIMDE_FLOAT64_C( 2.80), SIMDE_FLOAT64_C( 3.11), SIMDE_FLOAT64_C( 3.45), SIMDE_FLOAT64_C( 2.07), SIMDE_FLOAT64_C( 3.14), SIMDE_FLOAT64_C( -1.25), SIMDE_FLOAT64_C( -3.90), SIMDE_FLOAT64_C( -0.54) }, UINT8_C(112), { SIMDE_FLOAT64_C( -3.77), SIMDE_FLOAT64_C( 3.65), SIMDE_FLOAT64_C( -3.35), SIMDE_FLOAT64_C( 2.64), SIMDE_FLOAT64_C( 3.31), SIMDE_FLOAT64_C( -1.09), SIMDE_FLOAT64_C( 2.67), SIMDE_FLOAT64_C( 2.83) }, { SIMDE_FLOAT64_C( 2.80), SIMDE_FLOAT64_C( 
3.11), SIMDE_FLOAT64_C( 3.45), SIMDE_FLOAT64_C( 2.07), SIMDE_FLOAT64_C( 2041.74), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 467.74), SIMDE_FLOAT64_C( -0.54) } }, { { SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( -2.64), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( -0.80), SIMDE_FLOAT64_C( 2.71), SIMDE_FLOAT64_C( -3.20), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( -1.08) }, UINT8_C( 28), { SIMDE_FLOAT64_C( -2.18), SIMDE_FLOAT64_C( 2.52), SIMDE_FLOAT64_C( 2.16), SIMDE_FLOAT64_C( 3.05), SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( 2.15), SIMDE_FLOAT64_C( -0.87) }, { SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( -2.64), SIMDE_FLOAT64_C( 144.54), SIMDE_FLOAT64_C( 1122.02), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( -3.20), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( -1.08) } }, { { SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( 1.30), SIMDE_FLOAT64_C( 1.89), SIMDE_FLOAT64_C( -0.87), SIMDE_FLOAT64_C( -3.24), SIMDE_FLOAT64_C( 0.31), SIMDE_FLOAT64_C( -0.65), SIMDE_FLOAT64_C( -3.59) }, UINT8_C(243), { SIMDE_FLOAT64_C( -2.01), SIMDE_FLOAT64_C( 3.72), SIMDE_FLOAT64_C( 3.87), SIMDE_FLOAT64_C( -3.34), SIMDE_FLOAT64_C( 2.55), SIMDE_FLOAT64_C( -0.57), SIMDE_FLOAT64_C( -1.99), SIMDE_FLOAT64_C( -0.99) }, { SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 5248.07), SIMDE_FLOAT64_C( 1.89), SIMDE_FLOAT64_C( -0.87), SIMDE_FLOAT64_C( 354.81), SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 0.10) } }, { { SIMDE_FLOAT64_C( 2.63), SIMDE_FLOAT64_C( -3.28), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( -1.27), SIMDE_FLOAT64_C( -0.36), SIMDE_FLOAT64_C( -3.89), SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( -1.84) }, UINT8_C( 79), { SIMDE_FLOAT64_C( -0.40), SIMDE_FLOAT64_C( 1.84), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -2.25), SIMDE_FLOAT64_C( -3.02), SIMDE_FLOAT64_C( 2.26), SIMDE_FLOAT64_C( 3.05), SIMDE_FLOAT64_C( 2.87) }, { SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 69.18), SIMDE_FLOAT64_C( 0.17), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( -0.36), SIMDE_FLOAT64_C( -3.89), SIMDE_FLOAT64_C( 1122.02), 
SIMDE_FLOAT64_C( -1.84) } }, { { SIMDE_FLOAT64_C( -2.62), SIMDE_FLOAT64_C( 3.81), SIMDE_FLOAT64_C( -0.82), SIMDE_FLOAT64_C( 0.73), SIMDE_FLOAT64_C( -3.78), SIMDE_FLOAT64_C( -3.86), SIMDE_FLOAT64_C( 2.72), SIMDE_FLOAT64_C( 3.93) }, UINT8_C(113), { SIMDE_FLOAT64_C( 3.38), SIMDE_FLOAT64_C( 2.48), SIMDE_FLOAT64_C( -0.55), SIMDE_FLOAT64_C( -2.61), SIMDE_FLOAT64_C( -2.50), SIMDE_FLOAT64_C( -1.93), SIMDE_FLOAT64_C( -1.89), SIMDE_FLOAT64_C( 1.31) }, { SIMDE_FLOAT64_C( 2398.83), SIMDE_FLOAT64_C( 3.81), SIMDE_FLOAT64_C( -0.82), SIMDE_FLOAT64_C( 0.73), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 3.93) } }, { { SIMDE_FLOAT64_C( 0.80), SIMDE_FLOAT64_C( 1.75), SIMDE_FLOAT64_C( 1.42), SIMDE_FLOAT64_C( -2.64), SIMDE_FLOAT64_C( 3.91), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( 1.76) }, UINT8_C(247), { SIMDE_FLOAT64_C( 2.71), SIMDE_FLOAT64_C( 2.74), SIMDE_FLOAT64_C( 1.18), SIMDE_FLOAT64_C( 1.76), SIMDE_FLOAT64_C( 1.61), SIMDE_FLOAT64_C( 2.56), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( -3.21) }, { SIMDE_FLOAT64_C( 512.86), SIMDE_FLOAT64_C( 549.54), SIMDE_FLOAT64_C( 15.14), SIMDE_FLOAT64_C( -2.64), SIMDE_FLOAT64_C( 40.74), SIMDE_FLOAT64_C( 363.08), SIMDE_FLOAT64_C( 37.15), SIMDE_FLOAT64_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_exp10_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_idivrem_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i rem; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 1747596798), INT32_C(-2063703989), INT32_C( 527472553), INT32_C(-1403096998)), simde_mm_set_epi32(INT32_C( -269879152), INT32_C( -177038436), INT32_C( 377180600), INT32_C( -518586410)), 
simde_mm_set_epi32(INT32_C( 128321886), INT32_C( -116281193), INT32_C( 150291953), INT32_C( -365924178)), simde_mm_set_epi32(INT32_C( -6), INT32_C( 11), INT32_C( 1), INT32_C( 2)) }, { simde_mm_set_epi32(INT32_C( -374673026), INT32_C(-1240805178), INT32_C( 1568850865), INT32_C(-1142977539)), simde_mm_set_epi32(INT32_C( 172780273), INT32_C( 168508556), INT32_C( -491358722), INT32_C( -230071737)), simde_mm_set_epi32(INT32_C( -29112480), INT32_C( -61245286), INT32_C( 94774699), INT32_C( -222690591)), simde_mm_set_epi32(INT32_C( -2), INT32_C( -7), INT32_C( -3), INT32_C( 4)) }, { simde_mm_set_epi32(INT32_C( 1492341726), INT32_C( 298608154), INT32_C( 1250819173), INT32_C( -650971253)), simde_mm_set_epi32(INT32_C( 298065861), INT32_C( -521585931), INT32_C( 330694282), INT32_C( 40997390)), simde_mm_set_epi32(INT32_C( 2012421), INT32_C( 298608154), INT32_C( 258736327), INT32_C( -36010403)), simde_mm_set_epi32(INT32_C( 5), INT32_C( 0), INT32_C( 3), INT32_C( -15)) }, { simde_mm_set_epi32(INT32_C(-1586327268), INT32_C( 1691051285), INT32_C( 50347892), INT32_C( 728425428)), simde_mm_set_epi32(INT32_C( -441202718), INT32_C( 294920921), INT32_C( -411581651), INT32_C( -167991823)), simde_mm_set_epi32(INT32_C( -262719114), INT32_C( 216446680), INT32_C( 50347892), INT32_C( 56458136)), simde_mm_set_epi32(INT32_C( 3), INT32_C( 5), INT32_C( 0), INT32_C( -4)) }, { simde_mm_set_epi32(INT32_C( 492373082), INT32_C( -13096811), INT32_C(-2087181083), INT32_C( -341007878)), simde_mm_set_epi32(INT32_C( 123290430), INT32_C( -298778955), INT32_C( 223555334), INT32_C( -332615043)), simde_mm_set_epi32(INT32_C( 122501792), INT32_C( -13096811), INT32_C( -75183077), INT32_C( -8392835)), simde_mm_set_epi32(INT32_C( 3), INT32_C( 0), INT32_C( -9), INT32_C( 1)) }, { simde_mm_set_epi32(INT32_C(-1004264650), INT32_C( 1580565751), INT32_C( -471064457), INT32_C( 2081361826)), simde_mm_set_epi32(INT32_C( 328620632), INT32_C( -324312655), INT32_C( -184752009), INT32_C( -354760000)), simde_mm_set_epi32(INT32_C( 
-18402754), INT32_C( 283315131), INT32_C( -101560439), INT32_C( 307561826)), simde_mm_set_epi32(INT32_C( -3), INT32_C( -4), INT32_C( 2), INT32_C( -5)) }, { simde_mm_set_epi32(INT32_C( 542053192), INT32_C( 499863549), INT32_C( 957375358), INT32_C(-1291033589)), simde_mm_set_epi32(INT32_C( 427537184), INT32_C( 493530770), INT32_C( -356091799), INT32_C( 29647056)), simde_mm_set_epi32(INT32_C( 114516008), INT32_C( 6332779), INT32_C( 245191760), INT32_C( -16210181)), simde_mm_set_epi32(INT32_C( 1), INT32_C( 1), INT32_C( -2), INT32_C( -43)) }, { simde_mm_set_epi32(INT32_C( -193211433), INT32_C( -857989172), INT32_C( -448329300), INT32_C(-1601364212)), simde_mm_set_epi32(INT32_C( -284723308), INT32_C( -171790410), INT32_C( 457043765), INT32_C( -97355006)), simde_mm_set_epi32(INT32_C( -193211433), INT32_C( -170827532), INT32_C( -448329300), INT32_C( -43684116)), simde_mm_set_epi32(INT32_C( 0), INT32_C( 4), INT32_C( 0), INT32_C( 16)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i rem; simde__m128i r = simde_mm_idivrem_epi32(&rem, test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); simde_assert_m128i_i32(rem, ==, test_vec[i].rem); } return 0; } static int test_simde_mm256_idivrem_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i rem; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C(-1079516608), INT32_C( -708153743), INT32_C( 1508722402), INT32_C(-2074345640), INT32_C( 1747596798), INT32_C(-2063703989), INT32_C( 527472553), INT32_C(-1403096998)), simde_mm256_set_epi32(INT32_C( 172780273), INT32_C( 168508556), INT32_C( -491358722), INT32_C( -230071737), INT32_C( -93668257), INT32_C( -310201295), INT32_C( 392212716), INT32_C( -285744385)), simde_mm256_set_epi32(INT32_C( -42834970), INT32_C( -34119519), INT32_C( 34646236), INT32_C( -3700007), INT32_C( 61568172), INT32_C( -202496219), INT32_C( 135259837), INT32_C( -260119458)), 
simde_mm256_set_epi32(INT32_C( -6), INT32_C( -4), INT32_C( -3), INT32_C( 9), INT32_C( -18), INT32_C( 6), INT32_C( 1), INT32_C( 4)) }, { simde_mm256_set_epi32(INT32_C( 1192263444), INT32_C(-2086343723), INT32_C( 1322777130), INT32_C( 163989560), INT32_C( 1492341726), INT32_C( 298608154), INT32_C( 1250819173), INT32_C( -650971253)), simde_mm256_set_epi32(INT32_C( -441202718), INT32_C( 294920921), INT32_C( -411581651), INT32_C( -167991823), INT32_C( -396581817), INT32_C( 422762821), INT32_C( 12586973), INT32_C( 182106357)), simde_mm256_set_epi32(INT32_C( 309858008), INT32_C( -21897276), INT32_C( 88032177), INT32_C( 163989560), INT32_C( 302596275), INT32_C( 298608154), INT32_C( 4708846), INT32_C( -104652182)), simde_mm256_set_epi32(INT32_C( -2), INT32_C( -7), INT32_C( -3), INT32_C( 0), INT32_C( -3), INT32_C( 0), INT32_C( 99), INT32_C( -3)) }, { simde_mm256_set_epi32(INT32_C( 493161721), INT32_C(-1195115819), INT32_C( 894221337), INT32_C(-1330460172), INT32_C( 492373082), INT32_C( -13096811), INT32_C(-2087181083), INT32_C( -341007878)), simde_mm256_set_epi32(INT32_C( 328620632), INT32_C( -324312655), INT32_C( -184752009), INT32_C( -354760000), INT32_C( -251066163), INT32_C( 395141437), INT32_C( -117766115), INT32_C( 520340456)), simde_mm256_set_epi32(INT32_C( 164541089), INT32_C( -222177854), INT32_C( 155213301), INT32_C( -266180172), INT32_C( 241306919), INT32_C( -13096811), INT32_C( -85157128), INT32_C( -341007878)), simde_mm256_set_epi32(INT32_C( 1), INT32_C( 3), INT32_C( -4), INT32_C( 3), INT32_C( -1), INT32_C( 0), INT32_C( 17), INT32_C( 0)) }, { simde_mm256_set_epi32(INT32_C( 1710148738), INT32_C( 1974123080), INT32_C(-1424367196), INT32_C( 118588227), INT32_C( 542053192), INT32_C( 499863549), INT32_C( 957375358), INT32_C(-1291033589)), simde_mm256_set_epi32(INT32_C( -284723308), INT32_C( -171790410), INT32_C( 457043765), INT32_C( -97355006), INT32_C( -48302859), INT32_C( -214497293), INT32_C( -112082325), INT32_C( -400341053)), simde_mm256_set_epi32(INT32_C( 
1808890), INT32_C( 84428570), INT32_C( -53235901), INT32_C( 21233221), INT32_C( 10721743), INT32_C( 70868963), INT32_C( 60716758), INT32_C( -90010430)), simde_mm256_set_epi32(INT32_C( -6), INT32_C( -11), INT32_C( -3), INT32_C( -1), INT32_C( -11), INT32_C( -2), INT32_C( -8), INT32_C( 3)) }, { simde_mm256_set_epi32(INT32_C( 1734496959), INT32_C( 380846712), INT32_C( -941967689), INT32_C( -739443621), INT32_C( 1995198557), INT32_C( -980655097), INT32_C(-1888383043), INT32_C( 1779168063)), simde_mm256_set_epi32(INT32_C( 440775120), INT32_C( -129501140), INT32_C( -362589725), INT32_C( -352466550), INT32_C( 67477586), INT32_C( 108492873), INT32_C( 360489056), INT32_C( 254567893)), simde_mm256_set_epi32(INT32_C( 412171599), INT32_C( 121844432), INT32_C( -216788239), INT32_C( -34510521), INT32_C( 38348563), INT32_C( -4219240), INT32_C( -85937763), INT32_C( 251760705)), simde_mm256_set_epi32(INT32_C( 3), INT32_C( -2), INT32_C( 2), INT32_C( 2), INT32_C( 29), INT32_C( -9), INT32_C( -5), INT32_C( 6)) }, { simde_mm256_set_epi32(INT32_C( -362876916), INT32_C(-1845390533), INT32_C( -48621016), INT32_C( 201516689), INT32_C(-1435930720), INT32_C(-1932876068), INT32_C(-1153303869), INT32_C( 562234020)), simde_mm256_set_epi32(INT32_C( -166366311), INT32_C( -85548959), INT32_C( 525546139), INT32_C( 219277873), INT32_C( 295872976), INT32_C( -144152745), INT32_C( -265329050), INT32_C( -202024350)), simde_mm256_set_epi32(INT32_C( -30144294), INT32_C( -48862394), INT32_C( -48621016), INT32_C( 201516689), INT32_C( -252438816), INT32_C( -58890383), INT32_C( -91987669), INT32_C( 158185320)), simde_mm256_set_epi32(INT32_C( 2), INT32_C( 21), INT32_C( 0), INT32_C( 0), INT32_C( -4), INT32_C( 13), INT32_C( 4), INT32_C( -2)) }, { simde_mm256_set_epi32(INT32_C( 910061584), INT32_C( 2002226944), INT32_C( -621963189), INT32_C( -48343218), INT32_C( 523093293), INT32_C(-1235205724), INT32_C(-2088961787), INT32_C( 1943141679)), simde_mm256_set_epi32(INT32_C( 123967721), INT32_C( -95531607), INT32_C( 
228811177), INT32_C( 1270356), INT32_C( 355625346), INT32_C( -40994931), INT32_C( -379225067), INT32_C( 124491394)), simde_mm256_set_epi32(INT32_C( 42287537), INT32_C( 91594804), INT32_C( -164340835), INT32_C( -69690), INT32_C( 167467947), INT32_C( -5357794), INT32_C( -192836452), INT32_C( 75770769)), simde_mm256_set_epi32(INT32_C( 7), INT32_C( -20), INT32_C( -2), INT32_C( -38), INT32_C( 1), INT32_C( 30), INT32_C( 5), INT32_C( 15)) }, { simde_mm256_set_epi32(INT32_C( 1755684145), INT32_C(-2061726371), INT32_C(-1050443653), INT32_C(-1299940555), INT32_C(-2116696545), INT32_C( 1493088054), INT32_C( -179829877), INT32_C( 651362699)), simde_mm256_set_epi32(INT32_C( 301617823), INT32_C( 343728879), INT32_C( 132913279), INT32_C( 518796827), INT32_C( -36154638), INT32_C( -532966429), INT32_C( 361195763), INT32_C( 469656308)), simde_mm256_set_epi32(INT32_C( 247595030), INT32_C( -343081976), INT32_C( -120050700), INT32_C( -262346901), INT32_C( -19727541), INT32_C( 427155196), INT32_C( -179829877), INT32_C( 181706391)), simde_mm256_set_epi32(INT32_C( 5), INT32_C( -5), INT32_C( -7), INT32_C( -2), INT32_C( 58), INT32_C( -2), INT32_C( 0), INT32_C( 1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i rem; simde__m256i r = simde_mm256_idivrem_epi32(&rem, test_vec[i].a, test_vec[i].b); simde_assert_m256i_i32(r, ==, test_vec[i].r); simde_assert_m256i_i32(rem, ==, test_vec[i].rem); } return 0; } static int test_simde_mm_hypot_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 b[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 492.01), SIMDE_FLOAT32_C( 211.12), SIMDE_FLOAT32_C( 12.31), SIMDE_FLOAT32_C( 870.52) }, { SIMDE_FLOAT32_C( -363.60), SIMDE_FLOAT32_C( 789.00), SIMDE_FLOAT32_C( 397.20), SIMDE_FLOAT32_C( -757.25) }, { SIMDE_FLOAT32_C( 611.78), SIMDE_FLOAT32_C( 816.76), SIMDE_FLOAT32_C( 397.39), SIMDE_FLOAT32_C( 1153.79) } }, { { SIMDE_FLOAT32_C( -192.59), SIMDE_FLOAT32_C( 
-586.23), SIMDE_FLOAT32_C( 571.12), SIMDE_FLOAT32_C( -717.05) }, { SIMDE_FLOAT32_C( -663.78), SIMDE_FLOAT32_C( 66.94), SIMDE_FLOAT32_C( -412.69), SIMDE_FLOAT32_C( -769.47) }, { SIMDE_FLOAT32_C( 691.15), SIMDE_FLOAT32_C( 590.04), SIMDE_FLOAT32_C( 704.62), SIMDE_FLOAT32_C( 1051.78) } }, { { SIMDE_FLOAT32_C( -594.99), SIMDE_FLOAT32_C( -442.39), SIMDE_FLOAT32_C( -303.17), SIMDE_FLOAT32_C( 275.57) }, { SIMDE_FLOAT32_C( 293.68), SIMDE_FLOAT32_C( 44.26), SIMDE_FLOAT32_C( -780.93), SIMDE_FLOAT32_C( -309.10) }, { SIMDE_FLOAT32_C( 663.52), SIMDE_FLOAT32_C( 444.60), SIMDE_FLOAT32_C( 837.71), SIMDE_FLOAT32_C( 414.10) } }, { { SIMDE_FLOAT32_C( -878.78), SIMDE_FLOAT32_C( -647.94), SIMDE_FLOAT32_C( 445.74), SIMDE_FLOAT32_C( 697.72) }, { SIMDE_FLOAT32_C( 98.72), SIMDE_FLOAT32_C( -787.29), SIMDE_FLOAT32_C( -3.77), SIMDE_FLOAT32_C( -409.27) }, { SIMDE_FLOAT32_C( 884.31), SIMDE_FLOAT32_C( 1019.63), SIMDE_FLOAT32_C( 445.76), SIMDE_FLOAT32_C( 808.90) } }, { { SIMDE_FLOAT32_C( 423.83), SIMDE_FLOAT32_C( -991.46), SIMDE_FLOAT32_C( -538.75), SIMDE_FLOAT32_C( -939.77) }, { SIMDE_FLOAT32_C( 797.54), SIMDE_FLOAT32_C( 858.45), SIMDE_FLOAT32_C( -697.02), SIMDE_FLOAT32_C( -395.04) }, { SIMDE_FLOAT32_C( 903.16), SIMDE_FLOAT32_C( 1311.46), SIMDE_FLOAT32_C( 880.96), SIMDE_FLOAT32_C( 1019.42) } }, { { SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 874.10), SIMDE_FLOAT32_C( -112.10), SIMDE_FLOAT32_C( -391.56) }, { SIMDE_FLOAT32_C( -58.96), SIMDE_FLOAT32_C( 475.22), SIMDE_FLOAT32_C( -161.04), SIMDE_FLOAT32_C( 346.05) }, { SIMDE_FLOAT32_C( 730.16), SIMDE_FLOAT32_C( 994.93), SIMDE_FLOAT32_C( 196.21), SIMDE_FLOAT32_C( 522.56) } }, { { SIMDE_FLOAT32_C( -967.17), SIMDE_FLOAT32_C( 535.80), SIMDE_FLOAT32_C( -378.38), SIMDE_FLOAT32_C( 326.51) }, { SIMDE_FLOAT32_C( -419.95), SIMDE_FLOAT32_C( -159.32), SIMDE_FLOAT32_C( -982.59), SIMDE_FLOAT32_C( -298.72) }, { SIMDE_FLOAT32_C( 1054.41), SIMDE_FLOAT32_C( 558.99), SIMDE_FLOAT32_C( 1052.93), SIMDE_FLOAT32_C( 442.54) } }, { { SIMDE_FLOAT32_C( 192.74), SIMDE_FLOAT32_C( 
463.15), SIMDE_FLOAT32_C( -601.00), SIMDE_FLOAT32_C( -708.54) }, { SIMDE_FLOAT32_C( 675.86), SIMDE_FLOAT32_C( 395.23), SIMDE_FLOAT32_C( -117.81), SIMDE_FLOAT32_C( 99.70) }, { SIMDE_FLOAT32_C( 702.81), SIMDE_FLOAT32_C( 608.86), SIMDE_FLOAT32_C( 612.44), SIMDE_FLOAT32_C( 715.52) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_mm_hypot_ps(a, b); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_hypot_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 b[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -603.93), SIMDE_FLOAT64_C( 656.94) }, { SIMDE_FLOAT64_C( 263.86), SIMDE_FLOAT64_C( -668.26) }, { SIMDE_FLOAT64_C( 659.06), SIMDE_FLOAT64_C( 937.09) } }, { { SIMDE_FLOAT64_C( -573.72), SIMDE_FLOAT64_C( 127.62) }, { SIMDE_FLOAT64_C( -494.33), SIMDE_FLOAT64_C( 413.83) }, { SIMDE_FLOAT64_C( 757.31), SIMDE_FLOAT64_C( 433.06) } }, { { SIMDE_FLOAT64_C( 92.50), SIMDE_FLOAT64_C( 179.32) }, { SIMDE_FLOAT64_C( -379.77), SIMDE_FLOAT64_C( 381.33) }, { SIMDE_FLOAT64_C( 390.87), SIMDE_FLOAT64_C( 421.39) } }, { { SIMDE_FLOAT64_C( 344.30), SIMDE_FLOAT64_C( 576.77) }, { SIMDE_FLOAT64_C( -663.77), SIMDE_FLOAT64_C( 656.74) }, { SIMDE_FLOAT64_C( 747.75), SIMDE_FLOAT64_C( 874.05) } }, { { SIMDE_FLOAT64_C( 499.56), SIMDE_FLOAT64_C( 761.69) }, { SIMDE_FLOAT64_C( -752.98), SIMDE_FLOAT64_C( -522.11) }, { SIMDE_FLOAT64_C( 903.63), SIMDE_FLOAT64_C( 923.46) } }, { { SIMDE_FLOAT64_C( 242.72), SIMDE_FLOAT64_C( 412.75) }, { SIMDE_FLOAT64_C( -101.50), SIMDE_FLOAT64_C( 96.94) }, { SIMDE_FLOAT64_C( 263.09), SIMDE_FLOAT64_C( 423.98) } }, { { SIMDE_FLOAT64_C( -934.53), SIMDE_FLOAT64_C( -147.86) }, { SIMDE_FLOAT64_C( -959.33), SIMDE_FLOAT64_C( 790.23) }, { SIMDE_FLOAT64_C( 1339.28), SIMDE_FLOAT64_C( 803.94) } }, { { 
SIMDE_FLOAT64_C( 239.33), SIMDE_FLOAT64_C( -100.41) }, { SIMDE_FLOAT64_C( -270.12), SIMDE_FLOAT64_C( 635.40) }, { SIMDE_FLOAT64_C( 360.89), SIMDE_FLOAT64_C( 643.28) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d b = simde_mm_loadu_pd(test_vec[i].b); simde__m128d r = simde_mm_hypot_pd(a, b); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_hypot_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 b[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( -777.18), SIMDE_FLOAT32_C( 159.63), SIMDE_FLOAT32_C( 756.34), SIMDE_FLOAT32_C( -76.33), SIMDE_FLOAT32_C( 113.08), SIMDE_FLOAT32_C( 246.24), SIMDE_FLOAT32_C( 841.85), SIMDE_FLOAT32_C( -845.53) }, { SIMDE_FLOAT32_C( -621.65), SIMDE_FLOAT32_C( 72.13), SIMDE_FLOAT32_C( 721.27), SIMDE_FLOAT32_C( -427.76), SIMDE_FLOAT32_C( -945.55), SIMDE_FLOAT32_C( -213.25), SIMDE_FLOAT32_C( -603.55), SIMDE_FLOAT32_C( 373.40) }, { SIMDE_FLOAT32_C( 995.22), SIMDE_FLOAT32_C( 175.17), SIMDE_FLOAT32_C( 1045.12), SIMDE_FLOAT32_C( 434.52), SIMDE_FLOAT32_C( 952.29), SIMDE_FLOAT32_C( 325.74), SIMDE_FLOAT32_C( 1035.85), SIMDE_FLOAT32_C( 924.31) } }, { { SIMDE_FLOAT32_C( -731.26), SIMDE_FLOAT32_C( -820.00), SIMDE_FLOAT32_C( 393.03), SIMDE_FLOAT32_C( -720.80), SIMDE_FLOAT32_C( -923.20), SIMDE_FLOAT32_C( -65.81), SIMDE_FLOAT32_C( -541.82), SIMDE_FLOAT32_C( -812.46) }, { SIMDE_FLOAT32_C( 833.72), SIMDE_FLOAT32_C( -217.64), SIMDE_FLOAT32_C( 806.57), SIMDE_FLOAT32_C( -582.91), SIMDE_FLOAT32_C( 620.23), SIMDE_FLOAT32_C( -724.63), SIMDE_FLOAT32_C( 373.46), SIMDE_FLOAT32_C( 843.05) }, { SIMDE_FLOAT32_C( 1108.98), SIMDE_FLOAT32_C( 848.39), SIMDE_FLOAT32_C( 897.23), SIMDE_FLOAT32_C( 927.00), SIMDE_FLOAT32_C( 1112.20), SIMDE_FLOAT32_C( 727.61), SIMDE_FLOAT32_C( 658.06), SIMDE_FLOAT32_C( 1170.82) } }, { { SIMDE_FLOAT32_C( 435.00), 
SIMDE_FLOAT32_C( 129.80), SIMDE_FLOAT32_C( -233.28), SIMDE_FLOAT32_C( -451.92), SIMDE_FLOAT32_C( -623.96), SIMDE_FLOAT32_C( -391.43), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( -245.61) }, { SIMDE_FLOAT32_C( 680.70), SIMDE_FLOAT32_C( -576.18), SIMDE_FLOAT32_C( 326.63), SIMDE_FLOAT32_C( 735.15), SIMDE_FLOAT32_C( 210.56), SIMDE_FLOAT32_C( 723.09), SIMDE_FLOAT32_C( 108.56), SIMDE_FLOAT32_C( 479.30) }, { SIMDE_FLOAT32_C( 807.82), SIMDE_FLOAT32_C( 590.62), SIMDE_FLOAT32_C( 401.38), SIMDE_FLOAT32_C( 862.95), SIMDE_FLOAT32_C( 658.53), SIMDE_FLOAT32_C( 822.24), SIMDE_FLOAT32_C( 316.64), SIMDE_FLOAT32_C( 538.57) } }, { { SIMDE_FLOAT32_C( 903.09), SIMDE_FLOAT32_C( -498.41), SIMDE_FLOAT32_C( 758.50), SIMDE_FLOAT32_C( 979.89), SIMDE_FLOAT32_C( 435.78), SIMDE_FLOAT32_C( -783.32), SIMDE_FLOAT32_C( -832.57), SIMDE_FLOAT32_C( 269.50) }, { SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( 973.99), SIMDE_FLOAT32_C( 686.59), SIMDE_FLOAT32_C( -380.74), SIMDE_FLOAT32_C( -750.64), SIMDE_FLOAT32_C( 60.05), SIMDE_FLOAT32_C( -537.69), SIMDE_FLOAT32_C( 684.36) }, { SIMDE_FLOAT32_C( 903.09), SIMDE_FLOAT32_C( 1094.11), SIMDE_FLOAT32_C( 1023.10), SIMDE_FLOAT32_C( 1051.26), SIMDE_FLOAT32_C( 867.97), SIMDE_FLOAT32_C( 785.62), SIMDE_FLOAT32_C( 991.10), SIMDE_FLOAT32_C( 735.51) } }, { { SIMDE_FLOAT32_C( -810.16), SIMDE_FLOAT32_C( 229.03), SIMDE_FLOAT32_C( -767.56), SIMDE_FLOAT32_C( -434.12), SIMDE_FLOAT32_C( 837.60), SIMDE_FLOAT32_C( -65.02), SIMDE_FLOAT32_C( 320.28), SIMDE_FLOAT32_C( 518.30) }, { SIMDE_FLOAT32_C( 358.80), SIMDE_FLOAT32_C( -353.09), SIMDE_FLOAT32_C( 253.45), SIMDE_FLOAT32_C( -430.64), SIMDE_FLOAT32_C( -630.00), SIMDE_FLOAT32_C( -637.99), SIMDE_FLOAT32_C( -951.34), SIMDE_FLOAT32_C( -726.92) }, { SIMDE_FLOAT32_C( 886.06), SIMDE_FLOAT32_C( 420.86), SIMDE_FLOAT32_C( 808.32), SIMDE_FLOAT32_C( 611.48), SIMDE_FLOAT32_C( 1048.08), SIMDE_FLOAT32_C( 641.29), SIMDE_FLOAT32_C( 1003.81), SIMDE_FLOAT32_C( 892.78) } }, { { SIMDE_FLOAT32_C( -136.40), SIMDE_FLOAT32_C( 807.17), SIMDE_FLOAT32_C( -747.03), 
SIMDE_FLOAT32_C( -700.62), SIMDE_FLOAT32_C( -976.15), SIMDE_FLOAT32_C( -579.60), SIMDE_FLOAT32_C( 568.87), SIMDE_FLOAT32_C( 22.88) }, { SIMDE_FLOAT32_C( -605.60), SIMDE_FLOAT32_C( 255.46), SIMDE_FLOAT32_C( 642.15), SIMDE_FLOAT32_C( -356.24), SIMDE_FLOAT32_C( -684.50), SIMDE_FLOAT32_C( -895.54), SIMDE_FLOAT32_C( -671.88), SIMDE_FLOAT32_C( -494.65) }, { SIMDE_FLOAT32_C( 620.77), SIMDE_FLOAT32_C( 846.63), SIMDE_FLOAT32_C( 985.09), SIMDE_FLOAT32_C( 785.99), SIMDE_FLOAT32_C( 1192.23), SIMDE_FLOAT32_C( 1066.74), SIMDE_FLOAT32_C( 880.36), SIMDE_FLOAT32_C( 495.18) } }, { { SIMDE_FLOAT32_C( 333.49), SIMDE_FLOAT32_C( -439.45), SIMDE_FLOAT32_C( 71.23), SIMDE_FLOAT32_C( 171.09), SIMDE_FLOAT32_C( 495.54), SIMDE_FLOAT32_C( -608.49), SIMDE_FLOAT32_C( -310.61), SIMDE_FLOAT32_C( -145.66) }, { SIMDE_FLOAT32_C( 38.42), SIMDE_FLOAT32_C( 942.84), SIMDE_FLOAT32_C( 423.70), SIMDE_FLOAT32_C( 408.42), SIMDE_FLOAT32_C( -695.15), SIMDE_FLOAT32_C( 472.36), SIMDE_FLOAT32_C( 681.50), SIMDE_FLOAT32_C( 168.45) }, { SIMDE_FLOAT32_C( 335.70), SIMDE_FLOAT32_C( 1040.22), SIMDE_FLOAT32_C( 429.65), SIMDE_FLOAT32_C( 442.81), SIMDE_FLOAT32_C( 853.69), SIMDE_FLOAT32_C( 770.31), SIMDE_FLOAT32_C( 748.95), SIMDE_FLOAT32_C( 222.69) } }, { { SIMDE_FLOAT32_C( 279.53), SIMDE_FLOAT32_C( 934.47), SIMDE_FLOAT32_C( 467.83), SIMDE_FLOAT32_C( 303.38), SIMDE_FLOAT32_C( -645.12), SIMDE_FLOAT32_C( 36.70), SIMDE_FLOAT32_C( -673.74), SIMDE_FLOAT32_C( -250.73) }, { SIMDE_FLOAT32_C( -707.84), SIMDE_FLOAT32_C( 968.41), SIMDE_FLOAT32_C( 393.03), SIMDE_FLOAT32_C( -392.34), SIMDE_FLOAT32_C( -927.14), SIMDE_FLOAT32_C( 721.15), SIMDE_FLOAT32_C( 113.01), SIMDE_FLOAT32_C( 406.35) }, { SIMDE_FLOAT32_C( 761.04), SIMDE_FLOAT32_C( 1345.75), SIMDE_FLOAT32_C( 611.01), SIMDE_FLOAT32_C( 495.95), SIMDE_FLOAT32_C( 1129.50), SIMDE_FLOAT32_C( 722.08), SIMDE_FLOAT32_C( 683.15), SIMDE_FLOAT32_C( 477.48) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 
b = simde_mm256_loadu_ps(test_vec[i].b); simde__m256 r = simde_mm256_hypot_ps(a, b); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_hypot_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 b[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( -477.45), SIMDE_FLOAT64_C( 593.44), SIMDE_FLOAT64_C( 978.32), SIMDE_FLOAT64_C( -584.34) }, { SIMDE_FLOAT64_C( 161.70), SIMDE_FLOAT64_C( -36.27), SIMDE_FLOAT64_C( 750.17), SIMDE_FLOAT64_C( -655.19) }, { SIMDE_FLOAT64_C( 504.09), SIMDE_FLOAT64_C( 594.55), SIMDE_FLOAT64_C( 1232.83), SIMDE_FLOAT64_C( 877.91) } }, { { SIMDE_FLOAT64_C( -840.17), SIMDE_FLOAT64_C( -429.90), SIMDE_FLOAT64_C( 790.20), SIMDE_FLOAT64_C( -18.28) }, { SIMDE_FLOAT64_C( 964.56), SIMDE_FLOAT64_C( 136.47), SIMDE_FLOAT64_C( 164.17), SIMDE_FLOAT64_C( 892.62) }, { SIMDE_FLOAT64_C( 1279.16), SIMDE_FLOAT64_C( 451.04), SIMDE_FLOAT64_C( 807.07), SIMDE_FLOAT64_C( 892.81) } }, { { SIMDE_FLOAT64_C( 115.18), SIMDE_FLOAT64_C( 353.33), SIMDE_FLOAT64_C( -41.82), SIMDE_FLOAT64_C( 836.90) }, { SIMDE_FLOAT64_C( 325.83), SIMDE_FLOAT64_C( 174.90), SIMDE_FLOAT64_C( -541.27), SIMDE_FLOAT64_C( -977.07) }, { SIMDE_FLOAT64_C( 345.59), SIMDE_FLOAT64_C( 394.25), SIMDE_FLOAT64_C( 542.88), SIMDE_FLOAT64_C( 1286.49) } }, { { SIMDE_FLOAT64_C( 604.56), SIMDE_FLOAT64_C( 980.27), SIMDE_FLOAT64_C( 536.46), SIMDE_FLOAT64_C( 153.38) }, { SIMDE_FLOAT64_C( -931.38), SIMDE_FLOAT64_C( -178.15), SIMDE_FLOAT64_C( -619.34), SIMDE_FLOAT64_C( -408.83) }, { SIMDE_FLOAT64_C( 1110.39), SIMDE_FLOAT64_C( 996.33), SIMDE_FLOAT64_C( 819.37), SIMDE_FLOAT64_C( 436.65) } }, { { SIMDE_FLOAT64_C( -584.72), SIMDE_FLOAT64_C( -641.02), SIMDE_FLOAT64_C( 6.83), SIMDE_FLOAT64_C( 576.98) }, { SIMDE_FLOAT64_C( 322.71), SIMDE_FLOAT64_C( -242.99), SIMDE_FLOAT64_C( 921.80), SIMDE_FLOAT64_C( 482.53) }, { SIMDE_FLOAT64_C( 667.86), SIMDE_FLOAT64_C( 685.53), SIMDE_FLOAT64_C( 921.83), SIMDE_FLOAT64_C( 
752.16) } }, { { SIMDE_FLOAT64_C( 327.10), SIMDE_FLOAT64_C( 712.00), SIMDE_FLOAT64_C( -535.75), SIMDE_FLOAT64_C( 291.66) }, { SIMDE_FLOAT64_C( -151.54), SIMDE_FLOAT64_C( 628.42), SIMDE_FLOAT64_C( 184.28), SIMDE_FLOAT64_C( 963.64) }, { SIMDE_FLOAT64_C( 360.50), SIMDE_FLOAT64_C( 949.66), SIMDE_FLOAT64_C( 566.56), SIMDE_FLOAT64_C( 1006.81) } }, { { SIMDE_FLOAT64_C( -18.25), SIMDE_FLOAT64_C( -857.54), SIMDE_FLOAT64_C( 800.54), SIMDE_FLOAT64_C( -692.42) }, { SIMDE_FLOAT64_C( 317.36), SIMDE_FLOAT64_C( -740.72), SIMDE_FLOAT64_C( -669.48), SIMDE_FLOAT64_C( -78.07) }, { SIMDE_FLOAT64_C( 317.88), SIMDE_FLOAT64_C( 1133.16), SIMDE_FLOAT64_C( 1043.58), SIMDE_FLOAT64_C( 696.81) } }, { { SIMDE_FLOAT64_C( -760.45), SIMDE_FLOAT64_C( 866.98), SIMDE_FLOAT64_C( -924.70), SIMDE_FLOAT64_C( -691.83) }, { SIMDE_FLOAT64_C( -311.18), SIMDE_FLOAT64_C( -544.04), SIMDE_FLOAT64_C( -100.66), SIMDE_FLOAT64_C( 104.10) }, { SIMDE_FLOAT64_C( 821.66), SIMDE_FLOAT64_C( 1023.54), SIMDE_FLOAT64_C( 930.16), SIMDE_FLOAT64_C( 699.62) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d b = simde_mm256_loadu_pd(test_vec[i].b); simde__m256d r = simde_mm256_hypot_pd(a, b); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_hypot_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -926.16), SIMDE_FLOAT32_C( -45.76), SIMDE_FLOAT32_C( 907.70), SIMDE_FLOAT32_C( -928.37), SIMDE_FLOAT32_C( 496.55), SIMDE_FLOAT32_C( 566.66), SIMDE_FLOAT32_C( -501.51), SIMDE_FLOAT32_C( -575.98), SIMDE_FLOAT32_C( -281.74), SIMDE_FLOAT32_C( -821.54), SIMDE_FLOAT32_C( 198.58), SIMDE_FLOAT32_C( -379.20), SIMDE_FLOAT32_C( 104.18), SIMDE_FLOAT32_C( -675.07), SIMDE_FLOAT32_C( -169.00), SIMDE_FLOAT32_C( 502.70) }, { SIMDE_FLOAT32_C( 7.03), SIMDE_FLOAT32_C( 
-875.48), SIMDE_FLOAT32_C( -451.63), SIMDE_FLOAT32_C( -815.00), SIMDE_FLOAT32_C( 37.83), SIMDE_FLOAT32_C( -588.92), SIMDE_FLOAT32_C( -905.87), SIMDE_FLOAT32_C( -49.63), SIMDE_FLOAT32_C( 813.22), SIMDE_FLOAT32_C( -962.83), SIMDE_FLOAT32_C( -486.45), SIMDE_FLOAT32_C( -367.13), SIMDE_FLOAT32_C( -242.02), SIMDE_FLOAT32_C( 475.59), SIMDE_FLOAT32_C( -31.20), SIMDE_FLOAT32_C( -168.18) }, { SIMDE_FLOAT32_C( 926.19), SIMDE_FLOAT32_C( 876.68), SIMDE_FLOAT32_C( 1013.85), SIMDE_FLOAT32_C( 1235.35), SIMDE_FLOAT32_C( 497.99), SIMDE_FLOAT32_C( 817.27), SIMDE_FLOAT32_C( 1035.43), SIMDE_FLOAT32_C( 578.11), SIMDE_FLOAT32_C( 860.64), SIMDE_FLOAT32_C( 1265.69), SIMDE_FLOAT32_C( 525.42), SIMDE_FLOAT32_C( 527.80), SIMDE_FLOAT32_C( 263.49), SIMDE_FLOAT32_C( 825.78), SIMDE_FLOAT32_C( 171.86), SIMDE_FLOAT32_C( 530.09) } }, { { SIMDE_FLOAT32_C( -570.17), SIMDE_FLOAT32_C( -123.51), SIMDE_FLOAT32_C( -96.55), SIMDE_FLOAT32_C( 926.38), SIMDE_FLOAT32_C( -556.85), SIMDE_FLOAT32_C( 401.94), SIMDE_FLOAT32_C( -649.60), SIMDE_FLOAT32_C( 161.41), SIMDE_FLOAT32_C( 580.39), SIMDE_FLOAT32_C( 548.98), SIMDE_FLOAT32_C( 782.21), SIMDE_FLOAT32_C( -315.43), SIMDE_FLOAT32_C( 873.91), SIMDE_FLOAT32_C( -386.79), SIMDE_FLOAT32_C( -812.72), SIMDE_FLOAT32_C( -119.05) }, { SIMDE_FLOAT32_C( -262.27), SIMDE_FLOAT32_C( -264.35), SIMDE_FLOAT32_C( 65.94), SIMDE_FLOAT32_C( 775.56), SIMDE_FLOAT32_C( 146.72), SIMDE_FLOAT32_C( 160.08), SIMDE_FLOAT32_C( -274.07), SIMDE_FLOAT32_C( -40.05), SIMDE_FLOAT32_C( 197.24), SIMDE_FLOAT32_C( 239.47), SIMDE_FLOAT32_C( 592.82), SIMDE_FLOAT32_C( 955.23), SIMDE_FLOAT32_C( -284.94), SIMDE_FLOAT32_C( -438.38), SIMDE_FLOAT32_C( -212.95), SIMDE_FLOAT32_C( 144.89) }, { SIMDE_FLOAT32_C( 627.60), SIMDE_FLOAT32_C( 291.78), SIMDE_FLOAT32_C( 116.92), SIMDE_FLOAT32_C( 1208.17), SIMDE_FLOAT32_C( 575.85), SIMDE_FLOAT32_C( 432.64), SIMDE_FLOAT32_C( 705.05), SIMDE_FLOAT32_C( 166.30), SIMDE_FLOAT32_C( 612.99), SIMDE_FLOAT32_C( 598.94), SIMDE_FLOAT32_C( 981.47), SIMDE_FLOAT32_C( 1005.96), SIMDE_FLOAT32_C( 
919.19), SIMDE_FLOAT32_C( 584.62), SIMDE_FLOAT32_C( 840.16), SIMDE_FLOAT32_C( 187.53) } }, { { SIMDE_FLOAT32_C( 438.11), SIMDE_FLOAT32_C( 690.50), SIMDE_FLOAT32_C( 71.27), SIMDE_FLOAT32_C( 881.27), SIMDE_FLOAT32_C( 92.44), SIMDE_FLOAT32_C( 421.67), SIMDE_FLOAT32_C( 42.68), SIMDE_FLOAT32_C( -327.17), SIMDE_FLOAT32_C( -29.36), SIMDE_FLOAT32_C( -175.11), SIMDE_FLOAT32_C( 357.41), SIMDE_FLOAT32_C( -155.45), SIMDE_FLOAT32_C( 438.11), SIMDE_FLOAT32_C( 544.68), SIMDE_FLOAT32_C( 725.50), SIMDE_FLOAT32_C( -824.16) }, { SIMDE_FLOAT32_C( -719.67), SIMDE_FLOAT32_C( -208.56), SIMDE_FLOAT32_C( 951.40), SIMDE_FLOAT32_C( 427.05), SIMDE_FLOAT32_C( 951.52), SIMDE_FLOAT32_C( -322.67), SIMDE_FLOAT32_C( -613.00), SIMDE_FLOAT32_C( 148.76), SIMDE_FLOAT32_C( 916.80), SIMDE_FLOAT32_C( 979.82), SIMDE_FLOAT32_C( 103.99), SIMDE_FLOAT32_C( -368.15), SIMDE_FLOAT32_C( -458.56), SIMDE_FLOAT32_C( 891.04), SIMDE_FLOAT32_C( 776.74), SIMDE_FLOAT32_C( 979.55) }, { SIMDE_FLOAT32_C( 842.54), SIMDE_FLOAT32_C( 721.31), SIMDE_FLOAT32_C( 954.07), SIMDE_FLOAT32_C( 979.29), SIMDE_FLOAT32_C( 956.00), SIMDE_FLOAT32_C( 530.96), SIMDE_FLOAT32_C( 614.48), SIMDE_FLOAT32_C( 359.40), SIMDE_FLOAT32_C( 917.27), SIMDE_FLOAT32_C( 995.34), SIMDE_FLOAT32_C( 372.23), SIMDE_FLOAT32_C( 399.62), SIMDE_FLOAT32_C( 634.21), SIMDE_FLOAT32_C( 1044.33), SIMDE_FLOAT32_C( 1062.86), SIMDE_FLOAT32_C( 1280.14) } }, { { SIMDE_FLOAT32_C( 581.54), SIMDE_FLOAT32_C( -151.99), SIMDE_FLOAT32_C( 860.81), SIMDE_FLOAT32_C( -326.03), SIMDE_FLOAT32_C( -730.33), SIMDE_FLOAT32_C( -96.51), SIMDE_FLOAT32_C( 346.80), SIMDE_FLOAT32_C( 240.31), SIMDE_FLOAT32_C( 728.39), SIMDE_FLOAT32_C( -295.79), SIMDE_FLOAT32_C( -915.13), SIMDE_FLOAT32_C( 166.50), SIMDE_FLOAT32_C( -751.11), SIMDE_FLOAT32_C( 810.37), SIMDE_FLOAT32_C( 342.34), SIMDE_FLOAT32_C( -470.78) }, { SIMDE_FLOAT32_C( -398.19), SIMDE_FLOAT32_C( 293.73), SIMDE_FLOAT32_C( 956.27), SIMDE_FLOAT32_C( -446.67), SIMDE_FLOAT32_C( 971.06), SIMDE_FLOAT32_C( -656.73), SIMDE_FLOAT32_C( 702.10), SIMDE_FLOAT32_C( 
887.86), SIMDE_FLOAT32_C( -676.91), SIMDE_FLOAT32_C( -193.91), SIMDE_FLOAT32_C( -480.29), SIMDE_FLOAT32_C( -135.48), SIMDE_FLOAT32_C( -302.88), SIMDE_FLOAT32_C( -703.55), SIMDE_FLOAT32_C( -155.93), SIMDE_FLOAT32_C( -721.34) }, { SIMDE_FLOAT32_C( 704.80), SIMDE_FLOAT32_C( 330.72), SIMDE_FLOAT32_C( 1286.64), SIMDE_FLOAT32_C( 553.00), SIMDE_FLOAT32_C( 1215.05), SIMDE_FLOAT32_C( 663.78), SIMDE_FLOAT32_C( 783.08), SIMDE_FLOAT32_C( 919.81), SIMDE_FLOAT32_C( 994.36), SIMDE_FLOAT32_C( 353.68), SIMDE_FLOAT32_C( 1033.51), SIMDE_FLOAT32_C( 214.66), SIMDE_FLOAT32_C( 809.88), SIMDE_FLOAT32_C( 1073.16), SIMDE_FLOAT32_C( 376.18), SIMDE_FLOAT32_C( 861.37) } }, { { SIMDE_FLOAT32_C( 144.45), SIMDE_FLOAT32_C( -295.12), SIMDE_FLOAT32_C( -47.37), SIMDE_FLOAT32_C( 414.12), SIMDE_FLOAT32_C( 608.38), SIMDE_FLOAT32_C( -700.56), SIMDE_FLOAT32_C( -345.56), SIMDE_FLOAT32_C( 336.76), SIMDE_FLOAT32_C( 3.65), SIMDE_FLOAT32_C( -260.69), SIMDE_FLOAT32_C( -496.74), SIMDE_FLOAT32_C( 252.54), SIMDE_FLOAT32_C( -450.32), SIMDE_FLOAT32_C( 845.60), SIMDE_FLOAT32_C( 781.76), SIMDE_FLOAT32_C( 151.49) }, { SIMDE_FLOAT32_C( 139.33), SIMDE_FLOAT32_C( 738.03), SIMDE_FLOAT32_C( 704.82), SIMDE_FLOAT32_C( 110.39), SIMDE_FLOAT32_C( -918.70), SIMDE_FLOAT32_C( 406.92), SIMDE_FLOAT32_C( -1.75), SIMDE_FLOAT32_C( -595.61), SIMDE_FLOAT32_C( -787.00), SIMDE_FLOAT32_C( 517.95), SIMDE_FLOAT32_C( 268.91), SIMDE_FLOAT32_C( -89.87), SIMDE_FLOAT32_C( 814.40), SIMDE_FLOAT32_C( -887.02), SIMDE_FLOAT32_C( 188.79), SIMDE_FLOAT32_C( -41.15) }, { SIMDE_FLOAT32_C( 200.70), SIMDE_FLOAT32_C( 794.85), SIMDE_FLOAT32_C( 706.41), SIMDE_FLOAT32_C( 428.58), SIMDE_FLOAT32_C( 1101.88), SIMDE_FLOAT32_C( 810.17), SIMDE_FLOAT32_C( 345.56), SIMDE_FLOAT32_C( 684.22), SIMDE_FLOAT32_C( 787.01), SIMDE_FLOAT32_C( 579.85), SIMDE_FLOAT32_C( 564.86), SIMDE_FLOAT32_C( 268.05), SIMDE_FLOAT32_C( 930.61), SIMDE_FLOAT32_C( 1225.50), SIMDE_FLOAT32_C( 804.23), SIMDE_FLOAT32_C( 156.98) } }, { { SIMDE_FLOAT32_C( -182.14), SIMDE_FLOAT32_C( -858.58), 
SIMDE_FLOAT32_C( -627.02), SIMDE_FLOAT32_C( -573.76), SIMDE_FLOAT32_C( -559.14), SIMDE_FLOAT32_C( 27.42), SIMDE_FLOAT32_C( 763.00), SIMDE_FLOAT32_C( 444.51), SIMDE_FLOAT32_C( 766.72), SIMDE_FLOAT32_C( -733.74), SIMDE_FLOAT32_C( -302.95), SIMDE_FLOAT32_C( -683.60), SIMDE_FLOAT32_C( -888.14), SIMDE_FLOAT32_C( -521.19), SIMDE_FLOAT32_C( 467.89), SIMDE_FLOAT32_C( 251.19) }, { SIMDE_FLOAT32_C( -783.16), SIMDE_FLOAT32_C( 172.71), SIMDE_FLOAT32_C( -638.42), SIMDE_FLOAT32_C( -701.86), SIMDE_FLOAT32_C( -420.37), SIMDE_FLOAT32_C( 359.83), SIMDE_FLOAT32_C( -297.47), SIMDE_FLOAT32_C( -207.37), SIMDE_FLOAT32_C( -122.22), SIMDE_FLOAT32_C( 971.44), SIMDE_FLOAT32_C( 702.76), SIMDE_FLOAT32_C( -307.82), SIMDE_FLOAT32_C( -915.59), SIMDE_FLOAT32_C( -108.45), SIMDE_FLOAT32_C( 651.04), SIMDE_FLOAT32_C( -97.72) }, { SIMDE_FLOAT32_C( 804.06), SIMDE_FLOAT32_C( 875.78), SIMDE_FLOAT32_C( 894.84), SIMDE_FLOAT32_C( 906.54), SIMDE_FLOAT32_C( 699.53), SIMDE_FLOAT32_C( 360.87), SIMDE_FLOAT32_C( 818.94), SIMDE_FLOAT32_C( 490.50), SIMDE_FLOAT32_C( 776.40), SIMDE_FLOAT32_C( 1217.40), SIMDE_FLOAT32_C( 765.28), SIMDE_FLOAT32_C( 749.71), SIMDE_FLOAT32_C( 1275.58), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( 801.73), SIMDE_FLOAT32_C( 269.53) } }, { { SIMDE_FLOAT32_C( 32.97), SIMDE_FLOAT32_C( -975.98), SIMDE_FLOAT32_C( 328.52), SIMDE_FLOAT32_C( 473.84), SIMDE_FLOAT32_C( 51.43), SIMDE_FLOAT32_C( 91.52), SIMDE_FLOAT32_C( -81.65), SIMDE_FLOAT32_C( -181.85), SIMDE_FLOAT32_C( 357.78), SIMDE_FLOAT32_C( 615.40), SIMDE_FLOAT32_C( 134.55), SIMDE_FLOAT32_C( 469.64), SIMDE_FLOAT32_C( -905.79), SIMDE_FLOAT32_C( -397.56), SIMDE_FLOAT32_C( -279.17), SIMDE_FLOAT32_C( -688.95) }, { SIMDE_FLOAT32_C( 775.15), SIMDE_FLOAT32_C( 82.41), SIMDE_FLOAT32_C( -390.80), SIMDE_FLOAT32_C( -645.22), SIMDE_FLOAT32_C( -557.76), SIMDE_FLOAT32_C( 311.72), SIMDE_FLOAT32_C( 147.41), SIMDE_FLOAT32_C( 320.02), SIMDE_FLOAT32_C( 283.16), SIMDE_FLOAT32_C( -149.83), SIMDE_FLOAT32_C( -987.80), SIMDE_FLOAT32_C( 367.57), SIMDE_FLOAT32_C( 741.72), 
SIMDE_FLOAT32_C( 663.24), SIMDE_FLOAT32_C( -730.15), SIMDE_FLOAT32_C( -225.30) }, { SIMDE_FLOAT32_C( 775.85), SIMDE_FLOAT32_C( 979.45), SIMDE_FLOAT32_C( 510.54), SIMDE_FLOAT32_C( 800.52), SIMDE_FLOAT32_C( 560.13), SIMDE_FLOAT32_C( 324.88), SIMDE_FLOAT32_C( 168.51), SIMDE_FLOAT32_C( 368.08), SIMDE_FLOAT32_C( 456.27), SIMDE_FLOAT32_C( 633.38), SIMDE_FLOAT32_C( 996.92), SIMDE_FLOAT32_C( 596.38), SIMDE_FLOAT32_C( 1170.73), SIMDE_FLOAT32_C( 773.27), SIMDE_FLOAT32_C( 781.70), SIMDE_FLOAT32_C( 724.85) } }, { { SIMDE_FLOAT32_C( 687.25), SIMDE_FLOAT32_C( 598.37), SIMDE_FLOAT32_C( -751.47), SIMDE_FLOAT32_C( -261.32), SIMDE_FLOAT32_C( -310.12), SIMDE_FLOAT32_C( 166.88), SIMDE_FLOAT32_C( 556.84), SIMDE_FLOAT32_C( -952.33), SIMDE_FLOAT32_C( -217.72), SIMDE_FLOAT32_C( -308.61), SIMDE_FLOAT32_C( 517.31), SIMDE_FLOAT32_C( -123.51), SIMDE_FLOAT32_C( 293.83), SIMDE_FLOAT32_C( -761.86), SIMDE_FLOAT32_C( 187.55), SIMDE_FLOAT32_C( 68.99) }, { SIMDE_FLOAT32_C( 320.55), SIMDE_FLOAT32_C( 796.74), SIMDE_FLOAT32_C( 423.77), SIMDE_FLOAT32_C( 762.79), SIMDE_FLOAT32_C( 108.47), SIMDE_FLOAT32_C( -428.82), SIMDE_FLOAT32_C( 82.81), SIMDE_FLOAT32_C( -608.37), SIMDE_FLOAT32_C( 421.35), SIMDE_FLOAT32_C( 95.01), SIMDE_FLOAT32_C( 759.20), SIMDE_FLOAT32_C( 163.07), SIMDE_FLOAT32_C( -241.76), SIMDE_FLOAT32_C( -970.95), SIMDE_FLOAT32_C( 937.77), SIMDE_FLOAT32_C( -554.50) }, { SIMDE_FLOAT32_C( 758.33), SIMDE_FLOAT32_C( 996.41), SIMDE_FLOAT32_C( 862.72), SIMDE_FLOAT32_C( 806.31), SIMDE_FLOAT32_C( 328.54), SIMDE_FLOAT32_C( 460.15), SIMDE_FLOAT32_C( 562.96), SIMDE_FLOAT32_C( 1130.06), SIMDE_FLOAT32_C( 474.28), SIMDE_FLOAT32_C( 322.90), SIMDE_FLOAT32_C( 918.69), SIMDE_FLOAT32_C( 204.56), SIMDE_FLOAT32_C( 380.50), SIMDE_FLOAT32_C( 1234.17), SIMDE_FLOAT32_C( 956.34), SIMDE_FLOAT32_C( 558.78) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = 
simde_mm512_hypot_ps(a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_hypot_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 367.16), SIMDE_FLOAT32_C( 534.12), SIMDE_FLOAT32_C( 840.53), SIMDE_FLOAT32_C( -620.21), SIMDE_FLOAT32_C( 261.27), SIMDE_FLOAT32_C( 223.14), SIMDE_FLOAT32_C( -163.58), SIMDE_FLOAT32_C( 267.96), SIMDE_FLOAT32_C( -882.06), SIMDE_FLOAT32_C( -703.87), SIMDE_FLOAT32_C( 527.51), SIMDE_FLOAT32_C( -734.80), SIMDE_FLOAT32_C( -828.23), SIMDE_FLOAT32_C( -822.70), SIMDE_FLOAT32_C( -911.73), SIMDE_FLOAT32_C( 856.22) }, UINT8_C(182), { SIMDE_FLOAT32_C( 508.95), SIMDE_FLOAT32_C( 401.36), SIMDE_FLOAT32_C( -896.06), SIMDE_FLOAT32_C( 773.16), SIMDE_FLOAT32_C( -9.93), SIMDE_FLOAT32_C( -389.05), SIMDE_FLOAT32_C( -811.06), SIMDE_FLOAT32_C( 179.53), SIMDE_FLOAT32_C( -842.09), SIMDE_FLOAT32_C( 34.81), SIMDE_FLOAT32_C( -170.09), SIMDE_FLOAT32_C( 888.35), SIMDE_FLOAT32_C( -467.85), SIMDE_FLOAT32_C( 381.00), SIMDE_FLOAT32_C( 255.51), SIMDE_FLOAT32_C( -933.73) }, { SIMDE_FLOAT32_C( 221.53), SIMDE_FLOAT32_C( 635.30), SIMDE_FLOAT32_C( 327.54), SIMDE_FLOAT32_C( -555.33), SIMDE_FLOAT32_C( -528.28), SIMDE_FLOAT32_C( -404.50), SIMDE_FLOAT32_C( -437.39), SIMDE_FLOAT32_C( -232.15), SIMDE_FLOAT32_C( -876.99), SIMDE_FLOAT32_C( -172.19), SIMDE_FLOAT32_C( -60.39), SIMDE_FLOAT32_C( -699.69), SIMDE_FLOAT32_C( -83.92), SIMDE_FLOAT32_C( -204.17), SIMDE_FLOAT32_C( 701.45), SIMDE_FLOAT32_C( -574.97) }, { SIMDE_FLOAT32_C( 367.16), SIMDE_FLOAT32_C( 751.46), SIMDE_FLOAT32_C( 954.05), SIMDE_FLOAT32_C( -620.21), SIMDE_FLOAT32_C( 528.37), SIMDE_FLOAT32_C( 561.23), SIMDE_FLOAT32_C( -163.58), SIMDE_FLOAT32_C( 293.47), SIMDE_FLOAT32_C( -882.06), SIMDE_FLOAT32_C( -703.87), SIMDE_FLOAT32_C( 527.51), SIMDE_FLOAT32_C( -734.80), SIMDE_FLOAT32_C( 
-828.23), SIMDE_FLOAT32_C( -822.70), SIMDE_FLOAT32_C( -911.73), SIMDE_FLOAT32_C( 856.22) } }, { { SIMDE_FLOAT32_C( -802.80), SIMDE_FLOAT32_C( 805.39), SIMDE_FLOAT32_C( -801.81), SIMDE_FLOAT32_C( 187.27), SIMDE_FLOAT32_C( -583.65), SIMDE_FLOAT32_C( -612.87), SIMDE_FLOAT32_C( -633.20), SIMDE_FLOAT32_C( -425.74), SIMDE_FLOAT32_C( 421.94), SIMDE_FLOAT32_C( 196.71), SIMDE_FLOAT32_C( -537.40), SIMDE_FLOAT32_C( 954.08), SIMDE_FLOAT32_C( -422.29), SIMDE_FLOAT32_C( 718.11), SIMDE_FLOAT32_C( -979.65), SIMDE_FLOAT32_C( 799.24) }, UINT8_C( 1), { SIMDE_FLOAT32_C( 347.90), SIMDE_FLOAT32_C( -756.09), SIMDE_FLOAT32_C( 825.13), SIMDE_FLOAT32_C( 943.40), SIMDE_FLOAT32_C( -193.47), SIMDE_FLOAT32_C( -407.03), SIMDE_FLOAT32_C( -933.59), SIMDE_FLOAT32_C( 634.34), SIMDE_FLOAT32_C( 532.59), SIMDE_FLOAT32_C( -633.28), SIMDE_FLOAT32_C( -449.58), SIMDE_FLOAT32_C( -671.58), SIMDE_FLOAT32_C( -931.83), SIMDE_FLOAT32_C( -24.55), SIMDE_FLOAT32_C( -474.38), SIMDE_FLOAT32_C( 873.57) }, { SIMDE_FLOAT32_C( 173.64), SIMDE_FLOAT32_C( 712.89), SIMDE_FLOAT32_C( -710.09), SIMDE_FLOAT32_C( 560.77), SIMDE_FLOAT32_C( -920.31), SIMDE_FLOAT32_C( -135.83), SIMDE_FLOAT32_C( -17.30), SIMDE_FLOAT32_C( 276.39), SIMDE_FLOAT32_C( 326.78), SIMDE_FLOAT32_C( -63.21), SIMDE_FLOAT32_C( 854.10), SIMDE_FLOAT32_C( 44.89), SIMDE_FLOAT32_C( -42.86), SIMDE_FLOAT32_C( 653.34), SIMDE_FLOAT32_C( -601.70), SIMDE_FLOAT32_C( -694.96) }, { SIMDE_FLOAT32_C( 388.83), SIMDE_FLOAT32_C( 805.39), SIMDE_FLOAT32_C( -801.81), SIMDE_FLOAT32_C( 187.27), SIMDE_FLOAT32_C( -583.65), SIMDE_FLOAT32_C( -612.87), SIMDE_FLOAT32_C( -633.20), SIMDE_FLOAT32_C( -425.74), SIMDE_FLOAT32_C( 421.94), SIMDE_FLOAT32_C( 196.71), SIMDE_FLOAT32_C( -537.40), SIMDE_FLOAT32_C( 954.08), SIMDE_FLOAT32_C( -422.29), SIMDE_FLOAT32_C( 718.11), SIMDE_FLOAT32_C( -979.65), SIMDE_FLOAT32_C( 799.24) } }, { { SIMDE_FLOAT32_C( 897.26), SIMDE_FLOAT32_C( -776.57), SIMDE_FLOAT32_C( -751.56), SIMDE_FLOAT32_C( -296.22), SIMDE_FLOAT32_C( -183.60), SIMDE_FLOAT32_C( -685.15), 
SIMDE_FLOAT32_C( -661.88), SIMDE_FLOAT32_C( -651.01), SIMDE_FLOAT32_C( -318.42), SIMDE_FLOAT32_C( -111.46), SIMDE_FLOAT32_C( -322.60), SIMDE_FLOAT32_C( -250.25), SIMDE_FLOAT32_C( 863.99), SIMDE_FLOAT32_C( 203.02), SIMDE_FLOAT32_C( -376.68), SIMDE_FLOAT32_C( 37.62) }, UINT8_C( 54), { SIMDE_FLOAT32_C( -86.77), SIMDE_FLOAT32_C( -401.61), SIMDE_FLOAT32_C( -4.41), SIMDE_FLOAT32_C( 777.40), SIMDE_FLOAT32_C( 581.09), SIMDE_FLOAT32_C( -728.01), SIMDE_FLOAT32_C( 104.18), SIMDE_FLOAT32_C( -482.12), SIMDE_FLOAT32_C( -873.91), SIMDE_FLOAT32_C( -850.93), SIMDE_FLOAT32_C( 475.02), SIMDE_FLOAT32_C( 779.43), SIMDE_FLOAT32_C( -452.63), SIMDE_FLOAT32_C( 780.06), SIMDE_FLOAT32_C( 676.69), SIMDE_FLOAT32_C( -229.20) }, { SIMDE_FLOAT32_C( -971.50), SIMDE_FLOAT32_C( -619.53), SIMDE_FLOAT32_C( 587.20), SIMDE_FLOAT32_C( -656.65), SIMDE_FLOAT32_C( -281.40), SIMDE_FLOAT32_C( 936.19), SIMDE_FLOAT32_C( 24.93), SIMDE_FLOAT32_C( 607.14), SIMDE_FLOAT32_C( -386.41), SIMDE_FLOAT32_C( 774.68), SIMDE_FLOAT32_C( 471.12), SIMDE_FLOAT32_C( 816.61), SIMDE_FLOAT32_C( -602.00), SIMDE_FLOAT32_C( -491.25), SIMDE_FLOAT32_C( -267.48), SIMDE_FLOAT32_C( 311.23) }, { SIMDE_FLOAT32_C( 897.26), SIMDE_FLOAT32_C( 738.31), SIMDE_FLOAT32_C( 587.22), SIMDE_FLOAT32_C( -296.22), SIMDE_FLOAT32_C( 645.64), SIMDE_FLOAT32_C( 1185.94), SIMDE_FLOAT32_C( -661.88), SIMDE_FLOAT32_C( -651.01), SIMDE_FLOAT32_C( -318.42), SIMDE_FLOAT32_C( -111.46), SIMDE_FLOAT32_C( -322.60), SIMDE_FLOAT32_C( -250.25), SIMDE_FLOAT32_C( 863.99), SIMDE_FLOAT32_C( 203.02), SIMDE_FLOAT32_C( -376.68), SIMDE_FLOAT32_C( 37.62) } }, { { SIMDE_FLOAT32_C( 107.14), SIMDE_FLOAT32_C( 728.11), SIMDE_FLOAT32_C( 88.63), SIMDE_FLOAT32_C( -311.77), SIMDE_FLOAT32_C( -999.90), SIMDE_FLOAT32_C( -807.18), SIMDE_FLOAT32_C( 206.11), SIMDE_FLOAT32_C( -873.82), SIMDE_FLOAT32_C( -658.11), SIMDE_FLOAT32_C( -318.87), SIMDE_FLOAT32_C( 905.61), SIMDE_FLOAT32_C( -110.74), SIMDE_FLOAT32_C( -538.82), SIMDE_FLOAT32_C( 582.30), SIMDE_FLOAT32_C( 660.06), SIMDE_FLOAT32_C( -510.32) }, 
UINT8_C(112), { SIMDE_FLOAT32_C( 247.26), SIMDE_FLOAT32_C( -166.97), SIMDE_FLOAT32_C( -318.63), SIMDE_FLOAT32_C( 183.45), SIMDE_FLOAT32_C( 857.96), SIMDE_FLOAT32_C( -711.49), SIMDE_FLOAT32_C( 797.04), SIMDE_FLOAT32_C( 632.64), SIMDE_FLOAT32_C( 759.63), SIMDE_FLOAT32_C( 613.65), SIMDE_FLOAT32_C( -969.36), SIMDE_FLOAT32_C( -731.62), SIMDE_FLOAT32_C( -653.84), SIMDE_FLOAT32_C( 341.87), SIMDE_FLOAT32_C( 375.52), SIMDE_FLOAT32_C( -925.73) }, { SIMDE_FLOAT32_C( -569.50), SIMDE_FLOAT32_C( -936.25), SIMDE_FLOAT32_C( -925.63), SIMDE_FLOAT32_C( -376.68), SIMDE_FLOAT32_C( 269.87), SIMDE_FLOAT32_C( -799.45), SIMDE_FLOAT32_C( -34.80), SIMDE_FLOAT32_C( 950.99), SIMDE_FLOAT32_C( -893.84), SIMDE_FLOAT32_C( 854.47), SIMDE_FLOAT32_C( -587.82), SIMDE_FLOAT32_C( 688.47), SIMDE_FLOAT32_C( 514.53), SIMDE_FLOAT32_C( -98.14), SIMDE_FLOAT32_C( 651.24), SIMDE_FLOAT32_C( -238.21) }, { SIMDE_FLOAT32_C( 107.14), SIMDE_FLOAT32_C( 728.11), SIMDE_FLOAT32_C( 88.63), SIMDE_FLOAT32_C( -311.77), SIMDE_FLOAT32_C( 899.40), SIMDE_FLOAT32_C( 1070.20), SIMDE_FLOAT32_C( 797.80), SIMDE_FLOAT32_C( -873.82), SIMDE_FLOAT32_C( -658.11), SIMDE_FLOAT32_C( -318.87), SIMDE_FLOAT32_C( 905.61), SIMDE_FLOAT32_C( -110.74), SIMDE_FLOAT32_C( -538.82), SIMDE_FLOAT32_C( 582.30), SIMDE_FLOAT32_C( 660.06), SIMDE_FLOAT32_C( -510.32) } }, { { SIMDE_FLOAT32_C( 734.89), SIMDE_FLOAT32_C( -667.39), SIMDE_FLOAT32_C( 945.23), SIMDE_FLOAT32_C( 592.85), SIMDE_FLOAT32_C( -378.88), SIMDE_FLOAT32_C( 742.27), SIMDE_FLOAT32_C( 225.49), SIMDE_FLOAT32_C( -619.25), SIMDE_FLOAT32_C( 355.91), SIMDE_FLOAT32_C( 256.12), SIMDE_FLOAT32_C( -350.87), SIMDE_FLOAT32_C( 702.07), SIMDE_FLOAT32_C( -402.01), SIMDE_FLOAT32_C( -975.35), SIMDE_FLOAT32_C( 776.35), SIMDE_FLOAT32_C( 28.49) }, UINT8_C( 29), { SIMDE_FLOAT32_C( 850.71), SIMDE_FLOAT32_C( 651.81), SIMDE_FLOAT32_C( 358.27), SIMDE_FLOAT32_C( -948.74), SIMDE_FLOAT32_C( -382.99), SIMDE_FLOAT32_C( 309.27), SIMDE_FLOAT32_C( -842.57), SIMDE_FLOAT32_C( -528.52), SIMDE_FLOAT32_C( 721.45), SIMDE_FLOAT32_C( 
845.89), SIMDE_FLOAT32_C( 986.00), SIMDE_FLOAT32_C( -376.69), SIMDE_FLOAT32_C( 497.14), SIMDE_FLOAT32_C( -252.21), SIMDE_FLOAT32_C( -641.80), SIMDE_FLOAT32_C( 829.75) }, { SIMDE_FLOAT32_C( -306.98), SIMDE_FLOAT32_C( 951.05), SIMDE_FLOAT32_C( -549.13), SIMDE_FLOAT32_C( -564.71), SIMDE_FLOAT32_C( 176.53), SIMDE_FLOAT32_C( -168.38), SIMDE_FLOAT32_C( 791.20), SIMDE_FLOAT32_C( -567.34), SIMDE_FLOAT32_C( 480.75), SIMDE_FLOAT32_C( 493.27), SIMDE_FLOAT32_C( 30.65), SIMDE_FLOAT32_C( 505.41), SIMDE_FLOAT32_C( 269.62), SIMDE_FLOAT32_C( -940.86), SIMDE_FLOAT32_C( 593.82), SIMDE_FLOAT32_C( 120.33) }, { SIMDE_FLOAT32_C( 904.40), SIMDE_FLOAT32_C( -667.39), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 1104.09), SIMDE_FLOAT32_C( 421.72), SIMDE_FLOAT32_C( 742.27), SIMDE_FLOAT32_C( 225.49), SIMDE_FLOAT32_C( -619.25), SIMDE_FLOAT32_C( 355.91), SIMDE_FLOAT32_C( 256.12), SIMDE_FLOAT32_C( -350.87), SIMDE_FLOAT32_C( 702.07), SIMDE_FLOAT32_C( -402.01), SIMDE_FLOAT32_C( -975.35), SIMDE_FLOAT32_C( 776.35), SIMDE_FLOAT32_C( 28.49) } }, { { SIMDE_FLOAT32_C( 710.95), SIMDE_FLOAT32_C( -47.91), SIMDE_FLOAT32_C( 171.59), SIMDE_FLOAT32_C( -672.04), SIMDE_FLOAT32_C( -738.64), SIMDE_FLOAT32_C( 329.02), SIMDE_FLOAT32_C( -200.57), SIMDE_FLOAT32_C( 982.81), SIMDE_FLOAT32_C( 174.91), SIMDE_FLOAT32_C( -214.56), SIMDE_FLOAT32_C( -393.88), SIMDE_FLOAT32_C( -327.95), SIMDE_FLOAT32_C( 533.22), SIMDE_FLOAT32_C( -35.69), SIMDE_FLOAT32_C( -498.20), SIMDE_FLOAT32_C( -773.76) }, UINT8_C(210), { SIMDE_FLOAT32_C( -47.34), SIMDE_FLOAT32_C( -338.47), SIMDE_FLOAT32_C( -908.10), SIMDE_FLOAT32_C( 784.28), SIMDE_FLOAT32_C( -547.27), SIMDE_FLOAT32_C( -475.45), SIMDE_FLOAT32_C( 265.03), SIMDE_FLOAT32_C( 946.00), SIMDE_FLOAT32_C( 555.20), SIMDE_FLOAT32_C( -229.56), SIMDE_FLOAT32_C( 215.62), SIMDE_FLOAT32_C( 614.34), SIMDE_FLOAT32_C( -635.74), SIMDE_FLOAT32_C( -664.05), SIMDE_FLOAT32_C( 325.29), SIMDE_FLOAT32_C( 316.35) }, { SIMDE_FLOAT32_C( 507.54), SIMDE_FLOAT32_C( 653.24), SIMDE_FLOAT32_C( 577.71), SIMDE_FLOAT32_C( -163.44), 
SIMDE_FLOAT32_C( -547.32), SIMDE_FLOAT32_C( 560.52), SIMDE_FLOAT32_C( -988.53), SIMDE_FLOAT32_C( 238.11), SIMDE_FLOAT32_C( -833.36), SIMDE_FLOAT32_C( -316.48), SIMDE_FLOAT32_C( -228.66), SIMDE_FLOAT32_C( 130.95), SIMDE_FLOAT32_C( 185.32), SIMDE_FLOAT32_C( -2.42), SIMDE_FLOAT32_C( -953.69), SIMDE_FLOAT32_C( -862.02) }, { SIMDE_FLOAT32_C( 710.95), SIMDE_FLOAT32_C( 735.72), SIMDE_FLOAT32_C( 171.59), SIMDE_FLOAT32_C( -672.04), SIMDE_FLOAT32_C( 773.99), SIMDE_FLOAT32_C( 329.02), SIMDE_FLOAT32_C( 1023.44), SIMDE_FLOAT32_C( 975.51), SIMDE_FLOAT32_C( 174.91), SIMDE_FLOAT32_C( -214.56), SIMDE_FLOAT32_C( -393.88), SIMDE_FLOAT32_C( -327.95), SIMDE_FLOAT32_C( 533.22), SIMDE_FLOAT32_C( -35.69), SIMDE_FLOAT32_C( -498.20), SIMDE_FLOAT32_C( -773.76) } }, { { SIMDE_FLOAT32_C( 659.11), SIMDE_FLOAT32_C( -861.79), SIMDE_FLOAT32_C( 922.26), SIMDE_FLOAT32_C( -888.16), SIMDE_FLOAT32_C( -337.24), SIMDE_FLOAT32_C( 187.30), SIMDE_FLOAT32_C( -942.16), SIMDE_FLOAT32_C( -782.04), SIMDE_FLOAT32_C( 957.74), SIMDE_FLOAT32_C( 273.45), SIMDE_FLOAT32_C( 832.30), SIMDE_FLOAT32_C( -678.00), SIMDE_FLOAT32_C( 609.40), SIMDE_FLOAT32_C( 157.59), SIMDE_FLOAT32_C( 638.35), SIMDE_FLOAT32_C( 116.94) }, UINT8_C(122), { SIMDE_FLOAT32_C( 216.06), SIMDE_FLOAT32_C( 953.51), SIMDE_FLOAT32_C( 263.51), SIMDE_FLOAT32_C( -223.42), SIMDE_FLOAT32_C( 964.98), SIMDE_FLOAT32_C( -498.37), SIMDE_FLOAT32_C( -56.78), SIMDE_FLOAT32_C( -351.50), SIMDE_FLOAT32_C( 272.97), SIMDE_FLOAT32_C( -925.83), SIMDE_FLOAT32_C( 833.82), SIMDE_FLOAT32_C( -729.45), SIMDE_FLOAT32_C( -879.52), SIMDE_FLOAT32_C( 971.80), SIMDE_FLOAT32_C( 929.66), SIMDE_FLOAT32_C( -741.31) }, { SIMDE_FLOAT32_C( 894.07), SIMDE_FLOAT32_C( -958.51), SIMDE_FLOAT32_C( -78.55), SIMDE_FLOAT32_C( 81.37), SIMDE_FLOAT32_C( -900.67), SIMDE_FLOAT32_C( 139.42), SIMDE_FLOAT32_C( 39.11), SIMDE_FLOAT32_C( 372.78), SIMDE_FLOAT32_C( -28.28), SIMDE_FLOAT32_C( 361.11), SIMDE_FLOAT32_C( -17.81), SIMDE_FLOAT32_C( -870.69), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -900.87), 
SIMDE_FLOAT32_C( -59.85), SIMDE_FLOAT32_C( -784.48) }, { SIMDE_FLOAT32_C( 659.11), SIMDE_FLOAT32_C( 1352.01), SIMDE_FLOAT32_C( 922.26), SIMDE_FLOAT32_C( 237.78), SIMDE_FLOAT32_C( 1320.00), SIMDE_FLOAT32_C( 517.50), SIMDE_FLOAT32_C( 68.95), SIMDE_FLOAT32_C( -782.04), SIMDE_FLOAT32_C( 957.74), SIMDE_FLOAT32_C( 273.45), SIMDE_FLOAT32_C( 832.30), SIMDE_FLOAT32_C( -678.00), SIMDE_FLOAT32_C( 609.40), SIMDE_FLOAT32_C( 157.59), SIMDE_FLOAT32_C( 638.35), SIMDE_FLOAT32_C( 116.94) } }, { { SIMDE_FLOAT32_C( -947.37), SIMDE_FLOAT32_C( -796.34), SIMDE_FLOAT32_C( -7.90), SIMDE_FLOAT32_C( -982.39), SIMDE_FLOAT32_C( -294.71), SIMDE_FLOAT32_C( 935.32), SIMDE_FLOAT32_C( -333.88), SIMDE_FLOAT32_C( 978.25), SIMDE_FLOAT32_C( -990.51), SIMDE_FLOAT32_C( -500.06), SIMDE_FLOAT32_C( -751.20), SIMDE_FLOAT32_C( -870.03), SIMDE_FLOAT32_C( -528.26), SIMDE_FLOAT32_C( -821.55), SIMDE_FLOAT32_C( -611.34), SIMDE_FLOAT32_C( -634.19) }, UINT8_C(234), { SIMDE_FLOAT32_C( 310.12), SIMDE_FLOAT32_C( 447.18), SIMDE_FLOAT32_C( -680.72), SIMDE_FLOAT32_C( -550.47), SIMDE_FLOAT32_C( -513.72), SIMDE_FLOAT32_C( 692.06), SIMDE_FLOAT32_C( 421.25), SIMDE_FLOAT32_C( 847.39), SIMDE_FLOAT32_C( -325.76), SIMDE_FLOAT32_C( 550.57), SIMDE_FLOAT32_C( -153.15), SIMDE_FLOAT32_C( -226.63), SIMDE_FLOAT32_C( -509.29), SIMDE_FLOAT32_C( 62.37), SIMDE_FLOAT32_C( -173.99), SIMDE_FLOAT32_C( -305.63) }, { SIMDE_FLOAT32_C( -945.53), SIMDE_FLOAT32_C( -156.38), SIMDE_FLOAT32_C( 399.66), SIMDE_FLOAT32_C( 989.79), SIMDE_FLOAT32_C( 509.74), SIMDE_FLOAT32_C( 377.91), SIMDE_FLOAT32_C( 999.28), SIMDE_FLOAT32_C( -990.32), SIMDE_FLOAT32_C( 626.71), SIMDE_FLOAT32_C( -870.75), SIMDE_FLOAT32_C( -518.58), SIMDE_FLOAT32_C( 805.16), SIMDE_FLOAT32_C( -482.08), SIMDE_FLOAT32_C( -152.77), SIMDE_FLOAT32_C( -974.89), SIMDE_FLOAT32_C( 828.03) }, { SIMDE_FLOAT32_C( -947.37), SIMDE_FLOAT32_C( 473.73), SIMDE_FLOAT32_C( -7.90), SIMDE_FLOAT32_C( 1132.56), SIMDE_FLOAT32_C( -294.71), SIMDE_FLOAT32_C( 788.52), SIMDE_FLOAT32_C( 1084.44), SIMDE_FLOAT32_C( 1303.38), 
SIMDE_FLOAT32_C( -990.51), SIMDE_FLOAT32_C( -500.06), SIMDE_FLOAT32_C( -751.20), SIMDE_FLOAT32_C( -870.03), SIMDE_FLOAT32_C( -528.26), SIMDE_FLOAT32_C( -821.55), SIMDE_FLOAT32_C( -611.34), SIMDE_FLOAT32_C( -634.19) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_mm512_mask_hypot_ps(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } /* Test for simde_mm512_hypot_pd. Each test vector holds two 8-element double inputs (a, b) and the expected 8-element result (r). The expected values in the table are consistent with sqrt(a*a + b*b) rounded to two decimals. Returns 0 once every vector has been checked. */ static int test_simde_mm512_hypot_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; /* first input */ const simde_float64 b[8]; /* second input */ const simde_float64 r[8]; /* expected result */ } test_vec[] = { { { SIMDE_FLOAT64_C( 275.20), SIMDE_FLOAT64_C( 366.24), SIMDE_FLOAT64_C( 966.11), SIMDE_FLOAT64_C( -937.96), SIMDE_FLOAT64_C( 570.22), SIMDE_FLOAT64_C( -7.21), SIMDE_FLOAT64_C( 612.58), SIMDE_FLOAT64_C( -184.69) }, { SIMDE_FLOAT64_C( -503.58), SIMDE_FLOAT64_C( 256.83), SIMDE_FLOAT64_C( 80.98), SIMDE_FLOAT64_C( -364.25), SIMDE_FLOAT64_C( 598.02), SIMDE_FLOAT64_C( -961.08), SIMDE_FLOAT64_C( 560.19), SIMDE_FLOAT64_C( -553.76) }, { SIMDE_FLOAT64_C( 573.87), SIMDE_FLOAT64_C( 447.32), SIMDE_FLOAT64_C( 969.50), SIMDE_FLOAT64_C( 1006.20), SIMDE_FLOAT64_C( 826.30), SIMDE_FLOAT64_C( 961.11), SIMDE_FLOAT64_C( 830.10), SIMDE_FLOAT64_C( 583.75) } }, { { SIMDE_FLOAT64_C( -373.20), SIMDE_FLOAT64_C( 625.48), SIMDE_FLOAT64_C( 871.64), SIMDE_FLOAT64_C( -503.55), SIMDE_FLOAT64_C( -900.28), SIMDE_FLOAT64_C( 58.59), SIMDE_FLOAT64_C( -493.99), SIMDE_FLOAT64_C( 103.21) }, { SIMDE_FLOAT64_C( 916.41), SIMDE_FLOAT64_C( 70.36), SIMDE_FLOAT64_C( -720.02), SIMDE_FLOAT64_C( -164.66), SIMDE_FLOAT64_C( 487.58), SIMDE_FLOAT64_C( -677.71), SIMDE_FLOAT64_C( -865.62), SIMDE_FLOAT64_C( -237.21) }, { SIMDE_FLOAT64_C( 989.49), SIMDE_FLOAT64_C( 629.42), SIMDE_FLOAT64_C( 1130.57), SIMDE_FLOAT64_C( 529.79), 
SIMDE_FLOAT64_C( 1023.84), SIMDE_FLOAT64_C( 680.24), SIMDE_FLOAT64_C( 996.66), SIMDE_FLOAT64_C( 258.69) } }, { { SIMDE_FLOAT64_C( 688.53), SIMDE_FLOAT64_C( -899.51), SIMDE_FLOAT64_C( -175.18), SIMDE_FLOAT64_C( 258.75), SIMDE_FLOAT64_C( 93.28), SIMDE_FLOAT64_C( -562.60), SIMDE_FLOAT64_C( -925.94), SIMDE_FLOAT64_C( 589.69) }, { SIMDE_FLOAT64_C( 694.23), SIMDE_FLOAT64_C( 155.04), SIMDE_FLOAT64_C( -774.56), SIMDE_FLOAT64_C( 292.25), SIMDE_FLOAT64_C( 193.96), SIMDE_FLOAT64_C( 785.64), SIMDE_FLOAT64_C( 738.49), SIMDE_FLOAT64_C( 820.76) }, { SIMDE_FLOAT64_C( 977.77), SIMDE_FLOAT64_C( 912.77), SIMDE_FLOAT64_C( 794.12), SIMDE_FLOAT64_C( 390.34), SIMDE_FLOAT64_C( 215.22), SIMDE_FLOAT64_C( 966.31), SIMDE_FLOAT64_C( 1184.37), SIMDE_FLOAT64_C( 1010.63) } }, { { SIMDE_FLOAT64_C( 411.12), SIMDE_FLOAT64_C( 610.13), SIMDE_FLOAT64_C( -682.79), SIMDE_FLOAT64_C( 510.84), SIMDE_FLOAT64_C( -331.28), SIMDE_FLOAT64_C( -176.78), SIMDE_FLOAT64_C( -385.95), SIMDE_FLOAT64_C( -414.87) }, { SIMDE_FLOAT64_C( 893.58), SIMDE_FLOAT64_C( -105.97), SIMDE_FLOAT64_C( 420.47), SIMDE_FLOAT64_C( 381.16), SIMDE_FLOAT64_C( 216.32), SIMDE_FLOAT64_C( 554.85), SIMDE_FLOAT64_C( -856.05), SIMDE_FLOAT64_C( -95.14) }, { SIMDE_FLOAT64_C( 983.62), SIMDE_FLOAT64_C( 619.26), SIMDE_FLOAT64_C( 801.87), SIMDE_FLOAT64_C( 637.37), SIMDE_FLOAT64_C( 395.65), SIMDE_FLOAT64_C( 582.33), SIMDE_FLOAT64_C( 939.03), SIMDE_FLOAT64_C( 425.64) } }, { { SIMDE_FLOAT64_C( 655.34), SIMDE_FLOAT64_C( -31.23), SIMDE_FLOAT64_C( -836.39), SIMDE_FLOAT64_C( -251.38), SIMDE_FLOAT64_C( 406.17), SIMDE_FLOAT64_C( -762.33), SIMDE_FLOAT64_C( -661.69), SIMDE_FLOAT64_C( 100.40) }, { SIMDE_FLOAT64_C( 392.71), SIMDE_FLOAT64_C( -436.24), SIMDE_FLOAT64_C( -607.35), SIMDE_FLOAT64_C( -413.33), SIMDE_FLOAT64_C( -650.61), SIMDE_FLOAT64_C( -868.86), SIMDE_FLOAT64_C( -592.57), SIMDE_FLOAT64_C( 760.51) }, { SIMDE_FLOAT64_C( 764.00), SIMDE_FLOAT64_C( 437.36), SIMDE_FLOAT64_C( 1033.65), SIMDE_FLOAT64_C( 483.77), SIMDE_FLOAT64_C( 766.99), SIMDE_FLOAT64_C( 1155.88), 
SIMDE_FLOAT64_C( 888.24), SIMDE_FLOAT64_C( 767.11) } }, { { SIMDE_FLOAT64_C( 741.27), SIMDE_FLOAT64_C( -275.37), SIMDE_FLOAT64_C( 271.35), SIMDE_FLOAT64_C( -590.01), SIMDE_FLOAT64_C( 547.85), SIMDE_FLOAT64_C( 885.41), SIMDE_FLOAT64_C( -4.88), SIMDE_FLOAT64_C( 441.42) }, { SIMDE_FLOAT64_C( -220.56), SIMDE_FLOAT64_C( -584.41), SIMDE_FLOAT64_C( -177.42), SIMDE_FLOAT64_C( 995.76), SIMDE_FLOAT64_C( 970.44), SIMDE_FLOAT64_C( -33.47), SIMDE_FLOAT64_C( -99.38), SIMDE_FLOAT64_C( 625.78) }, { SIMDE_FLOAT64_C( 773.39), SIMDE_FLOAT64_C( 646.04), SIMDE_FLOAT64_C( 324.20), SIMDE_FLOAT64_C( 1157.43), SIMDE_FLOAT64_C( 1114.40), SIMDE_FLOAT64_C( 886.04), SIMDE_FLOAT64_C( 99.50), SIMDE_FLOAT64_C( 765.80) } }, { { SIMDE_FLOAT64_C( 935.30), SIMDE_FLOAT64_C( 64.23), SIMDE_FLOAT64_C( -625.60), SIMDE_FLOAT64_C( 341.47), SIMDE_FLOAT64_C( 301.89), SIMDE_FLOAT64_C( -287.29), SIMDE_FLOAT64_C( -558.13), SIMDE_FLOAT64_C( -305.40) }, { SIMDE_FLOAT64_C( 276.47), SIMDE_FLOAT64_C( -165.48), SIMDE_FLOAT64_C( 281.27), SIMDE_FLOAT64_C( 625.86), SIMDE_FLOAT64_C( -34.34), SIMDE_FLOAT64_C( 688.70), SIMDE_FLOAT64_C( 386.37), SIMDE_FLOAT64_C( -293.08) }, { SIMDE_FLOAT64_C( 975.31), SIMDE_FLOAT64_C( 177.51), SIMDE_FLOAT64_C( 685.92), SIMDE_FLOAT64_C( 712.95), SIMDE_FLOAT64_C( 303.84), SIMDE_FLOAT64_C( 746.22), SIMDE_FLOAT64_C( 678.82), SIMDE_FLOAT64_C( 423.28) } }, { { SIMDE_FLOAT64_C( -586.67), SIMDE_FLOAT64_C( -342.28), SIMDE_FLOAT64_C( 116.91), SIMDE_FLOAT64_C( 961.18), SIMDE_FLOAT64_C( -456.87), SIMDE_FLOAT64_C( -887.97), SIMDE_FLOAT64_C( 402.60), SIMDE_FLOAT64_C( 322.57) }, { SIMDE_FLOAT64_C( -472.39), SIMDE_FLOAT64_C( -774.82), SIMDE_FLOAT64_C( 318.33), SIMDE_FLOAT64_C( -501.95), SIMDE_FLOAT64_C( 191.71), SIMDE_FLOAT64_C( -781.04), SIMDE_FLOAT64_C( -876.17), SIMDE_FLOAT64_C( 127.01) }, { SIMDE_FLOAT64_C( 753.22), SIMDE_FLOAT64_C( 847.05), SIMDE_FLOAT64_C( 339.12), SIMDE_FLOAT64_C( 1084.35), SIMDE_FLOAT64_C( 495.46), SIMDE_FLOAT64_C( 1182.59), SIMDE_FLOAT64_C( 964.24), SIMDE_FLOAT64_C( 346.67) } } }; 
/* Walk every entry of test_vec (count derived from sizeof), load both inputs, call simde_mm512_hypot_pd and compare with the expected vector; the trailing 1 is the precision argument handed to the assert helper. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_mm512_hypot_pd(a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } /* Test for simde_mm512_mask_hypot_pd. Each test vector holds a pass-through source (src), an 8-bit lane mask (k), two inputs (a, b) and the expected result (r). In the table, r carries the hypot of a and b in lanes whose bit in k is 1 and the corresponding src value in lanes whose bit is 0. Returns 0 once every vector has been checked. */ static int test_simde_mm512_mask_hypot_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; /* values expected in unselected lanes */ const simde__mmask8 k; /* one bit per lane */ const simde_float64 a[8]; /* first input */ const simde_float64 b[8]; /* second input */ const simde_float64 r[8]; /* expected result */ } test_vec[] = { { { SIMDE_FLOAT64_C( -431.95), SIMDE_FLOAT64_C( -237.84), SIMDE_FLOAT64_C( 748.51), SIMDE_FLOAT64_C( 841.10), SIMDE_FLOAT64_C( -673.54), SIMDE_FLOAT64_C( 668.62), SIMDE_FLOAT64_C( 514.70), SIMDE_FLOAT64_C( -656.78) }, UINT8_C(201), { SIMDE_FLOAT64_C( 160.07), SIMDE_FLOAT64_C( -729.81), SIMDE_FLOAT64_C( -33.18), SIMDE_FLOAT64_C( 130.28), SIMDE_FLOAT64_C( 345.30), SIMDE_FLOAT64_C( -333.34), SIMDE_FLOAT64_C( -285.62), SIMDE_FLOAT64_C( -843.08) }, { SIMDE_FLOAT64_C( -705.31), SIMDE_FLOAT64_C( -528.34), SIMDE_FLOAT64_C( 222.02), SIMDE_FLOAT64_C( -760.66), SIMDE_FLOAT64_C( -344.72), SIMDE_FLOAT64_C( -209.64), SIMDE_FLOAT64_C( -687.68), SIMDE_FLOAT64_C( 52.34) }, { SIMDE_FLOAT64_C( 723.25), SIMDE_FLOAT64_C( -237.84), SIMDE_FLOAT64_C( 748.51), SIMDE_FLOAT64_C( 771.74), SIMDE_FLOAT64_C( -673.54), SIMDE_FLOAT64_C( 668.62), SIMDE_FLOAT64_C( 744.64), SIMDE_FLOAT64_C( 844.70) } }, { { SIMDE_FLOAT64_C( 859.76), SIMDE_FLOAT64_C( 134.54), SIMDE_FLOAT64_C( -771.62), SIMDE_FLOAT64_C( -408.76), SIMDE_FLOAT64_C( 106.34), SIMDE_FLOAT64_C( -575.90), SIMDE_FLOAT64_C( 159.29), SIMDE_FLOAT64_C( 868.50) }, UINT8_C(223), { SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( -805.04), SIMDE_FLOAT64_C( 841.23), SIMDE_FLOAT64_C( -484.91), SIMDE_FLOAT64_C( -461.82), SIMDE_FLOAT64_C( 403.45), SIMDE_FLOAT64_C( 675.17), SIMDE_FLOAT64_C( -191.63) }, { SIMDE_FLOAT64_C( -629.72), SIMDE_FLOAT64_C( -194.56), SIMDE_FLOAT64_C( -846.33), SIMDE_FLOAT64_C( 36.94), 
SIMDE_FLOAT64_C( 519.83), SIMDE_FLOAT64_C( -689.41), SIMDE_FLOAT64_C( 331.63), SIMDE_FLOAT64_C( 991.49) }, { SIMDE_FLOAT64_C( 629.72), SIMDE_FLOAT64_C( 828.22), SIMDE_FLOAT64_C( 1193.29), SIMDE_FLOAT64_C( 486.31), SIMDE_FLOAT64_C( 695.34), SIMDE_FLOAT64_C( -575.90), SIMDE_FLOAT64_C( 752.22), SIMDE_FLOAT64_C( 1009.84) } }, { { SIMDE_FLOAT64_C( 532.61), SIMDE_FLOAT64_C( 570.97), SIMDE_FLOAT64_C( -353.24), SIMDE_FLOAT64_C( -677.03), SIMDE_FLOAT64_C( 883.29), SIMDE_FLOAT64_C( 699.10), SIMDE_FLOAT64_C( -817.27), SIMDE_FLOAT64_C( 17.83) }, UINT8_C(222), { SIMDE_FLOAT64_C( -226.03), SIMDE_FLOAT64_C( -875.83), SIMDE_FLOAT64_C( -648.42), SIMDE_FLOAT64_C( 933.26), SIMDE_FLOAT64_C( 992.67), SIMDE_FLOAT64_C( -475.82), SIMDE_FLOAT64_C( -66.35), SIMDE_FLOAT64_C( -812.37) }, { SIMDE_FLOAT64_C( -634.58), SIMDE_FLOAT64_C( 448.74), SIMDE_FLOAT64_C( -274.19), SIMDE_FLOAT64_C( 768.87), SIMDE_FLOAT64_C( 123.91), SIMDE_FLOAT64_C( 534.18), SIMDE_FLOAT64_C( -860.86), SIMDE_FLOAT64_C( 929.35) }, { SIMDE_FLOAT64_C( 532.61), SIMDE_FLOAT64_C( 984.10), SIMDE_FLOAT64_C( 704.01), SIMDE_FLOAT64_C( 1209.19), SIMDE_FLOAT64_C( 1000.37), SIMDE_FLOAT64_C( 699.10), SIMDE_FLOAT64_C( 863.41), SIMDE_FLOAT64_C( 1234.36) } }, { { SIMDE_FLOAT64_C( 687.85), SIMDE_FLOAT64_C( 176.08), SIMDE_FLOAT64_C( 449.18), SIMDE_FLOAT64_C( 998.45), SIMDE_FLOAT64_C( -492.29), SIMDE_FLOAT64_C( 440.66), SIMDE_FLOAT64_C( 531.06), SIMDE_FLOAT64_C( -921.32) }, UINT8_C( 88), { SIMDE_FLOAT64_C( 854.03), SIMDE_FLOAT64_C( 961.97), SIMDE_FLOAT64_C( 786.53), SIMDE_FLOAT64_C( -963.25), SIMDE_FLOAT64_C( -20.20), SIMDE_FLOAT64_C( 714.01), SIMDE_FLOAT64_C( -189.28), SIMDE_FLOAT64_C( 103.97) }, { SIMDE_FLOAT64_C( -934.41), SIMDE_FLOAT64_C( -256.02), SIMDE_FLOAT64_C( 96.64), SIMDE_FLOAT64_C( -410.23), SIMDE_FLOAT64_C( 677.63), SIMDE_FLOAT64_C( 284.27), SIMDE_FLOAT64_C( -44.81), SIMDE_FLOAT64_C( 126.37) }, { SIMDE_FLOAT64_C( 687.85), SIMDE_FLOAT64_C( 176.08), SIMDE_FLOAT64_C( 449.18), SIMDE_FLOAT64_C( 1046.97), SIMDE_FLOAT64_C( 677.93), 
SIMDE_FLOAT64_C( 440.66), SIMDE_FLOAT64_C( 194.51), SIMDE_FLOAT64_C( -921.32) } }, { { SIMDE_FLOAT64_C( -989.92), SIMDE_FLOAT64_C( -275.94), SIMDE_FLOAT64_C( -749.72), SIMDE_FLOAT64_C( 544.27), SIMDE_FLOAT64_C( -136.80), SIMDE_FLOAT64_C( -820.37), SIMDE_FLOAT64_C( 232.12), SIMDE_FLOAT64_C( -960.72) }, UINT8_C( 98), { SIMDE_FLOAT64_C( 230.57), SIMDE_FLOAT64_C( -453.01), SIMDE_FLOAT64_C( 69.47), SIMDE_FLOAT64_C( -238.38), SIMDE_FLOAT64_C( -374.34), SIMDE_FLOAT64_C( 156.90), SIMDE_FLOAT64_C( -384.35), SIMDE_FLOAT64_C( -412.37) }, { SIMDE_FLOAT64_C( -56.57), SIMDE_FLOAT64_C( -347.60), SIMDE_FLOAT64_C( 567.43), SIMDE_FLOAT64_C( -342.56), SIMDE_FLOAT64_C( 463.12), SIMDE_FLOAT64_C( -328.60), SIMDE_FLOAT64_C( -276.97), SIMDE_FLOAT64_C( -792.90) }, { SIMDE_FLOAT64_C( -989.92), SIMDE_FLOAT64_C( 571.00), SIMDE_FLOAT64_C( -749.72), SIMDE_FLOAT64_C( 544.27), SIMDE_FLOAT64_C( -136.80), SIMDE_FLOAT64_C( 364.14), SIMDE_FLOAT64_C( 473.75), SIMDE_FLOAT64_C( -960.72) } }, { { SIMDE_FLOAT64_C( 768.04), SIMDE_FLOAT64_C( 312.80), SIMDE_FLOAT64_C( 884.73), SIMDE_FLOAT64_C( 52.31), SIMDE_FLOAT64_C( -732.01), SIMDE_FLOAT64_C( 11.11), SIMDE_FLOAT64_C( 62.39), SIMDE_FLOAT64_C( -7.95) }, UINT8_C(156), { SIMDE_FLOAT64_C( -393.34), SIMDE_FLOAT64_C( 855.25), SIMDE_FLOAT64_C( 441.02), SIMDE_FLOAT64_C( 838.78), SIMDE_FLOAT64_C( 894.53), SIMDE_FLOAT64_C( 69.83), SIMDE_FLOAT64_C( 69.35), SIMDE_FLOAT64_C( -558.49) }, { SIMDE_FLOAT64_C( -860.69), SIMDE_FLOAT64_C( 830.97), SIMDE_FLOAT64_C( 67.18), SIMDE_FLOAT64_C( 296.21), SIMDE_FLOAT64_C( -553.38), SIMDE_FLOAT64_C( 654.81), SIMDE_FLOAT64_C( -760.36), SIMDE_FLOAT64_C( 99.02) }, { SIMDE_FLOAT64_C( 768.04), SIMDE_FLOAT64_C( 312.80), SIMDE_FLOAT64_C( 446.11), SIMDE_FLOAT64_C( 889.55), SIMDE_FLOAT64_C( 1051.86), SIMDE_FLOAT64_C( 11.11), SIMDE_FLOAT64_C( 62.39), SIMDE_FLOAT64_C( 567.20) } }, { { SIMDE_FLOAT64_C( 222.24), SIMDE_FLOAT64_C( -102.92), SIMDE_FLOAT64_C( -437.85), SIMDE_FLOAT64_C( 893.64), SIMDE_FLOAT64_C( 620.10), SIMDE_FLOAT64_C( -230.75), 
SIMDE_FLOAT64_C( 661.68), SIMDE_FLOAT64_C( -67.10) }, UINT8_C( 62), { SIMDE_FLOAT64_C( -286.01), SIMDE_FLOAT64_C( 200.89), SIMDE_FLOAT64_C( 665.09), SIMDE_FLOAT64_C( 776.38), SIMDE_FLOAT64_C( -807.06), SIMDE_FLOAT64_C( -73.52), SIMDE_FLOAT64_C( -616.96), SIMDE_FLOAT64_C( -951.82) }, { SIMDE_FLOAT64_C( -632.50), SIMDE_FLOAT64_C( -778.18), SIMDE_FLOAT64_C( 942.71), SIMDE_FLOAT64_C( 437.33), SIMDE_FLOAT64_C( 291.17), SIMDE_FLOAT64_C( -615.78), SIMDE_FLOAT64_C( 576.64), SIMDE_FLOAT64_C( 122.14) }, { SIMDE_FLOAT64_C( 222.24), SIMDE_FLOAT64_C( 803.69), SIMDE_FLOAT64_C( 1153.71), SIMDE_FLOAT64_C( 891.08), SIMDE_FLOAT64_C( 857.98), SIMDE_FLOAT64_C( 620.15), SIMDE_FLOAT64_C( 661.68), SIMDE_FLOAT64_C( -67.10) } }, { { SIMDE_FLOAT64_C( 451.40), SIMDE_FLOAT64_C( -127.16), SIMDE_FLOAT64_C( 568.75), SIMDE_FLOAT64_C( 106.22), SIMDE_FLOAT64_C( 112.48), SIMDE_FLOAT64_C( -332.22), SIMDE_FLOAT64_C( -671.54), SIMDE_FLOAT64_C( -990.45) }, UINT8_C(133), { SIMDE_FLOAT64_C( -777.90), SIMDE_FLOAT64_C( 629.66), SIMDE_FLOAT64_C( 999.17), SIMDE_FLOAT64_C( 883.78), SIMDE_FLOAT64_C( -437.44), SIMDE_FLOAT64_C( -346.84), SIMDE_FLOAT64_C( -402.24), SIMDE_FLOAT64_C( 763.45) }, { SIMDE_FLOAT64_C( -681.75), SIMDE_FLOAT64_C( -625.86), SIMDE_FLOAT64_C( 956.39), SIMDE_FLOAT64_C( 244.73), SIMDE_FLOAT64_C( -242.82), SIMDE_FLOAT64_C( -995.43), SIMDE_FLOAT64_C( 612.23), SIMDE_FLOAT64_C( -21.00) }, { SIMDE_FLOAT64_C( 1034.37), SIMDE_FLOAT64_C( -127.16), SIMDE_FLOAT64_C( 1383.12), SIMDE_FLOAT64_C( 106.22), SIMDE_FLOAT64_C( 112.48), SIMDE_FLOAT64_C( -332.22), SIMDE_FLOAT64_C( -671.54), SIMDE_FLOAT64_C( 763.74) } } }; /* Walk every entry of test_vec, load src/a/b, apply the masked hypot with this entry's k and compare against the expected vector (precision argument 1). */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_mm512_mask_hypot_pd(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; 
} /* Test for simde_mm_invcbrt_ps. Each test vector holds a 4-element float input (a) and the expected 4-element result (r). The expected values in the table are consistent with 1/cbrt(a) rounded to two decimals, keeping the sign of a. Returns 0 once every vector has been checked. */ static int test_simde_mm_invcbrt_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; /* input */ const simde_float32 r[4]; /* expected result */ } test_vec[] = { { { SIMDE_FLOAT32_C( -830.78), SIMDE_FLOAT32_C( 407.78), SIMDE_FLOAT32_C( 34.12), SIMDE_FLOAT32_C( -431.04) }, { SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( -0.13) } }, { { SIMDE_FLOAT32_C( -838.35), SIMDE_FLOAT32_C( -741.30), SIMDE_FLOAT32_C( 354.85), SIMDE_FLOAT32_C( -840.30) }, { SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -0.11) } }, { { SIMDE_FLOAT32_C( -332.67), SIMDE_FLOAT32_C( 463.71), SIMDE_FLOAT32_C( -606.20), SIMDE_FLOAT32_C( -312.79) }, { SIMDE_FLOAT32_C( -0.14), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( -0.15) } }, { { SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( -815.81), SIMDE_FLOAT32_C( -819.10), SIMDE_FLOAT32_C( -853.90) }, { SIMDE_FLOAT32_C( -1.55), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.11) } }, { { SIMDE_FLOAT32_C( -112.18), SIMDE_FLOAT32_C( 14.21), SIMDE_FLOAT32_C( 387.92), SIMDE_FLOAT32_C( -952.65) }, { SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -0.10) } }, { { SIMDE_FLOAT32_C( -492.35), SIMDE_FLOAT32_C( 204.52), SIMDE_FLOAT32_C( -434.43), SIMDE_FLOAT32_C( 455.92) }, { SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 0.13) } }, { { SIMDE_FLOAT32_C( -372.57), SIMDE_FLOAT32_C( -697.63), SIMDE_FLOAT32_C( -993.40), SIMDE_FLOAT32_C( 96.43) }, { SIMDE_FLOAT32_C( -0.14), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.22) } }, { { SIMDE_FLOAT32_C( -450.23), SIMDE_FLOAT32_C( 393.40), SIMDE_FLOAT32_C( 531.72), SIMDE_FLOAT32_C( -281.01) }, { SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.15) } } }; /* Walk every entry of test_vec (count derived from sizeof) and check the computed vector against the expected one. */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = 
simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_invcbrt_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } /* Test for simde_mm_invcbrt_pd. Each test vector holds a 2-element double input (a) and the expected 2-element result (r); the expected values are consistent with 1/cbrt(a) rounded to two decimals. Returns 0 once every vector has been checked. */ static int test_simde_mm_invcbrt_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; /* input */ const simde_float64 r[2]; /* expected result */ } test_vec[] = { { { SIMDE_FLOAT64_C( -362.46), SIMDE_FLOAT64_C( 897.33) }, { SIMDE_FLOAT64_C( -0.14), SIMDE_FLOAT64_C( 0.10) } }, { { SIMDE_FLOAT64_C( -324.66), SIMDE_FLOAT64_C( -116.25) }, { SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( -0.20) } }, { { SIMDE_FLOAT64_C( -229.39), SIMDE_FLOAT64_C( -924.64) }, { SIMDE_FLOAT64_C( -0.16), SIMDE_FLOAT64_C( -0.10) } }, { { SIMDE_FLOAT64_C( 619.01), SIMDE_FLOAT64_C( -919.66) }, { SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( -0.10) } }, { { SIMDE_FLOAT64_C( -996.99), SIMDE_FLOAT64_C( -352.60) }, { SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( -0.14) } }, { { SIMDE_FLOAT64_C( -639.25), SIMDE_FLOAT64_C( 29.93) }, { SIMDE_FLOAT64_C( -0.12), SIMDE_FLOAT64_C( 0.32) } }, { { SIMDE_FLOAT64_C( -468.42), SIMDE_FLOAT64_C( 775.98) }, { SIMDE_FLOAT64_C( -0.13), SIMDE_FLOAT64_C( 0.11) } }, { { SIMDE_FLOAT64_C( -721.32), SIMDE_FLOAT64_C( 122.22) }, { SIMDE_FLOAT64_C( -0.11), SIMDE_FLOAT64_C( 0.20) } } }; /* Walk every entry of test_vec, compute the result for a and compare it with r (precision argument 1). */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_invcbrt_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } /* Test for simde_mm256_invcbrt_ps. Each test vector holds an 8-element float input (a) and the expected 8-element result (r); the expected values are consistent with 1/cbrt(a) rounded to two decimals. Returns 0 once every vector has been checked. */ static int test_simde_mm256_invcbrt_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; /* input */ const simde_float32 r[8]; /* expected result */ } test_vec[] = { { { SIMDE_FLOAT32_C( 91.84), SIMDE_FLOAT32_C( -751.70), SIMDE_FLOAT32_C( 15.02), SIMDE_FLOAT32_C( -388.95), SIMDE_FLOAT32_C( 99.77), SIMDE_FLOAT32_C( 919.81), SIMDE_FLOAT32_C( 65.75), SIMDE_FLOAT32_C( -859.67) }, { SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( -0.14), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 
0.10), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.11) } }, { { SIMDE_FLOAT32_C( -294.11), SIMDE_FLOAT32_C( 51.33), SIMDE_FLOAT32_C( -783.32), SIMDE_FLOAT32_C( -179.27), SIMDE_FLOAT32_C( -759.73), SIMDE_FLOAT32_C( -346.33), SIMDE_FLOAT32_C( 701.43), SIMDE_FLOAT32_C( 29.88) }, { SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.14), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.32) } }, { { SIMDE_FLOAT32_C( -448.16), SIMDE_FLOAT32_C( -516.54), SIMDE_FLOAT32_C( -452.98), SIMDE_FLOAT32_C( 948.25), SIMDE_FLOAT32_C( 387.51), SIMDE_FLOAT32_C( 585.82), SIMDE_FLOAT32_C( -920.12), SIMDE_FLOAT32_C( -81.56) }, { SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( -0.23) } }, { { SIMDE_FLOAT32_C( -341.26), SIMDE_FLOAT32_C( -436.41), SIMDE_FLOAT32_C( 422.76), SIMDE_FLOAT32_C( -782.86), SIMDE_FLOAT32_C( -131.30), SIMDE_FLOAT32_C( -313.86), SIMDE_FLOAT32_C( 339.30), SIMDE_FLOAT32_C( 960.53) }, { SIMDE_FLOAT32_C( -0.14), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.10) } }, { { SIMDE_FLOAT32_C( -65.56), SIMDE_FLOAT32_C( -645.68), SIMDE_FLOAT32_C( -428.41), SIMDE_FLOAT32_C( -965.79), SIMDE_FLOAT32_C( -725.86), SIMDE_FLOAT32_C( 637.33), SIMDE_FLOAT32_C( -825.46), SIMDE_FLOAT32_C( -19.97) }, { SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.37) } }, { { SIMDE_FLOAT32_C( -311.34), SIMDE_FLOAT32_C( -608.78), SIMDE_FLOAT32_C( 800.75), SIMDE_FLOAT32_C( -71.07), SIMDE_FLOAT32_C( 44.89), SIMDE_FLOAT32_C( 502.19), SIMDE_FLOAT32_C( 958.81), SIMDE_FLOAT32_C( 596.72) }, { SIMDE_FLOAT32_C( -0.15), 
SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.12) } }, { { SIMDE_FLOAT32_C( 985.65), SIMDE_FLOAT32_C( -494.17), SIMDE_FLOAT32_C( 544.98), SIMDE_FLOAT32_C( 373.15), SIMDE_FLOAT32_C( -908.35), SIMDE_FLOAT32_C( 624.86), SIMDE_FLOAT32_C( -708.41), SIMDE_FLOAT32_C( -249.62) }, { SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.16) } }, { { SIMDE_FLOAT32_C( -811.55), SIMDE_FLOAT32_C( 714.36), SIMDE_FLOAT32_C( -32.48), SIMDE_FLOAT32_C( 57.15), SIMDE_FLOAT32_C( -599.50), SIMDE_FLOAT32_C( -693.18), SIMDE_FLOAT32_C( 17.68), SIMDE_FLOAT32_C( 334.94) }, { SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.14) } } }; /* Walk every entry of test_vec, compute simde_mm256_invcbrt_ps of a and compare it with r (precision argument 1). */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_invcbrt_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } /* Test for simde_mm256_invcbrt_pd. Each test vector holds a 4-element double input (a) and the expected 4-element result (r); the expected values are consistent with 1/cbrt(a) rounded to two decimals. Returns 0 once every vector has been checked. */ static int test_simde_mm256_invcbrt_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; /* input */ const simde_float64 r[4]; /* expected result */ } test_vec[] = { { { SIMDE_FLOAT64_C( -253.42), SIMDE_FLOAT64_C( -775.86), SIMDE_FLOAT64_C( 7.55), SIMDE_FLOAT64_C( 246.09) }, { SIMDE_FLOAT64_C( -0.16), SIMDE_FLOAT64_C( -0.11), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.16) } }, { { SIMDE_FLOAT64_C( -201.99), SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( -584.03), SIMDE_FLOAT64_C( -671.92) }, { SIMDE_FLOAT64_C( -0.17), SIMDE_FLOAT64_C( 1.22), SIMDE_FLOAT64_C( -0.12), SIMDE_FLOAT64_C( -0.11) } }, { { SIMDE_FLOAT64_C( 851.57), SIMDE_FLOAT64_C( 459.01), SIMDE_FLOAT64_C( 394.56), SIMDE_FLOAT64_C( 866.29) }, { SIMDE_FLOAT64_C( 0.11), 
SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 0.10) } }, { { SIMDE_FLOAT64_C( 645.75), SIMDE_FLOAT64_C( 575.99), SIMDE_FLOAT64_C( 41.51), SIMDE_FLOAT64_C( -177.11) }, { SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( 0.29), SIMDE_FLOAT64_C( -0.18) } }, { { SIMDE_FLOAT64_C( -632.82), SIMDE_FLOAT64_C( 815.53), SIMDE_FLOAT64_C( -21.43), SIMDE_FLOAT64_C( -406.93) }, { SIMDE_FLOAT64_C( -0.12), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( -0.36), SIMDE_FLOAT64_C( -0.13) } }, { { SIMDE_FLOAT64_C( 471.99), SIMDE_FLOAT64_C( -996.82), SIMDE_FLOAT64_C( -716.04), SIMDE_FLOAT64_C( -550.05) }, { SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( -0.11), SIMDE_FLOAT64_C( -0.12) } }, { { SIMDE_FLOAT64_C( 564.26), SIMDE_FLOAT64_C( -164.60), SIMDE_FLOAT64_C( -303.42), SIMDE_FLOAT64_C( -304.34) }, { SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( -0.18), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( -0.15) } }, { { SIMDE_FLOAT64_C( 749.99), SIMDE_FLOAT64_C( 564.62), SIMDE_FLOAT64_C( -957.88), SIMDE_FLOAT64_C( -503.43) }, { SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( -0.13) } } }; /* Walk every entry of test_vec, compute simde_mm256_invcbrt_pd of a and compare it with r (precision argument 1). */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_invcbrt_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } /* Test for simde_mm_invsqrt_ps. Each test vector holds a 4-element float input (a, all positive in this table) and the expected 4-element result (r); the expected values are consistent with 1/sqrt(a) rounded to two decimals. Returns 0 once every vector has been checked. */ static int test_simde_mm_invsqrt_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; /* input */ const simde_float32 r[4]; /* expected result */ } test_vec[] = { { { SIMDE_FLOAT32_C( 963.10), SIMDE_FLOAT32_C( 544.41), SIMDE_FLOAT32_C( 741.04), SIMDE_FLOAT32_C( 478.93) }, { SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05) } }, { { SIMDE_FLOAT32_C( 289.81), SIMDE_FLOAT32_C( 489.84), SIMDE_FLOAT32_C( 576.93), SIMDE_FLOAT32_C( 960.27) }, { SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.03) } 
}, { { SIMDE_FLOAT32_C( 308.08), SIMDE_FLOAT32_C( 66.08), SIMDE_FLOAT32_C( 486.27), SIMDE_FLOAT32_C( 318.16) }, { SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.06) } }, { { SIMDE_FLOAT32_C( 848.25), SIMDE_FLOAT32_C( 887.84), SIMDE_FLOAT32_C( 814.84), SIMDE_FLOAT32_C( 533.08) }, { SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04) } }, { { SIMDE_FLOAT32_C( 476.90), SIMDE_FLOAT32_C( 887.49), SIMDE_FLOAT32_C( 751.34), SIMDE_FLOAT32_C( 508.49) }, { SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04) } }, { { SIMDE_FLOAT32_C( 679.70), SIMDE_FLOAT32_C( 603.84), SIMDE_FLOAT32_C( 905.34), SIMDE_FLOAT32_C( 39.88) }, { SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.16) } }, { { SIMDE_FLOAT32_C( 629.17), SIMDE_FLOAT32_C( 401.81), SIMDE_FLOAT32_C( 823.42), SIMDE_FLOAT32_C( 435.02) }, { SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.05) } }, { { SIMDE_FLOAT32_C( 727.18), SIMDE_FLOAT32_C( 800.47), SIMDE_FLOAT32_C( 32.70), SIMDE_FLOAT32_C( 690.28) }, { SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.04) } } }; /* Walk every entry of test_vec, compute simde_mm_invsqrt_ps of a and compare it with r (precision argument 1). */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_invsqrt_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } /* Test for simde_mm_invsqrt_pd. Each test vector holds a 2-element double input (a, all positive in this table) and the expected 2-element result (r); the expected values are consistent with 1/sqrt(a) rounded to two decimals. Returns 0 once every vector has been checked. */ static int test_simde_mm_invsqrt_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; /* input */ const simde_float64 r[2]; /* expected result */ } test_vec[] = { { { SIMDE_FLOAT64_C( 387.27), SIMDE_FLOAT64_C( 266.58) }, { SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.06) } }, { { SIMDE_FLOAT64_C( 629.96), SIMDE_FLOAT64_C( 591.67) }, { SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.04) } }, { { SIMDE_FLOAT64_C( 185.36), SIMDE_FLOAT64_C( 529.90) }, { SIMDE_FLOAT64_C( 0.07), 
SIMDE_FLOAT64_C( 0.04) } }, { { SIMDE_FLOAT64_C( 429.91), SIMDE_FLOAT64_C( 539.03) }, { SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.04) } }, { { SIMDE_FLOAT64_C( 626.90), SIMDE_FLOAT64_C( 833.69) }, { SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.03) } }, { { SIMDE_FLOAT64_C( 722.07), SIMDE_FLOAT64_C( 296.55) }, { SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.06) } }, { { SIMDE_FLOAT64_C( 474.49), SIMDE_FLOAT64_C( 271.22) }, { SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.06) } }, { { SIMDE_FLOAT64_C( 980.81), SIMDE_FLOAT64_C( 981.24) }, { SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.03) } } }; /* Walk every entry of test_vec, compute simde_mm_invsqrt_pd of a and compare it with r (precision argument 1). */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_invsqrt_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } /* Test for simde_mm256_invsqrt_ps. Each test vector holds an 8-element float input (a, all positive in this table) and the expected 8-element result (r); the expected values are consistent with 1/sqrt(a) rounded to two decimals. Returns 0 once every vector has been checked. */ static int test_simde_mm256_invsqrt_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; /* input */ const simde_float32 r[8]; /* expected result */ } test_vec[] = { { { SIMDE_FLOAT32_C( 523.53), SIMDE_FLOAT32_C( 456.96), SIMDE_FLOAT32_C( 204.64), SIMDE_FLOAT32_C( 395.38), SIMDE_FLOAT32_C( 112.91), SIMDE_FLOAT32_C( 473.53), SIMDE_FLOAT32_C( 965.22), SIMDE_FLOAT32_C( 423.85) }, { SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.05) } }, { { SIMDE_FLOAT32_C( 834.19), SIMDE_FLOAT32_C( 352.97), SIMDE_FLOAT32_C( 156.12), SIMDE_FLOAT32_C( 635.31), SIMDE_FLOAT32_C( 962.63), SIMDE_FLOAT32_C( 823.80), SIMDE_FLOAT32_C( 454.23), SIMDE_FLOAT32_C( 413.73) }, { SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.05) } }, { { SIMDE_FLOAT32_C( 443.70), SIMDE_FLOAT32_C( 770.20), SIMDE_FLOAT32_C( 506.36), SIMDE_FLOAT32_C( 13.18), SIMDE_FLOAT32_C( 957.34), SIMDE_FLOAT32_C( 388.10), 
SIMDE_FLOAT32_C( 124.63), SIMDE_FLOAT32_C( 5.64) }, { SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.42) } }, { { SIMDE_FLOAT32_C( 141.65), SIMDE_FLOAT32_C( 772.61), SIMDE_FLOAT32_C( 451.36), SIMDE_FLOAT32_C( 350.31), SIMDE_FLOAT32_C( 74.48), SIMDE_FLOAT32_C( 384.43), SIMDE_FLOAT32_C( 380.41), SIMDE_FLOAT32_C( 598.01) }, { SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.04) } }, { { SIMDE_FLOAT32_C( 841.39), SIMDE_FLOAT32_C( 585.05), SIMDE_FLOAT32_C( 993.40), SIMDE_FLOAT32_C( 954.30), SIMDE_FLOAT32_C( 58.58), SIMDE_FLOAT32_C( 958.61), SIMDE_FLOAT32_C( 378.15), SIMDE_FLOAT32_C( 892.77) }, { SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.03) } }, { { SIMDE_FLOAT32_C( 311.58), SIMDE_FLOAT32_C( 534.27), SIMDE_FLOAT32_C( 528.07), SIMDE_FLOAT32_C( 274.21), SIMDE_FLOAT32_C( 358.06), SIMDE_FLOAT32_C( 982.30), SIMDE_FLOAT32_C( 687.94), SIMDE_FLOAT32_C( 801.76) }, { SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04) } }, { { SIMDE_FLOAT32_C( 752.50), SIMDE_FLOAT32_C( 194.30), SIMDE_FLOAT32_C( 814.95), SIMDE_FLOAT32_C( 709.84), SIMDE_FLOAT32_C( 582.40), SIMDE_FLOAT32_C( 939.58), SIMDE_FLOAT32_C( 715.48), SIMDE_FLOAT32_C( 724.05) }, { SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04) } }, { { SIMDE_FLOAT32_C( 712.19), SIMDE_FLOAT32_C( 166.84), SIMDE_FLOAT32_C( 74.36), SIMDE_FLOAT32_C( 
786.67), SIMDE_FLOAT32_C( 551.27), SIMDE_FLOAT32_C( 454.77), SIMDE_FLOAT32_C( 384.69), SIMDE_FLOAT32_C( 392.66) }, { SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.05) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_invsqrt_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_invsqrt_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 35.16), SIMDE_FLOAT64_C( 340.96), SIMDE_FLOAT64_C( 60.32), SIMDE_FLOAT64_C( 560.44) }, { SIMDE_FLOAT64_C( 0.17), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( 0.04) } }, { { SIMDE_FLOAT64_C( 259.52), SIMDE_FLOAT64_C( 415.50), SIMDE_FLOAT64_C( 716.63), SIMDE_FLOAT64_C( 444.07) }, { SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.05) } }, { { SIMDE_FLOAT64_C( 714.85), SIMDE_FLOAT64_C( 53.22), SIMDE_FLOAT64_C( 199.06), SIMDE_FLOAT64_C( 714.03) }, { SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 0.07), SIMDE_FLOAT64_C( 0.04) } }, { { SIMDE_FLOAT64_C( 807.60), SIMDE_FLOAT64_C( 19.21), SIMDE_FLOAT64_C( 401.27), SIMDE_FLOAT64_C( 275.62) }, { SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.06) } }, { { SIMDE_FLOAT64_C( 69.48), SIMDE_FLOAT64_C( 716.42), SIMDE_FLOAT64_C( 754.51), SIMDE_FLOAT64_C( 517.80) }, { SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.04) } }, { { SIMDE_FLOAT64_C( 294.75), SIMDE_FLOAT64_C( 671.92), SIMDE_FLOAT64_C( 712.33), SIMDE_FLOAT64_C( 826.45) }, { SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.03) } }, 
{ { SIMDE_FLOAT64_C( 47.66), SIMDE_FLOAT64_C( 965.47), SIMDE_FLOAT64_C( 318.45), SIMDE_FLOAT64_C( 190.50) }, { SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 0.07) } }, { { SIMDE_FLOAT64_C( 58.25), SIMDE_FLOAT64_C( 429.76), SIMDE_FLOAT64_C( 771.19), SIMDE_FLOAT64_C( 93.42) }, { SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.10) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_invsqrt_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_invsqrt_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 22.96), SIMDE_FLOAT32_C( 915.74), SIMDE_FLOAT32_C( 22.13), SIMDE_FLOAT32_C( 201.67), SIMDE_FLOAT32_C( 223.81), SIMDE_FLOAT32_C( 949.13), SIMDE_FLOAT32_C( 18.28), SIMDE_FLOAT32_C( 237.29), SIMDE_FLOAT32_C( 95.68), SIMDE_FLOAT32_C( 358.07), SIMDE_FLOAT32_C( 974.18), SIMDE_FLOAT32_C( 343.28), SIMDE_FLOAT32_C( 900.66), SIMDE_FLOAT32_C( 905.83), SIMDE_FLOAT32_C( 810.45), SIMDE_FLOAT32_C( 409.74) }, { SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05) } }, { { SIMDE_FLOAT32_C( 332.59), SIMDE_FLOAT32_C( 299.68), SIMDE_FLOAT32_C( 414.08), SIMDE_FLOAT32_C( 229.81), SIMDE_FLOAT32_C( 905.70), SIMDE_FLOAT32_C( 204.12), SIMDE_FLOAT32_C( 480.98), SIMDE_FLOAT32_C( 846.82), SIMDE_FLOAT32_C( 367.27), SIMDE_FLOAT32_C( 670.54), SIMDE_FLOAT32_C( 936.86), SIMDE_FLOAT32_C( 972.95), SIMDE_FLOAT32_C( 695.70), 
SIMDE_FLOAT32_C( 781.82), SIMDE_FLOAT32_C( 825.14), SIMDE_FLOAT32_C( 718.66) }, { SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.04) } }, { { SIMDE_FLOAT32_C( 697.56), SIMDE_FLOAT32_C( 847.27), SIMDE_FLOAT32_C( 920.33), SIMDE_FLOAT32_C( 921.36), SIMDE_FLOAT32_C( 796.40), SIMDE_FLOAT32_C( 938.61), SIMDE_FLOAT32_C( 158.65), SIMDE_FLOAT32_C( 892.08), SIMDE_FLOAT32_C( 296.69), SIMDE_FLOAT32_C( 132.83), SIMDE_FLOAT32_C( 235.36), SIMDE_FLOAT32_C( 197.35), SIMDE_FLOAT32_C( 38.67), SIMDE_FLOAT32_C( 45.81), SIMDE_FLOAT32_C( 607.10), SIMDE_FLOAT32_C( 371.26) }, { SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05) } }, { { SIMDE_FLOAT32_C( 345.49), SIMDE_FLOAT32_C( 21.18), SIMDE_FLOAT32_C( 601.07), SIMDE_FLOAT32_C( 251.19), SIMDE_FLOAT32_C( 225.29), SIMDE_FLOAT32_C( 82.05), SIMDE_FLOAT32_C( 98.01), SIMDE_FLOAT32_C( 592.56), SIMDE_FLOAT32_C( 752.59), SIMDE_FLOAT32_C( 34.87), SIMDE_FLOAT32_C( 565.51), SIMDE_FLOAT32_C( 448.29), SIMDE_FLOAT32_C( 816.69), SIMDE_FLOAT32_C( 390.65), SIMDE_FLOAT32_C( 166.96), SIMDE_FLOAT32_C( 514.24) }, { SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05), 
SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.04) } }, { { SIMDE_FLOAT32_C( 237.92), SIMDE_FLOAT32_C( 87.29), SIMDE_FLOAT32_C( 435.61), SIMDE_FLOAT32_C( 34.32), SIMDE_FLOAT32_C( 25.90), SIMDE_FLOAT32_C( 594.25), SIMDE_FLOAT32_C( 926.40), SIMDE_FLOAT32_C( 322.59), SIMDE_FLOAT32_C( 727.09), SIMDE_FLOAT32_C( 161.76), SIMDE_FLOAT32_C( 519.95), SIMDE_FLOAT32_C( 765.75), SIMDE_FLOAT32_C( 207.57), SIMDE_FLOAT32_C( 127.04), SIMDE_FLOAT32_C( 137.01), SIMDE_FLOAT32_C( 553.06) }, { SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.04) } }, { { SIMDE_FLOAT32_C( 148.22), SIMDE_FLOAT32_C( 738.08), SIMDE_FLOAT32_C( 804.24), SIMDE_FLOAT32_C( 373.51), SIMDE_FLOAT32_C( 820.13), SIMDE_FLOAT32_C( 902.25), SIMDE_FLOAT32_C( 966.07), SIMDE_FLOAT32_C( 572.72), SIMDE_FLOAT32_C( 937.12), SIMDE_FLOAT32_C( 531.58), SIMDE_FLOAT32_C( 21.01), SIMDE_FLOAT32_C( 753.81), SIMDE_FLOAT32_C( 922.24), SIMDE_FLOAT32_C( 187.97), SIMDE_FLOAT32_C( 268.05), SIMDE_FLOAT32_C( 160.16) }, { SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.08) } }, { { SIMDE_FLOAT32_C( 275.26), SIMDE_FLOAT32_C( 703.65), SIMDE_FLOAT32_C( 194.48), SIMDE_FLOAT32_C( 301.16), SIMDE_FLOAT32_C( 297.91), SIMDE_FLOAT32_C( 120.89), SIMDE_FLOAT32_C( 623.76), SIMDE_FLOAT32_C( 25.00), SIMDE_FLOAT32_C( 282.65), SIMDE_FLOAT32_C( 143.70), SIMDE_FLOAT32_C( 
790.75), SIMDE_FLOAT32_C( 490.22), SIMDE_FLOAT32_C( 270.74), SIMDE_FLOAT32_C( 927.76), SIMDE_FLOAT32_C( 43.28), SIMDE_FLOAT32_C( 418.96) }, { SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 0.05) } }, { { SIMDE_FLOAT32_C( 665.84), SIMDE_FLOAT32_C( 847.52), SIMDE_FLOAT32_C( 792.47), SIMDE_FLOAT32_C( 485.97), SIMDE_FLOAT32_C( 749.77), SIMDE_FLOAT32_C( 758.54), SIMDE_FLOAT32_C( 58.69), SIMDE_FLOAT32_C( 686.89), SIMDE_FLOAT32_C( 290.13), SIMDE_FLOAT32_C( 79.70), SIMDE_FLOAT32_C( 440.70), SIMDE_FLOAT32_C( 212.36), SIMDE_FLOAT32_C( 267.67), SIMDE_FLOAT32_C( 708.75), SIMDE_FLOAT32_C( 372.52), SIMDE_FLOAT32_C( 542.93) }, { SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.04) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_invsqrt_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_invsqrt_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 624.14), SIMDE_FLOAT32_C( 819.60), SIMDE_FLOAT32_C( 672.51), SIMDE_FLOAT32_C( 550.11), SIMDE_FLOAT32_C( 812.34), SIMDE_FLOAT32_C( 166.77), SIMDE_FLOAT32_C( 
70.17), SIMDE_FLOAT32_C( 377.64), SIMDE_FLOAT32_C( 183.00), SIMDE_FLOAT32_C( 818.17), SIMDE_FLOAT32_C( 404.48), SIMDE_FLOAT32_C( 187.86), SIMDE_FLOAT32_C( 392.86), SIMDE_FLOAT32_C( 212.92), SIMDE_FLOAT32_C( 867.57), SIMDE_FLOAT32_C( 410.64) }, UINT8_C( 3), { SIMDE_FLOAT32_C( 33.63), SIMDE_FLOAT32_C( 77.51), SIMDE_FLOAT32_C( 932.62), SIMDE_FLOAT32_C( 356.45), SIMDE_FLOAT32_C( 533.80), SIMDE_FLOAT32_C( 680.31), SIMDE_FLOAT32_C( 975.45), SIMDE_FLOAT32_C( 578.12), SIMDE_FLOAT32_C( 558.84), SIMDE_FLOAT32_C( 281.04), SIMDE_FLOAT32_C( 747.18), SIMDE_FLOAT32_C( 909.72), SIMDE_FLOAT32_C( 312.02), SIMDE_FLOAT32_C( 748.71), SIMDE_FLOAT32_C( 533.86), SIMDE_FLOAT32_C( 131.63) }, { SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 672.51), SIMDE_FLOAT32_C( 550.11), SIMDE_FLOAT32_C( 812.34), SIMDE_FLOAT32_C( 166.77), SIMDE_FLOAT32_C( 70.17), SIMDE_FLOAT32_C( 377.64), SIMDE_FLOAT32_C( 183.00), SIMDE_FLOAT32_C( 818.17), SIMDE_FLOAT32_C( 404.48), SIMDE_FLOAT32_C( 187.86), SIMDE_FLOAT32_C( 392.86), SIMDE_FLOAT32_C( 212.92), SIMDE_FLOAT32_C( 867.57), SIMDE_FLOAT32_C( 410.64) } }, { { SIMDE_FLOAT32_C( 421.22), SIMDE_FLOAT32_C( 83.97), SIMDE_FLOAT32_C( 943.97), SIMDE_FLOAT32_C( 587.99), SIMDE_FLOAT32_C( 154.14), SIMDE_FLOAT32_C( 321.61), SIMDE_FLOAT32_C( 770.98), SIMDE_FLOAT32_C( 972.32), SIMDE_FLOAT32_C( 726.09), SIMDE_FLOAT32_C( 958.84), SIMDE_FLOAT32_C( 365.17), SIMDE_FLOAT32_C( 939.01), SIMDE_FLOAT32_C( 826.41), SIMDE_FLOAT32_C( 775.81), SIMDE_FLOAT32_C( 236.82), SIMDE_FLOAT32_C( 860.05) }, UINT8_C( 38), { SIMDE_FLOAT32_C( 169.44), SIMDE_FLOAT32_C( 216.49), SIMDE_FLOAT32_C( 387.13), SIMDE_FLOAT32_C( 849.74), SIMDE_FLOAT32_C( 191.94), SIMDE_FLOAT32_C( 965.24), SIMDE_FLOAT32_C( 408.58), SIMDE_FLOAT32_C( 472.98), SIMDE_FLOAT32_C( 712.43), SIMDE_FLOAT32_C( 318.30), SIMDE_FLOAT32_C( 785.00), SIMDE_FLOAT32_C( 461.13), SIMDE_FLOAT32_C( 852.16), SIMDE_FLOAT32_C( 916.63), SIMDE_FLOAT32_C( 882.35), SIMDE_FLOAT32_C( 936.13) }, { SIMDE_FLOAT32_C( 421.22), SIMDE_FLOAT32_C( 0.07), 
SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 587.99), SIMDE_FLOAT32_C( 154.14), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 770.98), SIMDE_FLOAT32_C( 972.32), SIMDE_FLOAT32_C( 726.09), SIMDE_FLOAT32_C( 958.84), SIMDE_FLOAT32_C( 365.17), SIMDE_FLOAT32_C( 939.01), SIMDE_FLOAT32_C( 826.41), SIMDE_FLOAT32_C( 775.81), SIMDE_FLOAT32_C( 236.82), SIMDE_FLOAT32_C( 860.05) } }, { { SIMDE_FLOAT32_C( 860.60), SIMDE_FLOAT32_C( 470.34), SIMDE_FLOAT32_C( 90.27), SIMDE_FLOAT32_C( 182.21), SIMDE_FLOAT32_C( 241.32), SIMDE_FLOAT32_C( 62.59), SIMDE_FLOAT32_C( 908.29), SIMDE_FLOAT32_C( 200.16), SIMDE_FLOAT32_C( 427.77), SIMDE_FLOAT32_C( 847.30), SIMDE_FLOAT32_C( 26.58), SIMDE_FLOAT32_C( 203.58), SIMDE_FLOAT32_C( 84.12), SIMDE_FLOAT32_C( 886.63), SIMDE_FLOAT32_C( 56.91), SIMDE_FLOAT32_C( 253.56) }, UINT8_C( 27), { SIMDE_FLOAT32_C( 444.03), SIMDE_FLOAT32_C( 103.30), SIMDE_FLOAT32_C( 295.06), SIMDE_FLOAT32_C( 409.28), SIMDE_FLOAT32_C( 511.88), SIMDE_FLOAT32_C( 768.04), SIMDE_FLOAT32_C( 121.70), SIMDE_FLOAT32_C( 830.18), SIMDE_FLOAT32_C( 553.04), SIMDE_FLOAT32_C( 582.83), SIMDE_FLOAT32_C( 682.34), SIMDE_FLOAT32_C( 469.67), SIMDE_FLOAT32_C( 465.19), SIMDE_FLOAT32_C( 618.47), SIMDE_FLOAT32_C( 330.27), SIMDE_FLOAT32_C( 935.53) }, { SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 90.27), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 62.59), SIMDE_FLOAT32_C( 908.29), SIMDE_FLOAT32_C( 200.16), SIMDE_FLOAT32_C( 427.77), SIMDE_FLOAT32_C( 847.30), SIMDE_FLOAT32_C( 26.58), SIMDE_FLOAT32_C( 203.58), SIMDE_FLOAT32_C( 84.12), SIMDE_FLOAT32_C( 886.63), SIMDE_FLOAT32_C( 56.91), SIMDE_FLOAT32_C( 253.56) } }, { { SIMDE_FLOAT32_C( 708.74), SIMDE_FLOAT32_C( 512.48), SIMDE_FLOAT32_C( 176.85), SIMDE_FLOAT32_C( 771.33), SIMDE_FLOAT32_C( 420.77), SIMDE_FLOAT32_C( 377.02), SIMDE_FLOAT32_C( 199.10), SIMDE_FLOAT32_C( 268.07), SIMDE_FLOAT32_C( 403.59), SIMDE_FLOAT32_C( 402.68), SIMDE_FLOAT32_C( 352.19), SIMDE_FLOAT32_C( 290.22), SIMDE_FLOAT32_C( 459.59), SIMDE_FLOAT32_C( 605.74), 
SIMDE_FLOAT32_C( 393.34), SIMDE_FLOAT32_C( 903.62) }, UINT8_C( 7), { SIMDE_FLOAT32_C( 688.40), SIMDE_FLOAT32_C( 312.89), SIMDE_FLOAT32_C( 220.93), SIMDE_FLOAT32_C( 456.44), SIMDE_FLOAT32_C( 434.59), SIMDE_FLOAT32_C( 51.11), SIMDE_FLOAT32_C( 9.48), SIMDE_FLOAT32_C( 17.43), SIMDE_FLOAT32_C( 733.45), SIMDE_FLOAT32_C( 479.15), SIMDE_FLOAT32_C( 482.62), SIMDE_FLOAT32_C( 351.92), SIMDE_FLOAT32_C( 809.42), SIMDE_FLOAT32_C( 418.14), SIMDE_FLOAT32_C( 60.66), SIMDE_FLOAT32_C( 321.90) }, { SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 771.33), SIMDE_FLOAT32_C( 420.77), SIMDE_FLOAT32_C( 377.02), SIMDE_FLOAT32_C( 199.10), SIMDE_FLOAT32_C( 268.07), SIMDE_FLOAT32_C( 403.59), SIMDE_FLOAT32_C( 402.68), SIMDE_FLOAT32_C( 352.19), SIMDE_FLOAT32_C( 290.22), SIMDE_FLOAT32_C( 459.59), SIMDE_FLOAT32_C( 605.74), SIMDE_FLOAT32_C( 393.34), SIMDE_FLOAT32_C( 903.62) } }, { { SIMDE_FLOAT32_C( 594.99), SIMDE_FLOAT32_C( 832.00), SIMDE_FLOAT32_C( 742.67), SIMDE_FLOAT32_C( 972.01), SIMDE_FLOAT32_C( 31.10), SIMDE_FLOAT32_C( 10.74), SIMDE_FLOAT32_C( 375.60), SIMDE_FLOAT32_C( 433.77), SIMDE_FLOAT32_C( 362.92), SIMDE_FLOAT32_C( 665.82), SIMDE_FLOAT32_C( 893.36), SIMDE_FLOAT32_C( 968.67), SIMDE_FLOAT32_C( 59.16), SIMDE_FLOAT32_C( 796.98), SIMDE_FLOAT32_C( 677.71), SIMDE_FLOAT32_C( 747.56) }, UINT8_C(104), { SIMDE_FLOAT32_C( 898.63), SIMDE_FLOAT32_C( 203.99), SIMDE_FLOAT32_C( 544.46), SIMDE_FLOAT32_C( 949.74), SIMDE_FLOAT32_C( 213.47), SIMDE_FLOAT32_C( 561.89), SIMDE_FLOAT32_C( 683.19), SIMDE_FLOAT32_C( 692.63), SIMDE_FLOAT32_C( 44.51), SIMDE_FLOAT32_C( 35.11), SIMDE_FLOAT32_C( 502.05), SIMDE_FLOAT32_C( 462.65), SIMDE_FLOAT32_C( 95.77), SIMDE_FLOAT32_C( 823.95), SIMDE_FLOAT32_C( 57.64), SIMDE_FLOAT32_C( 927.76) }, { SIMDE_FLOAT32_C( 594.99), SIMDE_FLOAT32_C( 832.00), SIMDE_FLOAT32_C( 742.67), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 31.10), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 433.77), SIMDE_FLOAT32_C( 362.92), SIMDE_FLOAT32_C( 
665.82), SIMDE_FLOAT32_C( 893.36), SIMDE_FLOAT32_C( 968.67), SIMDE_FLOAT32_C( 59.16), SIMDE_FLOAT32_C( 796.98), SIMDE_FLOAT32_C( 677.71), SIMDE_FLOAT32_C( 747.56) } }, { { SIMDE_FLOAT32_C( 566.62), SIMDE_FLOAT32_C( 29.65), SIMDE_FLOAT32_C( 958.86), SIMDE_FLOAT32_C( 577.36), SIMDE_FLOAT32_C( 405.26), SIMDE_FLOAT32_C( 392.63), SIMDE_FLOAT32_C( 940.29), SIMDE_FLOAT32_C( 71.08), SIMDE_FLOAT32_C( 285.99), SIMDE_FLOAT32_C( 908.95), SIMDE_FLOAT32_C( 130.24), SIMDE_FLOAT32_C( 82.97), SIMDE_FLOAT32_C( 586.66), SIMDE_FLOAT32_C( 877.80), SIMDE_FLOAT32_C( 192.84), SIMDE_FLOAT32_C( 485.30) }, UINT8_C( 59), { SIMDE_FLOAT32_C( 737.31), SIMDE_FLOAT32_C( 435.04), SIMDE_FLOAT32_C( 295.27), SIMDE_FLOAT32_C( 299.20), SIMDE_FLOAT32_C( 118.23), SIMDE_FLOAT32_C( 987.89), SIMDE_FLOAT32_C( 343.70), SIMDE_FLOAT32_C( 153.34), SIMDE_FLOAT32_C( 489.94), SIMDE_FLOAT32_C( 806.35), SIMDE_FLOAT32_C( 249.11), SIMDE_FLOAT32_C( 313.90), SIMDE_FLOAT32_C( 864.00), SIMDE_FLOAT32_C( 176.87), SIMDE_FLOAT32_C( 880.52), SIMDE_FLOAT32_C( 893.65) }, { SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 958.86), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 940.29), SIMDE_FLOAT32_C( 71.08), SIMDE_FLOAT32_C( 285.99), SIMDE_FLOAT32_C( 908.95), SIMDE_FLOAT32_C( 130.24), SIMDE_FLOAT32_C( 82.97), SIMDE_FLOAT32_C( 586.66), SIMDE_FLOAT32_C( 877.80), SIMDE_FLOAT32_C( 192.84), SIMDE_FLOAT32_C( 485.30) } }, { { SIMDE_FLOAT32_C( 135.73), SIMDE_FLOAT32_C( 457.88), SIMDE_FLOAT32_C( 298.91), SIMDE_FLOAT32_C( 528.36), SIMDE_FLOAT32_C( 398.17), SIMDE_FLOAT32_C( 369.99), SIMDE_FLOAT32_C( 814.36), SIMDE_FLOAT32_C( 307.12), SIMDE_FLOAT32_C( 500.23), SIMDE_FLOAT32_C( 897.33), SIMDE_FLOAT32_C( 893.78), SIMDE_FLOAT32_C( 378.03), SIMDE_FLOAT32_C( 90.17), SIMDE_FLOAT32_C( 379.08), SIMDE_FLOAT32_C( 459.82), SIMDE_FLOAT32_C( 827.48) }, UINT8_C(163), { SIMDE_FLOAT32_C( 755.09), SIMDE_FLOAT32_C( 126.67), SIMDE_FLOAT32_C( 932.35), SIMDE_FLOAT32_C( 742.98), SIMDE_FLOAT32_C( 470.38), 
SIMDE_FLOAT32_C( 85.68), SIMDE_FLOAT32_C( 232.93), SIMDE_FLOAT32_C( 276.73), SIMDE_FLOAT32_C( 334.79), SIMDE_FLOAT32_C( 546.82), SIMDE_FLOAT32_C( 140.73), SIMDE_FLOAT32_C( 511.66), SIMDE_FLOAT32_C( 427.34), SIMDE_FLOAT32_C( 34.38), SIMDE_FLOAT32_C( 647.39), SIMDE_FLOAT32_C( 885.22) }, { SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 298.91), SIMDE_FLOAT32_C( 528.36), SIMDE_FLOAT32_C( 398.17), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 814.36), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 500.23), SIMDE_FLOAT32_C( 897.33), SIMDE_FLOAT32_C( 893.78), SIMDE_FLOAT32_C( 378.03), SIMDE_FLOAT32_C( 90.17), SIMDE_FLOAT32_C( 379.08), SIMDE_FLOAT32_C( 459.82), SIMDE_FLOAT32_C( 827.48) } }, { { SIMDE_FLOAT32_C( 333.29), SIMDE_FLOAT32_C( 175.75), SIMDE_FLOAT32_C( 283.39), SIMDE_FLOAT32_C( 703.28), SIMDE_FLOAT32_C( 990.11), SIMDE_FLOAT32_C( 590.51), SIMDE_FLOAT32_C( 203.51), SIMDE_FLOAT32_C( 887.44), SIMDE_FLOAT32_C( 484.30), SIMDE_FLOAT32_C( 581.54), SIMDE_FLOAT32_C( 977.62), SIMDE_FLOAT32_C( 863.38), SIMDE_FLOAT32_C( 41.36), SIMDE_FLOAT32_C( 805.09), SIMDE_FLOAT32_C( 677.49), SIMDE_FLOAT32_C( 796.45) }, UINT8_C(166), { SIMDE_FLOAT32_C( 609.84), SIMDE_FLOAT32_C( 539.43), SIMDE_FLOAT32_C( 402.14), SIMDE_FLOAT32_C( 695.53), SIMDE_FLOAT32_C( 772.36), SIMDE_FLOAT32_C( 678.87), SIMDE_FLOAT32_C( 30.32), SIMDE_FLOAT32_C( 319.18), SIMDE_FLOAT32_C( 819.60), SIMDE_FLOAT32_C( 541.97), SIMDE_FLOAT32_C( 746.52), SIMDE_FLOAT32_C( 853.98), SIMDE_FLOAT32_C( 189.36), SIMDE_FLOAT32_C( 631.74), SIMDE_FLOAT32_C( 187.26), SIMDE_FLOAT32_C( 365.12) }, { SIMDE_FLOAT32_C( 333.29), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 703.28), SIMDE_FLOAT32_C( 990.11), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 203.51), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 484.30), SIMDE_FLOAT32_C( 581.54), SIMDE_FLOAT32_C( 977.62), SIMDE_FLOAT32_C( 863.38), SIMDE_FLOAT32_C( 41.36), SIMDE_FLOAT32_C( 805.09), SIMDE_FLOAT32_C( 677.49), SIMDE_FLOAT32_C( 796.45) } } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_invsqrt_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_invsqrt_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 38.73), SIMDE_FLOAT64_C( 19.20), SIMDE_FLOAT64_C( 260.68), SIMDE_FLOAT64_C( 258.52), SIMDE_FLOAT64_C( 136.00), SIMDE_FLOAT64_C( 121.97), SIMDE_FLOAT64_C( 936.95), SIMDE_FLOAT64_C( 333.67) }, { SIMDE_FLOAT64_C( 0.16), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.05) } }, { { SIMDE_FLOAT64_C( 609.86), SIMDE_FLOAT64_C( 837.14), SIMDE_FLOAT64_C( 372.68), SIMDE_FLOAT64_C( 549.80), SIMDE_FLOAT64_C( 402.57), SIMDE_FLOAT64_C( 960.80), SIMDE_FLOAT64_C( 489.90), SIMDE_FLOAT64_C( 885.65) }, { SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.03) } }, { { SIMDE_FLOAT64_C( 875.53), SIMDE_FLOAT64_C( 411.92), SIMDE_FLOAT64_C( 548.19), SIMDE_FLOAT64_C( 708.42), SIMDE_FLOAT64_C( 455.90), SIMDE_FLOAT64_C( 110.13), SIMDE_FLOAT64_C( 88.56), SIMDE_FLOAT64_C( 499.24) }, { SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( 0.04) } }, { { SIMDE_FLOAT64_C( 161.32), SIMDE_FLOAT64_C( 442.19), SIMDE_FLOAT64_C( 573.08), SIMDE_FLOAT64_C( 621.10), SIMDE_FLOAT64_C( 338.32), SIMDE_FLOAT64_C( 172.08), SIMDE_FLOAT64_C( 822.98), SIMDE_FLOAT64_C( 377.05) }, { SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.04), 
SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.05) } }, { { SIMDE_FLOAT64_C( 191.28), SIMDE_FLOAT64_C( 83.66), SIMDE_FLOAT64_C( 635.57), SIMDE_FLOAT64_C( 327.28), SIMDE_FLOAT64_C( 205.63), SIMDE_FLOAT64_C( 572.53), SIMDE_FLOAT64_C( 660.94), SIMDE_FLOAT64_C( 815.49) }, { SIMDE_FLOAT64_C( 0.07), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 0.07), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.04) } }, { { SIMDE_FLOAT64_C( 409.67), SIMDE_FLOAT64_C( 33.63), SIMDE_FLOAT64_C( 365.30), SIMDE_FLOAT64_C( 812.24), SIMDE_FLOAT64_C( 994.43), SIMDE_FLOAT64_C( 855.19), SIMDE_FLOAT64_C( 697.89), SIMDE_FLOAT64_C( 869.96) }, { SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.17), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.03) } }, { { SIMDE_FLOAT64_C( 267.11), SIMDE_FLOAT64_C( 246.07), SIMDE_FLOAT64_C( 578.38), SIMDE_FLOAT64_C( 723.01), SIMDE_FLOAT64_C( 356.21), SIMDE_FLOAT64_C( 666.94), SIMDE_FLOAT64_C( 222.25), SIMDE_FLOAT64_C( 517.53) }, { SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.07), SIMDE_FLOAT64_C( 0.04) } }, { { SIMDE_FLOAT64_C( 109.13), SIMDE_FLOAT64_C( 795.33), SIMDE_FLOAT64_C( 138.62), SIMDE_FLOAT64_C( 447.45), SIMDE_FLOAT64_C( 967.41), SIMDE_FLOAT64_C( 961.61), SIMDE_FLOAT64_C( 824.50), SIMDE_FLOAT64_C( 158.69) }, { SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.08) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_invsqrt_pd(a); simde_test_x86_assert_equal_f64x8(r, 
simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_invsqrt_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 784.96), SIMDE_FLOAT64_C( 815.29), SIMDE_FLOAT64_C( 578.00), SIMDE_FLOAT64_C( 693.34), SIMDE_FLOAT64_C( 899.84), SIMDE_FLOAT64_C( 476.45), SIMDE_FLOAT64_C( 558.50), SIMDE_FLOAT64_C( 745.07) }, UINT8_C( 77), { SIMDE_FLOAT64_C( 864.69), SIMDE_FLOAT64_C( 953.84), SIMDE_FLOAT64_C( 134.83), SIMDE_FLOAT64_C( 167.75), SIMDE_FLOAT64_C( 474.65), SIMDE_FLOAT64_C( 536.52), SIMDE_FLOAT64_C( 563.54), SIMDE_FLOAT64_C( 963.69) }, { SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 815.29), SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 899.84), SIMDE_FLOAT64_C( 476.45), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 745.07) } }, { { SIMDE_FLOAT64_C( 410.86), SIMDE_FLOAT64_C( 470.77), SIMDE_FLOAT64_C( 329.50), SIMDE_FLOAT64_C( 65.82), SIMDE_FLOAT64_C( 510.47), SIMDE_FLOAT64_C( 748.64), SIMDE_FLOAT64_C( 130.13), SIMDE_FLOAT64_C( 819.32) }, UINT8_C(180), { SIMDE_FLOAT64_C( 969.69), SIMDE_FLOAT64_C( 176.66), SIMDE_FLOAT64_C( 270.39), SIMDE_FLOAT64_C( 73.35), SIMDE_FLOAT64_C( 618.94), SIMDE_FLOAT64_C( 55.36), SIMDE_FLOAT64_C( 888.64), SIMDE_FLOAT64_C( 196.94) }, { SIMDE_FLOAT64_C( 410.86), SIMDE_FLOAT64_C( 470.77), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 65.82), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( 130.13), SIMDE_FLOAT64_C( 0.07) } }, { { SIMDE_FLOAT64_C( 748.70), SIMDE_FLOAT64_C( 788.48), SIMDE_FLOAT64_C( 673.39), SIMDE_FLOAT64_C( 307.20), SIMDE_FLOAT64_C( 533.54), SIMDE_FLOAT64_C( 118.92), SIMDE_FLOAT64_C( 171.90), SIMDE_FLOAT64_C( 487.39) }, UINT8_C( 67), { SIMDE_FLOAT64_C( 339.65), SIMDE_FLOAT64_C( 962.04), SIMDE_FLOAT64_C( 790.27), SIMDE_FLOAT64_C( 903.19), SIMDE_FLOAT64_C( 925.73), SIMDE_FLOAT64_C( 201.14), SIMDE_FLOAT64_C( 373.95), SIMDE_FLOAT64_C( 255.23) }, { 
SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 673.39), SIMDE_FLOAT64_C( 307.20), SIMDE_FLOAT64_C( 533.54), SIMDE_FLOAT64_C( 118.92), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 487.39) } }, { { SIMDE_FLOAT64_C( 266.96), SIMDE_FLOAT64_C( 884.43), SIMDE_FLOAT64_C( 3.88), SIMDE_FLOAT64_C( 397.10), SIMDE_FLOAT64_C( 703.75), SIMDE_FLOAT64_C( 335.69), SIMDE_FLOAT64_C( 366.79), SIMDE_FLOAT64_C( 880.41) }, UINT8_C(138), { SIMDE_FLOAT64_C( 440.13), SIMDE_FLOAT64_C( 499.35), SIMDE_FLOAT64_C( 661.44), SIMDE_FLOAT64_C( 328.77), SIMDE_FLOAT64_C( 696.29), SIMDE_FLOAT64_C( 410.14), SIMDE_FLOAT64_C( 117.25), SIMDE_FLOAT64_C( 369.69) }, { SIMDE_FLOAT64_C( 266.96), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 3.88), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 703.75), SIMDE_FLOAT64_C( 335.69), SIMDE_FLOAT64_C( 366.79), SIMDE_FLOAT64_C( 0.05) } }, { { SIMDE_FLOAT64_C( 717.34), SIMDE_FLOAT64_C( 650.79), SIMDE_FLOAT64_C( 488.60), SIMDE_FLOAT64_C( 889.24), SIMDE_FLOAT64_C( 138.18), SIMDE_FLOAT64_C( 742.35), SIMDE_FLOAT64_C( 228.88), SIMDE_FLOAT64_C( 100.22) }, UINT8_C( 3), { SIMDE_FLOAT64_C( 132.07), SIMDE_FLOAT64_C( 25.94), SIMDE_FLOAT64_C( 733.76), SIMDE_FLOAT64_C( 506.02), SIMDE_FLOAT64_C( 281.17), SIMDE_FLOAT64_C( 0.72), SIMDE_FLOAT64_C( 390.45), SIMDE_FLOAT64_C( 285.05) }, { SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( 488.60), SIMDE_FLOAT64_C( 889.24), SIMDE_FLOAT64_C( 138.18), SIMDE_FLOAT64_C( 742.35), SIMDE_FLOAT64_C( 228.88), SIMDE_FLOAT64_C( 100.22) } }, { { SIMDE_FLOAT64_C( 397.82), SIMDE_FLOAT64_C( 94.20), SIMDE_FLOAT64_C( 620.74), SIMDE_FLOAT64_C( 764.60), SIMDE_FLOAT64_C( 974.61), SIMDE_FLOAT64_C( 226.82), SIMDE_FLOAT64_C( 204.74), SIMDE_FLOAT64_C( 473.96) }, UINT8_C(205), { SIMDE_FLOAT64_C( 533.51), SIMDE_FLOAT64_C( 170.26), SIMDE_FLOAT64_C( 298.40), SIMDE_FLOAT64_C( 650.76), SIMDE_FLOAT64_C( 539.94), SIMDE_FLOAT64_C( 15.74), SIMDE_FLOAT64_C( 301.54), SIMDE_FLOAT64_C( 28.54) }, { SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 94.20), 
SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 974.61), SIMDE_FLOAT64_C( 226.82), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 0.19) } }, { { SIMDE_FLOAT64_C( 904.98), SIMDE_FLOAT64_C( 439.72), SIMDE_FLOAT64_C( 770.90), SIMDE_FLOAT64_C( 133.86), SIMDE_FLOAT64_C( 539.94), SIMDE_FLOAT64_C( 303.52), SIMDE_FLOAT64_C( 265.93), SIMDE_FLOAT64_C( 565.88) }, UINT8_C( 41), { SIMDE_FLOAT64_C( 771.96), SIMDE_FLOAT64_C( 847.05), SIMDE_FLOAT64_C( 38.01), SIMDE_FLOAT64_C( 162.41), SIMDE_FLOAT64_C( 132.10), SIMDE_FLOAT64_C( 435.83), SIMDE_FLOAT64_C( 256.61), SIMDE_FLOAT64_C( 752.84) }, { SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 439.72), SIMDE_FLOAT64_C( 770.90), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 539.94), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 265.93), SIMDE_FLOAT64_C( 565.88) } }, { { SIMDE_FLOAT64_C( 200.43), SIMDE_FLOAT64_C( 231.22), SIMDE_FLOAT64_C( 979.66), SIMDE_FLOAT64_C( 405.17), SIMDE_FLOAT64_C( 705.18), SIMDE_FLOAT64_C( 867.92), SIMDE_FLOAT64_C( 938.68), SIMDE_FLOAT64_C( 875.43) }, UINT8_C( 32), { SIMDE_FLOAT64_C( 589.43), SIMDE_FLOAT64_C( 415.38), SIMDE_FLOAT64_C( 182.05), SIMDE_FLOAT64_C( 890.98), SIMDE_FLOAT64_C( 443.92), SIMDE_FLOAT64_C( 87.03), SIMDE_FLOAT64_C( 330.70), SIMDE_FLOAT64_C( 214.82) }, { SIMDE_FLOAT64_C( 200.43), SIMDE_FLOAT64_C( 231.22), SIMDE_FLOAT64_C( 979.66), SIMDE_FLOAT64_C( 405.17), SIMDE_FLOAT64_C( 705.18), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( 938.68), SIMDE_FLOAT64_C( 875.43) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_invsqrt_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_log_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 4068.94), SIMDE_FLOAT32_C( 5195.06), 
SIMDE_FLOAT32_C( 1228.12), SIMDE_FLOAT32_C( 6733.16)), simde_mm_set_ps(SIMDE_FLOAT32_C( 8.31), SIMDE_FLOAT32_C( 8.56), SIMDE_FLOAT32_C( 7.11), SIMDE_FLOAT32_C( 8.81)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 7486.55), SIMDE_FLOAT32_C( 8351.20), SIMDE_FLOAT32_C( 3512.77), SIMDE_FLOAT32_C( 5170.29)), simde_mm_set_ps(SIMDE_FLOAT32_C( 8.92), SIMDE_FLOAT32_C( 9.03), SIMDE_FLOAT32_C( 8.16), SIMDE_FLOAT32_C( 8.55)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 9127.65), SIMDE_FLOAT32_C( 7111.03), SIMDE_FLOAT32_C( 3652.77), SIMDE_FLOAT32_C( 7338.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 9.12), SIMDE_FLOAT32_C( 8.87), SIMDE_FLOAT32_C( 8.20), SIMDE_FLOAT32_C( 8.90)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 1609.14), SIMDE_FLOAT32_C( 1569.36), SIMDE_FLOAT32_C( 5423.87), SIMDE_FLOAT32_C( 7857.29)), simde_mm_set_ps(SIMDE_FLOAT32_C( 7.38), SIMDE_FLOAT32_C( 7.36), SIMDE_FLOAT32_C( 8.60), SIMDE_FLOAT32_C( 8.97)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 3474.63), SIMDE_FLOAT32_C( 695.25), SIMDE_FLOAT32_C( 2912.29), SIMDE_FLOAT32_C( 8484.34)), simde_mm_set_ps(SIMDE_FLOAT32_C( 8.15), SIMDE_FLOAT32_C( 6.54), SIMDE_FLOAT32_C( 7.98), SIMDE_FLOAT32_C( 9.05)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 2775.95), SIMDE_FLOAT32_C( 5142.35), SIMDE_FLOAT32_C( 3079.83), SIMDE_FLOAT32_C( 381.82)), simde_mm_set_ps(SIMDE_FLOAT32_C( 7.93), SIMDE_FLOAT32_C( 8.55), SIMDE_FLOAT32_C( 8.03), SIMDE_FLOAT32_C( 5.94)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 6306.54), SIMDE_FLOAT32_C( 3937.29), SIMDE_FLOAT32_C( 117.23), SIMDE_FLOAT32_C( 1696.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 8.75), SIMDE_FLOAT32_C( 8.28), SIMDE_FLOAT32_C( 4.76), SIMDE_FLOAT32_C( 7.44)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 5890.98), SIMDE_FLOAT32_C( 2746.67), SIMDE_FLOAT32_C( 6166.85), SIMDE_FLOAT32_C( 8435.45)), simde_mm_set_ps(SIMDE_FLOAT32_C( 8.68), SIMDE_FLOAT32_C( 7.92), SIMDE_FLOAT32_C( 8.73), SIMDE_FLOAT32_C( 9.04)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_log_ps(test_vec[i].a); 
simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm_log_pd.
 *
 * Feeds 8 fixed 2-lane double-precision vectors (built with simde_mm_set_pd)
 * through simde_mm_log_pd and compares each result with the stored expected
 * vector using simde_assert_m128d_close. The expected values are the natural
 * logarithm of the inputs written to two decimals (e.g. 1228.12 -> 7.11).
 * The trailing argument (1) is the tolerance handed to the assertion macro.
 * NOTE(review): presumably a number of decimal places -- confirm against the
 * macro's definition. Returns 0 after the loop over all vectors completes. */
static int test_simde_mm_log_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 1228.12), SIMDE_FLOAT64_C( 6733.16)), simde_mm_set_pd(SIMDE_FLOAT64_C( 7.11), SIMDE_FLOAT64_C( 8.81)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 4068.94), SIMDE_FLOAT64_C( 5195.06)), simde_mm_set_pd(SIMDE_FLOAT64_C( 8.31), SIMDE_FLOAT64_C( 8.56)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 3512.77), SIMDE_FLOAT64_C( 5170.29)), simde_mm_set_pd(SIMDE_FLOAT64_C( 8.16), SIMDE_FLOAT64_C( 8.55)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 7486.55), SIMDE_FLOAT64_C( 8351.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( 8.92), SIMDE_FLOAT64_C( 9.03)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 3652.77), SIMDE_FLOAT64_C( 7338.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 8.20), SIMDE_FLOAT64_C( 8.90)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 9127.65), SIMDE_FLOAT64_C( 7111.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( 9.12), SIMDE_FLOAT64_C( 8.87)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 5423.87), SIMDE_FLOAT64_C( 7857.29)), simde_mm_set_pd(SIMDE_FLOAT64_C( 8.60), SIMDE_FLOAT64_C( 8.97)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 1609.14), SIMDE_FLOAT64_C( 1569.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( 7.38), SIMDE_FLOAT64_C( 7.36)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_log_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm256_log_ps.
 *
 * Feeds 8 fixed 8-lane single-precision vectors (built with
 * simde_mm256_set_ps) through simde_mm256_log_ps and compares each result
 * with the stored expected vector using simde_assert_m256_close. The expected
 * values are the natural logarithm of the inputs written to two decimals
 * (e.g. 7486.55 -> 8.92). The trailing argument (1) is the tolerance handed
 * to the assertion macro.
 * NOTE(review): presumably a number of decimal places -- confirm against the
 * macro's definition. Returns 0 after the loop over all vectors completes. */
static int test_simde_mm256_log_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 7486.55), SIMDE_FLOAT32_C( 8351.20), SIMDE_FLOAT32_C( 3512.77), SIMDE_FLOAT32_C( 5170.29), SIMDE_FLOAT32_C( 4068.94), SIMDE_FLOAT32_C( 5195.06), SIMDE_FLOAT32_C( 1228.12), SIMDE_FLOAT32_C( 6733.16)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 8.92), SIMDE_FLOAT32_C( 9.03), SIMDE_FLOAT32_C( 8.16), SIMDE_FLOAT32_C( 8.55), SIMDE_FLOAT32_C( 8.31), 
SIMDE_FLOAT32_C( 8.56), SIMDE_FLOAT32_C( 7.11), SIMDE_FLOAT32_C( 8.81)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 1609.14), SIMDE_FLOAT32_C( 1569.36), SIMDE_FLOAT32_C( 5423.87), SIMDE_FLOAT32_C( 7857.29), SIMDE_FLOAT32_C( 9127.65), SIMDE_FLOAT32_C( 7111.03), SIMDE_FLOAT32_C( 3652.77), SIMDE_FLOAT32_C( 7338.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 7.38), SIMDE_FLOAT32_C( 7.36), SIMDE_FLOAT32_C( 8.60), SIMDE_FLOAT32_C( 8.97), SIMDE_FLOAT32_C( 9.12), SIMDE_FLOAT32_C( 8.87), SIMDE_FLOAT32_C( 8.20), SIMDE_FLOAT32_C( 8.90)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 2775.95), SIMDE_FLOAT32_C( 5142.35), SIMDE_FLOAT32_C( 3079.83), SIMDE_FLOAT32_C( 381.82), SIMDE_FLOAT32_C( 3474.63), SIMDE_FLOAT32_C( 695.25), SIMDE_FLOAT32_C( 2912.29), SIMDE_FLOAT32_C( 8484.34)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 7.93), SIMDE_FLOAT32_C( 8.55), SIMDE_FLOAT32_C( 8.03), SIMDE_FLOAT32_C( 5.94), SIMDE_FLOAT32_C( 8.15), SIMDE_FLOAT32_C( 6.54), SIMDE_FLOAT32_C( 7.98), SIMDE_FLOAT32_C( 9.05)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 5890.98), SIMDE_FLOAT32_C( 2746.67), SIMDE_FLOAT32_C( 6166.85), SIMDE_FLOAT32_C( 8435.45), SIMDE_FLOAT32_C( 6306.54), SIMDE_FLOAT32_C( 3937.29), SIMDE_FLOAT32_C( 117.23), SIMDE_FLOAT32_C( 1696.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 8.68), SIMDE_FLOAT32_C( 7.92), SIMDE_FLOAT32_C( 8.73), SIMDE_FLOAT32_C( 9.04), SIMDE_FLOAT32_C( 8.75), SIMDE_FLOAT32_C( 8.28), SIMDE_FLOAT32_C( 4.76), SIMDE_FLOAT32_C( 7.44)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 1148.23), SIMDE_FLOAT32_C( 7217.40), SIMDE_FLOAT32_C( 2082.02), SIMDE_FLOAT32_C( 6902.28), SIMDE_FLOAT32_C( 1146.40), SIMDE_FLOAT32_C( 9969.51), SIMDE_FLOAT32_C( 5140.40), SIMDE_FLOAT32_C( 9206.03)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 7.05), SIMDE_FLOAT32_C( 8.88), SIMDE_FLOAT32_C( 7.64), SIMDE_FLOAT32_C( 8.84), SIMDE_FLOAT32_C( 7.04), SIMDE_FLOAT32_C( 9.21), SIMDE_FLOAT32_C( 8.54), SIMDE_FLOAT32_C( 9.13)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 3060.52), SIMDE_FLOAT32_C( 6979.60), SIMDE_FLOAT32_C( 8279.36), SIMDE_FLOAT32_C( 
6696.04), SIMDE_FLOAT32_C( 7661.76), SIMDE_FLOAT32_C( 3680.04), SIMDE_FLOAT32_C( 8903.22), SIMDE_FLOAT32_C( 4846.05)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 8.03), SIMDE_FLOAT32_C( 8.85), SIMDE_FLOAT32_C( 9.02), SIMDE_FLOAT32_C( 8.81), SIMDE_FLOAT32_C( 8.94), SIMDE_FLOAT32_C( 8.21), SIMDE_FLOAT32_C( 9.09), SIMDE_FLOAT32_C( 8.49)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 3981.75), SIMDE_FLOAT32_C( 4596.36), SIMDE_FLOAT32_C( 6683.64), SIMDE_FLOAT32_C( 276.11), SIMDE_FLOAT32_C( 1262.07), SIMDE_FLOAT32_C( 1163.84), SIMDE_FLOAT32_C( 2229.06), SIMDE_FLOAT32_C( 6994.08)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 8.29), SIMDE_FLOAT32_C( 8.43), SIMDE_FLOAT32_C( 8.81), SIMDE_FLOAT32_C( 5.62), SIMDE_FLOAT32_C( 7.14), SIMDE_FLOAT32_C( 7.06), SIMDE_FLOAT32_C( 7.71), SIMDE_FLOAT32_C( 8.85)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 7348.31), SIMDE_FLOAT32_C( 8400.08), SIMDE_FLOAT32_C( 4256.55), SIMDE_FLOAT32_C( 9093.31), SIMDE_FLOAT32_C( 9550.14), SIMDE_FLOAT32_C( 8002.34), SIMDE_FLOAT32_C( 8956.15), SIMDE_FLOAT32_C( 6271.53)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 8.90), SIMDE_FLOAT32_C( 9.04), SIMDE_FLOAT32_C( 8.36), SIMDE_FLOAT32_C( 9.12), SIMDE_FLOAT32_C( 9.16), SIMDE_FLOAT32_C( 8.99), SIMDE_FLOAT32_C( 9.10), SIMDE_FLOAT32_C( 8.74)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_log_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm256_log_pd.
 *
 * Feeds 8 fixed 4-lane double-precision vectors (built with
 * simde_mm256_set_pd) through simde_mm256_log_pd and compares each result
 * with the stored expected vector using simde_assert_m256d_close. The
 * expected values are the natural logarithm of the inputs written to two
 * decimals (e.g. 4068.94 -> 8.31). The trailing argument (1) is the tolerance
 * handed to the assertion macro.
 * NOTE(review): presumably a number of decimal places -- confirm against the
 * macro's definition. Returns 0 after the loop over all vectors completes. */
static int test_simde_mm256_log_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 4068.94), SIMDE_FLOAT64_C( 5195.06), SIMDE_FLOAT64_C( 1228.12), SIMDE_FLOAT64_C( 6733.16)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 8.31), SIMDE_FLOAT64_C( 8.56), SIMDE_FLOAT64_C( 7.11), SIMDE_FLOAT64_C( 8.81)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 7486.55), SIMDE_FLOAT64_C( 8351.20), SIMDE_FLOAT64_C( 3512.77), SIMDE_FLOAT64_C( 5170.29)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 8.92), SIMDE_FLOAT64_C( 9.03), 
SIMDE_FLOAT64_C( 8.16), SIMDE_FLOAT64_C( 8.55)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 9127.65), SIMDE_FLOAT64_C( 7111.03), SIMDE_FLOAT64_C( 3652.77), SIMDE_FLOAT64_C( 7338.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 9.12), SIMDE_FLOAT64_C( 8.87), SIMDE_FLOAT64_C( 8.20), SIMDE_FLOAT64_C( 8.90)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 1609.14), SIMDE_FLOAT64_C( 1569.36), SIMDE_FLOAT64_C( 5423.87), SIMDE_FLOAT64_C( 7857.29)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 7.38), SIMDE_FLOAT64_C( 7.36), SIMDE_FLOAT64_C( 8.60), SIMDE_FLOAT64_C( 8.97)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 3474.63), SIMDE_FLOAT64_C( 695.25), SIMDE_FLOAT64_C( 2912.29), SIMDE_FLOAT64_C( 8484.34)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 8.15), SIMDE_FLOAT64_C( 6.54), SIMDE_FLOAT64_C( 7.98), SIMDE_FLOAT64_C( 9.05)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 2775.95), SIMDE_FLOAT64_C( 5142.35), SIMDE_FLOAT64_C( 3079.83), SIMDE_FLOAT64_C( 381.82)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 7.93), SIMDE_FLOAT64_C( 8.55), SIMDE_FLOAT64_C( 8.03), SIMDE_FLOAT64_C( 5.94)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 6306.54), SIMDE_FLOAT64_C( 3937.29), SIMDE_FLOAT64_C( 117.23), SIMDE_FLOAT64_C( 1696.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 8.75), SIMDE_FLOAT64_C( 8.28), SIMDE_FLOAT64_C( 4.76), SIMDE_FLOAT64_C( 7.44)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 5890.98), SIMDE_FLOAT64_C( 2746.67), SIMDE_FLOAT64_C( 6166.85), SIMDE_FLOAT64_C( 8435.45)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 8.68), SIMDE_FLOAT64_C( 7.92), SIMDE_FLOAT64_C( 8.73), SIMDE_FLOAT64_C( 9.04)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_log_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm512_log_ps.
 *
 * Feeds 8 fixed 16-lane single-precision vectors (built with
 * simde_mm512_set_ps) through simde_mm512_log_ps and compares each result
 * with the stored expected vector using simde_assert_m512_close. The expected
 * values are the natural logarithm of the inputs written to two decimals
 * (e.g. 1609.14 -> 7.38). The trailing argument (1) is the tolerance handed
 * to the assertion macro.
 * NOTE(review): presumably a number of decimal places -- confirm against the
 * macro's definition. Returns 0 after the loop over all vectors completes. */
static int test_simde_mm512_log_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( 1609.14), SIMDE_FLOAT32_C( 1569.36), SIMDE_FLOAT32_C( 5423.87), SIMDE_FLOAT32_C( 7857.29), SIMDE_FLOAT32_C( 
9127.65), SIMDE_FLOAT32_C( 7111.03), SIMDE_FLOAT32_C( 3652.77), SIMDE_FLOAT32_C( 7338.80), SIMDE_FLOAT32_C( 7486.55), SIMDE_FLOAT32_C( 8351.20), SIMDE_FLOAT32_C( 3512.77), SIMDE_FLOAT32_C( 5170.29), SIMDE_FLOAT32_C( 4068.94), SIMDE_FLOAT32_C( 5195.06), SIMDE_FLOAT32_C( 1228.12), SIMDE_FLOAT32_C( 6733.16)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 7.38), SIMDE_FLOAT32_C( 7.36), SIMDE_FLOAT32_C( 8.60), SIMDE_FLOAT32_C( 8.97), SIMDE_FLOAT32_C( 9.12), SIMDE_FLOAT32_C( 8.87), SIMDE_FLOAT32_C( 8.20), SIMDE_FLOAT32_C( 8.90), SIMDE_FLOAT32_C( 8.92), SIMDE_FLOAT32_C( 9.03), SIMDE_FLOAT32_C( 8.16), SIMDE_FLOAT32_C( 8.55), SIMDE_FLOAT32_C( 8.31), SIMDE_FLOAT32_C( 8.56), SIMDE_FLOAT32_C( 7.11), SIMDE_FLOAT32_C( 8.81)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 5890.98), SIMDE_FLOAT32_C( 2746.67), SIMDE_FLOAT32_C( 6166.85), SIMDE_FLOAT32_C( 8435.45), SIMDE_FLOAT32_C( 6306.54), SIMDE_FLOAT32_C( 3937.29), SIMDE_FLOAT32_C( 117.23), SIMDE_FLOAT32_C( 1696.00), SIMDE_FLOAT32_C( 2775.95), SIMDE_FLOAT32_C( 5142.35), SIMDE_FLOAT32_C( 3079.83), SIMDE_FLOAT32_C( 381.82), SIMDE_FLOAT32_C( 3474.63), SIMDE_FLOAT32_C( 695.25), SIMDE_FLOAT32_C( 2912.29), SIMDE_FLOAT32_C( 8484.34)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 8.68), SIMDE_FLOAT32_C( 7.92), SIMDE_FLOAT32_C( 8.73), SIMDE_FLOAT32_C( 9.04), SIMDE_FLOAT32_C( 8.75), SIMDE_FLOAT32_C( 8.28), SIMDE_FLOAT32_C( 4.76), SIMDE_FLOAT32_C( 7.44), SIMDE_FLOAT32_C( 7.93), SIMDE_FLOAT32_C( 8.55), SIMDE_FLOAT32_C( 8.03), SIMDE_FLOAT32_C( 5.94), SIMDE_FLOAT32_C( 8.15), SIMDE_FLOAT32_C( 6.54), SIMDE_FLOAT32_C( 7.98), SIMDE_FLOAT32_C( 9.05)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 3060.52), SIMDE_FLOAT32_C( 6979.60), SIMDE_FLOAT32_C( 8279.36), SIMDE_FLOAT32_C( 6696.04), SIMDE_FLOAT32_C( 7661.76), SIMDE_FLOAT32_C( 3680.04), SIMDE_FLOAT32_C( 8903.22), SIMDE_FLOAT32_C( 4846.05), SIMDE_FLOAT32_C( 1148.23), SIMDE_FLOAT32_C( 7217.40), SIMDE_FLOAT32_C( 2082.02), SIMDE_FLOAT32_C( 6902.28), SIMDE_FLOAT32_C( 1146.40), SIMDE_FLOAT32_C( 9969.51), SIMDE_FLOAT32_C( 5140.40), 
SIMDE_FLOAT32_C( 9206.03)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 8.03), SIMDE_FLOAT32_C( 8.85), SIMDE_FLOAT32_C( 9.02), SIMDE_FLOAT32_C( 8.81), SIMDE_FLOAT32_C( 8.94), SIMDE_FLOAT32_C( 8.21), SIMDE_FLOAT32_C( 9.09), SIMDE_FLOAT32_C( 8.49), SIMDE_FLOAT32_C( 7.05), SIMDE_FLOAT32_C( 8.88), SIMDE_FLOAT32_C( 7.64), SIMDE_FLOAT32_C( 8.84), SIMDE_FLOAT32_C( 7.04), SIMDE_FLOAT32_C( 9.21), SIMDE_FLOAT32_C( 8.54), SIMDE_FLOAT32_C( 9.13)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 7348.31), SIMDE_FLOAT32_C( 8400.08), SIMDE_FLOAT32_C( 4256.55), SIMDE_FLOAT32_C( 9093.31), SIMDE_FLOAT32_C( 9550.14), SIMDE_FLOAT32_C( 8002.34), SIMDE_FLOAT32_C( 8956.15), SIMDE_FLOAT32_C( 6271.53), SIMDE_FLOAT32_C( 3981.75), SIMDE_FLOAT32_C( 4596.36), SIMDE_FLOAT32_C( 6683.64), SIMDE_FLOAT32_C( 276.11), SIMDE_FLOAT32_C( 1262.07), SIMDE_FLOAT32_C( 1163.84), SIMDE_FLOAT32_C( 2229.06), SIMDE_FLOAT32_C( 6994.08)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 8.90), SIMDE_FLOAT32_C( 9.04), SIMDE_FLOAT32_C( 8.36), SIMDE_FLOAT32_C( 9.12), SIMDE_FLOAT32_C( 9.16), SIMDE_FLOAT32_C( 8.99), SIMDE_FLOAT32_C( 9.10), SIMDE_FLOAT32_C( 8.74), SIMDE_FLOAT32_C( 8.29), SIMDE_FLOAT32_C( 8.43), SIMDE_FLOAT32_C( 8.81), SIMDE_FLOAT32_C( 5.62), SIMDE_FLOAT32_C( 7.14), SIMDE_FLOAT32_C( 7.06), SIMDE_FLOAT32_C( 7.71), SIMDE_FLOAT32_C( 8.85)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 4105.04), SIMDE_FLOAT32_C( 8793.93), SIMDE_FLOAT32_C( 6623.12), SIMDE_FLOAT32_C( 6717.40), SIMDE_FLOAT32_C( 628.43), SIMDE_FLOAT32_C( 1010.42), SIMDE_FLOAT32_C( 3357.32), SIMDE_FLOAT32_C( 2370.85), SIMDE_FLOAT32_C( 4038.44), SIMDE_FLOAT32_C( 886.73), SIMDE_FLOAT32_C( 7806.81), SIMDE_FLOAT32_C( 8278.35), SIMDE_FLOAT32_C( 4645.43), SIMDE_FLOAT32_C( 7716.73), SIMDE_FLOAT32_C( 5603.27), SIMDE_FLOAT32_C( 4142.45)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 8.32), SIMDE_FLOAT32_C( 9.08), SIMDE_FLOAT32_C( 8.80), SIMDE_FLOAT32_C( 8.81), SIMDE_FLOAT32_C( 6.44), SIMDE_FLOAT32_C( 6.92), SIMDE_FLOAT32_C( 8.12), SIMDE_FLOAT32_C( 7.77), SIMDE_FLOAT32_C( 8.30), SIMDE_FLOAT32_C( 
6.79), SIMDE_FLOAT32_C( 8.96), SIMDE_FLOAT32_C( 9.02), SIMDE_FLOAT32_C( 8.44), SIMDE_FLOAT32_C( 8.95), SIMDE_FLOAT32_C( 8.63), SIMDE_FLOAT32_C( 8.33)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 8450.59), SIMDE_FLOAT32_C( 9203.26), SIMDE_FLOAT32_C( 4894.53), SIMDE_FLOAT32_C( 2042.18), SIMDE_FLOAT32_C( 2755.53), SIMDE_FLOAT32_C( 8657.47), SIMDE_FLOAT32_C( 7528.93), SIMDE_FLOAT32_C( 8118.50), SIMDE_FLOAT32_C( 9155.11), SIMDE_FLOAT32_C( 5703.37), SIMDE_FLOAT32_C( 9886.80), SIMDE_FLOAT32_C( 469.19), SIMDE_FLOAT32_C( 6656.71), SIMDE_FLOAT32_C( 5499.67), SIMDE_FLOAT32_C( 7314.76), SIMDE_FLOAT32_C( 1309.05)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 9.04), SIMDE_FLOAT32_C( 9.13), SIMDE_FLOAT32_C( 8.50), SIMDE_FLOAT32_C( 7.62), SIMDE_FLOAT32_C( 7.92), SIMDE_FLOAT32_C( 9.07), SIMDE_FLOAT32_C( 8.93), SIMDE_FLOAT32_C( 9.00), SIMDE_FLOAT32_C( 9.12), SIMDE_FLOAT32_C( 8.65), SIMDE_FLOAT32_C( 9.20), SIMDE_FLOAT32_C( 6.15), SIMDE_FLOAT32_C( 8.80), SIMDE_FLOAT32_C( 8.61), SIMDE_FLOAT32_C( 8.90), SIMDE_FLOAT32_C( 7.18)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 1154.54), SIMDE_FLOAT32_C( 9110.29), SIMDE_FLOAT32_C( 2130.97), SIMDE_FLOAT32_C( 11.83), SIMDE_FLOAT32_C( 3312.02), SIMDE_FLOAT32_C( 9618.20), SIMDE_FLOAT32_C( 6468.19), SIMDE_FLOAT32_C( 1159.42), SIMDE_FLOAT32_C( 2118.90), SIMDE_FLOAT32_C( 4661.80), SIMDE_FLOAT32_C( 8551.88), SIMDE_FLOAT32_C( 9887.44), SIMDE_FLOAT32_C( 1217.92), SIMDE_FLOAT32_C( 7124.06), SIMDE_FLOAT32_C( 5136.26), SIMDE_FLOAT32_C( 4524.23)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 7.05), SIMDE_FLOAT32_C( 9.12), SIMDE_FLOAT32_C( 7.66), SIMDE_FLOAT32_C( 2.47), SIMDE_FLOAT32_C( 8.11), SIMDE_FLOAT32_C( 9.17), SIMDE_FLOAT32_C( 8.77), SIMDE_FLOAT32_C( 7.06), SIMDE_FLOAT32_C( 7.66), SIMDE_FLOAT32_C( 8.45), SIMDE_FLOAT32_C( 9.05), SIMDE_FLOAT32_C( 9.20), SIMDE_FLOAT32_C( 7.10), SIMDE_FLOAT32_C( 8.87), SIMDE_FLOAT32_C( 8.54), SIMDE_FLOAT32_C( 8.42)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 2809.03), SIMDE_FLOAT32_C( 3201.22), SIMDE_FLOAT32_C( 1237.85), SIMDE_FLOAT32_C( 4831.67), 
SIMDE_FLOAT32_C( 9663.28), SIMDE_FLOAT32_C( 5036.36), SIMDE_FLOAT32_C( 3363.90), SIMDE_FLOAT32_C( 4374.02), SIMDE_FLOAT32_C( 4087.77), SIMDE_FLOAT32_C( 5199.67), SIMDE_FLOAT32_C( 7554.25), SIMDE_FLOAT32_C( 6973.34), SIMDE_FLOAT32_C( 5071.68), SIMDE_FLOAT32_C( 3476.37), SIMDE_FLOAT32_C( 9581.30), SIMDE_FLOAT32_C( 1516.57)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 7.94), SIMDE_FLOAT32_C( 8.07), SIMDE_FLOAT32_C( 7.12), SIMDE_FLOAT32_C( 8.48), SIMDE_FLOAT32_C( 9.18), SIMDE_FLOAT32_C( 8.52), SIMDE_FLOAT32_C( 8.12), SIMDE_FLOAT32_C( 8.38), SIMDE_FLOAT32_C( 8.32), SIMDE_FLOAT32_C( 8.56), SIMDE_FLOAT32_C( 8.93), SIMDE_FLOAT32_C( 8.85), SIMDE_FLOAT32_C( 8.53), SIMDE_FLOAT32_C( 8.15), SIMDE_FLOAT32_C( 9.17), SIMDE_FLOAT32_C( 7.32)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_log_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm512_mask_log_ps.
 *
 * Each of the 8 cases holds a pass-through vector (src), a 16-bit mask (k),
 * an input vector (a) and the expected result (r). The loop calls
 * simde_mm512_mask_log_ps(src, k, a) and compares the result with r using
 * simde_assert_m512_close.
 *
 * In the stored data, a lane of r equals the natural logarithm of the same
 * lane of a (two decimals) when its mask bit is set, and repeats the src lane
 * when the bit is clear. The literals run from the lane matching mask bit 15
 * down to the lane matching bit 0: in the first case k = 41466 (0xA1FA), the
 * first expected literal is 8.68 (the log of 5890.98, bit 15 set) and the
 * last is the untouched src value 6733.16 (bit 0 clear).
 *
 * The trailing argument (1) is the tolerance handed to the assertion macro.
 * NOTE(review): presumably a number of decimal places -- confirm against the
 * macro's definition. Returns 0 after the loop over all vectors completes. */
static int test_simde_mm512_mask_log_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( 2746.67), SIMDE_FLOAT32_C( 8435.45), SIMDE_FLOAT32_C( 3937.29), SIMDE_FLOAT32_C( 1696.00), SIMDE_FLOAT32_C( 5142.35), SIMDE_FLOAT32_C( 381.82), SIMDE_FLOAT32_C( 695.25), SIMDE_FLOAT32_C( 8484.34), SIMDE_FLOAT32_C( 1569.36), SIMDE_FLOAT32_C( 7857.29), SIMDE_FLOAT32_C( 7111.03), SIMDE_FLOAT32_C( 7338.80), SIMDE_FLOAT32_C( 8351.20), SIMDE_FLOAT32_C( 5170.29), SIMDE_FLOAT32_C( 5195.06), SIMDE_FLOAT32_C( 6733.16)), UINT16_C(41466), simde_mm512_set_ps(SIMDE_FLOAT32_C( 5890.98), SIMDE_FLOAT32_C( 6166.85), SIMDE_FLOAT32_C( 6306.54), SIMDE_FLOAT32_C( 117.23), SIMDE_FLOAT32_C( 2775.95), SIMDE_FLOAT32_C( 3079.83), SIMDE_FLOAT32_C( 3474.63), SIMDE_FLOAT32_C( 2912.29), SIMDE_FLOAT32_C( 1609.14), SIMDE_FLOAT32_C( 5423.87), SIMDE_FLOAT32_C( 9127.65), SIMDE_FLOAT32_C( 3652.77), SIMDE_FLOAT32_C( 7486.55), SIMDE_FLOAT32_C( 3512.77), SIMDE_FLOAT32_C( 4068.94), SIMDE_FLOAT32_C( 1228.12)), 
simde_mm512_set_ps(SIMDE_FLOAT32_C( 8.68), SIMDE_FLOAT32_C( 8435.45), SIMDE_FLOAT32_C( 8.75), SIMDE_FLOAT32_C( 1696.00), SIMDE_FLOAT32_C( 5142.35), SIMDE_FLOAT32_C( 381.82), SIMDE_FLOAT32_C( 695.25), SIMDE_FLOAT32_C( 7.98), SIMDE_FLOAT32_C( 7.38), SIMDE_FLOAT32_C( 8.60), SIMDE_FLOAT32_C( 9.12), SIMDE_FLOAT32_C( 8.20), SIMDE_FLOAT32_C( 8.92), SIMDE_FLOAT32_C( 5170.29), SIMDE_FLOAT32_C( 8.31), SIMDE_FLOAT32_C( 6733.16)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 7348.31), SIMDE_FLOAT32_C( 4256.55), SIMDE_FLOAT32_C( 9550.14), SIMDE_FLOAT32_C( 8956.15), SIMDE_FLOAT32_C( 3981.75), SIMDE_FLOAT32_C( 6683.64), SIMDE_FLOAT32_C( 1262.07), SIMDE_FLOAT32_C( 2229.06), SIMDE_FLOAT32_C( 3060.52), SIMDE_FLOAT32_C( 8279.36), SIMDE_FLOAT32_C( 7661.76), SIMDE_FLOAT32_C( 8903.22), SIMDE_FLOAT32_C( 1148.23), SIMDE_FLOAT32_C( 2082.02), SIMDE_FLOAT32_C( 1146.40), SIMDE_FLOAT32_C( 5140.40)), UINT16_C(36797), simde_mm512_set_ps(SIMDE_FLOAT32_C( 4142.45), SIMDE_FLOAT32_C( 8400.08), SIMDE_FLOAT32_C( 9093.31), SIMDE_FLOAT32_C( 8002.34), SIMDE_FLOAT32_C( 6271.53), SIMDE_FLOAT32_C( 4596.36), SIMDE_FLOAT32_C( 276.11), SIMDE_FLOAT32_C( 1163.84), SIMDE_FLOAT32_C( 6994.08), SIMDE_FLOAT32_C( 6979.60), SIMDE_FLOAT32_C( 6696.04), SIMDE_FLOAT32_C( 3680.04), SIMDE_FLOAT32_C( 4846.05), SIMDE_FLOAT32_C( 7217.40), SIMDE_FLOAT32_C( 6902.28), SIMDE_FLOAT32_C( 9969.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 8.33), SIMDE_FLOAT32_C( 4256.55), SIMDE_FLOAT32_C( 9550.14), SIMDE_FLOAT32_C( 8956.15), SIMDE_FLOAT32_C( 8.74), SIMDE_FLOAT32_C( 8.43), SIMDE_FLOAT32_C( 5.62), SIMDE_FLOAT32_C( 7.06), SIMDE_FLOAT32_C( 8.85), SIMDE_FLOAT32_C( 8279.36), SIMDE_FLOAT32_C( 8.81), SIMDE_FLOAT32_C( 8.21), SIMDE_FLOAT32_C( 8.49), SIMDE_FLOAT32_C( 8.88), SIMDE_FLOAT32_C( 1146.40), SIMDE_FLOAT32_C( 9.21)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 4524.23), SIMDE_FLOAT32_C( 9203.26), SIMDE_FLOAT32_C( 2042.18), SIMDE_FLOAT32_C( 8657.47), SIMDE_FLOAT32_C( 8118.50), SIMDE_FLOAT32_C( 5703.37), SIMDE_FLOAT32_C( 469.19), SIMDE_FLOAT32_C( 
5499.67), SIMDE_FLOAT32_C( 1309.05), SIMDE_FLOAT32_C( 8793.93), SIMDE_FLOAT32_C( 6717.40), SIMDE_FLOAT32_C( 1010.42), SIMDE_FLOAT32_C( 2370.85), SIMDE_FLOAT32_C( 886.73), SIMDE_FLOAT32_C( 8278.35), SIMDE_FLOAT32_C( 7716.73)), UINT16_C(16804), simde_mm512_set_ps(SIMDE_FLOAT32_C( 5136.26), SIMDE_FLOAT32_C( 8450.59), SIMDE_FLOAT32_C( 4894.53), SIMDE_FLOAT32_C( 2755.53), SIMDE_FLOAT32_C( 7528.93), SIMDE_FLOAT32_C( 9155.11), SIMDE_FLOAT32_C( 9886.80), SIMDE_FLOAT32_C( 6656.71), SIMDE_FLOAT32_C( 7314.76), SIMDE_FLOAT32_C( 4105.04), SIMDE_FLOAT32_C( 6623.12), SIMDE_FLOAT32_C( 628.43), SIMDE_FLOAT32_C( 3357.32), SIMDE_FLOAT32_C( 4038.44), SIMDE_FLOAT32_C( 7806.81), SIMDE_FLOAT32_C( 4645.43)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 4524.23), SIMDE_FLOAT32_C( 9.04), SIMDE_FLOAT32_C( 2042.18), SIMDE_FLOAT32_C( 8657.47), SIMDE_FLOAT32_C( 8118.50), SIMDE_FLOAT32_C( 5703.37), SIMDE_FLOAT32_C( 469.19), SIMDE_FLOAT32_C( 8.80), SIMDE_FLOAT32_C( 8.90), SIMDE_FLOAT32_C( 8793.93), SIMDE_FLOAT32_C( 8.80), SIMDE_FLOAT32_C( 1010.42), SIMDE_FLOAT32_C( 2370.85), SIMDE_FLOAT32_C( 8.30), SIMDE_FLOAT32_C( 8278.35), SIMDE_FLOAT32_C( 7716.73)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 3256.50), SIMDE_FLOAT32_C( 2809.03), SIMDE_FLOAT32_C( 1237.85), SIMDE_FLOAT32_C( 9663.28), SIMDE_FLOAT32_C( 3363.90), SIMDE_FLOAT32_C( 4087.77), SIMDE_FLOAT32_C( 7554.25), SIMDE_FLOAT32_C( 5071.68), SIMDE_FLOAT32_C( 9581.30), SIMDE_FLOAT32_C( 1154.54), SIMDE_FLOAT32_C( 2130.97), SIMDE_FLOAT32_C( 3312.02), SIMDE_FLOAT32_C( 6468.19), SIMDE_FLOAT32_C( 2118.90), SIMDE_FLOAT32_C( 8551.88), SIMDE_FLOAT32_C( 1217.92)), UINT16_C( 2107), simde_mm512_set_ps(SIMDE_FLOAT32_C( 9486.33), SIMDE_FLOAT32_C( 4010.56), SIMDE_FLOAT32_C( 3201.22), SIMDE_FLOAT32_C( 4831.67), SIMDE_FLOAT32_C( 5036.36), SIMDE_FLOAT32_C( 4374.02), SIMDE_FLOAT32_C( 5199.67), SIMDE_FLOAT32_C( 6973.34), SIMDE_FLOAT32_C( 3476.37), SIMDE_FLOAT32_C( 1516.57), SIMDE_FLOAT32_C( 9110.29), SIMDE_FLOAT32_C( 11.83), SIMDE_FLOAT32_C( 9618.20), SIMDE_FLOAT32_C( 1159.42), 
SIMDE_FLOAT32_C( 4661.80), SIMDE_FLOAT32_C( 9887.44)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3256.50), SIMDE_FLOAT32_C( 2809.03), SIMDE_FLOAT32_C( 1237.85), SIMDE_FLOAT32_C( 9663.28), SIMDE_FLOAT32_C( 8.52), SIMDE_FLOAT32_C( 4087.77), SIMDE_FLOAT32_C( 7554.25), SIMDE_FLOAT32_C( 5071.68), SIMDE_FLOAT32_C( 9581.30), SIMDE_FLOAT32_C( 1154.54), SIMDE_FLOAT32_C( 9.12), SIMDE_FLOAT32_C( 2.47), SIMDE_FLOAT32_C( 9.17), SIMDE_FLOAT32_C( 2118.90), SIMDE_FLOAT32_C( 8.45), SIMDE_FLOAT32_C( 9.20)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 4921.97), SIMDE_FLOAT32_C( 1314.36), SIMDE_FLOAT32_C( 3425.34), SIMDE_FLOAT32_C( 5889.62), SIMDE_FLOAT32_C( 6729.66), SIMDE_FLOAT32_C( 9443.57), SIMDE_FLOAT32_C( 9578.53), SIMDE_FLOAT32_C( 5667.58), SIMDE_FLOAT32_C( 7424.68), SIMDE_FLOAT32_C( 2009.69), SIMDE_FLOAT32_C( 1044.67), SIMDE_FLOAT32_C( 1170.36), SIMDE_FLOAT32_C( 6106.86), SIMDE_FLOAT32_C( 1058.19), SIMDE_FLOAT32_C( 1124.78), SIMDE_FLOAT32_C( 7203.19)), UINT16_C(22274), simde_mm512_set_ps(SIMDE_FLOAT32_C( 7482.85), SIMDE_FLOAT32_C( 9575.95), SIMDE_FLOAT32_C( 1407.98), SIMDE_FLOAT32_C( 5799.87), SIMDE_FLOAT32_C( 694.94), SIMDE_FLOAT32_C( 7133.07), SIMDE_FLOAT32_C( 9660.54), SIMDE_FLOAT32_C( 5551.82), SIMDE_FLOAT32_C( 9134.21), SIMDE_FLOAT32_C( 4616.24), SIMDE_FLOAT32_C( 6187.92), SIMDE_FLOAT32_C( 3107.51), SIMDE_FLOAT32_C( 1991.62), SIMDE_FLOAT32_C( 1882.51), SIMDE_FLOAT32_C( 287.66), SIMDE_FLOAT32_C( 7377.56)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 4921.97), SIMDE_FLOAT32_C( 9.17), SIMDE_FLOAT32_C( 3425.34), SIMDE_FLOAT32_C( 8.67), SIMDE_FLOAT32_C( 6729.66), SIMDE_FLOAT32_C( 8.87), SIMDE_FLOAT32_C( 9.18), SIMDE_FLOAT32_C( 8.62), SIMDE_FLOAT32_C( 7424.68), SIMDE_FLOAT32_C( 2009.69), SIMDE_FLOAT32_C( 1044.67), SIMDE_FLOAT32_C( 1170.36), SIMDE_FLOAT32_C( 6106.86), SIMDE_FLOAT32_C( 1058.19), SIMDE_FLOAT32_C( 5.66), SIMDE_FLOAT32_C( 7203.19)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 9415.27), SIMDE_FLOAT32_C( 963.59), SIMDE_FLOAT32_C( 4649.74), SIMDE_FLOAT32_C( 1078.30), SIMDE_FLOAT32_C( 
5462.61), SIMDE_FLOAT32_C( 6033.01), SIMDE_FLOAT32_C( 9173.00), SIMDE_FLOAT32_C( 4672.02), SIMDE_FLOAT32_C( 3569.65), SIMDE_FLOAT32_C( 3935.68), SIMDE_FLOAT32_C( 3408.08), SIMDE_FLOAT32_C( 8917.42), SIMDE_FLOAT32_C( 1855.90), SIMDE_FLOAT32_C( 7781.74), SIMDE_FLOAT32_C( 7197.17), SIMDE_FLOAT32_C( 7170.16)), UINT16_C(27396), simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.74), SIMDE_FLOAT32_C( 2968.36), SIMDE_FLOAT32_C( 1281.72), SIMDE_FLOAT32_C( 1177.11), SIMDE_FLOAT32_C( 8949.44), SIMDE_FLOAT32_C( 5024.17), SIMDE_FLOAT32_C( 907.29), SIMDE_FLOAT32_C( 5805.32), SIMDE_FLOAT32_C( 7896.24), SIMDE_FLOAT32_C( 4941.12), SIMDE_FLOAT32_C( 3457.39), SIMDE_FLOAT32_C( 1402.13), SIMDE_FLOAT32_C( 6670.00), SIMDE_FLOAT32_C( 6373.56), SIMDE_FLOAT32_C( 415.89), SIMDE_FLOAT32_C( 2550.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 9415.27), SIMDE_FLOAT32_C( 8.00), SIMDE_FLOAT32_C( 7.16), SIMDE_FLOAT32_C( 1078.30), SIMDE_FLOAT32_C( 9.10), SIMDE_FLOAT32_C( 6033.01), SIMDE_FLOAT32_C( 6.81), SIMDE_FLOAT32_C( 8.67), SIMDE_FLOAT32_C( 3569.65), SIMDE_FLOAT32_C( 3935.68), SIMDE_FLOAT32_C( 3408.08), SIMDE_FLOAT32_C( 8917.42), SIMDE_FLOAT32_C( 1855.90), SIMDE_FLOAT32_C( 8.76), SIMDE_FLOAT32_C( 7197.17), SIMDE_FLOAT32_C( 7170.16)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 7648.13), SIMDE_FLOAT32_C( 4875.56), SIMDE_FLOAT32_C( 161.12), SIMDE_FLOAT32_C( 8194.68), SIMDE_FLOAT32_C( 7254.51), SIMDE_FLOAT32_C( 1142.29), SIMDE_FLOAT32_C( 5528.96), SIMDE_FLOAT32_C( 7950.51), SIMDE_FLOAT32_C( 5154.57), SIMDE_FLOAT32_C( 8176.75), SIMDE_FLOAT32_C( 4580.00), SIMDE_FLOAT32_C( 5400.22), SIMDE_FLOAT32_C( 1452.71), SIMDE_FLOAT32_C( 8039.28), SIMDE_FLOAT32_C( 6972.90), SIMDE_FLOAT32_C( 554.46)), UINT16_C( 953), simde_mm512_set_ps(SIMDE_FLOAT32_C( 5093.74), SIMDE_FLOAT32_C( 9045.23), SIMDE_FLOAT32_C( 5720.26), SIMDE_FLOAT32_C( 2861.39), SIMDE_FLOAT32_C( 6541.39), SIMDE_FLOAT32_C( 4114.75), SIMDE_FLOAT32_C( 2711.17), SIMDE_FLOAT32_C( 8391.22), SIMDE_FLOAT32_C( 5330.27), SIMDE_FLOAT32_C( 3661.45), SIMDE_FLOAT32_C( 5586.41), 
SIMDE_FLOAT32_C( 2116.00), SIMDE_FLOAT32_C( 4808.04), SIMDE_FLOAT32_C( 3749.32), SIMDE_FLOAT32_C( 4730.38), SIMDE_FLOAT32_C( 5459.69)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 7648.13), SIMDE_FLOAT32_C( 4875.56), SIMDE_FLOAT32_C( 161.12), SIMDE_FLOAT32_C( 8194.68), SIMDE_FLOAT32_C( 7254.51), SIMDE_FLOAT32_C( 1142.29), SIMDE_FLOAT32_C( 7.91), SIMDE_FLOAT32_C( 9.03), SIMDE_FLOAT32_C( 8.58), SIMDE_FLOAT32_C( 8176.75), SIMDE_FLOAT32_C( 8.63), SIMDE_FLOAT32_C( 7.66), SIMDE_FLOAT32_C( 8.48), SIMDE_FLOAT32_C( 8039.28), SIMDE_FLOAT32_C( 6972.90), SIMDE_FLOAT32_C( 8.61)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 1058.07), SIMDE_FLOAT32_C( 6652.15), SIMDE_FLOAT32_C( 2532.95), SIMDE_FLOAT32_C( 9113.62), SIMDE_FLOAT32_C( 9783.41), SIMDE_FLOAT32_C( 9773.08), SIMDE_FLOAT32_C( 9127.47), SIMDE_FLOAT32_C( 918.64), SIMDE_FLOAT32_C( 3953.30), SIMDE_FLOAT32_C( 333.95), SIMDE_FLOAT32_C( 1356.49), SIMDE_FLOAT32_C( 2899.69), SIMDE_FLOAT32_C( 5501.59), SIMDE_FLOAT32_C( 5515.77), SIMDE_FLOAT32_C( 7198.84), SIMDE_FLOAT32_C( 3978.34)), UINT16_C(12713), simde_mm512_set_ps(SIMDE_FLOAT32_C( 792.83), SIMDE_FLOAT32_C( 4929.19), SIMDE_FLOAT32_C( 9124.38), SIMDE_FLOAT32_C( 8968.13), SIMDE_FLOAT32_C( 1316.26), SIMDE_FLOAT32_C( 3447.13), SIMDE_FLOAT32_C( 8644.35), SIMDE_FLOAT32_C( 3246.39), SIMDE_FLOAT32_C( 5304.47), SIMDE_FLOAT32_C( 5549.07), SIMDE_FLOAT32_C( 8579.68), SIMDE_FLOAT32_C( 3747.01), SIMDE_FLOAT32_C( 9720.69), SIMDE_FLOAT32_C( 6809.26), SIMDE_FLOAT32_C( 4934.63), SIMDE_FLOAT32_C( 9263.02)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1058.07), SIMDE_FLOAT32_C( 6652.15), SIMDE_FLOAT32_C( 9.12), SIMDE_FLOAT32_C( 9.10), SIMDE_FLOAT32_C( 9783.41), SIMDE_FLOAT32_C( 9773.08), SIMDE_FLOAT32_C( 9127.47), SIMDE_FLOAT32_C( 8.09), SIMDE_FLOAT32_C( 8.58), SIMDE_FLOAT32_C( 333.95), SIMDE_FLOAT32_C( 9.06), SIMDE_FLOAT32_C( 2899.69), SIMDE_FLOAT32_C( 9.18), SIMDE_FLOAT32_C( 5515.77), SIMDE_FLOAT32_C( 7198.84), SIMDE_FLOAT32_C( 9.13)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { 
simde__m512 r = simde_mm512_mask_log_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm512_log_pd.
 *
 * Feeds 8 fixed 8-lane double-precision vectors (built with
 * simde_mm512_set_pd) through simde_mm512_log_pd and compares each result
 * with the stored expected vector using simde_assert_m512d_close. The
 * expected values are the natural logarithm of the inputs written to two
 * decimals (e.g. 7486.55 -> 8.92). The trailing argument (1) is the tolerance
 * handed to the assertion macro.
 * NOTE(review): presumably a number of decimal places -- confirm against the
 * macro's definition. Returns 0 after the loop over all vectors completes. */
static int test_simde_mm512_log_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 7486.55), SIMDE_FLOAT64_C( 8351.20), SIMDE_FLOAT64_C( 3512.77), SIMDE_FLOAT64_C( 5170.29), SIMDE_FLOAT64_C( 4068.94), SIMDE_FLOAT64_C( 5195.06), SIMDE_FLOAT64_C( 1228.12), SIMDE_FLOAT64_C( 6733.16)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 8.92), SIMDE_FLOAT64_C( 9.03), SIMDE_FLOAT64_C( 8.16), SIMDE_FLOAT64_C( 8.55), SIMDE_FLOAT64_C( 8.31), SIMDE_FLOAT64_C( 8.56), SIMDE_FLOAT64_C( 7.11), SIMDE_FLOAT64_C( 8.81)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 1609.14), SIMDE_FLOAT64_C( 1569.36), SIMDE_FLOAT64_C( 5423.87), SIMDE_FLOAT64_C( 7857.29), SIMDE_FLOAT64_C( 9127.65), SIMDE_FLOAT64_C( 7111.03), SIMDE_FLOAT64_C( 3652.77), SIMDE_FLOAT64_C( 7338.80)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 7.38), SIMDE_FLOAT64_C( 7.36), SIMDE_FLOAT64_C( 8.60), SIMDE_FLOAT64_C( 8.97), SIMDE_FLOAT64_C( 9.12), SIMDE_FLOAT64_C( 8.87), SIMDE_FLOAT64_C( 8.20), SIMDE_FLOAT64_C( 8.90)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 2775.95), SIMDE_FLOAT64_C( 5142.35), SIMDE_FLOAT64_C( 3079.83), SIMDE_FLOAT64_C( 381.82), SIMDE_FLOAT64_C( 3474.63), SIMDE_FLOAT64_C( 695.25), SIMDE_FLOAT64_C( 2912.29), SIMDE_FLOAT64_C( 8484.34)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 7.93), SIMDE_FLOAT64_C( 8.55), SIMDE_FLOAT64_C( 8.03), SIMDE_FLOAT64_C( 5.94), SIMDE_FLOAT64_C( 8.15), SIMDE_FLOAT64_C( 6.54), SIMDE_FLOAT64_C( 7.98), SIMDE_FLOAT64_C( 9.05)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 5890.98), SIMDE_FLOAT64_C( 2746.67), SIMDE_FLOAT64_C( 6166.85), SIMDE_FLOAT64_C( 8435.45), SIMDE_FLOAT64_C( 6306.54), SIMDE_FLOAT64_C( 3937.29), SIMDE_FLOAT64_C( 117.23), SIMDE_FLOAT64_C( 1696.00)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 8.68), SIMDE_FLOAT64_C( 7.92), SIMDE_FLOAT64_C( 8.73), SIMDE_FLOAT64_C( 9.04), SIMDE_FLOAT64_C( 
8.75), SIMDE_FLOAT64_C( 8.28), SIMDE_FLOAT64_C( 4.76), SIMDE_FLOAT64_C( 7.44)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 1148.23), SIMDE_FLOAT64_C( 7217.40), SIMDE_FLOAT64_C( 2082.02), SIMDE_FLOAT64_C( 6902.28), SIMDE_FLOAT64_C( 1146.40), SIMDE_FLOAT64_C( 9969.51), SIMDE_FLOAT64_C( 5140.40), SIMDE_FLOAT64_C( 9206.03)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 7.05), SIMDE_FLOAT64_C( 8.88), SIMDE_FLOAT64_C( 7.64), SIMDE_FLOAT64_C( 8.84), SIMDE_FLOAT64_C( 7.04), SIMDE_FLOAT64_C( 9.21), SIMDE_FLOAT64_C( 8.54), SIMDE_FLOAT64_C( 9.13)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 3060.52), SIMDE_FLOAT64_C( 6979.60), SIMDE_FLOAT64_C( 8279.36), SIMDE_FLOAT64_C( 6696.04), SIMDE_FLOAT64_C( 7661.76), SIMDE_FLOAT64_C( 3680.04), SIMDE_FLOAT64_C( 8903.22), SIMDE_FLOAT64_C( 4846.05)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 8.03), SIMDE_FLOAT64_C( 8.85), SIMDE_FLOAT64_C( 9.02), SIMDE_FLOAT64_C( 8.81), SIMDE_FLOAT64_C( 8.94), SIMDE_FLOAT64_C( 8.21), SIMDE_FLOAT64_C( 9.09), SIMDE_FLOAT64_C( 8.49)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 3981.75), SIMDE_FLOAT64_C( 4596.36), SIMDE_FLOAT64_C( 6683.64), SIMDE_FLOAT64_C( 276.11), SIMDE_FLOAT64_C( 1262.07), SIMDE_FLOAT64_C( 1163.84), SIMDE_FLOAT64_C( 2229.06), SIMDE_FLOAT64_C( 6994.08)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 8.29), SIMDE_FLOAT64_C( 8.43), SIMDE_FLOAT64_C( 8.81), SIMDE_FLOAT64_C( 5.62), SIMDE_FLOAT64_C( 7.14), SIMDE_FLOAT64_C( 7.06), SIMDE_FLOAT64_C( 7.71), SIMDE_FLOAT64_C( 8.85)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 7348.31), SIMDE_FLOAT64_C( 8400.08), SIMDE_FLOAT64_C( 4256.55), SIMDE_FLOAT64_C( 9093.31), SIMDE_FLOAT64_C( 9550.14), SIMDE_FLOAT64_C( 8002.34), SIMDE_FLOAT64_C( 8956.15), SIMDE_FLOAT64_C( 6271.53)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 8.90), SIMDE_FLOAT64_C( 9.04), SIMDE_FLOAT64_C( 8.36), SIMDE_FLOAT64_C( 9.12), SIMDE_FLOAT64_C( 9.16), SIMDE_FLOAT64_C( 8.99), SIMDE_FLOAT64_C( 9.10), SIMDE_FLOAT64_C( 8.74)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = 
simde_mm512_log_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm512_mask_log_pd.
 *
 * Each of the 8 cases holds a pass-through vector (src), an 8-bit mask (k),
 * an input vector (a) and the expected result (r). The loop calls
 * simde_mm512_mask_log_pd(src, k, a) and compares the result with r using
 * simde_assert_m512d_close.
 *
 * In the stored data, a lane of r equals the natural logarithm of the same
 * lane of a (two decimals) when its mask bit is set, and repeats the src lane
 * when the bit is clear. The literals run from the lane matching mask bit 7
 * down to the lane matching bit 0: in the first case k = 139 (0x8B), the
 * first expected literal is 7.38 (the log of 1609.14, bit 7 set) and the
 * second is the untouched src value 7857.29 (bit 6 clear).
 *
 * The trailing argument (1) is the tolerance handed to the assertion macro.
 * NOTE(review): presumably a number of decimal places -- confirm against the
 * macro's definition. Returns 0 after the loop over all vectors completes. */
static int test_simde_mm512_mask_log_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 1569.36), SIMDE_FLOAT64_C( 7857.29), SIMDE_FLOAT64_C( 7111.03), SIMDE_FLOAT64_C( 7338.80), SIMDE_FLOAT64_C( 8351.20), SIMDE_FLOAT64_C( 5170.29), SIMDE_FLOAT64_C( 5195.06), SIMDE_FLOAT64_C( 6733.16)), UINT8_C(139), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1609.14), SIMDE_FLOAT64_C( 5423.87), SIMDE_FLOAT64_C( 9127.65), SIMDE_FLOAT64_C( 3652.77), SIMDE_FLOAT64_C( 7486.55), SIMDE_FLOAT64_C( 3512.77), SIMDE_FLOAT64_C( 4068.94), SIMDE_FLOAT64_C( 1228.12)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 7.38), SIMDE_FLOAT64_C( 7857.29), SIMDE_FLOAT64_C( 7111.03), SIMDE_FLOAT64_C( 7338.80), SIMDE_FLOAT64_C( 8.92), SIMDE_FLOAT64_C( 5170.29), SIMDE_FLOAT64_C( 8.31), SIMDE_FLOAT64_C( 7.11)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 5890.98), SIMDE_FLOAT64_C( 6166.85), SIMDE_FLOAT64_C( 6306.54), SIMDE_FLOAT64_C( 117.23), SIMDE_FLOAT64_C( 2775.95), SIMDE_FLOAT64_C( 3079.83), SIMDE_FLOAT64_C( 3474.63), SIMDE_FLOAT64_C( 2912.29)), UINT8_C(229), simde_mm512_set_pd(SIMDE_FLOAT64_C( 9206.03), SIMDE_FLOAT64_C( 2746.67), SIMDE_FLOAT64_C( 8435.45), SIMDE_FLOAT64_C( 3937.29), SIMDE_FLOAT64_C( 1696.00), SIMDE_FLOAT64_C( 5142.35), SIMDE_FLOAT64_C( 381.82), SIMDE_FLOAT64_C( 695.25)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 9.13), SIMDE_FLOAT64_C( 7.92), SIMDE_FLOAT64_C( 9.04), SIMDE_FLOAT64_C( 117.23), SIMDE_FLOAT64_C( 2775.95), SIMDE_FLOAT64_C( 8.55), SIMDE_FLOAT64_C( 3474.63), SIMDE_FLOAT64_C( 6.54)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 6994.08), SIMDE_FLOAT64_C( 6979.60), SIMDE_FLOAT64_C( 6696.04), SIMDE_FLOAT64_C( 3680.04), SIMDE_FLOAT64_C( 4846.05), SIMDE_FLOAT64_C( 7217.40), SIMDE_FLOAT64_C( 6902.28), SIMDE_FLOAT64_C( 9969.51)), UINT8_C(253), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2229.06), SIMDE_FLOAT64_C( 
3060.52), SIMDE_FLOAT64_C( 8279.36), SIMDE_FLOAT64_C( 7661.76), SIMDE_FLOAT64_C( 8903.22), SIMDE_FLOAT64_C( 1148.23), SIMDE_FLOAT64_C( 2082.02), SIMDE_FLOAT64_C( 1146.40)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 7.71), SIMDE_FLOAT64_C( 8.03), SIMDE_FLOAT64_C( 9.02), SIMDE_FLOAT64_C( 8.94), SIMDE_FLOAT64_C( 9.09), SIMDE_FLOAT64_C( 7.05), SIMDE_FLOAT64_C( 6902.28), SIMDE_FLOAT64_C( 7.04)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 5603.27), SIMDE_FLOAT64_C( 7348.31), SIMDE_FLOAT64_C( 4256.55), SIMDE_FLOAT64_C( 9550.14), SIMDE_FLOAT64_C( 8956.15), SIMDE_FLOAT64_C( 3981.75), SIMDE_FLOAT64_C( 6683.64), SIMDE_FLOAT64_C( 1262.07)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 7716.73), SIMDE_FLOAT64_C( 4142.45), SIMDE_FLOAT64_C( 8400.08), SIMDE_FLOAT64_C( 9093.31), SIMDE_FLOAT64_C( 8002.34), SIMDE_FLOAT64_C( 6271.53), SIMDE_FLOAT64_C( 4596.36), SIMDE_FLOAT64_C( 276.11)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 5603.27), SIMDE_FLOAT64_C( 8.33), SIMDE_FLOAT64_C( 4256.55), SIMDE_FLOAT64_C( 9.12), SIMDE_FLOAT64_C( 8.99), SIMDE_FLOAT64_C( 8.74), SIMDE_FLOAT64_C( 6683.64), SIMDE_FLOAT64_C( 5.62)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 5499.67), SIMDE_FLOAT64_C( 1309.05), SIMDE_FLOAT64_C( 8793.93), SIMDE_FLOAT64_C( 6717.40), SIMDE_FLOAT64_C( 1010.42), SIMDE_FLOAT64_C( 2370.85), SIMDE_FLOAT64_C( 886.73), SIMDE_FLOAT64_C( 8278.35)), UINT8_C(145), simde_mm512_set_pd(SIMDE_FLOAT64_C( 6656.71), SIMDE_FLOAT64_C( 7314.76), SIMDE_FLOAT64_C( 4105.04), SIMDE_FLOAT64_C( 6623.12), SIMDE_FLOAT64_C( 628.43), SIMDE_FLOAT64_C( 3357.32), SIMDE_FLOAT64_C( 4038.44), SIMDE_FLOAT64_C( 7806.81)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 8.80), SIMDE_FLOAT64_C( 1309.05), SIMDE_FLOAT64_C( 8793.93), SIMDE_FLOAT64_C( 8.80), SIMDE_FLOAT64_C( 1010.42), SIMDE_FLOAT64_C( 2370.85), SIMDE_FLOAT64_C( 886.73), SIMDE_FLOAT64_C( 8.96)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 1217.92), SIMDE_FLOAT64_C( 5136.26), SIMDE_FLOAT64_C( 8450.59), SIMDE_FLOAT64_C( 4894.53), SIMDE_FLOAT64_C( 2755.53), SIMDE_FLOAT64_C( 7528.93), 
SIMDE_FLOAT64_C( 9155.11), SIMDE_FLOAT64_C( 9886.80)), UINT8_C( 75), simde_mm512_set_pd(SIMDE_FLOAT64_C( 9887.44), SIMDE_FLOAT64_C( 7124.06), SIMDE_FLOAT64_C( 4524.23), SIMDE_FLOAT64_C( 9203.26), SIMDE_FLOAT64_C( 2042.18), SIMDE_FLOAT64_C( 8657.47), SIMDE_FLOAT64_C( 8118.50), SIMDE_FLOAT64_C( 5703.37)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1217.92), SIMDE_FLOAT64_C( 8.87), SIMDE_FLOAT64_C( 8450.59), SIMDE_FLOAT64_C( 4894.53), SIMDE_FLOAT64_C( 7.62), SIMDE_FLOAT64_C( 7528.93), SIMDE_FLOAT64_C( 9.00), SIMDE_FLOAT64_C( 8.65)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 6973.34), SIMDE_FLOAT64_C( 3476.37), SIMDE_FLOAT64_C( 1516.57), SIMDE_FLOAT64_C( 9110.29), SIMDE_FLOAT64_C( 11.83), SIMDE_FLOAT64_C( 9618.20), SIMDE_FLOAT64_C( 1159.42), SIMDE_FLOAT64_C( 4661.80)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 7554.25), SIMDE_FLOAT64_C( 5071.68), SIMDE_FLOAT64_C( 9581.30), SIMDE_FLOAT64_C( 1154.54), SIMDE_FLOAT64_C( 2130.97), SIMDE_FLOAT64_C( 3312.02), SIMDE_FLOAT64_C( 6468.19), SIMDE_FLOAT64_C( 2118.90)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 6973.34), SIMDE_FLOAT64_C( 8.53), SIMDE_FLOAT64_C( 1516.57), SIMDE_FLOAT64_C( 7.05), SIMDE_FLOAT64_C( 7.66), SIMDE_FLOAT64_C( 8.11), SIMDE_FLOAT64_C( 1159.42), SIMDE_FLOAT64_C( 7.66)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 7377.56), SIMDE_FLOAT64_C( 9683.23), SIMDE_FLOAT64_C( 3256.50), SIMDE_FLOAT64_C( 2809.03), SIMDE_FLOAT64_C( 1237.85), SIMDE_FLOAT64_C( 9663.28), SIMDE_FLOAT64_C( 3363.90), SIMDE_FLOAT64_C( 4087.77)), UINT8_C(213), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1124.78), SIMDE_FLOAT64_C( 7203.19), SIMDE_FLOAT64_C( 9486.33), SIMDE_FLOAT64_C( 4010.56), SIMDE_FLOAT64_C( 3201.22), SIMDE_FLOAT64_C( 4831.67), SIMDE_FLOAT64_C( 5036.36), SIMDE_FLOAT64_C( 4374.02)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 7.03), SIMDE_FLOAT64_C( 8.88), SIMDE_FLOAT64_C( 3256.50), SIMDE_FLOAT64_C( 8.30), SIMDE_FLOAT64_C( 1237.85), SIMDE_FLOAT64_C( 8.48), SIMDE_FLOAT64_C( 3363.90), SIMDE_FLOAT64_C( 8.38)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_log_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_log1p_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 75.94), SIMDE_FLOAT32_C( 8.83), SIMDE_FLOAT32_C( 79.72), SIMDE_FLOAT32_C( 43.97) }, { SIMDE_FLOAT32_C( 4.34), SIMDE_FLOAT32_C( 2.29), SIMDE_FLOAT32_C( 4.39), SIMDE_FLOAT32_C( 3.81) } }, { { SIMDE_FLOAT32_C( 40.77), SIMDE_FLOAT32_C( 95.32), SIMDE_FLOAT32_C( 68.75), SIMDE_FLOAT32_C( 17.84) }, { SIMDE_FLOAT32_C( 3.73), SIMDE_FLOAT32_C( 4.57), SIMDE_FLOAT32_C( 4.24), SIMDE_FLOAT32_C( 2.94) } }, { { SIMDE_FLOAT32_C( 87.84), SIMDE_FLOAT32_C( 9.10), SIMDE_FLOAT32_C( 51.15), SIMDE_FLOAT32_C( 49.38) }, { SIMDE_FLOAT32_C( 4.49), SIMDE_FLOAT32_C( 2.31), SIMDE_FLOAT32_C( 3.95), SIMDE_FLOAT32_C( 3.92) } }, { { SIMDE_FLOAT32_C( 72.43), SIMDE_FLOAT32_C( 10.89), SIMDE_FLOAT32_C( 17.62), SIMDE_FLOAT32_C( 49.42) }, { SIMDE_FLOAT32_C( 4.30), SIMDE_FLOAT32_C( 2.48), SIMDE_FLOAT32_C( 2.92), SIMDE_FLOAT32_C( 3.92) } }, { { SIMDE_FLOAT32_C( 61.53), SIMDE_FLOAT32_C( 6.26), SIMDE_FLOAT32_C( 3.60), SIMDE_FLOAT32_C( 56.29) }, { SIMDE_FLOAT32_C( 4.14), SIMDE_FLOAT32_C( 1.98), SIMDE_FLOAT32_C( 1.53), SIMDE_FLOAT32_C( 4.05) } }, { { SIMDE_FLOAT32_C( 33.37), SIMDE_FLOAT32_C( 28.79), SIMDE_FLOAT32_C( 10.52), SIMDE_FLOAT32_C( 86.16) }, { SIMDE_FLOAT32_C( 3.54), SIMDE_FLOAT32_C( 3.39), SIMDE_FLOAT32_C( 2.44), SIMDE_FLOAT32_C( 4.47) } }, { { SIMDE_FLOAT32_C( 75.88), SIMDE_FLOAT32_C( 38.85), SIMDE_FLOAT32_C( 41.92), SIMDE_FLOAT32_C( 15.06) }, { SIMDE_FLOAT32_C( 4.34), SIMDE_FLOAT32_C( 3.69), SIMDE_FLOAT32_C( 3.76), SIMDE_FLOAT32_C( 2.78) } }, { { SIMDE_FLOAT32_C( 49.93), SIMDE_FLOAT32_C( 45.63), SIMDE_FLOAT32_C( 11.83), SIMDE_FLOAT32_C( 25.87) }, { SIMDE_FLOAT32_C( 3.93), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( 3.29) } } }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_log1p_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_log1p_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 71.66), SIMDE_FLOAT64_C( 23.63) }, { SIMDE_FLOAT64_C( 4.29), SIMDE_FLOAT64_C( 3.20) } }, { { SIMDE_FLOAT64_C( 39.38), SIMDE_FLOAT64_C( 45.82) }, { SIMDE_FLOAT64_C( 3.70), SIMDE_FLOAT64_C( 3.85) } }, { { SIMDE_FLOAT64_C( 26.23), SIMDE_FLOAT64_C( 40.67) }, { SIMDE_FLOAT64_C( 3.30), SIMDE_FLOAT64_C( 3.73) } }, { { SIMDE_FLOAT64_C( 88.01), SIMDE_FLOAT64_C( 4.27) }, { SIMDE_FLOAT64_C( 4.49), SIMDE_FLOAT64_C( 1.66) } }, { { SIMDE_FLOAT64_C( 8.61), SIMDE_FLOAT64_C( 48.32) }, { SIMDE_FLOAT64_C( 2.26), SIMDE_FLOAT64_C( 3.90) } }, { { SIMDE_FLOAT64_C( 83.85), SIMDE_FLOAT64_C( 77.45) }, { SIMDE_FLOAT64_C( 4.44), SIMDE_FLOAT64_C( 4.36) } }, { { SIMDE_FLOAT64_C( 28.87), SIMDE_FLOAT64_C( 9.70) }, { SIMDE_FLOAT64_C( 3.40), SIMDE_FLOAT64_C( 2.37) } }, { { SIMDE_FLOAT64_C( 59.45), SIMDE_FLOAT64_C( 89.65) }, { SIMDE_FLOAT64_C( 4.10), SIMDE_FLOAT64_C( 4.51) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_log1p_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_log1p_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 7.73), SIMDE_FLOAT32_C( 44.58), SIMDE_FLOAT32_C( 1.09), SIMDE_FLOAT32_C( 37.39), SIMDE_FLOAT32_C( 81.72), SIMDE_FLOAT32_C( 97.03), SIMDE_FLOAT32_C( 32.40), SIMDE_FLOAT32_C( 46.21) }, { SIMDE_FLOAT32_C( 2.17), SIMDE_FLOAT32_C( 3.82), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 3.65), 
SIMDE_FLOAT32_C( 4.42), SIMDE_FLOAT32_C( 4.59), SIMDE_FLOAT32_C( 3.51), SIMDE_FLOAT32_C( 3.85) } }, { { SIMDE_FLOAT32_C( 68.19), SIMDE_FLOAT32_C( 59.69), SIMDE_FLOAT32_C( 65.16), SIMDE_FLOAT32_C( 49.14), SIMDE_FLOAT32_C( 16.80), SIMDE_FLOAT32_C( 22.15), SIMDE_FLOAT32_C( 15.49), SIMDE_FLOAT32_C( 40.38) }, { SIMDE_FLOAT32_C( 4.24), SIMDE_FLOAT32_C( 4.11), SIMDE_FLOAT32_C( 4.19), SIMDE_FLOAT32_C( 3.91), SIMDE_FLOAT32_C( 2.88), SIMDE_FLOAT32_C( 3.14), SIMDE_FLOAT32_C( 2.80), SIMDE_FLOAT32_C( 3.72) } }, { { SIMDE_FLOAT32_C( 30.77), SIMDE_FLOAT32_C( 61.57), SIMDE_FLOAT32_C( 50.60), SIMDE_FLOAT32_C( 43.40), SIMDE_FLOAT32_C( 79.43), SIMDE_FLOAT32_C( 23.65), SIMDE_FLOAT32_C( 55.47), SIMDE_FLOAT32_C( 29.32) }, { SIMDE_FLOAT32_C( 3.46), SIMDE_FLOAT32_C( 4.14), SIMDE_FLOAT32_C( 3.94), SIMDE_FLOAT32_C( 3.79), SIMDE_FLOAT32_C( 4.39), SIMDE_FLOAT32_C( 3.20), SIMDE_FLOAT32_C( 4.03), SIMDE_FLOAT32_C( 3.41) } }, { { SIMDE_FLOAT32_C( 54.13), SIMDE_FLOAT32_C( 82.81), SIMDE_FLOAT32_C( 78.99), SIMDE_FLOAT32_C( 50.88), SIMDE_FLOAT32_C( 5.92), SIMDE_FLOAT32_C( 42.82), SIMDE_FLOAT32_C( 53.24), SIMDE_FLOAT32_C( 13.65) }, { SIMDE_FLOAT32_C( 4.01), SIMDE_FLOAT32_C( 4.43), SIMDE_FLOAT32_C( 4.38), SIMDE_FLOAT32_C( 3.95), SIMDE_FLOAT32_C( 1.93), SIMDE_FLOAT32_C( 3.78), SIMDE_FLOAT32_C( 3.99), SIMDE_FLOAT32_C( 2.68) } }, { { SIMDE_FLOAT32_C( 87.40), SIMDE_FLOAT32_C( 54.33), SIMDE_FLOAT32_C( 51.04), SIMDE_FLOAT32_C( 69.12), SIMDE_FLOAT32_C( 51.36), SIMDE_FLOAT32_C( 83.44), SIMDE_FLOAT32_C( 15.34), SIMDE_FLOAT32_C( 19.54) }, { SIMDE_FLOAT32_C( 4.48), SIMDE_FLOAT32_C( 4.01), SIMDE_FLOAT32_C( 3.95), SIMDE_FLOAT32_C( 4.25), SIMDE_FLOAT32_C( 3.96), SIMDE_FLOAT32_C( 4.44), SIMDE_FLOAT32_C( 2.79), SIMDE_FLOAT32_C( 3.02) } }, { { SIMDE_FLOAT32_C( 43.13), SIMDE_FLOAT32_C( 80.50), SIMDE_FLOAT32_C( 68.69), SIMDE_FLOAT32_C( 59.93), SIMDE_FLOAT32_C( 2.65), SIMDE_FLOAT32_C( 84.18), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 33.43) }, { SIMDE_FLOAT32_C( 3.79), SIMDE_FLOAT32_C( 4.40), SIMDE_FLOAT32_C( 4.24), 
SIMDE_FLOAT32_C( 4.11), SIMDE_FLOAT32_C( 1.29), SIMDE_FLOAT32_C( 4.44), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 3.54) } }, { { SIMDE_FLOAT32_C( 45.75), SIMDE_FLOAT32_C( 50.91), SIMDE_FLOAT32_C( 76.83), SIMDE_FLOAT32_C( 25.17), SIMDE_FLOAT32_C( 74.56), SIMDE_FLOAT32_C( 32.30), SIMDE_FLOAT32_C( 54.49), SIMDE_FLOAT32_C( 28.69) }, { SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( 3.95), SIMDE_FLOAT32_C( 4.35), SIMDE_FLOAT32_C( 3.26), SIMDE_FLOAT32_C( 4.32), SIMDE_FLOAT32_C( 3.51), SIMDE_FLOAT32_C( 4.02), SIMDE_FLOAT32_C( 3.39) } }, { { SIMDE_FLOAT32_C( 15.11), SIMDE_FLOAT32_C( 33.49), SIMDE_FLOAT32_C( 79.56), SIMDE_FLOAT32_C( 21.03), SIMDE_FLOAT32_C( 76.31), SIMDE_FLOAT32_C( 32.80), SIMDE_FLOAT32_C( 34.68), SIMDE_FLOAT32_C( 63.71) }, { SIMDE_FLOAT32_C( 2.78), SIMDE_FLOAT32_C( 3.54), SIMDE_FLOAT32_C( 4.39), SIMDE_FLOAT32_C( 3.09), SIMDE_FLOAT32_C( 4.35), SIMDE_FLOAT32_C( 3.52), SIMDE_FLOAT32_C( 3.57), SIMDE_FLOAT32_C( 4.17) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_log1p_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; }

/* Table-driven check of simde_mm256_log1p_pd.
 *
 * Each row pairs four float64 inputs (`a`) with the expected per-lane
 * results (`r`); the expected values equal ln(1 + a) rounded to two
 * decimal places. Returns 0 when every row passes.
 * NOTE(review): the trailing `1` handed to the assert macro is presumably
 * its precision setting -- confirm against the test harness. */
static int
test_simde_mm256_log1p_pd (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float64 a[4];
    const simde_float64 r[4];
  } test_vec[] = {
    { { SIMDE_FLOAT64_C(82.81), SIMDE_FLOAT64_C(0.68), SIMDE_FLOAT64_C(54.22), SIMDE_FLOAT64_C(13.29) },
      { SIMDE_FLOAT64_C(4.43), SIMDE_FLOAT64_C(0.52), SIMDE_FLOAT64_C(4.01), SIMDE_FLOAT64_C(2.66) } },
    { { SIMDE_FLOAT64_C(34.27), SIMDE_FLOAT64_C(86.02), SIMDE_FLOAT64_C(66.74), SIMDE_FLOAT64_C(46.61) },
      { SIMDE_FLOAT64_C(3.56), SIMDE_FLOAT64_C(4.47), SIMDE_FLOAT64_C(4.22), SIMDE_FLOAT64_C(3.86) } },
    { { SIMDE_FLOAT64_C(95.48), SIMDE_FLOAT64_C(40.65), SIMDE_FLOAT64_C(39.71), SIMDE_FLOAT64_C(33.88) },
      { SIMDE_FLOAT64_C(4.57), SIMDE_FLOAT64_C(3.73), SIMDE_FLOAT64_C(3.71), SIMDE_FLOAT64_C(3.55) } },
    { { SIMDE_FLOAT64_C(25.60), SIMDE_FLOAT64_C(96.16), SIMDE_FLOAT64_C(45.65), SIMDE_FLOAT64_C(11.33) },
      { SIMDE_FLOAT64_C(3.28), SIMDE_FLOAT64_C(4.58), SIMDE_FLOAT64_C(3.84), SIMDE_FLOAT64_C(2.51) } },
    { { SIMDE_FLOAT64_C(12.09), SIMDE_FLOAT64_C(86.42), SIMDE_FLOAT64_C(87.72), SIMDE_FLOAT64_C(82.93) },
      { SIMDE_FLOAT64_C(2.57), SIMDE_FLOAT64_C(4.47), SIMDE_FLOAT64_C(4.49), SIMDE_FLOAT64_C(4.43) } },
    { { SIMDE_FLOAT64_C(74.51), SIMDE_FLOAT64_C(10.22), SIMDE_FLOAT64_C(42.74), SIMDE_FLOAT64_C(42.04) },
      { SIMDE_FLOAT64_C(4.32), SIMDE_FLOAT64_C(2.42), SIMDE_FLOAT64_C(3.78), SIMDE_FLOAT64_C(3.76) } },
    { { SIMDE_FLOAT64_C(56.03), SIMDE_FLOAT64_C(46.45), SIMDE_FLOAT64_C(79.57), SIMDE_FLOAT64_C(53.99) },
      { SIMDE_FLOAT64_C(4.04), SIMDE_FLOAT64_C(3.86), SIMDE_FLOAT64_C(4.39), SIMDE_FLOAT64_C(4.01) } },
    { { SIMDE_FLOAT64_C(65.41), SIMDE_FLOAT64_C(86.99), SIMDE_FLOAT64_C(98.63), SIMDE_FLOAT64_C(48.22) },
      { SIMDE_FLOAT64_C(4.20), SIMDE_FLOAT64_C(4.48), SIMDE_FLOAT64_C(4.60), SIMDE_FLOAT64_C(3.90) } }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t i = 0;

  while (i < n_cases) {
    /* Unaligned-load the inputs, apply log1p, compare with the stored row. */
    simde__m256d r = simde_mm256_log1p_pd(simde_mm256_loadu_pd(test_vec[i].a));
    simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1);
    i++;
  }

  return 0;
}

/* Table-driven check of simde_mm512_log1p_ps (sixteen float32 lanes per row). */
static int test_simde_mm512_log1p_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 56.49), SIMDE_FLOAT32_C( 45.26), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 8.51), SIMDE_FLOAT32_C( 84.43), SIMDE_FLOAT32_C( 90.20), SIMDE_FLOAT32_C( 58.37), SIMDE_FLOAT32_C( 91.03), SIMDE_FLOAT32_C( 16.56), SIMDE_FLOAT32_C( 42.47), SIMDE_FLOAT32_C( 30.02), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 36.91), SIMDE_FLOAT32_C( 32.16), SIMDE_FLOAT32_C( 13.56), SIMDE_FLOAT32_C( 95.86) }, { SIMDE_FLOAT32_C( 4.05), SIMDE_FLOAT32_C( 3.83), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 2.25), SIMDE_FLOAT32_C( 4.45), SIMDE_FLOAT32_C( 4.51), SIMDE_FLOAT32_C(
4.08), SIMDE_FLOAT32_C( 4.52), SIMDE_FLOAT32_C( 2.87), SIMDE_FLOAT32_C( 3.77), SIMDE_FLOAT32_C( 3.43), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 3.64), SIMDE_FLOAT32_C( 3.50), SIMDE_FLOAT32_C( 2.68), SIMDE_FLOAT32_C( 4.57) } }, { { SIMDE_FLOAT32_C( 15.15), SIMDE_FLOAT32_C( 66.91), SIMDE_FLOAT32_C( 89.77), SIMDE_FLOAT32_C( 66.71), SIMDE_FLOAT32_C( 24.15), SIMDE_FLOAT32_C( 55.93), SIMDE_FLOAT32_C( 84.52), SIMDE_FLOAT32_C( 55.70), SIMDE_FLOAT32_C( 44.08), SIMDE_FLOAT32_C( 33.97), SIMDE_FLOAT32_C( 77.87), SIMDE_FLOAT32_C( 36.54), SIMDE_FLOAT32_C( 89.83), SIMDE_FLOAT32_C( 75.19), SIMDE_FLOAT32_C( 48.64), SIMDE_FLOAT32_C( 46.32) }, { SIMDE_FLOAT32_C( 2.78), SIMDE_FLOAT32_C( 4.22), SIMDE_FLOAT32_C( 4.51), SIMDE_FLOAT32_C( 4.22), SIMDE_FLOAT32_C( 3.22), SIMDE_FLOAT32_C( 4.04), SIMDE_FLOAT32_C( 4.45), SIMDE_FLOAT32_C( 4.04), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 3.55), SIMDE_FLOAT32_C( 4.37), SIMDE_FLOAT32_C( 3.63), SIMDE_FLOAT32_C( 4.51), SIMDE_FLOAT32_C( 4.33), SIMDE_FLOAT32_C( 3.90), SIMDE_FLOAT32_C( 3.86) } }, { { SIMDE_FLOAT32_C( 20.45), SIMDE_FLOAT32_C( 48.85), SIMDE_FLOAT32_C( 54.83), SIMDE_FLOAT32_C( 4.88), SIMDE_FLOAT32_C( 39.05), SIMDE_FLOAT32_C( 13.20), SIMDE_FLOAT32_C( 95.91), SIMDE_FLOAT32_C( 55.62), SIMDE_FLOAT32_C( 55.68), SIMDE_FLOAT32_C( 25.92), SIMDE_FLOAT32_C( 55.99), SIMDE_FLOAT32_C( 92.58), SIMDE_FLOAT32_C( 58.09), SIMDE_FLOAT32_C( 69.55), SIMDE_FLOAT32_C( 88.44), SIMDE_FLOAT32_C( 73.24) }, { SIMDE_FLOAT32_C( 3.07), SIMDE_FLOAT32_C( 3.91), SIMDE_FLOAT32_C( 4.02), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 3.69), SIMDE_FLOAT32_C( 2.65), SIMDE_FLOAT32_C( 4.57), SIMDE_FLOAT32_C( 4.04), SIMDE_FLOAT32_C( 4.04), SIMDE_FLOAT32_C( 3.29), SIMDE_FLOAT32_C( 4.04), SIMDE_FLOAT32_C( 4.54), SIMDE_FLOAT32_C( 4.08), SIMDE_FLOAT32_C( 4.26), SIMDE_FLOAT32_C( 4.49), SIMDE_FLOAT32_C( 4.31) } }, { { SIMDE_FLOAT32_C( 36.47), SIMDE_FLOAT32_C( 78.21), SIMDE_FLOAT32_C( 39.95), SIMDE_FLOAT32_C( 60.62), SIMDE_FLOAT32_C( 34.14), SIMDE_FLOAT32_C( 24.47), SIMDE_FLOAT32_C( 16.32), 
SIMDE_FLOAT32_C( 78.22), SIMDE_FLOAT32_C( 58.44), SIMDE_FLOAT32_C( 94.19), SIMDE_FLOAT32_C( 14.75), SIMDE_FLOAT32_C( 48.27), SIMDE_FLOAT32_C( 69.38), SIMDE_FLOAT32_C( 63.39), SIMDE_FLOAT32_C( 94.60), SIMDE_FLOAT32_C( 89.83) }, { SIMDE_FLOAT32_C( 3.62), SIMDE_FLOAT32_C( 4.37), SIMDE_FLOAT32_C( 3.71), SIMDE_FLOAT32_C( 4.12), SIMDE_FLOAT32_C( 3.56), SIMDE_FLOAT32_C( 3.24), SIMDE_FLOAT32_C( 2.85), SIMDE_FLOAT32_C( 4.37), SIMDE_FLOAT32_C( 4.08), SIMDE_FLOAT32_C( 4.56), SIMDE_FLOAT32_C( 2.76), SIMDE_FLOAT32_C( 3.90), SIMDE_FLOAT32_C( 4.25), SIMDE_FLOAT32_C( 4.16), SIMDE_FLOAT32_C( 4.56), SIMDE_FLOAT32_C( 4.51) } }, { { SIMDE_FLOAT32_C( 12.25), SIMDE_FLOAT32_C( 49.43), SIMDE_FLOAT32_C( 94.71), SIMDE_FLOAT32_C( 51.30), SIMDE_FLOAT32_C( 62.63), SIMDE_FLOAT32_C( 90.62), SIMDE_FLOAT32_C( 6.92), SIMDE_FLOAT32_C( 18.31), SIMDE_FLOAT32_C( 16.54), SIMDE_FLOAT32_C( 62.91), SIMDE_FLOAT32_C( 10.89), SIMDE_FLOAT32_C( 74.63), SIMDE_FLOAT32_C( 32.47), SIMDE_FLOAT32_C( 99.33), SIMDE_FLOAT32_C( 47.86), SIMDE_FLOAT32_C( 68.94) }, { SIMDE_FLOAT32_C( 2.58), SIMDE_FLOAT32_C( 3.92), SIMDE_FLOAT32_C( 4.56), SIMDE_FLOAT32_C( 3.96), SIMDE_FLOAT32_C( 4.15), SIMDE_FLOAT32_C( 4.52), SIMDE_FLOAT32_C( 2.07), SIMDE_FLOAT32_C( 2.96), SIMDE_FLOAT32_C( 2.86), SIMDE_FLOAT32_C( 4.16), SIMDE_FLOAT32_C( 2.48), SIMDE_FLOAT32_C( 4.33), SIMDE_FLOAT32_C( 3.51), SIMDE_FLOAT32_C( 4.61), SIMDE_FLOAT32_C( 3.89), SIMDE_FLOAT32_C( 4.25) } }, { { SIMDE_FLOAT32_C( 77.54), SIMDE_FLOAT32_C( 87.82), SIMDE_FLOAT32_C( 29.55), SIMDE_FLOAT32_C( 11.68), SIMDE_FLOAT32_C( 12.29), SIMDE_FLOAT32_C( 45.87), SIMDE_FLOAT32_C( 89.89), SIMDE_FLOAT32_C( 70.73), SIMDE_FLOAT32_C( 40.05), SIMDE_FLOAT32_C( 4.64), SIMDE_FLOAT32_C( 19.00), SIMDE_FLOAT32_C( 9.43), SIMDE_FLOAT32_C( 68.04), SIMDE_FLOAT32_C( 13.59), SIMDE_FLOAT32_C( 99.26), SIMDE_FLOAT32_C( 80.28) }, { SIMDE_FLOAT32_C( 4.36), SIMDE_FLOAT32_C( 4.49), SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( 2.54), SIMDE_FLOAT32_C( 2.59), SIMDE_FLOAT32_C( 3.85), SIMDE_FLOAT32_C( 4.51), 
SIMDE_FLOAT32_C( 4.27), SIMDE_FLOAT32_C( 3.71), SIMDE_FLOAT32_C( 1.73), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 2.34), SIMDE_FLOAT32_C( 4.23), SIMDE_FLOAT32_C( 2.68), SIMDE_FLOAT32_C( 4.61), SIMDE_FLOAT32_C( 4.40) } }, { { SIMDE_FLOAT32_C( 63.02), SIMDE_FLOAT32_C( 93.97), SIMDE_FLOAT32_C( 31.58), SIMDE_FLOAT32_C( 25.65), SIMDE_FLOAT32_C( 84.59), SIMDE_FLOAT32_C( 38.50), SIMDE_FLOAT32_C( 43.96), SIMDE_FLOAT32_C( 1.13), SIMDE_FLOAT32_C( 1.41), SIMDE_FLOAT32_C( 54.85), SIMDE_FLOAT32_C( 75.76), SIMDE_FLOAT32_C( 33.88), SIMDE_FLOAT32_C( 54.18), SIMDE_FLOAT32_C( 23.62), SIMDE_FLOAT32_C( 2.81), SIMDE_FLOAT32_C( 31.72) }, { SIMDE_FLOAT32_C( 4.16), SIMDE_FLOAT32_C( 4.55), SIMDE_FLOAT32_C( 3.48), SIMDE_FLOAT32_C( 3.28), SIMDE_FLOAT32_C( 4.45), SIMDE_FLOAT32_C( 3.68), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 4.02), SIMDE_FLOAT32_C( 4.34), SIMDE_FLOAT32_C( 3.55), SIMDE_FLOAT32_C( 4.01), SIMDE_FLOAT32_C( 3.20), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 3.49) } }, { { SIMDE_FLOAT32_C( 11.44), SIMDE_FLOAT32_C( 32.37), SIMDE_FLOAT32_C( 43.39), SIMDE_FLOAT32_C( 23.72), SIMDE_FLOAT32_C( 78.23), SIMDE_FLOAT32_C( 33.28), SIMDE_FLOAT32_C( 94.45), SIMDE_FLOAT32_C( 18.29), SIMDE_FLOAT32_C( 37.93), SIMDE_FLOAT32_C( 13.45), SIMDE_FLOAT32_C( 27.72), SIMDE_FLOAT32_C( 5.96), SIMDE_FLOAT32_C( 27.05), SIMDE_FLOAT32_C( 26.98), SIMDE_FLOAT32_C( 86.25), SIMDE_FLOAT32_C( 90.07) }, { SIMDE_FLOAT32_C( 2.52), SIMDE_FLOAT32_C( 3.51), SIMDE_FLOAT32_C( 3.79), SIMDE_FLOAT32_C( 3.21), SIMDE_FLOAT32_C( 4.37), SIMDE_FLOAT32_C( 3.53), SIMDE_FLOAT32_C( 4.56), SIMDE_FLOAT32_C( 2.96), SIMDE_FLOAT32_C( 3.66), SIMDE_FLOAT32_C( 2.67), SIMDE_FLOAT32_C( 3.36), SIMDE_FLOAT32_C( 1.94), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( 4.47), SIMDE_FLOAT32_C( 4.51) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_log1p_ps(a); 
simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_log1p_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 12.54), SIMDE_FLOAT32_C( 63.14), SIMDE_FLOAT32_C( 41.17), SIMDE_FLOAT32_C( 60.95), SIMDE_FLOAT32_C( 4.09), SIMDE_FLOAT32_C( 68.78), SIMDE_FLOAT32_C( 40.84), SIMDE_FLOAT32_C( 68.42), SIMDE_FLOAT32_C( 63.18), SIMDE_FLOAT32_C( 48.47), SIMDE_FLOAT32_C( 50.42), SIMDE_FLOAT32_C( 37.77), SIMDE_FLOAT32_C( 19.29), SIMDE_FLOAT32_C( 67.41), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 31.94) }, UINT8_C(226), { SIMDE_FLOAT32_C( 39.12), SIMDE_FLOAT32_C( 35.10), SIMDE_FLOAT32_C( 9.96), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 31.24), SIMDE_FLOAT32_C( 60.86), SIMDE_FLOAT32_C( 46.96), SIMDE_FLOAT32_C( 34.48), SIMDE_FLOAT32_C( 76.57), SIMDE_FLOAT32_C( 78.00), SIMDE_FLOAT32_C( 14.95), SIMDE_FLOAT32_C( 17.36), SIMDE_FLOAT32_C( 66.84), SIMDE_FLOAT32_C( 3.16), SIMDE_FLOAT32_C( 29.89), SIMDE_FLOAT32_C( 29.98) }, { SIMDE_FLOAT32_C( 12.54), SIMDE_FLOAT32_C( 3.59), SIMDE_FLOAT32_C( 41.17), SIMDE_FLOAT32_C( 60.95), SIMDE_FLOAT32_C( 4.09), SIMDE_FLOAT32_C( 4.12), SIMDE_FLOAT32_C( 3.87), SIMDE_FLOAT32_C( 3.57), SIMDE_FLOAT32_C( 63.18), SIMDE_FLOAT32_C( 48.47), SIMDE_FLOAT32_C( 50.42), SIMDE_FLOAT32_C( 37.77), SIMDE_FLOAT32_C( 19.29), SIMDE_FLOAT32_C( 67.41), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 31.94) } }, { { SIMDE_FLOAT32_C( 44.33), SIMDE_FLOAT32_C( 90.84), SIMDE_FLOAT32_C( 34.08), SIMDE_FLOAT32_C( 13.10), SIMDE_FLOAT32_C( 31.68), SIMDE_FLOAT32_C( 2.49), SIMDE_FLOAT32_C( 76.28), SIMDE_FLOAT32_C( 80.15), SIMDE_FLOAT32_C( 52.91), SIMDE_FLOAT32_C( 14.05), SIMDE_FLOAT32_C( 99.44), SIMDE_FLOAT32_C( 20.32), SIMDE_FLOAT32_C( 14.75), SIMDE_FLOAT32_C( 31.39), SIMDE_FLOAT32_C( 83.76), SIMDE_FLOAT32_C( 53.87) }, UINT8_C(211), { SIMDE_FLOAT32_C( 93.72), SIMDE_FLOAT32_C( 53.97), 
SIMDE_FLOAT32_C( 97.73), SIMDE_FLOAT32_C( 54.58), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 32.21), SIMDE_FLOAT32_C( 31.15), SIMDE_FLOAT32_C( 78.93), SIMDE_FLOAT32_C( 47.16), SIMDE_FLOAT32_C( 48.50), SIMDE_FLOAT32_C( 45.77), SIMDE_FLOAT32_C( 50.32), SIMDE_FLOAT32_C( 78.40), SIMDE_FLOAT32_C( 75.75), SIMDE_FLOAT32_C( 94.65), SIMDE_FLOAT32_C( 69.24) }, { SIMDE_FLOAT32_C( 4.55), SIMDE_FLOAT32_C( 4.01), SIMDE_FLOAT32_C( 34.08), SIMDE_FLOAT32_C( 13.10), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 2.49), SIMDE_FLOAT32_C( 3.47), SIMDE_FLOAT32_C( 4.38), SIMDE_FLOAT32_C( 52.91), SIMDE_FLOAT32_C( 14.05), SIMDE_FLOAT32_C( 99.44), SIMDE_FLOAT32_C( 20.32), SIMDE_FLOAT32_C( 14.75), SIMDE_FLOAT32_C( 31.39), SIMDE_FLOAT32_C( 83.76), SIMDE_FLOAT32_C( 53.87) } }, { { SIMDE_FLOAT32_C( 9.83), SIMDE_FLOAT32_C( 7.75), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 12.32), SIMDE_FLOAT32_C( 84.03), SIMDE_FLOAT32_C( 81.06), SIMDE_FLOAT32_C( 65.23), SIMDE_FLOAT32_C( 98.08), SIMDE_FLOAT32_C( 80.51), SIMDE_FLOAT32_C( 85.55), SIMDE_FLOAT32_C( 12.83), SIMDE_FLOAT32_C( 11.90), SIMDE_FLOAT32_C( 69.31), SIMDE_FLOAT32_C( 66.70), SIMDE_FLOAT32_C( 78.39), SIMDE_FLOAT32_C( 63.03) }, UINT8_MAX, { SIMDE_FLOAT32_C( 76.12), SIMDE_FLOAT32_C( 17.61), SIMDE_FLOAT32_C( 21.60), SIMDE_FLOAT32_C( 8.33), SIMDE_FLOAT32_C( 48.76), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 55.49), SIMDE_FLOAT32_C( 97.26), SIMDE_FLOAT32_C( 46.29), SIMDE_FLOAT32_C( 5.81), SIMDE_FLOAT32_C( 75.66), SIMDE_FLOAT32_C( 22.04), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 44.89), SIMDE_FLOAT32_C( 31.87), SIMDE_FLOAT32_C( 8.21) }, { SIMDE_FLOAT32_C( 4.35), SIMDE_FLOAT32_C( 2.92), SIMDE_FLOAT32_C( 3.12), SIMDE_FLOAT32_C( 2.23), SIMDE_FLOAT32_C( 3.91), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 4.03), SIMDE_FLOAT32_C( 4.59), SIMDE_FLOAT32_C( 80.51), SIMDE_FLOAT32_C( 85.55), SIMDE_FLOAT32_C( 12.83), SIMDE_FLOAT32_C( 11.90), SIMDE_FLOAT32_C( 69.31), SIMDE_FLOAT32_C( 66.70), SIMDE_FLOAT32_C( 78.39), SIMDE_FLOAT32_C( 63.03) } }, { { SIMDE_FLOAT32_C( 45.81), 
SIMDE_FLOAT32_C( 44.19), SIMDE_FLOAT32_C( 92.24), SIMDE_FLOAT32_C( 26.87), SIMDE_FLOAT32_C( 9.42), SIMDE_FLOAT32_C( 90.33), SIMDE_FLOAT32_C( 7.38), SIMDE_FLOAT32_C( 94.98), SIMDE_FLOAT32_C( 3.16), SIMDE_FLOAT32_C( 19.28), SIMDE_FLOAT32_C( 64.29), SIMDE_FLOAT32_C( 69.86), SIMDE_FLOAT32_C( 97.67), SIMDE_FLOAT32_C( 27.32), SIMDE_FLOAT32_C( 90.53), SIMDE_FLOAT32_C( 73.79) }, UINT8_C(154), { SIMDE_FLOAT32_C( 12.13), SIMDE_FLOAT32_C( 82.12), SIMDE_FLOAT32_C( 93.69), SIMDE_FLOAT32_C( 12.65), SIMDE_FLOAT32_C( 37.62), SIMDE_FLOAT32_C( 90.95), SIMDE_FLOAT32_C( 58.94), SIMDE_FLOAT32_C( 43.43), SIMDE_FLOAT32_C( 66.61), SIMDE_FLOAT32_C( 80.98), SIMDE_FLOAT32_C( 43.89), SIMDE_FLOAT32_C( 11.51), SIMDE_FLOAT32_C( 12.84), SIMDE_FLOAT32_C( 52.10), SIMDE_FLOAT32_C( 57.32), SIMDE_FLOAT32_C( 57.03) }, { SIMDE_FLOAT32_C( 45.81), SIMDE_FLOAT32_C( 4.42), SIMDE_FLOAT32_C( 92.24), SIMDE_FLOAT32_C( 2.61), SIMDE_FLOAT32_C( 3.65), SIMDE_FLOAT32_C( 90.33), SIMDE_FLOAT32_C( 7.38), SIMDE_FLOAT32_C( 3.79), SIMDE_FLOAT32_C( 3.16), SIMDE_FLOAT32_C( 19.28), SIMDE_FLOAT32_C( 64.29), SIMDE_FLOAT32_C( 69.86), SIMDE_FLOAT32_C( 97.67), SIMDE_FLOAT32_C( 27.32), SIMDE_FLOAT32_C( 90.53), SIMDE_FLOAT32_C( 73.79) } }, { { SIMDE_FLOAT32_C( 44.35), SIMDE_FLOAT32_C( 84.19), SIMDE_FLOAT32_C( 66.46), SIMDE_FLOAT32_C( 34.67), SIMDE_FLOAT32_C( 91.58), SIMDE_FLOAT32_C( 61.43), SIMDE_FLOAT32_C( 37.83), SIMDE_FLOAT32_C( 10.85), SIMDE_FLOAT32_C( 25.72), SIMDE_FLOAT32_C( 7.69), SIMDE_FLOAT32_C( 8.52), SIMDE_FLOAT32_C( 53.04), SIMDE_FLOAT32_C( 98.23), SIMDE_FLOAT32_C( 82.31), SIMDE_FLOAT32_C( 97.98), SIMDE_FLOAT32_C( 10.35) }, UINT8_C( 99), { SIMDE_FLOAT32_C( 91.67), SIMDE_FLOAT32_C( 23.00), SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( 82.62), SIMDE_FLOAT32_C( 81.94), SIMDE_FLOAT32_C( 45.48), SIMDE_FLOAT32_C( 49.24), SIMDE_FLOAT32_C( 62.91), SIMDE_FLOAT32_C( 89.37), SIMDE_FLOAT32_C( 60.75), SIMDE_FLOAT32_C( 75.76), SIMDE_FLOAT32_C( 41.47), SIMDE_FLOAT32_C( 18.07), SIMDE_FLOAT32_C( 32.79), SIMDE_FLOAT32_C( 85.82), 
SIMDE_FLOAT32_C( 2.26) }, { SIMDE_FLOAT32_C( 4.53), SIMDE_FLOAT32_C( 3.18), SIMDE_FLOAT32_C( 66.46), SIMDE_FLOAT32_C( 34.67), SIMDE_FLOAT32_C( 91.58), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( 3.92), SIMDE_FLOAT32_C( 10.85), SIMDE_FLOAT32_C( 25.72), SIMDE_FLOAT32_C( 7.69), SIMDE_FLOAT32_C( 8.52), SIMDE_FLOAT32_C( 53.04), SIMDE_FLOAT32_C( 98.23), SIMDE_FLOAT32_C( 82.31), SIMDE_FLOAT32_C( 97.98), SIMDE_FLOAT32_C( 10.35) } }, { { SIMDE_FLOAT32_C( 99.25), SIMDE_FLOAT32_C( 20.49), SIMDE_FLOAT32_C( 93.84), SIMDE_FLOAT32_C( 60.68), SIMDE_FLOAT32_C( 58.33), SIMDE_FLOAT32_C( 4.69), SIMDE_FLOAT32_C( 86.40), SIMDE_FLOAT32_C( 66.02), SIMDE_FLOAT32_C( 13.21), SIMDE_FLOAT32_C( 39.45), SIMDE_FLOAT32_C( 64.25), SIMDE_FLOAT32_C( 95.52), SIMDE_FLOAT32_C( 37.43), SIMDE_FLOAT32_C( 74.60), SIMDE_FLOAT32_C( 59.95), SIMDE_FLOAT32_C( 29.10) }, UINT8_C( 67), { SIMDE_FLOAT32_C( 62.00), SIMDE_FLOAT32_C( 11.72), SIMDE_FLOAT32_C( 79.53), SIMDE_FLOAT32_C( 7.47), SIMDE_FLOAT32_C( 60.96), SIMDE_FLOAT32_C( 42.45), SIMDE_FLOAT32_C( 96.84), SIMDE_FLOAT32_C( 21.71), SIMDE_FLOAT32_C( 18.20), SIMDE_FLOAT32_C( 38.31), SIMDE_FLOAT32_C( 39.77), SIMDE_FLOAT32_C( 50.99), SIMDE_FLOAT32_C( 24.13), SIMDE_FLOAT32_C( 42.03), SIMDE_FLOAT32_C( 50.24), SIMDE_FLOAT32_C( 44.62) }, { SIMDE_FLOAT32_C( 4.14), SIMDE_FLOAT32_C( 2.54), SIMDE_FLOAT32_C( 93.84), SIMDE_FLOAT32_C( 60.68), SIMDE_FLOAT32_C( 58.33), SIMDE_FLOAT32_C( 4.69), SIMDE_FLOAT32_C( 4.58), SIMDE_FLOAT32_C( 66.02), SIMDE_FLOAT32_C( 13.21), SIMDE_FLOAT32_C( 39.45), SIMDE_FLOAT32_C( 64.25), SIMDE_FLOAT32_C( 95.52), SIMDE_FLOAT32_C( 37.43), SIMDE_FLOAT32_C( 74.60), SIMDE_FLOAT32_C( 59.95), SIMDE_FLOAT32_C( 29.10) } }, { { SIMDE_FLOAT32_C( 35.87), SIMDE_FLOAT32_C( 10.92), SIMDE_FLOAT32_C( 2.95), SIMDE_FLOAT32_C( 40.56), SIMDE_FLOAT32_C( 97.33), SIMDE_FLOAT32_C( 68.97), SIMDE_FLOAT32_C( 53.77), SIMDE_FLOAT32_C( 36.78), SIMDE_FLOAT32_C( 33.22), SIMDE_FLOAT32_C( 49.29), SIMDE_FLOAT32_C( 74.20), SIMDE_FLOAT32_C( 7.81), SIMDE_FLOAT32_C( 9.24), SIMDE_FLOAT32_C( 3.30), 
SIMDE_FLOAT32_C( 5.41), SIMDE_FLOAT32_C( 71.23) }, UINT8_C(113), { SIMDE_FLOAT32_C( 84.95), SIMDE_FLOAT32_C( 78.70), SIMDE_FLOAT32_C( 75.98), SIMDE_FLOAT32_C( 27.40), SIMDE_FLOAT32_C( 75.54), SIMDE_FLOAT32_C( 97.69), SIMDE_FLOAT32_C( 45.60), SIMDE_FLOAT32_C( 13.85), SIMDE_FLOAT32_C( 37.46), SIMDE_FLOAT32_C( 96.59), SIMDE_FLOAT32_C( 37.98), SIMDE_FLOAT32_C( 79.49), SIMDE_FLOAT32_C( 46.83), SIMDE_FLOAT32_C( 82.60), SIMDE_FLOAT32_C( 15.36), SIMDE_FLOAT32_C( 57.76) }, { SIMDE_FLOAT32_C( 4.45), SIMDE_FLOAT32_C( 10.92), SIMDE_FLOAT32_C( 2.95), SIMDE_FLOAT32_C( 40.56), SIMDE_FLOAT32_C( 4.34), SIMDE_FLOAT32_C( 4.59), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( 36.78), SIMDE_FLOAT32_C( 33.22), SIMDE_FLOAT32_C( 49.29), SIMDE_FLOAT32_C( 74.20), SIMDE_FLOAT32_C( 7.81), SIMDE_FLOAT32_C( 9.24), SIMDE_FLOAT32_C( 3.30), SIMDE_FLOAT32_C( 5.41), SIMDE_FLOAT32_C( 71.23) } }, { { SIMDE_FLOAT32_C( 85.55), SIMDE_FLOAT32_C( 55.92), SIMDE_FLOAT32_C( 55.08), SIMDE_FLOAT32_C( 54.52), SIMDE_FLOAT32_C( 9.69), SIMDE_FLOAT32_C( 91.86), SIMDE_FLOAT32_C( 87.73), SIMDE_FLOAT32_C( 58.97), SIMDE_FLOAT32_C( 66.07), SIMDE_FLOAT32_C( 95.55), SIMDE_FLOAT32_C( 68.21), SIMDE_FLOAT32_C( 69.37), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 39.44), SIMDE_FLOAT32_C( 84.39), SIMDE_FLOAT32_C( 85.91) }, UINT8_C(146), { SIMDE_FLOAT32_C( 60.37), SIMDE_FLOAT32_C( 13.30), SIMDE_FLOAT32_C( 93.69), SIMDE_FLOAT32_C( 58.06), SIMDE_FLOAT32_C( 58.90), SIMDE_FLOAT32_C( 7.54), SIMDE_FLOAT32_C( 95.52), SIMDE_FLOAT32_C( 55.49), SIMDE_FLOAT32_C( 45.52), SIMDE_FLOAT32_C( 75.01), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( 28.12), SIMDE_FLOAT32_C( 90.38), SIMDE_FLOAT32_C( 60.08), SIMDE_FLOAT32_C( 13.67), SIMDE_FLOAT32_C( 46.30) }, { SIMDE_FLOAT32_C( 85.55), SIMDE_FLOAT32_C( 2.66), SIMDE_FLOAT32_C( 55.08), SIMDE_FLOAT32_C( 54.52), SIMDE_FLOAT32_C( 4.09), SIMDE_FLOAT32_C( 91.86), SIMDE_FLOAT32_C( 87.73), SIMDE_FLOAT32_C( 4.03), SIMDE_FLOAT32_C( 66.07), SIMDE_FLOAT32_C( 95.55), SIMDE_FLOAT32_C( 68.21), SIMDE_FLOAT32_C( 69.37), 
SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 39.44), SIMDE_FLOAT32_C( 84.39), SIMDE_FLOAT32_C( 85.91) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_log1p_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_log1p_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 80.88), SIMDE_FLOAT64_C( 97.21), SIMDE_FLOAT64_C( 22.72), SIMDE_FLOAT64_C( 88.57), SIMDE_FLOAT64_C( 7.11), SIMDE_FLOAT64_C( 33.20), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( 68.60) }, { SIMDE_FLOAT64_C( 4.41), SIMDE_FLOAT64_C( 4.59), SIMDE_FLOAT64_C( 3.17), SIMDE_FLOAT64_C( 4.50), SIMDE_FLOAT64_C( 2.09), SIMDE_FLOAT64_C( 3.53), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( 4.24) } }, { { SIMDE_FLOAT64_C( 18.60), SIMDE_FLOAT64_C( 97.22), SIMDE_FLOAT64_C( 36.01), SIMDE_FLOAT64_C( 5.77), SIMDE_FLOAT64_C( 37.64), SIMDE_FLOAT64_C( 8.06), SIMDE_FLOAT64_C( 89.11), SIMDE_FLOAT64_C( 35.34) }, { SIMDE_FLOAT64_C( 2.98), SIMDE_FLOAT64_C( 4.59), SIMDE_FLOAT64_C( 3.61), SIMDE_FLOAT64_C( 1.91), SIMDE_FLOAT64_C( 3.65), SIMDE_FLOAT64_C( 2.20), SIMDE_FLOAT64_C( 4.50), SIMDE_FLOAT64_C( 3.59) } }, { { SIMDE_FLOAT64_C( 29.67), SIMDE_FLOAT64_C( 90.68), SIMDE_FLOAT64_C( 39.64), SIMDE_FLOAT64_C( 62.60), SIMDE_FLOAT64_C( 75.54), SIMDE_FLOAT64_C( 10.18), SIMDE_FLOAT64_C( 92.73), SIMDE_FLOAT64_C( 94.58) }, { SIMDE_FLOAT64_C( 3.42), SIMDE_FLOAT64_C( 4.52), SIMDE_FLOAT64_C( 3.70), SIMDE_FLOAT64_C( 4.15), SIMDE_FLOAT64_C( 4.34), SIMDE_FLOAT64_C( 2.41), SIMDE_FLOAT64_C( 4.54), SIMDE_FLOAT64_C( 4.56) } }, { { SIMDE_FLOAT64_C( 76.16), SIMDE_FLOAT64_C( 5.81), SIMDE_FLOAT64_C( 62.23), SIMDE_FLOAT64_C( 5.12), SIMDE_FLOAT64_C( 77.73), SIMDE_FLOAT64_C( 84.72), SIMDE_FLOAT64_C( 14.00), 
SIMDE_FLOAT64_C( 58.61) }, { SIMDE_FLOAT64_C( 4.35), SIMDE_FLOAT64_C( 1.92), SIMDE_FLOAT64_C( 4.15), SIMDE_FLOAT64_C( 1.81), SIMDE_FLOAT64_C( 4.37), SIMDE_FLOAT64_C( 4.45), SIMDE_FLOAT64_C( 2.71), SIMDE_FLOAT64_C( 4.09) } }, { { SIMDE_FLOAT64_C( 81.93), SIMDE_FLOAT64_C( 36.72), SIMDE_FLOAT64_C( 47.19), SIMDE_FLOAT64_C( 89.04), SIMDE_FLOAT64_C( 69.92), SIMDE_FLOAT64_C( 48.10), SIMDE_FLOAT64_C( 57.64), SIMDE_FLOAT64_C( 88.52) }, { SIMDE_FLOAT64_C( 4.42), SIMDE_FLOAT64_C( 3.63), SIMDE_FLOAT64_C( 3.88), SIMDE_FLOAT64_C( 4.50), SIMDE_FLOAT64_C( 4.26), SIMDE_FLOAT64_C( 3.89), SIMDE_FLOAT64_C( 4.07), SIMDE_FLOAT64_C( 4.49) } }, { { SIMDE_FLOAT64_C( 45.32), SIMDE_FLOAT64_C( 93.65), SIMDE_FLOAT64_C( 94.30), SIMDE_FLOAT64_C( 82.96), SIMDE_FLOAT64_C( 1.71), SIMDE_FLOAT64_C( 83.41), SIMDE_FLOAT64_C( 18.30), SIMDE_FLOAT64_C( 31.38) }, { SIMDE_FLOAT64_C( 3.84), SIMDE_FLOAT64_C( 4.55), SIMDE_FLOAT64_C( 4.56), SIMDE_FLOAT64_C( 4.43), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 4.44), SIMDE_FLOAT64_C( 2.96), SIMDE_FLOAT64_C( 3.48) } }, { { SIMDE_FLOAT64_C( 74.09), SIMDE_FLOAT64_C( 57.95), SIMDE_FLOAT64_C( 93.98), SIMDE_FLOAT64_C( 49.63), SIMDE_FLOAT64_C( 68.12), SIMDE_FLOAT64_C( 86.71), SIMDE_FLOAT64_C( 44.21), SIMDE_FLOAT64_C( 44.28) }, { SIMDE_FLOAT64_C( 4.32), SIMDE_FLOAT64_C( 4.08), SIMDE_FLOAT64_C( 4.55), SIMDE_FLOAT64_C( 3.92), SIMDE_FLOAT64_C( 4.24), SIMDE_FLOAT64_C( 4.47), SIMDE_FLOAT64_C( 3.81), SIMDE_FLOAT64_C( 3.81) } }, { { SIMDE_FLOAT64_C( 92.51), SIMDE_FLOAT64_C( 6.45), SIMDE_FLOAT64_C( 49.40), SIMDE_FLOAT64_C( 70.25), SIMDE_FLOAT64_C( 91.16), SIMDE_FLOAT64_C( 63.40), SIMDE_FLOAT64_C( 28.86), SIMDE_FLOAT64_C( 73.09) }, { SIMDE_FLOAT64_C( 4.54), SIMDE_FLOAT64_C( 2.01), SIMDE_FLOAT64_C( 3.92), SIMDE_FLOAT64_C( 4.27), SIMDE_FLOAT64_C( 4.52), SIMDE_FLOAT64_C( 4.17), SIMDE_FLOAT64_C( 3.40), SIMDE_FLOAT64_C( 4.31) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = 
simde_mm512_log1p_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_log1p_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 52.74), SIMDE_FLOAT64_C( 77.28), SIMDE_FLOAT64_C( 34.34), SIMDE_FLOAT64_C( 52.30), SIMDE_FLOAT64_C( 78.12), SIMDE_FLOAT64_C( 51.61), SIMDE_FLOAT64_C( 6.35), SIMDE_FLOAT64_C( 45.83) }, UINT8_C( 39), { SIMDE_FLOAT64_C( 43.10), SIMDE_FLOAT64_C( 47.48), SIMDE_FLOAT64_C( 21.67), SIMDE_FLOAT64_C( 82.04), SIMDE_FLOAT64_C( 40.45), SIMDE_FLOAT64_C( 94.76), SIMDE_FLOAT64_C( 61.37), SIMDE_FLOAT64_C( 11.74) }, { SIMDE_FLOAT64_C( 3.79), SIMDE_FLOAT64_C( 3.88), SIMDE_FLOAT64_C( 3.12), SIMDE_FLOAT64_C( 52.30), SIMDE_FLOAT64_C( 78.12), SIMDE_FLOAT64_C( 4.56), SIMDE_FLOAT64_C( 6.35), SIMDE_FLOAT64_C( 45.83) } }, { { SIMDE_FLOAT64_C( 1.10), SIMDE_FLOAT64_C( 18.75), SIMDE_FLOAT64_C( 3.08), SIMDE_FLOAT64_C( 98.55), SIMDE_FLOAT64_C( 92.65), SIMDE_FLOAT64_C( 11.89), SIMDE_FLOAT64_C( 24.76), SIMDE_FLOAT64_C( 36.96) }, UINT8_C(244), { SIMDE_FLOAT64_C( 46.12), SIMDE_FLOAT64_C( 85.44), SIMDE_FLOAT64_C( 4.83), SIMDE_FLOAT64_C( 24.72), SIMDE_FLOAT64_C( 98.67), SIMDE_FLOAT64_C( 57.57), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 33.01) }, { SIMDE_FLOAT64_C( 1.10), SIMDE_FLOAT64_C( 18.75), SIMDE_FLOAT64_C( 1.76), SIMDE_FLOAT64_C( 98.55), SIMDE_FLOAT64_C( 4.60), SIMDE_FLOAT64_C( 4.07), SIMDE_FLOAT64_C( 1.10), SIMDE_FLOAT64_C( 3.53) } }, { { SIMDE_FLOAT64_C( 9.87), SIMDE_FLOAT64_C( 80.12), SIMDE_FLOAT64_C( 84.62), SIMDE_FLOAT64_C( 16.22), SIMDE_FLOAT64_C( 25.95), SIMDE_FLOAT64_C( 41.00), SIMDE_FLOAT64_C( 59.31), SIMDE_FLOAT64_C( 73.43) }, UINT8_C( 77), { SIMDE_FLOAT64_C( 41.35), SIMDE_FLOAT64_C( 13.88), SIMDE_FLOAT64_C( 57.44), SIMDE_FLOAT64_C( 2.72), SIMDE_FLOAT64_C( 25.62), SIMDE_FLOAT64_C( 58.53), SIMDE_FLOAT64_C( 21.47), SIMDE_FLOAT64_C( 28.69) }, { 
SIMDE_FLOAT64_C( 3.75), SIMDE_FLOAT64_C( 80.12), SIMDE_FLOAT64_C( 4.07), SIMDE_FLOAT64_C( 1.31), SIMDE_FLOAT64_C( 25.95), SIMDE_FLOAT64_C( 41.00), SIMDE_FLOAT64_C( 3.11), SIMDE_FLOAT64_C( 73.43) } }, { { SIMDE_FLOAT64_C( 57.09), SIMDE_FLOAT64_C( 14.11), SIMDE_FLOAT64_C( 40.58), SIMDE_FLOAT64_C( 81.85), SIMDE_FLOAT64_C( 51.08), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 27.97), SIMDE_FLOAT64_C( 36.52) }, UINT8_C(164), { SIMDE_FLOAT64_C( 52.69), SIMDE_FLOAT64_C( 35.19), SIMDE_FLOAT64_C( 62.99), SIMDE_FLOAT64_C( 54.69), SIMDE_FLOAT64_C( 68.20), SIMDE_FLOAT64_C( 72.85), SIMDE_FLOAT64_C( 34.81), SIMDE_FLOAT64_C( 52.82) }, { SIMDE_FLOAT64_C( 57.09), SIMDE_FLOAT64_C( 14.11), SIMDE_FLOAT64_C( 4.16), SIMDE_FLOAT64_C( 81.85), SIMDE_FLOAT64_C( 51.08), SIMDE_FLOAT64_C( 4.30), SIMDE_FLOAT64_C( 27.97), SIMDE_FLOAT64_C( 3.99) } }, { { SIMDE_FLOAT64_C( 89.07), SIMDE_FLOAT64_C( 60.76), SIMDE_FLOAT64_C( 93.82), SIMDE_FLOAT64_C( 48.38), SIMDE_FLOAT64_C( 34.19), SIMDE_FLOAT64_C( 56.49), SIMDE_FLOAT64_C( 89.74), SIMDE_FLOAT64_C( 48.07) }, UINT8_C( 13), { SIMDE_FLOAT64_C( 92.46), SIMDE_FLOAT64_C( 73.68), SIMDE_FLOAT64_C( 72.46), SIMDE_FLOAT64_C( 13.92), SIMDE_FLOAT64_C( 2.38), SIMDE_FLOAT64_C( 29.55), SIMDE_FLOAT64_C( 28.03), SIMDE_FLOAT64_C( 42.96) }, { SIMDE_FLOAT64_C( 4.54), SIMDE_FLOAT64_C( 60.76), SIMDE_FLOAT64_C( 4.30), SIMDE_FLOAT64_C( 2.70), SIMDE_FLOAT64_C( 34.19), SIMDE_FLOAT64_C( 56.49), SIMDE_FLOAT64_C( 89.74), SIMDE_FLOAT64_C( 48.07) } }, { { SIMDE_FLOAT64_C( 11.40), SIMDE_FLOAT64_C( 79.11), SIMDE_FLOAT64_C( 43.54), SIMDE_FLOAT64_C( 39.37), SIMDE_FLOAT64_C( 15.63), SIMDE_FLOAT64_C( 48.95), SIMDE_FLOAT64_C( 92.06), SIMDE_FLOAT64_C( 50.82) }, UINT8_C( 26), { SIMDE_FLOAT64_C( 46.75), SIMDE_FLOAT64_C( 19.02), SIMDE_FLOAT64_C( 84.79), SIMDE_FLOAT64_C( 81.56), SIMDE_FLOAT64_C( 71.83), SIMDE_FLOAT64_C( 73.86), SIMDE_FLOAT64_C( 42.33), SIMDE_FLOAT64_C( 65.65) }, { SIMDE_FLOAT64_C( 11.40), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 43.54), SIMDE_FLOAT64_C( 4.41), SIMDE_FLOAT64_C( 4.29), 
SIMDE_FLOAT64_C( 48.95), SIMDE_FLOAT64_C( 92.06), SIMDE_FLOAT64_C( 50.82) } }, { { SIMDE_FLOAT64_C( 22.25), SIMDE_FLOAT64_C( 76.52), SIMDE_FLOAT64_C( 22.14), SIMDE_FLOAT64_C( 11.98), SIMDE_FLOAT64_C( 24.58), SIMDE_FLOAT64_C( 36.07), SIMDE_FLOAT64_C( 4.44), SIMDE_FLOAT64_C( 98.27) }, UINT8_C(254), { SIMDE_FLOAT64_C( 18.36), SIMDE_FLOAT64_C( 0.64), SIMDE_FLOAT64_C( 38.07), SIMDE_FLOAT64_C( 46.40), SIMDE_FLOAT64_C( 43.60), SIMDE_FLOAT64_C( 49.47), SIMDE_FLOAT64_C( 25.51), SIMDE_FLOAT64_C( 87.14) }, { SIMDE_FLOAT64_C( 22.25), SIMDE_FLOAT64_C( 0.49), SIMDE_FLOAT64_C( 3.67), SIMDE_FLOAT64_C( 3.86), SIMDE_FLOAT64_C( 3.80), SIMDE_FLOAT64_C( 3.92), SIMDE_FLOAT64_C( 3.28), SIMDE_FLOAT64_C( 4.48) } }, { { SIMDE_FLOAT64_C( 88.84), SIMDE_FLOAT64_C( 41.14), SIMDE_FLOAT64_C( 36.09), SIMDE_FLOAT64_C( 80.90), SIMDE_FLOAT64_C( 91.96), SIMDE_FLOAT64_C( 48.03), SIMDE_FLOAT64_C( 27.65), SIMDE_FLOAT64_C( 10.98) }, UINT8_C(171), { SIMDE_FLOAT64_C( 9.21), SIMDE_FLOAT64_C( 82.81), SIMDE_FLOAT64_C( 6.69), SIMDE_FLOAT64_C( 51.54), SIMDE_FLOAT64_C( 48.46), SIMDE_FLOAT64_C( 28.94), SIMDE_FLOAT64_C( 28.06), SIMDE_FLOAT64_C( 70.60) }, { SIMDE_FLOAT64_C( 2.32), SIMDE_FLOAT64_C( 4.43), SIMDE_FLOAT64_C( 36.09), SIMDE_FLOAT64_C( 3.96), SIMDE_FLOAT64_C( 91.96), SIMDE_FLOAT64_C( 3.40), SIMDE_FLOAT64_C( 27.65), SIMDE_FLOAT64_C( 4.27) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_log1p_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_log2_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 631.47), SIMDE_FLOAT32_C( 844.23), SIMDE_FLOAT32_C( 439.63), SIMDE_FLOAT32_C( 13.01) }, { SIMDE_FLOAT32_C( 9.30), SIMDE_FLOAT32_C( 9.72), SIMDE_FLOAT32_C( 
8.78), SIMDE_FLOAT32_C( 3.70) } }, { { SIMDE_FLOAT32_C( 66.81), SIMDE_FLOAT32_C( 88.82), SIMDE_FLOAT32_C( 350.44), SIMDE_FLOAT32_C( 636.52) }, { SIMDE_FLOAT32_C( 6.06), SIMDE_FLOAT32_C( 6.47), SIMDE_FLOAT32_C( 8.45), SIMDE_FLOAT32_C( 9.31) } }, { { SIMDE_FLOAT32_C( 636.53), SIMDE_FLOAT32_C( 411.53), SIMDE_FLOAT32_C( 396.60), SIMDE_FLOAT32_C( 131.18) }, { SIMDE_FLOAT32_C( 9.31), SIMDE_FLOAT32_C( 8.68), SIMDE_FLOAT32_C( 8.63), SIMDE_FLOAT32_C( 7.04) } }, { { SIMDE_FLOAT32_C( 749.84), SIMDE_FLOAT32_C( 385.14), SIMDE_FLOAT32_C( 384.93), SIMDE_FLOAT32_C( 165.27) }, { SIMDE_FLOAT32_C( 9.55), SIMDE_FLOAT32_C( 8.59), SIMDE_FLOAT32_C( 8.59), SIMDE_FLOAT32_C( 7.37) } }, { { SIMDE_FLOAT32_C( 246.49), SIMDE_FLOAT32_C( 520.56), SIMDE_FLOAT32_C( 778.62), SIMDE_FLOAT32_C( 71.34) }, { SIMDE_FLOAT32_C( 7.95), SIMDE_FLOAT32_C( 9.02), SIMDE_FLOAT32_C( 9.60), SIMDE_FLOAT32_C( 6.16) } }, { { SIMDE_FLOAT32_C( 946.80), SIMDE_FLOAT32_C( 380.92), SIMDE_FLOAT32_C( 894.84), SIMDE_FLOAT32_C( 902.24) }, { SIMDE_FLOAT32_C( 9.89), SIMDE_FLOAT32_C( 8.57), SIMDE_FLOAT32_C( 9.81), SIMDE_FLOAT32_C( 9.82) } }, { { SIMDE_FLOAT32_C( 574.27), SIMDE_FLOAT32_C( 214.93), SIMDE_FLOAT32_C( 953.03), SIMDE_FLOAT32_C( 638.26) }, { SIMDE_FLOAT32_C( 9.17), SIMDE_FLOAT32_C( 7.75), SIMDE_FLOAT32_C( 9.90), SIMDE_FLOAT32_C( 9.32) } }, { { SIMDE_FLOAT32_C( 991.13), SIMDE_FLOAT32_C( 188.32), SIMDE_FLOAT32_C( 949.37), SIMDE_FLOAT32_C( 622.60) }, { SIMDE_FLOAT32_C( 9.95), SIMDE_FLOAT32_C( 7.56), SIMDE_FLOAT32_C( 9.89), SIMDE_FLOAT32_C( 9.28) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_log2_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_log2_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 982.90), SIMDE_FLOAT64_C( 619.50) }, { SIMDE_FLOAT64_C( 
9.94), SIMDE_FLOAT64_C( 9.27) } }, { { SIMDE_FLOAT64_C( 102.39), SIMDE_FLOAT64_C( 923.09) }, { SIMDE_FLOAT64_C( 6.68), SIMDE_FLOAT64_C( 9.85) } }, { { SIMDE_FLOAT64_C( 243.48), SIMDE_FLOAT64_C( 494.45) }, { SIMDE_FLOAT64_C( 7.93), SIMDE_FLOAT64_C( 8.95) } }, { { SIMDE_FLOAT64_C( 45.35), SIMDE_FLOAT64_C( 416.91) }, { SIMDE_FLOAT64_C( 5.50), SIMDE_FLOAT64_C( 8.70) } }, { { SIMDE_FLOAT64_C( 259.45), SIMDE_FLOAT64_C( 290.22) }, { SIMDE_FLOAT64_C( 8.02), SIMDE_FLOAT64_C( 8.18) } }, { { SIMDE_FLOAT64_C( 923.80), SIMDE_FLOAT64_C( 970.52) }, { SIMDE_FLOAT64_C( 9.85), SIMDE_FLOAT64_C( 9.92) } }, { { SIMDE_FLOAT64_C( 646.50), SIMDE_FLOAT64_C( 264.22) }, { SIMDE_FLOAT64_C( 9.34), SIMDE_FLOAT64_C( 8.05) } }, { { SIMDE_FLOAT64_C( 634.41), SIMDE_FLOAT64_C( 510.63) }, { SIMDE_FLOAT64_C( 9.31), SIMDE_FLOAT64_C( 9.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_log2_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_log2_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 889.40), SIMDE_FLOAT32_C( 779.21), SIMDE_FLOAT32_C( 198.92), SIMDE_FLOAT32_C( 945.28), SIMDE_FLOAT32_C( 42.71), SIMDE_FLOAT32_C( 341.50), SIMDE_FLOAT32_C( 958.60), SIMDE_FLOAT32_C( 736.56) }, { SIMDE_FLOAT32_C( 9.80), SIMDE_FLOAT32_C( 9.61), SIMDE_FLOAT32_C( 7.64), SIMDE_FLOAT32_C( 9.88), SIMDE_FLOAT32_C( 5.42), SIMDE_FLOAT32_C( 8.42), SIMDE_FLOAT32_C( 9.90), SIMDE_FLOAT32_C( 9.52) } }, { { SIMDE_FLOAT32_C( 74.89), SIMDE_FLOAT32_C( 979.36), SIMDE_FLOAT32_C( 587.94), SIMDE_FLOAT32_C( 960.37), SIMDE_FLOAT32_C( 497.73), SIMDE_FLOAT32_C( 286.82), SIMDE_FLOAT32_C( 507.33), SIMDE_FLOAT32_C( 616.64) }, { SIMDE_FLOAT32_C( 6.23), SIMDE_FLOAT32_C( 9.94), SIMDE_FLOAT32_C( 9.20), SIMDE_FLOAT32_C( 9.91), SIMDE_FLOAT32_C( 8.96), SIMDE_FLOAT32_C( 8.16), 
SIMDE_FLOAT32_C( 8.99), SIMDE_FLOAT32_C( 9.27) } }, { { SIMDE_FLOAT32_C( 307.44), SIMDE_FLOAT32_C( 437.70), SIMDE_FLOAT32_C( 685.73), SIMDE_FLOAT32_C( 291.17), SIMDE_FLOAT32_C( 840.55), SIMDE_FLOAT32_C( 438.07), SIMDE_FLOAT32_C( 676.25), SIMDE_FLOAT32_C( 160.97) }, { SIMDE_FLOAT32_C( 8.26), SIMDE_FLOAT32_C( 8.77), SIMDE_FLOAT32_C( 9.42), SIMDE_FLOAT32_C( 8.19), SIMDE_FLOAT32_C( 9.72), SIMDE_FLOAT32_C( 8.78), SIMDE_FLOAT32_C( 9.40), SIMDE_FLOAT32_C( 7.33) } }, { { SIMDE_FLOAT32_C( 788.67), SIMDE_FLOAT32_C( 843.13), SIMDE_FLOAT32_C( 381.11), SIMDE_FLOAT32_C( 499.16), SIMDE_FLOAT32_C( 309.83), SIMDE_FLOAT32_C( 369.53), SIMDE_FLOAT32_C( 957.38), SIMDE_FLOAT32_C( 199.23) }, { SIMDE_FLOAT32_C( 9.62), SIMDE_FLOAT32_C( 9.72), SIMDE_FLOAT32_C( 8.57), SIMDE_FLOAT32_C( 8.96), SIMDE_FLOAT32_C( 8.28), SIMDE_FLOAT32_C( 8.53), SIMDE_FLOAT32_C( 9.90), SIMDE_FLOAT32_C( 7.64) } }, { { SIMDE_FLOAT32_C( 148.75), SIMDE_FLOAT32_C( 156.30), SIMDE_FLOAT32_C( 144.51), SIMDE_FLOAT32_C( 191.45), SIMDE_FLOAT32_C( 497.81), SIMDE_FLOAT32_C( 103.11), SIMDE_FLOAT32_C( 928.02), SIMDE_FLOAT32_C( 572.70) }, { SIMDE_FLOAT32_C( 7.22), SIMDE_FLOAT32_C( 7.29), SIMDE_FLOAT32_C( 7.18), SIMDE_FLOAT32_C( 7.58), SIMDE_FLOAT32_C( 8.96), SIMDE_FLOAT32_C( 6.69), SIMDE_FLOAT32_C( 9.86), SIMDE_FLOAT32_C( 9.16) } }, { { SIMDE_FLOAT32_C( 82.46), SIMDE_FLOAT32_C( 515.95), SIMDE_FLOAT32_C( 533.07), SIMDE_FLOAT32_C( 580.19), SIMDE_FLOAT32_C( 802.77), SIMDE_FLOAT32_C( 40.40), SIMDE_FLOAT32_C( 196.83), SIMDE_FLOAT32_C( 110.21) }, { SIMDE_FLOAT32_C( 6.37), SIMDE_FLOAT32_C( 9.01), SIMDE_FLOAT32_C( 9.06), SIMDE_FLOAT32_C( 9.18), SIMDE_FLOAT32_C( 9.65), SIMDE_FLOAT32_C( 5.34), SIMDE_FLOAT32_C( 7.62), SIMDE_FLOAT32_C( 6.78) } }, { { SIMDE_FLOAT32_C( 478.10), SIMDE_FLOAT32_C( 882.57), SIMDE_FLOAT32_C( 401.38), SIMDE_FLOAT32_C( 318.65), SIMDE_FLOAT32_C( 320.63), SIMDE_FLOAT32_C( 77.63), SIMDE_FLOAT32_C( 479.61), SIMDE_FLOAT32_C( 109.31) }, { SIMDE_FLOAT32_C( 8.90), SIMDE_FLOAT32_C( 9.79), SIMDE_FLOAT32_C( 8.65), 
SIMDE_FLOAT32_C( 8.32), SIMDE_FLOAT32_C( 8.32), SIMDE_FLOAT32_C( 6.28), SIMDE_FLOAT32_C( 8.91), SIMDE_FLOAT32_C( 6.77) } }, { { SIMDE_FLOAT32_C( 920.76), SIMDE_FLOAT32_C( 860.72), SIMDE_FLOAT32_C( 608.46), SIMDE_FLOAT32_C( 230.59), SIMDE_FLOAT32_C( 230.26), SIMDE_FLOAT32_C( 565.84), SIMDE_FLOAT32_C( 429.82), SIMDE_FLOAT32_C( 379.00) }, { SIMDE_FLOAT32_C( 9.85), SIMDE_FLOAT32_C( 9.75), SIMDE_FLOAT32_C( 9.25), SIMDE_FLOAT32_C( 7.85), SIMDE_FLOAT32_C( 7.85), SIMDE_FLOAT32_C( 9.14), SIMDE_FLOAT32_C( 8.75), SIMDE_FLOAT32_C( 8.57) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_log2_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_log2_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 760.38), SIMDE_FLOAT64_C( 341.71), SIMDE_FLOAT64_C( 933.93), SIMDE_FLOAT64_C( 964.91) }, { SIMDE_FLOAT64_C( 9.57), SIMDE_FLOAT64_C( 8.42), SIMDE_FLOAT64_C( 9.87), SIMDE_FLOAT64_C( 9.91) } }, { { SIMDE_FLOAT64_C( 115.25), SIMDE_FLOAT64_C( 77.12), SIMDE_FLOAT64_C( 667.61), SIMDE_FLOAT64_C( 365.22) }, { SIMDE_FLOAT64_C( 6.85), SIMDE_FLOAT64_C( 6.27), SIMDE_FLOAT64_C( 9.38), SIMDE_FLOAT64_C( 8.51) } }, { { SIMDE_FLOAT64_C( 679.91), SIMDE_FLOAT64_C( 892.57), SIMDE_FLOAT64_C( 787.62), SIMDE_FLOAT64_C( 588.83) }, { SIMDE_FLOAT64_C( 9.41), SIMDE_FLOAT64_C( 9.80), SIMDE_FLOAT64_C( 9.62), SIMDE_FLOAT64_C( 9.20) } }, { { SIMDE_FLOAT64_C( 30.55), SIMDE_FLOAT64_C( 713.90), SIMDE_FLOAT64_C( 332.19), SIMDE_FLOAT64_C( 616.75) }, { SIMDE_FLOAT64_C( 4.93), SIMDE_FLOAT64_C( 9.48), SIMDE_FLOAT64_C( 8.38), SIMDE_FLOAT64_C( 9.27) } }, { { SIMDE_FLOAT64_C( 183.75), SIMDE_FLOAT64_C( 550.51), SIMDE_FLOAT64_C( 693.58), SIMDE_FLOAT64_C( 893.18) }, { SIMDE_FLOAT64_C( 7.52), SIMDE_FLOAT64_C( 9.10), SIMDE_FLOAT64_C( 9.44), SIMDE_FLOAT64_C( 
9.80) } }, { { SIMDE_FLOAT64_C( 430.95), SIMDE_FLOAT64_C( 320.69), SIMDE_FLOAT64_C( 576.89), SIMDE_FLOAT64_C( 863.61) }, { SIMDE_FLOAT64_C( 8.75), SIMDE_FLOAT64_C( 8.33), SIMDE_FLOAT64_C( 9.17), SIMDE_FLOAT64_C( 9.75) } }, { { SIMDE_FLOAT64_C( 830.18), SIMDE_FLOAT64_C( 881.23), SIMDE_FLOAT64_C( 596.73), SIMDE_FLOAT64_C( 514.46) }, { SIMDE_FLOAT64_C( 9.70), SIMDE_FLOAT64_C( 9.78), SIMDE_FLOAT64_C( 9.22), SIMDE_FLOAT64_C( 9.01) } }, { { SIMDE_FLOAT64_C( 253.95), SIMDE_FLOAT64_C( 753.04), SIMDE_FLOAT64_C( 535.98), SIMDE_FLOAT64_C( 14.32) }, { SIMDE_FLOAT64_C( 7.99), SIMDE_FLOAT64_C( 9.56), SIMDE_FLOAT64_C( 9.07), SIMDE_FLOAT64_C( 3.84) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_log2_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_log2_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 483.98), SIMDE_FLOAT32_C( 550.49), SIMDE_FLOAT32_C( 612.79), SIMDE_FLOAT32_C( 652.36), SIMDE_FLOAT32_C( 702.86), SIMDE_FLOAT32_C( 993.84), SIMDE_FLOAT32_C( 608.42), SIMDE_FLOAT32_C( 923.16), SIMDE_FLOAT32_C( 531.91), SIMDE_FLOAT32_C( 675.78), SIMDE_FLOAT32_C( 571.39), SIMDE_FLOAT32_C( 422.11), SIMDE_FLOAT32_C( 520.20), SIMDE_FLOAT32_C( 536.40), SIMDE_FLOAT32_C( 462.32), SIMDE_FLOAT32_C( 841.06) }, { SIMDE_FLOAT32_C( 8.92), SIMDE_FLOAT32_C( 9.10), SIMDE_FLOAT32_C( 9.26), SIMDE_FLOAT32_C( 9.35), SIMDE_FLOAT32_C( 9.46), SIMDE_FLOAT32_C( 9.96), SIMDE_FLOAT32_C( 9.25), SIMDE_FLOAT32_C( 9.85), SIMDE_FLOAT32_C( 9.06), SIMDE_FLOAT32_C( 9.40), SIMDE_FLOAT32_C( 9.16), SIMDE_FLOAT32_C( 8.72), SIMDE_FLOAT32_C( 9.02), SIMDE_FLOAT32_C( 9.07), SIMDE_FLOAT32_C( 8.85), SIMDE_FLOAT32_C( 9.72) } }, { { SIMDE_FLOAT32_C( 513.13), SIMDE_FLOAT32_C( 741.74), SIMDE_FLOAT32_C( 931.43), SIMDE_FLOAT32_C( 670.23), 
SIMDE_FLOAT32_C( 393.50), SIMDE_FLOAT32_C( 862.99), SIMDE_FLOAT32_C( 343.67), SIMDE_FLOAT32_C( 818.00), SIMDE_FLOAT32_C( 637.20), SIMDE_FLOAT32_C( 123.18), SIMDE_FLOAT32_C( 888.07), SIMDE_FLOAT32_C( 327.64), SIMDE_FLOAT32_C( 438.36), SIMDE_FLOAT32_C( 579.84), SIMDE_FLOAT32_C( 783.89), SIMDE_FLOAT32_C( 922.33) }, { SIMDE_FLOAT32_C( 9.00), SIMDE_FLOAT32_C( 9.53), SIMDE_FLOAT32_C( 9.86), SIMDE_FLOAT32_C( 9.39), SIMDE_FLOAT32_C( 8.62), SIMDE_FLOAT32_C( 9.75), SIMDE_FLOAT32_C( 8.42), SIMDE_FLOAT32_C( 9.68), SIMDE_FLOAT32_C( 9.32), SIMDE_FLOAT32_C( 6.94), SIMDE_FLOAT32_C( 9.79), SIMDE_FLOAT32_C( 8.36), SIMDE_FLOAT32_C( 8.78), SIMDE_FLOAT32_C( 9.18), SIMDE_FLOAT32_C( 9.61), SIMDE_FLOAT32_C( 9.85) } }, { { SIMDE_FLOAT32_C( 130.33), SIMDE_FLOAT32_C( 396.68), SIMDE_FLOAT32_C( 574.70), SIMDE_FLOAT32_C( 833.19), SIMDE_FLOAT32_C( 390.52), SIMDE_FLOAT32_C( 183.11), SIMDE_FLOAT32_C( 756.35), SIMDE_FLOAT32_C( 922.43), SIMDE_FLOAT32_C( 858.89), SIMDE_FLOAT32_C( 327.75), SIMDE_FLOAT32_C( 344.53), SIMDE_FLOAT32_C( 379.09), SIMDE_FLOAT32_C( 864.14), SIMDE_FLOAT32_C( 806.85), SIMDE_FLOAT32_C( 220.15), SIMDE_FLOAT32_C( 377.27) }, { SIMDE_FLOAT32_C( 7.03), SIMDE_FLOAT32_C( 8.63), SIMDE_FLOAT32_C( 9.17), SIMDE_FLOAT32_C( 9.70), SIMDE_FLOAT32_C( 8.61), SIMDE_FLOAT32_C( 7.52), SIMDE_FLOAT32_C( 9.56), SIMDE_FLOAT32_C( 9.85), SIMDE_FLOAT32_C( 9.75), SIMDE_FLOAT32_C( 8.36), SIMDE_FLOAT32_C( 8.43), SIMDE_FLOAT32_C( 8.57), SIMDE_FLOAT32_C( 9.76), SIMDE_FLOAT32_C( 9.66), SIMDE_FLOAT32_C( 7.78), SIMDE_FLOAT32_C( 8.56) } }, { { SIMDE_FLOAT32_C( 548.60), SIMDE_FLOAT32_C( 151.58), SIMDE_FLOAT32_C( 47.50), SIMDE_FLOAT32_C( 942.10), SIMDE_FLOAT32_C( 14.58), SIMDE_FLOAT32_C( 391.17), SIMDE_FLOAT32_C( 760.10), SIMDE_FLOAT32_C( 651.77), SIMDE_FLOAT32_C( 514.35), SIMDE_FLOAT32_C( 648.17), SIMDE_FLOAT32_C( 979.41), SIMDE_FLOAT32_C( 952.70), SIMDE_FLOAT32_C( 228.00), SIMDE_FLOAT32_C( 763.30), SIMDE_FLOAT32_C( 875.04), SIMDE_FLOAT32_C( 358.34) }, { SIMDE_FLOAT32_C( 9.10), SIMDE_FLOAT32_C( 7.24), 
SIMDE_FLOAT32_C( 5.57), SIMDE_FLOAT32_C( 9.88), SIMDE_FLOAT32_C( 3.87), SIMDE_FLOAT32_C( 8.61), SIMDE_FLOAT32_C( 9.57), SIMDE_FLOAT32_C( 9.35), SIMDE_FLOAT32_C( 9.01), SIMDE_FLOAT32_C( 9.34), SIMDE_FLOAT32_C( 9.94), SIMDE_FLOAT32_C( 9.90), SIMDE_FLOAT32_C( 7.83), SIMDE_FLOAT32_C( 9.58), SIMDE_FLOAT32_C( 9.77), SIMDE_FLOAT32_C( 8.49) } }, { { SIMDE_FLOAT32_C( 159.99), SIMDE_FLOAT32_C( 449.73), SIMDE_FLOAT32_C( 191.53), SIMDE_FLOAT32_C( 550.50), SIMDE_FLOAT32_C( 632.84), SIMDE_FLOAT32_C( 947.88), SIMDE_FLOAT32_C( 472.93), SIMDE_FLOAT32_C( 491.73), SIMDE_FLOAT32_C( 275.62), SIMDE_FLOAT32_C( 817.47), SIMDE_FLOAT32_C( 870.83), SIMDE_FLOAT32_C( 139.76), SIMDE_FLOAT32_C( 624.32), SIMDE_FLOAT32_C( 90.98), SIMDE_FLOAT32_C( 517.04), SIMDE_FLOAT32_C( 172.92) }, { SIMDE_FLOAT32_C( 7.32), SIMDE_FLOAT32_C( 8.81), SIMDE_FLOAT32_C( 7.58), SIMDE_FLOAT32_C( 9.10), SIMDE_FLOAT32_C( 9.31), SIMDE_FLOAT32_C( 9.89), SIMDE_FLOAT32_C( 8.89), SIMDE_FLOAT32_C( 8.94), SIMDE_FLOAT32_C( 8.11), SIMDE_FLOAT32_C( 9.68), SIMDE_FLOAT32_C( 9.77), SIMDE_FLOAT32_C( 7.13), SIMDE_FLOAT32_C( 9.29), SIMDE_FLOAT32_C( 6.51), SIMDE_FLOAT32_C( 9.01), SIMDE_FLOAT32_C( 7.43) } }, { { SIMDE_FLOAT32_C( 242.56), SIMDE_FLOAT32_C( 564.54), SIMDE_FLOAT32_C( 115.01), SIMDE_FLOAT32_C( 257.14), SIMDE_FLOAT32_C( 955.71), SIMDE_FLOAT32_C( 875.12), SIMDE_FLOAT32_C( 908.91), SIMDE_FLOAT32_C( 470.05), SIMDE_FLOAT32_C( 523.28), SIMDE_FLOAT32_C( 888.32), SIMDE_FLOAT32_C( 422.76), SIMDE_FLOAT32_C( 751.29), SIMDE_FLOAT32_C( 651.63), SIMDE_FLOAT32_C( 297.79), SIMDE_FLOAT32_C( 109.62), SIMDE_FLOAT32_C( 811.61) }, { SIMDE_FLOAT32_C( 7.92), SIMDE_FLOAT32_C( 9.14), SIMDE_FLOAT32_C( 6.85), SIMDE_FLOAT32_C( 8.01), SIMDE_FLOAT32_C( 9.90), SIMDE_FLOAT32_C( 9.77), SIMDE_FLOAT32_C( 9.83), SIMDE_FLOAT32_C( 8.88), SIMDE_FLOAT32_C( 9.03), SIMDE_FLOAT32_C( 9.79), SIMDE_FLOAT32_C( 8.72), SIMDE_FLOAT32_C( 9.55), SIMDE_FLOAT32_C( 9.35), SIMDE_FLOAT32_C( 8.22), SIMDE_FLOAT32_C( 6.78), SIMDE_FLOAT32_C( 9.66) } }, { { SIMDE_FLOAT32_C( 747.52), 
SIMDE_FLOAT32_C( 301.15), SIMDE_FLOAT32_C( 362.12), SIMDE_FLOAT32_C( 380.36), SIMDE_FLOAT32_C( 249.03), SIMDE_FLOAT32_C( 835.05), SIMDE_FLOAT32_C( 872.10), SIMDE_FLOAT32_C( 524.65), SIMDE_FLOAT32_C( 652.52), SIMDE_FLOAT32_C( 742.92), SIMDE_FLOAT32_C( 664.41), SIMDE_FLOAT32_C( 276.84), SIMDE_FLOAT32_C( 833.90), SIMDE_FLOAT32_C( 181.45), SIMDE_FLOAT32_C( 449.75), SIMDE_FLOAT32_C( 76.46) }, { SIMDE_FLOAT32_C( 9.55), SIMDE_FLOAT32_C( 8.23), SIMDE_FLOAT32_C( 8.50), SIMDE_FLOAT32_C( 8.57), SIMDE_FLOAT32_C( 7.96), SIMDE_FLOAT32_C( 9.71), SIMDE_FLOAT32_C( 9.77), SIMDE_FLOAT32_C( 9.04), SIMDE_FLOAT32_C( 9.35), SIMDE_FLOAT32_C( 9.54), SIMDE_FLOAT32_C( 9.38), SIMDE_FLOAT32_C( 8.11), SIMDE_FLOAT32_C( 9.70), SIMDE_FLOAT32_C( 7.50), SIMDE_FLOAT32_C( 8.81), SIMDE_FLOAT32_C( 6.26) } }, { { SIMDE_FLOAT32_C( 745.98), SIMDE_FLOAT32_C( 564.77), SIMDE_FLOAT32_C( 333.60), SIMDE_FLOAT32_C( 701.69), SIMDE_FLOAT32_C( 439.88), SIMDE_FLOAT32_C( 242.51), SIMDE_FLOAT32_C( 171.74), SIMDE_FLOAT32_C( 963.17), SIMDE_FLOAT32_C( 130.83), SIMDE_FLOAT32_C( 594.50), SIMDE_FLOAT32_C( 714.46), SIMDE_FLOAT32_C( 782.46), SIMDE_FLOAT32_C( 892.29), SIMDE_FLOAT32_C( 824.08), SIMDE_FLOAT32_C( 594.07), SIMDE_FLOAT32_C( 639.81) }, { SIMDE_FLOAT32_C( 9.54), SIMDE_FLOAT32_C( 9.14), SIMDE_FLOAT32_C( 8.38), SIMDE_FLOAT32_C( 9.45), SIMDE_FLOAT32_C( 8.78), SIMDE_FLOAT32_C( 7.92), SIMDE_FLOAT32_C( 7.42), SIMDE_FLOAT32_C( 9.91), SIMDE_FLOAT32_C( 7.03), SIMDE_FLOAT32_C( 9.22), SIMDE_FLOAT32_C( 9.48), SIMDE_FLOAT32_C( 9.61), SIMDE_FLOAT32_C( 9.80), SIMDE_FLOAT32_C( 9.69), SIMDE_FLOAT32_C( 9.21), SIMDE_FLOAT32_C( 9.32) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_log2_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_log2_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; 
const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 951.54), SIMDE_FLOAT32_C( 999.54), SIMDE_FLOAT32_C( 334.61), SIMDE_FLOAT32_C( 345.31), SIMDE_FLOAT32_C( 632.13), SIMDE_FLOAT32_C( 486.36), SIMDE_FLOAT32_C( 855.38), SIMDE_FLOAT32_C( 575.68), SIMDE_FLOAT32_C( 586.36), SIMDE_FLOAT32_C( 821.37), SIMDE_FLOAT32_C( 638.17), SIMDE_FLOAT32_C( 965.64), SIMDE_FLOAT32_C( 565.55), SIMDE_FLOAT32_C( 416.08), SIMDE_FLOAT32_C( 543.83), SIMDE_FLOAT32_C( 785.84) }, UINT8_C( 38), { SIMDE_FLOAT32_C( 694.42), SIMDE_FLOAT32_C( 92.26), SIMDE_FLOAT32_C( 723.42), SIMDE_FLOAT32_C( 203.15), SIMDE_FLOAT32_C( 315.73), SIMDE_FLOAT32_C( 806.95), SIMDE_FLOAT32_C( 395.41), SIMDE_FLOAT32_C( 157.52), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 504.24), SIMDE_FLOAT32_C( 237.89), SIMDE_FLOAT32_C( 806.42), SIMDE_FLOAT32_C( 668.52), SIMDE_FLOAT32_C( 921.63), SIMDE_FLOAT32_C( 757.96), SIMDE_FLOAT32_C( 668.06) }, { SIMDE_FLOAT32_C( 951.54), SIMDE_FLOAT32_C( 6.53), SIMDE_FLOAT32_C( 9.50), SIMDE_FLOAT32_C( 345.31), SIMDE_FLOAT32_C( 632.13), SIMDE_FLOAT32_C( 9.66), SIMDE_FLOAT32_C( 855.38), SIMDE_FLOAT32_C( 575.68), SIMDE_FLOAT32_C( 586.36), SIMDE_FLOAT32_C( 821.37), SIMDE_FLOAT32_C( 638.17), SIMDE_FLOAT32_C( 965.64), SIMDE_FLOAT32_C( 565.55), SIMDE_FLOAT32_C( 416.08), SIMDE_FLOAT32_C( 543.83), SIMDE_FLOAT32_C( 785.84) } }, { { SIMDE_FLOAT32_C( 256.24), SIMDE_FLOAT32_C( 103.27), SIMDE_FLOAT32_C( 300.20), SIMDE_FLOAT32_C( 742.60), SIMDE_FLOAT32_C( 958.65), SIMDE_FLOAT32_C( 875.88), SIMDE_FLOAT32_C( 328.96), SIMDE_FLOAT32_C( 780.02), SIMDE_FLOAT32_C( 514.05), SIMDE_FLOAT32_C( 294.61), SIMDE_FLOAT32_C( 345.57), SIMDE_FLOAT32_C( 930.14), SIMDE_FLOAT32_C( 838.44), SIMDE_FLOAT32_C( 131.42), SIMDE_FLOAT32_C( 65.69), SIMDE_FLOAT32_C( 532.86) }, UINT8_C(234), { SIMDE_FLOAT32_C( 789.11), SIMDE_FLOAT32_C( 736.01), SIMDE_FLOAT32_C( 539.40), SIMDE_FLOAT32_C( 596.06), SIMDE_FLOAT32_C( 131.42), SIMDE_FLOAT32_C( 696.92), SIMDE_FLOAT32_C( 597.63), SIMDE_FLOAT32_C( 635.66), 
SIMDE_FLOAT32_C( 934.80), SIMDE_FLOAT32_C( 404.05), SIMDE_FLOAT32_C( 304.18), SIMDE_FLOAT32_C( 856.43), SIMDE_FLOAT32_C( 162.01), SIMDE_FLOAT32_C( 972.25), SIMDE_FLOAT32_C( 112.67), SIMDE_FLOAT32_C( 265.28) }, { SIMDE_FLOAT32_C( 256.24), SIMDE_FLOAT32_C( 9.52), SIMDE_FLOAT32_C( 300.20), SIMDE_FLOAT32_C( 9.22), SIMDE_FLOAT32_C( 958.65), SIMDE_FLOAT32_C( 9.44), SIMDE_FLOAT32_C( 9.22), SIMDE_FLOAT32_C( 9.31), SIMDE_FLOAT32_C( 514.05), SIMDE_FLOAT32_C( 294.61), SIMDE_FLOAT32_C( 345.57), SIMDE_FLOAT32_C( 930.14), SIMDE_FLOAT32_C( 838.44), SIMDE_FLOAT32_C( 131.42), SIMDE_FLOAT32_C( 65.69), SIMDE_FLOAT32_C( 532.86) } }, { { SIMDE_FLOAT32_C( 272.44), SIMDE_FLOAT32_C( 855.27), SIMDE_FLOAT32_C( 223.93), SIMDE_FLOAT32_C( 148.32), SIMDE_FLOAT32_C( 184.23), SIMDE_FLOAT32_C( 3.95), SIMDE_FLOAT32_C( 662.37), SIMDE_FLOAT32_C( 478.84), SIMDE_FLOAT32_C( 349.52), SIMDE_FLOAT32_C( 592.51), SIMDE_FLOAT32_C( 317.28), SIMDE_FLOAT32_C( 480.94), SIMDE_FLOAT32_C( 658.20), SIMDE_FLOAT32_C( 850.14), SIMDE_FLOAT32_C( 704.61), SIMDE_FLOAT32_C( 447.31) }, UINT8_C(189), { SIMDE_FLOAT32_C( 244.01), SIMDE_FLOAT32_C( 43.37), SIMDE_FLOAT32_C( 717.57), SIMDE_FLOAT32_C( 940.93), SIMDE_FLOAT32_C( 641.00), SIMDE_FLOAT32_C( 353.24), SIMDE_FLOAT32_C( 875.73), SIMDE_FLOAT32_C( 45.05), SIMDE_FLOAT32_C( 657.42), SIMDE_FLOAT32_C( 732.16), SIMDE_FLOAT32_C( 207.05), SIMDE_FLOAT32_C( 629.67), SIMDE_FLOAT32_C( 844.83), SIMDE_FLOAT32_C( 472.33), SIMDE_FLOAT32_C( 902.11), SIMDE_FLOAT32_C( 700.10) }, { SIMDE_FLOAT32_C( 7.93), SIMDE_FLOAT32_C( 855.27), SIMDE_FLOAT32_C( 9.49), SIMDE_FLOAT32_C( 9.88), SIMDE_FLOAT32_C( 9.32), SIMDE_FLOAT32_C( 8.46), SIMDE_FLOAT32_C( 662.37), SIMDE_FLOAT32_C( 5.49), SIMDE_FLOAT32_C( 349.52), SIMDE_FLOAT32_C( 592.51), SIMDE_FLOAT32_C( 317.28), SIMDE_FLOAT32_C( 480.94), SIMDE_FLOAT32_C( 658.20), SIMDE_FLOAT32_C( 850.14), SIMDE_FLOAT32_C( 704.61), SIMDE_FLOAT32_C( 447.31) } }, { { SIMDE_FLOAT32_C( 696.26), SIMDE_FLOAT32_C( 50.44), SIMDE_FLOAT32_C( 884.33), SIMDE_FLOAT32_C( 700.20), 
SIMDE_FLOAT32_C( 712.81), SIMDE_FLOAT32_C( 363.17), SIMDE_FLOAT32_C( 49.73), SIMDE_FLOAT32_C( 305.32), SIMDE_FLOAT32_C( 680.45), SIMDE_FLOAT32_C( 530.67), SIMDE_FLOAT32_C( 963.52), SIMDE_FLOAT32_C( 530.59), SIMDE_FLOAT32_C( 235.28), SIMDE_FLOAT32_C( 410.84), SIMDE_FLOAT32_C( 116.75), SIMDE_FLOAT32_C( 479.29) }, UINT8_C(235), { SIMDE_FLOAT32_C( 834.32), SIMDE_FLOAT32_C( 420.22), SIMDE_FLOAT32_C( 95.21), SIMDE_FLOAT32_C( 187.56), SIMDE_FLOAT32_C( 295.95), SIMDE_FLOAT32_C( 140.25), SIMDE_FLOAT32_C( 844.98), SIMDE_FLOAT32_C( 28.11), SIMDE_FLOAT32_C( 347.31), SIMDE_FLOAT32_C( 474.66), SIMDE_FLOAT32_C( 872.94), SIMDE_FLOAT32_C( 819.64), SIMDE_FLOAT32_C( 376.77), SIMDE_FLOAT32_C( 573.04), SIMDE_FLOAT32_C( 515.89), SIMDE_FLOAT32_C( 427.21) }, { SIMDE_FLOAT32_C( 9.70), SIMDE_FLOAT32_C( 8.72), SIMDE_FLOAT32_C( 884.33), SIMDE_FLOAT32_C( 7.55), SIMDE_FLOAT32_C( 712.81), SIMDE_FLOAT32_C( 7.13), SIMDE_FLOAT32_C( 9.72), SIMDE_FLOAT32_C( 4.81), SIMDE_FLOAT32_C( 680.45), SIMDE_FLOAT32_C( 530.67), SIMDE_FLOAT32_C( 963.52), SIMDE_FLOAT32_C( 530.59), SIMDE_FLOAT32_C( 235.28), SIMDE_FLOAT32_C( 410.84), SIMDE_FLOAT32_C( 116.75), SIMDE_FLOAT32_C( 479.29) } }, { { SIMDE_FLOAT32_C( 457.38), SIMDE_FLOAT32_C( 216.10), SIMDE_FLOAT32_C( 140.02), SIMDE_FLOAT32_C( 820.55), SIMDE_FLOAT32_C( 265.82), SIMDE_FLOAT32_C( 445.34), SIMDE_FLOAT32_C( 501.00), SIMDE_FLOAT32_C( 796.49), SIMDE_FLOAT32_C( 408.86), SIMDE_FLOAT32_C( 31.60), SIMDE_FLOAT32_C( 31.77), SIMDE_FLOAT32_C( 819.70), SIMDE_FLOAT32_C( 148.34), SIMDE_FLOAT32_C( 511.06), SIMDE_FLOAT32_C( 273.91), SIMDE_FLOAT32_C( 982.67) }, UINT8_C(170), { SIMDE_FLOAT32_C( 369.11), SIMDE_FLOAT32_C( 170.23), SIMDE_FLOAT32_C( 227.24), SIMDE_FLOAT32_C( 509.37), SIMDE_FLOAT32_C( 15.21), SIMDE_FLOAT32_C( 255.36), SIMDE_FLOAT32_C( 856.67), SIMDE_FLOAT32_C( 489.87), SIMDE_FLOAT32_C( 128.30), SIMDE_FLOAT32_C( 676.31), SIMDE_FLOAT32_C( 866.64), SIMDE_FLOAT32_C( 701.34), SIMDE_FLOAT32_C( 192.20), SIMDE_FLOAT32_C( 293.84), SIMDE_FLOAT32_C( 158.72), SIMDE_FLOAT32_C( 
408.30) }, { SIMDE_FLOAT32_C( 457.38), SIMDE_FLOAT32_C( 7.41), SIMDE_FLOAT32_C( 140.02), SIMDE_FLOAT32_C( 8.99), SIMDE_FLOAT32_C( 265.82), SIMDE_FLOAT32_C( 8.00), SIMDE_FLOAT32_C( 501.00), SIMDE_FLOAT32_C( 8.94), SIMDE_FLOAT32_C( 408.86), SIMDE_FLOAT32_C( 31.60), SIMDE_FLOAT32_C( 31.77), SIMDE_FLOAT32_C( 819.70), SIMDE_FLOAT32_C( 148.34), SIMDE_FLOAT32_C( 511.06), SIMDE_FLOAT32_C( 273.91), SIMDE_FLOAT32_C( 982.67) } }, { { SIMDE_FLOAT32_C( 433.86), SIMDE_FLOAT32_C( 979.27), SIMDE_FLOAT32_C( 674.13), SIMDE_FLOAT32_C( 879.20), SIMDE_FLOAT32_C( 480.27), SIMDE_FLOAT32_C( 470.62), SIMDE_FLOAT32_C( 288.06), SIMDE_FLOAT32_C( 511.87), SIMDE_FLOAT32_C( 502.39), SIMDE_FLOAT32_C( 107.76), SIMDE_FLOAT32_C( 660.21), SIMDE_FLOAT32_C( 13.45), SIMDE_FLOAT32_C( 381.67), SIMDE_FLOAT32_C( 642.88), SIMDE_FLOAT32_C( 944.74), SIMDE_FLOAT32_C( 750.78) }, UINT8_C( 15), { SIMDE_FLOAT32_C( 171.98), SIMDE_FLOAT32_C( 260.15), SIMDE_FLOAT32_C( 828.32), SIMDE_FLOAT32_C( 427.33), SIMDE_FLOAT32_C( 116.82), SIMDE_FLOAT32_C( 318.18), SIMDE_FLOAT32_C( 555.63), SIMDE_FLOAT32_C( 793.13), SIMDE_FLOAT32_C( 184.82), SIMDE_FLOAT32_C( 256.97), SIMDE_FLOAT32_C( 985.33), SIMDE_FLOAT32_C( 478.66), SIMDE_FLOAT32_C( 415.69), SIMDE_FLOAT32_C( 393.63), SIMDE_FLOAT32_C( 912.52), SIMDE_FLOAT32_C( 394.96) }, { SIMDE_FLOAT32_C( 7.43), SIMDE_FLOAT32_C( 8.02), SIMDE_FLOAT32_C( 9.69), SIMDE_FLOAT32_C( 8.74), SIMDE_FLOAT32_C( 480.27), SIMDE_FLOAT32_C( 470.62), SIMDE_FLOAT32_C( 288.06), SIMDE_FLOAT32_C( 511.87), SIMDE_FLOAT32_C( 502.39), SIMDE_FLOAT32_C( 107.76), SIMDE_FLOAT32_C( 660.21), SIMDE_FLOAT32_C( 13.45), SIMDE_FLOAT32_C( 381.67), SIMDE_FLOAT32_C( 642.88), SIMDE_FLOAT32_C( 944.74), SIMDE_FLOAT32_C( 750.78) } }, { { SIMDE_FLOAT32_C( 67.76), SIMDE_FLOAT32_C( 791.72), SIMDE_FLOAT32_C( 875.23), SIMDE_FLOAT32_C( 538.38), SIMDE_FLOAT32_C( 79.78), SIMDE_FLOAT32_C( 387.09), SIMDE_FLOAT32_C( 40.77), SIMDE_FLOAT32_C( 187.54), SIMDE_FLOAT32_C( 47.31), SIMDE_FLOAT32_C( 54.22), SIMDE_FLOAT32_C( 569.20), SIMDE_FLOAT32_C( 
690.18), SIMDE_FLOAT32_C( 998.96), SIMDE_FLOAT32_C( 319.98), SIMDE_FLOAT32_C( 503.29), SIMDE_FLOAT32_C( 170.94) }, UINT8_C( 81), { SIMDE_FLOAT32_C( 331.60), SIMDE_FLOAT32_C( 598.27), SIMDE_FLOAT32_C( 696.95), SIMDE_FLOAT32_C( 649.79), SIMDE_FLOAT32_C( 153.90), SIMDE_FLOAT32_C( 490.08), SIMDE_FLOAT32_C( 834.61), SIMDE_FLOAT32_C( 410.88), SIMDE_FLOAT32_C( 475.41), SIMDE_FLOAT32_C( 313.27), SIMDE_FLOAT32_C( 826.57), SIMDE_FLOAT32_C( 869.04), SIMDE_FLOAT32_C( 225.79), SIMDE_FLOAT32_C( 221.52), SIMDE_FLOAT32_C( 936.81), SIMDE_FLOAT32_C( 17.51) }, { SIMDE_FLOAT32_C( 8.37), SIMDE_FLOAT32_C( 791.72), SIMDE_FLOAT32_C( 875.23), SIMDE_FLOAT32_C( 538.38), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( 387.09), SIMDE_FLOAT32_C( 9.70), SIMDE_FLOAT32_C( 187.54), SIMDE_FLOAT32_C( 47.31), SIMDE_FLOAT32_C( 54.22), SIMDE_FLOAT32_C( 569.20), SIMDE_FLOAT32_C( 690.18), SIMDE_FLOAT32_C( 998.96), SIMDE_FLOAT32_C( 319.98), SIMDE_FLOAT32_C( 503.29), SIMDE_FLOAT32_C( 170.94) } }, { { SIMDE_FLOAT32_C( 96.75), SIMDE_FLOAT32_C( 475.18), SIMDE_FLOAT32_C( 97.29), SIMDE_FLOAT32_C( 483.84), SIMDE_FLOAT32_C( 515.95), SIMDE_FLOAT32_C( 284.83), SIMDE_FLOAT32_C( 531.15), SIMDE_FLOAT32_C( 570.17), SIMDE_FLOAT32_C( 854.03), SIMDE_FLOAT32_C( 221.33), SIMDE_FLOAT32_C( 569.13), SIMDE_FLOAT32_C( 174.01), SIMDE_FLOAT32_C( 724.62), SIMDE_FLOAT32_C( 740.06), SIMDE_FLOAT32_C( 754.14), SIMDE_FLOAT32_C( 56.23) }, UINT8_C(124), { SIMDE_FLOAT32_C( 451.09), SIMDE_FLOAT32_C( 706.02), SIMDE_FLOAT32_C( 492.24), SIMDE_FLOAT32_C( 941.16), SIMDE_FLOAT32_C( 540.62), SIMDE_FLOAT32_C( 903.11), SIMDE_FLOAT32_C( 416.57), SIMDE_FLOAT32_C( 853.89), SIMDE_FLOAT32_C( 729.68), SIMDE_FLOAT32_C( 285.62), SIMDE_FLOAT32_C( 79.69), SIMDE_FLOAT32_C( 951.20), SIMDE_FLOAT32_C( 222.42), SIMDE_FLOAT32_C( 97.20), SIMDE_FLOAT32_C( 47.95), SIMDE_FLOAT32_C( 697.61) }, { SIMDE_FLOAT32_C( 96.75), SIMDE_FLOAT32_C( 475.18), SIMDE_FLOAT32_C( 8.94), SIMDE_FLOAT32_C( 9.88), SIMDE_FLOAT32_C( 9.08), SIMDE_FLOAT32_C( 9.82), SIMDE_FLOAT32_C( 8.70), 
SIMDE_FLOAT32_C( 570.17), SIMDE_FLOAT32_C( 854.03), SIMDE_FLOAT32_C( 221.33), SIMDE_FLOAT32_C( 569.13), SIMDE_FLOAT32_C( 174.01), SIMDE_FLOAT32_C( 724.62), SIMDE_FLOAT32_C( 740.06), SIMDE_FLOAT32_C( 754.14), SIMDE_FLOAT32_C( 56.23) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_log2_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_log2_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 119.65), SIMDE_FLOAT64_C( 209.41), SIMDE_FLOAT64_C( 983.20), SIMDE_FLOAT64_C( 968.40), SIMDE_FLOAT64_C( 158.45), SIMDE_FLOAT64_C( 611.79), SIMDE_FLOAT64_C( 202.67), SIMDE_FLOAT64_C( 73.75) }, { SIMDE_FLOAT64_C( 6.90), SIMDE_FLOAT64_C( 7.71), SIMDE_FLOAT64_C( 9.94), SIMDE_FLOAT64_C( 9.92), SIMDE_FLOAT64_C( 7.31), SIMDE_FLOAT64_C( 9.26), SIMDE_FLOAT64_C( 7.66), SIMDE_FLOAT64_C( 6.20) } }, { { SIMDE_FLOAT64_C( 875.12), SIMDE_FLOAT64_C( 357.46), SIMDE_FLOAT64_C( 960.14), SIMDE_FLOAT64_C( 477.36), SIMDE_FLOAT64_C( 185.60), SIMDE_FLOAT64_C( 437.48), SIMDE_FLOAT64_C( 656.75), SIMDE_FLOAT64_C( 468.11) }, { SIMDE_FLOAT64_C( 9.77), SIMDE_FLOAT64_C( 8.48), SIMDE_FLOAT64_C( 9.91), SIMDE_FLOAT64_C( 8.90), SIMDE_FLOAT64_C( 7.54), SIMDE_FLOAT64_C( 8.77), SIMDE_FLOAT64_C( 9.36), SIMDE_FLOAT64_C( 8.87) } }, { { SIMDE_FLOAT64_C( 538.86), SIMDE_FLOAT64_C( 465.92), SIMDE_FLOAT64_C( 597.15), SIMDE_FLOAT64_C( 858.12), SIMDE_FLOAT64_C( 110.06), SIMDE_FLOAT64_C( 149.17), SIMDE_FLOAT64_C( 41.30), SIMDE_FLOAT64_C( 954.56) }, { SIMDE_FLOAT64_C( 9.07), SIMDE_FLOAT64_C( 8.86), SIMDE_FLOAT64_C( 9.22), SIMDE_FLOAT64_C( 9.75), SIMDE_FLOAT64_C( 6.78), SIMDE_FLOAT64_C( 7.22), SIMDE_FLOAT64_C( 5.37), SIMDE_FLOAT64_C( 9.90) } }, { { SIMDE_FLOAT64_C( 919.40), 
SIMDE_FLOAT64_C( 93.55), SIMDE_FLOAT64_C( 761.38), SIMDE_FLOAT64_C( 128.98), SIMDE_FLOAT64_C( 873.27), SIMDE_FLOAT64_C( 719.89), SIMDE_FLOAT64_C( 554.57), SIMDE_FLOAT64_C( 992.93) }, { SIMDE_FLOAT64_C( 9.84), SIMDE_FLOAT64_C( 6.55), SIMDE_FLOAT64_C( 9.57), SIMDE_FLOAT64_C( 7.01), SIMDE_FLOAT64_C( 9.77), SIMDE_FLOAT64_C( 9.49), SIMDE_FLOAT64_C( 9.12), SIMDE_FLOAT64_C( 9.96) } }, { { SIMDE_FLOAT64_C( 929.29), SIMDE_FLOAT64_C( 537.77), SIMDE_FLOAT64_C( 961.32), SIMDE_FLOAT64_C( 87.74), SIMDE_FLOAT64_C( 149.55), SIMDE_FLOAT64_C( 164.00), SIMDE_FLOAT64_C( 161.49), SIMDE_FLOAT64_C( 24.67) }, { SIMDE_FLOAT64_C( 9.86), SIMDE_FLOAT64_C( 9.07), SIMDE_FLOAT64_C( 9.91), SIMDE_FLOAT64_C( 6.46), SIMDE_FLOAT64_C( 7.22), SIMDE_FLOAT64_C( 7.36), SIMDE_FLOAT64_C( 7.34), SIMDE_FLOAT64_C( 4.62) } }, { { SIMDE_FLOAT64_C( 521.46), SIMDE_FLOAT64_C( 121.63), SIMDE_FLOAT64_C( 502.03), SIMDE_FLOAT64_C( 707.07), SIMDE_FLOAT64_C( 559.11), SIMDE_FLOAT64_C( 158.78), SIMDE_FLOAT64_C( 175.18), SIMDE_FLOAT64_C( 97.96) }, { SIMDE_FLOAT64_C( 9.03), SIMDE_FLOAT64_C( 6.93), SIMDE_FLOAT64_C( 8.97), SIMDE_FLOAT64_C( 9.47), SIMDE_FLOAT64_C( 9.13), SIMDE_FLOAT64_C( 7.31), SIMDE_FLOAT64_C( 7.45), SIMDE_FLOAT64_C( 6.61) } }, { { SIMDE_FLOAT64_C( 624.70), SIMDE_FLOAT64_C( 772.32), SIMDE_FLOAT64_C( 956.08), SIMDE_FLOAT64_C( 734.75), SIMDE_FLOAT64_C( 921.49), SIMDE_FLOAT64_C( 997.38), SIMDE_FLOAT64_C( 689.31), SIMDE_FLOAT64_C( 840.89) }, { SIMDE_FLOAT64_C( 9.29), SIMDE_FLOAT64_C( 9.59), SIMDE_FLOAT64_C( 9.90), SIMDE_FLOAT64_C( 9.52), SIMDE_FLOAT64_C( 9.85), SIMDE_FLOAT64_C( 9.96), SIMDE_FLOAT64_C( 9.43), SIMDE_FLOAT64_C( 9.72) } }, { { SIMDE_FLOAT64_C( 90.93), SIMDE_FLOAT64_C( 450.70), SIMDE_FLOAT64_C( 969.87), SIMDE_FLOAT64_C( 964.20), SIMDE_FLOAT64_C( 170.58), SIMDE_FLOAT64_C( 524.44), SIMDE_FLOAT64_C( 957.13), SIMDE_FLOAT64_C( 99.88) }, { SIMDE_FLOAT64_C( 6.51), SIMDE_FLOAT64_C( 8.82), SIMDE_FLOAT64_C( 9.92), SIMDE_FLOAT64_C( 9.91), SIMDE_FLOAT64_C( 7.41), SIMDE_FLOAT64_C( 9.03), SIMDE_FLOAT64_C( 9.90), 
SIMDE_FLOAT64_C( 6.64) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_log2_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; }
/*
 * Table-driven check of simde_mm512_mask_log2_pd.
 *
 * Each test_vec entry holds a pass-through vector (src), an 8-bit mask (k),
 * the input vector (a) and the expected result (r). In the table, element n
 * of r is the two-decimal base-2 logarithm of a[n] when bit n of k is set and
 * a copy of src[n] otherwise; e.g. the first entry uses k = 23 (0b00010111),
 * so elements 3, 5, 6 and 7 of r repeat src.
 *
 * src and a are loaded with simde_mm512_loadu_pd, and the result of
 * simde_mm512_mask_log2_pd(src, k, a) is compared against r with
 * simde_test_x86_assert_equal_f64x8(..., 1).
 * NOTE(review): the trailing 1 is presumably the comparison precision used
 * by the assert helper -- confirm against the test utilities.
 *
 * Returns 0 once every entry has been checked.
 */
static int test_simde_mm512_mask_log2_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 996.08), SIMDE_FLOAT64_C( 61.08), SIMDE_FLOAT64_C( 921.49), SIMDE_FLOAT64_C( 192.89), SIMDE_FLOAT64_C( 553.14), SIMDE_FLOAT64_C( 14.27), SIMDE_FLOAT64_C( 408.18), SIMDE_FLOAT64_C( 275.52) }, UINT8_C( 23), { SIMDE_FLOAT64_C( 470.67), SIMDE_FLOAT64_C( 327.03), SIMDE_FLOAT64_C( 335.52), SIMDE_FLOAT64_C( 992.77), SIMDE_FLOAT64_C( 465.65), SIMDE_FLOAT64_C( 524.14), SIMDE_FLOAT64_C( 178.22), SIMDE_FLOAT64_C( 860.48) }, { SIMDE_FLOAT64_C( 8.88), SIMDE_FLOAT64_C( 8.35), SIMDE_FLOAT64_C( 8.39), SIMDE_FLOAT64_C( 192.89), SIMDE_FLOAT64_C( 8.86), SIMDE_FLOAT64_C( 14.27), SIMDE_FLOAT64_C( 408.18), SIMDE_FLOAT64_C( 275.52) } }, { { SIMDE_FLOAT64_C( 594.48), SIMDE_FLOAT64_C( 196.19), SIMDE_FLOAT64_C( 493.93), SIMDE_FLOAT64_C( 252.94), SIMDE_FLOAT64_C( 940.21), SIMDE_FLOAT64_C( 104.98), SIMDE_FLOAT64_C( 946.96), SIMDE_FLOAT64_C( 783.58) }, UINT8_C(251), { SIMDE_FLOAT64_C( 815.52), SIMDE_FLOAT64_C( 353.82), SIMDE_FLOAT64_C( 583.31), SIMDE_FLOAT64_C( 335.41), SIMDE_FLOAT64_C( 693.48), SIMDE_FLOAT64_C( 579.39), SIMDE_FLOAT64_C( 396.49), SIMDE_FLOAT64_C( 614.97) }, { SIMDE_FLOAT64_C( 9.67), SIMDE_FLOAT64_C( 8.47), SIMDE_FLOAT64_C( 493.93), SIMDE_FLOAT64_C( 8.39), SIMDE_FLOAT64_C( 9.44), SIMDE_FLOAT64_C( 9.18), SIMDE_FLOAT64_C( 8.63), SIMDE_FLOAT64_C( 9.26) } }, { { SIMDE_FLOAT64_C( 772.28), SIMDE_FLOAT64_C( 949.63), SIMDE_FLOAT64_C( 629.24), SIMDE_FLOAT64_C( 180.46), SIMDE_FLOAT64_C( 225.15), SIMDE_FLOAT64_C( 527.05), SIMDE_FLOAT64_C( 651.14), SIMDE_FLOAT64_C( 552.19) },
UINT8_C(241), { SIMDE_FLOAT64_C( 643.90), SIMDE_FLOAT64_C( 17.84), SIMDE_FLOAT64_C( 386.72), SIMDE_FLOAT64_C( 822.12), SIMDE_FLOAT64_C( 878.32), SIMDE_FLOAT64_C( 981.20), SIMDE_FLOAT64_C( 18.32), SIMDE_FLOAT64_C( 372.25) }, { SIMDE_FLOAT64_C( 9.33), SIMDE_FLOAT64_C( 949.63), SIMDE_FLOAT64_C( 629.24), SIMDE_FLOAT64_C( 180.46), SIMDE_FLOAT64_C( 9.78), SIMDE_FLOAT64_C( 9.94), SIMDE_FLOAT64_C( 4.19), SIMDE_FLOAT64_C( 8.54) } }, { { SIMDE_FLOAT64_C( 234.14), SIMDE_FLOAT64_C( 958.52), SIMDE_FLOAT64_C( 477.23), SIMDE_FLOAT64_C( 181.10), SIMDE_FLOAT64_C( 742.10), SIMDE_FLOAT64_C( 235.40), SIMDE_FLOAT64_C( 996.62), SIMDE_FLOAT64_C( 95.92) }, UINT8_C( 71), { SIMDE_FLOAT64_C( 332.03), SIMDE_FLOAT64_C( 789.40), SIMDE_FLOAT64_C( 398.10), SIMDE_FLOAT64_C( 728.52), SIMDE_FLOAT64_C( 404.38), SIMDE_FLOAT64_C( 170.38), SIMDE_FLOAT64_C( 678.16), SIMDE_FLOAT64_C( 33.62) }, { SIMDE_FLOAT64_C( 8.38), SIMDE_FLOAT64_C( 9.62), SIMDE_FLOAT64_C( 8.64), SIMDE_FLOAT64_C( 181.10), SIMDE_FLOAT64_C( 742.10), SIMDE_FLOAT64_C( 235.40), SIMDE_FLOAT64_C( 9.41), SIMDE_FLOAT64_C( 95.92) } }, { { SIMDE_FLOAT64_C( 350.85), SIMDE_FLOAT64_C( 903.31), SIMDE_FLOAT64_C( 560.67), SIMDE_FLOAT64_C( 1.98), SIMDE_FLOAT64_C( 455.50), SIMDE_FLOAT64_C( 423.25), SIMDE_FLOAT64_C( 645.89), SIMDE_FLOAT64_C( 473.34) }, UINT8_C(167), { SIMDE_FLOAT64_C( 468.01), SIMDE_FLOAT64_C( 351.66), SIMDE_FLOAT64_C( 791.16), SIMDE_FLOAT64_C( 486.32), SIMDE_FLOAT64_C( 723.90), SIMDE_FLOAT64_C( 25.30), SIMDE_FLOAT64_C( 444.84), SIMDE_FLOAT64_C( 201.13) }, { SIMDE_FLOAT64_C( 8.87), SIMDE_FLOAT64_C( 8.46), SIMDE_FLOAT64_C( 9.63), SIMDE_FLOAT64_C( 1.98), SIMDE_FLOAT64_C( 455.50), SIMDE_FLOAT64_C( 4.66), SIMDE_FLOAT64_C( 645.89), SIMDE_FLOAT64_C( 7.65) } }, { { SIMDE_FLOAT64_C( 206.40), SIMDE_FLOAT64_C( 186.94), SIMDE_FLOAT64_C( 436.54), SIMDE_FLOAT64_C( 203.02), SIMDE_FLOAT64_C( 282.87), SIMDE_FLOAT64_C( 255.25), SIMDE_FLOAT64_C( 535.05), SIMDE_FLOAT64_C( 72.27) }, UINT8_C(195), { SIMDE_FLOAT64_C( 263.57), SIMDE_FLOAT64_C( 476.64), 
SIMDE_FLOAT64_C( 823.73), SIMDE_FLOAT64_C( 941.73), SIMDE_FLOAT64_C( 510.26), SIMDE_FLOAT64_C( 174.57), SIMDE_FLOAT64_C( 845.04), SIMDE_FLOAT64_C( 70.93) }, { SIMDE_FLOAT64_C( 8.04), SIMDE_FLOAT64_C( 8.90), SIMDE_FLOAT64_C( 436.54), SIMDE_FLOAT64_C( 203.02), SIMDE_FLOAT64_C( 282.87), SIMDE_FLOAT64_C( 255.25), SIMDE_FLOAT64_C( 9.72), SIMDE_FLOAT64_C( 6.15) } }, { { SIMDE_FLOAT64_C( 176.55), SIMDE_FLOAT64_C( 300.54), SIMDE_FLOAT64_C( 494.17), SIMDE_FLOAT64_C( 822.44), SIMDE_FLOAT64_C( 773.88), SIMDE_FLOAT64_C( 304.14), SIMDE_FLOAT64_C( 290.45), SIMDE_FLOAT64_C( 125.54) }, UINT8_C( 79), { SIMDE_FLOAT64_C( 776.77), SIMDE_FLOAT64_C( 849.44), SIMDE_FLOAT64_C( 120.60), SIMDE_FLOAT64_C( 221.61), SIMDE_FLOAT64_C( 50.57), SIMDE_FLOAT64_C( 326.99), SIMDE_FLOAT64_C( 408.55), SIMDE_FLOAT64_C( 487.11) }, { SIMDE_FLOAT64_C( 9.60), SIMDE_FLOAT64_C( 9.73), SIMDE_FLOAT64_C( 6.91), SIMDE_FLOAT64_C( 7.79), SIMDE_FLOAT64_C( 773.88), SIMDE_FLOAT64_C( 304.14), SIMDE_FLOAT64_C( 8.67), SIMDE_FLOAT64_C( 125.54) } }, { { SIMDE_FLOAT64_C( 530.01), SIMDE_FLOAT64_C( 691.42), SIMDE_FLOAT64_C( 742.35), SIMDE_FLOAT64_C( 65.06), SIMDE_FLOAT64_C( 763.69), SIMDE_FLOAT64_C( 395.70), SIMDE_FLOAT64_C( 328.63), SIMDE_FLOAT64_C( 240.33) }, UINT8_C( 12), { SIMDE_FLOAT64_C( 270.37), SIMDE_FLOAT64_C( 750.59), SIMDE_FLOAT64_C( 394.00), SIMDE_FLOAT64_C( 115.41), SIMDE_FLOAT64_C( 821.52), SIMDE_FLOAT64_C( 570.56), SIMDE_FLOAT64_C( 415.95), SIMDE_FLOAT64_C( 315.69) }, { SIMDE_FLOAT64_C( 530.01), SIMDE_FLOAT64_C( 691.42), SIMDE_FLOAT64_C( 8.62), SIMDE_FLOAT64_C( 6.85), SIMDE_FLOAT64_C( 763.69), SIMDE_FLOAT64_C( 395.70), SIMDE_FLOAT64_C( 328.63), SIMDE_FLOAT64_C( 240.33) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_log2_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; 
}
/*
 * Table-driven check of simde_mm_log10_ps.
 *
 * test_vec is a local (non-static) array of eight entries whose input (a)
 * and expected (r) vectors are built with simde_mm_set_ps. The expected
 * values are the base-10 logarithms of the corresponding inputs written to
 * two decimals. Each result is compared with
 * simde_assert_m128_close(r, expected, 1).
 * NOTE(review): the trailing 1 is presumably the comparison precision used
 * by the assert helper -- confirm against the test utilities.
 *
 * Returns 0 once every entry has been checked.
 */
static int test_simde_mm_log10_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 4068.94), SIMDE_FLOAT32_C( 5195.06), SIMDE_FLOAT32_C( 1228.12), SIMDE_FLOAT32_C( 6733.16)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3.61), SIMDE_FLOAT32_C( 3.72), SIMDE_FLOAT32_C( 3.09), SIMDE_FLOAT32_C( 3.83)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 7486.55), SIMDE_FLOAT32_C( 8351.20), SIMDE_FLOAT32_C( 3512.77), SIMDE_FLOAT32_C( 5170.29)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3.87), SIMDE_FLOAT32_C( 3.92), SIMDE_FLOAT32_C( 3.55), SIMDE_FLOAT32_C( 3.71)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 9127.65), SIMDE_FLOAT32_C( 7111.03), SIMDE_FLOAT32_C( 3652.77), SIMDE_FLOAT32_C( 7338.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3.96), SIMDE_FLOAT32_C( 3.85), SIMDE_FLOAT32_C( 3.56), SIMDE_FLOAT32_C( 3.87)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 1609.14), SIMDE_FLOAT32_C( 1569.36), SIMDE_FLOAT32_C( 5423.87), SIMDE_FLOAT32_C( 7857.29)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3.21), SIMDE_FLOAT32_C( 3.20), SIMDE_FLOAT32_C( 3.73), SIMDE_FLOAT32_C( 3.90)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 3474.63), SIMDE_FLOAT32_C( 695.25), SIMDE_FLOAT32_C( 2912.29), SIMDE_FLOAT32_C( 8484.34)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3.54), SIMDE_FLOAT32_C( 2.84), SIMDE_FLOAT32_C( 3.46), SIMDE_FLOAT32_C( 3.93)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 2775.95), SIMDE_FLOAT32_C( 5142.35), SIMDE_FLOAT32_C( 3079.83), SIMDE_FLOAT32_C( 381.82)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3.44), SIMDE_FLOAT32_C( 3.71), SIMDE_FLOAT32_C( 3.49), SIMDE_FLOAT32_C( 2.58)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 6306.54), SIMDE_FLOAT32_C( 3937.29), SIMDE_FLOAT32_C( 117.23), SIMDE_FLOAT32_C( 1696.00)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3.80), SIMDE_FLOAT32_C( 3.60), SIMDE_FLOAT32_C( 2.07), SIMDE_FLOAT32_C( 3.23)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 5890.98), SIMDE_FLOAT32_C( 2746.67), SIMDE_FLOAT32_C( 6166.85), SIMDE_FLOAT32_C( 8435.45)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3.77), SIMDE_FLOAT32_C(
3.44), SIMDE_FLOAT32_C( 3.79), SIMDE_FLOAT32_C( 3.93)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_log10_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; }
/*
 * Table-driven check of simde_mm_log10_pd.
 *
 * test_vec is a local (non-static) array of eight entries whose input (a)
 * and expected (r) vectors are built with simde_mm_set_pd. The expected
 * values are the base-10 logarithms of the corresponding inputs written to
 * two decimals. Each result is compared with
 * simde_assert_m128d_close(r, expected, 1).
 * NOTE(review): the trailing 1 is presumably the comparison precision used
 * by the assert helper -- confirm against the test utilities.
 *
 * Returns 0 once every entry has been checked.
 */
static int test_simde_mm_log10_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 1228.12), SIMDE_FLOAT64_C( 6733.16)), simde_mm_set_pd(SIMDE_FLOAT64_C( 3.09), SIMDE_FLOAT64_C( 3.83)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 4068.94), SIMDE_FLOAT64_C( 5195.06)), simde_mm_set_pd(SIMDE_FLOAT64_C( 3.61), SIMDE_FLOAT64_C( 3.72)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 3512.77), SIMDE_FLOAT64_C( 5170.29)), simde_mm_set_pd(SIMDE_FLOAT64_C( 3.55), SIMDE_FLOAT64_C( 3.71)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 7486.55), SIMDE_FLOAT64_C( 8351.20)), simde_mm_set_pd(SIMDE_FLOAT64_C( 3.87), SIMDE_FLOAT64_C( 3.92)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 3652.77), SIMDE_FLOAT64_C( 7338.80)), simde_mm_set_pd(SIMDE_FLOAT64_C( 3.56), SIMDE_FLOAT64_C( 3.87)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 9127.65), SIMDE_FLOAT64_C( 7111.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( 3.96), SIMDE_FLOAT64_C( 3.85)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 5423.87), SIMDE_FLOAT64_C( 7857.29)), simde_mm_set_pd(SIMDE_FLOAT64_C( 3.73), SIMDE_FLOAT64_C( 3.90)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 1609.14), SIMDE_FLOAT64_C( 1569.36)), simde_mm_set_pd(SIMDE_FLOAT64_C( 3.21), SIMDE_FLOAT64_C( 3.20)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_log10_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; }
/*
 * Table-driven check of simde_mm256_log10_ps.
 *
 * test_vec is a local (non-static) array of eight entries whose input (a)
 * and expected (r) vectors are built with simde_mm256_set_ps; expected
 * values are the two-decimal base-10 logarithms of the inputs. Results are
 * compared with simde_assert_m256_close(r, expected, 1).
 * NOTE(review): the trailing 1 is presumably the comparison precision used
 * by the assert helper -- confirm against the test utilities.
 *
 * Returns 0 once every entry has been checked.
 */
static int test_simde_mm256_log10_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 7486.55), SIMDE_FLOAT32_C( 8351.20), SIMDE_FLOAT32_C( 3512.77), SIMDE_FLOAT32_C( 5170.29), SIMDE_FLOAT32_C( 4068.94), SIMDE_FLOAT32_C( 5195.06), SIMDE_FLOAT32_C(
1228.12), SIMDE_FLOAT32_C( 6733.16)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 3.87), SIMDE_FLOAT32_C( 3.92), SIMDE_FLOAT32_C( 3.55), SIMDE_FLOAT32_C( 3.71), SIMDE_FLOAT32_C( 3.61), SIMDE_FLOAT32_C( 3.72), SIMDE_FLOAT32_C( 3.09), SIMDE_FLOAT32_C( 3.83)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 1609.14), SIMDE_FLOAT32_C( 1569.36), SIMDE_FLOAT32_C( 5423.87), SIMDE_FLOAT32_C( 7857.29), SIMDE_FLOAT32_C( 9127.65), SIMDE_FLOAT32_C( 7111.03), SIMDE_FLOAT32_C( 3652.77), SIMDE_FLOAT32_C( 7338.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 3.21), SIMDE_FLOAT32_C( 3.20), SIMDE_FLOAT32_C( 3.73), SIMDE_FLOAT32_C( 3.90), SIMDE_FLOAT32_C( 3.96), SIMDE_FLOAT32_C( 3.85), SIMDE_FLOAT32_C( 3.56), SIMDE_FLOAT32_C( 3.87)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 2775.95), SIMDE_FLOAT32_C( 5142.35), SIMDE_FLOAT32_C( 3079.83), SIMDE_FLOAT32_C( 381.82), SIMDE_FLOAT32_C( 3474.63), SIMDE_FLOAT32_C( 695.25), SIMDE_FLOAT32_C( 2912.29), SIMDE_FLOAT32_C( 8484.34)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 3.44), SIMDE_FLOAT32_C( 3.71), SIMDE_FLOAT32_C( 3.49), SIMDE_FLOAT32_C( 2.58), SIMDE_FLOAT32_C( 3.54), SIMDE_FLOAT32_C( 2.84), SIMDE_FLOAT32_C( 3.46), SIMDE_FLOAT32_C( 3.93)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 5890.98), SIMDE_FLOAT32_C( 2746.67), SIMDE_FLOAT32_C( 6166.85), SIMDE_FLOAT32_C( 8435.45), SIMDE_FLOAT32_C( 6306.54), SIMDE_FLOAT32_C( 3937.29), SIMDE_FLOAT32_C( 117.23), SIMDE_FLOAT32_C( 1696.00)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 3.77), SIMDE_FLOAT32_C( 3.44), SIMDE_FLOAT32_C( 3.79), SIMDE_FLOAT32_C( 3.93), SIMDE_FLOAT32_C( 3.80), SIMDE_FLOAT32_C( 3.60), SIMDE_FLOAT32_C( 2.07), SIMDE_FLOAT32_C( 3.23)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 1148.23), SIMDE_FLOAT32_C( 7217.40), SIMDE_FLOAT32_C( 2082.02), SIMDE_FLOAT32_C( 6902.28), SIMDE_FLOAT32_C( 1146.40), SIMDE_FLOAT32_C( 9969.51), SIMDE_FLOAT32_C( 5140.40), SIMDE_FLOAT32_C( 9206.03)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 3.06), SIMDE_FLOAT32_C( 3.86), SIMDE_FLOAT32_C( 3.32), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( 3.06), SIMDE_FLOAT32_C( 
4.00), SIMDE_FLOAT32_C( 3.71), SIMDE_FLOAT32_C( 3.96)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 3060.52), SIMDE_FLOAT32_C( 6979.60), SIMDE_FLOAT32_C( 8279.36), SIMDE_FLOAT32_C( 6696.04), SIMDE_FLOAT32_C( 7661.76), SIMDE_FLOAT32_C( 3680.04), SIMDE_FLOAT32_C( 8903.22), SIMDE_FLOAT32_C( 4846.05)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 3.49), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( 3.92), SIMDE_FLOAT32_C( 3.83), SIMDE_FLOAT32_C( 3.88), SIMDE_FLOAT32_C( 3.57), SIMDE_FLOAT32_C( 3.95), SIMDE_FLOAT32_C( 3.69)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 3981.75), SIMDE_FLOAT32_C( 4596.36), SIMDE_FLOAT32_C( 6683.64), SIMDE_FLOAT32_C( 276.11), SIMDE_FLOAT32_C( 1262.07), SIMDE_FLOAT32_C( 1163.84), SIMDE_FLOAT32_C( 2229.06), SIMDE_FLOAT32_C( 6994.08)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 3.60), SIMDE_FLOAT32_C( 3.66), SIMDE_FLOAT32_C( 3.83), SIMDE_FLOAT32_C( 2.44), SIMDE_FLOAT32_C( 3.10), SIMDE_FLOAT32_C( 3.07), SIMDE_FLOAT32_C( 3.35), SIMDE_FLOAT32_C( 3.84)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 7348.31), SIMDE_FLOAT32_C( 8400.08), SIMDE_FLOAT32_C( 4256.55), SIMDE_FLOAT32_C( 9093.31), SIMDE_FLOAT32_C( 9550.14), SIMDE_FLOAT32_C( 8002.34), SIMDE_FLOAT32_C( 8956.15), SIMDE_FLOAT32_C( 6271.53)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 3.87), SIMDE_FLOAT32_C( 3.92), SIMDE_FLOAT32_C( 3.63), SIMDE_FLOAT32_C( 3.96), SIMDE_FLOAT32_C( 3.98), SIMDE_FLOAT32_C( 3.90), SIMDE_FLOAT32_C( 3.95), SIMDE_FLOAT32_C( 3.80)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_log10_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; }
/*
 * Table-driven check of simde_mm256_log10_pd.
 *
 * test_vec is a local (non-static) array of eight entries whose input (a)
 * and expected (r) vectors are built with simde_mm256_set_pd; expected
 * values are the two-decimal base-10 logarithms of the inputs. Results are
 * compared with simde_assert_m256d_close(r, expected, 1).
 * NOTE(review): the trailing 1 is presumably the comparison precision used
 * by the assert helper -- confirm against the test utilities.
 *
 * Returns 0 once every entry has been checked.
 */
static int test_simde_mm256_log10_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 4068.94), SIMDE_FLOAT64_C( 5195.06), SIMDE_FLOAT64_C( 1228.12), SIMDE_FLOAT64_C( 6733.16)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 3.61), SIMDE_FLOAT64_C( 3.72), SIMDE_FLOAT64_C( 3.09), SIMDE_FLOAT64_C( 3.83)) }, {
simde_mm256_set_pd(SIMDE_FLOAT64_C( 7486.55), SIMDE_FLOAT64_C( 8351.20), SIMDE_FLOAT64_C( 3512.77), SIMDE_FLOAT64_C( 5170.29)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 3.87), SIMDE_FLOAT64_C( 3.92), SIMDE_FLOAT64_C( 3.55), SIMDE_FLOAT64_C( 3.71)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 9127.65), SIMDE_FLOAT64_C( 7111.03), SIMDE_FLOAT64_C( 3652.77), SIMDE_FLOAT64_C( 7338.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 3.96), SIMDE_FLOAT64_C( 3.85), SIMDE_FLOAT64_C( 3.56), SIMDE_FLOAT64_C( 3.87)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 1609.14), SIMDE_FLOAT64_C( 1569.36), SIMDE_FLOAT64_C( 5423.87), SIMDE_FLOAT64_C( 7857.29)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 3.21), SIMDE_FLOAT64_C( 3.20), SIMDE_FLOAT64_C( 3.73), SIMDE_FLOAT64_C( 3.90)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 3474.63), SIMDE_FLOAT64_C( 695.25), SIMDE_FLOAT64_C( 2912.29), SIMDE_FLOAT64_C( 8484.34)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 3.54), SIMDE_FLOAT64_C( 2.84), SIMDE_FLOAT64_C( 3.46), SIMDE_FLOAT64_C( 3.93)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 2775.95), SIMDE_FLOAT64_C( 5142.35), SIMDE_FLOAT64_C( 3079.83), SIMDE_FLOAT64_C( 381.82)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 3.44), SIMDE_FLOAT64_C( 3.71), SIMDE_FLOAT64_C( 3.49), SIMDE_FLOAT64_C( 2.58)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 6306.54), SIMDE_FLOAT64_C( 3937.29), SIMDE_FLOAT64_C( 117.23), SIMDE_FLOAT64_C( 1696.00)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 3.80), SIMDE_FLOAT64_C( 3.60), SIMDE_FLOAT64_C( 2.07), SIMDE_FLOAT64_C( 3.23)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 5890.98), SIMDE_FLOAT64_C( 2746.67), SIMDE_FLOAT64_C( 6166.85), SIMDE_FLOAT64_C( 8435.45)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 3.77), SIMDE_FLOAT64_C( 3.44), SIMDE_FLOAT64_C( 3.79), SIMDE_FLOAT64_C( 3.93)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_log10_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; }
/*
 * Table-driven check of simde_mm512_log10_ps.
 *
 * test_vec is a local (non-static) array of eight entries, each pairing an
 * input vector (a) with the expected result (r), both built with
 * simde_mm512_set_ps; expected values are the two-decimal base-10
 * logarithms of the inputs. Results are compared with
 * simde_assert_m512_close(r, expected, 1).
 * NOTE(review): the trailing 1 is presumably the comparison precision used
 * by the assert helper -- confirm against the test utilities.
 *
 * Returns 0 once every entry has been checked.
 */
static int test_simde_mm512_log10_ps(SIMDE_MUNIT_TEST_ARGS) { const struct {
simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( 1609.14), SIMDE_FLOAT32_C( 1569.36), SIMDE_FLOAT32_C( 5423.87), SIMDE_FLOAT32_C( 7857.29), SIMDE_FLOAT32_C( 9127.65), SIMDE_FLOAT32_C( 7111.03), SIMDE_FLOAT32_C( 3652.77), SIMDE_FLOAT32_C( 7338.80), SIMDE_FLOAT32_C( 7486.55), SIMDE_FLOAT32_C( 8351.20), SIMDE_FLOAT32_C( 3512.77), SIMDE_FLOAT32_C( 5170.29), SIMDE_FLOAT32_C( 4068.94), SIMDE_FLOAT32_C( 5195.06), SIMDE_FLOAT32_C( 1228.12), SIMDE_FLOAT32_C( 6733.16)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.21), SIMDE_FLOAT32_C( 3.20), SIMDE_FLOAT32_C( 3.73), SIMDE_FLOAT32_C( 3.90), SIMDE_FLOAT32_C( 3.96), SIMDE_FLOAT32_C( 3.85), SIMDE_FLOAT32_C( 3.56), SIMDE_FLOAT32_C( 3.87), SIMDE_FLOAT32_C( 3.87), SIMDE_FLOAT32_C( 3.92), SIMDE_FLOAT32_C( 3.55), SIMDE_FLOAT32_C( 3.71), SIMDE_FLOAT32_C( 3.61), SIMDE_FLOAT32_C( 3.72), SIMDE_FLOAT32_C( 3.09), SIMDE_FLOAT32_C( 3.83)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 5890.98), SIMDE_FLOAT32_C( 2746.67), SIMDE_FLOAT32_C( 6166.85), SIMDE_FLOAT32_C( 8435.45), SIMDE_FLOAT32_C( 6306.54), SIMDE_FLOAT32_C( 3937.29), SIMDE_FLOAT32_C( 117.23), SIMDE_FLOAT32_C( 1696.00), SIMDE_FLOAT32_C( 2775.95), SIMDE_FLOAT32_C( 5142.35), SIMDE_FLOAT32_C( 3079.83), SIMDE_FLOAT32_C( 381.82), SIMDE_FLOAT32_C( 3474.63), SIMDE_FLOAT32_C( 695.25), SIMDE_FLOAT32_C( 2912.29), SIMDE_FLOAT32_C( 8484.34)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.77), SIMDE_FLOAT32_C( 3.44), SIMDE_FLOAT32_C( 3.79), SIMDE_FLOAT32_C( 3.93), SIMDE_FLOAT32_C( 3.80), SIMDE_FLOAT32_C( 3.60), SIMDE_FLOAT32_C( 2.07), SIMDE_FLOAT32_C( 3.23), SIMDE_FLOAT32_C( 3.44), SIMDE_FLOAT32_C( 3.71), SIMDE_FLOAT32_C( 3.49), SIMDE_FLOAT32_C( 2.58), SIMDE_FLOAT32_C( 3.54), SIMDE_FLOAT32_C( 2.84), SIMDE_FLOAT32_C( 3.46), SIMDE_FLOAT32_C( 3.93)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 3060.52), SIMDE_FLOAT32_C( 6979.60), SIMDE_FLOAT32_C( 8279.36), SIMDE_FLOAT32_C( 6696.04), SIMDE_FLOAT32_C( 7661.76), SIMDE_FLOAT32_C( 3680.04), SIMDE_FLOAT32_C( 8903.22), SIMDE_FLOAT32_C( 
4846.05), SIMDE_FLOAT32_C( 1148.23), SIMDE_FLOAT32_C( 7217.40), SIMDE_FLOAT32_C( 2082.02), SIMDE_FLOAT32_C( 6902.28), SIMDE_FLOAT32_C( 1146.40), SIMDE_FLOAT32_C( 9969.51), SIMDE_FLOAT32_C( 5140.40), SIMDE_FLOAT32_C( 9206.03)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.49), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( 3.92), SIMDE_FLOAT32_C( 3.83), SIMDE_FLOAT32_C( 3.88), SIMDE_FLOAT32_C( 3.57), SIMDE_FLOAT32_C( 3.95), SIMDE_FLOAT32_C( 3.69), SIMDE_FLOAT32_C( 3.06), SIMDE_FLOAT32_C( 3.86), SIMDE_FLOAT32_C( 3.32), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( 3.06), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 3.71), SIMDE_FLOAT32_C( 3.96)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 7348.31), SIMDE_FLOAT32_C( 8400.08), SIMDE_FLOAT32_C( 4256.55), SIMDE_FLOAT32_C( 9093.31), SIMDE_FLOAT32_C( 9550.14), SIMDE_FLOAT32_C( 8002.34), SIMDE_FLOAT32_C( 8956.15), SIMDE_FLOAT32_C( 6271.53), SIMDE_FLOAT32_C( 3981.75), SIMDE_FLOAT32_C( 4596.36), SIMDE_FLOAT32_C( 6683.64), SIMDE_FLOAT32_C( 276.11), SIMDE_FLOAT32_C( 1262.07), SIMDE_FLOAT32_C( 1163.84), SIMDE_FLOAT32_C( 2229.06), SIMDE_FLOAT32_C( 6994.08)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.87), SIMDE_FLOAT32_C( 3.92), SIMDE_FLOAT32_C( 3.63), SIMDE_FLOAT32_C( 3.96), SIMDE_FLOAT32_C( 3.98), SIMDE_FLOAT32_C( 3.90), SIMDE_FLOAT32_C( 3.95), SIMDE_FLOAT32_C( 3.80), SIMDE_FLOAT32_C( 3.60), SIMDE_FLOAT32_C( 3.66), SIMDE_FLOAT32_C( 3.83), SIMDE_FLOAT32_C( 2.44), SIMDE_FLOAT32_C( 3.10), SIMDE_FLOAT32_C( 3.07), SIMDE_FLOAT32_C( 3.35), SIMDE_FLOAT32_C( 3.84)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 4105.04), SIMDE_FLOAT32_C( 8793.93), SIMDE_FLOAT32_C( 6623.12), SIMDE_FLOAT32_C( 6717.40), SIMDE_FLOAT32_C( 628.43), SIMDE_FLOAT32_C( 1010.42), SIMDE_FLOAT32_C( 3357.32), SIMDE_FLOAT32_C( 2370.85), SIMDE_FLOAT32_C( 4038.44), SIMDE_FLOAT32_C( 886.73), SIMDE_FLOAT32_C( 7806.81), SIMDE_FLOAT32_C( 8278.35), SIMDE_FLOAT32_C( 4645.43), SIMDE_FLOAT32_C( 7716.73), SIMDE_FLOAT32_C( 5603.27), SIMDE_FLOAT32_C( 4142.45)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.61), 
SIMDE_FLOAT32_C( 3.94), SIMDE_FLOAT32_C( 3.82), SIMDE_FLOAT32_C( 3.83), SIMDE_FLOAT32_C( 2.80), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 3.53), SIMDE_FLOAT32_C( 3.37), SIMDE_FLOAT32_C( 3.61), SIMDE_FLOAT32_C( 2.95), SIMDE_FLOAT32_C( 3.89), SIMDE_FLOAT32_C( 3.92), SIMDE_FLOAT32_C( 3.67), SIMDE_FLOAT32_C( 3.89), SIMDE_FLOAT32_C( 3.75), SIMDE_FLOAT32_C( 3.62)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 8450.59), SIMDE_FLOAT32_C( 9203.26), SIMDE_FLOAT32_C( 4894.53), SIMDE_FLOAT32_C( 2042.18), SIMDE_FLOAT32_C( 2755.53), SIMDE_FLOAT32_C( 8657.47), SIMDE_FLOAT32_C( 7528.93), SIMDE_FLOAT32_C( 8118.50), SIMDE_FLOAT32_C( 9155.11), SIMDE_FLOAT32_C( 5703.37), SIMDE_FLOAT32_C( 9886.80), SIMDE_FLOAT32_C( 469.19), SIMDE_FLOAT32_C( 6656.71), SIMDE_FLOAT32_C( 5499.67), SIMDE_FLOAT32_C( 7314.76), SIMDE_FLOAT32_C( 1309.05)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.93), SIMDE_FLOAT32_C( 3.96), SIMDE_FLOAT32_C( 3.69), SIMDE_FLOAT32_C( 3.31), SIMDE_FLOAT32_C( 3.44), SIMDE_FLOAT32_C( 3.94), SIMDE_FLOAT32_C( 3.88), SIMDE_FLOAT32_C( 3.91), SIMDE_FLOAT32_C( 3.96), SIMDE_FLOAT32_C( 3.76), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 2.67), SIMDE_FLOAT32_C( 3.82), SIMDE_FLOAT32_C( 3.74), SIMDE_FLOAT32_C( 3.86), SIMDE_FLOAT32_C( 3.12)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 1154.54), SIMDE_FLOAT32_C( 9110.29), SIMDE_FLOAT32_C( 2130.97), SIMDE_FLOAT32_C( 11.83), SIMDE_FLOAT32_C( 3312.02), SIMDE_FLOAT32_C( 9618.20), SIMDE_FLOAT32_C( 6468.19), SIMDE_FLOAT32_C( 1159.42), SIMDE_FLOAT32_C( 2118.90), SIMDE_FLOAT32_C( 4661.80), SIMDE_FLOAT32_C( 8551.88), SIMDE_FLOAT32_C( 9887.44), SIMDE_FLOAT32_C( 1217.92), SIMDE_FLOAT32_C( 7124.06), SIMDE_FLOAT32_C( 5136.26), SIMDE_FLOAT32_C( 4524.23)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.06), SIMDE_FLOAT32_C( 3.96), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( 1.07), SIMDE_FLOAT32_C( 3.52), SIMDE_FLOAT32_C( 3.98), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 3.06), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( 3.67), SIMDE_FLOAT32_C( 3.93), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 
3.09), SIMDE_FLOAT32_C( 3.85), SIMDE_FLOAT32_C( 3.71), SIMDE_FLOAT32_C( 3.66)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 2809.03), SIMDE_FLOAT32_C( 3201.22), SIMDE_FLOAT32_C( 1237.85), SIMDE_FLOAT32_C( 4831.67), SIMDE_FLOAT32_C( 9663.28), SIMDE_FLOAT32_C( 5036.36), SIMDE_FLOAT32_C( 3363.90), SIMDE_FLOAT32_C( 4374.02), SIMDE_FLOAT32_C( 4087.77), SIMDE_FLOAT32_C( 5199.67), SIMDE_FLOAT32_C( 7554.25), SIMDE_FLOAT32_C( 6973.34), SIMDE_FLOAT32_C( 5071.68), SIMDE_FLOAT32_C( 3476.37), SIMDE_FLOAT32_C( 9581.30), SIMDE_FLOAT32_C( 1516.57)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.45), SIMDE_FLOAT32_C( 3.51), SIMDE_FLOAT32_C( 3.09), SIMDE_FLOAT32_C( 3.68), SIMDE_FLOAT32_C( 3.99), SIMDE_FLOAT32_C( 3.70), SIMDE_FLOAT32_C( 3.53), SIMDE_FLOAT32_C( 3.64), SIMDE_FLOAT32_C( 3.61), SIMDE_FLOAT32_C( 3.72), SIMDE_FLOAT32_C( 3.88), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( 3.71), SIMDE_FLOAT32_C( 3.54), SIMDE_FLOAT32_C( 3.98), SIMDE_FLOAT32_C( 3.18)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_log10_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; }
/*
 * Table-driven check of simde_mm512_mask_log10_ps.
 *
 * test_vec is a local (non-static) array of eight entries, each holding a
 * pass-through vector (src), a 16-bit mask (k), the input vector (a) and the
 * expected result (r); the vectors are built with simde_mm512_set_ps. In the
 * table, expected elements are either a two-decimal base-10 logarithm of the
 * matching element of a or a copy of the matching element of src, selected
 * by k.
 * NOTE(review): the data is consistent with the first simde_mm512_set_ps
 * argument pairing with bit 15 of k and the last with bit 0 (e.g. the first
 * entry, k = 41466 = 0xA1FA) -- confirm against the set_ps argument order.
 *
 * Each result of simde_mm512_mask_log10_ps(src, k, a) is compared with
 * simde_assert_m512_close(r, expected, 1).
 * NOTE(review): the trailing 1 is presumably the comparison precision used
 * by the assert helper -- confirm against the test utilities.
 *
 * Returns 0 once every entry has been checked.
 */
static int test_simde_mm512_mask_log10_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( 2746.67), SIMDE_FLOAT32_C( 8435.45), SIMDE_FLOAT32_C( 3937.29), SIMDE_FLOAT32_C( 1696.00), SIMDE_FLOAT32_C( 5142.35), SIMDE_FLOAT32_C( 381.82), SIMDE_FLOAT32_C( 695.25), SIMDE_FLOAT32_C( 8484.34), SIMDE_FLOAT32_C( 1569.36), SIMDE_FLOAT32_C( 7857.29), SIMDE_FLOAT32_C( 7111.03), SIMDE_FLOAT32_C( 7338.80), SIMDE_FLOAT32_C( 8351.20), SIMDE_FLOAT32_C( 5170.29), SIMDE_FLOAT32_C( 5195.06), SIMDE_FLOAT32_C( 6733.16)), UINT16_C(41466), simde_mm512_set_ps(SIMDE_FLOAT32_C( 5890.98), SIMDE_FLOAT32_C( 6166.85), SIMDE_FLOAT32_C( 6306.54), SIMDE_FLOAT32_C( 117.23), SIMDE_FLOAT32_C( 2775.95), SIMDE_FLOAT32_C( 3079.83), SIMDE_FLOAT32_C( 3474.63), SIMDE_FLOAT32_C( 2912.29),
SIMDE_FLOAT32_C( 1609.14), SIMDE_FLOAT32_C( 5423.87), SIMDE_FLOAT32_C( 9127.65), SIMDE_FLOAT32_C( 3652.77), SIMDE_FLOAT32_C( 7486.55), SIMDE_FLOAT32_C( 3512.77), SIMDE_FLOAT32_C( 4068.94), SIMDE_FLOAT32_C( 1228.12)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.77), SIMDE_FLOAT32_C( 8435.45), SIMDE_FLOAT32_C( 3.80), SIMDE_FLOAT32_C( 1696.00), SIMDE_FLOAT32_C( 5142.35), SIMDE_FLOAT32_C( 381.82), SIMDE_FLOAT32_C( 695.25), SIMDE_FLOAT32_C( 3.46), SIMDE_FLOAT32_C( 3.21), SIMDE_FLOAT32_C( 3.73), SIMDE_FLOAT32_C( 3.96), SIMDE_FLOAT32_C( 3.56), SIMDE_FLOAT32_C( 3.87), SIMDE_FLOAT32_C( 5170.29), SIMDE_FLOAT32_C( 3.61), SIMDE_FLOAT32_C( 6733.16)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 7348.31), SIMDE_FLOAT32_C( 4256.55), SIMDE_FLOAT32_C( 9550.14), SIMDE_FLOAT32_C( 8956.15), SIMDE_FLOAT32_C( 3981.75), SIMDE_FLOAT32_C( 6683.64), SIMDE_FLOAT32_C( 1262.07), SIMDE_FLOAT32_C( 2229.06), SIMDE_FLOAT32_C( 3060.52), SIMDE_FLOAT32_C( 8279.36), SIMDE_FLOAT32_C( 7661.76), SIMDE_FLOAT32_C( 8903.22), SIMDE_FLOAT32_C( 1148.23), SIMDE_FLOAT32_C( 2082.02), SIMDE_FLOAT32_C( 1146.40), SIMDE_FLOAT32_C( 5140.40)), UINT16_C(36797), simde_mm512_set_ps(SIMDE_FLOAT32_C( 4142.45), SIMDE_FLOAT32_C( 8400.08), SIMDE_FLOAT32_C( 9093.31), SIMDE_FLOAT32_C( 8002.34), SIMDE_FLOAT32_C( 6271.53), SIMDE_FLOAT32_C( 4596.36), SIMDE_FLOAT32_C( 276.11), SIMDE_FLOAT32_C( 1163.84), SIMDE_FLOAT32_C( 6994.08), SIMDE_FLOAT32_C( 6979.60), SIMDE_FLOAT32_C( 6696.04), SIMDE_FLOAT32_C( 3680.04), SIMDE_FLOAT32_C( 4846.05), SIMDE_FLOAT32_C( 7217.40), SIMDE_FLOAT32_C( 6902.28), SIMDE_FLOAT32_C( 9969.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.62), SIMDE_FLOAT32_C( 4256.55), SIMDE_FLOAT32_C( 9550.14), SIMDE_FLOAT32_C( 8956.15), SIMDE_FLOAT32_C( 3.80), SIMDE_FLOAT32_C( 3.66), SIMDE_FLOAT32_C( 2.44), SIMDE_FLOAT32_C( 3.07), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( 8279.36), SIMDE_FLOAT32_C( 3.83), SIMDE_FLOAT32_C( 3.57), SIMDE_FLOAT32_C( 3.69), SIMDE_FLOAT32_C( 3.86), SIMDE_FLOAT32_C( 1146.40), SIMDE_FLOAT32_C( 4.00)) }, { 
simde_mm512_set_ps(SIMDE_FLOAT32_C( 4524.23), SIMDE_FLOAT32_C( 9203.26), SIMDE_FLOAT32_C( 2042.18), SIMDE_FLOAT32_C( 8657.47), SIMDE_FLOAT32_C( 8118.50), SIMDE_FLOAT32_C( 5703.37), SIMDE_FLOAT32_C( 469.19), SIMDE_FLOAT32_C( 5499.67), SIMDE_FLOAT32_C( 1309.05), SIMDE_FLOAT32_C( 8793.93), SIMDE_FLOAT32_C( 6717.40), SIMDE_FLOAT32_C( 1010.42), SIMDE_FLOAT32_C( 2370.85), SIMDE_FLOAT32_C( 886.73), SIMDE_FLOAT32_C( 8278.35), SIMDE_FLOAT32_C( 7716.73)), UINT16_C(16804), simde_mm512_set_ps(SIMDE_FLOAT32_C( 5136.26), SIMDE_FLOAT32_C( 8450.59), SIMDE_FLOAT32_C( 4894.53), SIMDE_FLOAT32_C( 2755.53), SIMDE_FLOAT32_C( 7528.93), SIMDE_FLOAT32_C( 9155.11), SIMDE_FLOAT32_C( 9886.80), SIMDE_FLOAT32_C( 6656.71), SIMDE_FLOAT32_C( 7314.76), SIMDE_FLOAT32_C( 4105.04), SIMDE_FLOAT32_C( 6623.12), SIMDE_FLOAT32_C( 628.43), SIMDE_FLOAT32_C( 3357.32), SIMDE_FLOAT32_C( 4038.44), SIMDE_FLOAT32_C( 7806.81), SIMDE_FLOAT32_C( 4645.43)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 4524.23), SIMDE_FLOAT32_C( 3.93), SIMDE_FLOAT32_C( 2042.18), SIMDE_FLOAT32_C( 8657.47), SIMDE_FLOAT32_C( 8118.50), SIMDE_FLOAT32_C( 5703.37), SIMDE_FLOAT32_C( 469.19), SIMDE_FLOAT32_C( 3.82), SIMDE_FLOAT32_C( 3.86), SIMDE_FLOAT32_C( 8793.93), SIMDE_FLOAT32_C( 3.82), SIMDE_FLOAT32_C( 1010.42), SIMDE_FLOAT32_C( 2370.85), SIMDE_FLOAT32_C( 3.61), SIMDE_FLOAT32_C( 8278.35), SIMDE_FLOAT32_C( 7716.73)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 3256.50), SIMDE_FLOAT32_C( 2809.03), SIMDE_FLOAT32_C( 1237.85), SIMDE_FLOAT32_C( 9663.28), SIMDE_FLOAT32_C( 3363.90), SIMDE_FLOAT32_C( 4087.77), SIMDE_FLOAT32_C( 7554.25), SIMDE_FLOAT32_C( 5071.68), SIMDE_FLOAT32_C( 9581.30), SIMDE_FLOAT32_C( 1154.54), SIMDE_FLOAT32_C( 2130.97), SIMDE_FLOAT32_C( 3312.02), SIMDE_FLOAT32_C( 6468.19), SIMDE_FLOAT32_C( 2118.90), SIMDE_FLOAT32_C( 8551.88), SIMDE_FLOAT32_C( 1217.92)), UINT16_C( 2107), simde_mm512_set_ps(SIMDE_FLOAT32_C( 9486.33), SIMDE_FLOAT32_C( 4010.56), SIMDE_FLOAT32_C( 3201.22), SIMDE_FLOAT32_C( 4831.67), SIMDE_FLOAT32_C( 5036.36), SIMDE_FLOAT32_C( 
4374.02), SIMDE_FLOAT32_C( 5199.67), SIMDE_FLOAT32_C( 6973.34), SIMDE_FLOAT32_C( 3476.37), SIMDE_FLOAT32_C( 1516.57), SIMDE_FLOAT32_C( 9110.29), SIMDE_FLOAT32_C( 11.83), SIMDE_FLOAT32_C( 9618.20), SIMDE_FLOAT32_C( 1159.42), SIMDE_FLOAT32_C( 4661.80), SIMDE_FLOAT32_C( 9887.44)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3256.50), SIMDE_FLOAT32_C( 2809.03), SIMDE_FLOAT32_C( 1237.85), SIMDE_FLOAT32_C( 9663.28), SIMDE_FLOAT32_C( 3.70), SIMDE_FLOAT32_C( 4087.77), SIMDE_FLOAT32_C( 7554.25), SIMDE_FLOAT32_C( 5071.68), SIMDE_FLOAT32_C( 9581.30), SIMDE_FLOAT32_C( 1154.54), SIMDE_FLOAT32_C( 3.96), SIMDE_FLOAT32_C( 1.07), SIMDE_FLOAT32_C( 3.98), SIMDE_FLOAT32_C( 2118.90), SIMDE_FLOAT32_C( 3.67), SIMDE_FLOAT32_C( 4.00)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 4921.97), SIMDE_FLOAT32_C( 1314.36), SIMDE_FLOAT32_C( 3425.34), SIMDE_FLOAT32_C( 5889.62), SIMDE_FLOAT32_C( 6729.66), SIMDE_FLOAT32_C( 9443.57), SIMDE_FLOAT32_C( 9578.53), SIMDE_FLOAT32_C( 5667.58), SIMDE_FLOAT32_C( 7424.68), SIMDE_FLOAT32_C( 2009.69), SIMDE_FLOAT32_C( 1044.67), SIMDE_FLOAT32_C( 1170.36), SIMDE_FLOAT32_C( 6106.86), SIMDE_FLOAT32_C( 1058.19), SIMDE_FLOAT32_C( 1124.78), SIMDE_FLOAT32_C( 7203.19)), UINT16_C(22274), simde_mm512_set_ps(SIMDE_FLOAT32_C( 7482.85), SIMDE_FLOAT32_C( 9575.95), SIMDE_FLOAT32_C( 1407.98), SIMDE_FLOAT32_C( 5799.87), SIMDE_FLOAT32_C( 694.94), SIMDE_FLOAT32_C( 7133.07), SIMDE_FLOAT32_C( 9660.54), SIMDE_FLOAT32_C( 5551.82), SIMDE_FLOAT32_C( 9134.21), SIMDE_FLOAT32_C( 4616.24), SIMDE_FLOAT32_C( 6187.92), SIMDE_FLOAT32_C( 3107.51), SIMDE_FLOAT32_C( 1991.62), SIMDE_FLOAT32_C( 1882.51), SIMDE_FLOAT32_C( 287.66), SIMDE_FLOAT32_C( 7377.56)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 4921.97), SIMDE_FLOAT32_C( 3.98), SIMDE_FLOAT32_C( 3425.34), SIMDE_FLOAT32_C( 3.76), SIMDE_FLOAT32_C( 6729.66), SIMDE_FLOAT32_C( 3.85), SIMDE_FLOAT32_C( 3.99), SIMDE_FLOAT32_C( 3.74), SIMDE_FLOAT32_C( 7424.68), SIMDE_FLOAT32_C( 2009.69), SIMDE_FLOAT32_C( 1044.67), SIMDE_FLOAT32_C( 1170.36), SIMDE_FLOAT32_C( 6106.86), 
SIMDE_FLOAT32_C( 1058.19), SIMDE_FLOAT32_C( 2.46), SIMDE_FLOAT32_C( 7203.19)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 9415.27), SIMDE_FLOAT32_C( 963.59), SIMDE_FLOAT32_C( 4649.74), SIMDE_FLOAT32_C( 1078.30), SIMDE_FLOAT32_C( 5462.61), SIMDE_FLOAT32_C( 6033.01), SIMDE_FLOAT32_C( 9173.00), SIMDE_FLOAT32_C( 4672.02), SIMDE_FLOAT32_C( 3569.65), SIMDE_FLOAT32_C( 3935.68), SIMDE_FLOAT32_C( 3408.08), SIMDE_FLOAT32_C( 8917.42), SIMDE_FLOAT32_C( 1855.90), SIMDE_FLOAT32_C( 7781.74), SIMDE_FLOAT32_C( 7197.17), SIMDE_FLOAT32_C( 7170.16)), UINT16_C(27396), simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.74), SIMDE_FLOAT32_C( 2968.36), SIMDE_FLOAT32_C( 1281.72), SIMDE_FLOAT32_C( 1177.11), SIMDE_FLOAT32_C( 8949.44), SIMDE_FLOAT32_C( 5024.17), SIMDE_FLOAT32_C( 907.29), SIMDE_FLOAT32_C( 5805.32), SIMDE_FLOAT32_C( 7896.24), SIMDE_FLOAT32_C( 4941.12), SIMDE_FLOAT32_C( 3457.39), SIMDE_FLOAT32_C( 1402.13), SIMDE_FLOAT32_C( 6670.00), SIMDE_FLOAT32_C( 6373.56), SIMDE_FLOAT32_C( 415.89), SIMDE_FLOAT32_C( 2550.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 9415.27), SIMDE_FLOAT32_C( 3.47), SIMDE_FLOAT32_C( 3.11), SIMDE_FLOAT32_C( 1078.30), SIMDE_FLOAT32_C( 3.95), SIMDE_FLOAT32_C( 6033.01), SIMDE_FLOAT32_C( 2.96), SIMDE_FLOAT32_C( 3.76), SIMDE_FLOAT32_C( 3569.65), SIMDE_FLOAT32_C( 3935.68), SIMDE_FLOAT32_C( 3408.08), SIMDE_FLOAT32_C( 8917.42), SIMDE_FLOAT32_C( 1855.90), SIMDE_FLOAT32_C( 3.80), SIMDE_FLOAT32_C( 7197.17), SIMDE_FLOAT32_C( 7170.16)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 7648.13), SIMDE_FLOAT32_C( 4875.56), SIMDE_FLOAT32_C( 161.12), SIMDE_FLOAT32_C( 8194.68), SIMDE_FLOAT32_C( 7254.51), SIMDE_FLOAT32_C( 1142.29), SIMDE_FLOAT32_C( 5528.96), SIMDE_FLOAT32_C( 7950.51), SIMDE_FLOAT32_C( 5154.57), SIMDE_FLOAT32_C( 8176.75), SIMDE_FLOAT32_C( 4580.00), SIMDE_FLOAT32_C( 5400.22), SIMDE_FLOAT32_C( 1452.71), SIMDE_FLOAT32_C( 8039.28), SIMDE_FLOAT32_C( 6972.90), SIMDE_FLOAT32_C( 554.46)), UINT16_C( 953), simde_mm512_set_ps(SIMDE_FLOAT32_C( 5093.74), SIMDE_FLOAT32_C( 9045.23), SIMDE_FLOAT32_C( 
5720.26), SIMDE_FLOAT32_C( 2861.39), SIMDE_FLOAT32_C( 6541.39), SIMDE_FLOAT32_C( 4114.75), SIMDE_FLOAT32_C( 2711.17), SIMDE_FLOAT32_C( 8391.22), SIMDE_FLOAT32_C( 5330.27), SIMDE_FLOAT32_C( 3661.45), SIMDE_FLOAT32_C( 5586.41), SIMDE_FLOAT32_C( 2116.00), SIMDE_FLOAT32_C( 4808.04), SIMDE_FLOAT32_C( 3749.32), SIMDE_FLOAT32_C( 4730.38), SIMDE_FLOAT32_C( 5459.69)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 7648.13), SIMDE_FLOAT32_C( 4875.56), SIMDE_FLOAT32_C( 161.12), SIMDE_FLOAT32_C( 8194.68), SIMDE_FLOAT32_C( 7254.51), SIMDE_FLOAT32_C( 1142.29), SIMDE_FLOAT32_C( 3.43), SIMDE_FLOAT32_C( 3.92), SIMDE_FLOAT32_C( 3.73), SIMDE_FLOAT32_C( 8176.75), SIMDE_FLOAT32_C( 3.75), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( 3.68), SIMDE_FLOAT32_C( 8039.28), SIMDE_FLOAT32_C( 6972.90), SIMDE_FLOAT32_C( 3.74)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 1058.07), SIMDE_FLOAT32_C( 6652.15), SIMDE_FLOAT32_C( 2532.95), SIMDE_FLOAT32_C( 9113.62), SIMDE_FLOAT32_C( 9783.41), SIMDE_FLOAT32_C( 9773.08), SIMDE_FLOAT32_C( 9127.47), SIMDE_FLOAT32_C( 918.64), SIMDE_FLOAT32_C( 3953.30), SIMDE_FLOAT32_C( 333.95), SIMDE_FLOAT32_C( 1356.49), SIMDE_FLOAT32_C( 2899.69), SIMDE_FLOAT32_C( 5501.59), SIMDE_FLOAT32_C( 5515.77), SIMDE_FLOAT32_C( 7198.84), SIMDE_FLOAT32_C( 3978.34)), UINT16_C(12713), simde_mm512_set_ps(SIMDE_FLOAT32_C( 792.83), SIMDE_FLOAT32_C( 4929.19), SIMDE_FLOAT32_C( 9124.38), SIMDE_FLOAT32_C( 8968.13), SIMDE_FLOAT32_C( 1316.26), SIMDE_FLOAT32_C( 3447.13), SIMDE_FLOAT32_C( 8644.35), SIMDE_FLOAT32_C( 3246.39), SIMDE_FLOAT32_C( 5304.47), SIMDE_FLOAT32_C( 5549.07), SIMDE_FLOAT32_C( 8579.68), SIMDE_FLOAT32_C( 3747.01), SIMDE_FLOAT32_C( 9720.69), SIMDE_FLOAT32_C( 6809.26), SIMDE_FLOAT32_C( 4934.63), SIMDE_FLOAT32_C( 9263.02)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1058.07), SIMDE_FLOAT32_C( 6652.15), SIMDE_FLOAT32_C( 3.96), SIMDE_FLOAT32_C( 3.95), SIMDE_FLOAT32_C( 9783.41), SIMDE_FLOAT32_C( 9773.08), SIMDE_FLOAT32_C( 9127.47), SIMDE_FLOAT32_C( 3.51), SIMDE_FLOAT32_C( 3.72), SIMDE_FLOAT32_C( 333.95), 
SIMDE_FLOAT32_C( 3.93), SIMDE_FLOAT32_C( 2899.69), SIMDE_FLOAT32_C( 3.99), SIMDE_FLOAT32_C( 5515.77), SIMDE_FLOAT32_C( 7198.84), SIMDE_FLOAT32_C( 3.97)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_log10_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_log10_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 7486.55), SIMDE_FLOAT64_C( 8351.20), SIMDE_FLOAT64_C( 3512.77), SIMDE_FLOAT64_C( 5170.29), SIMDE_FLOAT64_C( 4068.94), SIMDE_FLOAT64_C( 5195.06), SIMDE_FLOAT64_C( 1228.12), SIMDE_FLOAT64_C( 6733.16)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.87), SIMDE_FLOAT64_C( 3.92), SIMDE_FLOAT64_C( 3.55), SIMDE_FLOAT64_C( 3.71), SIMDE_FLOAT64_C( 3.61), SIMDE_FLOAT64_C( 3.72), SIMDE_FLOAT64_C( 3.09), SIMDE_FLOAT64_C( 3.83)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 1609.14), SIMDE_FLOAT64_C( 1569.36), SIMDE_FLOAT64_C( 5423.87), SIMDE_FLOAT64_C( 7857.29), SIMDE_FLOAT64_C( 9127.65), SIMDE_FLOAT64_C( 7111.03), SIMDE_FLOAT64_C( 3652.77), SIMDE_FLOAT64_C( 7338.80)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.21), SIMDE_FLOAT64_C( 3.20), SIMDE_FLOAT64_C( 3.73), SIMDE_FLOAT64_C( 3.90), SIMDE_FLOAT64_C( 3.96), SIMDE_FLOAT64_C( 3.85), SIMDE_FLOAT64_C( 3.56), SIMDE_FLOAT64_C( 3.87)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 2775.95), SIMDE_FLOAT64_C( 5142.35), SIMDE_FLOAT64_C( 3079.83), SIMDE_FLOAT64_C( 381.82), SIMDE_FLOAT64_C( 3474.63), SIMDE_FLOAT64_C( 695.25), SIMDE_FLOAT64_C( 2912.29), SIMDE_FLOAT64_C( 8484.34)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.44), SIMDE_FLOAT64_C( 3.71), SIMDE_FLOAT64_C( 3.49), SIMDE_FLOAT64_C( 2.58), SIMDE_FLOAT64_C( 3.54), SIMDE_FLOAT64_C( 2.84), SIMDE_FLOAT64_C( 3.46), SIMDE_FLOAT64_C( 3.93)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 5890.98), SIMDE_FLOAT64_C( 2746.67), SIMDE_FLOAT64_C( 6166.85), SIMDE_FLOAT64_C( 8435.45), 
SIMDE_FLOAT64_C( 6306.54), SIMDE_FLOAT64_C( 3937.29), SIMDE_FLOAT64_C( 117.23), SIMDE_FLOAT64_C( 1696.00)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.77), SIMDE_FLOAT64_C( 3.44), SIMDE_FLOAT64_C( 3.79), SIMDE_FLOAT64_C( 3.93), SIMDE_FLOAT64_C( 3.80), SIMDE_FLOAT64_C( 3.60), SIMDE_FLOAT64_C( 2.07), SIMDE_FLOAT64_C( 3.23)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 1148.23), SIMDE_FLOAT64_C( 7217.40), SIMDE_FLOAT64_C( 2082.02), SIMDE_FLOAT64_C( 6902.28), SIMDE_FLOAT64_C( 1146.40), SIMDE_FLOAT64_C( 9969.51), SIMDE_FLOAT64_C( 5140.40), SIMDE_FLOAT64_C( 9206.03)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.06), SIMDE_FLOAT64_C( 3.86), SIMDE_FLOAT64_C( 3.32), SIMDE_FLOAT64_C( 3.84), SIMDE_FLOAT64_C( 3.06), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 3.71), SIMDE_FLOAT64_C( 3.96)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 3060.52), SIMDE_FLOAT64_C( 6979.60), SIMDE_FLOAT64_C( 8279.36), SIMDE_FLOAT64_C( 6696.04), SIMDE_FLOAT64_C( 7661.76), SIMDE_FLOAT64_C( 3680.04), SIMDE_FLOAT64_C( 8903.22), SIMDE_FLOAT64_C( 4846.05)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.49), SIMDE_FLOAT64_C( 3.84), SIMDE_FLOAT64_C( 3.92), SIMDE_FLOAT64_C( 3.83), SIMDE_FLOAT64_C( 3.88), SIMDE_FLOAT64_C( 3.57), SIMDE_FLOAT64_C( 3.95), SIMDE_FLOAT64_C( 3.69)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 3981.75), SIMDE_FLOAT64_C( 4596.36), SIMDE_FLOAT64_C( 6683.64), SIMDE_FLOAT64_C( 276.11), SIMDE_FLOAT64_C( 1262.07), SIMDE_FLOAT64_C( 1163.84), SIMDE_FLOAT64_C( 2229.06), SIMDE_FLOAT64_C( 6994.08)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.60), SIMDE_FLOAT64_C( 3.66), SIMDE_FLOAT64_C( 3.83), SIMDE_FLOAT64_C( 2.44), SIMDE_FLOAT64_C( 3.10), SIMDE_FLOAT64_C( 3.07), SIMDE_FLOAT64_C( 3.35), SIMDE_FLOAT64_C( 3.84)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 7348.31), SIMDE_FLOAT64_C( 8400.08), SIMDE_FLOAT64_C( 4256.55), SIMDE_FLOAT64_C( 9093.31), SIMDE_FLOAT64_C( 9550.14), SIMDE_FLOAT64_C( 8002.34), SIMDE_FLOAT64_C( 8956.15), SIMDE_FLOAT64_C( 6271.53)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.87), SIMDE_FLOAT64_C( 3.92), SIMDE_FLOAT64_C( 
3.63), SIMDE_FLOAT64_C( 3.96), SIMDE_FLOAT64_C( 3.98), SIMDE_FLOAT64_C( 3.90), SIMDE_FLOAT64_C( 3.95), SIMDE_FLOAT64_C( 3.80)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_log10_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_log10_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 1569.36), SIMDE_FLOAT64_C( 7857.29), SIMDE_FLOAT64_C( 7111.03), SIMDE_FLOAT64_C( 7338.80), SIMDE_FLOAT64_C( 8351.20), SIMDE_FLOAT64_C( 5170.29), SIMDE_FLOAT64_C( 5195.06), SIMDE_FLOAT64_C( 6733.16)), UINT8_C(139), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1609.14), SIMDE_FLOAT64_C( 5423.87), SIMDE_FLOAT64_C( 9127.65), SIMDE_FLOAT64_C( 3652.77), SIMDE_FLOAT64_C( 7486.55), SIMDE_FLOAT64_C( 3512.77), SIMDE_FLOAT64_C( 4068.94), SIMDE_FLOAT64_C( 1228.12)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.21), SIMDE_FLOAT64_C( 7857.29), SIMDE_FLOAT64_C( 7111.03), SIMDE_FLOAT64_C( 7338.80), SIMDE_FLOAT64_C( 3.87), SIMDE_FLOAT64_C( 5170.29), SIMDE_FLOAT64_C( 3.61), SIMDE_FLOAT64_C( 3.09)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 5890.98), SIMDE_FLOAT64_C( 6166.85), SIMDE_FLOAT64_C( 6306.54), SIMDE_FLOAT64_C( 117.23), SIMDE_FLOAT64_C( 2775.95), SIMDE_FLOAT64_C( 3079.83), SIMDE_FLOAT64_C( 3474.63), SIMDE_FLOAT64_C( 2912.29)), UINT8_C(229), simde_mm512_set_pd(SIMDE_FLOAT64_C( 9206.03), SIMDE_FLOAT64_C( 2746.67), SIMDE_FLOAT64_C( 8435.45), SIMDE_FLOAT64_C( 3937.29), SIMDE_FLOAT64_C( 1696.00), SIMDE_FLOAT64_C( 5142.35), SIMDE_FLOAT64_C( 381.82), SIMDE_FLOAT64_C( 695.25)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.96), SIMDE_FLOAT64_C( 3.44), SIMDE_FLOAT64_C( 3.93), SIMDE_FLOAT64_C( 117.23), SIMDE_FLOAT64_C( 2775.95), SIMDE_FLOAT64_C( 3.71), SIMDE_FLOAT64_C( 3474.63), SIMDE_FLOAT64_C( 2.84)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 6994.08), SIMDE_FLOAT64_C( 6979.60), SIMDE_FLOAT64_C( 
6696.04), SIMDE_FLOAT64_C( 3680.04), SIMDE_FLOAT64_C( 4846.05), SIMDE_FLOAT64_C( 7217.40), SIMDE_FLOAT64_C( 6902.28), SIMDE_FLOAT64_C( 9969.51)), UINT8_C(253), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2229.06), SIMDE_FLOAT64_C( 3060.52), SIMDE_FLOAT64_C( 8279.36), SIMDE_FLOAT64_C( 7661.76), SIMDE_FLOAT64_C( 8903.22), SIMDE_FLOAT64_C( 1148.23), SIMDE_FLOAT64_C( 2082.02), SIMDE_FLOAT64_C( 1146.40)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.35), SIMDE_FLOAT64_C( 3.49), SIMDE_FLOAT64_C( 3.92), SIMDE_FLOAT64_C( 3.88), SIMDE_FLOAT64_C( 3.95), SIMDE_FLOAT64_C( 3.06), SIMDE_FLOAT64_C( 6902.28), SIMDE_FLOAT64_C( 3.06)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 5603.27), SIMDE_FLOAT64_C( 7348.31), SIMDE_FLOAT64_C( 4256.55), SIMDE_FLOAT64_C( 9550.14), SIMDE_FLOAT64_C( 8956.15), SIMDE_FLOAT64_C( 3981.75), SIMDE_FLOAT64_C( 6683.64), SIMDE_FLOAT64_C( 1262.07)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 7716.73), SIMDE_FLOAT64_C( 4142.45), SIMDE_FLOAT64_C( 8400.08), SIMDE_FLOAT64_C( 9093.31), SIMDE_FLOAT64_C( 8002.34), SIMDE_FLOAT64_C( 6271.53), SIMDE_FLOAT64_C( 4596.36), SIMDE_FLOAT64_C( 276.11)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 5603.27), SIMDE_FLOAT64_C( 3.62), SIMDE_FLOAT64_C( 4256.55), SIMDE_FLOAT64_C( 3.96), SIMDE_FLOAT64_C( 3.90), SIMDE_FLOAT64_C( 3.80), SIMDE_FLOAT64_C( 6683.64), SIMDE_FLOAT64_C( 2.44)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 5499.67), SIMDE_FLOAT64_C( 1309.05), SIMDE_FLOAT64_C( 8793.93), SIMDE_FLOAT64_C( 6717.40), SIMDE_FLOAT64_C( 1010.42), SIMDE_FLOAT64_C( 2370.85), SIMDE_FLOAT64_C( 886.73), SIMDE_FLOAT64_C( 8278.35)), UINT8_C(145), simde_mm512_set_pd(SIMDE_FLOAT64_C( 6656.71), SIMDE_FLOAT64_C( 7314.76), SIMDE_FLOAT64_C( 4105.04), SIMDE_FLOAT64_C( 6623.12), SIMDE_FLOAT64_C( 628.43), SIMDE_FLOAT64_C( 3357.32), SIMDE_FLOAT64_C( 4038.44), SIMDE_FLOAT64_C( 7806.81)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.82), SIMDE_FLOAT64_C( 1309.05), SIMDE_FLOAT64_C( 8793.93), SIMDE_FLOAT64_C( 3.82), SIMDE_FLOAT64_C( 1010.42), SIMDE_FLOAT64_C( 2370.85), 
SIMDE_FLOAT64_C( 886.73), SIMDE_FLOAT64_C( 3.89)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 1217.92), SIMDE_FLOAT64_C( 5136.26), SIMDE_FLOAT64_C( 8450.59), SIMDE_FLOAT64_C( 4894.53), SIMDE_FLOAT64_C( 2755.53), SIMDE_FLOAT64_C( 7528.93), SIMDE_FLOAT64_C( 9155.11), SIMDE_FLOAT64_C( 9886.80)), UINT8_C( 75), simde_mm512_set_pd(SIMDE_FLOAT64_C( 9887.44), SIMDE_FLOAT64_C( 7124.06), SIMDE_FLOAT64_C( 4524.23), SIMDE_FLOAT64_C( 9203.26), SIMDE_FLOAT64_C( 2042.18), SIMDE_FLOAT64_C( 8657.47), SIMDE_FLOAT64_C( 8118.50), SIMDE_FLOAT64_C( 5703.37)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1217.92), SIMDE_FLOAT64_C( 3.85), SIMDE_FLOAT64_C( 8450.59), SIMDE_FLOAT64_C( 4894.53), SIMDE_FLOAT64_C( 3.31), SIMDE_FLOAT64_C( 7528.93), SIMDE_FLOAT64_C( 3.91), SIMDE_FLOAT64_C( 3.76)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 6973.34), SIMDE_FLOAT64_C( 3476.37), SIMDE_FLOAT64_C( 1516.57), SIMDE_FLOAT64_C( 9110.29), SIMDE_FLOAT64_C( 11.83), SIMDE_FLOAT64_C( 9618.20), SIMDE_FLOAT64_C( 1159.42), SIMDE_FLOAT64_C( 4661.80)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 7554.25), SIMDE_FLOAT64_C( 5071.68), SIMDE_FLOAT64_C( 9581.30), SIMDE_FLOAT64_C( 1154.54), SIMDE_FLOAT64_C( 2130.97), SIMDE_FLOAT64_C( 3312.02), SIMDE_FLOAT64_C( 6468.19), SIMDE_FLOAT64_C( 2118.90)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 6973.34), SIMDE_FLOAT64_C( 3.71), SIMDE_FLOAT64_C( 1516.57), SIMDE_FLOAT64_C( 3.06), SIMDE_FLOAT64_C( 3.33), SIMDE_FLOAT64_C( 3.52), SIMDE_FLOAT64_C( 1159.42), SIMDE_FLOAT64_C( 3.33)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 7377.56), SIMDE_FLOAT64_C( 9683.23), SIMDE_FLOAT64_C( 3256.50), SIMDE_FLOAT64_C( 2809.03), SIMDE_FLOAT64_C( 1237.85), SIMDE_FLOAT64_C( 9663.28), SIMDE_FLOAT64_C( 3363.90), SIMDE_FLOAT64_C( 4087.77)), UINT8_C(213), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1124.78), SIMDE_FLOAT64_C( 7203.19), SIMDE_FLOAT64_C( 9486.33), SIMDE_FLOAT64_C( 4010.56), SIMDE_FLOAT64_C( 3201.22), SIMDE_FLOAT64_C( 4831.67), SIMDE_FLOAT64_C( 5036.36), SIMDE_FLOAT64_C( 4374.02)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 
3.05), SIMDE_FLOAT64_C( 3.86), SIMDE_FLOAT64_C( 3256.50), SIMDE_FLOAT64_C( 3.60), SIMDE_FLOAT64_C( 1237.85), SIMDE_FLOAT64_C( 3.68), SIMDE_FLOAT64_C( 3363.90), SIMDE_FLOAT64_C( 3.64)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_log10_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_logb_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 52.75), SIMDE_FLOAT32_C( 12.37), SIMDE_FLOAT32_C( 32.32), SIMDE_FLOAT32_C( 26.90) }, { SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 4.00) } }, { { SIMDE_FLOAT32_C( 28.49), SIMDE_FLOAT32_C( 18.47), SIMDE_FLOAT32_C( 63.22), SIMDE_FLOAT32_C( 55.89) }, { SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00) } }, { { SIMDE_FLOAT32_C( 55.03), SIMDE_FLOAT32_C( 53.88), SIMDE_FLOAT32_C( 60.21), SIMDE_FLOAT32_C( 98.39) }, { SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00) } }, { { SIMDE_FLOAT32_C( 48.09), SIMDE_FLOAT32_C( 71.36), SIMDE_FLOAT32_C( 70.54), SIMDE_FLOAT32_C( 16.55) }, { SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 4.00) } }, { { SIMDE_FLOAT32_C( 80.97), SIMDE_FLOAT32_C( 4.96), SIMDE_FLOAT32_C( 37.49), SIMDE_FLOAT32_C( 46.77) }, { SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00) } }, { { SIMDE_FLOAT32_C( 90.48), SIMDE_FLOAT32_C( 58.54), SIMDE_FLOAT32_C( 37.33), SIMDE_FLOAT32_C( 31.14) }, { SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 4.00) } }, { { SIMDE_FLOAT32_C( 72.20), SIMDE_FLOAT32_C( 35.18), SIMDE_FLOAT32_C( 41.35), SIMDE_FLOAT32_C( 41.72) }, { SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), 
SIMDE_FLOAT32_C( 5.00) } }, { { SIMDE_FLOAT32_C( 30.55), SIMDE_FLOAT32_C( 90.31), SIMDE_FLOAT32_C( 81.30), SIMDE_FLOAT32_C( 83.30) }, { SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_logb_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_logb_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 42.51), SIMDE_FLOAT64_C( 67.09) }, { SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 6.00) } }, { { SIMDE_FLOAT64_C( 79.25), SIMDE_FLOAT64_C( 26.02) }, { SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 4.00) } }, { { SIMDE_FLOAT64_C( 47.58), SIMDE_FLOAT64_C( 12.11) }, { SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 3.00) } }, { { SIMDE_FLOAT64_C( 67.84), SIMDE_FLOAT64_C( 75.08) }, { SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 6.00) } }, { { SIMDE_FLOAT64_C( 6.25), SIMDE_FLOAT64_C( 48.99) }, { SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 5.00) } }, { { SIMDE_FLOAT64_C( 74.95), SIMDE_FLOAT64_C( 97.10) }, { SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 6.00) } }, { { SIMDE_FLOAT64_C( 9.84), SIMDE_FLOAT64_C( 31.53) }, { SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 4.00) } }, { { SIMDE_FLOAT64_C( 85.29), SIMDE_FLOAT64_C( 31.26) }, { SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 4.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_logb_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_logb_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( 14.78), SIMDE_FLOAT32_C( 3.51), 
SIMDE_FLOAT32_C( 41.15), SIMDE_FLOAT32_C( 36.54), SIMDE_FLOAT32_C( 70.74), SIMDE_FLOAT32_C( 85.77), SIMDE_FLOAT32_C( 73.18) }, { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00) } }, { { SIMDE_FLOAT32_C( 8.54), SIMDE_FLOAT32_C( 76.06), SIMDE_FLOAT32_C( 3.47), SIMDE_FLOAT32_C( 1.66), SIMDE_FLOAT32_C( 10.98), SIMDE_FLOAT32_C( 98.59), SIMDE_FLOAT32_C( 85.97), SIMDE_FLOAT32_C( 34.95) }, { SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00) } }, { { SIMDE_FLOAT32_C( 50.04), SIMDE_FLOAT32_C( 79.93), SIMDE_FLOAT32_C( 79.22), SIMDE_FLOAT32_C( 75.66), SIMDE_FLOAT32_C( 78.73), SIMDE_FLOAT32_C( 98.52), SIMDE_FLOAT32_C( 71.74), SIMDE_FLOAT32_C( 29.91) }, { SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 4.00) } }, { { SIMDE_FLOAT32_C( 36.91), SIMDE_FLOAT32_C( 76.48), SIMDE_FLOAT32_C( 92.50), SIMDE_FLOAT32_C( 91.82), SIMDE_FLOAT32_C( 48.28), SIMDE_FLOAT32_C( 85.39), SIMDE_FLOAT32_C( 15.78), SIMDE_FLOAT32_C( 51.62) }, { SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 5.00) } }, { { SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 19.29), SIMDE_FLOAT32_C( 92.76), SIMDE_FLOAT32_C( 36.71), SIMDE_FLOAT32_C( 90.02), SIMDE_FLOAT32_C( 78.53), SIMDE_FLOAT32_C( 9.89), SIMDE_FLOAT32_C( 98.56) }, { SIMDE_FLOAT32_C( -3.00), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 6.00) } }, { { SIMDE_FLOAT32_C( 54.59), SIMDE_FLOAT32_C( 13.36), 
SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 65.57), SIMDE_FLOAT32_C( 11.95), SIMDE_FLOAT32_C( 86.19), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 61.99) }, { SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( -3.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 5.00) } }, { { SIMDE_FLOAT32_C( 66.13), SIMDE_FLOAT32_C( 79.73), SIMDE_FLOAT32_C( 37.65), SIMDE_FLOAT32_C( 44.86), SIMDE_FLOAT32_C( 78.25), SIMDE_FLOAT32_C( 9.39), SIMDE_FLOAT32_C( 74.76), SIMDE_FLOAT32_C( 15.16) }, { SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 3.00) } }, { { SIMDE_FLOAT32_C( 85.87), SIMDE_FLOAT32_C( 67.26), SIMDE_FLOAT32_C( 6.97), SIMDE_FLOAT32_C( 34.15), SIMDE_FLOAT32_C( 52.65), SIMDE_FLOAT32_C( 22.75), SIMDE_FLOAT32_C( 85.77), SIMDE_FLOAT32_C( 52.82) }, { SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_logb_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_logb_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 12.13), SIMDE_FLOAT64_C( 86.21), SIMDE_FLOAT64_C( 41.78), SIMDE_FLOAT64_C( 6.77) }, { SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 2.00) } }, { { SIMDE_FLOAT64_C( 9.71), SIMDE_FLOAT64_C( 21.14), SIMDE_FLOAT64_C( 79.78), SIMDE_FLOAT64_C( 24.32) }, { SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 4.00) } }, { { 
SIMDE_FLOAT64_C( 11.31), SIMDE_FLOAT64_C( 66.21), SIMDE_FLOAT64_C( 43.11), SIMDE_FLOAT64_C( 34.90) }, { SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 5.00) } }, { { SIMDE_FLOAT64_C( 20.79), SIMDE_FLOAT64_C( 71.26), SIMDE_FLOAT64_C( 78.76), SIMDE_FLOAT64_C( 61.13) }, { SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 5.00) } }, { { SIMDE_FLOAT64_C( 36.20), SIMDE_FLOAT64_C( 5.13), SIMDE_FLOAT64_C( 45.05), SIMDE_FLOAT64_C( 35.23) }, { SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 5.00) } }, { { SIMDE_FLOAT64_C( 73.81), SIMDE_FLOAT64_C( 52.97), SIMDE_FLOAT64_C( 18.59), SIMDE_FLOAT64_C( 15.62) }, { SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 3.00) } }, { { SIMDE_FLOAT64_C( 69.75), SIMDE_FLOAT64_C( 24.82), SIMDE_FLOAT64_C( 30.54), SIMDE_FLOAT64_C( 67.55) }, { SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 6.00) } }, { { SIMDE_FLOAT64_C( 11.30), SIMDE_FLOAT64_C( 38.09), SIMDE_FLOAT64_C( 44.42), SIMDE_FLOAT64_C( 23.43) }, { SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 4.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_logb_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_logb_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 21.10), SIMDE_FLOAT32_C( 11.56), SIMDE_FLOAT32_C( 9.28), SIMDE_FLOAT32_C( 74.19), SIMDE_FLOAT32_C( 63.11), SIMDE_FLOAT32_C( 46.70), SIMDE_FLOAT32_C( 5.76), SIMDE_FLOAT32_C( 81.08), SIMDE_FLOAT32_C( 64.90), SIMDE_FLOAT32_C( 46.85), SIMDE_FLOAT32_C( 89.59), SIMDE_FLOAT32_C( 87.79), SIMDE_FLOAT32_C( 91.37), 
SIMDE_FLOAT32_C( 41.43), SIMDE_FLOAT32_C( 25.79), SIMDE_FLOAT32_C( 88.74) }, { SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 6.00) } }, { { SIMDE_FLOAT32_C( 11.74), SIMDE_FLOAT32_C( 71.01), SIMDE_FLOAT32_C( 59.27), SIMDE_FLOAT32_C( 4.58), SIMDE_FLOAT32_C( 8.70), SIMDE_FLOAT32_C( 79.13), SIMDE_FLOAT32_C( 97.09), SIMDE_FLOAT32_C( 48.86), SIMDE_FLOAT32_C( 12.81), SIMDE_FLOAT32_C( 63.88), SIMDE_FLOAT32_C( 81.17), SIMDE_FLOAT32_C( 72.37), SIMDE_FLOAT32_C( 6.60), SIMDE_FLOAT32_C( 41.15), SIMDE_FLOAT32_C( 9.63), SIMDE_FLOAT32_C( 27.69) }, { SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 4.00) } }, { { SIMDE_FLOAT32_C( 52.70), SIMDE_FLOAT32_C( 18.90), SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 15.81), SIMDE_FLOAT32_C( 65.61), SIMDE_FLOAT32_C( 7.64), SIMDE_FLOAT32_C( 96.89), SIMDE_FLOAT32_C( 30.50), SIMDE_FLOAT32_C( 54.49), SIMDE_FLOAT32_C( 86.48), SIMDE_FLOAT32_C( 18.30), SIMDE_FLOAT32_C( 45.86), SIMDE_FLOAT32_C( 27.91), SIMDE_FLOAT32_C( 44.09), SIMDE_FLOAT32_C( 34.59), SIMDE_FLOAT32_C( 39.65) }, { SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 
5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00) } }, { { SIMDE_FLOAT32_C( 15.10), SIMDE_FLOAT32_C( 93.86), SIMDE_FLOAT32_C( 44.23), SIMDE_FLOAT32_C( 23.80), SIMDE_FLOAT32_C( 72.99), SIMDE_FLOAT32_C( 41.32), SIMDE_FLOAT32_C( 72.65), SIMDE_FLOAT32_C( 85.79), SIMDE_FLOAT32_C( 5.20), SIMDE_FLOAT32_C( 53.82), SIMDE_FLOAT32_C( 58.16), SIMDE_FLOAT32_C( 11.80), SIMDE_FLOAT32_C( 94.97), SIMDE_FLOAT32_C( 67.79), SIMDE_FLOAT32_C( 39.49), SIMDE_FLOAT32_C( 47.67) }, { SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00) } }, { { SIMDE_FLOAT32_C( 86.69), SIMDE_FLOAT32_C( 41.37), SIMDE_FLOAT32_C( 63.48), SIMDE_FLOAT32_C( 52.30), SIMDE_FLOAT32_C( 49.01), SIMDE_FLOAT32_C( 60.37), SIMDE_FLOAT32_C( 82.80), SIMDE_FLOAT32_C( 3.50), SIMDE_FLOAT32_C( 46.85), SIMDE_FLOAT32_C( 1.10), SIMDE_FLOAT32_C( 49.36), SIMDE_FLOAT32_C( 74.76), SIMDE_FLOAT32_C( 45.19), SIMDE_FLOAT32_C( 83.95), SIMDE_FLOAT32_C( 14.42), SIMDE_FLOAT32_C( 60.29) }, { SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 5.00) } }, { { SIMDE_FLOAT32_C( 77.81), SIMDE_FLOAT32_C( 58.65), SIMDE_FLOAT32_C( 84.09), SIMDE_FLOAT32_C( 50.80), SIMDE_FLOAT32_C( 99.97), SIMDE_FLOAT32_C( 56.74), SIMDE_FLOAT32_C( 36.60), SIMDE_FLOAT32_C( 5.17), SIMDE_FLOAT32_C( 10.56), SIMDE_FLOAT32_C( 94.76), SIMDE_FLOAT32_C( 16.97), SIMDE_FLOAT32_C( 5.53), SIMDE_FLOAT32_C( 62.55), SIMDE_FLOAT32_C( 56.46), 
SIMDE_FLOAT32_C( 53.21), SIMDE_FLOAT32_C( 49.24) }, { SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00) } }, { { SIMDE_FLOAT32_C( 97.83), SIMDE_FLOAT32_C( 16.69), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 46.83), SIMDE_FLOAT32_C( 77.05), SIMDE_FLOAT32_C( 84.34), SIMDE_FLOAT32_C( 50.33), SIMDE_FLOAT32_C( 23.90), SIMDE_FLOAT32_C( 85.44), SIMDE_FLOAT32_C( 99.69), SIMDE_FLOAT32_C( 98.67), SIMDE_FLOAT32_C( 30.63), SIMDE_FLOAT32_C( 83.65), SIMDE_FLOAT32_C( 13.08), SIMDE_FLOAT32_C( 90.93), SIMDE_FLOAT32_C( 61.46) }, { SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00) } }, { { SIMDE_FLOAT32_C( 71.73), SIMDE_FLOAT32_C( 75.01), SIMDE_FLOAT32_C( 12.26), SIMDE_FLOAT32_C( 71.69), SIMDE_FLOAT32_C( 31.76), SIMDE_FLOAT32_C( 48.85), SIMDE_FLOAT32_C( 76.86), SIMDE_FLOAT32_C( 42.32), SIMDE_FLOAT32_C( 43.61), SIMDE_FLOAT32_C( 93.83), SIMDE_FLOAT32_C( 47.85), SIMDE_FLOAT32_C( 6.16), SIMDE_FLOAT32_C( 50.28), SIMDE_FLOAT32_C( 1.06), SIMDE_FLOAT32_C( 55.40), SIMDE_FLOAT32_C( 48.11) }, { SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 
5.00), SIMDE_FLOAT32_C( 5.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_logb_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_logb_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 59.92), SIMDE_FLOAT32_C( 53.26), SIMDE_FLOAT32_C( 9.59), SIMDE_FLOAT32_C( 7.55), SIMDE_FLOAT32_C( 46.15), SIMDE_FLOAT32_C( 64.62), SIMDE_FLOAT32_C( 71.46), SIMDE_FLOAT32_C( 14.44), SIMDE_FLOAT32_C( 20.71), SIMDE_FLOAT32_C( 37.36), SIMDE_FLOAT32_C( 74.54), SIMDE_FLOAT32_C( 71.98), SIMDE_FLOAT32_C( 5.60), SIMDE_FLOAT32_C( 24.56), SIMDE_FLOAT32_C( 41.64), SIMDE_FLOAT32_C( 65.45) }, UINT8_C( 74), { SIMDE_FLOAT32_C( 94.52), SIMDE_FLOAT32_C( 66.49), SIMDE_FLOAT32_C( 56.15), SIMDE_FLOAT32_C( 82.67), SIMDE_FLOAT32_C( 41.42), SIMDE_FLOAT32_C( 98.41), SIMDE_FLOAT32_C( 74.30), SIMDE_FLOAT32_C( 60.40), SIMDE_FLOAT32_C( 20.04), SIMDE_FLOAT32_C( 51.01), SIMDE_FLOAT32_C( 8.26), SIMDE_FLOAT32_C( 26.15), SIMDE_FLOAT32_C( 61.43), SIMDE_FLOAT32_C( 26.22), SIMDE_FLOAT32_C( 86.06), SIMDE_FLOAT32_C( 14.69) }, { SIMDE_FLOAT32_C( 59.92), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 9.59), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 46.15), SIMDE_FLOAT32_C( 64.62), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 14.44), SIMDE_FLOAT32_C( 20.71), SIMDE_FLOAT32_C( 37.36), SIMDE_FLOAT32_C( 74.54), SIMDE_FLOAT32_C( 71.98), SIMDE_FLOAT32_C( 5.60), SIMDE_FLOAT32_C( 24.56), SIMDE_FLOAT32_C( 41.64), SIMDE_FLOAT32_C( 65.45) } }, { { SIMDE_FLOAT32_C( 35.81), SIMDE_FLOAT32_C( 93.61), SIMDE_FLOAT32_C( 60.84), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 65.08), SIMDE_FLOAT32_C( 75.28), SIMDE_FLOAT32_C( 21.13), SIMDE_FLOAT32_C( 2.43), SIMDE_FLOAT32_C( 49.82), SIMDE_FLOAT32_C( 93.11), SIMDE_FLOAT32_C( 
8.03), SIMDE_FLOAT32_C( 74.37), SIMDE_FLOAT32_C( 34.75), SIMDE_FLOAT32_C( 73.48), SIMDE_FLOAT32_C( 66.83), SIMDE_FLOAT32_C( 29.26) }, UINT8_C(187), { SIMDE_FLOAT32_C( 22.98), SIMDE_FLOAT32_C( 11.94), SIMDE_FLOAT32_C( 81.39), SIMDE_FLOAT32_C( 21.39), SIMDE_FLOAT32_C( 86.23), SIMDE_FLOAT32_C( 41.79), SIMDE_FLOAT32_C( 41.43), SIMDE_FLOAT32_C( 37.25), SIMDE_FLOAT32_C( 50.05), SIMDE_FLOAT32_C( 67.58), SIMDE_FLOAT32_C( 98.68), SIMDE_FLOAT32_C( 76.27), SIMDE_FLOAT32_C( 53.64), SIMDE_FLOAT32_C( 13.37), SIMDE_FLOAT32_C( 12.08), SIMDE_FLOAT32_C( 47.25) }, { SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 60.84), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 21.13), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 49.82), SIMDE_FLOAT32_C( 93.11), SIMDE_FLOAT32_C( 8.03), SIMDE_FLOAT32_C( 74.37), SIMDE_FLOAT32_C( 34.75), SIMDE_FLOAT32_C( 73.48), SIMDE_FLOAT32_C( 66.83), SIMDE_FLOAT32_C( 29.26) } }, { { SIMDE_FLOAT32_C( 74.20), SIMDE_FLOAT32_C( 12.51), SIMDE_FLOAT32_C( 12.33), SIMDE_FLOAT32_C( 49.48), SIMDE_FLOAT32_C( 33.65), SIMDE_FLOAT32_C( 14.76), SIMDE_FLOAT32_C( 99.30), SIMDE_FLOAT32_C( 26.76), SIMDE_FLOAT32_C( 22.79), SIMDE_FLOAT32_C( 73.68), SIMDE_FLOAT32_C( 61.50), SIMDE_FLOAT32_C( 96.27), SIMDE_FLOAT32_C( 40.51), SIMDE_FLOAT32_C( 90.77), SIMDE_FLOAT32_C( 36.25), SIMDE_FLOAT32_C( 63.49) }, UINT8_C(162), { SIMDE_FLOAT32_C( 17.64), SIMDE_FLOAT32_C( 84.88), SIMDE_FLOAT32_C( 88.94), SIMDE_FLOAT32_C( 59.43), SIMDE_FLOAT32_C( 26.31), SIMDE_FLOAT32_C( 26.18), SIMDE_FLOAT32_C( 9.49), SIMDE_FLOAT32_C( 93.89), SIMDE_FLOAT32_C( 24.86), SIMDE_FLOAT32_C( 85.76), SIMDE_FLOAT32_C( 47.53), SIMDE_FLOAT32_C( 38.23), SIMDE_FLOAT32_C( 97.84), SIMDE_FLOAT32_C( 94.78), SIMDE_FLOAT32_C( 12.43), SIMDE_FLOAT32_C( 10.35) }, { SIMDE_FLOAT32_C( 74.20), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 12.33), SIMDE_FLOAT32_C( 49.48), SIMDE_FLOAT32_C( 33.65), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 99.30), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 22.79), 
SIMDE_FLOAT32_C( 73.68), SIMDE_FLOAT32_C( 61.50), SIMDE_FLOAT32_C( 96.27), SIMDE_FLOAT32_C( 40.51), SIMDE_FLOAT32_C( 90.77), SIMDE_FLOAT32_C( 36.25), SIMDE_FLOAT32_C( 63.49) } }, { { SIMDE_FLOAT32_C( 7.11), SIMDE_FLOAT32_C( 61.92), SIMDE_FLOAT32_C( 44.00), SIMDE_FLOAT32_C( 21.88), SIMDE_FLOAT32_C( 61.22), SIMDE_FLOAT32_C( 70.75), SIMDE_FLOAT32_C( 44.67), SIMDE_FLOAT32_C( 34.90), SIMDE_FLOAT32_C( 32.26), SIMDE_FLOAT32_C( 40.94), SIMDE_FLOAT32_C( 75.40), SIMDE_FLOAT32_C( 23.02), SIMDE_FLOAT32_C( 77.19), SIMDE_FLOAT32_C( 38.89), SIMDE_FLOAT32_C( 25.73), SIMDE_FLOAT32_C( 94.83) }, UINT8_C(143), { SIMDE_FLOAT32_C( 14.67), SIMDE_FLOAT32_C( 54.26), SIMDE_FLOAT32_C( 50.08), SIMDE_FLOAT32_C( 40.85), SIMDE_FLOAT32_C( 63.75), SIMDE_FLOAT32_C( 43.97), SIMDE_FLOAT32_C( 65.71), SIMDE_FLOAT32_C( 49.51), SIMDE_FLOAT32_C( 91.50), SIMDE_FLOAT32_C( 3.94), SIMDE_FLOAT32_C( 47.35), SIMDE_FLOAT32_C( 86.29), SIMDE_FLOAT32_C( 16.37), SIMDE_FLOAT32_C( 57.70), SIMDE_FLOAT32_C( 93.40), SIMDE_FLOAT32_C( 78.29) }, { SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 61.22), SIMDE_FLOAT32_C( 70.75), SIMDE_FLOAT32_C( 44.67), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 32.26), SIMDE_FLOAT32_C( 40.94), SIMDE_FLOAT32_C( 75.40), SIMDE_FLOAT32_C( 23.02), SIMDE_FLOAT32_C( 77.19), SIMDE_FLOAT32_C( 38.89), SIMDE_FLOAT32_C( 25.73), SIMDE_FLOAT32_C( 94.83) } }, { { SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( 15.27), SIMDE_FLOAT32_C( 39.51), SIMDE_FLOAT32_C( 72.45), SIMDE_FLOAT32_C( 59.94), SIMDE_FLOAT32_C( 74.41), SIMDE_FLOAT32_C( 4.71), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 49.81), SIMDE_FLOAT32_C( 27.73), SIMDE_FLOAT32_C( 78.08), SIMDE_FLOAT32_C( 88.70), SIMDE_FLOAT32_C( 53.46), SIMDE_FLOAT32_C( 72.91), SIMDE_FLOAT32_C( 12.47), SIMDE_FLOAT32_C( 68.13) }, UINT8_C(127), { SIMDE_FLOAT32_C( 62.56), SIMDE_FLOAT32_C( 8.97), SIMDE_FLOAT32_C( 90.92), SIMDE_FLOAT32_C( 6.53), SIMDE_FLOAT32_C( 74.69), SIMDE_FLOAT32_C( 40.42), SIMDE_FLOAT32_C( 98.03), 
SIMDE_FLOAT32_C( 78.63), SIMDE_FLOAT32_C( 87.77), SIMDE_FLOAT32_C( 84.32), SIMDE_FLOAT32_C( 95.00), SIMDE_FLOAT32_C( 45.47), SIMDE_FLOAT32_C( 77.72), SIMDE_FLOAT32_C( 73.29), SIMDE_FLOAT32_C( 47.17), SIMDE_FLOAT32_C( 92.99) }, { SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 49.81), SIMDE_FLOAT32_C( 27.73), SIMDE_FLOAT32_C( 78.08), SIMDE_FLOAT32_C( 88.70), SIMDE_FLOAT32_C( 53.46), SIMDE_FLOAT32_C( 72.91), SIMDE_FLOAT32_C( 12.47), SIMDE_FLOAT32_C( 68.13) } }, { { SIMDE_FLOAT32_C( 12.80), SIMDE_FLOAT32_C( 19.62), SIMDE_FLOAT32_C( 52.94), SIMDE_FLOAT32_C( 87.20), SIMDE_FLOAT32_C( 24.32), SIMDE_FLOAT32_C( 53.82), SIMDE_FLOAT32_C( 37.01), SIMDE_FLOAT32_C( 52.06), SIMDE_FLOAT32_C( 31.90), SIMDE_FLOAT32_C( 25.71), SIMDE_FLOAT32_C( 5.52), SIMDE_FLOAT32_C( 4.81), SIMDE_FLOAT32_C( 38.19), SIMDE_FLOAT32_C( 73.64), SIMDE_FLOAT32_C( 31.98), SIMDE_FLOAT32_C( 0.74) }, UINT8_C( 81), { SIMDE_FLOAT32_C( 22.90), SIMDE_FLOAT32_C( 7.28), SIMDE_FLOAT32_C( 57.30), SIMDE_FLOAT32_C( 63.32), SIMDE_FLOAT32_C( 5.31), SIMDE_FLOAT32_C( 35.93), SIMDE_FLOAT32_C( 51.08), SIMDE_FLOAT32_C( 89.63), SIMDE_FLOAT32_C( 30.93), SIMDE_FLOAT32_C( 96.55), SIMDE_FLOAT32_C( 67.35), SIMDE_FLOAT32_C( 4.22), SIMDE_FLOAT32_C( 43.72), SIMDE_FLOAT32_C( 60.34), SIMDE_FLOAT32_C( 17.01), SIMDE_FLOAT32_C( 63.33) }, { SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 19.62), SIMDE_FLOAT32_C( 52.94), SIMDE_FLOAT32_C( 87.20), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 53.82), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 52.06), SIMDE_FLOAT32_C( 31.90), SIMDE_FLOAT32_C( 25.71), SIMDE_FLOAT32_C( 5.52), SIMDE_FLOAT32_C( 4.81), SIMDE_FLOAT32_C( 38.19), SIMDE_FLOAT32_C( 73.64), SIMDE_FLOAT32_C( 31.98), SIMDE_FLOAT32_C( 0.74) } }, { { SIMDE_FLOAT32_C( 13.27), SIMDE_FLOAT32_C( 4.22), SIMDE_FLOAT32_C( 87.66), SIMDE_FLOAT32_C( 67.10), SIMDE_FLOAT32_C( 41.23), SIMDE_FLOAT32_C( 39.71), 
SIMDE_FLOAT32_C( 99.00), SIMDE_FLOAT32_C( 66.95), SIMDE_FLOAT32_C( 45.23), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 5.13), SIMDE_FLOAT32_C( 18.87), SIMDE_FLOAT32_C( 35.79), SIMDE_FLOAT32_C( 5.88), SIMDE_FLOAT32_C( 1.48), SIMDE_FLOAT32_C( 58.69) }, UINT8_C(192), { SIMDE_FLOAT32_C( 58.78), SIMDE_FLOAT32_C( 22.00), SIMDE_FLOAT32_C( 18.46), SIMDE_FLOAT32_C( 94.71), SIMDE_FLOAT32_C( 73.09), SIMDE_FLOAT32_C( 8.09), SIMDE_FLOAT32_C( 25.64), SIMDE_FLOAT32_C( 69.64), SIMDE_FLOAT32_C( 75.44), SIMDE_FLOAT32_C( 29.86), SIMDE_FLOAT32_C( 13.36), SIMDE_FLOAT32_C( 35.77), SIMDE_FLOAT32_C( 46.87), SIMDE_FLOAT32_C( 76.69), SIMDE_FLOAT32_C( 49.05), SIMDE_FLOAT32_C( 51.09) }, { SIMDE_FLOAT32_C( 13.27), SIMDE_FLOAT32_C( 4.22), SIMDE_FLOAT32_C( 87.66), SIMDE_FLOAT32_C( 67.10), SIMDE_FLOAT32_C( 41.23), SIMDE_FLOAT32_C( 39.71), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 45.23), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 5.13), SIMDE_FLOAT32_C( 18.87), SIMDE_FLOAT32_C( 35.79), SIMDE_FLOAT32_C( 5.88), SIMDE_FLOAT32_C( 1.48), SIMDE_FLOAT32_C( 58.69) } }, { { SIMDE_FLOAT32_C( 64.34), SIMDE_FLOAT32_C( 16.14), SIMDE_FLOAT32_C( 92.32), SIMDE_FLOAT32_C( 4.06), SIMDE_FLOAT32_C( 15.14), SIMDE_FLOAT32_C( 59.27), SIMDE_FLOAT32_C( 49.28), SIMDE_FLOAT32_C( 18.96), SIMDE_FLOAT32_C( 64.40), SIMDE_FLOAT32_C( 68.15), SIMDE_FLOAT32_C( 54.75), SIMDE_FLOAT32_C( 70.28), SIMDE_FLOAT32_C( 69.63), SIMDE_FLOAT32_C( 13.43), SIMDE_FLOAT32_C( 83.43), SIMDE_FLOAT32_C( 28.42) }, UINT8_C( 42), { SIMDE_FLOAT32_C( 1.89), SIMDE_FLOAT32_C( 23.13), SIMDE_FLOAT32_C( 8.52), SIMDE_FLOAT32_C( 9.98), SIMDE_FLOAT32_C( 48.77), SIMDE_FLOAT32_C( 78.16), SIMDE_FLOAT32_C( 85.41), SIMDE_FLOAT32_C( 78.63), SIMDE_FLOAT32_C( 91.52), SIMDE_FLOAT32_C( 21.19), SIMDE_FLOAT32_C( 25.50), SIMDE_FLOAT32_C( 68.21), SIMDE_FLOAT32_C( 70.23), SIMDE_FLOAT32_C( 76.59), SIMDE_FLOAT32_C( 32.55), SIMDE_FLOAT32_C( 86.38) }, { SIMDE_FLOAT32_C( 64.34), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 92.32), SIMDE_FLOAT32_C( 3.00), 
SIMDE_FLOAT32_C( 15.14), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( 49.28), SIMDE_FLOAT32_C( 18.96), SIMDE_FLOAT32_C( 64.40), SIMDE_FLOAT32_C( 68.15), SIMDE_FLOAT32_C( 54.75), SIMDE_FLOAT32_C( 70.28), SIMDE_FLOAT32_C( 69.63), SIMDE_FLOAT32_C( 13.43), SIMDE_FLOAT32_C( 83.43), SIMDE_FLOAT32_C( 28.42) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_logb_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_logb_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 61.06), SIMDE_FLOAT64_C( 56.07), SIMDE_FLOAT64_C( 3.95), SIMDE_FLOAT64_C( 60.43), SIMDE_FLOAT64_C( 57.40), SIMDE_FLOAT64_C( 69.53), SIMDE_FLOAT64_C( 29.03), SIMDE_FLOAT64_C( 89.93) }, { SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 6.00) } }, { { SIMDE_FLOAT64_C( 49.22), SIMDE_FLOAT64_C( 9.42), SIMDE_FLOAT64_C( 73.55), SIMDE_FLOAT64_C( 15.48), SIMDE_FLOAT64_C( 60.82), SIMDE_FLOAT64_C( 84.59), SIMDE_FLOAT64_C( 3.74), SIMDE_FLOAT64_C( 54.66) }, { SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 5.00) } }, { { SIMDE_FLOAT64_C( 33.37), SIMDE_FLOAT64_C( 75.87), SIMDE_FLOAT64_C( 58.52), SIMDE_FLOAT64_C( 48.59), SIMDE_FLOAT64_C( 90.24), SIMDE_FLOAT64_C( 63.58), SIMDE_FLOAT64_C( 62.75), SIMDE_FLOAT64_C( 73.90) }, { SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 
6.00) } }, { { SIMDE_FLOAT64_C( 18.87), SIMDE_FLOAT64_C( 24.32), SIMDE_FLOAT64_C( 24.02), SIMDE_FLOAT64_C( 25.17), SIMDE_FLOAT64_C( 77.02), SIMDE_FLOAT64_C( 14.07), SIMDE_FLOAT64_C( 2.94), SIMDE_FLOAT64_C( 38.08) }, { SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 5.00) } }, { { SIMDE_FLOAT64_C( 70.14), SIMDE_FLOAT64_C( 6.89), SIMDE_FLOAT64_C( 98.50), SIMDE_FLOAT64_C( 27.53), SIMDE_FLOAT64_C( 76.42), SIMDE_FLOAT64_C( 27.53), SIMDE_FLOAT64_C( 17.47), SIMDE_FLOAT64_C( 25.65) }, { SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 4.00) } }, { { SIMDE_FLOAT64_C( 36.95), SIMDE_FLOAT64_C( 91.02), SIMDE_FLOAT64_C( 41.13), SIMDE_FLOAT64_C( 97.76), SIMDE_FLOAT64_C( 75.61), SIMDE_FLOAT64_C( 44.87), SIMDE_FLOAT64_C( 52.42), SIMDE_FLOAT64_C( 8.99) }, { SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 3.00) } }, { { SIMDE_FLOAT64_C( 20.74), SIMDE_FLOAT64_C( 10.94), SIMDE_FLOAT64_C( 57.58), SIMDE_FLOAT64_C( 10.98), SIMDE_FLOAT64_C( 74.52), SIMDE_FLOAT64_C( 20.32), SIMDE_FLOAT64_C( 84.88), SIMDE_FLOAT64_C( 93.39) }, { SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 6.00) } }, { { SIMDE_FLOAT64_C( 44.64), SIMDE_FLOAT64_C( 8.90), SIMDE_FLOAT64_C( 18.56), SIMDE_FLOAT64_C( 21.66), SIMDE_FLOAT64_C( 22.97), SIMDE_FLOAT64_C( 21.51), SIMDE_FLOAT64_C( 59.73), SIMDE_FLOAT64_C( 93.10) }, { SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 5.00), 
SIMDE_FLOAT64_C( 6.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_logb_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_logb_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 44.91), SIMDE_FLOAT64_C( 88.38), SIMDE_FLOAT64_C( 45.58), SIMDE_FLOAT64_C( 12.77), SIMDE_FLOAT64_C( 31.32), SIMDE_FLOAT64_C( 50.43), SIMDE_FLOAT64_C( 60.04), SIMDE_FLOAT64_C( 3.47) }, UINT8_C(214), { SIMDE_FLOAT64_C( 86.80), SIMDE_FLOAT64_C( 42.80), SIMDE_FLOAT64_C( 69.48), SIMDE_FLOAT64_C( 71.71), SIMDE_FLOAT64_C( 94.56), SIMDE_FLOAT64_C( 31.31), SIMDE_FLOAT64_C( 74.51), SIMDE_FLOAT64_C( 72.92) }, { SIMDE_FLOAT64_C( 44.91), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 12.77), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 50.43), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 6.00) } }, { { SIMDE_FLOAT64_C( 29.96), SIMDE_FLOAT64_C( 29.49), SIMDE_FLOAT64_C( 88.44), SIMDE_FLOAT64_C( 26.63), SIMDE_FLOAT64_C( 15.97), SIMDE_FLOAT64_C( 77.55), SIMDE_FLOAT64_C( 47.96), SIMDE_FLOAT64_C( 96.03) }, UINT8_C( 76), { SIMDE_FLOAT64_C( 85.66), SIMDE_FLOAT64_C( 58.61), SIMDE_FLOAT64_C( 61.13), SIMDE_FLOAT64_C( 28.12), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 6.05), SIMDE_FLOAT64_C( 16.50), SIMDE_FLOAT64_C( 45.67) }, { SIMDE_FLOAT64_C( 29.96), SIMDE_FLOAT64_C( 29.49), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 15.97), SIMDE_FLOAT64_C( 77.55), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 96.03) } }, { { SIMDE_FLOAT64_C( 18.81), SIMDE_FLOAT64_C( 47.82), SIMDE_FLOAT64_C( 96.10), SIMDE_FLOAT64_C( 78.86), SIMDE_FLOAT64_C( 51.29), SIMDE_FLOAT64_C( 7.80), SIMDE_FLOAT64_C( 65.66), SIMDE_FLOAT64_C( 94.09) }, UINT8_C( 98), { SIMDE_FLOAT64_C( 37.37), 
SIMDE_FLOAT64_C( 88.65), SIMDE_FLOAT64_C( 8.59), SIMDE_FLOAT64_C( 11.88), SIMDE_FLOAT64_C( 61.57), SIMDE_FLOAT64_C( 38.54), SIMDE_FLOAT64_C( 41.37), SIMDE_FLOAT64_C( 50.02) }, { SIMDE_FLOAT64_C( 18.81), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 96.10), SIMDE_FLOAT64_C( 78.86), SIMDE_FLOAT64_C( 51.29), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 94.09) } }, { { SIMDE_FLOAT64_C( 65.18), SIMDE_FLOAT64_C( 57.34), SIMDE_FLOAT64_C( 27.56), SIMDE_FLOAT64_C( 13.13), SIMDE_FLOAT64_C( 53.38), SIMDE_FLOAT64_C( 10.85), SIMDE_FLOAT64_C( 98.80), SIMDE_FLOAT64_C( 11.98) }, UINT8_C(227), { SIMDE_FLOAT64_C( 26.92), SIMDE_FLOAT64_C( 12.07), SIMDE_FLOAT64_C( 78.04), SIMDE_FLOAT64_C( 43.42), SIMDE_FLOAT64_C( 57.74), SIMDE_FLOAT64_C( 96.85), SIMDE_FLOAT64_C( 91.25), SIMDE_FLOAT64_C( 53.84) }, { SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 27.56), SIMDE_FLOAT64_C( 13.13), SIMDE_FLOAT64_C( 53.38), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 5.00) } }, { { SIMDE_FLOAT64_C( 75.71), SIMDE_FLOAT64_C( 42.54), SIMDE_FLOAT64_C( 61.63), SIMDE_FLOAT64_C( 41.37), SIMDE_FLOAT64_C( 36.63), SIMDE_FLOAT64_C( 38.91), SIMDE_FLOAT64_C( 78.74), SIMDE_FLOAT64_C( 25.28) }, UINT8_C(133), { SIMDE_FLOAT64_C( 90.62), SIMDE_FLOAT64_C( 86.86), SIMDE_FLOAT64_C( 86.04), SIMDE_FLOAT64_C( 31.99), SIMDE_FLOAT64_C( 36.87), SIMDE_FLOAT64_C( 51.22), SIMDE_FLOAT64_C( 89.34), SIMDE_FLOAT64_C( 64.43) }, { SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 42.54), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 41.37), SIMDE_FLOAT64_C( 36.63), SIMDE_FLOAT64_C( 38.91), SIMDE_FLOAT64_C( 78.74), SIMDE_FLOAT64_C( 6.00) } }, { { SIMDE_FLOAT64_C( 64.36), SIMDE_FLOAT64_C( 42.71), SIMDE_FLOAT64_C( 75.29), SIMDE_FLOAT64_C( 63.15), SIMDE_FLOAT64_C( 54.70), SIMDE_FLOAT64_C( 47.28), SIMDE_FLOAT64_C( 90.08), SIMDE_FLOAT64_C( 66.76) }, UINT8_C(185), { SIMDE_FLOAT64_C( 33.50), SIMDE_FLOAT64_C( 24.50), SIMDE_FLOAT64_C( 22.16), SIMDE_FLOAT64_C( 24.75), SIMDE_FLOAT64_C( 78.34), SIMDE_FLOAT64_C( 
97.87), SIMDE_FLOAT64_C( 67.29), SIMDE_FLOAT64_C( 39.97) }, { SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 42.71), SIMDE_FLOAT64_C( 75.29), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 90.08), SIMDE_FLOAT64_C( 5.00) } }, { { SIMDE_FLOAT64_C( 39.24), SIMDE_FLOAT64_C( 3.92), SIMDE_FLOAT64_C( 78.88), SIMDE_FLOAT64_C( 17.98), SIMDE_FLOAT64_C( 29.20), SIMDE_FLOAT64_C( 26.38), SIMDE_FLOAT64_C( 8.60), SIMDE_FLOAT64_C( 16.06) }, UINT8_C(216), { SIMDE_FLOAT64_C( 40.59), SIMDE_FLOAT64_C( 52.93), SIMDE_FLOAT64_C( 63.64), SIMDE_FLOAT64_C( 29.93), SIMDE_FLOAT64_C( 17.36), SIMDE_FLOAT64_C( 28.00), SIMDE_FLOAT64_C( 72.65), SIMDE_FLOAT64_C( 92.65) }, { SIMDE_FLOAT64_C( 39.24), SIMDE_FLOAT64_C( 3.92), SIMDE_FLOAT64_C( 78.88), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 26.38), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 6.00) } }, { { SIMDE_FLOAT64_C( 91.15), SIMDE_FLOAT64_C( 27.34), SIMDE_FLOAT64_C( 39.93), SIMDE_FLOAT64_C( 81.23), SIMDE_FLOAT64_C( 94.10), SIMDE_FLOAT64_C( 65.24), SIMDE_FLOAT64_C( 14.73), SIMDE_FLOAT64_C( 18.60) }, UINT8_C(111), { SIMDE_FLOAT64_C( 39.48), SIMDE_FLOAT64_C( 96.94), SIMDE_FLOAT64_C( 85.27), SIMDE_FLOAT64_C( 6.77), SIMDE_FLOAT64_C( 36.91), SIMDE_FLOAT64_C( 24.51), SIMDE_FLOAT64_C( 10.68), SIMDE_FLOAT64_C( 15.79) }, { SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 6.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( 94.10), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 18.60) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_logb_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_nearbyint_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } 
test_vec[] = { { { SIMDE_FLOAT32_C( -914.49), SIMDE_FLOAT32_C( 460.45), SIMDE_FLOAT32_C( -816.31), SIMDE_FLOAT32_C( 969.94), SIMDE_FLOAT32_C( -904.29), SIMDE_FLOAT32_C( -267.48), SIMDE_FLOAT32_C( -362.84), SIMDE_FLOAT32_C( -10.93), SIMDE_FLOAT32_C( -124.62), SIMDE_FLOAT32_C( 667.93), SIMDE_FLOAT32_C( 512.15), SIMDE_FLOAT32_C( -37.80), SIMDE_FLOAT32_C( 894.40), SIMDE_FLOAT32_C( 135.20), SIMDE_FLOAT32_C( -763.47), SIMDE_FLOAT32_C( -593.20) }, { SIMDE_FLOAT32_C( -914.00), SIMDE_FLOAT32_C( 460.00), SIMDE_FLOAT32_C( -816.00), SIMDE_FLOAT32_C( 970.00), SIMDE_FLOAT32_C( -904.00), SIMDE_FLOAT32_C( -267.00), SIMDE_FLOAT32_C( -363.00), SIMDE_FLOAT32_C( -11.00), SIMDE_FLOAT32_C( -125.00), SIMDE_FLOAT32_C( 668.00), SIMDE_FLOAT32_C( 512.00), SIMDE_FLOAT32_C( -38.00), SIMDE_FLOAT32_C( 894.00), SIMDE_FLOAT32_C( 135.00), SIMDE_FLOAT32_C( -763.00), SIMDE_FLOAT32_C( -593.00) } }, { { SIMDE_FLOAT32_C( -849.14), SIMDE_FLOAT32_C( 852.22), SIMDE_FLOAT32_C( -400.69), SIMDE_FLOAT32_C( 171.29), SIMDE_FLOAT32_C( 508.23), SIMDE_FLOAT32_C( -765.53), SIMDE_FLOAT32_C( -382.38), SIMDE_FLOAT32_C( -765.99), SIMDE_FLOAT32_C( -92.44), SIMDE_FLOAT32_C( 141.65), SIMDE_FLOAT32_C( 748.46), SIMDE_FLOAT32_C( 28.81), SIMDE_FLOAT32_C( -715.24), SIMDE_FLOAT32_C( -786.64), SIMDE_FLOAT32_C( -54.59), SIMDE_FLOAT32_C( -629.74) }, { SIMDE_FLOAT32_C( -849.00), SIMDE_FLOAT32_C( 852.00), SIMDE_FLOAT32_C( -401.00), SIMDE_FLOAT32_C( 171.00), SIMDE_FLOAT32_C( 508.00), SIMDE_FLOAT32_C( -766.00), SIMDE_FLOAT32_C( -382.00), SIMDE_FLOAT32_C( -766.00), SIMDE_FLOAT32_C( -92.00), SIMDE_FLOAT32_C( 142.00), SIMDE_FLOAT32_C( 748.00), SIMDE_FLOAT32_C( 29.00), SIMDE_FLOAT32_C( -715.00), SIMDE_FLOAT32_C( -787.00), SIMDE_FLOAT32_C( -55.00), SIMDE_FLOAT32_C( -630.00) } }, { { SIMDE_FLOAT32_C( 673.81), SIMDE_FLOAT32_C( 129.11), SIMDE_FLOAT32_C( -659.80), SIMDE_FLOAT32_C( 769.52), SIMDE_FLOAT32_C( 861.62), SIMDE_FLOAT32_C( -22.64), SIMDE_FLOAT32_C( -241.41), SIMDE_FLOAT32_C( -263.00), SIMDE_FLOAT32_C( -354.71), SIMDE_FLOAT32_C( 
-729.27), SIMDE_FLOAT32_C( 699.19), SIMDE_FLOAT32_C( -460.31), SIMDE_FLOAT32_C( 405.93), SIMDE_FLOAT32_C( 935.73), SIMDE_FLOAT32_C( -53.51), SIMDE_FLOAT32_C( 556.79) }, { SIMDE_FLOAT32_C( 674.00), SIMDE_FLOAT32_C( 129.00), SIMDE_FLOAT32_C( -660.00), SIMDE_FLOAT32_C( 770.00), SIMDE_FLOAT32_C( 862.00), SIMDE_FLOAT32_C( -23.00), SIMDE_FLOAT32_C( -241.00), SIMDE_FLOAT32_C( -263.00), SIMDE_FLOAT32_C( -355.00), SIMDE_FLOAT32_C( -729.00), SIMDE_FLOAT32_C( 699.00), SIMDE_FLOAT32_C( -460.00), SIMDE_FLOAT32_C( 406.00), SIMDE_FLOAT32_C( 936.00), SIMDE_FLOAT32_C( -54.00), SIMDE_FLOAT32_C( 557.00) } }, { { SIMDE_FLOAT32_C( 787.95), SIMDE_FLOAT32_C( 545.80), SIMDE_FLOAT32_C( -271.92), SIMDE_FLOAT32_C( 296.18), SIMDE_FLOAT32_C( 780.27), SIMDE_FLOAT32_C( 345.70), SIMDE_FLOAT32_C( 530.19), SIMDE_FLOAT32_C( -312.17), SIMDE_FLOAT32_C( -512.65), SIMDE_FLOAT32_C( 278.65), SIMDE_FLOAT32_C( 716.64), SIMDE_FLOAT32_C( -227.89), SIMDE_FLOAT32_C( 492.01), SIMDE_FLOAT32_C( -337.94), SIMDE_FLOAT32_C( 142.37), SIMDE_FLOAT32_C( 165.82) }, { SIMDE_FLOAT32_C( 788.00), SIMDE_FLOAT32_C( 546.00), SIMDE_FLOAT32_C( -272.00), SIMDE_FLOAT32_C( 296.00), SIMDE_FLOAT32_C( 780.00), SIMDE_FLOAT32_C( 346.00), SIMDE_FLOAT32_C( 530.00), SIMDE_FLOAT32_C( -312.00), SIMDE_FLOAT32_C( -513.00), SIMDE_FLOAT32_C( 279.00), SIMDE_FLOAT32_C( 717.00), SIMDE_FLOAT32_C( -228.00), SIMDE_FLOAT32_C( 492.00), SIMDE_FLOAT32_C( -338.00), SIMDE_FLOAT32_C( 142.00), SIMDE_FLOAT32_C( 166.00) } }, { { SIMDE_FLOAT32_C( 791.16), SIMDE_FLOAT32_C( 482.57), SIMDE_FLOAT32_C( -64.66), SIMDE_FLOAT32_C( 652.78), SIMDE_FLOAT32_C( -540.07), SIMDE_FLOAT32_C( 693.92), SIMDE_FLOAT32_C( -610.22), SIMDE_FLOAT32_C( 105.21), SIMDE_FLOAT32_C( 964.66), SIMDE_FLOAT32_C( -911.03), SIMDE_FLOAT32_C( 644.90), SIMDE_FLOAT32_C( 370.59), SIMDE_FLOAT32_C( -975.30), SIMDE_FLOAT32_C( -408.60), SIMDE_FLOAT32_C( -72.62), SIMDE_FLOAT32_C( 812.65) }, { SIMDE_FLOAT32_C( 791.00), SIMDE_FLOAT32_C( 483.00), SIMDE_FLOAT32_C( -65.00), SIMDE_FLOAT32_C( 653.00), 
SIMDE_FLOAT32_C( -540.00), SIMDE_FLOAT32_C( 694.00), SIMDE_FLOAT32_C( -610.00), SIMDE_FLOAT32_C( 105.00), SIMDE_FLOAT32_C( 965.00), SIMDE_FLOAT32_C( -911.00), SIMDE_FLOAT32_C( 645.00), SIMDE_FLOAT32_C( 371.00), SIMDE_FLOAT32_C( -975.00), SIMDE_FLOAT32_C( -409.00), SIMDE_FLOAT32_C( -73.00), SIMDE_FLOAT32_C( 813.00) } }, { { SIMDE_FLOAT32_C( -862.80), SIMDE_FLOAT32_C( 655.47), SIMDE_FLOAT32_C( 108.83), SIMDE_FLOAT32_C( 917.47), SIMDE_FLOAT32_C( 1.16), SIMDE_FLOAT32_C( -360.98), SIMDE_FLOAT32_C( -394.70), SIMDE_FLOAT32_C( 488.51), SIMDE_FLOAT32_C( 917.67), SIMDE_FLOAT32_C( -678.06), SIMDE_FLOAT32_C( -739.38), SIMDE_FLOAT32_C( 409.68), SIMDE_FLOAT32_C( -16.00), SIMDE_FLOAT32_C( 402.99), SIMDE_FLOAT32_C( -424.50), SIMDE_FLOAT32_C( -224.84) }, { SIMDE_FLOAT32_C( -863.00), SIMDE_FLOAT32_C( 655.00), SIMDE_FLOAT32_C( 109.00), SIMDE_FLOAT32_C( 917.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -361.00), SIMDE_FLOAT32_C( -395.00), SIMDE_FLOAT32_C( 489.00), SIMDE_FLOAT32_C( 918.00), SIMDE_FLOAT32_C( -678.00), SIMDE_FLOAT32_C( -739.00), SIMDE_FLOAT32_C( 410.00), SIMDE_FLOAT32_C( -16.00), SIMDE_FLOAT32_C( 403.00), SIMDE_FLOAT32_C( -424.00), SIMDE_FLOAT32_C( -225.00) } }, { { SIMDE_FLOAT32_C( -114.44), SIMDE_FLOAT32_C( 510.83), SIMDE_FLOAT32_C( -572.05), SIMDE_FLOAT32_C( 345.49), SIMDE_FLOAT32_C( 204.76), SIMDE_FLOAT32_C( -182.27), SIMDE_FLOAT32_C( -549.30), SIMDE_FLOAT32_C( 169.42), SIMDE_FLOAT32_C( -93.30), SIMDE_FLOAT32_C( -904.39), SIMDE_FLOAT32_C( -459.99), SIMDE_FLOAT32_C( -68.59), SIMDE_FLOAT32_C( -313.00), SIMDE_FLOAT32_C( 467.39), SIMDE_FLOAT32_C( -255.94), SIMDE_FLOAT32_C( -175.80) }, { SIMDE_FLOAT32_C( -114.00), SIMDE_FLOAT32_C( 511.00), SIMDE_FLOAT32_C( -572.00), SIMDE_FLOAT32_C( 345.00), SIMDE_FLOAT32_C( 205.00), SIMDE_FLOAT32_C( -182.00), SIMDE_FLOAT32_C( -549.00), SIMDE_FLOAT32_C( 169.00), SIMDE_FLOAT32_C( -93.00), SIMDE_FLOAT32_C( -904.00), SIMDE_FLOAT32_C( -460.00), SIMDE_FLOAT32_C( -69.00), SIMDE_FLOAT32_C( -313.00), SIMDE_FLOAT32_C( 467.00), SIMDE_FLOAT32_C( 
-256.00), SIMDE_FLOAT32_C( -176.00) } }, { { SIMDE_FLOAT32_C( 122.86), SIMDE_FLOAT32_C( 852.89), SIMDE_FLOAT32_C( -258.33), SIMDE_FLOAT32_C( -875.98), SIMDE_FLOAT32_C( -508.09), SIMDE_FLOAT32_C( 346.97), SIMDE_FLOAT32_C( 612.54), SIMDE_FLOAT32_C( -590.42), SIMDE_FLOAT32_C( 668.92), SIMDE_FLOAT32_C( 873.16), SIMDE_FLOAT32_C( 819.25), SIMDE_FLOAT32_C( -347.08), SIMDE_FLOAT32_C( 276.15), SIMDE_FLOAT32_C( -605.25), SIMDE_FLOAT32_C( 428.08), SIMDE_FLOAT32_C( -838.29) }, { SIMDE_FLOAT32_C( 123.00), SIMDE_FLOAT32_C( 853.00), SIMDE_FLOAT32_C( -258.00), SIMDE_FLOAT32_C( -876.00), SIMDE_FLOAT32_C( -508.00), SIMDE_FLOAT32_C( 347.00), SIMDE_FLOAT32_C( 613.00), SIMDE_FLOAT32_C( -590.00), SIMDE_FLOAT32_C( 669.00), SIMDE_FLOAT32_C( 873.00), SIMDE_FLOAT32_C( 819.00), SIMDE_FLOAT32_C( -347.00), SIMDE_FLOAT32_C( 276.00), SIMDE_FLOAT32_C( -605.00), SIMDE_FLOAT32_C( 428.00), SIMDE_FLOAT32_C( -838.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_nearbyint_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_nearbyint_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 307.14), SIMDE_FLOAT32_C( 482.19), SIMDE_FLOAT32_C( 518.39), SIMDE_FLOAT32_C( 499.59), SIMDE_FLOAT32_C( -143.12), SIMDE_FLOAT32_C( 912.60), SIMDE_FLOAT32_C( 782.99), SIMDE_FLOAT32_C( -267.99), SIMDE_FLOAT32_C( -349.63), SIMDE_FLOAT32_C( -394.28), SIMDE_FLOAT32_C( -735.06), SIMDE_FLOAT32_C( 319.94), SIMDE_FLOAT32_C( -352.44), SIMDE_FLOAT32_C( 639.50), SIMDE_FLOAT32_C( -238.81), SIMDE_FLOAT32_C( 516.17) }, UINT8_C(107), { SIMDE_FLOAT32_C( -887.79), SIMDE_FLOAT32_C( 742.81), SIMDE_FLOAT32_C( -913.42), SIMDE_FLOAT32_C( -611.43), SIMDE_FLOAT32_C( 204.70), SIMDE_FLOAT32_C( 940.63), 
SIMDE_FLOAT32_C( -825.04), SIMDE_FLOAT32_C( 37.94), SIMDE_FLOAT32_C( 967.28), SIMDE_FLOAT32_C( -950.31), SIMDE_FLOAT32_C( -916.12), SIMDE_FLOAT32_C( 338.61), SIMDE_FLOAT32_C( -151.13), SIMDE_FLOAT32_C( -229.02), SIMDE_FLOAT32_C( -354.25), SIMDE_FLOAT32_C( -668.94) }, { SIMDE_FLOAT32_C( -888.00), SIMDE_FLOAT32_C( 743.00), SIMDE_FLOAT32_C( 518.39), SIMDE_FLOAT32_C( -611.00), SIMDE_FLOAT32_C( -143.12), SIMDE_FLOAT32_C( 941.00), SIMDE_FLOAT32_C( -825.00), SIMDE_FLOAT32_C( -267.99), SIMDE_FLOAT32_C( -349.63), SIMDE_FLOAT32_C( -394.28), SIMDE_FLOAT32_C( -735.06), SIMDE_FLOAT32_C( 319.94), SIMDE_FLOAT32_C( -352.44), SIMDE_FLOAT32_C( 639.50), SIMDE_FLOAT32_C( -238.81), SIMDE_FLOAT32_C( 516.17) } }, { { SIMDE_FLOAT32_C( -710.63), SIMDE_FLOAT32_C( -854.67), SIMDE_FLOAT32_C( 187.94), SIMDE_FLOAT32_C( -798.03), SIMDE_FLOAT32_C( 928.32), SIMDE_FLOAT32_C( 919.94), SIMDE_FLOAT32_C( -147.65), SIMDE_FLOAT32_C( -465.96), SIMDE_FLOAT32_C( -815.12), SIMDE_FLOAT32_C( -827.71), SIMDE_FLOAT32_C( 181.60), SIMDE_FLOAT32_C( 824.38), SIMDE_FLOAT32_C( -66.52), SIMDE_FLOAT32_C( -302.23), SIMDE_FLOAT32_C( -118.38), SIMDE_FLOAT32_C( 45.69) }, UINT8_C(170), { SIMDE_FLOAT32_C( -31.81), SIMDE_FLOAT32_C( 434.25), SIMDE_FLOAT32_C( 645.28), SIMDE_FLOAT32_C( -91.18), SIMDE_FLOAT32_C( 609.22), SIMDE_FLOAT32_C( -316.78), SIMDE_FLOAT32_C( -123.90), SIMDE_FLOAT32_C( 658.90), SIMDE_FLOAT32_C( -232.89), SIMDE_FLOAT32_C( -785.30), SIMDE_FLOAT32_C( -492.22), SIMDE_FLOAT32_C( 538.09), SIMDE_FLOAT32_C( -139.55), SIMDE_FLOAT32_C( -161.16), SIMDE_FLOAT32_C( 827.46), SIMDE_FLOAT32_C( 5.78) }, { SIMDE_FLOAT32_C( -710.63), SIMDE_FLOAT32_C( 434.00), SIMDE_FLOAT32_C( 187.94), SIMDE_FLOAT32_C( -91.00), SIMDE_FLOAT32_C( 928.32), SIMDE_FLOAT32_C( -317.00), SIMDE_FLOAT32_C( -147.65), SIMDE_FLOAT32_C( 659.00), SIMDE_FLOAT32_C( -815.12), SIMDE_FLOAT32_C( -827.71), SIMDE_FLOAT32_C( 181.60), SIMDE_FLOAT32_C( 824.38), SIMDE_FLOAT32_C( -66.52), SIMDE_FLOAT32_C( -302.23), SIMDE_FLOAT32_C( -118.38), SIMDE_FLOAT32_C( 45.69) } }, { 
{ SIMDE_FLOAT32_C( -973.23), SIMDE_FLOAT32_C( -970.57), SIMDE_FLOAT32_C( -65.89), SIMDE_FLOAT32_C( 946.72), SIMDE_FLOAT32_C( -118.22), SIMDE_FLOAT32_C( 468.15), SIMDE_FLOAT32_C( -868.40), SIMDE_FLOAT32_C( 54.07), SIMDE_FLOAT32_C( -350.25), SIMDE_FLOAT32_C( 955.97), SIMDE_FLOAT32_C( 987.55), SIMDE_FLOAT32_C( 347.52), SIMDE_FLOAT32_C( -162.41), SIMDE_FLOAT32_C( 33.24), SIMDE_FLOAT32_C( 788.11), SIMDE_FLOAT32_C( 805.78) }, UINT8_C(147), { SIMDE_FLOAT32_C( 433.39), SIMDE_FLOAT32_C( -285.40), SIMDE_FLOAT32_C( -923.29), SIMDE_FLOAT32_C( -883.39), SIMDE_FLOAT32_C( 590.69), SIMDE_FLOAT32_C( 735.61), SIMDE_FLOAT32_C( -116.28), SIMDE_FLOAT32_C( 805.40), SIMDE_FLOAT32_C( -756.61), SIMDE_FLOAT32_C( -578.19), SIMDE_FLOAT32_C( -334.15), SIMDE_FLOAT32_C( 82.23), SIMDE_FLOAT32_C( -750.73), SIMDE_FLOAT32_C( 671.63), SIMDE_FLOAT32_C( 109.00), SIMDE_FLOAT32_C( -721.30) }, { SIMDE_FLOAT32_C( 433.00), SIMDE_FLOAT32_C( -285.00), SIMDE_FLOAT32_C( -65.89), SIMDE_FLOAT32_C( 946.72), SIMDE_FLOAT32_C( 591.00), SIMDE_FLOAT32_C( 468.15), SIMDE_FLOAT32_C( -868.40), SIMDE_FLOAT32_C( 805.00), SIMDE_FLOAT32_C( -350.25), SIMDE_FLOAT32_C( 955.97), SIMDE_FLOAT32_C( 987.55), SIMDE_FLOAT32_C( 347.52), SIMDE_FLOAT32_C( -162.41), SIMDE_FLOAT32_C( 33.24), SIMDE_FLOAT32_C( 788.11), SIMDE_FLOAT32_C( 805.78) } }, { { SIMDE_FLOAT32_C( -394.26), SIMDE_FLOAT32_C( 55.71), SIMDE_FLOAT32_C( 160.48), SIMDE_FLOAT32_C( -926.11), SIMDE_FLOAT32_C( 187.31), SIMDE_FLOAT32_C( -785.45), SIMDE_FLOAT32_C( -276.36), SIMDE_FLOAT32_C( 143.28), SIMDE_FLOAT32_C( -797.89), SIMDE_FLOAT32_C( -928.84), SIMDE_FLOAT32_C( 980.87), SIMDE_FLOAT32_C( 235.35), SIMDE_FLOAT32_C( 859.27), SIMDE_FLOAT32_C( 786.65), SIMDE_FLOAT32_C( 702.84), SIMDE_FLOAT32_C( 292.65) }, UINT8_C( 5), { SIMDE_FLOAT32_C( 779.55), SIMDE_FLOAT32_C( 409.26), SIMDE_FLOAT32_C( -908.05), SIMDE_FLOAT32_C( 515.17), SIMDE_FLOAT32_C( -707.02), SIMDE_FLOAT32_C( 897.34), SIMDE_FLOAT32_C( 758.56), SIMDE_FLOAT32_C( -285.21), SIMDE_FLOAT32_C( -436.81), SIMDE_FLOAT32_C( -159.22), 
SIMDE_FLOAT32_C( -35.94), SIMDE_FLOAT32_C( -765.18), SIMDE_FLOAT32_C( 949.78), SIMDE_FLOAT32_C( 242.76), SIMDE_FLOAT32_C( -159.44), SIMDE_FLOAT32_C( 5.49) }, { SIMDE_FLOAT32_C( 780.00), SIMDE_FLOAT32_C( 55.71), SIMDE_FLOAT32_C( -908.00), SIMDE_FLOAT32_C( -926.11), SIMDE_FLOAT32_C( 187.31), SIMDE_FLOAT32_C( -785.45), SIMDE_FLOAT32_C( -276.36), SIMDE_FLOAT32_C( 143.28), SIMDE_FLOAT32_C( -797.89), SIMDE_FLOAT32_C( -928.84), SIMDE_FLOAT32_C( 980.87), SIMDE_FLOAT32_C( 235.35), SIMDE_FLOAT32_C( 859.27), SIMDE_FLOAT32_C( 786.65), SIMDE_FLOAT32_C( 702.84), SIMDE_FLOAT32_C( 292.65) } }, { { SIMDE_FLOAT32_C( -596.76), SIMDE_FLOAT32_C( -85.56), SIMDE_FLOAT32_C( -807.20), SIMDE_FLOAT32_C( -382.21), SIMDE_FLOAT32_C( 638.08), SIMDE_FLOAT32_C( 336.09), SIMDE_FLOAT32_C( -180.10), SIMDE_FLOAT32_C( 709.25), SIMDE_FLOAT32_C( 316.96), SIMDE_FLOAT32_C( -944.76), SIMDE_FLOAT32_C( 568.51), SIMDE_FLOAT32_C( 103.62), SIMDE_FLOAT32_C( 758.08), SIMDE_FLOAT32_C( -138.83), SIMDE_FLOAT32_C( 604.87), SIMDE_FLOAT32_C( 537.64) }, UINT8_C( 9), { SIMDE_FLOAT32_C( 696.82), SIMDE_FLOAT32_C( 52.80), SIMDE_FLOAT32_C( -436.59), SIMDE_FLOAT32_C( 594.16), SIMDE_FLOAT32_C( -188.64), SIMDE_FLOAT32_C( 278.20), SIMDE_FLOAT32_C( -842.65), SIMDE_FLOAT32_C( 652.14), SIMDE_FLOAT32_C( -757.74), SIMDE_FLOAT32_C( -607.83), SIMDE_FLOAT32_C( 601.92), SIMDE_FLOAT32_C( 485.02), SIMDE_FLOAT32_C( 232.73), SIMDE_FLOAT32_C( -392.58), SIMDE_FLOAT32_C( 888.25), SIMDE_FLOAT32_C( -852.82) }, { SIMDE_FLOAT32_C( 697.00), SIMDE_FLOAT32_C( -85.56), SIMDE_FLOAT32_C( -807.20), SIMDE_FLOAT32_C( 594.00), SIMDE_FLOAT32_C( 638.08), SIMDE_FLOAT32_C( 336.09), SIMDE_FLOAT32_C( -180.10), SIMDE_FLOAT32_C( 709.25), SIMDE_FLOAT32_C( 316.96), SIMDE_FLOAT32_C( -944.76), SIMDE_FLOAT32_C( 568.51), SIMDE_FLOAT32_C( 103.62), SIMDE_FLOAT32_C( 758.08), SIMDE_FLOAT32_C( -138.83), SIMDE_FLOAT32_C( 604.87), SIMDE_FLOAT32_C( 537.64) } }, { { SIMDE_FLOAT32_C( -199.78), SIMDE_FLOAT32_C( -493.96), SIMDE_FLOAT32_C( 785.26), SIMDE_FLOAT32_C( -863.69), 
SIMDE_FLOAT32_C( 325.94), SIMDE_FLOAT32_C( 494.50), SIMDE_FLOAT32_C( 453.27), SIMDE_FLOAT32_C( 381.18), SIMDE_FLOAT32_C( 63.02), SIMDE_FLOAT32_C( -443.12), SIMDE_FLOAT32_C( 139.26), SIMDE_FLOAT32_C( 924.18), SIMDE_FLOAT32_C( -838.25), SIMDE_FLOAT32_C( -323.10), SIMDE_FLOAT32_C( -805.38), SIMDE_FLOAT32_C( 858.57) }, UINT8_C(245), { SIMDE_FLOAT32_C( -241.97), SIMDE_FLOAT32_C( 452.73), SIMDE_FLOAT32_C( -458.94), SIMDE_FLOAT32_C( -963.77), SIMDE_FLOAT32_C( 610.08), SIMDE_FLOAT32_C( -806.80), SIMDE_FLOAT32_C( -721.51), SIMDE_FLOAT32_C( -997.75), SIMDE_FLOAT32_C( 795.12), SIMDE_FLOAT32_C( 763.51), SIMDE_FLOAT32_C( 234.98), SIMDE_FLOAT32_C( -597.47), SIMDE_FLOAT32_C( 651.76), SIMDE_FLOAT32_C( 382.16), SIMDE_FLOAT32_C( 202.75), SIMDE_FLOAT32_C( -842.20) }, { SIMDE_FLOAT32_C( -242.00), SIMDE_FLOAT32_C( -493.96), SIMDE_FLOAT32_C( -459.00), SIMDE_FLOAT32_C( -863.69), SIMDE_FLOAT32_C( 610.00), SIMDE_FLOAT32_C( -807.00), SIMDE_FLOAT32_C( -722.00), SIMDE_FLOAT32_C( -998.00), SIMDE_FLOAT32_C( 63.02), SIMDE_FLOAT32_C( -443.12), SIMDE_FLOAT32_C( 139.26), SIMDE_FLOAT32_C( 924.18), SIMDE_FLOAT32_C( -838.25), SIMDE_FLOAT32_C( -323.10), SIMDE_FLOAT32_C( -805.38), SIMDE_FLOAT32_C( 858.57) } }, { { SIMDE_FLOAT32_C( 167.42), SIMDE_FLOAT32_C( 339.06), SIMDE_FLOAT32_C( 483.74), SIMDE_FLOAT32_C( -338.08), SIMDE_FLOAT32_C( -207.67), SIMDE_FLOAT32_C( -135.08), SIMDE_FLOAT32_C( 724.94), SIMDE_FLOAT32_C( 349.21), SIMDE_FLOAT32_C( -995.82), SIMDE_FLOAT32_C( 649.12), SIMDE_FLOAT32_C( 510.96), SIMDE_FLOAT32_C( -318.92), SIMDE_FLOAT32_C( 843.74), SIMDE_FLOAT32_C( 369.53), SIMDE_FLOAT32_C( -589.22), SIMDE_FLOAT32_C( -398.24) }, UINT8_C( 64), { SIMDE_FLOAT32_C( -48.16), SIMDE_FLOAT32_C( -362.01), SIMDE_FLOAT32_C( -567.67), SIMDE_FLOAT32_C( 145.04), SIMDE_FLOAT32_C( -83.52), SIMDE_FLOAT32_C( -565.41), SIMDE_FLOAT32_C( -59.84), SIMDE_FLOAT32_C( -320.01), SIMDE_FLOAT32_C( 669.57), SIMDE_FLOAT32_C( 342.69), SIMDE_FLOAT32_C( -668.25), SIMDE_FLOAT32_C( 51.73), SIMDE_FLOAT32_C( -454.56), SIMDE_FLOAT32_C( 
-510.45), SIMDE_FLOAT32_C( -780.86), SIMDE_FLOAT32_C( 884.50) }, { SIMDE_FLOAT32_C( 167.42), SIMDE_FLOAT32_C( 339.06), SIMDE_FLOAT32_C( 483.74), SIMDE_FLOAT32_C( -338.08), SIMDE_FLOAT32_C( -207.67), SIMDE_FLOAT32_C( -135.08), SIMDE_FLOAT32_C( -60.00), SIMDE_FLOAT32_C( 349.21), SIMDE_FLOAT32_C( -995.82), SIMDE_FLOAT32_C( 649.12), SIMDE_FLOAT32_C( 510.96), SIMDE_FLOAT32_C( -318.92), SIMDE_FLOAT32_C( 843.74), SIMDE_FLOAT32_C( 369.53), SIMDE_FLOAT32_C( -589.22), SIMDE_FLOAT32_C( -398.24) } }, { { SIMDE_FLOAT32_C( 973.29), SIMDE_FLOAT32_C( -118.94), SIMDE_FLOAT32_C( -323.17), SIMDE_FLOAT32_C( -161.78), SIMDE_FLOAT32_C( -394.00), SIMDE_FLOAT32_C( -973.95), SIMDE_FLOAT32_C( -157.60), SIMDE_FLOAT32_C( -744.88), SIMDE_FLOAT32_C( 537.01), SIMDE_FLOAT32_C( 523.48), SIMDE_FLOAT32_C( -901.15), SIMDE_FLOAT32_C( -93.46), SIMDE_FLOAT32_C( 934.26), SIMDE_FLOAT32_C( -299.38), SIMDE_FLOAT32_C( 728.79), SIMDE_FLOAT32_C( -113.90) }, UINT8_C( 86), { SIMDE_FLOAT32_C( -838.87), SIMDE_FLOAT32_C( -968.86), SIMDE_FLOAT32_C( -744.90), SIMDE_FLOAT32_C( -404.28), SIMDE_FLOAT32_C( -28.71), SIMDE_FLOAT32_C( -64.91), SIMDE_FLOAT32_C( -734.71), SIMDE_FLOAT32_C( -686.02), SIMDE_FLOAT32_C( 266.84), SIMDE_FLOAT32_C( 317.01), SIMDE_FLOAT32_C( -140.57), SIMDE_FLOAT32_C( 756.39), SIMDE_FLOAT32_C( 536.16), SIMDE_FLOAT32_C( -256.07), SIMDE_FLOAT32_C( 729.69), SIMDE_FLOAT32_C( -582.78) }, { SIMDE_FLOAT32_C( 973.29), SIMDE_FLOAT32_C( -969.00), SIMDE_FLOAT32_C( -745.00), SIMDE_FLOAT32_C( -161.78), SIMDE_FLOAT32_C( -29.00), SIMDE_FLOAT32_C( -973.95), SIMDE_FLOAT32_C( -735.00), SIMDE_FLOAT32_C( -744.88), SIMDE_FLOAT32_C( 537.01), SIMDE_FLOAT32_C( 523.48), SIMDE_FLOAT32_C( -901.15), SIMDE_FLOAT32_C( -93.46), SIMDE_FLOAT32_C( 934.26), SIMDE_FLOAT32_C( -299.38), SIMDE_FLOAT32_C( 728.79), SIMDE_FLOAT32_C( -113.90) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r 
= simde_mm512_mask_nearbyint_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_nearbyint_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -456.37), SIMDE_FLOAT64_C( 239.71), SIMDE_FLOAT64_C( -214.46), SIMDE_FLOAT64_C( -228.66), SIMDE_FLOAT64_C( -452.56), SIMDE_FLOAT64_C( -734.09), SIMDE_FLOAT64_C( 235.92), SIMDE_FLOAT64_C( 143.86) }, { SIMDE_FLOAT64_C( -456.00), SIMDE_FLOAT64_C( 240.00), SIMDE_FLOAT64_C( -214.00), SIMDE_FLOAT64_C( -229.00), SIMDE_FLOAT64_C( -453.00), SIMDE_FLOAT64_C( -734.00), SIMDE_FLOAT64_C( 236.00), SIMDE_FLOAT64_C( 144.00) } }, { { SIMDE_FLOAT64_C( -285.31), SIMDE_FLOAT64_C( -86.71), SIMDE_FLOAT64_C( 920.29), SIMDE_FLOAT64_C( -690.90), SIMDE_FLOAT64_C( -912.99), SIMDE_FLOAT64_C( -452.36), SIMDE_FLOAT64_C( -958.90), SIMDE_FLOAT64_C( -103.11) }, { SIMDE_FLOAT64_C( -285.00), SIMDE_FLOAT64_C( -87.00), SIMDE_FLOAT64_C( 920.00), SIMDE_FLOAT64_C( -691.00), SIMDE_FLOAT64_C( -913.00), SIMDE_FLOAT64_C( -452.00), SIMDE_FLOAT64_C( -959.00), SIMDE_FLOAT64_C( -103.00) } }, { { SIMDE_FLOAT64_C( -186.33), SIMDE_FLOAT64_C( -533.97), SIMDE_FLOAT64_C( 740.01), SIMDE_FLOAT64_C( -835.54), SIMDE_FLOAT64_C( 905.55), SIMDE_FLOAT64_C( 918.31), SIMDE_FLOAT64_C( 254.16), SIMDE_FLOAT64_C( -207.74) }, { SIMDE_FLOAT64_C( -186.00), SIMDE_FLOAT64_C( -534.00), SIMDE_FLOAT64_C( 740.00), SIMDE_FLOAT64_C( -836.00), SIMDE_FLOAT64_C( 906.00), SIMDE_FLOAT64_C( 918.00), SIMDE_FLOAT64_C( 254.00), SIMDE_FLOAT64_C( -208.00) } }, { { SIMDE_FLOAT64_C( -15.89), SIMDE_FLOAT64_C( 697.49), SIMDE_FLOAT64_C( -777.91), SIMDE_FLOAT64_C( -743.01), SIMDE_FLOAT64_C( 145.93), SIMDE_FLOAT64_C( 408.99), SIMDE_FLOAT64_C( -288.89), SIMDE_FLOAT64_C( 689.55) }, { SIMDE_FLOAT64_C( -16.00), SIMDE_FLOAT64_C( 697.00), SIMDE_FLOAT64_C( -778.00), SIMDE_FLOAT64_C( -743.00), SIMDE_FLOAT64_C( 146.00), SIMDE_FLOAT64_C( 409.00), 
SIMDE_FLOAT64_C( -289.00), SIMDE_FLOAT64_C( 690.00) } }, { { SIMDE_FLOAT64_C( -351.30), SIMDE_FLOAT64_C( 496.65), SIMDE_FLOAT64_C( -539.11), SIMDE_FLOAT64_C( 196.13), SIMDE_FLOAT64_C( 762.55), SIMDE_FLOAT64_C( 696.81), SIMDE_FLOAT64_C( -660.01), SIMDE_FLOAT64_C( -522.75) }, { SIMDE_FLOAT64_C( -351.00), SIMDE_FLOAT64_C( 497.00), SIMDE_FLOAT64_C( -539.00), SIMDE_FLOAT64_C( 196.00), SIMDE_FLOAT64_C( 763.00), SIMDE_FLOAT64_C( 697.00), SIMDE_FLOAT64_C( -660.00), SIMDE_FLOAT64_C( -523.00) } }, { { SIMDE_FLOAT64_C( -389.90), SIMDE_FLOAT64_C( -739.72), SIMDE_FLOAT64_C( -213.65), SIMDE_FLOAT64_C( -302.89), SIMDE_FLOAT64_C( -192.08), SIMDE_FLOAT64_C( -172.55), SIMDE_FLOAT64_C( 594.00), SIMDE_FLOAT64_C( 621.59) }, { SIMDE_FLOAT64_C( -390.00), SIMDE_FLOAT64_C( -740.00), SIMDE_FLOAT64_C( -214.00), SIMDE_FLOAT64_C( -303.00), SIMDE_FLOAT64_C( -192.00), SIMDE_FLOAT64_C( -173.00), SIMDE_FLOAT64_C( 594.00), SIMDE_FLOAT64_C( 622.00) } }, { { SIMDE_FLOAT64_C( 293.48), SIMDE_FLOAT64_C( 334.01), SIMDE_FLOAT64_C( 786.05), SIMDE_FLOAT64_C( 199.03), SIMDE_FLOAT64_C( 252.33), SIMDE_FLOAT64_C( 40.22), SIMDE_FLOAT64_C( 991.29), SIMDE_FLOAT64_C( -763.57) }, { SIMDE_FLOAT64_C( 293.00), SIMDE_FLOAT64_C( 334.00), SIMDE_FLOAT64_C( 786.00), SIMDE_FLOAT64_C( 199.00), SIMDE_FLOAT64_C( 252.00), SIMDE_FLOAT64_C( 40.00), SIMDE_FLOAT64_C( 991.00), SIMDE_FLOAT64_C( -764.00) } }, { { SIMDE_FLOAT64_C( -262.29), SIMDE_FLOAT64_C( -786.62), SIMDE_FLOAT64_C( -506.58), SIMDE_FLOAT64_C( 883.63), SIMDE_FLOAT64_C( 622.37), SIMDE_FLOAT64_C( 204.53), SIMDE_FLOAT64_C( 573.19), SIMDE_FLOAT64_C( -728.93) }, { SIMDE_FLOAT64_C( -262.00), SIMDE_FLOAT64_C( -787.00), SIMDE_FLOAT64_C( -507.00), SIMDE_FLOAT64_C( 884.00), SIMDE_FLOAT64_C( 622.00), SIMDE_FLOAT64_C( 205.00), SIMDE_FLOAT64_C( 573.00), SIMDE_FLOAT64_C( -729.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_nearbyint_pd(a); 
simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_nearbyint_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 381.89), SIMDE_FLOAT64_C( -277.28), SIMDE_FLOAT64_C( -400.11), SIMDE_FLOAT64_C( -872.84), SIMDE_FLOAT64_C( -66.17), SIMDE_FLOAT64_C( -250.32), SIMDE_FLOAT64_C( -214.38), SIMDE_FLOAT64_C( 965.87) }, UINT8_C(106), { SIMDE_FLOAT64_C( 141.85), SIMDE_FLOAT64_C( 88.38), SIMDE_FLOAT64_C( -374.04), SIMDE_FLOAT64_C( 906.38), SIMDE_FLOAT64_C( 851.98), SIMDE_FLOAT64_C( -170.13), SIMDE_FLOAT64_C( -142.10), SIMDE_FLOAT64_C( -367.42) }, { SIMDE_FLOAT64_C( 381.89), SIMDE_FLOAT64_C( 88.00), SIMDE_FLOAT64_C( -400.11), SIMDE_FLOAT64_C( 906.00), SIMDE_FLOAT64_C( -66.17), SIMDE_FLOAT64_C( -170.00), SIMDE_FLOAT64_C( -142.00), SIMDE_FLOAT64_C( 965.87) } }, { { SIMDE_FLOAT64_C( 49.27), SIMDE_FLOAT64_C( 950.21), SIMDE_FLOAT64_C( 214.00), SIMDE_FLOAT64_C( 575.74), SIMDE_FLOAT64_C( -350.82), SIMDE_FLOAT64_C( 512.95), SIMDE_FLOAT64_C( -227.13), SIMDE_FLOAT64_C( -609.67) }, UINT8_C( 61), { SIMDE_FLOAT64_C( 586.44), SIMDE_FLOAT64_C( 381.99), SIMDE_FLOAT64_C( 608.18), SIMDE_FLOAT64_C( 184.92), SIMDE_FLOAT64_C( -474.55), SIMDE_FLOAT64_C( -9.93), SIMDE_FLOAT64_C( 907.64), SIMDE_FLOAT64_C( 125.34) }, { SIMDE_FLOAT64_C( 586.00), SIMDE_FLOAT64_C( 950.21), SIMDE_FLOAT64_C( 608.00), SIMDE_FLOAT64_C( 185.00), SIMDE_FLOAT64_C( -475.00), SIMDE_FLOAT64_C( -10.00), SIMDE_FLOAT64_C( -227.13), SIMDE_FLOAT64_C( -609.67) } }, { { SIMDE_FLOAT64_C( 117.23), SIMDE_FLOAT64_C( -158.52), SIMDE_FLOAT64_C( 875.02), SIMDE_FLOAT64_C( 902.85), SIMDE_FLOAT64_C( -192.66), SIMDE_FLOAT64_C( -256.64), SIMDE_FLOAT64_C( 44.70), SIMDE_FLOAT64_C( 895.72) }, UINT8_C(180), { SIMDE_FLOAT64_C( -48.92), SIMDE_FLOAT64_C( 747.70), SIMDE_FLOAT64_C( -800.80), SIMDE_FLOAT64_C( 808.98), SIMDE_FLOAT64_C( -619.73), 
SIMDE_FLOAT64_C( 248.47), SIMDE_FLOAT64_C( 759.18), SIMDE_FLOAT64_C( 594.28) }, { SIMDE_FLOAT64_C( 117.23), SIMDE_FLOAT64_C( -158.52), SIMDE_FLOAT64_C( -801.00), SIMDE_FLOAT64_C( 902.85), SIMDE_FLOAT64_C( -620.00), SIMDE_FLOAT64_C( 248.00), SIMDE_FLOAT64_C( 44.70), SIMDE_FLOAT64_C( 594.00) } }, { { SIMDE_FLOAT64_C( -175.78), SIMDE_FLOAT64_C( -591.64), SIMDE_FLOAT64_C( 107.22), SIMDE_FLOAT64_C( 597.09), SIMDE_FLOAT64_C( -201.31), SIMDE_FLOAT64_C( -742.21), SIMDE_FLOAT64_C( 183.53), SIMDE_FLOAT64_C( -819.31) }, UINT8_C(241), { SIMDE_FLOAT64_C( -631.55), SIMDE_FLOAT64_C( -293.87), SIMDE_FLOAT64_C( -143.96), SIMDE_FLOAT64_C( -723.91), SIMDE_FLOAT64_C( 831.47), SIMDE_FLOAT64_C( 973.27), SIMDE_FLOAT64_C( 117.57), SIMDE_FLOAT64_C( 706.49) }, { SIMDE_FLOAT64_C( -632.00), SIMDE_FLOAT64_C( -591.64), SIMDE_FLOAT64_C( 107.22), SIMDE_FLOAT64_C( 597.09), SIMDE_FLOAT64_C( 831.00), SIMDE_FLOAT64_C( 973.00), SIMDE_FLOAT64_C( 118.00), SIMDE_FLOAT64_C( 706.00) } }, { { SIMDE_FLOAT64_C( 876.13), SIMDE_FLOAT64_C( 924.91), SIMDE_FLOAT64_C( -550.14), SIMDE_FLOAT64_C( -79.17), SIMDE_FLOAT64_C( 820.63), SIMDE_FLOAT64_C( 819.19), SIMDE_FLOAT64_C( 871.91), SIMDE_FLOAT64_C( 568.33) }, UINT8_C(250), { SIMDE_FLOAT64_C( 680.89), SIMDE_FLOAT64_C( 948.60), SIMDE_FLOAT64_C( 266.86), SIMDE_FLOAT64_C( 440.07), SIMDE_FLOAT64_C( 542.88), SIMDE_FLOAT64_C( -908.92), SIMDE_FLOAT64_C( 848.43), SIMDE_FLOAT64_C( -349.90) }, { SIMDE_FLOAT64_C( 876.13), SIMDE_FLOAT64_C( 949.00), SIMDE_FLOAT64_C( -550.14), SIMDE_FLOAT64_C( 440.00), SIMDE_FLOAT64_C( 543.00), SIMDE_FLOAT64_C( -909.00), SIMDE_FLOAT64_C( 848.00), SIMDE_FLOAT64_C( -350.00) } }, { { SIMDE_FLOAT64_C( 688.16), SIMDE_FLOAT64_C( -352.87), SIMDE_FLOAT64_C( -92.11), SIMDE_FLOAT64_C( -128.31), SIMDE_FLOAT64_C( -172.19), SIMDE_FLOAT64_C( -226.14), SIMDE_FLOAT64_C( 240.14), SIMDE_FLOAT64_C( 533.94) }, UINT8_C( 61), { SIMDE_FLOAT64_C( 516.23), SIMDE_FLOAT64_C( 365.42), SIMDE_FLOAT64_C( 603.18), SIMDE_FLOAT64_C( -366.20), SIMDE_FLOAT64_C( 71.91), 
SIMDE_FLOAT64_C( 479.30), SIMDE_FLOAT64_C( -441.29), SIMDE_FLOAT64_C( 521.77) }, { SIMDE_FLOAT64_C( 516.00), SIMDE_FLOAT64_C( -352.87), SIMDE_FLOAT64_C( 603.00), SIMDE_FLOAT64_C( -366.00), SIMDE_FLOAT64_C( 72.00), SIMDE_FLOAT64_C( 479.00), SIMDE_FLOAT64_C( 240.14), SIMDE_FLOAT64_C( 533.94) } }, { { SIMDE_FLOAT64_C( -599.87), SIMDE_FLOAT64_C( -620.66), SIMDE_FLOAT64_C( 340.95), SIMDE_FLOAT64_C( -727.96), SIMDE_FLOAT64_C( 947.67), SIMDE_FLOAT64_C( 359.34), SIMDE_FLOAT64_C( 952.92), SIMDE_FLOAT64_C( 896.27) }, UINT8_C( 22), { SIMDE_FLOAT64_C( 392.99), SIMDE_FLOAT64_C( 439.14), SIMDE_FLOAT64_C( -282.72), SIMDE_FLOAT64_C( 241.43), SIMDE_FLOAT64_C( -910.76), SIMDE_FLOAT64_C( -594.56), SIMDE_FLOAT64_C( 888.55), SIMDE_FLOAT64_C( -2.87) }, { SIMDE_FLOAT64_C( -599.87), SIMDE_FLOAT64_C( 439.00), SIMDE_FLOAT64_C( -283.00), SIMDE_FLOAT64_C( -727.96), SIMDE_FLOAT64_C( -911.00), SIMDE_FLOAT64_C( 359.34), SIMDE_FLOAT64_C( 952.92), SIMDE_FLOAT64_C( 896.27) } }, { { SIMDE_FLOAT64_C( 277.14), SIMDE_FLOAT64_C( -283.64), SIMDE_FLOAT64_C( 770.99), SIMDE_FLOAT64_C( -482.72), SIMDE_FLOAT64_C( -749.69), SIMDE_FLOAT64_C( 400.90), SIMDE_FLOAT64_C( -966.49), SIMDE_FLOAT64_C( 615.72) }, UINT8_C(173), { SIMDE_FLOAT64_C( -332.68), SIMDE_FLOAT64_C( -312.37), SIMDE_FLOAT64_C( -516.63), SIMDE_FLOAT64_C( 226.03), SIMDE_FLOAT64_C( -790.60), SIMDE_FLOAT64_C( -116.50), SIMDE_FLOAT64_C( 605.37), SIMDE_FLOAT64_C( 550.35) }, { SIMDE_FLOAT64_C( -333.00), SIMDE_FLOAT64_C( -283.64), SIMDE_FLOAT64_C( -517.00), SIMDE_FLOAT64_C( 226.00), SIMDE_FLOAT64_C( -749.69), SIMDE_FLOAT64_C( -116.00), SIMDE_FLOAT64_C( -966.49), SIMDE_FLOAT64_C( 550.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_nearbyint_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int 
test_simde_mm_pow_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 b[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 3.20), SIMDE_FLOAT32_C( 4.42), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 1.48) }, { SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 3.01), SIMDE_FLOAT32_C( 3.83) }, { SIMDE_FLOAT32_C( 1.79), SIMDE_FLOAT32_C( 2.30), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 4.49) } }, { { SIMDE_FLOAT32_C( 4.49), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( 4.20), SIMDE_FLOAT32_C( 3.10) }, { SIMDE_FLOAT32_C( 2.65), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 1.17) }, { SIMDE_FLOAT32_C( 53.51), SIMDE_FLOAT32_C( 1.23), SIMDE_FLOAT32_C( 1.07), SIMDE_FLOAT32_C( 3.76) } }, { { SIMDE_FLOAT32_C( 3.21), SIMDE_FLOAT32_C( 4.91), SIMDE_FLOAT32_C( 4.05), SIMDE_FLOAT32_C( 0.12) }, { SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 2.91), SIMDE_FLOAT32_C( 4.46) }, { SIMDE_FLOAT32_C( 4.61), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 58.57), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 3.77), SIMDE_FLOAT32_C( 3.76), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 2.56) }, { SIMDE_FLOAT32_C( 1.47), SIMDE_FLOAT32_C( 3.39), SIMDE_FLOAT32_C( 3.29), SIMDE_FLOAT32_C( 4.67) }, { SIMDE_FLOAT32_C( 7.03), SIMDE_FLOAT32_C( 89.10), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 80.63) } }, { { SIMDE_FLOAT32_C( 2.81), SIMDE_FLOAT32_C( 4.23), SIMDE_FLOAT32_C( 1.15), SIMDE_FLOAT32_C( 3.31) }, { SIMDE_FLOAT32_C( 4.79), SIMDE_FLOAT32_C( 4.15), SIMDE_FLOAT32_C( 2.14), SIMDE_FLOAT32_C( 4.28) }, { SIMDE_FLOAT32_C( 141.03), SIMDE_FLOAT32_C( 397.48), SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( 167.83) } }, { { SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 2.38), SIMDE_FLOAT32_C( 3.04) }, { SIMDE_FLOAT32_C( 2.31), SIMDE_FLOAT32_C( 2.43), SIMDE_FLOAT32_C( 4.21), SIMDE_FLOAT32_C( 0.52) }, { SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 2.04), SIMDE_FLOAT32_C( 38.49), SIMDE_FLOAT32_C( 1.78) } }, 
{ { SIMDE_FLOAT32_C( 2.34), SIMDE_FLOAT32_C( 3.26), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 3.65) }, { SIMDE_FLOAT32_C( 3.26), SIMDE_FLOAT32_C( 3.55), SIMDE_FLOAT32_C( 3.11), SIMDE_FLOAT32_C( 2.03) }, { SIMDE_FLOAT32_C( 15.98), SIMDE_FLOAT32_C( 66.36), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( 13.85) } }, { { SIMDE_FLOAT32_C( 2.31), SIMDE_FLOAT32_C( 3.31), SIMDE_FLOAT32_C( 4.59), SIMDE_FLOAT32_C( 3.78) }, { SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( 2.88), SIMDE_FLOAT32_C( 3.45), SIMDE_FLOAT32_C( 4.50) }, { SIMDE_FLOAT32_C( 4.12), SIMDE_FLOAT32_C( 31.41), SIMDE_FLOAT32_C( 191.98), SIMDE_FLOAT32_C( 396.93) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_mm_pow_ps(a, b); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_pow_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 b[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 6.86), SIMDE_FLOAT64_C( 4.28) }, { SIMDE_FLOAT64_C( 2.99), SIMDE_FLOAT64_C( 7.45) }, { SIMDE_FLOAT64_C( 316.67), SIMDE_FLOAT64_C( 50612.30) } }, { { SIMDE_FLOAT64_C( 7.72), SIMDE_FLOAT64_C( 8.36) }, { SIMDE_FLOAT64_C( 4.17), SIMDE_FLOAT64_C( 1.82) }, { SIMDE_FLOAT64_C( 5027.64), SIMDE_FLOAT64_C( 47.69) } }, { { SIMDE_FLOAT64_C( 9.11), SIMDE_FLOAT64_C( 6.23) }, { SIMDE_FLOAT64_C( 1.26), SIMDE_FLOAT64_C( 4.65) }, { SIMDE_FLOAT64_C( 16.18), SIMDE_FLOAT64_C( 4947.31) } }, { { SIMDE_FLOAT64_C( 2.75), SIMDE_FLOAT64_C( 7.48) }, { SIMDE_FLOAT64_C( 0.85), SIMDE_FLOAT64_C( 0.71) }, { SIMDE_FLOAT64_C( 2.36), SIMDE_FLOAT64_C( 4.17) } }, { { SIMDE_FLOAT64_C( 5.91), SIMDE_FLOAT64_C( 7.19) }, { SIMDE_FLOAT64_C( 1.19), SIMDE_FLOAT64_C( 5.92) }, { SIMDE_FLOAT64_C( 8.28), SIMDE_FLOAT64_C(117987.24) } }, { { SIMDE_FLOAT64_C( 5.42), SIMDE_FLOAT64_C( 3.06) }, { SIMDE_FLOAT64_C( 9.46), 
SIMDE_FLOAT64_C( 0.23) }, { SIMDE_FLOAT64_C(8782805.21), SIMDE_FLOAT64_C( 1.29) } }, { { SIMDE_FLOAT64_C( 6.88), SIMDE_FLOAT64_C( 9.69) }, { SIMDE_FLOAT64_C( 2.44), SIMDE_FLOAT64_C( 7.03) }, { SIMDE_FLOAT64_C( 110.59), SIMDE_FLOAT64_C(8587290.46) } }, { { SIMDE_FLOAT64_C( 9.85), SIMDE_FLOAT64_C( 1.85) }, { SIMDE_FLOAT64_C( 1.77), SIMDE_FLOAT64_C( 6.71) }, { SIMDE_FLOAT64_C( 57.33), SIMDE_FLOAT64_C( 62.05) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d b = simde_mm_loadu_pd(test_vec[i].b); simde__m128d r = simde_mm_pow_pd(a, b); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_pow_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 b[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 4.47), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 4.19), SIMDE_FLOAT32_C( 4.26) }, { SIMDE_FLOAT32_C( 4.92), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( 3.87), SIMDE_FLOAT32_C( 2.15), SIMDE_FLOAT32_C( 3.59), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 3.79), SIMDE_FLOAT32_C( 3.42) }, { SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 328.62), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 228.13), SIMDE_FLOAT32_C( 142.10) } }, { { SIMDE_FLOAT32_C( 3.72), SIMDE_FLOAT32_C( 4.06), SIMDE_FLOAT32_C( 2.24), SIMDE_FLOAT32_C( 3.04), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 1.49), SIMDE_FLOAT32_C( 2.02) }, { SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 3.82), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 3.50), SIMDE_FLOAT32_C( 1.39), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 4.31) }, { SIMDE_FLOAT32_C( 2.26), SIMDE_FLOAT32_C( 1.91), SIMDE_FLOAT32_C( 21.77), SIMDE_FLOAT32_C( 1.60), 
SIMDE_FLOAT32_C( 1.15), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 20.70) } }, { { SIMDE_FLOAT32_C( 2.29), SIMDE_FLOAT32_C( 4.91), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 2.94), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 4.26), SIMDE_FLOAT32_C( 2.20), SIMDE_FLOAT32_C( 0.66) }, { SIMDE_FLOAT32_C( 1.59), SIMDE_FLOAT32_C( 1.07), SIMDE_FLOAT32_C( 2.81), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 1.83), SIMDE_FLOAT32_C( 1.60), SIMDE_FLOAT32_C( 3.61), SIMDE_FLOAT32_C( 0.55) }, { SIMDE_FLOAT32_C( 3.73), SIMDE_FLOAT32_C( 5.49), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.21), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 10.16), SIMDE_FLOAT32_C( 17.22), SIMDE_FLOAT32_C( 0.80) } }, { { SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 3.59), SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 3.72), SIMDE_FLOAT32_C( 1.70) }, { SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 2.53), SIMDE_FLOAT32_C( 2.12), SIMDE_FLOAT32_C( 4.04), SIMDE_FLOAT32_C( 3.92), SIMDE_FLOAT32_C( 2.56), SIMDE_FLOAT32_C( 3.35), SIMDE_FLOAT32_C( 1.21) }, { SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 15.02), SIMDE_FLOAT32_C( 8.53), SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 81.53), SIMDE_FLOAT32_C( 1.90) } }, { { SIMDE_FLOAT32_C( 2.46), SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( 4.15), SIMDE_FLOAT32_C( 3.21), SIMDE_FLOAT32_C( 2.68), SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( 3.87), SIMDE_FLOAT32_C( 4.27) }, { SIMDE_FLOAT32_C( 2.42), SIMDE_FLOAT32_C( 1.68), SIMDE_FLOAT32_C( 4.45), SIMDE_FLOAT32_C( 4.25), SIMDE_FLOAT32_C( 3.28), SIMDE_FLOAT32_C( 3.06), SIMDE_FLOAT32_C( 4.80), SIMDE_FLOAT32_C( 3.94) }, { SIMDE_FLOAT32_C( 8.83), SIMDE_FLOAT32_C( 7.89), SIMDE_FLOAT32_C( 562.75), SIMDE_FLOAT32_C( 142.12), SIMDE_FLOAT32_C( 25.37), SIMDE_FLOAT32_C( 2.51), SIMDE_FLOAT32_C( 662.24), SIMDE_FLOAT32_C( 304.71) } }, { { SIMDE_FLOAT32_C( 3.90), SIMDE_FLOAT32_C( 3.39), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 4.98), 
SIMDE_FLOAT32_C( 3.46), SIMDE_FLOAT32_C( 4.35), SIMDE_FLOAT32_C( 1.68), SIMDE_FLOAT32_C( 3.99) }, { SIMDE_FLOAT32_C( 1.89), SIMDE_FLOAT32_C( 3.80), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( 1.37), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( 3.82) }, { SIMDE_FLOAT32_C( 13.10), SIMDE_FLOAT32_C( 103.46), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 3.67), SIMDE_FLOAT32_C( 5.41), SIMDE_FLOAT32_C( 7.49), SIMDE_FLOAT32_C( 2.85), SIMDE_FLOAT32_C( 197.57) } }, { { SIMDE_FLOAT32_C( 4.79), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( 2.47), SIMDE_FLOAT32_C( 2.53), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 1.73), SIMDE_FLOAT32_C( 4.95) }, { SIMDE_FLOAT32_C( 2.58), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 4.20), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 4.24), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 4.80), SIMDE_FLOAT32_C( 3.14) }, { SIMDE_FLOAT32_C( 56.92), SIMDE_FLOAT32_C( 1.22), SIMDE_FLOAT32_C( 19.57), SIMDE_FLOAT32_C( 2.18), SIMDE_FLOAT32_C( 51.20), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 13.89), SIMDE_FLOAT32_C( 151.73) } }, { { SIMDE_FLOAT32_C( 2.39), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 3.12), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 4.79), SIMDE_FLOAT32_C( 4.80), SIMDE_FLOAT32_C( 4.84), SIMDE_FLOAT32_C( 1.67) }, { SIMDE_FLOAT32_C( 3.61), SIMDE_FLOAT32_C( 2.87), SIMDE_FLOAT32_C( 2.48), SIMDE_FLOAT32_C( 4.96), SIMDE_FLOAT32_C( 4.24), SIMDE_FLOAT32_C( 4.50), SIMDE_FLOAT32_C( 3.79), SIMDE_FLOAT32_C( 4.03) }, { SIMDE_FLOAT32_C( 23.23), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 16.81), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 766.69), SIMDE_FLOAT32_C( 1163.02), SIMDE_FLOAT32_C( 394.06), SIMDE_FLOAT32_C( 7.90) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 b = simde_mm256_loadu_ps(test_vec[i].b); simde__m256 r = simde_mm256_pow_ps(a, b); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } 
return 0; } static int test_simde_mm256_pow_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 b[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 7.17), SIMDE_FLOAT64_C( 4.56), SIMDE_FLOAT64_C( 5.81), SIMDE_FLOAT64_C( 1.86) }, { SIMDE_FLOAT64_C( 7.20), SIMDE_FLOAT64_C( 2.88), SIMDE_FLOAT64_C( 6.56), SIMDE_FLOAT64_C( 0.87) }, { SIMDE_FLOAT64_C(1444567.77), SIMDE_FLOAT64_C( 79.03), SIMDE_FLOAT64_C(103037.53), SIMDE_FLOAT64_C( 1.72) } }, { { SIMDE_FLOAT64_C( 6.39), SIMDE_FLOAT64_C( 1.20), SIMDE_FLOAT64_C( 4.73), SIMDE_FLOAT64_C( 0.14) }, { SIMDE_FLOAT64_C( 9.00), SIMDE_FLOAT64_C( 7.96), SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 0.99) }, { SIMDE_FLOAT64_C(17762648.57), SIMDE_FLOAT64_C( 4.27), SIMDE_FLOAT64_C( 1.17), SIMDE_FLOAT64_C( 0.14) } }, { { SIMDE_FLOAT64_C( 1.05), SIMDE_FLOAT64_C( 6.55), SIMDE_FLOAT64_C( 5.85), SIMDE_FLOAT64_C( 2.38) }, { SIMDE_FLOAT64_C( 7.70), SIMDE_FLOAT64_C( 1.92), SIMDE_FLOAT64_C( 2.76), SIMDE_FLOAT64_C( 9.17) }, { SIMDE_FLOAT64_C( 1.46), SIMDE_FLOAT64_C( 36.91), SIMDE_FLOAT64_C( 131.02), SIMDE_FLOAT64_C( 2839.30) } }, { { SIMDE_FLOAT64_C( 1.22), SIMDE_FLOAT64_C( 3.47), SIMDE_FLOAT64_C( 2.69), SIMDE_FLOAT64_C( 4.53) }, { SIMDE_FLOAT64_C( 8.94), SIMDE_FLOAT64_C( 7.35), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 6.10) }, { SIMDE_FLOAT64_C( 5.92), SIMDE_FLOAT64_C( 9363.14), SIMDE_FLOAT64_C( 1.94), SIMDE_FLOAT64_C( 10050.76) } }, { { SIMDE_FLOAT64_C( 1.91), SIMDE_FLOAT64_C( 6.48), SIMDE_FLOAT64_C( 7.96), SIMDE_FLOAT64_C( 9.11) }, { SIMDE_FLOAT64_C( 9.36), SIMDE_FLOAT64_C( 4.52), SIMDE_FLOAT64_C( 9.98), SIMDE_FLOAT64_C( 5.75) }, { SIMDE_FLOAT64_C( 427.04), SIMDE_FLOAT64_C( 4659.28), SIMDE_FLOAT64_C(979743556.72), SIMDE_FLOAT64_C(329026.34) } }, { { SIMDE_FLOAT64_C( 5.73), SIMDE_FLOAT64_C( 4.71), SIMDE_FLOAT64_C( 5.89), SIMDE_FLOAT64_C( 4.73) }, { SIMDE_FLOAT64_C( 2.67), SIMDE_FLOAT64_C( 5.99), SIMDE_FLOAT64_C( 5.71), SIMDE_FLOAT64_C( 3.72) }, { SIMDE_FLOAT64_C( 105.75), 
SIMDE_FLOAT64_C( 10749.67), SIMDE_FLOAT64_C( 24966.54), SIMDE_FLOAT64_C( 323.95) } }, { { SIMDE_FLOAT64_C( 2.54), SIMDE_FLOAT64_C( 1.56), SIMDE_FLOAT64_C( 6.10), SIMDE_FLOAT64_C( 0.24) }, { SIMDE_FLOAT64_C( 3.48), SIMDE_FLOAT64_C( 8.87), SIMDE_FLOAT64_C( 9.41), SIMDE_FLOAT64_C( 4.71) }, { SIMDE_FLOAT64_C( 25.63), SIMDE_FLOAT64_C( 51.64), SIMDE_FLOAT64_C(24544475.02), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 2.33), SIMDE_FLOAT64_C( 2.10), SIMDE_FLOAT64_C( 9.23), SIMDE_FLOAT64_C( 1.27) }, { SIMDE_FLOAT64_C( 9.45), SIMDE_FLOAT64_C( 9.90), SIMDE_FLOAT64_C( 7.37), SIMDE_FLOAT64_C( 1.37) }, { SIMDE_FLOAT64_C( 2961.51), SIMDE_FLOAT64_C( 1548.71), SIMDE_FLOAT64_C(12987828.24), SIMDE_FLOAT64_C( 1.39) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d b = simde_mm256_loadu_pd(test_vec[i].b); simde__m256d r = simde_mm256_pow_pd(a, b); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_pow_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 2.61), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 3.70), SIMDE_FLOAT32_C( 3.22), SIMDE_FLOAT32_C( 4.10), SIMDE_FLOAT32_C( 4.95), SIMDE_FLOAT32_C( 4.92), SIMDE_FLOAT32_C( 4.08), SIMDE_FLOAT32_C( 4.52), SIMDE_FLOAT32_C( 2.15), SIMDE_FLOAT32_C( 4.72), SIMDE_FLOAT32_C( 1.20), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 4.45), SIMDE_FLOAT32_C( 4.31), SIMDE_FLOAT32_C( 1.66) }, { SIMDE_FLOAT32_C( 4.65), SIMDE_FLOAT32_C( 2.28), SIMDE_FLOAT32_C( 1.70), SIMDE_FLOAT32_C( 3.82), SIMDE_FLOAT32_C( 1.78), SIMDE_FLOAT32_C( 2.73), SIMDE_FLOAT32_C( 1.38), SIMDE_FLOAT32_C( 3.44), SIMDE_FLOAT32_C( 1.09), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( 3.09), SIMDE_FLOAT32_C( 3.82), SIMDE_FLOAT32_C( 2.39), SIMDE_FLOAT32_C( 2.39), SIMDE_FLOAT32_C( 1.67), SIMDE_FLOAT32_C( 5.00) }, { 
SIMDE_FLOAT32_C( 86.57), SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 9.25), SIMDE_FLOAT32_C( 87.10), SIMDE_FLOAT32_C( 12.32), SIMDE_FLOAT32_C( 78.75), SIMDE_FLOAT32_C( 9.01), SIMDE_FLOAT32_C( 126.09), SIMDE_FLOAT32_C( 5.18), SIMDE_FLOAT32_C( 3.82), SIMDE_FLOAT32_C( 120.92), SIMDE_FLOAT32_C( 2.01), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 35.45), SIMDE_FLOAT32_C( 11.47), SIMDE_FLOAT32_C( 12.60) } }, { { SIMDE_FLOAT32_C( 3.47), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 3.22), SIMDE_FLOAT32_C( 2.57), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 3.14), SIMDE_FLOAT32_C( 1.64), SIMDE_FLOAT32_C( 4.84), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 2.74), SIMDE_FLOAT32_C( 0.46) }, { SIMDE_FLOAT32_C( 2.63), SIMDE_FLOAT32_C( 4.43), SIMDE_FLOAT32_C( 4.28), SIMDE_FLOAT32_C( 4.41), SIMDE_FLOAT32_C( 2.17), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 2.85), SIMDE_FLOAT32_C( 3.26), SIMDE_FLOAT32_C( 2.41), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 2.08), SIMDE_FLOAT32_C( 4.80), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( 3.75), SIMDE_FLOAT32_C( 4.79), SIMDE_FLOAT32_C( 1.80) }, { SIMDE_FLOAT32_C( 26.37), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 149.15), SIMDE_FLOAT32_C( 64.24), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 2.13), SIMDE_FLOAT32_C( 4.10), SIMDE_FLOAT32_C( 170.84), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 1.45), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 124.98), SIMDE_FLOAT32_C( 0.25) } }, { { SIMDE_FLOAT32_C( 4.12), SIMDE_FLOAT32_C( 3.01), SIMDE_FLOAT32_C( 4.36), SIMDE_FLOAT32_C( 4.44), SIMDE_FLOAT32_C( 1.15), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 4.28), SIMDE_FLOAT32_C( 1.43), SIMDE_FLOAT32_C( 2.36), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 2.51), SIMDE_FLOAT32_C( 3.17), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 3.64), SIMDE_FLOAT32_C( 3.30) }, { SIMDE_FLOAT32_C( 4.68), 
SIMDE_FLOAT32_C( 2.92), SIMDE_FLOAT32_C( 2.71), SIMDE_FLOAT32_C( 1.85), SIMDE_FLOAT32_C( 3.57), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 1.49), SIMDE_FLOAT32_C( 2.19), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 4.82), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 1.62), SIMDE_FLOAT32_C( 0.05) }, { SIMDE_FLOAT32_C( 754.60), SIMDE_FLOAT32_C( 24.97), SIMDE_FLOAT32_C( 54.08), SIMDE_FLOAT32_C( 15.76), SIMDE_FLOAT32_C( 1.65), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 1.42), SIMDE_FLOAT32_C( 3.59), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( 260.08), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 8.11), SIMDE_FLOAT32_C( 1.06) } }, { { SIMDE_FLOAT32_C( 3.58), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 4.49), SIMDE_FLOAT32_C( 4.73), SIMDE_FLOAT32_C( 1.98), SIMDE_FLOAT32_C( 3.77), SIMDE_FLOAT32_C( 1.16), SIMDE_FLOAT32_C( 4.35), SIMDE_FLOAT32_C( 4.09), SIMDE_FLOAT32_C( 3.67), SIMDE_FLOAT32_C( 2.52), SIMDE_FLOAT32_C( 4.75), SIMDE_FLOAT32_C( 3.91), SIMDE_FLOAT32_C( 1.16), SIMDE_FLOAT32_C( 3.05), SIMDE_FLOAT32_C( 3.59) }, { SIMDE_FLOAT32_C( 4.07), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 2.64), SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 3.62), SIMDE_FLOAT32_C( 2.81), SIMDE_FLOAT32_C( 2.73), SIMDE_FLOAT32_C( 4.40), SIMDE_FLOAT32_C( 2.63), SIMDE_FLOAT32_C( 3.67), SIMDE_FLOAT32_C( 4.97), SIMDE_FLOAT32_C( 4.25), SIMDE_FLOAT32_C( 3.72), SIMDE_FLOAT32_C( 3.55) }, { SIMDE_FLOAT32_C( 179.60), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 1.94), SIMDE_FLOAT32_C( 60.48), SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 2.07), SIMDE_FLOAT32_C( 1.71), SIMDE_FLOAT32_C( 62.25), SIMDE_FLOAT32_C( 46.77), SIMDE_FLOAT32_C( 305.16), SIMDE_FLOAT32_C( 11.37), SIMDE_FLOAT32_C( 304.41), SIMDE_FLOAT32_C( 877.24), SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 63.33), SIMDE_FLOAT32_C( 93.45) } }, { { SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 3.21), 
SIMDE_FLOAT32_C( 3.29), SIMDE_FLOAT32_C( 2.21), SIMDE_FLOAT32_C( 1.98), SIMDE_FLOAT32_C( 4.45), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( 1.07), SIMDE_FLOAT32_C( 3.11), SIMDE_FLOAT32_C( 4.08), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 3.87), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 4.31) }, { SIMDE_FLOAT32_C( 4.63), SIMDE_FLOAT32_C( 1.06), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 1.60), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 3.75), SIMDE_FLOAT32_C( 4.34), SIMDE_FLOAT32_C( 4.98), SIMDE_FLOAT32_C( 1.38), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 4.95), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 1.73), SIMDE_FLOAT32_C( 3.50), SIMDE_FLOAT32_C( 0.85) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 3.44), SIMDE_FLOAT32_C( 10.32), SIMDE_FLOAT32_C( 2.11), SIMDE_FLOAT32_C( 2.98), SIMDE_FLOAT32_C( 2.38), SIMDE_FLOAT32_C( 5.30), SIMDE_FLOAT32_C( 1.34), SIMDE_FLOAT32_C( 284.41), SIMDE_FLOAT32_C( 6.96), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 33.27), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 10.39), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 3.46) } }, { { SIMDE_FLOAT32_C( 4.94), SIMDE_FLOAT32_C( 1.79), SIMDE_FLOAT32_C( 3.06), SIMDE_FLOAT32_C( 1.92), SIMDE_FLOAT32_C( 1.23), SIMDE_FLOAT32_C( 4.63), SIMDE_FLOAT32_C( 2.99), SIMDE_FLOAT32_C( 4.35), SIMDE_FLOAT32_C( 3.71), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 1.38), SIMDE_FLOAT32_C( 3.95), SIMDE_FLOAT32_C( 2.68), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 3.26), SIMDE_FLOAT32_C( 2.31) }, { SIMDE_FLOAT32_C( 3.05), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 3.25), SIMDE_FLOAT32_C( 4.65), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 3.99), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 3.38), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 3.72), SIMDE_FLOAT32_C( 4.23), SIMDE_FLOAT32_C( 4.85), SIMDE_FLOAT32_C( 3.66) }, { SIMDE_FLOAT32_C( 130.58), SIMDE_FLOAT32_C( 1.14), SIMDE_FLOAT32_C( 37.90), SIMDE_FLOAT32_C( 
20.77), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 21.44), SIMDE_FLOAT32_C( 79.05), SIMDE_FLOAT32_C( 3.15), SIMDE_FLOAT32_C( 84.04), SIMDE_FLOAT32_C( 14.32), SIMDE_FLOAT32_C( 1.27), SIMDE_FLOAT32_C( 243.44), SIMDE_FLOAT32_C( 39.14), SIMDE_FLOAT32_C( 18.37), SIMDE_FLOAT32_C( 308.39), SIMDE_FLOAT32_C( 21.42) } }, { { SIMDE_FLOAT32_C( 1.02), SIMDE_FLOAT32_C( 2.92), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 2.25), SIMDE_FLOAT32_C( 2.54), SIMDE_FLOAT32_C( 3.57), SIMDE_FLOAT32_C( 1.60), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( 2.37), SIMDE_FLOAT32_C( 2.98), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 4.97), SIMDE_FLOAT32_C( 3.46), SIMDE_FLOAT32_C( 2.36), SIMDE_FLOAT32_C( 3.02) }, { SIMDE_FLOAT32_C( 3.68), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 2.67), SIMDE_FLOAT32_C( 4.48), SIMDE_FLOAT32_C( 2.62), SIMDE_FLOAT32_C( 1.66), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 3.65), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 2.37), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 4.85), SIMDE_FLOAT32_C( 1.03), SIMDE_FLOAT32_C( 1.23) }, { SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 1.94), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 37.82), SIMDE_FLOAT32_C( 11.50), SIMDE_FLOAT32_C( 8.27), SIMDE_FLOAT32_C( 1.12), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( 23.33), SIMDE_FLOAT32_C( 2.92), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.40), SIMDE_FLOAT32_C( 411.64), SIMDE_FLOAT32_C( 2.42), SIMDE_FLOAT32_C( 3.89) } }, { { SIMDE_FLOAT32_C( 2.77), SIMDE_FLOAT32_C( 1.62), SIMDE_FLOAT32_C( 3.48), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( 2.56), SIMDE_FLOAT32_C( 3.06), SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 2.61), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 4.97), SIMDE_FLOAT32_C( 1.05), SIMDE_FLOAT32_C( 3.90) }, { SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 3.72), SIMDE_FLOAT32_C( 3.38), SIMDE_FLOAT32_C( 3.21), SIMDE_FLOAT32_C( 0.38), 
SIMDE_FLOAT32_C( 3.63), SIMDE_FLOAT32_C( 4.20), SIMDE_FLOAT32_C( 4.03), SIMDE_FLOAT32_C( 4.61), SIMDE_FLOAT32_C( 4.20), SIMDE_FLOAT32_C( 1.41), SIMDE_FLOAT32_C( 4.82), SIMDE_FLOAT32_C( 4.05), SIMDE_FLOAT32_C( 2.44), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 1.82) }, { SIMDE_FLOAT32_C( 1.82), SIMDE_FLOAT32_C( 6.02), SIMDE_FLOAT32_C( 67.69), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 6.47), SIMDE_FLOAT32_C( 44.18), SIMDE_FLOAT32_C( 173.45), SIMDE_FLOAT32_C( 10.74), SIMDE_FLOAT32_C( 3.87), SIMDE_FLOAT32_C( 209.20), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 50.02), SIMDE_FLOAT32_C( 1.05), SIMDE_FLOAT32_C( 11.91) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_mm512_pow_ps(a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_pow_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 b[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 3.50), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 1.87), SIMDE_FLOAT32_C( 2.52), SIMDE_FLOAT32_C( 3.74), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 4.86), SIMDE_FLOAT32_C( 1.16), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 1.92), SIMDE_FLOAT32_C( 3.89), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 3.79), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 3.85) }, UINT8_C( 81), { SIMDE_FLOAT32_C( 3.43), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 2.56), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 3.38), SIMDE_FLOAT32_C( 3.30), SIMDE_FLOAT32_C( 3.55), SIMDE_FLOAT32_C( 3.99), SIMDE_FLOAT32_C( 2.88), SIMDE_FLOAT32_C( 1.85), SIMDE_FLOAT32_C( 2.70), SIMDE_FLOAT32_C( 1.54), SIMDE_FLOAT32_C( 2.63), SIMDE_FLOAT32_C( 4.93), SIMDE_FLOAT32_C( 0.04), 
SIMDE_FLOAT32_C( 3.48) }, { SIMDE_FLOAT32_C( 1.79), SIMDE_FLOAT32_C( 2.57), SIMDE_FLOAT32_C( 2.22), SIMDE_FLOAT32_C( 1.87), SIMDE_FLOAT32_C( 3.53), SIMDE_FLOAT32_C( 2.08), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( 4.18), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 1.92), SIMDE_FLOAT32_C( 4.38), SIMDE_FLOAT32_C( 2.79), SIMDE_FLOAT32_C( 2.71), SIMDE_FLOAT32_C( 3.23), SIMDE_FLOAT32_C( 4.72), SIMDE_FLOAT32_C( 1.13) }, { SIMDE_FLOAT32_C( 9.08), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 1.87), SIMDE_FLOAT32_C( 2.52), SIMDE_FLOAT32_C( 73.63), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 46.47), SIMDE_FLOAT32_C( 4.86), SIMDE_FLOAT32_C( 1.16), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 1.92), SIMDE_FLOAT32_C( 3.89), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 3.79), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 3.85) } }, { { SIMDE_FLOAT32_C( 3.40), SIMDE_FLOAT32_C( 2.28), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 1.78), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 3.46), SIMDE_FLOAT32_C( 1.97), SIMDE_FLOAT32_C( 3.46), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 4.59), SIMDE_FLOAT32_C( 3.39), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 3.07), SIMDE_FLOAT32_C( 0.19) }, UINT8_C(140), { SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( 1.14), SIMDE_FLOAT32_C( 2.37), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 1.37), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 4.70), SIMDE_FLOAT32_C( 4.16), SIMDE_FLOAT32_C( 4.71), SIMDE_FLOAT32_C( 2.93), SIMDE_FLOAT32_C( 3.88), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 1.33), SIMDE_FLOAT32_C( 1.15) }, { SIMDE_FLOAT32_C( 2.42), SIMDE_FLOAT32_C( 3.11), SIMDE_FLOAT32_C( 1.73), SIMDE_FLOAT32_C( 2.54), SIMDE_FLOAT32_C( 3.88), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 4.51), SIMDE_FLOAT32_C( 2.34), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 4.10), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 2.17), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 2.85), SIMDE_FLOAT32_C( 2.46) }, { 
SIMDE_FLOAT32_C( 3.40), SIMDE_FLOAT32_C( 2.28), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( 8.95), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 5.06), SIMDE_FLOAT32_C( 1.97), SIMDE_FLOAT32_C( 3.46), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 4.59), SIMDE_FLOAT32_C( 3.39), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 3.07), SIMDE_FLOAT32_C( 0.19) } }, { { SIMDE_FLOAT32_C( 2.97), SIMDE_FLOAT32_C( 3.99), SIMDE_FLOAT32_C( 4.84), SIMDE_FLOAT32_C( 3.05), SIMDE_FLOAT32_C( 4.32), SIMDE_FLOAT32_C( 1.21), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 4.03), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 4.77), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( 4.25), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 3.29), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 3.03) }, UINT8_C(179), { SIMDE_FLOAT32_C( 2.13), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 2.31), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 2.61), SIMDE_FLOAT32_C( 2.50), SIMDE_FLOAT32_C( 4.17), SIMDE_FLOAT32_C( 3.34), SIMDE_FLOAT32_C( 2.74), SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( 4.26), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 2.23), SIMDE_FLOAT32_C( 4.58) }, { SIMDE_FLOAT32_C( 3.65), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 3.90), SIMDE_FLOAT32_C( 4.86), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 2.93), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 4.89), SIMDE_FLOAT32_C( 4.48), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 3.17), SIMDE_FLOAT32_C( 4.88), SIMDE_FLOAT32_C( 3.76), SIMDE_FLOAT32_C( 4.57), SIMDE_FLOAT32_C( 2.00) }, { SIMDE_FLOAT32_C( 15.80), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 4.84), SIMDE_FLOAT32_C( 3.05), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 16.62), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 1.17), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 4.77), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( 4.25), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 3.29), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 3.03) } }, { { SIMDE_FLOAT32_C( 4.33), 
SIMDE_FLOAT32_C( 4.84), SIMDE_FLOAT32_C( 4.31), SIMDE_FLOAT32_C( 4.40), SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 1.82), SIMDE_FLOAT32_C( 3.58), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 4.55), SIMDE_FLOAT32_C( 4.92), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 3.74), SIMDE_FLOAT32_C( 2.29), SIMDE_FLOAT32_C( 4.73), SIMDE_FLOAT32_C( 2.39) }, UINT8_C( 97), { SIMDE_FLOAT32_C( 3.63), SIMDE_FLOAT32_C( 2.25), SIMDE_FLOAT32_C( 2.92), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( 2.48), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( 1.45), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( 3.76), SIMDE_FLOAT32_C( 4.62), SIMDE_FLOAT32_C( 1.83), SIMDE_FLOAT32_C( 2.51), SIMDE_FLOAT32_C( 4.19), SIMDE_FLOAT32_C( 3.83), SIMDE_FLOAT32_C( 1.84), SIMDE_FLOAT32_C( 4.03) }, { SIMDE_FLOAT32_C( 3.15), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( 1.48), SIMDE_FLOAT32_C( 4.97), SIMDE_FLOAT32_C( 4.82), SIMDE_FLOAT32_C( 2.27), SIMDE_FLOAT32_C( 4.52), SIMDE_FLOAT32_C( 4.75), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( 4.66), SIMDE_FLOAT32_C( 3.48), SIMDE_FLOAT32_C( 4.61), SIMDE_FLOAT32_C( 4.39), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 2.19), SIMDE_FLOAT32_C( 3.02) }, { SIMDE_FLOAT32_C( 58.04), SIMDE_FLOAT32_C( 4.84), SIMDE_FLOAT32_C( 4.31), SIMDE_FLOAT32_C( 4.40), SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 12.38), SIMDE_FLOAT32_C( 5.36), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 4.55), SIMDE_FLOAT32_C( 4.92), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 3.74), SIMDE_FLOAT32_C( 2.29), SIMDE_FLOAT32_C( 4.73), SIMDE_FLOAT32_C( 2.39) } }, { { SIMDE_FLOAT32_C( 3.11), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 4.58), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 3.13), SIMDE_FLOAT32_C( 1.02), SIMDE_FLOAT32_C( 2.55), SIMDE_FLOAT32_C( 1.89), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 4.38), SIMDE_FLOAT32_C( 4.40), SIMDE_FLOAT32_C( 4.83), SIMDE_FLOAT32_C( 3.22), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( 3.86), SIMDE_FLOAT32_C( 1.37) }, UINT8_C( 99), { SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 1.33), 
SIMDE_FLOAT32_C( 2.31), SIMDE_FLOAT32_C( 2.61), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 2.06), SIMDE_FLOAT32_C( 4.94), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 4.55), SIMDE_FLOAT32_C( 4.90), SIMDE_FLOAT32_C( 1.41), SIMDE_FLOAT32_C( 1.73), SIMDE_FLOAT32_C( 2.92), SIMDE_FLOAT32_C( 4.52), SIMDE_FLOAT32_C( 1.84) }, { SIMDE_FLOAT32_C( 2.50), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 4.97), SIMDE_FLOAT32_C( 3.52), SIMDE_FLOAT32_C( 2.66), SIMDE_FLOAT32_C( 1.86), SIMDE_FLOAT32_C( 4.17), SIMDE_FLOAT32_C( 2.05), SIMDE_FLOAT32_C( 1.27), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 2.51), SIMDE_FLOAT32_C( 2.86), SIMDE_FLOAT32_C( 1.63), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 3.20) }, { SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 1.03), SIMDE_FLOAT32_C( 4.58), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 3.13), SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( 781.34), SIMDE_FLOAT32_C( 1.89), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 4.38), SIMDE_FLOAT32_C( 4.40), SIMDE_FLOAT32_C( 4.83), SIMDE_FLOAT32_C( 3.22), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( 3.86), SIMDE_FLOAT32_C( 1.37) } }, { { SIMDE_FLOAT32_C( 2.96), SIMDE_FLOAT32_C( 2.31), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 4.36), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 4.32), SIMDE_FLOAT32_C( 4.90), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 4.22), SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( 2.14), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 3.87), SIMDE_FLOAT32_C( 4.64) }, UINT8_C(216), { SIMDE_FLOAT32_C( 3.84), SIMDE_FLOAT32_C( 3.16), SIMDE_FLOAT32_C( 3.61), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 2.33), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 1.97), SIMDE_FLOAT32_C( 1.32), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 4.48), SIMDE_FLOAT32_C( 4.19), SIMDE_FLOAT32_C( 2.54), SIMDE_FLOAT32_C( 4.47), SIMDE_FLOAT32_C( 2.39), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 1.78) }, { SIMDE_FLOAT32_C( 3.20), SIMDE_FLOAT32_C( 4.31), SIMDE_FLOAT32_C( 1.14), 
SIMDE_FLOAT32_C( 3.95), SIMDE_FLOAT32_C( 3.63), SIMDE_FLOAT32_C( 1.05), SIMDE_FLOAT32_C( 4.25), SIMDE_FLOAT32_C( 2.86), SIMDE_FLOAT32_C( 2.35), SIMDE_FLOAT32_C( 1.28), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 3.18), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 4.64), SIMDE_FLOAT32_C( 4.13), SIMDE_FLOAT32_C( 3.99) }, { SIMDE_FLOAT32_C( 2.96), SIMDE_FLOAT32_C( 2.31), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 21.55), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 17.84), SIMDE_FLOAT32_C( 2.21), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 4.22), SIMDE_FLOAT32_C( 1.31), SIMDE_FLOAT32_C( 2.03), SIMDE_FLOAT32_C( 2.14), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 3.87), SIMDE_FLOAT32_C( 4.64) } }, { { SIMDE_FLOAT32_C( 2.80), SIMDE_FLOAT32_C( 2.73), SIMDE_FLOAT32_C( 4.69), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 3.38), SIMDE_FLOAT32_C( 1.66), SIMDE_FLOAT32_C( 1.45), SIMDE_FLOAT32_C( 4.29), SIMDE_FLOAT32_C( 1.13), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 1.83), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( 2.34), SIMDE_FLOAT32_C( 2.38), SIMDE_FLOAT32_C( 1.23) }, UINT8_C(247), { SIMDE_FLOAT32_C( 3.53), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 4.57), SIMDE_FLOAT32_C( 4.42), SIMDE_FLOAT32_C( 3.14), SIMDE_FLOAT32_C( 1.93), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 3.14), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 2.78), SIMDE_FLOAT32_C( 4.23), SIMDE_FLOAT32_C( 4.84), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 1.97) }, { SIMDE_FLOAT32_C( 4.52), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 2.17), SIMDE_FLOAT32_C( 4.64), SIMDE_FLOAT32_C( 2.31), SIMDE_FLOAT32_C( 2.80), SIMDE_FLOAT32_C( 1.48), SIMDE_FLOAT32_C( 2.92), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 2.06), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 3.83) }, { SIMDE_FLOAT32_C( 299.19), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.13), 
SIMDE_FLOAT32_C( 25.15), SIMDE_FLOAT32_C( 202.19), SIMDE_FLOAT32_C( 4.57), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 1.13), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 1.83), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 3.03), SIMDE_FLOAT32_C( 2.34), SIMDE_FLOAT32_C( 2.38), SIMDE_FLOAT32_C( 1.23) } }, { { SIMDE_FLOAT32_C( 2.23), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 3.40), SIMDE_FLOAT32_C( 1.66), SIMDE_FLOAT32_C( 3.88), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 2.36), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 3.21), SIMDE_FLOAT32_C( 4.81), SIMDE_FLOAT32_C( 4.67), SIMDE_FLOAT32_C( 3.04), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 1.63), SIMDE_FLOAT32_C( 2.57) }, UINT8_C(207), { SIMDE_FLOAT32_C( 1.98), SIMDE_FLOAT32_C( 3.75), SIMDE_FLOAT32_C( 3.27), SIMDE_FLOAT32_C( 1.62), SIMDE_FLOAT32_C( 1.06), SIMDE_FLOAT32_C( 1.08), SIMDE_FLOAT32_C( 3.10), SIMDE_FLOAT32_C( 3.97), SIMDE_FLOAT32_C( 1.91), SIMDE_FLOAT32_C( 1.91), SIMDE_FLOAT32_C( 4.27), SIMDE_FLOAT32_C( 3.96), SIMDE_FLOAT32_C( 2.36), SIMDE_FLOAT32_C( 3.10), SIMDE_FLOAT32_C( 1.19), SIMDE_FLOAT32_C( 3.10) }, { SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 2.85), SIMDE_FLOAT32_C( 1.98), SIMDE_FLOAT32_C( 1.82), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 4.00), SIMDE_FLOAT32_C( 2.26), SIMDE_FLOAT32_C( 3.41), SIMDE_FLOAT32_C( 3.81), SIMDE_FLOAT32_C( 1.92), SIMDE_FLOAT32_C( 1.46), SIMDE_FLOAT32_C( 4.20), SIMDE_FLOAT32_C( 3.56), SIMDE_FLOAT32_C( 4.02), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 0.54) }, { SIMDE_FLOAT32_C( 2.79), SIMDE_FLOAT32_C( 43.25), SIMDE_FLOAT32_C( 10.44), SIMDE_FLOAT32_C( 2.41), SIMDE_FLOAT32_C( 3.88), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 12.90), SIMDE_FLOAT32_C( 110.12), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 3.21), SIMDE_FLOAT32_C( 4.81), SIMDE_FLOAT32_C( 4.67), SIMDE_FLOAT32_C( 3.04), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 1.63), SIMDE_FLOAT32_C( 2.57) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = 
simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 b = simde_mm512_loadu_ps(test_vec[i].b); simde__m512 r = simde_mm512_mask_pow_ps(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_pow_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 4.13), SIMDE_FLOAT64_C( 8.81), SIMDE_FLOAT64_C( 2.27), SIMDE_FLOAT64_C( 5.77), SIMDE_FLOAT64_C( 3.43), SIMDE_FLOAT64_C( 9.71), SIMDE_FLOAT64_C( 1.86), SIMDE_FLOAT64_C( 0.10) }, { SIMDE_FLOAT64_C( 8.72), SIMDE_FLOAT64_C( 9.17), SIMDE_FLOAT64_C( 7.13), SIMDE_FLOAT64_C( 1.02), SIMDE_FLOAT64_C( 3.40), SIMDE_FLOAT64_C( 5.53), SIMDE_FLOAT64_C( 2.12), SIMDE_FLOAT64_C( 0.29) }, { SIMDE_FLOAT64_C(235008.98), SIMDE_FLOAT64_C(462838076.60), SIMDE_FLOAT64_C( 345.51), SIMDE_FLOAT64_C( 5.98), SIMDE_FLOAT64_C( 66.07), SIMDE_FLOAT64_C(287953.49), SIMDE_FLOAT64_C( 3.73), SIMDE_FLOAT64_C( 0.51) } }, { { SIMDE_FLOAT64_C( 4.06), SIMDE_FLOAT64_C( 3.82), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 7.27), SIMDE_FLOAT64_C( 4.30), SIMDE_FLOAT64_C( 3.31), SIMDE_FLOAT64_C( 6.31), SIMDE_FLOAT64_C( 8.11) }, { SIMDE_FLOAT64_C( 1.51), SIMDE_FLOAT64_C( 1.05), SIMDE_FLOAT64_C( 6.76), SIMDE_FLOAT64_C( 9.20), SIMDE_FLOAT64_C( 5.39), SIMDE_FLOAT64_C( 5.09), SIMDE_FLOAT64_C( 0.17), SIMDE_FLOAT64_C( 9.52) }, { SIMDE_FLOAT64_C( 8.30), SIMDE_FLOAT64_C( 4.08), SIMDE_FLOAT64_C( 0.07), SIMDE_FLOAT64_C(84356116.88), SIMDE_FLOAT64_C( 2596.54), SIMDE_FLOAT64_C( 442.51), SIMDE_FLOAT64_C( 1.37), SIMDE_FLOAT64_C(450690633.16) } }, { { SIMDE_FLOAT64_C( 3.90), SIMDE_FLOAT64_C( 2.44), SIMDE_FLOAT64_C( 5.29), SIMDE_FLOAT64_C( 7.33), SIMDE_FLOAT64_C( 2.15), SIMDE_FLOAT64_C( 7.16), SIMDE_FLOAT64_C( 7.43), SIMDE_FLOAT64_C( 0.86) }, { SIMDE_FLOAT64_C( 6.32), SIMDE_FLOAT64_C( 4.56), SIMDE_FLOAT64_C( 1.88), SIMDE_FLOAT64_C( 9.72), 
SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( 4.00), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 4.15) }, { SIMDE_FLOAT64_C( 5439.12), SIMDE_FLOAT64_C( 58.41), SIMDE_FLOAT64_C( 22.91), SIMDE_FLOAT64_C(256336608.20), SIMDE_FLOAT64_C( 1.07), SIMDE_FLOAT64_C( 2628.16), SIMDE_FLOAT64_C( 1.02), SIMDE_FLOAT64_C( 0.53) } }, { { SIMDE_FLOAT64_C( 7.82), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 1.42), SIMDE_FLOAT64_C( 2.12), SIMDE_FLOAT64_C( 3.99), SIMDE_FLOAT64_C( 7.73), SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( 5.49) }, { SIMDE_FLOAT64_C( 8.77), SIMDE_FLOAT64_C( 6.98), SIMDE_FLOAT64_C( 4.70), SIMDE_FLOAT64_C( 4.16), SIMDE_FLOAT64_C( 2.08), SIMDE_FLOAT64_C( 4.87), SIMDE_FLOAT64_C( 3.68), SIMDE_FLOAT64_C( 5.98) }, { SIMDE_FLOAT64_C(68143309.86), SIMDE_FLOAT64_C( 0.07), SIMDE_FLOAT64_C( 5.20), SIMDE_FLOAT64_C( 22.78), SIMDE_FLOAT64_C( 17.78), SIMDE_FLOAT64_C( 21156.03), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 26463.22) } }, { { SIMDE_FLOAT64_C( 7.30), SIMDE_FLOAT64_C( 8.98), SIMDE_FLOAT64_C( 3.31), SIMDE_FLOAT64_C( 9.45), SIMDE_FLOAT64_C( 6.13), SIMDE_FLOAT64_C( 0.74), SIMDE_FLOAT64_C( 0.31), SIMDE_FLOAT64_C( 2.46) }, { SIMDE_FLOAT64_C( 5.30), SIMDE_FLOAT64_C( 2.18), SIMDE_FLOAT64_C( 2.18), SIMDE_FLOAT64_C( 5.38), SIMDE_FLOAT64_C( 6.18), SIMDE_FLOAT64_C( 2.19), SIMDE_FLOAT64_C( 9.53), SIMDE_FLOAT64_C( 4.00) }, { SIMDE_FLOAT64_C( 37636.67), SIMDE_FLOAT64_C( 119.71), SIMDE_FLOAT64_C( 13.59), SIMDE_FLOAT64_C(176938.82), SIMDE_FLOAT64_C( 73536.97), SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 36.62) } }, { { SIMDE_FLOAT64_C( 2.87), SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 6.12), SIMDE_FLOAT64_C( 6.85), SIMDE_FLOAT64_C( 8.67), SIMDE_FLOAT64_C( 6.34), SIMDE_FLOAT64_C( 2.35), SIMDE_FLOAT64_C( 7.45) }, { SIMDE_FLOAT64_C( 3.33), SIMDE_FLOAT64_C( 7.04), SIMDE_FLOAT64_C( 1.61), SIMDE_FLOAT64_C( 5.40), SIMDE_FLOAT64_C( 1.91), SIMDE_FLOAT64_C( 5.30), SIMDE_FLOAT64_C( 1.38), SIMDE_FLOAT64_C( 9.21) }, { SIMDE_FLOAT64_C( 33.48), SIMDE_FLOAT64_C( 0.70), SIMDE_FLOAT64_C( 18.48), 
SIMDE_FLOAT64_C( 32563.35), SIMDE_FLOAT64_C( 61.89), SIMDE_FLOAT64_C( 17826.79), SIMDE_FLOAT64_C( 3.25), SIMDE_FLOAT64_C(107785234.77) } }, { { SIMDE_FLOAT64_C( 4.27), SIMDE_FLOAT64_C( 4.69), SIMDE_FLOAT64_C( 8.66), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 5.42), SIMDE_FLOAT64_C( 8.96), SIMDE_FLOAT64_C( 2.86), SIMDE_FLOAT64_C( 0.72) }, { SIMDE_FLOAT64_C( 1.15), SIMDE_FLOAT64_C( 5.04), SIMDE_FLOAT64_C( 6.10), SIMDE_FLOAT64_C( 7.33), SIMDE_FLOAT64_C( 7.23), SIMDE_FLOAT64_C( 5.63), SIMDE_FLOAT64_C( 1.33), SIMDE_FLOAT64_C( 0.10) }, { SIMDE_FLOAT64_C( 5.31), SIMDE_FLOAT64_C( 2413.85), SIMDE_FLOAT64_C(523430.64), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(202681.84), SIMDE_FLOAT64_C(229876.25), SIMDE_FLOAT64_C( 4.05), SIMDE_FLOAT64_C( 0.97) } }, { { SIMDE_FLOAT64_C( 6.58), SIMDE_FLOAT64_C( 7.45), SIMDE_FLOAT64_C( 6.95), SIMDE_FLOAT64_C( 5.25), SIMDE_FLOAT64_C( 3.79), SIMDE_FLOAT64_C( 9.30), SIMDE_FLOAT64_C( 2.70), SIMDE_FLOAT64_C( 7.12) }, { SIMDE_FLOAT64_C( 6.34), SIMDE_FLOAT64_C( 4.32), SIMDE_FLOAT64_C( 2.52), SIMDE_FLOAT64_C( 8.25), SIMDE_FLOAT64_C( 9.61), SIMDE_FLOAT64_C( 3.90), SIMDE_FLOAT64_C( 7.46), SIMDE_FLOAT64_C( 3.88) }, { SIMDE_FLOAT64_C(154011.15), SIMDE_FLOAT64_C( 5857.54), SIMDE_FLOAT64_C( 132.37), SIMDE_FLOAT64_C(873603.27), SIMDE_FLOAT64_C(363682.84), SIMDE_FLOAT64_C( 5985.27), SIMDE_FLOAT64_C( 1651.86), SIMDE_FLOAT64_C( 2030.59) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_mm512_pow_pd(a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_pow_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 b[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 1.66), SIMDE_FLOAT64_C( 8.31), 
SIMDE_FLOAT64_C( 9.30), SIMDE_FLOAT64_C( 8.14), SIMDE_FLOAT64_C( 3.76), SIMDE_FLOAT64_C( 2.75), SIMDE_FLOAT64_C( 2.84) }, UINT8_C(150), { SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 1.52), SIMDE_FLOAT64_C( 1.22), SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 9.30), SIMDE_FLOAT64_C( 5.53), SIMDE_FLOAT64_C( 8.36), SIMDE_FLOAT64_C( 5.48) }, { SIMDE_FLOAT64_C( 1.36), SIMDE_FLOAT64_C( 7.20), SIMDE_FLOAT64_C( 8.45), SIMDE_FLOAT64_C( 1.77), SIMDE_FLOAT64_C( 6.75), SIMDE_FLOAT64_C( 6.44), SIMDE_FLOAT64_C( 2.61), SIMDE_FLOAT64_C( 0.07) }, { SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 20.38), SIMDE_FLOAT64_C( 5.37), SIMDE_FLOAT64_C( 9.30), SIMDE_FLOAT64_C(3445560.68), SIMDE_FLOAT64_C( 3.76), SIMDE_FLOAT64_C( 2.75), SIMDE_FLOAT64_C( 1.13) } }, { { SIMDE_FLOAT64_C( 0.80), SIMDE_FLOAT64_C( 8.62), SIMDE_FLOAT64_C( 9.49), SIMDE_FLOAT64_C( 2.94), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 3.49), SIMDE_FLOAT64_C( 2.24) }, UINT8_C(147), { SIMDE_FLOAT64_C( 2.79), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 5.06), SIMDE_FLOAT64_C( 5.54), SIMDE_FLOAT64_C( 3.22), SIMDE_FLOAT64_C( 0.74), SIMDE_FLOAT64_C( 6.09), SIMDE_FLOAT64_C( 4.74) }, { SIMDE_FLOAT64_C( 1.96), SIMDE_FLOAT64_C( 7.66), SIMDE_FLOAT64_C( 4.04), SIMDE_FLOAT64_C( 7.49), SIMDE_FLOAT64_C( 6.02), SIMDE_FLOAT64_C( 9.52), SIMDE_FLOAT64_C( 8.85), SIMDE_FLOAT64_C( 3.22) }, { SIMDE_FLOAT64_C( 7.47), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 9.49), SIMDE_FLOAT64_C( 2.94), SIMDE_FLOAT64_C( 1141.02), SIMDE_FLOAT64_C( 3.00), SIMDE_FLOAT64_C( 3.49), SIMDE_FLOAT64_C( 149.97) } }, { { SIMDE_FLOAT64_C( 7.97), SIMDE_FLOAT64_C( 0.62), SIMDE_FLOAT64_C( 9.97), SIMDE_FLOAT64_C( 4.41), SIMDE_FLOAT64_C( 3.23), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 5.21), SIMDE_FLOAT64_C( 1.85) }, UINT8_C(167), { SIMDE_FLOAT64_C( 8.14), SIMDE_FLOAT64_C( 2.43), SIMDE_FLOAT64_C( 2.53), SIMDE_FLOAT64_C( 1.63), SIMDE_FLOAT64_C( 4.67), SIMDE_FLOAT64_C( 3.83), SIMDE_FLOAT64_C( 4.42), SIMDE_FLOAT64_C( 5.05) }, { SIMDE_FLOAT64_C( 8.89), SIMDE_FLOAT64_C( 9.96), 
SIMDE_FLOAT64_C( 8.27), SIMDE_FLOAT64_C( 9.63), SIMDE_FLOAT64_C( 6.05), SIMDE_FLOAT64_C( 3.01), SIMDE_FLOAT64_C( 1.59), SIMDE_FLOAT64_C( 3.71) }, { SIMDE_FLOAT64_C(124580755.27), SIMDE_FLOAT64_C( 6928.49), SIMDE_FLOAT64_C( 2156.78), SIMDE_FLOAT64_C( 4.41), SIMDE_FLOAT64_C( 3.23), SIMDE_FLOAT64_C( 56.94), SIMDE_FLOAT64_C( 5.21), SIMDE_FLOAT64_C( 406.64) } }, { { SIMDE_FLOAT64_C( 7.05), SIMDE_FLOAT64_C( 9.08), SIMDE_FLOAT64_C( 9.73), SIMDE_FLOAT64_C( 6.57), SIMDE_FLOAT64_C( 7.92), SIMDE_FLOAT64_C( 2.94), SIMDE_FLOAT64_C( 4.54), SIMDE_FLOAT64_C( 8.54) }, UINT8_C(148), { SIMDE_FLOAT64_C( 8.95), SIMDE_FLOAT64_C( 1.77), SIMDE_FLOAT64_C( 2.95), SIMDE_FLOAT64_C( 4.15), SIMDE_FLOAT64_C( 3.63), SIMDE_FLOAT64_C( 2.48), SIMDE_FLOAT64_C( 2.30), SIMDE_FLOAT64_C( 6.06) }, { SIMDE_FLOAT64_C( 5.01), SIMDE_FLOAT64_C( 3.93), SIMDE_FLOAT64_C( 0.73), SIMDE_FLOAT64_C( 8.84), SIMDE_FLOAT64_C( 8.35), SIMDE_FLOAT64_C( 5.77), SIMDE_FLOAT64_C( 7.74), SIMDE_FLOAT64_C( 8.32) }, { SIMDE_FLOAT64_C( 7.05), SIMDE_FLOAT64_C( 9.08), SIMDE_FLOAT64_C( 2.20), SIMDE_FLOAT64_C( 6.57), SIMDE_FLOAT64_C( 47339.14), SIMDE_FLOAT64_C( 2.94), SIMDE_FLOAT64_C( 4.54), SIMDE_FLOAT64_C(3237220.14) } }, { { SIMDE_FLOAT64_C( 4.04), SIMDE_FLOAT64_C( 7.37), SIMDE_FLOAT64_C( 4.37), SIMDE_FLOAT64_C( 7.05), SIMDE_FLOAT64_C( 8.95), SIMDE_FLOAT64_C( 8.08), SIMDE_FLOAT64_C( 4.10), SIMDE_FLOAT64_C( 8.03) }, UINT8_C(201), { SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 5.95), SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( 5.20), SIMDE_FLOAT64_C( 4.50), SIMDE_FLOAT64_C( 3.66), SIMDE_FLOAT64_C( 4.15), SIMDE_FLOAT64_C( 6.27) }, { SIMDE_FLOAT64_C( 6.61), SIMDE_FLOAT64_C( 8.31), SIMDE_FLOAT64_C( 9.90), SIMDE_FLOAT64_C( 9.09), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( 5.95), SIMDE_FLOAT64_C( 4.10), SIMDE_FLOAT64_C( 4.53) }, { SIMDE_FLOAT64_C( 0.07), SIMDE_FLOAT64_C( 7.37), SIMDE_FLOAT64_C( 4.37), SIMDE_FLOAT64_C(3224559.49), SIMDE_FLOAT64_C( 8.95), SIMDE_FLOAT64_C( 8.08), SIMDE_FLOAT64_C( 341.98), SIMDE_FLOAT64_C( 4089.05) } }, { { 
SIMDE_FLOAT64_C( 6.68), SIMDE_FLOAT64_C( 2.94), SIMDE_FLOAT64_C( 2.89), SIMDE_FLOAT64_C( 2.45), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 1.20), SIMDE_FLOAT64_C( 6.49), SIMDE_FLOAT64_C( 8.05) }, UINT8_C( 44), { SIMDE_FLOAT64_C( 3.53), SIMDE_FLOAT64_C( 7.00), SIMDE_FLOAT64_C( 3.65), SIMDE_FLOAT64_C( 7.63), SIMDE_FLOAT64_C( 5.03), SIMDE_FLOAT64_C( 1.45), SIMDE_FLOAT64_C( 8.30), SIMDE_FLOAT64_C( 0.98) }, { SIMDE_FLOAT64_C( 2.20), SIMDE_FLOAT64_C( 3.50), SIMDE_FLOAT64_C( 5.47), SIMDE_FLOAT64_C( 5.86), SIMDE_FLOAT64_C( 7.66), SIMDE_FLOAT64_C( 1.74), SIMDE_FLOAT64_C( 2.46), SIMDE_FLOAT64_C( 5.96) }, { SIMDE_FLOAT64_C( 6.68), SIMDE_FLOAT64_C( 2.94), SIMDE_FLOAT64_C( 1190.53), SIMDE_FLOAT64_C(148454.65), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 1.91), SIMDE_FLOAT64_C( 6.49), SIMDE_FLOAT64_C( 8.05) } }, { { SIMDE_FLOAT64_C( 1.64), SIMDE_FLOAT64_C( 1.55), SIMDE_FLOAT64_C( 6.56), SIMDE_FLOAT64_C( 7.59), SIMDE_FLOAT64_C( 5.66), SIMDE_FLOAT64_C( 1.10), SIMDE_FLOAT64_C( 4.27), SIMDE_FLOAT64_C( 8.60) }, UINT8_C(119), { SIMDE_FLOAT64_C( 6.72), SIMDE_FLOAT64_C( 9.28), SIMDE_FLOAT64_C( 5.18), SIMDE_FLOAT64_C( 3.21), SIMDE_FLOAT64_C( 7.32), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 6.75), SIMDE_FLOAT64_C( 4.32) }, { SIMDE_FLOAT64_C( 4.41), SIMDE_FLOAT64_C( 4.38), SIMDE_FLOAT64_C( 9.35), SIMDE_FLOAT64_C( 5.86), SIMDE_FLOAT64_C( 2.68), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( 8.06), SIMDE_FLOAT64_C( 6.18) }, { SIMDE_FLOAT64_C( 4453.47), SIMDE_FLOAT64_C( 17292.59), SIMDE_FLOAT64_C(4775108.60), SIMDE_FLOAT64_C( 7.59), SIMDE_FLOAT64_C( 207.44), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C(4832684.12), SIMDE_FLOAT64_C( 8.60) } }, { { SIMDE_FLOAT64_C( 5.80), SIMDE_FLOAT64_C( 3.91), SIMDE_FLOAT64_C( 3.84), SIMDE_FLOAT64_C( 7.54), SIMDE_FLOAT64_C( 6.38), SIMDE_FLOAT64_C( 9.80), SIMDE_FLOAT64_C( 9.18), SIMDE_FLOAT64_C( 7.93) }, UINT8_C(224), { SIMDE_FLOAT64_C( 6.78), SIMDE_FLOAT64_C( 3.59), SIMDE_FLOAT64_C( 7.46), SIMDE_FLOAT64_C( 1.05), SIMDE_FLOAT64_C( 2.19), SIMDE_FLOAT64_C( 1.44), 
SIMDE_FLOAT64_C( 7.77), SIMDE_FLOAT64_C( 1.46) }, { SIMDE_FLOAT64_C( 6.62), SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 8.79), SIMDE_FLOAT64_C( 7.38), SIMDE_FLOAT64_C( 7.73), SIMDE_FLOAT64_C( 3.11), SIMDE_FLOAT64_C( 1.78), SIMDE_FLOAT64_C( 2.11) }, { SIMDE_FLOAT64_C( 5.80), SIMDE_FLOAT64_C( 3.91), SIMDE_FLOAT64_C( 3.84), SIMDE_FLOAT64_C( 7.54), SIMDE_FLOAT64_C( 6.38), SIMDE_FLOAT64_C( 3.11), SIMDE_FLOAT64_C( 38.45), SIMDE_FLOAT64_C( 2.22) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d b = simde_mm512_loadu_pd(test_vec[i].b); simde__m512d r = simde_mm512_mask_pow_pd(src, test_vec[i].k, a, b); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_rem_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi8(INT8_C( 104), INT8_C( 42), INT8_C( 53), INT8_C( -2), INT8_C(-124), INT8_C( -2), INT8_C( 96), INT8_C( 75), INT8_C( 31), INT8_C( 112), INT8_C(-105), INT8_C( -87), INT8_C( -84), INT8_C( 94), INT8_C( 112), INT8_C( 90)), simde_mm_set_epi8(INT8_C( -65), INT8_C( -89), INT8_C( -30), INT8_C( 64), INT8_C( -43), INT8_C( -54), INT8_C( 110), INT8_C( 113), INT8_C( 89), INT8_C( -19), INT8_C( 70), INT8_C( -30), INT8_C(-124), INT8_C( 91), INT8_C( -1), INT8_C( 88)), simde_mm_set_epi8(INT8_C( 39), INT8_C( 42), INT8_C( 23), INT8_C( -2), INT8_C( -38), INT8_C( -2), INT8_C( 96), INT8_C( 75), INT8_C( 31), INT8_C( 17), INT8_C( -35), INT8_C( -27), INT8_C( -84), INT8_C( 3), INT8_C( 0), INT8_C( 2)) }, { simde_mm_set_epi8(INT8_C( -23), INT8_C( -86), INT8_C( -15), INT8_C( 126), INT8_C( -74), INT8_C( 10), INT8_C( -48), INT8_C( -58), INT8_C( 93), INT8_C(-126), INT8_C( -61), INT8_C( -79), INT8_C( -69), INT8_C( -33), INT8_C(-117), INT8_C( -3)), simde_mm_set_epi8(INT8_C( 41), INT8_C( 49), INT8_C( -85), INT8_C( 
-58), INT8_C( 40), INT8_C( 44), INT8_C( -14), INT8_C( 51), INT8_C(-118), INT8_C( -39), INT8_C( -41), INT8_C( -7), INT8_C( -55), INT8_C( 37), INT8_C(-119), INT8_C( 29)), simde_mm_set_epi8(INT8_C( -23), INT8_C( -37), INT8_C( -15), INT8_C( 10), INT8_C( -34), INT8_C( 10), INT8_C( -6), INT8_C( -7), INT8_C( 93), INT8_C( -9), INT8_C( -20), INT8_C( -2), INT8_C( -14), INT8_C( -33), INT8_C(-117), INT8_C( -3)) }, { simde_mm_set_epi8(INT8_C( 88), INT8_C( -13), INT8_C( 83), INT8_C( -34), INT8_C( 17), INT8_C( -52), INT8_C( 102), INT8_C( 26), INT8_C( 74), INT8_C(-115), INT8_C( -4), INT8_C( 101), INT8_C( -39), INT8_C( 50), INT8_C( -9), INT8_C(-117)), simde_mm_set_epi8(INT8_C( 71), INT8_C( 16), INT8_C( 127), INT8_C( 20), INT8_C(-125), INT8_C( -92), INT8_C( -21), INT8_C( -43), INT8_C( 78), INT8_C( -41), INT8_C( -6), INT8_C( 42), INT8_C( 9), INT8_C( -58), INT8_C( 72), INT8_C( 56)), simde_mm_set_epi8(INT8_C( 17), INT8_C( -13), INT8_C( 83), INT8_C( -14), INT8_C( 17), INT8_C( -52), INT8_C( 18), INT8_C( 26), INT8_C( 74), INT8_C( -33), INT8_C( -4), INT8_C( 17), INT8_C( -3), INT8_C( 50), INT8_C( -9), INT8_C( -5)) }, { simde_mm_set_epi8(INT8_C( -95), INT8_C( 114), INT8_C(-111), INT8_C( 28), INT8_C( 100), INT8_C( -53), INT8_C( 101), INT8_C( 21), INT8_C( 3), INT8_C( 0), INT8_C( 63), INT8_C( 116), INT8_C( 43), INT8_C( 106), INT8_C( -29), INT8_C( -44)), simde_mm_set_epi8(INT8_C(-106), INT8_C( -49), INT8_C( 31), INT8_C(-118), INT8_C( 70), INT8_C( 80), INT8_C(-117), INT8_C( 103), INT8_C( -99), INT8_C( -33), INT8_C( 12), INT8_C( -74), INT8_C( -41), INT8_C( -14), INT8_C(-105), INT8_C( -57)), simde_mm_set_epi8(INT8_C( -95), INT8_C( 16), INT8_C( -18), INT8_C( 28), INT8_C( 30), INT8_C( -53), INT8_C( 101), INT8_C( 21), INT8_C( 3), INT8_C( 0), INT8_C( 3), INT8_C( 42), INT8_C( 2), INT8_C( 8), INT8_C( -29), INT8_C( -44)) }, { simde_mm_set_epi8(INT8_C( 29), INT8_C( 89), INT8_C( 4), INT8_C( 90), INT8_C( -1), INT8_C( 56), INT8_C( 40), INT8_C(-107), INT8_C(-125), INT8_C(-104), INT8_C( 36), INT8_C( -27), 
INT8_C( -21), INT8_C( -84), INT8_C( -95), INT8_C( -6)), simde_mm_set_epi8(INT8_C( 29), INT8_C( 101), INT8_C( 12), INT8_C( -7), INT8_C( -72), INT8_C( -61), INT8_C( -6), INT8_C( -43), INT8_C( 53), INT8_C( 76), INT8_C( -68), INT8_C( 25), INT8_C( -80), INT8_C( -78), INT8_C( -55), INT8_C( -12)), simde_mm_set_epi8(INT8_C( 0), INT8_C( 89), INT8_C( 4), INT8_C( 6), INT8_C( -1), INT8_C( 56), INT8_C( 4), INT8_C( -21), INT8_C( -19), INT8_C( -28), INT8_C( 36), INT8_C( -2), INT8_C( -21), INT8_C( -6), INT8_C( -40), INT8_C( -6)) }, { simde_mm_set_epi8(INT8_C( -60), INT8_C( 36), INT8_C( 35), INT8_C( 54), INT8_C( 94), INT8_C( 53), INT8_C(-124), INT8_C( -9), INT8_C( -29), INT8_C( -20), INT8_C( 32), INT8_C( 119), INT8_C( 124), INT8_C( 15), INT8_C( 15), INT8_C( -94)), simde_mm_set_epi8(INT8_C( 78), INT8_C( 89), INT8_C( 105), INT8_C( 98), INT8_C( -78), INT8_C( -83), INT8_C(-122), INT8_C( -57), INT8_C( -45), INT8_C( -13), INT8_C( -95), INT8_C( -36), INT8_C( -85), INT8_C( 107), INT8_C( 43), INT8_C( 1)), simde_mm_set_epi8(INT8_C( -60), INT8_C( 36), INT8_C( 35), INT8_C( 54), INT8_C( 16), INT8_C( 53), INT8_C( -2), INT8_C( -9), INT8_C( -29), INT8_C( -7), INT8_C( 32), INT8_C( 11), INT8_C( 39), INT8_C( 15), INT8_C( 15), INT8_C( 0)) }, { simde_mm_set_epi8(INT8_C( 32), INT8_C( 79), INT8_C( 19), INT8_C( 72), INT8_C( 29), INT8_C( -53), INT8_C( 79), INT8_C( -3), INT8_C( 57), INT8_C( 16), INT8_C( 99), INT8_C( 126), INT8_C( -77), INT8_C( 12), INT8_C( 100), INT8_C( 11)), simde_mm_set_epi8(INT8_C( 101), INT8_C( -18), INT8_C( -52), INT8_C(-126), INT8_C( 117), INT8_C( -86), INT8_C( -70), INT8_C( 72), INT8_C( -85), INT8_C( 25), INT8_C( -31), INT8_C( -92), INT8_C( 7), INT8_C( 17), INT8_C(-125), INT8_C( 67)), simde_mm_set_epi8(INT8_C( 32), INT8_C( 7), INT8_C( 19), INT8_C( 72), INT8_C( 29), INT8_C( -53), INT8_C( 9), INT8_C( -3), INT8_C( 57), INT8_C( 16), INT8_C( 6), INT8_C( 34), INT8_C( 0), INT8_C( 12), INT8_C( 100), INT8_C( 11)) }, { simde_mm_set_epi8(INT8_C( -12), INT8_C( 123), INT8_C( -45), INT8_C( -41), 
INT8_C( -52), INT8_C( -36), INT8_C( 31), INT8_C( -52), INT8_C( -27), INT8_C( 71), INT8_C( 9), INT8_C( -84), INT8_C( -96), INT8_C(-115), INT8_C( 31), INT8_C( 12)), simde_mm_set_epi8(INT8_C( -68), INT8_C( 29), INT8_C( -34), INT8_C( 81), INT8_C( -41), INT8_C( 10), INT8_C( -66), INT8_C( -37), INT8_C( 108), INT8_C( -9), INT8_C( -68), INT8_C( -41), INT8_C( -24), INT8_C( -55), INT8_C( -20), INT8_C( 9)), simde_mm_set_epi8(INT8_C( -12), INT8_C( 7), INT8_C( -11), INT8_C( -41), INT8_C( -11), INT8_C( -6), INT8_C( 31), INT8_C( -15), INT8_C( -27), INT8_C( 8), INT8_C( 9), INT8_C( -2), INT8_C( 0), INT8_C( -5), INT8_C( 11), INT8_C( 3)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_rem_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_rem_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi16(INT16_C( 26666), INT16_C( 13822), INT16_C(-31490), INT16_C( 24651), INT16_C( 8048), INT16_C(-26711), INT16_C(-21410), INT16_C( 28762)), simde_mm_set_epi16(INT16_C(-16473), INT16_C( -7616), INT16_C(-10806), INT16_C( 28273), INT16_C( 23021), INT16_C( 18146), INT16_C(-31653), INT16_C( -168)), simde_mm_set_epi16(INT16_C( 10193), INT16_C( 6206), INT16_C( -9878), INT16_C( 24651), INT16_C( 8048), INT16_C( -8565), INT16_C(-21410), INT16_C( 34)) }, { simde_mm_set_epi16(INT16_C( -5718), INT16_C( -3714), INT16_C(-18934), INT16_C(-12090), INT16_C( 23938), INT16_C(-15439), INT16_C(-17441), INT16_C(-29699)), simde_mm_set_epi16(INT16_C( 10545), INT16_C(-21562), INT16_C( 10284), INT16_C( -3533), INT16_C(-29991), INT16_C(-10247), INT16_C(-14043), INT16_C(-30435)), simde_mm_set_epi16(INT16_C( -5718), INT16_C( -3714), INT16_C( -8650), INT16_C( -1491), INT16_C( 23938), INT16_C( -5192), INT16_C( -3398), INT16_C(-29699)) }, { simde_mm_set_epi16(INT16_C( 22771), INT16_C( 21470), INT16_C( 4556), INT16_C( 26138), 
INT16_C( 19085), INT16_C( -923), INT16_C( -9934), INT16_C( -2165)), simde_mm_set_epi16(INT16_C( 18192), INT16_C( 32532), INT16_C(-31836), INT16_C( -5163), INT16_C( 20183), INT16_C( -1494), INT16_C( 2502), INT16_C( 18488)), simde_mm_set_epi16(INT16_C( 4579), INT16_C( 21470), INT16_C( 4556), INT16_C( 323), INT16_C( 19085), INT16_C( -923), INT16_C( -2428), INT16_C( -2165)) }, { simde_mm_set_epi16(INT16_C(-24206), INT16_C(-28388), INT16_C( 25803), INT16_C( 25877), INT16_C( 768), INT16_C( 16244), INT16_C( 11114), INT16_C( -7212)), simde_mm_set_epi16(INT16_C(-26929), INT16_C( 8074), INT16_C( 18000), INT16_C(-29849), INT16_C(-25121), INT16_C( 3254), INT16_C(-10254), INT16_C(-26681)), simde_mm_set_epi16(INT16_C(-24206), INT16_C( -4166), INT16_C( 7803), INT16_C( 25877), INT16_C( 768), INT16_C( 3228), INT16_C( 860), INT16_C( -7212)) }, { simde_mm_set_epi16(INT16_C( 7513), INT16_C( 1114), INT16_C( -200), INT16_C( 10389), INT16_C(-31848), INT16_C( 9445), INT16_C( -5204), INT16_C(-24070)), simde_mm_set_epi16(INT16_C( 7525), INT16_C( 3321), INT16_C(-18237), INT16_C( -1323), INT16_C( 13644), INT16_C(-17383), INT16_C(-20302), INT16_C(-13836)), simde_mm_set_epi16(INT16_C( 7513), INT16_C( 1114), INT16_C( -200), INT16_C( 1128), INT16_C( -4560), INT16_C( 9445), INT16_C( -5204), INT16_C(-10234)) }, { simde_mm_set_epi16(INT16_C(-15324), INT16_C( 9014), INT16_C( 24117), INT16_C(-31497), INT16_C( -7188), INT16_C( 8311), INT16_C( 31759), INT16_C( 4002)), simde_mm_set_epi16(INT16_C( 20057), INT16_C( 26978), INT16_C(-19795), INT16_C(-31033), INT16_C(-11277), INT16_C(-24100), INT16_C(-21653), INT16_C( 11009)), simde_mm_set_epi16(INT16_C(-15324), INT16_C( 9014), INT16_C( 4322), INT16_C( -464), INT16_C( -7188), INT16_C( 8311), INT16_C( 10106), INT16_C( 4002)) }, { simde_mm_set_epi16(INT16_C( 8271), INT16_C( 4936), INT16_C( 7627), INT16_C( 20477), INT16_C( 14608), INT16_C( 25470), INT16_C(-19700), INT16_C( 25611)), simde_mm_set_epi16(INT16_C( 26094), INT16_C(-13182), INT16_C( 30122), 
INT16_C(-17848), INT16_C(-21735), INT16_C( -7772), INT16_C( 1809), INT16_C(-31933)), simde_mm_set_epi16(INT16_C( 8271), INT16_C( 4936), INT16_C( 7627), INT16_C( 2629), INT16_C( 14608), INT16_C( 2154), INT16_C( -1610), INT16_C( 25611)) }, { simde_mm_set_epi16(INT16_C( -2949), INT16_C(-11305), INT16_C(-13092), INT16_C( 8140), INT16_C( -6841), INT16_C( 2476), INT16_C(-24435), INT16_C( 7948)), simde_mm_set_epi16(INT16_C(-17379), INT16_C( -8623), INT16_C(-10486), INT16_C(-16677), INT16_C( 27895), INT16_C(-17193), INT16_C( -5943), INT16_C( -5111)), simde_mm_set_epi16(INT16_C( -2949), INT16_C( -2682), INT16_C( -2606), INT16_C( 8140), INT16_C( -6841), INT16_C( 2476), INT16_C( -663), INT16_C( 2837)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_rem_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_rem_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_mm_set_epi32(INT32_C( 1747596798), INT32_C(-2063703989), INT32_C( 527472553), INT32_C(-1403096998)), simde_mm_set_epi32(INT32_C(-1079516608), INT32_C( -708153743), INT32_C( 1508722402), INT32_C(-2074345640)), simde_mm_set_epi32(INT32_C( 668080190), INT32_C( -647396503), INT32_C( 527472553), INT32_C(-1403096998)) }, { simde_mm_set_epi32(INT32_C( -374673026), INT32_C(-1240805178), INT32_C( 1568850865), INT32_C(-1142977539)), simde_mm_set_epi32(INT32_C( 691121094), INT32_C( 674034227), INT32_C(-1965434887), INT32_C( -920286947)), simde_mm_set_epi32(INT32_C( -374673026), INT32_C( -566770951), INT32_C( 1568850865), INT32_C( -222690592)) }, { simde_mm_set_epi32(INT32_C( 1492341726), INT32_C( 298608154), INT32_C( 1250819173), INT32_C( -650971253)), simde_mm_set_epi32(INT32_C( 1192263444), INT32_C(-2086343723), INT32_C( 1322777130), INT32_C( 163989560)), simde_mm_set_epi32(INT32_C( 300078282), INT32_C( 298608154), INT32_C( 1250819173), 
INT32_C( -159002573)) }, { simde_mm_set_epi32(INT32_C(-1586327268), INT32_C( 1691051285), INT32_C( 50347892), INT32_C( 728425428)), simde_mm_set_epi32(INT32_C(-1764810870), INT32_C( 1179683687), INT32_C(-1646326602), INT32_C( -671967289)), simde_mm_set_epi32(INT32_C(-1586327268), INT32_C( 511367598), INT32_C( 50347892), INT32_C( 56458139)) }, { simde_mm_set_epi32(INT32_C( 492373082), INT32_C( -13096811), INT32_C(-2087181083), INT32_C( -341007878)), simde_mm_set_epi32(INT32_C( 493161721), INT32_C(-1195115819), INT32_C( 894221337), INT32_C(-1330460172)), simde_mm_set_epi32(INT32_C( 492373082), INT32_C( -13096811), INT32_C( -298738409), INT32_C( -341007878)) }, { simde_mm_set_epi32(INT32_C(-1004264650), INT32_C( 1580565751), INT32_C( -471064457), INT32_C( 2081361826)), simde_mm_set_epi32(INT32_C( 1314482530), INT32_C(-1297250617), INT32_C( -739008036), INT32_C(-1419039999)), simde_mm_set_epi32(INT32_C(-1004264650), INT32_C( 283315134), INT32_C( -471064457), INT32_C( 662321827)) }, { simde_mm_set_epi32(INT32_C( 542053192), INT32_C( 499863549), INT32_C( 957375358), INT32_C(-1291033589)), simde_mm_set_epi32(INT32_C( 1710148738), INT32_C( 1974123080), INT32_C(-1424367196), INT32_C( 118588227)), simde_mm_set_epi32(INT32_C( 542053192), INT32_C( 499863549), INT32_C( 957375358), INT32_C( -105151319)) }, { simde_mm_set_epi32(INT32_C( -193211433), INT32_C( -857989172), INT32_C( -448329300), INT32_C(-1601364212)), simde_mm_set_epi32(INT32_C(-1138893231), INT32_C( -687161637), INT32_C( 1828175063), INT32_C( -389420023)), simde_mm_set_epi32(INT32_C( -193211433), INT32_C( -170827535), INT32_C( -448329300), INT32_C( -43684120)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_rem_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_rem_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { 
simde_mm_set_epi64x(INT64_C( 7505871096235581515), INT64_C( 2265477367564496986)), simde_mm_set_epi64x(INT64_C(-4636488523262038415), INT64_C( 6479913377553186648)), simde_mm_set_epi64x(INT64_C( 2869382572973543100), INT64_C( 2265477367564496986)) }, { simde_mm_set_epi64x(INT64_C(-1609208390309195578), INT64_C( 6738163160628300797)), simde_mm_set_epi64x(INT64_C( 2968342496979776051), INT64_C(-8441478558707775203)), simde_mm_set_epi64x(INT64_C(-1609208390309195578), INT64_C( 6738163160628300797)) }, { simde_mm_set_epi64x(INT64_C( 6409558907924801050), INT64_C( 5372227444888762251)), simde_mm_set_epi64x(INT64_C( 5120732502404950997), INT64_C( 5681284513410730040)), simde_mm_set_epi64x(INT64_C( 1288826405519850053), INT64_C( 5372227444888762251)) }, { simde_mm_set_epi64x(INT64_C(-6813223735121976043), INT64_C( 216242550290965460)), simde_mm_set_epi64x(INT64_C(-7579804969095623833), INT64_C(-7070918910501808185)), simde_mm_set_epi64x(INT64_C(-6813223735121976043), INT64_C( 216242550290965460)) }, { simde_mm_set_epi64x(INT64_C( 2114726288902596757), INT64_C(-8964374488360902150)), simde_mm_set_epi64x(INT64_C( 2118113466433927893), INT64_C( 3840651400764901876)), simde_mm_set_epi64x(INT64_C( 2114726288902596757), INT64_C(-1283071686831098398)) }, { simde_mm_set_epi64x(INT64_C(-4313283826698320649), INT64_C(-2023206435041636446)), simde_mm_set_epi64x(INT64_C( 5645659480511055559), INT64_C(-3174015343225263359)), simde_mm_set_epi64x(INT64_C(-4313283826698320649), INT64_C(-2023206435041636446)) }, { simde_mm_set_epi64x(INT64_C( 2328100732832272381), INT64_C( 4111895855610225675)), simde_mm_set_epi64x(INT64_C( 7345032902979795528), INT64_C(-6117610524196633789)), simde_mm_set_epi64x(INT64_C( 2328100732832272381), INT64_C( 4111895855610225675)) }, { simde_mm_set_epi64x(INT64_C( -829836782511317044), INT64_C(-1925559678644969716)), simde_mm_set_epi64x(INT64_C(-4891509177172967717), INT64_C( 7851952110853286921)), simde_mm_set_epi64x(INT64_C( -829836782511317044), 
INT64_C(-1925559678644969716)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_rem_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m128i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_rem_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu8(UINT8_C(104), UINT8_C( 42), UINT8_C( 53), UINT8_C(254), UINT8_C(132), UINT8_C(254), UINT8_C( 96), UINT8_C( 75), UINT8_C( 31), UINT8_C(112), UINT8_C(151), UINT8_C(169), UINT8_C(172), UINT8_C( 94), UINT8_C(112), UINT8_C( 90)), simde_x_mm_set_epu8(UINT8_C(191), UINT8_C(167), UINT8_C(226), UINT8_C( 64), UINT8_C(213), UINT8_C(202), UINT8_C(110), UINT8_C(113), UINT8_C( 89), UINT8_C(237), UINT8_C( 70), UINT8_C(226), UINT8_C(132), UINT8_C( 91), UINT8_C(255), UINT8_C( 88)), simde_x_mm_set_epu8(UINT8_C(104), UINT8_C( 42), UINT8_C( 53), UINT8_C( 62), UINT8_C(132), UINT8_C( 52), UINT8_C( 96), UINT8_C( 75), UINT8_C( 31), UINT8_C(112), UINT8_C( 11), UINT8_C(169), UINT8_C( 40), UINT8_C( 3), UINT8_C(112), UINT8_C( 2)) }, { simde_x_mm_set_epu8(UINT8_C(233), UINT8_C(170), UINT8_C(241), UINT8_C(126), UINT8_C(182), UINT8_C( 10), UINT8_C(208), UINT8_C(198), UINT8_C( 93), UINT8_C(130), UINT8_C(195), UINT8_C(177), UINT8_C(187), UINT8_C(223), UINT8_C(139), UINT8_C(253)), simde_x_mm_set_epu8(UINT8_C( 41), UINT8_C( 49), UINT8_C(171), UINT8_C(198), UINT8_C( 40), UINT8_C( 44), UINT8_C(242), UINT8_C( 51), UINT8_C(138), UINT8_C(217), UINT8_C(215), UINT8_C(249), UINT8_C(201), UINT8_C( 37), UINT8_C(137), UINT8_C( 29)), simde_x_mm_set_epu8(UINT8_C( 28), UINT8_C( 23), UINT8_C( 70), UINT8_C(126), UINT8_C( 22), UINT8_C( 10), UINT8_C(208), UINT8_C( 45), UINT8_C( 93), UINT8_C(130), UINT8_C(195), UINT8_C(177), UINT8_C(187), UINT8_C( 1), UINT8_C( 2), UINT8_C( 21)) }, { simde_x_mm_set_epu8(UINT8_C( 88), UINT8_C(243), UINT8_C( 83), UINT8_C(222), UINT8_C( 17), UINT8_C(204), UINT8_C(102), UINT8_C( 26), UINT8_C( 74), 
UINT8_C(141), UINT8_C(252), UINT8_C(101), UINT8_C(217), UINT8_C( 50), UINT8_C(247), UINT8_C(139)), simde_x_mm_set_epu8(UINT8_C( 71), UINT8_C( 16), UINT8_C(127), UINT8_C( 20), UINT8_C(131), UINT8_C(164), UINT8_C(235), UINT8_C(213), UINT8_C( 78), UINT8_C(215), UINT8_C(250), UINT8_C( 42), UINT8_C( 9), UINT8_C(198), UINT8_C( 72), UINT8_C( 56)), simde_x_mm_set_epu8(UINT8_C( 17), UINT8_C( 3), UINT8_C( 83), UINT8_C( 2), UINT8_C( 17), UINT8_C( 40), UINT8_C(102), UINT8_C( 26), UINT8_C( 74), UINT8_C(141), UINT8_C( 2), UINT8_C( 17), UINT8_C( 1), UINT8_C( 50), UINT8_C( 31), UINT8_C( 27)) }, { simde_x_mm_set_epu8(UINT8_C(161), UINT8_C(114), UINT8_C(145), UINT8_C( 28), UINT8_C(100), UINT8_C(203), UINT8_C(101), UINT8_C( 21), UINT8_C( 3), UINT8_C( 0), UINT8_C( 63), UINT8_C(116), UINT8_C( 43), UINT8_C(106), UINT8_C(227), UINT8_C(212)), simde_x_mm_set_epu8(UINT8_C(150), UINT8_C(207), UINT8_C( 31), UINT8_C(138), UINT8_C( 70), UINT8_C( 80), UINT8_C(139), UINT8_C(103), UINT8_C(157), UINT8_C(223), UINT8_C( 12), UINT8_C(182), UINT8_C(215), UINT8_C(242), UINT8_C(151), UINT8_C(199)), simde_x_mm_set_epu8(UINT8_C( 11), UINT8_C(114), UINT8_C( 21), UINT8_C( 28), UINT8_C( 30), UINT8_C( 43), UINT8_C(101), UINT8_C( 21), UINT8_C( 3), UINT8_C( 0), UINT8_C( 3), UINT8_C(116), UINT8_C( 43), UINT8_C(106), UINT8_C( 76), UINT8_C( 13)) }, { simde_x_mm_set_epu8(UINT8_C( 29), UINT8_C( 89), UINT8_C( 4), UINT8_C( 90), UINT8_C(255), UINT8_C( 56), UINT8_C( 40), UINT8_C(149), UINT8_C(131), UINT8_C(152), UINT8_C( 36), UINT8_C(229), UINT8_C(235), UINT8_C(172), UINT8_C(161), UINT8_C(250)), simde_x_mm_set_epu8(UINT8_C( 29), UINT8_C(101), UINT8_C( 12), UINT8_C(249), UINT8_C(184), UINT8_C(195), UINT8_C(250), UINT8_C(213), UINT8_C( 53), UINT8_C( 76), UINT8_C(188), UINT8_C( 25), UINT8_C(176), UINT8_C(178), UINT8_C(201), UINT8_C(244)), simde_x_mm_set_epu8(UINT8_C( 0), UINT8_C( 89), UINT8_C( 4), UINT8_C( 90), UINT8_C( 71), UINT8_C( 56), UINT8_C( 40), UINT8_C(149), UINT8_C( 25), UINT8_C( 0), UINT8_C( 36), UINT8_C( 4), 
UINT8_C( 59), UINT8_C(172), UINT8_C(161), UINT8_C( 6)) }, { simde_x_mm_set_epu8(UINT8_C(196), UINT8_C( 36), UINT8_C( 35), UINT8_C( 54), UINT8_C( 94), UINT8_C( 53), UINT8_C(132), UINT8_C(247), UINT8_C(227), UINT8_C(236), UINT8_C( 32), UINT8_C(119), UINT8_C(124), UINT8_C( 15), UINT8_C( 15), UINT8_C(162)), simde_x_mm_set_epu8(UINT8_C( 78), UINT8_C( 89), UINT8_C(105), UINT8_C( 98), UINT8_C(178), UINT8_C(173), UINT8_C(134), UINT8_C(199), UINT8_C(211), UINT8_C(243), UINT8_C(161), UINT8_C(220), UINT8_C(171), UINT8_C(107), UINT8_C( 43), UINT8_C( 1)), simde_x_mm_set_epu8(UINT8_C( 40), UINT8_C( 36), UINT8_C( 35), UINT8_C( 54), UINT8_C( 94), UINT8_C( 53), UINT8_C(132), UINT8_C( 48), UINT8_C( 16), UINT8_C(236), UINT8_C( 32), UINT8_C(119), UINT8_C(124), UINT8_C( 15), UINT8_C( 15), UINT8_C( 0)) }, { simde_x_mm_set_epu8(UINT8_C( 32), UINT8_C( 79), UINT8_C( 19), UINT8_C( 72), UINT8_C( 29), UINT8_C(203), UINT8_C( 79), UINT8_C(253), UINT8_C( 57), UINT8_C( 16), UINT8_C( 99), UINT8_C(126), UINT8_C(179), UINT8_C( 12), UINT8_C(100), UINT8_C( 11)), simde_x_mm_set_epu8(UINT8_C(101), UINT8_C(238), UINT8_C(204), UINT8_C(130), UINT8_C(117), UINT8_C(170), UINT8_C(186), UINT8_C( 72), UINT8_C(171), UINT8_C( 25), UINT8_C(225), UINT8_C(164), UINT8_C( 7), UINT8_C( 17), UINT8_C(131), UINT8_C( 67)), simde_x_mm_set_epu8(UINT8_C( 32), UINT8_C( 79), UINT8_C( 19), UINT8_C( 72), UINT8_C( 29), UINT8_C( 33), UINT8_C( 79), UINT8_C( 37), UINT8_C( 57), UINT8_C( 16), UINT8_C( 99), UINT8_C(126), UINT8_C( 4), UINT8_C( 12), UINT8_C(100), UINT8_C( 11)) }, { simde_x_mm_set_epu8(UINT8_C(244), UINT8_C(123), UINT8_C(211), UINT8_C(215), UINT8_C(204), UINT8_C(220), UINT8_C( 31), UINT8_C(204), UINT8_C(229), UINT8_C( 71), UINT8_C( 9), UINT8_C(172), UINT8_C(160), UINT8_C(141), UINT8_C( 31), UINT8_C( 12)), simde_x_mm_set_epu8(UINT8_C(188), UINT8_C( 29), UINT8_C(222), UINT8_C( 81), UINT8_C(215), UINT8_C( 10), UINT8_C(190), UINT8_C(219), UINT8_C(108), UINT8_C(247), UINT8_C(188), UINT8_C(215), UINT8_C(232), UINT8_C(201), 
UINT8_C(236), UINT8_C( 9)), simde_x_mm_set_epu8(UINT8_C( 56), UINT8_C( 7), UINT8_C(211), UINT8_C( 53), UINT8_C(204), UINT8_C( 0), UINT8_C( 31), UINT8_C(204), UINT8_C( 13), UINT8_C( 71), UINT8_C( 9), UINT8_C(172), UINT8_C(160), UINT8_C(141), UINT8_C( 31), UINT8_C( 3)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_rem_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_rem_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu16(UINT16_C(26666), UINT16_C(13822), UINT16_C(34046), UINT16_C(24651), UINT16_C( 8048), UINT16_C(38825), UINT16_C(44126), UINT16_C(28762)), simde_x_mm_set_epu16(UINT16_C(49063), UINT16_C(57920), UINT16_C(54730), UINT16_C(28273), UINT16_C(23021), UINT16_C(18146), UINT16_C(33883), UINT16_C(65368)), simde_x_mm_set_epu16(UINT16_C(26666), UINT16_C(13822), UINT16_C(34046), UINT16_C(24651), UINT16_C( 8048), UINT16_C( 2533), UINT16_C(10243), UINT16_C(28762)) }, { simde_x_mm_set_epu16(UINT16_C(59818), UINT16_C(61822), UINT16_C(46602), UINT16_C(53446), UINT16_C(23938), UINT16_C(50097), UINT16_C(48095), UINT16_C(35837)), simde_x_mm_set_epu16(UINT16_C(10545), UINT16_C(43974), UINT16_C(10284), UINT16_C(62003), UINT16_C(35545), UINT16_C(55289), UINT16_C(51493), UINT16_C(35101)), simde_x_mm_set_epu16(UINT16_C( 7093), UINT16_C(17848), UINT16_C( 5466), UINT16_C(53446), UINT16_C(23938), UINT16_C(50097), UINT16_C(48095), UINT16_C( 736)) }, { simde_x_mm_set_epu16(UINT16_C(22771), UINT16_C(21470), UINT16_C( 4556), UINT16_C(26138), UINT16_C(19085), UINT16_C(64613), UINT16_C(55602), UINT16_C(63371)), simde_x_mm_set_epu16(UINT16_C(18192), UINT16_C(32532), UINT16_C(33700), UINT16_C(60373), UINT16_C(20183), UINT16_C(64042), UINT16_C( 2502), UINT16_C(18488)), simde_x_mm_set_epu16(UINT16_C( 4579), UINT16_C(21470), UINT16_C( 4556), UINT16_C(26138), UINT16_C(19085), 
UINT16_C( 571), UINT16_C( 558), UINT16_C( 7907)) }, { simde_x_mm_set_epu16(UINT16_C(41330), UINT16_C(37148), UINT16_C(25803), UINT16_C(25877), UINT16_C( 768), UINT16_C(16244), UINT16_C(11114), UINT16_C(58324)), simde_x_mm_set_epu16(UINT16_C(38607), UINT16_C( 8074), UINT16_C(18000), UINT16_C(35687), UINT16_C(40415), UINT16_C( 3254), UINT16_C(55282), UINT16_C(38855)), simde_x_mm_set_epu16(UINT16_C( 2723), UINT16_C( 4852), UINT16_C( 7803), UINT16_C(25877), UINT16_C( 768), UINT16_C( 3228), UINT16_C(11114), UINT16_C(19469)) }, { simde_x_mm_set_epu16(UINT16_C( 7513), UINT16_C( 1114), UINT16_C(65336), UINT16_C(10389), UINT16_C(33688), UINT16_C( 9445), UINT16_C(60332), UINT16_C(41466)), simde_x_mm_set_epu16(UINT16_C( 7525), UINT16_C( 3321), UINT16_C(47299), UINT16_C(64213), UINT16_C(13644), UINT16_C(48153), UINT16_C(45234), UINT16_C(51700)), simde_x_mm_set_epu16(UINT16_C( 7513), UINT16_C( 1114), UINT16_C(18037), UINT16_C(10389), UINT16_C( 6400), UINT16_C( 9445), UINT16_C(15098), UINT16_C(41466)) }, { simde_x_mm_set_epu16(UINT16_C(50212), UINT16_C( 9014), UINT16_C(24117), UINT16_C(34039), UINT16_C(58348), UINT16_C( 8311), UINT16_C(31759), UINT16_C( 4002)), simde_x_mm_set_epu16(UINT16_C(20057), UINT16_C(26978), UINT16_C(45741), UINT16_C(34503), UINT16_C(54259), UINT16_C(41436), UINT16_C(43883), UINT16_C(11009)), simde_x_mm_set_epu16(UINT16_C(10098), UINT16_C( 9014), UINT16_C(24117), UINT16_C(34039), UINT16_C( 4089), UINT16_C( 8311), UINT16_C(31759), UINT16_C( 4002)) }, { simde_x_mm_set_epu16(UINT16_C( 8271), UINT16_C( 4936), UINT16_C( 7627), UINT16_C(20477), UINT16_C(14608), UINT16_C(25470), UINT16_C(45836), UINT16_C(25611)), simde_x_mm_set_epu16(UINT16_C(26094), UINT16_C(52354), UINT16_C(30122), UINT16_C(47688), UINT16_C(43801), UINT16_C(57764), UINT16_C( 1809), UINT16_C(33603)), simde_x_mm_set_epu16(UINT16_C( 8271), UINT16_C( 4936), UINT16_C( 7627), UINT16_C(20477), UINT16_C(14608), UINT16_C(25470), UINT16_C( 611), UINT16_C(25611)) }, { 
simde_x_mm_set_epu16(UINT16_C(62587), UINT16_C(54231), UINT16_C(52444), UINT16_C( 8140), UINT16_C(58695), UINT16_C( 2476), UINT16_C(41101), UINT16_C( 7948)), simde_x_mm_set_epu16(UINT16_C(48157), UINT16_C(56913), UINT16_C(55050), UINT16_C(48859), UINT16_C(27895), UINT16_C(48343), UINT16_C(59593), UINT16_C(60425)), simde_x_mm_set_epu16(UINT16_C(14430), UINT16_C(54231), UINT16_C(52444), UINT16_C( 8140), UINT16_C( 2905), UINT16_C( 2476), UINT16_C(41101), UINT16_C( 7948)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_rem_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_rem_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu32(UINT32_C(1747596798), UINT32_C(2231263307), UINT32_C( 527472553), UINT32_C(2891870298)), simde_x_mm_set_epu32(UINT32_C(3215450688), UINT32_C(3586813553), UINT32_C(1508722402), UINT32_C(2220621656)), simde_x_mm_set_epu32(UINT32_C(1747596798), UINT32_C(2231263307), UINT32_C( 527472553), UINT32_C( 671248642)) }, { simde_x_mm_set_epu32(UINT32_C(3920294270), UINT32_C(3054162118), UINT32_C(1568850865), UINT32_C(3151989757)), simde_x_mm_set_epu32(UINT32_C( 691121094), UINT32_C( 674034227), UINT32_C(2329532409), UINT32_C(3374680349)), simde_x_mm_set_epu32(UINT32_C( 464688800), UINT32_C( 358025210), UINT32_C(1568850865), UINT32_C(3151989757)) }, { simde_x_mm_set_epu32(UINT32_C(1492341726), UINT32_C( 298608154), UINT32_C(1250819173), UINT32_C(3643996043)), simde_x_mm_set_epu32(UINT32_C(1192263444), UINT32_C(2208623573), UINT32_C(1322777130), UINT32_C( 163989560)), simde_x_mm_set_epu32(UINT32_C( 300078282), UINT32_C( 298608154), UINT32_C(1250819173), UINT32_C( 36225723)) }, { simde_x_mm_set_epu32(UINT32_C(2708640028), UINT32_C(1691051285), UINT32_C( 50347892), UINT32_C( 728425428)), simde_x_mm_set_epu32(UINT32_C(2530156426), UINT32_C(1179683687), 
UINT32_C(2648640694), UINT32_C(3623000007)), simde_x_mm_set_epu32(UINT32_C( 178483602), UINT32_C( 511367598), UINT32_C( 50347892), UINT32_C( 728425428)) }, { simde_x_mm_set_epu32(UINT32_C( 492373082), UINT32_C(4281870485), UINT32_C(2207786213), UINT32_C(3953959418)), simde_x_mm_set_epu32(UINT32_C( 493161721), UINT32_C(3099851477), UINT32_C( 894221337), UINT32_C(2964507124)), simde_x_mm_set_epu32(UINT32_C( 492373082), UINT32_C(1182019008), UINT32_C( 419343539), UINT32_C( 989452294)) }, { simde_x_mm_set_epu32(UINT32_C(3290702646), UINT32_C(1580565751), UINT32_C(3823902839), UINT32_C(2081361826)), simde_x_mm_set_epu32(UINT32_C(1314482530), UINT32_C(2997716679), UINT32_C(3555959260), UINT32_C(2875927297)), simde_x_mm_set_epu32(UINT32_C( 661737586), UINT32_C(1580565751), UINT32_C( 267943579), UINT32_C(2081361826)) }, { simde_x_mm_set_epu32(UINT32_C( 542053192), UINT32_C( 499863549), UINT32_C( 957375358), UINT32_C(3003933707)), simde_x_mm_set_epu32(UINT32_C(1710148738), UINT32_C(1974123080), UINT32_C(2870600100), UINT32_C( 118588227)), simde_x_mm_set_epu32(UINT32_C( 542053192), UINT32_C( 499863549), UINT32_C( 957375358), UINT32_C( 39228032)) }, { simde_x_mm_set_epu32(UINT32_C(4101755863), UINT32_C(3436978124), UINT32_C(3846637996), UINT32_C(2693603084)), simde_x_mm_set_epu32(UINT32_C(3156074065), UINT32_C(3607805659), UINT32_C(1828175063), UINT32_C(3905547273)), simde_x_mm_set_epu32(UINT32_C( 945681798), UINT32_C(3436978124), UINT32_C( 190287870), UINT32_C(2693603084)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_rem_epu32(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm_rem_epu64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu64x(UINT64_C( 7505871096235581515), UINT64_C( 2265477367564496986)), simde_x_mm_set_epu64x(UINT64_C(13810255550447513201), UINT64_C( 
6479913377553186648)), simde_x_mm_set_epu64x(UINT64_C( 7505871096235581515), UINT64_C( 2265477367564496986)) }, { simde_x_mm_set_epu64x(UINT64_C(16837535683400356038), UINT64_C( 6738163160628300797)), simde_x_mm_set_epu64x(UINT64_C( 2968342496979776051), UINT64_C(10005265515001776413)), simde_x_mm_set_epu64x(UINT64_C( 1995823198501475783), UINT64_C( 6738163160628300797)) }, { simde_x_mm_set_epu64x(UINT64_C( 6409558907924801050), UINT64_C( 5372227444888762251)), simde_x_mm_set_epu64x(UINT64_C( 5120732502404950997), UINT64_C( 5681284513410730040)), simde_x_mm_set_epu64x(UINT64_C( 1288826405519850053), UINT64_C( 5372227444888762251)) }, { simde_x_mm_set_epu64x(UINT64_C(11633520338587575573), UINT64_C( 216242550290965460)), simde_x_mm_set_epu64x(UINT64_C(10866939104613927783), UINT64_C(11375825163207743431)), simde_x_mm_set_epu64x(UINT64_C( 766581233973647790), UINT64_C( 216242550290965460)) }, { simde_x_mm_set_epu64x(UINT64_C( 2114726288902596757), UINT64_C( 9482369585348649466)), simde_x_mm_set_epu64x(UINT64_C( 2118113466433927893), UINT64_C( 3840651400764901876)), simde_x_mm_set_epu64x(UINT64_C( 2114726288902596757), UINT64_C( 1801066783818845714)) }, { simde_x_mm_set_epu64x(UINT64_C(14133460247011230967), UINT64_C(16423537638667915170)), simde_x_mm_set_epu64x(UINT64_C( 5645659480511055559), UINT64_C(15272728730484288257)), simde_x_mm_set_epu64x(UINT64_C( 2842141285989119849), UINT64_C( 1150808908183626913)) }, { simde_x_mm_set_epu64x(UINT64_C( 2328100732832272381), UINT64_C( 4111895855610225675)), simde_x_mm_set_epu64x(UINT64_C( 7345032902979795528), UINT64_C(12329133549512917827)), simde_x_mm_set_epu64x(UINT64_C( 2328100732832272381), UINT64_C( 4111895855610225675)) }, { simde_x_mm_set_epu64x(UINT64_C(17616907291198234572), UINT64_C(16521184395064581900)), simde_x_mm_set_epu64x(UINT64_C(13555234896536583899), UINT64_C( 7851952110853286921)), simde_x_mm_set_epu64x(UINT64_C( 4061672394661650673), UINT64_C( 817280173358008058)) } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128i r = simde_mm_rem_epu64(test_vec[i].a, test_vec[i].b); simde_assert_m128i_u64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_rem_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi8(INT8_C( -65), INT8_C( -89), INT8_C( -30), INT8_C( 64), INT8_C( -43), INT8_C( -54), INT8_C( 110), INT8_C( 113), INT8_C( 89), INT8_C( -19), INT8_C( 70), INT8_C( -30), INT8_C(-124), INT8_C( 91), INT8_C( -1), INT8_C( 88), INT8_C( 104), INT8_C( 42), INT8_C( 53), INT8_C( -2), INT8_C(-124), INT8_C( -2), INT8_C( 96), INT8_C( 75), INT8_C( 31), INT8_C( 112), INT8_C(-105), INT8_C( -87), INT8_C( -84), INT8_C( 94), INT8_C( 112), INT8_C( 90)), simde_mm256_set_epi8(INT8_C( 121), INT8_C( 85), INT8_C(-103), INT8_C( 116), INT8_C( -38), INT8_C( 21), INT8_C( 101), INT8_C( 122), INT8_C( 10), INT8_C( -25), INT8_C( 54), INT8_C( 71), INT8_C(-100), INT8_C(-107), INT8_C( -12), INT8_C( 84), INT8_C(-108), INT8_C( 85), INT8_C( -86), INT8_C( -72), INT8_C( 94), INT8_C(-102), INT8_C( -27), INT8_C( 11), INT8_C( 70), INT8_C( -77), INT8_C( 121), INT8_C( -99), INT8_C( -2), INT8_C( 70), INT8_C( 49), INT8_C( 125)), simde_mm256_set_epi8(INT8_C( -65), INT8_C( -4), INT8_C( -30), INT8_C( 64), INT8_C( -5), INT8_C( -12), INT8_C( 9), INT8_C( 113), INT8_C( 9), INT8_C( -19), INT8_C( 16), INT8_C( -30), INT8_C( -24), INT8_C( 91), INT8_C( -1), INT8_C( 4), INT8_C( 104), INT8_C( 42), INT8_C( 53), INT8_C( -2), INT8_C( -30), INT8_C( -2), INT8_C( 15), INT8_C( 9), INT8_C( 31), INT8_C( 35), INT8_C(-105), INT8_C( -87), INT8_C( 0), INT8_C( 24), INT8_C( 14), INT8_C( 90)) }, { simde_mm256_set_epi8(INT8_C( 78), INT8_C( 89), INT8_C( 105), INT8_C( 98), INT8_C( -78), INT8_C( -83), INT8_C(-122), INT8_C( -57), INT8_C( -45), INT8_C( -13), INT8_C( -95), INT8_C( -36), INT8_C( -85), INT8_C( 107), INT8_C( 43), INT8_C( 1), INT8_C( -60), INT8_C( 36), INT8_C( 35), INT8_C( 54), INT8_C( 94), INT8_C( 53), 
INT8_C(-124), INT8_C( -9), INT8_C( -29), INT8_C( -20), INT8_C( 32), INT8_C( 119), INT8_C( 124), INT8_C( 15), INT8_C( 15), INT8_C( -94)), simde_mm256_set_epi8(INT8_C( -61), INT8_C( 49), INT8_C( 14), INT8_C( -86), INT8_C( -53), INT8_C( -89), INT8_C( 3), INT8_C( -41), INT8_C( 63), INT8_C( -8), INT8_C( 55), INT8_C( -37), INT8_C( -35), INT8_C(-121), INT8_C( 61), INT8_C( -65), INT8_C( -47), INT8_C( 91), INT8_C( 87), INT8_C(-119), INT8_C( 87), INT8_C( 76), INT8_C( 44), INT8_C(-116), INT8_C( 2), INT8_C( -56), INT8_C( 36), INT8_C( -61), INT8_C( -56), INT8_C( 125), INT8_C( -2), INT8_C(-117)), simde_mm256_set_epi8(INT8_C( 17), INT8_C( 40), INT8_C( 7), INT8_C( 12), INT8_C( -25), INT8_C( -83), INT8_C( -2), INT8_C( -16), INT8_C( -45), INT8_C( -5), INT8_C( -40), INT8_C( -36), INT8_C( -15), INT8_C( 107), INT8_C( 43), INT8_C( 1), INT8_C( -13), INT8_C( 36), INT8_C( 35), INT8_C( 54), INT8_C( 7), INT8_C( 53), INT8_C( -36), INT8_C( -9), INT8_C( -1), INT8_C( -20), INT8_C( 32), INT8_C( 58), INT8_C( 12), INT8_C( 15), INT8_C( 1), INT8_C( -94)) }, { simde_mm256_set_epi8(INT8_C( -22), INT8_C( 94), INT8_C( -16), INT8_C( 12), INT8_C(-110), INT8_C( 1), INT8_C(-109), INT8_C( 59), INT8_C( -3), INT8_C( 26), INT8_C( 26), INT8_C( 40), INT8_C( 12), INT8_C( 2), INT8_C( -26), INT8_C(-111), INT8_C( -86), INT8_C( 105), INT8_C( 111), INT8_C( -96), INT8_C(-116), INT8_C( -54), INT8_C( -90), INT8_C( -36), INT8_C( -69), INT8_C( 65), INT8_C( -6), INT8_C( -61), INT8_C( 33), INT8_C(-125), INT8_C( 2), INT8_C( -92)), simde_mm256_set_epi8(INT8_C( -79), INT8_C( -35), INT8_C( -5), INT8_C( -75), INT8_C( -97), INT8_C( -74), INT8_C( 11), INT8_C( 11), INT8_C( 39), INT8_C( 37), INT8_C( 39), INT8_C( -48), INT8_C(-120), INT8_C( -76), INT8_C( -41), INT8_C(-117), INT8_C(-112), INT8_C(-128), INT8_C( -53), INT8_C( -50), INT8_C( -83), INT8_C( 36), INT8_C(-123), INT8_C( -81), INT8_C( -25), INT8_C( 7), INT8_C( -20), INT8_C( 68), INT8_C( -63), INT8_C( -35), INT8_C( 27), INT8_C( 8)), simde_mm256_set_epi8(INT8_C( -22), INT8_C( 24), 
INT8_C( -1), INT8_C( 12), INT8_C( -13), INT8_C( 1), INT8_C( -10), INT8_C( 4), INT8_C( -3), INT8_C( 26), INT8_C( 26), INT8_C( 40), INT8_C( 12), INT8_C( 2), INT8_C( -26), INT8_C(-111), INT8_C( -86), INT8_C( 105), INT8_C( 5), INT8_C( -46), INT8_C( -33), INT8_C( -18), INT8_C( -90), INT8_C( -36), INT8_C( -19), INT8_C( 2), INT8_C( -6), INT8_C( -61), INT8_C( 33), INT8_C( -20), INT8_C( 2), INT8_C( -4)) }, { simde_mm256_set_epi8(INT8_C( 71), INT8_C( -23), INT8_C( 74), INT8_C( 125), INT8_C( 81), INT8_C( -13), INT8_C(-117), INT8_C( -66), INT8_C( 31), INT8_C( -80), INT8_C( 97), INT8_C( -3), INT8_C( 123), INT8_C( -80), INT8_C( -40), INT8_C( 108), INT8_C( -9), INT8_C( 97), INT8_C( 75), INT8_C( -53), INT8_C(-128), INT8_C( -18), INT8_C( 79), INT8_C(-115), INT8_C( 86), INT8_C( 29), INT8_C( -93), INT8_C( -49), INT8_C( 111), INT8_C( -7), INT8_C(-117), INT8_C( -47)), simde_mm256_set_epi8(INT8_C( 120), INT8_C( 127), INT8_C( 28), INT8_C( 95), INT8_C( -81), INT8_C( -33), INT8_C( 119), INT8_C( -42), INT8_C( -36), INT8_C( 102), INT8_C( 86), INT8_C( 22), INT8_C( 119), INT8_C( -49), INT8_C( 12), INT8_C( -73), INT8_C( -84), INT8_C( -14), INT8_C( -83), INT8_C( -7), INT8_C( 52), INT8_C( 108), INT8_C(-128), INT8_C( -53), INT8_C( 85), INT8_C(-121), INT8_C( -29), INT8_C( 35), INT8_C( -69), INT8_C( 24), INT8_C( -6), INT8_C( -37)), simde_mm256_set_epi8(INT8_C( 71), INT8_C( -23), INT8_C( 18), INT8_C( 30), INT8_C( 0), INT8_C( -13), INT8_C(-117), INT8_C( -24), INT8_C( 31), INT8_C( -80), INT8_C( 11), INT8_C( -3), INT8_C( 4), INT8_C( -31), INT8_C( -4), INT8_C( 35), INT8_C( -9), INT8_C( 13), INT8_C( 75), INT8_C( -4), INT8_C( -24), INT8_C( -18), INT8_C( 79), INT8_C( -9), INT8_C( 1), INT8_C( 29), INT8_C( -6), INT8_C( -14), INT8_C( 42), INT8_C( -7), INT8_C( -3), INT8_C( -10)) }, { simde_mm256_set_epi8(INT8_C( -72), INT8_C( 63), INT8_C( 95), INT8_C( -92), INT8_C( 65), INT8_C( 71), INT8_C( -82), INT8_C( 88), INT8_C( -73), INT8_C(-114), INT8_C( 98), INT8_C( 14), INT8_C( 25), INT8_C( -83), INT8_C( 87), INT8_C( 
2), INT8_C( -65), INT8_C(-113), INT8_C(-104), INT8_C( 2), INT8_C( 126), INT8_C( 0), INT8_C( -94), INT8_C( 57), INT8_C( -11), INT8_C( 36), INT8_C( -17), INT8_C( 54), INT8_C( 33), INT8_C( -91), INT8_C( -57), INT8_C( 84)), simde_mm256_set_epi8(INT8_C( -82), INT8_C( 60), INT8_C(-124), INT8_C( -48), INT8_C( 58), INT8_C( -78), INT8_C( 116), INT8_C( -16), INT8_C( 37), INT8_C(-125), INT8_C( 100), INT8_C( -79), INT8_C( 19), INT8_C( 102), INT8_C( 81), INT8_C( 86), INT8_C( 25), INT8_C( 43), INT8_C( 51), INT8_C(-116), INT8_C( 9), INT8_C( 40), INT8_C( -29), INT8_C( 75), INT8_C( -48), INT8_C( -97), INT8_C( -81), INT8_C( 109), INT8_C( -26), INT8_C( 87), INT8_C( -2), INT8_C( -40)), simde_mm256_set_epi8(INT8_C( -72), INT8_C( 3), INT8_C( 95), INT8_C( -44), INT8_C( 7), INT8_C( 71), INT8_C( -82), INT8_C( 8), INT8_C( -36), INT8_C(-114), INT8_C( 98), INT8_C( 14), INT8_C( 6), INT8_C( -83), INT8_C( 6), INT8_C( 2), INT8_C( -15), INT8_C( -27), INT8_C( -2), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( -7), INT8_C( 57), INT8_C( -11), INT8_C( 36), INT8_C( -17), INT8_C( 54), INT8_C( 7), INT8_C( -4), INT8_C( -1), INT8_C( 4)) }, { simde_mm256_set_epi8(INT8_C( 54), INT8_C( 43), INT8_C( 109), INT8_C( -69), INT8_C(-118), INT8_C( 62), INT8_C( -34), INT8_C(-102), INT8_C( 123), INT8_C( 21), INT8_C( -9), INT8_C( 99), INT8_C( 37), INT8_C( 48), INT8_C( 116), INT8_C( -23), INT8_C( 95), INT8_C( -5), INT8_C(-109), INT8_C( 109), INT8_C( -51), INT8_C( -50), INT8_C( 57), INT8_C( 17), INT8_C( 121), INT8_C( 25), INT8_C( 3), INT8_C( 55), INT8_C( -78), INT8_C(-127), INT8_C(-107), INT8_C( -49)), simde_mm256_set_epi8(INT8_C(-125), INT8_C( 42), INT8_C(-105), INT8_C( -46), INT8_C( 12), INT8_C( -93), INT8_C(-118), INT8_C( -49), INT8_C( 43), INT8_C( 57), INT8_C( 61), INT8_C( 62), INT8_C( 81), INT8_C( -72), INT8_C( 6), INT8_C( 93), INT8_C( -89), INT8_C( 1), INT8_C(-111), INT8_C( 9), INT8_C( 4), INT8_C( 17), INT8_C( 10), INT8_C( 101), INT8_C( -70), INT8_C( -75), INT8_C(-101), INT8_C( -13), INT8_C( -67), INT8_C( -65), 
INT8_C( -34), INT8_C( -51)), simde_mm256_set_epi8(INT8_C( 54), INT8_C( 1), INT8_C( 4), INT8_C( -23), INT8_C( -10), INT8_C( 62), INT8_C( -34), INT8_C( -4), INT8_C( 37), INT8_C( 21), INT8_C( -9), INT8_C( 37), INT8_C( 37), INT8_C( 48), INT8_C( 2), INT8_C( -23), INT8_C( 6), INT8_C( 0), INT8_C(-109), INT8_C( 1), INT8_C( -3), INT8_C( -16), INT8_C( 7), INT8_C( 17), INT8_C( 51), INT8_C( 25), INT8_C( 3), INT8_C( 3), INT8_C( -11), INT8_C( -62), INT8_C( -5), INT8_C( -49)) }, { simde_mm256_set_epi8(INT8_C( 23), INT8_C(-124), INT8_C( 106), INT8_C( 109), INT8_C(-121), INT8_C( -53), INT8_C( 98), INT8_C( 120), INT8_C( 101), INT8_C( 52), INT8_C( 82), INT8_C( 44), INT8_C(-114), INT8_C( 14), INT8_C( 99), INT8_C( -11), INT8_C( 8), INT8_C(-116), INT8_C(-115), INT8_C( 123), INT8_C( -37), INT8_C( -93), INT8_C( -60), INT8_C( -23), INT8_C( 34), INT8_C( -71), INT8_C( -28), INT8_C( 108), INT8_C( 95), INT8_C( -20), INT8_C( 97), INT8_C( 41)), simde_mm256_set_epi8(INT8_C( 125), INT8_C( -27), INT8_C( -53), INT8_C( 45), INT8_C( 24), INT8_C( 5), INT8_C( 90), INT8_C( 83), INT8_C(-111), INT8_C( 85), INT8_C(-100), INT8_C( -92), INT8_C(-107), INT8_C( -55), INT8_C( 48), INT8_C( -1), INT8_C( 41), INT8_C( 42), INT8_C( 94), INT8_C(-127), INT8_C(-121), INT8_C( 8), INT8_C( 12), INT8_C( -53), INT8_C(-128), INT8_C( -54), INT8_C(-108), INT8_C( -4), INT8_C( 104), INT8_C( -48), INT8_C( 98), INT8_C( -94)), simde_mm256_set_epi8(INT8_C( 23), INT8_C( -16), INT8_C( 0), INT8_C( 19), INT8_C( -1), INT8_C( -3), INT8_C( 8), INT8_C( 37), INT8_C( 101), INT8_C( 52), INT8_C( 82), INT8_C( 44), INT8_C( -7), INT8_C( 14), INT8_C( 3), INT8_C( 0), INT8_C( 8), INT8_C( -32), INT8_C( -21), INT8_C( 123), INT8_C( -37), INT8_C( -5), INT8_C( 0), INT8_C( -23), INT8_C( 34), INT8_C( -17), INT8_C( -28), INT8_C( 0), INT8_C( 95), INT8_C( -20), INT8_C( 97), INT8_C( 41)) }, { simde_mm256_set_epi8(INT8_C( -94), INT8_C( 31), INT8_C( -88), INT8_C( 17), INT8_C( 50), INT8_C( 110), INT8_C( -25), INT8_C( -40), INT8_C( 94), INT8_C( 20), INT8_C( -93), 
INT8_C( -73), INT8_C( -99), INT8_C( 16), INT8_C( 91), INT8_C( 54), INT8_C( 62), INT8_C( 81), INT8_C( -97), INT8_C(-105), INT8_C( 57), INT8_C( 12), INT8_C( 118), INT8_C( 33), INT8_C( -76), INT8_C(-117), INT8_C( 1), INT8_C( 5), INT8_C( 78), INT8_C( 13), INT8_C( 93), INT8_C(-101)), simde_mm256_set_epi8(INT8_C( -63), INT8_C( -26), INT8_C( 93), INT8_C( 23), INT8_C( -63), INT8_C( 52), INT8_C( -33), INT8_C( -81), INT8_C( -51), INT8_C( 45), INT8_C( -90), INT8_C( 24), INT8_C( 71), INT8_C( -22), INT8_C( -95), INT8_C(-114), INT8_C( -72), INT8_C( -38), INT8_C( -66), INT8_C( -44), INT8_C( 116), INT8_C( -97), INT8_C( 44), INT8_C( 55), INT8_C( -43), INT8_C(-123), INT8_C( 60), INT8_C( 3), INT8_C( 58), INT8_C( -1), INT8_C( 125), INT8_C( -67)), simde_mm256_set_epi8(INT8_C( -31), INT8_C( 5), INT8_C( -88), INT8_C( 17), INT8_C( 50), INT8_C( 6), INT8_C( -25), INT8_C( -40), INT8_C( 43), INT8_C( 20), INT8_C( -3), INT8_C( -1), INT8_C( -28), INT8_C( 16), INT8_C( 91), INT8_C( 54), INT8_C( 62), INT8_C( 5), INT8_C( -31), INT8_C( -17), INT8_C( 57), INT8_C( 12), INT8_C( 30), INT8_C( 33), INT8_C( -33), INT8_C(-117), INT8_C( 1), INT8_C( 2), INT8_C( 20), INT8_C( 0), INT8_C( 93), INT8_C( -34)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_rem_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_rem_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi16(INT16_C(-16473), INT16_C( -7616), INT16_C(-10806), INT16_C( 28273), INT16_C( 23021), INT16_C( 18146), INT16_C(-31653), INT16_C( -168), INT16_C( 26666), INT16_C( 13822), INT16_C(-31490), INT16_C( 24651), INT16_C( 8048), INT16_C(-26711), INT16_C(-21410), INT16_C( 28762)), simde_mm256_set_epi16(INT16_C( 10545), INT16_C(-21562), INT16_C( 10284), INT16_C( -3533), INT16_C(-29991), INT16_C(-10247), INT16_C(-14043), INT16_C(-30435), INT16_C( -5718), 
INT16_C( -3714), INT16_C(-18934), INT16_C(-12090), INT16_C( 23938), INT16_C(-15439), INT16_C(-17441), INT16_C(-29699)), simde_mm256_set_epi16(INT16_C( -5928), INT16_C( -7616), INT16_C( -522), INT16_C( 9), INT16_C( 23021), INT16_C( 7899), INT16_C( -3567), INT16_C( -168), INT16_C( 3794), INT16_C( 2680), INT16_C(-12556), INT16_C( 471), INT16_C( 8048), INT16_C(-11272), INT16_C( -3969), INT16_C( 28762)) }, { simde_mm256_set_epi16(INT16_C( 18192), INT16_C( 32532), INT16_C(-31836), INT16_C( -5163), INT16_C( 20183), INT16_C( -1494), INT16_C( 2502), INT16_C( 18488), INT16_C( 22771), INT16_C( 21470), INT16_C( 4556), INT16_C( 26138), INT16_C( 19085), INT16_C( -923), INT16_C( -9934), INT16_C( -2165)), simde_mm256_set_epi16(INT16_C(-26929), INT16_C( 8074), INT16_C( 18000), INT16_C(-29849), INT16_C(-25121), INT16_C( 3254), INT16_C(-10254), INT16_C(-26681), INT16_C(-24206), INT16_C(-28388), INT16_C( 25803), INT16_C( 25877), INT16_C( 768), INT16_C( 16244), INT16_C( 11114), INT16_C( -7212)), simde_mm256_set_epi16(INT16_C( 18192), INT16_C( 236), INT16_C(-13836), INT16_C( -5163), INT16_C( 20183), INT16_C( -1494), INT16_C( 2502), INT16_C( 18488), INT16_C( 22771), INT16_C( 21470), INT16_C( 4556), INT16_C( 261), INT16_C( 653), INT16_C( -923), INT16_C( -9934), INT16_C( -2165)) }, { simde_mm256_set_epi16(INT16_C( 7525), INT16_C( 3321), INT16_C(-18237), INT16_C( -1323), INT16_C( 13644), INT16_C(-17383), INT16_C(-20302), INT16_C(-13836), INT16_C( 7513), INT16_C( 1114), INT16_C( -200), INT16_C( 10389), INT16_C(-31848), INT16_C( 9445), INT16_C( -5204), INT16_C(-24070)), simde_mm256_set_epi16(INT16_C( 20057), INT16_C( 26978), INT16_C(-19795), INT16_C(-31033), INT16_C(-11277), INT16_C(-24100), INT16_C(-21653), INT16_C( 11009), INT16_C(-15324), INT16_C( 9014), INT16_C( 24117), INT16_C(-31497), INT16_C( -7188), INT16_C( 8311), INT16_C( 31759), INT16_C( 4002)), simde_mm256_set_epi16(INT16_C( 7525), INT16_C( 3321), INT16_C(-18237), INT16_C( -1323), INT16_C( 2367), INT16_C(-17383), INT16_C(-20302), 
INT16_C( -2827), INT16_C( 7513), INT16_C( 1114), INT16_C( -200), INT16_C( 10389), INT16_C( -3096), INT16_C( 1134), INT16_C( -5204), INT16_C( -58)) }, { simde_mm256_set_epi16(INT16_C( 26094), INT16_C(-13182), INT16_C( 30122), INT16_C(-17848), INT16_C(-21735), INT16_C( -7772), INT16_C( 1809), INT16_C(-31933), INT16_C( 8271), INT16_C( 4936), INT16_C( 7627), INT16_C( 20477), INT16_C( 14608), INT16_C( 25470), INT16_C(-19700), INT16_C( 25611)), simde_mm256_set_epi16(INT16_C(-17379), INT16_C( -8623), INT16_C(-10486), INT16_C(-16677), INT16_C( 27895), INT16_C(-17193), INT16_C( -5943), INT16_C( -5111), INT16_C( -2949), INT16_C(-11305), INT16_C(-13092), INT16_C( 8140), INT16_C( -6841), INT16_C( 2476), INT16_C(-24435), INT16_C( 7948)), simde_mm256_set_epi16(INT16_C( 8715), INT16_C( -4559), INT16_C( 9150), INT16_C( -1171), INT16_C(-21735), INT16_C( -7772), INT16_C( 1809), INT16_C( -1267), INT16_C( 2373), INT16_C( 4936), INT16_C( 7627), INT16_C( 4197), INT16_C( 926), INT16_C( 710), INT16_C(-19700), INT16_C( 1767)) }, { simde_mm256_set_epi16(INT16_C( 26466), INT16_C( 21183), INT16_C( 5811), INT16_C( 17016), INT16_C(-14374), INT16_C(-18761), INT16_C(-11284), INT16_C( -933), INT16_C( 30444), INT16_C( 20573), INT16_C(-14964), INT16_C( 25607), INT16_C(-28815), INT16_C(-28739), INT16_C( 27147), INT16_C( -3265)), simde_mm256_set_epi16(INT16_C( 26902), INT16_C(-14525), INT16_C( -7905), INT16_C( -8015), INT16_C(-22131), INT16_C( 18318), INT16_C(-21513), INT16_C( 9770), INT16_C( 4118), INT16_C(-32437), INT16_C( 6621), INT16_C( -7897), INT16_C( 22002), INT16_C(-32381), INT16_C( 15537), INT16_C(-26793)), simde_mm256_set_epi16(INT16_C( 26466), INT16_C( 6658), INT16_C( 5811), INT16_C( 986), INT16_C(-14374), INT16_C( -443), INT16_C(-11284), INT16_C( -933), INT16_C( 1618), INT16_C( 20573), INT16_C( -1722), INT16_C( 1916), INT16_C( -6813), INT16_C(-28739), INT16_C( 11610), INT16_C( -3265)) }, { simde_mm256_set_epi16(INT16_C( -5538), INT16_C( -4084), INT16_C(-28159), INT16_C(-27845), INT16_C( 
-742), INT16_C( 6696), INT16_C( 3074), INT16_C( -6511), INT16_C(-21911), INT16_C( 28576), INT16_C(-29494), INT16_C(-22820), INT16_C(-17599), INT16_C( -1341), INT16_C( 8579), INT16_C( 676)), simde_mm256_set_epi16(INT16_C(-10155), INT16_C(-12697), INT16_C( -5222), INT16_C(-32377), INT16_C( 32076), INT16_C(-13716), INT16_C( 13383), INT16_C(-22332), INT16_C( 18058), INT16_C(-22719), INT16_C( -8799), INT16_C(-25251), INT16_C(-16195), INT16_C(-26213), INT16_C(-12331), INT16_C( 27016)), simde_mm256_set_epi16(INT16_C( -5538), INT16_C( -4084), INT16_C( -2049), INT16_C(-27845), INT16_C( -742), INT16_C( 6696), INT16_C( 3074), INT16_C( -6511), INT16_C( -3853), INT16_C( 5857), INT16_C( -3097), INT16_C(-22820), INT16_C( -1404), INT16_C( -1341), INT16_C( 8579), INT16_C( 676)) }, { simde_mm256_set_epi16(INT16_C( 13886), INT16_C( 28688), INT16_C( 30551), INT16_C(-28928), INT16_C( -9491), INT16_C(-26549), INT16_C( -738), INT16_C( 22350), INT16_C( 7981), INT16_C(-15059), INT16_C(-18848), INT16_C( 16804), INT16_C(-31876), INT16_C( -1787), INT16_C( 29649), INT16_C( -721)), simde_mm256_set_epi16(INT16_C( 7566), INT16_C( 25511), INT16_C( -5831), INT16_C( 13989), INT16_C( 13965), INT16_C(-31065), INT16_C( 77), INT16_C(-30384), INT16_C( 21705), INT16_C(-23032), INT16_C( -2503), INT16_C( -8652), INT16_C(-23147), INT16_C( -4009), INT16_C( 7598), INT16_C( 23051)), simde_mm256_set_epi16(INT16_C( 6320), INT16_C( 3177), INT16_C( 1396), INT16_C( -950), INT16_C( -9491), INT16_C(-26549), INT16_C( -45), INT16_C( 22350), INT16_C( 7981), INT16_C(-15059), INT16_C( -1327), INT16_C( 8152), INT16_C( -8729), INT16_C( -1787), INT16_C( 6855), INT16_C( -721)) }, { simde_mm256_set_epi16(INT16_C( 26789), INT16_C(-25295), INT16_C(-31460), INT16_C(-29347), INT16_C(-16029), INT16_C(-32645), INT16_C(-19836), INT16_C( 31541), INT16_C(-32299), INT16_C(-14817), INT16_C( 22782), INT16_C(-18634), INT16_C( -2744), INT16_C( 907), INT16_C( 9939), INT16_C( 395)), simde_mm256_set_epi16(INT16_C( 18409), INT16_C( 19069), 
INT16_C( 20979), INT16_C(-29762), INT16_C( 8112), INT16_C( 25085), INT16_C( 31664), INT16_C(-10132), INT16_C( -2207), INT16_C( 19403), INT16_C(-32530), INT16_C( 20365), INT16_C( 22045), INT16_C(-23601), INT16_C( 28665), INT16_C(-29743)), simde_mm256_set_epi16(INT16_C( 8380), INT16_C( -6226), INT16_C(-10481), INT16_C(-29347), INT16_C( -7917), INT16_C( -7560), INT16_C(-19836), INT16_C( 1145), INT16_C( -1401), INT16_C(-14817), INT16_C( 22782), INT16_C(-18634), INT16_C( -2744), INT16_C( 907), INT16_C( 9939), INT16_C( 395)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_rem_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_rem_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_mm256_set_epi32(INT32_C(-1079516608), INT32_C( -708153743), INT32_C( 1508722402), INT32_C(-2074345640), INT32_C( 1747596798), INT32_C(-2063703989), INT32_C( 527472553), INT32_C(-1403096998)), simde_mm256_set_epi32(INT32_C( 691121094), INT32_C( 674034227), INT32_C(-1965434887), INT32_C( -920286947), INT32_C( -374673026), INT32_C(-1240805178), INT32_C( 1568850865), INT32_C(-1142977539)), simde_mm256_set_epi32(INT32_C( -388395514), INT32_C( -34119516), INT32_C( 1508722402), INT32_C( -233771746), INT32_C( 248904694), INT32_C( -822898811), INT32_C( 527472553), INT32_C( -260119459)) }, { simde_mm256_set_epi32(INT32_C( 1192263444), INT32_C(-2086343723), INT32_C( 1322777130), INT32_C( 163989560), INT32_C( 1492341726), INT32_C( 298608154), INT32_C( 1250819173), INT32_C( -650971253)), simde_mm256_set_epi32(INT32_C(-1764810870), INT32_C( 1179683687), INT32_C(-1646326602), INT32_C( -671967289), INT32_C(-1586327268), INT32_C( 1691051285), INT32_C( 50347892), INT32_C( 728425428)), simde_mm256_set_epi32(INT32_C( 1192263444), INT32_C( -906660036), INT32_C( 1322777130), INT32_C( 163989560), INT32_C( 1492341726), 
INT32_C( 298608154), INT32_C( 42469765), INT32_C( -650971253)) }, { simde_mm256_set_epi32(INT32_C( 493161721), INT32_C(-1195115819), INT32_C( 894221337), INT32_C(-1330460172), INT32_C( 492373082), INT32_C( -13096811), INT32_C(-2087181083), INT32_C( -341007878)), simde_mm256_set_epi32(INT32_C( 1314482530), INT32_C(-1297250617), INT32_C( -739008036), INT32_C(-1419039999), INT32_C(-1004264650), INT32_C( 1580565751), INT32_C( -471064457), INT32_C( 2081361826)), simde_mm256_set_epi32(INT32_C( 493161721), INT32_C(-1195115819), INT32_C( 155213301), INT32_C(-1330460172), INT32_C( 492373082), INT32_C( -13096811), INT32_C( -202923255), INT32_C( -341007878)) }, { simde_mm256_set_epi32(INT32_C( 1710148738), INT32_C( 1974123080), INT32_C(-1424367196), INT32_C( 118588227), INT32_C( 542053192), INT32_C( 499863549), INT32_C( 957375358), INT32_C(-1291033589)), simde_mm256_set_epi32(INT32_C(-1138893231), INT32_C( -687161637), INT32_C( 1828175063), INT32_C( -389420023), INT32_C( -193211433), INT32_C( -857989172), INT32_C( -448329300), INT32_C(-1601364212)), simde_mm256_set_epi32(INT32_C( 571255507), INT32_C( 599799806), INT32_C(-1424367196), INT32_C( 118588227), INT32_C( 155630326), INT32_C( 499863549), INT32_C( 60716758), INT32_C(-1291033589)) }, { simde_mm256_set_epi32(INT32_C( 1734496959), INT32_C( 380846712), INT32_C( -941967689), INT32_C( -739443621), INT32_C( 1995198557), INT32_C( -980655097), INT32_C(-1888383043), INT32_C( 1779168063)), simde_mm256_set_epi32(INT32_C( 1763100483), INT32_C( -518004559), INT32_C(-1450358898), INT32_C(-1409866198), INT32_C( 269910347), INT32_C( 433971495), INT32_C( 1441956227), INT32_C( 1018271575)), simde_mm256_set_epi32(INT32_C( 1734496959), INT32_C( 380846712), INT32_C( -941967689), INT32_C( -739443621), INT32_C( 105826128), INT32_C( -112712107), INT32_C( -446426816), INT32_C( 760896488)) }, { simde_mm256_set_epi32(INT32_C( -362876916), INT32_C(-1845390533), INT32_C( -48621016), INT32_C( 201516689), INT32_C(-1435930720), INT32_C(-1932876068), 
INT32_C(-1153303869), INT32_C( 562234020)), simde_mm256_set_epi32(INT32_C( -665465241), INT32_C( -342195833), INT32_C( 2102184556), INT32_C( 877111492), INT32_C( 1183491905), INT32_C( -576610979), INT32_C(-1061316197), INT32_C( -808097400)), simde_mm256_set_epi32(INT32_C( -362876916), INT32_C( -134411368), INT32_C( -48621016), INT32_C( 201516689), INT32_C( -252438815), INT32_C( -203043131), INT32_C( -91987672), INT32_C( 562234020)) }, { simde_mm256_set_epi32(INT32_C( 910061584), INT32_C( 2002226944), INT32_C( -621963189), INT32_C( -48343218), INT32_C( 523093293), INT32_C(-1235205724), INT32_C(-2088961787), INT32_C( 1943141679)), simde_mm256_set_epi32(INT32_C( 495870887), INT32_C( -382126427), INT32_C( 915244711), INT32_C( 5081424), INT32_C( 1422501384), INT32_C( -163979724), INT32_C(-1516900265), INT32_C( 497965579)), simde_mm256_set_epi32(INT32_C( 414190697), INT32_C( 91594809), INT32_C( -621963189), INT32_C( -2610402), INT32_C( 523093293), INT32_C( -87347656), INT32_C( -572061522), INT32_C( 449244942)) }, { simde_mm256_set_epi32(INT32_C( 1755684145), INT32_C(-2061726371), INT32_C(-1050443653), INT32_C(-1299940555), INT32_C(-2116696545), INT32_C( 1493088054), INT32_C( -179829877), INT32_C( 651362699)), simde_mm256_set_epi32(INT32_C( 1206471293), INT32_C( 1374915518), INT32_C( 531653117), INT32_C( 2075187308), INT32_C( -144618549), INT32_C(-2131865715), INT32_C( 1444783055), INT32_C( 1878625233)), simde_mm256_set_epi32(INT32_C( 549212852), INT32_C( -686810853), INT32_C( -518790536), INT32_C(-1299940555), INT32_C( -92036859), INT32_C( 1493088054), INT32_C( -179829877), INT32_C( 651362699)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_rem_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_rem_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { 
simde_mm256_set_epi64x(INT64_C(-4636488523262038415), INT64_C( 6479913377553186648), INT64_C( 7505871096235581515), INT64_C( 2265477367564496986)), simde_mm256_set_epi64x(INT64_C( 2968342496979776051), INT64_C(-8441478558707775203), INT64_C(-1609208390309195578), INT64_C( 6738163160628300797)), simde_mm256_set_epi64x(INT64_C(-1668146026282262364), INT64_C( 6479913377553186648), INT64_C( 1069037534998799203), INT64_C( 2265477367564496986)) }, { simde_mm256_set_epi64x(INT64_C( 5120732502404950997), INT64_C( 5681284513410730040), INT64_C( 6409558907924801050), INT64_C( 5372227444888762251)), simde_mm256_set_epi64x(INT64_C(-7579804969095623833), INT64_C(-7070918910501808185), INT64_C(-6813223735121976043), INT64_C( 216242550290965460)), simde_mm256_set_epi64x(INT64_C( 5120732502404950997), INT64_C( 5681284513410730040), INT64_C( 6409558907924801050), INT64_C( 182406237905591211)) }, { simde_mm256_set_epi64x(INT64_C( 2118113466433927893), INT64_C( 3840651400764901876), INT64_C( 2114726288902596757), INT64_C(-8964374488360902150)), simde_mm256_set_epi64x(INT64_C( 5645659480511055559), INT64_C(-3174015343225263359), INT64_C(-4313283826698320649), INT64_C(-2023206435041636446)), simde_mm256_set_epi64x(INT64_C( 2118113466433927893), INT64_C( 666636057539638517), INT64_C( 2114726288902596757), INT64_C( -871548748194356366)) }, { simde_mm256_set_epi64x(INT64_C( 7345032902979795528), INT64_C(-6117610524196633789), INT64_C( 2328100732832272381), INT64_C( 4111895855610225675)), simde_mm256_set_epi64x(INT64_C(-4891509177172967717), INT64_C( 7851952110853286921), INT64_C( -829836782511317044), INT64_C(-1925559678644969716)), simde_mm256_set_epi64x(INT64_C( 2453523725806827811), INT64_C(-6117610524196633789), INT64_C( 668427167809638293), INT64_C( 260776498320286243)) }, { simde_mm256_set_epi64x(INT64_C( 7449607714297299576), INT64_C(-4045720414588175269), INT64_C( 8569312554655704071), INT64_C(-8110543410226793665)), simde_mm256_set_epi64x(INT64_C( 7572458917823766705), 
INT64_C(-6229244031487498710), INT64_C( 1159256113650983207), INT64_C( 6193154838246823767)), simde_mm256_set_epi64x(INT64_C( 7449607714297299576), INT64_C(-4045720414588175269), INT64_C( 454519759098821622), INT64_C(-1917388571979969898)) }, { simde_mm256_set_epi64x(INT64_C(-1558544484243762373), INT64_C( -208825673416776047), INT64_C(-6167275479359641892), INT64_C(-4953402399143034204)), simde_mm256_set_epi64x(INT64_C(-2858151442766986873), INT64_C( 9028813919053392068), INT64_C( 5083059030774095197), INT64_C(-4558318353343223416)), simde_mm256_set_epi64x(INT64_C(-1558544484243762373), INT64_C( -208825673416776047), INT64_C(-1084216448585546695), INT64_C( -395084045799810788)) }, { simde_mm256_set_epi64x(INT64_C( 3908684742628183808), INT64_C(-2671311551824242866), INT64_C( 2246668589251707300), INT64_C(-8972022555815576273)), simde_mm256_set_epi64x(INT64_C( 2129749246616352421), INT64_C( 3930946101587052880), INT64_C( 6109596926925725236), INT64_C(-6515037028970767861)), simde_mm256_set_epi64x(INT64_C( 1778935496011831387), INT64_C(-2671311551824242866), INT64_C( 2246668589251707300), INT64_C(-2456985526844808412)) }, { simde_mm256_set_epi64x(INT64_C( 7540605987113962845), INT64_C(-4511621132930745547), INT64_C(-9091142434838104266), INT64_C( -772363439907339893)), simde_mm256_set_epi64x(INT64_C( 5181754748372749246), INT64_C( 2283432752406648940), INT64_C( -621131936186871923), INT64_C( 6205295972918594513)), simde_mm256_set_epi64x(INT64_C( 2358851238741213599), INT64_C(-2228188380524096607), INT64_C( -395295328221897344), INT64_C( -772363439907339893)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_rem_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m256i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_rem_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu8(UINT8_C(191), UINT8_C(167), 
UINT8_C(226), UINT8_C( 64), UINT8_C(213), UINT8_C(202), UINT8_C(110), UINT8_C(113), UINT8_C( 89), UINT8_C(237), UINT8_C( 70), UINT8_C(226), UINT8_C(132), UINT8_C( 91), UINT8_C(255), UINT8_C( 88), UINT8_C(104), UINT8_C( 42), UINT8_C( 53), UINT8_C(254), UINT8_C(132), UINT8_C(254), UINT8_C( 96), UINT8_C( 75), UINT8_C( 31), UINT8_C(112), UINT8_C(151), UINT8_C(169), UINT8_C(172), UINT8_C( 94), UINT8_C(112), UINT8_C( 90)), simde_x_mm256_set_epu8(UINT8_C(121), UINT8_C( 85), UINT8_C(153), UINT8_C(116), UINT8_C(218), UINT8_C( 21), UINT8_C(101), UINT8_C(122), UINT8_C( 10), UINT8_C(231), UINT8_C( 54), UINT8_C( 71), UINT8_C(156), UINT8_C(149), UINT8_C(244), UINT8_C( 84), UINT8_C(148), UINT8_C( 85), UINT8_C(170), UINT8_C(184), UINT8_C( 94), UINT8_C(154), UINT8_C(229), UINT8_C( 11), UINT8_C( 70), UINT8_C(179), UINT8_C(121), UINT8_C(157), UINT8_C(254), UINT8_C( 70), UINT8_C( 49), UINT8_C(125)), simde_x_mm256_set_epu8(UINT8_C( 70), UINT8_C( 82), UINT8_C( 73), UINT8_C( 64), UINT8_C(213), UINT8_C( 13), UINT8_C( 9), UINT8_C(113), UINT8_C( 9), UINT8_C( 6), UINT8_C( 16), UINT8_C( 13), UINT8_C(132), UINT8_C( 91), UINT8_C( 11), UINT8_C( 4), UINT8_C(104), UINT8_C( 42), UINT8_C( 53), UINT8_C( 70), UINT8_C( 38), UINT8_C(100), UINT8_C( 96), UINT8_C( 9), UINT8_C( 31), UINT8_C(112), UINT8_C( 30), UINT8_C( 12), UINT8_C(172), UINT8_C( 24), UINT8_C( 14), UINT8_C( 90)) }, { simde_x_mm256_set_epu8(UINT8_C( 78), UINT8_C( 89), UINT8_C(105), UINT8_C( 98), UINT8_C(178), UINT8_C(173), UINT8_C(134), UINT8_C(199), UINT8_C(211), UINT8_C(243), UINT8_C(161), UINT8_C(220), UINT8_C(171), UINT8_C(107), UINT8_C( 43), UINT8_C( 1), UINT8_C(196), UINT8_C( 36), UINT8_C( 35), UINT8_C( 54), UINT8_C( 94), UINT8_C( 53), UINT8_C(132), UINT8_C(247), UINT8_C(227), UINT8_C(236), UINT8_C( 32), UINT8_C(119), UINT8_C(124), UINT8_C( 15), UINT8_C( 15), UINT8_C(162)), simde_x_mm256_set_epu8(UINT8_C(195), UINT8_C( 49), UINT8_C( 14), UINT8_C(170), UINT8_C(203), UINT8_C(167), UINT8_C( 3), UINT8_C(215), UINT8_C( 63), UINT8_C(248), 
UINT8_C( 55), UINT8_C(219), UINT8_C(221), UINT8_C(135), UINT8_C( 61), UINT8_C(191), UINT8_C(209), UINT8_C( 91), UINT8_C( 87), UINT8_C(137), UINT8_C( 87), UINT8_C( 76), UINT8_C( 44), UINT8_C(140), UINT8_C( 2), UINT8_C(200), UINT8_C( 36), UINT8_C(195), UINT8_C(200), UINT8_C(125), UINT8_C(254), UINT8_C(139)), simde_x_mm256_set_epu8(UINT8_C( 78), UINT8_C( 40), UINT8_C( 7), UINT8_C( 98), UINT8_C(178), UINT8_C( 6), UINT8_C( 2), UINT8_C(199), UINT8_C( 22), UINT8_C(243), UINT8_C( 51), UINT8_C( 1), UINT8_C(171), UINT8_C(107), UINT8_C( 43), UINT8_C( 1), UINT8_C(196), UINT8_C( 36), UINT8_C( 35), UINT8_C( 54), UINT8_C( 7), UINT8_C( 53), UINT8_C( 0), UINT8_C(107), UINT8_C( 1), UINT8_C( 36), UINT8_C( 32), UINT8_C(119), UINT8_C(124), UINT8_C( 15), UINT8_C( 15), UINT8_C( 23)) }, { simde_x_mm256_set_epu8(UINT8_C(234), UINT8_C( 94), UINT8_C(240), UINT8_C( 12), UINT8_C(146), UINT8_C( 1), UINT8_C(147), UINT8_C( 59), UINT8_C(253), UINT8_C( 26), UINT8_C( 26), UINT8_C( 40), UINT8_C( 12), UINT8_C( 2), UINT8_C(230), UINT8_C(145), UINT8_C(170), UINT8_C(105), UINT8_C(111), UINT8_C(160), UINT8_C(140), UINT8_C(202), UINT8_C(166), UINT8_C(220), UINT8_C(187), UINT8_C( 65), UINT8_C(250), UINT8_C(195), UINT8_C( 33), UINT8_C(131), UINT8_C( 2), UINT8_C(164)), simde_x_mm256_set_epu8(UINT8_C(177), UINT8_C(221), UINT8_C(251), UINT8_C(181), UINT8_C(159), UINT8_C(182), UINT8_C( 11), UINT8_C( 11), UINT8_C( 39), UINT8_C( 37), UINT8_C( 39), UINT8_C(208), UINT8_C(136), UINT8_C(180), UINT8_C(215), UINT8_C(139), UINT8_C(144), UINT8_C(128), UINT8_C(203), UINT8_C(206), UINT8_C(173), UINT8_C( 36), UINT8_C(133), UINT8_C(175), UINT8_C(231), UINT8_C( 7), UINT8_C(236), UINT8_C( 68), UINT8_C(193), UINT8_C(221), UINT8_C( 27), UINT8_C( 8)), simde_x_mm256_set_epu8(UINT8_C( 57), UINT8_C( 94), UINT8_C(240), UINT8_C( 12), UINT8_C(146), UINT8_C( 1), UINT8_C( 4), UINT8_C( 4), UINT8_C( 19), UINT8_C( 26), UINT8_C( 26), UINT8_C( 40), UINT8_C( 12), UINT8_C( 2), UINT8_C( 15), UINT8_C( 6), UINT8_C( 26), UINT8_C(105), UINT8_C(111), 
UINT8_C(160), UINT8_C(140), UINT8_C( 22), UINT8_C( 33), UINT8_C( 45), UINT8_C(187), UINT8_C( 2), UINT8_C( 14), UINT8_C( 59), UINT8_C( 33), UINT8_C(131), UINT8_C( 2), UINT8_C( 4)) }, { simde_x_mm256_set_epu8(UINT8_C( 71), UINT8_C(233), UINT8_C( 74), UINT8_C(125), UINT8_C( 81), UINT8_C(243), UINT8_C(139), UINT8_C(190), UINT8_C( 31), UINT8_C(176), UINT8_C( 97), UINT8_C(253), UINT8_C(123), UINT8_C(176), UINT8_C(216), UINT8_C(108), UINT8_C(247), UINT8_C( 97), UINT8_C( 75), UINT8_C(203), UINT8_C(128), UINT8_C(238), UINT8_C( 79), UINT8_C(141), UINT8_C( 86), UINT8_C( 29), UINT8_C(163), UINT8_C(207), UINT8_C(111), UINT8_C(249), UINT8_C(139), UINT8_C(209)), simde_x_mm256_set_epu8(UINT8_C(120), UINT8_C(127), UINT8_C( 28), UINT8_C( 95), UINT8_C(175), UINT8_C(223), UINT8_C(119), UINT8_C(214), UINT8_C(220), UINT8_C(102), UINT8_C( 86), UINT8_C( 22), UINT8_C(119), UINT8_C(207), UINT8_C( 12), UINT8_C(183), UINT8_C(172), UINT8_C(242), UINT8_C(173), UINT8_C(249), UINT8_C( 52), UINT8_C(108), UINT8_C(128), UINT8_C(203), UINT8_C( 85), UINT8_C(135), UINT8_C(227), UINT8_C( 35), UINT8_C(187), UINT8_C( 24), UINT8_C(250), UINT8_C(219)), simde_x_mm256_set_epu8(UINT8_C( 71), UINT8_C(106), UINT8_C( 18), UINT8_C( 30), UINT8_C( 81), UINT8_C( 20), UINT8_C( 20), UINT8_C(190), UINT8_C( 31), UINT8_C( 74), UINT8_C( 11), UINT8_C( 11), UINT8_C( 4), UINT8_C(176), UINT8_C( 0), UINT8_C(108), UINT8_C( 75), UINT8_C( 97), UINT8_C( 75), UINT8_C(203), UINT8_C( 24), UINT8_C( 22), UINT8_C( 79), UINT8_C(141), UINT8_C( 1), UINT8_C( 29), UINT8_C(163), UINT8_C( 32), UINT8_C(111), UINT8_C( 9), UINT8_C(139), UINT8_C(209)) }, { simde_x_mm256_set_epu8(UINT8_C(184), UINT8_C( 63), UINT8_C( 95), UINT8_C(164), UINT8_C( 65), UINT8_C( 71), UINT8_C(174), UINT8_C( 88), UINT8_C(183), UINT8_C(142), UINT8_C( 98), UINT8_C( 14), UINT8_C( 25), UINT8_C(173), UINT8_C( 87), UINT8_C( 2), UINT8_C(191), UINT8_C(143), UINT8_C(152), UINT8_C( 2), UINT8_C(126), UINT8_C( 0), UINT8_C(162), UINT8_C( 57), UINT8_C(245), UINT8_C( 36), UINT8_C(239), 
UINT8_C( 54), UINT8_C( 33), UINT8_C(165), UINT8_C(199), UINT8_C( 84)), simde_x_mm256_set_epu8(UINT8_C(174), UINT8_C( 60), UINT8_C(132), UINT8_C(208), UINT8_C( 58), UINT8_C(178), UINT8_C(116), UINT8_C(240), UINT8_C( 37), UINT8_C(131), UINT8_C(100), UINT8_C(177), UINT8_C( 19), UINT8_C(102), UINT8_C( 81), UINT8_C( 86), UINT8_C( 25), UINT8_C( 43), UINT8_C( 51), UINT8_C(140), UINT8_C( 9), UINT8_C( 40), UINT8_C(227), UINT8_C( 75), UINT8_C(208), UINT8_C(159), UINT8_C(175), UINT8_C(109), UINT8_C(230), UINT8_C( 87), UINT8_C(254), UINT8_C(216)), simde_x_mm256_set_epu8(UINT8_C( 10), UINT8_C( 3), UINT8_C( 95), UINT8_C(164), UINT8_C( 7), UINT8_C( 71), UINT8_C( 58), UINT8_C( 88), UINT8_C( 35), UINT8_C( 11), UINT8_C( 98), UINT8_C( 14), UINT8_C( 6), UINT8_C( 71), UINT8_C( 6), UINT8_C( 2), UINT8_C( 16), UINT8_C( 14), UINT8_C( 50), UINT8_C( 2), UINT8_C( 0), UINT8_C( 0), UINT8_C(162), UINT8_C( 57), UINT8_C( 37), UINT8_C( 36), UINT8_C( 64), UINT8_C( 54), UINT8_C( 33), UINT8_C( 78), UINT8_C(199), UINT8_C( 84)) }, { simde_x_mm256_set_epu8(UINT8_C( 54), UINT8_C( 43), UINT8_C(109), UINT8_C(187), UINT8_C(138), UINT8_C( 62), UINT8_C(222), UINT8_C(154), UINT8_C(123), UINT8_C( 21), UINT8_C(247), UINT8_C( 99), UINT8_C( 37), UINT8_C( 48), UINT8_C(116), UINT8_C(233), UINT8_C( 95), UINT8_C(251), UINT8_C(147), UINT8_C(109), UINT8_C(205), UINT8_C(206), UINT8_C( 57), UINT8_C( 17), UINT8_C(121), UINT8_C( 25), UINT8_C( 3), UINT8_C( 55), UINT8_C(178), UINT8_C(129), UINT8_C(149), UINT8_C(207)), simde_x_mm256_set_epu8(UINT8_C(131), UINT8_C( 42), UINT8_C(151), UINT8_C(210), UINT8_C( 12), UINT8_C(163), UINT8_C(138), UINT8_C(207), UINT8_C( 43), UINT8_C( 57), UINT8_C( 61), UINT8_C( 62), UINT8_C( 81), UINT8_C(184), UINT8_C( 6), UINT8_C( 93), UINT8_C(167), UINT8_C( 1), UINT8_C(145), UINT8_C( 9), UINT8_C( 4), UINT8_C( 17), UINT8_C( 10), UINT8_C(101), UINT8_C(186), UINT8_C(181), UINT8_C(155), UINT8_C(243), UINT8_C(189), UINT8_C(191), UINT8_C(222), UINT8_C(205)), simde_x_mm256_set_epu8(UINT8_C( 54), UINT8_C( 1), 
UINT8_C(109), UINT8_C(187), UINT8_C( 6), UINT8_C( 62), UINT8_C( 84), UINT8_C(154), UINT8_C( 37), UINT8_C( 21), UINT8_C( 3), UINT8_C( 37), UINT8_C( 37), UINT8_C( 48), UINT8_C( 2), UINT8_C( 47), UINT8_C( 95), UINT8_C( 0), UINT8_C( 2), UINT8_C( 1), UINT8_C( 1), UINT8_C( 2), UINT8_C( 7), UINT8_C( 17), UINT8_C(121), UINT8_C( 25), UINT8_C( 3), UINT8_C( 55), UINT8_C(178), UINT8_C(129), UINT8_C(149), UINT8_C( 2)) }, { simde_x_mm256_set_epu8(UINT8_C( 23), UINT8_C(132), UINT8_C(106), UINT8_C(109), UINT8_C(135), UINT8_C(203), UINT8_C( 98), UINT8_C(120), UINT8_C(101), UINT8_C( 52), UINT8_C( 82), UINT8_C( 44), UINT8_C(142), UINT8_C( 14), UINT8_C( 99), UINT8_C(245), UINT8_C( 8), UINT8_C(140), UINT8_C(141), UINT8_C(123), UINT8_C(219), UINT8_C(163), UINT8_C(196), UINT8_C(233), UINT8_C( 34), UINT8_C(185), UINT8_C(228), UINT8_C(108), UINT8_C( 95), UINT8_C(236), UINT8_C( 97), UINT8_C( 41)), simde_x_mm256_set_epu8(UINT8_C(125), UINT8_C(229), UINT8_C(203), UINT8_C( 45), UINT8_C( 24), UINT8_C( 5), UINT8_C( 90), UINT8_C( 83), UINT8_C(145), UINT8_C( 85), UINT8_C(156), UINT8_C(164), UINT8_C(149), UINT8_C(201), UINT8_C( 48), UINT8_C(255), UINT8_C( 41), UINT8_C( 42), UINT8_C( 94), UINT8_C(129), UINT8_C(135), UINT8_C( 8), UINT8_C( 12), UINT8_C(203), UINT8_C(128), UINT8_C(202), UINT8_C(148), UINT8_C(252), UINT8_C(104), UINT8_C(208), UINT8_C( 98), UINT8_C(162)), simde_x_mm256_set_epu8(UINT8_C( 23), UINT8_C(132), UINT8_C(106), UINT8_C( 19), UINT8_C( 15), UINT8_C( 3), UINT8_C( 8), UINT8_C( 37), UINT8_C(101), UINT8_C( 52), UINT8_C( 82), UINT8_C( 44), UINT8_C(142), UINT8_C( 14), UINT8_C( 3), UINT8_C(245), UINT8_C( 8), UINT8_C( 14), UINT8_C( 47), UINT8_C(123), UINT8_C( 84), UINT8_C( 3), UINT8_C( 4), UINT8_C( 30), UINT8_C( 34), UINT8_C(185), UINT8_C( 80), UINT8_C(108), UINT8_C( 95), UINT8_C( 28), UINT8_C( 97), UINT8_C( 41)) }, { simde_x_mm256_set_epu8(UINT8_C(162), UINT8_C( 31), UINT8_C(168), UINT8_C( 17), UINT8_C( 50), UINT8_C(110), UINT8_C(231), UINT8_C(216), UINT8_C( 94), UINT8_C( 20), 
UINT8_C(163), UINT8_C(183), UINT8_C(157), UINT8_C( 16), UINT8_C( 91), UINT8_C( 54), UINT8_C( 62), UINT8_C( 81), UINT8_C(159), UINT8_C(151), UINT8_C( 57), UINT8_C( 12), UINT8_C(118), UINT8_C( 33), UINT8_C(180), UINT8_C(139), UINT8_C( 1), UINT8_C( 5), UINT8_C( 78), UINT8_C( 13), UINT8_C( 93), UINT8_C(155)), simde_x_mm256_set_epu8(UINT8_C(193), UINT8_C(230), UINT8_C( 93), UINT8_C( 23), UINT8_C(193), UINT8_C( 52), UINT8_C(223), UINT8_C(175), UINT8_C(205), UINT8_C( 45), UINT8_C(166), UINT8_C( 24), UINT8_C( 71), UINT8_C(234), UINT8_C(161), UINT8_C(142), UINT8_C(184), UINT8_C(218), UINT8_C(190), UINT8_C(212), UINT8_C(116), UINT8_C(159), UINT8_C( 44), UINT8_C( 55), UINT8_C(213), UINT8_C(133), UINT8_C( 60), UINT8_C( 3), UINT8_C( 58), UINT8_C(255), UINT8_C(125), UINT8_C(189)), simde_x_mm256_set_epu8(UINT8_C(162), UINT8_C( 31), UINT8_C( 75), UINT8_C( 17), UINT8_C( 50), UINT8_C( 6), UINT8_C( 8), UINT8_C( 41), UINT8_C( 94), UINT8_C( 20), UINT8_C(163), UINT8_C( 15), UINT8_C( 15), UINT8_C( 16), UINT8_C( 91), UINT8_C( 54), UINT8_C( 62), UINT8_C( 81), UINT8_C(159), UINT8_C(151), UINT8_C( 57), UINT8_C( 12), UINT8_C( 30), UINT8_C( 33), UINT8_C(180), UINT8_C( 6), UINT8_C( 1), UINT8_C( 2), UINT8_C( 20), UINT8_C( 13), UINT8_C( 93), UINT8_C(155)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_rem_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_rem_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu16(UINT16_C( 49063), UINT16_C( 57920), UINT16_C( 54730), UINT16_C( 28273), UINT16_C( 23021), UINT16_C( 18146), UINT16_C( 33883), UINT16_C( 65368), UINT16_C( 26666), UINT16_C( 13822), UINT16_C( 34046), UINT16_C( 24651), UINT16_C( 8048), UINT16_C( 38825), UINT16_C( 44126), UINT16_C( 28762)), simde_x_mm256_set_epu16(UINT16_C( 10545), UINT16_C( 43974), UINT16_C( 10284), UINT16_C( 
62003), UINT16_C( 35545), UINT16_C( 55289), UINT16_C( 51493), UINT16_C( 35101), UINT16_C( 59818), UINT16_C( 61822), UINT16_C( 46602), UINT16_C( 53446), UINT16_C( 23938), UINT16_C( 50097), UINT16_C( 48095), UINT16_C( 35837)), simde_x_mm256_set_epu16(UINT16_C( 6883), UINT16_C( 13946), UINT16_C( 3310), UINT16_C( 28273), UINT16_C( 23021), UINT16_C( 18146), UINT16_C( 33883), UINT16_C( 30267), UINT16_C( 26666), UINT16_C( 13822), UINT16_C( 34046), UINT16_C( 24651), UINT16_C( 8048), UINT16_C( 38825), UINT16_C( 44126), UINT16_C( 28762)) }, { simde_x_mm256_set_epu16(UINT16_C( 18192), UINT16_C( 32532), UINT16_C( 33700), UINT16_C( 60373), UINT16_C( 20183), UINT16_C( 64042), UINT16_C( 2502), UINT16_C( 18488), UINT16_C( 22771), UINT16_C( 21470), UINT16_C( 4556), UINT16_C( 26138), UINT16_C( 19085), UINT16_C( 64613), UINT16_C( 55602), UINT16_C( 63371)), simde_x_mm256_set_epu16(UINT16_C( 38607), UINT16_C( 8074), UINT16_C( 18000), UINT16_C( 35687), UINT16_C( 40415), UINT16_C( 3254), UINT16_C( 55282), UINT16_C( 38855), UINT16_C( 41330), UINT16_C( 37148), UINT16_C( 25803), UINT16_C( 25877), UINT16_C( 768), UINT16_C( 16244), UINT16_C( 11114), UINT16_C( 58324)), simde_x_mm256_set_epu16(UINT16_C( 18192), UINT16_C( 236), UINT16_C( 15700), UINT16_C( 24686), UINT16_C( 20183), UINT16_C( 2216), UINT16_C( 2502), UINT16_C( 18488), UINT16_C( 22771), UINT16_C( 21470), UINT16_C( 4556), UINT16_C( 261), UINT16_C( 653), UINT16_C( 15881), UINT16_C( 32), UINT16_C( 5047)) }, { simde_x_mm256_set_epu16(UINT16_C( 7525), UINT16_C( 3321), UINT16_C( 47299), UINT16_C( 64213), UINT16_C( 13644), UINT16_C( 48153), UINT16_C( 45234), UINT16_C( 51700), UINT16_C( 7513), UINT16_C( 1114), UINT16_C( 65336), UINT16_C( 10389), UINT16_C( 33688), UINT16_C( 9445), UINT16_C( 60332), UINT16_C( 41466)), simde_x_mm256_set_epu16(UINT16_C( 20057), UINT16_C( 26978), UINT16_C( 45741), UINT16_C( 34503), UINT16_C( 54259), UINT16_C( 41436), UINT16_C( 43883), UINT16_C( 11009), UINT16_C( 50212), UINT16_C( 9014), UINT16_C( 24117), 
UINT16_C( 34039), UINT16_C( 58348), UINT16_C( 8311), UINT16_C( 31759), UINT16_C( 4002)), simde_x_mm256_set_epu16(UINT16_C( 7525), UINT16_C( 3321), UINT16_C( 1558), UINT16_C( 29710), UINT16_C( 13644), UINT16_C( 6717), UINT16_C( 1351), UINT16_C( 7664), UINT16_C( 7513), UINT16_C( 1114), UINT16_C( 17102), UINT16_C( 10389), UINT16_C( 33688), UINT16_C( 1134), UINT16_C( 28573), UINT16_C( 1446)) }, { simde_x_mm256_set_epu16(UINT16_C( 26094), UINT16_C( 52354), UINT16_C( 30122), UINT16_C( 47688), UINT16_C( 43801), UINT16_C( 57764), UINT16_C( 1809), UINT16_C( 33603), UINT16_C( 8271), UINT16_C( 4936), UINT16_C( 7627), UINT16_C( 20477), UINT16_C( 14608), UINT16_C( 25470), UINT16_C( 45836), UINT16_C( 25611)), simde_x_mm256_set_epu16(UINT16_C( 48157), UINT16_C( 56913), UINT16_C( 55050), UINT16_C( 48859), UINT16_C( 27895), UINT16_C( 48343), UINT16_C( 59593), UINT16_C( 60425), UINT16_C( 62587), UINT16_C( 54231), UINT16_C( 52444), UINT16_C( 8140), UINT16_C( 58695), UINT16_C( 2476), UINT16_C( 41101), UINT16_C( 7948)), simde_x_mm256_set_epu16(UINT16_C( 26094), UINT16_C( 52354), UINT16_C( 30122), UINT16_C( 47688), UINT16_C( 15906), UINT16_C( 9421), UINT16_C( 1809), UINT16_C( 33603), UINT16_C( 8271), UINT16_C( 4936), UINT16_C( 7627), UINT16_C( 4197), UINT16_C( 14608), UINT16_C( 710), UINT16_C( 4735), UINT16_C( 1767)) }, { simde_x_mm256_set_epu16(UINT16_C( 26466), UINT16_C( 21183), UINT16_C( 5811), UINT16_C( 17016), UINT16_C( 51162), UINT16_C( 46775), UINT16_C( 54252), UINT16_C( 64603), UINT16_C( 30444), UINT16_C( 20573), UINT16_C( 50572), UINT16_C( 25607), UINT16_C( 36721), UINT16_C( 36797), UINT16_C( 27147), UINT16_C( 62271)), simde_x_mm256_set_epu16(UINT16_C( 26902), UINT16_C( 51011), UINT16_C( 57631), UINT16_C( 57521), UINT16_C( 43405), UINT16_C( 18318), UINT16_C( 44023), UINT16_C( 9770), UINT16_C( 4118), UINT16_C( 33099), UINT16_C( 6621), UINT16_C( 57639), UINT16_C( 22002), UINT16_C( 33155), UINT16_C( 15537), UINT16_C( 38743)), simde_x_mm256_set_epu16(UINT16_C( 26466), UINT16_C( 
21183), UINT16_C( 5811), UINT16_C( 17016), UINT16_C( 7757), UINT16_C( 10139), UINT16_C( 10229), UINT16_C( 5983), UINT16_C( 1618), UINT16_C( 20573), UINT16_C( 4225), UINT16_C( 25607), UINT16_C( 14719), UINT16_C( 3642), UINT16_C( 11610), UINT16_C( 23528)) }, { simde_x_mm256_set_epu16(UINT16_C( 59998), UINT16_C( 61452), UINT16_C( 37377), UINT16_C( 37691), UINT16_C( 64794), UINT16_C( 6696), UINT16_C( 3074), UINT16_C( 59025), UINT16_C( 43625), UINT16_C( 28576), UINT16_C( 36042), UINT16_C( 42716), UINT16_C( 47937), UINT16_C( 64195), UINT16_C( 8579), UINT16_C( 676)), simde_x_mm256_set_epu16(UINT16_C( 55381), UINT16_C( 52839), UINT16_C( 60314), UINT16_C( 33159), UINT16_C( 32076), UINT16_C( 51820), UINT16_C( 13383), UINT16_C( 43204), UINT16_C( 18058), UINT16_C( 42817), UINT16_C( 56737), UINT16_C( 40285), UINT16_C( 49341), UINT16_C( 39323), UINT16_C( 53205), UINT16_C( 27016)), simde_x_mm256_set_epu16(UINT16_C( 4617), UINT16_C( 8613), UINT16_C( 37377), UINT16_C( 4532), UINT16_C( 642), UINT16_C( 6696), UINT16_C( 3074), UINT16_C( 15821), UINT16_C( 7509), UINT16_C( 28576), UINT16_C( 36042), UINT16_C( 2431), UINT16_C( 47937), UINT16_C( 24872), UINT16_C( 8579), UINT16_C( 676)) }, { simde_x_mm256_set_epu16(UINT16_C( 13886), UINT16_C( 28688), UINT16_C( 30551), UINT16_C( 36608), UINT16_C( 56045), UINT16_C( 38987), UINT16_C( 64798), UINT16_C( 22350), UINT16_C( 7981), UINT16_C( 50477), UINT16_C( 46688), UINT16_C( 16804), UINT16_C( 33660), UINT16_C( 63749), UINT16_C( 29649), UINT16_C( 64815)), simde_x_mm256_set_epu16(UINT16_C( 7566), UINT16_C( 25511), UINT16_C( 59705), UINT16_C( 13989), UINT16_C( 13965), UINT16_C( 34471), UINT16_C( 77), UINT16_C( 35152), UINT16_C( 21705), UINT16_C( 42504), UINT16_C( 63033), UINT16_C( 56884), UINT16_C( 42389), UINT16_C( 61527), UINT16_C( 7598), UINT16_C( 23051)), simde_x_mm256_set_epu16(UINT16_C( 6320), UINT16_C( 3177), UINT16_C( 30551), UINT16_C( 8630), UINT16_C( 185), UINT16_C( 4516), UINT16_C( 41), UINT16_C( 22350), UINT16_C( 7981), UINT16_C( 7973), 
UINT16_C( 46688), UINT16_C( 16804), UINT16_C( 33660), UINT16_C( 2222), UINT16_C( 6855), UINT16_C( 18713)) }, { simde_x_mm256_set_epu16(UINT16_C( 26789), UINT16_C( 40241), UINT16_C( 34076), UINT16_C( 36189), UINT16_C( 49507), UINT16_C( 32891), UINT16_C( 45700), UINT16_C( 31541), UINT16_C( 33237), UINT16_C( 50719), UINT16_C( 22782), UINT16_C( 46902), UINT16_C( 62792), UINT16_C( 907), UINT16_C( 9939), UINT16_C( 395)), simde_x_mm256_set_epu16(UINT16_C( 18409), UINT16_C( 19069), UINT16_C( 20979), UINT16_C( 35774), UINT16_C( 8112), UINT16_C( 25085), UINT16_C( 31664), UINT16_C( 55404), UINT16_C( 63329), UINT16_C( 19403), UINT16_C( 33006), UINT16_C( 20365), UINT16_C( 22045), UINT16_C( 41935), UINT16_C( 28665), UINT16_C( 35793)), simde_x_mm256_set_epu16(UINT16_C( 8380), UINT16_C( 2103), UINT16_C( 13097), UINT16_C( 415), UINT16_C( 835), UINT16_C( 7806), UINT16_C( 14036), UINT16_C( 31541), UINT16_C( 33237), UINT16_C( 11913), UINT16_C( 22782), UINT16_C( 6172), UINT16_C( 18702), UINT16_C( 907), UINT16_C( 9939), UINT16_C( 395)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_rem_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_rem_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu32(UINT32_C(3215450688), UINT32_C(3586813553), UINT32_C(1508722402), UINT32_C(2220621656), UINT32_C(1747596798), UINT32_C(2231263307), UINT32_C( 527472553), UINT32_C(2891870298)), simde_x_mm256_set_epu32(UINT32_C( 691121094), UINT32_C( 674034227), UINT32_C(2329532409), UINT32_C(3374680349), UINT32_C(3920294270), UINT32_C(3054162118), UINT32_C(1568850865), UINT32_C(3151989757)), simde_x_mm256_set_epu32(UINT32_C( 450966312), UINT32_C( 216642418), UINT32_C(1508722402), UINT32_C(2220621656), UINT32_C(1747596798), UINT32_C(2231263307), UINT32_C( 527472553), UINT32_C(2891870298)) }, { 
simde_x_mm256_set_epu32(UINT32_C(1192263444), UINT32_C(2208623573), UINT32_C(1322777130), UINT32_C( 163989560), UINT32_C(1492341726), UINT32_C( 298608154), UINT32_C(1250819173), UINT32_C(3643996043)), simde_x_mm256_set_epu32(UINT32_C(2530156426), UINT32_C(1179683687), UINT32_C(2648640694), UINT32_C(3623000007), UINT32_C(2708640028), UINT32_C(1691051285), UINT32_C( 50347892), UINT32_C( 728425428)), simde_x_mm256_set_epu32(UINT32_C(1192263444), UINT32_C(1028939886), UINT32_C(1322777130), UINT32_C( 163989560), UINT32_C(1492341726), UINT32_C( 298608154), UINT32_C( 42469765), UINT32_C( 1868903)) }, { simde_x_mm256_set_epu32(UINT32_C( 493161721), UINT32_C(3099851477), UINT32_C( 894221337), UINT32_C(2964507124), UINT32_C( 492373082), UINT32_C(4281870485), UINT32_C(2207786213), UINT32_C(3953959418)), simde_x_mm256_set_epu32(UINT32_C(1314482530), UINT32_C(2997716679), UINT32_C(3555959260), UINT32_C(2875927297), UINT32_C(3290702646), UINT32_C(1580565751), UINT32_C(3823902839), UINT32_C(2081361826)), simde_x_mm256_set_epu32(UINT32_C( 493161721), UINT32_C( 102134798), UINT32_C( 894221337), UINT32_C( 88579827), UINT32_C( 492373082), UINT32_C(1120738983), UINT32_C(2207786213), UINT32_C(1872597592)) }, { simde_x_mm256_set_epu32(UINT32_C(1710148738), UINT32_C(1974123080), UINT32_C(2870600100), UINT32_C( 118588227), UINT32_C( 542053192), UINT32_C( 499863549), UINT32_C( 957375358), UINT32_C(3003933707)), simde_x_mm256_set_epu32(UINT32_C(3156074065), UINT32_C(3607805659), UINT32_C(1828175063), UINT32_C(3905547273), UINT32_C(4101755863), UINT32_C(3436978124), UINT32_C(3846637996), UINT32_C(2693603084)), simde_x_mm256_set_epu32(UINT32_C(1710148738), UINT32_C(1974123080), UINT32_C(1042425037), UINT32_C( 118588227), UINT32_C( 542053192), UINT32_C( 499863549), UINT32_C( 957375358), UINT32_C( 310330623)) }, { simde_x_mm256_set_epu32(UINT32_C(1734496959), UINT32_C( 380846712), UINT32_C(3352999607), UINT32_C(3555523675), UINT32_C(1995198557), UINT32_C(3314312199), UINT32_C(2406584253), 
UINT32_C(1779168063)), simde_x_mm256_set_epu32(UINT32_C(1763100483), UINT32_C(3776962737), UINT32_C(2844608398), UINT32_C(2885101098), UINT32_C( 269910347), UINT32_C( 433971495), UINT32_C(1441956227), UINT32_C(1018271575)), simde_x_mm256_set_epu32(UINT32_C(1734496959), UINT32_C( 380846712), UINT32_C( 508391209), UINT32_C( 670422577), UINT32_C( 105826128), UINT32_C( 276511734), UINT32_C( 964628026), UINT32_C( 760896488)) }, { simde_x_mm256_set_epu32(UINT32_C(3932090380), UINT32_C(2449576763), UINT32_C(4246346280), UINT32_C( 201516689), UINT32_C(2859036576), UINT32_C(2362091228), UINT32_C(3141663427), UINT32_C( 562234020)), simde_x_mm256_set_epu32(UINT32_C(3629502055), UINT32_C(3952771463), UINT32_C(2102184556), UINT32_C( 877111492), UINT32_C(1183491905), UINT32_C(3718356317), UINT32_C(3233651099), UINT32_C(3486869896)), simde_x_mm256_set_epu32(UINT32_C( 302588325), UINT32_C(2449576763), UINT32_C( 41977168), UINT32_C( 201516689), UINT32_C( 492052766), UINT32_C(2362091228), UINT32_C(3141663427), UINT32_C( 562234020)) }, { simde_x_mm256_set_epu32(UINT32_C( 910061584), UINT32_C(2002226944), UINT32_C(3673004107), UINT32_C(4246624078), UINT32_C( 523093293), UINT32_C(3059761572), UINT32_C(2206005509), UINT32_C(1943141679)), simde_x_mm256_set_epu32(UINT32_C( 495870887), UINT32_C(3912840869), UINT32_C( 915244711), UINT32_C( 5081424), UINT32_C(1422501384), UINT32_C(4130987572), UINT32_C(2778067031), UINT32_C( 497965579)), simde_x_mm256_set_epu32(UINT32_C( 414190697), UINT32_C(2002226944), UINT32_C( 12025263), UINT32_C( 3635038), UINT32_C( 523093293), UINT32_C(3059761572), UINT32_C(2206005509), UINT32_C( 449244942)) }, { simde_x_mm256_set_epu32(UINT32_C(1755684145), UINT32_C(2233240925), UINT32_C(3244523643), UINT32_C(2995026741), UINT32_C(2178270751), UINT32_C(1493088054), UINT32_C(4115137419), UINT32_C( 651362699)), simde_x_mm256_set_epu32(UINT32_C(1206471293), UINT32_C(1374915518), UINT32_C( 531653117), UINT32_C(2075187308), UINT32_C(4150348747), UINT32_C(2163101581), 
UINT32_C(1444783055), UINT32_C(1878625233)), simde_x_mm256_set_epu32(UINT32_C( 549212852), UINT32_C( 858325407), UINT32_C( 54604941), UINT32_C( 919839433), UINT32_C(2178270751), UINT32_C(1493088054), UINT32_C(1225571309), UINT32_C( 651362699)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_rem_epu32(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_mask_rem_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask16 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu32(UINT32_C( 691121094), UINT32_C( 674034227), UINT32_C(2329532409), UINT32_C(3374680349), UINT32_C(3920294270), UINT32_C(3054162118), UINT32_C(1568850865), UINT32_C(3151989757), UINT32_C(3215450688), UINT32_C(3586813553), UINT32_C(1508722402), UINT32_C(2220621656), UINT32_C(1747596798), UINT32_C(2231263307), UINT32_C( 527472553), UINT32_C(2891870298)), UINT16_C(63371), simde_x_mm512_set_epu32(UINT32_C(3953959418), UINT32_C(2530156426), UINT32_C(1179683687), UINT32_C(2648640694), UINT32_C(3623000007), UINT32_C(2708640028), UINT32_C(1691051285), UINT32_C( 50347892), UINT32_C( 728425428), UINT32_C(1192263444), UINT32_C(2208623573), UINT32_C(1322777130), UINT32_C( 163989560), UINT32_C(1492341726), UINT32_C( 298608154), UINT32_C(1250819173)), simde_x_mm512_set_epu32(UINT32_C(3003933707), UINT32_C(1314482530), UINT32_C(2997716679), UINT32_C(3555959260), UINT32_C(2875927297), UINT32_C(3290702646), UINT32_C(1580565751), UINT32_C(3823902839), UINT32_C(2081361826), UINT32_C( 493161721), UINT32_C(3099851477), UINT32_C( 894221337), UINT32_C(2964507124), UINT32_C( 492373082), UINT32_C(4281870485), UINT32_C(2207786213)), simde_x_mm512_set_epu32(UINT32_C( 950025711), UINT32_C(1215673896), UINT32_C(1179683687), UINT32_C(2648640694), UINT32_C(3920294270), UINT32_C(2708640028), UINT32_C( 110485534), UINT32_C( 50347892), UINT32_C( 
728425428), UINT32_C(3586813553), UINT32_C(1508722402), UINT32_C(2220621656), UINT32_C( 163989560), UINT32_C(2231263307), UINT32_C( 298608154), UINT32_C(1250819173)) }, { simde_x_mm512_set_epu32(UINT32_C(1779168063), UINT32_C(3156074065), UINT32_C(3607805659), UINT32_C(1828175063), UINT32_C(3905547273), UINT32_C(4101755863), UINT32_C(3436978124), UINT32_C(3846637996), UINT32_C(2693603084), UINT32_C(1710148738), UINT32_C(1974123080), UINT32_C(2870600100), UINT32_C( 118588227), UINT32_C( 542053192), UINT32_C( 499863549), UINT32_C( 957375358)), UINT16_C(36797), simde_x_mm512_set_epu32(UINT32_C(3141663427), UINT32_C( 562234020), UINT32_C(1763100483), UINT32_C(3776962737), UINT32_C(2844608398), UINT32_C(2885101098), UINT32_C( 269910347), UINT32_C( 433971495), UINT32_C(1441956227), UINT32_C(1018271575), UINT32_C(1734496959), UINT32_C( 380846712), UINT32_C(3352999607), UINT32_C(3555523675), UINT32_C(1995198557), UINT32_C(3314312199)), simde_x_mm512_set_epu32(UINT32_C(2206005509), UINT32_C(1943141679), UINT32_C(3629502055), UINT32_C(3952771463), UINT32_C(2102184556), UINT32_C( 877111492), UINT32_C(1183491905), UINT32_C(3718356317), UINT32_C(3233651099), UINT32_C(3486869896), UINT32_C(3932090380), UINT32_C(2449576763), UINT32_C(4246346280), UINT32_C( 201516689), UINT32_C(2859036576), UINT32_C(2362091228)), simde_x_mm512_set_epu32(UINT32_C( 935657918), UINT32_C(3156074065), UINT32_C(3607805659), UINT32_C(1828175063), UINT32_C( 742423842), UINT32_C( 253766622), UINT32_C( 269910347), UINT32_C( 433971495), UINT32_C(1441956227), UINT32_C(1710148738), UINT32_C(1734496959), UINT32_C( 380846712), UINT32_C(3352999607), UINT32_C( 129739962), UINT32_C( 499863549), UINT32_C( 952220971)) }, { simde_x_mm512_set_epu32(UINT32_C(4115137419), UINT32_C( 651362699), UINT32_C( 495870887), UINT32_C(3912840869), UINT32_C( 915244711), UINT32_C( 5081424), UINT32_C(1422501384), UINT32_C(4130987572), UINT32_C(2778067031), UINT32_C( 497965579), UINT32_C( 910061584), UINT32_C(2002226944), 
UINT32_C(3673004107), UINT32_C(4246624078), UINT32_C( 523093293), UINT32_C(3059761572)), UINT16_C(46902), simde_x_mm512_set_epu32(UINT32_C(4074346392), UINT32_C(1398655610), UINT32_C(1722520923), UINT32_C(1206471293), UINT32_C(1374915518), UINT32_C( 531653117), UINT32_C(2075187308), UINT32_C(4150348747), UINT32_C(2163101581), UINT32_C(1444783055), UINT32_C(1878625233), UINT32_C(1755684145), UINT32_C(2233240925), UINT32_C(3244523643), UINT32_C(2995026741), UINT32_C(2178270751)), simde_x_mm512_set_epu32(UINT32_C(3188873807), UINT32_C(1982658188), UINT32_C( 863153207), UINT32_C(2657690668), UINT32_C( 448681074), UINT32_C(1334667053), UINT32_C( 502667641), UINT32_C( 855395764), UINT32_C(2622874348), UINT32_C( 808531712), UINT32_C( 454488139), UINT32_C( 123547093), UINT32_C( 483090439), UINT32_C(3168637539), UINT32_C(3093747107), UINT32_C(4158916667)), simde_x_mm512_set_epu32(UINT32_C( 885472585), UINT32_C( 651362699), UINT32_C( 859367716), UINT32_C(1206471293), UINT32_C( 915244711), UINT32_C( 531653117), UINT32_C( 64516744), UINT32_C( 728765691), UINT32_C(2778067031), UINT32_C( 497965579), UINT32_C( 60672677), UINT32_C( 26024843), UINT32_C(3673004107), UINT32_C( 75886104), UINT32_C(2995026741), UINT32_C(3059761572)) }, { simde_x_mm512_set_epu32(UINT32_C(2113970745), UINT32_C(4112838454), UINT32_C( 564512596), UINT32_C( 604721400), UINT32_C(1471174399), UINT32_C(2491026588), UINT32_C(2529574367), UINT32_C( 298473775), UINT32_C(2890366559), UINT32_C(3063632375), UINT32_C(4055983958), UINT32_C(4149169500), UINT32_C(4113948134), UINT32_C(2384487126), UINT32_C(2434207126), UINT32_C(3923111671)), UINT16_C(38914), simde_x_mm512_set_epu32(UINT32_C(1533151625), UINT32_C(2122196136), UINT32_C(1690360675), UINT32_C(1484935627), UINT32_C(1463758672), UINT32_C( 602211615), UINT32_C(3830002991), UINT32_C(2864741101), UINT32_C( 797104998), UINT32_C(2737423319), UINT32_C(3342229886), UINT32_C( 178625368), UINT32_C(3091160996), UINT32_C(1095216728), UINT32_C(3079561742), UINT32_C( 
430790402)), simde_x_mm512_set_epu32(UINT32_C(4043825594), UINT32_C(1274901810), UINT32_C( 413860084), UINT32_C( 550494320), UINT32_C(1997049765), UINT32_C( 505563651), UINT32_C( 463125220), UINT32_C(3843753777), UINT32_C(2346173843), UINT32_C(2157864934), UINT32_C(2591157969), UINT32_C( 389679318), UINT32_C(3939775129), UINT32_C(2493364907), UINT32_C(2006619059), UINT32_C(3391409164)), simde_x_mm512_set_epu32(UINT32_C(1533151625), UINT32_C(4112838454), UINT32_C( 564512596), UINT32_C( 383946987), UINT32_C(1463758672), UINT32_C(2491026588), UINT32_C(2529574367), UINT32_C( 298473775), UINT32_C(2890366559), UINT32_C(3063632375), UINT32_C(4055983958), UINT32_C(4149169500), UINT32_C(4113948134), UINT32_C(2384487126), UINT32_C(1072942683), UINT32_C(3923111671)) }, { simde_x_mm512_set_epu32(UINT32_C(1572579389), UINT32_C(3511888959), UINT32_C(2399346014), UINT32_C(1967093325), UINT32_C( 908815803), UINT32_C(2319376026), UINT32_C(2065037155), UINT32_C( 623932649), UINT32_C(1610322797), UINT32_C(3452844305), UINT32_C(2031682359), UINT32_C(2994836943), UINT32_C(2344919086), UINT32_C( 238137788), UINT32_C(1978166020), UINT32_C( 76768592)), UINT16_C( 883), simde_x_mm512_set_epu32(UINT32_C(3284847806), UINT32_C(3884897233), UINT32_C(2094036024), UINT32_C(2456834182), UINT32_C( 69201629), UINT32_C(1228958503), UINT32_C(3519587969), UINT32_C(2809504529), UINT32_C(3115789449), UINT32_C(1767270276), UINT32_C( 490610321), UINT32_C(1164436618), UINT32_C(2374669797), UINT32_C(3604002618), UINT32_C(3414719029), UINT32_C(2289333019)), simde_x_mm512_set_epu32(UINT32_C(2383307765), UINT32_C( 143428987), UINT32_C(3684943081), UINT32_C( 582607980), UINT32_C(1609326889), UINT32_C(1245407235), UINT32_C(4175005098), UINT32_C(2362914327), UINT32_C(2924553042), UINT32_C(2369006988), UINT32_C(2119408419), UINT32_C(3091878410), UINT32_C(3978436943), UINT32_C(1708684203), UINT32_C(1202455481), UINT32_C(2187745469)), simde_x_mm512_set_epu32(UINT32_C(1572579389), UINT32_C(3511888959), 
UINT32_C(2399346014), UINT32_C(1967093325), UINT32_C( 908815803), UINT32_C(2319376026), UINT32_C(3519587969), UINT32_C( 446590202), UINT32_C(1610322797), UINT32_C(1767270276), UINT32_C( 490610321), UINT32_C(1164436618), UINT32_C(2344919086), UINT32_C( 238137788), UINT32_C(1009808067), UINT32_C( 101587550)) }, { simde_x_mm512_set_epu32(UINT32_C(2117071873), UINT32_C(2857077767), UINT32_C(3918893192), UINT32_C(1087893388), UINT32_C(3851784011), UINT32_C(3914271744), UINT32_C( 565328458), UINT32_C(4201942548), UINT32_C(1480532604), UINT32_C(4197506536), UINT32_C(3712719696), UINT32_C(3920217826), UINT32_C(1394313506), UINT32_C( 394553965), UINT32_C(2278253176), UINT32_C(1697927724)), UINT16_C(12254), simde_x_mm512_set_epu32(UINT32_C( 56443211), UINT32_C(2258452653), UINT32_C(3784696472), UINT32_C(1139427205), UINT32_C(1090384090), UINT32_C(2389735891), UINT32_C(2215607313), UINT32_C(3817672405), UINT32_C(3621770268), UINT32_C(2071747620), UINT32_C(3852178197), UINT32_C(3693632585), UINT32_C( 319530416), UINT32_C(2179954815), UINT32_C(3793236393), UINT32_C( 340519338)), simde_x_mm512_set_epu32(UINT32_C(1219537084), UINT32_C(1349635715), UINT32_C( 732887738), UINT32_C(2566325375), UINT32_C(2906533885), UINT32_C(1765754685), UINT32_C(2719983633), UINT32_C( 846129112), UINT32_C(1578410935), UINT32_C(2635094838), UINT32_C(1045536663), UINT32_C( 957117985), UINT32_C(3029008645), UINT32_C(1309498779), UINT32_C(3293951997), UINT32_C(1022360677)), simde_x_mm512_set_epu32(UINT32_C(2117071873), UINT32_C(2857077767), UINT32_C( 120257782), UINT32_C(1087893388), UINT32_C(1090384090), UINT32_C( 623981206), UINT32_C(2215607313), UINT32_C( 433155957), UINT32_C( 464948398), UINT32_C(2071747620), UINT32_C(3712719696), UINT32_C( 822278630), UINT32_C( 319530416), UINT32_C( 870456036), UINT32_C( 499284396), UINT32_C(1697927724)) }, { simde_x_mm512_set_epu32(UINT32_C(3990081318), UINT32_C( 991545752), UINT32_C(4151932359), UINT32_C( 843112042), UINT32_C(4067412513), UINT32_C(2124182542), 
UINT32_C(2768721208), UINT32_C(2302989914), UINT32_C(1224533822), UINT32_C(3475606100), UINT32_C(3610957044), UINT32_C(2556046111), UINT32_C(3035396524), UINT32_C(3603101367), UINT32_C(3321443925), UINT32_C( 45581573)), UINT16_C(42669), simde_x_mm512_set_epu32(UINT32_C(4138167693), UINT32_C(3221954957), UINT32_C(2164435171), UINT32_C( 397240391), UINT32_C( 200936922), UINT32_C(3263986987), UINT32_C(2536604122), UINT32_C(3629380929), UINT32_C( 453331046), UINT32_C(1704580573), UINT32_C(1606190487), UINT32_C(3209309249), UINT32_C(2959497652), UINT32_C(3926896735), UINT32_C(2875407663), UINT32_C(2069966669)), simde_x_mm512_set_epu32(UINT32_C(1379668640), UINT32_C( 66581512), UINT32_C(3737665499), UINT32_C( 304428974), UINT32_C(2686704508), UINT32_C( 532978979), UINT32_C( 946958552), UINT32_C(2383642627), UINT32_C(2176874140), UINT32_C( 283691898), UINT32_C(3848894665), UINT32_C(3836186002), UINT32_C(1951055651), UINT32_C( 765387914), UINT32_C( 822559116), UINT32_C( 7445617)), simde_x_mm512_set_epu32(UINT32_C(1378830413), UINT32_C( 991545752), UINT32_C(2164435171), UINT32_C( 843112042), UINT32_C(4067412513), UINT32_C( 66113113), UINT32_C( 642687018), UINT32_C(2302989914), UINT32_C( 453331046), UINT32_C(3475606100), UINT32_C(1606190487), UINT32_C(2556046111), UINT32_C(1008442001), UINT32_C( 99957165), UINT32_C(3321443925), UINT32_C( 85143)) }, { simde_x_mm512_set_epu32(UINT32_C(2313028370), UINT32_C( 869237081), UINT32_C(4104913762), UINT32_C(2825691966), UINT32_C(3577866502), UINT32_C(2991894408), UINT32_C(2172048625), UINT32_C(1617119933), UINT32_C(1521363431), UINT32_C( 553638116), UINT32_C(1036201367), UINT32_C(3107033445), UINT32_C(3882811410), UINT32_C(3534384353), UINT32_C(3871215839), UINT32_C(1273589632)), UINT16_C(35103), simde_x_mm512_set_epu32(UINT32_C(2458371652), UINT32_C( 260676470), UINT32_C(1724614860), UINT32_C(4150452663), UINT32_C(3816336716), UINT32_C(2208212235), UINT32_C( 932145867), UINT32_C(2432594561), UINT32_C(1756892633), UINT32_C( 
382632965), UINT32_C(1295078740), UINT32_C(3299165262), UINT32_C( 152308919), UINT32_C(3943411788), UINT32_C( 31813624), UINT32_C( 807463845)), simde_x_mm512_set_epu32(UINT32_C( 615301803), UINT32_C( 382786341), UINT32_C(1852603705), UINT32_C(1998007730), UINT32_C( 231325888), UINT32_C(1842039329), UINT32_C( 968682756), UINT32_C( 316335394), UINT32_C(2223585202), UINT32_C(3491781959), UINT32_C(2167971796), UINT32_C(1587647099), UINT32_C(2966608712), UINT32_C( 320339033), UINT32_C( 282380179), UINT32_C(4186865204)), simde_x_mm512_set_epu32(UINT32_C( 612466243), UINT32_C( 869237081), UINT32_C(4104913762), UINT32_C(2825691966), UINT32_C( 115122508), UINT32_C(2991894408), UINT32_C(2172048625), UINT32_C( 218246803), UINT32_C(1521363431), UINT32_C( 553638116), UINT32_C(1036201367), UINT32_C( 123871064), UINT32_C( 152308919), UINT32_C( 99343392), UINT32_C( 31813624), UINT32_C( 807463845)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_rem_epu32(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_u32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm256_rem_epu64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu64x(UINT64_C(13810255550447513201), UINT64_C( 6479913377553186648), UINT64_C( 7505871096235581515), UINT64_C( 2265477367564496986)), simde_x_mm256_set_epu64x(UINT64_C( 2968342496979776051), UINT64_C(10005265515001776413), UINT64_C(16837535683400356038), UINT64_C( 6738163160628300797)), simde_x_mm256_set_epu64x(UINT64_C( 1936885562528408997), UINT64_C( 6479913377553186648), UINT64_C( 7505871096235581515), UINT64_C( 2265477367564496986)) }, { simde_x_mm256_set_epu64x(UINT64_C( 5120732502404950997), UINT64_C( 5681284513410730040), UINT64_C( 6409558907924801050), UINT64_C( 5372227444888762251)), simde_x_mm256_set_epu64x(UINT64_C(10866939104613927783), UINT64_C(11375825163207743431), 
UINT64_C(11633520338587575573), UINT64_C( 216242550290965460)), simde_x_mm256_set_epu64x(UINT64_C( 5120732502404950997), UINT64_C( 5681284513410730040), UINT64_C( 6409558907924801050), UINT64_C( 182406237905591211)) }, { simde_x_mm256_set_epu64x(UINT64_C( 2118113466433927893), UINT64_C( 3840651400764901876), UINT64_C( 2114726288902596757), UINT64_C( 9482369585348649466)), simde_x_mm256_set_epu64x(UINT64_C( 5645659480511055559), UINT64_C(15272728730484288257), UINT64_C(14133460247011230967), UINT64_C(16423537638667915170)), simde_x_mm256_set_epu64x(UINT64_C( 2118113466433927893), UINT64_C( 3840651400764901876), UINT64_C( 2114726288902596757), UINT64_C( 9482369585348649466)) }, { simde_x_mm256_set_epu64x(UINT64_C( 7345032902979795528), UINT64_C(12329133549512917827), UINT64_C( 2328100732832272381), UINT64_C( 4111895855610225675)), simde_x_mm256_set_epu64x(UINT64_C(13555234896536583899), UINT64_C( 7851952110853286921), UINT64_C(17616907291198234572), UINT64_C(16521184395064581900)), simde_x_mm256_set_epu64x(UINT64_C( 7345032902979795528), UINT64_C( 4477181438659630906), UINT64_C( 2328100732832272381), UINT64_C( 4111895855610225675)) }, { simde_x_mm256_set_epu64x(UINT64_C( 7449607714297299576), UINT64_C(14401023659121376347), UINT64_C( 8569312554655704071), UINT64_C(10336200663482757951)), simde_x_mm256_set_epu64x(UINT64_C( 7572458917823766705), UINT64_C(12217500042222052906), UINT64_C( 1159256113650983207), UINT64_C( 6193154838246823767)), simde_x_mm256_set_epu64x(UINT64_C( 7449607714297299576), UINT64_C( 2183523616899323441), UINT64_C( 454519759098821622), UINT64_C( 4143045825235934184)) }, { simde_x_mm256_set_epu64x(UINT64_C(16888199589465789243), UINT64_C(18237918400292775569), UINT64_C(12279468594349909724), UINT64_C(13493341674566517412)), simde_x_mm256_set_epu64x(UINT64_C(15588592630942564743), UINT64_C( 9028813919053392068), UINT64_C( 5083059030774095197), UINT64_C(13888425720366328200)), simde_x_mm256_set_epu64x(UINT64_C( 1299606958523224500), UINT64_C( 
180290562185991433), UINT64_C( 2113350532801719330), UINT64_C(13493341674566517412)) }, { simde_x_mm256_set_epu64x(UINT64_C( 3908684742628183808), UINT64_C(15775432521885308750), UINT64_C( 2246668589251707300), UINT64_C( 9474721517893975343)), simde_x_mm256_set_epu64x(UINT64_C( 2129749246616352421), UINT64_C( 3930946101587052880), UINT64_C( 6109596926925725236), UINT64_C(11931707044738783755)), simde_x_mm256_set_epu64x(UINT64_C( 1778935496011831387), UINT64_C( 51648115537097230), UINT64_C( 2246668589251707300), UINT64_C( 9474721517893975343)) }, { simde_x_mm256_set_epu64x(UINT64_C( 7540605987113962845), UINT64_C(13935122940778806069), UINT64_C( 9355601638871447350), UINT64_C(17674380633802211723)), simde_x_mm256_set_epu64x(UINT64_C( 5181754748372749246), UINT64_C( 2283432752406648940), UINT64_C(17825612137522679693), UINT64_C( 6205295972918594513)), simde_x_mm256_set_epu64x(UINT64_C( 2358851238741213599), UINT64_C( 234526426338912429), UINT64_C( 9355601638871447350), UINT64_C( 5263788687965022697)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i r = simde_mm256_rem_epu64(test_vec[i].a, test_vec[i].b); simde_assert_m256i_u64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_rem_epi8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi8(INT8_C( 41), INT8_C( 49), INT8_C( -85), INT8_C( -58), INT8_C( 40), INT8_C( 44), INT8_C( -14), INT8_C( 51), INT8_C(-118), INT8_C( -39), INT8_C( -41), INT8_C( -7), INT8_C( -55), INT8_C( 37), INT8_C(-119), INT8_C( 29), INT8_C( -23), INT8_C( -86), INT8_C( -15), INT8_C( 126), INT8_C( -74), INT8_C( 10), INT8_C( -48), INT8_C( -58), INT8_C( 93), INT8_C(-126), INT8_C( -61), INT8_C( -79), INT8_C( -69), INT8_C( -33), INT8_C(-117), INT8_C( -3), INT8_C( -65), INT8_C( -89), INT8_C( -30), INT8_C( 64), INT8_C( -43), INT8_C( -54), INT8_C( 110), INT8_C( 113), INT8_C( 89), INT8_C( -19), INT8_C( 70), INT8_C( -30), 
INT8_C(-124), INT8_C( 91), INT8_C( -1), INT8_C( 88), INT8_C( 104), INT8_C( 42), INT8_C( 53), INT8_C( -2), INT8_C(-124), INT8_C( -2), INT8_C( 96), INT8_C( 75), INT8_C( 31), INT8_C( 112), INT8_C(-105), INT8_C( -87), INT8_C( -84), INT8_C( 94), INT8_C( 112), INT8_C( 90)), simde_mm512_set_epi8(INT8_C( -61), INT8_C( 49), INT8_C( 14), INT8_C( -86), INT8_C( -53), INT8_C( -89), INT8_C( 3), INT8_C( -41), INT8_C( 63), INT8_C( -8), INT8_C( 55), INT8_C( -37), INT8_C( -35), INT8_C(-121), INT8_C( 61), INT8_C( -65), INT8_C( -47), INT8_C( 91), INT8_C( 87), INT8_C(-119), INT8_C( 87), INT8_C( 76), INT8_C( 44), INT8_C(-116), INT8_C( 2), INT8_C( -56), INT8_C( 36), INT8_C( -61), INT8_C( -56), INT8_C( 125), INT8_C( -2), INT8_C(-117), INT8_C( -30), INT8_C( 71), INT8_C( 92), INT8_C(-127), INT8_C( -74), INT8_C( 119), INT8_C( -9), INT8_C( 34), INT8_C( 121), INT8_C( 85), INT8_C(-103), INT8_C( 116), INT8_C( -38), INT8_C( 21), INT8_C( 101), INT8_C( 122), INT8_C( 10), INT8_C( -25), INT8_C( 54), INT8_C( 71), INT8_C(-100), INT8_C(-107), INT8_C( -12), INT8_C( 84), INT8_C(-108), INT8_C( 85), INT8_C( -86), INT8_C( -72), INT8_C( 94), INT8_C(-102), INT8_C( -27), INT8_C( 11)), simde_mm512_set_epi8(INT8_C( 41), INT8_C( 0), INT8_C( -1), INT8_C( -58), INT8_C( 40), INT8_C( 44), INT8_C( -2), INT8_C( 10), INT8_C( -55), INT8_C( -7), INT8_C( -41), INT8_C( -7), INT8_C( -20), INT8_C( 37), INT8_C( -58), INT8_C( 29), INT8_C( -23), INT8_C( -86), INT8_C( -15), INT8_C( 7), INT8_C( -74), INT8_C( 10), INT8_C( -4), INT8_C( -58), INT8_C( 1), INT8_C( -14), INT8_C( -25), INT8_C( -18), INT8_C( -13), INT8_C( -33), INT8_C( -1), INT8_C( -3), INT8_C( -5), INT8_C( -18), INT8_C( -30), INT8_C( 64), INT8_C( -43), INT8_C( -54), INT8_C( 2), INT8_C( 11), INT8_C( 89), INT8_C( -19), INT8_C( 70), INT8_C( -30), INT8_C( -10), INT8_C( 7), INT8_C( -1), INT8_C( 88), INT8_C( 4), INT8_C( 17), INT8_C( 53), INT8_C( -2), INT8_C( -24), INT8_C( -2), INT8_C( 0), INT8_C( 75), INT8_C( 31), INT8_C( 27), INT8_C( -19), INT8_C( -15), INT8_C( -84), INT8_C( 
94), INT8_C( 4), INT8_C( 2)) }, { simde_mm512_set_epi8(INT8_C( -40), INT8_C( 85), INT8_C( -50), INT8_C( 103), INT8_C( -21), INT8_C(-102), INT8_C(-127), INT8_C(-121), INT8_C( 125), INT8_C( 76), INT8_C( -54), INT8_C( 108), INT8_C( 52), INT8_C( 71), INT8_C( -88), INT8_C( -60), INT8_C( 70), INT8_C(-118), INT8_C( -89), INT8_C( 65), INT8_C( -35), INT8_C( -95), INT8_C( -99), INT8_C( 93), INT8_C( -64), INT8_C( -67), INT8_C(-103), INT8_C(-101), INT8_C( -49), INT8_C( -43), INT8_C( 105), INT8_C(-120), INT8_C( -22), INT8_C( 94), INT8_C( -16), INT8_C( 12), INT8_C(-110), INT8_C( 1), INT8_C(-109), INT8_C( 59), INT8_C( -3), INT8_C( 26), INT8_C( 26), INT8_C( 40), INT8_C( 12), INT8_C( 2), INT8_C( -26), INT8_C(-111), INT8_C( -86), INT8_C( 105), INT8_C( 111), INT8_C( -96), INT8_C(-116), INT8_C( -54), INT8_C( -90), INT8_C( -36), INT8_C( -69), INT8_C( 65), INT8_C( -6), INT8_C( -61), INT8_C( 33), INT8_C(-125), INT8_C( 2), INT8_C( -92)), simde_mm512_set_epi8(INT8_C( 120), INT8_C( 127), INT8_C( 28), INT8_C( 95), INT8_C( -81), INT8_C( -33), INT8_C( 119), INT8_C( -42), INT8_C( -36), INT8_C( 102), INT8_C( 86), INT8_C( 22), INT8_C( 119), INT8_C( -49), INT8_C( 12), INT8_C( -73), INT8_C( -84), INT8_C( -14), INT8_C( -83), INT8_C( -7), INT8_C( 52), INT8_C( 108), INT8_C(-128), INT8_C( -53), INT8_C( 85), INT8_C(-121), INT8_C( -29), INT8_C( 35), INT8_C( -69), INT8_C( 24), INT8_C( -6), INT8_C( -37), INT8_C( -3), INT8_C( 62), INT8_C( 125), INT8_C( -20), INT8_C( 75), INT8_C( 13), INT8_C( 79), INT8_C( 81), INT8_C( -79), INT8_C( -35), INT8_C( -5), INT8_C( -75), INT8_C( -97), INT8_C( -74), INT8_C( 11), INT8_C( 11), INT8_C( 39), INT8_C( 37), INT8_C( 39), INT8_C( -48), INT8_C(-120), INT8_C( -76), INT8_C( -41), INT8_C(-117), INT8_C(-112), INT8_C(-128), INT8_C( -53), INT8_C( -50), INT8_C( -83), INT8_C( 36), INT8_C(-123), INT8_C( -81)), simde_mm512_set_epi8(INT8_C( -40), INT8_C( 85), INT8_C( -22), INT8_C( 8), INT8_C( -21), INT8_C( -3), INT8_C( -8), INT8_C( -37), INT8_C( 17), INT8_C( 76), INT8_C( -54), INT8_C( 
20), INT8_C( 52), INT8_C( 22), INT8_C( -4), INT8_C( -60), INT8_C( 70), INT8_C( -6), INT8_C( -6), INT8_C( 2), INT8_C( -35), INT8_C( -95), INT8_C( -99), INT8_C( 40), INT8_C( -64), INT8_C( -67), INT8_C( -16), INT8_C( -31), INT8_C( -49), INT8_C( -19), INT8_C( 3), INT8_C( -9), INT8_C( -1), INT8_C( 32), INT8_C( -16), INT8_C( 12), INT8_C( -35), INT8_C( 1), INT8_C( -30), INT8_C( 59), INT8_C( -3), INT8_C( 26), INT8_C( 1), INT8_C( 40), INT8_C( 12), INT8_C( 2), INT8_C( -4), INT8_C( -1), INT8_C( -8), INT8_C( 31), INT8_C( 33), INT8_C( 0), INT8_C(-116), INT8_C( -54), INT8_C( -8), INT8_C( -36), INT8_C( -69), INT8_C( 65), INT8_C( -6), INT8_C( -11), INT8_C( 33), INT8_C( -17), INT8_C( 2), INT8_C( -11)) }, { simde_mm512_set_epi8(INT8_C( 87), INT8_C( 63), INT8_C( 47), INT8_C( 80), INT8_C( 35), INT8_C( -27), INT8_C( 5), INT8_C( 31), INT8_C( -28), INT8_C( 73), INT8_C( 53), INT8_C( 47), INT8_C( -86), INT8_C( -64), INT8_C( 122), INT8_C( -19), INT8_C( 47), INT8_C(-126), INT8_C( -37), INT8_C( 102), INT8_C( -93), INT8_C( 41), INT8_C( -61), INT8_C( -41), INT8_C( -57), INT8_C( 54), INT8_C( 97), INT8_C( 126), INT8_C( 10), INT8_C( -91), INT8_C(-101), INT8_C( 88), INT8_C( -72), INT8_C( 63), INT8_C( 95), INT8_C( -92), INT8_C( 65), INT8_C( 71), INT8_C( -82), INT8_C( 88), INT8_C( -73), INT8_C(-114), INT8_C( 98), INT8_C( 14), INT8_C( 25), INT8_C( -83), INT8_C( 87), INT8_C( 2), INT8_C( -65), INT8_C(-113), INT8_C(-104), INT8_C( 2), INT8_C( 126), INT8_C( 0), INT8_C( -94), INT8_C( 57), INT8_C( -11), INT8_C( 36), INT8_C( -17), INT8_C( 54), INT8_C( 33), INT8_C( -91), INT8_C( -57), INT8_C( 84)), simde_mm512_set_epi8(INT8_C(-125), INT8_C( 42), INT8_C(-105), INT8_C( -46), INT8_C( 12), INT8_C( -93), INT8_C(-118), INT8_C( -49), INT8_C( 43), INT8_C( 57), INT8_C( 61), INT8_C( 62), INT8_C( 81), INT8_C( -72), INT8_C( 6), INT8_C( 93), INT8_C( -89), INT8_C( 1), INT8_C(-111), INT8_C( 9), INT8_C( 4), INT8_C( 17), INT8_C( 10), INT8_C( 101), INT8_C( -70), INT8_C( -75), INT8_C(-101), INT8_C( -13), INT8_C( -67), INT8_C( 
-65), INT8_C( -34), INT8_C( -51), INT8_C( 59), INT8_C( 26), INT8_C( -29), INT8_C( 105), INT8_C( -19), INT8_C(-111), INT8_C( -73), INT8_C( 79), INT8_C( -82), INT8_C( 60), INT8_C(-124), INT8_C( -48), INT8_C( 58), INT8_C( -78), INT8_C( 116), INT8_C( -16), INT8_C( 37), INT8_C(-125), INT8_C( 100), INT8_C( -79), INT8_C( 19), INT8_C( 102), INT8_C( 81), INT8_C( 86), INT8_C( 25), INT8_C( 43), INT8_C( 51), INT8_C(-116), INT8_C( 9), INT8_C( 40), INT8_C( -29), INT8_C( 75)), simde_mm512_set_epi8(INT8_C( 87), INT8_C( 21), INT8_C( 47), INT8_C( 34), INT8_C( 11), INT8_C( -27), INT8_C( 5), INT8_C( 31), INT8_C( -28), INT8_C( 16), INT8_C( 53), INT8_C( 47), INT8_C( -5), INT8_C( -64), INT8_C( 2), INT8_C( -19), INT8_C( 47), INT8_C( 0), INT8_C( -37), INT8_C( 3), INT8_C( -1), INT8_C( 7), INT8_C( -1), INT8_C( -41), INT8_C( -57), INT8_C( 54), INT8_C( 97), INT8_C( 9), INT8_C( 10), INT8_C( -26), INT8_C( -33), INT8_C( 37), INT8_C( -13), INT8_C( 11), INT8_C( 8), INT8_C( -92), INT8_C( 8), INT8_C( 71), INT8_C( -9), INT8_C( 9), INT8_C( -73), INT8_C( -54), INT8_C( 98), INT8_C( 14), INT8_C( 25), INT8_C( -5), INT8_C( 87), INT8_C( 2), INT8_C( -28), INT8_C(-113), INT8_C( -4), INT8_C( 2), INT8_C( 12), INT8_C( 0), INT8_C( -13), INT8_C( 57), INT8_C( -11), INT8_C( 36), INT8_C( -17), INT8_C( 54), INT8_C( 6), INT8_C( -11), INT8_C( -28), INT8_C( 9)) }, { simde_mm512_set_epi8(INT8_C( -23), INT8_C( 79), INT8_C( 12), INT8_C( 0), INT8_C( 33), INT8_C( -78), INT8_C( 58), INT8_C( 74), INT8_C( -6), INT8_C( 116), INT8_C(-114), INT8_C( 20), INT8_C( 88), INT8_C( 63), INT8_C( 34), INT8_C( 124), INT8_C( -6), INT8_C( 48), INT8_C( -35), INT8_C( -24), INT8_C( -35), INT8_C( 75), INT8_C(-101), INT8_C( 80), INT8_C( -23), INT8_C( -87), INT8_C( -58), INT8_C( -30), INT8_C( 83), INT8_C( 27), INT8_C(-119), INT8_C( 34), INT8_C( 23), INT8_C(-124), INT8_C( 106), INT8_C( 109), INT8_C(-121), INT8_C( -53), INT8_C( 98), INT8_C( 120), INT8_C( 101), INT8_C( 52), INT8_C( 82), INT8_C( 44), INT8_C(-114), INT8_C( 14), INT8_C( 99), INT8_C( -11), 
INT8_C( 8), INT8_C(-116), INT8_C(-115), INT8_C( 123), INT8_C( -37), INT8_C( -93), INT8_C( -60), INT8_C( -23), INT8_C( 34), INT8_C( -71), INT8_C( -28), INT8_C( 108), INT8_C( 95), INT8_C( -20), INT8_C( 97), INT8_C( 41)), simde_mm512_set_epi8(INT8_C( -63), INT8_C( -26), INT8_C( 93), INT8_C( 23), INT8_C( -63), INT8_C( 52), INT8_C( -33), INT8_C( -81), INT8_C( -51), INT8_C( 45), INT8_C( -90), INT8_C( 24), INT8_C( 71), INT8_C( -22), INT8_C( -95), INT8_C(-114), INT8_C( -72), INT8_C( -38), INT8_C( -66), INT8_C( -44), INT8_C( 116), INT8_C( -97), INT8_C( 44), INT8_C( 55), INT8_C( -43), INT8_C(-123), INT8_C( 60), INT8_C( 3), INT8_C( 58), INT8_C( -1), INT8_C( 125), INT8_C( -67), INT8_C(-111), INT8_C( 88), INT8_C( 55), INT8_C( -74), INT8_C( 23), INT8_C( -95), INT8_C(-123), INT8_C( 27), INT8_C( 125), INT8_C( -27), INT8_C( -53), INT8_C( 45), INT8_C( 24), INT8_C( 5), INT8_C( 90), INT8_C( 83), INT8_C(-111), INT8_C( 85), INT8_C(-100), INT8_C( -92), INT8_C(-107), INT8_C( -55), INT8_C( 48), INT8_C( -1), INT8_C( 41), INT8_C( 42), INT8_C( 94), INT8_C(-127), INT8_C(-121), INT8_C( 8), INT8_C( 12), INT8_C( -53)), simde_mm512_set_epi8(INT8_C( -23), INT8_C( 1), INT8_C( 12), INT8_C( 0), INT8_C( 33), INT8_C( -26), INT8_C( 25), INT8_C( 74), INT8_C( -6), INT8_C( 26), INT8_C( -24), INT8_C( 20), INT8_C( 17), INT8_C( 19), INT8_C( 34), INT8_C( 10), INT8_C( -6), INT8_C( 10), INT8_C( -35), INT8_C( -24), INT8_C( -35), INT8_C( 75), INT8_C( -13), INT8_C( 25), INT8_C( -23), INT8_C( -87), INT8_C( -58), INT8_C( 0), INT8_C( 25), INT8_C( 0), INT8_C(-119), INT8_C( 34), INT8_C( 23), INT8_C( -36), INT8_C( 51), INT8_C( 35), INT8_C( -6), INT8_C( -53), INT8_C( 98), INT8_C( 12), INT8_C( 101), INT8_C( 25), INT8_C( 29), INT8_C( 44), INT8_C( -18), INT8_C( 4), INT8_C( 9), INT8_C( -11), INT8_C( 8), INT8_C( -31), INT8_C( -15), INT8_C( 31), INT8_C( -37), INT8_C( -38), INT8_C( -12), INT8_C( 0), INT8_C( 34), INT8_C( -29), INT8_C( -28), INT8_C( 108), INT8_C( 95), INT8_C( -4), INT8_C( 1), INT8_C( 41)) }, { 
simde_mm512_set_epi8(INT8_C(-114), INT8_C( 19), INT8_C(-128), INT8_C( 3), INT8_C(-127), INT8_C( -64), INT8_C( 118), INT8_C(-100), INT8_C( 16), INT8_C( -24), INT8_C( -53), INT8_C( 122), INT8_C( -27), INT8_C( 105), INT8_C( 120), INT8_C( -55), INT8_C( -28), INT8_C( -89), INT8_C(-115), INT8_C(-110), INT8_C( 116), INT8_C( 74), INT8_C( -65), INT8_C( 35), INT8_C( 45), INT8_C( -98), INT8_C( -28), INT8_C(-118), INT8_C( 49), INT8_C( 7), INT8_C( 65), INT8_C(-116), INT8_C( 0), INT8_C( 113), INT8_C(-100), INT8_C( 113), INT8_C( -10), INT8_C( -89), INT8_C( 109), INT8_C(-115), INT8_C( -64), INT8_C( 11), INT8_C( 33), INT8_C(-115), INT8_C(-127), INT8_C( 2), INT8_C( -88), INT8_C( -29), INT8_C( 23), INT8_C( -83), INT8_C( 104), INT8_C( 71), INT8_C( 11), INT8_C( -6), INT8_C( 13), INT8_C( -38), INT8_C( -62), INT8_C(-116), INT8_C( 125), INT8_C( 43), INT8_C(-105), INT8_C( 49), INT8_C(-127), INT8_C( -38)), simde_mm512_set_epi8(INT8_C( 8), INT8_C( 25), INT8_C(-109), INT8_C( -36), INT8_C( -83), INT8_C(-118), INT8_C( 38), INT8_C(-106), INT8_C( 35), INT8_C( 43), INT8_C( -91), INT8_C( -71), INT8_C( 50), INT8_C( 64), INT8_C( -95), INT8_C(-124), INT8_C( -94), INT8_C( 50), INT8_C( -57), INT8_C( 84), INT8_C( -5), INT8_C( -56), INT8_C( -39), INT8_C( 19), INT8_C( -76), INT8_C( -60), INT8_C( -10), INT8_C( 76), INT8_C( 55), INT8_C( -52), INT8_C(-117), INT8_C( 75), INT8_C( 1), INT8_C( 89), INT8_C(-123), INT8_C( -44), INT8_C( -50), INT8_C( 55), INT8_C( -52), INT8_C( 120), INT8_C( 37), INT8_C( -97), INT8_C(-110), INT8_C( -39), INT8_C( -30), INT8_C( -66), INT8_C(-122), INT8_C( 8), INT8_C( 113), INT8_C( 61), INT8_C( 103), INT8_C( 100), INT8_C( 23), INT8_C( -27), INT8_C(-110), INT8_C( 97), INT8_C( 95), INT8_C( 32), INT8_C(-120), INT8_C( 91), INT8_C( 46), INT8_C( -4), INT8_C( -93), INT8_C( 88)), simde_mm512_set_epi8(INT8_C( -2), INT8_C( 19), INT8_C( -19), INT8_C( 3), INT8_C( -44), INT8_C( -64), INT8_C( 4), INT8_C(-100), INT8_C( 16), INT8_C( -24), INT8_C( -53), INT8_C( 51), INT8_C( -27), INT8_C( 41), INT8_C( 
25), INT8_C( -55), INT8_C( -28), INT8_C( -39), INT8_C( -1), INT8_C( -26), INT8_C( 1), INT8_C( 18), INT8_C( -26), INT8_C( 16), INT8_C( 45), INT8_C( -38), INT8_C( -8), INT8_C( -42), INT8_C( 49), INT8_C( 7), INT8_C( 65), INT8_C( -41), INT8_C( 0), INT8_C( 24), INT8_C(-100), INT8_C( 25), INT8_C( -10), INT8_C( -34), INT8_C( 5), INT8_C(-115), INT8_C( -27), INT8_C( 11), INT8_C( 33), INT8_C( -37), INT8_C( -7), INT8_C( 2), INT8_C( -88), INT8_C( -5), INT8_C( 23), INT8_C( -22), INT8_C( 1), INT8_C( 71), INT8_C( 11), INT8_C( -6), INT8_C( 13), INT8_C( -38), INT8_C( -62), INT8_C( -20), INT8_C( 5), INT8_C( 43), INT8_C( -13), INT8_C( 1), INT8_C( -34), INT8_C( -38)) }, { simde_mm512_set_epi8(INT8_C( 46), INT8_C( 43), INT8_C( -10), INT8_C( -99), INT8_C( 80), INT8_C(-102), INT8_C( 27), INT8_C( 118), INT8_C( -80), INT8_C( -40), INT8_C( 46), INT8_C(-114), INT8_C( -58), INT8_C( -8), INT8_C( 88), INT8_C( 29), INT8_C( -80), INT8_C( 25), INT8_C( 101), INT8_C( 54), INT8_C( 103), INT8_C( 120), INT8_C( 94), INT8_C( 16), INT8_C( -59), INT8_C( -51), INT8_C( 71), INT8_C( -10), INT8_C( -98), INT8_C( -80), INT8_C( -38), INT8_C( 43), INT8_C( -21), INT8_C( -7), INT8_C( 116), INT8_C(-119), INT8_C( 89), INT8_C( -44), INT8_C(-124), INT8_C( 56), INT8_C( -26), INT8_C(-119), INT8_C( 66), INT8_C( 41), INT8_C( 44), INT8_C( 35), INT8_C( -67), INT8_C(-101), INT8_C( 125), INT8_C(-126), INT8_C( 123), INT8_C( 117), INT8_C( 123), INT8_C( 127), INT8_C(-105), INT8_C( 60), INT8_C(-103), INT8_C( -71), INT8_C( -6), INT8_C( 100), INT8_C( 83), INT8_C( 112), INT8_C( 33), INT8_C(-116)), simde_mm512_set_epi8(INT8_C( 36), INT8_C( 33), INT8_C( 42), INT8_C( 75), INT8_C( -77), INT8_C( -84), INT8_C( 126), INT8_C( -85), INT8_C( 110), INT8_C(-106), INT8_C( 107), INT8_C( -76), INT8_C(-122), INT8_C( 73), INT8_C( -49), INT8_C( 15), INT8_C( -15), INT8_C( 103), INT8_C( 103), INT8_C(-106), INT8_C( 103), INT8_C( 58), INT8_C( 104), INT8_C( 35), INT8_C( -7), INT8_C( 79), INT8_C( 113), INT8_C( 97), INT8_C( -67), INT8_C( -59), INT8_C( -82), 
INT8_C( -34), INT8_C( -32), INT8_C( 104), INT8_C( 123), INT8_C( 124), INT8_C( 49), INT8_C( -30), INT8_C( 37), INT8_C( 22), INT8_C( 105), INT8_C( -99), INT8_C( 110), INT8_C( 52), INT8_C( -2), INT8_C( 103), INT8_C( -94), INT8_C( -46), INT8_C( -54), INT8_C( 39), INT8_C( -63), INT8_C(-105), INT8_C( -73), INT8_C( 73), INT8_C( 97), INT8_C( -69), INT8_C( 102), INT8_C( -61), INT8_C( 68), INT8_C( -66), INT8_C( 65), INT8_C( 60), INT8_C( -91), INT8_C( 126)), simde_mm512_set_epi8(INT8_C( 10), INT8_C( 10), INT8_C( -10), INT8_C( -24), INT8_C( 3), INT8_C( -18), INT8_C( 27), INT8_C( 33), INT8_C( -80), INT8_C( -40), INT8_C( 46), INT8_C( -38), INT8_C( -58), INT8_C( -8), INT8_C( 39), INT8_C( 14), INT8_C( -5), INT8_C( 25), INT8_C( 101), INT8_C( 54), INT8_C( 0), INT8_C( 4), INT8_C( 94), INT8_C( 16), INT8_C( -3), INT8_C( -51), INT8_C( 71), INT8_C( -10), INT8_C( -31), INT8_C( -21), INT8_C( -38), INT8_C( 9), INT8_C( -21), INT8_C( -7), INT8_C( 116), INT8_C(-119), INT8_C( 40), INT8_C( -14), INT8_C( -13), INT8_C( 12), INT8_C( -26), INT8_C( -20), INT8_C( 66), INT8_C( 41), INT8_C( 0), INT8_C( 35), INT8_C( -67), INT8_C( -9), INT8_C( 17), INT8_C( -9), INT8_C( 60), INT8_C( 12), INT8_C( 50), INT8_C( 54), INT8_C( -8), INT8_C( 60), INT8_C( -1), INT8_C( -10), INT8_C( -6), INT8_C( 34), INT8_C( 18), INT8_C( 52), INT8_C( 33), INT8_C(-116)) }, { simde_mm512_set_epi8(INT8_C( -16), INT8_C( -87), INT8_C( 8), INT8_C( 54), INT8_C( 66), INT8_C( 99), INT8_C( 14), INT8_C( 32), INT8_C(-108), INT8_C( 92), INT8_C( 122), INT8_C( -56), INT8_C( -64), INT8_C( -70), INT8_C( -31), INT8_C( 52), INT8_C( -74), INT8_C( -12), INT8_C( -3), INT8_C( -28), INT8_C(-115), INT8_C( -28), INT8_C(-108), INT8_C( -88), INT8_C( -25), INT8_C( 107), INT8_C( 47), INT8_C( -51), INT8_C( 126), INT8_C( 7), INT8_C( -74), INT8_C( -11), INT8_C( -91), INT8_C( -70), INT8_C( -43), INT8_C( 84), INT8_C( 19), INT8_C(-125), INT8_C( 54), INT8_C( 13), INT8_C( -71), INT8_C( -74), INT8_C( 72), INT8_C( 61), INT8_C( 125), INT8_C( 104), INT8_C(-109), INT8_C( 
11), INT8_C( 89), INT8_C( -52), INT8_C( 62), INT8_C( -93), INT8_C( -58), INT8_C( -94), INT8_C( -51), INT8_C( 9), INT8_C( -74), INT8_C( 123), INT8_C( 65), INT8_C( -48), INT8_C(-111), INT8_C( -77), INT8_C( 34), INT8_C( -61)), simde_mm512_set_epi8(INT8_C(-115), INT8_C( 103), INT8_C( 116), INT8_C( 12), INT8_C( -82), INT8_C( -30), INT8_C( -63), INT8_C( -81), INT8_C(-101), INT8_C( -82), INT8_C( 73), INT8_C( 6), INT8_C(-115), INT8_C(-116), INT8_C( -2), INT8_C( -63), INT8_C( 100), INT8_C(-105), INT8_C( 14), INT8_C( 19), INT8_C( 38), INT8_C( 115), INT8_C( -55), INT8_C( 118), INT8_C( 74), INT8_C( -70), INT8_C( 89), INT8_C( -73), INT8_C( 65), INT8_C(-118), INT8_C( 64), INT8_C( 90), INT8_C(-104), INT8_C( -15), INT8_C( -27), INT8_C( -38), INT8_C( 126), INT8_C( 38), INT8_C( -97), INT8_C( 27), INT8_C( -92), INT8_C( -57), INT8_C( 25), INT8_C( -3), INT8_C( -75), INT8_C( 104), INT8_C( 6), INT8_C( -73), INT8_C( 36), INT8_C( -53), INT8_C(-118), INT8_C(-111), INT8_C( 116), INT8_C(-101), INT8_C( -38), INT8_C( 24), INT8_C( -51), INT8_C( -18), INT8_C( -14), INT8_C( 26), INT8_C( -30), INT8_C( 76), INT8_C( -30), INT8_C( -42)), simde_mm512_set_epi8(INT8_C( -16), INT8_C( -87), INT8_C( 8), INT8_C( 6), INT8_C( 66), INT8_C( 9), INT8_C( 14), INT8_C( 32), INT8_C( -7), INT8_C( 10), INT8_C( 49), INT8_C( -2), INT8_C( -64), INT8_C( -70), INT8_C( -1), INT8_C( 52), INT8_C( -74), INT8_C( -12), INT8_C( -3), INT8_C( -9), INT8_C( -1), INT8_C( -28), INT8_C( -53), INT8_C( -88), INT8_C( -25), INT8_C( 37), INT8_C( 47), INT8_C( -51), INT8_C( 61), INT8_C( 7), INT8_C( -10), INT8_C( -11), INT8_C( -91), INT8_C( -10), INT8_C( -16), INT8_C( 8), INT8_C( 19), INT8_C( -11), INT8_C( 54), INT8_C( 13), INT8_C( -71), INT8_C( -17), INT8_C( 22), INT8_C( 1), INT8_C( 50), INT8_C( 0), INT8_C( -1), INT8_C( 11), INT8_C( 17), INT8_C( -52), INT8_C( 62), INT8_C( -93), INT8_C( -58), INT8_C( -94), INT8_C( -13), INT8_C( 9), INT8_C( -23), INT8_C( 15), INT8_C( 9), INT8_C( -22), INT8_C( -21), INT8_C( -1), INT8_C( 4), INT8_C( -19)) }, { 
simde_mm512_set_epi8(INT8_C( -59), INT8_C( 52), INT8_C(-111), INT8_C( 20), INT8_C( 26), INT8_C( -78), INT8_C( 121), INT8_C( 16), INT8_C( 45), INT8_C( -27), INT8_C( 11), INT8_C( -26), INT8_C( 53), INT8_C( 2), INT8_C( -22), INT8_C( 7), INT8_C( -49), INT8_C(-110), INT8_C( -87), INT8_C( -23), INT8_C( -50), INT8_C( 116), INT8_C( 55), INT8_C(-100), INT8_C( -76), INT8_C( 91), INT8_C( 56), INT8_C(-110), INT8_C( 55), INT8_C(-119), INT8_C( -56), INT8_C( 76), INT8_C( 43), INT8_C( -11), INT8_C(-118), INT8_C( 3), INT8_C( -43), INT8_C(-100), INT8_C( -90), INT8_C( -22), INT8_C( -57), INT8_C( 2), INT8_C( 86), INT8_C( 72), INT8_C( 93), INT8_C( -2), INT8_C( -66), INT8_C( 121), INT8_C( 119), INT8_C( 75), INT8_C( -97), INT8_C( 76), INT8_C( 70), INT8_C( -38), INT8_C( 17), INT8_C( -17), INT8_C( 43), INT8_C(-104), INT8_C( -34), INT8_C( 80), INT8_C( -59), INT8_C( 113), INT8_C( 112), INT8_C( 81)), simde_mm512_set_epi8(INT8_C( -63), INT8_C( -94), INT8_C( -78), INT8_C( 36), INT8_C( -78), INT8_C( 86), INT8_C( 79), INT8_C( -89), INT8_C( -77), INT8_C( 45), INT8_C( 18), INT8_C( -25), INT8_C( 113), INT8_C( 127), INT8_C( -45), INT8_C( -75), INT8_C( 121), INT8_C( -85), INT8_C( 76), INT8_C(-121), INT8_C( 15), INT8_C(-123), INT8_C( -9), INT8_C( 32), INT8_C( -75), INT8_C( -88), INT8_C( -20), INT8_C( 99), INT8_C( 85), INT8_C(-105), INT8_C( 36), INT8_C( 99), INT8_C( 101), INT8_C( 42), INT8_C( 63), INT8_C( 96), INT8_C( -46), INT8_C( -58), INT8_C( -54), INT8_C( 105), INT8_C( -42), INT8_C( 74), INT8_C( -57), INT8_C( 17), INT8_C( -22), INT8_C( 22), INT8_C(-122), INT8_C( 112), INT8_C( 62), INT8_C(-115), INT8_C(-100), INT8_C( 91), INT8_C( 99), INT8_C( 24), INT8_C( -58), INT8_C(-125), INT8_C( 88), INT8_C(-120), INT8_C( 61), INT8_C( 94), INT8_C( -67), INT8_C( -43), INT8_C( -7), INT8_C(-125)), simde_mm512_set_epi8(INT8_C( -59), INT8_C( 52), INT8_C( -33), INT8_C( 20), INT8_C( 26), INT8_C( -78), INT8_C( 42), INT8_C( 16), INT8_C( 45), INT8_C( -27), INT8_C( 11), INT8_C( -1), INT8_C( 53), INT8_C( 2), INT8_C( -22), 
INT8_C( 7), INT8_C( -49), INT8_C( -25), INT8_C( -11), INT8_C( -23), INT8_C( -5), INT8_C( 116), INT8_C( 1), INT8_C( -4), INT8_C( -1), INT8_C( 3), INT8_C( 16), INT8_C( -11), INT8_C( 55), INT8_C( -14), INT8_C( -20), INT8_C( 76), INT8_C( 43), INT8_C( -11), INT8_C( -55), INT8_C( 3), INT8_C( -43), INT8_C( -42), INT8_C( -36), INT8_C( -22), INT8_C( -15), INT8_C( 2), INT8_C( 29), INT8_C( 4), INT8_C( 5), INT8_C( -2), INT8_C( -66), INT8_C( 9), INT8_C( 57), INT8_C( 75), INT8_C( -97), INT8_C( 76), INT8_C( 70), INT8_C( -14), INT8_C( 17), INT8_C( -17), INT8_C( 43), INT8_C(-104), INT8_C( -34), INT8_C( 80), INT8_C( -59), INT8_C( 27), INT8_C( 0), INT8_C( 81)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_rem_epi8(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_rem_epi16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi16(INT16_C( 10545), INT16_C(-21562), INT16_C( 10284), INT16_C( -3533), INT16_C(-29991), INT16_C(-10247), INT16_C(-14043), INT16_C(-30435), INT16_C( -5718), INT16_C( -3714), INT16_C(-18934), INT16_C(-12090), INT16_C( 23938), INT16_C(-15439), INT16_C(-17441), INT16_C(-29699), INT16_C(-16473), INT16_C( -7616), INT16_C(-10806), INT16_C( 28273), INT16_C( 23021), INT16_C( 18146), INT16_C(-31653), INT16_C( -168), INT16_C( 26666), INT16_C( 13822), INT16_C(-31490), INT16_C( 24651), INT16_C( 8048), INT16_C(-26711), INT16_C(-21410), INT16_C( 28762)), simde_mm512_set_epi16(INT16_C(-26929), INT16_C( 8074), INT16_C( 18000), INT16_C(-29849), INT16_C(-25121), INT16_C( 3254), INT16_C(-10254), INT16_C(-26681), INT16_C(-24206), INT16_C(-28388), INT16_C( 25803), INT16_C( 25877), INT16_C( 768), INT16_C( 16244), INT16_C( 11114), INT16_C( -7212), INT16_C( 18192), INT16_C( 32532), INT16_C(-31836), INT16_C( -5163), INT16_C( 20183), INT16_C( -1494), INT16_C( 2502), INT16_C( 18488), 
INT16_C( 22771), INT16_C( 21470), INT16_C( 4556), INT16_C( 26138), INT16_C( 19085), INT16_C( -923), INT16_C( -9934), INT16_C( -2165)), simde_mm512_set_epi16(INT16_C( 10545), INT16_C( -5414), INT16_C( 10284), INT16_C( -3533), INT16_C( -4870), INT16_C( -485), INT16_C( -3789), INT16_C( -3754), INT16_C( -5718), INT16_C( -3714), INT16_C(-18934), INT16_C(-12090), INT16_C( 130), INT16_C(-15439), INT16_C( -6327), INT16_C( -851), INT16_C(-16473), INT16_C( -7616), INT16_C(-10806), INT16_C( 2458), INT16_C( 2838), INT16_C( 218), INT16_C( -1629), INT16_C( -168), INT16_C( 3895), INT16_C( 13822), INT16_C( -4154), INT16_C( 24651), INT16_C( 8048), INT16_C( -867), INT16_C( -1542), INT16_C( 617)) }, { simde_mm512_set_epi16(INT16_C( 20057), INT16_C( 26978), INT16_C(-19795), INT16_C(-31033), INT16_C(-11277), INT16_C(-24100), INT16_C(-21653), INT16_C( 11009), INT16_C(-15324), INT16_C( 9014), INT16_C( 24117), INT16_C(-31497), INT16_C( -7188), INT16_C( 8311), INT16_C( 31759), INT16_C( 4002), INT16_C( 7525), INT16_C( 3321), INT16_C(-18237), INT16_C( -1323), INT16_C( 13644), INT16_C(-17383), INT16_C(-20302), INT16_C(-13836), INT16_C( 7513), INT16_C( 1114), INT16_C( -200), INT16_C( 10389), INT16_C(-31848), INT16_C( 9445), INT16_C( -5204), INT16_C(-24070)), simde_mm512_set_epi16(INT16_C(-17379), INT16_C( -8623), INT16_C(-10486), INT16_C(-16677), INT16_C( 27895), INT16_C(-17193), INT16_C( -5943), INT16_C( -5111), INT16_C( -2949), INT16_C(-11305), INT16_C(-13092), INT16_C( 8140), INT16_C( -6841), INT16_C( 2476), INT16_C(-24435), INT16_C( 7948), INT16_C( 26094), INT16_C(-13182), INT16_C( 30122), INT16_C(-17848), INT16_C(-21735), INT16_C( -7772), INT16_C( 1809), INT16_C(-31933), INT16_C( 8271), INT16_C( 4936), INT16_C( 7627), INT16_C( 20477), INT16_C( 14608), INT16_C( 25470), INT16_C(-19700), INT16_C( 25611)), simde_mm512_set_epi16(INT16_C( 2678), INT16_C( 1109), INT16_C( -9309), INT16_C(-14356), INT16_C(-11277), INT16_C( -6907), INT16_C( -3824), INT16_C( 787), INT16_C( -579), INT16_C( 9014), 
INT16_C( 11025), INT16_C( -7077), INT16_C( -347), INT16_C( 883), INT16_C( 7324), INT16_C( 4002), INT16_C( 7525), INT16_C( 3321), INT16_C(-18237), INT16_C( -1323), INT16_C( 13644), INT16_C( -1839), INT16_C( -403), INT16_C(-13836), INT16_C( 7513), INT16_C( 1114), INT16_C( -200), INT16_C( 10389), INT16_C( -2632), INT16_C( 9445), INT16_C( -5204), INT16_C(-24070)) }, { simde_mm512_set_epi16(INT16_C( 26902), INT16_C(-14525), INT16_C( -7905), INT16_C( -8015), INT16_C(-22131), INT16_C( 18318), INT16_C(-21513), INT16_C( 9770), INT16_C( 4118), INT16_C(-32437), INT16_C( 6621), INT16_C( -7897), INT16_C( 22002), INT16_C(-32381), INT16_C( 15537), INT16_C(-26793), INT16_C( 26466), INT16_C( 21183), INT16_C( 5811), INT16_C( 17016), INT16_C(-14374), INT16_C(-18761), INT16_C(-11284), INT16_C( -933), INT16_C( 30444), INT16_C( 20573), INT16_C(-14964), INT16_C( 25607), INT16_C(-28815), INT16_C(-28739), INT16_C( 27147), INT16_C( -3265)), simde_mm512_set_epi16(INT16_C(-10155), INT16_C(-12697), INT16_C( -5222), INT16_C(-32377), INT16_C( 32076), INT16_C(-13716), INT16_C( 13383), INT16_C(-22332), INT16_C( 18058), INT16_C(-22719), INT16_C( -8799), INT16_C(-25251), INT16_C(-16195), INT16_C(-26213), INT16_C(-12331), INT16_C( 27016), INT16_C( -5538), INT16_C( -4084), INT16_C(-28159), INT16_C(-27845), INT16_C( -742), INT16_C( 6696), INT16_C( 3074), INT16_C( -6511), INT16_C(-21911), INT16_C( 28576), INT16_C(-29494), INT16_C(-22820), INT16_C(-17599), INT16_C( -1341), INT16_C( 8579), INT16_C( 676)), simde_mm512_set_epi16(INT16_C( 6592), INT16_C( -1828), INT16_C( -2683), INT16_C( -8015), INT16_C(-22131), INT16_C( 4602), INT16_C( -8130), INT16_C( 9770), INT16_C( 4118), INT16_C( -9718), INT16_C( 6621), INT16_C( -7897), INT16_C( 5807), INT16_C( -6168), INT16_C( 3206), INT16_C(-26793), INT16_C( 4314), INT16_C( 763), INT16_C( 5811), INT16_C( 17016), INT16_C( -276), INT16_C( -5369), INT16_C( -2062), INT16_C( -933), INT16_C( 8533), INT16_C( 20573), INT16_C(-14964), INT16_C( 2787), INT16_C(-11216), INT16_C( 
-578), INT16_C( 1410), INT16_C( -561)) }, { simde_mm512_set_epi16(INT16_C( 7566), INT16_C( 25511), INT16_C( -5831), INT16_C( 13989), INT16_C( 13965), INT16_C(-31065), INT16_C( 77), INT16_C(-30384), INT16_C( 21705), INT16_C(-23032), INT16_C( -2503), INT16_C( -8652), INT16_C(-23147), INT16_C( -4009), INT16_C( 7598), INT16_C( 23051), INT16_C( 13886), INT16_C( 28688), INT16_C( 30551), INT16_C(-28928), INT16_C( -9491), INT16_C(-26549), INT16_C( -738), INT16_C( 22350), INT16_C( 7981), INT16_C(-15059), INT16_C(-18848), INT16_C( 16804), INT16_C(-31876), INT16_C( -1787), INT16_C( 29649), INT16_C( -721)), simde_mm512_set_epi16(INT16_C( 18409), INT16_C( 19069), INT16_C( 20979), INT16_C(-29762), INT16_C( 8112), INT16_C( 25085), INT16_C( 31664), INT16_C(-10132), INT16_C( -2207), INT16_C( 19403), INT16_C(-32530), INT16_C( 20365), INT16_C( 22045), INT16_C(-23601), INT16_C( 28665), INT16_C(-29743), INT16_C( 26789), INT16_C(-25295), INT16_C(-31460), INT16_C(-29347), INT16_C(-16029), INT16_C(-32645), INT16_C(-19836), INT16_C( 31541), INT16_C(-32299), INT16_C(-14817), INT16_C( 22782), INT16_C(-18634), INT16_C( -2744), INT16_C( 907), INT16_C( 9939), INT16_C( 395)), simde_mm512_set_epi16(INT16_C( 7566), INT16_C( 6442), INT16_C( -5831), INT16_C( 13989), INT16_C( 5853), INT16_C( -5980), INT16_C( 77), INT16_C(-10120), INT16_C( 1842), INT16_C( -3629), INT16_C( -2503), INT16_C( -8652), INT16_C( -1102), INT16_C( -4009), INT16_C( 7598), INT16_C( 23051), INT16_C( 13886), INT16_C( 3393), INT16_C( 30551), INT16_C(-28928), INT16_C( -9491), INT16_C(-26549), INT16_C( -738), INT16_C( 22350), INT16_C( 7981), INT16_C( -242), INT16_C(-18848), INT16_C( 16804), INT16_C( -1692), INT16_C( -880), INT16_C( 9771), INT16_C( -326)) }, { simde_mm512_set_epi16(INT16_C(-24983), INT16_C( 9260), INT16_C( 6846), INT16_C( 21618), INT16_C( 20365), INT16_C( 26413), INT16_C( 7670), INT16_C( 6521), INT16_C( 13052), INT16_C( 19892), INT16_C(-25515), INT16_C( -7444), INT16_C( 12337), INT16_C( 14080), INT16_C( 6934), 
INT16_C( -4021), INT16_C( 1885), INT16_C( 11733), INT16_C( 7371), INT16_C( 24583), INT16_C(-17187), INT16_C(-28061), INT16_C(-18330), INT16_C(-10845), INT16_C( -2076), INT16_C( 2107), INT16_C( -3367), INT16_C(-26728), INT16_C( 21341), INT16_C(-13702), INT16_C( 26283), INT16_C(-27301)), simde_mm512_set_epi16(INT16_C( 9227), INT16_C( 20728), INT16_C( 22448), INT16_C( 22271), INT16_C(-27526), INT16_C( 3228), INT16_C(-26938), INT16_C( 15839), INT16_C( 4554), INT16_C( 22831), INT16_C(-21433), INT16_C( 32351), INT16_C(-18789), INT16_C( 20983), INT16_C( -3647), INT16_C( 26454), INT16_C( -2225), INT16_C( 19804), INT16_C( -2763), INT16_C( -8730), INT16_C(-29152), INT16_C( 25302), INT16_C(-28393), INT16_C( 3478), INT16_C( -5675), INT16_C( -4361), INT16_C(-16878), INT16_C( 23119), INT16_C( 30252), INT16_C( -2420), INT16_C( 13170), INT16_C(-21449)), simde_mm512_set_epi16(INT16_C( -6529), INT16_C( 9260), INT16_C( 6846), INT16_C( 21618), INT16_C( 20365), INT16_C( 589), INT16_C( 7670), INT16_C( 6521), INT16_C( 3944), INT16_C( 19892), INT16_C( -4082), INT16_C( -7444), INT16_C( 12337), INT16_C( 14080), INT16_C( 3287), INT16_C( -4021), INT16_C( 1885), INT16_C( 11733), INT16_C( 1845), INT16_C( 7123), INT16_C(-17187), INT16_C( -2759), INT16_C(-18330), INT16_C( -411), INT16_C( -2076), INT16_C( 2107), INT16_C( -3367), INT16_C( -3609), INT16_C( 21341), INT16_C( -1602), INT16_C( 13113), INT16_C( -5852)) }, { simde_mm512_set_epi16(INT16_C( 22335), INT16_C( 12112), INT16_C( 9189), INT16_C( 1311), INT16_C( -7095), INT16_C( 13615), INT16_C(-21824), INT16_C( 31469), INT16_C( 12162), INT16_C( -9370), INT16_C(-23767), INT16_C(-15401), INT16_C(-14538), INT16_C( 24958), INT16_C( 2725), INT16_C(-25768), INT16_C(-18369), INT16_C( 24484), INT16_C( 16711), INT16_C(-20904), INT16_C(-18546), INT16_C( 25102), INT16_C( 6573), INT16_C( 22274), INT16_C(-16497), INT16_C(-26622), INT16_C( 32256), INT16_C(-24007), INT16_C( -2780), INT16_C( -4298), INT16_C( 8613), INT16_C(-14508)), 
simde_mm512_set_epi16(INT16_C( 30472), INT16_C(-28763), INT16_C( 7714), INT16_C( 18947), INT16_C( 7066), INT16_C(-17692), INT16_C( -6885), INT16_C( 1841), INT16_C(-29737), INT16_C(-14957), INT16_C(-32610), INT16_C( 26598), INT16_C(-25999), INT16_C( -4399), INT16_C( 5946), INT16_C( 2262), INT16_C( -5420), INT16_C( 12953), INT16_C(-27491), INT16_C(-17749), INT16_C( 30618), INT16_C(-27725), INT16_C(-13788), INT16_C(-13300), INT16_C( 23394), INT16_C( 2441), INT16_C( 32382), INT16_C( 9384), INT16_C( 25792), INT16_C( -9373), INT16_C( 22658), INT16_C( 20939)), simde_mm512_set_epi16(INT16_C( 22335), INT16_C( 12112), INT16_C( 1475), INT16_C( 1311), INT16_C( -29), INT16_C( 13615), INT16_C( -1169), INT16_C( 172), INT16_C( 12162), INT16_C( -9370), INT16_C(-23767), INT16_C(-15401), INT16_C(-14538), INT16_C( 2963), INT16_C( 2725), INT16_C( -886), INT16_C( -2109), INT16_C( 11531), INT16_C( 16711), INT16_C( -3155), INT16_C(-18546), INT16_C( 25102), INT16_C( 6573), INT16_C( 8974), INT16_C(-16497), INT16_C( -2212), INT16_C( 32256), INT16_C( -5239), INT16_C( -2780), INT16_C( -4298), INT16_C( 8613), INT16_C(-14508)) }, { simde_mm512_set_epi16(INT16_C( 13867), INT16_C( 28091), INT16_C(-30146), INT16_C( -8550), INT16_C( 31509), INT16_C( -2205), INT16_C( 9520), INT16_C( 29929), INT16_C( 24571), INT16_C(-27795), INT16_C(-12850), INT16_C( 14609), INT16_C( 31001), INT16_C( 823), INT16_C(-19839), INT16_C(-27185), INT16_C(-29756), INT16_C(-24530), INT16_C( 3633), INT16_C(-20036), INT16_C( 30184), INT16_C( 27396), INT16_C( 1171), INT16_C( 25936), INT16_C( -3833), INT16_C( -7750), INT16_C( 19453), INT16_C( 30002), INT16_C( 6315), INT16_C( 244), INT16_C( 8399), INT16_C( -8080)), simde_mm512_set_epi16(INT16_C( 18752), INT16_C( 27431), INT16_C(-11832), INT16_C(-22911), INT16_C(-22667), INT16_C(-23791), INT16_C(-17993), INT16_C( 11401), INT16_C( 26966), INT16_C( 26500), INT16_C( 7486), INT16_C( 7825), INT16_C( 17767), INT16_C( -7030), INT16_C(-29302), INT16_C(-27163), INT16_C(-10544), 
INT16_C(-18630), INT16_C(-13432), INT16_C( 31285), INT16_C(-30604), INT16_C( 29467), INT16_C(-31755), INT16_C( 883), INT16_C( 23995), INT16_C(-22467), INT16_C(-11949), INT16_C( 11327), INT16_C(-28925), INT16_C( 7518), INT16_C( 30015), INT16_C( 30285)), simde_mm512_set_epi16(INT16_C( 13867), INT16_C( 660), INT16_C( -6482), INT16_C( -8550), INT16_C( 8842), INT16_C( -2205), INT16_C( 9520), INT16_C( 7127), INT16_C( 24571), INT16_C( -1295), INT16_C( -5364), INT16_C( 6784), INT16_C( 13234), INT16_C( 823), INT16_C(-19839), INT16_C( -22), INT16_C( -8668), INT16_C( -5900), INT16_C( 3633), INT16_C(-20036), INT16_C( 30184), INT16_C( 27396), INT16_C( 1171), INT16_C( 329), INT16_C( -3833), INT16_C( -7750), INT16_C( 7504), INT16_C( 7348), INT16_C( 6315), INT16_C( 244), INT16_C( 8399), INT16_C( -8080)) }, { simde_mm512_set_epi16(INT16_C( 19003), INT16_C( 26627), INT16_C( -1831), INT16_C(-31318), INT16_C(-29481), INT16_C( 13847), INT16_C(-20911), INT16_C( 9042), INT16_C(-29388), INT16_C( 11660), INT16_C( 32339), INT16_C(-25821), INT16_C(-18358), INT16_C( 21002), INT16_C( -4830), INT16_C( 8527), INT16_C( 26072), INT16_C( 29611), INT16_C( 18348), INT16_C( 953), INT16_C(-32154), INT16_C( 22717), INT16_C(-15414), INT16_C(-13122), INT16_C( -6258), INT16_C(-11311), INT16_C( 31952), INT16_C( 29752), INT16_C(-28048), INT16_C( 20614), INT16_C( 1055), INT16_C( -4387)), simde_mm512_set_epi16(INT16_C( -5809), INT16_C( 3072), INT16_C( 8626), INT16_C( 14922), INT16_C( -1420), INT16_C(-29164), INT16_C( 22591), INT16_C( 8828), INT16_C( -1488), INT16_C( -8728), INT16_C( -8885), INT16_C(-25776), INT16_C( -5719), INT16_C(-14622), INT16_C( 21275), INT16_C(-30430), INT16_C( 6020), INT16_C( 27245), INT16_C(-30773), INT16_C( 25208), INT16_C( 25908), INT16_C( 21036), INT16_C(-29170), INT16_C( 25589), INT16_C( 2188), INT16_C(-29317), INT16_C( -9309), INT16_C(-15127), INT16_C( 8889), INT16_C( -7060), INT16_C( 24556), INT16_C( 24873)), simde_mm512_set_epi16(INT16_C( 1576), INT16_C( 2051), INT16_C( -1831), 
INT16_C( -1474), INT16_C( -1081), INT16_C( 13847), INT16_C(-20911), INT16_C( 214), INT16_C( -1116), INT16_C( 2932), INT16_C( 5684), INT16_C( -45), INT16_C( -1201), INT16_C( 6380), INT16_C( -4830), INT16_C( 8527), INT16_C( 1992), INT16_C( 2366), INT16_C( 18348), INT16_C( 953), INT16_C( -6246), INT16_C( 1681), INT16_C(-15414), INT16_C(-13122), INT16_C( -1882), INT16_C(-11311), INT16_C( 4025), INT16_C( 14625), INT16_C( -1381), INT16_C( 6494), INT16_C( 1055), INT16_C( -4387)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_rem_epi16(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_rem_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 691121094), INT32_C( 674034227), INT32_C(-1965434887), INT32_C( -920286947), INT32_C( -374673026), INT32_C(-1240805178), INT32_C( 1568850865), INT32_C(-1142977539), INT32_C(-1079516608), INT32_C( -708153743), INT32_C( 1508722402), INT32_C(-2074345640), INT32_C( 1747596798), INT32_C(-2063703989), INT32_C( 527472553), INT32_C(-1403096998)), simde_mm512_set_epi32(INT32_C(-1764810870), INT32_C( 1179683687), INT32_C(-1646326602), INT32_C( -671967289), INT32_C(-1586327268), INT32_C( 1691051285), INT32_C( 50347892), INT32_C( 728425428), INT32_C( 1192263444), INT32_C(-2086343723), INT32_C( 1322777130), INT32_C( 163989560), INT32_C( 1492341726), INT32_C( 298608154), INT32_C( 1250819173), INT32_C( -650971253)), simde_mm512_set_epi32(INT32_C( 691121094), INT32_C( 674034227), INT32_C( -319108285), INT32_C( -248319658), INT32_C( -374673026), INT32_C(-1240805178), INT32_C( 8066213), INT32_C( -414552111), INT32_C(-1079516608), INT32_C( -708153743), INT32_C( 185945272), INT32_C( -106470920), INT32_C( 255255072), INT32_C( -272055065), INT32_C( 527472553), INT32_C( -101154492)) }, { simde_mm512_set_epi32(INT32_C( 1314482530), 
INT32_C(-1297250617), INT32_C( -739008036), INT32_C(-1419039999), INT32_C(-1004264650), INT32_C( 1580565751), INT32_C( -471064457), INT32_C( 2081361826), INT32_C( 493161721), INT32_C(-1195115819), INT32_C( 894221337), INT32_C(-1330460172), INT32_C( 492373082), INT32_C( -13096811), INT32_C(-2087181083), INT32_C( -341007878)), simde_mm512_set_epi32(INT32_C(-1138893231), INT32_C( -687161637), INT32_C( 1828175063), INT32_C( -389420023), INT32_C( -193211433), INT32_C( -857989172), INT32_C( -448329300), INT32_C(-1601364212), INT32_C( 1710148738), INT32_C( 1974123080), INT32_C(-1424367196), INT32_C( 118588227), INT32_C( 542053192), INT32_C( 499863549), INT32_C( 957375358), INT32_C(-1291033589)), simde_mm512_set_epi32(INT32_C( 175589299), INT32_C( -610088980), INT32_C( -739008036), INT32_C( -250779930), INT32_C( -38207485), INT32_C( 722576579), INT32_C( -22735157), INT32_C( 479997614), INT32_C( 493161721), INT32_C(-1195115819), INT32_C( 894221337), INT32_C( -25989675), INT32_C( 492373082), INT32_C( -13096811), INT32_C( -172430367), INT32_C( -341007878)) }, { simde_mm512_set_epi32(INT32_C( 1763100483), INT32_C( -518004559), INT32_C(-1450358898), INT32_C(-1409866198), INT32_C( 269910347), INT32_C( 433971495), INT32_C( 1441956227), INT32_C( 1018271575), INT32_C( 1734496959), INT32_C( 380846712), INT32_C( -941967689), INT32_C( -739443621), INT32_C( 1995198557), INT32_C( -980655097), INT32_C(-1888383043), INT32_C( 1779168063)), simde_mm512_set_epi32(INT32_C( -665465241), INT32_C( -342195833), INT32_C( 2102184556), INT32_C( 877111492), INT32_C( 1183491905), INT32_C( -576610979), INT32_C(-1061316197), INT32_C( -808097400), INT32_C( -362876916), INT32_C(-1845390533), INT32_C( -48621016), INT32_C( 201516689), INT32_C(-1435930720), INT32_C(-1932876068), INT32_C(-1153303869), INT32_C( 562234020)), simde_mm512_set_epi32(INT32_C( 432170001), INT32_C( -175808726), INT32_C(-1450358898), INT32_C( -532754706), INT32_C( 269910347), INT32_C( 433971495), INT32_C( 380640030), INT32_C( 
210174175), INT32_C( 282989295), INT32_C( 380846712), INT32_C( -18168385), INT32_C( -134893554), INT32_C( 559267837), INT32_C( -980655097), INT32_C( -735079174), INT32_C( 92466003)) }, { simde_mm512_set_epi32(INT32_C( 495870887), INT32_C( -382126427), INT32_C( 915244711), INT32_C( 5081424), INT32_C( 1422501384), INT32_C( -163979724), INT32_C(-1516900265), INT32_C( 497965579), INT32_C( 910061584), INT32_C( 2002226944), INT32_C( -621963189), INT32_C( -48343218), INT32_C( 523093293), INT32_C(-1235205724), INT32_C(-2088961787), INT32_C( 1943141679)), simde_mm512_set_epi32(INT32_C( 1206471293), INT32_C( 1374915518), INT32_C( 531653117), INT32_C( 2075187308), INT32_C( -144618549), INT32_C(-2131865715), INT32_C( 1444783055), INT32_C( 1878625233), INT32_C( 1755684145), INT32_C(-2061726371), INT32_C(-1050443653), INT32_C(-1299940555), INT32_C(-2116696545), INT32_C( 1493088054), INT32_C( -179829877), INT32_C( 651362699)), simde_mm512_set_epi32(INT32_C( 495870887), INT32_C( -382126427), INT32_C( 383591594), INT32_C( 5081424), INT32_C( 120934443), INT32_C( -163979724), INT32_C( -72117210), INT32_C( 497965579), INT32_C( 910061584), INT32_C( 2002226944), INT32_C( -621963189), INT32_C( -48343218), INT32_C( 523093293), INT32_C(-1235205724), INT32_C( -110833140), INT32_C( 640416281)) }, { simde_mm512_set_epi32(INT32_C(-1637276628), INT32_C( 448681074), INT32_C( 1334667053), INT32_C( 502667641), INT32_C( 855395764), INT32_C(-1672092948), INT32_C( 808531712), INT32_C( 454488139), INT32_C( 123547093), INT32_C( 483090439), INT32_C(-1126329757), INT32_C(-1201220189), INT32_C( -136050629), INT32_C( -220620904), INT32_C( 1398655610), INT32_C( 1722520923)), simde_mm512_set_epi32(INT32_C( 604721400), INT32_C( 1471174399), INT32_C(-1803940708), INT32_C(-1765392929), INT32_C( 298473775), INT32_C(-1404600737), INT32_C(-1231334921), INT32_C( -238983338), INT32_C( -145797796), INT32_C( -181019162), INT32_C(-1910480170), INT32_C(-1860760170), INT32_C( -371855625), INT32_C(-1106093489), INT32_C( 
1982658188), INT32_C( 863153207)), simde_mm512_set_epi32(INT32_C( -427833828), INT32_C( 448681074), INT32_C( 1334667053), INT32_C( 502667641), INT32_C( 258448214), INT32_C( -267492211), INT32_C( 808531712), INT32_C( 215504801), INT32_C( 123547093), INT32_C( 121052115), INT32_C(-1126329757), INT32_C(-1201220189), INT32_C( -136050629), INT32_C( -220620904), INT32_C( 1398655610), INT32_C( 859367716)) }, { simde_mm512_set_epi32(INT32_C( 1463758672), INT32_C( 602211615), INT32_C( -464964305), INT32_C(-1430226195), INT32_C( 797104998), INT32_C(-1557543977), INT32_C( -952737410), INT32_C( 178625368), INT32_C(-1203806300), INT32_C( 1095216728), INT32_C(-1215405554), INT32_C( 430790402), INT32_C(-1081108478), INT32_C( 2113970745), INT32_C( -182128842), INT32_C( 564512596)), simde_mm512_set_epi32(INT32_C( 1997049765), INT32_C( 505563651), INT32_C( 463125220), INT32_C( -451213519), INT32_C(-1948793453), INT32_C(-2137102362), INT32_C(-1703809327), INT32_C( 389679318), INT32_C( -355192167), INT32_C(-1801602389), INT32_C( 2006619059), INT32_C( -903558132), INT32_C( 1533151625), INT32_C( 2122196136), INT32_C( 1690360675), INT32_C( 1484935627)), simde_mm512_set_epi32(INT32_C( 1463758672), INT32_C( 96647964), INT32_C( -1839085), INT32_C( -76585638), INT32_C( 797104998), INT32_C(-1557543977), INT32_C( -952737410), INT32_C( 178625368), INT32_C( -138229799), INT32_C( 1095216728), INT32_C(-1215405554), INT32_C( 430790402), INT32_C(-1081108478), INT32_C( 2113970745), INT32_C( -182128842), INT32_C( 564512596)) }, { simde_mm512_set_epi32(INT32_C( 908815803), INT32_C(-1975591270), INT32_C( 2065037155), INT32_C( 623932649), INT32_C( 1610322797), INT32_C( -842122991), INT32_C( 2031682359), INT32_C(-1300130353), INT32_C(-1950048210), INT32_C( 238137788), INT32_C( 1978166020), INT32_C( 76768592), INT32_C( -251141702), INT32_C( 1274901810), INT32_C( 413860084), INT32_C( 550494320)), simde_mm512_set_epi32(INT32_C( 1228958503), INT32_C( -775379327), INT32_C(-1485462767), INT32_C(-1179177847), 
INT32_C( 1767270276), INT32_C( 490610321), INT32_C( 1164436618), INT32_C(-1920297499), INT32_C( -690964678), INT32_C( -880248267), INT32_C(-2005634277), INT32_C(-2081094797), INT32_C( 1572579389), INT32_C( -783078337), INT32_C(-1895621282), INT32_C( 1967093325)), simde_mm512_set_epi32(INT32_C( 908815803), INT32_C( -424832616), INT32_C( 579574388), INT32_C( 623932649), INT32_C( 1610322797), INT32_C( -351512670), INT32_C( 867245741), INT32_C(-1300130353), INT32_C( -568118854), INT32_C( 238137788), INT32_C( 1978166020), INT32_C( 76768592), INT32_C( -251141702), INT32_C( 491823473), INT32_C( 413860084), INT32_C( 550494320)) }, { simde_mm512_set_epi32(INT32_C( 1245407235), INT32_C( -119962198), INT32_C(-1932052969), INT32_C(-1370414254), INT32_C(-1925960308), INT32_C( 2119408419), INT32_C(-1203088886), INT32_C( -316530353), INT32_C( 1708684203), INT32_C( 1202455481), INT32_C(-2107221827), INT32_C(-1010119490), INT32_C( -410070063), INT32_C( 2094036024), INT32_C(-1838133114), INT32_C( 69201629)), simde_mm512_set_epi32(INT32_C( -380695552), INT32_C( 565328458), INT32_C( -93024748), INT32_C( 1480532604), INT32_C( -97460760), INT32_C( -582247600), INT32_C( -374749470), INT32_C( 1394313506), INT32_C( 394553965), INT32_C(-2016714120), INT32_C( 1697927724), INT32_C(-1911659531), INT32_C( 143428987), INT32_C( -610024215), INT32_C( 582607980), INT32_C( 1609326889)), simde_mm512_set_epi32(INT32_C( 103320579), INT32_C( -119962198), INT32_C( -71558009), INT32_C(-1370414254), INT32_C( -74205868), INT32_C( 372665619), INT32_C( -78840476), INT32_C( -316530353), INT32_C( 130468343), INT32_C( 1202455481), INT32_C( -409294103), INT32_C(-1010119490), INT32_C( -123212089), INT32_C( 263963379), INT32_C( -90309174), INT32_C( 69201629)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_rem_epi32(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int 
test_simde_mm512_mask_rem_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i src; simde__mmask16 k; simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi32(INT32_C( 691121094), INT32_C( 674034227), INT32_C(-1965434887), INT32_C( -920286947), INT32_C( -374673026), INT32_C(-1240805178), INT32_C( 1568850865), INT32_C(-1142977539), INT32_C(-1079516608), INT32_C( -708153743), INT32_C( 1508722402), INT32_C(-2074345640), INT32_C( 1747596798), INT32_C(-2063703989), INT32_C( 527472553), INT32_C(-1403096998)), UINT16_C(63371), simde_mm512_set_epi32(INT32_C( -341007878), INT32_C(-1764810870), INT32_C( 1179683687), INT32_C(-1646326602), INT32_C( -671967289), INT32_C(-1586327268), INT32_C( 1691051285), INT32_C( 50347892), INT32_C( 728425428), INT32_C( 1192263444), INT32_C(-2086343723), INT32_C( 1322777130), INT32_C( 163989560), INT32_C( 1492341726), INT32_C( 298608154), INT32_C( 1250819173)), simde_mm512_set_epi32(INT32_C(-1291033589), INT32_C( 1314482530), INT32_C(-1297250617), INT32_C( -739008036), INT32_C(-1419039999), INT32_C(-1004264650), INT32_C( 1580565751), INT32_C( -471064457), INT32_C( 2081361826), INT32_C( 493161721), INT32_C(-1195115819), INT32_C( 894221337), INT32_C(-1330460172), INT32_C( 492373082), INT32_C( -13096811), INT32_C(-2087181083)), simde_mm512_set_epi32(INT32_C( -341007878), INT32_C( -450328340), INT32_C( 1179683687), INT32_C( -168310530), INT32_C( -374673026), INT32_C( -582062618), INT32_C( 110485534), INT32_C( 50347892), INT32_C( 728425428), INT32_C( -708153743), INT32_C( 1508722402), INT32_C(-2074345640), INT32_C( 163989560), INT32_C(-2063703989), INT32_C( 10478312), INT32_C( 1250819173)) }, { simde_mm512_set_epi32(INT32_C( 1779168063), INT32_C(-1138893231), INT32_C( -687161637), INT32_C( 1828175063), INT32_C( -389420023), INT32_C( -193211433), INT32_C( -857989172), INT32_C( -448329300), INT32_C(-1601364212), INT32_C( 1710148738), INT32_C( 1974123080), INT32_C(-1424367196), INT32_C( 118588227), INT32_C( 
542053192), INT32_C( 499863549), INT32_C( 957375358)), UINT16_C(36797), simde_mm512_set_epi32(INT32_C(-1153303869), INT32_C( 562234020), INT32_C( 1763100483), INT32_C( -518004559), INT32_C(-1450358898), INT32_C(-1409866198), INT32_C( 269910347), INT32_C( 433971495), INT32_C( 1441956227), INT32_C( 1018271575), INT32_C( 1734496959), INT32_C( 380846712), INT32_C( -941967689), INT32_C( -739443621), INT32_C( 1995198557), INT32_C( -980655097)), simde_mm512_set_epi32(INT32_C(-2088961787), INT32_C( 1943141679), INT32_C( -665465241), INT32_C( -342195833), INT32_C( 2102184556), INT32_C( 877111492), INT32_C( 1183491905), INT32_C( -576610979), INT32_C(-1061316197), INT32_C( -808097400), INT32_C( -362876916), INT32_C(-1845390533), INT32_C( -48621016), INT32_C( 201516689), INT32_C(-1435930720), INT32_C(-1932876068)), simde_mm512_set_epi32(INT32_C(-1153303869), INT32_C(-1138893231), INT32_C( -687161637), INT32_C( 1828175063), INT32_C(-1450358898), INT32_C( -532754706), INT32_C( 269910347), INT32_C( 433971495), INT32_C( 380640030), INT32_C( 1710148738), INT32_C( 282989295), INT32_C( 380846712), INT32_C( -18168385), INT32_C( -134893554), INT32_C( 499863549), INT32_C( -980655097)) }, { simde_mm512_set_epi32(INT32_C( -179829877), INT32_C( 651362699), INT32_C( 495870887), INT32_C( -382126427), INT32_C( 915244711), INT32_C( 5081424), INT32_C( 1422501384), INT32_C( -163979724), INT32_C(-1516900265), INT32_C( 497965579), INT32_C( 910061584), INT32_C( 2002226944), INT32_C( -621963189), INT32_C( -48343218), INT32_C( 523093293), INT32_C(-1235205724)), UINT16_C(46902), simde_mm512_set_epi32(INT32_C( -220620904), INT32_C( 1398655610), INT32_C( 1722520923), INT32_C( 1206471293), INT32_C( 1374915518), INT32_C( 531653117), INT32_C( 2075187308), INT32_C( -144618549), INT32_C(-2131865715), INT32_C( 1444783055), INT32_C( 1878625233), INT32_C( 1755684145), INT32_C(-2061726371), INT32_C(-1050443653), INT32_C(-1299940555), INT32_C(-2116696545)), simde_mm512_set_epi32(INT32_C(-1106093489), INT32_C( 
1982658188), INT32_C( 863153207), INT32_C(-1637276628), INT32_C( 448681074), INT32_C( 1334667053), INT32_C( 502667641), INT32_C( 855395764), INT32_C(-1672092948), INT32_C( 808531712), INT32_C( 454488139), INT32_C( 123547093), INT32_C( 483090439), INT32_C(-1126329757), INT32_C(-1201220189), INT32_C( -136050629)), simde_mm512_set_epi32(INT32_C( -220620904), INT32_C( 651362699), INT32_C( 859367716), INT32_C( 1206471293), INT32_C( 915244711), INT32_C( 531653117), INT32_C( 64516744), INT32_C( -144618549), INT32_C(-1516900265), INT32_C( 497965579), INT32_C( 60672677), INT32_C( 26024843), INT32_C( -621963189), INT32_C(-1050443653), INT32_C( -98720366), INT32_C(-1235205724)) }, { simde_mm512_set_epi32(INT32_C( 2113970745), INT32_C( -182128842), INT32_C( 564512596), INT32_C( 604721400), INT32_C( 1471174399), INT32_C(-1803940708), INT32_C(-1765392929), INT32_C( 298473775), INT32_C(-1404600737), INT32_C(-1231334921), INT32_C( -238983338), INT32_C( -145797796), INT32_C( -181019162), INT32_C(-1910480170), INT32_C(-1860760170), INT32_C( -371855625)), UINT16_C(38914), simde_mm512_set_epi32(INT32_C( 1533151625), INT32_C( 2122196136), INT32_C( 1690360675), INT32_C( 1484935627), INT32_C( 1463758672), INT32_C( 602211615), INT32_C( -464964305), INT32_C(-1430226195), INT32_C( 797104998), INT32_C(-1557543977), INT32_C( -952737410), INT32_C( 178625368), INT32_C(-1203806300), INT32_C( 1095216728), INT32_C(-1215405554), INT32_C( 430790402)), simde_mm512_set_epi32(INT32_C( -251141702), INT32_C( 1274901810), INT32_C( 413860084), INT32_C( 550494320), INT32_C( 1997049765), INT32_C( 505563651), INT32_C( 463125220), INT32_C( -451213519), INT32_C(-1948793453), INT32_C(-2137102362), INT32_C(-1703809327), INT32_C( 389679318), INT32_C( -355192167), INT32_C(-1801602389), INT32_C( 2006619059), INT32_C( -903558132)), simde_mm512_set_epi32(INT32_C( 26301413), INT32_C( -182128842), INT32_C( 564512596), INT32_C( 383946987), INT32_C( 1463758672), INT32_C(-1803940708), INT32_C(-1765392929), INT32_C( 
298473775), INT32_C(-1404600737), INT32_C(-1231334921), INT32_C( -238983338), INT32_C( -145797796), INT32_C( -181019162), INT32_C(-1910480170), INT32_C(-1215405554), INT32_C( -371855625)) }, { simde_mm512_set_epi32(INT32_C( 1572579389), INT32_C( -783078337), INT32_C(-1895621282), INT32_C( 1967093325), INT32_C( 908815803), INT32_C(-1975591270), INT32_C( 2065037155), INT32_C( 623932649), INT32_C( 1610322797), INT32_C( -842122991), INT32_C( 2031682359), INT32_C(-1300130353), INT32_C(-1950048210), INT32_C( 238137788), INT32_C( 1978166020), INT32_C( 76768592)), UINT16_C( 883), simde_mm512_set_epi32(INT32_C(-1010119490), INT32_C( -410070063), INT32_C( 2094036024), INT32_C(-1838133114), INT32_C( 69201629), INT32_C( 1228958503), INT32_C( -775379327), INT32_C(-1485462767), INT32_C(-1179177847), INT32_C( 1767270276), INT32_C( 490610321), INT32_C( 1164436618), INT32_C(-1920297499), INT32_C( -690964678), INT32_C( -880248267), INT32_C(-2005634277)), simde_mm512_set_epi32(INT32_C(-1911659531), INT32_C( 143428987), INT32_C( -610024215), INT32_C( 582607980), INT32_C( 1609326889), INT32_C( 1245407235), INT32_C( -119962198), INT32_C(-1932052969), INT32_C(-1370414254), INT32_C(-1925960308), INT32_C( 2119408419), INT32_C(-1203088886), INT32_C( -316530353), INT32_C( 1708684203), INT32_C( 1202455481), INT32_C(-2107221827)), simde_mm512_set_epi32(INT32_C( 1572579389), INT32_C( -783078337), INT32_C(-1895621282), INT32_C( 1967093325), INT32_C( 908815803), INT32_C(-1975591270), INT32_C( -55606139), INT32_C(-1485462767), INT32_C( 1610322797), INT32_C( 1767270276), INT32_C( 490610321), INT32_C( 1164436618), INT32_C(-1950048210), INT32_C( 238137788), INT32_C( -880248267), INT32_C(-2005634277)) }, { simde_mm512_set_epi32(INT32_C( 2117071873), INT32_C(-1437889529), INT32_C( -376074104), INT32_C( 1087893388), INT32_C( -443183285), INT32_C( -380695552), INT32_C( 565328458), INT32_C( -93024748), INT32_C( 1480532604), INT32_C( -97460760), INT32_C( -582247600), INT32_C( -374749470), INT32_C( 
1394313506), INT32_C( 394553965), INT32_C(-2016714120), INT32_C( 1697927724)), UINT16_C(12254), simde_mm512_set_epi32(INT32_C( 56443211), INT32_C(-2036514643), INT32_C( -510270824), INT32_C( 1139427205), INT32_C( 1090384090), INT32_C(-1905231405), INT32_C(-2079359983), INT32_C( -477294891), INT32_C( -673197028), INT32_C( 2071747620), INT32_C( -442789099), INT32_C( -601334711), INT32_C( 319530416), INT32_C(-2115012481), INT32_C( -501730903), INT32_C( 340519338)), simde_mm512_set_epi32(INT32_C( 1219537084), INT32_C( 1349635715), INT32_C( 732887738), INT32_C(-1728641921), INT32_C(-1388433411), INT32_C( 1765754685), INT32_C(-1574983663), INT32_C( 846129112), INT32_C( 1578410935), INT32_C(-1659872458), INT32_C( 1045536663), INT32_C( 957117985), INT32_C(-1265958651), INT32_C( 1309498779), INT32_C(-1001015299), INT32_C( 1022360677)), simde_mm512_set_epi32(INT32_C( 2117071873), INT32_C(-1437889529), INT32_C( -510270824), INT32_C( 1087893388), INT32_C( 1090384090), INT32_C( -139476720), INT32_C( -504376320), INT32_C( -477294891), INT32_C( -673197028), INT32_C( 411875162), INT32_C( -582247600), INT32_C( -601334711), INT32_C( 319530416), INT32_C( -805513702), INT32_C( -501730903), INT32_C( 1697927724)) }, { simde_mm512_set_epi32(INT32_C( -304885978), INT32_C( 991545752), INT32_C( -143034937), INT32_C( 843112042), INT32_C( -227554783), INT32_C( 2124182542), INT32_C(-1526246088), INT32_C(-1991977382), INT32_C( 1224533822), INT32_C( -819361196), INT32_C( -684010252), INT32_C(-1738921185), INT32_C(-1259570772), INT32_C( -691865929), INT32_C( -973523371), INT32_C( 45581573)), UINT16_C(42669), simde_mm512_set_epi32(INT32_C( -156799603), INT32_C(-1073012339), INT32_C(-2130532125), INT32_C( 397240391), INT32_C( 200936922), INT32_C(-1030980309), INT32_C(-1758363174), INT32_C( -665586367), INT32_C( 453331046), INT32_C( 1704580573), INT32_C( 1606190487), INT32_C(-1085658047), INT32_C(-1335469644), INT32_C( -368070561), INT32_C(-1419559633), INT32_C( 2069966669)), 
simde_mm512_set_epi32(INT32_C( 1379668640), INT32_C( 66581512), INT32_C( -557301797), INT32_C( 304428974), INT32_C(-1608262788), INT32_C( 532978979), INT32_C( 946958552), INT32_C(-1911324669), INT32_C(-2118093156), INT32_C( 283691898), INT32_C( -446072631), INT32_C( -458781294), INT32_C( 1951055651), INT32_C( 765387914), INT32_C( 822559116), INT32_C( 7445617)), simde_mm512_set_epi32(INT32_C( -156799603), INT32_C( 991545752), INT32_C( -458626734), INT32_C( 843112042), INT32_C( -227554783), INT32_C( -498001330), INT32_C( -811404622), INT32_C(-1991977382), INT32_C( 453331046), INT32_C( -819361196), INT32_C( 267972594), INT32_C(-1738921185), INT32_C(-1335469644), INT32_C( -368070561), INT32_C( -973523371), INT32_C( 85143)) }, { simde_mm512_set_epi32(INT32_C(-1981938926), INT32_C( 869237081), INT32_C( -190053534), INT32_C(-1469275330), INT32_C( -717100794), INT32_C(-1303072888), INT32_C(-2122918671), INT32_C( 1617119933), INT32_C( 1521363431), INT32_C( 553638116), INT32_C( 1036201367), INT32_C(-1187933851), INT32_C( -412155886), INT32_C( -760582943), INT32_C( -423751457), INT32_C( 1273589632)), UINT16_C(35103), simde_mm512_set_epi32(INT32_C(-1836595644), INT32_C( 260676470), INT32_C( 1724614860), INT32_C( -144514633), INT32_C( -478630580), INT32_C(-2086755061), INT32_C( 932145867), INT32_C(-1862372735), INT32_C( 1756892633), INT32_C( 382632965), INT32_C( 1295078740), INT32_C( -995802034), INT32_C( 152308919), INT32_C( -351555508), INT32_C( 31813624), INT32_C( 807463845)), simde_mm512_set_epi32(INT32_C( 615301803), INT32_C( 382786341), INT32_C( 1852603705), INT32_C( 1998007730), INT32_C( 231325888), INT32_C( 1842039329), INT32_C( 968682756), INT32_C( 316335394), INT32_C(-2071382094), INT32_C( -803185337), INT32_C(-2126995500), INT32_C( 1587647099), INT32_C(-1328358584), INT32_C( 320339033), INT32_C( 282380179), INT32_C( -108102092)), simde_mm512_set_epi32(INT32_C( -605992038), INT32_C( 869237081), INT32_C( -190053534), INT32_C(-1469275330), INT32_C( -15978804), 
INT32_C(-1303072888), INT32_C(-2122918671), INT32_C( -280695765), INT32_C( 1521363431), INT32_C( 553638116), INT32_C( 1036201367), INT32_C( -995802034), INT32_C( 152308919), INT32_C( -31216475), INT32_C( 31813624), INT32_C( 50749201)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_mask_rem_epi32(test_vec[i].src, test_vec[i].k, test_vec[i].a, test_vec[i].b); simde_assert_m512i_i32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_rem_epi64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_mm512_set_epi64(INT64_C( 2968342496979776051), INT64_C(-8441478558707775203), INT64_C(-1609208390309195578), INT64_C( 6738163160628300797), INT64_C(-4636488523262038415), INT64_C( 6479913377553186648), INT64_C( 7505871096235581515), INT64_C( 2265477367564496986)), simde_mm512_set_epi64(INT64_C(-7579804969095623833), INT64_C(-7070918910501808185), INT64_C(-6813223735121976043), INT64_C( 216242550290965460), INT64_C( 5120732502404950997), INT64_C( 5681284513410730040), INT64_C( 6409558907924801050), INT64_C( 5372227444888762251)), simde_mm512_set_epi64(INT64_C( 2968342496979776051), INT64_C(-1370559648205967018), INT64_C(-1609208390309195578), INT64_C( 34644101608371537), INT64_C(-4636488523262038415), INT64_C( 798628864142456608), INT64_C( 1096312188310780465), INT64_C( 2265477367564496986)) }, { simde_mm512_set_epi64(INT64_C( 5645659480511055559), INT64_C(-3174015343225263359), INT64_C(-4313283826698320649), INT64_C(-2023206435041636446), INT64_C( 2118113466433927893), INT64_C( 3840651400764901876), INT64_C( 2114726288902596757), INT64_C(-8964374488360902150)), simde_mm512_set_epi64(INT64_C(-4891509177172967717), INT64_C( 7851952110853286921), INT64_C( -829836782511317044), INT64_C(-1925559678644969716), INT64_C( 7345032902979795528), INT64_C(-6117610524196633789), INT64_C( 2328100732832272381), INT64_C( 4111895855610225675)), 
simde_mm512_set_epi64(INT64_C( 754150303338087842), INT64_C(-3174015343225263359), INT64_C( -164099914141735429), INT64_C( -97646756396666730), INT64_C( 2118113466433927893), INT64_C( 3840651400764901876), INT64_C( 2114726288902596757), INT64_C( -740582777140450800)) }, { simde_mm512_set_epi64(INT64_C( 7572458917823766705), INT64_C(-6229244031487498710), INT64_C( 1159256113650983207), INT64_C( 6193154838246823767), INT64_C( 7449607714297299576), INT64_C(-4045720414588175269), INT64_C( 8569312554655704071), INT64_C(-8110543410226793665)), simde_mm512_set_epi64(INT64_C(-2858151442766986873), INT64_C( 9028813919053392068), INT64_C( 5083059030774095197), INT64_C(-4558318353343223416), INT64_C(-1558544484243762373), INT64_C( -208825673416776047), INT64_C(-6167275479359641892), INT64_C(-4953402399143034204)), simde_mm512_set_epi64(INT64_C( 1856156032289792959), INT64_C(-6229244031487498710), INT64_C( 1159256113650983207), INT64_C( 1634836484903600351), INT64_C( 1215429777322250084), INT64_C( -78032619669430376), INT64_C( 2402037075296062179), INT64_C(-3157141011083759461)) }, { simde_mm512_set_epi64(INT64_C( 2129749246616352421), INT64_C( 3930946101587052880), INT64_C( 6109596926925725236), INT64_C(-6515037028970767861), INT64_C( 3908684742628183808), INT64_C(-2671311551824242866), INT64_C( 2246668589251707300), INT64_C(-8972022555815576273)), simde_mm512_set_epi64(INT64_C( 5181754748372749246), INT64_C( 2283432752406648940), INT64_C( -621131936186871923), INT64_C( 6205295972918594513), INT64_C( 7540605987113962845), INT64_C(-4511621132930745547), INT64_C(-9091142434838104266), INT64_C( -772363439907339893)), simde_mm512_set_epi64(INT64_C( 2129749246616352421), INT64_C( 1647513349180403940), INT64_C( 519409501243877929), INT64_C( -309741056052173348), INT64_C( 3908684742628183808), INT64_C(-2671311551824242866), INT64_C( 2246668589251707300), INT64_C( -476024716834837450)) }, { simde_mm512_set_epi64(INT64_C(-7032049571316476814), INT64_C( 5732351344186366329), INT64_C( 
3673896834139808492), INT64_C( 3472617261273378891), INT64_C( 530630724433960967), INT64_C(-4837549467732879965), INT64_C( -584332998080882792), INT64_C( 6007180105039451483)), simde_mm512_set_epi64(INT64_C( 2597258637662508799), INT64_C(-7747866342253511201), INT64_C( 1281935105229028959), INT64_C(-5288543212061759658), INT64_C( -626196761534931482), INT64_C(-8205449847372313194), INT64_C(-1597107745019766193), INT64_C( 8515452077469772855)), simde_mm512_set_epi64(INT64_C(-1837532295991459216), INT64_C( 5732351344186366329), INT64_C( 1110026623681750574), INT64_C( 3472617261273378891), INT64_C( 530630724433960967), INT64_C(-4837549467732879965), INT64_C( -584332998080882792), INT64_C( 6007180105039451483)) }, { simde_mm512_set_epi64(INT64_C( 6286795626078602527), INT64_C(-1997006480917628179), INT64_C( 3423539900625568727), INT64_C(-4091976017447117992), INT64_C(-5170308688123548072), INT64_C(-5220127105375971582), INT64_C(-4643325554324364743), INT64_C( -782237419483838636)), simde_mm512_set_epi64(INT64_C( 8577263429665049091), INT64_C( 1989107677696558897), INT64_C(-8370004145136048154), INT64_C(-7317805337695090474), INT64_C(-1525538738567005525), INT64_C( 8618363237326703628), INT64_C( 6584836091306452136), INT64_C( 7260043819054420427)), simde_mm512_set_epi64(INT64_C( 6286795626078602527), INT64_C( -7898803221069282), INT64_C( 3423539900625568727), INT64_C(-4091976017447117992), INT64_C( -593692472422531497), INT64_C(-5220127105375971582), INT64_C(-4643325554324364743), INT64_C( -782237419483838636)) }, { simde_mm512_set_epi64(INT64_C( 3903334154292354714), INT64_C( 8869267046373815529), INT64_C( 6916283752571091217), INT64_C( 8726009290759968207), INT64_C(-8375393287335202372), INT64_C( 8496158362035250512), INT64_C(-1078645395476875982), INT64_C( 1777515526450307184)), simde_mm512_set_epi64(INT64_C( 5278336582045705857), INT64_C(-6380014000574878583), INT64_C( 7590368039103504017), INT64_C( 5001217194949514725), INT64_C(-2967670691286451659), 
INT64_C(-8614133625237732493), INT64_C( 6754177049630551103), INT64_C(-8141631409824500147)), simde_mm512_set_epi64(INT64_C( 3903334154292354714), INT64_C( 2489253045798936946), INT64_C( 6916283752571091217), INT64_C( 3724792095810453482), INT64_C(-2440051904762299054), INT64_C( 8496158362035250512), INT64_C(-1078645395476875982), INT64_C( 1777515526450307184)) }, { simde_mm512_set_epi64(INT64_C( 5348983348701791658), INT64_C(-8298104313070148782), INT64_C(-8271936534134678749), INT64_C(-5167227415572635313), INT64_C( 7338742772279280569), INT64_C(-9050448829097521986), INT64_C(-1761237507559623624), INT64_C(-7894721610255438115)), simde_mm512_set_epi64(INT64_C(-1635074945007338934), INT64_C( -399538248898108804), INT64_C( -418590773130585264), INT64_C(-1609536716449019614), INT64_C( 1694596378460381816), INT64_C( 7292544047935022069), INT64_C( 616022812148352233), INT64_C( 2502282222097948969)), simde_mm512_set_epi64(INT64_C( 443758513679774856), INT64_C( -307339335107972702), INT64_C( -318711844653558733), INT64_C( -338617266225576471), INT64_C( 560357258437753305), INT64_C(-1757904781162499917), INT64_C( -529191883262919158), INT64_C( -387874943961591208)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_rem_epi64(test_vec[i].a, test_vec[i].b); simde_assert_m512i_i64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_rem_epu8(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu8(UINT8_C( 41), UINT8_C( 49), UINT8_C(171), UINT8_C(198), UINT8_C( 40), UINT8_C( 44), UINT8_C(242), UINT8_C( 51), UINT8_C(138), UINT8_C(217), UINT8_C(215), UINT8_C(249), UINT8_C(201), UINT8_C( 37), UINT8_C(137), UINT8_C( 29), UINT8_C(233), UINT8_C(170), UINT8_C(241), UINT8_C(126), UINT8_C(182), UINT8_C( 10), UINT8_C(208), UINT8_C(198), UINT8_C( 93), UINT8_C(130), UINT8_C(195), UINT8_C(177), UINT8_C(187), UINT8_C(223), UINT8_C(139), 
UINT8_C(253), UINT8_C(191), UINT8_C(167), UINT8_C(226), UINT8_C( 64), UINT8_C(213), UINT8_C(202), UINT8_C(110), UINT8_C(113), UINT8_C( 89), UINT8_C(237), UINT8_C( 70), UINT8_C(226), UINT8_C(132), UINT8_C( 91), UINT8_C(255), UINT8_C( 88), UINT8_C(104), UINT8_C( 42), UINT8_C( 53), UINT8_C(254), UINT8_C(132), UINT8_C(254), UINT8_C( 96), UINT8_C( 75), UINT8_C( 31), UINT8_C(112), UINT8_C(151), UINT8_C(169), UINT8_C(172), UINT8_C( 94), UINT8_C(112), UINT8_C( 90)), simde_x_mm512_set_epu8(UINT8_C(195), UINT8_C( 49), UINT8_C( 14), UINT8_C(170), UINT8_C(203), UINT8_C(167), UINT8_C( 3), UINT8_C(215), UINT8_C( 63), UINT8_C(248), UINT8_C( 55), UINT8_C(219), UINT8_C(221), UINT8_C(135), UINT8_C( 61), UINT8_C(191), UINT8_C(209), UINT8_C( 91), UINT8_C( 87), UINT8_C(137), UINT8_C( 87), UINT8_C( 76), UINT8_C( 44), UINT8_C(140), UINT8_C( 2), UINT8_C(200), UINT8_C( 36), UINT8_C(195), UINT8_C(200), UINT8_C(125), UINT8_C(254), UINT8_C(139), UINT8_C(226), UINT8_C( 71), UINT8_C( 92), UINT8_C(129), UINT8_C(182), UINT8_C(119), UINT8_C(247), UINT8_C( 34), UINT8_C(121), UINT8_C( 85), UINT8_C(153), UINT8_C(116), UINT8_C(218), UINT8_C( 21), UINT8_C(101), UINT8_C(122), UINT8_C( 10), UINT8_C(231), UINT8_C( 54), UINT8_C( 71), UINT8_C(156), UINT8_C(149), UINT8_C(244), UINT8_C( 84), UINT8_C(148), UINT8_C( 85), UINT8_C(170), UINT8_C(184), UINT8_C( 94), UINT8_C(154), UINT8_C(229), UINT8_C( 11)), simde_x_mm512_set_epu8(UINT8_C( 41), UINT8_C( 0), UINT8_C( 3), UINT8_C( 28), UINT8_C( 40), UINT8_C( 44), UINT8_C( 2), UINT8_C( 51), UINT8_C( 12), UINT8_C(217), UINT8_C( 50), UINT8_C( 30), UINT8_C(201), UINT8_C( 37), UINT8_C( 15), UINT8_C( 29), UINT8_C( 24), UINT8_C( 79), UINT8_C( 67), UINT8_C(126), UINT8_C( 8), UINT8_C( 10), UINT8_C( 32), UINT8_C( 58), UINT8_C( 1), UINT8_C(130), UINT8_C( 15), UINT8_C(177), UINT8_C(187), UINT8_C( 98), UINT8_C(139), UINT8_C(114), UINT8_C(191), UINT8_C( 25), UINT8_C( 42), UINT8_C( 64), UINT8_C( 31), UINT8_C( 83), UINT8_C(110), UINT8_C( 11), UINT8_C( 89), UINT8_C( 67), UINT8_C( 
70), UINT8_C(110), UINT8_C(132), UINT8_C( 7), UINT8_C( 53), UINT8_C( 88), UINT8_C( 4), UINT8_C( 42), UINT8_C( 53), UINT8_C( 41), UINT8_C(132), UINT8_C(105), UINT8_C( 96), UINT8_C( 75), UINT8_C( 31), UINT8_C( 27), UINT8_C(151), UINT8_C(169), UINT8_C( 78), UINT8_C( 94), UINT8_C(112), UINT8_C( 2)) }, { simde_x_mm512_set_epu8(UINT8_C(216), UINT8_C( 85), UINT8_C(206), UINT8_C(103), UINT8_C(235), UINT8_C(154), UINT8_C(129), UINT8_C(135), UINT8_C(125), UINT8_C( 76), UINT8_C(202), UINT8_C(108), UINT8_C( 52), UINT8_C( 71), UINT8_C(168), UINT8_C(196), UINT8_C( 70), UINT8_C(138), UINT8_C(167), UINT8_C( 65), UINT8_C(221), UINT8_C(161), UINT8_C(157), UINT8_C( 93), UINT8_C(192), UINT8_C(189), UINT8_C(153), UINT8_C(155), UINT8_C(207), UINT8_C(213), UINT8_C(105), UINT8_C(136), UINT8_C(234), UINT8_C( 94), UINT8_C(240), UINT8_C( 12), UINT8_C(146), UINT8_C( 1), UINT8_C(147), UINT8_C( 59), UINT8_C(253), UINT8_C( 26), UINT8_C( 26), UINT8_C( 40), UINT8_C( 12), UINT8_C( 2), UINT8_C(230), UINT8_C(145), UINT8_C(170), UINT8_C(105), UINT8_C(111), UINT8_C(160), UINT8_C(140), UINT8_C(202), UINT8_C(166), UINT8_C(220), UINT8_C(187), UINT8_C( 65), UINT8_C(250), UINT8_C(195), UINT8_C( 33), UINT8_C(131), UINT8_C( 2), UINT8_C(164)), simde_x_mm512_set_epu8(UINT8_C(120), UINT8_C(127), UINT8_C( 28), UINT8_C( 95), UINT8_C(175), UINT8_C(223), UINT8_C(119), UINT8_C(214), UINT8_C(220), UINT8_C(102), UINT8_C( 86), UINT8_C( 22), UINT8_C(119), UINT8_C(207), UINT8_C( 12), UINT8_C(183), UINT8_C(172), UINT8_C(242), UINT8_C(173), UINT8_C(249), UINT8_C( 52), UINT8_C(108), UINT8_C(128), UINT8_C(203), UINT8_C( 85), UINT8_C(135), UINT8_C(227), UINT8_C( 35), UINT8_C(187), UINT8_C( 24), UINT8_C(250), UINT8_C(219), UINT8_C(253), UINT8_C( 62), UINT8_C(125), UINT8_C(236), UINT8_C( 75), UINT8_C( 13), UINT8_C( 79), UINT8_C( 81), UINT8_C(177), UINT8_C(221), UINT8_C(251), UINT8_C(181), UINT8_C(159), UINT8_C(182), UINT8_C( 11), UINT8_C( 11), UINT8_C( 39), UINT8_C( 37), UINT8_C( 39), UINT8_C(208), UINT8_C(136), UINT8_C(180), 
UINT8_C(215), UINT8_C(139), UINT8_C(144), UINT8_C(128), UINT8_C(203), UINT8_C(206), UINT8_C(173), UINT8_C( 36), UINT8_C(133), UINT8_C(175)), simde_x_mm512_set_epu8(UINT8_C( 96), UINT8_C( 85), UINT8_C( 10), UINT8_C( 8), UINT8_C( 60), UINT8_C(154), UINT8_C( 10), UINT8_C(135), UINT8_C(125), UINT8_C( 76), UINT8_C( 30), UINT8_C( 20), UINT8_C( 52), UINT8_C( 71), UINT8_C( 0), UINT8_C( 13), UINT8_C( 70), UINT8_C(138), UINT8_C(167), UINT8_C( 65), UINT8_C( 13), UINT8_C( 53), UINT8_C( 29), UINT8_C( 93), UINT8_C( 22), UINT8_C( 54), UINT8_C(153), UINT8_C( 15), UINT8_C( 20), UINT8_C( 21), UINT8_C(105), UINT8_C(136), UINT8_C(234), UINT8_C( 32), UINT8_C(115), UINT8_C( 12), UINT8_C( 71), UINT8_C( 1), UINT8_C( 68), UINT8_C( 59), UINT8_C( 76), UINT8_C( 26), UINT8_C( 26), UINT8_C( 40), UINT8_C( 12), UINT8_C( 2), UINT8_C( 10), UINT8_C( 2), UINT8_C( 14), UINT8_C( 31), UINT8_C( 33), UINT8_C(160), UINT8_C( 4), UINT8_C( 22), UINT8_C(166), UINT8_C( 81), UINT8_C( 43), UINT8_C( 65), UINT8_C( 47), UINT8_C(195), UINT8_C( 33), UINT8_C( 23), UINT8_C( 2), UINT8_C(164)) }, { simde_x_mm512_set_epu8(UINT8_C( 87), UINT8_C( 63), UINT8_C( 47), UINT8_C( 80), UINT8_C( 35), UINT8_C(229), UINT8_C( 5), UINT8_C( 31), UINT8_C(228), UINT8_C( 73), UINT8_C( 53), UINT8_C( 47), UINT8_C(170), UINT8_C(192), UINT8_C(122), UINT8_C(237), UINT8_C( 47), UINT8_C(130), UINT8_C(219), UINT8_C(102), UINT8_C(163), UINT8_C( 41), UINT8_C(195), UINT8_C(215), UINT8_C(199), UINT8_C( 54), UINT8_C( 97), UINT8_C(126), UINT8_C( 10), UINT8_C(165), UINT8_C(155), UINT8_C( 88), UINT8_C(184), UINT8_C( 63), UINT8_C( 95), UINT8_C(164), UINT8_C( 65), UINT8_C( 71), UINT8_C(174), UINT8_C( 88), UINT8_C(183), UINT8_C(142), UINT8_C( 98), UINT8_C( 14), UINT8_C( 25), UINT8_C(173), UINT8_C( 87), UINT8_C( 2), UINT8_C(191), UINT8_C(143), UINT8_C(152), UINT8_C( 2), UINT8_C(126), UINT8_C( 0), UINT8_C(162), UINT8_C( 57), UINT8_C(245), UINT8_C( 36), UINT8_C(239), UINT8_C( 54), UINT8_C( 33), UINT8_C(165), UINT8_C(199), UINT8_C( 84)), 
simde_x_mm512_set_epu8(UINT8_C(131), UINT8_C( 42), UINT8_C(151), UINT8_C(210), UINT8_C( 12), UINT8_C(163), UINT8_C(138), UINT8_C(207), UINT8_C( 43), UINT8_C( 57), UINT8_C( 61), UINT8_C( 62), UINT8_C( 81), UINT8_C(184), UINT8_C( 6), UINT8_C( 93), UINT8_C(167), UINT8_C( 1), UINT8_C(145), UINT8_C( 9), UINT8_C( 4), UINT8_C( 17), UINT8_C( 10), UINT8_C(101), UINT8_C(186), UINT8_C(181), UINT8_C(155), UINT8_C(243), UINT8_C(189), UINT8_C(191), UINT8_C(222), UINT8_C(205), UINT8_C( 59), UINT8_C( 26), UINT8_C(227), UINT8_C(105), UINT8_C(237), UINT8_C(145), UINT8_C(183), UINT8_C( 79), UINT8_C(174), UINT8_C( 60), UINT8_C(132), UINT8_C(208), UINT8_C( 58), UINT8_C(178), UINT8_C(116), UINT8_C(240), UINT8_C( 37), UINT8_C(131), UINT8_C(100), UINT8_C(177), UINT8_C( 19), UINT8_C(102), UINT8_C( 81), UINT8_C( 86), UINT8_C( 25), UINT8_C( 43), UINT8_C( 51), UINT8_C(140), UINT8_C( 9), UINT8_C( 40), UINT8_C(227), UINT8_C( 75)), simde_x_mm512_set_epu8(UINT8_C( 87), UINT8_C( 21), UINT8_C( 47), UINT8_C( 80), UINT8_C( 11), UINT8_C( 66), UINT8_C( 5), UINT8_C( 31), UINT8_C( 13), UINT8_C( 16), UINT8_C( 53), UINT8_C( 47), UINT8_C( 8), UINT8_C( 8), UINT8_C( 2), UINT8_C( 51), UINT8_C( 47), UINT8_C( 0), UINT8_C( 74), UINT8_C( 3), UINT8_C( 3), UINT8_C( 7), UINT8_C( 5), UINT8_C( 13), UINT8_C( 13), UINT8_C( 54), UINT8_C( 97), UINT8_C(126), UINT8_C( 10), UINT8_C(165), UINT8_C(155), UINT8_C( 88), UINT8_C( 7), UINT8_C( 11), UINT8_C( 95), UINT8_C( 59), UINT8_C( 65), UINT8_C( 71), UINT8_C(174), UINT8_C( 9), UINT8_C( 9), UINT8_C( 22), UINT8_C( 98), UINT8_C( 14), UINT8_C( 25), UINT8_C(173), UINT8_C( 87), UINT8_C( 2), UINT8_C( 6), UINT8_C( 12), UINT8_C( 52), UINT8_C( 2), UINT8_C( 12), UINT8_C( 0), UINT8_C( 0), UINT8_C( 57), UINT8_C( 20), UINT8_C( 36), UINT8_C( 35), UINT8_C( 54), UINT8_C( 6), UINT8_C( 5), UINT8_C(199), UINT8_C( 9)) }, { simde_x_mm512_set_epu8(UINT8_C(233), UINT8_C( 79), UINT8_C( 12), UINT8_C( 0), UINT8_C( 33), UINT8_C(178), UINT8_C( 58), UINT8_C( 74), UINT8_C(250), UINT8_C(116), UINT8_C(142), 
UINT8_C( 20), UINT8_C( 88), UINT8_C( 63), UINT8_C( 34), UINT8_C(124), UINT8_C(250), UINT8_C( 48), UINT8_C(221), UINT8_C(232), UINT8_C(221), UINT8_C( 75), UINT8_C(155), UINT8_C( 80), UINT8_C(233), UINT8_C(169), UINT8_C(198), UINT8_C(226), UINT8_C( 83), UINT8_C( 27), UINT8_C(137), UINT8_C( 34), UINT8_C( 23), UINT8_C(132), UINT8_C(106), UINT8_C(109), UINT8_C(135), UINT8_C(203), UINT8_C( 98), UINT8_C(120), UINT8_C(101), UINT8_C( 52), UINT8_C( 82), UINT8_C( 44), UINT8_C(142), UINT8_C( 14), UINT8_C( 99), UINT8_C(245), UINT8_C( 8), UINT8_C(140), UINT8_C(141), UINT8_C(123), UINT8_C(219), UINT8_C(163), UINT8_C(196), UINT8_C(233), UINT8_C( 34), UINT8_C(185), UINT8_C(228), UINT8_C(108), UINT8_C( 95), UINT8_C(236), UINT8_C( 97), UINT8_C( 41)), simde_x_mm512_set_epu8(UINT8_C(193), UINT8_C(230), UINT8_C( 93), UINT8_C( 23), UINT8_C(193), UINT8_C( 52), UINT8_C(223), UINT8_C(175), UINT8_C(205), UINT8_C( 45), UINT8_C(166), UINT8_C( 24), UINT8_C( 71), UINT8_C(234), UINT8_C(161), UINT8_C(142), UINT8_C(184), UINT8_C(218), UINT8_C(190), UINT8_C(212), UINT8_C(116), UINT8_C(159), UINT8_C( 44), UINT8_C( 55), UINT8_C(213), UINT8_C(133), UINT8_C( 60), UINT8_C( 3), UINT8_C( 58), UINT8_C(255), UINT8_C(125), UINT8_C(189), UINT8_C(145), UINT8_C( 88), UINT8_C( 55), UINT8_C(182), UINT8_C( 23), UINT8_C(161), UINT8_C(133), UINT8_C( 27), UINT8_C(125), UINT8_C(229), UINT8_C(203), UINT8_C( 45), UINT8_C( 24), UINT8_C( 5), UINT8_C( 90), UINT8_C( 83), UINT8_C(145), UINT8_C( 85), UINT8_C(156), UINT8_C(164), UINT8_C(149), UINT8_C(201), UINT8_C( 48), UINT8_C(255), UINT8_C( 41), UINT8_C( 42), UINT8_C( 94), UINT8_C(129), UINT8_C(135), UINT8_C( 8), UINT8_C( 12), UINT8_C(203)), simde_x_mm512_set_epu8(UINT8_C( 40), UINT8_C( 79), UINT8_C( 12), UINT8_C( 0), UINT8_C( 33), UINT8_C( 22), UINT8_C( 58), UINT8_C( 74), UINT8_C( 45), UINT8_C( 26), UINT8_C(142), UINT8_C( 20), UINT8_C( 17), UINT8_C( 63), UINT8_C( 34), UINT8_C(124), UINT8_C( 66), UINT8_C( 48), UINT8_C( 31), UINT8_C( 20), UINT8_C(105), UINT8_C( 75), UINT8_C( 
23), UINT8_C( 25), UINT8_C( 20), UINT8_C( 36), UINT8_C( 18), UINT8_C( 1), UINT8_C( 25), UINT8_C( 27), UINT8_C( 12), UINT8_C( 34), UINT8_C( 23), UINT8_C( 44), UINT8_C( 51), UINT8_C(109), UINT8_C( 20), UINT8_C( 42), UINT8_C( 98), UINT8_C( 12), UINT8_C(101), UINT8_C( 52), UINT8_C( 82), UINT8_C( 44), UINT8_C( 22), UINT8_C( 4), UINT8_C( 9), UINT8_C( 79), UINT8_C( 8), UINT8_C( 55), UINT8_C(141), UINT8_C(123), UINT8_C( 70), UINT8_C(163), UINT8_C( 4), UINT8_C(233), UINT8_C( 34), UINT8_C( 17), UINT8_C( 40), UINT8_C(108), UINT8_C( 95), UINT8_C( 4), UINT8_C( 1), UINT8_C( 41)) }, { simde_x_mm512_set_epu8(UINT8_C(142), UINT8_C( 19), UINT8_C(128), UINT8_C( 3), UINT8_C(129), UINT8_C(192), UINT8_C(118), UINT8_C(156), UINT8_C( 16), UINT8_C(232), UINT8_C(203), UINT8_C(122), UINT8_C(229), UINT8_C(105), UINT8_C(120), UINT8_C(201), UINT8_C(228), UINT8_C(167), UINT8_C(141), UINT8_C(146), UINT8_C(116), UINT8_C( 74), UINT8_C(191), UINT8_C( 35), UINT8_C( 45), UINT8_C(158), UINT8_C(228), UINT8_C(138), UINT8_C( 49), UINT8_C( 7), UINT8_C( 65), UINT8_C(140), UINT8_C( 0), UINT8_C(113), UINT8_C(156), UINT8_C(113), UINT8_C(246), UINT8_C(167), UINT8_C(109), UINT8_C(141), UINT8_C(192), UINT8_C( 11), UINT8_C( 33), UINT8_C(141), UINT8_C(129), UINT8_C( 2), UINT8_C(168), UINT8_C(227), UINT8_C( 23), UINT8_C(173), UINT8_C(104), UINT8_C( 71), UINT8_C( 11), UINT8_C(250), UINT8_C( 13), UINT8_C(218), UINT8_C(194), UINT8_C(140), UINT8_C(125), UINT8_C( 43), UINT8_C(151), UINT8_C( 49), UINT8_C(129), UINT8_C(218)), simde_x_mm512_set_epu8(UINT8_C( 8), UINT8_C( 25), UINT8_C(147), UINT8_C(220), UINT8_C(173), UINT8_C(138), UINT8_C( 38), UINT8_C(150), UINT8_C( 35), UINT8_C( 43), UINT8_C(165), UINT8_C(185), UINT8_C( 50), UINT8_C( 64), UINT8_C(161), UINT8_C(132), UINT8_C(162), UINT8_C( 50), UINT8_C(199), UINT8_C( 84), UINT8_C(251), UINT8_C(200), UINT8_C(217), UINT8_C( 19), UINT8_C(180), UINT8_C(196), UINT8_C(246), UINT8_C( 76), UINT8_C( 55), UINT8_C(204), UINT8_C(139), UINT8_C( 75), UINT8_C( 1), UINT8_C( 89), 
UINT8_C(133), UINT8_C(212), UINT8_C(206), UINT8_C( 55), UINT8_C(204), UINT8_C(120), UINT8_C( 37), UINT8_C(159), UINT8_C(146), UINT8_C(217), UINT8_C(226), UINT8_C(190), UINT8_C(134), UINT8_C( 8), UINT8_C(113), UINT8_C( 61), UINT8_C(103), UINT8_C(100), UINT8_C( 23), UINT8_C(229), UINT8_C(146), UINT8_C( 97), UINT8_C( 95), UINT8_C( 32), UINT8_C(136), UINT8_C( 91), UINT8_C( 46), UINT8_C(252), UINT8_C(163), UINT8_C( 88)), simde_x_mm512_set_epu8(UINT8_C( 6), UINT8_C( 19), UINT8_C(128), UINT8_C( 3), UINT8_C(129), UINT8_C( 54), UINT8_C( 4), UINT8_C( 6), UINT8_C( 16), UINT8_C( 17), UINT8_C( 38), UINT8_C(122), UINT8_C( 29), UINT8_C( 41), UINT8_C(120), UINT8_C( 69), UINT8_C( 66), UINT8_C( 17), UINT8_C(141), UINT8_C( 62), UINT8_C(116), UINT8_C( 74), UINT8_C(191), UINT8_C( 16), UINT8_C( 45), UINT8_C(158), UINT8_C(228), UINT8_C( 62), UINT8_C( 49), UINT8_C( 7), UINT8_C( 65), UINT8_C( 65), UINT8_C( 0), UINT8_C( 24), UINT8_C( 23), UINT8_C(113), UINT8_C( 40), UINT8_C( 2), UINT8_C(109), UINT8_C( 21), UINT8_C( 7), UINT8_C( 11), UINT8_C( 33), UINT8_C(141), UINT8_C(129), UINT8_C( 2), UINT8_C( 34), UINT8_C( 3), UINT8_C( 23), UINT8_C( 51), UINT8_C( 1), UINT8_C( 71), UINT8_C( 11), UINT8_C( 21), UINT8_C( 13), UINT8_C( 24), UINT8_C( 4), UINT8_C( 12), UINT8_C(125), UINT8_C( 43), UINT8_C( 13), UINT8_C( 49), UINT8_C(129), UINT8_C( 42)) }, { simde_x_mm512_set_epu8(UINT8_C( 46), UINT8_C( 43), UINT8_C(246), UINT8_C(157), UINT8_C( 80), UINT8_C(154), UINT8_C( 27), UINT8_C(118), UINT8_C(176), UINT8_C(216), UINT8_C( 46), UINT8_C(142), UINT8_C(198), UINT8_C(248), UINT8_C( 88), UINT8_C( 29), UINT8_C(176), UINT8_C( 25), UINT8_C(101), UINT8_C( 54), UINT8_C(103), UINT8_C(120), UINT8_C( 94), UINT8_C( 16), UINT8_C(197), UINT8_C(205), UINT8_C( 71), UINT8_C(246), UINT8_C(158), UINT8_C(176), UINT8_C(218), UINT8_C( 43), UINT8_C(235), UINT8_C(249), UINT8_C(116), UINT8_C(137), UINT8_C( 89), UINT8_C(212), UINT8_C(132), UINT8_C( 56), UINT8_C(230), UINT8_C(137), UINT8_C( 66), UINT8_C( 41), UINT8_C( 44), UINT8_C( 35), 
UINT8_C(189), UINT8_C(155), UINT8_C(125), UINT8_C(130), UINT8_C(123), UINT8_C(117), UINT8_C(123), UINT8_C(127), UINT8_C(151), UINT8_C( 60), UINT8_C(153), UINT8_C(185), UINT8_C(250), UINT8_C(100), UINT8_C( 83), UINT8_C(112), UINT8_C( 33), UINT8_C(140)), simde_x_mm512_set_epu8(UINT8_C( 36), UINT8_C( 33), UINT8_C( 42), UINT8_C( 75), UINT8_C(179), UINT8_C(172), UINT8_C(126), UINT8_C(171), UINT8_C(110), UINT8_C(150), UINT8_C(107), UINT8_C(180), UINT8_C(134), UINT8_C( 73), UINT8_C(207), UINT8_C( 15), UINT8_C(241), UINT8_C(103), UINT8_C(103), UINT8_C(150), UINT8_C(103), UINT8_C( 58), UINT8_C(104), UINT8_C( 35), UINT8_C(249), UINT8_C( 79), UINT8_C(113), UINT8_C( 97), UINT8_C(189), UINT8_C(197), UINT8_C(174), UINT8_C(222), UINT8_C(224), UINT8_C(104), UINT8_C(123), UINT8_C(124), UINT8_C( 49), UINT8_C(226), UINT8_C( 37), UINT8_C( 22), UINT8_C(105), UINT8_C(157), UINT8_C(110), UINT8_C( 52), UINT8_C(254), UINT8_C(103), UINT8_C(162), UINT8_C(210), UINT8_C(202), UINT8_C( 39), UINT8_C(193), UINT8_C(151), UINT8_C(183), UINT8_C( 73), UINT8_C( 97), UINT8_C(187), UINT8_C(102), UINT8_C(195), UINT8_C( 68), UINT8_C(190), UINT8_C( 65), UINT8_C( 60), UINT8_C(165), UINT8_C(126)), simde_x_mm512_set_epu8(UINT8_C( 10), UINT8_C( 10), UINT8_C( 36), UINT8_C( 7), UINT8_C( 80), UINT8_C(154), UINT8_C( 27), UINT8_C(118), UINT8_C( 66), UINT8_C( 66), UINT8_C( 46), UINT8_C(142), UINT8_C( 64), UINT8_C( 29), UINT8_C( 88), UINT8_C( 14), UINT8_C(176), UINT8_C( 25), UINT8_C(101), UINT8_C( 54), UINT8_C( 0), UINT8_C( 4), UINT8_C( 94), UINT8_C( 16), UINT8_C(197), UINT8_C( 47), UINT8_C( 71), UINT8_C( 52), UINT8_C(158), UINT8_C(176), UINT8_C( 44), UINT8_C( 43), UINT8_C( 11), UINT8_C( 41), UINT8_C(116), UINT8_C( 13), UINT8_C( 40), UINT8_C(212), UINT8_C( 21), UINT8_C( 12), UINT8_C( 20), UINT8_C(137), UINT8_C( 66), UINT8_C( 41), UINT8_C( 44), UINT8_C( 35), UINT8_C( 27), UINT8_C(155), UINT8_C(125), UINT8_C( 13), UINT8_C(123), UINT8_C(117), UINT8_C(123), UINT8_C( 54), UINT8_C( 54), UINT8_C( 60), UINT8_C( 51), 
UINT8_C(185), UINT8_C( 46), UINT8_C(100), UINT8_C( 18), UINT8_C( 52), UINT8_C( 33), UINT8_C( 14)) }, { simde_x_mm512_set_epu8(UINT8_C(240), UINT8_C(169), UINT8_C( 8), UINT8_C( 54), UINT8_C( 66), UINT8_C( 99), UINT8_C( 14), UINT8_C( 32), UINT8_C(148), UINT8_C( 92), UINT8_C(122), UINT8_C(200), UINT8_C(192), UINT8_C(186), UINT8_C(225), UINT8_C( 52), UINT8_C(182), UINT8_C(244), UINT8_C(253), UINT8_C(228), UINT8_C(141), UINT8_C(228), UINT8_C(148), UINT8_C(168), UINT8_C(231), UINT8_C(107), UINT8_C( 47), UINT8_C(205), UINT8_C(126), UINT8_C( 7), UINT8_C(182), UINT8_C(245), UINT8_C(165), UINT8_C(186), UINT8_C(213), UINT8_C( 84), UINT8_C( 19), UINT8_C(131), UINT8_C( 54), UINT8_C( 13), UINT8_C(185), UINT8_C(182), UINT8_C( 72), UINT8_C( 61), UINT8_C(125), UINT8_C(104), UINT8_C(147), UINT8_C( 11), UINT8_C( 89), UINT8_C(204), UINT8_C( 62), UINT8_C(163), UINT8_C(198), UINT8_C(162), UINT8_C(205), UINT8_C( 9), UINT8_C(182), UINT8_C(123), UINT8_C( 65), UINT8_C(208), UINT8_C(145), UINT8_C(179), UINT8_C( 34), UINT8_C(195)), simde_x_mm512_set_epu8(UINT8_C(141), UINT8_C(103), UINT8_C(116), UINT8_C( 12), UINT8_C(174), UINT8_C(226), UINT8_C(193), UINT8_C(175), UINT8_C(155), UINT8_C(174), UINT8_C( 73), UINT8_C( 6), UINT8_C(141), UINT8_C(140), UINT8_C(254), UINT8_C(193), UINT8_C(100), UINT8_C(151), UINT8_C( 14), UINT8_C( 19), UINT8_C( 38), UINT8_C(115), UINT8_C(201), UINT8_C(118), UINT8_C( 74), UINT8_C(186), UINT8_C( 89), UINT8_C(183), UINT8_C( 65), UINT8_C(138), UINT8_C( 64), UINT8_C( 90), UINT8_C(152), UINT8_C(241), UINT8_C(229), UINT8_C(218), UINT8_C(126), UINT8_C( 38), UINT8_C(159), UINT8_C( 27), UINT8_C(164), UINT8_C(199), UINT8_C( 25), UINT8_C(253), UINT8_C(181), UINT8_C(104), UINT8_C( 6), UINT8_C(183), UINT8_C( 36), UINT8_C(203), UINT8_C(138), UINT8_C(145), UINT8_C(116), UINT8_C(155), UINT8_C(218), UINT8_C( 24), UINT8_C(205), UINT8_C(238), UINT8_C(242), UINT8_C( 26), UINT8_C(226), UINT8_C( 76), UINT8_C(226), UINT8_C(214)), simde_x_mm512_set_epu8(UINT8_C( 99), UINT8_C( 66), UINT8_C( 
8), UINT8_C( 6), UINT8_C( 66), UINT8_C( 99), UINT8_C( 14), UINT8_C( 32), UINT8_C(148), UINT8_C( 92), UINT8_C( 49), UINT8_C( 2), UINT8_C( 51), UINT8_C( 46), UINT8_C(225), UINT8_C( 52), UINT8_C( 82), UINT8_C( 93), UINT8_C( 1), UINT8_C( 0), UINT8_C( 27), UINT8_C(113), UINT8_C(148), UINT8_C( 50), UINT8_C( 9), UINT8_C(107), UINT8_C( 47), UINT8_C( 22), UINT8_C( 61), UINT8_C( 7), UINT8_C( 54), UINT8_C( 65), UINT8_C( 13), UINT8_C(186), UINT8_C(213), UINT8_C( 84), UINT8_C( 19), UINT8_C( 17), UINT8_C( 54), UINT8_C( 13), UINT8_C( 21), UINT8_C(182), UINT8_C( 22), UINT8_C( 61), UINT8_C(125), UINT8_C( 0), UINT8_C( 3), UINT8_C( 11), UINT8_C( 17), UINT8_C( 1), UINT8_C( 62), UINT8_C( 18), UINT8_C( 82), UINT8_C( 7), UINT8_C(205), UINT8_C( 9), UINT8_C(182), UINT8_C(123), UINT8_C( 65), UINT8_C( 0), UINT8_C(145), UINT8_C( 27), UINT8_C( 34), UINT8_C(195)) }, { simde_x_mm512_set_epu8(UINT8_C(197), UINT8_C( 52), UINT8_C(145), UINT8_C( 20), UINT8_C( 26), UINT8_C(178), UINT8_C(121), UINT8_C( 16), UINT8_C( 45), UINT8_C(229), UINT8_C( 11), UINT8_C(230), UINT8_C( 53), UINT8_C( 2), UINT8_C(234), UINT8_C( 7), UINT8_C(207), UINT8_C(146), UINT8_C(169), UINT8_C(233), UINT8_C(206), UINT8_C(116), UINT8_C( 55), UINT8_C(156), UINT8_C(180), UINT8_C( 91), UINT8_C( 56), UINT8_C(146), UINT8_C( 55), UINT8_C(137), UINT8_C(200), UINT8_C( 76), UINT8_C( 43), UINT8_C(245), UINT8_C(138), UINT8_C( 3), UINT8_C(213), UINT8_C(156), UINT8_C(166), UINT8_C(234), UINT8_C(199), UINT8_C( 2), UINT8_C( 86), UINT8_C( 72), UINT8_C( 93), UINT8_C(254), UINT8_C(190), UINT8_C(121), UINT8_C(119), UINT8_C( 75), UINT8_C(159), UINT8_C( 76), UINT8_C( 70), UINT8_C(218), UINT8_C( 17), UINT8_C(239), UINT8_C( 43), UINT8_C(152), UINT8_C(222), UINT8_C( 80), UINT8_C(197), UINT8_C(113), UINT8_C(112), UINT8_C( 81)), simde_x_mm512_set_epu8(UINT8_C(193), UINT8_C(162), UINT8_C(178), UINT8_C( 36), UINT8_C(178), UINT8_C( 86), UINT8_C( 79), UINT8_C(167), UINT8_C(179), UINT8_C( 45), UINT8_C( 18), UINT8_C(231), UINT8_C(113), UINT8_C(127), UINT8_C(211), 
UINT8_C(181), UINT8_C(121), UINT8_C(171), UINT8_C( 76), UINT8_C(135), UINT8_C( 15), UINT8_C(133), UINT8_C(247), UINT8_C( 32), UINT8_C(181), UINT8_C(168), UINT8_C(236), UINT8_C( 99), UINT8_C( 85), UINT8_C(151), UINT8_C( 36), UINT8_C( 99), UINT8_C(101), UINT8_C( 42), UINT8_C( 63), UINT8_C( 96), UINT8_C(210), UINT8_C(198), UINT8_C(202), UINT8_C(105), UINT8_C(214), UINT8_C( 74), UINT8_C(199), UINT8_C( 17), UINT8_C(234), UINT8_C( 22), UINT8_C(134), UINT8_C(112), UINT8_C( 62), UINT8_C(141), UINT8_C(156), UINT8_C( 91), UINT8_C( 99), UINT8_C( 24), UINT8_C(198), UINT8_C(131), UINT8_C( 88), UINT8_C(136), UINT8_C( 61), UINT8_C( 94), UINT8_C(189), UINT8_C(213), UINT8_C(249), UINT8_C(131)), simde_x_mm512_set_epu8(UINT8_C( 4), UINT8_C( 52), UINT8_C(145), UINT8_C( 20), UINT8_C( 26), UINT8_C( 6), UINT8_C( 42), UINT8_C( 16), UINT8_C( 45), UINT8_C( 4), UINT8_C( 11), UINT8_C(230), UINT8_C( 53), UINT8_C( 2), UINT8_C( 23), UINT8_C( 7), UINT8_C( 86), UINT8_C(146), UINT8_C( 17), UINT8_C( 98), UINT8_C( 11), UINT8_C(116), UINT8_C( 55), UINT8_C( 28), UINT8_C(180), UINT8_C( 91), UINT8_C( 56), UINT8_C( 47), UINT8_C( 55), UINT8_C(137), UINT8_C( 20), UINT8_C( 76), UINT8_C( 43), UINT8_C( 35), UINT8_C( 12), UINT8_C( 3), UINT8_C( 3), UINT8_C(156), UINT8_C(166), UINT8_C( 24), UINT8_C(199), UINT8_C( 2), UINT8_C( 86), UINT8_C( 4), UINT8_C( 93), UINT8_C( 12), UINT8_C( 56), UINT8_C( 9), UINT8_C( 57), UINT8_C( 75), UINT8_C( 3), UINT8_C( 76), UINT8_C( 70), UINT8_C( 2), UINT8_C( 17), UINT8_C(108), UINT8_C( 43), UINT8_C( 16), UINT8_C( 39), UINT8_C( 80), UINT8_C( 8), UINT8_C(113), UINT8_C(112), UINT8_C( 81)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_rem_epu8(test_vec[i].a, test_vec[i].b); simde_assert_m512i_u8(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_rem_epu16(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu16(UINT16_C( 10545), UINT16_C( 
43974), UINT16_C( 10284), UINT16_C( 62003), UINT16_C( 35545), UINT16_C( 55289), UINT16_C( 51493), UINT16_C( 35101), UINT16_C( 59818), UINT16_C( 61822), UINT16_C( 46602), UINT16_C( 53446), UINT16_C( 23938), UINT16_C( 50097), UINT16_C( 48095), UINT16_C( 35837), UINT16_C( 49063), UINT16_C( 57920), UINT16_C( 54730), UINT16_C( 28273), UINT16_C( 23021), UINT16_C( 18146), UINT16_C( 33883), UINT16_C( 65368), UINT16_C( 26666), UINT16_C( 13822), UINT16_C( 34046), UINT16_C( 24651), UINT16_C( 8048), UINT16_C( 38825), UINT16_C( 44126), UINT16_C( 28762)), simde_x_mm512_set_epu16(UINT16_C( 38607), UINT16_C( 8074), UINT16_C( 18000), UINT16_C( 35687), UINT16_C( 40415), UINT16_C( 3254), UINT16_C( 55282), UINT16_C( 38855), UINT16_C( 41330), UINT16_C( 37148), UINT16_C( 25803), UINT16_C( 25877), UINT16_C( 768), UINT16_C( 16244), UINT16_C( 11114), UINT16_C( 58324), UINT16_C( 18192), UINT16_C( 32532), UINT16_C( 33700), UINT16_C( 60373), UINT16_C( 20183), UINT16_C( 64042), UINT16_C( 2502), UINT16_C( 18488), UINT16_C( 22771), UINT16_C( 21470), UINT16_C( 4556), UINT16_C( 26138), UINT16_C( 19085), UINT16_C( 64613), UINT16_C( 55602), UINT16_C( 63371)), simde_x_mm512_set_epu16(UINT16_C( 10545), UINT16_C( 3604), UINT16_C( 10284), UINT16_C( 26316), UINT16_C( 35545), UINT16_C( 3225), UINT16_C( 51493), UINT16_C( 35101), UINT16_C( 18488), UINT16_C( 24674), UINT16_C( 20799), UINT16_C( 1692), UINT16_C( 130), UINT16_C( 1365), UINT16_C( 3639), UINT16_C( 35837), UINT16_C( 12679), UINT16_C( 25388), UINT16_C( 21030), UINT16_C( 28273), UINT16_C( 2838), UINT16_C( 18146), UINT16_C( 1357), UINT16_C( 9904), UINT16_C( 3895), UINT16_C( 13822), UINT16_C( 2154), UINT16_C( 24651), UINT16_C( 8048), UINT16_C( 38825), UINT16_C( 44126), UINT16_C( 28762)) }, { simde_x_mm512_set_epu16(UINT16_C( 20057), UINT16_C( 26978), UINT16_C( 45741), UINT16_C( 34503), UINT16_C( 54259), UINT16_C( 41436), UINT16_C( 43883), UINT16_C( 11009), UINT16_C( 50212), UINT16_C( 9014), UINT16_C( 24117), UINT16_C( 34039), UINT16_C( 58348), 
UINT16_C( 8311), UINT16_C( 31759), UINT16_C( 4002), UINT16_C( 7525), UINT16_C( 3321), UINT16_C( 47299), UINT16_C( 64213), UINT16_C( 13644), UINT16_C( 48153), UINT16_C( 45234), UINT16_C( 51700), UINT16_C( 7513), UINT16_C( 1114), UINT16_C( 65336), UINT16_C( 10389), UINT16_C( 33688), UINT16_C( 9445), UINT16_C( 60332), UINT16_C( 41466)), simde_x_mm512_set_epu16(UINT16_C( 48157), UINT16_C( 56913), UINT16_C( 55050), UINT16_C( 48859), UINT16_C( 27895), UINT16_C( 48343), UINT16_C( 59593), UINT16_C( 60425), UINT16_C( 62587), UINT16_C( 54231), UINT16_C( 52444), UINT16_C( 8140), UINT16_C( 58695), UINT16_C( 2476), UINT16_C( 41101), UINT16_C( 7948), UINT16_C( 26094), UINT16_C( 52354), UINT16_C( 30122), UINT16_C( 47688), UINT16_C( 43801), UINT16_C( 57764), UINT16_C( 1809), UINT16_C( 33603), UINT16_C( 8271), UINT16_C( 4936), UINT16_C( 7627), UINT16_C( 20477), UINT16_C( 14608), UINT16_C( 25470), UINT16_C( 45836), UINT16_C( 25611)), simde_x_mm512_set_epu16(UINT16_C( 20057), UINT16_C( 26978), UINT16_C( 45741), UINT16_C( 34503), UINT16_C( 26364), UINT16_C( 41436), UINT16_C( 43883), UINT16_C( 11009), UINT16_C( 50212), UINT16_C( 9014), UINT16_C( 24117), UINT16_C( 1479), UINT16_C( 58348), UINT16_C( 883), UINT16_C( 31759), UINT16_C( 4002), UINT16_C( 7525), UINT16_C( 3321), UINT16_C( 17177), UINT16_C( 16525), UINT16_C( 13644), UINT16_C( 48153), UINT16_C( 9), UINT16_C( 18097), UINT16_C( 7513), UINT16_C( 1114), UINT16_C( 4320), UINT16_C( 10389), UINT16_C( 4472), UINT16_C( 9445), UINT16_C( 14496), UINT16_C( 15855)) }, { simde_x_mm512_set_epu16(UINT16_C( 26902), UINT16_C( 51011), UINT16_C( 57631), UINT16_C( 57521), UINT16_C( 43405), UINT16_C( 18318), UINT16_C( 44023), UINT16_C( 9770), UINT16_C( 4118), UINT16_C( 33099), UINT16_C( 6621), UINT16_C( 57639), UINT16_C( 22002), UINT16_C( 33155), UINT16_C( 15537), UINT16_C( 38743), UINT16_C( 26466), UINT16_C( 21183), UINT16_C( 5811), UINT16_C( 17016), UINT16_C( 51162), UINT16_C( 46775), UINT16_C( 54252), UINT16_C( 64603), UINT16_C( 30444), UINT16_C( 
20573), UINT16_C( 50572), UINT16_C( 25607), UINT16_C( 36721), UINT16_C( 36797), UINT16_C( 27147), UINT16_C( 62271)), simde_x_mm512_set_epu16(UINT16_C( 55381), UINT16_C( 52839), UINT16_C( 60314), UINT16_C( 33159), UINT16_C( 32076), UINT16_C( 51820), UINT16_C( 13383), UINT16_C( 43204), UINT16_C( 18058), UINT16_C( 42817), UINT16_C( 56737), UINT16_C( 40285), UINT16_C( 49341), UINT16_C( 39323), UINT16_C( 53205), UINT16_C( 27016), UINT16_C( 59998), UINT16_C( 61452), UINT16_C( 37377), UINT16_C( 37691), UINT16_C( 64794), UINT16_C( 6696), UINT16_C( 3074), UINT16_C( 59025), UINT16_C( 43625), UINT16_C( 28576), UINT16_C( 36042), UINT16_C( 42716), UINT16_C( 47937), UINT16_C( 64195), UINT16_C( 8579), UINT16_C( 676)), simde_x_mm512_set_epu16(UINT16_C( 26902), UINT16_C( 51011), UINT16_C( 57631), UINT16_C( 24362), UINT16_C( 11329), UINT16_C( 18318), UINT16_C( 3874), UINT16_C( 9770), UINT16_C( 4118), UINT16_C( 33099), UINT16_C( 6621), UINT16_C( 17354), UINT16_C( 22002), UINT16_C( 33155), UINT16_C( 15537), UINT16_C( 11727), UINT16_C( 26466), UINT16_C( 21183), UINT16_C( 5811), UINT16_C( 17016), UINT16_C( 51162), UINT16_C( 6599), UINT16_C( 1994), UINT16_C( 5578), UINT16_C( 30444), UINT16_C( 20573), UINT16_C( 14530), UINT16_C( 25607), UINT16_C( 36721), UINT16_C( 36797), UINT16_C( 1410), UINT16_C( 79)) }, { simde_x_mm512_set_epu16(UINT16_C( 7566), UINT16_C( 25511), UINT16_C( 59705), UINT16_C( 13989), UINT16_C( 13965), UINT16_C( 34471), UINT16_C( 77), UINT16_C( 35152), UINT16_C( 21705), UINT16_C( 42504), UINT16_C( 63033), UINT16_C( 56884), UINT16_C( 42389), UINT16_C( 61527), UINT16_C( 7598), UINT16_C( 23051), UINT16_C( 13886), UINT16_C( 28688), UINT16_C( 30551), UINT16_C( 36608), UINT16_C( 56045), UINT16_C( 38987), UINT16_C( 64798), UINT16_C( 22350), UINT16_C( 7981), UINT16_C( 50477), UINT16_C( 46688), UINT16_C( 16804), UINT16_C( 33660), UINT16_C( 63749), UINT16_C( 29649), UINT16_C( 64815)), simde_x_mm512_set_epu16(UINT16_C( 18409), UINT16_C( 19069), UINT16_C( 20979), UINT16_C( 35774), 
UINT16_C( 8112), UINT16_C( 25085), UINT16_C( 31664), UINT16_C( 55404), UINT16_C( 63329), UINT16_C( 19403), UINT16_C( 33006), UINT16_C( 20365), UINT16_C( 22045), UINT16_C( 41935), UINT16_C( 28665), UINT16_C( 35793), UINT16_C( 26789), UINT16_C( 40241), UINT16_C( 34076), UINT16_C( 36189), UINT16_C( 49507), UINT16_C( 32891), UINT16_C( 45700), UINT16_C( 31541), UINT16_C( 33237), UINT16_C( 50719), UINT16_C( 22782), UINT16_C( 46902), UINT16_C( 62792), UINT16_C( 907), UINT16_C( 9939), UINT16_C( 395)), simde_x_mm512_set_epu16(UINT16_C( 7566), UINT16_C( 6442), UINT16_C( 17747), UINT16_C( 13989), UINT16_C( 5853), UINT16_C( 9386), UINT16_C( 77), UINT16_C( 35152), UINT16_C( 21705), UINT16_C( 3698), UINT16_C( 30027), UINT16_C( 16154), UINT16_C( 20344), UINT16_C( 19592), UINT16_C( 7598), UINT16_C( 23051), UINT16_C( 13886), UINT16_C( 28688), UINT16_C( 30551), UINT16_C( 419), UINT16_C( 6538), UINT16_C( 6096), UINT16_C( 19098), UINT16_C( 22350), UINT16_C( 7981), UINT16_C( 50477), UINT16_C( 1124), UINT16_C( 16804), UINT16_C( 33660), UINT16_C( 259), UINT16_C( 9771), UINT16_C( 35)) }, { simde_x_mm512_set_epu16(UINT16_C( 40553), UINT16_C( 9260), UINT16_C( 6846), UINT16_C( 21618), UINT16_C( 20365), UINT16_C( 26413), UINT16_C( 7670), UINT16_C( 6521), UINT16_C( 13052), UINT16_C( 19892), UINT16_C( 40021), UINT16_C( 58092), UINT16_C( 12337), UINT16_C( 14080), UINT16_C( 6934), UINT16_C( 61515), UINT16_C( 1885), UINT16_C( 11733), UINT16_C( 7371), UINT16_C( 24583), UINT16_C( 48349), UINT16_C( 37475), UINT16_C( 47206), UINT16_C( 54691), UINT16_C( 63460), UINT16_C( 2107), UINT16_C( 62169), UINT16_C( 38808), UINT16_C( 21341), UINT16_C( 51834), UINT16_C( 26283), UINT16_C( 38235)), simde_x_mm512_set_epu16(UINT16_C( 9227), UINT16_C( 20728), UINT16_C( 22448), UINT16_C( 22271), UINT16_C( 38010), UINT16_C( 3228), UINT16_C( 38598), UINT16_C( 15839), UINT16_C( 4554), UINT16_C( 22831), UINT16_C( 44103), UINT16_C( 32351), UINT16_C( 46747), UINT16_C( 20983), UINT16_C( 61889), UINT16_C( 26454), UINT16_C( 
63311), UINT16_C( 19804), UINT16_C( 62773), UINT16_C( 56806), UINT16_C( 36384), UINT16_C( 25302), UINT16_C( 37143), UINT16_C( 3478), UINT16_C( 59861), UINT16_C( 61175), UINT16_C( 48658), UINT16_C( 23119), UINT16_C( 30252), UINT16_C( 63116), UINT16_C( 13170), UINT16_C( 44087)), simde_x_mm512_set_epu16(UINT16_C( 3645), UINT16_C( 9260), UINT16_C( 6846), UINT16_C( 21618), UINT16_C( 20365), UINT16_C( 589), UINT16_C( 7670), UINT16_C( 6521), UINT16_C( 3944), UINT16_C( 19892), UINT16_C( 40021), UINT16_C( 25741), UINT16_C( 12337), UINT16_C( 14080), UINT16_C( 6934), UINT16_C( 8607), UINT16_C( 1885), UINT16_C( 11733), UINT16_C( 7371), UINT16_C( 24583), UINT16_C( 11965), UINT16_C( 12173), UINT16_C( 10063), UINT16_C( 2521), UINT16_C( 3599), UINT16_C( 2107), UINT16_C( 13511), UINT16_C( 15689), UINT16_C( 21341), UINT16_C( 51834), UINT16_C( 13113), UINT16_C( 38235)) }, { simde_x_mm512_set_epu16(UINT16_C( 22335), UINT16_C( 12112), UINT16_C( 9189), UINT16_C( 1311), UINT16_C( 58441), UINT16_C( 13615), UINT16_C( 43712), UINT16_C( 31469), UINT16_C( 12162), UINT16_C( 56166), UINT16_C( 41769), UINT16_C( 50135), UINT16_C( 50998), UINT16_C( 24958), UINT16_C( 2725), UINT16_C( 39768), UINT16_C( 47167), UINT16_C( 24484), UINT16_C( 16711), UINT16_C( 44632), UINT16_C( 46990), UINT16_C( 25102), UINT16_C( 6573), UINT16_C( 22274), UINT16_C( 49039), UINT16_C( 38914), UINT16_C( 32256), UINT16_C( 41529), UINT16_C( 62756), UINT16_C( 61238), UINT16_C( 8613), UINT16_C( 51028)), simde_x_mm512_set_epu16(UINT16_C( 30472), UINT16_C( 36773), UINT16_C( 7714), UINT16_C( 18947), UINT16_C( 7066), UINT16_C( 47844), UINT16_C( 58651), UINT16_C( 1841), UINT16_C( 35799), UINT16_C( 50579), UINT16_C( 32926), UINT16_C( 26598), UINT16_C( 39537), UINT16_C( 61137), UINT16_C( 5946), UINT16_C( 2262), UINT16_C( 60116), UINT16_C( 12953), UINT16_C( 38045), UINT16_C( 47787), UINT16_C( 30618), UINT16_C( 37811), UINT16_C( 51748), UINT16_C( 52236), UINT16_C( 23394), UINT16_C( 2441), UINT16_C( 32382), UINT16_C( 9384), UINT16_C( 
25792), UINT16_C( 56163), UINT16_C( 22658), UINT16_C( 20939)), simde_x_mm512_set_epu16(UINT16_C( 22335), UINT16_C( 12112), UINT16_C( 1475), UINT16_C( 1311), UINT16_C( 1913), UINT16_C( 13615), UINT16_C( 43712), UINT16_C( 172), UINT16_C( 12162), UINT16_C( 5587), UINT16_C( 8843), UINT16_C( 23537), UINT16_C( 11461), UINT16_C( 24958), UINT16_C( 2725), UINT16_C( 1314), UINT16_C( 47167), UINT16_C( 11531), UINT16_C( 16711), UINT16_C( 44632), UINT16_C( 16372), UINT16_C( 25102), UINT16_C( 6573), UINT16_C( 22274), UINT16_C( 2251), UINT16_C( 2299), UINT16_C( 32256), UINT16_C( 3993), UINT16_C( 11172), UINT16_C( 5075), UINT16_C( 8613), UINT16_C( 9150)) }, { simde_x_mm512_set_epu16(UINT16_C( 13867), UINT16_C( 28091), UINT16_C( 35390), UINT16_C( 56986), UINT16_C( 31509), UINT16_C( 63331), UINT16_C( 9520), UINT16_C( 29929), UINT16_C( 24571), UINT16_C( 37741), UINT16_C( 52686), UINT16_C( 14609), UINT16_C( 31001), UINT16_C( 823), UINT16_C( 45697), UINT16_C( 38351), UINT16_C( 35780), UINT16_C( 41006), UINT16_C( 3633), UINT16_C( 45500), UINT16_C( 30184), UINT16_C( 27396), UINT16_C( 1171), UINT16_C( 25936), UINT16_C( 61703), UINT16_C( 57786), UINT16_C( 19453), UINT16_C( 30002), UINT16_C( 6315), UINT16_C( 244), UINT16_C( 8399), UINT16_C( 57456)), simde_x_mm512_set_epu16(UINT16_C( 18752), UINT16_C( 27431), UINT16_C( 53704), UINT16_C( 42625), UINT16_C( 42869), UINT16_C( 41745), UINT16_C( 47543), UINT16_C( 11401), UINT16_C( 26966), UINT16_C( 26500), UINT16_C( 7486), UINT16_C( 7825), UINT16_C( 17767), UINT16_C( 58506), UINT16_C( 36234), UINT16_C( 38373), UINT16_C( 54992), UINT16_C( 46906), UINT16_C( 52104), UINT16_C( 31285), UINT16_C( 34932), UINT16_C( 29467), UINT16_C( 33781), UINT16_C( 883), UINT16_C( 23995), UINT16_C( 43069), UINT16_C( 53587), UINT16_C( 11327), UINT16_C( 36611), UINT16_C( 7518), UINT16_C( 30015), UINT16_C( 30285)), simde_x_mm512_set_epu16(UINT16_C( 13867), UINT16_C( 660), UINT16_C( 35390), UINT16_C( 14361), UINT16_C( 31509), UINT16_C( 21586), UINT16_C( 9520), UINT16_C( 
7127), UINT16_C( 24571), UINT16_C( 11241), UINT16_C( 284), UINT16_C( 6784), UINT16_C( 13234), UINT16_C( 823), UINT16_C( 9463), UINT16_C( 38351), UINT16_C( 35780), UINT16_C( 41006), UINT16_C( 3633), UINT16_C( 14215), UINT16_C( 30184), UINT16_C( 27396), UINT16_C( 1171), UINT16_C( 329), UINT16_C( 13713), UINT16_C( 14717), UINT16_C( 19453), UINT16_C( 7348), UINT16_C( 6315), UINT16_C( 244), UINT16_C( 8399), UINT16_C( 27171)) }, { simde_x_mm512_set_epu16(UINT16_C( 19003), UINT16_C( 26627), UINT16_C( 63705), UINT16_C( 34218), UINT16_C( 36055), UINT16_C( 13847), UINT16_C( 44625), UINT16_C( 9042), UINT16_C( 36148), UINT16_C( 11660), UINT16_C( 32339), UINT16_C( 39715), UINT16_C( 47178), UINT16_C( 21002), UINT16_C( 60706), UINT16_C( 8527), UINT16_C( 26072), UINT16_C( 29611), UINT16_C( 18348), UINT16_C( 953), UINT16_C( 33382), UINT16_C( 22717), UINT16_C( 50122), UINT16_C( 52414), UINT16_C( 59278), UINT16_C( 54225), UINT16_C( 31952), UINT16_C( 29752), UINT16_C( 37488), UINT16_C( 20614), UINT16_C( 1055), UINT16_C( 61149)), simde_x_mm512_set_epu16(UINT16_C( 59727), UINT16_C( 3072), UINT16_C( 8626), UINT16_C( 14922), UINT16_C( 64116), UINT16_C( 36372), UINT16_C( 22591), UINT16_C( 8828), UINT16_C( 64048), UINT16_C( 56808), UINT16_C( 56651), UINT16_C( 39760), UINT16_C( 59817), UINT16_C( 50914), UINT16_C( 21275), UINT16_C( 35106), UINT16_C( 6020), UINT16_C( 27245), UINT16_C( 34763), UINT16_C( 25208), UINT16_C( 25908), UINT16_C( 21036), UINT16_C( 36366), UINT16_C( 25589), UINT16_C( 2188), UINT16_C( 36219), UINT16_C( 56227), UINT16_C( 50409), UINT16_C( 8889), UINT16_C( 58476), UINT16_C( 24556), UINT16_C( 24873)), simde_x_mm512_set_epu16(UINT16_C( 19003), UINT16_C( 2051), UINT16_C( 3323), UINT16_C( 4374), UINT16_C( 36055), UINT16_C( 13847), UINT16_C( 22034), UINT16_C( 214), UINT16_C( 36148), UINT16_C( 11660), UINT16_C( 32339), UINT16_C( 39715), UINT16_C( 47178), UINT16_C( 21002), UINT16_C( 18156), UINT16_C( 8527), UINT16_C( 1992), UINT16_C( 2366), UINT16_C( 18348), UINT16_C( 953), 
UINT16_C( 7474), UINT16_C( 1681), UINT16_C( 13756), UINT16_C( 1236), UINT16_C( 202), UINT16_C( 18006), UINT16_C( 31952), UINT16_C( 29752), UINT16_C( 1932), UINT16_C( 20614), UINT16_C( 1055), UINT16_C( 11403)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_rem_epu16(test_vec[i].a, test_vec[i].b); simde_assert_m512i_u16(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_rem_epu32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu32(UINT32_C( 691121094), UINT32_C( 674034227), UINT32_C(2329532409), UINT32_C(3374680349), UINT32_C(3920294270), UINT32_C(3054162118), UINT32_C(1568850865), UINT32_C(3151989757), UINT32_C(3215450688), UINT32_C(3586813553), UINT32_C(1508722402), UINT32_C(2220621656), UINT32_C(1747596798), UINT32_C(2231263307), UINT32_C( 527472553), UINT32_C(2891870298)), simde_x_mm512_set_epu32(UINT32_C(2530156426), UINT32_C(1179683687), UINT32_C(2648640694), UINT32_C(3623000007), UINT32_C(2708640028), UINT32_C(1691051285), UINT32_C( 50347892), UINT32_C( 728425428), UINT32_C(1192263444), UINT32_C(2208623573), UINT32_C(1322777130), UINT32_C( 163989560), UINT32_C(1492341726), UINT32_C( 298608154), UINT32_C(1250819173), UINT32_C(3643996043)), simde_x_mm512_set_epu32(UINT32_C( 691121094), UINT32_C( 674034227), UINT32_C(2329532409), UINT32_C(3374680349), UINT32_C(1211654242), UINT32_C(1363110833), UINT32_C( 8066213), UINT32_C( 238288045), UINT32_C( 830923800), UINT32_C(1378189980), UINT32_C( 185945272), UINT32_C( 88757376), UINT32_C( 255255072), UINT32_C( 141006229), UINT32_C( 527472553), UINT32_C(2891870298)) }, { simde_x_mm512_set_epu32(UINT32_C(1314482530), UINT32_C(2997716679), UINT32_C(3555959260), UINT32_C(2875927297), UINT32_C(3290702646), UINT32_C(1580565751), UINT32_C(3823902839), UINT32_C(2081361826), UINT32_C( 493161721), UINT32_C(3099851477), UINT32_C( 894221337), UINT32_C(2964507124), UINT32_C( 
492373082), UINT32_C(4281870485), UINT32_C(2207786213), UINT32_C(3953959418)), simde_x_mm512_set_epu32(UINT32_C(3156074065), UINT32_C(3607805659), UINT32_C(1828175063), UINT32_C(3905547273), UINT32_C(4101755863), UINT32_C(3436978124), UINT32_C(3846637996), UINT32_C(2693603084), UINT32_C(1710148738), UINT32_C(1974123080), UINT32_C(2870600100), UINT32_C( 118588227), UINT32_C( 542053192), UINT32_C( 499863549), UINT32_C( 957375358), UINT32_C(3003933707)), simde_x_mm512_set_epu32(UINT32_C(1314482530), UINT32_C(2997716679), UINT32_C(1727784197), UINT32_C(2875927297), UINT32_C(3290702646), UINT32_C(1580565751), UINT32_C(3823902839), UINT32_C(2081361826), UINT32_C( 493161721), UINT32_C(1125728397), UINT32_C( 894221337), UINT32_C( 118389676), UINT32_C( 492373082), UINT32_C( 282962093), UINT32_C( 293035497), UINT32_C( 950025711)) }, { simde_x_mm512_set_epu32(UINT32_C(1763100483), UINT32_C(3776962737), UINT32_C(2844608398), UINT32_C(2885101098), UINT32_C( 269910347), UINT32_C( 433971495), UINT32_C(1441956227), UINT32_C(1018271575), UINT32_C(1734496959), UINT32_C( 380846712), UINT32_C(3352999607), UINT32_C(3555523675), UINT32_C(1995198557), UINT32_C(3314312199), UINT32_C(2406584253), UINT32_C(1779168063)), simde_x_mm512_set_epu32(UINT32_C(3629502055), UINT32_C(3952771463), UINT32_C(2102184556), UINT32_C( 877111492), UINT32_C(1183491905), UINT32_C(3718356317), UINT32_C(3233651099), UINT32_C(3486869896), UINT32_C(3932090380), UINT32_C(2449576763), UINT32_C(4246346280), UINT32_C( 201516689), UINT32_C(2859036576), UINT32_C(2362091228), UINT32_C(3141663427), UINT32_C( 562234020)), simde_x_mm512_set_epu32(UINT32_C(1763100483), UINT32_C(3776962737), UINT32_C( 742423842), UINT32_C( 253766622), UINT32_C( 269910347), UINT32_C( 433971495), UINT32_C(1441956227), UINT32_C(1018271575), UINT32_C(1734496959), UINT32_C( 380846712), UINT32_C(3352999607), UINT32_C( 129739962), UINT32_C(1995198557), UINT32_C( 952220971), UINT32_C(2406584253), UINT32_C( 92466003)) }, { 
simde_x_mm512_set_epu32(UINT32_C( 495870887), UINT32_C(3912840869), UINT32_C( 915244711), UINT32_C( 5081424), UINT32_C(1422501384), UINT32_C(4130987572), UINT32_C(2778067031), UINT32_C( 497965579), UINT32_C( 910061584), UINT32_C(2002226944), UINT32_C(3673004107), UINT32_C(4246624078), UINT32_C( 523093293), UINT32_C(3059761572), UINT32_C(2206005509), UINT32_C(1943141679)), simde_x_mm512_set_epu32(UINT32_C(1206471293), UINT32_C(1374915518), UINT32_C( 531653117), UINT32_C(2075187308), UINT32_C(4150348747), UINT32_C(2163101581), UINT32_C(1444783055), UINT32_C(1878625233), UINT32_C(1755684145), UINT32_C(2233240925), UINT32_C(3244523643), UINT32_C(2995026741), UINT32_C(2178270751), UINT32_C(1493088054), UINT32_C(4115137419), UINT32_C( 651362699)), simde_x_mm512_set_epu32(UINT32_C( 495870887), UINT32_C(1163009833), UINT32_C( 383591594), UINT32_C( 5081424), UINT32_C(1422501384), UINT32_C(1967885991), UINT32_C(1333283976), UINT32_C( 497965579), UINT32_C( 910061584), UINT32_C(2002226944), UINT32_C( 428480464), UINT32_C(1251597337), UINT32_C( 523093293), UINT32_C( 73585464), UINT32_C(2206005509), UINT32_C( 640416281)) }, { simde_x_mm512_set_epu32(UINT32_C(2657690668), UINT32_C( 448681074), UINT32_C(1334667053), UINT32_C( 502667641), UINT32_C( 855395764), UINT32_C(2622874348), UINT32_C( 808531712), UINT32_C( 454488139), UINT32_C( 123547093), UINT32_C( 483090439), UINT32_C(3168637539), UINT32_C(3093747107), UINT32_C(4158916667), UINT32_C(4074346392), UINT32_C(1398655610), UINT32_C(1722520923)), simde_x_mm512_set_epu32(UINT32_C( 604721400), UINT32_C(1471174399), UINT32_C(2491026588), UINT32_C(2529574367), UINT32_C( 298473775), UINT32_C(2890366559), UINT32_C(3063632375), UINT32_C(4055983958), UINT32_C(4149169500), UINT32_C(4113948134), UINT32_C(2384487126), UINT32_C(2434207126), UINT32_C(3923111671), UINT32_C(3188873807), UINT32_C(1982658188), UINT32_C( 863153207)), simde_x_mm512_set_epu32(UINT32_C( 238805068), UINT32_C( 448681074), UINT32_C(1334667053), UINT32_C( 502667641), 
UINT32_C( 258448214), UINT32_C(2622874348), UINT32_C( 808531712), UINT32_C( 454488139), UINT32_C( 123547093), UINT32_C( 483090439), UINT32_C( 784150413), UINT32_C( 659539981), UINT32_C( 235804996), UINT32_C( 885472585), UINT32_C(1398655610), UINT32_C( 859367716)) }, { simde_x_mm512_set_epu32(UINT32_C(1463758672), UINT32_C( 602211615), UINT32_C(3830002991), UINT32_C(2864741101), UINT32_C( 797104998), UINT32_C(2737423319), UINT32_C(3342229886), UINT32_C( 178625368), UINT32_C(3091160996), UINT32_C(1095216728), UINT32_C(3079561742), UINT32_C( 430790402), UINT32_C(3213858818), UINT32_C(2113970745), UINT32_C(4112838454), UINT32_C( 564512596)), simde_x_mm512_set_epu32(UINT32_C(1997049765), UINT32_C( 505563651), UINT32_C( 463125220), UINT32_C(3843753777), UINT32_C(2346173843), UINT32_C(2157864934), UINT32_C(2591157969), UINT32_C( 389679318), UINT32_C(3939775129), UINT32_C(2493364907), UINT32_C(2006619059), UINT32_C(3391409164), UINT32_C(1533151625), UINT32_C(2122196136), UINT32_C(1690360675), UINT32_C(1484935627)), simde_x_mm512_set_epu32(UINT32_C(1463758672), UINT32_C( 96647964), UINT32_C( 125001231), UINT32_C(2864741101), UINT32_C( 797104998), UINT32_C( 579558385), UINT32_C( 751071917), UINT32_C( 178625368), UINT32_C(3091160996), UINT32_C(1095216728), UINT32_C(1072942683), UINT32_C( 430790402), UINT32_C( 147555568), UINT32_C(2113970745), UINT32_C( 732117104), UINT32_C( 564512596)) }, { simde_x_mm512_set_epu32(UINT32_C( 908815803), UINT32_C(2319376026), UINT32_C(2065037155), UINT32_C( 623932649), UINT32_C(1610322797), UINT32_C(3452844305), UINT32_C(2031682359), UINT32_C(2994836943), UINT32_C(2344919086), UINT32_C( 238137788), UINT32_C(1978166020), UINT32_C( 76768592), UINT32_C(4043825594), UINT32_C(1274901810), UINT32_C( 413860084), UINT32_C( 550494320)), simde_x_mm512_set_epu32(UINT32_C(1228958503), UINT32_C(3519587969), UINT32_C(2809504529), UINT32_C(3115789449), UINT32_C(1767270276), UINT32_C( 490610321), UINT32_C(1164436618), UINT32_C(2374669797), 
UINT32_C(3604002618), UINT32_C(3414719029), UINT32_C(2289333019), UINT32_C(2213872499), UINT32_C(1572579389), UINT32_C(3511888959), UINT32_C(2399346014), UINT32_C(1967093325)), simde_x_mm512_set_epu32(UINT32_C( 908815803), UINT32_C(2319376026), UINT32_C(2065037155), UINT32_C( 623932649), UINT32_C(1610322797), UINT32_C( 18572058), UINT32_C( 867245741), UINT32_C( 620167146), UINT32_C(2344919086), UINT32_C( 238137788), UINT32_C(1978166020), UINT32_C( 76768592), UINT32_C( 898666816), UINT32_C(1274901810), UINT32_C( 413860084), UINT32_C( 550494320)) }, { simde_x_mm512_set_epu32(UINT32_C(1245407235), UINT32_C(4175005098), UINT32_C(2362914327), UINT32_C(2924553042), UINT32_C(2369006988), UINT32_C(2119408419), UINT32_C(3091878410), UINT32_C(3978436943), UINT32_C(1708684203), UINT32_C(1202455481), UINT32_C(2187745469), UINT32_C(3284847806), UINT32_C(3884897233), UINT32_C(2094036024), UINT32_C(2456834182), UINT32_C( 69201629)), simde_x_mm512_set_epu32(UINT32_C(3914271744), UINT32_C( 565328458), UINT32_C(4201942548), UINT32_C(1480532604), UINT32_C(4197506536), UINT32_C(3712719696), UINT32_C(3920217826), UINT32_C(1394313506), UINT32_C( 394553965), UINT32_C(2278253176), UINT32_C(1697927724), UINT32_C(2383307765), UINT32_C( 143428987), UINT32_C(3684943081), UINT32_C( 582607980), UINT32_C(1609326889)), simde_x_mm512_set_epu32(UINT32_C(1245407235), UINT32_C( 217705892), UINT32_C(2362914327), UINT32_C(1444020438), UINT32_C(2369006988), UINT32_C(2119408419), UINT32_C(3091878410), UINT32_C(1189809931), UINT32_C( 130468343), UINT32_C(1202455481), UINT32_C( 489817745), UINT32_C( 901540041), UINT32_C( 12314584), UINT32_C(2094036024), UINT32_C( 126402262), UINT32_C( 69201629)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_rem_epu32(test_vec[i].a, test_vec[i].b); simde_assert_m512i_u32(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_rem_epu64(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512i a; simde__m512i 
b; simde__m512i r; } test_vec[8] = { { simde_x_mm512_set_epu64(UINT64_C( 2968342496979776051), UINT64_C(10005265515001776413), UINT64_C(16837535683400356038), UINT64_C( 6738163160628300797), UINT64_C(13810255550447513201), UINT64_C( 6479913377553186648), UINT64_C( 7505871096235581515), UINT64_C( 2265477367564496986)), simde_x_mm512_set_epu64(UINT64_C(10866939104613927783), UINT64_C(11375825163207743431), UINT64_C(11633520338587575573), UINT64_C( 216242550290965460), UINT64_C( 5120732502404950997), UINT64_C( 5681284513410730040), UINT64_C( 6409558907924801050), UINT64_C( 5372227444888762251)), simde_x_mm512_set_epu64(UINT64_C( 2968342496979776051), UINT64_C(10005265515001776413), UINT64_C( 5204015344812780465), UINT64_C( 34644101608371537), UINT64_C( 3568790545637611207), UINT64_C( 798628864142456608), UINT64_C( 1096312188310780465), UINT64_C( 2265477367564496986)) }, { simde_x_mm512_set_epu64(UINT64_C( 5645659480511055559), UINT64_C(15272728730484288257), UINT64_C(14133460247011230967), UINT64_C(16423537638667915170), UINT64_C( 2118113466433927893), UINT64_C( 3840651400764901876), UINT64_C( 2114726288902596757), UINT64_C( 9482369585348649466)), simde_x_mm512_set_epu64(UINT64_C(13555234896536583899), UINT64_C( 7851952110853286921), UINT64_C(17616907291198234572), UINT64_C(16521184395064581900), UINT64_C( 7345032902979795528), UINT64_C(12329133549512917827), UINT64_C( 2328100732832272381), UINT64_C( 4111895855610225675)), simde_x_mm512_set_epu64(UINT64_C( 5645659480511055559), UINT64_C( 7420776619631001336), UINT64_C(14133460247011230967), UINT64_C(16423537638667915170), UINT64_C( 2118113466433927893), UINT64_C( 3840651400764901876), UINT64_C( 2114726288902596757), UINT64_C( 1258577874128198116)) }, { simde_x_mm512_set_epu64(UINT64_C( 7572458917823766705), UINT64_C(12217500042222052906), UINT64_C( 1159256113650983207), UINT64_C( 6193154838246823767), UINT64_C( 7449607714297299576), UINT64_C(14401023659121376347), UINT64_C( 8569312554655704071), 
UINT64_C(10336200663482757951)), simde_x_mm512_set_epu64(UINT64_C(15588592630942564743), UINT64_C( 9028813919053392068), UINT64_C( 5083059030774095197), UINT64_C(13888425720366328200), UINT64_C(16888199589465789243), UINT64_C(18237918400292775569), UINT64_C(12279468594349909724), UINT64_C(13493341674566517412)), simde_x_mm512_set_epu64(UINT64_C( 7572458917823766705), UINT64_C( 3188686123168660838), UINT64_C( 1159256113650983207), UINT64_C( 6193154838246823767), UINT64_C( 7449607714297299576), UINT64_C(14401023659121376347), UINT64_C( 8569312554655704071), UINT64_C(10336200663482757951)) }, { simde_x_mm512_set_epu64(UINT64_C( 2129749246616352421), UINT64_C( 3930946101587052880), UINT64_C( 6109596926925725236), UINT64_C(11931707044738783755), UINT64_C( 3908684742628183808), UINT64_C(15775432521885308750), UINT64_C( 2246668589251707300), UINT64_C( 9474721517893975343)), simde_x_mm512_set_epu64(UINT64_C( 5181754748372749246), UINT64_C( 2283432752406648940), UINT64_C(17825612137522679693), UINT64_C( 6205295972918594513), UINT64_C( 7540605987113962845), UINT64_C(13935122940778806069), UINT64_C( 9355601638871447350), UINT64_C(17674380633802211723)), simde_x_mm512_set_epu64(UINT64_C( 2129749246616352421), UINT64_C( 1647513349180403940), UINT64_C( 6109596926925725236), UINT64_C( 5726411071820189242), UINT64_C( 3908684742628183808), UINT64_C( 1840309581106502681), UINT64_C( 2246668589251707300), UINT64_C( 9474721517893975343)) }, { simde_x_mm512_set_epu64(UINT64_C(11414694502393074802), UINT64_C( 5732351344186366329), UINT64_C( 3673896834139808492), UINT64_C( 3472617261273378891), UINT64_C( 530630724433960967), UINT64_C(13609194605976671651), UINT64_C(17862411075628668824), UINT64_C( 6007180105039451483)), simde_x_mm512_set_epu64(UINT64_C( 2597258637662508799), UINT64_C(10698877731456040415), UINT64_C( 1281935105229028959), UINT64_C(13158200861647791958), UINT64_C(17820547312174620134), UINT64_C(10241294226337238422), UINT64_C(16849636328689785423), UINT64_C( 
8515452077469772855)), simde_x_mm512_set_epu64(UINT64_C( 1025659951743039606), UINT64_C( 5732351344186366329), UINT64_C( 1110026623681750574), UINT64_C( 3472617261273378891), UINT64_C( 530630724433960967), UINT64_C( 3367900379639433229), UINT64_C( 1012774746938883401), UINT64_C( 6007180105039451483)) }, { simde_x_mm512_set_epu64(UINT64_C( 6286795626078602527), UINT64_C(16449737592791923437), UINT64_C( 3423539900625568727), UINT64_C(14354768056262433624), UINT64_C(13276435385586003544), UINT64_C(13226616968333580034), UINT64_C(13803418519385186873), UINT64_C(17664506654225712980)), simde_x_mm512_set_epu64(UINT64_C( 8577263429665049091), UINT64_C( 1989107677696558897), UINT64_C(10076739928573503462), UINT64_C(11128938736014461142), UINT64_C(16921205335142546091), UINT64_C( 8618363237326703628), UINT64_C( 6584836091306452136), UINT64_C( 7260043819054420427)), simde_x_mm512_set_epu64(UINT64_C( 6286795626078602527), UINT64_C( 536876171219452261), UINT64_C( 3423539900625568727), UINT64_C( 3225829320247972482), UINT64_C(13276435385586003544), UINT64_C( 4608253731006876406), UINT64_C( 633746336772282601), UINT64_C( 3144419016116872126)) }, { simde_x_mm512_set_epu64(UINT64_C( 3903334154292354714), UINT64_C( 8869267046373815529), UINT64_C( 6916283752571091217), UINT64_C( 8726009290759968207), UINT64_C(10071350786374349244), UINT64_C( 8496158362035250512), UINT64_C(17368098678232675634), UINT64_C( 1777515526450307184)), simde_x_mm512_set_epu64(UINT64_C( 5278336582045705857), UINT64_C(12066730073134673033), UINT64_C( 7590368039103504017), UINT64_C( 5001217194949514725), UINT64_C(15479073382423099957), UINT64_C( 9832610448471819123), UINT64_C( 6754177049630551103), UINT64_C(10305112663885051469)), simde_x_mm512_set_epu64(UINT64_C( 3903334154292354714), UINT64_C( 8869267046373815529), UINT64_C( 6916283752571091217), UINT64_C( 3724792095810453482), UINT64_C(10071350786374349244), UINT64_C( 8496158362035250512), UINT64_C( 3859744578971573428), UINT64_C( 1777515526450307184)) }, { 
simde_x_mm512_set_epu64(UINT64_C( 5348983348701791658), UINT64_C(10148639760639402834), UINT64_C(10174807539574872867), UINT64_C(13279516658136916303), UINT64_C( 7338742772279280569), UINT64_C( 9396295244612029630), UINT64_C(16685506566149927992), UINT64_C(10552022463454113501)), simde_x_mm512_set_epu64(UINT64_C(16811669128702212682), UINT64_C(18047205824811442812), UINT64_C(18028153300578966352), UINT64_C(16837207357260532002), UINT64_C( 1694596378460381816), UINT64_C( 7292544047935022069), UINT64_C( 616022812148352233), UINT64_C( 2502282222097948969)), simde_x_mm512_set_epu64(UINT64_C( 5348983348701791658), UINT64_C(10148639760639402834), UINT64_C(10174807539574872867), UINT64_C(13279516658136916303), UINT64_C( 560357258437753305), UINT64_C( 2103751196677007561), UINT64_C( 52890638144417701), UINT64_C( 542893575062317625)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512i r = simde_mm512_rem_epu64(test_vec[i].a, test_vec[i].b); simde_assert_m512i_u64(r, ==, test_vec[i].r); } return 0; } static int test_simde_mm512_recip_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -838.19), SIMDE_FLOAT32_C( -143.82), SIMDE_FLOAT32_C( -921.01), SIMDE_FLOAT32_C( 206.87), SIMDE_FLOAT32_C( -588.92), SIMDE_FLOAT32_C( -497.03), SIMDE_FLOAT32_C( -701.44), SIMDE_FLOAT32_C( -106.77), SIMDE_FLOAT32_C( 464.17), SIMDE_FLOAT32_C( 464.85), SIMDE_FLOAT32_C( 819.12), SIMDE_FLOAT32_C( 908.79), SIMDE_FLOAT32_C( -61.04), SIMDE_FLOAT32_C( -36.34), SIMDE_FLOAT32_C( -38.98), SIMDE_FLOAT32_C( -132.37) }, { SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 
-0.03), SIMDE_FLOAT32_C( -0.01) } }, { { SIMDE_FLOAT32_C( -324.68), SIMDE_FLOAT32_C( 773.13), SIMDE_FLOAT32_C( -941.14), SIMDE_FLOAT32_C( 753.16), SIMDE_FLOAT32_C( -838.44), SIMDE_FLOAT32_C( -965.63), SIMDE_FLOAT32_C( 698.21), SIMDE_FLOAT32_C( -608.98), SIMDE_FLOAT32_C( -35.12), SIMDE_FLOAT32_C( 227.88), SIMDE_FLOAT32_C( -531.46), SIMDE_FLOAT32_C( 933.01), SIMDE_FLOAT32_C( 160.30), SIMDE_FLOAT32_C( 700.78), SIMDE_FLOAT32_C( -193.29), SIMDE_FLOAT32_C( 322.12) }, { SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( -443.04), SIMDE_FLOAT32_C( -114.30), SIMDE_FLOAT32_C( -471.01), SIMDE_FLOAT32_C( -31.96), SIMDE_FLOAT32_C( 388.67), SIMDE_FLOAT32_C( -172.45), SIMDE_FLOAT32_C( 861.27), SIMDE_FLOAT32_C( -147.16), SIMDE_FLOAT32_C( -707.59), SIMDE_FLOAT32_C( 680.39), SIMDE_FLOAT32_C( -238.37), SIMDE_FLOAT32_C( 231.37), SIMDE_FLOAT32_C( -355.96), SIMDE_FLOAT32_C( 722.66), SIMDE_FLOAT32_C( -901.00), SIMDE_FLOAT32_C( 319.36) }, { SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 495.79), SIMDE_FLOAT32_C( -842.14), SIMDE_FLOAT32_C( 72.53), SIMDE_FLOAT32_C( 657.34), SIMDE_FLOAT32_C( -807.78), SIMDE_FLOAT32_C( -229.27), SIMDE_FLOAT32_C( -951.64), SIMDE_FLOAT32_C( 157.10), SIMDE_FLOAT32_C( 998.62), SIMDE_FLOAT32_C( -483.10), SIMDE_FLOAT32_C( 90.12), 
SIMDE_FLOAT32_C( 158.92), SIMDE_FLOAT32_C( -782.32), SIMDE_FLOAT32_C( 896.82), SIMDE_FLOAT32_C( -518.96), SIMDE_FLOAT32_C( -225.36) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00) } }, { { SIMDE_FLOAT32_C( -217.48), SIMDE_FLOAT32_C( 10.04), SIMDE_FLOAT32_C( 742.68), SIMDE_FLOAT32_C( -828.81), SIMDE_FLOAT32_C( 837.59), SIMDE_FLOAT32_C( 603.95), SIMDE_FLOAT32_C( 24.04), SIMDE_FLOAT32_C( -870.01), SIMDE_FLOAT32_C( 284.34), SIMDE_FLOAT32_C( 785.67), SIMDE_FLOAT32_C( 361.36), SIMDE_FLOAT32_C( 928.38), SIMDE_FLOAT32_C( 508.33), SIMDE_FLOAT32_C( 460.36), SIMDE_FLOAT32_C( 247.75), SIMDE_FLOAT32_C( 4.11) }, { SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.24) } }, { { SIMDE_FLOAT32_C( 618.21), SIMDE_FLOAT32_C( -679.72), SIMDE_FLOAT32_C( -338.54), SIMDE_FLOAT32_C( 810.43), SIMDE_FLOAT32_C( 91.01), SIMDE_FLOAT32_C( -290.18), SIMDE_FLOAT32_C( -32.46), SIMDE_FLOAT32_C( 89.63), SIMDE_FLOAT32_C( 226.71), SIMDE_FLOAT32_C( -942.35), SIMDE_FLOAT32_C( -751.45), SIMDE_FLOAT32_C( 444.40), SIMDE_FLOAT32_C( 954.48), SIMDE_FLOAT32_C( -270.41), SIMDE_FLOAT32_C( -780.96), SIMDE_FLOAT32_C( -263.00) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.00), 
SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00) } }, { { SIMDE_FLOAT32_C( 739.63), SIMDE_FLOAT32_C( 961.72), SIMDE_FLOAT32_C( -91.80), SIMDE_FLOAT32_C( 577.21), SIMDE_FLOAT32_C( 565.67), SIMDE_FLOAT32_C( 932.23), SIMDE_FLOAT32_C( 707.21), SIMDE_FLOAT32_C( -149.99), SIMDE_FLOAT32_C( 717.90), SIMDE_FLOAT32_C( 68.56), SIMDE_FLOAT32_C( -221.60), SIMDE_FLOAT32_C( 226.23), SIMDE_FLOAT32_C( -471.08), SIMDE_FLOAT32_C( -973.85), SIMDE_FLOAT32_C( -769.66), SIMDE_FLOAT32_C( -852.87) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00) } }, { { SIMDE_FLOAT32_C( -653.58), SIMDE_FLOAT32_C( -108.21), SIMDE_FLOAT32_C( 957.57), SIMDE_FLOAT32_C( 437.43), SIMDE_FLOAT32_C( 601.61), SIMDE_FLOAT32_C( -74.89), SIMDE_FLOAT32_C( -472.94), SIMDE_FLOAT32_C( -171.67), SIMDE_FLOAT32_C( -17.24), SIMDE_FLOAT32_C( -224.39), SIMDE_FLOAT32_C( -727.28), SIMDE_FLOAT32_C( -62.76), SIMDE_FLOAT32_C( 505.21), SIMDE_FLOAT32_C( -508.24), SIMDE_FLOAT32_C( 674.24), SIMDE_FLOAT32_C( 244.83) }, { SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_recip_ps(a); 
simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_recip_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -559.02), SIMDE_FLOAT32_C( -653.98), SIMDE_FLOAT32_C( -629.51), SIMDE_FLOAT32_C( 712.50), SIMDE_FLOAT32_C( 485.85), SIMDE_FLOAT32_C( 827.80), SIMDE_FLOAT32_C( 553.84), SIMDE_FLOAT32_C( -702.08), SIMDE_FLOAT32_C( 943.96), SIMDE_FLOAT32_C( -619.45), SIMDE_FLOAT32_C( -617.57), SIMDE_FLOAT32_C( 132.09), SIMDE_FLOAT32_C( 914.75), SIMDE_FLOAT32_C( -571.13), SIMDE_FLOAT32_C( 684.78), SIMDE_FLOAT32_C( 888.84) }, UINT8_C( 30), { SIMDE_FLOAT32_C( 989.94), SIMDE_FLOAT32_C( 139.65), SIMDE_FLOAT32_C( 430.34), SIMDE_FLOAT32_C( 509.85), SIMDE_FLOAT32_C( -762.94), SIMDE_FLOAT32_C( -610.66), SIMDE_FLOAT32_C( -278.26), SIMDE_FLOAT32_C( 571.59), SIMDE_FLOAT32_C( -698.60), SIMDE_FLOAT32_C( 66.97), SIMDE_FLOAT32_C( 404.01), SIMDE_FLOAT32_C( -382.91), SIMDE_FLOAT32_C( -808.74), SIMDE_FLOAT32_C( 383.72), SIMDE_FLOAT32_C( 58.06), SIMDE_FLOAT32_C( -462.73) }, { SIMDE_FLOAT32_C( -559.02), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 827.80), SIMDE_FLOAT32_C( 553.84), SIMDE_FLOAT32_C( -702.08), SIMDE_FLOAT32_C( 943.96), SIMDE_FLOAT32_C( -619.45), SIMDE_FLOAT32_C( -617.57), SIMDE_FLOAT32_C( 132.09), SIMDE_FLOAT32_C( 914.75), SIMDE_FLOAT32_C( -571.13), SIMDE_FLOAT32_C( 684.78), SIMDE_FLOAT32_C( 888.84) } }, { { SIMDE_FLOAT32_C( 754.21), SIMDE_FLOAT32_C( -229.44), SIMDE_FLOAT32_C( -976.87), SIMDE_FLOAT32_C( 582.01), SIMDE_FLOAT32_C( -675.60), SIMDE_FLOAT32_C( -678.95), SIMDE_FLOAT32_C( 525.97), SIMDE_FLOAT32_C( -295.05), SIMDE_FLOAT32_C( -296.52), SIMDE_FLOAT32_C( -341.94), SIMDE_FLOAT32_C( -380.30), SIMDE_FLOAT32_C( 132.35), SIMDE_FLOAT32_C( -657.15), SIMDE_FLOAT32_C( -491.46), SIMDE_FLOAT32_C( 10.23), 
SIMDE_FLOAT32_C( -667.22) }, UINT8_C(254), { SIMDE_FLOAT32_C( -559.43), SIMDE_FLOAT32_C( 842.63), SIMDE_FLOAT32_C( 885.25), SIMDE_FLOAT32_C( -170.09), SIMDE_FLOAT32_C( -435.64), SIMDE_FLOAT32_C( 456.84), SIMDE_FLOAT32_C( 131.32), SIMDE_FLOAT32_C( 631.33), SIMDE_FLOAT32_C( -139.15), SIMDE_FLOAT32_C( 748.40), SIMDE_FLOAT32_C( 822.59), SIMDE_FLOAT32_C( -755.43), SIMDE_FLOAT32_C( -193.54), SIMDE_FLOAT32_C( -640.14), SIMDE_FLOAT32_C( 998.78), SIMDE_FLOAT32_C( 577.02) }, { SIMDE_FLOAT32_C( 754.21), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -296.52), SIMDE_FLOAT32_C( -341.94), SIMDE_FLOAT32_C( -380.30), SIMDE_FLOAT32_C( 132.35), SIMDE_FLOAT32_C( -657.15), SIMDE_FLOAT32_C( -491.46), SIMDE_FLOAT32_C( 10.23), SIMDE_FLOAT32_C( -667.22) } }, { { SIMDE_FLOAT32_C( -617.01), SIMDE_FLOAT32_C( 580.79), SIMDE_FLOAT32_C( 901.43), SIMDE_FLOAT32_C( -295.96), SIMDE_FLOAT32_C( 106.76), SIMDE_FLOAT32_C( -393.62), SIMDE_FLOAT32_C( 407.52), SIMDE_FLOAT32_C( 764.82), SIMDE_FLOAT32_C( 226.07), SIMDE_FLOAT32_C( -460.13), SIMDE_FLOAT32_C( -892.33), SIMDE_FLOAT32_C( 734.61), SIMDE_FLOAT32_C( 550.10), SIMDE_FLOAT32_C( -559.55), SIMDE_FLOAT32_C( 382.81), SIMDE_FLOAT32_C( 990.67) }, UINT8_C( 97), { SIMDE_FLOAT32_C( 268.05), SIMDE_FLOAT32_C( -179.42), SIMDE_FLOAT32_C( -152.56), SIMDE_FLOAT32_C( -275.11), SIMDE_FLOAT32_C( 951.90), SIMDE_FLOAT32_C( -521.22), SIMDE_FLOAT32_C( 585.74), SIMDE_FLOAT32_C( 700.30), SIMDE_FLOAT32_C( -698.63), SIMDE_FLOAT32_C( 830.31), SIMDE_FLOAT32_C( -493.24), SIMDE_FLOAT32_C( -338.77), SIMDE_FLOAT32_C( 829.08), SIMDE_FLOAT32_C( -916.21), SIMDE_FLOAT32_C( 44.23), SIMDE_FLOAT32_C( 409.87) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 580.79), SIMDE_FLOAT32_C( 901.43), SIMDE_FLOAT32_C( -295.96), SIMDE_FLOAT32_C( 106.76), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 764.82), SIMDE_FLOAT32_C( 226.07), 
SIMDE_FLOAT32_C( -460.13), SIMDE_FLOAT32_C( -892.33), SIMDE_FLOAT32_C( 734.61), SIMDE_FLOAT32_C( 550.10), SIMDE_FLOAT32_C( -559.55), SIMDE_FLOAT32_C( 382.81), SIMDE_FLOAT32_C( 990.67) } }, { { SIMDE_FLOAT32_C( 985.22), SIMDE_FLOAT32_C( 748.27), SIMDE_FLOAT32_C( -483.37), SIMDE_FLOAT32_C( -408.41), SIMDE_FLOAT32_C( 155.79), SIMDE_FLOAT32_C( -718.54), SIMDE_FLOAT32_C( 817.67), SIMDE_FLOAT32_C( 695.66), SIMDE_FLOAT32_C( -610.87), SIMDE_FLOAT32_C( 552.28), SIMDE_FLOAT32_C( 245.77), SIMDE_FLOAT32_C( -170.42), SIMDE_FLOAT32_C( -64.91), SIMDE_FLOAT32_C( 236.44), SIMDE_FLOAT32_C( 112.66), SIMDE_FLOAT32_C( -796.86) }, UINT8_C(153), { SIMDE_FLOAT32_C( 960.10), SIMDE_FLOAT32_C( -71.97), SIMDE_FLOAT32_C( -991.08), SIMDE_FLOAT32_C( -561.12), SIMDE_FLOAT32_C( -486.23), SIMDE_FLOAT32_C( 709.22), SIMDE_FLOAT32_C( -259.75), SIMDE_FLOAT32_C( -655.92), SIMDE_FLOAT32_C( -784.01), SIMDE_FLOAT32_C( 401.48), SIMDE_FLOAT32_C( -826.84), SIMDE_FLOAT32_C( -700.22), SIMDE_FLOAT32_C( -554.30), SIMDE_FLOAT32_C( 583.03), SIMDE_FLOAT32_C( -715.01), SIMDE_FLOAT32_C( -806.03) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 748.27), SIMDE_FLOAT32_C( -483.37), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -718.54), SIMDE_FLOAT32_C( 817.67), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -610.87), SIMDE_FLOAT32_C( 552.28), SIMDE_FLOAT32_C( 245.77), SIMDE_FLOAT32_C( -170.42), SIMDE_FLOAT32_C( -64.91), SIMDE_FLOAT32_C( 236.44), SIMDE_FLOAT32_C( 112.66), SIMDE_FLOAT32_C( -796.86) } }, { { SIMDE_FLOAT32_C( -900.34), SIMDE_FLOAT32_C( -123.41), SIMDE_FLOAT32_C( 349.77), SIMDE_FLOAT32_C( -618.88), SIMDE_FLOAT32_C( -305.75), SIMDE_FLOAT32_C( 45.43), SIMDE_FLOAT32_C( -229.75), SIMDE_FLOAT32_C( -753.47), SIMDE_FLOAT32_C( -708.80), SIMDE_FLOAT32_C( 599.82), SIMDE_FLOAT32_C( 181.62), SIMDE_FLOAT32_C( 527.63), SIMDE_FLOAT32_C( -287.52), SIMDE_FLOAT32_C( 384.76), SIMDE_FLOAT32_C( 584.65), SIMDE_FLOAT32_C( -327.41) }, UINT8_C( 60), { SIMDE_FLOAT32_C( 593.57), SIMDE_FLOAT32_C( 111.46), SIMDE_FLOAT32_C( 
-173.43), SIMDE_FLOAT32_C( 302.80), SIMDE_FLOAT32_C( 851.71), SIMDE_FLOAT32_C( 170.65), SIMDE_FLOAT32_C( 518.78), SIMDE_FLOAT32_C( 253.19), SIMDE_FLOAT32_C( 343.82), SIMDE_FLOAT32_C( 818.56), SIMDE_FLOAT32_C( 698.89), SIMDE_FLOAT32_C( -73.15), SIMDE_FLOAT32_C( -896.45), SIMDE_FLOAT32_C( 892.87), SIMDE_FLOAT32_C( 26.51), SIMDE_FLOAT32_C( -19.86) }, { SIMDE_FLOAT32_C( -900.34), SIMDE_FLOAT32_C( -123.41), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -229.75), SIMDE_FLOAT32_C( -753.47), SIMDE_FLOAT32_C( -708.80), SIMDE_FLOAT32_C( 599.82), SIMDE_FLOAT32_C( 181.62), SIMDE_FLOAT32_C( 527.63), SIMDE_FLOAT32_C( -287.52), SIMDE_FLOAT32_C( 384.76), SIMDE_FLOAT32_C( 584.65), SIMDE_FLOAT32_C( -327.41) } }, { { SIMDE_FLOAT32_C( 242.63), SIMDE_FLOAT32_C( 407.63), SIMDE_FLOAT32_C( 674.39), SIMDE_FLOAT32_C( -711.94), SIMDE_FLOAT32_C( -822.12), SIMDE_FLOAT32_C( 920.93), SIMDE_FLOAT32_C( -420.74), SIMDE_FLOAT32_C( 777.70), SIMDE_FLOAT32_C( 102.55), SIMDE_FLOAT32_C( -893.11), SIMDE_FLOAT32_C( -509.82), SIMDE_FLOAT32_C( -512.69), SIMDE_FLOAT32_C( 691.54), SIMDE_FLOAT32_C( 162.77), SIMDE_FLOAT32_C( -199.89), SIMDE_FLOAT32_C( 285.12) }, UINT8_C( 58), { SIMDE_FLOAT32_C( 626.68), SIMDE_FLOAT32_C( -412.08), SIMDE_FLOAT32_C( -874.05), SIMDE_FLOAT32_C( -202.66), SIMDE_FLOAT32_C( -893.30), SIMDE_FLOAT32_C( 379.14), SIMDE_FLOAT32_C( -858.85), SIMDE_FLOAT32_C( 925.26), SIMDE_FLOAT32_C( 78.03), SIMDE_FLOAT32_C( 68.00), SIMDE_FLOAT32_C( -971.19), SIMDE_FLOAT32_C( -29.10), SIMDE_FLOAT32_C( -905.49), SIMDE_FLOAT32_C( 8.95), SIMDE_FLOAT32_C( -786.47), SIMDE_FLOAT32_C( 502.14) }, { SIMDE_FLOAT32_C( 242.63), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 674.39), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -420.74), SIMDE_FLOAT32_C( 777.70), SIMDE_FLOAT32_C( 102.55), SIMDE_FLOAT32_C( -893.11), SIMDE_FLOAT32_C( -509.82), SIMDE_FLOAT32_C( -512.69), SIMDE_FLOAT32_C( 691.54), SIMDE_FLOAT32_C( 
162.77), SIMDE_FLOAT32_C( -199.89), SIMDE_FLOAT32_C( 285.12) } }, { { SIMDE_FLOAT32_C( -316.66), SIMDE_FLOAT32_C( -498.40), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( -395.73), SIMDE_FLOAT32_C( 80.86), SIMDE_FLOAT32_C( 457.72), SIMDE_FLOAT32_C( 706.82), SIMDE_FLOAT32_C( 187.75), SIMDE_FLOAT32_C( 947.90), SIMDE_FLOAT32_C( -805.87), SIMDE_FLOAT32_C( -120.71), SIMDE_FLOAT32_C( 110.67), SIMDE_FLOAT32_C( -5.76), SIMDE_FLOAT32_C( -835.59), SIMDE_FLOAT32_C( 384.91), SIMDE_FLOAT32_C( -379.07) }, UINT8_C(169), { SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 418.26), SIMDE_FLOAT32_C( -140.98), SIMDE_FLOAT32_C( -110.01), SIMDE_FLOAT32_C( 559.41), SIMDE_FLOAT32_C( -215.72), SIMDE_FLOAT32_C( 968.02), SIMDE_FLOAT32_C( -372.59), SIMDE_FLOAT32_C( -186.90), SIMDE_FLOAT32_C( -61.08), SIMDE_FLOAT32_C( -278.08), SIMDE_FLOAT32_C( 822.05), SIMDE_FLOAT32_C( 152.45), SIMDE_FLOAT32_C( -775.94), SIMDE_FLOAT32_C( -494.61), SIMDE_FLOAT32_C( 654.05) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -498.40), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 80.86), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 706.82), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 947.90), SIMDE_FLOAT32_C( -805.87), SIMDE_FLOAT32_C( -120.71), SIMDE_FLOAT32_C( 110.67), SIMDE_FLOAT32_C( -5.76), SIMDE_FLOAT32_C( -835.59), SIMDE_FLOAT32_C( 384.91), SIMDE_FLOAT32_C( -379.07) } }, { { SIMDE_FLOAT32_C( 904.08), SIMDE_FLOAT32_C( 109.66), SIMDE_FLOAT32_C( -265.09), SIMDE_FLOAT32_C( 361.80), SIMDE_FLOAT32_C( -183.52), SIMDE_FLOAT32_C( 922.65), SIMDE_FLOAT32_C( 309.70), SIMDE_FLOAT32_C( 10.61), SIMDE_FLOAT32_C( -198.06), SIMDE_FLOAT32_C( -579.63), SIMDE_FLOAT32_C( -995.15), SIMDE_FLOAT32_C( -33.65), SIMDE_FLOAT32_C( 805.28), SIMDE_FLOAT32_C( -374.23), SIMDE_FLOAT32_C( 718.68), SIMDE_FLOAT32_C( 316.13) }, UINT8_C(232), { SIMDE_FLOAT32_C( -422.30), SIMDE_FLOAT32_C( -793.87), SIMDE_FLOAT32_C( 603.45), SIMDE_FLOAT32_C( 361.98), SIMDE_FLOAT32_C( -825.85), SIMDE_FLOAT32_C( -769.14), SIMDE_FLOAT32_C( -824.92), 
SIMDE_FLOAT32_C( 113.07), SIMDE_FLOAT32_C( -47.22), SIMDE_FLOAT32_C( 997.13), SIMDE_FLOAT32_C( -734.48), SIMDE_FLOAT32_C( 176.84), SIMDE_FLOAT32_C( -497.48), SIMDE_FLOAT32_C( 919.57), SIMDE_FLOAT32_C( 80.93), SIMDE_FLOAT32_C( 612.18) }, { SIMDE_FLOAT32_C( 904.08), SIMDE_FLOAT32_C( 109.66), SIMDE_FLOAT32_C( -265.09), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -183.52), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -198.06), SIMDE_FLOAT32_C( -579.63), SIMDE_FLOAT32_C( -995.15), SIMDE_FLOAT32_C( -33.65), SIMDE_FLOAT32_C( 805.28), SIMDE_FLOAT32_C( -374.23), SIMDE_FLOAT32_C( 718.68), SIMDE_FLOAT32_C( 316.13) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_recip_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_recip_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 428.72), SIMDE_FLOAT64_C( -458.86), SIMDE_FLOAT64_C( 806.54), SIMDE_FLOAT64_C( 539.23), SIMDE_FLOAT64_C( -146.88), SIMDE_FLOAT64_C( 637.59), SIMDE_FLOAT64_C( 196.11), SIMDE_FLOAT64_C( -116.19) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( -0.01) } }, { { SIMDE_FLOAT64_C( 736.77), SIMDE_FLOAT64_C( -342.16), SIMDE_FLOAT64_C( -904.30), SIMDE_FLOAT64_C( 476.08), SIMDE_FLOAT64_C( 944.13), SIMDE_FLOAT64_C( 149.78), SIMDE_FLOAT64_C( -235.14), SIMDE_FLOAT64_C( 736.57) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00) } }, { { 
SIMDE_FLOAT64_C( -510.10), SIMDE_FLOAT64_C( 107.44), SIMDE_FLOAT64_C( -102.43), SIMDE_FLOAT64_C( 808.81), SIMDE_FLOAT64_C( 777.98), SIMDE_FLOAT64_C( -457.12), SIMDE_FLOAT64_C( -403.55), SIMDE_FLOAT64_C( -682.37) }, { SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.00) } }, { { SIMDE_FLOAT64_C( -420.25), SIMDE_FLOAT64_C( 346.45), SIMDE_FLOAT64_C( 923.73), SIMDE_FLOAT64_C( -651.25), SIMDE_FLOAT64_C( 204.13), SIMDE_FLOAT64_C( 115.66), SIMDE_FLOAT64_C( -627.27), SIMDE_FLOAT64_C( -367.15) }, { SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.00) } }, { { SIMDE_FLOAT64_C( 656.80), SIMDE_FLOAT64_C( -820.73), SIMDE_FLOAT64_C( -827.92), SIMDE_FLOAT64_C( -490.07), SIMDE_FLOAT64_C( 816.86), SIMDE_FLOAT64_C( 368.19), SIMDE_FLOAT64_C( 393.74), SIMDE_FLOAT64_C( 553.62) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -973.97), SIMDE_FLOAT64_C( 489.44), SIMDE_FLOAT64_C( 29.71), SIMDE_FLOAT64_C( 970.16), SIMDE_FLOAT64_C( -360.78), SIMDE_FLOAT64_C( 794.57), SIMDE_FLOAT64_C( 706.74), SIMDE_FLOAT64_C( 129.11) }, { SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.01) } }, { { SIMDE_FLOAT64_C( -97.99), SIMDE_FLOAT64_C( -395.69), SIMDE_FLOAT64_C( -62.07), SIMDE_FLOAT64_C( -320.01), SIMDE_FLOAT64_C( 147.19), SIMDE_FLOAT64_C( 534.38), SIMDE_FLOAT64_C( -2.39), SIMDE_FLOAT64_C( 726.95) }, { SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( -0.00), 
SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.42), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -119.17), SIMDE_FLOAT64_C( -78.65), SIMDE_FLOAT64_C( -924.30), SIMDE_FLOAT64_C( -915.04), SIMDE_FLOAT64_C( -962.99), SIMDE_FLOAT64_C( -551.57), SIMDE_FLOAT64_C( -282.19), SIMDE_FLOAT64_C( 693.81) }, { SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_recip_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_recip_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 209.25), SIMDE_FLOAT64_C( -726.84), SIMDE_FLOAT64_C( -123.44), SIMDE_FLOAT64_C( 592.78), SIMDE_FLOAT64_C( -139.26), SIMDE_FLOAT64_C( -313.25), SIMDE_FLOAT64_C( 562.79), SIMDE_FLOAT64_C( -134.44) }, UINT8_C(203), { SIMDE_FLOAT64_C( 624.55), SIMDE_FLOAT64_C( -863.70), SIMDE_FLOAT64_C( 788.13), SIMDE_FLOAT64_C( 415.51), SIMDE_FLOAT64_C( -772.51), SIMDE_FLOAT64_C( -934.49), SIMDE_FLOAT64_C( -140.87), SIMDE_FLOAT64_C( -265.50) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -123.44), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -139.26), SIMDE_FLOAT64_C( -313.25), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( -0.00) } }, { { SIMDE_FLOAT64_C( 420.64), SIMDE_FLOAT64_C( -690.14), SIMDE_FLOAT64_C( -96.93), SIMDE_FLOAT64_C( -275.78), SIMDE_FLOAT64_C( -453.21), SIMDE_FLOAT64_C( 875.20), SIMDE_FLOAT64_C( 895.34), SIMDE_FLOAT64_C( -766.82) }, UINT8_C(181), { SIMDE_FLOAT64_C( 503.15), SIMDE_FLOAT64_C( 966.97), SIMDE_FLOAT64_C( 164.84), SIMDE_FLOAT64_C( -672.96), 
SIMDE_FLOAT64_C( 332.40), SIMDE_FLOAT64_C( -625.91), SIMDE_FLOAT64_C( -399.81), SIMDE_FLOAT64_C( -791.04) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -690.14), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( -275.78), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 895.34), SIMDE_FLOAT64_C( -0.00) } }, { { SIMDE_FLOAT64_C( 966.87), SIMDE_FLOAT64_C( 460.94), SIMDE_FLOAT64_C( -104.29), SIMDE_FLOAT64_C( 529.67), SIMDE_FLOAT64_C( -673.50), SIMDE_FLOAT64_C( 637.76), SIMDE_FLOAT64_C( 154.22), SIMDE_FLOAT64_C( -537.20) }, UINT8_C( 88), { SIMDE_FLOAT64_C( -430.27), SIMDE_FLOAT64_C( -309.71), SIMDE_FLOAT64_C( 491.40), SIMDE_FLOAT64_C( 428.86), SIMDE_FLOAT64_C( 424.79), SIMDE_FLOAT64_C( -87.96), SIMDE_FLOAT64_C( 738.72), SIMDE_FLOAT64_C( -672.13) }, { SIMDE_FLOAT64_C( 966.87), SIMDE_FLOAT64_C( 460.94), SIMDE_FLOAT64_C( -104.29), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 637.76), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -537.20) } }, { { SIMDE_FLOAT64_C( 636.26), SIMDE_FLOAT64_C( -714.50), SIMDE_FLOAT64_C( -796.93), SIMDE_FLOAT64_C( 531.61), SIMDE_FLOAT64_C( -481.32), SIMDE_FLOAT64_C( -374.02), SIMDE_FLOAT64_C( 34.75), SIMDE_FLOAT64_C( -514.35) }, UINT8_C(120), { SIMDE_FLOAT64_C( 361.79), SIMDE_FLOAT64_C( 818.05), SIMDE_FLOAT64_C( -835.08), SIMDE_FLOAT64_C( 961.98), SIMDE_FLOAT64_C( -973.00), SIMDE_FLOAT64_C( -868.21), SIMDE_FLOAT64_C( 422.92), SIMDE_FLOAT64_C( -77.29) }, { SIMDE_FLOAT64_C( 636.26), SIMDE_FLOAT64_C( -714.50), SIMDE_FLOAT64_C( -796.93), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -514.35) } }, { { SIMDE_FLOAT64_C( 661.46), SIMDE_FLOAT64_C( 749.42), SIMDE_FLOAT64_C( -439.53), SIMDE_FLOAT64_C( -184.33), SIMDE_FLOAT64_C( -787.78), SIMDE_FLOAT64_C( 986.36), SIMDE_FLOAT64_C( 385.40), SIMDE_FLOAT64_C( -97.48) }, UINT8_C(166), { SIMDE_FLOAT64_C( -185.74), SIMDE_FLOAT64_C( -672.69), SIMDE_FLOAT64_C( -610.20), SIMDE_FLOAT64_C( -447.03), SIMDE_FLOAT64_C( -344.82), 
SIMDE_FLOAT64_C( -973.94), SIMDE_FLOAT64_C( -161.52), SIMDE_FLOAT64_C( -141.75) }, { SIMDE_FLOAT64_C( 661.46), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -184.33), SIMDE_FLOAT64_C( -787.78), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 385.40), SIMDE_FLOAT64_C( -0.01) } }, { { SIMDE_FLOAT64_C( 557.67), SIMDE_FLOAT64_C( 357.15), SIMDE_FLOAT64_C( 484.23), SIMDE_FLOAT64_C( -407.58), SIMDE_FLOAT64_C( 842.80), SIMDE_FLOAT64_C( 275.05), SIMDE_FLOAT64_C( 954.21), SIMDE_FLOAT64_C( 660.85) }, UINT8_C( 53), { SIMDE_FLOAT64_C( 916.20), SIMDE_FLOAT64_C( 687.85), SIMDE_FLOAT64_C( 571.76), SIMDE_FLOAT64_C( 339.11), SIMDE_FLOAT64_C( -389.44), SIMDE_FLOAT64_C( 233.22), SIMDE_FLOAT64_C( 88.53), SIMDE_FLOAT64_C( 171.03) }, { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 357.15), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -407.58), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 954.21), SIMDE_FLOAT64_C( 660.85) } }, { { SIMDE_FLOAT64_C( -951.11), SIMDE_FLOAT64_C( 300.76), SIMDE_FLOAT64_C( 157.39), SIMDE_FLOAT64_C( 434.29), SIMDE_FLOAT64_C( -796.73), SIMDE_FLOAT64_C( -364.85), SIMDE_FLOAT64_C( -751.45), SIMDE_FLOAT64_C( -469.41) }, UINT8_C(211), { SIMDE_FLOAT64_C( -198.47), SIMDE_FLOAT64_C( 185.77), SIMDE_FLOAT64_C( 51.02), SIMDE_FLOAT64_C( 640.00), SIMDE_FLOAT64_C( -955.99), SIMDE_FLOAT64_C( -391.31), SIMDE_FLOAT64_C( -2.84), SIMDE_FLOAT64_C( 528.24) }, { SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 157.39), SIMDE_FLOAT64_C( 434.29), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -364.85), SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 201.11), SIMDE_FLOAT64_C( -160.04), SIMDE_FLOAT64_C( -196.70), SIMDE_FLOAT64_C( 155.32), SIMDE_FLOAT64_C( -499.19), SIMDE_FLOAT64_C( -756.73), SIMDE_FLOAT64_C( 71.52), SIMDE_FLOAT64_C( -811.33) }, UINT8_C(173), { SIMDE_FLOAT64_C( -589.37), SIMDE_FLOAT64_C( -200.77), SIMDE_FLOAT64_C( 48.24), SIMDE_FLOAT64_C( 499.16), SIMDE_FLOAT64_C( 970.26), SIMDE_FLOAT64_C( 97.13), 
SIMDE_FLOAT64_C( -200.08), SIMDE_FLOAT64_C( 127.65) }, { SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( -160.04), SIMDE_FLOAT64_C( 0.02), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -499.19), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 71.52), SIMDE_FLOAT64_C( 0.01) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_recip_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_rint_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -665.69), SIMDE_FLOAT32_C( -529.73), SIMDE_FLOAT32_C( -462.47), SIMDE_FLOAT32_C( 909.14), SIMDE_FLOAT32_C( 211.54), SIMDE_FLOAT32_C( 67.95), SIMDE_FLOAT32_C( -26.51), SIMDE_FLOAT32_C( -276.52), SIMDE_FLOAT32_C( 812.99), SIMDE_FLOAT32_C( 513.31), SIMDE_FLOAT32_C( -214.67), SIMDE_FLOAT32_C( 502.05), SIMDE_FLOAT32_C( 96.51), SIMDE_FLOAT32_C( -399.31), SIMDE_FLOAT32_C( 783.78), SIMDE_FLOAT32_C( -69.17) }, { SIMDE_FLOAT32_C( -666.00), SIMDE_FLOAT32_C( -530.00), SIMDE_FLOAT32_C( -462.00), SIMDE_FLOAT32_C( 909.00), SIMDE_FLOAT32_C( 212.00), SIMDE_FLOAT32_C( 68.00), SIMDE_FLOAT32_C( -27.00), SIMDE_FLOAT32_C( -277.00), SIMDE_FLOAT32_C( 813.00), SIMDE_FLOAT32_C( 513.00), SIMDE_FLOAT32_C( -215.00), SIMDE_FLOAT32_C( 502.00), SIMDE_FLOAT32_C( 97.00), SIMDE_FLOAT32_C( -399.00), SIMDE_FLOAT32_C( 784.00), SIMDE_FLOAT32_C( -69.00) } }, { { SIMDE_FLOAT32_C( -445.96), SIMDE_FLOAT32_C( 637.70), SIMDE_FLOAT32_C( 890.97), SIMDE_FLOAT32_C( -578.19), SIMDE_FLOAT32_C( 730.74), SIMDE_FLOAT32_C( -499.66), SIMDE_FLOAT32_C( -463.47), SIMDE_FLOAT32_C( -93.74), SIMDE_FLOAT32_C( -617.08), SIMDE_FLOAT32_C( -340.40), SIMDE_FLOAT32_C( -933.85), SIMDE_FLOAT32_C( 901.57), SIMDE_FLOAT32_C( 629.93), SIMDE_FLOAT32_C( 901.12), 
SIMDE_FLOAT32_C( 755.15), SIMDE_FLOAT32_C( 964.24) }, { SIMDE_FLOAT32_C( -446.00), SIMDE_FLOAT32_C( 638.00), SIMDE_FLOAT32_C( 891.00), SIMDE_FLOAT32_C( -578.00), SIMDE_FLOAT32_C( 731.00), SIMDE_FLOAT32_C( -500.00), SIMDE_FLOAT32_C( -463.00), SIMDE_FLOAT32_C( -94.00), SIMDE_FLOAT32_C( -617.00), SIMDE_FLOAT32_C( -340.00), SIMDE_FLOAT32_C( -934.00), SIMDE_FLOAT32_C( 902.00), SIMDE_FLOAT32_C( 630.00), SIMDE_FLOAT32_C( 901.00), SIMDE_FLOAT32_C( 755.00), SIMDE_FLOAT32_C( 964.00) } }, { { SIMDE_FLOAT32_C( -628.61), SIMDE_FLOAT32_C( -707.33), SIMDE_FLOAT32_C( 873.38), SIMDE_FLOAT32_C( 582.93), SIMDE_FLOAT32_C( 360.62), SIMDE_FLOAT32_C( -153.12), SIMDE_FLOAT32_C( -693.59), SIMDE_FLOAT32_C( 173.61), SIMDE_FLOAT32_C( -639.82), SIMDE_FLOAT32_C( 91.74), SIMDE_FLOAT32_C( -324.34), SIMDE_FLOAT32_C( 456.69), SIMDE_FLOAT32_C( 692.43), SIMDE_FLOAT32_C( -540.56), SIMDE_FLOAT32_C( -612.48), SIMDE_FLOAT32_C( -753.53) }, { SIMDE_FLOAT32_C( -629.00), SIMDE_FLOAT32_C( -707.00), SIMDE_FLOAT32_C( 873.00), SIMDE_FLOAT32_C( 583.00), SIMDE_FLOAT32_C( 361.00), SIMDE_FLOAT32_C( -153.00), SIMDE_FLOAT32_C( -694.00), SIMDE_FLOAT32_C( 174.00), SIMDE_FLOAT32_C( -640.00), SIMDE_FLOAT32_C( 92.00), SIMDE_FLOAT32_C( -324.00), SIMDE_FLOAT32_C( 457.00), SIMDE_FLOAT32_C( 692.00), SIMDE_FLOAT32_C( -541.00), SIMDE_FLOAT32_C( -612.00), SIMDE_FLOAT32_C( -754.00) } }, { { SIMDE_FLOAT32_C( -902.86), SIMDE_FLOAT32_C( -721.51), SIMDE_FLOAT32_C( -331.72), SIMDE_FLOAT32_C( 827.88), SIMDE_FLOAT32_C( -221.17), SIMDE_FLOAT32_C( 204.81), SIMDE_FLOAT32_C( -265.86), SIMDE_FLOAT32_C( 161.75), SIMDE_FLOAT32_C( 864.41), SIMDE_FLOAT32_C( -199.71), SIMDE_FLOAT32_C( 63.32), SIMDE_FLOAT32_C( 494.34), SIMDE_FLOAT32_C( -298.59), SIMDE_FLOAT32_C( -181.53), SIMDE_FLOAT32_C( 458.58), SIMDE_FLOAT32_C( 72.80) }, { SIMDE_FLOAT32_C( -903.00), SIMDE_FLOAT32_C( -722.00), SIMDE_FLOAT32_C( -332.00), SIMDE_FLOAT32_C( 828.00), SIMDE_FLOAT32_C( -221.00), SIMDE_FLOAT32_C( 205.00), SIMDE_FLOAT32_C( -266.00), SIMDE_FLOAT32_C( 162.00), 
SIMDE_FLOAT32_C( 864.00), SIMDE_FLOAT32_C( -200.00), SIMDE_FLOAT32_C( 63.00), SIMDE_FLOAT32_C( 494.00), SIMDE_FLOAT32_C( -299.00), SIMDE_FLOAT32_C( -182.00), SIMDE_FLOAT32_C( 459.00), SIMDE_FLOAT32_C( 73.00) } }, { { SIMDE_FLOAT32_C( 111.14), SIMDE_FLOAT32_C( 331.96), SIMDE_FLOAT32_C( -344.27), SIMDE_FLOAT32_C( -528.24), SIMDE_FLOAT32_C( -821.17), SIMDE_FLOAT32_C( -37.86), SIMDE_FLOAT32_C( 645.37), SIMDE_FLOAT32_C( -460.98), SIMDE_FLOAT32_C( -946.11), SIMDE_FLOAT32_C( -678.97), SIMDE_FLOAT32_C( 995.71), SIMDE_FLOAT32_C( 746.32), SIMDE_FLOAT32_C( -219.53), SIMDE_FLOAT32_C( -616.77), SIMDE_FLOAT32_C( 992.79), SIMDE_FLOAT32_C( -122.39) }, { SIMDE_FLOAT32_C( 111.00), SIMDE_FLOAT32_C( 332.00), SIMDE_FLOAT32_C( -344.00), SIMDE_FLOAT32_C( -528.00), SIMDE_FLOAT32_C( -821.00), SIMDE_FLOAT32_C( -38.00), SIMDE_FLOAT32_C( 645.00), SIMDE_FLOAT32_C( -461.00), SIMDE_FLOAT32_C( -946.00), SIMDE_FLOAT32_C( -679.00), SIMDE_FLOAT32_C( 996.00), SIMDE_FLOAT32_C( 746.00), SIMDE_FLOAT32_C( -220.00), SIMDE_FLOAT32_C( -617.00), SIMDE_FLOAT32_C( 993.00), SIMDE_FLOAT32_C( -122.00) } }, { { SIMDE_FLOAT32_C( -338.27), SIMDE_FLOAT32_C( -338.93), SIMDE_FLOAT32_C( -294.51), SIMDE_FLOAT32_C( 440.55), SIMDE_FLOAT32_C( 865.88), SIMDE_FLOAT32_C( 439.63), SIMDE_FLOAT32_C( -397.70), SIMDE_FLOAT32_C( 730.29), SIMDE_FLOAT32_C( -760.09), SIMDE_FLOAT32_C( 665.63), SIMDE_FLOAT32_C( 224.63), SIMDE_FLOAT32_C( -58.68), SIMDE_FLOAT32_C( -515.91), SIMDE_FLOAT32_C( -316.80), SIMDE_FLOAT32_C( -985.88), SIMDE_FLOAT32_C( 595.23) }, { SIMDE_FLOAT32_C( -338.00), SIMDE_FLOAT32_C( -339.00), SIMDE_FLOAT32_C( -295.00), SIMDE_FLOAT32_C( 441.00), SIMDE_FLOAT32_C( 866.00), SIMDE_FLOAT32_C( 440.00), SIMDE_FLOAT32_C( -398.00), SIMDE_FLOAT32_C( 730.00), SIMDE_FLOAT32_C( -760.00), SIMDE_FLOAT32_C( 666.00), SIMDE_FLOAT32_C( 225.00), SIMDE_FLOAT32_C( -59.00), SIMDE_FLOAT32_C( -516.00), SIMDE_FLOAT32_C( -317.00), SIMDE_FLOAT32_C( -986.00), SIMDE_FLOAT32_C( 595.00) } }, { { SIMDE_FLOAT32_C( -984.84), SIMDE_FLOAT32_C( -330.15), 
SIMDE_FLOAT32_C( -933.01), SIMDE_FLOAT32_C( -806.00), SIMDE_FLOAT32_C( 632.00), SIMDE_FLOAT32_C( 712.36), SIMDE_FLOAT32_C( -266.98), SIMDE_FLOAT32_C( 685.88), SIMDE_FLOAT32_C( -966.61), SIMDE_FLOAT32_C( -271.27), SIMDE_FLOAT32_C( 432.20), SIMDE_FLOAT32_C( -186.14), SIMDE_FLOAT32_C( 111.96), SIMDE_FLOAT32_C( 424.99), SIMDE_FLOAT32_C( 691.48), SIMDE_FLOAT32_C( 773.69) }, { SIMDE_FLOAT32_C( -985.00), SIMDE_FLOAT32_C( -330.00), SIMDE_FLOAT32_C( -933.00), SIMDE_FLOAT32_C( -806.00), SIMDE_FLOAT32_C( 632.00), SIMDE_FLOAT32_C( 712.00), SIMDE_FLOAT32_C( -267.00), SIMDE_FLOAT32_C( 686.00), SIMDE_FLOAT32_C( -967.00), SIMDE_FLOAT32_C( -271.00), SIMDE_FLOAT32_C( 432.00), SIMDE_FLOAT32_C( -186.00), SIMDE_FLOAT32_C( 112.00), SIMDE_FLOAT32_C( 425.00), SIMDE_FLOAT32_C( 691.00), SIMDE_FLOAT32_C( 774.00) } }, { { SIMDE_FLOAT32_C( -913.94), SIMDE_FLOAT32_C( -603.03), SIMDE_FLOAT32_C( 214.24), SIMDE_FLOAT32_C( 951.94), SIMDE_FLOAT32_C( 836.60), SIMDE_FLOAT32_C( 816.55), SIMDE_FLOAT32_C( 682.23), SIMDE_FLOAT32_C( -923.49), SIMDE_FLOAT32_C( 482.17), SIMDE_FLOAT32_C( -93.14), SIMDE_FLOAT32_C( 17.84), SIMDE_FLOAT32_C( 966.27), SIMDE_FLOAT32_C( 590.07), SIMDE_FLOAT32_C( 31.96), SIMDE_FLOAT32_C( 561.50), SIMDE_FLOAT32_C( 605.23) }, { SIMDE_FLOAT32_C( -914.00), SIMDE_FLOAT32_C( -603.00), SIMDE_FLOAT32_C( 214.00), SIMDE_FLOAT32_C( 952.00), SIMDE_FLOAT32_C( 837.00), SIMDE_FLOAT32_C( 817.00), SIMDE_FLOAT32_C( 682.00), SIMDE_FLOAT32_C( -923.00), SIMDE_FLOAT32_C( 482.00), SIMDE_FLOAT32_C( -93.00), SIMDE_FLOAT32_C( 18.00), SIMDE_FLOAT32_C( 966.00), SIMDE_FLOAT32_C( 590.00), SIMDE_FLOAT32_C( 32.00), SIMDE_FLOAT32_C( 562.00), SIMDE_FLOAT32_C( 605.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_rint_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_rint_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { 
const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -528.78), SIMDE_FLOAT32_C( 785.86), SIMDE_FLOAT32_C( -381.92), SIMDE_FLOAT32_C( -860.14), SIMDE_FLOAT32_C( 577.18), SIMDE_FLOAT32_C( -21.79), SIMDE_FLOAT32_C( -56.29), SIMDE_FLOAT32_C( -835.30), SIMDE_FLOAT32_C( 126.46), SIMDE_FLOAT32_C( -806.06), SIMDE_FLOAT32_C( -450.59), SIMDE_FLOAT32_C( -478.17), SIMDE_FLOAT32_C( -707.43), SIMDE_FLOAT32_C( -543.19), SIMDE_FLOAT32_C( -401.16), SIMDE_FLOAT32_C( -180.42) }, UINT8_C( 91), { SIMDE_FLOAT32_C( 923.27), SIMDE_FLOAT32_C( 86.29), SIMDE_FLOAT32_C( 691.94), SIMDE_FLOAT32_C( 293.32), SIMDE_FLOAT32_C( -23.72), SIMDE_FLOAT32_C( -199.60), SIMDE_FLOAT32_C( 909.94), SIMDE_FLOAT32_C( 715.72), SIMDE_FLOAT32_C( -312.75), SIMDE_FLOAT32_C( 291.35), SIMDE_FLOAT32_C( -637.29), SIMDE_FLOAT32_C( -832.86), SIMDE_FLOAT32_C( -939.64), SIMDE_FLOAT32_C( -775.32), SIMDE_FLOAT32_C( -361.64), SIMDE_FLOAT32_C( 846.22) }, { SIMDE_FLOAT32_C( 923.00), SIMDE_FLOAT32_C( 86.00), SIMDE_FLOAT32_C( -381.92), SIMDE_FLOAT32_C( 293.00), SIMDE_FLOAT32_C( -24.00), SIMDE_FLOAT32_C( -21.79), SIMDE_FLOAT32_C( 910.00), SIMDE_FLOAT32_C( -835.30), SIMDE_FLOAT32_C( 126.46), SIMDE_FLOAT32_C( -806.06), SIMDE_FLOAT32_C( -450.59), SIMDE_FLOAT32_C( -478.17), SIMDE_FLOAT32_C( -707.43), SIMDE_FLOAT32_C( -543.19), SIMDE_FLOAT32_C( -401.16), SIMDE_FLOAT32_C( -180.42) } }, { { SIMDE_FLOAT32_C( -157.24), SIMDE_FLOAT32_C( -221.78), SIMDE_FLOAT32_C( 423.40), SIMDE_FLOAT32_C( 820.97), SIMDE_FLOAT32_C( 721.93), SIMDE_FLOAT32_C( 588.10), SIMDE_FLOAT32_C( -52.57), SIMDE_FLOAT32_C( 915.87), SIMDE_FLOAT32_C( -862.49), SIMDE_FLOAT32_C( 469.26), SIMDE_FLOAT32_C( -791.57), SIMDE_FLOAT32_C( -405.68), SIMDE_FLOAT32_C( -931.90), SIMDE_FLOAT32_C( 28.01), SIMDE_FLOAT32_C( 16.04), SIMDE_FLOAT32_C( 991.37) }, UINT8_C( 35), { SIMDE_FLOAT32_C( -292.02), SIMDE_FLOAT32_C( 284.69), SIMDE_FLOAT32_C( 90.57), SIMDE_FLOAT32_C( 508.38), SIMDE_FLOAT32_C( 194.63), 
SIMDE_FLOAT32_C( -193.71), SIMDE_FLOAT32_C( -804.38), SIMDE_FLOAT32_C( -514.01), SIMDE_FLOAT32_C( 169.00), SIMDE_FLOAT32_C( -637.23), SIMDE_FLOAT32_C( -453.66), SIMDE_FLOAT32_C( 393.68), SIMDE_FLOAT32_C( 1.13), SIMDE_FLOAT32_C( -607.44), SIMDE_FLOAT32_C( -763.56), SIMDE_FLOAT32_C( 779.35) }, { SIMDE_FLOAT32_C( -292.00), SIMDE_FLOAT32_C( 285.00), SIMDE_FLOAT32_C( 423.40), SIMDE_FLOAT32_C( 820.97), SIMDE_FLOAT32_C( 721.93), SIMDE_FLOAT32_C( -194.00), SIMDE_FLOAT32_C( -52.57), SIMDE_FLOAT32_C( 915.87), SIMDE_FLOAT32_C( -862.49), SIMDE_FLOAT32_C( 469.26), SIMDE_FLOAT32_C( -791.57), SIMDE_FLOAT32_C( -405.68), SIMDE_FLOAT32_C( -931.90), SIMDE_FLOAT32_C( 28.01), SIMDE_FLOAT32_C( 16.04), SIMDE_FLOAT32_C( 991.37) } }, { { SIMDE_FLOAT32_C( 815.97), SIMDE_FLOAT32_C( -942.60), SIMDE_FLOAT32_C( 501.28), SIMDE_FLOAT32_C( 404.07), SIMDE_FLOAT32_C( 4.83), SIMDE_FLOAT32_C( 417.15), SIMDE_FLOAT32_C( 541.58), SIMDE_FLOAT32_C( -525.90), SIMDE_FLOAT32_C( 625.58), SIMDE_FLOAT32_C( -864.10), SIMDE_FLOAT32_C( -457.80), SIMDE_FLOAT32_C( -346.41), SIMDE_FLOAT32_C( 151.94), SIMDE_FLOAT32_C( -466.43), SIMDE_FLOAT32_C( -232.11), SIMDE_FLOAT32_C( 859.92) }, UINT8_C(181), { SIMDE_FLOAT32_C( 858.46), SIMDE_FLOAT32_C( 368.30), SIMDE_FLOAT32_C( 12.90), SIMDE_FLOAT32_C( -335.24), SIMDE_FLOAT32_C( 563.92), SIMDE_FLOAT32_C( 498.88), SIMDE_FLOAT32_C( 833.76), SIMDE_FLOAT32_C( 926.69), SIMDE_FLOAT32_C( -954.77), SIMDE_FLOAT32_C( 227.44), SIMDE_FLOAT32_C( -72.18), SIMDE_FLOAT32_C( -562.21), SIMDE_FLOAT32_C( 463.87), SIMDE_FLOAT32_C( -292.83), SIMDE_FLOAT32_C( -746.24), SIMDE_FLOAT32_C( 521.28) }, { SIMDE_FLOAT32_C( 858.00), SIMDE_FLOAT32_C( -942.60), SIMDE_FLOAT32_C( 13.00), SIMDE_FLOAT32_C( 404.07), SIMDE_FLOAT32_C( 564.00), SIMDE_FLOAT32_C( 499.00), SIMDE_FLOAT32_C( 541.58), SIMDE_FLOAT32_C( 927.00), SIMDE_FLOAT32_C( 625.58), SIMDE_FLOAT32_C( -864.10), SIMDE_FLOAT32_C( -457.80), SIMDE_FLOAT32_C( -346.41), SIMDE_FLOAT32_C( 151.94), SIMDE_FLOAT32_C( -466.43), SIMDE_FLOAT32_C( -232.11), SIMDE_FLOAT32_C( 
859.92) } }, { { SIMDE_FLOAT32_C( -791.54), SIMDE_FLOAT32_C( 657.83), SIMDE_FLOAT32_C( -473.89), SIMDE_FLOAT32_C( 625.60), SIMDE_FLOAT32_C( 199.41), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 251.18), SIMDE_FLOAT32_C( 335.31), SIMDE_FLOAT32_C( 542.40), SIMDE_FLOAT32_C( 904.77), SIMDE_FLOAT32_C( -512.75), SIMDE_FLOAT32_C( -924.03), SIMDE_FLOAT32_C( -327.34), SIMDE_FLOAT32_C( -652.83), SIMDE_FLOAT32_C( 894.23), SIMDE_FLOAT32_C( -468.87) }, UINT8_C(106), { SIMDE_FLOAT32_C( -92.87), SIMDE_FLOAT32_C( 195.88), SIMDE_FLOAT32_C( 279.39), SIMDE_FLOAT32_C( -593.99), SIMDE_FLOAT32_C( 29.64), SIMDE_FLOAT32_C( 206.08), SIMDE_FLOAT32_C( -548.77), SIMDE_FLOAT32_C( -742.92), SIMDE_FLOAT32_C( -866.10), SIMDE_FLOAT32_C( -110.98), SIMDE_FLOAT32_C( 720.95), SIMDE_FLOAT32_C( -158.93), SIMDE_FLOAT32_C( 142.78), SIMDE_FLOAT32_C( 242.22), SIMDE_FLOAT32_C( 49.53), SIMDE_FLOAT32_C( -199.39) }, { SIMDE_FLOAT32_C( -791.54), SIMDE_FLOAT32_C( 196.00), SIMDE_FLOAT32_C( -473.89), SIMDE_FLOAT32_C( -594.00), SIMDE_FLOAT32_C( 199.41), SIMDE_FLOAT32_C( 206.00), SIMDE_FLOAT32_C( -549.00), SIMDE_FLOAT32_C( 335.31), SIMDE_FLOAT32_C( 542.40), SIMDE_FLOAT32_C( 904.77), SIMDE_FLOAT32_C( -512.75), SIMDE_FLOAT32_C( -924.03), SIMDE_FLOAT32_C( -327.34), SIMDE_FLOAT32_C( -652.83), SIMDE_FLOAT32_C( 894.23), SIMDE_FLOAT32_C( -468.87) } }, { { SIMDE_FLOAT32_C( 768.33), SIMDE_FLOAT32_C( -324.87), SIMDE_FLOAT32_C( -999.98), SIMDE_FLOAT32_C( -231.46), SIMDE_FLOAT32_C( 926.31), SIMDE_FLOAT32_C( 335.33), SIMDE_FLOAT32_C( -689.06), SIMDE_FLOAT32_C( 831.09), SIMDE_FLOAT32_C( 822.57), SIMDE_FLOAT32_C( -613.09), SIMDE_FLOAT32_C( -496.25), SIMDE_FLOAT32_C( -830.26), SIMDE_FLOAT32_C( -718.86), SIMDE_FLOAT32_C( 34.88), SIMDE_FLOAT32_C( 885.21), SIMDE_FLOAT32_C( 188.27) }, UINT8_C(197), { SIMDE_FLOAT32_C( 164.59), SIMDE_FLOAT32_C( 594.28), SIMDE_FLOAT32_C( 260.41), SIMDE_FLOAT32_C( -629.33), SIMDE_FLOAT32_C( -954.49), SIMDE_FLOAT32_C( 517.49), SIMDE_FLOAT32_C( -495.43), SIMDE_FLOAT32_C( -65.47), SIMDE_FLOAT32_C( 238.43), 
SIMDE_FLOAT32_C( 345.64), SIMDE_FLOAT32_C( -922.68), SIMDE_FLOAT32_C( -519.34), SIMDE_FLOAT32_C( -604.83), SIMDE_FLOAT32_C( -122.08), SIMDE_FLOAT32_C( -751.01), SIMDE_FLOAT32_C( 70.30) }, { SIMDE_FLOAT32_C( 165.00), SIMDE_FLOAT32_C( -324.87), SIMDE_FLOAT32_C( 260.00), SIMDE_FLOAT32_C( -231.46), SIMDE_FLOAT32_C( 926.31), SIMDE_FLOAT32_C( 335.33), SIMDE_FLOAT32_C( -495.00), SIMDE_FLOAT32_C( -65.00), SIMDE_FLOAT32_C( 822.57), SIMDE_FLOAT32_C( -613.09), SIMDE_FLOAT32_C( -496.25), SIMDE_FLOAT32_C( -830.26), SIMDE_FLOAT32_C( -718.86), SIMDE_FLOAT32_C( 34.88), SIMDE_FLOAT32_C( 885.21), SIMDE_FLOAT32_C( 188.27) } }, { { SIMDE_FLOAT32_C( -122.06), SIMDE_FLOAT32_C( 17.53), SIMDE_FLOAT32_C( -3.38), SIMDE_FLOAT32_C( -786.73), SIMDE_FLOAT32_C( 328.46), SIMDE_FLOAT32_C( -172.29), SIMDE_FLOAT32_C( -964.16), SIMDE_FLOAT32_C( 715.37), SIMDE_FLOAT32_C( 331.46), SIMDE_FLOAT32_C( -794.41), SIMDE_FLOAT32_C( 996.51), SIMDE_FLOAT32_C( -633.66), SIMDE_FLOAT32_C( -909.21), SIMDE_FLOAT32_C( 184.77), SIMDE_FLOAT32_C( -402.90), SIMDE_FLOAT32_C( 255.39) }, UINT8_C( 2), { SIMDE_FLOAT32_C( 857.51), SIMDE_FLOAT32_C( 626.06), SIMDE_FLOAT32_C( -175.44), SIMDE_FLOAT32_C( 375.00), SIMDE_FLOAT32_C( -869.37), SIMDE_FLOAT32_C( 759.09), SIMDE_FLOAT32_C( -386.57), SIMDE_FLOAT32_C( 476.27), SIMDE_FLOAT32_C( 836.41), SIMDE_FLOAT32_C( 94.09), SIMDE_FLOAT32_C( 871.44), SIMDE_FLOAT32_C( -285.67), SIMDE_FLOAT32_C( 343.08), SIMDE_FLOAT32_C( -58.26), SIMDE_FLOAT32_C( 592.27), SIMDE_FLOAT32_C( -639.39) }, { SIMDE_FLOAT32_C( -122.06), SIMDE_FLOAT32_C( 626.00), SIMDE_FLOAT32_C( -3.38), SIMDE_FLOAT32_C( -786.73), SIMDE_FLOAT32_C( 328.46), SIMDE_FLOAT32_C( -172.29), SIMDE_FLOAT32_C( -964.16), SIMDE_FLOAT32_C( 715.37), SIMDE_FLOAT32_C( 331.46), SIMDE_FLOAT32_C( -794.41), SIMDE_FLOAT32_C( 996.51), SIMDE_FLOAT32_C( -633.66), SIMDE_FLOAT32_C( -909.21), SIMDE_FLOAT32_C( 184.77), SIMDE_FLOAT32_C( -402.90), SIMDE_FLOAT32_C( 255.39) } }, { { SIMDE_FLOAT32_C( 938.35), SIMDE_FLOAT32_C( 805.54), SIMDE_FLOAT32_C( 689.07), 
SIMDE_FLOAT32_C( -233.94), SIMDE_FLOAT32_C( 841.38), SIMDE_FLOAT32_C( 404.44), SIMDE_FLOAT32_C( -902.48), SIMDE_FLOAT32_C( -953.03), SIMDE_FLOAT32_C( 400.95), SIMDE_FLOAT32_C( -536.14), SIMDE_FLOAT32_C( -862.24), SIMDE_FLOAT32_C( -414.28), SIMDE_FLOAT32_C( 60.96), SIMDE_FLOAT32_C( 393.15), SIMDE_FLOAT32_C( 364.77), SIMDE_FLOAT32_C( -81.52) }, UINT8_C( 26), { SIMDE_FLOAT32_C( -810.67), SIMDE_FLOAT32_C( -706.52), SIMDE_FLOAT32_C( 149.83), SIMDE_FLOAT32_C( 948.42), SIMDE_FLOAT32_C( -93.09), SIMDE_FLOAT32_C( -373.90), SIMDE_FLOAT32_C( 784.83), SIMDE_FLOAT32_C( -999.00), SIMDE_FLOAT32_C( -502.46), SIMDE_FLOAT32_C( -500.84), SIMDE_FLOAT32_C( 344.08), SIMDE_FLOAT32_C( 439.27), SIMDE_FLOAT32_C( -908.56), SIMDE_FLOAT32_C( 704.69), SIMDE_FLOAT32_C( 377.63), SIMDE_FLOAT32_C( 896.98) }, { SIMDE_FLOAT32_C( 938.35), SIMDE_FLOAT32_C( -707.00), SIMDE_FLOAT32_C( 689.07), SIMDE_FLOAT32_C( 948.00), SIMDE_FLOAT32_C( -93.00), SIMDE_FLOAT32_C( 404.44), SIMDE_FLOAT32_C( -902.48), SIMDE_FLOAT32_C( -953.03), SIMDE_FLOAT32_C( 400.95), SIMDE_FLOAT32_C( -536.14), SIMDE_FLOAT32_C( -862.24), SIMDE_FLOAT32_C( -414.28), SIMDE_FLOAT32_C( 60.96), SIMDE_FLOAT32_C( 393.15), SIMDE_FLOAT32_C( 364.77), SIMDE_FLOAT32_C( -81.52) } }, { { SIMDE_FLOAT32_C( 393.76), SIMDE_FLOAT32_C( -856.31), SIMDE_FLOAT32_C( 738.36), SIMDE_FLOAT32_C( -201.81), SIMDE_FLOAT32_C( -758.79), SIMDE_FLOAT32_C( 785.33), SIMDE_FLOAT32_C( -800.86), SIMDE_FLOAT32_C( -294.93), SIMDE_FLOAT32_C( 923.10), SIMDE_FLOAT32_C( -215.14), SIMDE_FLOAT32_C( 766.03), SIMDE_FLOAT32_C( 316.25), SIMDE_FLOAT32_C( -850.37), SIMDE_FLOAT32_C( -315.49), SIMDE_FLOAT32_C( -664.55), SIMDE_FLOAT32_C( -661.04) }, UINT8_C(104), { SIMDE_FLOAT32_C( 485.29), SIMDE_FLOAT32_C( -712.62), SIMDE_FLOAT32_C( 884.89), SIMDE_FLOAT32_C( -888.61), SIMDE_FLOAT32_C( -927.79), SIMDE_FLOAT32_C( 885.89), SIMDE_FLOAT32_C( -391.08), SIMDE_FLOAT32_C( -428.63), SIMDE_FLOAT32_C( 229.97), SIMDE_FLOAT32_C( -951.80), SIMDE_FLOAT32_C( -337.19), SIMDE_FLOAT32_C( -65.34), SIMDE_FLOAT32_C( 
425.83), SIMDE_FLOAT32_C( -440.21), SIMDE_FLOAT32_C( -671.58), SIMDE_FLOAT32_C( 569.52) }, { SIMDE_FLOAT32_C( 393.76), SIMDE_FLOAT32_C( -856.31), SIMDE_FLOAT32_C( 738.36), SIMDE_FLOAT32_C( -889.00), SIMDE_FLOAT32_C( -758.79), SIMDE_FLOAT32_C( 886.00), SIMDE_FLOAT32_C( -391.00), SIMDE_FLOAT32_C( -294.93), SIMDE_FLOAT32_C( 923.10), SIMDE_FLOAT32_C( -215.14), SIMDE_FLOAT32_C( 766.03), SIMDE_FLOAT32_C( 316.25), SIMDE_FLOAT32_C( -850.37), SIMDE_FLOAT32_C( -315.49), SIMDE_FLOAT32_C( -664.55), SIMDE_FLOAT32_C( -661.04) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_rint_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_rint_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -246.76), SIMDE_FLOAT64_C( 995.20), SIMDE_FLOAT64_C( 968.30), SIMDE_FLOAT64_C( 593.75), SIMDE_FLOAT64_C( 235.19), SIMDE_FLOAT64_C( 73.30), SIMDE_FLOAT64_C( -552.80), SIMDE_FLOAT64_C( -271.48) }, { SIMDE_FLOAT64_C( -247.00), SIMDE_FLOAT64_C( 995.00), SIMDE_FLOAT64_C( 968.00), SIMDE_FLOAT64_C( 594.00), SIMDE_FLOAT64_C( 235.00), SIMDE_FLOAT64_C( 73.00), SIMDE_FLOAT64_C( -553.00), SIMDE_FLOAT64_C( -271.00) } }, { { SIMDE_FLOAT64_C( -135.03), SIMDE_FLOAT64_C( -911.80), SIMDE_FLOAT64_C( -344.75), SIMDE_FLOAT64_C( -200.72), SIMDE_FLOAT64_C( 333.22), SIMDE_FLOAT64_C( 889.93), SIMDE_FLOAT64_C( -90.00), SIMDE_FLOAT64_C( 700.69) }, { SIMDE_FLOAT64_C( -135.00), SIMDE_FLOAT64_C( -912.00), SIMDE_FLOAT64_C( -345.00), SIMDE_FLOAT64_C( -201.00), SIMDE_FLOAT64_C( 333.00), SIMDE_FLOAT64_C( 890.00), SIMDE_FLOAT64_C( -90.00), SIMDE_FLOAT64_C( 701.00) } }, { { SIMDE_FLOAT64_C( -507.88), SIMDE_FLOAT64_C( 21.18), SIMDE_FLOAT64_C( -600.24), SIMDE_FLOAT64_C( -90.19), 
SIMDE_FLOAT64_C( -792.15), SIMDE_FLOAT64_C( 778.81), SIMDE_FLOAT64_C( 116.68), SIMDE_FLOAT64_C( 97.12) }, { SIMDE_FLOAT64_C( -508.00), SIMDE_FLOAT64_C( 21.00), SIMDE_FLOAT64_C( -600.00), SIMDE_FLOAT64_C( -90.00), SIMDE_FLOAT64_C( -792.00), SIMDE_FLOAT64_C( 779.00), SIMDE_FLOAT64_C( 117.00), SIMDE_FLOAT64_C( 97.00) } }, { { SIMDE_FLOAT64_C( 426.71), SIMDE_FLOAT64_C( 210.55), SIMDE_FLOAT64_C( -406.04), SIMDE_FLOAT64_C( 169.01), SIMDE_FLOAT64_C( 164.78), SIMDE_FLOAT64_C( -734.90), SIMDE_FLOAT64_C( -482.68), SIMDE_FLOAT64_C( 918.02) }, { SIMDE_FLOAT64_C( 427.00), SIMDE_FLOAT64_C( 211.00), SIMDE_FLOAT64_C( -406.00), SIMDE_FLOAT64_C( 169.00), SIMDE_FLOAT64_C( 165.00), SIMDE_FLOAT64_C( -735.00), SIMDE_FLOAT64_C( -483.00), SIMDE_FLOAT64_C( 918.00) } }, { { SIMDE_FLOAT64_C( -739.70), SIMDE_FLOAT64_C( -514.38), SIMDE_FLOAT64_C( 511.78), SIMDE_FLOAT64_C( 495.49), SIMDE_FLOAT64_C( 558.92), SIMDE_FLOAT64_C( 958.98), SIMDE_FLOAT64_C( -775.99), SIMDE_FLOAT64_C( -576.12) }, { SIMDE_FLOAT64_C( -740.00), SIMDE_FLOAT64_C( -514.00), SIMDE_FLOAT64_C( 512.00), SIMDE_FLOAT64_C( 495.00), SIMDE_FLOAT64_C( 559.00), SIMDE_FLOAT64_C( 959.00), SIMDE_FLOAT64_C( -776.00), SIMDE_FLOAT64_C( -576.00) } }, { { SIMDE_FLOAT64_C( -952.82), SIMDE_FLOAT64_C( -120.74), SIMDE_FLOAT64_C( 223.17), SIMDE_FLOAT64_C( 380.40), SIMDE_FLOAT64_C( -230.81), SIMDE_FLOAT64_C( -866.83), SIMDE_FLOAT64_C( 81.08), SIMDE_FLOAT64_C( 261.31) }, { SIMDE_FLOAT64_C( -953.00), SIMDE_FLOAT64_C( -121.00), SIMDE_FLOAT64_C( 223.00), SIMDE_FLOAT64_C( 380.00), SIMDE_FLOAT64_C( -231.00), SIMDE_FLOAT64_C( -867.00), SIMDE_FLOAT64_C( 81.00), SIMDE_FLOAT64_C( 261.00) } }, { { SIMDE_FLOAT64_C( 154.35), SIMDE_FLOAT64_C( 480.85), SIMDE_FLOAT64_C( -828.88), SIMDE_FLOAT64_C( 362.20), SIMDE_FLOAT64_C( 259.66), SIMDE_FLOAT64_C( 287.79), SIMDE_FLOAT64_C( -540.68), SIMDE_FLOAT64_C( -313.64) }, { SIMDE_FLOAT64_C( 154.00), SIMDE_FLOAT64_C( 481.00), SIMDE_FLOAT64_C( -829.00), SIMDE_FLOAT64_C( 362.00), SIMDE_FLOAT64_C( 260.00), SIMDE_FLOAT64_C( 
288.00), SIMDE_FLOAT64_C( -541.00), SIMDE_FLOAT64_C( -314.00) } }, { { SIMDE_FLOAT64_C( -501.66), SIMDE_FLOAT64_C( 53.28), SIMDE_FLOAT64_C( 855.37), SIMDE_FLOAT64_C( 663.12), SIMDE_FLOAT64_C( 318.39), SIMDE_FLOAT64_C( -627.30), SIMDE_FLOAT64_C( 581.15), SIMDE_FLOAT64_C( 578.68) }, { SIMDE_FLOAT64_C( -502.00), SIMDE_FLOAT64_C( 53.00), SIMDE_FLOAT64_C( 855.00), SIMDE_FLOAT64_C( 663.00), SIMDE_FLOAT64_C( 318.00), SIMDE_FLOAT64_C( -627.00), SIMDE_FLOAT64_C( 581.00), SIMDE_FLOAT64_C( 579.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_rint_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_rint_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -208.54), SIMDE_FLOAT64_C( -850.79), SIMDE_FLOAT64_C( -979.95), SIMDE_FLOAT64_C( -415.72), SIMDE_FLOAT64_C( 722.54), SIMDE_FLOAT64_C( -386.30), SIMDE_FLOAT64_C( 827.55), SIMDE_FLOAT64_C( -329.72) }, UINT8_C( 33), { SIMDE_FLOAT64_C( -547.16), SIMDE_FLOAT64_C( 343.76), SIMDE_FLOAT64_C( -161.57), SIMDE_FLOAT64_C( 958.51), SIMDE_FLOAT64_C( 185.76), SIMDE_FLOAT64_C( 479.23), SIMDE_FLOAT64_C( 948.46), SIMDE_FLOAT64_C( 354.63) }, { SIMDE_FLOAT64_C( -547.00), SIMDE_FLOAT64_C( -850.79), SIMDE_FLOAT64_C( -979.95), SIMDE_FLOAT64_C( -415.72), SIMDE_FLOAT64_C( 722.54), SIMDE_FLOAT64_C( 479.00), SIMDE_FLOAT64_C( 827.55), SIMDE_FLOAT64_C( -329.72) } }, { { SIMDE_FLOAT64_C( 164.70), SIMDE_FLOAT64_C( 580.02), SIMDE_FLOAT64_C( 369.11), SIMDE_FLOAT64_C( -928.66), SIMDE_FLOAT64_C( 607.84), SIMDE_FLOAT64_C( 793.55), SIMDE_FLOAT64_C( -417.32), SIMDE_FLOAT64_C( -33.65) }, UINT8_C(142), { SIMDE_FLOAT64_C( 85.45), SIMDE_FLOAT64_C( 426.84), SIMDE_FLOAT64_C( -691.54), SIMDE_FLOAT64_C( 519.42), SIMDE_FLOAT64_C( 413.73), 
SIMDE_FLOAT64_C( 99.92), SIMDE_FLOAT64_C( 668.63), SIMDE_FLOAT64_C( 433.78) }, { SIMDE_FLOAT64_C( 164.70), SIMDE_FLOAT64_C( 427.00), SIMDE_FLOAT64_C( -692.00), SIMDE_FLOAT64_C( 519.00), SIMDE_FLOAT64_C( 607.84), SIMDE_FLOAT64_C( 793.55), SIMDE_FLOAT64_C( -417.32), SIMDE_FLOAT64_C( 434.00) } }, { { SIMDE_FLOAT64_C( 684.20), SIMDE_FLOAT64_C( 391.17), SIMDE_FLOAT64_C( -952.53), SIMDE_FLOAT64_C( 511.75), SIMDE_FLOAT64_C( -938.55), SIMDE_FLOAT64_C( -562.45), SIMDE_FLOAT64_C( 964.59), SIMDE_FLOAT64_C( 405.21) }, UINT8_C(209), { SIMDE_FLOAT64_C( 923.10), SIMDE_FLOAT64_C( -409.02), SIMDE_FLOAT64_C( -244.78), SIMDE_FLOAT64_C( 871.57), SIMDE_FLOAT64_C( 945.61), SIMDE_FLOAT64_C( 919.91), SIMDE_FLOAT64_C( 451.58), SIMDE_FLOAT64_C( 314.71) }, { SIMDE_FLOAT64_C( 923.00), SIMDE_FLOAT64_C( 391.17), SIMDE_FLOAT64_C( -952.53), SIMDE_FLOAT64_C( 511.75), SIMDE_FLOAT64_C( 946.00), SIMDE_FLOAT64_C( -562.45), SIMDE_FLOAT64_C( 452.00), SIMDE_FLOAT64_C( 315.00) } }, { { SIMDE_FLOAT64_C( 991.25), SIMDE_FLOAT64_C( 59.43), SIMDE_FLOAT64_C( 108.26), SIMDE_FLOAT64_C( -426.07), SIMDE_FLOAT64_C( -974.22), SIMDE_FLOAT64_C( 827.67), SIMDE_FLOAT64_C( 659.39), SIMDE_FLOAT64_C( 452.62) }, UINT8_C( 74), { SIMDE_FLOAT64_C( 178.81), SIMDE_FLOAT64_C( -133.64), SIMDE_FLOAT64_C( 236.06), SIMDE_FLOAT64_C( -152.57), SIMDE_FLOAT64_C( -699.87), SIMDE_FLOAT64_C( -79.74), SIMDE_FLOAT64_C( -761.39), SIMDE_FLOAT64_C( -652.39) }, { SIMDE_FLOAT64_C( 991.25), SIMDE_FLOAT64_C( -134.00), SIMDE_FLOAT64_C( 108.26), SIMDE_FLOAT64_C( -153.00), SIMDE_FLOAT64_C( -974.22), SIMDE_FLOAT64_C( 827.67), SIMDE_FLOAT64_C( -761.00), SIMDE_FLOAT64_C( 452.62) } }, { { SIMDE_FLOAT64_C( -567.98), SIMDE_FLOAT64_C( -699.94), SIMDE_FLOAT64_C( -214.84), SIMDE_FLOAT64_C( -603.39), SIMDE_FLOAT64_C( 705.27), SIMDE_FLOAT64_C( -938.85), SIMDE_FLOAT64_C( -680.29), SIMDE_FLOAT64_C( -703.75) }, UINT8_C(254), { SIMDE_FLOAT64_C( -808.72), SIMDE_FLOAT64_C( -758.15), SIMDE_FLOAT64_C( -263.72), SIMDE_FLOAT64_C( 642.86), SIMDE_FLOAT64_C( 556.57), 
SIMDE_FLOAT64_C( -272.47), SIMDE_FLOAT64_C( -297.71), SIMDE_FLOAT64_C( -335.17) }, { SIMDE_FLOAT64_C( -567.98), SIMDE_FLOAT64_C( -758.00), SIMDE_FLOAT64_C( -264.00), SIMDE_FLOAT64_C( 643.00), SIMDE_FLOAT64_C( 557.00), SIMDE_FLOAT64_C( -272.00), SIMDE_FLOAT64_C( -298.00), SIMDE_FLOAT64_C( -335.00) } }, { { SIMDE_FLOAT64_C( 301.46), SIMDE_FLOAT64_C( -271.93), SIMDE_FLOAT64_C( -507.50), SIMDE_FLOAT64_C( -39.16), SIMDE_FLOAT64_C( -819.31), SIMDE_FLOAT64_C( -371.36), SIMDE_FLOAT64_C( -860.35), SIMDE_FLOAT64_C( 47.05) }, UINT8_C( 9), { SIMDE_FLOAT64_C( -12.91), SIMDE_FLOAT64_C( 347.18), SIMDE_FLOAT64_C( -215.03), SIMDE_FLOAT64_C( 225.69), SIMDE_FLOAT64_C( 694.79), SIMDE_FLOAT64_C( 216.99), SIMDE_FLOAT64_C( 525.75), SIMDE_FLOAT64_C( -520.05) }, { SIMDE_FLOAT64_C( -13.00), SIMDE_FLOAT64_C( -271.93), SIMDE_FLOAT64_C( -507.50), SIMDE_FLOAT64_C( 226.00), SIMDE_FLOAT64_C( -819.31), SIMDE_FLOAT64_C( -371.36), SIMDE_FLOAT64_C( -860.35), SIMDE_FLOAT64_C( 47.05) } }, { { SIMDE_FLOAT64_C( 613.60), SIMDE_FLOAT64_C( 231.02), SIMDE_FLOAT64_C( -458.90), SIMDE_FLOAT64_C( 933.31), SIMDE_FLOAT64_C( 527.27), SIMDE_FLOAT64_C( 357.46), SIMDE_FLOAT64_C( -875.42), SIMDE_FLOAT64_C( 769.12) }, UINT8_C(129), { SIMDE_FLOAT64_C( 767.45), SIMDE_FLOAT64_C( 325.69), SIMDE_FLOAT64_C( -178.73), SIMDE_FLOAT64_C( -530.26), SIMDE_FLOAT64_C( 990.52), SIMDE_FLOAT64_C( -877.27), SIMDE_FLOAT64_C( 197.81), SIMDE_FLOAT64_C( -516.98) }, { SIMDE_FLOAT64_C( 767.00), SIMDE_FLOAT64_C( 231.02), SIMDE_FLOAT64_C( -458.90), SIMDE_FLOAT64_C( 933.31), SIMDE_FLOAT64_C( 527.27), SIMDE_FLOAT64_C( 357.46), SIMDE_FLOAT64_C( -875.42), SIMDE_FLOAT64_C( -517.00) } }, { { SIMDE_FLOAT64_C( 83.57), SIMDE_FLOAT64_C( 378.50), SIMDE_FLOAT64_C( 111.66), SIMDE_FLOAT64_C( 223.22), SIMDE_FLOAT64_C( -574.45), SIMDE_FLOAT64_C( -23.63), SIMDE_FLOAT64_C( -789.69), SIMDE_FLOAT64_C( 772.73) }, UINT8_C(203), { SIMDE_FLOAT64_C( 436.00), SIMDE_FLOAT64_C( 467.52), SIMDE_FLOAT64_C( -21.68), SIMDE_FLOAT64_C( -38.25), SIMDE_FLOAT64_C( 947.47), 
SIMDE_FLOAT64_C( -408.08), SIMDE_FLOAT64_C( -807.23), SIMDE_FLOAT64_C( -511.43) }, { SIMDE_FLOAT64_C( 436.00), SIMDE_FLOAT64_C( 468.00), SIMDE_FLOAT64_C( 111.66), SIMDE_FLOAT64_C( -38.00), SIMDE_FLOAT64_C( -574.45), SIMDE_FLOAT64_C( -23.63), SIMDE_FLOAT64_C( -807.00), SIMDE_FLOAT64_C( -511.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_rint_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; }
/* Test for simde_mm_sin_ps().
 *
 * test_vec holds 8 cases; each pairs an input vector `a` with the expected
 * result `r`, both built with simde_mm_set_ps() using the same argument
 * order, so the n-th argument of `r` is the expected sine of the n-th
 * argument of `a`. The expected values are consistent with the inputs being
 * in radians (e.g. 346.63 -> 0.87) and are listed to two decimal places.
 * The table is a non-static local whose initializers are simde_mm_set_ps()
 * expressions, so it is presumably built each time the test runs.
 *
 * Returns 0 once every case has been checked. What happens on a mismatch
 * depends on simde_assert_m128_close, which is defined elsewhere. */
static int test_simde_mm_sin_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( 0.87)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( 0.48)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.33)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( -0.30)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( -0.53)) }, {
simde_mm_set_ps(SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( -0.01)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( -0.88)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.79)) } };
/* Run every case: compute the sine of `a` and compare it with `r`. The last
 * argument (1) is presumably the comparison precision in decimal places --
 * TODO confirm against the definition of simde_assert_m128_close. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_sin_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; }
static int test_simde_mm_sin_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( 0.87)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.97)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( 0.48)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( -0.88)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.33)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( 0.94)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( -0.30)) },
{ simde_mm_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( -0.95)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_sin_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_sin_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( 0.87)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.33)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( -0.53)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( 
-976.55), SIMDE_FLOAT32_C( -660.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( -0.88)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 28.08), SIMDE_FLOAT32_C( 841.21)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( -0.67)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -30.79)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.59)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( 398.82)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( 0.16)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 254.31)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 
-0.41), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.16)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_sin_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm256_sin_pd().
 *
 * test_vec holds 8 cases; each pairs an input vector `a` with the expected
 * result `r`, both built with simde_mm256_set_pd() using the same argument
 * order, so the n-th argument of `r` is the expected sine of the n-th
 * argument of `a`. The expected values are consistent with the inputs being
 * in radians (e.g. 346.63 -> 0.87) and are listed to two decimal places.
 * The input/expected numbers are the same ones used by the
 * test_simde_mm_sin_ps table in this file, here as doubles.
 *
 * Returns 0 once every case has been checked. What happens on a mismatch
 * depends on simde_assert_m256d_close, which is defined elsewhere. */
static int test_simde_mm256_sin_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.97), SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( 0.87)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( -0.88), SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( 0.48)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( 0.94), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.33)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( -0.95), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( -0.30)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( -417.54), SIMDE_FLOAT64_C( 696.87)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( -0.29), SIMDE_FLOAT64_C( -0.53)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -923.64)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( -0.01)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -660.80)),
simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( -0.47), SIMDE_FLOAT64_C( -0.88)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 687.09)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( 0.79)) } };
/* Run every case: compute the sine of `a` and compare it with `r`. The last
 * argument (1) is presumably the comparison precision in decimal places --
 * TODO confirm against the definition of simde_assert_m256d_close. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_sin_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; }
static int test_simde_mm512_sin_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( 0.87)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)),
simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( -0.53)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 28.08), SIMDE_FLOAT32_C( 841.21)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( -0.67)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 254.31), SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( 398.82)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 
-0.55), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( 0.16)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -328.54), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( -70.91), SIMDE_FLOAT32_C( 543.35), SIMDE_FLOAT32_C( 120.65), SIMDE_FLOAT32_C( -171.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( -0.81), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( -0.96)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( 977.36), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( -738.19)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( -0.81), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( -0.08)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( 
-337.60), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( 710.38), SIMDE_FLOAT32_C( 977.49), SIMDE_FLOAT32_C( -756.42), SIMDE_FLOAT32_C( 424.81), SIMDE_FLOAT32_C( 27.25), SIMDE_FLOAT32_C( -95.15)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( -0.78)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -696.69)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 0.68)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_sin_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_sin_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 687.09), 
SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( 696.87), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( 346.63)), UINT16_C(41466), simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( -754.38)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 346.63)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 28.08)), UINT16_C(36797), simde_mm512_set_ps(SIMDE_FLOAT32_C( -171.51), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 254.31), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( 398.82), SIMDE_FLOAT32_C( 395.92), 
SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( 993.90)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 0.92)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -95.15), SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( -738.19), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 543.35)), UINT16_C(16804), simde_mm512_set_ps(SIMDE_FLOAT32_C( 27.25), SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( 977.36), SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -328.54), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( -70.91)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -95.15), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 543.35)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -348.70), SIMDE_FLOAT32_C( -438.19), 
SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( -337.60), SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( 710.38), SIMDE_FLOAT32_C( -756.42)), UINT16_C( 2107), simde_mm512_set_ps(SIMDE_FLOAT32_C( 897.27), SIMDE_FLOAT32_C( -197.89), SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( -696.69), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( 977.49)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -348.70), SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -0.44)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -15.61), SIMDE_FLOAT32_C( -737.13), SIMDE_FLOAT32_C( -314.93), SIMDE_FLOAT32_C( 177.92), SIMDE_FLOAT32_C( 345.93), SIMDE_FLOAT32_C( 888.71), SIMDE_FLOAT32_C( 915.71), SIMDE_FLOAT32_C( 133.52), SIMDE_FLOAT32_C( 484.94), SIMDE_FLOAT32_C( -598.06), SIMDE_FLOAT32_C( -791.07), SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( 221.37), SIMDE_FLOAT32_C( -788.36), SIMDE_FLOAT32_C( -775.04), SIMDE_FLOAT32_C( 440.64)), UINT16_C(22274), simde_mm512_set_ps(SIMDE_FLOAT32_C( 496.57), SIMDE_FLOAT32_C( 915.19), SIMDE_FLOAT32_C( -718.40), SIMDE_FLOAT32_C( 159.97), SIMDE_FLOAT32_C( -861.01), SIMDE_FLOAT32_C( 426.61), SIMDE_FLOAT32_C( 932.11), SIMDE_FLOAT32_C( 110.36), SIMDE_FLOAT32_C( 826.84), SIMDE_FLOAT32_C( 
-76.75), SIMDE_FLOAT32_C( 237.58), SIMDE_FLOAT32_C( -378.50), SIMDE_FLOAT32_C( -601.68), SIMDE_FLOAT32_C( -623.50), SIMDE_FLOAT32_C( -942.47), SIMDE_FLOAT32_C( 475.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -15.61), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( -314.93), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( 345.93), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( 484.94), SIMDE_FLOAT32_C( -598.06), SIMDE_FLOAT32_C( -791.07), SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( 221.37), SIMDE_FLOAT32_C( -788.36), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 440.64)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 883.05), SIMDE_FLOAT32_C( -807.28), SIMDE_FLOAT32_C( -70.05), SIMDE_FLOAT32_C( -784.34), SIMDE_FLOAT32_C( 92.52), SIMDE_FLOAT32_C( 206.60), SIMDE_FLOAT32_C( 834.60), SIMDE_FLOAT32_C( -65.60), SIMDE_FLOAT32_C( -286.07), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -318.38), SIMDE_FLOAT32_C( 783.48), SIMDE_FLOAT32_C( -628.82), SIMDE_FLOAT32_C( 556.35), SIMDE_FLOAT32_C( 439.43), SIMDE_FLOAT32_C( 434.03)), UINT16_C(27396), simde_mm512_set_ps(SIMDE_FLOAT32_C( -964.25), SIMDE_FLOAT32_C( -406.33), SIMDE_FLOAT32_C( -743.66), SIMDE_FLOAT32_C( -764.58), SIMDE_FLOAT32_C( 789.89), SIMDE_FLOAT32_C( 4.83), SIMDE_FLOAT32_C( -818.54), SIMDE_FLOAT32_C( 161.06), SIMDE_FLOAT32_C( 579.25), SIMDE_FLOAT32_C( -11.78), SIMDE_FLOAT32_C( -308.52), SIMDE_FLOAT32_C( -719.57), SIMDE_FLOAT32_C( 334.00), SIMDE_FLOAT32_C( 274.71), SIMDE_FLOAT32_C( -916.82), SIMDE_FLOAT32_C( -490.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 883.05), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( -784.34), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 206.60), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -286.07), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -318.38), SIMDE_FLOAT32_C( 783.48), SIMDE_FLOAT32_C( -628.82), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 439.43), SIMDE_FLOAT32_C( 434.03)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 529.63), 
SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -967.78), SIMDE_FLOAT32_C( 638.94), SIMDE_FLOAT32_C( 450.90), SIMDE_FLOAT32_C( -771.54), SIMDE_FLOAT32_C( 105.79), SIMDE_FLOAT32_C( 590.10), SIMDE_FLOAT32_C( 30.91), SIMDE_FLOAT32_C( 635.35), SIMDE_FLOAT32_C( -84.00), SIMDE_FLOAT32_C( 80.04), SIMDE_FLOAT32_C( -709.46), SIMDE_FLOAT32_C( 607.86), SIMDE_FLOAT32_C( 394.58), SIMDE_FLOAT32_C( -889.11)), UINT16_C( 953), simde_mm512_set_ps(SIMDE_FLOAT32_C( 18.75), SIMDE_FLOAT32_C( 809.05), SIMDE_FLOAT32_C( 144.05), SIMDE_FLOAT32_C( -427.72), SIMDE_FLOAT32_C( 308.28), SIMDE_FLOAT32_C( -177.05), SIMDE_FLOAT32_C( -457.77), SIMDE_FLOAT32_C( 678.24), SIMDE_FLOAT32_C( 66.05), SIMDE_FLOAT32_C( -267.71), SIMDE_FLOAT32_C( 117.28), SIMDE_FLOAT32_C( -576.80), SIMDE_FLOAT32_C( -38.39), SIMDE_FLOAT32_C( -250.14), SIMDE_FLOAT32_C( -53.92), SIMDE_FLOAT32_C( 91.94)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 529.63), SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -967.78), SIMDE_FLOAT32_C( 638.94), SIMDE_FLOAT32_C( 450.90), SIMDE_FLOAT32_C( -771.54), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 635.35), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( 607.86), SIMDE_FLOAT32_C( 394.58), SIMDE_FLOAT32_C( -0.74)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -788.39), SIMDE_FLOAT32_C( 330.43), SIMDE_FLOAT32_C( -493.41), SIMDE_FLOAT32_C( 822.72), SIMDE_FLOAT32_C( 956.68), SIMDE_FLOAT32_C( 954.62), SIMDE_FLOAT32_C( 825.49), SIMDE_FLOAT32_C( -816.27), SIMDE_FLOAT32_C( -209.34), SIMDE_FLOAT32_C( -933.21), SIMDE_FLOAT32_C( -728.70), SIMDE_FLOAT32_C( -420.06), SIMDE_FLOAT32_C( 100.32), SIMDE_FLOAT32_C( 103.15), SIMDE_FLOAT32_C( 439.77), SIMDE_FLOAT32_C( -204.33)), UINT16_C(12713), simde_mm512_set_ps(SIMDE_FLOAT32_C( -841.43), SIMDE_FLOAT32_C( -14.16), SIMDE_FLOAT32_C( 824.88), SIMDE_FLOAT32_C( 793.63), SIMDE_FLOAT32_C( -736.75), SIMDE_FLOAT32_C( -310.57), SIMDE_FLOAT32_C( 728.87), SIMDE_FLOAT32_C( -350.72), SIMDE_FLOAT32_C( 60.89), 
SIMDE_FLOAT32_C( 109.81), SIMDE_FLOAT32_C( 715.94), SIMDE_FLOAT32_C( -250.60), SIMDE_FLOAT32_C( 944.14), SIMDE_FLOAT32_C( 361.85), SIMDE_FLOAT32_C( -13.07), SIMDE_FLOAT32_C( 852.60)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -788.39), SIMDE_FLOAT32_C( 330.43), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 956.68), SIMDE_FLOAT32_C( 954.62), SIMDE_FLOAT32_C( 825.49), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( -933.21), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( -420.06), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 103.15), SIMDE_FLOAT32_C( 439.77), SIMDE_FLOAT32_C( -0.94)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_sin_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_sin_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( -0.88), SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.97), SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( 0.87)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( -0.95), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( 0.94), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.33)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -923.64), 
SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( -417.54), SIMDE_FLOAT64_C( 696.87)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( -0.29), SIMDE_FLOAT64_C( -0.53)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 687.09), SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -660.80)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( -0.47), SIMDE_FLOAT64_C( -0.88)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -770.35), SIMDE_FLOAT64_C( 443.48), SIMDE_FLOAT64_C( -583.60), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( -770.72), SIMDE_FLOAT64_C( 993.90), SIMDE_FLOAT64_C( 28.08), SIMDE_FLOAT64_C( 841.21)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( -0.49), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( 0.86), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 0.19), SIMDE_FLOAT64_C( -0.67)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -387.90), SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 655.87), SIMDE_FLOAT64_C( 339.21), SIMDE_FLOAT64_C( 532.35), SIMDE_FLOAT64_C( -263.99), SIMDE_FLOAT64_C( 780.64), SIMDE_FLOAT64_C( -30.79)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.59)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -203.65), SIMDE_FLOAT64_C( -80.73), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -944.78), SIMDE_FLOAT64_C( -747.59), SIMDE_FLOAT64_C( -767.23), SIMDE_FLOAT64_C( -554.19), SIMDE_FLOAT64_C( 398.82)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( 
-0.55), SIMDE_FLOAT64_C( -0.74), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( -0.63), SIMDE_FLOAT64_C( -0.96), SIMDE_FLOAT64_C( 0.16)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( 791.23), SIMDE_FLOAT64_C( 254.31)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.86), SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( -0.86), SIMDE_FLOAT64_C( -0.41), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 0.16)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_sin_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_sin_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( 467.76), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( 346.63)), UINT8_C(139), simde_mm512_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( -754.38)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( 467.76), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( -0.39)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -417.54)), UINT8_C(229), simde_mm512_set_pd(SIMDE_FLOAT64_C( 841.21), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 687.09), SIMDE_FLOAT64_C( 
-212.54), SIMDE_FLOAT64_C( -660.80), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -923.64), SIMDE_FLOAT64_C( -860.95)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.67), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -0.15)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 398.82), SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 339.21), SIMDE_FLOAT64_C( -263.99), SIMDE_FLOAT64_C( -30.79), SIMDE_FLOAT64_C( 443.48), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( 993.90)), UINT8_C(253), simde_mm512_set_pd(SIMDE_FLOAT64_C( -554.19), SIMDE_FLOAT64_C( -387.90), SIMDE_FLOAT64_C( 655.87), SIMDE_FLOAT64_C( 532.35), SIMDE_FLOAT64_C( 780.64), SIMDE_FLOAT64_C( -770.35), SIMDE_FLOAT64_C( -583.60), SIMDE_FLOAT64_C( -770.72)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.96), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( 0.86)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 120.65), SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 791.23), SIMDE_FLOAT64_C( -203.65), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -747.59)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 543.35), SIMDE_FLOAT64_C( -171.51), SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( 254.31), SIMDE_FLOAT64_C( -80.73), SIMDE_FLOAT64_C( -944.78)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 120.65), SIMDE_FLOAT64_C( -0.96), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( -0.41), SIMDE_FLOAT64_C( 0.16), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -0.74)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 99.93), SIMDE_FLOAT64_C( -738.19), SIMDE_FLOAT64_C( 758.79), SIMDE_FLOAT64_C( 343.48), SIMDE_FLOAT64_C( -797.92), SIMDE_FLOAT64_C( -525.83), SIMDE_FLOAT64_C( -822.65), SIMDE_FLOAT64_C( 655.67)), UINT8_C(145), 
simde_mm512_set_pd(SIMDE_FLOAT64_C( 331.34), SIMDE_FLOAT64_C( 462.95), SIMDE_FLOAT64_C( -178.99), SIMDE_FLOAT64_C( 324.62), SIMDE_FLOAT64_C( -874.31), SIMDE_FLOAT64_C( -328.54), SIMDE_FLOAT64_C( -192.31), SIMDE_FLOAT64_C( 561.36)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -738.19), SIMDE_FLOAT64_C( 758.79), SIMDE_FLOAT64_C( -0.86), SIMDE_FLOAT64_C( -797.92), SIMDE_FLOAT64_C( -525.83), SIMDE_FLOAT64_C( -822.65), SIMDE_FLOAT64_C( 0.83)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -756.42), SIMDE_FLOAT64_C( 27.25), SIMDE_FLOAT64_C( 690.12), SIMDE_FLOAT64_C( -21.09), SIMDE_FLOAT64_C( -448.89), SIMDE_FLOAT64_C( 505.79), SIMDE_FLOAT64_C( 831.02), SIMDE_FLOAT64_C( 977.36)), UINT8_C( 75), simde_mm512_set_pd(SIMDE_FLOAT64_C( 977.49), SIMDE_FLOAT64_C( 424.81), SIMDE_FLOAT64_C( -95.15), SIMDE_FLOAT64_C( 840.65), SIMDE_FLOAT64_C( -591.56), SIMDE_FLOAT64_C( 731.49), SIMDE_FLOAT64_C( 623.70), SIMDE_FLOAT64_C( 140.67)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -756.42), SIMDE_FLOAT64_C( -0.64), SIMDE_FLOAT64_C( 690.12), SIMDE_FLOAT64_C( -21.09), SIMDE_FLOAT64_C( -0.81), SIMDE_FLOAT64_C( 505.79), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.65)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 394.67), SIMDE_FLOAT64_C( -304.73), SIMDE_FLOAT64_C( -696.69), SIMDE_FLOAT64_C( 822.06), SIMDE_FLOAT64_C( -997.63), SIMDE_FLOAT64_C( 923.64), SIMDE_FLOAT64_C( -768.12), SIMDE_FLOAT64_C( -67.64)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 510.85), SIMDE_FLOAT64_C( 14.34), SIMDE_FLOAT64_C( 916.26), SIMDE_FLOAT64_C( -769.09), SIMDE_FLOAT64_C( -573.81), SIMDE_FLOAT64_C( -337.60), SIMDE_FLOAT64_C( 293.64), SIMDE_FLOAT64_C( -576.22)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 394.67), SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( -696.69), SIMDE_FLOAT64_C( -0.56), SIMDE_FLOAT64_C( -0.89), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( -768.12), SIMDE_FLOAT64_C( 0.97)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 475.51), SIMDE_FLOAT64_C( 936.65), SIMDE_FLOAT64_C( -348.70), SIMDE_FLOAT64_C( -438.19), 
SIMDE_FLOAT64_C( -752.43), SIMDE_FLOAT64_C( 932.66), SIMDE_FLOAT64_C( -327.22), SIMDE_FLOAT64_C( -182.45)), UINT8_C(213), simde_mm512_set_pd(SIMDE_FLOAT64_C( -775.04), SIMDE_FLOAT64_C( 440.64), SIMDE_FLOAT64_C( 897.27), SIMDE_FLOAT64_C( -197.89), SIMDE_FLOAT64_C( -359.76), SIMDE_FLOAT64_C( -33.67), SIMDE_FLOAT64_C( 7.27), SIMDE_FLOAT64_C( -125.20)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.80), SIMDE_FLOAT64_C( 0.73), SIMDE_FLOAT64_C( -348.70), SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( -752.43), SIMDE_FLOAT64_C( -0.78), SIMDE_FLOAT64_C( -327.22), SIMDE_FLOAT64_C( 0.45)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_sin_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_sind_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( -0.23)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.56)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.95)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -0.52)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 
696.87)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -0.39)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64)), simde_mm_set_ps(SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( 0.40)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.86)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( -0.54)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_sind_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_sincos_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 mem[4]; const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.66) }, { SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.85) }, { SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.75) } }, { { SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.86) }, { SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.53) }, { SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.51) } }, { { SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.93) }, { SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -0.06), 
SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 0.37) }, { SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( 0.36) } }, { { SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.64) }, { SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.88) }, { SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.77) } }, { { SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.77) }, { SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.69) }, { SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( -0.64) } }, { { SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.91) }, { SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( -0.42) }, { SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( -0.41) } }, { { SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.92) }, { SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.40) }, { SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.39) } }, { { SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.98) }, { SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( 0.19) }, { SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( 0.19) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 mem; simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_sincos_ps(&mem, a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); simde_test_x86_assert_equal_f32x4(mem, 
simde_mm_loadu_ps(test_vec[i].mem), 1); } return 0; } static int test_simde_mm_sincos_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 mem[2]; const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( 0.90) }, { SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( 0.45) }, { SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( 0.43) } }, { { SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.73) }, { SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.75) }, { SIMDE_FLOAT64_C( 0.74), SIMDE_FLOAT64_C( 0.68) } }, { { SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 1.00) }, { SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( 0.01) }, { SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( 0.01) } }, { { SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.79) }, { SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( -0.66) }, { SIMDE_FLOAT64_C( 0.74), SIMDE_FLOAT64_C( -0.61) } }, { { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.76) }, { SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( 0.71) }, { SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( 0.65) } }, { { SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 0.55) }, { SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( -0.99) }, { SIMDE_FLOAT64_C( 0.36), SIMDE_FLOAT64_C( -0.84) } }, { { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.93) }, { SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( -0.37) }, { SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( -0.36) } }, { { SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( 0.90) }, { SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.44) }, { SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.43) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d mem; simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_sincos_pd(&mem, a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); simde_test_x86_assert_equal_f64x2(mem, simde_mm_loadu_pd(test_vec[i].mem), 1); } return 0; } static int test_simde_mm256_sincos_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 mem[8]; 
const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.86) }, { SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.53) }, { SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.51) } }, { { SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.76) }, { SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.71) }, { SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.65) } }, { { SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.83) }, { SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( 0.60) }, { SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( 0.56) } }, { { SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.79), 
SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.99) }, { SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.11) }, { SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( -0.11) } }, { { SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.72) }, { SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.77) }, { SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.70) } }, { { SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.83) }, { SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.59) }, { SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( -0.56) } }, { { SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.93) }, { SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( 
-0.84), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( -0.37) }, { SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.36) } }, { { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.67) }, { SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -0.83) }, { SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.74) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 mem; simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_sincos_ps(&mem, a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); simde_test_x86_assert_equal_f32x8(mem, simde_mm256_loadu_ps(test_vec[i].mem), 1); } return 0; } static int test_simde_mm256_sincos_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 mem[4]; const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 0.79) }, { SIMDE_FLOAT64_C( 0.19), SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( -0.66) }, { SIMDE_FLOAT64_C( 0.19), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.61) } }, { { SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( 0.97) }, { SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 0.25) }, { SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 
-0.00), SIMDE_FLOAT64_C( 0.63), SIMDE_FLOAT64_C( 0.25) } }, { { SIMDE_FLOAT64_C( 0.80), SIMDE_FLOAT64_C( 0.86), SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( 0.72) }, { SIMDE_FLOAT64_C( -0.65), SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( -0.77) }, { SIMDE_FLOAT64_C( -0.61), SIMDE_FLOAT64_C( -0.51), SIMDE_FLOAT64_C( -0.43), SIMDE_FLOAT64_C( -0.70) } }, { { SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( 0.76) }, { SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( -0.09), SIMDE_FLOAT64_C( 0.63), SIMDE_FLOAT64_C( 0.70) }, { SIMDE_FLOAT64_C( 0.72), SIMDE_FLOAT64_C( -0.09), SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( 0.64) } }, { { SIMDE_FLOAT64_C( 0.86), SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.76) }, { SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( -0.16), SIMDE_FLOAT64_C( 0.71) }, { SIMDE_FLOAT64_C( -0.51), SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( -0.16), SIMDE_FLOAT64_C( 0.65) } }, { { SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 0.87), SIMDE_FLOAT64_C( 0.64), SIMDE_FLOAT64_C( 0.90) }, { SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( -0.52), SIMDE_FLOAT64_C( -0.87), SIMDE_FLOAT64_C( -0.44) }, { SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( -0.50), SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( -0.43) } }, { { SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 0.80), SIMDE_FLOAT64_C( 1.00) }, { SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( 0.41), SIMDE_FLOAT64_C( -0.65), SIMDE_FLOAT64_C( 0.05) }, { SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( -0.61), SIMDE_FLOAT64_C( 0.05) } }, { { SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 0.80), SIMDE_FLOAT64_C( 0.97), SIMDE_FLOAT64_C( 0.98) }, { SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( 0.20) }, { SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( 0.20) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde__m256d mem; simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_sincos_pd(&mem, a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); simde_test_x86_assert_equal_f64x4(mem, simde_mm256_loadu_pd(test_vec[i].mem), 1); } return 0; } static int test_simde_mm512_sincos_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 mem[16]; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.69) }, { SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.81) }, { SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( -0.72) } }, { { SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.84), 
SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.84) }, { SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( 0.57) }, { SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( 0.54) } }, { { SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.81) }, { SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.63) }, { SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 
0.74), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.59) } }, { { SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.58) }, { SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -0.95) }, { SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.81) } }, { { SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.56) }, { SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 
0.29), SIMDE_FLOAT32_C( 0.98) }, { SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.83) } }, { { SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.80) }, { SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( 0.64) }, { SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.37), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( -0.37), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( 0.60) } }, { { SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.63), 
SIMDE_FLOAT32_C( 0.78) }, { SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( 0.67) }, { SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.62) } }, { { SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( 0.72) }, { SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( -0.61), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( 0.77) }, { SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( 0.70) 
} } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 mem; simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_sincos_ps(&mem, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); simde_test_x86_assert_equal_f32x16(mem, simde_mm512_loadu_ps(test_vec[i].mem), 1); } return 0; } static int test_simde_mm512_mask_sincos_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 mem[16]; const simde_float32 sin_src[16]; const simde_float32 cos_src[16]; const simde__mmask16 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.64) }, { SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 0.21) }, { SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.64) }, UINT16_C( 4890), { SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( 0.17), 
SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.06) }, { SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( -0.12), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 0.21) } }, { { SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.09) }, { SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( -0.37), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.84) }, { SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 0.09) }, UINT16_C(18720), { SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( -0.02), 
SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( -0.01) }, { SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( -0.37), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.84) } }, { { SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( 0.78) }, { SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.77) }, { SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( 0.78) }, UINT16_C( 4387), { SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.26), 
SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.11) }, { SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.77) } }, { { SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( 0.96) }, { SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( -0.41) }, { SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( -0.37), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( 0.39) }, UINT16_C(36556), { SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.52), 
SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( -0.29) }, { SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( -0.29) } }, { { SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.86) }, { SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( -0.86) }, { SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -0.14), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( -0.86) }, UINT16_C(25479), { SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( -0.74), 
SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( -0.78) }, { SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( -0.86) } }, { { SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.62) }, { SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( 0.20) }, { SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.62) }, UINT16_C( 2690), { SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.06), 
SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( -0.52) }, { SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( 0.20) } }, { { SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( 0.90) }, { SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.54) }, { SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( 0.19), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( 0.75) }, UINT16_C(41670), { SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( 
-0.46), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.44) }, { SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( -0.37), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.43) } }, { { SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.14), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.54) }, { SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.11) }, { SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -0.23), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.14), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.54) }, UINT16_C( 7185), { SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 
-0.23), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.31) }, { SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.11) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 mem; simde__m512 sin_src = simde_mm512_loadu_ps(test_vec[i].sin_src); simde__m512 cos_src = simde_mm512_loadu_ps(test_vec[i].cos_src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_sincos_ps(&mem, sin_src, cos_src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); simde_test_x86_assert_equal_f32x16(mem, simde_mm512_loadu_ps(test_vec[i].mem), 1); } return 0; } static int test_simde_mm512_sincos_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 mem[8]; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( 1.00) }, { SIMDE_FLOAT64_C( -0.37), SIMDE_FLOAT64_C( 0.71), SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( -0.16), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( -0.61), SIMDE_FLOAT64_C( -0.06) }, { SIMDE_FLOAT64_C( -0.36), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( 0.36), SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( -0.16), SIMDE_FLOAT64_C( -0.43), SIMDE_FLOAT64_C( -0.57), SIMDE_FLOAT64_C( -0.06) } }, { { 
SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 0.88), SIMDE_FLOAT64_C( 0.86), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.86) }, { SIMDE_FLOAT64_C( 0.70), SIMDE_FLOAT64_C( 0.21), SIMDE_FLOAT64_C( -0.50), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( -0.62), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( -0.09), SIMDE_FLOAT64_C( 0.53) }, { SIMDE_FLOAT64_C( 0.64), SIMDE_FLOAT64_C( 0.21), SIMDE_FLOAT64_C( -0.48), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( 0.62), SIMDE_FLOAT64_C( -0.09), SIMDE_FLOAT64_C( 0.51) } }, { { SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( 0.73), SIMDE_FLOAT64_C( 0.86), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 0.72), SIMDE_FLOAT64_C( 0.93) }, { SIMDE_FLOAT64_C( -0.09), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.75), SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( -0.37) }, { SIMDE_FLOAT64_C( -0.09), SIMDE_FLOAT64_C( -0.43), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( -0.51), SIMDE_FLOAT64_C( 0.74), SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( -0.36) } }, { { SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 0.86), SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( 0.93) }, { SIMDE_FLOAT64_C( -0.95), SIMDE_FLOAT64_C( -0.54), SIMDE_FLOAT64_C( 0.29), SIMDE_FLOAT64_C( -0.71), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( -0.71), SIMDE_FLOAT64_C( 0.63), SIMDE_FLOAT64_C( -0.38) }, { SIMDE_FLOAT64_C( -0.81), SIMDE_FLOAT64_C( -0.51), SIMDE_FLOAT64_C( 0.29), SIMDE_FLOAT64_C( -0.65), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( -0.65), SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( -0.37) } }, { { SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.85), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 0.71), SIMDE_FLOAT64_C( 0.97) }, { 
SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( -0.16), SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( 0.25) }, { SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( -0.16), SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.70), SIMDE_FLOAT64_C( 0.25) } }, { { SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.99) }, { SIMDE_FLOAT64_C( 0.19), SIMDE_FLOAT64_C( -0.71), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 0.70), SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( -0.13) }, { SIMDE_FLOAT64_C( 0.19), SIMDE_FLOAT64_C( -0.65), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( 0.64), SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( -0.13) } }, { { SIMDE_FLOAT64_C( 0.73), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.94), SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( 1.00) }, { SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.91), SIMDE_FLOAT64_C( -0.14), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( -0.29), SIMDE_FLOAT64_C( -0.09) }, { SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( -0.79), SIMDE_FLOAT64_C( -0.14), SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( -0.29), SIMDE_FLOAT64_C( -0.09) } }, { { SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.94), SIMDE_FLOAT64_C( 0.76) }, { SIMDE_FLOAT64_C( -0.96), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( -0.97), SIMDE_FLOAT64_C( -0.70), SIMDE_FLOAT64_C( -0.17), SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( -0.70) }, 
{ SIMDE_FLOAT64_C( -0.82), SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( -0.82), SIMDE_FLOAT64_C( -0.64), SIMDE_FLOAT64_C( -0.17), SIMDE_FLOAT64_C( -0.34), SIMDE_FLOAT64_C( -0.64) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d mem; simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_sincos_pd(&mem, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); simde_test_x86_assert_equal_f64x8(mem, simde_mm512_loadu_pd(test_vec[i].mem), 1); } return 0; } static int test_simde_mm512_mask_sincos_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 mem[8]; const simde_float64 sin_src[8]; const simde_float64 cos_src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( -0.11), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.80), SIMDE_FLOAT64_C( -0.28), SIMDE_FLOAT64_C( 0.94), SIMDE_FLOAT64_C( 0.05) }, { SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.35), SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( 0.72), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( -0.71), SIMDE_FLOAT64_C( -0.51) }, { SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( -0.11), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 0.80), SIMDE_FLOAT64_C( -0.28), SIMDE_FLOAT64_C( 0.07), SIMDE_FLOAT64_C( 0.05) }, UINT8_C( 74), { SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( -0.14), SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 0.80), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( 0.49) }, { SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( -0.14), SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( -0.34), SIMDE_FLOAT64_C( -0.51) } }, { { SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 0.96), 
SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 0.87) }, { SIMDE_FLOAT64_C( -0.29), SIMDE_FLOAT64_C( -0.24), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( -0.66), SIMDE_FLOAT64_C( -0.17), SIMDE_FLOAT64_C( -0.55), SIMDE_FLOAT64_C( -0.83), SIMDE_FLOAT64_C( -0.82) }, { SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( -0.82), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( -0.55), SIMDE_FLOAT64_C( 0.87) }, UINT8_C( 82), { SIMDE_FLOAT64_C( -0.55), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.19), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( -0.28), SIMDE_FLOAT64_C( 0.17), SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( 0.59) }, { SIMDE_FLOAT64_C( -0.29), SIMDE_FLOAT64_C( 0.62), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( -0.66), SIMDE_FLOAT64_C( -0.28), SIMDE_FLOAT64_C( -0.55), SIMDE_FLOAT64_C( -0.38), SIMDE_FLOAT64_C( -0.82) } }, { { SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.48) }, { SIMDE_FLOAT64_C( -0.28), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( -0.63), SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -0.12), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.08) }, { SIMDE_FLOAT64_C( -0.56), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.48) }, UINT8_C( 33), { SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( -0.25), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( 0.20) }, { SIMDE_FLOAT64_C( -0.43), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( -0.63), SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.08) } }, { { SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 
0.78), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.70), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( 0.63) }, { SIMDE_FLOAT64_C( -0.59), SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( 0.24) }, { SIMDE_FLOAT64_C( -0.11), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( -0.37), SIMDE_FLOAT64_C( -0.78), SIMDE_FLOAT64_C( 0.17) }, UINT8_C(225), { SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( -0.24), SIMDE_FLOAT64_C( -0.38), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.89) }, { SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 0.71), SIMDE_FLOAT64_C( 0.62), SIMDE_FLOAT64_C( 0.78) } }, { { SIMDE_FLOAT64_C( -0.47), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -0.93), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( -0.07), SIMDE_FLOAT64_C( 0.84) }, { SIMDE_FLOAT64_C( -0.46), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( -0.88), SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( -0.82), SIMDE_FLOAT64_C( 0.09) }, { SIMDE_FLOAT64_C( -0.47), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( -0.96), SIMDE_FLOAT64_C( -0.93), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( -0.07), SIMDE_FLOAT64_C( -0.26) }, UINT8_C(136), { SIMDE_FLOAT64_C( -0.29), SIMDE_FLOAT64_C( -0.50), SIMDE_FLOAT64_C( -0.91), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( -0.37), SIMDE_FLOAT64_C( -0.96), SIMDE_FLOAT64_C( -0.57) }, { SIMDE_FLOAT64_C( -0.46), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( -0.88), SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( -0.82), SIMDE_FLOAT64_C( -0.54) } }, { { SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 0.94), 
SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( -0.40), SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 0.28) }, { SIMDE_FLOAT64_C( 0.24), SIMDE_FLOAT64_C( 0.15), SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( -0.43), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.49) }, { SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( -0.40), SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.14), SIMDE_FLOAT64_C( 0.28) }, UINT8_C( 3), { SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( -0.89), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( -0.18), SIMDE_FLOAT64_C( -0.42) }, { SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( -0.43), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.49) } }, { { SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.74), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.97) }, { SIMDE_FLOAT64_C( -0.18), SIMDE_FLOAT64_C( 0.74), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.08) }, { SIMDE_FLOAT64_C( 0.36), SIMDE_FLOAT64_C( 0.72), SIMDE_FLOAT64_C( 0.74), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( 0.22), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( -0.36) }, UINT8_C(195), { SIMDE_FLOAT64_C( -0.48), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( -0.11), SIMDE_FLOAT64_C( -0.17), SIMDE_FLOAT64_C( -0.23) }, { SIMDE_FLOAT64_C( -0.46), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( -0.17), SIMDE_FLOAT64_C( -0.23) } }, { { SIMDE_FLOAT64_C( 
0.68), SIMDE_FLOAT64_C( 0.15), SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( -0.64), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( -0.80), SIMDE_FLOAT64_C( 0.72), SIMDE_FLOAT64_C( 0.56) }, { SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( -0.06), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( -0.11), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 0.70), SIMDE_FLOAT64_C( 0.50) }, { SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 0.15), SIMDE_FLOAT64_C( 0.27), SIMDE_FLOAT64_C( -0.64), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( -0.80), SIMDE_FLOAT64_C( 0.72), SIMDE_FLOAT64_C( 0.56) }, UINT8_C( 0), { SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( -0.14), SIMDE_FLOAT64_C( 0.16), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 0.49), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( 0.64) }, { SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( -0.06), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( -0.11), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 0.70), SIMDE_FLOAT64_C( 0.50) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d mem; simde__m512d sin_src = simde_mm512_loadu_pd(test_vec[i].sin_src); simde__m512d cos_src = simde_mm512_loadu_pd(test_vec[i].cos_src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_sincos_pd(&mem, sin_src, cos_src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); simde_test_x86_assert_equal_f64x8(mem, simde_mm512_loadu_pd(test_vec[i].mem), 1); } return 0; } static int test_simde_mm_sind_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.56), SIMDE_FLOAT64_C( -0.23)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( 0.63)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 
34.06)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( 0.56)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( -0.76)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.95)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( 0.88)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -0.52)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.56)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_sind_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_sind_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( -0.23)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.95)) }, { 
simde_mm256_set_ps(SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -0.39)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.86)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 28.08), SIMDE_FLOAT32_C( 841.21)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.86)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -30.79)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( -0.51)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -767.23), 
SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( 398.82)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( 0.63)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 254.31)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( -0.96)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_sind_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_sind_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( 0.63), SIMDE_FLOAT64_C( -0.56), SIMDE_FLOAT64_C( -0.23)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( 0.56)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( 0.88), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.95)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 1.00), 
SIMDE_FLOAT64_C( -0.52)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( -417.54), SIMDE_FLOAT64_C( 696.87)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( -0.63), SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( -0.39)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -923.64)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( -0.41), SIMDE_FLOAT64_C( 0.40)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -660.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( 0.97), SIMDE_FLOAT64_C( 0.86)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 687.09)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -0.80), SIMDE_FLOAT64_C( -0.54)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_sind_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_sind_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.95), 
SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( -0.23)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -0.39)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 28.08), SIMDE_FLOAT32_C( 841.21)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.47), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.86)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( 680.02), 
SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 254.31), SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( 398.82)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( 0.63)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -328.54), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( -70.91), SIMDE_FLOAT32_C( 543.35), SIMDE_FLOAT32_C( 120.65), SIMDE_FLOAT32_C( -171.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( -0.06), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( -0.15)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( 977.36), SIMDE_FLOAT32_C( -906.16), 
SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( -738.19)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( 0.86), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( -0.31)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( -337.60), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( 710.38), SIMDE_FLOAT32_C( 977.49), SIMDE_FLOAT32_C( -756.42), SIMDE_FLOAT32_C( 424.81), SIMDE_FLOAT32_C( 27.25), SIMDE_FLOAT32_C( -95.15)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -1.00)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -696.69)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( 0.13), 
SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( -0.28), SIMDE_FLOAT32_C( 0.40)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_sind_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_sind_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( 696.87), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( 346.63)), UINT16_C(41466), simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( -754.38)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 1.00), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 346.63)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( -148.69), 
SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 28.08)), UINT16_C(36797), simde_mm512_set_ps(SIMDE_FLOAT32_C( -171.51), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 254.31), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( 398.82), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( 993.90)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -0.51), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( -1.00)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -95.15), SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( -738.19), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 543.35)), UINT16_C(16804), simde_mm512_set_ps(SIMDE_FLOAT32_C( 27.25), SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( 977.36), SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( -178.99), 
SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -328.54), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( -70.91)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -95.15), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 543.35)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -348.70), SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( -337.60), SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( 710.38), SIMDE_FLOAT32_C( -756.42)), UINT16_C( 2107), simde_mm512_set_ps(SIMDE_FLOAT32_C( 897.27), SIMDE_FLOAT32_C( -197.89), SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( -696.69), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( 977.49)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -348.70), SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.98)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -15.61), SIMDE_FLOAT32_C( 
-737.13), SIMDE_FLOAT32_C( -314.93), SIMDE_FLOAT32_C( 177.92), SIMDE_FLOAT32_C( 345.93), SIMDE_FLOAT32_C( 888.71), SIMDE_FLOAT32_C( 915.71), SIMDE_FLOAT32_C( 133.52), SIMDE_FLOAT32_C( 484.94), SIMDE_FLOAT32_C( -598.06), SIMDE_FLOAT32_C( -791.07), SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( 221.37), SIMDE_FLOAT32_C( -788.36), SIMDE_FLOAT32_C( -775.04), SIMDE_FLOAT32_C( 440.64)), UINT16_C(22274), simde_mm512_set_ps(SIMDE_FLOAT32_C( 496.57), SIMDE_FLOAT32_C( 915.19), SIMDE_FLOAT32_C( -718.40), SIMDE_FLOAT32_C( 159.97), SIMDE_FLOAT32_C( -861.01), SIMDE_FLOAT32_C( 426.61), SIMDE_FLOAT32_C( 932.11), SIMDE_FLOAT32_C( 110.36), SIMDE_FLOAT32_C( 826.84), SIMDE_FLOAT32_C( -76.75), SIMDE_FLOAT32_C( 237.58), SIMDE_FLOAT32_C( -378.50), SIMDE_FLOAT32_C( -601.68), SIMDE_FLOAT32_C( -623.50), SIMDE_FLOAT32_C( -942.47), SIMDE_FLOAT32_C( 475.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -15.61), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( -314.93), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 345.93), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 484.94), SIMDE_FLOAT32_C( -598.06), SIMDE_FLOAT32_C( -791.07), SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( 221.37), SIMDE_FLOAT32_C( -788.36), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 440.64)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 883.05), SIMDE_FLOAT32_C( -807.28), SIMDE_FLOAT32_C( -70.05), SIMDE_FLOAT32_C( -784.34), SIMDE_FLOAT32_C( 92.52), SIMDE_FLOAT32_C( 206.60), SIMDE_FLOAT32_C( 834.60), SIMDE_FLOAT32_C( -65.60), SIMDE_FLOAT32_C( -286.07), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -318.38), SIMDE_FLOAT32_C( 783.48), SIMDE_FLOAT32_C( -628.82), SIMDE_FLOAT32_C( 556.35), SIMDE_FLOAT32_C( 439.43), SIMDE_FLOAT32_C( 434.03)), UINT16_C(27396), simde_mm512_set_ps(SIMDE_FLOAT32_C( -964.25), SIMDE_FLOAT32_C( -406.33), SIMDE_FLOAT32_C( -743.66), SIMDE_FLOAT32_C( -764.58), SIMDE_FLOAT32_C( 789.89), SIMDE_FLOAT32_C( 4.83), SIMDE_FLOAT32_C( -818.54), SIMDE_FLOAT32_C( 161.06), SIMDE_FLOAT32_C( 579.25), 
SIMDE_FLOAT32_C( -11.78), SIMDE_FLOAT32_C( -308.52), SIMDE_FLOAT32_C( -719.57), SIMDE_FLOAT32_C( 334.00), SIMDE_FLOAT32_C( 274.71), SIMDE_FLOAT32_C( -916.82), SIMDE_FLOAT32_C( -490.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 883.05), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( -784.34), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 206.60), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( -286.07), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -318.38), SIMDE_FLOAT32_C( 783.48), SIMDE_FLOAT32_C( -628.82), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 439.43), SIMDE_FLOAT32_C( 434.03)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 529.63), SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -967.78), SIMDE_FLOAT32_C( 638.94), SIMDE_FLOAT32_C( 450.90), SIMDE_FLOAT32_C( -771.54), SIMDE_FLOAT32_C( 105.79), SIMDE_FLOAT32_C( 590.10), SIMDE_FLOAT32_C( 30.91), SIMDE_FLOAT32_C( 635.35), SIMDE_FLOAT32_C( -84.00), SIMDE_FLOAT32_C( 80.04), SIMDE_FLOAT32_C( -709.46), SIMDE_FLOAT32_C( 607.86), SIMDE_FLOAT32_C( 394.58), SIMDE_FLOAT32_C( -889.11)), UINT16_C( 953), simde_mm512_set_ps(SIMDE_FLOAT32_C( 18.75), SIMDE_FLOAT32_C( 809.05), SIMDE_FLOAT32_C( 144.05), SIMDE_FLOAT32_C( -427.72), SIMDE_FLOAT32_C( 308.28), SIMDE_FLOAT32_C( -177.05), SIMDE_FLOAT32_C( -457.77), SIMDE_FLOAT32_C( 678.24), SIMDE_FLOAT32_C( 66.05), SIMDE_FLOAT32_C( -267.71), SIMDE_FLOAT32_C( 117.28), SIMDE_FLOAT32_C( -576.80), SIMDE_FLOAT32_C( -38.39), SIMDE_FLOAT32_C( -250.14), SIMDE_FLOAT32_C( -53.92), SIMDE_FLOAT32_C( 91.94)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 529.63), SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -967.78), SIMDE_FLOAT32_C( 638.94), SIMDE_FLOAT32_C( 450.90), SIMDE_FLOAT32_C( -771.54), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 635.35), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( 607.86), SIMDE_FLOAT32_C( 394.58), SIMDE_FLOAT32_C( 1.00)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -788.39), 
SIMDE_FLOAT32_C( 330.43), SIMDE_FLOAT32_C( -493.41), SIMDE_FLOAT32_C( 822.72), SIMDE_FLOAT32_C( 956.68), SIMDE_FLOAT32_C( 954.62), SIMDE_FLOAT32_C( 825.49), SIMDE_FLOAT32_C( -816.27), SIMDE_FLOAT32_C( -209.34), SIMDE_FLOAT32_C( -933.21), SIMDE_FLOAT32_C( -728.70), SIMDE_FLOAT32_C( -420.06), SIMDE_FLOAT32_C( 100.32), SIMDE_FLOAT32_C( 103.15), SIMDE_FLOAT32_C( 439.77), SIMDE_FLOAT32_C( -204.33)), UINT16_C(12713), simde_mm512_set_ps(SIMDE_FLOAT32_C( -841.43), SIMDE_FLOAT32_C( -14.16), SIMDE_FLOAT32_C( 824.88), SIMDE_FLOAT32_C( 793.63), SIMDE_FLOAT32_C( -736.75), SIMDE_FLOAT32_C( -310.57), SIMDE_FLOAT32_C( 728.87), SIMDE_FLOAT32_C( -350.72), SIMDE_FLOAT32_C( 60.89), SIMDE_FLOAT32_C( 109.81), SIMDE_FLOAT32_C( 715.94), SIMDE_FLOAT32_C( -250.60), SIMDE_FLOAT32_C( 944.14), SIMDE_FLOAT32_C( 361.85), SIMDE_FLOAT32_C( -13.07), SIMDE_FLOAT32_C( 852.60)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -788.39), SIMDE_FLOAT32_C( 330.43), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 956.68), SIMDE_FLOAT32_C( 954.62), SIMDE_FLOAT32_C( 825.49), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( -933.21), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( -420.06), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( 103.15), SIMDE_FLOAT32_C( 439.77), SIMDE_FLOAT32_C( 0.74)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_sind_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_sind_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 
0.11), SIMDE_FLOAT64_C( 0.63), SIMDE_FLOAT64_C( -0.56), SIMDE_FLOAT64_C( -0.23)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -0.52), SIMDE_FLOAT64_C( 0.96), SIMDE_FLOAT64_C( 0.88), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( 0.95)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -923.64), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( -417.54), SIMDE_FLOAT64_C( 696.87)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( -0.41), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( -0.63), SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( -0.39)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 687.09), SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -660.80)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -0.80), SIMDE_FLOAT64_C( -0.54), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( 0.97), SIMDE_FLOAT64_C( 0.86)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -770.35), SIMDE_FLOAT64_C( 443.48), SIMDE_FLOAT64_C( -583.60), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( -770.72), SIMDE_FLOAT64_C( 993.90), SIMDE_FLOAT64_C( 28.08), SIMDE_FLOAT64_C( 841.21)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( 0.35), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.86)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -387.90), SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 655.87), 
SIMDE_FLOAT64_C( 339.21), SIMDE_FLOAT64_C( 532.35), SIMDE_FLOAT64_C( -263.99), SIMDE_FLOAT64_C( 780.64), SIMDE_FLOAT64_C( -30.79)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.47), SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( -0.90), SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.87), SIMDE_FLOAT64_C( -0.51)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -203.65), SIMDE_FLOAT64_C( -80.73), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -944.78), SIMDE_FLOAT64_C( -747.59), SIMDE_FLOAT64_C( -767.23), SIMDE_FLOAT64_C( -554.19), SIMDE_FLOAT64_C( 398.82)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( -0.40), SIMDE_FLOAT64_C( 0.70), SIMDE_FLOAT64_C( -0.46), SIMDE_FLOAT64_C( -0.73), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( 0.63)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( 791.23), SIMDE_FLOAT64_C( 254.31)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.94), SIMDE_FLOAT64_C( -0.64), SIMDE_FLOAT64_C( -0.52), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( -0.17), SIMDE_FLOAT64_C( -0.87), SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( -0.96)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_sind_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_sind_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( 467.76), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( 346.63)), UINT8_C(139), simde_mm512_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 
497.31), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( -754.38)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( 467.76), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( -0.56)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -417.54)), UINT8_C(229), simde_mm512_set_pd(SIMDE_FLOAT64_C( 841.21), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 687.09), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -660.80), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -923.64), SIMDE_FLOAT64_C( -860.95)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.86), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( -0.54), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -0.63)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 398.82), SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 339.21), SIMDE_FLOAT64_C( -263.99), SIMDE_FLOAT64_C( -30.79), SIMDE_FLOAT64_C( 443.48), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( 993.90)), UINT8_C(253), simde_mm512_set_pd(SIMDE_FLOAT64_C( -554.19), SIMDE_FLOAT64_C( -387.90), SIMDE_FLOAT64_C( 655.87), SIMDE_FLOAT64_C( 532.35), SIMDE_FLOAT64_C( 780.64), SIMDE_FLOAT64_C( -770.35), SIMDE_FLOAT64_C( -583.60), SIMDE_FLOAT64_C( -770.72)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( -0.47), SIMDE_FLOAT64_C( -0.90), SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( 0.87), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( -0.77)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 120.65), SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 791.23), SIMDE_FLOAT64_C( -203.65), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -747.59)), UINT8_C( 93), 
simde_mm512_set_pd(SIMDE_FLOAT64_C( 543.35), SIMDE_FLOAT64_C( -171.51), SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( 254.31), SIMDE_FLOAT64_C( -80.73), SIMDE_FLOAT64_C( -944.78)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 120.65), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( -0.87), SIMDE_FLOAT64_C( -0.96), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( 0.70)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 99.93), SIMDE_FLOAT64_C( -738.19), SIMDE_FLOAT64_C( 758.79), SIMDE_FLOAT64_C( 343.48), SIMDE_FLOAT64_C( -797.92), SIMDE_FLOAT64_C( -525.83), SIMDE_FLOAT64_C( -822.65), SIMDE_FLOAT64_C( 655.67)), UINT8_C(145), simde_mm512_set_pd(SIMDE_FLOAT64_C( 331.34), SIMDE_FLOAT64_C( 462.95), SIMDE_FLOAT64_C( -178.99), SIMDE_FLOAT64_C( 324.62), SIMDE_FLOAT64_C( -874.31), SIMDE_FLOAT64_C( -328.54), SIMDE_FLOAT64_C( -192.31), SIMDE_FLOAT64_C( 561.36)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.48), SIMDE_FLOAT64_C( -738.19), SIMDE_FLOAT64_C( 758.79), SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( -797.92), SIMDE_FLOAT64_C( -525.83), SIMDE_FLOAT64_C( -822.65), SIMDE_FLOAT64_C( -0.36)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -756.42), SIMDE_FLOAT64_C( 27.25), SIMDE_FLOAT64_C( 690.12), SIMDE_FLOAT64_C( -21.09), SIMDE_FLOAT64_C( -448.89), SIMDE_FLOAT64_C( 505.79), SIMDE_FLOAT64_C( 831.02), SIMDE_FLOAT64_C( 977.36)), UINT8_C( 75), simde_mm512_set_pd(SIMDE_FLOAT64_C( 977.49), SIMDE_FLOAT64_C( 424.81), SIMDE_FLOAT64_C( -95.15), SIMDE_FLOAT64_C( 840.65), SIMDE_FLOAT64_C( -591.56), SIMDE_FLOAT64_C( 731.49), SIMDE_FLOAT64_C( 623.70), SIMDE_FLOAT64_C( 140.67)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -756.42), SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( 690.12), SIMDE_FLOAT64_C( -21.09), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( 505.79), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( 0.63)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 394.67), SIMDE_FLOAT64_C( -304.73), SIMDE_FLOAT64_C( -696.69), SIMDE_FLOAT64_C( 822.06), 
SIMDE_FLOAT64_C( -997.63), SIMDE_FLOAT64_C( 923.64), SIMDE_FLOAT64_C( -768.12), SIMDE_FLOAT64_C( -67.64)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 510.85), SIMDE_FLOAT64_C( 14.34), SIMDE_FLOAT64_C( 916.26), SIMDE_FLOAT64_C( -769.09), SIMDE_FLOAT64_C( -573.81), SIMDE_FLOAT64_C( -337.60), SIMDE_FLOAT64_C( 293.64), SIMDE_FLOAT64_C( -576.22)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 394.67), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( -696.69), SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( -768.12), SIMDE_FLOAT64_C( 0.59)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 475.51), SIMDE_FLOAT64_C( 936.65), SIMDE_FLOAT64_C( -348.70), SIMDE_FLOAT64_C( -438.19), SIMDE_FLOAT64_C( -752.43), SIMDE_FLOAT64_C( 932.66), SIMDE_FLOAT64_C( -327.22), SIMDE_FLOAT64_C( -182.45)), UINT8_C(213), simde_mm512_set_pd(SIMDE_FLOAT64_C( -775.04), SIMDE_FLOAT64_C( 440.64), SIMDE_FLOAT64_C( 897.27), SIMDE_FLOAT64_C( -197.89), SIMDE_FLOAT64_C( -359.76), SIMDE_FLOAT64_C( -33.67), SIMDE_FLOAT64_C( 7.27), SIMDE_FLOAT64_C( -125.20)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.82), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( -348.70), SIMDE_FLOAT64_C( 0.31), SIMDE_FLOAT64_C( -752.43), SIMDE_FLOAT64_C( -0.55), SIMDE_FLOAT64_C( -327.22), SIMDE_FLOAT64_C( -0.82)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_sind_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_sinh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( 2.50), SIMDE_FLOAT32_C( 3.47), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 4.79)), simde_mm_set_ps(SIMDE_FLOAT32_C( 6.05), SIMDE_FLOAT32_C( 16.05), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 60.15)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 5.44), SIMDE_FLOAT32_C( 6.18), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( 3.45)), 
simde_mm_set_ps(SIMDE_FLOAT32_C( 115.22), SIMDE_FLOAT32_C( 241.49), SIMDE_FLOAT32_C( 3.70), SIMDE_FLOAT32_C( 15.73)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 6.85), SIMDE_FLOAT32_C( 5.12), SIMDE_FLOAT32_C( 2.14), SIMDE_FLOAT32_C( 5.31)), simde_mm_set_ps(SIMDE_FLOAT32_C( 471.94), SIMDE_FLOAT32_C( 83.66), SIMDE_FLOAT32_C( 4.19), SIMDE_FLOAT32_C( 101.17)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 3.66), SIMDE_FLOAT32_C( 5.76)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 19.42), SIMDE_FLOAT32_C( 158.67)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 6.30)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3.59), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( 2.13), SIMDE_FLOAT32_C( 272.29)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 1.39), SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( 1.65), SIMDE_FLOAT32_C( -0.67)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 15.27), SIMDE_FLOAT32_C( 2.51), SIMDE_FLOAT32_C( -0.72)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 4.42), SIMDE_FLOAT32_C( 2.39), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( 0.46)), simde_mm_set_ps(SIMDE_FLOAT32_C( 41.54), SIMDE_FLOAT32_C( 5.41), SIMDE_FLOAT32_C( -1.03), SIMDE_FLOAT32_C( 0.48)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 4.07), SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( 4.30), SIMDE_FLOAT32_C( 6.25)), simde_mm_set_ps(SIMDE_FLOAT32_C( 29.27), SIMDE_FLOAT32_C( 1.82), SIMDE_FLOAT32_C( 36.84), SIMDE_FLOAT32_C( 259.01)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_sinh_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_sinh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 4.79)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 60.15)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 
2.50), SIMDE_FLOAT64_C( 3.47)), simde_mm_set_pd(SIMDE_FLOAT64_C( 6.05), SIMDE_FLOAT64_C( 16.05)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 2.02), SIMDE_FLOAT64_C( 3.45)), simde_mm_set_pd(SIMDE_FLOAT64_C( 3.70), SIMDE_FLOAT64_C( 15.73)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 5.44), SIMDE_FLOAT64_C( 6.18)), simde_mm_set_pd(SIMDE_FLOAT64_C( 115.22), SIMDE_FLOAT64_C( 241.49)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 2.14), SIMDE_FLOAT64_C( 5.31)), simde_mm_set_pd(SIMDE_FLOAT64_C( 4.19), SIMDE_FLOAT64_C( 101.17)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 6.85), SIMDE_FLOAT64_C( 5.12)), simde_mm_set_pd(SIMDE_FLOAT64_C( 471.94), SIMDE_FLOAT64_C( 83.66)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 3.66), SIMDE_FLOAT64_C( 5.76)), simde_mm_set_pd(SIMDE_FLOAT64_C( 19.42), SIMDE_FLOAT64_C( 158.67)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 0.35)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 0.36)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_sinh_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_sinh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 5.44), SIMDE_FLOAT32_C( 6.18), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( 3.45), SIMDE_FLOAT32_C( 2.50), SIMDE_FLOAT32_C( 3.47), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 4.79)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 115.22), SIMDE_FLOAT32_C( 241.49), SIMDE_FLOAT32_C( 3.70), SIMDE_FLOAT32_C( 15.73), SIMDE_FLOAT32_C( 6.05), SIMDE_FLOAT32_C( 16.05), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 60.15)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 3.66), SIMDE_FLOAT32_C( 5.76), SIMDE_FLOAT32_C( 6.85), SIMDE_FLOAT32_C( 5.12), SIMDE_FLOAT32_C( 2.14), SIMDE_FLOAT32_C( 5.31)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 19.42), SIMDE_FLOAT32_C( 158.67), 
SIMDE_FLOAT32_C( 471.94), SIMDE_FLOAT32_C( 83.66), SIMDE_FLOAT32_C( 4.19), SIMDE_FLOAT32_C( 101.17)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.39), SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( 1.65), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 6.30)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 15.27), SIMDE_FLOAT32_C( 2.51), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 3.59), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( 2.13), SIMDE_FLOAT32_C( 272.29)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 4.07), SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( 4.30), SIMDE_FLOAT32_C( 6.25), SIMDE_FLOAT32_C( 4.42), SIMDE_FLOAT32_C( 2.39), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( 0.46)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 29.27), SIMDE_FLOAT32_C( 1.82), SIMDE_FLOAT32_C( 36.84), SIMDE_FLOAT32_C( 259.01), SIMDE_FLOAT32_C( 41.54), SIMDE_FLOAT32_C( 5.41), SIMDE_FLOAT32_C( -1.03), SIMDE_FLOAT32_C( 0.48)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 5.21), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 4.94), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 7.57), SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( 6.92)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 91.54), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 69.88), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 969.57), SIMDE_FLOAT32_C( 15.27), SIMDE_FLOAT32_C( 506.16)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.63), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.12), SIMDE_FLOAT32_C( 4.76), SIMDE_FLOAT32_C( 5.59), SIMDE_FLOAT32_C( 2.16), SIMDE_FLOAT32_C( 6.66), SIMDE_FLOAT32_C( 3.17)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 74.20), SIMDE_FLOAT32_C( 227.43), SIMDE_FLOAT32_C( 58.37), SIMDE_FLOAT32_C( 133.87), SIMDE_FLOAT32_C( 4.28), SIMDE_FLOAT32_C( 390.27), SIMDE_FLOAT32_C( 11.88)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 2.42), SIMDE_FLOAT32_C( 2.95), SIMDE_FLOAT32_C( 4.75), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.09), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 5.01)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 5.58), SIMDE_FLOAT32_C( 9.53), SIMDE_FLOAT32_C( 57.79), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.06), SIMDE_FLOAT32_C( 74.95)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 5.32), SIMDE_FLOAT32_C( 6.22), SIMDE_FLOAT32_C( 2.66), SIMDE_FLOAT32_C( 6.82), SIMDE_FLOAT32_C( 7.21), SIMDE_FLOAT32_C( 5.88), SIMDE_FLOAT32_C( 6.70), SIMDE_FLOAT32_C( 4.39)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 102.19), SIMDE_FLOAT32_C( 251.35), SIMDE_FLOAT32_C( 7.11), SIMDE_FLOAT32_C( 457.99), SIMDE_FLOAT32_C( 676.45), SIMDE_FLOAT32_C( 178.90), SIMDE_FLOAT32_C( 406.20), SIMDE_FLOAT32_C( 40.31)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_sinh_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_sinh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( 2.50), SIMDE_FLOAT64_C( 3.47), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 4.79)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 6.05), SIMDE_FLOAT64_C( 16.05), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 60.15)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 5.44), SIMDE_FLOAT64_C( 6.18), SIMDE_FLOAT64_C( 2.02), SIMDE_FLOAT64_C( 3.45)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 115.22), SIMDE_FLOAT64_C( 241.49), SIMDE_FLOAT64_C( 3.70), SIMDE_FLOAT64_C( 15.73)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 6.85), SIMDE_FLOAT64_C( 5.12), SIMDE_FLOAT64_C( 2.14), SIMDE_FLOAT64_C( 5.31)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 471.94), SIMDE_FLOAT64_C( 83.66), SIMDE_FLOAT64_C( 4.19), SIMDE_FLOAT64_C( 101.17)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 0.35), SIMDE_FLOAT64_C( 3.66), SIMDE_FLOAT64_C( 5.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 0.36), SIMDE_FLOAT64_C( 19.42), SIMDE_FLOAT64_C( 158.67)) 
}, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.99), SIMDE_FLOAT64_C( -0.40), SIMDE_FLOAT64_C( 1.50), SIMDE_FLOAT64_C( 6.30)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 3.59), SIMDE_FLOAT64_C( -0.41), SIMDE_FLOAT64_C( 2.13), SIMDE_FLOAT64_C( 272.29)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.39), SIMDE_FLOAT64_C( 3.42), SIMDE_FLOAT64_C( 1.65), SIMDE_FLOAT64_C( -0.67)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.88), SIMDE_FLOAT64_C( 15.27), SIMDE_FLOAT64_C( 2.51), SIMDE_FLOAT64_C( -0.72)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 4.42), SIMDE_FLOAT64_C( 2.39), SIMDE_FLOAT64_C( -0.90), SIMDE_FLOAT64_C( 0.46)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 41.54), SIMDE_FLOAT64_C( 5.41), SIMDE_FLOAT64_C( -1.03), SIMDE_FLOAT64_C( 0.48)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 4.07), SIMDE_FLOAT64_C( 1.36), SIMDE_FLOAT64_C( 4.30), SIMDE_FLOAT64_C( 6.25)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 29.27), SIMDE_FLOAT64_C( 1.82), SIMDE_FLOAT64_C( 36.84), SIMDE_FLOAT64_C( 259.01)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_sinh_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_sinh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 3.66), SIMDE_FLOAT32_C( 5.76), SIMDE_FLOAT32_C( 6.85), SIMDE_FLOAT32_C( 5.12), SIMDE_FLOAT32_C( 2.14), SIMDE_FLOAT32_C( 5.31), SIMDE_FLOAT32_C( 5.44), SIMDE_FLOAT32_C( 6.18), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( 3.45), SIMDE_FLOAT32_C( 2.50), SIMDE_FLOAT32_C( 3.47), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 4.79)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( 19.42), SIMDE_FLOAT32_C( 158.67), SIMDE_FLOAT32_C( 471.94), SIMDE_FLOAT32_C( 83.66), SIMDE_FLOAT32_C( 4.19), SIMDE_FLOAT32_C( 101.17), SIMDE_FLOAT32_C( 115.22), SIMDE_FLOAT32_C( 241.49), SIMDE_FLOAT32_C( 3.70), SIMDE_FLOAT32_C( 15.73), 
SIMDE_FLOAT32_C( 6.05), SIMDE_FLOAT32_C( 16.05), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 60.15)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 4.07), SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( 4.30), SIMDE_FLOAT32_C( 6.25), SIMDE_FLOAT32_C( 4.42), SIMDE_FLOAT32_C( 2.39), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 1.39), SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( 1.65), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 6.30)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 29.27), SIMDE_FLOAT32_C( 1.82), SIMDE_FLOAT32_C( 36.84), SIMDE_FLOAT32_C( 259.01), SIMDE_FLOAT32_C( 41.54), SIMDE_FLOAT32_C( 5.41), SIMDE_FLOAT32_C( -1.03), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( 1.88), SIMDE_FLOAT32_C( 15.27), SIMDE_FLOAT32_C( 2.51), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 3.59), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( 2.13), SIMDE_FLOAT32_C( 272.29)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.63), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 6.12), SIMDE_FLOAT32_C( 4.76), SIMDE_FLOAT32_C( 5.59), SIMDE_FLOAT32_C( 2.16), SIMDE_FLOAT32_C( 6.66), SIMDE_FLOAT32_C( 3.17), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 5.21), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 4.94), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 7.57), SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( 6.92)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 74.20), SIMDE_FLOAT32_C( 227.43), SIMDE_FLOAT32_C( 58.37), SIMDE_FLOAT32_C( 133.87), SIMDE_FLOAT32_C( 4.28), SIMDE_FLOAT32_C( 390.27), SIMDE_FLOAT32_C( 11.88), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 91.54), SIMDE_FLOAT32_C( 0.87), SIMDE_FLOAT32_C( 69.88), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 969.57), SIMDE_FLOAT32_C( 15.27), SIMDE_FLOAT32_C( 506.16)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 5.32), SIMDE_FLOAT32_C( 6.22), SIMDE_FLOAT32_C( 2.66), SIMDE_FLOAT32_C( 6.82), SIMDE_FLOAT32_C( 7.21), SIMDE_FLOAT32_C( 5.88), SIMDE_FLOAT32_C( 6.70), SIMDE_FLOAT32_C( 4.39), SIMDE_FLOAT32_C( 2.42), 
SIMDE_FLOAT32_C( 2.95), SIMDE_FLOAT32_C( 4.75), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 5.01)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 102.19), SIMDE_FLOAT32_C( 251.35), SIMDE_FLOAT32_C( 7.11), SIMDE_FLOAT32_C( 457.99), SIMDE_FLOAT32_C( 676.45), SIMDE_FLOAT32_C( 178.90), SIMDE_FLOAT32_C( 406.20), SIMDE_FLOAT32_C( 40.31), SIMDE_FLOAT32_C( 5.58), SIMDE_FLOAT32_C( 9.53), SIMDE_FLOAT32_C( 57.79), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 1.06), SIMDE_FLOAT32_C( 74.95)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.53), SIMDE_FLOAT32_C( 6.56), SIMDE_FLOAT32_C( 4.70), SIMDE_FLOAT32_C( 4.78), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 1.89), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 2.47), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( 5.71), SIMDE_FLOAT32_C( 6.12), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( 5.64), SIMDE_FLOAT32_C( 3.82), SIMDE_FLOAT32_C( 2.56)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 6.24), SIMDE_FLOAT32_C( 353.14), SIMDE_FLOAT32_C( 54.97), SIMDE_FLOAT32_C( 59.55), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 3.23), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( 5.87), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( 150.93), SIMDE_FLOAT32_C( 227.43), SIMDE_FLOAT32_C( 10.02), SIMDE_FLOAT32_C( 140.73), SIMDE_FLOAT32_C( 22.79), SIMDE_FLOAT32_C( 6.43)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 6.27), SIMDE_FLOAT32_C( 6.91), SIMDE_FLOAT32_C( 3.21), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 1.37), SIMDE_FLOAT32_C( 6.45), SIMDE_FLOAT32_C( 5.47), SIMDE_FLOAT32_C( 5.98), SIMDE_FLOAT32_C( 6.87), SIMDE_FLOAT32_C( 3.90), SIMDE_FLOAT32_C( 7.50), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 4.72), SIMDE_FLOAT32_C( 3.73), SIMDE_FLOAT32_C( 5.29), SIMDE_FLOAT32_C( 0.13)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 264.24), SIMDE_FLOAT32_C( 501.12), SIMDE_FLOAT32_C( 12.37), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( 1.84), SIMDE_FLOAT32_C( 316.35), 
SIMDE_FLOAT32_C( 118.73), SIMDE_FLOAT32_C( 197.72), SIMDE_FLOAT32_C( 481.47), SIMDE_FLOAT32_C( 24.69), SIMDE_FLOAT32_C( 904.02), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( 56.08), SIMDE_FLOAT32_C( 20.83), SIMDE_FLOAT32_C( 99.17), SIMDE_FLOAT32_C( 0.13)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 6.83), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 1.85), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( 4.56), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 3.01), SIMDE_FLOAT32_C( 6.35), SIMDE_FLOAT32_C( 7.50), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 5.13), SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( 2.89)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 462.59), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -1.16), SIMDE_FLOAT32_C( 3.10), SIMDE_FLOAT32_C( 718.27), SIMDE_FLOAT32_C( 47.79), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 10.12), SIMDE_FLOAT32_C( 286.25), SIMDE_FLOAT32_C( 904.02), SIMDE_FLOAT32_C( 0.05), SIMDE_FLOAT32_C( 84.51), SIMDE_FLOAT32_C( 15.27), SIMDE_FLOAT32_C( 8.97)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.42), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 3.16), SIMDE_FLOAT32_C( 7.31), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( 1.89), SIMDE_FLOAT32_C( 2.76), SIMDE_FLOAT32_C( 2.52), SIMDE_FLOAT32_C( 3.47), SIMDE_FLOAT32_C( 5.50), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 3.36), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 7.24), SIMDE_FLOAT32_C( 0.30)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 2.79), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 11.76), SIMDE_FLOAT32_C( 747.59), SIMDE_FLOAT32_C( 13.95), SIMDE_FLOAT32_C( 3.23), SIMDE_FLOAT32_C( 7.87), SIMDE_FLOAT32_C( 6.17), SIMDE_FLOAT32_C( 16.05), SIMDE_FLOAT32_C( 122.34), SIMDE_FLOAT32_C( 74.20), SIMDE_FLOAT32_C( 14.38), SIMDE_FLOAT32_C( 3.59), SIMDE_FLOAT32_C( 697.05), SIMDE_FLOAT32_C( 0.30)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = 
simde_mm512_sinh_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm512_mask_sinh_ps.
 *
 * Each of the 8 cases supplies a pass-through vector `src`, a 16-bit write
 * mask `k`, an input vector `a` and the expected result `r`.  In the expected
 * vectors, lanes whose bit in `k` is set hold sinh(a) written to two decimal
 * places, while lanes whose bit is clear repeat the corresponding `src` value.
 * The comparison is done with simde_assert_m512_close and a final argument of
 * 1 (the precision handed to that macro; its exact meaning is defined where
 * the macro is declared).  Returns 0 once all cases have been run. */
static int test_simde_mm512_mask_sinh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( 6.25), SIMDE_FLOAT32_C( 2.39), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 6.30), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 5.76), SIMDE_FLOAT32_C( 5.12), SIMDE_FLOAT32_C( 5.31), SIMDE_FLOAT32_C( 6.18), SIMDE_FLOAT32_C( 3.45), SIMDE_FLOAT32_C( 3.47), SIMDE_FLOAT32_C( 4.79)), UINT16_C(41466), simde_mm512_set_ps(SIMDE_FLOAT32_C( 4.07), SIMDE_FLOAT32_C( 4.30), SIMDE_FLOAT32_C( 4.42), SIMDE_FLOAT32_C( -0.90), SIMDE_FLOAT32_C( 1.39), SIMDE_FLOAT32_C( 1.65), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 3.66), SIMDE_FLOAT32_C( 6.85), SIMDE_FLOAT32_C( 2.14), SIMDE_FLOAT32_C( 5.44), SIMDE_FLOAT32_C( 2.02), SIMDE_FLOAT32_C( 2.50), SIMDE_FLOAT32_C( 0.06)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 29.27), SIMDE_FLOAT32_C( 6.25), SIMDE_FLOAT32_C( 41.54), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 2.13), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 19.42), SIMDE_FLOAT32_C( 471.94), SIMDE_FLOAT32_C( 4.19), SIMDE_FLOAT32_C( 115.22), SIMDE_FLOAT32_C( 3.45), SIMDE_FLOAT32_C( 6.05), SIMDE_FLOAT32_C( 4.79)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 5.32), SIMDE_FLOAT32_C( 2.66), SIMDE_FLOAT32_C( 7.21), SIMDE_FLOAT32_C( 6.70), SIMDE_FLOAT32_C( 2.42), SIMDE_FLOAT32_C( 4.75), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 1.63), SIMDE_FLOAT32_C( 6.12), SIMDE_FLOAT32_C( 5.59), SIMDE_FLOAT32_C( 6.66), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 3.42)), UINT16_C(36797), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.56), SIMDE_FLOAT32_C( 6.22), 
SIMDE_FLOAT32_C( 6.82), SIMDE_FLOAT32_C( 5.88), SIMDE_FLOAT32_C( 4.39), SIMDE_FLOAT32_C( 2.95), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 5.01), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 4.76), SIMDE_FLOAT32_C( 2.16), SIMDE_FLOAT32_C( 3.17), SIMDE_FLOAT32_C( 5.21), SIMDE_FLOAT32_C( 4.94), SIMDE_FLOAT32_C( 7.57)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 6.43), SIMDE_FLOAT32_C( 2.66), SIMDE_FLOAT32_C( 7.21), SIMDE_FLOAT32_C( 6.70), SIMDE_FLOAT32_C( 40.31), SIMDE_FLOAT32_C( 9.53), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 74.95), SIMDE_FLOAT32_C( 6.12), SIMDE_FLOAT32_C( 58.37), SIMDE_FLOAT32_C( 4.28), SIMDE_FLOAT32_C( 11.88), SIMDE_FLOAT32_C( 91.54), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 969.57)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.89), SIMDE_FLOAT32_C( 6.91), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 6.45), SIMDE_FLOAT32_C( 5.98), SIMDE_FLOAT32_C( 3.90), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 3.73), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 6.56), SIMDE_FLOAT32_C( 4.78), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( 6.12), SIMDE_FLOAT32_C( 5.64)), UINT16_C(16804), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( 6.27), SIMDE_FLOAT32_C( 3.21), SIMDE_FLOAT32_C( 1.37), SIMDE_FLOAT32_C( 5.47), SIMDE_FLOAT32_C( 6.87), SIMDE_FLOAT32_C( 7.50), SIMDE_FLOAT32_C( 4.72), SIMDE_FLOAT32_C( 5.29), SIMDE_FLOAT32_C( 2.53), SIMDE_FLOAT32_C( 4.70), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( 1.89), SIMDE_FLOAT32_C( 2.47), SIMDE_FLOAT32_C( 5.71), SIMDE_FLOAT32_C( 3.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 2.89), SIMDE_FLOAT32_C( 264.24), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 6.45), SIMDE_FLOAT32_C( 5.98), SIMDE_FLOAT32_C( 3.90), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 56.08), SIMDE_FLOAT32_C( 99.17), SIMDE_FLOAT32_C( 6.56), SIMDE_FLOAT32_C( 54.97), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 1.04), SIMDE_FLOAT32_C( 5.87), SIMDE_FLOAT32_C( 6.12), SIMDE_FLOAT32_C( 5.64)) }, { 
simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.80), SIMDE_FLOAT32_C( 1.42), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 7.31), SIMDE_FLOAT32_C( 1.89), SIMDE_FLOAT32_C( 2.52), SIMDE_FLOAT32_C( 5.50), SIMDE_FLOAT32_C( 3.36), SIMDE_FLOAT32_C( 7.24), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 1.85), SIMDE_FLOAT32_C( 4.56), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 6.35), SIMDE_FLOAT32_C( 0.05)), UINT16_C( 2107), simde_mm512_set_ps(SIMDE_FLOAT32_C( 7.16), SIMDE_FLOAT32_C( 2.45), SIMDE_FLOAT32_C( 1.75), SIMDE_FLOAT32_C( 3.16), SIMDE_FLOAT32_C( 3.33), SIMDE_FLOAT32_C( 2.76), SIMDE_FLOAT32_C( 3.47), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 1.99), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 6.83), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -0.00), SIMDE_FLOAT32_C( 3.01), SIMDE_FLOAT32_C( 7.50)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 1.80), SIMDE_FLOAT32_C( 1.42), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 7.31), SIMDE_FLOAT32_C( 13.95), SIMDE_FLOAT32_C( 2.52), SIMDE_FLOAT32_C( 5.50), SIMDE_FLOAT32_C( 3.36), SIMDE_FLOAT32_C( 7.24), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 462.59), SIMDE_FLOAT32_C( -1.16), SIMDE_FLOAT32_C( 718.27), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 10.12), SIMDE_FLOAT32_C( 904.02)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.23), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 4.07), SIMDE_FLOAT32_C( 4.79), SIMDE_FLOAT32_C( 7.12), SIMDE_FLOAT32_C( 7.24), SIMDE_FLOAT32_C( 3.87), SIMDE_FLOAT32_C( 5.39), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 4.25), SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 5.19)), UINT16_C(22274), simde_mm512_set_ps(SIMDE_FLOAT32_C( 5.44), SIMDE_FLOAT32_C( 7.24), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 3.99), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 5.13), SIMDE_FLOAT32_C( 7.31), SIMDE_FLOAT32_C( 3.77), SIMDE_FLOAT32_C( 6.86), SIMDE_FLOAT32_C( 2.97), SIMDE_FLOAT32_C( 4.32), SIMDE_FLOAT32_C( 1.67), SIMDE_FLOAT32_C( 
0.71), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 5.34)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.23), SIMDE_FLOAT32_C( 697.05), SIMDE_FLOAT32_C( 1.95), SIMDE_FLOAT32_C( 27.02), SIMDE_FLOAT32_C( 4.79), SIMDE_FLOAT32_C( 84.51), SIMDE_FLOAT32_C( 747.59), SIMDE_FLOAT32_C( 21.68), SIMDE_FLOAT32_C( 5.39), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 4.25), SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( 5.19)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 7.10), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( 3.00), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 3.70), SIMDE_FLOAT32_C( 4.19), SIMDE_FLOAT32_C( 6.89), SIMDE_FLOAT32_C( 3.02), SIMDE_FLOAT32_C( 2.07), SIMDE_FLOAT32_C( 2.38), SIMDE_FLOAT32_C( 1.93), SIMDE_FLOAT32_C( 6.67), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 5.69), SIMDE_FLOAT32_C( 5.19), SIMDE_FLOAT32_C( 5.17)), UINT16_C(27396), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 1.55), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 6.70), SIMDE_FLOAT32_C( 3.32), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( 3.99), SIMDE_FLOAT32_C( 5.79), SIMDE_FLOAT32_C( 3.25), SIMDE_FLOAT32_C( 1.97), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 4.74), SIMDE_FLOAT32_C( 4.48), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( 1.19)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 7.10), SIMDE_FLOAT32_C( 2.25), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 406.20), SIMDE_FLOAT32_C( 4.19), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( 27.02), SIMDE_FLOAT32_C( 2.07), SIMDE_FLOAT32_C( 2.38), SIMDE_FLOAT32_C( 1.93), SIMDE_FLOAT32_C( 6.67), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 44.11), SIMDE_FLOAT32_C( 5.19), SIMDE_FLOAT32_C( 5.17)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 5.58), SIMDE_FLOAT32_C( 3.19), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 6.05), SIMDE_FLOAT32_C( 5.24), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( 3.75), SIMDE_FLOAT32_C( 5.84), SIMDE_FLOAT32_C( 3.43), SIMDE_FLOAT32_C( 
6.03), SIMDE_FLOAT32_C( 2.94), SIMDE_FLOAT32_C( 3.64), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( 5.91), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( -0.52)), UINT16_C( 953), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.38), SIMDE_FLOAT32_C( 6.78), SIMDE_FLOAT32_C( 3.92), SIMDE_FLOAT32_C( 1.46), SIMDE_FLOAT32_C( 4.63), SIMDE_FLOAT32_C( 2.54), SIMDE_FLOAT32_C( 1.33), SIMDE_FLOAT32_C( 6.22), SIMDE_FLOAT32_C( 3.58), SIMDE_FLOAT32_C( 2.15), SIMDE_FLOAT32_C( 3.80), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 3.13), SIMDE_FLOAT32_C( 2.22), SIMDE_FLOAT32_C( 3.07), SIMDE_FLOAT32_C( 3.70)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 5.58), SIMDE_FLOAT32_C( 3.19), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 6.05), SIMDE_FLOAT32_C( 5.24), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( 1.76), SIMDE_FLOAT32_C( 251.35), SIMDE_FLOAT32_C( 17.92), SIMDE_FLOAT32_C( 6.03), SIMDE_FLOAT32_C( 22.34), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 11.42), SIMDE_FLOAT32_C( 5.91), SIMDE_FLOAT32_C( 5.00), SIMDE_FLOAT32_C( 20.21)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( 4.72), SIMDE_FLOAT32_C( 1.18), SIMDE_FLOAT32_C( 6.84), SIMDE_FLOAT32_C( 7.41), SIMDE_FLOAT32_C( 7.40), SIMDE_FLOAT32_C( 6.85), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( 2.40), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( 0.17), SIMDE_FLOAT32_C( 1.49), SIMDE_FLOAT32_C( 3.73), SIMDE_FLOAT32_C( 3.74), SIMDE_FLOAT32_C( 5.19), SIMDE_FLOAT32_C( 2.42)), UINT16_C(12713), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( 3.24), SIMDE_FLOAT32_C( 6.85), SIMDE_FLOAT32_C( 6.71), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 1.96), SIMDE_FLOAT32_C( 6.43), SIMDE_FLOAT32_C( 1.79), SIMDE_FLOAT32_C( 3.56), SIMDE_FLOAT32_C( 3.77), SIMDE_FLOAT32_C( 6.38), SIMDE_FLOAT32_C( 2.22), SIMDE_FLOAT32_C( 7.36), SIMDE_FLOAT32_C( 4.86), SIMDE_FLOAT32_C( 3.24), SIMDE_FLOAT32_C( 6.97)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( 4.72), SIMDE_FLOAT32_C( 471.94), SIMDE_FLOAT32_C( 410.28), SIMDE_FLOAT32_C( 7.41), SIMDE_FLOAT32_C( 7.40), 
SIMDE_FLOAT32_C( 6.85), SIMDE_FLOAT32_C( 2.91), SIMDE_FLOAT32_C( 17.57), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( 294.96), SIMDE_FLOAT32_C( 1.49), SIMDE_FLOAT32_C( 785.92), SIMDE_FLOAT32_C( 3.74), SIMDE_FLOAT32_C( 5.19), SIMDE_FLOAT32_C( 532.11)) } };
/* Run every masked single-precision case and compare the result with the
 * expected vector; the array length is derived from sizeof so the loop always
 * matches the table. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_sinh_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm512_sinh_pd.
 *
 * Each of the 8 cases pairs an input vector `a` (eight doubles) with the
 * expected vector `r`, whose elements are sinh(a) written to two decimal
 * places (e.g. 5.44 -> 115.22, 0.06 -> 0.06, -0.67 -> -0.72).  Results are
 * checked with simde_assert_m512d_close using a final argument of 1 (the
 * precision handed to that macro).  Returns 0 once all cases have been run. */
static int test_simde_mm512_sinh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.44), SIMDE_FLOAT64_C( 6.18), SIMDE_FLOAT64_C( 2.02), SIMDE_FLOAT64_C( 3.45), SIMDE_FLOAT64_C( 2.50), SIMDE_FLOAT64_C( 3.47), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 4.79)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 115.22), SIMDE_FLOAT64_C( 241.49), SIMDE_FLOAT64_C( 3.70), SIMDE_FLOAT64_C( 15.73), SIMDE_FLOAT64_C( 6.05), SIMDE_FLOAT64_C( 16.05), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 60.15)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 0.35), SIMDE_FLOAT64_C( 3.66), SIMDE_FLOAT64_C( 5.76), SIMDE_FLOAT64_C( 6.85), SIMDE_FLOAT64_C( 5.12), SIMDE_FLOAT64_C( 2.14), SIMDE_FLOAT64_C( 5.31)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 0.36), SIMDE_FLOAT64_C( 19.42), SIMDE_FLOAT64_C( 158.67), SIMDE_FLOAT64_C( 471.94), SIMDE_FLOAT64_C( 83.66), SIMDE_FLOAT64_C( 4.19), SIMDE_FLOAT64_C( 101.17)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.39), SIMDE_FLOAT64_C( 3.42), SIMDE_FLOAT64_C( 1.65), SIMDE_FLOAT64_C( -0.67), SIMDE_FLOAT64_C( 1.99), SIMDE_FLOAT64_C( -0.40), SIMDE_FLOAT64_C( 1.50), SIMDE_FLOAT64_C( 6.30)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.88), SIMDE_FLOAT64_C( 15.27), SIMDE_FLOAT64_C( 2.51), SIMDE_FLOAT64_C( -0.72), SIMDE_FLOAT64_C( 3.59), SIMDE_FLOAT64_C( -0.41), SIMDE_FLOAT64_C( 2.13), SIMDE_FLOAT64_C( 272.29)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 4.07), SIMDE_FLOAT64_C( 1.36), SIMDE_FLOAT64_C( 4.30), 
SIMDE_FLOAT64_C( 6.25), SIMDE_FLOAT64_C( 4.42), SIMDE_FLOAT64_C( 2.39), SIMDE_FLOAT64_C( -0.90), SIMDE_FLOAT64_C( 0.46)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 29.27), SIMDE_FLOAT64_C( 1.82), SIMDE_FLOAT64_C( 36.84), SIMDE_FLOAT64_C( 259.01), SIMDE_FLOAT64_C( 41.54), SIMDE_FLOAT64_C( 5.41), SIMDE_FLOAT64_C( -1.03), SIMDE_FLOAT64_C( 0.48)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( 5.21), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( 4.94), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( 7.57), SIMDE_FLOAT64_C( 3.42), SIMDE_FLOAT64_C( 6.92)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( 91.54), SIMDE_FLOAT64_C( 0.87), SIMDE_FLOAT64_C( 69.88), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( 969.57), SIMDE_FLOAT64_C( 15.27), SIMDE_FLOAT64_C( 506.16)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.63), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 6.12), SIMDE_FLOAT64_C( 4.76), SIMDE_FLOAT64_C( 5.59), SIMDE_FLOAT64_C( 2.16), SIMDE_FLOAT64_C( 6.66), SIMDE_FLOAT64_C( 3.17)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.45), SIMDE_FLOAT64_C( 74.20), SIMDE_FLOAT64_C( 227.43), SIMDE_FLOAT64_C( 58.37), SIMDE_FLOAT64_C( 133.87), SIMDE_FLOAT64_C( 4.28), SIMDE_FLOAT64_C( 390.27), SIMDE_FLOAT64_C( 11.88)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 2.42), SIMDE_FLOAT64_C( 2.95), SIMDE_FLOAT64_C( 4.75), SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 5.01)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.58), SIMDE_FLOAT64_C( 9.53), SIMDE_FLOAT64_C( 57.79), SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( 0.09), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 1.06), SIMDE_FLOAT64_C( 74.95)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.32), SIMDE_FLOAT64_C( 6.22), SIMDE_FLOAT64_C( 2.66), SIMDE_FLOAT64_C( 6.82), SIMDE_FLOAT64_C( 7.21), SIMDE_FLOAT64_C( 5.88), SIMDE_FLOAT64_C( 6.70), SIMDE_FLOAT64_C( 4.39)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 102.19), SIMDE_FLOAT64_C( 251.35), SIMDE_FLOAT64_C( 7.11), SIMDE_FLOAT64_C( 457.99), 
SIMDE_FLOAT64_C( 676.45), SIMDE_FLOAT64_C( 178.90), SIMDE_FLOAT64_C( 406.20), SIMDE_FLOAT64_C( 40.31)) } };
/* Run every double-precision case and compare the result with the expected
 * vector. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_sinh_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm512_mask_sinh_pd.
 *
 * Each of the 8 cases supplies a pass-through vector `src`, an 8-bit write
 * mask `k`, an input vector `a` and the expected result `r`.  In the expected
 * vectors, lanes whose bit in `k` is set hold sinh(a) written to two decimal
 * places, while lanes whose bit is clear repeat the corresponding `src` value.
 * Results are checked with simde_assert_m512d_close using a final argument of
 * 1 (the precision handed to that macro).  Returns 0 once all cases have been
 * run. */
static int test_simde_mm512_mask_sinh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.35), SIMDE_FLOAT64_C( 5.76), SIMDE_FLOAT64_C( 5.12), SIMDE_FLOAT64_C( 5.31), SIMDE_FLOAT64_C( 6.18), SIMDE_FLOAT64_C( 3.45), SIMDE_FLOAT64_C( 3.47), SIMDE_FLOAT64_C( 4.79)), UINT8_C(139), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 3.66), SIMDE_FLOAT64_C( 6.85), SIMDE_FLOAT64_C( 2.14), SIMDE_FLOAT64_C( 5.44), SIMDE_FLOAT64_C( 2.02), SIMDE_FLOAT64_C( 2.50), SIMDE_FLOAT64_C( 0.06)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 5.76), SIMDE_FLOAT64_C( 5.12), SIMDE_FLOAT64_C( 5.31), SIMDE_FLOAT64_C( 115.22), SIMDE_FLOAT64_C( 3.45), SIMDE_FLOAT64_C( 6.05), SIMDE_FLOAT64_C( 0.06)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 4.07), SIMDE_FLOAT64_C( 4.30), SIMDE_FLOAT64_C( 4.42), SIMDE_FLOAT64_C( -0.90), SIMDE_FLOAT64_C( 1.39), SIMDE_FLOAT64_C( 1.65), SIMDE_FLOAT64_C( 1.99), SIMDE_FLOAT64_C( 1.50)), UINT8_C(229), simde_mm512_set_pd(SIMDE_FLOAT64_C( 6.92), SIMDE_FLOAT64_C( 1.36), SIMDE_FLOAT64_C( 6.25), SIMDE_FLOAT64_C( 2.39), SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 3.42), SIMDE_FLOAT64_C( -0.67), SIMDE_FLOAT64_C( -0.40)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 506.16), SIMDE_FLOAT64_C( 1.82), SIMDE_FLOAT64_C( 259.01), SIMDE_FLOAT64_C( -0.90), SIMDE_FLOAT64_C( 1.39), SIMDE_FLOAT64_C( 15.27), SIMDE_FLOAT64_C( 1.99), SIMDE_FLOAT64_C( -0.41)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.01), SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 4.76), SIMDE_FLOAT64_C( 2.16), SIMDE_FLOAT64_C( 3.17), SIMDE_FLOAT64_C( 5.21), SIMDE_FLOAT64_C( 4.94), SIMDE_FLOAT64_C( 7.57)), 
UINT8_C(253), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( 1.63), SIMDE_FLOAT64_C( 6.12), SIMDE_FLOAT64_C( 5.59), SIMDE_FLOAT64_C( 6.66), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( -0.01)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.06), SIMDE_FLOAT64_C( 2.45), SIMDE_FLOAT64_C( 227.43), SIMDE_FLOAT64_C( 133.87), SIMDE_FLOAT64_C( 390.27), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( 4.94), SIMDE_FLOAT64_C( -0.01)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.82), SIMDE_FLOAT64_C( 5.32), SIMDE_FLOAT64_C( 2.66), SIMDE_FLOAT64_C( 7.21), SIMDE_FLOAT64_C( 6.70), SIMDE_FLOAT64_C( 2.42), SIMDE_FLOAT64_C( 4.75), SIMDE_FLOAT64_C( 0.09)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.64), SIMDE_FLOAT64_C( 2.56), SIMDE_FLOAT64_C( 6.22), SIMDE_FLOAT64_C( 6.82), SIMDE_FLOAT64_C( 5.88), SIMDE_FLOAT64_C( 4.39), SIMDE_FLOAT64_C( 2.95), SIMDE_FLOAT64_C( -0.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.82), SIMDE_FLOAT64_C( 6.43), SIMDE_FLOAT64_C( 2.66), SIMDE_FLOAT64_C( 457.99), SIMDE_FLOAT64_C( 178.90), SIMDE_FLOAT64_C( 40.31), SIMDE_FLOAT64_C( 4.75), SIMDE_FLOAT64_C( -0.84)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.73), SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( 6.56), SIMDE_FLOAT64_C( 4.78), SIMDE_FLOAT64_C( -0.13), SIMDE_FLOAT64_C( 1.04), SIMDE_FLOAT64_C( -0.24), SIMDE_FLOAT64_C( 6.12)), UINT8_C(145), simde_mm512_set_pd(SIMDE_FLOAT64_C( 4.72), SIMDE_FLOAT64_C( 5.29), SIMDE_FLOAT64_C( 2.53), SIMDE_FLOAT64_C( 4.70), SIMDE_FLOAT64_C( -0.46), SIMDE_FLOAT64_C( 1.89), SIMDE_FLOAT64_C( 2.47), SIMDE_FLOAT64_C( 5.71)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 56.08), SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( 6.56), SIMDE_FLOAT64_C( 54.97), SIMDE_FLOAT64_C( -0.13), SIMDE_FLOAT64_C( 1.04), SIMDE_FLOAT64_C( -0.24), SIMDE_FLOAT64_C( 150.93)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 3.42), SIMDE_FLOAT64_C( 6.27), SIMDE_FLOAT64_C( 3.21), SIMDE_FLOAT64_C( 1.37), SIMDE_FLOAT64_C( 5.47), SIMDE_FLOAT64_C( 6.87), SIMDE_FLOAT64_C( 7.50)), UINT8_C( 
75), simde_mm512_set_pd(SIMDE_FLOAT64_C( 7.50), SIMDE_FLOAT64_C( 5.13), SIMDE_FLOAT64_C( 2.89), SIMDE_FLOAT64_C( 6.91), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 6.45), SIMDE_FLOAT64_C( 5.98), SIMDE_FLOAT64_C( 3.90)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( 84.51), SIMDE_FLOAT64_C( 6.27), SIMDE_FLOAT64_C( 3.21), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( 5.47), SIMDE_FLOAT64_C( 197.72), SIMDE_FLOAT64_C( 24.69)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 1.99), SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( 6.83), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( 7.27), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 3.01)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.50), SIMDE_FLOAT64_C( 3.36), SIMDE_FLOAT64_C( 7.24), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 1.85), SIMDE_FLOAT64_C( 4.56), SIMDE_FLOAT64_C( 0.82)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.00), SIMDE_FLOAT64_C( 14.38), SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( 3.10), SIMDE_FLOAT64_C( -0.00), SIMDE_FLOAT64_C( 0.92)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 5.34), SIMDE_FLOAT64_C( 7.33), SIMDE_FLOAT64_C( 1.80), SIMDE_FLOAT64_C( 1.42), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 7.31), SIMDE_FLOAT64_C( 1.89), SIMDE_FLOAT64_C( 2.52)), UINT8_C(213), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( 5.19), SIMDE_FLOAT64_C( 7.16), SIMDE_FLOAT64_C( 2.45), SIMDE_FLOAT64_C( 1.75), SIMDE_FLOAT64_C( 3.16), SIMDE_FLOAT64_C( 3.33), SIMDE_FLOAT64_C( 2.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( 89.73), SIMDE_FLOAT64_C( 1.80), SIMDE_FLOAT64_C( 5.75), SIMDE_FLOAT64_C( 0.06), SIMDE_FLOAT64_C( 11.76), SIMDE_FLOAT64_C( 1.89), SIMDE_FLOAT64_C( 7.87)) } };
/* Run every masked double-precision case and compare the result with the
 * expected vector. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_sinh_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; }
/* Table-driven test for simde_mm_svml_ceil_ps.
 *
 * The static table stores plain float arrays (4 inputs `a`, 4 expected
 * values `r`) that are loaded with simde_mm_loadu_ps at run time.  Expected
 * values are the inputs rounded up to the next integer (e.g. -169.65 ->
 * -169.00, 302.20 -> 303.00).  The comparison uses
 * simde_test_x86_assert_equal_f32x4 with a final argument of 1 (the precision
 * handed to that macro).  Returns 0 once all cases have been run. */
static int 
test_simde_mm_svml_ceil_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -169.65), SIMDE_FLOAT32_C( 267.82), SIMDE_FLOAT32_C( 302.20), SIMDE_FLOAT32_C( -31.93) }, { SIMDE_FLOAT32_C( -169.00), SIMDE_FLOAT32_C( 268.00), SIMDE_FLOAT32_C( 303.00), SIMDE_FLOAT32_C( -31.00) } }, { { SIMDE_FLOAT32_C( -142.32), SIMDE_FLOAT32_C( -661.66), SIMDE_FLOAT32_C( 156.37), SIMDE_FLOAT32_C( 396.69) }, { SIMDE_FLOAT32_C( -142.00), SIMDE_FLOAT32_C( -661.00), SIMDE_FLOAT32_C( 157.00), SIMDE_FLOAT32_C( 397.00) } }, { { SIMDE_FLOAT32_C( 382.01), SIMDE_FLOAT32_C( 656.47), SIMDE_FLOAT32_C( -361.06), SIMDE_FLOAT32_C( -343.68) }, { SIMDE_FLOAT32_C( 383.00), SIMDE_FLOAT32_C( 657.00), SIMDE_FLOAT32_C( -361.00), SIMDE_FLOAT32_C( -343.00) } }, { { SIMDE_FLOAT32_C( -331.36), SIMDE_FLOAT32_C( 68.89), SIMDE_FLOAT32_C( 476.92), SIMDE_FLOAT32_C( -40.59) }, { SIMDE_FLOAT32_C( -331.00), SIMDE_FLOAT32_C( 69.00), SIMDE_FLOAT32_C( 477.00), SIMDE_FLOAT32_C( -40.00) } }, { { SIMDE_FLOAT32_C( 390.65), SIMDE_FLOAT32_C( -570.02), SIMDE_FLOAT32_C( -935.28), SIMDE_FLOAT32_C( 672.43) }, { SIMDE_FLOAT32_C( 391.00), SIMDE_FLOAT32_C( -570.00), SIMDE_FLOAT32_C( -935.00), SIMDE_FLOAT32_C( 673.00) } }, { { SIMDE_FLOAT32_C( 681.18), SIMDE_FLOAT32_C( -100.50), SIMDE_FLOAT32_C( 206.11), SIMDE_FLOAT32_C( 943.93) }, { SIMDE_FLOAT32_C( 682.00), SIMDE_FLOAT32_C( -100.00), SIMDE_FLOAT32_C( 207.00), SIMDE_FLOAT32_C( 944.00) } }, { { SIMDE_FLOAT32_C( 786.98), SIMDE_FLOAT32_C( -51.78), SIMDE_FLOAT32_C( -481.30), SIMDE_FLOAT32_C( 955.46) }, { SIMDE_FLOAT32_C( 787.00), SIMDE_FLOAT32_C( -51.00), SIMDE_FLOAT32_C( -481.00), SIMDE_FLOAT32_C( 956.00) } }, { { SIMDE_FLOAT32_C( -832.82), SIMDE_FLOAT32_C( 115.81), SIMDE_FLOAT32_C( -954.30), SIMDE_FLOAT32_C( -2.48) }, { SIMDE_FLOAT32_C( -832.00), SIMDE_FLOAT32_C( 116.00), SIMDE_FLOAT32_C( -954.00), SIMDE_FLOAT32_C( -2.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_svml_ceil_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; }
/* Table-driven test for simde_mm_svml_ceil_pd.
 *
 * The static table stores plain double arrays (2 inputs `a`, 2 expected
 * values `r`) that are loaded with simde_mm_loadu_pd at run time.  Expected
 * values are the inputs rounded up to the next integer (e.g. 674.99 ->
 * 675.00, -469.97 -> -469.00); an input that is already integral is expected
 * unchanged (347.00 -> 347.00).  The comparison uses
 * simde_test_x86_assert_equal_f64x2 with a final argument of 1 (the precision
 * handed to that macro).  Returns 0 once all cases have been run. */
static int test_simde_mm_svml_ceil_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 674.99), SIMDE_FLOAT64_C( 114.55) }, { SIMDE_FLOAT64_C( 675.00), SIMDE_FLOAT64_C( 115.00) } }, { { SIMDE_FLOAT64_C( 69.63), SIMDE_FLOAT64_C( -469.97) }, { SIMDE_FLOAT64_C( 70.00), SIMDE_FLOAT64_C( -469.00) } }, { { SIMDE_FLOAT64_C( 28.21), SIMDE_FLOAT64_C( 212.97) }, { SIMDE_FLOAT64_C( 29.00), SIMDE_FLOAT64_C( 213.00) } }, { { SIMDE_FLOAT64_C( 763.99), SIMDE_FLOAT64_C( -272.25) }, { SIMDE_FLOAT64_C( 764.00), SIMDE_FLOAT64_C( -272.00) } }, { { SIMDE_FLOAT64_C( -938.61), SIMDE_FLOAT64_C( 282.65) }, { SIMDE_FLOAT64_C( -938.00), SIMDE_FLOAT64_C( 283.00) } }, { { SIMDE_FLOAT64_C( -881.63), SIMDE_FLOAT64_C( 347.00) }, { SIMDE_FLOAT64_C( -881.00), SIMDE_FLOAT64_C( 347.00) } }, { { SIMDE_FLOAT64_C( 95.36), SIMDE_FLOAT64_C( -9.46) }, { SIMDE_FLOAT64_C( 96.00), SIMDE_FLOAT64_C( -9.00) } }, { { SIMDE_FLOAT64_C( -56.68), SIMDE_FLOAT64_C( 444.40) }, { SIMDE_FLOAT64_C( -56.00), SIMDE_FLOAT64_C( 445.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_svml_ceil_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; }
/* Table-driven test for simde_mm256_svml_ceil_ps.
 *
 * Same layout as the 128-bit tests, with 8 floats per input/expected array
 * loaded through simde_mm256_loadu_ps.  Expected values are the inputs
 * rounded up to the next integer (e.g. -76.72 -> -76.00, 5.62 -> 6.00) and
 * are compared with simde_test_x86_assert_equal_f32x8 using a final argument
 * of 1.  Returns 0 once all cases have been run. */
static int test_simde_mm256_svml_ceil_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( -76.72), SIMDE_FLOAT32_C( -639.26), SIMDE_FLOAT32_C( 440.96), SIMDE_FLOAT32_C( -729.70), SIMDE_FLOAT32_C( 846.93), SIMDE_FLOAT32_C( 5.62), SIMDE_FLOAT32_C( -834.54), SIMDE_FLOAT32_C( -216.99) }, { SIMDE_FLOAT32_C( -76.00), SIMDE_FLOAT32_C( -639.00), SIMDE_FLOAT32_C( 441.00), SIMDE_FLOAT32_C( 
-729.00), SIMDE_FLOAT32_C( 847.00), SIMDE_FLOAT32_C( 6.00), SIMDE_FLOAT32_C( -834.00), SIMDE_FLOAT32_C( -216.00) } }, { { SIMDE_FLOAT32_C( -602.71), SIMDE_FLOAT32_C( -551.43), SIMDE_FLOAT32_C( 949.68), SIMDE_FLOAT32_C( -637.56), SIMDE_FLOAT32_C( -279.53), SIMDE_FLOAT32_C( 553.99), SIMDE_FLOAT32_C( -582.80), SIMDE_FLOAT32_C( 265.64) }, { SIMDE_FLOAT32_C( -602.00), SIMDE_FLOAT32_C( -551.00), SIMDE_FLOAT32_C( 950.00), SIMDE_FLOAT32_C( -637.00), SIMDE_FLOAT32_C( -279.00), SIMDE_FLOAT32_C( 554.00), SIMDE_FLOAT32_C( -582.00), SIMDE_FLOAT32_C( 266.00) } }, { { SIMDE_FLOAT32_C( 457.99), SIMDE_FLOAT32_C( 385.92), SIMDE_FLOAT32_C( 814.23), SIMDE_FLOAT32_C( -511.82), SIMDE_FLOAT32_C( -834.29), SIMDE_FLOAT32_C( 45.52), SIMDE_FLOAT32_C( 999.48), SIMDE_FLOAT32_C( -489.95) }, { SIMDE_FLOAT32_C( 458.00), SIMDE_FLOAT32_C( 386.00), SIMDE_FLOAT32_C( 815.00), SIMDE_FLOAT32_C( -511.00), SIMDE_FLOAT32_C( -834.00), SIMDE_FLOAT32_C( 46.00), SIMDE_FLOAT32_C( 1000.00), SIMDE_FLOAT32_C( -489.00) } }, { { SIMDE_FLOAT32_C( 499.94), SIMDE_FLOAT32_C( 847.57), SIMDE_FLOAT32_C( 656.49), SIMDE_FLOAT32_C( 169.03), SIMDE_FLOAT32_C( -361.51), SIMDE_FLOAT32_C( 697.36), SIMDE_FLOAT32_C( -537.79), SIMDE_FLOAT32_C( 561.78) }, { SIMDE_FLOAT32_C( 500.00), SIMDE_FLOAT32_C( 848.00), SIMDE_FLOAT32_C( 657.00), SIMDE_FLOAT32_C( 170.00), SIMDE_FLOAT32_C( -361.00), SIMDE_FLOAT32_C( 698.00), SIMDE_FLOAT32_C( -537.00), SIMDE_FLOAT32_C( 562.00) } }, { { SIMDE_FLOAT32_C( -941.90), SIMDE_FLOAT32_C( 903.17), SIMDE_FLOAT32_C( 832.08), SIMDE_FLOAT32_C( 905.03), SIMDE_FLOAT32_C( -91.21), SIMDE_FLOAT32_C( 997.54), SIMDE_FLOAT32_C( -311.96), SIMDE_FLOAT32_C( 306.08) }, { SIMDE_FLOAT32_C( -941.00), SIMDE_FLOAT32_C( 904.00), SIMDE_FLOAT32_C( 833.00), SIMDE_FLOAT32_C( 906.00), SIMDE_FLOAT32_C( -91.00), SIMDE_FLOAT32_C( 998.00), SIMDE_FLOAT32_C( -311.00), SIMDE_FLOAT32_C( 307.00) } }, { { SIMDE_FLOAT32_C( -553.88), SIMDE_FLOAT32_C( -362.28), SIMDE_FLOAT32_C( 668.53), SIMDE_FLOAT32_C( 166.59), SIMDE_FLOAT32_C( -808.29), 
SIMDE_FLOAT32_C( -914.27), SIMDE_FLOAT32_C( -567.77), SIMDE_FLOAT32_C( 649.70) }, { SIMDE_FLOAT32_C( -553.00), SIMDE_FLOAT32_C( -362.00), SIMDE_FLOAT32_C( 669.00), SIMDE_FLOAT32_C( 167.00), SIMDE_FLOAT32_C( -808.00), SIMDE_FLOAT32_C( -914.00), SIMDE_FLOAT32_C( -567.00), SIMDE_FLOAT32_C( 650.00) } }, { { SIMDE_FLOAT32_C( 471.65), SIMDE_FLOAT32_C( -753.54), SIMDE_FLOAT32_C( -862.12), SIMDE_FLOAT32_C( 637.36), SIMDE_FLOAT32_C( 291.98), SIMDE_FLOAT32_C( -862.64), SIMDE_FLOAT32_C( -852.59), SIMDE_FLOAT32_C( -208.07) }, { SIMDE_FLOAT32_C( 472.00), SIMDE_FLOAT32_C( -753.00), SIMDE_FLOAT32_C( -862.00), SIMDE_FLOAT32_C( 638.00), SIMDE_FLOAT32_C( 292.00), SIMDE_FLOAT32_C( -862.00), SIMDE_FLOAT32_C( -852.00), SIMDE_FLOAT32_C( -208.00) } }, { { SIMDE_FLOAT32_C( 984.93), SIMDE_FLOAT32_C( 803.90), SIMDE_FLOAT32_C( 960.96), SIMDE_FLOAT32_C( -376.58), SIMDE_FLOAT32_C( 501.26), SIMDE_FLOAT32_C( -576.83), SIMDE_FLOAT32_C( -814.80), SIMDE_FLOAT32_C( 559.36) }, { SIMDE_FLOAT32_C( 985.00), SIMDE_FLOAT32_C( 804.00), SIMDE_FLOAT32_C( 961.00), SIMDE_FLOAT32_C( -376.00), SIMDE_FLOAT32_C( 502.00), SIMDE_FLOAT32_C( -576.00), SIMDE_FLOAT32_C( -814.00), SIMDE_FLOAT32_C( 560.00) } } };
/* Load each 8-float case, apply the 256-bit ceil and compare with the
 * expected vector. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_svml_ceil_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; }
/* Table-driven test for simde_mm256_svml_ceil_pd.
 *
 * The static table stores plain double arrays (4 inputs `a`, 4 expected
 * values `r`) that are loaded with simde_mm256_loadu_pd at run time.
 * Expected values are the inputs rounded up to the next integer (e.g.
 * -362.72 -> -362.00, 813.10 -> 814.00).  The comparison uses
 * simde_test_x86_assert_equal_f64x4 with a final argument of 1 (the precision
 * handed to that macro).  Returns 0 once all cases have been run. */
static int test_simde_mm256_svml_ceil_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( -362.72), SIMDE_FLOAT64_C( -517.27), SIMDE_FLOAT64_C( -680.39), SIMDE_FLOAT64_C( -370.55) }, { SIMDE_FLOAT64_C( -362.00), SIMDE_FLOAT64_C( -517.00), SIMDE_FLOAT64_C( -680.00), SIMDE_FLOAT64_C( -370.00) } }, { { SIMDE_FLOAT64_C( -614.98), SIMDE_FLOAT64_C( 499.96), SIMDE_FLOAT64_C( -673.46), SIMDE_FLOAT64_C( 813.10) }, { SIMDE_FLOAT64_C( -614.00), SIMDE_FLOAT64_C( 500.00), 
SIMDE_FLOAT64_C( -673.00), SIMDE_FLOAT64_C( 814.00) } }, { { SIMDE_FLOAT64_C( -134.44), SIMDE_FLOAT64_C( 719.80), SIMDE_FLOAT64_C( -164.15), SIMDE_FLOAT64_C( -617.21) }, { SIMDE_FLOAT64_C( -134.00), SIMDE_FLOAT64_C( 720.00), SIMDE_FLOAT64_C( -164.00), SIMDE_FLOAT64_C( -617.00) } }, { { SIMDE_FLOAT64_C( -500.24), SIMDE_FLOAT64_C( 381.09), SIMDE_FLOAT64_C( 264.50), SIMDE_FLOAT64_C( 668.11) }, { SIMDE_FLOAT64_C( -500.00), SIMDE_FLOAT64_C( 382.00), SIMDE_FLOAT64_C( 265.00), SIMDE_FLOAT64_C( 669.00) } }, { { SIMDE_FLOAT64_C( 934.75), SIMDE_FLOAT64_C( -779.04), SIMDE_FLOAT64_C( 549.14), SIMDE_FLOAT64_C( -476.20) }, { SIMDE_FLOAT64_C( 935.00), SIMDE_FLOAT64_C( -779.00), SIMDE_FLOAT64_C( 550.00), SIMDE_FLOAT64_C( -476.00) } }, { { SIMDE_FLOAT64_C( -15.07), SIMDE_FLOAT64_C( 858.66), SIMDE_FLOAT64_C( -174.63), SIMDE_FLOAT64_C( -609.29) }, { SIMDE_FLOAT64_C( -15.00), SIMDE_FLOAT64_C( 859.00), SIMDE_FLOAT64_C( -174.00), SIMDE_FLOAT64_C( -609.00) } }, { { SIMDE_FLOAT64_C( -71.58), SIMDE_FLOAT64_C( 432.38), SIMDE_FLOAT64_C( -26.35), SIMDE_FLOAT64_C( -67.29) }, { SIMDE_FLOAT64_C( -71.00), SIMDE_FLOAT64_C( 433.00), SIMDE_FLOAT64_C( -26.00), SIMDE_FLOAT64_C( -67.00) } }, { { SIMDE_FLOAT64_C( 708.92), SIMDE_FLOAT64_C( 346.09), SIMDE_FLOAT64_C( -697.36), SIMDE_FLOAT64_C( -653.80) }, { SIMDE_FLOAT64_C( 709.00), SIMDE_FLOAT64_C( 347.00), SIMDE_FLOAT64_C( -697.00), SIMDE_FLOAT64_C( -653.00) } } };
/* Load each 4-double case, apply the 256-bit ceil and compare with the
 * expected vector. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_svml_ceil_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; }
/* Table-driven test for simde_mm512_ceil_ps.
 *
 * Unlike the 128- and 256-bit tests above, the function under test carries
 * no `svml_` infix in its name.  The static table stores plain float arrays
 * (16 inputs `a`, 16 expected values `r`) that are loaded with
 * simde_mm512_loadu_ps at run time.  Expected values are the inputs rounded
 * up to the next integer (e.g. -212.12 -> -212.00, 403.70 -> 404.00); inputs
 * that are already integral are expected unchanged (-80.00 -> -80.00).  The
 * comparison uses simde_test_x86_assert_equal_f32x16 with a final argument of
 * 1 (the precision handed to that macro).  Returns 0 once all cases have been
 * run. */
static int test_simde_mm512_ceil_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -212.12), SIMDE_FLOAT32_C( -438.18), SIMDE_FLOAT32_C( 403.70), SIMDE_FLOAT32_C( 369.30), SIMDE_FLOAT32_C( 75.33), SIMDE_FLOAT32_C( 898.48), SIMDE_FLOAT32_C( 
1.19), SIMDE_FLOAT32_C( -480.16), SIMDE_FLOAT32_C( -450.03), SIMDE_FLOAT32_C( -382.53), SIMDE_FLOAT32_C( 364.23), SIMDE_FLOAT32_C( 496.15), SIMDE_FLOAT32_C( 778.39), SIMDE_FLOAT32_C( -311.07), SIMDE_FLOAT32_C( 656.92), SIMDE_FLOAT32_C( -16.90) }, { SIMDE_FLOAT32_C( -212.00), SIMDE_FLOAT32_C( -438.00), SIMDE_FLOAT32_C( 404.00), SIMDE_FLOAT32_C( 370.00), SIMDE_FLOAT32_C( 76.00), SIMDE_FLOAT32_C( 899.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -480.00), SIMDE_FLOAT32_C( -450.00), SIMDE_FLOAT32_C( -382.00), SIMDE_FLOAT32_C( 365.00), SIMDE_FLOAT32_C( 497.00), SIMDE_FLOAT32_C( 779.00), SIMDE_FLOAT32_C( -311.00), SIMDE_FLOAT32_C( 657.00), SIMDE_FLOAT32_C( -16.00) } }, { { SIMDE_FLOAT32_C( -112.72), SIMDE_FLOAT32_C( -813.31), SIMDE_FLOAT32_C( 470.40), SIMDE_FLOAT32_C( -748.73), SIMDE_FLOAT32_C( -795.37), SIMDE_FLOAT32_C( -65.01), SIMDE_FLOAT32_C( 904.80), SIMDE_FLOAT32_C( -706.59), SIMDE_FLOAT32_C( 54.57), SIMDE_FLOAT32_C( -248.19), SIMDE_FLOAT32_C( -352.77), SIMDE_FLOAT32_C( 334.66), SIMDE_FLOAT32_C( 568.34), SIMDE_FLOAT32_C( 976.72), SIMDE_FLOAT32_C( 104.61), SIMDE_FLOAT32_C( -643.78) }, { SIMDE_FLOAT32_C( -112.00), SIMDE_FLOAT32_C( -813.00), SIMDE_FLOAT32_C( 471.00), SIMDE_FLOAT32_C( -748.00), SIMDE_FLOAT32_C( -795.00), SIMDE_FLOAT32_C( -65.00), SIMDE_FLOAT32_C( 905.00), SIMDE_FLOAT32_C( -706.00), SIMDE_FLOAT32_C( 55.00), SIMDE_FLOAT32_C( -248.00), SIMDE_FLOAT32_C( -352.00), SIMDE_FLOAT32_C( 335.00), SIMDE_FLOAT32_C( 569.00), SIMDE_FLOAT32_C( 977.00), SIMDE_FLOAT32_C( 105.00), SIMDE_FLOAT32_C( -643.00) } }, { { SIMDE_FLOAT32_C( -461.46), SIMDE_FLOAT32_C( -491.69), SIMDE_FLOAT32_C( 725.52), SIMDE_FLOAT32_C( 613.87), SIMDE_FLOAT32_C( -593.21), SIMDE_FLOAT32_C( -273.28), SIMDE_FLOAT32_C( -866.30), SIMDE_FLOAT32_C( -43.24), SIMDE_FLOAT32_C( 344.18), SIMDE_FLOAT32_C( 497.93), SIMDE_FLOAT32_C( -547.09), SIMDE_FLOAT32_C( 122.57), SIMDE_FLOAT32_C( -813.14), SIMDE_FLOAT32_C( -890.17), SIMDE_FLOAT32_C( -894.33), SIMDE_FLOAT32_C( 74.15) }, { SIMDE_FLOAT32_C( -461.00), 
SIMDE_FLOAT32_C( -491.00), SIMDE_FLOAT32_C( 726.00), SIMDE_FLOAT32_C( 614.00), SIMDE_FLOAT32_C( -593.00), SIMDE_FLOAT32_C( -273.00), SIMDE_FLOAT32_C( -866.00), SIMDE_FLOAT32_C( -43.00), SIMDE_FLOAT32_C( 345.00), SIMDE_FLOAT32_C( 498.00), SIMDE_FLOAT32_C( -547.00), SIMDE_FLOAT32_C( 123.00), SIMDE_FLOAT32_C( -813.00), SIMDE_FLOAT32_C( -890.00), SIMDE_FLOAT32_C( -894.00), SIMDE_FLOAT32_C( 75.00) } }, { { SIMDE_FLOAT32_C( -703.48), SIMDE_FLOAT32_C( 576.07), SIMDE_FLOAT32_C( 325.42), SIMDE_FLOAT32_C( -498.84), SIMDE_FLOAT32_C( -488.94), SIMDE_FLOAT32_C( 230.22), SIMDE_FLOAT32_C( -205.43), SIMDE_FLOAT32_C( 565.63), SIMDE_FLOAT32_C( 982.03), SIMDE_FLOAT32_C( 441.80), SIMDE_FLOAT32_C( -99.71), SIMDE_FLOAT32_C( 550.37), SIMDE_FLOAT32_C( 418.51), SIMDE_FLOAT32_C( -995.10), SIMDE_FLOAT32_C( 906.59), SIMDE_FLOAT32_C( 957.05) }, { SIMDE_FLOAT32_C( -703.00), SIMDE_FLOAT32_C( 577.00), SIMDE_FLOAT32_C( 326.00), SIMDE_FLOAT32_C( -498.00), SIMDE_FLOAT32_C( -488.00), SIMDE_FLOAT32_C( 231.00), SIMDE_FLOAT32_C( -205.00), SIMDE_FLOAT32_C( 566.00), SIMDE_FLOAT32_C( 983.00), SIMDE_FLOAT32_C( 442.00), SIMDE_FLOAT32_C( -99.00), SIMDE_FLOAT32_C( 551.00), SIMDE_FLOAT32_C( 419.00), SIMDE_FLOAT32_C( -995.00), SIMDE_FLOAT32_C( 907.00), SIMDE_FLOAT32_C( 958.00) } }, { { SIMDE_FLOAT32_C( -486.79), SIMDE_FLOAT32_C( 632.11), SIMDE_FLOAT32_C( 570.92), SIMDE_FLOAT32_C( -80.00), SIMDE_FLOAT32_C( -641.18), SIMDE_FLOAT32_C( 704.62), SIMDE_FLOAT32_C( 876.76), SIMDE_FLOAT32_C( 703.01), SIMDE_FLOAT32_C( 202.55), SIMDE_FLOAT32_C( -670.32), SIMDE_FLOAT32_C( -174.43), SIMDE_FLOAT32_C( 389.41), SIMDE_FLOAT32_C( -560.49), SIMDE_FLOAT32_C( -68.76), SIMDE_FLOAT32_C( -536.44), SIMDE_FLOAT32_C( -263.97) }, { SIMDE_FLOAT32_C( -486.00), SIMDE_FLOAT32_C( 633.00), SIMDE_FLOAT32_C( 571.00), SIMDE_FLOAT32_C( -80.00), SIMDE_FLOAT32_C( -641.00), SIMDE_FLOAT32_C( 705.00), SIMDE_FLOAT32_C( 877.00), SIMDE_FLOAT32_C( 704.00), SIMDE_FLOAT32_C( 203.00), SIMDE_FLOAT32_C( -670.00), SIMDE_FLOAT32_C( -174.00), SIMDE_FLOAT32_C( 
390.00), SIMDE_FLOAT32_C( -560.00), SIMDE_FLOAT32_C( -68.00), SIMDE_FLOAT32_C( -536.00), SIMDE_FLOAT32_C( -263.00) } }, { { SIMDE_FLOAT32_C( -492.69), SIMDE_FLOAT32_C( 788.98), SIMDE_FLOAT32_C( 237.19), SIMDE_FLOAT32_C( 18.37), SIMDE_FLOAT32_C( 19.20), SIMDE_FLOAT32_C( -968.24), SIMDE_FLOAT32_C( -416.00), SIMDE_FLOAT32_C( 1.23), SIMDE_FLOAT32_C( 473.56), SIMDE_FLOAT32_C( 484.29), SIMDE_FLOAT32_C( -448.40), SIMDE_FLOAT32_C( -107.93), SIMDE_FLOAT32_C( 489.18), SIMDE_FLOAT32_C( -541.82), SIMDE_FLOAT32_C( -150.87), SIMDE_FLOAT32_C( -997.61) }, { SIMDE_FLOAT32_C( -492.00), SIMDE_FLOAT32_C( 789.00), SIMDE_FLOAT32_C( 238.00), SIMDE_FLOAT32_C( 19.00), SIMDE_FLOAT32_C( 20.00), SIMDE_FLOAT32_C( -968.00), SIMDE_FLOAT32_C( -416.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( 474.00), SIMDE_FLOAT32_C( 485.00), SIMDE_FLOAT32_C( -448.00), SIMDE_FLOAT32_C( -107.00), SIMDE_FLOAT32_C( 490.00), SIMDE_FLOAT32_C( -541.00), SIMDE_FLOAT32_C( -150.00), SIMDE_FLOAT32_C( -997.00) } }, { { SIMDE_FLOAT32_C( -909.71), SIMDE_FLOAT32_C( -579.96), SIMDE_FLOAT32_C( -77.61), SIMDE_FLOAT32_C( -550.89), SIMDE_FLOAT32_C( -875.34), SIMDE_FLOAT32_C( -200.84), SIMDE_FLOAT32_C( -847.88), SIMDE_FLOAT32_C( 327.21), SIMDE_FLOAT32_C( 128.83), SIMDE_FLOAT32_C( -22.31), SIMDE_FLOAT32_C( -283.37), SIMDE_FLOAT32_C( 568.34), SIMDE_FLOAT32_C( 908.94), SIMDE_FLOAT32_C( 180.19), SIMDE_FLOAT32_C( -695.63), SIMDE_FLOAT32_C( -583.75) }, { SIMDE_FLOAT32_C( -909.00), SIMDE_FLOAT32_C( -579.00), SIMDE_FLOAT32_C( -77.00), SIMDE_FLOAT32_C( -550.00), SIMDE_FLOAT32_C( -875.00), SIMDE_FLOAT32_C( -200.00), SIMDE_FLOAT32_C( -847.00), SIMDE_FLOAT32_C( 328.00), SIMDE_FLOAT32_C( 129.00), SIMDE_FLOAT32_C( -22.00), SIMDE_FLOAT32_C( -283.00), SIMDE_FLOAT32_C( 569.00), SIMDE_FLOAT32_C( 909.00), SIMDE_FLOAT32_C( 181.00), SIMDE_FLOAT32_C( -695.00), SIMDE_FLOAT32_C( -583.00) } }, { { SIMDE_FLOAT32_C( -30.83), SIMDE_FLOAT32_C( 541.56), SIMDE_FLOAT32_C( 434.62), SIMDE_FLOAT32_C( 988.37), SIMDE_FLOAT32_C( 573.33), SIMDE_FLOAT32_C( -981.38), 
SIMDE_FLOAT32_C( -10.40), SIMDE_FLOAT32_C( 46.89), SIMDE_FLOAT32_C( 502.90), SIMDE_FLOAT32_C( 541.19), SIMDE_FLOAT32_C( 938.96), SIMDE_FLOAT32_C( -7.91), SIMDE_FLOAT32_C( 999.37), SIMDE_FLOAT32_C( -211.91), SIMDE_FLOAT32_C( -5.52), SIMDE_FLOAT32_C( -910.34) }, { SIMDE_FLOAT32_C( -30.00), SIMDE_FLOAT32_C( 542.00), SIMDE_FLOAT32_C( 435.00), SIMDE_FLOAT32_C( 989.00), SIMDE_FLOAT32_C( 574.00), SIMDE_FLOAT32_C( -981.00), SIMDE_FLOAT32_C( -10.00), SIMDE_FLOAT32_C( 47.00), SIMDE_FLOAT32_C( 503.00), SIMDE_FLOAT32_C( 542.00), SIMDE_FLOAT32_C( 939.00), SIMDE_FLOAT32_C( -7.00), SIMDE_FLOAT32_C( 1000.00), SIMDE_FLOAT32_C( -211.00), SIMDE_FLOAT32_C( -5.00), SIMDE_FLOAT32_C( -910.00) } } };
/* Load each 16-float case, apply the 512-bit ceil and compare with the
 * expected vector. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_ceil_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_ceil_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 264.66), SIMDE_FLOAT32_C( 621.32), SIMDE_FLOAT32_C( -827.32), SIMDE_FLOAT32_C( -154.51), SIMDE_FLOAT32_C( 337.38), SIMDE_FLOAT32_C( 187.19), SIMDE_FLOAT32_C( 659.53), SIMDE_FLOAT32_C( 559.33), SIMDE_FLOAT32_C( 209.98), SIMDE_FLOAT32_C( 625.49), SIMDE_FLOAT32_C( 656.87), SIMDE_FLOAT32_C( -793.87), SIMDE_FLOAT32_C( 746.37), SIMDE_FLOAT32_C( -721.16), SIMDE_FLOAT32_C( 184.21), SIMDE_FLOAT32_C( 251.36) }, UINT8_C(157), { SIMDE_FLOAT32_C( 769.49), SIMDE_FLOAT32_C( -152.19), SIMDE_FLOAT32_C( 746.20), SIMDE_FLOAT32_C( -444.46), SIMDE_FLOAT32_C( -336.10), SIMDE_FLOAT32_C( -772.83), SIMDE_FLOAT32_C( 887.52), SIMDE_FLOAT32_C( 966.03), SIMDE_FLOAT32_C( 490.22), SIMDE_FLOAT32_C( -510.29), SIMDE_FLOAT32_C( -30.50), SIMDE_FLOAT32_C( 1.38), SIMDE_FLOAT32_C( -217.82), SIMDE_FLOAT32_C( 12.97), SIMDE_FLOAT32_C( -733.96), 
SIMDE_FLOAT32_C( -596.50) }, { SIMDE_FLOAT32_C( 770.00), SIMDE_FLOAT32_C( 621.32), SIMDE_FLOAT32_C( 747.00), SIMDE_FLOAT32_C( -444.00), SIMDE_FLOAT32_C( -336.00), SIMDE_FLOAT32_C( 187.19), SIMDE_FLOAT32_C( 659.53), SIMDE_FLOAT32_C( 967.00), SIMDE_FLOAT32_C( 209.98), SIMDE_FLOAT32_C( 625.49), SIMDE_FLOAT32_C( 656.87), SIMDE_FLOAT32_C( -793.87), SIMDE_FLOAT32_C( 746.37), SIMDE_FLOAT32_C( -721.16), SIMDE_FLOAT32_C( 184.21), SIMDE_FLOAT32_C( 251.36) } }, { { SIMDE_FLOAT32_C( 185.65), SIMDE_FLOAT32_C( 111.53), SIMDE_FLOAT32_C( 740.88), SIMDE_FLOAT32_C( -627.16), SIMDE_FLOAT32_C( -228.94), SIMDE_FLOAT32_C( 300.20), SIMDE_FLOAT32_C( 582.82), SIMDE_FLOAT32_C( -603.45), SIMDE_FLOAT32_C( -42.93), SIMDE_FLOAT32_C( 788.96), SIMDE_FLOAT32_C( -857.08), SIMDE_FLOAT32_C( 235.91), SIMDE_FLOAT32_C( -26.83), SIMDE_FLOAT32_C( 394.28), SIMDE_FLOAT32_C( 795.93), SIMDE_FLOAT32_C( -257.35) }, UINT8_C(195), { SIMDE_FLOAT32_C( 542.13), SIMDE_FLOAT32_C( 298.19), SIMDE_FLOAT32_C( -94.01), SIMDE_FLOAT32_C( 769.30), SIMDE_FLOAT32_C( 185.71), SIMDE_FLOAT32_C( -127.98), SIMDE_FLOAT32_C( 259.52), SIMDE_FLOAT32_C( 675.42), SIMDE_FLOAT32_C( 841.52), SIMDE_FLOAT32_C( -739.10), SIMDE_FLOAT32_C( -542.40), SIMDE_FLOAT32_C( -145.50), SIMDE_FLOAT32_C( -473.06), SIMDE_FLOAT32_C( -138.90), SIMDE_FLOAT32_C( -959.85), SIMDE_FLOAT32_C( 638.47) }, { SIMDE_FLOAT32_C( 543.00), SIMDE_FLOAT32_C( 299.00), SIMDE_FLOAT32_C( 740.88), SIMDE_FLOAT32_C( -627.16), SIMDE_FLOAT32_C( -228.94), SIMDE_FLOAT32_C( 300.20), SIMDE_FLOAT32_C( 260.00), SIMDE_FLOAT32_C( 676.00), SIMDE_FLOAT32_C( -42.93), SIMDE_FLOAT32_C( 788.96), SIMDE_FLOAT32_C( -857.08), SIMDE_FLOAT32_C( 235.91), SIMDE_FLOAT32_C( -26.83), SIMDE_FLOAT32_C( 394.28), SIMDE_FLOAT32_C( 795.93), SIMDE_FLOAT32_C( -257.35) } }, { { SIMDE_FLOAT32_C( -398.03), SIMDE_FLOAT32_C( -587.00), SIMDE_FLOAT32_C( -590.48), SIMDE_FLOAT32_C( 902.17), SIMDE_FLOAT32_C( 995.82), SIMDE_FLOAT32_C( -193.93), SIMDE_FLOAT32_C( -140.76), SIMDE_FLOAT32_C( 784.78), SIMDE_FLOAT32_C( -51.01), 
SIMDE_FLOAT32_C( -904.84), SIMDE_FLOAT32_C( -242.06), SIMDE_FLOAT32_C( -656.73), SIMDE_FLOAT32_C( 891.09), SIMDE_FLOAT32_C( 500.60), SIMDE_FLOAT32_C( -414.64), SIMDE_FLOAT32_C( 433.21) }, UINT8_C(211), { SIMDE_FLOAT32_C( 491.34), SIMDE_FLOAT32_C( 202.51), SIMDE_FLOAT32_C( 984.50), SIMDE_FLOAT32_C( -636.64), SIMDE_FLOAT32_C( -537.96), SIMDE_FLOAT32_C( 659.92), SIMDE_FLOAT32_C( -795.12), SIMDE_FLOAT32_C( -277.06), SIMDE_FLOAT32_C( -882.48), SIMDE_FLOAT32_C( 59.38), SIMDE_FLOAT32_C( 249.88), SIMDE_FLOAT32_C( -21.39), SIMDE_FLOAT32_C( 99.53), SIMDE_FLOAT32_C( -111.65), SIMDE_FLOAT32_C( 580.58), SIMDE_FLOAT32_C( 512.52) }, { SIMDE_FLOAT32_C( 492.00), SIMDE_FLOAT32_C( 203.00), SIMDE_FLOAT32_C( -590.48), SIMDE_FLOAT32_C( 902.17), SIMDE_FLOAT32_C( -537.00), SIMDE_FLOAT32_C( -193.93), SIMDE_FLOAT32_C( -795.00), SIMDE_FLOAT32_C( -277.00), SIMDE_FLOAT32_C( -51.01), SIMDE_FLOAT32_C( -904.84), SIMDE_FLOAT32_C( -242.06), SIMDE_FLOAT32_C( -656.73), SIMDE_FLOAT32_C( 891.09), SIMDE_FLOAT32_C( 500.60), SIMDE_FLOAT32_C( -414.64), SIMDE_FLOAT32_C( 433.21) } }, { { SIMDE_FLOAT32_C( 297.87), SIMDE_FLOAT32_C( 482.76), SIMDE_FLOAT32_C( 508.34), SIMDE_FLOAT32_C( -896.06), SIMDE_FLOAT32_C( -658.00), SIMDE_FLOAT32_C( 293.12), SIMDE_FLOAT32_C( 52.94), SIMDE_FLOAT32_C( -562.84), SIMDE_FLOAT32_C( -948.94), SIMDE_FLOAT32_C( 396.21), SIMDE_FLOAT32_C( -671.75), SIMDE_FLOAT32_C( 551.66), SIMDE_FLOAT32_C( 981.56), SIMDE_FLOAT32_C( 761.46), SIMDE_FLOAT32_C( -649.56), SIMDE_FLOAT32_C( 472.90) }, UINT8_C(186), { SIMDE_FLOAT32_C( -665.06), SIMDE_FLOAT32_C( 836.26), SIMDE_FLOAT32_C( 426.01), SIMDE_FLOAT32_C( 994.86), SIMDE_FLOAT32_C( -958.85), SIMDE_FLOAT32_C( -851.05), SIMDE_FLOAT32_C( -887.63), SIMDE_FLOAT32_C( 100.52), SIMDE_FLOAT32_C( 398.83), SIMDE_FLOAT32_C( 90.99), SIMDE_FLOAT32_C( -799.95), SIMDE_FLOAT32_C( -712.82), SIMDE_FLOAT32_C( -328.43), SIMDE_FLOAT32_C( 712.57), SIMDE_FLOAT32_C( 585.05), SIMDE_FLOAT32_C( -845.67) }, { SIMDE_FLOAT32_C( 297.87), SIMDE_FLOAT32_C( 837.00), SIMDE_FLOAT32_C( 
508.34), SIMDE_FLOAT32_C( 995.00), SIMDE_FLOAT32_C( -958.00), SIMDE_FLOAT32_C( -851.00), SIMDE_FLOAT32_C( 52.94), SIMDE_FLOAT32_C( 101.00), SIMDE_FLOAT32_C( -948.94), SIMDE_FLOAT32_C( 396.21), SIMDE_FLOAT32_C( -671.75), SIMDE_FLOAT32_C( 551.66), SIMDE_FLOAT32_C( 981.56), SIMDE_FLOAT32_C( 761.46), SIMDE_FLOAT32_C( -649.56), SIMDE_FLOAT32_C( 472.90) } }, { { SIMDE_FLOAT32_C( 220.91), SIMDE_FLOAT32_C( 688.99), SIMDE_FLOAT32_C( -503.67), SIMDE_FLOAT32_C( -485.97), SIMDE_FLOAT32_C( -258.07), SIMDE_FLOAT32_C( -66.51), SIMDE_FLOAT32_C( -434.91), SIMDE_FLOAT32_C( -861.87), SIMDE_FLOAT32_C( 261.74), SIMDE_FLOAT32_C( -883.26), SIMDE_FLOAT32_C( -880.31), SIMDE_FLOAT32_C( 23.19), SIMDE_FLOAT32_C( -532.81), SIMDE_FLOAT32_C( 592.60), SIMDE_FLOAT32_C( 987.17), SIMDE_FLOAT32_C( -197.87) }, UINT8_C(171), { SIMDE_FLOAT32_C( 413.18), SIMDE_FLOAT32_C( -203.02), SIMDE_FLOAT32_C( 470.01), SIMDE_FLOAT32_C( 562.13), SIMDE_FLOAT32_C( -90.64), SIMDE_FLOAT32_C( -429.47), SIMDE_FLOAT32_C( -39.04), SIMDE_FLOAT32_C( -999.66), SIMDE_FLOAT32_C( -229.42), SIMDE_FLOAT32_C( 248.13), SIMDE_FLOAT32_C( -328.09), SIMDE_FLOAT32_C( -516.85), SIMDE_FLOAT32_C( -166.82), SIMDE_FLOAT32_C( -173.76), SIMDE_FLOAT32_C( 704.07), SIMDE_FLOAT32_C( -477.83) }, { SIMDE_FLOAT32_C( 414.00), SIMDE_FLOAT32_C( -203.00), SIMDE_FLOAT32_C( -503.67), SIMDE_FLOAT32_C( 563.00), SIMDE_FLOAT32_C( -258.07), SIMDE_FLOAT32_C( -429.00), SIMDE_FLOAT32_C( -434.91), SIMDE_FLOAT32_C( -999.00), SIMDE_FLOAT32_C( 261.74), SIMDE_FLOAT32_C( -883.26), SIMDE_FLOAT32_C( -880.31), SIMDE_FLOAT32_C( 23.19), SIMDE_FLOAT32_C( -532.81), SIMDE_FLOAT32_C( 592.60), SIMDE_FLOAT32_C( 987.17), SIMDE_FLOAT32_C( -197.87) } }, { { SIMDE_FLOAT32_C( 322.58), SIMDE_FLOAT32_C( -781.90), SIMDE_FLOAT32_C( 264.10), SIMDE_FLOAT32_C( -743.93), SIMDE_FLOAT32_C( -216.81), SIMDE_FLOAT32_C( 402.23), SIMDE_FLOAT32_C( 517.80), SIMDE_FLOAT32_C( -100.07), SIMDE_FLOAT32_C( 521.92), SIMDE_FLOAT32_C( -459.00), SIMDE_FLOAT32_C( 367.12), SIMDE_FLOAT32_C( 114.52), SIMDE_FLOAT32_C( 
-471.84), SIMDE_FLOAT32_C( -830.76), SIMDE_FLOAT32_C( -456.62), SIMDE_FLOAT32_C( 941.34) }, UINT8_C( 19), { SIMDE_FLOAT32_C( -986.61), SIMDE_FLOAT32_C( 503.47), SIMDE_FLOAT32_C( 875.58), SIMDE_FLOAT32_C( -416.08), SIMDE_FLOAT32_C( -535.57), SIMDE_FLOAT32_C( 875.92), SIMDE_FLOAT32_C( 354.51), SIMDE_FLOAT32_C( 712.56), SIMDE_FLOAT32_C( -452.16), SIMDE_FLOAT32_C( 837.66), SIMDE_FLOAT32_C( -454.26), SIMDE_FLOAT32_C( 374.08), SIMDE_FLOAT32_C( 541.73), SIMDE_FLOAT32_C( 67.91), SIMDE_FLOAT32_C( -303.34), SIMDE_FLOAT32_C( 759.83) }, { SIMDE_FLOAT32_C( -986.00), SIMDE_FLOAT32_C( 504.00), SIMDE_FLOAT32_C( 264.10), SIMDE_FLOAT32_C( -743.93), SIMDE_FLOAT32_C( -535.00), SIMDE_FLOAT32_C( 402.23), SIMDE_FLOAT32_C( 517.80), SIMDE_FLOAT32_C( -100.07), SIMDE_FLOAT32_C( 521.92), SIMDE_FLOAT32_C( -459.00), SIMDE_FLOAT32_C( 367.12), SIMDE_FLOAT32_C( 114.52), SIMDE_FLOAT32_C( -471.84), SIMDE_FLOAT32_C( -830.76), SIMDE_FLOAT32_C( -456.62), SIMDE_FLOAT32_C( 941.34) } }, { { SIMDE_FLOAT32_C( -668.00), SIMDE_FLOAT32_C( -47.28), SIMDE_FLOAT32_C( -456.99), SIMDE_FLOAT32_C( 734.23), SIMDE_FLOAT32_C( -529.48), SIMDE_FLOAT32_C( 442.94), SIMDE_FLOAT32_C( 256.15), SIMDE_FLOAT32_C( 11.52), SIMDE_FLOAT32_C( -189.94), SIMDE_FLOAT32_C( -629.33), SIMDE_FLOAT32_C( 539.68), SIMDE_FLOAT32_C( -20.70), SIMDE_FLOAT32_C( -85.95), SIMDE_FLOAT32_C( 481.02), SIMDE_FLOAT32_C( 945.52), SIMDE_FLOAT32_C( -72.56) }, UINT8_C(158), { SIMDE_FLOAT32_C( 821.10), SIMDE_FLOAT32_C( 511.37), SIMDE_FLOAT32_C( 448.92), SIMDE_FLOAT32_C( 697.03), SIMDE_FLOAT32_C( -134.12), SIMDE_FLOAT32_C( 161.48), SIMDE_FLOAT32_C( -755.14), SIMDE_FLOAT32_C( -296.46), SIMDE_FLOAT32_C( 707.22), SIMDE_FLOAT32_C( 618.95), SIMDE_FLOAT32_C( -754.73), SIMDE_FLOAT32_C( -224.87), SIMDE_FLOAT32_C( -684.40), SIMDE_FLOAT32_C( -994.91), SIMDE_FLOAT32_C( 107.14), SIMDE_FLOAT32_C( 268.32) }, { SIMDE_FLOAT32_C( -668.00), SIMDE_FLOAT32_C( 512.00), SIMDE_FLOAT32_C( 449.00), SIMDE_FLOAT32_C( 698.00), SIMDE_FLOAT32_C( -134.00), SIMDE_FLOAT32_C( 442.94), 
SIMDE_FLOAT32_C( 256.15), SIMDE_FLOAT32_C( -296.00), SIMDE_FLOAT32_C( -189.94), SIMDE_FLOAT32_C( -629.33), SIMDE_FLOAT32_C( 539.68), SIMDE_FLOAT32_C( -20.70), SIMDE_FLOAT32_C( -85.95), SIMDE_FLOAT32_C( 481.02), SIMDE_FLOAT32_C( 945.52), SIMDE_FLOAT32_C( -72.56) } }, { { SIMDE_FLOAT32_C( -451.89), SIMDE_FLOAT32_C( -158.63), SIMDE_FLOAT32_C( 738.85), SIMDE_FLOAT32_C( 991.05), SIMDE_FLOAT32_C( -902.48), SIMDE_FLOAT32_C( -249.63), SIMDE_FLOAT32_C( -198.89), SIMDE_FLOAT32_C( -531.81), SIMDE_FLOAT32_C( -709.95), SIMDE_FLOAT32_C( 780.40), SIMDE_FLOAT32_C( 382.24), SIMDE_FLOAT32_C( 771.07), SIMDE_FLOAT32_C( 725.93), SIMDE_FLOAT32_C( -690.31), SIMDE_FLOAT32_C( -244.43), SIMDE_FLOAT32_C( 547.03) }, UINT8_C(207), { SIMDE_FLOAT32_C( -795.51), SIMDE_FLOAT32_C( 244.06), SIMDE_FLOAT32_C( -313.07), SIMDE_FLOAT32_C( 365.97), SIMDE_FLOAT32_C( 488.92), SIMDE_FLOAT32_C( 390.47), SIMDE_FLOAT32_C( 73.20), SIMDE_FLOAT32_C( 107.87), SIMDE_FLOAT32_C( 635.73), SIMDE_FLOAT32_C( 848.33), SIMDE_FLOAT32_C( 423.47), SIMDE_FLOAT32_C( 640.83), SIMDE_FLOAT32_C( -44.53), SIMDE_FLOAT32_C( -308.21), SIMDE_FLOAT32_C( -811.07), SIMDE_FLOAT32_C( 796.84) }, { SIMDE_FLOAT32_C( -795.00), SIMDE_FLOAT32_C( 245.00), SIMDE_FLOAT32_C( -313.00), SIMDE_FLOAT32_C( 366.00), SIMDE_FLOAT32_C( -902.48), SIMDE_FLOAT32_C( -249.63), SIMDE_FLOAT32_C( 74.00), SIMDE_FLOAT32_C( 108.00), SIMDE_FLOAT32_C( -709.95), SIMDE_FLOAT32_C( 780.40), SIMDE_FLOAT32_C( 382.24), SIMDE_FLOAT32_C( 771.07), SIMDE_FLOAT32_C( 725.93), SIMDE_FLOAT32_C( -690.31), SIMDE_FLOAT32_C( -244.43), SIMDE_FLOAT32_C( 547.03) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_ceil_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_ceil_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const 
simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 568.62), SIMDE_FLOAT64_C( 832.90), SIMDE_FLOAT64_C( 451.04), SIMDE_FLOAT64_C( 205.98), SIMDE_FLOAT64_C( 456.63), SIMDE_FLOAT64_C( 924.23), SIMDE_FLOAT64_C( -658.88), SIMDE_FLOAT64_C( -472.23) }, { SIMDE_FLOAT64_C( 569.00), SIMDE_FLOAT64_C( 833.00), SIMDE_FLOAT64_C( 452.00), SIMDE_FLOAT64_C( 206.00), SIMDE_FLOAT64_C( 457.00), SIMDE_FLOAT64_C( 925.00), SIMDE_FLOAT64_C( -658.00), SIMDE_FLOAT64_C( -472.00) } }, { { SIMDE_FLOAT64_C( -579.06), SIMDE_FLOAT64_C( 724.10), SIMDE_FLOAT64_C( -922.32), SIMDE_FLOAT64_C( 603.12), SIMDE_FLOAT64_C( -550.68), SIMDE_FLOAT64_C( -479.10), SIMDE_FLOAT64_C( -837.50), SIMDE_FLOAT64_C( 925.16) }, { SIMDE_FLOAT64_C( -579.00), SIMDE_FLOAT64_C( 725.00), SIMDE_FLOAT64_C( -922.00), SIMDE_FLOAT64_C( 604.00), SIMDE_FLOAT64_C( -550.00), SIMDE_FLOAT64_C( -479.00), SIMDE_FLOAT64_C( -837.00), SIMDE_FLOAT64_C( 926.00) } }, { { SIMDE_FLOAT64_C( -415.08), SIMDE_FLOAT64_C( 718.97), SIMDE_FLOAT64_C( -850.54), SIMDE_FLOAT64_C( 464.10), SIMDE_FLOAT64_C( 558.79), SIMDE_FLOAT64_C( 424.83), SIMDE_FLOAT64_C( -281.91), SIMDE_FLOAT64_C( 440.87) }, { SIMDE_FLOAT64_C( -415.00), SIMDE_FLOAT64_C( 719.00), SIMDE_FLOAT64_C( -850.00), SIMDE_FLOAT64_C( 465.00), SIMDE_FLOAT64_C( 559.00), SIMDE_FLOAT64_C( 425.00), SIMDE_FLOAT64_C( -281.00), SIMDE_FLOAT64_C( 441.00) } }, { { SIMDE_FLOAT64_C( 834.86), SIMDE_FLOAT64_C( -787.94), SIMDE_FLOAT64_C( 560.68), SIMDE_FLOAT64_C( -896.06), SIMDE_FLOAT64_C( -74.24), SIMDE_FLOAT64_C( 400.53), SIMDE_FLOAT64_C( -101.01), SIMDE_FLOAT64_C( -505.62) }, { SIMDE_FLOAT64_C( 835.00), SIMDE_FLOAT64_C( -787.00), SIMDE_FLOAT64_C( 561.00), SIMDE_FLOAT64_C( -896.00), SIMDE_FLOAT64_C( -74.00), SIMDE_FLOAT64_C( 401.00), SIMDE_FLOAT64_C( -101.00), SIMDE_FLOAT64_C( -505.00) } }, { { SIMDE_FLOAT64_C( 233.43), SIMDE_FLOAT64_C( -649.98), SIMDE_FLOAT64_C( 700.36), SIMDE_FLOAT64_C( -309.94), SIMDE_FLOAT64_C( -725.75), SIMDE_FLOAT64_C( -958.52), SIMDE_FLOAT64_C( 217.83), 
SIMDE_FLOAT64_C( -304.81) }, { SIMDE_FLOAT64_C( 234.00), SIMDE_FLOAT64_C( -649.00), SIMDE_FLOAT64_C( 701.00), SIMDE_FLOAT64_C( -309.00), SIMDE_FLOAT64_C( -725.00), SIMDE_FLOAT64_C( -958.00), SIMDE_FLOAT64_C( 218.00), SIMDE_FLOAT64_C( -304.00) } }, { { SIMDE_FLOAT64_C( 765.58), SIMDE_FLOAT64_C( 295.51), SIMDE_FLOAT64_C( -701.69), SIMDE_FLOAT64_C( -785.11), SIMDE_FLOAT64_C( 816.41), SIMDE_FLOAT64_C( -539.19), SIMDE_FLOAT64_C( -859.95), SIMDE_FLOAT64_C( -598.68) }, { SIMDE_FLOAT64_C( 766.00), SIMDE_FLOAT64_C( 296.00), SIMDE_FLOAT64_C( -701.00), SIMDE_FLOAT64_C( -785.00), SIMDE_FLOAT64_C( 817.00), SIMDE_FLOAT64_C( -539.00), SIMDE_FLOAT64_C( -859.00), SIMDE_FLOAT64_C( -598.00) } }, { { SIMDE_FLOAT64_C( -820.22), SIMDE_FLOAT64_C( -710.49), SIMDE_FLOAT64_C( 865.42), SIMDE_FLOAT64_C( 738.57), SIMDE_FLOAT64_C( 714.34), SIMDE_FLOAT64_C( -416.48), SIMDE_FLOAT64_C( 179.44), SIMDE_FLOAT64_C( 549.20) }, { SIMDE_FLOAT64_C( -820.00), SIMDE_FLOAT64_C( -710.00), SIMDE_FLOAT64_C( 866.00), SIMDE_FLOAT64_C( 739.00), SIMDE_FLOAT64_C( 715.00), SIMDE_FLOAT64_C( -416.00), SIMDE_FLOAT64_C( 180.00), SIMDE_FLOAT64_C( 550.00) } }, { { SIMDE_FLOAT64_C( -204.42), SIMDE_FLOAT64_C( -259.88), SIMDE_FLOAT64_C( 653.14), SIMDE_FLOAT64_C( 721.34), SIMDE_FLOAT64_C( -859.35), SIMDE_FLOAT64_C( -447.87), SIMDE_FLOAT64_C( -784.28), SIMDE_FLOAT64_C( 374.08) }, { SIMDE_FLOAT64_C( -204.00), SIMDE_FLOAT64_C( -259.00), SIMDE_FLOAT64_C( 654.00), SIMDE_FLOAT64_C( 722.00), SIMDE_FLOAT64_C( -859.00), SIMDE_FLOAT64_C( -447.00), SIMDE_FLOAT64_C( -784.00), SIMDE_FLOAT64_C( 375.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_ceil_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_ceil_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const 
simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 229.81), SIMDE_FLOAT64_C( 525.51), SIMDE_FLOAT64_C( -6.46), SIMDE_FLOAT64_C( -218.89), SIMDE_FLOAT64_C( -607.98), SIMDE_FLOAT64_C( -552.09), SIMDE_FLOAT64_C( 531.98), SIMDE_FLOAT64_C( 900.69) }, UINT8_C(198), { SIMDE_FLOAT64_C( -545.02), SIMDE_FLOAT64_C( 596.71), SIMDE_FLOAT64_C( 311.19), SIMDE_FLOAT64_C( -696.35), SIMDE_FLOAT64_C( -125.03), SIMDE_FLOAT64_C( -375.13), SIMDE_FLOAT64_C( 455.71), SIMDE_FLOAT64_C( 769.17) }, { SIMDE_FLOAT64_C( 229.81), SIMDE_FLOAT64_C( 597.00), SIMDE_FLOAT64_C( 312.00), SIMDE_FLOAT64_C( -218.89), SIMDE_FLOAT64_C( -607.98), SIMDE_FLOAT64_C( -552.09), SIMDE_FLOAT64_C( 456.00), SIMDE_FLOAT64_C( 770.00) } }, { { SIMDE_FLOAT64_C( 871.47), SIMDE_FLOAT64_C( -774.90), SIMDE_FLOAT64_C( 592.74), SIMDE_FLOAT64_C( -416.66), SIMDE_FLOAT64_C( -243.97), SIMDE_FLOAT64_C( 106.58), SIMDE_FLOAT64_C( -923.77), SIMDE_FLOAT64_C( -472.30) }, UINT8_C(119), { SIMDE_FLOAT64_C( -407.44), SIMDE_FLOAT64_C( -264.38), SIMDE_FLOAT64_C( 828.67), SIMDE_FLOAT64_C( -804.49), SIMDE_FLOAT64_C( 95.85), SIMDE_FLOAT64_C( 58.48), SIMDE_FLOAT64_C( 721.02), SIMDE_FLOAT64_C( -910.62) }, { SIMDE_FLOAT64_C( -407.00), SIMDE_FLOAT64_C( -264.00), SIMDE_FLOAT64_C( 829.00), SIMDE_FLOAT64_C( -416.66), SIMDE_FLOAT64_C( 96.00), SIMDE_FLOAT64_C( 59.00), SIMDE_FLOAT64_C( 722.00), SIMDE_FLOAT64_C( -472.30) } }, { { SIMDE_FLOAT64_C( 839.59), SIMDE_FLOAT64_C( -886.96), SIMDE_FLOAT64_C( -462.70), SIMDE_FLOAT64_C( 371.56), SIMDE_FLOAT64_C( -986.28), SIMDE_FLOAT64_C( 467.93), SIMDE_FLOAT64_C( 826.54), SIMDE_FLOAT64_C( 610.43) }, UINT8_C( 11), { SIMDE_FLOAT64_C( -869.81), SIMDE_FLOAT64_C( -514.60), SIMDE_FLOAT64_C( 404.00), SIMDE_FLOAT64_C( 585.90), SIMDE_FLOAT64_C( -745.43), SIMDE_FLOAT64_C( 275.47), SIMDE_FLOAT64_C( 811.00), SIMDE_FLOAT64_C( 847.30) }, { SIMDE_FLOAT64_C( -869.00), SIMDE_FLOAT64_C( -514.00), SIMDE_FLOAT64_C( -462.70), SIMDE_FLOAT64_C( 586.00), SIMDE_FLOAT64_C( -986.28), SIMDE_FLOAT64_C( 467.93), SIMDE_FLOAT64_C( 826.54), 
SIMDE_FLOAT64_C( 610.43) } }, { { SIMDE_FLOAT64_C( 858.82), SIMDE_FLOAT64_C( -432.97), SIMDE_FLOAT64_C( -46.12), SIMDE_FLOAT64_C( 935.05), SIMDE_FLOAT64_C( 94.73), SIMDE_FLOAT64_C( -233.07), SIMDE_FLOAT64_C( -472.39), SIMDE_FLOAT64_C( 830.35) }, UINT8_C( 12), { SIMDE_FLOAT64_C( -276.88), SIMDE_FLOAT64_C( -73.80), SIMDE_FLOAT64_C( 654.07), SIMDE_FLOAT64_C( -555.86), SIMDE_FLOAT64_C( 15.59), SIMDE_FLOAT64_C( 493.66), SIMDE_FLOAT64_C( -442.83), SIMDE_FLOAT64_C( 552.88) }, { SIMDE_FLOAT64_C( 858.82), SIMDE_FLOAT64_C( -432.97), SIMDE_FLOAT64_C( 655.00), SIMDE_FLOAT64_C( -555.00), SIMDE_FLOAT64_C( 94.73), SIMDE_FLOAT64_C( -233.07), SIMDE_FLOAT64_C( -472.39), SIMDE_FLOAT64_C( 830.35) } }, { { SIMDE_FLOAT64_C( -134.77), SIMDE_FLOAT64_C( -429.10), SIMDE_FLOAT64_C( 20.82), SIMDE_FLOAT64_C( -308.24), SIMDE_FLOAT64_C( -818.67), SIMDE_FLOAT64_C( 799.94), SIMDE_FLOAT64_C( -178.05), SIMDE_FLOAT64_C( -333.27) }, UINT8_C(157), { SIMDE_FLOAT64_C( -592.15), SIMDE_FLOAT64_C( -78.71), SIMDE_FLOAT64_C( -520.59), SIMDE_FLOAT64_C( -781.15), SIMDE_FLOAT64_C( -231.40), SIMDE_FLOAT64_C( -661.77), SIMDE_FLOAT64_C( -214.12), SIMDE_FLOAT64_C( 722.48) }, { SIMDE_FLOAT64_C( -592.00), SIMDE_FLOAT64_C( -429.10), SIMDE_FLOAT64_C( -520.00), SIMDE_FLOAT64_C( -781.00), SIMDE_FLOAT64_C( -231.00), SIMDE_FLOAT64_C( 799.94), SIMDE_FLOAT64_C( -178.05), SIMDE_FLOAT64_C( 723.00) } }, { { SIMDE_FLOAT64_C( -726.72), SIMDE_FLOAT64_C( 880.61), SIMDE_FLOAT64_C( -510.59), SIMDE_FLOAT64_C( -199.11), SIMDE_FLOAT64_C( 710.96), SIMDE_FLOAT64_C( 85.00), SIMDE_FLOAT64_C( 524.01), SIMDE_FLOAT64_C( -362.83) }, UINT8_C(189), { SIMDE_FLOAT64_C( 968.14), SIMDE_FLOAT64_C( 652.75), SIMDE_FLOAT64_C( -767.26), SIMDE_FLOAT64_C( -474.68), SIMDE_FLOAT64_C( 205.64), SIMDE_FLOAT64_C( 97.96), SIMDE_FLOAT64_C( 96.22), SIMDE_FLOAT64_C( -773.55) }, { SIMDE_FLOAT64_C( 969.00), SIMDE_FLOAT64_C( 880.61), SIMDE_FLOAT64_C( -767.00), SIMDE_FLOAT64_C( -474.00), SIMDE_FLOAT64_C( 206.00), SIMDE_FLOAT64_C( 98.00), SIMDE_FLOAT64_C( 524.01), 
SIMDE_FLOAT64_C( -773.00) } }, { { SIMDE_FLOAT64_C( 789.73), SIMDE_FLOAT64_C( 277.54), SIMDE_FLOAT64_C( -973.60), SIMDE_FLOAT64_C( -388.32), SIMDE_FLOAT64_C( 944.27), SIMDE_FLOAT64_C( 230.34), SIMDE_FLOAT64_C( 19.53), SIMDE_FLOAT64_C( -134.44) }, UINT8_C( 15), { SIMDE_FLOAT64_C( 238.38), SIMDE_FLOAT64_C( 634.16), SIMDE_FLOAT64_C( -952.02), SIMDE_FLOAT64_C( -975.74), SIMDE_FLOAT64_C( 356.64), SIMDE_FLOAT64_C( -678.74), SIMDE_FLOAT64_C( 904.87), SIMDE_FLOAT64_C( 846.05) }, { SIMDE_FLOAT64_C( 239.00), SIMDE_FLOAT64_C( 635.00), SIMDE_FLOAT64_C( -952.00), SIMDE_FLOAT64_C( -975.00), SIMDE_FLOAT64_C( 944.27), SIMDE_FLOAT64_C( 230.34), SIMDE_FLOAT64_C( 19.53), SIMDE_FLOAT64_C( -134.44) } }, { { SIMDE_FLOAT64_C( 122.14), SIMDE_FLOAT64_C( 615.84), SIMDE_FLOAT64_C( -68.95), SIMDE_FLOAT64_C( -353.85), SIMDE_FLOAT64_C( -747.00), SIMDE_FLOAT64_C( 670.13), SIMDE_FLOAT64_C( -385.71), SIMDE_FLOAT64_C( 905.76) }, UINT8_C( 69), { SIMDE_FLOAT64_C( 139.61), SIMDE_FLOAT64_C( 111.39), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( -764.17), SIMDE_FLOAT64_C( 337.85), SIMDE_FLOAT64_C( -209.44), SIMDE_FLOAT64_C( 513.37), SIMDE_FLOAT64_C( 364.24) }, { SIMDE_FLOAT64_C( 140.00), SIMDE_FLOAT64_C( 615.84), SIMDE_FLOAT64_C( 1.00), SIMDE_FLOAT64_C( -353.85), SIMDE_FLOAT64_C( -747.00), SIMDE_FLOAT64_C( 670.13), SIMDE_FLOAT64_C( 514.00), SIMDE_FLOAT64_C( 905.76) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_ceil_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_svml_sqrt_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 528.60), SIMDE_FLOAT32_C( 322.33), SIMDE_FLOAT32_C( 385.23), SIMDE_FLOAT32_C( 814.87) }, { SIMDE_FLOAT32_C( 22.99), 
SIMDE_FLOAT32_C( 17.95), SIMDE_FLOAT32_C( 19.63), SIMDE_FLOAT32_C( 28.55) } }, { { SIMDE_FLOAT32_C( 587.72), SIMDE_FLOAT32_C( 685.82), SIMDE_FLOAT32_C( 593.20), SIMDE_FLOAT32_C( 733.30) }, { SIMDE_FLOAT32_C( 24.24), SIMDE_FLOAT32_C( 26.19), SIMDE_FLOAT32_C( 24.36), SIMDE_FLOAT32_C( 27.08) } }, { { SIMDE_FLOAT32_C( 325.19), SIMDE_FLOAT32_C( 348.73), SIMDE_FLOAT32_C( 342.79), SIMDE_FLOAT32_C( 565.69) }, { SIMDE_FLOAT32_C( 18.03), SIMDE_FLOAT32_C( 18.67), SIMDE_FLOAT32_C( 18.51), SIMDE_FLOAT32_C( 23.78) } }, { { SIMDE_FLOAT32_C( 148.43), SIMDE_FLOAT32_C( 85.30), SIMDE_FLOAT32_C( 679.23), SIMDE_FLOAT32_C( 235.95) }, { SIMDE_FLOAT32_C( 12.18), SIMDE_FLOAT32_C( 9.24), SIMDE_FLOAT32_C( 26.06), SIMDE_FLOAT32_C( 15.36) } }, { { SIMDE_FLOAT32_C( 741.81), SIMDE_FLOAT32_C( 327.17), SIMDE_FLOAT32_C( 932.33), SIMDE_FLOAT32_C( 431.37) }, { SIMDE_FLOAT32_C( 27.24), SIMDE_FLOAT32_C( 18.09), SIMDE_FLOAT32_C( 30.53), SIMDE_FLOAT32_C( 20.77) } }, { { SIMDE_FLOAT32_C( 630.74), SIMDE_FLOAT32_C( 622.98), SIMDE_FLOAT32_C( 345.17), SIMDE_FLOAT32_C( 666.65) }, { SIMDE_FLOAT32_C( 25.11), SIMDE_FLOAT32_C( 24.96), SIMDE_FLOAT32_C( 18.58), SIMDE_FLOAT32_C( 25.82) } }, { { SIMDE_FLOAT32_C( 95.65), SIMDE_FLOAT32_C( 585.30), SIMDE_FLOAT32_C( 996.40), SIMDE_FLOAT32_C( 212.96) }, { SIMDE_FLOAT32_C( 9.78), SIMDE_FLOAT32_C( 24.19), SIMDE_FLOAT32_C( 31.57), SIMDE_FLOAT32_C( 14.59) } }, { { SIMDE_FLOAT32_C( 691.00), SIMDE_FLOAT32_C( 383.56), SIMDE_FLOAT32_C( 356.19), SIMDE_FLOAT32_C( 219.60) }, { SIMDE_FLOAT32_C( 26.29), SIMDE_FLOAT32_C( 19.58), SIMDE_FLOAT32_C( 18.87), SIMDE_FLOAT32_C( 14.82) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_svml_sqrt_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_svml_floor_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { 
{ { SIMDE_FLOAT32_C( -368.97), SIMDE_FLOAT32_C( -986.85), SIMDE_FLOAT32_C( 853.49), SIMDE_FLOAT32_C( 45.17) }, { SIMDE_FLOAT32_C( -369.00), SIMDE_FLOAT32_C( -987.00), SIMDE_FLOAT32_C( 853.00), SIMDE_FLOAT32_C( 45.00) } }, { { SIMDE_FLOAT32_C( 562.02), SIMDE_FLOAT32_C( -924.44), SIMDE_FLOAT32_C( -802.09), SIMDE_FLOAT32_C( 17.88) }, { SIMDE_FLOAT32_C( 562.00), SIMDE_FLOAT32_C( -925.00), SIMDE_FLOAT32_C( -803.00), SIMDE_FLOAT32_C( 17.00) } }, { { SIMDE_FLOAT32_C( -773.69), SIMDE_FLOAT32_C( -929.41), SIMDE_FLOAT32_C( -376.84), SIMDE_FLOAT32_C( -575.41) }, { SIMDE_FLOAT32_C( -774.00), SIMDE_FLOAT32_C( -930.00), SIMDE_FLOAT32_C( -377.00), SIMDE_FLOAT32_C( -576.00) } }, { { SIMDE_FLOAT32_C( 694.60), SIMDE_FLOAT32_C( 556.86), SIMDE_FLOAT32_C( 755.76), SIMDE_FLOAT32_C( -3.15) }, { SIMDE_FLOAT32_C( 694.00), SIMDE_FLOAT32_C( 556.00), SIMDE_FLOAT32_C( 755.00), SIMDE_FLOAT32_C( -4.00) } }, { { SIMDE_FLOAT32_C( -225.40), SIMDE_FLOAT32_C( 440.47), SIMDE_FLOAT32_C( -328.64), SIMDE_FLOAT32_C( -113.66) }, { SIMDE_FLOAT32_C( -226.00), SIMDE_FLOAT32_C( 440.00), SIMDE_FLOAT32_C( -329.00), SIMDE_FLOAT32_C( -114.00) } }, { { SIMDE_FLOAT32_C( -752.27), SIMDE_FLOAT32_C( -305.67), SIMDE_FLOAT32_C( -135.72), SIMDE_FLOAT32_C( -501.04) }, { SIMDE_FLOAT32_C( -753.00), SIMDE_FLOAT32_C( -306.00), SIMDE_FLOAT32_C( -136.00), SIMDE_FLOAT32_C( -502.00) } }, { { SIMDE_FLOAT32_C( 156.35), SIMDE_FLOAT32_C( 898.85), SIMDE_FLOAT32_C( -988.19), SIMDE_FLOAT32_C( 407.13) }, { SIMDE_FLOAT32_C( 156.00), SIMDE_FLOAT32_C( 898.00), SIMDE_FLOAT32_C( -989.00), SIMDE_FLOAT32_C( 407.00) } }, { { SIMDE_FLOAT32_C( 973.98), SIMDE_FLOAT32_C( 721.39), SIMDE_FLOAT32_C( -631.24), SIMDE_FLOAT32_C( -394.99) }, { SIMDE_FLOAT32_C( 973.00), SIMDE_FLOAT32_C( 721.00), SIMDE_FLOAT32_C( -632.00), SIMDE_FLOAT32_C( -395.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_svml_floor_ps(a); simde_test_x86_assert_equal_f32x4(r, 
simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_svml_floor_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -495.36), SIMDE_FLOAT64_C( 574.97) }, { SIMDE_FLOAT64_C( -496.00), SIMDE_FLOAT64_C( 574.00) } }, { { SIMDE_FLOAT64_C( -571.90), SIMDE_FLOAT64_C( -4.02) }, { SIMDE_FLOAT64_C( -572.00), SIMDE_FLOAT64_C( -5.00) } }, { { SIMDE_FLOAT64_C( -111.97), SIMDE_FLOAT64_C( -326.91) }, { SIMDE_FLOAT64_C( -112.00), SIMDE_FLOAT64_C( -327.00) } }, { { SIMDE_FLOAT64_C( -366.90), SIMDE_FLOAT64_C( 909.28) }, { SIMDE_FLOAT64_C( -367.00), SIMDE_FLOAT64_C( 909.00) } }, { { SIMDE_FLOAT64_C( -637.61), SIMDE_FLOAT64_C( 377.44) }, { SIMDE_FLOAT64_C( -638.00), SIMDE_FLOAT64_C( 377.00) } }, { { SIMDE_FLOAT64_C( 358.88), SIMDE_FLOAT64_C( 783.39) }, { SIMDE_FLOAT64_C( 358.00), SIMDE_FLOAT64_C( 783.00) } }, { { SIMDE_FLOAT64_C( 137.00), SIMDE_FLOAT64_C( -315.38) }, { SIMDE_FLOAT64_C( 137.00), SIMDE_FLOAT64_C( -316.00) } }, { { SIMDE_FLOAT64_C( 20.73), SIMDE_FLOAT64_C( -927.12) }, { SIMDE_FLOAT64_C( 20.00), SIMDE_FLOAT64_C( -928.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_svml_floor_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_svml_floor_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( -100.83), SIMDE_FLOAT32_C( -16.23), SIMDE_FLOAT32_C( 689.00), SIMDE_FLOAT32_C( 627.12), SIMDE_FLOAT32_C( -725.64), SIMDE_FLOAT32_C( -272.67), SIMDE_FLOAT32_C( 477.57), SIMDE_FLOAT32_C( 968.62) }, { SIMDE_FLOAT32_C( -101.00), SIMDE_FLOAT32_C( -17.00), SIMDE_FLOAT32_C( 689.00), SIMDE_FLOAT32_C( 627.00), SIMDE_FLOAT32_C( -726.00), SIMDE_FLOAT32_C( -273.00), SIMDE_FLOAT32_C( 477.00), SIMDE_FLOAT32_C( 968.00) } 
}, { { SIMDE_FLOAT32_C( 259.55), SIMDE_FLOAT32_C( -892.87), SIMDE_FLOAT32_C( 37.54), SIMDE_FLOAT32_C( -594.84), SIMDE_FLOAT32_C( 992.66), SIMDE_FLOAT32_C( 528.53), SIMDE_FLOAT32_C( -44.54), SIMDE_FLOAT32_C( 305.85) }, { SIMDE_FLOAT32_C( 259.00), SIMDE_FLOAT32_C( -893.00), SIMDE_FLOAT32_C( 37.00), SIMDE_FLOAT32_C( -595.00), SIMDE_FLOAT32_C( 992.00), SIMDE_FLOAT32_C( 528.00), SIMDE_FLOAT32_C( -45.00), SIMDE_FLOAT32_C( 305.00) } }, { { SIMDE_FLOAT32_C( 785.51), SIMDE_FLOAT32_C( -262.72), SIMDE_FLOAT32_C( 566.52), SIMDE_FLOAT32_C( -760.14), SIMDE_FLOAT32_C( 801.95), SIMDE_FLOAT32_C( 597.73), SIMDE_FLOAT32_C( -180.14), SIMDE_FLOAT32_C( 556.25) }, { SIMDE_FLOAT32_C( 785.00), SIMDE_FLOAT32_C( -263.00), SIMDE_FLOAT32_C( 566.00), SIMDE_FLOAT32_C( -761.00), SIMDE_FLOAT32_C( 801.00), SIMDE_FLOAT32_C( 597.00), SIMDE_FLOAT32_C( -181.00), SIMDE_FLOAT32_C( 556.00) } }, { { SIMDE_FLOAT32_C( -337.69), SIMDE_FLOAT32_C( -509.08), SIMDE_FLOAT32_C( 665.71), SIMDE_FLOAT32_C( 342.73), SIMDE_FLOAT32_C( 672.76), SIMDE_FLOAT32_C( -625.02), SIMDE_FLOAT32_C( -13.36), SIMDE_FLOAT32_C( -428.07) }, { SIMDE_FLOAT32_C( -338.00), SIMDE_FLOAT32_C( -510.00), SIMDE_FLOAT32_C( 665.00), SIMDE_FLOAT32_C( 342.00), SIMDE_FLOAT32_C( 672.00), SIMDE_FLOAT32_C( -626.00), SIMDE_FLOAT32_C( -14.00), SIMDE_FLOAT32_C( -429.00) } }, { { SIMDE_FLOAT32_C( 358.75), SIMDE_FLOAT32_C( -324.36), SIMDE_FLOAT32_C( -800.95), SIMDE_FLOAT32_C( 633.11), SIMDE_FLOAT32_C( 402.96), SIMDE_FLOAT32_C( 676.62), SIMDE_FLOAT32_C( 601.73), SIMDE_FLOAT32_C( -337.48) }, { SIMDE_FLOAT32_C( 358.00), SIMDE_FLOAT32_C( -325.00), SIMDE_FLOAT32_C( -801.00), SIMDE_FLOAT32_C( 633.00), SIMDE_FLOAT32_C( 402.00), SIMDE_FLOAT32_C( 676.00), SIMDE_FLOAT32_C( 601.00), SIMDE_FLOAT32_C( -338.00) } }, { { SIMDE_FLOAT32_C( 783.75), SIMDE_FLOAT32_C( -360.73), SIMDE_FLOAT32_C( 67.67), SIMDE_FLOAT32_C( 776.41), SIMDE_FLOAT32_C( -832.20), SIMDE_FLOAT32_C( -976.87), SIMDE_FLOAT32_C( 82.26), SIMDE_FLOAT32_C( 953.31) }, { SIMDE_FLOAT32_C( 783.00), SIMDE_FLOAT32_C( 
-361.00), SIMDE_FLOAT32_C( 67.00), SIMDE_FLOAT32_C( 776.00), SIMDE_FLOAT32_C( -833.00), SIMDE_FLOAT32_C( -977.00), SIMDE_FLOAT32_C( 82.00), SIMDE_FLOAT32_C( 953.00) } }, { { SIMDE_FLOAT32_C( -239.59), SIMDE_FLOAT32_C( -351.22), SIMDE_FLOAT32_C( -806.83), SIMDE_FLOAT32_C( -437.64), SIMDE_FLOAT32_C( -753.50), SIMDE_FLOAT32_C( 13.03), SIMDE_FLOAT32_C( -881.39), SIMDE_FLOAT32_C( -91.19) }, { SIMDE_FLOAT32_C( -240.00), SIMDE_FLOAT32_C( -352.00), SIMDE_FLOAT32_C( -807.00), SIMDE_FLOAT32_C( -438.00), SIMDE_FLOAT32_C( -754.00), SIMDE_FLOAT32_C( 13.00), SIMDE_FLOAT32_C( -882.00), SIMDE_FLOAT32_C( -92.00) } }, { { SIMDE_FLOAT32_C( 503.95), SIMDE_FLOAT32_C( 784.32), SIMDE_FLOAT32_C( -748.46), SIMDE_FLOAT32_C( 176.71), SIMDE_FLOAT32_C( -840.70), SIMDE_FLOAT32_C( 238.18), SIMDE_FLOAT32_C( 748.64), SIMDE_FLOAT32_C( 518.06) }, { SIMDE_FLOAT32_C( 503.00), SIMDE_FLOAT32_C( 784.00), SIMDE_FLOAT32_C( -749.00), SIMDE_FLOAT32_C( 176.00), SIMDE_FLOAT32_C( -841.00), SIMDE_FLOAT32_C( 238.00), SIMDE_FLOAT32_C( 748.00), SIMDE_FLOAT32_C( 518.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_svml_floor_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_svml_floor_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( -780.47), SIMDE_FLOAT64_C( -616.82), SIMDE_FLOAT64_C( -962.48), SIMDE_FLOAT64_C( -74.66) }, { SIMDE_FLOAT64_C( -781.00), SIMDE_FLOAT64_C( -617.00), SIMDE_FLOAT64_C( -963.00), SIMDE_FLOAT64_C( -75.00) } }, { { SIMDE_FLOAT64_C( -359.82), SIMDE_FLOAT64_C( -704.98), SIMDE_FLOAT64_C( 11.20), SIMDE_FLOAT64_C( 223.91) }, { SIMDE_FLOAT64_C( -360.00), SIMDE_FLOAT64_C( -705.00), SIMDE_FLOAT64_C( 11.00), SIMDE_FLOAT64_C( 223.00) } }, { { SIMDE_FLOAT64_C( 173.90), SIMDE_FLOAT64_C( 506.89), SIMDE_FLOAT64_C( 153.15), 
SIMDE_FLOAT64_C( -180.08) }, { SIMDE_FLOAT64_C( 173.00), SIMDE_FLOAT64_C( 506.00), SIMDE_FLOAT64_C( 153.00), SIMDE_FLOAT64_C( -181.00) } }, { { SIMDE_FLOAT64_C( -673.54), SIMDE_FLOAT64_C( 252.79), SIMDE_FLOAT64_C( 95.13), SIMDE_FLOAT64_C( -639.41) }, { SIMDE_FLOAT64_C( -674.00), SIMDE_FLOAT64_C( 252.00), SIMDE_FLOAT64_C( 95.00), SIMDE_FLOAT64_C( -640.00) } }, { { SIMDE_FLOAT64_C( -419.46), SIMDE_FLOAT64_C( 418.21), SIMDE_FLOAT64_C( -778.55), SIMDE_FLOAT64_C( -706.38) }, { SIMDE_FLOAT64_C( -420.00), SIMDE_FLOAT64_C( 418.00), SIMDE_FLOAT64_C( -779.00), SIMDE_FLOAT64_C( -707.00) } }, { { SIMDE_FLOAT64_C( -178.87), SIMDE_FLOAT64_C( -923.30), SIMDE_FLOAT64_C( -302.46), SIMDE_FLOAT64_C( -406.02) }, { SIMDE_FLOAT64_C( -179.00), SIMDE_FLOAT64_C( -924.00), SIMDE_FLOAT64_C( -303.00), SIMDE_FLOAT64_C( -407.00) } }, { { SIMDE_FLOAT64_C( 447.97), SIMDE_FLOAT64_C( 431.46), SIMDE_FLOAT64_C( -217.97), SIMDE_FLOAT64_C( -97.70) }, { SIMDE_FLOAT64_C( 447.00), SIMDE_FLOAT64_C( 431.00), SIMDE_FLOAT64_C( -218.00), SIMDE_FLOAT64_C( -98.00) } }, { { SIMDE_FLOAT64_C( 148.46), SIMDE_FLOAT64_C( 945.32), SIMDE_FLOAT64_C( -663.02), SIMDE_FLOAT64_C( 367.98) }, { SIMDE_FLOAT64_C( 148.00), SIMDE_FLOAT64_C( 945.00), SIMDE_FLOAT64_C( -664.00), SIMDE_FLOAT64_C( 367.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_svml_floor_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_floor_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 739.06), SIMDE_FLOAT32_C( 515.75), SIMDE_FLOAT32_C( -321.43), SIMDE_FLOAT32_C( -796.82), SIMDE_FLOAT32_C( -649.68), SIMDE_FLOAT32_C( -774.53), SIMDE_FLOAT32_C( 961.31), SIMDE_FLOAT32_C( 113.28), SIMDE_FLOAT32_C( -29.07), SIMDE_FLOAT32_C( -213.74), SIMDE_FLOAT32_C( -272.53), SIMDE_FLOAT32_C( 
-357.78), SIMDE_FLOAT32_C( 211.62), SIMDE_FLOAT32_C( 164.32), SIMDE_FLOAT32_C( -909.49), SIMDE_FLOAT32_C( 809.56) }, { SIMDE_FLOAT32_C( 739.00), SIMDE_FLOAT32_C( 515.00), SIMDE_FLOAT32_C( -322.00), SIMDE_FLOAT32_C( -797.00), SIMDE_FLOAT32_C( -650.00), SIMDE_FLOAT32_C( -775.00), SIMDE_FLOAT32_C( 961.00), SIMDE_FLOAT32_C( 113.00), SIMDE_FLOAT32_C( -30.00), SIMDE_FLOAT32_C( -214.00), SIMDE_FLOAT32_C( -273.00), SIMDE_FLOAT32_C( -358.00), SIMDE_FLOAT32_C( 211.00), SIMDE_FLOAT32_C( 164.00), SIMDE_FLOAT32_C( -910.00), SIMDE_FLOAT32_C( 809.00) } }, { { SIMDE_FLOAT32_C( 405.65), SIMDE_FLOAT32_C( -257.98), SIMDE_FLOAT32_C( -364.12), SIMDE_FLOAT32_C( -228.18), SIMDE_FLOAT32_C( 200.69), SIMDE_FLOAT32_C( 614.44), SIMDE_FLOAT32_C( -198.53), SIMDE_FLOAT32_C( -756.05), SIMDE_FLOAT32_C( -833.98), SIMDE_FLOAT32_C( 480.36), SIMDE_FLOAT32_C( 574.27), SIMDE_FLOAT32_C( -408.80), SIMDE_FLOAT32_C( 768.69), SIMDE_FLOAT32_C( 342.19), SIMDE_FLOAT32_C( -17.03), SIMDE_FLOAT32_C( 507.75) }, { SIMDE_FLOAT32_C( 405.00), SIMDE_FLOAT32_C( -258.00), SIMDE_FLOAT32_C( -365.00), SIMDE_FLOAT32_C( -229.00), SIMDE_FLOAT32_C( 200.00), SIMDE_FLOAT32_C( 614.00), SIMDE_FLOAT32_C( -199.00), SIMDE_FLOAT32_C( -757.00), SIMDE_FLOAT32_C( -834.00), SIMDE_FLOAT32_C( 480.00), SIMDE_FLOAT32_C( 574.00), SIMDE_FLOAT32_C( -409.00), SIMDE_FLOAT32_C( 768.00), SIMDE_FLOAT32_C( 342.00), SIMDE_FLOAT32_C( -18.00), SIMDE_FLOAT32_C( 507.00) } }, { { SIMDE_FLOAT32_C( -142.06), SIMDE_FLOAT32_C( 661.53), SIMDE_FLOAT32_C( 710.93), SIMDE_FLOAT32_C( 208.26), SIMDE_FLOAT32_C( 887.01), SIMDE_FLOAT32_C( 672.24), SIMDE_FLOAT32_C( -678.46), SIMDE_FLOAT32_C( -142.06), SIMDE_FLOAT32_C( -541.50), SIMDE_FLOAT32_C( 49.01), SIMDE_FLOAT32_C( 500.16), SIMDE_FLOAT32_C( 670.12), SIMDE_FLOAT32_C( -786.67), SIMDE_FLOAT32_C( 590.66), SIMDE_FLOAT32_C( 479.68), SIMDE_FLOAT32_C( 618.98) }, { SIMDE_FLOAT32_C( -143.00), SIMDE_FLOAT32_C( 661.00), SIMDE_FLOAT32_C( 710.00), SIMDE_FLOAT32_C( 208.00), SIMDE_FLOAT32_C( 887.00), SIMDE_FLOAT32_C( 672.00), 
SIMDE_FLOAT32_C( -679.00), SIMDE_FLOAT32_C( -143.00), SIMDE_FLOAT32_C( -542.00), SIMDE_FLOAT32_C( 49.00), SIMDE_FLOAT32_C( 500.00), SIMDE_FLOAT32_C( 670.00), SIMDE_FLOAT32_C( -787.00), SIMDE_FLOAT32_C( 590.00), SIMDE_FLOAT32_C( 479.00), SIMDE_FLOAT32_C( 618.00) } }, { { SIMDE_FLOAT32_C( -667.32), SIMDE_FLOAT32_C( -884.44), SIMDE_FLOAT32_C( -609.20), SIMDE_FLOAT32_C( 533.37), SIMDE_FLOAT32_C( 730.00), SIMDE_FLOAT32_C( 192.28), SIMDE_FLOAT32_C( 777.32), SIMDE_FLOAT32_C( 896.02), SIMDE_FLOAT32_C( -327.36), SIMDE_FLOAT32_C( 351.59), SIMDE_FLOAT32_C( -512.78), SIMDE_FLOAT32_C( -558.68), SIMDE_FLOAT32_C( -306.22), SIMDE_FLOAT32_C( 470.19), SIMDE_FLOAT32_C( 949.07), SIMDE_FLOAT32_C( 551.72) }, { SIMDE_FLOAT32_C( -668.00), SIMDE_FLOAT32_C( -885.00), SIMDE_FLOAT32_C( -610.00), SIMDE_FLOAT32_C( 533.00), SIMDE_FLOAT32_C( 730.00), SIMDE_FLOAT32_C( 192.00), SIMDE_FLOAT32_C( 777.00), SIMDE_FLOAT32_C( 896.00), SIMDE_FLOAT32_C( -328.00), SIMDE_FLOAT32_C( 351.00), SIMDE_FLOAT32_C( -513.00), SIMDE_FLOAT32_C( -559.00), SIMDE_FLOAT32_C( -307.00), SIMDE_FLOAT32_C( 470.00), SIMDE_FLOAT32_C( 949.00), SIMDE_FLOAT32_C( 551.00) } }, { { SIMDE_FLOAT32_C( 131.72), SIMDE_FLOAT32_C( 660.01), SIMDE_FLOAT32_C( -240.02), SIMDE_FLOAT32_C( 18.73), SIMDE_FLOAT32_C( 332.25), SIMDE_FLOAT32_C( 81.52), SIMDE_FLOAT32_C( 876.67), SIMDE_FLOAT32_C( 790.75), SIMDE_FLOAT32_C( -869.47), SIMDE_FLOAT32_C( 376.83), SIMDE_FLOAT32_C( 460.87), SIMDE_FLOAT32_C( -656.14), SIMDE_FLOAT32_C( -32.51), SIMDE_FLOAT32_C( -59.45), SIMDE_FLOAT32_C( 962.84), SIMDE_FLOAT32_C( 300.17) }, { SIMDE_FLOAT32_C( 131.00), SIMDE_FLOAT32_C( 660.00), SIMDE_FLOAT32_C( -241.00), SIMDE_FLOAT32_C( 18.00), SIMDE_FLOAT32_C( 332.00), SIMDE_FLOAT32_C( 81.00), SIMDE_FLOAT32_C( 876.00), SIMDE_FLOAT32_C( 790.00), SIMDE_FLOAT32_C( -870.00), SIMDE_FLOAT32_C( 376.00), SIMDE_FLOAT32_C( 460.00), SIMDE_FLOAT32_C( -657.00), SIMDE_FLOAT32_C( -33.00), SIMDE_FLOAT32_C( -60.00), SIMDE_FLOAT32_C( 962.00), SIMDE_FLOAT32_C( 300.00) } }, { { SIMDE_FLOAT32_C( 56.12), 
SIMDE_FLOAT32_C( -646.35), SIMDE_FLOAT32_C( -166.46), SIMDE_FLOAT32_C( -213.88), SIMDE_FLOAT32_C( 545.92), SIMDE_FLOAT32_C( -389.14), SIMDE_FLOAT32_C( -317.86), SIMDE_FLOAT32_C( -781.44), SIMDE_FLOAT32_C( 962.45), SIMDE_FLOAT32_C( 169.37), SIMDE_FLOAT32_C( -340.12), SIMDE_FLOAT32_C( -343.77), SIMDE_FLOAT32_C( -360.44), SIMDE_FLOAT32_C( -391.05), SIMDE_FLOAT32_C( -792.05), SIMDE_FLOAT32_C( 771.28) }, { SIMDE_FLOAT32_C( 56.00), SIMDE_FLOAT32_C( -647.00), SIMDE_FLOAT32_C( -167.00), SIMDE_FLOAT32_C( -214.00), SIMDE_FLOAT32_C( 545.00), SIMDE_FLOAT32_C( -390.00), SIMDE_FLOAT32_C( -318.00), SIMDE_FLOAT32_C( -782.00), SIMDE_FLOAT32_C( 962.00), SIMDE_FLOAT32_C( 169.00), SIMDE_FLOAT32_C( -341.00), SIMDE_FLOAT32_C( -344.00), SIMDE_FLOAT32_C( -361.00), SIMDE_FLOAT32_C( -392.00), SIMDE_FLOAT32_C( -793.00), SIMDE_FLOAT32_C( 771.00) } }, { { SIMDE_FLOAT32_C( -731.04), SIMDE_FLOAT32_C( -32.07), SIMDE_FLOAT32_C( -209.99), SIMDE_FLOAT32_C( 601.21), SIMDE_FLOAT32_C( -950.55), SIMDE_FLOAT32_C( -333.32), SIMDE_FLOAT32_C( 391.96), SIMDE_FLOAT32_C( -820.02), SIMDE_FLOAT32_C( -956.49), SIMDE_FLOAT32_C( -147.17), SIMDE_FLOAT32_C( -476.16), SIMDE_FLOAT32_C( 11.00), SIMDE_FLOAT32_C( 793.38), SIMDE_FLOAT32_C( -513.32), SIMDE_FLOAT32_C( -688.82), SIMDE_FLOAT32_C( -150.50) }, { SIMDE_FLOAT32_C( -732.00), SIMDE_FLOAT32_C( -33.00), SIMDE_FLOAT32_C( -210.00), SIMDE_FLOAT32_C( 601.00), SIMDE_FLOAT32_C( -951.00), SIMDE_FLOAT32_C( -334.00), SIMDE_FLOAT32_C( 391.00), SIMDE_FLOAT32_C( -821.00), SIMDE_FLOAT32_C( -957.00), SIMDE_FLOAT32_C( -148.00), SIMDE_FLOAT32_C( -477.00), SIMDE_FLOAT32_C( 11.00), SIMDE_FLOAT32_C( 793.00), SIMDE_FLOAT32_C( -514.00), SIMDE_FLOAT32_C( -689.00), SIMDE_FLOAT32_C( -151.00) } }, { { SIMDE_FLOAT32_C( -159.67), SIMDE_FLOAT32_C( 144.72), SIMDE_FLOAT32_C( 635.62), SIMDE_FLOAT32_C( -613.75), SIMDE_FLOAT32_C( 755.58), SIMDE_FLOAT32_C( -682.24), SIMDE_FLOAT32_C( -395.19), SIMDE_FLOAT32_C( 718.03), SIMDE_FLOAT32_C( 487.12), SIMDE_FLOAT32_C( 264.69), SIMDE_FLOAT32_C( -625.74), 
SIMDE_FLOAT32_C( -873.32), SIMDE_FLOAT32_C( 873.65), SIMDE_FLOAT32_C( -417.79), SIMDE_FLOAT32_C( 897.96), SIMDE_FLOAT32_C( -857.39) }, { SIMDE_FLOAT32_C( -160.00), SIMDE_FLOAT32_C( 144.00), SIMDE_FLOAT32_C( 635.00), SIMDE_FLOAT32_C( -614.00), SIMDE_FLOAT32_C( 755.00), SIMDE_FLOAT32_C( -683.00), SIMDE_FLOAT32_C( -396.00), SIMDE_FLOAT32_C( 718.00), SIMDE_FLOAT32_C( 487.00), SIMDE_FLOAT32_C( 264.00), SIMDE_FLOAT32_C( -626.00), SIMDE_FLOAT32_C( -874.00), SIMDE_FLOAT32_C( 873.00), SIMDE_FLOAT32_C( -418.00), SIMDE_FLOAT32_C( 897.00), SIMDE_FLOAT32_C( -858.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_floor_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_floor_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 838.31), SIMDE_FLOAT32_C( 390.56), SIMDE_FLOAT32_C( -564.73), SIMDE_FLOAT32_C( 308.39), SIMDE_FLOAT32_C( 260.44), SIMDE_FLOAT32_C( -533.14), SIMDE_FLOAT32_C( -978.85), SIMDE_FLOAT32_C( -130.38), SIMDE_FLOAT32_C( 176.76), SIMDE_FLOAT32_C( -227.10), SIMDE_FLOAT32_C( -128.58), SIMDE_FLOAT32_C( 463.85), SIMDE_FLOAT32_C( -349.81), SIMDE_FLOAT32_C( 938.22), SIMDE_FLOAT32_C( -414.35), SIMDE_FLOAT32_C( 715.39) }, UINT8_C( 56), { SIMDE_FLOAT32_C( 324.66), SIMDE_FLOAT32_C( -904.07), SIMDE_FLOAT32_C( 834.59), SIMDE_FLOAT32_C( -638.12), SIMDE_FLOAT32_C( -994.43), SIMDE_FLOAT32_C( -322.02), SIMDE_FLOAT32_C( 105.22), SIMDE_FLOAT32_C( -770.91), SIMDE_FLOAT32_C( 604.26), SIMDE_FLOAT32_C( -988.26), SIMDE_FLOAT32_C( -580.41), SIMDE_FLOAT32_C( 673.34), SIMDE_FLOAT32_C( 425.23), SIMDE_FLOAT32_C( 713.78), SIMDE_FLOAT32_C( 511.64), SIMDE_FLOAT32_C( -184.21) }, { SIMDE_FLOAT32_C( 838.31), SIMDE_FLOAT32_C( 390.56), SIMDE_FLOAT32_C( -564.73), 
SIMDE_FLOAT32_C( -639.00), SIMDE_FLOAT32_C( -995.00), SIMDE_FLOAT32_C( -323.00), SIMDE_FLOAT32_C( -978.85), SIMDE_FLOAT32_C( -130.38), SIMDE_FLOAT32_C( 176.76), SIMDE_FLOAT32_C( -227.10), SIMDE_FLOAT32_C( -128.58), SIMDE_FLOAT32_C( 463.85), SIMDE_FLOAT32_C( -349.81), SIMDE_FLOAT32_C( 938.22), SIMDE_FLOAT32_C( -414.35), SIMDE_FLOAT32_C( 715.39) } }, { { SIMDE_FLOAT32_C( -850.94), SIMDE_FLOAT32_C( -179.97), SIMDE_FLOAT32_C( -923.77), SIMDE_FLOAT32_C( -384.08), SIMDE_FLOAT32_C( -158.82), SIMDE_FLOAT32_C( -54.15), SIMDE_FLOAT32_C( 792.68), SIMDE_FLOAT32_C( 614.08), SIMDE_FLOAT32_C( 817.27), SIMDE_FLOAT32_C( 256.54), SIMDE_FLOAT32_C( -735.74), SIMDE_FLOAT32_C( 755.49), SIMDE_FLOAT32_C( 842.19), SIMDE_FLOAT32_C( 979.66), SIMDE_FLOAT32_C( 610.86), SIMDE_FLOAT32_C( 166.85) }, UINT8_C(134), { SIMDE_FLOAT32_C( 445.45), SIMDE_FLOAT32_C( 528.73), SIMDE_FLOAT32_C( -918.84), SIMDE_FLOAT32_C( -876.56), SIMDE_FLOAT32_C( -366.04), SIMDE_FLOAT32_C( -689.75), SIMDE_FLOAT32_C( 727.70), SIMDE_FLOAT32_C( -354.31), SIMDE_FLOAT32_C( -270.16), SIMDE_FLOAT32_C( 401.04), SIMDE_FLOAT32_C( -929.08), SIMDE_FLOAT32_C( -556.38), SIMDE_FLOAT32_C( -87.32), SIMDE_FLOAT32_C( -113.29), SIMDE_FLOAT32_C( -407.33), SIMDE_FLOAT32_C( 732.72) }, { SIMDE_FLOAT32_C( -850.94), SIMDE_FLOAT32_C( 528.00), SIMDE_FLOAT32_C( -919.00), SIMDE_FLOAT32_C( -384.08), SIMDE_FLOAT32_C( -158.82), SIMDE_FLOAT32_C( -54.15), SIMDE_FLOAT32_C( 792.68), SIMDE_FLOAT32_C( -355.00), SIMDE_FLOAT32_C( 817.27), SIMDE_FLOAT32_C( 256.54), SIMDE_FLOAT32_C( -735.74), SIMDE_FLOAT32_C( 755.49), SIMDE_FLOAT32_C( 842.19), SIMDE_FLOAT32_C( 979.66), SIMDE_FLOAT32_C( 610.86), SIMDE_FLOAT32_C( 166.85) } }, { { SIMDE_FLOAT32_C( -37.05), SIMDE_FLOAT32_C( 208.59), SIMDE_FLOAT32_C( -426.10), SIMDE_FLOAT32_C( 908.80), SIMDE_FLOAT32_C( 1.27), SIMDE_FLOAT32_C( -812.02), SIMDE_FLOAT32_C( 726.06), SIMDE_FLOAT32_C( -742.19), SIMDE_FLOAT32_C( -547.76), SIMDE_FLOAT32_C( 481.55), SIMDE_FLOAT32_C( -900.00), SIMDE_FLOAT32_C( -568.10), SIMDE_FLOAT32_C( 92.41), 
SIMDE_FLOAT32_C( 266.85), SIMDE_FLOAT32_C( -492.51), SIMDE_FLOAT32_C( -462.13) }, UINT8_C(130), { SIMDE_FLOAT32_C( -411.36), SIMDE_FLOAT32_C( -338.69), SIMDE_FLOAT32_C( 429.54), SIMDE_FLOAT32_C( -101.11), SIMDE_FLOAT32_C( -610.99), SIMDE_FLOAT32_C( -924.77), SIMDE_FLOAT32_C( 628.73), SIMDE_FLOAT32_C( 790.05), SIMDE_FLOAT32_C( -853.85), SIMDE_FLOAT32_C( -927.65), SIMDE_FLOAT32_C( -297.26), SIMDE_FLOAT32_C( 32.86), SIMDE_FLOAT32_C( -334.98), SIMDE_FLOAT32_C( -564.55), SIMDE_FLOAT32_C( 995.81), SIMDE_FLOAT32_C( 873.62) }, { SIMDE_FLOAT32_C( -37.05), SIMDE_FLOAT32_C( -339.00), SIMDE_FLOAT32_C( -426.10), SIMDE_FLOAT32_C( 908.80), SIMDE_FLOAT32_C( 1.27), SIMDE_FLOAT32_C( -812.02), SIMDE_FLOAT32_C( 726.06), SIMDE_FLOAT32_C( 790.00), SIMDE_FLOAT32_C( -547.76), SIMDE_FLOAT32_C( 481.55), SIMDE_FLOAT32_C( -900.00), SIMDE_FLOAT32_C( -568.10), SIMDE_FLOAT32_C( 92.41), SIMDE_FLOAT32_C( 266.85), SIMDE_FLOAT32_C( -492.51), SIMDE_FLOAT32_C( -462.13) } }, { { SIMDE_FLOAT32_C( 9.35), SIMDE_FLOAT32_C( 904.61), SIMDE_FLOAT32_C( -125.11), SIMDE_FLOAT32_C( 197.33), SIMDE_FLOAT32_C( 630.67), SIMDE_FLOAT32_C( 132.70), SIMDE_FLOAT32_C( 649.56), SIMDE_FLOAT32_C( 112.22), SIMDE_FLOAT32_C( 232.70), SIMDE_FLOAT32_C( -918.54), SIMDE_FLOAT32_C( -795.36), SIMDE_FLOAT32_C( -500.45), SIMDE_FLOAT32_C( -411.05), SIMDE_FLOAT32_C( -257.49), SIMDE_FLOAT32_C( 295.13), SIMDE_FLOAT32_C( 177.59) }, UINT8_C(202), { SIMDE_FLOAT32_C( -275.34), SIMDE_FLOAT32_C( -923.51), SIMDE_FLOAT32_C( 792.83), SIMDE_FLOAT32_C( -200.11), SIMDE_FLOAT32_C( 705.22), SIMDE_FLOAT32_C( 582.88), SIMDE_FLOAT32_C( -53.96), SIMDE_FLOAT32_C( 777.57), SIMDE_FLOAT32_C( -714.38), SIMDE_FLOAT32_C( 978.91), SIMDE_FLOAT32_C( -557.41), SIMDE_FLOAT32_C( -278.93), SIMDE_FLOAT32_C( 974.71), SIMDE_FLOAT32_C( -683.79), SIMDE_FLOAT32_C( 730.42), SIMDE_FLOAT32_C( 879.32) }, { SIMDE_FLOAT32_C( 9.35), SIMDE_FLOAT32_C( -924.00), SIMDE_FLOAT32_C( -125.11), SIMDE_FLOAT32_C( -201.00), SIMDE_FLOAT32_C( 630.67), SIMDE_FLOAT32_C( 132.70), SIMDE_FLOAT32_C( 
-54.00), SIMDE_FLOAT32_C( 777.00), SIMDE_FLOAT32_C( 232.70), SIMDE_FLOAT32_C( -918.54), SIMDE_FLOAT32_C( -795.36), SIMDE_FLOAT32_C( -500.45), SIMDE_FLOAT32_C( -411.05), SIMDE_FLOAT32_C( -257.49), SIMDE_FLOAT32_C( 295.13), SIMDE_FLOAT32_C( 177.59) } }, { { SIMDE_FLOAT32_C( 191.09), SIMDE_FLOAT32_C( -72.26), SIMDE_FLOAT32_C( 509.99), SIMDE_FLOAT32_C( -676.21), SIMDE_FLOAT32_C( -422.69), SIMDE_FLOAT32_C( -377.79), SIMDE_FLOAT32_C( 556.49), SIMDE_FLOAT32_C( -341.23), SIMDE_FLOAT32_C( -173.15), SIMDE_FLOAT32_C( -943.96), SIMDE_FLOAT32_C( 247.72), SIMDE_FLOAT32_C( 569.36), SIMDE_FLOAT32_C( 351.17), SIMDE_FLOAT32_C( -574.69), SIMDE_FLOAT32_C( -26.83), SIMDE_FLOAT32_C( -924.17) }, UINT8_C(134), { SIMDE_FLOAT32_C( -234.00), SIMDE_FLOAT32_C( -124.28), SIMDE_FLOAT32_C( -792.99), SIMDE_FLOAT32_C( -651.12), SIMDE_FLOAT32_C( 821.76), SIMDE_FLOAT32_C( 984.58), SIMDE_FLOAT32_C( -365.50), SIMDE_FLOAT32_C( 800.67), SIMDE_FLOAT32_C( -572.83), SIMDE_FLOAT32_C( 355.57), SIMDE_FLOAT32_C( 775.38), SIMDE_FLOAT32_C( -256.62), SIMDE_FLOAT32_C( 85.98), SIMDE_FLOAT32_C( 654.71), SIMDE_FLOAT32_C( 934.47), SIMDE_FLOAT32_C( -986.27) }, { SIMDE_FLOAT32_C( 191.09), SIMDE_FLOAT32_C( -125.00), SIMDE_FLOAT32_C( -793.00), SIMDE_FLOAT32_C( -676.21), SIMDE_FLOAT32_C( -422.69), SIMDE_FLOAT32_C( -377.79), SIMDE_FLOAT32_C( 556.49), SIMDE_FLOAT32_C( 800.00), SIMDE_FLOAT32_C( -173.15), SIMDE_FLOAT32_C( -943.96), SIMDE_FLOAT32_C( 247.72), SIMDE_FLOAT32_C( 569.36), SIMDE_FLOAT32_C( 351.17), SIMDE_FLOAT32_C( -574.69), SIMDE_FLOAT32_C( -26.83), SIMDE_FLOAT32_C( -924.17) } }, { { SIMDE_FLOAT32_C( 164.70), SIMDE_FLOAT32_C( -741.74), SIMDE_FLOAT32_C( -408.96), SIMDE_FLOAT32_C( 786.91), SIMDE_FLOAT32_C( 814.76), SIMDE_FLOAT32_C( 249.81), SIMDE_FLOAT32_C( -386.24), SIMDE_FLOAT32_C( 870.80), SIMDE_FLOAT32_C( -502.47), SIMDE_FLOAT32_C( -816.88), SIMDE_FLOAT32_C( 221.97), SIMDE_FLOAT32_C( -77.16), SIMDE_FLOAT32_C( 156.29), SIMDE_FLOAT32_C( 297.80), SIMDE_FLOAT32_C( 424.63), SIMDE_FLOAT32_C( 922.29) }, UINT8_C(198), { 
SIMDE_FLOAT32_C( 631.65), SIMDE_FLOAT32_C( -728.83), SIMDE_FLOAT32_C( 995.29), SIMDE_FLOAT32_C( 616.23), SIMDE_FLOAT32_C( -94.34), SIMDE_FLOAT32_C( 795.96), SIMDE_FLOAT32_C( -956.60), SIMDE_FLOAT32_C( -738.77), SIMDE_FLOAT32_C( 571.34), SIMDE_FLOAT32_C( -213.23), SIMDE_FLOAT32_C( 347.21), SIMDE_FLOAT32_C( 226.05), SIMDE_FLOAT32_C( -278.76), SIMDE_FLOAT32_C( 360.94), SIMDE_FLOAT32_C( -609.25), SIMDE_FLOAT32_C( -20.49) }, { SIMDE_FLOAT32_C( 164.70), SIMDE_FLOAT32_C( -729.00), SIMDE_FLOAT32_C( 995.00), SIMDE_FLOAT32_C( 786.91), SIMDE_FLOAT32_C( 814.76), SIMDE_FLOAT32_C( 249.81), SIMDE_FLOAT32_C( -957.00), SIMDE_FLOAT32_C( -739.00), SIMDE_FLOAT32_C( -502.47), SIMDE_FLOAT32_C( -816.88), SIMDE_FLOAT32_C( 221.97), SIMDE_FLOAT32_C( -77.16), SIMDE_FLOAT32_C( 156.29), SIMDE_FLOAT32_C( 297.80), SIMDE_FLOAT32_C( 424.63), SIMDE_FLOAT32_C( 922.29) } }, { { SIMDE_FLOAT32_C( 951.98), SIMDE_FLOAT32_C( -822.34), SIMDE_FLOAT32_C( -205.73), SIMDE_FLOAT32_C( 201.79), SIMDE_FLOAT32_C( -208.58), SIMDE_FLOAT32_C( -334.93), SIMDE_FLOAT32_C( 699.32), SIMDE_FLOAT32_C( -25.46), SIMDE_FLOAT32_C( 887.04), SIMDE_FLOAT32_C( -377.85), SIMDE_FLOAT32_C( -869.17), SIMDE_FLOAT32_C( 184.84), SIMDE_FLOAT32_C( -953.21), SIMDE_FLOAT32_C( -946.88), SIMDE_FLOAT32_C( 358.36), SIMDE_FLOAT32_C( 678.43) }, UINT8_C(118), { SIMDE_FLOAT32_C( 353.65), SIMDE_FLOAT32_C( 294.66), SIMDE_FLOAT32_C( 229.95), SIMDE_FLOAT32_C( 149.61), SIMDE_FLOAT32_C( 338.06), SIMDE_FLOAT32_C( 491.18), SIMDE_FLOAT32_C( -279.05), SIMDE_FLOAT32_C( -875.17), SIMDE_FLOAT32_C( -161.61), SIMDE_FLOAT32_C( 947.00), SIMDE_FLOAT32_C( -153.92), SIMDE_FLOAT32_C( -800.67), SIMDE_FLOAT32_C( -662.25), SIMDE_FLOAT32_C( 825.58), SIMDE_FLOAT32_C( -848.68), SIMDE_FLOAT32_C( -484.59) }, { SIMDE_FLOAT32_C( 951.98), SIMDE_FLOAT32_C( 294.00), SIMDE_FLOAT32_C( 229.00), SIMDE_FLOAT32_C( 201.79), SIMDE_FLOAT32_C( 338.00), SIMDE_FLOAT32_C( 491.00), SIMDE_FLOAT32_C( -280.00), SIMDE_FLOAT32_C( -25.46), SIMDE_FLOAT32_C( 887.04), SIMDE_FLOAT32_C( -377.85), 
SIMDE_FLOAT32_C( -869.17), SIMDE_FLOAT32_C( 184.84), SIMDE_FLOAT32_C( -953.21), SIMDE_FLOAT32_C( -946.88), SIMDE_FLOAT32_C( 358.36), SIMDE_FLOAT32_C( 678.43) } }, { { SIMDE_FLOAT32_C( -380.15), SIMDE_FLOAT32_C( 353.11), SIMDE_FLOAT32_C( 306.83), SIMDE_FLOAT32_C( 284.92), SIMDE_FLOAT32_C( 52.42), SIMDE_FLOAT32_C( -718.63), SIMDE_FLOAT32_C( 171.96), SIMDE_FLOAT32_C( 674.58), SIMDE_FLOAT32_C( -587.81), SIMDE_FLOAT32_C( -643.20), SIMDE_FLOAT32_C( 721.36), SIMDE_FLOAT32_C( -534.69), SIMDE_FLOAT32_C( 715.16), SIMDE_FLOAT32_C( 399.80), SIMDE_FLOAT32_C( -210.40), SIMDE_FLOAT32_C( 68.81) }, UINT8_MAX, { SIMDE_FLOAT32_C( -980.45), SIMDE_FLOAT32_C( -781.58), SIMDE_FLOAT32_C( -967.49), SIMDE_FLOAT32_C( 510.73), SIMDE_FLOAT32_C( -60.62), SIMDE_FLOAT32_C( -842.65), SIMDE_FLOAT32_C( -650.88), SIMDE_FLOAT32_C( -113.62), SIMDE_FLOAT32_C( 3.42), SIMDE_FLOAT32_C( -451.55), SIMDE_FLOAT32_C( 224.13), SIMDE_FLOAT32_C( -170.99), SIMDE_FLOAT32_C( -300.23), SIMDE_FLOAT32_C( 739.54), SIMDE_FLOAT32_C( 448.86), SIMDE_FLOAT32_C( -947.12) }, { SIMDE_FLOAT32_C( -981.00), SIMDE_FLOAT32_C( -782.00), SIMDE_FLOAT32_C( -968.00), SIMDE_FLOAT32_C( 510.00), SIMDE_FLOAT32_C( -61.00), SIMDE_FLOAT32_C( -843.00), SIMDE_FLOAT32_C( -651.00), SIMDE_FLOAT32_C( -114.00), SIMDE_FLOAT32_C( -587.81), SIMDE_FLOAT32_C( -643.20), SIMDE_FLOAT32_C( 721.36), SIMDE_FLOAT32_C( -534.69), SIMDE_FLOAT32_C( 715.16), SIMDE_FLOAT32_C( 399.80), SIMDE_FLOAT32_C( -210.40), SIMDE_FLOAT32_C( 68.81) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_floor_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_floor_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -794.32), 
SIMDE_FLOAT64_C( -48.70), SIMDE_FLOAT64_C( 277.79), SIMDE_FLOAT64_C( -475.80), SIMDE_FLOAT64_C( -876.95), SIMDE_FLOAT64_C( -924.41), SIMDE_FLOAT64_C( 255.35), SIMDE_FLOAT64_C( -243.50) }, { SIMDE_FLOAT64_C( -795.00), SIMDE_FLOAT64_C( -49.00), SIMDE_FLOAT64_C( 277.00), SIMDE_FLOAT64_C( -476.00), SIMDE_FLOAT64_C( -877.00), SIMDE_FLOAT64_C( -925.00), SIMDE_FLOAT64_C( 255.00), SIMDE_FLOAT64_C( -244.00) } }, { { SIMDE_FLOAT64_C( -620.91), SIMDE_FLOAT64_C( -173.96), SIMDE_FLOAT64_C( 275.90), SIMDE_FLOAT64_C( -717.33), SIMDE_FLOAT64_C( -402.37), SIMDE_FLOAT64_C( -882.40), SIMDE_FLOAT64_C( 45.04), SIMDE_FLOAT64_C( -141.04) }, { SIMDE_FLOAT64_C( -621.00), SIMDE_FLOAT64_C( -174.00), SIMDE_FLOAT64_C( 275.00), SIMDE_FLOAT64_C( -718.00), SIMDE_FLOAT64_C( -403.00), SIMDE_FLOAT64_C( -883.00), SIMDE_FLOAT64_C( 45.00), SIMDE_FLOAT64_C( -142.00) } }, { { SIMDE_FLOAT64_C( -548.52), SIMDE_FLOAT64_C( -215.27), SIMDE_FLOAT64_C( 977.63), SIMDE_FLOAT64_C( 913.41), SIMDE_FLOAT64_C( -371.07), SIMDE_FLOAT64_C( 460.81), SIMDE_FLOAT64_C( 547.36), SIMDE_FLOAT64_C( -452.52) }, { SIMDE_FLOAT64_C( -549.00), SIMDE_FLOAT64_C( -216.00), SIMDE_FLOAT64_C( 977.00), SIMDE_FLOAT64_C( 913.00), SIMDE_FLOAT64_C( -372.00), SIMDE_FLOAT64_C( 460.00), SIMDE_FLOAT64_C( 547.00), SIMDE_FLOAT64_C( -453.00) } }, { { SIMDE_FLOAT64_C( -61.27), SIMDE_FLOAT64_C( -606.40), SIMDE_FLOAT64_C( 310.76), SIMDE_FLOAT64_C( 420.51), SIMDE_FLOAT64_C( -353.71), SIMDE_FLOAT64_C( -327.75), SIMDE_FLOAT64_C( 663.33), SIMDE_FLOAT64_C( -148.03) }, { SIMDE_FLOAT64_C( -62.00), SIMDE_FLOAT64_C( -607.00), SIMDE_FLOAT64_C( 310.00), SIMDE_FLOAT64_C( 420.00), SIMDE_FLOAT64_C( -354.00), SIMDE_FLOAT64_C( -328.00), SIMDE_FLOAT64_C( 663.00), SIMDE_FLOAT64_C( -149.00) } }, { { SIMDE_FLOAT64_C( 623.55), SIMDE_FLOAT64_C( -58.88), SIMDE_FLOAT64_C( 376.17), SIMDE_FLOAT64_C( 746.60), SIMDE_FLOAT64_C( 16.71), SIMDE_FLOAT64_C( -368.49), SIMDE_FLOAT64_C( -496.90), SIMDE_FLOAT64_C( 395.80) }, { SIMDE_FLOAT64_C( 623.00), SIMDE_FLOAT64_C( -59.00), 
SIMDE_FLOAT64_C( 376.00), SIMDE_FLOAT64_C( 746.00), SIMDE_FLOAT64_C( 16.00), SIMDE_FLOAT64_C( -369.00), SIMDE_FLOAT64_C( -497.00), SIMDE_FLOAT64_C( 395.00) } }, { { SIMDE_FLOAT64_C( 457.55), SIMDE_FLOAT64_C( 779.00), SIMDE_FLOAT64_C( 678.47), SIMDE_FLOAT64_C( -944.81), SIMDE_FLOAT64_C( 896.60), SIMDE_FLOAT64_C( -276.49), SIMDE_FLOAT64_C( -85.86), SIMDE_FLOAT64_C( -651.92) }, { SIMDE_FLOAT64_C( 457.00), SIMDE_FLOAT64_C( 779.00), SIMDE_FLOAT64_C( 678.00), SIMDE_FLOAT64_C( -945.00), SIMDE_FLOAT64_C( 896.00), SIMDE_FLOAT64_C( -277.00), SIMDE_FLOAT64_C( -86.00), SIMDE_FLOAT64_C( -652.00) } }, { { SIMDE_FLOAT64_C( 508.25), SIMDE_FLOAT64_C( -108.22), SIMDE_FLOAT64_C( -738.51), SIMDE_FLOAT64_C( -862.82), SIMDE_FLOAT64_C( -647.41), SIMDE_FLOAT64_C( 808.85), SIMDE_FLOAT64_C( -315.34), SIMDE_FLOAT64_C( 291.32) }, { SIMDE_FLOAT64_C( 508.00), SIMDE_FLOAT64_C( -109.00), SIMDE_FLOAT64_C( -739.00), SIMDE_FLOAT64_C( -863.00), SIMDE_FLOAT64_C( -648.00), SIMDE_FLOAT64_C( 808.00), SIMDE_FLOAT64_C( -316.00), SIMDE_FLOAT64_C( 291.00) } }, { { SIMDE_FLOAT64_C( -797.54), SIMDE_FLOAT64_C( 995.42), SIMDE_FLOAT64_C( -288.16), SIMDE_FLOAT64_C( -151.25), SIMDE_FLOAT64_C( -332.32), SIMDE_FLOAT64_C( -624.84), SIMDE_FLOAT64_C( 700.72), SIMDE_FLOAT64_C( -708.77) }, { SIMDE_FLOAT64_C( -798.00), SIMDE_FLOAT64_C( 995.00), SIMDE_FLOAT64_C( -289.00), SIMDE_FLOAT64_C( -152.00), SIMDE_FLOAT64_C( -333.00), SIMDE_FLOAT64_C( -625.00), SIMDE_FLOAT64_C( 700.00), SIMDE_FLOAT64_C( -709.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_floor_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_floor_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( -886.28), SIMDE_FLOAT64_C( 
-614.03), SIMDE_FLOAT64_C( -883.55), SIMDE_FLOAT64_C( 865.29), SIMDE_FLOAT64_C( -206.48), SIMDE_FLOAT64_C( -34.33), SIMDE_FLOAT64_C( -987.38), SIMDE_FLOAT64_C( 127.49) }, UINT8_C(157), { SIMDE_FLOAT64_C( 163.94), SIMDE_FLOAT64_C( 134.54), SIMDE_FLOAT64_C( 245.58), SIMDE_FLOAT64_C( -615.09), SIMDE_FLOAT64_C( 80.28), SIMDE_FLOAT64_C( -93.17), SIMDE_FLOAT64_C( 181.16), SIMDE_FLOAT64_C( 303.02) }, { SIMDE_FLOAT64_C( 163.00), SIMDE_FLOAT64_C( -614.03), SIMDE_FLOAT64_C( 245.00), SIMDE_FLOAT64_C( -616.00), SIMDE_FLOAT64_C( 80.00), SIMDE_FLOAT64_C( -34.33), SIMDE_FLOAT64_C( -987.38), SIMDE_FLOAT64_C( 303.00) } }, { { SIMDE_FLOAT64_C( 377.85), SIMDE_FLOAT64_C( 999.13), SIMDE_FLOAT64_C( -474.80), SIMDE_FLOAT64_C( -29.53), SIMDE_FLOAT64_C( 777.92), SIMDE_FLOAT64_C( 307.60), SIMDE_FLOAT64_C( 178.13), SIMDE_FLOAT64_C( 680.84) }, UINT8_C(246), { SIMDE_FLOAT64_C( 47.73), SIMDE_FLOAT64_C( 681.42), SIMDE_FLOAT64_C( -141.66), SIMDE_FLOAT64_C( 574.99), SIMDE_FLOAT64_C( -969.81), SIMDE_FLOAT64_C( -27.94), SIMDE_FLOAT64_C( 960.96), SIMDE_FLOAT64_C( -853.36) }, { SIMDE_FLOAT64_C( 377.85), SIMDE_FLOAT64_C( 681.00), SIMDE_FLOAT64_C( -142.00), SIMDE_FLOAT64_C( -29.53), SIMDE_FLOAT64_C( -970.00), SIMDE_FLOAT64_C( -28.00), SIMDE_FLOAT64_C( 960.00), SIMDE_FLOAT64_C( -854.00) } }, { { SIMDE_FLOAT64_C( -162.66), SIMDE_FLOAT64_C( -245.52), SIMDE_FLOAT64_C( 112.31), SIMDE_FLOAT64_C( -150.03), SIMDE_FLOAT64_C( 881.98), SIMDE_FLOAT64_C( 426.57), SIMDE_FLOAT64_C( -986.09), SIMDE_FLOAT64_C( 16.51) }, UINT8_C( 53), { SIMDE_FLOAT64_C( -601.18), SIMDE_FLOAT64_C( -903.21), SIMDE_FLOAT64_C( 578.99), SIMDE_FLOAT64_C( 579.98), SIMDE_FLOAT64_C( 399.82), SIMDE_FLOAT64_C( -43.16), SIMDE_FLOAT64_C( 579.10), SIMDE_FLOAT64_C( 925.02) }, { SIMDE_FLOAT64_C( -602.00), SIMDE_FLOAT64_C( -245.52), SIMDE_FLOAT64_C( 578.00), SIMDE_FLOAT64_C( -150.03), SIMDE_FLOAT64_C( 399.00), SIMDE_FLOAT64_C( -44.00), SIMDE_FLOAT64_C( -986.09), SIMDE_FLOAT64_C( 16.51) } }, { { SIMDE_FLOAT64_C( 927.31), SIMDE_FLOAT64_C( 357.02), 
SIMDE_FLOAT64_C( 232.62), SIMDE_FLOAT64_C( 105.44), SIMDE_FLOAT64_C( 37.87), SIMDE_FLOAT64_C( 434.25), SIMDE_FLOAT64_C( -846.83), SIMDE_FLOAT64_C( -280.72) }, UINT8_C(253), { SIMDE_FLOAT64_C( 728.16), SIMDE_FLOAT64_C( -250.53), SIMDE_FLOAT64_C( 264.65), SIMDE_FLOAT64_C( 689.12), SIMDE_FLOAT64_C( -103.89), SIMDE_FLOAT64_C( -898.01), SIMDE_FLOAT64_C( -556.40), SIMDE_FLOAT64_C( -991.58) }, { SIMDE_FLOAT64_C( 728.00), SIMDE_FLOAT64_C( 357.02), SIMDE_FLOAT64_C( 264.00), SIMDE_FLOAT64_C( 689.00), SIMDE_FLOAT64_C( -104.00), SIMDE_FLOAT64_C( -899.00), SIMDE_FLOAT64_C( -557.00), SIMDE_FLOAT64_C( -992.00) } }, { { SIMDE_FLOAT64_C( -48.04), SIMDE_FLOAT64_C( -674.42), SIMDE_FLOAT64_C( 434.99), SIMDE_FLOAT64_C( -34.14), SIMDE_FLOAT64_C( 342.09), SIMDE_FLOAT64_C( -892.85), SIMDE_FLOAT64_C( 364.68), SIMDE_FLOAT64_C( 438.89) }, UINT8_C( 35), { SIMDE_FLOAT64_C( -55.34), SIMDE_FLOAT64_C( -161.30), SIMDE_FLOAT64_C( -357.03), SIMDE_FLOAT64_C( -476.24), SIMDE_FLOAT64_C( -236.28), SIMDE_FLOAT64_C( -429.72), SIMDE_FLOAT64_C( 880.78), SIMDE_FLOAT64_C( 996.35) }, { SIMDE_FLOAT64_C( -56.00), SIMDE_FLOAT64_C( -162.00), SIMDE_FLOAT64_C( 434.99), SIMDE_FLOAT64_C( -34.14), SIMDE_FLOAT64_C( 342.09), SIMDE_FLOAT64_C( -430.00), SIMDE_FLOAT64_C( 364.68), SIMDE_FLOAT64_C( 438.89) } }, { { SIMDE_FLOAT64_C( 675.71), SIMDE_FLOAT64_C( -81.35), SIMDE_FLOAT64_C( 430.60), SIMDE_FLOAT64_C( 828.89), SIMDE_FLOAT64_C( 637.93), SIMDE_FLOAT64_C( 723.19), SIMDE_FLOAT64_C( 557.05), SIMDE_FLOAT64_C( -612.60) }, UINT8_C(162), { SIMDE_FLOAT64_C( 246.17), SIMDE_FLOAT64_C( 283.52), SIMDE_FLOAT64_C( 89.83), SIMDE_FLOAT64_C( 689.78), SIMDE_FLOAT64_C( 291.94), SIMDE_FLOAT64_C( -958.21), SIMDE_FLOAT64_C( -984.64), SIMDE_FLOAT64_C( -273.07) }, { SIMDE_FLOAT64_C( 675.71), SIMDE_FLOAT64_C( 283.00), SIMDE_FLOAT64_C( 430.60), SIMDE_FLOAT64_C( 828.89), SIMDE_FLOAT64_C( 637.93), SIMDE_FLOAT64_C( -959.00), SIMDE_FLOAT64_C( 557.05), SIMDE_FLOAT64_C( -274.00) } }, { { SIMDE_FLOAT64_C( 7.65), SIMDE_FLOAT64_C( 357.45), 
SIMDE_FLOAT64_C( -165.92), SIMDE_FLOAT64_C( -627.67), SIMDE_FLOAT64_C( -203.66), SIMDE_FLOAT64_C( -479.79), SIMDE_FLOAT64_C( 316.99), SIMDE_FLOAT64_C( 635.04) }, UINT8_C(211), { SIMDE_FLOAT64_C( 840.75), SIMDE_FLOAT64_C( -601.24), SIMDE_FLOAT64_C( 733.46), SIMDE_FLOAT64_C( 721.53), SIMDE_FLOAT64_C( -604.89), SIMDE_FLOAT64_C( 409.18), SIMDE_FLOAT64_C( -359.82), SIMDE_FLOAT64_C( 825.71) }, { SIMDE_FLOAT64_C( 840.00), SIMDE_FLOAT64_C( -602.00), SIMDE_FLOAT64_C( -165.92), SIMDE_FLOAT64_C( -627.67), SIMDE_FLOAT64_C( -605.00), SIMDE_FLOAT64_C( -479.79), SIMDE_FLOAT64_C( -360.00), SIMDE_FLOAT64_C( 825.00) } }, { { SIMDE_FLOAT64_C( 238.07), SIMDE_FLOAT64_C( -721.89), SIMDE_FLOAT64_C( 548.91), SIMDE_FLOAT64_C( -204.89), SIMDE_FLOAT64_C( -334.48), SIMDE_FLOAT64_C( -463.26), SIMDE_FLOAT64_C( -958.71), SIMDE_FLOAT64_C( 949.03) }, UINT8_C(120), { SIMDE_FLOAT64_C( 731.06), SIMDE_FLOAT64_C( 240.97), SIMDE_FLOAT64_C( 668.36), SIMDE_FLOAT64_C( 746.42), SIMDE_FLOAT64_C( 967.90), SIMDE_FLOAT64_C( -323.99), SIMDE_FLOAT64_C( 103.87), SIMDE_FLOAT64_C( -198.02) }, { SIMDE_FLOAT64_C( 238.07), SIMDE_FLOAT64_C( -721.89), SIMDE_FLOAT64_C( 548.91), SIMDE_FLOAT64_C( 746.00), SIMDE_FLOAT64_C( 967.00), SIMDE_FLOAT64_C( -324.00), SIMDE_FLOAT64_C( 103.00), SIMDE_FLOAT64_C( 949.03) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_floor_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_svml_round_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -381.64), SIMDE_FLOAT32_C( -952.18), SIMDE_FLOAT32_C( 936.50), SIMDE_FLOAT32_C( -269.57) }, { SIMDE_FLOAT32_C( -382.00), SIMDE_FLOAT32_C( -952.00), SIMDE_FLOAT32_C( 937.00), SIMDE_FLOAT32_C( -270.00) } 
}, { { SIMDE_FLOAT32_C( 524.01), SIMDE_FLOAT32_C( 820.80), SIMDE_FLOAT32_C( -576.54), SIMDE_FLOAT32_C( 493.48) }, { SIMDE_FLOAT32_C( 524.00), SIMDE_FLOAT32_C( 821.00), SIMDE_FLOAT32_C( -577.00), SIMDE_FLOAT32_C( 493.00) } }, { { SIMDE_FLOAT32_C( -183.12), SIMDE_FLOAT32_C( -410.38), SIMDE_FLOAT32_C( 918.43), SIMDE_FLOAT32_C( 555.31) }, { SIMDE_FLOAT32_C( -183.00), SIMDE_FLOAT32_C( -410.00), SIMDE_FLOAT32_C( 918.00), SIMDE_FLOAT32_C( 555.00) } }, { { SIMDE_FLOAT32_C( -777.47), SIMDE_FLOAT32_C( 961.82), SIMDE_FLOAT32_C( -15.88), SIMDE_FLOAT32_C( -545.38) }, { SIMDE_FLOAT32_C( -777.00), SIMDE_FLOAT32_C( 962.00), SIMDE_FLOAT32_C( -16.00), SIMDE_FLOAT32_C( -545.00) } }, { { SIMDE_FLOAT32_C( 827.92), SIMDE_FLOAT32_C( -576.14), SIMDE_FLOAT32_C( 188.86), SIMDE_FLOAT32_C( -194.33) }, { SIMDE_FLOAT32_C( 828.00), SIMDE_FLOAT32_C( -576.00), SIMDE_FLOAT32_C( 189.00), SIMDE_FLOAT32_C( -194.00) } }, { { SIMDE_FLOAT32_C( -357.49), SIMDE_FLOAT32_C( 544.93), SIMDE_FLOAT32_C( -548.96), SIMDE_FLOAT32_C( 982.95) }, { SIMDE_FLOAT32_C( -357.00), SIMDE_FLOAT32_C( 545.00), SIMDE_FLOAT32_C( -549.00), SIMDE_FLOAT32_C( 983.00) } }, { { SIMDE_FLOAT32_C( -811.59), SIMDE_FLOAT32_C( 502.24), SIMDE_FLOAT32_C( 18.44), SIMDE_FLOAT32_C( -985.11) }, { SIMDE_FLOAT32_C( -812.00), SIMDE_FLOAT32_C( 502.00), SIMDE_FLOAT32_C( 18.00), SIMDE_FLOAT32_C( -985.00) } }, { { SIMDE_FLOAT32_C( -901.60), SIMDE_FLOAT32_C( 1.79), SIMDE_FLOAT32_C( -119.54), SIMDE_FLOAT32_C( -283.24) }, { SIMDE_FLOAT32_C( -902.00), SIMDE_FLOAT32_C( 2.00), SIMDE_FLOAT32_C( -120.00), SIMDE_FLOAT32_C( -283.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_svml_round_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm_svml_round_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 
-775.87), SIMDE_FLOAT64_C( 258.36) }, { SIMDE_FLOAT64_C( -776.00), SIMDE_FLOAT64_C( 258.00) } }, { { SIMDE_FLOAT64_C( 698.30), SIMDE_FLOAT64_C( -24.21) }, { SIMDE_FLOAT64_C( 698.00), SIMDE_FLOAT64_C( -24.00) } }, { { SIMDE_FLOAT64_C( -755.31), SIMDE_FLOAT64_C( -751.07) }, { SIMDE_FLOAT64_C( -755.00), SIMDE_FLOAT64_C( -751.00) } }, { { SIMDE_FLOAT64_C( 607.87), SIMDE_FLOAT64_C( -999.16) }, { SIMDE_FLOAT64_C( 608.00), SIMDE_FLOAT64_C( -999.00) } }, { { SIMDE_FLOAT64_C( -558.18), SIMDE_FLOAT64_C( -447.90) }, { SIMDE_FLOAT64_C( -558.00), SIMDE_FLOAT64_C( -448.00) } }, { { SIMDE_FLOAT64_C( -159.19), SIMDE_FLOAT64_C( 675.96) }, { SIMDE_FLOAT64_C( -159.00), SIMDE_FLOAT64_C( 676.00) } }, { { SIMDE_FLOAT64_C( -682.16), SIMDE_FLOAT64_C( 502.15) }, { SIMDE_FLOAT64_C( -682.00), SIMDE_FLOAT64_C( 502.00) } }, { { SIMDE_FLOAT64_C( -591.87), SIMDE_FLOAT64_C( 775.61) }, { SIMDE_FLOAT64_C( -592.00), SIMDE_FLOAT64_C( 776.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_svml_round_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_svml_round_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 380.84), SIMDE_FLOAT32_C( -788.70), SIMDE_FLOAT32_C( 937.85), SIMDE_FLOAT32_C( 321.73), SIMDE_FLOAT32_C( 250.52), SIMDE_FLOAT32_C( -410.85), SIMDE_FLOAT32_C( -411.50), SIMDE_FLOAT32_C( -342.15) }, { SIMDE_FLOAT32_C( 381.00), SIMDE_FLOAT32_C( -789.00), SIMDE_FLOAT32_C( 938.00), SIMDE_FLOAT32_C( 322.00), SIMDE_FLOAT32_C( 251.00), SIMDE_FLOAT32_C( -411.00), SIMDE_FLOAT32_C( -412.00), SIMDE_FLOAT32_C( -342.00) } }, { { SIMDE_FLOAT32_C( -410.55), SIMDE_FLOAT32_C( 648.37), SIMDE_FLOAT32_C( 294.06), SIMDE_FLOAT32_C( 315.36), SIMDE_FLOAT32_C( -375.65), SIMDE_FLOAT32_C( 783.04), SIMDE_FLOAT32_C( -600.22), SIMDE_FLOAT32_C( -208.94) }, { 
SIMDE_FLOAT32_C( -411.00), SIMDE_FLOAT32_C( 648.00), SIMDE_FLOAT32_C( 294.00), SIMDE_FLOAT32_C( 315.00), SIMDE_FLOAT32_C( -376.00), SIMDE_FLOAT32_C( 783.00), SIMDE_FLOAT32_C( -600.00), SIMDE_FLOAT32_C( -209.00) } }, { { SIMDE_FLOAT32_C( 628.12), SIMDE_FLOAT32_C( 178.11), SIMDE_FLOAT32_C( -902.32), SIMDE_FLOAT32_C( -420.94), SIMDE_FLOAT32_C( -113.02), SIMDE_FLOAT32_C( 352.97), SIMDE_FLOAT32_C( -796.40), SIMDE_FLOAT32_C( -795.50) }, { SIMDE_FLOAT32_C( 628.00), SIMDE_FLOAT32_C( 178.00), SIMDE_FLOAT32_C( -902.00), SIMDE_FLOAT32_C( -421.00), SIMDE_FLOAT32_C( -113.00), SIMDE_FLOAT32_C( 353.00), SIMDE_FLOAT32_C( -796.00), SIMDE_FLOAT32_C( -796.00) } }, { { SIMDE_FLOAT32_C( -712.04), SIMDE_FLOAT32_C( 880.10), SIMDE_FLOAT32_C( 698.48), SIMDE_FLOAT32_C( -638.58), SIMDE_FLOAT32_C( 349.16), SIMDE_FLOAT32_C( 163.60), SIMDE_FLOAT32_C( -690.90), SIMDE_FLOAT32_C( -270.00) }, { SIMDE_FLOAT32_C( -712.00), SIMDE_FLOAT32_C( 880.00), SIMDE_FLOAT32_C( 698.00), SIMDE_FLOAT32_C( -639.00), SIMDE_FLOAT32_C( 349.00), SIMDE_FLOAT32_C( 164.00), SIMDE_FLOAT32_C( -691.00), SIMDE_FLOAT32_C( -270.00) } }, { { SIMDE_FLOAT32_C( 374.90), SIMDE_FLOAT32_C( -753.05), SIMDE_FLOAT32_C( -948.26), SIMDE_FLOAT32_C( -374.58), SIMDE_FLOAT32_C( -163.90), SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( 283.27), SIMDE_FLOAT32_C( 425.55) }, { SIMDE_FLOAT32_C( 375.00), SIMDE_FLOAT32_C( -753.00), SIMDE_FLOAT32_C( -948.00), SIMDE_FLOAT32_C( -375.00), SIMDE_FLOAT32_C( -164.00), SIMDE_FLOAT32_C( -360.00), SIMDE_FLOAT32_C( 283.00), SIMDE_FLOAT32_C( 426.00) } }, { { SIMDE_FLOAT32_C( -711.40), SIMDE_FLOAT32_C( -422.67), SIMDE_FLOAT32_C( -259.09), SIMDE_FLOAT32_C( -87.05), SIMDE_FLOAT32_C( -639.63), SIMDE_FLOAT32_C( 140.69), SIMDE_FLOAT32_C( 704.01), SIMDE_FLOAT32_C( 988.49) }, { SIMDE_FLOAT32_C( -711.00), SIMDE_FLOAT32_C( -423.00), SIMDE_FLOAT32_C( -259.00), SIMDE_FLOAT32_C( -87.00), SIMDE_FLOAT32_C( -640.00), SIMDE_FLOAT32_C( 141.00), SIMDE_FLOAT32_C( 704.00), SIMDE_FLOAT32_C( 988.00) } }, { { SIMDE_FLOAT32_C( -681.20), 
SIMDE_FLOAT32_C( 801.69), SIMDE_FLOAT32_C( -432.45), SIMDE_FLOAT32_C( 205.78), SIMDE_FLOAT32_C( 154.66), SIMDE_FLOAT32_C( -228.84), SIMDE_FLOAT32_C( 410.28), SIMDE_FLOAT32_C( 442.62) }, { SIMDE_FLOAT32_C( -681.00), SIMDE_FLOAT32_C( 802.00), SIMDE_FLOAT32_C( -432.00), SIMDE_FLOAT32_C( 206.00), SIMDE_FLOAT32_C( 155.00), SIMDE_FLOAT32_C( -229.00), SIMDE_FLOAT32_C( 410.00), SIMDE_FLOAT32_C( 443.00) } }, { { SIMDE_FLOAT32_C( -348.74), SIMDE_FLOAT32_C( 108.77), SIMDE_FLOAT32_C( 804.05), SIMDE_FLOAT32_C( -999.58), SIMDE_FLOAT32_C( -727.63), SIMDE_FLOAT32_C( -886.85), SIMDE_FLOAT32_C( -269.57), SIMDE_FLOAT32_C( 647.26) }, { SIMDE_FLOAT32_C( -349.00), SIMDE_FLOAT32_C( 109.00), SIMDE_FLOAT32_C( 804.00), SIMDE_FLOAT32_C( -1000.00), SIMDE_FLOAT32_C( -728.00), SIMDE_FLOAT32_C( -887.00), SIMDE_FLOAT32_C( -270.00), SIMDE_FLOAT32_C( 647.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_svml_round_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_svml_round_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( -844.84), SIMDE_FLOAT64_C( -247.28), SIMDE_FLOAT64_C( 192.26), SIMDE_FLOAT64_C( 426.25) }, { SIMDE_FLOAT64_C( -845.00), SIMDE_FLOAT64_C( -247.00), SIMDE_FLOAT64_C( 192.00), SIMDE_FLOAT64_C( 426.00) } }, { { SIMDE_FLOAT64_C( -53.32), SIMDE_FLOAT64_C( -778.93), SIMDE_FLOAT64_C( -167.10), SIMDE_FLOAT64_C( -593.25) }, { SIMDE_FLOAT64_C( -53.00), SIMDE_FLOAT64_C( -779.00), SIMDE_FLOAT64_C( -167.00), SIMDE_FLOAT64_C( -593.00) } }, { { SIMDE_FLOAT64_C( -450.17), SIMDE_FLOAT64_C( -606.32), SIMDE_FLOAT64_C( 101.38), SIMDE_FLOAT64_C( -341.77) }, { SIMDE_FLOAT64_C( -450.00), SIMDE_FLOAT64_C( -606.00), SIMDE_FLOAT64_C( 101.00), SIMDE_FLOAT64_C( -342.00) } }, { { SIMDE_FLOAT64_C( -461.44), SIMDE_FLOAT64_C( 
674.51), SIMDE_FLOAT64_C( 145.37), SIMDE_FLOAT64_C( 148.63) }, { SIMDE_FLOAT64_C( -461.00), SIMDE_FLOAT64_C( 675.00), SIMDE_FLOAT64_C( 145.00), SIMDE_FLOAT64_C( 149.00) } }, { { SIMDE_FLOAT64_C( -693.71), SIMDE_FLOAT64_C( -933.34), SIMDE_FLOAT64_C( 117.11), SIMDE_FLOAT64_C( 52.36) }, { SIMDE_FLOAT64_C( -694.00), SIMDE_FLOAT64_C( -933.00), SIMDE_FLOAT64_C( 117.00), SIMDE_FLOAT64_C( 52.00) } }, { { SIMDE_FLOAT64_C( 574.82), SIMDE_FLOAT64_C( -929.55), SIMDE_FLOAT64_C( 113.17), SIMDE_FLOAT64_C( -272.97) }, { SIMDE_FLOAT64_C( 575.00), SIMDE_FLOAT64_C( -930.00), SIMDE_FLOAT64_C( 113.00), SIMDE_FLOAT64_C( -273.00) } }, { { SIMDE_FLOAT64_C( 102.14), SIMDE_FLOAT64_C( -880.36), SIMDE_FLOAT64_C( 222.01), SIMDE_FLOAT64_C( -844.37) }, { SIMDE_FLOAT64_C( 102.00), SIMDE_FLOAT64_C( -880.00), SIMDE_FLOAT64_C( 222.00), SIMDE_FLOAT64_C( -844.00) } }, { { SIMDE_FLOAT64_C( 363.52), SIMDE_FLOAT64_C( -723.41), SIMDE_FLOAT64_C( -68.69), SIMDE_FLOAT64_C( 518.69) }, { SIMDE_FLOAT64_C( 364.00), SIMDE_FLOAT64_C( -723.00), SIMDE_FLOAT64_C( -69.00), SIMDE_FLOAT64_C( 519.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_svml_round_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_svml_round_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 838.26), SIMDE_FLOAT64_C( 713.09), SIMDE_FLOAT64_C( 41.96), SIMDE_FLOAT64_C( -274.12), SIMDE_FLOAT64_C( 674.75), SIMDE_FLOAT64_C( 434.35), SIMDE_FLOAT64_C( -404.30), SIMDE_FLOAT64_C( -706.45) }, { SIMDE_FLOAT64_C( 838.00), SIMDE_FLOAT64_C( 713.00), SIMDE_FLOAT64_C( 42.00), SIMDE_FLOAT64_C( -274.00), SIMDE_FLOAT64_C( 675.00), SIMDE_FLOAT64_C( 434.00), SIMDE_FLOAT64_C( -404.00), SIMDE_FLOAT64_C( -706.00) } }, { { SIMDE_FLOAT64_C( 764.33), SIMDE_FLOAT64_C( 549.73), SIMDE_FLOAT64_C( 
946.10), SIMDE_FLOAT64_C( 543.69), SIMDE_FLOAT64_C( 399.24), SIMDE_FLOAT64_C( 840.23), SIMDE_FLOAT64_C( -804.12), SIMDE_FLOAT64_C( 92.87) }, { SIMDE_FLOAT64_C( 764.00), SIMDE_FLOAT64_C( 550.00), SIMDE_FLOAT64_C( 946.00), SIMDE_FLOAT64_C( 544.00), SIMDE_FLOAT64_C( 399.00), SIMDE_FLOAT64_C( 840.00), SIMDE_FLOAT64_C( -804.00), SIMDE_FLOAT64_C( 93.00) } }, { { SIMDE_FLOAT64_C( -719.75), SIMDE_FLOAT64_C( -288.44), SIMDE_FLOAT64_C( -7.73), SIMDE_FLOAT64_C( -17.69), SIMDE_FLOAT64_C( -135.39), SIMDE_FLOAT64_C( -783.16), SIMDE_FLOAT64_C( -89.69), SIMDE_FLOAT64_C( -576.47) }, { SIMDE_FLOAT64_C( -720.00), SIMDE_FLOAT64_C( -288.00), SIMDE_FLOAT64_C( -8.00), SIMDE_FLOAT64_C( -18.00), SIMDE_FLOAT64_C( -135.00), SIMDE_FLOAT64_C( -783.00), SIMDE_FLOAT64_C( -90.00), SIMDE_FLOAT64_C( -576.00) } }, { { SIMDE_FLOAT64_C( 729.17), SIMDE_FLOAT64_C( 679.53), SIMDE_FLOAT64_C( -484.77), SIMDE_FLOAT64_C( 898.47), SIMDE_FLOAT64_C( -408.70), SIMDE_FLOAT64_C( -621.23), SIMDE_FLOAT64_C( -109.48), SIMDE_FLOAT64_C( -570.45) }, { SIMDE_FLOAT64_C( 729.00), SIMDE_FLOAT64_C( 680.00), SIMDE_FLOAT64_C( -485.00), SIMDE_FLOAT64_C( 898.00), SIMDE_FLOAT64_C( -409.00), SIMDE_FLOAT64_C( -621.00), SIMDE_FLOAT64_C( -109.00), SIMDE_FLOAT64_C( -570.00) } }, { { SIMDE_FLOAT64_C( -908.13), SIMDE_FLOAT64_C( 932.48), SIMDE_FLOAT64_C( 155.44), SIMDE_FLOAT64_C( 766.61), SIMDE_FLOAT64_C( 366.83), SIMDE_FLOAT64_C( 751.14), SIMDE_FLOAT64_C( -939.84), SIMDE_FLOAT64_C( 131.16) }, { SIMDE_FLOAT64_C( -908.00), SIMDE_FLOAT64_C( 932.00), SIMDE_FLOAT64_C( 155.00), SIMDE_FLOAT64_C( 767.00), SIMDE_FLOAT64_C( 367.00), SIMDE_FLOAT64_C( 751.00), SIMDE_FLOAT64_C( -940.00), SIMDE_FLOAT64_C( 131.00) } }, { { SIMDE_FLOAT64_C( 300.87), SIMDE_FLOAT64_C( -993.74), SIMDE_FLOAT64_C( -325.15), SIMDE_FLOAT64_C( -299.89), SIMDE_FLOAT64_C( 846.49), SIMDE_FLOAT64_C( -129.27), SIMDE_FLOAT64_C( 792.98), SIMDE_FLOAT64_C( -873.26) }, { SIMDE_FLOAT64_C( 301.00), SIMDE_FLOAT64_C( -994.00), SIMDE_FLOAT64_C( -325.00), SIMDE_FLOAT64_C( -300.00), 
SIMDE_FLOAT64_C( 846.00), SIMDE_FLOAT64_C( -129.00), SIMDE_FLOAT64_C( 793.00), SIMDE_FLOAT64_C( -873.00) } }, { { SIMDE_FLOAT64_C( 582.29), SIMDE_FLOAT64_C( -214.75), SIMDE_FLOAT64_C( 109.05), SIMDE_FLOAT64_C( -553.10), SIMDE_FLOAT64_C( 2.09), SIMDE_FLOAT64_C( -980.64), SIMDE_FLOAT64_C( -129.57), SIMDE_FLOAT64_C( -268.74) }, { SIMDE_FLOAT64_C( 582.00), SIMDE_FLOAT64_C( -215.00), SIMDE_FLOAT64_C( 109.00), SIMDE_FLOAT64_C( -553.00), SIMDE_FLOAT64_C( 2.00), SIMDE_FLOAT64_C( -981.00), SIMDE_FLOAT64_C( -130.00), SIMDE_FLOAT64_C( -269.00) } }, { { SIMDE_FLOAT64_C( 698.88), SIMDE_FLOAT64_C( 385.66), SIMDE_FLOAT64_C( -370.28), SIMDE_FLOAT64_C( -709.82), SIMDE_FLOAT64_C( 764.44), SIMDE_FLOAT64_C( 520.25), SIMDE_FLOAT64_C( -280.27), SIMDE_FLOAT64_C( 856.30) }, { SIMDE_FLOAT64_C( 699.00), SIMDE_FLOAT64_C( 386.00), SIMDE_FLOAT64_C( -370.00), SIMDE_FLOAT64_C( -710.00), SIMDE_FLOAT64_C( 764.00), SIMDE_FLOAT64_C( 520.00), SIMDE_FLOAT64_C( -280.00), SIMDE_FLOAT64_C( 856.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_svml_round_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_mask_svml_round_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 686.15), SIMDE_FLOAT64_C( 113.85), SIMDE_FLOAT64_C( 91.19), SIMDE_FLOAT64_C( 346.08), SIMDE_FLOAT64_C( -785.05), SIMDE_FLOAT64_C( 656.94), SIMDE_FLOAT64_C( 111.39), SIMDE_FLOAT64_C( -488.16) }, UINT8_C(184), { SIMDE_FLOAT64_C( -283.66), SIMDE_FLOAT64_C( 587.43), SIMDE_FLOAT64_C( -235.49), SIMDE_FLOAT64_C( 163.12), SIMDE_FLOAT64_C( 571.24), SIMDE_FLOAT64_C( 582.37), SIMDE_FLOAT64_C( -370.22), SIMDE_FLOAT64_C( 474.92) }, { SIMDE_FLOAT64_C( 686.15), SIMDE_FLOAT64_C( 113.85), SIMDE_FLOAT64_C( 91.19), 
SIMDE_FLOAT64_C( 163.00), SIMDE_FLOAT64_C( 571.00), SIMDE_FLOAT64_C( 582.00), SIMDE_FLOAT64_C( 111.39), SIMDE_FLOAT64_C( 475.00) } }, { { SIMDE_FLOAT64_C( -66.51), SIMDE_FLOAT64_C( -591.67), SIMDE_FLOAT64_C( -91.31), SIMDE_FLOAT64_C( 225.56), SIMDE_FLOAT64_C( 12.37), SIMDE_FLOAT64_C( -659.70), SIMDE_FLOAT64_C( -760.80), SIMDE_FLOAT64_C( 231.33) }, UINT8_C( 69), { SIMDE_FLOAT64_C( 115.84), SIMDE_FLOAT64_C( -400.68), SIMDE_FLOAT64_C( -849.91), SIMDE_FLOAT64_C( -49.83), SIMDE_FLOAT64_C( 85.28), SIMDE_FLOAT64_C( 836.24), SIMDE_FLOAT64_C( -935.98), SIMDE_FLOAT64_C( -823.53) }, { SIMDE_FLOAT64_C( 116.00), SIMDE_FLOAT64_C( -591.67), SIMDE_FLOAT64_C( -850.00), SIMDE_FLOAT64_C( 225.56), SIMDE_FLOAT64_C( 12.37), SIMDE_FLOAT64_C( -659.70), SIMDE_FLOAT64_C( -936.00), SIMDE_FLOAT64_C( 231.33) } }, { { SIMDE_FLOAT64_C( 182.32), SIMDE_FLOAT64_C( -721.03), SIMDE_FLOAT64_C( 833.41), SIMDE_FLOAT64_C( -706.29), SIMDE_FLOAT64_C( -209.20), SIMDE_FLOAT64_C( -511.45), SIMDE_FLOAT64_C( 10.05), SIMDE_FLOAT64_C( -621.76) }, UINT8_C(223), { SIMDE_FLOAT64_C( -826.83), SIMDE_FLOAT64_C( 949.47), SIMDE_FLOAT64_C( -164.57), SIMDE_FLOAT64_C( -197.05), SIMDE_FLOAT64_C( 424.40), SIMDE_FLOAT64_C( 768.92), SIMDE_FLOAT64_C( 211.28), SIMDE_FLOAT64_C( -666.92) }, { SIMDE_FLOAT64_C( -827.00), SIMDE_FLOAT64_C( 949.00), SIMDE_FLOAT64_C( -165.00), SIMDE_FLOAT64_C( -197.00), SIMDE_FLOAT64_C( 424.00), SIMDE_FLOAT64_C( -511.45), SIMDE_FLOAT64_C( 211.00), SIMDE_FLOAT64_C( -667.00) } }, { { SIMDE_FLOAT64_C( -5.52), SIMDE_FLOAT64_C( -776.35), SIMDE_FLOAT64_C( -326.62), SIMDE_FLOAT64_C( 233.68), SIMDE_FLOAT64_C( 454.98), SIMDE_FLOAT64_C( 714.97), SIMDE_FLOAT64_C( -650.48), SIMDE_FLOAT64_C( -945.69) }, UINT8_C(115), { SIMDE_FLOAT64_C( 299.69), SIMDE_FLOAT64_C( 139.59), SIMDE_FLOAT64_C( 701.29), SIMDE_FLOAT64_C( 363.71), SIMDE_FLOAT64_C( 316.05), SIMDE_FLOAT64_C( -116.39), SIMDE_FLOAT64_C( 642.67), SIMDE_FLOAT64_C( 149.46) }, { SIMDE_FLOAT64_C( 300.00), SIMDE_FLOAT64_C( 140.00), SIMDE_FLOAT64_C( -326.62), 
SIMDE_FLOAT64_C( 233.68), SIMDE_FLOAT64_C( 316.00), SIMDE_FLOAT64_C( -116.00), SIMDE_FLOAT64_C( 643.00), SIMDE_FLOAT64_C( -945.69) } }, { { SIMDE_FLOAT64_C( 177.32), SIMDE_FLOAT64_C( -566.52), SIMDE_FLOAT64_C( 638.01), SIMDE_FLOAT64_C( -812.62), SIMDE_FLOAT64_C( -188.29), SIMDE_FLOAT64_C( -108.94), SIMDE_FLOAT64_C( -639.45), SIMDE_FLOAT64_C( -238.81) }, UINT8_C( 57), { SIMDE_FLOAT64_C( 163.50), SIMDE_FLOAT64_C( -814.42), SIMDE_FLOAT64_C( 495.41), SIMDE_FLOAT64_C( -625.21), SIMDE_FLOAT64_C( -481.34), SIMDE_FLOAT64_C( -510.10), SIMDE_FLOAT64_C( -401.56), SIMDE_FLOAT64_C( 192.04) }, { SIMDE_FLOAT64_C( 164.00), SIMDE_FLOAT64_C( -566.52), SIMDE_FLOAT64_C( 638.01), SIMDE_FLOAT64_C( -625.00), SIMDE_FLOAT64_C( -481.00), SIMDE_FLOAT64_C( -510.00), SIMDE_FLOAT64_C( -639.45), SIMDE_FLOAT64_C( -238.81) } }, { { SIMDE_FLOAT64_C( 723.58), SIMDE_FLOAT64_C( -946.57), SIMDE_FLOAT64_C( -92.99), SIMDE_FLOAT64_C( -926.90), SIMDE_FLOAT64_C( -892.27), SIMDE_FLOAT64_C( -227.94), SIMDE_FLOAT64_C( 372.79), SIMDE_FLOAT64_C( 247.32) }, UINT8_C(253), { SIMDE_FLOAT64_C( -263.51), SIMDE_FLOAT64_C( -436.63), SIMDE_FLOAT64_C( 356.97), SIMDE_FLOAT64_C( -620.84), SIMDE_FLOAT64_C( 712.84), SIMDE_FLOAT64_C( -465.71), SIMDE_FLOAT64_C( -187.36), SIMDE_FLOAT64_C( 350.85) }, { SIMDE_FLOAT64_C( -264.00), SIMDE_FLOAT64_C( -946.57), SIMDE_FLOAT64_C( 357.00), SIMDE_FLOAT64_C( -621.00), SIMDE_FLOAT64_C( 713.00), SIMDE_FLOAT64_C( -466.00), SIMDE_FLOAT64_C( -187.00), SIMDE_FLOAT64_C( 351.00) } }, { { SIMDE_FLOAT64_C( -278.33), SIMDE_FLOAT64_C( 624.35), SIMDE_FLOAT64_C( -758.09), SIMDE_FLOAT64_C( 82.22), SIMDE_FLOAT64_C( -614.46), SIMDE_FLOAT64_C( 968.40), SIMDE_FLOAT64_C( -754.27), SIMDE_FLOAT64_C( -428.88) }, UINT8_C( 24), { SIMDE_FLOAT64_C( -379.49), SIMDE_FLOAT64_C( 89.78), SIMDE_FLOAT64_C( 953.71), SIMDE_FLOAT64_C( 218.96), SIMDE_FLOAT64_C( -718.17), SIMDE_FLOAT64_C( 677.29), SIMDE_FLOAT64_C( 272.38), SIMDE_FLOAT64_C( 188.83) }, { SIMDE_FLOAT64_C( -278.33), SIMDE_FLOAT64_C( 624.35), SIMDE_FLOAT64_C( 
-758.09), SIMDE_FLOAT64_C( 219.00), SIMDE_FLOAT64_C( -718.00), SIMDE_FLOAT64_C( 968.40), SIMDE_FLOAT64_C( -754.27), SIMDE_FLOAT64_C( -428.88) } }, { { SIMDE_FLOAT64_C( 750.39), SIMDE_FLOAT64_C( 380.12), SIMDE_FLOAT64_C( 960.90), SIMDE_FLOAT64_C( 123.18), SIMDE_FLOAT64_C( -372.56), SIMDE_FLOAT64_C( -565.75), SIMDE_FLOAT64_C( 859.67), SIMDE_FLOAT64_C( 190.81) }, UINT8_C(196), { SIMDE_FLOAT64_C( -761.17), SIMDE_FLOAT64_C( -96.36), SIMDE_FLOAT64_C( -674.48), SIMDE_FLOAT64_C( 51.47), SIMDE_FLOAT64_C( -745.51), SIMDE_FLOAT64_C( 47.19), SIMDE_FLOAT64_C( -324.18), SIMDE_FLOAT64_C( -503.60) }, { SIMDE_FLOAT64_C( 750.39), SIMDE_FLOAT64_C( 380.12), SIMDE_FLOAT64_C( -674.00), SIMDE_FLOAT64_C( 123.18), SIMDE_FLOAT64_C( -372.56), SIMDE_FLOAT64_C( -565.75), SIMDE_FLOAT64_C( -324.00), SIMDE_FLOAT64_C( -504.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_svml_round_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_svml_sqrt_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 770.44), SIMDE_FLOAT64_C( 798.21) }, { SIMDE_FLOAT64_C( 27.76), SIMDE_FLOAT64_C( 28.25) } }, { { SIMDE_FLOAT64_C( 609.46), SIMDE_FLOAT64_C( 219.02) }, { SIMDE_FLOAT64_C( 24.69), SIMDE_FLOAT64_C( 14.80) } }, { { SIMDE_FLOAT64_C( 514.28), SIMDE_FLOAT64_C( 301.39) }, { SIMDE_FLOAT64_C( 22.68), SIMDE_FLOAT64_C( 17.36) } }, { { SIMDE_FLOAT64_C( 520.55), SIMDE_FLOAT64_C( 108.95) }, { SIMDE_FLOAT64_C( 22.82), SIMDE_FLOAT64_C( 10.44) } }, { { SIMDE_FLOAT64_C( 417.19), SIMDE_FLOAT64_C( 212.16) }, { SIMDE_FLOAT64_C( 20.43), SIMDE_FLOAT64_C( 14.57) } }, { { SIMDE_FLOAT64_C( 40.41), SIMDE_FLOAT64_C( 807.43) }, { SIMDE_FLOAT64_C( 6.36), SIMDE_FLOAT64_C( 28.42) } }, 
{ { SIMDE_FLOAT64_C( 746.18), SIMDE_FLOAT64_C( 239.87) }, { SIMDE_FLOAT64_C( 27.32), SIMDE_FLOAT64_C( 15.49) } }, { { SIMDE_FLOAT64_C( 461.80), SIMDE_FLOAT64_C( 420.17) }, { SIMDE_FLOAT64_C( 21.49), SIMDE_FLOAT64_C( 20.50) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_svml_sqrt_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_svml_sqrt_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 935.36), SIMDE_FLOAT32_C( 463.00), SIMDE_FLOAT32_C( 356.54), SIMDE_FLOAT32_C( 614.58), SIMDE_FLOAT32_C( 720.00), SIMDE_FLOAT32_C( 747.09), SIMDE_FLOAT32_C( 873.09), SIMDE_FLOAT32_C( 461.84) }, { SIMDE_FLOAT32_C( 30.58), SIMDE_FLOAT32_C( 21.52), SIMDE_FLOAT32_C( 18.88), SIMDE_FLOAT32_C( 24.79), SIMDE_FLOAT32_C( 26.83), SIMDE_FLOAT32_C( 27.33), SIMDE_FLOAT32_C( 29.55), SIMDE_FLOAT32_C( 21.49) } }, { { SIMDE_FLOAT32_C( 718.30), SIMDE_FLOAT32_C( 297.75), SIMDE_FLOAT32_C( 46.73), SIMDE_FLOAT32_C( -42.51), SIMDE_FLOAT32_C( 207.50), SIMDE_FLOAT32_C( 492.51), SIMDE_FLOAT32_C( 15.08), SIMDE_FLOAT32_C( 719.29) }, { SIMDE_FLOAT32_C( 26.80), SIMDE_FLOAT32_C( 17.26), SIMDE_FLOAT32_C( 6.84), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 14.40), SIMDE_FLOAT32_C( 22.19), SIMDE_FLOAT32_C( 3.88), SIMDE_FLOAT32_C( 26.82) } }, { { SIMDE_FLOAT32_C( 347.10), SIMDE_FLOAT32_C( 575.60), SIMDE_FLOAT32_C( 719.84), SIMDE_FLOAT32_C( 241.71), SIMDE_FLOAT32_C( 139.48), SIMDE_FLOAT32_C( 757.17), SIMDE_FLOAT32_C( 132.17), SIMDE_FLOAT32_C( 152.46) }, { SIMDE_FLOAT32_C( 18.63), SIMDE_FLOAT32_C( 23.99), SIMDE_FLOAT32_C( 26.83), SIMDE_FLOAT32_C( 15.55), SIMDE_FLOAT32_C( 11.81), SIMDE_FLOAT32_C( 27.52), SIMDE_FLOAT32_C( 11.50), SIMDE_FLOAT32_C( 12.35) } }, { { SIMDE_FLOAT32_C( 780.23), SIMDE_FLOAT32_C( 823.65), SIMDE_FLOAT32_C( 290.06), SIMDE_FLOAT32_C( 
492.64), SIMDE_FLOAT32_C( 944.24), SIMDE_FLOAT32_C( 836.21), SIMDE_FLOAT32_C( 785.55), SIMDE_FLOAT32_C( 879.60) }, { SIMDE_FLOAT32_C( 27.93), SIMDE_FLOAT32_C( 28.70), SIMDE_FLOAT32_C( 17.03), SIMDE_FLOAT32_C( 22.20), SIMDE_FLOAT32_C( 30.73), SIMDE_FLOAT32_C( 28.92), SIMDE_FLOAT32_C( 28.03), SIMDE_FLOAT32_C( 29.66) } }, { { SIMDE_FLOAT32_C( 299.21), SIMDE_FLOAT32_C( 142.09), SIMDE_FLOAT32_C( 494.18), SIMDE_FLOAT32_C( 19.21), SIMDE_FLOAT32_C( 989.19), SIMDE_FLOAT32_C( 367.28), SIMDE_FLOAT32_C( 581.05), SIMDE_FLOAT32_C( 707.48) }, { SIMDE_FLOAT32_C( 17.30), SIMDE_FLOAT32_C( 11.92), SIMDE_FLOAT32_C( 22.23), SIMDE_FLOAT32_C( 4.38), SIMDE_FLOAT32_C( 31.45), SIMDE_FLOAT32_C( 19.16), SIMDE_FLOAT32_C( 24.10), SIMDE_FLOAT32_C( 26.60) } }, { { SIMDE_FLOAT32_C( 765.03), SIMDE_FLOAT32_C( 727.79), SIMDE_FLOAT32_C( 764.97), SIMDE_FLOAT32_C( -27.47), SIMDE_FLOAT32_C( 220.30), SIMDE_FLOAT32_C( 880.05), SIMDE_FLOAT32_C( 791.82), SIMDE_FLOAT32_C( 667.40) }, { SIMDE_FLOAT32_C( 27.66), SIMDE_FLOAT32_C( 26.98), SIMDE_FLOAT32_C( 27.66), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 14.84), SIMDE_FLOAT32_C( 29.67), SIMDE_FLOAT32_C( 28.14), SIMDE_FLOAT32_C( 25.83) } }, { { SIMDE_FLOAT32_C( 455.65), SIMDE_FLOAT32_C( 511.66), SIMDE_FLOAT32_C( -90.90), SIMDE_FLOAT32_C( 695.13), SIMDE_FLOAT32_C( 268.83), SIMDE_FLOAT32_C( 141.28), SIMDE_FLOAT32_C( 947.59), SIMDE_FLOAT32_C( 49.06) }, { SIMDE_FLOAT32_C( 21.35), SIMDE_FLOAT32_C( 22.62), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 26.37), SIMDE_FLOAT32_C( 16.40), SIMDE_FLOAT32_C( 11.89), SIMDE_FLOAT32_C( 30.78), SIMDE_FLOAT32_C( 7.00) } }, { { SIMDE_FLOAT32_C( -35.07), SIMDE_FLOAT32_C( 237.65), SIMDE_FLOAT32_C( 641.70), SIMDE_FLOAT32_C( -90.83), SIMDE_FLOAT32_C( 73.86), SIMDE_FLOAT32_C( 427.26), SIMDE_FLOAT32_C( 888.77), SIMDE_FLOAT32_C( 473.07) }, { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 15.42), SIMDE_FLOAT32_C( 25.33), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 8.59), SIMDE_FLOAT32_C( 20.67), SIMDE_FLOAT32_C( 29.81), SIMDE_FLOAT32_C( 21.75) } } }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_svml_sqrt_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm256_svml_sqrt_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 898.02), SIMDE_FLOAT64_C( 77.23), SIMDE_FLOAT64_C( 690.30), SIMDE_FLOAT64_C( 742.27) }, { SIMDE_FLOAT64_C( 29.97), SIMDE_FLOAT64_C( 8.79), SIMDE_FLOAT64_C( 26.27), SIMDE_FLOAT64_C( 27.24) } }, { { SIMDE_FLOAT64_C( 301.75), SIMDE_FLOAT64_C( 377.86), SIMDE_FLOAT64_C( 38.07), SIMDE_FLOAT64_C( 270.72) }, { SIMDE_FLOAT64_C( 17.37), SIMDE_FLOAT64_C( 19.44), SIMDE_FLOAT64_C( 6.17), SIMDE_FLOAT64_C( 16.45) } }, { { SIMDE_FLOAT64_C( 661.06), SIMDE_FLOAT64_C( 955.80), SIMDE_FLOAT64_C( 540.55), SIMDE_FLOAT64_C( 699.66) }, { SIMDE_FLOAT64_C( 25.71), SIMDE_FLOAT64_C( 30.92), SIMDE_FLOAT64_C( 23.25), SIMDE_FLOAT64_C( 26.45) } }, { { SIMDE_FLOAT64_C( 41.79), SIMDE_FLOAT64_C( 429.36), SIMDE_FLOAT64_C( 830.75), SIMDE_FLOAT64_C( 836.32) }, { SIMDE_FLOAT64_C( 6.46), SIMDE_FLOAT64_C( 20.72), SIMDE_FLOAT64_C( 28.82), SIMDE_FLOAT64_C( 28.92) } }, { { SIMDE_FLOAT64_C( 153.46), SIMDE_FLOAT64_C( 994.23), SIMDE_FLOAT64_C( 913.53), SIMDE_FLOAT64_C( 889.00) }, { SIMDE_FLOAT64_C( 12.39), SIMDE_FLOAT64_C( 31.53), SIMDE_FLOAT64_C( 30.22), SIMDE_FLOAT64_C( 29.82) } }, { { SIMDE_FLOAT64_C( 140.95), SIMDE_FLOAT64_C( 65.36), SIMDE_FLOAT64_C( 968.68), SIMDE_FLOAT64_C( 947.21) }, { SIMDE_FLOAT64_C( 11.87), SIMDE_FLOAT64_C( 8.08), SIMDE_FLOAT64_C( 31.12), SIMDE_FLOAT64_C( 30.78) } }, { { SIMDE_FLOAT64_C( -31.19), SIMDE_FLOAT64_C( 466.94), SIMDE_FLOAT64_C( 225.29), SIMDE_FLOAT64_C( 967.56) }, { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 21.61), SIMDE_FLOAT64_C( 15.01), SIMDE_FLOAT64_C( 31.11) } }, { { SIMDE_FLOAT64_C( 710.29), SIMDE_FLOAT64_C( 718.44), SIMDE_FLOAT64_C( 305.66), SIMDE_FLOAT64_C( 608.32) 
}, { SIMDE_FLOAT64_C( 26.65), SIMDE_FLOAT64_C( 26.80), SIMDE_FLOAT64_C( 17.48), SIMDE_FLOAT64_C( 24.66) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_svml_sqrt_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_svml_sqrt_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 914.68), SIMDE_FLOAT32_C( 142.36), SIMDE_FLOAT32_C( 751.77), SIMDE_FLOAT32_C( 42.61), SIMDE_FLOAT32_C( 433.18), SIMDE_FLOAT32_C( -95.01), SIMDE_FLOAT32_C( 535.55), SIMDE_FLOAT32_C( 168.98), SIMDE_FLOAT32_C( 508.03), SIMDE_FLOAT32_C( 713.68), SIMDE_FLOAT32_C( 502.38), SIMDE_FLOAT32_C( 504.11), SIMDE_FLOAT32_C( 643.10), SIMDE_FLOAT32_C( 546.21), SIMDE_FLOAT32_C( 975.24), SIMDE_FLOAT32_C( 770.62) }, { SIMDE_FLOAT32_C( 30.24), SIMDE_FLOAT32_C( 11.93), SIMDE_FLOAT32_C( 27.42), SIMDE_FLOAT32_C( 6.53), SIMDE_FLOAT32_C( 20.81), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 23.14), SIMDE_FLOAT32_C( 13.00), SIMDE_FLOAT32_C( 22.54), SIMDE_FLOAT32_C( 26.71), SIMDE_FLOAT32_C( 22.41), SIMDE_FLOAT32_C( 22.45), SIMDE_FLOAT32_C( 25.36), SIMDE_FLOAT32_C( 23.37), SIMDE_FLOAT32_C( 31.23), SIMDE_FLOAT32_C( 27.76) } }, { { SIMDE_FLOAT32_C( 799.15), SIMDE_FLOAT32_C( 249.41), SIMDE_FLOAT32_C( 246.93), SIMDE_FLOAT32_C( -33.60), SIMDE_FLOAT32_C( 336.37), SIMDE_FLOAT32_C( 867.92), SIMDE_FLOAT32_C( 50.92), SIMDE_FLOAT32_C( 348.52), SIMDE_FLOAT32_C( 870.30), SIMDE_FLOAT32_C( 193.09), SIMDE_FLOAT32_C( 153.59), SIMDE_FLOAT32_C( 803.32), SIMDE_FLOAT32_C( 802.44), SIMDE_FLOAT32_C( 360.38), SIMDE_FLOAT32_C( 481.46), SIMDE_FLOAT32_C( 717.12) }, { SIMDE_FLOAT32_C( 28.27), SIMDE_FLOAT32_C( 15.79), SIMDE_FLOAT32_C( 15.71), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 18.34), SIMDE_FLOAT32_C( 29.46), SIMDE_FLOAT32_C( 7.14), SIMDE_FLOAT32_C( 18.67), SIMDE_FLOAT32_C( 29.50), 
SIMDE_FLOAT32_C( 13.90), SIMDE_FLOAT32_C( 12.39), SIMDE_FLOAT32_C( 28.34), SIMDE_FLOAT32_C( 28.33), SIMDE_FLOAT32_C( 18.98), SIMDE_FLOAT32_C( 21.94), SIMDE_FLOAT32_C( 26.78) } }, { { SIMDE_FLOAT32_C( 602.74), SIMDE_FLOAT32_C( 233.23), SIMDE_FLOAT32_C( 859.73), SIMDE_FLOAT32_C( 35.92), SIMDE_FLOAT32_C( 238.22), SIMDE_FLOAT32_C( 395.29), SIMDE_FLOAT32_C( 304.89), SIMDE_FLOAT32_C( 846.24), SIMDE_FLOAT32_C( 108.97), SIMDE_FLOAT32_C( 907.27), SIMDE_FLOAT32_C( 350.35), SIMDE_FLOAT32_C( 852.07), SIMDE_FLOAT32_C( 453.48), SIMDE_FLOAT32_C( 325.59), SIMDE_FLOAT32_C( 622.69), SIMDE_FLOAT32_C( 252.63) }, { SIMDE_FLOAT32_C( 24.55), SIMDE_FLOAT32_C( 15.27), SIMDE_FLOAT32_C( 29.32), SIMDE_FLOAT32_C( 5.99), SIMDE_FLOAT32_C( 15.43), SIMDE_FLOAT32_C( 19.88), SIMDE_FLOAT32_C( 17.46), SIMDE_FLOAT32_C( 29.09), SIMDE_FLOAT32_C( 10.44), SIMDE_FLOAT32_C( 30.12), SIMDE_FLOAT32_C( 18.72), SIMDE_FLOAT32_C( 29.19), SIMDE_FLOAT32_C( 21.30), SIMDE_FLOAT32_C( 18.04), SIMDE_FLOAT32_C( 24.95), SIMDE_FLOAT32_C( 15.89) } }, { { SIMDE_FLOAT32_C( 675.00), SIMDE_FLOAT32_C( 969.62), SIMDE_FLOAT32_C( 319.04), SIMDE_FLOAT32_C( 11.37), SIMDE_FLOAT32_C( 837.54), SIMDE_FLOAT32_C( 469.95), SIMDE_FLOAT32_C( 459.89), SIMDE_FLOAT32_C( 707.84), SIMDE_FLOAT32_C( 763.05), SIMDE_FLOAT32_C( 713.48), SIMDE_FLOAT32_C( 511.15), SIMDE_FLOAT32_C( 565.49), SIMDE_FLOAT32_C( 73.86), SIMDE_FLOAT32_C( -7.39), SIMDE_FLOAT32_C( 282.61), SIMDE_FLOAT32_C( 776.60) }, { SIMDE_FLOAT32_C( 25.98), SIMDE_FLOAT32_C( 31.14), SIMDE_FLOAT32_C( 17.86), SIMDE_FLOAT32_C( 3.37), SIMDE_FLOAT32_C( 28.94), SIMDE_FLOAT32_C( 21.68), SIMDE_FLOAT32_C( 21.44), SIMDE_FLOAT32_C( 26.61), SIMDE_FLOAT32_C( 27.62), SIMDE_FLOAT32_C( 26.71), SIMDE_FLOAT32_C( 22.61), SIMDE_FLOAT32_C( 23.78), SIMDE_FLOAT32_C( 8.59), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 16.81), SIMDE_FLOAT32_C( 27.87) } }, { { SIMDE_FLOAT32_C( 325.84), SIMDE_FLOAT32_C( 142.35), SIMDE_FLOAT32_C( 912.52), SIMDE_FLOAT32_C( 664.06), SIMDE_FLOAT32_C( 637.63), SIMDE_FLOAT32_C( 217.41), SIMDE_FLOAT32_C( 
510.30), SIMDE_FLOAT32_C( 846.60), SIMDE_FLOAT32_C( 124.68), SIMDE_FLOAT32_C( 960.65), SIMDE_FLOAT32_C( 698.67), SIMDE_FLOAT32_C( 678.16), SIMDE_FLOAT32_C( 286.24), SIMDE_FLOAT32_C( 321.36), SIMDE_FLOAT32_C( -69.20), SIMDE_FLOAT32_C( -38.77) }, { SIMDE_FLOAT32_C( 18.05), SIMDE_FLOAT32_C( 11.93), SIMDE_FLOAT32_C( 30.21), SIMDE_FLOAT32_C( 25.77), SIMDE_FLOAT32_C( 25.25), SIMDE_FLOAT32_C( 14.74), SIMDE_FLOAT32_C( 22.59), SIMDE_FLOAT32_C( 29.10), SIMDE_FLOAT32_C( 11.17), SIMDE_FLOAT32_C( 30.99), SIMDE_FLOAT32_C( 26.43), SIMDE_FLOAT32_C( 26.04), SIMDE_FLOAT32_C( 16.92), SIMDE_FLOAT32_C( 17.93), SIMDE_MATH_NANF, SIMDE_MATH_NANF } }, { { SIMDE_FLOAT32_C( 290.98), SIMDE_FLOAT32_C( 349.84), SIMDE_FLOAT32_C( 72.60), SIMDE_FLOAT32_C( 128.51), SIMDE_FLOAT32_C( 919.79), SIMDE_FLOAT32_C( 632.49), SIMDE_FLOAT32_C( 936.35), SIMDE_FLOAT32_C( 682.84), SIMDE_FLOAT32_C( 345.97), SIMDE_FLOAT32_C( 447.51), SIMDE_FLOAT32_C( 248.33), SIMDE_FLOAT32_C( 519.83), SIMDE_FLOAT32_C( 540.12), SIMDE_FLOAT32_C( 630.94), SIMDE_FLOAT32_C( 296.43), SIMDE_FLOAT32_C( 965.96) }, { SIMDE_FLOAT32_C( 17.06), SIMDE_FLOAT32_C( 18.70), SIMDE_FLOAT32_C( 8.52), SIMDE_FLOAT32_C( 11.34), SIMDE_FLOAT32_C( 30.33), SIMDE_FLOAT32_C( 25.15), SIMDE_FLOAT32_C( 30.60), SIMDE_FLOAT32_C( 26.13), SIMDE_FLOAT32_C( 18.60), SIMDE_FLOAT32_C( 21.15), SIMDE_FLOAT32_C( 15.76), SIMDE_FLOAT32_C( 22.80), SIMDE_FLOAT32_C( 23.24), SIMDE_FLOAT32_C( 25.12), SIMDE_FLOAT32_C( 17.22), SIMDE_FLOAT32_C( 31.08) } }, { { SIMDE_FLOAT32_C( 873.29), SIMDE_FLOAT32_C( 208.95), SIMDE_FLOAT32_C( 630.01), SIMDE_FLOAT32_C( 510.92), SIMDE_FLOAT32_C( 526.36), SIMDE_FLOAT32_C( 140.32), SIMDE_FLOAT32_C( 357.53), SIMDE_FLOAT32_C( 751.05), SIMDE_FLOAT32_C( 100.97), SIMDE_FLOAT32_C( 56.20), SIMDE_FLOAT32_C( 429.21), SIMDE_FLOAT32_C( 487.20), SIMDE_FLOAT32_C( 477.55), SIMDE_FLOAT32_C( 460.01), SIMDE_FLOAT32_C( 548.44), SIMDE_FLOAT32_C( 868.53) }, { SIMDE_FLOAT32_C( 29.55), SIMDE_FLOAT32_C( 14.46), SIMDE_FLOAT32_C( 25.10), SIMDE_FLOAT32_C( 22.60), 
SIMDE_FLOAT32_C( 22.94), SIMDE_FLOAT32_C( 11.85), SIMDE_FLOAT32_C( 18.91), SIMDE_FLOAT32_C( 27.41), SIMDE_FLOAT32_C( 10.05), SIMDE_FLOAT32_C( 7.50), SIMDE_FLOAT32_C( 20.72), SIMDE_FLOAT32_C( 22.07), SIMDE_FLOAT32_C( 21.85), SIMDE_FLOAT32_C( 21.45), SIMDE_FLOAT32_C( 23.42), SIMDE_FLOAT32_C( 29.47) } }, { { SIMDE_FLOAT32_C( 909.84), SIMDE_FLOAT32_C( 721.04), SIMDE_FLOAT32_C( -2.95), SIMDE_FLOAT32_C( 829.64), SIMDE_FLOAT32_C( 353.53), SIMDE_FLOAT32_C( -66.60), SIMDE_FLOAT32_C( 512.48), SIMDE_FLOAT32_C( 799.49), SIMDE_FLOAT32_C( 480.91), SIMDE_FLOAT32_C( 860.80), SIMDE_FLOAT32_C( 319.32), SIMDE_FLOAT32_C( 21.02), SIMDE_FLOAT32_C( 491.75), SIMDE_FLOAT32_C( 715.75), SIMDE_FLOAT32_C( -13.02), SIMDE_FLOAT32_C( 365.04) }, { SIMDE_FLOAT32_C( 30.16), SIMDE_FLOAT32_C( 26.85), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 28.80), SIMDE_FLOAT32_C( 18.80), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 22.64), SIMDE_FLOAT32_C( 28.28), SIMDE_FLOAT32_C( 21.93), SIMDE_FLOAT32_C( 29.34), SIMDE_FLOAT32_C( 17.87), SIMDE_FLOAT32_C( 4.59), SIMDE_FLOAT32_C( 22.18), SIMDE_FLOAT32_C( 26.75), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 19.11) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_svml_sqrt_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } static int test_simde_mm512_svml_sqrt_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 375.58), SIMDE_FLOAT64_C( 46.46), SIMDE_FLOAT64_C( 312.83), SIMDE_FLOAT64_C( 445.14), SIMDE_FLOAT64_C( 187.32), SIMDE_FLOAT64_C( 952.90), SIMDE_FLOAT64_C( 987.69), SIMDE_FLOAT64_C( 406.24) }, { SIMDE_FLOAT64_C( 19.38), SIMDE_FLOAT64_C( 6.82), SIMDE_FLOAT64_C( 17.69), SIMDE_FLOAT64_C( 21.10), SIMDE_FLOAT64_C( 13.69), SIMDE_FLOAT64_C( 30.87), SIMDE_FLOAT64_C( 31.43), SIMDE_FLOAT64_C( 20.16) } }, { { SIMDE_FLOAT64_C( 293.47), SIMDE_FLOAT64_C( 
304.52), SIMDE_FLOAT64_C( 836.60), SIMDE_FLOAT64_C( 342.20), SIMDE_FLOAT64_C( 740.40), SIMDE_FLOAT64_C( 328.94), SIMDE_FLOAT64_C( 360.36), SIMDE_FLOAT64_C( 97.23) }, { SIMDE_FLOAT64_C( 17.13), SIMDE_FLOAT64_C( 17.45), SIMDE_FLOAT64_C( 28.92), SIMDE_FLOAT64_C( 18.50), SIMDE_FLOAT64_C( 27.21), SIMDE_FLOAT64_C( 18.14), SIMDE_FLOAT64_C( 18.98), SIMDE_FLOAT64_C( 9.86) } }, { { SIMDE_FLOAT64_C( 931.22), SIMDE_FLOAT64_C( 239.31), SIMDE_FLOAT64_C( 533.01), SIMDE_FLOAT64_C( 413.09), SIMDE_FLOAT64_C( -30.52), SIMDE_FLOAT64_C( 220.33), SIMDE_FLOAT64_C( 224.40), SIMDE_FLOAT64_C( 591.21) }, { SIMDE_FLOAT64_C( 30.52), SIMDE_FLOAT64_C( 15.47), SIMDE_FLOAT64_C( 23.09), SIMDE_FLOAT64_C( 20.32), SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 14.84), SIMDE_FLOAT64_C( 14.98), SIMDE_FLOAT64_C( 24.31) } }, { { SIMDE_FLOAT64_C( 737.21), SIMDE_FLOAT64_C( 927.12), SIMDE_FLOAT64_C( 685.90), SIMDE_FLOAT64_C( 452.75), SIMDE_FLOAT64_C( 896.77), SIMDE_FLOAT64_C( 752.44), SIMDE_FLOAT64_C( 780.06), SIMDE_FLOAT64_C( 272.35) }, { SIMDE_FLOAT64_C( 27.15), SIMDE_FLOAT64_C( 30.45), SIMDE_FLOAT64_C( 26.19), SIMDE_FLOAT64_C( 21.28), SIMDE_FLOAT64_C( 29.95), SIMDE_FLOAT64_C( 27.43), SIMDE_FLOAT64_C( 27.93), SIMDE_FLOAT64_C( 16.50) } }, { { SIMDE_FLOAT64_C( 898.90), SIMDE_FLOAT64_C( 92.89), SIMDE_FLOAT64_C( 817.49), SIMDE_FLOAT64_C( 86.22), SIMDE_FLOAT64_C( 45.79), SIMDE_FLOAT64_C( 805.18), SIMDE_FLOAT64_C( 592.46), SIMDE_FLOAT64_C( 439.26) }, { SIMDE_FLOAT64_C( 29.98), SIMDE_FLOAT64_C( 9.64), SIMDE_FLOAT64_C( 28.59), SIMDE_FLOAT64_C( 9.29), SIMDE_FLOAT64_C( 6.77), SIMDE_FLOAT64_C( 28.38), SIMDE_FLOAT64_C( 24.34), SIMDE_FLOAT64_C( 20.96) } }, { { SIMDE_FLOAT64_C( 109.70), SIMDE_FLOAT64_C( 429.07), SIMDE_FLOAT64_C( 881.46), SIMDE_FLOAT64_C( 950.09), SIMDE_FLOAT64_C( 858.01), SIMDE_FLOAT64_C( 241.82), SIMDE_FLOAT64_C( 47.32), SIMDE_FLOAT64_C( 789.23) }, { SIMDE_FLOAT64_C( 10.47), SIMDE_FLOAT64_C( 20.71), SIMDE_FLOAT64_C( 29.69), SIMDE_FLOAT64_C( 30.82), SIMDE_FLOAT64_C( 29.29), SIMDE_FLOAT64_C( 15.55), SIMDE_FLOAT64_C( 
6.88), SIMDE_FLOAT64_C( 28.09) } }, { { SIMDE_FLOAT64_C( 581.13), SIMDE_FLOAT64_C( 680.33), SIMDE_FLOAT64_C( 202.32), SIMDE_FLOAT64_C( 650.61), SIMDE_FLOAT64_C( -99.34), SIMDE_FLOAT64_C( 526.72), SIMDE_FLOAT64_C( 241.82), SIMDE_FLOAT64_C( 737.87) }, { SIMDE_FLOAT64_C( 24.11), SIMDE_FLOAT64_C( 26.08), SIMDE_FLOAT64_C( 14.22), SIMDE_FLOAT64_C( 25.51), SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 22.95), SIMDE_FLOAT64_C( 15.55), SIMDE_FLOAT64_C( 27.16) } }, { { SIMDE_FLOAT64_C( 453.84), SIMDE_FLOAT64_C( -72.28), SIMDE_FLOAT64_C( 190.62), SIMDE_FLOAT64_C( 350.61), SIMDE_FLOAT64_C( 780.16), SIMDE_FLOAT64_C( -29.31), SIMDE_FLOAT64_C( 722.96), SIMDE_FLOAT64_C( 679.07) }, { SIMDE_FLOAT64_C( 21.30), SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 13.81), SIMDE_FLOAT64_C( 18.72), SIMDE_FLOAT64_C( 27.93), SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 26.89), SIMDE_FLOAT64_C( 26.06) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_svml_sqrt_pd(a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } static int test_simde_mm_tan_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm_set_ps(SIMDE_FLOAT32_C( -1.15), SIMDE_FLOAT32_C( 3.76), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 1.76)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( 1.87), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( -0.54)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 2.88), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( -0.35)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 
-678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -3.14), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( -0.32)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( -0.63)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64)), simde_mm_set_ps(SIMDE_FLOAT32_C( 3.55), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( -0.01)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 1.91), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -1.81)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09)), simde_mm_set_ps(SIMDE_FLOAT32_C( -1.19), SIMDE_FLOAT32_C( -6.68), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( -1.31)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_tan_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm_tan_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.42), SIMDE_FLOAT64_C( 1.76)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01)), simde_mm_set_pd(SIMDE_FLOAT64_C( -1.15), SIMDE_FLOAT64_C( 3.76)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06)), simde_mm_set_pd(SIMDE_FLOAT64_C( 1.56), SIMDE_FLOAT64_C( -0.54)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24)), 
simde_mm_set_pd(SIMDE_FLOAT64_C( 1.36), SIMDE_FLOAT64_C( 1.87)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( -0.35)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.86), SIMDE_FLOAT64_C( 2.88)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.05), SIMDE_FLOAT64_C( -0.32)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( -3.14)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_tan_pd(test_vec[i].a); simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_tan_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( 1.87), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -1.15), SIMDE_FLOAT32_C( 3.76), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 1.76)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -3.14), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 2.88), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( -0.35)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -305.07), 
SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 3.55), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( -0.63)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -1.19), SIMDE_FLOAT32_C( -6.68), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( -1.31), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 1.91), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -1.81)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 28.08), SIMDE_FLOAT32_C( 841.21)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -1.66), SIMDE_FLOAT32_C( 2.28), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.91)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -30.79)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -11.51), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 6.62), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 21.84), SIMDE_FLOAT32_C( 0.72)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( 398.82)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 1.40), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 
1.11), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( -0.81), SIMDE_FLOAT32_C( -3.22), SIMDE_FLOAT32_C( -0.16)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 254.31)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 123.48), SIMDE_FLOAT32_C( 7.37), SIMDE_FLOAT32_C( -1.68), SIMDE_FLOAT32_C( -3.54), SIMDE_FLOAT32_C( -1.67), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( -0.16)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_tan_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_tan_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -1.15), SIMDE_FLOAT64_C( 3.76), SIMDE_FLOAT64_C( -0.42), SIMDE_FLOAT64_C( 1.76)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.36), SIMDE_FLOAT64_C( 1.87), SIMDE_FLOAT64_C( 1.56), SIMDE_FLOAT64_C( -0.54)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.86), SIMDE_FLOAT64_C( 2.88), SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( -0.35)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( -3.14), SIMDE_FLOAT64_C( -0.05), SIMDE_FLOAT64_C( -0.32)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( -417.54), SIMDE_FLOAT64_C( 696.87)), 
simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( -0.63)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -923.64)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 3.55), SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( -0.94), SIMDE_FLOAT64_C( -0.01)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -660.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.62), SIMDE_FLOAT64_C( 1.91), SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( -1.81)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 687.09)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -1.19), SIMDE_FLOAT64_C( -6.68), SIMDE_FLOAT64_C( 1.24), SIMDE_FLOAT64_C( -1.31)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_tan_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_tan_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -3.14), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 2.88), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( 1.87), SIMDE_FLOAT32_C( 1.56), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( -1.15), SIMDE_FLOAT32_C( 3.76), SIMDE_FLOAT32_C( 
-0.42), SIMDE_FLOAT32_C( 1.76)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.19), SIMDE_FLOAT32_C( -6.68), SIMDE_FLOAT32_C( 1.24), SIMDE_FLOAT32_C( -1.31), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 1.91), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -1.81), SIMDE_FLOAT32_C( 3.55), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( -0.63)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 28.08), SIMDE_FLOAT32_C( 841.21)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -11.51), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 6.62), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 21.84), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -1.66), SIMDE_FLOAT32_C( 2.28), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.91)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 254.31), 
SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( 398.82)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 123.48), SIMDE_FLOAT32_C( 7.37), SIMDE_FLOAT32_C( -1.68), SIMDE_FLOAT32_C( -3.54), SIMDE_FLOAT32_C( -1.67), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( -0.48), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 1.40), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( 1.11), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( -0.81), SIMDE_FLOAT32_C( -3.22), SIMDE_FLOAT32_C( -0.16)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -328.54), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( -70.91), SIMDE_FLOAT32_C( 543.35), SIMDE_FLOAT32_C( 120.65), SIMDE_FLOAT32_C( -171.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( -10.46), SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( 1.73), SIMDE_FLOAT32_C( -1.39), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 4.02), SIMDE_FLOAT32_C( -2.46), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( -1.51), SIMDE_FLOAT32_C( -1.32), SIMDE_FLOAT32_C( 4.39), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 3.22), SIMDE_FLOAT32_C( 3.31)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( 977.36), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( -738.19)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.67), SIMDE_FLOAT32_C( -3.56), 
SIMDE_FLOAT32_C( 1.26), SIMDE_FLOAT32_C( -1.37), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -10.62), SIMDE_FLOAT32_C( -14.52), SIMDE_FLOAT32_C( -0.85), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -5.21), SIMDE_FLOAT32_C( 10.17), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 2.15), SIMDE_FLOAT32_C( 0.08)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( -337.60), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( 710.38), SIMDE_FLOAT32_C( 977.49), SIMDE_FLOAT32_C( -756.42), SIMDE_FLOAT32_C( 424.81), SIMDE_FLOAT32_C( 27.25), SIMDE_FLOAT32_C( -95.15)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -1.69), SIMDE_FLOAT32_C( 1.97), SIMDE_FLOAT32_C( 5.68), SIMDE_FLOAT32_C( -8.21), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 10.08), SIMDE_FLOAT32_C( 1691.15), SIMDE_FLOAT32_C( -3.72), SIMDE_FLOAT32_C( 10.41), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.85), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -1.64), SIMDE_FLOAT32_C( -1.27)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -696.69)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -16.06), SIMDE_FLOAT32_C( 20.97), SIMDE_FLOAT32_C( 53.90), SIMDE_FLOAT32_C( 1.23), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( 1.51), SIMDE_FLOAT32_C( -0.54), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( -0.24), SIMDE_FLOAT32_C( -1.29), SIMDE_FLOAT32_C( -2.82), SIMDE_FLOAT32_C( -2.36), SIMDE_FLOAT32_C( -4.86), 
SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -1.89), SIMDE_FLOAT32_C( 0.92)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_tan_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_tan_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( 696.87), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( 346.63)), UINT16_C(41466), simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( -754.38)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.19), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 1.36), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -1.15), SIMDE_FLOAT32_C( 346.63)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -554.19), 
SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 28.08)), UINT16_C(36797), simde_mm512_set_ps(SIMDE_FLOAT32_C( -171.51), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 254.31), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( 398.82), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( 993.90)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 3.31), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( 1.40), SIMDE_FLOAT32_C( 1.11), SIMDE_FLOAT32_C( -0.81), SIMDE_FLOAT32_C( -0.16), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 2.28)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -95.15), SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( -738.19), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 543.35)), UINT16_C(16804), simde_mm512_set_ps(SIMDE_FLOAT32_C( 27.25), SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( 977.36), SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -328.54), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( -70.91)), 
simde_mm512_set_ps(SIMDE_FLOAT32_C( -95.15), SIMDE_FLOAT32_C( -1.67), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 10.17), SIMDE_FLOAT32_C( 2.15), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 1.69), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 543.35)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -348.70), SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( -337.60), SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( 710.38), SIMDE_FLOAT32_C( -756.42)), UINT16_C( 2107), simde_mm512_set_ps(SIMDE_FLOAT32_C( 897.27), SIMDE_FLOAT32_C( -197.89), SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( -696.69), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( 977.49)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -348.70), SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( 1.51), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( -1.69), SIMDE_FLOAT32_C( 5.68), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( 10.41), SIMDE_FLOAT32_C( 0.49)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -15.61), SIMDE_FLOAT32_C( -737.13), SIMDE_FLOAT32_C( -314.93), SIMDE_FLOAT32_C( 177.92), SIMDE_FLOAT32_C( 345.93), SIMDE_FLOAT32_C( 888.71), SIMDE_FLOAT32_C( 915.71), SIMDE_FLOAT32_C( 
133.52), SIMDE_FLOAT32_C( 484.94), SIMDE_FLOAT32_C( -598.06), SIMDE_FLOAT32_C( -791.07), SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( 221.37), SIMDE_FLOAT32_C( -788.36), SIMDE_FLOAT32_C( -775.04), SIMDE_FLOAT32_C( 440.64)), UINT16_C(22274), simde_mm512_set_ps(SIMDE_FLOAT32_C( 496.57), SIMDE_FLOAT32_C( 915.19), SIMDE_FLOAT32_C( -718.40), SIMDE_FLOAT32_C( 159.97), SIMDE_FLOAT32_C( -861.01), SIMDE_FLOAT32_C( 426.61), SIMDE_FLOAT32_C( 932.11), SIMDE_FLOAT32_C( 110.36), SIMDE_FLOAT32_C( 826.84), SIMDE_FLOAT32_C( -76.75), SIMDE_FLOAT32_C( 237.58), SIMDE_FLOAT32_C( -378.50), SIMDE_FLOAT32_C( -601.68), SIMDE_FLOAT32_C( -623.50), SIMDE_FLOAT32_C( -942.47), SIMDE_FLOAT32_C( 475.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -15.61), SIMDE_FLOAT32_C( 1.51), SIMDE_FLOAT32_C( -314.93), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 345.93), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -1.38), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 484.94), SIMDE_FLOAT32_C( -598.06), SIMDE_FLOAT32_C( -791.07), SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( 221.37), SIMDE_FLOAT32_C( -788.36), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 440.64)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 883.05), SIMDE_FLOAT32_C( -807.28), SIMDE_FLOAT32_C( -70.05), SIMDE_FLOAT32_C( -784.34), SIMDE_FLOAT32_C( 92.52), SIMDE_FLOAT32_C( 206.60), SIMDE_FLOAT32_C( 834.60), SIMDE_FLOAT32_C( -65.60), SIMDE_FLOAT32_C( -286.07), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -318.38), SIMDE_FLOAT32_C( 783.48), SIMDE_FLOAT32_C( -628.82), SIMDE_FLOAT32_C( 556.35), SIMDE_FLOAT32_C( 439.43), SIMDE_FLOAT32_C( 434.03)), UINT16_C(27396), simde_mm512_set_ps(SIMDE_FLOAT32_C( -964.25), SIMDE_FLOAT32_C( -406.33), SIMDE_FLOAT32_C( -743.66), SIMDE_FLOAT32_C( -764.58), SIMDE_FLOAT32_C( 789.89), SIMDE_FLOAT32_C( 4.83), SIMDE_FLOAT32_C( -818.54), SIMDE_FLOAT32_C( 161.06), SIMDE_FLOAT32_C( 579.25), SIMDE_FLOAT32_C( -11.78), SIMDE_FLOAT32_C( -308.52), SIMDE_FLOAT32_C( -719.57), SIMDE_FLOAT32_C( 334.00), SIMDE_FLOAT32_C( 274.71), SIMDE_FLOAT32_C( -916.82), 
SIMDE_FLOAT32_C( -490.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 883.05), SIMDE_FLOAT32_C( -1.80), SIMDE_FLOAT32_C( 1.25), SIMDE_FLOAT32_C( -784.34), SIMDE_FLOAT32_C( 4.46), SIMDE_FLOAT32_C( 206.60), SIMDE_FLOAT32_C( 6.40), SIMDE_FLOAT32_C( 1.11), SIMDE_FLOAT32_C( -286.07), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -318.38), SIMDE_FLOAT32_C( 783.48), SIMDE_FLOAT32_C( -628.82), SIMDE_FLOAT32_C( 5.52), SIMDE_FLOAT32_C( 439.43), SIMDE_FLOAT32_C( 434.03)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 529.63), SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -967.78), SIMDE_FLOAT32_C( 638.94), SIMDE_FLOAT32_C( 450.90), SIMDE_FLOAT32_C( -771.54), SIMDE_FLOAT32_C( 105.79), SIMDE_FLOAT32_C( 590.10), SIMDE_FLOAT32_C( 30.91), SIMDE_FLOAT32_C( 635.35), SIMDE_FLOAT32_C( -84.00), SIMDE_FLOAT32_C( 80.04), SIMDE_FLOAT32_C( -709.46), SIMDE_FLOAT32_C( 607.86), SIMDE_FLOAT32_C( 394.58), SIMDE_FLOAT32_C( -889.11)), UINT16_C( 953), simde_mm512_set_ps(SIMDE_FLOAT32_C( 18.75), SIMDE_FLOAT32_C( 809.05), SIMDE_FLOAT32_C( 144.05), SIMDE_FLOAT32_C( -427.72), SIMDE_FLOAT32_C( 308.28), SIMDE_FLOAT32_C( -177.05), SIMDE_FLOAT32_C( -457.77), SIMDE_FLOAT32_C( 678.24), SIMDE_FLOAT32_C( 66.05), SIMDE_FLOAT32_C( -267.71), SIMDE_FLOAT32_C( 117.28), SIMDE_FLOAT32_C( -576.80), SIMDE_FLOAT32_C( -38.39), SIMDE_FLOAT32_C( -250.14), SIMDE_FLOAT32_C( -53.92), SIMDE_FLOAT32_C( 91.94)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 529.63), SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -967.78), SIMDE_FLOAT32_C( 638.94), SIMDE_FLOAT32_C( 450.90), SIMDE_FLOAT32_C( -771.54), SIMDE_FLOAT32_C( 1.27), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 635.35), SIMDE_FLOAT32_C( 1.71), SIMDE_FLOAT32_C( 3.04), SIMDE_FLOAT32_C( -0.83), SIMDE_FLOAT32_C( 607.86), SIMDE_FLOAT32_C( 394.58), SIMDE_FLOAT32_C( 1.10)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -788.39), SIMDE_FLOAT32_C( 330.43), SIMDE_FLOAT32_C( -493.41), SIMDE_FLOAT32_C( 822.72), SIMDE_FLOAT32_C( 956.68), SIMDE_FLOAT32_C( 954.62), SIMDE_FLOAT32_C( 825.49), 
SIMDE_FLOAT32_C( -816.27), SIMDE_FLOAT32_C( -209.34), SIMDE_FLOAT32_C( -933.21), SIMDE_FLOAT32_C( -728.70), SIMDE_FLOAT32_C( -420.06), SIMDE_FLOAT32_C( 100.32), SIMDE_FLOAT32_C( 103.15), SIMDE_FLOAT32_C( 439.77), SIMDE_FLOAT32_C( -204.33)), UINT16_C(12713), simde_mm512_set_ps(SIMDE_FLOAT32_C( -841.43), SIMDE_FLOAT32_C( -14.16), SIMDE_FLOAT32_C( 824.88), SIMDE_FLOAT32_C( 793.63), SIMDE_FLOAT32_C( -736.75), SIMDE_FLOAT32_C( -310.57), SIMDE_FLOAT32_C( 728.87), SIMDE_FLOAT32_C( -350.72), SIMDE_FLOAT32_C( 60.89), SIMDE_FLOAT32_C( 109.81), SIMDE_FLOAT32_C( 715.94), SIMDE_FLOAT32_C( -250.60), SIMDE_FLOAT32_C( 944.14), SIMDE_FLOAT32_C( 361.85), SIMDE_FLOAT32_C( -13.07), SIMDE_FLOAT32_C( 852.60)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -788.39), SIMDE_FLOAT32_C( 330.43), SIMDE_FLOAT32_C( -4.65), SIMDE_FLOAT32_C( -2.52), SIMDE_FLOAT32_C( 956.68), SIMDE_FLOAT32_C( 954.62), SIMDE_FLOAT32_C( 825.49), SIMDE_FLOAT32_C( 2.17), SIMDE_FLOAT32_C( 2.57), SIMDE_FLOAT32_C( -933.21), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -420.06), SIMDE_FLOAT32_C( -10.91), SIMDE_FLOAT32_C( 103.15), SIMDE_FLOAT32_C( 439.77), SIMDE_FLOAT32_C( 2.81)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_tan_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_tan_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 1.36), SIMDE_FLOAT64_C( 1.87), SIMDE_FLOAT64_C( 1.56), SIMDE_FLOAT64_C( -0.54), SIMDE_FLOAT64_C( -1.15), SIMDE_FLOAT64_C( 3.76), SIMDE_FLOAT64_C( -0.42), SIMDE_FLOAT64_C( 1.76)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( 
-686.13), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( -3.14), SIMDE_FLOAT64_C( -0.05), SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( -0.86), SIMDE_FLOAT64_C( 2.88), SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( -0.35)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -923.64), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( -417.54), SIMDE_FLOAT64_C( 696.87)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 3.55), SIMDE_FLOAT64_C( 0.20), SIMDE_FLOAT64_C( -0.94), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( -0.63)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 687.09), SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -660.80)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.19), SIMDE_FLOAT64_C( -6.68), SIMDE_FLOAT64_C( 1.24), SIMDE_FLOAT64_C( -1.31), SIMDE_FLOAT64_C( 0.62), SIMDE_FLOAT64_C( 1.91), SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( -1.81)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -770.35), SIMDE_FLOAT64_C( 443.48), SIMDE_FLOAT64_C( -583.60), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( -770.72), SIMDE_FLOAT64_C( 993.90), SIMDE_FLOAT64_C( 28.08), SIMDE_FLOAT64_C( 841.21)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.78), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -1.66), SIMDE_FLOAT64_C( 2.28), SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -0.91)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -387.90), SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 655.87), SIMDE_FLOAT64_C( 339.21), SIMDE_FLOAT64_C( 532.35), SIMDE_FLOAT64_C( -263.99), SIMDE_FLOAT64_C( 780.64), SIMDE_FLOAT64_C( -30.79)), 
simde_mm512_set_pd(SIMDE_FLOAT64_C( -11.51), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( -0.88), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( 6.62), SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( 21.83), SIMDE_FLOAT64_C( 0.72)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -203.65), SIMDE_FLOAT64_C( -80.73), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -944.78), SIMDE_FLOAT64_C( -747.59), SIMDE_FLOAT64_C( -767.23), SIMDE_FLOAT64_C( -554.19), SIMDE_FLOAT64_C( 398.82)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.62), SIMDE_FLOAT64_C( 1.40), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( 1.11), SIMDE_FLOAT64_C( 0.11), SIMDE_FLOAT64_C( -0.81), SIMDE_FLOAT64_C( -3.22), SIMDE_FLOAT64_C( -0.16)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( 791.23), SIMDE_FLOAT64_C( 254.31)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 123.43), SIMDE_FLOAT64_C( 7.37), SIMDE_FLOAT64_C( -1.68), SIMDE_FLOAT64_C( -3.54), SIMDE_FLOAT64_C( -1.67), SIMDE_FLOAT64_C( 0.45), SIMDE_FLOAT64_C( -0.48), SIMDE_FLOAT64_C( -0.16)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_tan_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; }

/* Table-driven check of simde_mm512_mask_tan_pd.
 *
 * Each case holds a pass-through vector (src), an 8-bit lane mask (k), an
 * input vector (a) and the expected result (r).  In the table, every lane of
 * r whose bit in k is 0 repeats the matching src lane.  The computed vector
 * is compared with simde_assert_m512d_close using a tolerance argument of 1.
 *
 * Returns 0 once every case has been checked. */
static int
test_simde_mm512_mask_tan_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m512d src;
    simde__mmask8 k;
    simde__m512d a;
    simde__m512d r;
  } test_vec[8] = {
    { simde_mm512_set_pd(SIMDE_FLOAT64_C(-686.13), SIMDE_FLOAT64_C(571.46), SIMDE_FLOAT64_C(422.21), SIMDE_FLOAT64_C(467.76),
                         SIMDE_FLOAT64_C(670.24), SIMDE_FLOAT64_C(34.06), SIMDE_FLOAT64_C(39.01), SIMDE_FLOAT64_C(346.63)),
      UINT8_C(139),
      simde_mm512_set_pd(SIMDE_FLOAT64_C(-678.17), SIMDE_FLOAT64_C(84.77), SIMDE_FLOAT64_C(825.53), SIMDE_FLOAT64_C(-269.45),
                         SIMDE_FLOAT64_C(497.31), SIMDE_FLOAT64_C(-297.45), SIMDE_FLOAT64_C(-186.21), SIMDE_FLOAT64_C(-754.38)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C(0.44), SIMDE_FLOAT64_C(571.46), SIMDE_FLOAT64_C(422.21), SIMDE_FLOAT64_C(467.76),
                         SIMDE_FLOAT64_C(1.36), SIMDE_FLOAT64_C(34.06), SIMDE_FLOAT64_C(-1.15), SIMDE_FLOAT64_C(-0.42)) },
    { simde_mm512_set_pd(SIMDE_FLOAT64_C(178.20), SIMDE_FLOAT64_C(233.37), SIMDE_FLOAT64_C(261.31), SIMDE_FLOAT64_C(-976.55),
                         SIMDE_FLOAT64_C(-444.81), SIMDE_FLOAT64_C(-384.03), SIMDE_FLOAT64_C(-305.07), SIMDE_FLOAT64_C(-417.54)),
      UINT8_C(229),
      simde_mm512_set_pd(SIMDE_FLOAT64_C(841.21), SIMDE_FLOAT64_C(-450.67), SIMDE_FLOAT64_C(687.09), SIMDE_FLOAT64_C(-212.54),
                         SIMDE_FLOAT64_C(-660.80), SIMDE_FLOAT64_C(28.47), SIMDE_FLOAT64_C(-923.64), SIMDE_FLOAT64_C(-860.95)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C(-0.91), SIMDE_FLOAT64_C(-6.68), SIMDE_FLOAT64_C(-1.31), SIMDE_FLOAT64_C(-976.55),
                         SIMDE_FLOAT64_C(-444.81), SIMDE_FLOAT64_C(0.20), SIMDE_FLOAT64_C(-305.07), SIMDE_FLOAT64_C(-0.15)) },
    { simde_mm512_set_pd(SIMDE_FLOAT64_C(398.82), SIMDE_FLOAT64_C(395.92), SIMDE_FLOAT64_C(339.21), SIMDE_FLOAT64_C(-263.99),
                         SIMDE_FLOAT64_C(-30.79), SIMDE_FLOAT64_C(443.48), SIMDE_FLOAT64_C(380.46), SIMDE_FLOAT64_C(993.90)),
      UINT8_C(253),
      simde_mm512_set_pd(SIMDE_FLOAT64_C(-554.19), SIMDE_FLOAT64_C(-387.90), SIMDE_FLOAT64_C(655.87), SIMDE_FLOAT64_C(532.35),
                         SIMDE_FLOAT64_C(780.64), SIMDE_FLOAT64_C(-770.35), SIMDE_FLOAT64_C(-583.60), SIMDE_FLOAT64_C(-770.72)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C(-3.22), SIMDE_FLOAT64_C(-11.51), SIMDE_FLOAT64_C(-0.88), SIMDE_FLOAT64_C(6.62),
                         SIMDE_FLOAT64_C(21.83), SIMDE_FLOAT64_C(-0.78), SIMDE_FLOAT64_C(380.46), SIMDE_FLOAT64_C(-1.66)) },
    { simde_mm512_set_pd(SIMDE_FLOAT64_C(120.65), SIMDE_FLOAT64_C(469.66), SIMDE_FLOAT64_C(-148.69), SIMDE_FLOAT64_C(910.03),
                         SIMDE_FLOAT64_C(791.23), SIMDE_FLOAT64_C(-203.65), SIMDE_FLOAT64_C(336.73), SIMDE_FLOAT64_C(-747.59)),
      UINT8_C(93),
      simde_mm512_set_pd(SIMDE_FLOAT64_C(543.35), SIMDE_FLOAT64_C(-171.51), SIMDE_FLOAT64_C(680.02), SIMDE_FLOAT64_C(818.66),
                         SIMDE_FLOAT64_C(600.47), SIMDE_FLOAT64_C(254.31), SIMDE_FLOAT64_C(-80.73), SIMDE_FLOAT64_C(-944.78)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C(120.65), SIMDE_FLOAT64_C(3.31), SIMDE_FLOAT64_C(-148.69), SIMDE_FLOAT64_C(-3.54),
                         SIMDE_FLOAT64_C(0.45), SIMDE_FLOAT64_C(-0.16), SIMDE_FLOAT64_C(336.73), SIMDE_FLOAT64_C(1.11)) },
    { simde_mm512_set_pd(SIMDE_FLOAT64_C(99.93), SIMDE_FLOAT64_C(-738.19), SIMDE_FLOAT64_C(758.79), SIMDE_FLOAT64_C(343.48),
                         SIMDE_FLOAT64_C(-797.92), SIMDE_FLOAT64_C(-525.83), SIMDE_FLOAT64_C(-822.65), SIMDE_FLOAT64_C(655.67)),
      UINT8_C(145),
      simde_mm512_set_pd(SIMDE_FLOAT64_C(331.34), SIMDE_FLOAT64_C(462.95), SIMDE_FLOAT64_C(-178.99), SIMDE_FLOAT64_C(324.62),
                         SIMDE_FLOAT64_C(-874.31), SIMDE_FLOAT64_C(-328.54), SIMDE_FLOAT64_C(-192.31), SIMDE_FLOAT64_C(561.36)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C(10.17), SIMDE_FLOAT64_C(-738.19), SIMDE_FLOAT64_C(758.79), SIMDE_FLOAT64_C(1.69),
                         SIMDE_FLOAT64_C(-797.92), SIMDE_FLOAT64_C(-525.83), SIMDE_FLOAT64_C(-822.65), SIMDE_FLOAT64_C(-1.51)) },
    { simde_mm512_set_pd(SIMDE_FLOAT64_C(-756.42), SIMDE_FLOAT64_C(27.25), SIMDE_FLOAT64_C(690.12), SIMDE_FLOAT64_C(-21.09),
                         SIMDE_FLOAT64_C(-448.89), SIMDE_FLOAT64_C(505.79), SIMDE_FLOAT64_C(831.02), SIMDE_FLOAT64_C(977.36)),
      UINT8_C(75),
      simde_mm512_set_pd(SIMDE_FLOAT64_C(977.49), SIMDE_FLOAT64_C(424.81), SIMDE_FLOAT64_C(-95.15), SIMDE_FLOAT64_C(840.65),
                         SIMDE_FLOAT64_C(-591.56), SIMDE_FLOAT64_C(731.49), SIMDE_FLOAT64_C(623.70), SIMDE_FLOAT64_C(140.67)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C(-756.42), SIMDE_FLOAT64_C(0.83), SIMDE_FLOAT64_C(690.12), SIMDE_FLOAT64_C(-21.09),
                         SIMDE_FLOAT64_C(-1.37), SIMDE_FLOAT64_C(505.79), SIMDE_FLOAT64_C(-10.62), SIMDE_FLOAT64_C(-0.85)) },
    { simde_mm512_set_pd(SIMDE_FLOAT64_C(394.67), SIMDE_FLOAT64_C(-304.73), SIMDE_FLOAT64_C(-696.69), SIMDE_FLOAT64_C(822.06),
                         SIMDE_FLOAT64_C(-997.63), SIMDE_FLOAT64_C(923.64), SIMDE_FLOAT64_C(-768.12), SIMDE_FLOAT64_C(-67.64)),
      UINT8_C(93),
      simde_mm512_set_pd(SIMDE_FLOAT64_C(510.85), SIMDE_FLOAT64_C(14.34), SIMDE_FLOAT64_C(916.26), SIMDE_FLOAT64_C(-769.09),
                         SIMDE_FLOAT64_C(-573.81), SIMDE_FLOAT64_C(-337.60), SIMDE_FLOAT64_C(293.64), SIMDE_FLOAT64_C(-576.22)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C(394.67), SIMDE_FLOAT64_C(-4.86), SIMDE_FLOAT64_C(-696.69), SIMDE_FLOAT64_C(0.68),
                         SIMDE_FLOAT64_C(1.97), SIMDE_FLOAT64_C(-8.21), SIMDE_FLOAT64_C(-768.12), SIMDE_FLOAT64_C(-3.73)) },
    { simde_mm512_set_pd(SIMDE_FLOAT64_C(475.51), SIMDE_FLOAT64_C(936.65), SIMDE_FLOAT64_C(-348.70), SIMDE_FLOAT64_C(-438.19),
                         SIMDE_FLOAT64_C(-752.43), SIMDE_FLOAT64_C(932.66), SIMDE_FLOAT64_C(-327.22), SIMDE_FLOAT64_C(-182.45)),
      UINT8_C(213),
      simde_mm512_set_pd(SIMDE_FLOAT64_C(-775.04), SIMDE_FLOAT64_C(440.64), SIMDE_FLOAT64_C(897.27), SIMDE_FLOAT64_C(-197.89),
                         SIMDE_FLOAT64_C(-359.76), SIMDE_FLOAT64_C(-33.67), SIMDE_FLOAT64_C(7.27), SIMDE_FLOAT64_C(-125.20)),
      simde_mm512_set_pd(SIMDE_FLOAT64_C(1.35), SIMDE_FLOAT64_C(1.07), SIMDE_FLOAT64_C(-348.70), SIMDE_FLOAT64_C(0.03),
                         SIMDE_FLOAT64_C(-752.43), SIMDE_FLOAT64_C(1.23), SIMDE_FLOAT64_C(-327.22), SIMDE_FLOAT64_C(0.50)) }
  };
  /* Number of rows in the table, computed once instead of in the loop header. */
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i != n_cases ; ++i) {
    simde__m512d r = simde_mm512_mask_tan_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a);
    simde_assert_m512d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

static int test_simde_mm_tand_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.24)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -1.18), SIMDE_FLOAT32_C( 1.93), SIMDE_FLOAT32_C( 0.68)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 825.53), 
SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76)), simde_mm_set_ps(SIMDE_FLOAT32_C( -3.60), SIMDE_FLOAT32_C( 1.90), SIMDE_FLOAT32_C( -104.17), SIMDE_FLOAT32_C( -3.12)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 10.92), SIMDE_FLOAT32_C( 0.61)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm_set_ps(SIMDE_FLOAT32_C( 1.42), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -0.43)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64)), simde_mm_set_ps(SIMDE_FLOAT32_C( -11.01), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( -0.44)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80)), simde_mm_set_ps(SIMDE_FLOAT32_C( 6.54), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -4.18), SIMDE_FLOAT32_C( 1.68)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 85.51), SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( -0.65)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_tand_ps(test_vec[i].a); simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; }

/* Table-driven check of simde_mm_tand_pd.
 *
 * Each case pairs a two-lane double input (a) with the expected output (r);
 * the computed vector is compared with simde_assert_m128d_close using a
 * tolerance argument of 1.  The expected values are consistent with a
 * tangent taken in degrees (e.g. 39.01 -> 0.81).
 *
 * Returns 0 once every case has been checked. */
static int
test_simde_mm_tand_pd(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m128d a;
    simde__m128d r;
  } test_vec[8] = {
    { simde_mm_set_pd(SIMDE_FLOAT64_C(-754.38), SIMDE_FLOAT64_C(346.63)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(-0.68), SIMDE_FLOAT64_C(-0.24)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(-186.21), SIMDE_FLOAT64_C(39.01)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(-0.11), SIMDE_FLOAT64_C(0.81)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(-297.45), SIMDE_FLOAT64_C(34.06)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(1.93), SIMDE_FLOAT64_C(0.68)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(497.31), SIMDE_FLOAT64_C(670.24)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(-0.92), SIMDE_FLOAT64_C(-1.18)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(-269.45), SIMDE_FLOAT64_C(467.76)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(-104.17), SIMDE_FLOAT64_C(-3.12)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(825.53), SIMDE_FLOAT64_C(422.21)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(-3.60), SIMDE_FLOAT64_C(1.90)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(84.77), SIMDE_FLOAT64_C(571.46)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(10.92), SIMDE_FLOAT64_C(0.61)) },
    { simde_mm_set_pd(SIMDE_FLOAT64_C(-678.17), SIMDE_FLOAT64_C(-686.13)),
      simde_mm_set_pd(SIMDE_FLOAT64_C(0.90), SIMDE_FLOAT64_C(0.67)) }
  };
  /* Number of rows in the table, computed once instead of in the loop header. */
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i != n_cases ; ++i) {
    simde__m128d r = simde_mm_tand_pd(test_vec[i].a);
    simde_assert_m128d_close(r, test_vec[i].r, 1);
  }

  return 0;
}

static int test_simde_mm256_tand_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -1.18), SIMDE_FLOAT32_C( 1.93), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.24)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 10.92), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( -3.60), SIMDE_FLOAT32_C( 1.90), 
SIMDE_FLOAT32_C( -104.17), SIMDE_FLOAT32_C( -3.12)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -11.01), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 1.42), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -0.43)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 85.51), SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( 6.54), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -4.18), SIMDE_FLOAT32_C( 1.68)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 28.08), SIMDE_FLOAT32_C( 841.21)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -1.21), SIMDE_FLOAT32_C( 8.75), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( -1.22), SIMDE_FLOAT32_C( -14.67), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -1.65)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -30.79)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( -2.06), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( -9.50), SIMDE_FLOAT32_C( 1.78), SIMDE_FLOAT32_C( -0.60)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -944.78), 
SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( 398.82)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -6.13), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( -1.08), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( 0.80)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 254.31)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -2.80), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( -6.57), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 2.94), SIMDE_FLOAT32_C( 3.56)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_tand_ps(test_vec[i].a); simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_tand_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.11), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -0.24)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( -1.18), SIMDE_FLOAT64_C( 1.93), SIMDE_FLOAT64_C( 0.68)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -3.60), SIMDE_FLOAT64_C( 1.90), SIMDE_FLOAT64_C( -104.17), SIMDE_FLOAT64_C( -3.12)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 
0.90), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 10.92), SIMDE_FLOAT64_C( 0.61)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( -417.54), SIMDE_FLOAT64_C( 696.87)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 1.42), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -0.43)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -923.64)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -11.01), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( -0.44)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -660.80)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 6.54), SIMDE_FLOAT64_C( -0.64), SIMDE_FLOAT64_C( -4.18), SIMDE_FLOAT64_C( 1.68)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 687.09)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( 85.51), SIMDE_FLOAT64_C( 1.35), SIMDE_FLOAT64_C( -0.65)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_tand_pd(test_vec[i].a); simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_tand_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( -754.38), SIMDE_FLOAT32_C( 346.63)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 10.92), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( -3.60), SIMDE_FLOAT32_C( 1.90), 
SIMDE_FLOAT32_C( -104.17), SIMDE_FLOAT32_C( -3.12), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -1.18), SIMDE_FLOAT32_C( 1.93), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.24)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( 696.87)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 85.51), SIMDE_FLOAT32_C( 1.35), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( 6.54), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -4.18), SIMDE_FLOAT32_C( 1.68), SIMDE_FLOAT32_C( -11.01), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 1.42), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( -0.43)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 993.90), SIMDE_FLOAT32_C( 28.08), SIMDE_FLOAT32_C( 841.21)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( -2.06), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( -9.50), SIMDE_FLOAT32_C( 1.78), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( -1.21), SIMDE_FLOAT32_C( 8.75), SIMDE_FLOAT32_C( -0.95), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( -1.22), SIMDE_FLOAT32_C( -14.67), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -1.65)) }, { 
simde_mm512_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 254.31), SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( 398.82)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -2.80), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( -6.57), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 1.77), SIMDE_FLOAT32_C( 2.94), SIMDE_FLOAT32_C( 3.56), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -6.13), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( -1.08), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( 0.80)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -328.54), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( -70.91), SIMDE_FLOAT32_C( 543.35), SIMDE_FLOAT32_C( 120.65), SIMDE_FLOAT32_C( -171.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( -4.67), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( 4.46), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( -2.08), SIMDE_FLOAT32_C( -2.89), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( -1.69), SIMDE_FLOAT32_C( 0.15)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( 140.67), 
SIMDE_FLOAT32_C( 977.36), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( -738.19)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -1.69), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( -1.26), SIMDE_FLOAT32_C( -51.61), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( 9.06), SIMDE_FLOAT32_C( -2.60), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( 4.46), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( -5.71), SIMDE_FLOAT32_C( -4.35), SIMDE_FLOAT32_C( -0.33)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( -337.60), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( 710.38), SIMDE_FLOAT32_C( 977.49), SIMDE_FLOAT32_C( -756.42), SIMDE_FLOAT32_C( 424.81), SIMDE_FLOAT32_C( 27.25), SIMDE_FLOAT32_C( -95.15)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -1.15), SIMDE_FLOAT32_C( -4.68), SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( 7.46), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -2.28), SIMDE_FLOAT32_C( -1.12), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( -2.43), SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( 4.51), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 2.13), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 11.10)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -696.69)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -4.78), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.67), 
SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 1.42), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.56), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 1.44), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( 0.43)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_tand_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_tand_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -450.67), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( -212.54), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( 696.87), SIMDE_FLOAT32_C( -686.13), SIMDE_FLOAT32_C( 571.46), SIMDE_FLOAT32_C( 422.21), SIMDE_FLOAT32_C( 467.76), SIMDE_FLOAT32_C( 670.24), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( 39.01), SIMDE_FLOAT32_C( 346.63)), UINT16_C(41466), simde_mm512_set_ps(SIMDE_FLOAT32_C( 178.20), SIMDE_FLOAT32_C( 233.37), SIMDE_FLOAT32_C( 261.31), SIMDE_FLOAT32_C( -976.55), SIMDE_FLOAT32_C( -444.81), SIMDE_FLOAT32_C( -384.03), SIMDE_FLOAT32_C( -305.07), SIMDE_FLOAT32_C( -417.54), SIMDE_FLOAT32_C( -678.17), SIMDE_FLOAT32_C( 84.77), SIMDE_FLOAT32_C( 825.53), SIMDE_FLOAT32_C( -269.45), SIMDE_FLOAT32_C( 497.31), SIMDE_FLOAT32_C( -297.45), SIMDE_FLOAT32_C( -186.21), SIMDE_FLOAT32_C( -754.38)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 687.09), SIMDE_FLOAT32_C( 6.54), SIMDE_FLOAT32_C( -660.80), SIMDE_FLOAT32_C( 28.47), SIMDE_FLOAT32_C( -923.64), SIMDE_FLOAT32_C( -860.95), SIMDE_FLOAT32_C( -1.57), SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( 10.92), SIMDE_FLOAT32_C( -3.60), SIMDE_FLOAT32_C( -104.17), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( 34.06), SIMDE_FLOAT32_C( -0.11), SIMDE_FLOAT32_C( 346.63)) }, { 
simde_mm512_set_ps(SIMDE_FLOAT32_C( 469.66), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( -203.65), SIMDE_FLOAT32_C( 336.73), SIMDE_FLOAT32_C( -747.59), SIMDE_FLOAT32_C( -554.19), SIMDE_FLOAT32_C( -387.90), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( 532.35), SIMDE_FLOAT32_C( 780.64), SIMDE_FLOAT32_C( -770.35), SIMDE_FLOAT32_C( -583.60), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( 28.08)), UINT16_C(36797), simde_mm512_set_ps(SIMDE_FLOAT32_C( -171.51), SIMDE_FLOAT32_C( 680.02), SIMDE_FLOAT32_C( 818.66), SIMDE_FLOAT32_C( 600.47), SIMDE_FLOAT32_C( 254.31), SIMDE_FLOAT32_C( -80.73), SIMDE_FLOAT32_C( -944.78), SIMDE_FLOAT32_C( -767.23), SIMDE_FLOAT32_C( 398.82), SIMDE_FLOAT32_C( 395.92), SIMDE_FLOAT32_C( 339.21), SIMDE_FLOAT32_C( -263.99), SIMDE_FLOAT32_C( -30.79), SIMDE_FLOAT32_C( 443.48), SIMDE_FLOAT32_C( 380.46), SIMDE_FLOAT32_C( 993.90)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.15), SIMDE_FLOAT32_C( -148.69), SIMDE_FLOAT32_C( 910.03), SIMDE_FLOAT32_C( 791.23), SIMDE_FLOAT32_C( 3.56), SIMDE_FLOAT32_C( -6.13), SIMDE_FLOAT32_C( -0.99), SIMDE_FLOAT32_C( -1.08), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 655.87), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -9.50), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 8.75), SIMDE_FLOAT32_C( -770.72), SIMDE_FLOAT32_C( -14.67)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -95.15), SIMDE_FLOAT32_C( 840.65), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( 99.93), SIMDE_FLOAT32_C( -738.19), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( 343.48), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -822.65), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 543.35)), UINT16_C(16804), simde_mm512_set_ps(SIMDE_FLOAT32_C( 27.25), SIMDE_FLOAT32_C( 690.12), SIMDE_FLOAT32_C( -21.09), SIMDE_FLOAT32_C( -448.89), SIMDE_FLOAT32_C( 505.79), SIMDE_FLOAT32_C( 831.02), SIMDE_FLOAT32_C( 977.36), 
SIMDE_FLOAT32_C( 331.34), SIMDE_FLOAT32_C( 462.95), SIMDE_FLOAT32_C( -178.99), SIMDE_FLOAT32_C( 324.62), SIMDE_FLOAT32_C( -874.31), SIMDE_FLOAT32_C( -328.54), SIMDE_FLOAT32_C( -192.31), SIMDE_FLOAT32_C( 561.36), SIMDE_FLOAT32_C( -70.91)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -95.15), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -591.56), SIMDE_FLOAT32_C( 731.49), SIMDE_FLOAT32_C( 623.70), SIMDE_FLOAT32_C( 140.67), SIMDE_FLOAT32_C( -906.16), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( -4.35), SIMDE_FLOAT32_C( 758.79), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( -797.92), SIMDE_FLOAT32_C( -525.83), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( 655.67), SIMDE_FLOAT32_C( 543.35)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -348.70), SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( -327.22), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( -573.81), SIMDE_FLOAT32_C( -337.60), SIMDE_FLOAT32_C( 293.64), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( 710.38), SIMDE_FLOAT32_C( -756.42)), UINT16_C( 2107), simde_mm512_set_ps(SIMDE_FLOAT32_C( 897.27), SIMDE_FLOAT32_C( -197.89), SIMDE_FLOAT32_C( -359.76), SIMDE_FLOAT32_C( -33.67), SIMDE_FLOAT32_C( 7.27), SIMDE_FLOAT32_C( -125.20), SIMDE_FLOAT32_C( 39.93), SIMDE_FLOAT32_C( 394.67), SIMDE_FLOAT32_C( -304.73), SIMDE_FLOAT32_C( -696.69), SIMDE_FLOAT32_C( 822.06), SIMDE_FLOAT32_C( -997.63), SIMDE_FLOAT32_C( 923.64), SIMDE_FLOAT32_C( -768.12), SIMDE_FLOAT32_C( -67.64), SIMDE_FLOAT32_C( 977.49)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -348.70), SIMDE_FLOAT32_C( -438.19), SIMDE_FLOAT32_C( -752.43), SIMDE_FLOAT32_C( 932.66), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( -182.45), SIMDE_FLOAT32_C( 510.85), SIMDE_FLOAT32_C( 14.34), SIMDE_FLOAT32_C( 916.26), SIMDE_FLOAT32_C( -769.09), SIMDE_FLOAT32_C( -4.68), SIMDE_FLOAT32_C( 7.46), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -576.22), SIMDE_FLOAT32_C( -2.43), 
SIMDE_FLOAT32_C( 4.51)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -15.61), SIMDE_FLOAT32_C( -737.13), SIMDE_FLOAT32_C( -314.93), SIMDE_FLOAT32_C( 177.92), SIMDE_FLOAT32_C( 345.93), SIMDE_FLOAT32_C( 888.71), SIMDE_FLOAT32_C( 915.71), SIMDE_FLOAT32_C( 133.52), SIMDE_FLOAT32_C( 484.94), SIMDE_FLOAT32_C( -598.06), SIMDE_FLOAT32_C( -791.07), SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( 221.37), SIMDE_FLOAT32_C( -788.36), SIMDE_FLOAT32_C( -775.04), SIMDE_FLOAT32_C( 440.64)), UINT16_C(22274), simde_mm512_set_ps(SIMDE_FLOAT32_C( 496.57), SIMDE_FLOAT32_C( 915.19), SIMDE_FLOAT32_C( -718.40), SIMDE_FLOAT32_C( 159.97), SIMDE_FLOAT32_C( -861.01), SIMDE_FLOAT32_C( 426.61), SIMDE_FLOAT32_C( 932.11), SIMDE_FLOAT32_C( 110.36), SIMDE_FLOAT32_C( 826.84), SIMDE_FLOAT32_C( -76.75), SIMDE_FLOAT32_C( 237.58), SIMDE_FLOAT32_C( -378.50), SIMDE_FLOAT32_C( -601.68), SIMDE_FLOAT32_C( -623.50), SIMDE_FLOAT32_C( -942.47), SIMDE_FLOAT32_C( 475.51)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -15.61), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( -314.93), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( 345.93), SIMDE_FLOAT32_C( 2.31), SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( -2.69), SIMDE_FLOAT32_C( 484.94), SIMDE_FLOAT32_C( -598.06), SIMDE_FLOAT32_C( -791.07), SIMDE_FLOAT32_C( -765.93), SIMDE_FLOAT32_C( 221.37), SIMDE_FLOAT32_C( -788.36), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( 440.64)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 883.05), SIMDE_FLOAT32_C( -807.28), SIMDE_FLOAT32_C( -70.05), SIMDE_FLOAT32_C( -784.34), SIMDE_FLOAT32_C( 92.52), SIMDE_FLOAT32_C( 206.60), SIMDE_FLOAT32_C( 834.60), SIMDE_FLOAT32_C( -65.60), SIMDE_FLOAT32_C( -286.07), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -318.38), SIMDE_FLOAT32_C( 783.48), SIMDE_FLOAT32_C( -628.82), SIMDE_FLOAT32_C( 556.35), SIMDE_FLOAT32_C( 439.43), SIMDE_FLOAT32_C( 434.03)), UINT16_C(27396), simde_mm512_set_ps(SIMDE_FLOAT32_C( -964.25), SIMDE_FLOAT32_C( -406.33), SIMDE_FLOAT32_C( -743.66), SIMDE_FLOAT32_C( -764.58), SIMDE_FLOAT32_C( 789.89), SIMDE_FLOAT32_C( 
4.83), SIMDE_FLOAT32_C( -818.54), SIMDE_FLOAT32_C( 161.06), SIMDE_FLOAT32_C( 579.25), SIMDE_FLOAT32_C( -11.78), SIMDE_FLOAT32_C( -308.52), SIMDE_FLOAT32_C( -719.57), SIMDE_FLOAT32_C( 334.00), SIMDE_FLOAT32_C( 274.71), SIMDE_FLOAT32_C( -916.82), SIMDE_FLOAT32_C( -490.00)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 883.05), SIMDE_FLOAT32_C( -1.05), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -784.34), SIMDE_FLOAT32_C( 2.73), SIMDE_FLOAT32_C( 206.60), SIMDE_FLOAT32_C( 6.66), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( -286.07), SIMDE_FLOAT32_C( -212.86), SIMDE_FLOAT32_C( -318.38), SIMDE_FLOAT32_C( 783.48), SIMDE_FLOAT32_C( -628.82), SIMDE_FLOAT32_C( -12.14), SIMDE_FLOAT32_C( 439.43), SIMDE_FLOAT32_C( 434.03)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 529.63), SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -967.78), SIMDE_FLOAT32_C( 638.94), SIMDE_FLOAT32_C( 450.90), SIMDE_FLOAT32_C( -771.54), SIMDE_FLOAT32_C( 105.79), SIMDE_FLOAT32_C( 590.10), SIMDE_FLOAT32_C( 30.91), SIMDE_FLOAT32_C( 635.35), SIMDE_FLOAT32_C( -84.00), SIMDE_FLOAT32_C( 80.04), SIMDE_FLOAT32_C( -709.46), SIMDE_FLOAT32_C( 607.86), SIMDE_FLOAT32_C( 394.58), SIMDE_FLOAT32_C( -889.11)), UINT16_C( 953), simde_mm512_set_ps(SIMDE_FLOAT32_C( 18.75), SIMDE_FLOAT32_C( 809.05), SIMDE_FLOAT32_C( 144.05), SIMDE_FLOAT32_C( -427.72), SIMDE_FLOAT32_C( 308.28), SIMDE_FLOAT32_C( -177.05), SIMDE_FLOAT32_C( -457.77), SIMDE_FLOAT32_C( 678.24), SIMDE_FLOAT32_C( 66.05), SIMDE_FLOAT32_C( -267.71), SIMDE_FLOAT32_C( 117.28), SIMDE_FLOAT32_C( -576.80), SIMDE_FLOAT32_C( -38.39), SIMDE_FLOAT32_C( -250.14), SIMDE_FLOAT32_C( -53.92), SIMDE_FLOAT32_C( 91.94)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 529.63), SIMDE_FLOAT32_C( -24.89), SIMDE_FLOAT32_C( -967.78), SIMDE_FLOAT32_C( 638.94), SIMDE_FLOAT32_C( 450.90), SIMDE_FLOAT32_C( -771.54), SIMDE_FLOAT32_C( 7.33), SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( 2.25), SIMDE_FLOAT32_C( 635.35), SIMDE_FLOAT32_C( -1.94), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( 607.86), SIMDE_FLOAT32_C( 
394.58), SIMDE_FLOAT32_C( -29.52)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -788.39), SIMDE_FLOAT32_C( 330.43), SIMDE_FLOAT32_C( -493.41), SIMDE_FLOAT32_C( 822.72), SIMDE_FLOAT32_C( 956.68), SIMDE_FLOAT32_C( 954.62), SIMDE_FLOAT32_C( 825.49), SIMDE_FLOAT32_C( -816.27), SIMDE_FLOAT32_C( -209.34), SIMDE_FLOAT32_C( -933.21), SIMDE_FLOAT32_C( -728.70), SIMDE_FLOAT32_C( -420.06), SIMDE_FLOAT32_C( 100.32), SIMDE_FLOAT32_C( 103.15), SIMDE_FLOAT32_C( 439.77), SIMDE_FLOAT32_C( -204.33)), UINT16_C(12713), simde_mm512_set_ps(SIMDE_FLOAT32_C( -841.43), SIMDE_FLOAT32_C( -14.16), SIMDE_FLOAT32_C( 824.88), SIMDE_FLOAT32_C( 793.63), SIMDE_FLOAT32_C( -736.75), SIMDE_FLOAT32_C( -310.57), SIMDE_FLOAT32_C( 728.87), SIMDE_FLOAT32_C( -350.72), SIMDE_FLOAT32_C( 60.89), SIMDE_FLOAT32_C( 109.81), SIMDE_FLOAT32_C( 715.94), SIMDE_FLOAT32_C( -250.60), SIMDE_FLOAT32_C( 944.14), SIMDE_FLOAT32_C( 361.85), SIMDE_FLOAT32_C( -13.07), SIMDE_FLOAT32_C( 852.60)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -788.39), SIMDE_FLOAT32_C( 330.43), SIMDE_FLOAT32_C( -3.76), SIMDE_FLOAT32_C( 3.40), SIMDE_FLOAT32_C( 956.68), SIMDE_FLOAT32_C( 954.62), SIMDE_FLOAT32_C( 825.49), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 1.80), SIMDE_FLOAT32_C( -933.21), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( -420.06), SIMDE_FLOAT32_C( 0.97), SIMDE_FLOAT32_C( 103.15), SIMDE_FLOAT32_C( 439.77), SIMDE_FLOAT32_C( -1.09)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_mask_tand_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_tand_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( -754.38), SIMDE_FLOAT64_C( 346.63)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.92), 
SIMDE_FLOAT64_C( -1.18), SIMDE_FLOAT64_C( 1.93), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( -0.11), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -0.24)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -678.17), SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 467.76)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 10.92), SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( -3.60), SIMDE_FLOAT64_C( 1.90), SIMDE_FLOAT64_C( -104.17), SIMDE_FLOAT64_C( -3.12)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -923.64), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -860.95), SIMDE_FLOAT64_C( -417.54), SIMDE_FLOAT64_C( 696.87)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -11.01), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 1.42), SIMDE_FLOAT64_C( 0.81), SIMDE_FLOAT64_C( -1.57), SIMDE_FLOAT64_C( -0.43)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 687.09), SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -660.80)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( 85.51), SIMDE_FLOAT64_C( 1.35), SIMDE_FLOAT64_C( -0.65), SIMDE_FLOAT64_C( 6.54), SIMDE_FLOAT64_C( -0.64), SIMDE_FLOAT64_C( -4.18), SIMDE_FLOAT64_C( 1.68)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -770.35), SIMDE_FLOAT64_C( 443.48), SIMDE_FLOAT64_C( -583.60), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( -770.72), SIMDE_FLOAT64_C( 993.90), SIMDE_FLOAT64_C( 28.08), SIMDE_FLOAT64_C( 841.21)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.21), SIMDE_FLOAT64_C( 8.75), SIMDE_FLOAT64_C( -0.95), SIMDE_FLOAT64_C( 0.37), SIMDE_FLOAT64_C( -1.22), SIMDE_FLOAT64_C( -14.67), SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( -1.65)) }, { 
simde_mm512_set_pd(SIMDE_FLOAT64_C( -387.90), SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 655.87), SIMDE_FLOAT64_C( 339.21), SIMDE_FLOAT64_C( 532.35), SIMDE_FLOAT64_C( -263.99), SIMDE_FLOAT64_C( 780.64), SIMDE_FLOAT64_C( -30.79)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( 0.72), SIMDE_FLOAT64_C( -2.06), SIMDE_FLOAT64_C( -0.38), SIMDE_FLOAT64_C( -0.13), SIMDE_FLOAT64_C( -9.50), SIMDE_FLOAT64_C( 1.78), SIMDE_FLOAT64_C( -0.60)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -203.65), SIMDE_FLOAT64_C( -80.73), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -944.78), SIMDE_FLOAT64_C( -747.59), SIMDE_FLOAT64_C( -767.23), SIMDE_FLOAT64_C( -554.19), SIMDE_FLOAT64_C( 398.82)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( -6.13), SIMDE_FLOAT64_C( -0.43), SIMDE_FLOAT64_C( -0.99), SIMDE_FLOAT64_C( -0.52), SIMDE_FLOAT64_C( -1.08), SIMDE_FLOAT64_C( -0.25), SIMDE_FLOAT64_C( 0.80)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( 791.23), SIMDE_FLOAT64_C( 254.31)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -2.80), SIMDE_FLOAT64_C( -0.84), SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( -6.57), SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 1.77), SIMDE_FLOAT64_C( 2.94), SIMDE_FLOAT64_C( 3.56)) } };
/* Evaluate simde_mm512_tand_pd on every vector and compare with the expected
 * lanes. The trailing 1 is forwarded to the closeness assertion.
 * NOTE(review): presumably a decimal-places precision - confirm against the
 * test helpers. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_tand_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm512_mask_tand_pd using 8 hard-coded vectors.
 * Each entry holds a pass-through vector `src`, an 8-bit lane mask `k`, the
 * input `a` and the expected result `r`. In the data, lanes whose bit in `k`
 * is set carry the computed value for `a`, and lanes whose bit is clear repeat
 * `src`. Example: k = 139 (bits 7, 3, 1, 0) replaces the 1st, 5th, 7th and 8th
 * listed values, i.e. the set_pd arguments run from lane 7 down to lane 0.
 * The function returns 0 after the loop over all vectors. */
static int test_simde_mm512_mask_tand_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -686.13), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( 467.76), SIMDE_FLOAT64_C( 670.24), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( 39.01), SIMDE_FLOAT64_C( 346.63)), UINT8_C(139), simde_mm512_set_pd(SIMDE_FLOAT64_C( -678.17),
SIMDE_FLOAT64_C( 84.77), SIMDE_FLOAT64_C( 825.53), SIMDE_FLOAT64_C( -269.45), SIMDE_FLOAT64_C( 497.31), SIMDE_FLOAT64_C( -297.45), SIMDE_FLOAT64_C( -186.21), SIMDE_FLOAT64_C( -754.38)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( 571.46), SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( 467.76), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( 34.06), SIMDE_FLOAT64_C( -0.11), SIMDE_FLOAT64_C( -0.68)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 178.20), SIMDE_FLOAT64_C( 233.37), SIMDE_FLOAT64_C( 261.31), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( -384.03), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( -417.54)), UINT8_C(229), simde_mm512_set_pd(SIMDE_FLOAT64_C( 841.21), SIMDE_FLOAT64_C( -450.67), SIMDE_FLOAT64_C( 687.09), SIMDE_FLOAT64_C( -212.54), SIMDE_FLOAT64_C( -660.80), SIMDE_FLOAT64_C( 28.47), SIMDE_FLOAT64_C( -923.64), SIMDE_FLOAT64_C( -860.95)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.65), SIMDE_FLOAT64_C( 85.51), SIMDE_FLOAT64_C( -0.65), SIMDE_FLOAT64_C( -976.55), SIMDE_FLOAT64_C( -444.81), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( -305.07), SIMDE_FLOAT64_C( 0.81)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 398.82), SIMDE_FLOAT64_C( 395.92), SIMDE_FLOAT64_C( 339.21), SIMDE_FLOAT64_C( -263.99), SIMDE_FLOAT64_C( -30.79), SIMDE_FLOAT64_C( 443.48), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( 993.90)), UINT8_C(253), simde_mm512_set_pd(SIMDE_FLOAT64_C( -554.19), SIMDE_FLOAT64_C( -387.90), SIMDE_FLOAT64_C( 655.87), SIMDE_FLOAT64_C( 532.35), SIMDE_FLOAT64_C( 780.64), SIMDE_FLOAT64_C( -770.35), SIMDE_FLOAT64_C( -583.60), SIMDE_FLOAT64_C( -770.72)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.25), SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( -2.06), SIMDE_FLOAT64_C( -0.13), SIMDE_FLOAT64_C( 1.78), SIMDE_FLOAT64_C( -1.21), SIMDE_FLOAT64_C( 380.46), SIMDE_FLOAT64_C( -1.22)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 120.65), SIMDE_FLOAT64_C( 469.66), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( 910.03), SIMDE_FLOAT64_C( 791.23), SIMDE_FLOAT64_C( -203.65), 
SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -747.59)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 543.35), SIMDE_FLOAT64_C( -171.51), SIMDE_FLOAT64_C( 680.02), SIMDE_FLOAT64_C( 818.66), SIMDE_FLOAT64_C( 600.47), SIMDE_FLOAT64_C( 254.31), SIMDE_FLOAT64_C( -80.73), SIMDE_FLOAT64_C( -944.78)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 120.65), SIMDE_FLOAT64_C( 0.15), SIMDE_FLOAT64_C( -148.69), SIMDE_FLOAT64_C( -6.57), SIMDE_FLOAT64_C( 1.77), SIMDE_FLOAT64_C( 3.56), SIMDE_FLOAT64_C( 336.73), SIMDE_FLOAT64_C( -0.99)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 99.93), SIMDE_FLOAT64_C( -738.19), SIMDE_FLOAT64_C( 758.79), SIMDE_FLOAT64_C( 343.48), SIMDE_FLOAT64_C( -797.92), SIMDE_FLOAT64_C( -525.83), SIMDE_FLOAT64_C( -822.65), SIMDE_FLOAT64_C( 655.67)), UINT8_C(145), simde_mm512_set_pd(SIMDE_FLOAT64_C( 331.34), SIMDE_FLOAT64_C( 462.95), SIMDE_FLOAT64_C( -178.99), SIMDE_FLOAT64_C( 324.62), SIMDE_FLOAT64_C( -874.31), SIMDE_FLOAT64_C( -328.54), SIMDE_FLOAT64_C( -192.31), SIMDE_FLOAT64_C( 561.36)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.55), SIMDE_FLOAT64_C( -738.19), SIMDE_FLOAT64_C( 758.79), SIMDE_FLOAT64_C( -0.71), SIMDE_FLOAT64_C( -797.92), SIMDE_FLOAT64_C( -525.83), SIMDE_FLOAT64_C( -822.65), SIMDE_FLOAT64_C( 0.39)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -756.42), SIMDE_FLOAT64_C( 27.25), SIMDE_FLOAT64_C( 690.12), SIMDE_FLOAT64_C( -21.09), SIMDE_FLOAT64_C( -448.89), SIMDE_FLOAT64_C( 505.79), SIMDE_FLOAT64_C( 831.02), SIMDE_FLOAT64_C( 977.36)), UINT8_C( 75), simde_mm512_set_pd(SIMDE_FLOAT64_C( 977.49), SIMDE_FLOAT64_C( 424.81), SIMDE_FLOAT64_C( -95.15), SIMDE_FLOAT64_C( 840.65), SIMDE_FLOAT64_C( -591.56), SIMDE_FLOAT64_C( 731.49), SIMDE_FLOAT64_C( 623.70), SIMDE_FLOAT64_C( 140.67)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -756.42), SIMDE_FLOAT64_C( 2.13), SIMDE_FLOAT64_C( 690.12), SIMDE_FLOAT64_C( -21.09), SIMDE_FLOAT64_C( -1.26), SIMDE_FLOAT64_C( 505.79), SIMDE_FLOAT64_C( 9.06), SIMDE_FLOAT64_C( -0.82)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 394.67), SIMDE_FLOAT64_C( 
-304.73), SIMDE_FLOAT64_C( -696.69), SIMDE_FLOAT64_C( 822.06), SIMDE_FLOAT64_C( -997.63), SIMDE_FLOAT64_C( 923.64), SIMDE_FLOAT64_C( -768.12), SIMDE_FLOAT64_C( -67.64)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 510.85), SIMDE_FLOAT64_C( 14.34), SIMDE_FLOAT64_C( 916.26), SIMDE_FLOAT64_C( -769.09), SIMDE_FLOAT64_C( -573.81), SIMDE_FLOAT64_C( -337.60), SIMDE_FLOAT64_C( 293.64), SIMDE_FLOAT64_C( -576.22)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 394.67), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( -696.69), SIMDE_FLOAT64_C( -1.15), SIMDE_FLOAT64_C( -0.67), SIMDE_FLOAT64_C( 0.41), SIMDE_FLOAT64_C( -768.12), SIMDE_FLOAT64_C( -0.73)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 475.51), SIMDE_FLOAT64_C( 936.65), SIMDE_FLOAT64_C( -348.70), SIMDE_FLOAT64_C( -438.19), SIMDE_FLOAT64_C( -752.43), SIMDE_FLOAT64_C( 932.66), SIMDE_FLOAT64_C( -327.22), SIMDE_FLOAT64_C( -182.45)), UINT8_C(213), simde_mm512_set_pd(SIMDE_FLOAT64_C( -775.04), SIMDE_FLOAT64_C( 440.64), SIMDE_FLOAT64_C( 897.27), SIMDE_FLOAT64_C( -197.89), SIMDE_FLOAT64_C( -359.76), SIMDE_FLOAT64_C( -33.67), SIMDE_FLOAT64_C( 7.27), SIMDE_FLOAT64_C( -125.20)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -1.43), SIMDE_FLOAT64_C( 6.07), SIMDE_FLOAT64_C( -348.70), SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( -752.43), SIMDE_FLOAT64_C( -0.67), SIMDE_FLOAT64_C( -327.22), SIMDE_FLOAT64_C( 1.42)) } };
/* Evaluate simde_mm512_mask_tand_pd(src, k, a) on every vector and compare
 * with the expected lanes. The trailing 1 is forwarded to the closeness
 * assertion.
 * NOTE(review): presumably a decimal-places precision - confirm against the
 * test helpers. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_tand_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; }
/* Test for simde_mm_trunc_ps using a table of 4-lane float32 vectors.
 * `a` is the input and `r` the expected output; every expected value is the
 * input with its fractional part dropped, i.e. rounded toward zero
 * (e.g. -12.21 -> -12.00, 116.11 -> 116.00).
 * The vectors are stored as plain arrays and loaded with simde_mm_loadu_ps
 * in the loop. The function returns 0 after the loop. */
static int test_simde_mm_trunc_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( -12.21), SIMDE_FLOAT32_C( -120.46), SIMDE_FLOAT32_C( 116.11), SIMDE_FLOAT32_C( -957.73) }, { SIMDE_FLOAT32_C( -12.00), SIMDE_FLOAT32_C( -120.00), SIMDE_FLOAT32_C( 116.00), SIMDE_FLOAT32_C( -957.00) } }, { { SIMDE_FLOAT32_C( -970.43), SIMDE_FLOAT32_C(
73.72), SIMDE_FLOAT32_C( 741.23), SIMDE_FLOAT32_C( -161.72) }, { SIMDE_FLOAT32_C( -970.00), SIMDE_FLOAT32_C( 73.00), SIMDE_FLOAT32_C( 741.00), SIMDE_FLOAT32_C( -161.00) } }, { { SIMDE_FLOAT32_C( -669.85), SIMDE_FLOAT32_C( 861.65), SIMDE_FLOAT32_C( 481.06), SIMDE_FLOAT32_C( -607.16) }, { SIMDE_FLOAT32_C( -669.00), SIMDE_FLOAT32_C( 861.00), SIMDE_FLOAT32_C( 481.00), SIMDE_FLOAT32_C( -607.00) } }, { { SIMDE_FLOAT32_C( 227.64), SIMDE_FLOAT32_C( -106.69), SIMDE_FLOAT32_C( -76.28), SIMDE_FLOAT32_C( 195.74) }, { SIMDE_FLOAT32_C( 227.00), SIMDE_FLOAT32_C( -106.00), SIMDE_FLOAT32_C( -76.00), SIMDE_FLOAT32_C( 195.00) } }, { { SIMDE_FLOAT32_C( -755.50), SIMDE_FLOAT32_C( -618.75), SIMDE_FLOAT32_C( -293.56), SIMDE_FLOAT32_C( -686.30) }, { SIMDE_FLOAT32_C( -755.00), SIMDE_FLOAT32_C( -618.00), SIMDE_FLOAT32_C( -293.00), SIMDE_FLOAT32_C( -686.00) } }, { { SIMDE_FLOAT32_C( -454.44), SIMDE_FLOAT32_C( -493.17), SIMDE_FLOAT32_C( 45.88), SIMDE_FLOAT32_C( -307.36) }, { SIMDE_FLOAT32_C( -454.00), SIMDE_FLOAT32_C( -493.00), SIMDE_FLOAT32_C( 45.00), SIMDE_FLOAT32_C( -307.00) } }, { { SIMDE_FLOAT32_C( -593.72), SIMDE_FLOAT32_C( -346.10), SIMDE_FLOAT32_C( -356.52), SIMDE_FLOAT32_C( -727.29) }, { SIMDE_FLOAT32_C( -593.00), SIMDE_FLOAT32_C( -346.00), SIMDE_FLOAT32_C( -356.00), SIMDE_FLOAT32_C( -727.00) } }, { { SIMDE_FLOAT32_C( 304.91), SIMDE_FLOAT32_C( 961.56), SIMDE_FLOAT32_C( 582.51), SIMDE_FLOAT32_C( -707.29) }, { SIMDE_FLOAT32_C( 304.00), SIMDE_FLOAT32_C( 961.00), SIMDE_FLOAT32_C( 582.00), SIMDE_FLOAT32_C( -707.00) } } };
/* Load each input array, run simde_mm_trunc_ps and compare with the loaded
 * expected array. The trailing 1 is forwarded to the assertion helper.
 * NOTE(review): presumably a decimal-places precision - confirm against the
 * test helpers. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_trunc_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; }
/* Test for simde_mm_trunc_pd using a table of 2-lane float64 vectors.
 * `a` is the input and `r` the expected output; every expected value is the
 * input with its fractional part dropped, i.e. rounded toward zero
 * (e.g. -536.40 -> -536.00, 277.33 -> 277.00).
 * The vectors are stored as plain arrays and loaded with simde_mm_loadu_pd
 * in the loop. The function returns 0 after the loop. */
static int test_simde_mm_trunc_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( -536.40), SIMDE_FLOAT64_C( -763.02) }, {
SIMDE_FLOAT64_C( -536.00), SIMDE_FLOAT64_C( -763.00) } }, { { SIMDE_FLOAT64_C( -999.42), SIMDE_FLOAT64_C( -310.98) }, { SIMDE_FLOAT64_C( -999.00), SIMDE_FLOAT64_C( -310.00) } }, { { SIMDE_FLOAT64_C( -951.25), SIMDE_FLOAT64_C( 277.33) }, { SIMDE_FLOAT64_C( -951.00), SIMDE_FLOAT64_C( 277.00) } }, { { SIMDE_FLOAT64_C( -98.58), SIMDE_FLOAT64_C( -936.47) }, { SIMDE_FLOAT64_C( -98.00), SIMDE_FLOAT64_C( -936.00) } }, { { SIMDE_FLOAT64_C( -124.20), SIMDE_FLOAT64_C( -990.68) }, { SIMDE_FLOAT64_C( -124.00), SIMDE_FLOAT64_C( -990.00) } }, { { SIMDE_FLOAT64_C( -319.44), SIMDE_FLOAT64_C( 434.58) }, { SIMDE_FLOAT64_C( -319.00), SIMDE_FLOAT64_C( 434.00) } }, { { SIMDE_FLOAT64_C( 209.02), SIMDE_FLOAT64_C( 196.07) }, { SIMDE_FLOAT64_C( 209.00), SIMDE_FLOAT64_C( 196.00) } }, { { SIMDE_FLOAT64_C( -740.77), SIMDE_FLOAT64_C( 179.41) }, { SIMDE_FLOAT64_C( -740.00), SIMDE_FLOAT64_C( 179.00) } } };
/* Load each input array, run simde_mm_trunc_pd and compare with the loaded
 * expected array. The trailing 1 is forwarded to the assertion helper.
 * NOTE(review): presumably a decimal-places precision - confirm against the
 * test helpers. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_trunc_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; }
/* Test for simde_mm256_trunc_ps using a table of 8-lane float32 vectors.
 * `a` is the input and `r` the expected output; every expected value is the
 * input with its fractional part dropped, i.e. rounded toward zero
 * (e.g. -239.01 -> -239.00, 826.89 -> 826.00).
 * The vectors are stored as plain arrays and loaded with
 * simde_mm256_loadu_ps in the loop. The function returns 0 after the loop. */
static int test_simde_mm256_trunc_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( -239.01), SIMDE_FLOAT32_C( -492.80), SIMDE_FLOAT32_C( -937.05), SIMDE_FLOAT32_C( -286.30), SIMDE_FLOAT32_C( 826.89), SIMDE_FLOAT32_C( 311.87), SIMDE_FLOAT32_C( -290.83), SIMDE_FLOAT32_C( 155.81) }, { SIMDE_FLOAT32_C( -239.00), SIMDE_FLOAT32_C( -492.00), SIMDE_FLOAT32_C( -937.00), SIMDE_FLOAT32_C( -286.00), SIMDE_FLOAT32_C( 826.00), SIMDE_FLOAT32_C( 311.00), SIMDE_FLOAT32_C( -290.00), SIMDE_FLOAT32_C( 155.00) } }, { { SIMDE_FLOAT32_C( 497.98), SIMDE_FLOAT32_C( 770.36), SIMDE_FLOAT32_C( -368.92), SIMDE_FLOAT32_C( -362.61), SIMDE_FLOAT32_C( -693.36), SIMDE_FLOAT32_C( -206.15), SIMDE_FLOAT32_C( -571.56), SIMDE_FLOAT32_C( -305.34) }, { SIMDE_FLOAT32_C( 497.00), SIMDE_FLOAT32_C(
770.00), SIMDE_FLOAT32_C( -368.00), SIMDE_FLOAT32_C( -362.00), SIMDE_FLOAT32_C( -693.00), SIMDE_FLOAT32_C( -206.00), SIMDE_FLOAT32_C( -571.00), SIMDE_FLOAT32_C( -305.00) } }, { { SIMDE_FLOAT32_C( -237.16), SIMDE_FLOAT32_C( 968.44), SIMDE_FLOAT32_C( -77.70), SIMDE_FLOAT32_C( 170.55), SIMDE_FLOAT32_C( -930.56), SIMDE_FLOAT32_C( 755.06), SIMDE_FLOAT32_C( 78.43), SIMDE_FLOAT32_C( -634.89) }, { SIMDE_FLOAT32_C( -237.00), SIMDE_FLOAT32_C( 968.00), SIMDE_FLOAT32_C( -77.00), SIMDE_FLOAT32_C( 170.00), SIMDE_FLOAT32_C( -930.00), SIMDE_FLOAT32_C( 755.00), SIMDE_FLOAT32_C( 78.00), SIMDE_FLOAT32_C( -634.00) } }, { { SIMDE_FLOAT32_C( 107.17), SIMDE_FLOAT32_C( 191.02), SIMDE_FLOAT32_C( -424.61), SIMDE_FLOAT32_C( -603.58), SIMDE_FLOAT32_C( -501.82), SIMDE_FLOAT32_C( -855.61), SIMDE_FLOAT32_C( 927.91), SIMDE_FLOAT32_C( 259.17) }, { SIMDE_FLOAT32_C( 107.00), SIMDE_FLOAT32_C( 191.00), SIMDE_FLOAT32_C( -424.00), SIMDE_FLOAT32_C( -603.00), SIMDE_FLOAT32_C( -501.00), SIMDE_FLOAT32_C( -855.00), SIMDE_FLOAT32_C( 927.00), SIMDE_FLOAT32_C( 259.00) } }, { { SIMDE_FLOAT32_C( -348.41), SIMDE_FLOAT32_C( 990.86), SIMDE_FLOAT32_C( 972.87), SIMDE_FLOAT32_C( -521.52), SIMDE_FLOAT32_C( 302.73), SIMDE_FLOAT32_C( -317.96), SIMDE_FLOAT32_C( 634.29), SIMDE_FLOAT32_C( -199.28) }, { SIMDE_FLOAT32_C( -348.00), SIMDE_FLOAT32_C( 990.00), SIMDE_FLOAT32_C( 972.00), SIMDE_FLOAT32_C( -521.00), SIMDE_FLOAT32_C( 302.00), SIMDE_FLOAT32_C( -317.00), SIMDE_FLOAT32_C( 634.00), SIMDE_FLOAT32_C( -199.00) } }, { { SIMDE_FLOAT32_C( -547.60), SIMDE_FLOAT32_C( -734.63), SIMDE_FLOAT32_C( 438.11), SIMDE_FLOAT32_C( -240.96), SIMDE_FLOAT32_C( 59.22), SIMDE_FLOAT32_C( 866.55), SIMDE_FLOAT32_C( 453.70), SIMDE_FLOAT32_C( 822.06) }, { SIMDE_FLOAT32_C( -547.00), SIMDE_FLOAT32_C( -734.00), SIMDE_FLOAT32_C( 438.00), SIMDE_FLOAT32_C( -240.00), SIMDE_FLOAT32_C( 59.00), SIMDE_FLOAT32_C( 866.00), SIMDE_FLOAT32_C( 453.00), SIMDE_FLOAT32_C( 822.00) } }, { { SIMDE_FLOAT32_C( 834.99), SIMDE_FLOAT32_C( -624.00), SIMDE_FLOAT32_C( -7.39), 
SIMDE_FLOAT32_C( 904.43), SIMDE_FLOAT32_C( -868.94), SIMDE_FLOAT32_C( -928.96), SIMDE_FLOAT32_C( -730.46), SIMDE_FLOAT32_C( 238.23) }, { SIMDE_FLOAT32_C( 834.00), SIMDE_FLOAT32_C( -624.00), SIMDE_FLOAT32_C( -7.00), SIMDE_FLOAT32_C( 904.00), SIMDE_FLOAT32_C( -868.00), SIMDE_FLOAT32_C( -928.00), SIMDE_FLOAT32_C( -730.00), SIMDE_FLOAT32_C( 238.00) } }, { { SIMDE_FLOAT32_C( 262.05), SIMDE_FLOAT32_C( -155.07), SIMDE_FLOAT32_C( 634.65), SIMDE_FLOAT32_C( 760.24), SIMDE_FLOAT32_C( -10.68), SIMDE_FLOAT32_C( 562.56), SIMDE_FLOAT32_C( 19.41), SIMDE_FLOAT32_C( 640.92) }, { SIMDE_FLOAT32_C( 262.00), SIMDE_FLOAT32_C( -155.00), SIMDE_FLOAT32_C( 634.00), SIMDE_FLOAT32_C( 760.00), SIMDE_FLOAT32_C( -10.00), SIMDE_FLOAT32_C( 562.00), SIMDE_FLOAT32_C( 19.00), SIMDE_FLOAT32_C( 640.00) } } };
/* Load each input array, run simde_mm256_trunc_ps and compare with the loaded
 * expected array. The trailing 1 is forwarded to the assertion helper.
 * NOTE(review): presumably a decimal-places precision - confirm against the
 * test helpers. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_trunc_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; }
/* Test for simde_mm256_trunc_pd using a table of 4-lane float64 vectors.
 * `a` is the input and `r` the expected output; every expected value is the
 * input with its fractional part dropped, i.e. rounded toward zero
 * (e.g. 693.29 -> 693.00, -318.62 -> -318.00).
 * The vectors are stored as plain arrays and loaded with
 * simde_mm256_loadu_pd in the loop. The function returns 0 after the loop. */
static int test_simde_mm256_trunc_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( 693.29), SIMDE_FLOAT64_C( 980.27), SIMDE_FLOAT64_C( -292.17), SIMDE_FLOAT64_C( -318.62) }, { SIMDE_FLOAT64_C( 693.00), SIMDE_FLOAT64_C( 980.00), SIMDE_FLOAT64_C( -292.00), SIMDE_FLOAT64_C( -318.00) } }, { { SIMDE_FLOAT64_C( -733.59), SIMDE_FLOAT64_C( -256.43), SIMDE_FLOAT64_C( 726.81), SIMDE_FLOAT64_C( 443.36) }, { SIMDE_FLOAT64_C( -733.00), SIMDE_FLOAT64_C( -256.00), SIMDE_FLOAT64_C( 726.00), SIMDE_FLOAT64_C( 443.00) } }, { { SIMDE_FLOAT64_C( -589.23), SIMDE_FLOAT64_C( -428.07), SIMDE_FLOAT64_C( -734.42), SIMDE_FLOAT64_C( 315.59) }, { SIMDE_FLOAT64_C( -589.00), SIMDE_FLOAT64_C( -428.00), SIMDE_FLOAT64_C( -734.00), SIMDE_FLOAT64_C( 315.00) } }, { { SIMDE_FLOAT64_C( 286.91), SIMDE_FLOAT64_C( -276.33), SIMDE_FLOAT64_C( -306.67), SIMDE_FLOAT64_C( -257.37) }, {
SIMDE_FLOAT64_C( 286.00), SIMDE_FLOAT64_C( -276.00), SIMDE_FLOAT64_C( -306.00), SIMDE_FLOAT64_C( -257.00) } }, { { SIMDE_FLOAT64_C( -92.17), SIMDE_FLOAT64_C( -253.48), SIMDE_FLOAT64_C( 663.58), SIMDE_FLOAT64_C( -246.72) }, { SIMDE_FLOAT64_C( -92.00), SIMDE_FLOAT64_C( -253.00), SIMDE_FLOAT64_C( 663.00), SIMDE_FLOAT64_C( -246.00) } }, { { SIMDE_FLOAT64_C( -825.67), SIMDE_FLOAT64_C( -678.59), SIMDE_FLOAT64_C( 803.95), SIMDE_FLOAT64_C( 565.59) }, { SIMDE_FLOAT64_C( -825.00), SIMDE_FLOAT64_C( -678.00), SIMDE_FLOAT64_C( 803.00), SIMDE_FLOAT64_C( 565.00) } }, { { SIMDE_FLOAT64_C( -428.00), SIMDE_FLOAT64_C( -167.27), SIMDE_FLOAT64_C( 718.24), SIMDE_FLOAT64_C( -22.78) }, { SIMDE_FLOAT64_C( -428.00), SIMDE_FLOAT64_C( -167.00), SIMDE_FLOAT64_C( 718.00), SIMDE_FLOAT64_C( -22.00) } }, { { SIMDE_FLOAT64_C( -376.65), SIMDE_FLOAT64_C( -190.00), SIMDE_FLOAT64_C( -12.78), SIMDE_FLOAT64_C( -683.35) }, { SIMDE_FLOAT64_C( -376.00), SIMDE_FLOAT64_C( -190.00), SIMDE_FLOAT64_C( -12.00), SIMDE_FLOAT64_C( -683.00) } } };
/* Load each input array, run simde_mm256_trunc_pd and compare with the loaded
 * expected array. The trailing 1 is forwarded to the assertion helper.
 * NOTE(review): presumably a decimal-places precision - confirm against the
 * test helpers. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_trunc_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; }
/* Test for simde_mm512_trunc_ps using a table of 16-lane float32 vectors.
 * `a` is the input and `r` the expected output; every expected value is the
 * input with its fractional part dropped, i.e. rounded toward zero
 * (e.g. 606.57 -> 606.00, -271.64 -> -271.00).
 * The vectors are stored as plain arrays and loaded with
 * simde_mm512_loadu_ps in the loop. The function returns 0 after the loop. */
static int test_simde_mm512_trunc_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( 606.57), SIMDE_FLOAT32_C( 426.10), SIMDE_FLOAT32_C( -271.64), SIMDE_FLOAT32_C( -693.93), SIMDE_FLOAT32_C( 123.39), SIMDE_FLOAT32_C( -323.73), SIMDE_FLOAT32_C( -823.48), SIMDE_FLOAT32_C( 154.72), SIMDE_FLOAT32_C( 215.73), SIMDE_FLOAT32_C( 870.22), SIMDE_FLOAT32_C( -205.21), SIMDE_FLOAT32_C( 262.07), SIMDE_FLOAT32_C( 173.72), SIMDE_FLOAT32_C( 310.35), SIMDE_FLOAT32_C( -516.54), SIMDE_FLOAT32_C( -500.11) }, { SIMDE_FLOAT32_C( 606.00), SIMDE_FLOAT32_C( 426.00), SIMDE_FLOAT32_C( -271.00), SIMDE_FLOAT32_C( -693.00), SIMDE_FLOAT32_C( 123.00),
SIMDE_FLOAT32_C( -323.00), SIMDE_FLOAT32_C( -823.00), SIMDE_FLOAT32_C( 154.00), SIMDE_FLOAT32_C( 215.00), SIMDE_FLOAT32_C( 870.00), SIMDE_FLOAT32_C( -205.00), SIMDE_FLOAT32_C( 262.00), SIMDE_FLOAT32_C( 173.00), SIMDE_FLOAT32_C( 310.00), SIMDE_FLOAT32_C( -516.00), SIMDE_FLOAT32_C( -500.00) } }, { { SIMDE_FLOAT32_C( -175.79), SIMDE_FLOAT32_C( -258.58), SIMDE_FLOAT32_C( -46.96), SIMDE_FLOAT32_C( 515.02), SIMDE_FLOAT32_C( 317.58), SIMDE_FLOAT32_C( 852.75), SIMDE_FLOAT32_C( 404.36), SIMDE_FLOAT32_C( 87.35), SIMDE_FLOAT32_C( -977.95), SIMDE_FLOAT32_C( -929.41), SIMDE_FLOAT32_C( 560.67), SIMDE_FLOAT32_C( 89.12), SIMDE_FLOAT32_C( 773.32), SIMDE_FLOAT32_C( 918.64), SIMDE_FLOAT32_C( 751.41), SIMDE_FLOAT32_C( 379.89) }, { SIMDE_FLOAT32_C( -175.00), SIMDE_FLOAT32_C( -258.00), SIMDE_FLOAT32_C( -46.00), SIMDE_FLOAT32_C( 515.00), SIMDE_FLOAT32_C( 317.00), SIMDE_FLOAT32_C( 852.00), SIMDE_FLOAT32_C( 404.00), SIMDE_FLOAT32_C( 87.00), SIMDE_FLOAT32_C( -977.00), SIMDE_FLOAT32_C( -929.00), SIMDE_FLOAT32_C( 560.00), SIMDE_FLOAT32_C( 89.00), SIMDE_FLOAT32_C( 773.00), SIMDE_FLOAT32_C( 918.00), SIMDE_FLOAT32_C( 751.00), SIMDE_FLOAT32_C( 379.00) } }, { { SIMDE_FLOAT32_C( 344.74), SIMDE_FLOAT32_C( -520.24), SIMDE_FLOAT32_C( 685.96), SIMDE_FLOAT32_C( -531.87), SIMDE_FLOAT32_C( 156.03), SIMDE_FLOAT32_C( 862.48), SIMDE_FLOAT32_C( 622.85), SIMDE_FLOAT32_C( -628.23), SIMDE_FLOAT32_C( 732.70), SIMDE_FLOAT32_C( -582.36), SIMDE_FLOAT32_C( 633.84), SIMDE_FLOAT32_C( -93.59), SIMDE_FLOAT32_C( 728.00), SIMDE_FLOAT32_C( -882.70), SIMDE_FLOAT32_C( 406.31), SIMDE_FLOAT32_C( -447.79) }, { SIMDE_FLOAT32_C( 344.00), SIMDE_FLOAT32_C( -520.00), SIMDE_FLOAT32_C( 685.00), SIMDE_FLOAT32_C( -531.00), SIMDE_FLOAT32_C( 156.00), SIMDE_FLOAT32_C( 862.00), SIMDE_FLOAT32_C( 622.00), SIMDE_FLOAT32_C( -628.00), SIMDE_FLOAT32_C( 732.00), SIMDE_FLOAT32_C( -582.00), SIMDE_FLOAT32_C( 633.00), SIMDE_FLOAT32_C( -93.00), SIMDE_FLOAT32_C( 728.00), SIMDE_FLOAT32_C( -882.00), SIMDE_FLOAT32_C( 406.00), SIMDE_FLOAT32_C( -447.00) } }, 
{ { SIMDE_FLOAT32_C( -141.28), SIMDE_FLOAT32_C( -640.65), SIMDE_FLOAT32_C( -932.78), SIMDE_FLOAT32_C( -823.70), SIMDE_FLOAT32_C( -787.91), SIMDE_FLOAT32_C( 471.59), SIMDE_FLOAT32_C( 263.65), SIMDE_FLOAT32_C( -765.85), SIMDE_FLOAT32_C( 542.17), SIMDE_FLOAT32_C( -175.67), SIMDE_FLOAT32_C( 323.27), SIMDE_FLOAT32_C( 315.49), SIMDE_FLOAT32_C( -257.03), SIMDE_FLOAT32_C( 74.67), SIMDE_FLOAT32_C( -304.62), SIMDE_FLOAT32_C( -912.29) }, { SIMDE_FLOAT32_C( -141.00), SIMDE_FLOAT32_C( -640.00), SIMDE_FLOAT32_C( -932.00), SIMDE_FLOAT32_C( -823.00), SIMDE_FLOAT32_C( -787.00), SIMDE_FLOAT32_C( 471.00), SIMDE_FLOAT32_C( 263.00), SIMDE_FLOAT32_C( -765.00), SIMDE_FLOAT32_C( 542.00), SIMDE_FLOAT32_C( -175.00), SIMDE_FLOAT32_C( 323.00), SIMDE_FLOAT32_C( 315.00), SIMDE_FLOAT32_C( -257.00), SIMDE_FLOAT32_C( 74.00), SIMDE_FLOAT32_C( -304.00), SIMDE_FLOAT32_C( -912.00) } }, { { SIMDE_FLOAT32_C( 554.43), SIMDE_FLOAT32_C( -618.67), SIMDE_FLOAT32_C( -444.16), SIMDE_FLOAT32_C( -289.53), SIMDE_FLOAT32_C( -756.19), SIMDE_FLOAT32_C( -821.31), SIMDE_FLOAT32_C( 82.23), SIMDE_FLOAT32_C( 976.51), SIMDE_FLOAT32_C( -403.66), SIMDE_FLOAT32_C( -283.93), SIMDE_FLOAT32_C( -117.08), SIMDE_FLOAT32_C( -675.67), SIMDE_FLOAT32_C( -166.63), SIMDE_FLOAT32_C( -710.77), SIMDE_FLOAT32_C( -123.46), SIMDE_FLOAT32_C( 692.09) }, { SIMDE_FLOAT32_C( 554.00), SIMDE_FLOAT32_C( -618.00), SIMDE_FLOAT32_C( -444.00), SIMDE_FLOAT32_C( -289.00), SIMDE_FLOAT32_C( -756.00), SIMDE_FLOAT32_C( -821.00), SIMDE_FLOAT32_C( 82.00), SIMDE_FLOAT32_C( 976.00), SIMDE_FLOAT32_C( -403.00), SIMDE_FLOAT32_C( -283.00), SIMDE_FLOAT32_C( -117.00), SIMDE_FLOAT32_C( -675.00), SIMDE_FLOAT32_C( -166.00), SIMDE_FLOAT32_C( -710.00), SIMDE_FLOAT32_C( -123.00), SIMDE_FLOAT32_C( 692.00) } }, { { SIMDE_FLOAT32_C( -351.43), SIMDE_FLOAT32_C( -56.24), SIMDE_FLOAT32_C( 868.39), SIMDE_FLOAT32_C( -139.33), SIMDE_FLOAT32_C( -584.65), SIMDE_FLOAT32_C( 132.04), SIMDE_FLOAT32_C( 94.81), SIMDE_FLOAT32_C( 957.53), SIMDE_FLOAT32_C( 956.37), SIMDE_FLOAT32_C( -581.92), 
SIMDE_FLOAT32_C( 273.02), SIMDE_FLOAT32_C( -300.66), SIMDE_FLOAT32_C( 492.75), SIMDE_FLOAT32_C( 968.40), SIMDE_FLOAT32_C( -212.96), SIMDE_FLOAT32_C( 47.18) }, { SIMDE_FLOAT32_C( -351.00), SIMDE_FLOAT32_C( -56.00), SIMDE_FLOAT32_C( 868.00), SIMDE_FLOAT32_C( -139.00), SIMDE_FLOAT32_C( -584.00), SIMDE_FLOAT32_C( 132.00), SIMDE_FLOAT32_C( 94.00), SIMDE_FLOAT32_C( 957.00), SIMDE_FLOAT32_C( 956.00), SIMDE_FLOAT32_C( -581.00), SIMDE_FLOAT32_C( 273.00), SIMDE_FLOAT32_C( -300.00), SIMDE_FLOAT32_C( 492.00), SIMDE_FLOAT32_C( 968.00), SIMDE_FLOAT32_C( -212.00), SIMDE_FLOAT32_C( 47.00) } }, { { SIMDE_FLOAT32_C( -650.27), SIMDE_FLOAT32_C( 342.89), SIMDE_FLOAT32_C( 757.65), SIMDE_FLOAT32_C( -406.46), SIMDE_FLOAT32_C( 521.58), SIMDE_FLOAT32_C( -160.12), SIMDE_FLOAT32_C( -429.95), SIMDE_FLOAT32_C( -882.09), SIMDE_FLOAT32_C( 555.95), SIMDE_FLOAT32_C( 452.97), SIMDE_FLOAT32_C( -557.75), SIMDE_FLOAT32_C( -610.67), SIMDE_FLOAT32_C( 742.20), SIMDE_FLOAT32_C( 318.79), SIMDE_FLOAT32_C( -918.58), SIMDE_FLOAT32_C( -609.23) }, { SIMDE_FLOAT32_C( -650.00), SIMDE_FLOAT32_C( 342.00), SIMDE_FLOAT32_C( 757.00), SIMDE_FLOAT32_C( -406.00), SIMDE_FLOAT32_C( 521.00), SIMDE_FLOAT32_C( -160.00), SIMDE_FLOAT32_C( -429.00), SIMDE_FLOAT32_C( -882.00), SIMDE_FLOAT32_C( 555.00), SIMDE_FLOAT32_C( 452.00), SIMDE_FLOAT32_C( -557.00), SIMDE_FLOAT32_C( -610.00), SIMDE_FLOAT32_C( 742.00), SIMDE_FLOAT32_C( 318.00), SIMDE_FLOAT32_C( -918.00), SIMDE_FLOAT32_C( -609.00) } }, { { SIMDE_FLOAT32_C( -737.45), SIMDE_FLOAT32_C( 949.82), SIMDE_FLOAT32_C( 251.44), SIMDE_FLOAT32_C( -322.10), SIMDE_FLOAT32_C( 81.86), SIMDE_FLOAT32_C( -653.75), SIMDE_FLOAT32_C( -364.57), SIMDE_FLOAT32_C( 38.23), SIMDE_FLOAT32_C( -235.67), SIMDE_FLOAT32_C( 908.45), SIMDE_FLOAT32_C( 737.57), SIMDE_FLOAT32_C( -742.92), SIMDE_FLOAT32_C( 876.84), SIMDE_FLOAT32_C( -475.39), SIMDE_FLOAT32_C( 304.27), SIMDE_FLOAT32_C( -773.43) }, { SIMDE_FLOAT32_C( -737.00), SIMDE_FLOAT32_C( 949.00), SIMDE_FLOAT32_C( 251.00), SIMDE_FLOAT32_C( -322.00), SIMDE_FLOAT32_C( 
81.00), SIMDE_FLOAT32_C( -653.00), SIMDE_FLOAT32_C( -364.00), SIMDE_FLOAT32_C( 38.00), SIMDE_FLOAT32_C( -235.00), SIMDE_FLOAT32_C( 908.00), SIMDE_FLOAT32_C( 737.00), SIMDE_FLOAT32_C( -742.00), SIMDE_FLOAT32_C( 876.00), SIMDE_FLOAT32_C( -475.00), SIMDE_FLOAT32_C( 304.00), SIMDE_FLOAT32_C( -773.00) } } };
/* Load each input array, run simde_mm512_trunc_ps and compare with the loaded
 * expected array. The trailing 1 is forwarded to the assertion helper.
 * NOTE(review): presumably a decimal-places precision - confirm against the
 * test helpers. */
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_trunc_ps(a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; }
/* Test for simde_mm512_mask_trunc_ps using a table of 16-lane float32 vectors.
 * Each entry holds a pass-through array `src`, a lane mask `k`, the input `a`
 * and the expected result `r`. In the data, lanes whose bit in `k` is set
 * carry `a` rounded toward zero and all other lanes repeat `src`
 * (e.g. k = 44, bits 2, 3 and 5: r[2] = 797.00, r[3] = -424.00,
 * r[5] = -767.00).
 * NOTE(review): `k` is declared as simde__mmask8 although the vectors have 16
 * lanes, so only lanes 0-7 can ever be selected and lanes 8-15 exercise the
 * pass-through path only. Presumably the intrinsic takes a 16-bit mask -
 * confirm, and consider widening the field and adding vectors with upper
 * mask bits set. */
static int test_simde_mm512_mask_trunc_ps (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float32 src[16]; const simde__mmask8 k; const simde_float32 a[16]; const simde_float32 r[16]; } test_vec[] = { { { SIMDE_FLOAT32_C( -49.77), SIMDE_FLOAT32_C( -686.71), SIMDE_FLOAT32_C( -757.78), SIMDE_FLOAT32_C( 110.09), SIMDE_FLOAT32_C( 324.87), SIMDE_FLOAT32_C( -371.31), SIMDE_FLOAT32_C( 784.70), SIMDE_FLOAT32_C( 832.26), SIMDE_FLOAT32_C( 569.37), SIMDE_FLOAT32_C( 756.19), SIMDE_FLOAT32_C( 204.94), SIMDE_FLOAT32_C( 274.85), SIMDE_FLOAT32_C( -873.98), SIMDE_FLOAT32_C( -346.20), SIMDE_FLOAT32_C( -78.53), SIMDE_FLOAT32_C( -191.48) }, UINT8_C( 44), { SIMDE_FLOAT32_C( -81.77), SIMDE_FLOAT32_C( -137.21), SIMDE_FLOAT32_C( 797.93), SIMDE_FLOAT32_C( -424.41), SIMDE_FLOAT32_C( -278.83), SIMDE_FLOAT32_C( -767.08), SIMDE_FLOAT32_C( -764.79), SIMDE_FLOAT32_C( 76.32), SIMDE_FLOAT32_C( 979.09), SIMDE_FLOAT32_C( -188.68), SIMDE_FLOAT32_C( -648.91), SIMDE_FLOAT32_C( 84.00), SIMDE_FLOAT32_C( -272.96), SIMDE_FLOAT32_C( 552.79), SIMDE_FLOAT32_C( -965.78), SIMDE_FLOAT32_C( 40.34) }, { SIMDE_FLOAT32_C( -49.77), SIMDE_FLOAT32_C( -686.71), SIMDE_FLOAT32_C( 797.00), SIMDE_FLOAT32_C( -424.00), SIMDE_FLOAT32_C( 324.87), SIMDE_FLOAT32_C( -767.00), SIMDE_FLOAT32_C( 784.70), SIMDE_FLOAT32_C( 832.26), SIMDE_FLOAT32_C( 569.37), SIMDE_FLOAT32_C( 756.19), SIMDE_FLOAT32_C( 204.94), SIMDE_FLOAT32_C( 274.85), SIMDE_FLOAT32_C( -873.98),
SIMDE_FLOAT32_C( -346.20), SIMDE_FLOAT32_C( -78.53), SIMDE_FLOAT32_C( -191.48) } }, { { SIMDE_FLOAT32_C( 795.01), SIMDE_FLOAT32_C( 144.31), SIMDE_FLOAT32_C( -634.80), SIMDE_FLOAT32_C( -576.30), SIMDE_FLOAT32_C( -71.00), SIMDE_FLOAT32_C( -802.54), SIMDE_FLOAT32_C( 993.08), SIMDE_FLOAT32_C( -314.81), SIMDE_FLOAT32_C( 402.40), SIMDE_FLOAT32_C( 267.93), SIMDE_FLOAT32_C( -188.79), SIMDE_FLOAT32_C( -943.80), SIMDE_FLOAT32_C( -810.60), SIMDE_FLOAT32_C( 619.74), SIMDE_FLOAT32_C( 857.90), SIMDE_FLOAT32_C( 107.62) }, UINT8_C(232), { SIMDE_FLOAT32_C( 655.83), SIMDE_FLOAT32_C( 683.21), SIMDE_FLOAT32_C( 203.69), SIMDE_FLOAT32_C( 888.75), SIMDE_FLOAT32_C( 918.42), SIMDE_FLOAT32_C( -720.00), SIMDE_FLOAT32_C( 867.84), SIMDE_FLOAT32_C( -270.26), SIMDE_FLOAT32_C( -368.90), SIMDE_FLOAT32_C( -48.16), SIMDE_FLOAT32_C( 456.78), SIMDE_FLOAT32_C( -816.11), SIMDE_FLOAT32_C( -13.93), SIMDE_FLOAT32_C( -502.88), SIMDE_FLOAT32_C( 978.90), SIMDE_FLOAT32_C( -869.63) }, { SIMDE_FLOAT32_C( 795.01), SIMDE_FLOAT32_C( 144.31), SIMDE_FLOAT32_C( -634.80), SIMDE_FLOAT32_C( 888.00), SIMDE_FLOAT32_C( -71.00), SIMDE_FLOAT32_C( -720.00), SIMDE_FLOAT32_C( 867.00), SIMDE_FLOAT32_C( -270.00), SIMDE_FLOAT32_C( 402.40), SIMDE_FLOAT32_C( 267.93), SIMDE_FLOAT32_C( -188.79), SIMDE_FLOAT32_C( -943.80), SIMDE_FLOAT32_C( -810.60), SIMDE_FLOAT32_C( 619.74), SIMDE_FLOAT32_C( 857.90), SIMDE_FLOAT32_C( 107.62) } }, { { SIMDE_FLOAT32_C( -137.68), SIMDE_FLOAT32_C( -597.40), SIMDE_FLOAT32_C( 59.38), SIMDE_FLOAT32_C( 59.79), SIMDE_FLOAT32_C( -604.32), SIMDE_FLOAT32_C( 744.57), SIMDE_FLOAT32_C( -537.81), SIMDE_FLOAT32_C( 663.60), SIMDE_FLOAT32_C( -444.21), SIMDE_FLOAT32_C( -481.61), SIMDE_FLOAT32_C( 853.00), SIMDE_FLOAT32_C( -824.48), SIMDE_FLOAT32_C( -623.71), SIMDE_FLOAT32_C( -39.38), SIMDE_FLOAT32_C( -341.96), SIMDE_FLOAT32_C( -967.88) }, UINT8_C( 37), { SIMDE_FLOAT32_C( 861.73), SIMDE_FLOAT32_C( 920.87), SIMDE_FLOAT32_C( -437.74), SIMDE_FLOAT32_C( -858.26), SIMDE_FLOAT32_C( 788.71), SIMDE_FLOAT32_C( 291.99), 
SIMDE_FLOAT32_C( -227.16), SIMDE_FLOAT32_C( -259.44), SIMDE_FLOAT32_C( -251.22), SIMDE_FLOAT32_C( -43.28), SIMDE_FLOAT32_C( 726.62), SIMDE_FLOAT32_C( 245.90), SIMDE_FLOAT32_C( -64.38), SIMDE_FLOAT32_C( 857.00), SIMDE_FLOAT32_C( -891.78), SIMDE_FLOAT32_C( 338.22) }, { SIMDE_FLOAT32_C( 861.00), SIMDE_FLOAT32_C( -597.40), SIMDE_FLOAT32_C( -437.00), SIMDE_FLOAT32_C( 59.79), SIMDE_FLOAT32_C( -604.32), SIMDE_FLOAT32_C( 291.00), SIMDE_FLOAT32_C( -537.81), SIMDE_FLOAT32_C( 663.60), SIMDE_FLOAT32_C( -444.21), SIMDE_FLOAT32_C( -481.61), SIMDE_FLOAT32_C( 853.00), SIMDE_FLOAT32_C( -824.48), SIMDE_FLOAT32_C( -623.71), SIMDE_FLOAT32_C( -39.38), SIMDE_FLOAT32_C( -341.96), SIMDE_FLOAT32_C( -967.88) } }, { { SIMDE_FLOAT32_C( -83.63), SIMDE_FLOAT32_C( 168.01), SIMDE_FLOAT32_C( 733.90), SIMDE_FLOAT32_C( -339.05), SIMDE_FLOAT32_C( 630.19), SIMDE_FLOAT32_C( 397.50), SIMDE_FLOAT32_C( 216.73), SIMDE_FLOAT32_C( -851.42), SIMDE_FLOAT32_C( 250.50), SIMDE_FLOAT32_C( 392.25), SIMDE_FLOAT32_C( -475.13), SIMDE_FLOAT32_C( -788.88), SIMDE_FLOAT32_C( -949.70), SIMDE_FLOAT32_C( -443.01), SIMDE_FLOAT32_C( -145.04), SIMDE_FLOAT32_C( 912.03) }, UINT8_C(240), { SIMDE_FLOAT32_C( 417.21), SIMDE_FLOAT32_C( -946.23), SIMDE_FLOAT32_C( -733.43), SIMDE_FLOAT32_C( -290.79), SIMDE_FLOAT32_C( -173.40), SIMDE_FLOAT32_C( 7.13), SIMDE_FLOAT32_C( 457.98), SIMDE_FLOAT32_C( 783.33), SIMDE_FLOAT32_C( -266.25), SIMDE_FLOAT32_C( -296.12), SIMDE_FLOAT32_C( -281.05), SIMDE_FLOAT32_C( -409.26), SIMDE_FLOAT32_C( -187.90), SIMDE_FLOAT32_C( -942.83), SIMDE_FLOAT32_C( 507.12), SIMDE_FLOAT32_C( 980.11) }, { SIMDE_FLOAT32_C( -83.63), SIMDE_FLOAT32_C( 168.01), SIMDE_FLOAT32_C( 733.90), SIMDE_FLOAT32_C( -339.05), SIMDE_FLOAT32_C( -173.00), SIMDE_FLOAT32_C( 7.00), SIMDE_FLOAT32_C( 457.00), SIMDE_FLOAT32_C( 783.00), SIMDE_FLOAT32_C( 250.50), SIMDE_FLOAT32_C( 392.25), SIMDE_FLOAT32_C( -475.13), SIMDE_FLOAT32_C( -788.88), SIMDE_FLOAT32_C( -949.70), SIMDE_FLOAT32_C( -443.01), SIMDE_FLOAT32_C( -145.04), SIMDE_FLOAT32_C( 912.03) } }, { { 
SIMDE_FLOAT32_C( 791.07), SIMDE_FLOAT32_C( -831.94), SIMDE_FLOAT32_C( 610.30), SIMDE_FLOAT32_C( 188.58), SIMDE_FLOAT32_C( 384.80), SIMDE_FLOAT32_C( 758.88), SIMDE_FLOAT32_C( -560.92), SIMDE_FLOAT32_C( -222.95), SIMDE_FLOAT32_C( -716.25), SIMDE_FLOAT32_C( -349.80), SIMDE_FLOAT32_C( -172.65), SIMDE_FLOAT32_C( -159.27), SIMDE_FLOAT32_C( 505.16), SIMDE_FLOAT32_C( -260.62), SIMDE_FLOAT32_C( 318.59), SIMDE_FLOAT32_C( -77.63) }, UINT8_C( 96), { SIMDE_FLOAT32_C( 585.16), SIMDE_FLOAT32_C( 631.57), SIMDE_FLOAT32_C( 619.75), SIMDE_FLOAT32_C( -407.71), SIMDE_FLOAT32_C( 89.55), SIMDE_FLOAT32_C( 403.08), SIMDE_FLOAT32_C( 326.04), SIMDE_FLOAT32_C( 793.43), SIMDE_FLOAT32_C( -877.97), SIMDE_FLOAT32_C( 916.78), SIMDE_FLOAT32_C( -394.47), SIMDE_FLOAT32_C( -820.80), SIMDE_FLOAT32_C( 423.90), SIMDE_FLOAT32_C( -414.36), SIMDE_FLOAT32_C( 970.28), SIMDE_FLOAT32_C( 591.96) }, { SIMDE_FLOAT32_C( 791.07), SIMDE_FLOAT32_C( -831.94), SIMDE_FLOAT32_C( 610.30), SIMDE_FLOAT32_C( 188.58), SIMDE_FLOAT32_C( 384.80), SIMDE_FLOAT32_C( 403.00), SIMDE_FLOAT32_C( 326.00), SIMDE_FLOAT32_C( -222.95), SIMDE_FLOAT32_C( -716.25), SIMDE_FLOAT32_C( -349.80), SIMDE_FLOAT32_C( -172.65), SIMDE_FLOAT32_C( -159.27), SIMDE_FLOAT32_C( 505.16), SIMDE_FLOAT32_C( -260.62), SIMDE_FLOAT32_C( 318.59), SIMDE_FLOAT32_C( -77.63) } }, { { SIMDE_FLOAT32_C( -804.06), SIMDE_FLOAT32_C( 158.86), SIMDE_FLOAT32_C( -23.24), SIMDE_FLOAT32_C( 954.82), SIMDE_FLOAT32_C( 597.93), SIMDE_FLOAT32_C( 753.81), SIMDE_FLOAT32_C( -761.43), SIMDE_FLOAT32_C( -751.86), SIMDE_FLOAT32_C( -418.84), SIMDE_FLOAT32_C( 79.30), SIMDE_FLOAT32_C( 753.29), SIMDE_FLOAT32_C( 320.53), SIMDE_FLOAT32_C( -602.11), SIMDE_FLOAT32_C( -324.34), SIMDE_FLOAT32_C( -886.32), SIMDE_FLOAT32_C( 983.05) }, UINT8_C(109), { SIMDE_FLOAT32_C( 733.43), SIMDE_FLOAT32_C( -424.66), SIMDE_FLOAT32_C( 396.78), SIMDE_FLOAT32_C( 136.51), SIMDE_FLOAT32_C( 901.37), SIMDE_FLOAT32_C( 190.22), SIMDE_FLOAT32_C( 258.54), SIMDE_FLOAT32_C( 818.15), SIMDE_FLOAT32_C( 795.75), SIMDE_FLOAT32_C( 437.74), 
SIMDE_FLOAT32_C( 242.05), SIMDE_FLOAT32_C( -618.61), SIMDE_FLOAT32_C( 408.02), SIMDE_FLOAT32_C( -165.99), SIMDE_FLOAT32_C( -422.67), SIMDE_FLOAT32_C( -433.12) }, { SIMDE_FLOAT32_C( 733.00), SIMDE_FLOAT32_C( 158.86), SIMDE_FLOAT32_C( 396.00), SIMDE_FLOAT32_C( 136.00), SIMDE_FLOAT32_C( 597.93), SIMDE_FLOAT32_C( 190.00), SIMDE_FLOAT32_C( 258.00), SIMDE_FLOAT32_C( -751.86), SIMDE_FLOAT32_C( -418.84), SIMDE_FLOAT32_C( 79.30), SIMDE_FLOAT32_C( 753.29), SIMDE_FLOAT32_C( 320.53), SIMDE_FLOAT32_C( -602.11), SIMDE_FLOAT32_C( -324.34), SIMDE_FLOAT32_C( -886.32), SIMDE_FLOAT32_C( 983.05) } }, { { SIMDE_FLOAT32_C( 810.77), SIMDE_FLOAT32_C( -467.85), SIMDE_FLOAT32_C( -835.19), SIMDE_FLOAT32_C( 564.58), SIMDE_FLOAT32_C( -229.28), SIMDE_FLOAT32_C( -587.05), SIMDE_FLOAT32_C( -854.26), SIMDE_FLOAT32_C( 850.02), SIMDE_FLOAT32_C( -833.76), SIMDE_FLOAT32_C( 466.27), SIMDE_FLOAT32_C( -752.09), SIMDE_FLOAT32_C( -158.10), SIMDE_FLOAT32_C( 579.95), SIMDE_FLOAT32_C( -769.04), SIMDE_FLOAT32_C( 149.13), SIMDE_FLOAT32_C( 313.38) }, UINT8_C(125), { SIMDE_FLOAT32_C( -454.09), SIMDE_FLOAT32_C( -550.11), SIMDE_FLOAT32_C( -292.33), SIMDE_FLOAT32_C( 736.13), SIMDE_FLOAT32_C( 708.43), SIMDE_FLOAT32_C( -474.18), SIMDE_FLOAT32_C( 531.88), SIMDE_FLOAT32_C( 146.17), SIMDE_FLOAT32_C( 767.87), SIMDE_FLOAT32_C( 913.26), SIMDE_FLOAT32_C( -445.81), SIMDE_FLOAT32_C( -398.12), SIMDE_FLOAT32_C( -509.41), SIMDE_FLOAT32_C( 121.07), SIMDE_FLOAT32_C( -587.35), SIMDE_FLOAT32_C( 22.74) }, { SIMDE_FLOAT32_C( -454.00), SIMDE_FLOAT32_C( -467.85), SIMDE_FLOAT32_C( -292.00), SIMDE_FLOAT32_C( 736.00), SIMDE_FLOAT32_C( 708.00), SIMDE_FLOAT32_C( -474.00), SIMDE_FLOAT32_C( 531.00), SIMDE_FLOAT32_C( 850.02), SIMDE_FLOAT32_C( -833.76), SIMDE_FLOAT32_C( 466.27), SIMDE_FLOAT32_C( -752.09), SIMDE_FLOAT32_C( -158.10), SIMDE_FLOAT32_C( 579.95), SIMDE_FLOAT32_C( -769.04), SIMDE_FLOAT32_C( 149.13), SIMDE_FLOAT32_C( 313.38) } }, { { SIMDE_FLOAT32_C( 285.88), SIMDE_FLOAT32_C( 977.23), SIMDE_FLOAT32_C( 793.45), SIMDE_FLOAT32_C( 698.82), 
SIMDE_FLOAT32_C( -877.03), SIMDE_FLOAT32_C( 643.47), SIMDE_FLOAT32_C( 865.06), SIMDE_FLOAT32_C( 589.25), SIMDE_FLOAT32_C( 891.38), SIMDE_FLOAT32_C( -293.04), SIMDE_FLOAT32_C( 169.20), SIMDE_FLOAT32_C( -877.66), SIMDE_FLOAT32_C( 856.08), SIMDE_FLOAT32_C( -517.41), SIMDE_FLOAT32_C( -71.37), SIMDE_FLOAT32_C( -598.01) }, UINT8_C(105), { SIMDE_FLOAT32_C( 636.30), SIMDE_FLOAT32_C( -861.88), SIMDE_FLOAT32_C( -359.09), SIMDE_FLOAT32_C( -837.88), SIMDE_FLOAT32_C( 670.00), SIMDE_FLOAT32_C( 787.08), SIMDE_FLOAT32_C( 929.98), SIMDE_FLOAT32_C( 583.26), SIMDE_FLOAT32_C( -658.72), SIMDE_FLOAT32_C( -468.14), SIMDE_FLOAT32_C( -926.15), SIMDE_FLOAT32_C( 462.35), SIMDE_FLOAT32_C( -55.49), SIMDE_FLOAT32_C( 96.59), SIMDE_FLOAT32_C( -251.77), SIMDE_FLOAT32_C( -78.25) }, { SIMDE_FLOAT32_C( 636.00), SIMDE_FLOAT32_C( 977.23), SIMDE_FLOAT32_C( 793.45), SIMDE_FLOAT32_C( -837.00), SIMDE_FLOAT32_C( -877.03), SIMDE_FLOAT32_C( 787.00), SIMDE_FLOAT32_C( 929.00), SIMDE_FLOAT32_C( 589.25), SIMDE_FLOAT32_C( 891.38), SIMDE_FLOAT32_C( -293.04), SIMDE_FLOAT32_C( 169.20), SIMDE_FLOAT32_C( -877.66), SIMDE_FLOAT32_C( 856.08), SIMDE_FLOAT32_C( -517.41), SIMDE_FLOAT32_C( -71.37), SIMDE_FLOAT32_C( -598.01) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src); simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); simde__m512 r = simde_mm512_mask_trunc_ps(src, test_vec[i].k, a); simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1); } return 0; } /* Table-driven check of simde_mm512_trunc_pd(). Each case pairs an 8-element input array `a` with the expected array `r`; every expected element is the input with its fractional part dropped, keeping the sign (e.g. 90.45 -> 90.00, -83.38 -> -83.00). Returns 0 after all cases have been compared. */ static int test_simde_mm512_trunc_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 90.45), SIMDE_FLOAT64_C( 195.98), SIMDE_FLOAT64_C( -83.38), SIMDE_FLOAT64_C( -236.26), SIMDE_FLOAT64_C( -941.16), SIMDE_FLOAT64_C( 125.78), SIMDE_FLOAT64_C( -753.74), SIMDE_FLOAT64_C( -729.24) }, { SIMDE_FLOAT64_C( 90.00), SIMDE_FLOAT64_C( 195.00), SIMDE_FLOAT64_C( -83.00), SIMDE_FLOAT64_C(
-236.00), SIMDE_FLOAT64_C( -941.00), SIMDE_FLOAT64_C( 125.00), SIMDE_FLOAT64_C( -753.00), SIMDE_FLOAT64_C( -729.00) } }, { { SIMDE_FLOAT64_C( 663.53), SIMDE_FLOAT64_C( 196.60), SIMDE_FLOAT64_C( -90.58), SIMDE_FLOAT64_C( 229.06), SIMDE_FLOAT64_C( -925.87), SIMDE_FLOAT64_C( -621.28), SIMDE_FLOAT64_C( 631.54), SIMDE_FLOAT64_C( -475.70) }, { SIMDE_FLOAT64_C( 663.00), SIMDE_FLOAT64_C( 196.00), SIMDE_FLOAT64_C( -90.00), SIMDE_FLOAT64_C( 229.00), SIMDE_FLOAT64_C( -925.00), SIMDE_FLOAT64_C( -621.00), SIMDE_FLOAT64_C( 631.00), SIMDE_FLOAT64_C( -475.00) } }, { { SIMDE_FLOAT64_C( 499.40), SIMDE_FLOAT64_C( -577.93), SIMDE_FLOAT64_C( -603.42), SIMDE_FLOAT64_C( -226.68), SIMDE_FLOAT64_C( 674.64), SIMDE_FLOAT64_C( -116.71), SIMDE_FLOAT64_C( 605.38), SIMDE_FLOAT64_C( -749.41) }, { SIMDE_FLOAT64_C( 499.00), SIMDE_FLOAT64_C( -577.00), SIMDE_FLOAT64_C( -603.00), SIMDE_FLOAT64_C( -226.00), SIMDE_FLOAT64_C( 674.00), SIMDE_FLOAT64_C( -116.00), SIMDE_FLOAT64_C( 605.00), SIMDE_FLOAT64_C( -749.00) } }, { { SIMDE_FLOAT64_C( -866.90), SIMDE_FLOAT64_C( 273.08), SIMDE_FLOAT64_C( 910.37), SIMDE_FLOAT64_C( -223.08), SIMDE_FLOAT64_C( 229.45), SIMDE_FLOAT64_C( -919.92), SIMDE_FLOAT64_C( 179.63), SIMDE_FLOAT64_C( -680.10) }, { SIMDE_FLOAT64_C( -866.00), SIMDE_FLOAT64_C( 273.00), SIMDE_FLOAT64_C( 910.00), SIMDE_FLOAT64_C( -223.00), SIMDE_FLOAT64_C( 229.00), SIMDE_FLOAT64_C( -919.00), SIMDE_FLOAT64_C( 179.00), SIMDE_FLOAT64_C( -680.00) } }, { { SIMDE_FLOAT64_C( 276.06), SIMDE_FLOAT64_C( -903.75), SIMDE_FLOAT64_C( 83.64), SIMDE_FLOAT64_C( 334.90), SIMDE_FLOAT64_C( 222.03), SIMDE_FLOAT64_C( 329.90), SIMDE_FLOAT64_C( 605.67), SIMDE_FLOAT64_C( -114.44) }, { SIMDE_FLOAT64_C( 276.00), SIMDE_FLOAT64_C( -903.00), SIMDE_FLOAT64_C( 83.00), SIMDE_FLOAT64_C( 334.00), SIMDE_FLOAT64_C( 222.00), SIMDE_FLOAT64_C( 329.00), SIMDE_FLOAT64_C( 605.00), SIMDE_FLOAT64_C( -114.00) } }, { { SIMDE_FLOAT64_C( -473.49), SIMDE_FLOAT64_C( -484.91), SIMDE_FLOAT64_C( -885.38), SIMDE_FLOAT64_C( -399.36), SIMDE_FLOAT64_C( -106.19),
SIMDE_FLOAT64_C( 746.15), SIMDE_FLOAT64_C( 124.93), SIMDE_FLOAT64_C( -606.79) }, { SIMDE_FLOAT64_C( -473.00), SIMDE_FLOAT64_C( -484.00), SIMDE_FLOAT64_C( -885.00), SIMDE_FLOAT64_C( -399.00), SIMDE_FLOAT64_C( -106.00), SIMDE_FLOAT64_C( 746.00), SIMDE_FLOAT64_C( 124.00), SIMDE_FLOAT64_C( -606.00) } }, { { SIMDE_FLOAT64_C( -831.78), SIMDE_FLOAT64_C( 521.52), SIMDE_FLOAT64_C( 166.54), SIMDE_FLOAT64_C( 842.86), SIMDE_FLOAT64_C( -595.19), SIMDE_FLOAT64_C( -228.09), SIMDE_FLOAT64_C( -906.55), SIMDE_FLOAT64_C( -462.09) }, { SIMDE_FLOAT64_C( -831.00), SIMDE_FLOAT64_C( 521.00), SIMDE_FLOAT64_C( 166.00), SIMDE_FLOAT64_C( 842.00), SIMDE_FLOAT64_C( -595.00), SIMDE_FLOAT64_C( -228.00), SIMDE_FLOAT64_C( -906.00), SIMDE_FLOAT64_C( -462.00) } }, { { SIMDE_FLOAT64_C( -955.00), SIMDE_FLOAT64_C( -996.18), SIMDE_FLOAT64_C( 314.83), SIMDE_FLOAT64_C( 274.44), SIMDE_FLOAT64_C( -916.10), SIMDE_FLOAT64_C( -505.54), SIMDE_FLOAT64_C( 594.34), SIMDE_FLOAT64_C( 359.96) }, { SIMDE_FLOAT64_C( -955.00), SIMDE_FLOAT64_C( -996.00), SIMDE_FLOAT64_C( 314.00), SIMDE_FLOAT64_C( 274.00), SIMDE_FLOAT64_C( -916.00), SIMDE_FLOAT64_C( -505.00), SIMDE_FLOAT64_C( 594.00), SIMDE_FLOAT64_C( 359.00) } } }; /* sizeof ratio = number of rows in test_vec, so the loop follows the table size automatically */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_trunc_pd(a); /* NOTE(review): the trailing 1 is presumably the comparison precision -- confirm against the definition of simde_test_x86_assert_equal_f64x8 */ simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } /* Table-driven check of simde_mm512_mask_trunc_pd(src, k, a). In each row, element i of the expected array `r` is a[i] with its fractional part dropped when bit i of the mask `k` is set, and src[i] otherwise (first row: k = 90 = 0b01011010 selects elements 1, 3, 4 and 6). */ static int test_simde_mm512_mask_trunc_pd (SIMDE_MUNIT_TEST_ARGS) { static const struct { const simde_float64 src[8]; const simde__mmask8 k; const simde_float64 a[8]; const simde_float64 r[8]; } test_vec[] = { { { SIMDE_FLOAT64_C( 818.03), SIMDE_FLOAT64_C( 444.72), SIMDE_FLOAT64_C( 916.04), SIMDE_FLOAT64_C( -825.66), SIMDE_FLOAT64_C( 941.31), SIMDE_FLOAT64_C( -37.20), SIMDE_FLOAT64_C( -948.28), SIMDE_FLOAT64_C( -408.19) }, UINT8_C( 90), { SIMDE_FLOAT64_C( -903.02), SIMDE_FLOAT64_C( 326.13), SIMDE_FLOAT64_C( -77.85), SIMDE_FLOAT64_C( 808.82),
SIMDE_FLOAT64_C( -385.32), SIMDE_FLOAT64_C( -921.95), SIMDE_FLOAT64_C( -879.51), SIMDE_FLOAT64_C( 447.28) }, { SIMDE_FLOAT64_C( 818.03), SIMDE_FLOAT64_C( 326.00), SIMDE_FLOAT64_C( 916.04), SIMDE_FLOAT64_C( 808.00), SIMDE_FLOAT64_C( -385.00), SIMDE_FLOAT64_C( -37.20), SIMDE_FLOAT64_C( -879.00), SIMDE_FLOAT64_C( -408.19) } }, { { SIMDE_FLOAT64_C( -281.72), SIMDE_FLOAT64_C( 142.99), SIMDE_FLOAT64_C( -182.68), SIMDE_FLOAT64_C( -63.76), SIMDE_FLOAT64_C( 164.70), SIMDE_FLOAT64_C( -994.58), SIMDE_FLOAT64_C( -84.09), SIMDE_FLOAT64_C( 455.69) }, UINT8_C(145), { SIMDE_FLOAT64_C( 892.02), SIMDE_FLOAT64_C( 632.35), SIMDE_FLOAT64_C( 571.19), SIMDE_FLOAT64_C( -642.67), SIMDE_FLOAT64_C( -756.86), SIMDE_FLOAT64_C( 389.22), SIMDE_FLOAT64_C( 802.05), SIMDE_FLOAT64_C( -840.82) }, { SIMDE_FLOAT64_C( 892.00), SIMDE_FLOAT64_C( 142.99), SIMDE_FLOAT64_C( -182.68), SIMDE_FLOAT64_C( -63.76), SIMDE_FLOAT64_C( -756.00), SIMDE_FLOAT64_C( -994.58), SIMDE_FLOAT64_C( -84.09), SIMDE_FLOAT64_C( -840.00) } }, { { SIMDE_FLOAT64_C( 563.57), SIMDE_FLOAT64_C( 743.36), SIMDE_FLOAT64_C( 121.98), SIMDE_FLOAT64_C( 615.28), SIMDE_FLOAT64_C( -664.83), SIMDE_FLOAT64_C( 388.96), SIMDE_FLOAT64_C( 712.26), SIMDE_FLOAT64_C( 661.30) }, UINT8_C(219), { SIMDE_FLOAT64_C( 521.09), SIMDE_FLOAT64_C( -724.02), SIMDE_FLOAT64_C( -610.84), SIMDE_FLOAT64_C( 641.58), SIMDE_FLOAT64_C( 723.26), SIMDE_FLOAT64_C( 107.43), SIMDE_FLOAT64_C( -215.43), SIMDE_FLOAT64_C( -459.42) }, { SIMDE_FLOAT64_C( 521.00), SIMDE_FLOAT64_C( -724.00), SIMDE_FLOAT64_C( 121.98), SIMDE_FLOAT64_C( 641.00), SIMDE_FLOAT64_C( 723.00), SIMDE_FLOAT64_C( 388.96), SIMDE_FLOAT64_C( -215.00), SIMDE_FLOAT64_C( -459.00) } }, { { SIMDE_FLOAT64_C( -956.33), SIMDE_FLOAT64_C( 949.27), SIMDE_FLOAT64_C( -454.00), SIMDE_FLOAT64_C( -40.42), SIMDE_FLOAT64_C( 404.97), SIMDE_FLOAT64_C( -418.67), SIMDE_FLOAT64_C( -148.40), SIMDE_FLOAT64_C( 37.32) }, UINT8_C( 67), { SIMDE_FLOAT64_C( 208.93), SIMDE_FLOAT64_C( 280.46), SIMDE_FLOAT64_C( 541.75), SIMDE_FLOAT64_C( 10.98), 
SIMDE_FLOAT64_C( 439.64), SIMDE_FLOAT64_C( 105.31), SIMDE_FLOAT64_C( -245.66), SIMDE_FLOAT64_C( -438.38) }, { SIMDE_FLOAT64_C( 208.00), SIMDE_FLOAT64_C( 280.00), SIMDE_FLOAT64_C( -454.00), SIMDE_FLOAT64_C( -40.42), SIMDE_FLOAT64_C( 404.97), SIMDE_FLOAT64_C( -418.67), SIMDE_FLOAT64_C( -245.00), SIMDE_FLOAT64_C( 37.32) } }, { { SIMDE_FLOAT64_C( -279.41), SIMDE_FLOAT64_C( 89.51), SIMDE_FLOAT64_C( 950.57), SIMDE_FLOAT64_C( -567.14), SIMDE_FLOAT64_C( -249.19), SIMDE_FLOAT64_C( -738.32), SIMDE_FLOAT64_C( 953.94), SIMDE_FLOAT64_C( 26.79) }, UINT8_C(166), { SIMDE_FLOAT64_C( 595.52), SIMDE_FLOAT64_C( -249.94), SIMDE_FLOAT64_C( 758.28), SIMDE_FLOAT64_C( -619.90), SIMDE_FLOAT64_C( 290.64), SIMDE_FLOAT64_C( 801.95), SIMDE_FLOAT64_C( -670.63), SIMDE_FLOAT64_C( 836.64) }, { SIMDE_FLOAT64_C( -279.41), SIMDE_FLOAT64_C( -249.00), SIMDE_FLOAT64_C( 758.00), SIMDE_FLOAT64_C( -567.14), SIMDE_FLOAT64_C( -249.19), SIMDE_FLOAT64_C( 801.00), SIMDE_FLOAT64_C( 953.94), SIMDE_FLOAT64_C( 836.00) } }, { { SIMDE_FLOAT64_C( -238.47), SIMDE_FLOAT64_C( 734.34), SIMDE_FLOAT64_C( -582.03), SIMDE_FLOAT64_C( 613.13), SIMDE_FLOAT64_C( -228.35), SIMDE_FLOAT64_C( -429.51), SIMDE_FLOAT64_C( -177.94), SIMDE_FLOAT64_C( -947.89) }, UINT8_C(123), { SIMDE_FLOAT64_C( 833.04), SIMDE_FLOAT64_C( 491.75), SIMDE_FLOAT64_C( 217.55), SIMDE_FLOAT64_C( -412.62), SIMDE_FLOAT64_C( -946.63), SIMDE_FLOAT64_C( 938.15), SIMDE_FLOAT64_C( 676.89), SIMDE_FLOAT64_C( -996.06) }, { SIMDE_FLOAT64_C( 833.00), SIMDE_FLOAT64_C( 491.00), SIMDE_FLOAT64_C( -582.03), SIMDE_FLOAT64_C( -412.00), SIMDE_FLOAT64_C( -946.00), SIMDE_FLOAT64_C( 938.00), SIMDE_FLOAT64_C( 676.00), SIMDE_FLOAT64_C( -947.89) } }, { { SIMDE_FLOAT64_C( -629.00), SIMDE_FLOAT64_C( -572.30), SIMDE_FLOAT64_C( -734.38), SIMDE_FLOAT64_C( -675.05), SIMDE_FLOAT64_C( 454.50), SIMDE_FLOAT64_C( -83.54), SIMDE_FLOAT64_C( 920.47), SIMDE_FLOAT64_C( -795.45) }, UINT8_C( 5), { SIMDE_FLOAT64_C( -699.43), SIMDE_FLOAT64_C( 495.19), SIMDE_FLOAT64_C( -523.31), SIMDE_FLOAT64_C( -370.06), 
SIMDE_FLOAT64_C( 331.83), SIMDE_FLOAT64_C( 238.22), SIMDE_FLOAT64_C( -635.72), SIMDE_FLOAT64_C( 749.81) }, { SIMDE_FLOAT64_C( -699.00), SIMDE_FLOAT64_C( -572.30), SIMDE_FLOAT64_C( -523.00), SIMDE_FLOAT64_C( -675.05), SIMDE_FLOAT64_C( 454.50), SIMDE_FLOAT64_C( -83.54), SIMDE_FLOAT64_C( 920.47), SIMDE_FLOAT64_C( -795.45) } }, { { SIMDE_FLOAT64_C( -148.65), SIMDE_FLOAT64_C( 135.93), SIMDE_FLOAT64_C( -679.70), SIMDE_FLOAT64_C( 673.41), SIMDE_FLOAT64_C( 188.04), SIMDE_FLOAT64_C( -567.46), SIMDE_FLOAT64_C( 506.46), SIMDE_FLOAT64_C( -320.21) }, UINT8_C(186), { SIMDE_FLOAT64_C( -906.16), SIMDE_FLOAT64_C( -266.84), SIMDE_FLOAT64_C( 588.24), SIMDE_FLOAT64_C( 770.73), SIMDE_FLOAT64_C( -262.91), SIMDE_FLOAT64_C( 959.24), SIMDE_FLOAT64_C( -801.57), SIMDE_FLOAT64_C( 2.71) }, { SIMDE_FLOAT64_C( -148.65), SIMDE_FLOAT64_C( -266.00), SIMDE_FLOAT64_C( -679.70), SIMDE_FLOAT64_C( 770.00), SIMDE_FLOAT64_C( -262.00), SIMDE_FLOAT64_C( 959.00), SIMDE_FLOAT64_C( 506.46), SIMDE_FLOAT64_C( 2.00) } } }; /* for each row: load src and a from the plain arrays, apply the masked truncation with the row's mask k, then compare with the expected array r */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m512d src = simde_mm512_loadu_pd(test_vec[i].src); simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); simde__m512d r = simde_mm512_mask_trunc_pd(src, test_vec[i].k, a); simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1); } return 0; } /* Table-driven check of simde_mm_udivrem_epi32(). Each row holds operands `a` and `b`, the expected return value `r` and the expected vector `rem` that the call writes through its first (pointer) argument. The rows satisfy a = b * r + rem for values in the same argument position (first row: 527472553 = 377180600 * 1 + 150291953). test_vec is a non-static local here; its entries are built with simde_x_mm_set_epu32(). Returns 0 after all rows have been compared. */ static int test_simde_mm_udivrem_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128i a; simde__m128i b; simde__m128i rem; simde__m128i r; } test_vec[8] = { { simde_x_mm_set_epu32(UINT32_C(1747596798), UINT32_C(2231263307), UINT32_C( 527472553), UINT32_C(2891870298)), simde_x_mm_set_epu32(UINT32_C(4025088144), UINT32_C(4117928860), UINT32_C( 377180600), UINT32_C(3776380886)), simde_x_mm_set_epu32(UINT32_C(1747596798), UINT32_C(2231263307), UINT32_C( 150291953), UINT32_C(2891870298)), simde_x_mm_set_epu32(UINT32_C( 0), UINT32_C( 0), UINT32_C( 1), UINT32_C( 0)) }, { simde_x_mm_set_epu32(UINT32_C(3920294270), UINT32_C(3054162118), UINT32_C(1568850865),
UINT32_C(3151989757)), simde_x_mm_set_epu32(UINT32_C( 172780273), UINT32_C( 168508556), UINT32_C(3803608574), UINT32_C(4064895559)), simde_x_mm_set_epu32(UINT32_C( 119128264), UINT32_C( 21008110), UINT32_C(1568850865), UINT32_C(3151989757)), simde_x_mm_set_epu32(UINT32_C( 22), UINT32_C( 18), UINT32_C( 0), UINT32_C( 0)) }, { simde_x_mm_set_epu32(UINT32_C(1492341726), UINT32_C( 298608154), UINT32_C(1250819173), UINT32_C(3643996043)), simde_x_mm_set_epu32(UINT32_C( 298065861), UINT32_C(3773381365), UINT32_C( 330694282), UINT32_C( 40997390)), simde_x_mm_set_epu32(UINT32_C( 2012421), UINT32_C( 298608154), UINT32_C( 258736327), UINT32_C( 36225723)), simde_x_mm_set_epu32(UINT32_C( 5), UINT32_C( 0), UINT32_C( 3), UINT32_C( 88)) }, { simde_x_mm_set_epu32(UINT32_C(2708640028), UINT32_C(1691051285), UINT32_C( 50347892), UINT32_C( 728425428)), simde_x_mm_set_epu32(UINT32_C(3853764578), UINT32_C( 294920921), UINT32_C(3883385645), UINT32_C(4126975473)), simde_x_mm_set_epu32(UINT32_C(2708640028), UINT32_C( 216446680), UINT32_C( 50347892), UINT32_C( 728425428)), simde_x_mm_set_epu32(UINT32_C( 0), UINT32_C( 5), UINT32_C( 0), UINT32_C( 0)) }, { simde_x_mm_set_epu32(UINT32_C( 492373082), UINT32_C(4281870485), UINT32_C(2207786213), UINT32_C(3953959418)), simde_x_mm_set_epu32(UINT32_C( 123290430), UINT32_C(3996188341), UINT32_C( 223555334), UINT32_C(3962352253)), simde_x_mm_set_epu32(UINT32_C( 122501792), UINT32_C( 285682144), UINT32_C( 195788207), UINT32_C(3953959418)), simde_x_mm_set_epu32(UINT32_C( 3), UINT32_C( 1), UINT32_C( 9), UINT32_C( 0)) }, { simde_x_mm_set_epu32(UINT32_C(3290702646), UINT32_C(1580565751), UINT32_C(3823902839), UINT32_C(2081361826)), simde_x_mm_set_epu32(UINT32_C( 328620632), UINT32_C(3970654641), UINT32_C(4110215287), UINT32_C(3940207296)), simde_x_mm_set_epu32(UINT32_C( 4496326), UINT32_C(1580565751), UINT32_C(3823902839), UINT32_C(2081361826)), simde_x_mm_set_epu32(UINT32_C( 10), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0)) }, { simde_x_mm_set_epu32(UINT32_C(
542053192), UINT32_C( 499863549), UINT32_C( 957375358), UINT32_C(3003933707)), simde_x_mm_set_epu32(UINT32_C( 427537184), UINT32_C( 493530770), UINT32_C(3938875497), UINT32_C( 29647056)), simde_x_mm_set_epu32(UINT32_C( 114516008), UINT32_C( 6332779), UINT32_C( 957375358), UINT32_C( 9581051)), simde_x_mm_set_epu32(UINT32_C( 1), UINT32_C( 1), UINT32_C( 0), UINT32_C( 101)) }, { simde_x_mm_set_epu32(UINT32_C(4101755863), UINT32_C(3436978124), UINT32_C(3846637996), UINT32_C(2693603084)), simde_x_mm_set_epu32(UINT32_C(4010243988), UINT32_C(4123176886), UINT32_C( 457043765), UINT32_C(4197612290)), simde_x_mm_set_epu32(UINT32_C( 91511875), UINT32_C(3436978124), UINT32_C( 190287876), UINT32_C(2693603084)), simde_x_mm_set_epu32(UINT32_C( 1), UINT32_C( 0), UINT32_C( 8), UINT32_C( 0)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { /* rem is an output: it is declared uninitialised and filled in through the pointer handed to simde_mm_udivrem_epi32 */ simde__m128i rem; simde__m128i r = simde_mm_udivrem_epi32(&rem, test_vec[i].a, test_vec[i].b); /* both the returned vector and the one written through the pointer are checked */ simde_assert_m128i_u32(r, ==, test_vec[i].r); simde_assert_m128i_u32(rem, ==, test_vec[i].rem); } return 0; } /* Table-driven check of simde_mm_tanh_ps(). Each row pairs an input vector `a` with the expected vector `r`; expected values are given to two decimal places and match tanh of the input in the same argument position (e.g. -0.75 -> -0.64, 0.35 -> 0.34). */ static int test_simde_mm_tanh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128 a; simde__m128 r; } test_vec[8] = { { simde_mm_set_ps(SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.35)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( 0.34)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.03)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( 0.03)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.47)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 0.44)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C(
0.57)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.52)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.70)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 0.60)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.92)), simde_mm_set_ps(SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.73)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.66)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.58)) }, { simde_mm_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.69)), simde_mm_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.60)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128 r = simde_mm_tanh_ps(test_vec[i].a); /* NOTE(review): the trailing 1 is presumably the precision/tolerance of the "close" comparison -- confirm against the definition of simde_assert_m128_close */ simde_assert_m128_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven check of simde_mm_tanh_pd(). Each of the 8 rows pairs a two-element input vector `a` with the expected vector `r`; expected values are given to two decimal places and match tanh of the input in the same argument position (e.g. -0.75 -> -0.64, 0.35 -> 0.34). Returns 0 after all rows have been compared. */ static int test_simde_mm_tanh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m128d a; simde__m128d r; } test_vec[8] = { { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.35)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.64), SIMDE_FLOAT64_C( 0.34)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.03)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.29), SIMDE_FLOAT64_C( 0.03)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 0.67)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 0.58)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.47)),
simde_mm_set_pd(SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( 0.44)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.42)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 0.40)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.57)), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.52)) }, { simde_mm_set_pd(SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -0.69)), simde_mm_set_pd(SIMDE_FLOAT64_C( -0.59), SIMDE_FLOAT64_C( -0.60)) } }; /* sizeof ratio = number of rows in test_vec (8, per the array declaration) */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m128d r = simde_mm_tanh_pd(test_vec[i].a); /* NOTE(review): the trailing 1 is presumably the precision/tolerance of the "close" comparison -- confirm against the definition of simde_assert_m128d_close */ simde_assert_m128d_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven check of simde_mm256_tanh_ps(). Each row pairs an eight-element input vector `a` with the expected vector `r`; expected values are given to two decimal places and match tanh of the input in the same argument position (e.g. 0.50 -> 0.46, 0.67 -> 0.58). */ static int test_simde_mm256_tanh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256 a; simde__m256 r; } test_vec[8] = { { simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.35)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( 0.34)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.47)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 0.44)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.70)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.73),
SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 0.60)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.66)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.58)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.84)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.69)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.03)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.37), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( -0.03)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( 0.40)), simde_mm256_set_ps(SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( 0.38)) }, { simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.91), 
SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.25)), simde_mm256_set_ps(SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.24)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256 r = simde_mm256_tanh_ps(test_vec[i].a); /* NOTE(review): the trailing 1 is presumably the precision/tolerance of the "close" comparison -- confirm against the definition of simde_assert_m256_close */ simde_assert_m256_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven check of simde_mm256_tanh_pd(). Each of the 8 rows pairs a four-element input vector `a` with the expected vector `r`; expected values are given to two decimal places and match tanh of the input in the same argument position (e.g. -0.75 -> -0.64, 0.35 -> 0.34). Returns 0 after all rows have been compared. */ static int test_simde_mm256_tanh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256d a; simde__m256d r; } test_vec[8] = { { simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.35)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( -0.64), SIMDE_FLOAT64_C( 0.34)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.03)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( -0.29), SIMDE_FLOAT64_C( 0.03)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.47)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( 0.44)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.57)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.59), SIMDE_FLOAT64_C( -0.60), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.52)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.86), SIMDE_FLOAT64_C( -0.42), SIMDE_FLOAT64_C( 0.70)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( -0.70), SIMDE_FLOAT64_C( -0.40), SIMDE_FLOAT64_C( 0.60)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.38), SIMDE_FLOAT64_C( -0.92)), simde_mm256_set_pd(SIMDE_FLOAT64_C( -0.41), SIMDE_FLOAT64_C( 0.03),
SIMDE_FLOAT64_C( -0.36), SIMDE_FLOAT64_C( -0.73)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( -0.66)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( -0.58)) }, { simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.69)), simde_mm256_set_pd(SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( -0.42), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.60)) } }; /* sizeof ratio = number of rows in test_vec (8, per the array declaration) */ for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256d r = simde_mm256_tanh_pd(test_vec[i].a); /* NOTE(review): the trailing 1 is presumably the precision/tolerance of the "close" comparison -- confirm against the definition of simde_assert_m256d_close */ simde_assert_m256d_close(r, test_vec[i].r, 1); } return 0; } /* Table-driven check of simde_mm512_tanh_ps(). Each row pairs a sixteen-element input vector `a` with the expected vector `r`; expected values are given to two decimal places and match tanh of the input in the same argument position (e.g. -0.68 -> -0.59, 0.57 -> 0.52). */ static int test_simde_mm512_tanh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.35)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( 0.34)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.92),
SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.70)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( 0.60)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.84)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.37), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( 0.65), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.69)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( 0.40)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( -0.20), 
SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( -0.50), SIMDE_FLOAT32_C( 0.38)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.17)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.49), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.17)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -0.74)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.55), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( -0.63)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.92), 
SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.10)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.28), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.75), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.10)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.70)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.37), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( -0.60)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512 r = simde_mm512_tanh_ps(test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_mask_tanh_ps(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512 src; simde__mmask16 k; simde__m512 a; simde__m512 r; } test_vec[8] = { { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( 0.03), 
SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 0.70), SIMDE_FLOAT32_C( -0.69), SIMDE_FLOAT32_C( 0.57), SIMDE_FLOAT32_C( 0.42), SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.35)), UINT16_C(41466), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( -0.98), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( -0.75)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.66), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.40), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.35)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.47), SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.55), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.03)), UINT16_C(36797), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.17), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.40), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.26), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.99)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.17), 
SIMDE_FLOAT32_C( -0.15), SIMDE_FLOAT32_C( 0.91), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( 0.38), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.76)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.84), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.34), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.54)), UINT16_C(16804), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.69), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.45), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( 0.98), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( -0.87), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( -0.07)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.10), SIMDE_FLOAT32_C( 0.60), SIMDE_FLOAT32_C( -0.59), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -0.91), SIMDE_FLOAT32_C( 0.32), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( -0.53), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.54)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.57), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.29), SIMDE_FLOAT32_C( -0.58), 
SIMDE_FLOAT32_C( 0.71), SIMDE_FLOAT32_C( -0.76)), UINT16_C( 2107), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.90), SIMDE_FLOAT32_C( -0.20), SIMDE_FLOAT32_C( -0.36), SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.13), SIMDE_FLOAT32_C( 0.04), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( -0.70), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( -1.00), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.98)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( -0.44), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( -0.18), SIMDE_FLOAT32_C( 0.51), SIMDE_FLOAT32_C( 0.01), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( 0.75)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 0.18), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.89), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 0.13), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.44)), UINT16_C(22274), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.50), SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( 0.93), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.24), SIMDE_FLOAT32_C( -0.38), SIMDE_FLOAT32_C( -0.60), SIMDE_FLOAT32_C( -0.62), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( 0.48)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.35), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.48), SIMDE_FLOAT32_C( -0.60), 
SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.22), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( 0.44)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( -0.81), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( 0.56), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.43)), UINT16_C(27396), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.76), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( -0.72), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( -0.49)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( -0.39), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( -0.78), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( -0.68), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( -0.29), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( 0.78), SIMDE_FLOAT32_C( -0.63), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.43)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.03), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( -0.08), SIMDE_FLOAT32_C( 0.08), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( -0.89)), UINT16_C( 953), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.02), SIMDE_FLOAT32_C( 0.81), SIMDE_FLOAT32_C( 0.14), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.31), SIMDE_FLOAT32_C( 
-0.18), SIMDE_FLOAT32_C( -0.46), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( -0.27), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.58), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( 0.09)), simde_mm512_set_ps(SIMDE_FLOAT32_C( 0.53), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( -0.77), SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.59), SIMDE_FLOAT32_C( 0.07), SIMDE_FLOAT32_C( 0.64), SIMDE_FLOAT32_C( 0.12), SIMDE_FLOAT32_C( -0.52), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 0.39), SIMDE_FLOAT32_C( 0.09)) }, { simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( -0.20)), UINT16_C(12713), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.82), SIMDE_FLOAT32_C( 0.79), SIMDE_FLOAT32_C( -0.74), SIMDE_FLOAT32_C( -0.31), SIMDE_FLOAT32_C( 0.73), SIMDE_FLOAT32_C( -0.35), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( 0.11), SIMDE_FLOAT32_C( 0.72), SIMDE_FLOAT32_C( -0.25), SIMDE_FLOAT32_C( 0.94), SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( -0.01), SIMDE_FLOAT32_C( 0.85)), simde_mm512_set_ps(SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( 0.68), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( 0.96), SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( 0.83), SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( 0.06), SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( -0.42), SIMDE_FLOAT32_C( 0.74), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 0.69)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { 
simde__m512 r = simde_mm512_mask_tanh_ps(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm512_tanh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.35)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( -0.29), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( -0.64), SIMDE_FLOAT64_C( 0.34)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.47)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.59), SIMDE_FLOAT64_C( -0.60), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.52), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( 0.44)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.38), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.86), SIMDE_FLOAT64_C( -0.42), SIMDE_FLOAT64_C( 0.70)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.41), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.36), SIMDE_FLOAT64_C( -0.73), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( -0.70), SIMDE_FLOAT64_C( -0.40), SIMDE_FLOAT64_C( 0.60)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( -0.66)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( -0.42), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -0.75), 
SIMDE_FLOAT64_C( -0.58)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.84)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.65), SIMDE_FLOAT64_C( 0.41), SIMDE_FLOAT64_C( -0.52), SIMDE_FLOAT64_C( 0.36), SIMDE_FLOAT64_C( -0.65), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.69)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( -0.03)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.37), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( 0.49), SIMDE_FLOAT64_C( -0.25), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( -0.03)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.94), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.55), SIMDE_FLOAT64_C( 0.40)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( -0.74), SIMDE_FLOAT64_C( -0.64), SIMDE_FLOAT64_C( -0.65), SIMDE_FLOAT64_C( -0.50), SIMDE_FLOAT64_C( 0.38)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( 0.25)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 0.72), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.24)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_tanh_pd(test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int 
test_simde_mm512_mask_tanh_pd(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m512d src; simde__mmask8 k; simde__m512d a; simde__m512d r; } test_vec[8] = { { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.69), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.67), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.04), SIMDE_FLOAT64_C( 0.35)), UINT8_C(139), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.68), SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( -0.27), SIMDE_FLOAT64_C( 0.50), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( -0.75)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.59), SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( -0.64)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.18), SIMDE_FLOAT64_C( 0.23), SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( -0.38), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.42)), UINT8_C(229), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -0.66), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( -0.86)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( -0.42), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( -0.98), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( -0.31), SIMDE_FLOAT64_C( -0.70)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 0.40), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.26), SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( 0.99)), UINT8_C(253), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.55), SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.53), SIMDE_FLOAT64_C( 0.78), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.58), SIMDE_FLOAT64_C( -0.77)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.50), SIMDE_FLOAT64_C( 
-0.37), SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 0.49), SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( -0.65), SIMDE_FLOAT64_C( 0.38), SIMDE_FLOAT64_C( -0.65)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( 0.47), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.91), SIMDE_FLOAT64_C( 0.79), SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.75)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( -0.17), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( 0.60), SIMDE_FLOAT64_C( 0.25), SIMDE_FLOAT64_C( -0.08), SIMDE_FLOAT64_C( -0.94)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.12), SIMDE_FLOAT64_C( -0.17), SIMDE_FLOAT64_C( -0.15), SIMDE_FLOAT64_C( 0.68), SIMDE_FLOAT64_C( 0.54), SIMDE_FLOAT64_C( 0.24), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.74)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( -0.74), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.34), SIMDE_FLOAT64_C( -0.80), SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( -0.82), SIMDE_FLOAT64_C( 0.66)), UINT8_C(145), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.33), SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( -0.18), SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( -0.87), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( -0.19), SIMDE_FLOAT64_C( 0.56)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.32), SIMDE_FLOAT64_C( -0.74), SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 0.31), SIMDE_FLOAT64_C( -0.80), SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( -0.82), SIMDE_FLOAT64_C( 0.51)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.03), SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( -0.45), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.83), SIMDE_FLOAT64_C( 0.98)), UINT8_C( 75), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.98), SIMDE_FLOAT64_C( 0.42), SIMDE_FLOAT64_C( -0.10), SIMDE_FLOAT64_C( 0.84), SIMDE_FLOAT64_C( -0.59), SIMDE_FLOAT64_C( 0.73), SIMDE_FLOAT64_C( 0.62), SIMDE_FLOAT64_C( 0.14)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.40), 
SIMDE_FLOAT64_C( 0.69), SIMDE_FLOAT64_C( -0.02), SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.55), SIMDE_FLOAT64_C( 0.14)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( -0.30), SIMDE_FLOAT64_C( -0.70), SIMDE_FLOAT64_C( 0.82), SIMDE_FLOAT64_C( -1.00), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.07)), UINT8_C( 93), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.51), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( 0.92), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.57), SIMDE_FLOAT64_C( -0.34), SIMDE_FLOAT64_C( 0.29), SIMDE_FLOAT64_C( -0.58)), simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( -0.70), SIMDE_FLOAT64_C( -0.65), SIMDE_FLOAT64_C( -0.52), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.52)) }, { simde_mm512_set_pd(SIMDE_FLOAT64_C( 0.48), SIMDE_FLOAT64_C( 0.94), SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( -0.44), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( 0.93), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( -0.18)), UINT8_C(213), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.78), SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.90), SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -0.36), SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( 0.01), SIMDE_FLOAT64_C( -0.13)), simde_mm512_set_pd(SIMDE_FLOAT64_C( -0.65), SIMDE_FLOAT64_C( 0.41), SIMDE_FLOAT64_C( -0.35), SIMDE_FLOAT64_C( -0.20), SIMDE_FLOAT64_C( -0.75), SIMDE_FLOAT64_C( -0.03), SIMDE_FLOAT64_C( -0.33), SIMDE_FLOAT64_C( -0.13)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m512d r = simde_mm512_mask_tanh_pd(test_vec[i].src, test_vec[i].k, test_vec[i].a); simde_assert_m512d_close(r, test_vec[i].r, 1); } return 0; } static int test_simde_mm256_udivrem_epi32(SIMDE_MUNIT_TEST_ARGS) { const struct { simde__m256i a; simde__m256i b; simde__m256i rem; simde__m256i r; } test_vec[8] = { { simde_x_mm256_set_epu32(UINT32_C(3215450688), UINT32_C(3586813553), UINT32_C(1508722402), UINT32_C(2220621656), 
UINT32_C(1747596798), UINT32_C(2231263307), UINT32_C( 527472553), UINT32_C(2891870298)), simde_x_mm256_set_epu32(UINT32_C( 172780273), UINT32_C( 168508556), UINT32_C(3803608574), UINT32_C(4064895559), UINT32_C(4201299039), UINT32_C(3984766001), UINT32_C( 392212716), UINT32_C(4009222911)), simde_x_mm256_set_epu32(UINT32_C( 105405774), UINT32_C( 48133877), UINT32_C(1508722402), UINT32_C(2220621656), UINT32_C(1747596798), UINT32_C(2231263307), UINT32_C( 135259837), UINT32_C(2891870298)), simde_mm256_set_epi32(INT32_C( 18), INT32_C( 21), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 1), INT32_C( 0)) }, { simde_x_mm256_set_epu32(UINT32_C(1192263444), UINT32_C(2208623573), UINT32_C(1322777130), UINT32_C( 163989560), UINT32_C(1492341726), UINT32_C( 298608154), UINT32_C(1250819173), UINT32_C(3643996043)), simde_x_mm256_set_epu32(UINT32_C(3853764578), UINT32_C( 294920921), UINT32_C(3883385645), UINT32_C(4126975473), UINT32_C(3898385479), UINT32_C( 422762821), UINT32_C( 12586973), UINT32_C( 182106357)), simde_x_mm256_set_epu32(UINT32_C(1192263444), UINT32_C( 144177126), UINT32_C(1322777130), UINT32_C( 163989560), UINT32_C(1492341726), UINT32_C( 298608154), UINT32_C( 4708846), UINT32_C( 1868903)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 7), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 99), INT32_C( 20)) }, { simde_x_mm256_set_epu32(UINT32_C( 493161721), UINT32_C(3099851477), UINT32_C( 894221337), UINT32_C(2964507124), UINT32_C( 492373082), UINT32_C(4281870485), UINT32_C(2207786213), UINT32_C(3953959418)), simde_x_mm256_set_epu32(UINT32_C( 328620632), UINT32_C(3970654641), UINT32_C(4110215287), UINT32_C(3940207296), UINT32_C(4043901133), UINT32_C( 395141437), UINT32_C(4177201181), UINT32_C( 520340456)), simde_x_mm256_set_epu32(UINT32_C( 164541089), UINT32_C(3099851477), UINT32_C( 894221337), UINT32_C(2964507124), UINT32_C( 492373082), UINT32_C( 330456115), UINT32_C(2207786213), UINT32_C( 311576226)), simde_mm256_set_epi32(INT32_C( 1), 
INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 10), INT32_C( 0), INT32_C( 7)) }, { simde_x_mm256_set_epu32(UINT32_C(1710148738), UINT32_C(1974123080), UINT32_C(2870600100), UINT32_C( 118588227), UINT32_C( 542053192), UINT32_C( 499863549), UINT32_C( 957375358), UINT32_C(3003933707)), simde_x_mm256_set_epu32(UINT32_C(4010243988), UINT32_C(4123176886), UINT32_C( 457043765), UINT32_C(4197612290), UINT32_C(4246664437), UINT32_C(4080470003), UINT32_C(4182884971), UINT32_C(3894626243)), simde_x_mm256_set_epu32(UINT32_C(1710148738), UINT32_C(1974123080), UINT32_C( 128337510), UINT32_C( 118588227), UINT32_C( 542053192), UINT32_C( 499863549), UINT32_C( 957375358), UINT32_C(3003933707)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 6), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_x_mm256_set_epu32(UINT32_C(1734496959), UINT32_C( 380846712), UINT32_C(3352999607), UINT32_C(3555523675), UINT32_C(1995198557), UINT32_C(3314312199), UINT32_C(2406584253), UINT32_C(1779168063)), simde_x_mm256_set_epu32(UINT32_C( 440775120), UINT32_C(4165466156), UINT32_C(3932377571), UINT32_C(3942500746), UINT32_C( 67477586), UINT32_C( 108492873), UINT32_C( 360489056), UINT32_C( 254567893)), simde_x_mm256_set_epu32(UINT32_C( 412171599), UINT32_C( 380846712), UINT32_C(3352999607), UINT32_C(3555523675), UINT32_C( 38348563), UINT32_C( 59526009), UINT32_C( 243649917), UINT32_C( 251760705)), simde_mm256_set_epi32(INT32_C( 3), INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 29), INT32_C( 30), INT32_C( 6), INT32_C( 6)) }, { simde_x_mm256_set_epu32(UINT32_C(3932090380), UINT32_C(2449576763), UINT32_C(4246346280), UINT32_C( 201516689), UINT32_C(2859036576), UINT32_C(2362091228), UINT32_C(3141663427), UINT32_C( 562234020)), simde_x_mm256_set_epu32(UINT32_C(4128600985), UINT32_C(4209418337), UINT32_C( 525546139), UINT32_C( 219277873), UINT32_C( 295872976), UINT32_C(4150814551), UINT32_C(4029638246), UINT32_C(4092942946)), 
simde_x_mm256_set_epu32(UINT32_C(3932090380), UINT32_C(2449576763), UINT32_C( 41977168), UINT32_C( 201516689), UINT32_C( 196179792), UINT32_C(2362091228), UINT32_C(3141663427), UINT32_C( 562234020)), simde_mm256_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 8), INT32_C( 0), INT32_C( 9), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }, { simde_x_mm256_set_epu32(UINT32_C( 910061584), UINT32_C(2002226944), UINT32_C(3673004107), UINT32_C(4246624078), UINT32_C( 523093293), UINT32_C(3059761572), UINT32_C(2206005509), UINT32_C(1943141679)), simde_x_mm256_set_epu32(UINT32_C( 123967721), UINT32_C(4199435689), UINT32_C( 228811177), UINT32_C( 1270356), UINT32_C( 355625346), UINT32_C(4253972365), UINT32_C(3915742229), UINT32_C( 124491394)), simde_x_mm256_set_epu32(UINT32_C( 42287537), UINT32_C(2002226944), UINT32_C( 12025275), UINT32_C( 1094326), UINT32_C( 167467947), UINT32_C(3059761572), UINT32_C(2206005509), UINT32_C( 75770769)), simde_mm256_set_epi32(INT32_C( 7), INT32_C( 0), INT32_C( 16), INT32_C( 3342), INT32_C( 1), INT32_C( 0), INT32_C( 0), INT32_C( 15)) }, { simde_x_mm256_set_epu32(UINT32_C(1755684145), UINT32_C(2233240925), UINT32_C(3244523643), UINT32_C(2995026741), UINT32_C(2178270751), UINT32_C(1493088054), UINT32_C(4115137419), UINT32_C( 651362699)), simde_x_mm256_set_epu32(UINT32_C( 301617823), UINT32_C( 343728879), UINT32_C( 132913279), UINT32_C( 518796827), UINT32_C(4258812658), UINT32_C(3762000867), UINT32_C( 361195763), UINT32_C( 469656308)), simde_x_mm256_set_epu32(UINT32_C( 247595030), UINT32_C( 170867651), UINT32_C( 54604947), UINT32_C( 401042606), UINT32_C(2178270751), UINT32_C(1493088054), UINT32_C( 141984026), UINT32_C( 181706391)), simde_mm256_set_epi32(INT32_C( 5), INT32_C( 6), INT32_C( 24), INT32_C( 5), INT32_C( 0), INT32_C( 0), INT32_C( 11), INT32_C( 1)) } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { simde__m256i rem; simde__m256i r = simde_mm256_udivrem_epi32(&rem, test_vec[i].a, test_vec[i].b); simde_assert_m256i_u32(r, ==, 
test_vec[i].r); simde_assert_m256i_u32(rem, ==, test_vec[i].rem); } return 0; } HEDLEY_DIAGNOSTIC_PUSH HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL #if HEDLEY_HAS_WARNING("-Wold-style-cast") #pragma clang diagnostic ignored "-Wold-style-cast" #endif #if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" #endif SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_acos_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_acos_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_acos_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_acos_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_acos_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_acos_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_acos_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_acos_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_acosh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_acosh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_acosh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_acosh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_acosh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_acosh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_acosh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_acosh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_asin_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_asin_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_asin_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_asin_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_asin_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_asin_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_asin_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_asin_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_asinh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_asinh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_asinh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_asinh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_asinh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_asinh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_asinh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_asinh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_atan_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_atan_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_atan_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_atan_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_atan_ps) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm512_atan_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_atan_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_atan_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_atan2_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_atan2_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_atan2_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_atan2_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_atan2_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_atan2_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_atan2_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_atan2_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_atanh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_atanh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_atanh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_atanh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_atanh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_atanh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_atanh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_atanh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cbrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cbrt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cbrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cbrt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cbrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cbrt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cbrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cbrt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cdfnorm_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cdfnorm_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cdfnorm_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cdfnorm_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cdfnorm_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cdfnorm_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cdfnorm_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cdfnorm_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cdfnorminv_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cdfnorminv_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cdfnorminv_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cdfnorminv_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cdfnorminv_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cdfnorminv_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cdfnorminv_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cdfnorminv_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cos_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cos_pd) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cos_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cos_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cos_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cos_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cos_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cos_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cosd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cosd_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cosd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cosd_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cosd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cosd_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cosd_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cosd_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cosh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cosh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cosh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cosh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cosh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_cosh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cosh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_cosh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_cexp_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cexp_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_clog_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_clog_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_csqrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_csqrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_deg2rad_ps) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_deg2rad_pd) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm256_deg2rad_ps) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm256_deg2rad_pd) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm512_deg2rad_ps) SIMDE_TEST_FUNC_LIST_ENTRY(x_mm512_deg2rad_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_div_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_div_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_div_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_div_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_div_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_div_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_div_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_div_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_div_epu64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_div_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_div_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_div_epi32) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm256_div_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_div_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_div_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_div_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_div_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_div_epu64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_div_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_div_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_div_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_div_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_div_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_div_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_div_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_div_epu64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_erf_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_erf_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_erf_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_erf_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_erf_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_erf_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_erf_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_erf_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_erfinv_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_erfinv_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_erfinv_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_erfinv_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_erfinv_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_erfinv_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_erfinv_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_erfinv_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_erfc_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_erfc_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_erfc_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_erfc_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_erfc_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_erfc_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_erfc_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_erfc_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_erfcinv_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_erfcinv_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_erfcinv_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_erfcinv_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_erfcinv_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_erfcinv_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_erfcinv_ps) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_erfcinv_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_exp_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_exp_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_exp_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_exp_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_exp_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_exp_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_exp_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_exp_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_expm1_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_expm1_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_expm1_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_expm1_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_expm1_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_expm1_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_expm1_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_expm1_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_exp2_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_exp2_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_exp2_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_exp2_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_exp2_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_exp2_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_exp2_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_exp2_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_exp10_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_exp10_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_exp10_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_exp10_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_exp10_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_exp10_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_exp10_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_exp10_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_idivrem_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_idivrem_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_hypot_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_hypot_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_hypot_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_hypot_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_hypot_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_hypot_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_hypot_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_hypot_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_invcbrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_invcbrt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_invcbrt_ps) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm256_invcbrt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_invsqrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_invsqrt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_invsqrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_invsqrt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_invsqrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_invsqrt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_invsqrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_invsqrt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_log_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_log_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_log_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_log_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_log_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_log_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_log_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_log_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_log1p_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_log1p_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_log1p_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_log1p_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_log1p_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_log1p_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_log1p_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_log1p_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_log2_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_log2_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_log2_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_log2_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_log2_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_log2_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_log2_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_log2_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_log10_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_log10_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_log10_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_log10_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_log10_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_log10_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_log10_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_log10_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_logb_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_logb_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_logb_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_logb_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_logb_ps) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm512_logb_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_logb_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_logb_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_nearbyint_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_nearbyint_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_nearbyint_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_nearbyint_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_pow_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_pow_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_pow_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_pow_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_pow_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_pow_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_pow_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_pow_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_rem_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_rem_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_rem_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_rem_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_rem_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_rem_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_rem_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_rem_epu64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_rem_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_rem_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_rem_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_rem_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_rem_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_rem_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_rem_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_rem_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_rem_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_rem_epu64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_rem_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_rem_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_rem_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_rem_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_rem_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_rem_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_rem_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_rem_epu64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_recip_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_recip_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_recip_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_recip_pd) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm512_rint_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_rint_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_rint_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_rint_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sin_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sin_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sin_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sin_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sin_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sin_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_sin_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_sin_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sincos_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sincos_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sincos_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sincos_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sincos_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sincos_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_sincos_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_sincos_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sind_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sind_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sind_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sind_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sind_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sind_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_sind_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_sind_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sinh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_sinh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sinh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_sinh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sinh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_sinh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_sinh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_sinh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_svml_ceil_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_svml_ceil_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_svml_ceil_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_svml_ceil_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_ceil_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_ceil_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_ceil_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_ceil_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_svml_floor_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_svml_floor_pd) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm256_svml_floor_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_svml_floor_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_floor_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_floor_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_floor_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_floor_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_svml_round_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_svml_round_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_svml_round_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_svml_round_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_svml_round_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_svml_round_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_svml_sqrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_svml_sqrt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_svml_sqrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_svml_sqrt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_svml_sqrt_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_svml_sqrt_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_tan_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_tan_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_tan_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_tan_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_tan_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_tan_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_tan_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_tan_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_tand_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_tand_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_tand_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_tand_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_tand_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_tand_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_tand_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_tand_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_tanh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_tanh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_tanh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_tanh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_tanh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_tanh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_tanh_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_tanh_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_trunc_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm_trunc_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_trunc_ps) 
SIMDE_TEST_FUNC_LIST_ENTRY(mm256_trunc_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_trunc_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_trunc_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_trunc_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_mask_trunc_pd) SIMDE_TEST_FUNC_LIST_ENTRY(mm_udivrem_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_udivrem_epi32) SIMDE_TEST_FUNC_LIST_END #include simde-0.7.2/test/x86/test-avx.h000066400000000000000000000050731400333146700161570ustar00rootroot00000000000000#if !defined(SIMDE_TEST_X86_TEST_AVX_H) #define SIMDE_TEST_X86_TEST_AVX_H #include "test-x86.h" #include "test-sse2.h" #include "../../simde/x86/avx.h" SIMDE_TEST_X86_GENERATE_FLOAT_TYPE_FUNCS_(__m256, 32, 8, simde_mm256_storeu_ps) SIMDE_TEST_X86_GENERATE_FLOAT_TYPE_FUNCS_(__m256d, 64, 4, simde_mm256_storeu_pd) SIMDE_TEST_X86_GENERATE_INT_TYPE_FUNCS_(__m256i, 8, 32, simde_mm256_storeu_si256) SIMDE_TEST_X86_GENERATE_INT_TYPE_FUNCS_(__m256i, 16, 16, simde_mm256_storeu_si256) SIMDE_TEST_X86_GENERATE_INT_TYPE_FUNCS_(__m256i, 32, 8, simde_mm256_storeu_si256) SIMDE_TEST_X86_GENERATE_INT_TYPE_FUNCS_(__m256i, 64, 4, simde_mm256_storeu_si256) SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m256i, 8, 32, simde_mm256_storeu_si256) SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m256i, 16, 16, simde_mm256_storeu_si256) SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m256i, 32, 8, simde_mm256_storeu_si256) SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m256i, 64, 4, simde_mm256_storeu_si256) #define simde_test_x86_assert_equal_f32x8(a, b, precision) do { if (simde_test_x86_assert_equal_f32x8_(a, b, 1e-##precision##f, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_f64x4(a, b, precision) do { if (simde_test_x86_assert_equal_f64x4_(a, b, 1e-##precision, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_i8x32(a, b) do { if (simde_test_x86_assert_equal_i8x32_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_i16x16(a, 
b) do { if (simde_test_x86_assert_equal_i16x16_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_i32x8(a, b) do { if (simde_test_x86_assert_equal_i32x8_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_i64x4(a, b) do { if (simde_test_x86_assert_equal_i64x4_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_u8x32(a, b) do { if (simde_test_x86_assert_equal_u8x32_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_u16x16(a, b) do { if (simde_test_x86_assert_equal_u16x16_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_u32x8(a, b) do { if (simde_test_x86_assert_equal_u32x8_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_u64x4(a, b) do { if (simde_test_x86_assert_equal_u64x4_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #endif /* !defined(SIMDE_TEST_X86_TEST_AVX_H) */ simde-0.7.2/test/x86/test-mmx.h000066400000000000000000000047671400333146700161730ustar00rootroot00000000000000#if !defined(SIMDE_TEST_X86_TEST_MMX_H) #define SIMDE_TEST_X86_TEST_MMX_H #include "test-x86.h" #include "../../simde/x86/mmx.h" SIMDE_TEST_X86_GENERATE_FLOAT_TYPE_FUNCS_(__m64, 32, 2, simde_x_mm_storeu_si64) SIMDE_TEST_X86_GENERATE_FLOAT_TYPE_FUNCS_(__m64, 64, 1, simde_x_mm_storeu_si64) SIMDE_TEST_X86_GENERATE_INT_TYPE_FUNCS_(__m64, 8, 8, simde_x_mm_storeu_si64) SIMDE_TEST_X86_GENERATE_INT_TYPE_FUNCS_(__m64, 16, 4, simde_x_mm_storeu_si64) SIMDE_TEST_X86_GENERATE_INT_TYPE_FUNCS_(__m64, 32, 2, simde_x_mm_storeu_si64) SIMDE_TEST_X86_GENERATE_INT_TYPE_FUNCS_(__m64, 64, 1, simde_x_mm_storeu_si64) SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m64, 8, 8, simde_x_mm_storeu_si64) SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m64, 16, 4, simde_x_mm_storeu_si64) SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m64, 32, 
2, simde_x_mm_storeu_si64) SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m64, 64, 1, simde_x_mm_storeu_si64) #define simde_test_x86_assert_equal_f32x2(a, b, precision) do { if (simde_test_x86_assert_equal_f32x2_(a, b, 1e-##precision##f, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_f64x1(a, b, precision) do { if (simde_test_x86_assert_equal_f64x1_(a, b, 1e-##precision, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_i8x8(a, b) do { if (simde_test_x86_assert_equal_i8x8_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_i16x4(a, b) do { if (simde_test_x86_assert_equal_i16x4_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_i32x2(a, b) do { if (simde_test_x86_assert_equal_i32x2_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_i64x1(a, b) do { if (simde_test_x86_assert_equal_i64x1_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_u8x8(a, b) do { if (simde_test_x86_assert_equal_u8x8_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_u16x4(a, b) do { if (simde_test_x86_assert_equal_u16x4_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_u32x2(a, b) do { if (simde_test_x86_assert_equal_u32x2_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_u64x1(a, b) do { if (simde_test_x86_assert_equal_u64x1_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #endif /* !defined(SIMDE_TEST_X86_TEST_MMX_H) */ simde-0.7.2/test/x86/test-sse.h000066400000000000000000000007201400333146700161450ustar00rootroot00000000000000#if !defined(SIMDE_TEST_X86_TEST_SSE_H) #define SIMDE_TEST_X86_TEST_SSE_H #include "test-x86.h" #include "test-mmx.h" #include "../../simde/x86/sse.h" 
SIMDE_TEST_X86_GENERATE_FLOAT_TYPE_FUNCS_(__m128, 32, 4, simde_mm_storeu_ps) #define simde_test_x86_assert_equal_f32x4(a, b, precision) do { if (simde_test_x86_assert_equal_f32x4_(a, b, 1e-##precision##f, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #endif /* !defined(SIMDE_TEST_X86_TEST_SSE_H) */ simde-0.7.2/test/x86/test-sse2.h000066400000000000000000000044261400333146700162360ustar00rootroot00000000000000#if !defined(SIMDE_TEST_X86_TEST_SSE2_H) #define SIMDE_TEST_X86_TEST_SSE2_H #include "test-x86.h" #include "test-sse.h" #include "../../simde/x86/sse2.h" SIMDE_TEST_X86_GENERATE_FLOAT_TYPE_FUNCS_(__m128d, 64, 2, simde_mm_storeu_pd) SIMDE_TEST_X86_GENERATE_INT_TYPE_FUNCS_(__m128i, 8, 16, simde_mm_storeu_si128) SIMDE_TEST_X86_GENERATE_INT_TYPE_FUNCS_(__m128i, 16, 8, simde_mm_storeu_si128) SIMDE_TEST_X86_GENERATE_INT_TYPE_FUNCS_(__m128i, 32, 4, simde_mm_storeu_si128) SIMDE_TEST_X86_GENERATE_INT_TYPE_FUNCS_(__m128i, 64, 2, simde_mm_storeu_si128) SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m128i, 8, 16, simde_mm_storeu_si128) SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m128i, 16, 8, simde_mm_storeu_si128) SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m128i, 32, 4, simde_mm_storeu_si128) SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(__m128i, 64, 2, simde_mm_storeu_si128) #define simde_test_x86_assert_equal_f64x2(a, b, precision) do { if (simde_test_x86_assert_equal_f64x2_(a, b, 1e-##precision, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_i8x16(a, b) do { if (simde_test_x86_assert_equal_i8x16_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_i16x8(a, b) do { if (simde_test_x86_assert_equal_i16x8_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_i32x4(a, b) do { if (simde_test_x86_assert_equal_i32x4_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_i64x2(a, b) do { if 
(simde_test_x86_assert_equal_i64x2_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_u8x16(a, b) do { if (simde_test_x86_assert_equal_u8x16_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_u16x8(a, b) do { if (simde_test_x86_assert_equal_u16x8_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_u32x4(a, b) do { if (simde_test_x86_assert_equal_u32x4_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #define simde_test_x86_assert_equal_u64x2(a, b) do { if (simde_test_x86_assert_equal_u64x2_(a, b, __FILE__, __LINE__, #a, #b)) { return 1; } } while (0) #endif /* !defined(SIMDE_TEST_X86_TEST_SSE2_H) */ simde-0.7.2/test/x86/test-x86-footer.h000066400000000000000000000022401400333146700172730ustar00rootroot00000000000000#if defined(SIMDE_TEST_BARE) int main(void) { int retval = EXIT_SUCCESS; fprintf(stdout, "1..%zu\n", (sizeof(test_suite_tests) / sizeof(test_suite_tests[0]))); for (size_t i = 0 ; i < (sizeof(test_suite_tests) / sizeof(test_suite_tests[0])) ; i++) { int res = test_suite_tests[i].func(); if (res != 0) { retval = EXIT_FAILURE; fprintf(stdout, "not ok %zu " HEDLEY_STRINGIFY(SIMDE_TESTS_CURRENT_ISAX) "/%s\n", i + 1, test_suite_tests[i].name); } else { fprintf(stdout, "ok %zu " HEDLEY_STRINGIFY(SIMDE_TESTS_CURRENT_ISAX) "/%s\n", i + 1, test_suite_tests[i].name); } } return retval; } #else #if defined(__cplusplus) static MunitSuite suite = { const_cast("/" HEDLEY_STRINGIFY(SIMDE_TESTS_CURRENT_ISAX)), test_suite_tests, NULL, 1, MUNIT_SUITE_OPTION_NONE }; #else static MunitSuite suite = { (char*) "/" HEDLEY_STRINGIFY(SIMDE_TESTS_CURRENT_ISAX), test_suite_tests, NULL, 1, MUNIT_SUITE_OPTION_NONE }; #endif HEDLEY_C_DECL MunitSuite* SIMDE_TEST_GENERATE_VARIANT_SYMBOL_CURRENT(HEDLEY_CONCAT(simde_test_x86_get_suite_,SIMDE_TESTS_CURRENT_ISAX)) (void) { return &suite; } #endif 
simde-0.7.2/test/x86/test-x86.h000066400000000000000000000235701400333146700160100ustar00rootroot00000000000000#if !defined(SIMDE_TEST_X86_TEST_X86_H) #define SIMDE_TEST_X86_TEST_X86_H #include "../test.h" #define SIMDE_TEST_X86_GENERATE_FLOAT_TYPE_FUNCS_(NT, EL, EC, SF) \ static simde##NT \ simde_test_x86_random_f##EL##x##EC(simde_float##EL min, simde_float##EL max) { \ simde_float##EL values[sizeof(simde##NT) / sizeof(simde_float##EL)]; \ simde_test_codegen_random_vf##EL(sizeof(values) / sizeof(values[0]), values, min, max); \ simde##NT r; \ simde_memcpy(&r, values, sizeof(r)); \ return r; \ } \ \ static void \ simde_test_x86_write_f##EL##x##EC(int indent, simde##NT value, SimdeTestVecPos pos) { \ simde_float##EL values[sizeof(value) / sizeof(simde_float##EL)]; \ SF(values, value); \ simde_test_codegen_write_vf##EL(indent, sizeof(values) / sizeof(values[0]), values, pos); \ } \ \ static int \ simde_test_x86_assert_equal_f##EL##x##EC##_(simde##NT a, simde##NT b, simde_float##EL slop, \ const char* filename, int line, const char* astr, const char* bstr) { \ simde_float##EL \ a_[sizeof(a) / sizeof(simde_float##EL)], \ b_[sizeof(a) / sizeof(simde_float##EL)]; \ \ SF(a_, a); \ SF(b_, b); \ \ return simde_assert_equal_vf##EL##_(sizeof(a_) / sizeof(a_[0]), a_, b_, slop, filename, line, astr, bstr); \ } \ \ static void \ simde_test_x86_random_f##EL##x##EC##_full( \ size_t test_sets, size_t vectors_per_set, \ simde_float##EL values[HEDLEY_ARRAY_PARAM(test_sets * vectors_per_set * (sizeof(simde##NT) / sizeof(simde_float##EL)))], \ simde_float##EL min, simde_float##EL max, SimdeTestVecFloatType type) { \ simde_test_codegen_random_vf##EL##_full(test_sets, vectors_per_set, sizeof(simde##NT) / sizeof(simde_float##EL), values, min, max, type); \ } \ \ static simde##NT \ simde_test_x86_random_extract_f##EL##x##EC(size_t set_num, size_t vectors_per_set, size_t vector_num, simde_float##EL* values) { \ const size_t elem_cnt = sizeof(simde##NT) / sizeof(simde_float##EL); \ const 
size_t set_cnt = elem_cnt * vectors_per_set; \ simde##NT r; \ simde_memcpy(&r, &(values[(set_num * set_cnt) + (vector_num * elem_cnt)]), sizeof(r)); \ return r; \ } #define SIMDE_TEST_X86_GENERATE_INT_TYPE_FUNCS_(NT, EL, EC, SF) \ static simde##NT \ simde_test_x86_random_i##EL##x##EC(void) { \ simde##NT v; \ simde_test_codegen_random_memory(sizeof(v), HEDLEY_REINTERPRET_CAST(uint8_t*, &v)); \ return v; \ } \ \ static void \ simde_test_x86_write_i##EL##x##EC(int indent, simde##NT value, SimdeTestVecPos pos) { \ int##EL##_t value_[sizeof(value) / sizeof(int##EL##_t)]; \ SF(SIMDE_ALIGN_CAST(simde##NT*, value_), value); \ simde_test_codegen_write_vi##EL(indent, sizeof(value_) / sizeof(value_[0]), value_, pos); \ } \ \ static int \ simde_test_x86_assert_equal_i##EL##x##EC##_(simde##NT a, simde##NT b, \ const char* filename, int line, const char* astr, const char* bstr) { \ int##EL##_t a_[sizeof(a) / sizeof(int##EL##_t)], b_[sizeof(a) / sizeof(int##EL##_t)]; \ SF(SIMDE_ALIGN_CAST(simde##NT*, a_), a); \ SF(SIMDE_ALIGN_CAST(simde##NT*, b_), b); \ return simde_assert_equal_vi##EL##_(sizeof(a_) / sizeof(a_[0]), a_, b_, filename, line, astr, bstr); \ } \ \ static int \ simde_test_x86_assert_close_i##EL##x##EC##_(simde##NT a, simde##NT b, int##EL##_t slop, \ const char* filename, int line, const char* astr, const char* bstr) { \ int##EL##_t a_[sizeof(a) / sizeof(int##EL##_t)], b_[sizeof(a) / sizeof(int##EL##_t)]; \ SF(SIMDE_ALIGN_CAST(simde##NT*, a_), a); \ SF(SIMDE_ALIGN_CAST(simde##NT*, b_), b); \ return simde_assert_close_vi##EL##_(sizeof(a_) / sizeof(a_[0]), a_, b_, slop, filename, line, astr, bstr); \ } #define SIMDE_TEST_X86_GENERATE_UINT_TYPE_FUNCS_(NT, EL, EC, SF) \ static simde##NT \ simde_test_x86_random_u##EL##x##EC(void) { \ simde##NT v; \ simde_test_codegen_random_memory(sizeof(v), HEDLEY_REINTERPRET_CAST(uint8_t*, &v)); \ return v; \ } \ \ static void \ simde_test_x86_write_u##EL##x##EC(int indent, simde##NT value, SimdeTestVecPos pos) { \ uint##EL##_t 
value_[sizeof(value) / sizeof(int##EL##_t)]; \ SF(SIMDE_ALIGN_CAST(simde##NT*, value_), value); \ simde_test_codegen_write_vu##EL(indent, sizeof(value_) / sizeof(value_[0]), value_, pos); \ } \ \ static int \ simde_test_x86_assert_equal_u##EL##x##EC##_(simde##NT a, simde##NT b, \ const char* filename, int line, const char* astr, const char* bstr) { \ uint##EL##_t a_[sizeof(a) / sizeof(int##EL##_t)], b_[sizeof(a) / sizeof(int##EL##_t)]; \ SF(SIMDE_ALIGN_CAST(simde##NT*, a_), a); \ SF(SIMDE_ALIGN_CAST(simde##NT*, b_), b); \ return simde_assert_equal_vu##EL##_(sizeof(a_) / sizeof(a_[0]), a_, b_, filename, line, astr, bstr); \ } \ \ static int \ simde_test_x86_assert_close_u##EL##x##EC##_(simde##NT a, simde##NT b, uint##EL##_t slop, \ const char* filename, int line, const char* astr, const char* bstr) { \ uint##EL##_t a_[sizeof(a) / sizeof(int##EL##_t)], b_[sizeof(a) / sizeof(int##EL##_t)]; \ SF(SIMDE_ALIGN_CAST(simde##NT*, a_), a); \ SF(SIMDE_ALIGN_CAST(simde##NT*, b_), b); \ return simde_assert_close_vu##EL##_(sizeof(a_) / sizeof(a_[0]), a_, b_, slop, filename, line, astr, bstr); \ } /* For compatibility only. 
Note that the operator is assumed to be == */ #define simde_assert_m64_i8(a, op, b) simde_test_x86_assert_equal_i8x8(a, b) #define simde_assert_m64_i16(a, op, b) simde_test_x86_assert_equal_i16x4(a, b) #define simde_assert_m64_i32(a, op, b) simde_test_x86_assert_equal_i32x2(a, b) #define simde_assert_m64_i64(a, op, b) simde_test_x86_assert_equal_i64x1(a, b) #define simde_assert_m64_u8(a, op, b) simde_test_x86_assert_equal_u8x8(a, b) #define simde_assert_m64_u16(a, op, b) simde_test_x86_assert_equal_u16x4(a, b) #define simde_assert_m64_u32(a, op, b) simde_test_x86_assert_equal_u32x2(a, b) #define simde_assert_m64_u64(a, op, b) simde_test_x86_assert_equal_u64x1(a, b) #define simde_assert_m128i_i8(a, op, b) simde_test_x86_assert_equal_i8x16(a, b) #define simde_assert_m128i_i16(a, op, b) simde_test_x86_assert_equal_i16x8(a, b) #define simde_assert_m128i_i32(a, op, b) simde_test_x86_assert_equal_i32x4(a, b) #define simde_assert_m128i_i64(a, op, b) simde_test_x86_assert_equal_i64x2(a, b) #define simde_assert_m128i_u8(a, op, b) simde_test_x86_assert_equal_u8x16(a, b) #define simde_assert_m128i_u16(a, op, b) simde_test_x86_assert_equal_u16x8(a, b) #define simde_assert_m128i_u32(a, op, b) simde_test_x86_assert_equal_u32x4(a, b) #define simde_assert_m128i_u64(a, op, b) simde_test_x86_assert_equal_u64x2(a, b) #define simde_assert_m128_close(a, b, precision) simde_test_x86_assert_equal_f32x4(a, b, precision) #define simde_assert_m128d_close(a, b, precision) simde_test_x86_assert_equal_f64x2(a, b, precision) #define simde_assert_m128_equal(a, b) simde_test_x86_assert_equal_i32x4(simde_mm_castps_si128(a), simde_mm_castps_si128(b)) #define simde_assert_m128d_equal(a, b) simde_test_x86_assert_equal_i32x4(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b)) #define simde_assert_m128i_equal(a, b) simde_test_x86_assert_equal_i32x4(a, b) #define simde_assert_m256i_i8(a, op, b) simde_test_x86_assert_equal_i8x32(a, b) #define simde_assert_m256i_i16(a, op, b) 
simde_test_x86_assert_equal_i16x16(a, b) #define simde_assert_m256i_i32(a, op, b) simde_test_x86_assert_equal_i32x8(a, b) #define simde_assert_m256i_i64(a, op, b) simde_test_x86_assert_equal_i64x4(a, b) #define simde_assert_m256i_u8(a, op, b) simde_test_x86_assert_equal_u8x32(a, b) #define simde_assert_m256i_u16(a, op, b) simde_test_x86_assert_equal_u16x16(a, b) #define simde_assert_m256i_u32(a, op, b) simde_test_x86_assert_equal_u32x8(a, b) #define simde_assert_m256i_u64(a, op, b) simde_test_x86_assert_equal_u64x4(a, b) #define simde_assert_m256_close(a, b, precision) simde_test_x86_assert_equal_f32x8(a, b, precision) #define simde_assert_m256d_close(a, b, precision) simde_test_x86_assert_equal_f64x4(a, b, precision) #define simde_assert_m256_equal(a, b) simde_test_x86_assert_equal_i32x8(simde_mm_castps_si128(a), simde_mm_castps_si128(b)) #define simde_assert_m256d_equal(a, b) simde_test_x86_assert_equal_i32x8(simde_mm_castpd_si256(a), simde_mm_castpd_si256(b)) #define simde_assert_m256i_equal(a, b) simde_test_x86_assert_equal_i32x8(a, b) #define simde_assert_m512i_i8(a, op, b) simde_test_x86_assert_equal_i8x64(a, b) #define simde_assert_m512i_i16(a, op, b) simde_test_x86_assert_equal_i16x32(a, b) #define simde_assert_m512i_i32(a, op, b) simde_test_x86_assert_equal_i32x16(a, b) #define simde_assert_m512i_i64(a, op, b) simde_test_x86_assert_equal_i64x8(a, b) #define simde_assert_m512i_u8(a, op, b) simde_test_x86_assert_equal_u8x64(a, b) #define simde_assert_m512i_u16(a, op, b) simde_test_x86_assert_equal_u16x32(a, b) #define simde_assert_m512i_u32(a, op, b) simde_test_x86_assert_equal_u32x16(a, b) #define simde_assert_m512i_u64(a, op, b) simde_test_x86_assert_equal_u64x8(a, b) #define simde_assert_m512_close(a, b, precision) simde_test_x86_assert_equal_f32x16(a, b, precision) #define simde_assert_m512d_close(a, b, precision) simde_test_x86_assert_equal_f64x8(a, b, precision) #define simde_assert_m512_equal(a, b) 
simde_test_x86_assert_equal_i32x16(simde_mm_castps_si128(a), simde_mm_castps_si128(b)) #define simde_assert_m512d_equal(a, b) simde_test_x86_assert_equal_i32x16(simde_mm_castpd_si512(a), simde_mm_castpd_si512(b)) #define simde_assert_m512i_equal(a, b) simde_test_x86_assert_equal_i32x16(a, b) #define simde_assert_equal_mmask8(a, b) simde_assert_equal_u8(a, b) #define simde_assert_equal_mmask16(a, b) simde_assert_equal_u16(a, b) #define simde_assert_equal_mmask32(a, b) simde_assert_equal_u32(a, b) #define simde_assert_equal_mmask64(a, b) simde_assert_equal_u64(a, b) #if !defined(SIMDE_TEST_BARE) #define SIMDE_TEST_DECLARE_SUITE(name) SIMDE_TEST_SUITE_DECLARE_GETTERS(HEDLEY_CONCAT(simde_test_x86_get_suite_,name)) #include "declare-suites.h" #undef SIMDE_TEST_DECLARE_SUITE #endif #endif /* !defined(SIMDE_TEST_X86_TEST_X86_H) */ simde-0.7.2/test/x86/xop.c000066400000000000000000017623111400333146700152130ustar00rootroot00000000000000/* Copyright (c) 2020 Evan Nemerson * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define SIMDE_TESTS_CURRENT_ISAX xop #include #include static int test_simde_mm_cmov_si128 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int32_t b[4]; const int32_t c[4]; const int32_t r[4]; } test_vec[] = { { { INT32_C( 500254467), -INT32_C( 199025237), -INT32_C( 819815926), -INT32_C( 1605995992) }, { -INT32_C( 723927998), INT32_C( 2062677536), -INT32_C( 1465824605), INT32_C( 443551646) }, { INT32_C( 487232486), -INT32_C( 1422518099), -INT32_C( 670557173), INT32_C( 1834540766) }, { -INT32_C( 573493502), -INT32_C( 253493591), -INT32_C( 392078678), INT32_C( 845173512) } }, { { -INT32_C( 764207660), INT32_C( 823075782), -INT32_C( 662371768), -INT32_C( 549839234) }, { INT32_C( 190769969), -INT32_C( 203753439), INT32_C( 1635986845), -INT32_C( 74165934) }, { INT32_C( 593219169), INT32_C( 456309172), INT32_C( 1840319889), -INT32_C( 1977469073) }, { INT32_C( 173485392), -INT32_C( 238395003), INT32_C( 1216552972), -INT32_C( 71936386) } }, { { -INT32_C( 1321871351), INT32_C( 318162610), INT32_C( 888886504), -INT32_C( 1797074133) }, { -INT32_C( 1879353328), -INT32_C( 2126397295), INT32_C( 797962919), INT32_C( 1645241371) }, { -INT32_C( 705109608), INT32_C( 1366395142), INT32_C( 400038163), INT32_C( 2116709394) }, { -INT32_C( 1691101176), -INT32_C( 1871658861), INT32_C( 1021309604), INT32_C( 338837515) } }, { { -INT32_C( 575061495), -INT32_C( 1423687419), -INT32_C( 444253775), -INT32_C( 1326666713) }, { INT32_C( 1074630150), -INT32_C( 130725435), -INT32_C( 746388409), INT32_C( 39069103) }, { -INT32_C( 671148939), -INT32_C( 507297094), INT32_C( 1509780440), -INT32_C( 1130489794) }, { -INT32_C( 709260797), -INT32_C( 1187755707), -INT32_C( 1014551145), -INT32_C( 1295241817) } }, { 
{ -INT32_C( 1159895173), INT32_C( 1328090776), INT32_C( 811769515), INT32_C( 1749550505) }, { INT32_C( 1127503740), INT32_C( 1795608149), INT32_C( 1365118788), -INT32_C( 837833210) }, { INT32_C( 198243614), -INT32_C( 1640636121), -INT32_C( 169972120), -INT32_C( 118549335) }, { INT32_C( 1257531258), INT32_C( 1864568400), INT32_C( 809635628), INT32_C( 1850257071) } }, { { -INT32_C( 356543931), -INT32_C( 1865028991), -INT32_C( 1777292004), INT32_C( 774207527) }, { -INT32_C( 95437033), INT32_C( 742508825), -INT32_C( 329188187), -INT32_C( 97559722) }, { -INT32_C( 1811264007), -INT32_C( 1709661980), -INT32_C( 260321911), INT32_C( 617184842) }, { -INT32_C( 363875769), -INT32_C( 1269700199), -INT32_C( 1676624596), -INT32_C( 30967530) } }, { { INT32_C( 1354780047), INT32_C( 1110822678), -INT32_C( 615591541), -INT32_C( 1242439335) }, { INT32_C( 142348735), -INT32_C( 810027226), INT32_C( 638056626), -INT32_C( 136001769) }, { -INT32_C( 712234834), INT32_C( 687471304), -INT32_C( 1961790398), INT32_C( 1914613593) }, { INT32_C( 1492130207), -INT32_C( 952643802), -INT32_C( 1358505806), -INT32_C( 1242504865) } }, { { INT32_C( 276653248), INT32_C( 2023422192), INT32_C( 35435968), -INT32_C( 262382795) }, { INT32_C( 1360844699), INT32_C( 645288660), -INT32_C( 1005425167), INT32_C( 2040318975) }, { INT32_C( 1920323875), -INT32_C( 592860694), INT32_C( 1243778522), INT32_C( 1461404237) }, { INT32_C( 293422744), INT32_C( 2061433588), -INT32_C( 2045712927), INT32_C( 2023541687) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i c = simde_mm_loadu_epi32(test_vec[i].c); simde__m128i r = simde_mm_cmov_si128(a, b, c); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = 
simde_test_x86_random_i32x4(); simde__m128i c = simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_cmov_si128(a, b, c); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_cmov_si256 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[8]; const int32_t b[8]; const int32_t c[8]; const int32_t r[8]; } test_vec[] = { { { -INT32_C( 2048228304), -INT32_C( 943376558), INT32_C( 916505603), -INT32_C( 1560329864), INT32_C( 1119380968), INT32_C( 798876684), INT32_C( 882820511), -INT32_C( 2078087596) }, { -INT32_C( 1123480214), INT32_C( 1199885892), INT32_C( 175973522), INT32_C( 1303149925), -INT32_C( 292526622), -INT32_C( 400741047), INT32_C( 1193065714), INT32_C( 231423906) }, { -INT32_C( 1865755316), INT32_C( 920145571), -INT32_C( 666872462), -INT32_C( 1272582702), -INT32_C( 1683835566), -INT32_C( 712785694), INT32_C( 505192316), INT32_C( 707520478) }, { -INT32_C( 1379169246), INT32_C( 1204061766), INT32_C( 306013314), -INT32_C( 374517387), INT32_C( 1722836448), INT32_C( 765324553), INT32_C( 1427940766), INT32_C( 98762356) } }, { { INT32_C( 1622930876), -INT32_C( 1231645885), -INT32_C( 1148266776), INT32_C( 359642051), INT32_C( 1269830249), INT32_C( 1310733266), -INT32_C( 1318306605), -INT32_C( 522414044) }, { -INT32_C( 784296051), INT32_C( 310892074), INT32_C( 1892488621), INT32_C( 830881224), INT32_C( 561854031), INT32_C( 1030724970), -INT32_C( 17900582), INT32_C( 31378035) }, { -INT32_C( 1949163679), -INT32_C( 1566680587), INT32_C( 907242350), -INT32_C( 144139864), INT32_C( 957933007), INT32_C( 1551271810), -INT32_C( 698718877), -INT32_C( 1848166097) }, { INT32_C( 1351783852), -INT32_C( 1298688181), INT32_C( 1926125289), INT32_C( 367965632), INT32_C( 158667337), INT32_C( 1831410666), -INT32_C( 1192444197), 
-INT32_C( 2133009804) } }, { { INT32_C( 1293723992), INT32_C( 1894824450), -INT32_C( 827915482), INT32_C( 1808142236), INT32_C( 1990582004), -INT32_C( 908911770), -INT32_C( 1331745408), -INT32_C( 1103005850) }, { INT32_C( 588012832), INT32_C( 1049885720), -INT32_C( 1693697281), INT32_C( 1023857225), INT32_C( 397716401), INT32_C( 1205897159), INT32_C( 469204916), INT32_C( 400111862) }, { -INT32_C( 1371871850), -INT32_C( 521351711), INT32_C( 1367078920), INT32_C( 2089779915), -INT32_C( 191675603), INT32_C( 2117891018), -INT32_C( 375835662), INT32_C( 33583980) }, { INT32_C( 219986224), INT32_C( 2129901592), -INT32_C( 903413001), INT32_C( 1770410632), INT32_C( 2007362228), INT32_C( 1238406983), -INT32_C( 1291883132), INT32_C( 400129014) } }, { { INT32_C( 984627800), INT32_C( 253402119), INT32_C( 1600231060), INT32_C( 1172041752), -INT32_C( 46502349), -INT32_C( 730106143), INT32_C( 364713385), -INT32_C( 535315064) }, { -INT32_C( 31799305), -INT32_C( 133286557), -INT32_C( 480809013), -INT32_C( 1842859169), -INT32_C( 2087755103), -INT32_C( 2108224808), -INT32_C( 1483271137), -INT32_C( 930566191) }, { -INT32_C( 624516490), -INT32_C( 1563241257), -INT32_C( 1568331453), -INT32_C( 30101924), -INT32_C( 377371888), -INT32_C( 311699250), -INT32_C( 1114307860), INT32_C( 679812529) }, { INT32_C( 1050175441), INT32_C( 1511961895), INT32_C( 1129531016), INT32_C( 1142468891), -INT32_C( 351378767), -INT32_C( 964734256), INT32_C( 395777211), -INT32_C( 535969856) } }, { { -INT32_C( 1761457217), INT32_C( 1664734240), INT32_C( 1510391805), INT32_C( 2102934125), -INT32_C( 865674754), -INT32_C( 1648766543), -INT32_C( 2057679148), INT32_C( 732815211) }, { INT32_C( 1254272810), -INT32_C( 2119238781), INT32_C( 668710074), -INT32_C( 324717586), -INT32_C( 1112012020), -INT32_C( 1319472419), INT32_C( 741782720), -INT32_C( 1118313325) }, { INT32_C( 386406803), -INT32_C( 812075500), INT32_C( 1492546410), -INT32_C( 1304061018), -INT32_C( 2073035353), INT32_C( 808831343), INT32_C( 274492285), 
-INT32_C( 473058481) }, { INT32_C( 1606594491), INT32_C( 1128193411), INT32_C( 2131736568), INT32_C( 2095069804), -INT32_C( 1107895378), -INT32_C( 1854147663), INT32_C( 746249940), INT32_C( 1067440091) } }, { { -INT32_C( 520432180), -INT32_C( 173043061), -INT32_C( 1420909051), -INT32_C( 396520639), INT32_C( 7130256), INT32_C( 305176981), INT32_C( 1545767949), INT32_C( 188739391) }, { INT32_C( 1357593029), -INT32_C( 800678965), -INT32_C( 2105830335), -INT32_C( 1217669081), INT32_C( 968349604), -INT32_C( 2041845895), -INT32_C( 1293783693), INT32_C( 582820445) }, { INT32_C( 661825883), -INT32_C( 2047363004), INT32_C( 1929868108), -INT32_C( 265588149), -INT32_C( 1020665270), INT32_C( 1028224458), INT32_C( 1072704482), -INT32_C( 1453216435) }, { INT32_C( 1895535052), -INT32_C( 710438005), -INT32_C( 1551981051), -INT32_C( 414606747), INT32_C( 952030628), -INT32_C( 1845320783), -INT32_C( 1675473903), INT32_C( 196980509) } }, { { -INT32_C( 1697590186), -INT32_C( 652228724), -INT32_C( 2041829574), -INT32_C( 478775399), INT32_C( 614899546), -INT32_C( 144576748), INT32_C( 1748390171), INT32_C( 1427216639) }, { -INT32_C( 118431380), -INT32_C( 472838231), -INT32_C( 798417354), -INT32_C( 273424491), -INT32_C( 1827448193), INT32_C( 1686795593), -INT32_C( 976371258), -INT32_C( 988029351) }, { INT32_C( 1773996992), INT32_C( 1363971866), INT32_C( 1109505709), INT32_C( 338875541), INT32_C( 2024293678), -INT32_C( 2116275526), INT32_C( 1279699443), INT32_C( 1209098632) }, { -INT32_C( 1731141524), -INT32_C( 207779927), -INT32_C( 1840763334), -INT32_C( 340566127), -INT32_C( 1548542117), -INT32_C( 448633007), -INT32_C( 913358569), -INT32_C( 988045607) } }, { { -INT32_C( 2018390164), INT32_C( 198770014), INT32_C( 1229846963), -INT32_C( 60981299), INT32_C( 2138310085), INT32_C( 721440824), -INT32_C( 2106046470), INT32_C( 365595048) }, { -INT32_C( 1231258792), INT32_C( 750875768), INT32_C( 980750189), INT32_C( 1396101774), INT32_C( 282307288), -INT32_C( 180628485), -INT32_C( 1015565542), 
-INT32_C( 1780989635) }, { INT32_C( 877425852), INT32_C( 1432358376), -INT32_C( 1416637156), -INT32_C( 2130786648), INT32_C( 1804652912), -INT32_C( 1084175451), -INT32_C( 1132275841), -INT32_C( 732866024) }, { -INT32_C( 2037362836), INT32_C( 700546392), INT32_C( 427678577), -INT32_C( 748923250), INT32_C( 2069048264), INT32_C( 1796952186), -INT32_C( 1015715974), INT32_C( 365431085) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256i a = simde_mm256_loadu_epi32(test_vec[i].a); simde__m256i b = simde_mm256_loadu_epi32(test_vec[i].b); simde__m256i c = simde_mm256_loadu_epi32(test_vec[i].c); simde__m256i r = simde_mm256_cmov_si256(a, b, c); simde_test_x86_assert_equal_i32x8(r, simde_mm256_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256i a = simde_test_x86_random_i32x8(); simde__m256i b = simde_test_x86_random_i32x8(); simde__m256i c = simde_test_x86_random_i32x8(); simde__m256i r = simde_mm256_cmov_si256(a, b, c); simde_test_x86_write_i32x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comeq_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { { { INT8_C( 9), INT8_C( 95), INT8_C( 12), -INT8_C( 23), -INT8_C( 38), -INT8_C( 37), INT8_C( 12), -INT8_C( 46), -INT8_C( 96), INT8_C( 66), -INT8_C( 19), INT8_C( 41), -INT8_C( 21), -INT8_C( 79), -INT8_C( 19), -INT8_C( 122) }, { INT8_C( 9), INT8_C( 95), INT8_C( 12), -INT8_C( 23), -INT8_C( 38), -INT8_C( 37), INT8_C( 12), -INT8_C( 46), -INT8_C( 96), INT8_C( 66), -INT8_C( 19), INT8_C( 41), -INT8_C( 21), -INT8_C( 79), -INT8_C( 19), -INT8_C( 122) }, { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), 
-INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } }, { { INT8_C( 3), INT8_C( 38), -INT8_C( 98), INT8_C( 126), INT8_C( 51), -INT8_C( 121), -INT8_C( 33), -INT8_C( 55), INT8_C( 103), -INT8_C( 25), INT8_C( 11), -INT8_C( 92), -INT8_C( 6), INT8_C( 6), INT8_C( 112), INT8_C( 117) }, { -INT8_C( 55), INT8_C( 22), INT8_C( 3), -INT8_C( 82), -INT8_C( 116), -INT8_C( 59), INT8_C( 81), -INT8_C( 13), INT8_C( 102), INT8_C( 119), -INT8_C( 71), INT8_MAX, -INT8_C( 70), -INT8_C( 45), -INT8_C( 39), -INT8_C( 60) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 65), -INT8_C( 126), INT8_C( 70), INT8_C( 81), INT8_C( 89), INT8_C( 51), INT8_MIN, -INT8_C( 7), -INT8_C( 96), -INT8_C( 29), INT8_C( 37), INT8_C( 50), -INT8_C( 62), INT8_C( 91), INT8_C( 100), -INT8_C( 69) }, { -INT8_C( 113), -INT8_C( 6), -INT8_C( 75), -INT8_C( 126), INT8_C( 81), -INT8_C( 76), -INT8_C( 57), -INT8_C( 60), INT8_C( 92), INT8_C( 91), -INT8_C( 70), INT8_C( 118), -INT8_C( 26), INT8_C( 29), -INT8_C( 113), -INT8_C( 28) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 25), INT8_C( 79), -INT8_C( 35), -INT8_C( 104), INT8_C( 103), INT8_C( 105), -INT8_C( 59), INT8_C( 125), INT8_C( 30), INT8_C( 81), INT8_C( 39), INT8_C( 68), INT8_C( 60), -INT8_C( 36), INT8_C( 22), INT8_C( 7) }, { -INT8_C( 124), -INT8_C( 110), -INT8_C( 118), INT8_C( 44), -INT8_C( 47), -INT8_C( 79), INT8_C( 85), -INT8_C( 115), -INT8_C( 85), INT8_C( 117), INT8_C( 89), -INT8_C( 43), INT8_C( 24), -INT8_C( 105), -INT8_C( 9), INT8_C( 45) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), 
INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 76), INT8_C( 19), -INT8_C( 53), INT8_C( 25), INT8_C( 4), INT8_C( 47), -INT8_C( 106), -INT8_C( 100), INT8_C( 8), -INT8_C( 71), INT8_C( 123), -INT8_C( 93), INT8_C( 15), -INT8_C( 112), INT8_C( 111), -INT8_C( 95) }, { -INT8_C( 49), INT8_C( 102), -INT8_C( 21), INT8_C( 118), -INT8_C( 75), -INT8_C( 61), -INT8_C( 29), -INT8_C( 107), INT8_C( 122), INT8_C( 77), -INT8_C( 4), INT8_C( 102), -INT8_C( 9), INT8_C( 72), -INT8_C( 6), -INT8_C( 23) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 58), INT8_C( 89), INT8_C( 119), -INT8_C( 99), -INT8_C( 39), -INT8_C( 113), -INT8_C( 37), -INT8_C( 96), -INT8_C( 122), -INT8_C( 93), -INT8_C( 120), INT8_C( 23), -INT8_C( 28), INT8_MAX, INT8_C( 85), -INT8_C( 48) }, { -INT8_C( 102), -INT8_C( 2), INT8_C( 62), INT8_C( 38), -INT8_C( 91), -INT8_C( 13), INT8_C( 90), INT8_C( 37), -INT8_C( 14), INT8_C( 108), INT8_C( 13), -INT8_C( 17), INT8_C( 44), INT8_C( 56), INT8_C( 126), INT8_C( 94) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 45), -INT8_C( 90), INT8_C( 75), -INT8_C( 21), -INT8_C( 114), -INT8_C( 112), INT8_C( 124), -INT8_C( 49), -INT8_C( 62), -INT8_C( 104), -INT8_C( 74), INT8_C( 104), INT8_C( 99), -INT8_C( 78), -INT8_C( 79), -INT8_C( 35) }, { INT8_C( 10), -INT8_C( 29), -INT8_C( 20), INT8_C( 5), INT8_C( 72), INT8_C( 72), -INT8_C( 96), -INT8_C( 123), INT8_C( 59), INT8_C( 89), -INT8_C( 14), INT8_C( 57), INT8_C( 94), INT8_C( 111), INT8_C( 108), -INT8_C( 42) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { 
-INT8_C( 91), -INT8_C( 124), -INT8_C( 82), -INT8_C( 51), -INT8_C( 54), INT8_C( 58), INT8_C( 97), INT8_C( 114), -INT8_C( 31), INT8_C( 31), INT8_C( 109), INT8_C( 93), INT8_C( 51), INT8_C( 48), INT8_C( 108), INT8_C( 16) }, { INT8_C( 73), -INT8_C( 100), INT8_C( 94), -INT8_C( 37), INT8_C( 71), -INT8_C( 54), -INT8_C( 98), -INT8_C( 3), INT8_C( 124), -INT8_C( 100), INT8_C( 21), INT8_C( 12), INT8_C( 54), -INT8_C( 10), -INT8_C( 83), -INT8_C( 105) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_comeq_epi8(a, b); simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i b = (i == 0) ? 
a : simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_comeq_epi8(a, b); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comeq_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { { INT16_C( 31451), INT16_C( 2312), -INT16_C( 27083), -INT16_C( 12173), -INT16_C( 16114), -INT16_C( 16619), INT16_C( 509), -INT16_C( 20116) }, { INT16_C( 31451), INT16_C( 2312), -INT16_C( 27083), -INT16_C( 12173), -INT16_C( 16114), -INT16_C( 16619), INT16_C( 509), -INT16_C( 20116) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } }, { { -INT16_C( 20097), INT16_C( 28923), -INT16_C( 32439), -INT16_C( 10948), -INT16_C( 16675), INT16_C( 12508), -INT16_C( 11172), -INT16_C( 5849) }, { INT16_C( 4507), INT16_C( 26818), -INT16_C( 21640), INT16_C( 13208), INT16_C( 1391), INT16_C( 22038), INT16_C( 11880), INT16_C( 2203) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 21805), -INT16_C( 29910), INT16_C( 30455), INT16_C( 21603), INT16_C( 18055), INT16_C( 27825), INT16_C( 4471), INT16_C( 5828) }, { -INT16_C( 12531), -INT16_C( 19902), -INT16_C( 32197), -INT16_C( 7751), INT16_C( 29484), -INT16_C( 5415), -INT16_C( 16867), INT16_C( 24032) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 4206), -INT16_C( 2475), -INT16_C( 20231), -INT16_C( 15367), -INT16_C( 17246), -INT16_C( 30213), -INT16_C( 18897), INT16_C( 25963) }, { INT16_C( 16219), -INT16_C( 20495), INT16_C( 8485), INT16_C( 9151), -INT16_C( 27794), INT16_C( 17348), -INT16_C( 17981), -INT16_C( 2270) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), 
INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 27512), INT16_C( 30691), INT16_C( 14326), INT16_C( 27367), -INT16_C( 22187), INT16_C( 20513), -INT16_C( 14034), INT16_C( 7428) }, { -INT16_C( 6486), INT16_C( 9783), -INT16_C( 27937), INT16_C( 16784), -INT16_C( 4515), INT16_C( 10304), INT16_C( 10243), INT16_C( 8011) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 16664), -INT16_C( 10436), INT16_C( 5271), -INT16_C( 28394), -INT16_C( 27447), -INT16_C( 31091), INT16_C( 21785), -INT16_C( 30858) }, { INT16_C( 20005), -INT16_C( 7811), -INT16_C( 8943), -INT16_C( 31978), INT16_C( 2848), INT16_C( 25526), INT16_C( 4741), INT16_C( 7490) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 7286), -INT16_C( 8760), INT16_C( 20099), -INT16_C( 32659), INT16_C( 1318), -INT16_C( 2394), INT16_C( 24475), -INT16_C( 5205) }, { -INT16_C( 271), -INT16_C( 22999), INT16_C( 2146), INT16_C( 12599), INT16_C( 29408), -INT16_C( 17780), INT16_C( 32752), INT16_C( 14831) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 31813), INT16_C( 21486), -INT16_C( 4766), INT16_C( 32467), -INT16_C( 31597), INT16_C( 26580), -INT16_C( 3750), -INT16_C( 28278) }, { INT16_C( 32313), INT16_C( 16037), INT16_C( 7546), -INT16_C( 27939), -INT16_C( 21308), -INT16_C( 2516), INT16_C( 30701), -INT16_C( 17606) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_comeq_epi16(a, b); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { 
simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_comeq_epi16(a, b); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comeq_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { { -INT32_C( 15073521), -INT32_C( 718079266), -INT32_C( 1602250184), -INT32_C( 1057287426) }, { -INT32_C( 15073521), -INT32_C( 718079266), -INT32_C( 1602250184), -INT32_C( 1057287426) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { INT32_C( 1901549347), -INT32_C( 269540606), -INT32_C( 537116427), -INT32_C( 368377053) }, { -INT32_C( 1414654055), -INT32_C( 881755235), INT32_C( 1779420734), -INT32_C( 1041839312) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 111743191), -INT32_C( 1873468510), INT32_C( 1013364378), INT32_C( 256518715) }, { INT32_C( 488002434), -INT32_C( 1451721913), -INT32_C( 1913869875), -INT32_C( 583851112) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 956157570), -INT32_C( 1107876863), -INT32_C( 674195492), INT32_C( 1927280092) }, { INT32_C( 515455985), -INT32_C( 1221591163), -INT32_C( 780160500), INT32_C( 1506484957) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 2060010048), -INT32_C( 1094897976), INT32_C( 1990486755), INT32_C( 1538419375) }, { INT32_C( 1979668495), INT32_C( 1014895103), -INT32_C( 30510812), -INT32_C( 2137730905) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 395785148), -INT32_C( 628143714), -INT32_C( 498485290), INT32_C( 313300059) }, { -INT32_C( 330223356), -INT32_C( 2108835748), -INT32_C( 614306243), -INT32_C( 1720240480) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, 
{ { -INT32_C( 649701453), INT32_C( 1532329516), -INT32_C( 483981347), -INT32_C( 558663543) }, { INT32_C( 1226133239), -INT32_C( 813367996), INT32_C( 830747261), -INT32_C( 346268818) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 365652404), -INT32_C( 2013801446), INT32_C( 1084678178), INT32_C( 162537697) }, { INT32_C( 1437939728), INT32_C( 1812719201), -INT32_C( 923122673), -INT32_C( 1087617095) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_comeq_epi32(a, b); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_comeq_epi32(a, b); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comeq_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { { -INT64_C( 6386856271473046416), -INT64_C( 2770677996306117174) }, { -INT64_C( 6386856271473046416), -INT64_C( 2770677996306117174) }, { -INT64_C( 1), -INT64_C( 1) } }, { { -INT64_C( 6788474568251574663), INT64_C( 6759062096424348697) }, { INT64_C( 175365179203798799), -INT64_C( 7553141662136446208) }, { INT64_C( 0), INT64_C( 0) } }, { { -INT64_C( 5356527267312463785), INT64_C( 948454573608432580) }, { -INT64_C( 2155271835253202248), -INT64_C( 8467335260177564438) }, { INT64_C( 0), INT64_C( 0) } }, { { -INT64_C( 967758818504798251), INT64_C( 1668311107114934327) }, { -INT64_C( 8270831391516285293), 
-INT64_C( 260363254095917136) }, { INT64_C( 0), INT64_C( 0) } }, { { -INT64_C( 7032190541284410856), -INT64_C( 4418485408765625445) }, { INT64_C( 5530014961812003529), INT64_C( 3730934318756385915) }, { INT64_C( 0), INT64_C( 0) } }, { { INT64_C( 150940866752567627), INT64_C( 6388544662910520343) }, { INT64_C( 7462839796774061954), INT64_C( 8687675780418114675) }, { INT64_C( 0), INT64_C( 0) } }, { { INT64_C( 7388068351796983188), INT64_C( 2304976088930770132) }, { INT64_C( 2780819029497181157), INT64_C( 1416578837790889663) }, { INT64_C( 0), INT64_C( 0) } }, { { INT64_C( 1269734626711540763), INT64_C( 6958671260799070969) }, { -INT64_C( 3334082415450527717), INT64_C( 5545782954997113480) }, { INT64_C( 0), INT64_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_comeq_epi64(a, b); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i b = (i == 0) ? 
a : simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_comeq_epi64(a, b); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comeq_epu8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint8_t a[16]; const uint8_t b[16]; const uint8_t r[16]; } test_vec[] = { { { UINT8_C( 9), UINT8_C( 1), UINT8_C( 63), UINT8_C( 11), UINT8_C( 67), UINT8_C( 52), UINT8_C( 70), UINT8_C( 77), UINT8_C(175), UINT8_C(151), UINT8_C(100), UINT8_C( 94), UINT8_C(124), UINT8_C(113), UINT8_C( 82), UINT8_C( 98) }, { UINT8_C( 9), UINT8_C( 1), UINT8_C( 63), UINT8_C( 11), UINT8_C( 67), UINT8_C( 52), UINT8_C( 70), UINT8_C( 77), UINT8_C(175), UINT8_C(151), UINT8_C(100), UINT8_C( 94), UINT8_C(124), UINT8_C(113), UINT8_C( 82), UINT8_C( 98) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 75), UINT8_C(174), UINT8_C(221), UINT8_C( 60), UINT8_C(135), UINT8_C(223), UINT8_C(225), UINT8_C(175), UINT8_C(245), UINT8_C(129), UINT8_C(116), UINT8_C(195), UINT8_C(140), UINT8_C(130), UINT8_C( 98), UINT8_C(107) }, { UINT8_C(134), UINT8_C( 1), UINT8_C(218), UINT8_C(100), UINT8_C(183), UINT8_C( 86), UINT8_C(102), UINT8_C( 96), UINT8_C( 30), UINT8_C( 82), UINT8_C( 32), UINT8_C(100), UINT8_C(186), UINT8_C(199), UINT8_C( 36), UINT8_C(111) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(158), UINT8_C( 75), UINT8_C( 65), UINT8_C( 92), UINT8_C( 72), UINT8_C( 58), UINT8_C(244), UINT8_C( 9), UINT8_C( 47), UINT8_C(250), UINT8_C(150), UINT8_C(185), UINT8_C(155), UINT8_C(131), UINT8_C(212), UINT8_C(183) }, { 
UINT8_C(185), UINT8_C( 30), UINT8_C( 96), UINT8_C( 61), UINT8_C(239), UINT8_C(108), UINT8_C(230), UINT8_C(147), UINT8_C(109), UINT8_C(171), UINT8_C( 66), UINT8_C(123), UINT8_C( 5), UINT8_C( 53), UINT8_C(240), UINT8_C(205) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 60), UINT8_C( 73), UINT8_C(195), UINT8_C( 95), UINT8_C(161), UINT8_C( 14), UINT8_C(218), UINT8_C( 39), UINT8_C( 93), UINT8_C(213), UINT8_C(209), UINT8_C(164), UINT8_C( 12), UINT8_C(159), UINT8_C( 52), UINT8_C(121) }, { UINT8_C(203), UINT8_C(223), UINT8_C( 57), UINT8_C( 92), UINT8_C(146), UINT8_C(129), UINT8_C(174), UINT8_C( 46), UINT8_C(197), UINT8_C( 42), UINT8_C( 48), UINT8_C(107), UINT8_C( 5), UINT8_C(194), UINT8_C(158), UINT8_C(197) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 76), UINT8_C( 48), UINT8_C(205), UINT8_C( 12), UINT8_C( 57), UINT8_C(100), UINT8_C(125), UINT8_C( 81), UINT8_C(168), UINT8_C(155), UINT8_C(140), UINT8_C( 75), UINT8_C(134), UINT8_C(222), UINT8_C(106), UINT8_C(250) }, { UINT8_C(228), UINT8_C(204), UINT8_C(205), UINT8_C(137), UINT8_C( 73), UINT8_C(154), UINT8_C(166), UINT8_C(121), UINT8_C( 77), UINT8_C( 88), UINT8_C( 83), UINT8_C(252), UINT8_C( 99), UINT8_C(118), UINT8_C( 22), UINT8_C(159) }, { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(247), UINT8_C(134), UINT8_C(198), UINT8_C( 44), UINT8_C(185), UINT8_C( 67), UINT8_C(196), UINT8_C(207), UINT8_C( 56), UINT8_C(211), UINT8_C( 49), UINT8_C(119), UINT8_C(178), UINT8_C( 73), UINT8_C( 94), 
UINT8_C(127) }, { UINT8_C( 42), UINT8_C(110), UINT8_C(132), UINT8_C(142), UINT8_C(188), UINT8_C(193), UINT8_C(182), UINT8_C(188), UINT8_C( 46), UINT8_C(188), UINT8_C( 19), UINT8_C(246), UINT8_C(199), UINT8_C( 89), UINT8_C( 64), UINT8_C(163) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(100), UINT8_C(213), UINT8_C( 22), UINT8_C(135), UINT8_C(200), UINT8_C(180), UINT8_C(151), UINT8_C(232), UINT8_C( 52), UINT8_C( 5), UINT8_C( 39), UINT8_C(240), UINT8_C( 55), UINT8_C(231), UINT8_C(249), UINT8_C( 83) }, { UINT8_C(200), UINT8_C( 76), UINT8_C(199), UINT8_C( 50), UINT8_C(146), UINT8_C(253), UINT8_C(198), UINT8_C( 62), UINT8_C(143), UINT8_C(223), UINT8_C(217), UINT8_C( 32), UINT8_C(216), UINT8_C(116), UINT8_C( 4), UINT8_C( 25) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(188), UINT8_C(164), UINT8_C( 36), UINT8_C(229), UINT8_C( 15), UINT8_C(190), UINT8_C(222), UINT8_C(230), UINT8_C(196), UINT8_C(186), UINT8_C(214), UINT8_C(126), UINT8_C(191), UINT8_C(193), UINT8_C( 33), UINT8_C(189) }, { UINT8_C(228), UINT8_C(239), UINT8_C(252), UINT8_C( 60), UINT8_C(116), UINT8_C( 87), UINT8_C(190), UINT8_C( 72), UINT8_C(153), UINT8_C( 71), UINT8_C( 11), UINT8_C( 67), UINT8_C( 63), UINT8_C(206), UINT8_C( 75), UINT8_C( 72) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = 
simde_mm_comeq_epu8(a, b); simde_test_x86_assert_equal_u8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u8x16(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u8x16(); simde__m128i r = simde_mm_comeq_epu8(a, b); simde_test_x86_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comeq_epu16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint16_t a[8]; const uint16_t b[8]; const uint16_t r[8]; } test_vec[] = { { { UINT16_C(31782), UINT16_C( 3674), UINT16_C(26933), UINT16_C( 3969), UINT16_C(30738), UINT16_C(59814), UINT16_C(56815), UINT16_C( 1215) }, { UINT16_C(31782), UINT16_C( 3674), UINT16_C(26933), UINT16_C( 3969), UINT16_C(30738), UINT16_C(59814), UINT16_C(56815), UINT16_C( 1215) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(56743), UINT16_C(30093), UINT16_C(54537), UINT16_C( 8838), UINT16_C(32371), UINT16_C(10003), UINT16_C(28580), UINT16_C(31227) }, { UINT16_C(58571), UINT16_C(51487), UINT16_C(48611), UINT16_C(44137), UINT16_C(51779), UINT16_C(63743), UINT16_C(17777), UINT16_C(29116) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(41016), UINT16_C(42464), UINT16_C(33530), UINT16_C( 5126), UINT16_C( 3143), UINT16_C(37975), UINT16_C(24749), UINT16_C(62718) }, { UINT16_C(25810), UINT16_C(57563), UINT16_C(50113), UINT16_C( 1148), UINT16_C(13698), UINT16_C(30025), UINT16_C( 493), UINT16_C(19710) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(49343), UINT16_C(37983), UINT16_C(25582), UINT16_C(26151), UINT16_C(30522), UINT16_C(21057), 
UINT16_C(43013), UINT16_C( 314) }, { UINT16_C(33892), UINT16_C( 6647), UINT16_C(33398), UINT16_C(24995), UINT16_C(17397), UINT16_C( 9964), UINT16_C( 5898), UINT16_C(56687) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(50022), UINT16_C( 2416), UINT16_C(33422), UINT16_C(24527), UINT16_C(18806), UINT16_C(10295), UINT16_C(20306), UINT16_C(59545) }, { UINT16_C(52649), UINT16_C(15833), UINT16_C( 683), UINT16_C( 2503), UINT16_C(31684), UINT16_C(54096), UINT16_C(37233), UINT16_C(27638) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(13139), UINT16_C(52605), UINT16_C(59010), UINT16_C(18663), UINT16_C( 2339), UINT16_C(57507), UINT16_C(24125), UINT16_C(61442) }, { UINT16_C(50888), UINT16_C( 5354), UINT16_C(19463), UINT16_C(17870), UINT16_C(25880), UINT16_C(18141), UINT16_C(30410), UINT16_C(16506) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(38830), UINT16_C(43501), UINT16_C(38769), UINT16_C(26968), UINT16_C(16488), UINT16_C(16620), UINT16_C(56429), UINT16_C(25180) }, { UINT16_C(63722), UINT16_C(26002), UINT16_C(26419), UINT16_C(23970), UINT16_C(35353), UINT16_C(18172), UINT16_C(53180), UINT16_C(41701) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(30880), UINT16_C(25128), UINT16_C(40451), UINT16_C( 2568), UINT16_C(30062), UINT16_C( 4474), UINT16_C(53643), UINT16_C(34449) }, { UINT16_C(59702), UINT16_C(64058), UINT16_C(23510), UINT16_C(39209), UINT16_C(29166), UINT16_C(39443), UINT16_C(41967), UINT16_C(55838) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = 
simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_comeq_epu16(a, b); simde_test_x86_assert_equal_u16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u16x8(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u16x8(); simde__m128i r = simde_mm_comeq_epu16(a, b); simde_test_x86_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comeq_epu32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint32_t a[4]; const uint32_t b[4]; const uint32_t r[4]; } test_vec[] = { { { UINT32_C(3281411409), UINT32_C(1944126177), UINT32_C( 448082013), UINT32_C(2760394302) }, { UINT32_C(3281411409), UINT32_C(1944126177), UINT32_C( 448082013), UINT32_C(2760394302) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(2571641813), UINT32_C(1095578009), UINT32_C( 193766336), UINT32_C( 789445132) }, { UINT32_C(2476723944), UINT32_C(3972713395), UINT32_C( 621148764), UINT32_C( 69812527) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 187051758), UINT32_C(2633297956), UINT32_C(1640867510), UINT32_C(3952795067) }, { UINT32_C(2578142093), UINT32_C(3165258016), UINT32_C(4174535059), UINT32_C(3999059205) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 389714090), UINT32_C(4077428766), UINT32_C(1681917177), UINT32_C(1420645027) }, { UINT32_C(2653058410), UINT32_C( 745254353), UINT32_C(1568986923), UINT32_C(2836000617) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(2280575793), UINT32_C(2389723696), UINT32_C(3679143088), UINT32_C(3022939245) }, { UINT32_C(1785510820), UINT32_C(2230638446), UINT32_C( 428080204), UINT32_C(2115547395) }, { UINT32_C( 0), 
UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 581533354), UINT32_C(3038049537), UINT32_C(2421747460), UINT32_C(1359288512) }, { UINT32_C(3320950374), UINT32_C( 204309918), UINT32_C(4147597085), UINT32_C(3044088442) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(2949782077), UINT32_C(2967251258), UINT32_C(1238273053), UINT32_C(1947891781) }, { UINT32_C(1998181590), UINT32_C( 201244937), UINT32_C(3188283591), UINT32_C(2533175927) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(4157812244), UINT32_C(3333451394), UINT32_C(3474301730), UINT32_C(1683327908) }, { UINT32_C(1196275740), UINT32_C(3410989656), UINT32_C( 895459185), UINT32_C(1755447969) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_comeq_epu32(a, b); simde_test_x86_assert_equal_u32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u32x4(); simde__m128i b = (i == 0) ? 
a : simde_test_x86_random_u32x4(); simde__m128i r = simde_mm_comeq_epu32(a, b); simde_test_x86_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comeq_epu64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint64_t a[2]; const uint64_t b[2]; const uint64_t r[2]; } test_vec[] = { { { UINT64_C( 4717605833667723860), UINT64_C( 7755015510761520443) }, { UINT64_C( 4717605833667723860), UINT64_C( 7755015510761520443) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C(10522661852200296289), UINT64_C( 8451446297061403972) }, { UINT64_C( 7429899117640998696), UINT64_C(14978668403143041426) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(12182540738121552845), UINT64_C( 278071019023567208) }, { UINT64_C( 9332715478955625654), UINT64_C(15707509216151344972) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 6364383605199281031), UINT64_C( 6574707631012723460) }, { UINT64_C(15463621001561887014), UINT64_C(15122938829150578580) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(16623996341821456312), UINT64_C(14221252935795067969) }, { UINT64_C(12700545457662277281), UINT64_C(18406638509938956944) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(11321345988371400583), UINT64_C( 9807686704852845382) }, { UINT64_C( 6153389593585271887), UINT64_C(12292702122824497000) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(14084578606058126365), UINT64_C(17178323387740530748) }, { UINT64_C( 1075896664901780055), UINT64_C( 2035148905074804293) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(11648630285130147489), UINT64_C( 4404905146264912970) }, { UINT64_C( 2576844816584833249), UINT64_C(11189786944117821519) }, { UINT64_C( 0), UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i b = 
simde_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_comeq_epu64(a, b); simde_test_x86_assert_equal_u64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u64x2(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u64x2(); simde__m128i r = simde_mm_comeq_epu64(a, b); simde_test_x86_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comneq_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { { { INT8_C( 60), INT8_C( 11), INT8_C( 54), -INT8_C( 32), -INT8_C( 110), -INT8_C( 37), -INT8_C( 102), INT8_MAX, -INT8_C( 103), INT8_C( 93), -INT8_C( 120), -INT8_C( 114), INT8_C( 27), INT8_C( 7), INT8_C( 55), -INT8_C( 66) }, { INT8_C( 60), INT8_C( 11), INT8_C( 54), -INT8_C( 32), -INT8_C( 110), -INT8_C( 37), -INT8_C( 102), INT8_MAX, -INT8_C( 103), INT8_C( 93), -INT8_C( 120), -INT8_C( 114), INT8_C( 27), INT8_C( 7), INT8_C( 55), -INT8_C( 66) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 23), -INT8_C( 81), -INT8_C( 80), INT8_C( 62), INT8_C( 17), -INT8_C( 109), -INT8_C( 53), -INT8_C( 36), -INT8_C( 83), -INT8_C( 100), INT8_C( 90), INT8_C( 110), INT8_C( 124), -INT8_C( 28), -INT8_C( 92), INT8_C( 112) }, { INT8_C( 22), INT8_C( 117), INT8_C( 21), -INT8_C( 18), INT8_C( 105), INT8_C( 84), INT8_C( 51), -INT8_C( 91), INT8_C( 80), -INT8_C( 66), -INT8_C( 44), INT8_C( 101), INT8_C( 104), -INT8_C( 79), -INT8_C( 114), -INT8_C( 71) }, { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), 
-INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } }, { { INT8_C( 31), -INT8_C( 82), INT8_C( 112), -INT8_C( 54), INT8_C( 13), -INT8_C( 66), -INT8_C( 17), -INT8_C( 127), -INT8_C( 120), -INT8_C( 77), INT8_C( 35), -INT8_C( 20), INT8_C( 114), -INT8_C( 80), INT8_C( 52), -INT8_C( 29) }, { INT8_C( 86), -INT8_C( 21), INT8_C( 15), -INT8_C( 21), -INT8_C( 77), -INT8_C( 78), INT8_C( 93), INT8_C( 90), -INT8_C( 104), -INT8_C( 84), -INT8_C( 75), -INT8_C( 68), INT8_C( 113), INT8_C( 98), INT8_C( 16), INT8_C( 118) }, { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } }, { { INT8_C( 33), -INT8_C( 3), INT8_C( 126), -INT8_C( 87), INT8_C( 81), INT8_C( 81), INT8_C( 24), INT8_C( 89), INT8_C( 7), INT8_C( 27), INT8_C( 54), -INT8_C( 49), INT8_C( 122), -INT8_C( 121), -INT8_C( 30), INT8_C( 59) }, { INT8_C( 37), -INT8_C( 10), -INT8_C( 116), -INT8_C( 98), INT8_C( 26), -INT8_C( 3), -INT8_C( 1), -INT8_C( 25), -INT8_C( 103), -INT8_C( 79), -INT8_C( 109), INT8_C( 92), -INT8_C( 31), INT8_C( 34), -INT8_C( 92), INT8_C( 60) }, { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } }, { { INT8_C( 70), INT8_C( 37), INT8_C( 68), -INT8_C( 93), -INT8_C( 124), INT8_C( 87), INT8_C( 45), -INT8_C( 83), INT8_C( 83), INT8_C( 94), -INT8_C( 7), -INT8_C( 34), INT8_C( 59), INT8_C( 114), -INT8_C( 106), -INT8_C( 62) }, { -INT8_C( 86), INT8_C( 29), -INT8_C( 12), -INT8_C( 48), INT8_C( 68), INT8_C( 62), INT8_C( 0), -INT8_C( 108), INT8_C( 122), INT8_C( 84), -INT8_C( 42), INT8_C( 14), INT8_C( 94), -INT8_C( 7), INT8_C( 50), INT8_C( 85) }, { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 
1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } }, { { -INT8_C( 75), -INT8_C( 83), INT8_C( 40), INT8_MAX, INT8_C( 78), -INT8_C( 43), INT8_C( 22), -INT8_C( 59), -INT8_C( 109), INT8_C( 3), -INT8_C( 42), -INT8_C( 29), INT8_C( 94), INT8_C( 120), INT8_C( 56), -INT8_C( 66) }, { INT8_C( 14), -INT8_C( 24), -INT8_C( 81), INT8_C( 73), -INT8_C( 39), INT8_C( 125), INT8_C( 74), -INT8_C( 87), INT8_C( 100), INT8_C( 30), -INT8_C( 26), -INT8_C( 104), -INT8_C( 112), -INT8_C( 16), -INT8_C( 93), INT8_C( 7) }, { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } }, { { -INT8_C( 105), INT8_C( 29), -INT8_C( 108), INT8_C( 7), INT8_C( 89), -INT8_C( 43), -INT8_C( 69), -INT8_C( 23), -INT8_C( 16), -INT8_C( 109), INT8_C( 52), -INT8_C( 92), -INT8_C( 118), -INT8_C( 94), -INT8_C( 80), INT8_C( 120) }, { INT8_C( 120), -INT8_C( 33), INT8_C( 37), -INT8_C( 47), -INT8_C( 126), -INT8_C( 64), -INT8_C( 60), INT8_C( 110), INT8_C( 126), -INT8_C( 107), INT8_C( 44), -INT8_C( 62), INT8_C( 33), INT8_C( 14), -INT8_C( 35), -INT8_C( 101) }, { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } }, { { INT8_C( 20), -INT8_C( 2), -INT8_C( 17), INT8_C( 1), INT8_C( 74), INT8_C( 94), INT8_C( 4), INT8_C( 97), -INT8_C( 110), -INT8_C( 107), INT8_C( 123), -INT8_C( 21), INT8_C( 104), -INT8_C( 9), -INT8_C( 26), INT8_C( 56) }, { INT8_C( 17), -INT8_C( 117), -INT8_C( 66), INT8_C( 48), -INT8_C( 25), INT8_C( 17), INT8_C( 86), INT8_C( 43), -INT8_C( 15), INT8_C( 67), INT8_C( 14), INT8_C( 86), -INT8_C( 74), INT8_C( 92), -INT8_C( 55), INT8_C( 89) }, { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), 
-INT8_C( 1), -INT8_C( 1) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_comneq_epi8(a, b); simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_comneq_epi8(a, b); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comneq_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { { -INT16_C( 10413), INT16_C( 13985), INT16_C( 31180), INT16_C( 29913), -INT16_C( 28256), INT16_C( 32274), INT16_C( 32672), INT16_C( 18114) }, { -INT16_C( 10413), INT16_C( 13985), INT16_C( 31180), INT16_C( 29913), -INT16_C( 28256), INT16_C( 32274), INT16_C( 32672), INT16_C( 18114) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 29440), INT16_C( 11747), INT16_C( 30639), INT16_C( 10216), -INT16_C( 20252), INT16_C( 6901), -INT16_C( 7942), -INT16_C( 30641) }, { INT16_C( 12413), INT16_C( 28178), INT16_C( 11653), INT16_C( 27682), INT16_C( 4162), INT16_C( 9571), INT16_C( 17113), -INT16_C( 5843) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } }, { { INT16_C( 8046), -INT16_C( 27910), -INT16_C( 1150), -INT16_C( 21662), INT16_C( 25216), -INT16_C( 10166), -INT16_C( 23406), INT16_C( 29016) }, { -INT16_C( 28231), INT16_C( 29098), -INT16_C( 32227), -INT16_C( 28729), -INT16_C( 26206), -INT16_C( 21803), INT16_C( 18875), 
INT16_C( 27149) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } }, { { INT16_C( 6275), INT16_C( 9581), -INT16_C( 23798), -INT16_C( 84), -INT16_C( 7188), INT16_C( 22131), -INT16_C( 20439), INT16_C( 24010) }, { -INT16_C( 31952), INT16_C( 1745), -INT16_C( 32707), INT16_C( 9904), -INT16_C( 19227), -INT16_C( 11056), -INT16_C( 25616), INT16_C( 4428) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } }, { { -INT16_C( 6810), -INT16_C( 20574), INT16_C( 31212), INT16_C( 27567), -INT16_C( 17583), INT16_C( 24026), INT16_C( 2661), -INT16_C( 28017) }, { -INT16_C( 29430), -INT16_C( 2065), INT16_C( 12429), INT16_C( 30917), -INT16_C( 5834), INT16_C( 27324), INT16_C( 16413), INT16_C( 9936) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } }, { { INT16_C( 3648), -INT16_C( 2301), -INT16_C( 31277), INT16_C( 14161), INT16_C( 28887), -INT16_C( 19227), -INT16_C( 17170), INT16_C( 22925) }, { INT16_C( 14446), INT16_C( 2668), -INT16_C( 26188), -INT16_C( 12305), -INT16_C( 15941), INT16_C( 13570), INT16_C( 16877), -INT16_C( 3709) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } }, { { INT16_C( 7224), -INT16_C( 14346), -INT16_C( 12699), -INT16_C( 21893), -INT16_C( 29531), INT16_C( 9726), -INT16_C( 12691), -INT16_C( 1618) }, { INT16_C( 2948), INT16_C( 2224), -INT16_C( 15525), INT16_C( 28948), -INT16_C( 15460), -INT16_C( 760), -INT16_C( 23291), -INT16_C( 17845) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } }, { { -INT16_C( 27017), -INT16_C( 6683), INT16_C( 23626), INT16_C( 3350), -INT16_C( 26652), INT16_C( 30348), INT16_C( 18058), -INT16_C( 17192) }, { -INT16_C( 29067), -INT16_C( 18397), -INT16_C( 18647), -INT16_C( 22755), INT16_C( 30722), -INT16_C( 30409), 
INT16_C( 29712), -INT16_C( 14318) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_comneq_epi16(a, b); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_comneq_epi16(a, b); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comneq_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { { INT32_C( 439813157), -INT32_C( 1475856597), INT32_C( 1895241659), -INT32_C( 386584852) }, { INT32_C( 439813157), -INT32_C( 1475856597), INT32_C( 1895241659), -INT32_C( 386584852) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 466794135), -INT32_C( 1175225145), -INT32_C( 1544067563), INT32_C( 1690285748) }, { INT32_C( 777317993), -INT32_C( 1019516752), -INT32_C( 609839533), -INT32_C( 957575876) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { INT32_C( 1379386759), -INT32_C( 277079332), INT32_C( 296818684), -INT32_C( 700812135) }, { -INT32_C( 375071381), -INT32_C( 1500121472), INT32_C( 2130942932), -INT32_C( 1633326047) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { -INT32_C( 878212616), -INT32_C( 1851614703), -INT32_C( 1664412704), INT32_C( 1718335080) }, { INT32_C( 328161620), -INT32_C( 1727993532), INT32_C( 594915173), -INT32_C( 1207381436) }, { 
-INT32_C( 1), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { INT32_C( 401022436), -INT32_C( 431226098), INT32_C( 212852968), INT32_C( 1547338441) }, { INT32_C( 1937631308), -INT32_C( 480032349), -INT32_C( 1855694546), INT32_C( 1543358284) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { -INT32_C( 27456910), INT32_C( 929193338), INT32_C( 700647485), -INT32_C( 7347613) }, { -INT32_C( 757721990), -INT32_C( 844816570), -INT32_C( 1892146856), -INT32_C( 798206750) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { INT32_C( 1215551434), -INT32_C( 859065859), -INT32_C( 1150217466), -INT32_C( 1739399713) }, { -INT32_C( 134061049), -INT32_C( 1615826220), -INT32_C( 430272757), INT32_C( 1614643118) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { -INT32_C( 1131410411), -INT32_C( 311359936), -INT32_C( 1994356884), INT32_C( 1869315044) }, { -INT32_C( 1385603046), -INT32_C( 1586803722), INT32_C( 1580215406), INT32_C( 1347965527) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_comneq_epi32(a, b); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = (i == 0) ? 
a : simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_comneq_epi32(a, b); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comneq_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { { -INT64_C( 2145009836426055045), -INT64_C( 3692172023188427113) }, { -INT64_C( 2145009836426055045), -INT64_C( 3692172023188427113) }, { INT64_C( 0), INT64_C( 0) } }, { { INT64_C( 1927172668007879132), -INT64_C( 1466531328946896982) }, { -INT64_C( 1003788530826162141), -INT64_C( 630571924344221778) }, { -INT64_C( 1), -INT64_C( 1) } }, { { INT64_C( 6093841530576535157), INT64_C( 8650587926085505254) }, { INT64_C( 7483676463588208182), INT64_C( 4020443149158757337) }, { -INT64_C( 1), -INT64_C( 1) } }, { { -INT64_C( 7652666475017031668), INT64_C( 4500793761105799243) }, { -INT64_C( 8682651005132106533), -INT64_C( 3004755848539594430) }, { -INT64_C( 1), -INT64_C( 1) } }, { { INT64_C( 2763101731854593481), -INT64_C( 8733947786854461951) }, { -INT64_C( 7211148372848030917), INT64_C( 1921978243298205966) }, { -INT64_C( 1), -INT64_C( 1) } }, { { INT64_C( 5700269870367375317), -INT64_C( 7426619846471480785) }, { -INT64_C( 1584459702047040899), INT64_C( 5535302230343596904) }, { -INT64_C( 1), -INT64_C( 1) } }, { { INT64_C( 6340521394491436376), -INT64_C( 4841440811628337666) }, { -INT64_C( 4707356538522662197), -INT64_C( 5501906473491040138) }, { -INT64_C( 1), -INT64_C( 1) } }, { { -INT64_C( 8581209791696617351), INT64_C( 4201864310067635861) }, { INT64_C( 6760919033000960586), -INT64_C( 7417949586054127776) }, { -INT64_C( 1), -INT64_C( 1) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); 
simde__m128i r = simde_mm_comneq_epi64(a, b); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_comneq_epi64(a, b); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comneq_epu8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint8_t a[16]; const uint8_t b[16]; const uint8_t r[16]; } test_vec[] = { { { UINT8_C( 97), UINT8_C( 63), UINT8_C(177), UINT8_C( 41), UINT8_C(135), UINT8_C(142), UINT8_C(197), UINT8_C( 40), UINT8_C( 29), UINT8_C( 85), UINT8_C( 24), UINT8_C(121), UINT8_C(136), UINT8_C( 51), UINT8_C( 92), UINT8_C( 89) }, { UINT8_C( 97), UINT8_C( 63), UINT8_C(177), UINT8_C( 41), UINT8_C(135), UINT8_C(142), UINT8_C(197), UINT8_C( 40), UINT8_C( 29), UINT8_C( 85), UINT8_C( 24), UINT8_C(121), UINT8_C(136), UINT8_C( 51), UINT8_C( 92), UINT8_C( 89) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 36), UINT8_C(234), UINT8_C(216), UINT8_C( 67), UINT8_C(146), UINT8_C(101), UINT8_C( 93), UINT8_C( 16), UINT8_C( 79), UINT8_C(249), UINT8_C(124), UINT8_C( 99), UINT8_C(139), UINT8_C(235), UINT8_C(246), UINT8_C( 68) }, { UINT8_C( 56), UINT8_C( 50), UINT8_C(116), UINT8_C( 11), UINT8_C( 97), UINT8_C(127), UINT8_C(120), UINT8_C(128), UINT8_C(189), UINT8_C(123), UINT8_C(148), UINT8_C( 64), UINT8_C(132), UINT8_C( 79), UINT8_C(218), UINT8_C( 99) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, 
UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(129), UINT8_C(106), UINT8_C(147), UINT8_C( 91), UINT8_C(105), UINT8_C(126), UINT8_C( 52), UINT8_C( 31), UINT8_C(107), UINT8_C(204), UINT8_C(141), UINT8_C(138), UINT8_C( 8), UINT8_C(162), UINT8_C( 68), UINT8_C(253) }, { UINT8_C( 37), UINT8_C( 34), UINT8_C(130), UINT8_C( 76), UINT8_C( 94), UINT8_C( 68), UINT8_C(236), UINT8_C(216), UINT8_C(158), UINT8_C( 31), UINT8_C(213), UINT8_C(248), UINT8_C(236), UINT8_C(100), UINT8_C(177), UINT8_C(158) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(136), UINT8_C( 43), UINT8_C(205), UINT8_C( 55), UINT8_C( 55), UINT8_C(242), UINT8_C( 62), UINT8_C(210), UINT8_C(219), UINT8_C(226), UINT8_C( 24), UINT8_C(133), UINT8_C( 70), UINT8_C( 85), UINT8_C(224), UINT8_C( 12) }, { UINT8_C( 80), UINT8_C(150), UINT8_C( 67), UINT8_C(180), UINT8_C( 39), UINT8_C(231), UINT8_C( 7), UINT8_C(120), UINT8_C(231), UINT8_C(201), UINT8_C( 67), UINT8_C(105), UINT8_C(107), UINT8_C(119), UINT8_C(203), UINT8_C( 83) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 98), UINT8_C(180), UINT8_C(239), UINT8_C(157), UINT8_C(165), UINT8_C(198), UINT8_C(101), UINT8_C(113), UINT8_C(200), UINT8_C(195), UINT8_C(131), UINT8_C( 29), UINT8_C(238), UINT8_C( 11), UINT8_C(177), UINT8_C(185) }, { UINT8_C(226), UINT8_C( 22), UINT8_C( 31), UINT8_C( 10), UINT8_C(100), UINT8_C(112), UINT8_C(179), UINT8_C(167), UINT8_C(193), UINT8_C( 1), UINT8_C( 70), UINT8_C( 90), UINT8_C(167), UINT8_C(146), UINT8_C( 15), UINT8_C(201) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 54), UINT8_C(141), UINT8_C( 95), 
UINT8_C( 86), UINT8_C( 90), UINT8_C( 4), UINT8_C(143), UINT8_C( 67), UINT8_C( 89), UINT8_C(245), UINT8_C( 55), UINT8_C( 26), UINT8_C(168), UINT8_C(205), UINT8_C(161), UINT8_C( 76) }, { UINT8_C( 3), UINT8_C( 42), UINT8_C(126), UINT8_C( 24), UINT8_C(188), UINT8_C(229), UINT8_C(215), UINT8_C(174), UINT8_C( 84), UINT8_C( 80), UINT8_C( 70), UINT8_C(149), UINT8_C( 72), UINT8_C(188), UINT8_C(101), UINT8_C( 74) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 45), UINT8_C( 63), UINT8_C(135), UINT8_C( 42), UINT8_MAX, UINT8_C(180), UINT8_C(166), UINT8_C(145), UINT8_C(183), UINT8_C( 2), UINT8_C(157), UINT8_C( 68), UINT8_C( 27), UINT8_C(162), UINT8_C(151), UINT8_C( 12) }, { UINT8_C(218), UINT8_C( 91), UINT8_C(200), UINT8_C(164), UINT8_C(216), UINT8_C( 80), UINT8_C( 91), UINT8_C(213), UINT8_C(199), UINT8_C( 61), UINT8_C(171), UINT8_C(224), UINT8_C(247), UINT8_C(254), UINT8_C(181), UINT8_C( 28) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(111), UINT8_C( 81), UINT8_C(206), UINT8_C(224), UINT8_C( 60), UINT8_C(220), UINT8_C(145), UINT8_C(163), UINT8_C( 9), UINT8_C(114), UINT8_C( 28), UINT8_C( 98), UINT8_C(240), UINT8_C(146), UINT8_C(124), UINT8_C( 67) }, { UINT8_C(144), UINT8_C( 48), UINT8_C(101), UINT8_C(119), UINT8_C( 96), UINT8_C(184), UINT8_C( 38), UINT8_C(100), UINT8_C( 66), UINT8_C( 81), UINT8_C(221), UINT8_C( 4), UINT8_C( 92), UINT8_C( 95), UINT8_C(231), UINT8_C(136) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); 
simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_comneq_epu8(a, b); simde_test_x86_assert_equal_u8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u8x16(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u8x16(); simde__m128i r = simde_mm_comneq_epu8(a, b); simde_test_x86_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comneq_epu16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint16_t a[8]; const uint16_t b[8]; const uint16_t r[8]; } test_vec[] = { { { UINT16_C(19235), UINT16_C(17051), UINT16_C(34488), UINT16_C(49464), UINT16_C(52088), UINT16_C(15899), UINT16_C(42447), UINT16_C(14134) }, { UINT16_C(19235), UINT16_C(17051), UINT16_C(34488), UINT16_C(49464), UINT16_C(52088), UINT16_C(15899), UINT16_C(42447), UINT16_C(14134) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(12876), UINT16_C(22973), UINT16_C( 9979), UINT16_C(41762), UINT16_C( 4847), UINT16_C(51524), UINT16_C(59166), UINT16_C(55012) }, { UINT16_C(46706), UINT16_C( 5720), UINT16_C(47388), UINT16_C(13187), UINT16_C(38700), UINT16_C(40452), UINT16_C(58462), UINT16_C(12462) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(59191), UINT16_C( 4664), UINT16_C(41297), UINT16_C(55862), UINT16_C( 2804), UINT16_C(63048), UINT16_C(40677), UINT16_C(20114) }, { UINT16_C( 6275), UINT16_C( 9580), UINT16_C(32271), UINT16_C(16474), UINT16_C(23884), UINT16_C(19262), UINT16_C(22088), UINT16_C(30924) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(55673), UINT16_C(26943), UINT16_C(12555), 
UINT16_C(20554), UINT16_C(49529), UINT16_C(21842), UINT16_C(36187), UINT16_C(14554) }, { UINT16_C(64639), UINT16_C(14143), UINT16_C(56378), UINT16_C(12965), UINT16_C(42496), UINT16_C( 3377), UINT16_C( 819), UINT16_C(22137) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(36921), UINT16_C(10040), UINT16_C(57041), UINT16_C(20039), UINT16_C(48807), UINT16_C(44232), UINT16_C(47910), UINT16_C(56228) }, { UINT16_C(59277), UINT16_C( 5177), UINT16_C(56133), UINT16_C(20300), UINT16_C(63858), UINT16_C(43845), UINT16_C(62664), UINT16_C( 4507) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(37791), UINT16_C( 5261), UINT16_C(45386), UINT16_C( 7187), UINT16_C(35237), UINT16_C(50196), UINT16_C(12526), UINT16_C(32985) }, { UINT16_C(25558), UINT16_C(33730), UINT16_C(33751), UINT16_C(57399), UINT16_C(57031), UINT16_C(61410), UINT16_C(12462), UINT16_C(61979) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(27349), UINT16_C(63652), UINT16_C(45343), UINT16_C( 663), UINT16_C(43676), UINT16_C(26012), UINT16_C(62812), UINT16_C(28511) }, { UINT16_C(63361), UINT16_C(19778), UINT16_C(56728), UINT16_C(11343), UINT16_C(56618), UINT16_C(40816), UINT16_C(49037), UINT16_C(16866) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(40192), UINT16_C(39911), UINT16_C(59384), UINT16_C(18619), UINT16_C(43444), UINT16_C(22218), UINT16_C( 4375), UINT16_C(61472) }, { UINT16_C(11447), UINT16_C(14881), UINT16_C(61490), UINT16_C(32122), UINT16_C(32705), UINT16_C(33879), UINT16_C(43535), UINT16_C(18135) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); 
simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_comneq_epu16(a, b); simde_test_x86_assert_equal_u16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u16x8(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u16x8(); simde__m128i r = simde_mm_comneq_epu16(a, b); simde_test_x86_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comneq_epu32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint32_t a[4]; const uint32_t b[4]; const uint32_t r[4]; } test_vec[] = { { { UINT32_C(3317478475), UINT32_C( 896031099), UINT32_C(1627721492), UINT32_C(1258523848) }, { UINT32_C(3317478475), UINT32_C( 896031099), UINT32_C(1627721492), UINT32_C(1258523848) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 315066528), UINT32_C( 430031946), UINT32_C(1728007092), UINT32_C(1222768858) }, { UINT32_C(1032665308), UINT32_C( 310645075), UINT32_C(1286890725), UINT32_C(3354357527) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(2644284771), UINT32_C(1236099723), UINT32_C( 231852588), UINT32_C(2448077206) }, { UINT32_C(3435246043), UINT32_C( 670570278), UINT32_C(3762368078), UINT32_C(2916008363) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(2735332137), UINT32_C(1434767770), UINT32_C(1077117521), UINT32_C(1742331628) }, { UINT32_C( 985703793), UINT32_C(3165483421), UINT32_C(3843291000), UINT32_C(1166510637) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(3950305561), UINT32_C(2232424738), UINT32_C(3381591370), UINT32_C( 328631939) }, { UINT32_C(1341300422), UINT32_C( 428484959), UINT32_C( 596788875), UINT32_C(3607286340) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { 
UINT32_C(1044787036), UINT32_C( 557269961), UINT32_C(1850458944), UINT32_C(3691557125) }, { UINT32_C(3535741697), UINT32_C(2255898389), UINT32_C(1646699694), UINT32_C(2871501465) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(1692945689), UINT32_C(1893946935), UINT32_C(4174404186), UINT32_C( 202991383) }, { UINT32_C(2375007562), UINT32_C(1247531366), UINT32_C(1765251081), UINT32_C( 165544404) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C( 633390201), UINT32_C(3153811989), UINT32_C( 780618945), UINT32_C(3937448291) }, { UINT32_C(1187705547), UINT32_C(2921386491), UINT32_C( 19770053), UINT32_C( 940570780) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_comneq_epu32(a, b); simde_test_x86_assert_equal_u32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u32x4(); simde__m128i b = (i == 0) ? 
a : simde_test_x86_random_u32x4(); simde__m128i r = simde_mm_comneq_epu32(a, b); simde_test_x86_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comneq_epu64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint64_t a[2]; const uint64_t b[2]; const uint64_t r[2]; } test_vec[] = { { { UINT64_C( 5361350686904162980), UINT64_C(13669997797138899356) }, { UINT64_C( 5361350686904162980), UINT64_C(13669997797138899356) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(18063179973976935849), UINT64_C( 9341185368968421898) }, { UINT64_C( 7282134507660499649), UINT64_C(14848683848067669780) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 8568750919223694095), UINT64_C( 3206505158861670911) }, { UINT64_C( 6114586325512653304), UINT64_C( 7325759221035798609) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 7620057443402157603), UINT64_C( 9335601207311263437) }, { UINT64_C( 6823929844010168691), UINT64_C(14513196095610043258) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 2095306404218111245), UINT64_C(14391353092876059035) }, { UINT64_C(14807743135612787031), UINT64_C( 4915358179487967671) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C(15623489378715681014), UINT64_C( 5252251355818517394) }, { UINT64_C(16656562333259696847), UINT64_C( 2088054659659145522) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C(16419614906668926982), UINT64_C( 5402323566441812954) }, { UINT64_C(17695625336589536000), UINT64_C(11140625531488398096) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 9665645717917344868), UINT64_C( 1648566707567335834) }, { UINT64_C( 4515750840144995349), UINT64_C( 433321513721155452) }, { UINT64_MAX, UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); simde__m128i 
r = simde_mm_comneq_epu64(a, b); simde_test_x86_assert_equal_u64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u64x2(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u64x2(); simde__m128i r = simde_mm_comneq_epu64(a, b); simde_test_x86_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comge_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { { { INT8_C( 57), INT8_C( 92), INT8_C( 113), -INT8_C( 91), -INT8_C( 30), INT8_C( 16), -INT8_C( 120), INT8_C( 8), -INT8_C( 5), INT8_C( 62), -INT8_C( 44), -INT8_C( 32), -INT8_C( 124), -INT8_C( 77), -INT8_C( 127), -INT8_C( 12) }, { INT8_C( 57), INT8_C( 92), INT8_C( 113), -INT8_C( 91), -INT8_C( 30), INT8_C( 16), -INT8_C( 120), INT8_C( 8), -INT8_C( 5), INT8_C( 62), -INT8_C( 44), -INT8_C( 32), -INT8_C( 124), -INT8_C( 77), -INT8_C( 127), -INT8_C( 12) }, { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } }, { { INT8_C( 52), INT8_C( 29), INT8_C( 19), INT8_C( 11), -INT8_C( 56), INT8_C( 19), INT8_C( 88), INT8_C( 12), -INT8_C( 100), INT8_C( 86), -INT8_C( 102), INT8_C( 100), INT8_C( 120), INT8_C( 87), INT8_C( 24), -INT8_C( 88) }, { -INT8_C( 84), -INT8_C( 7), -INT8_C( 72), INT8_C( 123), INT8_C( 95), -INT8_C( 105), -INT8_C( 10), -INT8_C( 77), INT8_C( 36), INT8_C( 73), INT8_C( 32), -INT8_C( 73), -INT8_C( 50), INT8_C( 99), -INT8_C( 59), INT8_C( 43) }, { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 
1), INT8_C( 0) } }, { { -INT8_C( 122), INT8_C( 63), INT8_C( 107), -INT8_C( 48), INT8_C( 26), INT8_C( 61), INT8_MIN, -INT8_C( 91), -INT8_C( 104), INT8_C( 7), -INT8_C( 109), INT8_C( 83), INT8_C( 28), INT8_C( 26), -INT8_C( 60), -INT8_C( 57) }, { -INT8_C( 26), -INT8_C( 126), INT8_C( 121), INT8_C( 33), -INT8_C( 81), -INT8_C( 28), INT8_C( 83), -INT8_C( 51), INT8_C( 61), -INT8_C( 62), INT8_C( 97), -INT8_C( 17), INT8_C( 54), -INT8_C( 5), -INT8_C( 109), INT8_C( 5) }, { INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0) } }, { { INT8_C( 50), -INT8_C( 111), INT8_C( 113), -INT8_C( 56), INT8_C( 19), -INT8_C( 81), INT8_C( 12), INT8_C( 82), -INT8_C( 103), -INT8_C( 21), INT8_C( 53), -INT8_C( 122), INT8_C( 49), -INT8_C( 56), -INT8_C( 18), -INT8_C( 83) }, { INT8_C( 14), INT8_MAX, INT8_C( 30), INT8_C( 93), INT8_C( 123), -INT8_C( 1), -INT8_C( 119), -INT8_C( 97), INT8_C( 112), INT8_C( 50), -INT8_C( 2), INT8_C( 80), INT8_C( 99), -INT8_C( 126), -INT8_C( 46), -INT8_C( 56) }, { -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0) } }, { { INT8_C( 96), -INT8_C( 100), -INT8_C( 114), -INT8_C( 70), INT8_C( 92), INT8_C( 115), -INT8_C( 27), INT8_C( 91), -INT8_C( 57), -INT8_C( 120), -INT8_C( 23), -INT8_C( 57), INT8_C( 96), INT8_C( 106), INT8_C( 126), -INT8_C( 96) }, { INT8_C( 77), INT8_C( 120), INT8_C( 14), -INT8_C( 8), INT8_C( 107), -INT8_C( 18), INT8_C( 126), INT8_C( 113), -INT8_C( 29), INT8_C( 32), INT8_C( 97), -INT8_C( 94), -INT8_C( 2), INT8_C( 0), INT8_C( 108), -INT8_C( 67) }, { -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0) } }, { { INT8_C( 58), -INT8_C( 
26), INT8_C( 42), INT8_C( 111), -INT8_C( 99), -INT8_C( 111), -INT8_C( 15), INT8_C( 8), INT8_C( 73), INT8_C( 105), INT8_C( 21), -INT8_C( 36), INT8_C( 81), INT8_C( 6), INT8_C( 91), -INT8_C( 23) }, { -INT8_C( 53), -INT8_C( 10), -INT8_C( 79), -INT8_C( 70), INT8_C( 38), -INT8_C( 69), INT8_C( 28), -INT8_C( 117), -INT8_C( 66), INT8_C( 20), -INT8_C( 15), -INT8_C( 83), -INT8_C( 82), INT8_C( 126), INT8_C( 72), INT8_C( 44) }, { -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0) } }, { { -INT8_C( 27), -INT8_C( 7), -INT8_C( 82), -INT8_C( 80), INT8_C( 125), INT8_C( 15), INT8_C( 26), -INT8_C( 95), INT8_C( 72), INT8_C( 19), INT8_C( 35), -INT8_C( 114), -INT8_C( 85), -INT8_C( 89), INT8_C( 111), -INT8_C( 48) }, { -INT8_C( 81), INT8_MIN, INT8_C( 111), INT8_C( 106), INT8_C( 85), INT8_C( 110), INT8_C( 73), INT8_C( 52), INT8_C( 42), -INT8_C( 106), INT8_C( 22), INT8_C( 57), INT8_C( 28), INT8_C( 2), INT8_C( 78), INT8_C( 93) }, { -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0) } }, { { -INT8_C( 116), INT8_C( 93), INT8_MIN, INT8_C( 67), -INT8_C( 92), -INT8_C( 9), INT8_C( 71), INT8_C( 109), -INT8_C( 21), INT8_C( 16), INT8_C( 121), -INT8_C( 91), INT8_C( 22), INT8_C( 82), -INT8_C( 95), -INT8_C( 100) }, { INT8_C( 34), -INT8_C( 97), -INT8_C( 80), -INT8_C( 46), -INT8_C( 97), INT8_C( 13), -INT8_C( 17), -INT8_C( 75), INT8_C( 79), INT8_C( 47), INT8_C( 56), INT8_C( 13), -INT8_C( 17), -INT8_C( 105), INT8_C( 102), -INT8_C( 105) }, { INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_comge_epi8(a, b); simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_comge_epi8(a, b); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comge_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { { -INT16_C( 26539), -INT16_C( 4086), INT16_C( 20923), -INT16_C( 19104), -INT16_C( 6565), -INT16_C( 5759), INT16_C( 4412), -INT16_C( 26663) }, { -INT16_C( 26539), -INT16_C( 4086), INT16_C( 20923), -INT16_C( 19104), -INT16_C( 6565), -INT16_C( 5759), INT16_C( 4412), -INT16_C( 26663) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } }, { { -INT16_C( 9653), -INT16_C( 18212), -INT16_C( 23891), INT16_C( 22261), INT16_C( 26196), -INT16_C( 3393), INT16_C( 17104), INT16_C( 9336) }, { INT16_C( 13416), -INT16_C( 32333), INT16_C( 9066), -INT16_C( 16052), INT16_C( 8383), INT16_C( 8865), -INT16_C( 5182), INT16_C( 2047) }, { INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1) } }, { { -INT16_C( 18093), -INT16_C( 6821), INT16_C( 13350), INT16_C( 24387), -INT16_C( 15263), -INT16_C( 20460), INT16_C( 3431), INT16_C( 19052) }, { -INT16_C( 17938), -INT16_C( 17182), INT16_C( 30552), -INT16_C( 9739), INT16_C( 17216), INT16_C( 5955), -INT16_C( 5805), INT16_C( 13820) }, { INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), 
-INT16_C( 1) } }, { { -INT16_C( 15263), INT16_C( 8597), -INT16_C( 13133), INT16_C( 6337), -INT16_C( 12128), INT16_C( 3997), -INT16_C( 165), INT16_C( 21034) }, { -INT16_C( 21233), INT16_C( 27393), INT16_C( 21550), INT16_C( 17730), -INT16_C( 9978), -INT16_C( 28210), INT16_C( 4501), INT16_C( 27062) }, { -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 31586), INT16_C( 13299), INT16_C( 29180), -INT16_C( 13993), INT16_C( 4663), -INT16_C( 10301), INT16_C( 8277), -INT16_C( 31589) }, { -INT16_C( 25870), INT16_C( 21623), -INT16_C( 15469), INT16_C( 19483), INT16_C( 27192), INT16_C( 22698), INT16_C( 27952), -INT16_C( 5354) }, { INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 32719), -INT16_C( 6693), INT16_C( 11176), -INT16_C( 17939), INT16_C( 4686), -INT16_C( 12306), INT16_C( 31228), INT16_C( 10149) }, { INT16_C( 1984), INT16_C( 16556), -INT16_C( 13776), INT16_C( 14440), INT16_C( 31998), INT16_C( 13374), INT16_C( 1485), INT16_C( 516) }, { INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1) } }, { { INT16_C( 16451), -INT16_C( 586), INT16_C( 608), INT16_C( 12651), INT16_C( 22798), -INT16_C( 16249), INT16_C( 4601), -INT16_C( 31439) }, { INT16_C( 32160), -INT16_C( 2552), INT16_C( 29869), INT16_C( 20241), -INT16_C( 26239), -INT16_C( 4620), -INT16_C( 8428), -INT16_C( 2199) }, { INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0) } }, { { INT16_C( 19964), INT16_C( 17645), -INT16_C( 53), INT16_C( 31161), INT16_C( 28318), INT16_C( 29428), -INT16_C( 3597), -INT16_C( 7128) }, { -INT16_C( 31813), INT16_C( 16627), -INT16_C( 14671), -INT16_C( 9473), INT16_C( 18665), INT16_C( 18995), INT16_C( 1452), INT16_C( 4651) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0) } } }; for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_comge_epi16(a, b); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_comge_epi16(a, b); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comge_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { { -INT32_C( 2098844029), -INT32_C( 641783406), -INT32_C( 1382033113), -INT32_C( 1922031725) }, { -INT32_C( 2098844029), -INT32_C( 641783406), -INT32_C( 1382033113), -INT32_C( 1922031725) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { -INT32_C( 455761607), INT32_C( 538495716), INT32_C( 342102622), -INT32_C( 1691058370) }, { INT32_C( 2131454721), -INT32_C( 1721333412), INT32_C( 971908049), -INT32_C( 923556479) }, { INT32_C( 0), -INT32_C( 1), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 1424597183), INT32_C( 1799718895), INT32_C( 334331143), -INT32_C( 630566031) }, { INT32_C( 46370205), INT32_C( 9709010), -INT32_C( 582233149), -INT32_C( 594048156) }, { INT32_C( 0), -INT32_C( 1), -INT32_C( 1), INT32_C( 0) } }, { { INT32_C( 1562265339), INT32_C( 24053114), INT32_C( 255267596), INT32_C( 397904878) }, { -INT32_C( 713745536), INT32_C( 1462896540), INT32_C( 1646096678), INT32_C( 323409892) }, { -INT32_C( 1), INT32_C( 0), INT32_C( 0), -INT32_C( 1) } }, { { -INT32_C( 1034560112), INT32_C( 729566508), -INT32_C( 794141546), -INT32_C( 1727910052) }, { -INT32_C( 1098146351), 
-INT32_C( 433506463), -INT32_C( 1848867551), -INT32_C( 1276669014) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), INT32_C( 0) } }, { { -INT32_C( 1591432152), INT32_C( 827745966), INT32_C( 497999821), -INT32_C( 1456257692) }, { -INT32_C( 2039702343), -INT32_C( 178275383), INT32_C( 817928413), INT32_C( 90359390) }, { -INT32_C( 1), -INT32_C( 1), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 990823190), INT32_C( 1541626023), -INT32_C( 1062460200), -INT32_C( 1892558930) }, { -INT32_C( 176119968), -INT32_C( 859923074), INT32_C( 123744642), INT32_C( 1369405864) }, { INT32_C( 0), -INT32_C( 1), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 249228033), -INT32_C( 250794816), INT32_C( 2131407839), -INT32_C( 1813582623) }, { -INT32_C( 768659731), -INT32_C( 1288837851), -INT32_C( 535373513), -INT32_C( 544885970) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), INT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_comge_epi32(a, b); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = (i == 0) ? 
a : simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_comge_epi32(a, b); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comge_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { { INT64_C( 4231951681613053327), -INT64_C( 199972968041671412) }, { INT64_C( 4231951681613053327), -INT64_C( 199972968041671412) }, { -INT64_C( 1), -INT64_C( 1) } }, { { -INT64_C( 1039318408851500663), -INT64_C( 648944288963881180) }, { INT64_C( 9211154201577831873), INT64_C( 1446431342413402501) }, { INT64_C( 0), INT64_C( 0) } }, { { -INT64_C( 3469245127459991205), -INT64_C( 2309541123369831566) }, { INT64_C( 595107754553482941), INT64_C( 2403110769458442098) }, { INT64_C( 0), INT64_C( 0) } }, { { -INT64_C( 7644461919772969588), -INT64_C( 2069442768328493751) }, { INT64_C( 2156352861533824940), INT64_C( 7851466218311481340) }, { INT64_C( 0), INT64_C( 0) } }, { { -INT64_C( 8383284232392702397), INT64_C( 5253030957281289936) }, { INT64_C( 484869302557483446), INT64_C( 4454658751047889483) }, { INT64_C( 0), -INT64_C( 1) } }, { { -INT64_C( 504594842707768663), INT64_C( 6248703133163752752) }, { INT64_C( 762337237287621378), -INT64_C( 2534416100243281786) }, { INT64_C( 0), -INT64_C( 1) } }, { { INT64_C( 2798601033062927334), INT64_C( 6314863776929002384) }, { INT64_C( 8314271650882946489), -INT64_C( 8006698707022159915) }, { INT64_C( 0), -INT64_C( 1) } }, { { INT64_C( 6563306511736312097), -INT64_C( 7957661828417413607) }, { -INT64_C( 8715528813258751998), -INT64_C( 6635828572573114015) }, { -INT64_C( 1), INT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); simde__m128i r = 
simde_mm_comge_epi64(a, b); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_comge_epi64(a, b); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comge_epu8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint8_t a[16]; const uint8_t b[16]; const uint8_t r[16]; } test_vec[] = { { { UINT8_C(130), UINT8_C( 34), UINT8_C( 53), UINT8_C( 20), UINT8_C( 99), UINT8_C( 30), UINT8_C(164), UINT8_C(224), UINT8_C(243), UINT8_C(216), UINT8_C(125), UINT8_C(129), UINT8_C(206), UINT8_C( 73), UINT8_C(103), UINT8_C( 77) }, { UINT8_C(130), UINT8_C( 34), UINT8_C( 53), UINT8_C( 20), UINT8_C( 99), UINT8_C( 30), UINT8_C(164), UINT8_C(224), UINT8_C(243), UINT8_C(216), UINT8_C(125), UINT8_C(129), UINT8_C(206), UINT8_C( 73), UINT8_C(103), UINT8_C( 77) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 6), UINT8_C( 10), UINT8_C( 35), UINT8_C( 86), UINT8_C(117), UINT8_C( 95), UINT8_C(122), UINT8_C(143), UINT8_C( 82), UINT8_C(117), UINT8_C(116), UINT8_C( 48), UINT8_C( 3), UINT8_C(232), UINT8_C(203), UINT8_C( 91) }, { UINT8_C( 50), UINT8_C(136), UINT8_C(159), UINT8_MAX, UINT8_C( 35), UINT8_C( 93), UINT8_C(107), UINT8_C(253), UINT8_C( 69), UINT8_C(233), UINT8_C( 56), UINT8_C(137), UINT8_C( 7), UINT8_C(172), UINT8_C( 17), UINT8_C(211) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { 
UINT8_C(236), UINT8_C(238), UINT8_C(182), UINT8_C(231), UINT8_C(226), UINT8_C(184), UINT8_C(149), UINT8_C(210), UINT8_C(208), UINT8_C( 36), UINT8_C(245), UINT8_C( 7), UINT8_C(110), UINT8_C(214), UINT8_C(117), UINT8_C( 1) }, { UINT8_C( 89), UINT8_C(203), UINT8_C(181), UINT8_C( 41), UINT8_C(103), UINT8_C( 82), UINT8_C( 83), UINT8_C(249), UINT8_C( 55), UINT8_C( 87), UINT8_C( 25), UINT8_C( 98), UINT8_C( 13), UINT8_C(231), UINT8_C(116), UINT8_C(107) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C(221), UINT8_C(241), UINT8_C( 40), UINT8_C( 28), UINT8_C(166), UINT8_C( 74), UINT8_C( 67), UINT8_C(154), UINT8_C( 2), UINT8_C(244), UINT8_C( 79), UINT8_C(147), UINT8_C(250), UINT8_C(160), UINT8_C(204), UINT8_C(220) }, { UINT8_C( 30), UINT8_C(113), UINT8_C(222), UINT8_C( 89), UINT8_C(214), UINT8_C( 3), UINT8_C( 66), UINT8_C( 30), UINT8_C(243), UINT8_C(172), UINT8_C(134), UINT8_C(211), UINT8_C(137), UINT8_C( 2), UINT8_C(122), UINT8_C( 93) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 1), UINT8_C(157), UINT8_C(226), UINT8_C(187), UINT8_C(106), UINT8_C( 30), UINT8_C(109), UINT8_C( 45), UINT8_C( 16), UINT8_C(111), UINT8_C(235), UINT8_C(156), UINT8_C( 80), UINT8_C( 77), UINT8_C(186), UINT8_C( 54) }, { UINT8_C(170), UINT8_C( 4), UINT8_C(129), UINT8_C( 89), UINT8_C( 24), UINT8_C(122), UINT8_C( 33), UINT8_C(178), UINT8_C(158), UINT8_C(111), UINT8_C(233), UINT8_C(166), UINT8_C( 35), UINT8_C( 3), UINT8_C( 10), UINT8_C(240) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C(127), UINT8_C(121), UINT8_C( 74), 
UINT8_C(140), UINT8_C(213), UINT8_C( 58), UINT8_C(252), UINT8_C(212), UINT8_C( 35), UINT8_C( 29), UINT8_C( 47), UINT8_C(234), UINT8_C( 23), UINT8_C(229), UINT8_C( 38), UINT8_C( 85) }, { UINT8_C( 37), UINT8_C( 12), UINT8_C( 8), UINT8_C(239), UINT8_C(214), UINT8_C(191), UINT8_C(217), UINT8_C(254), UINT8_C( 99), UINT8_C( 41), UINT8_C(169), UINT8_C(161), UINT8_C(130), UINT8_C(244), UINT8_C( 11), UINT8_C(110) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C(128), UINT8_C( 13), UINT8_C(201), UINT8_C( 89), UINT8_C(144), UINT8_C(168), UINT8_C(214), UINT8_C(215), UINT8_C( 99), UINT8_C(133), UINT8_C(132), UINT8_C( 68), UINT8_C(248), UINT8_C(113), UINT8_C(247), UINT8_C(130) }, { UINT8_C(182), UINT8_C( 15), UINT8_C(218), UINT8_C(228), UINT8_C(184), UINT8_C(216), UINT8_C( 80), UINT8_C( 74), UINT8_C(104), UINT8_C( 97), UINT8_C( 46), UINT8_C(141), UINT8_C( 79), UINT8_C(220), UINT8_C(103), UINT8_C( 29) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 43), UINT8_C(226), UINT8_C(200), UINT8_C(232), UINT8_C( 66), UINT8_C(112), UINT8_C(229), UINT8_C(126), UINT8_C(247), UINT8_C( 47), UINT8_C( 83), UINT8_C(115), UINT8_C(155), UINT8_C(248), UINT8_C( 23), UINT8_C( 5) }, { UINT8_C(133), UINT8_C(151), UINT8_C( 95), UINT8_MAX, UINT8_C(101), UINT8_C(207), UINT8_C(112), UINT8_C(222), UINT8_C(213), UINT8_C(159), UINT8_C(224), UINT8_C( 49), UINT8_C( 49), UINT8_C(196), UINT8_C( 5), UINT8_C( 70) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_comge_epu8(a, b); simde_test_x86_assert_equal_u8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u8x16(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u8x16(); simde__m128i r = simde_mm_comge_epu8(a, b); simde_test_x86_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comge_epu16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint16_t a[8]; const uint16_t b[8]; const uint16_t r[8]; } test_vec[] = { { { UINT16_C(32681), UINT16_C( 685), UINT16_C(39060), UINT16_C( 4367), UINT16_C(41735), UINT16_C(16386), UINT16_C(33447), UINT16_C( 9837) }, { UINT16_C(32681), UINT16_C( 685), UINT16_C(39060), UINT16_C( 4367), UINT16_C(41735), UINT16_C(16386), UINT16_C(33447), UINT16_C( 9837) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(11195), UINT16_C( 2815), UINT16_C(44166), UINT16_C( 288), UINT16_C(27859), UINT16_C(22311), UINT16_C(46033), UINT16_C(12493) }, { UINT16_C(27681), UINT16_C(28641), UINT16_C(10798), UINT16_C(55102), UINT16_C(27325), UINT16_C(28923), UINT16_C( 6341), UINT16_C(11745) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { UINT16_C(21632), UINT16_C(52002), UINT16_C(30146), UINT16_C(64583), UINT16_C(20106), UINT16_C(50539), UINT16_C(45526), UINT16_C( 9639) }, { UINT16_C(12988), UINT16_C(63437), UINT16_C(11703), UINT16_C( 6489), UINT16_C( 2879), UINT16_C(52900), UINT16_C(49308), UINT16_C(24923) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { 
UINT16_C(39163), UINT16_C( 3120), UINT16_C(13506), UINT16_C( 6095), UINT16_C(53025), UINT16_C(17428), UINT16_C(51177), UINT16_C(27515) }, { UINT16_C(22690), UINT16_C(25304), UINT16_C(43736), UINT16_C( 8008), UINT16_C( 3765), UINT16_C( 8551), UINT16_C(34005), UINT16_C( 3267) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(33365), UINT16_C(37777), UINT16_C(61485), UINT16_C(39583), UINT16_C(30656), UINT16_C(40074), UINT16_C(64434), UINT16_C(19249) }, { UINT16_C(26362), UINT16_C(30569), UINT16_C(43064), UINT16_C(12787), UINT16_C(64584), UINT16_C(12459), UINT16_C(27671), UINT16_C(29697) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C(38837), UINT16_C(10413), UINT16_C(45740), UINT16_C(52639), UINT16_C(52625), UINT16_C(38197), UINT16_C(22488), UINT16_C( 3505) }, { UINT16_C(58860), UINT16_C(65514), UINT16_C(12160), UINT16_C(38722), UINT16_C(23327), UINT16_C(50391), UINT16_C(32523), UINT16_C(57854) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(24900), UINT16_C(38381), UINT16_C(33255), UINT16_C(63671), UINT16_C(22969), UINT16_C(63358), UINT16_C(58116), UINT16_C(63971) }, { UINT16_C(23714), UINT16_C(50114), UINT16_C(18519), UINT16_C(38941), UINT16_C(46692), UINT16_C(42325), UINT16_C(50520), UINT16_C(40352) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(26410), UINT16_C(41399), UINT16_C(25989), UINT16_C(25295), UINT16_C(41827), UINT16_C(35275), UINT16_C(42717), UINT16_C(22448) }, { UINT16_C(21313), UINT16_C(34905), UINT16_C(63846), UINT16_C(32364), UINT16_C(37436), UINT16_C(39820), UINT16_C(18087), UINT16_C(61392) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_comge_epu16(a, b); simde_test_x86_assert_equal_u16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u16x8(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u16x8(); simde__m128i r = simde_mm_comge_epu16(a, b); simde_test_x86_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comge_epu32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint32_t a[4]; const uint32_t b[4]; const uint32_t r[4]; } test_vec[] = { { { UINT32_C( 326381966), UINT32_C(1422878255), UINT32_C( 344274101), UINT32_C(1862182924) }, { UINT32_C( 326381966), UINT32_C(1422878255), UINT32_C( 344274101), UINT32_C(1862182924) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C( 420958964), UINT32_C(2400668756), UINT32_C(1860467153), UINT32_C( 511118240) }, { UINT32_C(3029158040), UINT32_C( 379555212), UINT32_C(1612026839), UINT32_C(2276979000) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(1030021857), UINT32_C( 335956426), UINT32_C(3854955083), UINT32_C(1937447914) }, { UINT32_C(1281897330), UINT32_C(4029210949), UINT32_C( 903956722), UINT32_C(3938552330) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(3148384816), UINT32_C(1411512176), UINT32_C(3398928082), UINT32_C( 918978094) }, { UINT32_C(2741992001), UINT32_C( 709913089), UINT32_C(1532222510), UINT32_C(3741743658) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(1548473098), UINT32_C(2572246765), UINT32_C(3821190028), UINT32_C(2940091667) }, { UINT32_C(2148116016), UINT32_C( 184562025), UINT32_C(2580731570), 
UINT32_C(2926364991) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(3904193687), UINT32_C( 696026858), UINT32_C(4188937380), UINT32_C( 634216258) }, { UINT32_C(2895986276), UINT32_C(3693165860), UINT32_C(3116744359), UINT32_C(2665316337) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C( 659449342), UINT32_C(1300985870), UINT32_C(3552849472), UINT32_C(3797609570) }, { UINT32_C(4036348424), UINT32_C(3868825563), UINT32_C(2174098740), UINT32_C(4164986346) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(3766468200), UINT32_C(1298610946), UINT32_C( 996834185), UINT32_C( 751627548) }, { UINT32_C( 307810882), UINT32_C(1850885430), UINT32_C(3131865474), UINT32_C( 92029903) }, { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_comge_epu32(a, b); simde_test_x86_assert_equal_u32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u32x4(); simde__m128i b = (i == 0) ? 
a : simde_test_x86_random_u32x4(); simde__m128i r = simde_mm_comge_epu32(a, b); simde_test_x86_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comge_epu64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint64_t a[2]; const uint64_t b[2]; const uint64_t r[2]; } test_vec[] = { { { UINT64_C( 8211987837427308796), UINT64_C( 5480151877524490918) }, { UINT64_C( 8211987837427308796), UINT64_C( 5480151877524490918) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C(13777164219077025594), UINT64_C( 755302138987371961) }, { UINT64_C( 81722270716082915), UINT64_C( 9945189316467286975) }, { UINT64_MAX, UINT64_C( 0) } }, { { UINT64_C( 2315908445777131546), UINT64_C( 5918859128641900032) }, { UINT64_C( 4959233173195746372), UINT64_C( 3285923985569812478) }, { UINT64_C( 0), UINT64_MAX } }, { { UINT64_C(15567995769879471623), UINT64_C( 2620873581216454271) }, { UINT64_C( 9506557910353180488), UINT64_C( 9201997689620698505) }, { UINT64_MAX, UINT64_C( 0) } }, { { UINT64_C( 3518706747851305003), UINT64_C(14345092700155837535) }, { UINT64_C( 458634775754859030), UINT64_C(14284795380240678280) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 8099097797183463086), UINT64_C( 9523822594807941320) }, { UINT64_C( 1515945609713235416), UINT64_C( 5412243535409106980) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C(16187658354348390584), UINT64_C(11785212616360215521) }, { UINT64_C(17931385931382127027), UINT64_C( 6213328243168416900) }, { UINT64_C( 0), UINT64_MAX } }, { { UINT64_C(14375210412457215855), UINT64_C( 7791293546164876499) }, { UINT64_C(17777258666399839697), UINT64_C( 3350089076668880082) }, { UINT64_C( 0), UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); simde__m128i 
r = simde_mm_comge_epu64(a, b); simde_test_x86_assert_equal_u64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u64x2(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u64x2(); simde__m128i r = simde_mm_comge_epu64(a, b); simde_test_x86_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comgt_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { { { -INT8_C( 2), INT8_C( 82), -INT8_C( 67), INT8_C( 54), -INT8_C( 53), INT8_C( 122), -INT8_C( 40), INT8_C( 111), -INT8_C( 59), -INT8_C( 28), -INT8_C( 47), INT8_C( 30), INT8_C( 38), -INT8_C( 76), -INT8_C( 51), INT8_C( 38) }, { -INT8_C( 2), INT8_C( 82), -INT8_C( 67), INT8_C( 54), -INT8_C( 53), INT8_C( 122), -INT8_C( 40), INT8_C( 111), -INT8_C( 59), -INT8_C( 28), -INT8_C( 47), INT8_C( 30), INT8_C( 38), -INT8_C( 76), -INT8_C( 51), INT8_C( 38) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 90), INT8_C( 113), -INT8_C( 57), -INT8_C( 27), INT8_C( 89), INT8_C( 9), -INT8_C( 123), INT8_C( 89), INT8_C( 51), INT8_C( 21), -INT8_C( 53), -INT8_C( 121), -INT8_C( 20), INT8_C( 95), -INT8_C( 50), INT8_C( 29) }, { -INT8_C( 117), -INT8_C( 95), -INT8_C( 21), INT8_C( 86), INT8_C( 110), -INT8_C( 112), -INT8_C( 103), INT8_C( 30), -INT8_C( 32), INT8_C( 25), -INT8_C( 99), INT8_C( 119), -INT8_C( 18), -INT8_C( 21), INT8_C( 125), INT8_C( 27) }, { -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1) } 
}, { { INT8_C( 118), INT8_C( 53), INT8_C( 54), INT8_C( 98), INT8_MIN, -INT8_C( 79), INT8_C( 49), INT8_C( 103), -INT8_C( 47), -INT8_C( 31), INT8_C( 116), INT8_C( 102), -INT8_C( 66), -INT8_C( 103), INT8_C( 22), INT8_C( 105) }, { INT8_C( 63), -INT8_C( 67), -INT8_C( 11), INT8_C( 36), INT8_C( 68), INT8_C( 76), -INT8_C( 85), INT8_C( 29), INT8_C( 76), -INT8_C( 99), -INT8_C( 67), INT8_C( 16), INT8_C( 51), -INT8_C( 23), INT8_C( 22), -INT8_C( 111) }, { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1) } }, { { INT8_C( 75), INT8_C( 10), -INT8_C( 76), -INT8_C( 13), -INT8_C( 81), -INT8_C( 126), -INT8_C( 94), INT8_C( 105), -INT8_C( 44), -INT8_C( 65), INT8_C( 38), INT8_C( 107), INT8_C( 97), -INT8_C( 98), INT8_C( 59), INT8_C( 91) }, { INT8_C( 65), INT8_C( 45), INT8_C( 65), INT8_C( 105), -INT8_C( 10), -INT8_C( 75), -INT8_C( 12), -INT8_C( 76), INT8_C( 48), -INT8_C( 8), -INT8_C( 101), -INT8_C( 79), -INT8_C( 98), -INT8_C( 73), -INT8_C( 127), -INT8_C( 47) }, { -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1) } }, { { INT8_C( 4), INT8_C( 119), -INT8_C( 87), INT8_C( 94), -INT8_C( 114), -INT8_C( 123), -INT8_C( 66), -INT8_C( 90), INT8_C( 100), INT8_C( 56), INT8_C( 73), INT8_C( 92), INT8_C( 126), INT8_C( 118), INT8_C( 39), INT8_C( 59) }, { -INT8_C( 70), INT8_C( 121), INT8_C( 57), -INT8_C( 20), INT8_C( 43), -INT8_C( 45), INT8_C( 93), INT8_C( 42), -INT8_C( 76), -INT8_C( 80), -INT8_C( 99), INT8_C( 36), -INT8_C( 42), INT8_C( 92), -INT8_C( 89), INT8_C( 35) }, { -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } }, { { -INT8_C( 57), INT8_C( 5), INT8_C( 
125), INT8_C( 108), -INT8_C( 60), -INT8_C( 63), INT8_C( 110), INT8_C( 104), -INT8_C( 89), -INT8_C( 45), INT8_C( 69), INT8_C( 2), -INT8_C( 68), INT8_C( 42), -INT8_C( 64), INT8_C( 81) }, { -INT8_C( 47), INT8_C( 42), INT8_C( 68), INT8_C( 119), -INT8_C( 116), -INT8_C( 81), -INT8_C( 50), -INT8_C( 55), INT8_C( 2), INT8_C( 79), INT8_C( 45), INT8_C( 48), -INT8_C( 124), -INT8_C( 32), INT8_C( 111), -INT8_C( 50) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1) } }, { { -INT8_C( 68), INT8_C( 58), -INT8_C( 103), -INT8_C( 27), -INT8_C( 6), INT8_C( 62), -INT8_C( 101), -INT8_C( 11), -INT8_C( 56), INT8_C( 24), -INT8_C( 126), INT8_C( 37), -INT8_C( 60), -INT8_C( 64), -INT8_C( 16), -INT8_C( 28) }, { -INT8_C( 81), -INT8_C( 57), -INT8_C( 55), -INT8_C( 48), -INT8_C( 63), INT8_C( 80), INT8_C( 48), -INT8_C( 41), INT8_C( 63), INT8_C( 93), -INT8_C( 78), -INT8_C( 100), INT8_C( 80), INT8_C( 76), -INT8_C( 64), INT8_C( 28) }, { -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0) } }, { { INT8_C( 13), -INT8_C( 95), INT8_C( 99), -INT8_C( 111), -INT8_C( 41), INT8_C( 5), INT8_C( 43), -INT8_C( 105), -INT8_C( 19), -INT8_C( 113), INT8_C( 105), -INT8_C( 115), INT8_C( 61), INT8_C( 67), -INT8_C( 99), INT8_C( 62) }, { INT8_C( 123), -INT8_C( 40), INT8_C( 50), -INT8_C( 64), INT8_C( 113), -INT8_C( 65), INT8_C( 105), -INT8_C( 99), -INT8_C( 106), INT8_C( 96), -INT8_C( 108), INT8_C( 49), -INT8_C( 31), -INT8_C( 89), -INT8_C( 124), INT8_C( 83) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; 
i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_comgt_epi8(a, b); simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_comgt_epi8(a, b); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comgt_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { { -INT16_C( 16417), INT16_C( 14404), INT16_C( 7428), -INT16_C( 27896), -INT16_C( 16065), INT16_C( 352), -INT16_C( 18224), -INT16_C( 23119) }, { -INT16_C( 16417), INT16_C( 14404), INT16_C( 7428), -INT16_C( 27896), -INT16_C( 16065), INT16_C( 352), -INT16_C( 18224), -INT16_C( 23119) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 6562), INT16_C( 4070), INT16_C( 580), INT16_C( 25443), -INT16_C( 7635), -INT16_C( 18630), -INT16_C( 1568), -INT16_C( 17246) }, { INT16_C( 7773), -INT16_C( 24155), -INT16_C( 29144), INT16_C( 13336), INT16_C( 13797), INT16_C( 19408), INT16_C( 10533), INT16_C( 25107) }, { INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 30848), -INT16_C( 31411), INT16_C( 9188), INT16_C( 28932), INT16_C( 27180), -INT16_C( 2546), INT16_C( 19188), -INT16_C( 24830) }, { INT16_C( 17836), -INT16_C( 27156), INT16_C( 25068), -INT16_C( 16059), INT16_C( 29447), INT16_C( 12578), -INT16_C( 25117), -INT16_C( 16981) }, { -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), 
INT16_C( 0) } }, { { INT16_C( 5640), -INT16_C( 5425), INT16_C( 10742), INT16_C( 3638), -INT16_C( 32324), -INT16_C( 19081), -INT16_C( 23610), INT16_C( 17548) }, { -INT16_C( 17161), -INT16_C( 9085), -INT16_C( 25096), -INT16_C( 32395), INT16_C( 1231), INT16_C( 31932), INT16_C( 7603), -INT16_C( 671) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1) } }, { { INT16_C( 18462), INT16_C( 1492), INT16_C( 7461), -INT16_C( 31776), -INT16_C( 21243), -INT16_C( 16675), INT16_C( 3070), -INT16_C( 3544) }, { INT16_C( 3172), INT16_C( 16080), INT16_C( 19187), -INT16_C( 17006), INT16_C( 28515), -INT16_C( 3064), -INT16_C( 20420), INT16_C( 27677) }, { -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0) } }, { { -INT16_C( 26644), -INT16_C( 24636), INT16_C( 1558), INT16_C( 6634), INT16_C( 31022), -INT16_C( 10328), -INT16_C( 29885), -INT16_C( 3138) }, { -INT16_C( 17381), -INT16_C( 31939), INT16_C( 29061), -INT16_C( 16765), INT16_C( 15595), INT16_C( 24684), INT16_C( 24103), INT16_C( 20679) }, { INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 30057), -INT16_C( 32761), -INT16_C( 5004), INT16_C( 5435), INT16_C( 27231), -INT16_C( 14015), INT16_C( 10813), -INT16_C( 29386) }, { INT16_C( 21573), INT16_C( 29232), INT16_C( 6487), -INT16_C( 14032), -INT16_C( 2928), -INT16_C( 30382), INT16_C( 12059), -INT16_C( 3513) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 21942), INT16_C( 28677), -INT16_C( 10011), -INT16_C( 15683), INT16_C( 2754), INT16_C( 23310), -INT16_C( 3947), INT16_C( 2424) }, { INT16_C( 23562), -INT16_C( 11245), INT16_C( 18961), INT16_C( 10625), INT16_C( 7801), INT16_C( 14369), INT16_C( 11457), -INT16_C( 25979) }, { INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 
1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_comgt_epi16(a, b); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_comgt_epi16(a, b); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comgt_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { { INT32_C( 925334827), INT32_C( 1733808145), INT32_C( 1467539580), -INT32_C( 1351817741) }, { INT32_C( 925334827), INT32_C( 1733808145), INT32_C( 1467539580), -INT32_C( 1351817741) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 1900895633), INT32_C( 610143323), INT32_C( 859914960), -INT32_C( 1872077631) }, { -INT32_C( 1663857310), INT32_C( 1274142626), -INT32_C( 2081917768), -INT32_C( 956624130) }, { -INT32_C( 1), INT32_C( 0), -INT32_C( 1), INT32_C( 0) } }, { { INT32_C( 289857283), INT32_C( 78604507), INT32_C( 1109059770), INT32_C( 467454146) }, { INT32_C( 1769103896), -INT32_C( 1217131971), -INT32_C( 1446752356), -INT32_C( 1744077217) }, { INT32_C( 0), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { INT32_C( 2087086215), INT32_C( 257713468), INT32_C( 789879137), -INT32_C( 2067499412) }, { INT32_C( 1684898421), -INT32_C( 205104244), -INT32_C( 1888094128), INT32_C( 1803891134) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), INT32_C( 0) } }, { { INT32_C( 2031452742), -INT32_C( 1907553244), -INT32_C( 1047012115), INT32_C( 336293992) }, { INT32_C( 
1445623712), INT32_C( 1204869431), -INT32_C( 26937347), -INT32_C( 2007878463) }, { -INT32_C( 1), INT32_C( 0), INT32_C( 0), -INT32_C( 1) } }, { { -INT32_C( 759450520), -INT32_C( 932486084), -INT32_C( 1089783162), INT32_C( 278380126) }, { INT32_C( 101960130), -INT32_C( 75854618), -INT32_C( 1107765813), INT32_C( 928550419) }, { INT32_C( 0), INT32_C( 0), -INT32_C( 1), INT32_C( 0) } }, { { INT32_C( 1304462868), INT32_C( 127694123), -INT32_C( 220710318), -INT32_C( 1018009867) }, { INT32_C( 1049624066), INT32_C( 1454210368), -INT32_C( 1768416799), -INT32_C( 1082046630) }, { -INT32_C( 1), INT32_C( 0), -INT32_C( 1), -INT32_C( 1) } }, { { -INT32_C( 1282637454), -INT32_C( 1849079911), INT32_C( 577135740), INT32_C( 1948480214) }, { -INT32_C( 1006264183), -INT32_C( 1588612115), INT32_C( 1336822632), INT32_C( 1790208063) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_comgt_epi32(a, b); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = (i == 0) ? 
a : simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_comgt_epi32(a, b); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comgt_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { { -INT64_C( 5702714704422032213), INT64_C( 7706197349837389866) }, { -INT64_C( 5702714704422032213), INT64_C( 7706197349837389866) }, { INT64_C( 0), INT64_C( 0) } }, { { INT64_C( 2686738672218224510), INT64_C( 9140610051560567943) }, { -INT64_C( 6217130683109076761), -INT64_C( 1250078657332488315) }, { -INT64_C( 1), -INT64_C( 1) } }, { { INT64_C( 2847418335753423114), INT64_C( 4960714446316608359) }, { -INT64_C( 4086288290319453363), INT64_C( 5199751851530465460) }, { -INT64_C( 1), INT64_C( 0) } }, { { -INT64_C( 943709202321481643), INT64_C( 331162107166771247) }, { INT64_C( 5870370027943010054), -INT64_C( 4584062603918486561) }, { INT64_C( 0), -INT64_C( 1) } }, { { -INT64_C( 3649639699874643066), INT64_C( 433351727370470152) }, { -INT64_C( 8202798520132215751), -INT64_C( 7045432456963077840) }, { -INT64_C( 1), -INT64_C( 1) } }, { { -INT64_C( 16632113310352442), -INT64_C( 7998338059497122534) }, { -INT64_C( 4087067891782225905), INT64_C( 2996758406856088013) }, { -INT64_C( 1), INT64_C( 0) } }, { { -INT64_C( 3382370887748153796), -INT64_C( 1263429811204012660) }, { INT64_C( 5024271936180938160), -INT64_C( 6169576172347208738) }, { INT64_C( 0), -INT64_C( 1) } }, { { -INT64_C( 8421059571642366447), INT64_C( 7372456205302914183) }, { INT64_C( 5577648488510702403), INT64_C( 7602790810320267404) }, { INT64_C( 0), INT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); simde__m128i r = 
simde_mm_comgt_epi64(a, b); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_comgt_epi64(a, b); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comgt_epu8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint8_t a[16]; const uint8_t b[16]; const uint8_t r[16]; } test_vec[] = { { { UINT8_C(108), UINT8_C(171), UINT8_C(201), UINT8_C(204), UINT8_C(217), UINT8_C(227), UINT8_C(229), UINT8_C(117), UINT8_C( 50), UINT8_C( 32), UINT8_C(146), UINT8_C(119), UINT8_C( 76), UINT8_C(180), UINT8_C(115), UINT8_C( 63) }, { UINT8_C(108), UINT8_C(171), UINT8_C(201), UINT8_C(204), UINT8_C(217), UINT8_C(227), UINT8_C(229), UINT8_C(117), UINT8_C( 50), UINT8_C( 32), UINT8_C(146), UINT8_C(119), UINT8_C( 76), UINT8_C(180), UINT8_C(115), UINT8_C( 63) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(241), UINT8_C( 16), UINT8_C(192), UINT8_C(119), UINT8_C( 50), UINT8_C(207), UINT8_C( 57), UINT8_C( 42), UINT8_C(254), UINT8_C(247), UINT8_C( 94), UINT8_C( 41), UINT8_C(244), UINT8_C(123), UINT8_C(225), UINT8_C(174) }, { UINT8_C(118), UINT8_C( 4), UINT8_C( 92), UINT8_C( 64), UINT8_MAX, UINT8_C(240), UINT8_C( 64), UINT8_C(214), UINT8_C(182), UINT8_C(155), UINT8_C( 13), UINT8_C( 63), UINT8_C( 67), UINT8_C(222), UINT8_C(201), UINT8_C(193) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), 
UINT8_MAX, UINT8_C( 0) } }, { { UINT8_C(225), UINT8_C(214), UINT8_C(171), UINT8_C( 1), UINT8_C(180), UINT8_C(231), UINT8_C( 23), UINT8_C( 34), UINT8_C( 94), UINT8_C( 0), UINT8_C(207), UINT8_C( 49), UINT8_C(205), UINT8_C( 32), UINT8_C(103), UINT8_C(193) }, { UINT8_C( 86), UINT8_C( 25), UINT8_C(249), UINT8_C(210), UINT8_C( 8), UINT8_C(148), UINT8_C( 27), UINT8_C(248), UINT8_C( 58), UINT8_C( 85), UINT8_C( 14), UINT8_C(185), UINT8_C(103), UINT8_C(192), UINT8_C( 56), UINT8_C( 54) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 60), UINT8_C(156), UINT8_C(212), UINT8_C( 14), UINT8_C(238), UINT8_C( 24), UINT8_C(233), UINT8_C(127), UINT8_C(209), UINT8_C( 11), UINT8_C(120), UINT8_C(206), UINT8_C( 39), UINT8_C(128), UINT8_C(250), UINT8_C(233) }, { UINT8_C( 53), UINT8_C(114), UINT8_C( 10), UINT8_C( 76), UINT8_C(157), UINT8_C(212), UINT8_C( 94), UINT8_C( 33), UINT8_C(231), UINT8_C(213), UINT8_C(251), UINT8_C(171), UINT8_C( 97), UINT8_C( 97), UINT8_C(169), UINT8_C(228) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 41), UINT8_C(235), UINT8_C(165), UINT8_C(101), UINT8_C(136), UINT8_C(106), UINT8_C(151), UINT8_C(135), UINT8_C(129), UINT8_C(161), UINT8_C(195), UINT8_C(200), UINT8_C(170), UINT8_C(164), UINT8_C(131), UINT8_C(112) }, { UINT8_C( 59), UINT8_C(152), UINT8_C(245), UINT8_C(116), UINT8_C(101), UINT8_C(186), UINT8_C(241), UINT8_C(154), UINT8_C(229), UINT8_C(163), UINT8_C( 62), UINT8_C(222), UINT8_C(216), UINT8_C(202), UINT8_C( 5), UINT8_C( 20) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX } }, { { 
UINT8_C(209), UINT8_C( 74), UINT8_C(133), UINT8_C(208), UINT8_C( 41), UINT8_C(229), UINT8_C( 9), UINT8_C(131), UINT8_C(150), UINT8_C( 73), UINT8_C( 23), UINT8_C(230), UINT8_MAX, UINT8_C(147), UINT8_C(235), UINT8_C(156) }, { UINT8_C(143), UINT8_C( 18), UINT8_C( 36), UINT8_C( 19), UINT8_C( 9), UINT8_C( 77), UINT8_C(188), UINT8_C(171), UINT8_C( 91), UINT8_C( 71), UINT8_C( 61), UINT8_C( 26), UINT8_C(117), UINT8_C( 1), UINT8_C( 50), UINT8_C( 15) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 91), UINT8_C( 65), UINT8_C(220), UINT8_C( 21), UINT8_C(121), UINT8_C(146), UINT8_C( 39), UINT8_C(186), UINT8_C( 56), UINT8_C(140), UINT8_C(220), UINT8_C(241), UINT8_C(206), UINT8_C( 85), UINT8_C( 24), UINT8_C(182) }, { UINT8_C( 90), UINT8_C(104), UINT8_C(253), UINT8_C(240), UINT8_C( 48), UINT8_C(149), UINT8_C(167), UINT8_C(155), UINT8_C(113), UINT8_C( 73), UINT8_C( 98), UINT8_C( 38), UINT8_C(224), UINT8_C( 16), UINT8_C( 26), UINT8_C( 26) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { UINT8_C(240), UINT8_C( 88), UINT8_C( 19), UINT8_C(253), UINT8_C( 34), UINT8_C(120), UINT8_C(218), UINT8_C(118), UINT8_C(141), UINT8_C(241), UINT8_C(123), UINT8_C(178), UINT8_C(190), UINT8_C(243), UINT8_C(244), UINT8_C( 5) }, { UINT8_C(195), UINT8_C( 35), UINT8_C(234), UINT8_C(213), UINT8_C(131), UINT8_C(155), UINT8_C(155), UINT8_C(180), UINT8_C( 81), UINT8_C( 50), UINT8_C( 20), UINT8_C(204), UINT8_C(192), UINT8_C(254), UINT8_C(163), UINT8_C(128) }, { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_comgt_epu8(a, b); simde_test_x86_assert_equal_u8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u8x16(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u8x16(); simde__m128i r = simde_mm_comgt_epu8(a, b); simde_test_x86_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comgt_epu16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint16_t a[8]; const uint16_t b[8]; const uint16_t r[8]; } test_vec[] = { { { UINT16_C( 6328), UINT16_C(20624), UINT16_C(40905), UINT16_C(64776), UINT16_C( 192), UINT16_C(61018), UINT16_C(29816), UINT16_C(54118) }, { UINT16_C( 6328), UINT16_C(20624), UINT16_C(40905), UINT16_C(64776), UINT16_C( 192), UINT16_C(61018), UINT16_C(29816), UINT16_C(54118) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(51955), UINT16_C(34897), UINT16_C(26281), UINT16_C(15486), UINT16_C(35360), UINT16_C(53946), UINT16_C(54461), UINT16_C(34743) }, { UINT16_C( 2266), UINT16_C(54972), UINT16_C( 4121), UINT16_C(39322), UINT16_C(16887), UINT16_C(28898), UINT16_C(57508), UINT16_C(26197) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(58642), UINT16_C(53916), UINT16_C(65099), UINT16_C(31802), UINT16_C(55049), UINT16_C( 446), UINT16_C(39296), UINT16_C(31037) }, { UINT16_C(45439), UINT16_C(22271), UINT16_C(53943), UINT16_C(36729), UINT16_C(15706), UINT16_C(65403), UINT16_C(16617), UINT16_C( 2219) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), 
UINT16_MAX, UINT16_MAX } }, { { UINT16_C(64583), UINT16_C(31538), UINT16_C(27665), UINT16_C(48309), UINT16_C(41778), UINT16_C( 8583), UINT16_C(21937), UINT16_C(39965) }, { UINT16_C(39374), UINT16_C(39618), UINT16_C(60750), UINT16_C(10891), UINT16_C(31507), UINT16_C(25230), UINT16_C(39919), UINT16_C(65103) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(39098), UINT16_C(19323), UINT16_C(47010), UINT16_C(17625), UINT16_C(30147), UINT16_C(64125), UINT16_C( 4600), UINT16_C(14655) }, { UINT16_C(19184), UINT16_C(26731), UINT16_C(27268), UINT16_C(45498), UINT16_C(33607), UINT16_C(25217), UINT16_C(63839), UINT16_C(37673) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(16532), UINT16_C( 2783), UINT16_C(59303), UINT16_C(23436), UINT16_C(55013), UINT16_C(21256), UINT16_C(54523), UINT16_C(38795) }, { UINT16_C(19468), UINT16_C(35172), UINT16_C(20481), UINT16_C(27629), UINT16_C(56865), UINT16_C(51134), UINT16_C(25313), UINT16_C( 3617) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { UINT16_C(32253), UINT16_C(32710), UINT16_C( 1479), UINT16_C(19128), UINT16_C(19903), UINT16_C(63120), UINT16_C(42597), UINT16_C( 233) }, { UINT16_C( 9547), UINT16_C(50451), UINT16_C(42862), UINT16_C(41228), UINT16_C( 5062), UINT16_C(22955), UINT16_C(57116), UINT16_C(46623) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(54812), UINT16_C(29592), UINT16_C( 6315), UINT16_C(23108), UINT16_C(25211), UINT16_C(43389), UINT16_C(36828), UINT16_C(47681) }, { UINT16_C(24533), UINT16_C(58338), UINT16_C(30835), UINT16_C(40190), UINT16_C(43617), UINT16_C(57266), UINT16_C(30649), UINT16_C(54539) }, { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, 
}; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_comgt_epu16(a, b); simde_test_x86_assert_equal_u16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u16x8(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u16x8(); simde__m128i r = simde_mm_comgt_epu16(a, b); simde_test_x86_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comgt_epu32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint32_t a[4]; const uint32_t b[4]; const uint32_t r[4]; } test_vec[] = { { { UINT32_C(2931675930), UINT32_C(3491244283), UINT32_C( 892837182), UINT32_C( 242980872) }, { UINT32_C(2931675930), UINT32_C(3491244283), UINT32_C( 892837182), UINT32_C( 242980872) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(2906161617), UINT32_C(2712521399), UINT32_C( 591014936), UINT32_C(2983015263) }, { UINT32_C(4171168798), UINT32_C(4145823326), UINT32_C(1646692147), UINT32_C(1149225107) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(3917223015), UINT32_C(4211123683), UINT32_C(3953795091), UINT32_C(2454526391) }, { UINT32_C( 430037585), UINT32_C( 372793468), UINT32_C(4155746175), UINT32_C(2754947106) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(1612226497), UINT32_C(4041150878), UINT32_C( 7081595), UINT32_C(3262395497) }, { UINT32_C(3627021531), UINT32_C(1951701502), UINT32_C(3192161613), UINT32_C(1988476512) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(3665013252), UINT32_C(3392280465), UINT32_C( 719814264), UINT32_C(2284357916) }, { 
UINT32_C(4273066723), UINT32_C(1515777676), UINT32_C(2101586627), UINT32_C( 22582514) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { UINT32_C( 536198747), UINT32_C(3487678052), UINT32_C( 813955377), UINT32_C( 726803064) }, { UINT32_C(1401814417), UINT32_C( 271346125), UINT32_C(4246117641), UINT32_C(2392109699) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(4127613676), UINT32_C(1221922750), UINT32_C(3701999311), UINT32_C(4107994663) }, { UINT32_C(2670590990), UINT32_C(3736605034), UINT32_C( 103278152), UINT32_C(1696800441) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { UINT32_C( 672014047), UINT32_C(2096369735), UINT32_C(4106104441), UINT32_C( 732511694) }, { UINT32_C(2868336514), UINT32_C( 216395274), UINT32_C(1625034408), UINT32_C(3471565372) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_comgt_epu32(a, b); simde_test_x86_assert_equal_u32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u32x4(); simde__m128i b = (i == 0) ? 
a : simde_test_x86_random_u32x4(); simde__m128i r = simde_mm_comgt_epu32(a, b); simde_test_x86_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comgt_epu64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint64_t a[2]; const uint64_t b[2]; const uint64_t r[2]; } test_vec[] = { { { UINT64_C(13053625969493244509), UINT64_C( 1776335905635563608) }, { UINT64_C(13053625969493244509), UINT64_C( 1776335905635563608) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(16256034595954595605), UINT64_C( 1285222121653378128) }, { UINT64_C( 4131850939565080462), UINT64_C( 196125019575610516) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C(12014875886658405357), UINT64_C( 8488037129192413289) }, { UINT64_C( 5938715815015691417), UINT64_C( 3731556851665834580) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C(11833795783221175093), UINT64_C( 4877831776391334682) }, { UINT64_C( 4992985007803179430), UINT64_C(17420199667699905472) }, { UINT64_MAX, UINT64_C( 0) } }, { { UINT64_C( 2375114234258259987), UINT64_C(14155653048210685068) }, { UINT64_C( 6763726840653205471), UINT64_C( 9621179381699646978) }, { UINT64_C( 0), UINT64_MAX } }, { { UINT64_C( 7339593446906798511), UINT64_C( 4464734439121646566) }, { UINT64_C(16216730157518636394), UINT64_C( 3963201710570668863) }, { UINT64_C( 0), UINT64_MAX } }, { { UINT64_C(13461328380725022258), UINT64_C(17952021491942488145) }, { UINT64_C( 1783462140442348399), UINT64_C( 5627667542317712290) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 7402917169343205316), UINT64_C( 4603078424171451637) }, { UINT64_C( 5467596465290160663), UINT64_C( 1493066304789536080) }, { UINT64_MAX, UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); 
simde__m128i r = simde_mm_comgt_epu64(a, b); simde_test_x86_assert_equal_u64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u64x2(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u64x2(); simde__m128i r = simde_mm_comgt_epu64(a, b); simde_test_x86_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comle_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { { { -INT8_C( 116), -INT8_C( 19), INT8_C( 26), -INT8_C( 107), INT8_C( 78), INT8_C( 25), -INT8_C( 119), -INT8_C( 78), -INT8_C( 7), INT8_C( 78), INT8_MAX, INT8_C( 74), INT8_C( 0), INT8_C( 9), INT8_C( 26), INT8_C( 86) }, { -INT8_C( 116), -INT8_C( 19), INT8_C( 26), -INT8_C( 107), INT8_C( 78), INT8_C( 25), -INT8_C( 119), -INT8_C( 78), -INT8_C( 7), INT8_C( 78), INT8_MAX, INT8_C( 74), INT8_C( 0), INT8_C( 9), INT8_C( 26), INT8_C( 86) }, { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } }, { { -INT8_C( 119), -INT8_C( 87), -INT8_C( 52), -INT8_C( 52), -INT8_C( 62), INT8_C( 115), INT8_C( 84), -INT8_C( 61), INT8_C( 116), -INT8_C( 39), -INT8_C( 19), -INT8_C( 41), -INT8_C( 38), -INT8_C( 4), -INT8_C( 59), -INT8_C( 49) }, { -INT8_C( 77), INT8_C( 111), -INT8_C( 80), INT8_C( 125), -INT8_C( 106), INT8_C( 62), INT8_C( 30), -INT8_C( 26), -INT8_C( 122), INT8_C( 47), INT8_C( 75), -INT8_C( 93), -INT8_C( 58), INT8_C( 71), -INT8_C( 75), -INT8_C( 121) }, { -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), 
INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 18), -INT8_C( 112), -INT8_C( 45), -INT8_C( 126), INT8_C( 63), INT8_C( 26), INT8_C( 4), -INT8_C( 59), INT8_C( 52), INT8_C( 64), -INT8_C( 58), INT8_C( 39), INT8_C( 89), INT8_C( 44), INT8_C( 40), -INT8_C( 56) }, { INT8_C( 96), -INT8_C( 98), -INT8_C( 126), -INT8_C( 12), INT8_C( 114), -INT8_C( 24), INT8_C( 99), INT8_C( 74), -INT8_C( 61), INT8_C( 62), -INT8_C( 53), INT8_C( 29), INT8_C( 105), INT8_C( 44), -INT8_C( 102), INT8_C( 26) }, { -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1) } }, { { INT8_C( 110), INT8_C( 104), INT8_C( 73), INT8_C( 43), INT8_C( 36), -INT8_C( 54), -INT8_C( 40), -INT8_C( 100), -INT8_C( 72), -INT8_C( 103), INT8_C( 7), INT8_C( 125), INT8_C( 10), INT8_C( 8), -INT8_C( 55), INT8_C( 71) }, { -INT8_C( 68), INT8_C( 0), -INT8_C( 10), -INT8_C( 62), -INT8_C( 118), INT8_C( 32), INT8_C( 64), INT8_C( 37), -INT8_C( 39), INT8_C( 3), INT8_C( 104), INT8_MAX, -INT8_C( 110), -INT8_C( 63), -INT8_C( 78), INT8_C( 87) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1) } }, { { INT8_C( 48), -INT8_C( 72), -INT8_C( 106), -INT8_C( 113), INT8_C( 24), -INT8_C( 116), -INT8_C( 72), -INT8_C( 113), INT8_C( 41), INT8_C( 108), INT8_C( 26), -INT8_C( 97), -INT8_C( 107), -INT8_C( 105), -INT8_C( 112), -INT8_C( 109) }, { -INT8_C( 17), INT8_C( 31), INT8_C( 117), -INT8_C( 82), -INT8_C( 123), -INT8_C( 19), -INT8_C( 99), -INT8_C( 63), -INT8_C( 17), INT8_C( 6), -INT8_C( 118), -INT8_C( 109), -INT8_C( 24), INT8_C( 13), -INT8_C( 29), -INT8_C( 123) }, { INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0) } }, { { 
INT8_C( 93), INT8_C( 8), INT8_C( 33), INT8_C( 118), -INT8_C( 60), INT8_C( 103), -INT8_C( 116), -INT8_C( 27), -INT8_C( 81), INT8_C( 66), INT8_C( 102), INT8_C( 85), -INT8_C( 95), -INT8_C( 30), INT8_C( 102), -INT8_C( 10) }, { INT8_C( 32), -INT8_C( 125), INT8_C( 104), -INT8_C( 127), INT8_C( 11), INT8_C( 88), INT8_C( 98), INT8_C( 100), INT8_C( 46), -INT8_C( 48), -INT8_C( 102), INT8_C( 31), INT8_C( 20), INT8_C( 24), INT8_C( 22), -INT8_C( 18) }, { INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 28), -INT8_C( 31), INT8_C( 84), -INT8_C( 88), -INT8_C( 49), INT8_C( 99), INT8_C( 60), -INT8_C( 25), INT8_C( 113), -INT8_C( 95), INT8_C( 83), INT8_C( 105), -INT8_C( 42), -INT8_C( 17), INT8_C( 49), -INT8_C( 74) }, { INT8_C( 120), -INT8_C( 78), INT8_C( 53), INT8_C( 3), -INT8_C( 58), INT8_C( 104), INT8_C( 119), INT8_C( 87), -INT8_C( 66), -INT8_C( 24), -INT8_C( 2), -INT8_C( 20), -INT8_C( 67), -INT8_C( 22), INT8_C( 52), -INT8_C( 40) }, { -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1) } }, { { -INT8_C( 106), -INT8_C( 55), -INT8_C( 106), -INT8_C( 20), -INT8_C( 30), -INT8_C( 120), -INT8_C( 80), -INT8_C( 35), -INT8_C( 104), INT8_C( 20), INT8_C( 73), -INT8_C( 95), -INT8_C( 34), -INT8_C( 56), -INT8_C( 37), INT8_C( 28) }, { INT8_C( 30), INT8_C( 54), INT8_C( 70), -INT8_C( 4), INT8_C( 91), INT8_C( 37), -INT8_C( 60), -INT8_C( 30), -INT8_C( 58), -INT8_C( 42), INT8_C( 32), -INT8_C( 62), -INT8_C( 116), -INT8_C( 116), INT8_C( 35), -INT8_C( 116) }, { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0) } }, }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_comle_epi8(a, b); simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_comle_epi8(a, b); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comle_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { { INT16_C( 19187), INT16_C( 3152), -INT16_C( 8283), INT16_C( 4049), INT16_C( 8267), -INT16_C( 14416), INT16_C( 30046), INT16_C( 28746) }, { INT16_C( 19187), INT16_C( 3152), -INT16_C( 8283), INT16_C( 4049), INT16_C( 8267), -INT16_C( 14416), INT16_C( 30046), INT16_C( 28746) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } }, { { -INT16_C( 26822), INT16_C( 13313), -INT16_C( 26763), INT16_C( 19506), INT16_C( 9070), INT16_C( 26983), INT16_C( 1321), INT16_C( 21197) }, { -INT16_C( 5030), -INT16_C( 12568), INT16_C( 28607), -INT16_C( 15227), INT16_C( 20403), -INT16_C( 24080), -INT16_C( 1023), -INT16_C( 1432) }, { -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 23542), INT16_C( 29648), -INT16_C( 14406), -INT16_C( 8281), INT16_C( 22240), -INT16_C( 22470), INT16_C( 23867), INT16_C( 28952) }, { INT16_C( 14067), -INT16_C( 1081), INT16_C( 16601), INT16_C( 17845), INT16_C( 10488), -INT16_C( 1935), INT16_C( 26476), INT16_C( 283) }, { INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 
1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0) } }, { { INT16_C( 3192), -INT16_C( 32742), -INT16_C( 17198), -INT16_C( 7924), -INT16_C( 2239), INT16_C( 18435), -INT16_C( 25494), INT16_C( 13294) }, { -INT16_C( 20249), INT16_C( 23381), INT16_C( 23450), -INT16_C( 9398), INT16_C( 13121), -INT16_C( 27748), -INT16_C( 16822), -INT16_C( 12467) }, { INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0) } }, { { INT16_C( 12776), INT16_C( 9286), INT16_C( 32358), -INT16_C( 25782), -INT16_C( 28996), INT16_C( 4649), -INT16_C( 13472), -INT16_C( 8650) }, { -INT16_C( 7586), -INT16_C( 18950), -INT16_C( 14421), -INT16_C( 13270), -INT16_C( 1610), INT16_C( 15575), INT16_C( 1857), -INT16_C( 21787) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0) } }, { { INT16_C( 21359), INT16_C( 10174), INT16_C( 5662), INT16_C( 21831), -INT16_C( 23434), -INT16_C( 13548), -INT16_C( 3390), -INT16_C( 17448) }, { INT16_C( 21887), -INT16_C( 11745), -INT16_C( 29517), INT16_C( 24639), INT16_C( 385), -INT16_C( 18037), INT16_C( 19705), -INT16_C( 9269) }, { -INT16_C( 1), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1) } }, { { -INT16_C( 1739), INT16_C( 20969), -INT16_C( 29791), INT16_C( 22509), -INT16_C( 16230), INT16_C( 15915), INT16_C( 5947), INT16_C( 4795) }, { -INT16_C( 28557), INT16_C( 31530), -INT16_C( 19621), -INT16_C( 7825), -INT16_C( 11321), -INT16_C( 11487), -INT16_C( 27620), -INT16_C( 21785) }, { INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 21663), INT16_C( 26927), -INT16_C( 6760), -INT16_C( 6109), INT16_C( 27470), INT16_C( 13016), INT16_C( 17521), INT16_C( 11209) }, { INT16_C( 7010), INT16_C( 30341), INT16_C( 17227), -INT16_C( 27228), -INT16_C( 1869), INT16_C( 21247), -INT16_C( 6065), INT16_C( 24097) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), 
INT16_C( 0), INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_comle_epi16(a, b); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_comle_epi16(a, b); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comle_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { { INT32_C( 972616219), INT32_C( 1372597674), INT32_C( 1854090426), INT32_C( 1307148300) }, { INT32_C( 972616219), INT32_C( 1372597674), INT32_C( 1854090426), INT32_C( 1307148300) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { -INT32_C( 1953071245), -INT32_C( 960150486), -INT32_C( 24119446), INT32_C( 1096896316) }, { -INT32_C( 2018749299), -INT32_C( 606180736), -INT32_C( 1148024056), -INT32_C( 1039936331) }, { INT32_C( 0), -INT32_C( 1), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 2067315762), -INT32_C( 1162166367), -INT32_C( 1623697385), -INT32_C( 1718627957) }, { INT32_C( 481516508), -INT32_C( 885742399), -INT32_C( 512880547), -INT32_C( 805288428) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { INT32_C( 679473052), -INT32_C( 1184122281), -INT32_C( 60709664), -INT32_C( 1349290268) }, { -INT32_C( 1065971917), INT32_C( 1824187158), -INT32_C( 1468054300), -INT32_C( 1060707888) }, { INT32_C( 0), -INT32_C( 1), INT32_C( 0), -INT32_C( 1) } }, { { INT32_C( 1042615301), 
INT32_C( 230703349), -INT32_C( 562797843), -INT32_C( 883603475) }, { INT32_C( 979072952), INT32_C( 89665575), -INT32_C( 668094782), -INT32_C( 633427311) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1) } }, { { -INT32_C( 1893523152), -INT32_C( 39758302), INT32_C( 232266343), INT32_C( 901598033) }, { INT32_C( 1422629779), -INT32_C( 571043173), INT32_C( 954324770), INT32_C( 1699785473) }, { -INT32_C( 1), INT32_C( 0), -INT32_C( 1), -INT32_C( 1) } }, { { -INT32_C( 471812538), -INT32_C( 789182841), INT32_C( 1375032694), INT32_C( 884247222) }, { -INT32_C( 735083539), INT32_C( 1031541275), -INT32_C( 1862187734), -INT32_C( 1410995258) }, { INT32_C( 0), -INT32_C( 1), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 29643630), -INT32_C( 811116597), INT32_C( 1915984193), INT32_C( 287313349) }, { -INT32_C( 2081572626), INT32_C( 1842526030), -INT32_C( 1409842686), -INT32_C( 219975032) }, { INT32_C( 0), -INT32_C( 1), INT32_C( 0), INT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_comle_epi32(a, b); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = (i == 0) ? 
a : simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_comle_epi32(a, b); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comle_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { { INT64_C( 4799802114370404817), INT64_C( 1579597047222923761) }, { INT64_C( 4799802114370404817), INT64_C( 1579597047222923761) }, { -INT64_C( 1), -INT64_C( 1) } }, { { -INT64_C( 5340836100108729923), -INT64_C( 8992481259499743533) }, { INT64_C( 8195985707901241238), -INT64_C( 8792156227641384275) }, { -INT64_C( 1), -INT64_C( 1) } }, { { INT64_C( 6741765975996835459), -INT64_C( 6431571666177783867) }, { -INT64_C( 6632849631960453402), -INT64_C( 6036378782141253986) }, { INT64_C( 0), -INT64_C( 1) } }, { { -INT64_C( 5997152387731853209), -INT64_C( 6743291274089635841) }, { INT64_C( 2388185130225810921), -INT64_C( 3346638795333847828) }, { -INT64_C( 1), -INT64_C( 1) } }, { { -INT64_C( 1482705454389385582), -INT64_C( 4530065313621924438) }, { INT64_C( 3763171127669193926), INT64_C( 4314911291741316286) }, { -INT64_C( 1), -INT64_C( 1) } }, { { INT64_C( 7163598407174705195), INT64_C( 5317164735680373485) }, { INT64_C( 2457621231182050214), INT64_C( 3682365343409229628) }, { INT64_C( 0), INT64_C( 0) } }, { { INT64_C( 6460805075322074715), -INT64_C( 8985463405146890767) }, { INT64_C( 3724710257396886448), INT64_C( 81519815510080656) }, { INT64_C( 0), -INT64_C( 1) } }, { { INT64_C( 1733865481417793866), -INT64_C( 5218947715972768035) }, { -INT64_C( 5628849588688246003), -INT64_C( 1306506982751341601) }, { INT64_C( 0), -INT64_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); simde__m128i 
r = simde_mm_comle_epi64(a, b); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_comle_epi64(a, b); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comle_epu8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint8_t a[16]; const uint8_t b[16]; const uint8_t r[16]; } test_vec[] = { { { UINT8_C( 31), UINT8_C(216), UINT8_C(111), UINT8_C( 81), UINT8_C(232), UINT8_C(220), UINT8_C(134), UINT8_C(231), UINT8_C(218), UINT8_C( 46), UINT8_C( 89), UINT8_C( 89), UINT8_C( 3), UINT8_C(116), UINT8_C(129), UINT8_C( 45) }, { UINT8_C( 31), UINT8_C(216), UINT8_C(111), UINT8_C( 81), UINT8_C(232), UINT8_C(220), UINT8_C(134), UINT8_C(231), UINT8_C(218), UINT8_C( 46), UINT8_C( 89), UINT8_C( 89), UINT8_C( 3), UINT8_C(116), UINT8_C(129), UINT8_C( 45) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(228), UINT8_C(251), UINT8_C(173), UINT8_C(164), UINT8_C(200), UINT8_C(180), UINT8_C(152), UINT8_C(226), UINT8_C( 84), UINT8_C(125), UINT8_C( 56), UINT8_C( 80), UINT8_C( 94), UINT8_C(162), UINT8_C( 57), UINT8_C( 63) }, { UINT8_C( 3), UINT8_C(164), UINT8_C(169), UINT8_C(206), UINT8_C(243), UINT8_C( 57), UINT8_C(245), UINT8_C( 13), UINT8_C( 16), UINT8_C(147), UINT8_C( 20), UINT8_C( 98), UINT8_C( 56), UINT8_C(229), UINT8_C( 2), UINT8_C( 44) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } 
}, { { UINT8_C( 94), UINT8_C( 36), UINT8_C(112), UINT8_C(167), UINT8_C(224), UINT8_C( 11), UINT8_C(186), UINT8_C( 16), UINT8_C( 20), UINT8_C( 97), UINT8_C( 25), UINT8_C( 9), UINT8_C( 36), UINT8_C(126), UINT8_C( 26), UINT8_C( 62) }, { UINT8_C( 29), UINT8_C( 12), UINT8_C( 78), UINT8_C( 73), UINT8_C( 66), UINT8_C( 11), UINT8_C( 67), UINT8_C(211), UINT8_C(166), UINT8_C( 23), UINT8_C(180), UINT8_C(252), UINT8_C(250), UINT8_C(238), UINT8_C(253), UINT8_C(253) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(163), UINT8_C( 45), UINT8_C(209), UINT8_C( 12), UINT8_C( 16), UINT8_C( 90), UINT8_C( 47), UINT8_C(129), UINT8_C( 74), UINT8_C( 39), UINT8_C(146), UINT8_C( 54), UINT8_C(204), UINT8_C(245), UINT8_C(105), UINT8_C( 51) }, { UINT8_C(150), UINT8_C(152), UINT8_C(198), UINT8_C(139), UINT8_C(125), UINT8_C( 90), UINT8_C( 90), UINT8_C(129), UINT8_C(197), UINT8_C( 66), UINT8_C(161), UINT8_C(239), UINT8_C(241), UINT8_C(150), UINT8_C( 90), UINT8_C(233) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX } }, { { UINT8_C(218), UINT8_C(158), UINT8_C( 58), UINT8_C(157), UINT8_MAX, UINT8_C(171), UINT8_C(225), UINT8_C(125), UINT8_C( 28), UINT8_C( 88), UINT8_C( 13), UINT8_C(161), UINT8_C(254), UINT8_C( 16), UINT8_C( 14), UINT8_C(102) }, { UINT8_C(149), UINT8_C(208), UINT8_C(122), UINT8_C( 92), UINT8_C( 75), UINT8_C( 46), UINT8_C( 33), UINT8_C( 95), UINT8_C(148), UINT8_C(155), UINT8_C( 67), UINT8_C( 3), UINT8_C(198), UINT8_C(229), UINT8_C( 1), UINT8_C( 54) }, { UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 44), UINT8_C( 0), UINT8_C( 
18), UINT8_C( 32), UINT8_C( 85), UINT8_C( 5), UINT8_C(183), UINT8_C( 78), UINT8_C(178), UINT8_C(123), UINT8_C(241), UINT8_C( 16), UINT8_C( 97), UINT8_C(214), UINT8_C(241), UINT8_C( 31) }, { UINT8_C( 67), UINT8_C( 61), UINT8_C(208), UINT8_C(131), UINT8_C( 84), UINT8_C(144), UINT8_C(128), UINT8_C(180), UINT8_C( 60), UINT8_C(170), UINT8_C( 3), UINT8_C( 0), UINT8_C( 33), UINT8_C(227), UINT8_C(218), UINT8_C( 44) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX } }, { { UINT8_C(192), UINT8_C(217), UINT8_C(193), UINT8_C( 94), UINT8_C(188), UINT8_C(113), UINT8_C(153), UINT8_C( 59), UINT8_C( 53), UINT8_C( 24), UINT8_C(166), UINT8_C( 76), UINT8_C(157), UINT8_C( 79), UINT8_C(250), UINT8_C(166) }, { UINT8_C(199), UINT8_C(102), UINT8_C( 49), UINT8_C(202), UINT8_C( 65), UINT8_C(135), UINT8_C( 95), UINT8_C(202), UINT8_C(228), UINT8_C(247), UINT8_C( 71), UINT8_C(173), UINT8_C(169), UINT8_C(153), UINT8_C(206), UINT8_C( 21) }, { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(191), UINT8_C(180), UINT8_C(175), UINT8_C( 31), UINT8_C(218), UINT8_C(245), UINT8_C(111), UINT8_C(139), UINT8_C(205), UINT8_C(184), UINT8_C(148), UINT8_C( 29), UINT8_C( 91), UINT8_C(131), UINT8_C( 18), UINT8_C( 66) }, { UINT8_C( 74), UINT8_C(211), UINT8_C( 6), UINT8_C(247), UINT8_C(186), UINT8_C( 28), UINT8_C(166), UINT8_C(231), UINT8_C(181), UINT8_C( 11), UINT8_C(120), UINT8_C(211), UINT8_C( 7), UINT8_C( 14), UINT8_C(195), UINT8_C( 55) }, { UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_comle_epu8(a, b); simde_test_x86_assert_equal_u8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u8x16(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u8x16(); simde__m128i r = simde_mm_comle_epu8(a, b); simde_test_x86_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comle_epu16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint16_t a[8]; const uint16_t b[8]; const uint16_t r[8]; } test_vec[] = { { { UINT16_C( 5712), UINT16_C(11076), UINT16_C(40023), UINT16_C(34593), UINT16_C(58017), UINT16_C(18979), UINT16_C(31555), UINT16_C(15647) }, { UINT16_C( 5712), UINT16_C(11076), UINT16_C(40023), UINT16_C(34593), UINT16_C(58017), UINT16_C(18979), UINT16_C(31555), UINT16_C(15647) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C( 3572), UINT16_C(54197), UINT16_C(22118), UINT16_C(21565), UINT16_C(28119), UINT16_C(15103), UINT16_C(19171), UINT16_C(56482) }, { UINT16_C(35228), UINT16_C(18920), UINT16_C(27612), UINT16_C(30616), UINT16_C( 7898), UINT16_C(40379), UINT16_C(16124), UINT16_C(56586) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(39916), UINT16_C( 9896), UINT16_C(15854), UINT16_C(22797), UINT16_C(42353), UINT16_C(43586), UINT16_C(22243), UINT16_C(19028) }, { UINT16_C(38090), UINT16_C(17155), UINT16_C(27665), UINT16_C(41914), UINT16_C(62623), UINT16_C(56516), UINT16_C(54319), UINT16_C(27580) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C( 
1371), UINT16_C(47173), UINT16_C(55801), UINT16_C(15868), UINT16_C(15531), UINT16_C(60331), UINT16_C(14770), UINT16_C(51648) }, { UINT16_C(49155), UINT16_C(57341), UINT16_C(42653), UINT16_C(21104), UINT16_C(60697), UINT16_C(53414), UINT16_C(18051), UINT16_C(11804) }, { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, { { UINT16_C(54376), UINT16_C(20726), UINT16_C(13104), UINT16_C(19187), UINT16_C(47534), UINT16_C(50337), UINT16_C(64502), UINT16_C(41933) }, { UINT16_C(37743), UINT16_C(58942), UINT16_C( 8489), UINT16_C(52643), UINT16_C(36976), UINT16_C(16841), UINT16_C(39841), UINT16_C(28689) }, { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(36668), UINT16_C(47138), UINT16_C(21051), UINT16_C(50907), UINT16_C(42146), UINT16_C(65419), UINT16_C(42328), UINT16_C( 7781) }, { UINT16_C(38709), UINT16_C( 7982), UINT16_C(58972), UINT16_C(23865), UINT16_C(26059), UINT16_C(63381), UINT16_C(56112), UINT16_C(30597) }, { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, { { UINT16_C(48894), UINT16_C(46640), UINT16_C(16065), UINT16_C(64154), UINT16_C(34479), UINT16_C(25299), UINT16_C(15999), UINT16_C(33903) }, { UINT16_C(21630), UINT16_C(21557), UINT16_C(64735), UINT16_C(18460), UINT16_C(62802), UINT16_C(47475), UINT16_C( 3801), UINT16_C(36191) }, { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, { { UINT16_C(60119), UINT16_C( 5002), UINT16_C( 106), UINT16_C(11802), UINT16_C(58875), UINT16_C(46816), UINT16_C(52756), UINT16_C( 7122) }, { UINT16_C(20849), UINT16_C(19388), UINT16_C(27481), UINT16_C(54834), UINT16_C(50991), UINT16_C(20681), UINT16_C(15684), UINT16_C(63623) }, { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_comle_epu16(a, b); simde_test_x86_assert_equal_u16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u16x8(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u16x8(); simde__m128i r = simde_mm_comle_epu16(a, b); simde_test_x86_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comle_epu32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint32_t a[4]; const uint32_t b[4]; const uint32_t r[4]; } test_vec[] = { { { UINT32_C(2526517999), UINT32_C(2856427488), UINT32_C(3256503473), UINT32_C( 746086078) }, { UINT32_C(2526517999), UINT32_C(2856427488), UINT32_C(3256503473), UINT32_C( 746086078) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(3442021943), UINT32_C(1365589107), UINT32_C(2231395463), UINT32_C( 14970904) }, { UINT32_C( 163534445), UINT32_C(3053003720), UINT32_C(1357463799), UINT32_C(4266193958) }, { UINT32_C( 0), UINT32_MAX, UINT32_C( 0), UINT32_MAX } }, { { UINT32_C(2732993527), UINT32_C(4288378581), UINT32_C( 511791749), UINT32_C(4016090621) }, { UINT32_C( 845850234), UINT32_C(2552862157), UINT32_C( 649415669), UINT32_C(3677002739) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(2455312858), UINT32_C( 717103398), UINT32_C(3820165966), UINT32_C( 199425693) }, { UINT32_C(3674215229), UINT32_C(2629434517), UINT32_C( 233239765), UINT32_C( 151410894) }, { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(1709991688), UINT32_C( 414533134), UINT32_C(2667398669), UINT32_C(2656266909) }, { UINT32_C(3404276959), UINT32_C( 110125256), UINT32_C(3454321596), 
UINT32_C(3519296863) }, { UINT32_MAX, UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { UINT32_C(3792084392), UINT32_C( 292088631), UINT32_C( 365518056), UINT32_C(4003068328) }, { UINT32_C(3364774535), UINT32_C( 563661071), UINT32_C( 773341652), UINT32_C(2068658509) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, { { UINT32_C(3540927458), UINT32_C(1606359878), UINT32_C( 293528583), UINT32_C(1151186020) }, { UINT32_C(2622837598), UINT32_C( 884490258), UINT32_C(4176016709), UINT32_C(1334275137) }, { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_MAX } }, { { UINT32_C(4268679904), UINT32_C(1235409894), UINT32_C(1518560915), UINT32_C(3880231545) }, { UINT32_C( 279413132), UINT32_C(2262355064), UINT32_C(4072499255), UINT32_C(2505824226) }, { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_comle_epu32(a, b); simde_test_x86_assert_equal_u32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u32x4(); simde__m128i b = (i == 0) ? 
a : simde_test_x86_random_u32x4(); simde__m128i r = simde_mm_comle_epu32(a, b); simde_test_x86_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comle_epu64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint64_t a[2]; const uint64_t b[2]; const uint64_t r[2]; } test_vec[] = { { { UINT64_C( 1655413424631120327), UINT64_C( 2414154322209303476) }, { UINT64_C( 1655413424631120327), UINT64_C( 2414154322209303476) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 6980710169465156664), UINT64_C(10714487761357775057) }, { UINT64_C( 1129330877660342722), UINT64_C( 4048235039675284113) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 772724745883726474), UINT64_C(10768796985887008571) }, { UINT64_C( 8981255941545058806), UINT64_C( 8447328362611546963) }, { UINT64_MAX, UINT64_C( 0) } }, { { UINT64_C(14720368943566013867), UINT64_C(16178253019982103966) }, { UINT64_C(11945562851780112782), UINT64_C( 2345808535136622113) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(17183891286428431811), UINT64_C(13677796347659763746) }, { UINT64_C(15131594902348964272), UINT64_C( 9299478155659654180) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(13200861144104929529), UINT64_C( 8376768063846769390) }, { UINT64_C( 5274007808018332293), UINT64_C(16115053827258694788) }, { UINT64_C( 0), UINT64_MAX } }, { { UINT64_C( 8168005558465349750), UINT64_C( 5528346279398634283) }, { UINT64_C( 6217680072136131380), UINT64_C( 9527755318477125480) }, { UINT64_C( 0), UINT64_MAX } }, { { UINT64_C( 7123170988958547042), UINT64_C(10242152464707090688) }, { UINT64_C( 4479972021671527909), UINT64_C(13165159143517481976) }, { UINT64_C( 0), UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); 
simde__m128i r = simde_mm_comle_epu64(a, b); simde_test_x86_assert_equal_u64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u64x2(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u64x2(); simde__m128i r = simde_mm_comle_epu64(a, b); simde_test_x86_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comlt_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { { { -INT8_C( 28), INT8_C( 45), -INT8_C( 120), -INT8_C( 63), INT8_C( 105), -INT8_C( 19), -INT8_C( 101), -INT8_C( 46), -INT8_C( 106), INT8_C( 126), -INT8_C( 34), INT8_C( 99), INT8_C( 20), -INT8_C( 76), INT8_C( 104), -INT8_C( 125) }, { -INT8_C( 28), INT8_C( 45), -INT8_C( 120), -INT8_C( 63), INT8_C( 105), -INT8_C( 19), -INT8_C( 101), -INT8_C( 46), -INT8_C( 106), INT8_C( 126), -INT8_C( 34), INT8_C( 99), INT8_C( 20), -INT8_C( 76), INT8_C( 104), -INT8_C( 125) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 62), -INT8_C( 58), INT8_C( 34), -INT8_C( 64), INT8_C( 4), INT8_C( 83), -INT8_C( 60), INT8_C( 73), INT8_C( 94), -INT8_C( 94), INT8_C( 0), INT8_C( 84), INT8_C( 65), INT8_C( 46), -INT8_C( 4), -INT8_C( 66) }, { INT8_C( 36), INT8_C( 98), INT8_C( 6), -INT8_C( 15), -INT8_C( 41), -INT8_C( 95), -INT8_C( 124), INT8_C( 11), INT8_C( 20), -INT8_C( 119), INT8_C( 41), INT8_C( 61), INT8_C( 88), INT8_C( 121), INT8_C( 111), INT8_C( 112) }, { -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 
1), -INT8_C( 1) } }, { { -INT8_C( 16), INT8_C( 80), INT8_C( 65), INT8_C( 47), INT8_C( 87), INT8_C( 120), -INT8_C( 9), -INT8_C( 63), -INT8_C( 65), INT8_C( 36), -INT8_C( 120), -INT8_C( 107), -INT8_C( 18), -INT8_C( 43), -INT8_C( 5), -INT8_C( 28) }, { INT8_C( 75), INT8_C( 35), INT8_C( 31), -INT8_C( 111), INT8_C( 57), -INT8_C( 70), INT8_C( 124), INT8_C( 84), -INT8_C( 93), -INT8_C( 93), -INT8_C( 119), INT8_C( 23), -INT8_C( 40), -INT8_C( 59), INT8_C( 32), -INT8_C( 96) }, { -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0) } }, { { -INT8_C( 102), -INT8_C( 85), INT8_C( 46), INT8_C( 113), INT8_C( 113), -INT8_C( 121), -INT8_C( 81), -INT8_C( 20), INT8_C( 70), INT8_C( 119), -INT8_C( 38), -INT8_C( 69), INT8_C( 44), INT8_C( 8), -INT8_C( 104), INT8_C( 111) }, { -INT8_C( 127), -INT8_C( 7), INT8_C( 59), INT8_C( 103), INT8_C( 54), INT8_C( 65), INT8_C( 109), -INT8_C( 12), INT8_C( 107), INT8_C( 82), INT8_C( 102), -INT8_C( 70), INT8_C( 63), -INT8_C( 96), INT8_C( 99), INT8_C( 90) }, { INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0) } }, { { -INT8_C( 26), INT8_C( 93), INT8_C( 83), INT8_C( 78), -INT8_C( 4), -INT8_C( 121), -INT8_C( 45), INT8_C( 20), INT8_C( 24), INT8_C( 37), INT8_C( 92), -INT8_C( 114), -INT8_C( 91), -INT8_C( 50), -INT8_C( 69), -INT8_C( 87) }, { -INT8_C( 19), INT8_C( 106), -INT8_C( 62), INT8_C( 61), INT8_C( 119), INT8_C( 61), INT8_C( 62), INT8_C( 52), -INT8_C( 109), INT8_C( 33), INT8_C( 41), -INT8_C( 17), INT8_C( 51), INT8_C( 18), INT8_C( 32), -INT8_C( 27) }, { -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } }, { { -INT8_C( 4), 
-INT8_C( 16), INT8_C( 23), -INT8_C( 115), -INT8_C( 99), -INT8_C( 127), INT8_C( 75), INT8_MAX, INT8_C( 96), -INT8_C( 74), INT8_C( 120), -INT8_C( 42), -INT8_C( 2), INT8_C( 47), INT8_C( 76), -INT8_C( 40) }, { -INT8_C( 72), INT8_C( 0), INT8_C( 28), -INT8_C( 39), -INT8_C( 93), -INT8_C( 74), -INT8_C( 41), INT8_C( 92), INT8_C( 68), -INT8_C( 107), INT8_C( 56), INT8_C( 124), INT8_C( 92), INT8_C( 36), INT8_C( 63), -INT8_C( 118) }, { INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 5), -INT8_C( 20), -INT8_C( 31), -INT8_C( 8), -INT8_C( 2), INT8_C( 124), -INT8_C( 1), INT8_C( 117), INT8_C( 68), -INT8_C( 80), -INT8_C( 107), INT8_C( 90), -INT8_C( 30), INT8_C( 51), INT8_C( 52), INT8_C( 69) }, { INT8_C( 9), INT8_C( 67), -INT8_C( 78), INT8_C( 3), INT8_C( 99), -INT8_C( 76), INT8_C( 32), -INT8_C( 76), -INT8_C( 91), INT8_C( 56), -INT8_C( 4), INT8_C( 41), INT8_C( 100), -INT8_C( 35), -INT8_C( 88), -INT8_C( 111) }, { -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 41), -INT8_C( 41), INT8_C( 27), INT8_C( 85), -INT8_C( 57), INT8_MIN, -INT8_C( 41), INT8_C( 64), -INT8_C( 20), -INT8_C( 99), INT8_C( 26), -INT8_C( 29), -INT8_C( 9), -INT8_C( 29), INT8_C( 89), INT8_C( 56) }, { INT8_C( 9), -INT8_C( 70), -INT8_C( 22), -INT8_C( 123), INT8_C( 95), INT8_C( 64), INT8_C( 1), -INT8_C( 125), -INT8_C( 33), -INT8_C( 111), -INT8_C( 35), -INT8_C( 64), -INT8_C( 16), INT8_C( 70), INT8_C( 67), INT8_C( 66) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a);
    simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b);
    simde__m128i r = simde_mm_comlt_epi8(a, b);
    simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r));
  }

  return 0;
#else
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde__m128i a = simde_test_x86_random_i8x16();
    simde__m128i b = (i == 0) ? a : simde_test_x86_random_i8x16();
    simde__m128i r = simde_mm_comlt_epi8(a, b);

    simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Table-driven test for simde_mm_comlt_epi16.
 *
 * test_vec holds 8 cases, each with two 8-lane int16_t inputs (a, b) and
 * the expected output (r). In every case listed here a lane of r is -1
 * when the matching lane of a is less than that of b (signed comparison)
 * and 0 otherwise; the first case uses b identical to a, so its r is all
 * zero.
 *
 * The `#if 1` arm loads each case, calls simde_mm_comlt_epi16 and passes
 * the result together with the expected vector to
 * simde_test_x86_assert_equal_i16x8, then returns 0.
 * NOTE(review): the assert helper is defined outside this chunk;
 * presumably it fails the test on a mismatch -- confirm.
 *
 * The `#else` arm is compiled out. It runs the function on 8 random input
 * pairs (b is a copy of a for the first pair), writes a, b and r with
 * simde_test_x86_write_i16x8, and returns 1. */
static int
test_simde_mm_comlt_epi16 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    const int16_t a[8];
    const int16_t b[8];
    const int16_t r[8];
  } test_vec[] = {
    { { INT16_C( 15247), INT16_C( 27948), -INT16_C( 26976), INT16_C( 10171), INT16_C( 1153), INT16_C( 14705), INT16_C( 18149), -INT16_C( 1884) },
      { INT16_C( 15247), INT16_C( 27948), -INT16_C( 26976), INT16_C( 10171), INT16_C( 1153), INT16_C( 14705), INT16_C( 18149), -INT16_C( 1884) },
      { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } },
    { { -INT16_C( 4640), INT16_C( 9773), INT16_C( 24894), INT16_C( 4962), INT16_C( 10265), INT16_C( 1860), INT16_C( 26027), -INT16_C( 6919) },
      { -INT16_C( 24736), INT16_C( 2172), -INT16_C( 15313), INT16_C( 29075), INT16_C( 28203), INT16_C( 9217), INT16_C( 25685), -INT16_C( 12291) },
      { INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), INT16_C( 0) } },
    { { INT16_C( 25781), -INT16_C( 21020), INT16_C( 8361), -INT16_C( 21973), -INT16_C( 31108), -INT16_C( 14185), INT16_C( 17466), -INT16_C( 15952) },
      { -INT16_C( 29501), -INT16_C( 4493), INT16_C( 5151), INT16_C( 26439), INT16_C( 25105), INT16_C( 27698), INT16_C( 18159), INT16_C( 845) },
      { INT16_C( 0), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } },
    { { -INT16_C( 22352), -INT16_C( 7307), -INT16_C( 31929), -INT16_C( 28348), INT16_C( 12848), -INT16_C( 14017), -INT16_C( 5304), INT16_C( 8017) },
      { -INT16_C( 30239), -INT16_C( 6535), -INT16_C( 29162), INT16_C( 20927), INT16_C( 26716), -INT16_C( 10132), -INT16_C( 3238), -INT16_C( 8838) },
      { INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0) } },
    { { INT16_C( 16635), -INT16_C( 28598), -INT16_C( 27199), INT16_C( 4245), -INT16_C( 19364), -INT16_C( 11355), INT16_C( 24699), INT16_C( 18373) },
      { INT16_C( 7906), -INT16_C( 17930), INT16_C( 14780), INT16_C( 30691), INT16_C( 2099), INT16_C( 12567), INT16_C( 29503), -INT16_C( 23186) },
      { INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), INT16_C( 0) } },
    { { -INT16_C( 19269), INT16_C( 31945), INT16_C( 23486), INT16_C( 28166), -INT16_C( 27352), -INT16_C( 21200), -INT16_C( 21125), -INT16_C( 32270) },
      { -INT16_C( 11283), INT16_C( 12114), INT16_C( 7611), INT16_C( 8348), -INT16_C( 13377), INT16_C( 16283), -INT16_C( 12475), -INT16_C( 23793) },
      { -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } },
    { { -INT16_C( 29413), INT16_C( 28252), -INT16_C( 8217), -INT16_C( 2688), INT16_C( 23741), INT16_C( 8519), -INT16_C( 9487), INT16_C( 5570) },
      { INT16_C( 12586), INT16_C( 4341), INT16_C( 17080), -INT16_C( 27184), INT16_C( 14376), -INT16_C( 13727), INT16_C( 3605), INT16_C( 8007) },
      { -INT16_C( 1), INT16_C( 0), -INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1) } },
    { { INT16_C( 21057), INT16_C( 12138), INT16_C( 10468), -INT16_C( 5143), -INT16_C( 28349), -INT16_C( 2732), -INT16_C( 4731), INT16_C( 18972) },
      { -INT16_C( 16446), INT16_C( 9287), -INT16_C( 20130), INT16_C( 8040), -INT16_C( 10091), -INT16_C( 25905), INT16_C( 14422), INT16_C( 24829) },
      { INT16_C( 0), INT16_C( 0), INT16_C( 0), -INT16_C( 1), -INT16_C( 1), INT16_C( 0), -INT16_C( 1), -INT16_C( 1) } },
  };

  /* Walk every entry of the table (count derived from the array size). */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a);
    simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b);
    simde__m128i r = simde_mm_comlt_epi16(a, b);
    simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r));
  }

  return 0;
#else
  /* Disabled path: prints freshly computed a/b/r triples instead of checking. */
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde__m128i a = simde_test_x86_random_i16x8();
    /* First iteration reuses a, so one pair has identical inputs. */
    simde__m128i b = (i == 0) ? a : simde_test_x86_random_i16x8();
    simde__m128i r = simde_mm_comlt_epi16(a, b);

    simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Table-driven test for simde_mm_comlt_epi32.
 *
 * test_vec holds 8 cases, each with two 4-lane int32_t inputs (a, b) and
 * the expected output (r). In every case listed here a lane of r is -1
 * when the matching lane of a is less than that of b (signed comparison)
 * and 0 otherwise; the first case uses b identical to a, so its r is all
 * zero.
 *
 * The `#if 1` arm loads each case, calls simde_mm_comlt_epi32 and passes
 * the result together with the expected vector to
 * simde_test_x86_assert_equal_i32x4, then returns 0.
 * NOTE(review): the assert helper is defined outside this chunk;
 * presumably it fails the test on a mismatch -- confirm.
 *
 * The `#else` arm is compiled out. It runs the function on 8 random input
 * pairs (b is a copy of a for the first pair), writes a, b and r with
 * simde_test_x86_write_i32x4, and returns 1. */
static int
test_simde_mm_comlt_epi32 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    const int32_t a[4];
    const int32_t b[4];
    const int32_t r[4];
  } test_vec[] = {
    { { -INT32_C( 2007266475), -INT32_C( 1725219236), -INT32_C( 222380575), INT32_C( 1776808158) },
      { -INT32_C( 2007266475), -INT32_C( 1725219236), -INT32_C( 222380575), INT32_C( 1776808158) },
      { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } },
    { { INT32_C( 867239389), INT32_C( 72184146), INT32_C( 1984835886), -INT32_C( 1357293903) },
      { INT32_C( 1050067072), INT32_C( 1194076919), -INT32_C( 521312320), -INT32_C( 1173493083) },
      { -INT32_C( 1), -INT32_C( 1), INT32_C( 0), -INT32_C( 1) } },
    { { INT32_C( 77171874), -INT32_C( 1939502529), INT32_C( 675794205), INT32_C( 1417834446) },
      { INT32_C( 478509801), INT32_C( 1037497182), -INT32_C( 2025253112), -INT32_C( 2055871359) },
      { -INT32_C( 1), -INT32_C( 1), INT32_C( 0), INT32_C( 0) } },
    { { INT32_C( 1617484602), INT32_C( 26871243), INT32_C( 1981822087), -INT32_C( 1759343788) },
      { -INT32_C( 376487750), -INT32_C( 1039163079), -INT32_C( 1371908640), INT32_C( 302507291) },
      { INT32_C( 0), INT32_C( 0), INT32_C( 0), -INT32_C( 1) } },
    { { INT32_C( 260537293), -INT32_C( 1445729890), INT32_C( 692317335), -INT32_C( 1075793172) },
      { INT32_C( 1813814040), INT32_C( 532757807), INT32_C( 522758768), -INT32_C( 1465125606) },
      { -INT32_C( 1), -INT32_C( 1), INT32_C( 0), INT32_C( 0) } },
    { { -INT32_C( 648220030), -INT32_C( 822591903), INT32_C( 152469362), INT32_C( 379907389) },
      { INT32_C( 1855197741), -INT32_C( 1647063575), -INT32_C( 1602511391), -INT32_C( 1857479641) },
      { -INT32_C( 1), INT32_C( 0), INT32_C( 0), INT32_C( 0) } },
    { { -INT32_C( 2041460095), -INT32_C( 1225782084), -INT32_C( 586975933), -INT32_C( 497662992) },
      { -INT32_C( 1218433759), -INT32_C( 2110956529), -INT32_C( 90458277), INT32_C( 331965418) },
      { -INT32_C( 1), INT32_C( 0), -INT32_C( 1), -INT32_C( 1) } },
    { { -INT32_C( 573732109), -INT32_C( 1448780714), INT32_C( 1869632560), INT32_C( 1826602412) },
      { INT32_C( 266761244), INT32_C( 381153096), -INT32_C( 168788475), INT32_C( 1997787402) },
      { -INT32_C( 1), -INT32_C( 1), INT32_C( 0), -INT32_C( 1) } },
  };

  /* Walk every entry of the table (count derived from the array size). */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a);
    simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b);
    simde__m128i r = simde_mm_comlt_epi32(a, b);
    simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r));
  }

  return 0;
#else
  /* Disabled path: prints freshly computed a/b/r triples instead of checking. */
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde__m128i a = simde_test_x86_random_i32x4();
    /* First iteration reuses a, so one pair has identical inputs. */
    simde__m128i b = (i == 0) ? a : simde_test_x86_random_i32x4();
    simde__m128i r = simde_mm_comlt_epi32(a, b);

    simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Table-driven test for simde_mm_comlt_epi64.
 *
 * test_vec holds 8 cases, each with two 2-lane int64_t inputs (a, b) and
 * the expected output (r). In every case listed here a lane of r is -1
 * when the matching lane of a is less than that of b (signed comparison)
 * and 0 otherwise; the first case uses b identical to a, so its r is all
 * zero.
 *
 * The `#if 1` arm loads each case, calls simde_mm_comlt_epi64 and passes
 * the result together with the expected vector to
 * simde_test_x86_assert_equal_i64x2, then returns 0.
 * NOTE(review): the assert helper is defined outside this chunk;
 * presumably it fails the test on a mismatch -- confirm.
 *
 * The `#else` arm is compiled out. It runs the function on 8 random input
 * pairs (b is a copy of a for the first pair), writes a, b and r with
 * simde_test_x86_write_i64x2, and returns 1. */
static int
test_simde_mm_comlt_epi64 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    const int64_t a[2];
    const int64_t b[2];
    const int64_t r[2];
  } test_vec[] = {
    { { -INT64_C( 1009310061849691392), -INT64_C( 277707935960291743) },
      { -INT64_C( 1009310061849691392), -INT64_C( 277707935960291743) },
      { INT64_C( 0), INT64_C( 0) } },
    { { -INT64_C( 6676146384178024379), INT64_C( 364913823257426952) },
      { -INT64_C( 3034404798427700017), -INT64_C( 2733338756895476993) },
      { -INT64_C( 1), INT64_C( 0) } },
    { { INT64_C( 8069120759030851780), INT64_C( 335030005951773068) },
      { -INT64_C( 7188690367506903671), INT64_C( 5587645156847305010) },
      { INT64_C( 0), -INT64_C( 1) } },
    { { -INT64_C( 4358047410867690302), -INT64_C( 4702224012875514696) },
      { -INT64_C( 8074591999440568491), -INT64_C( 7026560871118497247) },
      { INT64_C( 0), INT64_C( 0) } },
    { { -INT64_C( 1738651072766022297), INT64_C( 8737461920207474100) },
      { -INT64_C( 654263243386552228), INT64_C( 1642202949236884981) },
      { -INT64_C( 1), INT64_C( 0) } },
    { { INT64_C( 2589914495479646938), INT64_C( 9085271308915061929) },
      { INT64_C( 1806110484765072325), -INT64_C( 333482292974875947) },
      { INT64_C( 0), INT64_C( 0) } },
    { { -INT64_C( 4637417923057319100), INT64_C( 1523448370915064255) },
      { INT64_C( 4560264752526579129), -INT64_C( 7700718862329793814) },
      { -INT64_C( 1), INT64_C( 0) } },
    { { INT64_C( 206652225637310924), -INT64_C( 8839227657813777123) },
      { -INT64_C( 5656579576308303776), INT64_C( 3385089157130093660) },
      { INT64_C( 0), -INT64_C( 1) } },
  };

  /* Walk every entry of the table (count derived from the array size). */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a);
    simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b);
    simde__m128i r = simde_mm_comlt_epi64(a, b);
    simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r));
  }

  return 0;
#else
  /* Disabled path: prints freshly computed a/b/r triples instead of checking. */
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde__m128i a = simde_test_x86_random_i64x2();
    /* First iteration reuses a, so one pair has identical inputs. */
    simde__m128i b = (i == 0) ? a : simde_test_x86_random_i64x2();
    simde__m128i r = simde_mm_comlt_epi64(a, b);

    simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Table-driven test for simde_mm_comlt_epu8.
 *
 * test_vec holds 8 cases, each with two 16-lane uint8_t inputs (a, b) and
 * the expected output (r). In every case listed here a lane of r is
 * UINT8_MAX when the matching lane of a is less than that of b (unsigned
 * comparison) and 0 otherwise; the first case uses b identical to a, so
 * its r is all zero.
 *
 * The `#if 1` arm loads each case with simde_mm_loadu_epi8 (the same
 * loader the signed 8-bit test uses), calls simde_mm_comlt_epu8 and
 * passes the result together with the expected vector to
 * simde_test_x86_assert_equal_u8x16, then returns 0.
 * NOTE(review): the assert helper is defined outside this chunk;
 * presumably it fails the test on a mismatch -- confirm.
 *
 * The `#else` arm is compiled out. It runs the function on 8 random input
 * pairs (b is a copy of a for the first pair), writes a, b and r with
 * simde_test_x86_write_u8x16, and returns 1. */
static int
test_simde_mm_comlt_epu8 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    const uint8_t a[16];
    const uint8_t b[16];
    const uint8_t r[16];
  } test_vec[] = {
    { { UINT8_C(155), UINT8_C(171), UINT8_C( 38), UINT8_C(163), UINT8_C(144), UINT8_C( 11), UINT8_C(136), UINT8_C( 54),
        UINT8_C(236), UINT8_C( 0), UINT8_C(208), UINT8_C( 40), UINT8_C(243), UINT8_C(137), UINT8_C(143), UINT8_C( 24) },
      { UINT8_C(155), UINT8_C(171), UINT8_C( 38), UINT8_C(163), UINT8_C(144), UINT8_C( 11), UINT8_C(136), UINT8_C( 54),
        UINT8_C(236), UINT8_C( 0), UINT8_C(208), UINT8_C( 40), UINT8_C(243), UINT8_C(137), UINT8_C(143), UINT8_C( 24) },
      { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0),
        UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } },
    { { UINT8_C(225), UINT8_C(203), UINT8_C(236), UINT8_C(222), UINT8_C( 54), UINT8_C( 14), UINT8_C(152), UINT8_C(184),
        UINT8_C( 82), UINT8_C( 9), UINT8_C( 4), UINT8_C(164), UINT8_C( 65), UINT8_C( 95), UINT8_C(209), UINT8_C( 12) },
      { UINT8_C(216), UINT8_C(105), UINT8_C(136), UINT8_C(168), UINT8_C( 1), UINT8_C( 54), UINT8_C(139), UINT8_C(161),
        UINT8_C( 83), UINT8_C(208), UINT8_C( 75), UINT8_C(242), UINT8_C(230), UINT8_C(193), UINT8_C(188), UINT8_C( 20) },
      { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0),
        UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX } },
    { { UINT8_C(101), UINT8_C(214), UINT8_C( 6), UINT8_C(218), UINT8_C(100), UINT8_C( 35), UINT8_C(126), UINT8_C(154),
        UINT8_C(244), UINT8_C( 71), UINT8_C(212), UINT8_C(141), UINT8_C(117), UINT8_C(242), UINT8_C(141), UINT8_C(120) },
      { UINT8_C(172), UINT8_C(164), UINT8_C(180), UINT8_C(140), UINT8_C( 22), UINT8_C(183), UINT8_C(205), UINT8_C(139),
        UINT8_C(123), UINT8_C(159), UINT8_C( 11), UINT8_C( 45), UINT8_C(197), UINT8_C(113), UINT8_C(195), UINT8_C(194) },
      { UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0),
        UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } },
    { { UINT8_C( 19), UINT8_C(162), UINT8_C( 31), UINT8_C( 24), UINT8_C( 10), UINT8_C( 17), UINT8_C( 23), UINT8_C(159),
        UINT8_C(107), UINT8_C( 72), UINT8_C(157), UINT8_C(203), UINT8_C(235), UINT8_MAX, UINT8_C( 24), UINT8_C(186) },
      { UINT8_C( 63), UINT8_C(226), UINT8_C( 20), UINT8_C( 23), UINT8_C(118), UINT8_C(149), UINT8_C( 56), UINT8_C( 62),
        UINT8_C(139), UINT8_C(244), UINT8_C(120), UINT8_C(161), UINT8_C( 59), UINT8_C(159), UINT8_C(140), UINT8_C(106) },
      { UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0),
        UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0) } },
    { { UINT8_C( 20), UINT8_C(182), UINT8_C(160), UINT8_C( 98), UINT8_C(207), UINT8_C(225), UINT8_C( 77), UINT8_C( 17),
        UINT8_C(223), UINT8_C(148), UINT8_C(200), UINT8_C( 39), UINT8_C( 74), UINT8_C(145), UINT8_C( 89), UINT8_C( 26) },
      { UINT8_C(184), UINT8_C(174), UINT8_C( 15), UINT8_C( 17), UINT8_C(201), UINT8_C(216), UINT8_C(179), UINT8_C( 1),
        UINT8_C(172), UINT8_C( 88), UINT8_C(250), UINT8_C( 21), UINT8_C(238), UINT8_C( 85), UINT8_C(254), UINT8_C( 84) },
      { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0),
        UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX } },
    { { UINT8_C(143), UINT8_C(154), UINT8_C(239), UINT8_C(126), UINT8_C( 91), UINT8_C(154), UINT8_C( 6), UINT8_C( 54),
        UINT8_C(119), UINT8_C(177), UINT8_C(187), UINT8_C(102), UINT8_C( 59), UINT8_C(173), UINT8_C( 56), UINT8_C(224) },
      { UINT8_C( 63), UINT8_C(141), UINT8_C( 13), UINT8_C( 65), UINT8_C(185), UINT8_C(137), UINT8_C( 37), UINT8_C( 27),
        UINT8_C( 6), UINT8_C( 82), UINT8_C(249), UINT8_C( 83), UINT8_C(134), UINT8_C(154), UINT8_C( 3), UINT8_C(199) },
      { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0),
        UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } },
    { { UINT8_C(173), UINT8_C(215), UINT8_C(118), UINT8_C( 52), UINT8_C( 87), UINT8_C( 68), UINT8_C( 44), UINT8_C( 88),
        UINT8_C( 92), UINT8_C( 41), UINT8_C(224), UINT8_C( 83), UINT8_C(101), UINT8_C( 93), UINT8_C(157), UINT8_C( 83) },
      { UINT8_C(254), UINT8_C( 9), UINT8_C(118), UINT8_C(112), UINT8_C(236), UINT8_C(174), UINT8_C(120), UINT8_C(213),
        UINT8_C(192), UINT8_C(106), UINT8_C(221), UINT8_C( 33), UINT8_C(172), UINT8_C(119), UINT8_C(130), UINT8_C( 13) },
      { UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX,
        UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } },
    { { UINT8_C(150), UINT8_C(244), UINT8_C(156), UINT8_C( 78), UINT8_C(107), UINT8_C(232), UINT8_C(166), UINT8_C(189),
        UINT8_C(182), UINT8_C(132), UINT8_C(158), UINT8_C(181), UINT8_C(113), UINT8_C(168), UINT8_C(112), UINT8_C(188) },
      { UINT8_C( 26), UINT8_C(169), UINT8_C(179), UINT8_C(102), UINT8_C( 10), UINT8_C( 82), UINT8_C(147), UINT8_C(119),
        UINT8_C( 3), UINT8_C( 40), UINT8_C( 14), UINT8_C( 72), UINT8_C( 8), UINT8_C(242), UINT8_C(100), UINT8_C(108) },
      { UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0),
        UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0) } },
  };

  /* Walk every entry of the table (count derived from the array size). */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a);
    simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b);
    simde__m128i r = simde_mm_comlt_epu8(a, b);
    simde_test_x86_assert_equal_u8x16(r, simde_mm_loadu_epi8(test_vec[i].r));
  }

  return 0;
#else
  /* Disabled path: prints freshly computed a/b/r triples instead of checking. */
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde__m128i a = simde_test_x86_random_u8x16();
    /* First iteration reuses a, so one pair has identical inputs. */
    simde__m128i b = (i == 0) ? a : simde_test_x86_random_u8x16();
    simde__m128i r = simde_mm_comlt_epu8(a, b);

    simde_test_x86_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Table-driven test for simde_mm_comlt_epu16.
 *
 * test_vec holds 8 cases, each with two 8-lane uint16_t inputs (a, b) and
 * the expected output (r). In every case listed here a lane of r is
 * UINT16_MAX when the matching lane of a is less than that of b (unsigned
 * comparison) and 0 otherwise; the first case uses b identical to a, so
 * its r is all zero.
 *
 * The `#if 1` arm loads each case with simde_mm_loadu_epi16, calls
 * simde_mm_comlt_epu16 and passes the result together with the expected
 * vector to simde_test_x86_assert_equal_u16x8, then returns 0.
 * NOTE(review): the assert helper is defined outside this chunk;
 * presumably it fails the test on a mismatch -- confirm.
 *
 * The `#else` arm is compiled out. It runs the function on 8 random input
 * pairs (b is a copy of a for the first pair), writes a, b and r with
 * simde_test_x86_write_u16x8, and returns 1. */
static int
test_simde_mm_comlt_epu16 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    const uint16_t a[8];
    const uint16_t b[8];
    const uint16_t r[8];
  } test_vec[] = {
    { { UINT16_C(31089), UINT16_C(37576), UINT16_C(36158), UINT16_C(44380), UINT16_C(18604), UINT16_C(21340), UINT16_C(38662), UINT16_C(25497) },
      { UINT16_C(31089), UINT16_C(37576), UINT16_C(36158), UINT16_C(44380), UINT16_C(18604), UINT16_C(21340), UINT16_C(38662), UINT16_C(25497) },
      { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } },
    { { UINT16_C(62910), UINT16_C(60202), UINT16_C(31420), UINT16_C(18782), UINT16_C( 5622), UINT16_C(37109), UINT16_C( 5697), UINT16_C(11663) },
      { UINT16_C(61286), UINT16_C(51812), UINT16_C(15225), UINT16_C(29238), UINT16_C(65382), UINT16_C(63108), UINT16_C(12493), UINT16_C(36865) },
      { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } },
    { { UINT16_C(30224), UINT16_C(51012), UINT16_C(12458), UINT16_C(37569), UINT16_C(47298), UINT16_C(49400), UINT16_C(59647), UINT16_C(38892) },
      { UINT16_C(56735), UINT16_C(48343), UINT16_C(64195), UINT16_C(21276), UINT16_C(12301), UINT16_C(26237), UINT16_C(32109), UINT16_C(35726) },
      { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } },
    { { UINT16_C(45880), UINT16_C(49513), UINT16_C(31611), UINT16_C(40100), UINT16_C(39310), UINT16_C(41600), UINT16_C(29420), UINT16_C(62563) },
      { UINT16_C(52033), UINT16_C(12681), UINT16_C(54213), UINT16_C(38902), UINT16_C(25801), UINT16_C(27567), UINT16_C(34447), UINT16_C(39721) },
      { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } },
    { { UINT16_C(13663), UINT16_C(41730), UINT16_C(25815), UINT16_C(44527), UINT16_C(16771), UINT16_C(64758), UINT16_C(48046), UINT16_C(35549) },
      { UINT16_C(16759), UINT16_C(61666), UINT16_C(52519), UINT16_C(34732), UINT16_C( 8897), UINT16_C(52610), UINT16_C(21211), UINT16_C( 2747) },
      { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } },
    { { UINT16_C(33453), UINT16_C(13173), UINT16_C(61800), UINT16_C( 3722), UINT16_C(14281), UINT16_C(38848), UINT16_C(52205), UINT16_C(41792) },
      { UINT16_C(51303), UINT16_C(49994), UINT16_C(61840), UINT16_C(27430), UINT16_C(61982), UINT16_C(21342), UINT16_C(59896), UINT16_C( 7722) },
      { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } },
    { { UINT16_C( 9289), UINT16_C(14892), UINT16_C(11475), UINT16_C( 1885), UINT16_C( 903), UINT16_C(15176), UINT16_C(43858), UINT16_C(34166) },
      { UINT16_C(59193), UINT16_C(28713), UINT16_C(18088), UINT16_C(35661), UINT16_C(23559), UINT16_C(51115), UINT16_C(21391), UINT16_C( 8288) },
      { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } },
    { { UINT16_C(41820), UINT16_C(32653), UINT16_C( 7619), UINT16_C(57169), UINT16_C(12005), UINT16_C(45044), UINT16_C(25220), UINT16_C(30821) },
      { UINT16_C(10006), UINT16_C(49383), UINT16_C(54296), UINT16_C(11783), UINT16_C(59813), UINT16_C(61393), UINT16_C(38983), UINT16_C(22339) },
      { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } },
  };

  /* Walk every entry of the table (count derived from the array size). */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a);
    simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b);
    simde__m128i r = simde_mm_comlt_epu16(a, b);
    simde_test_x86_assert_equal_u16x8(r, simde_mm_loadu_epi16(test_vec[i].r));
  }

  return 0;
#else
  /* Disabled path: prints freshly computed a/b/r triples instead of checking. */
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde__m128i a = simde_test_x86_random_u16x8();
    /* First iteration reuses a, so one pair has identical inputs. */
    simde__m128i b = (i == 0) ? a : simde_test_x86_random_u16x8();
    simde__m128i r = simde_mm_comlt_epu16(a, b);

    simde_test_x86_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Table-driven test for simde_mm_comlt_epu32.
 *
 * test_vec holds 8 cases, each with two 4-lane uint32_t inputs (a, b) and
 * the expected output (r). In every case listed here a lane of r is
 * UINT32_MAX when the matching lane of a is less than that of b (unsigned
 * comparison) and 0 otherwise; the first case uses b identical to a, so
 * its r is all zero.
 *
 * The `#if 1` arm loads each case with simde_mm_loadu_epi32, calls
 * simde_mm_comlt_epu32 and passes the result together with the expected
 * vector to simde_test_x86_assert_equal_u32x4, then returns 0.
 * NOTE(review): the assert helper is defined outside this chunk;
 * presumably it fails the test on a mismatch -- confirm.
 *
 * The `#else` arm is compiled out. It runs the function on 8 random input
 * pairs (b is a copy of a for the first pair), writes a, b and r with
 * simde_test_x86_write_u32x4, and returns 1. */
static int
test_simde_mm_comlt_epu32 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    const uint32_t a[4];
    const uint32_t b[4];
    const uint32_t r[4];
  } test_vec[] = {
    { { UINT32_C(3422717197), UINT32_C(3763129310), UINT32_C(3173924620), UINT32_C(3338139692) },
      { UINT32_C(3422717197), UINT32_C(3763129310), UINT32_C(3173924620), UINT32_C(3338139692) },
      { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } },
    { { UINT32_C(2079067942), UINT32_C(2638062472), UINT32_C(2469928992), UINT32_C( 230539721) },
      { UINT32_C(3891418245), UINT32_C(1345797836), UINT32_C( 727563812), UINT32_C(3037886194) },
      { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } },
    { { UINT32_C(1772103568), UINT32_C( 567852191), UINT32_C(2101906079), UINT32_C(2293618365) },
      { UINT32_C(2579863531), UINT32_C(2029747766), UINT32_C(3269997652), UINT32_C(2409685885) },
      { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } },
    { { UINT32_C(1366095741), UINT32_C( 12461574), UINT32_C(1953148234), UINT32_C( 303226313) },
      { UINT32_C(2866993000), UINT32_C(2692650180), UINT32_C(1245259973), UINT32_C(2592622454) },
      { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_MAX } },
    { { UINT32_C(4256565268), UINT32_C(2195319141), UINT32_C(1057231690), UINT32_C(4055818392) },
      { UINT32_C(3803421476), UINT32_C( 279611165), UINT32_C(2344034975), UINT32_C( 531802756) },
      { UINT32_C( 0), UINT32_C( 0), UINT32_MAX, UINT32_C( 0) } },
    { { UINT32_C( 870834558), UINT32_C(4044295779), UINT32_C(2810201287), UINT32_C(1839261392) },
      { UINT32_C( 161442630), UINT32_C( 258367466), UINT32_C(1305127433), UINT32_C(1694897488) },
      { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } },
    { { UINT32_C(3160935138), UINT32_C(2471140264), UINT32_C(1950664680), UINT32_C(3500210202) },
      { UINT32_C(3859730423), UINT32_C(3868796114), UINT32_C(1473923371), UINT32_C(3060567938) },
      { UINT32_MAX, UINT32_MAX, UINT32_C( 0), UINT32_C( 0) } },
    { { UINT32_C(1621101673), UINT32_C(2993789917), UINT32_C(1857692630), UINT32_C(1621499165) },
      { UINT32_C(1114132574), UINT32_C(3727548286), UINT32_C(1917814318), UINT32_C(1489772993) },
      { UINT32_C( 0), UINT32_MAX, UINT32_MAX, UINT32_C( 0) } },
  };

  /* Walk every entry of the table (count derived from the array size). */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a);
    simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b);
    simde__m128i r = simde_mm_comlt_epu32(a, b);
    simde_test_x86_assert_equal_u32x4(r, simde_mm_loadu_epi32(test_vec[i].r));
  }

  return 0;
#else
  /* Disabled path: prints freshly computed a/b/r triples instead of checking. */
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde__m128i a = simde_test_x86_random_u32x4();
    /* First iteration reuses a, so one pair has identical inputs. */
    simde__m128i b = (i == 0) ? a : simde_test_x86_random_u32x4();
    simde__m128i r = simde_mm_comlt_epu32(a, b);

    simde_test_x86_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Table-driven test for simde_mm_comlt_epu64.
 *
 * test_vec holds 8 cases, each with two 2-lane uint64_t inputs (a, b) and
 * the expected output (r). In every case listed here a lane of r is
 * UINT64_MAX when the matching lane of a is less than that of b (unsigned
 * comparison) and 0 otherwise; the first case uses b identical to a, so
 * its r is all zero.
 *
 * The `#if 1` arm loads each case with simde_mm_loadu_epi64, calls
 * simde_mm_comlt_epu64 and passes the result together with the expected
 * vector to simde_test_x86_assert_equal_u64x2, then returns 0.
 * NOTE(review): the assert helper is defined outside this chunk;
 * presumably it fails the test on a mismatch -- confirm.
 *
 * The `#else` arm is compiled out. It runs the function on 8 random input
 * pairs (b is a copy of a for the first pair), writes a, b and r with
 * simde_test_x86_write_u64x2, and returns 1. */
static int
test_simde_mm_comlt_epu64 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    const uint64_t a[2];
    const uint64_t b[2];
    const uint64_t r[2];
  } test_vec[] = {
    { { UINT64_C(10694944839838363194), UINT64_C( 7465382146795430841) },
      { UINT64_C(10694944839838363194), UINT64_C( 7465382146795430841) },
      { UINT64_C( 0), UINT64_C( 0) } },
    { { UINT64_C( 4398214280665003428), UINT64_C(10668696243839101498) },
      { UINT64_C( 9305296330829379711), UINT64_C( 3126836178466671800) },
      { UINT64_MAX, UINT64_C( 0) } },
    { { UINT64_C( 5410324813019269104), UINT64_C(12833757039398878326) },
      { UINT64_C(14086855747445027677), UINT64_C(17720907039855716603) },
      { UINT64_MAX, UINT64_MAX } },
    { { UINT64_C( 5924802686951738731), UINT64_C(18075392130194162699) },
      { UINT64_C(11629883228059608830), UINT64_C( 872310240276364971) },
      { UINT64_MAX, UINT64_C( 0) } },
    { { UINT64_C(10978932502879473468), UINT64_C(12984423513631115809) },
      { UINT64_C( 7115779669251021194), UINT64_C(13463246126398199792) },
      { UINT64_C( 0), UINT64_MAX } },
    { { UINT64_C( 7379709048895640973), UINT64_C( 2813464160161863646) },
      { UINT64_C( 5726224131170604072), UINT64_C( 4974875778945776369) },
      { UINT64_C( 0), UINT64_MAX } },
    { { UINT64_C( 307687679989737603), UINT64_C(11263558847133430381) },
      { UINT64_C(12714394728952127489), UINT64_C(17769756759069503447) },
      { UINT64_MAX, UINT64_MAX } },
    { { UINT64_C(13464197959855214408), UINT64_C( 6622059559939810292) },
      { UINT64_C(14814948272629236797), UINT64_C( 1545221821489315785) },
      { UINT64_MAX, UINT64_C( 0) } },
  };

  /* Walk every entry of the table (count derived from the array size). */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a);
    simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b);
    simde__m128i r = simde_mm_comlt_epu64(a, b);
    simde_test_x86_assert_equal_u64x2(r, simde_mm_loadu_epi64(test_vec[i].r));
  }

  return 0;
#else
  /* Disabled path: prints freshly computed a/b/r triples instead of checking. */
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde__m128i a = simde_test_x86_random_u64x2();
    /* First iteration reuses a, so one pair has identical inputs. */
    simde__m128i b = (i == 0) ? a : simde_test_x86_random_u64x2();
    simde__m128i r = simde_mm_comlt_epu64(a, b);

    simde_test_x86_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

/* Table-driven test for simde_mm_comtrue_epi8.
 *
 * test_vec holds 8 cases, each with two 16-lane int8_t inputs (a, b) and
 * the expected output (r). Every lane of r is -1 in all 8 cases, both in
 * the first case (b identical to a) and in the remaining cases where the
 * inputs differ.
 *
 * The `#if 1` arm loads each case, calls simde_mm_comtrue_epi8 and passes
 * the result together with the expected vector to
 * simde_test_x86_assert_equal_i8x16, then returns 0.
 * NOTE(review): the assert helper is defined outside this chunk;
 * presumably it fails the test on a mismatch -- confirm.
 *
 * The `#else` arm is compiled out. It runs the function on 8 random input
 * pairs (b is a copy of a for the first pair), writes a, b and r with
 * simde_test_x86_write_i8x16, and returns 1. */
static int
test_simde_mm_comtrue_epi8 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    const int8_t a[16];
    const int8_t b[16];
    const int8_t r[16];
  } test_vec[] = {
    { { INT8_C( 4), INT8_C( 18), INT8_C( 7), -INT8_C( 71), INT8_C( 29), -INT8_C( 10), INT8_C( 14), -INT8_C( 48),
        -INT8_C( 100), INT8_C( 115), -INT8_C( 17), INT8_C( 105), INT8_C( 97), -INT8_C( 76), -INT8_C( 73), -INT8_C( 84) },
      { INT8_C( 4), INT8_C( 18), INT8_C( 7), -INT8_C( 71), INT8_C( 29), -INT8_C( 10), INT8_C( 14), -INT8_C( 48),
        -INT8_C( 100), INT8_C( 115), -INT8_C( 17), INT8_C( 105), INT8_C( 97), -INT8_C( 76), -INT8_C( 73), -INT8_C( 84) },
      { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1),
        -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } },
    { { INT8_C( 3), -INT8_C( 55), -INT8_C( 55), -INT8_C( 63), INT8_C( 31), -INT8_C( 89), -INT8_C( 43), -INT8_C( 19),
        -INT8_C( 16), INT8_C( 112), INT8_C( 4), -INT8_C( 2), INT8_C( 32), -INT8_C( 11), INT8_C( 117), -INT8_C( 22) },
      { -INT8_C( 34), INT8_C( 122), -INT8_C( 19), -INT8_C( 77), INT8_C( 49), -INT8_C( 71), -INT8_C( 54), -INT8_C( 115),
        -INT8_C( 116), INT8_C( 41), INT8_C( 57), INT8_C( 67), -INT8_C( 94), INT8_MAX, -INT8_C( 87), -INT8_C( 41) },
      { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1),
        -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } },
    { { INT8_C( 123), INT8_C( 117), INT8_MIN, INT8_C( 105), -INT8_C( 56), -INT8_C( 53), INT8_C( 12), INT8_C( 89),
        INT8_C( 116), -INT8_C( 116), -INT8_C( 68), -INT8_C( 80), INT8_C( 123), -INT8_C( 109), -INT8_C( 112), -INT8_C( 68) },
      { -INT8_C( 1), INT8_C( 77), -INT8_C( 50), -INT8_C( 5), -INT8_C( 103), -INT8_C( 63), -INT8_C( 9), INT8_C( 59),
        -INT8_C( 19), -INT8_C( 51), -INT8_C( 7), -INT8_C( 2), -INT8_C( 127), -INT8_C( 78), -INT8_C( 90), INT8_C( 35) },
      { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1),
        -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } },
    { { -INT8_C( 49), -INT8_C( 45), INT8_C( 101), -INT8_C( 61), -INT8_C( 104), -INT8_C( 71), INT8_C( 39), INT8_C( 90),
        INT8_C( 124), INT8_C( 91), -INT8_C( 99), INT8_C( 39), -INT8_C( 56), -INT8_C( 100), -INT8_C( 87), -INT8_C( 43) },
      { -INT8_C( 113), INT8_C( 22), INT8_C( 18), INT8_C( 90), -INT8_C( 4), INT8_C( 22), INT8_C( 123), INT8_C( 32),
        -INT8_C( 26), -INT8_C( 25), -INT8_C( 106), INT8_C( 98), -INT8_C( 91), INT8_C( 84), -INT8_C( 107), -INT8_C( 38) },
      { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1),
        -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } },
    { { INT8_C( 36), INT8_C( 104), -INT8_C( 31), -INT8_C( 103), INT8_C( 56), INT8_C( 120), INT8_C( 15), INT8_C( 54),
        INT8_C( 48), INT8_C( 100), INT8_C( 16), INT8_C( 42), -INT8_C( 82), INT8_C( 25), -INT8_C( 89), INT8_C( 123) },
      { -INT8_C( 75), INT8_C( 90), INT8_C( 32), -INT8_C( 104), INT8_C( 2), -INT8_C( 63), INT8_C( 64), -INT8_C( 124),
        -INT8_C( 96), INT8_C( 0), INT8_C( 120), INT8_C( 55), -INT8_C( 73), INT8_C( 109), INT8_C( 93), INT8_C( 66) },
      { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1),
        -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } },
    { { -INT8_C( 91), -INT8_C( 66), INT8_C( 93), -INT8_C( 79), INT8_C( 79), INT8_C( 16), -INT8_C( 86), INT8_C( 53),
        -INT8_C( 71), INT8_C( 47), INT8_C( 123), -INT8_C( 125), -INT8_C( 43), INT8_C( 17), INT8_C( 116), -INT8_C( 9) },
      { -INT8_C( 103), -INT8_C( 93), INT8_C( 98), INT8_C( 125), INT8_C( 84), -INT8_C( 55), INT8_C( 44), -INT8_C( 79),
        INT8_C( 66), -INT8_C( 97), INT8_C( 7), INT8_C( 69), INT8_C( 94), INT8_C( 6), -INT8_C( 24), -INT8_C( 93) },
      { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1),
        -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } },
    { { INT8_C( 119), INT8_C( 92), INT8_C( 63), -INT8_C( 44), -INT8_C( 122), -INT8_C( 118), -INT8_C( 30), -INT8_C( 95),
        INT8_C( 63), INT8_C( 68), INT8_C( 71), -INT8_C( 8), -INT8_C( 26), -INT8_C( 116), -INT8_C( 9), -INT8_C( 111) },
      { INT8_C( 100), INT8_C( 120), INT8_C( 63), -INT8_C( 48), -INT8_C( 102), INT8_C( 54), INT8_C( 41), -INT8_C( 18),
        -INT8_C( 11), INT8_C( 77), -INT8_C( 85), INT8_C( 84), INT8_C( 67), INT8_C( 37), INT8_C( 29), INT8_C( 71) },
      { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1),
        -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } },
    { { -INT8_C( 59), -INT8_C( 54), -INT8_C( 16), -INT8_C( 54), -INT8_C( 123), -INT8_C( 19), -INT8_C( 98), -INT8_C( 64),
        -INT8_C( 23), INT8_C( 44), -INT8_C( 35), INT8_C( 82), -INT8_C( 119), -INT8_C( 109), INT8_C( 24), -INT8_C( 112) },
      { INT8_C( 62), INT8_C( 98), INT8_C( 31), INT8_C( 91), INT8_C( 123), INT8_C( 16), INT8_C( 30), -INT8_C( 126),
        -INT8_C( 31), -INT8_C( 111), -INT8_C( 52), INT8_C( 43), INT8_C( 13), -INT8_C( 43), -INT8_C( 28), INT8_C( 117) },
      { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1),
        -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } },
  };

  /* Walk every entry of the table (count derived from the array size). */
  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
    simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a);
    simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b);
    simde__m128i r = simde_mm_comtrue_epi8(a, b);
    simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r));
  }

  return 0;
#else
  /* Disabled path: prints freshly computed a/b/r triples instead of checking. */
  fputc('\n', stdout);
  for (int i = 0 ; i < 8 ; i++) {
    simde__m128i a = simde_test_x86_random_i8x16();
    /* First iteration reuses a, so one pair has identical inputs. */
    simde__m128i b = (i == 0) ? a : simde_test_x86_random_i8x16();
    simde__m128i r = simde_mm_comtrue_epi8(a, b);

    simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST);
    simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE);
    simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST);
  }
  return 1;
#endif
}

static int
test_simde_mm_comtrue_epi16 (SIMDE_MUNIT_TEST_ARGS) {
#if 1
  static const struct {
    const int16_t a[8];
    const int16_t b[8];
    const int16_t r[8];
  } test_vec[] = {
    { { -INT16_C( 28235), INT16_C( 23513), INT16_C( 16883), -INT16_C( 9018), INT16_C( 28384), INT16_C( 22691), INT16_C( 11365), INT16_C( 16063) },
      { -INT16_C( 28235), INT16_C( 23513), INT16_C( 16883), -INT16_C( 9018), INT16_C( 28384), INT16_C( 22691), INT16_C( 11365), INT16_C( 16063) },
      { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } },
    { { -INT16_C( 6066), -INT16_C( 7062), INT16_C( 24736), -INT16_C( 18702), -INT16_C( 3283), -INT16_C( 27694), INT16_C( 7013), INT16_C( 29733) },
      { INT16_C( 14447), INT16_C( 20321), -INT16_C( 28807), INT16_C( 15169), -INT16_C( 28084), -INT16_C( 11261), INT16_C( 25123), -INT16_C( 7724) },
      { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } },
    { { -INT16_C( 27971), INT16_C( 13449), INT16_C( 11953), -INT16_C( 11633), -INT16_C( 1278), INT16_C( 21541), INT16_C( 499), -INT16_C( 29240) },
      { INT16_C( 18204), INT16_C( 28143), -INT16_C( 8514), INT16_C( 9721), INT16_C( 8276), -INT16_C( 29596), INT16_C( 14954), -INT16_C( 15809) },
{ -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } }, { { -INT16_C( 5966), INT16_C( 5090), -INT16_C( 32683), INT16_C( 7900), INT16_C( 12935), INT16_C( 13869), -INT16_C( 29090), INT16_C( 2230) }, { INT16_C( 18148), INT16_C( 32493), INT16_C( 13931), -INT16_C( 6955), -INT16_C( 24288), INT16_C( 18990), -INT16_C( 16669), INT16_C( 10728) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } }, { { INT16_C( 29526), INT16_C( 18910), INT16_C( 24628), -INT16_C( 7743), INT16_C( 8165), INT16_C( 338), -INT16_C( 13745), INT16_C( 11733) }, { -INT16_C( 16658), INT16_C( 19138), -INT16_C( 24793), -INT16_C( 16451), -INT16_C( 25127), -INT16_C( 11064), -INT16_C( 2762), INT16_C( 23992) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } }, { { -INT16_C( 17711), -INT16_C( 25115), -INT16_C( 10761), INT16_C( 25894), INT16_C( 19267), INT16_C( 31997), -INT16_C( 16787), INT16_C( 17166) }, { INT16_C( 14179), -INT16_C( 26154), INT16_C( 8859), -INT16_C( 615), -INT16_C( 25945), -INT16_C( 3428), -INT16_C( 6390), -INT16_C( 22379) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } }, { { INT16_C( 17738), -INT16_C( 10144), -INT16_C( 6330), -INT16_C( 3853), INT16_C( 15562), INT16_C( 28820), INT16_C( 29025), -INT16_C( 27574) }, { INT16_C( 14186), INT16_C( 13201), -INT16_C( 14738), -INT16_C( 6319), INT16_C( 8881), INT16_C( 28177), -INT16_C( 25337), INT16_C( 20584) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } }, { { -INT16_C( 25111), -INT16_C( 15691), -INT16_C( 24632), -INT16_C( 13552), INT16_C( 31905), -INT16_C( 23167), -INT16_C( 5166), INT16_C( 26223) }, { INT16_C( 18476), -INT16_C( 8101), -INT16_C( 27575), -INT16_C( 14898), -INT16_C( 17376), INT16_C( 3983), INT16_C( 7894), 
-INT16_C( 25062) }, { -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1), -INT16_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_comtrue_epi16(a, b); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_comtrue_epi16(a, b); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comtrue_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { { INT32_C( 592333272), INT32_C( 1046873126), -INT32_C( 483748878), INT32_C( 40252777) }, { INT32_C( 592333272), INT32_C( 1046873126), -INT32_C( 483748878), INT32_C( 40252777) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { INT32_C( 1738338769), -INT32_C( 537488684), -INT32_C( 1669402341), -INT32_C( 628002273) }, { -INT32_C( 1013853892), -INT32_C( 1864687864), -INT32_C( 218560542), -INT32_C( 1340582194) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { -INT32_C( 1849902208), INT32_C( 2092092086), -INT32_C( 565753909), INT32_C( 1287103113) }, { INT32_C( 1793637952), INT32_C( 190379540), -INT32_C( 1722522981), -INT32_C( 1200859480) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { INT32_C( 639839329), -INT32_C( 437566312), -INT32_C( 1655682984), INT32_C( 1014216125) }, { -INT32_C( 541419767), -INT32_C( 171535115), -INT32_C( 1583011003), INT32_C( 1632190111) }, { -INT32_C( 1), 
-INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { -INT32_C( 298201443), INT32_C( 1669793569), -INT32_C( 1610449687), -INT32_C( 240340125) }, { -INT32_C( 344988478), -INT32_C( 1760821679), -INT32_C( 749475065), -INT32_C( 191294116) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { -INT32_C( 1335416484), INT32_C( 1013980154), -INT32_C( 1312462170), -INT32_C( 1270045406) }, { INT32_C( 1450126993), INT32_C( 957431505), -INT32_C( 171562742), -INT32_C( 1186733177) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { INT32_C( 907264965), -INT32_C( 1198749494), -INT32_C( 1778214216), -INT32_C( 851799390) }, { -INT32_C( 400426082), INT32_C( 599677468), -INT32_C( 815370890), -INT32_C( 131092024) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, { { INT32_C( 1185359361), INT32_C( 566580794), INT32_C( 404722759), -INT32_C( 2074180980) }, { INT32_C( 1760548370), -INT32_C( 1661182501), INT32_C( 731594866), -INT32_C( 112884027) }, { -INT32_C( 1), -INT32_C( 1), -INT32_C( 1), -INT32_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_comtrue_epi32(a, b); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = (i == 0) ? 
a : simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_comtrue_epi32(a, b); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comtrue_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { { -INT64_C( 4827420152686847433), INT64_C( 2351017032843452794) }, { -INT64_C( 4827420152686847433), INT64_C( 2351017032843452794) }, { -INT64_C( 1), -INT64_C( 1) } }, { { -INT64_C( 1318907397904776171), INT64_C( 246970282387777574) }, { -INT64_C( 3014534770133342832), -INT64_C( 1447753086249662342) }, { -INT64_C( 1), -INT64_C( 1) } }, { { INT64_C( 6901820662948417486), INT64_C( 6957891663631265723) }, { -INT64_C( 5609414929122897612), INT64_C( 3107121265309028719) }, { -INT64_C( 1), -INT64_C( 1) } }, { { INT64_C( 4045661127513538150), INT64_C( 6238471471275466073) }, { -INT64_C( 7358602143334717878), INT64_C( 2966188204622873984) }, { -INT64_C( 1), -INT64_C( 1) } }, { { -INT64_C( 4561025498887071483), INT64_C( 3342166001191074087) }, { -INT64_C( 3081258487602582789), INT64_C( 3311408266836896469) }, { -INT64_C( 1), -INT64_C( 1) } }, { { INT64_C( 4709059121473874385), INT64_C( 3522154446598982478) }, { -INT64_C( 5969826206700296338), INT64_C( 9179788988322859927) }, { -INT64_C( 1), -INT64_C( 1) } }, { { INT64_C( 144109204175369717), -INT64_C( 6630182630353298442) }, { INT64_C( 7530936525393257676), INT64_C( 7377758891505561070) }, { -INT64_C( 1), -INT64_C( 1) } }, { { INT64_C( 5299929538076591767), -INT64_C( 3343706590326083003) }, { INT64_C( 5709504304017058364), INT64_C( 3231116635510249985) }, { -INT64_C( 1), -INT64_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); 
simde__m128i r = simde_mm_comtrue_epi64(a, b); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_comtrue_epi64(a, b); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comtrue_epu8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint8_t a[16]; const uint8_t b[16]; const uint8_t r[16]; } test_vec[] = { { { UINT8_C(224), UINT8_C( 34), UINT8_C(237), UINT8_C(194), UINT8_C(209), UINT8_C(110), UINT8_C(236), UINT8_C( 97), UINT8_C(102), UINT8_C(151), UINT8_C(249), UINT8_C(228), UINT8_C( 29), UINT8_C(244), UINT8_C(158), UINT8_C( 0) }, { UINT8_C(224), UINT8_C( 34), UINT8_C(237), UINT8_C(194), UINT8_C(209), UINT8_C(110), UINT8_C(236), UINT8_C( 97), UINT8_C(102), UINT8_C(151), UINT8_C(249), UINT8_C(228), UINT8_C( 29), UINT8_C(244), UINT8_C(158), UINT8_C( 0) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(231), UINT8_C(128), UINT8_C(123), UINT8_C( 38), UINT8_C(126), UINT8_C(221), UINT8_C( 57), UINT8_C(170), UINT8_C(249), UINT8_C(153), UINT8_C(192), UINT8_C( 36), UINT8_C(158), UINT8_C(177), UINT8_C(170), UINT8_C( 23) }, { UINT8_C(247), UINT8_C( 83), UINT8_C(247), UINT8_C(208), UINT8_C( 40), UINT8_C(232), UINT8_C( 2), UINT8_C(145), UINT8_C(127), UINT8_C( 82), UINT8_C(179), UINT8_C(239), UINT8_C( 77), UINT8_C(114), UINT8_C(247), UINT8_C(121) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } 
}, { { UINT8_C(244), UINT8_C(238), UINT8_C(109), UINT8_C(154), UINT8_C( 66), UINT8_C( 49), UINT8_C( 99), UINT8_C(193), UINT8_C(253), UINT8_C(179), UINT8_C( 0), UINT8_C(190), UINT8_C(191), UINT8_C(122), UINT8_C(194), UINT8_C(112) }, { UINT8_C( 4), UINT8_C(225), UINT8_C( 44), UINT8_C(157), UINT8_C(130), UINT8_C(152), UINT8_C(184), UINT8_C( 33), UINT8_C(185), UINT8_C(236), UINT8_C( 19), UINT8_C( 75), UINT8_C(202), UINT8_C( 72), UINT8_C(137), UINT8_C(132) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(140), UINT8_C(252), UINT8_C(190), UINT8_C( 49), UINT8_C(221), UINT8_C( 62), UINT8_C(160), UINT8_C(219), UINT8_C( 54), UINT8_C(110), UINT8_C(153), UINT8_C(141), UINT8_C(131), UINT8_C(159), UINT8_C( 9), UINT8_C(126) }, { UINT8_C( 49), UINT8_C( 82), UINT8_C(243), UINT8_C(240), UINT8_C(136), UINT8_C(132), UINT8_C(247), UINT8_C( 88), UINT8_C( 59), UINT8_C(236), UINT8_C(128), UINT8_C(191), UINT8_C( 61), UINT8_C(126), UINT8_C( 62), UINT8_C(104) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(215), UINT8_C( 51), UINT8_C(215), UINT8_C(179), UINT8_C(248), UINT8_C( 12), UINT8_C(217), UINT8_C( 65), UINT8_C(204), UINT8_C( 85), UINT8_C(244), UINT8_C( 89), UINT8_C(143), UINT8_C( 39), UINT8_C(102), UINT8_C(139) }, { UINT8_C(164), UINT8_C( 48), UINT8_C(182), UINT8_C(147), UINT8_C(225), UINT8_C(182), UINT8_C(165), UINT8_C(128), UINT8_C( 46), UINT8_C(218), UINT8_C( 98), UINT8_C( 21), UINT8_C( 78), UINT8_C( 26), UINT8_C(253), UINT8_C(111) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(252), UINT8_C( 26), UINT8_C( 30), UINT8_C(232), UINT8_C( 58), 
UINT8_C(164), UINT8_C(247), UINT8_C( 60), UINT8_C(230), UINT8_C(237), UINT8_C(119), UINT8_C(234), UINT8_C(142), UINT8_C( 26), UINT8_C(192), UINT8_C(221) }, { UINT8_C(133), UINT8_C( 2), UINT8_C(220), UINT8_C( 76), UINT8_C( 54), UINT8_C( 54), UINT8_C(171), UINT8_C(223), UINT8_C(185), UINT8_C( 63), UINT8_C( 32), UINT8_C( 19), UINT8_C(164), UINT8_C( 38), UINT8_C(173), UINT8_C(223) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C( 36), UINT8_C( 58), UINT8_C(252), UINT8_C(152), UINT8_C( 76), UINT8_C( 13), UINT8_C(224), UINT8_C( 18), UINT8_C(172), UINT8_C(192), UINT8_C(139), UINT8_C( 8), UINT8_C( 38), UINT8_C(128), UINT8_C( 1), UINT8_C(189) }, { UINT8_C(253), UINT8_C( 81), UINT8_C(205), UINT8_C(228), UINT8_C( 47), UINT8_C( 10), UINT8_C(242), UINT8_C(189), UINT8_C( 5), UINT8_C(161), UINT8_C( 36), UINT8_C(130), UINT8_C(232), UINT8_C(169), UINT8_C( 56), UINT8_C( 1) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, { { UINT8_C(118), UINT8_C( 25), UINT8_C(218), UINT8_C(138), UINT8_C(213), UINT8_C( 79), UINT8_C(126), UINT8_C( 11), UINT8_C( 71), UINT8_C( 85), UINT8_C(153), UINT8_C(122), UINT8_MAX, UINT8_C( 98), UINT8_C( 16), UINT8_C(115) }, { UINT8_C( 52), UINT8_C(164), UINT8_C(240), UINT8_C( 35), UINT8_C(115), UINT8_C( 60), UINT8_C( 97), UINT8_C(100), UINT8_C( 57), UINT8_C(139), UINT8_C(213), UINT8_C( 41), UINT8_C(193), UINT8_C(172), UINT8_C(133), UINT8_C( 29) }, { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = 
simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_comtrue_epu8(a, b); simde_test_x86_assert_equal_u8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u8x16(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u8x16(); simde__m128i r = simde_mm_comtrue_epu8(a, b); simde_test_x86_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comtrue_epu16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint16_t a[8]; const uint16_t b[8]; const uint16_t r[8]; } test_vec[] = { { { UINT16_C(16667), UINT16_C(34590), UINT16_C(29566), UINT16_C(29111), UINT16_C(13535), UINT16_C( 2311), UINT16_C(51138), UINT16_C(18389) }, { UINT16_C(16667), UINT16_C(34590), UINT16_C(29566), UINT16_C(29111), UINT16_C(13535), UINT16_C( 2311), UINT16_C(51138), UINT16_C(18389) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(33617), UINT16_C(53423), UINT16_C(54443), UINT16_C( 7135), UINT16_C(33406), UINT16_C(53915), UINT16_C(14040), UINT16_C(31612) }, { UINT16_C(14906), UINT16_C(22322), UINT16_C(33007), UINT16_C(35445), UINT16_C(59035), UINT16_C(32062), UINT16_C(47127), UINT16_C(33080) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(30332), UINT16_C(46449), UINT16_C(55422), UINT16_C(10582), UINT16_C( 4348), UINT16_C(17374), UINT16_C(20436), UINT16_C(25093) }, { UINT16_C(35580), UINT16_C(49788), UINT16_C(32463), UINT16_C(41119), UINT16_C(62116), UINT16_C(40615), UINT16_C(15525), UINT16_C(26657) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C( 1759), UINT16_C(39071), UINT16_C(21141), UINT16_C(55213), UINT16_C(48346), 
UINT16_C(17670), UINT16_C(65375), UINT16_C( 7176) }, { UINT16_C(48009), UINT16_C(36966), UINT16_C(30406), UINT16_C(63773), UINT16_C(57378), UINT16_C(13225), UINT16_C(23317), UINT16_C(17784) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(47775), UINT16_C(17055), UINT16_C(18839), UINT16_C(28108), UINT16_C( 3649), UINT16_C(41083), UINT16_C(20254), UINT16_C(61293) }, { UINT16_C(21768), UINT16_C(34902), UINT16_C(28542), UINT16_C(56791), UINT16_C(14652), UINT16_C( 1707), UINT16_C( 7438), UINT16_C(24613) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C( 7143), UINT16_C(31706), UINT16_C(50733), UINT16_C(13211), UINT16_C(36696), UINT16_C( 7334), UINT16_C(18107), UINT16_C( 9244) }, { UINT16_C(56993), UINT16_C(29622), UINT16_C(29084), UINT16_C(38580), UINT16_C(33562), UINT16_C(56599), UINT16_C(35641), UINT16_C( 272) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C(45277), UINT16_C( 2744), UINT16_C(53503), UINT16_C(28673), UINT16_C(50760), UINT16_C(33261), UINT16_C(60894), UINT16_C( 766) }, { UINT16_C(57470), UINT16_C( 6637), UINT16_C(33995), UINT16_C(27551), UINT16_C(18917), UINT16_C(33108), UINT16_C(44094), UINT16_C(28706) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, { { UINT16_C( 425), UINT16_C(47522), UINT16_C(28596), UINT16_C(28393), UINT16_C(15416), UINT16_C(38585), UINT16_C(18990), UINT16_C(54265) }, { UINT16_C(58052), UINT16_C(16995), UINT16_C(45489), UINT16_C(41853), UINT16_C( 4293), UINT16_C(47818), UINT16_C(35268), UINT16_C(62529) }, { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = 
simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_comtrue_epu16(a, b); simde_test_x86_assert_equal_u16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u16x8(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u16x8(); simde__m128i r = simde_mm_comtrue_epu16(a, b); simde_test_x86_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comtrue_epu32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint32_t a[4]; const uint32_t b[4]; const uint32_t r[4]; } test_vec[] = { { { UINT32_C(1325307508), UINT32_C(1933093877), UINT32_C( 611546961), UINT32_C(3723913300) }, { UINT32_C(1325307508), UINT32_C(1933093877), UINT32_C( 611546961), UINT32_C(3723913300) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(3061869982), UINT32_C(2268594166), UINT32_C(1340170977), UINT32_C(3612747891) }, { UINT32_C(2453010534), UINT32_C(3369569641), UINT32_C(4085318074), UINT32_C(1792953591) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(1017907250), UINT32_C(1605728067), UINT32_C( 167888482), UINT32_C(1598723061) }, { UINT32_C(1303424678), UINT32_C(3048086713), UINT32_C(3410846364), UINT32_C(3231121600) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(2655992744), UINT32_C(1944002368), UINT32_C(2979186031), UINT32_C(3573797615) }, { UINT32_C(1205223517), UINT32_C(2175099276), UINT32_C(1950162975), UINT32_C( 655067991) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(2731642409), UINT32_C( 178061716), UINT32_C( 234677809), UINT32_C(2171333388) }, { UINT32_C(1192114098), UINT32_C(1990550155), UINT32_C(3048981866), UINT32_C(3911251299) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C( 295340765), 
UINT32_C(1842882023), UINT32_C(3869749712), UINT32_C(2859067633) }, { UINT32_C( 398596558), UINT32_C(3667143006), UINT32_C(1479465382), UINT32_C(1304446605) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(2987570923), UINT32_C(3849753059), UINT32_C( 83668595), UINT32_C(2581909577) }, { UINT32_C(2103013848), UINT32_C(4133273260), UINT32_C( 603906810), UINT32_C(2597488508) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, { { UINT32_C(1336056443), UINT32_C(3093385006), UINT32_C( 778135106), UINT32_C(2546379962) }, { UINT32_C(1111213305), UINT32_C( 316876573), UINT32_C(3750914703), UINT32_C( 423543000) }, { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_comtrue_epu32(a, b); simde_test_x86_assert_equal_u32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u32x4(); simde__m128i b = (i == 0) ? 
a : simde_test_x86_random_u32x4(); simde__m128i r = simde_mm_comtrue_epu32(a, b); simde_test_x86_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comtrue_epu64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint64_t a[2]; const uint64_t b[2]; const uint64_t r[2]; } test_vec[] = { { { UINT64_C( 3350510381451755447), UINT64_C(16930864899557384294) }, { UINT64_C( 3350510381451755447), UINT64_C(16930864899557384294) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 8508265454644468825), UINT64_C( 1363736690095880333) }, { UINT64_C(10500951027009105093), UINT64_C(15734554044523291655) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 7625327590781190175), UINT64_C(14754978182317413659) }, { UINT64_C( 2880546174294223309), UINT64_C(12817318442109975372) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 3750008482140780916), UINT64_C(10497191513006491489) }, { UINT64_C( 4189497283825827832), UINT64_C(13072326418065599582) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 2351656805455728255), UINT64_C(12074971609072998792) }, { UINT64_C( 1162739342028862829), UINT64_C( 3522466258036686693) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 8323443085463963497), UINT64_C( 6293901481351254200) }, { UINT64_C(17428138038975976277), UINT64_C( 7507649261918993800) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 8472081730515119193), UINT64_C(16854752128826783256) }, { UINT64_C( 2827141321774572248), UINT64_C(11978707203669650927) }, { UINT64_MAX, UINT64_MAX } }, { { UINT64_C( 8122805138361239160), UINT64_C(11901595990489207505) }, { UINT64_C(17953440984500758813), UINT64_C( 3597954551509030851) }, { UINT64_MAX, UINT64_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); simde__m128i r 
= simde_mm_comtrue_epu64(a, b); simde_test_x86_assert_equal_u64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u64x2(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u64x2(); simde__m128i r = simde_mm_comtrue_epu64(a, b); simde_test_x86_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comfalse_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { { { -INT8_C( 19), -INT8_C( 100), -INT8_C( 104), INT8_MAX, INT8_C( 106), INT8_C( 52), -INT8_C( 30), -INT8_C( 86), INT8_C( 10), INT8_C( 44), -INT8_C( 80), INT8_C( 91), -INT8_C( 25), INT8_C( 9), INT8_C( 6), -INT8_C( 45) }, { -INT8_C( 19), -INT8_C( 100), -INT8_C( 104), INT8_MAX, INT8_C( 106), INT8_C( 52), -INT8_C( 30), -INT8_C( 86), INT8_C( 10), INT8_C( 44), -INT8_C( 80), INT8_C( 91), -INT8_C( 25), INT8_C( 9), INT8_C( 6), -INT8_C( 45) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 77), -INT8_C( 80), -INT8_C( 63), -INT8_C( 49), INT8_C( 19), INT8_C( 112), -INT8_C( 122), -INT8_C( 82), INT8_C( 44), INT8_C( 67), -INT8_C( 7), -INT8_C( 43), INT8_C( 121), INT8_C( 81), INT8_C( 47), INT8_C( 83) }, { -INT8_C( 31), -INT8_C( 74), INT8_C( 101), -INT8_C( 63), -INT8_C( 92), -INT8_C( 123), -INT8_C( 14), INT8_C( 108), -INT8_C( 20), INT8_C( 13), INT8_C( 122), -INT8_C( 73), -INT8_C( 90), INT8_C( 91), INT8_C( 101), -INT8_C( 70) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { 
-INT8_C( 113), INT8_C( 0), -INT8_C( 113), INT8_C( 47), -INT8_C( 110), INT8_C( 20), INT8_C( 65), -INT8_C( 113), INT8_C( 81), INT8_C( 122), INT8_C( 97), INT8_C( 120), INT8_C( 2), INT8_C( 102), -INT8_C( 27), INT8_C( 82) }, { INT8_C( 124), INT8_C( 29), -INT8_C( 115), INT8_C( 50), -INT8_C( 111), -INT8_C( 4), -INT8_C( 46), -INT8_C( 2), -INT8_C( 97), -INT8_C( 68), INT8_C( 26), -INT8_C( 45), INT8_C( 98), -INT8_C( 12), INT8_C( 45), -INT8_C( 93) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 12), -INT8_C( 33), -INT8_C( 20), INT8_C( 35), -INT8_C( 104), INT8_C( 96), INT8_C( 65), -INT8_C( 28), INT8_C( 91), INT8_C( 67), INT8_C( 82), -INT8_C( 65), -INT8_C( 36), -INT8_C( 58), -INT8_C( 6), INT8_C( 119) }, { -INT8_C( 27), INT8_C( 88), INT8_C( 121), -INT8_C( 102), -INT8_C( 38), -INT8_C( 96), INT8_C( 106), -INT8_C( 88), INT8_C( 74), -INT8_C( 63), -INT8_C( 10), INT8_C( 119), -INT8_C( 60), -INT8_C( 35), INT8_C( 72), -INT8_C( 42) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { -INT8_C( 20), -INT8_C( 40), INT8_C( 64), INT8_C( 113), -INT8_C( 51), INT8_C( 94), INT8_C( 108), -INT8_C( 11), INT8_C( 113), INT8_C( 39), INT8_C( 52), INT8_C( 116), -INT8_C( 81), INT8_C( 121), INT8_C( 84), INT8_C( 10) }, { INT8_C( 69), -INT8_C( 17), -INT8_C( 112), -INT8_C( 63), INT8_C( 39), INT8_C( 122), -INT8_C( 94), -INT8_C( 78), INT8_C( 22), -INT8_C( 91), INT8_C( 120), INT8_C( 109), INT8_C( 115), INT8_C( 29), -INT8_C( 100), -INT8_C( 101) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 86), INT8_C( 112), -INT8_C( 12), -INT8_C( 30), 
-INT8_C( 37), INT8_C( 21), -INT8_C( 85), INT8_C( 9), -INT8_C( 68), -INT8_C( 83), INT8_C( 111), INT8_C( 93), INT8_C( 37), -INT8_C( 120), -INT8_C( 35), INT8_C( 83) }, { -INT8_C( 61), INT8_C( 107), INT8_C( 57), INT8_C( 110), INT8_C( 31), -INT8_C( 112), INT8_C( 99), INT8_C( 99), INT8_C( 42), -INT8_C( 17), INT8_C( 6), INT8_C( 124), INT8_C( 24), -INT8_C( 68), INT8_C( 19), INT8_C( 58) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 114), INT8_C( 49), INT8_C( 110), INT8_C( 62), INT8_C( 103), -INT8_C( 115), -INT8_C( 25), INT8_C( 105), INT8_C( 100), INT8_C( 93), INT8_C( 107), INT8_C( 67), -INT8_C( 28), -INT8_C( 6), INT8_C( 123), -INT8_C( 103) }, { -INT8_C( 120), INT8_C( 83), -INT8_C( 34), INT8_C( 107), INT8_C( 106), -INT8_C( 20), -INT8_C( 109), INT8_C( 5), -INT8_C( 81), INT8_C( 39), INT8_C( 10), INT8_C( 107), INT8_C( 91), -INT8_C( 61), -INT8_C( 108), -INT8_C( 6) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, { { INT8_C( 105), -INT8_C( 94), INT8_C( 23), INT8_C( 77), INT8_C( 28), -INT8_C( 49), INT8_C( 6), INT8_C( 94), -INT8_C( 112), -INT8_C( 65), -INT8_C( 112), -INT8_C( 19), -INT8_C( 108), -INT8_C( 41), INT8_C( 24), INT8_C( 38) }, { -INT8_C( 68), INT8_C( 47), -INT8_C( 26), INT8_C( 126), -INT8_C( 79), -INT8_C( 106), INT8_C( 28), -INT8_C( 34), -INT8_C( 52), -INT8_C( 42), -INT8_C( 21), INT8_C( 3), -INT8_C( 29), INT8_C( 57), INT8_C( 6), INT8_C( 37) }, { INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = 
simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_comfalse_epi8(a, b); simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_comfalse_epi8(a, b); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comfalse_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { { INT16_C( 19299), -INT16_C( 10665), -INT16_C( 7521), INT16_C( 12018), INT16_C( 23656), INT16_C( 9543), INT16_C( 10462), INT16_C( 16539) }, { INT16_C( 19299), -INT16_C( 10665), -INT16_C( 7521), INT16_C( 12018), INT16_C( 23656), INT16_C( 9543), INT16_C( 10462), INT16_C( 16539) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 30587), INT16_C( 28857), -INT16_C( 27492), INT16_C( 14308), -INT16_C( 31830), INT16_C( 3089), INT16_C( 9817), INT16_C( 339) }, { -INT16_C( 19065), -INT16_C( 23657), -INT16_C( 12645), INT16_C( 8593), -INT16_C( 17643), -INT16_C( 19978), -INT16_C( 2966), INT16_C( 12524) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 7154), INT16_C( 2239), -INT16_C( 3886), INT16_C( 22485), -INT16_C( 18832), INT16_C( 20196), -INT16_C( 28060), -INT16_C( 10398) }, { INT16_C( 26622), INT16_C( 31293), -INT16_C( 25418), -INT16_C( 32307), INT16_C( 25791), INT16_C( 23047), INT16_C( 17375), INT16_C( 16115) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { 
-INT16_C( 12675), INT16_C( 4192), -INT16_C( 19716), -INT16_C( 30506), -INT16_C( 2233), -INT16_C( 28468), -INT16_C( 31059), -INT16_C( 4581) }, { -INT16_C( 5648), INT16_C( 9395), INT16_C( 21400), -INT16_C( 27508), -INT16_C( 8051), -INT16_C( 5920), INT16_C( 7653), -INT16_C( 20072) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 13317), INT16_C( 19972), -INT16_C( 7999), INT16_C( 4302), -INT16_C( 12457), -INT16_C( 25551), INT16_C( 2524), -INT16_C( 28570) }, { -INT16_C( 15228), INT16_C( 26975), -INT16_C( 1046), -INT16_C( 23368), -INT16_C( 18776), INT16_C( 9195), -INT16_C( 29914), -INT16_C( 11325) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 26031), -INT16_C( 29677), -INT16_C( 31542), INT16_C( 14503), -INT16_C( 14905), INT16_C( 14459), INT16_C( 9368), INT16_C( 811) }, { INT16_C( 32738), INT16_C( 4267), -INT16_C( 25260), -INT16_C( 2248), INT16_C( 28216), -INT16_C( 11378), -INT16_C( 27577), -INT16_C( 5540) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { INT16_C( 9154), -INT16_C( 28427), -INT16_C( 23106), INT16_C( 18504), INT16_C( 25280), INT16_C( 11538), -INT16_C( 8567), -INT16_C( 28590) }, { -INT16_C( 24013), -INT16_C( 7170), INT16_C( 16253), -INT16_C( 10764), -INT16_C( 28316), -INT16_C( 16589), INT16_C( 16883), INT16_C( 16203) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, { { -INT16_C( 29349), INT16_C( 8722), INT16_C( 19526), -INT16_C( 30567), INT16_C( 11369), INT16_C( 16990), INT16_C( 16216), INT16_C( 32707) }, { -INT16_C( 18785), -INT16_C( 22337), -INT16_C( 5106), -INT16_C( 30508), -INT16_C( 22956), -INT16_C( 20415), -INT16_C( 26160), INT16_C( 6776) }, { INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }, }; for (size_t i = 
0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_comfalse_epi16(a, b); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_comfalse_epi16(a, b); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comfalse_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { { INT32_C( 215100579), INT32_C( 1065517066), INT32_C( 1103670763), INT32_C( 409423789) }, { INT32_C( 215100579), INT32_C( 1065517066), INT32_C( 1103670763), INT32_C( 409423789) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 676872781), INT32_C( 1472244142), INT32_C( 1847604529), -INT32_C( 1026841466) }, { INT32_C( 362710978), -INT32_C( 1209316943), -INT32_C( 256479891), -INT32_C( 1557915857) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 724618854), -INT32_C( 1985970170), -INT32_C( 1053309405), -INT32_C( 2127729986) }, { INT32_C( 81536991), INT32_C( 918335460), INT32_C( 428995352), INT32_C( 1742970503) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 2041569809), INT32_C( 1734113470), INT32_C( 1911611985), -INT32_C( 1652231136) }, { -INT32_C( 1560899038), INT32_C( 98417482), -INT32_C( 2101164269), -INT32_C( 1383207843) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 650311817), INT32_C( 970758525), INT32_C( 1169297636), INT32_C( 1606043987) }, { -INT32_C( 1219341644), INT32_C( 
1815508108), -INT32_C( 189828729), -INT32_C( 1123546536) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 339920063), INT32_C( 1191832811), INT32_C( 115792131), INT32_C( 263549184) }, { INT32_C( 190508989), -INT32_C( 1297030319), INT32_C( 918714780), -INT32_C( 549519584) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { -INT32_C( 232349166), -INT32_C( 657885007), INT32_C( 2096284887), -INT32_C( 178293298) }, { INT32_C( 1702162276), INT32_C( 556863042), INT32_C( 308439161), INT32_C( 1528157022) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, { { INT32_C( 995174552), INT32_C( 889538167), INT32_C( 1866422664), INT32_C( 1503550566) }, { -INT32_C( 1911890734), -INT32_C( 6095098), INT32_C( 1324795463), INT32_C( 2021101498) }, { INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_comfalse_epi32(a, b); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = (i == 0) ? 
a : simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_comfalse_epi32(a, b); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comfalse_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { { -INT64_C( 6366298885846147464), -INT64_C( 9053310093840015299) }, { -INT64_C( 6366298885846147464), -INT64_C( 9053310093840015299) }, { INT64_C( 0), INT64_C( 0) } }, { { -INT64_C( 7643525258363585233), INT64_C( 9172489781360728365) }, { INT64_C( 8399992451541847259), -INT64_C( 5079792567336525282) }, { INT64_C( 0), INT64_C( 0) } }, { { INT64_C( 3167527705361488802), -INT64_C( 5153845950562702508) }, { -INT64_C( 1967080906568107032), INT64_C( 5112346950309340756) }, { INT64_C( 0), INT64_C( 0) } }, { { INT64_C( 695738907731328339), INT64_C( 7775603355661091812) }, { INT64_C( 4751201580733526728), INT64_C( 8185989162854183943) }, { INT64_C( 0), INT64_C( 0) } }, { { INT64_C( 8568407790313302891), -INT64_C( 2269208996944795132) }, { -INT64_C( 3373138648826693214), -INT64_C( 9052155927955122850) }, { INT64_C( 0), INT64_C( 0) } }, { { -INT64_C( 4854216040627442414), INT64_C( 6858664712003037148) }, { -INT64_C( 2423726253071017569), -INT64_C( 4599253573658439550) }, { INT64_C( 0), INT64_C( 0) } }, { { INT64_C( 2430982534606457967), INT64_C( 3446570983174816405) }, { -INT64_C( 5882078299363616281), INT64_C( 8423466024813833627) }, { INT64_C( 0), INT64_C( 0) } }, { { -INT64_C( 1288368508354637909), -INT64_C( 7395581740469386409) }, { -INT64_C( 8567077577994681951), -INT64_C( 1840201770462104112) }, { INT64_C( 0), INT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); simde__m128i 
r = simde_mm_comfalse_epi64(a, b); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_comfalse_epi64(a, b); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comfalse_epu8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint8_t a[16]; const uint8_t b[16]; const uint8_t r[16]; } test_vec[] = { { { UINT8_C(237), UINT8_C( 63), UINT8_C(198), UINT8_C(174), UINT8_C(171), UINT8_C( 7), UINT8_C(176), UINT8_C(106), UINT8_C( 73), UINT8_C(243), UINT8_C(211), UINT8_C(142), UINT8_C(127), UINT8_C(179), UINT8_C(175), UINT8_C(229) }, { UINT8_C(237), UINT8_C( 63), UINT8_C(198), UINT8_C(174), UINT8_C(171), UINT8_C( 7), UINT8_C(176), UINT8_C(106), UINT8_C( 73), UINT8_C(243), UINT8_C(211), UINT8_C(142), UINT8_C(127), UINT8_C(179), UINT8_C(175), UINT8_C(229) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(246), UINT8_C( 26), UINT8_C( 92), UINT8_C(122), UINT8_C(159), UINT8_C( 33), UINT8_C(123), UINT8_C(185), UINT8_C( 73), UINT8_C( 47), UINT8_C(110), UINT8_C(210), UINT8_C(115), UINT8_C(151), UINT8_C(196), UINT8_C( 94) }, { UINT8_C( 94), UINT8_C( 98), UINT8_C(247), UINT8_C( 69), UINT8_C(103), UINT8_C( 80), UINT8_C( 90), UINT8_C(223), UINT8_C(148), UINT8_C( 25), UINT8_C( 77), UINT8_C( 89), UINT8_C(109), UINT8_C(248), UINT8_C(179), UINT8_C( 91) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), 
UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 13), UINT8_C(106), UINT8_C(166), UINT8_C(231), UINT8_C(120), UINT8_C( 52), UINT8_C(105), UINT8_C(133), UINT8_C( 47), UINT8_C(163), UINT8_C(157), UINT8_C(157), UINT8_C( 2), UINT8_C( 23), UINT8_C(184), UINT8_C( 36) }, { UINT8_C( 38), UINT8_C(195), UINT8_C(180), UINT8_C(122), UINT8_C(135), UINT8_C(174), UINT8_C( 4), UINT8_C(147), UINT8_C( 95), UINT8_C(253), UINT8_C(204), UINT8_C( 86), UINT8_C( 8), UINT8_C(117), UINT8_C( 81), UINT8_C( 66) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 15), UINT8_C( 60), UINT8_C(175), UINT8_C( 87), UINT8_C(137), UINT8_C(223), UINT8_C(202), UINT8_C( 52), UINT8_C(170), UINT8_C(153), UINT8_C(134), UINT8_C(125), UINT8_C(147), UINT8_C(211), UINT8_C( 59), UINT8_C(127) }, { UINT8_C(108), UINT8_C(231), UINT8_C( 99), UINT8_C( 23), UINT8_C(179), UINT8_C( 41), UINT8_C(150), UINT8_C(207), UINT8_C(211), UINT8_C( 40), UINT8_C(185), UINT8_C( 75), UINT8_C(249), UINT8_C( 49), UINT8_C(114), UINT8_C(228) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 35), UINT8_C( 21), UINT8_C(222), UINT8_C(146), UINT8_C(122), UINT8_C( 44), UINT8_C(135), UINT8_C( 15), UINT8_C(225), UINT8_C(154), UINT8_C(144), UINT8_C( 57), UINT8_C(206), UINT8_C(209), UINT8_C( 51), UINT8_C(185) }, { UINT8_C( 88), UINT8_C( 88), UINT8_C(110), UINT8_C(226), UINT8_C(146), UINT8_C(202), UINT8_C(248), UINT8_C(219), UINT8_C( 24), UINT8_C( 33), UINT8_C(122), UINT8_C( 1), UINT8_C(231), UINT8_C( 52), UINT8_C(252), UINT8_C(135) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), 
UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C(114), UINT8_C(127), UINT8_C(157), UINT8_C( 95), UINT8_C(242), UINT8_C( 35), UINT8_C(136), UINT8_C( 94), UINT8_C(253), UINT8_C( 45), UINT8_C( 35), UINT8_C(155), UINT8_C( 90), UINT8_C( 27), UINT8_C(138), UINT8_C( 24) }, { UINT8_C( 20), UINT8_C(157), UINT8_C( 59), UINT8_C(164), UINT8_C(205), UINT8_C(152), UINT8_C( 19), UINT8_MAX, UINT8_C( 85), UINT8_C(112), UINT8_C(121), UINT8_C( 64), UINT8_C(123), UINT8_C(135), UINT8_C(217), UINT8_C(116) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 35), UINT8_C( 1), UINT8_C( 84), UINT8_C(136), UINT8_C(153), UINT8_C(203), UINT8_C(181), UINT8_C(105), UINT8_C( 37), UINT8_C(219), UINT8_C(167), UINT8_C(105), UINT8_C(224), UINT8_C(185), UINT8_C( 38), UINT8_C(229) }, { UINT8_C(197), UINT8_C( 62), UINT8_C( 52), UINT8_C( 36), UINT8_C( 11), UINT8_C(155), UINT8_C(205), UINT8_C(131), UINT8_C(179), UINT8_C(158), UINT8_C( 28), UINT8_C(207), UINT8_C( 93), UINT8_C( 49), UINT8_C(239), UINT8_C(243) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, { { UINT8_C( 95), UINT8_C( 36), UINT8_C(105), UINT8_C(211), UINT8_C( 24), UINT8_C( 44), UINT8_C(246), UINT8_C(119), UINT8_C(130), UINT8_C( 43), UINT8_C(132), UINT8_C(108), UINT8_C( 7), UINT8_C(177), UINT8_C(241), UINT8_C(103) }, { UINT8_C(150), UINT8_C(195), UINT8_C(191), UINT8_C( 44), UINT8_C(245), UINT8_C(220), UINT8_C( 15), UINT8_C(174), UINT8_C( 89), UINT8_C( 50), UINT8_C(205), UINT8_C(120), UINT8_C( 52), UINT8_C( 58), UINT8_C( 40), UINT8_C( 75) }, { UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), 
UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0), UINT8_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_comfalse_epu8(a, b); simde_test_x86_assert_equal_u8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u8x16(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u8x16(); simde__m128i r = simde_mm_comfalse_epu8(a, b); simde_test_x86_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comfalse_epu16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint16_t a[8]; const uint16_t b[8]; const uint16_t r[8]; } test_vec[] = { { { UINT16_C(28238), UINT16_C( 2373), UINT16_C(19991), UINT16_C(53556), UINT16_C(42300), UINT16_C(27683), UINT16_C( 3448), UINT16_C(59601) }, { UINT16_C(28238), UINT16_C( 2373), UINT16_C(19991), UINT16_C(53556), UINT16_C(42300), UINT16_C(27683), UINT16_C( 3448), UINT16_C(59601) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(46509), UINT16_C(33350), UINT16_C(25139), UINT16_C(51906), UINT16_C(46191), UINT16_C( 499), UINT16_C(43944), UINT16_C(50794) }, { UINT16_C(26903), UINT16_C(62032), UINT16_C(14909), UINT16_C(49237), UINT16_C(53626), UINT16_C(12778), UINT16_C(54746), UINT16_C(44720) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(39986), UINT16_C(61232), UINT16_C(13932), UINT16_C( 7628), UINT16_C(44319), UINT16_C(13301), UINT16_C(35683), UINT16_C(43679) }, { UINT16_C(40419), UINT16_C(22002), UINT16_C(63283), 
UINT16_C(35395), UINT16_C(14436), UINT16_C(33346), UINT16_C( 4054), UINT16_C( 629) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(65102), UINT16_C(15333), UINT16_C(24136), UINT16_C(61463), UINT16_C(42127), UINT16_C(54845), UINT16_C(57866), UINT16_C(16558) }, { UINT16_C(37337), UINT16_C(64404), UINT16_C(36257), UINT16_C(30692), UINT16_C(24867), UINT16_C(10388), UINT16_C(50452), UINT16_C(11272) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(25896), UINT16_C(11981), UINT16_C(58481), UINT16_C(35465), UINT16_C( 8679), UINT16_C(45618), UINT16_C(47432), UINT16_C(53375) }, { UINT16_C(52514), UINT16_C(44444), UINT16_C( 1325), UINT16_C(53538), UINT16_C(54495), UINT16_C(60486), UINT16_C(65083), UINT16_C(29777) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(22762), UINT16_C(36944), UINT16_C(53132), UINT16_C(13580), UINT16_C(44112), UINT16_C(36412), UINT16_C( 6082), UINT16_C(41722) }, { UINT16_C(55781), UINT16_C(12660), UINT16_C(26242), UINT16_C(57060), UINT16_C( 6847), UINT16_C(38082), UINT16_C(50163), UINT16_C( 8759) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C(24761), UINT16_C(10454), UINT16_C(10308), UINT16_C(14215), UINT16_C(52978), UINT16_C(13507), UINT16_C( 1059), UINT16_C(64775) }, { UINT16_C(15947), UINT16_C(20354), UINT16_C(47174), UINT16_C(59409), UINT16_C(47852), UINT16_C(59502), UINT16_C( 7142), UINT16_C(32165) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, { { UINT16_C( 1216), UINT16_C(49095), UINT16_C(63038), UINT16_C(55777), UINT16_C( 3828), UINT16_C(27183), UINT16_C(34833), UINT16_C(11150) }, { UINT16_C(33660), UINT16_C(53552), UINT16_C( 802), 
UINT16_C(13715), UINT16_C(15502), UINT16_C(45748), UINT16_C( 3770), UINT16_C(52864) }, { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_comfalse_epu16(a, b); simde_test_x86_assert_equal_u16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u16x8(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u16x8(); simde__m128i r = simde_mm_comfalse_epu16(a, b); simde_test_x86_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comfalse_epu32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint32_t a[4]; const uint32_t b[4]; const uint32_t r[4]; } test_vec[] = { { { UINT32_C( 495701028), UINT32_C(1661223205), UINT32_C(4192859518), UINT32_C(1920445492) }, { UINT32_C( 495701028), UINT32_C(1661223205), UINT32_C(4192859518), UINT32_C(1920445492) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(2112172191), UINT32_C( 223432893), UINT32_C(3103467980), UINT32_C(1571922967) }, { UINT32_C( 176832528), UINT32_C( 500634271), UINT32_C(2841053882), UINT32_C( 464156470) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C( 487116252), UINT32_C(3123947200), UINT32_C(3282700494), UINT32_C(1914758822) }, { UINT32_C(1311566250), UINT32_C(1312976980), UINT32_C(1105384910), UINT32_C(1829761724) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(1996743516), UINT32_C(2199981778), UINT32_C(2603311549), UINT32_C(1437981454) }, { UINT32_C(2967096858), 
UINT32_C(1076716526), UINT32_C(2347949794), UINT32_C(2995725169) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(1413347655), UINT32_C(2936246172), UINT32_C( 149274049), UINT32_C(1297714168) }, { UINT32_C(4210751369), UINT32_C( 964752146), UINT32_C(1307618847), UINT32_C( 806360314) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(2065602757), UINT32_C(2277585349), UINT32_C(3527451137), UINT32_C(2767388428) }, { UINT32_C(4076236063), UINT32_C(2183342442), UINT32_C(1346555052), UINT32_C( 830196994) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(3054389244), UINT32_C(1397030134), UINT32_C(3232013222), UINT32_C(2691251953) }, { UINT32_C(1630811907), UINT32_C(1677659294), UINT32_C(1548927154), UINT32_C(4223226927) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, { { UINT32_C(3413478935), UINT32_C(1534261719), UINT32_C(2625187287), UINT32_C(2326220880) }, { UINT32_C( 236903774), UINT32_C( 603628630), UINT32_C( 965153625), UINT32_C(3601969961) }, { UINT32_C( 0), UINT32_C( 0), UINT32_C( 0), UINT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_comfalse_epu32(a, b); simde_test_x86_assert_equal_u32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u32x4(); simde__m128i b = (i == 0) ? 
a : simde_test_x86_random_u32x4(); simde__m128i r = simde_mm_comfalse_epu32(a, b); simde_test_x86_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_comfalse_epu64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint64_t a[2]; const uint64_t b[2]; const uint64_t r[2]; } test_vec[] = { { { UINT64_C(16587935071193527612), UINT64_C(12291236871839146685) }, { UINT64_C(16587935071193527612), UINT64_C(12291236871839146685) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 936179423556331607), UINT64_C( 885787018599615688) }, { UINT64_C( 4428066784809584020), UINT64_C( 5121735281528422783) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 7273618990605459223), UINT64_C(16460119146588271400) }, { UINT64_C(12183757784063755590), UINT64_C(12255644603997213255) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 8411877623334783685), UINT64_C(11890677820861681824) }, { UINT64_C( 8071428364446482042), UINT64_C( 2052088554210205242) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 9532074734755492490), UINT64_C(11021214541104852057) }, { UINT64_C(15719502767937189466), UINT64_C(16370063756630862978) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(15887387323360124302), UINT64_C( 370777384618741371) }, { UINT64_C( 3559445977505232651), UINT64_C( 5122796480762872903) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C(14069199249831139064), UINT64_C( 3783401529332825901) }, { UINT64_C(13522030736708520376), UINT64_C(10529289842004242352) }, { UINT64_C( 0), UINT64_C( 0) } }, { { UINT64_C( 9474241958379987696), UINT64_C( 7849626402426275479) }, { UINT64_C(14039565349656785543), UINT64_C( 733341401437713638) }, { UINT64_C( 0), UINT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i b = 
simde_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_comfalse_epu64(a, b); simde_test_x86_assert_equal_u64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u64x2(); simde__m128i b = (i == 0) ? a : simde_test_x86_random_u64x2(); simde__m128i r = simde_mm_comfalse_epu64(a, b); simde_test_x86_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_frcz_ps (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 a[4]; const simde_float32 r[4]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 12.42), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -45.81) }, { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 0.42), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -0.81) } }, { { SIMDE_FLOAT32_C( -0.00), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) }, { SIMDE_FLOAT32_C( -0.00), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, #endif { { SIMDE_FLOAT32_C( -267.43), SIMDE_FLOAT32_C( 697.99), SIMDE_FLOAT32_C( 720.88), SIMDE_FLOAT32_C( 488.02) }, { SIMDE_FLOAT32_C( -0.43), SIMDE_FLOAT32_C( 0.99), SIMDE_FLOAT32_C( 0.88), SIMDE_FLOAT32_C( 0.02) } }, { { SIMDE_FLOAT32_C( -83.96), SIMDE_FLOAT32_C( 757.62), SIMDE_FLOAT32_C( 563.16), SIMDE_FLOAT32_C( -657.64) }, { SIMDE_FLOAT32_C( -0.96), SIMDE_FLOAT32_C( 0.62), SIMDE_FLOAT32_C( 0.16), SIMDE_FLOAT32_C( -0.64) } }, { { SIMDE_FLOAT32_C( -248.94), SIMDE_FLOAT32_C( -920.22), SIMDE_FLOAT32_C( 216.77), SIMDE_FLOAT32_C( 216.41) }, { SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( -0.22), SIMDE_FLOAT32_C( 0.77), SIMDE_FLOAT32_C( 0.41) } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) 
} }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) }, { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 r = simde_mm_frcz_ps(a); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float32 values[8 * 1 * sizeof(simde__m128)]; simde_test_x86_random_f32x4_full(8, 1, values, -1000.0f, 1000.0f, SIMDE_TEST_VEC_FLOAT_NAN); for (size_t i = 0 ; i < 8 ; i++) { simde__m128 a = simde_test_x86_random_extract_f32x4(i, 2, 0, values); simde__m128 r = simde_mm_frcz_ps(a); simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_frcz_pd (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float64 a[2]; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 721.61), SIMDE_FLOAT64_C( -998.54) }, { SIMDE_FLOAT64_C( 0.61), SIMDE_FLOAT64_C( -0.54) } }, { { SIMDE_FLOAT64_C( 303.63), SIMDE_FLOAT64_C( -654.53) }, { SIMDE_FLOAT64_C( 0.63), SIMDE_FLOAT64_C( -0.53) } }, { { SIMDE_FLOAT64_C( 252.97), SIMDE_FLOAT64_C( -202.67) }, { SIMDE_FLOAT64_C( 0.97), SIMDE_FLOAT64_C( -0.67) } }, { { SIMDE_FLOAT64_C( 265.30), SIMDE_FLOAT64_C( 854.12) }, { SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( 0.12) } }, { { SIMDE_FLOAT64_C( 222.08), SIMDE_FLOAT64_C( 378.34) }, { SIMDE_FLOAT64_C( 0.08), SIMDE_FLOAT64_C( 0.34) } }, { { SIMDE_FLOAT64_C( -860.59), SIMDE_FLOAT64_C( 134.25) }, { SIMDE_FLOAT64_C( -0.59), SIMDE_FLOAT64_C( 0.25) } }, { { SIMDE_FLOAT64_C( -587.76), 
SIMDE_FLOAT64_C( -499.31) }, { SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( -0.31) } }, { { SIMDE_FLOAT64_C( 337.99), SIMDE_FLOAT64_C( 907.90) }, { SIMDE_FLOAT64_C( 0.99), SIMDE_FLOAT64_C( 0.90) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d r = simde_mm_frcz_pd(a); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128d a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128d r = simde_mm_frcz_pd(a); simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_frcz_ss (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 a[4]; const simde_float32 b[4]; const simde_float32 r[4]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 905.27), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -960.75) }, { SIMDE_FLOAT32_C( 948.61), SIMDE_MATH_NANF, SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -509.69) }, { SIMDE_FLOAT32_C( 0.61), SIMDE_FLOAT32_C( 905.27), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -960.75) } }, #endif { { SIMDE_FLOAT32_C( -447.80), SIMDE_FLOAT32_C( 686.39), SIMDE_FLOAT32_C( -727.16), SIMDE_FLOAT32_C( 702.02) }, { SIMDE_FLOAT32_C( 382.92), SIMDE_FLOAT32_C( -167.09), SIMDE_FLOAT32_C( -187.72), SIMDE_FLOAT32_C( 463.24) }, { SIMDE_FLOAT32_C( 0.92), SIMDE_FLOAT32_C( 686.39), SIMDE_FLOAT32_C( -727.16), SIMDE_FLOAT32_C( 702.02) } }, { { SIMDE_FLOAT32_C( -988.16), SIMDE_FLOAT32_C( 55.76), SIMDE_FLOAT32_C( -728.63), SIMDE_FLOAT32_C( 331.46) }, { SIMDE_FLOAT32_C( 783.44), SIMDE_FLOAT32_C( 272.32), SIMDE_FLOAT32_C( -952.27), SIMDE_FLOAT32_C( 666.01) }, { SIMDE_FLOAT32_C( 0.44), SIMDE_FLOAT32_C( 55.76), SIMDE_FLOAT32_C( -728.63), SIMDE_FLOAT32_C( 331.46) } }, { { SIMDE_FLOAT32_C( -35.49), SIMDE_FLOAT32_C( 
647.51), SIMDE_FLOAT32_C( 748.77), SIMDE_FLOAT32_C( -662.83) }, { SIMDE_FLOAT32_C( -133.03), SIMDE_FLOAT32_C( -522.75), SIMDE_FLOAT32_C( -207.62), SIMDE_FLOAT32_C( 173.86) }, { SIMDE_FLOAT32_C( -0.03), SIMDE_FLOAT32_C( 647.51), SIMDE_FLOAT32_C( 748.77), SIMDE_FLOAT32_C( -662.83) } }, { { SIMDE_FLOAT32_C( -244.91), SIMDE_FLOAT32_C( -284.10), SIMDE_FLOAT32_C( 74.56), SIMDE_FLOAT32_C( 211.89) }, { SIMDE_FLOAT32_C( -649.34), SIMDE_FLOAT32_C( 710.75), SIMDE_FLOAT32_C( -984.68), SIMDE_FLOAT32_C( -194.62) }, { SIMDE_FLOAT32_C( -0.34), SIMDE_FLOAT32_C( -284.10), SIMDE_FLOAT32_C( 74.56), SIMDE_FLOAT32_C( 211.89) } }, { { SIMDE_FLOAT32_C( 748.28), SIMDE_FLOAT32_C( -293.86), SIMDE_FLOAT32_C( -703.67), SIMDE_FLOAT32_C( 664.42) }, { SIMDE_FLOAT32_C( 553.33), SIMDE_FLOAT32_C( 680.04), SIMDE_FLOAT32_C( 602.34), SIMDE_FLOAT32_C( -119.66) }, { SIMDE_FLOAT32_C( 0.33), SIMDE_FLOAT32_C( -293.86), SIMDE_FLOAT32_C( -703.67), SIMDE_FLOAT32_C( 664.42) } }, { { SIMDE_FLOAT32_C( 55.02), SIMDE_FLOAT32_C( -925.84), SIMDE_FLOAT32_C( -321.21), SIMDE_FLOAT32_C( -997.62) }, { SIMDE_FLOAT32_C( 247.23), SIMDE_FLOAT32_C( -17.61), SIMDE_FLOAT32_C( 19.20), SIMDE_FLOAT32_C( 227.45) }, { SIMDE_FLOAT32_C( 0.23), SIMDE_FLOAT32_C( -925.84), SIMDE_FLOAT32_C( -321.21), SIMDE_FLOAT32_C( -997.62) } }, { { SIMDE_FLOAT32_C( -490.77), SIMDE_FLOAT32_C( 357.46), SIMDE_FLOAT32_C( -507.37), SIMDE_FLOAT32_C( 765.25) }, { SIMDE_FLOAT32_C( 759.58), SIMDE_FLOAT32_C( 800.53), SIMDE_FLOAT32_C( -773.67), SIMDE_FLOAT32_C( -557.60) }, { SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 357.46), SIMDE_FLOAT32_C( -507.37), SIMDE_FLOAT32_C( 765.25) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128 r = simde_mm_frcz_ss(a, b); simde_test_x86_assert_equal_f32x4(r, simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float32 values[8 * 2 * sizeof(simde__m128)]; 
simde_test_x86_random_f32x4_full(8, 2, values, -1000.0f, 1000.0f, SIMDE_TEST_VEC_FLOAT_NAN); for (size_t i = 0 ; i < 8 ; i++) { simde__m128 a = simde_test_x86_random_extract_f32x4(i, 2, 0, values); simde__m128 b = simde_test_x86_random_extract_f32x4(i, 2, 1, values); simde__m128 r = simde_mm_frcz_ss(a, b); simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_frcz_sd (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float64 a[2]; const simde_float64 b[2]; const simde_float64 r[2]; } test_vec[] = { #if !defined(SIMDE_FAST_NANS) { { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -505.11) }, { SIMDE_FLOAT64_C( 53.80), SIMDE_MATH_NAN }, { SIMDE_FLOAT64_C( 0.80), SIMDE_FLOAT64_C( -505.11) } }, { { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -152.81) }, { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -604.42) }, { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -152.81) } }, #endif { { SIMDE_FLOAT64_C( -580.37), SIMDE_FLOAT64_C( 397.44) }, { SIMDE_FLOAT64_C( 12.30), SIMDE_FLOAT64_C( -571.64) }, { SIMDE_FLOAT64_C( 0.30), SIMDE_FLOAT64_C( 397.44) } }, { { SIMDE_FLOAT64_C( -317.73), SIMDE_FLOAT64_C( 561.51) }, { SIMDE_FLOAT64_C( 558.76), SIMDE_FLOAT64_C( 989.68) }, { SIMDE_FLOAT64_C( 0.76), SIMDE_FLOAT64_C( 561.51) } }, { { SIMDE_FLOAT64_C( 447.19), SIMDE_FLOAT64_C( 768.18) }, { SIMDE_FLOAT64_C( -108.37), SIMDE_FLOAT64_C( 761.16) }, { SIMDE_FLOAT64_C( -0.37), SIMDE_FLOAT64_C( 768.18) } }, { { SIMDE_FLOAT64_C( 935.85), SIMDE_FLOAT64_C( 553.27) }, { SIMDE_FLOAT64_C( 803.16), SIMDE_FLOAT64_C( -832.27) }, { SIMDE_FLOAT64_C( 0.16), SIMDE_FLOAT64_C( 553.27) } }, { { SIMDE_FLOAT64_C( 468.06), SIMDE_FLOAT64_C( -827.20) }, { SIMDE_FLOAT64_C( 877.56), SIMDE_FLOAT64_C( 399.46) }, { SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( -827.20) } }, { { SIMDE_FLOAT64_C( 647.08), SIMDE_FLOAT64_C( 298.93) }, { SIMDE_FLOAT64_C( 654.10), SIMDE_FLOAT64_C( -828.97) }, { 
SIMDE_FLOAT64_C( 0.10), SIMDE_FLOAT64_C( 298.93) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d b = simde_mm_loadu_pd(test_vec[i].b); simde__m128d r = simde_mm_frcz_sd(a, b); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); simde_float64 values[8 * 2 * sizeof(simde__m128d)]; simde_test_x86_random_f64x2_full(8, 2, values, -1000.0, 1000.0, SIMDE_TEST_VEC_FLOAT_NAN); for (size_t i = 0 ; i < 8 ; i++) { simde__m128d a = simde_test_x86_random_extract_f64x2(i, 2, 0, values); simde__m128d b = simde_test_x86_random_extract_f64x2(i, 2, 1, values); simde__m128d r = simde_mm_frcz_sd(a, b); simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_frcz_ps (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 a[8]; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( -670.89), SIMDE_FLOAT32_C( -640.19), SIMDE_FLOAT32_C( -142.80), SIMDE_FLOAT32_C( -716.79), SIMDE_FLOAT32_C( -808.65), SIMDE_FLOAT32_C( -913.21), SIMDE_FLOAT32_C( -995.30), SIMDE_FLOAT32_C( 46.54) }, { SIMDE_FLOAT32_C( -0.89), SIMDE_FLOAT32_C( -0.19), SIMDE_FLOAT32_C( -0.80), SIMDE_FLOAT32_C( -0.79), SIMDE_FLOAT32_C( -0.65), SIMDE_FLOAT32_C( -0.21), SIMDE_FLOAT32_C( -0.30), SIMDE_FLOAT32_C( 0.54) } }, { { SIMDE_FLOAT32_C( 709.52), SIMDE_FLOAT32_C( -168.92), SIMDE_FLOAT32_C( 708.30), SIMDE_FLOAT32_C( -368.88), SIMDE_FLOAT32_C( 358.26), SIMDE_FLOAT32_C( 212.20), SIMDE_FLOAT32_C( -23.04), SIMDE_FLOAT32_C( -594.96) }, { SIMDE_FLOAT32_C( 0.52), SIMDE_FLOAT32_C( -0.92), SIMDE_FLOAT32_C( 0.30), SIMDE_FLOAT32_C( -0.88), SIMDE_FLOAT32_C( 0.26), SIMDE_FLOAT32_C( 0.20), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( -0.96) } }, { { SIMDE_FLOAT32_C( -152.93), 
SIMDE_FLOAT32_C( 661.67), SIMDE_FLOAT32_C( -162.02), SIMDE_FLOAT32_C( 323.10), SIMDE_FLOAT32_C( -796.75), SIMDE_FLOAT32_C( -577.32), SIMDE_FLOAT32_C( -414.41), SIMDE_FLOAT32_C( -571.15) }, { SIMDE_FLOAT32_C( -0.93), SIMDE_FLOAT32_C( 0.67), SIMDE_FLOAT32_C( -0.02), SIMDE_FLOAT32_C( 0.10), SIMDE_FLOAT32_C( -0.75), SIMDE_FLOAT32_C( -0.32), SIMDE_FLOAT32_C( -0.41), SIMDE_FLOAT32_C( -0.15) } }, { { SIMDE_FLOAT32_C( -484.73), SIMDE_FLOAT32_C( -429.49), SIMDE_FLOAT32_C( -27.86), SIMDE_FLOAT32_C( -466.84), SIMDE_FLOAT32_C( 149.21), SIMDE_FLOAT32_C( 613.76), SIMDE_FLOAT32_C( 652.27), SIMDE_FLOAT32_C( 129.73) }, { SIMDE_FLOAT32_C( -0.73), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( -0.86), SIMDE_FLOAT32_C( -0.84), SIMDE_FLOAT32_C( 0.21), SIMDE_FLOAT32_C( 0.76), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( 0.73) } }, { { SIMDE_FLOAT32_C( 249.43), SIMDE_FLOAT32_C( -759.09), SIMDE_FLOAT32_C( 194.25), SIMDE_FLOAT32_C( 612.66), SIMDE_FLOAT32_C( -662.04), SIMDE_FLOAT32_C( 773.80), SIMDE_FLOAT32_C( -30.49), SIMDE_FLOAT32_C( -143.59) }, { SIMDE_FLOAT32_C( 0.43), SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( 0.25), SIMDE_FLOAT32_C( 0.66), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( -0.49), SIMDE_FLOAT32_C( -0.59) } }, { { SIMDE_FLOAT32_C( 823.36), SIMDE_FLOAT32_C( -109.71), SIMDE_FLOAT32_C( 234.41), SIMDE_FLOAT32_C( -777.64), SIMDE_FLOAT32_C( -767.94), SIMDE_FLOAT32_C( 179.54), SIMDE_FLOAT32_C( -285.07), SIMDE_FLOAT32_C( -947.33) }, { SIMDE_FLOAT32_C( 0.36), SIMDE_FLOAT32_C( -0.71), SIMDE_FLOAT32_C( 0.41), SIMDE_FLOAT32_C( -0.64), SIMDE_FLOAT32_C( -0.94), SIMDE_FLOAT32_C( 0.54), SIMDE_FLOAT32_C( -0.07), SIMDE_FLOAT32_C( -0.33) } }, { { SIMDE_FLOAT32_C( 975.95), SIMDE_FLOAT32_C( -909.05), SIMDE_FLOAT32_C( -991.33), SIMDE_FLOAT32_C( -284.04), SIMDE_FLOAT32_C( -533.98), SIMDE_FLOAT32_C( -158.67), SIMDE_FLOAT32_C( -884.82), SIMDE_FLOAT32_C( -734.86) }, { SIMDE_FLOAT32_C( 0.95), SIMDE_FLOAT32_C( -0.05), SIMDE_FLOAT32_C( -0.33), SIMDE_FLOAT32_C( -0.04), SIMDE_FLOAT32_C( -0.98), 
SIMDE_FLOAT32_C( -0.67), SIMDE_FLOAT32_C( -0.82), SIMDE_FLOAT32_C( -0.86) } }, { { SIMDE_FLOAT32_C( 387.80), SIMDE_FLOAT32_C( 840.27), SIMDE_FLOAT32_C( -592.09), SIMDE_FLOAT32_C( 772.45), SIMDE_FLOAT32_C( -51.97), SIMDE_FLOAT32_C( 198.58), SIMDE_FLOAT32_C( 360.09), SIMDE_FLOAT32_C( -598.99) }, { SIMDE_FLOAT32_C( 0.80), SIMDE_FLOAT32_C( 0.27), SIMDE_FLOAT32_C( -0.09), SIMDE_FLOAT32_C( 0.45), SIMDE_FLOAT32_C( -0.97), SIMDE_FLOAT32_C( 0.58), SIMDE_FLOAT32_C( 0.09), SIMDE_FLOAT32_C( -0.99) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 r = simde_mm256_frcz_ps(a); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256 a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m256 r = simde_mm256_frcz_ps(a); simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_frcz_pd (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float64 a[4]; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( -92.32), SIMDE_FLOAT64_C( -119.17), SIMDE_FLOAT64_C( -663.14), SIMDE_FLOAT64_C( 279.09) }, { SIMDE_FLOAT64_C( -0.32), SIMDE_FLOAT64_C( -0.17), SIMDE_FLOAT64_C( -0.14), SIMDE_FLOAT64_C( 0.09) } }, { { SIMDE_FLOAT64_C( 211.65), SIMDE_FLOAT64_C( -935.18), SIMDE_FLOAT64_C( 766.59), SIMDE_FLOAT64_C( -764.15) }, { SIMDE_FLOAT64_C( 0.65), SIMDE_FLOAT64_C( -0.18), SIMDE_FLOAT64_C( 0.59), SIMDE_FLOAT64_C( -0.15) } }, { { SIMDE_FLOAT64_C( -970.76), SIMDE_FLOAT64_C( -729.00), SIMDE_FLOAT64_C( -996.77), SIMDE_FLOAT64_C( -535.75) }, { SIMDE_FLOAT64_C( -0.76), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.77), SIMDE_FLOAT64_C( -0.75) } }, { { SIMDE_FLOAT64_C( -982.91), SIMDE_FLOAT64_C( 823.36), SIMDE_FLOAT64_C( -807.06), 
SIMDE_FLOAT64_C( 171.42) }, { SIMDE_FLOAT64_C( -0.91), SIMDE_FLOAT64_C( 0.36), SIMDE_FLOAT64_C( -0.06), SIMDE_FLOAT64_C( 0.42) } }, { { SIMDE_FLOAT64_C( -382.67), SIMDE_FLOAT64_C( -794.85), SIMDE_FLOAT64_C( 456.89), SIMDE_FLOAT64_C( 280.19) }, { SIMDE_FLOAT64_C( -0.67), SIMDE_FLOAT64_C( -0.85), SIMDE_FLOAT64_C( 0.89), SIMDE_FLOAT64_C( 0.19) } }, { { SIMDE_FLOAT64_C( 203.16), SIMDE_FLOAT64_C( 872.13), SIMDE_FLOAT64_C( 702.87), SIMDE_FLOAT64_C( -756.46) }, { SIMDE_FLOAT64_C( 0.16), SIMDE_FLOAT64_C( 0.13), SIMDE_FLOAT64_C( 0.87), SIMDE_FLOAT64_C( -0.46) } }, { { SIMDE_FLOAT64_C( -115.21), SIMDE_FLOAT64_C( -916.01), SIMDE_FLOAT64_C( -621.39), SIMDE_FLOAT64_C( 35.68) }, { SIMDE_FLOAT64_C( -0.21), SIMDE_FLOAT64_C( -0.01), SIMDE_FLOAT64_C( -0.39), SIMDE_FLOAT64_C( 0.68) } }, { { SIMDE_FLOAT64_C( 788.26), SIMDE_FLOAT64_C( -680.53), SIMDE_FLOAT64_C( -520.92), SIMDE_FLOAT64_C( 379.99) }, { SIMDE_FLOAT64_C( 0.26), SIMDE_FLOAT64_C( -0.53), SIMDE_FLOAT64_C( -0.92), SIMDE_FLOAT64_C( 0.99) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d r = simde_mm256_frcz_pd(a); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256d a = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256d r = simde_mm256_frcz_pd(a); simde_test_x86_write_f64x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_haddw_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[16]; const int16_t r[8]; } test_vec[] = { { { -INT8_C( 101), INT8_C( 107), -INT8_C( 89), -INT8_C( 33), -INT8_C( 91), INT8_C( 120), INT8_C( 62), INT8_C( 29), INT8_C( 25), -INT8_C( 49), INT8_C( 91), -INT8_C( 98), INT8_C( 80), INT8_C( 56), INT8_C( 56), -INT8_C( 1) }, { INT16_C( 6), -INT16_C( 122), 
INT16_C( 29), INT16_C( 91), -INT16_C( 24), -INT16_C( 7), INT16_C( 136), INT16_C( 55) } }, { { -INT8_C( 122), -INT8_C( 52), INT8_C( 24), -INT8_C( 56), INT8_C( 48), -INT8_C( 3), INT8_C( 55), -INT8_C( 108), -INT8_C( 113), -INT8_C( 41), INT8_C( 1), INT8_C( 114), -INT8_C( 122), -INT8_C( 122), -INT8_C( 126), -INT8_C( 118) }, { -INT16_C( 174), -INT16_C( 32), INT16_C( 45), -INT16_C( 53), -INT16_C( 154), INT16_C( 115), -INT16_C( 244), -INT16_C( 244) } }, { { -INT8_C( 87), INT8_C( 23), INT8_C( 30), INT8_C( 4), -INT8_C( 106), -INT8_C( 125), -INT8_C( 126), INT8_C( 37), -INT8_C( 120), -INT8_C( 23), -INT8_C( 38), -INT8_C( 69), INT8_C( 41), INT8_C( 27), INT8_C( 68), -INT8_C( 68) }, { -INT16_C( 64), INT16_C( 34), -INT16_C( 231), -INT16_C( 89), -INT16_C( 143), -INT16_C( 107), INT16_C( 68), INT16_C( 0) } }, { { -INT8_C( 88), INT8_C( 92), -INT8_C( 123), INT8_C( 42), INT8_C( 12), INT8_C( 104), -INT8_C( 4), INT8_C( 57), -INT8_C( 56), -INT8_C( 75), -INT8_C( 44), -INT8_C( 76), -INT8_C( 113), -INT8_C( 7), INT8_C( 125), -INT8_C( 99) }, { INT16_C( 4), -INT16_C( 81), INT16_C( 116), INT16_C( 53), -INT16_C( 131), -INT16_C( 120), -INT16_C( 120), INT16_C( 26) } }, { { INT8_C( 105), -INT8_C( 20), INT8_C( 92), INT8_C( 20), INT8_C( 7), INT8_C( 79), -INT8_C( 63), INT8_C( 121), INT8_C( 83), INT8_C( 45), INT8_C( 26), -INT8_C( 43), INT8_C( 76), INT8_C( 97), INT8_C( 103), INT8_C( 118) }, { INT16_C( 85), INT16_C( 112), INT16_C( 86), INT16_C( 58), INT16_C( 128), -INT16_C( 17), INT16_C( 173), INT16_C( 221) } }, { { INT8_C( 17), INT8_C( 88), -INT8_C( 18), -INT8_C( 38), INT8_C( 59), INT8_C( 24), INT8_C( 48), -INT8_C( 49), INT8_C( 112), -INT8_C( 127), INT8_C( 27), -INT8_C( 41), INT8_C( 54), -INT8_C( 45), -INT8_C( 127), -INT8_C( 47) }, { INT16_C( 105), -INT16_C( 56), INT16_C( 83), -INT16_C( 1), -INT16_C( 15), -INT16_C( 14), INT16_C( 9), -INT16_C( 174) } }, { { INT8_C( 4), INT8_C( 113), -INT8_C( 55), -INT8_C( 42), -INT8_C( 97), -INT8_C( 27), -INT8_C( 27), INT8_C( 98), -INT8_C( 92), INT8_C( 35), -INT8_C( 124), 
-INT8_C( 76), INT8_C( 96), INT8_C( 18), -INT8_C( 120), INT8_C( 119) }, { INT16_C( 117), -INT16_C( 97), -INT16_C( 124), INT16_C( 71), -INT16_C( 57), -INT16_C( 200), INT16_C( 114), -INT16_C( 1) } }, { { -INT8_C( 24), INT8_C( 72), -INT8_C( 70), -INT8_C( 95), INT8_C( 102), INT8_C( 21), -INT8_C( 68), -INT8_C( 100), -INT8_C( 78), -INT8_C( 60), INT8_C( 64), -INT8_C( 92), INT8_C( 34), INT8_C( 29), INT8_C( 120), INT8_C( 112) }, { INT16_C( 48), -INT16_C( 165), INT16_C( 123), -INT16_C( 168), -INT16_C( 138), -INT16_C( 28), INT16_C( 63), INT16_C( 232) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i r = simde_mm_haddw_epi8(a); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_haddw_epi8(a); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_haddw_epu8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint8_t a[16]; const uint16_t r[8]; } test_vec[] = { { { UINT8_C(162), UINT8_C( 47), UINT8_C(205), UINT8_C( 20), UINT8_C( 7), UINT8_C( 76), UINT8_C(212), UINT8_C( 38), UINT8_C(160), UINT8_C( 84), UINT8_C(126), UINT8_C( 31), UINT8_C( 14), UINT8_C( 55), UINT8_C(143), UINT8_C( 5) }, { UINT16_C( 209), UINT16_C( 225), UINT16_C( 83), UINT16_C( 250), UINT16_C( 244), UINT16_C( 157), UINT16_C( 69), UINT16_C( 148) } }, { { UINT8_C( 86), UINT8_C(181), UINT8_C( 81), UINT8_C( 71), UINT8_C( 53), UINT8_C(104), UINT8_C(137), UINT8_C(233), UINT8_C(180), UINT8_C( 5), UINT8_C(170), UINT8_C(224), UINT8_C(251), UINT8_C(223), UINT8_C( 73), UINT8_C(192) }, { UINT16_C( 267), UINT16_C( 152), UINT16_C( 157), UINT16_C( 370), UINT16_C( 185), UINT16_C( 394), UINT16_C( 474), UINT16_C( 265) } }, { { UINT8_C(106), UINT8_C(172), 
UINT8_C(209), UINT8_C(148), UINT8_C(230), UINT8_C(140), UINT8_C(120), UINT8_C( 13), UINT8_C(113), UINT8_C( 71), UINT8_C(113), UINT8_C(224), UINT8_C(254), UINT8_C(214), UINT8_C( 99), UINT8_C(105) }, { UINT16_C( 278), UINT16_C( 357), UINT16_C( 370), UINT16_C( 133), UINT16_C( 184), UINT16_C( 337), UINT16_C( 468), UINT16_C( 204) } }, { { UINT8_C(131), UINT8_C( 38), UINT8_C( 27), UINT8_C(148), UINT8_C( 79), UINT8_C( 24), UINT8_C(126), UINT8_C(253), UINT8_C(158), UINT8_C(204), UINT8_C(193), UINT8_C( 86), UINT8_C(107), UINT8_C( 31), UINT8_C(220), UINT8_C( 10) }, { UINT16_C( 169), UINT16_C( 175), UINT16_C( 103), UINT16_C( 379), UINT16_C( 362), UINT16_C( 279), UINT16_C( 138), UINT16_C( 230) } }, { { UINT8_C(133), UINT8_C(114), UINT8_C( 59), UINT8_C( 31), UINT8_C(229), UINT8_C(174), UINT8_C(183), UINT8_C( 97), UINT8_C( 63), UINT8_C(132), UINT8_C(198), UINT8_C(189), UINT8_C(215), UINT8_C(250), UINT8_C(238), UINT8_C(235) }, { UINT16_C( 247), UINT16_C( 90), UINT16_C( 403), UINT16_C( 280), UINT16_C( 195), UINT16_C( 387), UINT16_C( 465), UINT16_C( 473) } }, { { UINT8_C(150), UINT8_C( 35), UINT8_C(128), UINT8_C( 80), UINT8_C( 93), UINT8_C( 45), UINT8_C(130), UINT8_C( 34), UINT8_C(153), UINT8_C(162), UINT8_C(237), UINT8_C(205), UINT8_C( 24), UINT8_C(231), UINT8_C( 25), UINT8_C(149) }, { UINT16_C( 185), UINT16_C( 208), UINT16_C( 138), UINT16_C( 164), UINT16_C( 315), UINT16_C( 442), UINT16_C( 255), UINT16_C( 174) } }, { { UINT8_C( 27), UINT8_C( 9), UINT8_C(117), UINT8_C(125), UINT8_C(172), UINT8_C(184), UINT8_C(123), UINT8_C(104), UINT8_C( 49), UINT8_C(148), UINT8_C(227), UINT8_C(127), UINT8_C( 66), UINT8_C(168), UINT8_C( 23), UINT8_C(210) }, { UINT16_C( 36), UINT16_C( 242), UINT16_C( 356), UINT16_C( 227), UINT16_C( 197), UINT16_C( 354), UINT16_C( 234), UINT16_C( 233) } }, { { UINT8_C(184), UINT8_C( 52), UINT8_C(232), UINT8_C( 66), UINT8_C( 6), UINT8_C(176), UINT8_C(127), UINT8_C(158), UINT8_C(205), UINT8_C( 13), UINT8_C(150), UINT8_C( 12), UINT8_C(170), UINT8_C( 62), UINT8_C(232), 
UINT8_C(171) }, { UINT16_C( 236), UINT16_C( 298), UINT16_C( 182), UINT16_C( 285), UINT16_C( 218), UINT16_C( 162), UINT16_C( 232), UINT16_C( 403) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i r = simde_mm_haddw_epu8(a); simde_test_x86_assert_equal_u16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u8x16(); simde__m128i r = simde_mm_haddw_epu8(a); simde_test_x86_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_haddd_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[16]; const int32_t r[4]; } test_vec[] = { { { INT8_C( 83), -INT8_C( 9), -INT8_C( 27), INT8_C( 118), -INT8_C( 30), -INT8_C( 75), -INT8_C( 85), INT8_C( 108), INT8_C( 112), -INT8_C( 2), INT8_C( 50), -INT8_C( 18), -INT8_C( 26), -INT8_C( 22), -INT8_C( 58), INT8_C( 104) }, { INT32_C( 165), -INT32_C( 82), INT32_C( 142), -INT32_C( 2) } }, { { INT8_C( 16), -INT8_C( 30), -INT8_C( 69), INT8_C( 51), -INT8_C( 13), -INT8_C( 88), INT8_C( 92), -INT8_C( 70), INT8_C( 97), -INT8_C( 105), INT8_C( 36), -INT8_C( 36), -INT8_C( 54), INT8_C( 45), INT8_C( 49), -INT8_C( 110) }, { -INT32_C( 32), -INT32_C( 79), -INT32_C( 8), -INT32_C( 70) } }, { { INT8_C( 85), -INT8_C( 58), -INT8_C( 11), -INT8_C( 46), INT8_C( 47), -INT8_C( 86), INT8_C( 47), -INT8_C( 77), INT8_C( 35), INT8_C( 74), INT8_C( 25), -INT8_C( 47), INT8_C( 108), -INT8_C( 57), -INT8_C( 29), -INT8_C( 13) }, { -INT32_C( 30), -INT32_C( 69), INT32_C( 87), INT32_C( 9) } }, { { -INT8_C( 57), -INT8_C( 76), INT8_C( 97), -INT8_C( 21), -INT8_C( 52), INT8_C( 29), INT8_C( 1), -INT8_C( 66), INT8_C( 125), -INT8_C( 56), -INT8_C( 1), INT8_C( 5), INT8_C( 32), -INT8_C( 70), -INT8_C( 36), -INT8_C( 109) }, { -INT32_C( 57), -INT32_C( 88), INT32_C( 73), -INT32_C( 183) } }, 
{ { INT8_C( 74), -INT8_C( 3), INT8_C( 12), INT8_C( 88), INT8_C( 61), -INT8_C( 95), -INT8_C( 17), -INT8_C( 123), INT8_C( 115), INT8_C( 1), INT8_C( 2), -INT8_C( 15), INT8_C( 123), INT8_C( 72), INT8_C( 87), -INT8_C( 68) }, { INT32_C( 171), -INT32_C( 174), INT32_C( 103), INT32_C( 214) } }, { { INT8_C( 4), INT8_C( 51), INT8_C( 66), INT8_C( 50), INT8_C( 56), INT8_C( 24), INT8_C( 86), -INT8_C( 14), INT8_C( 75), INT8_C( 39), -INT8_C( 113), INT8_C( 79), INT8_C( 83), -INT8_C( 16), -INT8_C( 47), -INT8_C( 10) }, { INT32_C( 171), INT32_C( 152), INT32_C( 80), INT32_C( 10) } }, { { INT8_C( 90), INT8_C( 37), -INT8_C( 111), -INT8_C( 47), -INT8_C( 79), -INT8_C( 95), -INT8_C( 46), INT8_C( 44), -INT8_C( 120), -INT8_C( 86), INT8_C( 84), INT8_C( 23), -INT8_C( 68), INT8_C( 115), INT8_C( 8), INT8_C( 11) }, { -INT32_C( 31), -INT32_C( 176), -INT32_C( 99), INT32_C( 66) } }, { { -INT8_C( 16), -INT8_C( 27), -INT8_C( 58), -INT8_C( 48), -INT8_C( 33), -INT8_C( 97), INT8_C( 66), -INT8_C( 99), -INT8_C( 16), INT8_C( 60), INT8_C( 61), -INT8_C( 126), INT8_C( 12), -INT8_C( 44), -INT8_C( 8), INT8_C( 3) }, { -INT32_C( 149), -INT32_C( 163), -INT32_C( 21), -INT32_C( 37) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i r = simde_mm_haddd_epi8(a); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_haddd_epi8(a); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_haddd_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int32_t r[4]; } test_vec[] = { { { -INT16_C( 14932), INT16_C( 1774), -INT16_C( 19658), -INT16_C( 4158), -INT16_C( 12919), INT16_C( 2423), INT16_C( 21206), INT16_C( 10463) }, { 
-INT32_C( 13158), -INT32_C( 23816), -INT32_C( 10496), INT32_C( 31669) } }, { { INT16_C( 21683), -INT16_C( 29610), -INT16_C( 17046), INT16_C( 2480), -INT16_C( 28777), INT16_C( 25967), INT16_C( 28914), INT16_C( 569) }, { -INT32_C( 7927), -INT32_C( 14566), -INT32_C( 2810), INT32_C( 29483) } }, { { INT16_C( 25961), -INT16_C( 17270), -INT16_C( 8590), INT16_C( 5543), -INT16_C( 3169), -INT16_C( 28461), -INT16_C( 4750), INT16_C( 23236) }, { INT32_C( 8691), -INT32_C( 3047), -INT32_C( 31630), INT32_C( 18486) } }, { { INT16_C( 2164), INT16_C( 12120), INT16_C( 30593), INT16_C( 31877), -INT16_C( 22170), -INT16_C( 15986), -INT16_C( 13394), INT16_C( 14973) }, { INT32_C( 14284), INT32_C( 62470), -INT32_C( 38156), INT32_C( 1579) } }, { { -INT16_C( 29000), -INT16_C( 16691), INT16_C( 10765), -INT16_C( 6297), -INT16_C( 23567), INT16_C( 29648), INT16_C( 19000), -INT16_C( 5472) }, { -INT32_C( 45691), INT32_C( 4468), INT32_C( 6081), INT32_C( 13528) } }, { { -INT16_C( 30629), -INT16_C( 32458), -INT16_C( 10293), INT16_C( 16298), INT16_C( 4485), INT16_C( 24067), -INT16_C( 4633), -INT16_C( 2902) }, { -INT32_C( 63087), INT32_C( 6005), INT32_C( 28552), -INT32_C( 7535) } }, { { -INT16_C( 14654), -INT16_C( 11489), -INT16_C( 24656), -INT16_C( 21013), INT16_C( 26024), INT16_C( 31958), INT16_C( 29647), INT16_C( 8281) }, { -INT32_C( 26143), -INT32_C( 45669), INT32_C( 57982), INT32_C( 37928) } }, { { INT16_C( 23697), INT16_C( 19030), -INT16_C( 6927), -INT16_C( 26105), INT16_C( 20253), INT16_C( 1077), -INT16_C( 8378), INT16_C( 30890) }, { INT32_C( 42727), -INT32_C( 33032), INT32_C( 21330), INT32_C( 22512) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i r = simde_mm_haddd_epi16(a); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i r = 
simde_mm_haddd_epi16(a); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_haddd_epu8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint8_t a[16]; const uint32_t r[4]; } test_vec[] = { { { UINT8_C(174), UINT8_C(152), UINT8_C(232), UINT8_C(194), UINT8_C( 3), UINT8_C( 70), UINT8_C( 27), UINT8_C(175), UINT8_C(235), UINT8_C(193), UINT8_C( 77), UINT8_C(113), UINT8_C(224), UINT8_C(112), UINT8_C(217), UINT8_C( 68) }, { UINT32_C( 752), UINT32_C( 275), UINT32_C( 618), UINT32_C( 621) } }, { { UINT8_C( 62), UINT8_C( 12), UINT8_C( 34), UINT8_C( 82), UINT8_C(154), UINT8_C(166), UINT8_C(132), UINT8_C(212), UINT8_C( 86), UINT8_C(236), UINT8_C(139), UINT8_C(123), UINT8_C(114), UINT8_C(168), UINT8_C( 99), UINT8_C( 14) }, { UINT32_C( 190), UINT32_C( 664), UINT32_C( 584), UINT32_C( 395) } }, { { UINT8_C(165), UINT8_C(137), UINT8_C(144), UINT8_C( 84), UINT8_C(173), UINT8_C( 38), UINT8_C(223), UINT8_C( 52), UINT8_C(227), UINT8_C( 64), UINT8_C(157), UINT8_C( 27), UINT8_C( 18), UINT8_C( 71), UINT8_C( 6), UINT8_C(158) }, { UINT32_C( 530), UINT32_C( 486), UINT32_C( 475), UINT32_C( 253) } }, { { UINT8_C(137), UINT8_C( 32), UINT8_C( 0), UINT8_C( 97), UINT8_C(111), UINT8_C( 38), UINT8_C( 10), UINT8_C( 55), UINT8_C( 87), UINT8_C(111), UINT8_C(111), UINT8_C(138), UINT8_C( 20), UINT8_C( 80), UINT8_C(191), UINT8_C(254) }, { UINT32_C( 266), UINT32_C( 214), UINT32_C( 447), UINT32_C( 545) } }, { { UINT8_C(207), UINT8_C( 35), UINT8_C(127), UINT8_C( 81), UINT8_C( 87), UINT8_C( 71), UINT8_C( 32), UINT8_C(134), UINT8_C(183), UINT8_C(105), UINT8_C( 46), UINT8_C( 66), UINT8_C( 14), UINT8_C( 2), UINT8_C(202), UINT8_C(118) }, { UINT32_C( 450), UINT32_C( 324), UINT32_C( 400), UINT32_C( 336) } }, { { UINT8_C(155), UINT8_C( 33), UINT8_C( 90), UINT8_C( 62), UINT8_C( 24), UINT8_C(107), UINT8_C(127), UINT8_C( 9), UINT8_C( 73), UINT8_C( 96), UINT8_C( 72), UINT8_C(251), UINT8_C(212), 
UINT8_C(224), UINT8_C(165), UINT8_C(143) }, { UINT32_C( 340), UINT32_C( 267), UINT32_C( 492), UINT32_C( 744) } }, { { UINT8_C( 83), UINT8_C(237), UINT8_C( 30), UINT8_C(129), UINT8_C(168), UINT8_C(178), UINT8_C(196), UINT8_C(235), UINT8_C(144), UINT8_C(196), UINT8_C(104), UINT8_C(174), UINT8_C(124), UINT8_C(169), UINT8_C( 12), UINT8_C( 19) }, { UINT32_C( 479), UINT32_C( 777), UINT32_C( 618), UINT32_C( 324) } }, { { UINT8_C(156), UINT8_C(151), UINT8_C(151), UINT8_C(178), UINT8_C( 60), UINT8_C(125), UINT8_C(205), UINT8_C(147), UINT8_C( 83), UINT8_C( 72), UINT8_C(125), UINT8_C(148), UINT8_C( 89), UINT8_C( 95), UINT8_C(252), UINT8_C( 10) }, { UINT32_C( 636), UINT32_C( 537), UINT32_C( 428), UINT32_C( 446) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i r = simde_mm_haddd_epu8(a); simde_test_x86_assert_equal_u32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u8x16(); simde__m128i r = simde_mm_haddd_epu8(a); simde_test_x86_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_haddd_epu16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint16_t a[8]; const uint32_t r[4]; } test_vec[] = { { { UINT16_C(28506), UINT16_C(43987), UINT16_C(28233), UINT16_C(44213), UINT16_C(55958), UINT16_C( 9908), UINT16_C(16898), UINT16_C(48820) }, { UINT32_C( 72493), UINT32_C( 72446), UINT32_C( 65866), UINT32_C( 65718) } }, { { UINT16_C( 2227), UINT16_C(34079), UINT16_C(25987), UINT16_C( 7642), UINT16_C(44446), UINT16_C( 7800), UINT16_C(54604), UINT16_C(47022) }, { UINT32_C( 36306), UINT32_C( 33629), UINT32_C( 52246), UINT32_C( 101626) } }, { { UINT16_C(12811), UINT16_C(39175), UINT16_C(33761), UINT16_C( 4313), UINT16_C(13040), UINT16_C(29560), UINT16_C(55115), UINT16_C(49065) }, { 
UINT32_C( 51986), UINT32_C( 38074), UINT32_C( 42600), UINT32_C( 104180) } }, { { UINT16_C(65032), UINT16_C(32602), UINT16_C(10903), UINT16_C(61328), UINT16_C( 6738), UINT16_C(24736), UINT16_C(19028), UINT16_C(56994) }, { UINT32_C( 97634), UINT32_C( 72231), UINT32_C( 31474), UINT32_C( 76022) } }, { { UINT16_C(48527), UINT16_C( 4387), UINT16_C(64025), UINT16_C(24858), UINT16_C(21959), UINT16_C(23837), UINT16_C(28669), UINT16_C(24277) }, { UINT32_C( 52914), UINT32_C( 88883), UINT32_C( 45796), UINT32_C( 52946) } }, { { UINT16_C( 196), UINT16_C(26544), UINT16_C(54301), UINT16_C(20693), UINT16_C( 5270), UINT16_C( 9309), UINT16_C(50970), UINT16_C(51136) }, { UINT32_C( 26740), UINT32_C( 74994), UINT32_C( 14579), UINT32_C( 102106) } }, { { UINT16_C(38925), UINT16_C(56461), UINT16_C(55961), UINT16_C(58719), UINT16_C(51522), UINT16_C(44044), UINT16_C( 5057), UINT16_C(58143) }, { UINT32_C( 95386), UINT32_C( 114680), UINT32_C( 95566), UINT32_C( 63200) } }, { { UINT16_C(38671), UINT16_C( 1929), UINT16_C(27840), UINT16_C(35475), UINT16_C( 9618), UINT16_C(12311), UINT16_C(21318), UINT16_C(47856) }, { UINT32_C( 40600), UINT32_C( 63315), UINT32_C( 21929), UINT32_C( 69174) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i r = simde_mm_haddd_epu16(a); simde_test_x86_assert_equal_u32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u16x8(); simde__m128i r = simde_mm_haddd_epu16(a); simde_test_x86_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_haddq_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[16]; const int64_t r[2]; } test_vec[] = { { { -INT8_C( 82), INT8_C( 76), -INT8_C( 81), -INT8_C( 62), INT8_C( 8), INT8_C( 43), -INT8_C( 113), -INT8_C( 26), -INT8_C( 
118), INT8_C( 24), -INT8_C( 84), INT8_C( 40), INT8_C( 62), -INT8_C( 54), INT8_C( 111), -INT8_C( 107) }, { -INT64_C( 237), -INT64_C( 126) } }, { { INT8_C( 16), INT8_C( 73), INT8_C( 77), INT8_C( 37), INT8_C( 38), -INT8_C( 7), -INT8_C( 79), -INT8_C( 29), INT8_C( 111), -INT8_C( 44), INT8_C( 54), INT8_C( 77), INT8_MAX, -INT8_C( 9), INT8_C( 26), -INT8_C( 2) }, { INT64_C( 126), INT64_C( 340) } }, { { -INT8_C( 102), INT8_C( 96), -INT8_C( 16), -INT8_C( 118), INT8_C( 14), -INT8_C( 10), -INT8_C( 108), -INT8_C( 86), -INT8_C( 58), -INT8_C( 55), INT8_C( 100), -INT8_C( 103), INT8_C( 29), -INT8_C( 100), -INT8_C( 83), -INT8_C( 67) }, { -INT64_C( 330), -INT64_C( 337) } }, { { -INT8_C( 16), -INT8_C( 95), INT8_C( 100), -INT8_C( 118), -INT8_C( 9), -INT8_C( 125), INT8_C( 23), -INT8_C( 92), INT8_C( 85), -INT8_C( 87), INT8_C( 34), INT8_C( 67), INT8_C( 109), -INT8_C( 70), INT8_C( 38), -INT8_C( 36) }, { -INT64_C( 332), INT64_C( 140) } }, { { -INT8_C( 8), INT8_C( 92), -INT8_C( 73), -INT8_C( 2), INT8_C( 84), INT8_C( 66), INT8_C( 85), INT8_C( 122), INT8_C( 31), INT8_C( 101), -INT8_C( 98), -INT8_C( 58), INT8_C( 5), -INT8_C( 111), -INT8_C( 63), -INT8_C( 93) }, { INT64_C( 366), -INT64_C( 286) } }, { { -INT8_C( 42), INT8_C( 36), INT8_C( 54), -INT8_C( 2), -INT8_C( 36), INT8_C( 19), -INT8_C( 83), INT8_C( 20), INT8_C( 107), INT8_C( 45), INT8_C( 68), -INT8_C( 38), -INT8_C( 71), -INT8_C( 92), -INT8_C( 4), -INT8_C( 100) }, { -INT64_C( 34), -INT64_C( 85) } }, { { -INT8_C( 16), -INT8_C( 55), INT8_C( 110), -INT8_C( 28), -INT8_C( 126), INT8_C( 23), -INT8_C( 70), -INT8_C( 99), -INT8_C( 67), INT8_C( 115), -INT8_C( 63), INT8_C( 120), -INT8_C( 97), -INT8_C( 78), -INT8_C( 108), -INT8_C( 113) }, { -INT64_C( 261), -INT64_C( 291) } }, { { -INT8_C( 21), INT8_C( 92), INT8_C( 43), INT8_C( 73), INT8_C( 124), -INT8_C( 81), INT8_C( 91), INT8_C( 125), -INT8_C( 38), -INT8_C( 25), INT8_C( 2), -INT8_C( 38), INT8_C( 11), -INT8_C( 67), -INT8_C( 123), -INT8_C( 122) }, { INT64_C( 446), -INT64_C( 400) } }, }; for (size_t i = 0 ; 
i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i r = simde_mm_haddq_epi8(a); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_haddq_epi8(a); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_haddq_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int64_t r[2]; } test_vec[] = { { { INT16_C( 11948), INT16_C( 1404), INT16_C( 32064), INT16_C( 23979), INT16_C( 31687), INT16_C( 30517), INT16_C( 1683), -INT16_C( 14068) }, { INT64_C( 69395), INT64_C( 49819) } }, { { -INT16_C( 12201), INT16_C( 12972), INT16_C( 25217), INT16_C( 9481), INT16_C( 24777), INT16_C( 2757), INT16_C( 3594), -INT16_C( 7769) }, { INT64_C( 35469), INT64_C( 23359) } }, { { INT16_C( 4945), INT16_C( 10569), INT16_C( 32052), INT16_C( 16), INT16_C( 1638), -INT16_C( 29855), -INT16_C( 27256), -INT16_C( 26606) }, { INT64_C( 47582), -INT64_C( 82079) } }, { { INT16_C( 2113), -INT16_C( 32225), INT16_C( 12688), INT16_C( 21919), INT16_C( 7777), INT16_C( 13302), -INT16_C( 24993), -INT16_C( 2229) }, { INT64_C( 4495), -INT64_C( 6143) } }, { { INT16_C( 10), INT16_C( 5949), INT16_C( 7689), -INT16_C( 12335), -INT16_C( 25663), INT16_C( 31919), INT16_C( 26662), INT16_C( 18063) }, { INT64_C( 1313), INT64_C( 50981) } }, { { -INT16_C( 29231), INT16_C( 494), INT16_C( 9812), INT16_C( 21765), -INT16_C( 21558), INT16_C( 7675), INT16_C( 30130), INT16_C( 3674) }, { INT64_C( 2840), INT64_C( 19921) } }, { { INT16_C( 32509), -INT16_C( 26176), INT16_C( 26981), INT16_C( 4823), -INT16_C( 15871), INT16_C( 4486), -INT16_C( 31210), INT16_C( 6505) }, { INT64_C( 38137), -INT64_C( 36090) } }, { { -INT16_C( 6607), INT16_C( 30591), INT16_C( 18546), INT16_C( 
27939), -INT16_C( 28885), -INT16_C( 28610), -INT16_C( 25430), INT16_C( 28602) }, { INT64_C( 70469), -INT64_C( 54323) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i r = simde_mm_haddq_epi16(a); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_haddq_epi16(a); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_haddq_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int64_t r[2]; } test_vec[] = { { { INT32_C( 1966740562), INT32_C( 285762801), INT32_C( 323879757), INT32_C( 1519056640) }, { INT64_C( 2252503363), INT64_C( 1842936397) } }, { { -INT32_C( 1405314170), -INT32_C( 438198379), INT32_C( 1411731628), INT32_C( 1649744623) }, { -INT64_C( 1843512549), INT64_C( 3061476251) } }, { { INT32_C( 1947421490), INT32_C( 323819860), INT32_C( 1265690574), INT32_C( 1356383371) }, { INT64_C( 2271241350), INT64_C( 2622073945) } }, { { INT32_C( 1754019323), INT32_C( 69743715), INT32_C( 807041143), INT32_C( 772929322) }, { INT64_C( 1823763038), INT64_C( 1579970465) } }, { { INT32_C( 1588832965), INT32_C( 1653575734), INT32_C( 2119366059), INT32_C( 1161688160) }, { INT64_C( 3242408699), INT64_C( 3281054219) } }, { { INT32_C( 1926642613), INT32_C( 350097028), -INT32_C( 293302351), INT32_C( 500710403) }, { INT64_C( 2276739641), INT64_C( 207408052) } }, { { -INT32_C( 75318990), INT32_C( 1152700481), INT32_C( 2002630157), INT32_C( 2141228839) }, { INT64_C( 1077381491), INT64_C( 4143858996) } }, { { -INT32_C( 1820052001), INT32_C( 1542272417), INT32_C( 1422596997), INT32_C( 1972498465) }, { -INT64_C( 277779584), INT64_C( 3395095462) } }, }; for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i r = simde_mm_haddq_epi32(a); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_haddq_epi32(a); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_haddq_epu8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint8_t a[16]; const uint64_t r[2]; } test_vec[] = { { { UINT8_C(162), UINT8_C( 0), UINT8_C(233), UINT8_C( 19), UINT8_C( 27), UINT8_C(225), UINT8_C(165), UINT8_C( 2), UINT8_C( 28), UINT8_C(176), UINT8_C(249), UINT8_C(253), UINT8_C(135), UINT8_C(158), UINT8_C(232), UINT8_C( 71) }, { UINT64_C( 833), UINT64_C( 1302) } }, { { UINT8_C(159), UINT8_C(171), UINT8_C(253), UINT8_C(147), UINT8_C( 98), UINT8_C(178), UINT8_C( 59), UINT8_C( 34), UINT8_C( 25), UINT8_C(166), UINT8_C( 86), UINT8_C( 43), UINT8_C( 45), UINT8_C( 27), UINT8_C( 36), UINT8_C(127) }, { UINT64_C( 1099), UINT64_C( 555) } }, { { UINT8_C( 60), UINT8_C( 8), UINT8_C( 78), UINT8_C(109), UINT8_C(108), UINT8_C(203), UINT8_C( 75), UINT8_C(227), UINT8_MAX, UINT8_C(109), UINT8_C(143), UINT8_C(215), UINT8_C( 40), UINT8_C( 40), UINT8_MAX, UINT8_C(229) }, { UINT64_C( 868), UINT64_C( 1286) } }, { { UINT8_C( 29), UINT8_C( 38), UINT8_C(169), UINT8_C( 58), UINT8_C(110), UINT8_C(140), UINT8_C(178), UINT8_C(176), UINT8_C(149), UINT8_C(183), UINT8_C(143), UINT8_C( 58), UINT8_C(206), UINT8_C(197), UINT8_C(120), UINT8_C(131) }, { UINT64_C( 898), UINT64_C( 1187) } }, { { UINT8_C( 41), UINT8_C( 88), UINT8_C( 26), UINT8_C(210), UINT8_C(221), UINT8_C(150), UINT8_C(141), UINT8_C( 49), UINT8_C(223), UINT8_C(117), UINT8_C(133), UINT8_C(206), UINT8_C(179), UINT8_C( 53), UINT8_C(204), UINT8_C(161) }, { UINT64_C( 926), UINT64_C( 
1276) } }, { { UINT8_C(131), UINT8_C( 46), UINT8_C(240), UINT8_C( 79), UINT8_C(110), UINT8_C(203), UINT8_C( 57), UINT8_C( 79), UINT8_C( 34), UINT8_C(215), UINT8_C(221), UINT8_C( 75), UINT8_C(173), UINT8_C(247), UINT8_C(119), UINT8_C(201) }, { UINT64_C( 945), UINT64_C( 1285) } }, { { UINT8_C(144), UINT8_C(120), UINT8_C(182), UINT8_C( 10), UINT8_C( 21), UINT8_C( 75), UINT8_C( 82), UINT8_C( 50), UINT8_C(227), UINT8_C( 78), UINT8_C( 69), UINT8_C(170), UINT8_C(208), UINT8_C(206), UINT8_C( 55), UINT8_C(195) }, { UINT64_C( 684), UINT64_C( 1208) } }, { { UINT8_C(246), UINT8_C( 73), UINT8_C( 58), UINT8_C(156), UINT8_C( 9), UINT8_C(120), UINT8_C(183), UINT8_C( 69), UINT8_C(232), UINT8_C(140), UINT8_C(168), UINT8_C( 36), UINT8_C(113), UINT8_C(185), UINT8_C( 8), UINT8_C(152) }, { UINT64_C( 914), UINT64_C( 1034) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i r = simde_mm_haddq_epu8(a); simde_test_x86_assert_equal_u64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u8x16(); simde__m128i r = simde_mm_haddq_epu8(a); simde_test_x86_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_haddq_epu16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint16_t a[8]; const uint64_t r[2]; } test_vec[] = { { { UINT16_C(61850), UINT16_C(56712), UINT16_C(62141), UINT16_C(12420), UINT16_C(33332), UINT16_C(13109), UINT16_C(63782), UINT16_C(37416) }, { UINT64_C( 193123), UINT64_C( 147639) } }, { { UINT16_C( 416), UINT16_C(59375), UINT16_C(39656), UINT16_C(56086), UINT16_C(24846), UINT16_C(36441), UINT16_C( 4547), UINT16_C(14867) }, { UINT64_C( 155533), UINT64_C( 80701) } }, { { UINT16_C(19054), UINT16_C( 5114), UINT16_C(37245), UINT16_C(28938), UINT16_C(39417), UINT16_C(12479), 
UINT16_C(49246), UINT16_C(22407) }, { UINT64_C( 90351), UINT64_C( 123549) } }, { { UINT16_C(56745), UINT16_C(63863), UINT16_C(14770), UINT16_C(22845), UINT16_C(56252), UINT16_C(20822), UINT16_C( 2379), UINT16_C(62849) }, { UINT64_C( 158223), UINT64_C( 142302) } }, { { UINT16_C( 2870), UINT16_C(29555), UINT16_C(13052), UINT16_C(16076), UINT16_C( 6491), UINT16_C(27466), UINT16_C(11039), UINT16_C(23356) }, { UINT64_C( 61553), UINT64_C( 68352) } }, { { UINT16_C(25913), UINT16_C(39482), UINT16_C(23824), UINT16_C( 1812), UINT16_C(33820), UINT16_C(13576), UINT16_C(43184), UINT16_C( 4664) }, { UINT64_C( 91031), UINT64_C( 95244) } }, { { UINT16_C(48407), UINT16_C(50779), UINT16_C(56547), UINT16_C(57010), UINT16_C(35970), UINT16_C(43581), UINT16_C(16658), UINT16_C(58160) }, { UINT64_C( 212743), UINT64_C( 154369) } }, { { UINT16_C( 8822), UINT16_C(37281), UINT16_C( 4010), UINT16_C(11395), UINT16_C(58195), UINT16_C( 727), UINT16_C(63131), UINT16_C(55329) }, { UINT64_C( 61508), UINT64_C( 177382) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i r = simde_mm_haddq_epu16(a); simde_test_x86_assert_equal_u64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u16x8(); simde__m128i r = simde_mm_haddq_epu16(a); simde_test_x86_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_haddq_epu32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const uint32_t a[4]; const uint64_t r[2]; } test_vec[] = { { { UINT32_C(3558467387), UINT32_C(2611255513), UINT32_C(3053615509), UINT32_C( 977865439) }, { UINT64_C( 6169722900), UINT64_C( 4031480948) } }, { { UINT32_C(2819791755), UINT32_C( 309483047), UINT32_C(2133622924), UINT32_C(2435120307) }, { UINT64_C( 3129274802), UINT64_C( 4568743231) } }, { { 
UINT32_C(1696010698), UINT32_C(3146411399), UINT32_C(1459644861), UINT32_C(2809171501) }, { UINT64_C( 4842422097), UINT64_C( 4268816362) } }, { { UINT32_C(2784385182), UINT32_C(4291377968), UINT32_C(1970136301), UINT32_C(2200617377) }, { UINT64_C( 7075763150), UINT64_C( 4170753678) } }, { { UINT32_C(1102889963), UINT32_C(2269930389), UINT32_C(1419764513), UINT32_C(1905186468) }, { UINT64_C( 3372820352), UINT64_C( 3324950981) } }, { { UINT32_C(1370940120), UINT32_C(1014023532), UINT32_C(2902759070), UINT32_C(4162407692) }, { UINT64_C( 2384963652), UINT64_C( 7065166762) } }, { { UINT32_C( 795903687), UINT32_C(1204978346), UINT32_C(2001170410), UINT32_C(3783911917) }, { UINT64_C( 2000882033), UINT64_C( 5785082327) } }, { { UINT32_C(1958264928), UINT32_C( 290454660), UINT32_C(3965132488), UINT32_C( 920365469) }, { UINT64_C( 2248719588), UINT64_C( 4885497957) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i r = simde_mm_haddq_epu32(a); simde_test_x86_assert_equal_u64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_u32x4(); simde__m128i r = simde_mm_haddq_epu32(a); simde_test_x86_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_hsubw_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[16]; const int16_t r[8]; } test_vec[] = { { { -INT8_C( 122), -INT8_C( 31), -INT8_C( 101), -INT8_C( 7), INT8_C( 110), -INT8_C( 45), INT8_C( 4), INT8_C( 67), INT8_C( 63), -INT8_C( 107), INT8_C( 96), -INT8_C( 122), -INT8_C( 80), -INT8_C( 49), INT8_C( 75), INT8_C( 63) }, { -INT16_C( 91), -INT16_C( 94), INT16_C( 155), -INT16_C( 63), INT16_C( 170), INT16_C( 218), -INT16_C( 31), INT16_C( 12) } }, { { INT8_C( 95), INT8_C( 122), INT8_C( 102), -INT8_C( 42), INT8_C( 31), -INT8_C( 
24), INT8_C( 79), -INT8_C( 59), -INT8_C( 109), INT8_C( 5), -INT8_C( 48), -INT8_C( 3), -INT8_C( 4), -INT8_C( 36), INT8_C( 86), -INT8_C( 123) }, { -INT16_C( 27), INT16_C( 144), INT16_C( 55), INT16_C( 138), -INT16_C( 114), -INT16_C( 45), INT16_C( 32), INT16_C( 209) } }, { { INT8_C( 79), INT8_C( 93), -INT8_C( 91), INT8_C( 101), -INT8_C( 117), INT8_C( 93), -INT8_C( 53), -INT8_C( 62), INT8_C( 73), -INT8_C( 34), INT8_C( 82), INT8_C( 74), -INT8_C( 108), -INT8_C( 111), -INT8_C( 70), -INT8_C( 45) }, { -INT16_C( 14), -INT16_C( 192), -INT16_C( 210), INT16_C( 9), INT16_C( 107), INT16_C( 8), INT16_C( 3), -INT16_C( 25) } }, { { -INT8_C( 4), -INT8_C( 103), INT8_C( 39), INT8_C( 62), -INT8_C( 26), -INT8_C( 110), INT8_C( 86), -INT8_C( 94), INT8_C( 38), -INT8_C( 46), -INT8_C( 43), -INT8_C( 89), -INT8_C( 48), -INT8_C( 18), INT8_C( 116), INT8_C( 47) }, { INT16_C( 99), -INT16_C( 23), INT16_C( 84), INT16_C( 180), INT16_C( 84), INT16_C( 46), -INT16_C( 30), INT16_C( 69) } }, { { INT8_C( 74), INT8_MIN, -INT8_C( 9), INT8_C( 60), -INT8_C( 90), INT8_C( 40), INT8_C( 13), INT8_C( 14), INT8_C( 47), -INT8_C( 47), -INT8_C( 122), -INT8_C( 117), INT8_C( 66), INT8_C( 54), -INT8_C( 64), -INT8_C( 28) }, { INT16_C( 202), -INT16_C( 69), -INT16_C( 130), -INT16_C( 1), INT16_C( 94), -INT16_C( 5), INT16_C( 12), -INT16_C( 36) } }, { { INT8_C( 96), -INT8_C( 92), INT8_C( 98), INT8_C( 118), INT8_MIN, INT8_C( 1), INT8_C( 77), -INT8_C( 18), -INT8_C( 86), INT8_C( 13), -INT8_C( 48), -INT8_C( 79), -INT8_C( 63), -INT8_C( 24), INT8_C( 27), INT8_C( 123) }, { INT16_C( 188), -INT16_C( 20), -INT16_C( 129), INT16_C( 95), -INT16_C( 99), INT16_C( 31), -INT16_C( 39), -INT16_C( 96) } }, { { -INT8_C( 95), -INT8_C( 43), -INT8_C( 9), INT8_C( 69), INT8_C( 106), INT8_C( 61), -INT8_C( 76), INT8_C( 109), INT8_C( 26), -INT8_C( 9), INT8_C( 98), INT8_C( 17), INT8_C( 98), -INT8_C( 58), INT8_C( 68), -INT8_C( 69) }, { -INT16_C( 52), -INT16_C( 78), INT16_C( 45), -INT16_C( 185), INT16_C( 35), INT16_C( 81), INT16_C( 156), INT16_C( 137) } }, { { 
-INT8_C( 78), INT8_C( 35), -INT8_C( 127), INT8_C( 68), -INT8_C( 105), INT8_C( 61), INT8_C( 29), -INT8_C( 14), INT8_C( 70), INT8_C( 63), INT8_C( 40), -INT8_C( 28), INT8_C( 121), -INT8_C( 47), INT8_C( 53), -INT8_C( 80) }, { -INT16_C( 113), -INT16_C( 195), -INT16_C( 166), INT16_C( 43), INT16_C( 7), INT16_C( 68), INT16_C( 168), INT16_C( 133) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i r = simde_mm_hsubw_epi8(a); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_hsubw_epi8(a); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_hsubd_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int32_t r[4]; } test_vec[] = { { { -INT16_C( 7815), INT16_C( 18958), -INT16_C( 23684), INT16_C( 10406), -INT16_C( 10702), -INT16_C( 23729), INT16_C( 19099), -INT16_C( 24275) }, { -INT32_C( 26773), -INT32_C( 34090), INT32_C( 13027), INT32_C( 43374) } }, { { -INT16_C( 24805), INT16_C( 29163), INT16_C( 20432), -INT16_C( 2388), -INT16_C( 4317), INT16_C( 2117), -INT16_C( 20067), INT16_C( 6057) }, { -INT32_C( 53968), INT32_C( 22820), -INT32_C( 6434), -INT32_C( 26124) } }, { { INT16_C( 12029), INT16_C( 4516), INT16_C( 25223), -INT16_C( 31027), -INT16_C( 1890), INT16_C( 2998), -INT16_C( 14187), -INT16_C( 9115) }, { INT32_C( 7513), INT32_C( 56250), -INT32_C( 4888), -INT32_C( 5072) } }, { { -INT16_C( 24637), -INT16_C( 15096), INT16_C( 16085), INT16_C( 17124), -INT16_C( 23192), -INT16_C( 6769), -INT16_C( 28200), -INT16_C( 1953) }, { -INT32_C( 9541), -INT32_C( 1039), -INT32_C( 16423), -INT32_C( 26247) } }, { { INT16_C( 17234), INT16_C( 25634), -INT16_C( 31951), -INT16_C( 12017), -INT16_C( 
6777), INT16_C( 4093), INT16_C( 19449), -INT16_C( 19245) }, { -INT32_C( 8400), -INT32_C( 19934), -INT32_C( 10870), INT32_C( 38694) } }, { { -INT16_C( 21551), INT16_C( 1856), INT16_C( 4686), INT16_C( 7596), -INT16_C( 5826), INT16_C( 17006), INT16_C( 30927), -INT16_C( 25793) }, { -INT32_C( 23407), -INT32_C( 2910), -INT32_C( 22832), INT32_C( 56720) } }, { { -INT16_C( 22365), INT16_C( 2542), INT16_C( 3362), INT16_C( 20310), INT16_C( 8980), INT16_C( 30862), -INT16_C( 10129), INT16_C( 29535) }, { -INT32_C( 24907), -INT32_C( 16948), -INT32_C( 21882), -INT32_C( 39664) } }, { { INT16_C( 19309), INT16_C( 507), -INT16_C( 11038), -INT16_C( 11797), INT16_C( 17613), -INT16_C( 6070), INT16_C( 28205), INT16_C( 18225) }, { INT32_C( 18802), INT32_C( 759), INT32_C( 23683), INT32_C( 9980) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i r = simde_mm_hsubd_epi16(a); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_hsubd_epi16(a); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_hsubq_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int64_t r[2]; } test_vec[] = { { { -INT32_C( 931994090), INT32_C( 1250494467), INT32_C( 231685230), INT32_C( 1609660830) }, { -INT64_C( 2182488557), -INT64_C( 1377975600) } }, { { INT32_C( 2040596673), -INT32_C( 1534358726), -INT32_C( 1601487556), INT32_C( 1159560088) }, { INT64_C( 3574955399), -INT64_C( 2761047644) } }, { { INT32_C( 1796572371), INT32_C( 143687036), -INT32_C( 1742030660), -INT32_C( 1032768209) }, { INT64_C( 1652885335), -INT64_C( 709262451) } }, { { INT32_C( 949643762), -INT32_C( 546095106), INT32_C( 815357364), -INT32_C( 
554981192) }, { INT64_C( 1495738868), INT64_C( 1370338556) } }, { { -INT32_C( 1203893757), -INT32_C( 766282956), -INT32_C( 553247449), -INT32_C( 490269239) }, { -INT64_C( 437610801), -INT64_C( 62978210) } }, { { INT32_C( 105838379), -INT32_C( 913573675), INT32_C( 1625041500), INT32_C( 1468267575) }, { INT64_C( 1019412054), INT64_C( 156773925) } }, { { INT32_C( 2035744974), -INT32_C( 273026348), -INT32_C( 1395984937), INT32_C( 148643605) }, { INT64_C( 2308771322), -INT64_C( 1544628542) } }, { { -INT32_C( 1440335726), -INT32_C( 1396981913), -INT32_C( 88265123), -INT32_C( 70418246) }, { -INT64_C( 43353813), -INT64_C( 17846877) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i r = simde_mm_hsubq_epi32(a); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_hsubq_epi32(a); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_macc_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int16_t b[8]; const int16_t c[8]; const int16_t r[8]; } test_vec[] = { { { -INT16_C( 4261), INT16_C( 29895), INT16_C( 515), -INT16_C( 22100), -INT16_C( 14349), -INT16_C( 14880), INT16_C( 23995), INT16_C( 31638) }, { -INT16_C( 18865), -INT16_C( 4472), -INT16_C( 25763), -INT16_C( 12565), -INT16_C( 3618), -INT16_C( 15198), INT16_C( 15084), INT16_C( 4530) }, { INT16_C( 24531), INT16_C( 29430), -INT16_C( 21398), INT16_C( 17940), INT16_C( 7332), -INT16_C( 3409), -INT16_C( 10398), -INT16_C( 31266) }, { -INT16_C( 4376), INT16_C( 32430), INT16_C( 14465), INT16_C( 28408), INT16_C( 17502), -INT16_C( 21905), -INT16_C( 25146), INT16_C( 27178) } }, { { -INT16_C( 1653), -INT16_C( 26403), -INT16_C( 
27297), INT16_C( 30979), -INT16_C( 1528), -INT16_C( 30476), INT16_C( 13426), -INT16_C( 8168) }, { -INT16_C( 10659), INT16_C( 14924), -INT16_C( 2127), INT16_C( 4055), INT16_C( 31505), -INT16_C( 609), -INT16_C( 27470), INT16_C( 27805) }, { -INT16_C( 30866), INT16_C( 29070), -INT16_C( 19436), -INT16_C( 3093), -INT16_C( 12029), INT16_C( 3100), INT16_C( 30455), -INT16_C( 19991) }, { INT16_C( 24813), -INT16_C( 6870), -INT16_C( 23613), -INT16_C( 15760), INT16_C( 17291), INT16_C( 16296), -INT16_C( 10693), INT16_C( 16545) } }, { { -INT16_C( 8670), -INT16_C( 27087), -INT16_C( 4993), INT16_C( 19678), INT16_C( 27747), -INT16_C( 21480), -INT16_C( 25515), INT16_C( 31161) }, { -INT16_C( 5601), INT16_C( 16641), INT16_C( 37), -INT16_C( 31604), -INT16_C( 266), INT16_C( 6528), INT16_C( 1569), -INT16_C( 13301) }, { -INT16_C( 438), INT16_C( 19469), -INT16_C( 28292), INT16_C( 17681), INT16_C( 30657), -INT16_C( 13950), -INT16_C( 2064), -INT16_C( 3044) }, { -INT16_C( 1944), INT16_C( 21310), -INT16_C( 16425), -INT16_C( 14727), -INT16_C( 10013), INT16_C( 11650), INT16_C( 7397), -INT16_C( 25841) } }, { { -INT16_C( 21817), -INT16_C( 12128), -INT16_C( 32455), INT16_C( 30412), INT16_C( 2313), INT16_C( 30346), -INT16_C( 4456), INT16_C( 31080) }, { -INT16_C( 16388), INT16_C( 9544), -INT16_C( 4015), INT16_C( 16728), INT16_C( 9556), INT16_C( 6725), -INT16_C( 20946), INT16_C( 9644) }, { INT16_C( 19853), -INT16_C( 6702), INT16_C( 16633), INT16_C( 3988), INT16_C( 31590), -INT16_C( 4703), INT16_C( 13829), INT16_C( 230) }, { -INT16_C( 7567), -INT16_C( 19758), -INT16_C( 27646), -INT16_C( 20044), -INT16_C( 16550), -INT16_C( 6957), INT16_C( 25941), -INT16_C( 25914) } }, { { -INT16_C( 22688), -INT16_C( 5814), INT16_C( 4518), -INT16_C( 30052), -INT16_C( 701), INT16_C( 26825), -INT16_C( 14412), INT16_C( 21331) }, { INT16_C( 23962), INT16_C( 19198), INT16_C( 17421), -INT16_C( 26163), -INT16_C( 9166), INT16_C( 17452), -INT16_C( 7152), -INT16_C( 22417) }, { INT16_C( 16543), INT16_C( 24699), -INT16_C( 1502), 
INT16_C( 26307), -INT16_C( 30537), INT16_C( 31351), INT16_C( 19757), -INT16_C( 14089) }, { -INT16_C( 12193), INT16_C( 15335), -INT16_C( 2160), -INT16_C( 24145), -INT16_C( 27699), -INT16_C( 7933), INT16_C( 6253), INT16_C( 25076) } }, { { INT16_C( 25109), -INT16_C( 14962), -INT16_C( 27364), INT16_C( 23133), -INT16_C( 19816), INT16_C( 17557), INT16_C( 608), -INT16_C( 17430) }, { INT16_C( 4833), INT16_C( 20804), -INT16_C( 19090), INT16_C( 7863), INT16_C( 19672), INT16_C( 6837), INT16_C( 17727), -INT16_C( 13694) }, { -INT16_C( 7641), INT16_C( 8169), INT16_C( 15566), -INT16_C( 25810), -INT16_C( 30786), -INT16_C( 19133), -INT16_C( 26722), INT16_C( 32574) }, { -INT16_C( 28516), -INT16_C( 30815), INT16_C( 6870), INT16_C( 6569), INT16_C( 22526), INT16_C( 21660), INT16_C( 3390), -INT16_C( 28654) } }, { { -INT16_C( 23731), -INT16_C( 31221), INT16_C( 18994), -INT16_C( 1698), -INT16_C( 11315), INT16_C( 3820), -INT16_C( 18030), -INT16_C( 23845) }, { INT16_C( 26616), INT16_C( 8311), INT16_C( 16589), -INT16_C( 23770), -INT16_C( 7476), INT16_C( 24478), -INT16_C( 21391), INT16_C( 15701) }, { INT16_C( 32268), -INT16_C( 15047), -INT16_C( 16619), INT16_C( 16802), -INT16_C( 23105), INT16_C( 8069), -INT16_C( 20273), -INT16_C( 26390) }, { -INT16_C( 21596), INT16_C( 29782), -INT16_C( 22241), INT16_C( 8086), INT16_C( 26395), -INT16_C( 5843), -INT16_C( 19903), -INT16_C( 9567) } }, { { INT16_C( 30895), -INT16_C( 28768), -INT16_C( 22082), -INT16_C( 17361), INT16_C( 19691), INT16_C( 2318), INT16_C( 18562), INT16_C( 15382) }, { INT16_C( 10309), -INT16_C( 10441), INT16_C( 7871), INT16_C( 15466), INT16_C( 18645), INT16_C( 5606), INT16_C( 13214), -INT16_C( 3177) }, { -INT16_C( 24971), INT16_C( 13772), -INT16_C( 32692), INT16_C( 11312), INT16_C( 19523), INT16_C( 32251), INT16_C( 29561), -INT16_C( 14743) }, { INT16_C( 32160), INT16_C( 28972), INT16_C( 26894), INT16_C( 7078), INT16_C( 25546), -INT16_C( 14705), INT16_C( 6581), INT16_C( 6499) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i c = simde_mm_loadu_epi16(test_vec[i].c); simde__m128i r = simde_mm_macc_epi16(a, b, c); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = simde_test_x86_random_i16x8(); simde__m128i c = simde_test_x86_random_i16x8(); simde__m128i r = simde_mm_macc_epi16(a, b, c); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_macc_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int32_t b[4]; const int32_t c[4]; const int32_t r[4]; } test_vec[] = { { { -INT32_C( 2073286111), INT32_C( 882733017), -INT32_C( 1200011720), -INT32_C( 37074522) }, { -INT32_C( 614244495), INT32_C( 1007733789), -INT32_C( 2117244165), INT32_C( 557200893) }, { -INT32_C( 1824584248), INT32_C( 1864742923), -INT32_C( 794482929), -INT32_C( 60032368) }, { -INT32_C( 1335696039), -INT32_C( 1280553056), INT32_C( 1501048823), -INT32_C( 1650843234) } }, { { INT32_C( 1204635340), INT32_C( 884577753), -INT32_C( 554947013), -INT32_C( 1771726922) }, { -INT32_C( 837133983), INT32_C( 921240572), INT32_C( 617705859), INT32_C( 962743811) }, { INT32_C( 1114822829), INT32_C( 1544741673), INT32_C( 1556160813), INT32_C( 1843372109) }, { INT32_C( 764385273), -INT32_C( 1744506939), -INT32_C( 1171270562), -INT32_C( 1091183761) } }, { { -INT32_C( 77294571), -INT32_C( 791586987), INT32_C( 2103946940), -INT32_C( 57192792) }, { -INT32_C( 1816420547), -INT32_C( 83640395), -INT32_C( 1028470026), -INT32_C( 292923030) }, { -INT32_C( 477870069), -INT32_C( 
2017083971), -INT32_C( 1546754591), -INT32_C( 1913454297) }, { INT32_C( 2093718540), -INT32_C( 1192237098), -INT32_C( 334977399), INT32_C( 1434581687) } }, { { INT32_C( 68035235), INT32_C( 1534341156), -INT32_C( 634867263), INT32_C( 1632618676) }, { INT32_C( 1323629676), -INT32_C( 1582482467), INT32_C( 1584275227), INT32_C( 861696039) }, { INT32_C( 1540679944), -INT32_C( 1035124962), INT32_C( 1787584436), INT32_C( 219674580) }, { INT32_C( 2131753420), -INT32_C( 1459152334), -INT32_C( 1723105777), -INT32_C( 994161856) } }, { { INT32_C( 82524511), INT32_C( 1735515229), INT32_C( 1991205777), INT32_C( 935043408) }, { INT32_C( 1635155348), INT32_C( 2037838669), INT32_C( 994335735), INT32_C( 1003023985) }, { -INT32_C( 872315379), INT32_C( 570074596), -INT32_C( 1292256916), -INT32_C( 503734708) }, { -INT32_C( 271838215), -INT32_C( 616287011), INT32_C( 1813730643), INT32_C( 1648636572) } }, { { -INT32_C( 564501138), -INT32_C( 925346730), INT32_C( 364522931), INT32_C( 860599348) }, { INT32_C( 1871604637), INT32_C( 388931610), -INT32_C( 608917037), INT32_C( 2046586816) }, { -INT32_C( 1695929921), -INT32_C( 161912092), -INT32_C( 1084989936), INT32_C( 1032409285) }, { -INT32_C( 1245489099), INT32_C( 1920711584), -INT32_C( 758172775), -INT32_C( 2136800315) } }, { { -INT32_C( 1225101256), -INT32_C( 1862871896), -INT32_C( 1186415377), INT32_C( 1202592343) }, { INT32_C( 1552765614), INT32_C( 1306672797), INT32_C( 58488117), -INT32_C( 1586901557) }, { INT32_C( 1722686982), -INT32_C( 700757706), INT32_C( 337476071), -INT32_C( 1812551241) }, { -INT32_C( 340765674), -INT32_C( 915643330), -INT32_C( 11939230), INT32_C( 1523333044) } }, { { -INT32_C( 307599516), INT32_C( 324851497), -INT32_C( 1483081141), -INT32_C( 1496174353) }, { INT32_C( 1242627118), INT32_C( 803407979), -INT32_C( 1264883228), -INT32_C( 1531239544) }, { INT32_C( 380366857), -INT32_C( 1850917583), INT32_C( 1947367033), -INT32_C( 1783725096) }, { -INT32_C( 414125055), -INT32_C( 527469740), INT32_C( 675750981), -INT32_C( 
862048304) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i c = simde_mm_loadu_epi32(test_vec[i].c); simde__m128i r = simde_mm_macc_epi32(a, b, c); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = simde_test_x86_random_i32x4(); simde__m128i c = simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_macc_epi32(a, b, c); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_maccd_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int16_t b[8]; const int32_t c[4]; const int32_t r[4]; } test_vec[] = { { { INT16_C( 29565), INT16_C( 13902), INT16_C( 11781), INT16_C( 25700), INT16_C( 28267), INT16_C( 20370), -INT16_C( 22899), INT16_C( 8272) }, { INT16_C( 30523), -INT16_C( 28273), INT16_C( 32411), INT16_C( 23874), INT16_C( 29131), -INT16_C( 3043), INT16_C( 27337), INT16_C( 29852) }, { -INT32_C( 716668272), INT32_C( 1664524932), INT32_C( 1926272528), INT32_C( 1305961358) }, { INT32_C( 185744223), INT32_C( 2046358923), -INT32_C( 1545248791), INT32_C( 679971395) } }, { { -INT16_C( 9369), INT16_C( 28585), INT16_C( 28283), -INT16_C( 2368), INT16_C( 17347), INT16_C( 21628), -INT16_C( 30668), INT16_C( 18466) }, { INT16_C( 1767), INT16_C( 3364), -INT16_C( 1928), -INT16_C( 11869), INT16_C( 7596), INT16_C( 26674), INT16_C( 3158), INT16_C( 17117) }, { -INT32_C( 1704277893), -INT32_C( 237071180), -INT32_C( 1153234732), INT32_C( 1487496651) }, { -INT32_C( 1720832916), -INT32_C( 291600804), -INT32_C( 
1021466920), INT32_C( 1390647107) } }, { { INT16_C( 9930), INT16_C( 17771), -INT16_C( 10328), -INT16_C( 27469), INT16_C( 10885), -INT16_C( 26304), INT16_C( 18605), -INT16_C( 6297) }, { INT16_C( 26046), INT16_C( 31576), -INT16_C( 26866), INT16_C( 13987), -INT16_C( 6073), -INT16_C( 28165), -INT16_C( 21932), -INT16_C( 29639) }, { -INT32_C( 408679807), -INT32_C( 1787828003), INT32_C( 1854138083), INT32_C( 612351834) }, { -INT32_C( 150043027), -INT32_C( 1510355955), INT32_C( 1788033478), INT32_C( 204306974) } }, { { -INT16_C( 9350), INT16_C( 30973), INT16_C( 8272), -INT16_C( 21224), INT16_C( 2660), INT16_C( 15659), -INT16_C( 360), -INT16_C( 13094) }, { -INT16_C( 9389), INT16_C( 23127), -INT16_C( 11297), -INT16_C( 17186), INT16_C( 30992), INT16_C( 19842), INT16_C( 6695), -INT16_C( 7892) }, { -INT32_C( 85896715), INT32_C( 1061410372), INT32_C( 693868402), INT32_C( 9767519) }, { INT32_C( 1890435), INT32_C( 967961588), INT32_C( 776307122), INT32_C( 7357319) } }, { { -INT16_C( 15990), INT16_C( 1548), INT16_C( 16246), -INT16_C( 3576), INT16_C( 2643), -INT16_C( 24635), INT16_C( 474), INT16_C( 1655) }, { -INT16_C( 3975), INT16_C( 27274), INT16_C( 25264), -INT16_C( 11730), -INT16_C( 18757), -INT16_C( 17389), INT16_C( 30065), INT16_C( 4209) }, { INT32_C( 1397547884), INT32_C( 502715498), -INT32_C( 841629197), INT32_C( 2070289212) }, { INT32_C( 1461108134), INT32_C( 913154442), -INT32_C( 891203948), INT32_C( 2084540022) } }, { { -INT16_C( 8372), INT16_C( 11906), INT16_C( 27485), INT16_C( 20449), -INT16_C( 28539), INT16_C( 24284), -INT16_C( 5995), -INT16_C( 7049) }, { INT16_C( 27202), -INT16_C( 22114), INT16_C( 15234), INT16_C( 9903), -INT16_C( 14535), INT16_C( 15673), INT16_C( 4373), INT16_C( 10244) }, { -INT32_C( 1336850506), -INT32_C( 1620927981), INT32_C( 2000506137), INT32_C( 1689127829) }, { -INT32_C( 1564585650), -INT32_C( 1202221491), -INT32_C( 1879646794), INT32_C( 1662911694) } }, { { INT16_C( 32083), INT16_C( 28811), INT16_C( 7303), -INT16_C( 2501), INT16_C( 17517), 
INT16_C( 22905), -INT16_C( 30163), -INT16_C( 2730) }, { INT16_C( 20481), INT16_C( 22141), -INT16_C( 26727), -INT16_C( 22341), INT16_C( 5311), INT16_C( 28607), -INT16_C( 31433), INT16_C( 31006) }, { INT32_C( 278545897), INT32_C( 1956847170), -INT32_C( 2011275305), -INT32_C( 882495669) }, { INT32_C( 935637820), INT32_C( 1761659889), -INT32_C( 1918242518), INT32_C( 65617910) } }, { { INT16_C( 8563), -INT16_C( 30064), INT16_C( 2489), INT16_C( 21748), INT16_C( 3518), -INT16_C( 20504), -INT16_C( 188), INT16_C( 2511) }, { -INT16_C( 5558), -INT16_C( 3757), -INT16_C( 5512), -INT16_C( 30738), INT16_C( 16830), INT16_C( 12719), -INT16_C( 22471), -INT16_C( 28613) }, { -INT32_C( 1290607786), -INT32_C( 1978427335), -INT32_C( 1656528544), -INT32_C( 37085053) }, { -INT32_C( 1338200940), -INT32_C( 1992146703), -INT32_C( 1597320604), -INT32_C( 32860505) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i c = simde_mm_loadu_epi32(test_vec[i].c); simde__m128i r = simde_mm_maccd_epi16(a, b, c); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = simde_test_x86_random_i16x8(); simde__m128i c = simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_maccd_epi16(a, b, c); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_macclo_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int32_t b[4]; const int64_t c[2]; const int64_t r[2]; } test_vec[] = { { { INT32_C( 2067698623), INT32_C( 421800180), -INT32_C( 1095495060), 
INT32_C( 786275517) }, { INT32_C( 948551152), INT32_C( 841149922), -INT32_C( 1571473173), INT32_C( 1041666494) }, { INT64_C( 1247074322076690576), -INT64_C( 8406614527600821977) }, { INT64_C( 3208392232912154272), -INT64_C( 6685073429656796597) } }, { { -INT32_C( 2088773494), INT32_C( 858304636), INT32_C( 665369000), -INT32_C( 1279526469) }, { INT32_C( 734827013), INT32_C( 1450416090), INT32_C( 557588024), -INT32_C( 1851091031) }, { INT64_C( 8587953065812515947), INT64_C( 4882466335759732191) }, { INT64_C( 7053065878382922525), INT64_C( 5253468121700588191) } }, { { INT32_C( 561920358), -INT32_C( 761587244), -INT32_C( 942757145), INT32_C( 652537250) }, { INT32_C( 134639837), -INT32_C( 449110366), INT32_C( 1278106584), -INT32_C( 667791074) }, { INT64_C( 7840394437456004346), INT64_C( 3750478494743971195) }, { INT64_C( 7916051302864105992), INT64_C( 2545534380606428515) } }, { { INT32_C( 312949798), INT32_C( 1700207295), -INT32_C( 1099772710), INT32_C( 1494454292) }, { INT32_C( 1331862538), INT32_C( 202767039), INT32_C( 70751806), -INT32_C( 1574398336) }, { -INT64_C( 2339017557766903409), -INT64_C( 5949986363266398162) }, { -INT64_C( 1922211445536036085), -INT64_C( 6027797268688412422) } }, { { INT32_C( 1470343646), -INT32_C( 1703433921), INT32_C( 1797146230), INT32_C( 1480800245) }, { -INT32_C( 1102340256), INT32_C( 1004397042), INT32_C( 1721939485), -INT32_C( 1102480016) }, { -INT64_C( 737231642832194), INT64_C( 4495415384873303148) }, { -INT64_C( 1621556222782445570), INT64_C( 7589992438629194698) } }, { { INT32_C( 794789345), INT32_C( 1633156761), INT32_C( 1839103981), INT32_C( 1940814439) }, { INT32_C( 1416445938), INT32_C( 565103423), -INT32_C( 738598552), INT32_C( 1016028627) }, { INT64_C( 4179656736626033754), -INT64_C( 5429924323381802266) }, { INT64_C( 5305432875916964364), -INT64_C( 6788283860725837778) } }, { { INT32_C( 402987457), -INT32_C( 376271026), -INT32_C( 1543692879), INT32_C( 986788301) }, { INT32_C( 1353810706), -INT32_C( 1394479383), -INT32_C( 
402030510), INT32_C( 1765133771) }, { INT64_C( 4589542346372122741), INT64_C( 2319653070256396177) }, { INT64_C( 5135111080042437383), INT64_C( 2940264705684134467) } }, { { -INT32_C( 788468910), -INT32_C( 1610476766), INT32_C( 746981495), INT32_C( 2053534154) }, { INT32_C( 860140372), INT32_C( 202687090), -INT32_C( 2049296306), -INT32_C( 692738884) }, { -INT64_C( 42443226956880798), -INT64_C( 3440577092560565345) }, { -INT64_C( 720637168514715318), -INT64_C( 4971363510914422815) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i c = simde_mm_loadu_epi64(test_vec[i].c); simde__m128i r = simde_mm_macclo_epi32(a, b, c); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = simde_test_x86_random_i32x4(); simde__m128i c = simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_macclo_epi32(a, b, c); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_macchi_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int32_t b[4]; const int64_t c[2]; const int64_t r[2]; } test_vec[] = { { { INT32_C( 1526449063), INT32_C( 918716952), INT32_C( 1724081969), INT32_C( 1147638338) }, { -INT32_C( 1181723254), -INT32_C( 1356792161), -INT32_C( 910495728), INT32_C( 1372387656) }, { -INT64_C( 6358172095904620619), INT64_C( 6003163645105245314) }, { -INT64_C( 7604680054556033891), INT64_C( 7578168333728801042) } }, { { -INT32_C( 236804334), -INT32_C( 1708256397), -INT32_C( 226385902), -INT32_C( 435082153) }, { -INT32_C( 
482266936), INT32_C( 1159763825), INT32_C( 322097545), -INT32_C( 378264750) }, { -INT64_C( 6441521447753334463), INT64_C( 8653630751799968751) }, { -INT64_C( 8422695420818772988), INT64_C( 8818206993633975501) } }, { { INT32_C( 366992934), -INT32_C( 85967691), -INT32_C( 1379378034), -INT32_C( 270658452) }, { -INT32_C( 280999072), -INT32_C( 1116747988), INT32_C( 17711341), -INT32_C( 303580674) }, { -INT64_C( 5977451265524821992), INT64_C( 5662433887479092696) }, { -INT64_C( 5881447019567566284), INT64_C( 5744600562761049344) } }, { { -INT32_C( 2040664137), -INT32_C( 961301086), -INT32_C( 1192853418), INT32_C( 797467429) }, { INT32_C( 1572531942), INT32_C( 1185547091), INT32_C( 1952048560), -INT32_C( 324889938) }, { -INT64_C( 6237606939636247666), INT64_C( 2594782091165952624) }, { -INT64_C( 7377274645718688492), INT64_C( 2335692947601123222) } }, { { INT32_C( 1107796440), -INT32_C( 1396476866), INT32_C( 1885615198), -INT32_C( 1291030940) }, { -INT32_C( 350642387), INT32_C( 1319699113), -INT32_C( 1990350972), -INT32_C( 1235303163) }, { -INT64_C( 3935636701793332938), -INT64_C( 8441084314015536854) }, { -INT64_C( 5778565983178552796), -INT64_C( 6846269710302673634) } }, { { -INT32_C( 1983316773), -INT32_C( 1653886518), INT32_C( 1963908825), -INT32_C( 861680051) }, { -INT32_C( 1744296633), -INT32_C( 2071348175), -INT32_C( 1582846885), INT32_C( 1477945830) }, { INT64_C( 9132250782254404322), INT64_C( 5609591985390663865) }, { -INT64_C( 5888718470738742644), INT64_C( 4336075547221026535) } }, { { -INT32_C( 657701292), -INT32_C( 985275703), -INT32_C( 1494341829), INT32_C( 130635842) }, { -INT32_C( 1535997816), -INT32_C( 643471571), INT32_C( 1509907818), INT32_C( 587847029) }, { INT64_C( 8402849216899243173), INT64_C( 9149610631410482960) }, { INT64_C( 9036846121376782586), -INT64_C( 9220339550698455238) } }, { { -INT32_C( 243464682), -INT32_C( 1775452416), INT32_C( 575796277), INT32_C( 893002982) }, { -INT32_C( 1920790910), INT32_C( 2056747809), -INT32_C( 1854709981), 
-INT32_C( 1515380716) }, { -INT64_C( 7107536778999548205), INT64_C( 7743054767455380834) }, { INT64_C( 7687549428118246867), INT64_C( 6389815269202085722) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i c = simde_mm_loadu_epi64(test_vec[i].c); simde__m128i r = simde_mm_macchi_epi32(a, b, c); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = simde_test_x86_random_i32x4(); simde__m128i c = simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_macchi_epi32(a, b, c); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_maccs_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int16_t b[8]; const int16_t c[8]; const int16_t r[8]; } test_vec[] = { { { -INT16_C( 9819), -INT16_C( 26952), -INT16_C( 32069), -INT16_C( 11717), INT16_C( 17910), -INT16_C( 26410), -INT16_C( 26593), -INT16_C( 19019) }, { INT16_C( 17890), -INT16_C( 8183), INT16_C( 28196), -INT16_C( 6656), -INT16_C( 12394), INT16_C( 1268), INT16_C( 17546), -INT16_C( 27421) }, { -INT16_C( 29946), -INT16_C( 4833), INT16_C( 18967), -INT16_C( 31674), INT16_C( 21608), INT16_C( 22389), INT16_C( 10259), -INT16_C( 29767) }, { INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MAX } }, { { -INT16_C( 17481), INT16_C( 21446), INT16_C( 30667), INT16_C( 5612), -INT16_C( 31950), -INT16_C( 14266), INT16_C( 17679), -INT16_C( 24267) }, { INT16_C( 9690), -INT16_C( 4596), -INT16_C( 27468), INT16_C( 16909), INT16_C( 19959), 
-INT16_C( 11627), -INT16_C( 9454), INT16_C( 8083) }, { INT16_C( 23700), -INT16_C( 10946), -INT16_C( 31610), -INT16_C( 3320), -INT16_C( 6658), INT16_C( 11725), INT16_C( 28145), -INT16_C( 28847) }, { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX, INT16_MIN, INT16_MIN } }, { { INT16_C( 12361), INT16_C( 25321), INT16_C( 26423), INT16_C( 21368), -INT16_C( 17973), -INT16_C( 11619), -INT16_C( 17726), -INT16_C( 18133) }, { -INT16_C( 19809), INT16_C( 11994), -INT16_C( 24579), -INT16_C( 13764), INT16_C( 32035), -INT16_C( 1038), -INT16_C( 15143), -INT16_C( 23009) }, { INT16_C( 13178), INT16_C( 4381), -INT16_C( 13237), INT16_C( 367), INT16_C( 8203), INT16_C( 8183), -INT16_C( 13109), -INT16_C( 24471) }, { INT16_MIN, INT16_MAX, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, INT16_MAX } }, { { INT16_C( 17408), INT16_C( 9472), -INT16_C( 12329), -INT16_C( 8080), -INT16_C( 11320), -INT16_C( 1765), INT16_C( 21358), INT16_C( 12423) }, { -INT16_C( 18794), -INT16_C( 15600), -INT16_C( 13674), -INT16_C( 27942), INT16_C( 2272), -INT16_C( 32502), INT16_C( 6870), INT16_C( 7734) }, { -INT16_C( 25888), -INT16_C( 31719), INT16_C( 7612), INT16_C( 32652), INT16_C( 28694), -INT16_C( 3918), INT16_C( 8537), INT16_C( 29554) }, { INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, INT16_MIN, INT16_MAX, INT16_MAX, INT16_MAX } }, { { INT16_C( 16836), INT16_C( 16426), INT16_C( 27072), INT16_C( 20385), -INT16_C( 1167), -INT16_C( 16322), -INT16_C( 21877), INT16_C( 31096) }, { INT16_C( 15719), INT16_C( 4494), -INT16_C( 28842), -INT16_C( 4744), -INT16_C( 9419), -INT16_C( 22499), INT16_C( 13889), -INT16_C( 18234) }, { INT16_C( 23853), INT16_C( 20435), -INT16_C( 20110), -INT16_C( 32595), -INT16_C( 32537), -INT16_C( 23557), -INT16_C( 31087), -INT16_C( 1511) }, { INT16_MAX, INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX, INT16_MIN, INT16_MIN } }, { { -INT16_C( 19268), -INT16_C( 27195), INT16_C( 12362), INT16_C( 29212), -INT16_C( 25524), -INT16_C( 5690), -INT16_C( 25649), INT16_C( 18285) 
}, { -INT16_C( 28168), -INT16_C( 17805), -INT16_C( 21676), INT16_C( 28132), INT16_C( 8297), -INT16_C( 3159), -INT16_C( 18799), -INT16_C( 5633) }, { -INT16_C( 30968), -INT16_C( 10456), INT16_C( 1347), INT16_C( 22624), INT16_C( 15491), INT16_C( 31507), INT16_C( 22443), INT16_C( 26701) }, { INT16_MAX, INT16_MAX, INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX, INT16_MAX, INT16_MIN } }, { { INT16_C( 26190), INT16_C( 18290), INT16_C( 24077), INT16_C( 23855), INT16_C( 24864), INT16_C( 30896), INT16_C( 16434), -INT16_C( 29164) }, { INT16_C( 16242), -INT16_C( 24290), INT16_C( 6375), -INT16_C( 6867), INT16_C( 16899), INT16_C( 9582), INT16_C( 29308), INT16_C( 25424) }, { INT16_C( 25431), -INT16_C( 16584), -INT16_C( 11450), -INT16_C( 25993), -INT16_C( 12749), -INT16_C( 1415), INT16_C( 11805), INT16_C( 12350) }, { INT16_MAX, INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MIN } }, { { INT16_C( 25379), -INT16_C( 18162), INT16_C( 28894), INT16_C( 25449), INT16_C( 14068), -INT16_C( 20418), -INT16_C( 2582), INT16_C( 32605) }, { INT16_C( 4154), -INT16_C( 5843), INT16_C( 4262), INT16_C( 27298), -INT16_C( 4917), INT16_C( 16746), -INT16_C( 30471), INT16_C( 6503) }, { INT16_C( 31554), -INT16_C( 6046), INT16_C( 5585), INT16_C( 6398), -INT16_C( 24194), -INT16_C( 6678), -INT16_C( 6241), INT16_C( 1627) }, { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i c = simde_mm_loadu_epi16(test_vec[i].c); simde__m128i r = simde_mm_maccs_epi16(a, b, c); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = simde_test_x86_random_i16x8(); simde__m128i c = simde_test_x86_random_i16x8(); 
simde__m128i r = simde_mm_maccs_epi16(a, b, c); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_maccs_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int32_t b[4]; const int32_t c[4]; const int32_t r[4]; } test_vec[] = { { { -INT32_C( 1883572960), INT32_C( 379133910), INT32_C( 351028496), -INT32_C( 1888070545) }, { -INT32_C( 159313928), INT32_C( 1305612392), -INT32_C( 1982068281), INT32_C( 1043615808) }, { INT32_C( 2001050416), -INT32_C( 448460675), INT32_C( 1030250537), -INT32_C( 615277529) }, { INT32_MAX, INT32_MAX, INT32_MIN, INT32_MIN } }, { { -INT32_C( 1412192147), INT32_C( 1238329418), INT32_C( 1736178170), INT32_C( 1892628856) }, { INT32_C( 1782090131), INT32_C( 562942276), -INT32_C( 2109422273), INT32_C( 1172727752) }, { -INT32_C( 809447992), INT32_C( 1457268000), INT32_C( 1177548349), -INT32_C( 485185044) }, { INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX } }, { { INT32_C( 821643120), INT32_C( 274236627), -INT32_C( 1393885831), INT32_C( 337100539) }, { INT32_C( 699402034), -INT32_C( 1159704431), -INT32_C( 312440136), -INT32_C( 504649657) }, { -INT32_C( 1846386960), -INT32_C( 57661231), -INT32_C( 2084532480), -INT32_C( 1831950233) }, { INT32_MAX, INT32_MIN, INT32_MAX, INT32_MIN } }, { { -INT32_C( 2113151534), -INT32_C( 1094324410), INT32_C( 628505749), -INT32_C( 1338229201) }, { INT32_C( 1435063611), INT32_C( 723306214), -INT32_C( 835714053), INT32_C( 99823029) }, { -INT32_C( 1573267503), -INT32_C( 1435365914), -INT32_C( 1227254535), -INT32_C( 1712360372) }, { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN } }, { { INT32_C( 1126741112), -INT32_C( 421239365), INT32_C( 1470161555), -INT32_C( 1234283124) }, { INT32_C( 1369603414), -INT32_C( 959467750), INT32_C( 1760697102), -INT32_C( 454274744) 
}, { INT32_C( 624297168), INT32_C( 1959792120), -INT32_C( 461720848), -INT32_C( 316200555) }, { INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX } }, { { INT32_C( 1471109258), INT32_C( 1506074506), INT32_C( 52910332), -INT32_C( 652485176) }, { -INT32_C( 1055143062), INT32_C( 533251653), INT32_C( 1042677305), -INT32_C( 234442633) }, { INT32_C( 2076635028), -INT32_C( 1388731171), INT32_C( 1323422955), -INT32_C( 1067114121) }, { INT32_MIN, INT32_MAX, INT32_MAX, INT32_MAX } }, { { -INT32_C( 549546897), INT32_C( 766810440), INT32_C( 755583895), INT32_C( 217379547) }, { -INT32_C( 2041460067), INT32_C( 135661500), INT32_C( 310973698), -INT32_C( 525327110) }, { INT32_C( 1225384966), -INT32_C( 411267668), INT32_C( 2022581041), -INT32_C( 2066220951) }, { INT32_MAX, INT32_MAX, INT32_MAX, INT32_MIN } }, { { INT32_C( 1102475981), -INT32_C( 1287450930), INT32_C( 394634601), -INT32_C( 2098794500) }, { -INT32_C( 958184104), -INT32_C( 1794594025), -INT32_C( 384037584), -INT32_C( 1566979640) }, { INT32_C( 1902079563), -INT32_C( 156712004), INT32_C( 624888905), -INT32_C( 286423005) }, { INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i c = simde_mm_loadu_epi32(test_vec[i].c); simde__m128i r = simde_mm_maccs_epi32(a, b, c); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = simde_test_x86_random_i32x4(); simde__m128i c = simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_maccs_epi32(a, b, c); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } 
return 1; #endif } static int test_simde_mm_maccsd_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int16_t b[8]; const int32_t c[4]; const int32_t r[4]; } test_vec[] = { { { INT16_C( 1075), -INT16_C( 11898), INT16_C( 15985), -INT16_C( 17092), INT16_C( 28092), INT16_C( 18624), -INT16_C( 30180), INT16_C( 25839) }, { INT16_C( 15881), INT16_C( 4772), -INT16_C( 20528), -INT16_C( 10694), -INT16_C( 26055), INT16_C( 26700), -INT16_C( 23961), -INT16_C( 21057) }, { -INT32_C( 452728833), INT32_C( 647355277), INT32_C( 164188451), -INT32_C( 1547249853) }, { -INT32_C( 435656758), INT32_C( 319215197), -INT32_C( 567748609), -INT32_C( 824106873) } }, { { INT16_C( 18174), -INT16_C( 18478), -INT16_C( 26786), INT16_C( 10704), -INT16_C( 2046), -INT16_C( 18938), INT16_C( 7944), -INT16_C( 7388) }, { -INT16_C( 8400), INT16_C( 13186), -INT16_C( 1731), -INT16_C( 25174), INT16_C( 13471), INT16_C( 15638), -INT16_C( 20400), INT16_C( 19003) }, { INT32_C( 1220966396), INT32_C( 2133153124), -INT32_C( 1421219324), -INT32_C( 173343245) }, { INT32_C( 1068304796), INT32_MAX, -INT32_C( 1448780990), -INT32_C( 335400845) } }, { { INT16_C( 4876), INT16_C( 8844), INT16_C( 16459), INT16_C( 2945), INT16_C( 15993), INT16_C( 19760), -INT16_C( 17396), INT16_C( 16724) }, { -INT16_C( 1975), INT16_C( 12040), -INT16_C( 28501), -INT16_C( 16831), -INT16_C( 27000), -INT16_C( 20580), -INT16_C( 7438), -INT16_C( 13478) }, { INT32_C( 447261916), INT32_C( 1445223037), -INT32_C( 1616996614), INT32_C( 1880859711) }, { INT32_C( 437631816), INT32_C( 976125078), -INT32_C( 2048807614), INT32_C( 2010251159) } }, { { -INT16_C( 3539), -INT16_C( 11744), -INT16_C( 3589), -INT16_C( 12243), INT16_C( 9430), INT16_C( 10887), INT16_C( 30667), INT16_C( 20026) }, { -INT16_C( 11801), -INT16_C( 31905), -INT16_C( 4963), INT16_C( 26779), INT16_C( 25959), -INT16_C( 25112), INT16_C( 4016), -INT16_C( 16232) }, { -INT32_C( 1684566542), -INT32_C( 1678761956), -INT32_C( 2070758090), INT32_C( 1683317575) }, { 
-INT32_C( 1642802803), -INT32_C( 1660949749), -INT32_C( 1825964720), INT32_C( 1806476247) } }, { { -INT16_C( 21641), -INT16_C( 14792), -INT16_C( 24207), INT16_C( 10220), -INT16_C( 11765), -INT16_C( 20841), -INT16_C( 22999), INT16_C( 6098) }, { -INT16_C( 3874), -INT16_C( 3597), -INT16_C( 15147), INT16_C( 2450), -INT16_C( 30224), INT16_C( 9798), INT16_C( 20014), -INT16_C( 2384) }, { INT32_C( 1237017298), INT32_C( 2099322562), INT32_C( 959579693), INT32_C( 1607537008) }, { INT32_C( 1320854532), INT32_MAX, INT32_C( 1315165053), INT32_C( 1147235022) } }, { { INT16_C( 17467), INT16_C( 15295), -INT16_C( 31249), INT16_C( 284), -INT16_C( 20661), INT16_C( 30250), -INT16_C( 7608), -INT16_C( 4779) }, { INT16_C( 7489), INT16_C( 30064), INT16_C( 3671), INT16_C( 20802), INT16_C( 10260), -INT16_C( 22463), -INT16_C( 2225), INT16_C( 32413) }, { -INT32_C( 478105521), INT32_C( 1788153139), -INT32_C( 574138480), INT32_C( 793494621) }, { -INT32_C( 347295158), INT32_C( 1673438060), -INT32_C( 786120340), INT32_C( 810422421) } }, { { INT16_C( 1036), -INT16_C( 19744), INT16_C( 6138), -INT16_C( 30123), INT16_C( 24841), INT16_C( 25164), INT16_C( 139), INT16_C( 24127) }, { INT16_C( 23907), INT16_C( 20672), INT16_C( 358), INT16_C( 11781), -INT16_C( 21752), -INT16_C( 15197), -INT16_C( 24780), -INT16_C( 22886) }, { INT32_C( 1771192443), INT32_C( 308601201), -INT32_C( 807610028), -INT32_C( 440478225) }, { INT32_C( 1795960095), INT32_C( 310798605), -INT32_C( 1347951460), -INT32_C( 443922645) } }, { { INT16_C( 29630), -INT16_C( 5626), INT16_C( 3155), INT16_C( 13428), -INT16_C( 12040), -INT16_C( 28344), INT16_C( 6550), INT16_C( 14411) }, { -INT16_C( 1578), INT16_C( 14), INT16_C( 5509), -INT16_C( 12170), -INT16_C( 18627), INT16_C( 22392), INT16_C( 7746), -INT16_C( 734) }, { INT32_C( 433789354), INT32_C( 1693177025), -INT32_C( 1371606613), -INT32_C( 815474101) }, { INT32_C( 387033214), INT32_C( 1710557920), -INT32_C( 1147337533), -INT32_C( 764737801) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i c = simde_mm_loadu_epi32(test_vec[i].c); simde__m128i r = simde_mm_maccsd_epi16(a, b, c); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = simde_test_x86_random_i16x8(); simde__m128i c = simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_maccsd_epi16(a, b, c); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_maccslo_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int32_t b[4]; const int64_t c[2]; const int64_t r[2]; } test_vec[] = { { { -INT32_C( 505588124), -INT32_C( 1399563266), INT32_C( 1655251980), -INT32_C( 1921809845) }, { INT32_C( 1170634605), -INT32_C( 1502543497), -INT32_C( 2129972120), INT32_C( 1391495963) }, { -INT64_C( 8159015487569231762), INT64_C( 8987683831078602504) }, { -INT64_C( 8750874441400662782), INT64_C( 5462043262103804904) } }, { { -INT32_C( 699419056), -INT32_C( 502318018), INT32_C( 1949942134), -INT32_C( 1443572530) }, { INT32_C( 496082748), INT32_C( 1578220351), -INT32_C( 1578677894), -INT32_C( 1258340702) }, { INT64_C( 57471327644090011), -INT64_C( 4865475103685557986) }, { -INT64_C( 289498399659955877), -INT64_C( 7943805645210543782) } }, { { INT32_C( 1233639958), INT32_C( 75507094), -INT32_C( 1526443606), INT32_C( 474539392) }, { -INT32_C( 950241642), -INT32_C( 391087329), INT32_C( 1456889000), INT32_C( 1938410747) }, { INT64_C( 7573719157811665220), -INT64_C( 7160926586083571488) }, { INT64_C( 6401463098484934184), INT64_MIN } }, { { -INT32_C( 
118470775), -INT32_C( 2084319543), INT32_C( 374716764), -INT32_C( 1238709498) }, { -INT32_C( 1018084083), INT32_C( 1944696731), INT32_C( 2145654116), INT32_C( 470486408) }, { -INT64_C( 5256034315837204293), INT64_C( 7882427535233122689) }, { -INT64_C( 5135421105509029968), INT64_C( 8686440102243923313) } }, { { -INT32_C( 499792197), INT32_C( 200421081), INT32_C( 615915901), -INT32_C( 2070552765) }, { -INT32_C( 807693707), INT32_C( 1838688118), INT32_C( 995908449), -INT32_C( 2143348243) }, { INT64_C( 1891318278482297748), -INT64_C( 4880947430363376523) }, { INT64_C( 2294997290806902027), -INT64_C( 4267551580684028974) } }, { { INT32_C( 1195531775), -INT32_C( 1905689846), INT32_C( 1895100482), -INT32_C( 675039400) }, { -INT32_C( 341432863), -INT32_C( 2032680525), -INT32_C( 389524078), -INT32_C( 1390459636) }, { INT64_C( 38972361394823329), INT64_C( 6555416003921373293) }, { -INT64_C( 369221475350898496), INT64_C( 5817228735952967697) } }, { { -INT32_C( 1522445335), INT32_C( 969092319), -INT32_C( 686306019), INT32_C( 1026632362) }, { INT32_C( 1436222627), -INT32_C( 1392329579), INT32_C( 670329386), -INT32_C( 219665912) }, { INT64_C( 1804037175386289910), INT64_C( 6161128594745287517) }, { -INT64_C( 382533263111305135), INT64_C( 5701077502420913183) } }, { { -INT32_C( 1130549172), INT32_C( 2077882651), INT32_C( 2037227201), -INT32_C( 2053615908) }, { -INT32_C( 1914210738), INT32_C( 280203678), -INT32_C( 706002788), -INT32_C( 569074364) }, { INT64_C( 6059826960292236517), -INT64_C( 282992977157640839) }, { INT64_C( 8223936325171645453), -INT64_C( 1721281060853077227) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i c = simde_mm_loadu_epi64(test_vec[i].c); simde__m128i r = simde_mm_maccslo_epi32(a, b, c); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); 
for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = simde_test_x86_random_i32x4(); simde__m128i c = simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_maccslo_epi32(a, b, c); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_maccshi_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int32_t b[4]; const int64_t c[2]; const int64_t r[2]; } test_vec[] = { { { -INT32_C( 1979598278), INT32_C( 97049025), -INT32_C( 1249932512), -INT32_C( 1091207985) }, { INT32_C( 1358168758), INT32_C( 533132435), INT32_C( 335003547), INT32_C( 1079883364) }, { -INT64_C( 2354895035724027390), -INT64_C( 2365235082378056397) }, { -INT64_C( 2303155052711401515), -INT64_C( 3543612432043517937) } }, { { INT32_C( 1314519047), -INT32_C( 991231468), INT32_C( 706345839), INT32_C( 947997352) }, { -INT32_C( 1603440658), -INT32_C( 2007638411), INT32_C( 1051004443), INT32_C( 666881354) }, { INT64_C( 3067133650308224544), -INT64_C( 3780774117716555203) }, { INT64_C( 5057168019656941892), -INT64_C( 3148572360026380595) } }, { { -INT32_C( 1999752890), -INT32_C( 404951914), -INT32_C( 1220111072), -INT32_C( 2135322678) }, { -INT32_C( 1187134136), INT32_C( 1002145416), -INT32_C( 155347891), -INT32_C( 478775526) }, { -INT64_C( 1950187710052165807), -INT64_C( 3377721160132514932) }, { -INT64_C( 2356008414367692031), -INT64_C( 2355380921793336304) } }, { { -INT32_C( 154276407), -INT32_C( 562606837), INT32_C( 2068857062), INT32_C( 1733714390) }, { INT32_C( 479218264), INT32_C( 1737777743), INT32_C( 441317285), INT32_C( 50040117) }, { -INT64_C( 303534344943995415), INT64_C( 4481191540110374482) }, { -INT64_C( 1281219984342224306), INT64_C( 4567946811030558112) } }, { { -INT32_C( 1744768349), 
INT32_C( 1490361462), -INT32_C( 725409099), -INT32_C( 95151695) }, { -INT32_C( 2016377359), INT32_C( 2075863692), -INT32_C( 912384041), INT32_C( 1376461888) }, { -INT64_C( 6937125781946813062), -INT64_C( 6686937538711071998) }, { -INT64_C( 3843338535024975358), -INT64_C( 6817910220457172158) } }, { { -INT32_C( 1403828439), INT32_C( 1146392858), INT32_C( 1667933118), -INT32_C( 1964028546) }, { -INT32_C( 89637339), INT32_C( 625660996), INT32_C( 1648112597), -INT32_C( 459303650) }, { INT64_C( 4583677685977113048), -INT64_C( 3538556661785532081) }, { INT64_C( 5300930983320679616), -INT64_C( 2636471181903539181) } }, { { -INT32_C( 1309723669), -INT32_C( 796056710), INT32_C( 89090677), -INT32_C( 1516186466) }, { -INT32_C( 1291617465), INT32_C( 1419548856), -INT32_C( 2081252655), INT32_C( 116618737) }, { -INT64_C( 8535461188367467242), -INT64_C( 3585254504269373652) }, { INT64_MIN, -INT64_C( 3762070254990787094) } }, { { INT32_C( 1732329151), INT32_C( 1817938008), -INT32_C( 624992882), INT32_C( 438551990) }, { INT32_C( 839681004), INT32_C( 961408364), INT32_C( 231406144), INT32_C( 1216519452) }, { INT64_C( 7917879246931710342), -INT64_C( 901532948814373940) }, { INT64_MAX, -INT64_C( 368025922266064460) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i c = simde_mm_loadu_epi64(test_vec[i].c); simde__m128i r = simde_mm_maccshi_epi32(a, b, c); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = simde_test_x86_random_i32x4(); simde__m128i c = simde_test_x86_random_i64x2(); simde__m128i r = simde_mm_maccshi_epi32(a, b, c); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, 
c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_maddd_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int16_t b[8]; const int32_t c[4]; const int32_t r[4]; } test_vec[] = { { { INT16_C( 10359), INT16_C( 17786), -INT16_C( 2101), -INT16_C( 26389), INT16_C( 18210), INT16_C( 6206), -INT16_C( 6276), -INT16_C( 21611) }, { INT16_C( 31037), -INT16_C( 24421), INT16_C( 30595), -INT16_C( 9610), INT16_C( 29623), INT16_C( 12791), -INT16_C( 23710), INT16_C( 5396) }, { INT32_C( 1956224066), -INT32_C( 568697186), -INT32_C( 399732266), -INT32_C( 824897986) }, { INT32_C( 1843384443), -INT32_C( 379378991), INT32_C( 219083510), -INT32_C( 792706982) } }, { { -INT16_C( 24276), INT16_C( 23616), INT16_C( 3408), INT16_C( 3765), INT16_C( 20804), INT16_C( 30409), INT16_C( 10852), -INT16_C( 8494) }, { -INT16_C( 9760), INT16_C( 12190), INT16_C( 9744), INT16_C( 4453), -INT16_C( 23289), INT16_C( 21755), INT16_C( 14442), -INT16_C( 28854) }, { INT32_C( 100717955), -INT32_C( 779777391), INT32_C( 993050210), INT32_C( 1773844772) }, { INT32_C( 625530755), -INT32_C( 729804294), INT32_C( 1170093649), -INT32_C( 2119312064) } }, { { INT16_C( 26489), INT16_C( 15603), INT16_C( 22729), INT16_C( 30643), INT16_C( 3292), INT16_C( 9492), INT16_C( 28328), INT16_C( 13790) }, { -INT16_C( 24985), INT16_C( 26691), -INT16_C( 3091), INT16_C( 28109), INT16_C( 16697), INT16_C( 19092), INT16_C( 22346), -INT16_C( 588) }, { INT32_C( 1677642804), INT32_C( 1525611634), INT32_C( 584928638), INT32_C( 175683487) }, { INT32_C( 1432274812), -INT32_C( 1978266914), INT32_C( 821116426), INT32_C( 800592455) } }, { { -INT16_C( 31998), INT16_C( 18545), INT16_C( 21005), INT16_C( 10357), INT16_C( 25328), INT16_C( 26973), INT16_C( 3453), -INT16_C( 7256) }, { -INT16_C( 28103), INT16_C( 28201), INT16_C( 6323), INT16_C( 253), -INT16_C( 4076), INT16_C( 5568), INT16_C( 7160), INT16_C( 21504) }, { INT32_C( 1878133884), 
INT32_C( 1264889496), INT32_C( 1643358640), INT32_C( 1686037093) }, { -INT32_C( 994606073), INT32_C( 1400324432), INT32_C( 1690307376), INT32_C( 1554727549) } }, { { INT16_C( 16561), INT16_C( 9179), -INT16_C( 18893), -INT16_C( 19768), INT16_C( 32455), -INT16_C( 15324), -INT16_C( 23974), INT16_C( 23647) }, { -INT16_C( 15875), -INT16_C( 23249), INT16_C( 4151), INT16_C( 7556), -INT16_C( 24672), -INT16_C( 1601), -INT16_C( 8533), -INT16_C( 14818) }, { INT32_C( 1283153860), -INT32_C( 1221075557), -INT32_C( 110002497), INT32_C( 1786467694) }, { INT32_C( 806845414), -INT32_C( 1448867408), -INT32_C( 886198533), INT32_C( 1640636590) } }, { { INT16_C( 11180), -INT16_C( 20082), INT16_C( 32658), -INT16_C( 5956), -INT16_C( 13335), -INT16_C( 1241), INT16_C( 2551), INT16_C( 20849) }, { INT16_C( 30361), -INT16_C( 20621), -INT16_C( 26990), INT16_C( 22834), INT16_C( 30755), INT16_C( 31246), INT16_C( 15324), -INT16_C( 15043) }, { INT32_C( 1568199089), -INT32_C( 218889392), INT32_C( 764720562), INT32_C( 1331522800) }, { -INT32_C( 1973221305), -INT32_C( 1236328116), INT32_C( 315826351), INT32_C( 1056982817) } }, { { INT16_C( 3417), INT16_C( 5418), -INT16_C( 5951), -INT16_C( 8543), -INT16_C( 3045), INT16_C( 3941), INT16_C( 23627), -INT16_C( 18547) }, { INT16_C( 15157), INT16_C( 28502), -INT16_C( 23526), -INT16_C( 31468), -INT16_C( 6620), INT16_C( 22895), INT16_C( 3647), INT16_C( 1739) }, { INT32_C( 1158196523), -INT32_C( 1888691249), -INT32_C( 2099414063), -INT32_C( 2041265821) }, { INT32_C( 1364411828), -INT32_C( 1479856899), -INT32_C( 1989026968), -INT32_C( 1987351385) } }, { { -INT16_C( 3488), -INT16_C( 20338), INT16_C( 27543), -INT16_C( 6395), -INT16_C( 6809), INT16_C( 17694), -INT16_C( 2164), -INT16_C( 16476) }, { -INT16_C( 9160), INT16_C( 2421), INT16_C( 30565), -INT16_C( 19333), -INT16_C( 27542), -INT16_C( 25890), INT16_C( 29133), INT16_C( 31608) }, { -INT32_C( 393577127), INT32_C( 1621974896), -INT32_C( 1173669214), -INT32_C( 1026674562) }, { -INT32_C( 410865345), -INT32_C( 
1707506070), -INT32_C( 1444233396), -INT32_C( 1610491782) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i c = simde_mm_loadu_epi32(test_vec[i].c); simde__m128i r = simde_mm_maddd_epi16(a, b, c); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = simde_test_x86_random_i16x8(); simde__m128i c = simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_maddd_epi16(a, b, c); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_maddsd_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int16_t b[8]; const int32_t c[4]; const int32_t r[4]; } test_vec[] = { { { INT16_C( 9639), INT16_C( 10201), -INT16_C( 15061), -INT16_C( 27210), -INT16_C( 14319), INT16_C( 7888), INT16_C( 29745), -INT16_C( 8477) }, { INT16_C( 25673), -INT16_C( 8272), -INT16_C( 30388), INT16_C( 15093), INT16_C( 28413), INT16_C( 32668), -INT16_C( 16195), INT16_C( 22323) }, { INT32_C( 1755057569), INT32_C( 2030488776), -INT32_C( 685990164), -INT32_C( 142879946) }, { INT32_C( 1918136944), INT32_C( 2077481914), -INT32_C( 835150727), -INT32_C( 813832292) } }, { { INT16_C( 12375), INT16_C( 23658), -INT16_C( 4140), -INT16_C( 17467), INT16_C( 2209), INT16_C( 24123), -INT16_C( 20494), -INT16_C( 14662) }, { INT16_C( 7758), -INT16_C( 27610), -INT16_C( 27419), -INT16_C( 21935), -INT16_C( 25053), -INT16_C( 29400), -INT16_C( 27259), INT16_C( 3370) }, { INT32_C( 689924268), INT32_C( 755407792), INT32_C( 491753654), INT32_C( 1447691973) }, { INT32_C( 
132732138), INT32_C( 1252061097), -INT32_C( 272804623), INT32_C( 1956926979) } }, { { -INT16_C( 8235), INT16_C( 29664), INT16_C( 9769), INT16_C( 28289), INT16_C( 20703), INT16_C( 1885), -INT16_C( 15674), INT16_C( 27351) }, { -INT16_C( 27537), INT16_C( 3126), -INT16_C( 27257), -INT16_C( 10594), -INT16_C( 28829), -INT16_C( 31424), -INT16_C( 13091), INT16_C( 20941) }, { -INT32_C( 869214113), INT32_C( 243042621), INT32_C( 253967688), INT32_C( 1416258464) }, { -INT32_C( 549717254), -INT32_C( 322924678), -INT32_C( 402113339), INT32_MAX } }, { { INT16_C( 15817), -INT16_C( 12775), -INT16_C( 7423), -INT16_C( 137), -INT16_C( 29487), INT16_C( 24181), INT16_C( 2531), -INT16_C( 726) }, { -INT16_C( 28141), INT16_C( 27007), -INT16_C( 14904), -INT16_C( 11988), -INT16_C( 4987), INT16_C( 27619), INT16_C( 32699), INT16_C( 5322) }, { INT32_C( 925887202), -INT32_C( 277311545), INT32_C( 1884819240), -INT32_C( 1555446402) }, { INT32_C( 135766580), -INT32_C( 165036797), INT32_MAX, -INT32_C( 1476549005) } }, { { -INT16_C( 27622), INT16_C( 4150), -INT16_C( 7309), INT16_C( 375), -INT16_C( 6721), -INT16_C( 7165), INT16_C( 7872), -INT16_C( 3359) }, { INT16_C( 9185), INT16_C( 3808), -INT16_C( 24191), -INT16_C( 3960), -INT16_C( 24433), -INT16_C( 32176), INT16_C( 2391), -INT16_C( 30190) }, { -INT32_C( 1967429989), -INT32_C( 465048858), INT32_C( 1122873629), INT32_C( 899041367) }, { INT32_MIN, -INT32_C( 289721839), INT32_C( 1517628862), INT32_C( 1019271529) } }, { { INT16_C( 28910), INT16_C( 7572), INT16_C( 8151), INT16_C( 18577), -INT16_C( 14290), INT16_C( 24261), -INT16_C( 9195), -INT16_C( 915) }, { -INT16_C( 28225), -INT16_C( 24712), -INT16_C( 7480), -INT16_C( 15229), -INT16_C( 10296), INT16_C( 19719), INT16_C( 1575), INT16_C( 10193) }, { INT32_C( 716397107), INT32_C( 1081742191), -INT32_C( 903934418), INT32_C( 1061308257) }, { -INT32_C( 286706907), INT32_C( 737863578), -INT32_C( 278401919), INT32_C( 1037499537) } }, { { -INT16_C( 25426), INT16_C( 5843), -INT16_C( 11070), -INT16_C( 6381), 
-INT16_C( 8217), -INT16_C( 12869), INT16_C( 24537), INT16_C( 3706) }, { INT16_C( 26837), -INT16_C( 347), -INT16_C( 31500), INT16_C( 8235), -INT16_C( 1097), -INT16_C( 28729), INT16_C( 20963), -INT16_C( 25479) }, { -INT32_C( 1187639409), -INT32_C( 1763719301), -INT32_C( 1989127517), INT32_C( 863656980) }, { -INT32_C( 1872024492), -INT32_C( 1467561836), -INT32_C( 1610399967), INT32_C( 1283600937) } }, { { INT16_C( 9215), INT16_C( 25043), INT16_C( 31500), INT16_C( 12427), INT16_C( 5872), INT16_C( 29987), INT16_C( 578), INT16_C( 23546) }, { INT16_C( 29578), INT16_C( 19976), -INT16_C( 15460), INT16_C( 6350), -INT16_C( 18652), INT16_C( 19085), INT16_C( 1153), -INT16_C( 8521) }, { INT32_C( 446894551), -INT32_C( 1199243425), INT32_C( 1134356226), -INT32_C( 995270105) }, { INT32_C( 1219714789), -INT32_C( 1607321975), INT32_C( 1597133577), -INT32_C( 1195239137) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i c = simde_mm_loadu_epi32(test_vec[i].c); simde__m128i r = simde_mm_maddsd_epi16(a, b, c); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = simde_test_x86_random_i16x8(); simde__m128i c = simde_test_x86_random_i32x4(); simde__m128i r = simde_mm_maddsd_epi16(a, b, c); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_sha_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { { { -INT8_C( 55), INT8_C( 79), -INT8_C( 79), -INT8_C( 98), -INT8_C( 52), 
-INT8_C( 48), -INT8_C( 54), -INT8_C( 73), -INT8_C( 111), -INT8_C( 106), INT8_C( 124), -INT8_C( 40), -INT8_C( 56), INT8_C( 95), INT8_C( 26), INT8_C( 87) }, { -INT8_C( 5), -INT8_C( 4), INT8_C( 6), -INT8_C( 3), -INT8_C( 6), INT8_C( 0), -INT8_C( 5), -INT8_C( 7), INT8_C( 5), -INT8_C( 6), -INT8_C( 3), -INT8_C( 3), INT8_C( 1), INT8_C( 2), INT8_C( 2), INT8_C( 7) }, { -INT8_C( 2), INT8_C( 4), INT8_C( 64), -INT8_C( 13), -INT8_C( 1), -INT8_C( 48), -INT8_C( 2), -INT8_C( 1), INT8_C( 32), -INT8_C( 2), INT8_C( 15), -INT8_C( 5), -INT8_C( 112), INT8_C( 124), INT8_C( 104), INT8_MIN } }, { { INT8_C( 113), INT8_C( 43), -INT8_C( 103), INT8_C( 113), -INT8_C( 76), -INT8_C( 9), -INT8_C( 69), -INT8_C( 71), INT8_C( 18), INT8_C( 120), -INT8_C( 92), -INT8_C( 5), -INT8_C( 112), INT8_C( 117), -INT8_C( 117), -INT8_C( 27) }, { INT8_C( 2), -INT8_C( 4), -INT8_C( 5), -INT8_C( 2), INT8_C( 1), -INT8_C( 6), -INT8_C( 7), INT8_C( 6), INT8_C( 7), -INT8_C( 4), -INT8_C( 6), -INT8_C( 3), INT8_C( 1), INT8_C( 1), INT8_C( 2), -INT8_C( 3) }, { -INT8_C( 60), INT8_C( 2), -INT8_C( 4), INT8_C( 28), INT8_C( 104), -INT8_C( 1), -INT8_C( 1), INT8_C( 64), INT8_C( 0), INT8_C( 7), -INT8_C( 2), -INT8_C( 1), INT8_C( 32), -INT8_C( 22), INT8_C( 44), -INT8_C( 4) } }, { { -INT8_C( 83), -INT8_C( 113), INT8_C( 40), -INT8_C( 113), -INT8_C( 5), INT8_C( 63), -INT8_C( 121), INT8_C( 103), -INT8_C( 47), -INT8_C( 29), INT8_C( 109), INT8_C( 123), -INT8_C( 127), INT8_C( 79), -INT8_C( 13), -INT8_C( 47) }, { INT8_C( 6), INT8_C( 7), -INT8_C( 6), INT8_C( 0), INT8_C( 7), INT8_C( 2), INT8_C( 0), -INT8_C( 5), -INT8_C( 3), INT8_C( 0), -INT8_C( 6), -INT8_C( 4), -INT8_C( 6), -INT8_C( 7), INT8_C( 3), -INT8_C( 1) }, { INT8_C( 64), INT8_MIN, INT8_C( 0), -INT8_C( 113), INT8_MIN, -INT8_C( 4), -INT8_C( 121), INT8_C( 3), -INT8_C( 6), -INT8_C( 29), INT8_C( 1), INT8_C( 7), -INT8_C( 2), INT8_C( 0), -INT8_C( 104), -INT8_C( 24) } }, { { -INT8_C( 93), INT8_C( 4), -INT8_C( 58), -INT8_C( 65), INT8_C( 74), -INT8_C( 79), INT8_C( 23), INT8_C( 9), -INT8_C( 13), 
INT8_C( 98), INT8_C( 63), INT8_C( 31), INT8_C( 68), -INT8_C( 12), INT8_C( 56), INT8_C( 100) }, { INT8_C( 0), INT8_C( 7), INT8_C( 3), INT8_C( 5), INT8_C( 1), INT8_C( 3), INT8_C( 1), INT8_C( 4), -INT8_C( 7), -INT8_C( 2), -INT8_C( 4), INT8_C( 6), INT8_C( 1), INT8_C( 5), -INT8_C( 7), -INT8_C( 1) }, { -INT8_C( 93), INT8_C( 0), INT8_C( 48), -INT8_C( 32), -INT8_C( 108), -INT8_C( 120), INT8_C( 46), -INT8_C( 112), -INT8_C( 1), INT8_C( 24), INT8_C( 3), -INT8_C( 64), -INT8_C( 120), INT8_MIN, INT8_C( 0), INT8_C( 50) } }, { { INT8_C( 123), INT8_C( 17), -INT8_C( 37), -INT8_C( 54), INT8_C( 73), INT8_C( 83), INT8_C( 83), -INT8_C( 25), -INT8_C( 93), INT8_C( 122), -INT8_C( 126), -INT8_C( 80), -INT8_C( 127), INT8_C( 108), INT8_C( 68), -INT8_C( 29) }, { -INT8_C( 1), INT8_C( 4), INT8_C( 2), INT8_C( 1), INT8_C( 7), -INT8_C( 2), INT8_C( 2), INT8_C( 3), -INT8_C( 4), INT8_C( 3), -INT8_C( 7), INT8_C( 0), INT8_C( 2), -INT8_C( 2), -INT8_C( 4), -INT8_C( 3) }, { INT8_C( 61), INT8_C( 16), INT8_C( 108), -INT8_C( 108), INT8_MIN, INT8_C( 20), INT8_C( 76), INT8_C( 56), -INT8_C( 6), -INT8_C( 48), -INT8_C( 1), -INT8_C( 80), INT8_C( 4), INT8_C( 27), INT8_C( 4), -INT8_C( 4) } }, { { INT8_C( 94), INT8_C( 63), -INT8_C( 49), INT8_C( 121), -INT8_C( 97), INT8_C( 47), INT8_C( 35), INT8_C( 72), INT8_C( 7), -INT8_C( 75), -INT8_C( 98), -INT8_C( 9), -INT8_C( 33), -INT8_C( 64), -INT8_C( 3), -INT8_C( 103) }, { INT8_C( 0), -INT8_C( 4), -INT8_C( 5), INT8_C( 3), -INT8_C( 7), INT8_C( 5), -INT8_C( 4), INT8_C( 1), -INT8_C( 1), -INT8_C( 2), -INT8_C( 6), -INT8_C( 4), INT8_C( 0), -INT8_C( 6), -INT8_C( 4), -INT8_C( 7) }, { INT8_C( 94), INT8_C( 3), -INT8_C( 2), -INT8_C( 56), -INT8_C( 1), -INT8_C( 32), INT8_C( 2), -INT8_C( 112), INT8_C( 3), -INT8_C( 19), -INT8_C( 2), -INT8_C( 1), -INT8_C( 33), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1) } }, { { INT8_C( 116), INT8_C( 21), INT8_C( 10), -INT8_C( 110), -INT8_C( 11), INT8_C( 77), INT8_C( 112), INT8_C( 118), INT8_C( 71), -INT8_C( 102), -INT8_C( 4), -INT8_C( 71), INT8_C( 7), -INT8_C( 9), 
INT8_C( 77), -INT8_C( 51) }, { -INT8_C( 6), -INT8_C( 1), INT8_C( 3), INT8_C( 5), INT8_C( 4), -INT8_C( 6), INT8_C( 5), INT8_C( 7), INT8_C( 7), INT8_C( 7), INT8_C( 3), INT8_C( 0), INT8_C( 3), -INT8_C( 2), INT8_C( 6), -INT8_C( 4) }, { INT8_C( 1), INT8_C( 10), INT8_C( 80), INT8_C( 64), INT8_C( 80), INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_MIN, INT8_C( 0), -INT8_C( 32), -INT8_C( 71), INT8_C( 56), -INT8_C( 3), INT8_C( 64), -INT8_C( 4) } }, { { -INT8_C( 28), INT8_C( 27), -INT8_C( 12), -INT8_C( 35), -INT8_C( 14), -INT8_C( 77), INT8_C( 32), -INT8_C( 73), -INT8_C( 116), -INT8_C( 79), INT8_C( 2), -INT8_C( 63), -INT8_C( 95), INT8_C( 25), INT8_C( 27), -INT8_C( 58) }, { -INT8_C( 2), -INT8_C( 2), -INT8_C( 3), -INT8_C( 5), INT8_C( 4), -INT8_C( 5), INT8_C( 7), -INT8_C( 5), -INT8_C( 5), -INT8_C( 2), INT8_C( 5), INT8_C( 5), -INT8_C( 5), -INT8_C( 5), INT8_C( 1), -INT8_C( 3) }, { -INT8_C( 7), INT8_C( 6), -INT8_C( 2), -INT8_C( 2), INT8_C( 32), -INT8_C( 3), INT8_C( 0), -INT8_C( 3), -INT8_C( 4), -INT8_C( 20), INT8_C( 64), INT8_C( 32), -INT8_C( 3), INT8_C( 0), INT8_C( 54), -INT8_C( 8) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_sha_epi8(a, b); simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i b = simde_test_x86_random_i8x16(); simde__m128i nmask = simde_mm_cmplt_epi8(b, simde_mm_setzero_si128()); b = simde_mm_and_si128(b, simde_mm_set1_epi8(7)); b = simde_mm_or_si128(simde_mm_and_si128(nmask, simde_mm_sub_epi8(simde_mm_setzero_si128(), b)), simde_mm_andnot_si128(nmask, b)); simde__m128i r = simde_mm_sha_epi8(a, b); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, 
SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_sha_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { { -INT16_C( 9769), INT16_C( 9461), INT16_C( 27455), INT16_C( 21533), -INT16_C( 31746), -INT16_C( 10727), INT16_C( 12116), -INT16_C( 12978) }, { INT16_C( 10), -INT16_C( 7), -INT16_C( 12), -INT16_C( 2), -INT16_C( 2), INT16_C( 11), -INT16_C( 9), -INT16_C( 10) }, { INT16_C( 23552), INT16_C( 73), INT16_C( 6), INT16_C( 5383), -INT16_C( 7937), -INT16_C( 14336), INT16_C( 23), -INT16_C( 13) } }, { { -INT16_C( 10124), INT16_C( 11638), INT16_C( 32131), -INT16_C( 27315), -INT16_C( 26684), -INT16_C( 16471), INT16_C( 16328), INT16_C( 10960) }, { -INT16_C( 16), -INT16_C( 4), INT16_C( 12), -INT16_C( 12), INT16_C( 3), INT16_C( 7), -INT16_C( 10), INT16_C( 6) }, { -INT16_C( 1), INT16_C( 727), INT16_C( 12288), -INT16_C( 7), -INT16_C( 16864), -INT16_C( 11136), INT16_C( 15), -INT16_C( 19456) } }, { { -INT16_C( 24604), -INT16_C( 15907), -INT16_C( 4249), -INT16_C( 15974), INT16_C( 29959), INT16_C( 17691), INT16_C( 21670), INT16_C( 9608) }, { -INT16_C( 8), -INT16_C( 4), INT16_C( 1), -INT16_C( 5), -INT16_C( 6), -INT16_C( 13), -INT16_C( 11), INT16_C( 15) }, { -INT16_C( 97), -INT16_C( 995), -INT16_C( 8498), -INT16_C( 500), INT16_C( 468), INT16_C( 2), INT16_C( 10), INT16_C( 0) } }, { { -INT16_C( 18866), -INT16_C( 22380), -INT16_C( 13421), INT16_C( 8939), -INT16_C( 22802), INT16_C( 12501), INT16_C( 29590), INT16_C( 1630) }, { -INT16_C( 14), -INT16_C( 12), -INT16_C( 13), -INT16_C( 2), -INT16_C( 12), INT16_C( 8), -INT16_C( 10), -INT16_C( 7) }, { -INT16_C( 2), -INT16_C( 6), -INT16_C( 2), INT16_C( 2234), -INT16_C( 6), -INT16_C( 11008), INT16_C( 28), INT16_C( 12) } }, { { -INT16_C( 22821), -INT16_C( 22015), INT16_C( 6574), -INT16_C( 473), -INT16_C( 20062), INT16_C( 18497), -INT16_C( 22977), INT16_C( 5178) }, { -INT16_C( 16), -INT16_C( 5), INT16_C( 4), INT16_C( 5), INT16_C( 1), -INT16_C( 
1), -INT16_C( 6), INT16_C( 13) }, { -INT16_C( 1), -INT16_C( 688), -INT16_C( 25888), -INT16_C( 15136), INT16_C( 25412), INT16_C( 9248), -INT16_C( 360), INT16_C( 16384) } }, { { -INT16_C( 2382), -INT16_C( 28786), -INT16_C( 8352), -INT16_C( 25032), INT16_C( 7754), INT16_C( 21702), -INT16_C( 2998), -INT16_C( 26620) }, { INT16_C( 2), INT16_C( 11), -INT16_C( 11), -INT16_C( 8), -INT16_C( 13), -INT16_C( 15), INT16_C( 2), -INT16_C( 12) }, { -INT16_C( 9528), INT16_C( 28672), -INT16_C( 5), -INT16_C( 98), INT16_C( 0), INT16_C( 0), -INT16_C( 11992), -INT16_C( 7) } }, { { INT16_C( 12029), INT16_C( 7842), INT16_C( 10067), INT16_C( 18949), INT16_C( 30223), INT16_C( 7883), INT16_C( 26206), -INT16_C( 9564) }, { -INT16_C( 6), -INT16_C( 3), INT16_C( 6), INT16_C( 2), -INT16_C( 5), -INT16_C( 8), INT16_C( 14), -INT16_C( 8) }, { INT16_C( 187), INT16_C( 980), -INT16_C( 11072), INT16_C( 10260), INT16_C( 944), INT16_C( 30), INT16_MIN, -INT16_C( 38) } }, { { -INT16_C( 10271), INT16_C( 8356), -INT16_C( 2004), INT16_C( 18806), INT16_C( 16409), INT16_C( 27834), INT16_C( 548), INT16_C( 8707) }, { INT16_C( 9), INT16_C( 10), -INT16_C( 7), INT16_C( 10), INT16_C( 10), INT16_C( 11), INT16_C( 1), INT16_C( 0) }, { -INT16_C( 15872), -INT16_C( 28672), -INT16_C( 16), -INT16_C( 10240), INT16_C( 25600), -INT16_C( 12288), INT16_C( 1096), INT16_C( 8707) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_sha_epi16(a, b); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = simde_mm_srai_epi16(simde_test_x86_random_i16x8(), 11); simde__m128i r = simde_mm_sha_epi16(a, b); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_sha_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { { -INT32_C( 376500546), INT32_C( 246411337), -INT32_C( 550133744), INT32_C( 2017512208) }, { -INT32_C( 12), -INT32_C( 2), -INT32_C( 21), INT32_C( 27) }, { -INT32_C( 91920), INT32_C( 61602834), -INT32_C( 263), INT32_MIN } }, { { -INT32_C( 1923793596), INT32_C( 1033569325), -INT32_C( 1273114460), -INT32_C( 1909695076) }, { INT32_C( 16), -INT32_C( 9), -INT32_C( 8), -INT32_C( 29) }, { INT32_C( 1027866624), INT32_C( 2018690), -INT32_C( 4973104), -INT32_C( 4) } }, { { -INT32_C( 1491295367), INT32_C( 1273280423), INT32_C( 620691848), -INT32_C( 1062065314) }, { INT32_C( 21), INT32_C( 6), -INT32_C( 24), INT32_C( 29) }, { INT32_C( 1864368128), -INT32_C( 114431552), INT32_C( 36), -INT32_C( 1073741824) } }, { { -INT32_C( 1809953555), -INT32_C( 1931541756), INT32_C( 1672535556), -INT32_C( 1759288823) }, { INT32_C( 23), -INT32_C( 17), -INT32_C( 8), -INT32_C( 21) }, { INT32_C( 1988100096), -INT32_C( 14737), INT32_C( 6533342), -INT32_C( 839) } }, { { -INT32_C( 582977575), INT32_C( 2137596026), INT32_C( 149035518), -INT32_C( 224459397) }, { -INT32_C( 27), -INT32_C( 28), -INT32_C( 2), INT32_C( 4) }, { -INT32_C( 5), INT32_C( 7), INT32_C( 37258879), INT32_C( 703616944) } }, { { -INT32_C( 1091574461), INT32_C( 71129093), -INT32_C( 334749839), INT32_C( 1373547300) }, { -INT32_C( 7), INT32_C( 11), INT32_C( 6), -INT32_C( 2) }, { -INT32_C( 8527926), -INT32_C( 356505600), INT32_C( 50846784), INT32_C( 343386825) } }, { { -INT32_C( 1212670798), -INT32_C( 440666764), INT32_C( 953272084), -INT32_C( 1416974221) }, { -INT32_C( 12), -INT32_C( 31), -INT32_C( 22), -INT32_C( 23) }, { -INT32_C( 296063), -INT32_C( 1), INT32_C( 227), -INT32_C( 169) } }, { { INT32_C( 1599953899), INT32_C( 1682184528), INT32_C( 1402803680), -INT32_C( 1543624763) }, { INT32_C( 
14), -INT32_C( 30), -INT32_C( 28), INT32_C( 7) }, { INT32_C( 1459273728), INT32_C( 1), INT32_C( 5), -INT32_C( 15474048) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_sha_epi32(a, b); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = simde_mm_srai_epi32(simde_test_x86_random_i32x4(), 26); simde__m128i r = simde_mm_sha_epi32(a, b); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_sha_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { { -INT64_C( 8221108173063482573), INT64_C( 4845837656533272023) }, { INT64_C( 3), -INT64_C( 30) }, { INT64_C( 8018110910330345880), INT64_C( 4513038002) } }, { { INT64_C( 961305450510644510), -INT64_C( 8812443758950205245) }, { -INT64_C( 29), INT64_C( 12) }, { INT64_C( 1790570934), INT64_C( 4508515589551828992) } }, { { -INT64_C( 6151448808215369635), -INT64_C( 3278957846999708703) }, { INT64_C( 56), INT64_C( 5) }, { INT64_C( 6701356245527298048), INT64_C( 5753813338266631200) } }, { { -INT64_C( 3451738709977447032), -INT64_C( 123770926729252287) }, { INT64_C( 60), INT64_C( 3) }, { INT64_MIN, -INT64_C( 990167413834018296) } }, { { INT64_C( 2263339044755073193), -INT64_C( 4094091507247271888) }, { -INT64_C( 31), INT64_C( 3) }, { INT64_C( 1053949373), INT64_C( 4140756089440928128) } }, { { INT64_C( 5500381801624903000), -INT64_C( 8739328771901035912) }, { -INT64_C( 18), -INT64_C( 59) }, { INT64_C( 20982291418552), -INT64_C( 16) } }, { { -INT64_C( 
3312094770503019871), INT64_C( 8524321440793789949) }, { INT64_C( 20), -INT64_C( 7) }, { INT64_C( 3867426396428042240), INT64_C( 66596261256201483) } }, { { INT64_C( 8596504695728662048), -INT64_C( 642457255756951952) }, { -INT64_C( 30), -INT64_C( 57) }, { INT64_C( 8006118885), -INT64_C( 5) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_sha_epi64(a, b); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i b = simde_mm_cvtepi32_epi64(simde_mm_srai_epi32(simde_test_x86_random_i32x4(), 25)); simde__m128i r = simde_mm_sha_epi64(a, b); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_shl_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { { { -INT8_C( 40), -INT8_C( 16), INT8_C( 92), -INT8_C( 19), -INT8_C( 16), INT8_C( 53), INT8_C( 44), -INT8_C( 29), INT8_C( 29), -INT8_C( 62), -INT8_C( 120), INT8_C( 6), INT8_C( 80), -INT8_C( 106), INT8_C( 4), -INT8_C( 23) }, { -INT8_C( 6), -INT8_C( 3), -INT8_C( 2), -INT8_C( 6), -INT8_C( 3), INT8_C( 2), INT8_C( 0), INT8_C( 0), -INT8_C( 2), INT8_C( 7), -INT8_C( 7), INT8_C( 5), INT8_C( 2), -INT8_C( 6), -INT8_C( 5), INT8_C( 6) }, { INT8_C( 3), INT8_C( 30), INT8_C( 23), INT8_C( 3), INT8_C( 30), -INT8_C( 44), INT8_C( 44), -INT8_C( 29), INT8_C( 7), INT8_C( 0), INT8_C( 1), -INT8_C( 64), INT8_C( 64), INT8_C( 2), INT8_C( 0), INT8_C( 64) } }, { { INT8_C( 83), INT8_C( 87), -INT8_C( 90), -INT8_C( 102), INT8_C( 74), INT8_C( 6), INT8_C( 30), -INT8_C( 33), -INT8_C( 80), INT8_C( 
110), INT8_C( 82), INT8_C( 66), -INT8_C( 54), -INT8_C( 106), -INT8_C( 73), -INT8_C( 78) }, { -INT8_C( 5), INT8_C( 4), -INT8_C( 3), INT8_C( 0), INT8_C( 0), INT8_C( 2), -INT8_C( 3), INT8_C( 1), INT8_C( 6), -INT8_C( 7), -INT8_C( 5), -INT8_C( 7), -INT8_C( 4), -INT8_C( 7), INT8_C( 7), INT8_C( 5) }, { INT8_C( 2), INT8_C( 112), INT8_C( 20), -INT8_C( 102), INT8_C( 74), INT8_C( 24), INT8_C( 3), -INT8_C( 66), INT8_C( 0), INT8_C( 0), INT8_C( 2), INT8_C( 0), INT8_C( 12), INT8_C( 1), INT8_MIN, INT8_C( 64) } }, { { INT8_C( 96), INT8_C( 69), -INT8_C( 105), -INT8_C( 111), INT8_C( 3), -INT8_C( 8), -INT8_C( 20), -INT8_C( 121), -INT8_C( 127), -INT8_C( 93), -INT8_C( 67), -INT8_C( 36), INT8_C( 110), INT8_C( 90), INT8_C( 97), -INT8_C( 40) }, { INT8_C( 2), INT8_C( 2), -INT8_C( 6), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 6), INT8_C( 2), INT8_C( 4), -INT8_C( 5), INT8_C( 7), -INT8_C( 2), INT8_C( 3), INT8_C( 0), -INT8_C( 2), INT8_C( 5) }, { INT8_MIN, INT8_C( 20), INT8_C( 2), -INT8_C( 111), INT8_C( 3), -INT8_C( 8), INT8_C( 0), INT8_C( 28), INT8_C( 16), INT8_C( 5), INT8_MIN, INT8_C( 55), INT8_C( 112), INT8_C( 90), INT8_C( 24), INT8_C( 0) } }, { { INT8_C( 40), INT8_C( 68), -INT8_C( 105), -INT8_C( 100), -INT8_C( 60), INT8_C( 19), INT8_C( 126), INT8_C( 35), -INT8_C( 75), -INT8_C( 20), INT8_C( 49), -INT8_C( 102), -INT8_C( 29), -INT8_C( 23), -INT8_C( 24), -INT8_C( 92) }, { INT8_C( 7), INT8_C( 4), INT8_C( 5), -INT8_C( 7), INT8_C( 4), -INT8_C( 3), INT8_C( 1), INT8_C( 3), INT8_C( 2), INT8_C( 1), INT8_C( 7), -INT8_C( 6), INT8_C( 0), -INT8_C( 7), INT8_C( 0), -INT8_C( 7) }, { INT8_C( 0), INT8_C( 64), -INT8_C( 32), INT8_C( 1), INT8_C( 64), INT8_C( 2), -INT8_C( 4), INT8_C( 24), -INT8_C( 44), -INT8_C( 40), INT8_MIN, INT8_C( 2), -INT8_C( 29), INT8_C( 1), -INT8_C( 24), INT8_C( 1) } }, { { -INT8_C( 45), -INT8_C( 37), INT8_C( 14), -INT8_C( 127), INT8_C( 53), INT8_C( 96), -INT8_C( 68), -INT8_C( 5), INT8_C( 119), -INT8_C( 50), INT8_C( 22), INT8_C( 70), -INT8_C( 47), INT8_C( 76), INT8_C( 54), INT8_C( 94) }, { 
INT8_C( 1), INT8_C( 2), INT8_C( 6), INT8_C( 4), INT8_C( 3), -INT8_C( 4), INT8_C( 5), -INT8_C( 4), INT8_C( 1), -INT8_C( 4), -INT8_C( 3), INT8_C( 3), INT8_C( 3), INT8_C( 6), INT8_C( 7), -INT8_C( 3) }, { -INT8_C( 90), INT8_C( 108), INT8_MIN, INT8_C( 16), -INT8_C( 88), INT8_C( 6), INT8_MIN, INT8_C( 15), -INT8_C( 18), INT8_C( 12), INT8_C( 2), INT8_C( 48), -INT8_C( 120), INT8_C( 0), INT8_C( 0), INT8_C( 11) } }, { { -INT8_C( 120), -INT8_C( 109), INT8_C( 100), INT8_C( 9), -INT8_C( 3), -INT8_C( 26), -INT8_C( 113), INT8_C( 86), -INT8_C( 19), -INT8_C( 45), -INT8_C( 44), -INT8_C( 90), -INT8_C( 30), -INT8_C( 119), INT8_C( 49), INT8_C( 78) }, { -INT8_C( 3), -INT8_C( 5), INT8_C( 3), -INT8_C( 1), -INT8_C( 6), INT8_C( 1), INT8_C( 0), INT8_C( 4), INT8_C( 0), INT8_C( 5), INT8_C( 5), INT8_C( 0), INT8_C( 3), -INT8_C( 5), -INT8_C( 1), INT8_C( 0) }, { INT8_C( 17), INT8_C( 4), INT8_C( 32), INT8_C( 4), INT8_C( 3), -INT8_C( 52), -INT8_C( 113), INT8_C( 96), -INT8_C( 19), INT8_C( 96), INT8_MIN, -INT8_C( 90), INT8_C( 16), INT8_C( 4), INT8_C( 24), INT8_C( 78) } }, { { INT8_C( 111), -INT8_C( 14), INT8_C( 1), -INT8_C( 4), -INT8_C( 60), -INT8_C( 82), -INT8_C( 35), INT8_C( 126), INT8_C( 63), -INT8_C( 126), -INT8_C( 45), -INT8_C( 125), -INT8_C( 68), -INT8_C( 86), -INT8_C( 61), -INT8_C( 69) }, { INT8_C( 2), -INT8_C( 4), -INT8_C( 3), INT8_C( 4), INT8_C( 5), -INT8_C( 4), INT8_C( 3), -INT8_C( 4), INT8_C( 0), INT8_C( 4), INT8_C( 4), INT8_C( 6), -INT8_C( 1), INT8_C( 2), INT8_C( 4), INT8_C( 0) }, { -INT8_C( 68), INT8_C( 15), INT8_C( 0), -INT8_C( 64), INT8_MIN, INT8_C( 10), -INT8_C( 24), INT8_C( 7), INT8_C( 63), INT8_C( 32), INT8_C( 48), -INT8_C( 64), INT8_C( 94), -INT8_C( 88), INT8_C( 48), -INT8_C( 69) } }, { { -INT8_C( 79), -INT8_C( 126), INT8_C( 77), INT8_C( 34), INT8_C( 52), -INT8_C( 66), -INT8_C( 112), -INT8_C( 71), -INT8_C( 106), INT8_C( 99), INT8_C( 123), -INT8_C( 92), INT8_C( 7), -INT8_C( 74), -INT8_C( 45), -INT8_C( 17) }, { INT8_C( 1), -INT8_C( 7), INT8_C( 6), -INT8_C( 6), -INT8_C( 7), -INT8_C( 5), 
INT8_C( 7), INT8_C( 4), INT8_C( 0), INT8_C( 1), INT8_C( 7), INT8_C( 6), INT8_C( 4), -INT8_C( 7), -INT8_C( 2), -INT8_C( 1) }, { INT8_C( 98), INT8_C( 1), INT8_C( 64), INT8_C( 0), INT8_C( 0), INT8_C( 5), INT8_C( 0), -INT8_C( 112), -INT8_C( 106), -INT8_C( 58), INT8_MIN, INT8_C( 0), INT8_C( 112), INT8_C( 1), INT8_C( 52), INT8_C( 119) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_shl_epi8(a, b); simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i b = simde_test_x86_random_i8x16(); simde__m128i nmask = simde_mm_cmplt_epi8(b, simde_mm_setzero_si128()); b = simde_mm_and_si128(b, simde_mm_set1_epi8(7)); b = simde_mm_or_si128(simde_mm_and_si128(nmask, simde_mm_sub_epi8(simde_mm_setzero_si128(), b)), simde_mm_andnot_si128(nmask, b)); simde__m128i r = simde_mm_shl_epi8(a, b); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_shl_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { { -INT16_C( 13450), INT16_C( 17329), INT16_C( 8435), INT16_C( 20622), -INT16_C( 230), -INT16_C( 11467), -INT16_C( 18836), INT16_C( 12360) }, { INT16_C( 12), INT16_C( 6), INT16_C( 1), INT16_C( 9), -INT16_C( 10), -INT16_C( 8), INT16_C( 3), -INT16_C( 14) }, { INT16_C( 24576), -INT16_C( 5056), INT16_C( 16870), INT16_C( 7168), INT16_C( 63), INT16_C( 211), -INT16_C( 19616), INT16_C( 0) } }, { { INT16_C( 21733), INT16_C( 9875), -INT16_C( 9047), -INT16_C( 29759), -INT16_C( 8718), -INT16_C( 10649), -INT16_C( 20078), -INT16_C( 14580) }, { 
-INT16_C( 12), -INT16_C( 12), -INT16_C( 2), INT16_C( 14), INT16_C( 5), INT16_C( 7), INT16_C( 14), -INT16_C( 6) }, { INT16_C( 5), INT16_C( 2), INT16_C( 14122), INT16_C( 16384), -INT16_C( 16832), INT16_C( 13184), INT16_MIN, INT16_C( 796) } }, { { -INT16_C( 23257), -INT16_C( 27556), -INT16_C( 1793), -INT16_C( 20209), -INT16_C( 31161), INT16_C( 30331), -INT16_C( 20701), -INT16_C( 1018) }, { -INT16_C( 3), INT16_C( 0), -INT16_C( 8), INT16_C( 5), INT16_C( 5), INT16_C( 7), -INT16_C( 5), -INT16_C( 6) }, { INT16_C( 5284), -INT16_C( 27556), INT16_C( 248), INT16_C( 8672), -INT16_C( 14112), INT16_C( 15744), INT16_C( 1401), INT16_C( 1008) } }, { { INT16_C( 18019), INT16_C( 21877), INT16_C( 32412), INT16_C( 3170), -INT16_C( 32704), INT16_C( 31703), -INT16_C( 17980), INT16_C( 5662) }, { -INT16_C( 8), INT16_C( 3), -INT16_C( 16), -INT16_C( 9), INT16_C( 7), INT16_C( 15), INT16_C( 9), -INT16_C( 5) }, { INT16_C( 70), -INT16_C( 21592), INT16_C( 0), INT16_C( 6), INT16_C( 8192), INT16_MIN, -INT16_C( 30720), INT16_C( 176) } }, { { -INT16_C( 16190), INT16_C( 12612), INT16_C( 29994), -INT16_C( 7519), INT16_C( 21766), -INT16_C( 21019), -INT16_C( 10977), INT16_C( 24380) }, { -INT16_C( 8), -INT16_C( 11), INT16_C( 6), INT16_C( 13), -INT16_C( 2), -INT16_C( 7), -INT16_C( 6), INT16_C( 6) }, { INT16_C( 192), INT16_C( 6), INT16_C( 19072), INT16_C( 8192), INT16_C( 5441), INT16_C( 347), INT16_C( 852), -INT16_C( 12544) } }, { { -INT16_C( 26260), -INT16_C( 4045), -INT16_C( 6578), INT16_C( 31924), -INT16_C( 29504), -INT16_C( 11253), INT16_C( 3804), INT16_C( 7496) }, { INT16_C( 11), -INT16_C( 15), -INT16_C( 4), -INT16_C( 16), -INT16_C( 7), -INT16_C( 1), INT16_C( 15), INT16_C( 4) }, { INT16_C( 24576), INT16_C( 1), INT16_C( 3684), INT16_C( 0), INT16_C( 281), INT16_C( 27141), INT16_C( 0), -INT16_C( 11136) } }, { { INT16_C( 23688), INT16_C( 22953), -INT16_C( 10062), INT16_C( 8834), -INT16_C( 20841), -INT16_C( 18509), INT16_C( 27043), -INT16_C( 26326) }, { INT16_C( 4), INT16_C( 14), -INT16_C( 13), INT16_C( 8), 
INT16_C( 11), -INT16_C( 8), INT16_C( 9), -INT16_C( 1) }, { -INT16_C( 14208), INT16_C( 16384), INT16_C( 6), -INT16_C( 32256), -INT16_C( 18432), INT16_C( 183), INT16_C( 17920), INT16_C( 19605) } }, { { -INT16_C( 29121), INT16_C( 13582), INT16_C( 21502), INT16_C( 7157), INT16_C( 17075), INT16_C( 8004), -INT16_C( 4070), INT16_C( 7115) }, { -INT16_C( 12), INT16_C( 6), INT16_C( 11), INT16_C( 0), INT16_C( 3), -INT16_C( 1), INT16_C( 9), -INT16_C( 1) }, { INT16_C( 8), INT16_C( 17280), -INT16_C( 4096), INT16_C( 7157), INT16_C( 5528), INT16_C( 4002), INT16_C( 13312), INT16_C( 3557) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_shl_epi16(a, b); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = simde_mm_srai_epi16(simde_test_x86_random_i16x8(), 11); simde__m128i r = simde_mm_shl_epi16(a, b); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_shl_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { { -INT32_C( 506189050), -INT32_C( 1680558282), INT32_C( 2102138628), -INT32_C( 2040021441) }, { INT32_C( 31), INT32_C( 14), INT32_C( 24), INT32_C( 6) }, { INT32_C( 0), INT32_C( 768442368), INT32_C( 67108864), -INT32_C( 1712353344) } }, { { -INT32_C( 755302501), -INT32_C( 1167208778), INT32_C( 624474598), -INT32_C( 2035572872) }, { -INT32_C( 8), -INT32_C( 26), INT32_C( 27), INT32_C( 0) }, { INT32_C( 13826815), INT32_C( 46), INT32_C( 805306368), -INT32_C( 2035572872) } }, { { -INT32_C( 539656151), INT32_C( 
899236431), INT32_C( 1952109052), INT32_C( 2063206001) }, { -INT32_C( 27), INT32_C( 21), -INT32_C( 11), -INT32_C( 32) }, { INT32_C( 27), INT32_C( 1239416832), INT32_C( 953178), INT32_C( 0) } }, { { INT32_C( 442674635), -INT32_C( 347079441), INT32_C( 1046456781), INT32_C( 666458799) }, { INT32_C( 21), -INT32_C( 4), INT32_C( 30), -INT32_C( 31) }, { -INT32_C( 1184890880), INT32_C( 246742990), INT32_C( 1073741824), INT32_C( 0) } }, { { INT32_C( 1000300364), INT32_C( 690482779), INT32_C( 1197967000), INT32_C( 980295904) }, { -INT32_C( 18), INT32_C( 14), INT32_C( 25), INT32_C( 24) }, { INT32_C( 3815), -INT32_C( 74006528), INT32_C( 805306368), -INT32_C( 536870912) } }, { { INT32_C( 1788709647), INT32_C( 294896761), INT32_C( 727317322), INT32_C( 1332070171) }, { INT32_C( 3), -INT32_C( 10), INT32_C( 17), -INT32_C( 19) }, { INT32_C( 1424775288), INT32_C( 287985), -INT32_C( 158072832), INT32_C( 2540) } }, { { INT32_C( 824263864), INT32_C( 1396946184), -INT32_C( 880894800), INT32_C( 1444668259) }, { INT32_C( 1), INT32_C( 4), INT32_C( 18), INT32_C( 2) }, { INT32_C( 1648527728), INT32_C( 876302464), INT32_C( 1925185536), INT32_C( 1483705740) } }, { { INT32_C( 1262166594), -INT32_C( 2019655977), INT32_C( 2102598682), -INT32_C( 690721025) }, { INT32_C( 29), -INT32_C( 32), INT32_C( 13), INT32_C( 6) }, { INT32_C( 1073741824), -INT32_C( 0), INT32_C( 1669545984), -INT32_C( 1256472640) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_shl_epi32(a, b); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = simde_mm_srai_epi32(simde_test_x86_random_i32x4(), 26); simde__m128i r = simde_mm_shl_epi32(a, b); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); 
simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_shl_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { { INT64_C( 1391336280736300016), -INT64_C( 6559944687295028704) }, { INT64_C( 62), INT64_C( 61) }, { INT64_C( 0), INT64_C( 0) } }, { { -INT64_C( 541940659597814931), INT64_C( 1007413717779286854) }, { INT64_C( 59), INT64_C( 20) }, { INT64_C( 7493989779944505344), -INT64_C( 2952846843981070336) } }, { { -INT64_C( 4789328101045054279), -INT64_C( 8112091591502159633) }, { -INT64_C( 23), -INT64_C( 19) }, { INT64_C( 1628090855200), INT64_C( 19711785282530) } }, { { INT64_C( 7734700898225788962), -INT64_C( 7131107535778845747) }, { -INT64_C( 40), -INT64_C( 49) }, { INT64_C( 7034669), INT64_C( 20100) } }, { { -INT64_C( 1262209519863925612), -INT64_C( 6179375759096265433) }, { -INT64_C( 61), -INT64_C( 3) }, { INT64_C( 7), INT64_C( 1533421039326660772) } }, { { -INT64_C( 682282357267360903), -INT64_C( 1983599911717310127) }, { -INT64_C( 49), -INT64_C( 6) }, { INT64_C( 31556), INT64_C( 257236627531128773) } }, { { -INT64_C( 2710056978198528354), INT64_C( 6136472046163000034) }, { -INT64_C( 43), -INT64_C( 47) }, { INT64_C( 1789054), INT64_C( 43602) } }, { { -INT64_C( 5052911414569539972), -INT64_C( 7308454166727325587) }, { -INT64_C( 17), INT64_C( 55) }, { INT64_C( 102186833642120), INT64_C( 3927138875067072512) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_shl_epi64(a, b); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i b = 
simde_mm_cvtepi32_epi64(simde_mm_srai_epi32(simde_test_x86_random_i32x4(), 25)); simde__m128i r = simde_mm_shl_epi64(a, b); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_rot_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[16]; const int8_t b[16]; const int8_t r[16]; } test_vec[] = { { { INT8_C( 40), -INT8_C( 46), INT8_C( 71), INT8_C( 124), INT8_C( 83), -INT8_C( 38), -INT8_C( 51), INT8_C( 79), -INT8_C( 23), INT8_C( 46), -INT8_C( 45), INT8_C( 115), INT8_C( 24), INT8_C( 13), INT8_C( 46), -INT8_C( 68) }, { INT8_C( 6), -INT8_C( 2), INT8_C( 6), INT8_C( 2), INT8_C( 0), -INT8_C( 4), -INT8_C( 2), INT8_C( 1), INT8_C( 6), INT8_C( 0), INT8_C( 4), INT8_C( 7), -INT8_C( 3), -INT8_C( 3), INT8_C( 0), INT8_C( 6) }, { INT8_C( 10), -INT8_C( 76), -INT8_C( 47), -INT8_C( 15), INT8_C( 83), -INT8_C( 83), INT8_C( 115), -INT8_C( 98), INT8_C( 122), INT8_C( 46), INT8_C( 61), -INT8_C( 71), INT8_C( 3), -INT8_C( 95), INT8_C( 46), INT8_C( 47) } }, { { INT8_C( 116), -INT8_C( 62), -INT8_C( 13), INT8_C( 3), INT8_C( 31), INT8_C( 85), -INT8_C( 63), INT8_C( 69), -INT8_C( 114), -INT8_C( 92), INT8_C( 63), -INT8_C( 54), INT8_C( 68), -INT8_C( 8), INT8_C( 35), -INT8_C( 66) }, { -INT8_C( 7), -INT8_C( 5), INT8_C( 1), -INT8_C( 6), -INT8_C( 7), -INT8_C( 6), -INT8_C( 7), -INT8_C( 4), -INT8_C( 5), -INT8_C( 2), -INT8_C( 3), INT8_C( 3), INT8_C( 7), INT8_C( 6), -INT8_C( 3), -INT8_C( 6) }, { -INT8_C( 24), INT8_C( 22), -INT8_C( 25), INT8_C( 12), INT8_C( 62), INT8_C( 85), -INT8_C( 125), INT8_C( 84), INT8_C( 116), INT8_C( 41), -INT8_C( 25), INT8_C( 86), INT8_C( 34), INT8_C( 62), INT8_C( 100), -INT8_C( 6) } }, { { INT8_C( 84), INT8_C( 59), INT8_C( 70), -INT8_C( 44), INT8_C( 75), -INT8_C( 15), -INT8_C( 111), -INT8_C( 25), INT8_C( 113), -INT8_C( 92), INT8_C( 76), INT8_C( 125), -INT8_C( 102), -INT8_C( 90), INT8_C( 14), 
INT8_C( 30) }, { INT8_C( 7), INT8_C( 5), INT8_C( 6), INT8_C( 0), -INT8_C( 1), INT8_C( 6), -INT8_C( 4), INT8_C( 7), INT8_C( 5), INT8_C( 2), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 0), INT8_C( 1), INT8_C( 0) }, { INT8_C( 42), INT8_C( 103), -INT8_C( 111), -INT8_C( 44), -INT8_C( 91), INT8_C( 124), INT8_C( 25), -INT8_C( 13), INT8_C( 46), -INT8_C( 110), INT8_C( 76), INT8_C( 125), INT8_C( 53), -INT8_C( 90), INT8_C( 28), INT8_C( 30) } }, { { -INT8_C( 18), -INT8_C( 60), -INT8_C( 88), -INT8_C( 71), INT8_C( 126), -INT8_C( 74), INT8_C( 38), INT8_C( 125), -INT8_C( 73), -INT8_C( 74), INT8_C( 98), INT8_C( 86), -INT8_C( 63), INT8_C( 31), -INT8_C( 40), INT8_C( 37) }, { INT8_C( 5), -INT8_C( 1), INT8_C( 7), -INT8_C( 1), -INT8_C( 7), -INT8_C( 3), INT8_C( 2), INT8_C( 2), INT8_C( 5), INT8_C( 0), INT8_C( 1), INT8_C( 6), INT8_C( 0), INT8_C( 2), INT8_C( 0), INT8_C( 4) }, { -INT8_C( 35), INT8_C( 98), INT8_C( 84), -INT8_C( 36), -INT8_C( 4), -INT8_C( 42), -INT8_C( 104), -INT8_C( 11), -INT8_C( 10), -INT8_C( 74), -INT8_C( 60), -INT8_C( 107), -INT8_C( 63), INT8_C( 124), -INT8_C( 40), INT8_C( 82) } }, { { INT8_C( 106), -INT8_C( 27), -INT8_C( 127), INT8_C( 120), INT8_C( 96), -INT8_C( 83), INT8_C( 102), INT8_C( 79), -INT8_C( 117), INT8_C( 98), -INT8_C( 23), INT8_C( 27), INT8_C( 98), INT8_C( 107), INT8_C( 103), INT8_C( 24) }, { INT8_C( 2), -INT8_C( 1), INT8_C( 2), INT8_C( 0), INT8_C( 0), -INT8_C( 5), -INT8_C( 7), INT8_C( 5), INT8_C( 6), -INT8_C( 5), INT8_C( 0), -INT8_C( 5), -INT8_C( 4), INT8_C( 3), -INT8_C( 2), -INT8_C( 2) }, { -INT8_C( 87), -INT8_C( 14), INT8_C( 6), INT8_C( 120), INT8_C( 96), INT8_C( 109), -INT8_C( 52), -INT8_C( 23), -INT8_C( 30), INT8_C( 19), -INT8_C( 23), -INT8_C( 40), INT8_C( 38), INT8_C( 91), -INT8_C( 39), INT8_C( 6) } }, { { -INT8_C( 15), INT8_C( 38), INT8_C( 57), -INT8_C( 38), -INT8_C( 102), -INT8_C( 35), INT8_C( 58), -INT8_C( 92), INT8_C( 19), INT8_C( 49), INT8_C( 73), -INT8_C( 107), INT8_C( 36), -INT8_C( 109), -INT8_C( 91), INT8_C( 66) }, { INT8_C( 5), -INT8_C( 6), 
INT8_C( 1), INT8_C( 7), INT8_C( 5), -INT8_C( 5), INT8_C( 4), -INT8_C( 7), -INT8_C( 7), INT8_C( 0), -INT8_C( 3), INT8_C( 4), -INT8_C( 6), INT8_C( 3), INT8_C( 6), -INT8_C( 2) }, { INT8_C( 62), -INT8_C( 104), INT8_C( 114), INT8_C( 109), INT8_C( 83), -INT8_C( 18), -INT8_C( 93), INT8_C( 73), INT8_C( 38), INT8_C( 49), INT8_C( 41), INT8_C( 89), -INT8_C( 112), -INT8_C( 100), INT8_C( 105), -INT8_C( 112) } }, { { -INT8_C( 85), INT8_C( 16), INT8_C( 56), -INT8_C( 89), -INT8_C( 44), INT8_C( 79), -INT8_C( 118), -INT8_C( 58), INT8_C( 119), -INT8_C( 86), -INT8_C( 21), -INT8_C( 116), -INT8_C( 80), -INT8_C( 98), INT8_C( 121), -INT8_C( 22) }, { -INT8_C( 7), INT8_C( 6), -INT8_C( 3), -INT8_C( 4), INT8_C( 5), INT8_C( 2), -INT8_C( 2), INT8_C( 1), INT8_C( 1), -INT8_C( 1), -INT8_C( 4), -INT8_C( 5), INT8_C( 6), INT8_C( 3), INT8_C( 3), -INT8_C( 3) }, { INT8_C( 87), INT8_C( 4), INT8_C( 7), INT8_C( 122), -INT8_C( 102), INT8_C( 61), -INT8_C( 94), -INT8_C( 115), -INT8_C( 18), INT8_C( 85), -INT8_C( 66), INT8_C( 100), INT8_C( 44), -INT8_C( 12), -INT8_C( 53), INT8_C( 93) } }, { { -INT8_C( 65), INT8_C( 42), -INT8_C( 26), -INT8_C( 89), -INT8_C( 75), INT8_C( 9), INT8_C( 63), -INT8_C( 5), -INT8_C( 32), INT8_C( 86), INT8_C( 53), -INT8_C( 56), -INT8_C( 83), -INT8_C( 108), -INT8_C( 54), INT8_C( 87) }, { INT8_C( 7), -INT8_C( 4), INT8_C( 0), INT8_C( 0), INT8_C( 1), INT8_C( 5), INT8_C( 7), -INT8_C( 3), -INT8_C( 3), INT8_C( 1), -INT8_C( 2), INT8_C( 6), INT8_C( 3), -INT8_C( 2), -INT8_C( 3), INT8_C( 6) }, { -INT8_C( 33), -INT8_C( 94), -INT8_C( 26), -INT8_C( 89), INT8_C( 107), INT8_C( 33), -INT8_C( 97), INT8_MAX, INT8_C( 28), -INT8_C( 84), INT8_C( 77), INT8_C( 50), INT8_C( 109), INT8_C( 37), INT8_C( 89), -INT8_C( 43) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i r = simde_mm_rot_epi8(a, b); simde_test_x86_assert_equal_i8x16(r, 
simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i b = simde_test_x86_random_i8x16(); simde__m128i nmask = simde_mm_cmplt_epi8(b, simde_mm_setzero_si128()); b = simde_mm_and_si128(b, simde_mm_set1_epi8(7)); b = simde_mm_or_si128(simde_mm_and_si128(nmask, simde_mm_sub_epi8(simde_mm_setzero_si128(), b)), simde_mm_andnot_si128(nmask, b)); simde__m128i r = simde_mm_rot_epi8(a, b); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_rot_epi16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int16_t a[8]; const int16_t b[8]; const int16_t r[8]; } test_vec[] = { { { -INT16_C( 939), -INT16_C( 24148), INT16_C( 5606), -INT16_C( 29633), -INT16_C( 17546), INT16_C( 4240), INT16_C( 32196), -INT16_C( 11646) }, { -INT16_C( 9), -INT16_C( 4), -INT16_C( 10), -INT16_C( 4), -INT16_C( 10), INT16_C( 14), -INT16_C( 9), -INT16_C( 10) }, { INT16_C( 11006), -INT16_C( 13798), INT16_C( 31109), -INT16_C( 1853), -INT16_C( 8786), INT16_C( 1060), -INT16_C( 7618), -INT16_C( 24396) } }, { { INT16_C( 3991), INT16_C( 24560), INT16_C( 31503), -INT16_C( 16012), INT16_C( 25440), INT16_C( 11621), INT16_C( 25245), -INT16_C( 23672) }, { INT16_C( 0), -INT16_C( 11), -INT16_C( 7), -INT16_C( 16), INT16_C( 14), -INT16_C( 16), INT16_C( 15), -INT16_C( 11) }, { INT16_C( 3991), -INT16_C( 501), INT16_C( 7926), -INT16_C( 16012), INT16_C( 6360), INT16_C( 11621), -INT16_C( 20146), INT16_C( 28948) } }, { { -INT16_C( 5461), -INT16_C( 22757), INT16_C( 16855), -INT16_C( 7739), -INT16_C( 10553), -INT16_C( 6118), INT16_C( 19167), INT16_C( 4804) }, { INT16_C( 10), INT16_C( 12), -INT16_C( 8), -INT16_C( 9), -INT16_C( 9), INT16_C( 2), INT16_C( 9), INT16_C( 12) }, { -INT16_C( 20566), -INT16_C( 17807), -INT16_C( 10431), -INT16_C( 
7440), INT16_C( 25579), -INT16_C( 24469), -INT16_C( 16747), INT16_C( 16684) } }, { { INT16_C( 5817), INT16_C( 17046), INT16_C( 29158), INT16_C( 13849), -INT16_C( 25902), INT16_C( 1817), INT16_C( 15922), INT16_C( 26398) }, { INT16_C( 7), -INT16_C( 6), -INT16_C( 11), -INT16_C( 8), INT16_C( 3), -INT16_C( 2), INT16_C( 4), INT16_C( 4) }, { INT16_C( 23691), INT16_C( 22794), INT16_C( 15566), INT16_C( 6454), -INT16_C( 10604), INT16_C( 16838), -INT16_C( 7389), INT16_C( 29158) } }, { { INT16_C( 6634), -INT16_C( 1848), INT16_C( 4837), INT16_C( 1626), INT16_C( 14762), INT16_C( 21192), INT16_C( 17728), -INT16_C( 5762) }, { INT16_C( 7), -INT16_C( 8), -INT16_C( 15), -INT16_C( 3), INT16_C( 3), -INT16_C( 5), INT16_C( 3), INT16_C( 7) }, { -INT16_C( 2804), -INT16_C( 14088), INT16_C( 9674), INT16_C( 16587), -INT16_C( 12975), INT16_C( 17046), INT16_C( 10754), -INT16_C( 16524) } }, { { INT16_C( 32101), -INT16_C( 28648), INT16_C( 11644), -INT16_C( 26001), INT16_C( 15222), -INT16_C( 27759), INT16_C( 26542), -INT16_C( 7732) }, { -INT16_C( 4), -INT16_C( 1), INT16_C( 12), INT16_C( 15), INT16_C( 8), -INT16_C( 12), INT16_C( 5), -INT16_C( 3) }, { INT16_C( 22486), INT16_C( 18444), -INT16_C( 15657), -INT16_C( 13001), INT16_C( 30267), INT16_C( 14617), -INT16_C( 2612), -INT16_C( 25543) } }, { { -INT16_C( 13997), -INT16_C( 11024), -INT16_C( 13998), INT16_C( 14912), INT16_C( 9823), -INT16_C( 28453), -INT16_C( 21465), -INT16_C( 26640) }, { -INT16_C( 9), INT16_C( 7), INT16_C( 8), -INT16_C( 9), INT16_C( 2), INT16_C( 1), INT16_C( 12), INT16_C( 15) }, { -INT16_C( 22044), INT16_C( 30826), INT16_C( 21193), INT16_C( 8221), -INT16_C( 26244), INT16_C( 8631), INT16_C( 31426), INT16_C( 19448) } }, { { -INT16_C( 31012), -INT16_C( 30025), -INT16_C( 4592), INT16_C( 11701), -INT16_C( 31603), INT16_C( 4621), INT16_C( 7505), INT16_C( 21458) }, { INT16_C( 8), INT16_C( 10), INT16_C( 5), -INT16_C( 2), INT16_C( 4), -INT16_C( 5), -INT16_C( 6), INT16_C( 6) }, { -INT16_C( 9082), -INT16_C( 8662), -INT16_C( 15843), INT16_C( 
19309), INT16_C( 18648), INT16_C( 26768), INT16_C( 17525), -INT16_C( 2924) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi16(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi16(test_vec[i].b); simde__m128i r = simde_mm_rot_epi16(a, b); simde_test_x86_assert_equal_i16x8(r, simde_mm_loadu_epi16(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i16x8(); simde__m128i b = simde_mm_srai_epi16(simde_test_x86_random_i16x8(), 11); simde__m128i r = simde_mm_rot_epi16(a, b); simde_test_x86_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_rot_epi32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int32_t a[4]; const int32_t b[4]; const int32_t r[4]; } test_vec[] = { { { -INT32_C( 1216512066), INT32_C( 1579107551), INT32_C( 618115230), INT32_C( 73084345) }, { -INT32_C( 28), INT32_C( 3), INT32_C( 15), -INT32_C( 1) }, { INT32_C( 2010643435), -INT32_C( 252041478), -INT32_C( 665906581), -INT32_C( 2110941476) } }, { { INT32_C( 850848595), -INT32_C( 846146257), INT32_C( 1056073605), INT32_C( 507661716) }, { INT32_C( 7), -INT32_C( 15), INT32_C( 19), INT32_C( 1) }, { INT32_C( 1534437785), -INT32_C( 1436574943), INT32_C( 1009383315), INT32_C( 1015323432) } }, { { -INT32_C( 1925709218), INT32_C( 1566296280), -INT32_C( 996389585), INT32_C( 736288410) }, { -INT32_C( 13), -INT32_C( 25), INT32_C( 0), INT32_C( 27) }, { INT32_C( 318007744), -INT32_C( 1377539026), -INT32_C( 996389585), -INT32_C( 782297356) } }, { { INT32_C( 301679161), INT32_C( 980309514), INT32_C( 1040059043), -INT32_C( 496443159) }, { INT32_C( 0), INT32_C( 27), -INT32_C( 3), -INT32_C( 2) }, { INT32_C( 301679161), INT32_C( 1372811952), INT32_C( 1740620116), INT32_C( 2023372858) } }, { { -INT32_C( 955621956), 
INT32_C( 1442937011), INT32_C( 1821638531), INT32_C( 1380907999) }, { -INT32_C( 14), INT32_C( 11), INT32_C( 15), -INT32_C( 9) }, { INT32_C( 1995643945), INT32_C( 197499568), -INT32_C( 4082103), -INT32_C( 274126979) } }, { { -INT32_C( 1750711836), INT32_C( 1022273465), -INT32_C( 2052554330), INT32_C( 668465021) }, { INT32_C( 16), -INT32_C( 25), -INT32_C( 4), -INT32_C( 1) }, { INT32_C( 1105500070), INT32_C( 2001984670), INT32_C( 1750763546), -INT32_C( 1813251138) } }, { { INT32_C( 244741972), -INT32_C( 1001749474), -INT32_C( 2109082875), -INT32_C( 508944150) }, { INT32_C( 27), -INT32_C( 28), INT32_C( 16), INT32_C( 19) }, { -INT32_C( 1602964550), INT32_C( 1151877612), -INT32_C( 217742775), INT32_C( 123145553) } }, { { -INT32_C( 379726389), INT32_C( 1605216345), -INT32_C( 2048788837), INT32_C( 1650887446) }, { -INT32_C( 6), INT32_C( 16), -INT32_C( 17), INT32_C( 7) }, { INT32_C( 799373143), -INT32_C( 1470537811), -INT32_C( 78789904), INT32_C( 860195633) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi32(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi32(test_vec[i].b); simde__m128i r = simde_mm_rot_epi32(a, b); simde_test_x86_assert_equal_i32x4(r, simde_mm_loadu_epi32(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i32x4(); simde__m128i b = simde_mm_srai_epi32(simde_test_x86_random_i32x4(), 26); simde__m128i r = simde_mm_rot_epi32(a, b); simde_test_x86_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_rot_epi64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int64_t a[2]; const int64_t b[2]; const int64_t r[2]; } test_vec[] = { { { -INT64_C( 6030121911897921128), -INT64_C( 2079650662157569366) }, { INT64_C( 30), -INT64_C( 57) }, { INT64_C( 
8953607297789340932), -INT64_C( 7940867724235156111) } }, { { INT64_C( 1548849507772250048), INT64_C( 7526071637542088656) }, { -INT64_C( 36), -INT64_C( 31) }, { -INT64_C( 4821987975361992218), INT64_C( 1994355354299004047) } }, { { INT64_C( 18004709759095486), -INT64_C( 263461609286003289) }, { -INT64_C( 34), INT64_C( 34) }, { INT64_C( 747145293120667084), -INT64_C( 5028173739009770433) } }, { { INT64_C( 3834259340609951493), -INT64_C( 133001585811471333) }, { -INT64_C( 53), INT64_C( 50) }, { -INT64_C( 5749845831088330327), INT64_C( 1184438584225868947) } }, { { INT64_C( 8670532492798023567), INT64_C( 7598077189004949423) }, { INT64_C( 63), INT64_C( 62) }, { -INT64_C( 4888105790455764025), -INT64_C( 2712166721176150549) } }, { { INT64_C( 3399013202374530133), INT64_C( 4579055370049277488) }, { -INT64_C( 45), INT64_C( 37) }, { -INT64_C( 4324138247288948387), INT64_C( 6069099710277374756) } }, { { -INT64_C( 5923707415603657701), -INT64_C( 4797921855550821486) }, { INT64_C( 39), INT64_C( 21) }, { -INT64_C( 3719395125632057651), INT64_C( 8053977569354558795) } }, { { INT64_C( 9120434576701794664), -INT64_C( 6561721854230699595) }, { INT64_C( 3), -INT64_C( 5) }, { -INT64_C( 823499681223849149), -INT64_C( 5969661330978944243) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi64(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi64(test_vec[i].b); simde__m128i r = simde_mm_rot_epi64(a, b); simde_test_x86_assert_equal_i64x2(r, simde_mm_loadu_epi64(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i64x2(); simde__m128i b = simde_mm_cvtepi32_epi64(simde_mm_srai_epi32(simde_test_x86_random_i32x4(), 25)); simde__m128i r = simde_mm_rot_epi64(a, b); simde_test_x86_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; 
#endif } static int test_simde_mm_perm_epi8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const int8_t a[16]; const int8_t b[16]; const int8_t c[16]; const int8_t r[16]; } test_vec[] = { { { -INT8_C( 73), -INT8_C( 12), INT8_C( 114), INT8_C( 74), -INT8_C( 9), -INT8_C( 65), -INT8_C( 82), -INT8_C( 5), -INT8_C( 11), -INT8_C( 38), INT8_C( 95), INT8_C( 32), INT8_C( 32), -INT8_C( 59), -INT8_C( 104), -INT8_C( 50) }, { INT8_C( 79), -INT8_C( 48), -INT8_C( 13), -INT8_C( 122), INT8_C( 13), INT8_C( 38), -INT8_C( 124), INT8_C( 90), INT8_C( 51), INT8_C( 9), INT8_C( 16), INT8_C( 81), -INT8_C( 67), INT8_C( 95), INT8_C( 123), INT8_C( 13) }, { -INT8_C( 42), INT8_C( 110), INT8_MIN, -INT8_C( 85), INT8_C( 53), -INT8_C( 28), -INT8_C( 91), -INT8_C( 21), -INT8_C( 84), INT8_C( 26), INT8_C( 109), -INT8_C( 111), -INT8_C( 2), -INT8_C( 103), -INT8_C( 49), INT8_C( 11) }, { -INT8_C( 1), -INT8_C( 26), INT8_C( 0), -INT8_C( 1), -INT8_C( 39), INT8_C( 0), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 16), INT8_C( 92), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), INT8_C( 32) } }, { { -INT8_C( 14), -INT8_C( 34), -INT8_C( 25), INT8_C( 83), -INT8_C( 91), INT8_C( 90), -INT8_C( 20), INT8_C( 23), INT8_C( 36), -INT8_C( 67), INT8_C( 98), INT8_C( 24), INT8_C( 58), INT8_C( 116), -INT8_C( 112), -INT8_C( 48) }, { -INT8_C( 121), INT8_C( 112), INT8_C( 53), INT8_C( 87), -INT8_C( 46), INT8_C( 42), INT8_C( 119), -INT8_C( 121), -INT8_C( 95), -INT8_C( 28), INT8_C( 28), INT8_C( 95), INT8_C( 6), INT8_C( 49), -INT8_C( 4), -INT8_C( 91) }, { -INT8_C( 69), -INT8_C( 73), -INT8_C( 74), -INT8_C( 49), INT8_C( 113), INT8_C( 52), -INT8_C( 93), INT8_C( 36), INT8_C( 102), -INT8_C( 65), INT8_C( 8), INT8_C( 31), INT8_C( 54), INT8_C( 81), -INT8_C( 112), INT8_C( 20) }, { -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 15), INT8_C( 45), -INT8_C( 1), INT8_C( 90), -INT8_C( 56), -INT8_C( 1), INT8_C( 36), -INT8_C( 91), -INT8_C( 120), INT8_C( 14), INT8_C( 0), -INT8_C( 46) } }, { { -INT8_C( 13), -INT8_C( 125), -INT8_C( 
7), INT8_C( 21), INT8_C( 119), -INT8_C( 103), INT8_C( 13), INT8_C( 23), -INT8_C( 7), -INT8_C( 64), -INT8_C( 44), INT8_C( 81), -INT8_C( 33), -INT8_C( 108), INT8_C( 8), -INT8_C( 27) }, { -INT8_C( 45), -INT8_C( 27), -INT8_C( 55), -INT8_C( 62), INT8_C( 25), -INT8_C( 69), -INT8_C( 110), -INT8_C( 55), INT8_C( 32), -INT8_C( 105), INT8_C( 108), INT8_C( 47), INT8_C( 87), -INT8_C( 4), INT8_C( 97), INT8_C( 34) }, { INT8_C( 65), INT8_C( 46), INT8_C( 52), -INT8_C( 81), -INT8_C( 52), INT8_C( 58), INT8_C( 79), -INT8_C( 29), -INT8_C( 33), INT8_C( 54), -INT8_C( 3), INT8_C( 48), INT8_C( 50), -INT8_C( 54), -INT8_C( 39), INT8_C( 19) }, { -INT8_C( 63), -INT8_C( 9), -INT8_C( 26), -INT8_C( 1), -INT8_C( 1), -INT8_C( 109), -INT8_C( 89), -INT8_C( 1), INT8_C( 0), INT8_C( 109), INT8_C( 0), INT8_C( 44), INT8_C( 54), -INT8_C( 1), -INT8_C( 1), -INT8_C( 62) } }, { { INT8_C( 98), -INT8_C( 16), -INT8_C( 121), -INT8_C( 12), INT8_C( 69), -INT8_C( 9), -INT8_C( 95), INT8_C( 78), INT8_C( 123), -INT8_C( 50), -INT8_C( 12), INT8_C( 15), INT8_C( 69), INT8_MAX, -INT8_C( 19), INT8_C( 66) }, { -INT8_C( 102), -INT8_C( 6), INT8_C( 78), -INT8_C( 21), INT8_C( 120), INT8_C( 20), INT8_C( 37), INT8_C( 50), INT8_C( 121), -INT8_C( 49), -INT8_C( 1), -INT8_C( 60), -INT8_C( 90), -INT8_C( 36), INT8_C( 89), INT8_C( 119) }, { -INT8_C( 113), INT8_C( 94), INT8_C( 87), INT8_C( 45), -INT8_C( 100), -INT8_C( 15), -INT8_C( 71), -INT8_C( 6), -INT8_C( 98), -INT8_C( 21), INT8_C( 10), -INT8_C( 119), -INT8_C( 87), -INT8_C( 30), INT8_C( 26), -INT8_C( 69) }, { INT8_C( 0), -INT8_C( 102), INT8_C( 76), INT8_MIN, INT8_C( 0), INT8_C( 0), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 1), -INT8_C( 12), INT8_C( 0), -INT8_C( 1), INT8_C( 0), -INT8_C( 1), -INT8_C( 1) } }, { { INT8_C( 38), INT8_C( 110), -INT8_C( 81), -INT8_C( 110), INT8_C( 36), INT8_C( 47), INT8_C( 120), INT8_C( 78), -INT8_C( 17), INT8_C( 18), INT8_C( 66), -INT8_C( 43), -INT8_C( 28), -INT8_C( 47), INT8_C( 110), INT8_C( 88) }, { -INT8_C( 125), -INT8_C( 71), -INT8_C( 93), INT8_C( 52), 
-INT8_C( 58), -INT8_C( 80), -INT8_C( 64), INT8_C( 22), -INT8_C( 79), INT8_C( 118), INT8_C( 21), INT8_C( 99), INT8_C( 43), INT8_C( 43), -INT8_C( 46), -INT8_C( 41) }, { INT8_C( 12), INT8_C( 18), -INT8_C( 65), INT8_C( 108), INT8_C( 119), -INT8_C( 108), INT8_C( 46), INT8_C( 125), INT8_C( 106), -INT8_C( 120), INT8_C( 47), INT8_C( 43), -INT8_C( 109), -INT8_C( 79), INT8_C( 2), INT8_C( 0) }, { -INT8_C( 28), -INT8_C( 93), -INT8_C( 1), -INT8_C( 40), -INT8_C( 105), INT8_C( 0), -INT8_C( 111), INT8_C( 43), -INT8_C( 67), INT8_C( 0), -INT8_C( 89), INT8_C( 42), INT8_C( 0), -INT8_C( 1), -INT8_C( 81), INT8_C( 38) } }, { { INT8_C( 102), -INT8_C( 120), -INT8_C( 46), -INT8_C( 46), INT8_C( 108), INT8_C( 60), -INT8_C( 96), -INT8_C( 22), -INT8_C( 34), -INT8_C( 8), INT8_C( 125), INT8_C( 102), INT8_C( 114), INT8_C( 100), -INT8_C( 4), -INT8_C( 36) }, { INT8_C( 117), INT8_C( 110), -INT8_C( 25), INT8_C( 64), INT8_C( 26), INT8_C( 74), INT8_C( 49), INT8_C( 8), INT8_C( 18), -INT8_C( 73), INT8_C( 44), -INT8_C( 115), INT8_C( 92), -INT8_C( 123), -INT8_C( 4), -INT8_C( 75) }, { INT8_C( 95), INT8_C( 84), INT8_C( 76), -INT8_C( 49), INT8_C( 54), -INT8_C( 98), INT8_C( 63), -INT8_C( 66), INT8_C( 75), -INT8_C( 8), -INT8_C( 87), INT8_C( 90), INT8_C( 39), -INT8_C( 108), INT8_MIN, INT8_C( 19) }, { -INT8_C( 83), INT8_C( 88), INT8_C( 78), -INT8_C( 1), -INT8_C( 50), INT8_C( 0), INT8_C( 74), -INT8_C( 1), INT8_C( 102), -INT8_C( 1), -INT8_C( 1), INT8_C( 52), INT8_C( 21), INT8_C( 0), INT8_C( 0), INT8_C( 64) } }, { { -INT8_C( 120), INT8_C( 11), -INT8_C( 115), -INT8_C( 40), -INT8_C( 76), INT8_C( 89), INT8_C( 103), INT8_C( 55), INT8_C( 14), INT8_C( 41), -INT8_C( 94), -INT8_C( 59), -INT8_C( 25), INT8_C( 84), INT8_C( 68), -INT8_C( 64) }, { -INT8_C( 106), -INT8_C( 92), INT8_C( 120), -INT8_C( 13), -INT8_C( 53), -INT8_C( 35), -INT8_C( 121), -INT8_C( 38), INT8_C( 32), -INT8_C( 2), INT8_C( 3), INT8_C( 7), -INT8_C( 15), -INT8_C( 60), INT8_C( 70), -INT8_C( 60) }, { INT8_C( 110), INT8_C( 112), INT8_C( 27), -INT8_C( 6), -INT8_C( 
18), -INT8_C( 54), -INT8_C( 70), INT8_C( 81), -INT8_C( 121), INT8_C( 102), -INT8_C( 6), -INT8_C( 103), -INT8_C( 37), INT8_C( 38), -INT8_C( 62), INT8_C( 105) }, { -INT8_C( 35), -INT8_C( 106), INT8_C( 7), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 37), INT8_C( 0), INT8_C( 25), -INT8_C( 1), INT8_C( 0), INT8_C( 0), -INT8_C( 104), -INT8_C( 1), INT8_C( 107) } }, { { INT8_C( 52), INT8_C( 0), -INT8_C( 64), INT8_C( 5), -INT8_C( 44), INT8_C( 1), INT8_C( 94), -INT8_C( 123), -INT8_C( 122), -INT8_C( 110), -INT8_C( 14), INT8_C( 52), INT8_C( 121), -INT8_C( 5), INT8_C( 53), INT8_C( 55) }, { INT8_C( 78), INT8_C( 37), -INT8_C( 9), INT8_C( 111), INT8_C( 112), -INT8_C( 92), INT8_C( 17), -INT8_C( 96), -INT8_C( 92), -INT8_C( 12), -INT8_C( 102), -INT8_C( 46), -INT8_C( 31), INT8_C( 3), INT8_C( 94), -INT8_C( 9) }, { INT8_C( 97), -INT8_C( 16), -INT8_C( 117), -INT8_C( 49), -INT8_C( 8), INT8_C( 19), -INT8_C( 82), INT8_C( 9), -INT8_C( 91), INT8_C( 61), -INT8_C( 34), -INT8_C( 85), INT8_C( 103), INT8_C( 65), INT8_C( 55), -INT8_C( 77) }, { -INT8_C( 1), -INT8_C( 1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 111), -INT8_C( 1), -INT8_C( 110), -INT8_C( 1), -INT8_C( 4), INT8_C( 0), -INT8_C( 1), INT8_C( 94), INT8_C( 0), INT8_C( 95), -INT8_C( 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128i a = simde_mm_loadu_epi8(test_vec[i].a); simde__m128i b = simde_mm_loadu_epi8(test_vec[i].b); simde__m128i c = simde_mm_loadu_epi8(test_vec[i].c); simde__m128i r = simde_mm_perm_epi8(a, b, c); simde_test_x86_assert_equal_i8x16(r, simde_mm_loadu_epi8(test_vec[i].r)); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128i a = simde_test_x86_random_i8x16(); simde__m128i b = simde_test_x86_random_i8x16(); simde__m128i c = simde_test_x86_random_i8x16(); simde__m128i r = simde_mm_perm_epi8(a, b, c); simde_test_x86_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); 
simde_test_x86_write_i8x16(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_permute2_ps (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 a[4]; const simde_float32 b[4]; const int32_t c[4]; const int d; const simde_float32 r[4]; } test_vec[] = { { { SIMDE_FLOAT32_C( 129.98), SIMDE_FLOAT32_C( 401.59), SIMDE_FLOAT32_C( 867.92), SIMDE_FLOAT32_C( 437.42) }, { SIMDE_FLOAT32_C( -611.26), SIMDE_FLOAT32_C( 858.39), SIMDE_FLOAT32_C( 567.61), SIMDE_FLOAT32_C( 819.03) }, { INT32_C( 14), INT32_C( 0), INT32_C( 4), INT32_C( 0) }, INT8_C( 0), { SIMDE_FLOAT32_C( 567.61), SIMDE_FLOAT32_C( 129.98), SIMDE_FLOAT32_C( -611.26), SIMDE_FLOAT32_C( 129.98) } }, { { SIMDE_FLOAT32_C( -835.57), SIMDE_FLOAT32_C( -684.62), SIMDE_FLOAT32_C( 721.85), SIMDE_FLOAT32_C( 146.64) }, { SIMDE_FLOAT32_C( 275.31), SIMDE_FLOAT32_C( -324.99), SIMDE_FLOAT32_C( 132.97), SIMDE_FLOAT32_C( -255.90) }, { INT32_C( 1), INT32_C( 4), INT32_C( 9), INT32_C( 0) }, INT8_C( 2), { SIMDE_FLOAT32_C( -684.62), SIMDE_FLOAT32_C( 275.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -835.57) } }, { { SIMDE_FLOAT32_C( -432.91), SIMDE_FLOAT32_C( -489.85), SIMDE_FLOAT32_C( -106.23), SIMDE_FLOAT32_C( 470.63) }, { SIMDE_FLOAT32_C( -541.61), SIMDE_FLOAT32_C( -363.81), SIMDE_FLOAT32_C( 791.74), SIMDE_FLOAT32_C( -78.71) }, { INT32_C( 14), INT32_C( 2), INT32_C( 7), INT32_C( 13) }, INT8_C( 2), { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -106.23), SIMDE_FLOAT32_C( -78.71), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 432.29), SIMDE_FLOAT32_C( -485.64), SIMDE_FLOAT32_C( -528.31), SIMDE_FLOAT32_C( -203.71) }, { SIMDE_FLOAT32_C( -190.04), SIMDE_FLOAT32_C( -924.92), SIMDE_FLOAT32_C( -444.56), SIMDE_FLOAT32_C( -295.94) }, { INT32_C( 4), INT32_C( 2), INT32_C( 3), INT32_C( 4) }, INT8_C( 3), { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } }, { { SIMDE_FLOAT32_C( 506.03), 
SIMDE_FLOAT32_C( -284.59), SIMDE_FLOAT32_C( 589.16), SIMDE_FLOAT32_C( -679.74) }, { SIMDE_FLOAT32_C( -505.42), SIMDE_FLOAT32_C( -405.87), SIMDE_FLOAT32_C( -748.59), SIMDE_FLOAT32_C( -58.20) }, { INT32_C( 7), INT32_C( 5), INT32_C( 13), INT32_C( 5) }, INT8_C( 0), { SIMDE_FLOAT32_C( -58.20), SIMDE_FLOAT32_C( -405.87), SIMDE_FLOAT32_C( -405.87), SIMDE_FLOAT32_C( -405.87) } }, { { SIMDE_FLOAT32_C( -741.02), SIMDE_FLOAT32_C( 126.26), SIMDE_FLOAT32_C( -83.53), SIMDE_FLOAT32_C( 950.68) }, { SIMDE_FLOAT32_C( 890.50), SIMDE_FLOAT32_C( 173.80), SIMDE_FLOAT32_C( -673.21), SIMDE_FLOAT32_C( 923.95) }, { INT32_C( 9), INT32_C( 8), INT32_C( 9), INT32_C( 12) }, INT8_C( 3), { SIMDE_FLOAT32_C( 126.26), SIMDE_FLOAT32_C( -741.02), SIMDE_FLOAT32_C( 126.26), SIMDE_FLOAT32_C( 890.50) } }, { { SIMDE_FLOAT32_C( -296.98), SIMDE_FLOAT32_C( 579.52), SIMDE_FLOAT32_C( -101.17), SIMDE_FLOAT32_C( -117.53) }, { SIMDE_FLOAT32_C( -257.36), SIMDE_FLOAT32_C( 821.77), SIMDE_FLOAT32_C( 327.49), SIMDE_FLOAT32_C( 415.88) }, { INT32_C( 8), INT32_C( 14), INT32_C( 14), INT32_C( 12) }, INT8_C( 1), { SIMDE_FLOAT32_C( -296.98), SIMDE_FLOAT32_C( 327.49), SIMDE_FLOAT32_C( 327.49), SIMDE_FLOAT32_C( -257.36) } }, { { SIMDE_FLOAT32_C( 920.53), SIMDE_FLOAT32_C( 154.27), SIMDE_FLOAT32_C( -820.43), SIMDE_FLOAT32_C( -34.27) }, { SIMDE_FLOAT32_C( 894.96), SIMDE_FLOAT32_C( 622.43), SIMDE_FLOAT32_C( -238.99), SIMDE_FLOAT32_C( 783.62) }, { INT32_C( 1), INT32_C( 10), INT32_C( 12), INT32_C( 0) }, INT8_C( 3), { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -820.43), SIMDE_FLOAT32_C( 894.96), SIMDE_FLOAT32_C( 0.00) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128 a = simde_mm_loadu_ps(test_vec[i].a); simde__m128 b = simde_mm_loadu_ps(test_vec[i].b); simde__m128i c = simde_mm_loadu_epi32(test_vec[i].c); simde__m128 r; SIMDE_CONSTIFY_4_(simde_mm_permute2_ps, r, (HEDLEY_UNREACHABLE(), simde_mm_setzero_ps ()), test_vec[i].d, a, b, c); simde_test_x86_assert_equal_f32x4(r, 
simde_mm_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128 a = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m128 b = simde_test_x86_random_f32x4(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m128i c = simde_mm_and_si128(simde_test_x86_random_i32x4(), simde_mm_set1_epi32(0x0f)); int d = simde_test_codegen_rand() & 0x03; simde__m128 r; SIMDE_CONSTIFY_4_(simde_mm_permute2_ps, r, (HEDLEY_UNREACHABLE(), simde_mm_setzero_ps ()), d, a, b, c); simde_test_x86_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, d), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm_permute2_pd (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float64 a[2]; const simde_float64 b[2]; const int64_t c[2]; const int d; const simde_float64 r[2]; } test_vec[] = { { { SIMDE_FLOAT64_C( 946.41), SIMDE_FLOAT64_C( -601.79) }, { SIMDE_FLOAT64_C( 79.87), SIMDE_FLOAT64_C( 578.60) }, { INT64_C( 4), INT64_C( 10) }, INT8_C( 0), { SIMDE_FLOAT64_C( 79.87), SIMDE_FLOAT64_C( -601.79) } }, { { SIMDE_FLOAT64_C( -898.86), SIMDE_FLOAT64_C( 677.72) }, { SIMDE_FLOAT64_C( -628.04), SIMDE_FLOAT64_C( -776.48) }, { INT64_C( 10), INT64_C( 12) }, INT8_C( 0), { SIMDE_FLOAT64_C( 677.72), SIMDE_FLOAT64_C( -628.04) } }, { { SIMDE_FLOAT64_C( 204.26), SIMDE_FLOAT64_C( 493.15) }, { SIMDE_FLOAT64_C( -801.08), SIMDE_FLOAT64_C( 980.47) }, { INT64_C( 14), INT64_C( 2) }, INT8_C( 0), { SIMDE_FLOAT64_C( 980.47), SIMDE_FLOAT64_C( 493.15) } }, { { SIMDE_FLOAT64_C( 217.14), SIMDE_FLOAT64_C( 271.58) }, { SIMDE_FLOAT64_C( 17.12), SIMDE_FLOAT64_C( 251.93) }, { INT64_C( 2), INT64_C( 14) }, INT8_C( 0), { SIMDE_FLOAT64_C( 271.58), SIMDE_FLOAT64_C( 251.93) } }, { { 
SIMDE_FLOAT64_C( 91.95), SIMDE_FLOAT64_C( 368.27) }, { SIMDE_FLOAT64_C( 859.31), SIMDE_FLOAT64_C( 440.78) }, { INT64_C( 6), INT64_C( 4) }, INT8_C( 1), { SIMDE_FLOAT64_C( 440.78), SIMDE_FLOAT64_C( 859.31) } }, { { SIMDE_FLOAT64_C( 876.22), SIMDE_FLOAT64_C( -296.73) }, { SIMDE_FLOAT64_C( 325.42), SIMDE_FLOAT64_C( -973.69) }, { INT64_C( 14), INT64_C( 8) }, INT8_C( 0), { SIMDE_FLOAT64_C( -973.69), SIMDE_FLOAT64_C( 876.22) } }, { { SIMDE_FLOAT64_C( -874.63), SIMDE_FLOAT64_C( -143.89) }, { SIMDE_FLOAT64_C( -952.94), SIMDE_FLOAT64_C( 675.71) }, { INT64_C( 6), INT64_C( 14) }, INT8_C( 1), { SIMDE_FLOAT64_C( 675.71), SIMDE_FLOAT64_C( 675.71) } }, { { SIMDE_FLOAT64_C( 733.82), SIMDE_FLOAT64_C( 710.62) }, { SIMDE_FLOAT64_C( 96.16), SIMDE_FLOAT64_C( -263.28) }, { INT64_C( 4), INT64_C( 6) }, INT8_C( 0), { SIMDE_FLOAT64_C( 96.16), SIMDE_FLOAT64_C( -263.28) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m128d a = simde_mm_loadu_pd(test_vec[i].a); simde__m128d b = simde_mm_loadu_pd(test_vec[i].b); simde__m128i c = simde_mm_loadu_epi64(test_vec[i].c); simde__m128d r; SIMDE_CONSTIFY_4_(simde_mm_permute2_pd, r, (HEDLEY_UNREACHABLE(), simde_mm_setzero_pd ()), test_vec[i].d, a, b, c); simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m128d a = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128d b = simde_test_x86_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m128i c = simde_mm_and_si128(simde_test_x86_random_i64x2(), simde_mm_set1_epi64x(0x0e)); int d = simde_test_codegen_rand() & 0x03; simde__m128d r; SIMDE_CONSTIFY_4_(simde_mm_permute2_pd, r, (HEDLEY_UNREACHABLE(), simde_mm_setzero_pd ()), d, a, b, c); simde_test_x86_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x2(2, c, 
SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, d), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_permute2_ps (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float32 a[8]; const simde_float32 b[8]; const int32_t c[8]; const int d; const simde_float32 r[8]; } test_vec[] = { { { SIMDE_FLOAT32_C( 59.18), SIMDE_FLOAT32_C( -800.35), SIMDE_FLOAT32_C( 138.65), SIMDE_FLOAT32_C( -635.55), SIMDE_FLOAT32_C( -813.41), SIMDE_FLOAT32_C( -662.83), SIMDE_FLOAT32_C( 685.60), SIMDE_FLOAT32_C( 286.29) }, { SIMDE_FLOAT32_C( 409.16), SIMDE_FLOAT32_C( 678.46), SIMDE_FLOAT32_C( 717.46), SIMDE_FLOAT32_C( 777.76), SIMDE_FLOAT32_C( 911.80), SIMDE_FLOAT32_C( 24.75), SIMDE_FLOAT32_C( -239.23), SIMDE_FLOAT32_C( -532.76) }, { INT32_C( 12), INT32_C( 12), INT32_C( 8), INT32_C( 0), INT32_C( 10), INT32_C( 4), INT32_C( 4), INT32_C( 6) }, INT8_C( 1), { SIMDE_FLOAT32_C( 409.16), SIMDE_FLOAT32_C( 409.16), SIMDE_FLOAT32_C( 59.18), SIMDE_FLOAT32_C( 59.18), SIMDE_FLOAT32_C( 685.60), SIMDE_FLOAT32_C( 911.80), SIMDE_FLOAT32_C( 911.80), SIMDE_FLOAT32_C( -239.23) } }, { { SIMDE_FLOAT32_C( 648.55), SIMDE_FLOAT32_C( 686.82), SIMDE_FLOAT32_C( 957.82), SIMDE_FLOAT32_C( 927.73), SIMDE_FLOAT32_C( -152.26), SIMDE_FLOAT32_C( 62.11), SIMDE_FLOAT32_C( -895.87), SIMDE_FLOAT32_C( 97.51) }, { SIMDE_FLOAT32_C( 234.72), SIMDE_FLOAT32_C( -955.50), SIMDE_FLOAT32_C( 230.99), SIMDE_FLOAT32_C( 982.73), SIMDE_FLOAT32_C( -683.34), SIMDE_FLOAT32_C( 980.16), SIMDE_FLOAT32_C( -304.30), SIMDE_FLOAT32_C( 324.93) }, { INT32_C( 12), INT32_C( 14), INT32_C( 10), INT32_C( 14), INT32_C( 10), INT32_C( 6), INT32_C( 6), INT32_C( 12) }, INT8_C( 1), { SIMDE_FLOAT32_C( 234.72), SIMDE_FLOAT32_C( 230.99), SIMDE_FLOAT32_C( 957.82), SIMDE_FLOAT32_C( 230.99), SIMDE_FLOAT32_C( -895.87), SIMDE_FLOAT32_C( -304.30), SIMDE_FLOAT32_C( -304.30), SIMDE_FLOAT32_C( -683.34) } }, { { SIMDE_FLOAT32_C( -138.65), 
SIMDE_FLOAT32_C( 743.03), SIMDE_FLOAT32_C( 879.21), SIMDE_FLOAT32_C( 368.69), SIMDE_FLOAT32_C( 482.53), SIMDE_FLOAT32_C( 564.75), SIMDE_FLOAT32_C( -465.62), SIMDE_FLOAT32_C( 515.00) }, { SIMDE_FLOAT32_C( 290.26), SIMDE_FLOAT32_C( 656.00), SIMDE_FLOAT32_C( -44.16), SIMDE_FLOAT32_C( -550.10), SIMDE_FLOAT32_C( 32.07), SIMDE_FLOAT32_C( 356.98), SIMDE_FLOAT32_C( 374.13), SIMDE_FLOAT32_C( 728.87) }, { INT32_C( 0), INT32_C( 4), INT32_C( 2), INT32_C( 6), INT32_C( 12), INT32_C( 2), INT32_C( 0), INT32_C( 0) }, INT8_C( 0), { SIMDE_FLOAT32_C( -138.65), SIMDE_FLOAT32_C( 290.26), SIMDE_FLOAT32_C( 879.21), SIMDE_FLOAT32_C( -44.16), SIMDE_FLOAT32_C( 32.07), SIMDE_FLOAT32_C( -465.62), SIMDE_FLOAT32_C( 482.53), SIMDE_FLOAT32_C( 482.53) } }, { { SIMDE_FLOAT32_C( 636.40), SIMDE_FLOAT32_C( 479.96), SIMDE_FLOAT32_C( -979.13), SIMDE_FLOAT32_C( 272.01), SIMDE_FLOAT32_C( -369.85), SIMDE_FLOAT32_C( 21.03), SIMDE_FLOAT32_C( -946.78), SIMDE_FLOAT32_C( -459.58) }, { SIMDE_FLOAT32_C( -267.43), SIMDE_FLOAT32_C( -20.97), SIMDE_FLOAT32_C( 418.26), SIMDE_FLOAT32_C( -377.36), SIMDE_FLOAT32_C( -729.42), SIMDE_FLOAT32_C( -938.23), SIMDE_FLOAT32_C( 927.12), SIMDE_FLOAT32_C( 281.53) }, { INT32_C( 8), INT32_C( 4), INT32_C( 2), INT32_C( 12), INT32_C( 8), INT32_C( 14), INT32_C( 10), INT32_C( 0) }, INT8_C( 1), { SIMDE_FLOAT32_C( 636.40), SIMDE_FLOAT32_C( -267.43), SIMDE_FLOAT32_C( -979.13), SIMDE_FLOAT32_C( -267.43), SIMDE_FLOAT32_C( -369.85), SIMDE_FLOAT32_C( 927.12), SIMDE_FLOAT32_C( -946.78), SIMDE_FLOAT32_C( -369.85) } }, { { SIMDE_FLOAT32_C( -514.69), SIMDE_FLOAT32_C( 975.89), SIMDE_FLOAT32_C( 603.32), SIMDE_FLOAT32_C( 719.54), SIMDE_FLOAT32_C( 487.53), SIMDE_FLOAT32_C( 496.57), SIMDE_FLOAT32_C( 541.12), SIMDE_FLOAT32_C( -839.84) }, { SIMDE_FLOAT32_C( -858.39), SIMDE_FLOAT32_C( 391.28), SIMDE_FLOAT32_C( -69.55), SIMDE_FLOAT32_C( -161.96), SIMDE_FLOAT32_C( 370.95), SIMDE_FLOAT32_C( -487.90), SIMDE_FLOAT32_C( 348.80), SIMDE_FLOAT32_C( 619.43) }, { INT32_C( 10), INT32_C( 2), INT32_C( 4), INT32_C( 0), 
INT32_C( 14), INT32_C( 4), INT32_C( 6), INT32_C( 0) }, INT8_C( 2), { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 603.32), SIMDE_FLOAT32_C( -858.39), SIMDE_FLOAT32_C( -514.69), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 370.95), SIMDE_FLOAT32_C( 348.80), SIMDE_FLOAT32_C( 487.53) } }, { { SIMDE_FLOAT32_C( -794.06), SIMDE_FLOAT32_C( -753.17), SIMDE_FLOAT32_C( 459.03), SIMDE_FLOAT32_C( -757.07), SIMDE_FLOAT32_C( -644.58), SIMDE_FLOAT32_C( -551.50), SIMDE_FLOAT32_C( -592.33), SIMDE_FLOAT32_C( -775.75) }, { SIMDE_FLOAT32_C( 464.46), SIMDE_FLOAT32_C( 356.85), SIMDE_FLOAT32_C( -207.01), SIMDE_FLOAT32_C( -711.84), SIMDE_FLOAT32_C( 961.24), SIMDE_FLOAT32_C( -241.49), SIMDE_FLOAT32_C( 41.11), SIMDE_FLOAT32_C( -202.49) }, { INT32_C( 14), INT32_C( 2), INT32_C( 12), INT32_C( 4), INT32_C( 4), INT32_C( 6), INT32_C( 12), INT32_C( 6) }, INT8_C( 0), { SIMDE_FLOAT32_C( -207.01), SIMDE_FLOAT32_C( 459.03), SIMDE_FLOAT32_C( 464.46), SIMDE_FLOAT32_C( 464.46), SIMDE_FLOAT32_C( 961.24), SIMDE_FLOAT32_C( 41.11), SIMDE_FLOAT32_C( 961.24), SIMDE_FLOAT32_C( 41.11) } }, { { SIMDE_FLOAT32_C( 320.78), SIMDE_FLOAT32_C( 431.87), SIMDE_FLOAT32_C( -274.88), SIMDE_FLOAT32_C( 753.65), SIMDE_FLOAT32_C( 578.97), SIMDE_FLOAT32_C( 463.67), SIMDE_FLOAT32_C( -550.89), SIMDE_FLOAT32_C( -591.05) }, { SIMDE_FLOAT32_C( -724.23), SIMDE_FLOAT32_C( -48.98), SIMDE_FLOAT32_C( -445.84), SIMDE_FLOAT32_C( -510.12), SIMDE_FLOAT32_C( -448.41), SIMDE_FLOAT32_C( 446.58), SIMDE_FLOAT32_C( -538.38), SIMDE_FLOAT32_C( -504.68) }, { INT32_C( 8), INT32_C( 10), INT32_C( 10), INT32_C( 4), INT32_C( 0), INT32_C( 0), INT32_C( 12), INT32_C( 4) }, INT8_C( 0), { SIMDE_FLOAT32_C( 320.78), SIMDE_FLOAT32_C( -274.88), SIMDE_FLOAT32_C( -274.88), SIMDE_FLOAT32_C( -724.23), SIMDE_FLOAT32_C( 578.97), SIMDE_FLOAT32_C( 578.97), SIMDE_FLOAT32_C( -448.41), SIMDE_FLOAT32_C( -448.41) } }, { { SIMDE_FLOAT32_C( -71.81), SIMDE_FLOAT32_C( 251.01), SIMDE_FLOAT32_C( -822.02), SIMDE_FLOAT32_C( 158.91), SIMDE_FLOAT32_C( 585.99), SIMDE_FLOAT32_C( 229.65), 
SIMDE_FLOAT32_C( 421.67), SIMDE_FLOAT32_C( -216.16) }, { SIMDE_FLOAT32_C( 959.72), SIMDE_FLOAT32_C( -586.41), SIMDE_FLOAT32_C( -198.10), SIMDE_FLOAT32_C( -695.91), SIMDE_FLOAT32_C( 44.89), SIMDE_FLOAT32_C( -666.86), SIMDE_FLOAT32_C( 177.89), SIMDE_FLOAT32_C( 791.62) }, { INT32_C( 0), INT32_C( 12), INT32_C( 6), INT32_C( 2), INT32_C( 0), INT32_C( 12), INT32_C( 6), INT32_C( 12) }, INT8_C( 0), { SIMDE_FLOAT32_C( -71.81), SIMDE_FLOAT32_C( 959.72), SIMDE_FLOAT32_C( -198.10), SIMDE_FLOAT32_C( -822.02), SIMDE_FLOAT32_C( 585.99), SIMDE_FLOAT32_C( 44.89), SIMDE_FLOAT32_C( 177.89), SIMDE_FLOAT32_C( 44.89) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256 a = simde_mm256_loadu_ps(test_vec[i].a); simde__m256 b = simde_mm256_loadu_ps(test_vec[i].b); simde__m256i c = simde_mm256_loadu_epi32(test_vec[i].c); simde__m256 r; SIMDE_CONSTIFY_4_(simde_mm256_permute2_ps, r, (HEDLEY_UNREACHABLE(), simde_mm256_setzero_ps ()), test_vec[i].d, a, b, c); simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256 a = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m256 b = simde_test_x86_random_f32x8(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); simde__m256i c = simde_mm256_and_si256(simde_test_x86_random_i32x8(), simde_mm256_set1_epi32(0x0e)); int d = simde_test_codegen_rand() & 0x03; simde__m256 r; SIMDE_CONSTIFY_4_(simde_mm256_permute2_ps, r, (HEDLEY_UNREACHABLE(), simde_mm256_setzero_ps ()), d, a, b, c); simde_test_x86_write_f32x8(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f32x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i32x8(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, d), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f32x8(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int test_simde_mm256_permute2_pd 
(SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { const simde_float64 a[4]; const simde_float64 b[4]; const int64_t c[4]; const int d; const simde_float64 r[4]; } test_vec[] = { { { SIMDE_FLOAT64_C( -861.88), SIMDE_FLOAT64_C( 169.72), SIMDE_FLOAT64_C( -503.45), SIMDE_FLOAT64_C( -428.77) }, { SIMDE_FLOAT64_C( 479.43), SIMDE_FLOAT64_C( 641.29), SIMDE_FLOAT64_C( -720.10), SIMDE_FLOAT64_C( 728.91) }, { INT64_C( 11), INT64_C( 15), INT64_C( 5), INT64_C( 3) }, INT8_C( 1), { SIMDE_FLOAT64_C( 169.72), SIMDE_FLOAT64_C( 641.29), SIMDE_FLOAT64_C( -720.10), SIMDE_FLOAT64_C( -428.77) } }, { { SIMDE_FLOAT64_C( 645.52), SIMDE_FLOAT64_C( -415.41), SIMDE_FLOAT64_C( -431.99), SIMDE_FLOAT64_C( 760.01) }, { SIMDE_FLOAT64_C( 919.67), SIMDE_FLOAT64_C( -555.51), SIMDE_FLOAT64_C( 257.61), SIMDE_FLOAT64_C( -404.79) }, { INT64_C( 6), INT64_C( 6), INT64_C( 0), INT64_C( 3) }, INT8_C( 3), { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( -12.31), SIMDE_FLOAT64_C( 278.90), SIMDE_FLOAT64_C( -825.23), SIMDE_FLOAT64_C( -451.80) }, { SIMDE_FLOAT64_C( -871.35), SIMDE_FLOAT64_C( 507.62), SIMDE_FLOAT64_C( 803.50), SIMDE_FLOAT64_C( 809.62) }, { INT64_C( 6), INT64_C( 7), INT64_C( 6), INT64_C( 1) }, INT8_C( 2), { SIMDE_FLOAT64_C( 507.62), SIMDE_FLOAT64_C( 507.62), SIMDE_FLOAT64_C( 809.62), SIMDE_FLOAT64_C( -825.23) } }, { { SIMDE_FLOAT64_C( 284.05), SIMDE_FLOAT64_C( -424.20), SIMDE_FLOAT64_C( -550.91), SIMDE_FLOAT64_C( 124.32) }, { SIMDE_FLOAT64_C( 646.87), SIMDE_FLOAT64_C( 711.77), SIMDE_FLOAT64_C( 452.09), SIMDE_FLOAT64_C( 607.04) }, { INT64_C( 2), INT64_C( 12), INT64_C( 8), INT64_C( 5) }, INT8_C( 0), { SIMDE_FLOAT64_C( -424.20), SIMDE_FLOAT64_C( 646.87), SIMDE_FLOAT64_C( -550.91), SIMDE_FLOAT64_C( 452.09) } }, { { SIMDE_FLOAT64_C( 575.86), SIMDE_FLOAT64_C( -948.33), SIMDE_FLOAT64_C( -144.40), SIMDE_FLOAT64_C( -665.48) }, { SIMDE_FLOAT64_C( 780.15), SIMDE_FLOAT64_C( -663.76), SIMDE_FLOAT64_C( 297.14), SIMDE_FLOAT64_C( -826.42) 
}, { INT64_C( 11), INT64_C( 11), INT64_C( 8), INT64_C( 14) }, INT8_C( 1), { SIMDE_FLOAT64_C( -948.33), SIMDE_FLOAT64_C( -948.33), SIMDE_FLOAT64_C( -144.40), SIMDE_FLOAT64_C( -826.42) } }, { { SIMDE_FLOAT64_C( -749.92), SIMDE_FLOAT64_C( -255.38), SIMDE_FLOAT64_C( 354.52), SIMDE_FLOAT64_C( 451.14) }, { SIMDE_FLOAT64_C( 617.73), SIMDE_FLOAT64_C( -381.72), SIMDE_FLOAT64_C( -31.35), SIMDE_FLOAT64_C( -776.43) }, { INT64_C( 12), INT64_C( 15), INT64_C( 5), INT64_C( 5) }, INT8_C( 3), { SIMDE_FLOAT64_C( 617.73), SIMDE_FLOAT64_C( -381.72), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }, { { SIMDE_FLOAT64_C( 782.01), SIMDE_FLOAT64_C( 926.32), SIMDE_FLOAT64_C( -413.97), SIMDE_FLOAT64_C( 703.31) }, { SIMDE_FLOAT64_C( -36.46), SIMDE_FLOAT64_C( 228.48), SIMDE_FLOAT64_C( -122.64), SIMDE_FLOAT64_C( -415.16) }, { INT64_C( 2), INT64_C( 15), INT64_C( 11), INT64_C( 5) }, INT8_C( 0), { SIMDE_FLOAT64_C( 926.32), SIMDE_FLOAT64_C( 228.48), SIMDE_FLOAT64_C( 703.31), SIMDE_FLOAT64_C( -122.64) } }, { { SIMDE_FLOAT64_C( 20.90), SIMDE_FLOAT64_C( -217.15), SIMDE_FLOAT64_C( -187.65), SIMDE_FLOAT64_C( -111.33) }, { SIMDE_FLOAT64_C( -137.06), SIMDE_FLOAT64_C( 81.39), SIMDE_FLOAT64_C( 609.90), SIMDE_FLOAT64_C( 520.68) }, { INT64_C( 13), INT64_C( 5), INT64_C( 1), INT64_C( 6) }, INT8_C( 3), { SIMDE_FLOAT64_C( -137.06), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { simde__m256d a = simde_mm256_loadu_pd(test_vec[i].a); simde__m256d b = simde_mm256_loadu_pd(test_vec[i].b); simde__m256i c = simde_mm256_loadu_epi64(test_vec[i].c); simde__m256d r; SIMDE_CONSTIFY_4_(simde_mm256_permute2_pd, r, (HEDLEY_UNREACHABLE(), simde_mm256_setzero_pd ()), test_vec[i].d, a, b, c); simde_test_x86_assert_equal_f64x4(r, simde_mm256_loadu_pd(test_vec[i].r), 1); } return 0; #else fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { simde__m256d a = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), 
SIMDE_FLOAT64_C(1000.0)); simde__m256d b = simde_test_x86_random_f64x4(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); simde__m256i c = simde_mm256_and_si256(simde_test_x86_random_i64x4(), simde_mm256_set1_epi64x(0x0f)); int d = simde_test_codegen_rand() & 0x03; simde__m256d r; SIMDE_CONSTIFY_4_(simde_mm256_permute2_pd, r, (HEDLEY_UNREACHABLE(), simde_mm256_setzero_pd ()), d, a, b, c); simde_test_x86_write_f64x4(2, a, SIMDE_TEST_VEC_POS_FIRST); simde_test_x86_write_f64x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_i64x4(2, c, SIMDE_TEST_VEC_POS_MIDDLE); simde_test_codegen_write_i8(2, HEDLEY_STATIC_CAST(int8_t, d), SIMDE_TEST_VEC_POS_MIDDLE); simde_test_x86_write_f64x4(2, r, SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmov_si128) SIMDE_TEST_FUNC_LIST_ENTRY(mm256_cmov_si256) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comeq_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comeq_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comeq_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comeq_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comeq_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comeq_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comeq_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comeq_epu64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comneq_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comneq_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comneq_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comneq_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comneq_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comneq_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comneq_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comneq_epu64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comge_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comge_epi16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comge_epi32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comge_epi64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comge_epu8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comge_epu16) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comge_epu32) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comge_epu64) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comgt_epi8) SIMDE_TEST_FUNC_LIST_ENTRY(mm_comgt_epi16) 
/* Continuation of the test registration table that SIMDE_TEST_FUNC_LIST_BEGIN
 * opens earlier in this file.  Each entry names one test by the suffix of its
 * test_simde_<name> function (e.g. mm256_permute2_pd corresponds to
 * test_simde_mm256_permute2_pd).  NOTE(review): how the ENTRY macro registers
 * the function is defined elsewhere -- confirm before reordering entries. */

  /* comgt family (remaining entries) */
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comgt_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comgt_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comgt_epu8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comgt_epu16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comgt_epu32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comgt_epu64)

  /* comle family */
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comle_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comle_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comle_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comle_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comle_epu8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comle_epu16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comle_epu32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comle_epu64)

  /* comlt family */
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comlt_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comlt_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comlt_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comlt_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comlt_epu8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comlt_epu16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comlt_epu32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comlt_epu64)

  /* comtrue family */
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comtrue_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comtrue_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comtrue_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comtrue_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comtrue_epu8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comtrue_epu16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comtrue_epu32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comtrue_epu64)

  /* comfalse family */
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comfalse_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comfalse_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comfalse_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comfalse_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comfalse_epu8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comfalse_epu16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comfalse_epu32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comfalse_epu64)

  /* frcz family (128-bit packed, scalar, 256-bit packed) */
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_frcz_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_frcz_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_frcz_ss)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_frcz_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_frcz_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_frcz_pd)

  /* haddw / haddd / haddq families */
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_haddw_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_haddw_epu8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_haddd_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_haddd_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_haddd_epu8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_haddd_epu16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_haddq_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_haddq_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_haddq_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_haddq_epu8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_haddq_epu16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_haddq_epu32)

  /* hsubw / hsubd / hsubq */
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_hsubw_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_hsubd_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_hsubq_epi32)

  /* macc* and madd* families */
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_macc_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_macc_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_maccd_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_macclo_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_macchi_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_maccs_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_maccs_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_maccsd_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_maccslo_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_maccshi_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_maddd_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_maddsd_epi16)

  /* sha / shl / rot families */
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sha_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sha_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sha_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sha_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_shl_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_shl_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_shl_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_shl_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_rot_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_rot_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_rot_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_rot_epi64)

  /* perm / permute2 families */
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_perm_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_permute2_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_permute2_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permute2_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm256_permute2_pd)
SIMDE_TEST_FUNC_LIST_END

/* The operand of the directive below lies past the end of this span. */
#include